Merge "Project import generated by Copybara." into 11.0.10

commit: 017c3036efc179312f18eb3dfa01f3deadbb7aa3 [log] [tgz]
author: Phil Abercrombie <pabercrombie@google.com> Wed Mar 27 03:33:19 2024 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Wed Mar 27 03:33:19 2024 +0000
tree: 7d6a022292311ddbcb5d0485a4259ad6ecbcdddd
parent: 839ffed0d213f7991a8414e738189fd3eb4a7e11 [diff]
parent: 7497c4e43f1fb791dfc4ea04c146ce143101ac9b [diff]
diff --git a/scst/.gitignore b/scst/.gitignore
index b2b65ad..60d04ac 100644
--- a/scst/.gitignore
+++ b/scst/.gitignore

@@ -2,6 +2,7 @@
 # :3,$!LC_ALL=C sort -fu
 
 *.cmd
+*.gcno
 *.ko
 *.ko.unsigned
 *.mod

diff --git a/scst/.mailmap b/scst/.mailmap
new file mode 100644
index 0000000..33cde67
--- /dev/null
+++ b/scst/.mailmap

@@ -0,0 +1,17 @@
+#
+# This list is used by git-shortlog to fix a few botched name translations
+# in the git archive, either because the author's full name was messed up
+# and/or not always written the same way, making contributions from the
+# same person appearing not to be so or badly displayed. Also allows for
+# old email addresses to map to new email addresses.
+#
+# For format details, see "MAPPING AUTHORS" in "man git-shortlog".
+#
+# Please keep this list dictionary sorted.
+#
+Bart Van Assche <bvanassche@acm.org> Bart Van Assche <bart@debian10-vm>
+Bart Van Assche <bvanassche@acm.org> Bart Van Assche <bart.vanassche@sandisk.com>
+Bart Van Assche <bvanassche@acm.org> Bart Van Assche <bart.vanassche@wdc.com>
+Bart Van Assche <bvanassche@acm.org> Bart Van Assche <bvanassche@users.noreply.github.com>
+Gleb Chesnokov <Chesnokov.G@raidix.com> Chesnokov Gleb <Chesnokov.G@raidix.com>
+Gleb Chesnokov <gleb.chesnokov@scst.dev> <Chesnokov.G@raidix.com>

diff --git a/scst/INSTALL.md b/scst/INSTALL.md
index 9b03148..bbea35d 100644
--- a/scst/INSTALL.md
+++ b/scst/INSTALL.md

@@ -41,9 +41,22 @@
 Since the above step installs several kernel modules into directory
 /lib/modules/$(uname -r), that step has to be repeated every time a new kernel
 or a kernel update has been installed. If you want to avoid this, install the
-scst-dkms package instead of the scst package. That will cause SCST to be
-rebuilt and installed every time a kernel version is booted for which the SCST
-kernel modules had not yet been built.
+scst-dkms package instead of the scst package.
+
+For example, if you want to have dkms support for your SCST rpm install, then
+you would use the following command to make your SCST packages:
+
+    make rpm-dkms
+
+or
+
+    make scst-dkms-rpm
+
+`make rpm-dkms` also builds the scstadmin packages in addition to the SCST
+dkms packages. Both commands create rpm packages that cause SCST to be
+rebuilt and installed automatically whenever a kernel version is booted for
+which the SCST kernel modules have not yet been built, so the SCST rpm
+packages do not need to be rebuilt after each kernel update.
 
 ## Configuring SCST
 

diff --git a/scst/Makefile b/scst/Makefile
index 01845a6..8687ce8 100644
--- a/scst/Makefile
+++ b/scst/Makefile

@@ -35,13 +35,31 @@
 		cat $(KDIR)/include/config/kernel.release 2>/dev/null || \
 		make -s -C $(KDIR) kernelversion))
      endif
+else
+     ifndef KVER
+	KVER=$(strip $(shell uname -r))
+     endif
+     KDIR=/lib/modules/$(KVER)/build
 endif
 
+OLD_QLA_INI_DIR=qla2x00t
+OLD_QLA_DIR=$(OLD_QLA_INI_DIR)/qla2x00-target
+
+NEW_QLA_INI_DIR=qla2x00t-32gbit
+NEW_QLA_DIR=$(NEW_QLA_INI_DIR)/qla2x00-target
+
+ifeq ($(QLA_32GBIT),no)
+    QLA_INI_DIR=$(OLD_QLA_INI_DIR)
+    QLA_DIR=$(OLD_QLA_DIR)
+else
+    QLA_INI_DIR=$(NEW_QLA_INI_DIR)
+    QLA_DIR=$(NEW_QLA_DIR)
+endif
+
+
 SCST_DIR=scst
 DOC_DIR=doc
 SCSTADM_DIR=scstadmin
-QLA_INI_DIR=qla2x00t
-QLA_DIR=qla2x00t/qla2x00-target
 USR_DIR=usr
 SRP_DIR=srpt
 SCST_LOCAL_DIR=scst_local
@@ -53,12 +71,19 @@
 REVISION ?= $(shell if [ -e .svn ]; then				\
 		      svn info | sed -n 's/^Revision:[[:blank:]]*/./p';	\
 		    elif [ -e .git ]; then				\
+                      echo -n .;					\
 		      git log | grep -c ^commit;			\
 		    fi)
-VERSION = $(shell echo -n "$$(sed -n 's/^\#define[[:blank:]]SCST_VERSION_NAME[[:blank:]]*\"\([^-]*\).*\"/\1/p' scst/include/scst_const.h)")
-DEBIAN_REVISION=1
+VERSION_WITHOUT_REVISION := $(shell echo -n "$$(sed -n 's/^\#define[[:blank:]]SCST_VERSION_NAME[[:blank:]]*\"\([^-]*\).*\"/\1/p' scst/include/scst_const.h)")
+VERSION := $(VERSION_WITHOUT_REVISION)$(REVISION)
+DEBIAN_REVISION=1.1
 RPMTOPDIR ?= $(shell if [ $$(id -u) = 0 ]; then echo /usr/src/packages;\
 		else echo $$PWD/rpmbuilddir; fi)
+SCST_SOURCE_FILES := $(shell if [ -e scripts/list-source-files ]; then	\
+				scripts/list-source-files;		\
+			else						\
+				echo scripts-source-files-is-missing;	\
+			fi)
 
 help:
 	@echo "		all               : make all"
@@ -129,11 +154,20 @@
 		$(SCST_LOCAL_DIR) $(FCST_DIR) $(USR_DIR) $(SCSTADM_DIR); do \
 		$(MAKE) -j$$(nproc) -C "$$d" $@ || break;		    \
 	done
-	find . -type d -name "rpmbuilddir" | xargs rm -rf
 
 tags:
 	find . -type f -name "*.[ch]" | ctags --c-kinds=+p --fields=+iaS --extra=+q -e -L-
 
+cov-build:
+	-for d in $(SCST_DIR) $(ISCSI_DIR) $(OLD_QLA_DIR) $(NEW_QLA_DIR) $(SRP_DIR)  \
+		$(SCST_LOCAL_DIR) $(FCST_DIR) $(USR_DIR) $(SCSTADM_DIR); do	     \
+		if [[ $$d = $(OLD_QLA_DIR) || $$d = $(NEW_QLA_DIR) ]]; then	     \
+			BUILD_2X_MODULE=y $(MAKE) -j$$(nproc) -C "$$d" all || break; \
+		else								     \
+			$(MAKE) -j$$(nproc) -C "$$d" all || break;		     \
+		fi								     \
+	done
+
 scst:
 	cd $(SCST_DIR) && $(MAKE) all
 
@@ -288,8 +322,9 @@
 	mkdir "$${name}-$(3)" &&					\
 	{								\
 	  {								\
-	    scripts/list-source-files &&				\
+	    scripts/list-source-files | grep -v '/\.gitignore' &&	\
 	    if [ -e debian/changelog ]; then echo debian/changelog; fi;	\
+	    if [ -e debian/compat ]; then echo debian/compat; fi;	\
 	  } |								\
 	  $(4) |							\
 	  tar -T- -cf- |						\
@@ -299,8 +334,10 @@
 	tar -c$(1) -f "$${name}-$(3).tar.$(2)" "$${name}-$(3)" &&	\
 	rm -rf "$${name}-$(3)"
 
-scst-dist-gzip:
-	$(call make-scst-dist,j,bz2,$(VERSION),grep -E '^doc/|^fcst/|^iscsi-scst/|^Makefile|^qla2x00t(|_git)/|^scripts/|^scst.spec|^scst/|^scst_local/|^srpt/|^usr/|^scstadmin/')
+scst-dist-gzip: scst-$(VERSION).tar.bz2
+
+scst-$(VERSION).tar.bz2: $(SCST_SOURCE_FILES)
+	$(call make-scst-dist,j,bz2,$(VERSION),grep -E '^debian/|^doc/|^fcst/|^iscsi-scst/|^Makefile|^qla2x00t(|-32gbit)/|^scripts/|^scst.spec|^scst/|^scst_local/|^srpt/|^usr/|^scstadmin/')
 
 scst-rpm:
 	name=scst &&							\
@@ -314,9 +351,10 @@
 	    -e "s|@depmod@|$(shell which depmod)|g"			\
 		<$${name}.spec.in >$${name}.spec &&			\
 	MAKE="$(MAKE)" rpmbuild --define="%_topdir $${rpmtopdir}"	\
-					--define="%rpm_release $(REVISION)"                     \
 	    $(if $(KVER),--define="%kversion $(KVER)")			\
 	    $(if $(KDIR),--define="%kdir $(KDIR)")			\
+		--define="debug_package %{nil}" \
+		--define="__strip /bin/true" \
 	    -ba $${name}.spec &&					\
 	rm -f $${name}-$(VERSION).tar.bz2
 
@@ -339,10 +377,15 @@
 rpm:
 	$(MAKE) scst-rpm
 	$(MAKE) -C scstadmin rpm
-	# generate version file for fileio_tgt
-	$(MAKE) -C scst/src ../include/scst_itf_ver.h
-	$(MAKE) -C usr/fileio rpm
+	@if [ "$$(id -u)" != 0 ]; then			\
+	    echo;					\
+	    echo "The following RPMs have been built:";	\
+	    find -name '*.rpm';				\
+	fi
 
+rpm-dkms:
+	$(MAKE) scst-dkms-rpm
+	$(MAKE) -C scstadmin rpm
 	@if [ "$$(id -u)" != 0 ]; then			\
 	    echo;					\
 	    echo "The following RPMs have been built:";	\
@@ -353,25 +396,31 @@
 	sed 's/%{scst_version}/$(VERSION)-$(DEBIAN_REVISION)/'		\
 	  <debian/changelog.in >debian/changelog
 
-../scst_$(VERSION).orig.tar.gz: debian/changelog Makefile
+debian/compat:
+	dpkg-query -W --showformat='$${Version}\n' debhelper 2>/dev/null | \
+	sed 's/\..*//' >$@
+
+../scst_$(VERSION).orig.tar.gz: debian/changelog debian/compat Makefile	\
+		$(SCST_SOURCE_FILES)
 	$(call make-scst-dist,z,gz,$(VERSION),cat) &&			\
 	mv "scst-$(VERSION).tar.gz" "$@"
 
-../scst_$(VERSION).orig.tar.xz: debian/changelog Makefile
+../scst_$(VERSION).orig.tar.xz: debian/changelog debian/compat Makefile	\
+		$(SCST_SOURCE_FILES)
 	$(call make-scst-dist,J,xz,$(VERSION),cat) &&			\
 	mv "scst-$(VERSION).tar.xz" "$@"
 
 dpkg: ../scst_$(VERSION).orig.tar.gz
-	@if [ -z "$$DEBEMAIL" ]; then					\
-	  echo "Error: \$$DEBEMAIL has not been set";			\
-	  false;							\
-	fi &&								\
-	if [ -z "$$DEBFULLNAME" ]; then					\
-	  echo "Error: \$$DEBFULLNAME has not been set";		\
-	  false;							\
-	fi &&								\
+	@[ -n "$$DEBEMAIL" ] || export DEBEMAIL=bvanassche@acm.org &&	\
+	[ -n "$$DEBFULLNAME" ] || export DEBFULLNAME="Bart Van Assche" &&\
+	echo "KDIR=$(KDIR)" &&						\
+	echo "KVER=$(KVER)" &&						\
 	sed 's/%{scst_version}/$(VERSION)/'				\
 	  <debian/scst.dkms.in >debian/scst.dkms &&			\
+	sed 's/%{KVER}/$(KVER)/'					\
+	  <debian/scst.preinst.in >debian/scst.preinst &&		\
+	sed 's/%{KVER}/$(KVER)/'					\
+	  <debian/scst.postinst.in >debian/scst.postinst &&		\
 	output_files=(							\
 		../*_$(VERSION)-$(DEBIAN_REVISION)_*.deb		\
 		../*_$(VERSION)-$(DEBIAN_REVISION)_*.ddeb		\
@@ -392,11 +441,8 @@
 	else								\
 	  buildopts+=(-j4);						\
 	fi &&								\
-	if false; then							\
-	  dpkg-buildpackage "$${buildopts[@]}";				\
-	else								\
-	  debuild "$${buildopts[@]}" --lintian-opts --profile debian;	\
-	fi &&								\
+	DEB_CC_SET="$(CC)" DEB_KVER_SET=$(KVER) DEB_KDIR_SET=$(KDIR) DEB_QLA_DIR_SET=$(QLA_DIR) \
+	   DEB_QLA_INI_DIR_SET=$(QLA_INI_DIR) debuild "$${buildopts[@]}" --lintian-opts --profile debian && \
 	mkdir -p dpkg &&						\
 	for f in "$${output_files[@]}" ../scst_$(VERSION).orig.tar.[gx]z; do\
 		mv $$f dpkg || true;					\
@@ -406,6 +452,13 @@
 
 release-archive:
 	$(MAKE) 2release
+	scripts/generate-release-archive scst "$(VERSION_WITHOUT_REVISION)"
+	md5sum ../scst-$(VERSION_WITHOUT_REVISION).tar.bz2	\
+	  > ../scst-$(VERSION_WITHOUT_REVISION).tar.bz2.md5sum
+	$(MAKE) 2debug
+
+multiple-release-archives:
+	$(MAKE) 2release
 	for m in $$(find -name Makefile |			\
 		    xargs grep -l '^release-archive:' |		\
 		    grep -v '^\./Makefile');			\

diff --git a/scst/README.md b/scst/README.md
index 58d6c7f..5330deb 100644
--- a/scst/README.md
+++ b/scst/README.md

@@ -1,3 +1,5 @@
+[![Coverity](https://img.shields.io/coverity/scan/25131.svg)](https://scan.coverity.com/projects/scst-project)
+
 # Overview
 
 This is the source code repository of the SCST project. SCST is a collection
@@ -17,13 +19,22 @@
 
 ## QLogic target driver
 
-Two QLogic target drivers are included in the SCST project. The driver in
-the qla2x00t directory is a very stable driver that supports up to 16 Gb/s
-adapters. It is very stable, well tested and actively used in many production
-setups.
+Two QLogic target drivers are included in the SCST project.
 
-There is also a newer driver that supports 32 Gb/s FC in the qla2x00t-32gbit
-directory. That driver has not yet reached the same maturity level as the
-old qla2x00t driver.
+The default driver is located in the qla2x00t-32gbit directory and supports
+up to 32 Gb/s FC. It is the newer of the two drivers.
+
+Should anyone wish to switch back to the older driver, which only supports up
+to 16 Gb/s adapters, it is located in the qla2x00t directory. To use the older
+driver, build SCST with the environment variable `QLA_32GBIT=no` set.
 
 Vladislav Bolkhovitin <vst@vlnb.net>, http://scst.sourceforge.net
+
+## Sourceforge achievements
+<p align="middle">
+<img src="./www/images/sourceforge_badges/oss-users-love-us-white.svg" width="125" />
+<img src="./www/images/sourceforge_badges/oss-community-choice-white.svg" width="125" />
+<img src="./www/images/sourceforge_badges/oss-sf-favorite-white.svg" width="125" />
+<img src="./www/images/sourceforge_badges/oss-community-leader-white.svg" width="125" />
+<img src="./www/images/sourceforge_badges/oss-open-source-excellence-white.svg" width="125" />
+</p>

diff --git a/scst/README.performance b/scst/README.performance
new file mode 100644
index 0000000..7a41d64
--- /dev/null
+++ b/scst/README.performance

@@ -0,0 +1,67 @@
+SCST Performance
+================
+
+A question that is asked often is how to tune performance. This means how to
+improve the IOPS and/or bandwidth measured at the initiator side. In this
+README it is explained how to optimize storage performance.
+
+Local storage
+-------------
+Start with measuring the performance of the local block device(s). IOPS can be
+measured e.g. as follows:
+
+    fio --ioengine=libaio --rw=randread --ioscheduler=none --numjobs=$(nproc) \
+    --runtime=60 --group_reporting=1 --gtod_reduce=1 --norandommap \
+    --thread --buffered=0 --iodepth=256 --iodepth_batch=128 --bs=4k \
+    --name=bdev --filename=/dev/...
+
+The bandwidth supported by a block device can be measured by increasing the
+block size to a larger value in the above command, e.g. --bs=1M.
+
+Storage network
+---------------
+Start with measuring the network bandwidth using your favorite tool, e.g.
+netperf for non-RDMA networks or ib_write_bw for RDMA networks.
+
+Next, add a nullio LUN to SCST, e.g. by adding the following in /etc/scst.conf:
+
+    HANDLER vdisk_nullio {
+        DEVICE disk09 {
+            blocksize 4096
+            size_mb   256
+        }
+    }
+
+    TARGET_DRIVER ... {
+        TARGET ... {
+            LUN ... disk09
+        }
+    }
+
+After a nullio LUN has been added, verify that this LUN is visible at the
+initiator side. If it is not visible at the initiator side, consider
+rescanning LUNs or disconnecting and reconnecting the initiator system to the
+SCST server.
+
+Once the nullio LUN is visible at the initiator side, measure IOPS and
+bandwidth. The bandwidth should be close to the network bandwidth. When using
+the Linux iSCSI initiator this may require configuring multiple iSCSI sessions
+and activating multipathd on top of the multiple iSCSI sessions. The sequence
+for logging in with iSCSI and activating multiple iSCSI sessions is as follows:
+
+    iscsiadm -m iface -I iface2 -o new
+    iscsiadm -m iface -I iface2 -o update -n iface.initiatorname -v ${iqn2}
+    iscsiadm -m discovery -t st -p ${scst_ip_address}
+    iscsiadm -m discovery -t st -p ${scst_ip_address} -I iface2
+    iscsiadm -m node -p ${scst_ip_address} -l
+    iscsiadm -m node -p ${scst_ip_address} -I iface2 -l
+
+SCST Configuration
+------------------
+If the number of IOPS measured at the initiator side is significantly lower
+than the minimum of the IOPS supported by the local storage and the storage
+network, further tuning is required. Look up in /proc/interrupts which CPU
+cores process the most network and storage interrupts and configure the SCST
+kernel threads such that these run on other CPU cores than those that process
+the most interrupts by configuring the cpu_mask attribute. More information
+about the SCST cpu_mask sysfs attribute is available in the SCST README.

diff --git a/scst/SVN_TAGS b/scst/SVN_TAGS
index 94c9b32..95113d4 100644
--- a/scst/SVN_TAGS
+++ b/scst/SVN_TAGS

@@ -27,3 +27,5 @@
 3.3.0					7830 on the 3.3.x branch
 3.4.x branch start                      8675, which is a copy of trunk r8674
 3.4.0					8681 on the trunk
+3.5.x branch start			9293 on the trunk
+3.5.0 	     				9293 on the trunk

diff --git a/scst/debian/compat b/scst/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/scst/debian/compat
+++ /dev/null

@@ -1 +0,0 @@
-9

diff --git a/scst/debian/rules b/scst/debian/rules
index b7ca759..0efc12d 100755
--- a/scst/debian/rules
+++ b/scst/debian/rules

@@ -13,7 +13,15 @@
 # package maintainers to append LDFLAGS
 #export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
 
-SUBDIRS=scst fcst iscsi-scst qla2x00t/qla2x00-target scst_local scstadmin srpt
+# rules won't see variables unless they're using DEB_foo_SET syntax. So use that as
+# an intermediary. Also, export variables for sub-makes to be able to see them.
+export KVER=$(DEB_KVER_SET)
+export KDIR=$(DEB_KDIR_SET)
+export CC=$(DEB_CC_SET)
+export QLA_DIR=$(DEB_QLA_DIR_SET)
+export QLA_INI_DIR=$(DEB_QLA_INI_DIR_SET)
+
+SUBDIRS=scst $(shell grep -qw '^CONFIG_LIBFC' /boot/config-$(KVER) && echo fcst) iscsi-scst $(QLA_DIR) scst_local scstadmin srpt
 DESTDIR=$(CURDIR)/debian/tmp
 VERSION:=$(shell head -n1 debian/changelog | sed 's/.*(\([0-9.]*\).*).*/\1/')
 
@@ -24,18 +32,20 @@
 clean:
 	dh_testdir &&							\
 	dh_prep -Xqla_isp/TAGS -Xdebian/changelog &&			\
-	scripts/clean-source-tree -x debian/changelog &&		\
-	rm -f scstadmin/scstadmin
+	scripts/clean-source-tree -x debian/changelog -x debian/compat -x debian/scst.preinst \
+	    -x debian/scst.postinst
 
 build:
+	[ -n "$(QLA_INI_DIR)" ] &&					\
 	make 2release &&						\
 	export BUILD_2X_MODULE=y &&					\
-	export CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y &&	\
+	export CONFIG_SCSI_QLA_FC=y &&					\
+	export CONFIG_SCSI_QLA2XXX_TARGET=y &&				\
 	for d in $(SUBDIRS); do $(MAKE) -C $$d; done &&			\
 	{								\
 		echo dkms.conf &&					\
 		echo Makefile &&					\
-		for d in fcst iscsi-scst qla2x00t scst scst_local srpt; do\
+		for d in fcst iscsi-scst $(QLA_INI_DIR) scst scst_local srpt; do\
 			echo $$d;					\
 		done;							\
 	} | sed "s,^,usr/src/scst-$(VERSION)/," >debian/scst-dkms.install
@@ -50,7 +60,8 @@
 	export PREFIX=/usr &&						\
 	export DESTDIR="$(DESTDIR)" &&					\
 	export BUILD_2X_MODULE=y &&					\
-	export CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y &&	\
+	export CONFIG_SCSI_QLA_FC=y &&					\
+	export CONFIG_SCSI_QLA2XXX_TARGET=y &&				\
 	for d in $(SUBDIRS); do						\
 	    if [ $$d = scst ]; then					\
 		{ $(MAKE) -C $$d install || break; }			\
@@ -69,7 +80,7 @@
 	cp debian/scst.dkms						\
 		$(DESTDIR)/usr/src/scst-$(VERSION)/dkms.conf &&		\
 	scripts/list-source-files |					\
-	grep -E '^Makefile$$|^(fcst|iscsi-scst|qla2x00t|scst|scst_local|srpt)/'|\
+	grep -E '^Makefile$$|^(fcst|iscsi-scst|$(QLA_INI_DIR)|scst|scst_local|srpt)/'|\
 	tar -T- -cf- |							\
 	tar -C $(DESTDIR)/usr/src/scst-$(VERSION) -xf- &&		\
 	find $(DESTDIR) -type f -print0 | xargs -0 -r chmod 0644 &&	\
@@ -97,10 +108,8 @@
 override_dh_installinit:
 	dh_installinit --onlyscripts
 
-# dh_make generated override targets
-# This is example for Cmake (See https://bugs.debian.org/641051 )
-#override_dh_auto_configure:
-#	dh_auto_configure -- #	-DCMAKE_LIBRARY_PATH=$(DEB_HOST_MULTIARCH)
+override_dh_auto_configure:
+	true
 
 .PHONY: clean binary binary-arch binary-indep build build-arch build-indep \
 	install

diff --git a/scst/debian/scst.dkms.in b/scst/debian/scst.dkms.in
index 615e76b..adba7de 100644
--- a/scst/debian/scst.dkms.in
+++ b/scst/debian/scst.dkms.in

@@ -1,7 +1,7 @@
 PACKAGE_VERSION="${PACKAGE_VERSION}"
 PACKAGE_NAME="scst"
 AUTOINSTALL=yes
-MAKE[0]="export KVER=${kernelver} KDIR=${kernel_source_dir} BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y && make 2release && make -sC scst && make -sC fcst && make -sC iscsi-scst && make -sC qla2x00t/qla2x00-target && make -sC scst_local && make -sC srpt"
+MAKE[0]="export KVER=${kernelver} KDIR=${kernel_source_dir} BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y && make 2release && make -sC scst && make -sC fcst && make -sC iscsi-scst && make -sC qla2x00t-32gbit/qla2x00-target && make -sC scst_local && make -sC srpt"
 CLEAN="make clean"
 # Remove any existing ib_srpt.ko kernel modules
 PRE_INSTALL="find /lib/modules/${kernelver} -name ib_srpt.ko -exec rm {} \;"
@@ -21,10 +21,10 @@
 BUILT_MODULE_LOCATION[3]="iscsi-scst/kernel/isert-scst"
 DEST_MODULE_LOCATION[3]="/extra"
 BUILT_MODULE_NAME[4]="qla2x00tgt"
-BUILT_MODULE_LOCATION[4]="qla2x00t/qla2x00-target"
+BUILT_MODULE_LOCATION[4]="qla2x00t-32gbit/qla2x00-target"
 DEST_MODULE_LOCATION[4]="/extra"
 BUILT_MODULE_NAME[5]="qla2xxx_scst"
-BUILT_MODULE_LOCATION[5]="qla2x00t"
+BUILT_MODULE_LOCATION[5]="qla2x00t-32gbit"
 DEST_MODULE_LOCATION[5]="/extra"
 BUILT_MODULE_NAME[6]="scst_cdrom"
 BUILT_MODULE_LOCATION[6]="scst/src/dev_handlers"

diff --git a/scst/debian/scst.postinst b/scst/debian/scst.postinst
deleted file mode 100644
index d1569e3..0000000
--- a/scst/debian/scst.postinst
+++ /dev/null

@@ -1,42 +0,0 @@
-#!/bin/sh
-# postinst script for scst
-#
-# see: dh_installdeb(1)
-
-set -e
-
-# summary of how this script can be called:
-#        * <postinst> `configure' <most-recently-configured-version>
-#        * <old-postinst> `abort-upgrade' <new version>
-#        * <conflictor's-postinst> `abort-remove' `in-favour' <package>
-#          <new-version>
-#        * <postinst> `abort-remove'
-#        * <deconfigured's-postinst> `abort-deconfigure' `in-favour'
-#          <failed-install-package> <version> `removing'
-#          <conflicting-package> <version>
-# for details, see https://www.debian.org/doc/debian-policy/ or
-# the debian-policy package
-
-
-case "$1" in
-    configure)
-	mkdir -p /var/lib/scst/dif_tags
-	mkdir -p /var/lib/scst/pr
-	mkdir -p /var/lib/scst/vdev_mode_pages
-	depmod;;
-
-    abort-upgrade|abort-remove|abort-deconfigure)
-	;;
-
-    *)
-	echo "postinst called with unknown argument \`$1'" >&2
-	exit 1
-	;;
-esac
-
-# dh_installdeb will replace this with shell code automatically
-# generated by other debhelper scripts.
-
-#DEBHELPER#
-
-exit 0

diff --git a/scst/debian/scst.postinst.in b/scst/debian/scst.postinst.in
new file mode 100644
index 0000000..f535eeb
--- /dev/null
+++ b/scst/debian/scst.postinst.in

@@ -0,0 +1,42 @@
+#!/bin/sh
+# postinst script for scst
+#
+# see: dh_installdeb(1)
+
+set -e
+
+# summary of how this script can be called:
+#        * <postinst> `configure' <most-recently-configured-version>
+#        * <old-postinst> `abort-upgrade' <new version>
+#        * <conflictor's-postinst> `abort-remove' `in-favour' <package>
+#          <new-version>
+#        * <postinst> `abort-remove'
+#        * <deconfigured's-postinst> `abort-deconfigure' `in-favour'
+#          <failed-install-package> <version> `removing'
+#          <conflicting-package> <version>
+# for details, see https://www.debian.org/doc/debian-policy/ or
+# the debian-policy package
+
+
+case "$1" in
+    configure)
+	mkdir -p /var/lib/scst/dif_tags
+	mkdir -p /var/lib/scst/pr
+	mkdir -p /var/lib/scst/vdev_mode_pages
+	depmod "%{KVER}";;
+
+    abort-upgrade|abort-remove|abort-deconfigure)
+	;;
+
+    *)
+	echo "postinst called with unknown argument \`$1'" >&2
+	exit 1
+	;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
+
+exit 0

diff --git a/scst/debian/scst.preinst b/scst/debian/scst.preinst
deleted file mode 100644
index 4d5af2e..0000000
--- a/scst/debian/scst.preinst
+++ /dev/null

@@ -1,47 +0,0 @@
-#!/bin/sh
-# preinst script for scst
-#
-# see: dh_installdeb(1)
-
-set -e
-
-# summary of how this script can be called:
-#        * <new-preinst> `install'
-#        * <new-preinst> `install' <old-version>
-#        * <new-preinst> `upgrade' <old-version>
-#        * <old-preinst> `abort-upgrade' <new-version>
-# for details, see https://www.debian.org/doc/debian-policy/ or
-# the debian-policy package
-
-
-case "$1" in
-    install)
-	# Remove any existing ib_srpt.ko kernel modules
-	find "/lib/modules/$(uname -r)" -name ib_srpt.ko -exec rm {} \;
-	# Remove files installed by "make install"
-	rm -f /usr/local/man/man5/iscsi-scstd.conf.5
-	rm -f /usr/local/man/man8/iscsi-scst-adm.8
-	rm -f /usr/local/man/man8/iscsi-scstd.8
-	rm -f /usr/local/sbin/iscsi-scst-adm
-	rm -f /usr/local/sbin/iscsi-scstd
-	rm -rf /usr/local/include/scst
-	;;
-
-    upgrade)
-        ;;
-
-    abort-upgrade)
-        ;;
-
-    *)
-        echo "preinst called with unknown argument \`$1'" >&2
-        exit 1
-        ;;
-esac
-
-# dh_installdeb will replace this with shell code automatically
-# generated by other debhelper scripts.
-
-#DEBHELPER#
-
-exit 0

diff --git a/scst/debian/scst.preinst.in b/scst/debian/scst.preinst.in
new file mode 100644
index 0000000..199a4e2
--- /dev/null
+++ b/scst/debian/scst.preinst.in

@@ -0,0 +1,47 @@
+#!/bin/sh
+# preinst script for scst
+#
+# see: dh_installdeb(1)
+
+set -e
+
+# summary of how this script can be called:
+#        * <new-preinst> `install'
+#        * <new-preinst> `install' <old-version>
+#        * <new-preinst> `upgrade' <old-version>
+#        * <old-preinst> `abort-upgrade' <new-version>
+# for details, see https://www.debian.org/doc/debian-policy/ or
+# the debian-policy package
+
+
+case "$1" in
+    install)
+	# Remove any existing ib_srpt.ko kernel modules
+	find "/lib/modules/%{KVER}" -name ib_srpt.ko -exec rm {} \;
+	# Remove files installed by "make install"
+	rm -f /usr/local/man/man5/iscsi-scstd.conf.5
+	rm -f /usr/local/man/man8/iscsi-scst-adm.8
+	rm -f /usr/local/man/man8/iscsi-scstd.8
+	rm -f /usr/local/sbin/iscsi-scst-adm
+	rm -f /usr/local/sbin/iscsi-scstd
+	rm -rf /usr/local/include/scst
+	;;
+
+    upgrade)
+        ;;
+
+    abort-upgrade)
+        ;;
+
+    *)
+        echo "preinst called with unknown argument \`$1'" >&2
+        exit 1
+        ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
+
+exit 0

diff --git a/scst/doc/scst_pg.sgml b/scst/doc/scst_pg.sgml
index 5750db6..81dd910 100644
--- a/scst/doc/scst_pg.sgml
+++ b/scst/doc/scst_pg.sgml

@@ -163,11 +163,10 @@
 
 	int threads_num;
 
-	int (*detect) (struct scst_tgt_template *tgt_template);
 	int (*release)(struct scst_tgt *tgt);
 
 	int (*xmit_response)(struct scst_cmd *cmd);
-	int (* rdy_to_xfer)(struct scst_cmd *cmd);
+	int (*rdy_to_xfer)(struct scst_cmd *cmd);
 
 	void (*on_hw_pending_cmd_timeout) (struct scst_cmd *cmd);
 
@@ -230,13 +229,6 @@
 It is the target driver's duty to ensure that not more, than that number
 of threads, are blocked in those functions at any time.
 
-<item><bf/int (*detect) (struct scst_tgt_template *tgt_template)/ - this
-function is intended to detect the target adapters that are present in
-the system. Each found adapter should be registered by calling
-<it/scst_register_target()/. The function should return a value >= 0 to
-signify the number of detected target adapters. A negative value should
-be returned whenever there is an error. Must be defined.
-
 <item><bf/int (*release)(struct scst_tgt *tgt)/ - this function is
 intended to free up resources allocated to the device. The function
 should return 0 to indicate successful release or a negative value if

diff --git a/scst/doc/scst_user_spec.sgml b/scst/doc/scst_user_spec.sgml
index f3d8679..e6ba972 100644
--- a/scst/doc/scst_user_spec.sgml
+++ b/scst/doc/scst_user_spec.sgml

@@ -10,7 +10,7 @@
 	<name>Vladislav Bolkhovitin</name>
 </author>
 
-<date>Version 3.5.0</date>
+<date>Version 3.7.0</date>
 
 <toc>
 

diff --git a/scst/fcst/Kbuild b/scst/fcst/Kbuild
index 1954f60..be21875 100644
--- a/scst/fcst/Kbuild
+++ b/scst/fcst/Kbuild

@@ -1,5 +1,5 @@
 KBUILD_EXTRA_SYMBOLS=$(src)/../scst/src/Module.symvers
-ccflags-y += -I$(src)/../scst/include
+ccflags-y += -I$(src)/../scst/include $(shell if [ -e include/scsi ]; then header_dir=.; else header_dir=$$(sed -n 's/^include[[:blank:]]\+\(.*\)\/Makefile$$/\1/p;s/^MAKEARGS := -C \([^ ]*\) .*/\1/p' Makefile); fi; if false; then echo "header_dir=$${header_dir}" >&2; fi; grep -qw fc_fill_fc_hdr "$${header_dir}/include/scsi/fc_encode.h" 2>/dev/null && echo -DFC_FILL_FC_HDR_IN_SCSI_FC_ENCODE_H)
 
 obj-$(CONFIG_FCST) += fcst.o
 

diff --git a/scst/fcst/fcst.h b/scst/fcst/fcst.h
index 7191c0d..a29556c 100644
--- a/scst/fcst/fcst.h
+++ b/scst/fcst/fcst.h

@@ -27,10 +27,18 @@
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || \
 	defined(CONFIG_SUSE_KERNEL) && \
 	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+/*
+ * See also commit 768c72cc34a2 ("scsi: libfc: Replace ->exch_done callback
+ * with function call") # v4.10
+ * and commit 9625cc483b8c ("scsi: libfc: Replace ->seq_release callback with
+ * function call") # v4.10
+ * and commit c6865b30be7e ("scsi: libfc: Replace ->seq_start_next callback
+ * with function call") # v4.10.
+ */
 #define NEW_LIBFC_API
 #endif
 
-#define FT_VERSION	"3.5.0"
+#define FT_VERSION	"3.7.0"
 #define FT_MODULE	"fcst"
 
 #define FT_MAX_HW_PENDING_TIME	20	/* max I/O time in seconds */

diff --git a/scst/fcst/ft_cmd.c b/scst/fcst/ft_cmd.c
index e14e534..3519a0a 100644
--- a/scst/fcst/ft_cmd.c
+++ b/scst/fcst/ft_cmd.c

@@ -14,10 +14,21 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+
+#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <scsi/libfc.h>
+/*
+ * See also upstream commit e31ac898ac29 ("scsi: libfc: Move scsi/fc_encode.h
+ * to libfc"). That commit moved fc_fill_fc_hdr() from <scsi/fc_encode.h> into
+ * <scsi/fc_frame.h>.
+ */
+#if defined(FC_FILL_FC_HDR_IN_SCSI_FC_ENCODE_H)
 #include <scsi/fc_encode.h>
+#else
+#include <scsi/fc_frame.h>
+#endif
 #include "fcst.h"
 
 /*
@@ -118,14 +129,12 @@
 		lport->tt.exch_done(sp);
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
 	if (fr_seq(fp))
 #ifdef NEW_LIBFC_API
 		fc_seq_release(fr_seq(fp));
 #else
 		lport->tt.seq_release(fr_seq(fp));
 #endif
-#endif
 
 	fc_frame_free(fp);
 	kfree(fcmd);
@@ -394,9 +403,6 @@
 	struct fcp_resp_with_ext *fcp;
 	struct fcp_resp_rsp_info *info;
 	struct fc_lport *lport;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-	struct fc_exch *ep;
-#endif
 
 	fh = fc_frame_header_get(rx_fp);
 	FT_IO_DBG("FCP error response: did %x oxid %x status %x code %x\n",
@@ -407,11 +413,8 @@
 		len += sizeof(*info);
 	fp = fc_frame_alloc(lport, len);
 	if (!fp)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-		goto out;
-#else
 		return;
-#endif
+
 	fcp = fc_frame_payload_get(fp, len);
 	memset(fcp, 0, len);
 	fcp->resp.fr_status = status;
@@ -422,17 +425,6 @@
 		info->rsp_code = code;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-	sp = fr_seq(rx_fp);
-	sp = lport->tt.seq_start_next(sp);
-	ep = fc_seq_exch(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_DD_CMD_STATUS, ep->did, ep->sid, FC_TYPE_FCP,
-		       FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ, 0);
-
-	lport->tt.seq_send(lport, sp, fp);
-out:
-	;
-#else
 	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_DD_CMD_STATUS, 0);
 	sp = fr_seq(fp);
 	if (sp)
@@ -443,7 +435,6 @@
 #endif
 	else
 		lport->tt.frame_send(lport, fp);
-#endif
 }
 
 /*
@@ -502,13 +493,7 @@
 
 	scst_rx_mgmt_params_init(&params);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) || \
-	defined(CONFIG_SUSE_KERNEL) && \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 101)
 	params.lun = fcp->fc_lun.scsi_lun;
-#else
-	params.lun = fcp->fc_lun;
-#endif
 	params.lun_len = sizeof(fcp->fc_lun);
 	params.lun_set = 1;
 	params.atomic = SCST_ATOMIC;
@@ -564,9 +549,6 @@
 
 	lport = sess->tport->lport;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-	sp = fr_seq(fp);
-#else
 #ifdef NEW_LIBFC_API
 	sp = fc_seq_assign(lport, fp);
 #else
@@ -574,7 +556,6 @@
 #endif
 	if (!sp)
 		goto busy;
-#endif
 
 	fcmd = kzalloc(sizeof(*fcmd), GFP_ATOMIC);
 	if (!fcmd)
@@ -603,16 +584,9 @@
 	cdb_len += sizeof(fcp->fc_cdb);
 	data_len = ntohl(*(__be32 *)(fcp->fc_cdb + cdb_len));
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) || \
-	defined(CONFIG_SUSE_KERNEL) && \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 101)
 	cmd = scst_rx_cmd(sess->scst_sess, fcp->fc_lun.scsi_lun,
 			  sizeof(fcp->fc_lun), fcp->fc_cdb, cdb_len,
 			  SCST_ATOMIC);
-#else
-	cmd = scst_rx_cmd(sess->scst_sess, fcp->fc_lun, sizeof(fcp->fc_lun),
-			  fcp->fc_cdb, cdb_len, SCST_ATOMIC);
-#endif
 	if (!cmd)
 		goto busy;
 	fcmd->scst_cmd = cmd;
@@ -686,30 +660,6 @@
 static void ft_cmd_ls_rjt(struct fc_frame *rx_fp, enum fc_els_rjt_reason reason,
 			  enum fc_els_rjt_explan explan)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-	struct fc_seq *sp = fr_seq(rx_fp);
-	struct fc_frame *fp;
-	struct fc_els_ls_rjt *rjt;
-	struct fc_lport *lport;
-	struct fc_exch *ep;
-
-	ep = fc_seq_exch(sp);
-	lport = ep->lp;
-	fp = fc_frame_alloc(lport, sizeof(*rjt));
-	if (!fp)
-		return;
-
-	rjt = fc_frame_payload_get(fp, sizeof(*rjt));
-	memset(rjt, 0, sizeof(*rjt));
-	rjt->er_cmd = ELS_LS_RJT;
-	rjt->er_reason = reason;
-	rjt->er_explan = explan;
-
-	sp = lport->tt.seq_start_next(sp);
-	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid, FC_TYPE_FCP,
-		       FC_FC_EX_CTX | FC_FC_END_SEQ | FC_FC_LAST_SEQ, 0);
-	lport->tt.seq_send(lport, sp, fp);
-#else
 	struct fc_seq_els_data rjt_data;
 
 	rjt_data.reason = reason;
@@ -719,7 +669,6 @@
 #else
 	fr_dev(rx_fp)->tt.seq_els_rsp_send(rx_fp, ELS_LS_RJT, &rjt_data);
 #endif
-#endif
 }
 
 /*
@@ -758,9 +707,6 @@
 	default:
 		pr_info("%s: unhandled frame r_ctl %x\n", __func__,
 			fh->fh_r_ctl);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-		sess->tport->lport->tt.exch_done(fr_seq(fp));
-#endif
 		fc_frame_free(fp);
 		break;
 	}

diff --git a/scst/fcst/ft_io.c b/scst/fcst/ft_io.c
index 1ab44f4..face322 100644
--- a/scst/fcst/ft_io.c
+++ b/scst/fcst/ft_io.c

@@ -19,10 +19,21 @@
  * You should have received a copy of the GNU General Public License along with
  * this program.
  */
+
+#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <scsi/libfc.h>
+/*
+ * See also upstream commit e31ac898ac29 ("scsi: libfc: Move scsi/fc_encode.h
+ * to libfc"). That commit moved fc_fill_fc_hdr() from <scsi/fc_encode.h> into
+ * <scsi/fc_frame.h>.
+ */
+#if defined(FC_FILL_FC_HDR_IN_SCSI_FC_ENCODE_H)
 #include <scsi/fc_encode.h>
+#else
+#include <scsi/fc_frame.h>
+#endif
 #include "fcst.h"
 
 /*

diff --git a/scst/fcst/ft_sess.c b/scst/fcst/ft_sess.c
index ef5991e..5deafb7 100644
--- a/scst/fcst/ft_sess.c
+++ b/scst/fcst/ft_sess.c

@@ -81,18 +81,6 @@
 	return tport;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-/*
- * Free tport via RCU.
- */
-static void ft_tport_rcu_free(struct rcu_head *rcu)
-{
-	struct ft_tport *tport = container_of(rcu, struct ft_tport, rcu);
-
-	kfree(tport);
-}
-#endif
-
 /*
  * Delete target local port, if any, associated with the local port.
  * Caller holds ft_lport_lock.
@@ -111,11 +99,7 @@
 	rcu_assign_pointer(*(void __force __rcu **)&lport->prov[FC_TYPE_FCP],
 			   NULL);
 	tport->lport = NULL;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
 	kfree_rcu(tport, rcu);
-#else
-	call_rcu(&tport->rcu, ft_tport_rcu_free);
-#endif
 }
 
 /*
@@ -180,9 +164,6 @@
 {
 	struct ft_tport *tport;
 	struct hlist_head *head;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	struct hlist_node *pos;
-#endif
 	struct ft_sess *sess;
 
 	rcu_read_lock();
@@ -192,11 +173,7 @@
 		goto out;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
-#else
 	hlist_for_each_entry_rcu(sess, head, hash) {
-#endif
 		if (sess->port_id == port_id) {
 			if (!kref_get_unless_zero(&sess->kref))
 				sess = NULL;
@@ -221,9 +198,6 @@
 	struct ft_sess *sess;
 	struct scst_session *scst_sess;
 	struct hlist_head *head;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	struct hlist_node *pos;
-#endif
 	u32 port_id;
 	char name[FT_NAMELEN];
 
@@ -234,11 +208,7 @@
 	}
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
-#else
 	hlist_for_each_entry_rcu(sess, head, hash) {
-#endif
 		if (sess->port_id == port_id) {
 			sess->params = fcp_parm;
 			return 0;
@@ -299,17 +269,10 @@
 static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
 {
 	struct hlist_head *head;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	struct hlist_node *pos;
-#endif
 	struct ft_sess *sess;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
-#else
 	hlist_for_each_entry_rcu(sess, head, hash) {
-#endif
 		if (sess->port_id == port_id) {
 			ft_sess_unhash(sess);
 			return sess;
@@ -348,7 +311,9 @@
 		u8	__resv1[7];
 		__be64	port_name;	/* N_Port Name */
 		u8	__resv2[8];
-	} __attribute__((__packed__)) *id;
+	} *id;
+
+	BUILD_BUG_ON(sizeof(*id) != 24);
 
 	if (!scst_sess)
 		return SCSI_TRANSPORTID_PROTOCOLID_FCP2;
@@ -432,7 +397,7 @@
 }
 
 /**
- * tcm_fcp_prli() - Handle incoming or outgoing PRLI for the FCP target
+ * ft_prli() - Handle incoming or outgoing PRLI for the FCP target
  * @rdata: remote port private
  * @spp_len: service parameter page length
  * @rspp: received service parameter page (NULL for outgoing PRLI)
@@ -502,25 +467,12 @@
 	rdata->prli_count--;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) && !defined(RHEL_MAJOR)
-static inline u32 fc_frame_sid(const struct fc_frame *fp)
-{
-	return ntoh24(fc_frame_header_get(fp)->fh_s_id);
-}
-#endif
-
 /*
  * Handle incoming FCP request.
  * Caller has verified that the frame is type FCP.
  * Note that this may be called directly from the softirq context.
  */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) \
-	&& (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 <= 5)
-static void ft_recv(struct fc_lport *lport, struct fc_seq *sp,
-		    struct fc_frame *fp)
-#else
 static void ft_recv(struct fc_lport *lport, struct fc_frame *fp)
-#endif
 {
 	struct ft_sess *sess;
 	u32 sid = fc_frame_sid(fp);
@@ -530,10 +482,6 @@
 	sess = ft_sess_get(lport, sid);
 	if (!sess) {
 		FT_SESS_DBG("sid %x sess lookup failed\n", sid);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) \
-	&& (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 <= 5)
-		lport->tt.exch_done(sp);
-#endif
 		/* TBD XXX - if FCP_CMND, send LOGO */
 		fc_frame_free(fp);
 		return;
@@ -553,9 +501,6 @@
 {
 	struct ft_tport *tport;
 	struct hlist_head *head;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	struct hlist_node *pos;
-#endif
 	struct ft_sess *sess;
 
 	tport = scst_tgt_get_tgt_priv(tgt);
@@ -563,11 +508,7 @@
 	tport->lport->service_params &= ~FCP_SPPF_TARG_FCN;
 
 	for (head = tport->hash; head < &tport->hash[FT_SESS_HASH_SIZE]; head++)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-		hlist_for_each_entry_rcu(sess, pos, head, hash)
-#else
 		hlist_for_each_entry_rcu(sess, head, hash)
-#endif
 			ft_sess_close(sess);
 
 	synchronize_rcu();

diff --git a/scst/iscsi-scst/Makefile b/scst/iscsi-scst/Makefile
index 73befbe..557088e 100644
--- a/scst/iscsi-scst/Makefile
+++ b/scst/iscsi-scst/Makefile

@@ -168,7 +168,6 @@
 	  $(shell [ -n "$(PASS_CC_TO_MAKE)" ] && echo CC="$(CC)")	\
 	  $$([ -n "$(DEPMOD)" ] && echo "DEPMOD=$(DEPMOD)")		\
 	  CONFIG_MODULE_SIG_ALL= modules_install
-		chmod u+x $(INSTALL_DIR)/*.ko
 	echo "$@:  INFINIBAND_ENABLED = $(INFINIBAND_ENABLED)"
 	if $(INFINIBAND_ENABLED); then					\
 	  (cd $(ISERTMOD) && KDIR=$(KDIR) ../../../scripts/sign-modules);\
@@ -215,12 +214,6 @@
 	echo "$(call run_conftest,cm_event_mod,				\
 		-DCM_HANDLER_EVENT_MODIFIER=const,-DCM_HANDLER_EVENT_MODIFIER=)" >"$@"
 
-conftest/cm_listen/result-$(KVER).txt:					\
-	conftest/cm_listen/cm_listen.c					\
-	conftest/cm_listen/Kbuild
-	echo "$(call run_conftest,cm_listen,				\
-		-DIB_CM_LISTEN_TAKES_FOURTH_ARG)" >"$@"
-
 conftest/create_cq/result-$(KVER).txt:					\
 	conftest/create_cq/create_cq.c					\
 	conftest/create_cq/Kbuild

diff --git a/scst/iscsi-scst/README b/scst/iscsi-scst/README
index 687d52b..14f323b 100644
--- a/scst/iscsi-scst/README
+++ b/scst/iscsi-scst/README

@@ -1,7 +1,7 @@
 iSCSI SCST target driver
 ========================
 
-Version 3.5.0, 21 December 2020
+Version 3.7.0, 26 December 2022
 ----------------------------
 
 ISCSI-SCST is a deeply reworked fork of iSCSI Enterprise Target (IET)
@@ -183,7 +183,7 @@
 
  - allowed_portal[num] - optional attribute, which specifies, on which
    portals (target's IP addresses) this target will be available. If not
-   specified (default) the target will be available on all all portals.
+   specified (default) the target will be available on all portals.
    As soon as at least one allowed_portal specified, the target will be
    accessible for initiators only on the specified portals. There might
    be any number of the allowed_portal attributes. The portals
@@ -738,7 +738,7 @@
 
 will disable all portals.
 
-2. If you want to want to allow only only specific set of initiators be
+2. If you want to allow only a specific set of initiators to be
 able to connect to your target, you should don't add any default LUNs
 for the target and create for allowed initiators a security group to
 which they will be assigned.
@@ -852,7 +852,7 @@
 the same speed as via any single port. Thus, using such adapters in MPIO
 configuration can't improve performance. To allow MPIO to have double
 performance you should either use separate network adapters, or find a
-dual-port adapter capable to to transfer data simultaneously on both
+dual-port adapter capable of transferring data simultaneously on both
 ports. You can check it by running 2 iperf's through both ports in
 parallel.
 

diff --git a/scst/iscsi-scst/README_in-tree b/scst/iscsi-scst/README_in-tree
index 50a6bb1..0413184 100644
--- a/scst/iscsi-scst/README_in-tree
+++ b/scst/iscsi-scst/README_in-tree

@@ -66,7 +66,7 @@
 
  - allowed_portal[num] - optional attribute, which specifies, on which
    portals (target's IP addresses) this target will be available. If not
-   specified (default) the target will be available on all all portals.
+   specified (default) the target will be available on all portals.
    As soon as at least one allowed_portal specified, the target will be
    accessible for initiators only on the specified portals. There might
    be any number of the allowed_portal attributes. The portals
@@ -582,7 +582,7 @@
 
 will disable all portals.
 
-2. If you want to want to allow only only specific set of initiators be
+2. If you want to allow only a specific set of initiators to be
 able to connect to your target, you should don't add any default LUNs
 for the target and create for allowed initiators a security group to
 which they will be assigned.
@@ -694,7 +694,7 @@
 the same speed as via any single port. Thus, using such adapters in MPIO
 configuration can't improve performance. To allow MPIO to have double
 performance you should either use separate network adapters, or find a
-dual-port adapter capable to to transfer data simultaneously on both
+dual-port adapter capable of transferring data simultaneously on both
 ports. You can check it by running 2 iperf's through both ports in
 parallel.
 

diff --git a/scst/iscsi-scst/conftest/cm_listen/Kbuild b/scst/iscsi-scst/conftest/cm_listen/Kbuild
deleted file mode 100644
index 50520e3..0000000
--- a/scst/iscsi-scst/conftest/cm_listen/Kbuild
+++ /dev/null

@@ -1,3 +0,0 @@
-LINUXINCLUDE := $(CONFTEST_CFLAGS) $(LINUXINCLUDE)
-
-obj-m += cm_listen.o

diff --git a/scst/iscsi-scst/conftest/cm_listen/cm_listen.c b/scst/iscsi-scst/conftest/cm_listen/cm_listen.c
deleted file mode 100644
index 4f871ef..0000000
--- a/scst/iscsi-scst/conftest/cm_listen/cm_listen.c
+++ /dev/null

@@ -1,11 +0,0 @@
-#include <linux/module.h>
-#include <rdma/ib_cm.h>
-
-static int __init modinit(void)
-{
-	return ib_cm_listen(NULL, 0, 0, NULL);
-}
-
-module_init(modinit);
-
-MODULE_LICENSE("GPL");

diff --git a/scst/iscsi-scst/doc/manpages/iscsi-scstd.8 b/scst/iscsi-scst/doc/manpages/iscsi-scstd.8
index 6167a65..895db2a 100644
--- a/scst/iscsi-scst/doc/manpages/iscsi-scstd.8
+++ b/scst/iscsi-scst/doc/manpages/iscsi-scstd.8

@@ -42,8 +42,8 @@
 .BI \-g\  GID ,\ \-\-gid= GID
 Specify running group id, default is current gid.
 .TP
-.BI \-a\  address ,\ \-\-address= address
-Specify on which local address the server should listen, default is any.
+.BI \-a\  address\ ... ,\ \-\-address= address\ ...
+Specify the space-separated list of local addresses on which the server should listen; the default is any.
 .TP
 .BI \-p\  port ,\ \-\-port= port
 Specify on which port the server should listen, default is 3260.

diff --git a/scst/iscsi-scst/include/iscsi_scst.h b/scst/iscsi-scst/include/iscsi_scst.h
index d6b7acd..985621c 100644
--- a/scst/iscsi-scst/include/iscsi_scst.h
+++ b/scst/iscsi-scst/include/iscsi_scst.h

@@ -20,6 +20,12 @@
 #include <linux/uaccess.h>  /* mm_segment_t */
 #include <linux/version.h>
 
+#ifdef INSIDE_KERNEL_TREE
+#include <scst/backport.h>
+#else
+#include "backport.h"
+#endif
+
 /* <asm/uaccess.h> */
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
@@ -32,6 +38,21 @@
  * also https://lwn.net/Articles/832121/. The definitions below make it easy
  * to write kernel code that is compatible with all kernel versions.
  */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(9, 2))
+/*
+ * Backport mm_segment_t to preserve compatibility with older kernel versions.
+ *
+ * mm_segment_t was dropped in commit
+ * 967747bbc084 ("uaccess: remove CONFIG_SET_FS") # v5.18
+ */
+typedef struct {
+	/* empty dummy */
+} mm_segment_t;
+#endif
+
 #define KERNEL_DS ((mm_segment_t) { })
 static inline mm_segment_t get_fs(void) { return ((mm_segment_t) { }); }
 static inline void set_fs(mm_segment_t seg) { }
@@ -180,7 +201,6 @@
 	u32 cid;
 	u32 code;
 	u32 cookie;
-	char target_name[ISCSI_NAME_LEN];
 	u32 param1_size;
 	u32 param2_size;
 };

diff --git a/scst/iscsi-scst/include/iscsi_scst_ver.h b/scst/iscsi-scst/include/iscsi_scst_ver.h
index de8a336..da4d91f 100644
--- a/scst/iscsi-scst/include/iscsi_scst_ver.h
+++ b/scst/iscsi-scst/include/iscsi_scst_ver.h

@@ -17,4 +17,4 @@
 
 #define ISCSI_VERSION_STRING_SUFFIX
 
-#define ISCSI_VERSION_STRING	"3.5.0" ISCSI_VERSION_STRING_SUFFIX
+#define ISCSI_VERSION_STRING	"3.7.0" ISCSI_VERSION_STRING_SUFFIX

diff --git a/scst/iscsi-scst/kernel/config.c b/scst/iscsi-scst/kernel/config.c
index f3a5749..307775f 100644
--- a/scst/iscsi-scst/kernel/config.c
+++ b/scst/iscsi-scst/kernel/config.c

@@ -501,11 +501,9 @@
 	struct iscsi_attr *tgt_attr;
 	struct list_head *attrs_list;
 	const char *name;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	static struct lock_class_key __key;
 #endif
-#endif
 
 	TRACE_ENTRY();
 
@@ -551,14 +549,9 @@
 	list_add(&tgt_attr->attrs_list_entry, attrs_list);
 
 	tgt_attr->attr.attr.name = tgt_attr->name;
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 35)
-	tgt_attr->attr.attr.owner = THIS_MODULE;
-#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	tgt_attr->attr.attr.key = &__key;
 #endif
-#endif
 	tgt_attr->attr.attr.mode = attr_info->mode & (S_IRUGO | S_IWUGO);
 	tgt_attr->attr.show = iscsi_attr_show;
 	tgt_attr->attr.store = iscsi_attr_store;

diff --git a/scst/iscsi-scst/kernel/conn.c b/scst/iscsi-scst/kernel/conn.c
index a27ac09..031e6c3 100644
--- a/scst/iscsi-scst/kernel/conn.c
+++ b/scst/iscsi-scst/kernel/conn.c

@@ -26,13 +26,11 @@
 #include "iscsi.h"
 #include "digest.h"
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 #if defined(CONFIG_LOCKDEP) && !defined(CONFIG_SCST_PROC)
 static struct lock_class_key scst_conn_key;
 static struct lockdep_map scst_conn_dep_map =
 	STATIC_LOCKDEP_MAP_INIT("iscsi_conn_kref", &scst_conn_key);
 #endif
-#endif
 
 static int print_conn_state(char *p, size_t size, struct iscsi_conn *conn)
 {
@@ -132,19 +130,10 @@
 	switch (sk->sk_family) {
 	case AF_INET:
 		pos = scnprintf(buf, size,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-			 "%u.%u.%u.%u", NIPQUAD(inet_sk(sk)->saddr));
-#else
 			"%pI4", &inet_sk(sk)->inet_saddr);
-#endif
 		break;
 #ifdef CONFIG_IPV6
 	case AF_INET6:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-		pos = scnprintf(buf, size,
-			 "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]",
-			 NIP6(inet6_sk(sk)->saddr));
-#else
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
 		pos = scnprintf(buf, size, "[%pI6]", &inet6_sk(sk)->saddr);
 #else
@@ -152,7 +141,6 @@
 #endif
 #endif
 		break;
-#endif
 	default:
 		pos = scnprintf(buf, size, "Unknown family %d",
 			sk->sk_family);
@@ -580,18 +568,10 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void conn_nop_in_delayed_work_fn(void *p)
-#else
 static void conn_nop_in_delayed_work_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct iscsi_conn *conn = p;
-#else
 	struct iscsi_conn *conn = container_of(work, struct iscsi_conn,
 					       nop_in_delayed_work.work);
-#endif
 	unsigned long next_timeout = 0;
 
 	TRACE_ENTRY();
@@ -889,13 +869,8 @@
 	conn->conn_thr_pool = session->sess_thr_pool;
 
 	conn->nop_in_ttt = 0;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
 	INIT_DELAYED_WORK(&conn->nop_in_delayed_work,
 			  conn_nop_in_delayed_work_fn);
-#else
-	INIT_WORK(&conn->nop_in_delayed_work, conn_nop_in_delayed_work_fn,
-		conn);
-#endif
 	conn->last_rcv_time = jiffies;
 	conn->data_rsp_timeout = session->tgt_params.rsp_timeout * HZ;
 	conn->nop_in_interval = session->tgt_params.nop_in_interval * HZ;

diff --git a/scst/iscsi-scst/kernel/digest.c b/scst/iscsi-scst/kernel/digest.c
index 5b87519..baca70b 100644
--- a/scst/iscsi-scst/kernel/digest.c
+++ b/scst/iscsi-scst/kernel/digest.c

@@ -39,12 +39,9 @@
 }
 
 /**
- * initialize support for digest calculation.
- *
- * digest_init -
+ * digest_init - initialize support for digest calculation.
  * @conn: ptr to connection to make use of digests
- *
- * @return: 0 on success, < 0 on error
+ * Returns: 0 on success, < 0 on error
  */
 int digest_init(struct iscsi_conn *conn)
 {
@@ -142,10 +139,10 @@
 	if (unlikely(crc != cmnd->hdigest)) {
 		PRINT_ERROR("%s", "RX header digest failed");
 		return -EIO;
-	} else {
-		TRACE_DBG("RX header digest OK for cmd %p", cmnd);
 	}
 
+	TRACE_DBG("RX header digest OK for cmd %p", cmnd);
+
 	return 0;
 }
 

diff --git a/scst/iscsi-scst/kernel/event.c b/scst/iscsi-scst/kernel/event.c
index f879191..f40234c 100644
--- a/scst/iscsi-scst/kernel/event.c
+++ b/scst/iscsi-scst/kernel/event.c

@@ -49,11 +49,7 @@
 {
 	u32 pid;
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0))
-	pid = NETLINK_CB(skb).pid;
-#else
 	pid = NETLINK_CB(skb).portid;
-#endif
 	WARN_ON(pid == 0);
 
 	iscsid_pid = pid;
@@ -61,11 +57,7 @@
 	return 0;
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
-static int event_recv_skb(struct sk_buff *skb)
-#else
 static void event_recv_skb(struct sk_buff *skb)
-#endif
 {
 	int err;
 	struct nlmsghdr	*nlh;
@@ -87,27 +79,9 @@
 	}
 
 out:
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
-	return 0;
-#else
 	return;
-#endif
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
-static void event_recv(struct sock *sk, int length)
-{
-	struct sk_buff *skb;
-
-	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
-		if (event_recv_skb(skb) && skb->len)
-			skb_queue_head(&sk->sk_receive_queue, skb);
-		else
-			kfree_skb(skb);
-	}
-}
-#endif
-
 /* event_mutex supposed to be held */
 static int __event_send(const void *buf, int buf_len)
 {
@@ -155,7 +129,7 @@
 {
 	int err;
 	static DEFINE_MUTEX(event_mutex);
-	struct iscsi_kern_event event;
+	struct iscsi_kern_event event = {};
 	int param1_size, param2_size;
 
 	param1_size = (param1 != NULL) ? strlen(param1) : 0;
@@ -196,30 +170,14 @@
 {
 	iscsi_net_ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22))
-	nl = netlink_kernel_create(NETLINK_ISCSI_SCST, 1, event_recv,
-		THIS_MODULE);
-#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
-	nl = netlink_kernel_create(NETLINK_ISCSI_SCST, 1, event_recv, NULL,
-				   THIS_MODULE);
-#elif (LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 0))
-	nl = netlink_kernel_create(iscsi_net_ns, NETLINK_ISCSI_SCST, 1,
-				   event_recv_skb, NULL, THIS_MODULE);
-#else
 	{
 		struct netlink_kernel_cfg cfg = {
 			.input = event_recv_skb,
 			.groups = 1,
 		};
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0))
-		nl = netlink_kernel_create(iscsi_net_ns, NETLINK_ISCSI_SCST,
-				   THIS_MODULE, &cfg);
-#else
 		nl = netlink_kernel_create(iscsi_net_ns, NETLINK_ISCSI_SCST,
 					   &cfg);
-#endif
 	}
-#endif
 	if (!nl)
 		goto drop_ns;
 
@@ -234,12 +192,7 @@
 
 void event_exit(void)
 {
-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 24))
-	if (nl)
-		sock_release(nl->sk_socket);
-#else
 	netlink_kernel_release(nl);
-#endif
 	kobj_ns_drop(KOBJ_NS_TYPE_NET, iscsi_net_ns);
 	iscsi_net_ns = NULL;
 }

diff --git a/scst/iscsi-scst/kernel/iscsi.c b/scst/iscsi-scst/kernel/iscsi.c
index 198680d..bb367d1 100644
--- a/scst/iscsi-scst/kernel/iscsi.c
+++ b/scst/iscsi-scst/kernel/iscsi.c

@@ -2469,18 +2469,10 @@
 
 static mempool_t *iscsi_cmnd_abort_mempool;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void iscsi_cmnd_abort_fn(void *ctx)
-#else
 static void iscsi_cmnd_abort_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct iscsi_cmnd_abort_params *params = ctx;
-#else
 	struct iscsi_cmnd_abort_params *params = container_of(work,
 		struct iscsi_cmnd_abort_params, iscsi_cmnd_abort_work);
-#endif
 	struct scst_cmd *scst_cmd = params->scst_cmd;
 	struct iscsi_session *session = scst_sess_get_tgt_priv(scst_cmd->sess);
 	struct iscsi_conn *conn;
@@ -2489,7 +2481,8 @@
 
 	TRACE_ENTRY();
 
-	TRACE(TRACE_MGMT,"Checking aborted scst_cmd %p (cmnd %p)", scst_cmd, cmnd);
+	TRACE_MGMT_DBG("Checking aborted scst_cmd %p (cmnd %p)", scst_cmd,
+		       cmnd);
 
 	mutex_lock(&session->target->target_mutex);
 
@@ -2539,11 +2532,7 @@
 	}
 
 	memset(params, 0, sizeof(*params));
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&params->iscsi_cmnd_abort_work, iscsi_cmnd_abort_fn, params);
-#else
 	INIT_WORK(&params->iscsi_cmnd_abort_work, iscsi_cmnd_abort_fn);
-#endif
 	params->scst_cmd = scst_cmd;
 
 	scst_cmd_get(scst_cmd);
@@ -3277,19 +3266,10 @@
 	switch (sk->sk_family) {
 	case AF_INET:
 		pos = scnprintf(buf, size,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-			 "%u.%u.%u.%u", NIPQUAD(inet_sk(sk)->daddr));
-#else
 			"%pI4", &inet_sk(sk)->inet_daddr);
-#endif
 		break;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-		pos = scnprintf(buf, size,
-			 "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]",
-			 NIP6(inet6_sk(sk)->daddr));
-#else
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) && \
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
 		pos = scnprintf(buf, size, "[%pI6]", &inet6_sk(sk)->daddr);
@@ -3298,7 +3278,6 @@
 #endif
 #endif
 		break;
-#endif
 	default:
 		pos = scnprintf(buf, size, "Unknown family %d",
 			sk->sk_family);

diff --git a/scst/iscsi-scst/kernel/iscsi.h b/scst/iscsi-scst/kernel/iscsi.h
index 7a5895e..dfd61cf 100644
--- a/scst/iscsi-scst/kernel/iscsi.h
+++ b/scst/iscsi-scst/kernel/iscsi.h

@@ -130,11 +130,7 @@
 };
 
 #define ISCSI_HASH_ORDER	8
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
 #define	cmnd_hashfn(itt)	hash_32(itt, ISCSI_HASH_ORDER)
-#else
-#define	cmnd_hashfn(itt)	hash_long(itt, ISCSI_HASH_ORDER)
-#endif
 
 struct iscsi_session {
 	struct iscsi_target *target;
@@ -315,11 +311,7 @@
 	/* Doesn't need any protection */
 	u16 cid;
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
 	struct delayed_work nop_in_delayed_work;
-#else
-	struct work_struct nop_in_delayed_work;
-#endif
 	struct work_struct close_work;
 	unsigned int nop_in_interval; /* in jiffies */
 	unsigned int nop_in_timeout; /* in jiffies */

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser.h b/scst/iscsi-scst/kernel/isert-scst/iser.h
index a638c29..c543c8b 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser.h
+++ b/scst/iscsi-scst/kernel/isert-scst/iser.h

@@ -66,6 +66,8 @@
 	/* protected by dev_list_mutex */
 	struct list_head	conn_list; /* head of conns list */
 	enum isert_portal_state	state;
+	struct work_struct      work;
+	struct workqueue_struct *reinit_id_wq;
 	int			refcnt;
 };
 
@@ -88,13 +90,13 @@
 };
 
 struct isert_device;
-struct isert_connection;
+struct isert_conn;
 
 struct isert_wr {
 	enum isert_wr_op	wr_op;
 	struct isert_buf	*buf;
 
-	struct isert_connection	*conn;
+	struct isert_conn       *conn;
 	struct isert_cmnd	*pdu;
 
 	struct isert_device	*isert_dev;
@@ -168,7 +170,7 @@
 #define ISERT_CONNECTION_CLOSE		5
 #define ISERT_IN_PORTAL_LIST		6
 
-struct isert_connection {
+struct isert_conn {
 	struct iscsi_conn	iscsi ____cacheline_aligned;
 
 	int			repost_threshold ____cacheline_aligned;
@@ -224,13 +226,16 @@
 	struct work_struct	close_work;
 	struct work_struct	drain_work;
 	struct work_struct	discon_work;
-	struct work_struct	free_work;
+	struct work_struct	release_work;
 	struct isert_wr		drain_wr_sq;
 	struct isert_wr		drain_wr_rq;
 	struct kref		kref;
 
 	struct isert_portal	*portal;
 	void			*priv_data; /* for connection tracking */
+
+	wait_queue_head_t       rem_wait;
+	atomic_t                dev_removed;
 };
 
 struct isert_device {
@@ -264,8 +269,8 @@
 	struct workqueue_struct	*conn_wq;
 };
 
-#define _ptr_to_u64(p)		(u64)(unsigned long)(p)
-#define _u64_to_ptr(v)		(void *)(unsigned long)(v)
+#define _ptr_to_u64(p)		((u64)(unsigned long)(p))
+#define _u64_to_ptr(v)		((void *)(unsigned long)(v))
 
 /* global iser scope */
 int isert_global_init(void);
@@ -286,33 +291,30 @@
 extern struct kmem_cache *isert_conn_cache;
 
 /* iser portal */
-struct isert_portal *isert_portal_create(void);
-int isert_portal_listen(struct isert_portal *portal,
-			struct sockaddr *sa,
-			size_t addr_len);
+struct isert_portal *isert_portal_create(struct sockaddr *sa, size_t addr_len);
 void isert_portal_release(struct isert_portal *portal);
 void isert_portal_list_release_all(void);
 struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len);
 
 /* iser connection */
-int isert_post_recv(struct isert_connection *isert_conn,
+int isert_post_recv(struct isert_conn *isert_conn,
 		    struct isert_wr *first_wr, int num_wr);
-int isert_post_send(struct isert_connection *isert_conn,
+int isert_post_send(struct isert_conn *isert_conn,
 		    struct isert_wr *first_wr, int num_wr);
 
-int isert_alloc_conn_resources(struct isert_connection *isert_conn);
-void isert_free_conn_resources(struct isert_connection *isert_conn);
-void isert_conn_free(struct isert_connection *isert_conn);
-void isert_conn_disconnect(struct isert_connection *isert_conn);
-void isert_post_drain(struct isert_connection *isert_conn);
-void isert_sched_conn_free(struct isert_connection *isert_conn);
+int isert_alloc_conn_resources(struct isert_conn *isert_conn);
+void isert_free_conn_resources(struct isert_conn *isert_conn);
+void isert_put_conn(struct isert_conn *isert_conn);
+void isert_conn_disconnect(struct isert_conn *isert_conn);
+void isert_post_drain(struct isert_conn *isert_conn);
+void isert_sched_conn_free(struct isert_conn *isert_conn);
 
-static inline struct isert_connection *isert_conn_zalloc(void)
+static inline struct isert_conn *isert_conn_zalloc(void)
 {
 	return kmem_cache_zalloc(isert_conn_cache, GFP_KERNEL);
 }
 
-static inline void isert_conn_kfree(struct isert_connection *isert_conn)
+static inline void isert_conn_kfree(struct isert_conn *isert_conn)
 {
 	kmem_cache_free(isert_conn_cache, isert_conn);
 }
@@ -322,12 +324,12 @@
 			     struct isert_buf *isert_buf, size_t size,
 			     enum dma_data_direction dma_dir);
 void isert_wr_set_fields(struct isert_wr *wr,
-			 struct isert_connection *isert_conn,
+			 struct isert_conn *isert_conn,
 			 struct isert_cmnd *pdu);
 int isert_wr_init(struct isert_wr *wr,
 		  enum isert_wr_op wr_op,
 		  struct isert_buf *isert_buf,
-		  struct isert_connection *isert_conn,
+		  struct isert_conn *isert_conn,
 		  struct isert_cmnd *pdu,
 		  struct ib_sge *sge,
 		  int sg_offset,
@@ -357,23 +359,23 @@
 	kmem_cache_free(isert_cmnd_cache, cmnd);
 }
 
-struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
+struct isert_cmnd *isert_rx_pdu_alloc(struct isert_conn *isert_conn,
 				      size_t size);
-struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
+struct isert_cmnd *isert_tx_pdu_alloc(struct isert_conn *isert_conn,
 				      size_t size);
 void isert_tx_pdu_init(struct isert_cmnd *isert_pdu,
-		       struct isert_connection *isert_conn);
-int isert_pdu_send(struct isert_connection *isert_conn,
+		       struct isert_conn *isert_conn);
+int isert_pdu_send(struct isert_conn *isert_conn,
 		   struct isert_cmnd *tx_pdu);
 
 int isert_prepare_rdma(struct isert_cmnd *isert_pdu,
-		       struct isert_connection *isert_conn,
+		       struct isert_conn *isert_conn,
 		       enum isert_wr_op op);
-int isert_pdu_post_rdma_write(struct isert_connection *isert_conn,
+int isert_pdu_post_rdma_write(struct isert_conn *isert_conn,
 			      struct isert_cmnd *isert_cmd,
 			      struct isert_cmnd *isert_rsp,
 			      int wr_cnt);
-int isert_pdu_post_rdma_read(struct isert_connection *isert_conn,
+int isert_pdu_post_rdma_read(struct isert_conn *isert_conn,
 			     struct isert_cmnd *isert_cmd,
 			     int wr_cnt);
 

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser_buf.c b/scst/iscsi-scst/kernel/isert-scst/iser_buf.c
index acbdee4..2112758 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser_buf.c
+++ b/scst/iscsi-scst/kernel/isert-scst/iser_buf.c

@@ -197,7 +197,7 @@
 }
 
 void isert_wr_set_fields(struct isert_wr *wr,
-			 struct isert_connection *isert_conn,
+			 struct isert_conn *isert_conn,
 			 struct isert_cmnd *pdu)
 {
 	struct isert_device *isert_dev = isert_conn->isert_dev;
@@ -210,7 +210,7 @@
 int isert_wr_init(struct isert_wr *wr,
 		  enum isert_wr_op wr_op,
 		  struct isert_buf *isert_buf,
-		  struct isert_connection *isert_conn,
+		  struct isert_conn *isert_conn,
 		  struct isert_cmnd *pdu,
 		  struct ib_sge *sge,
 		  int sg_offset,

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser_datamover.c b/scst/iscsi-scst/kernel/isert-scst/iser_datamover.c
index 42dd2c0..d8241c0 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser_datamover.c
+++ b/scst/iscsi-scst/kernel/isert-scst/iser_datamover.c

@@ -63,8 +63,8 @@
 			size_t *addr_len)
 {
 	int ret;
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 	struct sockaddr *peer_sa = (struct sockaddr *)&isert_conn->peer_addr;
 
 	ret = isert_get_addr_size(peer_sa, addr_len);
@@ -80,8 +80,8 @@
 			  size_t *addr_len)
 {
 	int ret;
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 	struct sockaddr *self_sa = (struct sockaddr *)&isert_conn->self_addr;
 
 	ret = isert_get_addr_size(self_sa, addr_len);
@@ -106,17 +106,17 @@
 
 void isert_free_connection(struct iscsi_conn *iscsi_conn)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 
 	isert_post_drain(isert_conn);
-	isert_conn_free(isert_conn);
+	isert_put_conn(isert_conn);
 }
 
 struct iscsi_cmnd *isert_alloc_login_rsp_pdu(struct iscsi_conn *iscsi_conn)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 	struct isert_cmnd *isert_pdu = isert_conn->login_rsp_pdu;
 
 	isert_tx_pdu_init(isert_pdu, isert_conn);
@@ -126,8 +126,8 @@
 static struct iscsi_cmnd *isert_alloc_scsi_pdu(struct iscsi_conn *iscsi_conn,
 					       int fake)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 	struct isert_cmnd *isert_pdu;
 
 again:
@@ -159,8 +159,8 @@
 {
 	struct isert_cmnd *isert_pdu = container_of(iscsi_pdu,
 						    struct isert_cmnd, iscsi);
-	struct isert_connection *isert_conn = container_of(iscsi_pdu->conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_pdu->conn,
+						struct isert_conn, iscsi);
 
 	isert_tx_pdu_init_iscsi(isert_pdu);
 
@@ -180,8 +180,8 @@
 /* if last transition into FF (Fully Featured) state */
 int isert_login_rsp_tx(struct iscsi_cmnd *login_rsp, int last, int discovery)
 {
-	struct isert_connection *isert_conn = container_of(login_rsp->conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(login_rsp->conn,
+						struct isert_conn, iscsi);
 	int err;
 
 	if (last && !discovery) {
@@ -210,8 +210,8 @@
 			     struct iscsi_sess_params *sess_params,
 			     struct iscsi_tgt_params *tgt_params)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 
 	isert_conn->queue_depth = tgt_params->queued_cmnds;
 
@@ -228,8 +228,8 @@
 {
 	struct isert_cmnd *isert_cmnd = container_of(iscsi_cmnd,
 						    struct isert_cmnd, iscsi);
-	struct isert_connection *isert_conn = container_of(iscsi_cmnd->conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_cmnd->conn,
+						struct isert_conn, iscsi);
 	int err;
 
 	isert_tx_pdu_convert_from_iscsi(isert_cmnd, iscsi_cmnd);
@@ -242,8 +242,8 @@
 {
 	struct isert_cmnd *isert_cmnd = container_of(iscsi_cmnd,
 						    struct isert_cmnd, iscsi);
-	struct isert_connection *isert_conn = container_of(iscsi_cmnd->conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_cmnd->conn,
+						struct isert_conn, iscsi);
 	int ret;
 
 	ret = isert_prepare_rdma(isert_cmnd, isert_conn, ISER_WR_RDMA_READ);
@@ -260,8 +260,8 @@
 {
 	struct isert_cmnd *isert_cmnd = container_of(iscsi_cmnd,
 						    struct isert_cmnd, iscsi);
-	struct isert_connection *isert_conn = container_of(iscsi_cmnd->conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_cmnd->conn,
+						struct isert_conn, iscsi);
 	struct isert_cmnd *isert_rsp = container_of(iscsi_rsp,
 						    struct isert_cmnd, iscsi);
 	int ret;
@@ -278,8 +278,8 @@
 
 int isert_close_connection(struct iscsi_conn *iscsi_conn)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 
 	isert_conn_disconnect(isert_conn);
 
@@ -293,16 +293,16 @@
 
 void *isert_get_priv(struct iscsi_conn *iscsi_conn)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+						struct isert_conn, iscsi);
 
 	return isert_conn->priv_data;
 }
 
 void isert_set_priv(struct iscsi_conn *iscsi_conn, void *priv)
 {
-	struct isert_connection *isert_conn = container_of(iscsi_conn,
-					struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(iscsi_conn,
+					struct isert_conn, iscsi);
 
 	isert_conn->priv_data = priv;
 }

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser_global.c b/scst/iscsi-scst/kernel/isert-scst/iser_global.c
index cee6398..e6e0ec2 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser_global.c
+++ b/scst/iscsi-scst/kernel/isert-scst/iser_global.c

@@ -138,30 +138,36 @@
 	spin_lock_init(&isert_glob.portal_lock);
 	init_waitqueue_head(&isert_glob.portal_wq);
 
-	isert_glob.conn_wq = create_workqueue("isert_conn_wq");
+	isert_glob.conn_wq = alloc_workqueue("isert_conn_wq", WQ_MEM_RECLAIM, 1);
 	if (!isert_glob.conn_wq) {
 		PRINT_ERROR("Failed to alloc iser conn work queue");
 		return -ENOMEM;
 	}
 
-	isert_cmnd_cache = KMEM_CACHE(isert_cmnd,
-				     SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN);
+	isert_cmnd_cache = KMEM_CACHE_USERCOPY(isert_cmnd,
+						SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN,
+						iscsi);
 	if (!isert_cmnd_cache) {
-		destroy_workqueue(isert_glob.conn_wq);
 		PRINT_ERROR("Failed to alloc iser command cache");
-		return -ENOMEM;
+		goto free_wq;
 	}
 
-	isert_conn_cache = KMEM_CACHE(isert_connection,
+	isert_conn_cache = KMEM_CACHE(isert_conn,
 				     SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN);
 	if (!isert_conn_cache) {
-		destroy_workqueue(isert_glob.conn_wq);
-		kmem_cache_destroy(isert_cmnd_cache);
 		PRINT_ERROR("Failed to alloc iser connection cache");
-		return -ENOMEM;
+		goto free_cmnd_cache;
 	}
 
 	return 0;
+
+free_cmnd_cache:
+	kmem_cache_destroy(isert_cmnd_cache);
+
+free_wq:
+	destroy_workqueue(isert_glob.conn_wq);
+
+	return -ENOMEM;
 }
 
 void isert_global_cleanup(void)
@@ -169,10 +175,8 @@
 	isert_portal_list_release_all();
 	if (isert_glob.conn_wq)
 		destroy_workqueue(isert_glob.conn_wq);
-	if (isert_cmnd_cache)
-		kmem_cache_destroy(isert_cmnd_cache);
-	if (isert_conn_cache)
-		kmem_cache_destroy(isert_conn_cache);
+	kmem_cache_destroy(isert_cmnd_cache);
+	kmem_cache_destroy(isert_conn_cache);
 }
 
 int isert_get_addr_size(struct sockaddr *sa, size_t *addr_len)

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser_pdu.c b/scst/iscsi-scst/kernel/isert-scst/iser_pdu.c
index da2c1e0..b4b5b5e 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser_pdu.c
+++ b/scst/iscsi-scst/kernel/isert-scst/iser_pdu.c

@@ -43,7 +43,7 @@
 #include "iser_datamover.h"
 
 static inline int isert_pdu_rx_buf_init(struct isert_cmnd *isert_pdu,
-				 struct isert_connection *isert_conn)
+				 struct isert_conn *isert_conn)
 {
 	struct isert_buf *isert_buf = &isert_pdu->buf;
 
@@ -53,7 +53,7 @@
 }
 
 static inline int isert_pdu_tx_buf_init(struct isert_cmnd *isert_pdu,
-				 struct isert_connection *isert_conn)
+				 struct isert_conn *isert_conn)
 {
 	struct isert_buf *isert_buf = &isert_pdu->buf;
 
@@ -78,7 +78,7 @@
  * it should be parsed to setup isert_cmnd + iscsi_cmnd in full
  */
 static int isert_rx_pdu_init(struct isert_cmnd *isert_pdu,
-			     struct isert_connection *isert_conn)
+			     struct isert_conn *isert_conn)
 {
 	struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi;
 	int err = isert_pdu_rx_buf_init(isert_pdu, isert_conn);
@@ -105,7 +105,7 @@
  * of the iscsi pdu struct
  */
 void isert_tx_pdu_init(struct isert_cmnd *isert_pdu,
-		       struct isert_connection *isert_conn)
+		       struct isert_conn *isert_conn)
 {
 	struct iscsi_cmnd *iscsi_cmnd = &isert_pdu->iscsi;
 	struct isert_buf *isert_buf = &isert_pdu->buf;
@@ -142,7 +142,7 @@
 	return;
 }
 
-static inline int isert_pdu_prepare_send(struct isert_connection *isert_conn,
+static inline int isert_pdu_prepare_send(struct isert_conn *isert_conn,
 					  struct isert_cmnd *tx_pdu)
 {
 	struct isert_device *isert_dev = isert_conn->isert_dev;
@@ -168,7 +168,7 @@
 }
 
 static int isert_alloc_for_rdma(struct isert_cmnd *pdu, int sge_cnt,
-				struct isert_connection *isert_conn)
+				struct isert_conn *isert_conn)
 {
 	struct isert_wr *wr;
 	struct ib_sge *sg_pool;
@@ -234,7 +234,7 @@
 }
 
 int isert_prepare_rdma(struct isert_cmnd *isert_pdu,
-		       struct isert_connection *isert_conn,
+		       struct isert_conn *isert_conn,
 		       enum isert_wr_op op)
 {
 	struct isert_buf *isert_buf = &isert_pdu->rdma_buf;
@@ -323,7 +323,7 @@
 	isert_pdu_kfree(pdu);
 }
 
-struct isert_cmnd *isert_rx_pdu_alloc(struct isert_connection *isert_conn,
+struct isert_cmnd *isert_rx_pdu_alloc(struct isert_conn *isert_conn,
 				      size_t size)
 {
 	struct isert_cmnd *pdu = NULL;
@@ -371,7 +371,7 @@
 	return pdu;
 }
 
-struct isert_cmnd *isert_tx_pdu_alloc(struct isert_connection *isert_conn,
+struct isert_cmnd *isert_tx_pdu_alloc(struct isert_conn *isert_conn,
 				      size_t size)
 {
 	struct isert_cmnd *pdu = NULL;
@@ -437,7 +437,7 @@
 	isert_link_recv_wrs(&from_pdu->wr[0], &to_pdu->wr[0]);
 }
 
-int isert_alloc_conn_resources(struct isert_connection *isert_conn)
+int isert_alloc_conn_resources(struct isert_conn *isert_conn)
 {
 	struct isert_cmnd *pdu, *prev_pdu = NULL, *first_pdu = NULL;
 	/* RFC states that minimum receive data size is 512 */
@@ -496,8 +496,8 @@
 
 static int isert_reinit_rx_pdu(struct isert_cmnd *pdu)
 {
-	struct isert_connection *isert_conn = container_of(pdu->iscsi.conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(pdu->iscsi.conn,
+						struct isert_conn, iscsi);
 
 	pdu->is_rstag_valid = 0;
 	pdu->is_wstag_valid = 0;
@@ -510,8 +510,8 @@
 int isert_rx_pdu_done(struct isert_cmnd *pdu)
 {
 	int err;
-	struct isert_connection *isert_conn = container_of(pdu->iscsi.conn,
-						struct isert_connection, iscsi);
+	struct isert_conn *isert_conn = container_of(pdu->iscsi.conn,
+						struct isert_conn, iscsi);
 
 	TRACE_ENTRY();
 
@@ -539,7 +539,7 @@
 	return err;
 }
 
-void isert_free_conn_resources(struct isert_connection *isert_conn)
+void isert_free_conn_resources(struct isert_conn *isert_conn)
 {
 	struct isert_cmnd *pdu;
 
@@ -577,7 +577,7 @@
 	TRACE_EXIT();
 }
 
-int isert_pdu_send(struct isert_connection *isert_conn,
+int isert_pdu_send(struct isert_conn *isert_conn,
 		   struct isert_cmnd *tx_pdu)
 {
 	int err;
@@ -607,7 +607,7 @@
 	return err;
 }
 
-int isert_pdu_post_rdma_write(struct isert_connection *isert_conn,
+int isert_pdu_post_rdma_write(struct isert_conn *isert_conn,
 			      struct isert_cmnd *isert_cmd,
 			      struct isert_cmnd *isert_rsp,
 			      int wr_cnt)
@@ -634,7 +634,7 @@
 	return err;
 }
 
-int isert_pdu_post_rdma_read(struct isert_connection *isert_conn,
+int isert_pdu_post_rdma_read(struct isert_conn *isert_conn,
 			     struct isert_cmnd *isert_cmd, int wr_cnt)
 {
 	int err;

diff --git a/scst/iscsi-scst/kernel/isert-scst/iser_rdma.c b/scst/iscsi-scst/kernel/isert-scst/iser_rdma.c
index d4ac072..bad41bf 100644
--- a/scst/iscsi-scst/kernel/isert-scst/iser_rdma.c
+++ b/scst/iscsi-scst/kernel/isert-scst/iser_rdma.c

@@ -54,6 +54,8 @@
 static DEFINE_MUTEX(dev_list_mutex);
 
 static void isert_portal_free(struct isert_portal *portal);
+static struct rdma_cm_id *
+isert_setup_id(struct isert_portal *portal);
 
 static int isert_num_recv_posted_on_err(struct ib_recv_wr *first_ib_wr,
 					BAD_WR_MODIFIER struct ib_recv_wr *bad_wr)
@@ -67,7 +69,7 @@
 	return num_posted;
 }
 
-int isert_post_recv(struct isert_connection *isert_conn,
+int isert_post_recv(struct isert_conn *isert_conn,
 		    struct isert_wr *first_wr,
 		    int num_wr)
 {
@@ -110,7 +112,7 @@
 	return num_posted;
 }
 
-int isert_post_send(struct isert_connection *isert_conn,
+int isert_post_send(struct isert_conn *isert_conn,
 		    struct isert_wr *first_wr,
 		    int num_wr)
 {
@@ -145,7 +147,7 @@
 	return err;
 }
 
-static void isert_post_drain_sq(struct isert_connection *isert_conn)
+static void isert_post_drain_sq(struct isert_conn *isert_conn)
 {
 	BAD_WR_MODIFIER struct ib_send_wr *bad_wr;
 	struct isert_wr *drain_wr_sq = &isert_conn->drain_wr_sq;
@@ -177,7 +179,7 @@
 	}
 }
 
-static void isert_post_drain_rq(struct isert_connection *isert_conn)
+static void isert_post_drain_rq(struct isert_conn *isert_conn)
 {
 	BAD_WR_MODIFIER struct ib_recv_wr *bad_wr;
 	struct isert_wr *drain_wr_rq = &isert_conn->drain_wr_rq;
@@ -197,7 +199,7 @@
 	}
 }
 
-void isert_post_drain(struct isert_connection *isert_conn)
+void isert_post_drain(struct isert_conn *isert_conn)
 {
 	if (!test_and_set_bit(ISERT_DRAIN_POSTED, &isert_conn->flags)) {
 		mutex_lock(&isert_conn->state_mutex);
@@ -208,7 +210,7 @@
 	}
 }
 
-void isert_conn_disconnect(struct isert_connection *isert_conn)
+void isert_conn_disconnect(struct isert_conn *isert_conn)
 {
 	int err;
 
@@ -468,7 +470,7 @@
 static void isert_handle_wc(struct ib_wc *wc)
 {
 	struct isert_wr *wr = _u64_to_ptr(wc->wr_id);
-	struct isert_connection *isert_conn;
+	struct isert_conn *isert_conn;
 
 	TRACE_ENTRY();
 
@@ -578,122 +580,71 @@
 	}
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void isert_discon_do_work(void *ctx)
-#else
 static void isert_discon_do_work(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct isert_connection *isert_conn = ctx;
-#else
-	struct isert_connection *isert_conn =
-		container_of(work, struct isert_connection, discon_work);
-#endif
+	struct isert_conn *isert_conn =
+		container_of(work, struct isert_conn, discon_work);
 
 	/* notify upper layer */
 	isert_connection_closed(&isert_conn->iscsi);
 }
 
-static void isert_sched_discon(struct isert_connection *isert_conn)
+static void isert_sched_discon(struct isert_conn *isert_conn)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&isert_conn->discon_work, isert_discon_do_work, isert_conn);
-#else
 	INIT_WORK(&isert_conn->discon_work, isert_discon_do_work);
-#endif
 	isert_conn_queue_work(&isert_conn->discon_work);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void isert_conn_drained_do_work(void *ctx)
-#else
 static void isert_conn_drained_do_work(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct isert_connection *isert_conn = ctx;
-#else
-	struct isert_connection *isert_conn =
-		container_of(work, struct isert_connection, drain_work);
-#endif
+	struct isert_conn *isert_conn =
+		container_of(work, struct isert_conn, drain_work);
 
-	isert_conn_free(isert_conn);
+	isert_put_conn(isert_conn);
 }
 
-static void isert_sched_conn_drained(struct isert_connection *isert_conn)
+static void isert_sched_conn_drained(struct isert_conn *isert_conn)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&isert_conn->drain_work, isert_conn_drained_do_work,
-		  isert_conn);
-#else
 	INIT_WORK(&isert_conn->drain_work, isert_conn_drained_do_work);
-#endif
 	isert_conn_queue_work(&isert_conn->drain_work);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void isert_conn_closed_do_work(void *ctx)
-#else
 static void isert_conn_closed_do_work(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct isert_connection *isert_conn = ctx;
-#else
-	struct isert_connection *isert_conn =
-		container_of(work, struct isert_connection, close_work);
-#endif
+	struct isert_conn *isert_conn =
+		container_of(work, struct isert_conn, close_work);
 
 	if (!test_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags))
 		isert_connection_abort(&isert_conn->iscsi);
 
-	isert_conn_free(isert_conn);
+	isert_put_conn(isert_conn);
 }
 
-static void isert_sched_conn_closed(struct isert_connection *isert_conn)
+static void isert_sched_conn_closed(struct isert_conn *isert_conn)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work,
-		  isert_conn);
-#else
 	INIT_WORK(&isert_conn->close_work, isert_conn_closed_do_work);
-#endif
 	isert_conn_queue_work(&isert_conn->close_work);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void isert_conn_free_do_work(void *ctx)
-#else
-static void isert_conn_free_do_work(struct work_struct *work)
-#endif
+static void isert_release_work(struct work_struct *work)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct isert_connection *isert_conn = ctx;
-#else
-	struct isert_connection *isert_conn =
-		container_of(work, struct isert_connection, free_work);
-#endif
+	struct isert_conn *isert_conn =
+		container_of(work, struct isert_conn, release_work);
 
-	isert_conn_free(isert_conn);
+	isert_put_conn(isert_conn);
 }
 
-void isert_sched_conn_free(struct isert_connection *isert_conn)
+void isert_sched_conn_free(struct isert_conn *isert_conn)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&isert_conn->free_work, isert_conn_free_do_work,
-		  isert_conn);
-#else
-	INIT_WORK(&isert_conn->free_work, isert_conn_free_do_work);
-#endif
-	isert_conn_queue_work(&isert_conn->free_work);
+	INIT_WORK(&isert_conn->release_work, isert_release_work);
+	isert_conn_queue_work(&isert_conn->release_work);
 }
 
 static void isert_handle_wc_error(struct ib_wc *wc)
 {
 	struct isert_wr *wr = _u64_to_ptr(wc->wr_id);
 	struct isert_cmnd *isert_pdu = wr->pdu;
-	struct isert_connection *isert_conn = wr->conn;
+	struct isert_conn *isert_conn = wr->conn;
 	struct isert_buf *isert_buf = wr->buf;
 	struct isert_device *isert_dev = wr->isert_dev;
 	struct ib_device *ib_dev = isert_dev->ib_dev;
@@ -788,18 +739,10 @@
 }
 
 /* callback function for isert_dev->[cq]->cq_comp_work */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && \
-	!defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
-/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
-static void isert_cq_comp_work_cb(void *ctx)
-{
-	struct isert_cq *cq_desc = ctx;
-#else
 static void isert_cq_comp_work_cb(struct work_struct *work)
 {
 	struct isert_cq *cq_desc =
 		container_of(work, struct isert_cq, cq_comp_work);
-#endif
 	int ret;
 
 	TRACE_ENTRY();
@@ -828,12 +771,8 @@
 {
 	struct isert_cq *cq_desc = context;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work);
-#else
 	queue_work_on(smp_processor_id(), cq_desc->cq_workqueue,
 		      &cq_desc->cq_comp_work);
-#endif
 }
 
 static const char *ib_event_type_str(enum ib_event_type ev_type)
@@ -887,7 +826,7 @@
 	struct ib_device *ib_dev = isert_dev->ib_dev;
 	char *dev_name = ib_dev->name;
 	enum ib_event_type ev_type = async_ev->event;
-	struct isert_connection *isert_conn;
+	struct isert_conn *isert_conn;
 
 	TRACE_ENTRY();
 
@@ -1037,26 +976,12 @@
 
 		cq_desc->dev = isert_dev;
 		cq_desc->idx = i;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
-#else
 		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
-#endif
 
 		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
-#else
-#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
-		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
-							WQ_CPU_INTENSIVE|
-							WQ_RESCUER, 1);
-#else
 		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
 							WQ_CPU_INTENSIVE|
 							WQ_MEM_RECLAIM, 1);
-#endif
-#endif
 		if (unlikely(!cq_desc->cq_workqueue)) {
 			PRINT_ERROR("Failed to alloc iser cq work queue for dev:%s",
 				    ib_dev->name);
@@ -1159,15 +1084,7 @@
 	for (i = 0; i < isert_dev->num_cqs; ++i) {
 		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
-		/*
-		 * cancel_work_sync() was introduced in 2.6.22. We can
-		 * only wait until all scheduled work is done.
-		 */
-		flush_workqueue(cq_desc->cq_workqueue);
-#else
 		cancel_work_sync(&cq_desc->cq_comp_work);
-#endif
 
 		ib_destroy_cq(cq_desc->cq);
 		destroy_workqueue(cq_desc->cq_workqueue);
@@ -1206,7 +1123,7 @@
 	return min_idx;
 }
 
-static int isert_conn_qp_create(struct isert_connection *isert_conn)
+static int isert_conn_qp_create(struct isert_conn *isert_conn)
 {
 	struct rdma_cm_id *cm_id = isert_conn->cm_id;
 	struct isert_device *isert_dev = isert_conn->isert_dev;
@@ -1266,10 +1183,24 @@
 	goto out;
 }
 
-static struct isert_connection *isert_conn_create(struct rdma_cm_id *cm_id,
+static void
+isert_init_conn(struct isert_conn *isert_conn)
+{
+	isert_conn->state = ISER_CONN_INIT;
+	INIT_LIST_HEAD(&isert_conn->rx_buf_list);
+	INIT_LIST_HEAD(&isert_conn->tx_free_list);
+	INIT_LIST_HEAD(&isert_conn->tx_busy_list);
+	spin_lock_init(&isert_conn->tx_lock);
+	spin_lock_init(&isert_conn->post_recv_lock);
+	init_waitqueue_head(&isert_conn->rem_wait);
+	kref_init(&isert_conn->kref);
+	mutex_init(&isert_conn->state_mutex);
+}
+
+static struct isert_conn *isert_conn_create(struct rdma_cm_id *cm_id,
 						struct isert_device *isert_dev)
 {
-	struct isert_connection *isert_conn;
+	struct isert_conn *isert_conn;
 	int err;
 	struct isert_cq *cq;
 
@@ -1281,7 +1212,9 @@
 		err = -ENOMEM;
 		goto fail_alloc;
 	}
-	isert_conn->state = ISER_CONN_INIT;
+
+	isert_init_conn(isert_conn);
+
 	isert_conn->cm_id = cm_id;
 	isert_conn->isert_dev = isert_dev;
 
@@ -1307,12 +1240,6 @@
 		goto fail_login_req_pdu;
 	}
 
-	INIT_LIST_HEAD(&isert_conn->rx_buf_list);
-	INIT_LIST_HEAD(&isert_conn->tx_free_list);
-	INIT_LIST_HEAD(&isert_conn->tx_busy_list);
-	spin_lock_init(&isert_conn->tx_lock);
-	spin_lock_init(&isert_conn->post_recv_lock);
-
 	isert_conn->login_req_pdu = isert_rx_pdu_alloc(isert_conn,
 						       ISER_MAX_LOGIN_RDSL);
 	if (unlikely(!isert_conn->login_req_pdu)) {
@@ -1340,9 +1267,6 @@
 		goto fail_post_recv;
 	}
 
-	kref_init(&isert_conn->kref);
-	mutex_init(&isert_conn->state_mutex);
-
 	TRACE_EXIT();
 	return isert_conn;
 
@@ -1370,11 +1294,11 @@
 		isert_device_release(isert_dev);
 }
 
-static void isert_kref_free(struct kref *kref)
+static void isert_release_kref(struct kref *kref)
 {
 	struct isert_conn_dev *dev;
-	struct isert_connection *isert_conn =
-		container_of(kref, struct isert_connection, kref);
+	struct isert_conn *isert_conn =
+		container_of(kref, struct isert_conn, kref);
 	struct isert_device *isert_dev = isert_conn->isert_dev;
 	struct isert_cq *cq = isert_conn->qp->recv_cq->cq_context;
 
@@ -1384,8 +1308,11 @@
 
 	isert_free_conn_resources(isert_conn);
 
-	rdma_destroy_id(isert_conn->cm_id);
-	isert_conn->cm_id = NULL;
+	if (isert_conn->cm_id &&
+	    !atomic_read(&isert_conn->dev_removed)) {
+		rdma_destroy_id(isert_conn->cm_id);
+		isert_conn->cm_id = NULL;
+	}
 
 	dev = isert_get_priv(&isert_conn->iscsi);
 	if (dev) {
@@ -1408,30 +1335,35 @@
 		isert_portal_free(isert_conn->portal);
 	mutex_unlock(&dev_list_mutex);
 
-	isert_conn_kfree(isert_conn);
 
-	module_put(THIS_MODULE);
+	if (atomic_read(&isert_conn->dev_removed)) {
+		atomic_set(&isert_conn->dev_removed, 0);
+		wake_up_interruptible(&isert_conn->rem_wait);
+	} else {
+		isert_conn_kfree(isert_conn);
+		module_put(THIS_MODULE);
+	}
 
 	TRACE_EXIT();
 }
 
-void isert_conn_free(struct isert_connection *isert_conn)
+void isert_put_conn(struct isert_conn *isert_conn)
 {
 	sBUG_ON(kref_read(&isert_conn->kref) == 0);
-	kref_put(&isert_conn->kref, isert_kref_free);
+	kref_put(&isert_conn->kref, isert_release_kref);
 }
 
 static int isert_cm_disconnected_handler(struct rdma_cm_id *cm_id,
 					 struct rdma_cm_event *event)
 {
-	struct isert_connection *isert_conn = cm_id->qp->qp_context;
+	struct isert_conn *isert_conn = cm_id->qp->qp_context;
 
 	if (!test_and_set_bit(ISERT_CONNECTION_CLOSE, &isert_conn->flags))
 		isert_sched_conn_closed(isert_conn);
 	return 0;
 }
 
-static void isert_immediate_conn_close(struct isert_connection *isert_conn)
+static void isert_immediate_conn_close(struct isert_conn *isert_conn)
 {
 	set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags);
 	set_bit(ISERT_CONNECTION_CLOSE, &isert_conn->flags);
@@ -1441,8 +1373,8 @@
 	 * one from the init and two from the connect request,
 	 * thus it is safe to deref directly before the sched_conn_free.
 	 */
-	isert_conn_free(isert_conn);
-	isert_conn_free(isert_conn);
+	isert_put_conn(isert_conn);
+	isert_put_conn(isert_conn);
 	isert_sched_conn_free(isert_conn);
 }
 
@@ -1453,7 +1385,7 @@
 	struct isert_portal *portal = cm_id->context;
 	struct ib_device *ib_dev = cm_id->device;
 	struct isert_device *isert_dev;
-	struct isert_connection *isert_conn;
+	struct isert_conn *isert_conn;
 	struct rdma_conn_param *ini_conn_param;
 	struct rdma_conn_param tgt_conn_param;
 	struct isert_cm_hdr cm_hdr = { 0 };
@@ -1525,36 +1457,8 @@
 		goto fail_accept;
 	}
 
-	switch (isert_conn->peer_addr.ss_family) {
-	case AF_INET:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-		PRINT_INFO("iser accepted connection cm_id:%p "
-			   NIPQUAD_FMT "->" NIPQUAD_FMT, cm_id,
-			   NIPQUAD(((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr),
-			   NIPQUAD(((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr));
-#else
-		PRINT_INFO("iser accepted connection cm_id:%p %pI4->%pI4",
-			   cm_id,
-			   &((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr,
-			   &((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr);
-#endif
-		break;
-	case AF_INET6:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-		PRINT_INFO("iser accepted connection cm_id:%p "
-			   NIP6_FMT "->" NIP6_FMT, cm_id,
-			   NIP6(((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr),
-			   NIP6(((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr));
-#else
-		PRINT_INFO("iser accepted connection cm_id:%p %pI6->%pI6",
-			   cm_id,
-			   &((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr,
-			   &((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr);
-#endif
-		break;
-	default:
-		PRINT_INFO("iser accepted connection cm_id:%p", cm_id);
-	}
+	PRINT_INFO("iser accepted connection cm_id:%p %pISpc->%pISpc",
+		   cm_id, &isert_conn->peer_addr, &isert_conn->self_addr);
 
 	mutex_lock(&dev_list_mutex);
 	list_add_tail(&isert_conn->portal_node, &portal->conn_list);
@@ -1587,7 +1491,7 @@
 static int isert_cm_connect_handler(struct rdma_cm_id *cm_id,
 				    struct rdma_cm_event *event)
 {
-	struct isert_connection *isert_conn = cm_id->qp->qp_context;
+	struct isert_conn *isert_conn = cm_id->qp->qp_context;
 	int push_saved_pdu = 0;
 	int ret = 0;
 
@@ -1625,85 +1529,89 @@
 	return ret;
 }
 
-static int isert_cm_disconnect_handler(struct rdma_cm_id *cm_id,
-				       struct rdma_cm_event *event)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && \
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
+static const char *const cma_events[] = {
+	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
+	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
+	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
+	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
+	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
+	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
+	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
+	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
+	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
+	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
+	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
+	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
+	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
+	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
+	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
+	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
+};
+
+static const char *rdma_event_msg(enum rdma_cm_event_type event)
 {
-	struct isert_connection *isert_conn = cm_id->qp->qp_context;
+	size_t index = event;
 
-	isert_conn_disconnect(isert_conn);
-
-	return 0;
+	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
+			cma_events[index] : "unrecognized event";
 }
+#endif
 
-static const char *cm_event_type_str(enum rdma_cm_event_type ev_type)
-{
-	switch (ev_type) {
-	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		return "ADDRESS_RESOLVED";
-	case RDMA_CM_EVENT_ADDR_ERROR:
-		return "ADDESS_ERROR";
-	case RDMA_CM_EVENT_ROUTE_RESOLVED:
-		return "ROUTE_RESOLVED";
-	case RDMA_CM_EVENT_ROUTE_ERROR:
-		return "ROUTE_ERROR";
-	case RDMA_CM_EVENT_CONNECT_REQUEST:
-		return "CONNECT_REQUEST";
-	case RDMA_CM_EVENT_CONNECT_RESPONSE:
-		return "CONNECT_RESPONSE";
-	case RDMA_CM_EVENT_CONNECT_ERROR:
-		return "CONNECT_ERROR";
-	case RDMA_CM_EVENT_UNREACHABLE:
-		return "UNREACHABLE";
-	case RDMA_CM_EVENT_REJECTED:
-		return "REJECTED";
-	case RDMA_CM_EVENT_ESTABLISHED:
-		return "ESTABLISHED";
-	case RDMA_CM_EVENT_DISCONNECTED:
-		return "DISCONNECTED";
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		return "DEVICE_REMOVAL";
-	case RDMA_CM_EVENT_MULTICAST_JOIN:
-		return "MULTICAST_JOIN";
-	case RDMA_CM_EVENT_MULTICAST_ERROR:
-		return "MULTICAST_ERROR";
-	case RDMA_CM_EVENT_ADDR_CHANGE:
-		return "ADDR_CHANGE";
-	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		return "TIMEWAIT_EXIT";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static int isert_handle_failure(struct isert_connection *conn)
+static int isert_handle_failure(struct isert_conn *conn)
 {
 	isert_conn_disconnect(conn);
 	return 0;
 }
 
-static int isert_cm_evt_listener_handler(struct rdma_cm_id *cm_id,
-					 struct rdma_cm_event *cm_ev)
+static void isert_portal_reinit_id_work(struct work_struct *w)
 {
-	enum rdma_cm_event_type ev_type;
-	struct isert_portal *portal;
-	int err = 0;
+	struct isert_portal *portal = container_of(w, struct isert_portal, work);
 
-	ev_type = cm_ev->event;
+	/* Replace the listener id; if re-setup fails the portal is left with none. */
+	rdma_destroy_id(portal->cm_id);
+	portal->cm_id = isert_setup_id(portal);
+	if (IS_ERR(portal->cm_id)) {
+		PRINT_ERROR("Failed to create rdma id, err:%ld",
+			    PTR_ERR(portal->cm_id));
+		portal->cm_id = NULL;
+	}
+}
+
+static int isert_cm_evt_listener_handler(struct rdma_cm_id *cm_id,
+					 enum rdma_cm_event_type event)
+{
+	struct isert_portal *portal;
+	int ret = 0;	/* non-zero tells the RDMA CM core to destroy cm_id */
+
 	portal = cm_id->context;
 
-	switch (ev_type) {
+	switch (event) {
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		portal->cm_id = NULL;
-		err = -EINVAL;
+		ret = -1;	/* pointer dropped above, so let the CM core destroy the id */
 		break;
-
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+		queue_work(portal->reinit_id_wq, &portal->work);
+		break;
 	default:
 		PRINT_INFO("Listener event:%s(%d), ignored",
-			   cm_event_type_str(ev_type), ev_type);
+			   rdma_event_msg(event), event);
 		break;
 	}
 
-	return err;
+	return ret;
+}
+
+static int isert_cm_disconnect_handler(struct rdma_cm_id *cm_id,
+				       enum rdma_cm_event_type event)
+{
+	struct isert_conn *isert_conn = cm_id->qp->qp_context;
+
+	isert_conn_disconnect(isert_conn);
+
+	return 0;
 }
 
 static int isert_cm_evt_handler(struct rdma_cm_id *cm_id,
@@ -1718,17 +1626,20 @@
 	ev_type = cm_ev->event;
 	portal = cm_id->context;
 	PRINT_INFO("isert_cm_evt:%s(%d) status:%d portal:%p cm_id:%p",
-		   cm_event_type_str(ev_type), ev_type, cm_ev->status,
+		   rdma_event_msg(ev_type), ev_type, cm_ev->status,
 		   portal, cm_id);
 
 	if (portal->cm_id == cm_id) {
-		err = isert_cm_evt_listener_handler(cm_id, cm_ev);
+		err = isert_cm_evt_listener_handler(cm_id, ev_type);
 		goto out;
 	}
 
 	switch (ev_type) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		err = isert_cm_conn_req_handler(cm_id, cm_ev);
+		if (unlikely(err))
+			PRINT_ERROR("Failed to handle RDMA_CM_EVENT_CONNECT_REQUEST, err:%d",
+				    err);
 		break;
 
 	case RDMA_CM_EVENT_ESTABLISHED:
@@ -1739,17 +1650,36 @@
 
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_REJECTED:
-		err = isert_cm_disconnect_handler(cm_id, cm_ev);
+		err = isert_cm_disconnect_handler(cm_id, ev_type);
 		break;
 
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		isert_cm_disconnect_handler(cm_id, cm_ev);
+		isert_cm_disconnect_handler(cm_id, ev_type);
 		err = isert_cm_disconnected_handler(cm_id, cm_ev);
 		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL: {
+		struct isert_conn *isert_conn = cm_id->qp->qp_context;
 
+		atomic_set(&isert_conn->dev_removed, 1);
+
+		isert_cm_disconnect_handler(cm_id, ev_type);
+		isert_cm_disconnected_handler(cm_id, cm_ev);
+
+		wait_event_interruptible(isert_conn->rem_wait,
+					 !atomic_read(&isert_conn->dev_removed));
+
+		isert_conn_kfree(isert_conn);
+		module_put(THIS_MODULE);
+		/*
+		 * return non-zero from the callback to destroy
+		 * the rdma cm id
+		 */
+		err = 1;
+
+		break;
+	}
 	case RDMA_CM_EVENT_MULTICAST_JOIN:
 	case RDMA_CM_EVENT_MULTICAST_ERROR:
 		PRINT_ERROR("UD-related event:%d, ignored", ev_type);
@@ -1766,7 +1696,7 @@
 	/* We can receive this instead of RDMA_CM_EVENT_ESTABLISHED */
 	case RDMA_CM_EVENT_UNREACHABLE:
 		{
-			struct isert_connection *isert_conn =
+			struct isert_conn *isert_conn =
 				cm_id->qp->qp_context;
 
 			mutex_lock(&isert_conn->state_mutex);
@@ -1782,19 +1712,69 @@
 		break;
 	}
 
-	if (unlikely(err))
-		PRINT_ERROR("Failed to handle rdma cm evt:%d, err:%d",
-			    ev_type, err);
-
 out:
 	TRACE_EXIT_RES(err);
 	return err;
 }
 
+/*
+ * Create an RDMA CM id for @portal, bind it to portal->addr and start
+ * listening on it. Returns the new id, or an ERR_PTR() on failure.
+ */
+static struct rdma_cm_id *
+isert_setup_id(struct isert_portal *portal)
+{
+	struct sockaddr *addr = (struct sockaddr *)&portal->addr;
+	struct rdma_cm_id *cm_id;
+	int err;
+
+#if RDMA_CREATE_ID_TAKES_NET_ARG
+	cm_id = rdma_create_id(iscsi_net_ns, isert_cm_evt_handler, portal,
+			       RDMA_PS_TCP, IB_QPT_RC);
+#else
+	cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP,
+			       IB_QPT_RC);
+#endif
+	if (IS_ERR(cm_id)) {
+		err = PTR_ERR(cm_id);
+		PRINT_ERROR("Failed to create rdma id, err:%d", err);
+		return ERR_PTR(err);
+	}
+
+	/*
+	 * Let IPv4 and IPv6 sockets bind one port at the same time.
+	 */
+	err = rdma_set_afonly(cm_id, 1);
+	if (err) {
+		PRINT_ERROR("Failed to set afonly, err:%d", err);
+		goto destroy_id;
+	}
+
+	err = rdma_bind_addr(cm_id, addr);
+	if (err) {
+		PRINT_ERROR("Failed to bind rdma addr, err:%d", err);
+		goto destroy_id;
+	}
+
+	err = rdma_listen(cm_id, ISER_LISTEN_BACKLOG);
+	if (err) {
+		PRINT_ERROR("Failed rdma listen, err:%d", err);
+		goto destroy_id;
+	}
+
+	PRINT_INFO("iser portal with cm_id %p listens on %pISpc", cm_id, addr);
+
+	return cm_id;
+
+destroy_id:
+	rdma_destroy_id(cm_id);
+	return ERR_PTR(err);
+}
+
 /* create a portal, after listening starts all events
  * are received in isert_cm_evt_handler()
  */
-struct isert_portal *isert_portal_create(void)
+struct isert_portal *isert_portal_create(struct sockaddr *sa, size_t addr_len)
 {
 	struct isert_portal *portal;
 	struct rdma_cm_id *cm_id;
@@ -1813,35 +1793,35 @@
 		goto err_alloc;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 <= 5)
-	cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP);
-#elif !RDMA_CREATE_ID_TAKES_NET_ARG
-	cm_id = rdma_create_id(isert_cm_evt_handler, portal, RDMA_PS_TCP,
-			       IB_QPT_RC);
-#else
-	cm_id = rdma_create_id(iscsi_net_ns, isert_cm_evt_handler, portal,
-			       RDMA_PS_TCP, IB_QPT_RC);
-#endif
+	portal->reinit_id_wq = alloc_ordered_workqueue("isert_reinit_id_wq", WQ_MEM_RECLAIM);
+	if (unlikely(!portal->reinit_id_wq)) {
+		PRINT_ERROR("Unable to allocate reinit workqueue");
+		err = -ENOMEM;
+		goto free_portal;
+	}
+
+	INIT_WORK(&portal->work, isert_portal_reinit_id_work);
+
+	INIT_LIST_HEAD(&portal->conn_list);
+	memcpy(&portal->addr, sa, addr_len);
+
+	cm_id = isert_setup_id(portal);
 	if (IS_ERR(cm_id)) {
 		err = PTR_ERR(cm_id);
 		PRINT_ERROR("Failed to create rdma id, err:%d", err);
-		goto create_id_err;
+		goto free_wq;
 	}
+
 	portal->cm_id = cm_id;
-
-	INIT_LIST_HEAD(&portal->conn_list);
 	isert_portal_list_add(portal);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
-	rdma_set_afonly(cm_id, 1);
-#endif
-
 	PRINT_INFO("Created iser portal cm_id:%p", cm_id);
 out:
 	return portal;
 
-create_id_err:
+free_wq:
+	destroy_workqueue(portal->reinit_id_wq);
+free_portal:
 	kfree(portal);
 	portal = ERR_PTR(err);
 err_alloc:
@@ -1849,64 +1829,6 @@
 	goto out;
 }
 
-int isert_portal_listen(struct isert_portal *portal,
-			struct sockaddr *sa,
-			size_t addr_len)
-{
-	int err;
-
-	TRACE_ENTRY();
-	err = rdma_bind_addr(portal->cm_id, sa);
-	if (err) {
-		PRINT_WARNING("Failed to bind rdma addr, err:%d", err);
-		goto out;
-	}
-	err = rdma_listen(portal->cm_id, ISER_LISTEN_BACKLOG);
-	if (err) {
-		PRINT_ERROR("Failed rdma listen, err:%d", err);
-		goto out;
-	}
-	memcpy(&portal->addr, sa, addr_len);
-
-	switch (sa->sa_family) {
-	case AF_INET:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-		PRINT_INFO("iser portal cm_id:%p listens on: "
-			   NIPQUAD_FMT ":%d", portal->cm_id,
-			   NIPQUAD(((struct sockaddr_in *)sa)->sin_addr.s_addr),
-			   (int)ntohs(((struct sockaddr_in *)sa)->sin_port));
-#else
-		PRINT_INFO("iser portal cm_id:%p listens on: %pI4:%d",
-			   portal->cm_id,
-			   &((struct sockaddr_in *)sa)->sin_addr.s_addr,
-			   (int)ntohs(((struct sockaddr_in *)sa)->sin_port));
-#endif
-		break;
-	case AF_INET6:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-		PRINT_INFO("iser portal cm_id:%p listens on: "
-			   NIP6_FMT " %d",
-			   portal->cm_id,
-			   NIP6(((struct sockaddr_in6 *)sa)->sin6_addr),
-			   (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port));
-#else
-		PRINT_INFO("iser portal cm_id:%p listens on: %pI6 %d",
-			   portal->cm_id,
-			   &((struct sockaddr_in6 *)sa)->sin6_addr,
-			   (int)ntohs(((struct sockaddr_in6 *)sa)->sin6_port));
-#endif
-		break;
-	default:
-		PRINT_ERROR("Unknown address family");
-		err = -EINVAL;
-		goto out;
-	}
-
-out:
-	TRACE_EXIT_RES(err);
-	return err;
-}
-
 static void isert_portal_free(struct isert_portal *portal)
 {
 	lockdep_assert_held(&dev_list_mutex);
@@ -1914,6 +1836,8 @@
 	if (portal->refcnt > 0)
 		return;
 
+	destroy_workqueue(portal->reinit_id_wq);
+
 	kfree(portal);
 	module_put(THIS_MODULE);
 
@@ -1922,7 +1846,7 @@
 
 void isert_portal_release(struct isert_portal *portal)
 {
-	struct isert_connection *conn;
+	struct isert_conn *conn;
 
 	PRINT_INFO("iser portal cm_id:%p releasing", portal->cm_id);
 
@@ -1948,17 +1872,5 @@
 
 struct isert_portal *isert_portal_start(struct sockaddr *sa, size_t addr_len)
 {
-	struct isert_portal *portal;
-	int err;
-
-	portal = isert_portal_create();
-	if (IS_ERR(portal))
-		return portal;
-
-	err = isert_portal_listen(portal, sa, addr_len);
-	if (err) {
-		isert_portal_release(portal);
-		portal = ERR_PTR(err);
-	}
-	return portal;
+	return isert_portal_create(sa, addr_len);
 }

diff --git a/scst/iscsi-scst/kernel/isert-scst/isert.c b/scst/iscsi-scst/kernel/isert-scst/isert.c
index 3c99816..5e47d8e 100644
--- a/scst/iscsi-scst/kernel/isert-scst/isert.c
+++ b/scst/iscsi-scst/kernel/isert-scst/isert.c

@@ -432,25 +432,13 @@
 
 	switch (ss.ss_family) {
 	case AF_INET:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-		pos = scnprintf(buf, size,
-			 "%u.%u.%u.%u",
-			 NIPQUAD(((struct sockaddr_in *)&ss)->sin_addr.s_addr));
-#else
 		pos = scnprintf(buf, size,
 			"%pI4", &((struct sockaddr_in *)&ss)->sin_addr.s_addr);
-#endif
 		break;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-		pos = scnprintf(buf, size,
-			 "[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]",
-			 NIP6(((struct sockaddr_in6 *)&ss)->sin6_addr));
-#else
 		pos = scnprintf(buf, size, "[%pI6]",
 			&((struct sockaddr_in6 *)&ss)->sin6_addr);
-#endif
 		break;
 #endif
 	default:
@@ -515,8 +503,8 @@
 MODULE_AUTHOR("Yan Burman");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_IMPORT_NS(SCST);
-#define DRV_VERSION		"3.5.0" "#" __stringify(OFED_FLAVOR)
-#define DRV_RELDATE		"21 December 2020"
+#define DRV_VERSION		"3.7.0" "#" __stringify(OFED_FLAVOR)
+#define DRV_RELDATE		"26 December 2022"
 MODULE_DESCRIPTION("iSER target transport driver "
 		   "v" DRV_VERSION " (" DRV_RELDATE ")");
 module_init(isert_init_module);

diff --git a/scst/iscsi-scst/kernel/isert-scst/isert.h b/scst/iscsi-scst/kernel/isert-scst/isert.h
index dd541a1..9d5600e 100644
--- a/scst/iscsi-scst/kernel/isert-scst/isert.h
+++ b/scst/iscsi-scst/kernel/isert-scst/isert.h

@@ -44,11 +44,7 @@
 #include <linux/wait.h>
 #include <linux/init.h>
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-#include <asm/atomic.h>
-#else
 #include <linux/atomic.h>
-#endif
 
 #ifdef INSIDE_KERNEL_TREE
 #include <scst/isert_scst.h>

diff --git a/scst/iscsi-scst/kernel/isert-scst/isert_login.c b/scst/iscsi-scst/kernel/isert-scst/isert_login.c
index be91262..43abc5a 100644
--- a/scst/iscsi-scst/kernel/isert-scst/isert_login.c
+++ b/scst/iscsi-scst/kernel/isert-scst/isert_login.c

@@ -42,11 +42,7 @@
 #ifndef INSIDE_KERNEL_TREE
 #include <linux/version.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
 #include <linux/freezer.h>
-#else
-#define wait_event_freezable(wq, cond) ({ wait_event(wq, cond); 0; })
-#endif
 #include <linux/file.h>
 #include "isert_dbg.h"
 #include "../iscsi.h"
@@ -119,18 +115,10 @@
 	mutex_unlock(&isert_listen_dev.conn_lock);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void isert_close_conn_fn(void *ctx)
-#else
 static void isert_close_conn_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct iscsi_conn *conn = ctx;
-#else
 	struct iscsi_conn *conn = container_of(work,
 		struct iscsi_conn, close_work);
-#endif
 
 	isert_close_connection(conn);
 }
@@ -166,11 +154,7 @@
 		goto out;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&conn->close_work, isert_close_conn_fn, conn);
-#else
 	INIT_WORK(&conn->close_work, isert_close_conn_fn);
-#endif
 
 	timer_setup(&conn_dev->tmo_timer, isert_conn_timer_fn, 0);
 	conn_dev->tmo_timer.expires = jiffies + 60 * HZ;
@@ -486,8 +470,8 @@
 
 void isert_connection_abort(struct iscsi_conn *iscsi_conn)
 {
-	struct isert_connection *isert_conn =
-		container_of(iscsi_conn, struct isert_connection, iscsi);
+	struct isert_conn *isert_conn =
+		container_of(iscsi_conn, struct isert_conn, iscsi);
 
 	TRACE_ENTRY();
 
@@ -953,9 +937,7 @@
 			    index);
 
 	dev->dev = device_create(isert_class, NULL, dev->devno,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
 				 NULL,
-#endif
 				 ISER_CONN_DEV_PREFIX"%d", index);
 
 	TRACE_EXIT();
@@ -983,9 +965,7 @@
 		PRINT_ERROR("Error %d adding isert_scst", err);
 
 	dev->dev = device_create(isert_class, NULL, dev->devno,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
 				 NULL,
-#endif
 				 "isert_scst");
 
 	TRACE_EXIT();

diff --git a/scst/iscsi-scst/kernel/nthread.c b/scst/iscsi-scst/kernel/nthread.c
index 575674b..e91a885 100644
--- a/scst/iscsi-scst/kernel/nthread.c
+++ b/scst/iscsi-scst/kernel/nthread.c

@@ -1020,10 +1020,10 @@
 					&r->write_timeout_list_entry);
 				inserted = true;
 				break;
-			} else {
-				TRACE_DBG("Skipping op %x req %p (tt %ld)",
-					cmnd_opcode(r), r, tt);
 			}
+
+			TRACE_DBG("Skipping op %x req %p (tt %ld)",
+				  cmnd_opcode(r), r, tt);
 		}
 		if (!inserted) {
 			TRACE_DBG("Add NOP IN req %p in the tail", req);
@@ -1315,19 +1315,23 @@
 			    (unsigned long long)conn->session->sid,
 			    conn->cid, conn->write_cmnd);
 	}
-	if (ref_cmd_to_parent &&
-	    ((ref_cmd->scst_cmd != NULL) || (ref_cmd->scst_aen != NULL))) {
-		if (ref_cmd->scst_state == ISCSI_CMD_STATE_AEN)
-			scst_set_aen_delivery_status(ref_cmd->scst_aen,
-				SCST_AEN_RES_FAILED);
-		else
-			scst_set_delivery_status(ref_cmd->scst_cmd,
-				SCST_CMD_DELIVERY_FAILED);
+
+	if (ref_cmd_to_parent) {
+		if (ref_cmd->scst_state == ISCSI_CMD_STATE_AEN) {
+			if (ref_cmd->scst_aen)
+				scst_set_aen_delivery_status(ref_cmd->scst_aen,
+					SCST_AEN_RES_FAILED);
+		} else {
+			if (ref_cmd->scst_cmd)
+				scst_set_delivery_status(ref_cmd->scst_cmd,
+					SCST_CMD_DELIVERY_FAILED);
+		}
 	}
+
 	goto out;
 }
 
-static int exit_tx(struct iscsi_conn *conn, int res)
+static void exit_tx(struct iscsi_conn *conn, int res)
 {
 	iscsi_extracheck_is_wr_thread(conn);
 
@@ -1351,7 +1355,8 @@
 		mark_conn_closed(conn);
 		break;
 	}
-	return res;
+
+	return;
 }
 
 static int tx_ddigest(struct iscsi_cmnd *cmnd, int state)
@@ -1373,7 +1378,7 @@
 		if (!cmnd->conn->write_size)
 			cmnd->conn->write_state = state;
 	} else
-		res = exit_tx(cmnd->conn, res);
+		exit_tx(cmnd->conn, res);
 
 	return res;
 }
@@ -1420,7 +1425,7 @@
 		if (!cmnd->conn->write_size)
 			cmnd->conn->write_state = state;
 	} else
-		res = exit_tx(cmnd->conn, res);
+		exit_tx(cmnd->conn, res);
 
 	return res;
 }
@@ -1436,7 +1441,7 @@
 		if (!conn->write_size)
 			conn->write_state = state;
 	} else
-		res = exit_tx(conn, res);
+		exit_tx(conn, res);
 
 	return res;
 }
@@ -1506,9 +1511,6 @@
 		sBUG();
 	}
 
-	if (res == 0)
-		goto out;
-
 	if (conn->write_state != TX_END)
 		goto out;
 

diff --git a/scst/iscsi-scst/kernel/session.c b/scst/iscsi-scst/kernel/session.c
index 022e6ef..59c1f3e 100644
--- a/scst/iscsi-scst/kernel/session.c
+++ b/scst/iscsi-scst/kernel/session.c

@@ -17,9 +17,7 @@
 #ifndef INSIDE_KERNEL_TREE
 #include <linux/version.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
 #include <linux/export.h>
-#endif
 
 #include "iscsi_trace_flag.h"
 #include "iscsi.h"

diff --git a/scst/iscsi-scst/resource_agents/SCSTTarget b/scst/iscsi-scst/resource_agents/SCSTTarget
index 3512004..48235ff 100644
--- a/scst/iscsi-scst/resource_agents/SCSTTarget
+++ b/scst/iscsi-scst/resource_agents/SCSTTarget

@@ -122,7 +122,7 @@
 <actions>
 <action name="start"        timeout="10" />
 <action name="stop"         timeout="180" />
-<action name="status "      timeout="10" interval="10" depth="0" />
+<action name="status"       timeout="10" interval="10" depth="0" />
 <action name="monitor"      timeout="10" interval="10" depth="0" />
 <action name="meta-data"    timeout="5" />
 <action name="validate-all"   timeout="10" />

diff --git a/scst/iscsi-scst/usr/Makefile b/scst/iscsi-scst/usr/Makefile
index c94a9d4..e50aad6 100644
--- a/scst/iscsi-scst/usr/Makefile
+++ b/scst/iscsi-scst/usr/Makefile

@@ -18,7 +18,7 @@
         PREFIX=/usr/local
 endif
 
-cc-option = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null \
+cc-option = $(shell if $(CC) $(1) -Werror -S -o /dev/null -xc /dev/null \
              > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;)
 
 SRCS_D = iscsid.c iscsi_scstd.c conn.c session.c target.c message.c ctldev.c \

diff --git a/scst/iscsi-scst/usr/iscsi_adm.c b/scst/iscsi-scst/usr/iscsi_adm.c
index 7468250..19af947 100644
--- a/scst/iscsi-scst/usr/iscsi_adm.c
+++ b/scst/iscsi-scst/usr/iscsi_adm.c

@@ -375,7 +375,7 @@
 static int sess_handle(int op, u32 set, u32 tid, u64 sid, char *params)
 {
 	int err = -EINVAL;
-	struct iscsi_adm_req req;
+	struct iscsi_adm_req req = {};
 
 	if (op == OP_NEW || op == OP_UPDATE) {
 		fprintf(stderr, "Unsupported.\n");

diff --git a/scst/iscsi-scst/usr/iscsi_adm.h b/scst/iscsi-scst/usr/iscsi_adm.h
index ae96b8c..2cce717 100644
--- a/scst/iscsi-scst/usr/iscsi_adm.h
+++ b/scst/iscsi-scst/usr/iscsi_adm.h

@@ -74,7 +74,6 @@
 	u32 tid;
 	u64 sid;
 	u32 cid;
-	u32 lun;
 
 	union {
 		struct msg_trgt trgt;

diff --git a/scst/iscsi-scst/usr/iscsi_scstd.c b/scst/iscsi-scst/usr/iscsi_scstd.c
index c86ac11..55b7d10 100644
--- a/scst/iscsi-scst/usr/iscsi_scstd.c
+++ b/scst/iscsi-scst/usr/iscsi_scstd.c

@@ -40,7 +40,7 @@
 #include "iscsid.h"
 #include "iscsi_adm.h"
 
-static char *server_address;
+static char *server_addresses[ADDR_MAX];
 uint16_t server_port = ISCSI_LISTEN_PORT;
 
 struct pollfd poll_array[POLL_MAX];
@@ -79,13 +79,13 @@
 iSCSI target daemon.\n\
   -c, --config=[path]     Execute in the config file.\n");
 		printf("\
-  -f, --foreground        make the program run in the foreground\n\
-  -d, --debug debuglevel  print debugging information\n\
-  -u, --uid=uid           run as uid, default is current user\n\
-  -g, --gid=gid           run as gid, default is current user group\n\
-  -a, --address=address   listen on specified local address instead of all\n\
-  -p, --port=port         listen on specified port instead of 3260\n\
-  -h, --help              display this help and exit\n\
+  -f, --foreground           make the program run in the foreground\n\
+  -d, --debug debuglevel     print debugging information\n\
+  -u, --uid=uid              run as uid, default is current user\n\
+  -g, --gid=gid              run as gid, default is current user group\n\
+  -a, --address=address ...  listen on specified space-separated list of local address instead of all\n\
+  -p, --port=port            listen on specified port instead of 3260\n\
+  -h, --help                 display this help and exit\n\
 ");
 	}
 	exit(1);
@@ -103,7 +103,7 @@
 {
 	struct addrinfo hints, *res, *res0;
 	char servname[64];
-	int i, sock, opt, rc;
+	int i, k, sock, opt, rc;
 
 	memset(servname, 0, sizeof(servname));
 	snprintf(servname, sizeof(servname), "%d", server_port);
@@ -112,59 +112,75 @@
 	hints.ai_socktype = SOCK_STREAM;
 	hints.ai_flags = AI_PASSIVE;
 
-	rc = getaddrinfo(server_address, servname, &hints, &res0);
-	if (rc != 0) {
-		log_error("Unable to get address info (%s)!",
-			get_error_str(rc));
-		exit(1);
-	}
-
 	i = 0;
-	for (res = res0; res && i < LISTEN_MAX; res = res->ai_next) {
-		sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-		if (sock < 0) {
-			log_error("Unable to create server socket (%s) %d %d %d!",
-				  strerror(errno), res->ai_family,
-				  res->ai_socktype, res->ai_protocol);
-			continue;
+	for (k = 0; k < ADDR_MAX; k++) {
+		char *server_address;
+
+		server_address = server_addresses[k];
+		if (k > 0 && server_address == NULL)
+			break;
+
+		if (i == LISTEN_MAX) {
+			log_error("Cannot handle address %s! Too many were specified.", server_address);
+			exit(1);
 		}
 
-		sock_set_keepalive(sock, 50);
-
-		opt = 1;
-		if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)))
-			log_warning("Unable to set SO_REUSEADDR on server socket (%s)!",
-				    strerror(errno));
-		opt = 1;
-		if (res->ai_family == AF_INET6 &&
-		    setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt))) {
-			log_error("Unable to restrict IPv6 socket (%s)", strerror(errno));
-			close(sock);
-			continue;
+		rc = getaddrinfo(server_address, servname, &hints, &res0);
+		if (rc != 0) {
+			log_error("Unable to get address info [%s] (%s)!",
+				  server_address, get_error_str(rc));
+			exit(1);
 		}
 
-		if (bind(sock, res->ai_addr, res->ai_addrlen)) {
-			log_error("Unable to bind server socket (%s)!", strerror(errno));
-			close(sock);
-			continue;
+		for (res = res0; res && i < LISTEN_MAX; res = res->ai_next) {
+			sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
+			if (sock < 0) {
+				log_error("Unable to create server socket (%s) %d %d %d!",
+					  strerror(errno), res->ai_family,
+					  res->ai_socktype, res->ai_protocol);
+				continue;
+			}
+
+			sock_set_keepalive(sock, 50);
+
+			opt = 1;
+			if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)))
+				log_warning("Unable to set SO_REUSEADDR on server socket (%s)!",
+					    strerror(errno));
+			opt = 1;
+			if (res->ai_family == AF_INET6 &&
+				setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt))) {
+				log_error("Unable to restrict IPv6 socket (%s)", strerror(errno));
+				close(sock);
+				continue;
+			}
+
+			if (bind(sock, res->ai_addr, res->ai_addrlen)) {
+				log_error("Unable to bind server socket (%s)!", strerror(errno));
+				close(sock);
+				continue;
+			}
+
+			if (listen(sock, INCOMING_MAX)) {
+				log_error("Unable to listen to server socket (%s)!", strerror(errno));
+				close(sock);
+				continue;
+			}
+
+			set_non_blocking(sock);
+
+			array[i].fd = sock;
+			array[i].events = POLLIN;
+
+			i++;
 		}
 
-		if (listen(sock, INCOMING_MAX)) {
-			log_error("Unable to listen to server socket (%s)!", strerror(errno));
-			close(sock);
-			continue;
-		}
+		if (res)
+			log_error("Unable to listen on all available sockets.");
 
-		set_non_blocking(sock);
-
-		array[i].fd = sock;
-		array[i].events = POLLIN;
-
-		i++;
+		freeaddrinfo(res0);
 	}
 
-	freeaddrinfo(res0);
-
 	if (i == 0)
 		exit(1);
 }
@@ -226,7 +242,8 @@
 {
 	struct addrinfo hints, *res, *res0;
 	char servname[64];
-	int rc, i;
+	char *server_address;
+	int rc, i, k;
 	int iser_fd;
 	struct isert_addr_info info;
 
@@ -252,27 +269,42 @@
 	hints.ai_socktype = SOCK_STREAM;
 	hints.ai_flags = AI_PASSIVE;
 
-	rc = getaddrinfo(server_address, servname, &hints, &res0);
-	if (rc != 0) {
-		log_error("Unable to get address info (%s)!",
-			get_error_str(rc));
-		exit(1);
-	}
-
 	i = 0;
-	for (res = res0; res && i < ISERT_MAX_PORTALS; res = res->ai_next) {
-		memcpy(&info.addr, res->ai_addr, res->ai_addrlen);
-		info.addr_len = res->ai_addrlen;
+	for (k = 0; k < ADDR_MAX; k++) {
+		server_address = server_addresses[k];
 
-		rc = ioctl(iser_fd, SET_LISTEN_ADDR, &info);
-		if (rc != 0) {
-			log_error("Unable to set listen address (%s)!",
-				strerror(errno));
+		if (k > 0 && server_address == NULL)
+			break;
+
+		if (i == ISERT_MAX_PORTALS) {
+			log_error("iSER: Cannot handle address %s! Too many were specified.", server_address);
+			exit(1);
 		}
-		++i;
-	}
 
-	freeaddrinfo(res0);
+		rc = getaddrinfo(server_address, servname, &hints, &res0);
+		if (rc != 0) {
+			log_error("iSER: Unable to get address info[%s] (%s)!",
+				server_address, get_error_str(rc));
+			exit(1);
+		}
+
+		for (res = res0; res && i < ISERT_MAX_PORTALS; res = res->ai_next) {
+			memcpy(&info.addr, res->ai_addr, res->ai_addrlen);
+			info.addr_len = res->ai_addrlen;
+
+			rc = ioctl(iser_fd, SET_LISTEN_ADDR, &info);
+			if (rc != 0) {
+				log_error("iSER: Unable to set listen address (%s)!",
+					strerror(errno));
+			}
+			++i;
+		}
+
+		if (res)
+			log_error("iSER: Unable to listen on all available sockets.");
+
+		freeaddrinfo(res0);
+	}
 }
 
 static int iser_getsockname(int fd, struct sockaddr *name, socklen_t *namelen)
@@ -874,13 +906,28 @@
 		case 'g':
 			gid = strtoul(optarg, NULL, 0);
 			break;
-		case 'a':
+		case 'a': {
+			char *server_address, *token;
+			int i = 0;
+
 			server_address = strdup(optarg);
 			if (server_address == NULL) {
 				perror("strdup failed");
 				exit(-1);
 			}
+
+			token = strtok(server_address, " ");
+
+			while ((i < ADDR_MAX) && token) {
+				log_debug(0, "Address to listen: %s\n", token);
+				server_addresses[i] = token;
+
+				i++;
+				token = strtok(NULL, " ");
+			}
+
 			break;
+		}
 		case 'p':
 			server_port = (uint16_t)strtoul(optarg, NULL, 0);
 			break;

diff --git a/scst/iscsi-scst/usr/iscsid.h b/scst/iscsi-scst/usr/iscsid.h
index 93df505..78f036c 100644
--- a/scst/iscsi-scst/usr/iscsid.h
+++ b/scst/iscsi-scst/usr/iscsid.h

@@ -52,7 +52,7 @@
 	struct __qelem entry;
 
 	unsigned int len;
-	char data[0];
+	char data[];
 };
 
 struct PDU {
@@ -225,7 +225,8 @@
 extern int ctrl_fd;
 extern int conn_blocked;
 
-#define LISTEN_MAX		8
+#define ADDR_MAX		32
+#define LISTEN_MAX		32
 #define INCOMING_MAX		256
 
 enum {

diff --git a/scst/iscsi-scst/usr/isns_proto.h b/scst/iscsi-scst/usr/isns_proto.h
index 84438f9..48c3461 100644
--- a/scst/iscsi-scst/usr/isns_proto.h
+++ b/scst/iscsi-scst/usr/isns_proto.h

@@ -32,7 +32,7 @@
 	uint16_t flags;
 	uint16_t transaction;
 	uint16_t sequence;
-	uint32_t pdu[0];
+	uint32_t pdu[];
 } __attribute__ ((packed));
 
 struct isns_tlv {

diff --git a/scst/iscsi-scst/usr/message.c b/scst/iscsi-scst/usr/message.c
index 4e41acf..341fbbf 100644
--- a/scst/iscsi-scst/usr/message.c
+++ b/scst/iscsi-scst/usr/message.c

@@ -58,8 +58,8 @@
 {
 	int err = 0;
 
-	log_debug(1, "request %u, tid %u, sid 0x%" PRIx64 ", cid %u, lun %u",
-		req->rcmnd, req->tid, req->sid, req->cid, req->lun);
+	log_debug(1, "request %u, tid %u, sid 0x%" PRIx64 ", cid %u",
+		req->rcmnd, req->tid, req->sid, req->cid);
 
 	switch (req->rcmnd) {
 	case C_TRGT_NEW:

diff --git a/scst/nightly/conf/nightly.conf b/scst/nightly/conf/nightly.conf
index 82b0c2c..5606c45 100644
--- a/scst/nightly/conf/nightly.conf
+++ b/scst/nightly/conf/nightly.conf

@@ -3,33 +3,45 @@
 ABT_DETAILS="x86_64"
 ABT_JOBS=5
 ABT_KERNELS="  \
-5.9.10         \
+6.1.1          \
+6.0.15-nc      \
+5.19.17-nc     \
+5.18.19-nc     \
+5.17.15-nc     \
+5.16.20-nc     \
+5.15.85-nc     \
+5.14.21-nc     \
+5.13.19-nc     \
+5.12.19-nc     \
+5.11.22-nc     \
+5.10.161-nc    \
+5.9.16-nc      \
 5.8.18-nc      \
 5.7.19-nc      \
 5.6.19-nc      \
 5.5.19-nc      \
-5.4.80-nc      \
+5.4.228-nc     \
 5.3.18-nc      \
 5.2.21-nc      \
 5.1.21-nc      \
 5.0.21-nc      \
 4.20.17-nc     \
-4.19.160-nc    \
+4.19.269-nc    \
 4.18.20-nc     \
 4.17.19-nc     \
 4.16.18-nc     \
 4.15.18-nc     \
-4.14.209-nc    \
+4.14.302-nc    \
 4.13.16-nc     \
 4.12.14-nc     \
 4.11.12-nc     \
 4.10.17-nc     \
-4.9.246-nc     \
+4.9.336-nc     \
 4.8.17-nc      \
 4.7.10-nc      \
 4.6.7-nc       \
 4.5.7-nc       \
-4.4.246-nc     \
+4.4.302-nc     \
 4.3.6-nc       \
 4.2.8-nc       \
 4.1.52-nc      \
@@ -44,35 +56,30 @@
 3.12.74-nc     \
 3.11.10-nc     \
 3.10.108-nc    \
-3.9.11-nc      \
-3.8.13-nc      \
-3.7.10-nc      \
-3.6.11-nc      \
-3.5.7-nc       \
-3.4.113-nc     \
-3.3.8-nc       \
-3.2.102-nc     \
-3.1.10-nc      \
-3.0.101-nc     \
-2.6.39.4-nc    \
-2.6.38.8-nc    \
-2.6.37.6-nc    \
-2.6.36.4-nc    \
-2.6.35.9-nc    \
-2.6.34.7-nc    \
-2.6.33.7-nc    \
-2.6.32.27-nc   \
-2.6.31.14-nc   \
-4.18.0-240.1.1.el8_3^CentOS^8.3.2011-nc	\
+5.14.0-162.6.1.el9_1^AlmaLinux^9.1-nc    \
+5.14.0-70.30.1.el9_0^AlmaLinux^9.0-nc    \
+4.18.0-425.3.1.el8^AlmaLinux^8.7-nc      \
+4.18.0-372.32.1.el8_6^AlmaLinux^8.6-nc   \
+4.18.0-348.2.1.el8_5^CentOS^8.5.2111-nc  \
+4.18.0-305.3.1.el8^CentOS^8.4.2105-nc    \
+4.18.0-240.15.1.el8_3^CentOS^8.3.2011-nc \
 4.18.0-193.28.1.el8_2^CentOS^8.2.2004-nc \
-4.18.0-147.8.1.el8_1^CentOS^8.1.1911-nc	\
-4.18.0-80.11.2.el8_0^CentOS^8.0.1905-nc	\
-3.10.0-1127.19.1.el7^CentOS^7.8.2003-nc	\
-3.10.0-1062.18.1.el7^CentOS^7.7.1908-nc	\
-3.10.0-957.27.2.el7^CentOS^7.6.1810-nc	\
-3.10.0-862.14.4.el7^CentOS^7.5.1804-nc	\
-2.6.32-754.29.2.el6^CentOS^6.10-nc	\
-2.6.32-696.30.1.el6^CentOS^6.9-nc	\
-5.4.17-2036.101.2.el7uek^UEK^7-nc	\
-4.14.35-2025.403.3.el7uek^UEK^7-nc	\
+4.18.0-147.8.1.el8_1^CentOS^8.1.1911-nc \
+4.18.0-80.11.2.el8_0^CentOS^8.0.1905-nc \
+3.10.0-1160.el7^CentOS^7.9.2009-nc      \
+3.10.0-1127.19.1.el7^CentOS^7.8.2003-nc \
+3.10.0-1062.18.1.el7^CentOS^7.7.1908-nc \
+3.10.0-957.27.2.el7^CentOS^7.6.1810-nc  \
+3.10.0-862.14.4.el7^CentOS^7.5.1804-nc  \
+5.15.0-5.76.5.1.el9uek^UEK^9-nc         \
+5.15.0-5.76.5.1.el8uek^UEK^8-nc         \
+5.4.17-2136.314.6.3.el8uek^UEK^8-nc     \
+5.4.17-2102.206.1.el8uek^UEK^8-nc       \
+5.4.17-2036.104.5.el8uek^UEK^8-nc       \
+5.4.17-2011.7.4.el8uek^UEK^8-nc         \
+5.4.17-2136.314.6.3.el7uek^UEK^7-nc     \
+5.4.17-2102.206.1.el7uek^UEK^7-nc       \
+5.4.17-2036.104.5.el7uek^UEK^7-nc       \
+5.4.17-2011.7.4.el7uek^UEK^7-nc         \
+4.1.12-124.48.6.el6uek^UEK^6-nc         \
 "

diff --git a/scst/qla2x00t-32gbit/Kbuild b/scst/qla2x00t-32gbit/Kbuild
index f25885d..053a4d9 100644
--- a/scst/qla2x00t-32gbit/Kbuild
+++ b/scst/qla2x00t-32gbit/Kbuild

@@ -9,4 +9,5 @@
 qla2xxx_scst-objs := \
 		qla_os.o qla_init.o qla_mbx.o qla_iocb.o qla_isr.o qla_gs.o \
 		qla_dbg.o qla_sup.o qla_attr.o qla_mid.o qla_dfs.o qla_bsg.o \
-		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o
+		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o \
+		qla_edif.o

diff --git a/scst/qla2x00t-32gbit/Makefile b/scst/qla2x00t-32gbit/Makefile
index 30c8bec..ddcaafc 100644
--- a/scst/qla2x00t-32gbit/Makefile
+++ b/scst/qla2x00t-32gbit/Makefile

@@ -4,7 +4,8 @@
 
 qla2xxx-y := qla_os.o qla_init.o qla_mbx.o qla_iocb.o qla_isr.o qla_gs.o \
 		qla_dbg.o qla_sup.o qla_attr.o qla_mid.o qla_dfs.o qla_bsg.o \
-		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o
+		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o \
+		qla_edif.o
 
 obj-$(CONFIG_SCSI_QLA_FC) += qla2xxx.o
 
@@ -49,7 +50,7 @@
 all:
 	$(MAKE) -C $(KDIR) M=$(shell pwd)				\
 	  $(shell [ -n "$(PASS_CC_TO_MAKE)" ] && echo CC="$(CC)")	\
-	  $(CONFIG_SCSI_QLA2XXX_TARGET)=CONFIG_SCSI_QLA2XXX_TARGET
+	  CONFIG_SCSI_QLA2XXX_TARGET=$(CONFIG_SCSI_QLA2XXX_TARGET)
 
 install: all
 	KDIR=$(KDIR) ../scripts/sign-modules

diff --git a/scst/qla2x00t-32gbit/Makefile_in-tree b/scst/qla2x00t-32gbit/Makefile_in-tree
index 9017a0a..d6f65c4 100644
--- a/scst/qla2x00t-32gbit/Makefile_in-tree
+++ b/scst/qla2x00t-32gbit/Makefile_in-tree

@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 
-qla2xxx-y := qla_os.o qla_init.o qla_mbx.o qla_iocb.o qla_isr.o qla_gs.o \
+qla2xxx-y := \
+		qla_os.o qla_init.o qla_mbx.o qla_iocb.o qla_isr.o qla_gs.o \
 		qla_dbg.o qla_sup.o qla_attr.o qla_mid.o qla_dfs.o qla_bsg.o \
-		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o
+		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o \
+		qla_edif.o
 qla2x00tgt-objs := qla_tgt.o scst_qla2xxx.o
 
 obj-$(CONFIG_SCSI_QLA_FC) += qla2xxx.o

diff --git a/scst/qla2x00t-32gbit/btree-backport.h b/scst/qla2x00t-32gbit/btree-backport.h
deleted file mode 100644
index e1800bd..0000000
--- a/scst/qla2x00t-32gbit/btree-backport.h
+++ /dev/null

@@ -1,286 +0,0 @@
-#ifndef _BTREE_BACKPORT_H_
-#define _BTREE_BACKPORT_H_
-
-struct btree_head32 {
-	struct list_head	head;
-};
-
-struct btree_node32 {
-	struct list_head	entry;
-	u32			key;
-	void			*val;
-};
-
-struct btree_head64 {
-	struct list_head	head;
-};
-
-struct btree_node64 {
-	struct list_head	entry;
-	u64			key;
-	void			*val;
-};
-
-/**
- * btree_init - initialise a btree
- *
- * @head: the btree head to initialise
- *
- * This function allocates the memory pool that the
- * btree needs. Returns zero or a negative error code
- * (-%ENOMEM) when memory allocation fails.
- */
-static inline int __must_check btree_init32(struct btree_head32 *head)
-{
-	INIT_LIST_HEAD(&head->head);
-	return 0;
-}
-
-static inline int __must_check btree_init64(struct btree_head64 *head)
-{
-	INIT_LIST_HEAD(&head->head);
-	return 0;
-}
-
-/**
- * btree_destroy - destroy mempool
- *
- * @head: the btree head to destroy
- *
- * This function destroys the internal memory pool, use only
- * when using btree_init(), not with btree_init_mempool().
- */
-static inline void btree_destroy32(struct btree_head32 *head)
-{
-}
-
-static inline void btree_destroy64(struct btree_head64 *head)
-{
-}
-
-/**
- * btree_lookup - look up a key in the btree
- *
- * @head: the btree to look in
- * @geo: the btree geometry
- * @key: the key to look up
- *
- * This function returns the value for the given key, or %NULL.
- */
-static inline void *btree_lookup32(struct btree_head32 *head, u32 key)
-{
-	struct btree_node32 *n;
-
-	list_for_each_entry(n, &head->head, entry) {
-		if (n->key == key)
-			return n->val;
-	}
-	return NULL;
-}
-
-static inline void *btree_lookup64(struct btree_head64 *head, u64 key)
-{
-	struct btree_node64 *n;
-
-	list_for_each_entry(n, &head->head, entry) {
-		if (n->key == key)
-			return n->val;
-	}
-	return NULL;
-}
-
-/**
- * btree_insert - insert an entry into the btree
- *
- * @head: the btree to add to
- * @geo: the btree geometry
- * @key: the key to add (must not already be present)
- * @val: the value to add (must not be %NULL)
- * @gfp: allocation flags for node allocations
- *
- * This function returns 0 if the item could be added, or an
- * error code if it failed (may fail due to memory pressure).
- */
-static inline int __must_check btree_insert32(struct btree_head32 *head,
-				u32 key, void *val, gfp_t gfp)
-{
-	struct btree_node32 *n, *p;
-
-	n = kmalloc(sizeof(*n), gfp);
-	if (IS_ERR(n))
-		return PTR_ERR(n);
-	n->key = key;
-	n->val = val;
-	list_for_each_entry(p, &head->head, entry) {
-		if (p->key > key)
-			break;
-	}
-	list_add(&n->entry, p->entry.prev);
-	return 0;
-}
-
-static inline int __must_check btree_insert64(struct btree_head64 *head,
-				u64 key, void *val, gfp_t gfp)
-{
-	struct btree_node64 *n, *p;
-
-	n = kmalloc(sizeof(*n), gfp);
-	if (IS_ERR(n))
-		return PTR_ERR(n);
-	n->key = key;
-	n->val = val;
-	list_for_each_entry(p, &head->head, entry) {
-		if (p->key > key)
-			break;
-	}
-	list_add(&n->entry, p->entry.prev);
-	return 0;
-}
-
-/**
- * btree_update - update an entry in the btree
- *
- * @head: the btree to update
- * @geo: the btree geometry
- * @key: the key to update
- * @val: the value to change it to (must not be %NULL)
- *
- * This function returns 0 if the update was successful, or
- * -%ENOENT if the key could not be found.
- */
-static inline int btree_update32(struct btree_head32 *head, u32 key, void *val)
-{
-	struct btree_node32 *p;
-
-	list_for_each_entry(p, &head->head, entry) {
-		if (p->key == key) {
-			p->val = val;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-/**
- * btree_remove - remove an entry from the btree
- *
- * @head: the btree to update
- * @geo: the btree geometry
- * @key: the key to remove
- *
- * This function returns the removed entry, or %NULL if the key
- * could not be found.
- */
-static inline void *btree_remove32(struct btree_head32 *head, u32 key)
-{
-	struct btree_node32 *p;
-	void *val;
-
-	list_for_each_entry(p, &head->head, entry) {
-		if (p->key == key) {
-			val = p->val;
-			list_del(&p->entry);
-			kfree(p);
-			return val;
-		}
-	}
-	return NULL;
-}
-
-static inline void *btree_remove64(struct btree_head64 *head, u64 key)
-{
-	struct btree_node64 *p;
-	void *val;
-
-	list_for_each_entry(p, &head->head, entry) {
-		if (p->key == key) {
-			val = p->val;
-			list_del(&p->entry);
-			kfree(p);
-			return val;
-		}
-	}
-	return NULL;
-}
-
-/**
- * btree_last - get last entry in btree
- *
- * @head: btree head
- * @geo: btree geometry
- * @key: last key
- *
- * Returns the last entry in the btree, and sets @key to the key
- * of that entry; returns NULL if the tree is empty, in that case
- * key is not changed.
- */
-static inline void *btree_last32(struct btree_head32 *head, u32 *key)
-{
-	struct btree_node32 *p;
-
-	if (list_empty(&head->head))
-		return NULL;
-	p = list_last_entry(&head->head, typeof(*p), entry);
-	*key = p->key;
-	return p->val;
-}
-
-static inline void *btree_last64(struct btree_head64 *head, u64 *key)
-{
-	struct btree_node64 *p;
-
-	if (list_empty(&head->head))
-		return NULL;
-	p = list_last_entry(&head->head, typeof(*p), entry);
-	*key = p->key;
-	return p->val;
-}
-
-/**
- * btree_get_prev - get previous entry
- *
- * @head: btree head
- * @geo: btree geometry
- * @key: pointer to key
- *
- * The function returns the next item right before the value pointed to by
- * @key, and updates @key with its key, or returns %NULL when there is no
- * entry with a key smaller than the given key.
- */
-static inline void *btree_get_prev32(struct btree_head32 *head, u32 *key)
-{
-	struct btree_node32 *p;
-
-	list_for_each_entry_reverse(p, &head->head, entry) {
-		if (p->key < *key) {
-			*key = p->key;
-			return p->val;
-		}
-	}
-	return NULL;
-}
-
-static inline void *btree_get_prev64(struct btree_head64 *head, u64 *key)
-{
-	struct btree_node64 *p;
-
-	list_for_each_entry_reverse(p, &head->head, entry) {
-		if (p->key < *key) {
-			*key = p->key;
-			return p->val;
-		}
-	}
-	return NULL;
-}
-
-#define btree_for_each_safe32(head, key, val)	\
-	for (val = btree_last32(head, &key);	\
-	     val;				\
-	     val = btree_get_prev32(head, &key))
-
-#define btree_for_each_safe64(head, key, val)	\
-	for (val = btree_last64(head, &key);	\
-	     val;				\
-	     val = btree_get_prev64(head, &key))
-
-#endif /* _BTREE_BACKPORT_H_ */

diff --git a/scst/qla2x00t-32gbit/qla2x00-target/qla_tgt.c b/scst/qla2x00t-32gbit/qla2x00-target/qla_tgt.c
index 03fb20c..36b2896 100644
--- a/scst/qla2x00t-32gbit/qla2x00-target/qla_tgt.c
+++ b/scst/qla2x00t-32gbit/qla2x00-target/qla_tgt.c

@@ -47,9 +47,10 @@
 
 size_t qlt_add_vtarget(u64 port_name, u64 node_name, u64 parent_host)
 {
+	struct fc_vport *vport;
 	struct Scsi_Host *shost = NULL;
+	scsi_qla_host_t *vha = NULL, *npiv_vha;
 	struct qla_tgt *tgt;
-	scsi_qla_host_t *vha = NULL;
 	struct fc_vport_identifiers vid;
 	uint8_t parent_wwn[WWN_SIZE];
 
@@ -78,8 +79,14 @@
 	vid.disable = false;            /* always enabled */
 
 	/* We only allow support on Channel 0 !!! */
-	if (!fc_vport_create(shost, 0, &vid))
+	vport = fc_vport_create(shost, 0, &vid);
+	if (!vport) {
+		pr_err("fc_vport_create failed for qla2xxx_npiv\n");
 		return -EINVAL;
+	}
+
+	npiv_vha = (struct scsi_qla_host *) vport->dd_data;
+	scsi_host_get(npiv_vha->host);
 
 	return 0;
 }

diff --git a/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
index de20da8..045d57c 100644
--- a/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
+++ b/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c

@@ -487,8 +487,7 @@
 	      vha->host_no, vha->vp_idx, cmd, cmd->atio.u.isp24.exchange_addr,
 	      scst_cmd_get_queue_type(cmd->scst_cmd));
 
-	/* we're being call by wq, so do direct */
-	scst_cmd_init_done(cmd->scst_cmd, SCST_CONTEXT_DIRECT);
+	scst_cmd_init_done(cmd->scst_cmd, scst_work_context);
 
 out:
 	TRACE_EXIT_RES(res);
@@ -667,6 +666,26 @@
 	return res;
 }
 
+/*
+ * Search the command list of the SCST session attached to @fcport for a
+ * command whose tag equals @tag. The list is walked with sess_list_lock
+ * held and interrupts disabled.
+ *
+ * Returns the target-private pointer of the first matching command, or
+ * NULL when no command in the list carries that tag.
+ */
+static struct qla_tgt_cmd *
+sqa_qla2xxx_find_cmd_by_tag(struct fc_port *fcport, uint64_t tag)
+{
+	struct scst_session *scst_sess = fcport->se_sess->fabric_sess_ptr;
+	struct qla_tgt_cmd *found = NULL;
+	struct scst_cmd *scst_cmd;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&scst_sess->sess_list_lock, irq_flags);
+	list_for_each_entry(scst_cmd, &scst_sess->sess_cmd_list,
+			    sess_cmd_list_entry) {
+		if (scst_cmd->tag != tag)
+			continue;
+		/* First match wins; stop scanning. */
+		found = scst_cmd_get_tgt_priv(scst_cmd);
+		break;
+	}
+	spin_unlock_irqrestore(&scst_sess->sess_list_lock, irq_flags);
+
+	return found;
+}
+
 static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
 {
 	struct scst_cmd *scst_cmd = cmd->scst_cmd;
@@ -905,7 +924,16 @@
 static ssize_t sqa_hw_target_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", 1);
+	struct scst_tgt *scst_tgt = container_of(kobj, struct scst_tgt,
+						 tgt_kobj);
+	struct sqa_scst_tgt *sqa_tgt = scst_tgt_get_tgt_priv(scst_tgt);
+	struct scsi_qla_host *vha = sqa_tgt->qla_tgt->vha;
+
+	/* Prints 1 for the port with vp_idx 0 and 0 for any other vp_idx. */
+	return sprintf(buf, "%d\n", !vha->vp_idx);
 }
 
 static ssize_t sqa_node_name_show(struct kobject *kobj,
@@ -1294,8 +1322,7 @@
 #else
 	sqa_tgt->tag_num = tag_num;
 	sqa_tgt->tgt_tag_pool = kzalloc(BITS_TO_LONGS(tag_num), GFP_KERNEL);
-	res = IS_ERR(sqa_tgt->tgt_tag_pool) ? PTR_ERR(sqa_tgt->tgt_tag_pool) :
-		0;
+	res = PTR_ERR_OR_ZERO(sqa_tgt->tgt_tag_pool);
 #endif
 	if (res < 0) {
 		pr_err("Unable to init se_sess->tgt_tag_pool, tag_num: %u\n",
@@ -1408,6 +1435,39 @@
 	scst_unregister_target(sqa_tgt->scst_tgt);
 	TRACE_EXIT();
 }
+
+/*
+ * Drop the lport of @tgt's host. If the host's qla_tgt has tgt_stop set
+ * but tgt_stopped still clear, qlt_stop_phase2() is run first; the host is
+ * then always passed to qlt_lport_deregister().
+ */
+static void sqa_qla2xxx_drop_lport(struct qla_tgt *tgt)
+{
+	struct scsi_qla_host *vha = tgt->vha;
+	struct qla_tgt *qla_tgt = vha->vha_tgt.qla_tgt;
+
+	TRACE_ENTRY();
+
+	if (qla_tgt->tgt_stop && !qla_tgt->tgt_stopped) {
+		PRINT_INFO("sqatgt(%ld/%d): calling qlt_stop_phase2.\n",
+				vha->host_no, vha->vp_idx);
+		qlt_stop_phase2(qla_tgt);
+	}
+
+	qlt_lport_deregister(vha);
+
+	TRACE_EXIT();
+}
+
+/*
+ * Drop one Scsi_Host reference on @tgt's own host and one on the host
+ * stored as driver data of the same PCI device (the base host).
+ * NOTE(review): presumably these balance references taken when the NPIV
+ * target was created -- confirm against the creation path.
+ */
+static void sqa_qla2xxx_npiv_drop_lport(struct qla_tgt *tgt)
+{
+	struct scsi_qla_host *npiv_vha = tgt->vha;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(npiv_vha->hw->pdev);
+
+	TRACE_ENTRY();
+
+	scsi_host_put(npiv_vha->host);
+	scsi_host_put(base_vha->host);
+
+	TRACE_EXIT();
+}
+
 /*
  * Must be called under tgt_host_action_mutex or sqa_unreg_rwsem write
  * locked.
@@ -1421,21 +1481,20 @@
 	TRACE_ENTRY();
 
 	if (vha->vha_tgt.target_lport_ptr) {
+
 		if (!vha->vha_tgt.qla_tgt->tgt_stop &&
-		    !vha->vha_tgt.qla_tgt->tgt_stopped) {
+				!vha->vha_tgt.qla_tgt->tgt_stopped) {
 			PRINT_INFO("sqatgt(%ld:%d: calling qlt_stop_phase1.\n",
-			vha->host_no, vha->vp_idx);
+					vha->host_no, vha->vp_idx);
 			qlt_stop_phase1(vha->vha_tgt.qla_tgt);
 		}
 
-		if (vha->vha_tgt.qla_tgt->tgt_stop &&
-		    !vha->vha_tgt.qla_tgt->tgt_stopped) {
-			PRINT_INFO("sqatgt(%ld/%d): calling qlt_stop_phase2.\n",
-			    vha->host_no, vha->vp_idx);
-			qlt_stop_phase2(vha->vha_tgt.qla_tgt);
-		}
-		qlt_lport_deregister(tgt->vha);
+		if (vha->vp_idx)
+			sqa_qla2xxx_npiv_drop_lport(tgt);
+		else
+			sqa_qla2xxx_drop_lport(tgt);
 	}
+
 	scst_tgt_set_tgt_priv(scst_tgt, NULL);
 
 	mutex_lock(&sqa_mutex);
@@ -1444,7 +1503,7 @@
 	mutex_unlock(&sqa_mutex);
 
 	TRACE(TRACE_MGMT, "sqatgt(%ld/%d): Target release finished sqa_tgt %p",
-	    vha->host_no, tgt->vha->vp_idx, sqa_tgt);
+	    vha->host_no, vha->vp_idx, sqa_tgt);
 
 	kfree(sqa_tgt);
 
@@ -1859,6 +1918,7 @@
 	.handle_cmd		    = sqa_qla2xxx_handle_cmd,
 	.handle_data		    = sqa_qla2xxx_handle_data,
 	.handle_tmr		    = sqa_qla2xxx_handle_tmr,
+	.find_cmd_by_tag	    = sqa_qla2xxx_find_cmd_by_tag,
 	.get_cmd		    = sqa_qla2xxx_get_cmd,
 	.rel_cmd		    = sqa_qla2xxx_rel_cmd,
 	.free_cmd		    = sqa_qla2xxx_free_cmd,
@@ -2113,7 +2173,6 @@
 				vha->host_no);
 
 			qlt_stop_phase1(sqa_tgt->qla_tgt);
-			scst_unregister_target(sqa_tgt->scst_tgt);
 			qlt_del_vtarget(wwn_to_u64(vha->port_name));
 		}
 	}

diff --git a/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h b/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h
index a3bf655..8ff88f5 100644
--- a/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h
+++ b/scst/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h

@@ -19,8 +19,8 @@
 
 /* Driver version number */
 #define Q2T_VERSION(a, b, c, d)	(((a) << 030) + ((b) << 020) + (c) << 010 + (d))
-#define Q2T_VERSION_CODE	Q2T_VERSION(3, 5, 0, 0)
-#define Q2T_VERSION_STRING	"3.5.0"
+#define Q2T_VERSION_CODE	Q2T_VERSION(3, 7, 0, 0)
+#define Q2T_VERSION_STRING	"3.7.0"
 
 #define SQA_DEFAULT_TAGS 2048
 

diff --git a/scst/qla2x00t-32gbit/qla_attr.c b/scst/qla2x00t-32gbit/qla_attr.c
index 469f0ee..67e2133 100644
--- a/scst/qla2x00t-32gbit/qla_attr.c
+++ b/scst/qla2x00t-32gbit/qla_attr.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_target.h"
@@ -157,6 +156,14 @@
 			       vha->host_no);
 		}
 		break;
+	case 10:
+		if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+			ql_log(ql_log_info, vha, 0x70e9,
+			       "Issuing MPI firmware dump on host#%ld.\n",
+			       vha->host_no);
+			ha->isp_ops->mpi_fw_dump(vha, 0);
+		}
+		break;
 	}
 	return count;
 }
@@ -548,7 +555,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EINVAL;
 
-	if (IS_NOCACHE_VPD_TYPE(ha))
+	if (!IS_NOCACHE_VPD_TYPE(ha))
 		goto skip;
 
 	faddr = ha->flt_region_vpd << 2;
@@ -703,6 +710,12 @@
 		ql_log(ql_log_info, vha, 0x706e,
 		    "Issuing ISP reset.\n");
 
+		if (vha->hw->flags.port_isolated) {
+			ql_log(ql_log_info, vha, 0x706e,
+			       "Port is isolated, returning.\n");
+			return -EINVAL;
+		}
+
 		scsi_block_requests(vha->host);
 		if (IS_QLA82XX(ha)) {
 			ha->flags.isp82xx_no_md_cap = 1;
@@ -732,7 +745,7 @@
 		ql_log(ql_log_info, vha, 0x706f,
 		    "Issuing MPI reset.\n");
 
-		if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+		if (IS_QLA83XX(ha)) {
 			uint32_t idc_control;
 
 			qla83xx_idc_lock(vha, 0);
@@ -744,8 +757,6 @@
 			qla83xx_idc_audit(vha, IDC_AUDIT_TIMESTAMP);
 			qla83xx_idc_unlock(vha, 0);
 			break;
-		} else if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
-			qla27xx_reset_mpi(vha);
 		} else {
 			/* Make sure FC side is not in reset */
 			WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) !=
@@ -940,9 +951,9 @@
 	if (!capable(CAP_SYS_ADMIN) || off != 0 || count > DCBX_TLV_DATA_SIZE)
 		return 0;
 
+	mutex_lock(&vha->hw->optrom_mutex);
 	if (ha->dcbx_tlv)
 		goto do_read;
-	mutex_lock(&vha->hw->optrom_mutex);
 	if (qla2x00_chip_is_down(vha)) {
 		mutex_unlock(&vha->hw->optrom_mutex);
 		return 0;
@@ -1045,9 +1056,6 @@
 			continue;
 		if (iter->type == 3 && !(IS_CNA_CAPABLE(ha)))
 			continue;
-		if (iter->type == 0x27 &&
-		    (!IS_QLA27XX(ha) || !IS_QLA28XX(ha)))
-			continue;
 
 		sysfs_remove_bin_file(&host->shost_gendev.kobj,
 		    iter->attr);
@@ -1857,6 +1865,18 @@
 	return strlen(buf);
 }
 
+/*
+ * Lookup table from a PORT_SPEED_* link data rate code to the string that
+ * the port_speed attribute prints for it.
+ */
+static const struct {
+	u16 rate;
+	char *str;
+} port_speed_str[] = {
+	{ .rate = PORT_SPEED_4GB,  .str = "4" },
+	{ .rate = PORT_SPEED_8GB,  .str = "8" },
+	{ .rate = PORT_SPEED_16GB, .str = "16" },
+	{ .rate = PORT_SPEED_32GB, .str = "32" },
+	{ .rate = PORT_SPEED_64GB, .str = "64" },
+	{ .rate = PORT_SPEED_10GB, .str = "10" },
+};
+
 static ssize_t
 qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr,
     char *buf)
@@ -1864,7 +1884,8 @@
 	struct scsi_qla_host *vha = shost_priv(dev_to_shost(dev));
 	struct qla_hw_data *ha = vha->hw;
 	ssize_t rval;
-	char *spd[7] = {"0", "0", "0", "4", "8", "16", "32"};
+	u16 i;
+	char *speed = "Unknown";
 
 	rval = qla2x00_get_data_rate(vha);
 	if (rval != QLA_SUCCESS) {
@@ -1873,9 +1894,40 @@
 		return -EINVAL;
 	}
 
-	return scnprintf(buf, PAGE_SIZE, "%s\n", spd[ha->link_data_rate]);
+	for (i = 0; i < ARRAY_SIZE(port_speed_str); i++) {
+		if (port_speed_str[i].rate != ha->link_data_rate)
+			continue;
+		speed = port_speed_str[i].str;
+		break;
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", speed);
 }
 
+/*
+ * sysfs store handler for the write-only mpi_pause attribute.
+ *
+ * The written text must parse as an integer; the value itself is not used.
+ * On a valid write, register 0x002012d4 is written with 0x30000001 via
+ * qla83xx_wr_reg().
+ *
+ * Returns @count on success, -EINVAL when the input is not an integer, and
+ * -EIO when the register write fails. A store handler must not return 0 on
+ * failure: that reports "zero bytes consumed" rather than an error, and
+ * writers typically retry the same write indefinitely.
+ */
+static ssize_t
+qla2x00_mpi_pause_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	int val;
+	int rval;
+
+	/* Only validates the input format; val is intentionally unused. */
+	if (sscanf(buf, "%d", &val) != 1)
+		return -EINVAL;
+
+	ql_log(ql_log_warn, vha, 0x7089, "Pausing MPI...\n");
+
+	rval = qla83xx_wr_reg(vha, 0x002012d4, 0x30000001);
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x708a, "Unable to pause MPI.\n");
+		return -EIO;
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(mpi_pause, S_IWUSR, NULL, qla2x00_mpi_pause_store);
+
 /* ----- */
 
 static ssize_t
@@ -2425,7 +2477,7 @@
 static DEVICE_ATTR(port_no, 0444, qla2x00_port_no_show, NULL);
 static DEVICE_ATTR(fw_attr, 0444, qla2x00_fw_attr_show, NULL);
 
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
 	&dev_attr_fw_version,
@@ -2469,6 +2521,7 @@
 	&dev_attr_port_no,
 	&dev_attr_fw_attr,
 	&dev_attr_dport_diagnostics,
+	&dev_attr_mpi_pause,
 	NULL, /* reserve for qlini_mode */
 	NULL, /* reserve for ql2xiniexchg */
 	NULL, /* reserve for ql2xexchoffld */
@@ -2489,6 +2542,78 @@
 	attr++;
 	*attr = &dev_attr_ql2xexchoffld;
 }
+#else
+static struct attribute *qla2x00_host_attrs[] = {	/* host attribute list for kernels >= 5.16 */
+	&dev_attr_driver_version.attr,
+	&dev_attr_fw_version.attr,
+	&dev_attr_serial_num.attr,
+	&dev_attr_isp_name.attr,
+	&dev_attr_isp_id.attr,
+	&dev_attr_model_name.attr,
+	&dev_attr_model_desc.attr,
+	&dev_attr_pci_info.attr,
+	&dev_attr_link_state.attr,
+	&dev_attr_zio.attr,
+	&dev_attr_zio_timer.attr,
+	&dev_attr_beacon.attr,
+	&dev_attr_beacon_config.attr,
+	&dev_attr_optrom_bios_version.attr,
+	&dev_attr_optrom_efi_version.attr,
+	&dev_attr_optrom_fcode_version.attr,
+	&dev_attr_optrom_fw_version.attr,
+	&dev_attr_84xx_fw_version.attr,
+	&dev_attr_total_isp_aborts.attr,
+	&dev_attr_serdes_version.attr,
+	&dev_attr_mpi_version.attr,
+	&dev_attr_phy_version.attr,
+	&dev_attr_flash_block_size.attr,
+	&dev_attr_vlan_id.attr,
+	&dev_attr_vn_port_mac_address.attr,
+	&dev_attr_fabric_param.attr,
+	&dev_attr_fw_state.attr,
+	&dev_attr_optrom_gold_fw_version.attr,
+	&dev_attr_thermal_temp.attr,
+	&dev_attr_diag_requests.attr,
+	&dev_attr_diag_megabytes.attr,
+	&dev_attr_fw_dump_size.attr,
+	&dev_attr_allow_cna_fw_dump.attr,
+	&dev_attr_pep_version.attr,
+	&dev_attr_min_supported_speed.attr,
+	&dev_attr_max_supported_speed.attr,
+	&dev_attr_zio_threshold.attr,
+	&dev_attr_dif_bundle_statistics.attr,
+	&dev_attr_port_speed.attr,
+	&dev_attr_port_no.attr,
+	&dev_attr_fw_attr.attr,
+	&dev_attr_dport_diagnostics.attr,
+	&dev_attr_mpi_pause.attr,
+	&dev_attr_qlini_mode.attr,	/* hidden by qla_host_attr_is_visible() unless ql2x_ini_mode is dual */
+	&dev_attr_ql2xiniexchg.attr,	/* hidden by qla_host_attr_is_visible() unless ql2x_ini_mode is dual */
+	&dev_attr_ql2xexchoffld.attr,	/* hidden by qla_host_attr_is_visible() unless ql2x_ini_mode is dual */
+	NULL,	/* list terminator */
+};
+
+/*
+ * is_visible callback of the host attribute group. The qlini_mode,
+ * ql2xiniexchg and ql2xexchoffld attributes get mode 0 (hidden) unless
+ * ql2x_ini_mode is QLA2XXX_INI_MODE_DUAL; every other attribute keeps the
+ * mode it was declared with. @kobj and @i are unused.
+ */
+static umode_t qla_host_attr_is_visible(struct kobject *kobj,
+					struct attribute *attr, int i)
+{
+	bool dual_only = attr == &dev_attr_qlini_mode.attr ||
+			 attr == &dev_attr_ql2xiniexchg.attr ||
+			 attr == &dev_attr_ql2xexchoffld.attr;
+
+	if (dual_only && ql2x_ini_mode != QLA2XXX_INI_MODE_DUAL)
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group qla2x00_host_attr_group = {
+	.is_visible = qla_host_attr_is_visible,
+	.attrs = qla2x00_host_attrs
+};
+
+const struct attribute_group *qla2x00_host_groups[] = {
+	&qla2x00_host_attr_group,
+	NULL
+};
+#endif
 
 /* Host attributes. */
 
@@ -2637,7 +2762,17 @@
 static inline void
 qla2x00_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
 {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
+	fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+#endif
+
 	rport->dev_loss_tmo = timeout ? timeout : 1;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
+	if (IS_ENABLED(CONFIG_NVME_FC) && fcport && fcport->nvme_remote_port)
+		nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port,
+					       rport->dev_loss_tmo);
+#endif
 }
 
 static void
@@ -2650,17 +2785,27 @@
 	if (!fcport)
 		return;
 
-	/* Now that the rport has been deleted, set the fcport state to
-	   FCS_DEVICE_DEAD */
-	qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
+	ql_dbg(ql_dbg_async, fcport->vha, 0x5101,
+	       DBG_FCPORT_PRFMT(fcport, "dev_loss_tmo expiry, rport_state=%d",
+				rport->port_state));
+
+	/*
+	 * Now that the rport has been deleted, set the fcport state to
+	 * FCS_DEVICE_DEAD, if the fcport is still lost.
+	 */
+	if (fcport->scan_state != QLA_FCPORT_FOUND)
+		qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
 
 	/*
 	 * Transport has effectively 'deleted' the rport, clear
 	 * all local references.
 	 */
 	spin_lock_irqsave(host->host_lock, flags);
-	fcport->rport = fcport->drport = NULL;
-	*((fc_port_t **)rport->dd_data) = NULL;
+	/* Confirm port has not reappeared before clearing pointers. */
+	if (rport->port_state != FC_PORTSTATE_ONLINE) {
+		fcport->rport = fcport->drport = NULL;
+		*((fc_port_t **)rport->dd_data) = NULL;
+	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 
 	if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
@@ -2693,14 +2838,24 @@
 	/*
 	 * At this point all fcport's software-states are cleared.  Perform any
 	 * final cleanup of firmware resources (PCBs and XCBs).
+	 *
+	 * Attempt to cleanup only lost devices.
 	 */
 	if (fcport->loop_id != FC_NO_LOOP_ID) {
-		if (IS_FWI2_CAPABLE(fcport->vha->hw))
-			fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
-			    fcport->loop_id, fcport->d_id.b.domain,
-			    fcport->d_id.b.area, fcport->d_id.b.al_pa);
-		else
+		if (IS_FWI2_CAPABLE(fcport->vha->hw) &&
+		    fcport->scan_state != QLA_FCPORT_FOUND) {
+			if (fcport->loop_id != FC_NO_LOOP_ID)
+				fcport->logout_on_delete = 1;
+
+			if (!EDIF_NEGOTIATION_PENDING(fcport)) {
+				ql_dbg(ql_dbg_disc, fcport->vha, 0x911e,
+				       "%s %d schedule session deletion\n", __func__,
+				       __LINE__);
+				qlt_schedule_sess_for_deletion(fcport);
+			}
+		} else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) {
 			qla2x00_port_logout(fcport->vha, fcport);
+		}
 	}
 }
 
@@ -2712,6 +2867,9 @@
 	if (IS_QLAFX00(vha->hw))
 		return 0;
 
+	if (vha->hw->flags.port_isolated)
+		return 0;
+
 	qla2x00_loop_reset(vha);
 	return 0;
 }
@@ -2726,6 +2884,9 @@
 	struct link_statistics *stats;
 	dma_addr_t stats_dma;
 	struct fc_host_statistics *p = &vha->fc_host_stat;
+	struct qla_qpair *qpair;
+	int i;
+	u64 ib = 0, ob = 0, ir = 0, or = 0;
 
 	memset(p, -1, sizeof(*p));
 
@@ -2762,6 +2923,27 @@
 	if (rval != QLA_SUCCESS)
 		goto done_free;
 
+	/* --- */
+	for (i = 0; i < vha->hw->max_qpairs; i++) {
+		qpair = vha->hw->queue_pair_map[i];
+		if (!qpair)
+			continue;
+		ir += qpair->counters.input_requests;
+		or += qpair->counters.output_requests;
+		ib += qpair->counters.input_bytes;
+		ob += qpair->counters.output_bytes;
+	}
+	ir += ha->base_qpair->counters.input_requests;
+	or += ha->base_qpair->counters.output_requests;
+	ib += ha->base_qpair->counters.input_bytes;
+	ob += ha->base_qpair->counters.output_bytes;
+
+	ir += vha->qla_stats.input_requests;
+	or += vha->qla_stats.output_requests;
+	ib += vha->qla_stats.input_bytes;
+	ob += vha->qla_stats.output_bytes;
+	/* --- */
+
 	p->link_failure_count = le32_to_cpu(stats->link_fail_cnt);
 	p->loss_of_sync_count = le32_to_cpu(stats->loss_sync_cnt);
 	p->loss_of_signal_count = le32_to_cpu(stats->loss_sig_cnt);
@@ -2781,15 +2963,16 @@
 			p->rx_words = le64_to_cpu(stats->fpm_recv_word_cnt);
 			p->tx_words = le64_to_cpu(stats->fpm_xmit_word_cnt);
 		} else {
-			p->rx_words = vha->qla_stats.input_bytes;
-			p->tx_words = vha->qla_stats.output_bytes;
+			p->rx_words = ib >> 2;
+			p->tx_words = ob >> 2;
 		}
 	}
+
 	p->fcp_control_requests = vha->qla_stats.control_requests;
-	p->fcp_input_requests = vha->qla_stats.input_requests;
-	p->fcp_output_requests = vha->qla_stats.output_requests;
-	p->fcp_input_megabytes = vha->qla_stats.input_bytes >> 20;
-	p->fcp_output_megabytes = vha->qla_stats.output_bytes >> 20;
+	p->fcp_input_requests = ir;
+	p->fcp_output_requests = or;
+	p->fcp_input_megabytes  = ib >> 20;
+	p->fcp_output_megabytes = ob >> 20;
 	p->seconds_since_last_reset =
 	    get_jiffies_64() - vha->qla_stats.jiffies_at_last_reset;
 	do_div(p->seconds_since_last_reset, HZ);
@@ -2809,9 +2992,18 @@
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
 	struct link_statistics *stats;
 	dma_addr_t stats_dma;
+	int i;
+	struct qla_qpair *qpair;
 
 	memset(&vha->qla_stats, 0, sizeof(vha->qla_stats));
 	memset(&vha->fc_host_stat, 0, sizeof(vha->fc_host_stat));
+	for (i = 0; i < vha->hw->max_qpairs; i++) {
+		qpair = vha->hw->queue_pair_map[i];
+		if (!qpair)
+			continue;
+		memset(&qpair->counters, 0, sizeof(qpair->counters));
+	}
+	memset(&ha->base_qpair->counters, 0, sizeof(qpair->counters));
 
 	vha->qla_stats.jiffies_at_last_reset = get_jiffies_64();
 
@@ -2828,7 +3020,10 @@
 
 		/* reset firmware statistics */
 		rval = qla24xx_get_isp_stats(base_vha, stats, stats_dma, BIT_0);
-		WARN_ONCE(rval != QLA_SUCCESS, "rval = %d\n", rval);
+		if (rval != QLA_SUCCESS)
+			ql_log(ql_log_warn, vha, 0x70de,
+			       "Resetting ISP statistics failed: rval = %d\n",
+			       rval);
 
 		dma_free_coherent(&ha->pdev->dev, sizeof(*stats),
 		    stats, stats_dma);
@@ -3056,6 +3251,9 @@
 	qla2x00_wait_for_sess_deletion(vha);
 
 	qla_nvme_delete(vha);
+	qla_enode_stop(vha);
+	qla_edb_stop(vha);
+
 	vha->flags.delete_progress = 1;
 
 	qlt_remove_target(ha, vha);
@@ -3203,11 +3401,34 @@
 	.bsg_timeout = qla24xx_bsg_timeout,
 };
 
+/*
+ * Convert a mask of FDMI_PORT_SPEED_* bits in @speeds into the matching
+ * mask of FC_PORTSPEED_* bits. Bits with no entry in the table below are
+ * ignored; if none of the listed bits is set the result stays
+ * FC_PORTSPEED_UNKNOWN. @vha is unused.
+ */
+static uint
+qla2x00_get_host_supported_speeds(scsi_qla_host_t *vha, uint speeds)
+{
+	static const struct {
+		uint fdmi_bit;
+		uint fc_speed;
+	} speed_map[] = {
+		{ FDMI_PORT_SPEED_64GB, FC_PORTSPEED_64GBIT },
+		{ FDMI_PORT_SPEED_32GB, FC_PORTSPEED_32GBIT },
+		{ FDMI_PORT_SPEED_16GB, FC_PORTSPEED_16GBIT },
+		{ FDMI_PORT_SPEED_8GB,  FC_PORTSPEED_8GBIT },
+		{ FDMI_PORT_SPEED_4GB,  FC_PORTSPEED_4GBIT },
+		{ FDMI_PORT_SPEED_2GB,  FC_PORTSPEED_2GBIT },
+		{ FDMI_PORT_SPEED_1GB,  FC_PORTSPEED_1GBIT },
+	};
+	uint result = FC_PORTSPEED_UNKNOWN;
+	uint idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(speed_map); idx++) {
+		if (speeds & speed_map[idx].fdmi_bit)
+			result |= speed_map[idx].fc_speed;
+	}
+
+	return result;
+}
+
 void
 qla2x00_init_host_attr(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	u32 speeds = FC_PORTSPEED_UNKNOWN;
+	u32 speeds = 0, fdmi_speed = 0;
 
 	fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count;
 	fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name);
@@ -3217,46 +3438,8 @@
 	fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports;
 	fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count;
 
-	if (IS_CNA_CAPABLE(ha))
-		speeds = FC_PORTSPEED_10GBIT;
-	else if (IS_QLA28XX(ha) || IS_QLA27XX(ha)) {
-		if (ha->max_supported_speed == 2) {
-			if (ha->min_supported_speed <= 6)
-				speeds |= FC_PORTSPEED_64GBIT;
-		}
-		if (ha->max_supported_speed == 2 ||
-		    ha->max_supported_speed == 1) {
-			if (ha->min_supported_speed <= 5)
-				speeds |= FC_PORTSPEED_32GBIT;
-		}
-		if (ha->max_supported_speed == 2 ||
-		    ha->max_supported_speed == 1 ||
-		    ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 4)
-				speeds |= FC_PORTSPEED_16GBIT;
-		}
-		if (ha->max_supported_speed == 1 ||
-		    ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 3)
-				speeds |= FC_PORTSPEED_8GBIT;
-		}
-		if (ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 2)
-				speeds |= FC_PORTSPEED_4GBIT;
-		}
-	} else if (IS_QLA2031(ha))
-		speeds = FC_PORTSPEED_16GBIT|FC_PORTSPEED_8GBIT|
-			FC_PORTSPEED_4GBIT;
-	else if (IS_QLA25XX(ha) || IS_QLAFX00(ha))
-		speeds = FC_PORTSPEED_8GBIT|FC_PORTSPEED_4GBIT|
-			FC_PORTSPEED_2GBIT|FC_PORTSPEED_1GBIT;
-	else if (IS_QLA24XX_TYPE(ha))
-		speeds = FC_PORTSPEED_4GBIT|FC_PORTSPEED_2GBIT|
-			FC_PORTSPEED_1GBIT;
-	else if (IS_QLA23XX(ha))
-		speeds = FC_PORTSPEED_2GBIT|FC_PORTSPEED_1GBIT;
-	else
-		speeds = FC_PORTSPEED_1GBIT;
+	fdmi_speed = qla25xx_fdmi_port_speed_capability(ha);
+	speeds = qla2x00_get_host_supported_speeds(vha, fdmi_speed);
 
 	fc_host_supported_speeds(vha->host) = speeds;
 }

diff --git a/scst/qla2x00t-32gbit/qla_bsg.c b/scst/qla2x00t-32gbit/qla_bsg.c
index 9943d2d..517f680 100644
--- a/scst/qla2x00t-32gbit/qla_bsg.c
+++ b/scst/qla2x00t-32gbit/qla_bsg.c

@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
+#include "qla_gbl.h"
 
 #include <linux/kthread.h>
 #include <linux/vmalloc.h>
@@ -12,24 +12,6 @@
 
 #ifdef NEW_LIBFC_API
 #include <linux/bsg-lib.h>
-#else
-static inline struct Scsi_Host *fc_bsg_to_shost(struct fc_bsg_job *job)
-{
-	return job->shost;
-}
-
-static inline struct fc_rport *fc_bsg_to_rport(struct fc_bsg_job *job)
-{
-	return job->rport;
-}
-
-static inline void bsg_job_done_backport(struct fc_bsg_job *job, int result,
-					 unsigned int reply_payload_rcv_len)
-{
-	job->job_done(job);
-}
-
-#define bsg_job_done bsg_job_done_backport
 #endif
 
 static void qla2xxx_free_fcport_work(struct work_struct *work)
@@ -43,27 +25,25 @@
 /* BSG support for ELS/CT pass through */
 void qla2x00_bsg_job_done(srb_t *sp, int res)
 {
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 
+	ql_dbg(ql_dbg_user, sp->vha, 0x7009,
+	    "%s: sp hdl %x, result=%x bsg ptr %p\n",
+	    __func__, sp->handle, res, bsg_job);
+
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
 	bsg_reply->result = res;
 	bsg_job_done(bsg_job, bsg_reply->result,
 		       bsg_reply->reply_payload_rcv_len);
-	sp->free(sp);
 }
 
 void qla2x00_bsg_sp_free(srb_t *sp)
 {
 	struct qla_hw_data *ha = sp->vha->hw;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct qla_mt_iocb_rqst_fx00 *piocb_rqst;
 
@@ -81,11 +61,19 @@
 			    bsg_job->reply_payload.sg_list,
 			    bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
 	} else {
-		dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
-		    bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
 
-		dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
-		    bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
+		if (sp->remap.remapped) {
+			dma_pool_free(ha->purex_dma_pool, sp->remap.rsp.buf,
+			    sp->remap.rsp.dma);
+			dma_pool_free(ha->purex_dma_pool, sp->remap.req.buf,
+			    sp->remap.req.dma);
+		} else {
+			dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
+				bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
+
+			dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
+				bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
+		}
 	}
 
 	if (sp->type == SRB_CT_CMD ||
@@ -152,13 +140,8 @@
 	return ret;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla24xx_proc_fcp_prio_cfg_cmd(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla24xx_proc_fcp_prio_cfg_cmd(struct bsg_job *bsg_job)
-#endif
+qla24xx_proc_fcp_prio_cfg_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -285,13 +268,8 @@
 	return ret;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_process_els(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_process_els(struct bsg_job *bsg_job)
-#endif
+qla2x00_process_els(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_rport *rport;
@@ -304,6 +282,7 @@
 	int req_sg_cnt, rsp_sg_cnt;
 	int rval =  (DID_ERROR << 16);
 	uint16_t nextlid = 0;
+	uint32_t els_cmd = 0;
 
 	if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
 		rport = fc_bsg_to_rport(bsg_job);
@@ -317,6 +296,9 @@
 		vha = shost_priv(host);
 		ha = vha->hw;
 		type = "FC_BSG_HST_ELS_NOLOGIN";
+		els_cmd = bsg_request->rqst_data.h_els.command_code;
+		if (els_cmd == ELS_AUTH_ELS)
+			return qla_edif_process_els(vha, bsg_job);
 	}
 
 	if (!vha->flags.online) {
@@ -453,7 +435,7 @@
 	goto done_free_fcport;
 
 done_free_fcport:
-	if (bsg_request->msgcode == FC_BSG_RPT_ELS)
+	if (bsg_request->msgcode != FC_BSG_RPT_ELS)
 		qla2x00_free_fcport(fcport);
 done:
 	return rval;
@@ -473,13 +455,8 @@
 	return iocbs;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_process_ct(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_process_ct(struct bsg_job *bsg_job)
-#endif
+qla2x00_process_ct(BSG_JOB_TYPE *bsg_job)
 {
 	srb_t *sp;
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -756,13 +733,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_process_loopback(struct bsg_job *bsg_job)
-#endif
+qla2x00_process_loopback(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1004,13 +976,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla84xx_reset(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla84xx_reset(struct bsg_job *bsg_job)
-#endif
+qla84xx_reset(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1045,13 +1012,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla84xx_updatefw(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla84xx_updatefw(struct bsg_job *bsg_job)
-#endif
+qla84xx_updatefw(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1160,13 +1122,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla84xx_mgmt_cmd(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla84xx_mgmt_cmd(struct bsg_job *bsg_job)
-#endif
+qla84xx_mgmt_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1361,13 +1318,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla24xx_iidma(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla24xx_iidma(struct bsg_job *bsg_job)
-#endif
+qla24xx_iidma(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1455,15 +1407,9 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_optrom_setup(struct fc_bsg_job *bsg_job, scsi_qla_host_t *vha,
+qla2x00_optrom_setup(BSG_JOB_TYPE *bsg_job, scsi_qla_host_t *vha,
 	uint8_t is_update)
-#else
-static int
-qla2x00_optrom_setup(struct bsg_job *bsg_job, scsi_qla_host_t *vha,
-	uint8_t is_update)
-#endif
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	uint32_t start = 0;
@@ -1531,13 +1477,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_read_optrom(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_read_optrom(struct bsg_job *bsg_job)
-#endif
+qla2x00_read_optrom(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1573,13 +1514,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_update_optrom(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_update_optrom(struct bsg_job *bsg_job)
-#endif
+qla2x00_update_optrom(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1619,13 +1555,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_update_fru_versions(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_update_fru_versions(struct bsg_job *bsg_job)
-#endif
+qla2x00_update_fru_versions(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1677,13 +1608,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_read_fru_status(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_read_fru_status(struct bsg_job *bsg_job)
-#endif
+qla2x00_read_fru_status(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1733,13 +1659,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_write_fru_status(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_write_fru_status(struct bsg_job *bsg_job)
-#endif
+qla2x00_write_fru_status(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1785,13 +1706,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_write_i2c(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_write_i2c(struct bsg_job *bsg_job)
-#endif
+qla2x00_write_i2c(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1836,13 +1752,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_read_i2c(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_read_i2c(struct bsg_job *bsg_job)
-#endif
+qla2x00_read_i2c(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1891,13 +1802,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla24xx_process_bidir_cmd(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla24xx_process_bidir_cmd(struct bsg_job *bsg_job)
-#endif
+qla24xx_process_bidir_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2074,13 +1980,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qlafx00_mgmt_cmd(struct fc_bsg_job *bsg_job)
-#else
-static int
-qlafx00_mgmt_cmd(struct bsg_job *bsg_job)
-#endif
+qlafx00_mgmt_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2202,13 +2103,8 @@
 	return rval;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla26xx_serdes_op(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla26xx_serdes_op(struct bsg_job *bsg_job)
-#endif
+qla26xx_serdes_op(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2249,13 +2145,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla8044_serdes_op(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla8044_serdes_op(struct bsg_job *bsg_job)
-#endif
+qla8044_serdes_op(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2296,13 +2187,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla27xx_get_flash_upd_cap(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla27xx_get_flash_upd_cap(struct bsg_job *bsg_job)
-#endif
+qla27xx_get_flash_upd_cap(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2333,13 +2219,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla27xx_set_flash_upd_cap(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla27xx_set_flash_upd_cap(struct bsg_job *bsg_job)
-#endif
+qla27xx_set_flash_upd_cap(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2384,13 +2265,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla27xx_get_bbcr_data(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla27xx_get_bbcr_data(struct bsg_job *bsg_job)
-#endif
+qla27xx_get_bbcr_data(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2448,13 +2324,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_get_priv_stats(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_get_priv_stats(struct bsg_job *bsg_job)
-#endif
+qla2x00_get_priv_stats(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -2512,13 +2383,8 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_do_dport_diagnostics(struct fc_bsg_job *bsg_job)
-#else
-static int
-qla2x00_do_dport_diagnostics(struct bsg_job *bsg_job)
-#endif
+qla2x00_do_dport_diagnostics(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -2561,13 +2427,91 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_get_flash_image_status(struct fc_bsg_job *bsg_job)
-#else
+qla2x00_do_dport_diagnostics_v2(BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
+	scsi_qla_host_t *vha = shost_priv(host);
+	int rval;
+	struct qla_dport_diag_v2 *dd;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+	uint16_t options;
+	/* D_Port diagnostics v2 request: dd->options picks start, restart or get-result. */
+	if (!IS_DPORT_CAPABLE(vha->hw))
+		return -EPERM;
+
+	dd = kzalloc(sizeof(*dd), GFP_KERNEL);
+	if (!dd)
+		return -ENOMEM;
+	/* dd receives the request payload here and is copied back as the reply below. */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			bsg_job->request_payload.sg_cnt, dd, sizeof(*dd));
+
+	options  = dd->options;
+
+	/* A get-result request while a test is still in progress reports "in process". */
+	if (options == QLA_GET_DPORT_RESULT_V2 &&
+	    vha->dport_status & DPORT_DIAG_IN_PROGRESS) {
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] =
+					EXT_STATUS_DPORT_DIAG_IN_PROCESS;
+		goto dportcomplete;
+	}
+
+	/* A start/restart request clears the chip-reset-in-progress flag and carries on. */
+	if (vha->dport_status & DPORT_DIAG_CHIP_RESET_IN_PROGRESS &&
+	    (options == QLA_START_DPORT_TEST_V2 ||
+	     options == QLA_RESTART_DPORT_TEST_V2)) {
+		vha->dport_status &= ~DPORT_DIAG_CHIP_RESET_IN_PROGRESS;
+	}
+
+	/* A get-result request with the chip-reset flag still set reports "not running". */
+	if (vha->dport_status & DPORT_DIAG_CHIP_RESET_IN_PROGRESS &&
+	    options == QLA_GET_DPORT_RESULT_V2) {
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] =
+					EXT_STATUS_DPORT_DIAG_NOT_RUNNING;
+		goto dportcomplete;
+	}
+
+	rval = qla26xx_dport_diagnostics_v2(vha, dd, mcp);
+
+	if (rval == QLA_SUCCESS) {
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] =
+					EXT_STATUS_OK;
+		if (options == QLA_START_DPORT_TEST_V2 ||
+		    options == QLA_RESTART_DPORT_TEST_V2) {
+			dd->mbx1 = mcp->mb[0];
+			dd->mbx2 = mcp->mb[1];
+			vha->dport_status |=  DPORT_DIAG_IN_PROGRESS;
+		} else if (options == QLA_GET_DPORT_RESULT_V2) {
+			dd->mbx1 = le16_to_cpu(vha->dport_data[1]);
+			dd->mbx2 = le16_to_cpu(vha->dport_data[2]);
+		}
+	} else {
+		dd->mbx1 = mcp->mb[0];
+		dd->mbx2 = mcp->mb[1];
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] =
+				EXT_STATUS_DPORT_DIAG_ERR;
+	}
+
+dportcomplete:
+	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+			    bsg_job->reply_payload.sg_cnt, dd, sizeof(*dd));
+
+	bsg_reply->reply_payload_rcv_len = sizeof(*dd);
+	bsg_job->reply_len = sizeof(*bsg_reply);
+	bsg_reply->result = DID_OK << 16;
+	bsg_job_done(bsg_job, bsg_reply->result,
+		     bsg_reply->reply_payload_rcv_len);
+
+	kfree(dd);
+
+	return 0;
+}
+
 static int
-qla2x00_get_flash_image_status(struct bsg_job *bsg_job)
-#endif
+qla2x00_get_flash_image_status(BSG_JOB_TYPE *bsg_job)
 {
 	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -2578,19 +2522,23 @@
 	qla27xx_get_active_image(vha, &active_regions);
 	regions.global_image = active_regions.global;
 
+	if (IS_QLA27XX(ha))
+		regions.nvme_params = QLA27XX_PRIMARY_IMAGE;
+
 	if (IS_QLA28XX(ha)) {
 		qla28xx_get_aux_images(vha, &active_regions);
 		regions.board_config = active_regions.aux.board_config;
 		regions.vpd_nvram = active_regions.aux.vpd_nvram;
 		regions.npiv_config_0_1 = active_regions.aux.npiv_config_0_1;
 		regions.npiv_config_2_3 = active_regions.aux.npiv_config_2_3;
+		regions.nvme_params = active_regions.aux.nvme_params;
 	}
 
 	ql_dbg(ql_dbg_user, vha, 0x70e1,
-	    "%s(%lu): FW=%u BCFG=%u VPDNVR=%u NPIV01=%u NPIV02=%u\n",
+	    "%s(%lu): FW=%u BCFG=%u VPDNVR=%u NPIV01=%u NPIV02=%u NVME_PARAMS=%u\n",
 	    __func__, vha->host_no, regions.global_image,
 	    regions.board_config, regions.vpd_nvram,
-	    regions.npiv_config_0_1, regions.npiv_config_2_3);
+	    regions.npiv_config_0_1, regions.npiv_config_2_3, regions.nvme_params);
 
 	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
 	    bsg_job->reply_payload.sg_cnt, &regions, sizeof(regions));
@@ -2605,16 +2553,336 @@
 	return 0;
 }
 
-#ifndef NEW_LIBFC_API
 static int
-qla2x00_process_vendor_specific(struct fc_bsg_job *bsg_job)
-#else
+qla2x00_manage_host_stats(BSG_JOB_TYPE *bsg_job)
+{
+	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct ql_vnd_mng_host_stats_param *req_data;
+	struct ql_vnd_mng_host_stats_resp rsp_data;
+	u32 req_data_len = bsg_job->request_payload.payload_len;
+	int ret = 0;
+
+	/*
+	 * Start, stop or clear host statistics as selected by req_data->action.
+	 * The outcome is handed back to the application in rsp_data.status.
+	 */
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x0000, "Host is not online.\n");
+		return -EIO;
+	}
+
+	if (req_data_len != sizeof(struct ql_vnd_mng_host_stats_param)) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data_len invalid.\n");
+		return -EIO;
+	}
+
+	req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
+	if (!req_data) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data memory allocation failure.\n");
+		return -ENOMEM;
+	}
+
+	/* Copy the request buffer in req_data */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt, req_data, req_data_len);
+
+	switch (req_data->action) {
+	case QLA_STOP:
+		ret = qla2xxx_stop_stats(vha->host, req_data->stat_type);
+		break;
+	case QLA_START:
+		ret = qla2xxx_start_stats(vha->host, req_data->stat_type);
+		break;
+	case QLA_CLEAR:
+		ret = qla2xxx_reset_stats(vha->host, req_data->stat_type);
+		break;
+	default:
+		ql_log(ql_log_warn, vha, 0x0000, "Invalid action.\n");
+		ret = -EIO;
+		break;
+	}
+
+	kfree(req_data);
+
+	/* Prepare response */
+	rsp_data.status = ret;
+	bsg_job->reply_payload.payload_len = sizeof(struct ql_vnd_mng_host_stats_resp);
+
+	bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_OK;
+	bsg_reply->reply_payload_rcv_len =
+		sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+				    bsg_job->reply_payload.sg_cnt, &rsp_data,
+				    sizeof(struct ql_vnd_mng_host_stats_resp));
+
+	bsg_reply->result = DID_OK;
+	bsg_job_done(bsg_job, bsg_reply->result,
+		     bsg_reply->reply_payload_rcv_len);
+	/* Already completed: an error return would have the caller complete the job again. */
+	return 0;
+}
+
 static int
-qla2x00_process_vendor_specific(struct bsg_job *bsg_job)
-#endif
+qla2x00_get_host_stats(BSG_JOB_TYPE *bsg_job)
+{
+	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct ql_vnd_stats_param *req_data;
+	struct ql_vnd_host_stats_resp rsp_data;
+	u32 req_data_len = bsg_job->request_payload.payload_len;
+	int ret = 0;
+	u64 ini_entry_count = 0;
+	u64 entry_count = 0;
+	u64 tgt_num = 0;
+	u64 tmp_stat_type = 0;
+	u64 response_len = 0;
+	void *data;
+
+	/* Reply with the statistics entries selected by the request's stat_type bits. */
+	if (req_data_len != sizeof(struct ql_vnd_stats_param)) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data_len invalid.\n");
+		return -EIO;
+	}
+
+	req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
+	if (!req_data) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data memory allocation failure.\n");
+		return -ENOMEM;
+	}
+
+	/* Copy the request buffer in req_data */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt, req_data, req_data_len);
+
+	/* Copy stat type to work on it */
+	tmp_stat_type = req_data->stat_type;
+
+	if (tmp_stat_type & QLA2XX_TGT_SHT_LNK_DOWN) {
+		/* Num of tgts connected to this host */
+		tgt_num = qla2x00_get_num_tgts(vha);
+		/* unset BIT_17 */
+		tmp_stat_type &= ~(1 << 17);
+	}
+
+	/* Total ini stats */
+	ini_entry_count = qla2x00_count_set_bits(tmp_stat_type);
+
+	/* Total number of entries */
+	entry_count = ini_entry_count + tgt_num;
+
+	response_len = sizeof(struct ql_vnd_host_stats_resp) +
+		(sizeof(struct ql_vnd_stat_entry) * entry_count);
+
+	if (response_len > bsg_job->reply_payload.payload_len) {
+		rsp_data.status = EXT_STATUS_BUFFER_TOO_SMALL;
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_BUFFER_TOO_SMALL;
+		bsg_job->reply_payload.payload_len = sizeof(struct ql_vnd_mng_host_stats_resp);
+
+		bsg_reply->reply_payload_rcv_len =
+			sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+					    bsg_job->reply_payload.sg_cnt, &rsp_data,
+					    sizeof(struct ql_vnd_mng_host_stats_resp));
+
+		bsg_reply->result = DID_OK;
+		bsg_job_done(bsg_job, bsg_reply->result,
+			     bsg_reply->reply_payload_rcv_len);
+		goto host_stat_out;
+	}
+
+	data = kzalloc(response_len, GFP_KERNEL);
+	if (!data) {
+		ret = -ENOMEM;
+		goto host_stat_out;
+	}
+
+	ret = qla2xxx_get_ini_stats(fc_bsg_to_shost(bsg_job), req_data->stat_type,
+				    data, response_len);
+	/* On failure do not complete here: the error return leaves that to the caller. */
+	if (ret)
+		goto host_stat_free;
+
+	bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_OK;
+	bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							       bsg_job->reply_payload.sg_cnt,
+							       data, response_len);
+	bsg_reply->result = DID_OK;
+	bsg_job_done(bsg_job, bsg_reply->result,
+		     bsg_reply->reply_payload_rcv_len);
+host_stat_free:
+	kfree(data);
+host_stat_out:
+	kfree(req_data);
+	return ret;
+}
+
+/* Return the rport numbered @tgt_num, or NULL; fcports without an rport are skipped. */
+static struct fc_rport *
+qla2xxx_find_rport(scsi_qla_host_t *vha, uint32_t tgt_num)
+{
+	fc_port_t *fcport;
+
+	list_for_each_entry(fcport, &vha->vp_fcports, list)
+		if (fcport->rport && fcport->rport->number == tgt_num)
+			return fcport->rport;
+	return NULL;
+}
+
+/* Vendor BSG handler: reply with the statistics of the target named by req_data->tgt_id. */
+static int
+qla2x00_get_tgt_stats(BSG_JOB_TYPE *bsg_job)
+{
+	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct ql_vnd_tgt_stats_param *req_data;
+	u32 req_data_len = bsg_job->request_payload.payload_len;
+	int ret = 0;
+	u64 response_len = 0;
+	struct ql_vnd_tgt_stats_resp *data = NULL;
+	struct fc_rport *rport = NULL;
+
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x0000, "Host is not online.\n");
+		return -EIO;
+	}
+
+	if (req_data_len != sizeof(struct ql_vnd_stat_entry)) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data_len invalid.\n");
+		return -EIO;
+	}
+
+	req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
+	if (!req_data) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data memory allocation failure.\n");
+		return -ENOMEM;
+	}
+
+	/* Copy no more than req_data holds: the length check above uses another struct. */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt,
+			  req_data, sizeof(*req_data));
+
+	response_len = sizeof(struct ql_vnd_tgt_stats_resp) +
+		sizeof(struct ql_vnd_stat_entry);
+
+	/* structure + size for one entry */
+	data = kzalloc(response_len, GFP_KERNEL);
+	if (!data) {
+		kfree(req_data);
+		return -ENOMEM;
+	}
+
+	if (response_len > bsg_job->reply_payload.payload_len) {
+		data->status = EXT_STATUS_BUFFER_TOO_SMALL;
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_BUFFER_TOO_SMALL;
+		bsg_job->reply_payload.payload_len = sizeof(struct ql_vnd_mng_host_stats_resp);
+
+		bsg_reply->reply_payload_rcv_len =
+			sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+					    bsg_job->reply_payload.sg_cnt, data,
+					    sizeof(struct ql_vnd_tgt_stats_resp));
+
+		bsg_reply->result = DID_OK;
+		bsg_job_done(bsg_job, bsg_reply->result,
+			     bsg_reply->reply_payload_rcv_len);
+		goto tgt_stat_out;
+	}
+
+	rport = qla2xxx_find_rport(vha, req_data->tgt_id);
+	if (!rport) {
+		ql_log(ql_log_warn, vha, 0x0000, "target %d not found.\n", req_data->tgt_id);
+		data->status = EXT_STATUS_INVALID_PARAM; /* reported via the reply payload */
+		goto reply;
+	}
+
+	ret = qla2xxx_get_tgt_stats(fc_bsg_to_shost(bsg_job), req_data->stat_type,
+				    rport, (void *)data, response_len);
+	/* On failure do not complete here: the error return leaves that to the caller. */
+	if (ret)
+		goto tgt_stat_out;
+	bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_OK;
+reply:
+	bsg_reply->reply_payload_rcv_len =
+		sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+				    bsg_job->reply_payload.sg_cnt, data,
+				    response_len);
+	bsg_reply->result = DID_OK;
+	bsg_job_done(bsg_job, bsg_reply->result,
+		     bsg_reply->reply_payload_rcv_len);
+
+tgt_stat_out:
+	kfree(data);
+	kfree(req_data);
+
+	return ret;
+}
+
+/* Vendor BSG handler: enable or disable the host port as selected by req_data->action. */
+static int
+qla2x00_manage_host_port(BSG_JOB_TYPE *bsg_job)
+{
+	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct ql_vnd_mng_host_port_param *req_data;
+	struct ql_vnd_mng_host_port_resp rsp_data;
+	u32 req_data_len = bsg_job->request_payload.payload_len;
+	int ret = 0;
+
+	if (req_data_len != sizeof(struct ql_vnd_mng_host_port_param)) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data_len invalid.\n");
+		return -EIO;
+	}
+
+	req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
+	if (!req_data) {
+		ql_log(ql_log_warn, vha, 0x0000, "req_data memory allocation failure.\n");
+		return -ENOMEM;
+	}
+
+	/* Copy the request buffer in req_data */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt, req_data, req_data_len);
+
+	switch (req_data->action) {
+	case QLA_ENABLE:
+		ret = qla2xxx_enable_port(vha->host);
+		break;
+	case QLA_DISABLE:
+		ret = qla2xxx_disable_port(vha->host);
+		break;
+	default:
+		ql_log(ql_log_warn, vha, 0x0000, "Invalid action.\n");
+		ret = -EIO;
+		break;
+	}
+
+	kfree(req_data);
+
+	/* Prepare response: the outcome of the action travels in rsp_data.status */
+	rsp_data.status = ret;
+	bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_OK;
+	bsg_job->reply_payload.payload_len = sizeof(struct ql_vnd_mng_host_port_resp);
+
+	bsg_reply->reply_payload_rcv_len =
+		sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+				    bsg_job->reply_payload.sg_cnt, &rsp_data,
+				    sizeof(struct ql_vnd_mng_host_port_resp));
+	bsg_reply->result = DID_OK;
+	bsg_job_done(bsg_job, bsg_reply->result,
+		     bsg_reply->reply_payload_rcv_len);
+
+	/* Already completed: an error return would have the caller complete the job again. */
+	return 0;
+}
+
+static int
+qla2x00_process_vendor_specific(struct scsi_qla_host *vha,
+				BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 
+	ql_dbg(ql_dbg_edif, vha, 0x911b, "%s FC_BSG_HST_VENDOR cmd[0]=0x%x\n",
+	    __func__, bsg_request->rqst_data.h_vendor.vendor_cmd[0]);
+
 	switch (bsg_request->rqst_data.h_vendor.vendor_cmd[0]) {
 	case QL_VND_LOOPBACK:
 		return qla2x00_process_loopback(bsg_job);
@@ -2683,21 +2951,37 @@
 	case QL_VND_DPORT_DIAGNOSTICS:
 		return qla2x00_do_dport_diagnostics(bsg_job);
 
+	case QL_VND_DPORT_DIAGNOSTICS_V2:
+		return qla2x00_do_dport_diagnostics_v2(bsg_job);
+
+	case QL_VND_EDIF_MGMT:
+		return qla_edif_app_mgmt(bsg_job);
+
 	case QL_VND_SS_GET_FLASH_IMAGE_STATUS:
 		return qla2x00_get_flash_image_status(bsg_job);
 
+	case QL_VND_MANAGE_HOST_STATS:
+		return qla2x00_manage_host_stats(bsg_job);
+
+	case QL_VND_GET_HOST_STATS:
+		return qla2x00_get_host_stats(bsg_job);
+
+	case QL_VND_GET_TGT_STATS:
+		return qla2x00_get_tgt_stats(bsg_job);
+
+	case QL_VND_MANAGE_HOST_PORT:
+		return qla2x00_manage_host_port(bsg_job);
+
+	case QL_VND_MBX_PASSTHRU:
+		return qla2x00_mailbox_passthru(bsg_job);
+
 	default:
 		return -ENOSYS;
 	}
 }
 
-#ifndef NEW_LIBFC_API
 int
-qla24xx_bsg_request(struct fc_bsg_job *bsg_job)
-#else
-int
-qla24xx_bsg_request(struct bsg_job *bsg_job)
-#endif
+qla24xx_bsg_request(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -2718,15 +3002,34 @@
 		vha = shost_priv(host);
 	}
 
+	/* Disable port will bring down the chip, allow enable command */
+	if (bsg_request->rqst_data.h_vendor.vendor_cmd[0] == QL_VND_MANAGE_HOST_PORT ||
+	    bsg_request->rqst_data.h_vendor.vendor_cmd[0] == QL_VND_GET_HOST_STATS)
+		goto skip_chip_chk;
+
+	if (vha->hw->flags.port_isolated) {
+		bsg_reply->result = DID_ERROR;
+		/* operation not permitted */
+		return -EPERM;
+	}
+
 	if (qla2x00_chip_is_down(vha)) {
 		ql_dbg(ql_dbg_user, vha, 0x709f,
 		    "BSG: ISP abort active/needed -- cmd=%d.\n",
 		    bsg_request->msgcode);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
 		return -EBUSY;
 	}
 
-	ql_dbg(ql_dbg_user, vha, 0x7000,
-	    "Entered %s msgcode=0x%x.\n", __func__, bsg_request->msgcode);
+	if (test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags)) {
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		return -EIO;
+	}
+
+skip_chip_chk:
+	ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x7000,
+	    "Entered %s msgcode=0x%x. bsg ptr %px\n",
+	    __func__, bsg_request->msgcode, bsg_job);
 
 	switch (bsg_request->msgcode) {
 	case FC_BSG_RPT_ELS:
@@ -2737,7 +3040,7 @@
 		ret = qla2x00_process_ct(bsg_job);
 		break;
 	case FC_BSG_HST_VENDOR:
-		ret = qla2x00_process_vendor_specific(bsg_job);
+		ret = qla2x00_process_vendor_specific(vha, bsg_job);
 		break;
 	case FC_BSG_HST_ADD_RPORT:
 	case FC_BSG_HST_DEL_RPORT:
@@ -2746,16 +3049,15 @@
 		ql_log(ql_log_warn, vha, 0x705a, "Unsupported BSG request.\n");
 		break;
 	}
+
+	ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x7000,
+	    "%s done with return %x\n", __func__, ret);
+
 	return ret;
 }
 
-#ifndef NEW_LIBFC_API
 int
-qla24xx_bsg_timeout(struct fc_bsg_job *bsg_job)
-#else
-int
-qla24xx_bsg_timeout(struct bsg_job *bsg_job)
-#endif
+qla24xx_bsg_timeout(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
@@ -2765,6 +3067,15 @@
 	unsigned long flags;
 	struct req_que *req;
 
+	ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
+	    __func__, bsg_job);
+
+	if (qla2x00_isp_reg_stat(ha)) {
+		ql_log(ql_log_info, vha, 0x9007,
+		    "PCI/Register disconnect.\n");
+		qla_pci_set_eeh_busy(vha);
+	}
+
 	/* find the bsg job from the active list of commands */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (que = 0; que < ha->max_req_queues; que++) {
@@ -2774,27 +3085,27 @@
 
 		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 			sp = req->outstanding_cmds[cnt];
-			if (sp) {
-				if (((sp->type == SRB_CT_CMD) ||
-					(sp->type == SRB_ELS_CMD_HST) ||
-					(sp->type == SRB_FXIOCB_BCMD))
-					&& (sp->u.bsg_job == bsg_job)) {
-					req->outstanding_cmds[cnt] = NULL;
-					spin_unlock_irqrestore(&ha->hardware_lock, flags);
-					if (ha->isp_ops->abort_command(sp)) {
-						ql_log(ql_log_warn, vha, 0x7089,
-						    "mbx abort_command "
-						    "failed.\n");
-						bsg_reply->result = -EIO;
-					} else {
-						ql_dbg(ql_dbg_user, vha, 0x708a,
-						    "mbx abort_command "
-						    "success.\n");
-						bsg_reply->result = 0;
-					}
-					spin_lock_irqsave(&ha->hardware_lock, flags);
-					goto done;
+			if (sp &&
+			    (sp->type == SRB_CT_CMD ||
+			     sp->type == SRB_ELS_CMD_HST ||
+			     sp->type == SRB_ELS_CMD_HST_NOLOGIN ||
+			     sp->type == SRB_FXIOCB_BCMD) &&
+			    sp->u.bsg_job == bsg_job) {
+				req->outstanding_cmds[cnt] = NULL;
+				spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+				if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
+					ql_log(ql_log_warn, vha, 0x7089,
+					    "mbx abort_command failed.\n");
+					bsg_reply->result = -EIO;
+				} else {
+					ql_dbg(ql_dbg_user, vha, 0x708a,
+					    "mbx abort_command success.\n");
+					bsg_reply->result = 0;
 				}
+				spin_lock_irqsave(&ha->hardware_lock, flags);
+				goto done;
+
 			}
 		}
 	}
@@ -2805,6 +3116,52 @@
 
 done:
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	return 0;
 }
+
+/* Vendor BSG handler: pass req_data->mbx_in/mbx_out to qla_mailbox_passthru(), echo req_data. */
+int qla2x00_mailbox_passthru(BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
+	int ret, ptsize = sizeof(struct qla_mbx_passthru);
+	struct qla_mbx_passthru *req_data = NULL;
+	uint32_t req_data_len;
+
+	req_data_len = bsg_job->request_payload.payload_len;
+	if (req_data_len != ptsize) {
+		ql_log(ql_log_warn, vha, 0xf0a3, "req_data_len invalid.\n");
+		return -EIO;
+	}
+	req_data = kzalloc(ptsize, GFP_KERNEL);
+	if (!req_data) {
+		ql_log(ql_log_warn, vha, 0xf0a4,
+		       "req_data memory allocation failure.\n");
+		return -ENOMEM;
+	}
+
+	/* Copy the request buffer in req_data */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt, req_data, ptsize);
+	ret = qla_mailbox_passthru(vha, req_data->mbx_in, req_data->mbx_out);
+
+	/* Copy req_data back into the reply buffer */
+	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+			    bsg_job->reply_payload.sg_cnt, req_data, ptsize);
+
+	bsg_reply->reply_payload_rcv_len = ptsize;
+	if (ret == QLA_SUCCESS)
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_OK;
+	else
+		bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = EXT_STATUS_ERR;
+
+	bsg_job->reply_len = sizeof(*bsg_job->reply);
+	bsg_reply->result = DID_OK << 16;
+	bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
+
+	kfree(req_data);
+	/* Failure is reported in vendor_rsp[0]; the job is done, so never return non-zero. */
+	return 0;
+}

diff --git a/scst/qla2x00t-32gbit/qla_bsg.h b/scst/qla2x00t-32gbit/qla_bsg.h
index 7594fad..d38dab0 100644
--- a/scst/qla2x00t-32gbit/qla_bsg.h
+++ b/scst/qla2x00t-32gbit/qla_bsg.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_BSG_H
 #define __QLA_BSG_H
@@ -32,6 +31,13 @@
 #define QL_VND_DPORT_DIAGNOSTICS	0x19
 #define QL_VND_GET_PRIV_STATS_EX	0x1A
 #define QL_VND_SS_GET_FLASH_IMAGE_STATUS	0x1E
+#define QL_VND_EDIF_MGMT                0X1F
+#define QL_VND_MANAGE_HOST_STATS	0x23
+#define QL_VND_GET_HOST_STATS		0x24
+#define QL_VND_GET_TGT_STATS		0x25
+#define QL_VND_MANAGE_HOST_PORT		0x26
+#define QL_VND_MBX_PASSTHRU		0x2B
+#define QL_VND_DPORT_DIAGNOSTICS_V2	0x2C
 
 /* BSG Vendor specific subcode returns */
 #define EXT_STATUS_OK			0
@@ -41,6 +47,7 @@
 #define EXT_STATUS_DATA_OVERRUN		7
 #define EXT_STATUS_DATA_UNDERRUN	8
 #define EXT_STATUS_MAILBOX		11
+#define EXT_STATUS_BUFFER_TOO_SMALL	16
 #define EXT_STATUS_NO_MEMORY		17
 #define EXT_STATUS_DEVICE_OFFLINE	22
 
@@ -54,6 +61,9 @@
 #define EXT_STATUS_TIMEOUT		30
 #define EXT_STATUS_THREAD_FAILED	31
 #define EXT_STATUS_DATA_CMP_FAILED	32
+#define EXT_STATUS_DPORT_DIAG_ERR	40
+#define EXT_STATUS_DPORT_DIAG_IN_PROCESS	41
+#define EXT_STATUS_DPORT_DIAG_NOT_RUNNING	42
 
 /* BSG definations for interpreting CommandSent field */
 #define INT_DEF_LB_LOOPBACK_CMD         0
@@ -151,7 +161,7 @@
 	uint16_t rsrvd;
 	struct qla84_mgmt_param mgmtp;/* parameters for cmd */
 	uint32_t len; /* bytes in payload following this struct */
-	uint8_t payload[0]; /* payload for cmd */
+	uint8_t payload[]; /* payload for cmd */
 };
 
 struct qla_bsg_a84_mgmt {
@@ -182,6 +192,12 @@
 	uint16_t speed;
 } __attribute__ ((packed));
 
+struct qla_mbx_passthru {
+	uint16_t reserved1[2];
+	uint16_t mbx_in[32];
+	uint16_t mbx_out[32];
+	uint32_t reserved2[16];
+} __packed;
 
 /* FRU VPD */
 
@@ -204,7 +220,7 @@
 
 struct qla_image_version_list {
 	uint32_t count;
-	struct qla_image_version version[0];
+	struct qla_image_version version[];
 } __packed;
 
 struct qla_status_reg {
@@ -276,6 +292,17 @@
 	uint8_t  unused[62];
 } __packed;
 
+#define QLA_GET_DPORT_RESULT_V2		0  /* Get Result */
+#define QLA_RESTART_DPORT_TEST_V2	1  /* Restart test */
+#define QLA_START_DPORT_TEST_V2		2  /* Start test */
+struct qla_dport_diag_v2 {
+	uint16_t options;
+	uint16_t mbx1;
+	uint16_t mbx2;
+	uint8_t  unused[58];
+	uint8_t buf[1024]; /* Test Result */
+} __packed;
+
 /* D_Port options */
 #define QLA_DPORT_RESULT	0x0
 #define QLA_DPORT_START		0x2
@@ -287,7 +314,10 @@
 	uint8_t vpd_nvram;
 	uint8_t npiv_config_0_1;
 	uint8_t npiv_config_2_3;
-	uint8_t reserved[32];
+	uint8_t nvme_params;
+	uint8_t reserved[31];
 } __packed;
 
+#include "qla_edif_bsg.h"
+
 #endif

diff --git a/scst/qla2x00t-32gbit/qla_dbg.c b/scst/qla2x00t-32gbit/qla_dbg.c
index 1be811a..d9f9dda 100644
--- a/scst/qla2x00t-32gbit/qla_dbg.c
+++ b/scst/qla2x00t-32gbit/qla_dbg.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 /*
@@ -13,10 +12,9 @@
  * ----------------------------------------------------------------------
  * | Module Init and Probe        |       0x0199       |                |
  * | Mailbox commands             |       0x1206       | 0x11a5-0x11ff	|
- * | Device Discovery             |       0x2134       | 0x210e-0x2116  |
- * |				  | 		       | 0x211a         |
+ * | Device Discovery             |       0x2134       | 0x210e-0x2115  |
  * |                              |                    | 0x211c-0x2128  |
- * |                              |                    | 0x212a-0x2134  |
+ * |                              |                    | 0x212c-0x2134  |
  * | Queue Command and IO tracing |       0x3074       | 0x300b         |
  * |                              |                    | 0x3027-0x3028  |
  * |                              |                    | 0x303d-0x3041  |
@@ -114,8 +112,13 @@
 	uint32_t stat;
 	ulong i, j, timer = 6000000;
 	int rval = QLA_FUNCTION_FAILED;
+	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags);
+
+	if (qla_pci_disconnected(vha, reg))
+		return rval;
+
 	for (i = 0; i < ram_dwords; i += dwords, addr += dwords) {
 		if (i + dwords > ram_dwords)
 			dwords = ram_dwords - i;
@@ -139,6 +142,9 @@
 		while (timer--) {
 			udelay(5);
 
+			if (qla_pci_disconnected(vha, reg))
+				return rval;
+
 			stat = rd_reg_dword(&reg->host_status);
 			/* Check for pending interrupts. */
 			if (!(stat & HSRX_RISC_INT))
@@ -193,9 +199,13 @@
 	uint32_t dwords = qla2x00_gid_list_size(ha) / 4;
 	uint32_t stat;
 	ulong i, j, timer = 6000000;
+	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags);
 
+	if (qla_pci_disconnected(vha, reg))
+		return rval;
+
 	for (i = 0; i < ram_dwords; i += dwords, addr += dwords) {
 		if (i + dwords > ram_dwords)
 			dwords = ram_dwords - i;
@@ -203,6 +213,7 @@
 		wrt_reg_word(&reg->mailbox0, MBC_DUMP_RISC_RAM_EXTENDED);
 		wrt_reg_word(&reg->mailbox1, LSW(addr));
 		wrt_reg_word(&reg->mailbox8, MSW(addr));
+		wrt_reg_word(&reg->mailbox10, 0);
 
 		wrt_reg_word(&reg->mailbox2, MSW(LSD(dump_dma)));
 		wrt_reg_word(&reg->mailbox3, LSW(LSD(dump_dma)));
@@ -216,8 +227,10 @@
 		ha->flags.mbox_int = 0;
 		while (timer--) {
 			udelay(5);
-			stat = rd_reg_dword(&reg->host_status);
+			if (qla_pci_disconnected(vha, reg))
+				return rval;
 
+			stat = rd_reg_dword(&reg->host_status);
 			/* Check for pending interrupts. */
 			if (!(stat & HSRX_RISC_INT))
 				continue;
@@ -2442,15 +2455,18 @@
 /****************************************************************************/
 
 /* Write the debug message prefix into @pbuf. */
-static void ql_dbg_prefix(char *pbuf, int pbuf_size,
+static void ql_dbg_prefix(char *pbuf, int pbuf_size, struct pci_dev *pdev,
 			  const scsi_qla_host_t *vha, uint msg_id)
 {
 	if (vha) {
 		const struct pci_dev *pdev = vha->hw->pdev;
 
 		/* <module-name> [<dev-name>]-<msg-id>:<host>: */
-		snprintf(pbuf, pbuf_size, "%s [%s]-%04x:%ld: ", QL_MSGHDR,
+		snprintf(pbuf, pbuf_size, "%s [%s]-%04x:%lu: ", QL_MSGHDR,
 			 dev_name(&(pdev->dev)), msg_id, vha->host_no);
+	} else if (pdev) {
+		snprintf(pbuf, pbuf_size, "%s [%s]-%04x: : ", QL_MSGHDR,
+			 dev_name(&pdev->dev), msg_id);
 	} else {
 		/* <module-name> [<dev-name>]-<msg-id>: : */
 		snprintf(pbuf, pbuf_size, "%s [%s]-%04x: : ", QL_MSGHDR,
@@ -2478,18 +2494,36 @@
 	struct va_format vaf;
 	char pbuf[64];
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(1, level, pbuf, NULL, vha, id, fmt);
+
+	if (!ql_mask_match(level))
+		return;
+
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, vha, id);
+#else
+	if (!ql_mask_match(level) && !trace_ql_dbg_log_enabled())
+		return;
+#endif
+
 	va_start(va, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &va;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), vha, id);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	pr_warn("%s%pV", pbuf, &vaf);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, vha, id);
 
 	if (!ql_mask_match(level))
 		trace_ql_dbg_log(pbuf, &vaf);
 	else
 		pr_warn("%s%pV", pbuf, &vaf);
 
+#endif
+
 	va_end(va);
 
 }
@@ -2517,6 +2551,11 @@
 
 	if (pdev == NULL)
 		return;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(1, level, pbuf, pdev, NULL, id, fmt);
+#endif
+
 	if (!ql_mask_match(level))
 		return;
 
@@ -2525,7 +2564,14 @@
 	vaf.fmt = fmt;
 	vaf.va = &va;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, id + ql_dbg_offset);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), pdev, NULL,
+			      id + ql_dbg_offset);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), pdev, NULL,
+		      id + ql_dbg_offset);
+#endif
 	pr_warn("%s%pV", pbuf, &vaf);
 
 	va_end(va);
@@ -2554,7 +2600,14 @@
 	if (level > ql_errlev)
 		return;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), vha, id);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(0, level, pbuf, NULL, vha, id, fmt);
+
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, vha, id);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, vha, id);
+#endif
 
 	va_start(va, fmt);
 
@@ -2605,7 +2658,14 @@
 	if (level > ql_errlev)
 		return;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, id);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(0, level, pbuf, pdev, NULL, id, fmt);
+
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), pdev, NULL, id);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), pdev, NULL, id);
+#endif
 
 	va_start(va, fmt);
 
@@ -2700,7 +2760,16 @@
 	if (level > ql_errlev)
 		return;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), qpair ? qpair->vha : NULL, id);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(0, level, pbuf, NULL, qpair ? qpair->vha : NULL, id, fmt);
+
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL,
+			      qpair ? qpair->vha : NULL, id);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, qpair ? qpair->vha : NULL, id);
+
+#endif
 
 	va_start(va, fmt);
 
@@ -2746,6 +2815,10 @@
 	struct va_format vaf;
 	char pbuf[128];
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	ql_ktrace(1, level, pbuf, NULL, qpair ? qpair->vha : NULL, id, fmt);
+#endif
+
 	if (!ql_mask_match(level))
 		return;
 
@@ -2754,8 +2827,15 @@
 	vaf.fmt = fmt;
 	vaf.va = &va;
 
-	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), qpair ? qpair->vha : NULL,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	if (!pbuf[0]) /* set by ql_ktrace */
+		ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL,
+			      qpair ? qpair->vha : NULL, id + ql_dbg_offset);
+#else
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), NULL, qpair ? qpair->vha : NULL,
 		      id + ql_dbg_offset);
+#endif
+
 	pr_warn("%s%pV", pbuf, &vaf);
 
 	va_end(va);

diff --git a/scst/qla2x00t-32gbit/qla_dbg.h b/scst/qla2x00t-32gbit/qla_dbg.h
index e1d7de6..4ea3463 100644
--- a/scst/qla2x00t-32gbit/qla_dbg.h
+++ b/scst/qla2x00t-32gbit/qla_dbg.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #include "qla_def.h"
@@ -309,7 +308,7 @@
 };
 
 #define QL_MSGHDR "qla2xxx"
-#define QL_DBG_DEFAULT1_MASK    0x1e400000
+#define QL_DBG_DEFAULT1_MASK    0x1e600000
 
 #define ql_log_fatal		0 /* display fatal errors */
 #define ql_log_warn		1 /* display critical errors */
@@ -368,6 +367,7 @@
 #define ql_dbg_tgt_mgt	0x00002000 /* Target mode management */
 #define ql_dbg_tgt_tmr	0x00001000 /* Target mode task management */
 #define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
+#define ql_dbg_edif	0x00000400 /* edif and purex debug */
 
 extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
 	uint32_t, void **);
@@ -383,5 +383,52 @@
 	if (ql2xextended_error_logging == 1)
 		ql2xextended_error_logging = QL_DBG_DEFAULT1_MASK;
 
-	return (level & ql2xextended_error_logging) == level;
+	return level && ((level & ql2xextended_error_logging) == level);
 }
+
+/*
+ * Variant of ql_mask_match() that tests @level against a caller-supplied
+ * tunable instead of ql2xextended_error_logging. Unlike ql_mask_match(),
+ * a @level of 0 always matches.
+ */
+static inline int
+ql_mask_match_ext(uint level, int *log_tunable)
+{
+	int mask = *log_tunable;
+
+	/* A tunable of exactly 1 is replaced, in place, by the default mask. */
+	if (mask == 1) {
+		mask = QL_DBG_DEFAULT1_MASK;
+		*log_tunable = mask;
+	}
+
+	return (level & mask) == level;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+
+/*
+ * Emit a ql_dbg_log trace event for the message currently being logged.
+ *
+ * Must be expanded inside a variadic function whose last named parameter is
+ * passed as @fmt, because the macro calls va_start() on it. @pbuf must be a
+ * local char array (ARRAY_SIZE() is applied to it). pbuf[0] is cleared first
+ * and the prefix is only written when the event is really traced, so callers
+ * test pbuf[0] to decide whether they still have to build the prefix.
+ * A non-zero @dbg_msg additionally filters @level through
+ * ql2xextended_error_logging_ktrace and adds ql_dbg_offset to @id.
+ */
+#define ql_ktrace(dbg_msg, level, pbuf, pdev, vha, id, fmt) do {	\
+	struct va_format _vaf;						\
+	va_list _va;							\
+	u32 dbg_off = dbg_msg ? ql_dbg_offset : 0;			\
+									\
+	pbuf[0] = 0;							\
+	if (!trace_ql_dbg_log_enabled())				\
+		break;							\
+									\
+	if (dbg_msg && !ql_mask_match_ext(level,			\
+				&ql2xextended_error_logging_ktrace))	\
+		break;							\
+									\
+	ql_dbg_prefix(pbuf, ARRAY_SIZE(pbuf), pdev, vha, id + dbg_off);	\
+									\
+	va_start(_va, fmt);						\
+	_vaf.fmt = fmt;							\
+	_vaf.va = &_va;							\
+									\
+	trace_ql_dbg_log(pbuf, &_vaf);					\
+									\
+	va_end(_va);							\
+} while (0)
+
+/*
+ * Compile-time switch for the driver's trace instance. It is defined
+ * unconditionally here, so the empty #else variant below is only used if
+ * this line is removed.
+ */
+#define QLA_ENABLE_KERNEL_TRACING
+
+#ifdef QLA_ENABLE_KERNEL_TRACING
+/*
+ * Calls trace_array_set_clr_event() on trace array @_tr with system "qla",
+ * a NULL event name and enable == true.
+ * NOTE(review): presumably this turns on every event of the "qla" trace
+ * system -- confirm against the trace_array_set_clr_event() documentation.
+ */
+#define QLA_TRACE_ENABLE(_tr) \
+	trace_array_set_clr_event(_tr, "qla", NULL, true)
+#else /* QLA_ENABLE_KERNEL_TRACING */
+/* Tracing compiled out: expands to nothing. */
+#define QLA_TRACE_ENABLE(_tr)
+#endif /* QLA_ENABLE_KERNEL_TRACING */
+
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) */

diff --git a/scst/qla2x00t-32gbit/qla_def.h b/scst/qla2x00t-32gbit/qla_def.h
index 87709f0..78adee8 100644
--- a/scst/qla2x00t-32gbit/qla_def.h
+++ b/scst/qla2x00t-32gbit/qla_def.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_DEF_H
 #define __QLA_DEF_H
@@ -25,15 +24,9 @@
 #include <linux/firmware.h>
 #include <linux/aer.h>
 #include <linux/mutex.h>
-#include <linux/version.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
-/*
- * See also commit 5db53f3e80de ("[LogFS] add new flash file system") # v2.6.34.
- */
 #include <linux/btree.h>
-#else
-#include "btree-backport.h"
-#endif
+#include <linux/version.h>
+#include <linux/bsg-lib.h>	/* struct bsg_job */
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -46,18 +39,44 @@
 	defined(CONFIG_SUSE_KERNEL) && \
 	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 #define NEW_LIBFC_API
+#define BSG_JOB_TYPE struct bsg_job
+#else
+#define BSG_JOB_TYPE struct fc_bsg_job
+
+/*
+ * Backport helper for builds without NEW_LIBFC_API: return the Scsi_Host
+ * pointer stored directly in the fc_bsg_job.
+ */
+static inline struct Scsi_Host *fc_bsg_to_shost(struct fc_bsg_job *job)
+{
+	return job->shost;
+}
+
+/*
+ * Backport helper for builds without NEW_LIBFC_API: return the fc_rport
+ * pointer stored directly in the fc_bsg_job.
+ */
+static inline struct fc_rport *fc_bsg_to_rport(struct fc_bsg_job *job)
+{
+	return job->rport;
+}
+
+/*
+ * Backport of bsg_job_done() for builds without NEW_LIBFC_API: completes
+ * @job through its own job_done() callback.
+ *
+ * @result and @reply_payload_rcv_len exist only to match the three-argument
+ * call signature and are not used here.
+ * NOTE(review): presumably callers have already stored both values in the
+ * job's reply before calling this -- confirm at the call sites.
+ */
+static inline void bsg_job_done_backport(struct fc_bsg_job *job, int result,
+					 unsigned int reply_payload_rcv_len)
+{
+	job->job_done(job);
+}
+
+/* Route every bsg_job_done() call in the driver to the backport above. */
+#define bsg_job_done bsg_job_done_backport
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
 #error
 #error ***This version of qla2xxx does not support distributions based on***
-#error ***kernels before Linux kernel v3.15.***
+#error ***kernels before Linux kernel v3.18.***
 #error
 #endif
 
 #include <uapi/scsi/fc/fc_els.h>
 
+/*
+ * Declare a debugfs dentry pointer whose name is derived from the file name:
+ * QLA_DFS_DEFINE_DENTRY(foo) declares "struct dentry *dfs_foo" and
+ * QLA_DFS_ROOT_DEFINE_DENTRY(foo) declares "struct dentry *qla_dfs_foo".
+ * No trailing semicolon is supplied.
+ */
+#define QLA_DFS_DEFINE_DENTRY(_debugfs_file_name) \
+	struct dentry *dfs_##_debugfs_file_name
+#define QLA_DFS_ROOT_DEFINE_DENTRY(_debugfs_file_name) \
+	struct dentry *qla_dfs_##_debugfs_file_name
+
 /* Big endian Fibre Channel S_ID (source ID) or D_ID (destination ID). */
 typedef struct {
 	uint8_t domain;
@@ -72,6 +91,28 @@
 	uint8_t domain;
 } le_id_t;
 
+/*
+ * 24 bit port ID type definition.
+ *
+ * The same storage can be accessed either as one 24-bit value (b24) or as
+ * its individual domain/area/al_pa bytes (b). The byte members are declared
+ * in opposite order on big- and little-endian builds.
+ * NOTE(review): presumably so that al_pa is always the least significant
+ * byte of b24 -- bit-field layout is compiler-defined, confirm.
+ */
+typedef union {
+	uint32_t b24 : 24;
+	struct {
+#ifdef __BIG_ENDIAN
+		uint8_t domain;
+		uint8_t area;
+		uint8_t al_pa;
+#elif defined(__LITTLE_ENDIAN)
+		uint8_t al_pa;
+		uint8_t area;
+		uint8_t domain;
+#else
+#error "__BIG_ENDIAN or __LITTLE_ENDIAN must be defined!"
+#endif
+		uint8_t rsvd_1;
+	} b;
+} port_id_t;
+#define INVALID_PORT_ID	0xFFFFFF	/* all 24 bits set */
+
 #include "qla_bsg.h"
 #include "qla_dsd.h"
 #include "qla_nx.h"
@@ -79,7 +120,7 @@
 #include "qla_nvme.h"
 #define QLA2XXX_DRIVER_NAME	"qla2xxx"
 #define QLA2XXX_APIDEV		"ql2xapidev"
-#define QLA2XXX_MANUFACTURER	"QLogic Corporation"
+#define QLA2XXX_MANUFACTURER	"Marvell Semiconductor, Inc."
 
 /*
  * We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
@@ -342,6 +383,13 @@
 	u32			size;
 	u8			sent;
 };
+
+/*
+ * Buffer descriptor for an ELS LS_RJT payload.
+ * NOTE(review): presumably @c is the CPU address and @cdma the bus address
+ * of one DMA allocation of @size bytes -- confirm at the allocation site.
+ */
+struct els_reject {
+	struct fc_els_ls_rjt *c;
+	dma_addr_t  cdma;
+	u16 size;
+};
+
 /*
  * Timeout timer counts in seconds
  */
@@ -368,6 +416,8 @@
 #define FW_MAX_EXCHANGES_CNT (32 * 1024)
 #define REDUCE_EXCHANGES_CNT  (8 * 1024)
 
+/*
+ * Store @status, shifted left by 16 bits, in @stat_var. Both arguments are
+ * parenthesized so that compound expressions (e.g. "a | b") are shifted as a
+ * whole instead of only their last operand.
+ */
+#define SET_DID_STATUS(stat_var, status) ((stat_var) = (status) << 16)
+
 struct req_que;
 struct qla_tgt_sess;
 
@@ -393,32 +443,11 @@
 #define SRB_CRC_CTX_DSD_VALID		BIT_5	/* DIF: dsd_list valid */
 #define SRB_WAKEUP_ON_COMP		BIT_6
 #define SRB_DIF_BUNDL_DMA_VALID		BIT_7   /* DIF: DMA list valid */
+#define SRB_EDIF_CLEANUP_DELETE		BIT_9
 
 /* To identify if a srb is of T10-CRC type. @sp => srb_t pointer */
 #define IS_PROT_IO(sp)	(sp->flags & SRB_CRC_CTX_DSD_VALID)
-
-/*
- * 24 bit port ID type definition.
- */
-typedef union {
-	uint32_t b24 : 24;
-
-	struct {
-#ifdef __BIG_ENDIAN
-		uint8_t domain;
-		uint8_t area;
-		uint8_t al_pa;
-#elif defined(__LITTLE_ENDIAN)
-		uint8_t al_pa;
-		uint8_t area;
-		uint8_t domain;
-#else
-#error "__BIG_ENDIAN or __LITTLE_ENDIAN must be defined!"
-#endif
-		uint8_t rsvd_1;
-	} b;
-} port_id_t;
-#define INVALID_PORT_ID	0xFFFFFF
+#define ISP_REG16_DISCONNECT 0xFFFF
 
 static inline le_id_t be_id_to_le(be_id_t id)
 {
@@ -505,6 +534,7 @@
 #define SRB_LOGIN_SKIP_PRLI	BIT_2
 #define SRB_LOGIN_NVME_PRLI	BIT_3
 #define SRB_LOGIN_PRLI_ONLY	BIT_4
+#define SRB_LOGIN_FCSP		BIT_5
 			uint16_t data[2];
 			u32 iop[2];
 		} logio;
@@ -609,6 +639,10 @@
 			u16 cmd;
 			u16 vp_index;
 		} ctrlvp;
+		struct {
+			struct edif_sa_ctl	*sa_ctl;
+			struct qla_sa_update_frame sa_frame;
+		} sa_update;
 	} u;
 
 	struct timer_list timer;
@@ -639,6 +673,21 @@
 #define SRB_PRLI_CMD	21
 #define SRB_CTRL_VP	22
 #define SRB_PRLO_CMD	23
+#define SRB_SA_UPDATE	25
+#define SRB_ELS_CMD_HST_NOLOGIN 26
+#define SRB_SA_REPLACE	27
+
+/* Arguments describing one ELS pass-through request. */
+struct qla_els_pt_arg {
+	u8 els_opcode;
+	u8 vp_idx;
+	__le16 nport_handle;
+	u16 control_flags;
+	u16 ox_id;
+	__le32 rx_xchg_address;
+	port_id_t did;
+	port_id_t sid;
+	u32 tx_len;
+	u32 tx_byte_count;
+	u32 rx_len;
+	u32 rx_byte_count;
+	dma_addr_t tx_addr;
+	dma_addr_t rx_addr;
+};
 
 enum {
 	TYPE_SRB,
@@ -646,6 +695,23 @@
 	TYPE_TGT_TMCMD,		/* task management */
 };
 
+/*
+ * Per-command IOCB resource bookkeeping.
+ * NOTE(review): presumably @iocb_cnt is the number of IOCBs charged to the
+ * command and @res_type selects the accounting class -- confirm in the
+ * allocation/release helpers.
+ */
+struct iocb_resource {
+	u8 res_type;
+	u8 pad;		/* explicit padding; keeps iocb_cnt 16-bit aligned */
+	u16 iocb_cnt;
+};
+
+/*
+ * BSG request context: the originating job plus per-command arguments.
+ * The job pointer type follows the libfc API generation selected by
+ * NEW_LIBFC_API.
+ */
+struct bsg_cmd {
+#ifndef NEW_LIBFC_API
+	struct fc_bsg_job *bsg_job;
+#else
+	struct bsg_job *bsg_job;
+#endif
+	union {
+		struct qla_els_pt_arg els_arg;	/* ELS pass-through arguments */
+	} u;
+};
+
 typedef struct srb {
 	/*
 	 * Do not move cmd_type field, it needs to
@@ -653,6 +719,7 @@
 	 */
 	uint8_t cmd_type;
 	uint8_t pad[3];
+	struct iocb_resource iores;
 	struct kref cmd_kref;	/* need to migrate ref_count over to this */
 	void *priv;
 	wait_queue_head_t nvme_ls_waitq;
@@ -681,7 +748,21 @@
 		struct bsg_job *bsg_job;
 #endif
 		struct srb_cmd scmd;
+		struct bsg_cmd bsg_cmd;
 	} u;
+	struct {
+		bool remapped;
+		struct {
+			dma_addr_t dma;
+			void *buf;
+			uint len;
+		} req;
+		struct {
+			dma_addr_t dma;
+			void *buf;
+			uint len;
+		} rsp;
+	} remap;
 	/*
 	 * Report completion status @res and call sp_put(@sp). @res is
 	 * an NVMe status code, a SCSI result (e.g. DID_OK << 16) or a
@@ -695,6 +776,11 @@
 	 * code.
 	 */
 	void (*put_fn)(struct kref *kref);
+
+	/*
+	 * Report completion for asynchronous commands.
+	 */
+	void (*async_done)(struct srb *sp, int res);
 } srb_t;
 
 #define GET_CMD_SP(sp) (sp->u.scmd.cmd)
@@ -1137,6 +1223,12 @@
 
 /* ISP mailbox loopback echo diagnostic error code */
 #define MBS_LB_RESET	0x17
+
+/* AEN mailbox Port Diagnostics test */
+#define AEN_START_DIAG_TEST		0x0	/* start the diagnostics */
+#define AEN_DONE_DIAG_TEST_WITH_NOERR	0x1	/* Done with no errors */
+#define AEN_DONE_DIAG_TEST_WITH_ERR	0x2	/* Done with error.*/
+
 /*
  * Firmware options 1, 2, 3.
  */
@@ -1547,7 +1639,7 @@
 	 * BIT_12 = Remote Write Optimization (1 - Enabled, 0 - Disabled)
 	 * BIT 11-0 = Reserved
 	 */
-	uint16_t flags;
+	__le16	flags;
 	uint8_t	reserved1[32];
 	uint16_t discard_OHRB_timeout_value;
 	uint16_t remote_write_opt_queue_num;
@@ -1652,7 +1744,7 @@
 	 */
 	uint8_t	 firmware_options[2];
 
-	uint16_t frame_payload_size;
+	__le16	frame_payload_size;
 	__le16	max_iocb_allocation;
 	__le16	execution_throttle;
 	uint8_t	 retry_count;
@@ -2121,6 +2213,12 @@
 #define CS_COMPLETE_CHKCOND	0x30	/* Error? */
 #define CS_IOCB_ERROR		0x31	/* Generic error for IOCB request
 					   failure */
+#define CS_REJECT_RECEIVED	0x4E	/* Reject received */
+#define CS_EDIF_AUTH_ERROR	0x63	/* decrypt error */
+#define CS_EDIF_PAD_LEN_ERROR	0x65	/* pad > frame size, not 4byte align */
+#define CS_EDIF_INV_REQ		0x66	/* invalid request */
+#define CS_EDIF_SPI_ERROR	0x67	/* rx frame unable to locate sa */
+#define CS_EDIF_HDR_ERROR	0x69	/* data frame != expected len */
 #define CS_BAD_PAYLOAD		0x80	/* Driver defined */
 #define CS_UNKNOWN		0x81	/* Driver defined */
 #define CS_RETRY		0x82	/* Driver defined */
@@ -2312,6 +2410,7 @@
 			__le16	nport_handle;
 			uint16_t reserved_2;
 			__le16	flags;
+#define NOTIFY24XX_FLAGS_FCSP		BIT_5
 #define NOTIFY24XX_FLAGS_GLOBAL_TPRLO   BIT_1
 #define NOTIFY24XX_FLAGS_PUREX_IOCB     BIT_0
 			__le16	srr_rx_id;
@@ -2395,11 +2494,9 @@
  */
 typedef enum {
 	FCT_UNKNOWN,
-	FCT_RSCN,
-	FCT_SWITCH,
-	FCT_BROADCAST,
-	FCT_INITIATOR,
-	FCT_TARGET,
+	FCT_BROADCAST = 0x01,
+	FCT_INITIATOR = 0x02,
+	FCT_TARGET    = 0x04,
 	FCT_NVME_INITIATOR = 0x10,
 	FCT_NVME_TARGET = 0x20,
 	FCT_NVME_DISCOVERY = 0x40,
@@ -2442,6 +2539,7 @@
 	DSC_LOGIN_COMPLETE,
 	DSC_ADISC,
 	DSC_DELETE_PEND,
+	DSC_LOGIN_AUTH_PEND,
 };
 
 enum login_state {	/* FW control Target side */
@@ -2469,12 +2567,6 @@
 	struct list_head list;
 	struct scsi_qla_host *vha;
 
-	uint8_t node_name[WWN_SIZE];
-	uint8_t port_name[WWN_SIZE];
-	port_id_t d_id;
-	uint16_t loop_id;
-	uint16_t old_loop_id;
-
 	unsigned int conf_compl_supported:1;
 	unsigned int deleted:2;
 	unsigned int free_pending:1;
@@ -2491,15 +2583,26 @@
 	unsigned int n2n_flag:1;
 	unsigned int explicit_logout:1;
 	unsigned int prli_pend_timer:1;
+	unsigned int do_prli_nvme:1;
+
+	uint8_t nvme_flag;
+
+	uint8_t node_name[WWN_SIZE];
+	uint8_t port_name[WWN_SIZE];
+	port_id_t d_id;
+	uint16_t loop_id;
+	uint16_t old_loop_id;
 
 	struct completion nvme_del_done;
 	uint32_t nvme_prli_service_param;
+#define NVME_PRLI_SP_PI_CTRL	BIT_9
+#define NVME_PRLI_SP_SLER	BIT_8
 #define NVME_PRLI_SP_CONF       BIT_7
 #define NVME_PRLI_SP_INITIATOR  BIT_5
 #define NVME_PRLI_SP_TARGET     BIT_4
 #define NVME_PRLI_SP_DISCOVERY  BIT_3
 #define NVME_PRLI_SP_FIRST_BURST	BIT_0
-	uint8_t nvme_flag;
+
 	uint32_t nvme_first_burst_size;
 #define NVME_FLAG_REGISTERED 4
 #define NVME_FLAG_DELETING 2
@@ -2510,6 +2613,8 @@
 	int generation;
 
 	struct se_session *se_sess;
+	struct list_head sess_cmd_list;
+	spinlock_t sess_cmd_lock;
 	struct kref sess_kref;
 	struct qla_tgt *tgt;
 	unsigned long expires;
@@ -2571,6 +2676,39 @@
 	u8 last_login_state;
 	u16 n2n_link_reset_cnt;
 	u16 n2n_chip_reset;
+
+	struct dentry *dfs_rport_dir;
+
+	u64 tgt_short_link_down_cnt;
+	u64 tgt_link_down_time;
+	u64 dev_loss_tmo;
+	/*
+	 * EDIF parameters for encryption.
+	 */
+	struct {
+		uint32_t	enable:1;	/* device is edif enabled/req'd */
+		uint32_t	app_stop:2;
+		uint32_t	aes_gmac:1;
+		uint32_t	app_sess_online:1;
+		uint32_t	tx_sa_set:1;
+		uint32_t	rx_sa_set:1;
+		uint32_t	tx_sa_pending:1;
+		uint32_t	rx_sa_pending:1;
+		uint32_t	tx_rekey_cnt;
+		uint32_t	rx_rekey_cnt;
+		uint64_t	tx_bytes;
+		uint64_t	rx_bytes;
+		uint8_t		sess_down_acked;
+		uint8_t		auth_state;
+		uint16_t	authok:1;
+		uint16_t	rekey_cnt;
+		struct list_head edif_indx_list;
+		spinlock_t  indx_list_lock;
+
+		struct list_head tx_sa_list;
+		struct list_head rx_sa_list;
+		spinlock_t	sa_list_lock;
+	} edif;
 } fc_port_t;
 
 enum {
@@ -2595,24 +2733,28 @@
 /*
  * Fibre channel port/lun states.
  */
-#define FCS_UNCONFIGURED	1
-#define FCS_DEVICE_DEAD		2
-#define FCS_DEVICE_LOST		3
-#define FCS_ONLINE		4
+enum {
+	FCS_UNKNOWN,		/* 0: new value, not present as a #define */
+	FCS_UNCONFIGURED,	/* 1, same value as the former #define */
+	FCS_DEVICE_DEAD,	/* 2 */
+	FCS_DEVICE_LOST,	/* 3 */
+	FCS_ONLINE,		/* 4 */
+};
 
 extern const char *const port_state_str[5];
 
-static const char * const port_dstate_str[] = {
-	"DELETED",
-	"GNN_ID",
-	"GNL",
-	"LOGIN_PEND",
-	"LOGIN_FAILED",
-	"GPDB",
-	"UPD_FCPORT",
-	"LOGIN_COMPLETE",
-	"ADISC",
-	"DELETE_PEND"
+/*
+ * Printable names of the DSC_* discovery states, indexed by enumerator.
+ * Designated initializers tie each string to its state, so the table stays
+ * correct if the enum is reordered.
+ */
+static const char *const port_dstate_str[] = {
+	[DSC_DELETED]		= "DELETED",
+	[DSC_GNN_ID]		= "GNN_ID",
+	[DSC_GNL]		= "GNL",
+	[DSC_LOGIN_PEND]	= "LOGIN_PEND",
+	[DSC_LOGIN_FAILED]	= "LOGIN_FAILED",
+	[DSC_GPDB]		= "GPDB",
+	[DSC_UPD_FCPORT]	= "UPD_FCPORT",
+	[DSC_LOGIN_COMPLETE]	= "LOGIN_COMPLETE",
+	[DSC_ADISC]		= "ADISC",
+	[DSC_DELETE_PEND]	= "DELETE_PEND",
+	[DSC_LOGIN_AUTH_PEND]	= "LOGIN_AUTH_PEND",
+};
 
 /*
@@ -2624,6 +2766,8 @@
 #define FCF_ASYNC_SENT		BIT_3
 #define FCF_CONF_COMP_SUPPORTED BIT_4
 #define FCF_ASYNC_ACTIVE	BIT_5
+#define FCF_FCSP_DEVICE		BIT_6
+#define FCF_EDIF_DELETE		BIT_7
 
 /* No loop ID flag. */
 #define FC_NO_LOOP_ID		0x1000
@@ -2715,7 +2859,7 @@
 /*
  * FDMI HBA attribute types.
  */
-#define FDMI1_HBA_ATTR_COUNT			9
+#define FDMI1_HBA_ATTR_COUNT			10
 #define FDMI2_HBA_ATTR_COUNT			17
 
 #define FDMI_HBA_NODE_NAME			0x1
@@ -2812,7 +2956,11 @@
 #define FDMI_PORT_SPEED_8GB		0x10
 #define FDMI_PORT_SPEED_16GB		0x20
 #define FDMI_PORT_SPEED_32GB		0x40
-#define FDMI_PORT_SPEED_64GB		0x80
+#define FDMI_PORT_SPEED_20GB		0x80
+#define FDMI_PORT_SPEED_40GB		0x100
+#define FDMI_PORT_SPEED_128GB		0x200
+#define FDMI_PORT_SPEED_64GB		0x400
+#define FDMI_PORT_SPEED_256GB		0x800
 #define FDMI_PORT_SPEED_UNKNOWN		0x8000
 
 #define FC_CLASS_2	0x04
@@ -3118,6 +3266,8 @@
 #define GFF_NVME_OFFSET		23 /* type = 28h */
 		struct {
 			uint8_t fc4_features[128];
+#define FC4_FF_TARGET    BIT_0
+#define FC4_FF_INITIATOR BIT_1
 		} gff_id;
 		struct {
 			uint8_t reserved;
@@ -3310,8 +3460,10 @@
 	void (*fw_dump)(struct scsi_qla_host *vha);
 	void (*mpi_fw_dump)(struct scsi_qla_host *, int);
 
+	/* Context: task, might sleep */
 	int (*beacon_on) (struct scsi_qla_host *);
 	int (*beacon_off) (struct scsi_qla_host *);
+
 	void (*beacon_blink) (struct scsi_qla_host *);
 
 	void *(*read_optrom)(struct scsi_qla_host *, void *,
@@ -3322,7 +3474,10 @@
 	int (*get_flash_version) (struct scsi_qla_host *, void *);
 	int (*start_scsi) (srb_t *);
 	int (*start_scsi_mq) (srb_t *);
+
+	/* Context: task, might sleep */
 	int (*abort_isp) (struct scsi_qla_host *);
+
 	int (*iospace_config)(struct qla_hw_data *);
 	int (*initialize_adapter)(struct scsi_qla_host *);
 };
@@ -3389,6 +3544,7 @@
 	QLA_EVT_SP_RETRY,
 	QLA_EVT_IIDMA,
 	QLA_EVT_ELS_PLOGI,
+	QLA_EVT_SA_REPLACE,
 };
 
 
@@ -3447,6 +3603,11 @@
 			u8 fc4_type;
 			srb_t *sp;
 		} gpnft;
+		struct {
+			struct edif_sa_ctl	*sa_ctl;
+			fc_port_t *fcport;
+			uint16_t nport_handle;
+		} sa_update;
 	 } u;
 };
 
@@ -3535,6 +3696,14 @@
 	uint64_t num_term_xchg_sent;
 };
 
+/* Byte and request counters, kept separately for input and output. */
+struct qla_counters {
+	uint64_t input_bytes;
+	uint64_t input_requests;
+	uint64_t output_bytes;
+	uint64_t output_requests;
+
+};
+
 struct qla_qpair;
 
 /* Response queue data structure */
@@ -3593,6 +3762,15 @@
 	uint8_t req_pkt[REQUEST_ENTRY_SIZE];
 };
 
+/*
+ * IOCB accounting counters.
+ * NOTE(review): presumably total/limit describe the firmware-wide budget,
+ * qp_limit the per-queue-pair share and used the current consumption --
+ * confirm where these fields are initialised.
+ */
+struct qla_fw_resources {
+	u16 iocbs_total;
+	u16 iocbs_limit;
+	u16 iocbs_qp_limit;
+	u16 iocbs_used;
+};
+
+/* NOTE(review): presumably a percentage of iocbs_total -- confirm. */
+#define QLA_IOCB_PCT_LIMIT 95
+
 /*Queue pair data structure */
 struct qla_qpair {
 	spinlock_t qp_lock;
@@ -3619,6 +3797,7 @@
 	uint32_t enable_class_2:1;
 	uint32_t enable_explicit_conf:1;
 	uint32_t use_shadow_reg:1;
+	uint32_t rcv_intr:1;
 
 	uint16_t id;			/* qp number used with FW */
 	uint16_t vp_idx;		/* vport ID */
@@ -3634,13 +3813,20 @@
 	struct qla_msix_entry *msix; /* point to &ha->msix_entries[x] */
 	struct qla_hw_data *hw;
 	struct work_struct q_work;
+	struct qla_counters counters;
+
 	struct list_head qp_list_elem; /* vha->qp_list */
 	struct list_head hints_list;
-	uint16_t cpuid;
+
 	uint16_t retry_term_cnt;
 	__le32	retry_term_exchg_addr;
 	uint64_t retry_term_jiff;
 	struct qla_tgt_counters tgt_counters;
+	uint16_t cpuid;
+	struct qla_fw_resources fwres ____cacheline_aligned;
+	u32	cmd_cnt;
+	u32	cmd_completion_cnt;
+	u32	prev_completion_cnt;
 };
 
 /* Place holder for FW buffer parameters */
@@ -3797,7 +3983,7 @@
 	__le32 __iomem *atio_q_in;
 	__le32 __iomem *atio_q_out;
 
-	struct qla_tgt_func_tmpl *tgt_ops;
+	const struct qla_tgt_func_tmpl *tgt_ops;
 	struct qla_tgt_vp_map *tgt_vp_map;
 
 	int saved_set;
@@ -3824,7 +4010,6 @@
 	int num_act_qpairs;
 #define DEFAULT_NAQP 2
 	spinlock_t atio_lock ____cacheline_aligned;
-	struct btree_head32 host_map;
 };
 
 #define MAX_QFULL_CMDS_ALLOC	8192
@@ -3839,6 +4024,13 @@
 	u32 num_mpi_reset;
 };
 
+/*
+ * PCI error-recovery state of the adapter; refer to pcie_do_recovery().
+ * QLA_PCI_RESUME is the first enumerator and therefore has the value 0.
+ */
+typedef enum {
+	QLA_PCI_RESUME,
+	QLA_PCI_ERR_DETECTED,
+	QLA_PCI_MMIO_ENABLED,
+	QLA_PCI_SLOT_RESET,
+} pci_error_state_t;
 /*
  * Qlogic host adapter specific data structure.
 */
@@ -3847,6 +4039,7 @@
 	/* SRB cache. */
 #define SRB_MIN_REQ     128
 	mempool_t       *srb_mempool;
+	u8 port_name[WWN_SIZE];
 
 	volatile struct {
 		uint32_t	mbox_int		:1;
@@ -3907,7 +4100,14 @@
 		uint32_t	scm_supported_f:1;
 				/* Enabled in Driver */
 		uint32_t	scm_enabled:1;
-		uint32_t	max_req_queue_warned:1;
+		uint32_t	edif_hw:1;
+		uint32_t	edif_enabled:1;
+		uint32_t	n2n_fw_acc_sec:1;
+		uint32_t	plogi_template_valid:1;
+		uint32_t	port_isolated:1;
+		uint32_t	eeh_flush:2;
+#define EEH_FLUSH_RDY  1
+#define EEH_FLUSH_DONE 2
 	} flags;
 
 	uint16_t max_exchg;
@@ -3942,6 +4142,7 @@
 	uint32_t		rsp_que_len;
 	uint32_t		req_que_off;
 	uint32_t		rsp_que_off;
+	unsigned long		eeh_jif;
 
 	/* Multi queue data structs */
 	device_reg_t *mqiobase;
@@ -4124,15 +4325,28 @@
 #define IS_OEM_001(ha)          ((ha)->device_type & DT_OEM_001)
 #define HAS_EXTENDED_IDS(ha)    ((ha)->device_type & DT_EXTENDED_IDS)
 #define IS_CT6_SUPPORTED(ha)	((ha)->device_type & DT_CT6_SUPPORTED)
-#define IS_MQUE_CAPABLE(ha)	((ha)->mqenable || IS_QLA83XX(ha) || \
-				IS_QLA27XX(ha) || IS_QLA28XX(ha))
+#define IS_MQUE_CAPABLE(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+				 IS_QLA28XX(ha))
 #define IS_BIDI_CAPABLE(ha) \
     (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
 /* Bit 21 of fw_attributes decides the MCTP capabilities */
 #define IS_MCTP_CAPABLE(ha)	(IS_QLA2031(ha) && \
 				((ha)->fw_attributes_ext[0] & BIT_0))
-#define IS_PI_UNINIT_CAPABLE(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha))
-#define IS_PI_IPGUARD_CAPABLE(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha))
+/* Firmware advertises ABTS support through BIT_14 of fw_attributes_ext[0]. */
+#define QLA_ABTS_FW_ENABLED(_ha)       ((_ha)->fw_attributes_ext[0] & BIT_14)
+/* SRB type predicates. */
+#define QLA_SRB_NVME_LS(_sp) ((_sp)->type == SRB_NVME_LS)
+#define QLA_SRB_NVME_CMD(_sp) ((_sp)->type == SRB_NVME_CMD)
+#define QLA_NVME_IOS(_sp) (QLA_SRB_NVME_CMD(_sp) || QLA_SRB_NVME_LS(_sp))
+/*
+ * True when the SRB is of the given NVMe kind and its adapter has ABTS
+ * support enabled. @_sp is parenthesized before it is dereferenced so that
+ * any pointer expression can be passed.
+ */
+#define QLA_LS_ABTS_WAIT_ENABLED(_sp) \
+	(QLA_SRB_NVME_LS(_sp) && QLA_ABTS_FW_ENABLED((_sp)->fcport->vha->hw))
+#define QLA_CMD_ABTS_WAIT_ENABLED(_sp) \
+	(QLA_SRB_NVME_CMD(_sp) && QLA_ABTS_FW_ENABLED((_sp)->fcport->vha->hw))
+#define QLA_ABTS_WAIT_ENABLED(_sp) \
+	(QLA_NVME_IOS(_sp) && QLA_ABTS_FW_ENABLED((_sp)->fcport->vha->hw))
+
+#define IS_PI_UNINIT_CAPABLE(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+					 IS_QLA28XX(ha))
+#define IS_PI_IPGUARD_CAPABLE(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+					 IS_QLA28XX(ha))
 #define IS_PI_DIFB_DIX0_CAPABLE(ha)	(0)
 #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha)	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
 					IS_QLA28XX(ha))
@@ -4154,6 +4368,10 @@
 #define USE_ASYNC_SCAN(ha) (IS_QLA25XX(ha) || IS_QLA81XX(ha) ||\
 	IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
 
+/* ISP83xx/27xx/28xx adapter running in ZIO mode 6. */
+#define IS_ZIO_THRESHOLD_CAPABLE(ha) \
+	((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) &&\
+	 ((ha)->zio_mode == QLA_ZIO_MODE_6))
+
 	/* HBA serial number */
 	uint8_t		serial0;
 	uint8_t		serial1;
@@ -4241,7 +4459,7 @@
 	/* Extended Logins  */
 	void		*exlogin_buf;
 	dma_addr_t	exlogin_buf_dma;
-	int		exlogin_size;
+	uint32_t	exlogin_size;
 
 #define ENABLE_EXCHANGE_OFFLD	BIT_2
 
@@ -4252,7 +4470,8 @@
 	int 		exchoffld_count;
 
 	/* n2n */
-	struct els_plogi_payload plogi_els_payld;
+	struct fc_els_flogi plogi_els_payld;
+#define LOGIN_TEMPLATE_SIZE (sizeof(struct fc_els_flogi) - 4)
 
 	void            *swl;
 
@@ -4300,6 +4519,8 @@
 #define FW_ATTR_EXT0_SCM_BROCADE	0x00001000
 	/* Cisco fabric attached */
 #define FW_ATTR_EXT0_SCM_CISCO		0x00002000
+#define FW_ATTR_EXT0_NVME2	BIT_13
+#define FW_ATTR_EXT0_EDIF	BIT_5
 	uint16_t	fw_attributes_ext[2];
 	uint32_t	fw_memory_size;
 	uint32_t	fw_transfer_size;
@@ -4470,7 +4691,9 @@
 	struct qla_chip_state_84xx *cs84xx;
 	struct isp_operations *isp_ops;
 	struct workqueue_struct *wq;
+	struct work_struct heartbeat_work;
 	struct qlfc_fw fw_buf;
+	unsigned long last_heartbeat_run_jiffies;
 
 	/* FCP_CMND priority support */
 	struct qla_fcp_prio_cfg *fcp_prio_cfg;
@@ -4570,8 +4793,25 @@
 #define DEFAULT_ZIO_THRESHOLD 5
 
 	struct qla_hw_data_stat stat;
+	pci_error_state_t pci_error_state;
+	struct dma_pool *purex_dma_pool;
+	struct btree_head32 host_map;
+
+#define EDIF_NUM_SA_INDEX	512
+#define EDIF_TX_SA_INDEX_BASE	EDIF_NUM_SA_INDEX
+	void *edif_rx_sa_id_map;
+	void *edif_tx_sa_id_map;
+	spinlock_t sadb_fp_lock;
+
+	struct list_head sadb_tx_index_list;
+	struct list_head sadb_rx_index_list;
+	spinlock_t sadb_lock;	/* protects list */
+	struct els_reject elsrej;
+	u8 edif_post_stop_cnt_down;
 };
 
+#define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
+
 struct active_regions {
 	uint8_t global;
 	struct {
@@ -4579,6 +4819,7 @@
 		uint8_t vpd_nvram;
 		uint8_t npiv_config_0_1;
 		uint8_t npiv_config_2_3;
+		uint8_t nvme_params;
 	} aux;
 };
 
@@ -4610,6 +4851,8 @@
 	} iocb;
 };
 
+#include "qla_edif.h"
+
 #define SCM_FLAG_RDF_REJECT		0x00
 #define SCM_FLAG_RDF_COMPLETED		0x01
 
@@ -4649,6 +4892,7 @@
 		uint32_t	qpairs_rsp_created:1;
 		uint32_t	nvme_enabled:1;
 		uint32_t        nvme_first_burst:1;
+		uint32_t        nvme2_enabled:1;
 	} flags;
 
 	atomic_t	loop_state;
@@ -4689,7 +4933,7 @@
 #define FX00_CRITEMP_RECOVERY	25
 #define FX00_HOST_INFO_RESEND	26
 #define QPAIR_ONLINE_CHECK_NEEDED	27
-#define SET_NVME_ZIO_THRESHOLD_NEEDED	28
+#define DO_EEH_RECOVERY		28
 #define DETECT_SFP_CHANGE	29
 #define N2N_LOGIN_NEEDED	30
 #define IOCB_WORK_ACTIVE	31
@@ -4747,7 +4991,6 @@
 
 	/* list of commands waiting on workqueue */
 	struct list_head	qla_cmd_list;
-	struct list_head	qla_sess_op_cmd_list;
 	struct list_head	unknown_atio_list;
 	spinlock_t		cmd_list_lock;
 	struct delayed_work	unknown_atio_work;
@@ -4807,6 +5050,8 @@
 	uint16_t ql2xexchoffld;
 	uint16_t ql2xiniexchg;
 
+	struct dentry *dfs_rport_root;
+
 	struct purex_list {
 		struct list_head head;
 		spinlock_t lock;
@@ -4828,6 +5073,19 @@
 	uint8_t	scm_fabric_connection_flags;
 
 	unsigned int irq_offset;
+
+	u64 hw_err_cnt;
+	u64 interface_err_cnt;
+	u64 cmd_timeout_cnt;
+	u64 reset_cmd_err_cnt;
+	u64 link_down_time;
+	u64 short_link_down_cnt;
+	struct edif_dbell e_dbell;
+	struct pur_core pur_cinfo;
+
+#define DPORT_DIAG_IN_PROGRESS                 BIT_0
+#define DPORT_DIAG_CHIP_RESET_IN_PROGRESS      BIT_1
+	uint16_t dport_status;
 } scsi_qla_host_t;
 
 struct qla27xx_image_status {
@@ -4846,6 +5104,7 @@
 #define QLA28XX_AUX_IMG_VPD_NVRAM		BIT_1
 #define QLA28XX_AUX_IMG_NPIV_CONFIG_0_1		BIT_2
 #define QLA28XX_AUX_IMG_NPIV_CONFIG_2_3		BIT_3
+#define QLA28XX_AUX_IMG_NVME_PARAMS		BIT_4
 
 #define SET_VP_IDX	1
 #define SET_AL_PA	2
@@ -4923,17 +5182,17 @@
 		(test_bit(ISP_ABORT_NEEDED, &ha->dpc_flags) || \
 			 test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags))
 
-#define QLA_VHA_MARK_BUSY(__vha, __bail) do {		\
-	atomic_inc(&__vha->vref_count);			\
-	mb();						\
-	if (__vha->flags.delete_progress) {		\
-		atomic_dec(&__vha->vref_count);		\
-		wake_up(&__vha->vref_waitq);		\
-		__bail = 1;				\
-	} else {					\
-		__bail = 0;				\
-	}						\
-} while (0)
+/*
+ * Take a reference on @vha unless it is being deleted.
+ *
+ * Returns true when the caller must bail out: delete_progress was set, the
+ * reference has already been dropped again and vref_waitq has been woken.
+ * Returns false when the reference is held.
+ */
+static inline bool qla_vha_mark_busy(scsi_qla_host_t *vha)
+{
+	atomic_inc(&vha->vref_count);
+	/* Full barrier between the increment and the delete_progress read. */
+	mb();
+
+	if (!vha->flags.delete_progress)
+		return false;
+
+	atomic_dec(&vha->vref_count);
+	wake_up(&vha->vref_waitq);
+	return true;
+}
 
 #define QLA_VHA_MARK_NOT_BUSY(__vha) do {		\
 	atomic_dec(&__vha->vref_count);			\
@@ -4952,8 +5211,7 @@
 } while (0)
 
 #define QLA_QPAIR_MARK_NOT_BUSY(__qpair)		\
-	atomic_dec(&__qpair->ref_count);		\
-
+	atomic_dec(&__qpair->ref_count)
 
 #define QLA_ENA_CONF(_ha) {\
     int i;\
@@ -4999,6 +5257,9 @@
 #define QLA_BUSY			0x107
 #define QLA_ALREADY_REGISTERED		0x109
 #define QLA_OS_TIMER_EXPIRED		0x10a
+#define QLA_ERR_NO_QPAIR		0x10b
+#define QLA_ERR_NOT_FOUND		0x10c
+#define QLA_ERR_FROM_FW			0x10d
 
 #define NVRAM_DELAY()		udelay(10)
 
@@ -5019,8 +5280,6 @@
 
 #define	QLA_DSDS_PER_IOCB	37
 
-#define CMD_SP(Cmnd)		((Cmnd)->SCp.ptr)
-
 #define QLA_SG_ALL	1024
 
 enum nexus_wait_type {
@@ -5029,6 +5288,43 @@
 	WAIT_LUN,
 };
 
+#define INVALID_EDIF_SA_INDEX	0xffff
+#define RX_DELETE_NO_EDIF_SA_INDEX	0xfffe
+
+#define QLA_SKIP_HANDLE QLA_TGT_SKIP_HANDLE
+
+/*
+ * edif hash element: per-nport_handle state used while SA indexes are
+ * updated or deleted.
+ */
+struct edif_list_entry {
+	uint16_t handle;			/* nport_handle */
+	uint32_t update_sa_index;
+	uint32_t delete_sa_index;
+	uint32_t count;				/* counter for filtering sa_index */
+#define EDIF_ENTRY_FLAGS_CLEANUP	0x01	/* this index is being cleaned up */
+	uint32_t flags;				/* used by sadb cleanup code */
+	fc_port_t *fcport;			/* needed by rx delay timer function */
+	struct timer_list timer;		/* rx delay timer */
+	struct list_head next;			/* list linkage */
+};
+
+#define EDIF_TX_INDX_BASE 512
+#define EDIF_RX_INDX_BASE 0
+#define EDIF_RX_DELETE_FILTER_COUNT 3	/* delay queuing rx delete until this many */
+
+/* Entry in the sa_index free pool: one sa_index together with its SPI. */
+
+struct sa_index_pair {
+	uint16_t sa_index;
+	uint32_t spi;
+};
+
+/*
+ * edif sa_index data structure: up to two sa_index/SPI pairs recorded for
+ * one fcport / nport handle, linked into a list through @next.
+ */
+struct edif_sa_index_entry {
+	struct sa_index_pair sa_pair[2];
+	fc_port_t *fcport;
+	uint16_t handle;
+	struct list_head next;
+};
+
 /* Refer to SNIA SFF 8247 */
 struct sff_8247_a0 {
 	u8 txid;	/* transceiver id */
@@ -5130,6 +5426,8 @@
 	 ha->current_topology == ISP_CFG_N || \
 	 !ha->current_topology)
 
+#define QLA_N2N_WAIT_TIME	5 /* 2 * ra_tov(n2n) + 1 */
+
 #define NVME_TYPE(fcport) \
 	(fcport->fc4_type & FS_FC4TYPE_NVME) \
 
@@ -5142,16 +5440,88 @@
 #define NVME_FCP_TARGET(fcport) \
 	(FCP_TYPE(fcport) && NVME_TYPE(fcport)) \
 
+/* Port offers both FCP and NVMe, and the adapter is set to prefer NVMe. */
+#define NVME_PRIORITY(ha, fcport) \
+	(NVME_FCP_TARGET(fcport) && \
+	 ((ha)->fc4_type_priority == FC4_PRIORITY_NVME))
+
 #define NVME_TARGET(ha, fcport) \
-	((NVME_FCP_TARGET(fcport) && \
-	(ha->fc4_type_priority == FC4_PRIORITY_NVME)) || \
+	(fcport->do_prli_nvme || \
 	NVME_ONLY_TARGET(fcport)) \
 
 #define PRLI_PHASE(_cls) \
 	((_cls == DSC_LS_PRLI_PEND) || (_cls == DSC_LS_PRLI_COMP))
 
+/*
+ * Request/response layouts for the ql_vnd_* statistics and port-management
+ * commands. All structures are __packed.
+ * NOTE(review): the names suggest these are exchanged with user space via
+ * vendor commands; two of the packed structures embed an enum, whose size is
+ * compiler-dependent -- confirm the expected wire layout.
+ */
+enum ql_vnd_host_stat_action {
+	QLA_STOP = 0,
+	QLA_START,
+	QLA_CLEAR,
+};
+
+/* Request: apply @action to the statistics selected by @stat_type. */
+struct ql_vnd_mng_host_stats_param {
+	u32 stat_type;
+	enum ql_vnd_host_stat_action action;
+} __packed;
+
+/* Response to ql_vnd_mng_host_stats_param. */
+struct ql_vnd_mng_host_stats_resp {
+	u32 status;
+} __packed;
+
+/* Request: select host statistics by @stat_type. */
+struct ql_vnd_stats_param {
+	u32 stat_type;
+} __packed;
+
+/* Request: select statistics by @stat_type for target @tgt_id. */
+struct ql_vnd_tgt_stats_param {
+	s32 tgt_id;
+	u32 stat_type;
+} __packed;
+
+enum ql_vnd_host_port_action {
+	QLA_ENABLE = 0,
+	QLA_DISABLE,
+};
+
+/* Request: enable or disable the host port. */
+struct ql_vnd_mng_host_port_param {
+	enum ql_vnd_host_port_action action;
+} __packed;
+
+/* Response to ql_vnd_mng_host_port_param. */
+struct ql_vnd_mng_host_port_resp {
+	u32 status;
+} __packed;
+
+/* One reported counter. */
+struct ql_vnd_stat_entry {
+	u32 stat_type;	/* Failure type */
+	u32 tgt_num;	/* Target Num */
+	u64 cnt;	/* Counter value */
+} __packed;
+
+/* Variable-length list of counters; @entry is a flexible array member. */
+struct ql_vnd_stats {
+	u64 entry_count; /* Num of entries */
+	u64 rservd;
+	struct ql_vnd_stat_entry entry[]; /* Place holder of entries */
+} __packed;
+
+/* Host statistics response: status followed by the counter list. */
+struct ql_vnd_host_stats_resp {
+	u32 status;
+	struct ql_vnd_stats stats;
+} __packed;
+
+/* Target statistics response: status followed by the counter list. */
+struct ql_vnd_tgt_stats_resp {
+	u32 status;
+	struct ql_vnd_stats stats;
+} __packed;
+
 #include "qla_target.h"
 #include "qla_gbl.h"
 #include "qla_dbg.h"
 #include "qla_inline.h"
+
+/* True while the port's discovery state is DSC_DELETE_PEND or DSC_DELETED. */
+#define IS_SESSION_DELETED(_fcport) ((_fcport)->disc_state == DSC_DELETE_PEND || \
+				      (_fcport)->disc_state == DSC_DELETED)
+
+/*
+ * Expands to a format string followed by its arguments, for use as the tail
+ * of a printf-style call: the caller's @_fmt and @_args are wrapped with the
+ * function name, the port name and a summary of the fcport's state fields.
+ * @_fp is parenthesized at every use so any pointer expression can be passed.
+ */
+#define DBG_FCPORT_PRFMT(_fp, _fmt, _args...) \
+	"%s: %8phC: " _fmt " (state=%d disc_state=%d scan_state=%d loopid=0x%x deleted=%d flags=0x%x)\n", \
+	__func__, (_fp)->port_name, ##_args, atomic_read(&(_fp)->state), \
+	(_fp)->disc_state, (_fp)->scan_state, (_fp)->loop_id, (_fp)->deleted, \
+	(_fp)->flags
+
 #endif

diff --git a/scst/qla2x00t-32gbit/qla_dfs.c b/scst/qla2x00t-32gbit/qla_dfs.c
index e62b211..038352a 100644
--- a/scst/qla2x00t-32gbit/qla_dfs.c
+++ b/scst/qla2x00t-32gbit/qla_dfs.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 
@@ -12,6 +11,144 @@
 static struct dentry *qla2x00_dfs_root;
 static atomic_t qla2x00_dfs_root_count;
 
+/* Attribute id accepted by qla_dfs_rport_get() and qla_dfs_rport_set(). */
+#define QLA_DFS_RPORT_DEVLOSS_TMO	1
+
+/*
+ * Read one rport debugfs attribute into *val.
+ *
+ * @fp:      remote port the attribute belongs to
+ * @attr_id: attribute selector; only QLA_DFS_RPORT_DEVLOSS_TMO is handled
+ * @val:     receives the attribute value on success
+ *
+ * Returns 0 on success, -EINVAL for an unknown attribute or when built for a
+ * kernel older than 4.15, and -EIO when the port is not flagged
+ * NVME_FLAG_REGISTERED.
+ */
+static int
+qla_dfs_rport_get(struct fc_port *fp, int attr_id, u64 *val)
+{
+	if (attr_id != QLA_DFS_RPORT_DEVLOSS_TMO)
+		return -EINVAL;
+
+	/* Only supported for FC-NVMe devices that are registered. */
+	if (!(fp->nvme_flag & NVME_FLAG_REGISTERED))
+		return -EIO;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
+	*val = fp->nvme_remote_port->dev_loss_tmo;
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+/*
+ * Write one rport debugfs attribute.
+ *
+ * @fp:      remote port the attribute belongs to
+ * @attr_id: attribute selector; only QLA_DFS_RPORT_DEVLOSS_TMO is handled
+ * @val:     new value
+ *
+ * Returns the result of nvme_fc_set_remoteport_devloss() when that call is
+ * compiled in, -EINVAL for an unknown attribute or when it is not compiled
+ * in, and -EIO when the port is not flagged NVME_FLAG_REGISTERED.
+ */
+static int
+qla_dfs_rport_set(struct fc_port *fp, int attr_id, u64 val)
+{
+	if (attr_id != QLA_DFS_RPORT_DEVLOSS_TMO)
+		return -EINVAL;
+
+	/* Only supported for FC-NVMe devices that are registered. */
+	if (!(fp->nvme_flag & NVME_FLAG_REGISTERED))
+		return -EIO;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) && IS_ENABLED(CONFIG_NVME_FC)
+	return nvme_fc_set_remoteport_devloss(fp->nvme_remote_port, val);
+#else /* CONFIG_NVME_FC */
+	return -EINVAL;
+#endif /* CONFIG_NVME_FC */
+}
+
+/*
+ * Generate a read/write debugfs attribute for an fc_port.
+ *
+ * _attr_id : Attribute id forwarded to qla_dfs_rport_get()/qla_dfs_rport_set().
+ * _attr    : Attribute name, pasted into the generated identifiers.
+ *
+ * Emits qla_dfs_rport_<_attr>_get(), qla_dfs_rport_<_attr>_set() and the
+ * file operations qla_dfs_rport_<_attr>_fops.  The void *data handed to the
+ * accessors is used as the struct fc_port pointer.
+ */
+#define DEFINE_QLA_DFS_RPORT_RW_ATTR(_attr_id, _attr)		\
+static int qla_dfs_rport_##_attr##_get(void *data, u64 *val)	\
+{								\
+	struct fc_port *fp = data;				\
+	return qla_dfs_rport_get(fp, _attr_id, val);		\
+}								\
+static int qla_dfs_rport_##_attr##_set(void *data, u64 val)	\
+{								\
+	struct fc_port *fp = data;				\
+	return qla_dfs_rport_set(fp, _attr_id, val);		\
+}								\
+DEFINE_DEBUGFS_ATTRIBUTE(qla_dfs_rport_##_attr##_fops,		\
+		qla_dfs_rport_##_attr##_get,			\
+		qla_dfs_rport_##_attr##_set, "%llu\n")
+
+/*
+ * Wrapper for getting fc_port fields.
+ *
+ * _attr    : Attribute name, pasted into the generated identifiers.
+ * _get_val : Expression that yields the value; it is evaluated inside the
+ *            generated getter, where the fc_port is available as "fp".
+ *
+ * Emits qla_dfs_rport_field_<_attr>_get() and the file operations
+ * qla_dfs_rport_field_<_attr>_fops.  No setter is registered (NULL is passed
+ * in its place), and the getter always returns 0.
+ */
+#define DEFINE_QLA_DFS_RPORT_FIELD_GET(_attr, _get_val)			\
+static int qla_dfs_rport_field_##_attr##_get(void *data, u64 *val)	\
+{									\
+	struct fc_port *fp = data;					\
+	*val = _get_val;						\
+	return 0;							\
+}									\
+DEFINE_DEBUGFS_ATTRIBUTE(qla_dfs_rport_field_##_attr##_fops,		\
+		qla_dfs_rport_field_##_attr##_get,			\
+		NULL, "%llu\n")
+
+/* Alias of DEFINE_QLA_DFS_RPORT_FIELD_GET with the same arguments. */
+#define DEFINE_QLA_DFS_RPORT_ACCESS(_attr, _get_val) \
+	DEFINE_QLA_DFS_RPORT_FIELD_GET(_attr, _get_val)
+
+/* Shorthand for a getter that reads the fc_port member named _attr. */
+#define DEFINE_QLA_DFS_RPORT_FIELD(_attr) \
+	DEFINE_QLA_DFS_RPORT_FIELD_GET(_attr, fp->_attr)
+
+/* Read/write attribute routed through qla_dfs_rport_get()/_set(). */
+DEFINE_QLA_DFS_RPORT_RW_ATTR(QLA_DFS_RPORT_DEVLOSS_TMO, dev_loss_tmo);
+
+/*
+ * Getter-only attributes.  Each DEFINE_QLA_DFS_RPORT_FIELD() reads the
+ * fc_port member of the same name; the last two use an explicit expression.
+ */
+DEFINE_QLA_DFS_RPORT_FIELD(disc_state);
+DEFINE_QLA_DFS_RPORT_FIELD(scan_state);
+DEFINE_QLA_DFS_RPORT_FIELD(fw_login_state);
+DEFINE_QLA_DFS_RPORT_FIELD(login_pause);
+DEFINE_QLA_DFS_RPORT_FIELD(flags);
+DEFINE_QLA_DFS_RPORT_FIELD(nvme_flag);
+DEFINE_QLA_DFS_RPORT_FIELD(last_rscn_gen);
+DEFINE_QLA_DFS_RPORT_FIELD(rscn_gen);
+DEFINE_QLA_DFS_RPORT_FIELD(login_gen);
+DEFINE_QLA_DFS_RPORT_FIELD(loop_id);
+DEFINE_QLA_DFS_RPORT_FIELD_GET(port_id, fp->d_id.b24);
+DEFINE_QLA_DFS_RPORT_FIELD_GET(sess_kref, kref_read(&fp->sess_kref));
+
+/*
+ * Create the debugfs directory "pn-<port name as 16 hex digits>" for a
+ * remote port under vha->dfs_rport_root and populate it.
+ *
+ * @vha: host whose dfs_rport_root is the parent directory
+ * @fp:  remote port; fp->dfs_rport_dir receives the new directory
+ *
+ * Does nothing when the rports root is absent or the port already has a
+ * directory.  "dev_loss_tmo" (0600) is only created when NVME_TARGET() is
+ * true for the port; every other file is read-only (0400).
+ */
+void
+qla2x00_dfs_create_rport(scsi_qla_host_t *vha, struct fc_port *fp)
+{
+	char wwn[32];
+
+#define QLA_CREATE_RPORT_FIELD_ATTR(_attr)			\
+	debugfs_create_file(#_attr, 0400, fp->dfs_rport_dir,	\
+		fp, &qla_dfs_rport_field_##_attr##_fops)
+
+	if (!vha->dfs_rport_root || fp->dfs_rport_dir)
+		return;
+
+	snprintf(wwn, sizeof(wwn), "pn-%016llx", wwn_to_u64(fp->port_name));
+	fp->dfs_rport_dir = debugfs_create_dir(wwn, vha->dfs_rport_root);
+	/*
+	 * debugfs_create_dir() reports failure as NULL on old kernels and as
+	 * an ERR_PTR() on newer ones.  Handle both and never leave an error
+	 * pointer cached in fp->dfs_rport_dir, so the "already created" test
+	 * above and qla2x00_dfs_remove_rport() only ever see NULL or a real
+	 * dentry.
+	 */
+	if (IS_ERR_OR_NULL(fp->dfs_rport_dir)) {
+		fp->dfs_rport_dir = NULL;
+		return;
+	}
+	if (NVME_TARGET(vha->hw, fp))
+		debugfs_create_file("dev_loss_tmo", 0600, fp->dfs_rport_dir,
+				    fp, &qla_dfs_rport_dev_loss_tmo_fops);
+
+	QLA_CREATE_RPORT_FIELD_ATTR(disc_state);
+	QLA_CREATE_RPORT_FIELD_ATTR(scan_state);
+	QLA_CREATE_RPORT_FIELD_ATTR(fw_login_state);
+	QLA_CREATE_RPORT_FIELD_ATTR(login_pause);
+	QLA_CREATE_RPORT_FIELD_ATTR(flags);
+	QLA_CREATE_RPORT_FIELD_ATTR(nvme_flag);
+	QLA_CREATE_RPORT_FIELD_ATTR(last_rscn_gen);
+	QLA_CREATE_RPORT_FIELD_ATTR(rscn_gen);
+	QLA_CREATE_RPORT_FIELD_ATTR(login_gen);
+	QLA_CREATE_RPORT_FIELD_ATTR(loop_id);
+	QLA_CREATE_RPORT_FIELD_ATTR(port_id);
+	QLA_CREATE_RPORT_FIELD_ATTR(sess_kref);
+}
+
+/*
+ * Tear down the debugfs directory of a remote port, including every file in
+ * it, and forget the dentry.  A no-op when the host has no rports root or the
+ * port has no directory.
+ */
+void
+qla2x00_dfs_remove_rport(scsi_qla_host_t *vha, struct fc_port *fp)
+{
+	if (vha->dfs_rport_root && fp->dfs_rport_dir) {
+		debugfs_remove_recursive(fp->dfs_rport_dir);
+		fp->dfs_rport_dir = NULL;
+	}
+}
+
 static int
 qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
 {
@@ -37,89 +174,63 @@
 	return 0;
 }
 
-static int
-qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
-{
-	scsi_qla_host_t *vha = inode->i_private;
-
-	return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
-}
-
-static const struct file_operations dfs_tgt_sess_ops = {
-	.open		= qla2x00_dfs_tgt_sess_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(qla2x00_dfs_tgt_sess);
 
 static int
 qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused)
 {
 	scsi_qla_host_t *vha = s->private;
 	struct qla_hw_data *ha = vha->hw;
-	struct gid_list_info *gid_list, *gid;
+	struct gid_list_info *gid_list;
 	dma_addr_t gid_list_dma;
 	fc_port_t fc_port;
+	char *id_iter;
 	int rc, i;
 	uint16_t entries, loop_id;
-	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
 
 	seq_printf(s, "%s\n", vha->host_str);
-	if (tgt) {
-		gid_list = dma_alloc_coherent(&ha->pdev->dev,
-		    qla2x00_gid_list_size(ha),
-		    &gid_list_dma, GFP_KERNEL);
-		if (!gid_list) {
-			ql_dbg(ql_dbg_user, vha, 0x7018,
-			    "DMA allocation failed for %u\n",
-			     qla2x00_gid_list_size(ha));
-			return 0;
-		}
-
-		rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
-		    &entries);
-		if (rc != QLA_SUCCESS)
-			goto out_free_id_list;
-
-		gid = gid_list;
-
-		seq_puts(s, "Port Name	Port ID 	Loop ID\n");
-
-		for (i = 0; i < entries; i++) {
-			loop_id = le16_to_cpu(gid->loop_id);
-			memset(&fc_port, 0, sizeof(fc_port_t));
-
-			fc_port.loop_id = loop_id;
-
-			rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
-			seq_printf(s, "%8phC  %02x%02x%02x  %d\n",
-				fc_port.port_name, fc_port.d_id.b.domain,
-				fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
-				fc_port.loop_id);
-			gid = (void *)gid + ha->gid_list_info_size;
-		}
-out_free_id_list:
-		dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
-		    gid_list, gid_list_dma);
+	gid_list = dma_alloc_coherent(&ha->pdev->dev,
+				      qla2x00_gid_list_size(ha),
+				      &gid_list_dma, GFP_KERNEL);
+	if (!gid_list) {
+		ql_dbg(ql_dbg_user, vha, 0x7018,
+		       "DMA allocation failed for %u\n",
+		       qla2x00_gid_list_size(ha));
+		return 0;
 	}
 
+	rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma,
+				  &entries);
+	if (rc != QLA_SUCCESS)
+		goto out_free_id_list;
+
+	id_iter = (char *)gid_list;
+
+	seq_puts(s, "Port Name	Port ID		Loop ID\n");
+
+	for (i = 0; i < entries; i++) {
+		struct gid_list_info *gid =
+			(struct gid_list_info *)id_iter;
+		loop_id = le16_to_cpu(gid->loop_id);
+		memset(&fc_port, 0, sizeof(fc_port_t));
+
+		fc_port.loop_id = loop_id;
+
+		rc = qla24xx_gpdb_wait(vha, &fc_port, 0);
+		seq_printf(s, "%8phC  %02x%02x%02x  %d\n",
+			   fc_port.port_name, fc_port.d_id.b.domain,
+			   fc_port.d_id.b.area, fc_port.d_id.b.al_pa,
+			   fc_port.loop_id);
+		id_iter += ha->gid_list_info_size;
+	}
+out_free_id_list:
+	dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
+			  gid_list, gid_list_dma);
+
 	return 0;
 }
 
-static int
-qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file)
-{
-	scsi_qla_host_t *vha = inode->i_private;
-
-	return single_open(file, qla2x00_dfs_tgt_port_database_show, vha);
-}
-
-static const struct file_operations dfs_tgt_port_database_ops = {
-	.open		= qla2x00_dfs_tgt_port_database_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(qla2x00_dfs_tgt_port_database);
 
 static int
 qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
@@ -127,6 +238,8 @@
 	struct scsi_qla_host *vha = s->private;
 	uint16_t mb[MAX_IOCB_MB_REG];
 	int rc;
+	struct qla_hw_data *ha = vha->hw;
+	u16 iocbs_used, i;
 
 	rc = qla24xx_res_count_wait(vha, mb, SIZEOF_IOCB_MB_REG);
 	if (rc != QLA_SUCCESS) {
@@ -151,23 +264,22 @@
 		    mb[23]);
 	}
 
+	if (ql2xenforce_iocb_limit) {
+		/* lock is not require. It's an estimate. */
+		iocbs_used = ha->base_qpair->fwres.iocbs_used;
+		for (i = 0; i < ha->max_qpairs; i++) {
+			if (ha->queue_pair_map[i])
+				iocbs_used += ha->queue_pair_map[i]->fwres.iocbs_used;
+		}
+
+		seq_printf(s, "Driver: estimate iocb used [%d] high water limit [%d]\n",
+			   iocbs_used, ha->base_qpair->fwres.iocbs_limit);
+	}
+
 	return 0;
 }
 
-static int
-qla_dfs_fw_resource_cnt_open(struct inode *inode, struct file *file)
-{
-	struct scsi_qla_host *vha = inode->i_private;
-
-	return single_open(file, qla_dfs_fw_resource_cnt_show, vha);
-}
-
-static const struct file_operations dfs_fw_resource_cnt_ops = {
-	.open           = qla_dfs_fw_resource_cnt_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(qla_dfs_fw_resource_cnt);
 
 static int
 qla_dfs_tgt_counters_show(struct seq_file *s, void *unused)
@@ -178,6 +290,10 @@
 		core_qla_snd_status, qla_core_ret_sta_ctio, core_qla_free_cmd,
 		num_q_full_sent, num_alloc_iocb_failed, num_term_xchg_sent;
 	u16 i;
+	fc_port_t *fcport = NULL;
+
+	if (qla2x00_chip_is_down(vha))
+		return 0;
 
 	qla_core_sbt_cmd = qpair->tgt_counters.qla_core_sbt_cmd;
 	core_qla_que_buf = qpair->tgt_counters.core_qla_que_buf;
@@ -241,23 +357,34 @@
 		vha->qla_stats.qla_dif_stats.dif_ref_tag_err);
 	seq_printf(s, "DIF App tag err = %d\n",
 		vha->qla_stats.qla_dif_stats.dif_app_tag_err);
+
+	seq_puts(s, "\n");
+	seq_puts(s, "Initiator Error Counters\n");
+	seq_printf(s, "HW Error Count =		%14lld\n",
+		   vha->hw_err_cnt);
+	seq_printf(s, "Link Down Count =	%14lld\n",
+		   vha->short_link_down_cnt);
+	seq_printf(s, "Interface Err Count =	%14lld\n",
+		   vha->interface_err_cnt);
+	seq_printf(s, "Cmd Timeout Count =	%14lld\n",
+		   vha->cmd_timeout_cnt);
+	seq_printf(s, "Reset Count =		%14lld\n",
+		   vha->reset_cmd_err_cnt);
+	seq_puts(s, "\n");
+
+	list_for_each_entry(fcport, &vha->vp_fcports, list) {
+		if (!fcport->rport)
+			continue;
+
+		seq_printf(s, "Target Num = %7d Link Down Count = %14lld\n",
+			   fcport->rport->number, fcport->tgt_short_link_down_cnt);
+	}
+	seq_puts(s, "\n");
+
 	return 0;
 }
 
-static int
-qla_dfs_tgt_counters_open(struct inode *inode, struct file *file)
-{
-	struct scsi_qla_host *vha = inode->i_private;
-
-	return single_open(file, qla_dfs_tgt_counters_show, vha);
-}
-
-static const struct file_operations dfs_tgt_counters_ops = {
-	.open           = qla_dfs_tgt_counters_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(qla_dfs_tgt_counters);
 
 static int
 qla2x00_dfs_fce_show(struct seq_file *s, void *unused)
@@ -366,6 +493,99 @@
 	return 0;
 }
 
+/*
+ * Helper macros for setting up debugfs entries.
+ * _name: The name of the debugfs entry
+ * _ctx_struct: The type of the context that was passed when creating the
+ * debugfs file
+ *
+ * QLA_DFS_SETUP_RD can be used when there is only a show function.
+ * - the show function must be named qla_dfs_<name>_show
+ *
+ * QLA_DFS_SETUP_RW can be used when there are both show and write functions.
+ * - the show function must be named  qla_dfs_<name>_show
+ * - the write function must be named qla_dfs_<name>_write
+ *
+ * Both macros emit qla_dfs_<name>_open() and the file operations
+ * qla_dfs_<name>_ops.
+ *
+ * To add a new debugfs entry:
+ * 1. Create a "struct dentry *" in the appropriate structure, named
+ * dfs_<name>
+ * 2. Set up the file operations using QLA_DFS_SETUP_RD / QLA_DFS_SETUP_RW
+ * 3. Create the debugfs file in qla2x00_dfs_setup() using QLA_DFS_CREATE_FILE
+ * or QLA_DFS_ROOT_CREATE_FILE
+ * 4. Remove the debugfs file in qla2x00_dfs_remove() using QLA_DFS_REMOVE_FILE
+ * or QLA_DFS_ROOT_REMOVE_FILE
+ *
+ * Example for creating a "TEST" debugfs file:
+ * 1. struct qla_hw_data { ... struct dentry *dfs_TEST; }
+ * 2. QLA_DFS_SETUP_RD(TEST, scsi_qla_host_t);
+ * 3. In qla2x00_dfs_setup():
+ * QLA_DFS_CREATE_FILE(ha, TEST, 0600, ha->dfs_dir, vha);
+ * 4. In qla2x00_dfs_remove():
+ * QLA_DFS_REMOVE_FILE(ha, TEST);
+ */
+#define QLA_DFS_SETUP_RD(_name, _ctx_struct)				\
+static int								\
+qla_dfs_##_name##_open(struct inode *inode, struct file *file)		\
+{									\
+	_ctx_struct *__ctx = inode->i_private;				\
+									\
+	return single_open(file, qla_dfs_##_name##_show, __ctx);	\
+}									\
+									\
+static const struct file_operations qla_dfs_##_name##_ops = {		\
+	.open           = qla_dfs_##_name##_open,			\
+	.read           = seq_read,					\
+	.llseek         = seq_lseek,					\
+	.release        = single_release,				\
+};
+
+/*
+ * Read/write variant of QLA_DFS_SETUP_RD: emits the same
+ * qla_dfs_<_name>_open() and additionally wires qla_dfs_<_name>_write into
+ * the .write slot of qla_dfs_<_name>_ops.  Both qla_dfs_<_name>_show and
+ * qla_dfs_<_name>_write must be declared before the macro is used.
+ */
+#define QLA_DFS_SETUP_RW(_name, _ctx_struct)				\
+static int								\
+qla_dfs_##_name##_open(struct inode *inode, struct file *file)		\
+{									\
+	_ctx_struct *__ctx = inode->i_private;				\
+									\
+	return single_open(file, qla_dfs_##_name##_show, __ctx);	\
+}									\
+									\
+static const struct file_operations qla_dfs_##_name##_ops = {		\
+	.open           = qla_dfs_##_name##_open,			\
+	.read           = seq_read,					\
+	.llseek         = seq_lseek,					\
+	.release        = single_release,				\
+	.write		= qla_dfs_##_name##_write,			\
+};
+
+/*
+ * Create the debugfs file #_name under qla2x00_dfs_root, unless the dentry
+ * variable qla_dfs_<_name> is already set.  That variable must be declared
+ * by the user of the macro, as must qla_dfs_<_name>_ops.
+ */
+#define QLA_DFS_ROOT_CREATE_FILE(_name, _perm, _ctx)			\
+	do {								\
+		if (!qla_dfs_##_name)					\
+			qla_dfs_##_name = debugfs_create_file(#_name,	\
+					_perm, qla2x00_dfs_root, _ctx,	\
+					&qla_dfs_##_name##_ops);	\
+	} while (0)
+
+/* Remove the file created by QLA_DFS_ROOT_CREATE_FILE and clear its dentry. */
+#define QLA_DFS_ROOT_REMOVE_FILE(_name)					\
+	do {								\
+		if (qla_dfs_##_name) {					\
+			debugfs_remove(qla_dfs_##_name);		\
+			qla_dfs_##_name = NULL;				\
+		}							\
+	} while (0)
+
+/*
+ * Create the debugfs file #_name under _parent and store the dentry in
+ * (_struct)->dfs_<_name>.  qla_dfs_<_name>_ops must already be defined.
+ *
+ * The assignment is terminated with ';' inside the do/while body; without
+ * it every expansion of this macro is a syntax error.
+ */
+#define QLA_DFS_CREATE_FILE(_struct, _name, _perm, _parent, _ctx)	\
+	do {								\
+		(_struct)->dfs_##_name = debugfs_create_file(#_name,	\
+					_perm, _parent, _ctx,		\
+					&qla_dfs_##_name##_ops);	\
+	} while (0)
+
+/*
+ * Remove the debugfs file whose dentry is stored in (_struct)->dfs_<_name>,
+ * if any, and reset that pointer to NULL.
+ */
+#define QLA_DFS_REMOVE_FILE(_struct, _name)				\
+	do {								\
+		if ((_struct)->dfs_##_name) {				\
+			debugfs_remove((_struct)->dfs_##_name);		\
+			(_struct)->dfs_##_name = NULL;			\
+		}							\
+	} while (0)
+
 static int
 qla_dfs_naqp_open(struct inode *inode, struct file *file)
 {
@@ -459,23 +679,35 @@
 
 create_nodes:
 	ha->dfs_fw_resource_cnt = debugfs_create_file("fw_resource_count",
-	    S_IRUSR, ha->dfs_dir, vha, &dfs_fw_resource_cnt_ops);
+	    S_IRUSR, ha->dfs_dir, vha, &qla_dfs_fw_resource_cnt_fops);
 
 	ha->dfs_tgt_counters = debugfs_create_file("tgt_counters", S_IRUSR,
-	    ha->dfs_dir, vha, &dfs_tgt_counters_ops);
+	    ha->dfs_dir, vha, &qla_dfs_tgt_counters_fops);
 
 	ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database",
-	    S_IRUSR,  ha->dfs_dir, vha, &dfs_tgt_port_database_ops);
+	    S_IRUSR,  ha->dfs_dir, vha, &qla2x00_dfs_tgt_port_database_fops);
 
 	ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha,
 	    &dfs_fce_ops);
 
 	ha->tgt.dfs_tgt_sess = debugfs_create_file("tgt_sess",
-		S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_sess_ops);
+		S_IRUSR, ha->dfs_dir, vha, &qla2x00_dfs_tgt_sess_fops);
 
-	if (IS_QLA27XX(ha) || IS_QLA83XX(ha) || IS_QLA28XX(ha))
+	if (IS_QLA27XX(ha) || IS_QLA83XX(ha) || IS_QLA28XX(ha)) {
 		ha->tgt.dfs_naqp = debugfs_create_file("naqp",
 		    0400, ha->dfs_dir, vha, &dfs_naqp_ops);
+		if (!ha->tgt.dfs_naqp) {
+			ql_log(ql_log_warn, vha, 0xd011,
+			       "Unable to create debugFS naqp node.\n");
+			goto out;
+		}
+	}
+	vha->dfs_rport_root = debugfs_create_dir("rports", ha->dfs_dir);
+	if (!vha->dfs_rport_root) {
+		ql_log(ql_log_warn, vha, 0xd012,
+		       "Unable to create debugFS rports node.\n");
+		goto out;
+	}
 out:
 	return 0;
 }
@@ -515,6 +747,11 @@
 		ha->dfs_fce = NULL;
 	}
 
+	if (vha->dfs_rport_root) {
+		debugfs_remove_recursive(vha->dfs_rport_root);
+		vha->dfs_rport_root = NULL;
+	}
+
 	if (ha->dfs_dir) {
 		debugfs_remove(ha->dfs_dir);
 		ha->dfs_dir = NULL;

diff --git a/scst/qla2x00t-32gbit/qla_edif.c b/scst/qla2x00t-32gbit/qla_edif.c
new file mode 100644
index 0000000..3545184
--- /dev/null
+++ b/scst/qla2x00t-32gbit/qla_edif.c

@@ -0,0 +1,3685 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (c)  2021     Marvell
+ */
+#include "qla_def.h"
+#include "qla_edif.h"
+
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <scsi/scsi_tcq.h>
+
+/* Forward declarations of static helpers. */
+static struct edif_sa_index_entry *qla_edif_sadb_find_sa_index_entry(uint16_t nport_handle,
+		struct list_head *sa_list);
+static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport,
+		struct qla_sa_update_frame *sa_frame);
+static int qla_edif_sadb_delete_sa_index(fc_port_t *fcport, uint16_t nport_handle,
+		uint16_t sa_index);
+static int qla_pur_get_pending(scsi_qla_host_t *, fc_port_t *, BSG_JOB_TYPE *);
+
+/*
+ * One queued doorbell event.  Nodes are linked through "list" on
+ * vha->e_dbell.head (see qla_edb_getnext()).
+ * NOTE(review): ntype presumably selects which member of "u" is valid --
+ * confirm against the code that fills these nodes.
+ */
+struct edb_node {
+	struct  list_head	list;
+	uint32_t		ntype;
+	union {
+		port_id_t	plogi_did;
+		uint32_t	async;
+		port_id_t	els_sid;
+		struct edif_sa_update_aen	sa_aen;
+	} u;
+};
+
+/* Maps ELS sub-command codes to the labels returned by sc_to_str(). */
+static struct els_sub_cmd {
+	uint16_t cmd;
+	const char *str;
+} sc_str[] = {
+	{SEND_ELS, "send ELS"},
+	{SEND_ELS_REPLY, "send ELS Reply"},
+	{PULL_ELS, "retrieve ELS"},
+};
+
+/*
+ * Translate an ELS sub-command code into the label stored in sc_str[].
+ * Returns "unknown" when the code is not in the table.
+ */
+const char *sc_to_str(uint16_t cmd)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(sc_str)) {
+		if (sc_str[idx].cmd == cmd)
+			return sc_str[idx].str;
+		idx++;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Detach and return the oldest node on the host's doorbell list, or NULL
+ * when the list is empty.  The list is manipulated under e_dbell.db_lock.
+ */
+static struct edb_node *qla_edb_getnext(scsi_qla_host_t *vha)
+{
+	struct list_head *queue = &vha->e_dbell.head;
+	struct edb_node *node;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+	if (list_empty(queue)) {
+		node = NULL;
+	} else {
+		/* db nodes are fifo - no qualifications done */
+		node = list_first_entry(queue, struct edb_node, list);
+		list_del_init(&node->list);
+	}
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	return node;
+}
+
+/*
+ * Unlink a doorbell node from whatever list it is on and free it.
+ * No lock is taken here, and @vha is not used by the body.
+ */
+static void qla_edb_node_free(scsi_qla_host_t *vha, struct edb_node *node)
+{
+	/* list_del_init() is harmless on a node that is already unlinked. */
+	list_del_init(&node->list);
+	kfree(node);
+}
+
+/*
+ * Look up the entry for nport handle @handle on the port's edif_indx_list.
+ * Returns the first match, or NULL when there is none.  No lock is taken
+ * here.
+ */
+static struct edif_list_entry *qla_edif_list_find_sa_index(fc_port_t *fcport,
+		uint16_t handle)
+{
+	struct edif_list_entry *cur, *nxt;
+	struct edif_list_entry *match = NULL;
+
+	list_for_each_entry_safe(cur, nxt, &fcport->edif.edif_indx_list, next) {
+		if (cur->handle != handle)
+			continue;
+		match = cur;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Timer callback: called when there was no traffic and an rx sa_index
+ * delete has been delayed.
+ *
+ * @t: the timer embedded in the struct edif_list_entry being serviced
+ *
+ * If the entry still holds a valid delete_sa_index, that index is claimed
+ * (invalidated) under indx_list_lock and an SA replace work item is posted
+ * for the matching sa_ctl.  Otherwise nothing is done.
+ */
+static void qla2x00_sa_replace_iocb_timeout(struct timer_list *t)
+{
+	struct edif_list_entry *edif_entry = from_timer(edif_entry, t, timer);
+	fc_port_t *fcport = edif_entry->fcport;
+	struct scsi_qla_host *vha = fcport->vha;
+	struct  edif_sa_ctl *sa_ctl;
+	uint16_t nport_handle;
+	unsigned long flags = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3069,
+	    "%s:  nport_handle 0x%x,  SA REPL Delay Timeout, %8phC portid=%06x\n",
+	    __func__, edif_entry->handle, fcport->port_name, fcport->d_id.b24);
+
+	/*
+	 * if delete_sa_index is valid then no one has serviced this
+	 * delayed delete
+	 */
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+
+	/*
+	 * delete_sa_index is invalidated when we find the new sa_index in
+	 * the incoming data stream.  If it is not invalidated then we are
+	 * still looking for the new sa_index because there is no I/O and we
+	 * need to just force the rx delete and move on.  Otherwise
+	 * we could get another rekey which will result in an error 66.
+	 */
+	if (edif_entry->delete_sa_index != INVALID_EDIF_SA_INDEX) {
+		/* Keep the claimed index; the field is overwritten below. */
+		uint16_t delete_sa_index = edif_entry->delete_sa_index;
+
+		edif_entry->delete_sa_index = INVALID_EDIF_SA_INDEX;
+		nport_handle = edif_entry->handle;
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+
+		sa_ctl = qla_edif_find_sa_ctl_by_index(fcport,
+		    delete_sa_index, 0);
+
+		if (sa_ctl) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl: %p, delete index %d, update index: %d, lid: 0x%x\n",
+			    __func__, sa_ctl, delete_sa_index, edif_entry->update_sa_index,
+			    nport_handle);
+
+			sa_ctl->flags = EDIF_SA_CTL_FLG_DEL;
+			set_bit(EDIF_SA_CTL_REPL, &sa_ctl->state);
+			qla_post_sa_replace_work(fcport->vha, fcport,
+			    nport_handle, sa_ctl);
+
+		} else {
+			/*
+			 * Report the index that was actually looked up, not
+			 * the entry field, which already holds
+			 * INVALID_EDIF_SA_INDEX at this point.
+			 */
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl not found for delete_sa_index: %d\n",
+			    __func__, delete_sa_index);
+		}
+	} else {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+	}
+}
+
+/*
+ * Record @sa_index as the update sa_index for nport handle @handle.
+ *
+ * An existing entry for the handle is updated in place (its count is reset);
+ * otherwise a new entry is allocated with GFP_ATOMIC, its replace timer is
+ * set up, and it is appended to the port's edif_indx_list under
+ * indx_list_lock.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+static int qla_edif_list_add_sa_update_index(fc_port_t *fcport,
+		uint16_t sa_index, uint16_t handle)
+{
+	struct edif_list_entry *ent;
+	unsigned long irq_flags = 0;
+
+	ent = qla_edif_list_find_sa_index(fcport, handle);
+	if (!ent) {
+		/*
+		 * This is the normal path - there should be no existing
+		 * entry when update is called.  The exception is at startup
+		 * when update is called for the first two sa_indexes
+		 * followed by a delete of the first sa_index
+		 */
+		ent = kzalloc(sizeof(*ent), GFP_ATOMIC);
+		if (!ent)
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(&ent->next);
+		ent->handle = handle;
+		ent->update_sa_index = sa_index;
+		ent->delete_sa_index = INVALID_EDIF_SA_INDEX;
+		ent->count = 0;
+		ent->flags = 0;
+		timer_setup(&ent->timer, qla2x00_sa_replace_iocb_timeout, 0);
+
+		spin_lock_irqsave(&fcport->edif.indx_list_lock, irq_flags);
+		list_add_tail(&ent->next, &fcport->edif.edif_indx_list);
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, irq_flags);
+		return 0;
+	}
+
+	/* the entry exists, so just update the sa_index */
+	ent->update_sa_index = sa_index;
+	ent->count = 0;
+	return 0;
+}
+
+/*
+ * Unlink @entry from the port's edif index list while holding
+ * indx_list_lock.  The entry itself is not freed here.
+ */
+static void qla_edif_list_delete_sa_index(fc_port_t *fcport, struct edif_list_entry *entry)
+{
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, irq_flags);
+	list_del(&entry->next);
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, irq_flags);
+}
+
+/*
+ * Queue a QLA_EVT_SA_REPLACE work item for @fcport.
+ *
+ * The event carries the port, the sa_ctl and the nport handle, and the port
+ * is flagged FCF_ASYNC_ACTIVE before the event is posted.
+ *
+ * Returns QLA_FUNCTION_FAILED when no work item could be allocated,
+ * otherwise the result of qla2x00_post_work().
+ */
+int qla_post_sa_replace_work(struct scsi_qla_host *vha,
+	 fc_port_t *fcport, uint16_t nport_handle, struct edif_sa_ctl *sa_ctl)
+{
+	struct qla_work_evt *evt = qla2x00_alloc_work(vha, QLA_EVT_SA_REPLACE);
+
+	if (!evt)
+		return QLA_FUNCTION_FAILED;
+
+	fcport->flags |= FCF_ASYNC_ACTIVE;
+	evt->u.sa_update.nport_handle = nport_handle;
+	evt->u.sa_update.sa_ctl = sa_ctl;
+	evt->u.sa_update.fcport = fcport;
+
+	return qla2x00_post_work(vha, evt);
+}
+
+/*
+ * Reset the per-port edif rekey and byte counters (tx and rx) to zero and
+ * log the port that was reset.
+ */
+static void
+qla_edif_sa_ctl_init(scsi_qla_host_t *vha, struct fc_port  *fcport)
+{
+	ql_dbg(ql_dbg_edif, vha, 0x2058,
+	    "Init SA_CTL List for fcport - nn %8phN pn %8phN portid=%06x.\n",
+	    fcport->node_name, fcport->port_name, fcport->d_id.b24);
+
+	/* transmit side */
+	fcport->edif.tx_rekey_cnt = 0;
+	fcport->edif.tx_bytes = 0;
+
+	/* receive side */
+	fcport->edif.rx_rekey_cnt = 0;
+	fcport->edif.rx_bytes = 0;
+}
+
+/*
+ * Pre-screen an authentication ELS BSG request.
+ *
+ * The job is completed here (bsg_job_done() with the reply's current result
+ * and payload length) and -EIO is returned when edif is not enabled, when
+ * the doorbell is inactive, or when the sub-command is PULL_ELS (after
+ * qla_pur_get_pending() has been called for it).  In every other case 0 is
+ * returned and the job is left untouched.
+ */
+static int qla_bsg_check(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job,
+		fc_port_t *fcport)
+{
+	struct fc_bsg_reply *reply = bsg_job->reply;
+	struct qla_bsg_auth_els_request *els_req =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+	struct extra_auth_els *ext = &els_req->e;
+
+	if (!vha->hw->flags.edif_enabled) {
+		ql_dbg(ql_dbg_edif, vha, 0x9105,
+		    "%s edif not enabled\n", __func__);
+	} else if (DBELL_INACTIVE(vha)) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+	} else if (ext->sub_cmd == PULL_ELS) {
+		/* Get response */
+		struct qla_bsg_auth_els_reply *els_rpl =
+			(struct qla_bsg_auth_els_reply *)bsg_job->reply;
+
+		qla_pur_get_pending(vha, fcport, bsg_job);
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+			"%s %s %8phN sid=%x. xchg %x, nb=%xh bsg ptr %p\n",
+			__func__, sc_to_str(ext->sub_cmd), fcport->port_name,
+			fcport->d_id.b24, els_rpl->rx_xchg_address,
+			els_rpl->r.reply_payload_rcv_len, bsg_job);
+	} else {
+		/* Nothing to short-circuit; the caller handles the request. */
+		return 0;
+	}
+
+	bsg_job_done(bsg_job, reply->result,
+			reply->reply_payload_rcv_len);
+	return -EIO;
+}
+
+/*
+ * Find the fcport on vha->vp_fcports whose 24-bit port id equals @id.
+ * Returns the first match, or NULL when there is none.
+ */
+fc_port_t *
+qla2x00_find_fcport_by_pid(scsi_qla_host_t *vha, port_id_t *id)
+{
+	fc_port_t *cur, *nxt;
+
+	list_for_each_entry_safe(cur, nxt, &vha->vp_fcports, list) {
+		if (cur->d_id.b24 != id->b24)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * qla_edif_app_check(): check for valid application id.
+ * @vha: host adapter pointer
+ * @appid: application id
+ *
+ * The application is accepted only when its app_vid is EDIF_APP_ID and its
+ * version is EDIF_VERSION1; a debug message is logged otherwise.
+ *
+ * Return: false = fail, true = pass
+ */
+static bool
+qla_edif_app_check(scsi_qla_host_t *vha, struct app_id appid)
+{
+	/* check that the app is allowed/known to the driver */
+
+	if (appid.app_vid != EDIF_APP_ID) {
+		/* Messages end in '\n' like every other ql_dbg() call here. */
+		ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app id not ok (%x)\n",
+		    __func__, appid.app_vid);
+		return false;
+	}
+
+	if (appid.version != EDIF_VERSION1) {
+		ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app version is not ok (%x)\n",
+		    __func__, appid.version);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Unlink @sa_ctl from its list under sa_list_lock, decrement the matching
+ * rekey counter and free the control block.
+ *
+ * An @index below 512 decrements rx_rekey_cnt; 512 and above decrements
+ * tx_rekey_cnt.
+ */
+static void
+qla_edif_free_sa_ctl(fc_port_t *fcport, struct edif_sa_ctl *sa_ctl,
+	int index)
+{
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, irq_flags);
+	list_del(&sa_ctl->next);
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, irq_flags);
+
+	if (index < 512)
+		fcport->edif.rx_rekey_cnt--;
+	else
+		fcport->edif.tx_rekey_cnt--;
+
+	kfree(sa_ctl);
+}
+
+/*
+ * Give an sa_index back to the free pool by clearing its bit in the
+ * adapter's id map, under sadb_fp_lock.
+ *
+ * A non-zero @dir selects the tx map, whose bit numbers are relative to
+ * EDIF_TX_SA_INDEX_BASE; zero selects the rx map, indexed by @sa_index
+ * directly.
+ */
+static void qla_edif_add_sa_index_to_freepool(fc_port_t *fcport, int dir,
+		uint16_t sa_index)
+{
+	struct scsi_qla_host *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	u16 bit = dir ? sa_index - EDIF_TX_SA_INDEX_BASE : sa_index;
+	unsigned long irq_flags = 0;
+	void *map;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	if (dir)
+		map = ha->edif_tx_sa_id_map;
+	else
+		map = ha->edif_rx_sa_id_map;
+
+	spin_lock_irqsave(&ha->sadb_fp_lock, irq_flags);
+	clear_bit(bit, map);
+	spin_unlock_irqrestore(&ha->sadb_fp_lock, irq_flags);
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: index %d added to free pool\n", __func__, sa_index);
+}
+
+/*
+ * Release everything attached to one sadb index entry of @fcport.
+ *
+ * @vha:    host adapter
+ * @fcport: port the entry belongs to
+ * @entry:  sadb index entry; both sa_pair slots are examined
+ * @pdir:   SAU_FLG_TX for the tx side, 0 for the rx side (see the callers in
+ *          qla2x00_release_all_sadb())
+ *
+ * For every valid sa_index in the pair: free the matching sa_ctl, return the
+ * index to the free pool and, on the rx side, drop the port's edif list
+ * entry for this handle.  @entry itself is not freed here.
+ */
+static void __qla2x00_release_all_sadb(struct scsi_qla_host *vha,
+	struct fc_port *fcport, struct edif_sa_index_entry *entry,
+	int pdir)
+{
+	struct edif_list_entry *edif_entry;
+	struct  edif_sa_ctl *sa_ctl;
+	int i, dir;
+	int key_cnt = 0;
+
+	for (i = 0; i < 2; i++) {
+		if (entry->sa_pair[i].sa_index == INVALID_EDIF_SA_INDEX)
+			continue;
+
+		/* A handle mismatch is only logged; processing continues. */
+		if (fcport->loop_id != entry->handle) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: ** WARNING %d** entry handle: 0x%x, lid: 0x%x, sa_index: %d\n",
+			    __func__, i, entry->handle, fcport->loop_id,
+			    entry->sa_pair[i].sa_index);
+		}
+
+		/* release the sa_ctl */
+		sa_ctl = qla_edif_find_sa_ctl_by_index(fcport,
+				entry->sa_pair[i].sa_index, pdir);
+		/* The lookup is repeated with sa_ctl->index before freeing. */
+		if (sa_ctl &&
+		    qla_edif_find_sa_ctl_by_index(fcport, sa_ctl->index, pdir)) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: freeing sa_ctl for index %d\n", __func__, sa_ctl->index);
+			qla_edif_free_sa_ctl(fcport, sa_ctl, sa_ctl->index);
+		} else {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl NOT freed, sa_ctl: %p\n", __func__, sa_ctl);
+		}
+
+		/* Release the index */
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+			"%s: freeing sa_index %d, nph: 0x%x\n",
+			__func__, entry->sa_pair[i].sa_index, entry->handle);
+
+		/* Indexes at or above EDIF_TX_SA_INDEX_BASE go to the tx pool. */
+		dir = (entry->sa_pair[i].sa_index <
+			EDIF_TX_SA_INDEX_BASE) ? 0 : 1;
+		qla_edif_add_sa_index_to_freepool(fcport, dir,
+			entry->sa_pair[i].sa_index);
+
+		/* Delete timer on RX */
+		if (pdir != SAU_FLG_TX) {
+			edif_entry =
+				qla_edif_list_find_sa_index(fcport, entry->handle);
+			if (edif_entry) {
+				ql_dbg(ql_dbg_edif, vha, 0x5033,
+				    "%s: remove edif_entry %p, update_sa_index: 0x%x, delete_sa_index: 0x%x\n",
+				    __func__, edif_entry, edif_entry->update_sa_index,
+				    edif_entry->delete_sa_index);
+				qla_edif_list_delete_sa_index(fcport, edif_entry);
+				/*
+				 * valid delete_sa_index indicates there is a rx
+				 * delayed delete queued
+				 */
+				if (edif_entry->delete_sa_index !=
+						INVALID_EDIF_SA_INDEX) {
+					/*
+					 * NOTE(review): del_timer() does not
+					 * wait for a handler that is already
+					 * running, and edif_entry is freed
+					 * below -- confirm whether
+					 * del_timer_sync() is needed and
+					 * usable in this calling context.
+					 */
+					del_timer(&edif_entry->timer);
+
+					/* build and send the aen */
+					fcport->edif.rx_sa_set = 1;
+					fcport->edif.rx_sa_pending = 0;
+					qla_edb_eventcreate(vha,
+							VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+							QL_VND_SA_STAT_SUCCESS,
+							QL_VND_RX_SA_KEY, fcport);
+				}
+				ql_dbg(ql_dbg_edif, vha, 0x5033,
+				    "%s: release edif_entry %p, update_sa_index: 0x%x, delete_sa_index: 0x%x\n",
+				    __func__, edif_entry, edif_entry->update_sa_index,
+				    edif_entry->delete_sa_index);
+
+				kfree(edif_entry);
+			}
+		}
+		key_cnt++;
+	}
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: %d %s keys released\n",
+	    __func__, key_cnt, pdir ? "tx" : "rx");
+}
+
+/*
+ * Find and release all outstanding sadb sa_indices of @fcport.
+ *
+ * The first entry belonging to @fcport on the adapter's rx index list, and
+ * then the first one on its tx index list, is unlinked, released through
+ * __qla2x00_release_all_sadb() and freed.
+ */
+void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport)
+{
+	struct edif_sa_index_entry *entry, *tmp;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: Starting...\n", __func__);
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_rx_index_list, next) {
+		if (entry->fcport == fcport) {
+			list_del(&entry->next);
+			/*
+			 * sadb_lock is dropped around the release and
+			 * re-taken afterwards; the walk stops right after
+			 * (break), so the list is not traversed further once
+			 * the lock has been released.
+			 */
+			spin_unlock_irqrestore(&ha->sadb_lock, flags);
+			__qla2x00_release_all_sadb(vha, fcport, entry, 0);
+			kfree(entry);
+			spin_lock_irqsave(&ha->sadb_lock, flags);
+			break;
+		}
+	}
+
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_tx_index_list, next) {
+		if (entry->fcport == fcport) {
+			list_del(&entry->next);
+			/* Same unlock / release / re-lock / stop pattern as above. */
+			spin_unlock_irqrestore(&ha->sadb_lock, flags);
+
+			__qla2x00_release_all_sadb(vha, fcport, entry, SAU_FLG_TX);
+
+			kfree(entry);
+			spin_lock_irqsave(&ha->sadb_lock, flags);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+}
+
+/**
+ * qla_edif_app_start:  application has announced its presence
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ *
+ * Set/activate doorbell.  Reset current sessions and re-login with
+ * secure flag.
+ */
+static int
+qla_edif_app_start(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	int32_t			rval = 0;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct app_start	appstart;
+	struct app_start_reply	appreply;
+	struct fc_port  *fcport, *tf;
+
+	/*
+	 * appreply is copied wholesale into the reply payload at the end of
+	 * this function.  Clear it up front so that any member or padding not
+	 * explicitly assigned below goes out as zero instead of stale stack
+	 * contents.
+	 */
+	memset(&appreply, 0, sizeof(appreply));
+
+	ql_log(ql_log_info, vha, 0x1313,
+	       "EDIF application registration with driver, FC device connections will be re-established.\n");
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appstart,
+	    sizeof(struct app_start));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app_vid=%x app_start_flags %x\n",
+	     __func__, appstart.app_info.app_vid, appstart.app_start_flags);
+
+	if (DBELL_INACTIVE(vha)) {
+		/* mark doorbell as active since an app is now present */
+		vha->e_dbell.db_flags |= EDB_ACTIVE;
+	} else {
+		/* doorbell already active: only report the current state */
+		goto out;
+	}
+
+	if (N2N_TOPO(vha->hw)) {
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list)
+			fcport->n2n_link_reset_cnt = 0;
+
+		if (vha->hw->flags.n2n_fw_acc_sec) {
+			list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list)
+				qla_edif_sa_ctl_init(vha, fcport);
+
+			/*
+			 * While authentication app was not running, remote device
+			 * could still try to login with this local port.  Let's
+			 * clear the state and try again.
+			 */
+			qla2x00_wait_for_sess_deletion(vha);
+
+			/* bounce the link to get the other guy to relogin */
+			if (!vha->hw->flags.n2n_bigger) {
+				set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+				qla2xxx_wake_dpc(vha);
+			}
+		} else {
+			/* request an ISP abort and wait until the HBA is back */
+			qla2x00_wait_for_hba_online(vha);
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+			qla2xxx_wake_dpc(vha);
+			qla2x00_wait_for_hba_online(vha);
+		}
+	} else {
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			       "FCSP - nn %8phN pn %8phN portid=%06x.\n",
+			       fcport->node_name, fcport->port_name,
+			       fcport->d_id.b24);
+			ql_dbg(ql_dbg_edif, vha, 0xf084,
+			       "%s: se_sess %p / sess %p from port %8phC "
+			       "loop_id %#04x s_id %06x logout %d "
+			       "keep %d els_logo %d disc state %d auth state %d "
+			       "stop state %d\n",
+			       __func__, fcport->se_sess, fcport,
+			       fcport->port_name, fcport->loop_id,
+			       fcport->d_id.b24, fcport->logout_on_delete,
+			       fcport->keep_nport_handle, fcport->send_els_logo,
+			       fcport->disc_state, fcport->edif.auth_state,
+			       fcport->edif.app_stop);
+
+			/* nothing more can be done once the loop is down */
+			if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+				break;
+
+			fcport->login_retry = vha->hw->login_retry_count;
+
+			fcport->edif.app_stop = 0;
+			fcport->edif.app_sess_online = 0;
+
+			if (fcport->scan_state != QLA_FCPORT_FOUND)
+				continue;
+
+			if (fcport->port_type == FCT_UNKNOWN &&
+			    !fcport->fc4_features)
+				rval = qla24xx_async_gffid(vha, fcport, true);
+
+			/*
+			 * Skip ports that are not targets.  When the GFF_ID
+			 * call above returned non-zero the port is processed
+			 * anyway.
+			 */
+			if (!rval && !(fcport->fc4_features & FC4_FF_TARGET ||
+			    fcport->port_type & (FCT_TARGET|FCT_NVME_TARGET)))
+				continue;
+
+			/* a GFF_ID error is not reported to the caller */
+			rval = 0;
+
+			ql_dbg(ql_dbg_edif, vha, 0x911e,
+			       "%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+			       __func__, fcport->port_name);
+			qlt_schedule_sess_for_deletion(fcport);
+			qla_edif_sa_ctl_init(vha, fcport);
+		}
+		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+	}
+
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		/* mark as active since an app is now present */
+		vha->pur_cinfo.enode_flags = ENODE_ACTIVE;
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x911f, "%s enode already active\n",
+		     __func__);
+	}
+
+out:
+	/* report the current EDIF state back to the application */
+	appreply.host_support_edif = vha->hw->flags.edif_enabled;
+	appreply.edif_enode_active = vha->pur_cinfo.enode_flags;
+	appreply.edif_edb_active = vha->e_dbell.db_flags;
+	appreply.version = EDIF_VERSION1;
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+	bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							       bsg_job->reply_payload.sg_cnt,
+							       &appreply,
+							       sizeof(struct app_start_reply));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s app start completed with 0x%x\n",
+	    __func__, rval);
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_stop - the application has announced that it is exiting.
+ * @vha: host adapter pointer
+ * @bsg_job: user space command pointer
+ *
+ * Stops the enode and the doorbell, then flags every FC-SP device port
+ * with app_stop/send_els_logo and schedules its session for deletion.
+ * Always returns 0 with the bsg result set to DID_OK.
+ */
+static int
+qla_edif_app_stop(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply     *reply = bsg_job->reply;
+	struct app_stop         stop_req;
+	struct fc_port  *port, *next;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &stop_req,
+	    sizeof(stop_req));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s Stopping APP: app_vid=%x\n",
+	    __func__, stop_req.app_info.app_vid);
+
+	/* if we leave this running short waits are operational < 16 secs */
+	qla_enode_stop(vha);
+	qla_edb_stop(vha);
+
+	list_for_each_entry_safe(port, next, &vha->vp_fcports, list) {
+		/* only FC-SP device ports are touched */
+		if (!(port->flags & FCF_FCSP_DEVICE))
+			continue;
+
+		ql_dbg(ql_dbg_edif, vha, 0xf084,
+		    "%s: sess %p from port %8phC lid %#04x s_id %06x logout %d keep %d els_logo %d\n",
+		    __func__, port,
+		    port->port_name, port->loop_id, port->d_id.b24,
+		    port->logout_on_delete, port->keep_nport_handle,
+		    port->send_els_logo);
+
+		/* stop walking the list as soon as the loop is seen down */
+		if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+			break;
+
+		port->edif.app_stop = 1;
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+			"%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+			__func__, port->port_name);
+
+		port->send_els_logo = 1;
+		qlt_schedule_sess_for_deletion(port);
+	}
+
+	/* no return interface to app - it assumes we cleaned up ok */
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	SET_DID_STATUS(reply->result, DID_OK);
+
+	return 0;
+}
+
+/*
+ * Check whether both the RX and the TX SA have been set for @fcport and
+ * record the outcome in @appplogireply->prli_status.
+ *
+ * Returns 0 (prli_status 1) when both are set, after clearing the set and
+ * pending flags; returns 1 (prli_status 0) otherwise.
+ */
+static int
+qla_edif_app_chk_sa_update(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct app_plogi_reply *appplogireply)
+{
+	if (fcport->edif.rx_sa_set && fcport->edif.tx_sa_set) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s wwpn %8phC Both SA(s) updated.\n", __func__,
+		    fcport->port_name);
+		fcport->edif.tx_sa_set = 0;
+		fcport->edif.rx_sa_set = 0;
+		fcport->edif.tx_sa_pending = 0;
+		fcport->edif.rx_sa_pending = 0;
+		appplogireply->prli_status = 1;
+		return 0;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911e,
+	    "%s: wwpn %8phC Both SA indexes has not been SET TX %d, RX %d.\n",
+	    __func__, fcport->port_name, fcport->edif.tx_sa_set,
+	    fcport->edif.rx_sa_set);
+	appplogireply->prli_status = 0;
+	return 1;
+}
+
+/**
+ * qla_edif_app_authok - the application reports a successful authentication,
+ *   so the driver may continue with PRLI
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ *
+ * The port is looked up by WWPN or by port id, depending on the request
+ * type.  An app_plogi_reply is always copied to the reply payload; its
+ * prli_status tells the application whether the driver accepted the
+ * request.  Always returns 0.
+ */
+static int
+qla_edif_app_authok(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply	*reply = bsg_job->reply;
+	struct auth_complete_cmd auth_cmd;
+	struct app_plogi_reply	plogi_reply = {0};
+	port_id_t		did = {0};
+	fc_port_t		*fcport = NULL;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &auth_cmd,
+	    sizeof(auth_cmd));
+
+	/* silent unaligned access warning */
+	did.b.domain = auth_cmd.u.d_id.b.domain;
+	did.b.area   = auth_cmd.u.d_id.b.area;
+	did.b.al_pa  = auth_cmd.u.d_id.b.al_pa;
+
+	plogi_reply.version = EDIF_VERSION1;
+
+	if (auth_cmd.type == PL_TYPE_WWPN) {
+		fcport = qla2x00_find_fcport_by_wwpn(vha, auth_cmd.u.wwpn, 0);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s wwpn lookup failed: %8phC\n",
+			    __func__, auth_cmd.u.wwpn);
+	} else if (auth_cmd.type == PL_TYPE_DID) {
+		fcport = qla2x00_find_fcport_by_pid(vha, &did);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s d_id lookup failed: %x\n", __func__,
+			    did.b24);
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s undefined type: %x\n", __func__,
+		    auth_cmd.type);
+	}
+
+	if (!fcport) {
+		SET_DID_STATUS(reply->result, DID_ERROR);
+		goto send_reply;
+	}
+
+	/*
+	 * A port that is already online means this is a REKEY operation:
+	 * only the SA update check is done, no PRLI.
+	 */
+	if (atomic_read(&fcport->state) == FCS_ONLINE) {
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s Skipping PRLI complete based on rekey\n", __func__);
+		plogi_reply.prli_status = 1;
+		SET_DID_STATUS(reply->result, DID_OK);
+		qla_edif_app_chk_sa_update(vha, fcport, &plogi_reply);
+		goto send_reply;
+	}
+
+	/* anything other than AUTH_PENDING is rejected via prli_status */
+	if (fcport->disc_state != DSC_LOGIN_AUTH_PEND) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s wwpn %8phC is not in auth pending state (%x)\n",
+		    __func__, fcport->port_name, fcport->disc_state);
+		SET_DID_STATUS(reply->result, DID_OK);
+		plogi_reply.prli_status = 0;
+		goto send_reply;
+	}
+
+	SET_DID_STATUS(reply->result, DID_OK);
+	plogi_reply.prli_status = 1;
+	fcport->edif.authok = 1;
+
+	/* both SAs must have been set before PRLI may go out */
+	if (!fcport->edif.rx_sa_set || !fcport->edif.tx_sa_set) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s: wwpn %8phC Both SA indexes has not been SET TX %d, RX %d.\n",
+		    __func__, fcport->port_name, fcport->edif.tx_sa_set,
+		    fcport->edif.rx_sa_set);
+		SET_DID_STATUS(reply->result, DID_OK);
+		plogi_reply.prli_status = 0;
+		goto send_reply;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911e,
+	    "%s wwpn %8phC Both SA(s) updated.\n", __func__,
+	    fcport->port_name);
+	fcport->edif.tx_sa_set = 0;
+	fcport->edif.rx_sa_set = 0;
+	fcport->edif.tx_sa_pending = 0;
+	fcport->edif.rx_sa_pending = 0;
+
+	if (qla_ini_mode_enabled(vha)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s AUTH complete - RESUME with prli for wwpn %8phC\n",
+		    __func__, fcport->port_name);
+		qla24xx_post_prli_work(vha, fcport);
+	}
+
+send_reply:
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	bsg_reply_payload_copy:
+	reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							   bsg_job->reply_payload.sg_cnt,
+							   &plogi_reply,
+							   sizeof(plogi_reply));
+
+	return 0;
+}
+
+/**
+ * qla_edif_app_authfail - the application reports a failed authentication;
+ *   the driver is asked to tear down the current session.
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ *
+ * Returns 0 for a WWPN or port-id request (whether or not the port was
+ * found) and -1 for an unknown request type.
+ */
+static int
+qla_edif_app_authfail(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply	*reply = bsg_job->reply;
+	struct auth_complete_cmd fail_cmd;
+	port_id_t		did = {0};
+	fc_port_t		*fcport = NULL;
+	int32_t			rval = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app auth fail\n", __func__);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &fail_cmd,
+	    sizeof(fail_cmd));
+
+	/* silent unaligned access warning */
+	did.b.domain = fail_cmd.u.d_id.b.domain;
+	did.b.area   = fail_cmd.u.d_id.b.area;
+	did.b.al_pa  = fail_cmd.u.d_id.b.al_pa;
+
+	/*
+	 * TODO: edif: app has failed this plogi. Inform driver to
+	 * take any action (if any).
+	 */
+	switch (fail_cmd.type) {
+	case PL_TYPE_WWPN:
+		fcport = qla2x00_find_fcport_by_wwpn(vha,
+		    fail_cmd.u.wwpn, 0);
+		SET_DID_STATUS(reply->result, DID_OK);
+		break;
+	case PL_TYPE_DID:
+		fcport = qla2x00_find_fcport_by_pid(vha, &did);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s d_id lookup failed: %x\n", __func__,
+			    did.b24);
+		SET_DID_STATUS(reply->result, DID_OK);
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s undefined type: %x\n", __func__,
+		    fail_cmd.type);
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		SET_DID_STATUS(reply->result, DID_ERROR);
+		rval = -1;
+		break;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s fcport is 0x%p\n", __func__, fcport);
+
+	/* no matching port: nothing to tear down */
+	if (!fcport)
+		return rval;
+
+	/* set/reset edif values and flags */
+	ql_dbg(ql_dbg_edif, vha, 0x911e,
+	    "%s reset the auth process - %8phC, loopid=%x portid=%06x.\n",
+	    __func__, fcport->port_name, fcport->loop_id, fcport->d_id.b24);
+
+	/* the session is only torn down when initiator mode is enabled */
+	if (!qla_ini_mode_enabled(fcport->vha))
+		return rval;
+
+	fcport->send_els_logo = 1;
+	qlt_schedule_sess_for_deletion(fcport);
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_getfcinfo - app would like to read session info (wwpn, nportid,
+ *   [initiator|target] mode).  It can request one session by nport id or
+ *   all sessions.
+ * @vha: host adapter pointer
+ * @bsg_job: user request pointer
+ *
+ * At most app_req.num_ports entries are returned.  A non-zero
+ * app_req.remote_pid limits the reply to the port with that id.
+ *
+ * Return: 0 on success, -1 if the reply buffer could not be allocated.
+ */
+static int
+qla_edif_app_getfcinfo(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	int32_t			rval = 0;
+	int32_t			pcnt = 0;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct app_pinfo_req	app_req;
+	struct app_pinfo_reply	*app_reply;
+	port_id_t		tdid;
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app get fcinfo\n", __func__);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &app_req,
+	    sizeof(struct app_pinfo_req));
+
+	app_reply = kzalloc((sizeof(struct app_pinfo_reply) +
+	    sizeof(struct app_pinfo) * app_req.num_ports), GFP_KERNEL);
+
+	if (!app_reply) {
+		/*
+		 * There is no buffer to copy from, so report an empty
+		 * payload instead of handing a NULL pointer to
+		 * sg_copy_from_buffer().
+		 */
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		bsg_reply->reply_payload_rcv_len = 0;
+		rval = -1;
+	} else {
+		struct fc_port	*fcport = NULL, *tf;
+
+		app_reply->version = EDIF_VERSION1;
+
+		/* the requested port id does not change while walking the list */
+		tdid = app_req.remote_pid;
+
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			if (!(fcport->flags & FCF_FCSP_DEVICE))
+				continue;
+
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			    "APP request entry - portid=%06x.\n", tdid.b24);
+
+			/* Ran out of space */
+			if (pcnt >= app_req.num_ports)
+				break;
+
+			/* a non-zero requested id selects exactly one port */
+			if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24)
+				continue;
+
+			if (!N2N_TOPO(vha->hw)) {
+				if (fcport->scan_state != QLA_FCPORT_FOUND)
+					continue;
+
+				if (fcport->port_type == FCT_UNKNOWN &&
+				    !fcport->fc4_features)
+					rval = qla24xx_async_gffid(vha, fcport,
+								   true);
+
+				/*
+				 * Skip ports that are not targets.  When the
+				 * GFF_ID call returned non-zero the port is
+				 * reported anyway.
+				 */
+				if (!rval &&
+				    !(fcport->fc4_features & FC4_FF_TARGET ||
+				      fcport->port_type &
+				      (FCT_TARGET | FCT_NVME_TARGET)))
+					continue;
+			}
+
+			/* a GFF_ID error is not reported to the caller */
+			rval = 0;
+
+			app_reply->ports[pcnt].version = EDIF_VERSION1;
+			app_reply->ports[pcnt].remote_type =
+				VND_CMD_RTYPE_UNKNOWN;
+			if (fcport->port_type & (FCT_NVME_TARGET | FCT_TARGET))
+				app_reply->ports[pcnt].remote_type |=
+					VND_CMD_RTYPE_TARGET;
+			if (fcport->port_type & (FCT_NVME_INITIATOR | FCT_INITIATOR))
+				app_reply->ports[pcnt].remote_type |=
+					VND_CMD_RTYPE_INITIATOR;
+
+			app_reply->ports[pcnt].remote_pid = fcport->d_id;
+
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			    "Found FC_SP fcport - nn %8phN pn %8phN pcnt %d portid=%06x secure %d.\n",
+			    fcport->node_name, fcport->port_name, pcnt,
+			    fcport->d_id.b24, fcport->flags & FCF_FCSP_DEVICE);
+
+			switch (fcport->edif.auth_state) {
+			case VND_CMD_AUTH_STATE_ELS_RCVD:
+				/*
+				 * An ELS_RCVD port that is waiting for
+				 * authentication is moved to, and reported
+				 * as, AUTH_STATE_NEEDED.
+				 */
+				if (fcport->disc_state == DSC_LOGIN_AUTH_PEND) {
+					fcport->edif.auth_state = VND_CMD_AUTH_STATE_NEEDED;
+					app_reply->ports[pcnt].auth_state =
+						VND_CMD_AUTH_STATE_NEEDED;
+				} else {
+					app_reply->ports[pcnt].auth_state =
+						VND_CMD_AUTH_STATE_ELS_RCVD;
+				}
+				break;
+			default:
+				app_reply->ports[pcnt].auth_state = fcport->edif.auth_state;
+				break;
+			}
+
+			memcpy(app_reply->ports[pcnt].remote_wwpn,
+			    fcport->port_name, 8);
+
+			app_reply->ports[pcnt].remote_state =
+				(atomic_read(&fcport->state) ==
+				    FCS_ONLINE ? 1 : 0);
+
+			pcnt++;
+
+			/* the single requested port has been reported */
+			if (tdid.b24 != 0)
+				break;
+		}
+		app_reply->port_count = pcnt;
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+		/* only the header plus the entries actually filled in go out */
+		bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+								       bsg_job->reply_payload.sg_cnt,
+								       app_reply,
+								       sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * pcnt);
+	}
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+
+	/* kfree() accepts NULL, so the failure path needs no special case */
+	kfree(app_reply);
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_getstats - app would like to read various statistics info
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ *
+ * Fills one app_sinfo element (rekey count, tx/rx byte counts, wwpn) per
+ * edif-enabled port, up to app_req.num_ports elements.
+ *
+ * Return: 0 on success, -1 if the request asked for zero ports or the reply
+ * buffer could not be allocated.
+ */
+static int32_t
+qla_edif_app_getstats(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	int32_t			rval = 0;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	uint32_t size;
+
+	struct app_sinfo_req	app_req;
+	struct app_stats_reply	*app_reply;
+	uint32_t pcnt = 0;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &app_req,
+	    sizeof(struct app_sinfo_req));
+	if (app_req.num_ports == 0) {
+		ql_dbg(ql_dbg_async, vha, 0x911d,
+		   "%s app did not indicate number of ports to return\n",
+		    __func__);
+		/*
+		 * NOTE(review): processing continues and the result set here
+		 * is overwritten with DID_OK below, while rval stays -1 -
+		 * confirm whether that is intended.
+		 */
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		rval = -1;
+	}
+
+	size = sizeof(struct app_stats_reply) +
+	    (sizeof(struct app_sinfo) * app_req.num_ports);
+
+	app_reply = kzalloc(size, GFP_KERNEL);
+	if (!app_reply) {
+		/*
+		 * There is no buffer to copy from, so report an empty
+		 * payload instead of handing a NULL pointer to
+		 * sg_copy_from_buffer().
+		 */
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		bsg_reply->reply_payload_rcv_len = 0;
+		rval = -1;
+	} else {
+		struct fc_port	*fcport = NULL, *tf;
+
+		app_reply->version = EDIF_VERSION1;
+
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			if (fcport->edif.enable) {
+				/*
+				 * elem[] was sized for num_ports entries, so
+				 * index num_ports is already out of bounds.
+				 */
+				if (pcnt >= app_req.num_ports)
+					break;
+
+				app_reply->elem[pcnt].rekey_count =
+				    fcport->edif.rekey_cnt;
+				app_reply->elem[pcnt].tx_bytes =
+				    fcport->edif.tx_bytes;
+				app_reply->elem[pcnt].rx_bytes =
+				    fcport->edif.rx_bytes;
+
+				memcpy(app_reply->elem[pcnt].remote_wwpn,
+				    fcport->port_name, 8);
+
+				pcnt++;
+			}
+		}
+		app_reply->elem_count = pcnt;
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+		/* only the header plus the elements actually filled in go out */
+		bsg_reply->reply_payload_rcv_len =
+		    sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+		       bsg_job->reply_payload.sg_cnt, app_reply,
+		       sizeof(struct app_stats_reply) + (sizeof(struct app_sinfo) * pcnt));
+	}
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+
+	/* kfree() accepts NULL, so the failure path needs no special case */
+	kfree(app_reply);
+
+	return rval;
+}
+
+/*
+ * Handle an AEN-complete acknowledgement from the application.
+ *
+ * The port is looked up by the port id carried in the request; a
+ * SESSION_SHUTDOWN event code marks the port's session-down as acked.
+ * The bsg result is DID_OK and the return value 0 in every case.
+ */
+static int32_t
+qla_edif_ack(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply     *reply = bsg_job->reply;
+	struct aen_complete_cmd aen_ack;
+	struct fc_port *port;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+			  bsg_job->request_payload.sg_cnt, &aen_ack,
+			  sizeof(aen_ack));
+
+	ql_dbg(ql_dbg_edif, vha, 0x70cf,
+	       "%s: %06x event_code %x\n",
+	       __func__, aen_ack.port_id.b24, aen_ack.event_code);
+
+	port = qla2x00_find_fcport_by_pid(vha, &aen_ack.port_id);
+	SET_DID_STATUS(reply->result, DID_OK);
+
+	if (!port) {
+		ql_dbg(ql_dbg_edif, vha, 0x70cf,
+		       "%s: unable to find fcport %06x \n",
+		       __func__, aen_ack.port_id.b24);
+		return 0;
+	}
+
+	/* every other event code is accepted without further action */
+	if (aen_ack.event_code == VND_CMD_AUTH_STATE_SESSION_SHUTDOWN)
+		port->edif.sess_down_acked = 1;
+
+	return 0;
+}
+
+/*
+ * Drain queued doorbell events into the reply payload of @bsg_job.
+ *
+ * Events are pulled with qla_edb_getnext() for as long as at least
+ * sizeof(struct edb_node) bytes of the reply payload remain unused.
+ * Each known event is packed into an edif_app_dbell record and appended
+ * to the payload; events of unknown type are logged and dropped.  Every
+ * consumed node is freed.  Always returns 0 with the result set to DID_OK.
+ */
+static int qla_edif_consume_dbell(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_reply	*reply = bsg_job->reply;
+	u32 room = bsg_job->reply_payload.payload_len;
+	u32 copied = 0;
+	struct edif_app_dbell ev;
+	struct edb_node *node;
+	bool deliver;
+	int ev_len;
+
+	while ((room - copied) >= sizeof(struct edb_node)) {
+		node = qla_edb_getnext(vha);
+		if (!node)
+			break;
+
+		deliver = true;
+		ev_len = 0;
+		ev.event_code = node->ntype;
+
+		switch (node->ntype) {
+		case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+		case VND_CMD_AUTH_STATE_NEEDED:
+			ev.port_id = node->u.plogi_did;
+			ev_len += sizeof(ev.port_id);
+			break;
+		case VND_CMD_AUTH_STATE_ELS_RCVD:
+			ev.port_id = node->u.els_sid;
+			ev_len += sizeof(ev.port_id);
+			break;
+		case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+			ev.port_id = node->u.sa_aen.port_id;
+			memcpy(&ev.event_data, &node->u,
+			    sizeof(struct edif_sa_update_aen));
+			ev_len += sizeof(struct edif_sa_update_aen);
+			break;
+		default:
+			deliver = false;
+			ql_log(ql_log_warn, vha, 0x09102,
+				"%s unknown DB type=%d %p\n",
+				__func__, node->ntype, node);
+			break;
+		}
+
+		ev.event_data_size = ev_len;
+		/* 8 = sizeof(ap.event_code + ap.event_data_size) */
+		ev_len += 8;
+
+		/* append at the current write offset and advance it */
+		if (deliver)
+			copied += sg_pcopy_from_buffer(bsg_job->reply_payload.sg_list,
+					bsg_job->reply_payload.sg_cnt,
+					&ev, ev_len, copied);
+
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+			"%s Doorbell consumed : type=%d %p\n",
+			__func__, node->ntype, node);
+
+		kfree(node);
+	}
+
+	SET_DID_STATUS(reply->result, DID_OK);
+	reply->reply_payload_rcv_len = copied;
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+
+	return 0;
+}
+
+/*
+ * Fill @bsg_job with the pending doorbell events and complete it.
+ * A non-zero @delay (passed to msleep()) is slept first so that more
+ * events can accumulate.
+ */
+static void __qla_edif_dbell_bsg_done(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job,
+	u32 delay)
+{
+	struct fc_bsg_reply *reply;
+
+	/* small sleep for doorbell events to accumulate */
+	if (delay != 0)
+		msleep(delay);
+
+	qla_edif_consume_dbell(vha, bsg_job);
+
+	reply = bsg_job->reply;
+	bsg_job_done(bsg_job, reply->result, reply->reply_payload_rcv_len);
+}
+
+/*
+ * Complete the doorbell bsg job currently parked on @vha, if there is one.
+ * The job is detached under db_lock and completed after the lock has been
+ * dropped.
+ */
+static void qla_edif_dbell_bsg_done(scsi_qla_host_t *vha)
+{
+	BSG_JOB_TYPE *parked_job;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+	parked_job = vha->e_dbell.dbell_bsg_job;
+	if (parked_job)
+		vha->e_dbell.dbell_bsg_job = NULL;
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	if (!parked_job)
+		return;
+
+	__qla_edif_dbell_bsg_done(vha, parked_job, 0);
+}
+
+/*
+ * Handle a doorbell read request.
+ *
+ * Any previously parked doorbell job is completed first.  If no event is
+ * queued and the doorbell is active, @bsg_job is parked on @vha with a
+ * 10 second expiry; otherwise it is completed right away with whatever
+ * events are queued.  Always returns 0.
+ */
+static int
+qla_edif_dbell_bsg(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	unsigned long irq_flags;
+	bool parked;
+
+	/* flush previous dbell bsg */
+	qla_edif_dbell_bsg_done(vha);
+
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+	parked = list_empty(&vha->e_dbell.head) && DBELL_ACTIVE(vha);
+	if (parked) {
+		/*
+		 * when the next db event happens, bsg_job will return.
+		 * Otherwise, timer will return it.
+		 */
+		vha->e_dbell.dbell_bsg_job = bsg_job;
+		vha->e_dbell.bsg_expire = jiffies + 10 * HZ;
+	}
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	if (!parked)
+		__qla_edif_dbell_bsg_done(vha, bsg_job, 1);
+
+	return 0;
+}
+
+/**
+ * qla_edif_app_mgmt - dispatch an EDIF vendor sub-command
+ * @bsg_job: user request; the sub-command is read from vendor_cmd[1]
+ *
+ * The request is rejected with DID_ERROR when EDIF is not enabled, the
+ * vport is being deleted, or the application id check fails.  The bsg job
+ * is completed here except for SA_UPDATE, READ_DBELL and unknown
+ * sub-commands.
+ *
+ * Return: the value returned by the sub-command handler,
+ * EXT_STATUS_INVALID_PARAM for an unknown sub-command, 0 for a rejected
+ * request.
+ */
+int32_t
+qla_edif_app_mgmt(BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_request	*bsg_request = bsg_job->request;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
+	scsi_qla_host_t		*vha = shost_priv(host);
+	uint32_t	vnd_sc = bsg_request->rqst_data.h_vendor.vendor_cmd[1];
+	/* doorbell is high traffic */
+	u32 level = (vnd_sc == QL_VND_SC_READ_DBELL) ? 0 : ql_dbg_edif;
+	struct app_id		appcheck;
+	bool complete_here = true;
+	int32_t         rval = 0;
+
+	ql_dbg(level, vha, 0x911d, "%s vnd subcmd=%x\n",
+	    __func__, vnd_sc);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appcheck,
+	    sizeof(appcheck));
+
+	if (!vha->hw->flags.edif_enabled ||
+		test_bit(VPORT_DELETE, &vha->dpc_flags)) {
+		ql_dbg(level, vha, 0x911d,
+		    "%s edif not enabled or vp delete. bsg ptr done %p. dpc_flags %lx\n",
+		    __func__, bsg_job, vha->dpc_flags);
+
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	if (!qla_edif_app_check(vha, appcheck)) {
+		ql_dbg(level, vha, 0x911d,
+		    "%s app checked failed.\n",
+		    __func__);
+
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	switch (vnd_sc) {
+	case QL_VND_SC_SA_UPDATE:
+		/* the job is not completed in this function */
+		complete_here = false;
+		rval = qla24xx_sadb_update(bsg_job);
+		break;
+	case QL_VND_SC_APP_START:
+		rval = qla_edif_app_start(vha, bsg_job);
+		break;
+	case QL_VND_SC_APP_STOP:
+		rval = qla_edif_app_stop(vha, bsg_job);
+		break;
+	case QL_VND_SC_AUTH_OK:
+		rval = qla_edif_app_authok(vha, bsg_job);
+		break;
+	case QL_VND_SC_AUTH_FAIL:
+		rval = qla_edif_app_authfail(vha, bsg_job);
+		break;
+	case QL_VND_SC_GET_FCINFO:
+		rval = qla_edif_app_getfcinfo(vha, bsg_job);
+		break;
+	case QL_VND_SC_GET_STATS:
+		rval = qla_edif_app_getstats(vha, bsg_job);
+		break;
+	case QL_VND_SC_AEN_COMPLETE:
+		rval = qla_edif_ack(vha, bsg_job);
+		break;
+	case QL_VND_SC_READ_DBELL:
+		/* the doorbell path completes the job itself */
+		complete_here = false;
+		rval = qla_edif_dbell_bsg(vha, bsg_job);
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x911d, "%s unknown cmd=%x\n",
+		    __func__,
+		    bsg_request->rqst_data.h_vendor.vendor_cmd[1]);
+		rval = EXT_STATUS_INVALID_PARAM;
+		complete_here = false;
+		break;
+	}
+
+done:
+	if (complete_here) {
+		ql_dbg(level, vha, 0x7009,
+		    "%s: %d  bsg ptr done %p\n", __func__, __LINE__, bsg_job);
+		bsg_job_done(bsg_job, bsg_reply->result,
+		    bsg_reply->reply_payload_rcv_len);
+	}
+
+	return rval;
+}
+
+/*
+ * Allocate an sa_ctl for @fcport, fill it from @sa_frame and append it to
+ * the port's TX list (@dir == SAU_FLG_TX) or RX list (any other @dir).
+ *
+ * Returns the new sa_ctl, or NULL if the allocation failed.
+ */
+static struct edif_sa_ctl *
+qla_edif_add_sa_ctl(fc_port_t *fcport, struct qla_sa_update_frame *sa_frame,
+	int dir)
+{
+	int	index = sa_frame->fast_sa_index;
+	struct list_head *sa_list;
+	struct	edif_sa_ctl *ctl;
+	unsigned long irq_flags = 0;
+
+	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+	if (!ctl) {
+		/* couldn't get space */
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "unable to allocate SA CTL\n");
+		return NULL;
+	}
+
+	/*
+	 * The sa_index chosen by the caller arrives in
+	 * sa_frame->fast_sa_index and is recorded both in the copied frame
+	 * and in ctl->index.
+	 */
+	INIT_LIST_HEAD(&ctl->next);
+	ctl->sa_frame = *sa_frame;
+	ctl->index = index;
+	ctl->fcport = fcport;
+	ctl->flags = 0;
+	ctl->state = 0L;
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: Added sa_ctl %p, index %d, state 0x%lx\n",
+	    __func__, ctl, ctl->index, ctl->state);
+
+	sa_list = (dir == SAU_FLG_TX) ? &fcport->edif.tx_sa_list :
+					&fcport->edif.rx_sa_list;
+
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, irq_flags);
+	list_add_tail(&ctl->next, sa_list);
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, irq_flags);
+
+	return ctl;
+}
+
+/*
+ * Unlink and free every sa_ctl on the TX and RX lists of @fcport while
+ * holding sa_list_lock.
+ */
+void
+qla_edif_flush_sa_ctl_lists(fc_port_t *fcport)
+{
+	struct list_head *tx_head = &fcport->edif.tx_sa_list;
+	struct list_head *rx_head = &fcport->edif.rx_sa_list;
+	struct edif_sa_ctl *ctl;
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, irq_flags);
+
+	/* drain the TX list first, then the RX list */
+	while (!list_empty(tx_head)) {
+		ctl = list_first_entry(tx_head, struct edif_sa_ctl, next);
+		list_del(&ctl->next);
+		kfree(ctl);
+	}
+
+	while (!list_empty(rx_head)) {
+		ctl = list_first_entry(rx_head, struct edif_sa_ctl, next);
+		list_del(&ctl->next);
+		kfree(ctl);
+	}
+
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, irq_flags);
+}
+
+/*
+ * Find the sa_ctl with the given @index that has EDIF_SA_CTL_USED set.
+ * The TX list is searched when @dir == SAU_FLG_TX, the RX list otherwise.
+ *
+ * Returns the matching sa_ctl, or NULL if there is none.
+ */
+struct edif_sa_ctl *
+qla_edif_find_sa_ctl_by_index(fc_port_t *fcport, int index, int dir)
+{
+	struct list_head *sa_list = (dir == SAU_FLG_TX) ?
+	    &fcport->edif.tx_sa_list : &fcport->edif.rx_sa_list;
+	struct edif_sa_ctl *ctl, *next_ctl;
+
+	list_for_each_entry_safe(ctl, next_ctl, sa_list, next) {
+		if (!test_bit(EDIF_SA_CTL_USED, &ctl->state))
+			continue;
+		if (ctl->index == index)
+			return ctl;
+	}
+
+	return NULL;
+}
+
+/*
+ * Map the SPI of @sa_frame to an sa_index and queue a matching sa_ctl on
+ * the TX or RX list of @fcport.
+ *
+ * Returns 0 on success, RX_DELETE_NO_EDIF_SA_INDEX when an rx delete found
+ * no entry (a SAUPDATE_COMPL event is created in that case),
+ * INVALID_EDIF_SA_INDEX when no sa_index could be obtained, and -1 when the
+ * sa_ctl could not be added.
+ */
+static int
+qla24xx_check_sadb_avail_slot(BSG_JOB_TYPE *bsg_job, fc_port_t *fcport,
+	struct qla_sa_update_frame *sa_frame)
+{
+	int dir = (sa_frame->flags & SAU_FLG_TX);
+	struct edif_sa_ctl *ctl;
+	uint16_t slot;
+
+	/* map the spi to an sa_index */
+	slot = qla_edif_sadb_get_sa_index(fcport, sa_frame);
+
+	if (slot == RX_DELETE_NO_EDIF_SA_INDEX) {
+		/* process rx delete */
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+		    "%s: rx delete for lid 0x%x, spi 0x%x, no entry found\n",
+		    __func__, fcport->loop_id, sa_frame->spi);
+
+		/* build and send the aen */
+		fcport->edif.rx_sa_set = 1;
+		fcport->edif.rx_sa_pending = 0;
+		qla_edb_eventcreate(fcport->vha,
+		    VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+		    QL_VND_SA_STAT_SUCCESS,
+		    QL_VND_RX_SA_KEY, fcport);
+
+		/* force a return of good bsg status; */
+		return RX_DELETE_NO_EDIF_SA_INDEX;
+	}
+
+	if (slot == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "%s: Failed to get sa_index for spi 0x%x, dir: %d\n",
+		    __func__, sa_frame->spi, dir);
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: index %d allocated to spi 0x%x, dir: %d, nport_handle: 0x%x\n",
+	    __func__, slot, sa_frame->spi, dir, fcport->loop_id);
+
+	/* This is a local copy of sa_frame. */
+	sa_frame->fast_sa_index = slot;
+
+	/* create the sa_ctl */
+	ctl = qla_edif_add_sa_ctl(fcport, sa_frame, dir);
+	if (!ctl) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "%s: Failed to add sa_ctl for spi 0x%x, dir: %d, sa_index: %d\n",
+		    __func__, sa_frame->spi, dir, slot);
+		return -1;
+	}
+
+	set_bit(EDIF_SA_CTL_USED, &ctl->state);
+
+	/* count the rekey on the side this SA belongs to */
+	if (dir != SAU_FLG_TX)
+		fcport->edif.rx_rekey_cnt++;
+	else
+		fcport->edif.tx_rekey_cnt++;
+
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: Found sa_ctl %p, index %d, state 0x%lx, tx_cnt %d, rx_cnt %d, nport_handle: 0x%x\n",
+	    __func__, ctl, ctl->index, ctl->state,
+	    fcport->edif.tx_rekey_cnt,
+	    fcport->edif.rx_rekey_cnt, fcport->loop_id);
+
+	return 0;
+}
+
+/* values that qla24xx_sadb_update() compares sa_frame.flags against */
+#define QLA_SA_UPDATE_FLAGS_RX_KEY      0x0
+#define QLA_SA_UPDATE_FLAGS_TX_KEY      0x2
+/*
+ * NOTE(review): presumably a polling interval in milliseconds and a retry
+ * limit; their users are not visible in this part of the file - confirm.
+ */
+#define EDIF_MSLEEP_INTERVAL 100
+#define EDIF_RETRY_COUNT  50
+
+int
+qla24xx_sadb_update(BSG_JOB_TYPE *bsg_job)
+{
+	struct	fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
+	scsi_qla_host_t *vha = shost_priv(host);
+	fc_port_t		*fcport = NULL;
+	srb_t			*sp = NULL;
+	struct edif_list_entry *edif_entry = NULL;
+	int			found = 0;
+	int			rval = 0;
+	int result = 0, cnt;
+	struct qla_sa_update_frame sa_frame;
+	struct srb_iocb *iocb_cmd;
+	port_id_t portid;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d,
+	    "%s entered, vha: 0x%p\n", __func__, vha);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &sa_frame,
+	    sizeof(struct qla_sa_update_frame));
+
+	/* Check if host is online */
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x70a1, "Host is not online\n");
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	if (DBELL_INACTIVE(vha)) {
+		ql_log(ql_log_warn, vha, 0x70a1, "App not started\n");
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	/* silent unaligned access warning */
+	portid.b.domain = sa_frame.port_id.b.domain;
+	portid.b.area   = sa_frame.port_id.b.area;
+	portid.b.al_pa  = sa_frame.port_id.b.al_pa;
+
+	fcport = qla2x00_find_fcport_by_pid(vha, &portid);
+	if (fcport) {
+		found = 1;
+		if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_TX_KEY)
+			fcport->edif.tx_bytes = 0;
+		if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_RX_KEY)
+			fcport->edif.rx_bytes = 0;
+	}
+
+	if (!found) {
+		ql_dbg(ql_dbg_edif, vha, 0x70a3, "Failed to find port= %06x\n",
+		    sa_frame.port_id.b24);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_NO_CONNECT);
+		goto done;
+	}
+
+	/* make sure the nport_handle is valid */
+	if (fcport->loop_id == FC_NO_LOOP_ID) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN lid=FC_NO_LOOP_ID, spi: 0x%x, DS %d, returning NO_CONNECT\n",
+		    __func__, fcport->port_name, sa_frame.spi,
+		    fcport->disc_state);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_NO_CONNECT);
+		goto done;
+	}
+
+	/* allocate and queue an sa_ctl */
+	result = qla24xx_check_sadb_avail_slot(bsg_job, fcport, &sa_frame);
+
+	/* failure of bsg */
+	if (result == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN, skipping update.\n",
+		    __func__, fcport->port_name);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+
+	/* rx delete failure */
+	} else if (result == RX_DELETE_NO_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN, skipping rx delete.\n",
+		    __func__, fcport->port_name);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		goto done;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x70e1,
+	    "%s: %8phN, sa_index in sa_frame: %d flags %xh\n",
+	    __func__, fcport->port_name, sa_frame.fast_sa_index,
+	    sa_frame.flags);
+
+	/* looking for rx index and delete */
+	if (((sa_frame.flags & SAU_FLG_TX) == 0) &&
+	    (sa_frame.flags & SAU_FLG_INV)) {
+		uint16_t nport_handle = fcport->loop_id;
+		uint16_t sa_index = sa_frame.fast_sa_index;
+
+		/*
+		 * make sure we have an existing rx key, otherwise just process
+		 * this as a straight delete just like TX
+		 * This is NOT a normal case, it indicates an error recovery or key cleanup
+		 * by the ipsec code above us.
+		 */
+		edif_entry = qla_edif_list_find_sa_index(fcport, fcport->loop_id);
+		if (!edif_entry) {
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: WARNING: no active sa_index for nport_handle 0x%x, forcing delete for sa_index 0x%x\n",
+			    __func__, fcport->loop_id, sa_index);
+			goto force_rx_delete;
+		}
+
+		/*
+		 * if we have a forced delete for rx, remove the sa_index from the edif list
+		 * and proceed with normal delete.  The rx delay timer should not be running
+		 */
+		if ((sa_frame.flags & SAU_FLG_FORCE_DELETE) == SAU_FLG_FORCE_DELETE) {
+			qla_edif_list_delete_sa_index(fcport, edif_entry);
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: FORCE DELETE flag found for nport_handle 0x%x, sa_index 0x%x, forcing DELETE\n",
+			    __func__, fcport->loop_id, sa_index);
+			kfree(edif_entry);
+			goto force_rx_delete;
+		}
+
+		/*
+		 * delayed rx delete
+		 *
+		 * if delete_sa_index is not invalid then there is already
+		 * a delayed index in progress, return bsg bad status
+		 */
+		if (edif_entry->delete_sa_index != INVALID_EDIF_SA_INDEX) {
+			struct edif_sa_ctl *sa_ctl;
+
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: delete for lid 0x%x, delete_sa_index %d is pending\n",
+			    __func__, edif_entry->handle, edif_entry->delete_sa_index);
+
+			/* free up the sa_ctl that was allocated with the sa_index */
+			sa_ctl = qla_edif_find_sa_ctl_by_index(fcport, sa_index,
+			    (sa_frame.flags & SAU_FLG_TX));
+			if (sa_ctl) {
+				ql_dbg(ql_dbg_edif, vha, 0x3063,
+				    "%s: freeing sa_ctl for index %d\n",
+				    __func__, sa_ctl->index);
+				qla_edif_free_sa_ctl(fcport, sa_ctl, sa_ctl->index);
+			}
+
+			/* release the sa_index */
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: freeing sa_index %d, nph: 0x%x\n",
+			    __func__, sa_index, nport_handle);
+			qla_edif_sadb_delete_sa_index(fcport, nport_handle, sa_index);
+
+			rval = -EINVAL;
+			SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+			goto done;
+		}
+
+		fcport->edif.rekey_cnt++;
+
+		/* configure and start the rx delay timer */
+		edif_entry->fcport = fcport;
+		edif_entry->timer.expires = jiffies + RX_DELAY_DELETE_TIMEOUT * HZ;
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: adding timer, entry: %p, delete sa_index %d, lid 0x%x to edif_list\n",
+		    __func__, edif_entry, sa_index, nport_handle);
+
+		/*
+		 * Start the timer when we queue the delayed rx delete.
+		 * This is an activity timer that goes off if we have not
+		 * received packets with the new sa_index
+		 */
+		add_timer(&edif_entry->timer);
+
+		/*
+		 * sa_delete for rx key with an active rx key including this one
+		 * add the delete rx sa index to the hash so we can look for it
+		 * in the rsp queue.  Do this after making any changes to the
+		 * edif_entry as part of the rx delete.
+		 */
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: delete sa_index %d, lid 0x%x to edif_list. bsg done ptr %p\n",
+		    __func__, sa_index, nport_handle, bsg_job);
+
+		edif_entry->delete_sa_index = sa_index;
+
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		bsg_reply->result = DID_OK << 16;
+
+		goto done;
+
+	/*
+	 * rx index and update
+	 * add the index to the list and continue with normal update
+	 */
+	} else if (((sa_frame.flags & SAU_FLG_TX) == 0) &&
+	    ((sa_frame.flags & SAU_FLG_INV) == 0)) {
+		/* sa_update for rx key */
+		uint32_t nport_handle = fcport->loop_id;
+		uint16_t sa_index = sa_frame.fast_sa_index;
+		int result;
+
+		/*
+		 * add the update rx sa index to the hash so we can look for it
+		 * in the rsp queue and continue normally
+		 */
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s:  adding update sa_index %d, lid 0x%x to edif_list\n",
+		    __func__, sa_index, nport_handle);
+
+		result = qla_edif_list_add_sa_update_index(fcport, sa_index,
+		    nport_handle);
+		if (result) {
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: SA_UPDATE failed to add new sa index %d to list for lid 0x%x\n",
+			    __func__, sa_index, nport_handle);
+		}
+	}
+	if (sa_frame.flags & SAU_FLG_GMAC_MODE)
+		fcport->edif.aes_gmac = 1;
+	else
+		fcport->edif.aes_gmac = 0;
+
+force_rx_delete:
+	/*
+	 * sa_update for both rx and tx keys, sa_delete for tx key
+	 * immediately process the request
+	 */
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	sp->type = SRB_SA_UPDATE;
+	sp->name = "bsg_sa_update";
+	sp->u.bsg_job = bsg_job;
+	/* sp->free = qla2x00_bsg_sp_free; */
+	sp->free = qla2x00_rel_sp;
+	sp->done = qla2x00_bsg_job_done;
+	iocb_cmd = &sp->u.iocb_cmd;
+	iocb_cmd->u.sa_update.sa_frame  = sa_frame;
+	cnt = 0;
+retry:
+	rval = qla2x00_start_sp(sp);
+	switch (rval) {
+	case QLA_SUCCESS:
+		break;
+	case EAGAIN:
+		msleep(EDIF_MSLEEP_INTERVAL);
+		cnt++;
+		if (cnt < EDIF_RETRY_COUNT)
+			goto retry;
+
+		fallthrough;
+	default:
+		ql_log(ql_dbg_edif, vha, 0x70e3,
+		       "%s qla2x00_start_sp failed=%d.\n",
+		       __func__, rval);
+
+		qla2x00_rel_sp(sp);
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s:  %s sent, hdl=%x, portid=%06x.\n",
+	    __func__, sp->name, sp->handle, fcport->d_id.b24);
+
+	fcport->edif.rekey_cnt++;
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+	return 0;
+
+/*
+ * send back error status
+ */
+done:
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s:status: FAIL, result: 0x%x, bsg ptr done %p\n",
+	    __func__, bsg_reply->result, bsg_job);
+	bsg_job_done(bsg_job, bsg_reply->result,
+	    bsg_reply->reply_payload_rcv_len);
+
+	return 0;
+}
+
+/*
+ * Release an enode: its type is reset to N_UNDEF and the allocation is
+ * handed back with kfree().  @vha is not used by this function.
+ */
+static void qla_enode_free(scsi_qla_host_t *vha, struct enode *node)
+{
+	/* tag the node as undefined before the memory goes away */
+	node->ntype = N_UNDEF;
+
+	kfree(node);
+}
+
+/**
+ * qla_enode_init - initialize the enode list head and its lock
+ * @vha: host adapter pointer
+ *
+ * Should only be called when the driver is attaching.  If the enode list
+ * is already flagged ENODE_ACTIVE the call is logged and nothing is
+ * re-initialized.
+ */
+void
+qla_enode_init(scsi_qla_host_t *vha)
+{
+	if (vha->pur_cinfo.enode_flags == ENODE_ACTIVE) {
+		/* list still active - error */
+		ql_dbg(ql_dbg_edif, vha, 0x09102, "%s enode still active\n",
+		    __func__);
+		return;
+	}
+
+	/* initialize lock which protects pur_core & init list */
+	spin_lock_init(&vha->pur_cinfo.pur_lock);
+	INIT_LIST_HEAD(&vha->pur_cinfo.head);
+}
+
+/**
+ * qla_enode_stop - stop enode handling and discard all queued enodes
+ * @vha: host adapter pointer
+ *
+ * Called when the app has notified the driver that it is exiting.
+ */
+void
+qla_enode_stop(scsi_qla_host_t *vha)
+{
+	struct enode *cur, *nxt;
+	unsigned long irq_flags;
+
+	/* nothing to tear down unless the enode list is active */
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s enode not active\n", __func__);
+		return;
+	}
+
+	/* hold pur_lock so the list cannot change while it is drained */
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, irq_flags);
+
+	/* mark it not active */
+	vha->pur_cinfo.enode_flags &= ~ENODE_ACTIVE;
+
+	/* the list is expected to be empty here; free whatever is left */
+	list_for_each_entry_safe(cur, nxt, &vha->pur_cinfo.head, list) {
+		ql_dbg(ql_dbg_edif, vha, 0x910f,
+		    "%s freeing enode type=%x, cnt=%x\n", __func__, cur->ntype,
+		    cur->dinfo.nodecnt);
+		list_del_init(&cur->list);
+		qla_enode_free(vha, cur);
+	}
+
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, irq_flags);
+}
+
+/*
+ * Drop every queued enode whose purex source id equals @portid.
+ * Matching nodes are moved onto a private list while pur_lock is held and
+ * are freed only after the lock has been released.
+ */
+static void qla_enode_clear(scsi_qla_host_t *vha, port_id_t portid)
+{
+	struct purexevent *pur;
+	struct enode *cur, *nxt;
+	unsigned long irq_flags;
+	LIST_HEAD(doomed);
+
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		       "%s enode not active\n", __func__);
+		return;
+	}
+
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, irq_flags);
+	list_for_each_entry_safe(cur, nxt, &vha->pur_cinfo.head, list) {
+		pur = &cur->u.purexinfo;
+
+		/* enodes received from other ports stay queued */
+		if (pur->pur_info.pur_sid.b24 != portid.b24)
+			continue;
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s free ELS sid=%06x. xchg %x, nb=%xh\n",
+		    __func__, portid.b24,
+		    pur->pur_info.pur_rx_xchg_address,
+		    pur->pur_info.pur_bytes_rcvd);
+
+		/* unlink from the live list and park on the private one */
+		list_del_init(&cur->list);
+		list_add_tail(&cur->list, &doomed);
+	}
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, irq_flags);
+
+	/* free the collected enodes now that pur_lock is dropped */
+	list_for_each_entry_safe(cur, nxt, &doomed, list) {
+		list_del_init(&cur->list);
+		qla_enode_free(vha, cur);
+	}
+}
+
+/*
+ * Allocate a zeroed enode of RX_ELS_SIZE bytes (GFP_ATOMIC) and set it up:
+ * the purex message pointer is aimed at the memory directly behind the
+ * enode structure and its length is set to ELS_MAX_PAYLOAD.
+ *
+ * Returns the new enode, or NULL if the allocation failed.
+ */
+static struct enode *
+qla_enode_alloc(scsi_qla_host_t *vha, uint32_t ntype)
+{
+	struct enode *new_node = kzalloc(RX_ELS_SIZE, GFP_ATOMIC);
+
+	if (new_node) {
+		struct purexevent *pur = &new_node->u.purexinfo;
+
+		/* payload buffer starts right after the enode itself */
+		pur->msgp = (u8 *)(new_node + 1);
+		pur->msgp_len = ELS_MAX_PAYLOAD;
+
+		new_node->ntype = ntype;
+		INIT_LIST_HEAD(&new_node->list);
+	}
+
+	return new_node;
+}
+
+/* Append an already allocated enode to the tail of the host's purex list. */
+static void
+qla_enode_add(scsi_qla_host_t *vha, struct enode *ptr)
+{
+	unsigned long irq_flags;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x9109,
+	    "%s add enode for type=%x, cnt=%x\n",
+	    __func__, ptr->ntype, ptr->dinfo.nodecnt);
+
+	/* the list is only modified with pur_lock held */
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, irq_flags);
+	list_add_tail(&ptr->list, &vha->pur_cinfo.head);
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, irq_flags);
+}
+
+/*
+ * Look for the first queued enode whose purex source id equals @p1, unlink
+ * it from the list and return it.  @ntype and @p2 are not examined.
+ *
+ * Returns the unlinked enode, or NULL when nothing matches.
+ */
+static struct enode *
+qla_enode_find(scsi_qla_host_t *vha, uint32_t ntype, uint32_t p1, uint32_t p2)
+{
+	const uint32_t want_sid = p1;
+	struct enode *found = NULL;
+	struct enode *cur, *nxt;
+	unsigned long irq_flags;
+
+	/* secure the list from moving under us */
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, irq_flags);
+
+	list_for_each_entry_safe(cur, nxt, &vha->pur_cinfo.head, list) {
+		if (cur->u.purexinfo.pur_info.pur_sid.b24 != want_sid)
+			continue;
+
+		/* match: take it off the list and hand it to the caller */
+		list_del(&cur->list);
+		found = cur;
+		break;
+	}
+
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, irq_flags);
+
+	return found;
+}
+
+/**
+ * qla_pur_get_pending - read/return authentication message sent
+ *  from remote port
+ * @vha: host adapter pointer
+ * @fcport: session pointer
+ * @bsg_job: user request the message is copied to.
+ *
+ * Return: 0 on success, -EIO if no message is queued for @fcport.
+ */
+static int
+qla_pur_get_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+	BSG_JOB_TYPE *bsg_job)
+{
+	struct qla_bsg_auth_els_reply *reply =
+	    (struct qla_bsg_auth_els_reply *)bsg_job->reply;
+	struct purexevent *pur;
+	struct enode *node;
+
+	bsg_job->reply_len = sizeof(*reply);
+
+	node = qla_enode_find(vha, N_PUREX, fcport->d_id.b24, PUR_GET);
+	if (node == NULL) {
+		ql_dbg(ql_dbg_edif, vha, 0x9111,
+		    "%s no enode data found for %8phN sid=%06x\n",
+		    __func__, fcport->port_name, fcport->d_id.b24);
+		SET_DID_STATUS(reply->r.result, DID_IMM_RETRY);
+		return -EIO;
+	}
+
+	/* qla_enode_find() already unlinked the enode, so it is ours now */
+	pur = &node->u.purexinfo;
+
+	/* hand the exchange address and the payload back to the caller */
+	reply->rx_xchg_address = pur->pur_info.pur_rx_xchg_address;
+	SET_DID_STATUS(reply->r.result, DID_OK);
+	reply->r.reply_payload_rcv_len =
+	    sg_pcopy_from_buffer(bsg_job->reply_payload.sg_list,
+		bsg_job->reply_payload.sg_cnt, pur->msgp,
+		pur->pur_info.pur_bytes_rcvd, 0);
+
+	/* the data has been passed back - the enode is no longer needed */
+	qla_enode_free(vha, node);
+
+	return 0;
+}
+
+/*
+ * Allocate an IOCB on @qp, fill it from @a via qla_els_pt_iocb() and start
+ * it.  The caller is assumed to hold the qpair lock.
+ *
+ * Returns 0 on success, QLA_FUNCTION_FAILED if no IOCB could be allocated.
+ */
+static int
+qla_els_reject_iocb(scsi_qla_host_t *vha, struct qla_qpair *qp,
+	struct qla_els_pt_arg *a)
+{
+	struct els_entry_24xx *iocb = __qla2x00_alloc_iocbs(qp, NULL);
+
+	if (iocb == NULL) {
+		ql_log(ql_log_warn, vha, 0x700c,
+		    "qla2x00_alloc_iocbs failed.\n");
+		return QLA_FUNCTION_FAILED;
+	}
+
+	qla_els_pt_iocb(vha, iocb, a);
+
+	ql_dbg(ql_dbg_edif, vha, 0x0183,
+	    "Sending ELS reject ox_id %04x s:%06x -> d:%06x\n",
+	    a->ox_id, a->sid.b24, a->did.b24);
+	ql_dump_buffer(ql_dbg_edif + ql_dbg_verbose, vha, 0x0185,
+	    vha->hw->elsrej.c, sizeof(*vha->hw->elsrej.c));
+
+	/* flush iocb to mem before notifying hw doorbell */
+	wmb();
+	qla2x00_start_iocbs(vha, qp->req);
+
+	return 0;
+}
+
+/*
+ * Initialise the doorbell list head and the lock that protects it.
+ * A doorbell that is already active is left untouched (the attempt is
+ * only logged).
+ */
+void
+qla_edb_init(scsi_qla_host_t *vha)
+{
+	/* refuse to re-initialise a doorbell that is already active */
+	if (DBELL_ACTIVE(vha)) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "edif db already initialized, cannot reinit\n");
+		return;
+	}
+
+	/* set up the doorbell lock first, then the empty event list */
+	spin_lock_init(&vha->e_dbell.db_lock);
+	INIT_LIST_HEAD(&vha->e_dbell.head);
+}
+
+/*
+ * Remove the queued doorbell events that belong to @portid.
+ * SAUPDATE_COMPL events are always kept.  Matching nodes are moved to a
+ * private list under db_lock and released after the lock is dropped.
+ */
+static void qla_edb_clear(scsi_qla_host_t *vha, port_id_t portid)
+{
+	struct edb_node *cur, *nxt;
+	unsigned long irq_flags;
+	port_id_t node_id;
+	LIST_HEAD(doomed);
+
+	if (DBELL_INACTIVE(vha)) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		       "%s doorbell not enabled\n", __func__);
+		return;
+	}
+
+	/* grab lock so list doesn't move */
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+	list_for_each_entry_safe(cur, nxt, &vha->e_dbell.head, list) {
+		/* which union member holds the port id depends on the type */
+		switch (cur->ntype) {
+		case VND_CMD_AUTH_STATE_NEEDED:
+		case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+			node_id = cur->u.plogi_did;
+			break;
+		case VND_CMD_AUTH_STATE_ELS_RCVD:
+			node_id = cur->u.els_sid;
+			break;
+		case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+			/* app wants to see this  */
+			continue;
+		default:
+			ql_log(ql_log_warn, vha, 0x09102,
+			       "%s unknown node type: %x\n", __func__, cur->ntype);
+			node_id.b24 = 0;
+			break;
+		}
+
+		if (node_id.b24 != portid.b24)
+			continue;
+
+		ql_dbg(ql_dbg_edif, vha, 0x910f,
+		       "%s free doorbell event : node type = %x %p\n",
+		       __func__, cur->ntype, cur);
+		list_del_init(&cur->list);
+		list_add_tail(&cur->list, &doomed);
+	}
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	/* release the collected events outside of db_lock */
+	list_for_each_entry_safe(cur, nxt, &doomed, list) {
+		qla_edb_node_free(vha, cur);
+	}
+}
+
+/*
+ * Called when the app is stopping: clear EDB_ACTIVE, free every doorbell
+ * node that is still queued and then call qla_edif_dbell_bsg_done().
+ */
+void
+qla_edb_stop(scsi_qla_host_t *vha)
+{
+	struct edb_node *cur, *nxt;
+	unsigned long irq_flags;
+
+	if (DBELL_INACTIVE(vha)) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+		return;
+	}
+
+	/* grab lock so list doesn't move */
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+
+	/* mark it not active */
+	vha->e_dbell.db_flags &= ~EDB_ACTIVE;
+
+	/* the list is expected to be empty here; free whatever is left */
+	list_for_each_entry_safe(cur, nxt, &vha->e_dbell.head, list) {
+		ql_dbg(ql_dbg_edif, vha, 0x910f,
+		    "%s freeing edb_node type=%x\n",
+		    __func__, cur->ntype);
+		qla_edb_node_free(vha, cur);
+	}
+
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	qla_edif_dbell_bsg_done(vha);
+}
+
+/*
+ * Allocate a zeroed doorbell node (GFP_ATOMIC) of type @ntype.
+ * Returns the node, or NULL if the allocation failed.
+ */
+static struct edb_node *
+qla_edb_node_alloc(scsi_qla_host_t *vha, uint32_t ntype)
+{
+	struct edb_node *new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+
+	if (new_node == NULL) {
+		/* couldn't get space */
+		ql_dbg(ql_dbg_edif, vha, 0x9100,
+		    "edb node unable to be allocated\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&new_node->list);
+	new_node->ntype = ntype;
+
+	return new_node;
+}
+
+/*
+ * Append an already allocated doorbell node to the doorbell list.
+ * Returns true if the node was queued, false if the doorbell is inactive
+ * (in which case the node is not touched).
+ */
+static bool
+qla_edb_node_add(scsi_qla_host_t *vha, struct edb_node *ptr)
+{
+	unsigned long irq_flags;
+
+	if (DBELL_INACTIVE(vha)) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+		return false;
+	}
+
+	/* the list is only modified with db_lock held */
+	spin_lock_irqsave(&vha->e_dbell.db_lock, irq_flags);
+	list_add_tail(&ptr->list, &vha->e_dbell.head);
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, irq_flags);
+
+	return true;
+}
+
+/*
+ * Add an event to the doorbell list.
+ *
+ * @vha:     host adapter pointer
+ * @dbtype:  event type, one of the VND_CMD_AUTH_STATE_* values handled below
+ * @data:    when @sfcport is NULL, the 24-bit port id used to look up the
+ *           session; for VND_CMD_AUTH_STATE_SAUPDATE_COMPL it is also stored
+ *           as the event status
+ * @data2:   stored as the key type for VND_CMD_AUTH_STATE_SAUPDATE_COMPL
+ * @sfcport: session the event belongs to, or NULL to look it up from @data
+ *
+ * Nothing is queued when edif is disabled, the doorbell is inactive, the
+ * node cannot be allocated, no session is found or @dbtype is unknown.
+ * With an inactive doorbell, a non-NULL @sfcport still gets its auth_state
+ * set to @dbtype.
+ */
+void
+qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype,
+	uint32_t data, uint32_t data2, fc_port_t *sfcport)
+{
+	struct edb_node	*edbnode;
+	fc_port_t *fcport = sfcport;
+	port_id_t id;
+
+	if (!vha->hw->flags.edif_enabled) {
+		/* edif not enabled */
+		return;
+	}
+
+	if (DBELL_INACTIVE(vha)) {
+		if (fcport)
+			fcport->edif.auth_state = dbtype;
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled (type=%d)\n", __func__, dbtype);
+		return;
+	}
+
+	edbnode = qla_edb_node_alloc(vha, dbtype);
+	if (!edbnode) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s unable to alloc db node\n", __func__);
+		return;
+	}
+
+	/* no session supplied: resolve it from the port id carried in data */
+	if (!fcport) {
+		id.b.domain = (data >> 16) & 0xff;
+		id.b.area = (data >> 8) & 0xff;
+		id.b.al_pa = data & 0xff;
+		ql_dbg(ql_dbg_edif, vha, 0x09222,
+		    "%s: Arrived s_id: %06x\n", __func__,
+		    id.b24);
+		fcport = qla2x00_find_fcport_by_pid(vha, &id);
+		if (!fcport) {
+			ql_dbg(ql_dbg_edif, vha, 0x09102,
+			    "%s can't find fcport for sid= 0x%x - ignoring\n",
+			__func__, id.b24);
+			kfree(edbnode);
+			return;
+		}
+	}
+
+	/* populate the edb node */
+	switch (dbtype) {
+	case VND_CMD_AUTH_STATE_NEEDED:
+	case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+		edbnode->u.plogi_did.b24 = fcport->d_id.b24;
+		break;
+	case VND_CMD_AUTH_STATE_ELS_RCVD:
+		edbnode->u.els_sid.b24 = fcport->d_id.b24;
+		break;
+	case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+		edbnode->u.sa_aen.port_id = fcport->d_id;
+		edbnode->u.sa_aen.status =  data;
+		edbnode->u.sa_aen.key_type =  data2;
+		edbnode->u.sa_aen.version = EDIF_VERSION1;
+		break;
+	default:
+		/* unknown event type: drop the node, nothing gets queued */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+			"%s unknown type: %x\n", __func__, dbtype);
+		kfree(edbnode);
+		return;
+	}
+
+	if (!qla_edb_node_add(vha, edbnode)) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s unable to add dbnode\n", __func__);
+		kfree(edbnode);
+		return;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x09102,
+	    "%s Doorbell produced : type=%d %p\n", __func__, dbtype, edbnode);
+	qla_edif_dbell_bsg_done(vha);
+
+	/* fcport is non-NULL here: it was either passed in or looked up */
+	fcport->edif.auth_state = dbtype;
+}
+
+/*
+ * EDIF timer hook.
+ *
+ * On the base port (vp_idx 0) of an N2N topology with n2n_fw_acc_sec set,
+ * count edif_post_stop_cnt_down down while the doorbell is inactive and
+ * flag ISP_ABORT_NEEDED when it reaches zero; otherwise the counter is
+ * reloaded with 60.  Independently of that, a pending doorbell bsg job
+ * whose expiry time has been reached is completed.
+ */
+void
+qla_edif_timer(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!vha->vp_idx && N2N_TOPO(ha) && ha->flags.n2n_fw_acc_sec) {
+		if (DBELL_INACTIVE(vha) && ha->edif_post_stop_cnt_down) {
+			/*
+			 * Countdown expired: a chip reset is requested to turn
+			 * off the auto 'Plogi Acc + secure=1' feature
+			 * (Add FW option[3], BIT_15).
+			 */
+			if (--ha->edif_post_stop_cnt_down == 0) {
+				ql_dbg(ql_dbg_async, vha, 0x911d,
+				       "%s chip reset to turn off PLOGI ACC + secure\n",
+				       __func__);
+				set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+			}
+		} else {
+			/* doorbell active or counter already zero: reload */
+			ha->edif_post_stop_cnt_down = 60;
+		}
+	}
+
+	/* finish a doorbell bsg request that has reached its expiry time */
+	if (vha->e_dbell.dbell_bsg_job &&
+	    time_after_eq(jiffies, vha->e_dbell.bsg_expire))
+		qla_edif_dbell_bsg_done(vha);
+}
+
+/*
+ * srb completion callback: clear the async flags on the srb's port and
+ * drop the command kref.  @res is not used.
+ */
+static void qla_noop_sp_done(srb_t *sp, int res)
+{
+	fc_port_t *fcport = sp->fcport;
+
+	fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
+}
+
+/*
+ * Called from work queue
+ * build and send the sa_update iocb to delete an rx sa_index
+ *
+ * @vha: host adapter pointer
+ * @e:   work event carrying the sa_ctl and nport handle
+ *
+ * Returns QLA_SUCCESS when the srb was started, -ENOMEM when the event
+ * carries no sa_ctl or no srb could be allocated, otherwise the error
+ * returned by qla2x00_start_sp().
+ */
+int
+qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e)
+{
+	srb_t *sp;
+	fc_port_t	*fcport;
+	struct srb_iocb *iocb_cmd = NULL;
+	int rval = QLA_SUCCESS;
+	struct	edif_sa_ctl *sa_ctl = e->u.sa_update.sa_ctl;
+	uint16_t nport_handle = e->u.sa_update.nport_handle;
+
+	ql_dbg(ql_dbg_edif, vha, 0x70e6,
+	    "%s: starting,  sa_ctl: %p\n", __func__, sa_ctl);
+
+	if (!sa_ctl) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		    "sa_ctl allocation failed\n");
+		/*
+		 * Without an sa_ctl there is no fcport, so there are no port
+		 * flags to clean up.  Return directly: the 'done' label
+		 * dereferences fcport and must not be reached from here.
+		 */
+		return -ENOMEM;
+	}
+
+	fcport = sa_ctl->fcport;
+
+	/* Alloc SRB structure */
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		 "SRB allocation failed\n");
+		rval = -ENOMEM;
+		goto done;
+	}
+
+	fcport->flags |= FCF_ASYNC_SENT;
+	iocb_cmd = &sp->u.iocb_cmd;
+	iocb_cmd->u.sa_update.sa_ctl = sa_ctl;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3073,
+	    "Enter: SA REPL portid=%06x, sa_ctl %p, index %x, nport_handle: 0x%x\n",
+	    fcport->d_id.b24, sa_ctl, sa_ctl->index, nport_handle);
+	/*
+	 * if this is a sadb cleanup delete, mark it so the isr can
+	 * take the correct action
+	 */
+	if (sa_ctl->flags & EDIF_SA_CTL_FLG_CLEANUP_DEL) {
+		/* mark this srb as a cleanup delete */
+		sp->flags |= SRB_EDIF_CLEANUP_DELETE;
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		    "%s: sp 0x%p flagged as cleanup delete\n", __func__, sp);
+	}
+
+	sp->type = SRB_SA_REPLACE;
+	sp->name = "SA_REPLACE";
+	sp->fcport = fcport;
+	sp->free = qla2x00_rel_sp;
+	sp->done = qla_noop_sp_done;
+
+	rval = qla2x00_start_sp(sp);
+	if (rval != QLA_SUCCESS)
+		goto done_free_sp;
+
+	return rval;
+
+done_free_sp:
+	/* the srb was never started: drop its reference and undo SENT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
+	fcport->flags &= ~FCF_ASYNC_SENT;
+done:
+	fcport->flags &= ~FCF_ASYNC_ACTIVE;
+	return rval;
+}
+
+/*
+ * Fill in an SA_UPDATE IOCB from the sa_frame stored in the srb.
+ *
+ * The INV/TX bits of the frame select the IOCB flags (update or delete, rx
+ * or tx).  Port addressing, salt, spi, sa_index, the control bits and the
+ * 128- or 256-bit key are copied in.  Finally the port's tx or rx
+ * sa_pending flag is set and the control request counter is bumped.
+ */
+void qla24xx_sa_update_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb)
+{
+	struct scsi_qla_host *vha = sp->vha;
+	struct qla_sa_update_frame *frame =
+		&sp->u.iocb_cmd.u.sa_update.sa_frame;
+	fc_port_t *fcport = sp->fcport;
+	u8 iocb_flags = 0;
+	int key_len;
+	int i;
+
+	/* translate the frame's INV/TX combination into IOCB flags */
+	switch (frame->flags & (SAU_FLG_INV | SAU_FLG_TX)) {
+	case 0:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA UPDATE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, frame->fast_sa_index);
+		break;
+	case 1:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA DELETE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, frame->fast_sa_index);
+		iocb_flags |= SA_FLAG_INVALIDATE;
+		break;
+	case 2:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA UPDATE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, frame->fast_sa_index);
+		iocb_flags |= SA_FLAG_TX;
+		break;
+	case 3:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA DELETE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, frame->fast_sa_index);
+		iocb_flags |= SA_FLAG_TX | SA_FLAG_INVALIDATE;
+		break;
+	}
+
+	/* IOCB header */
+	sa_update_iocb->entry_type = SA_UPDATE_IOCB_TYPE;
+	sa_update_iocb->entry_count = 1;
+	sa_update_iocb->sys_define = 0;
+	sa_update_iocb->entry_status = 0;
+	sa_update_iocb->handle = sp->handle;
+
+	/* addressing of the remote port */
+	sa_update_iocb->u.nport_handle = cpu_to_le16(fcport->loop_id);
+	sa_update_iocb->vp_index = fcport->vha->vp_idx;
+	sa_update_iocb->port_id[0] = fcport->d_id.b.al_pa;
+	sa_update_iocb->port_id[1] = fcport->d_id.b.area;
+	sa_update_iocb->port_id[2] = fcport->d_id.b.domain;
+
+	/* security association parameters */
+	sa_update_iocb->flags = iocb_flags;
+	sa_update_iocb->salt = cpu_to_le32(frame->salt);
+	sa_update_iocb->spi = cpu_to_le32(frame->spi);
+	sa_update_iocb->sa_index = cpu_to_le16(frame->fast_sa_index);
+
+	sa_update_iocb->sa_control |= SA_CNTL_ENC_FCSP;
+	if (fcport->edif.aes_gmac)
+		sa_update_iocb->sa_control |= SA_CNTL_AES_GMAC;
+
+	/* key size: 32 bytes with SAU_FLG_KEY256, 16 bytes otherwise */
+	if (frame->flags & SAU_FLG_KEY256) {
+		sa_update_iocb->sa_control |= SA_CNTL_KEY256;
+		key_len = 32;
+	} else {
+		sa_update_iocb->sa_control |= SA_CNTL_KEY128;
+		key_len = 16;
+	}
+	for (i = 0; i < key_len; i++)
+		sa_update_iocb->sa_key[i] = frame->sa_key[i];
+
+	ql_dbg(ql_dbg_edif, vha, 0x921d,
+	    "%s SAU Port ID = %02x%02x%02x, flags=%xh, index=%u, ctl=%xh, SPI 0x%x flags 0x%x hdl=%x gmac %d\n",
+	    __func__, sa_update_iocb->port_id[2], sa_update_iocb->port_id[1],
+	    sa_update_iocb->port_id[0], sa_update_iocb->flags, sa_update_iocb->sa_index,
+	    sa_update_iocb->sa_control, sa_update_iocb->spi, frame->flags, sp->handle,
+	    fcport->edif.aes_gmac);
+
+	/* remember which direction has an SA update outstanding */
+	if (frame->flags & SAU_FLG_TX)
+		fcport->edif.tx_sa_pending = 1;
+	else
+		fcport->edif.rx_sa_pending = 1;
+
+	fcport->vha->qla_stats.control_requests++;
+}
+
+/*
+ * Fill in an SA_UPDATE IOCB that invalidates the sa_index recorded in the
+ * srb's sa_ctl, then bump the control request counter.
+ */
+void
+qla24xx_sa_replace_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb)
+{
+	struct scsi_qla_host *vha = sp->vha;
+	struct edif_sa_ctl *sa_ctl = sp->u.iocb_cmd.u.sa_update.sa_ctl;
+	fc_port_t *fcport = sp->fcport;
+	uint16_t nport_handle = fcport->loop_id;
+
+	/* IOCB header */
+	sa_update_iocb->entry_type = SA_UPDATE_IOCB_TYPE;
+	sa_update_iocb->entry_count = 1;
+	sa_update_iocb->sys_define = 0;
+	sa_update_iocb->entry_status = 0;
+	sa_update_iocb->handle = sp->handle;
+
+	/* addressing of the remote port */
+	sa_update_iocb->u.nport_handle = cpu_to_le16(nport_handle);
+	sa_update_iocb->vp_index = fcport->vha->vp_idx;
+	sa_update_iocb->port_id[0] = fcport->d_id.b.al_pa;
+	sa_update_iocb->port_id[1] = fcport->d_id.b.area;
+	sa_update_iocb->port_id[2] = fcport->d_id.b.domain;
+
+	/* Invalidate the index; salt, spi, control & key are ignored */
+	sa_update_iocb->flags = SA_FLAG_INVALIDATE;
+	sa_update_iocb->salt = 0;
+	sa_update_iocb->spi = 0;
+	sa_update_iocb->sa_index = cpu_to_le16(sa_ctl->index);
+	sa_update_iocb->sa_control = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x921d,
+	    "%s SAU DELETE RX Port ID = %02x:%02x:%02x, lid %d flags=%xh, index=%u, hdl=%x\n",
+	    __func__, sa_update_iocb->port_id[2], sa_update_iocb->port_id[1],
+	    sa_update_iocb->port_id[0], nport_handle, sa_update_iocb->flags,
+	    sa_update_iocb->sa_index, sp->handle);
+
+	fcport->vha->qla_stats.control_requests++;
+}
+
+/*
+ * Handle a received AUTH ELS purex entry.
+ *
+ * @vha: host adapter pointer the entry arrived on
+ * @pkt: in/out pointer to the current response queue entry
+ * @rsp: in/out pointer to the response queue
+ *
+ * A reject/terminate argument block is prepared up front.  The frame is
+ * rejected when it is flagged by status bit 0x8000, is larger than
+ * ELS_MAX_PAYLOAD, edif is disabled, no enode can be allocated, the payload
+ * copy fails, no host matches the destination id or the doorbell is
+ * inactive.  If the session is down the exchange is terminated and the
+ * session is scheduled for deletion.  Otherwise the payload is queued as an
+ * enode on the matching host and a VND_CMD_AUTH_STATE_ELS_RCVD doorbell
+ * event is created.
+ */
+void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+{
+	struct purex_entry_24xx *p = *pkt;
+	struct enode		*ptr;
+	int		sid;
+	u16 totlen;
+	struct purexevent	*purex;
+	struct scsi_qla_host *host = NULL;
+	int rc;
+	struct fc_port *fcport;
+	struct qla_els_pt_arg a;
+	be_id_t beid;
+
+	memset(&a, 0, sizeof(a));
+
+	/* pre-build the LS_RJT argument block; a.did is the frame's source id */
+	a.els_opcode = ELS_AUTH_ELS;
+	a.nport_handle = p->nport_handle;
+	a.rx_xchg_address = p->rx_xchg_addr;
+	a.did.b.domain = p->s_id[2];
+	a.did.b.area   = p->s_id[1];
+	a.did.b.al_pa  = p->s_id[0];
+	a.tx_byte_count = a.tx_len = sizeof(struct fc_els_ls_rjt);
+	a.tx_addr = vha->hw->elsrej.cdma;
+	a.vp_idx = vha->vp_idx;
+	a.control_flags = EPD_ELS_RJT;
+	a.ox_id = le16_to_cpu(p->ox_id);
+
+	/* 24-bit source id assembled from the three s_id bytes */
+	sid = p->s_id[0] | (p->s_id[1] << 8) | (p->s_id[2] << 16);
+
+	/* payload length: low 12 bits of frame_size minus the ELS header */
+	totlen = (le16_to_cpu(p->frame_size) & 0x0fff) - PURX_ELS_HEADER_SIZE;
+	if (le16_to_cpu(p->status_flags) & 0x8000) {
+		/* NOTE(review): totlen is assigned here but not read before the return */
+		totlen = le16_to_cpu(p->trunc_frame_size);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	if (totlen > ELS_MAX_PAYLOAD) {
+		ql_dbg(ql_dbg_edif, vha, 0x0910d,
+		    "%s WARNING: verbose ELS frame received (totlen=%x)\n",
+		    __func__, totlen);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	if (!vha->hw->flags.edif_enabled) {
+		/* edif support not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x910e, "%s edif not enabled\n",
+		    __func__);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	ptr = qla_enode_alloc(vha, N_PUREX);
+	if (!ptr) {
+		ql_dbg(ql_dbg_edif, vha, 0x09109,
+		    "WARNING: enode alloc failed for sid=%x\n",
+		    sid);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	/* record the frame's addressing and exchange info in the enode */
+	purex = &ptr->u.purexinfo;
+	purex->pur_info.pur_sid = a.did;
+	purex->pur_info.pur_bytes_rcvd = totlen;
+	purex->pur_info.pur_rx_xchg_address = le32_to_cpu(p->rx_xchg_addr);
+	purex->pur_info.pur_nphdl = le16_to_cpu(p->nport_handle);
+	purex->pur_info.pur_did.b.domain =  p->d_id[2];
+	purex->pur_info.pur_did.b.area =  p->d_id[1];
+	purex->pur_info.pur_did.b.al_pa =  p->d_id[0];
+	purex->pur_info.vp_idx = p->vp_idx;
+
+	/* a reply is sent from the frame's destination id */
+	a.sid = purex->pur_info.pur_did;
+
+	/*
+	 * Copy the payload into the enode buffer.
+	 * NOTE(review): from here on no __qla_consume_iocb() call is made on
+	 * the error paths - presumably the copy helper consumes the entries;
+	 * confirm against its implementation.
+	 */
+	rc = __qla_copy_purex_to_buffer(vha, pkt, rsp, purex->msgp,
+		purex->msgp_len);
+	if (rc) {
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		qla_enode_free(vha, ptr);
+		return;
+	}
+	/* find the host that owns the frame's destination id */
+	beid.al_pa = purex->pur_info.pur_did.b.al_pa;
+	beid.area   = purex->pur_info.pur_did.b.area;
+	beid.domain = purex->pur_info.pur_did.b.domain;
+	host = qla_find_host_by_d_id(vha, beid);
+	if (!host) {
+		ql_log(ql_log_fatal, vha, 0x508b,
+		    "%s Drop ELS due to unable to find host %06x\n",
+		    __func__, purex->pur_info.pur_did.b24);
+
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		qla_enode_free(vha, ptr);
+		return;
+	}
+
+	/* may be NULL; every use below checks for that */
+	fcport = qla2x00_find_fcport_by_pid(host, &purex->pur_info.pur_sid);
+
+	/*
+	 * NOTE(review): the doorbell check is made on vha while the log and
+	 * all later processing use host - confirm this is intended.
+	 */
+	if (DBELL_INACTIVE(vha)) {
+		ql_dbg(ql_dbg_edif, host, 0x0910c, "%s e_dbell.db_flags =%x %06x\n",
+		    __func__, host->e_dbell.db_flags,
+		    fcport ? fcport->d_id.b24 : 0);
+
+		qla_els_reject_iocb(host, (*rsp)->qpair, &a);
+		qla_enode_free(host, ptr);
+		return;
+	}
+
+	if (fcport && EDIF_SESSION_DOWN(fcport)) {
+		ql_dbg(ql_dbg_edif, host, 0x13b6,
+		    "%s terminate exchange. Send logo to 0x%x\n",
+		    __func__, a.did.b24);
+
+		/* reuse the argument block as an exchange terminate request */
+		a.tx_byte_count = a.tx_len = 0;
+		a.tx_addr = 0;
+		a.control_flags = EPD_RX_XCHG;  /* EPD_RX_XCHG = terminate cmd */
+		qla_els_reject_iocb(host, (*rsp)->qpair, &a);
+		qla_enode_free(host, ptr);
+		/* send logo to let the remote port know to tear down the session */
+		fcport->send_els_logo = 1;
+		qlt_schedule_sess_for_deletion(fcport);
+		return;
+	}
+
+	/* add the local enode to the list */
+	qla_enode_add(host, ptr);
+
+	ql_dbg(ql_dbg_edif, host, 0x0910c,
+	    "%s COMPLETE purex->pur_info.pur_bytes_rcvd =%xh s:%06x -> d:%06x xchg=%xh\n",
+	    __func__, purex->pur_info.pur_bytes_rcvd, purex->pur_info.pur_sid.b24,
+	    purex->pur_info.pur_did.b24, purex->pur_info.pur_rx_xchg_address);
+
+	/* no fcport is passed: the event code looks the session up from sid */
+	qla_edb_eventcreate(host, VND_CMD_AUTH_STATE_ELS_RCVD, sid, 0, NULL);
+}
+
+/*
+ * Take the first free sa_index out of the tx (@dir != 0) or rx (@dir == 0)
+ * free-pool bitmap and mark it used.  Tx indexes are returned offset by
+ * EDIF_TX_SA_INDEX_BASE.
+ *
+ * Returns the sa_index, or INVALID_EDIF_SA_INDEX if the pool is exhausted.
+ */
+static uint16_t  qla_edif_get_sa_index_from_freepool(fc_port_t *fcport, int dir)
+{
+	struct scsi_qla_host *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long irq_flags = 0;
+	void *pool;
+	u16 idx;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	/* pick the bitmap that matches the requested direction */
+	if (dir == 0)
+		pool = ha->edif_rx_sa_id_map;
+	else
+		pool = ha->edif_tx_sa_id_map;
+
+	/* search and claim under sadb_fp_lock so the two steps are atomic */
+	spin_lock_irqsave(&ha->sadb_fp_lock, irq_flags);
+	idx = find_first_zero_bit(pool, EDIF_NUM_SA_INDEX);
+	if (idx < EDIF_NUM_SA_INDEX)
+		set_bit(idx, pool);
+	spin_unlock_irqrestore(&ha->sadb_fp_lock, irq_flags);
+
+	/* no clear bit found: the pool is exhausted */
+	if (idx >= EDIF_NUM_SA_INDEX)
+		return INVALID_EDIF_SA_INDEX;
+
+	if (dir)
+		idx += EDIF_TX_SA_INDEX_BASE;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: index retrieved from free pool %d\n", __func__, idx);
+
+	return idx;
+}
+
+/*
+ * Walk @sa_list and return the first sadb entry whose handle equals
+ * @nport_handle, or NULL if there is none.  The list is not modified.
+ */
+static struct edif_sa_index_entry *
+qla_edif_sadb_find_sa_index_entry(uint16_t nport_handle,
+		struct list_head *sa_list)
+{
+	struct edif_sa_index_entry *cur;
+
+	/* nothing is unlinked while walking, so the plain iterator is enough */
+	list_for_each_entry(cur, sa_list, next) {
+		if (cur->handle == nport_handle)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Remove @sa_index from the sadb entry of @nport_handle and give it back
+ * to the free pool.  When both slots of the entry end up unused the entry
+ * itself is unlinked and freed.
+ *
+ * Returns 0 on success, -1 if no sadb entry exists for @nport_handle.
+ */
+static int qla_edif_sadb_delete_sa_index(fc_port_t *fcport, uint16_t nport_handle,
+		uint16_t sa_index)
+{
+	scsi_qla_host_t *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	/* indexes at or above EDIF_TX_SA_INDEX_BASE belong to the tx side */
+	int dir = sa_index >= EDIF_TX_SA_INDEX_BASE;
+	struct edif_sa_index_entry *entry;
+	struct list_head *sa_list;
+	unsigned long irq_flags = 0;
+	int unused_slots = 0;
+	int slot;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	sa_list = dir ? &ha->sadb_tx_index_list : &ha->sadb_rx_index_list;
+
+	/*
+	 * NOTE(review): the lookup runs before sadb_lock is taken - confirm
+	 * callers guarantee the entry cannot go away in between.
+	 */
+	entry = qla_edif_sadb_find_sa_index_entry(nport_handle, sa_list);
+	if (entry == NULL) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: no entry found for nport_handle 0x%x\n",
+		    __func__, nport_handle);
+		return -1;
+	}
+
+	spin_lock_irqsave(&ha->sadb_lock, irq_flags);
+
+	/*
+	 * Each tx/rx direction has up to 2 sa indexes/slots: one for in
+	 * flight traffic, the other is used at re-key time.
+	 */
+	for (slot = 0; slot < 2; slot++) {
+		if (entry->sa_pair[slot].sa_index == sa_index) {
+			/* release the slot and return the index to the pool */
+			entry->sa_pair[slot].sa_index = INVALID_EDIF_SA_INDEX;
+			entry->sa_pair[slot].spi = 0;
+			unused_slots++;
+			qla_edif_add_sa_index_to_freepool(fcport, dir, sa_index);
+		} else if (entry->sa_pair[slot].sa_index == INVALID_EDIF_SA_INDEX) {
+			/* slot was already empty */
+			unused_slots++;
+		}
+	}
+
+	/* nothing left in this entry: drop it from the sadb list */
+	if (unused_slots == 2) {
+		list_del(&entry->next);
+		kfree(entry);
+	}
+
+	spin_unlock_irqrestore(&ha->sadb_lock, irq_flags);
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: sa_index %d removed, free_slot_count: %d\n",
+	    __func__, sa_index, unused_slots);
+
+	return 0;
+}
+
+/**
+ * qla28xx_sa_update_iocb_entry() - Process an SA update response IOCB
+ * @v: host the response was received on; used to look up the srb and for
+ *     the "no sp found" message only
+ * @req: request queue the original SA update IOCB was issued on
+ * @pkt: SA update response IOCB
+ *
+ * Logs the completion, removes the port's edif_list_entry when an rx delete
+ * completes, reports the result through qla_edb_eventcreate() (unless the srb
+ * is marked SRB_EDIF_CLEANUP_DELETE), releases the sa_ctl / sadb sa_index
+ * bookkeeping for deletes and for updates that completed with a non-zero
+ * status, and finally completes the srb via sp->done().
+ */
+void
+qla28xx_sa_update_iocb_entry(scsi_qla_host_t *v, struct req_que *req,
+	struct sa_update_28xx *pkt)
+{
+	const char *func = "SA_UPDATE_RESPONSE_IOCB";
+	srb_t *sp;
+	struct edif_sa_ctl *sa_ctl;
+	int old_sa_deleted = 1;
+	uint16_t nport_handle;
+	struct scsi_qla_host *vha;
+
+	sp = qla2x00_get_sp_from_handle(v, func, req, pkt);
+
+	if (!sp) {
+		ql_dbg(ql_dbg_edif, v, 0x3063,
+			"%s: no sp found for pkt\n", __func__);
+		return;
+	}
+	/* use sp->vha due to npiv */
+	vha = sp->vha;
+
+	/*
+	 * Debug trace only.  The case labels are the numeric combinations of
+	 * SA_FLAG_INVALIDATE (BIT_0) and SA_FLAG_TX (BIT_1) from qla_edif.h.
+	 */
+	switch (pkt->flags & (SA_FLAG_INVALIDATE | SA_FLAG_TX)) {
+	case 0:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA UPDATE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 1:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA DELETE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 2:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA UPDATE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 3:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA DELETE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	}
+
+	/*
+	 * NOTE(review): the comment that used to sit here said to dig the
+	 * nport handle out of the iocb because fcport->loop_id can not be
+	 * trusted during cleanup sa_update iocbs, yet the statement below
+	 * reads fcport->loop_id.  Confirm which of the two is intended.
+	 */
+	nport_handle = sp->fcport->loop_id;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: %8phN comp status=%x old_sa_info=%x new_sa_info=%x lid %d, index=0x%x pkt_flags %xh hdl=%x\n",
+	    __func__, sp->fcport->port_name, pkt->u.comp_sts, pkt->old_sa_info, pkt->new_sa_info,
+	    nport_handle, pkt->sa_index, pkt->flags, sp->handle);
+
+	/* if rx delete, remove the timer */
+	if ((pkt->flags & (SA_FLAG_INVALIDATE | SA_FLAG_TX)) ==  SA_FLAG_INVALIDATE) {
+		struct edif_list_entry *edif_entry;
+
+		sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+
+		edif_entry = qla_edif_list_find_sa_index(sp->fcport, nport_handle);
+		if (edif_entry) {
+			ql_dbg(ql_dbg_edif, vha, 0x5033,
+			    "%s: removing edif_entry %p, new sa_index: 0x%x\n",
+			    __func__, edif_entry, pkt->sa_index);
+			/* unlink first, then stop the timer, then free */
+			qla_edif_list_delete_sa_index(sp->fcport, edif_entry);
+			del_timer(&edif_entry->timer);
+
+			ql_dbg(ql_dbg_edif, vha, 0x5033,
+			    "%s: releasing edif_entry %p, new sa_index: 0x%x\n",
+			    __func__, edif_entry, pkt->sa_index);
+
+			kfree(edif_entry);
+		}
+	}
+
+	/*
+	 * if this is a delete for either tx or rx, make sure it succeeded.
+	 * The new_sa_info field should be 0xffff on success
+	 */
+	if (pkt->flags & SA_FLAG_INVALIDATE)
+		old_sa_deleted = (le16_to_cpu(pkt->new_sa_info) == 0xffff) ? 1 : 0;
+
+	/* Process update and delete the same way */
+
+	/* If this is an sadb cleanup delete, bypass sending events to IPSEC */
+	if (sp->flags & SRB_EDIF_CLEANUP_DELETE) {
+		sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: nph 0x%x, sa_index %d removed from fw\n",
+		    __func__, sp->fcport->loop_id, pkt->sa_index);
+
+	} else if ((pkt->entry_status == 0) && (pkt->u.comp_sts == 0) &&
+	    old_sa_deleted) {
+		/*
+		 * Note: We are only keeping track of the latest SA,
+		 * so we know when we can start enabling encryption per I/O.
+		 * If all SA's get deleted, let FW reject the IOCB.
+		 *
+		 * TODO: edif: don't set enabled here I think
+		 * TODO: edif: prli complete is where it should be set
+		 */
+		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+			"SA(%x)updated for s_id %02x%02x%02x\n",
+			pkt->new_sa_info,
+			pkt->port_id[2], pkt->port_id[1], pkt->port_id[0]);
+		sp->fcport->edif.enable = 1;
+		if (pkt->flags & SA_FLAG_TX) {
+			sp->fcport->edif.tx_sa_set = 1;
+			sp->fcport->edif.tx_sa_pending = 0;
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				QL_VND_SA_STAT_SUCCESS,
+				QL_VND_TX_SA_KEY, sp->fcport);
+		} else {
+			sp->fcport->edif.rx_sa_set = 1;
+			sp->fcport->edif.rx_sa_pending = 0;
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				QL_VND_SA_STAT_SUCCESS,
+				QL_VND_RX_SA_KEY, sp->fcport);
+		}
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: %8phN SA update FAILED: sa_index: %d, new_sa_info %d, %02x%02x%02x\n",
+		    __func__, sp->fcport->port_name, pkt->sa_index, pkt->new_sa_info,
+		    pkt->port_id[2], pkt->port_id[1], pkt->port_id[0]);
+
+		/* completion status goes in the upper 16 bits of the event data */
+		if (pkt->flags & SA_FLAG_TX)
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				(le16_to_cpu(pkt->u.comp_sts) << 16) | QL_VND_SA_STAT_FAILED,
+				QL_VND_TX_SA_KEY, sp->fcport);
+		else
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				(le16_to_cpu(pkt->u.comp_sts) << 16) | QL_VND_SA_STAT_FAILED,
+				QL_VND_RX_SA_KEY, sp->fcport);
+	}
+
+	/* for delete, release sa_ctl, sa_index */
+	if (pkt->flags & SA_FLAG_INVALIDATE) {
+		/* release the sa_ctl */
+		sa_ctl = qla_edif_find_sa_ctl_by_index(sp->fcport,
+		    le16_to_cpu(pkt->sa_index), (pkt->flags & SA_FLAG_TX));
+		/*
+		 * NOTE(review): the second lookup repeats the first one with
+		 * sa_ctl->index; it looks redundant unless the index stored in
+		 * the sa_ctl can differ from the one in the IOCB -- confirm.
+		 */
+		if (sa_ctl &&
+		    qla_edif_find_sa_ctl_by_index(sp->fcport, sa_ctl->index,
+			(pkt->flags & SA_FLAG_TX)) != NULL) {
+			ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+			    "%s: freeing sa_ctl for index %d\n",
+			    __func__, sa_ctl->index);
+			qla_edif_free_sa_ctl(sp->fcport, sa_ctl, sa_ctl->index);
+		} else {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl NOT freed, sa_ctl: %p\n",
+			    __func__, sa_ctl);
+		}
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: freeing sa_index %d, nph: 0x%x\n",
+		    __func__, le16_to_cpu(pkt->sa_index), nport_handle);
+		qla_edif_sadb_delete_sa_index(sp->fcport, nport_handle,
+		    le16_to_cpu(pkt->sa_index));
+	/*
+	 * check for a failed sa_update and remove
+	 * the sadb entry.
+	 */
+	} else if (pkt->u.comp_sts) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: freeing sa_index %d, nph: 0x%x\n",
+		    __func__, pkt->sa_index, nport_handle);
+		qla_edif_sadb_delete_sa_index(sp->fcport, nport_handle,
+		    le16_to_cpu(pkt->sa_index));
+		/* these two statuses additionally tear the session down */
+		switch (le16_to_cpu(pkt->u.comp_sts)) {
+		case CS_PORT_EDIF_UNAVAIL:
+		case CS_PORT_EDIF_LOGOUT:
+			qlt_schedule_sess_for_deletion(sp->fcport);
+			break;
+		default:
+			break;
+		}
+	}
+
+	sp->done(sp, 0);
+}
+
+/**
+ * qla28xx_start_scsi_edif() - Send a SCSI type 6 command to the ISP
+ * @sp: command to send to the ISP
+ *
+ * Builds a COMMAND_TYPE_6 IOCB with CF_EN_EDIF set, together with a separate
+ * FCP_CMND buffer taken from ha->fcp_cmnd_dma_pool, and places it on the
+ * request ring of sp->qpair while holding the qpair lock.
+ *
+ * Return: non-zero if a failure occurred, else zero.
+ */
+int
+qla28xx_start_scsi_edif(srb_t *sp)
+{
+	int             nseg;
+	unsigned long   flags;
+	struct scsi_cmnd *cmd;
+	uint32_t        *clr_ptr;
+	uint32_t        index, i;
+	uint32_t        handle;
+	uint16_t        cnt;
+	int16_t        req_cnt;
+	uint16_t        tot_dsds;
+	__be32 *fcp_dl;
+	uint8_t additional_cdb_len;
+	struct ct6_dsd *ctx;
+	struct scsi_qla_host *vha = sp->vha;
+	struct qla_hw_data *ha = vha->hw;
+	struct cmd_type_6 *cmd_pkt;
+	struct dsd64	*cur_dsd;
+	uint8_t		avail_dsds = 0;
+	struct scatterlist *sg;
+	struct req_que *req = sp->qpair->req;
+	spinlock_t *lock = sp->qpair->qp_lock_ptr;
+
+	/* Setup device pointers. */
+	cmd = GET_CMD_SP(sp);
+
+	/* So we know we haven't pci_map'ed anything yet */
+	tot_dsds = 0;
+
+	/* Send marker if required */
+	if (vha->marker_needed != 0) {
+		if (qla2x00_marker(vha, sp->qpair, 0, 0, MK_SYNC_ALL) !=
+			QLA_SUCCESS) {
+			ql_log(ql_log_warn, vha, 0x300c,
+			    "qla2x00_marker failed for cmd=%p.\n", cmd);
+			return QLA_FUNCTION_FAILED;
+		}
+		vha->marker_needed = 0;
+	}
+
+	/* Acquire ring specific lock */
+	spin_lock_irqsave(lock, flags);
+
+	/*
+	 * Check for room in outstanding command list.  The scan starts just
+	 * after the most recently used handle and wraps back to 1, so slot 0
+	 * is never handed out.
+	 */
+	handle = req->current_outstanding_cmd;
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
+		handle++;
+		if (handle == req->num_outstanding_cmds)
+			handle = 1;
+		if (!req->outstanding_cmds[handle])
+			break;
+	}
+	/* loop ran to completion: every handle is in use */
+	if (index == req->num_outstanding_cmds)
+		goto queuing_error;
+
+	/* Map the sg table so we have an accurate count of sg entries needed */
+	if (scsi_sg_count(cmd)) {
+		nseg = dma_map_sg(&ha->pdev->dev, scsi_sglist(cmd),
+		    scsi_sg_count(cmd), cmd->sc_data_direction);
+		if (unlikely(!nseg))
+			goto queuing_error;
+	} else {
+		nseg = 0;
+	}
+
+	/* from here on a non-zero tot_dsds tells the error path to unmap */
+	tot_dsds = nseg;
+	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+	sp->iores.res_type = RESOURCE_INI;
+	sp->iores.iocb_cnt = req_cnt;
+	if (qla_get_iocbs(sp->qpair, &sp->iores))
+		goto queuing_error;
+
+	/*
+	 * Not enough cached ring space: re-read the consumer index and
+	 * recompute the free entry count before giving up.
+	 */
+	if (req->cnt < (req_cnt + 2)) {
+		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+		    rd_reg_dword(req->req_q_out);
+		if (req->ring_index < cnt)
+			req->cnt = cnt - req->ring_index;
+		else
+			req->cnt = req->length -
+			    (req->ring_index - cnt);
+		if (req->cnt < (req_cnt + 2))
+			goto queuing_error;
+	}
+
+	/* GFP_ATOMIC: allocating with the ring lock held and irqs off */
+	ctx = sp->u.scmd.ct6_ctx =
+	    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
+	if (!ctx) {
+		ql_log(ql_log_fatal, vha, 0x3010,
+		    "Failed to allocate ctx for cmd=%p.\n", cmd);
+		goto queuing_error;
+	}
+
+	memset(ctx, 0, sizeof(struct ct6_dsd));
+	ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
+	    GFP_ATOMIC, &ctx->fcp_cmnd_dma);
+	if (!ctx->fcp_cmnd) {
+		ql_log(ql_log_fatal, vha, 0x3011,
+		    "Failed to allocate fcp_cmnd for cmd=%p.\n", cmd);
+		goto queuing_error;
+	}
+
+	/* Initialize the DSD list and dma handle */
+	INIT_LIST_HEAD(&ctx->dsd_list);
+	ctx->dsd_use_cnt = 0;
+
+	/*
+	 * fcp_cmnd_len is 12 + CDB bytes + 4.  The trailing 4 bytes are the
+	 * big-endian FCP_DL written through fcp_dl further down; the leading
+	 * 12 are presumably the FCP_CMND fields ahead of the CDB -- TODO
+	 * confirm against struct fcp_cmnd.
+	 */
+	if (cmd->cmd_len > 16) {
+		additional_cdb_len = cmd->cmd_len - 16;
+		if ((cmd->cmd_len % 4) != 0) {
+			/*
+			 * SCSI command bigger than 16 bytes must be
+			 * multiple of 4
+			 */
+			ql_log(ql_log_warn, vha, 0x3012,
+			    "scsi cmd len %d not multiple of 4 for cmd=%p.\n",
+			    cmd->cmd_len, cmd);
+			goto queuing_error_fcp_cmnd;
+		}
+		ctx->fcp_cmnd_len = 12 + cmd->cmd_len + 4;
+	} else {
+		additional_cdb_len = 0;
+		ctx->fcp_cmnd_len = 12 + 16 + 4;
+	}
+
+	cmd_pkt = (struct cmd_type_6 *)req->ring_ptr;
+	cmd_pkt->handle = make_handle(req->id, handle);
+
+	/*
+	 * Zero out remaining portion of packet.
+	 * tagged queuing modifier -- default is TSK_SIMPLE (0).
+	 * The first 8 bytes (two 32-bit words, which include the handle just
+	 * written) are skipped.
+	 */
+	clr_ptr = (uint32_t *)cmd_pkt + 2;
+	memset(clr_ptr, 0, REQUEST_ENTRY_SIZE - 8);
+	cmd_pkt->dseg_count = cpu_to_le16(tot_dsds);
+
+	/* No data transfer */
+	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+		cmd_pkt->byte_count = cpu_to_le32(0);
+		goto no_dsds;
+	}
+
+	/* Set transfer direction */
+	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
+		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
+		vha->qla_stats.output_requests++;
+		sp->fcport->edif.tx_bytes += scsi_bufflen(cmd);
+	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
+		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
+		vha->qla_stats.input_requests++;
+		sp->fcport->edif.rx_bytes += scsi_bufflen(cmd);
+	}
+
+	/* mark the command as EDIF and make sure CF_NEW_SA is clear */
+	cmd_pkt->control_flags |= cpu_to_le16(CF_EN_EDIF);
+	cmd_pkt->control_flags &= ~(cpu_to_le16(CF_NEW_SA));
+
+	/* One DSD is available in the Command Type 6 IOCB */
+	avail_dsds = 1;
+	cur_dsd = &cmd_pkt->fcp_dsd;
+
+	/* Load data segments */
+	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+		dma_addr_t      sle_dma;
+		cont_a64_entry_t *cont_pkt;
+
+		/* Allocate additional continuation packets? */
+		if (avail_dsds == 0) {
+			/*
+			 * Five DSDs are available in the Continuation
+			 * Type 1 IOCB.
+			 */
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha, req);
+			cur_dsd = cont_pkt->dsd;
+			avail_dsds = 5;
+		}
+
+		sle_dma = sg_dma_address(sg);
+		put_unaligned_le64(sle_dma, &cur_dsd->address);
+		cur_dsd->length = cpu_to_le32(sg_dma_len(sg));
+		cur_dsd++;
+		avail_dsds--;
+	}
+
+no_dsds:
+	/* Set NPORT-ID and LUN number*/
+	cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+	cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa;
+	cmd_pkt->port_id[1] = sp->fcport->d_id.b.area;
+	cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain;
+	cmd_pkt->vp_index = sp->vha->vp_idx;
+
+	cmd_pkt->entry_type = COMMAND_TYPE_6;
+
+	/* Number of IOCB entries this command occupies on the ring. */
+	cmd_pkt->entry_count = (uint8_t)req_cnt;
+
+	int_to_scsilun(cmd->device->lun, &cmd_pkt->lun);
+	host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun));
+
+	/* build FCP_CMND IU */
+	int_to_scsilun(cmd->device->lun, &ctx->fcp_cmnd->lun);
+	ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
+
+	/*
+	 * additional_cdb_len is a multiple of 4 here, so its two low bits are
+	 * free and are used as flags: bit 0 for a write, bit 1 for a read.
+	 */
+	if (cmd->sc_data_direction == DMA_TO_DEVICE)
+		ctx->fcp_cmnd->additional_cdb_len |= 1;
+	else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+		ctx->fcp_cmnd->additional_cdb_len |= 2;
+
+	/* Populate the FCP_PRIO. */
+	if (ha->flags.fcp_prio_enabled)
+		ctx->fcp_cmnd->task_attribute |=
+		    sp->fcport->fcp_prio << 3;
+
+	memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
+
+	/* FCP_DL follows the (possibly extended) CDB, in big-endian order */
+	fcp_dl = (__be32 *)(ctx->fcp_cmnd->cdb + 16 +
+	    additional_cdb_len);
+	*fcp_dl = htonl((uint32_t)scsi_bufflen(cmd));
+
+	cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(ctx->fcp_cmnd_len);
+	put_unaligned_le64(ctx->fcp_cmnd_dma, &cmd_pkt->fcp_cmnd_dseg_address);
+
+	sp->flags |= SRB_FCP_CMND_DMA_VALID;
+	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+	/* entry_count was already set above; this store repeats it. */
+	cmd_pkt->entry_count = (uint8_t)req_cnt;
+	cmd_pkt->entry_status = 0;
+
+	/* Build command packet. */
+	req->current_outstanding_cmd = handle;
+	req->outstanding_cmds[handle] = sp;
+	sp->handle = handle;
+	cmd->host_scribble = (unsigned char *)(unsigned long)handle;
+	req->cnt -= req_cnt;
+
+	/* Adjust ring index; wmb() orders the IOCB stores ahead of it. */
+	wmb();
+	req->ring_index++;
+	if (req->ring_index == req->length) {
+		req->ring_index = 0;
+		req->ring_ptr = req->ring;
+	} else {
+		req->ring_ptr++;
+	}
+
+	sp->qpair->cmd_cnt++;
+	/* Set chip new ring index. */
+	wrt_reg_dword(req->req_q_in, req->ring_index);
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return QLA_SUCCESS;
+
+	/* error unwinding: fcp_cmnd buffer, then sg mapping, ctx and iocbs */
+queuing_error_fcp_cmnd:
+	dma_pool_free(ha->fcp_cmnd_dma_pool, ctx->fcp_cmnd, ctx->fcp_cmnd_dma);
+queuing_error:
+	if (tot_dsds)
+		scsi_dma_unmap(cmd);
+
+	if (sp->u.scmd.ct6_ctx) {
+		mempool_free(sp->u.scmd.ct6_ctx, ha->ctx_mempool);
+		sp->u.scmd.ct6_ctx = NULL;
+	}
+	/*
+	 * NOTE(review): this is also reached from failures that happen before
+	 * sp->iores is filled in above -- confirm qla_put_iocbs() tolerates
+	 * that.
+	 */
+	qla_put_iocbs(sp->qpair, &sp->iores);
+	spin_unlock_irqrestore(lock, flags);
+
+	return QLA_FUNCTION_FAILED;
+}
+
+/**********************************************
+ * edif update/delete sa_index list functions *
+ **********************************************/
+
+/*
+ * Unlink and free every entry on this port's edif_indx_list.  The whole
+ * walk runs with fcport->edif.indx_list_lock held and interrupts disabled.
+ */
+void qla_edif_list_del(fc_port_t *fcport)
+{
+	struct list_head *head = &fcport->edif.edif_indx_list;
+	spinlock_t *lock = &fcport->edif.indx_list_lock;
+	struct edif_list_entry *cur;
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(lock, irq_flags);
+	/* pop from the front until nothing is left */
+	while (!list_empty(head)) {
+		cur = list_first_entry(head, struct edif_list_entry, next);
+		list_del(&cur->next);
+		kfree(cur);
+	}
+	spin_unlock_irqrestore(lock, irq_flags);
+}
+
+/******************
+ * SADB functions *
+ ******************/
+
+/*
+ * allocate/retrieve an sa_index for a given spi
+ *
+ * @fcport: port the SA belongs to; fcport->loop_id is used as the nport handle
+ * @sa_frame: SA update frame; supplies the spi and the SAU_FLG_TX /
+ *            SAU_FLG_INV flags
+ *
+ * Each nport handle has one sadb entry per direction, holding two
+ * (spi, sa_index) slots.  Returns the sa_index already recorded for the spi,
+ * or a fresh one taken from the free pool, or:
+ *   RX_DELETE_NO_EDIF_SA_INDEX - rx delete requested but no entry exists
+ *   INVALID_EDIF_SA_INDEX      - allocation failed, both slots are in use,
+ *                                or the free pool is empty
+ */
+static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport,
+		struct qla_sa_update_frame *sa_frame)
+{
+	struct edif_sa_index_entry *entry;
+	struct list_head *sa_list;
+	uint16_t sa_index;
+	int dir = sa_frame->flags & SAU_FLG_TX;
+	int slot = 0;
+	int free_slot = -1;
+	scsi_qla_host_t *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags = 0;
+	uint16_t nport_handle = fcport->loop_id;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: entry  fc_port: %p, nport_handle: 0x%x\n",
+	    __func__, fcport, nport_handle);
+
+	if (dir)
+		sa_list = &ha->sadb_tx_index_list;
+	else
+		sa_list = &ha->sadb_rx_index_list;
+
+	/*
+	 * NOTE(review): this lookup is made without ha->sadb_lock held here;
+	 * confirm the helper or the callers provide the needed serialization.
+	 */
+	entry = qla_edif_sadb_find_sa_index_entry(nport_handle, sa_list);
+	if (!entry) {
+		/* INV set and TX clear: an rx delete with nothing to delete */
+		if ((sa_frame->flags & (SAU_FLG_TX | SAU_FLG_INV)) == SAU_FLG_INV) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: rx delete request with no entry\n", __func__);
+			return RX_DELETE_NO_EDIF_SA_INDEX;
+		}
+
+		/* if there is no entry for this nport, add one */
+		entry = kzalloc((sizeof(struct edif_sa_index_entry)), GFP_ATOMIC);
+		if (!entry)
+			return INVALID_EDIF_SA_INDEX;
+
+		sa_index = qla_edif_get_sa_index_from_freepool(fcport, dir);
+		if (sa_index == INVALID_EDIF_SA_INDEX) {
+			kfree(entry);
+			return INVALID_EDIF_SA_INDEX;
+		}
+
+		/* slot 0 takes the new SA, slot 1 starts out empty */
+		INIT_LIST_HEAD(&entry->next);
+		entry->handle = nport_handle;
+		entry->fcport = fcport;
+		entry->sa_pair[0].spi = sa_frame->spi;
+		entry->sa_pair[0].sa_index = sa_index;
+		entry->sa_pair[1].spi = 0;
+		entry->sa_pair[1].sa_index = INVALID_EDIF_SA_INDEX;
+		spin_lock_irqsave(&ha->sadb_lock, flags);
+		list_add_tail(&entry->next, sa_list);
+		spin_unlock_irqrestore(&ha->sadb_lock, flags);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: Created new sadb entry for nport_handle 0x%x, spi 0x%x, returning sa_index %d\n",
+		    __func__, nport_handle, sa_frame->spi, sa_index);
+
+		return sa_index;
+	}
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+
+	/*
+	 * see if we already have an entry for this spi; remember an empty
+	 * slot along the way (the last one seen wins)
+	 */
+	for (slot = 0; slot < 2; slot++) {
+		if (entry->sa_pair[slot].sa_index == INVALID_EDIF_SA_INDEX) {
+			free_slot = slot;
+		} else {
+			if (entry->sa_pair[slot].spi == sa_frame->spi) {
+				spin_unlock_irqrestore(&ha->sadb_lock, flags);
+				ql_dbg(ql_dbg_edif, vha, 0x3063,
+				    "%s: sadb slot %d entry for lid 0x%x, spi 0x%x found, sa_index %d\n",
+				    __func__, slot, entry->handle, sa_frame->spi,
+				    entry->sa_pair[slot].sa_index);
+				return entry->sa_pair[slot].sa_index;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+
+	/* both slots are used */
+	if (free_slot == -1) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: WARNING: No free slots in sadb for nport_handle 0x%x, spi: 0x%x\n",
+		    __func__, entry->handle, sa_frame->spi);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: Slot 0  spi: 0x%x  sa_index: %d,  Slot 1  spi: 0x%x  sa_index: %d\n",
+		    __func__, entry->sa_pair[0].spi, entry->sa_pair[0].sa_index,
+		    entry->sa_pair[1].spi, entry->sa_pair[1].sa_index);
+
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	/* there is at least one free slot, use it */
+	sa_index = qla_edif_get_sa_index_from_freepool(fcport, dir);
+	if (sa_index == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+		    "%s: empty freepool!!\n", __func__);
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	/*
+	 * NOTE(review): sadb_lock was released between choosing free_slot and
+	 * filling it in here -- confirm nothing else can claim the slot or
+	 * free the entry in that window.
+	 */
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+	entry->sa_pair[free_slot].spi = sa_frame->spi;
+	entry->sa_pair[free_slot].sa_index = sa_index;
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+	    "%s: sadb slot %d entry for nport_handle 0x%x, spi 0x%x added, returning sa_index %d\n",
+	    __func__, free_slot, entry->handle, sa_frame->spi, sa_index);
+
+	return sa_index;
+}
+
+/*
+ * Free every entry on the rx and then the tx sadb index list.  No lock is
+ * taken here; per the original note this is only done at teardown.
+ */
+void qla_edif_sadb_release(struct qla_hw_data *ha)
+{
+	struct list_head *lists[] = {
+		&ha->sadb_rx_index_list,
+		&ha->sadb_tx_index_list,
+	};
+	struct edif_sa_index_entry *cur, *nxt;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(lists); i++) {
+		list_for_each_entry_safe(cur, nxt, lists[i], next) {
+			list_del(&cur->next);
+			kfree(cur);
+		}
+	}
+}
+
+/**************************
+ * sadb freepool functions
+ **************************/
+
+/*
+ * Allocate the zeroed tx and rx sa_index bitmaps, each EDIF_NUM_SA_INDEX
+ * bits wide.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails (nothing is left
+ * allocated in that case).
+ */
+int qla_edif_sadb_build_free_pool(struct qla_hw_data *ha)
+{
+	const size_t nlongs = BITS_TO_LONGS(EDIF_NUM_SA_INDEX);
+
+	ha->edif_tx_sa_id_map = kcalloc(nlongs, sizeof(long), GFP_KERNEL);
+	if (!ha->edif_tx_sa_id_map) {
+		ql_log_pci(ql_log_fatal, ha->pdev, 0x0009,
+		    "Unable to allocate memory for sadb tx.\n");
+		return -ENOMEM;
+	}
+
+	ha->edif_rx_sa_id_map = kcalloc(nlongs, sizeof(long), GFP_KERNEL);
+	if (ha->edif_rx_sa_id_map)
+		return 0;
+
+	/* rx map failed: give the tx map back before reporting the error */
+	kfree(ha->edif_tx_sa_id_map);
+	ha->edif_tx_sa_id_map = NULL;
+	ql_log_pci(ql_log_fatal, ha->pdev, 0x0009,
+	    "Unable to allocate memory for sadb rx.\n");
+	return -ENOMEM;
+}
+
+/*
+ * Free both sa_index bitmaps and clear the pointers so that a repeated call
+ * is harmless (kfree(NULL) is a no-op).
+ */
+void qla_edif_sadb_release_free_pool(struct qla_hw_data *ha)
+{
+	unsigned long *tx_map = ha->edif_tx_sa_id_map;
+	unsigned long *rx_map = ha->edif_rx_sa_id_map;
+
+	ha->edif_tx_sa_id_map = NULL;
+	ha->edif_rx_sa_id_map = NULL;
+
+	kfree(tx_map);
+	kfree(rx_map);
+}
+
+/*
+ * __chk_edif_rx_sa_delete_pending() - queue a pending rx SA delete once
+ * enough completions have been seen on the replacement SA
+ * @vha: host, used for debug logging
+ * @fcport: port whose edif index list is examined
+ * @handle: IOCB handle, only reported in a debug message
+ * @sa_index: rx sa_index carried by the completed IOCB
+ *
+ * Does nothing unless the port has an edif_list_entry with a pending
+ * delete_sa_index whose update_sa_index matches @sa_index and which has been
+ * seen at least EDIF_RX_DELETE_FILTER_COUNT times.  When all of that holds,
+ * the pending delete is cleared from the entry and the matching sa_ctl is
+ * handed to qla_post_sa_replace_work().
+ *
+ * Everything needed from the entry is copied while indx_list_lock is held.
+ * The entry is not touched after the unlock, because the SA update
+ * completion path unlinks and frees it.
+ */
+static void __chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		fc_port_t *fcport, uint32_t handle, uint16_t sa_index)
+{
+	struct edif_list_entry *edif_entry;
+	struct edif_sa_ctl *sa_ctl;
+	uint16_t delete_sa_index = INVALID_EDIF_SA_INDEX;
+	unsigned long flags = 0;
+	uint16_t nport_handle = fcport->loop_id;
+	uint16_t cached_nport_handle;
+	uint16_t cached_update_sa_index;
+
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+	edif_entry = qla_edif_list_find_sa_index(fcport, nport_handle);
+	if (!edif_entry) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;		/* no pending delete for this handle */
+	}
+
+	/*
+	 * check for no pending delete for this index or iocb does not
+	 * match rx sa_index
+	 */
+	if (edif_entry->delete_sa_index == INVALID_EDIF_SA_INDEX ||
+	    edif_entry->update_sa_index != sa_index) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;
+	}
+
+	/*
+	 * wait until we have seen at least EDIF_RX_DELETE_FILTER_COUNT
+	 * transfers before queueing RX delete
+	 */
+	if (edif_entry->count++ < EDIF_RX_DELETE_FILTER_COUNT) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x5033,
+	    "%s: invalidating delete_sa_index,  update_sa_index: 0x%x sa_index: 0x%x, delete_sa_index: 0x%x\n",
+	    __func__, edif_entry->update_sa_index, sa_index, edif_entry->delete_sa_index);
+
+	/* snapshot the entry; it must not be dereferenced after the unlock */
+	delete_sa_index = edif_entry->delete_sa_index;
+	edif_entry->delete_sa_index = INVALID_EDIF_SA_INDEX;
+	cached_nport_handle = edif_entry->handle;
+	/* equals sa_index at this point (checked above), so it fits in 16 bits */
+	cached_update_sa_index = edif_entry->update_sa_index;
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+
+	/* sanity check on the nport handle */
+	if (nport_handle != cached_nport_handle) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE nport_handle mismatch: lid: 0x%x, edif_entry nph: 0x%x\n",
+		    __func__, nport_handle, cached_nport_handle);
+	}
+
+	/* find the sa_ctl for the delete and schedule the delete */
+	sa_ctl = qla_edif_find_sa_ctl_by_index(fcport, delete_sa_index, 0);
+	if (sa_ctl) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE sa_ctl: %p, index recvd %d\n",
+		    __func__, sa_ctl, sa_index);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "delete index %d, update index: %d, nport handle: 0x%x, handle: 0x%x\n",
+		    delete_sa_index,
+		    cached_update_sa_index, nport_handle, handle);
+
+		sa_ctl->flags = EDIF_SA_CTL_FLG_DEL;
+		set_bit(EDIF_SA_CTL_REPL, &sa_ctl->state);
+		qla_post_sa_replace_work(fcport->vha, fcport,
+		    nport_handle, sa_ctl);
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE sa_ctl not found for delete_sa_index: %d\n",
+		    __func__, delete_sa_index);
+	}
+}
+
+/*
+ * Status-IOCB entry point for the pending rx SA delete check.  Only a
+ * completion for a SCSI read (DMA_FROM_DEVICE) is passed on; the low 16-bit
+ * word of the status handle and the IOCB's edif_sa_index are handed to
+ * __chk_edif_rx_sa_delete_pending().
+ */
+void qla_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		srb_t *sp, struct sts_entry_24xx *sts24)
+{
+	struct scsi_cmnd *scmd = GET_CMD_SP(sp);
+	uint32_t iocb_handle = (uint32_t)LSW(sts24->handle);
+
+	/* anything other than a read carries no rx sa_index of interest */
+	if (scmd->sc_data_direction == DMA_FROM_DEVICE)
+		__chk_edif_rx_sa_delete_pending(vha, sp->fcport, iocb_handle,
+		    le16_to_cpu(sts24->edif_sa_index));
+}
+
+/*
+ * CTIO7 entry point for the pending rx SA delete check: forwards the IOCB
+ * handle and its edif_sa_index to __chk_edif_rx_sa_delete_pending().
+ */
+void qlt_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct ctio7_from_24xx *pkt)
+{
+	uint16_t rx_sa_index = le16_to_cpu(pkt->edif_sa_index);
+
+	__chk_edif_rx_sa_delete_pending(vha, fcport, pkt->handle, rx_sa_index);
+}
+
+/*
+ * Fill in the ELS pass-through arguments of @sp from its BSG job and from
+ * the DMA buffers recorded in sp->remap.  sp->u.bsg_cmd.bsg_job and
+ * sp->remap must already be set up by the caller.
+ */
+static void qla_parse_auth_els_ctl(struct srb *sp)
+{
+	struct qla_els_pt_arg *a = &sp->u.bsg_cmd.u.els_arg;
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_cmd.bsg_job;
+	struct fc_bsg_request *request = bsg_job->request;
+	struct qla_bsg_auth_els_request *p =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+
+	/* transmit / receive buffers are the remapped request / reply */
+	a->tx_len = a->tx_byte_count = sp->remap.req.len;
+	a->tx_addr = sp->remap.req.dma;
+	a->rx_len = a->rx_byte_count = sp->remap.rsp.len;
+	a->rx_addr = sp->remap.rsp.dma;
+
+	if (p->e.sub_cmd == SEND_ELS_REPLY) {
+		/* BSG_CTL_FLAG_* value is placed at bit 13 and up */
+		a->control_flags = p->e.extra_control_flags << 13;
+		a->rx_xchg_address = cpu_to_le32(p->e.extra_rx_xchg_address);
+		/*
+		 * NOTE(review): both els_opcode stores in this branch are
+		 * overwritten unconditionally a few lines below, so they
+		 * currently have no effect.  Confirm whether the reply opcode
+		 * or the request's command_code is the intended final value.
+		 */
+		if (p->e.extra_control_flags == BSG_CTL_FLAG_LS_ACC)
+			a->els_opcode = ELS_LS_ACC;
+		else if (p->e.extra_control_flags == BSG_CTL_FLAG_LS_RJT)
+			a->els_opcode = ELS_LS_RJT;
+	}
+	a->did = sp->fcport->d_id;
+	a->els_opcode =  request->rqst_data.h_els.command_code;
+	a->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+	a->vp_idx = sp->vha->vp_idx;
+}
+
+/*
+ * qla_edif_process_els() - issue an authentication ELS for a BSG job
+ * @vha: host the BSG request was issued on
+ * @bsg_job: BSG job carrying the ELS request and reply payloads
+ *
+ * Looks up the fcport from the port id in the request, copies the request
+ * payload into a DMA buffer from ha->purex_dma_pool, allocates a second
+ * buffer for the reply, and starts an SRB_ELS_CMD_HST_NOLOGIN srb.  Starting
+ * the srb is retried while qla2x00_start_sp() returns EAGAIN, sleeping
+ * EDIF_MSLEEP_INTERVAL ms between tries, until EDIF_RETRY_COUNT is reached.
+ *
+ * Return: QLA_SUCCESS once the srb has been started, 0 when qla_bsg_check()
+ * returns non-zero, otherwise a negative errno (-EIO, -ENXIO, -EPERM or
+ * -ENOMEM) with bsg_reply->result set to a DID_* status.
+ */
+int qla_edif_process_els(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsg_job)
+{
+	struct fc_bsg_request *bsg_request = bsg_job->request;
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	fc_port_t *fcport = NULL;
+	struct qla_hw_data *ha = vha->hw;
+	srb_t *sp;
+	int rval =  (DID_ERROR << 16), cnt;
+	port_id_t d_id;
+	struct qla_bsg_auth_els_request *p =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+	struct qla_bsg_auth_els_reply *rpl =
+	    (struct qla_bsg_auth_els_reply *)bsg_job->reply;
+
+	rpl->version = EDIF_VERSION1;
+
+	/* port_id[] in the request is ordered domain, area, al_pa */
+	d_id.b.al_pa = bsg_request->rqst_data.h_els.port_id[2];
+	d_id.b.area = bsg_request->rqst_data.h_els.port_id[1];
+	d_id.b.domain = bsg_request->rqst_data.h_els.port_id[0];
+
+	/* find matching d_id in fcport list */
+	fcport = qla2x00_find_fcport_by_pid(vha, &d_id);
+	if (!fcport) {
+		ql_dbg(ql_dbg_edif, vha, 0x911a,
+		    "%s fcport not find online portid=%06x.\n",
+		    __func__, d_id.b24);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		return -EIO;
+	}
+
+	/*
+	 * NOTE(review): a non-zero result ends the request with return value
+	 * 0; presumably qla_bsg_check() has then dealt with the job itself --
+	 * confirm.
+	 */
+	if (qla_bsg_check(vha, bsg_job, fcport))
+		return 0;
+
+	/* session is being, or has been, deleted */
+	if (EDIF_SESS_DELETE(fcport)) {
+		ql_dbg(ql_dbg_edif, vha, 0x910d,
+		    "%s ELS code %x, no loop id.\n", __func__,
+		    bsg_request->rqst_data.r_els.els_code);
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		return -ENXIO;
+	}
+
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x7005, "Host not online.\n");
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		rval = -EIO;
+		goto done;
+	}
+
+	/* pass through is supported only for ISP 4Gb or higher */
+	if (!IS_FWI2_CAPABLE(ha)) {
+		ql_dbg(ql_dbg_user, vha, 0x7001,
+		    "ELS passthru not supported for ISP23xx based adapters.\n");
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		rval = -EPERM;
+		goto done;
+	}
+
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		ql_dbg(ql_dbg_user, vha, 0x7004,
+		    "Failed get sp pid=%06x\n", fcport->d_id.b24);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	/*
+	 * NOTE(review): the buffers below have whatever element size
+	 * purex_dma_pool was created with, while the lengths recorded (and
+	 * later copied by sg_copy_to_buffer()) come from the BSG payloads.
+	 * Confirm payload_len is bounded by the pool element size.
+	 */
+	sp->remap.req.len = bsg_job->request_payload.payload_len;
+	sp->remap.req.buf = dma_pool_alloc(ha->purex_dma_pool,
+	    GFP_KERNEL, &sp->remap.req.dma);
+	if (!sp->remap.req.buf) {
+		ql_dbg(ql_dbg_user, vha, 0x7005,
+		    "Failed allocate request dma len=%x\n",
+		    bsg_job->request_payload.payload_len);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done_free_sp;
+	}
+
+	sp->remap.rsp.len = bsg_job->reply_payload.payload_len;
+	sp->remap.rsp.buf = dma_pool_alloc(ha->purex_dma_pool,
+	    GFP_KERNEL, &sp->remap.rsp.dma);
+	if (!sp->remap.rsp.buf) {
+		ql_dbg(ql_dbg_user, vha, 0x7006,
+		    "Failed allocate response dma len=%x\n",
+		    bsg_job->reply_payload.payload_len);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done_free_remap_req;
+	}
+	/* flatten the request scatterlist into the DMA buffer */
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, sp->remap.req.buf,
+	    sp->remap.req.len);
+	sp->remap.remapped = true;
+
+	sp->type = SRB_ELS_CMD_HST_NOLOGIN;
+	sp->name = "SPCN_BSG_HST_NOLOGIN";
+	sp->u.bsg_cmd.bsg_job = bsg_job;
+	qla_parse_auth_els_ctl(sp);
+
+	sp->free = qla2x00_bsg_sp_free;
+	sp->done = qla2x00_bsg_job_done;
+
+	cnt = 0;
+retry:
+	rval = qla2x00_start_sp(sp);
+	switch (rval) {
+	case QLA_SUCCESS:
+		ql_dbg(ql_dbg_edif, vha, 0x700a,
+		       "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n",
+		       __func__, sc_to_str(p->e.sub_cmd), fcport->port_name,
+		       p->e.extra_rx_xchg_address, p->e.extra_control_flags,
+		       sp->handle, sp->remap.req.len, bsg_job);
+		break;
+	/*
+	 * NOTE(review): this matches a positive EAGAIN, whereas the error
+	 * values produced in this function are negative errnos -- confirm it
+	 * agrees with what qla2x00_start_sp() actually returns.
+	 */
+	case EAGAIN:
+		msleep(EDIF_MSLEEP_INTERVAL);
+		cnt++;
+		if (cnt < EDIF_RETRY_COUNT)
+			goto retry;
+		fallthrough;
+	default:
+		ql_log(ql_log_warn, vha, 0x700e,
+		    "%s qla2x00_start_sp failed = %d\n", __func__, rval);
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		rval = -EIO;
+		goto done_free_remap_rsp;
+	}
+	return rval;
+
+	/* unwind in reverse order of acquisition */
+done_free_remap_rsp:
+	dma_pool_free(ha->purex_dma_pool, sp->remap.rsp.buf,
+	    sp->remap.rsp.dma);
+done_free_remap_req:
+	dma_pool_free(ha->purex_dma_pool, sp->remap.req.buf,
+	    sp->remap.req.dma);
+done_free_sp:
+	qla2x00_rel_sp(sp);
+
+done:
+	return rval;
+}
+
+/*
+ * Tell the application that @sess has gone down and wait for it to
+ * acknowledge.  Nothing is done unless the session is marked online for the
+ * application and the doorbell is active.  The wait polls sess_down_acked in
+ * 100 ms steps and stops on acknowledgement, on VPORT_DELETE, or after 101
+ * sleeps.
+ */
+void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess)
+{
+	u16 polls;
+
+	if (!sess->edif.app_sess_online || !DBELL_ACTIVE(vha))
+		return;
+
+	ql_dbg(ql_dbg_disc, vha, 0xf09c,
+		"%s: sess %8phN send port_offline event\n",
+		__func__, sess->port_name);
+	sess->edif.app_sess_online = 0;
+	sess->edif.sess_down_acked = 0;
+	qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SESSION_SHUTDOWN,
+	    sess->d_id.b24, 0, sess);
+	qla2x00_post_aen_work(vha, FCH_EVT_PORT_OFFLINE, sess->d_id.b24);
+
+	for (polls = 0; polls <= 100; polls++) {
+		if (READ_ONCE(sess->edif.sess_down_acked) ||
+		    test_bit(VPORT_DELETE, &vha->dpc_flags))
+			break;
+		msleep(100);
+	}
+
+	/* reset the ack whether or not one arrived */
+	sess->edif.sess_down_acked = 0;
+	ql_dbg(ql_dbg_disc, vha, 0xf09c,
+	       "%s: sess %8phN port_offline event completed\n",
+	       __func__, sess->port_name);
+}
+
+/*
+ * Clear the edb and enode data kept for @fcport's port id.  Ports without
+ * FCF_FCSP_DEVICE set are left alone.
+ */
+void qla_edif_clear_appdata(struct scsi_qla_host *vha, struct fc_port *fcport)
+{
+	if (fcport->flags & FCF_FCSP_DEVICE) {
+		qla_edb_clear(vha, fcport->d_id);
+		qla_enode_clear(vha, fcport->d_id);
+	}
+}

diff --git a/scst/qla2x00t-32gbit/qla_edif.h b/scst/qla2x00t-32gbit/qla_edif.h
new file mode 100644
index 0000000..e0f721f
--- /dev/null
+++ b/scst/qla2x00t-32gbit/qla_edif.h

@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (c)  2021    Marvell
+ */
+#ifndef __QLA_EDIF_H
+#define __QLA_EDIF_H
+
+struct qla_scsi_host;
+#define EDIF_APP_ID 0x73730001
+
+#define EDIF_MAX_INDEX	2048
+/*
+ * Control block for one security association.  It is a list node that ties
+ * an sa_index to its port, to a BSG job, and to a copy of the SA update
+ * frame.
+ */
+struct edif_sa_ctl {
+	struct list_head next;
+	uint16_t	del_index;
+	uint16_t	index;
+	uint16_t	slot;
+	/* EDIF_SA_CTL_FLG_* bit mask */
+	uint16_t	flags;
+#define	EDIF_SA_CTL_FLG_REPL		BIT_0
+#define	EDIF_SA_CTL_FLG_DEL		BIT_1
+#define EDIF_SA_CTL_FLG_CLEANUP_DEL BIT_4
+	// Invalidate Index bit and mirrors QLA_SA_UPDATE_FLAGS_DELETE
+	/*
+	 * The EDIF_SA_CTL_* values below are bit numbers within state (they
+	 * are used with set_bit()), not masks.
+	 */
+	unsigned long   state;
+#define EDIF_SA_CTL_USED	1	/* Active Sa update  */
+#define EDIF_SA_CTL_PEND	2	/* Waiting for slot */
+#define EDIF_SA_CTL_REPL	3	/* Active Replace and Delete */
+#define EDIF_SA_CTL_DEL		4	/* Delete Pending */
+	struct fc_port	*fcport;
+	/* the BSG job type depends on which libfc API the kernel provides */
+#ifndef NEW_LIBFC_API
+	struct fc_bsg_job *bsg_job;
+#else
+	struct bsg_job *bsg_job;
+#endif
+	struct qla_sa_update_frame sa_frame;
+};
+
+/* Flag bits kept in pur_core.enode_flags. */
+enum enode_flags_t {
+	ENODE_ACTIVE = 0x1,
+};
+
+/*
+ * List head plus lock and state flags.
+ * NOTE(review): presumably the queue of received-ELS enodes, with pur_lock
+ * protecting head -- confirm against the users in qla_edif.c.
+ */
+struct pur_core {
+	enum enode_flags_t	enode_flags;
+	spinlock_t		pur_lock;
+	struct  list_head	head;
+};
+
+/* Flag bits kept in edif_dbell.db_flags. */
+enum db_flags_t {
+	EDB_ACTIVE = BIT_0,
+};
+
+/*
+ * Doorbell state tests.  @_v is a host pointer with an e_dbell member; it is
+ * parenthesized so that any pointer-valued expression can be passed safely.
+ */
+#define DBELL_ACTIVE(_v) ((_v)->e_dbell.db_flags & EDB_ACTIVE)
+#define DBELL_INACTIVE(_v) (!((_v)->e_dbell.db_flags & EDB_ACTIVE))
+
+/*
+ * Doorbell state: EDB_* flags (tested by DBELL_ACTIVE / DBELL_INACTIVE), a
+ * list head with its lock, and a BSG job pointer with an expiry value.
+ * NOTE(review): presumably dbell_bsg_job is a parked doorbell read and
+ * bsg_expire its deadline in jiffies -- confirm against qla_edif.c.
+ */
+struct edif_dbell {
+	enum db_flags_t		db_flags;
+	spinlock_t		db_lock;
+	struct  list_head	head;
+	/* the BSG job type depends on which libfc API the kernel provides */
+#ifndef NEW_LIBFC_API
+	struct fc_bsg_job *dbell_bsg_job;
+#else
+	struct bsg_job *dbell_bsg_job;
+#endif
+	unsigned long bsg_expire;
+};
+
+#define SA_UPDATE_IOCB_TYPE            0x71    /* Security Association Update IOCB entry */
+/*
+ * SA update IOCB.  The same structure is used for the request and for the
+ * response handled by qla28xx_sa_update_iocb_entry().  Fields typed __le16 /
+ * __le32 are little-endian and must be converted before use on the CPU.
+ */
+struct sa_update_28xx {
+	uint8_t entry_type;             /* Entry type. */
+	uint8_t entry_count;            /* Entry count. */
+	uint8_t sys_define;             /* System Defined. */
+	uint8_t entry_status;           /* Entry Status. */
+
+	uint32_t handle;                /* IOCB System handle. */
+
+	/* one 16-bit field: nport handle going in, completion status coming out */
+	union {
+		__le16 nport_handle;  /* in: N_PORT handle. */
+		__le16 comp_sts;              /* out: completion status */
+#define CS_PORT_EDIF_UNAVAIL	0x28
+#define CS_PORT_EDIF_LOGOUT	0x29
+#define CS_PORT_EDIF_SUPP_NOT_RDY 0x64
+#define CS_PORT_EDIF_INV_REQ      0x66
+	} u;
+	uint8_t vp_index;
+	uint8_t reserved_1;
+	uint8_t port_id[3];
+	/* SA_FLAG_* bits: update vs. delete, and direction */
+	uint8_t flags;
+#define SA_FLAG_INVALIDATE BIT_0
+#define SA_FLAG_TX	   BIT_1 // 1=tx, 0=rx
+
+	uint8_t sa_key[32];     /* 256 bit key */
+	__le32 salt;
+	__le32 spi;
+	uint8_t sa_control;
+#define SA_CNTL_ENC_FCSP        (1 << 3)
+#define SA_CNTL_ENC_OPD         (2 << 3)
+#define SA_CNTL_ENC_MSK         (3 << 3)  // mask bits 4,3
+#define SA_CNTL_AES_GMAC	(1 << 2)
+#define SA_CNTL_KEY256          (2 << 0)
+#define SA_CNTL_KEY128          0
+
+	uint8_t reserved_2;
+	__le16 sa_index;   // reserve: bit 11-15
+	__le16 old_sa_info;
+	/* the completion handler expects 0xffff here after a successful delete */
+	__le16 new_sa_info;
+};
+
+#define        NUM_ENTRIES     256
+#define        PUR_GET         1
+
+/* NOTE(review): the meaning of these two counters is not visible here. */
+struct dinfo {
+	int		nodecnt;
+	int		lstate;
+};
+
+/*
+ * Addressing information recorded for a purex event: source and destination
+ * port ids, vp index, byte count, nport handle and rx exchange address.
+ */
+struct pur_ninfo {
+	port_id_t       pur_sid;
+	port_id_t	pur_did;
+	uint8_t		vp_idx;
+	short           pur_bytes_rcvd;
+	unsigned short  pur_nphdl;
+	unsigned int    pur_rx_xchg_address;
+};
+
+/* A purex event: addressing info plus a message buffer and its length. */
+struct purexevent {
+	struct  pur_ninfo	pur_info;
+	unsigned char		*msgp;
+	u32			msgp_len;
+};
+
+/* values for enode.ntype */
+#define	N_UNDEF		0
+#define	N_PUREX		1
+/* List node wrapping one event; ntype names the active member of u. */
+struct enode {
+	struct list_head	list;
+	struct dinfo		dinfo;
+	uint32_t		ntype;
+	union {
+		struct purexevent	purexinfo;
+	} u;
+};
+
+/*
+ * Size of an enode plus a maximum-size ELS payload, rounded up to a cache
+ * line.
+ * NOTE(review): presumably the payload is stored directly behind the enode in
+ * one allocation -- confirm at the allocation site.
+ */
+#define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
+
+/*
+ * Session state predicates.  Each takes an fc_port pointer; the argument is
+ * parenthesized at every use so that any pointer-valued expression can be
+ * passed safely.
+ */
+
+/* initiator mode, and the session is being deleted, deleted, or app-offline */
+#define EDIF_SESSION_DOWN(_s) \
+	(qla_ini_mode_enabled((_s)->vha) && ((_s)->disc_state == DSC_DELETE_PEND || \
+	 (_s)->disc_state == DSC_DELETED || \
+	 !(_s)->edif.app_sess_online))
+
+/* doorbell active and the port is waiting in DSC_LOGIN_AUTH_PEND */
+#define EDIF_NEGOTIATION_PENDING(_fcport) \
+	(DBELL_ACTIVE((_fcport)->vha) && \
+	 ((_fcport)->disc_state == DSC_LOGIN_AUTH_PEND))
+
+/* initiator mode, and the session is being deleted or already deleted */
+#define EDIF_SESS_DELETE(_s) \
+	(qla_ini_mode_enabled((_s)->vha) && ((_s)->disc_state == DSC_DELETE_PEND || \
+	 (_s)->disc_state == DSC_DELETED))
+
+#endif	/* __QLA_EDIF_H */

diff --git a/scst/qla2x00t-32gbit/qla_edif_bsg.h b/scst/qla2x00t-32gbit/qla_edif_bsg.h
new file mode 100644
index 0000000..0931f4e
--- /dev/null
+++ b/scst/qla2x00t-32gbit/qla_edif_bsg.h

@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (C)  2018-	    Marvell
+ *
+ */
+#ifndef __QLA_EDIF_BSG_H
+#define __QLA_EDIF_BSG_H
+
+#define EDIF_VERSION1 1
+
+/* BSG Vendor specific commands */
+#define	ELS_MAX_PAYLOAD		2112
+#ifndef	WWN_SIZE
+#define WWN_SIZE		8
+#endif
+#define VND_CMD_APP_RESERVED_SIZE	28
+#define VND_CMD_PAD_SIZE                3
+enum auth_els_sub_cmd {
+	SEND_ELS = 0,
+	SEND_ELS_REPLY,
+	PULL_ELS,
+};
+
+struct extra_auth_els {
+	enum auth_els_sub_cmd sub_cmd;	/* SEND_ELS, SEND_ELS_REPLY or PULL_ELS; NOTE(review): enum width is compiler-chosen */
+	uint32_t        extra_rx_xchg_address;
+	uint8_t         extra_control_flags;	/* presumably one of BSG_CTL_FLAG_* below - confirm */
+#define BSG_CTL_FLAG_INIT       0
+#define BSG_CTL_FLAG_LS_ACC     1
+#define BSG_CTL_FLAG_LS_RJT     2
+#define BSG_CTL_FLAG_TRM        3
+	uint8_t		version;
+	uint8_t		pad[2];	/* 2 bytes here, not VND_CMD_PAD_SIZE (3) */
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+} __packed;
+
+struct qla_bsg_auth_els_request {
+	struct fc_bsg_request r;	/* request header type declared outside this header */
+	struct extra_auth_els e;	/* auth ELS extension (sub-command, exchange address, control flags) */
+};	/* NOTE(review): not __packed, although struct extra_auth_els is - confirm intended */
+
+struct qla_bsg_auth_els_reply {
+	struct fc_bsg_reply r;	/* reply header type declared outside this header */
+	uint32_t rx_xchg_address;
+	uint8_t version;
+	uint8_t pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+};	/* NOTE(review): not __packed, unlike most structs in this header - confirm intended */
+
+struct app_id {
+	int		app_vid;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_start_reply {
+	uint32_t	host_support_edif;
+	uint32_t	edif_enode_active;
+	uint32_t	edif_edb_active;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_start {
+	struct app_id	app_info;
+	uint8_t         app_start_flags;
+	uint8_t		version;
+	uint8_t		pad[2];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_stop {
+	struct app_id	app_info;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_plogi_reply {
+	uint32_t	prli_status;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_pinfo_req {
+	struct app_id app_info;
+	uint8_t	 num_ports;
+	port_id_t remote_pid;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_pinfo {
+	port_id_t remote_pid;
+	uint8_t	remote_wwpn[WWN_SIZE];	/* 8 bytes unless WWN_SIZE was already defined elsewhere */
+	uint8_t	remote_type;	/* presumably one of VND_CMD_RTYPE_* below - confirm */
+#define	VND_CMD_RTYPE_UNKNOWN		0
+#define	VND_CMD_RTYPE_TARGET		1
+#define	VND_CMD_RTYPE_INITIATOR		2
+	uint8_t	remote_state;
+	uint8_t	auth_state;	/* presumably one of VND_CMD_AUTH_STATE_* - confirm */
+	uint8_t	version;
+	uint8_t	pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t	reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+} __packed;
+
+/* AUTH States */
+#define	VND_CMD_AUTH_STATE_UNDEF	0
+#define	VND_CMD_AUTH_STATE_SESSION_SHUTDOWN	1
+#define	VND_CMD_AUTH_STATE_NEEDED	2
+#define	VND_CMD_AUTH_STATE_ELS_RCVD	3
+#define	VND_CMD_AUTH_STATE_SAUPDATE_COMPL 4
+
+struct app_pinfo_reply {
+	uint8_t		port_count;	/* presumably the number of valid ports[] entries - confirm */
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+	struct app_pinfo ports[];	/* flexible array member: adds nothing to sizeof(struct app_pinfo_reply) */
+} __packed;
+
+struct app_sinfo_req {
+	struct app_id	app_info;
+	uint8_t		num_ports;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_sinfo {
+	uint8_t	remote_wwpn[WWN_SIZE];	/* 8 bytes unless WWN_SIZE was already defined elsewhere */
+	int64_t	rekey_count;
+	uint8_t	rekey_mode;	/* single byte between 64-bit fields; __packed means no padding follows it */
+	int64_t	tx_bytes;	/* not naturally aligned because of the preceding uint8_t and __packed */
+	int64_t	rx_bytes;
+} __packed;
+
+struct app_stats_reply {
+	uint8_t		elem_count;	/* presumably the number of valid elem[] entries - confirm */
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+	struct app_sinfo elem[];	/* flexible array member: adds nothing to sizeof(struct app_stats_reply) */
+} __packed;
+
+struct qla_sa_update_frame {
+	struct app_id	app_info;
+	uint16_t	flags;	/* SAU_FLG_* bits below; 0x04 and 0x10 are not defined here */
+#define SAU_FLG_INV		0x01	/* delete key */
+#define SAU_FLG_TX		0x02	/* 1=tx, 0 = rx */
+#define SAU_FLG_FORCE_DELETE	0x08
+#define SAU_FLG_GMAC_MODE	0x20	/*
+					 * GMAC mode is cleartext for the IO
+					 * (i.e. NULL encryption)
+					 */
+#define SAU_FLG_KEY128          0x40
+#define SAU_FLG_KEY256          0x80
+	uint16_t        fast_sa_index:10,	/* 10 + 6 bits share one uint16_t; bit order is compiler/ABI-defined */
+			reserved:6;
+	uint32_t	salt;
+	uint32_t	spi;
+	uint8_t		sa_key[32];	/* 32 bytes = 256 bits of key storage */
+	uint8_t		node_name[WWN_SIZE];
+	uint8_t		port_name[WWN_SIZE];
+	port_id_t	port_id;
+	uint8_t		version;
+	uint8_t		pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t		reserved2[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes; named reserved2 because the bit-field above is 'reserved' */
+} __packed;
+
+#define	QL_VND_SC_UNDEF		0
+#define	QL_VND_SC_SA_UPDATE	1
+#define	QL_VND_SC_APP_START	2
+#define	QL_VND_SC_APP_STOP	3
+#define	QL_VND_SC_AUTH_OK	4
+#define	QL_VND_SC_AUTH_FAIL	5
+#define	QL_VND_SC_REKEY_CONFIG	6
+#define	QL_VND_SC_GET_FCINFO	7
+#define	QL_VND_SC_GET_STATS	8
+#define QL_VND_SC_AEN_COMPLETE  9
+#define QL_VND_SC_READ_DBELL	10
+
+/*
+ * bsg caller to provide empty buffer for doorbell events.
+ *
+ * sg_io_v4.din_xferp  = empty buffer for door bell events
+ * sg_io_v4.dout_xferp = struct edif_read_dbell *buf
+ */
+struct edif_read_dbell {
+	struct app_id app_info;
+	uint8_t version;
+	uint8_t pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+};	/* NOTE(review): not __packed, unlike most structs in this header - confirm intended */
+
+
+/* Application interface data structure for rtn data */
+#define	EXT_DEF_EVENT_DATA_SIZE	64
+struct edif_app_dbell {
+	uint32_t	event_code;
+	uint32_t	event_data_size;	/* presumably the number of valid bytes in event_data - confirm */
+	union  {	/* anonymous union: port_id overlays the start of event_data */
+		port_id_t	port_id;
+		uint8_t		event_data[EXT_DEF_EVENT_DATA_SIZE];	/* 64 bytes */
+	};
+} __packed;
+
+struct edif_sa_update_aen {
+	port_id_t port_id;
+	uint32_t key_type;	/* presumably QL_VND_RX_SA_KEY (1) or QL_VND_TX_SA_KEY (2) - confirm */
+	uint32_t status;	/* 0 success, 1 failed, 2 timeout, 3 error (cf. QL_VND_SA_STAT_*) */
+	uint8_t	version;
+	uint8_t	pad[VND_CMD_PAD_SIZE];
+	uint8_t	reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+#define	QL_VND_SA_STAT_SUCCESS	0
+#define	QL_VND_SA_STAT_FAILED	1
+#define	QL_VND_SA_STAT_TIMEOUT	2
+#define	QL_VND_SA_STAT_ERROR	3
+
+#define	QL_VND_RX_SA_KEY	1
+#define	QL_VND_TX_SA_KEY	2
+
+/* App defines for plogi auth'd ok and plogi auth bad requests */
+struct auth_complete_cmd {
+	struct app_id app_info;
+#define PL_TYPE_WWPN    1
+#define PL_TYPE_DID     2
+	uint32_t    type;	/* presumably PL_TYPE_WWPN or PL_TYPE_DID, selecting the member of u - confirm */
+	union {
+		uint8_t  wwpn[WWN_SIZE];	/* presumably valid when type == PL_TYPE_WWPN - confirm */
+		port_id_t d_id;	/* presumably valid when type == PL_TYPE_DID - confirm */
+	} u;
+	uint8_t	version;
+	uint8_t	pad[VND_CMD_PAD_SIZE];	/* 3 bytes */
+	uint8_t	reserved[VND_CMD_APP_RESERVED_SIZE];	/* 28 bytes */
+} __packed;
+
+struct aen_complete_cmd {
+	struct app_id app_info;
+	port_id_t   port_id;
+	uint32_t    event_code;
+	uint8_t     version;
+	uint8_t     pad[VND_CMD_PAD_SIZE];
+	uint8_t     reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+#define RX_DELAY_DELETE_TIMEOUT 20
+
+#define FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN  1
+
+#endif	/* __QLA_EDIF_BSG_H */

diff --git a/scst/qla2x00t-32gbit/qla_fw.h b/scst/qla2x00t-32gbit/qla_fw.h
index f2d560d..de7b1a3 100644
--- a/scst/qla2x00t-32gbit/qla_fw.h
+++ b/scst/qla2x00t-32gbit/qla_fw.h

@@ -1,16 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_FW_H
 #define __QLA_FW_H
 
 #include <linux/version.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0) || defined(RHEL_MAJOR)
 #include <linux/nvme.h>
-#endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
 #include <linux/nvme-fc.h>
 #endif
@@ -95,10 +92,11 @@
 	uint8_t port_name[WWN_SIZE];
 	uint8_t node_name[WWN_SIZE];
 
-	uint8_t reserved_3[4];
+	uint8_t reserved_3[2];
+	uint16_t nvme_first_burst_size;
 	uint16_t prli_nvme_svc_param_word_0;	/* Bits 15-0 of word 0 */
 	uint16_t prli_nvme_svc_param_word_3;	/* Bits 15-0 of word 3 */
-	uint16_t nvme_first_burst_size;
+	uint8_t secure_login;
 	uint8_t reserved_4[14];
 };
 
@@ -502,6 +500,9 @@
 	struct scsi_lun lun;		/* FCP LUN (BE). */
 
 	__le16	control_flags;		/* Control flags. */
+#define CF_NEW_SA			BIT_12
+#define CF_EN_EDIF			BIT_9
+#define CF_ADDITIONAL_PARAM_BLK		BIT_8
 #define CF_DIF_SEG_DESCR_ENABLE		BIT_3
 #define CF_DATA_SEG_DESCR_ENABLE	BIT_2
 #define CF_READ_DATA			BIT_1
@@ -624,6 +625,7 @@
 	union {
 		__le16 reserved_1;
 		__le16	nvme_rsp_pyld_len;
+		__le16 edif_sa_index;	 /* edif sa_index used for initiator read data */
 	};
 
 	__le16	state_flags;		/* State flags. */
@@ -631,7 +633,7 @@
 #define SF_NVME_ERSP            BIT_6
 #define SF_FCP_RSP_DMA		BIT_0
 
-	__le16	retry_delay;
+	__le16	status_qualifier;
 	__le16	scsi_status;		/* SCSI status. */
 #define SS_CONFIRMATION_REQ		BIT_12
 
@@ -815,9 +817,10 @@
 #define EPD_ELS_COMMAND		(0 << 13)
 #define EPD_ELS_ACC		(1 << 13)
 #define EPD_ELS_RJT		(2 << 13)
-#define EPD_RX_XCHG		(3 << 13)
+#define EPD_RX_XCHG		(3 << 13)  /* terminate exchange */
 #define ECF_CLR_PASSTHRU_PEND	BIT_12
 #define ECF_INCL_FRAME_HDR	BIT_11
+#define ECF_SEC_LOGIN		BIT_3
 
 	union {
 		struct {
@@ -909,6 +912,7 @@
 #define LCF_FCP2_OVERRIDE	BIT_9	/* Set/Reset word 3 of PRLI. */
 #define LCF_CLASS_2		BIT_8	/* Enable class 2 during PLOGI. */
 #define LCF_FREE_NPORT		BIT_7	/* Release NPORT handle after LOGO. */
+#define LCF_COMMON_FEAT		BIT_7	/* PLOGI - Set Common Features Field */
 #define LCF_EXPL_LOGO		BIT_6	/* Perform an explicit LOGO. */
 #define LCF_NVME_PRLI		BIT_6   /* Perform NVME FC4 PRLI */
 #define LCF_SKIP_PRLI		BIT_5	/* Skip PRLI after PLOGI. */
@@ -933,6 +937,8 @@
 	uint8_t rsp_size;		/* Response size in 32bit words. */
 
 	__le32	io_parameter[11];	/* General I/O parameters. */
+#define LIO_COMM_FEAT_FCSP	BIT_21
+#define LIO_COMM_FEAT_CIO	BIT_31
 #define LSC_SCODE_NOLINK	0x01
 #define LSC_SCODE_NOIOCB	0x02
 #define LSC_SCODE_NOXCB		0x03
@@ -995,11 +1001,18 @@
 
 	uint32_t handle;		/* System handle. */
 
-	__le16	nport_handle;		/* N_PORT handle. */
-					/* or Completion status. */
+	union {
+		__le16 nport_handle;            /* N_PORT handle. */
+		__le16 comp_status;             /* Completion status. */
+	};
 
 	__le16	options;		/* Options. */
 #define AOF_NO_ABTS		BIT_0	/* Do not send any ABTS. */
+#define AOF_NO_RRQ		BIT_1   /* Do not send RRQ. */
+#define AOF_ABTS_TIMEOUT	BIT_2   /* Disable logout on ABTS timeout. */
+#define AOF_ABTS_RTY_CNT	BIT_3   /* Use driver specified retry count. */
+#define AOF_RSP_TIMEOUT		BIT_4   /* Use specified response timeout. */
+
 
 	uint32_t handle_to_abort;	/* System handle to abort. */
 
@@ -1008,8 +1021,20 @@
 
 	uint8_t port_id[3];		/* PortID of destination port. */
 	uint8_t vp_index;
-
-	uint8_t reserved_2[12];
+	u8	reserved_2[4];
+	union {
+		struct {
+			__le16 abts_rty_cnt;
+			__le16 rsp_timeout;
+		} drv;
+		struct {
+			u8	ba_rjt_vendorUnique;
+			u8	ba_rjt_reasonCodeExpl;
+			u8	ba_rjt_reasonCode;
+			u8	reserved_3;
+		} fw;
+	};
+	u8	reserved_4[4];
 };
 
 #define ABTS_RCV_TYPE		0x54
@@ -1660,6 +1685,7 @@
 #define FLT_REG_VPD_SEC_27XX_1	0x52
 #define FLT_REG_VPD_SEC_27XX_2	0xD8
 #define FLT_REG_VPD_SEC_27XX_3	0xDA
+#define FLT_REG_NVME_PARAMS_27XX	0x21
 
 /* 28xx */
 #define FLT_REG_AUX_IMG_PRI_28XX	0x125
@@ -1676,6 +1702,8 @@
 #define FLT_REG_MPI_SEC_28XX		0xF0
 #define FLT_REG_PEP_PRI_28XX		0xD1
 #define FLT_REG_PEP_SEC_28XX		0xF1
+#define FLT_REG_NVME_PARAMS_PRI_28XX	0x14E
+#define FLT_REG_NVME_PARAMS_SEC_28XX	0x179
 
 struct qla_flt_region {
 	__le16	code;
@@ -1691,7 +1719,7 @@
 	__le16	length;
 	__le16	checksum;
 	__le16	unused;
-	struct qla_flt_region region[0];
+	struct qla_flt_region region[];
 };
 
 #define FLT_REGION_SIZE		16

diff --git a/scst/qla2x00t-32gbit/qla_gbl.h b/scst/qla2x00t-32gbit/qla_gbl.h
index 83470ec..7567728 100644
--- a/scst/qla2x00t-32gbit/qla_gbl.h
+++ b/scst/qla2x00t-32gbit/qla_gbl.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_GBL_H
 #define	__QLA_GBL_H
@@ -13,6 +12,7 @@
  * Global Function Prototypes in qla_init.c source file.
  */
 extern int qla2x00_initialize_adapter(scsi_qla_host_t *);
+extern int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport);
 
 extern int qla2100_pci_config(struct scsi_qla_host *);
 extern int qla2300_pci_config(struct scsi_qla_host *);
@@ -70,8 +70,6 @@
 extern int qla2x00_async_adisc(struct scsi_qla_host *, fc_port_t *,
     uint16_t *);
 extern int qla2x00_async_tm_cmd(fc_port_t *, uint32_t, uint32_t, uint32_t);
-extern void qla2x00_async_login_done(struct scsi_qla_host *, fc_port_t *,
-    uint16_t *);
 struct qla_work_evt *qla2x00_alloc_work(struct scsi_qla_host *,
     enum qla_work_type);
 extern int qla24xx_async_gnl(struct scsi_qla_host *, fc_port_t *);
@@ -129,6 +127,22 @@
 void qla_rscn_replay(fc_port_t *fcport);
 void qla24xx_free_purex_item(struct purex_item *item);
 extern bool qla24xx_risc_firmware_invalid(uint32_t *);
+void qla_init_iocb_limit(scsi_qla_host_t *);
+
+void qla_edif_list_del(fc_port_t *fcport);
+void qla_edif_sadb_release(struct qla_hw_data *ha);
+int qla_edif_sadb_build_free_pool(struct qla_hw_data *ha);
+void qla_edif_sadb_release_free_pool(struct qla_hw_data *ha);
+void qla_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		srb_t *sp, struct sts_entry_24xx *sts24);
+void qlt_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct ctio7_from_24xx *ctio);
+void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport);
+int qla_edif_process_els(scsi_qla_host_t *vha, BSG_JOB_TYPE *bsgjob);
+void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess);
+void qla_edif_clear_appdata(struct scsi_qla_host *vha,
+			    struct fc_port *fcport);
+const char *sc_to_str(uint16_t cmd);
 
 /*
  * Global Data in qla_os.c source file.
@@ -147,6 +161,9 @@
 extern int ql2xsmartsan;
 extern int ql2xallocfwdump;
 extern int ql2xextended_error_logging;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+extern int ql2xextended_error_logging_ktrace;
+#endif
 extern int ql2xiidmaenable;
 extern int ql2xmqsupport;
 extern int ql2xfwloadbin;
@@ -157,7 +174,6 @@
 extern int ql2xgffidenable;
 extern int ql2xenabledif;
 extern int ql2xenablehba_err_chk;
-extern int ql2xtargetreset;
 extern int ql2xdontresethba;
 #if 1
 extern uint ql2xmaxlun;
@@ -176,9 +192,12 @@
 extern int ql2xautodetectsfp;
 extern int ql2xenablemsix;
 extern int qla2xuseresexchforels;
-extern int ql2xexlogins;
 extern int ql2xdifbundlinginternalbuffers;
 extern int ql2xfulldump_on_mpifail;
+extern int ql2xsecenable;
+extern int ql2xenforce_iocb_limit;
+extern int ql2xabts_wait_nvme;
+extern u32 ql2xnvme_queues;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
@@ -221,10 +240,10 @@
 extern int __qla83xx_set_drv_presence(scsi_qla_host_t *vha);
 extern int qla83xx_clear_drv_presence(scsi_qla_host_t *vha);
 extern int __qla83xx_clear_drv_presence(scsi_qla_host_t *vha);
-extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32);
 
 extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32);
 extern void qla2x00_disable_board_on_pci_error(struct work_struct *);
+extern void qla_eeh_work(struct work_struct *);
 extern void qla2x00_sp_compl(srb_t *sp, int);
 extern void qla2xxx_qpair_sp_free_dma(srb_t *sp);
 extern void qla2xxx_qpair_sp_compl(srb_t *sp, int);
@@ -236,6 +255,10 @@
 void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *);
 void qla24xx_process_purex_rdp(struct scsi_qla_host *vha,
 			       struct purex_item *pkt);
+void qla_pci_set_eeh_busy(struct scsi_qla_host *);
+void qla_schedule_eeh_work(struct scsi_qla_host *);
+struct edif_sa_ctl *qla_edif_find_sa_ctl_by_index(fc_port_t *fcport,
+						  int index, int dir);
 
 /*
  * Global Functions in qla_mid.c source file.
@@ -259,7 +282,6 @@
 extern scsi_qla_host_t *qla24xx_create_vhost(struct fc_vport *);
 
 extern void qla2x00_sp_free_dma(srb_t *sp);
-extern char *qla2x00_get_fw_version_str(struct scsi_qla_host *, char *);
 
 extern void qla2x00_mark_device_lost(scsi_qla_host_t *, fc_port_t *, int);
 extern void qla2x00_mark_all_devices_lost(scsi_qla_host_t *);
@@ -280,7 +302,10 @@
 /*
  * Global Function Prototypes in qla_iocb.c source file.
  */
-
+void qla_els_pt_iocb(struct scsi_qla_host *vha,
+	struct els_entry_24xx *pkt, struct qla_els_pt_arg *a);
+cont_a64_entry_t *qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha,
+		struct req_que *que);
 extern uint16_t qla2x00_calc_iocbs_32(uint16_t);
 extern uint16_t qla2x00_calc_iocbs_64(uint16_t);
 extern void qla2x00_build_scsi_iocbs_32(srb_t *, cmd_entry_t *, uint16_t);
@@ -296,7 +321,8 @@
 extern int qla24xx_dif_start_scsi(srb_t *);
 extern int qla2x00_start_bidir(srb_t *, struct scsi_qla_host *, uint32_t);
 extern int qla2xxx_dif_start_scsi_mq(srb_t *);
-extern void qla2x00_init_timer(srb_t *sp, unsigned long tmo);
+extern void qla2x00_init_async_sp(srb_t *sp, unsigned long tmo,
+				  void (*done)(struct srb *, int));
 extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
 
 extern void *qla2x00_alloc_iocbs(struct scsi_qla_host *, srb_t *);
@@ -310,6 +336,10 @@
 	struct dsd64 *, uint16_t, struct qla_tgt_cmd *);
 extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
 extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
+extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha,
+	struct qla_work_evt *e);
+void qla2x00_sp_release(struct kref *kref);
+void qla2x00_els_dcmd2_iocb_timeout(void *data);
 
 /*
  * Global Function Prototypes in qla_mbx.c source file.
@@ -408,7 +438,8 @@
 qla2x00_get_resource_cnts(scsi_qla_host_t *);
 
 extern int
-qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map);
+qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map,
+		u8 *num_entries);
 
 extern int
 qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
@@ -529,6 +560,10 @@
 extern int
 qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint);
 
+extern int
+qla26xx_dport_diagnostics_v2(scsi_qla_host_t *,
+			     struct qla_dport_diag_v2 *,  mbx_cmd_t *);
+
 int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *);
 int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8);
 int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t,
@@ -549,6 +584,7 @@
     uint32_t *);
 extern int qla2xxx_write_remote_register(scsi_qla_host_t *, uint32_t,
     uint32_t);
+void qla_no_op_mb(struct scsi_qla_host *vha);
 
 /*
  * Global Function Prototypes in qla_isr.c source file.
@@ -577,11 +613,11 @@
 fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t);
 fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8);
 fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8);
+void __qla_consume_iocb(struct scsi_qla_host *vha, void **pkt, struct rsp_que **rsp);
 
 /*
  * Global Function Prototypes in qla_sup.c source file.
  */
-extern void qla2x00_release_nvram_protection(scsi_qla_host_t *);
 extern int qla24xx_read_flash_data(scsi_qla_host_t *, uint32_t *,
     uint32_t, uint32_t);
 extern uint8_t *qla2x00_read_nvram_data(scsi_qla_host_t *, void *, uint32_t,
@@ -639,6 +675,12 @@
 
 extern void qla2xxx_flash_npiv_conf(scsi_qla_host_t *);
 extern int qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *);
+extern int qla2x00_mailbox_passthru(BSG_JOB_TYPE *bsg_job);
+int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha, void **pkt,
+	struct rsp_que **rsp, u8 *buf, u32 buf_len);
+
+int qla_mailbox_passthru(scsi_qla_host_t *vha, uint16_t *mbx_in,
+			 uint16_t *mbx_out);
 
 /*
  * Global Function Prototypes in qla_dbg.c source file.
@@ -685,8 +727,6 @@
 	struct ct_sns_rsp *, const char *);
 extern void qla2x00_async_iocb_timeout(void *data);
 
-extern void qla2x00_free_fcport(fc_port_t *);
-
 extern int qla24xx_post_gpnid_work(struct scsi_qla_host *, port_id_t *);
 extern int qla24xx_async_gpnid(scsi_qla_host_t *, port_id_t *);
 void qla24xx_handle_gpnid_event(scsi_qla_host_t *, struct event_arg *);
@@ -696,7 +736,7 @@
 void qla24xx_handle_gpsc_event(scsi_qla_host_t *, struct event_arg *);
 int qla2x00_mgmt_svr_login(scsi_qla_host_t *);
 void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea);
-int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport);
+int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool);
 int qla24xx_async_gpnft(scsi_qla_host_t *, u8, srb_t *);
 void qla24xx_async_gpnft_done(scsi_qla_host_t *, srb_t *);
 void qla24xx_async_gnnft_done(scsi_qla_host_t *, srb_t *);
@@ -708,13 +748,18 @@
 void qla24xx_handle_gfpnid_event(scsi_qla_host_t *, struct event_arg *);
 void qla24xx_sp_unmap(scsi_qla_host_t *, srb_t *);
 void qla_scan_work_fn(struct work_struct *);
+uint qla25xx_fdmi_port_speed_capability(struct qla_hw_data *);
+uint qla25xx_fdmi_port_speed_currently(struct qla_hw_data *);
 
 /*
  * Global Function Prototypes in qla_attr.c source file.
  */
 struct device_attribute;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 extern struct device_attribute *qla2x00_host_attrs[];
-extern struct device_attribute *qla2x00_host_attrs_dm[];
+#else
+extern const struct attribute_group *qla2x00_host_groups[];
+#endif
 struct fc_function_template;
 extern struct fc_function_template qla2xxx_transport_functions;
 extern struct fc_function_template qla2xxx_transport_vport_functions;
@@ -728,7 +773,9 @@
 extern int qla24xx_update_all_fcp_prio(scsi_qla_host_t *);
 extern int qla24xx_fcp_prio_cfg_valid(scsi_qla_host_t *,
 	struct qla_fcp_prio_cfg *, uint8_t);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 void qla_insert_tgt_attrs(void);
+#endif
 /*
  * Global Function Prototypes in qla_dfs.c source file.
  */
@@ -749,12 +796,6 @@
 extern int qla25xx_delete_req_que(struct scsi_qla_host *, struct req_que *);
 extern int qla25xx_delete_rsp_que(struct scsi_qla_host *, struct rsp_que *);
 extern int qla25xx_delete_queues(struct scsi_qla_host *);
-extern uint16_t qla24xx_rd_req_reg(struct qla_hw_data *, uint16_t);
-extern uint16_t qla25xx_rd_req_reg(struct qla_hw_data *, uint16_t);
-extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
-extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
-extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
-extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
 
 /* qlafx00 related functions */
 extern int qlafx00_pci_config(struct scsi_qla_host *);
@@ -791,7 +832,6 @@
 extern void qlafx00_fxdisc_iocb(srb_t *, struct fxdisc_entry_fx00 *);
 extern void qlafx00_timer_routine(scsi_qla_host_t *);
 extern int qlafx00_rescan_isp(scsi_qla_host_t *);
-extern int qlafx00_loop_reset(scsi_qla_host_t *vha);
 
 /* qla82xx related functions */
 
@@ -840,8 +880,6 @@
 extern void qla82xx_set_drv_active(scsi_qla_host_t *);
 extern int qla82xx_wr_32(struct qla_hw_data *, ulong, u32);
 extern int qla82xx_rd_32(struct qla_hw_data *, ulong);
-extern int qla82xx_rdmem(struct qla_hw_data *, u64, void *, int);
-extern int qla82xx_wrmem(struct qla_hw_data *, u64, void *, int);
 
 /* ISP 8021 IDC */
 extern void qla82xx_clear_drv_active(struct qla_hw_data *);
@@ -863,7 +901,7 @@
 extern int qla81xx_set_led_config(scsi_qla_host_t *, uint16_t *);
 extern int qla81xx_get_led_config(scsi_qla_host_t *, uint16_t *);
 extern int qla82xx_mbx_beacon_ctl(scsi_qla_host_t *, int);
-extern char *qdev_state(uint32_t);
+extern const char *qdev_state(uint32_t);
 extern void qla82xx_clear_pending_mbx(scsi_qla_host_t *);
 extern int qla82xx_read_temperature(scsi_qla_host_t *);
 extern int qla8044_read_temperature(scsi_qla_host_t *);
@@ -871,18 +909,16 @@
 extern int ql26xx_led_config(scsi_qla_host_t *, uint16_t, uint16_t *);
 
 /* BSG related functions */
-#ifndef NEW_LIBFC_API
-extern int qla24xx_bsg_request(struct fc_bsg_job *);
-extern int qla24xx_bsg_timeout(struct fc_bsg_job *);
-#else
-extern int qla24xx_bsg_request(struct bsg_job *);
-extern int qla24xx_bsg_timeout(struct bsg_job *);
-#endif
+extern int qla24xx_bsg_request(BSG_JOB_TYPE *);
+extern int qla24xx_bsg_timeout(BSG_JOB_TYPE *);
 extern int qla84xx_reset_chip(scsi_qla_host_t *, uint16_t);
 extern int qla2x00_issue_iocb_timeout(scsi_qla_host_t *, void *,
 	dma_addr_t, size_t, uint32_t);
 extern int qla2x00_get_idma_speed(scsi_qla_host_t *, uint16_t,
 	uint16_t *, uint16_t *);
+extern int qla24xx_sadb_update(BSG_JOB_TYPE *bsg_job);
+extern int qla_post_sa_replace_work(struct scsi_qla_host *vha,
+	 fc_port_t *fcport, uint16_t nport_handle, struct edif_sa_ctl *sa_ctl);
 
 /* 83xx related functions */
 void qla83xx_fw_dump(scsi_qla_host_t *vha);
@@ -927,6 +963,7 @@
 extern void qlt_handle_abts_recv(struct scsi_qla_host *, struct rsp_que *,
 	response_t *);
 
+struct scsi_qla_host *qla_find_host_by_d_id(struct scsi_qla_host *vha, be_id_t d_id);
 int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *,
 	struct imm_ntfy_from_isp *, int);
 void qla24xx_do_nack_work(struct scsi_qla_host *, struct qla_work_evt *);
@@ -939,14 +976,61 @@
 void qla24xx_delete_sess_fn(struct work_struct *);
 void qlt_unknown_atio_work_fn(struct work_struct *);
 void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
-void qlt_remove_target_resources(struct qla_hw_data *);
+void qla_remove_hostmap(struct qla_hw_data *ha);
 void qlt_clr_qp_table(struct scsi_qla_host *vha);
 void qlt_set_mode(struct scsi_qla_host *);
 int qla2x00_set_data_rate(scsi_qla_host_t *vha, uint16_t mode);
 extern void qla24xx_process_purex_list(struct purex_list *);
+extern void qla2x00_dfs_create_rport(scsi_qla_host_t *vha, struct fc_port *fp);
+extern void qla2x00_dfs_remove_rport(scsi_qla_host_t *vha, struct fc_port *fp);
+extern void qla_wait_nvme_release_cmd_kref(srb_t *sp);
+extern void qla_nvme_abort_set_option
+		(struct abort_entry_24xx *abt, srb_t *sp);
+extern void qla_nvme_abort_process_comp_status
+		(struct abort_entry_24xx *abt, srb_t *sp);
 
 /* nvme.c */
 void qla_nvme_unregister_remote_port(struct fc_port *fcport);
-void qla27xx_reset_mpi(scsi_qla_host_t *vha);
+
+/* qla_edif.c */
+fc_port_t *qla2x00_find_fcport_by_pid(scsi_qla_host_t *vha, port_id_t *id);
+void qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype, uint32_t data, uint32_t data2,
+		fc_port_t *fcport);
+void qla_edb_stop(scsi_qla_host_t *vha);
+int32_t qla_edif_app_mgmt(BSG_JOB_TYPE *bsg_job);
+void qla_enode_init(scsi_qla_host_t *vha);
+void qla_enode_stop(scsi_qla_host_t *vha);
+void qla_edif_flush_sa_ctl_lists(fc_port_t *fcport);
+void qla_edb_init(scsi_qla_host_t *vha);
+void qla_edif_timer(scsi_qla_host_t *vha);
+int qla28xx_start_scsi_edif(srb_t *sp);
+void qla24xx_sa_update_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb);
+void qla24xx_sa_replace_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb);
+void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp);
+void qla28xx_sa_update_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
+		struct sa_update_28xx *pkt);
 void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea);
+
+#define QLA2XX_HW_ERROR			BIT_0
+#define QLA2XX_SHT_LNK_DWN		BIT_1
+#define QLA2XX_INT_ERR			BIT_2
+#define QLA2XX_CMD_TIMEOUT		BIT_3
+#define QLA2XX_RESET_CMD_ERR		BIT_4
+#define QLA2XX_TGT_SHT_LNK_DOWN		BIT_17
+
+#define QLA2XX_MAX_LINK_DOWN_TIME	100
+
+int qla2xxx_start_stats(struct Scsi_Host *shost, u32 flags);
+int qla2xxx_stop_stats(struct Scsi_Host *shost, u32 flags);
+int qla2xxx_reset_stats(struct Scsi_Host *shost, u32 flags);
+
+int qla2xxx_get_ini_stats(struct Scsi_Host *shost, u32 flags, void *data, u64 size);
+int qla2xxx_get_tgt_stats(struct Scsi_Host *shost, u32 flags,
+			  struct fc_rport *rport, void *data, u64 size);
+int qla2xxx_disable_port(struct Scsi_Host *shost);
+int qla2xxx_enable_port(struct Scsi_Host *shost);
+
+uint64_t qla2x00_get_num_tgts(scsi_qla_host_t *vha);
+uint64_t qla2x00_count_set_bits(u32 num);
+
 #endif /* _QLA_GBL_H */

diff --git a/scst/qla2x00t-32gbit/qla_gs.c b/scst/qla2x00t-32gbit/qla_gs.c
index b569fd6..64ab070 100644
--- a/scst/qla2x00t-32gbit/qla_gs.c
+++ b/scst/qla2x00t-32gbit/qla_gs.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_target.h"
@@ -530,7 +529,6 @@
 		if (!e)
 			goto err2;
 
-		del_timer(&sp->u.iocb_cmd.timer);
 		e->u.iosb.sp = sp;
 		qla2x00_post_work(vha, e);
 		return;
@@ -557,8 +555,8 @@
 			sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 		}
 
-		sp->free(sp);
-
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return;
 	}
 
@@ -593,13 +591,15 @@
 	if (!vha->flags.online)
 		goto done;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_CT_PTHRU_CMD;
 	sp->name = "rft_id";
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_sns_sp_done);
 
 	sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
 	    sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
@@ -633,14 +633,12 @@
 	ct_req->req.rft_id.port_id = port_id_to_be_id(vha->d_id);
 	ct_req->req.rft_id.fc4_types[2] = 0x01;		/* FCP-3 */
 
-	if (vha->flags.nvme_enabled)
+	if (vha->flags.nvme_enabled && qla_ini_mode_enabled(vha))
 		ct_req->req.rft_id.fc4_types[6] = 1;    /* NVMe type 28h */
 
 	sp->u.iocb_cmd.u.ctarg.req_size = RFT_ID_REQ_SIZE;
 	sp->u.iocb_cmd.u.ctarg.rsp_size = RFT_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla2x00_async_sns_sp_done;
 
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - hdl=%x portid %06x.\n",
@@ -654,7 +652,8 @@
 	}
 	return rval;
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -677,8 +676,7 @@
 		return (QLA_SUCCESS);
 	}
 
-	return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha),
-	    FC4_TYPE_FCP_SCSI);
+	return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), type);
 }
 
 static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
@@ -689,13 +687,15 @@
 	srb_t *sp;
 	struct ct_sns_pkt *ct_sns;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_CT_PTHRU_CMD;
 	sp->name = "rff_id";
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_sns_sp_done);
 
 	sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
 	    sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
@@ -728,13 +728,11 @@
 	/* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */
 	ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id);
 	ct_req->req.rff_id.fc4_feature = fc4feature;
-	ct_req->req.rff_id.fc4_type = fc4type;		/* SCSI - FCP */
+	ct_req->req.rff_id.fc4_type = fc4type;		/* SCSI-FCP or FC-NVMe */
 
 	sp->u.iocb_cmd.u.ctarg.req_size = RFF_ID_REQ_SIZE;
 	sp->u.iocb_cmd.u.ctarg.rsp_size = RFF_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla2x00_async_sns_sp_done;
 
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - hdl=%x portid %06x feature %x type %x.\n",
@@ -750,7 +748,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -780,13 +779,15 @@
 	srb_t *sp;
 	struct ct_sns_pkt *ct_sns;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_CT_PTHRU_CMD;
 	sp->name = "rnid";
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_sns_sp_done);
 
 	sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
 	    sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
@@ -824,9 +825,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = RNN_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla2x00_async_sns_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - hdl=%x portid %06x\n",
 	    sp->name, sp->handle, d_id->b24);
@@ -841,7 +839,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -887,13 +886,15 @@
 	srb_t *sp;
 	struct ct_sns_pkt *ct_sns;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_CT_PTHRU_CMD;
 	sp->name = "rsnn_nn";
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_sns_sp_done);
 
 	sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
 	    sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
@@ -937,9 +938,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = RSNN_NN_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla2x00_async_sns_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - hdl=%x.\n",
 	    sp->name, sp->handle);
@@ -954,7 +952,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -1248,7 +1247,7 @@
 }
 
 /**
- * qla2x00_snd_rft_id() - SNS Register FC-4 TYPEs (RFT_ID) supported by the HBA.
+ * qla2x00_sns_rft_id() - SNS Register FC-4 TYPEs (RFT_ID) supported by the HBA.
  * @vha: HA context
  *
  * This command uses the old Exectute SNS Command mailbox routine.
@@ -1480,7 +1479,7 @@
 }
 
 /**
- * qla2x00_prep_ct_req() - Prepare common CT request fields for SNS query.
+ * qla2x00_prep_ct_fdmi_req() - Prepare common CT request fields for SNS query.
  * @p: CT request buffer
  * @cmd: GS command
  * @rsp_size: response size in bytes
@@ -1502,7 +1501,7 @@
 	return &p->p.req;
 }
 
-static uint
+uint
 qla25xx_fdmi_port_speed_capability(struct qla_hw_data *ha)
 {
 	uint speeds = 0;
@@ -1538,7 +1537,8 @@
 	}
 	if (IS_QLA2031(ha)) {
 		if ((ha->pdev->subsystem_vendor == 0x103C) &&
-		    (ha->pdev->subsystem_device == 0x8002)) {
+		    ((ha->pdev->subsystem_device == 0x8002) ||
+		    (ha->pdev->subsystem_device == 0x8086))) {
 			speeds = FDMI_PORT_SPEED_16GB;
 		} else {
 			speeds = FDMI_PORT_SPEED_16GB|FDMI_PORT_SPEED_8GB|
@@ -1546,7 +1546,7 @@
 		}
 		return speeds;
 	}
-	if (IS_QLA25XX(ha))
+	if (IS_QLA25XX(ha) || IS_QLAFX00(ha))
 		return FDMI_PORT_SPEED_8GB|FDMI_PORT_SPEED_4GB|
 			FDMI_PORT_SPEED_2GB|FDMI_PORT_SPEED_1GB;
 	if (IS_QLA24XX_TYPE(ha))
@@ -1556,7 +1556,8 @@
 		return FDMI_PORT_SPEED_2GB|FDMI_PORT_SPEED_1GB;
 	return FDMI_PORT_SPEED_1GB;
 }
-static uint
+
+uint
 qla25xx_fdmi_port_speed_currently(struct qla_hw_data *ha)
 {
 	switch (ha->link_data_rate) {
@@ -1582,7 +1583,7 @@
 }
 
 /**
- * qla2x00_hba_attributes() perform HBA attributes registration
+ * qla2x00_hba_attributes() - perform HBA attributes registration
  * @vha: HA context
  * @entries: number of entries to use
  * @callopt: Option to issue extended or standard FDMI
@@ -1595,7 +1596,6 @@
 	unsigned int callopt)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct init_cb_24xx *icb24 = (void *)ha->init_cb;
 	struct new_utsname *p_sysid = utsname();
 	struct ct_fdmi_hba_attr *eiter;
 	uint16_t alen;
@@ -1616,7 +1616,7 @@
 	eiter->type = cpu_to_be16(FDMI_HBA_MANUFACTURER);
 	alen = scnprintf(
 		eiter->a.manufacturer, sizeof(eiter->a.manufacturer),
-		"%s", "QLogic Corporation");
+		"%s", QLA2XXX_MANUFACTURER);
 	alen += FDMI_ATTR_ALIGNMENT(alen);
 	alen += FDMI_ATTR_TYPELEN(eiter);
 	eiter->len = cpu_to_be16(alen);
@@ -1730,8 +1730,6 @@
 	size += alen;
 	ql_dbg(ql_dbg_disc, vha, 0x20a8,
 	    "FIRMWARE VERSION = %s.\n", eiter->a.fw_version);
-	if (callopt == CALLOPT_FDMI1)
-		goto done;
 	/* OS Name and Version */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_HBA_OS_NAME_AND_VERSION);
@@ -1754,18 +1752,20 @@
 	size += alen;
 	ql_dbg(ql_dbg_disc, vha, 0x20a9,
 	    "OS VERSION = %s.\n", eiter->a.os_version);
+	if (callopt == CALLOPT_FDMI1)
+		goto done;
 	/* MAX CT Payload Length */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_HBA_MAXIMUM_CT_PAYLOAD_LENGTH);
-	eiter->a.max_ct_len = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
-		icb24->frame_payload_size : ha->init_cb->frame_payload_size));
+	eiter->a.max_ct_len = cpu_to_be32(ha->frame_payload_size >> 2);
+
 	alen = sizeof(eiter->a.max_ct_len);
 	alen += FDMI_ATTR_TYPELEN(eiter);
 	eiter->len = cpu_to_be16(alen);
 	size += alen;
 	ql_dbg(ql_dbg_disc, vha, 0x20aa,
 	    "CT PAYLOAD LENGTH = 0x%x.\n", be32_to_cpu(eiter->a.max_ct_len));
-	/* Node Sybolic Name */
+	/* Node Symbolic Name */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_HBA_NODE_SYMBOLIC_NAME);
 	alen = qla2x00_get_sym_node_name(vha, eiter->a.sym_name,
@@ -1837,7 +1837,7 @@
 }
 
 /**
- * qla2x00_port_attributes() perform Port attributes registration
+ * qla2x00_port_attributes() - perform Port attributes registration
  * @vha: HA context
  * @entries: number of entries to use
  * @callopt: Option to issue extended or standard FDMI
@@ -1850,7 +1850,6 @@
 	unsigned int callopt)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct init_cb_24xx *icb24 = (void *)ha->init_cb;
 	struct new_utsname *p_sysid = utsname();
 	char *hostname = p_sysid ?
 		p_sysid->nodename : fc_host_system_hostname(vha->host);
@@ -1902,8 +1901,7 @@
 	/* Max frame size. */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_PORT_MAX_FRAME_SIZE);
-	eiter->a.max_frame_size = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
-		icb24->frame_payload_size : ha->init_cb->frame_payload_size));
+	eiter->a.max_frame_size = cpu_to_be32(ha->frame_payload_size);
 	alen = sizeof(eiter->a.max_frame_size);
 	alen += FDMI_ATTR_TYPELEN(eiter);
 	eiter->len = cpu_to_be16(alen);
@@ -2272,7 +2270,7 @@
 }
 
 /**
- * qla2x00_fdmi_rprt() perform RPRT registration
+ * qla2x00_fdmi_rprt() - perform RPRT registration
  * @vha: HA context
  * @callopt: Option to issue extended or standard FDMI
  *           command parameter
@@ -2826,6 +2824,10 @@
 	if (fcport->disc_state == DSC_DELETE_PEND)
 		return;
 
+	/* We will figure out what happened after AUTH completes */
+	if (fcport->disc_state == DSC_LOGIN_AUTH_PEND)
+		return;
+
 	if (ea->sp->gen2 != fcport->login_gen) {
 		/* target side must have changed it. */
 		ql_dbg(ql_dbg_disc, vha, 0x20d3,
@@ -2888,7 +2890,8 @@
 	qla24xx_handle_gpsc_event(vha, &ea);
 
 done:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
@@ -2900,6 +2903,7 @@
 	if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
 		return rval;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
@@ -2908,8 +2912,8 @@
 	sp->name = "gpsc";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla24xx_async_gpsc_sp_done);
 
 	/* CT_IU preamble  */
 	ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD,
@@ -2927,9 +2931,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id;
 
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla24xx_async_gpsc_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0x205e,
 	    "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n",
 	    sp->name, fcport->port_name, sp->handle,
@@ -2942,7 +2943,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -2991,7 +2993,8 @@
 		break;
 	}
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
@@ -3130,13 +3133,15 @@
 	if (res) {
 		if (res == QLA_FUNCTION_TIMEOUT) {
 			qla24xx_post_gpnid_work(sp->vha, &ea.id);
-			sp->free(sp);
+			/* ref: INIT */
+			kref_put(&sp->cmd_kref, qla2x00_sp_release);
 			return;
 		}
 	} else if (sp->gen1) {
 		/* There was another RSCN for this Nport ID */
 		qla24xx_post_gpnid_work(sp->vha, &ea.id);
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return;
 	}
 
@@ -3157,7 +3162,8 @@
 				  sp->u.iocb_cmd.u.ctarg.rsp_dma);
 		sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return;
 	}
 
@@ -3177,6 +3183,7 @@
 	if (!vha->flags.online)
 		goto done;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
@@ -3185,14 +3192,16 @@
 	sp->name = "gpnid";
 	sp->u.iocb_cmd.u.ctarg.id = *id;
 	sp->gen1 = 0;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_gpnid_sp_done);
 
 	spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
 	list_for_each_entry(tsp, &vha->gpnid_list, elem) {
 		if (tsp->u.iocb_cmd.u.ctarg.id.b24 == id->b24) {
 			tsp->gen1++;
 			spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
-			sp->free(sp);
+			/* ref: INIT */
+			kref_put(&sp->cmd_kref, qla2x00_sp_release);
 			goto done;
 		}
 	}
@@ -3233,9 +3242,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	sp->done = qla2x00_async_gpnid_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0x2067,
 	    "Async-%s hdl=%x ID %3phC.\n", sp->name,
 	    sp->handle, &ct_req->req.port_id.port_id);
@@ -3265,25 +3271,18 @@
 			sp->u.iocb_cmd.u.ctarg.rsp_dma);
 		sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 	}
-
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
 
-void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea)
-{
-	fc_port_t *fcport = ea->fcport;
-
-	qla24xx_post_gnl_work(vha, fcport);
-}
 
 void qla24xx_async_gffid_sp_done(srb_t *sp, int res)
 {
 	struct scsi_qla_host *vha = sp->vha;
 	fc_port_t *fcport = sp->fcport;
 	struct ct_sns_rsp *ct_rsp;
-	struct event_arg ea;
 	uint8_t fc4_scsi_feat;
 	uint8_t fc4_nvme_feat;
 
@@ -3291,10 +3290,10 @@
 	       "Async done-%s res %x ID %x. %8phC\n",
 	       sp->name, res, fcport->d_id.b24, fcport->port_name);
 
-	fcport->flags &= ~FCF_ASYNC_SENT;
-	ct_rsp = &fcport->ct_desc.ct_sns->p.rsp;
+	ct_rsp = sp->u.iocb_cmd.u.ctarg.rsp;
 	fc4_scsi_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET];
 	fc4_nvme_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET];
+	sp->rc = res;
 
 	/*
 	 * FC-GS-7, 5.2.3.12 FC-4 Features - format
@@ -3315,68 +3314,129 @@
 		}
 	}
 
-	memset(&ea, 0, sizeof(ea));
-	ea.sp = sp;
-	ea.fcport = sp->fcport;
-	ea.rc = res;
+	if (sp->flags & SRB_WAKEUP_ON_COMP) {
+		complete(sp->comp);
+	} else  {
+		if (sp->u.iocb_cmd.u.ctarg.req) {
+			dma_free_coherent(&vha->hw->pdev->dev,
+				sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+				sp->u.iocb_cmd.u.ctarg.req,
+				sp->u.iocb_cmd.u.ctarg.req_dma);
+			sp->u.iocb_cmd.u.ctarg.req = NULL;
+		}
 
-	qla24xx_handle_gffid_event(vha, &ea);
-	sp->free(sp);
+		if (sp->u.iocb_cmd.u.ctarg.rsp) {
+			dma_free_coherent(&vha->hw->pdev->dev,
+				sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+				sp->u.iocb_cmd.u.ctarg.rsp,
+				sp->u.iocb_cmd.u.ctarg.rsp_dma);
+			sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+		}
+
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
+		/* we should not be here */
+		dump_stack();
+	}
 }
 
 /* Get FC4 Feature with Nport ID. */
-int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport)
+int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool wait)
 {
 	int rval = QLA_FUNCTION_FAILED;
 	struct ct_sns_req       *ct_req;
 	srb_t *sp;
+	DECLARE_COMPLETION_ONSTACK(comp);
 
-	if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+	/* this routine only handles the waiting case; bail out if !wait */
+	if (!vha->flags.online || !wait)
 		return rval;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		return rval;
 
-	fcport->flags |= FCF_ASYNC_SENT;
 	sp->type = SRB_CT_PTHRU_CMD;
 	sp->name = "gffid";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla24xx_async_gffid_sp_done);
+	sp->comp = &comp;
+	sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout;
 
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	if (wait)
+		sp->flags = SRB_WAKEUP_ON_COMP;
+
+	sp->u.iocb_cmd.u.ctarg.req_allocated_size = sizeof(struct ct_sns_pkt);
+	sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
+				sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+				&sp->u.iocb_cmd.u.ctarg.req_dma,
+	    GFP_KERNEL);
+	if (!sp->u.iocb_cmd.u.ctarg.req) {
+		ql_log(ql_log_warn, vha, 0xd041,
+		       "%s: Failed to allocate ct_sns request.\n",
+		       __func__);
+		goto done_free_sp;
+	}
+
+	sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = sizeof(struct ct_sns_pkt);
+	sp->u.iocb_cmd.u.ctarg.rsp = dma_alloc_coherent(&vha->hw->pdev->dev,
+				sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+				&sp->u.iocb_cmd.u.ctarg.rsp_dma,
+	    GFP_KERNEL);
+	if (!sp->u.iocb_cmd.u.ctarg.rsp) {
+		ql_log(ql_log_warn, vha, 0xd041,
+		       "%s: Failed to allocate ct_sns response.\n",
+		       __func__);
+		goto done_free_sp;
+	}
 
 	/* CT_IU preamble  */
-	ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFF_ID_CMD,
-	    GFF_ID_RSP_SIZE);
+	ct_req = qla2x00_prep_ct_req(sp->u.iocb_cmd.u.ctarg.req, GFF_ID_CMD, GFF_ID_RSP_SIZE);
 
 	ct_req->req.gff_id.port_id[0] = fcport->d_id.b.domain;
 	ct_req->req.gff_id.port_id[1] = fcport->d_id.b.area;
 	ct_req->req.gff_id.port_id[2] = fcport->d_id.b.al_pa;
 
-	sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns;
-	sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma;
-	sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns;
-	sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma;
 	sp->u.iocb_cmd.u.ctarg.req_size = GFF_ID_REQ_SIZE;
 	sp->u.iocb_cmd.u.ctarg.rsp_size = GFF_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->done = qla24xx_async_gffid_sp_done;
-
-	ql_dbg(ql_dbg_disc, vha, 0x2132,
-	    "Async-%s hdl=%x  %8phC.\n", sp->name,
-	    sp->handle, fcport->port_name);
-
 	rval = qla2x00_start_sp(sp);
-	if (rval != QLA_SUCCESS)
-		goto done_free_sp;
 
-	return rval;
+	if (rval != QLA_SUCCESS) {
+		rval = QLA_FUNCTION_FAILED;
+		goto done_free_sp;
+	} else {
+		ql_dbg(ql_dbg_disc, vha, 0x3074,
+		       "Async-%s hdl=%x portid %06x\n",
+		       sp->name, sp->handle, fcport->d_id.b24);
+	}
+
+	wait_for_completion(sp->comp);
+	rval = sp->rc;
+
 done_free_sp:
-	sp->free(sp);
-	fcport->flags &= ~FCF_ASYNC_SENT;
+	if (sp->u.iocb_cmd.u.ctarg.req) {
+		dma_free_coherent(&vha->hw->pdev->dev,
+				  sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+				  sp->u.iocb_cmd.u.ctarg.req,
+				  sp->u.iocb_cmd.u.ctarg.req_dma);
+		sp->u.iocb_cmd.u.ctarg.req = NULL;
+	}
+
+	if (sp->u.iocb_cmd.u.ctarg.rsp) {
+		dma_free_coherent(&vha->hw->pdev->dev,
+				  sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+				  sp->u.iocb_cmd.u.ctarg.rsp,
+				  sp->u.iocb_cmd.u.ctarg.rsp_dma);
+		sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+	}
+
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	return rval;
 }
 
@@ -3443,6 +3503,10 @@
 			list_for_each_entry(fcport, &vha->vp_fcports, list) {
 				if ((fcport->flags & FCF_FABRIC_DEVICE) != 0) {
 					fcport->scan_state = QLA_FCPORT_SCAN;
+					if (fcport->loop_id == FC_NO_LOOP_ID)
+						fcport->logout_on_delete = 0;
+					else
+						fcport->logout_on_delete = 1;
 				}
 			}
 			goto login_logout;
@@ -3494,7 +3558,16 @@
 				continue;
 			fcport->scan_state = QLA_FCPORT_FOUND;
 			fcport->last_rscn_gen = fcport->rscn_gen;
+			fcport->fc4_type = rp->fc4type;
 			found = true;
+
+			if (fcport->scan_needed) {
+				if (NVME_PRIORITY(vha->hw, fcport))
+					fcport->do_prli_nvme = 1;
+				else
+					fcport->do_prli_nvme = 0;
+			}
+
 			/*
 			 * If device was not a fabric device before.
 			 */
@@ -3556,13 +3629,14 @@
 				do_delete) {
 				if (fcport->loop_id != FC_NO_LOOP_ID) {
 					if (fcport->flags & FCF_FCP2_DEVICE)
-						fcport->logout_on_delete = 0;
+						continue;
 
-					ql_dbg(ql_dbg_disc, vha, 0x20f0,
-					    "%s %d %8phC post del sess\n",
-					    __func__, __LINE__,
-					    fcport->port_name);
+					ql_log(ql_log_warn, vha, 0x20f0,
+					       "%s %d %8phC post del sess\n",
+					       __func__, __LINE__,
+					       fcport->port_name);
 
+					fcport->tgt_link_down_time = 0;
 					qlt_schedule_sess_for_deletion(fcport);
 					continue;
 				}
@@ -3748,7 +3822,6 @@
 	    "Async done-%s res %x FC4Type %x\n",
 	    sp->name, res, sp->gen2);
 
-	del_timer(&sp->u.iocb_cmd.timer);
 	sp->rc = res;
 	if (res) {
 		unsigned long flags;
@@ -3873,9 +3946,8 @@
 	sp->name = "gnnft";
 	sp->gen1 = vha->hw->base_qpair->chip_reset;
 	sp->gen2 = fc4_type;
-
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_gpnft_gnnft_sp_done);
 
 	memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size);
 	memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size);
@@ -3891,8 +3963,6 @@
 	sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->done = qla2x00_async_gpnft_gnnft_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s hdl=%x FC4Type %x.\n", sp->name,
 	    sp->handle, ct_req->req.gpn_ft.port_type);
@@ -3919,8 +3989,8 @@
 		    sp->u.iocb_cmd.u.ctarg.rsp_dma);
 		sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 	}
-
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 
 	spin_lock_irqsave(&vha->work_lock, flags);
 	vha->scan.scan_flags &= ~SF_SCANNING;
@@ -3972,9 +4042,12 @@
 		ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff,
 		    "%s: Performing FCP Scan\n", __func__);
 
-		if (sp)
-			sp->free(sp); /* should not happen */
+		if (sp) {
+			/* ref: INIT */
+			kref_put(&sp->cmd_kref, qla2x00_sp_release);
+		}
 
+		/* ref: INIT */
 		sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 		if (!sp) {
 			spin_lock_irqsave(&vha->work_lock, flags);
@@ -4019,6 +4092,7 @@
 			    sp->u.iocb_cmd.u.ctarg.req,
 			    sp->u.iocb_cmd.u.ctarg.req_dma);
 			sp->u.iocb_cmd.u.ctarg.req = NULL;
+			/* ref: INIT */
 			qla2x00_rel_sp(sp);
 			return rval;
 		}
@@ -4038,9 +4112,8 @@
 	sp->name = "gpnft";
 	sp->gen1 = vha->hw->base_qpair->chip_reset;
 	sp->gen2 = fc4_type;
-
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_gpnft_gnnft_sp_done);
 
 	rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size;
 	memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size);
@@ -4055,8 +4128,6 @@
 
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->done = qla2x00_async_gpnft_gnnft_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s hdl=%x FC4Type %x.\n", sp->name,
 	    sp->handle, ct_req->req.gpn_ft.port_type);
@@ -4084,7 +4155,8 @@
 		sp->u.iocb_cmd.u.ctarg.rsp = NULL;
 	}
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 
 	spin_lock_irqsave(&vha->work_lock, flags);
 	vha->scan.scan_flags &= ~SF_SCANNING;
@@ -4148,7 +4220,8 @@
 
 	qla24xx_handle_gnnid_event(vha, &ea);
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
@@ -4161,6 +4234,7 @@
 		return rval;
 
 	qla2x00_set_fcport_disc_state(fcport, DSC_GNN_ID);
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
 	if (!sp)
 		goto done;
@@ -4170,9 +4244,8 @@
 	sp->name = "gnnid";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_gnnid_sp_done);
 
 	/* CT_IU preamble  */
 	ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GNN_ID_CMD,
@@ -4191,8 +4264,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = GNN_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->done = qla2x00_async_gnnid_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n",
 	    sp->name, fcport->port_name,
@@ -4204,7 +4275,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	fcport->flags &= ~FCF_ASYNC_SENT;
 done:
 	return rval;
@@ -4278,7 +4350,8 @@
 
 	qla24xx_handle_gfpnid_event(vha, &ea);
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
@@ -4290,6 +4363,7 @@
 	if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
 		return rval;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
 	if (!sp)
 		goto done;
@@ -4298,9 +4372,8 @@
 	sp->name = "gfpnid";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_gfpnid_sp_done);
 
 	/* CT_IU preamble  */
 	ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFPN_ID_CMD,
@@ -4319,8 +4392,6 @@
 	sp->u.iocb_cmd.u.ctarg.rsp_size = GFPN_ID_RSP_SIZE;
 	sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
 
-	sp->done = qla2x00_async_gfpnid_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
 	    "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n",
 	    sp->name, fcport->port_name,
@@ -4333,7 +4404,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }

diff --git a/scst/qla2x00t-32gbit/qla_init.c b/scst/qla2x00t-32gbit/qla_init.c
index eecde32..25860c0 100644
--- a/scst/qla2x00t-32gbit/qla_init.c
+++ b/scst/qla2x00t-32gbit/qla_init.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_gbl.h"
@@ -35,7 +34,6 @@
 static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *);
 static int qla84xx_init_chip(scsi_qla_host_t *);
 static int qla25xx_init_queues(struct qla_hw_data *);
-static int qla24xx_post_prli_work(struct scsi_qla_host*, fc_port_t *);
 static void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha,
 				      struct event_arg *ea);
 static void qla24xx_handle_prli_done_event(struct scsi_qla_host *,
@@ -49,10 +47,20 @@
 {
 	srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer);
 	struct srb_iocb *iocb;
+	scsi_qla_host_t *vha = sp->vha;
 
 	WARN_ON(irqs_disabled());
 	iocb = &sp->u.iocb_cmd;
 	iocb->timeout(sp);
+
+	/* ref: TMR */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+	if (vha && qla2x00_isp_reg_stat(vha->hw)) {
+		ql_log(ql_log_info, vha, 0x9008,
+		    "PCI/Register disconnect.\n");
+		qla_pci_set_eeh_busy(vha);
+	}
 }
 
 void qla2x00_sp_free(srb_t *sp)
@@ -63,6 +71,16 @@
 	qla2x00_rel_sp(sp);
 }
 
+void qla2xxx_rel_done_warning(srb_t *sp, int res)
+{
+	WARN_ONCE(1, "Calling done() of an already freed srb %p object\n", sp);
+}
+
+void qla2xxx_rel_free_warning(srb_t *sp)
+{
+	WARN_ONCE(1, "Calling free() of an already freed srb %p object\n", sp);
+}
+
 /* Asynchronous Login/Logout Routines -------------------------------------- */
 
 unsigned long
@@ -117,8 +135,13 @@
 	}
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
-	if (sp->cmd_sp)
+	if (sp->cmd_sp) {
+		/*
+		 * This done function should take care of
+		 * original command ref: INIT
+		 */
 		sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED);
+	}
 
 	abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT);
 	sp->done(sp, QLA_OS_TIMER_EXPIRED);
@@ -127,12 +150,16 @@
 static void qla24xx_abort_sp_done(srb_t *sp, int res)
 {
 	struct srb_iocb *abt = &sp->u.iocb_cmd;
+	srb_t *orig_sp = sp->cmd_sp;
 
-	del_timer(&sp->u.iocb_cmd.timer);
+	if (orig_sp)
+		qla_wait_nvme_release_cmd_kref(orig_sp);
+
 	if (sp->flags & SRB_WAKEUP_ON_COMP)
 		complete(&abt->u.abt.comp);
 	else
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
@@ -142,11 +169,13 @@
 	srb_t *sp;
 	int rval = QLA_FUNCTION_FAILED;
 
+	/* ref: INIT for ABTS command */
 	sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
 				  GFP_ATOMIC);
 	if (!sp)
-		return rval;
+		return QLA_MEMORY_ALLOC_FAILED;
 
+	qla_vha_mark_busy(vha);
 	abt_iocb = &sp->u.iocb_cmd;
 	sp->type = SRB_ABT_CMD;
 	sp->name = "abort";
@@ -155,31 +184,31 @@
 	if (wait)
 		sp->flags = SRB_WAKEUP_ON_COMP;
 
-	abt_iocb->timeout = qla24xx_abort_iocb_timeout;
 	init_completion(&abt_iocb->u.abt.comp);
 	/* FW can send 2 x ABTS's timeout/20s */
-	qla2x00_init_timer(sp, 42);
+	qla2x00_init_async_sp(sp, 42, qla24xx_abort_sp_done);
+	sp->u.iocb_cmd.timeout = qla24xx_abort_iocb_timeout;
 
 	abt_iocb->u.abt.cmd_hndl = cmd_sp->handle;
 	abt_iocb->u.abt.req_que_no = cpu_to_le16(cmd_sp->qpair->req->id);
 
-	sp->done = qla24xx_abort_sp_done;
-
 	ql_dbg(ql_dbg_async, vha, 0x507c,
 	       "Abort command issued - hdl=%x, type=%x\n", cmd_sp->handle,
 	       cmd_sp->type);
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return rval;
 	}
 
 	if (wait) {
 		wait_for_completion(&abt_iocb->u.abt.comp);
 		rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
-			QLA_SUCCESS : QLA_FUNCTION_FAILED;
-		sp->free(sp);
+			QLA_SUCCESS : QLA_ERR_FROM_FW;
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	}
 
 	return rval;
@@ -274,26 +303,13 @@
 		ea.iop[0] = lio->u.logio.iop[0];
 		ea.iop[1] = lio->u.logio.iop[1];
 		ea.sp = sp;
+		if (res)
+			ea.data[0] = MBS_COMMAND_ERROR;
 		qla24xx_handle_plogi_done_event(vha, &ea);
 	}
 
-	sp->free(sp);
-}
-
-static inline bool
-fcport_is_smaller(fc_port_t *fcport)
-{
-	if (wwn_to_u64(fcport->port_name) <
-	    wwn_to_u64(fcport->vha->port_name))
-		return true;
-	else
-		return false;
-}
-
-static inline bool
-fcport_is_bigger(fc_port_t *fcport)
-{
-	return !fcport_is_smaller(fcport);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int
@@ -312,6 +328,7 @@
 		return rval;
 	}
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
@@ -324,27 +341,33 @@
 	sp->name = "login";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_login_sp_done);
 
 	lio = &sp->u.iocb_cmd;
-	lio->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
-
-	sp->done = qla2x00_async_login_sp_done;
-	if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport))
+	if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) {
 		lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY;
-	else
-		lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
+	} else {
+		if (vha->hw->flags.edif_enabled &&
+		    DBELL_ACTIVE(vha)) {
+			lio->u.logio.flags |=
+				(SRB_LOGIN_FCSP | SRB_LOGIN_SKIP_PRLI);
+		} else {
+			lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
+		}
+	}
 
 	if (NVME_TARGET(vha->hw, fcport))
 		lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI;
 
-	ql_dbg(ql_dbg_disc, vha, 0x2072,
-	    "Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x "
-		"retries=%d.\n", fcport->port_name, sp->handle, fcport->loop_id,
-	    fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa,
-	    fcport->login_retry);
-
 	rval = qla2x00_start_sp(sp);
+
+	ql_dbg(ql_dbg_disc, vha, 0x2072,
+	       "Async-login - %8phC hdl=%x, loopid=%x portid=%06x retries=%d %s.\n",
+	       fcport->port_name, sp->handle, fcport->loop_id,
+	       fcport->d_id.b24, fcport->login_retry,
+	       lio->u.logio.flags & SRB_LOGIN_FCSP ? "FCSP" : "");
+
 	if (rval != QLA_SUCCESS) {
 		fcport->flags |= FCF_LOGIN_NEEDED;
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
@@ -354,7 +377,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	fcport->flags &= ~FCF_ASYNC_SENT;
 done:
 	fcport->flags &= ~FCF_ASYNC_ACTIVE;
@@ -365,36 +389,33 @@
 {
 	sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 	sp->fcport->login_gen++;
-	qlt_logo_completion_handler(sp->fcport, res);
-	sp->free(sp);
+	qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int
 qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	srb_t *sp;
-	struct srb_iocb *lio;
 	int rval = QLA_FUNCTION_FAILED;
 
 	fcport->flags |= FCF_ASYNC_SENT;
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_LOGOUT_CMD;
 	sp->name = "logout";
-
-	lio = &sp->u.iocb_cmd;
-	lio->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
-
-	sp->done = qla2x00_async_logout_sp_done;
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_logout_sp_done);
 
 	ql_dbg(ql_dbg_disc, vha, 0x2070,
-	    "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC.\n",
+	    "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n",
 	    sp->handle, fcport->loop_id, fcport->d_id.b.domain,
 		fcport->d_id.b.area, fcport->d_id.b.al_pa,
-		fcport->port_name);
+		fcport->port_name, fcport->explicit_logout);
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS)
@@ -402,7 +423,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 	return rval;
@@ -428,29 +450,26 @@
 	if (!test_bit(UNLOADING, &vha->dpc_flags))
 		qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport,
 		    lio->u.logio.data);
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int
 qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	srb_t *sp;
-	struct srb_iocb *lio;
 	int rval;
 
 	rval = QLA_FUNCTION_FAILED;
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_PRLO_CMD;
 	sp->name = "prlo";
-
-	lio = &sp->u.iocb_cmd;
-	lio->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
-
-	sp->done = qla2x00_async_prlo_sp_done;
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_prlo_sp_done);
 
 	ql_dbg(ql_dbg_disc, vha, 0x2070,
 	    "Async-prlo - hdl=%x loop-id=%x portid=%02x%02x%02x.\n",
@@ -464,7 +483,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	fcport->flags &= ~FCF_ASYNC_ACTIVE;
 	return rval;
@@ -547,10 +567,12 @@
 	ea.iop[1] = lio->u.logio.iop[1];
 	ea.fcport = sp->fcport;
 	ea.sp = sp;
+	if (res)
+		ea.data[0] = MBS_COMMAND_ERROR;
 
 	qla24xx_handle_adisc_event(vha, &ea);
-
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int
@@ -561,26 +583,34 @@
 	struct srb_iocb *lio;
 	int rval = QLA_FUNCTION_FAILED;
 
+	if (IS_SESSION_DELETED(fcport)) {
+		ql_log(ql_log_warn, vha, 0xffff,
+		       "%s: %8phC is being delete - not sending command.\n",
+		       __func__, fcport->port_name);
+		fcport->flags &= ~FCF_ASYNC_ACTIVE;
+		return rval;
+	}
+
 	if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
 		return rval;
 
 	fcport->flags |= FCF_ASYNC_SENT;
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_ADISC_CMD;
 	sp->name = "adisc";
-
-	lio = &sp->u.iocb_cmd;
-	lio->timeout = qla2x00_async_iocb_timeout;
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_adisc_sp_done);
 
-	sp->done = qla2x00_async_adisc_sp_done;
-	if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
+	if (data[1] & QLA_LOGIO_LOGIN_RETRIED) {
+		lio = &sp->u.iocb_cmd;
 		lio->u.logio.flags |= SRB_LOGIN_RETRIED;
+	}
 
 	ql_dbg(ql_dbg_disc, vha, 0x206f,
 	    "Async-adisc - hdl=%x loopid=%x portid=%06x %8phC.\n",
@@ -593,7 +623,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 	qla2x00_post_async_adisc_work(vha, fcport, data);
@@ -679,11 +710,11 @@
 
 	fcport = ea->fcport;
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
-	    "%s %8phC DS %d LS rc %d %d login %d|%d rscn %d|%d lid %d\n",
+	    "%s %8phC DS %d LS rc %d %d login %d|%d rscn %d|%d lid %d edif %d\n",
 	    __func__, fcport->port_name, fcport->disc_state,
 	    fcport->fw_login_state, ea->rc,
 	    fcport->login_gen, fcport->last_login_gen,
-	    fcport->rscn_gen, fcport->last_rscn_gen, vha->loop_id);
+	    fcport->rscn_gen, fcport->last_rscn_gen, vha->loop_id, fcport->edif.enable);
 
 	if (fcport->disc_state == DSC_DELETE_PEND)
 		return;
@@ -705,6 +736,7 @@
 		ql_dbg(ql_dbg_disc, vha, 0x20e0,
 		    "%s %8phC login gen changed\n",
 		    __func__, fcport->port_name);
+		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 		return;
 	}
 
@@ -796,7 +828,7 @@
 		default:
 			switch (current_login_state) {
 			case DSC_LS_PRLI_COMP:
-				ql_dbg(ql_dbg_disc + ql_dbg_verbose,
+				ql_dbg(ql_dbg_disc,
 				    vha, 0x20e4, "%s %d %8phC post gpdb\n",
 				    __func__, __LINE__, fcport->port_name);
 
@@ -808,6 +840,13 @@
 				qla2x00_post_async_adisc_work(vha, fcport,
 				    data);
 				break;
+			case DSC_LS_PLOGI_COMP:
+				if (vha->hw->flags.edif_enabled) {
+					/* check to see if App supports Secure */
+					qla24xx_post_gpdb_work(vha, fcport, 0);
+					break;
+				}
+				fallthrough;
 			case DSC_LS_PORT_UNAVAIL:
 			default:
 				if (fcport->loop_id == FC_NO_LOOP_ID) {
@@ -835,6 +874,7 @@
 				 */
 				qla2x00_set_fcport_disc_state(fcport,
 				    DSC_DELETED);
+				set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 				break;
 			case DSC_LS_PRLI_COMP:
 				if ((e->prli_svc_param_word_3[0] & BIT_4) == 0)
@@ -847,6 +887,12 @@
 				    data);
 				break;
 			case DSC_LS_PLOGI_COMP:
+				if (vha->hw->flags.edif_enabled &&
+				    DBELL_ACTIVE(vha)) {
+					/* check to see if App supports secure or not */
+					qla24xx_post_gpdb_work(vha, fcport, 0);
+					break;
+				}
 				if (fcport_is_bigger(fcport)) {
 					/* local adapter is smaller */
 					if (fcport->loop_id != FC_NO_LOOP_ID)
@@ -944,6 +990,9 @@
 				set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 			}
 			break;
+		case ISP_CFG_NL:
+			qla24xx_fcport_handle_login(vha, fcport);
+			break;
 		default:
 			break;
 		}
@@ -967,8 +1016,6 @@
 	    sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1],
 	    sp->u.iocb_cmd.u.mbx.in_mb[2]);
 
-	if (res == QLA_FUNCTION_TIMEOUT)
-		return;
 
 	sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
 	memset(&ea, 0, sizeof(ea));
@@ -1006,8 +1053,8 @@
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
 	list_for_each_entry_safe(fcport, tf, &h, gnl_entry) {
-		list_del_init(&fcport->gnl_entry);
 		spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+		list_del_init(&fcport->gnl_entry);
 		fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 		spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 		ea.fcport = fcport;
@@ -1061,13 +1108,13 @@
 	}
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	srb_t *sp;
-	struct srb_iocb *mbx;
 	int rval = QLA_FUNCTION_FAILED;
 	unsigned long flags;
 	u16 *mb;
@@ -1092,6 +1139,7 @@
 	vha->gnl.sent = 1;
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
@@ -1100,10 +1148,8 @@
 	sp->name = "gnlist";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-
-	mbx = &sp->u.iocb_cmd;
-	mbx->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla24xx_async_gnl_sp_done);
 
 	mb = sp->u.iocb_cmd.u.mbx.out_mb;
 	mb[0] = MBC_PORT_NODE_NAME_LIST;
@@ -1115,8 +1161,6 @@
 	mb[8] = vha->gnl.size;
 	mb[9] = vha->vp_idx;
 
-	sp->done = qla24xx_async_gnl_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0x20da,
 	    "Async-%s - OUT WWPN %8phC hndl %x\n",
 	    sp->name, fcport->port_name, sp->handle);
@@ -1128,7 +1172,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	fcport->flags &= ~(FCF_ASYNC_ACTIVE | FCF_ASYNC_SENT);
 	return rval;
@@ -1174,13 +1219,16 @@
 	dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in,
 		sp->u.iocb_cmd.u.mbx.in_dma);
 
-	sp->free(sp);
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
-static int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	struct qla_work_evt *e;
 
+	if (vha->host->active_mode == MODE_TARGET)
+		return QLA_FUNCTION_FAILED;
+
 	e = qla2x00_alloc_work(vha, QLA_EVT_PRLI);
 	if (!e)
 		return QLA_FUNCTION_FAILED;
@@ -1197,7 +1245,7 @@
 	struct event_arg ea;
 
 	ql_dbg(ql_dbg_disc, vha, 0x2129,
-	    "%s %8phC res %d \n", __func__,
+	    "%s %8phC res %x\n", __func__,
 	    sp->fcport->port_name, res);
 
 	sp->fcport->flags &= ~FCF_ASYNC_SENT;
@@ -1210,11 +1258,15 @@
 		ea.iop[0] = lio->u.logio.iop[0];
 		ea.iop[1] = lio->u.logio.iop[1];
 		ea.sp = sp;
+		if (res == QLA_OS_TIMER_EXPIRED)
+			ea.data[0] = QLA_OS_TIMER_EXPIRED;
+		else if (res)
+			ea.data[0] = MBS_COMMAND_ERROR;
 
 		qla24xx_handle_prli_done_event(vha, &ea);
 	}
 
-	sp->free(sp);
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int
@@ -1247,21 +1299,20 @@
 
 	sp->type = SRB_PRLI_CMD;
 	sp->name = "prli";
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_prli_sp_done);
 
 	lio = &sp->u.iocb_cmd;
-	lio->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
-
-	sp->done = qla2x00_async_prli_sp_done;
 	lio->u.logio.flags = 0;
 
 	if (NVME_TARGET(vha->hw, fcport))
 		lio->u.logio.flags |= SRB_LOGIN_NVME_PRLI;
 
 	ql_dbg(ql_dbg_disc, vha, 0x211b,
-	    "Async-prli - %8phC hdl=%x, loopid=%x portid=%06x retries=%d %s.\n",
+	    "Async-prli - %8phC hdl=%x, loopid=%x portid=%06x retries=%d fc4type %x priority %x %s.\n",
 	    fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b24,
-	    fcport->login_retry, NVME_TARGET(vha->hw, fcport) ? "nvme" : "fc");
+	    fcport->login_retry, fcport->fc4_type, vha->hw->fc4_type_priority,
+	    NVME_TARGET(vha->hw, fcport) ? "nvme" : "fcp");
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
@@ -1273,7 +1324,8 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	fcport->flags &= ~FCF_ASYNC_SENT;
 	return rval;
 }
@@ -1302,14 +1354,21 @@
 	struct port_database_24xx *pd;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) ||
-	    fcport->loop_id == FC_NO_LOOP_ID) {
+	if (IS_SESSION_DELETED(fcport)) {
 		ql_log(ql_log_warn, vha, 0xffff,
-		    "%s: %8phC - not sending command.\n",
-		    __func__, fcport->port_name);
+		       "%s: %8phC is being delete - not sending command.\n",
+		       __func__, fcport->port_name);
+		fcport->flags &= ~FCF_ASYNC_ACTIVE;
 		return rval;
 	}
 
+	if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) {
+		ql_log(ql_log_warn, vha, 0xffff,
+		    "%s: %8phC online %d flags %x - not sending command.\n",
+		    __func__, fcport->port_name, vha->flags.online, fcport->flags);
+		goto done;
+	}
+
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
@@ -1321,10 +1380,8 @@
 	sp->name = "gpdb";
 	sp->gen1 = fcport->rscn_gen;
 	sp->gen2 = fcport->login_gen;
-
-	mbx = &sp->u.iocb_cmd;
-	mbx->timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla24xx_async_gpdb_sp_done);
 
 	pd = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
 	if (pd == NULL) {
@@ -1343,11 +1400,10 @@
 	mb[9] = vha->vp_idx;
 	mb[10] = opt;
 
-	mbx->u.mbx.in = pd;
+	mbx = &sp->u.iocb_cmd;
+	mbx->u.mbx.in = (void *)pd;
 	mbx->u.mbx.in_dma = pd_dma;
 
-	sp->done = qla24xx_async_gpdb_sp_done;
-
 	ql_dbg(ql_dbg_disc, vha, 0x20dc,
 	    "Async-%s %8phC hndl %x opt %x\n",
 	    sp->name, fcport->port_name, sp->handle, opt);
@@ -1361,7 +1417,7 @@
 	if (pd)
 		dma_pool_free(ha->s_dma_pool, pd, pd_dma);
 
-	sp->free(sp);
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	fcport->flags &= ~FCF_ASYNC_SENT;
 done:
 	fcport->flags &= ~FCF_ASYNC_ACTIVE;
@@ -1400,6 +1456,56 @@
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 }
 
+/*
+ * Note secure vs. plain login per @pd; if EDIF is enabled, hand a secure port
+ * to the app for auth or (ini/dual mode) post PRLI.  Returns 1 if it did so.
+ */
+static int qla_chk_secure_login(scsi_qla_host_t *vha, fc_port_t *fcport,
+	struct port_database_24xx *pd)
+{
+	int rc = 0;
+
+	if (pd->secure_login) {
+		ql_dbg(ql_dbg_disc, vha, 0x104d,
+		    "Secure Login established on %8phC\n", fcport->port_name);
+		fcport->flags |= FCF_FCSP_DEVICE;
+	} else {
+		ql_dbg(ql_dbg_disc, vha, 0x104d,
+		    "non-Secure Login %8phC\n", fcport->port_name);
+		fcport->flags &= ~FCF_FCSP_DEVICE;
+	}
+	if (vha->hw->flags.edif_enabled) {
+		if (fcport->flags & FCF_FCSP_DEVICE) {
+			qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_AUTH_PEND);
+			/* Clear SA bookkeeping, then notify the app */
+			fcport->edif.rx_sa_set = 0;
+			fcport->edif.tx_sa_set = 0;
+			fcport->edif.rx_sa_pending = 0;
+			fcport->edif.tx_sa_pending = 0;
+
+			qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+			    fcport->d_id.b24);
+
+			if (DBELL_ACTIVE(vha)) {
+				ql_dbg(ql_dbg_disc, vha, 0x20ef,
+				    "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n",
+				    __func__, __LINE__, fcport->port_name);
+				fcport->edif.app_sess_online = 1;
+
+				qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_NEEDED,
+				    fcport->d_id.b24, 0, fcport);
+			}
+			rc = 1;
+		} else if (qla_ini_mode_enabled(vha) || qla_dual_mode_enabled(vha)) {
+			ql_dbg(ql_dbg_disc, vha, 0x2117, "%s %d %8phC post prli\n",
+			    __func__, __LINE__, fcport->port_name);
+			qla24xx_post_prli_work(vha, fcport);
+			rc = 1;
+		}
+	}
+	return rc;
+}
+
 static
 void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 {
@@ -1413,12 +1519,15 @@
 	fcport->flags &= ~FCF_ASYNC_SENT;
 
 	ql_dbg(ql_dbg_disc, vha, 0x20d2,
-	    "%s %8phC DS %d LS %d fc4_type %x rc %d\n", __func__,
+	    "%s %8phC DS %d LS %x fc4_type %x rc %x\n", __func__,
 	    fcport->port_name, fcport->disc_state, pd->current_login_state,
 	    fcport->fc4_type, ea->rc);
 
-	if (fcport->disc_state == DSC_DELETE_PEND)
+	if (fcport->disc_state == DSC_DELETE_PEND) {
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC\n",
+		       __func__, __LINE__, fcport->port_name);
 		return;
+	}
 
 	if (NVME_TARGET(vha->hw, fcport))
 		ls = pd->current_login_state >> 4;
@@ -1435,6 +1544,8 @@
 	} else if (ea->sp->gen1 != fcport->rscn_gen) {
 		qla_rscn_replay(fcport);
 		qlt_schedule_sess_for_deletion(fcport);
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+		       __func__, __LINE__, fcport->port_name, ls);
 		return;
 	}
 
@@ -1442,8 +1553,14 @@
 	case PDS_PRLI_COMPLETE:
 		__qla24xx_parse_gpdb(vha, fcport, pd);
 		break;
-	case PDS_PLOGI_PENDING:
 	case PDS_PLOGI_COMPLETE:
+		if (qla_chk_secure_login(vha, fcport, pd)) {
+			ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+			       __func__, __LINE__, fcport->port_name, ls);
+			return;
+		}
+		fallthrough;
+	case PDS_PLOGI_PENDING:
 	case PDS_PRLI_PENDING:
 	case PDS_PRLI2_PENDING:
 		/* Set discovery state back to GNL to Relogin attempt */
@@ -1452,6 +1569,8 @@
 			qla2x00_set_fcport_disc_state(fcport, DSC_GNL);
 			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 		}
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+		       __func__, __LINE__, fcport->port_name, ls);
 		return;
 	case PDS_LOGO_PENDING:
 	case PDS_PORT_UNAVAILABLE:
@@ -1469,6 +1588,11 @@
 	u8 login = 0;
 	int rc;
 
+	ql_dbg(ql_dbg_disc, vha, 0x307b,
+	    "%s %8phC DS %d LS %d lid %d retries=%d\n",
+	    __func__, fcport->port_name, fcport->disc_state,
+	    fcport->fw_login_state, fcport->loop_id, fcport->login_retry);
+
 	if (qla_tgt_mode_enabled(vha))
 		return;
 
@@ -1520,13 +1644,15 @@
 	u16 sec;
 
 	ql_dbg(ql_dbg_disc, vha, 0x20d8,
-	    "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d lid %d scan %d\n",
+	    "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d lid %d scan %d fc4type %x\n",
 	    __func__, fcport->port_name, fcport->disc_state,
 	    fcport->fw_login_state, fcport->login_pause, fcport->flags,
 	    fcport->conflict, fcport->last_rscn_gen, fcport->rscn_gen,
-	    fcport->login_gen, fcport->loop_id, fcport->scan_state);
+	    fcport->login_gen, fcport->loop_id, fcport->scan_state,
+	    fcport->fc4_type);
 
-	if (fcport->scan_state != QLA_FCPORT_FOUND)
+	if (fcport->scan_state != QLA_FCPORT_FOUND ||
+	    fcport->disc_state == DSC_DELETE_PEND)
 		return 0;
 
 	if ((fcport->loop_id != FC_NO_LOOP_ID) &&
@@ -1547,7 +1673,7 @@
 	if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw))
 		return 0;
 
-	if (fcport->flags & FCF_ASYNC_SENT) {
+	if (fcport->flags & (FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE)) {
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 		return 0;
 	}
@@ -1644,8 +1770,16 @@
 		break;
 
 	case DSC_LOGIN_PEND:
-		if (fcport->fw_login_state == DSC_LS_PLOGI_COMP)
+		if (vha->hw->flags.edif_enabled)
+			break;
+
+		if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
+			ql_dbg(ql_dbg_disc, vha, 0x2118,
+			       "%s %d %8phC post %s PRLI\n",
+			       __func__, __LINE__, fcport->port_name,
+			       NVME_TARGET(vha->hw, fcport) ? "NVME" : "FC");
 			qla24xx_post_prli_work(vha, fcport);
+		}
 		break;
 
 	case DSC_UPD_FCPORT:
@@ -1695,10 +1829,76 @@
 	fc_port_t *fcport;
 	unsigned long flags;
 
-	fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
-	if (fcport) {
-		fcport->scan_needed = 1;
-		fcport->rscn_gen++;
+	switch (ea->id.b.rsvd_1) {
+	case RSCN_PORT_ADDR:
+		fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+		if (fcport) {
+			if (fcport->flags & FCF_FCP2_DEVICE &&
+			    atomic_read(&fcport->state) == FCS_ONLINE) {
+				ql_dbg(ql_dbg_disc, vha, 0x2115,
+				       "Delaying session delete for FCP2 portid=%06x %8phC ",
+					fcport->d_id.b24, fcport->port_name);
+				return;
+			}
+
+			if (vha->hw->flags.edif_enabled && DBELL_ACTIVE(vha)) {
+				/*
+				 * On ipsec start by remote port, Target port
+				 * may use RSCN to trigger initiator to
+				 * relogin. If driver is already in the
+				 * process of a relogin, then ignore the RSCN
+				 * and allow the current relogin to continue.
+				 * This reduces thrashing of the connection.
+				 */
+				if (atomic_read(&fcport->state) == FCS_ONLINE) {
+					/*
+					 * If state = online, then set scan_needed=1 to do relogin.
+					 * Otherwise we're already in the middle of a relogin
+					 */
+					fcport->scan_needed = 1;
+					fcport->rscn_gen++;
+				}
+			} else {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
+		}
+		break;
+	case RSCN_AREA_ADDR:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE &&
+			    atomic_read(&fcport->state) == FCS_ONLINE)
+				continue;
+
+			if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
+		}
+		break;
+	case RSCN_DOM_ADDR:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE &&
+			    atomic_read(&fcport->state) == FCS_ONLINE)
+				continue;
+
+			if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
+		}
+		break;
+	case RSCN_FAB_ADDR:
+	default:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE &&
+			    atomic_read(&fcport->state) == FCS_ONLINE)
+				continue;
+
+			fcport->scan_needed = 1;
+			fcport->rscn_gen++;
+		}
+		break;
 	}
 
 	spin_lock_irqsave(&vha->work_lock, flags);
@@ -1740,6 +1940,13 @@
 void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
 				      struct event_arg *ea)
 {
+	if (N2N_TOPO(vha->hw) && fcport_is_smaller(ea->fcport) &&
+	    vha->hw->flags.edif_enabled) {
+		/* check to see if App supports Secure */
+		qla24xx_post_gpdb_work(vha, ea->fcport, 0);
+		return;
+	}
+
 	/* for pure Target Mode, PRLI will not be initiated */
 	if (vha->host->active_mode == MODE_TARGET)
 		return;
@@ -1813,22 +2020,22 @@
 	srb_t *sp;
 	int rval = QLA_FUNCTION_FAILED;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
-	tm_iocb = &sp->u.iocb_cmd;
+	qla_vha_mark_busy(vha);
 	sp->type = SRB_TM_CMD;
 	sp->name = "tmf";
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha),
+			      qla2x00_tmf_sp_done);
+	sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout;
 
-	tm_iocb->timeout = qla2x00_tmf_iocb_timeout;
+	tm_iocb = &sp->u.iocb_cmd;
 	init_completion(&tm_iocb->u.tmf.comp);
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha));
-
 	tm_iocb->u.tmf.flags = flags;
 	tm_iocb->u.tmf.lun = lun;
-	tm_iocb->u.tmf.data = tag;
-	sp->done = qla2x00_tmf_sp_done;
 
 	ql_dbg(ql_dbg_taskm, vha, 0x802f,
 	    "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n",
@@ -1858,7 +2065,8 @@
 	}
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	fcport->flags &= ~FCF_ASYNC_SENT;
 done:
 	return rval;
@@ -1884,7 +2092,7 @@
 
 	if (handle == req->num_outstanding_cmds) {
 		/* Command not found. */
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NOT_FOUND;
 	}
 	if (sp->type == SRB_FXIOCB_DCMD)
 		return qlafx00_fx_disc(vha, &vha->hw->mr.fcport,
@@ -1896,6 +2104,7 @@
 static void
 qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 {
+	struct srb *sp;
 	WARN_ONCE(!qla2xxx_is_valid_mbs(ea->data[0]), "mbs: %#x\n",
 		  ea->data[0]);
 
@@ -1916,33 +2125,58 @@
 		qla24xx_post_gpdb_work(vha, ea->fcport, 0);
 		break;
 	default:
-		if ((ea->iop[0] == LSC_SCODE_ELS_REJECT) &&
-		    (ea->iop[1] == 0x50000)) {   /* reson 5=busy expl:0x0 */
-			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
-			ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
-			break;
-		}
+		sp = ea->sp;
+		ql_dbg(ql_dbg_disc, vha, 0x2118,
+		       "%s %d %8phC priority %s, fc4type %x prev try %s\n",
+		       __func__, __LINE__, ea->fcport->port_name,
+		       vha->hw->fc4_type_priority == FC4_PRIORITY_FCP ?
+		       "FCP" : "NVMe", ea->fcport->fc4_type,
+		       (sp->u.iocb_cmd.u.logio.flags & SRB_LOGIN_NVME_PRLI) ?
+			"NVME" : "FCP");
 
-		/*
-		 * Retry PRLI with other FC-4 type if failure occurred on dual
-		 * FCP/NVMe port
-		 */
 		if (NVME_FCP_TARGET(ea->fcport)) {
-			ql_dbg(ql_dbg_disc, vha, 0x2118,
-				"%s %d %8phC post %s prli\n",
-				__func__, __LINE__, ea->fcport->port_name,
-				(ea->fcport->fc4_type & FS_FC4TYPE_NVME) ?
-				"NVMe" : "FCP");
-			if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME)
-				ea->fcport->fc4_type &= ~FS_FC4TYPE_NVME;
+			if (sp->u.iocb_cmd.u.logio.flags & SRB_LOGIN_NVME_PRLI)
+				ea->fcport->do_prli_nvme = 0;
 			else
-				ea->fcport->fc4_type &= ~FS_FC4TYPE_FCP;
+				ea->fcport->do_prli_nvme = 1;
+		} else {
+			ea->fcport->do_prli_nvme = 0;
 		}
 
-		ea->fcport->flags &= ~FCF_ASYNC_SENT;
-		ea->fcport->keep_nport_handle = 0;
-		ea->fcport->logout_on_delete = 1;
-		qlt_schedule_sess_for_deletion(ea->fcport);
+		if (N2N_TOPO(vha->hw)) {
+			if (ea->fcport->n2n_link_reset_cnt ==
+			    vha->hw->login_retry_count &&
+			    ea->fcport->flags & FCF_FCSP_DEVICE) {
+				/* remote authentication app just started */
+				ea->fcport->n2n_link_reset_cnt = 0;
+			}
+
+			if (ea->fcport->n2n_link_reset_cnt <
+			    vha->hw->login_retry_count) {
+				ea->fcport->n2n_link_reset_cnt++;
+				vha->relogin_jif = jiffies + 2 * HZ;
+				/*
+				 * PRLI failed. Reset link to kick start
+				 * state machine
+				 */
+				set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+				qla2xxx_wake_dpc(vha);
+			} else {
+				ql_log(ql_log_warn, vha, 0x2119,
+				       "%s %d %8phC Unable to reconnect\n",
+				       __func__, __LINE__,
+				       ea->fcport->port_name);
+			}
+		} else {
+			/*
+			 * switch connect. login failed. Take connection down
+			 * and allow relogin to retrigger
+			 */
+			ea->fcport->flags &= ~FCF_ASYNC_SENT;
+			ea->fcport->keep_nport_handle = 0;
+			ea->fcport->logout_on_delete = 1;
+			qlt_schedule_sess_for_deletion(ea->fcport);
+		}
 		break;
 	}
 }
@@ -2003,38 +2237,45 @@
 		 * force a relogin attempt via implicit LOGO, PLOGI, and PRLI
 		 * requests.
 		 */
-		if (NVME_TARGET(vha->hw, ea->fcport)) {
-			ql_dbg(ql_dbg_disc, vha, 0x2117,
-				"%s %d %8phC post prli\n",
-				__func__, __LINE__, ea->fcport->port_name);
-			qla24xx_post_prli_work(vha, ea->fcport);
-		} else {
-			ql_dbg(ql_dbg_disc, vha, 0x20ea,
-			    "%s %d %8phC LoopID 0x%x in use with %06x. post gpdb\n",
-			    __func__, __LINE__, ea->fcport->port_name,
-			    ea->fcport->loop_id, ea->fcport->d_id.b24);
-
+		if (vha->hw->flags.edif_enabled) {
 			set_bit(ea->fcport->loop_id, vha->hw->loop_id_map);
 			spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
 			ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset;
 			ea->fcport->logout_on_delete = 1;
 			ea->fcport->send_els_logo = 0;
-			ea->fcport->fw_login_state = DSC_LS_PRLI_COMP;
+			ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
 			spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
 			qla24xx_post_gpdb_work(vha, ea->fcport, 0);
+		} else {
+			if (NVME_TARGET(vha->hw, fcport)) {
+				ql_dbg(ql_dbg_disc, vha, 0x2117,
+				    "%s %d %8phC post prli\n",
+				    __func__, __LINE__, fcport->port_name);
+				qla24xx_post_prli_work(vha, fcport);
+			} else {
+				ql_dbg(ql_dbg_disc, vha, 0x20ea,
+				    "%s %d %8phC LoopID 0x%x in use with %06x. post gpdb\n",
+				    __func__, __LINE__, fcport->port_name,
+				    fcport->loop_id, fcport->d_id.b24);
+
+				set_bit(fcport->loop_id, vha->hw->loop_id_map);
+				spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+				fcport->chip_reset = vha->hw->base_qpair->chip_reset;
+				fcport->logout_on_delete = 1;
+				fcport->send_els_logo = 0;
+				fcport->fw_login_state = DSC_LS_PRLI_COMP;
+				spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
+				qla24xx_post_gpdb_work(vha, fcport, 0);
+			}
 		}
 		break;
 	case MBS_COMMAND_ERROR:
 		ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n",
 		    __func__, __LINE__, ea->fcport->port_name, ea->data[1]);
 
-		ea->fcport->flags &= ~FCF_ASYNC_SENT;
-		qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED);
-		if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED)
-			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
-		else
-			qla2x00_mark_device_lost(vha, ea->fcport, 1);
+		qlt_schedule_sess_for_deletion(ea->fcport);
 		break;
 	case MBS_LOOP_ID_USED:
 		/* data[1] = IO PARAM 1 = nport ID  */
@@ -2257,6 +2498,9 @@
 	    ha->fc4_type_priority != FC4_PRIORITY_NVME)
 		ha->fc4_type_priority = FC4_PRIORITY_FCP;
 
+	/* BVA: Ignore the NVRAM configuration and force the default to FCP. */
+	ha->fc4_type_priority = FC4_PRIORITY_FCP;
+
 	ql_log(ql_log_info, vha, 0xffff, "FC4 priority set to %s\n",
 	       ha->fc4_type_priority == FC4_PRIORITY_FCP ? "FCP" : "NVMe");
 
@@ -2720,6 +2964,49 @@
 	return qla81xx_write_mpi_register(vha, mb);
 }
 
+/*
+ * Read mailbox0 and, when it holds 0xf, log mailboxes 0-31 as a failed RISC
+ * reset.  Does nothing unless IS_QLA27XX() or IS_QLA28XX() is true.
+ * Returns QLA_FUNCTION_FAILED if 0xf was read, QLA_SUCCESS otherwise.
+ */
+static int
+qla_chk_risc_recovery(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
+	__le16 __iomem *mbptr = &reg->mailbox0;
+	u16 mb[32];
+	int idx;
+
+	if (!(IS_QLA27XX(ha) || IS_QLA28XX(ha)))
+		return QLA_SUCCESS;
+
+	/* this check is only valid after RISC reset */
+	mb[0] = rd_reg_word(mbptr);
+	mbptr++;
+	if (mb[0] != 0xf)
+		return QLA_SUCCESS;
+
+	for (idx = 1; idx < 32; idx++) {
+		mb[idx] = rd_reg_word(mbptr);
+		mbptr++;
+	}
+
+	ql_log(ql_log_warn, vha, 0x1015,
+	       "RISC reset failed. mb[0-7] %04xh %04xh %04xh %04xh %04xh %04xh %04xh %04xh\n",
+	       mb[0], mb[1], mb[2], mb[3], mb[4], mb[5], mb[6], mb[7]);
+	ql_log(ql_log_warn, vha, 0x1015,
+	       "RISC reset failed. mb[8-15] %04xh %04xh %04xh %04xh %04xh %04xh %04xh %04xh\n",
+	       mb[8], mb[9], mb[10], mb[11], mb[12], mb[13], mb[14], mb[15]);
+	ql_log(ql_log_warn, vha, 0x1015,
+	       "RISC reset failed. mb[16-23] %04xh %04xh %04xh %04xh %04xh %04xh %04xh %04xh\n",
+	       mb[16], mb[17], mb[18], mb[19], mb[20], mb[21], mb[22], mb[23]);
+	ql_log(ql_log_warn, vha, 0x1015,
+	       "RISC reset failed. mb[24-31] %04xh %04xh %04xh %04xh %04xh %04xh %04xh %04xh\n",
+	       mb[24], mb[25], mb[26], mb[27], mb[28], mb[29], mb[30], mb[31]);
+	return QLA_FUNCTION_FAILED;
+}
+
 /**
  * qla24xx_reset_risc() - Perform full reset of ISP24xx RISC.
  * @vha: HA context
@@ -2736,6 +3023,7 @@
 	uint16_t wd;
 	static int abts_cnt; /* ISP abort retry counts */
 	int rval = QLA_SUCCESS;
+	int print = 1;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
@@ -2824,17 +3112,26 @@
 	rd_reg_dword(&reg->hccr);
 
 	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_RESET);
+	mdelay(10);
 	rd_reg_dword(&reg->hccr);
 
-	rd_reg_word(&reg->mailbox0);
-	for (cnt = 60; rd_reg_word(&reg->mailbox0) != 0 &&
-	    rval == QLA_SUCCESS; cnt--) {
+	wd = rd_reg_word(&reg->mailbox0);
+	for (cnt = 300; wd != 0 && rval == QLA_SUCCESS; cnt--) {
 		barrier();
-		if (cnt)
-			udelay(5);
-		else
+		if (cnt) {
+			mdelay(1);
+			if (print && qla_chk_risc_recovery(vha))
+				print = 0;
+
+			wd = rd_reg_word(&reg->mailbox0);
+		} else {
 			rval = QLA_FUNCTION_TIMEOUT;
+
+			ql_log(ql_log_warn, vha, 0x015e,
+			       "RISC reset timeout\n");
+		}
 	}
+
 	if (rval == QLA_SUCCESS)
 		set_bit(RISC_RDY_AFT_RESET, &ha->fw_dump_cap_flags);
 
@@ -3224,6 +3521,14 @@
 	struct rsp_que *rsp = ha->rsp_q_map[0];
 	struct qla2xxx_fw_dump *fw_dump;
 
+	if (ha->fw_dump) {
+		ql_dbg(ql_dbg_init, vha, 0x00bd,
+		    "Firmware dump already allocated.\n");
+		return;
+	}
+
+	ha->fw_dumped = 0;
+	ha->fw_dump_cap_flags = 0;
 	dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0;
 	req_q_size = rsp_q_size = 0;
 
@@ -3234,7 +3539,7 @@
 		mem_size = (ha->fw_memory_size - 0x11000 + 1) *
 		    sizeof(uint16_t);
 	} else if (IS_FWI2_CAPABLE(ha)) {
-		if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+		if (IS_QLA83XX(ha))
 			fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem);
 		else if (IS_QLA81XX(ha))
 			fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem);
@@ -3246,8 +3551,7 @@
 		mem_size = (ha->fw_memory_size - 0x100000 + 1) *
 		    sizeof(uint32_t);
 		if (ha->mqenable) {
-			if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) &&
-			    !IS_QLA28XX(ha))
+			if (!IS_QLA83XX(ha))
 				mq_size = sizeof(struct qla2xxx_mq_chain);
 			/*
 			 * Allocate maximum buffer size for all queues - Q0.
@@ -3288,6 +3592,8 @@
 			    j, fwdt->dump_size);
 			dump_size += fwdt->dump_size;
 		}
+		/* Add space for spare MPI fw dump. */
+		dump_size += ha->fwdt[1].dump_size;
 	} else {
 		req_q_size = req->length * sizeof(request_t);
 		rsp_q_size = rsp->length * sizeof(response_t);
@@ -3327,8 +3633,7 @@
 				    "Re-Allocated (%d KB) and save firmware dump.\n",
 				    dump_size / 1024);
 			} else {
-				if (ha->fw_dump)
-					vfree(ha->fw_dump);
+				vfree(ha->fw_dump);
 				ha->fw_dump = fw_dump;
 
 				ha->fw_dump_len = ha->fw_dump_alloc_len =
@@ -3622,6 +3927,31 @@
 	return ha->flags.lr_detected;
 }
 
+/* Reset the fwres IOCB counters of the base qpair and every mapped qpair. */
+void qla_init_iocb_limit(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	u16 idx, num_qps = ha->num_qpairs + 1;
+	u32 limit = (ha->orig_fw_iocb_count * QLA_IOCB_PCT_LIMIT) / 100;
+	u32 qp_limit = limit / num_qps;	/* equal share of the limit per qpair */
+
+	ha->base_qpair->fwres.iocbs_total = ha->orig_fw_iocb_count;
+	ha->base_qpair->fwres.iocbs_limit = limit;
+	ha->base_qpair->fwres.iocbs_qp_limit = qp_limit;
+	ha->base_qpair->fwres.iocbs_used = 0;
+
+	for (idx = 0; idx < ha->max_qpairs; idx++) {
+		/* Empty slots in queue_pair_map are skipped. */
+		if (!ha->queue_pair_map[idx])
+			continue;
+
+		ha->queue_pair_map[idx]->fwres.iocbs_total = ha->orig_fw_iocb_count;
+		ha->queue_pair_map[idx]->fwres.iocbs_limit = limit;
+		ha->queue_pair_map[idx]->fwres.iocbs_qp_limit = qp_limit;
+		ha->queue_pair_map[idx]->fwres.iocbs_used = 0;
+	}
+}
+
 /**
  * qla2x00_setup_chip() - Load and start RISC firmware.
  * @vha: HA context
@@ -3690,9 +4020,7 @@
 					goto execute_fw_with_lr;
 				}
 
-				if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
-				    IS_QLA28XX(ha)) &&
-				    (ha->zio_mode == QLA_ZIO_MODE_6))
+				if (IS_ZIO_THRESHOLD_CAPABLE(ha))
 					qla27xx_set_zio_threshold(vha,
 					    ha->last_zio_threshold);
 
@@ -3723,6 +4051,7 @@
 						    MIN_MULTI_ID_FABRIC - 1;
 				}
 				qla2x00_get_resource_cnts(vha);
+				qla_init_iocb_limit(vha);
 
 				/*
 				 * Allocate the array of outstanding commands
@@ -3749,7 +4078,8 @@
 		}
 
 		/* Enable PUREX PASSTHRU */
-		if (ql2xrdpenable || ha->flags.scm_supported_f)
+		if (ql2xrdpenable || ha->flags.scm_supported_f ||
+		    ha->flags.edif_enabled)
 			qla25xx_set_els_cmds_supported(vha);
 	} else
 		goto failed;
@@ -3782,8 +4112,7 @@
 			    ha->fw_major_version, ha->fw_minor_version,
 			    ha->fw_subminor_version);
 
-			if (IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
-			    IS_QLA28XX(ha)) {
+			if (IS_QLA83XX(ha)) {
 				ha->flags.fac_supported = 0;
 				rval = QLA_SUCCESS;
 			}
@@ -3934,7 +4263,7 @@
 	}
 
 	/* Move PUREX, ABTS RX & RIDA to ATIOQ */
-	if (ql2xmvasynctoatio &&
+	if (ql2xmvasynctoatio && !ha->flags.edif_enabled &&
 	    (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))) {
 		if (qla_tgt_mode_enabled(vha) ||
 		    qla_dual_mode_enabled(vha))
@@ -3953,16 +4282,30 @@
 		    qla_dual_mode_enabled(vha))
 			ha->fw_options[2] |= BIT_4;
 		else
-			ha->fw_options[2] &= ~BIT_4;
+			ha->fw_options[2] &= ~(BIT_4);
 
 		/* Reserve 1/2 of emergency exchanges for ELS.*/
 		if (qla2xuseresexchforels)
 			ha->fw_options[2] |= BIT_8;
 		else
 			ha->fw_options[2] &= ~BIT_8;
+
+		/*
+		 * N2N: set Secure=1 for PLOGI ACC and
+		 * fw shall not send PRLI after PLOGI Acc
+		 */
+		if (ha->flags.edif_enabled &&
+		    DBELL_ACTIVE(vha)) {
+			ha->fw_options[3] |= BIT_15;
+			ha->flags.n2n_fw_acc_sec = 1;
+		} else {
+			ha->fw_options[3] &= ~BIT_15;
+			ha->flags.n2n_fw_acc_sec = 0;
+		}
 	}
 
-	if (ql2xrdpenable || ha->flags.scm_supported_f)
+	if (ql2xrdpenable || ha->flags.scm_supported_f ||
+	    ha->flags.edif_enabled)
 		ha->fw_options[1] |= ADD_FO1_ENABLE_PUREX_IOCB;
 
 	/* Enable Async 8130/8131 events -- transceiver insertion/removal */
@@ -4161,8 +4504,6 @@
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	ql_dbg(ql_dbg_init, vha, 0x00d1, "Issue init firmware.\n");
-
 	if (IS_QLAFX00(ha)) {
 		rval = qlafx00_init_firmware(vha, ha->init_cb_size);
 		goto next_check;
@@ -4171,6 +4512,12 @@
 	/* Update any ISP specific firmware options before initialization. */
 	ha->isp_ops->update_fw_options(vha);
 
+	ql_dbg(ql_dbg_init, vha, 0x00d1,
+	       "Issue init firmware FW opt 1-3= %08x %08x %08x.\n",
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_1),
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_2),
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_3));
+
 	if (ha->flags.npiv_supported) {
 		if (ha->operating_mode == LOOP && !IS_CNA_CAPABLE(ha))
 			ha->max_npiv_vports = MIN_MULTI_ID_FABRIC - 1;
@@ -4192,8 +4539,14 @@
 			 BIT_6) != 0;
 		ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n",
 		    (ha->flags.fawwpn_enabled) ? "enabled" : "disabled");
+		/* Init_cb will be reused for other command(s).  Save a backup copy of port_name */
+		memcpy(ha->port_name, ha->init_cb->port_name, WWN_SIZE);
 	}
 
+	/* ELS pass-through payload is limited by the frame size. */
+	if (ha->flags.edif_enabled)
+		mid_init_cb->init_cb.frame_payload_size = cpu_to_le16(ELS_MAX_PAYLOAD);
+
 	rval = qla2x00_init_firmware(vha, ha->init_cb_size);
 next_check:
 	if (rval) {
@@ -4228,8 +4581,6 @@
 	if (IS_QLAFX00(vha->hw))
 		return qlafx00_fw_ready(vha);
 
-	rval = QLA_SUCCESS;
-
 	/* Time to wait for loop down */
 	if (IS_P3P_TYPE(ha))
 		min_wait = 30;
@@ -4405,11 +4756,11 @@
 	/* initialize */
 	ha->min_external_loopid = SNS_FIRST_LOOP_ID;
 	ha->operating_mode = LOOP;
-	ha->switch_cap = 0;
 
 	switch (topo) {
 	case 0:
 		ql_dbg(ql_dbg_disc, vha, 0x200b, "HBA in NL topology.\n");
+		ha->switch_cap = 0;
 		ha->current_topology = ISP_CFG_NL;
 		strcpy(connect_type, "(Loop)");
 		break;
@@ -4423,6 +4774,7 @@
 
 	case 2:
 		ql_dbg(ql_dbg_disc, vha, 0x200d, "HBA in N P2P topology.\n");
+		ha->switch_cap = 0;
 		ha->operating_mode = P2P;
 		ha->current_topology = ISP_CFG_N;
 		strcpy(connect_type, "(N_Port-to-N_Port)");
@@ -4439,6 +4791,7 @@
 	default:
 		ql_dbg(ql_dbg_disc, vha, 0x200f,
 		    "HBA in unknown topology %x, using NL.\n", topo);
+		ha->switch_cap = 0;
 		ha->current_topology = ISP_CFG_NL;
 		strcpy(connect_type, "(Loop)");
 		break;
@@ -4451,7 +4804,10 @@
 	id.b.al_pa = al_pa;
 	id.b.rsvd_1 = 0;
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	if (!(topo == 2 && ha->flags.n2n_bigger))
+	if (vha->hw->flags.edif_enabled) {
+		if (topo != 2)
+			qlt_update_host_map(vha, id);
+	} else if (!(topo == 2 && ha->flags.n2n_bigger))
 		qlt_update_host_map(vha, id);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
@@ -4603,18 +4959,18 @@
 			nv->firmware_options[1] = BIT_7 | BIT_5;
 			nv->add_firmware_options[0] = BIT_5;
 			nv->add_firmware_options[1] = BIT_5 | BIT_4;
-			nv->frame_payload_size = 2048;
+			nv->frame_payload_size = cpu_to_le16(2048);
 			nv->special_options[1] = BIT_7;
 		} else if (IS_QLA2200(ha)) {
 			nv->firmware_options[0] = BIT_2 | BIT_1;
 			nv->firmware_options[1] = BIT_7 | BIT_5;
 			nv->add_firmware_options[0] = BIT_5;
 			nv->add_firmware_options[1] = BIT_5 | BIT_4;
-			nv->frame_payload_size = 1024;
+			nv->frame_payload_size = cpu_to_le16(1024);
 		} else if (IS_QLA2100(ha)) {
 			nv->firmware_options[0] = BIT_3 | BIT_1;
 			nv->firmware_options[1] = BIT_5;
-			nv->frame_payload_size = 1024;
+			nv->frame_payload_size = cpu_to_le16(1024);
 		}
 
 		nv->max_iocb_allocation = cpu_to_le16(256);
@@ -4925,6 +5281,9 @@
 	fcport->login_retry = vha->hw->login_retry_count;
 	fcport->chip_reset = vha->hw->base_qpair->chip_reset;
 	fcport->logout_on_delete = 1;
+	fcport->tgt_link_down_time = QLA2XX_MAX_LINK_DOWN_TIME;
+	fcport->tgt_short_link_down_cnt = 0;
+	fcport->dev_loss_tmo = 0;
 
 	if (!fcport->ct_desc.ct_sns) {
 		ql_log(ql_log_warn, vha, 0xd049,
@@ -4939,6 +5298,16 @@
 	INIT_LIST_HEAD(&fcport->gnl_entry);
 	INIT_LIST_HEAD(&fcport->list);
 
+	INIT_LIST_HEAD(&fcport->sess_cmd_list);
+	spin_lock_init(&fcport->sess_cmd_lock);
+
+	spin_lock_init(&fcport->edif.sa_list_lock);
+	INIT_LIST_HEAD(&fcport->edif.tx_sa_list);
+	INIT_LIST_HEAD(&fcport->edif.rx_sa_list);
+
+	spin_lock_init(&fcport->edif.indx_list_lock);
+	INIT_LIST_HEAD(&fcport->edif.edif_indx_list);
+
 	return fcport;
 }
 
@@ -4952,11 +5321,39 @@
 
 		fcport->ct_desc.ct_sns = NULL;
 	}
+
+	qla_edif_flush_sa_ctl_lists(fcport);
 	list_del(&fcport->list);
 	qla2x00_clear_loop_id(fcport);
+
+	qla_edif_list_del(fcport);
+
 	kfree(fcport);
 }
 
+/* Fetch the FW login template into plogi_els_payld (from fl_csp on). */
+static void qla_get_login_template(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	__be32 *q = (__be32 *)&ha->plogi_els_payld.fl_csp;
+	u32 *bp = (uint32_t *)ha->init_cb, sz;
+	int rval;
+
+	memset(ha->init_cb, 0, ha->init_cb_size);
+	/* The copy lands at fl_csp, so only the bytes from there on fit. */
+	sz = min_t(int, ha->init_cb_size, sizeof(struct fc_els_flogi) -
+		   offsetof(struct fc_els_flogi, fl_csp));
+	rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma,
+					    ha->init_cb, sz);
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_init, vha, 0x00d1,
+		       "PLOGI ELS param read fail.\n");
+		return;
+	}
+	cpu_to_be32_array(q, bp, sz / 4);
+	ha->flags.plogi_template_valid = 1;
+}
+
 /*
  * qla2x00_configure_loop
  *      Updates Fibre Channel Device Database with what is actually on loop.
@@ -5000,6 +5397,7 @@
 	clear_bit(RSCN_UPDATE, &vha->dpc_flags);
 
 	qla2x00_get_data_rate(vha);
+	qla_get_login_template(vha);
 
 	/* Determine what we need to do */
 	if ((ha->current_topology == ISP_CFG_FL ||
@@ -5049,6 +5447,14 @@
 			ha->flags.fw_init_done = 1;
 
 			/*
+			 * use link up to wake up app to get ready for
+			 * authentication.
+			 */
+			if (ha->flags.edif_enabled && DBELL_INACTIVE(vha))
+				qla2x00_post_aen_work(vha, FCH_EVT_LINKUP,
+						      ha->link_data_rate);
+
+			/*
 			 * Process any ATIO queue entries that came in
 			 * while we weren't online.
 			 */
@@ -5067,7 +5473,8 @@
 		    "%s *** FAILED ***.\n", __func__);
 	} else {
 		ql_dbg(ql_dbg_disc, vha, 0x206b,
-		    "%s: exiting normally.\n", __func__);
+		    "%s: exiting normally. local port wwpn %8phN id %06x)\n",
+		    __func__, vha->port_name, vha->d_id.b24);
 	}
 
 	/* Restore state if a resync event occurred during processing */
@@ -5084,32 +5491,13 @@
 
 static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha)
 {
-	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags;
 	fc_port_t *fcport;
-	int rval;
 
-	if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) {
-		/* borrowing */
-		u32 *bp, sz;
+	ql_dbg(ql_dbg_disc, vha, 0x206a, "%s %d.\n", __func__, __LINE__);
 
-		memset(ha->init_cb, 0, ha->init_cb_size);
-		sz = min_t(int, sizeof(struct els_plogi_payload),
-			   ha->init_cb_size);
-		rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma,
-						    ha->init_cb, sz);
-		if (rval == QLA_SUCCESS) {
-			__be32 *q = &ha->plogi_els_payld.data[0];
-
-			bp = (uint32_t *)ha->init_cb;
-			cpu_to_be32_array(q, bp, sz / 4);
-			memcpy(bp, q, sizeof(ha->plogi_els_payld.data));
-		} else {
-			ql_dbg(ql_dbg_init, vha, 0x00d1,
-			       "PLOGI ELS param read fail.\n");
-			goto skip_login;
-		}
-	}
+	if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags))
+		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 
 	list_for_each_entry(fcport, &vha->vp_fcports, list) {
 		if (fcport->n2n_flag) {
@@ -5118,7 +5506,6 @@
 		}
 	}
 
-skip_login:
 	spin_lock_irqsave(&vha->work_lock, flags);
 	vha->scan.scan_retry++;
 	spin_unlock_irqrestore(&vha->work_lock, flags);
@@ -5130,6 +5517,22 @@
 	return QLA_FUNCTION_FAILED;
 }
 
+static void
+qla_reinitialize_link(scsi_qla_host_t *vha)
+{
+	int rval;
+
+	atomic_set(&vha->loop_state, LOOP_DOWN);
+	atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
+	rval = qla2x00_full_login_lip(vha);
+	if (rval == QLA_SUCCESS) {
+		ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n");
+	} else {
+		ql_dbg(ql_dbg_disc, vha, 0xd051,
+			"Link reinitialization failed (%d)\n", rval);
+	}
+}
+
 /*
  * qla2x00_configure_local_loop
  *	Updates Fibre Channel Device Database with local loop devices.
@@ -5144,7 +5547,6 @@
 qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 {
 	int		rval, rval2;
-	int		found_devs;
 	int		found;
 	fc_port_t	*fcport, *new_fcport;
 	uint16_t	index;
@@ -5159,7 +5561,6 @@
 	if (N2N_TOPO(ha))
 		return qla2x00_configure_n2n_loop(vha);
 
-	found_devs = 0;
 	new_fcport = NULL;
 	entries = MAX_FIBRE_DEVICES_LOOP;
 
@@ -5181,6 +5582,19 @@
 		spin_unlock_irqrestore(&vha->work_lock, flags);
 
 		if (vha->scan.scan_retry < MAX_SCAN_RETRIES) {
+			u8 loop_map_entries = 0;
+			int rc;
+
+			rc = qla2x00_get_fcal_position_map(vha, NULL,
+						&loop_map_entries);
+			if (rc == QLA_SUCCESS && loop_map_entries > 1) {
+				/*
+				 * There are devices that are still not logged
+				 * in. Reinitialize to give them a chance.
+				 */
+				qla_reinitialize_link(vha);
+				return QLA_FUNCTION_FAILED;
+			}
 			set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
 			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 		}
@@ -5269,6 +5683,13 @@
 			memcpy(fcport->node_name, new_fcport->node_name,
 			    WWN_SIZE);
 			fcport->scan_state = QLA_FCPORT_FOUND;
+			if (fcport->login_retry == 0) {
+				fcport->login_retry = vha->hw->login_retry_count;
+				ql_dbg(ql_dbg_disc, vha, 0x2135,
+				    "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n",
+				    fcport->port_name, fcport->loop_id,
+				    fcport->login_retry);
+			}
 			found++;
 			break;
 		}
@@ -5298,8 +5719,6 @@
 
 		/* Base iIDMA settings on HBA port speed. */
 		fcport->fp_speed = ha->link_data_rate;
-
-		found_devs++;
 	}
 
 	list_for_each_entry(fcport, &vha->vp_fcports, list) {
@@ -5417,6 +5836,7 @@
 	spin_lock_irqsave(fcport->vha->host->host_lock, flags);
 	*((fc_port_t **)rport->dd_data) = fcport;
 	spin_unlock_irqrestore(fcport->vha->host->host_lock, flags);
+	fcport->dev_loss_tmo = rport->dev_loss_tmo;
 
 	rport->supported_classes = fcport->supported_classes;
 
@@ -5432,13 +5852,14 @@
 	if (fcport->port_type & FCT_NVME_DISCOVERY)
 		rport_ids.roles |= FC_PORT_ROLE_NVME_DISCOVERY;
 
+	fc_remote_port_rolechg(rport, rport_ids.roles);
+
 	ql_dbg(ql_dbg_disc, vha, 0x20ee,
-	    "%s %8phN. rport %p is %s mode\n",
-	    __func__, fcport->port_name, rport,
+	    "%s: %8phN. rport %ld:0:%d (%p) is %s mode\n",
+	    __func__, fcport->port_name, vha->host_no,
+	    rport->scsi_target_id, rport,
 	    (fcport->port_type == FCT_TARGET) ? "tgt" :
 	    ((fcport->port_type & FCT_NVME) ? "nvme" : "ini"));
-
-	fc_remote_port_rolechg(rport, rport_ids.roles);
 }
 
 /*
@@ -5475,6 +5896,11 @@
 		fcport->logout_on_delete = 1;
 	fcport->n2n_chip_reset = fcport->n2n_link_reset_cnt = 0;
 
+	if (fcport->tgt_link_down_time < fcport->dev_loss_tmo) {
+		fcport->tgt_short_link_down_cnt++;
+		fcport->tgt_link_down_time = QLA2XX_MAX_LINK_DOWN_TIME;
+	}
+
 	switch (vha->hw->current_topology) {
 	case ISP_CFG_N:
 	case ISP_CFG_NL:
@@ -5486,12 +5912,7 @@
 
 	qla2x00_iidma_fcport(vha, fcport);
 
-	if (NVME_TARGET(vha->hw, fcport)) {
-		qla_nvme_register_remote(vha, fcport);
-		qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_COMPLETE);
-		qla2x00_set_fcport_state(fcport, FCS_ONLINE);
-		return;
-	}
+	qla2x00_dfs_create_rport(vha, fcport);
 
 	qla24xx_update_fcport_fcp_prio(vha, fcport);
 
@@ -5514,6 +5935,9 @@
 		break;
 	}
 
+	if (NVME_TARGET(vha->hw, fcport))
+		qla_nvme_register_remote(vha, fcport);
+
 	qla2x00_set_fcport_state(fcport, FCS_ONLINE);
 
 	if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) {
@@ -5547,6 +5971,10 @@
 
 	qla2x00_update_fcport(fcport->vha, fcport);
 
+	ql_dbg(ql_dbg_disc, fcport->vha, 0x911e,
+	       "%s rscn gen %d/%d next DS %d\n", __func__,
+	       rscn_gen, fcport->rscn_gen, fcport->next_disc_state);
+
 	if (rscn_gen != fcport->rscn_gen) {
 		/* RSCN(s) came in while registration */
 		switch (fcport->next_disc_state) {
@@ -5907,6 +6335,9 @@
 				break;
 			}
 
+			if (fcport->login_retry == 0)
+				fcport->login_retry =
+					vha->hw->login_retry_count;
 			/*
 			 * If device was not a fabric device before.
 			 */
@@ -6313,13 +6744,13 @@
 qla2x00_update_fcports(scsi_qla_host_t *base_vha)
 {
 	fc_port_t *fcport;
-	struct scsi_qla_host *vha;
+	struct scsi_qla_host *vha, *tvp;
 	struct qla_hw_data *ha = base_vha->hw;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
 	/* Go with deferred removal of rport references. */
-	list_for_each_entry(vha, &base_vha->hw->vp_list, list) {
+	list_for_each_entry_safe(vha, tvp, &base_vha->hw->vp_list, list) {
 		atomic_inc(&vha->vref_count);
 		list_for_each_entry(fcport, &vha->vp_fcports, list) {
 			if (fcport->drport &&
@@ -6428,29 +6859,6 @@
 	return rval;
 }
 
-static const char *
-qla83xx_dev_state_to_string(uint32_t dev_state)
-{
-	switch (dev_state) {
-	case QLA8XXX_DEV_COLD:
-		return "COLD/RE-INIT";
-	case QLA8XXX_DEV_INITIALIZING:
-		return "INITIALIZING";
-	case QLA8XXX_DEV_READY:
-		return "READY";
-	case QLA8XXX_DEV_NEED_RESET:
-		return "NEED RESET";
-	case QLA8XXX_DEV_NEED_QUIESCENT:
-		return "NEED QUIESCENT";
-	case QLA8XXX_DEV_FAILED:
-		return "FAILED";
-	case QLA8XXX_DEV_QUIESCENT:
-		return "QUIESCENT";
-	default:
-		return "Unknown";
-	}
-}
-
 /* Assumes idc-lock always held on entry */
 void
 qla83xx_idc_audit(scsi_qla_host_t *vha, int audit_type)
@@ -6504,9 +6912,8 @@
 		ql_log(ql_log_info, vha, 0xb056, "HW State: NEED RESET.\n");
 		qla83xx_idc_audit(vha, IDC_AUDIT_TIMESTAMP);
 	} else {
-		const char *state = qla83xx_dev_state_to_string(dev_state);
-
-		ql_log(ql_log_info, vha, 0xb057, "HW State: %s.\n", state);
+		ql_log(ql_log_info, vha, 0xb057, "HW State: %s.\n",
+				qdev_state(dev_state));
 
 		/* SV: XXX: Is timeout required here? */
 		/* Wait for IDC state change READY -> NEED_RESET */
@@ -6664,7 +7071,8 @@
 qla2x00_quiesce_io(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
+	unsigned long flags;
 
 	ql_dbg(ql_dbg_dpc, vha, 0x401d,
 	    "Quiescing I/O - ha=%p.\n", ha);
@@ -6673,8 +7081,18 @@
 	if (atomic_read(&vha->loop_state) != LOOP_DOWN) {
 		atomic_set(&vha->loop_state, LOOP_DOWN);
 		qla2x00_mark_all_devices_lost(vha);
-		list_for_each_entry(vp, &ha->vp_list, list)
+
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
+			atomic_inc(&vp->vref_count);
+			spin_unlock_irqrestore(&ha->vport_slock, flags);
+
 			qla2x00_mark_all_devices_lost(vp);
+
+			spin_lock_irqsave(&ha->vport_slock, flags);
+			atomic_dec(&vp->vref_count);
+		}
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
 	} else {
 		if (!atomic_read(&vha->loop_down_timer))
 			atomic_set(&vha->loop_down_timer,
@@ -6689,7 +7107,7 @@
 qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	unsigned long flags;
 	fc_port_t *fcport;
 	u16 i;
@@ -6724,10 +7142,16 @@
 	ha->flags.fw_init_done = 0;
 	ha->chip_reset++;
 	ha->base_qpair->chip_reset = ha->chip_reset;
+	ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
+	ha->base_qpair->prev_completion_cnt = 0;
 	for (i = 0; i < ha->max_qpairs; i++) {
-		if (ha->queue_pair_map[i])
+		if (ha->queue_pair_map[i]) {
 			ha->queue_pair_map[i]->chip_reset =
 				ha->base_qpair->chip_reset;
+			ha->queue_pair_map[i]->cmd_cnt =
+			    ha->queue_pair_map[i]->cmd_completion_cnt = 0;
+			ha->queue_pair_map[i]->prev_completion_cnt = 0;
+		}
 	}
 
 	/* purge MBox commands */
@@ -6753,7 +7177,7 @@
 		qla2x00_mark_all_devices_lost(vha);
 
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			atomic_inc(&vp->vref_count);
 			spin_unlock_irqrestore(&ha->vport_slock, flags);
 
@@ -6775,7 +7199,7 @@
 		fcport->scan_state = 0;
 	}
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 
@@ -6787,22 +7211,18 @@
 	}
 	spin_unlock_irqrestore(&ha->vport_slock, flags);
 
-	if (!ha->flags.eeh_busy) {
-		/* Make sure for ISP 82XX IO DMA is complete */
-		if (IS_P3P_TYPE(ha)) {
-			qla82xx_chip_reset_cleanup(vha);
-			ql_log(ql_log_info, vha, 0x00b4,
-			    "Done chip reset cleanup.\n");
+	/* Make sure for ISP 82XX IO DMA is complete */
+	if (IS_P3P_TYPE(ha)) {
+		qla82xx_chip_reset_cleanup(vha);
+		ql_log(ql_log_info, vha, 0x00b4,
+		       "Done chip reset cleanup.\n");
 
-			/* Done waiting for pending commands.
-			 * Reset the online flag.
-			 */
-			vha->flags.online = 0;
-		}
-
-		/* Requeue all commands in outstanding command list. */
-		qla2x00_abort_all_cmds(vha, DID_RESET << 16);
+		/* Done waiting for pending commands. Reset online flag */
+		vha->flags.online = 0;
 	}
+
+	/* Requeue all commands in outstanding command list. */
+	qla2x00_abort_all_cmds(vha, DID_RESET << 16);
 	/* memory barrier */
 	wmb();
 }
@@ -6823,13 +7243,25 @@
 	int rval;
 	uint8_t        status = 0;
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	struct req_que *req = ha->req_q_map[0];
 	unsigned long flags;
 
 	if (vha->flags.online) {
 		qla2x00_abort_isp_cleanup(vha);
 
+		vha->dport_status |= DPORT_DIAG_CHIP_RESET_IN_PROGRESS;
+		vha->dport_status &= ~DPORT_DIAG_IN_PROGRESS;
+
+		if (vha->hw->flags.port_isolated)
+			return status;
+
+		if (qla2x00_isp_reg_stat(ha)) {
+			ql_log(ql_log_info, vha, 0x803f,
+			       "ISP Abort - ISP reg disconnect, exiting.\n");
+			return status;
+		}
+
 		if (test_and_clear_bit(ISP_ABORT_TO_ROM, &vha->dpc_flags)) {
 			ha->flags.chip_reset_done = 1;
 			vha->flags.online = 1;
@@ -6859,7 +7291,8 @@
 				return 0;
 			break;
 		case QLA2XXX_INI_MODE_DUAL:
-			if (!qla_dual_mode_enabled(vha))
+			if (!qla_dual_mode_enabled(vha) &&
+			    !qla_ini_mode_enabled(vha))
 				return 0;
 			break;
 		case QLA2XXX_INI_MODE_ENABLED:
@@ -6869,8 +7302,18 @@
 
 		ha->isp_ops->get_flash_version(vha, req->ring);
 
+		if (qla2x00_isp_reg_stat(ha)) {
+			ql_log(ql_log_info, vha, 0x803f,
+			       "ISP Abort - ISP reg disconnect pre nvram config, exiting.\n");
+			return status;
+		}
 		ha->isp_ops->nvram_config(vha);
 
+		if (qla2x00_isp_reg_stat(ha)) {
+			ql_log(ql_log_info, vha, 0x803f,
+			       "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n");
+			return status;
+		}
 		if (!qla2x00_restart_isp(vha)) {
 			clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
 
@@ -6951,11 +7394,16 @@
 
 	}
 
+	if (vha->hw->flags.port_isolated) {
+		qla2x00_abort_isp_cleanup(vha);
+		return status;
+	}
+
 	if (!status) {
 		ql_dbg(ql_dbg_taskm, vha, 0x8022, "%s succeeded.\n", __func__);
 		qla2x00_configure_hba(vha);
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			if (vp->vp_idx) {
 				atomic_inc(&vp->vref_count);
 				spin_unlock_irqrestore(&ha->vport_slock, flags);
@@ -7109,10 +7557,9 @@
 	unsigned long flags = 0;
 	struct qla_hw_data *ha = vha->hw;
 	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
-	int rval = QLA_SUCCESS;
 
 	if (IS_P3P_TYPE(ha))
-		return rval;
+		return QLA_SUCCESS;
 
 	vha->flags.online = 0;
 	ha->isp_ops->disable_intrs(ha);
@@ -7127,7 +7574,7 @@
 	if (IS_NOPOLLING_TYPE(ha))
 		ha->isp_ops->enable_intrs(ha);
 
-	return rval;
+	return QLA_SUCCESS;
 }
 
 /* On sparc systems, obtain port and node WWN from firmware
@@ -7483,6 +7930,9 @@
 
 	active_regions->aux.npiv_config_2_3 =
 	    qla28xx_component_bitmask(aux, QLA28XX_AUX_IMG_NPIV_CONFIG_2_3);
+
+	active_regions->aux.nvme_params =
+	    qla28xx_component_bitmask(aux, QLA28XX_AUX_IMG_NVME_PARAMS);
 }
 
 static int
@@ -7591,11 +8041,12 @@
 	}
 
 	ql_dbg(ql_dbg_init, vha, 0x018f,
-	    "aux images active: BCFG=%u VPD/NVR=%u NPIV0/1=%u NPIV2/3=%u\n",
+	    "aux images active: BCFG=%u VPD/NVR=%u NPIV0/1=%u NPIV2/3=%u, NVME=%u\n",
 	    active_regions->aux.board_config,
 	    active_regions->aux.vpd_nvram,
 	    active_regions->aux.npiv_config_0_1,
-	    active_regions->aux.npiv_config_2_3);
+	    active_regions->aux.npiv_config_2_3,
+	    active_regions->aux.nvme_params);
 }
 
 void
@@ -7778,8 +8229,7 @@
 	templates = (risc_attr & BIT_9) ? 2 : 1;
 	ql_dbg(ql_dbg_init, vha, 0x0160, "-> templates = %u\n", templates);
 	for (j = 0; j < templates; j++, fwdt++) {
-		if (fwdt->template)
-			vfree(fwdt->template);
+		vfree(fwdt->template);
 		fwdt->template = NULL;
 		fwdt->length = 0;
 
@@ -7839,8 +8289,7 @@
 	return QLA_SUCCESS;
 
 failed:
-	if (fwdt->template)
-		vfree(fwdt->template);
+	vfree(fwdt->template);
 	fwdt->template = NULL;
 	fwdt->length = 0;
 
@@ -8036,8 +8485,7 @@
 	templates = (risc_attr & BIT_9) ? 2 : 1;
 	ql_dbg(ql_dbg_init, vha, 0x0170, "-> templates = %u\n", templates);
 	for (j = 0; j < templates; j++, fwdt++) {
-		if (fwdt->template)
-			vfree(fwdt->template);
+		vfree(fwdt->template);
 		fwdt->template = NULL;
 		fwdt->length = 0;
 
@@ -8097,8 +8545,7 @@
 	return QLA_SUCCESS;
 
 failed:
-	if (fwdt->template)
-		vfree(fwdt->template);
+	vfree(fwdt->template);
 	fwdt->template = NULL;
 	fwdt->length = 0;
 
@@ -8645,7 +9092,7 @@
 {
 	int status, rval;
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	unsigned long flags;
 
 	status = qla2x00_init_rings(vha);
@@ -8717,7 +9164,7 @@
 		    "qla82xx_restart_isp succeeded.\n");
 
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			if (vp->vp_idx) {
 				atomic_inc(&vp->vref_count);
 				spin_unlock_irqrestore(&ha->vport_slock, flags);
@@ -9094,3 +9541,215 @@
 fail:
 	return ret;
 }
+
+/* Return how many bits are set in @num. */
+uint64_t
+qla2x00_count_set_bits(uint32_t num)
+{
+	u64 nbits;
+
+	/* Kernighan's method: every pass clears the lowest set bit. */
+	for (nbits = 0; num; nbits++)
+		num &= num - 1;
+
+	return nbits;
+}
+
+/*
+ * Count the FCT_TARGET fcports on @vha->vp_fcports.  No lock is taken
+ * here.
+ */
+uint64_t
+qla2x00_get_num_tgts(scsi_qla_host_t *vha)
+{
+	fc_port_t *fcport = NULL, *next = NULL;
+	u64 ntgts = 0;
+
+	list_for_each_entry_safe(fcport, next, &vha->vp_fcports, list) {
+		if (fcport->port_type == FCT_TARGET)
+			ntgts++;
+	}
+	return ntgts;
+}
+
+int qla2xxx_reset_stats(struct Scsi_Host *host, u32 flags)
+{
+	scsi_qla_host_t *vha = shost_priv(host);
+	fc_port_t *fcport = NULL;
+	unsigned long int_flags;
+
+	if (flags & QLA2XX_HW_ERROR)
+		vha->hw_err_cnt = 0;
+	if (flags & QLA2XX_SHT_LNK_DWN)
+		vha->short_link_down_cnt = 0;
+	if (flags & QLA2XX_INT_ERR)
+		vha->interface_err_cnt = 0;
+	if (flags & QLA2XX_CMD_TIMEOUT)
+		vha->cmd_timeout_cnt = 0;
+	if (flags & QLA2XX_RESET_CMD_ERR)
+		vha->reset_cmd_err_cnt = 0;
+	if (flags & QLA2XX_TGT_SHT_LNK_DOWN) {
+		spin_lock_irqsave(&vha->hw->tgt.sess_lock, int_flags);
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			fcport->tgt_short_link_down_cnt = 0;
+			fcport->tgt_link_down_time = QLA2XX_MAX_LINK_DOWN_TIME;
+		}
+		spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, int_flags);
+	}
+	vha->link_down_time = QLA2XX_MAX_LINK_DOWN_TIME;
+	return 0;
+}
+
+int qla2xxx_start_stats(struct Scsi_Host *host, u32 flags)
+{
+	return qla2xxx_reset_stats(host, flags);
+}
+
+int qla2xxx_stop_stats(struct Scsi_Host *host, u32 flags)
+{
+	return qla2xxx_reset_stats(host, flags);
+}
+
+/*
+ * Fill @data with one entry per requested initiator counter, then one per
+ * target that has an rport.  entry_count reports the entries written.
+ */
+int qla2xxx_get_ini_stats(struct Scsi_Host *host, u32 flags,
+			  void *data, u64 size)
+{
+	scsi_qla_host_t *vha = shost_priv(host);
+	struct ql_vnd_host_stats_resp *resp = (struct ql_vnd_host_stats_resp *)data;
+	struct ql_vnd_stats *rsp_data = &resp->stats;
+	u64 ini_entry_count = 0;
+	u64 i = 0;
+	u64 entry_count = 0;
+	u64 num_tgt = 0;
+	u32 tmp_stat_type = flags;
+	fc_port_t *fcport = NULL;
+	unsigned long int_flags;
+
+	if (tmp_stat_type & BIT_17) {
+		num_tgt = qla2x00_get_num_tgts(vha);
+		tmp_stat_type &= ~(1 << 17);
+	}
+	ini_entry_count = qla2x00_count_set_bits(tmp_stat_type);
+
+	entry_count = ini_entry_count + num_tgt;
+
+	if (flags & QLA2XX_HW_ERROR) {
+		rsp_data->entry[i].stat_type = QLA2XX_HW_ERROR;
+		rsp_data->entry[i].tgt_num = 0x0;
+		rsp_data->entry[i].cnt = vha->hw_err_cnt;
+		i++;
+	}
+
+	if (flags & QLA2XX_SHT_LNK_DWN) {
+		rsp_data->entry[i].stat_type = QLA2XX_SHT_LNK_DWN;
+		rsp_data->entry[i].tgt_num = 0x0;
+		rsp_data->entry[i].cnt = vha->short_link_down_cnt;
+		i++;
+	}
+
+	if (flags & QLA2XX_INT_ERR) {
+		rsp_data->entry[i].stat_type = QLA2XX_INT_ERR;
+		rsp_data->entry[i].tgt_num = 0x0;
+		rsp_data->entry[i].cnt = vha->interface_err_cnt;
+		i++;
+	}
+
+	if (flags & QLA2XX_CMD_TIMEOUT) {
+		rsp_data->entry[i].stat_type = QLA2XX_CMD_TIMEOUT;
+		rsp_data->entry[i].tgt_num = 0x0;
+		rsp_data->entry[i].cnt = vha->cmd_timeout_cnt;
+		i++;
+	}
+
+	if (flags & QLA2XX_RESET_CMD_ERR) {
+		rsp_data->entry[i].stat_type = QLA2XX_RESET_CMD_ERR;
+		rsp_data->entry[i].tgt_num = 0x0;
+		rsp_data->entry[i].cnt = vha->reset_cmd_err_cnt;
+		i++;
+	}
+
+	/* Target entries follow the initiator ones.  Stop at entry_count so
+	 * we never write more entries than were counted above.
+	 */
+	if (flags & QLA2XX_TGT_SHT_LNK_DOWN) {
+		spin_lock_irqsave(&vha->hw->tgt.sess_lock, int_flags);
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->port_type != FCT_TARGET)
+				continue;
+			if (!fcport->rport)
+				continue;
+			if (i >= entry_count)
+				break;
+			rsp_data->entry[i].stat_type = QLA2XX_TGT_SHT_LNK_DOWN;
+			rsp_data->entry[i].tgt_num = fcport->rport->number;
+			rsp_data->entry[i].cnt = fcport->tgt_short_link_down_cnt;
+			i++;
+		}
+		spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, int_flags);
+	}
+	rsp_data->entry_count = i;
+	resp->status = EXT_STATUS_OK;
+
+	return 0;
+}
+
+int qla2xxx_get_tgt_stats(struct Scsi_Host *host, u32 flags,
+			  struct fc_rport *rport, void *data, u64 size)
+{
+	struct ql_vnd_tgt_stats_resp *tgt_data = data;
+	fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+
+	tgt_data->status = 0;
+	tgt_data->stats.entry_count = 1;
+	tgt_data->stats.entry[0].stat_type = flags;
+	tgt_data->stats.entry[0].tgt_num = rport->number;
+	tgt_data->stats.entry[0].cnt = fcport->tgt_short_link_down_cnt;
+
+	return 0;
+}
+
+int qla2xxx_disable_port(struct Scsi_Host *host)
+{
+	scsi_qla_host_t *vha = shost_priv(host);
+
+	vha->hw->flags.port_isolated = 1;
+
+	if (qla2x00_isp_reg_stat(vha->hw)) {
+		ql_log(ql_log_info, vha, 0x9006,
+		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
+		return FAILED;
+	}
+	if (qla2x00_chip_is_down(vha))
+		return 0;
+
+	if (vha->flags.online) {
+		qla2x00_abort_isp_cleanup(vha);
+		qla2x00_wait_for_sess_deletion(vha);
+	}
+
+	return 0;
+}
+
+int qla2xxx_enable_port(struct Scsi_Host *host)
+{
+	scsi_qla_host_t *vha = shost_priv(host);
+
+	if (qla2x00_isp_reg_stat(vha->hw)) {
+		ql_log(ql_log_info, vha, 0x9001,
+		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
+		return FAILED;
+	}
+
+	vha->hw->flags.port_isolated = 0;
+	/* Set the flag to 1, so that isp_abort can proceed */
+	vha->flags.online = 1;
+	set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+	qla2xxx_wake_dpc(vha);
+
+	return 0;
+}

diff --git a/scst/qla2x00t-32gbit/qla_inline.h b/scst/qla2x00t-32gbit/qla_inline.h
index 80b9ef7..27d2a5c 100644
--- a/scst/qla2x00t-32gbit/qla_inline.h
+++ b/scst/qla2x00t-32gbit/qla_inline.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #ifdef INSIDE_KERNEL_TREE
@@ -193,6 +192,8 @@
 	sp->vha = vha;
 	sp->qpair = qpair;
 	sp->cmd_type = TYPE_SRB;
+	/* ref : INIT - normal flow */
+	kref_init(&sp->cmd_kref);
 	INIT_LIST_HEAD(&sp->elem);
 }
 
@@ -215,10 +216,15 @@
 	return sp;
 }
 
+void qla2xxx_rel_done_warning(srb_t *sp, int res);
+void qla2xxx_rel_free_warning(srb_t *sp);
+
 static inline void
 qla2xxx_rel_qpair_sp(struct qla_qpair *qpair, srb_t *sp)
 {
 	sp->qpair = NULL;
+	sp->done = qla2xxx_rel_done_warning;
+	sp->free = qla2xxx_rel_free_warning;
 	mempool_free(sp, qpair->srb_mempool);
 	QLA_QPAIR_MARK_NOT_BUSY(qpair);
 }
@@ -227,11 +233,9 @@
 qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag)
 {
 	srb_t *sp = NULL;
-	uint8_t bail;
 	struct qla_qpair *qpair;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
-	if (unlikely(bail))
+	if (unlikely(qla_vha_mark_busy(vha)))
 		return NULL;
 
 	qpair = vha->hw->base_qpair;
@@ -274,11 +278,41 @@
 }
 
 static inline void
-qla2x00_set_retry_delay_timestamp(fc_port_t *fcport, uint16_t retry_delay)
+qla2x00_set_retry_delay_timestamp(fc_port_t *fcport, uint16_t sts_qual)
 {
-	if (retry_delay)
-		fcport->retry_delay_timestamp = jiffies +
-		    (retry_delay * HZ / 10);
+	u8 scope;
+	u16 qual;
+#define SQ_SCOPE_MASK		0xc000 /* SAM-6 rev5 5.3.2 */
+#define SQ_SCOPE_SHIFT		14
+#define SQ_QUAL_MASK		0x3fff
+
+#define SQ_MAX_WAIT_SEC		60 /* Max I/O hold off time in seconds. */
+#define SQ_MAX_WAIT_TIME	(SQ_MAX_WAIT_SEC * 10) /* in 100ms. */
+
+	if (!sts_qual) /* Common case. */
+		return;
+
+	scope = (sts_qual & SQ_SCOPE_MASK) >> SQ_SCOPE_SHIFT;
+	/* Handle only scope 1 or 2, which is for I-T nexus. */
+	if (scope != 1 && scope != 2)
+		return;
+
+	/* Skip processing, if retry delay timer is already in effect. */
+	if (fcport->retry_delay_timestamp &&
+	    time_before(jiffies, fcport->retry_delay_timestamp))
+		return;
+
+	qual = sts_qual & SQ_QUAL_MASK;
+	if (qual < 1 || qual > 0x3fef)
+		return;
+	qual = min(qual, (u16)SQ_MAX_WAIT_TIME);
+
+	/* qual is expressed in 100ms increments. */
+	fcport->retry_delay_timestamp = jiffies + (qual * HZ / 10);
+
+	ql_log(ql_log_warn, fcport->vha, 0x5101,
+	       "%8phC: I/O throttling requested (status qualifier = %04xh), holding off I/Os for %ums.\n",
+	       fcport->port_name, sts_qual, qual * 100);
 }
 
 static inline bool
@@ -351,3 +385,120 @@
 
 	return (data >> 6) & BIT_0 ? FC4_PRIORITY_FCP : FC4_PRIORITY_NVME;
 }
+
+enum {
+	RESOURCE_NONE,
+	RESOURCE_INI,
+};
+
+/* Charge iores->iocb_cnt IOCBs to @qp, first against the per-qpair limit and
+ * then against the overall limit.  Returns 0 on success, -ENOSPC otherwise.
+ */
+static inline int
+qla_get_iocbs(struct qla_qpair *qp, struct iocb_resource *iores)
+{
+	struct qla_hw_data *ha = qp->vha->hw;
+	u16 total, idx;
+
+	if (!ql2xenforce_iocb_limit) {
+		iores->res_type = RESOURCE_NONE;
+		return 0;
+	}
+
+	if ((iores->iocb_cnt + qp->fwres.iocbs_used) >= qp->fwres.iocbs_qp_limit) {
+		/* no need to acquire qpair lock. It's just rough calculation */
+		total = ha->base_qpair->fwres.iocbs_used;
+		for (idx = 0; idx < ha->max_qpairs; idx++) {
+			if (ha->queue_pair_map[idx])
+				total += ha->queue_pair_map[idx]->fwres.iocbs_used;
+		}
+
+		if ((iores->iocb_cnt + total) >= qp->fwres.iocbs_limit) {
+			iores->res_type = RESOURCE_NONE;
+			return -ENOSPC;
+		}
+	}
+
+	qp->fwres.iocbs_used += iores->iocb_cnt;
+	return 0;
+}
+
+/*
+ * Give back the IOCBs recorded in @iores to @qp's used count, unless
+ * res_type is RESOURCE_NONE, then mark @iores as RESOURCE_NONE.
+ */
+static inline void
+qla_put_iocbs(struct qla_qpair *qp, struct iocb_resource *iores)
+{
+	if (iores->res_type == RESOURCE_NONE)
+		return;
+
+	/* Clamp at zero; going below it should not happen. */
+	if (qp->fwres.iocbs_used < iores->iocb_cnt)
+		qp->fwres.iocbs_used = 0;
+	else
+		qp->fwres.iocbs_used -= iores->iocb_cnt;
+	iores->res_type = RESOURCE_NONE;
+}
+
+#define ISP_REG_DISCONNECT 0xffffffffU
+/**************************************************************************
+ * qla2x00_isp_reg_stat
+ *
+ * Description:
+ *        Read the host status register of ISP before aborting the command.
+ *
+ * Input:
+ *       ha = pointer to host adapter structure.
+ *
+ *
+ * Returns:
+ *       Either true or false.
+ *
+ * Note: Return true if there is register disconnect.
+ **************************************************************************/
+static inline
+uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
+{
+	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
+	struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
+
+	if (IS_P3P_TYPE(ha))
+		return ((rd_reg_dword(&reg82->host_int)) == ISP_REG_DISCONNECT);
+	else
+		return ((rd_reg_dword(&reg->host_status)) ==
+			ISP_REG_DISCONNECT);
+}
+
+/*
+ * Returns true, after logging and scheduling EEH work, when host_status
+ * reads back as ISP_REG_DISCONNECT; false otherwise.
+ */
+static inline
+bool qla_pci_disconnected(struct scsi_qla_host *vha,
+			  struct device_reg_24xx __iomem *reg)
+{
+	if (rd_reg_dword(&reg->host_status) != ISP_REG_DISCONNECT)
+		return false;
+
+	ql_log(ql_log_info, vha, 0x8041,
+	       "detected PCI disconnect.\n");
+	qla_schedule_eeh_work(vha);
+	return true;
+}
+
+/* True when @fcport's port_name, as a u64, is below fcport->vha's. */
+static inline bool
+fcport_is_smaller(fc_port_t *fcport)
+{
+	u64 remote = wwn_to_u64(fcport->port_name);
+	u64 local = wwn_to_u64(fcport->vha->port_name);
+
+	return remote < local;
+}
+
+static inline bool
+fcport_is_bigger(fc_port_t *fcport)
+{
+	return !fcport_is_smaller(fcport);
+}

diff --git a/scst/qla2x00t-32gbit/qla_iocb.c b/scst/qla2x00t-32gbit/qla_iocb.c
index a2aa5b3..8d0d4a2 100644
--- a/scst/qla2x00t-32gbit/qla_iocb.c
+++ b/scst/qla2x00t-32gbit/qla_iocb.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_target.h"
@@ -119,7 +118,7 @@
  *
  * Returns a pointer to the continuation type 1 IOCB packet.
  */
-static inline cont_a64_entry_t *
+cont_a64_entry_t *
 qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha, struct req_que *req)
 {
 	cont_a64_entry_t *cont_pkt;
@@ -146,7 +145,6 @@
 qla24xx_configure_prot_mode(srb_t *sp, uint16_t *fw_prot_opts)
 {
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-	uint8_t	guard = scsi_host_get_guard(cmd->device->host);
 
 	/* We always use DIFF Bundling for best performance */
 	*fw_prot_opts = 0;
@@ -167,16 +165,25 @@
 		break;
 	case SCSI_PROT_READ_PASS:
 	case SCSI_PROT_WRITE_PASS:
-		if (guard & SHOST_DIX_GUARD_IP)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
+		if (cmd->prot_flags & SCSI_PROT_IP_CHECKSUM)
 			*fw_prot_opts |= PO_MODE_DIF_TCP_CKSUM;
 		else
 			*fw_prot_opts |= PO_MODE_DIF_PASS;
+#endif
 		break;
 	default:	/* Normal Request */
 		*fw_prot_opts |= PO_MODE_DIF_PASS;
 		break;
 	}
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
+	if (!(cmd->prot_flags & SCSI_PROT_GUARD_CHECK))
+		*fw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+#else
+		WARN_ON_ONCE(true);
+#endif
+
 	return scsi_prot_sg_count(cmd);
 }
 
@@ -492,7 +499,7 @@
 }
 
 /**
- * qla2x00_marker() - Send a marker IOCB to the firmware.
+ * __qla2x00_marker() - Send a marker IOCB to the firmware.
  * @vha: HA context
  * @qpair: queue pair pointer
  * @loop_id: loop ID
@@ -594,6 +601,7 @@
 	uint32_t dsd_list_len;
 	struct dsd_dma *dsd_ptr;
 	struct ct6_dsd *ctx;
+	struct qla_qpair *qpair = sp->qpair;
 
 	cmd = GET_CMD_SP(sp);
 
@@ -612,12 +620,12 @@
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
-		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += scsi_bufflen(cmd);
+		qpair->counters.output_requests++;
 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
-		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += scsi_bufflen(cmd);
+		qpair->counters.input_requests++;
 	}
 
 	cur_seg = scsi_sglist(cmd);
@@ -704,6 +712,7 @@
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
+	struct qla_qpair *qpair = sp->qpair;
 
 	cmd = GET_CMD_SP(sp);
 
@@ -721,12 +730,12 @@
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_WRITE_DATA);
-		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += scsi_bufflen(cmd);
+		qpair->counters.output_requests++;
 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_READ_DATA);
-		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += scsi_bufflen(cmd);
+		qpair->counters.input_requests++;
 	}
 
 	/* One DSD is available in the Command Type 3 IOCB */
@@ -771,74 +780,23 @@
 {
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 
-	switch (scsi_get_prot_type(cmd)) {
-	case SCSI_PROT_DIF_TYPE0:
-		/*
-		 * No check for ql2xenablehba_err_chk, as it would be an
-		 * I/O error if hba tag generation is not done.
-		 */
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
+	pkt->ref_tag = cpu_to_le32(scsi_prot_ref_tag(cmd));
 
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
-
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
+	if (cmd->prot_flags & SCSI_PROT_REF_CHECK &&
+	    qla2x00_hba_err_chk_enabled(sp)) {
 		pkt->ref_tag_mask[0] = 0xff;
 		pkt->ref_tag_mask[1] = 0xff;
 		pkt->ref_tag_mask[2] = 0xff;
 		pkt->ref_tag_mask[3] = 0xff;
-		break;
-
-	/*
-	 * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
-	 * match LBA in CDB + N
-	 */
-	case SCSI_PROT_DIF_TYPE2:
-		pkt->app_tag = cpu_to_le16(0);
-		pkt->app_tag_mask[0] = 0x0;
-		pkt->app_tag_mask[1] = 0x0;
-
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
-
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		pkt->ref_tag_mask[0] = 0xff;
-		pkt->ref_tag_mask[1] = 0xff;
-		pkt->ref_tag_mask[2] = 0xff;
-		pkt->ref_tag_mask[3] = 0xff;
-		break;
-
-	/* For Type 3 protection: 16 bit GUARD only */
-	case SCSI_PROT_DIF_TYPE3:
-		pkt->ref_tag_mask[0] = pkt->ref_tag_mask[1] =
-			pkt->ref_tag_mask[2] = pkt->ref_tag_mask[3] =
-								0x00;
-		break;
-
-	/*
-	 * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
-	 * 16 bit app tag.
-	 */
-	case SCSI_PROT_DIF_TYPE1:
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
-		pkt->app_tag = cpu_to_le16(0);
-		pkt->app_tag_mask[0] = 0x0;
-		pkt->app_tag_mask[1] = 0x0;
-
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		pkt->ref_tag_mask[0] = 0xff;
-		pkt->ref_tag_mask[1] = 0xff;
-		pkt->ref_tag_mask[2] = 0xff;
-		pkt->ref_tag_mask[3] = 0xff;
-		break;
 	}
+#else
+	WARN_ON_ONCE(true);
+#endif
+
+	pkt->app_tag = cpu_to_le16(0);
+	pkt->app_tag_mask[0] = 0x0;
+	pkt->app_tag_mask[1] = 0x0;
 }
 
 int
@@ -904,7 +862,7 @@
 	memset(&sgx, 0, sizeof(struct qla2_sgx));
 	if (sp) {
 		cmd = GET_CMD_SP(sp);
-		prot_int = cmd->device->sector_size;
+		prot_int = scsi_prot_interval(cmd);
 
 		sgx.tot_bytes = scsi_bufflen(cmd);
 		sgx.cur_sg = scsi_sglist(cmd);
@@ -1605,12 +1563,17 @@
 	uint16_t	req_cnt;
 	uint16_t	tot_dsds;
 	struct req_que *req = NULL;
+	struct rsp_que *rsp;
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 	struct scsi_qla_host *vha = sp->vha;
 	struct qla_hw_data *ha = vha->hw;
 
+	if (sp->fcport->edif.enable  && (sp->fcport->flags & FCF_FCSP_DEVICE))
+		return qla28xx_start_scsi_edif(sp);
+
 	/* Setup device pointers. */
 	req = vha->req;
+	rsp = req->rsp;
 
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
@@ -1641,9 +1604,21 @@
 
 	tot_dsds = nseg;
 	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+	sp->iores.res_type = RESOURCE_INI;
+	sp->iores.iocb_cnt = req_cnt;
+	if (qla_get_iocbs(sp->qpair, &sp->iores))
+		goto queuing_error;
+
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
+
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -1701,11 +1676,17 @@
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	sp->flags |= SRB_DMA_VALID;
 
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
+	/* Manage unprocessed RIO/ZIO commands in response queue. */
+	if (vha->flags.process_response_queue &&
+	    rsp->ring_ptr->signature != RESPONSE_PROCESSED)
+		qla24xx_process_response_queue(vha, rsp);
+
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	return QLA_SUCCESS;
 
@@ -1713,6 +1694,7 @@
 	if (tot_dsds)
 		scsi_dma_unmap(cmd);
 
+	qla_put_iocbs(sp->qpair, &sp->iores);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	return QLA_FUNCTION_FAILED;
@@ -1826,9 +1808,20 @@
 	/* Total Data and protection sg segment(s) */
 	tot_prot_dsds = nseg;
 	tot_dsds += nseg;
+
+	sp->iores.res_type = RESOURCE_INI;
+	sp->iores.iocb_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+	if (qla_get_iocbs(sp->qpair, &sp->iores))
+		goto queuing_error;
+
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -1886,9 +1879,15 @@
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
+	/* Manage unprocessed RIO/ZIO commands in response queue. */
+	if (vha->flags.process_response_queue &&
+	    rsp->ring_ptr->signature != RESPONSE_PROCESSED)
+		qla24xx_process_response_queue(vha, rsp);
+
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	return QLA_SUCCESS;
@@ -1900,7 +1899,9 @@
 	}
 	/* Cleanup will be performed by the caller (queuecommand) */
 
+	qla_put_iocbs(sp->qpair, &sp->iores);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	return QLA_FUNCTION_FAILED;
 }
 
@@ -1922,16 +1923,21 @@
 	uint16_t	req_cnt;
 	uint16_t	tot_dsds;
 	struct req_que *req = NULL;
+	struct rsp_que *rsp;
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_qpair *qpair = sp->qpair;
 
+	if (sp->fcport->edif.enable && (sp->fcport->flags & FCF_FCSP_DEVICE))
+		return qla28xx_start_scsi_edif(sp);
+
 	/* Acquire qpair specific lock */
 	spin_lock_irqsave(&qpair->qp_lock, flags);
 
 	/* Setup qpair pointers */
 	req = qpair->req;
+	rsp = qpair->rsp;
 
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
@@ -1961,9 +1967,21 @@
 
 	tot_dsds = nseg;
 	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+	sp->iores.res_type = RESOURCE_INI;
+	sp->iores.iocb_cnt = req_cnt;
+	if (qla_get_iocbs(sp->qpair, &sp->iores))
+		goto queuing_error;
+
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
+
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -2021,11 +2039,17 @@
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	sp->flags |= SRB_DMA_VALID;
 
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
+	/* Manage unprocessed RIO/ZIO commands in response queue. */
+	if (vha->flags.process_response_queue &&
+	    rsp->ring_ptr->signature != RESPONSE_PROCESSED)
+		qla24xx_process_response_queue(vha, rsp);
+
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
 	return QLA_SUCCESS;
 
@@ -2033,6 +2057,7 @@
 	if (tot_dsds)
 		scsi_dma_unmap(cmd);
 
+	qla_put_iocbs(sp->qpair, &sp->iores);
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
 
 	return QLA_FUNCTION_FAILED;
@@ -2161,9 +2186,21 @@
 	/* Total Data and protection sg segment(s) */
 	tot_prot_dsds = nseg;
 	tot_dsds += nseg;
+
+	sp->iores.res_type = RESOURCE_INI;
+	sp->iores.iocb_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+	if (qla_get_iocbs(sp->qpair, &sp->iores))
+		goto queuing_error;
+
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
+
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -2219,6 +2256,7 @@
 	} else
 		req->ring_ptr++;
 
+	sp->qpair->cmd_cnt++;
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
@@ -2238,7 +2276,9 @@
 	}
 	/* Cleanup will be performed by the caller (queuecommand) */
 
+	qla_put_iocbs(sp->qpair, &sp->iores);
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
 	return QLA_FUNCTION_FAILED;
 }
 
@@ -2283,6 +2323,11 @@
 			cnt = qla2x00_debounce_register(
 			    ISP_REQ_Q_OUT(ha, &reg->isp));
 
+		if (!qpair->use_shadow_reg && cnt == ISP_REG16_DISCONNECT) {
+			qla_schedule_eeh_work(vha);
+			return NULL;
+		}
+
 		if  (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -2354,6 +2399,17 @@
 		if (sp->vha->flags.nvme_first_burst)
 			logio->io_parameter[0] =
 				cpu_to_le32(NVME_PRLI_SP_FIRST_BURST);
+		if (sp->vha->flags.nvme2_enabled) {
+			/* Set service parameter BIT_7 for NVME CONF support */
+			logio->io_parameter[0] |=
+				cpu_to_le32(NVME_PRLI_SP_CONF);
+			/* Set service parameter BIT_8 for SLER support */
+			logio->io_parameter[0] |=
+				cpu_to_le32(NVME_PRLI_SP_SLER);
+			/* Set service parameter BIT_9 for PI control support */
+			logio->io_parameter[0] |=
+				cpu_to_le32(NVME_PRLI_SP_PI_CTRL);
+		}
 	}
 
 	logio->nport_handle = cpu_to_le16(sp->fcport->loop_id);
@@ -2379,6 +2435,12 @@
 			logio->control_flags |= cpu_to_le16(LCF_COND_PLOGI);
 		if (lio->u.logio.flags & SRB_LOGIN_SKIP_PRLI)
 			logio->control_flags |= cpu_to_le16(LCF_SKIP_PRLI);
+		if (lio->u.logio.flags & SRB_LOGIN_FCSP) {
+			logio->control_flags |=
+			    cpu_to_le16(LCF_COMMON_FEAT | LCF_SKIP_PRLI);
+			logio->io_parameter[0] =
+			    cpu_to_le32(LIO_COMM_FEAT_FCSP | LIO_COMM_FEAT_CIO);
+		}
 	}
 	logio->nport_handle = cpu_to_le16(sp->fcport->loop_id);
 	logio->port_id[0] = sp->fcport->d_id.b.al_pa;
@@ -2514,11 +2576,38 @@
 	}
 }
 
-void qla2x00_init_timer(srb_t *sp, unsigned long tmo)
+static void
+qla2x00_async_done(struct srb *sp, int res)
+{
+	if (del_timer(&sp->u.iocb_cmd.timer)) {
+		/*
+		 * Successfully cancelled the timeout handler
+		 * ref: TMR
+		 */
+		if (kref_put(&sp->cmd_kref, qla2x00_sp_release))
+			return;
+	}
+	sp->async_done(sp, res);
+}
+
+void
+qla2x00_sp_release(struct kref *kref)
+{
+	struct srb *sp = container_of(kref, struct srb, cmd_kref);
+
+	sp->free(sp);
+}
+
+void
+qla2x00_init_async_sp(srb_t *sp, unsigned long tmo,
+		     void (*done)(struct srb *sp, int res))
 {
 	timer_setup(&sp->u.iocb_cmd.timer, qla2x00_sp_timeout, 0);
-	sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ;
+	sp->done = qla2x00_async_done;
+	sp->async_done = done;
 	sp->free = qla2x00_sp_free;
+	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+	sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ;
 	if (IS_QLAFX00(sp->vha->hw) && sp->type == SRB_FXIOCB_DCMD)
 		init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp);
 	sp->start_timer = 1;
@@ -2605,7 +2694,9 @@
 	       return -ENOMEM;
 	}
 
-	/* Alloc SRB structure */
+	/* Alloc SRB structure
+	 * ref: INIT
+	 */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp) {
 		kfree(fcport);
@@ -2626,18 +2717,19 @@
 	sp->type = SRB_ELS_DCMD;
 	sp->name = "ELS_DCMD";
 	sp->fcport = fcport;
-	elsio->timeout = qla2x00_els_dcmd_iocb_timeout;
-	qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT);
-	init_completion(&sp->u.iocb_cmd.u.els_logo.comp);
-	sp->done = qla2x00_els_dcmd_sp_done;
+	qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT,
+			      qla2x00_els_dcmd_sp_done);
 	sp->free = qla2x00_els_dcmd_sp_free;
+	sp->u.iocb_cmd.timeout = qla2x00_els_dcmd_iocb_timeout;
+	init_completion(&sp->u.iocb_cmd.u.els_logo.comp);
 
 	elsio->u.els_logo.els_logo_pyld = dma_alloc_coherent(&ha->pdev->dev,
 			    DMA_POOL_SIZE, &elsio->u.els_logo.els_logo_pyld_dma,
 			    GFP_KERNEL);
 
 	if (!elsio->u.els_logo.els_logo_pyld) {
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return QLA_FUNCTION_FAILED;
 	}
 
@@ -2660,7 +2752,8 @@
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
-		sp->free(sp);
+		/* ref: INIT */
+		kref_put(&sp->cmd_kref, qla2x00_sp_release);
 		return QLA_FUNCTION_FAILED;
 	}
 
@@ -2671,7 +2764,8 @@
 
 	wait_for_completion(&elsio->u.els_logo.comp);
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	return rval;
 }
 
@@ -2702,7 +2796,10 @@
 	els_iocb->s_id[0] = vha->d_id.b.domain;
 
 	if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) {
-		els_iocb->control_flags = 0;
+		if (vha->hw->flags.edif_enabled)
+			els_iocb->control_flags = cpu_to_le16(ECF_SEC_LOGIN);
+		else
+			els_iocb->control_flags = 0;
 		els_iocb->tx_byte_count = els_iocb->tx_len =
 			cpu_to_le32(sizeof(struct els_plogi_payload));
 		put_unaligned_le64(elsio->u.els_plogi.els_plogi_pyld_dma,
@@ -2719,7 +2816,6 @@
 		    (uint8_t *)els_iocb,
 		    sizeof(*els_iocb));
 	} else {
-		els_iocb->control_flags = cpu_to_le16(1 << 13);
 		els_iocb->tx_byte_count =
 			cpu_to_le32(sizeof(struct els_logo_payload));
 		put_unaligned_le64(elsio->u.els_logo.els_logo_pyld_dma,
@@ -2739,7 +2835,7 @@
 	sp->vha->qla_stats.control_requests++;
 }
 
-static void
+void
 qla2x00_els_dcmd2_iocb_timeout(void *data)
 {
 	srb_t *sp = data;
@@ -2802,7 +2898,9 @@
 	    sp->name, res, sp->handle, fcport->d_id.b24, fcport->port_name);
 
 	fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
-	del_timer(&sp->u.iocb_cmd.timer);
+	/* For edif, set logout on delete to ensure any residual key from FW is flushed. */
+	fcport->logout_on_delete = 1;
+	fcport->chip_reset = vha->hw->base_qpair->chip_reset;
 
 	if (sp->flags & SRB_WAKEUP_ON_COMP)
 		complete(&lio->u.els_plogi.comp);
@@ -2879,6 +2977,7 @@
 					set_bit(ISP_ABORT_NEEDED,
 					    &vha->dpc_flags);
 					qla2xxx_wake_dpc(vha);
+					break;
 				}
 				fallthrough;
 			default:
@@ -2888,9 +2987,7 @@
 				    fw_status[0], fw_status[1], fw_status[2]);
 
 				fcport->flags &= ~FCF_ASYNC_SENT;
-				qla2x00_set_fcport_disc_state(fcport,
-				    DSC_LOGIN_FAILED);
-				set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+				qlt_schedule_sess_for_deletion(fcport);
 				break;
 			}
 			break;
@@ -2902,8 +2999,7 @@
 			    fw_status[0], fw_status[1], fw_status[2]);
 
 			sp->fcport->flags &= ~FCF_ASYNC_SENT;
-			qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_FAILED);
-			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+			qlt_schedule_sess_for_deletion(fcport);
 			break;
 		}
 
@@ -2912,7 +3008,8 @@
 			struct srb_iocb *elsio = &sp->u.iocb_cmd;
 
 			qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
-			sp->free(sp);
+			/* ref: INIT */
+			kref_put(&sp->cmd_kref, qla2x00_sp_release);
 			return;
 		}
 		e->u.iosb.sp = sp;
@@ -2930,7 +3027,9 @@
 	int rval = QLA_SUCCESS;
 	void	*ptr, *resp_ptr;
 
-	/* Alloc SRB structure */
+	/* Alloc SRB structure
+	 * ref: INIT
+	 */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp) {
 		ql_log(ql_log_info, vha, 0x70e6,
@@ -2943,19 +3042,18 @@
 	qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_PEND);
 	elsio = &sp->u.iocb_cmd;
 	ql_dbg(ql_dbg_io, vha, 0x3073,
-	    "Enter: PLOGI portid=%06x\n", fcport->d_id.b24);
+	       "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
+
+	if (wait)
+		sp->flags = SRB_WAKEUP_ON_COMP;
 
 	sp->type = SRB_ELS_DCMD;
 	sp->name = "ELS_DCMD";
 	sp->fcport = fcport;
+	qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT + 2,
+			     qla2x00_els_dcmd2_sp_done);
+	sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout;
 
-	elsio->timeout = qla2x00_els_dcmd2_iocb_timeout;
-	if (wait)
-		sp->flags = SRB_WAKEUP_ON_COMP;
-
-	qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT + 2);
-
-	sp->done = qla2x00_els_dcmd2_sp_done;
 	elsio->u.els_plogi.tx_size = elsio->u.els_plogi.rx_size = DMA_POOL_SIZE;
 
 	ptr = elsio->u.els_plogi.els_plogi_pyld =
@@ -2981,12 +3079,17 @@
 	memset(ptr, 0, sizeof(struct els_plogi_payload));
 	memset(resp_ptr, 0, sizeof(struct els_plogi_payload));
 	memcpy(elsio->u.els_plogi.els_plogi_pyld->data,
-	    &ha->plogi_els_payld.data,
-	    sizeof(elsio->u.els_plogi.els_plogi_pyld->data));
+	    &ha->plogi_els_payld.fl_csp, LOGIN_TEMPLATE_SIZE);
 
 	elsio->u.els_plogi.els_cmd = els_opcode;
 	elsio->u.els_plogi.els_plogi_pyld->opcode = els_opcode;
 
+	if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
+		struct fc_els_flogi *p = ptr;
+
+		p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
+	}
+
 	ql_dbg(ql_dbg_disc + ql_dbg_buffer, vha, 0x3073, "PLOGI buffer:\n");
 	ql_dump_buffer(ql_dbg_disc + ql_dbg_buffer, vha, 0x0109,
 	    (uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
@@ -3015,19 +3118,53 @@
 out:
 	fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 	qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
 
+/* It is assumed that the qpair lock is held by the caller. */
+void qla_els_pt_iocb(struct scsi_qla_host *vha,
+	struct els_entry_24xx *els_iocb,
+	struct qla_els_pt_arg *a)
+{
+	els_iocb->entry_type = ELS_IOCB_TYPE;
+	els_iocb->entry_count = 1;
+	els_iocb->sys_define = 0;
+	els_iocb->entry_status = 0;
+	els_iocb->handle = QLA_SKIP_HANDLE;
+	els_iocb->nport_handle = a->nport_handle;
+	els_iocb->rx_xchg_address = a->rx_xchg_address;
+	els_iocb->tx_dsd_count = cpu_to_le16(1);
+	els_iocb->vp_index = a->vp_idx;
+	els_iocb->sof_type = EST_SOFI3;
+	els_iocb->rx_dsd_count = cpu_to_le16(0);
+	els_iocb->opcode = a->els_opcode;
+
+	els_iocb->d_id[0] = a->did.b.al_pa;
+	els_iocb->d_id[1] = a->did.b.area;
+	els_iocb->d_id[2] = a->did.b.domain;
+	/* For SID the byte order is different than DID */
+	els_iocb->s_id[1] = vha->d_id.b.al_pa;
+	els_iocb->s_id[2] = vha->d_id.b.area;
+	els_iocb->s_id[0] = vha->d_id.b.domain;
+
+	els_iocb->control_flags = cpu_to_le16(a->control_flags);
+
+	els_iocb->tx_byte_count = cpu_to_le32(a->tx_byte_count);
+	els_iocb->tx_len = cpu_to_le32(a->tx_len);
+	put_unaligned_le64(a->tx_addr, &els_iocb->tx_address);
+
+	els_iocb->rx_byte_count = cpu_to_le32(a->rx_byte_count);
+	els_iocb->rx_len = cpu_to_le32(a->rx_len);
+	put_unaligned_le64(a->rx_addr, &els_iocb->rx_address);
+}
+
 static void
 qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
 {
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 
         els_iocb->entry_type = ELS_IOCB_TYPE;
@@ -3077,11 +3214,7 @@
 	uint16_t tot_dsds;
 	scsi_qla_host_t *vha = sp->vha;
 	struct qla_hw_data *ha = vha->hw;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	int entry_count = 1;
 
 	memset(ct_iocb, 0, sizeof(ms_iocb_entry_t));
@@ -3148,11 +3281,7 @@
 	uint16_t cmd_dsds, rsp_dsds;
 	scsi_qla_host_t *vha = sp->vha;
 	struct qla_hw_data *ha = vha->hw;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	int entry_count = 1;
 	cont_a64_entry_t *cont_pkt = NULL;
 
@@ -3553,6 +3682,7 @@
 	struct srb_iocb *aio = &sp->u.iocb_cmd;
 	scsi_qla_host_t *vha = sp->vha;
 	struct req_que *req = sp->qpair->req;
+	srb_t *orig_sp = sp->cmd_sp;
 
 	memset(abt_iocb, 0, sizeof(struct abort_entry_24xx));
 	abt_iocb->entry_type = ABORT_IOCB_TYPE;
@@ -3569,6 +3699,11 @@
 			    aio->u.abt.cmd_hndl);
 	abt_iocb->vp_index = vha->vp_idx;
 	abt_iocb->req_que_no = aio->u.abt.req_que_no;
+
+	/* need to pass original sp */
+	if (orig_sp)
+		qla_nvme_abort_set_option(abt_iocb, orig_sp);
+
 	/* Send the command to the firmware */
 	wmb();
 }
@@ -3620,16 +3755,25 @@
 	nack->u.isp24.srr_reject_code = 0;
 	nack->u.isp24.srr_reject_code_expl = 0;
 	nack->u.isp24.vp_index = ntfy->u.isp24.vp_index;
+
+	if (ntfy->u.isp24.status_subcode == ELS_PLOGI &&
+	    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP) &&
+	    sp->vha->hw->flags.edif_enabled) {
+		ql_dbg(ql_dbg_disc, sp->vha, 0x3074,
+		    "%s PLOGI NACK sent with FC SECURITY bit, hdl=%x, loopid=%x, to pid %06x\n",
+		    sp->name, sp->handle, sp->fcport->loop_id,
+		    sp->fcport->d_id.b24);
+		nack->u.isp24.flags |= cpu_to_le16(NOTIFY_ACK_FLAGS_FCSP);
+	}
 }
 
 /*
  * Build NVME LS request
  */
-static int
+static void
 qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt)
 {
 	struct srb_iocb *nvme;
-	int     rval = QLA_SUCCESS;
 
 	nvme = &sp->u.iocb_cmd;
 	cmd_pkt->entry_type = PT_LS4_REQUEST;
@@ -3649,8 +3793,6 @@
 	cmd_pkt->rx_byte_count = cpu_to_le32(nvme->u.nvme.rsp_len);
 	cmd_pkt->dsd[1].length = cpu_to_le32(nvme->u.nvme.rsp_len);
 	put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address);
-
-	return rval;
 }
 
 static void
@@ -3697,6 +3839,9 @@
 	void *pkt;
 	unsigned long flags;
 
+	if (vha->hw->flags.eeh_busy)
+		return -EIO;
+
 	spin_lock_irqsave(qp->qp_lock_ptr, flags);
 	pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
 	if (!pkt) {
@@ -3724,6 +3869,10 @@
 	case SRB_ELS_CMD_HST:
 		qla24xx_els_iocb(sp, pkt);
 		break;
+	case SRB_ELS_CMD_HST_NOLOGIN:
+		qla_els_pt_iocb(sp->vha, pkt,  &sp->u.bsg_cmd.u.els_arg);
+		((struct els_entry_24xx *)pkt)->handle = sp->handle;
+		break;
 	case SRB_CT_CMD:
 		IS_FWI2_CAPABLE(ha) ?
 		    qla24xx_ct_iocb(sp, pkt) :
@@ -3771,12 +3920,25 @@
 	case SRB_PRLO_CMD:
 		qla24xx_prlo_iocb(sp, pkt);
 		break;
+	case SRB_SA_UPDATE:
+		qla24xx_sa_update_iocb(sp, pkt);
+		break;
+	case SRB_SA_REPLACE:
+		qla24xx_sa_replace_iocb(sp, pkt);
+		break;
 	default:
 		break;
 	}
 
-	if (sp->start_timer)
+	if (sp->start_timer) {
+		/* ref: TMR timer ref
+		 * This code should stay just before the qla2x00_start_iocbs()
+		 * call.  That makes sure the caller does not need to do a
+		 * kref_put for this ref, even on failure.
+		 */
+		kref_get(&sp->cmd_kref);
 		add_timer(&sp->u.iocb_cmd.timer);
+	}
 
 	wmb();
 	qla2x00_start_iocbs(vha, qp->req);
@@ -3796,11 +3958,7 @@
 	struct scatterlist *sg;
 	int index;
 	int entry_count = 1;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 
 	/*Update entry type to indicate bidir command */
 	put_unaligned_le32(COMMAND_BIDIRECTIONAL, &cmd_pkt->entry_type);
@@ -3918,8 +4076,14 @@
 
 	/* Check for room on request queue. */
 	if (req->cnt < req_cnt + 2) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
+
 		if  (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
 		else
@@ -3958,5 +4122,6 @@
 	qla2x00_start_iocbs(vha, req);
 queuing_error:
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	return rval;
 }

diff --git a/scst/qla2x00t-32gbit/qla_isr.c b/scst/qla2x00t-32gbit/qla_isr.c
index 4323b97..a216840 100644
--- a/scst/qla2x00t-32gbit/qla_isr.c
+++ b/scst/qla2x00t-32gbit/qla_isr.c

@@ -1,11 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_target.h"
+#include "qla_gbl.h"
 
 #include <linux/delay.h>
 #include <linux/slab.h>
@@ -57,15 +57,15 @@
 	ql_dump_buffer(ql_dbg_init + ql_dbg_verbose, vha, 0x508f,
 		       pkt, pkt_size);
 
-	fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt);
+	fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
 }
 
 const char *const port_state_str[] = {
-	"Unknown",
-	"UNCONFIGURED",
-	"DEAD",
-	"LOST",
-	"ONLINE"
+	[FCS_UNKNOWN]		= "Unknown",
+	[FCS_UNCONFIGURED]	= "UNCONFIGURED",
+	[FCS_DEVICE_DEAD]	= "DEAD",
+	[FCS_DEVICE_LOST]	= "LOST",
+	[FCS_ONLINE]		= "ONLINE"
 };
 
 static void
@@ -182,6 +182,149 @@
 }
 
 /**
+ * __qla_consume_iocb - this routine is used to tell fw that the driver has
+ *   processed or consumed the head IOCB along with the continuation IOCBs
+ *   from the provided response queue.
+ * @vha: host adapter pointer
+ * @pkt: pointer to current packet.  On return, this pointer shall move
+ *       to the next packet.
+ * @rsp: response queue pointer.
+ *
+ * It is assumed pkt is the head IOCB, not a continuation IOCB.
+ */
+void __qla_consume_iocb(struct scsi_qla_host *vha,
+	void **pkt, struct rsp_que **rsp)
+{
+	struct rsp_que *rsp_q = *rsp;
+	response_t *new_pkt;
+	uint16_t entry_count_remaining;
+	struct purex_entry_24xx *purex = *pkt;
+
+	entry_count_remaining = purex->entry_count;
+	while (entry_count_remaining > 0) {
+		new_pkt = rsp_q->ring_ptr;
+		*pkt = new_pkt;
+
+		rsp_q->ring_index++;
+		if (rsp_q->ring_index == rsp_q->length) {
+			rsp_q->ring_index = 0;
+			rsp_q->ring_ptr = rsp_q->ring;
+		} else {
+			rsp_q->ring_ptr++;
+		}
+
+		new_pkt->signature = RESPONSE_PROCESSED;
+		/* flush signature */
+		wmb();
+		--entry_count_remaining;
+	}
+}
+
+/**
+ * __qla_copy_purex_to_buffer - extract ELS payload from Purex IOCB
+ *    and save to provided buffer
+ * @vha: host adapter pointer
+ * @pkt: pointer to Purex IOCB
+ * @rsp: response queue
+ * @buf: buffer the extracted ELS payload is copied to
+ * @buf_len: buffer length
+ */
+int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha,
+	void **pkt, struct rsp_que **rsp, u8 *buf, u32 buf_len)
+{
+	struct purex_entry_24xx *purex = *pkt;
+	struct rsp_que *rsp_q = *rsp;
+	sts_cont_entry_t *new_pkt;
+	uint16_t no_bytes = 0, total_bytes = 0, pending_bytes = 0;
+	uint16_t buffer_copy_offset = 0;
+	uint16_t entry_count_remaining;
+	u16 tpad;
+
+	entry_count_remaining = purex->entry_count;
+	total_bytes = (le16_to_cpu(purex->frame_size) & 0x0FFF)
+		- PURX_ELS_HEADER_SIZE;
+
+	/*
+	 * The payload may not end on a 4-byte boundary.  Round up / pad
+	 * to leave room for the byte swap before saving the data.
+	 */
+	tpad = roundup(total_bytes, 4);
+
+	if (buf_len < tpad) {
+		ql_dbg(ql_dbg_async, vha, 0x5084,
+		    "%s buffer is too small %d < %d\n",
+		    __func__, buf_len, tpad);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return -EIO;
+	}
+
+	pending_bytes = total_bytes = tpad;
+	no_bytes = (pending_bytes > sizeof(purex->els_frame_payload))  ?
+	    sizeof(purex->els_frame_payload) : pending_bytes;
+
+	memcpy(buf, &purex->els_frame_payload[0], no_bytes);
+	buffer_copy_offset += no_bytes;
+	pending_bytes -= no_bytes;
+	--entry_count_remaining;
+
+	((response_t *)purex)->signature = RESPONSE_PROCESSED;
+	/* flush signature */
+	wmb();
+
+	do {
+		while ((total_bytes > 0) && (entry_count_remaining > 0)) {
+			new_pkt = (sts_cont_entry_t *)rsp_q->ring_ptr;
+			*pkt = new_pkt;
+
+			if (new_pkt->entry_type != STATUS_CONT_TYPE) {
+				ql_log(ql_log_warn, vha, 0x507a,
+				    "Unexpected IOCB type, partial data 0x%x\n",
+				    buffer_copy_offset);
+				break;
+			}
+
+			rsp_q->ring_index++;
+			if (rsp_q->ring_index == rsp_q->length) {
+				rsp_q->ring_index = 0;
+				rsp_q->ring_ptr = rsp_q->ring;
+			} else {
+				rsp_q->ring_ptr++;
+			}
+			no_bytes = (pending_bytes > sizeof(new_pkt->data)) ?
+			    sizeof(new_pkt->data) : pending_bytes;
+			if ((buffer_copy_offset + no_bytes) <= total_bytes) {
+				memcpy((buf + buffer_copy_offset), new_pkt->data,
+				    no_bytes);
+				buffer_copy_offset += no_bytes;
+				pending_bytes -= no_bytes;
+				--entry_count_remaining;
+			} else {
+				ql_log(ql_log_warn, vha, 0x5044,
+				    "Attempt to copy more that we got, optimizing..%x\n",
+				    buffer_copy_offset);
+				memcpy((buf + buffer_copy_offset), new_pkt->data,
+				    total_bytes - buffer_copy_offset);
+			}
+
+			((response_t *)new_pkt)->signature = RESPONSE_PROCESSED;
+			/* flush signature */
+			wmb();
+		}
+
+		if (pending_bytes != 0 || entry_count_remaining != 0) {
+			ql_log(ql_log_fatal, vha, 0x508b,
+			    "Dropping partial Data, underrun bytes = 0x%x, entry cnts 0x%x\n",
+			    total_bytes, entry_count_remaining);
+			return -EIO;
+		}
+	} while (entry_count_remaining > 0);
+
+	be32_to_cpu_array((u32 *)buf, (__be32 *)buf, total_bytes >> 2);
+
+	return 0;
+}
+
+/**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
  * @irq: interrupt number
  * @dev_id: SCSI driver HA context
@@ -282,12 +425,7 @@
 		if (!test_and_set_bit(PFLG_DISCONNECTED, &vha->pci_flags) &&
 		    !test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags) &&
 		    !test_bit(PFLG_DRIVER_PROBING, &vha->pci_flags)) {
-			/*
-			 * Schedule this (only once) on the default system
-			 * workqueue so that all the adapter workqueues and the
-			 * DPC thread can be shutdown cleanly.
-			 */
-			schedule_work(&vha->hw->board_disable);
+			qla_schedule_eeh_work(vha);
 		}
 		return true;
 	} else
@@ -522,7 +660,7 @@
 qla2x00_get_link_speed_str(struct qla_hw_data *ha, uint16_t speed)
 {
 	static const char *const link_speeds[] = {
-		"1", "2", "?", "4", "8", "16", "32", "10"
+		"1", "2", "?", "4", "8", "16", "32", "64", "10"
 	};
 #define	QLA_LAST_SPEED (ARRAY_SIZE(link_speeds) - 1)
 
@@ -774,12 +912,12 @@
 qla27xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb)
 {
 	struct qla_hw_data *ha = vha->hw;
-	bool reset_isp_needed = 0;
+	bool reset_isp_needed = false;
 
 	ql_log(ql_log_warn, vha, 0x02f0,
 	       "MPI Heartbeat stop. MPI reset is%s needed. "
 	       "MB0[%xh] MB1[%xh] MB2[%xh] MB3[%xh]\n",
-	       mb[0] & BIT_8 ? "" : " not",
+	       mb[1] & BIT_8 ? "" : " not",
 	       mb[0], mb[1], mb[2], mb[3]);
 
 	if ((mb[1] & BIT_8) == 0)
@@ -790,7 +928,7 @@
 
 	if (ql2xfulldump_on_mpifail) {
 		ha->isp_ops->fw_dump(vha);
-		reset_isp_needed = 1;
+		reset_isp_needed = true;
 	}
 
 	ha->isp_ops->mpi_fw_dump(vha, 1);
@@ -856,8 +994,8 @@
  * @vha: SCSI driver HA context
  * @pkt: ELS packet
  */
-static struct purex_item *
-qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt)
+static struct purex_item
+*qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt)
 {
 	struct purex_item *item;
 
@@ -1072,6 +1210,9 @@
 
 	case MBA_SYSTEM_ERR:		/* System Error */
 		mbx = 0;
+
+		vha->hw_err_cnt++;
+
 		if (IS_QLA81XX(ha) || IS_QLA83XX(ha) ||
 		    IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
 			u16 m[4];
@@ -1125,6 +1266,8 @@
 		ql_log(ql_log_warn, vha, 0x5006,
 		    "ISP Request Transfer Error (%x).\n",  mb[1]);
 
+		vha->hw_err_cnt++;
+
 		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		break;
 
@@ -1132,6 +1275,8 @@
 		ql_log(ql_log_warn, vha, 0x5007,
 		    "ISP Response Transfer Error (%x).\n", mb[1]);
 
+		vha->hw_err_cnt++;
+
 		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		break;
 
@@ -1189,12 +1334,18 @@
 		vha->flags.management_server_logged_in = 0;
 		qla2x00_post_aen_work(vha, FCH_EVT_LINKUP, ha->link_data_rate);
 
+		if (vha->link_down_time < vha->hw->port_down_retry_count) {
+			vha->short_link_down_cnt++;
+			vha->link_down_time = QLA2XX_MAX_LINK_DOWN_TIME;
+		}
+
 		break;
 
 	case MBA_LOOP_DOWN:		/* Loop Down Event */
 		SAVE_TOPO(ha);
 		ha->flags.lip_ae = 0;
 		ha->current_topology = 0;
+		vha->link_down_time = 0;
 
 		mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha))
 			? rd_reg_word(&reg24->mailbox4) : 0;
@@ -1215,9 +1366,7 @@
 			if (!vha->vp_idx) {
 				if (ha->flags.fawwpn_enabled &&
 				    (ha->current_topology == ISP_CFG_F)) {
-					void *wwpn = ha->init_cb->port_name;
-
-					memcpy(vha->port_name, wwpn, WWN_SIZE);
+					memcpy(vha->port_name, ha->port_name, WWN_SIZE);
 					fc_host_port_name(vha->host) =
 					    wwn_to_u64(vha->port_name);
 					ql_dbg(ql_dbg_init + ql_dbg_verbose,
@@ -1455,9 +1604,9 @@
 		if (ha->flags.npiv_supported && vha->vp_idx != (mb[3] & 0xff))
 			break;
 
-		ql_dbg(ql_dbg_async, vha, 0x5013,
-		    "RSCN database changed -- %04x %04x %04x.\n",
-		    mb[1], mb[2], mb[3]);
+		ql_log(ql_log_warn, vha, 0x5013,
+		       "RSCN database changed -- %04x %04x %04x.\n",
+		       mb[1], mb[2], mb[3]);
 
 		rscn_entry = ((mb[1] & 0xff) << 16) | mb[2];
 		host_pid = (vha->d_id.b.domain << 16) | (vha->d_id.b.area << 8)
@@ -1516,6 +1665,7 @@
 		ql_dbg(ql_dbg_async, vha, 0x5016,
 		    "Discard RND Frame -- %04x %04x %04x.\n",
 		    mb[1], mb[2], mb[3]);
+		vha->interface_err_cnt++;
 		break;
 
 	case MBA_TRACE_NOTIFICATION:
@@ -1605,6 +1755,7 @@
 
 	case MBA_IDC_AEN:
 		if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+			vha->hw_err_cnt++;
 			qla27xx_handle_8200_aen(vha, mb);
 		} else if (IS_QLA83XX(ha)) {
 			mb[4] = rd_reg_word(&reg24->mailbox4);
@@ -1620,6 +1771,9 @@
 		break;
 
 	case MBA_DPORT_DIAGNOSTICS:
+		if ((mb[1] & 0xF) == AEN_DONE_DIAG_TEST_WITH_NOERR ||
+		    (mb[1] & 0xF) == AEN_DONE_DIAG_TEST_WITH_ERR)
+			vha->dport_status &= ~DPORT_DIAG_IN_PROGRESS;
 		ql_dbg(ql_dbg_async, vha, 0x5052,
 		    "D-Port Diagnostics: %04x %04x %04x %04x\n",
 		    mb[0], mb[1], mb[2], mb[3]);
@@ -1654,8 +1808,6 @@
 	case MBA_TEMPERATURE_ALERT:
 		ql_dbg(ql_dbg_async, vha, 0x505e,
 		    "TEMPERATURE ALERT: %04x %04x %04x\n", mb[1], mb[2], mb[3]);
-		if (mb[1] == 0x12)
-			schedule_work(&ha->board_disable);
 		break;
 
 	case MBA_TRANS_INSERT:
@@ -1728,35 +1880,38 @@
 {
 	struct qla_hw_data *ha = vha->hw;
 	sts_entry_t *pkt = iocb;
-	srb_t *sp = NULL;
+	srb_t *sp;
 	uint16_t index;
 
+	if (pkt->handle == QLA_SKIP_HANDLE)
+		return NULL;
+
 	index = LSW(pkt->handle);
 	if (index >= req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x5031,
-			   "Invalid command index (%x) type %8ph.\n",
-			   index, iocb);
+			   "%s: Invalid command index (%x) type %8ph.\n",
+			   func, index, iocb);
 		if (IS_P3P_TYPE(ha))
 			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
 		else
 			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
-		goto done;
+		return NULL;
 	}
 	sp = req->outstanding_cmds[index];
 	if (!sp) {
 		ql_log(ql_log_warn, vha, 0x5032,
-		    "Invalid completion handle (%x) -- timed-out.\n", index);
-		return sp;
+			"%s: Invalid completion handle (%x) -- timed-out.\n",
+			func, index);
+		return NULL;
 	}
 	if (sp->handle != index) {
 		ql_log(ql_log_warn, vha, 0x5033,
-		    "SRB handle (%x) mismatch %x.\n", sp->handle, index);
+			"%s: SRB handle (%x) mismatch %x.\n", func,
+			sp->handle, index);
 		return NULL;
 	}
 
 	req->outstanding_cmds[index] = NULL;
-
-done:
 	return sp;
 }
 
@@ -1851,6 +2006,7 @@
     struct mbx_24xx_entry *pkt)
 {
 	const char func[] = "MBX-IOCB2";
+	struct qla_hw_data *ha = vha->hw;
 	srb_t *sp;
 	struct srb_iocb *si;
 	u16 sz, i;
@@ -1860,6 +2016,18 @@
 	if (!sp)
 		return;
 
+	if (sp->type == SRB_SCSI_CMD ||
+	    sp->type == SRB_NVME_CMD ||
+	    sp->type == SRB_TM_CMD) {
+		ql_log(ql_log_warn, vha, 0x509d,
+			"Inconsistent event entry type %d\n", sp->type);
+		if (IS_P3P_TYPE(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		return;
+	}
+
 	si = &sp->u.iocb_cmd;
 	sz = min(ARRAY_SIZE(pkt->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.in_mb));
 
@@ -1896,11 +2064,7 @@
 	const char func[] = "CT_IOCB";
 	const char *type;
 	srb_t *sp;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
 	uint16_t comp_status;
 	int res = 0;
@@ -1966,33 +2130,69 @@
 }
 
 static void
-qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
+qla24xx_els_ct_entry(scsi_qla_host_t *v, struct req_que *req,
     struct sts_entry_24xx *pkt, int iocb_type)
 {
 	struct els_sts_entry_24xx *ese = (struct els_sts_entry_24xx *)pkt;
 	const char func[] = "ELS_CT_IOCB";
 	const char *type;
 	srb_t *sp;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
 	uint16_t comp_status;
 	uint32_t fw_status[3];
-	int res;
+	int res, logit = 1;
 	struct srb_iocb *els;
+	uint n;
+	scsi_qla_host_t *vha;
+	struct els_sts_entry_24xx *e = (struct els_sts_entry_24xx *)pkt;
 
-	sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+	sp = qla2x00_get_sp_from_handle(v, func, req, pkt);
 	if (!sp)
 		return;
+	bsg_job = sp->u.bsg_job;
+	vha = sp->vha;
 
 	type = NULL;
+
+	comp_status = fw_status[0] = le16_to_cpu(pkt->comp_status);
+	fw_status[1] = le32_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_1);
+	fw_status[2] = le32_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_2);
+
 	switch (sp->type) {
 	case SRB_ELS_CMD_RPT:
 	case SRB_ELS_CMD_HST:
+		type = "rpt hst";
+		break;
+	case SRB_ELS_CMD_HST_NOLOGIN:
 		type = "els";
+		{
+			struct els_entry_24xx *els = (void *)pkt;
+			struct qla_bsg_auth_els_request *p =
+				(struct qla_bsg_auth_els_request *)bsg_job->request;
+
+			ql_dbg(ql_dbg_user, vha, 0x700f,
+			     "%s %s. portid=%02x%02x%02x status %x xchg %x bsg ptr %p\n",
+			     __func__, sc_to_str(p->e.sub_cmd),
+			     e->d_id[2], e->d_id[1], e->d_id[0],
+			     comp_status, p->e.extra_rx_xchg_address, bsg_job);
+
+			if (!(le16_to_cpu(els->control_flags) & ECF_PAYLOAD_DESCR_MASK)) {
+				if (sp->remap.remapped) {
+					n = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+						bsg_job->reply_payload.sg_cnt,
+						sp->remap.rsp.buf,
+						sp->remap.rsp.len);
+					ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x700e,
+					   "%s: SG copied %x of %x\n",
+					   __func__, n, sp->remap.rsp.len);
+				} else {
+					ql_dbg(ql_dbg_user, vha, 0x700f,
+					   "%s: NOT REMAPPED (error)...!!!\n",
+					   __func__);
+				}
+			}
+		}
 		break;
 	case SRB_CT_CMD:
 		type = "ct pass-through";
@@ -2022,10 +2222,6 @@
 		return;
 	}
 
-	comp_status = fw_status[0] = le16_to_cpu(pkt->comp_status);
-	fw_status[1] = le32_to_cpu(ese->error_subcode_1);
-	fw_status[2] = le32_to_cpu(ese->error_subcode_2);
-
 	if (iocb_type == ELS_IOCB_TYPE) {
 		els = &sp->u.iocb_cmd;
 		els->u.els_plogi.fw_status[0] = cpu_to_le32(fw_status[0]);
@@ -2039,15 +2235,63 @@
 				res =  DID_OK << 16;
 				els->u.els_plogi.len = cpu_to_le16(le32_to_cpu(
 					ese->total_byte_count));
+
+				if (sp->remap.remapped &&
+				    ((u8 *)sp->remap.rsp.buf)[0] == ELS_LS_ACC) {
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "%s IOCB Done LS_ACC %02x%02x%02x -> %02x%02x%02x",
+					    __func__, e->s_id[0], e->s_id[2], e->s_id[1],
+					    e->d_id[2], e->d_id[1], e->d_id[0]);
+					logit = 0;
+				}
+
+			} else if (comp_status == CS_PORT_LOGGED_OUT) {
+				ql_dbg(ql_dbg_disc, vha, 0x911e,
+				       "%s %d schedule session deletion\n",
+				       __func__, __LINE__);
+
+				els->u.els_plogi.len = 0;
+				res = DID_IMM_RETRY << 16;
+				qlt_schedule_sess_for_deletion(sp->fcport);
 			} else {
 				els->u.els_plogi.len = 0;
 				res = DID_ERROR << 16;
 			}
+
+			if (sp->remap.remapped &&
+			    ((u8 *)sp->remap.rsp.buf)[0] == ELS_LS_RJT) {
+				if (logit) {
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "%s IOCB Done LS_RJT hdl=%x comp_status=0x%x\n",
+					    type, sp->handle, comp_status);
+
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "subcode 1=0x%x subcode 2=0x%x bytes=0x%x %02x%02x%02x -> %02x%02x%02x\n",
+					    fw_status[1], fw_status[2],
+					    le32_to_cpu(((struct els_sts_entry_24xx *)
+						pkt)->total_byte_count),
+					    e->s_id[0], e->s_id[2], e->s_id[1],
+					    e->d_id[2], e->d_id[1], e->d_id[0]);
+				}
+				if (sp->fcport && sp->fcport->flags & FCF_FCSP_DEVICE &&
+				    sp->type == SRB_ELS_CMD_HST_NOLOGIN) {
+					ql_dbg(ql_dbg_edif, vha, 0x911e,
+					    "%s rcv reject. Sched delete\n", __func__);
+					qlt_schedule_sess_for_deletion(sp->fcport);
+				}
+			} else if (logit) {
+				ql_log(ql_log_info, vha, 0x503f,
+				    "%s IOCB Done hdl=%x comp_status=0x%x\n",
+				    type, sp->handle, comp_status);
+				ql_log(ql_log_info, vha, 0x503f,
+				    "subcode 1=0x%x subcode 2=0x%x bytes=0x%x %02x%02x%02x -> %02x%02x%02x\n",
+				    fw_status[1], fw_status[2],
+				    le32_to_cpu(((struct els_sts_entry_24xx *)
+				    pkt)->total_byte_count),
+				    e->s_id[0], e->s_id[2], e->s_id[1],
+				    e->d_id[2], e->d_id[1], e->d_id[0]);
+			}
 		}
-		ql_dbg(ql_dbg_disc, vha, 0x503f,
-		    "ELS IOCB Done -%s hdl=%x comp_status=0x%x error subcode 1=0x%x error subcode 2=0x%x total_byte=0x%x\n",
-		    type, sp->handle, comp_status, fw_status[1], fw_status[2],
-		    le32_to_cpu(ese->total_byte_count));
 		goto els_ct_done;
 	}
 
@@ -2106,6 +2350,7 @@
 	struct srb_iocb *lio;
 	uint16_t *data;
 	uint32_t iop[2];
+	int logit = 1;
 
 	sp = qla2x00_get_sp_from_handle(vha, func, req, logio);
 	if (!sp)
@@ -2152,6 +2397,10 @@
 		if (sp->type != SRB_LOGIN_CMD)
 			goto logio_done;
 
+		lio->u.logio.iop[1] = le32_to_cpu(logio->io_parameter[5]);
+		if (le32_to_cpu(logio->io_parameter[5]) & LIO_COMM_FEAT_FCSP)
+			fcport->flags |= FCF_FCSP_DEVICE;
+
 		iop[0] = le32_to_cpu(logio->io_parameter[0]);
 		if (iop[0] & BIT_4) {
 			fcport->port_type = FCT_TARGET;
@@ -2179,9 +2428,11 @@
 	case LSC_SCODE_PORTID_USED:
 		data[0] = MBS_PORT_ID_USED;
 		data[1] = LSW(iop[1]);
+		logit = 0;
 		break;
 	case LSC_SCODE_NPORT_USED:
 		data[0] = MBS_LOOP_ID_USED;
+		logit = 0;
 		break;
 	case LSC_SCODE_CMD_FAILED:
 		if (iop[1] == 0x0606) {
@@ -2214,12 +2465,20 @@
 		break;
 	}
 
-	ql_dbg(ql_dbg_async, sp->vha, 0x5037,
-	    "Async-%s failed: handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
-	    type, sp->handle, fcport->d_id.b24, fcport->port_name,
-	    le16_to_cpu(logio->comp_status),
-	    le32_to_cpu(logio->io_parameter[0]),
-	    le32_to_cpu(logio->io_parameter[1]));
+	if (logit)
+		ql_log(ql_log_warn, sp->vha, 0x5037, "Async-%s failed: "
+		       "handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
+		       type, sp->handle, fcport->d_id.b24, fcport->port_name,
+		       le16_to_cpu(logio->comp_status),
+		       le32_to_cpu(logio->io_parameter[0]),
+		       le32_to_cpu(logio->io_parameter[1]));
+	else
+		ql_dbg(ql_dbg_disc, sp->vha, 0x5037, "Async-%s failed: "
+		       "handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
+		       type, sp->handle, fcport->d_id.b24, fcport->port_name,
+		       le16_to_cpu(logio->comp_status),
+		       le32_to_cpu(logio->io_parameter[0]),
+		       le32_to_cpu(logio->io_parameter[1]));
 
 logio_done:
 	sp->done(sp, 0);
@@ -2234,11 +2493,13 @@
 	srb_t *sp;
 	struct srb_iocb *iocb;
 	struct sts_entry_24xx *sts = (struct sts_entry_24xx *)tsk;
+	u16 comp_status;
 
 	sp = qla2x00_get_sp_from_handle(vha, func, req, tsk);
 	if (!sp)
 		return;
 
+	comp_status = le16_to_cpu(sts->comp_status);
 	iocb = &sp->u.iocb_cmd;
 	type = sp->name;
 	fcport = sp->fcport;
@@ -2252,10 +2513,11 @@
 	} else if (sts->comp_status != cpu_to_le16(CS_COMPLETE)) {
 		ql_log(ql_log_warn, fcport->vha, 0x5039,
 		    "Async-%s error - hdl=%x completion status(%x).\n",
-		    type, sp->handle, sts->comp_status);
+		    type, sp->handle, comp_status);
 		iocb->u.tmf.data = QLA_FUNCTION_FAILED;
 	} else if ((le16_to_cpu(sts->scsi_status) &
 	    SS_RESPONSE_INFO_LEN_VALID)) {
+		host_to_fcp_swap(sts->data, sizeof(sts->data));
 		if (le32_to_cpu(sts->rsp_data_len) < 4) {
 			ql_log(ql_log_warn, fcport->vha, 0x503b,
 			    "Async-%s error - hdl=%x not enough response(%d).\n",
@@ -2268,6 +2530,30 @@
 		}
 	}
 
+	switch (comp_status) {
+	case CS_PORT_LOGGED_OUT:
+	case CS_PORT_CONFIG_CHG:
+	case CS_PORT_BUSY:
+	case CS_INCOMPLETE:
+	case CS_PORT_UNAVAILABLE:
+	case CS_TIMEOUT:
+	case CS_RESET:
+		if (atomic_read(&fcport->state) == FCS_ONLINE) {
+			ql_dbg(ql_dbg_disc, fcport->vha, 0x3021,
+			       "-Port to be marked lost on fcport=%02x%02x%02x, current port state= %s comp_status %x.\n",
+			       fcport->d_id.b.domain, fcport->d_id.b.area,
+			       fcport->d_id.b.al_pa,
+			       port_state_str[FCS_ONLINE],
+			       comp_status);
+
+			qlt_schedule_sess_for_deletion(fcport);
+		}
+		break;
+
+	default:
+		break;
+	}
+
 	if (iocb->u.tmf.data != QLA_SUCCESS)
 		ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, sp->vha, 0x5055,
 		    sts, sizeof(*sts));
@@ -2295,6 +2581,8 @@
 
 	if (unlikely(iocb->u.nvme.aen_op))
 		atomic_dec(&sp->vha->hw->nvme_active_aen_cnt);
+	else
+		sp->qpair->cmd_completion_cnt++;
 
 	if (unlikely(comp_status != CS_COMPLETE))
 		logit = 1;
@@ -2367,9 +2655,9 @@
 		tgt_xfer_len = 0;
 #endif
 		if (fd->transferred_length != tgt_xfer_len) {
-			ql_dbg(ql_dbg_io, fcport->vha, 0x3079,
-				"Dropped frame(s) detected (sent/rcvd=%u/%u).\n",
-				tgt_xfer_len, fd->transferred_length);
+			ql_log(ql_log_warn, fcport->vha, 0x3079,
+			       "Dropped frame(s) detected (sent/rcvd=%u/%u).\n",
+			       tgt_xfer_len, fd->transferred_length);
 			logit = 1;
 		} else if (le16_to_cpu(comp_status) == CS_DATA_UNDERRUN) {
 			/*
@@ -2381,7 +2669,7 @@
 	}
 
 	if (unlikely(logit))
-		ql_log(ql_log_warn, fcport->vha, 0x5060,
+		ql_dbg(ql_dbg_io, fcport->vha, 0x5060,
 		   "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x  ox_id=%x\n",
 		   sp->name, sp->handle, comp_status,
 		   fd->transferred_length, le32_to_cpu(sts->residual_len),
@@ -2399,6 +2687,15 @@
 	case CS_PORT_UNAVAILABLE:
 	case CS_PORT_LOGGED_OUT:
 		fcport->nvme_flag |= NVME_FLAG_RESETTING;
+		if (atomic_read(&fcport->state) == FCS_ONLINE) {
+			ql_dbg(ql_dbg_disc, fcport->vha, 0x3021,
+			       "Port to be marked lost on fcport=%06x, current "
+			       "port state= %s comp_status %x.\n",
+			       fcport->d_id.b24, port_state_str[FCS_ONLINE],
+			       comp_status);
+
+			qlt_schedule_sess_for_deletion(fcport);
+		}
 		fallthrough;
 	case CS_ABORTED:
 	case CS_PORT_BUSY:
@@ -2678,31 +2975,22 @@
 
 	/* check guard */
 	if (e_guard != a_guard) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x1);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x1);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION;
 		return 1;
 	}
 
 	/* check ref tag */
 	if (e_ref_tag != a_ref_tag) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x3);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x3);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION;
 		return 1;
 	}
 
 	/* check appl tag */
 	if (e_app_tag != a_app_tag) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x2);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x2);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION;
 		return 1;
 	}
 
@@ -2719,11 +3007,7 @@
 	uint16_t	scsi_status;
 	uint16_t thread_id;
 	uint32_t rval = EXT_STATUS_OK;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = NULL;
-#else
-	struct bsg_job *bsg_job = NULL;
-#endif
+	BSG_JOB_TYPE *bsg_job = NULL;
 	struct fc_bsg_request *bsg_request;
 	struct fc_bsg_reply *bsg_reply;
 	sts_entry_t *sts = pkt;
@@ -2890,7 +3174,7 @@
 	int logit = 1;
 	int res = 0;
 	uint16_t state_flags = 0;
-	uint16_t retry_delay = 0;
+	uint16_t sts_qual = 0;
 
 	if (IS_FWI2_CAPABLE(ha)) {
 		comp_status = le16_to_cpu(sts24->comp_status);
@@ -2936,6 +3220,7 @@
 		}
 		return;
 	}
+	qla_put_iocbs(sp->qpair, &sp->iores);
 
 	if (sp->cmd_type != TYPE_SRB) {
 		req->outstanding_cmds[handle] = NULL;
@@ -2964,6 +3249,9 @@
 	}
 
 	/* Fast path completion. */
+	qla_chk_edif_rx_sa_delete_pending(vha, sp, sts24);
+	sp->qpair->cmd_completion_cnt++;
+
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
 		qla2x00_process_completed_request(vha, req, handle);
 
@@ -2988,8 +3276,6 @@
 	sense_len = par_sense_len = rsp_info_len = resid_len =
 	    fw_resid_len = 0;
 	if (IS_FWI2_CAPABLE(ha)) {
-		u16 sts24_retry_delay = le16_to_cpu(sts24->retry_delay);
-
 		if (scsi_status & SS_SENSE_LEN_VALID)
 			sense_len = le32_to_cpu(sts24->sense_len);
 		if (scsi_status & SS_RESPONSE_INFO_LEN_VALID)
@@ -3003,13 +3289,7 @@
 		host_to_fcp_swap(sts24->data, sizeof(sts24->data));
 		ox_id = le16_to_cpu(sts24->ox_id);
 		par_sense_len = sizeof(sts24->data);
-		/* Valid values of the retry delay timer are 0x1-0xffef */
-		if (sts24_retry_delay > 0 && sts24_retry_delay < 0xfff1) {
-			retry_delay = sts24_retry_delay & 0x3fff;
-			ql_dbg(ql_dbg_io, sp->vha, 0x3033,
-			    "%s: scope=%#x retry_delay=%#x\n", __func__,
-			    sts24_retry_delay >> 14, retry_delay);
-		}
+		sts_qual = le16_to_cpu(sts24->status_qualifier);
 	} else {
 		if (scsi_status & SS_SENSE_LEN_VALID)
 			sense_len = le16_to_cpu(sts->req_sense_length);
@@ -3047,9 +3327,9 @@
 	 * Check retry_delay_timer value if we receive a busy or
 	 * queue full.
 	 */
-	if (lscsi_status == SAM_STAT_TASK_SET_FULL ||
-	    lscsi_status == SAM_STAT_BUSY)
-		qla2x00_set_retry_delay_timestamp(fcport, retry_delay);
+	if (unlikely(lscsi_status == SAM_STAT_TASK_SET_FULL ||
+		     lscsi_status == SAM_STAT_BUSY))
+		qla2x00_set_retry_delay_timestamp(fcport, sts_qual);
 
 	/*
 	 * Based on Host and scsi status generate status code for Linux
@@ -3101,9 +3381,11 @@
 		scsi_set_resid(cp, resid);
 		if (scsi_status & SS_RESIDUAL_UNDER) {
 			if (IS_FWI2_CAPABLE(ha) && fw_resid_len != resid_len) {
-				ql_dbg(ql_dbg_io, fcport->vha, 0x301d,
-				    "Dropped frame(s) detected (0x%x of 0x%x bytes).\n",
-				    resid, scsi_bufflen(cp));
+				ql_log(ql_log_warn, fcport->vha, 0x301d,
+				       "Dropped frame(s) detected (0x%x of 0x%x bytes).\n",
+				       resid, scsi_bufflen(cp));
+
+				vha->interface_err_cnt++;
 
 				res = DID_ERROR << 16 | lscsi_status;
 				goto check_scsi_status;
@@ -3126,9 +3408,11 @@
 			 * task not completed.
 			 */
 
-			ql_dbg(ql_dbg_io, fcport->vha, 0x301f,
-			    "Dropped frame(s) detected (0x%x of 0x%x bytes).\n",
-			    resid, scsi_bufflen(cp));
+			ql_log(ql_log_warn, fcport->vha, 0x301f,
+			       "Dropped frame(s) detected (0x%x of 0x%x bytes).\n",
+			       resid, scsi_bufflen(cp));
+
+			vha->interface_err_cnt++;
 
 			res = DID_ERROR << 16 | lscsi_status;
 			goto check_scsi_status;
@@ -3172,6 +3456,7 @@
 	case CS_PORT_UNAVAILABLE:
 	case CS_TIMEOUT:
 	case CS_RESET:
+	case CS_EDIF_INV_REQ:
 
 		/*
 		 * We are going to have the fc class block the rport
@@ -3212,6 +3497,7 @@
 
 	case CS_TRANSPORT:
 		res = DID_ERROR << 16;
+		vha->hw_err_cnt++;
 
 		if (!IS_PI_SPLIT_DET_CAPABLE(ha))
 			break;
@@ -3232,6 +3518,7 @@
 		ql_dump_buffer(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe0ee,
 		    pkt, sizeof(*sts24));
 		res = DID_ERROR << 16;
+		vha->hw_err_cnt++;
 		break;
 	default:
 		res = DID_ERROR << 16;
@@ -3241,14 +3528,12 @@
 out:
 	if (logit)
 		ql_dbg(ql_dbg_io, fcport->vha, 0x3022,
-		    "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu "
-		    "portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x "
-		    "rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
-		    comp_status, scsi_status, res, vha->host_no,
-		    cp->device->id, (u64)cp->device->lun, fcport->d_id.b.domain,
-		    fcport->d_id.b.area, fcport->d_id.b.al_pa, ox_id,
-		    cp->cmnd, scsi_bufflen(cp), rsp_info_len,
-		    resid_len, fw_resid_len, sp, cp);
+		       "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
+		       comp_status, scsi_status, res, vha->host_no,
+		       cp->device->id, (u64)cp->device->lun, fcport->d_id.b.domain,
+		       fcport->d_id.b.area, fcport->d_id.b.al_pa, ox_id,
+		       cp->cmnd, scsi_bufflen(cp), rsp_info_len,
+		       resid_len, fw_resid_len, sp, cp);
 
 	if (rsp->status_srb == NULL)
 		sp->done(sp, res);
@@ -3356,11 +3641,13 @@
 	default:
 		sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
 		if (sp) {
+			qla_put_iocbs(sp->qpair, &sp->iores);
 			sp->done(sp, res);
 			return 0;
 		}
 		break;
 
+	case SA_UPDATE_IOCB_TYPE:
 	case ABTS_RESP_24XX:
 	case CTIO_TYPE7:
 	case CTIO_CRC2:
@@ -3415,6 +3702,7 @@
 {
 	const char func[] = "ABT_IOCB";
 	srb_t *sp;
+	srb_t *orig_sp = NULL;
 	struct srb_iocb *abt;
 
 	sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
@@ -3422,7 +3710,12 @@
 		return;
 
 	abt = &sp->u.iocb_cmd;
-	abt->u.abt.comp_status = pkt->nport_handle;
+	abt->u.abt.comp_status = pkt->comp_status;
+	orig_sp = sp->cmd_sp;
+	/* Need to pass original sp */
+	if (orig_sp)
+		qla_nvme_abort_process_comp_status(pkt, orig_sp);
+
 	sp->done(sp, 0);
 }
 
@@ -3442,6 +3735,46 @@
 }
 
 /**
+ * qla_chk_cont_iocb_avail - check all continuation iocbs arrived before processing
+ * @vha: host adapter pointer
+ * @rsp: response queue
+ * @pkt: head iocb; its entry_count is the number of iocbs that must be present
+ * @rsp_q_in: response queue in-pointer index supplied by the caller
+ * Return: 0 if all iocbs have arrived, -EIO if some have not arrived yet.
+ */
+static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+	struct rsp_que *rsp, response_t *pkt, u32 rsp_q_in)
+{
+	int start_pkt_ring_index;
+	u32 iocb_cnt = 0;
+	int rc = 0;
+
+	if (pkt->entry_count == 1)
+		return rc;
+
+	/* ring_index was pre-incremented by the caller; step back to pkt's own index */
+	if (rsp->ring_index == 0)
+		start_pkt_ring_index = rsp->length - 1;
+	else
+		start_pkt_ring_index = rsp->ring_index - 1;
+
+	if (rsp_q_in < start_pkt_ring_index)
+		/* in-pointer has wrapped past the end of the ring; count across the wrap */
+		iocb_cnt = rsp->length - start_pkt_ring_index + rsp_q_in;
+	else
+		iocb_cnt = rsp_q_in - start_pkt_ring_index;
+
+	if (iocb_cnt < pkt->entry_count)
+		rc = -EIO;
+
+	ql_dbg(ql_dbg_init, vha, 0x5091,
+	       "%s - ring %p pkt %p entry count %d iocb_cnt %d rsp_q_in %d rc %d\n",
+	       __func__, rsp->ring, pkt, pkt->entry_count, iocb_cnt, rsp_q_in, rc);
+
+	return rc;
+}
+
+/**
  * qla24xx_process_response_queue() - Process response queue entries.
  * @vha: SCSI driver HA context
  * @rsp: response queue
@@ -3453,15 +3786,32 @@
 	struct qla_hw_data *ha = vha->hw;
 	struct purex_entry_24xx *purex_entry;
 	struct purex_item *pure_item;
+	u16 rsp_in = 0, cur_ring_index;
+	int is_shadow_hba;
 
 	if (!ha->flags.fw_started)
 		return;
 
-	if (rsp->qpair->cpuid != raw_smp_processor_id())
+	if (rsp->qpair->cpuid != raw_smp_processor_id() ||
+	    !rsp->qpair->rcv_intr) {
+		rsp->qpair->rcv_intr = 1;
 		qla_cpu_update(rsp->qpair, raw_smp_processor_id());
+	}
 
-	while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
+#define __update_rsp_in(_is_shadow_hba, _rsp, _rsp_in)			\
+	do {								\
+		_rsp_in = _is_shadow_hba ? *(_rsp)->in_ptr :		\
+				rd_reg_dword_relaxed((_rsp)->rsp_q_in);	\
+	} while (0)
+
+	is_shadow_hba = IS_SHADOW_REG_CAPABLE(ha);
+
+	__update_rsp_in(is_shadow_hba, rsp, rsp_in);
+
+	while (rsp->ring_index != rsp_in &&
+		       rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
 		pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
+		cur_ring_index = rsp->ring_index;
 
 		rsp->ring_index++;
 		if (rsp->ring_index == rsp->length) {
@@ -3573,18 +3923,43 @@
 				}
 				pure_item = qla27xx_copy_fpin_pkt(vha,
 							  (void **)&pkt, &rsp);
+				__update_rsp_in(is_shadow_hba, rsp, rsp_in);
 				if (!pure_item)
 					break;
 				qla24xx_queue_purex_item(vha, pure_item,
 						 qla27xx_process_purex_fpin);
 				break;
 
+			case ELS_AUTH_ELS:
+				if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) {
+					/*
+					 * ring_ptr and ring_index were
+					 * pre-incremented above. Reset them
+					 * back to current. Wait for next
+					 * interrupt with all IOCBs to arrive
+					 * and re-process.
+					 */
+					rsp->ring_ptr = (response_t *)pkt;
+					rsp->ring_index = cur_ring_index;
+
+					ql_dbg(ql_dbg_init, vha, 0x5091,
+					    "Defer processing ELS opcode %#x...\n",
+					    purex_entry->els_frame_payload[3]);
+					return;
+				}
+				qla24xx_auth_els(vha, (void **)&pkt, &rsp);
+				break;
 			default:
 				ql_log(ql_log_warn, vha, 0x509c,
 				       "Discarding ELS Request opcode 0x%x\n",
 				       purex_entry->els_frame_payload[3]);
 			}
 			break;
+		case SA_UPDATE_IOCB_TYPE:
+			qla28xx_sa_update_iocb_entry(vha, rsp->req,
+				(struct sa_update_28xx *)pkt);
+			break;
+
 		default:
 			/* Type Not Supported. */
 			ql_dbg(ql_dbg_async, vha, 0x5042,
@@ -3840,6 +4215,7 @@
 			    hccr);
 
 			qla2xxx_check_risc_status(vha);
+			vha->hw_err_cnt++;
 
 			ha->isp_ops->fw_dump(vha);
 			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
@@ -3908,7 +4284,7 @@
 	}
 	ha = qpair->hw;
 
-	queue_work(ha->wq, &qpair->q_work);
+	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
@@ -3934,7 +4310,7 @@
 	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	queue_work(ha->wq, &qpair->q_work);
+	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
@@ -4047,10 +4423,12 @@
 	if (USER_CTRL_IRQ(ha) || !ha->mqiobase) {
 		/* user wants to control IRQ setting for target mode */
 		ret = pci_alloc_irq_vectors(ha->pdev, min_vecs,
-		    ha->msix_count, PCI_IRQ_MSIX);
+		    min((u16)ha->msix_count, (u16)(num_online_cpus() + min_vecs)),
+		    PCI_IRQ_MSIX);
 	} else
 		ret = pci_alloc_irq_vectors_affinity(ha->pdev, min_vecs,
-		    ha->msix_count, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
+		    min((u16)ha->msix_count, (u16)(num_online_cpus() + min_vecs)),
+		    PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
 		    &desc);
 #endif
 
@@ -4164,16 +4542,12 @@
 	}
 
 	/* Enable MSI-X vector for response queue update for queue 0 */
-	if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
-		if (ha->msixbase && ha->mqiobase &&
-		    (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
-		     ql2xmqsupport))
-			ha->mqenable = 1;
-	} else
-		if (ha->mqiobase &&
-		    (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
-		     ql2xmqsupport))
-			ha->mqenable = 1;
+	if (IS_MQUE_CAPABLE(ha) &&
+	    (ha->msixbase && ha->mqiobase && ha->max_qpairs))
+		ha->mqenable = 1;
+	else
+		ha->mqenable = 0;
+
 	ql_dbg(ql_dbg_multiq, vha, 0xc005,
 	    "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n",
 	    ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues);
@@ -4278,6 +4652,8 @@
 		ql_dbg(ql_dbg_init, vha, 0x0125,
 		    "INTa mode: Enabled.\n");
 		ha->flags.mr_intr_valid = 1;
+		/* Set max_qpairs to 0, as neither MSI-X nor MSI is enabled */
+		ha->max_qpairs = 0;
 	}
 
 clear_risc_ints:

diff --git a/scst/qla2x00t-32gbit/qla_mbx.c b/scst/qla2x00t-32gbit/qla_mbx.c
index 53c7321..359595a 100644
--- a/scst/qla2x00t-32gbit/qla_mbx.c
+++ b/scst/qla2x00t-32gbit/qla_mbx.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_target.h"
@@ -10,6 +9,12 @@
 #include <linux/delay.h>
 #include <linux/gfp.h>
 
+#ifdef CONFIG_PPC
+#define IS_PPCARCH      true
+#else
+#define IS_PPCARCH      false
+#endif
+
 static struct mb_cmd_name {
 	uint16_t cmd;
 	const char *str;
@@ -103,7 +108,7 @@
 	int		rval, i;
 	unsigned long    flags = 0;
 	device_reg_t *reg;
-	uint8_t		abort_active;
+	uint8_t		abort_active, eeh_delay;
 	uint8_t		io_lock_on;
 	uint16_t	command = 0;
 	uint16_t	*iptr;
@@ -137,7 +142,7 @@
 		    "PCI error, exiting.\n");
 		return QLA_FUNCTION_TIMEOUT;
 	}
-
+	eeh_delay = 0;
 	reg = ha->iobase;
 	io_lock_on = base_vha->flags.init_done;
 
@@ -160,10 +165,10 @@
 	}
 
 	/* check if ISP abort is active and return cmd with timeout */
-	if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) ||
-	    test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) ||
-	    test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) &&
-	    !is_rom_cmd(mcp->mb[0])) {
+	if (((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) ||
+	      test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) ||
+	      test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) &&
+	      !is_rom_cmd(mcp->mb[0])) || ha->flags.eeh_busy) {
 		ql_log(ql_log_info, vha, 0x1005,
 		    "Cmd 0x%x aborted with timeout since ISP Abort is pending\n",
 		    mcp->mb[0]);
@@ -181,11 +186,16 @@
 		ql_log(ql_log_warn, vha, 0xd035,
 		    "Cmd access timeout, cmd=0x%x, Exiting.\n",
 		    mcp->mb[0]);
+		vha->hw_err_cnt++;
 		atomic_dec(&ha->num_pend_mbx_stage1);
 		return QLA_FUNCTION_TIMEOUT;
 	}
 	atomic_dec(&ha->num_pend_mbx_stage1);
-	if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) {
+	if (ha->flags.purge_mbox || chip_reset != ha->chip_reset ||
+	    ha->flags.eeh_busy) {
+		ql_log(ql_log_warn, vha, 0xd035,
+		       "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
+		       ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]);
 		rval = QLA_ABORTED;
 		goto premature_exit;
 	}
@@ -228,6 +238,8 @@
 			ql_dbg(ql_dbg_mbx, vha, 0x1112,
 			    "mbox[%d]<-0x%04x\n", cnt, *iptr);
 			wrt_reg_word(optr, *iptr);
+		} else {
+			wrt_reg_word(optr, 0);
 		}
 
 		mboxes >>= 1;
@@ -264,7 +276,15 @@
 		atomic_inc(&ha->num_pend_mbx_stage3);
 		if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
 		    mcp->tov * HZ)) {
+			ql_dbg(ql_dbg_mbx, vha, 0x117a,
+			    "cmd=%x Timeout.\n", command);
+			spin_lock_irqsave(&ha->hardware_lock, flags);
+			clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+			spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 			if (chip_reset != ha->chip_reset) {
+				eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+
 				spin_lock_irqsave(&ha->hardware_lock, flags);
 				ha->flags.mbox_busy = 0;
 				spin_unlock_irqrestore(&ha->hardware_lock,
@@ -274,14 +294,10 @@
 				rval = QLA_ABORTED;
 				goto premature_exit;
 			}
-			ql_dbg(ql_dbg_mbx, vha, 0x117a,
-			    "cmd=%x Timeout.\n", command);
-			spin_lock_irqsave(&ha->hardware_lock, flags);
-			clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
-			spin_unlock_irqrestore(&ha->hardware_lock, flags);
-
 		} else if (ha->flags.purge_mbox ||
 		    chip_reset != ha->chip_reset) {
+			eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+
 			spin_lock_irqsave(&ha->hardware_lock, flags);
 			ha->flags.mbox_busy = 0;
 			spin_unlock_irqrestore(&ha->hardware_lock, flags);
@@ -308,6 +324,7 @@
 				atomic_dec(&ha->num_pend_mbx_stage2);
 				ql_dbg(ql_dbg_mbx, vha, 0x1012,
 				    "Pending mailbox timeout, exiting.\n");
+				vha->hw_err_cnt++;
 				rval = QLA_FUNCTION_TIMEOUT;
 				goto premature_exit;
 			}
@@ -322,6 +339,8 @@
 		while (!ha->flags.mbox_int) {
 			if (ha->flags.purge_mbox ||
 			    chip_reset != ha->chip_reset) {
+				eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+
 				spin_lock_irqsave(&ha->hardware_lock, flags);
 				ha->flags.mbox_busy = 0;
 				spin_unlock_irqrestore(&ha->hardware_lock,
@@ -419,6 +438,7 @@
 			    "mb[0-3]=[0x%x 0x%x 0x%x 0x%x] mb7 0x%x host_status 0x%x hccr 0x%x\n",
 			    command, ictrl, jiffies, mb[0], mb[1], mb[2], mb[3],
 			    mb[7], host_status, hccr);
+			vha->hw_err_cnt++;
 
 		} else {
 			mb[0] = RD_MAILBOX_REG(ha, &reg->isp, 0);
@@ -426,6 +446,7 @@
 			ql_dbg(ql_dbg_mbx + ql_dbg_buffer, vha, 0x1119,
 			    "MBX Command timeout for cmd %x, iocontrol=%x jiffies=%lx "
 			    "mb[0]=0x%x\n", command, ictrl, jiffies, mb[0]);
+			vha->hw_err_cnt++;
 		}
 		ql_dump_regs(ql_dbg_mbx + ql_dbg_buffer, vha, 0x1019);
 
@@ -498,6 +519,7 @@
 				    "mb[0]=0x%x, eeh_busy=0x%x. Scheduling ISP "
 				    "abort.\n", command, mcp->mb[0],
 				    ha->flags.eeh_busy);
+				vha->hw_err_cnt++;
 				set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 				qla2xxx_wake_dpc(vha);
 			}
@@ -522,11 +544,13 @@
 				    "Mailbox cmd timeout occurred, cmd=0x%x, "
 				    "mb[0]=0x%x. Scheduling ISP abort ",
 				    command, mcp->mb[0]);
+				vha->hw_err_cnt++;
 				set_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags);
 				clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 				/* Allow next mbx cmd to come in. */
 				complete(&ha->mbx_cmd_comp);
-				if (ha->isp_ops->abort_isp(vha)) {
+				if (ha->isp_ops->abort_isp(vha) &&
+				    !ha->flags.eeh_busy) {
 					/* Failed. retry later. */
 					set_bit(ISP_ABORT_NEEDED,
 					    &vha->dpc_flags);
@@ -579,6 +603,17 @@
 		ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__);
 	}
 
+	i = 500;
+	while (i && eeh_delay && (ha->pci_error_state < QLA_PCI_SLOT_RESET)) {
+		/*
+		 * The caller of this mailbox command encountered a PCI error.
+		 * Hold the thread until the PCIe link reset completes to make
+		 * sure the caller does not unmap DMA while recovery is
+		 * in progress.
+		 */
+		msleep(1);
+		i--;
+	}
 	return rval;
 }
 
@@ -626,6 +661,7 @@
 		ql_dbg(ql_dbg_mbx, vha, 0x1023,
 		    "Failed=%x mb[0]=%x mb[1]=%x.\n",
 		    rval, mcp->mb[0], mcp->mb[1]);
+		vha->hw_err_cnt++;
 	} else {
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1024,
 		    "Done %s.\n", __func__);
@@ -635,6 +671,7 @@
 }
 
 #define	NVME_ENABLE_FLAG	BIT_3
+#define	EDIF_HW_SUPPORT		BIT_10
 
 /*
  * qla2x00_execute_fw
@@ -660,7 +697,7 @@
 	mbx_cmd_t *mcp = &mc;
 	u8 semaphore = 0;
 #define EXE_FW_FORCE_SEMAPHORE BIT_7
-	u8 retry = 3;
+	u8 retry = 5;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1025,
 	    "Entered %s.\n", __func__);
@@ -699,6 +736,9 @@
 				vha->min_supported_speed =
 				    nv->min_supported_speed;
 			}
+
+			if (IS_PPCARCH)
+				mcp->mb[11] |= BIT_4;
 		}
 
 		if (ha->flags.exlogins_enabled)
@@ -711,7 +751,7 @@
 			mcp->mb[11] |= EXE_FW_FORCE_SEMAPHORE;
 
 		mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11;
-		mcp->in_mb |= MBX_3 | MBX_2 | MBX_1;
+		mcp->in_mb |= MBX_5 | MBX_3 | MBX_2 | MBX_1;
 	} else {
 		mcp->mb[1] = LSW(risc_addr);
 		mcp->out_mb |= MBX_1;
@@ -735,8 +775,15 @@
 			goto again;
 		}
 
+		if (retry) {
+			retry--;
+			ql_dbg(ql_dbg_async, vha, 0x509d,
+			    "Exe FW retry: mb[0]=%x retry[%d]\n", mcp->mb[0], retry);
+			goto again;
+		}
 		ql_dbg(ql_dbg_mbx, vha, 0x1026,
 		    "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]);
+		vha->hw_err_cnt++;
 		return rval;
 	}
 
@@ -766,6 +813,12 @@
 		}
 	}
 
+	if (IS_QLA28XX(ha) && (mcp->mb[5] & EDIF_HW_SUPPORT)) {
+		ha->flags.edif_hw = 1;
+		ql_log(ql_log_info, vha, 0xffff,
+		    "%s: edif HW\n", __func__);
+	}
+
 done:
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1028,
 	    "Done %s.\n", __func__);
@@ -845,7 +898,7 @@
  * Context:
  *	Kernel context.
  */
-#define CONFIG_XLOGINS_MEM	0x3
+#define CONFIG_XLOGINS_MEM	0x9
 int
 qla_set_exlogin_mem_cfg(scsi_qla_host_t *vha, dma_addr_t phys_addr)
 {
@@ -872,8 +925,9 @@
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
 	if (rval != QLA_SUCCESS) {
-		/*EMPTY*/
-		ql_dbg(ql_dbg_mbx, vha, 0x111b, "Failed=%x.\n", rval);
+		ql_dbg(ql_dbg_mbx, vha, 0x111b,
+		       "EXlogin Failed=%x. MB0=%x MB11=%x\n",
+		       rval, mcp->mb[0], mcp->mb[11]);
 	} else {
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x118c,
 		    "Done %s.\n", __func__);
@@ -1092,6 +1146,21 @@
 			    "%s: FC-NVMe is Enabled (0x%x)\n",
 			     __func__, ha->fw_attributes_h);
 		}
+
+		/* BIT_13 of Extended FW Attributes informs about NVMe2 support */
+		if (ha->fw_attributes_ext[0] & FW_ATTR_EXT0_NVME2) {
+			ql_log(ql_log_info, vha, 0xd302,
+			       "Firmware supports NVMe2 0x%x\n",
+			       ha->fw_attributes_ext[0]);
+			vha->flags.nvme2_enabled = 1;
+		}
+
+		if (IS_QLA28XX(ha) && ha->flags.edif_hw && ql2xsecenable &&
+		    (ha->fw_attributes_ext[0] & FW_ATTR_EXT0_EDIF)) {
+			ha->flags.edif_enabled = 1;
+			ql_log(ql_log_info, vha, 0xffff,
+			       "%s: edif is enabled\n", __func__);
+		}
 	}
 
 	if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
@@ -1121,12 +1190,18 @@
 		if (ha->flags.scm_supported_a &&
 		    (ha->fw_attributes_ext[0] & FW_ATTR_EXT0_SCM_SUPPORTED)) {
 			ha->flags.scm_supported_f = 1;
-			memset(ha->sf_init_cb, 0, sizeof(struct init_sf_cb));
-			ha->sf_init_cb->flags |= BIT_13;
+			ha->sf_init_cb->flags |= cpu_to_le16(BIT_13);
 		}
 		ql_log(ql_log_info, vha, 0x11a3, "SCM in FW: %s\n",
 		       (ha->flags.scm_supported_f) ? "Supported" :
 		       "Not Supported");
+
+		if (vha->flags.nvme2_enabled) {
+			/* set BIT_15 of special feature control block for SLER */
+			ha->sf_init_cb->flags |= cpu_to_le16(BIT_15);
+			/* set BIT_14 of special feature control block for PI CTRL*/
+			ha->sf_init_cb->flags |= cpu_to_le16(BIT_14);
+		}
 	}
 
 failed:
@@ -1299,6 +1374,7 @@
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		ql_dbg(ql_dbg_mbx, vha, 0x1033, "Failed=%x.\n", rval);
+		vha->hw_err_cnt++;
 	} else {
 		/*EMPTY*/
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1034,
@@ -1636,10 +1712,8 @@
 		mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
 	if (IS_FWI2_CAPABLE(vha->hw))
 		mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16;
-	if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) {
-		mcp->in_mb |= MBX_15;
-		mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23;
-	}
+	if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw))
+		mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23;
 
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
@@ -1822,7 +1896,7 @@
 		mcp->out_mb |= MBX_14|MBX_13|MBX_12|MBX_11|MBX_10;
 	}
 
-	if (ha->flags.scm_supported_f) {
+	if (ha->flags.scm_supported_f || vha->flags.nvme2_enabled) {
 		mcp->mb[1] |= BIT_1;
 		mcp->mb[16] = MSW(ha->sf_init_cb_dma);
 		mcp->mb[17] = LSW(ha->sf_init_cb_dma);
@@ -2972,8 +3046,7 @@
 		ha->orig_fw_iocb_count = mcp->mb[10];
 		if (ha->flags.npiv_supported)
 			ha->max_npiv_vports = mcp->mb[11];
-		if (IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
-		    IS_QLA28XX(ha))
+		if (IS_QLA81XX(ha) || IS_QLA83XX(ha))
 			ha->fw_max_fcf_count = mcp->mb[12];
 	}
 
@@ -2995,7 +3068,8 @@
  *	Kernel context.
  */
 int
-qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
+qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map,
+		u8 *num_entries)
 {
 	int rval;
 	mbx_cmd_t mc;
@@ -3035,6 +3109,8 @@
 
 		if (pos_map)
 			memcpy(pos_map, pmap, FCAL_MAP_SIZE);
+		if (num_entries)
+			*num_entries = pmap[0];
 	}
 	dma_pool_free(ha->s_dma_pool, pmap, pmap_dma);
 
@@ -3177,7 +3253,7 @@
 	fc_port_t	*fcport = sp->fcport;
 	struct scsi_qla_host *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = vha->req;
+	struct req_que *req;
 	struct qla_qpair *qpair = sp->qpair;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x108c,
@@ -3186,7 +3262,7 @@
 	if (sp->qpair)
 		req = sp->qpair->req;
 	else
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NO_QPAIR;
 
 	if (ql2xasynctmfenable)
 		return qla24xx_async_abort_command(sp);
@@ -3199,7 +3275,7 @@
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 	if (handle == req->num_outstanding_cmds) {
 		/* Command not found. */
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NOT_FOUND;
 	}
 
 	abt = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &abt_dma);
@@ -3220,6 +3296,8 @@
 	abt->vp_index = fcport->vha->vp_idx;
 
 	abt->req_que_no = cpu_to_le16(req->id);
+	/* Need to pass original sp */
+	qla_nvme_abort_set_option(abt, sp);
 
 	rval = qla2x00_issue_iocb(vha, abt, abt_dma, 0);
 	if (rval != QLA_SUCCESS) {
@@ -3242,6 +3320,10 @@
 		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1091,
 		    "Done %s.\n", __func__);
 	}
+	if (rval == QLA_SUCCESS)
+		qla_nvme_abort_process_comp_status(abt, sp);
+
+	qla_wait_nvme_release_cmd_kref(sp);
 
 	dma_pool_free(ha->s_dma_pool, abt, abt_dma);
 
@@ -3979,10 +4061,12 @@
 
 			if (fcport) {
 				fcport->plogi_nack_done_deadline = jiffies + HZ;
-				fcport->dm_login_expire = jiffies + 2*HZ;
+				fcport->dm_login_expire = jiffies +
+					QLA_N2N_WAIT_TIME * HZ;
 				fcport->scan_state = QLA_FCPORT_FOUND;
 				fcport->n2n_flag = 1;
 				fcport->keep_nport_handle = 1;
+				fcport->login_retry = vha->hw->login_retry_count;
 				fcport->fc4_type = FS_FC4TYPE_FCP;
 				if (vha->flags.nvme_enabled)
 					fcport->fc4_type |= FS_FC4TYPE_NVME;
@@ -4015,7 +4099,6 @@
 
 			set_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags);
 			return;
-			break;
 		case TOPO_FL:
 			ha->current_topology = ISP_CFG_FL;
 			break;
@@ -4124,6 +4207,16 @@
 				rptid_entry->u.f2.remote_nport_id[1];
 			fcport->d_id.b.al_pa =
 				rptid_entry->u.f2.remote_nport_id[0];
+
+			/*
+			 * When the remote port sends a PRLO, the FW
+			 * sends up RIDA Format 2 as an indication of session
+			 * loss. In other words, the FW state changes from PRLI
+			 * complete back to PLOGI complete. Delete the
+			 * session and let relogin drive the reconnect.
+			 */
+			if (atomic_read(&fcport->state) == FCS_ONLINE)
+				qlt_schedule_sess_for_deletion(fcport);
 		}
 	}
 }
@@ -4265,7 +4358,8 @@
 	if (MSW(addr) || IS_FWI2_CAPABLE(vha->hw)) {
 		mcp->mb[0] = MBC_DUMP_RISC_RAM_EXTENDED;
 		mcp->mb[8] = MSW(addr);
-		mcp->out_mb = MBX_8|MBX_0;
+		mcp->mb[10] = 0;
+		mcp->out_mb = MBX_10|MBX_8|MBX_0;
 	} else {
 		mcp->mb[0] = MBC_DUMP_RISC_RAM;
 		mcp->out_mb = MBX_0;
@@ -4897,7 +4991,7 @@
 	return rval;
 }
 
-#define PUREX_CMD_COUNT	2
+#define PUREX_CMD_COUNT	4
 int
 qla25xx_set_els_cmds_supported(scsi_qla_host_t *vha)
 {
@@ -4905,6 +4999,7 @@
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 	uint8_t *els_cmd_map;
+	uint8_t active_cnt = 0;
 	dma_addr_t els_cmd_map_dma;
 	uint8_t cmd_opcode[PUREX_CMD_COUNT];
 	uint8_t i, index, purex_bit;
@@ -4926,10 +5021,20 @@
 	}
 
 	/* List of Purex ELS */
-	cmd_opcode[0] = ELS_FPIN;
-	cmd_opcode[1] = ELS_RDP;
+	if (ql2xrdpenable) {
+		cmd_opcode[active_cnt] = ELS_RDP;
+		active_cnt++;
+	}
+	if (ha->flags.scm_supported_f) {
+		cmd_opcode[active_cnt] = ELS_FPIN;
+		active_cnt++;
+	}
+	if (ha->flags.edif_enabled) {
+		cmd_opcode[active_cnt] = ELS_AUTH_ELS;
+		active_cnt++;
+	}
 
-	for (i = 0; i < PUREX_CMD_COUNT; i++) {
+	for (i = 0; i < active_cnt; i++) {
 		index = cmd_opcode[i] / 8;
 		purex_bit = cmd_opcode[i] % 8;
 		els_cmd_map[index] |= 1 << purex_bit;
@@ -4962,45 +5067,6 @@
 	return rval;
 }
 
-int
-qla24xx_get_buffer_credits(scsi_qla_host_t *vha, struct buffer_credit_24xx *bbc,
-	dma_addr_t bbc_dma)
-{
-	mbx_cmd_t mc;
-	mbx_cmd_t *mcp = &mc;
-	int rval;
-
-	if (!IS_FWI2_CAPABLE(vha->hw))
-		return QLA_FUNCTION_FAILED;
-
-	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x118e,
-	    "Entered %s.\n", __func__);
-
-	mcp->mb[0] = MBC_GET_RNID_PARAMS;
-	mcp->mb[1] = RNID_BUFFER_CREDITS << 8;
-	mcp->mb[2] = MSW(LSD(bbc_dma));
-	mcp->mb[3] = LSW(LSD(bbc_dma));
-	mcp->mb[6] = MSW(MSD(bbc_dma));
-	mcp->mb[7] = LSW(MSD(bbc_dma));
-	mcp->mb[8] = sizeof(*bbc) / sizeof(*bbc->parameter);
-	mcp->out_mb = MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_1|MBX_0;
-	mcp->buf_size = sizeof(*bbc);
-	mcp->flags = MBX_DMA_IN;
-	mcp->tov = MBX_TOV_SECONDS;
-	rval = qla2x00_mailbox_command(vha, mcp);
-
-	if (rval != QLA_SUCCESS) {
-		ql_dbg(ql_dbg_mbx, vha, 0x118f,
-		    "Failed=%x mb[0]=%x,%x.\n", rval, mcp->mb[0], mcp->mb[1]);
-	} else {
-		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1190,
-		    "Done %s.\n", __func__);
-	}
-
-	return rval;
-}
-
 static int
 qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp)
 {
@@ -5574,7 +5640,7 @@
 	mcp->out_mb = MBX_1|MBX_0;
 	mcp->in_mb = MBX_2|MBX_1|MBX_0;
 	if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
-		mcp->in_mb |= MBX_3;
+		mcp->in_mb |= MBX_4|MBX_3;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -6410,6 +6476,54 @@
 	return rval;
 }
 
+/* Issue MBC_DPORT_DIAGNOSTICS with dd->buf as the DMA result buffer. */
+int
+qla26xx_dport_diagnostics_v2(scsi_qla_host_t *vha,
+			     struct qla_dport_diag_v2 *dd, mbx_cmd_t *mcp)
+{
+	int rval;
+	dma_addr_t dd_dma;
+	uint size = sizeof(dd->buf);
+	uint16_t options = dd->options;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x119f,
+	       "Entered %s.\n", __func__);
+
+	/* Clear the buffer first: once mapped it is owned by the device. */
+	memset(dd->buf, 0, size);
+	dd_dma = dma_map_single(&vha->hw->pdev->dev,
+				dd->buf, size, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&vha->hw->pdev->dev, dd_dma)) {
+		ql_log(ql_log_warn, vha, 0x1194,
+		       "Failed to map dma buffer.\n");
+		return QLA_MEMORY_ALLOC_FAILED;
+	}
+	/* The DMA address is split across mb[2], mb[3], mb[6] and mb[7]. */
+	mcp->mb[0] = MBC_DPORT_DIAGNOSTICS;
+	mcp->mb[1] = options;
+	mcp->mb[2] = MSW(LSD(dd_dma));
+	mcp->mb[3] = LSW(LSD(dd_dma));
+	mcp->mb[6] = MSW(MSD(dd_dma));
+	mcp->mb[7] = LSW(MSD(dd_dma));
+	mcp->mb[8] = size;
+	mcp->out_mb = MBX_8 | MBX_7 | MBX_6 | MBX_3 | MBX_2 | MBX_1 | MBX_0;
+	mcp->in_mb = MBX_3 | MBX_2 | MBX_1 | MBX_0;
+	mcp->buf_size = size;
+	mcp->flags = MBX_DMA_IN;
+	mcp->tov = MBX_TOV_SECONDS * 4;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0x1195, "Failed=%x.\n", rval);
+	} else {
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1196,
+		       "Done %s.\n", __func__);
+	}
+
+	dma_unmap_single(&vha->hw->pdev->dev, dd_dma, size, DMA_FROM_DEVICE);
+
+	return rval;
+}
+
 static void qla2x00_async_mb_sp_done(srb_t *sp, int res)
 {
 	sp->u.iocb_cmd.u.mbx.rc = res;
@@ -6432,23 +6546,21 @@
 	if (!vha->hw->flags.fw_started)
 		goto done;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
-	sp->type = SRB_MB_IOCB;
-	sp->name = mb_to_str(mcp->mb[0]);
-
 	c = &sp->u.iocb_cmd;
-	c->timeout = qla2x00_async_iocb_timeout;
 	init_completion(&c->u.mbx.comp);
 
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	sp->type = SRB_MB_IOCB;
+	sp->name = mb_to_str(mcp->mb[0]);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_mb_sp_done);
 
 	memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
 
-	sp->done = qla2x00_async_mb_sp_done;
-
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_mbx, vha, 0x1018,
@@ -6480,7 +6592,8 @@
 	}
 
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -6578,6 +6691,12 @@
 	fcport->d_id.b.al_pa = pd->port_id[2];
 	fcport->d_id.b.rsvd_1 = 0;
 
+	ql_dbg(ql_dbg_disc, vha, 0x2062,
+	     "%8phC SVC Param w3 %02x%02x",
+	     fcport->port_name,
+	     pd->prli_svc_param_word_3[1],
+	     pd->prli_svc_param_word_3[0]);
+
 	if (NVME_TARGET(vha->hw, fcport)) {
 		fcport->port_type = FCT_NVME;
 		if ((pd->prli_svc_param_word_3[0] & BIT_5) == 0)
@@ -6929,3 +7048,63 @@
 
 	return rval;
 }
+
+/**
+ * qla_no_op_mb() - probe that the FW is alive and can raise an interrupt
+ * @vha: host adapter pointer
+ *
+ * Issues the no-op mailbox command (opcode 0) with a 5 second timeout.  If
+ * the FW does not answer, the timeout will trigger FW dump + reset.
+ */
+void qla_no_op_mb(struct scsi_qla_host *vha)
+{
+	mbx_cmd_t noop;
+	int status;
+
+	memset(&noop, 0, sizeof(noop));
+
+	noop.mb[0] = 0;		/* no-op command code */
+	noop.out_mb = MBX_0;
+	noop.in_mb = MBX_0;
+	noop.tov = 5;
+	noop.flags = 0;
+
+	status = qla2x00_mailbox_command(vha, &noop);
+	if (!status)
+		return;
+	ql_dbg(ql_dbg_async, vha, 0x7071,
+		"Failed %s %x\n", __func__, status);
+}
+
+int qla_mailbox_passthru(scsi_qla_host_t *vha,
+			 uint16_t *mbx_in, uint16_t *mbx_out)
+{
+	/* The passthrough always moves the full set of 32 mailbox registers. */
+	const size_t regs_len = 32 * sizeof(uint16_t);
+	mbx_cmd_t cmd;
+	int status;
+
+	memset(&cmd, 0, sizeof(cmd));
+	/* Load every outgoing register from the caller's image. */
+	memcpy(cmd.mb, mbx_in, regs_len);
+
+	/* Select every mailbox register in both directions. */
+	cmd.out_mb = 0xFFFFFFFF;
+	cmd.in_mb = 0xFFFFFFFF;
+	cmd.tov = MBX_TOV_SECONDS;
+	cmd.flags = 0;
+	cmd.bufp = NULL;
+
+	status = qla2x00_mailbox_command(vha, &cmd);
+	if (status != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0xf0a2,
+			"Failed=%x mb[0]=%x.\n", status, cmd.mb[0]);
+		return status;
+	}
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0xf0a3, "Done %s.\n",
+	       __func__);
+	/* Hand all 32 returned register values back to the caller. */
+	memcpy(mbx_out, cmd.mb, regs_len);
+	return status;
+}

diff --git a/scst/qla2x00t-32gbit/qla_mid.c b/scst/qla2x00t-32gbit/qla_mid.c
index 15efe2f..16a9f22 100644
--- a/scst/qla2x00t-32gbit/qla_mid.c
+++ b/scst/qla2x00t-32gbit/qla_mid.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_gbl.h"
@@ -66,7 +65,7 @@
 	uint16_t vp_id;
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags = 0;
-	u8 i;
+	u32 i, bailout;
 
 	mutex_lock(&ha->vport_lock);
 	/*
@@ -76,21 +75,29 @@
 	 * ensures no active vp_list traversal while the vport is removed
 	 * from the queue)
 	 */
-	for (i = 0; i < 10; i++) {
-		if (wait_event_timeout(vha->vref_waitq,
-		    !atomic_read(&vha->vref_count), HZ) > 0)
-			break;
-	}
+	bailout = 0;
+	for (i = 0; i < 500; i++) {
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		if (atomic_read(&vha->vref_count) == 0) {
+			list_del(&vha->list);
+			qlt_update_vp_map(vha, RESET_VP_IDX);
+			bailout = 1;
+		}
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
 
-	spin_lock_irqsave(&ha->vport_slock, flags);
-	if (atomic_read(&vha->vref_count)) {
-		ql_dbg(ql_dbg_vport, vha, 0xfffa,
-		    "vha->vref_count=%u timeout\n", vha->vref_count.counter);
-		vha->vref_count = (atomic_t)ATOMIC_INIT(0);
+		if (bailout)
+			break;
+		else
+			msleep(20);
 	}
-	list_del(&vha->list);
-	qlt_update_vp_map(vha, RESET_VP_IDX);
-	spin_unlock_irqrestore(&ha->vport_slock, flags);
+	if (!bailout) {
+		ql_log(ql_log_info, vha, 0xfffa,
+			"vha->vref_count=%u timeout\n", vha->vref_count.counter);
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		list_del(&vha->list);
+		qlt_update_vp_map(vha, RESET_VP_IDX);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+	}
 
 	vp_id = vha->vp_idx;
 	ha->num_vhosts--;
@@ -159,6 +166,14 @@
 	int ret = QLA_SUCCESS;
 	fc_port_t *fcport;
 
+	if (vha->hw->flags.edif_enabled) {
+		if (DBELL_ACTIVE(vha))
+			qla2x00_post_aen_work(vha, FCH_EVT_VENDOR_UNIQUE,
+			    FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN);
+		/* delete sessions and flush sa_indexes */
+		qla2x00_wait_for_sess_deletion(vha);
+	}
+
 	if (vha->hw->flags.fw_started)
 		ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
 
@@ -167,7 +182,8 @@
 	list_for_each_entry(fcport, &vha->vp_fcports, list)
 		fcport->logout_on_delete = 0;
 
-	qla2x00_mark_all_devices_lost(vha);
+	if (!vha->hw->flags.edif_enabled)
+		qla2x00_wait_for_sess_deletion(vha);
 
 	/* Remove port id from vp target map */
 	spin_lock_irqsave(&vha->hw->hardware_lock, flags);
@@ -258,13 +274,13 @@
 void
 qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 {
-	scsi_qla_host_t *vha;
+	scsi_qla_host_t *vha, *tvp;
 	struct qla_hw_data *ha = rsp->hw;
 	int i = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vha, &ha->vp_list, list) {
+	list_for_each_entry_safe(vha, tvp, &ha->vp_list, list) {
 		if (vha->vp_idx) {
 			if (test_bit(VPORT_DELETE, &vha->dpc_flags))
 				continue;
@@ -417,7 +433,7 @@
 qla2x00_do_dpc_all_vps(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	scsi_qla_host_t *vp;
+	scsi_qla_host_t *vp, *tvp;
 	unsigned long flags = 0;
 
 	if (vha->vp_idx)
@@ -431,7 +447,7 @@
 		return;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		if (vp->vp_idx) {
 			atomic_inc(&vp->vref_count);
 			spin_unlock_irqrestore(&ha->vport_slock, flags);
@@ -579,7 +595,6 @@
 	}
 	kfree(req->outstanding_cmds);
 	kfree(req);
-	req = NULL;
 }
 
 static void
@@ -605,7 +620,6 @@
 		mutex_unlock(&ha->vport_lock);
 	}
 	kfree(rsp);
-	rsp = NULL;
 }
 
 int
@@ -808,11 +822,9 @@
 {
 	unsigned long flags;
 	struct qla_qpair *qpair = container_of(work, struct qla_qpair, q_work);
-	struct scsi_qla_host *vha;
-	struct qla_hw_data *ha = qpair->hw;
+	struct scsi_qla_host *vha = qpair->vha;
 
 	spin_lock_irqsave(&qpair->qp_lock, flags);
-	vha = pci_get_drvdata(ha->pdev);
 	qla24xx_process_response_queue(vha, qpair->rsp);
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
 
@@ -955,6 +967,7 @@
 	if (vp_index == 0 || vp_index >= ha->max_npiv_vports)
 		return QLA_PARAMETER_ERROR;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(base_vha, NULL, GFP_KERNEL);
 	if (!sp)
 		return rval;
@@ -962,9 +975,8 @@
 	sp->type = SRB_CTRL_VP;
 	sp->name = "ctrl_vp";
 	sp->comp = &comp;
-	sp->done = qla_ctrlvp_sp_done;
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla_ctrlvp_sp_done);
 	sp->u.iocb_cmd.u.ctrlvp.cmd = cmd;
 	sp->u.iocb_cmd.u.ctrlvp.vp_index = vp_index;
 
@@ -998,6 +1010,7 @@
 		break;
 	}
 done:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	return rval;
 }

diff --git a/scst/qla2x00t-32gbit/qla_mr.c b/scst/qla2x00t-32gbit/qla_mr.c
index e2d854f..84c129f 100644
--- a/scst/qla2x00t-32gbit/qla_mr.c
+++ b/scst/qla2x00t-32gbit/qla_mr.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include <linux/delay.h>
@@ -517,7 +516,7 @@
 }
 
 /**
- * qlafx00_warm_reset() - Perform warm reset of iSA(CPUs being reset on SOC).
+ * qlafx00_soc_cpu_reset() - Perform warm reset of iSA(CPUs being reset on SOC).
  * @vha: HA context
  *
  */
@@ -740,29 +739,6 @@
 }
 
 int
-qlafx00_loop_reset(scsi_qla_host_t *vha)
-{
-	int ret;
-	struct fc_port *fcport;
-	struct qla_hw_data *ha = vha->hw;
-
-	if (ql2xtargetreset) {
-		list_for_each_entry(fcport, &vha->vp_fcports, list) {
-			if (fcport->port_type != FCT_TARGET)
-				continue;
-
-			ret = ha->isp_ops->target_reset(fcport, 0, 0);
-			if (ret != QLA_SUCCESS) {
-				ql_dbg(ql_dbg_taskm, vha, 0x803d,
-				    "Bus Reset failed: Reset=%d "
-				    "d_id=%x.\n", ret, fcport->d_id.b24);
-			}
-		}
-	}
-	return QLA_SUCCESS;
-}
-
-int
 qlafx00_iospace_config(struct qla_hw_data *ha)
 {
 	if (pci_request_selected_regions(ha->pdev, ha->bars,
@@ -1821,17 +1797,18 @@
 	struct register_host_info *preg_hsi;
 	struct new_utsname *p_sysid = NULL;
 
+	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
 	sp->type = SRB_FXIOCB_DCMD;
 	sp->name = "fxdisc";
+	qla2x00_init_async_sp(sp, FXDISC_TIMEOUT,
+			      qla2x00_fxdisc_sp_done);
+	sp->u.iocb_cmd.timeout = qla2x00_fxdisc_iocb_timeout;
 
 	fdisc = &sp->u.iocb_cmd;
-	fdisc->timeout = qla2x00_fxdisc_iocb_timeout;
-	qla2x00_init_timer(sp, FXDISC_TIMEOUT);
-
 	switch (fx_type) {
 	case FXDISC_GET_CONFIG_INFO:
 	fdisc->u.fxiocb.flags =
@@ -1932,7 +1909,6 @@
 	}
 
 	fdisc->u.fxiocb.req_func_type = cpu_to_le16(fx_type);
-	sp->done = qla2x00_fxdisc_sp_done;
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS)
@@ -2008,7 +1984,8 @@
 		dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len,
 		    fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle);
 done_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	return rval;
 }
@@ -2216,11 +2193,7 @@
 {
 	const char func[] = "IOSB_IOCB";
 	srb_t *sp;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
 	struct srb_iocb *iocb_job;
 	int res = 0;
@@ -2876,7 +2849,7 @@
 }
 
 /**
- * qlafx00x_mbx_completion() - Process mailbox command completions.
+ * qlafx00_mbx_completion() - Process mailbox command completions.
  * @vha: SCSI driver HA context
  * @mb0: value to be written into mailbox register 0
  */
@@ -3258,11 +3231,7 @@
 {
 	struct srb_iocb *fxio = &sp->u.iocb_cmd;
 	struct qla_mt_iocb_rqst_fx00 *piocb_rqst;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_request *bsg_request;
 	struct fxdisc_entry_fx00 fx_iocb;
 	uint8_t entry_cnt = 1;

diff --git a/scst/qla2x00t-32gbit/qla_mr.h b/scst/qla2x00t-32gbit/qla_mr.h
index 968fc76..4f63aff 100644
--- a/scst/qla2x00t-32gbit/qla_mr.h
+++ b/scst/qla2x00t-32gbit/qla_mr.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_MR_H
 #define __QLA_MR_H
@@ -177,6 +176,10 @@
 	uint8_t flags;
 	uint8_t reserved_1;
 
+	/*
+	 * Use array size 1 below to prevent Coverity from complaining about
+	 * the append_dsd64() calls for the two arrays below.
+	 */
 	struct dsd64 dseg_rq[1];
 	struct dsd64 dseg_rsp[1];
 

diff --git a/scst/qla2x00t-32gbit/qla_nvme.c b/scst/qla2x00t-32gbit/qla_nvme.c
index a4a06e5..9c5ccfd 100644
--- a/scst/qla2x00t-32gbit/qla_nvme.c
+++ b/scst/qla2x00t-32gbit/qla_nvme.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2017 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #include <linux/version.h>
@@ -14,6 +13,8 @@
 #include <linux/delay.h>
 #include <linux/nvme.h>
 #include <linux/nvme-fc.h>
+#include <linux/blk-mq-pci.h>
+#include <linux/blk-mq.h>
 
 static struct nvme_fc_port_template qla_nvme_fc_transport;
 
@@ -50,9 +51,11 @@
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
 	/*
 	 * See also commit 9dd9686b1419 ("scsi: qla2xxx: Add changes for
-	 * devloss timeout in driver") # v4.17.
+	 * devloss timeout in driver") # v4.17. See also commit dd8d0bf6fb72
+	 * ("scsi: qla2xxx: Fix I/O failures during remote port toggle
+	 * testing") # v5.10.
 	 */
-	req.dev_loss_tmo = NVME_FC_DEV_LOSS_TMO;
+	req.dev_loss_tmo = fcport->dev_loss_tmo;
 #endif
 
 	if (fcport->nvme_prli_service_param & NVME_PRLI_SP_INITIATOR)
@@ -80,6 +83,19 @@
 		return ret;
 	}
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
+	nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port,
+				       fcport->dev_loss_tmo);
+#endif
+
+	if (fcport->nvme_prli_service_param & NVME_PRLI_SP_SLER)
+		ql_log(ql_log_info, vha, 0x212a,
+		       "PortID:%06x Supports SLER\n", req.port_id);
+
+	if (fcport->nvme_prli_service_param & NVME_PRLI_SP_PI_CTRL)
+		ql_log(ql_log_info, vha, 0x212b,
+		       "PortID:%06x Supports PI control\n", req.port_id);
+
 	rport = fcport->nvme_remote_port->private;
 	rport->fcport = fcport;
 
@@ -95,8 +111,9 @@
 	struct qla_hw_data *ha;
 	struct qla_qpair *qpair;
 
-	if (!qidx)
-		qidx++;
+	/* Map admin queue and 1st IO queue to index 0 */
+	if (qidx)
+		qidx--;
 
 	vha = (struct scsi_qla_host *)lport->private;
 	ha = vha->hw;
@@ -112,19 +129,24 @@
 		return -EINVAL;
 	}
 
-	if (ha->queue_pair_map[qidx]) {
-		*handle = ha->queue_pair_map[qidx];
-		ql_log(ql_log_info, vha, 0x2121,
-		    "Returning existing qpair of %p for idx=%x\n",
-		    *handle, qidx);
-		return 0;
-	}
+	/* Use base qpair if max_qpairs is 0 */
+	if (!ha->max_qpairs) {
+		qpair = ha->base_qpair;
+	} else {
+		if (ha->queue_pair_map[qidx]) {
+			*handle = ha->queue_pair_map[qidx];
+			ql_log(ql_log_info, vha, 0x2121,
+			       "Returning existing qpair of %p for idx=%x\n",
+			       *handle, qidx);
+			return 0;
+		}
 
-	qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true);
-	if (qpair == NULL) {
-		ql_log(ql_log_warn, vha, 0x2122,
-		    "Failed to allocate qpair\n");
-		return -EINVAL;
+		qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true);
+		if (!qpair) {
+			ql_log(ql_log_warn, vha, 0x2122,
+			       "Failed to allocate qpair\n");
+			return -EINVAL;
+		}
 	}
 	*handle = qpair;
 
@@ -163,6 +185,18 @@
 	qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
+static void qla_nvme_ls_unmap(struct srb *sp, struct nvmefc_ls_req *fd)
+{
+	struct qla_hw_data *ha;
+
+	if (!(sp->flags & SRB_DMA_VALID))
+		return;		/* no live LS request mapping to undo */
+	ha = sp->fcport->vha->hw;
+	dma_unmap_single(&ha->pdev->dev, sp->u.iocb_cmd.u.nvme.cmd_dma,
+			 fd->rqstlen, DMA_TO_DEVICE);
+	sp->flags &= ~SRB_DMA_VALID;	/* a second call becomes a no-op */
+}
+
 static void qla_nvme_release_ls_cmd_kref(struct kref *kref)
 {
 	struct srb *sp = container_of(kref, struct srb, cmd_kref);
@@ -179,6 +213,8 @@
 	spin_unlock_irqrestore(&priv->cmd_lock, flags);
 
 	fd = priv->fd;
+
+	qla_nvme_ls_unmap(sp, fd);
 	fd->done(fd, priv->comp_status);
 out:
 	qla2x00_rel_sp(sp);
@@ -225,13 +261,15 @@
 	srb_t *sp = priv->sp;
 	fc_port_t *fcport = sp->fcport;
 	struct qla_hw_data *ha = fcport->vha->hw;
-	int rval;
+	int rval, abts_done_called = 1;
+	bool io_wait_for_abort_done;
+	uint32_t handle;
 
 	ql_dbg(ql_dbg_io, fcport->vha, 0xffff,
-	       "%s called for sp=%p, hndl=%x on fcport=%p deleted=%d\n",
-	       __func__, sp, sp->handle, fcport, fcport->deleted);
+	       "%s called for sp=%p, hndl=%x on fcport=%p desc=%p deleted=%d\n",
+	       __func__, sp, sp->handle, fcport, sp->u.iocb_cmd.u.nvme.desc, fcport->deleted);
 
-	if (!ha->flags.fw_started && fcport->deleted)
+	if (!ha->flags.fw_started || fcport->deleted == QLA_SESS_DELETED)
 		goto out;
 
 	if (ha->flags.host_shutting_down) {
@@ -242,13 +280,36 @@
 		goto out;
 	}
 
+	/*
+	 * sp may not be valid after abort_command if return code is either
+	 * SUCCESS or ERR_FROM_FW codes, so cache the value here.
+	 */
+	io_wait_for_abort_done = ql2xabts_wait_nvme &&
+					QLA_ABTS_WAIT_ENABLED(sp);
+	handle = sp->handle;
+
 	rval = ha->isp_ops->abort_command(sp);
 
 	ql_dbg(ql_dbg_io, fcport->vha, 0x212b,
 	    "%s: %s command for sp=%p, handle=%x on fcport=%p rval=%x\n",
 	    __func__, (rval != QLA_SUCCESS) ? "Failed to abort" : "Aborted",
-	    sp, sp->handle, fcport, rval);
+	    sp, handle, fcport, rval);
 
+	/*
+	 * If async tmf is enabled, the abort callback is called only on
+	 * return codes QLA_SUCCESS and QLA_ERR_FROM_FW.
+	 */
+	if (ql2xasynctmfenable &&
+	    rval != QLA_SUCCESS && rval != QLA_ERR_FROM_FW)
+		abts_done_called = 0;
+
+	/*
+	 * Return without dropping the kref so that the I/O request stays
+	 * held until the ABTS completes. The kref is dropped later in
+	 * qla24xx_abort_sp_done().
+	 */
+	if (abts_done_called && io_wait_for_abort_done)
+		return;
 out:
 	/* kref_get was done before work was schedule. */
 	kref_put(&sp->cmd_kref, sp->put_fn);
@@ -288,8 +349,7 @@
 	struct qla_hw_data *ha;
 	srb_t           *sp;
 
-
-	if (!fcport || (fcport && fcport->deleted))
+	if (!fcport || fcport->deleted)
 		return rval;
 
 	vha = fcport->vha;
@@ -325,6 +385,8 @@
 	dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma,
 	    fd->rqstlen, DMA_TO_DEVICE);
 
+	sp->flags |= SRB_DMA_VALID;
+
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x700e,
@@ -332,6 +394,7 @@
 		wake_up(&sp->nvme_ls_waitq);
 		sp->priv = NULL;
 		priv->sp = NULL;
+		qla_nvme_ls_unmap(sp, fd);
 		qla2x00_rel_sp(sp);
 		return rval;
 	}
@@ -373,16 +436,19 @@
 	uint16_t	avail_dsds;
 	struct dsd64	*cur_dsd;
 	struct req_que *req = NULL;
+	struct rsp_que *rsp = NULL;
 	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_qpair *qpair = sp->qpair;
 	struct srb_iocb *nvme = &sp->u.iocb_cmd;
 	struct scatterlist *sgl, *sg;
 	struct nvmefc_fcp_req *fd = nvme->u.nvme.desc;
+	struct nvme_fc_cmd_iu *cmd = fd->cmdaddr;
 	uint32_t        rval = QLA_SUCCESS;
 
 	/* Setup qpair pointers */
 	req = qpair->req;
+	rsp = qpair->rsp;
 	tot_dsds = fd->sg_cnt;
 
 	/* Acquire qpair specific lock */
@@ -395,8 +461,13 @@
 	}
 	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
 	if (req->cnt < (req_cnt + 2)) {
-		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
-		    rd_reg_dword_relaxed(req->req_q_out);
+		if (IS_SHADOW_REG_CAPABLE(ha)) {
+			cnt = *req->out_ptr;
+		} else {
+			cnt = rd_reg_dword_relaxed(req->req_q_out);
+			if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+				goto queuing_error;
+		}
 
 		if (req->ring_index < cnt)
 			req->cnt = cnt - req->ring_index;
@@ -410,8 +481,6 @@
 	}
 
 	if (unlikely(!fd->sqid)) {
-		struct nvme_fc_cmd_iu *cmd = fd->cmdaddr;
-
 		if (cmd->sqe.common.opcode == nvme_admin_async_event) {
 			nvme->u.nvme.aen_op = 1;
 			atomic_inc(&ha->nvme_active_aen_cnt);
@@ -439,8 +508,8 @@
 	/* No data transfer how do we check buffer len == 0?? */
 	if (fd->io_dir == NVMEFC_FCP_READ) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
-		vha->qla_stats.input_bytes += fd->payload_length;
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += fd->payload_length;
+		qpair->counters.input_requests++;
 	} else if (fd->io_dir == NVMEFC_FCP_WRITE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
 		if ((vha->flags.nvme_first_burst) &&
@@ -452,12 +521,21 @@
 				cmd_pkt->control_flags |=
 					cpu_to_le16(CF_NVME_FIRST_BURST_ENABLE);
 		}
-		vha->qla_stats.output_bytes += fd->payload_length;
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += fd->payload_length;
+		qpair->counters.output_requests++;
 	} else if (fd->io_dir == 0) {
 		cmd_pkt->control_flags = 0;
 	}
 
+	if (sp->fcport->edif.enable && fd->io_dir != 0)
+		cmd_pkt->control_flags |= cpu_to_le16(CF_EN_EDIF);
+
+	/* Set BIT_13 of control flags for Async event */
+	if (vha->flags.nvme2_enabled &&
+	    cmd->sqe.common.opcode == nvme_admin_async_event) {
+		cmd_pkt->control_flags |= cpu_to_le16(CF_ADMIN_ASYNC_EVENT);
+	}
+
 	/* Set NPORT-ID */
 	cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
 	cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa;
@@ -525,11 +603,20 @@
 		req->ring_ptr++;
 	}
 
+	/* ignore nvme async cmd due to long timeout */
+	if (!nvme->u.nvme.aen_op)
+		sp->qpair->cmd_cnt++;
+
 	/* Set chip new ring index. */
 	wrt_reg_dword(req->req_q_in, req->ring_index);
 
+	if (vha->flags.process_response_queue &&
+	    rsp->ring_ptr->signature != RESPONSE_PROCESSED)
+		qla24xx_process_response_queue(vha, rsp);
+
 queuing_error:
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
 	return rval;
 }
 
@@ -541,7 +628,7 @@
 	fc_port_t *fcport;
 	struct srb_iocb *nvme;
 	struct scsi_qla_host *vha;
-	int rval = -ENODEV;
+	int rval;
 	srb_t *sp;
 	struct qla_qpair *qpair = hw_queue_handle;
 	struct nvme_private *priv = fd->private;
@@ -549,16 +636,22 @@
 
 	if (!priv) {
 		/* nvme association has been torn down */
-		return rval;
+		return -ENODEV;
 	}
 
 	fcport = qla_rport->fcport;
 
-	if (!qpair || !fcport || (qpair && !qpair->fw_started) ||
-	    (fcport && fcport->deleted))
-		return rval;
+	if (unlikely(!qpair || !fcport || fcport->deleted))
+		return -EBUSY;
+
+	if (!(fcport->nvme_flag & NVME_FLAG_REGISTERED))
+		return -ENODEV;
 
 	vha = fcport->vha;
+
+	if (test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags))
+		return -EBUSY;
+
 	/*
 	 * If we know the dev is going away while the transport is still sending
 	 * IO's return busy back to stall the IO Q.  This happens when the
@@ -585,6 +678,7 @@
 	sp->put_fn = qla_nvme_release_fcp_cmd_kref;
 	sp->qpair = qpair;
 	sp->vha = vha;
+	sp->cmd_sp = sp;
 	nvme = &sp->u.iocb_cmd;
 	nvme->u.nvme.desc = fd;
 
@@ -601,6 +695,20 @@
 	return rval;
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
+/*
+ * Pass @map to blk_mq_pci_map_queues() with this host's PCI device. See also
+ * commit 01d838164b4c ("nvme-fc: add support for ->map_queues") # v5.16.
+ */
+static void qla_nvme_map_queues(struct nvme_fc_local_port *lport,
+		struct blk_mq_queue_map *map)
+{
+	struct scsi_qla_host *vha = lport->private;
+
+	blk_mq_pci_map_queues(map, vha->hw->pdev, vha->irq_offset);
+}
+#endif
+
 static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport)
 {
 	struct scsi_qla_host *vha = lport->private;
@@ -627,15 +735,6 @@
 }
 
 static struct nvme_fc_port_template qla_nvme_fc_transport = {
-#if 0
-	/*
-	 * See also commit 863fbae929c7 ("nvme_fc: add module to ops template
-	 * to allow module references"). See also commit 8c5c66052920
-	 * ("nvme-fc: Revert "add module to ops template to allow module
-	 * references"") # v5.7-rc1.
-	 */
-	.module	= THIS_MODULE,
-#endif
 	.localport_delete = qla_nvme_localport_delete,
 	.remoteport_delete = qla_nvme_remoteport_delete,
 	.create_queue   = qla_nvme_alloc_queue,
@@ -644,7 +743,10 @@
 	.ls_abort	= qla_nvme_ls_abort,
 	.fcp_io		= qla_nvme_post_cmd,
 	.fcp_abort	= qla_nvme_fcp_abort,
-	.max_hw_queues  = 8,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
+	.map_queues	= qla_nvme_map_queues,
+#endif
+	.max_hw_queues  = DEF_NVME_HW_QUEUES,
 	.max_sgl_segments = 1024,
 	.max_dif_sgl_segments = 64,
 	.dma_boundary = 0xFFFFFFFF,
@@ -661,11 +763,11 @@
 	if (!IS_ENABLED(CONFIG_NVME_FC))
 		return;
 
-	ql_log(ql_log_warn, NULL, 0x2112,
+	ql_log(ql_log_warn, fcport->vha, 0x2112,
 	    "%s: unregister remoteport on %p %8phN\n",
 	    __func__, fcport, fcport->port_name);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
 	if (test_bit(PFLG_DRIVER_REMOVING, &fcport->vha->pci_flags))
 		nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0);
 #endif
@@ -712,33 +814,52 @@
 	ha = vha->hw;
 	tmpl = &qla_nvme_fc_transport;
 
-	WARN_ON(vha->nvme_local_port);
-
-	if (ha->max_req_queues < 3) {
-		if (!ha->flags.max_req_queue_warned)
-			ql_log(ql_log_info, vha, 0x2120,
-			       "%s: Disabling FC-NVME due to lack of free queue pairs (%d).\n",
-			       __func__, ha->max_req_queues);
-		ha->flags.max_req_queue_warned = 1;
-		return ret;
+	if (ql2xnvme_queues < MIN_NVME_HW_QUEUES) {
+		ql_log(ql_log_warn, vha, 0xfffd,
+		    "ql2xnvme_queues=%d is lower than minimum queues: %d. Resetting ql2xnvme_queues to:%d\n",
+		    ql2xnvme_queues, MIN_NVME_HW_QUEUES, DEF_NVME_HW_QUEUES);
+		ql2xnvme_queues = DEF_NVME_HW_QUEUES;
+	} else if (ql2xnvme_queues > (ha->max_qpairs - 1)) {
+		ql_log(ql_log_warn, vha, 0xfffd,
+		       "ql2xnvme_queues=%d is greater than available IRQs: %d. Resetting ql2xnvme_queues to: %d\n",
+		       ql2xnvme_queues, (ha->max_qpairs - 1),
+		       (ha->max_qpairs - 1));
+		ql2xnvme_queues = ((ha->max_qpairs - 1));
 	}
 
 	qla_nvme_fc_transport.max_hw_queues =
-	    min((uint8_t)(qla_nvme_fc_transport.max_hw_queues),
-		(uint8_t)(ha->max_req_queues - 2));
+	    min((uint8_t)(ql2xnvme_queues),
+		(uint8_t)((ha->max_qpairs - 1) ? (ha->max_qpairs - 1) : 1));
+
+	ql_log(ql_log_info, vha, 0xfffb,
+	       "Number of NVME queues used for this port: %d\n",
+	    qla_nvme_fc_transport.max_hw_queues);
+
 
 	pinfo.node_name = wwn_to_u64(vha->node_name);
 	pinfo.port_name = wwn_to_u64(vha->port_name);
 	pinfo.port_role = FC_PORT_ROLE_NVME_INITIATOR;
 	pinfo.port_id = vha->d_id.b24;
 
-	ql_log(ql_log_info, vha, 0xffff,
-	    "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
-	    pinfo.node_name, pinfo.port_name, pinfo.port_id);
-	qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
+	mutex_lock(&ha->vport_lock);
+	/*
+	 * Check again for nvme_local_port to see if any other thread raced
+	 * with this one and finished registration.
+	 */
+	if (!vha->nvme_local_port) {
+		ql_log(ql_log_info, vha, 0xffff,
+		    "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
+		    pinfo.node_name, pinfo.port_name, pinfo.port_id);
+		qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
 
-	ret = nvme_fc_register_localport(&pinfo, tmpl,
-	    get_device(&ha->pdev->dev), &vha->nvme_local_port);
+		ret = nvme_fc_register_localport(&pinfo, tmpl,
+						 get_device(&ha->pdev->dev),
+						 &vha->nvme_local_port);
+		mutex_unlock(&ha->vport_lock);
+	} else {
+		mutex_unlock(&ha->vport_lock);
+		return 0;
+	}
 	if (ret) {
 		ql_log(ql_log_warn, vha, 0xffff,
 		    "register_localport failed: ret=%x\n", ret);
@@ -756,3 +877,85 @@
 }
 
 #endif
+
+/* Make firmware use the driver's ABTS retry count and response timeout. */
+void qla_nvme_abort_set_option(struct abort_entry_24xx *abt, srb_t *orig_sp)
+{
+	struct qla_hw_data *hw;
+
+	if (!(ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(orig_sp)))
+		return;
+
+	hw = orig_sp->fcport->vha->hw;
+	WARN_ON_ONCE(abt->options & cpu_to_le16(BIT_0));
+
+	/* retry the ABTS a driver-specified number of times (2) */
+	abt->drv.abts_rty_cnt = cpu_to_le16(2);
+	abt->options |= cpu_to_le16(AOF_ABTS_RTY_CNT);
+	/* response timeout: 2 * r_a_tov, expressed in seconds */
+	abt->drv.rsp_timeout = cpu_to_le16(2 * (hw->r_a_tov / 10));
+	abt->options |= cpu_to_le16(AOF_RSP_TIMEOUT);
+}
+
+/* Debug-log the completion status carried by abort IOCB @abt. */
+void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *orig_sp)
+{
+	u16	comp_status;
+	struct scsi_qla_host *vha;
+
+	if (!(ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(orig_sp)))
+		return;
+
+	vha = orig_sp->fcport->vha;
+	comp_status = le16_to_cpu(abt->comp_status);
+
+	switch (comp_status) {
+	case CS_RESET:		/* reset event aborted */
+	case CS_ABORTED:	/* IOCB was cleaned */
+	case CS_TIMEOUT:
+	/* N_Port handle is not currently logged in */
+	case CS_PORT_UNAVAILABLE:
+	/* N_Port handle was logged out while waiting for ABTS to complete */
+	case CS_PORT_LOGGED_OUT:
+	/* Firmware found that the port name changed */
+	case CS_PORT_CONFIG_CHG:
+		ql_dbg(ql_dbg_async, vha, 0xf09d,
+		       "Abort I/O IOCB completed with error, comp_status=%x\n",
+		       comp_status);
+		break;
+
+	/* BA_RJT was received for the ABTS */
+	case CS_REJECT_RECEIVED:
+		ql_dbg(ql_dbg_async, vha, 0xf09e,
+		       "BA_RJT was received for the ABTS rjt_vendorUnique = %u\n",
+		       abt->fw.ba_rjt_vendorUnique);
+		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
+		       "ba_rjt_reasonCodeExpl = %u, ba_rjt_reasonCode = %u\n",
+		       abt->fw.ba_rjt_reasonCodeExpl, abt->fw.ba_rjt_reasonCode);
+		break;
+
+	case CS_COMPLETE:
+		ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0xf09f,
+		       "IOCB request is completed successfully comp_status=%x\n",
+		       comp_status);
+		break;
+
+	case CS_IOCB_ERROR:
+		ql_dbg(ql_dbg_async, vha, 0xf0a0,
+		       "IOCB request is failed, comp_status=%x\n", comp_status);
+		break;
+
+	default:
+		ql_dbg(ql_dbg_async, vha, 0xf0a1,
+		       "Invalid Abort IO IOCB Completion Status %x\n",
+		       comp_status);
+		break;
+	}
+}
+
+/* Drop one cmd_kref reference on @orig_sp if NVMe ABTS waiting is enabled. */
+inline void qla_wait_nvme_release_cmd_kref(srb_t *orig_sp)
+{
+	if (ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(orig_sp))
+		kref_put(&orig_sp->cmd_kref, orig_sp->put_fn);
+}

diff --git a/scst/qla2x00t-32gbit/qla_nvme.h b/scst/qla2x00t-32gbit/qla_nvme.h
index 01376c6..78f9dd1 100644
--- a/scst/qla2x00t-32gbit/qla_nvme.h
+++ b/scst/qla2x00t-32gbit/qla_nvme.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2017 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_NVME_H
 #define __QLA_NVME_H
@@ -17,8 +16,8 @@
 #include "qla_def.h"
 #include "qla_dsd.h"
 
-/* default dev loss time (seconds) before transport tears down ctrl */
-#define NVME_FC_DEV_LOSS_TMO  30
+#define MIN_NVME_HW_QUEUES 1
+#define DEF_NVME_HW_QUEUES 8
 
 #define NVME_ATIO_CMD_OFF 32
 #define NVME_FIRST_PACKET_CMDLEN (64 - NVME_ATIO_CMD_OFF)
@@ -60,6 +59,7 @@
 	uint64_t rsvd;
 
 	__le16	control_flags;		/* Control Flags */
+#define CF_ADMIN_ASYNC_EVENT		BIT_13
 #define CF_NVME_FIRST_BURST_ENABLE	BIT_11
 #define CF_DIF_SEG_DESCR_ENABLE         BIT_3
 #define CF_DATA_SEG_DESCR_ENABLE        BIT_2

diff --git a/scst/qla2x00t-32gbit/qla_nx.c b/scst/qla2x00t-32gbit/qla_nx.c
index 0c9f94e..a6f7d3e 100644
--- a/scst/qla2x00t-32gbit/qla_nx.c
+++ b/scst/qla2x00t-32gbit/qla_nx.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include <linux/delay.h>
@@ -335,20 +334,20 @@
 };
 
 /* Device states */
-static char *q_dev_state[] = {
-	 "Unknown",
-	"Cold",
-	"Initializing",
-	"Ready",
-	"Need Reset",
-	"Need Quiescent",
-	"Failed",
-	"Quiescent",
+static const char *const q_dev_state[] = {
+	[QLA8XXX_DEV_UNKNOWN]		= "Unknown",
+	[QLA8XXX_DEV_COLD]		= "Cold/Re-init",
+	[QLA8XXX_DEV_INITIALIZING]	= "Initializing",
+	[QLA8XXX_DEV_READY]		= "Ready",
+	[QLA8XXX_DEV_NEED_RESET]	= "Need Reset",
+	[QLA8XXX_DEV_NEED_QUIESCENT]	= "Need Quiescent",
+	[QLA8XXX_DEV_FAILED]		= "Failed",
+	[QLA8XXX_DEV_QUIESCENT]		= "Quiescent",
 };
 
-char *qdev_state(uint32_t dev_state)
+const char *qdev_state(uint32_t dev_state)
 {
-	return q_dev_state[dev_state];
+	return (dev_state < MAX_STATES) ? q_dev_state[dev_state] : "Unknown";
 }
 
 /*
@@ -489,29 +488,26 @@
 	return data;
 }
 
-#define IDC_LOCK_TIMEOUT 100000000
+/*
+ * Context: task, might sleep
+ */
 int qla82xx_idc_lock(struct qla_hw_data *ha)
 {
-	int i;
-	int done = 0, timeout = 0;
+	const int delay_ms = 100, timeout_ms = 2000;
+	int done, total = 0;
 
-	while (!done) {
+	might_sleep();
+
+	while (true) {
 		/* acquire semaphore5 from PCI HW block */
 		done = qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM5_LOCK));
 		if (done == 1)
 			break;
-		if (timeout >= IDC_LOCK_TIMEOUT)
+		if (WARN_ON_ONCE(total >= timeout_ms))
 			return -1;
 
-		timeout++;
-
-		/* Yield CPU */
-		if (!in_interrupt())
-			schedule();
-		else {
-			for (i = 0; i < 20; i++)
-				cpu_relax();
-		}
+		total += delay_ms;
+		msleep(delay_ms);
 	}
 
 	return 0;
@@ -973,7 +969,7 @@
 static int
 qla82xx_flash_wait_write_finish(struct qla_hw_data *ha)
 {
-	uint32_t val;
+	uint32_t val = 0;
 	int i, ret;
 	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
@@ -1074,7 +1070,8 @@
 		return ret;
 	}
 
-	if (qla82xx_flash_set_write_enable(ha))
+	ret = qla82xx_flash_set_write_enable(ha);
+	if (ret < 0)
 		goto done_write;
 
 	qla82xx_wr_32(ha, QLA82XX_ROMUSB_ROM_WDATA, data);
@@ -2176,7 +2173,6 @@
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_82xx __iomem *reg;
-	int status = 0;
 	uint32_t stat;
 	uint32_t host_int = 0;
 	uint16_t mb[8];
@@ -2205,7 +2201,6 @@
 		case 0x10:
 		case 0x11:
 			qla82xx_mbx_completion(vha, MSW(stat));
-			status |= MBX_INTERRUPT;
 			break;
 		case 0x12:
 			mb[0] = MSW(stat);
@@ -3073,8 +3068,7 @@
 
 	ql_log(ql_log_info, vha, 0x00b6,
 	    "Device state is 0x%x = %s.\n",
-	    dev_state,
-	    dev_state < MAX_STATES ? qdev_state(dev_state) : "Unknown");
+	    dev_state, qdev_state(dev_state));
 
 	/* Force to DEV_COLD unless someone else is starting a reset */
 	if (dev_state != QLA8XXX_DEV_INITIALIZING &&
@@ -3197,8 +3191,7 @@
 	old_dev_state = dev_state;
 	ql_log(ql_log_info, vha, 0x009b,
 	    "Device state is 0x%x = %s.\n",
-	    dev_state,
-	    dev_state < MAX_STATES ? qdev_state(dev_state) : "Unknown");
+	    dev_state, qdev_state(dev_state));
 
 	/* wait for 30 seconds for device to go ready */
 	dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout * HZ);
@@ -3219,9 +3212,7 @@
 		if (loopcount < 5) {
 			ql_log(ql_log_info, vha, 0x009d,
 			    "Device state is 0x%x = %s.\n",
-			    dev_state,
-			    dev_state < MAX_STATES ? qdev_state(dev_state) :
-			    "Unknown");
+			    dev_state, qdev_state(dev_state));
 		}
 
 		switch (dev_state) {
@@ -3451,8 +3442,7 @@
 	} else
 		ql_log(ql_log_info, vha, 0xb031,
 		    "Device state is 0x%x = %s.\n",
-		    dev_state,
-		    dev_state < MAX_STATES ? qdev_state(dev_state) : "Unknown");
+		    dev_state, qdev_state(dev_state));
 }
 
 /*

diff --git a/scst/qla2x00t-32gbit/qla_nx.h b/scst/qla2x00t-32gbit/qla_nx.h
index 93344a0..6dc80c8 100644
--- a/scst/qla2x00t-32gbit/qla_nx.h
+++ b/scst/qla2x00t-32gbit/qla_nx.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #ifndef __QLA_NX_H
 #define __QLA_NX_H
@@ -541,14 +540,18 @@
 #define QLA82XX_CRB_DRV_IDC_VERSION  (QLA82XX_CAM_RAM(0x174))
 
 /* Every driver should use these Device State */
-#define QLA8XXX_DEV_COLD		1
-#define QLA8XXX_DEV_INITIALIZING	2
-#define QLA8XXX_DEV_READY		3
-#define QLA8XXX_DEV_NEED_RESET		4
-#define QLA8XXX_DEV_NEED_QUIESCENT	5
-#define QLA8XXX_DEV_FAILED		6
-#define QLA8XXX_DEV_QUIESCENT		7
-#define	MAX_STATES			8 /* Increment if new state added */
+enum {
+	QLA8XXX_DEV_UNKNOWN,
+	QLA8XXX_DEV_COLD,
+	QLA8XXX_DEV_INITIALIZING,
+	QLA8XXX_DEV_READY,
+	QLA8XXX_DEV_NEED_RESET,
+	QLA8XXX_DEV_NEED_QUIESCENT,
+	QLA8XXX_DEV_FAILED,
+	QLA8XXX_DEV_QUIESCENT,
+	MAX_STATES, /* Must stay last: number of states defined above */
+};
+
 #define QLA8XXX_BAD_VALUE		0xbad0bad0
 
 #define QLA82XX_IDC_VERSION			1

diff --git a/scst/qla2x00t-32gbit/qla_nx2.c b/scst/qla2x00t-32gbit/qla_nx2.c
index 50e5760..41ff6fb 100644
--- a/scst/qla2x00t-32gbit/qla_nx2.c
+++ b/scst/qla2x00t-32gbit/qla_nx2.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #include <linux/vmalloc.h>
@@ -140,7 +139,7 @@
 	uint32_t mask)
 {
 	unsigned long timeout;
-	uint32_t temp;
+	uint32_t temp = 0;
 
 	/* jiffies after 100ms */
 	timeout = jiffies + msecs_to_jiffies(TIMEOUT_100_MS);
@@ -660,7 +659,7 @@
 qla8044_poll_reg(struct scsi_qla_host *vha, uint32_t addr,
 	int duration, uint32_t test_mask, uint32_t test_result)
 {
-	uint32_t value;
+	uint32_t value = 0;
 	int timeout_error;
 	uint8_t retries;
 	int ret_val = QLA_SUCCESS;
@@ -1939,8 +1938,7 @@
 	dev_state = qla8044_rd_direct(vha, QLA8044_CRB_DEV_STATE_INDEX);
 	ql_dbg(ql_dbg_p3p, vha, 0xb0ce,
 	    "Device state is 0x%x = %s\n",
-	    dev_state, dev_state < MAX_STATES ?
-	    qdev_state(dev_state) : "Unknown");
+	    dev_state, qdev_state(dev_state));
 
 	/* wait for 30 seconds for device to go ready */
 	dev_init_timeout = jiffies + (ha->fcoe_dev_init_timeout * HZ);
@@ -1953,8 +1951,7 @@
 				ql_log(ql_log_warn, vha, 0xb0cf,
 				    "%s: Device Init Failed 0x%x = %s\n",
 				    QLA2XXX_DRIVER_NAME, dev_state,
-				    dev_state < MAX_STATES ?
-				    qdev_state(dev_state) : "Unknown");
+				    qdev_state(dev_state));
 				qla8044_wr_direct(vha,
 				    QLA8044_CRB_DEV_STATE_INDEX,
 				    QLA8XXX_DEV_FAILED);
@@ -1964,8 +1961,7 @@
 		dev_state = qla8044_rd_direct(vha, QLA8044_CRB_DEV_STATE_INDEX);
 		ql_log(ql_log_info, vha, 0xb0d0,
 		    "Device state is 0x%x = %s\n",
-		    dev_state, dev_state < MAX_STATES ?
-		    qdev_state(dev_state) : "Unknown");
+		    dev_state, qdev_state(dev_state));
 
 		/* NOTE: Make sure idc unlocked upon exit of switch statement */
 		switch (dev_state) {
@@ -2029,7 +2025,7 @@
 }
 
 /**
- * qla4_8xxx_check_temp - Check the ISP82XX temperature.
+ * qla8044_check_temp - Check the ISP82XX temperature.
  * @vha: adapter block pointer.
  *
  * Note: The caller should not hold the idc lock.
@@ -2227,19 +2223,16 @@
 		if (opcode & QLA82XX_DBG_OPCODE_WR) {
 			qla8044_wr_reg_indirect(vha, crb_addr,
 			    crb_entry->value_1);
-			opcode &= ~QLA82XX_DBG_OPCODE_WR;
 		}
 
 		if (opcode & QLA82XX_DBG_OPCODE_RW) {
 			qla8044_rd_reg_indirect(vha, crb_addr, &read_value);
 			qla8044_wr_reg_indirect(vha, crb_addr, read_value);
-			opcode &= ~QLA82XX_DBG_OPCODE_RW;
 		}
 
 		if (opcode & QLA82XX_DBG_OPCODE_AND) {
 			qla8044_rd_reg_indirect(vha, crb_addr, &read_value);
 			read_value &= crb_entry->value_2;
-			opcode &= ~QLA82XX_DBG_OPCODE_AND;
 			if (opcode & QLA82XX_DBG_OPCODE_OR) {
 				read_value |= crb_entry->value_3;
 				opcode &= ~QLA82XX_DBG_OPCODE_OR;
@@ -2250,7 +2243,6 @@
 			qla8044_rd_reg_indirect(vha, crb_addr, &read_value);
 			read_value |= crb_entry->value_3;
 			qla8044_wr_reg_indirect(vha, crb_addr, read_value);
-			opcode &= ~QLA82XX_DBG_OPCODE_OR;
 		}
 		if (opcode & QLA82XX_DBG_OPCODE_POLL) {
 			poll_time = crb_entry->crb_strd.poll_timeout;
@@ -2270,7 +2262,6 @@
 					    crb_addr, &read_value);
 				}
 			} while (1);
-			opcode &= ~QLA82XX_DBG_OPCODE_POLL;
 		}
 
 		if (opcode & QLA82XX_DBG_OPCODE_RDSTATE) {
@@ -2284,7 +2275,6 @@
 			qla8044_rd_reg_indirect(vha, addr, &read_value);
 			index = crb_entry->crb_ctrl.state_index_v;
 			tmplt_hdr->saved_state_array[index] = read_value;
-			opcode &= ~QLA82XX_DBG_OPCODE_RDSTATE;
 		}
 
 		if (opcode & QLA82XX_DBG_OPCODE_WRSTATE) {
@@ -2304,7 +2294,6 @@
 			}
 
 			qla8044_wr_reg_indirect(vha, addr, read_value);
-			opcode &= ~QLA82XX_DBG_OPCODE_WRSTATE;
 		}
 
 		if (opcode & QLA82XX_DBG_OPCODE_MDSTATE) {
@@ -2317,7 +2306,6 @@
 			read_value |= crb_entry->value_3;
 			read_value += crb_entry->value_1;
 			tmplt_hdr->saved_state_array[index] = read_value;
-			opcode &= ~QLA82XX_DBG_OPCODE_MDSTATE;
 		}
 		crb_addr += crb_entry->crb_strd.addr_stride;
 	}
@@ -2595,7 +2583,7 @@
 	struct qla8044_minidump_entry_hdr *entry_hdr,
 	uint32_t **d_ptr)
 {
-	uint32_t r_addr, s_stride, s_addr, s_value, loop_cnt, i, r_value;
+	uint32_t r_addr, s_stride, s_addr, s_value, loop_cnt, i, r_value = 0;
 	struct qla8044_minidump_entry_mux *mux_hdr;
 	uint32_t *data_ptr = *d_ptr;
 

diff --git a/scst/qla2x00t-32gbit/qla_nx2.h b/scst/qla2x00t-32gbit/qla_nx2.h
index 8ba7c1d..2fc902a 100644
--- a/scst/qla2x00t-32gbit/qla_nx2.h
+++ b/scst/qla2x00t-32gbit/qla_nx2.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #ifndef __QLA_NX2_H

diff --git a/scst/qla2x00t-32gbit/qla_os.c b/scst/qla2x00t-32gbit/qla_os.c
index d614f4a..16917bc 100644
--- a/scst/qla2x00t-32gbit/qla_os.c
+++ b/scst/qla2x00t-32gbit/qla_os.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 
@@ -21,6 +20,11 @@
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #include <linux/refcount.h>
 #endif
+#include <linux/crash_dump.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+#include <linux/trace_events.h>
+#include <linux/trace.h>
+#endif
 
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsicam.h>
@@ -48,11 +52,20 @@
  */
 struct kmem_cache *srb_cachep;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+static struct trace_array *qla_trc_array;
+#endif
+
 int ql2xfulldump_on_mpifail;
 module_param(ql2xfulldump_on_mpifail, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ql2xfulldump_on_mpifail,
 		 "Set this to take full dump on MPI hang.");
 
+int ql2xenforce_iocb_limit = 1;
+module_param(ql2xenforce_iocb_limit, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ql2xenforce_iocb_limit,
+		 "Enforce IOCB throttling, to avoid FW congestion. (default: 1)");
+
 /*
  * CT6 CTX allocation cache
  */
@@ -62,6 +75,11 @@
  */
 uint ql_errlev = 0x8001;
 
+int ql2xsecenable;
+module_param(ql2xsecenable, int, S_IRUGO);
+MODULE_PARM_DESC(ql2xsecenable,
+	"Enable/disable security. 0(Default) - Security disabled. 1 - Security enabled.");
+
 static int ql2xenableclass2;
 module_param(ql2xenableclass2, int, S_IRUGO|S_IRUSR);
 MODULE_PARM_DESC(ql2xenableclass2,
@@ -120,6 +138,13 @@
 		"ql2xextended_error_logging=1).\n"
 		"\t\tDo LOGICAL OR of the value to enable more than one level");
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+int ql2xextended_error_logging_ktrace = 1;
+module_param(ql2xextended_error_logging_ktrace, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(ql2xextended_error_logging_ktrace,
+		"Same BIT definition as ql2xextended_error_logging, but used to control logging to kernel trace buffer (default=1).\n");
+#endif
+
 int ql2xshiftctondsd = 6;
 module_param(ql2xshiftctondsd, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xshiftctondsd,
@@ -205,12 +230,6 @@
 		" 0 -- Regular doorbell.\n"
 		" 1 -- CAMRAM doorbell (faster).\n");
 
-int ql2xtargetreset = 1;
-module_param(ql2xtargetreset, int, S_IRUGO);
-MODULE_PARM_DESC(ql2xtargetreset,
-		 "Enable target reset."
-		 "Default is 1 - use hw defaults.");
-
 int ql2xgffidenable;
 module_param(ql2xgffidenable, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xgffidenable,
@@ -341,14 +360,44 @@
 		"Enables RDP responses. "
 		"0 - no RDP responses (default). "
 		"1 - provide RDP responses.");
+int ql2xabts_wait_nvme = 1;
+module_param(ql2xabts_wait_nvme, int, 0444);
+MODULE_PARM_DESC(ql2xabts_wait_nvme,
+		 "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)");
+
+
+static u32 ql2xdelay_before_pci_error_handling = 5;
+module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
+MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
+	"Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
 
 static void qla2x00_clear_drv_active(struct qla_hw_data *);
 static void qla2x00_free_device(scsi_qla_host_t *);
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
-static int qla2xxx_map_queues(struct Scsi_Host *shost);
+
+/*
+ * See also commit a4e1d0b76e7b ("block: Change the return type of
+ * blk_mq_map_queues() into void") # v6.1.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
+#define MAP_QUEUES_RET int
+#else
+#define MAP_QUEUES_RET void
+#endif
+
+static MAP_QUEUES_RET qla2xxx_map_queues(struct Scsi_Host *shost);
 #endif
 static void qla2x00_destroy_deferred_work(struct qla_hw_data *);
 
+u32 ql2xnvme_queues = DEF_NVME_HW_QUEUES;
+module_param(ql2xnvme_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(ql2xnvme_queues,
+	"Number of NVMe Queues that can be configured.\n"
+	"Final value will be min(ql2xnvme_queues, num_cpus,num_chip_queues)\n"
+	"1 - Minimum number of queues supported\n"
+	"8 - Default value");
 
 static struct scsi_transport_template *qla2xxx_transport_template = NULL;
 struct scsi_transport_template *qla2xxx_transport_vport_template = NULL;
@@ -739,10 +788,11 @@
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 	struct completion *comp = sp->comp;
 
-	sp->free(sp);
+	/* kref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	cmd->result = res;
-	CMD_SP(cmd) = NULL;
-	cmd->scsi_done(cmd);
+	sp->type = 0;
+	scsi_done(cmd);
 	if (comp)
 		complete(comp);
 }
@@ -830,31 +880,17 @@
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 	struct completion *comp = sp->comp;
 
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 	cmd->result = res;
-	CMD_SP(cmd) = NULL;
-	cmd->scsi_done(cmd);
+	sp->type = 0;
+	scsi_done(cmd);
 	if (comp)
 		complete(comp);
 }
 
-#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 >= 2
-static int
-qla2xxx_queuecommand_wrk(struct Scsi_Host *host, struct scsi_cmnd *cmd);
-
-static int qla2xxx_queuecommand(struct scsi_cmnd *scmnd,
-				void (*done)(struct scsi_cmnd *))
-{
-	scmnd->scsi_done = done;
-	return qla2xxx_queuecommand_wrk(scmnd->device->host, scmnd);
-}
-
-static int
-qla2xxx_queuecommand_wrk(struct Scsi_Host *host, struct scsi_cmnd *cmd)
-#else
 static int
 qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
-#endif
 {
 	scsi_qla_host_t *vha = shost_priv(host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
@@ -876,7 +912,7 @@
 		uint16_t hwq;
 		struct qla_qpair *qpair = NULL;
 
-		tag = blk_mq_unique_tag(cmd->request);
+		tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmd));
 		hwq = blk_mq_unique_tag_to_hwq(tag);
 		qpair = ha->queue_pair_map[hwq];
 
@@ -917,8 +953,8 @@
 			goto qc24_fail_command;
 	}
 
-	if (!fcport) {
-		cmd->result = DID_NO_CONNECT << 16;
+	if (!fcport || fcport->deleted) {
+		cmd->result = DID_IMM_RETRY << 16;
 		goto qc24_fail_command;
 	}
 
@@ -947,12 +983,11 @@
 		goto qc24_target_busy;
 
 	sp = scsi_cmd_priv(cmd);
+	/* ref: INIT */
 	qla2xxx_init_sp(sp, vha, vha->hw->base_qpair, fcport);
 
 	sp->u.scmd.cmd = cmd;
 	sp->type = SRB_SCSI_CMD;
-
-	CMD_SP(cmd) = (void *)sp;
 	sp->free = qla2x00_sp_free_dma;
 	sp->done = qla2x00_sp_compl;
 
@@ -966,13 +1001,14 @@
 	return 0;
 
 qc24_host_busy_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 
 qc24_target_busy:
 	return SCSI_MLQUEUE_TARGET_BUSY;
 
 qc24_fail_command:
-	cmd->scsi_done(cmd);
+	scsi_done(cmd);
 
 	return 0;
 }
@@ -990,7 +1026,7 @@
 	srb_t *sp;
 	int rval;
 
-	rval = rport ? fc_remote_port_chkready(rport) : FC_PORTSTATE_OFFLINE;
+	rval = rport ? fc_remote_port_chkready(rport) : (DID_NO_CONNECT << 16);
 	if (rval) {
 		cmd->result = rval;
 		ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3076,
@@ -999,11 +1035,18 @@
 		goto qc24_fail_command;
 	}
 
-	if (!fcport) {
+	if (!qpair->online) {
+		ql_dbg(ql_dbg_io, vha, 0x3077,
+		       "qpair not online. eeh_busy=%d.\n", ha->flags.eeh_busy);
 		cmd->result = DID_NO_CONNECT << 16;
 		goto qc24_fail_command;
 	}
 
+	if (!fcport || fcport->deleted) {
+		cmd->result = DID_IMM_RETRY << 16;
+		goto qc24_fail_command;
+	}
+
 	if (atomic_read(&fcport->state) != FCS_ONLINE || fcport->deleted) {
 		if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
 			atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
@@ -1029,11 +1072,11 @@
 		goto qc24_target_busy;
 
 	sp = scsi_cmd_priv(cmd);
+	/* ref: INIT */
 	qla2xxx_init_sp(sp, vha, qpair, fcport);
 
 	sp->u.scmd.cmd = cmd;
 	sp->type = SRB_SCSI_CMD;
-	CMD_SP(cmd) = (void *)sp;
 	sp->free = qla2xxx_qpair_sp_free_dma;
 	sp->done = qla2xxx_qpair_sp_compl;
 
@@ -1041,26 +1084,20 @@
 	if (rval != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3078,
 		    "Start scsi failed rval=%d for cmd=%p.\n", rval, cmd);
-		if (rval == QLA_INTERFACE_ERROR)
-			goto qc24_free_sp_fail_command;
 		goto qc24_host_busy_free_sp;
 	}
 
 	return 0;
 
 qc24_host_busy_free_sp:
-	sp->free(sp);
+	/* ref: INIT */
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 
 qc24_target_busy:
 	return SCSI_MLQUEUE_TARGET_BUSY;
 
-qc24_free_sp_fail_command:
-	sp->free(sp);
-	CMD_SP(cmd) = NULL;
-	qla2xxx_rel_qpair_sp(sp->qpair, sp);
-
 qc24_fail_command:
-	cmd->scsi_done(cmd);
+	scsi_done(cmd);
 
 	return 0;
 }
@@ -1085,6 +1122,7 @@
 	unsigned long wait_iter = ABORT_WAIT_ITER;
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	struct qla_hw_data *ha = vha->hw;
+	srb_t *sp = scsi_cmd_priv(cmd);
 	int ret = QLA_SUCCESS;
 
 	if (unlikely(pci_channel_offline(ha->pdev)) || ha->flags.eeh_busy) {
@@ -1093,10 +1131,9 @@
 		return ret;
 	}
 
-	while (CMD_SP(cmd) && wait_iter--) {
+	while (sp->type && wait_iter--)
 		msleep(ABORT_POLLING_PERIOD);
-	}
-	if (CMD_SP(cmd))
+	if (sp->type)
 		ret = QLA_FUNCTION_FAILED;
 
 	return ret;
@@ -1148,12 +1185,28 @@
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags;
 	int res;
+	/* Return 0 = sleep, x=wake */
 
 	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 	ql_dbg(ql_dbg_init, vha, 0x00ec,
 	    "tgt %p, fcport_count=%d\n",
 	    vha, vha->fcport_count);
 	res = (vha->fcport_count == 0);
+	if  (res) {
+		struct fc_port *fcport;
+
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->deleted != QLA_SESS_DELETED) {
+				/* session(s) may not be fully logged in
+				 * (ie fcport_count=0), but session
+				 * deletion thread(s) may be inflight.
+				 */
+
+				res = 0;
+				break;
+			}
+		}
+	}
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 
 	return res;
@@ -1235,35 +1288,6 @@
 	return return_status;
 }
 
-#define ISP_REG_DISCONNECT 0xffffffffU
-/**************************************************************************
-* qla2x00_isp_reg_stat
-*
-* Description:
-*	Read the host status register of ISP before aborting the command.
-*
-* Input:
-*	ha = pointer to host adapter structure.
-*
-*
-* Returns:
-*	Either true or false.
-*
-* Note:	Return true if there is register disconnect.
-**************************************************************************/
-static inline
-uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
-{
-	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
-	struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
-
-	if (IS_P3P_TYPE(ha))
-		return ((rd_reg_dword(&reg82->host_int)) == ISP_REG_DISCONNECT);
-	else
-		return ((rd_reg_dword(&reg->host_status)) ==
-			ISP_REG_DISCONNECT);
-}
-
 /**************************************************************************
 * qla2xxx_eh_abort
 *
@@ -1293,22 +1317,27 @@
 	uint32_t ratov_j;
 	struct qla_qpair *qpair;
 	unsigned long flags;
+	int fast_fail_status = SUCCESS;
 
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8042,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
+	/* Save any FAST_IO_FAIL value to return later if abort succeeds */
 	ret = fc_block_scsi_eh(cmd);
 	if (ret != 0)
-		return ret;
+		fast_fail_status = ret;
 
 	sp = scsi_cmd_priv(cmd);
 	qpair = sp->qpair;
 
+	vha->cmd_timeout_cnt++;
+
 	if ((sp->fcport && sp->fcport->deleted) || !qpair)
-		return SUCCESS;
+		return fast_fail_status != SUCCESS ? fast_fail_status : FAILED;
 
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 	sp->comp = &comp;
@@ -1343,7 +1372,7 @@
 			    __func__, ha->r_a_tov/10);
 			ret = FAILED;
 		} else {
-			ret = SUCCESS;
+			ret = fast_fail_status;
 		}
 		break;
 	default:
@@ -1363,21 +1392,20 @@
 /*
  * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED.
  */
-int
-qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
-	uint64_t l, enum nexus_wait_type type)
+static int
+__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t,
+				       uint64_t l, enum nexus_wait_type type)
 {
 	int cnt, match, status;
 	unsigned long flags;
-	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req;
+	scsi_qla_host_t *vha = qpair->vha;
+	struct req_que *req = qpair->req;
 	srb_t *sp;
 	struct scsi_cmnd *cmd;
 
 	status = QLA_SUCCESS;
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
-	req = vha->req;
+	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 	for (cnt = 1; status == QLA_SUCCESS &&
 		cnt < req->num_outstanding_cmds; cnt++) {
 		sp = req->outstanding_cmds[cnt];
@@ -1404,15 +1432,35 @@
 		if (!match)
 			continue;
 
-		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 		status = qla2x00_eh_wait_on_command(cmd);
-		spin_lock_irqsave(&ha->hardware_lock, flags);
+		spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 	}
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
 	return status;
 }
 
+int
+qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+				     uint64_t l, enum nexus_wait_type type)
+{
+	struct qla_qpair *qpair;
+	struct qla_hw_data *ha = vha->hw;
+	int i, status = QLA_SUCCESS;
+
+	status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l,
+							type);
+	for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) {
+		qpair = ha->queue_pair_map[i];
+		if (!qpair)
+			continue;
+		status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l,
+								type);
+	}
+	return status;
+}
+
 static char *reset_errors[] = {
 	"HBA not online",
 	"HBA not ready",
@@ -1421,27 +1469,36 @@
 };
 
 static int
-__qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
-    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, uint64_t, int))
+qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
 {
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
+	struct scsi_device *sdev = cmd->device;
+	scsi_qla_host_t *vha = shost_priv(sdev->host);
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	fc_port_t *fcport = (struct fc_port *) sdev->hostdata;
+	struct qla_hw_data *ha = vha->hw;
 	int err;
 
+	if (qla2x00_isp_reg_stat(ha)) {
+		ql_log(ql_log_info, vha, 0x803e,
+		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
+		return FAILED;
+	}
+
 	if (!fcport) {
 		return FAILED;
 	}
 
-	err = fc_block_scsi_eh(cmd);
+	err = fc_block_rport(rport);
 	if (err != 0)
 		return err;
 
 	if (fcport->deleted)
-		return SUCCESS;
+		return FAILED;
 
 	ql_log(ql_log_info, vha, 0x8009,
-	    "%s RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", name, vha->host_no,
-	    cmd->device->id, (u64)cmd->device->lun, cmd);
+	    "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no,
+	    sdev->id, (u64)sdev->lun, cmd);
 
 	err = 0;
 	if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
@@ -1450,64 +1507,100 @@
 		goto eh_reset_failed;
 	}
 	err = 2;
-	if (do_reset(fcport, cmd->device->lun, 1)
+	if (ha->isp_ops->lun_reset(fcport, sdev->lun, 1)
 		!= QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x800c,
 		    "do_reset failed for cmd=%p.\n", cmd);
 		goto eh_reset_failed;
 	}
 	err = 3;
-	if (qla2x00_eh_wait_for_pending_commands(vha, cmd->device->id,
-	    cmd->device->lun, type) != QLA_SUCCESS) {
+	if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+	    sdev->lun, WAIT_LUN) != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x800d,
 		    "wait for pending cmds failed for cmd=%p.\n", cmd);
 		goto eh_reset_failed;
 	}
 
 	ql_log(ql_log_info, vha, 0x800e,
-	    "%s RESET SUCCEEDED nexus:%ld:%d:%llu cmd=%p.\n", name,
-	    vha->host_no, cmd->device->id, (u64)cmd->device->lun, cmd);
+	    "DEVICE RESET SUCCEEDED nexus:%ld:%d:%llu cmd=%p.\n",
+	    vha->host_no, sdev->id, (u64)sdev->lun, cmd);
 
 	return SUCCESS;
 
 eh_reset_failed:
 	ql_log(ql_log_info, vha, 0x800f,
-	    "%s RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n", name,
-	    reset_errors[err], vha->host_no, cmd->device->id,
-	    (u64)cmd->device->lun, cmd);
+	    "DEVICE RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n",
+	    reset_errors[err], vha->host_no, sdev->id, (u64)sdev->lun,
+	    cmd);
+	vha->reset_cmd_err_cnt++;
 	return FAILED;
 }
 
 static int
-qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
-{
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	struct qla_hw_data *ha = vha->hw;
-
-	if (qla2x00_isp_reg_stat(ha)) {
-		ql_log(ql_log_info, vha, 0x803e,
-		    "PCI/Register disconnect, exiting.\n");
-		return FAILED;
-	}
-
-	return __qla2xxx_eh_generic_reset("DEVICE", WAIT_LUN, cmd,
-	    ha->isp_ops->lun_reset);
-}
-
-static int
 qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
 {
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+	struct scsi_device *sdev = cmd->device;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	scsi_qla_host_t *vha = shost_priv(rport_to_shost(rport));
 	struct qla_hw_data *ha = vha->hw;
+	fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+	int err;
 
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x803f,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
-	return __qla2xxx_eh_generic_reset("TARGET", WAIT_TARGET, cmd,
-	    ha->isp_ops->target_reset);
+	if (!fcport) {
+		return FAILED;
+	}
+
+	err = fc_block_rport(rport);
+	if (err != 0)
+		return err;
+
+	if (fcport->deleted)
+		return FAILED;
+
+	ql_log(ql_log_info, vha, 0x8009,
+	    "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no,
+	    sdev->id, cmd);
+
+	err = 0;
+	if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800a,
+		    "Wait for hba online failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
+	}
+	err = 2;
+	if (ha->isp_ops->target_reset(fcport, 0, 0) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800c,
+		    "target_reset failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
+	}
+	err = 3;
+	if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+	    0, WAIT_TARGET) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800d,
+		    "wait for pending cmds failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
+	}
+
+	ql_log(ql_log_info, vha, 0x800e,
+	    "TARGET RESET SUCCEEDED nexus:%ld:%d cmd=%p.\n",
+	    vha->host_no, sdev->id, cmd);
+
+	return SUCCESS;
+
+eh_reset_failed:
+	ql_log(ql_log_info, vha, 0x800f,
+	    "TARGET RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n",
+	    reset_errors[err], vha->host_no, cmd->device->id, (u64)cmd->device->lun,
+	    cmd);
+	vha->reset_cmd_err_cnt++;
+	return FAILED;
 }
 
 /**************************************************************************
@@ -1529,7 +1622,6 @@
 qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
 	int ret = FAILED;
 	unsigned int id;
 	uint64_t lun;
@@ -1538,21 +1630,13 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8040,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
 	id = cmd->device->id;
 	lun = cmd->device->lun;
 
-	if (!fcport) {
-		return ret;
-	}
-
-	ret = fc_block_scsi_eh(cmd);
-	if (ret != 0)
-		return ret;
-	ret = FAILED;
-
 	if (qla2x00_chip_is_down(vha))
 		return ret;
 
@@ -1615,7 +1699,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8041,
 		    "PCI/Register disconnect, exiting.\n");
-		schedule_work(&ha->board_disable);
+		qla_pci_set_eeh_busy(vha);
 		return SUCCESS;
 	}
 
@@ -1689,27 +1773,10 @@
 qla2x00_loop_reset(scsi_qla_host_t *vha)
 {
 	int ret;
-	struct fc_port *fcport;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (IS_QLAFX00(ha)) {
-		return qlafx00_loop_reset(vha);
-	}
-
-	if (ql2xtargetreset == 1 && ha->flags.enable_target_reset) {
-		list_for_each_entry(fcport, &vha->vp_fcports, list) {
-			if (fcport->port_type != FCT_TARGET)
-				continue;
-
-			ret = ha->isp_ops->target_reset(fcport, 0, 0);
-			if (ret != QLA_SUCCESS) {
-				ql_dbg(ql_dbg_taskm, vha, 0x802c,
-				    "Bus Reset failed: Reset=%d "
-				    "d_id=%x.\n", ret, fcport->d_id.b24);
-			}
-		}
-	}
-
+	if (IS_QLAFX00(ha))
+		return QLA_SUCCESS;
 
 	if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) {
 		atomic_set(&vha->loop_state, LOOP_DOWN);
@@ -1793,7 +1860,7 @@
 
 		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
 #if HAVE_SCSI_MQ
-		if (ret_cmd && blk_mq_request_started(cmd->request))
+		if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
 			sp->done(sp, res);
 #else
 		if (ret_cmd && list_empty(&cmd->request->queuelist))
@@ -2799,6 +2866,16 @@
 	return atomic_read(&vha->loop_state) == LOOP_READY;
 }
 
+static void qla_heartbeat_work_fn(struct work_struct *work)
+{
+	struct qla_hw_data *ha = container_of(work,
+		struct qla_hw_data, heartbeat_work);
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+
+	if (!ha->flags.mbox_busy && base_vha->flags.init_done)
+		qla_no_op_mb(base_vha);
+}
+
 static void qla2x00_iocb_work_fn(struct work_struct *work)
 {
 	struct scsi_qla_host *vha = container_of(work,
@@ -2821,6 +2898,29 @@
 	spin_unlock_irqrestore(&vha->work_lock, flags);
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+static void
+qla_trace_init(void)
+{
+	qla_trc_array = trace_array_get_by_name("qla2xxx");
+	if (!qla_trc_array) {
+		ql_log(ql_log_fatal, NULL, 0x0001,
+		       "Unable to create qla2xxx trace instance, instance logging will be disabled.\n");
+		return;
+	}
+
+	QLA_TRACE_ENABLE(qla_trc_array);
+}
+
+static void
+qla_trace_uninit(void)
+{
+	if (!qla_trc_array)
+		return;
+	trace_array_put(qla_trc_array);
+}
+#endif
+
 /*
  * PCI driver interface
  */
@@ -2877,6 +2977,11 @@
 			return ret;
 	}
 
+	if (is_kdump_kernel()) {
+		ql2xmqsupport = 0;
+		ql2xallocfwdump = 0;
+	}
+
 	/* This may fail but that's ok */
 	pci_enable_pcie_error_reporting(pdev);
 
@@ -2894,6 +2999,17 @@
 	spin_lock_init(&ha->tgt.sess_lock);
 	spin_lock_init(&ha->tgt.atio_lock);
 
+	spin_lock_init(&ha->sadb_lock);
+	INIT_LIST_HEAD(&ha->sadb_tx_index_list);
+	INIT_LIST_HEAD(&ha->sadb_rx_index_list);
+
+	spin_lock_init(&ha->sadb_fp_lock);
+
+	if (qla_edif_sadb_build_free_pool(ha)) {
+		kfree(ha);
+		goto  disable_device;
+	}
+
 	atomic_set(&ha->nvme_active_aen_cnt, 0);
 
 	/* Clear our data area */
@@ -3092,8 +3208,8 @@
 		ha->portnum = PCI_FUNC(ha->pdev->devfn);
 		ha->max_fibre_devices = MAX_FIBRE_DEVICES_2400;
 		ha->mbx_count = MAILBOX_REGISTER_COUNT;
-		req_length = REQUEST_ENTRY_CNT_24XX;
-		rsp_length = RESPONSE_ENTRY_CNT_2300;
+		req_length = REQUEST_ENTRY_CNT_83XX;
+		rsp_length = RESPONSE_ENTRY_CNT_83XX;
 		ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX;
 		ha->max_loop_id = SNS_LAST_LOOP_ID_2300;
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
@@ -3183,6 +3299,10 @@
 	ha->mr.fcport.supported_classes = FC_COS_UNSPECIFIED;
 	ha->mr.fcport.scan_state = 1;
 
+	qla2xxx_reset_stats(host, QLA2XX_HW_ERROR | QLA2XX_SHT_LNK_DWN |
+			    QLA2XX_INT_ERR | QLA2XX_CMD_TIMEOUT |
+			    QLA2XX_RESET_CMD_ERR | QLA2XX_TGT_SHT_LNK_DOWN);
+
 	/* Set the SG table size based on ISP type */
 	if (!IS_FWI2_CAPABLE(ha)) {
 		if (IS_QLA2100(ha))
@@ -3216,7 +3336,7 @@
 	    host->max_cmd_len, host->max_channel, (u64)host->max_lun,
 	    host->transportt, sht->vendor_id);
 
-	INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
+	INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn);
 
 	/* Set up the irqs */
 	ret = qla2x00_request_irqs(ha, rsp);
@@ -3309,7 +3429,7 @@
 	    "req->req_q_in=%p req->req_q_out=%p rsp->rsp_q_in=%p rsp->rsp_q_out=%p.\n",
 	    req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out);
 
-	ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0);
+	ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0);
 	if (unlikely(!ha->wq)) {
 		ret = -ENOMEM;
 		goto probe_failed;
@@ -3351,6 +3471,10 @@
 	    host->can_queue, base_vha->req,
 	    base_vha->mgmt_svr_loop_id, host->sg_tablesize);
 
+	/* Check if FW supports MQ or not for ISP25xx */
+	if (IS_QLA25XX(ha) && !(ha->fw_attributes & BIT_6))
+		ha->mqenable = 0;
+
 	if (ha->mqenable) {
 		bool startit = false;
 
@@ -3364,6 +3488,7 @@
 		for (i = 0; i < ha->max_qpairs; i++)
 			qla2xxx_create_qpair(base_vha, 5, 0, startit);
 	}
+	qla_init_iocb_limit(base_vha);
 
 	if (ha->flags.running_gold_fw)
 		goto skip_dpc;
@@ -3478,7 +3603,7 @@
 		qla_dual_mode_enabled(base_vha))
 		scsi_scan_host(host);
 	else
-		ql_dbg(ql_dbg_init, base_vha, 0x0122,
+		ql_log(ql_log_info, base_vha, 0x0122,
 			"skipping scsi_scan_host() for non-initiator port\n");
 
 	qla2x00_alloc_sysfs_attr(base_vha);
@@ -3516,6 +3641,8 @@
 	return 0;
 
 probe_failed:
+	qla_enode_stop(base_vha);
+	qla_edb_stop(base_vha);
 	if (base_vha->gnl.l) {
 		dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size,
 				base_vha->gnl.l, base_vha->gnl.ldma);
@@ -3738,8 +3865,7 @@
 		if (ha->mqiobase)
 			iounmap(ha->mqiobase);
 
-		if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) &&
-		    ha->msixbase)
+		if (ha->msixbase)
 			iounmap(ha->msixbase);
 	}
 }
@@ -3818,6 +3944,8 @@
 		base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma);
 
 	base_vha->gnl.l = NULL;
+	qla_enode_stop(base_vha);
+	qla_edb_stop(base_vha);
 
 	vfree(base_vha->scan.l);
 
@@ -3851,7 +3979,6 @@
 	qla2x00_free_sysfs_attr(base_vha, true);
 
 	fc_remove_host(base_vha->host);
-	qlt_remove_target_resources(ha);
 
 	scsi_remove_host(base_vha->host);
 
@@ -3874,13 +4001,15 @@
 static inline void
 qla24xx_free_purex_list(struct purex_list *list)
 {
-	struct list_head *item, *next;
+	struct purex_item *item, *next;
 	ulong flags;
 
 	spin_lock_irqsave(&list->lock, flags);
-	list_for_each_safe(item, next, &list->head) {
-		list_del(item);
-		kfree(list_entry(item, struct purex_item, list));
+	list_for_each_entry_safe(item, next, &list->head, list) {
+		list_del(&item->list);
+		if (item == &item->vha->default_item)
+			continue;
+		kfree(item);
 	}
 	spin_unlock_irqrestore(&list->lock, flags);
 }
@@ -3911,7 +4040,6 @@
 
 	/* Flush the work queue and remove it */
 	if (ha->wq) {
-		flush_workqueue(ha->wq);
 		destroy_workqueue(ha->wq);
 		ha->wq = NULL;
 	}
@@ -3923,6 +4051,9 @@
 
 	qla82xx_md_free(vha);
 
+	qla_edif_sadb_release_free_pool(ha);
+	qla_edif_sadb_release(ha);
+
 	qla2x00_free_queues(ha);
 }
 
@@ -3975,6 +4106,7 @@
 		qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 		qla2x00_schedule_rport_del(vha, fcport);
 	}
+
 	/*
 	 * We may need to retry the login, so don't change the state of the
 	 * port but do the retries.
@@ -3997,6 +4129,16 @@
 	    "Mark all dev lost\n");
 
 	list_for_each_entry(fcport, &vha->vp_fcports, list) {
+		if (fcport->loop_id != FC_NO_LOOP_ID &&
+		    (fcport->flags & FCF_FCP2_DEVICE) &&
+		    fcport->port_type == FCT_TARGET &&
+		    !qla2x00_reset_active(vha)) {
+			ql_dbg(ql_dbg_disc, vha, 0x211a,
+			       "Delaying session delete for FCP2 flags 0x%x port_type = 0x%x port_id=%06x %phC",
+			       fcport->flags, fcport->port_type,
+			       fcport->d_id.b24, fcport->port_name);
+			continue;
+		}
 		fcport->scan_state = 0;
 		qlt_schedule_sess_for_deletion(fcport);
 	}
@@ -4028,15 +4170,20 @@
 	struct req_que **req, struct rsp_que **rsp)
 {
 	char	name[16];
+	int rc;
 
 	ha->init_cb = dma_alloc_coherent(&ha->pdev->dev, ha->init_cb_size,
 		&ha->init_cb_dma, GFP_KERNEL);
 	if (!ha->init_cb)
 		goto fail;
 
-	if (qlt_mem_alloc(ha) < 0)
+	rc = btree_init32(&ha->host_map);
+	if (rc)
 		goto fail_free_init_cb;
 
+	if (qlt_mem_alloc(ha) < 0)
+		goto fail_free_btree;
+
 	ha->gid_list = dma_alloc_coherent(&ha->pdev->dev,
 		qla2x00_gid_list_size(ha), &ha->gid_list_dma, GFP_KERNEL);
 	if (!ha->gid_list)
@@ -4046,7 +4193,7 @@
 	if (!ha->srb_mempool)
 		goto fail_free_gid_list;
 
-	if (IS_P3P_TYPE(ha)) {
+	if (IS_P3P_TYPE(ha) || IS_QLA27XX(ha) || (ql2xsecenable && IS_QLA28XX(ha))) {
 		/* Allocate cache for CT6 Ctx. */
 		if (!ctx_cachep) {
 			ctx_cachep = kmem_cache_create("qla2xxx_ctx",
@@ -4080,7 +4227,7 @@
 	    "init_cb=%p gid_list=%p, srb_mempool=%p s_dma_pool=%p.\n",
 	    ha->init_cb, ha->gid_list, ha->srb_mempool, ha->s_dma_pool);
 
-	if (IS_P3P_TYPE(ha) || ql2xenabledif) {
+	if (IS_P3P_TYPE(ha) || ql2xenabledif || (IS_QLA28XX(ha) && ql2xsecenable)) {
 		ha->dl_dma_pool = dma_pool_create(name, &ha->pdev->dev,
 			DSD_LIST_DMA_POOL_SIZE, 8, 0);
 		if (!ha->dl_dma_pool) {
@@ -4121,7 +4268,7 @@
 					ql_dbg_pci(ql_dbg_init, ha->pdev,
 					    0xe0ee, "%s: failed alloc dsd\n",
 					    __func__);
-					return 1;
+					return -ENOMEM;
 				}
 				ha->dif_bundle_kallocs++;
 
@@ -4269,7 +4416,7 @@
 
 	/* Get consistent memory allocated for Special Features-CB. */
 	if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
-		ha->sf_init_cb = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL,
+		ha->sf_init_cb = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL,
 						&ha->sf_init_cb_dma);
 		if (!ha->sf_init_cb)
 			goto fail_sf_init_cb;
@@ -4320,8 +4467,36 @@
 		goto fail_flt_buffer;
 	}
 
+	/* allocate the purex dma pool */
+	ha->purex_dma_pool = dma_pool_create(name, &ha->pdev->dev,
+	    ELS_MAX_PAYLOAD, 8, 0);
+
+	if (!ha->purex_dma_pool) {
+		ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011b,
+		    "Unable to allocate purex_dma_pool.\n");
+		goto fail_flt;
+	}
+
+	ha->elsrej.size = sizeof(struct fc_els_ls_rjt) + 16;
+	ha->elsrej.c = dma_alloc_coherent(&ha->pdev->dev,
+	    ha->elsrej.size, &ha->elsrej.cdma, GFP_KERNEL);
+
+	if (!ha->elsrej.c) {
+		ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff,
+		    "Alloc failed for els reject cmd.\n");
+		goto fail_elsrej;
+	}
+	ha->elsrej.c->er_cmd = ELS_LS_RJT;
+	ha->elsrej.c->er_reason = ELS_RJT_LOGIC;
+	ha->elsrej.c->er_explan = ELS_EXPL_UNAB_DATA;
 	return 0;
 
+fail_elsrej:
+	dma_pool_destroy(ha->purex_dma_pool);
+fail_flt:
+	dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE,
+	    ha->flt, ha->flt_dma);
+
 fail_flt_buffer:
 	dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE,
 	    ha->sfp_data, ha->sfp_data_dma);
@@ -4412,6 +4587,8 @@
 	ha->gid_list_dma = 0;
 fail_free_tgt_mem:
 	qlt_mem_free(ha);
+fail_free_btree:
+	btree_destroy32(&ha->host_map);
 fail_free_init_cb:
 	dma_free_coherent(&ha->pdev->dev, ha->init_cb_size, ha->init_cb,
 	ha->init_cb_dma);
@@ -4427,11 +4604,12 @@
 qla2x00_set_exlogins_buffer(scsi_qla_host_t *vha)
 {
 	int rval;
-	uint16_t	size, max_cnt, temp;
+	uint16_t	size, max_cnt;
+	uint32_t temp;
 	struct qla_hw_data *ha = vha->hw;
 
 	/* Return if we don't need to alloacate any extended logins */
-	if (!ql2xexlogins)
+	if (ql2xexlogins <= MAX_FIBRE_DEVICES_2400)
 		return QLA_SUCCESS;
 
 	if (!IS_EXLOGIN_OFFLD_CAPABLE(ha))
@@ -4673,8 +4851,7 @@
 		dma_free_coherent(&ha->pdev->dev,
 		    EFT_SIZE, ha->eft, ha->eft_dma);
 
-	if (ha->fw_dump)
-		vfree(ha->fw_dump);
+	vfree(ha->fw_dump);
 
 	ha->fce = NULL;
 	ha->fce_dma = 0;
@@ -4688,8 +4865,7 @@
 	ha->fw_dump_len = 0;
 
 	for (j = 0; j < 2; j++, fwdt++) {
-		if (fwdt->template)
-			vfree(fwdt->template);
+		vfree(fwdt->template);
 		fwdt->template = NULL;
 		fwdt->length = 0;
 	}
@@ -4829,10 +5005,21 @@
 	ha->dif_bundl_pool = NULL;
 
 	qlt_mem_free(ha);
+	qla_remove_hostmap(ha);
 
 	if (ha->init_cb)
 		dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
 			ha->init_cb, ha->init_cb_dma);
+
+	dma_pool_destroy(ha->purex_dma_pool);
+	ha->purex_dma_pool = NULL;
+
+	if (ha->elsrej.c) {
+		dma_free_coherent(&ha->pdev->dev, ha->elsrej.size,
+		    ha->elsrej.c, ha->elsrej.cdma);
+		ha->elsrej.c = NULL;
+	}
+
 	ha->init_cb = NULL;
 	ha->init_cb_dma = 0;
 
@@ -4879,7 +5066,6 @@
 	INIT_LIST_HEAD(&vha->work_list);
 	INIT_LIST_HEAD(&vha->list);
 	INIT_LIST_HEAD(&vha->qla_cmd_list);
-	INIT_LIST_HEAD(&vha->qla_sess_op_cmd_list);
 	INIT_LIST_HEAD(&vha->logo_list);
 	INIT_LIST_HEAD(&vha->plogi_ack_list);
 	INIT_LIST_HEAD(&vha->qp_list);
@@ -4894,6 +5080,9 @@
 	spin_lock_init(&vha->cmd_list_lock);
 	init_waitqueue_head(&vha->fcport_waitQ);
 	init_waitqueue_head(&vha->vref_waitq);
+	qla_enode_init(vha);
+	qla_edb_init(vha);
+
 
 	vha->gnl.size = sizeof(struct get_name_list_extended) *
 			(ha->max_loop_id + 1);
@@ -4920,7 +5109,7 @@
 	}
 	INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn);
 
-	sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no);
+	sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no);
 	ql_dbg(ql_dbg_init, vha, 0x0041,
 	    "Allocated the host=%p hw=%p vha=%p dev_name=%s",
 	    vha->host, vha->hw, vha,
@@ -4933,13 +5122,11 @@
 qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type)
 {
 	struct qla_work_evt *e;
-	uint8_t bail;
 
 	if (test_bit(UNLOADING, &vha->dpc_flags))
 		return NULL;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
-	if (bail)
+	if (qla_vha_mark_busy(vha))
 		return NULL;
 
 	e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC);
@@ -5049,7 +5236,7 @@
 
 	switch (code) {
 	case QLA_UEVENT_CODE_FW_DUMP:
-		snprintf(event_string, sizeof(event_string), "FW_DUMP=%ld",
+		snprintf(event_string, sizeof(event_string), "FW_DUMP=%lu",
 		    vha->host_no);
 		break;
 	default:
@@ -5093,11 +5280,7 @@
 	qla2x00_set_fcport_disc_state(fcport, DSC_UPD_FCPORT);
 	spin_unlock_irqrestore(&fcport->vha->work_lock, flags);
 
-#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 6
-	schedule_work(&fcport->reg_work);
-#else
 	queue_work(system_unbound_wq, &fcport->reg_work);
-#endif
 }
 
 static
@@ -5135,12 +5318,20 @@
 			fcport->d_id = e->u.new_sess.id;
 			fcport->flags |= FCF_FABRIC_DEVICE;
 			fcport->fw_login_state = DSC_LS_PLOGI_PEND;
+			fcport->tgt_short_link_down_cnt = 0;
 
 			memcpy(fcport->port_name, e->u.new_sess.port_name,
 			    WWN_SIZE);
 
 			fcport->fc4_type = e->u.new_sess.fc4_type;
+			if (NVME_PRIORITY(vha->hw, fcport))
+				fcport->do_prli_nvme = 1;
+			else
+				fcport->do_prli_nvme = 0;
+
 			if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) {
+				fcport->dm_login_expire = jiffies +
+					QLA_N2N_WAIT_TIME * HZ;
 				fcport->fc4_type = FS_FC4TYPE_FCP;
 				fcport->n2n_flag = 1;
 				if (vha->flags.nvme_enabled)
@@ -5385,6 +5576,9 @@
 			qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI,
 			    e->u.fcport.fcport, false);
 			break;
+		case QLA_EVT_SA_REPLACE:
+			rc = qla24xx_issue_sa_replace_iocb(vha, e);
+			break;
 		}
 
 		if (rc == EAGAIN) {
@@ -5434,6 +5628,7 @@
 		if (atomic_read(&fcport->state) != FCS_ONLINE &&
 		    fcport->login_retry) {
 			if (fcport->scan_state != QLA_FCPORT_FOUND ||
+			    fcport->disc_state == DSC_LOGIN_AUTH_PEND ||
 			    fcport->disc_state == DSC_LOGIN_COMPLETE)
 				continue;
 
@@ -5446,6 +5641,11 @@
 					ea.fcport = fcport;
 					qla24xx_handle_relogin_event(vha, &ea);
 				} else if (vha->hw->current_topology ==
+					 ISP_CFG_NL &&
+					IS_QLA2XXX_MIDTYPE(vha->hw)) {
+					(void)qla24xx_fcport_handle_login(vha,
+									fcport);
+				} else if (vha->hw->current_topology ==
 				    ISP_CFG_NL) {
 					fcport->login_retry--;
 					status =
@@ -5662,25 +5862,10 @@
 	}
 }
 
-static void
-qla83xx_wait_logic(void)
-{
-	int i;
-
-	/* Yield CPU */
-	if (!in_interrupt()) {
-		/*
-		 * Wait about 200ms before retrying again.
-		 * This controls the number of retries for single
-		 * lock operation.
-		 */
-		msleep(100);
-		schedule();
-	} else {
-		for (i = 0; i < 20; i++)
-			cpu_relax(); /* This a nop instr on i386 */
-	}
-}
+/*
+ * Control the frequency of IDC lock retries
+ */
+#define QLA83XX_WAIT_LOGIC_MS	100
 
 static int
 qla83xx_force_lock_recovery(scsi_qla_host_t *base_vha)
@@ -5770,7 +5955,7 @@
 		goto exit;
 
 	if (o_drv_lockid == n_drv_lockid) {
-		qla83xx_wait_logic();
+		msleep(QLA83XX_WAIT_LOGIC_MS);
 		goto retry_lockid;
 	} else
 		return QLA_SUCCESS;
@@ -5779,6 +5964,9 @@
 	return rval;
 }
 
+/*
+ * Context: task, can sleep
+ */
 void
 qla83xx_idc_lock(scsi_qla_host_t *base_vha, uint16_t requester_id)
 {
@@ -5786,6 +5974,8 @@
 	uint32_t lock_owner;
 	struct qla_hw_data *ha = base_vha->hw;
 
+	might_sleep();
+
 	/* IDC-lock implementation using driver-lock/lock-id remote registers */
 retry_lock:
 	if (qla83xx_rd_reg(base_vha, QLA83XX_DRIVER_LOCK, &data)
@@ -5804,7 +5994,7 @@
 			/* Retry/Perform IDC-Lock recovery */
 			if (qla83xx_idc_lock_recovery(base_vha)
 			    == QLA_SUCCESS) {
-				qla83xx_wait_logic();
+				msleep(QLA83XX_WAIT_LOGIC_MS);
 				goto retry_lock;
 			} else
 				ql_log(ql_log_warn, base_vha, 0xb075,
@@ -5862,98 +6052,6 @@
 	return true;
 }
 
-static uint
-qla25xx_rdp_port_speed_capability(struct qla_hw_data *ha)
-{
-	if (IS_CNA_CAPABLE(ha))
-		return RDP_PORT_SPEED_10GB;
-
-	if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
-		unsigned int speeds = 0;
-
-		if (ha->max_supported_speed == 2) {
-			if (ha->min_supported_speed <= 6)
-				speeds |= RDP_PORT_SPEED_64GB;
-		}
-
-		if (ha->max_supported_speed == 2 ||
-		    ha->max_supported_speed == 1) {
-			if (ha->min_supported_speed <= 5)
-				speeds |= RDP_PORT_SPEED_32GB;
-		}
-
-		if (ha->max_supported_speed == 2 ||
-		    ha->max_supported_speed == 1 ||
-		    ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 4)
-				speeds |= RDP_PORT_SPEED_16GB;
-		}
-
-		if (ha->max_supported_speed == 1 ||
-		    ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 3)
-				speeds |= RDP_PORT_SPEED_8GB;
-		}
-
-		if (ha->max_supported_speed == 0) {
-			if (ha->min_supported_speed <= 2)
-				speeds |= RDP_PORT_SPEED_4GB;
-		}
-
-		return speeds;
-	}
-
-	if (IS_QLA2031(ha))
-		return RDP_PORT_SPEED_16GB|RDP_PORT_SPEED_8GB|
-		       RDP_PORT_SPEED_4GB;
-
-	if (IS_QLA25XX(ha))
-		return RDP_PORT_SPEED_8GB|RDP_PORT_SPEED_4GB|
-		       RDP_PORT_SPEED_2GB|RDP_PORT_SPEED_1GB;
-
-	if (IS_QLA24XX_TYPE(ha))
-		return RDP_PORT_SPEED_4GB|RDP_PORT_SPEED_2GB|
-		       RDP_PORT_SPEED_1GB;
-
-	if (IS_QLA23XX(ha))
-		return RDP_PORT_SPEED_2GB|RDP_PORT_SPEED_1GB;
-
-	return RDP_PORT_SPEED_1GB;
-}
-
-static uint
-qla25xx_rdp_port_speed_currently(struct qla_hw_data *ha)
-{
-	switch (ha->link_data_rate) {
-	case PORT_SPEED_1GB:
-		return RDP_PORT_SPEED_1GB;
-
-	case PORT_SPEED_2GB:
-		return RDP_PORT_SPEED_2GB;
-
-	case PORT_SPEED_4GB:
-		return RDP_PORT_SPEED_4GB;
-
-	case PORT_SPEED_8GB:
-		return RDP_PORT_SPEED_8GB;
-
-	case PORT_SPEED_10GB:
-		return RDP_PORT_SPEED_10GB;
-
-	case PORT_SPEED_16GB:
-		return RDP_PORT_SPEED_16GB;
-
-	case PORT_SPEED_32GB:
-		return RDP_PORT_SPEED_32GB;
-
-	case PORT_SPEED_64GB:
-		return RDP_PORT_SPEED_64GB;
-
-	default:
-		return RDP_PORT_SPEED_UNKNOWN;
-	}
-}
-
 /*
  * Function Name: qla24xx_process_purex_iocb
  *
@@ -5973,12 +6071,10 @@
 	dma_addr_t rsp_els_dma;
 	dma_addr_t rsp_payload_dma;
 	dma_addr_t stat_dma;
-	dma_addr_t bbc_dma;
 	dma_addr_t sfp_dma;
 	struct els_entry_24xx *rsp_els = NULL;
 	struct rdp_rsp_payload *rsp_payload = NULL;
 	struct link_statistics *stat = NULL;
-	struct buffer_credit_24xx *bbc = NULL;
 	uint8_t *sfp = NULL;
 	uint16_t sfp_flags = 0;
 	uint rsp_payload_length = sizeof(*rsp_payload);
@@ -6022,9 +6118,6 @@
 	stat = dma_alloc_coherent(&ha->pdev->dev, sizeof(*stat),
 	    &stat_dma, GFP_KERNEL);
 
-	bbc = dma_alloc_coherent(&ha->pdev->dev, sizeof(*bbc),
-	    &bbc_dma, GFP_KERNEL);
-
 	/* Prepare Response IOCB */
 	rsp_els->entry_type = ELS_IOCB_TYPE;
 	rsp_els->entry_count = 1;
@@ -6120,9 +6213,9 @@
 	rsp_payload->port_speed_desc.desc_len =
 	    cpu_to_be32(RDP_DESC_LEN(rsp_payload->port_speed_desc));
 	rsp_payload->port_speed_desc.speed_capab = cpu_to_be16(
-	    qla25xx_rdp_port_speed_capability(ha));
+	    qla25xx_fdmi_port_speed_capability(ha));
 	rsp_payload->port_speed_desc.operating_speed = cpu_to_be16(
-	    qla25xx_rdp_port_speed_currently(ha));
+	    qla25xx_fdmi_port_speed_currently(ha));
 
 	/* Link Error Status Descriptor */
 	rsp_payload->ls_err_desc.desc_tag = cpu_to_be32(0x10002);
@@ -6178,13 +6271,10 @@
 	rsp_payload->buffer_credit_desc.attached_fcport_b2b = cpu_to_be32(0);
 	rsp_payload->buffer_credit_desc.fcport_rtt = cpu_to_be32(0);
 
-	if (bbc) {
-		memset(bbc, 0, sizeof(*bbc));
-		rval = qla24xx_get_buffer_credits(vha, bbc, bbc_dma);
-		if (!rval) {
-			rsp_payload->buffer_credit_desc.fcport_b2b =
-			    cpu_to_be32(LSW(bbc->parameter[0]));
-		}
+	if (ha->flags.plogi_template_valid) {
+		uint32_t tmp =
+		be16_to_cpu(ha->plogi_els_payld.fl_csp.sp_bb_cred);
+		rsp_payload->buffer_credit_desc.fcport_b2b = cpu_to_be32(tmp);
 	}
 
 	if (rsp_payload_length < sizeof(*rsp_payload))
@@ -6362,9 +6452,6 @@
 	}
 
 dealloc:
-	if (bbc)
-		dma_free_coherent(&ha->pdev->dev, sizeof(*bbc),
-		    bbc, bbc_dma);
 	if (stat)
 		dma_free_coherent(&ha->pdev->dev, sizeof(*stat),
 		    stat, stat_dma);
@@ -6379,7 +6466,8 @@
 		    rsp_els, rsp_els_dma);
 }
 
-void qla24xx_free_purex_item(struct purex_item *item)
+void
+qla24xx_free_purex_item(struct purex_item *item)
 {
 	if (item == &item->vha->default_item)
 		memset(&item->vha->default_item, 0, sizeof(struct purex_item));
@@ -6404,6 +6492,9 @@
 	}
 }
 
+/*
+ * Context: task, can sleep
+ */
 void
 qla83xx_idc_unlock(scsi_qla_host_t *base_vha, uint16_t requester_id)
 {
@@ -6414,6 +6505,8 @@
 	uint32_t data;
 	struct qla_hw_data *ha = base_vha->hw;
 
+	might_sleep();
+
 	/* IDC-unlock implementation using driver-unlock/lock-id
 	 * remote registers
 	 */
@@ -6429,7 +6522,7 @@
 			/* SV: XXX: IDC unlock retrying needed here? */
 
 			/* Retry for IDC-unlock */
-			qla83xx_wait_logic();
+			msleep(QLA83XX_WAIT_LOGIC_MS);
 			retry++;
 			ql_dbg(ql_dbg_p3p, base_vha, 0xb064,
 			    "Failed to release IDC lock, retrying=%d\n", retry);
@@ -6437,7 +6530,7 @@
 		}
 	} else if (retry < 10) {
 		/* Retry for IDC-unlock */
-		qla83xx_wait_logic();
+		msleep(QLA83XX_WAIT_LOGIC_MS);
 		retry++;
 		ql_dbg(ql_dbg_p3p, base_vha, 0xb065,
 		    "Failed to read drv-lockid, retrying=%d\n", retry);
@@ -6453,7 +6546,7 @@
 	if (qla83xx_access_control(base_vha, options, 0, 0, NULL)) {
 		if (retry < 10) {
 			/* Retry for IDC-unlock */
-			qla83xx_wait_logic();
+			msleep(QLA83XX_WAIT_LOGIC_MS);
 			retry++;
 			ql_dbg(ql_dbg_p3p, base_vha, 0xb066,
 			    "Failed to release IDC lock, retrying=%d\n", retry);
@@ -6813,6 +6906,9 @@
 
 		schedule();
 
+		if (test_and_clear_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags))
+			qla_pci_set_eeh_busy(base_vha);
+
 		if (!base_vha->flags.init_done || ha->flags.mbox_busy)
 			goto end_loop;
 
@@ -7106,26 +7202,21 @@
 			mutex_unlock(&ha->mq_lock);
 		}
 
-		if (test_and_clear_bit(SET_NVME_ZIO_THRESHOLD_NEEDED,
-		    &base_vha->dpc_flags)) {
-			ql_log(ql_log_info, base_vha, 0xffffff,
-				"nvme: SET ZIO Activity exchange threshold to %d.\n",
-						ha->nvme_last_rptd_aen);
-			if (qla27xx_set_zio_threshold(base_vha,
-			    ha->nvme_last_rptd_aen)) {
-				ql_log(ql_log_info, base_vha, 0xffffff,
-				    "nvme: Unable to SET ZIO Activity exchange threshold to %d.\n",
-				    ha->nvme_last_rptd_aen);
-			}
-		}
-
 		if (test_and_clear_bit(SET_ZIO_THRESHOLD_NEEDED,
-		    &base_vha->dpc_flags)) {
+				       &base_vha->dpc_flags)) {
+			u16 threshold = ha->nvme_last_rptd_aen + ha->last_zio_threshold;
+
+			if (threshold > ha->orig_fw_xcb_count)
+				threshold = ha->orig_fw_xcb_count;
+
 			ql_log(ql_log_info, base_vha, 0xffffff,
-			    "SET ZIO Activity exchange threshold to %d.\n",
-			    ha->last_zio_threshold);
-			qla27xx_set_zio_threshold(base_vha,
-			    ha->last_zio_threshold);
+			       "SET ZIO Activity exchange threshold to %d.\n",
+			       threshold);
+			if (qla27xx_set_zio_threshold(base_vha, threshold)) {
+				ql_log(ql_log_info, base_vha, 0xffffff,
+				       "Unable to SET ZIO Activity exchange threshold to %d.\n",
+				       threshold);
+			}
 		}
 
 		if (!IS_QLAFX00(ha))
@@ -7192,6 +7283,104 @@
 	}
 }
 
+static bool qla_do_heartbeat(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	u32 cmpl_cnt;
+	u16 i;
+	bool do_heartbeat = false;
+
+	/*
+	 * Heartbeat only if no completions arrived since the last check (no
+	 * active interrupts) while IOs are still outstanding with firmware.
+	 */
+	cmpl_cnt = ha->base_qpair->cmd_completion_cnt;
+	if (cmpl_cnt == ha->base_qpair->prev_completion_cnt &&
+	    cmpl_cnt != ha->base_qpair->cmd_cnt) {
+		do_heartbeat = true;
+		goto skip;
+	}
+	ha->base_qpair->prev_completion_cnt = cmpl_cnt;
+
+	for (i = 0; i < ha->max_qpairs; i++) {
+		if (ha->queue_pair_map[i]) {
+			cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt;
+			if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt &&
+			    cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) {
+				do_heartbeat = true;
+				break;
+			}
+			ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt;
+		}
+	}
+
+skip:
+	return do_heartbeat;
+}
+
+static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (vha->vp_idx)
+		return;
+
+	if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
+		return;
+
+	/*
+	 * dpc thread cannot run if heartbeat is running at the same time, so
+	 * skip the heartbeat when dpc_started is set. To avoid starving it,
+	 * still allow one once 5 seconds have passed since the last run.
+	 */
+	if (dpc_started &&
+	    time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ))
+		return;
+
+	if (qla_do_heartbeat(vha)) {
+		ha->last_heartbeat_run_jiffies = jiffies;
+		queue_work(ha->wq, &ha->heartbeat_work);
+	}
+}
+
+static void qla_wind_down_chip(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!ha->flags.eeh_busy)
+		return;
+	if (ha->pci_error_state)
+		/* system is trying to recover */
+		return;
+
+	/*
+	 * The system is not handling the PCIe error. At this point, make a
+	 * best effort to wind down the adapter.
+	 */
+	if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) &&
+	    !ha->flags.eeh_flush) {
+		ql_log(ql_log_info, vha, 0x9009,
+		    "PCI Error detected, attempting to reset hardware.\n");
+
+		ha->isp_ops->reset_chip(vha);
+		ha->isp_ops->disable_intrs(ha);
+
+		ha->flags.eeh_flush = EEH_FLUSH_RDY;
+		ha->eeh_jif = jiffies;
+
+	} else if (ha->flags.eeh_flush == EEH_FLUSH_RDY &&
+	    time_after_eq(jiffies, ha->eeh_jif +  5 * HZ)) {
+		pci_clear_master(ha->pdev);
+
+		/* flush all commands */
+		qla2x00_abort_isp_cleanup(vha);
+		ha->flags.eeh_flush = EEH_FLUSH_DONE;
+
+		ql_log(ql_log_info, vha, 0x900a,
+		    "PCI Error handling complete, all IOs aborted.\n");
+	}
+}
+
 /**************************************************************************
 *   qla2x00_timer
 *
@@ -7211,8 +7400,12 @@
 	uint16_t        w;
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req;
+	unsigned long flags;
+	fc_port_t *fcport = NULL;
 
 	if (ha->flags.eeh_busy) {
+		qla_wind_down_chip(vha);
+
 		ql_dbg(ql_dbg_timer, vha, 0x6000,
 		    "EEH = %d, restarting timer.\n",
 		    ha->flags.eeh_busy);
@@ -7242,6 +7435,16 @@
 	if (!vha->vp_idx && IS_QLAFX00(ha))
 		qlafx00_timer_routine(vha);
 
+	if (vha->link_down_time < QLA2XX_MAX_LINK_DOWN_TIME)
+		vha->link_down_time++;
+
+	spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+	list_for_each_entry(fcport, &vha->vp_fcports, list) {
+		if (fcport->tgt_link_down_time < QLA2XX_MAX_LINK_DOWN_TIME)
+			fcport->tgt_link_down_time++;
+	}
+	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
 	/* Loop down handler. */
 	if (atomic_read(&vha->loop_down_timer) > 0 &&
 	    !(test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) &&
@@ -7323,6 +7526,10 @@
 		}
 	}
 
+	/* check if edif running */
+	if (vha->hw->flags.edif_enabled)
+		qla_edif_timer(vha);
+
 	/* Process any deferred work. */
 	if (!list_empty(&vha->work_list)) {
 		unsigned long flags;
@@ -7340,22 +7547,22 @@
 	 * FC-NVME
 	 * see if the active AEN count has changed from what was last reported.
 	 */
+	index = atomic_read(&ha->nvme_active_aen_cnt);
 	if (!vha->vp_idx &&
-	    (atomic_read(&ha->nvme_active_aen_cnt) != ha->nvme_last_rptd_aen) &&
+	    (index != ha->nvme_last_rptd_aen) &&
 	    ha->zio_mode == QLA_ZIO_MODE_6 &&
 	    !ha->flags.host_shutting_down) {
+		ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt);
 		ql_log(ql_log_info, vha, 0x3002,
 		    "nvme: Sched: Set ZIO exchange threshold to %d.\n",
 		    ha->nvme_last_rptd_aen);
-		ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt);
-		set_bit(SET_NVME_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags);
+		set_bit(SET_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags);
 		start_dpc++;
 	}
 
 	if (!vha->vp_idx &&
-	    (atomic_read(&ha->zio_threshold) != ha->last_zio_threshold) &&
-	    (ha->zio_mode == QLA_ZIO_MODE_6) &&
-	    (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))) {
+	    atomic_read(&ha->zio_threshold) != ha->last_zio_threshold &&
+	    IS_ZIO_THRESHOLD_CAPABLE(ha)) {
 		ql_log(ql_log_info, vha, 0x3002,
 		    "Sched: Set ZIO exchange threshold to %d.\n",
 		    ha->last_zio_threshold);
@@ -7364,6 +7571,8 @@
 		start_dpc++;
 	}
 
+	/* borrowing w to signify dpc will run */
+	w = 0;
 	/* Schedule the DPC routine if needed */
 	if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) ||
 	    test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) ||
@@ -7396,8 +7605,11 @@
 		    test_bit(RELOGIN_NEEDED, &vha->dpc_flags),
 		    test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags));
 		qla2xxx_wake_dpc(vha);
+		w = 1;
 	}
 
+	qla_heart_beat(vha, w);
+
 	qla2x00_restart_timer(vha, WATCH_INTERVAL);
 }
 
@@ -7517,11 +7729,13 @@
 	struct qla_hw_data *ha = vha->hw;
 	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
 	struct qla_qpair *qpair = NULL;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	fc_port_t *fcport;
 	int i;
 	unsigned long flags;
 
+	ql_dbg(ql_dbg_aer, vha, 0x9000,
+	       "%s\n", __func__);
 	ha->chip_reset++;
 
 	ha->base_qpair->chip_reset = ha->chip_reset;
@@ -7531,34 +7745,22 @@
 			    ha->base_qpair->chip_reset;
 	}
 
-	/* purge MBox commands */
-	if (atomic_read(&ha->num_pend_mbx_stage3)) {
-		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
-		complete(&ha->mbx_intr_comp);
-	}
-
-	i = 0;
-
-	while (atomic_read(&ha->num_pend_mbx_stage3) ||
-	    atomic_read(&ha->num_pend_mbx_stage2) ||
-	    atomic_read(&ha->num_pend_mbx_stage1)) {
-		msleep(20);
-		i++;
-		if (i > 50)
-			break;
-	}
-
-	ha->flags.purge_mbox = 0;
+	/*
+	 * purge mailbox might take a while. Slot Reset/chip reset
+	 * will take care of the purge
+	 */
 
 	mutex_lock(&ha->mq_lock);
+	ha->base_qpair->online = 0;
 	list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
 		qpair->online = 0;
+	wmb();
 	mutex_unlock(&ha->mq_lock);
 
 	qla2x00_mark_all_devices_lost(vha);
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 		qla2x00_mark_all_devices_lost(vp);
@@ -7572,7 +7774,7 @@
 		fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 		list_for_each_entry(fcport, &vp->vp_fcports, list)
@@ -7589,28 +7791,32 @@
 {
 	scsi_qla_host_t *vha = pci_get_drvdata(pdev);
 	struct qla_hw_data *ha = vha->hw;
+	pci_ers_result_t ret = PCI_ERS_RESULT_NEED_RESET;
 
-	ql_dbg(ql_dbg_aer, vha, 0x9000,
-	    "PCI error detected, state %x.\n", state);
+	ql_log(ql_log_warn, vha, 0x9000,
+	       "PCI error detected, state %x.\n", state);
+	ha->pci_error_state = QLA_PCI_ERR_DETECTED;
 
 	if (!atomic_read(&pdev->enable_cnt)) {
 		ql_log(ql_log_info, vha, 0xffff,
 			"PCI device is disabled,state %x\n", state);
-		return PCI_ERS_RESULT_NEED_RESET;
+		ret = PCI_ERS_RESULT_NEED_RESET;
+		goto out;
 	}
 
 	switch (state) {
 	case pci_channel_io_normal:
-		ha->flags.eeh_busy = 0;
+		qla_pci_set_eeh_busy(vha);
 		if (ql2xmqsupport || ql2xnvmeenable) {
 			set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		}
-		return PCI_ERS_RESULT_CAN_RECOVER;
+		ret = PCI_ERS_RESULT_CAN_RECOVER;
+		break;
 	case pci_channel_io_frozen:
-		ha->flags.eeh_busy = 1;
-		qla_pci_error_cleanup(vha);
-		return PCI_ERS_RESULT_NEED_RESET;
+		qla_pci_set_eeh_busy(vha);
+		ret = PCI_ERS_RESULT_NEED_RESET;
+		break;
 	case pci_channel_io_perm_failure:
 		ha->flags.pci_channel_io_perm_failure = 1;
 		qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
@@ -7618,9 +7824,12 @@
 			set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		}
-		return PCI_ERS_RESULT_DISCONNECT;
+		ret = PCI_ERS_RESULT_DISCONNECT;
 	}
-	return PCI_ERS_RESULT_NEED_RESET;
+out:
+	ql_dbg(ql_dbg_aer, vha, 0x600d,
+	       "PCI error detected returning [%x].\n", ret);
+	return ret;
 }
 
 static pci_ers_result_t
@@ -7634,9 +7843,20 @@
 	struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
 	struct device_reg_24xx __iomem *reg24 = &ha->iobase->isp24;
 
+	ql_log(ql_log_warn, base_vha, 0x9000,
+	       "mmio enabled\n");
+
+	ha->pci_error_state = QLA_PCI_MMIO_ENABLED;
+
 	if (IS_QLA82XX(ha))
 		return PCI_ERS_RESULT_RECOVERED;
 
+	if (qla2x00_isp_reg_stat(ha)) {
+		ql_log(ql_log_info, base_vha, 0x803f,
+		    "During mmio enabled, PCI/Register disconnect still detected.\n");
+		goto out;
+	}
+
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	if (IS_QLA2100(ha) || IS_QLA2200(ha)){
 		stat = rd_reg_word(&reg->hccr);
@@ -7657,10 +7877,12 @@
 		ql_log(ql_log_info, base_vha, 0x9003,
 		    "RISC paused -- mmio_enabled, Dumping firmware.\n");
 		qla2xxx_dump_fw(base_vha);
-
-		return PCI_ERS_RESULT_NEED_RESET;
-	} else
-		return PCI_ERS_RESULT_RECOVERED;
+	}
+out:
+	/* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */
+	ql_dbg(ql_dbg_aer, base_vha, 0x600d,
+	       "mmio enabled returning.\n");
+	return PCI_ERS_RESULT_NEED_RESET;
 }
 
 static pci_ers_result_t
@@ -7672,9 +7894,10 @@
 	int rc;
 	struct qla_qpair *qpair = NULL;
 
-	ql_dbg(ql_dbg_aer, base_vha, 0x9004,
-	    "Slot Reset.\n");
+	ql_log(ql_log_warn, base_vha, 0x9004,
+	       "Slot Reset.\n");
 
+	ha->pci_error_state = QLA_PCI_SLOT_RESET;
 	/* Workaround: qla2xxx driver which access hardware earlier
 	 * needs error state to be pci_channel_io_online.
 	 * Otherwise mailbox command timesout.
@@ -7708,16 +7931,24 @@
 		qpair->online = 1;
 	mutex_unlock(&ha->mq_lock);
 
+	ha->flags.eeh_busy = 0;
 	base_vha->flags.online = 1;
 	set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
-	if (ha->isp_ops->abort_isp(base_vha) == QLA_SUCCESS)
-		ret =  PCI_ERS_RESULT_RECOVERED;
+	ha->isp_ops->abort_isp(base_vha);
 	clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 
+	if (qla2x00_isp_reg_stat(ha)) {
+		ha->flags.eeh_busy = 1;
+		qla_pci_error_cleanup(base_vha);
+		ql_log(ql_log_warn, base_vha, 0x9005,
+		       "Device unable to recover from PCI error.\n");
+	} else {
+		ret =  PCI_ERS_RESULT_RECOVERED;
+	}
 
 exit_slot_reset:
 	ql_dbg(ql_dbg_aer, base_vha, 0x900e,
-	    "slot_reset return %x.\n", ret);
+	    "Slot Reset returning %x.\n", ret);
 
 	return ret;
 }
@@ -7729,16 +7960,58 @@
 	struct qla_hw_data *ha = base_vha->hw;
 	int ret;
 
-	ql_dbg(ql_dbg_aer, base_vha, 0x900f,
-	    "pci_resume.\n");
+	ql_log(ql_log_warn, base_vha, 0x900f,
+	       "Pci Resume.\n");
 
-	ha->flags.eeh_busy = 0;
 
 	ret = qla2x00_wait_for_hba_online(base_vha);
 	if (ret != QLA_SUCCESS) {
 		ql_log(ql_log_fatal, base_vha, 0x9002,
 		    "The device failed to resume I/O from slot/link_reset.\n");
 	}
+	ha->pci_error_state = QLA_PCI_RESUME;
+	ql_dbg(ql_dbg_aer, base_vha, 0x600d,
+	       "Pci Resume returning.\n");
+}
+
+void qla_pci_set_eeh_busy(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+	bool do_cleanup = false;
+	unsigned long flags;
+
+	if (ha->flags.eeh_busy)
+		return;
+	/* Re-check under work_lock: only the setter of eeh_busy does cleanup. */
+	spin_lock_irqsave(&base_vha->work_lock, flags);
+	if (!ha->flags.eeh_busy) {
+		ha->eeh_jif = jiffies;
+		ha->flags.eeh_flush = 0;
+
+		ha->flags.eeh_busy = 1;
+		do_cleanup = true;
+	}
+	spin_unlock_irqrestore(&base_vha->work_lock, flags);
+
+	if (do_cleanup)
+		qla_pci_error_cleanup(base_vha);
+}
+
+/*
+ * Meant for interrupt context: when the caller sees a PCIe error event
+ * (register read = 0xf's), set DO_EEH_RECOVERY and wake DPC to pause IO.
+ */
+void qla_schedule_eeh_work(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+
+	if (ha->flags.eeh_busy)
+		return;
+
+	set_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags);
+	qla2xxx_wake_dpc(base_vha);
 }
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
@@ -7804,21 +8077,20 @@
 #endif
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
-static int qla2xxx_map_queues(struct Scsi_Host *shost)
+static MAP_QUEUES_RET qla2xxx_map_queues(struct Scsi_Host *shost)
 {
-	int rc;
 	scsi_qla_host_t *vha = (scsi_qla_host_t *)shost->hostdata;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) ||	\
 	(defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 8)
 	struct blk_mq_queue_map *qmap = &shost->tag_set.map[0];
 
 	if (USER_CTRL_IRQ(vha->hw) || !vha->hw->mqiobase)
-		rc = blk_mq_map_queues(qmap);
+		blk_mq_map_queues(qmap);
 	else
-		rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, vha->irq_offset);
+		blk_mq_pci_map_queues(qmap, vha->hw->pdev, vha->irq_offset);
 #else
 	if (USER_CTRL_IRQ(vha->hw))
-		rc = blk_mq_map_queues(&shost->tag_set);
+		blk_mq_map_queues(&shost->tag_set);
 	else
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) &&		\
 	(!defined(UEK_KABI_RENAME) ||				\
@@ -7828,12 +8100,13 @@
 		 * See also commit f23f5bece686 ("blk-mq: Allow PCI vector
 		 * offset for mapping queues") # v4.17.
 		 */
-		rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev);
+		blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev);
 #else
-		rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0);
+		blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0);
 #endif
 #endif
-	return rc;
+
+	return (MAP_QUEUES_RET) 0;
 }
 #endif
 
@@ -7846,6 +8119,11 @@
 	.eh_timed_out		= fc_eh_timed_out,
 #endif
 	.eh_abort_handler	= qla2xxx_eh_abort,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0)
+	/* See also commit 60bee27ba2df ("scsi: core: No retries on abort
+	 * success") */
+	.eh_should_retry_cmd	= fc_eh_should_retry_cmd,
+#endif
 	.eh_device_reset_handler = qla2xxx_eh_device_reset,
 	.eh_target_reset_handler = qla2xxx_eh_target_reset,
 	.eh_bus_reset_handler	= qla2xxx_eh_bus_reset,
@@ -7870,7 +8148,11 @@
 	.sg_tablesize		= SG_ALL,
 
 	.max_sectors		= 0xFFFF,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 	.shost_attrs		= qla2x00_host_attrs,
+#else
+	.shost_groups		= qla2x00_host_groups,
+#endif
 
 	.supported_mode		= MODE_INITIATOR,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
@@ -7891,12 +8173,7 @@
 }
 #endif
 
-static
-/* See also commit 494530284f16 ("PCI: Make pci_error_handlers const") # v3.7 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) || defined(RHEL_MAJOR)
-const
-#endif
-struct pci_error_handlers qla2xxx_err_handler = {
+static const struct pci_error_handlers qla2xxx_err_handler = {
 	.error_detected = qla2xxx_pci_error_detected,
 	.mmio_enabled = qla2xxx_pci_mmio_enabled,
 	.slot_reset = qla2xxx_pci_slot_reset,
@@ -7988,7 +8265,7 @@
 	BUILD_BUG_ON(sizeof(struct cmd_type_7_fx00) != 64);
 	BUILD_BUG_ON(sizeof(struct cmd_type_crc_2) != 64);
 	BUILD_BUG_ON(sizeof(struct ct_entry_24xx) != 64);
-	BUILD_BUG_ON(sizeof(struct ct_fdmi1_hba_attributes) != 2344);
+	BUILD_BUG_ON(sizeof(struct ct_fdmi1_hba_attributes) != 2604);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi2_hba_attributes) != 4424);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi2_port_attributes) != 4164);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi_hba_attr) != 260);
@@ -8053,6 +8330,10 @@
 	BUILD_BUG_ON(sizeof(sw_info_t) != 32);
 	BUILD_BUG_ON(sizeof(target_id_t) != 2);
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	qla_trace_init();
+#endif
+
 	/* Allocate cache for SRBs. */
 	srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0,
 	    SLAB_HWCACHE_ALIGN, NULL);
@@ -8083,8 +8364,10 @@
 	if (ql2xextended_error_logging == 1)
 		ql2xextended_error_logging = QL_DBG_DEFAULT1_MASK;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 	if (ql2x_ini_mode == QLA2XXX_INI_MODE_DUAL)
 		qla_insert_tgt_attrs();
+#endif
 
 	qla2xxx_transport_template =
 	    fc_attach_transport(&qla2xxx_transport_functions);
@@ -8134,6 +8417,10 @@
 
 destroy_cache:
 	kmem_cache_destroy(srb_cachep);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	qla_trace_uninit();
+#endif
 	return ret;
 }
 
@@ -8152,6 +8439,10 @@
 	fc_release_transport(qla2xxx_transport_template);
 	qlt_exit();
 	kmem_cache_destroy(srb_cachep);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	qla_trace_uninit();
+#endif
 }
 
 module_init(qla2x00_module_init);
@@ -8160,7 +8451,6 @@
 MODULE_AUTHOR("QLogic Corporation");
 MODULE_DESCRIPTION("QLogic Fibre Channel HBA Driver");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(QLA2XXX_VERSION);
 MODULE_FIRMWARE(FW_FILE_ISP21XX);
 MODULE_FIRMWARE(FW_FILE_ISP22XX);
 MODULE_FIRMWARE(FW_FILE_ISP2300);

diff --git a/scst/qla2x00t-32gbit/qla_settings.h b/scst/qla2x00t-32gbit/qla_settings.h
index 2fb7ebf..a5f3000 100644
--- a/scst/qla2x00t-32gbit/qla_settings.h
+++ b/scst/qla2x00t-32gbit/qla_settings.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #define MAX_RETRIES_OF_ISP_ABORT	5
 

diff --git a/scst/qla2x00t-32gbit/qla_sup.c b/scst/qla2x00t-32gbit/qla_sup.c
index 411b8a9..c092a6b 100644
--- a/scst/qla2x00t-32gbit/qla_sup.c
+++ b/scst/qla2x00t-32gbit/qla_sup.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 
@@ -845,7 +844,7 @@
 				ha->flt_region_nvram = start;
 			break;
 		case FLT_REG_IMG_PRI_27XX:
-			if (IS_QLA27XX(ha) && !IS_QLA28XX(ha))
+			if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
 				ha->flt_region_img_status_pri = start;
 			break;
 		case FLT_REG_IMG_SEC_27XX:
@@ -1357,7 +1356,7 @@
 		    flash_data_addr(ha, faddr), le32_to_cpu(*dwptr));
 		if (ret) {
 			ql_dbg(ql_dbg_user, vha, 0x7006,
-			    "Failed slopw write %x (%x)\n", faddr, *dwptr);
+			    "Failed slow write %x (%x)\n", faddr, *dwptr);
 			break;
 		}
 	}
@@ -2622,10 +2621,11 @@
 }
 
 static int
-qla28xx_extract_sfub_and_verify(struct scsi_qla_host *vha, uint32_t *buf,
+qla28xx_extract_sfub_and_verify(struct scsi_qla_host *vha, __le32 *buf,
     uint32_t len, uint32_t buf_size_without_sfub, uint8_t *sfub_buf)
 {
-	uint32_t *p, check_sum = 0;
+	uint32_t check_sum = 0;
+	__le32 *p;
 	int i;
 
 	p = buf + buf_size_without_sfub;
@@ -2635,14 +2635,14 @@
 	    sizeof(struct secure_flash_update_block));
 
 	for (i = 0; i < (sizeof(struct secure_flash_update_block) >> 2); i++)
-		check_sum += p[i];
+		check_sum += le32_to_cpu(p[i]);
 
 	check_sum = (~check_sum) + 1;
 
-	if (check_sum != p[i]) {
+	if (check_sum != le32_to_cpu(p[i])) {
 		ql_log(ql_log_warn, vha, 0x7097,
 		    "SFUB checksum failed, 0x%x, 0x%x\n",
-		    check_sum, p[i]);
+		    check_sum, le32_to_cpu(p[i]));
 		return QLA_COMMAND_ERROR;
 	}
 
@@ -2722,7 +2722,7 @@
 	if (ha->flags.secure_adapter && region.attribute) {
 
 		ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff,
-		    "Region %x is secure\n", region.code);
+		    "Region %x is secure\n", le16_to_cpu(region.code));
 
 		switch (le16_to_cpu(region.code)) {
 		case FLT_REG_FW:
@@ -2776,7 +2776,7 @@
 		default:
 			ql_log(ql_log_warn + ql_dbg_verbose, vha,
 			    0xffff, "Secure region %x not supported\n",
-			    region.code);
+			    le16_to_cpu(region.code));
 			rval = QLA_COMMAND_ERROR;
 			goto done;
 		}
@@ -2791,8 +2791,8 @@
 			goto done;
 		}
 
-		rval = qla28xx_extract_sfub_and_verify(vha, dwptr, dwords,
-			buf_size_without_sfub, (uint8_t *)sfub);
+		rval = qla28xx_extract_sfub_and_verify(vha, (__le32 *)dwptr,
+			dwords, buf_size_without_sfub, (uint8_t *)sfub);
 
 		if (rval != QLA_SUCCESS)
 			goto done;
@@ -2936,7 +2936,6 @@
 		liter += dburst - 1;
 		faddr += dburst - 1;
 		dwptr += dburst - 1;
-		continue;
 	}
 
 write_protect:

diff --git a/scst/qla2x00t-32gbit/qla_target.c b/scst/qla2x00t-32gbit/qla_target.c
index 64038f2..02bb0c1 100644
--- a/scst/qla2x00t-32gbit/qla_target.c
+++ b/scst/qla2x00t-32gbit/qla_target.c

@@ -47,24 +47,17 @@
 MODULE_PARM_DESC(ql2xtgt_tape_enable,
 		"Enables Sequence level error recovery (aka FC Tape). Default is 0 - no SLER. 1 - Enable SLER.");
 
-static char *qlini_mode = QLA2XXX_INI_MODE_STR_ENABLED;
+static char *qlini_mode = QLA2XXX_INI_MODE_STR_EXCLUSIVE;
 module_param(qlini_mode, charp, S_IRUGO);
 MODULE_PARM_DESC(qlini_mode,
 	"Determines when initiator mode will be enabled. Possible values: "
-	"\"exclusive\" - initiator mode will be enabled on load, "
+	"\"exclusive\" (default) - initiator mode will be enabled on load, "
 	"disabled on enabling target mode and then on disabling target mode "
 	"enabled back; "
 	"\"disabled\" - initiator mode will never be enabled; "
 	"\"dual\" - Initiator Modes will be enabled. Target Mode can be activated "
-	"when ready "
-	"\"enabled\" (default) - initiator mode will always stay enabled.");
-
-static int ql_dm_tgt_ex_pct = 0;
-module_param(ql_dm_tgt_ex_pct, int, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(ql_dm_tgt_ex_pct,
-	"For Dual Mode (qlini_mode=dual), this parameter determines "
-	"the percentage of exchanges/cmds FW will allocate resources "
-	"for Target mode.");
+	"when ready; "
+	"\"enabled\" - initiator mode will always stay enabled.");
 
 int ql2xuctrlirq = 1;
 module_param(ql2xuctrlirq, int, 0644);
@@ -199,8 +192,7 @@
 	return QLA_SUCCESS;
 }
 
-static inline
-struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
+struct scsi_qla_host *qla_find_host_by_d_id(struct scsi_qla_host *vha,
 					    be_id_t d_id)
 {
 	struct scsi_qla_host *host;
@@ -213,7 +205,7 @@
 
 	key = be_to_port_id(d_id).b24;
 
-	host = btree_lookup32(&vha->hw->tgt.host_map, key);
+	host = btree_lookup32(&vha->hw->host_map, key);
 	if (!host)
 		ql_dbg(ql_dbg_tgt_mgt + ql_dbg_verbose, vha, 0xf005,
 		    "Unable to find host %06x\n", key);
@@ -314,7 +306,7 @@
 			goto abort;
 		}
 
-		host = qlt_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id);
+		host = qla_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id);
 		if (host != NULL) {
 			ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0x502f,
 			    "Requeuing unknown ATIO_TYPE7 %p\n", u);
@@ -363,7 +355,7 @@
 	switch (atio->u.raw.entry_type) {
 	case ATIO_TYPE7:
 	{
-		struct scsi_qla_host *host = qlt_find_host_by_d_id(vha,
+		struct scsi_qla_host *host = qla_find_host_by_d_id(vha,
 		    atio->u.isp24.fcp_hdr.d_id);
 		if (unlikely(NULL == host)) {
 			ql_dbg(ql_dbg_tgt, vha, 0xe03e,
@@ -592,6 +584,18 @@
 		sp->fcport->logout_on_delete = 1;
 		sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
 		sp->fcport->send_els_logo = 0;
+
+		if (sp->fcport->flags & FCF_FCSP_DEVICE) {
+			ql_dbg(ql_dbg_edif, vha, 0x20ef,
+			    "%s %8phC edif: PLOGI- AUTH WAIT\n", __func__,
+			    sp->fcport->port_name);
+			qla2x00_set_fcport_disc_state(sp->fcport,
+			    DSC_LOGIN_AUTH_PEND);
+			qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+			    sp->fcport->d_id.b24);
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_NEEDED, sp->fcport->d_id.b24,
+			    0, sp->fcport);
+		}
 		break;
 
 	case SRB_NACK_PRLI:
@@ -624,7 +628,7 @@
 	}
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
-	sp->free(sp);
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 }
 
 int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
@@ -639,6 +643,9 @@
 	case SRB_NACK_PLOGI:
 		fcport->fw_login_state = DSC_LS_PLOGI_PEND;
 		c = "PLOGI";
+		if (vha->hw->flags.edif_enabled &&
+		    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP))
+			fcport->flags |= FCF_FCSP_DEVICE;
 		break;
 	case SRB_NACK_PRLI:
 		fcport->fw_login_state = DSC_LS_PRLI_PEND;
@@ -657,12 +664,10 @@
 
 	sp->type = type;
 	sp->name = "nack";
-
-	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
-	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2);
+	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+			      qla2x00_async_nack_sp_done);
 
 	sp->u.iocb_cmd.u.nack.ntfy = ntfy;
-	sp->done = qla2x00_async_nack_sp_done;
 
 	ql_dbg(ql_dbg_disc, vha, 0x20f4,
 	    "Async-%s %8phC hndl %x %s\n",
@@ -675,7 +680,7 @@
 	return rval;
 
 done_free_sp:
-	sp->free(sp);
+	kref_put(&sp->cmd_kref, qla2x00_sp_release);
 done:
 	fcport->flags &= ~FCF_ASYNC_SENT;
 	return rval;
@@ -708,7 +713,12 @@
 void qla24xx_delete_sess_fn(struct work_struct *work)
 {
 	fc_port_t *fcport = container_of(work, struct fc_port, del_work);
-	struct qla_hw_data *ha = fcport->vha->hw;
+	struct qla_hw_data *ha = NULL;
+
+	if (!fcport || !fcport->vha || !fcport->vha->hw)
+		return;
+
+	ha = fcport->vha->hw;
 
 	if (fcport->se_sess) {
 		ha->tgt.tgt_ops->shutdown_sess(fcport);
@@ -932,6 +942,11 @@
 	qlt_port_logo_t *tmp;
 	int res;
 
+	if (test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags)) {
+		res = 0;
+		goto out;
+	}
+
 	mutex_lock(&vha->vha_tgt.tgt_mutex);
 
 	list_for_each_entry(tmp, &vha->logo_list, list) {
@@ -952,6 +967,7 @@
 	list_del(&logo->list);
 	mutex_unlock(&vha->vha_tgt.tgt_mutex);
 
+out:
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf098,
 	    "Finished LOGO to %02x:%02x:%02x, dropped %d cmds, res = %#x\n",
 	    logo->id.b.domain, logo->id.b.area, logo->id.b.al_pa,
@@ -987,6 +1003,7 @@
 
 			logo.id = sess->d_id;
 			logo.cmd_count = 0;
+			INIT_LIST_HEAD(&logo.list);
 			if (!own)
 				qlt_send_first_logo(vha, &logo);
 			sess->send_els_logo = 0;
@@ -996,8 +1013,8 @@
 			int rc;
 
 			if (!own ||
-			    (own &&
-			     (own->iocb.u.isp24.status_subcode == ELS_PLOGI))) {
+			     (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
+				sess->logout_completed = 0;
 				rc = qla2x00_post_async_logout_work(vha, sess,
 				    NULL);
 				if (rc != QLA_SUCCESS)
@@ -1024,6 +1041,25 @@
 			sess->nvme_flag |= NVME_FLAG_DELETING;
 			qla_nvme_unregister_remote_port(sess);
 		}
+
+		if (ha->flags.edif_enabled &&
+		    (!own || (own &&
+			      own->iocb.u.isp24.status_subcode == ELS_PLOGI))) {
+			sess->edif.authok = 0;
+			if (!ha->flags.host_shutting_down) {
+				ql_dbg(ql_dbg_edif, vha, 0x911e,
+				       "%s wwpn %8phC calling qla2x00_release_all_sadb\n",
+				       __func__, sess->port_name);
+				qla2x00_release_all_sadb(vha, sess);
+			} else {
+				ql_dbg(ql_dbg_edif, vha, 0x911e,
+				       "%s bypassing release_all_sadb\n",
+				       __func__);
+			}
+
+			qla_edif_clear_appdata(vha, sess);
+			qla_edif_sess_down(vha, sess);
+		}
 	}
 
 	/*
@@ -1045,7 +1081,12 @@
 			}
 			msleep(100);
 			cnt++;
-			if (cnt > 200)
+			/*
+			 * Driver timeout is set to 22 Sec, update count value to loop
+			 * long enough for log-out to complete before advancing. Otherwise,
+			 * straddling logout can interfere with re-login attempt.
+			 */
+			if (cnt > 230)
 				break;
 		}
 
@@ -1126,6 +1167,8 @@
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 	sess->free_pending = 0;
 
+	qla2x00_dfs_remove_rport(vha, sess);
+
 	ql_dbg(ql_dbg_disc, vha, 0xf001,
 	    "Unregistration of sess %p %8phC finished fcp_cnt %d\n",
 		sess, sess->port_name, vha->fcport_count);
@@ -1244,14 +1287,15 @@
 	case DSC_DELETE_PEND:
 		return;
 	case DSC_DELETED:
-		if (tgt && tgt->tgt_stop && (tgt->sess_count == 0))
-			wake_up_all(&tgt->waitQ);
-		if (sess->vha->fcport_count == 0)
-			wake_up_all(&sess->vha->fcport_waitQ);
-
 		if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] &&
-			!sess->plogi_link[QLT_PLOGI_LINK_CONFLICT])
+			!sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) {
+			if (tgt && tgt->tgt_stop && tgt->sess_count == 0)
+				wake_up_all(&tgt->waitQ);
+
+			if (sess->vha->fcport_count == 0)
+				wake_up_all(&sess->vha->fcport_waitQ);
 			return;
+		}
 		break;
 	case DSC_UPD_FCPORT:
 		/*
@@ -1285,9 +1329,9 @@
 
 	qla24xx_chk_fcp_state(sess);
 
-	ql_dbg(ql_dbg_disc, sess->vha, 0xe001,
-	    "Scheduling sess %p for deletion %8phC\n",
-	    sess, sess->port_name);
+	ql_dbg(ql_log_warn, sess->vha, 0xe001,
+	    "Scheduling sess %p for deletion %8phC fc4_type %x\n",
+	    sess, sess->port_name, sess->fc4_type);
 
 	WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work));
 }
@@ -1528,11 +1572,11 @@
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf009,
 	    "Waiting for sess works (tgt %p)", tgt);
 	spin_lock_irqsave(&tgt->sess_work_lock, flags);
-	while (!list_empty(&tgt->sess_works_list)) {
+	do {
 		spin_unlock_irqrestore(&tgt->sess_work_lock, flags);
-		flush_scheduled_work();
+		flush_work(&tgt->sess_work);
 		spin_lock_irqsave(&tgt->sess_work_lock, flags);
-	}
+	} while (!list_empty(&tgt->sess_works_list));
 	spin_unlock_irqrestore(&tgt->sess_work_lock, flags);
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00a,
@@ -1571,10 +1615,12 @@
 		return;
 	}
 
+	mutex_lock(&tgt->ha->optrom_mutex);
 	mutex_lock(&vha->vha_tgt.tgt_mutex);
 	tgt->tgt_stop = 0;
 	tgt->tgt_stopped = 1;
 	mutex_unlock(&vha->vha_tgt.tgt_mutex);
+	mutex_unlock(&tgt->ha->optrom_mutex);
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00c, "Stop of tgt %p finished\n",
 	    tgt);
@@ -1726,6 +1772,12 @@
 	nack->u.isp24.srr_reject_code_expl = srr_explan;
 	nack->u.isp24.vp_index = ntfy->u.isp24.vp_index;
 
+	/* TODO qualify this with EDIF enable */
+	if (ntfy->u.isp24.status_subcode == ELS_PLOGI &&
+	    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+		nack->u.isp24.flags |= cpu_to_le16(NOTIFY_ACK_FLAGS_FCSP);
+	}
+
 	ql_dbg(ql_dbg_tgt, vha, 0xe005,
 	    "qla_target(%d): Sending 24xx Notify Ack %d\n",
 	    vha->vp_idx, nack->u.isp24.status);
@@ -1983,17 +2035,6 @@
 
 	key = sid_to_key(s_id);
 	spin_lock_irqsave(&vha->cmd_list_lock, flags);
-	list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) {
-		uint32_t op_key;
-		u64 op_lun;
-
-		op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id);
-		op_lun = scsilun_to_int(
-			(struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun);
-		if (op_key == key && op_lun == lun)
-			op->aborted = true;
-	}
-
 	list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) {
 		uint32_t op_key;
 		u64 op_lun;
@@ -2095,6 +2136,7 @@
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt_mgmt_cmd *mcmd;
 	struct qla_qpair_hint *h = &vha->vha_tgt.qla_tgt->qphints[0];
+	struct qla_tgt_cmd *abort_cmd;
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f,
 	    "qla_target(%d): task abort (tag=%d)\n",
@@ -2124,19 +2166,21 @@
 	mcmd->se_cmd.cpuid = h->cpuid;
 #endif
 
-	if (ha->tgt.tgt_ops->find_cmd_by_tag) {
-		struct qla_tgt_cmd *abort_cmd;
-
-		abort_cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess,
+	abort_cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess,
 				le32_to_cpu(abts->exchange_addr_to_abort));
-		if (abort_cmd && abort_cmd->qpair) {
-			mcmd->qpair = abort_cmd->qpair;
+	if (!abort_cmd) {
+		mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool);
+		return -EIO;
+	}
+	mcmd->unpacked_lun = abort_cmd->se_cmd.orig_fe_lun;
+
+	if (abort_cmd->qpair) {
+		mcmd->qpair = abort_cmd->qpair;
 #if HAVE_SE_CMD_CPUID
-			mcmd->se_cmd.cpuid = abort_cmd->se_cmd.cpuid;
+		mcmd->se_cmd.cpuid = abort_cmd->se_cmd.cpuid;
 #endif
-			mcmd->abort_io_attr = abort_cmd->atio.u.isp24.attr;
-			mcmd->flags = QLA24XX_MGMT_ABORT_IO_ATTR_VALID;
-		}
+		mcmd->abort_io_attr = abort_cmd->atio.u.isp24.attr;
+		mcmd->flags = QLA24XX_MGMT_ABORT_IO_ATTR_VALID;
 	}
 
 	INIT_WORK(&mcmd->work, qlt_do_tmr_work);
@@ -2591,6 +2635,7 @@
 	struct ctio7_to_24xx *pkt;
 	struct atio_from_isp *atio = &prm->cmd->atio;
 	uint16_t temp;
+	struct qla_tgt_cmd      *cmd = prm->cmd;
 
 	pkt = (struct ctio7_to_24xx *)qpair->req->ring_ptr;
 	prm->pkt = pkt;
@@ -2623,6 +2668,15 @@
 	pkt->u.status0.ox_id = cpu_to_le16(temp);
 	pkt->u.status0.relative_offset = cpu_to_le32(prm->cmd->offset);
 
+	if (cmd->edif) {
+		if (cmd->dma_data_direction == DMA_TO_DEVICE)
+			prm->cmd->sess->edif.rx_bytes += cmd->bufflen;
+		if (cmd->dma_data_direction == DMA_FROM_DEVICE)
+			prm->cmd->sess->edif.tx_bytes += cmd->bufflen;
+
+		pkt->u.status0.edif_flags |= EF_EN_EDIF;
+	}
+
 	return 0;
 }
 
@@ -3251,8 +3305,7 @@
 	if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) ||
 	    (cmd->sess && cmd->sess->deleted)) {
 		cmd->state = QLA_TGT_STATE_PROCESSED;
-		res = 0;
-		goto free;
+		return 0;
 	}
 
 	ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018,
@@ -3263,8 +3316,9 @@
 
 	res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status,
 	    &full_req_cnt);
-	if (unlikely(res != 0))
-		goto free;
+	if (unlikely(res != 0)) {
+		return res;
+	}
 
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 
@@ -3283,9 +3337,8 @@
 			"RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n",
 			vha->flags.online, qla2x00_reset_active(vha),
 			cmd->reset_count, qpair->chip_reset);
-		spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 		res = 0;
-		goto free;
+		goto out_unmap_unlock;
 	}
 
 	/* Does F/W have an IOCBs for this request */
@@ -3320,8 +3373,10 @@
 			if (xmit_type & QLA_TGT_XMIT_STATUS) {
 				pkt->u.status0.scsi_status =
 				    cpu_to_le16(prm.rq_result);
-				pkt->u.status0.residual =
-				    cpu_to_le32(prm.residual);
+				if (!cmd->edif)
+					pkt->u.status0.residual =
+						cpu_to_le32(prm.residual);
+
 				pkt->u.status0.flags |= cpu_to_le16(
 				    CTIO7_FLAGS_SEND_STATUS);
 				if (qlt_need_explicit_conf(cmd, 0)) {
@@ -3392,8 +3447,6 @@
 	qlt_unmap_sg(vha, cmd);
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
-free:
-	vha->hw->tgt.tgt_ops->free_cmd(cmd);
 	return res;
 }
 EXPORT_SYMBOL(qlt_xmit_response);
@@ -3414,10 +3467,6 @@
 	prm.sg = NULL;
 	prm.req_cnt = 1;
 
-	/* Calculate number of entries and segments required */
-	if (qlt_pci_map_calc_cnt(&prm) != 0)
-		return -EAGAIN;
-
 	if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) ||
 	    (cmd->sess && cmd->sess->deleted)) {
 		/*
@@ -3435,6 +3484,10 @@
 		return 0;
 	}
 
+	/* Calculate number of entries and segments required */
+	if (qlt_pci_map_calc_cnt(&prm) != 0)
+		return -EAGAIN;
+
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 	/* Does F/W have an IOCBs for this request */
 	res = qlt_check_reserve_free_req(qpair, prm.req_cnt);
@@ -3820,6 +3873,9 @@
 
 	spin_lock_irqsave(&cmd->cmd_lock, flags);
 	if (cmd->aborted) {
+		if (cmd->sg_mapped)
+			qlt_unmap_sg(vha, cmd);
+
 		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
 		/*
 		 * It's normal to see 2 calls in this path:
@@ -3828,7 +3884,7 @@
 		 */
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf016, "multiple abort. %p\n",
 		       cmd);
-		return EIO;
+		return -EIO;
 	}
 	cmd->aborted = 1;
 	cmd->trc_flags |= TRC_ABORT;
@@ -3850,16 +3906,11 @@
 
 	BUG_ON(cmd->cmd_in_wq);
 
-	if (cmd->sg_mapped)
-		qlt_unmap_sg(cmd->vha, cmd);
-
 	if (!cmd->q_full)
 		qlt_decr_num_pend_cmds(cmd->vha);
 
 	BUG_ON(cmd->sg_mapped);
 	cmd->jiffies_at_free = get_jiffies_64();
-	if (unlikely(cmd->free_sg))
-		kfree(cmd->sg);
 
 	if (!sess || !sess->se_sess) {
 		WARN_ON(1);
@@ -3983,6 +4034,12 @@
 	if (cmd == NULL)
 		return;
 
+	if ((le16_to_cpu(((struct ctio7_from_24xx *)ctio)->flags) & CTIO7_FLAGS_DATA_OUT) &&
+	    cmd->sess) {
+		qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess,
+		    (struct ctio7_from_24xx *)ctio);
+	}
+
 	se_cmd = &cmd->se_cmd;
 	cmd->cmd_sent_to_fw = 0;
 
@@ -4053,6 +4110,16 @@
 			qlt_handle_dif_error(qpair, cmd, ctio);
 			return;
 		}
+
+		case CTIO_FAST_AUTH_ERR:
+		case CTIO_FAST_INCOMP_PAD_LEN:
+		case CTIO_FAST_INVALID_REQ:
+		case CTIO_FAST_SPI_ERR:
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
+			    "qla_target(%d): CTIO with EDIF error status 0x%x received (state %x, se_cmd %p\n",
+			    vha->vp_idx, status, cmd->state, se_cmd);
+			break;
+
 		default:
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
 			    "qla_target(%d): CTIO with error status 0x%x received (state %x, se_cmd %p\n",
@@ -4340,6 +4407,7 @@
 
 	cmd->cmd_type = TYPE_TGT_CMD;
 	memcpy(&cmd->atio, atio, sizeof(*atio));
+	INIT_LIST_HEAD(&cmd->sess_cmd_list);
 	cmd->state = QLA_TGT_STATE_NEW;
 	cmd->tgt = vha->vha_tgt.qla_tgt;
 	qlt_incr_num_pend_cmds(vha);
@@ -4356,6 +4424,7 @@
 	qlt_assign_qpair(vha, cmd);
 	cmd->reset_count = vha->hw->base_qpair->chip_reset;
 	cmd->vp_idx = vha->vp_idx;
+	cmd->edif = sess->edif.enable;
 
 	return cmd;
 }
@@ -4713,15 +4782,6 @@
 	       ((u32)s_id->b.al_pa));
 
 	spin_lock_irqsave(&vha->cmd_list_lock, flags);
-	list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) {
-		uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id);
-
-		if (op_key == key) {
-			op->aborted = true;
-			count++;
-		}
-	}
-
 	list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) {
 		uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id);
 
@@ -4787,6 +4847,34 @@
 		goto out;
 	}
 
+	if (vha->hw->flags.edif_enabled &&
+	    !(vha->e_dbell.db_flags & EDB_ACTIVE) &&
+	    iocb->u.isp24.status_subcode == ELS_PLOGI &&
+	    !(le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+		ql_dbg(ql_dbg_disc, vha, 0xffff,
+			"%s %d Term INOT due to app not available lid=%d, NportID %06X ",
+			__func__, __LINE__, loop_id, port_id.b24);
+		qlt_send_term_imm_notif(vha, iocb, 1);
+		goto out;
+	}
+
+	if (vha->hw->flags.edif_enabled) {
+		if (DBELL_INACTIVE(vha)) {
+			ql_dbg(ql_dbg_disc, vha, 0xffff,
+			       "%s %d Term INOT due to app not started lid=%d, NportID %06X ",
+			       __func__, __LINE__, loop_id, port_id.b24);
+			qlt_send_term_imm_notif(vha, iocb, 1);
+			goto out;
+		} else if (iocb->u.isp24.status_subcode == ELS_PLOGI &&
+			   !(le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+			ql_dbg(ql_dbg_disc, vha, 0xffff,
+			       "%s %d Term INOT due to unsecure lid=%d, NportID %06X ",
+			       __func__, __LINE__, loop_id, port_id.b24);
+			qlt_send_term_imm_notif(vha, iocb, 1);
+			goto out;
+		}
+	}
+
 	pla = qlt_plogi_ack_find_add(vha, &port_id, iocb);
 	if (!pla) {
 		ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff,
@@ -4852,6 +4940,20 @@
 	qlt_plogi_ack_link(vha, pla, sess, QLT_PLOGI_LINK_SAME_WWN);
 	sess->d_id = port_id;
 	sess->login_gen++;
+	sess->loop_id = loop_id;
+
+	if (iocb->u.isp24.status_subcode == ELS_PLOGI) {
+		/* remote port has assigned Port ID */
+		if (N2N_TOPO(vha->hw) && fcport_is_bigger(sess))
+			vha->d_id = sess->d_id;
+
+		ql_dbg(ql_dbg_disc, vha, 0xffff,
+		    "%s %8phC - send port online\n",
+		    __func__, sess->port_name);
+
+		qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+		    sess->d_id.b24);
+	}
 
 	if (iocb->u.isp24.status_subcode == ELS_PRLI) {
 		sess->fw_login_state = DSC_LS_PRLI_PEND;
@@ -4964,6 +5066,16 @@
 			sess = qla2x00_find_fcport_by_wwpn(vha,
 			    iocb->u.isp24.port_name, 1);
 
+			if (vha->hw->flags.edif_enabled && sess &&
+			    (!(sess->flags & FCF_FCSP_DEVICE) ||
+			     !sess->edif.authok)) {
+				ql_dbg(ql_dbg_disc, vha, 0xffff,
+				       "%s %d %8phC Term PRLI due to unauthorize PRLI\n",
+				       __func__, __LINE__, iocb->u.isp24.port_name);
+				qlt_send_term_imm_notif(vha, iocb, 1);
+				break;
+			}
+
 			if (sess && sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]) {
 				ql_dbg(ql_dbg_disc, vha, 0xffff,
 				    "%s %d %8phC Term PRLI due to PLOGI ACK not completed\n",
@@ -5012,6 +5124,16 @@
 			bool delete = false;
 			int sec;
 
+			if (vha->hw->flags.edif_enabled && sess &&
+			    (!(sess->flags & FCF_FCSP_DEVICE) ||
+			     !sess->edif.authok)) {
+				ql_dbg(ql_dbg_disc, vha, 0xffff,
+				       "%s %d %8phC Term PRLI due to unauthorize prli\n",
+				       __func__, __LINE__, iocb->u.isp24.port_name);
+				qlt_send_term_imm_notif(vha, iocb, 1);
+				break;
+			}
+
 			spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags);
 			switch (sess->fw_login_state) {
 			case DSC_LS_PLOGI_PEND:
@@ -5201,7 +5323,8 @@
 }
 
 /*
- * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire
+ * ha->hardware_lock supposed to be held on entry.
+ * Might drop it, then reacquire.
  */
 static void qlt_handle_imm_notify(struct scsi_qla_host *vha,
 	struct imm_ntfy_from_isp *iocb)
@@ -5541,8 +5664,7 @@
 			    "%s: Unexpected cmd in QFull list %p\n", __func__,
 			    cmd);
 
-		list_del(&cmd->cmd_list);
-		list_add_tail(&cmd->cmd_list, &free_list);
+		list_move_tail(&cmd->cmd_list, &free_list);
 
 		/* piggy back on hardware_lock for protection */
 		vha->hw->tgt.num_qfull_cmds_alloc--;
@@ -6282,69 +6404,6 @@
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
-static void qlt_tmr_work(struct qla_tgt *tgt,
-	struct qla_tgt_sess_work_param *prm)
-{
-	struct atio_from_isp *a = &prm->tm_iocb2;
-	struct scsi_qla_host *vha = tgt->vha;
-	struct qla_hw_data *ha = vha->hw;
-	struct fc_port *sess;
-	unsigned long flags;
-	be_id_t s_id;
-	int rc;
-	u64 unpacked_lun;
-	int fn;
-	void *iocb;
-
-	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
-
-	if (tgt->tgt_stop)
-		goto out_term2;
-
-	s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id;
-	sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id);
-	if (!sess) {
-		spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
-
-		sess = qlt_make_local_sess(vha, s_id);
-		/* sess has got an extra creation ref */
-
-		spin_lock_irqsave(&ha->tgt.sess_lock, flags);
-		if (!sess)
-			goto out_term2;
-	} else {
-		if (sess->deleted) {
-			goto out_term2;
-		}
-
-		if (!kref_get_unless_zero(&sess->sess_kref)) {
-			ql_dbg(ql_dbg_tgt_tmr, vha, 0xf020,
-			    "%s: kref_get fail %8phC\n",
-			     __func__, sess->port_name);
-			goto out_term2;
-		}
-	}
-
-	iocb = a;
-	fn = a->u.isp24.fcp_cmnd.task_mgmt_flags;
-	unpacked_lun =
-	    scsilun_to_int((struct scsi_lun *)&a->u.isp24.fcp_cmnd.lun);
-
-	rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0);
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
-
-	ha->tgt.tgt_ops->put_sess(sess);
-
-	if (rc != 0)
-		goto out_term;
-	return;
-
-out_term2:
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
-out_term:
-	qlt_send_term_exchange(ha->base_qpair, NULL, &prm->tm_iocb2, 1, 0);
-}
-
 static void qlt_sess_work_fn(struct work_struct *work)
 {
 	struct qla_tgt *tgt = container_of(work, struct qla_tgt, sess_work);
@@ -6371,9 +6430,6 @@
 		case QLA_TGT_SESS_WORK_ABORT:
 			qlt_abort_work(tgt, prm);
 			break;
-		case QLA_TGT_SESS_WORK_TM:
-			qlt_tmr_work(tgt, prm);
-			break;
 		default:
 			BUG_ON(1);
 			break;
@@ -6460,7 +6516,6 @@
 	tgt->ha = ha;
 	tgt->vha = base_vha;
 	init_waitqueue_head(&tgt->waitQ);
-	INIT_LIST_HEAD(&tgt->del_sess_list);
 	spin_lock_init(&tgt->sess_work_lock);
 	INIT_WORK(&tgt->sess_work, qlt_sess_work_fn);
 	INIT_LIST_HEAD(&tgt->sess_works_list);
@@ -6505,15 +6560,15 @@
 	return 0;
 }
 
-void qlt_remove_target_resources(struct qla_hw_data *ha)
+void qla_remove_hostmap(struct qla_hw_data *ha)
 {
 	struct scsi_qla_host *node;
 	u32 key = 0;
 
-	btree_for_each_safe32(&ha->tgt.host_map, key, node)
-		btree_remove32(&ha->tgt.host_map, key);
+	btree_for_each_safe32(&ha->host_map, key, node)
+		btree_remove32(&ha->host_map, key);
 
-	btree_destroy32(&ha->tgt.host_map);
+	btree_destroy32(&ha->host_map);
 }
 
 static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
@@ -6526,7 +6581,7 @@
 }
 
 /**
- * qla_tgt_lport_register - register lport with external module
+ * qlt_lport_register - register lport with external module
  *
  * @target_lport_ptr: pointer for tcm_qla2xxx specific lport data
  * @phys_wwpn: physical port WWPN
@@ -6602,7 +6657,7 @@
 EXPORT_SYMBOL(qlt_lport_register);
 
 /**
- * qla_tgt_lport_deregister - Degister lport
+ * qlt_lport_deregister - Deregister lport
  *
  * @vha:  Registered scsi_qla_host pointer
  */
@@ -6754,6 +6809,9 @@
 	mutex_init(&vha->vha_tgt.tgt_mutex);
 	mutex_init(&vha->vha_tgt.tgt_host_action_mutex);
 
+	INIT_LIST_HEAD(&vha->unknown_atio_list);
+	INIT_DELAYED_WORK(&vha->unknown_atio_work, qlt_unknown_atio_work_fn);
+
 	qlt_clear_mode(vha);
 
 	/*
@@ -6883,14 +6941,8 @@
 
 	if (ha->flags.msix_enabled) {
 		if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
-			if (IS_QLA2071(ha)) {
-				/* 4 ports Baker: Enable Interrupt Handshake */
-				icb->msix_atio = 0;
-				icb->firmware_options_2 |= cpu_to_le32(BIT_26);
-			} else {
-				icb->msix_atio = cpu_to_le16(msix->entry);
-				icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
-			}
+			icb->msix_atio = cpu_to_le16(msix->entry);
+			icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
 			ql_dbg(ql_dbg_init, vha, 0xf072,
 			    "Registering ICB vector 0x%x for atio que.\n",
 			    msix->entry);
@@ -7141,13 +7193,11 @@
 void
 qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 {
-	int rc;
-
+	mutex_init(&base_vha->vha_tgt.tgt_mutex);
 	if (!QLA_TGT_MODE_ENABLED())
 		return;
 
-	if  ((ql2xenablemsix == 0) || IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
-	    IS_QLA28XX(ha)) {
+	if  (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
 		ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in;
 		ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out;
 	} else {
@@ -7155,7 +7205,6 @@
 		ISP_ATIO_Q_OUT(base_vha) = &ha->iobase->isp24.atio_q_out;
 	}
 
-	mutex_init(&base_vha->vha_tgt.tgt_mutex);
 	mutex_init(&base_vha->vha_tgt.tgt_host_action_mutex);
 
 	INIT_LIST_HEAD(&base_vha->unknown_atio_list);
@@ -7164,11 +7213,6 @@
 
 	qlt_clear_mode(base_vha);
 
-	rc = btree_init32(&ha->tgt.host_map);
-	if (rc)
-		ql_log(ql_log_info, base_vha, 0xd03d,
-		    "Unable to initialize ha->host_map btree\n");
-
 	qlt_update_vp_map(base_vha, SET_VP_IDX);
 }
 
@@ -7289,21 +7333,20 @@
 	u32 key;
 	int rc;
 
-	if (!QLA_TGT_MODE_ENABLED())
-		return;
-
 	key = vha->d_id.b24;
 
 	switch (cmd) {
 	case SET_VP_IDX:
+		if (!QLA_TGT_MODE_ENABLED())
+			return;
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
 		break;
 	case SET_AL_PA:
-		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		slot = btree_lookup32(&vha->hw->host_map, key);
 		if (!slot) {
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf018,
 			    "Save vha in host_map %p %06x\n", vha, key);
-			rc = btree_insert32(&vha->hw->tgt.host_map,
+			rc = btree_insert32(&vha->hw->host_map,
 				key, vha, GFP_ATOMIC);
 			if (rc)
 				ql_log(ql_log_info, vha, 0xd03e,
@@ -7313,17 +7356,19 @@
 		}
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019,
 		    "replace existing vha in host_map %p %06x\n", vha, key);
-		btree_update32(&vha->hw->tgt.host_map, key, vha);
+		btree_update32(&vha->hw->host_map, key, vha);
 		break;
 	case RESET_VP_IDX:
+		if (!QLA_TGT_MODE_ENABLED())
+			return;
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
 		break;
 	case RESET_AL_PA:
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01a,
 		   "clear vha in host_map %p %06x\n", vha, key);
-		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		slot = btree_lookup32(&vha->hw->host_map, key);
 		if (slot)
-			btree_remove32(&vha->hw->tgt.host_map, key);
+			btree_remove32(&vha->hw->host_map, key);
 		vha->d_id.b24 = 0;
 		break;
 	}

diff --git a/scst/qla2x00t-32gbit/qla_target.h b/scst/qla2x00t-32gbit/qla_target.h
index f696ffb..9e2d8b7 100644
--- a/scst/qla2x00t-32gbit/qla_target.h
+++ b/scst/qla2x00t-32gbit/qla_target.h

@@ -143,7 +143,6 @@
 	(min(1270, ((ql) > 0) ? (QLA_TGT_DATASEGS_PER_CMD_24XX + \
 		QLA_TGT_DATASEGS_PER_CONT_24XX*((ql) - 1)) : 0))
 #endif
-#endif
 
 #define GET_TARGET_ID(ha, iocb) ((HAS_EXTENDED_IDS(ha))			\
 			 ? le16_to_cpu((iocb)->u.isp2x.target.extended)	\
@@ -204,6 +203,7 @@
 	uint8_t  reserved[2];
 	__le16	ox_id;
 } __packed;
+#define NOTIFY_ACK_FLAGS_FCSP		BIT_5
 #define NOTIFY_ACK_FLAGS_TERMINATE	BIT_3
 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT	0
 #define NOTIFY_ACK_SRR_FLAGS_REJECT	1
@@ -266,11 +266,16 @@
 #define CTIO_PORT_LOGGED_OUT		0x29
 #define CTIO_PORT_CONF_CHANGED		0x2A
 #define CTIO_SRR_RECEIVED		0x45
+#define CTIO_FAST_AUTH_ERR		0x63
+#define CTIO_FAST_INCOMP_PAD_LEN	0x65
+#define CTIO_FAST_INVALID_REQ		0x66
+#define CTIO_FAST_SPI_ERR		0x67
 #endif
 
 #ifndef CTIO_RET_TYPE
 #define CTIO_RET_TYPE	0x17		/* CTIO return entry */
 #define ATIO_TYPE7 0x06 /* Accept target I/O entry for 24xx */
+#endif
 
 struct fcp_hdr {
 	uint8_t  r_ctl;
@@ -435,7 +440,16 @@
 		struct {
 			__le16	reserved1;
 			__le16 flags;
-			__le32	residual;
+			union {
+				__le32	residual;
+				struct {
+					uint8_t rsvd1;
+					uint8_t edif_flags;
+#define EF_EN_EDIF	BIT_0
+#define EF_NEW_SA	BIT_1
+					uint16_t rsvd2;
+				};
+			};
 			__le16 ox_id;
 			__le16	scsi_status;
 			__le32	relative_offset;
@@ -473,7 +487,7 @@
 	uint8_t  vp_index;
 	uint8_t  reserved1[5];
 	__le32	exchange_address;
-	__le16	reserved2;
+	__le16	edif_sa_index;
 	__le16	flags;
 	__le32	residual;
 	__le16	ox_id;
@@ -722,15 +736,11 @@
 
 int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
 /*
  * See also commit c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0")
  * # v2.6.38.
  */
 #include <target/target_core_base.h>
-#else
-#include "target_core_base-backport.h"
-#endif
 
 #define QLA_TGT_TIMEOUT			10	/* in seconds */
 
@@ -834,9 +844,6 @@
 	/* Count of sessions refering qla_tgt. Protected by hardware_lock. */
 	int sess_count;
 
-	/* Protected by hardware_lock */
-	struct list_head del_sess_list;
-
 	spinlock_t sess_work_lock;
 	struct list_head sess_works_list;
 	struct work_struct sess_work;
@@ -891,6 +898,7 @@
 	uint8_t cmd_type;
 	uint8_t pad[7];
 	struct se_cmd se_cmd;
+	struct list_head sess_cmd_list;
 	struct scst_cmd *scst_cmd;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
 	/*
@@ -914,12 +922,12 @@
 	/* to save extra sess dereferences */
 	unsigned int conf_compl_supported:1;
 	unsigned int sg_mapped:1;
-	unsigned int free_sg:1;
 	unsigned int write_data_transferred:1;
 	unsigned int q_full:1;
 	unsigned int term_exchg:1;
 	unsigned int cmd_sent_to_fw:1;
 	unsigned int cmd_in_wq:1;
+	unsigned int edif:1;
 
 	/*
 	 * This variable may be set from outside the LIO and I/O completion
@@ -976,7 +984,6 @@
 	struct list_head sess_works_list_entry;
 
 #define QLA_TGT_SESS_WORK_ABORT	1
-#define QLA_TGT_SESS_WORK_TM	2
 	int type;
 
 	union {
@@ -1113,8 +1120,6 @@
 	struct init_cb_81xx *);
 extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *,
 	struct nvram_81xx *);
-extern int qlt_24xx_process_response_error(struct scsi_qla_host *,
-	struct sts_entry_24xx *);
 extern void qlt_modify_vp_config(struct scsi_qla_host *,
 	struct vp_config_entry_24xx *);
 extern void qlt_probe_one_stage1(struct scsi_qla_host *, struct qla_hw_data *);

diff --git a/scst/qla2x00t-32gbit/qla_tmpl.c b/scst/qla2x00t-32gbit/qla_tmpl.c
index 8dc82cf..b0a74b0 100644
--- a/scst/qla2x00t-32gbit/qla_tmpl.c
+++ b/scst/qla2x00t-32gbit/qla_tmpl.c

@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 #include "qla_def.h"
 #include "qla_tmpl.h"
@@ -12,33 +11,6 @@
 #define IOBASE(vha)	IOBAR(ISPREG(vha))
 #define INVALID_ENTRY ((struct qla27xx_fwdt_entry *)0xffffffffffffffffUL)
 
-/* hardware_lock assumed held. */
-static void
-qla27xx_write_remote_reg(struct scsi_qla_host *vha,
-			 u32 addr, u32 data)
-{
-	struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24;
-
-	ql_dbg(ql_dbg_misc, vha, 0xd300,
-	       "%s: addr/data = %xh/%xh\n", __func__, addr, data);
-
-	wrt_reg_dword(&reg->iobase_addr, 0x40);
-	wrt_reg_dword(&reg->iobase_c4, data);
-	wrt_reg_dword(&reg->iobase_window, addr);
-}
-
-void
-qla27xx_reset_mpi(scsi_qla_host_t *vha)
-{
-	ql_dbg(ql_dbg_misc + ql_dbg_verbose, vha, 0xd301,
-	       "Entered %s.\n", __func__);
-
-	qla27xx_write_remote_reg(vha, 0x104050, 0x40004);
-	qla27xx_write_remote_reg(vha, 0x10405c, 0x4);
-
-	vha->hw->stat.num_mpi_reset++;
-}
-
 static inline void
 qla27xx_insert16(uint16_t value, void *buf, ulong *len)
 {
@@ -463,8 +435,13 @@
 {
 	ql_dbg(ql_dbg_misc, vha, 0xd20a,
 	    "%s: reset risc [%lx]\n", __func__, *len);
-	if (buf)
-		WARN_ON_ONCE(qla24xx_soft_reset(vha->hw) != QLA_SUCCESS);
+	if (buf) {
+		if (qla24xx_soft_reset(vha->hw) != QLA_SUCCESS) {
+			ql_dbg(ql_dbg_async, vha, 0x5001,
+			    "%s: unable to soft reset\n", __func__);
+			return INVALID_ENTRY;
+		}
+	}
 
 	return qla27xx_next_entry(ent);
 }
@@ -906,8 +883,8 @@
 	uint8_t v[] = { 0, 0, 0, 0, 0, 0 };
 
 	WARN_ON_ONCE(sscanf(qla2x00_version_str,
-			    "%hhu.%hhu.%hhu.%hhu.%hhu.%hhu",
-			    v+0, v+1, v+2, v+3, v+4, v+5) != 6);
+			    "%hhu.%hhu.%hhu.%hhu",
+			    v + 0, v + 1, v + 2, v + 3) != 4);
 
 	tmp->driver_info[0] = cpu_to_le32(
 		v[3] << 24 | v[2] << 16 | v[1] << 8 | v[0]);
@@ -956,7 +933,8 @@
 static inline int
 qla27xx_verify_template_checksum(struct qla27xx_fwdt_template *tmp)
 {
-	return qla27xx_template_checksum(tmp, tmp->template_size) == 0;
+	return qla27xx_template_checksum(tmp,
+		le32_to_cpu(tmp->template_size)) == 0;
 }
 
 static inline int
@@ -972,7 +950,7 @@
 	ulong len = 0;
 
 	if (qla27xx_fwdt_template_valid(tmp)) {
-		len = tmp->template_size;
+		len = le32_to_cpu(tmp->template_size);
 		tmp = memcpy(buf, tmp, len);
 		ql27xx_edit_template(vha, tmp);
 		qla27xx_walk_template(vha, tmp, buf, &len);
@@ -988,7 +966,7 @@
 	ulong len = 0;
 
 	if (qla27xx_fwdt_template_valid(tmp)) {
-		len = tmp->template_size;
+		len = le32_to_cpu(tmp->template_size);
 		qla27xx_walk_template(vha, tmp, NULL, &len);
 	}
 
@@ -1000,7 +978,7 @@
 {
 	struct qla27xx_fwdt_template *tmp = p;
 
-	return tmp->template_size;
+	return le32_to_cpu(tmp->template_size);
 }
 
 int
@@ -1028,22 +1006,25 @@
 qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked)
 {
 	ulong flags = 0;
-	bool need_mpi_reset = true;
 
-#ifndef __CHECKER__
 	if (!hardware_locked)
 		spin_lock_irqsave(&vha->hw->hardware_lock, flags);
-#endif
 	if (!vha->hw->mpi_fw_dump) {
 		ql_log(ql_log_warn, vha, 0x02f3, "-> mpi_fwdump no buffer\n");
-	} else if (vha->hw->mpi_fw_dumped) {
-		ql_log(ql_log_warn, vha, 0x02f4,
-		       "-> MPI firmware already dumped (%p) -- ignoring request\n",
-		       vha->hw->mpi_fw_dump);
 	} else {
 		struct fwdt *fwdt = &vha->hw->fwdt[1];
 		ulong len;
 		void *buf = vha->hw->mpi_fw_dump;
+		bool walk_template_only = false;
+
+		if (vha->hw->mpi_fw_dumped) {
+			/* Use the spare area for any further dumps. */
+			buf += fwdt->dump_size;
+			walk_template_only = true;
+			ql_log(ql_log_warn, vha, 0x02f4,
+			       "-> MPI firmware already dumped -- dump saving to temporary buffer %p.\n",
+			       buf);
+		}
 
 		ql_log(ql_log_warn, vha, 0x02f5, "-> fwdt1 running...\n");
 		if (!fwdt->template) {
@@ -1058,9 +1039,10 @@
 			ql_log(ql_log_warn, vha, 0x02f7,
 			       "-> fwdt1 fwdump residual=%+ld\n",
 			       fwdt->dump_size - len);
-		} else {
-			need_mpi_reset = false;
 		}
+		vha->hw->stat.num_mpi_reset++;
+		if (walk_template_only)
+			goto bailout;
 
 		vha->hw->mpi_fw_dump_len = len;
 		vha->hw->mpi_fw_dumped = 1;
@@ -1072,12 +1054,8 @@
 	}
 
 bailout:
-	if (need_mpi_reset)
-		qla27xx_reset_mpi(vha);
-#ifndef __CHECKER__
 	if (!hardware_locked)
 		spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
-#endif
 }
 
 void

diff --git a/scst/qla2x00t-32gbit/qla_tmpl.h b/scst/qla2x00t-32gbit/qla_tmpl.h
index 89280b3..6e0987e 100644
--- a/scst/qla2x00t-32gbit/qla_tmpl.h
+++ b/scst/qla2x00t-32gbit/qla_tmpl.h

@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 
 #ifndef __QLA_DMP27_H__
@@ -13,7 +12,7 @@
 struct __packed qla27xx_fwdt_template {
 	__le32 template_type;
 	__le32 entry_offset;
-	uint32_t template_size;
+	__le32 template_size;
 	uint32_t count;		/* borrow field for running/residual count */
 
 	__le32 entry_count;

diff --git a/scst/qla2x00t-32gbit/qla_version.h b/scst/qla2x00t-32gbit/qla_version.h
index 8ccd9ba..03f3e2c 100644
--- a/scst/qla2x00t-32gbit/qla_version.h
+++ b/scst/qla2x00t-32gbit/qla_version.h

@@ -1,15 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * QLogic Fibre Channel HBA Driver
  * Copyright (c)  2003-2014 QLogic Corporation
- *
- * See LICENSE.qla2xxx for copyright and licensing details.
  */
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "10.01.00.25-k"
+#define QLA2XXX_VERSION      "10.02.07.900-k"
 
 #define QLA_DRIVER_MAJOR_VER	10
-#define QLA_DRIVER_MINOR_VER	1
-#define QLA_DRIVER_PATCH_VER	0
-#define QLA_DRIVER_BETA_VER	0
+#define QLA_DRIVER_MINOR_VER	2
+#define QLA_DRIVER_PATCH_VER	7
+#define QLA_DRIVER_BETA_VER	900

diff --git a/scst/qla2x00t-32gbit/target_core_base-backport.h b/scst/qla2x00t-32gbit/target_core_base-backport.h
deleted file mode 100644
index 5f2e42b..0000000
--- a/scst/qla2x00t-32gbit/target_core_base-backport.h
+++ /dev/null

@@ -1,29 +0,0 @@
-#ifndef _TARGET_CORE_BASE_BACKPORT_H_
-#define _TARGET_CORE_BASE_BACKPORT_H_
-
-#define TRANSPORT_SENSE_BUFFER 96
-
-enum se_cmd_flags_table {
-	SCF_OVERFLOW_BIT		= 0x00001000,
-	SCF_UNDERFLOW_BIT		= 0x00002000,
-};
-
-/* for sam_task_attr */
-#define TCM_SIMPLE_TAG	0x20
-#define TCM_HEAD_TAG	0x21
-#define TCM_ORDERED_TAG	0x22
-#define TCM_ACA_TAG	0x24
-
-struct se_cmd {
-	u64	tag; /* SAM command identifier aka task tag */
-	u32	se_cmd_flags;
-	u32	residual_count;
-	u64	t_task_lba;
-	int	cpuid;
-};
-
-struct se_session {
-	void	*fabric_sess_ptr;
-};
-
-#endif /* _TARGET_CORE_BASE_BACKPORT_H_ */

diff --git a/scst/qla2x00t/Makefile b/scst/qla2x00t/Makefile
index 84fb09b..dfaa980 100644
--- a/scst/qla2x00t/Makefile
+++ b/scst/qla2x00t/Makefile

@@ -48,7 +48,7 @@
 all:
 	$(MAKE) -C $(KDIR) M=$(shell pwd)				\
 	  $(shell [ -n "$(PASS_CC_TO_MAKE)" ] && echo CC="$(CC)")	\
-	  $(CONFIG_SCSI_QLA2XXX_TARGET)=CONFIG_SCSI_QLA2XXX_TARGET
+	  CONFIG_SCSI_QLA2XXX_TARGET=$(CONFIG_SCSI_QLA2XXX_TARGET)
 
 install: all
 	KDIR=$(KDIR) ../scripts/sign-modules

diff --git a/scst/qla2x00t/doc/Makefile b/scst/qla2x00t/doc/Makefile
new file mode 100644
index 0000000..069466a
--- /dev/null
+++ b/scst/qla2x00t/doc/Makefile

@@ -0,0 +1,2 @@
+check:
+	find -name '*.html' | while read f; do tidy -e < "$$f" | sed "s|^-e|$$f|"; done

diff --git a/scst/qla2x00t/doc/qla2x00t-howto.html b/scst/qla2x00t/doc/qla2x00t-howto.html
index 5f8e8a2..ded4c1a 100644
--- a/scst/qla2x00t/doc/qla2x00t-howto.html
+++ b/scst/qla2x00t/doc/qla2x00t-howto.html

@@ -63,7 +63,7 @@
   Copy the firmware image (.BIN) file to the /lib/firmware directory and
   rename it such that the kernel driver can find it. The file name that should
   be used for the firmware file depends on the RISC controller ID:
-  <table border="1">
+  <table border="1" summary="">
     <tr><th><b>ISP Model</b></th><th><b>Firmware file<br>name</b></th></tr>
     <tr><td>ISP 21XX</td><td>ql2100_fw.bin</td></tr>
     <tr><td>ISP 22XX</td><td>ql2200_fw.bin</td></tr>
@@ -231,6 +231,31 @@
   <pre>[root@proj ]# make -C scstadmin -s install</pre>
 </li>
 
+<li id="qlini_mode"> Initiator and target modes <br><br>
+  The qla2xxx_scst module has parameter "qlini_mode", which determines when initiator mode will be enabled.<br>
+  Possible values:
+<ul>
+<li>"exclusive" (default) - initiator mode will be enabled on load, disabled when target mode is enabled, and enabled again when target mode is disabled.</li>
+<li>"disabled" - initiator mode will never be enabled.</li>
+<li>"dual" - initiator mode will be enabled. Target mode can be activated when ready (only <b>qla2x00t-32gbit</b>).</li>
+<li>"enabled" - initiator mode will always stay enabled.</li>
+</ul>
+
+<br>
+  Mode "disabled" is recommended if your target's initiators misbehave in such a way that, once they have seen a port in initiator mode, they later refuse to see it as a target.
+<br><br>
+
+<b>qla2x00t</b> (old qlogic driver): <br><br>
+  Use mode "enabled" if you need your QLA adapters to work in both initiator and target modes at the same time.<br>
+  You can always see which modes are currently active in active_mode sysfs attribute.<br>
+  In all the modes you can at any time use sysfs attribute ini_mode_force_reverse to force enable or disable initiator mode on any particular port. Setting this attribute to 1 will reverse current status of the initiator mode from enabled to disabled and vice versa.<br>
+<br>
+<b>qla2x00t-32gbit</b> (new qlogic driver): <br><br>
+  Use mode "dual" if you need your QLA adapters to work in both initiator and target modes at the same time. In this mode, each qlogic host has individual <b>qlini_mode</b>, <b>ql2xexchoffld</b>, <b>ql2xiniexchg</b> attributes that can be changed dynamically.<br>
+  For example, you can change qlini_mode to "disabled" for specific qlogic host:
+  <pre>echo "disabled" > /sys/devices/pci0000:80/0000:80:02.0/0000:81:00.0/host1/scsi_host/host1/qlini_mode</pre>
+</li>
+
 <li id="target-mode">
   To see the device on the initiator we have to add it in the LUNs set of our
   target.<br>  We must have a LUN with number 0 (LUs numeration must not start
@@ -241,8 +266,8 @@
 
 <pre>modprobe qla2x00tgt
 find /sys/kernel/scst_tgt/targets/qla2x00t -name enabled | \
-while read f; do echo 1 >$f & done; wait
-find /sys -name issue_lip | while read f; do echo 1 >$f & done; wait
+while read f; do echo 1 >$f &amp; done; wait
+find /sys -name issue_lip | while read f; do echo 1 >$f &amp; done; wait
 {
 cat &lt;&lt;EOF
 HANDLER vdisk_fileio {

diff --git a/scst/qla2x00t/qla2x00-target/README b/scst/qla2x00t/qla2x00-target/README
index 28b1058..e16a136 100644
--- a/scst/qla2x00t/qla2x00-target/README
+++ b/scst/qla2x00t/qla2x00-target/README

@@ -1,7 +1,7 @@
 Target driver for QLogic 2[2-6]xx/8[1-3]xx Fibre Channel cards
 ==============================================================
 
-Version 3.5.0, 21 December 2020
+Version 3.7.0, 26 December 2022
 ----------------------------
 
 This is target driver for QLogic 2[2-6]xx/8[1-3]xx Fibre Channel cards.

diff --git a/scst/qla2x00t/qla2x00-target/qla2x00t.c b/scst/qla2x00t/qla2x00-target/qla2x00t.c
index 1c61b78..c6aa31e 100644
--- a/scst/qla2x00t/qla2x00-target/qla2x00t.c
+++ b/scst/qla2x00t/qla2x00-target/qla2x00t.c

@@ -197,15 +197,8 @@
 #define ENABLE_NPIV 0 /* NPIV does not work */
 
 #if ENABLE_NPIV
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-     defined(FC_VPORT_CREATE_DEFINED))
 static ssize_t q2t_add_vtarget(const char *target_name, char *params);
 static ssize_t q2t_del_vtarget(const char *target_name);
-#else
-#warning Patch scst_fc_vport_create was not applied on\
- your kernel. Adding NPIV targets using SCST sysfs interface will be disabled.
-#endif /*((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-	  defined(FC_VPORT_CREATE_DEFINED))*/
 #endif /* ENABLE_NPIV */
 
 /*
@@ -242,12 +235,8 @@
 	.enable_target = q2t_enable_tgt,
 	.is_target_enabled = q2t_is_tgt_enabled,
 #if ENABLE_NPIV
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-     defined(FC_VPORT_CREATE_DEFINED))
 	.add_target = q2t_add_vtarget,
 	.del_target = q2t_del_vtarget,
-#endif /*((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-	  defined(FC_VPORT_CREATE_DEFINED))*/
 	.add_target_parameters = "node_name, parent_host",
 #endif
 	.tgtt_attrs = q2tt_attrs,
@@ -2357,7 +2346,7 @@
 	sBUG_ON(prm->cmd->sg_cnt == 0);
 
 	prm->sg = prm->cmd->sg;
-	prm->seg_cnt = pci_map_sg(prm->tgt->vha->hw->pdev, prm->cmd->sg,
+	prm->seg_cnt = dma_map_sg(&prm->tgt->vha->hw->pdev->dev, prm->cmd->sg,
 		prm->cmd->sg_cnt, prm->cmd->dma_data_direction);
 	if (unlikely(prm->seg_cnt == 0))
 		goto out_err;
@@ -2387,7 +2376,7 @@
 static inline void q2t_unmap_sg(scsi_qla_host_t *vha, struct q2t_cmd *cmd)
 {
 	EXTRACHECKS_BUG_ON(!cmd->sg_mapped);
-	pci_unmap_sg(vha->hw->pdev, cmd->sg, cmd->sg_cnt,
+	dma_unmap_sg(&vha->hw->pdev->dev, cmd->sg, cmd->sg_cnt,
 	    cmd->dma_data_direction);
 	cmd->sg_mapped = 0;
 }
@@ -6458,8 +6447,6 @@
 }
 
 #if ENABLE_NPIV
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-     defined(FC_VPORT_CREATE_DEFINED))
 static ssize_t q2t_add_vtarget(const char *target_name, char *params)
 {
 	int res;
@@ -6571,8 +6558,6 @@
 	TRACE_EXIT_RES(res);
 	return res;
 }
-#endif /*((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-	  defined(FC_VPORT_CREATE_DEFINED))*/
 #endif /* ENABLE_NPIV */
 
 static int q2t_get_initiator_port_transport_id(struct scst_tgt *tgt,

diff --git a/scst/qla2x00t/qla2x00-target/qla2x00t.h b/scst/qla2x00t/qla2x00-target/qla2x00t.h
index 2ed0f4c..7993dbe 100644
--- a/scst/qla2x00t/qla2x00-target/qla2x00t.h
+++ b/scst/qla2x00t/qla2x00-target/qla2x00t.h

@@ -34,8 +34,8 @@
 
 /* Version numbers, the same as for the kernel */
 #define Q2T_VERSION(a, b, c, d)	(((a) << 030) + ((b) << 020) + (c) << 010 + (d))
-#define Q2T_VERSION_CODE	Q2T_VERSION(3, 5, 0, 0)
-#define Q2T_VERSION_STRING	"3.5.0"
+#define Q2T_VERSION_CODE	Q2T_VERSION(3, 7, 0, 0)
+#define Q2T_VERSION_STRING	"3.7.0"
 #define Q2T_PROC_VERSION_NAME	"version"
 
 #define Q2T_MAX_CDB_LEN             16

diff --git a/scst/qla2x00t/qla2x_tgt.h b/scst/qla2x00t/qla2x_tgt.h
index 0d5c37c..36e8c94 100644
--- a/scst/qla2x00t/qla2x_tgt.h
+++ b/scst/qla2x00t/qla2x_tgt.h

@@ -132,13 +132,9 @@
 }
 
 extern void qla2xxx_add_targets(void);
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-     defined(FC_VPORT_CREATE_DEFINED))
 extern size_t
 qla2xxx_add_vtarget(u64 port_name, u64 node_name, u64 parent_host);
 extern size_t qla2xxx_del_vtarget(u64 port_name);
-#endif /*((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-	  defined(FC_VPORT_CREATE_DEFINED))*/
 
 extern void qla_unknown_atio_work_fn(struct work_struct *work);
 

diff --git a/scst/qla2x00t/qla_attr.c b/scst/qla2x00t/qla_attr.c
index f541a68..bfa29e5 100644
--- a/scst/qla2x00t/qla_attr.c
+++ b/scst/qla2x00t/qla_attr.c

@@ -159,7 +159,7 @@
 
 	set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags);
 	qla2xxx_wake_dpc(base_vha);
-	qla2x00_wait_for_hba_online(vha);
+	WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS);
 
 out:
 	return size;
@@ -419,7 +419,7 @@
 		printk(KERN_INFO "Reconfiguring fabric on %ld\n",
 			vha->host_no);
 		qla2x00_configure_fabric(vha);
-		/* fall through */
+		fallthrough;
 
 	default:
 		printk(KERN_INFO "Resyncing loop on %ld\n",
@@ -441,13 +441,7 @@
 #endif /* CONFIG_SCSI_QLA2XXX_TARGET */
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_fw_dump(
-#else
 qla2x00_sysfs_read_fw_dump(struct file *file,
-#endif
 			   struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
@@ -476,13 +470,7 @@
 }
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_fw_dump(
-#else
 qla2x00_sysfs_write_fw_dump(struct file *file,
-#endif
 			    struct kobject *kobj,
 			    struct bin_attribute *bin_attr,
 			    char *buf, loff_t off, size_t count)
@@ -561,13 +549,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_nvram(
-#else
 qla2x00_sysfs_read_nvram(struct file *file,
-#endif
 			 struct kobject *kobj,
 			 struct bin_attribute *bin_attr,
 			 char *buf, loff_t off, size_t count)
@@ -587,13 +569,7 @@
 }
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_nvram(
-#else
 qla2x00_sysfs_write_nvram(struct file *file,
-#endif
 			  struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
@@ -662,13 +638,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_optrom(
-#else
 qla2x00_sysfs_read_optrom(struct file *file,
-#endif
 			  struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
@@ -685,13 +655,7 @@
 }
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_optrom(
-#else
 qla2x00_sysfs_write_optrom(struct file *file,
-#endif
 			   struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
@@ -723,13 +687,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_optrom_ctl(
-#else
 qla2x00_sysfs_write_optrom_ctl(struct file *file,
-#endif
 			       struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
@@ -894,13 +852,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_vpd(
-#else
 qla2x00_sysfs_read_vpd(struct file *file,
-#endif
 		       struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
@@ -922,13 +874,7 @@
 }
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_vpd(
-#else
 qla2x00_sysfs_write_vpd(struct file *file,
-#endif
 			struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
@@ -982,13 +928,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_sfp(
-#else
 qla2x00_sysfs_read_sfp(struct file *file,
-#endif
 		       struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
@@ -1050,13 +990,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_write_reset(
-#else
 qla2x00_sysfs_write_reset(struct file *file,
-#endif
 			struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
@@ -1096,7 +1030,7 @@
 		    "Issuing MPI reset.\n");
 
 		/* Make sure FC side is not in reset */
-		qla2x00_wait_for_hba_online(vha);
+		WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS);
 
 		/* Issue MPI reset */
 		scsi_block_requests(vha->host);
@@ -1132,13 +1066,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_xgmac_stats(
-#else
 qla2x00_sysfs_read_xgmac_stats(struct file *file,
-#endif
 		       struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
@@ -1191,13 +1119,7 @@
 };
 
 static ssize_t
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-qla2x00_sysfs_read_dcbx_tlv(
-#else
 qla2x00_sysfs_read_dcbx_tlv(struct file *file,
-#endif
 		       struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
@@ -1803,6 +1725,7 @@
 static DEVICE_ATTR(fw_state, S_IRUGO, qla2x00_fw_state_show, NULL);
 static DEVICE_ATTR(thermal_temp, S_IRUGO, qla2x00_thermal_temp_show, NULL);
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
 	&dev_attr_fw_version,
@@ -1839,6 +1762,53 @@
 	&dev_attr_thermal_temp,
 	NULL,
 };
+#else
+static struct attribute *qla2x00_host_attrs[] = {
+	&dev_attr_driver_version.attr,
+	&dev_attr_fw_version.attr,
+	&dev_attr_serial_num.attr,
+	&dev_attr_isp_name.attr,
+	&dev_attr_isp_id.attr,
+	&dev_attr_model_name.attr,
+	&dev_attr_model_desc.attr,
+	&dev_attr_pci_info.attr,
+	&dev_attr_link_state.attr,
+	&dev_attr_zio.attr,
+	&dev_attr_zio_timer.attr,
+	&dev_attr_beacon.attr,
+	&dev_attr_optrom_bios_version.attr,
+	&dev_attr_optrom_efi_version.attr,
+	&dev_attr_optrom_fcode_version.attr,
+	&dev_attr_optrom_fw_version.attr,
+	&dev_attr_84xx_fw_version.attr,
+	&dev_attr_class2_enabled.attr,
+#ifdef CONFIG_SCSI_QLA2XXX_TARGET
+	&dev_attr_ini_mode_force_reverse.attr,
+	&dev_attr_resource_counts.attr,
+	&dev_attr_port_database.attr,
+#endif
+	&dev_attr_total_isp_aborts.attr,
+	&dev_attr_mpi_version.attr,
+	&dev_attr_phy_version.attr,
+	&dev_attr_flash_block_size.attr,
+	&dev_attr_vlan_id.attr,
+	&dev_attr_vn_port_mac_address.attr,
+	&dev_attr_fabric_param.attr,
+	&dev_attr_fw_state.attr,
+	&dev_attr_optrom_gold_fw_version.attr,
+	&dev_attr_thermal_temp.attr,
+	NULL,
+};
+
+static const struct attribute_group qla2x00_host_attr_group = {
+       .attrs = qla2x00_host_attrs
+};
+
+const struct attribute_group *qla2x00_host_groups[] = {
+	&qla2x00_host_attr_group,
+	NULL
+};
+#endif
 
 /* Host attributes. */
 

diff --git a/scst/qla2x00t/qla_bsg.c b/scst/qla2x00t/qla_bsg.c
index 3239e77..4053a7b 100644
--- a/scst/qla2x00t/qla_bsg.c
+++ b/scst/qla2x00t/qla_bsg.c

@@ -20,21 +20,17 @@
 {
 	job->req->errors = result;
 }
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-static inline void set_bsg_result(struct bsg_job *job, int result)
-{
-	job->req->errors = result;
-}
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) &&	\
-	!defined(CONFIG_SUSE_KERNEL)
-static inline void set_bsg_result(struct bsg_job *job, int result)
-{
-	scsi_req(job->req)->result = result;
-}
 #else
 static inline void set_bsg_result(struct bsg_job *job, int result)
 {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+	job->req->errors = result;
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) &&	\
+	!defined(CONFIG_SUSE_KERNEL)
+	scsi_req(job->req)->result = result;
+#else
 	scsi_req(blk_mq_rq_from_pdu(job))->result = result;
+#endif
 }
 #endif
 
@@ -64,11 +60,7 @@
 {
 	srb_t *sp = ptr;
 	struct scsi_qla_host *vha = (scsi_qla_host_t *)data;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 
 	bsg_reply->result = res;
@@ -81,11 +73,7 @@
 {
 	srb_t *sp = ptr;
 	struct scsi_qla_host *vha = (scsi_qla_host_t *)data;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct qla_hw_data *ha = vha->hw;
 
 	dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
@@ -155,12 +143,7 @@
 	return ret;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla24xx_proc_fcp_prio_cfg_cmd(struct fc_bsg_job *bsg_job)
-#else
-qla24xx_proc_fcp_prio_cfg_cmd(struct bsg_job *bsg_job)
-#endif
+static int qla24xx_proc_fcp_prio_cfg_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -292,12 +275,7 @@
 	return ret;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_process_els(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_process_els(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_process_els(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_rport *rport;
@@ -308,7 +286,7 @@
 	srb_t *sp;
 	const char *type;
 	int req_sg_cnt, rsp_sg_cnt;
-	int rval =  (DRIVER_ERROR << 16);
+	int rval = DID_ERROR << 16;
 	uint16_t nextlid = 0;
 
 #ifdef __COVERITY__
@@ -479,12 +457,7 @@
 	return iocbs;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_process_ct(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_process_ct(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_process_ct(BSG_JOB_TYPE *bsg_job)
 {
 	srb_t *sp;
 #ifndef NEW_LIBFC_API
@@ -495,7 +468,7 @@
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	scsi_qla_host_t *vha = shost_priv(host);
 	struct qla_hw_data *ha = vha->hw;
-	int rval = (DRIVER_ERROR << 16);
+	int rval = DID_ERROR << 16;
 	int req_sg_cnt, rsp_sg_cnt;
 	uint16_t loop_id;
 	struct fc_port *fcport;
@@ -708,12 +681,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_process_loopback(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_process_loopback(BSG_JOB_TYPE *bsg_job)
 {
 #ifndef NEW_LIBFC_API
 	struct Scsi_Host *host = bsg_job->shost;
@@ -949,12 +917,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla84xx_reset(struct fc_bsg_job *bsg_job)
-#else
-qla84xx_reset(struct bsg_job *bsg_job)
-#endif
+static int qla84xx_reset(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -994,12 +957,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla84xx_updatefw(struct fc_bsg_job *bsg_job)
-#else
-qla84xx_updatefw(struct bsg_job *bsg_job)
-#endif
+static int qla84xx_updatefw(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -1115,12 +1073,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla84xx_mgmt_cmd(struct fc_bsg_job *bsg_job)
-#else
-qla84xx_mgmt_cmd(struct bsg_job *bsg_job)
-#endif
+static int qla84xx_mgmt_cmd(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -1324,12 +1277,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla24xx_iidma(struct fc_bsg_job *bsg_job)
-#else
-qla24xx_iidma(struct bsg_job *bsg_job)
-#endif
+static int qla24xx_iidma(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_request *bsg_request = bsg_job->request;
@@ -1423,13 +1371,8 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_optrom_setup(struct fc_bsg_job *bsg_job, scsi_qla_host_t *vha,
-#else
-qla2x00_optrom_setup(struct bsg_job *bsg_job, scsi_qla_host_t *vha,
-#endif
-    uint8_t is_update)
+static int qla2x00_optrom_setup(BSG_JOB_TYPE *bsg_job, scsi_qla_host_t *vha,
+				uint8_t is_update)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	uint32_t start = 0;
@@ -1497,12 +1440,7 @@
 	return 0;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_read_optrom(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_read_optrom(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_read_optrom(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1534,12 +1472,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_update_optrom(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_update_optrom(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_update_optrom(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1570,12 +1503,7 @@
 	return rval;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_update_fru_versions(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_update_fru_versions(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_update_fru_versions(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1626,12 +1554,7 @@
 	return 0;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_read_fru_status(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_read_fru_status(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_read_fru_status(BSG_JOB_TYPE *bsg_job)
 {
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1680,12 +1603,7 @@
 	return 0;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_write_fru_status(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_write_fru_status(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_write_fru_status(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
@@ -1730,12 +1648,7 @@
 	return 0;
 }
 
-static int
-#ifndef NEW_LIBFC_API
-qla2x00_process_vendor_specific(struct fc_bsg_job *bsg_job)
-#else
-qla2x00_process_vendor_specific(struct bsg_job *bsg_job)
-#endif
+static int qla2x00_process_vendor_specific(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1783,12 +1696,7 @@
 	}
 }
 
-int
-#ifndef NEW_LIBFC_API
-qla24xx_bsg_request(struct fc_bsg_job *bsg_job)
-#else
-qla24xx_bsg_request(struct bsg_job *bsg_job)
-#endif
+int qla24xx_bsg_request(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
@@ -1849,12 +1757,7 @@
 	return ret;
 }
 
-int
-#ifndef NEW_LIBFC_API
-qla24xx_bsg_timeout(struct fc_bsg_job *bsg_job)
-#else
-qla24xx_bsg_timeout(struct bsg_job *bsg_job)
-#endif
+int qla24xx_bsg_timeout(BSG_JOB_TYPE *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;

diff --git a/scst/qla2x00t/qla_bsg.h b/scst/qla2x00t/qla_bsg.h
index 6a6725e..a680434 100644
--- a/scst/qla2x00t/qla_bsg.h
+++ b/scst/qla2x00t/qla_bsg.h

@@ -121,7 +121,7 @@
 	uint16_t rsrvd;
 	struct qla84_mgmt_param mgmtp;/* parameters for cmd */
 	uint32_t len; /* bytes in payload following this struct */
-	uint8_t payload[0]; /* payload for cmd */
+	uint8_t payload[]; /* payload for cmd */
 };
 
 struct qla_bsg_a84_mgmt {

diff --git a/scst/qla2x00t/qla_def.h b/scst/qla2x00t/qla_def.h
index 049caca..19bdec1 100644
--- a/scst/qla2x00t/qla_def.h
+++ b/scst/qla2x00t/qla_def.h

@@ -31,19 +31,15 @@
 	defined(CONFIG_SUSE_KERNEL) && \
 	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 #define NEW_LIBFC_API
+#define BSG_JOB_TYPE struct bsg_job
+#else
+#define BSG_JOB_TYPE struct fc_bsg_job
 #endif
 
 #ifdef NEW_LIBFC_API
 #include <linux/bsg-lib.h>
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-#error
-#error ***This version of qla2xxx does not support distributions based on***
-#error ***kernels less than 2.6.32.***
-#error
-#endif
-
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
@@ -3193,8 +3189,6 @@
 
 #define        QLA_DSDS_PER_IOCB       37
 
-#define CMD_SP(Cmnd)           ((Cmnd)->SCp.ptr)
-
 #define QLA_SG_ALL     1024
 
 enum nexus_wait_type {

diff --git a/scst/qla2x00t/qla_gbl.h b/scst/qla2x00t/qla_gbl.h
index 7b94d60..936bc2d 100644
--- a/scst/qla2x00t/qla_gbl.h
+++ b/scst/qla2x00t/qla_gbl.h

@@ -496,7 +496,11 @@
  * Global Function Prototypes in qla_attr.c source file.
  */
 struct device_attribute;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 extern struct device_attribute *qla2x00_host_attrs[];
+#else
+extern const struct attribute_group *qla2x00_host_groups[];
+#endif
 struct fc_function_template;
 extern struct fc_function_template qla2xxx_transport_functions;
 extern struct fc_function_template qla2xxx_transport_vport_functions;
@@ -605,13 +609,8 @@
 extern void qla82xx_clear_pending_mbx(scsi_qla_host_t *);
 
 /* BSG related functions */
-#ifdef NEW_LIBFC_API
-extern int qla24xx_bsg_request(struct bsg_job *);
-extern int qla24xx_bsg_timeout(struct bsg_job *);
-#else
-extern int qla24xx_bsg_request(struct fc_bsg_job *);
-extern int qla24xx_bsg_timeout(struct fc_bsg_job *);
-#endif
+extern int qla24xx_bsg_request(BSG_JOB_TYPE *);
+extern int qla24xx_bsg_timeout(BSG_JOB_TYPE *);
 extern int qla84xx_reset_chip(scsi_qla_host_t *, uint16_t);
 extern int qla2x00_issue_iocb_timeout(scsi_qla_host_t *, void *,
 	dma_addr_t, size_t, uint32_t);

diff --git a/scst/qla2x00t/qla_gs.c b/scst/qla2x00t/qla_gs.c
index 4eedcec..ec76353 100644
--- a/scst/qla2x00t/qla_gs.c
+++ b/scst/qla2x00t/qla_gs.c

@@ -1015,7 +1015,7 @@
 }
 
 /**
- * qla2x00_snd_rft_id() - SNS Register FC-4 TYPEs (RFT_ID) supported by the HBA.
+ * qla2x00_sns_rft_id() - SNS Register FC-4 TYPEs (RFT_ID) supported by the HBA.
  * @vha: HA context
  *
  * This command uses the old Exectute SNS Command mailbox routine.
@@ -1251,7 +1251,7 @@
 }
 
 /**
- * qla2x00_prep_ct_req() - Prepare common CT request fields for SNS query.
+ * qla2x00_prep_ct_fdmi_req() - Prepare common CT request fields for SNS query.
  * @p: CT request buffer
  * @cmd: GS command
  * @rsp_size: response size in bytes

diff --git a/scst/qla2x00t/qla_iocb.c b/scst/qla2x00t/qla_iocb.c
index fe327ea..7584de9 100644
--- a/scst/qla2x00t/qla_iocb.c
+++ b/scst/qla2x00t/qla_iocb.c

@@ -504,7 +504,7 @@
 EXPORT_SYMBOL(qla2x00_start_iocbs);
 
 /**
- * qla2x00_marker() - Send a marker IOCB to the firmware.
+ * __qla2x00_marker() - Send a marker IOCB to the firmware.
  * @vha: HA context
  * @req: Request queue
  * @rsp: ...
@@ -1875,7 +1875,7 @@
 {
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 	struct qla_hw_data *ha = sp->fcport->vha->hw;
-	int affinity = scst_blk_rq_cpu(cmd->request);
+	int affinity = scst_blk_rq_cpu(scsi_cmd_to_rq(cmd));
 
 	if (ha->flags.cpu_affinity_enabled && affinity >= 0 &&
 		affinity < ha->max_rsp_queues - 1)
@@ -2099,12 +2099,8 @@
 static void
 qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
 {
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_request *bsg_request = bsg_job->request;
-#endif
 
 	els_iocb->entry_type = ELS_IOCB_TYPE;
 	els_iocb->entry_count = 1;
@@ -2119,13 +2115,8 @@
 
 	els_iocb->opcode =
 	    sp->type == SRB_ELS_CMD_RPT ?
-#ifndef NEW_LIBFC_API
-	    bsg_job->request->rqst_data.r_els.els_code :
-	    bsg_job->request->rqst_data.h_els.command_code;
-#else
 	    bsg_request->rqst_data.r_els.els_code :
 	    bsg_request->rqst_data.h_els.command_code;
-#endif
 	els_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
 	els_iocb->port_id[1] = sp->fcport->d_id.b.area;
 	els_iocb->port_id[2] = sp->fcport->d_id.b.domain;
@@ -2160,11 +2151,7 @@
 	uint16_t tot_dsds;
 	scsi_qla_host_t *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	int loop_iterartion = 0;
 	int entry_count = 1;
 
@@ -2239,11 +2226,7 @@
 	uint16_t tot_dsds;
 	scsi_qla_host_t *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
-#else
-	struct bsg_job *bsg_job = sp->u.bsg_job;
-#endif
+	BSG_JOB_TYPE *bsg_job = sp->u.bsg_job;
 	int loop_iterartion = 0;
 	int entry_count = 1;
 

diff --git a/scst/qla2x00t/qla_isr.c b/scst/qla2x00t/qla_isr.c
index bc5b4bf..d5b1181 100644
--- a/scst/qla2x00t/qla_isr.c
+++ b/scst/qla2x00t/qla_isr.c

@@ -885,7 +885,7 @@
 	case MBA_CHG_IN_CONNECTION:	/* Change in connection mode */
 		if (IS_QLA2100(ha))
 			break;
-		/* fall through */
+		fallthrough;
 	case MBA_RESET:			/* Reset */
 	case MBA_SYSTEM_ERR:		/* System Error */
 	case MBA_REQ_TRANSFER_ERR:	/* Request Transfer Error */
@@ -1083,12 +1083,8 @@
 	const char func[] = "CT_IOCB";
 	const char *type;
 	srb_t *sp;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
-#endif
 	uint16_t comp_status;
 	int res;
 
@@ -1097,9 +1093,7 @@
 		return;
 
 	bsg_job = sp->u.bsg_job;
-#ifdef NEW_LIBFC_API
 	bsg_reply = bsg_job->reply;
-#endif
 
 	type = "ct pass-through";
 
@@ -1108,52 +1102,32 @@
 	/* return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT
 	 * fc payload  to the caller
 	 */
-#ifndef NEW_LIBFC_API
-	bsg_job->reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
-#else
 	bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
-#endif
 	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
 
 	if (comp_status != CS_COMPLETE) {
 		if (comp_status == CS_DATA_UNDERRUN) {
 			res = DID_OK << 16;
-#ifndef NEW_LIBFC_API
-			bsg_job->reply->reply_payload_rcv_len =
-#else
 			bsg_reply->reply_payload_rcv_len =
-#endif
 			    le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len);
 
 			ql_log(ql_log_warn, vha, 0x5048,
 			    "CT pass-through-%s error "
 			    "comp_status-status=0x%x total_byte = 0x%x.\n",
 			    type, comp_status,
-#ifndef NEW_LIBFC_API
-			    bsg_job->reply->reply_payload_rcv_len);
-#else
 			    bsg_reply->reply_payload_rcv_len);
-#endif
 		} else {
 			ql_log(ql_log_warn, vha, 0x5049,
 			    "CT pass-through-%s error "
 			    "comp_status-status=0x%x.\n", type, comp_status);
 			res = DID_ERROR << 16;
-#ifndef NEW_LIBFC_API
-			bsg_job->reply->reply_payload_rcv_len = 0;
-#else
 			bsg_reply->reply_payload_rcv_len = 0;
-#endif
 		}
 		ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5035,
 		    (uint8_t *)pkt, sizeof(*pkt));
 	} else {
 		res =  DID_OK << 16;
-#ifndef NEW_LIBFC_API
-		bsg_job->reply->reply_payload_rcv_len =
-#else
 		bsg_reply->reply_payload_rcv_len =
-#endif
 		    bsg_job->reply_payload.payload_len;
 		bsg_job->reply_len = 0;
 	}
@@ -1167,12 +1141,8 @@
 	const char func[] = "ELS_CT_IOCB";
 	const char *type;
 	srb_t *sp;
-#ifndef NEW_LIBFC_API
-	struct fc_bsg_job *bsg_job;
-#else
-	struct bsg_job *bsg_job;
+	BSG_JOB_TYPE *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
-#endif
 	uint16_t comp_status;
 	uint32_t fw_status[3];
 	uint8_t *fw_sts_ptr;
@@ -1181,9 +1151,7 @@
 	if (!sp)
 		return;
 	bsg_job = sp->u.bsg_job;
-#ifdef NEW_LIBFC_API
 	bsg_reply = bsg_job->reply;
-#endif
 
 	type = NULL;
 	switch (sp->type) {
@@ -1207,20 +1175,12 @@
 	/* return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT
 	 * fc payload  to the caller
 	 */
-#ifndef NEW_LIBFC_API
-	bsg_job->reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
-#else
 	bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
-#endif
 	bsg_job->reply_len = sizeof(struct fc_bsg_reply) + sizeof(fw_status);
 
 	if (comp_status != CS_COMPLETE) {
 		if (comp_status == CS_DATA_UNDERRUN) {
-#ifndef NEW_LIBFC_API
-			bsg_job->reply->reply_payload_rcv_len =
-#else
 			bsg_reply->reply_payload_rcv_len =
-#endif
 			    le16_to_cpu(((struct els_sts_entry_24xx *)pkt)->total_byte_count);
 
 			ql_dbg(ql_dbg_user, vha, 0x503f,
@@ -1241,11 +1201,7 @@
 				pkt)->error_subcode_1),
 			    le16_to_cpu(((struct els_sts_entry_24xx *)
 				    pkt)->error_subcode_2));
-#ifndef NEW_LIBFC_API
-			bsg_job->reply->reply_payload_rcv_len = 0;
-#else
 			bsg_reply->reply_payload_rcv_len = 0;
-#endif
 			fw_sts_ptr = bsg_job_sense(bsg_job) +
 				sizeof(struct fc_bsg_reply);
 			memcpy(fw_sts_ptr, fw_status, sizeof(fw_status));
@@ -1253,11 +1209,7 @@
 		ql_dump_buffer(ql_dbg_user + ql_dbg_buffer, vha, 0x5056,
 				(uint8_t *)pkt, sizeof(*pkt));
 	} else {
-#ifndef NEW_LIBFC_API
-		bsg_job->reply->reply_payload_rcv_len = bsg_job->reply_payload.payload_len;
-#else
 		bsg_reply->reply_payload_rcv_len = bsg_job->reply_payload.payload_len;
-#endif
 		bsg_job->reply_len = 0;
 	}
 	sp->done(vha, sp, 0);
@@ -1667,31 +1619,22 @@
 
 	/* check guard */
 	if (e_guard != a_guard) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x1);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x1);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
 		return 1;
 	}
 
 	/* check ref tag */
 	if (e_ref_tag != a_ref_tag) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x3);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x3);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
 		return 1;
 	}
 
 	/* check appl tag */
 	if (e_app_tag != a_app_tag) {
-		scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-		    0x10, 0x2);
-		set_driver_byte(cmd, DRIVER_SENSE);
+		scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x2);
 		set_host_byte(cmd, DID_ABORT);
-		cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
 		return 1;
 	}
 
@@ -2251,7 +2194,7 @@
 		case ABTS_RECV_24XX:
 			/* ensure that the ATIO queue is empty */
 			qla24xx_process_atio_queue(vha);
-			/* fall through */
+			fallthrough;
 		case ABTS_RESP_24XX:
 		case CTIO_TYPE7:
 		case NOTIFY_ACK_TYPE:

diff --git a/scst/qla2x00t/qla_mbx.c b/scst/qla2x00t/qla_mbx.c
index 202a87e..86133c9 100644
--- a/scst/qla2x00t/qla_mbx.c
+++ b/scst/qla2x00t/qla_mbx.c

@@ -1819,7 +1819,7 @@
     uint16_t cmd_size, size_t buf_size)
 {
 	int rval;
-	mbx_cmd_t mc;
+	mbx_cmd_t mc = {};
 	mbx_cmd_t *mcp = &mc;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x105d,

diff --git a/scst/qla2x00t/qla_nx.c b/scst/qla2x00t/qla_nx.c
index c08decc..780377f 100644
--- a/scst/qla2x00t/qla_nx.c
+++ b/scst/qla2x00t/qla_nx.c

@@ -1181,11 +1181,7 @@
 	ql_log(ql_log_info, vha, 0x0072,
 	    "%d CRB init values found in ROM.\n", n);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
 	buf = kmalloc_array(n, sizeof(struct crb_addr_pair), GFP_KERNEL);
-#else
-	buf = kmalloc(n * sizeof(struct crb_addr_pair), GFP_KERNEL);
-#endif
 	if (buf == NULL) {
 		ql_log(ql_log_fatal, vha, 0x010c,
 		    "Unable to allocate memory.\n");

diff --git a/scst/qla2x00t/qla_os.c b/scst/qla2x00t/qla_os.c
index 05e6e8d..92d5b24 100644
--- a/scst/qla2x00t/qla_os.c
+++ b/scst/qla2x00t/qla_os.c

@@ -13,10 +13,6 @@
 #include <linux/mutex.h>
 #include <linux/version.h>
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-#error Kernel 2.6.26 or above is needed to build the qla2x00t driver
-#endif
-
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsicam.h>
 #include <scsi/scsi_transport.h>
@@ -249,38 +245,28 @@
 static int qla2xxx_scan_finished(struct Scsi_Host *, unsigned long time);
 static void qla2xxx_scan_start(struct Scsi_Host *);
 static void qla2xxx_slave_destroy(struct scsi_device *);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 static int qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*fn)(struct scsi_cmnd *));
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
-static DEF_SCSI_QCMD(qla2xxx_queuecommand);
+#else
+static int qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd);
 #endif
+static DEF_SCSI_QCMD(qla2xxx_queuecommand);
 static int qla2xxx_eh_abort(struct scsi_cmnd *);
 static int qla2xxx_eh_device_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_target_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_bus_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_host_reset(struct scsi_cmnd *);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && \
-	!defined(CONFIG_SUSE_KERNEL) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-static int qla2x00_change_queue_depth(struct scsi_device *, int);
-#else
-static int qla2x00_change_queue_depth(struct scsi_device *, int, int);
-#endif
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
+static int qla2x00_change_queue_depth(struct scsi_device *, int, int);
 static int qla2x00_change_queue_type(struct scsi_device *, int);
 #endif
 
 struct scsi_host_template qla2xxx_driver_template = {
 	.module			= THIS_MODULE,
 	.name			= QLA2XXX_DRIVER_NAME,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-	.queuecommand		= qla2xxx_queuecommand_lck,
-#else
 	.queuecommand		= qla2xxx_queuecommand,
-#endif
 
 	.eh_abort_handler	= qla2xxx_eh_abort,
 	.eh_device_reset_handler = qla2xxx_eh_device_reset,
@@ -306,7 +292,11 @@
 	.sg_tablesize		= SG_ALL,
 
 	.max_sectors		= 0xFFFF,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 	.shost_attrs		= qla2x00_host_attrs,
+#else
+	.shost_groups		= qla2x00_host_groups,
+#endif
 #ifdef CONFIG_SCSI_QLA2XXX_TARGET
 	.supported_mode		= MODE_INITIATOR | MODE_TARGET,
 #endif /* CONFIG_SCSI_QLA2XXX_TARGET */
@@ -460,7 +450,7 @@
 			    "Failed to create request queue.\n");
 			goto fail;
 		}
-		ha->wq = create_workqueue("qla2xxx_wq");
+		ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
 		vha->req = ha->req_q_map[req];
 		options |= BIT_1;
 		for (ques = 1; ques < ha->max_rsp_queues; ques++) {
@@ -654,7 +644,7 @@
 		ctx1 = NULL;
 	}
 
-	CMD_SP(cmd) = NULL;
+	sp->type = 0;
 	mempool_free(sp, ha->srb_mempool);
 }
 
@@ -674,11 +664,15 @@
 		return;
 
 	qla2x00_sp_free_dma(ha, sp);
-	cmd->scsi_done(cmd);
+	scsi_done(cmd);
 }
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 static int
 qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+#else
+static int qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd)
+#endif
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
@@ -756,8 +750,9 @@
 	sp->u.scmd.cmd = cmd;
 	sp->type = SRB_SCSI_CMD;
 	atomic_set(&sp->ref_count, 1);
-	CMD_SP(cmd) = (void *)sp;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 	cmd->scsi_done = done;
+#endif
 	sp->free = qla2x00_sp_free_dma;
 	sp->done = qla2x00_sp_compl;
 
@@ -785,7 +780,7 @@
 
 qc24_fail_command:
 	spin_lock_irq(vha->host->host_lock);
-	done(cmd);
+	scsi_done(cmd);
 
 	return 0;
 }
@@ -811,6 +806,7 @@
 	unsigned long wait_iter = ABORT_WAIT_ITER;
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	struct qla_hw_data *ha = vha->hw;
+	srb_t *sp = scsi_cmd_priv(cmd);
 	int ret = QLA_SUCCESS;
 
 	if (unlikely(pci_channel_offline(ha->pdev)) || ha->flags.eeh_busy) {
@@ -819,10 +815,10 @@
 		return ret;
 	}
 
-	while (CMD_SP(cmd) && wait_iter--) {
+	while (sp->type && wait_iter--) {
 		msleep(ABORT_POLLING_PERIOD);
 	}
-	if (CMD_SP(cmd))
+	if (sp->type)
 		ret = QLA_FUNCTION_FAILED;
 
 	return ret;
@@ -949,25 +945,6 @@
 	atomic_inc(&sp->ref_count);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL)
-static void
-qla2x00_block_error_handler(struct scsi_cmnd *cmnd)
-{
-	struct Scsi_Host *shost = cmnd->device->host;
-	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
-	unsigned long flags;
-
-	spin_lock_irqsave(shost->host_lock, flags);
-	while (rport->port_state == FC_PORTSTATE_BLOCKED) {
-		spin_unlock_irqrestore(shost->host_lock, flags);
-		msleep(1000);
-		spin_lock_irqsave(shost->host_lock, flags);
-	}
-	spin_unlock_irqrestore(shost->host_lock, flags);
-	return;
-}
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL) */
-
 /**************************************************************************
 * qla2xxx_eh_abort
 *
@@ -994,23 +971,17 @@
 	int wait = 0;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (!CMD_SP(cmd))
-		return SUCCESS;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL)
-	qla2x00_block_error_handler(cmd);
-#else
 	ret = fc_block_scsi_eh(cmd);
 	if (ret != SUCCESS && ret != 0)
 		return ret;
-#endif
+
 	ret = SUCCESS;
 
 	id = cmd->device->id;
 	lun = cmd->device->lun;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	sp = (srb_t *) CMD_SP(cmd);
+	sp = scsi_cmd_priv(cmd);
 	if (!sp) {
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		return SUCCESS;
@@ -1038,7 +1009,7 @@
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	/* Did the command return during mailbox execution? */
-	if (ret == FAILED && !CMD_SP(cmd))
+	if (ret == FAILED && !sp->type)
 		ret = SUCCESS;
 
 	/* Wait for the command to be returned. */
@@ -1126,13 +1097,9 @@
 		return FAILED;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL)
-	qla2x00_block_error_handler(cmd);
-#else
 	err = fc_block_scsi_eh(cmd);
 	if (err != SUCCESS && err != 0)
 		return err;
-#endif
 
 	ql_log(ql_log_info, vha, 0x8009,
 	    "%s RESET ISSUED nexus=%ld:%d:%lld cmd=%p.\n", name, vha->host_no,
@@ -1146,7 +1113,7 @@
 	}
 	err = 2;
 	if (do_reset(fcport, cmd->device->lun,
-		     scst_blk_rq_cpu(cmd->request) + 1)
+		     scst_blk_rq_cpu(scsi_cmd_to_rq(cmd)) + 1)
 		!= QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x800c,
 		    "do_reset failed for cmd=%p.\n", cmd);
@@ -1224,13 +1191,10 @@
 		return ret;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL)
-	qla2x00_block_error_handler(cmd);
-#else
 	ret = fc_block_scsi_eh(cmd);
 	if (ret != SUCCESS && ret != 0)
 		return ret;
-#endif
+
 	ret = FAILED;
 
 	ql_log(ql_log_info, vha, 0x8012,
@@ -1462,20 +1426,7 @@
 	sdev->hostdata = NULL;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && \
-	!defined(CONFIG_SUSE_KERNEL) && \
-	(!defined(RHEL_RELEASE_CODE) || \
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
-
-static int
-qla2x00_change_queue_depth(struct scsi_device *sdev, int qdepth)
-{
-	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
-	return sdev->queue_depth;
-}
-
-#else /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL) && (!defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1)) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
 
 static void qla2x00_handle_queue_full(struct scsi_device *sdev, int qdepth)
 {
@@ -1532,9 +1483,6 @@
 	return sdev->queue_depth;
 }
 
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL) && (!defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1)) */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
 static int
 qla2x00_change_queue_type(struct scsi_device *sdev, int tag_type)
 {
@@ -1567,7 +1515,7 @@
 	if (!dma_set_mask(&ha->pdev->dev, DMA_BIT_MASK(64))) {
 		/* Any upper-dword bits set? */
 		if (MSD(dma_get_required_mask(&ha->pdev->dev)) &&
-		    !pci_set_consistent_dma_mask(ha->pdev, DMA_BIT_MASK(64))) {
+		    !dma_set_coherent_mask(&ha->pdev->dev, DMA_BIT_MASK(64))) {
 			/* Ok, a 64bit DMA mask is applicable. */
 			ha->enable_64bit_addressing = 1;
 			ha->isp_ops->calc_req_entries = qla2x00_calc_iocbs_64;
@@ -1577,7 +1525,7 @@
 	}
 
 	dma_set_mask(&ha->pdev->dev, DMA_BIT_MASK(32));
-	pci_set_consistent_dma_mask(ha->pdev, DMA_BIT_MASK(32));
+	dma_set_coherent_mask(&ha->pdev->dev, DMA_BIT_MASK(32));
 }
 
 static void
@@ -2287,8 +2235,6 @@
 }
 EXPORT_SYMBOL(qla2xxx_add_targets);
 
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-     defined(FC_VPORT_CREATE_DEFINED))
 size_t qla2xxx_add_vtarget(u64 port_name, u64 node_name, u64 parent_host)
 {
 	struct Scsi_Host *shost = NULL;
@@ -2368,12 +2314,6 @@
 }
 EXPORT_SYMBOL(qla2xxx_del_vtarget);
 
-#else
-#warning "Patch scst_fc_vport_create was not applied on\
- your kernel. Adding NPIV targets using SCST sysfs interface will be disabled."
-#endif /*((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)) || \
-	  defined(FC_VPORT_CREATE_DEFINED))*/
-
 void qla_unknown_atio_work_fn(struct work_struct *work)
 {
 	struct qla_hw_data *ha = container_of(work, struct qla_hw_data,
@@ -2385,9 +2325,6 @@
 #endif /* CONFIG_SCSI_QLA2XXX_TARGET */
 
 static int
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0))
-__devinit
-#endif
 qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int	ret = -ENODEV;
@@ -4604,11 +4541,7 @@
 		ql_dbg(ql_dbg_aer, base_vha, 0x9007,
 		    "Finding pci device at function = 0x%x.\n", fn);
 		other_pdev =
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) && !defined(CONFIG_SUSE_KERNEL)
-		    pci_get_bus_and_slot(
-#else
 		    pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
-#endif
 		    ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
 		    fn));
 
@@ -4773,7 +4706,8 @@
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) &&		\
 	(!defined(RHEL_RELEASE_CODE) ||				\
-	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 3))
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 3)) &&	\
+	!defined(UEK_KABI_RENAME)
 	pci_cleanup_aer_uncorrect_error_status(pdev);
 #endif
 
@@ -4968,8 +4902,7 @@
 	pci_unregister_driver(&qla2xxx_pci_driver);
 	qla2x00_release_firmware();
 	kmem_cache_destroy(srb_cachep);
-	if (ctx_cachep)
-		kmem_cache_destroy(ctx_cachep);
+	kmem_cache_destroy(ctx_cachep);
 	fc_release_transport(qla2xxx_transport_template);
 	fc_release_transport(qla2xxx_transport_vport_template);
 }

diff --git a/scst/qla2x00t/qla_sup.c b/scst/qla2x00t/qla_sup.c
index 18b745e..cadecdb 100644
--- a/scst/qla2x00t/qla_sup.c
+++ b/scst/qla2x00t/qla_sup.c

@@ -2329,8 +2329,7 @@
 				sec_mask = 0x10000;
 				break;
 			}
-			/* Fall through... */
-
+			fallthrough;
 		case 0x1f: /* Atmel flash. */
 			/* 512k sector size. */
 			if (flash_id == 0x13) {
@@ -2338,8 +2337,7 @@
 				sec_mask =   0x80000000;
 				break;
 			}
-			/* Fall through... */
-
+			fallthrough;
 		case 0x01: /* AMD flash. */
 			if (flash_id == 0x38 || flash_id == 0x40 ||
 			    flash_id == 0x4f) {
@@ -2371,7 +2369,7 @@
 				sec_mask = 0x1e000;
 				break;
 			}
-			/* Fall through... */
+			fallthrough;
 		default:
 			/* Default to 16 kb sector size. */
 			rest_addr = 0x3fff;

diff --git a/scst/scripts/checkpatch b/scst/scripts/checkpatch
index 249d1d0..8cdcfcd 100755
--- a/scst/scripts/checkpatch
+++ b/scst/scripts/checkpatch

@@ -10,4 +10,4 @@
     SPDX_LICENSE_TAG
     SYMBOLIC_PERMS
 )
-../linux-kernel/scripts/checkpatch.pl -f --show-types --ignore="$(echo "${ignore[@]}" | sed 's/ /,/g')" $(list-source-files | grep -vE '^debian/|^fcst/linux-patches|patch$|pdf$|png$|^ibmvstgt/|^iscsi-scst/usr|^mpt/|^mvsas_tgt/|^qla|^scripts/|^scstadmin/|^usr/|^www/') | sed 's/^#[0-9]*: FILE: \(.*\):/\1:1:/'
+../linux-kernel/scripts/checkpatch.pl -f --show-types --ignore="$(echo "${ignore[@]}" | sed 's/ /,/g')" $(list-source-files | grep -vE '^debian/|^fcst/linux-patches|patch$|pdf$|png$|^iscsi-scst/usr|^qla|^scripts/|^scstadmin/|^usr/|^www/') | sed 's/^#[0-9]*: FILE: \(.*\):/\1:1:/'

diff --git a/scst/scripts/generate-kernel-patch b/scst/scripts/generate-kernel-patch
index c8c4e55..7a3bcb9 100755
--- a/scst/scripts/generate-kernel-patch
+++ b/scst/scripts/generate-kernel-patch

@@ -27,11 +27,10 @@
 source "$(dirname "$0")/kernel-functions"
 
 function usage {
-  echo "Usage: $0 [-d] [-h] [-m] [-n] [-p <dir>] [-s] [-u] <kernel version>"
+  echo "Usage: $0 [-d] [-h] [-n] [-p <dir>] [-s] [-u] <kernel version>"
   echo "where: "
   echo "        -d - enable patch specialization debugging"
   echo "        -h - show this text"
-  echo "        -m - add mpt target driver"
   echo "        -n - do not delete code disabled via preprocessor statements"
   echo "        -p - generate multiple patches instead of one big patch into"\
        "the specified directory."
@@ -114,7 +113,7 @@
   local releasevermajor="$1"
   local releaseverminor="$2"
   case "$distro" in
-      CentOS)
+      CentOS|AlmaLinux)
 	  if [ -n "$releasevermajor" ]; then
 	      ao=(
 		  -v "RHEL_MAJOR=$releasevermajor"
@@ -125,7 +124,7 @@
 	  fi
 	  ;;
       UEK)
-	  ao=(-v UEK_KABI_RENAME=1)
+	  ao=(-v UEK_KABI_RENAME=1 -v UEK_RELEASE=${releasevermajor})
 	  ;;
   esac
   local kver3
@@ -248,7 +247,7 @@
 
 # See also commit 89d9a567952b ("[SCSI] add support for per-host cmd pools";
 # v3.15).
-if kernel_version_lt "$kver" 3.15; then
+if kernel_version_lt "$kver" 3.18; then
     qla2x00t="true"
     qla2x00t_32gbit="false"
 else
@@ -489,7 +488,7 @@
 
 (
 for f in iscsi-scst/include/*h; do
-    [ -e "$f" ] || continue	
+    [ -e "$f" ] || continue
     case "${f}" in
       "iscsi-scst/include/iscsi_scst_itf_ver.h")
           ;;
@@ -578,7 +577,7 @@
   add_file "qla2x00t-32gbit/qla2x00-target/README" \
            "Documentation/scst/README.qla2x00t" \
   | process_patch "qla2x00t-doc.diff"
-    
+
 fi \
 | process_patch "qla2x00t.diff"
 
@@ -642,186 +641,3 @@
   add_file "scst_local/scst_local.c" "drivers/scst/scst_local/scst_local.c"
 ) \
 | process_patch "scst_16_local.diff"
-
-
-
-# Directory drivers/scsi/ibmvstgt
-
-{
-  ( cd ibmvstgt 2>/dev/null && ./generate-in-tree-patches "${kver}" ) &&
-  if [ -e "ibmvstgt/in-tree-patches/${kver}" ]; then
-    if [ "${multiple_patches}" = "true" ]; then
-      cat <<EOF
-[SCSI] ibmvstgt: Port from tgt to SCST
-
-The ibmvstgt and libsrp kernel modules as included in the 2.6.37 kernel are
-based on the tgt SCSI target framework. Both kernel modules need the scsi_tgt
-kernel module and the tgtd user space process in order to function
-properly. This patch modifies the ibmvstgt and libsrp kernel modules such that
-both use the SCST storage target framework instead of tgt. As a result,
-neither the scsi_tgt kernel module nor the tgtd user space process are any
-more necessary when using the ibmvstgt driver.
-
-This patch introduces one backwards-incompatible change, namely that the path
-of the ibmvstgt sysfs attributes is modified. This change is unavoidable
-because this patch dissociates ibmvstgt SRP sessions from a SCSI host
-instance.  Since the user space STGT driver ibmvio was the only user of
-these attributes, that shouldn't be an issue.
-
-Changes in ibmvstgt compared to kernel 2.6.36:
-- Increased maximum data size for a single SRP command from 128 KB to 64 MB
-  such that an initiator is not forced to split large transfers into
-  multiple SCSI commands.
-- The maximum RDMA transfer size supported by a single H_COPY_RDMA call is
-  queried at driver initialization time from the open firmware tree / larger
-  transfers than 128 KB are now supported too.
-- If DMA mapping fails while handling a READ or WRITE command, the offending
-  command is retried until the associated data has been transferred instead of
-  reporting to the ibmvscsi client that the SCSI command failed.
-- VSCSI command/response queue: one element has been reserved for management
-  datagrams since these fall outside the SRP credit mechanism. Added a compile-
-  time check whether the size of this queue is a power of two.
-- Fixed a race condition which in theory could have caused the VSCSI receive
-  queue to overflow: srp_iu_put() is now invoked before a response is sent back
-  to the initiator instead of after.
-- Moved enum iue_flags from libsrp to ibmvstgt because it is ibmvstgt-specific.
-- Removed a variable that was modified but never read from ibmvstgt_rdma().
-- ibmvstgt_probe(): changed the datatype of the variable "dma" from
-  unsigned * into const unsigned * such that a cast could be removed.
-- Fixed all compiler and sparse warnings (C=2 CF=-D__CHECK_ENDIAN__).
-
-Changes in libsrp compared to kernel 2.6.36:
-- Renamed vscsis_data_length() into srp_data_length() and exported
-  this function.
-- All error messages reported via printk() do now have prefix KERN_ERR.
-- modified srp_target_alloc() and srp_target_free() such that the
-  driver-private data reflects whether or not target data has been allocated.
-  This change was necessary to avoid that ibmvstgt_remove() triggers a
-  NULL-pointer dereference if ibmvstgt_probe() failed.
-- srp_transfer_data(): All three return statements related to DMA mapping
-  failure do now return -ENOMEM instead of 0, -EIO and -ENOMEM.
-- srp_direct_data(): Removed the ext_desc argument since not used.
-- srp_direct_data() and srp_indirect_data(): Use DMA_TO/FROM_DEVICE
-  instead of DMA_BIDIRECTIONAL for the buffers mapped for transferring data
-  via DMA.
-- struct srp_target: eliminated the information unit linked list and also the
-  V_FLYING flag since both were duplicating information managed by the SCST
-  core.
-- Fixed all compiler and sparse warnings (C=2 CF=-D__CHECK_ENDIAN__).
-
-Tests performed on a backport to kernel version 2.6.18 of this driver with a
-Linux initiator system:
-- Verified that the kernel module ibmvstgt loads and initializes successfully
-  and also that the client connects after loading.
-- Verified that all virtual disks configured in scst_vdisk were discovered by
-  the client after rescanning the SCSI bus.
-- Verified that after unloading and reloading ibmvstgt and after client
-  recovery that the initiator devices were functioning normally.
-- Verified that after a client reboot ibmvscsic reconnected with the target
-  and that the target devices were again usable.
-- Performed IO stress testing on the device.
-- Verified that SCSI task abortion works correctly.
-- Performed basic I/O performance testing. With a RAM disk as target linear
-  direct I/O throughput was above 2 GB/s and a random I/O test resulted in
-  about 30000 IOPS for all block sizes between 512 bytes and 16 KB.
-  Both initiator and target were dual core POWER6 LPAR systems.
-
-Note: ibmvstgt is the only user of libsrp.
-
-Signed-off-by: Bart Van Assche <bvanassche@acm.org>
-Cc: Fujita Tomonori <fujita.tomonori@lab.ntt.co.jp>
-Cc: Brian King <brking@linux.vnet.ibm.com>
-Cc: Robert Jennings <rcj@linux.vnet.ibm.com>
-
-EOF
-    fi
-    for f in \
-      drivers/scsi/ibmvscsi/ibmvstgt.c \
-      drivers/scsi/libsrp.c \
-      include/scsi/libsrp.h \
-      include/scsi/srp.h
-    do
-      patch="ibmvstgt/in-tree-patches/${kver}/$(basename $f).patch"
-      if [ -e "${patch}" ]; then
-        add_patch "${patch}" $f
-      fi
-    done
-    add_file "ibmvstgt/README.sysfs" "Documentation/powerpc/ibmvstgt.txt"
-  fi
-} \
-| process_patch "scst_18_ibmvstgt.diff"
-
-{
-  ( cd ibmvstgt 2>/dev/null && ./generate-in-tree-patches "${kver}" ) &&
-  if [ -e "ibmvstgt/in-tree-patches/${kver}" ]; then
-    if [ "${multiple_patches}" = "true" ]; then
-      cat <<EOF
-[SCSI] tgt: Removal
-
-Because of the conversion of the ibmvstgt driver from tgt to SCST, and because
-the ibmvstgt driver was the only user of scsi_tgt, the scsi_tgt kernel module,
-the CONFIG_SCSI_TGT, CONFIG_SCSI_SRP_TGT_ATTRS and CONFIG_SCSI_FC_TGT_ATTRS
-kbuild variable, the scsi_host_template member variables transfer_response,
-supportedmode and active_mode and the constants MODE_UNKNOWN, MODE_INITIATOR
-and MODE_TARGET are no longer needed.
-
-Note: this patch applies cleanly on a 2.6.35 kernel tree. The patch tool
-however complains about the defconfig changes when trying to apply this patch
-on a 2.6.36 kernel tree.
-
-Signed-off-by: Bart Van Assche <bvanassche@acm.org>
-
-EOF
-    fi
-    for f in \
-      arch/arm/configs/at572d940hfek_defconfig \
-      arch/arm/configs/cam60_defconfig \
-      arch/arm/configs/s3c2410_defconfig \
-      arch/m68k/configs/amiga_defconfig \
-      arch/m68k/configs/apollo_defconfig \
-      arch/m68k/configs/atari_defconfig \
-      arch/m68k/configs/bvme6000_defconfig \
-      arch/m68k/configs/hp300_defconfig \
-      arch/m68k/configs/mac_defconfig \
-      arch/m68k/configs/multi_defconfig \
-      arch/m68k/configs/mvme147_defconfig \
-      arch/m68k/configs/mvme16x_defconfig \
-      arch/m68k/configs/q40_defconfig \
-      arch/m68k/configs/sun3_defconfig \
-      arch/m68k/configs/sun3x_defconfig \
-      arch/mips/configs/bcm47xx_defconfig \
-      arch/mips/configs/decstation_defconfig \
-      arch/mips/configs/ip22_defconfig \
-      arch/mips/configs/ip27_defconfig \
-      arch/mips/configs/ip32_defconfig \
-      arch/mips/configs/jazz_defconfig \
-      arch/mips/configs/malta_defconfig \
-      arch/mips/configs/markeins_defconfig \
-      arch/mips/configs/pnx8550-jbs_defconfig \
-      arch/mips/configs/pnx8550-stb810_defconfig \
-      arch/mips/configs/rm200_defconfig \
-      arch/mips/configs/tb0226_defconfig \
-      arch/mips/configs/tb0287_defconfig \
-      arch/powerpc/configs/52xx/motionpro_defconfig \
-      arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig \
-      arch/powerpc/configs/mpc5200_defconfig \
-      drivers/scsi/Kconfig \
-      drivers/scsi/Makefile \
-      drivers/scsi/hosts.c \
-      drivers/scsi/scsi_sysfs.c \
-      drivers/scsi/scsi_tgt_if.c \
-      drivers/scsi/scsi_tgt_lib.c \
-      drivers/scsi/scsi_tgt_priv.h \
-      drivers/scsi/scsi_transport_fc.c \
-      drivers/scsi/scsi_transport_fc_internal.h \
-      drivers/scsi/scsi_transport_srp.c \
-      drivers/scsi/scsi_transport_srp_internal.h \
-      include/scsi/scsi_host.h \
-      include/scsi/scsi_tgt.h \
-      include/scsi/scsi_tgt_if.h
-    do
-      add_patch "ibmvstgt/in-tree-patches/${kver}/$(basename $f).patch" $f
-    done
-  fi
-} \
-| process_patch "scst_19_scsi_tgt.diff"

diff --git a/scst/scripts/generate-release-archive b/scst/scripts/generate-release-archive
index 632d7cc..68698ad 100755
--- a/scst/scripts/generate-release-archive
+++ b/scst/scripts/generate-release-archive

@@ -20,6 +20,9 @@
     fi
 fi
 
-tar --owner=root --group=root --transform="s|^|$name-$version/|" \
-	-cjf $name-$version.tar.bz2 $files &&
-ls -l $name-$version.tar.bz2
+result=../$name-$version.tar.bz2
+rm -f "${result}"
+for f in "${files[@]}"; do echo "$f"; done | \
+    tar --owner=root --group=root --transform="s|^|$name-$version/|;s|^$name-$version/../scst/include/backport.h$|../scst/include/backport.h|;s|^$name-$version/scstadmin.sysfs$|scstadmin.sysfs|" \
+	-cjf "${result}" -T- &&
+    ls -l "${result}"

diff --git a/scst/scripts/kernel-functions b/scst/scripts/kernel-functions
index c50668a..336baf8 100644
--- a/scst/scripts/kernel-functions
+++ b/scst/scripts/kernel-functions

@@ -119,9 +119,9 @@
 	{ extract_kernel_archive "$kver" && mv "linux-$kver" "linux-$1"; } ||
 	return $?
     fi
-    mv "linux-$1" ".." || return $?
+    mv "linux-$1" .. || return $?
     cd "../linux-$1" || return $?
-  )
+  ) || return $?
   rmdir "${tmpdir}"
 }
 
@@ -142,7 +142,7 @@
 		# versions do not support recent gcc versions. See also commit
 		# 9c695203a7dd ("compiler-gcc.h: gcc-4.5 needs noclone and
 		# noinline on __naked functions") # v2.6.35.
-		if kernel_version_le 2.6.35 "$1"; then
+		if kernel_version_le 2.6.34.2 "$1"; then
 		    patch -f -s -p1 <<'EOF'
 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
 index 02ae99e8e6d3..47e12c19c965 100644
@@ -690,7 +690,7 @@
 
 This is because both dtc-lexer as well as dtc-parser define the same
 global symbol yyloc. Before with -fcommon those were merged into one
-defintion. The proper solution would be to to mark this as "extern",
+definition. The proper solution would be to mark this as "extern",
 however that leads to:
 
   dtc-lexer.l:26:16: error: redundant redeclaration of 'yylloc' [-Werror=redundant-decls]
@@ -726,6 +726,84 @@
  extern bool treesource_error;
  
  /* CAUTION: this will stop working if we ever use yyless() or yyunput() */
+diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped
+index 5c6c3fd557d7..b3b7270300de 100644
+--- a/scripts/dtc/dtc-lexer.lex.c_shipped
++++ b/scripts/dtc/dtc-lexer.lex.c_shipped
+@@ -23,7 +23,6 @@ LINECOMMENT	"//".*\n
+ #include "srcpos.h"
+ #include "dtc-parser.tab.h"
+ 
+-YYLTYPE yylloc;
+ extern bool treesource_error;
+ 
+ /* CAUTION: this will stop working if we ever use yyless() or yyunput() */
+EOF
+    fi
+
+    # See also commit 52a9dab6d892 ("libsubcmd: Fix use-after-free for
+    # realloc(..., 0)") # v5.17
+    if kernel_version_le 2.6.38 "$1" && kernel_version_lt "$1" 5.17; then
+	patch -p1 -f -s <<'EOF'
+From 52a9dab6d892763b2a8334a568bd4e2c1a6fde66 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Sun, 13 Feb 2022 10:24:43 -0800
+Subject: [PATCH] libsubcmd: Fix use-after-free for realloc(..., 0)
+
+GCC 12 correctly reports a potential use-after-free condition in the
+xrealloc helper. Fix the warning by avoiding an implicit "free(ptr)"
+when size == 0:
+
+In file included from help.c:12:
+In function 'xrealloc',
+    inlined from 'add_cmdname' at help.c:24:2: subcmd-util.h:56:23: error: pointer may be used after 'realloc' [-Werror=use-after-free]
+   56 |                 ret = realloc(ptr, size);
+      |                       ^~~~~~~~~~~~~~~~~~
+subcmd-util.h:52:21: note: call to 'realloc' here
+   52 |         void *ret = realloc(ptr, size);
+      |                     ^~~~~~~~~~~~~~~~~~
+subcmd-util.h:58:31: error: pointer may be used after 'realloc' [-Werror=use-after-free]
+   58 |                         ret = realloc(ptr, 1);
+      |                               ^~~~~~~~~~~~~~~
+subcmd-util.h:52:21: note: call to 'realloc' here
+   52 |         void *ret = realloc(ptr, size);
+      |                     ^~~~~~~~~~~~~~~~~~
+
+Fixes: 2f4ce5ec1d447beb ("perf tools: Finalize subcmd independence")
+Reported-by: Valdis Klētnieks <valdis.kletnieks@vt.edu>
+Signed-off-by: Kees Kook <keescook@chromium.org>
+Tested-by: Valdis Klētnieks <valdis.kletnieks@vt.edu>
+Tested-by: Justin M. Forbes <jforbes@fedoraproject.org>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: linux-hardening@vger.kernel.org
+Cc: Valdis Klētnieks <valdis.kletnieks@vt.edu>
+Link: http://lore.kernel.org/lkml/20220213182443.4037039-1-keescook@chromium.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+---
+ tools/lib/subcmd/subcmd-util.h | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
+index 794a375dad36..b2aec04fce8f 100644
+--- a/tools/lib/subcmd/subcmd-util.h
++++ b/tools/lib/subcmd/subcmd-util.h
+@@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...)
+ static inline void *xrealloc(void *ptr, size_t size)
+ {
+ 	void *ret = realloc(ptr, size);
+-	if (!ret && !size)
+-		ret = realloc(ptr, 1);
+-	if (!ret) {
+-		ret = realloc(ptr, size);
+-		if (!ret && !size)
+-			ret = realloc(ptr, 1);
+-		if (!ret)
+-			die("Out of memory, realloc failed");
+-	}
++	if (!ret)
++		die("Out of memory, realloc failed");
+ 	return ret;
+ }
 EOF
     fi
 
@@ -754,7 +832,7 @@
 			      tr '\n' ' ')"
 	for url in "${urls[@]}"; do
 	    case "$distro" in
-		CentOS)
+		CentOS|AlmaLinux)
 		    wget -q -nc "${url}/kernel-${kver}.src.rpm" && break
 		    ;;
 		UEK)
@@ -769,19 +847,21 @@
     (
 	cd "${tmpdir}" &&
 	    case "$distro" in
-		CentOS)
+		CentOS|AlmaLinux)
 		    rpm2cpio "${kernel_downloads}/kernel-${kver}.src.rpm" |
 			cpio -i --make-directories --quiet &&
-			tar xaf "linux-${kver}.tar."* &&
-			mv "linux-${kver}" ".." &&
-			cd "../linux-${kver}"
+			tar xaf "linux-${kver}"*.tar.* &&
+			if [ -e "linux-${kver}" ]; then
+			    mv "linux-${kver}" ..
+			else
+			    mv "linux-${kver}"*[^z] "../linux-${kver}"
+			fi
 		    ;;
 		UEK)
 		    rpm2cpio "${kernel_downloads}/kernel-uek-${kver}.src.rpm" |
 			cpio -i --make-directories --quiet &&
 			tar xaf "linux-${kver/-*}.tar."* &&
-			mv "linux-${kver/-*}" "../linux-${kver}" &&
-			cd "../linux-${kver}"
+			mv "linux-${kver/-*}" "../linux-${kver}"
 		    ;;
 		*)
 		    echo "Error: unknown distro $distro"

diff --git a/scst/scripts/list-source-files b/scst/scripts/list-source-files
index 06b22a3..56e3f1c 100755
--- a/scst/scripts/list-source-files
+++ b/scst/scripts/list-source-files

@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 list_source_files() {
   local d r
@@ -24,14 +24,14 @@
       done
     )
   elif [ -e "$r/.git" ]; then
-    subdir="${d#${r}}"
+    subdir="${d#"${r}"}"
     if [ "$r" != "" ]; then
       ( cd "$d" && git ls-tree --name-only -r HEAD ) | sed "s|^$subdir/||"
     else
       echo "Ignored directory $1" >&2
     fi
   elif [ -e "$r/.hg" ]; then
-    subdir="${d#${r}}"
+    subdir="${d#"${r}"}"
     if [ -n "${subdir}" ]; then
       subdir="${subdir#/}/"
       hg manifest | sed -n "s|^$subdir||p"
@@ -41,16 +41,35 @@
   else
     (
       cd "$d" &&
-      find . -type f -o -type l | \
-      sed -e 's|^\./||' \
-	  -e '\|\.k\{0,1\}o\(\.\(cm\)\{0,1\}d\)\{0,1\}$|d' \
-	  -e '\|\.mod\(\.c\)\{0,1\}$|d' \
-	  -e '\|/conftest/.*/result-.*\.txt$|d' \
-	  -e '\|/modules\.order$|d' \
-	  -e '\,/Module\.\(symver\|marker\)s$,d' \
-	  -e '\,/\.tmp_versions\(/\|$\),d'
+      find . -type f -o -type l |
+      sed -e 's/^\.\///'				\
+	  -e '/\.depend_\(adm\|d\|f\)$/d'		\
+	  -e '/\.o$/d'					\
+	  -e '/\.o\.d$/d'				\
+	  -e '/\.o\.cmd$/d'				\
+	  -e '/\.ko$/d'					\
+	  -e '/\.ko\.cmd$/d'				\
+	  -e '/\.mod$/d'				\
+	  -e '/\.mod\.c$/d'				\
+	  -e '/\.mod\.cmd$/d'				\
+	  -e '/\/Module\.\(symver\|marker\)s$/d'	\
+	  -e '/\/\.Module\.symvers\.cmd$/d'		\
+	  -e '/\/\.modules\.order\.cmd$/d'		\
+	  -e '/\/\.tmp_versions\(\/\|$\)/d'		\
+	  -e '/\/blib\//d'				\
+	  -e '/\/conftest\/.*\/build-output-.*\.txt$/d'	\
+	  -e '/\/conftest\/.*\/result-.*\.txt$/d'	\
+	  -e '/\/modules\.order$/d'			\
+	  -e '/\/rpmbuilddir\//d'			\
+	  -e '/^iscsi-scst\/usr\/iscsi-scst-adm$/d'	\
+	  -e '/^iscsi-scst\/usr\/iscsi-scstd$/d'	\
+	  -e '/^rpmbuilddir\//d'			\
+	  -e '/^usr\/fileio\/fileio_tgt$/d'		\
+	  -e '/^usr\/stpgd\/stpgd$/d'			\
+	  -e '/debian\/tmp\//d'				\
+	  -e '/~$/d'
     )
-  fi
+  fi | sort
 }
 
 if [ $# = 0 ]; then

diff --git a/scst/scripts/rhel-rpm-functions b/scst/scripts/rhel-rpm-functions
index 3385587..7ca3ef5 100644
--- a/scst/scripts/rhel-rpm-functions
+++ b/scst/scripts/rhel-rpm-functions

@@ -35,10 +35,16 @@
 		    echo "http://ftp.redhat.com/redhat/rhel/rc/7/Server/source/tree/Packages";;
 	    esac
 	    ;;
+	"AlmaLinux")
+	    case $releasever in
+		[89]*)
+		    echo "https://repo.almalinux.org/vault/${releasever}/BaseOS/Source/Packages/";;
+	    esac
+	    ;;
 	"Oracle Linux Server"|"UEK")
 	    echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/${releaseverminor}/base/${arch}/getPackageSource"
 	    echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/latest/${arch}/getPackageSource"
-	    for ((i=4;i<=6;i++)); do
+	    for ((i=4;i<=7;i++)); do
 		echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/UEKR$i/${arch}/getPackageSource"
 	    done
 	    ;;
@@ -89,10 +95,16 @@
 	    ;;
 	"Red Hat Enterprise Linux"*)
 	    echo "";;
+	"AlmaLinux")
+	    case $releasever in
+		[89]*)
+		    echo "https://mirror.yandex.ru/almalinux/${releasever}/BaseOS/${arch}/os/Packages/";;
+	    esac
+	    ;;
 	"Oracle Linux Server"|"UEK")
 	    echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/${releaseverminor}/base/${arch}/getPackageSource"
 	    echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/latest/${arch}/getPackageSource"
-	    for ((i=4;i<=6;i++)); do
+	    for ((i=4;i<=7;i++)); do
 		echo "http://public-yum.oracle.com/repo/OracleLinux/OL${releasevermajor}/UEKR$i/${arch}/getPackageSource"
 	    done
 	    ;;

diff --git a/scst/scripts/run-regression-tests b/scst/scripts/run-regression-tests
index 3520f1d..d5f65d8 100755
--- a/scst/scripts/run-regression-tests
+++ b/scst/scripts/run-regression-tests

@@ -291,6 +291,7 @@
 CONFIG_IWLWIFI_DEVICE_TRACING			\
 CONFIG_IWM_TRACING				\
 CONFIG_KALLMODSYMS				\
+CONFIG_KCOV					\
 CONFIG_KVM_MMU_AUDIT				\
 CONFIG_MAC80211_DRIVER_API_TRACER		\
 CONFIG_MMIOTRACE				\
@@ -302,6 +303,7 @@
 CONFIG_SECURITY_SELINUX				\
 CONFIG_STACK_TRACER				\
 CONFIG_STACK_VALIDATION				\
+CONFIG_TARGET_CORE				\
 CONFIG_TRACEPOINTS				\
 CONFIG_TRACER_MAX_TRACE				\
 CONFIG_TRACE_BRANCH_PROFILING			\
@@ -385,7 +387,7 @@
   if (cd "${outputdir}/linux-$k" \
       && make -s modules_prepare \
       && make -s scripts \
-      && for subdir; do LC_ALL=C make -k M="${subdir}"; done
+      && for subdir; do LC_ALL=C make -j$(nproc) -k M="${subdir}"; done
      ) &> "${outputfile}"
   then
     local errors warnings
@@ -678,11 +680,13 @@
   k="${kv}"
 
   generate_kernel_patch "$k" "${generate_kernel_patch_options}" || continue
-  (
+  if ! (
       cd "${outputdir}" &&
 	  download_and_extract_kernel_tree "$k"
-  ) ||
+  ); then
+      echo "Error: download_and_extract_kernel_tree $k failed"
       continue
+  fi
   k="${k/^*}"
   if [ "${run_checkpatch}" = "true" ]; then
     run_checkpatch "$k"

diff --git a/scst/scripts/specialize-patch b/scst/scripts/specialize-patch
index ce46d06..e5f508a 100755
--- a/scst/scripts/specialize-patch
+++ b/scst/scripts/specialize-patch

@@ -31,10 +31,14 @@
 # Convert a kernel version in the x.y.z format into numeric form, just like
 # the KERNEL_VERSION() macro.
 
+function version_number(a, b, c) {
+  return a * 65536 + b * 256 + (c > 255 ? 255 : c);
+}
+
 function version_code(kver, array) {
   if (!match(kver, "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", array))
     match(kver, "^([0-9]+)\\.([0-9]+)$", array)
-  return 65536*array[1] + 256*array[2] + array[3]
+  return version_number(array[1], array[2], array[3]);
 }
 
 
@@ -64,6 +68,11 @@
     stmnt = "+#if !defined(" arg[1] ")"
   }
 
+  # See also commit c9c9762d4d44 ("block: return the correct bvec when checking
+  # for gaps"; v5.14-rc1).
+  gsub("defined\\(bio_multiple_segments\\)",
+       "(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0))", stmnt)
+
   gsub("defined\\(REGISTER_MAD_AGENT_HAS_FLAGS_ARG\\)",
        "(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) || " \
        "RHEL_RELEASE_CODE -0 >= 6 * 256 + 9)", stmnt)
@@ -91,8 +100,12 @@
        "(LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) \\&\\& " \
        "RHEL_RELEASE_CODE -0 < 7 * 256 + 5)", stmnt)
 
+  gsub("defined\\(IB_CM_LISTEN_TAKES_THIRD_ARG\\)",
+       "(LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0))", stmnt)
+
   gsub("IB_CLIENT_ADD_ONE_RETURNS_INT",
-       "(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0))", stmnt)
+       "(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) || " \
+       "RHEL_RELEASE_CODE -0 >= 8 * 256 + 4)", stmnt)
 
   gsub("defined\\(IB_CLIENT_REMOVE_TAKES_TWO_ARGS\\)",
        "(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) || " \
@@ -161,7 +174,8 @@
        "RHEL_RELEASE_CODE -0 < 8 * 256 + 2)", stmnt)
 
   gsub("RDMA_REJECT_HAS_FOUR_ARGS",
-       "(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0))", stmnt)
+       "(LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) || " \
+       "RHEL_RELEASE_CODE -0 >= 8 * 256 + 4)", stmnt)
 
   gsub("defined(ENABLE_NPIV)", 0, stmnt)
 
@@ -172,7 +186,7 @@
   pattern="KERNEL_VERSION\\([[:blank:]]*([0-9]+)[[:blank:]]*,[[:blank:]]*([0-9]+)[[:blank:]]*,[[:blank:]]*([0-9]+)[[:blank:]]*\\)"
   while (match(stmnt, pattern, op) != 0)
   {
-    sub(pattern, op[1] * 65536 + op[2] * 256 + op[3], stmnt)
+    sub(pattern, version_number(op[1], op[2], op[3]), stmnt)
   }
 
   gsub("defined\\(INSIDE_KERNEL_TREE\\)", "1", stmnt)
@@ -238,6 +252,8 @@
 
   gsub("defined\\(_COMPAT_LINUX_MM_H\\)", "0", stmnt)
   gsub("defined\\(UEK_KABI_RENAME\\)", UEK_KABI_RENAME ? "1" : "0", stmnt)
+  gsub("defined\\(UEK_RELEASE\\)", UEK_RELEASE ? "1" : "0", stmnt)
+  gsub("UEK_RELEASE", UEK_RELEASE ? UEK_RELEASE : "0", stmnt)
 
   if (SCST_IO_CONTEXT != "")
   {
@@ -338,6 +354,12 @@
       sub(pattern, (op[1] != 0) && (op[2] != 0), stmnt)
     }
 
+    pattern="0[[:blank:]]*&&[[:blank:]]*defined\\([A-Za-z0-9_]*\\)"
+    while (match(stmnt, pattern, op) != 0)
+    {
+      sub(pattern, 0, stmnt)
+    }
+
     pattern="^+#(if|elif)[[:blank:]]*([01])[[:blank:]]*&&[[:blank:]]*(!*[[:blank:]]*defined[[:blank:]]*\\([[:blank:]]*[A-Za-z0-9_]*[[:blank:]]*\\))$"
     while (match(stmnt, pattern, op) != 0)
     {
@@ -422,6 +444,7 @@
       || $0 ~ "IB_CLIENT_ADD_ONE_RETURNS_INT"                           \
       || $0 ~ "IB_CLIENT_REMOVE_TAKES_TWO_ARGS"				\
       || $0 ~ "IB_CM_LISTEN_TAKES_FOURTH_ARG"				\
+      || $0 ~ "IB_CM_LISTEN_TAKES_THIRD_ARG"				\
       || $0 ~ "IB_CREATE_CQ_HAS_INIT_ATTR"				\
       || $0 ~ "IB_PD_HAS_LOCAL_DMA_LKEY"				\
       || $0 ~ "IB_QUERY_GID_HAS_ATTR_ARG"				\
@@ -437,8 +460,10 @@
       || $0 ~ "RHEL_RELEASE_CODE"					\
       || $0 ~ "SOCK_RECVMSG_HAS_FOUR_ARGS"				\
       || $0 ~ "UEK_KABI_RENAME"						\
-      || $0 ~ "_COMPAT_LINUX_MM_H"					\
+      || $0 ~ "UEK_RELEASE"						\
       || $0 ~ "USE_PRE_440_WR_STRUCTURE"				\
+      || $0 ~ "_COMPAT_LINUX_MM_H"					\
+      || $0 ~ "bio_multiple_segments"                                   \
       || generating_upstream_patch_defined				\
          && $0 ~ "GENERATING_UPSTREAM_PATCH"				\
       || $0 ~ "CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION"	\

diff --git a/scst/scripts/update-version b/scst/scripts/update-version
index aa1107b..7800efb 100755
--- a/scst/scripts/update-version
+++ b/scst/scripts/update-version

@@ -17,12 +17,21 @@
 fv=$major.$minor.$release$suffix
 reldate=$(date "+%d %B %Y")
 
-sed -i "s,^\(#define[[:blank:]]*\(FT_VERSION\|ISCSI_VERSION_STRING\|Q2T_VERSION_STRING\|SCST_LOCAL_VERSION\|SCST_VERSION_NAME\|DRV_VERSION\|VERSION_STR\)[[:blank:]]*\)\"[^\"]*\",\1\"$major.$minor.$release$suffix\"," iscsi-scst/kernel/isert-scst/isert.c scst_local/scst_local.c srpt/src/ib_srpt.c fcst/fcst.h iscsi-scst/include/iscsi_scst_ver.h qla2x00t/qla2x00-target/qla2x00t.h scst/include/scst_const.h usr/include/version.h
+sed -i "s,^\(#define[[:blank:]]*\(FT_VERSION\|ISCSI_VERSION_STRING\|Q2T_VERSION_STRING\|SCST_LOCAL_VERSION\|SCST_VERSION_NAME\|DRV_VERSION\|VERSION_STR\)[[:blank:]]*\)\"[^\"]*\",\1\"$major.$minor.$release$suffix\"," \
+    iscsi-scst/kernel/isert-scst/isert.c          \
+    scst_local/scst_local.c                       \
+    srpt/src/ib_srpt.c                            \
+    fcst/fcst.h                                   \
+    iscsi-scst/include/iscsi_scst_ver.h           \
+    qla2x00t/qla2x00-target/qla2x00t.h            \
+    qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h \
+    scst/include/scst_const.h                     \
+    usr/include/version.h
 sed -i "s,^\(<date>Version[[:blank:]]*\)[^[:blank:]]*\(</date>\),\1$fv\2," doc/scst_user_spec.sgml
 sed -i "s/^Version .*/Version $fv, $reldate/" iscsi-scst/README qla2x00t/qla2x00-target/README scst/README usr/fileio/README
 sed -i "s/^\(\(static const char \*scst_local_version_date\|#define[[:blank:]]*DRV_RELDATE\)[[:blank:]]*\)\"[^\"]*\"/\1\"$reldate\"/" iscsi-scst/kernel/isert-scst/isert.c scst_local/scst_local.c srpt/src/ib_srpt.c
 sed -i "s/^\(#define[[:blank:]]*[^[:blank:]]*_REV[[:blank:]]*\)\"[[:blank:]0-9]*\"/\1\"$(printf "%-4s" ${major}${minor}${release})\"/" usr/fileio/common.h scst/src/dev_handlers/scst_vdisk.c
-sed -i "s/^\(#define[[:blank:]]*SCST_VERSION_CODE[[:blank:]]*\).*/\1SCST_VERSION($major, $minor, $release, 0)/" scst/include/scst.h
+sed -i "s/^\(#define[[:blank:]]*SCST_VERSION_CODE[[:blank:]]*\).*/\1SCST_VERSION($major, $minor, $release, 0)/" scst/include/scst_const.h
 sed -i "s/^\(#define[[:blank:]]*Q2T_VERSION_CODE[[:blank:]]*\).*/\1Q2T_VERSION($major, $minor, $release, 0)/" \
     qla2x00t/qla2x00-target/qla2x00t.h \
     qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.h

diff --git a/scst/scst-dkms.spec.in b/scst/scst-dkms.spec.in
index 1ed6bf9..693c0da 100644
--- a/scst/scst-dkms.spec.in
+++ b/scst/scst-dkms.spec.in

@@ -46,11 +46,16 @@
 # Fedora
 %define kernel_devel_rpm kernel-devel
 %else
+%if %([ %{kernel_rpm} = kernel-uek-core ]; echo $((1-$?)))
+# UEK 7
+%define kernel_devel_rpm kernel-uek-devel
+%else
 # Other Linux distros
 %define kernel_devel_rpm %{kernel_rpm}-devel
 %endif
 %endif
 %endif
+%endif
 %{echo:kernel_devel_rpm=%{kernel_devel_rpm}
 }
 %endif
@@ -132,7 +137,7 @@
 %build
 export KVER=%{kversion} PREFIX=%{_prefix}
 export BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y
-for d in scst fcst iscsi-scst qla2x00t/qla2x00-target scst_local srpt; do
+for d in scst fcst iscsi-scst qla2x00t-32gbit/qla2x00-target scst_local srpt; do
     %{make} -C $d
 done
 
@@ -142,7 +147,7 @@
 for d in scst; do
     DESTDIR=%{buildroot} %{make} -C $d install
 done
-for d in fcst iscsi-scst qla2x00t/qla2x00-target scst_local srpt; do
+for d in fcst iscsi-scst qla2x00t-32gbit/qla2x00-target scst_local srpt; do
     DESTDIR=%{buildroot} INSTALL_MOD_PATH=%{buildroot} %{make} -C $d install
 done
 rm -f %{buildroot}/lib/modules/%{kversion}/[Mm]odule*
@@ -157,7 +162,7 @@
 PACKAGE_VERSION="%{dkms_version}"
 PACKAGE_NAME="%{kmod_name}"
 AUTOINSTALL=yes
-MAKE[0]="export KVER=${kernelver} KDIR=${kernel_source_dir} BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y && make 2release && make -sC scst && make -sC fcst && make -sC iscsi-scst && make -sC qla2x00t/qla2x00-target && make -sC scst_local && make -sC srpt && cp */*.ko */*/*.ko *scst*/*/*/*.ko ."
+MAKE[0]="export KVER=${kernelver} KDIR=${kernel_source_dir} BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y && make 2release && make -sC scst && make -sC fcst && make -sC iscsi-scst && make -sC qla2x00t-32gbit/qla2x00-target && make -sC scst_local && make -sC srpt && cp */*.ko */*/*.ko *scst*/*/*/*.ko ."
 CLEAN="make clean"
 # Remove any existing ib_srpt.ko kernel modules
 PRE_INSTALL="find /lib/modules/${kernelver} -name ib_srpt.ko -exec rm {} \;"

diff --git a/scst/scst.spec.in b/scst/scst.spec.in
index c1a88de..01d863a 100644
--- a/scst/scst.spec.in
+++ b/scst/scst.spec.in

@@ -51,11 +51,16 @@
 # Fedora
 %define kernel_devel_rpm kernel-devel
 %else
+%if %([ %{kernel_rpm} = kernel-uek-core ]; echo $((1-$?)))
+# UEK 7
+%define kernel_devel_rpm kernel-uek-devel
+%else
 # Other Linux distros
 %define kernel_devel_rpm %{kernel_rpm}-devel
 %endif
 %endif
 %endif
+%endif
 %{echo:kernel_devel_rpm=%{kernel_devel_rpm}
 }
 %endif
@@ -67,7 +72,7 @@
 
 Name:		%{kmod_name}-%{kversion}
 Version:	%{rpm_version}
-Release:	%{rpm_release}%{?dist}
+Release:	%{pkgrel}%{?dist}
 Summary:	SCST mid-layer kernel drivers
 Group:		System/Kernel
 License:	GPLv2
@@ -125,7 +130,7 @@
 export PREFIX=%{_prefix}
 export BUILD_2X_MODULE=y CONFIG_SCSI_QLA_FC=y CONFIG_SCSI_QLA2XXX_TARGET=y
 make 2release
-for d in scst iscsi-scst scst_local; do
+for d in scst fcst iscsi-scst qla2x00t-32gbit/qla2x00-target scst_local srpt usr; do
     %{make} -C $d
 done
 
@@ -137,7 +142,7 @@
 for d in scst; do
     DESTDIR=%{buildroot} %{make} -C $d install
 done
-for d in iscsi-scst scst_local; do
+for d in fcst iscsi-scst qla2x00t-32gbit/qla2x00-target scst_local srpt usr; do
     DESTDIR=%{buildroot} INSTALL_MOD_PATH=%{buildroot} %{make} -C $d install
 done
 # Set the executable bit such that /usr/lib/rpm/find-debuginfo.sh can find the
@@ -163,10 +168,13 @@
 @depmod@ -a %{kversion}
 
 %files
-%defattr(-,root,root,0755)
-%dir /lib/modules/%{kversion}/extra
+%defattr(0644,root,root,0755)
+/lib/modules/%{kversion}/extra/fcst.ko
+/lib/modules/%{kversion}/extra/ib_srpt.ko
 /lib/modules/%{kversion}/extra/iscsi-scst.ko
 /lib/modules/%{kversion}/extra/isert-scst.ko
+/lib/modules/%{kversion}/extra/qla2x00tgt.ko
+/lib/modules/%{kversion}/extra/qla2xxx_scst.ko
 /lib/modules/%{kversion}/extra/scst.ko
 /lib/modules/%{kversion}/extra/scst_local.ko
 %dir /lib/modules/%{kversion}/extra/dev_handlers
@@ -188,6 +196,9 @@
 %dir /var/lib/scst/dif_tags
 %dir /var/lib/scst/pr
 %dir /var/lib/scst/vdev_mode_pages
+/usr/bin/scst/fileio_tgt
+/usr/bin/scst/scst_on_stpg
+/usr/sbin/stpgd
 
 %files devel
 %defattr(-,root,root,0755)
@@ -203,8 +214,6 @@
 /usr/include/scst/scst_user.h
 
 %changelog
-* Sat Apr 4 2020 Jim McCarthy <jim.mccarthy@actifio.com>
-- Changed list of build artifacts to match Actifio deliverables.
 * Sun Mar  8 2020 Bart Van Assche <bvanassche@acm.org>
 - Added support for the CentOSPlus kernel.
 * Tue Oct  8 2019 Bart Van Assche <bvanassche@acm.org>

diff --git a/scst/scst/ChangeLog b/scst/scst/ChangeLog
index 049ea3c..ed7f614 100644
--- a/scst/scst/ChangeLog
+++ b/scst/scst/ChangeLog

@@ -1,3 +1,73 @@
+Summary of changes between versions 3.6 and 3.7
+-----------------------------------------------
+- The SCST event subsystem works again reliably.
+- Fixed a race condition when replacing a LUN under load.
+- Fixed handling of INQUIRY/SENSE commands that come with buffer size 0.
+- The behavior of the on_alua_state_change_*() callback functions has been
+  fixed such that these are also invoked for devices that are not in any
+  target.
+- Error handling for iscsi-scst has been improved such that data sending
+  failure no longer crashes the system.
+- An improvement has been added to iscsi-scstd to allow multiple addresses to
+  be specified for the server to listen on.
+- Fixed a hang when unregistering a SCST device due to incorrect device
+  reference counter management in copy manager.
+- Fixed copy manager device update (for auto_cm_assignment=1) such that it no
+  longer corrupts the designator list.
+- Support for the scst_tgt_template detect() method has been dropped. This method
+  was declared obsolete in 2015.
+- The scst_user device handler has been made compatible with the
+  qla2x00t-32gbit driver.
+- qla2x00t-32gbit driver: NPIV support has been improved.
+- qla2x00t-32gbit driver: Target mode usage has been simplified by changing
+  the default qlini_mode to exclusive.
+- qla2x00t-32gbit driver: Updated from Linux kernel version v5.15 to v6.1.
+
+The kernel versions supported by this release are:
+* Kernel.org kernel versions v3.10..v6.1.
+* Debian / Ubuntu kernels based on upstream kernel versions v3.10..v6.1.
+* RHEL / CentOS / AlmaLinux 7.x, 8.0..8.7 and 9.0..9.1 kernels.
+* UEK version 4, 5, 6 and 7 kernels.
+
+Summary of changes between versions 3.5 and 3.6
+-----------------------------------------------
+- Made the command processing path slightly faster by removing two atomic
+  instructions from it.
+- Added support for the READ and WRITE DYN RUNTIME ATTR commands and also
+  for SERVICE ACTION IN(12).
+- An infinite loop in the code that sets CHECK CONDITION has been fixed.
+- A deadlock has been fixed in the code for assigning a device handler to a
+  vdisk.
+- Support for adding a vdisk_blockio device with a non-existent filename has
+  been restored.
+- The async mode of vdisk_fileio has been made compatible with filesystems that
+  use the iomap code, e.g. XFS.
+- An "INFO: rcu_sched self-detected stall" issue has been fixed.
+- Support in the copy manager for auto_cm_assignment=0 has been fixed.
+- Standards-compliance of the copy manager has been improved. Designators with
+  a length above 20 bytes are now rejected instead of being accepted.
+- The copy manager no longer suspends activity when adding a LUN.
+- A bug has been fixed in the vdisk resync_size functionality.
+- The tape device handler now sets 'block_shift' correctly.
+- Improved the code for building a Debian package (dpkg).
+- scst_local: the SCSI host number is now available in sysfs.
+- scst-isert: this driver has been made compatible with
+  CONFIG_HARDENED_USERCOPY.
+- scst-isert: support for RDMA_CV_EVENT_ADDR_CHANGE has been added.
+- scst-isert: a hang in iscsi_release() has been fixed.
+- The top-level Makefile has been modified such that the qla2x00t-32gbit driver
+  is built by default instead of the qla2x00t driver. The qla2x00t driver can
+  be selected by passing QLA_32GBIT=no as argument to make.
+- qla2x00t-32gbit driver: updated from Linux kernel version v5.10 to v5.15.
+- scstadmin: the -force option is now passed to removeGroup().
+- scstadmin: improved performance of the scstadmin function make_path().
+
+The kernel versions supported by this release are:
+* Kernel.org kernel versions v3.10..v5.15.
+* Debian / Ubuntu kernels based on upstream kernel versions v3.10..v5.15.
+* RHEL / CentOS 7.x and 8.0..8.5 kernels.
+* UEK version 6, 7 and 8 kernels.
+
 Summary of changes between versions 3.4 and 3.5
 -----------------------------------------------
 - Added the forward_src and forward_dst sysfs attributes. Removed

diff --git a/scst/scst/README b/scst/scst/README
index a28e844..cd64387 100644
--- a/scst/scst/README
+++ b/scst/scst/README

@@ -1,7 +1,7 @@
 Generic SCSI target mid-level for Linux (SCST)
 ==============================================
 
-Version 3.5.0, 21 December 2020
+Version 3.7.0, 26 December 2022
 ----------------------------
 
 SCST is designed to provide unified, consistent interface between SCSI
@@ -1196,7 +1196,7 @@
  - rotational - if set, this device reported as rotational. Otherwise,
    it is reported as non-rotational (SSD, etc.)
 
- - zero_copy - ignored. For zero-copy I/O, set the async flag and
+ - zero_copy - obsolete. For zero-copy I/O, set the async flag and
    possibly also the o_direct flag and use Linux kernel v4.10 or later.
 
  - dif_mode - specifies which T10-PI, or DIF, mode this device will use.

diff --git a/scst/scst/include/backport.h b/scst/scst/include/backport.h
index c58d43b..343c131 100644
--- a/scst/scst/include/backport.h
+++ b/scst/scst/include/backport.h

@@ -30,15 +30,11 @@
 	(defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 8)
 #include <linux/blk-mq.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
 #include <linux/bsg-lib.h>	/* struct bsg_job */
-#endif
 #include <linux/dmapool.h>
 #include <linux/eventpoll.h>
 #include <linux/iocontext.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
 #include <linux/kobject_ns.h>
-#endif
 #include <linux/scatterlist.h>	/* struct scatterlist */
 #include <linux/slab.h>		/* kmalloc() */
 #include <linux/stddef.h>	/* sizeof_field() */
@@ -46,13 +42,12 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/writeback.h>	/* sync_page_range() */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
 #include <net/net_namespace.h>  /* init_net */
-#endif
 #include <rdma/ib_verbs.h>
 #include <scsi/scsi_cmnd.h>	/* struct scsi_cmnd */
+#include <scsi/scsi_eh.h>	/* scsi_build_sense_buffer() */
 struct scsi_target;
-#include <scsi/scsi_transport_fc.h> /* struct fc_bsg_job */
+#include <scsi/scsi_transport_fc.h> /* struct bsg_job */
 #include <asm/unaligned.h>	/* get_unaligned_be64() */
 
 /* <asm-generic/barrier.h> */
@@ -64,26 +59,6 @@
 #define smp_mb__after_atomic_dec smp_mb__after_atomic
 #endif
 
-/* <asm-generic/bug.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) && !defined(WARN)
-/* See also commit a8f18b909c0a3f22630846207035c8b84bb252b8 */
-#define WARN(condition, format...) do {		\
-	if (unlikely(condition)) {		\
-		printk(KERN_WARNING format);	\
-		WARN_ON(true);			\
-	}					\
-} while (0)
-#endif
-
-/* <asm-generic/fcntl.h> */
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
-#ifndef O_DSYNC
-#define O_DSYNC O_SYNC
-#endif
-#endif
-
 /* <asm/msr.h> */
 
 #ifdef CONFIG_X86
@@ -102,6 +77,15 @@
 #define tsc_khz 1000
 #endif
 
+/* <linux/err.h> */
+
+/*
+ * See also commit 6e8b8726ad50 ("PTR_RET is now PTR_ERR_OR_ZERO") # v3.12
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(RHEL_RELEASE_CODE)
+#define PTR_ERR_OR_ZERO(p) PTR_RET(p)
+#endif
+
 /* <linux/bio.h> */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) &&	\
@@ -126,17 +110,78 @@
 }
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
+/*
+ * See also commit a8affc03a9b3 ("block: rename BIO_MAX_PAGES to BIO_MAX_VECS")
+ * # v5.12.
+ */
+#define BIO_MAX_VECS BIO_MAX_PAGES
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+/*
+ * See also commit 609be1066731 ("block: pass a block_device and opf to
+ * bio_alloc_bioset") # v5.18
+ */
+static inline
+struct bio *bio_alloc_bioset_backport(struct block_device *bdev,
+		unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask,
+		struct bio_set *bs)
+{
+	/*
+	 * Check that the @bdev and @opf parameters are zero.
+	 *
+	 * The old API expects these parameters to be set implicitly.
+	 * Therefore, warn about using an explicit setting that would
+	 * cause these parameters to be lost.
+	 */
+	WARN_ON_ONCE(bdev || opf);
+
+	return bio_alloc_bioset(gfp_mask, nr_vecs, bs);
+}
+
+#define bio_alloc_bioset bio_alloc_bioset_backport
+
+/*
+ * See also commit 07888c665b40 ("block: pass a block_device and opf to
+ * bio_alloc") # v5.18
+ */
+static inline
+struct bio *bio_alloc_backport(struct block_device *bdev,
+		unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
+{
+	/*
+	 * Check that the @bdev and @opf parameters are zero.
+	 *
+	 * The old API expects these parameters to be set implicitly.
+	 * Therefore, warn about using an explicit setting that would
+	 * cause these parameters to be lost.
+	 */
+	WARN_ON_ONCE(bdev || opf);
+
+	return bio_alloc(gfp_mask, nr_vecs);
+}
+
+#define bio_alloc bio_alloc_backport
+
+#endif
+
+/* <linux/blk_types.h> */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+enum {
+	REQ_OP_SCSI_IN	= REQ_OP_DRV_IN,
+	REQ_OP_SCSI_OUT	= REQ_OP_DRV_OUT,
+};
+#endif
+
 /* <linux/blk-mq.h> */
 
 static inline unsigned int scst_blk_rq_cpu(struct request *rq)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	/*
-	 * See also commit c7c22e4d5c1f ("block: add support for IO CPU
-	 * affinity") # v2.6.28.
-	 */
-	return 0;
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 21, 0) &&	\
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 21, 0) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8)
 	return rq->cpu;
 #else
@@ -144,52 +189,54 @@
 #endif
 }
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 19, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+/*
+ * See also commit e2e530867245 ("blk-mq: remove the done argument to
+ * blk_execute_rq_nowait") # v5.19.
+ */
+static inline
+void blk_execute_rq_nowait_backport(struct request *rq, bool at_head)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
+	/*
+	 * See also commit 8eeed0b554b9 ("block: remove unnecessary argument from
+	 * blk_execute_rq_nowait") # v5.12.
+	 */
+	blk_execute_rq_nowait(rq->q, NULL, rq, at_head, rq->end_io);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
+	/*
+	 * See also commit b84ba30b6c7a ("block: remove the gendisk argument to
+	 * blk_execute_rq") # v5.17.
+	 */
+	blk_execute_rq_nowait(NULL, rq, at_head, rq->end_io);
+#else
+	blk_execute_rq_nowait(rq, at_head, rq->end_io);
+#endif
+}
+
+#define blk_execute_rq_nowait blk_execute_rq_nowait_backport
+#endif
+
 /* <linux/blkdev.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
-{
-	return q->max_hw_sectors;
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-/* See also commit ac481c20ef8f ("block: Topology ioctls") # v2.6.32 */
-static inline int bdev_io_opt(struct block_device *bdev)
-{
-	return 0;
-}
-#endif
-
-/* <linux/bsg-lib.h> */
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 19, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
 /*
- * Note: the function bsg_job_sense() exists only in SCST but not in any
- * upstream kernel.
+ * See also commit 44abff2c0b97 ("block: decouple REQ_OP_SECURE_ERASE
+ * from REQ_OP_DISCARD") # v5.19.
  */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) &&	\
-     !defined(CONFIG_SUSE_KERNEL)) ||			\
-    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) &&	\
-     defined(CONFIG_SUSE_KERNEL))
-static inline void *bsg_job_sense(struct fc_bsg_job *job)
+static inline
+int blkdev_issue_discard_backport(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask)
 {
-	return job->req->sense;
+	return blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0);
 }
-#else
-static inline void *bsg_job_sense(struct bsg_job *job)
-{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
-	return job->req->sense;
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) &&	\
-	!defined(CONFIG_SUSE_KERNEL)
-	return scsi_req(job->req)->sense;
-#else
-	return scsi_req(blk_mq_rq_from_pdu(job))->sense;
+
+#define blkdev_issue_discard blkdev_issue_discard_backport
 #endif
-}
-#endif
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31) */
 
 /* <linux/byteorder/generic.h> */
 /*
@@ -207,25 +254,18 @@
 	for (i = 0; i < len; i++)
 		dst[i] = cpu_to_be32(src[i]);
 }
+
+static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = be32_to_cpu(src[i]);
+}
 #endif
 
 /* <linux/compiler.h> */
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 20)
-#ifndef __printf
-#define __printf(a, b) __attribute__((format(printf, a, b)))
-#endif
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-#ifndef __aligned
-#define __aligned(x) __attribute__((aligned(x)))
-#endif
-#ifndef __packed
-#define __packed __attribute__((packed))
-#endif
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(READ_ONCE)
 /*
  * See also patch "kernel: Provide READ_ONCE and ASSIGN_ONCE" (commit ID
@@ -233,18 +273,6 @@
  */
 #define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-#define ACCESS_ONCE(x) READ_ONCE(x)
-#endif
-
-#endif
-
-/*
- * See also commit e0fdb0e050ea ("percpu: add __percpu for sparse.")
- * # v2.6.34.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && !defined(__percpu)
-#define __percpu
 #endif
 
 /* <linux/compiler_attributes.h> */
@@ -265,119 +293,37 @@
 #endif
 #endif
 
-/* <linux/cpumask.h> */
+/* <linux/debugfs.h> */
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_CPUMASK_H)
-#define nr_cpu_ids NR_CPUS
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) && defined(__LINUX_CPUMASK_H)
 /*
- * See also patch "cpumask: introduce new API, without changing anything"
- * (commit ID 2d3854a37e8b).
+ * See also commit c64688081490 ("debugfs: add support for self-protecting
+ * attribute file fops") # v4.7.
  */
-typedef cpumask_t cpumask_var_t[1];
-#define cpumask_bits(maskp) ((maskp)->bits)
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/*
- * Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
- * not all bits may be allocated.
- */
-#define nr_cpumask_bits nr_cpu_ids
-#else
-#define nr_cpumask_bits NR_CPUS
-#endif
-
-#ifdef CONFIG_CPUMASK_OFFSTACK
-bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
-void free_cpumask_var(cpumask_var_t mask);
-#else
-static inline void free_cpumask_var(cpumask_var_t mask)
-{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
+#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt)		\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return simple_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static const struct file_operations __fops = {				\
+	.owner	 = THIS_MODULE,						\
+	.open	 = __fops ## _open,					\
+	.release = simple_attr_release,					\
+	.read	 = debugfs_attr_read,					\
+	.write	 = debugfs_attr_write,					\
+	.llseek  = no_llseek,						\
 }
 
-static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+					size_t len, loff_t *ppos)
 {
-	return true;
+	return -ENOENT;
 }
-#endif
-
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline ssize_t debugfs_attr_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
 {
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	WARN_ON_ONCE(cpu >= nr_cpumask_bits);
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-	return cpu;
-}
-
-/**
- * cpumask_next - get the next cpu in a cpumask
- * @n: the cpu prior to the place to search (ie. return will be > @n)
- * @srcp: the cpumask pointer
- *
- * Returns >= nr_cpu_ids if no further cpus set.
- */
-static inline unsigned int cpumask_next(int n, const cpumask_t *srcp)
-{
-	/* -1 is a legal arg here. */
-	if (n != -1)
-		cpumask_check(n);
-	return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
-}
-
-/**
- * for_each_cpu - iterate over every cpu in a mask
- * @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask pointer
- *
- * After the loop, cpu is >= nr_cpu_ids.
- */
-#define for_each_cpu(cpu, mask)                         \
-	for ((cpu) = -1;                                \
-		(cpu) = cpumask_next((cpu), (mask)),    \
-		(cpu) < nr_cpu_ids;)
-
-/**
- * cpumask_set_cpu - set a cpu in a cpumask
- * @cpu: cpu number (< nr_cpu_ids)
- * @dstp: the cpumask pointer
- */
-static inline void cpumask_set_cpu(unsigned int cpu, cpumask_t *dstp)
-{
-	set_bit(cpu, cpumask_bits(dstp));
-}
-
-/**
- * cpumask_copy - *dstp = *srcp
- * @dstp: the result
- * @srcp: the input cpumask
- */
-static inline void cpumask_copy(cpumask_t *dstp,
-				const cpumask_t *srcp)
-{
-	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits);
-}
-
-/**
- * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
- * @dstp: the cpumask pointer
- */
-static inline void cpumask_setall(cpumask_t *dstp)
-{
-	bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
-}
-
-/**
- * cpumask_equal - *src1p == *src2p
- * @src1p: the first input
- * @src2p: the second input
- */
-static inline bool cpumask_equal(const cpumask_t *src1p,
-				 const cpumask_t *src2p)
-{
-	return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
-			    nr_cpumask_bits);
+	return -ENOENT;
 }
 #endif
 
@@ -394,13 +340,6 @@
 
 /* <linux/dlm.h> */
 
-/* See also commit 0f8e0d9a317406612700426fad3efab0b7bbc467 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-enum {
-	DLM_LSFL_NEWEXCL = 0
-};
-#endif
-
 /* <linux/dmapool.h> */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && \
@@ -467,34 +406,6 @@
 
 /* <linux/fs.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) && \
-	!defined(CONFIG_COMPAT_KERNEL_3_12)
-/*
- * See also patch "new helper: file_inode(file)" (commit ID
- * 496ad9aa8ef448058e36ca7a787c61f2e63f0f54). See also patch
- * "kill f_dentry macro" (commit ID 78d28e651f97).
- */
-static inline struct inode *file_inode(const struct file *f)
-{
-	return f->f_dentry->d_inode;
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-static inline int vfs_fsync_backport(struct file *file, int datasync)
-{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-	struct inode *inode = file_inode(file);
-
-	return sync_page_range(inode, file->f_mapping, 0, i_size_read(inode));
-#else
-	return vfs_fsync(file, file->f_path.dentry, datasync);
-#endif
-}
-
-#define vfs_fsync vfs_fsync_backport
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
 /* See also commit dde0c2e79848 ("fs: add IOCB_SYNC and IOCB_DSYNC") */
 #define IOCB_DSYNC 0
@@ -539,7 +450,6 @@
 #define kernel_read(file, buf, count, pos)			\
 	kernel_read_backport((file), (buf), (count), (pos))
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) || defined(RHEL_MAJOR)
 /*
  * See also commit 7bb307e894d5 ("export kernel_write(), convert open-coded
  * instances") # v3.9.
@@ -561,30 +471,11 @@
 }
 
 #define kernel_write kernel_write_backport
-#else
-ssize_t kernel_write(struct file *file, const void *buf, size_t count,
-		     loff_t *pos);
-#endif
-#endif
-
-/* <linux/interrupt.h> */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && !defined(RHEL_MAJOR)
-/*
- * See also commit cd7eab44e994 ("genirq: Add IRQ affinity notifiers";
- * v2.6.39).
- */
-struct irq_affinity_notify;
-static inline int
-irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
-{
-	return 0;
-}
 #endif
 
 /* <linux/iocontext.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25) ||	  \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 21, 0) || \
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 21, 0) || \
 	(defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 8)
 
 static inline struct io_context *
@@ -606,146 +497,6 @@
 #define put_io_context scst_put_io_context
 #define ioc_task_link scst_ioc_task_link
 
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) && \
-	LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
-static inline struct io_context *get_task_io_context(struct task_struct *task,
-						     gfp_t gfp_flags, int node)
-{
-	WARN_ON_ONCE(task != current);
-	return get_io_context(gfp_flags, node);
-}
-#endif
-
-/* <linux/kconfig.h> */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) && !defined(RHEL_MAJOR)
-/*
- * See also commit 2a11c8ea20bf ("kconfig: Introduce IS_ENABLED(), IS_BUILTIN()
- * and IS_MODULE()") # v3.1.
- */
-#define __ARG_PLACEHOLDER_1 0,
-#define __take_second_arg(__ignored, val, ...) val
-#define __or(x, y)			___or(x, y)
-#define ___or(x, y)			____or(__ARG_PLACEHOLDER_##x, y)
-#define ____or(arg1_or_junk, y)		__take_second_arg(arg1_or_junk 1, y)
-#define __is_defined(x)			___is_defined(x)
-#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
-#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
-#define IS_BUILTIN(option) __is_defined(option)
-#define IS_MODULE(option) __is_defined(option##_MODULE)
-#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
-#endif
-
-/* <linux/kernel.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
-#ifndef RHEL_RELEASE_CODE
-typedef _Bool bool;
-#endif
-#define true  1
-#define false 0
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-#ifndef swap
-#define swap(a, b) \
-	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-#endif
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) &&	\
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6 ||	\
-	 RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 1)
-extern int hex_to_bin(char ch);
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && !defined(RHEL_MAJOR)
-/* See also commit 9b3be9f99203 ("Move round_up/down to kernel.h") # v2.6.34 */
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-/**
- * round_up - round up to next specified power of 2
- * @x: the value to round
- * @y: multiple to round up to (must be a power of 2)
- *
- * Rounds @x up to next multiple of @y (which must be a power of 2).
- * To perform arbitrary rounding up, use roundup() below.
- */
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-/**
- * round_down - round down to next specified power of 2
- * @x: the value to round
- * @y: multiple to round down to (must be a power of 2)
- *
- * Rounds @x down to next multiple of @y (which must be a power of 2).
- * To perform arbitrary rounding down, use rounddown() below.
- */
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38)
-/*
- * See also "lib: hex2bin converts ascii hexadecimal string to binary" (commit
- * dc88e46029486ed475c71fe1bb696d39511ac8fe).
- */
-static inline void hex2bin(u8 *dst, const char *src, size_t count)
-{
-	while (count--) {
-		*dst = hex_to_bin(*src++) << 4;
-		*dst += hex_to_bin(*src++);
-		dst++;
-	}
-}
-#endif
-
-/*
- * See also commit 33ee3b2e2eb9. That commit was introduced in kernel v2.6.39
- * and later backported to kernel v2.6.38.4.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) &&		\
-	LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 38) &&	\
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-static inline int __must_check kstrtoull(const char *s, unsigned int base,
-					 unsigned long long *res)
-{
-	return strict_strtoull(s, base, res);
-}
-
-static inline int __must_check kstrtoll(const char *s, unsigned int base,
-					long long *res)
-{
-	return strict_strtoll(s, base, res);
-}
-
-static inline int __must_check kstrtoul(const char *s, unsigned int base,
-					unsigned long *res)
-{
-	return strict_strtoul(s, base, res);
-}
-
-static inline int __must_check kstrtol(const char *s, unsigned int base,
-				       long *res)
-{
-	return strict_strtol(s, base, res);
-}
-
-static inline int __must_check kstrtoint(const char *s, unsigned int base,
-					 int *result)
-{
-	long val;
-	int ret = strict_strtol(s, base, &val);
-
-	if (ret)
-		return ret;
-	*result = val;
-	if (*result != val)
-		return -EINVAL;
-	return 0;
-}
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
@@ -823,31 +574,11 @@
 #define get_user_pages get_user_pages_backport
 #endif
 
-/* <linux/kmod.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23)
-enum umh_wait {
-	UMH_NO_WAIT = -1,       /* don't wait at all */
-	UMH_WAIT_EXEC = 0,      /* wait for the exec, but not the process */
-	UMH_WAIT_PROC = 1,      /* wait for the process to complete */
-};
-#endif
-
 /* <linux/kobject_ns.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-/*
- * See also commit 608b4b9548de ("netns: Teach network device kobjects which
- * namespace they are in.") # v2.6.35.
- */
-enum kobj_ns_type {
-	KOBJ_NS_TYPE_NET = 1,
-};
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) &&		      \
-	LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) &&	      \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) && 		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(7, 6))
 /*
  * See also commit 5f256becd868 ("[NET]: Basic network namespace
  * infrastructure."; v2.6.24). a685e08987d1 ("Delay struct net freeing while
@@ -871,46 +602,14 @@
 
 /* <linux/kref.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) &&		      \
-	!(LINUX_VERSION_CODE >> 8 == KERNEL_VERSION(3, 4, 0) >> 8 &&  \
-	  LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 41)) &&	      \
-	!(LINUX_VERSION_CODE >> 8 == KERNEL_VERSION(3, 2, 0) >> 8 &&  \
-	  LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 44)) &&	      \
-	(!defined(CONFIG_SUSE_KERNEL) ||			      \
-	 LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 101)) &&	      \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6 ||		      \
-	 (RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 6))
-/*
- * See also commit 4b20db3 (kref: Implement kref_get_unless_zero v3 -- v3.8).
- * See also commit e3a5505 in branch stable/linux-3.4.y (v3.4.41).
- * See also commit 3fa8ee5 in branch stable/linux-3.2.y (v3.2.44).
- * See also commit 6b9508d in the SuSE kernel tree.
- */
-static inline int __must_check kref_get_unless_zero(struct kref *kref)
-{
-	return atomic_add_unless(&kref->refcount, 1, 0);
-}
-#endif
-
 /* See also commit 2c935bc57221 */
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 #define kref_read(kref) (atomic_read(&(kref)->refcount))
 #endif
 
-/* <linux/kthread.h> */
-
-/* See also commit 207205a2ba26 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6 || \
-	 RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 9)
-#define kthread_create_on_node(threadfn, data, node, namefmt, arg...)\
-	kthread_create((threadfn), (data), (namefmt), ##arg)
-#endif
-
 /* <linux/ktime.h> */
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) &&		\
-	LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)) &&	\
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
 /**
  * ktime_before - Compare if a ktime_t value is smaller than another one.
@@ -943,10 +642,6 @@
 
 /* <linux/lockdep.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-#define lockdep_assert_held(l) (void)(l)
-#endif
-
 /*
  * See also commit 108c14858b9e ("locking/lockdep: Add support for dynamic
  * keys").
@@ -1094,6 +789,30 @@
 }
 #endif
 
+/* <linux/shrinker.h> */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0)
+/*
+ * Accepts the v6.0 name arguments (@fmt, ...) but does not use them. See also
+ * commit e33c267ab70d ("mm: shrinkers: provide shrinkers with names") # v6.0.
+ */
+static inline
+int register_shrinker_backport(struct shrinker *shrinker, const char *fmt, ...)
+{
+/*
+ * See also commit 1d3d4437eae1 ("vmscan: per-node deferred work") # v3.12
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
+	return register_shrinker(shrinker);
+#else
+	register_shrinker(shrinker);
+	return 0;	/* nothing to propagate; report success */
+#endif
+}
+
+#define register_shrinker register_shrinker_backport
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0) */
+
 /* <linux/module.h> */
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)
 #define MODULE_IMPORT_NS(ns)
@@ -1140,30 +859,6 @@
 } __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
 #endif
 
-/* <linux/pci.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) && !defined(RHEL_MAJOR)
-/*
- * See also commit 8c0d3a02c130 ("PCI: Add accessors for PCI Express
- * Capability") # v3.7.
- */
-static inline int pcie_capability_read_word(struct pci_dev *dev, int pos,
-					    u16 *val)
-{
-	WARN_ON_ONCE(true);
-	*val = 0;
-	return -EOPNOTSUPP;
-}
-
-static inline int pcie_capability_read_dword(struct pci_dev *dev, int pos,
-					     u32 *val)
-{
-	WARN_ON_ONCE(true);
-	*val = 0;
-	return -EOPNOTSUPP;
-}
-#endif
-
 /* <linux/percpu-refcount.h> */
 
 #if defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 7 ||	\
@@ -1171,7 +866,9 @@
 #include <linux/percpu-refcount.h>
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) &&	\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 3))
 /*
  * See also commit 09ed79d6d75f ("percpu_ref: introduce PERCPU_REF_ALLOW_REINIT
  * flag") # v5.3.
@@ -1185,7 +882,7 @@
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7)
-typedef unsigned percpu_count_t;
+typedef unsigned int percpu_count_t;
 #define READ_REF_COUNT(ref) atomic_read(&(ref)->count)
 #else
 typedef unsigned long percpu_count_t;
@@ -1435,144 +1132,8 @@
 	return READ_REF_COUNT(ref) - !percpu_ref_is_dying(ref);
 }
 
-/* <linux/preempt.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-/*
- * See also patch "sched: Fix softirq time accounting" (commit ID
- * 75e1056f5c57050415b64cb761a3acc35d91f013).
- */
-#ifndef in_serving_softirq
-#define in_serving_softirq() in_softirq()
-#endif
-#endif
-
-/* <linux/printk.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) && !defined(RHEL_MAJOR)
-#define KERN_CONT       ""
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-/*
- * See also the following commits:
- * d091c2f5 - Introduction of pr_info() etc. in <linux/kernel.h>.
- * 311d0761 - Introduction of pr_cont() in <linux/kernel.h>.
- * 968ab183 - Moved pr_info() etc. from <linux/kernel.h> to <linux/printk.h>
- */
-#ifndef pr_emerg
-
-#ifndef pr_fmt
-#define pr_fmt(fmt) fmt
-#endif
-
-#define pr_emerg(fmt, ...)	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...)	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...)	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...)	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...)	printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-
-#endif /* pr_emerg */
-
-#ifndef pr_info
-#define pr_info(fmt, ...)	printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-#endif
-
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-#ifndef pr_cont
-#define pr_cont(fmt, ...)	printk(KERN_CONT fmt, ##__VA_ARGS__)
-#endif
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30) */
-
-/* See also commit f036be96dd9c ("printk: introduce printk_once()") */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-#define printk_once(fmt, ...)					\
-({								\
-	static bool __print_once __read_mostly;			\
-	bool __ret_print_once = !__print_once;			\
-								\
-	if (!__print_once) {					\
-		__print_once = true;				\
-		printk(fmt, ##__VA_ARGS__);			\
-	}							\
-	unlikely(__ret_print_once);				\
-})
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38)
-/*
- * See also commit 16cb839f1332 ("include/linux/printk.h: add pr_<level>_once
- * macros") # v2.6.38.
- */
-#define pr_warn_once(fmt, ...)					\
-	printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-/*
- * See also patch "kernel.h: add pr_warn for symmetry to dev_warn,
- * netdev_warn" (commit fc62f2f19edf46c9bdbd1a54725b56b18c43e94f).
- */
-#ifndef pr_warn
-#define pr_warn pr_warning
-#endif
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-/*
- * See also patch "Add a dummy printk function for the maintenance of unused
- * printks" (commit 12fdff3fc2483f906ae6404a6e8dcf2550310b6f).
- */
-static inline __attribute__ ((format (printf, 1, 2)))
-int no_printk(const char *s, ...) { return 0; }
-#endif
-
-/* <linux/ratelimit.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
-/* See also commit 717115e1a585 */
-
-#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
-#define DEFAULT_RATELIMIT_BURST 10
-
-struct ratelimit_state {
-	int interval;
-	int burst;
-};
-
-#define DEFINE_RATELIMIT_STATE(name, interval, burst)	\
-	struct ratelimit_state name = {interval, burst,}
-
-static inline int __ratelimit(struct ratelimit_state *rs)
-{
-	return 1;
-}
-#endif
-
 /* <linux/rcupdate.h> */
 
-/* See also commit b62730baea32 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)
-#define rcu_dereference_protected(p, c) rcu_dereference(p)
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) && !defined(kfree_rcu)
-typedef void (*rcu_callback_t)(struct rcu_head *);
-#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
-#define kfree_call_rcu(head, rcb) call_rcu(head, rcb)
-#define __kfree_rcu(head, offset)				\
-	do {							\
-		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));	\
-		kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
-	} while (0)
-#define kfree_rcu(ptr, rcu_head)				\
-	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 7 ||	\
 	 RHEL_MAJOR -0 == 7 && RHEL_MINOR -0 < 7)
@@ -1584,95 +1145,6 @@
 static inline void destroy_rcu_head(struct rcu_head *head) { }
 #endif
 
-/* <linux/scatterlist.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-/*
- * The macro's sg_page(), sg_virt(), sg_init_table(), sg_assign_page() and
- * sg_set_page() have been introduced in the 2.6.24 kernel. The definitions
- * below are backports of the 2.6.24 macro's for older kernels. There is one
- * exception however: when compiling SCST on a system with a pre-2.6.24 kernel
- * (e.g. RHEL 5.x) where the OFED kernel headers have been installed, do not
- * define the backported macro's because OFED has already defined these.
- */
-
-static inline bool sg_is_chain(struct scatterlist *sg)
-{
-	return false;
-}
-
-static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg)
-{
-	return NULL;
-}
-
-#define sg_is_last(sg) false
-
-#ifndef sg_page
-static inline struct page *sg_page(struct scatterlist *sg)
-{
-	return sg->page;
-}
-#endif
-
-static inline void *sg_virt(struct scatterlist *sg)
-{
-	return page_address(sg_page(sg)) + sg->offset;
-}
-
-static inline void sg_mark_end(struct scatterlist *sg)
-{
-}
-
-static inline void sg_unmark_end(struct scatterlist *sg)
-{
-}
-
-#ifndef __BACKPORT_LINUX_SCATTERLIST_H_TO_2_6_23__
-
-static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
-{
-	memset(sgl, 0, sizeof(*sgl) * nents);
-}
-
-static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
-{
-	sg->page = page;
-}
-
-static inline void sg_set_page(struct scatterlist *sg, struct page *page,
-			       unsigned int len, unsigned int offset)
-{
-	sg_assign_page(sg, page);
-	sg->offset = offset;
-	sg->length = len;
-}
-
-#ifndef for_each_sg
-/* See also commit 96b418c960af0d5c7185ff5c4af9376eb37ac9d3 */
-#define for_each_sg(sglist, sg, nr, __i)       \
-	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next_inline(sg))
-#endif /* for_each_sg */
-
-#endif /* __BACKPORT_LINUX_SCATTERLIST_H_TO_2_6_23__ */
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-/*
- * See also commit c8164d8931fd ("scatterlist: introduce sg_unmark_end";
- * v3.10).
- */
-static inline void sg_unmark_end(struct scatterlist *sg)
-{
-	sg->page_link &= ~0x02;
-}
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) */
-
-/* <linux/sched.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-#define set_cpus_allowed_ptr(p, new_mask) set_cpus_allowed((p), *(new_mask))
-#endif
-
 /* <linux/sched/prio.h> */
 
 /*
@@ -1683,14 +1155,30 @@
 #define MIN_NICE -20
 #endif
 
-/* <linux/slab.h> */
+/* <linux/seq_file.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
-	sizeof(struct __struct), __alignof__(struct __struct),\
-	(__flags), NULL, NULL)
+/*
+ * See also commit a08f06bb7a07 ("seq_file: Introduce DEFINE_SHOW_ATTRIBUTE()
+ * helper macro") # v4.16.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0)
+#define DEFINE_SHOW_ATTRIBUTE(__name)					\
+static int __name ## _open(struct inode *inode, struct file *file)	\
+{									\
+	return single_open(file, __name ## _show, inode->i_private);	\
+}									\
+									\
+static const struct file_operations __name ## _fops = {			\
+	.owner		= THIS_MODULE,					\
+	.open		= __name ## _open,				\
+	.read		= seq_read,					\
+	.llseek		= seq_lseek,					\
+	.release	= single_release,				\
+}
 #endif
 
+/* <linux/slab.h> */
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) &&	\
 	!defined(_COMPAT_LINUX_MM_H)
 /*
@@ -1706,36 +1194,14 @@
 #define kmem_cache_destroy kmem_cache_destroy_backport
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) &&	    \
-	!(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 52) && \
-	  LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)) &&  \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
-{
-	if (size != 0 && n > ULONG_MAX / size)
-		return NULL;
-	return kmalloc(n * size, flags);
-}
-#endif
-
 /*
  * See also commit 8eb8284b4129 ("usercopy: Prepare for usercopy
  * whitelisting").
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23)
-static inline struct kmem_cache *kmem_cache_create_usercopy(const char *name,
-			unsigned int size, unsigned int align,
-			unsigned long flags,
-			unsigned int useroffset, unsigned int usersize,
-			void (*ctor)(void *))
-{
-	return kmem_cache_create(name, size, align, flags, ctor, NULL);
-}
-/*
+ *
  * UEK4 is based on kernel v4.1.12 and does not have a backport of the v4.16
  * API. UEK5 is based on kernel v4.14.35 and has a backport of the v4.16 API.
  */
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) &&	\
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) &&	\
 	(!defined(UEK_KABI_RENAME) ||			\
 	 LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))
 static inline struct kmem_cache *kmem_cache_create_usercopy(const char *name,
@@ -1760,11 +1226,16 @@
 /* <linux/sockptr.h> */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
+#if !defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||	\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 4
 /* See also commit ba423fdaa589 ("net: add a new sockptr_t type") # v5.9 */
 static inline void __user *KERNEL_SOCKPTR(void *p)
 {
 	return (void __force __user *)p;
 }
+#else /* RHEL 8.4 or later: argument parenthesized so the cast covers all of it */
+#define KERNEL_SOCKPTR(p) ((char __force __user *)(p))
+#endif /* not RHEL, or RHEL before 8.4 */
 #endif
 
 /* <linux/stddef.h> */
@@ -1854,20 +1325,6 @@
 		(unsigned long)&(_name))
 #endif
 
-/* <linux/types.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-/*
- * See also patch "fix abuses of ptrdiff_t" (commit ID
- * 142956af525002c5378e7d91d81a01189841a785).
- */
-typedef unsigned long uintptr_t;
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
-char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap);
-#endif
-
 /* <linux/uio.h> */
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
@@ -1896,39 +1353,9 @@
 
 /* <linux/unaligned.h> */
 
-#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static inline uint16_t get_unaligned_be16(const void *p)
-{
-	return be16_to_cpu(get_unaligned((__be16 *)p));
-}
-
-static inline void put_unaligned_be16(uint16_t i, void *p)
-{
-	put_unaligned(cpu_to_be16(i), (__be16 *)p);
-}
-
-static inline uint32_t get_unaligned_be32(const void *p)
-{
-	return be32_to_cpu(get_unaligned((__be32 *)p));
-}
-
-static inline void put_unaligned_be32(uint32_t i, void *p)
-{
-	put_unaligned(cpu_to_be32(i), (__be32 *)p);
-}
-
-static inline uint64_t get_unaligned_be64(const void *p)
-{
-	return be64_to_cpu(get_unaligned((__be64 *)p));
-}
-
-static inline void put_unaligned_be64(uint64_t i, void *p)
-{
-	put_unaligned(cpu_to_be64(i), (__be64 *)p);
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) && \
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||	\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 4)
 /* Only use get_unaligned_be24() if reading p - 1 is allowed. */
 static inline uint32_t get_unaligned_be24(const uint8_t *const p)
 {
@@ -1943,57 +1370,6 @@
 }
 #endif
 
-/* <linux/vmalloc.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 5 || \
-	 RHEL_MAJOR -0 == 5 && RHEL_MINOR -0 < 10 || \
-	 RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 1)
-/*
- * See also patch "mm: add vzalloc() and vzalloc_node() helpers" (commit
- * e1ca7788dec6773b1a2bce51b7141948f2b8bccf).
- */
-static inline void *vzalloc(unsigned long size)
-{
-	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			 PAGE_KERNEL);
-}
-#endif
-
-/* <linux/workqueue.h> */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-/*
- * See also commit d320c03830b1 ("workqueue: s/__create_workqueue()/
- * alloc_workqueue()/, and add system workqueues") # v2.6.36.
- */
-static inline struct workqueue_struct *alloc_workqueue(const char *fmt,
-						       unsigned int flags,
-						       int max_active, ...)
-{
-	WARN_ON_ONCE(flags | max_active);
-	return create_workqueue(fmt);
-}
-#endif
-
-/*
- * See also commits 18aa9effad4a ("workqueue: implement WQ_NON_REENTRANT";
- * v2.6.36) and commits dbf2576e37da ("workqueue: make all workqueues
- * non-reentrant"; v3.7).
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
-#define WQ_NON_REENTRANT 0
-#endif
-
-/*
- * See also commit 226223ab3c41 ("workqueue: implement sysfs interface for
- * workqueues"; v3.10).
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-#define WQ_SYSFS 0
-#endif
-
 /* <rdma/ib_verbs.h> */
 
 /* commit ed082d36 */
@@ -2008,10 +1384,40 @@
 	})
 #endif
 
+/* <scsi/scsi.h> */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+#ifndef msg_byte
+/*
+ * See also commit 54cf31d07aa8 ("scsi: core: Drop message byte helper";
+ * v5.14-rc1).
+ */
+static inline uint8_t msg_byte(uint32_t result)
+{
+	return (result >> 8) & 0xff;	/* bits 8..15 of result */
+}
+#endif /* msg_byte */
+#ifndef host_byte
+static inline uint8_t host_byte(uint32_t result)
+{
+	return (result >> 16) & 0xff;	/* bits 16..23 of result */
+}
+#endif /* host_byte */
+#ifndef driver_byte
+/*
+ * See also commit 54c29086195f ("scsi: core: Drop the now obsolete driver_byte
+ * definitions"; v5.14-rc1).
+ */
+static inline uint8_t driver_byte(uint32_t result)
+{
+	return (result >> 24) & 0xff;	/* bits 24..31 of result */
+}
+#endif /* driver_byte */
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) */
+
 /* <scsi/scsi_cmnd.h> */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0) || \
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0) || \
 	(defined(RHEL_RELEASE_CODE) &&			 \
 	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(8, 3))
 /*
@@ -2035,8 +1441,110 @@
 }
 #endif
 
+/*
+ * The Debian 5.13.0 kernel has a scsi_build_sense() definition but does not
+ * define bio_multiple_segments() while the upstream 5.13.0 kernel defines
+ * bio_multiple_segments(). Hence the check two lines below for the Debian
+ * kernel.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) && \
+	(LINUX_VERSION_CODE >> 8 != KERNEL_VERSION(5, 13, 0) >> 8 ||	\
+	 defined(bio_multiple_segments))
+/*
+ * Fill @scmd's sense buffer and set DRIVER_SENSE | CHECK CONDITION. See also
+ * commit f2b1e9c6f867 ("scsi: core: Introduce scsi_build_sense()"; v5.14-rc1).
+ */
+static inline void scsi_build_sense(struct scsi_cmnd *scmd, int desc,
+                            u8 key, u8 asc, u8 ascq)
+{
+	scsi_build_sense_buffer(desc, scmd->sense_buffer, key, asc, ascq);
+	scmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 7) ||	\
+	 RHEL_RELEASE_CODE -0 == RHEL_RELEASE_VERSION(9, 0))
+/*
+ * Returns scmd->request. See also commit 51f3a4788928 ("scsi: core: Introduce
+ * the scsi_cmd_to_rq() function").
+ */
+static inline struct request *scsi_cmd_to_rq(struct scsi_cmnd *scmd)
+{
+	return scmd->request;
+}
+#endif
+
+/*
+ * See also commits 7ba46799d346 ("scsi: core: Add scsi_prot_ref_tag()
+ * helper") and ddd0bc756983 ("block: move ref_tag calculation func to the
+ * block layer"; v4.19).
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 7))
+static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
+{
+#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 == 7
+	WARN_ON_ONCE(true);	/* stub: warns once, no real tag is computed */
+	return 0;
+#else	/* not RHEL 7 */
+	struct request *rq = blk_mq_rq_from_pdu(scmd);
+
+	return t10_pi_ref_tag(rq);
+#endif	/* RHEL_MAJOR == 7 */
+}
+#endif
+#endif
+
+/*
+ * See also commit c611529e7cd3 ("sd: Honor block layer integrity handling
+ * flags"; v3.18).
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
+{
+	/* To do: backport this function properly. */
+	WARN_ON_ONCE(true);	/* flag every use of this stub once */
+	return 512;	/* hard-coded placeholder; @scmd is not consulted */
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+/*
+ * See also commit 11b68e36b167 ("scsi: core: Call scsi_done directly"; v5.16)
+ */
+static inline void scsi_done(struct scsi_cmnd *cmd)
+{
+	cmd->scsi_done(cmd);	/* void function: plain call, no 'return <expr>' */
+}
+#endif
+
 /* <scsi/scsi_request.h> */
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)
+/*
+ * See also commit 6aded12b10e0 ("scsi: core: Remove struct scsi_request") # v5.18
+ */
+static inline struct scsi_cmnd *scsi_req(struct request *rq)
+{
+	return blk_mq_rq_to_pdu(rq);
+}
+
+#define SREQ_SENSE(req) ((req)->sense_buffer)	/* member name used from v5.18 */
+#define SREQ_CP(req)    ((req)->cmnd)		/* member name used from v5.18 */
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) */
+#define SREQ_SENSE(req) ((req)->sense)		/* member name used before v5.18 */
+#define SREQ_CP(req)    ((req)->cmd)		/* member name used before v5.18 */
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) */
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 static inline struct request *scsi_req(struct request *rq)
 {
@@ -2047,13 +1555,8 @@
 {
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-	rq->data_len = 0;
-	rq->sector = (sector_t) -1;
-#else
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
-#endif
 	rq->bio = rq->biotail = NULL;
 	memset(rq->__cmd, 0, sizeof(rq->__cmd));
 	rq->cmd = rq->__cmd;
@@ -2063,6 +1566,34 @@
 }
 #endif
 
+/* <linux/bsg-lib.h> */
+
+/*
+ * Note: the function bsg_job_sense() exists only in SCST but not in any
+ * upstream kernel.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) &&	\
+     !defined(CONFIG_SUSE_KERNEL)) ||			\
+    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) &&	\
+     defined(CONFIG_SUSE_KERNEL))
+static inline void *bsg_job_sense(struct fc_bsg_job *job)
+{
+	return job->req->sense;
+}
+#else
+static inline void *bsg_job_sense(struct bsg_job *job)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	return job->req->sense;	/* sense member of the request itself */
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) &&	\
+	!defined(CONFIG_SUSE_KERNEL)
+	return scsi_req(job->req)->sense;	/* via scsi_req() of job->req */
+#else	/* v4.17 or later, or a SUSE kernel from v4.11 on */
+	return SREQ_SENSE(scsi_req(blk_mq_rq_from_pdu(job)));
+#endif
+}
+#endif
+
 /* <scsi/scsi_transport_fc.h> */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) &&	\
@@ -2106,16 +1637,44 @@
 #define wwn_to_u64(wwn) get_unaligned_be64(wwn)
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) &&		\
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||		\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 9 ||		\
+	 RHEL_MAJOR -0 == 9 && RHEL_MINOR -0 < 3)
 /*
- * See also commit c39e0af64bce ("scsi: scsi_transport_fc: Add FPIN fc event
- * codes") # v5.2
+ * See also commit 64fd2ba977b1 ("scsi: scsi_transport_fc: Add an additional
+ * flag to fc_host_fpin_rcv()") # v6.3
  */
+static inline void
+fc_host_fpin_rcv_backport(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf,
+			  u8 event_acknowledge)
+{
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && \
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||	\
 	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 2)
-static inline void
-fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf)
+	/*
+	 * See also commit c39e0af64bce ("scsi: scsi_transport_fc: Add FPIN fc event
+	 * codes") # v5.2
+	 */
+	return;	/* the FPIN event is dropped on these kernels */
+#else	/* 3-argument kernel API: event_acknowledge is not passed on */
+	fc_host_fpin_rcv(shost, fpin_len, fpin_buf);
+#endif
+}
+
+#define fc_host_fpin_rcv fc_host_fpin_rcv_backport
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+/*
+ * See also commit 67b465250e04 ("scsi: fc: start decoupling fc_block_scsi_eh
+ * from scsi_cmnd"; v4.14).
+ */
+static inline int fc_block_rport(struct fc_rport *rport)
 {
+	/* To do: backport this function. */
+	WARN_ON_ONCE(true);
+	return 0;
 }
 #endif
 
@@ -2132,7 +1691,10 @@
  * See also commit 62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN
  * ELS requests").
  */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0) &&			\
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||			\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 4) &&			\
+	!(defined(UEK_KABI_RENAME) && defined(FC_PORTSPEED_256GBIT))
 #define ELS_RDP 0x18
 #endif
 

diff --git a/scst/scst/include/scst.h b/scst/scst/include/scst.h
index b5abe41..f83dc4b 100644
--- a/scst/scst/include/scst.h
+++ b/scst/scst/include/scst.h

@@ -45,10 +45,10 @@
 #define CONFIG_SCST_PER_DEVICE_CMD_COUNT_LIMIT
 #endif
 
-#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-#error RHEL 5 is no longer supported. Please upgrade to RHEL 6 or later.
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-#error The SCST sysfs interface is supported from kernel version 2.6.26 on. Please upgrade to a newer kernel version.
+#if defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 6
+#error RHEL 6 is no longer supported. Please upgrade to RHEL 7 or later.
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#error The SCST has dropped support for kernels older than 3.10.0 since SCST v3.6. Please upgrade to a newer kernel version.
 #endif
 
 #include <scsi/scsi_cmnd.h>
@@ -1006,17 +1006,6 @@
 	void (*on_abort_cmd)(struct scst_cmd *cmd);
 
 	/*
-	 * This function should detect the target adapters that
-	 * are present in the system. The function should return a value
-	 * >= 0 to signify the number of detected target adapters.
-	 * A negative value should be returned whenever there is
-	 * an error.
-	 *
-	 * OBSOLETE
-	 */
-	int (*detect)(struct scst_tgt_template *tgt_template);
-
-	/*
 	 * This function should free up the resources allocated to the device.
 	 * The function should return 0 to indicate successful release
 	 * or a negative value if there are some issues with the release.
@@ -1874,11 +1863,7 @@
 	/* List entry for the sessions list inside ACG */
 	struct list_head acg_sess_list_entry;
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
 	struct delayed_work hw_pending_work;
-#else
-	struct work_struct hw_pending_work;
-#endif
 
 	/* Name of attached initiator */
 	const char *initiator_name;
@@ -1923,11 +1908,7 @@
 	 */
 	struct list_head sess_cm_list_id_list;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct work_struct sess_cm_list_id_cleanup_work;
-#else
 	struct delayed_work sess_cm_list_id_cleanup_work;
-#endif
 
 	/* sysfs release completion */
 	struct completion *sess_kobj_release_cmpl;
@@ -2081,7 +2062,7 @@
 
 	struct scst_session *sess;	/* corresponding session */
 
-	atomic_t *cpu_cmd_counter;
+	bool counted;
 
 	atomic_t cmd_ref;
 
@@ -2341,7 +2322,9 @@
 	uint8_t lba_off;	/* LBA offset in cdb */
 	uint8_t lba_len;	/* LBA length in cdb */
 	uint8_t len_off;	/* length offset in cdb */
-	uint8_t len_len;	/* length length in cdb */
+	uint8_t len_len;	/* length of length in cdb */
+	/* If not zero, logarithm base 2 of the maximum data buffer length. */
+	uint8_t log2_max_buf_len;
 	uint32_t op_flags;	/* various flags of this opcode */
 	const char *op_name;	/* op code SCSI full name */
 
@@ -2571,7 +2554,7 @@
 
 	struct scst_session *sess;
 
-	atomic_t *cpu_cmd_counter;
+	bool counted;
 
 	/* Mgmt cmd state, one of SCST_MCMD_STATE_* constants */
 	int state;
@@ -2845,6 +2828,9 @@
 	atomic_t dev_cmd_count;
 #endif
 
+	/* Number of copy manager designator update requests. */
+	atomic_t cm_update_req_cnt;
+
 	/*
 	 * One more than the number of commands associated with this device
 	 * and the number of SCST data structures holding a reference on this
@@ -4681,10 +4667,9 @@
 #endif
 	if (cmd->cdb_len == 32)
 		return get_unaligned_be16(&cmd->cdb[24]);
-	else {
-		/* cmd->dev must be alive at this point */
-		return be16_to_cpu(cmd->dev->dev_dif_static_app_tag);
-	}
+
+	/* cmd->dev must be alive at this point */
+	return be16_to_cpu(cmd->dev->dev_dif_static_app_tag);
 }
 
 /*
@@ -4699,12 +4684,11 @@
 #endif
 	if (cmd->cdb_len == 32)
 		return get_unaligned_be16(&cmd->cdb[26]);
-	else {
-		if (scst_get_dif_checks(cmd->cmd_dif_actions) & SCST_DIF_CHECK_APP_TAG)
-			return 0xFFFF;
-		else
-			return 0;
-	}
+
+	if (scst_get_dif_checks(cmd->cmd_dif_actions) & SCST_DIF_CHECK_APP_TAG)
+		return 0xFFFF;
+
+	return 0;
 }
 
 /*
@@ -5132,6 +5116,23 @@
 }
 
 /*
+ * Approximate, rounded-up number of pages spanned by the SG entries of @cmd
+ */
+static inline int scst_get_buf_page_count(struct scst_cmd *cmd)
+{
+	struct scatterlist *entry = cmd->sg;
+	int pages = 0, left;
+
+	if (unlikely(cmd->sg_cnt == 0))
+		return 1;
+
+	for (left = cmd->sg_cnt; left > 0; left--, entry = sg_next_inline(entry))
+		pages += PAGE_ALIGN(entry->offset + entry->length) >> PAGE_SHIFT;
+
+	return pages;
+}
+
+/*
  * Returns approximate higher rounded buffers count that
  * scst_get_out_buf_[first|next]() return.
  */
@@ -5150,12 +5151,11 @@
 }
 
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) && defined(CONFIG_LOCKDEP)
+#if defined(CONFIG_LOCKDEP)
 extern struct lockdep_map scst_suspend_dep_map;
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && \
-	defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if defined(CONFIG_DEBUG_LOCK_ALLOC)
 #define scst_assert_activity_suspended()		\
 	WARN_ON(debug_locks && !lock_is_held(&scst_suspend_dep_map))
 #else
@@ -5204,13 +5204,9 @@
 extern struct mutex scst_mutex;
 
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34))
 const struct sysfs_ops *scst_sysfs_get_sysfs_ops(void);
-#else
-struct sysfs_ops *scst_sysfs_get_sysfs_ops(void);
-#endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) && defined(CONFIG_LOCKDEP)
+#if defined(CONFIG_LOCKDEP)
 #define SCST_SET_DEP_MAP(work, dm) ((work)->dep_map = (dm))
 #define SCST_KOBJECT_PUT_AND_WAIT(kobj, category, c, dep_map) \
 	scst_kobject_put_and_wait(kobj, category, c, dep_map)
@@ -5522,11 +5518,9 @@
 void scst_sysfs_work_get(struct scst_sysfs_work_item *work);
 void scst_sysfs_work_put(struct scst_sysfs_work_item *work);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 #ifdef CONFIG_LOCKDEP
 extern struct lockdep_map scst_dev_dep_map;
 #endif
-#endif
 
 
 char *scst_get_next_lexem(char **token_str);
@@ -5538,10 +5532,8 @@
 
 void scst_pass_through_cmd_done(void *data, char *sense, int result, int resid);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 int scst_scsi_exec_async(struct scst_cmd *cmd, void *data,
 	void (*done)(void *data, char *sense, int result, int resid));
-#endif
 
 int scst_get_file_mode(const char *path);
 bool scst_parent_dir_exists(const char *path);
@@ -5551,6 +5543,9 @@
 	uint64_t sdd_blocks;
 };
 
+loff_t scst_file_size(const char *path, umode_t *mode);
+loff_t scst_bdev_size(const char *path);
+loff_t scst_file_or_bdev_size(const char *path);
 ssize_t scst_readv(struct file *file, const struct kvec *vec,
 		   unsigned long vlen, loff_t *pos);
 ssize_t scst_writev(struct file *file, const struct kvec *vec,
@@ -5577,16 +5572,11 @@
 	scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,	\
 		     NULL /* sshdr */, timeout, retries, flags,		\
 		     0 /* rq_flags */, NULL /* resid */)
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
-#define scst_scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \
-			  timeout, retries, flags)			\
-	scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,	\
-		     timeout, retries, flags, NULL /* resid */)
 #else
 #define scst_scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \
 			  timeout, retries, flags)			\
 	scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,	\
-		     timeout, retries, flags)
+		     timeout, retries, flags, NULL /* resid */)
 #endif
 
 __be64 scst_pack_lun(const uint64_t lun, enum scst_lun_addr_method addr_method);
@@ -5602,9 +5592,6 @@
 int scst_write_file_transactional(const char *name, const char *name1,
 	const char *signature, int signature_len, const uint8_t *buf, int size);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-void scst_path_put(struct nameidata *nd);
-#endif
 int scst_remove_file(const char *name);
 
 void scst_set_tp_soft_threshold_reached_UA(struct scst_tgt_dev *tgt_dev);

diff --git a/scst/scst/include/scst_const.h b/scst/scst/include/scst_const.h
index b32f10c..65b1439 100644
--- a/scst/scst/include/scst_const.h
+++ b/scst/scst/include/scst_const.h

@@ -44,9 +44,9 @@
  * and FIO_REV in usr/fileio/common.h as well.
  */
 #define SCST_VERSION(a, b, c, d)    (((a) << 24) + ((b) << 16) + ((c) << 8) + d)
-#define SCST_VERSION_CODE	    SCST_VERSION(3, 4, 0, 0)
+#define SCST_VERSION_CODE	    SCST_VERSION(3, 7, 0, 0)
 #define SCST_VERSION_STRING_SUFFIX
-#define SCST_VERSION_NAME	    "3.5.0"
+#define SCST_VERSION_NAME	    "3.7.0"
 #define SCST_VERSION_STRING	    SCST_VERSION_NAME SCST_VERSION_STRING_SUFFIX
 
 #define SCST_CONST_VERSION SCST_CONST_INTF_VER
@@ -356,15 +356,7 @@
 #define INIT_ELEMENT_STATUS         0x07
 #define INIT_ELEMENT_STATUS_RANGE   0x37
 #define PREVENT_ALLOW_MEDIUM        0x1E
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38) \
-	&& (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 <= 5)
-#define READ_ATTRIBUTE              0x8C
-#endif
 #define REQUEST_VOLUME_ADDRESS      0xB5
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38) \
-	&& (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 <= 5)
-#define WRITE_ATTRIBUTE             0x8D
-#endif
 #if (!defined(__KERNEL__) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) &&\
 	!defined(WRITE_VERIFY_16)
 #define WRITE_VERIFY_16             0x8E
@@ -373,8 +365,7 @@
 #ifndef VERIFY_12
 #define VERIFY_12                   0xAF
 #endif
-#if !defined(GENERATING_UPSTREAM_PATCH) || \
-	LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38)
+#if !defined(GENERATING_UPSTREAM_PATCH)
 /*
  * The constants below have been defined in the kernel header <scsi/scsi.h>
  * and hence are not needed when this header file is included in kernel code.
@@ -385,9 +376,6 @@
 /* Upstream commit 93aae17a (v2.6.38) */
 #define GET_EVENT_STATUS_NOTIFICATION 0x4a
 #endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-#define VARIABLE_LENGTH_CMD   0x7f
-#endif
 #ifndef READ_16
 #define READ_16               0x88
 #endif
@@ -397,27 +385,11 @@
 #ifndef VERIFY_16
 #define VERIFY_16	      0x8f
 #endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38)
-#ifndef MI_REPORT_IDENTIFYING_INFORMATION
-#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
-#endif
-#ifndef MI_REPORT_SUPPORTED_OPERATION_CODES
-#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
-#endif
-#ifndef MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS
-#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
-#endif
-#endif
 #ifndef SAI_READ_CAPACITY_16
 /* values for service action in */
 #define	SAI_READ_CAPACITY_16  0x10
 #endif
 #endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-#ifndef SAI_GET_LBA_STATUS
-#define SAI_GET_LBA_STATUS    0x12
-#endif
-#endif
 #ifndef GENERATING_UPSTREAM_PATCH
 #ifndef REPORT_LUNS
 #define REPORT_LUNS           0xa0
@@ -440,14 +412,6 @@
 #define SYNCHRONIZE_CACHE_16  0x91
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
-/*
- * From <scsi/scsi.h>. See also commit
- * f57e4502cea471c69782d4790c71d8414ab49a9d.
- */
-#define UNMAP 0x42
-#endif
-
 /* Subcodes of VARIABLE_LENGTH_CMD (0x7F) */
 #define SUBCODE_READ_32		0x09
 #define SUBCODE_VERIFY_32	0x0a
@@ -475,14 +439,6 @@
 #endif
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-/*
- * From <linux/fs.h>. See also commit
- * d30a2605be9d5132d95944916e8f578fcfe4f976.
- */
-#define BLKDISCARD _IO(0x12, 119)
-#endif
-
 /*************************************************************
  **  SCSI Architecture Model (SAM) Status codes. Taken from SAM-3 draft
  **  T10/1561-D Revision 4 Draft dated 7th November 2002.

diff --git a/scst/scst/include/scst_debug.h b/scst/scst/include/scst_debug.h
index b789252..8f5f4ec 100644
--- a/scst/scst/include/scst_debug.h
+++ b/scst/scst/include/scst_debug.h

@@ -21,19 +21,9 @@
 #ifndef __SCST_DEBUG_H
 #define __SCST_DEBUG_H
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
-#include <linux/autoconf.h>	/* for CONFIG_* */
-#else
 #include <generated/autoconf.h>	/* for CONFIG_* */
-#endif
-
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 19)
 #include <linux/bug.h>		/* for WARN_ON_ONCE */
-#endif
-
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27)
 #include <linux/ratelimit.h>
-#endif
 
 #ifdef INSIDE_KERNEL_TREE
 #include <scst/backport.h>

diff --git a/scst/scst/include/scst_event.h b/scst/scst/include/scst_event.h
index f18b1ea..3180237 100644
--- a/scst/scst/include/scst_event.h
+++ b/scst/scst/include/scst_event.h

@@ -61,11 +61,7 @@
 	int *pqueued_events_cnt;
 	union {
 		struct work_struct scst_event_queue_work;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-		struct work_struct event_timeout_work;
-#else
 		struct delayed_work event_timeout_work;
-#endif
 	};
 
 	struct scst_event event;
@@ -153,7 +149,7 @@
 	aligned_u64 stpg_cmd_tag;
 	uint8_t device_name[64];
 	uint16_t stpg_descriptors_cnt;
-	struct scst_event_stpg_descr stpg_descriptors[0];
+	struct scst_event_stpg_descr stpg_descriptors[];
 };
 
 #define SCST_EVENT_REG_VIRT_DEV		6

diff --git a/scst/scst/include/scst_user.h b/scst/scst/include/scst_user.h
index 92c1b12..8285c29 100644
--- a/scst/scst/include/scst_user.h
+++ b/scst/scst/include/scst_user.h

@@ -354,7 +354,7 @@
 	int16_t replies_done; /* out */
 	int16_t cmds_cnt; /* in/out */
 	int16_t pad;
-	struct scst_user_get_cmd cmds[0]; /* out */
+	struct scst_user_get_cmd cmds[]; /* out */
 };
 
 #define SCST_USER_REGISTER_DEVICE	_IOW('u', 1, struct scst_user_dev_desc)

diff --git a/scst/scst/kernel/in-tree/Kconfig.drivers.Linux.patch b/scst/scst/kernel/in-tree/Kconfig.drivers.Linux.patch
index 0d5a19f..347bf5d 100644
--- a/scst/scst/kernel/in-tree/Kconfig.drivers.Linux.patch
+++ b/scst/scst/kernel/in-tree/Kconfig.drivers.Linux.patch

@@ -1,13 +1,13 @@
 diff --git a/drivers/Kconfig b/drivers/Kconfig
-index aa43b91..c96860e 100644
+index 8bad63417a50..a61b1804fcf3 100644
 --- a/drivers/Kconfig
 +++ b/drivers/Kconfig
-@@ -24,6 +24,8 @@ source "drivers/ide/Kconfig"
+@@ -39,6 +39,8 @@ source "drivers/ata/Kconfig"
  
- source "drivers/scsi/Kconfig"
+ source "drivers/md/Kconfig"
  
 +source "drivers/scst/Kconfig"
 +
- source "drivers/ata/Kconfig"
+ source "drivers/target/Kconfig"
  
- source "drivers/md/Kconfig"
+ source "drivers/message/fusion/Kconfig"

diff --git a/scst/scst/kernel/in-tree/Makefile.drivers.Linux.patch b/scst/scst/kernel/in-tree/Makefile.drivers.Linux.patch
index e027411..4dbb006 100644
--- a/scst/scst/kernel/in-tree/Makefile.drivers.Linux.patch
+++ b/scst/scst/kernel/in-tree/Makefile.drivers.Linux.patch

@@ -1,12 +1,12 @@
 diff --git a/drivers/Makefile b/drivers/Makefile
-index 31cf17dee252..b45c17aee468 100644
+index 27c018bdf4de..6fc8c24fd0d6 100644
 --- a/drivers/Makefile
 +++ b/drivers/Makefile
-@@ -75,6 +75,7 @@ obj-$(CONFIG_DAX)		+= dax/
- obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
- obj-$(CONFIG_NUBUS)		+= nubus/
- obj-y				+= macintosh/
-+obj-$(CONFIG_SCST)		+= scst/
- obj-$(CONFIG_IDE)		+= ide/
+@@ -81,6 +81,7 @@ obj-y				+= macintosh/
  obj-y				+= scsi/
  obj-y				+= nvme/
+ obj-$(CONFIG_ATA)		+= ata/
++obj-$(CONFIG_SCST)		+= scst/
+ obj-$(CONFIG_TARGET_CORE)	+= target/
+ obj-$(CONFIG_MTD)		+= mtd/
+ obj-$(CONFIG_SPI)		+= spi/

diff --git a/scst/scst/src/Makefile b/scst/scst/src/Makefile
index 486ec8a..b036b33 100644
--- a/scst/scst/src/Makefile
+++ b/scst/scst/src/Makefile

@@ -89,13 +89,7 @@
 	false; fi
 	-rm -f $(INSTALL_DIR)/scsi_tgt.ko
 	KDIR=$(KDIR) ../../scripts/sign-modules
-	# Apparently on RHEL 8 and CentOS 8 the module installation can
-	# leave stale symlinks in /lib/modules/$(KVER)/weak-updates/. These
-	# symlinks may cause loading of SCST to fail, Remove these symlinks
-	# before installing SCST.
-	if [ -e /usr/sbin/weak-modules ]; then				\
-		/usr/sbin/weak-modules --remove-kernel;			\
-	fi
+	(cd dev_handlers && KDIR=$(KDIR) ../../../scripts/sign-modules)
 	$(MAKE) -C $(KDIR) M=$(shell pwd)/dev_handlers			\
 	  $(shell [ -n "$(PASS_CC_TO_MAKE)" ] && echo CC="$(CC)")	\
 	  INSTALL_MOD_DIR=extra/dev_handlers				\

diff --git a/scst/scst/src/certs/Makefile b/scst/scst/src/certs/Makefile
index 2c9d76f..f373d30 100644
--- a/scst/scst/src/certs/Makefile
+++ b/scst/scst/src/certs/Makefile

@@ -9,6 +9,9 @@
 	openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 365000 \
 		-batch -x509 -config $< -outform DER -out scst_module_key.der \
 		-keyout scst_module_key.priv
+	# override those signing keys with the keys generated at kernel build time
+	openssl x509 -in "${KERNEL_FILES}/signing_key.pem" -out scst_module_key.der -outform DER
+	openssl pkey -in "${KERNEL_FILES}/signing_key.pem" -out scst_module_key.priv
 	chmod 600 $@
 
 .PHONY: module_signing_enabled

diff --git a/scst/scst/src/dev_handlers/scst_changer.c b/scst/scst/src/dev_handlers/scst_changer.c
index 53b6933..31b625c 100644
--- a/scst/scst/src/dev_handlers/scst_changer.c
+++ b/scst/scst/src/dev_handlers/scst_changer.c

@@ -83,12 +83,7 @@
 	do {
 		TRACE_DBG("%s", "Doing TEST_UNIT_READY");
 		rc = scsi_test_unit_ready(dev->scsi_dev,
-			SCST_GENERIC_CHANGER_TIMEOUT, CHANGER_RETRIES
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-					  );
-#else
-					  , NULL);
-#endif
+			SCST_GENERIC_CHANGER_TIMEOUT, CHANGER_RETRIES, NULL);
 		TRACE_DBG("TEST_UNIT_READY done: %x", rc);
 	} while ((--retries > 0) && rc);
 

diff --git a/scst/scst/src/dev_handlers/scst_disk.c b/scst/scst/src/dev_handlers/scst_disk.c
index fd01b8d..c42c5d6 100644
--- a/scst/scst/src/dev_handlers/scst_disk.c
+++ b/scst/scst/src/dev_handlers/scst_disk.c

@@ -200,8 +200,6 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
-
 static bool disk_on_sg_tablesize_low(struct scst_cmd *cmd)
 {
 	bool res;
@@ -264,7 +262,7 @@
 
 	WARN_ON_ONCE(IS_ERR_VALUE((long)result));
 
-	if (status_byte(result) == GOOD)
+	if ((result & 0xff) == SAM_STAT_GOOD)
 		goto out_complete;
 
 	work->result = result;
@@ -466,7 +464,6 @@
 	goto out_done;
 }
 
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30) */
 
 static enum scst_exec_res disk_perf_exec(struct scst_cmd *cmd)
 {
@@ -517,10 +514,8 @@
 	.attach =		disk_attach,
 	.detach =		disk_detach,
 	.parse =		disk_parse,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 	.exec =			disk_exec,
 	.on_sg_tablesize_low = disk_on_sg_tablesize_low,
-#endif
 	.dev_done =		disk_done,
 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
 	.default_trace_flags = SCST_DEFAULT_DEV_LOG_FLAGS,
@@ -538,9 +533,7 @@
 	.parse =		disk_parse,
 	.exec =			disk_perf_exec,
 	.dev_done =		disk_done,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 	.on_sg_tablesize_low = disk_on_sg_tablesize_low,
-#endif
 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
 	.default_trace_flags =	SCST_DEFAULT_DEV_LOG_FLAGS,
 	.trace_flags =		&trace_flag,

diff --git a/scst/scst/src/dev_handlers/scst_processor.c b/scst/scst/src/dev_handlers/scst_processor.c
index db259b9..d87abc5 100644
--- a/scst/scst/src/dev_handlers/scst_processor.c
+++ b/scst/scst/src/dev_handlers/scst_processor.c

@@ -83,12 +83,7 @@
 	do {
 		TRACE_DBG("%s", "Doing TEST_UNIT_READY");
 		rc = scsi_test_unit_ready(dev->scsi_dev,
-			SCST_GENERIC_PROCESSOR_TIMEOUT, PROCESSOR_RETRIES
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-					  );
-#else
-					  , NULL);
-#endif
+			SCST_GENERIC_PROCESSOR_TIMEOUT, PROCESSOR_RETRIES, NULL);
 		TRACE_DBG("TEST_UNIT_READY done: %x", rc);
 	} while ((--retries > 0) && rc);
 

diff --git a/scst/scst/src/dev_handlers/scst_raid.c b/scst/scst/src/dev_handlers/scst_raid.c
index 84f647a..4a0938f 100644
--- a/scst/scst/src/dev_handlers/scst_raid.c
+++ b/scst/scst/src/dev_handlers/scst_raid.c

@@ -83,12 +83,7 @@
 	do {
 		TRACE_DBG("%s", "Doing TEST_UNIT_READY");
 		rc = scsi_test_unit_ready(dev->scsi_dev,
-			SCST_GENERIC_RAID_TIMEOUT, RAID_RETRIES
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-					  );
-#else
-					  , NULL);
-#endif
+			SCST_GENERIC_RAID_TIMEOUT, RAID_RETRIES, NULL);
 		TRACE_DBG("TEST_UNIT_READY done: %x", rc);
 	} while ((--retries > 0) && rc);
 

diff --git a/scst/scst/src/dev_handlers/scst_tape.c b/scst/scst/src/dev_handlers/scst_tape.c
index 7c0ffd3..1bb0301 100644
--- a/scst/scst/src/dev_handlers/scst_tape.c
+++ b/scst/scst/src/dev_handlers/scst_tape.c

@@ -145,7 +145,7 @@
 	}
 
 	dev->block_size = TAPE_DEF_BLOCK_SIZE;
-	dev->block_shift = -1; /* not used */
+	dev->block_shift = scst_calc_block_shift(dev->block_size);
 
 	buffer = kmalloc(buffer_size, GFP_KERNEL);
 	if (!buffer) {
@@ -159,12 +159,7 @@
 	do {
 		TRACE_DBG("%s", "Doing TEST_UNIT_READY");
 		rc = scsi_test_unit_ready(dev->scsi_dev,
-			SCST_GENERIC_TAPE_SMALL_TIMEOUT, TAPE_RETRIES
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-					  );
-#else
-					  , NULL);
-#endif
+			SCST_GENERIC_TAPE_SMALL_TIMEOUT, TAPE_RETRIES, NULL);
 		TRACE_DBG("TEST_UNIT_READY done: %x", rc);
 	} while ((--retries > 0) && rc);
 
@@ -203,7 +198,7 @@
 		res = -ENODEV;
 		goto out_free_buf;
 	}
-	dev->block_shift = -1; /* not used */
+	dev->block_shift = scst_calc_block_shift(dev->block_size);
 
 obtain:
 	res = scst_obtain_device_parameters(dev, NULL);
@@ -250,7 +245,7 @@
 	 * there are existing commands.
 	 */
 	dev->block_size = block_size;
-	dev->block_shift = -1; /* not used */
+	dev->block_shift = scst_calc_block_shift(dev->block_size);
 	return;
 }
 

diff --git a/scst/scst/src/dev_handlers/scst_user.c b/scst/scst/src/dev_handlers/scst_user.c
index f5b7d74..0ceb8d2 100644
--- a/scst/scst/src/dev_handlers/scst_user.c
+++ b/scst/scst/src/dev_handlers/scst_user.c

@@ -4120,11 +4120,7 @@
 			struct scst_user_reply_cmd r;
 		};
 	};
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	struct class_device *class_member;
-#else
 	struct device *dev;
-#endif
 
 	TRACE_ENTRY();
 
@@ -4174,25 +4170,14 @@
 		goto out_class;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	class_member = class_device_create(dev_user_sysfs_class, NULL,
-				MKDEV(dev_user_major, 0), NULL, DEV_USER_NAME);
-	if (IS_ERR(class_member)) {
-		res = PTR_ERR(class_member);
-		goto out_chrdev;
-	}
-#else
 	dev = device_create(dev_user_sysfs_class, NULL,
 			    MKDEV(dev_user_major, 0),
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
 				NULL,
-#endif
 				DEV_USER_NAME);
 	if (IS_ERR(dev)) {
 		res = PTR_ERR(dev);
 		goto out_chrdev;
 	}
-#endif
 
 	cleanup_thread = kthread_run(dev_user_cleanup_thread, NULL,
 		"scst_usr_cleanupd");
@@ -4207,11 +4192,7 @@
 	return res;
 
 out_dev:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	class_device_destroy(dev_user_sysfs_class, MKDEV(dev_user_major, 0));
-#else
 	device_destroy(dev_user_sysfs_class, MKDEV(dev_user_major, 0));
-#endif
 
 out_chrdev:
 	unregister_chrdev(dev_user_major, DEV_USER_NAME);
@@ -4242,11 +4223,7 @@
 		TRACE_MGMT_DBG("kthread_stop() failed: %d", rc);
 
 	unregister_chrdev(dev_user_major, DEV_USER_NAME);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	class_device_destroy(dev_user_sysfs_class, MKDEV(dev_user_major, 0));
-#else
 	device_destroy(dev_user_sysfs_class, MKDEV(dev_user_major, 0));
-#endif
 	class_destroy(dev_user_sysfs_class);
 
 	scst_unregister_virtual_dev_driver(&dev_user_devtype);

diff --git a/scst/scst/src/dev_handlers/scst_vdisk.c b/scst/scst/src/dev_handlers/scst_vdisk.c
index 6ba1032..a4703bd 100644
--- a/scst/scst/src/dev_handlers/scst_vdisk.c
+++ b/scst/scst/src/dev_handlers/scst_vdisk.c

@@ -6,7 +6,7 @@
  *  Copyright (C) 2007 Ming Zhang <blackmagic02881 at gmail dot com>
  *  Copyright (C) 2007 Ross Walker <rswwalker at hotmail dot com>
  *  Copyright (C) 2007 - 2018 Western Digital Corporation
- *  Copyright (C) 2008 - 2018 Bart Van Assche <bvanassche@acm.org>
+ *  Copyright (C) 2008 - 2020 Bart Van Assche <bvanassche@acm.org>
  *
  *  SCSI disk (type 0) and CDROM (type 5) dev handler using files
  *  on file systems or block devices (VDISK)
@@ -29,6 +29,7 @@
 #include <linux/aio.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/unistd.h>
@@ -39,10 +40,10 @@
 #include <linux/ctype.h>
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 #include <linux/atomic.h>
-#else
-#include <asm/atomic.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) || \
+	(defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 9)
+#include <linux/blk-integrity.h>
 #endif
 #include <linux/kthread.h>
 #include <linux/sched.h>
@@ -53,9 +54,7 @@
 #include <linux/slab.h>
 #include <linux/bio.h>
 #include <linux/crc32c.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
 #include <linux/falloc.h>
-#endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #include <linux/sched/signal.h>
 #endif
@@ -76,7 +75,7 @@
 #define SCST_FIO_VENDOR			"SCST_FIO"
 #define SCST_BIO_VENDOR			"SCST_BIO"
 /* 4 byte ASCII Product Revision Level - left aligned */
-#define SCST_FIO_REV			"350 "
+#define SCST_FIO_REV			"370 "
 
 #define MAX_USN_LEN			(20+1) /* For '\0' */
 #define MAX_INQ_VEND_SPECIFIC_LEN	(INQ_BUF_SZ - 96)
@@ -165,7 +164,6 @@
 	unsigned int wt_flag:1;
 	unsigned int nv_cache:1;
 	unsigned int o_direct_flag:1;
-	unsigned int zero_copy:1;
 	unsigned int async:1;
 	unsigned int media_changed:1;
 	unsigned int prevent_allow_medium_removal:1;
@@ -191,12 +189,11 @@
 	struct file *fd;
 	struct file *dif_fd;
 	struct block_device *bdev;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
+	fmode_t bdev_mode;
 	struct bio_set *vdisk_bioset;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
 	struct bio_set vdisk_bioset_struct;
 #endif
-#endif
 
 	uint64_t format_progress_to_do, format_progress_done;
 
@@ -263,8 +260,8 @@
 		} sync;
 		struct {
 			struct kiocb	iocb;
-			struct kvec	*kvec;
-			struct kvec	small_kvec[4];
+			struct bio_vec	*bvec;
+			struct bio_vec	small_bvec[4];
 		} async;
 	};
 	struct scst_cmd *cmd;
@@ -287,12 +284,8 @@
 
 typedef enum compl_status_e (*vdisk_op_fn)(struct vdisk_cmd_params *p);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-#define DEF_NUM_THREADS		5
-#else
 /* Context RA patch supposed to be applied on the kernel */
 #define DEF_NUM_THREADS		8
-#endif
 static int num_threads = DEF_NUM_THREADS;
 
 module_param_named(num_threads, num_threads, int, S_IRUGO);
@@ -370,13 +363,6 @@
 
 	sBUG_ON(!name);
 
-	if (!virt_dev->dev_active) {
-		TRACE_MGMT_DBG("Skip opening for not active dev %s",
-			       virt_dev->dev->virt_name);
-		fd = ERR_PTR(-EMEDIUMTYPE);
-		goto out;
-	}
-
 	if (read_only)
 		open_flags |= O_RDONLY;
 	else
@@ -387,10 +373,6 @@
 	TRACE_DBG("Opening file %s, flags 0x%x", name, open_flags);
 	fd = filp_open(name, O_LARGEFILE | open_flags, 0600);
 	if (IS_ERR(fd)) {
-		PRINT_ERROR("JPM vdev_open_fd() failed for dev: %s", virt_dev->dev->virt_name);
-		PRINT_ERROR("JPM vdev_open_fd() virt_dev->o_direct_flag= %d, wt_flag= %d, nv_cache= %d", virt_dev->o_direct_flag, virt_dev->wt_flag, virt_dev->nv_cache);
-		PRINT_ERROR("JPM Failed opening file %s, flags 0x%x", name, open_flags);
-		PRINT_ERROR("JPM O_RDONLY: 0x%x, O_RDWR: 0x%x,  O_DIRECT: 0x%x, O_DSYNC: 0x%x", O_RDONLY, O_RDWR, O_DIRECT, O_DSYNC);
 		if (PTR_ERR(fd) == -EMEDIUMTYPE)
 			TRACE(TRACE_MINOR, "Unable to open %s with EMEDIUMTYPE, "
 				"DRBD passive?", name);
@@ -398,12 +380,10 @@
 			PRINT_ERROR("filp_open(%s) failed: %d", name, (int)PTR_ERR(fd));
 	}
 
-out:
 	TRACE_EXIT();
 	return fd;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 static void vdev_flush_end_io(struct bio *bio, int error)
 {
@@ -440,7 +420,6 @@
 	TRACE_EXIT();
 	return;
 }
-#endif
 
 static int vdisk_blockio_flush(struct block_device *bdev, gfp_t gfp_mask,
 	bool report_error, struct scst_cmd *cmd, bool async)
@@ -449,17 +428,20 @@
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 	if (async) {
-		struct bio *bio = bio_alloc(gfp_mask, 0);
+		struct bio *bio;
 
+		bio = bio_alloc(/*bdev=*/NULL, 0, /*opf=*/0, gfp_mask);
 		if (bio == NULL) {
 			res = -ENOMEM;
 			goto out_rep;
 		}
+
 		bio->bi_end_io = vdev_flush_end_io;
 		bio->bi_private = cmd;
+
 		bio_set_dev(bio, bdev);
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ||	\
 	(defined(CONFIG_SUSE_KERNEL) &&			\
 	 LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
@@ -480,25 +462,20 @@
 #endif
 		goto out;
 	} else {
-#else
-	{
-#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)           \
-    && !(defined(CONFIG_SUSE_KERNEL)                        \
-	 && LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 34))
-		res = blkdev_issue_flush(bdev, NULL);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-		res = blkdev_issue_flush(bdev, gfp_mask, NULL, BLKDEV_IFL_WAIT);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0) && \
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||	\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 4)
 		res = blkdev_issue_flush(bdev, gfp_mask, NULL);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) && \
+	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8 ||	\
+	 RHEL_MAJOR -0 == 8 && RHEL_MINOR -0 < 6)
 		res = blkdev_issue_flush(bdev, gfp_mask);
+#else
+		res = blkdev_issue_flush(bdev);
 #endif
 	}
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 out_rep:
-#endif
 	if ((res != 0) && report_error)
 		PRINT_ERROR("%s() failed: %d",
 			async ? "bio_alloc" : "blkdev_issue_flush", res);
@@ -509,50 +486,41 @@
 			scst_estimate_context());
 	}
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 out:
-#endif
 	TRACE_EXIT_RES(res);
 	return res;
 }
 
 static void vdisk_blockio_check_flush_support(struct scst_vdisk_dev *virt_dev)
 {
-	struct inode *inode;
-	struct file *fd;
+	struct block_device *bdev;
 
 	TRACE_ENTRY();
 
-	if (!virt_dev->blockio || virt_dev->rd_only || virt_dev->nv_cache || virt_dev->wt_flag || !virt_dev->dev_active)
+	if (!virt_dev->blockio || virt_dev->rd_only || virt_dev->nv_cache ||
+	    virt_dev->wt_flag || !virt_dev->dev_active)
 		goto out;
 
-	fd = filp_open(virt_dev->filename, O_LARGEFILE, 0600);
-	if (IS_ERR(fd)) {
-		if ((PTR_ERR(fd) == -EMEDIUMTYPE) && virt_dev->blockio)
+	bdev = blkdev_get_by_path(virt_dev->filename, FMODE_READ,
+				  (void *)__func__);
+	if (IS_ERR(bdev)) {
+		if (PTR_ERR(bdev) == -EMEDIUMTYPE)
 			TRACE(TRACE_MINOR, "Unable to open %s with EMEDIUMTYPE, "
 				"DRBD passive?", virt_dev->filename);
 		else
-			PRINT_ERROR("filp_open(%s) failed: %ld",
-				virt_dev->filename, PTR_ERR(fd));
+			PRINT_ERROR("blkdev_get_by_path(%s) failed: %ld",
+				virt_dev->filename, PTR_ERR(bdev));
 		goto out;
 	}
 
-	inode = file_inode(fd);
-
-	if (!S_ISBLK(inode->i_mode)) {
-		PRINT_ERROR("%s is NOT a block device", virt_dev->filename);
-		goto out_close;
-	}
-
-	if (vdisk_blockio_flush(inode->i_bdev, GFP_KERNEL, false, NULL, false) != 0) {
+	if (vdisk_blockio_flush(bdev, GFP_KERNEL, false, NULL, false) != 0) {
 		PRINT_WARNING("Device %s doesn't support barriers, switching "
 			"to NV_CACHE mode. Read README for more details.",
 			virt_dev->filename);
 		virt_dev->nv_cache = 1;
 	}
 
-out_close:
-	filp_close(fd, NULL);
+	blkdev_put(bdev, FMODE_READ);
 
 out:
 	TRACE_EXIT();
@@ -561,46 +529,51 @@
 
 static void vdisk_check_tp_support(struct scst_vdisk_dev *virt_dev)
 {
+	struct block_device *bdev = NULL;
 	struct file *fd = NULL;
 	bool fd_open = false;
+	int res;
 
 	TRACE_ENTRY();
 
 	virt_dev->dev_thin_provisioned = 0;
 
-	if (virt_dev->rd_only || (virt_dev->filename == NULL) || !virt_dev->dev_active)
+	if (virt_dev->rd_only || !virt_dev->filename || !virt_dev->dev_active)
 		goto check;
 
-	fd = filp_open(virt_dev->filename, O_LARGEFILE, 0600);
-	if (IS_ERR(fd)) {
-		if ((PTR_ERR(fd) == -EMEDIUMTYPE) && virt_dev->blockio)
-			TRACE(TRACE_MINOR, "Unable to open %s with EMEDIUMTYPE, "
-				"DRBD passive?", virt_dev->filename);
+	if (virt_dev->blockio) {
+		bdev = blkdev_get_by_path(virt_dev->filename, FMODE_READ,
+					  (void *)__func__);
+		res = PTR_ERR_OR_ZERO(bdev);
+	} else {
+		fd = filp_open(virt_dev->filename, O_LARGEFILE, 0600);
+		res = PTR_ERR_OR_ZERO(fd);
+	}
+	if (res) {
+		if (res == -EMEDIUMTYPE && virt_dev->blockio)
+			TRACE(TRACE_MINOR,
+			      "Unable to open %s with EMEDIUMTYPE, DRBD passive?",
+			      virt_dev->filename);
 		else
-			PRINT_ERROR("filp_open(%s) failed: %ld",
-				virt_dev->filename, PTR_ERR(fd));
+			PRINT_ERROR("opening %s failed: %d",
+				    virt_dev->filename, res);
 		goto check;
 	}
+
 	fd_open = true;
 
 	if (virt_dev->blockio) {
-		struct inode *inode = file_inode(fd);
-
-		if (!S_ISBLK(inode->i_mode)) {
-			PRINT_ERROR("%s is NOT a block device",
-				virt_dev->filename);
-			goto check;
-		}
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) || (defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 6)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 19, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
 		virt_dev->dev_thin_provisioned =
-			blk_queue_discard(bdev_get_queue(inode->i_bdev));
+			blk_queue_discard(bdev_get_queue(bdev));
+#else
+		virt_dev->dev_thin_provisioned =
+			!!bdev_max_discard_sectors(bdev);
 #endif
 	} else {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
 		virt_dev->dev_thin_provisioned = (fd->f_op->fallocate != NULL);
-#else
-		virt_dev->dev_thin_provisioned = 0;
-#endif
 	}
 
 check:
@@ -616,7 +589,6 @@
 		if (virt_dev->thin_provisioned)
 			PRINT_INFO("Auto enable thin provisioning for device "
 				"%s", virt_dev->filename);
-
 	}
 
 	if (virt_dev->thin_provisioned) {
@@ -632,12 +604,10 @@
 		}
 
 		if (virt_dev->blockio) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) || \
-	(defined(RHEL_MAJOR) && RHEL_MAJOR -0 >= 6)
 			struct request_queue *q;
 
 			sBUG_ON(!fd_open);
-			q = bdev_get_queue(file_inode(fd)->i_bdev);
+			q = bdev_get_queue(bdev);
 			virt_dev->unmap_opt_gran = q->limits.discard_granularity >> block_shift;
 			virt_dev->unmap_align = q->limits.discard_alignment >> block_shift;
 			if (virt_dev->unmap_opt_gran == virt_dev->unmap_align)
@@ -646,9 +616,6 @@
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
 			virt_dev->discard_zeroes_data = q->limits.discard_zeroes_data;
 #endif
-#else
-			sBUG();
-#endif
 		} else {
 			virt_dev->unmap_opt_gran = 1;
 			virt_dev->unmap_align = 0;
@@ -670,8 +637,12 @@
 			virt_dev->discard_zeroes_data);
 	}
 
-	if (fd_open)
-		filp_close(fd, NULL);
+	if (fd_open) {
+		if (virt_dev->blockio)
+			blkdev_put(bdev, FMODE_READ);
+		else
+			filp_close(fd, NULL);
+	}
 
 	TRACE_EXIT();
 	return;
@@ -681,69 +652,46 @@
+/*
+ * Query the size of the backing file or block device named by
+ * virt_dev->filename.
+ *
+ * On success 0 is returned and the size is stored in *file_size. On failure
+ * a negative error code is returned and *file_size is left untouched:
+ * -EMEDIUMTYPE if the device is not active, otherwise the error reported by
+ * scst_file_or_bdev_size().
+ */
 static int vdisk_get_file_size(const struct scst_vdisk_dev *virt_dev,
 	loff_t *file_size)
 {
-	struct inode *inode;
-	int res = 0;
-	struct file *fd;
+	/* Holds either the size (>= 0) or a negative error code. */
+	loff_t res;
 
 	TRACE_ENTRY();
 
 	sBUG_ON(!virt_dev->filename);
 
 	if (!virt_dev->dev_active) {
-		TRACE_DBG("Not active dev %s, skip reexaming", virt_dev->dev->virt_name);
+		TRACE_DBG("Not active dev %s, skip reexaming",
+			  virt_dev->dev->virt_name);
 		res = -EMEDIUMTYPE;
 		goto out;
 	}
 
-	*file_size = 0;
-
-	fd = filp_open(virt_dev->filename, O_LARGEFILE | O_RDONLY, 0600);
-	if (IS_ERR(fd)) {
-		res = PTR_ERR(fd);
-		if ((res == -EMEDIUMTYPE) && virt_dev->blockio)
-			TRACE(TRACE_MINOR, "Unable to open %s with EMEDIUMTYPE, "
-				"DRBD passive?", virt_dev->filename);
-		else
-			PRINT_ERROR("filp_open(%s) failed: %d", virt_dev->filename, res);
+	res = scst_file_or_bdev_size(virt_dev->filename);
+	/*
+	 * -EMEDIUMTYPE on a BLOCKIO device is only traced, not reported as an
+	 * error; every other negative value is logged below.
+	 */
+	if (res == -EMEDIUMTYPE && virt_dev->blockio) {
+		TRACE(TRACE_MINOR,
+		      "Unable to open %s with EMEDIUMTYPE, DRBD passive?",
+		      virt_dev->filename);
 		goto out;
 	}
-
-	inode = file_inode(fd);
-
-	if (virt_dev->blockio && !S_ISBLK(inode->i_mode)) {
-		PRINT_ERROR("File %s is NOT a block device", virt_dev->filename);
-		res = -EINVAL;
-		goto out_close;
+	if (res < 0) {
+		PRINT_ERROR("opening %s failed: %lld", virt_dev->filename, res);
+		goto out;
 	}
-
-	if (S_ISREG(inode->i_mode)) {
-		/* Nothing to do */
-	} else if (S_ISBLK(inode->i_mode)) {
-		inode = inode->i_bdev->bd_inode;
-	} else {
-		PRINT_ERROR("File %s unsupported mode: mode=0%o\n",
-			    virt_dev->filename, inode->i_mode);
-		res = -EINVAL;
-		goto out_close;
-	}
-
-	*file_size = inode->i_size;
-
-out_close:
-	filp_close(fd, NULL);
+	/* Hand the size to the caller and turn res into the 0 status. */
+	*file_size = res;
+	res = 0;
 
 out:
 	TRACE_EXIT_RES(res);
 	return res;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static struct scst_vdisk_dev *vdev_find(const char *name)
 {
 	struct scst_vdisk_dev *res, *vv;
 
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = NULL;
 	list_for_each_entry(vv, &vdev_list, vdev_list_entry) {
 		if (strcmp(vv->name, name) == 0) {
@@ -1010,28 +958,20 @@
 {
 	int res;
 	struct scst_device *dev = virt_dev->dev;
-	struct inode *inode;
-	struct file *fd;
+	struct block_device *bdev;
 	struct blk_integrity *bi;
 	const char *bi_profile_name;
 
 	TRACE_ENTRY();
 
-	fd = vdev_open_fd(virt_dev, virt_dev->filename, virt_dev->rd_only);
-	if (IS_ERR(fd)) {
-		res = -EINVAL;
+	bdev = blkdev_get_by_path(virt_dev->filename, FMODE_READ,
+				  (void *)__func__);
+	if (IS_ERR(bdev)) {
+		res = PTR_ERR(bdev);
 		goto out;
 	}
 
-	inode = file_inode(fd);
-
-	if (!S_ISBLK(inode->i_mode)) {
-		PRINT_ERROR("%s is NOT a block device!", virt_dev->filename);
-		res = -EINVAL;
-		goto out_close;
-	}
-
-	bi = bdev_get_integrity(inode->i_bdev);
+	bi = bdev_get_integrity(bdev);
 	if (bi == NULL) {
 		TRACE_DBG("Block integrity not supported");
 		goto out_no_bi;
@@ -1103,7 +1043,7 @@
 	res = 0;
 
 out_close:
-	filp_close(fd, NULL);
+	blkdev_put(bdev, FMODE_READ);
 
 out:
 	TRACE_EXIT_RES(res);
@@ -1219,7 +1159,7 @@
 		}
 	}
 
-	if (virt_dev->dif_filename != NULL) {
+	if (virt_dev->dev_active && virt_dev->dif_filename != NULL) {
 		/* Check if it can be used */
 		struct file *dfd = vdev_open_fd(virt_dev, virt_dev->dif_filename,
 					virt_dev->rd_only);
@@ -1251,12 +1191,6 @@
 		}
 	}
 
-	if (virt_dev->zero_copy && virt_dev->o_direct_flag) {
-		PRINT_ERROR("%s: combining zero_copy with o_direct is not"
-			    " supported", virt_dev->filename);
-		res = -EINVAL;
-		goto out;
-	}
 	if (!virt_dev->async && virt_dev->o_direct_flag) {
 		PRINT_ERROR("%s: using o_direct without setting async is not"
 			    " supported", virt_dev->filename);
@@ -1356,21 +1290,39 @@
 	return;
 }
 
+/*
+ * Tell whether @virt_dev currently has a backend open, i.e. either a block
+ * device handle (->bdev) or a file handle (->fd).
+ */
+static bool vdisk_is_open(const struct scst_vdisk_dev *virt_dev)
+{
+	if (virt_dev->fd)
+		return true;
+
+	return virt_dev->bdev != NULL;
+}
+
 static int vdisk_open_fd(struct scst_vdisk_dev *virt_dev, bool read_only)
 {
 	int res;
 
 	sBUG_ON(!virt_dev->filename);
-	sBUG_ON(virt_dev->fd);
+	sBUG_ON(vdisk_is_open(virt_dev));
 
-	virt_dev->fd = vdev_open_fd(virt_dev, virt_dev->filename, read_only);
-	if (IS_ERR(virt_dev->fd)) {
-		res = PTR_ERR(virt_dev->fd);
+	if (!virt_dev->dev_active) {
+		TRACE_MGMT_DBG("Skip opening for not active dev %s",
+			       virt_dev->dev->virt_name);
+		res = -EMEDIUMTYPE;
+	} else if (virt_dev->blockio) {
+		virt_dev->bdev_mode = FMODE_READ;
+		if (!read_only)
+			virt_dev->bdev_mode |= FMODE_WRITE;
+		virt_dev->bdev = blkdev_get_by_path(virt_dev->filename,
+					virt_dev->bdev_mode, (void *)__func__);
+		res = PTR_ERR_OR_ZERO(virt_dev->bdev);
+	} else {
+		virt_dev->fd = vdev_open_fd(virt_dev, virt_dev->filename,
+					    read_only);
+		res = PTR_ERR_OR_ZERO(virt_dev->fd);
+	}
+	if (res) {
+		virt_dev->bdev = NULL;
 		virt_dev->fd = NULL;
 		goto out;
 	}
-	virt_dev->bdev = virt_dev->blockio ? file_inode(virt_dev->fd)->i_bdev : NULL;
-	res = 0;
 
 	/*
 	 * For block devices, get the optimal I/O size from the block device
@@ -1390,27 +1342,34 @@
 		}
 	}
 
-	TRACE_DBG("virt_dev %s: fd %p open (dif_fd %p)", virt_dev->name,
-		virt_dev->fd, virt_dev->dif_fd);
+	TRACE_DBG("virt_dev %s: fd %p %p open (dif_fd %p)", virt_dev->name,
+		  virt_dev->fd, virt_dev->bdev, virt_dev->dif_fd);
 
 out:
 	return res;
 
 out_close_fd:
-	filp_close(virt_dev->fd, NULL);
-	virt_dev->fd = NULL;
+	if (virt_dev->blockio) {
+		blkdev_put(virt_dev->bdev, virt_dev->bdev_mode);
+		virt_dev->bdev = NULL;
+	} else {
+		filp_close(virt_dev->fd, NULL);
+		virt_dev->fd = NULL;
+	}
 	goto out;
 }
 
 static void vdisk_close_fd(struct scst_vdisk_dev *virt_dev)
 {
-	TRACE_DBG("virt_dev %s: closing fd %p (dif_fd %p)", virt_dev->name,
-		virt_dev->fd, virt_dev->dif_fd);
+	TRACE_DBG("virt_dev %s: closing fd %p %p (dif_fd %p)", virt_dev->name,
+		  virt_dev->fd, virt_dev->bdev, virt_dev->dif_fd);
 
-	if (virt_dev->fd) {
+	if (virt_dev->bdev) {
+		blkdev_put(virt_dev->bdev, virt_dev->bdev_mode);
+		virt_dev->bdev = NULL;
+	} else if (virt_dev->fd) {
 		filp_close(virt_dev->fd, NULL);
 		virt_dev->fd = NULL;
-		virt_dev->bdev = NULL;
 	}
 	if (virt_dev->dif_fd) {
 		filp_close(virt_dev->dif_fd, NULL);
@@ -1418,6 +1377,65 @@
 	}
 }
 
+/*
+ * Close the backend of @virt_dev and open it again, read-only if @read_only
+ * is set. Returns the result of vdisk_open_fd().
+ *
+ * To do: make this function transactional. That means that it either
+ * succeeds or does not modify the state of @virt_dev.
+ */
+static int vdisk_reopen_fd(struct scst_vdisk_dev *virt_dev, bool read_only)
+{
+	int res;
+
+	vdisk_close_fd(virt_dev);
+	res = vdisk_open_fd(virt_dev, read_only);
+
+	return res;
+}
+
+/*
+ * Mark @virt_dev active and bring its backend back into service.
+ *
+ * The backend is opened (read-only if @rd_only) only when at least one target
+ * device is attached; the open is tried several times with a short pause
+ * between attempts. Afterwards a pending reexamination, if any, is carried
+ * out.
+ *
+ * Returns 0 on success. If all open attempts fail, the device is marked
+ * inactive again and the error of the last attempt is returned. An error
+ * from vdisk_reexamine() is returned as well, with the device left active.
+ */
+static int vdisk_activate_dev(struct scst_vdisk_dev *virt_dev, bool rd_only)
+{
+	const int open_attempts = 6;
+	const unsigned int retry_delay_ms = 100;
+	int rc = 0;
+	int i;
+
+	virt_dev->dev_active = 1;
+
+	/*
+	 * Only re-open the FD if tgt_dev_cnt is not zero, otherwise we
+	 * would leak a reference.
+	 */
+	if (virt_dev->tgt_dev_cnt) {
+		for (i = 0; i < open_attempts; i++) {
+			rc = vdisk_open_fd(virt_dev, rd_only);
+			if (rc == 0)
+				break;
+
+			/* NOTE(review): "JPM" looks like debugging residue. */
+			PRINT_ERROR("JPM vdisk_open_fd() returned rc= %d for i= %d", rc, i);
+
+			/* Pause between attempts only, not after the last. */
+			if (i < open_attempts - 1)
+				msleep(retry_delay_ms);
+		}
+
+		if (rc) {
+			PRINT_ERROR("vdev %s: Unable to open FD: rd_only=%d, rc=%d",
+				    virt_dev->name, rd_only, rc);
+			virt_dev->dev_active = 0;
+			goto out;
+		}
+	}
+
+	if (virt_dev->reexam_pending) {
+		rc = vdisk_reexamine(virt_dev);
+		WARN_ON(rc != 0);
+		virt_dev->reexam_pending = 0;
+	}
+
+out:
+	return rc;
+}
+
+/*
+ * Take @virt_dev out of service: close its backend handles via
+ * vdisk_close_fd() and then clear the dev_active flag.
+ */
+static void vdisk_disable_dev(struct scst_vdisk_dev *virt_dev)
+{
+	/* Close the FD here */
+	vdisk_close_fd(virt_dev);
+	virt_dev->dev_active = 0;
+}
+
 /* Invoked with scst_mutex held, so no further locking is necessary here. */
 static int vdisk_attach_tgt(struct scst_tgt_dev *tgt_dev)
 {
@@ -1430,7 +1448,7 @@
 
 	virt_dev->tgt_dev_cnt++;
 
-	if (virt_dev->fd != NULL)
+	if (vdisk_is_open(virt_dev))
 		goto out;
 
 	if (!virt_dev->nullio && !virt_dev->cdrom_empty) {
@@ -1446,6 +1464,7 @@
 		}
 	} else {
 		virt_dev->fd = NULL;
+		virt_dev->bdev = NULL;
 		virt_dev->dif_fd = NULL;
 	}
 
@@ -1485,15 +1504,11 @@
 
 	/* BLOCKIO can be here for DIF tags fsync */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-	res = sync_page_range(file_inode(file), file->f_mapping, loff, len);
-#else
 #if 0	/* For sparse files we might need to sync metadata as well */
 	res = generic_write_sync(file, loff, len);
 #else
 	res = filemap_write_and_wait_range(file->f_mapping, loff, len);
 #endif
-#endif
 	if (unlikely(res != 0)) {
 		PRINT_ERROR("sync range failed (%d)", res);
 		if (cmd != NULL) {
@@ -1801,7 +1816,6 @@
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
 	TRACE_DBG("Fallocating range %lld, len %lld",
 		(unsigned long long)off, (unsigned long long)len);
 
@@ -1819,9 +1833,6 @@
 			SCST_LOAD_SENSE(scst_sense_write_error));
 		res = -EIO;
 	}
-#else
-	res = 0;
-#endif
 
 	TRACE_EXIT_RES(res);
 	return res;
@@ -1830,12 +1841,7 @@
 static int vdisk_unmap_range(struct scst_cmd *cmd,
 	struct scst_vdisk_dev *virt_dev, uint64_t start_lba, uint64_t blocks)
 {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27)
 	int res, err;
-#else
-	int res;
-#endif
-	struct file *fd = virt_dev->fd;
 
 	TRACE_ENTRY();
 
@@ -1856,25 +1862,12 @@
 		  (unsigned long long)start_lba, blocks);
 
 	if (virt_dev->blockio) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27)
+		struct block_device *bdev = virt_dev->bdev;
 		sector_t start_sector = start_lba << (cmd->dev->block_shift - 9);
 		sector_t nr_sects = blocks << (cmd->dev->block_shift - 9);
-		struct inode *inode = file_inode(fd);
 		gfp_t gfp = cmd->cmd_gfp_mask;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 31)
-		err = blkdev_issue_discard(inode->i_bdev, start_sector, nr_sects, gfp);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)       \
-      && !(LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 34) \
-	   && defined(CONFIG_SUSE_KERNEL))
-		err = blkdev_issue_discard(inode->i_bdev, start_sector, nr_sects,
-				gfp, DISCARD_FL_WAIT);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-		err = blkdev_issue_discard(inode->i_bdev, start_sector, nr_sects,
-				gfp, BLKDEV_IFL_WAIT);
-#else
-		err = blkdev_issue_discard(inode->i_bdev, start_sector, nr_sects, gfp, 0);
-#endif
+		err = blkdev_issue_discard(bdev, start_sector, nr_sects, gfp);
 		if (unlikely(err != 0)) {
 			PRINT_ERROR("blkdev_issue_discard() for "
 				"LBA %lld, blocks %lld failed: %d",
@@ -1884,14 +1877,10 @@
 			res = -EIO;
 			goto out;
 		}
-#else
-		scst_set_cmd_error(cmd, SCST_LOAD_SENSE(scst_sense_invalid_opcode));
-		res = -EIO;
-		goto out;
-#endif
 	} else {
 		loff_t off = start_lba << cmd->dev->block_shift;
 		loff_t len = blocks << cmd->dev->block_shift;
+		struct file *fd = virt_dev->fd;
 
 		res = vdisk_unmap_file_range(cmd, virt_dev, off, len, fd);
 		if (unlikely(res != 0))
@@ -2840,27 +2829,9 @@
+/*
+ * Per-bio private data of a synchronous bio: blockio_end_sync_io() stores
+ * the bio result in @error and then completes @c, which the submitter
+ * waits on.
+ */
 struct bio_priv_sync {
 	struct completion c;
 	int error;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	struct bio_set *bs;
-	struct completion c1;
-#endif
 };
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-static void blockio_bio_destructor_sync(struct bio *bio)
-{
-	struct bio_priv_sync *s = bio->bi_private;
-
-	bio_free(bio, s->bs);
-	complete(&s->c1);
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-static int blockio_end_sync_io(struct bio *bio, unsigned int bytes_done,
-			       int error)
-{
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 static void blockio_end_sync_io(struct bio *bio, int error)
 {
 #else
@@ -2875,11 +2846,6 @@
 #endif
 	struct bio_priv_sync *s = bio->bi_private;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-	if (bio->bi_size)
-		return 1;
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 	if (!bio_flagged(bio, BIO_UPTODATE) && error == 0) {
 		PRINT_ERROR("Not up to date bio with error 0; returning -EIO");
@@ -2890,11 +2856,7 @@
 	s->error = error;
 	complete(&s->c);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-	return 0;
-#else
 	return;
-#endif
 }
 
 /*
@@ -2913,10 +2875,6 @@
 {
 	struct bio_priv_sync s = {
 		COMPLETION_INITIALIZER_ONSTACK(s.c), 0,
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-		virt_dev->vdisk_bioset,
-		COMPLETION_INITIALIZER_ONSTACK(s.c1)
-#endif
 	};
 	struct block_device *bdev = virt_dev->bdev;
 	const bool is_vmalloc = is_vmalloc_addr(buf);
@@ -2926,22 +2884,15 @@
 	int max_nr_vecs, rc;
 	unsigned int bytes, off;
 	ssize_t ret = -ENOMEM;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	bool submitted = false;
-#endif
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
-	max_nr_vecs = BIO_MAX_PAGES;
+	max_nr_vecs = BIO_MAX_VECS;
 #else
-	max_nr_vecs = min(bio_get_nr_vecs(bdev), BIO_MAX_PAGES);
+	max_nr_vecs = min(bio_get_nr_vecs(bdev), BIO_MAX_VECS);
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
-	bio = bio_alloc_bioset(GFP_KERNEL, max_nr_vecs, virt_dev->vdisk_bioset);
-#else
-	bio = bio_alloc(GFP_KERNEL, max_nr_vecs);
-#endif
-
+	bio = bio_alloc_bioset(/*bdev=*/NULL, max_nr_vecs, /*opf=*/0,
+			       GFP_KERNEL, virt_dev->vdisk_bioset);
 	if (!bio)
 		goto out;
 
@@ -2955,9 +2906,6 @@
 	bio_set_dev(bio, bdev);
 	bio->bi_end_io = blockio_end_sync_io;
 	bio->bi_private = &s;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	bio->bi_destructor = blockio_bio_destructor_sync;
-#endif
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
 	bio->bi_sector = *loff >> 9;
 #else
@@ -2985,9 +2933,6 @@
 #else
 	submit_bio(bio);
 #endif
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	submitted = true;
-#endif
 	wait_for_completion(&s.c);
 	ret = (unsigned long)s.error;
 	if (likely(ret == 0)) {
@@ -2997,10 +2942,6 @@
 
 free:
 	bio_put(bio);
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	if (submitted)
-		wait_for_completion(&s.c1);
-#endif
 
 out:
 	return ret;
@@ -3166,28 +3107,74 @@
 	}
 }
 
-static bool vdisk_alloc_async_kvec(struct scst_cmd *cmd,
+/*
+ * Make p->async.bvec point to an array with enough bio_vec entries for the
+ * data buffer of @cmd.
+ *
+ * The embedded p->async.small_bvec array is used when it is large enough;
+ * otherwise an array is allocated with cmd->cmd_gfp_mask. Returns true on
+ * success and false if that allocation failed.
+ */
+static bool vdisk_alloc_async_bvec(struct scst_cmd *cmd,
 				   struct vdisk_cmd_params *p)
 {
 	int n;
 
+	/*
+	 * Kernels before 5.1 are sized by page count, later kernels by buffer
+	 * count.
+	 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+	n = scst_get_buf_page_count(cmd);
+#else
 	n = scst_get_buf_count(cmd);
-	if (n <= ARRAY_SIZE(p->async.small_kvec)) {
-		p->async.kvec = &p->async.small_kvec[0];
+#endif
+	if (n <= ARRAY_SIZE(p->async.small_bvec)) {
+		p->async.bvec = &p->async.small_bvec[0];
 		return true;
 	}
 
-	p->async.kvec = kmalloc_array(n, sizeof(*p->async.kvec),
+	p->async.bvec = kmalloc_array(n, sizeof(*p->async.bvec),
 				      cmd->cmd_gfp_mask);
-	if (p->async.kvec == NULL) {
-		PRINT_ERROR("Unable to allocate kvec (%d)", n);
+	if (p->async.bvec == NULL) {
+		PRINT_ERROR("Unable to allocate bvec (%d)", n);
 		return false;
 	}
 
 	return true;
 }
 
-static void fileio_async_complete(struct kiocb *iocb, long ret, long ret2)
+/*
+ * Describe @length bytes starting at @offset within @page by bio_vec entries
+ * written at @bvec, and return the position after the last entry written.
+ *
+ * On kernels before 5.1 one entry is emitted per page, walking the
+ * consecutive struct page pointers; on later kernels a single entry covers
+ * the whole range (presumably relying on multi-page bio_vec support).
+ */
+static inline
+struct bio_vec *vdisk_map_pages_to_bvec(struct bio_vec *bvec, struct page *page,
+					ssize_t length, int offset)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+	/* The first entry ends at the page boundary at the latest. */
+	ssize_t chunk = min_t(ssize_t, length, PAGE_SIZE - offset);
+#else
+	ssize_t chunk = length;
+#endif
+
+	*bvec++ = (struct bio_vec) {
+		.bv_page   = page,
+		.bv_offset = offset,
+		.bv_len    = chunk,
+	};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)
+	/* Remaining pages all start at offset 0. */
+	for (length -= chunk; length > 0; length -= chunk) {
+		page++;
+		chunk = min_t(ssize_t, length, PAGE_SIZE);
+
+		*bvec++ = (struct bio_vec) {
+			.bv_page   = page,
+			.bv_offset = 0,
+			.bv_len    = chunk,
+		};
+	}
+#endif
+
+	return bvec;
+}
+
+static void fileio_async_complete(struct kiocb *iocb, long ret
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
+				  , long ret2
+#endif
+				  )
 {
 	struct vdisk_cmd_params *p = container_of(iocb, typeof(*p), async.iocb);
 	struct scst_cmd *cmd = p->cmd;
@@ -3211,12 +3198,12 @@
 			w->cmd = cmd;
 			schedule_work(&w->work);
 			return;
-		} else {
-			scst_set_busy(cmd);
 		}
+
+		scst_set_busy(cmd);
 	}
 	cmd->completed = 1;
-	cmd->scst_cmd_done(cmd, SCST_CMD_STATE_DEFAULT, SCST_CONTEXT_SAME);
+	cmd->scst_cmd_done(cmd, SCST_CMD_STATE_DEFAULT, scst_estimate_context());
 }
 
 static enum compl_status_e fileio_exec_async(struct vdisk_cmd_params *p)
@@ -3227,10 +3214,10 @@
 	struct file *fd = virt_dev->fd;
 	struct iov_iter iter = { };
 	ssize_t length, total = 0;
-	struct kvec *kvec;
+	struct bio_vec *bvec;
+	struct page *page;
 	struct kiocb *iocb = &p->async.iocb;
-	uint8_t *address;
-	int sg_cnt = 0, dir, ret;
+	int offset, sg_cnt = 0, dir, ret;
 
 	switch (cmd->data_direction) {
 	case SCST_DATA_READ:
@@ -3244,28 +3231,32 @@
 		return CMD_FAILED;
 	}
 
-	if (!vdisk_alloc_async_kvec(cmd, p)) {
+	if (!vdisk_alloc_async_bvec(cmd, p)) {
 		scst_set_busy(cmd);
 		return CMD_SUCCEEDED;
 	}
 
 	p->execute_async = true;
 
-	kvec = p->async.kvec;
-	length = scst_get_buf_first(cmd, &address);
+	bvec = p->async.bvec;
+	length = scst_get_sg_page_first(cmd, &page, &offset);
 	while (length) {
-		*kvec++ = (struct kvec){
-			.iov_base = address,
-			.iov_len = length,
-		};
+		bvec = vdisk_map_pages_to_bvec(bvec, page, length, offset);
+
 		total += length;
 		sg_cnt++;
-		length = scst_get_buf_next(cmd, &address);
+		length = scst_get_sg_page_next(cmd, &page, &offset);
 	}
 
 	WARN_ON_ONCE(sg_cnt != cmd->sg_cnt);
 
-	iov_iter_kvec(&iter, dir, p->async.kvec, sg_cnt, total);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) || 	\
+		(defined(RHEL_RELEASE_CODE) && 		\
+		 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(8, 2))
+	iov_iter_bvec(&iter, dir, p->async.bvec, sg_cnt, total);
+#else
+	iov_iter_bvec(&iter, ITER_BVEC | dir, p->async.bvec, sg_cnt, total);
+#endif
 	*iocb = (struct kiocb) {
 		.ki_pos = p->loff,
 		.ki_filp = fd,
@@ -3287,10 +3278,17 @@
 		else
 			break;
 	}
-	if (p->async.kvec != p->async.small_kvec)
-		kfree(p->async.kvec);
-	if (ret != -EIOCBQUEUED)
+	if (p->async.bvec != p->async.small_bvec)
+		kfree(p->async.bvec);
+	if (ret != -EIOCBQUEUED) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
 		fileio_async_complete(iocb, ret, 0);
+#else
+		fileio_async_complete(iocb, ret);
+#endif
+	}
 	/*
 	 * Return RUNNING_ASYNC even if fileio_async_complete() has been
 	 * called because that function calls cmd->scst_cmd_done().
@@ -3414,7 +3412,7 @@
 	if (unlikely(!vdisk_parse_offset(&p, cmd)))
 		goto err;
 
-	if (unlikely(virt_dev->fd == NULL)) {
+	if (unlikely(virt_dev->bdev == NULL)) {
 		if (!vdisk_no_fd_allowed_commands(cmd)) {
 			/*
 			 * We should not get here, unless the user space
@@ -4542,7 +4540,6 @@
 static int vdisk_set_wt(struct scst_vdisk_dev *virt_dev, int wt, bool read_only)
 {
 	int res = 0;
-	struct file *fd, *dif_fd = NULL;
 	bool old_wt = virt_dev->wt_flag;
 
 	TRACE_ENTRY();
@@ -4554,42 +4551,20 @@
 	virt_dev->wt_flag = wt;
 	spin_unlock(&virt_dev->flags_lock);
 
-	if (virt_dev->fd == NULL)
-		goto out;
-
 	/*
-	 * MODE SELECT is strictly serialized command, so it's safe here
-	 * to reopen fd.
+	 * MODE SELECT is a strictly serialized command so it's safe to reopen
+	 * the fd.
 	 */
-
-	fd = vdev_open_fd(virt_dev, virt_dev->filename, read_only);
-	if (IS_ERR(fd)) {
-		res = PTR_ERR(fd);
-		goto out_err;
+	if (vdisk_is_open(virt_dev)) {
+		res = vdisk_reopen_fd(virt_dev, read_only);
+		if (res < 0)
+			goto out_err;
 	}
 
-	if (virt_dev->dif_filename != NULL) {
-		dif_fd = vdev_open_fd(virt_dev, virt_dev->dif_filename, read_only);
-		if (IS_ERR(dif_fd)) {
-			res = PTR_ERR(dif_fd);
-			goto out_err_close_fd;
-		}
-	}
-
-	filp_close(virt_dev->fd, NULL);
-	if (virt_dev->dif_fd)
-		filp_close(virt_dev->dif_fd, NULL);
-
-	virt_dev->fd = fd;
-	virt_dev->dif_fd = dif_fd;
-
 out:
 	TRACE_EXIT_RES(res);
 	return res;
 
-out_err_close_fd:
-	filp_close(fd, NULL);
-
 out_err:
 	spin_lock(&virt_dev->flags_lock);
 	virt_dev->wt_flag = old_wt;
@@ -4976,17 +4951,6 @@
 	return CMD_SUCCEEDED;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-/*
- * See also patch "block: Export I/O topology for block devices and partitions"
- * (commit ID c72758f33784).
- */
-static inline unsigned int queue_physical_block_size(struct request_queue *q)
-{
-	return 4096;
-}
-#endif
-
 static enum compl_status_e vdisk_exec_read_capacity16(struct vdisk_cmd_params *p)
 {
 	struct scst_cmd *cmd = p->cmd;
@@ -5723,10 +5687,6 @@
+/*
+ * Bookkeeping for the bios submitted on behalf of one BLOCKIO command:
+ * @bios_inflight is initialised to the number of bios plus one at submit
+ * time, @cmd is the command the bios belong to.
+ */
 struct scst_blockio_work {
 	atomic_t bios_inflight;
 	struct scst_cmd *cmd;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	/* just to avoid extra dereferences */
-	struct bio_set *bioset;
-#endif
 };
 
 static inline void blockio_check_finish(struct scst_blockio_work *blockio_work)
@@ -5770,20 +5730,7 @@
 	kmem_cache_free(blockio_work_cachep, blockio_work);
 }
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-static void blockio_bio_destructor(struct bio *bio)
-{
-	struct scst_blockio_work *blockio_work = bio->bi_private;
-
-	bio_free(bio, blockio_work->bioset);
-	blockio_check_finish(blockio_work);
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-static int blockio_endio(struct bio *bio, unsigned int bytes_done, int error)
-{
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 static void blockio_endio(struct bio *bio, int error)
 {
 #else
@@ -5798,11 +5745,6 @@
 #endif
 	struct scst_blockio_work *blockio_work = bio->bi_private;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-	if (bio->bi_size)
-		return 1;
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 	if (unlikely(!bio_flagged(bio, BIO_UPTODATE))) {
 		if (error == 0) {
@@ -5826,9 +5768,7 @@
 		 */
 		spin_lock_irqsave(&vdev_err_lock, flags);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-		if (bio->bi_rw & (1 << BIO_RW)) {
-#elif (!defined(CONFIG_SUSE_KERNEL) &&			 \
+#if (!defined(CONFIG_SUSE_KERNEL) &&			 \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) || \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 		if (bio->bi_rw & REQ_WRITE) {
@@ -5851,27 +5791,15 @@
 		spin_unlock_irqrestore(&vdev_err_lock, flags);
 	}
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)) || (LINUX_VERSION_CODE > KERNEL_VERSION(3, 6, 0))
 	blockio_check_finish(blockio_work);
-#endif
 
 	bio_put(bio);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-	return 0;
-#else
 	return;
-#endif
 }
 
 static void vdisk_bio_set_failfast(struct bio *bio)
 {
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27)
-	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
-#elif LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 35)
-	bio->bi_rw |= (1 << BIO_RW_FAILFAST_DEV) |
-		      (1 << BIO_RW_FAILFAST_TRANSPORT) |
-		      (1 << BIO_RW_FAILFAST_DRIVER);
-#elif (!defined(CONFIG_SUSE_KERNEL) &&			 \
+#if (!defined(CONFIG_SUSE_KERNEL) &&			 \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) || \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 	bio->bi_rw |= REQ_FAILFAST_DEV |
@@ -5889,36 +5817,20 @@
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ||			\
 defined(CONFIG_SUSE_KERNEL) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 	bio->bi_opf |= REQ_SYNC;
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) ||			\
-	(defined(RHEL_MAJOR) &&						\
-	 (RHEL_MAJOR -0 > 6 || RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 > 0))
-	bio->bi_rw |= REQ_SYNC;
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
-	bio->bi_rw |= 1 << BIO_RW_SYNCIO;
 #else
-	bio->bi_rw |= 1 << BIO_RW_SYNC;
+	bio->bi_rw |= REQ_SYNC;
 #endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ||			\
 defined(CONFIG_SUSE_KERNEL) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 	bio->bi_opf |= REQ_PRIO;
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) ||			\
-	(defined(RHEL_MAJOR) &&						\
-	 (RHEL_MAJOR -0 > 6 || RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 > 0))
+#else
 	bio->bi_rw |= REQ_META;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
 	/*
 	 * Priority boosting was separated from REQ_META in commit 65299a3b
 	 * (kernel 3.1.0).
 	 */
 	bio->bi_rw |= REQ_PRIO;
 #endif
-#elif !defined(RHEL_MAJOR) || RHEL_MAJOR -0 >= 6
-	/*
-	 * BIO_* and REQ_* flags were unified in commit 7b6d91da (kernel
-	 * 2.6.36).
-	 */
-	bio->bi_rw |= BIO_RW_META;
-#endif
 }
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -5964,7 +5876,7 @@
 	}
 
 	bip = bio_integrity_alloc(bio, gfp_mask, pages);
-	if (unlikely(bip == NULL)) {
+	if (IS_ERR_OR_NULL(bip)) {
 		PRINT_WARNING("Allocation of %d pages for DIF tags "
 			"failed! (dev %s)", pages, dev->virt_name);
 		goto out; /* proceed without integrity */
@@ -6033,9 +5945,7 @@
 	struct scst_vdisk_dev *virt_dev = dev->dh_priv;
 	int block_shift = dev->block_shift;
 	struct block_device *bdev = virt_dev->bdev;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 	struct bio_set *bs = virt_dev->vdisk_bioset;
-#endif
 	struct request_queue *q = bdev_get_queue(bdev);
 	int length, max_nr_vecs = 0, offset;
 	struct page *page;
@@ -6044,9 +5954,7 @@
 	struct scst_blockio_work *blockio_work;
 	int bios = 0;
 	gfp_t gfp_mask = cmd->cmd_gfp_mask;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
 	struct blk_plug plug;
-#endif
 	struct scatterlist *dsg;
 	int dsg_offs, dsg_len;
 	bool dif = virt_dev->blk_integrity &&
@@ -6082,15 +5990,12 @@
 #endif
 
 	blockio_work->cmd = cmd;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-	blockio_work->bioset = bs;
-#endif
 
 	if (q)
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
-		max_nr_vecs = BIO_MAX_PAGES;
+		max_nr_vecs = BIO_MAX_VECS;
 #else
-		max_nr_vecs = min(bio_get_nr_vecs(bdev), BIO_MAX_PAGES);
+		max_nr_vecs = min(bio_get_nr_vecs(bdev), BIO_MAX_VECS);
 #endif
 	else
 		max_nr_vecs = 1;
@@ -6125,12 +6030,8 @@
 			int rc;
 
 			if (need_new_bio) {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
-				bio = bio_alloc_bioset(gfp_mask, max_nr_vecs, bs);
-#else
-				bio = bio_alloc(gfp_mask, max_nr_vecs);
-#endif
-
+				bio = bio_alloc_bioset(/*bdev=*/NULL, max_nr_vecs, /*opf=*/0,
+						       gfp_mask, bs);
 				if (!bio) {
 					PRINT_ERROR("Failed to create bio "
 						"for data segment %d (cmd %p)",
@@ -6149,13 +6050,8 @@
 #endif
 				bio_set_dev(bio, bdev);
 				bio->bi_private = blockio_work;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 6, 0))
-				bio->bi_destructor = blockio_bio_destructor;
-#endif
 				if (write)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-					bio->bi_rw |= (1 << BIO_RW);
-#elif (!defined(CONFIG_SUSE_KERNEL) &&			\
+#if (!defined(CONFIG_SUSE_KERNEL) &&			\
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) || \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 					bio->bi_rw |= REQ_WRITE;
@@ -6221,9 +6117,7 @@
 	/* +1 to prevent erroneous too early command completion */
 	atomic_set(&blockio_work->bios_inflight, bios+1);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
 	blk_start_plug(&plug);
-#endif
 
 	while (hbio) {
 		bio = hbio;
@@ -6239,12 +6133,7 @@
 #endif
 	}
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
 	blk_finish_plug(&plug);
-#else
-	if (q && q->unplug_fn)
-		q->unplug_fn(q);
-#endif
 
 	if ((dev->dev_dif_mode & SCST_DIF_MODE_DEV_STORE) &&
 	    (virt_dev->dif_fd != NULL) &&
@@ -6480,9 +6369,7 @@
 	if (!close)
 		return;
 
-	virt_dev->dev_active = 0;
-
-	vdisk_close_fd(virt_dev);
+	vdisk_disable_dev(virt_dev);
 
 	TRACE_EXIT();
 	return;
@@ -6512,36 +6399,9 @@
 	if (!open)
 		return;
 
-	virt_dev->dev_active = 1;
-
-	/*
-	 * only reopen fd if tgt_dev_cnt is not zero, otherwise we will
-	 * leak reference.
-	 */
-
-	if (virt_dev->tgt_dev_cnt) {
-		int i=0;
-		while (i<=5) {
-			rc = vdisk_open_fd(virt_dev, dev->dev_rd_only);
-			if (rc!=0) {
-				PRINT_ERROR("JPM vdisk_open_fd() returned rc= %d for i= %d", rc, i);
-				i++;
-				msleep(100);
-			}
-			else {
-				break;
-			}
-		}
-	}
-
-	if (rc == 0) {
-		if (virt_dev->reexam_pending) {
-			rc = vdisk_reexamine(virt_dev);
-			WARN_ON(rc != 0);
-			virt_dev->reexam_pending = 0;
-		}
-	} else {
-		PRINT_ERROR("dev %s: opening after ALUA state change to %s failed",
+	rc = vdisk_activate_dev(virt_dev, dev->dev_rd_only);
+	if (rc) {
+		PRINT_ERROR("dev %s: Activating after ALUA state change to %s failed",
 			    dev->virt_name,
 			    scst_alua_state_name(new_state));
 	}
@@ -6732,10 +6592,6 @@
 				(long long)be64_to_cpu(virt_dev->dif_static_app_tag_combined));
 	}
 
-	if (virt_dev->zero_copy)
-		i += snprintf(&buf[i], buf_size - i, "%sZERO_COPY",
-			(j == i) ? "(" : ", ");
-
 	if (virt_dev->async)
 		i += snprintf(&buf[i], buf_size - i, "%sASYNC",
 			(j == i) ? "(" : ", ");
@@ -6759,7 +6615,7 @@
 	sBUG_ON(virt_dev->nullio);
 	sBUG_ON(!virt_dev->filename);
 
-	if ((virt_dev->fd == NULL) || !virt_dev->dev_active) {
+	if ((!virt_dev->fd && !virt_dev->bdev) || !virt_dev->dev_active) {
 		res = -EMEDIUMTYPE;
 		goto out;
 	}
@@ -6799,7 +6655,6 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 static int vdisk_create_bioset(struct scst_vdisk_dev *virt_dev)
 {
 	int res = 0;
@@ -6822,12 +6677,8 @@
 	}
 
 	if (virt_dev->dif_mode & SCST_DIF_MODE_DEV) {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
 		/* The same, pool size doesn't really matter */
 		res = bioset_integrity_create(virt_dev->vdisk_bioset, 2);
-#else
-		res = -EOPNOTSUPP;
-#endif
 		if (res != 0) {
 			PRINT_ERROR("Failed to create integrity bioset "
 				"(dev %s)", virt_dev->name);
@@ -6860,7 +6711,6 @@
 	bioset_free(virt_dev->vdisk_bioset);
 #endif
 }
-#endif
 
 static void vdev_inq_changed_fn(struct work_struct *work)
 {
@@ -6879,7 +6729,6 @@
 	return;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static int vdev_create_node(struct scst_dev_type *devt,
 	const char *name, int nodeid, struct scst_vdisk_dev **res_virt_dev)
 {
@@ -6887,6 +6736,8 @@
 	struct scst_vdisk_dev *virt_dev, *vv;
 	uint64_t dev_id_num;
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = -EEXIST;
 	if (vdev_find(name))
 		goto out;
@@ -6983,9 +6834,7 @@
 
 static void vdev_destroy(struct scst_vdisk_dev *virt_dev)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 	vdisk_free_bioset(virt_dev);
-#endif
 	kfree(virt_dev->filename);
 	kfree(virt_dev->dif_filename);
 	kfree(virt_dev);
@@ -7160,8 +7009,8 @@
 				}
 				if (dd == NULL)
 					break;
-				else
-					*dd = '|';
+
+				*dd = '|';
 				d = dd+1;
 			}
 			TRACE_DBG("DIF DEV mode %x", virt_dev->dif_mode);
@@ -7239,8 +7088,6 @@
 			virt_dev->thin_provisioned_manually_set = 1;
 			TRACE_DBG("THIN PROVISIONED %d",
 				virt_dev->thin_provisioned);
-		} else if (!strcasecmp("zero_copy", p)) {
-			virt_dev->zero_copy = !!ull_val;
 		} else if (!strcasecmp("async", p)) {
 			virt_dev->async = !!ull_val;
 		} else if (!strcasecmp("size", p)) {
@@ -7286,7 +7133,6 @@
 	return res;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static int vdev_fileio_add_device(const char *device_name, char *params)
 {
 	int res = 0;
@@ -7294,6 +7140,8 @@
 
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = vdev_create(&vdisk_file_devtype, device_name, &virt_dev);
 	if (res != 0)
 		goto out;
@@ -7349,7 +7197,6 @@
 	goto out;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static int vdev_blockio_add_device(const char *device_name, char *params)
 {
 	int res = 0;
@@ -7357,6 +7204,8 @@
 
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = vdev_create(&vdisk_blk_devtype, device_name, &virt_dev);
 	if (res != 0)
 		goto out;
@@ -7381,11 +7230,9 @@
 
 	vdev_check_node(&virt_dev, NUMA_NO_NODE);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 	res = vdisk_create_bioset(virt_dev);
 	if (res != 0)
 		goto out_destroy;
-#endif
 
 	list_add_tail(&virt_dev->vdev_list_entry, &vdev_list);
 
@@ -7413,7 +7260,6 @@
 	goto out;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static int vdev_nullio_add_device(const char *device_name, char *params)
 {
 	int res = 0;
@@ -7421,6 +7267,8 @@
 
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = vdev_create(&vdisk_null_devtype, device_name, &virt_dev);
 	if (res != 0)
 		goto out;
@@ -7537,11 +7385,12 @@
 	cancel_work_sync(&virt_dev->vdev_inq_changed_work);
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static void vdev_del_device(struct scst_vdisk_dev *virt_dev)
 {
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	scst_unregister_virtual_device(virt_dev->virt_id, vdev_on_free,
 				       virt_dev);
 
@@ -7584,7 +7433,6 @@
 	return res;
 }
 
-/* scst_vdisk_mutex supposed to be held */
 static ssize_t __vcdrom_add_device(const char *device_name, char *params)
 {
 	int res = 0;
@@ -7592,6 +7440,8 @@
 
 	TRACE_ENTRY();
 
+	lockdep_assert_held(&scst_vdisk_mutex);
+
 	res = vdev_create(&vcdrom_devtype, device_name, &virt_dev);
 	if (res != 0)
 		goto out;
@@ -7690,24 +7540,20 @@
 }
 
 
-static int vcdrom_change(struct scst_vdisk_dev *virt_dev,
-	char *buffer)
+static int vcdrom_change(struct scst_vdisk_dev *virt_dev, char *buffer)
 {
 	loff_t err;
-	char *old_fn, *p, *pp;
+	char *old_fn, *p;
 	bool old_empty;
-	struct file *old_fd;
-	struct file *old_dif_fd;
 	const char *filename = NULL;
 	int length = strlen(buffer);
 	int res = 0;
 
 	TRACE_ENTRY();
 
-	TRACE_DBG("virt_dev %s, empty %d, fd %p (dif_fd %p), filename %p", virt_dev->name,
-		virt_dev->cdrom_empty, virt_dev->fd, virt_dev->dif_fd, virt_dev->filename);
-
-	sBUG_ON(virt_dev->dif_fd); /* DIF is not supported for CDROMs */
+	TRACE_DBG("virt_dev %s, empty %d, fd %p (dif_fd %p), filename %p",
+		  virt_dev->name, virt_dev->cdrom_empty, virt_dev->fd,
+		  virt_dev->dif_fd, virt_dev->filename);
 
 	if (virt_dev->prevent_allow_medium_removal) {
 		PRINT_ERROR("Prevent medium removal for "
@@ -7718,16 +7564,16 @@
 
 	p = buffer;
 
+	/* Skip leading whitespace */
 	while (isspace(*p) && *p != '\0')
 		p++;
 	filename = p;
+	/* Strip trailing whitespace */
+	WARN_ON_ONCE(length == 0);
 	p = &buffer[length-1];
-	pp = &buffer[length];
-	while (isspace(*p) && (*p != '\0')) {
-		pp = p;
+	while (p > buffer && isspace(*p))
 		p--;
-	}
-	*pp = '\0';
+	p[1] = '\0';
 
 	res = scst_suspend_activity(SCST_SUSPEND_TIMEOUT_USER);
 	if (res != 0)
@@ -7737,8 +7583,6 @@
 	mutex_lock(&scst_mutex);
 
 	old_empty = virt_dev->cdrom_empty;
-	old_fd = virt_dev->fd;
-	old_dif_fd = virt_dev->dif_fd;
 	old_fn = virt_dev->filename;
 
 	if (*filename == '\0') {
@@ -7765,24 +7609,17 @@
 		res = vdisk_get_file_size(virt_dev, &err);
 		if (res != 0)
 			goto out_free_fn;
-		if (virt_dev->fd == NULL) {
-			res = vdisk_open_fd(virt_dev, true);
+		if (!vdisk_is_open(virt_dev)) {
+			res = vdisk_reopen_fd(virt_dev, true);
 			if (res != 0)
 				goto out_free_fn;
 			sBUG_ON(!virt_dev->fd);
-
-			TRACE_DBG("Closing old_fd %p", old_fd);
-			if (old_fd != NULL)
-				filp_close(old_fd, NULL);
-			if (old_dif_fd != NULL)
-				filp_close(old_dif_fd, NULL);
-			old_fd = NULL;
-			old_dif_fd = NULL;
 		}
 	} else {
 		err = 0;
 		virt_dev->filename = NULL;
 		virt_dev->fd = NULL;
+		virt_dev->bdev = NULL;
 	}
 
 	virt_dev->file_size = err;
@@ -7820,7 +7657,6 @@
 	return res;
 
 out_free_fn:
-	virt_dev->fd = old_fd;
 	kfree(virt_dev->filename);
 	virt_dev->filename = old_fn;
 
@@ -7940,7 +7776,7 @@
 
 	virt_dev = dev->dh_priv;
 
-	queue_ua = (virt_dev->fd != NULL);
+	queue_ua = vdisk_is_open(virt_dev);
 
 	if ((new_size & ((1 << virt_dev->blk_shift) - 1)) == 0) {
 		virt_dev->file_size = new_size;
@@ -8158,9 +7994,7 @@
 	virt_dev = dev->dh_priv;
 
 	pos = sprintf(buf, "%d\n%s", virt_dev->thin_provisioned,
-		      virt_dev->thin_provisioned_manually_set &&
-		      (virt_dev->thin_provisioned !=
-		       virt_dev->dev_thin_provisioned) ?
+		      virt_dev->thin_provisioned_manually_set ?
 		      SCST_SYSFS_KEY_MARK "\n" : "");
 
 	TRACE_EXIT_RES(pos);
@@ -9142,18 +8976,10 @@
 	}
 
 	write_lock(&vdisk_serial_rwlock);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0) ||	\
-    defined(CONFIG_SUSE_KERNEL) &&			\
-    LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 76)
 	if (hex2bin(virt_dev->eui64_id, buf, count / 2) == 0)
 		virt_dev->eui64_id_len = count / 2;
 	else
 		res = -EINVAL;
-#else
-	memset(virt_dev->eui64_id, 0, sizeof(virt_dev->eui64_id));
-	hex2bin(virt_dev->eui64_id, buf, count / 2);
-	virt_dev->eui64_id_len = count / 2;
-#endif
 	write_unlock(&vdisk_serial_rwlock);
 
 	if (res >= 0)
@@ -9212,13 +9038,13 @@
 	case 2 * 8:
 		if (strchr("235", buf[0]))
 			break;
-		else
-			goto out;
+
+		goto out;
 	case 2 * 16:
 		if (strchr("6", buf[0]))
 			break;
-		else
-			goto out;
+
+		goto out;
 	default:
 		goto out;
 	}
@@ -9226,18 +9052,10 @@
 	res = count;
 
 	write_lock(&vdisk_serial_rwlock);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0) ||	\
-    defined(CONFIG_SUSE_KERNEL) &&			\
-    LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 76)
 	if (hex2bin(virt_dev->naa_id, buf, c / 2) == 0)
 		virt_dev->naa_id_len = c / 2;
 	else
 		res = -EINVAL;
-#else
-	memset(virt_dev->naa_id, 0, sizeof(virt_dev->naa_id));
-	hex2bin(virt_dev->naa_id, buf, c / 2);
-	virt_dev->naa_id_len = c / 2;
-#endif
 	write_unlock(&vdisk_serial_rwlock);
 
 	if (res >= 0)
@@ -9441,36 +9259,24 @@
 	res = kstrtol(work->buf, 0, &dev_active);
 	if (res)
 		goto unlock;
-	res = -EINVAL;
-	if (dev_active < 0 || dev_active > 1)
+
+	if (dev_active < 0 || dev_active > 1) {
+		res = -EINVAL;
 		goto unlock;
-	if (dev_active != virt_dev->dev_active) {
-		res = 0;
-		if (dev_active == 0) {
-			/* Close the FD here */
-			vdisk_close_fd(virt_dev);
-			virt_dev->dev_active = dev_active;
-		} else {
-			/* Re-open FD if tgt_dev_cnt is not zero */
-			virt_dev->dev_active = dev_active;
-			if (virt_dev->tgt_dev_cnt)
-				res = vdisk_open_fd(virt_dev, dev->dev_rd_only);
-			if (res == 0) {
-				if (virt_dev->reexam_pending) {
-					res = vdisk_reexamine(virt_dev);
-					WARN_ON(res != 0);
-					virt_dev->reexam_pending = 0;
-				}
-			} else {
-				PRINT_ERROR("Unable to open FD on active -> "
-					"%ld (dev %s): %d", dev_active,
-					dev->virt_name, res);
-				virt_dev->dev_active = 0;
-				goto unlock;
-			}
-		}
+	}
+
+	if (dev_active == virt_dev->dev_active)
+		goto unlock;
+
+	if (dev_active == 0) {
+		vdisk_disable_dev(virt_dev);
 	} else {
-		res = 0;
+		res = vdisk_activate_dev(virt_dev, dev->dev_rd_only);
+		if (res) {
+			PRINT_ERROR("dev %s: Activating failed",
+				    dev->virt_name);
+			goto unlock;
+		}
 	}
 
 unlock:
@@ -9574,25 +9380,6 @@
 	return res;
 }
 
-static ssize_t vdev_zero_copy_show(struct kobject *kobj,
-					struct kobj_attribute *attr, char *buf)
-{
-	int pos = 0;
-	struct scst_device *dev;
-	struct scst_vdisk_dev *virt_dev;
-
-	TRACE_ENTRY();
-
-	dev = container_of(kobj, struct scst_device, dev_kobj);
-	virt_dev = dev->dh_priv;
-
-	pos = sprintf(buf, "%d\n%s", virt_dev->zero_copy,
-		      virt_dev->zero_copy ? SCST_SYSFS_KEY_MARK "\n" : "");
-
-	TRACE_EXIT_RES(pos);
-	return pos;
-}
-
 static ssize_t vdev_async_store(struct kobject *kobj,
 	struct kobj_attribute *attr, const char *buf, size_t count)
 {
@@ -9733,8 +9520,6 @@
 	__ATTR(inq_vend_specific, S_IWUSR|S_IRUGO,
 	       vdev_sysfs_inq_vend_specific_show,
 	       vdev_sysfs_inq_vend_specific_store);
-static struct kobj_attribute vdev_zero_copy_attr =
-	__ATTR(zero_copy, S_IRUGO, vdev_zero_copy_show, NULL);
 static struct kobj_attribute vdev_async_attr =
 	__ATTR(async, S_IWUSR|S_IRUGO, vdev_async_show, vdev_async_store);
 
@@ -9781,7 +9566,6 @@
 	&vdev_eui64_id_attr.attr,
 	&vdev_usn_attr.attr,
 	&vdev_inq_vend_specific_attr.attr,
-	&vdev_zero_copy_attr.attr,
 	&vdev_async_attr.attr,
 	NULL,
 };
@@ -9804,7 +9588,6 @@
 	"thin_provisioned",
 	"tst",
 	"write_through",
-	"zero_copy",
 	NULL
 };
 
@@ -10106,23 +9889,13 @@
 static int __init vdev_check_mode_pages_path(void)
 {
 	int res;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	struct nameidata nd;
-#else
 	struct path path;
-#endif
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	res = path_lookup(VDEV_MODE_PAGES_DIR, 0, &nd);
-	if (res == 0)
-		scst_path_put(&nd);
-#else
 	res = kern_path(VDEV_MODE_PAGES_DIR, 0, &path);
 	if (res == 0)
 		path_put(&path);
-#endif
 	if (res != 0) {
 		PRINT_WARNING("Unable to find %s (err %d), saved mode pages "
 			"disabled. You should create this directory manually "

diff --git a/scst/scst/src/scst_copy_mgr.c b/scst/scst/src/scst_copy_mgr.c
index 68e081b..874d0da 100644
--- a/scst/scst/src/scst_copy_mgr.c
+++ b/scst/scst/src/scst_copy_mgr.c

@@ -180,11 +180,7 @@
 
 struct scst_cm_retry {
 	struct scst_cmd *cm_retry_cmd;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct work_struct cm_retry_work;
-#else
 	struct delayed_work cm_retry_work;
-#endif
 	scst_cm_retry_fn_t cm_retry_fn;
 };
 
@@ -1950,12 +1946,12 @@
 			if (l->cm_list_id_state == SCST_CM_LIST_ID_STATE_PENDING_FREE) {
 				scst_cm_del_free_list_id(l);
 				break;
-			} else {
-				TRACE_DBG("List id %d already exists", list_id);
-				scst_set_cmd_error(cmd,
-					SCST_LOAD_SENSE(scst_sense_operation_in_progress));
-				goto out_unlock_free;
 			}
+
+			TRACE_DBG("List id %d already exists", list_id);
+			scst_set_cmd_error(cmd,
+				SCST_LOAD_SENSE(scst_sense_operation_in_progress));
+			goto out_unlock_free;
 		}
 	}
 
@@ -1975,16 +1971,10 @@
 	goto out;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-void sess_cm_list_id_cleanup_work_fn(void *p)
-{
-	struct scst_session *sess = p;
-#else
 void sess_cm_list_id_cleanup_work_fn(struct work_struct *work)
 {
 	struct scst_session *sess = container_of(work,
 			struct scst_session, sess_cm_list_id_cleanup_work.work);
-#endif
 	struct scst_cm_list_id *l, *t;
 	unsigned long cur_time = jiffies;
 	unsigned long flags;
@@ -2317,6 +2307,8 @@
 	return;
 }
 
+static void scst_cm_update_dev_fini(struct scst_device *dev);
+
 static void scst_cm_init_inq_finish(struct scst_cmd *cmd)
 {
 	int length, page_len, off, rc;
@@ -2351,7 +2343,7 @@
 		PRINT_CRIT_ERROR("Unable to perform initial INQUIRY for device "
 			"%s. Copy manager for this device will be disabled",
 			dev->virt_name);
-		goto out;
+		goto out_put_ref;
 	}
 
 	length = scst_get_buf_full(cmd, &buf, false);
@@ -2359,7 +2351,7 @@
 	if (unlikely(length <= 0)) {
 		if (length < 0)
 			PRINT_ERROR("scst_get_buf_full() failed: %d", length);
-		goto out;
+		goto out_put_ref;
 	}
 
 	TRACE_BUFF_FLAG(TRACE_DEBUG, "buf", buf, length);
@@ -2435,8 +2427,11 @@
 out_put:
 	scst_put_buf_full(cmd, buf);
 
-out:
+out_put_ref:
 	percpu_ref_put(&dev->refcnt);
+
+	scst_cm_update_dev_fini(dev);
+out:
 	TRACE_EXIT();
 	return;
 }
@@ -2444,15 +2439,17 @@
 static int scst_cm_send_init_inquiry(struct scst_device *dev,
 	unsigned int unpacked_lun, struct scst_cm_init_inq_priv *priv)
 {
-	int res = -EINVAL;
 	static const uint8_t inq_cdb[6] = { INQUIRY, 1, 0x83, 0x10, 0, 0 };
 	__be64 lun;
 	struct scst_cmd *cmd;
+	int res = 0;
 
 	TRACE_ENTRY();
 
-	if (WARN_ON_ONCE(!dev))
+	if (WARN_ON_ONCE(!dev)) {
+		res = -EINVAL;
 		goto out;
+	}
 
 	if (priv == NULL) {
 		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -2483,18 +2480,18 @@
 
 	scst_cmd_init_done(cmd, SCST_CONTEXT_THREAD);
 
-	res = 0;
-
 out:
 	TRACE_EXIT_RES(res);
 	return res;
 
 out_free:
+	percpu_ref_put(&dev->refcnt);
+
 	kfree(priv);
 	goto out;
 }
 
-/* scst_mutex supposed to be held and activities suspended */
+/* scst_mutex supposed to be held */
 static bool scst_cm_is_lun_free(unsigned int lun)
 {
 	bool res = true;
@@ -2516,7 +2513,7 @@
 	return res;
 }
 
-/* scst_mutex supposed to be held and activities suspended */
+/* scst_mutex supposed to be held */
 static unsigned int scst_cm_get_lun(const struct scst_device *dev)
 {
 	unsigned int res = SCST_MAX_LUN;
@@ -2547,11 +2544,28 @@
 	return res;
 }
 
+/*
+ * Pick the next copy manager LUN accepted by scst_cm_is_lun_free(), which
+ * requires scst_mutex to be held. Loops until such a LUN is found.
+ */
+static uint64_t scst_cm_get_free_lun(void)
+{
+	uint64_t candidate;
+
+	for (;;) {
+		candidate = scst_cm_next_lun++;
+		/* SCST_MAX_LUN itself is never handed out: wrap to 0 */
+		if (candidate == SCST_MAX_LUN)
+			scst_cm_next_lun = 0;
+		else if (scst_cm_is_lun_free(candidate))
+			return candidate;
+	}
+}
+
 static int scst_cm_dev_register(struct scst_device *dev, uint64_t lun)
 {
 	int res;
 	struct scst_acg_dev *acg_dev;
-	bool add_lun;
 
 	TRACE_ENTRY();
 
@@ -2559,28 +2573,16 @@
 
 	TRACE_DBG("dev %s, LUN %ld", dev->virt_name, (unsigned long)lun);
 
-	if (scst_cm_get_lun(dev) != SCST_MAX_LUN) {
-		TRACE_DBG("Copy Manager already registered device %s",
-			  dev->virt_name);
-		res = 0;
-		goto out;
-	}
-
 	if (lun == SCST_MAX_LUN) {
-		add_lun = true;
-		while (1) {
-			lun = scst_cm_next_lun++;
-			if (lun == SCST_MAX_LUN) {
-				scst_cm_next_lun = 0;
-				continue;
-			}
-			if (scst_cm_is_lun_free(lun))
-				break;
+		if (scst_cm_get_lun(dev) != SCST_MAX_LUN) {
+			TRACE_DBG("Copy Manager already registered device %s",
+				  dev->virt_name);
+			res = 0;
+			goto out;
 		}
-	} else
-		add_lun = false;
 
-	if (add_lun) {
+		lun = scst_cm_get_free_lun();
+
 		res = scst_acg_add_lun(scst_cm_tgt->default_acg,
 			scst_cm_tgt->tgt_luns_kobj, dev, lun, SCST_ADD_LUN_CM,
 			&acg_dev);
@@ -2592,6 +2594,8 @@
 	scst_block_dev(dev);
 	spin_unlock_bh(&dev->dev_lock);
 
+	atomic_set(&dev->cm_update_req_cnt, 1);
+
 	res = scst_cm_send_init_inquiry(dev, lun, NULL);
 	if (res != 0)
 		goto out_unblock;
@@ -2605,6 +2609,8 @@
 	scst_unblock_dev(dev);
 	spin_unlock_bh(&dev->dev_lock);
 
+	atomic_set(&dev->cm_update_req_cnt, 0);
+
 	scst_acg_del_lun(scst_cm_tgt->default_acg, lun, false);
 
 out_err:
@@ -2612,17 +2618,13 @@
 	goto out;
 }
 
-/* scst_mutex supposed to be held and activities suspended */
-static void scst_cm_dev_unregister(struct scst_device *dev, bool del_lun)
+static void scst_cm_dev_free_designators(struct scst_device *dev)
 {
 	struct scst_cm_desig *des, *t;
-	u32 lun;
 
 	TRACE_ENTRY();
 
-	lockdep_assert_held(&scst_mutex);
-
-	TRACE_DBG("dev %s, del_lun %d", dev->virt_name, del_lun);
+	mutex_lock(&scst_cm_mutex);
 
 	list_for_each_entry_safe(des, t, &scst_cm_desig_list, cm_desig_list_entry) {
 		if (des->desig_tgt_dev->dev == dev) {
@@ -2632,63 +2634,135 @@
 		}
 	}
 
-	if (!del_lun)
-		goto out;
+	mutex_unlock(&scst_cm_mutex);
+
+	TRACE_EXIT();
+
+	return;
+}
+
+/* scst_mutex supposed to be held */
+static void scst_cm_dev_unregister(struct scst_device *dev)
+{
+	u32 lun;
+
+	TRACE_ENTRY();
+
+	lockdep_assert_held(&scst_mutex);
+
+	TRACE_DBG("Unregister CM dev %s", dev->virt_name);
+
+	scst_cm_dev_free_designators(dev);
 
 	lun = scst_cm_get_lun(dev);
 	if (lun != SCST_MAX_LUN)
 		scst_acg_del_lun(scst_cm_tgt->default_acg, lun, false);
 
-out:
 	TRACE_EXIT();
+
 	return;
 }
 
-void scst_cm_update_dev(struct scst_device *dev)
+static int __scst_cm_update_dev(struct scst_device *dev)
 {
 	unsigned int lun;
-	int rc, res;
+	int rc = 0;
 
 	TRACE_ENTRY();
 
 	TRACE_MGMT_DBG("copy manager: updating device %s", dev->virt_name);
 
-	if (!scst_auto_cm_assignment ||
-	    !dev->handler->auto_cm_assignment_possible)
-		goto out;
-
-	res = scst_suspend_activity(SCST_SUSPEND_TIMEOUT_UNLIMITED);
-	WARN_ON_ONCE(res);
-
 	mutex_lock(&scst_mutex);
 
-	scst_cm_dev_unregister(dev, false);
+	lun = scst_cm_get_lun(dev);
+	if (lun == SCST_MAX_LUN) {
+		/*
+		 * Verify that scst_unregister_virtual_device() is in progress.
+		 */
+		WARN_ON_ONCE(!dev->remove_completion);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	scst_cm_dev_free_designators(dev);
 
 	spin_lock_bh(&dev->dev_lock);
 	scst_block_dev(dev);
 	spin_unlock_bh(&dev->dev_lock);
 
-	lun = scst_cm_get_lun(dev);
-	if (WARN_ON_ONCE(lun == SCST_MAX_LUN))
-		goto out_unblock;
-
 	rc = scst_cm_send_init_inquiry(dev, lun, NULL);
 	if (rc != 0)
 		goto out_unblock;
 
-out_resume:
+out_unlock:
 	mutex_unlock(&scst_mutex);
-	scst_resume_activity();
 
-out:
 	TRACE_EXIT();
-	return;
+
+	return rc;
 
 out_unblock:
 	spin_lock_bh(&dev->dev_lock);
 	scst_unblock_dev(dev);
 	spin_unlock_bh(&dev->dev_lock);
-	goto out_resume;
+
+	goto out_unlock;
+}
+
+/* Count an update request for @dev; start the update unless one is counted. */
+static void scst_cm_update_dev_start(struct scst_device *dev)
+{
+	int pending = atomic_inc_return(&dev->cm_update_req_cnt);
+
+	/* A value above 1 means an earlier request is already accounted */
+	if (pending > 1)
+		return;
+
+	/* The update could not be started: reset the request counter */
+	if (__scst_cm_update_dev(dev) != 0)
+		atomic_set(&dev->cm_update_req_cnt, 0);
+}
+
+/*
+ * Drop one counted update request for @dev and, when the counter is still
+ * non-zero afterwards, run one more update.
+ */
+static void scst_cm_update_dev_fini(struct scst_device *dev)
+{
+	int remaining = atomic_dec_return(&dev->cm_update_req_cnt);
+
+	WARN_ON_ONCE(remaining < 0);
+	if (remaining == 0)
+		return;
+
+	/*
+	 * At least one update request was counted since the inquiry was
+	 * submitted, so the designators information has to be refreshed
+	 * again. How many requests there were is irrelevant, as only the
+	 * last one is indicative; collapse them into a single one by
+	 * setting dev->cm_update_req_cnt to 1, which avoids unnecessary
+	 * __scst_cm_update_dev() calls.
+	 */
+	atomic_set(&dev->cm_update_req_cnt, 1);
+
+	if (__scst_cm_update_dev(dev) != 0)
+		atomic_set(&dev->cm_update_req_cnt, 0);
+}
+
+/*
+ * Request a copy manager update of @dev. Does nothing unless both
+ * scst_auto_cm_assignment and handler->auto_cm_assignment_possible are set.
+ */
+void scst_cm_update_dev(struct scst_device *dev)
+{
+	TRACE_ENTRY();
+
+	if (scst_auto_cm_assignment &&
+	    dev->handler->auto_cm_assignment_possible)
+		scst_cm_update_dev_start(dev);
+
+	TRACE_EXIT();
+	return;
+}
 
 int scst_cm_on_dev_register(struct scst_device *dev)
@@ -2715,13 +2789,13 @@
 
 	lockdep_assert_held(&scst_mutex);
 
-	scst_cm_dev_unregister(dev, true);
+	scst_cm_dev_unregister(dev);
 
 	TRACE_EXIT();
 	return;
 }
 
-/* scst_mutex supposed to be held and activities suspended */
+/* scst_mutex supposed to be held */
 int scst_cm_on_add_acg(struct scst_acg *acg)
 {
 	int res = 0;
@@ -2755,7 +2829,7 @@
 	/* Nothing to do */
 }
 
-/* scst_mutex supposed to be held and activities suspended */
+/* scst_mutex supposed to be held */
 int scst_cm_on_add_lun(struct scst_acg_dev *acg_dev, uint64_t lun,
 	unsigned int *flags)
 {
@@ -2783,7 +2857,7 @@
 	return res;
 }
 
-/* scst_mutex supposed to be held and activities suspended */
+/* scst_mutex supposed to be held */
 bool scst_cm_on_del_lun(struct scst_acg_dev *acg_dev, bool gen_report_luns_changed)
 {
 	bool res = gen_report_luns_changed;
@@ -2795,7 +2869,7 @@
 	if (acg_dev->acg != scst_cm_tgt->default_acg)
 		goto out;
 
-	scst_cm_dev_unregister(acg_dev->dev, false);
+	scst_cm_dev_free_designators(acg_dev->dev);
 
 	res = false;
 
@@ -2945,9 +3019,14 @@
 			continue;
 		if (seg[5] != des->desig[1])
 			continue;
-		if (seg[7] > des->desig[3])
+		if (seg[7] > 20) {
+			PRINT_WARNING("Initiator sent non-compliant identification descriptor (len %u > 20)",
+				      seg[7]);
 			continue;
-		if (memcmp(&des->desig[4], &seg[8], min_t(int, seg[7], des->desig[3])) == 0) {
+		}
+		if (seg[7] != des->desig[3])
+			continue;
+		if (memcmp(&des->desig[4], &seg[8], seg[7]) == 0) {
 			TRACE_DBG("Tgt_dev %p (lun %lld) found",
 				des->desig_tgt_dev,
 				(unsigned long long)des->desig_tgt_dev->lun);
@@ -3736,11 +3815,7 @@
 
 static struct scst_tgt_template scst_cm_tgtt = {
 	.name			= SCST_CM_NAME,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	.sg_tablesize		= SG_MAX_SINGLE_ALLOC,
-#else
 	.sg_tablesize		= 0xffff,
-#endif
 	.enabled_attr_not_needed = 1,
 	.dif_supported		= 1,
 	.hw_dif_type1_supported = 1,

diff --git a/scst/scst/src/scst_debug.c b/scst/scst/src/scst_debug.c
index ab47cdf..d211eb8 100644
--- a/scst/scst/src/scst_debug.c
+++ b/scst/scst/src/scst_debug.c

@@ -22,9 +22,7 @@
 #ifndef INSIDE_KERNEL_TREE
 #include <linux/version.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
 #include <linux/export.h>
-#endif
 
 #ifdef INSIDE_KERNEL_TREE
 #include <scst/scst.h>
@@ -44,9 +42,6 @@
 static inline int get_current_tid(void)
 {
 	/* Code should be the same as in sys_gettid() */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
-	return current->pid;
-#else
 	if (in_interrupt()) {
 		/*
 		 * Unfortunately, task_pid_vnr() isn't IRQ-safe, so otherwise
@@ -55,7 +50,6 @@
 		return current->pid;
 	}
 	return task_pid_vnr(current);
-#endif
 }
 
 /*

diff --git a/scst/scst/src/scst_dlm.c b/scst/scst/src/scst_dlm.c
index dff421c..0468003 100644
--- a/scst/scst/src/scst_dlm.c
+++ b/scst/scst/src/scst_dlm.c

@@ -510,6 +510,13 @@
 		goto out;
 	}
 	pos = 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	/*
+	 * Commit 4d03e3cc5982 ("fs: don't allow kernel reads and writes
+	 * without iter ops") made kernel_read() depend on .read_iter.
+	 */
+	WARN_ON_ONCE(!f->f_op->read_iter);
+#endif
 	ret = kernel_read(f, buf, buf_len, &pos);
 	if (ret >= 0)
 		buf[min(ret, buf_len - 1)] = '\0';
@@ -536,9 +543,21 @@
 static int scst_dlm_filldir(void *arg, const char *name_arg, int name_len,
 			    loff_t curr_pos, u64 inode, unsigned int dtype)
 #else
-static int scst_dlm_filldir(struct dir_context *arg, const char *name_arg,
-			    int name_len, loff_t curr_pos, u64 inode,
-			    unsigned int dtype)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)
+/*
+ * See also commit 25885a35a720 ("Change calling conventions for filldir_t")
+ * # v6.1.
+ */
+#define DLM_FILLDIR_RET bool
+#else
+#define DLM_FILLDIR_RET int
+#endif
+
+static DLM_FILLDIR_RET
+scst_dlm_filldir(struct dir_context *arg, const char *name_arg,
+		 int name_len, loff_t curr_pos, u64 inode,
+		 unsigned int dtype)
 #endif
 {
 	char *p, *q, name[64];
@@ -573,7 +592,11 @@
 	(*entries)[i + 1] = '\0';
 
 out:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0)
 	return *entries ? 0 : -ENOMEM;
+#else
+	return *entries ? true : false;
+#endif
 }
 
 /*
@@ -804,6 +827,7 @@
 	char lsp_name[32], lock_name[32];
 	int res;
 	bool modified_lvb = false;
+	uint32_t flags;
 
 	if (pr_dlm->ls || !pr_dlm->cl_dev_id || in_interrupt() ||
 	    time_is_after_jiffies(pr_dlm->latest_lscr_attempt + 1 * HZ))
@@ -828,9 +852,18 @@
 
 	snprintf(lsp_name, sizeof(lsp_name), "%s%s", SCST_DLM_LOCKSPACE_PFX,
 		 pr_dlm->cl_dev_id);
+
+	flags = DLM_LSFL_NEWEXCL;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0)
+	/*
+	 * See also commit 12cda13cfd53 ("fs: dlm: remove DLM_LSFL_FS from uapi")
+	 * # v6.1.
+	 */
+	flags |= DLM_LSFL_FS;
+#endif
+
 	res = scst_dlm_new_lockspace(lsp_name, strlen(lsp_name), &ls,
-				     DLM_LSFL_NEWEXCL | DLM_LSFL_FS,
-				     PR_DLM_LVB_LEN);
+				     flags, PR_DLM_LVB_LEN);
 	if (res) {
 		PRINT_ERROR("Creating DLM lockspace %s failed: %d", lsp_name,
 			    res);
@@ -1095,16 +1128,10 @@
 		queue_work(pr_dlm->from_wq, &pr_dlm->pre_upd_work);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_pre_join_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_pre_join_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, pre_join_work);
-#endif
 	dlm_lockspace_t *ls;
 
 	mutex_lock(&pr_dlm->ls_mutex);
@@ -1118,16 +1145,10 @@
 	mutex_unlock(&pr_dlm->ls_mutex);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_pre_upd_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_pre_upd_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, pre_upd_work);
-#endif
 	dlm_lockspace_t *ls;
 
 	mutex_lock(&pr_dlm->ls_mutex);
@@ -1158,16 +1179,10 @@
  * Note: the node that has invoked scst_trigger_lvb_update() holds PR_LOCK
  * in EX mode and waits until this function has finished.
  */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_copy_to_dlm_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_copy_to_dlm_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, copy_to_dlm_work);
-#endif
 	struct scst_device *dev = pr_dlm->dev;
 	dlm_lockspace_t *ls;
 	int res;
@@ -1231,16 +1246,10 @@
  * scst_pr_init_tgt_dev() and scst_pr_clear_tgt_dev() in scst_pres.c protect
  * these manipulations by locking the PR data structures for writing.
  */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_copy_from_dlm_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_copy_from_dlm_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, copy_from_dlm_work);
-#endif
 	struct scst_device *dev = pr_dlm->dev;
 	dlm_lockspace_t *ls;
 	int res = -ENOENT;
@@ -1292,16 +1301,10 @@
 }
 
 /* Tell other nodes to refresh their local state from the lock value blocks. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_reread_lvb_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_reread_lvb_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, reread_lvb_work);
-#endif
 	dlm_lockspace_t *ls;
 	struct scst_lksb pr_lksb;
 	int res;
@@ -1323,16 +1326,10 @@
 }
 
 /* Tell other nodes to update the DLM lock value blocks. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_lvb_upd_work(void *p)
-{
-	struct scst_pr_dlm_data *pr_dlm = p;
-#else
 static void scst_lvb_upd_work(struct work_struct *work)
 {
 	struct scst_pr_dlm_data *pr_dlm = container_of(work,
 				struct scst_pr_dlm_data, lvb_upd_work);
-#endif
 	dlm_lockspace_t *ls;
 	struct scst_lksb lksb;
 	int res;
@@ -1392,21 +1389,12 @@
 	mutex_init(&pr_dlm->ls_cr_mutex);
 	mutex_init(&pr_dlm->ls_mutex);
 	pr_dlm->data_lksb.lksb.sb_lvbptr = pr_dlm->lvb;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&pr_dlm->pre_join_work, scst_pre_join_work, pr_dlm);
-	INIT_WORK(&pr_dlm->pre_upd_work, scst_pre_upd_work, pr_dlm);
-	INIT_WORK(&pr_dlm->copy_from_dlm_work, scst_copy_from_dlm_work, pr_dlm);
-	INIT_WORK(&pr_dlm->copy_to_dlm_work, scst_copy_to_dlm_work, pr_dlm);
-	INIT_WORK(&pr_dlm->lvb_upd_work, scst_lvb_upd_work, pr_dlm);
-	INIT_WORK(&pr_dlm->reread_lvb_work, scst_reread_lvb_work, pr_dlm);
-#else
 	INIT_WORK(&pr_dlm->pre_join_work, scst_pre_join_work);
 	INIT_WORK(&pr_dlm->pre_upd_work, scst_pre_upd_work);
 	INIT_WORK(&pr_dlm->copy_from_dlm_work, scst_copy_from_dlm_work);
 	INIT_WORK(&pr_dlm->copy_to_dlm_work, scst_copy_to_dlm_work);
 	INIT_WORK(&pr_dlm->lvb_upd_work, scst_lvb_upd_work);
 	INIT_WORK(&pr_dlm->reread_lvb_work, scst_reread_lvb_work);
-#endif
 	pr_dlm->latest_lscr_attempt = jiffies - 100 * HZ;
 
 	res = -ENOMEM;

diff --git a/scst/scst/src/scst_event.c b/scst/scst/src/scst_event.c
index 724a0e7..0194a57 100644
--- a/scst/scst/src/scst_event.c
+++ b/scst/scst/src/scst_event.c

@@ -29,6 +29,7 @@
 
 #include "scst_priv.h"
 
+static struct workqueue_struct *scst_event_wq;
 static struct class *scst_event_sysfs_class;
 
 static int scst_event_major;
@@ -95,16 +96,10 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_event_timeout_fn(void *p)
-{
-	struct scst_event_entry *event_entry = p;
-#else
 static void scst_event_timeout_fn(struct work_struct *work)
 {
 	struct scst_event_entry *event_entry = container_of(work,
 		struct scst_event_entry, event_timeout_work.work);
-#endif
 
 	TRACE_ENTRY();
 
@@ -211,20 +206,15 @@
 					break;
 				}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-				INIT_WORK(&new_event_entry->event_timeout_work,
-					  scst_event_timeout_fn,
-					  new_event_entry);
-#else
 				INIT_DELAYED_WORK(&new_event_entry->event_timeout_work,
 						  scst_event_timeout_fn);
-#endif
 				if (new_event_entry->event_notify_fn != NULL) {
 					new_event_entry->event.event_id = atomic_inc_return(&base_event_id);
 					if (new_event_entry->event_timeout == 0)
 						new_event_entry->event_timeout = SCST_DEFAULT_EVENT_TIMEOUT;
-					schedule_delayed_work(&new_event_entry->event_timeout_work,
-						new_event_entry->event_timeout);
+
+					queue_delayed_work(scst_event_wq, &new_event_entry->event_timeout_work,
+							   new_event_entry->event_timeout);
 				}
 
 				list_add_tail(&new_event_entry->events_list_entry,
@@ -264,16 +254,10 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_event_queue_work_fn(void *p)
-{
-	struct scst_event_entry *e = p;
-#else
 static void scst_event_queue_work_fn(struct work_struct *work)
 {
 	struct scst_event_entry *e = container_of(work,
 		struct scst_event_entry, scst_event_queue_work);
-#endif
 
 	TRACE_ENTRY();
 
@@ -289,18 +273,14 @@
 {
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&e->scst_event_queue_work, scst_event_queue_work_fn, e);
-#else
 	INIT_WORK(&e->scst_event_queue_work, scst_event_queue_work_fn);
-#endif
 
 	TRACE_DBG("Scheduling event entry %p", e);
 
 	e->event.event_code = event_code;
 	strlcpy(e->event.issuer_name, issuer_name, sizeof(e->event.issuer_name));
 
-	schedule_work(&e->scst_event_queue_work);
+	queue_work(scst_event_wq, &e->scst_event_queue_work);
 
 	TRACE_EXIT();
 	return;
@@ -613,9 +593,11 @@
 static int scst_event_get_event_from_user(struct scst_event_user __user *arg,
 	struct scst_event_entry **out_event_entry)
 {
-	int res, rc, event_entry_len;
+	int res, rc;
+	int event_entry_len, event_len;
 	uint32_t payload_len;
 	struct scst_event_entry *event_entry;
+	struct scst_event *event;
 
 	TRACE_ENTRY();
 
@@ -646,9 +628,10 @@
 
 	TRACE_MEM("Allocated event entry %p", event_entry);
 
-	rc = copy_from_user((u8 *)event_entry +
-			    offsetof(typeof(*event_entry), event), arg,
-			    event_entry_len);
+	event = &event_entry->event;
+	event_len = sizeof(*event) + payload_len;
+
+	rc = copy_from_user((u8 *)event, arg, event_len);
 	if (rc != 0) {
 		PRINT_ERROR("Failed to copy %d user's bytes", rc);
 		res = -EFAULT;
@@ -656,16 +639,17 @@
 	}
 
 	/* payload_len has been recopied, so recheck it. */
-	if (event_entry->event.payload_len != event_entry_len) {
-		PRINT_ERROR("Payload len changed while being read");
+	if (event->payload_len != payload_len) {
+		PRINT_ERROR("Payload len %d changed while being read: %d",
+				event->payload_len, payload_len);
 		res = -EINVAL;
 		goto out_free;
 	}
 
-	event_entry->event.issuer_name[sizeof(event_entry->event.issuer_name)-1] = '\0';
+	event->issuer_name[sizeof(event->issuer_name) - 1] = '\0';
 
 	TRACE_DBG("user event: event_code %d, issuer_name %s",
-		event_entry->event.event_code, event_entry->event.issuer_name);
+		event->event_code, event->issuer_name);
 
 	*out_event_entry = event_entry;
 
@@ -1123,19 +1107,22 @@
 int scst_event_init(void)
 {
 	int res = 0;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	struct class_device *class_member;
-#else
 	struct device *dev;
-#endif
 
 	TRACE_ENTRY();
 
+	scst_event_wq = alloc_workqueue("scst_event_wq", WQ_MEM_RECLAIM, 0);
+	if (unlikely(!scst_event_wq)) {
+		PRINT_ERROR("Failed to allocate scst_event_wq");
+		res = -ENOMEM;
+		goto out;
+	}
+
 	scst_event_sysfs_class = class_create(THIS_MODULE, SCST_EVENT_NAME);
 	if (IS_ERR(scst_event_sysfs_class)) {
-		PRINT_ERROR("%s", "Unable create sysfs class for SCST event");
+		PRINT_ERROR("Unable create sysfs class for SCST event");
 		res = PTR_ERR(scst_event_sysfs_class);
-		goto out;
+		goto out_wq;
 	}
 
 	scst_event_major = register_chrdev(0, SCST_EVENT_NAME, &scst_event_fops);
@@ -1145,15 +1132,6 @@
 		goto out_class;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
-	class_member = class_device_create(scst_event_sysfs_class, NULL,
-					   MKDEV(scst_event_major, 0), NULL,
-					   SCST_EVENT_NAME);
-	if (IS_ERR(class_member)) {
-		res = PTR_ERR(class_member);
-		goto out_chrdev;
-	}
-#else
 	dev = device_create(scst_event_sysfs_class, NULL,
 			    MKDEV(scst_event_major, 0),
 				NULL,
@@ -1162,7 +1140,6 @@
 		res = PTR_ERR(dev);
 		goto out_chrdev;
 	}
-#endif
 
 #ifdef CONFIG_EVENTS_WAIT_TEST
 	sysfs_create_file(kernel_kobj, &event_wait_test_attr.attr);
@@ -1172,12 +1149,15 @@
 	TRACE_EXIT_RES(res);
 	return res;
 
-
 out_chrdev:
 	unregister_chrdev(scst_event_major, SCST_EVENT_NAME);
 
 out_class:
 	class_destroy(scst_event_sysfs_class);
+
+out_wq:
+	destroy_workqueue(scst_event_wq);
+
 	goto out;
 }
 
@@ -1194,8 +1174,8 @@
 	device_destroy(scst_event_sysfs_class, MKDEV(scst_event_major, 0));
 	class_destroy(scst_event_sysfs_class);
 
-	/* Wait for all pending being queued events to process */
-	flush_scheduled_work();
+	/* All pending works will be drained by destroy_workqueue() */
+	destroy_workqueue(scst_event_wq);
 
 	TRACE_EXIT();
 	return;

diff --git a/scst/scst/src/scst_lib.c b/scst/scst/src/scst_lib.c
index bf7d266..34823ff 100644
--- a/scst/scst/src/scst_lib.c
+++ b/scst/scst/src/scst_lib.c

@@ -16,6 +16,7 @@
  *  GNU General Public License for more details.
  */
 
+#include <linux/version.h>
 #include <linux/aio.h>		/* struct kiocb for kernel v4.0 */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -31,25 +32,18 @@
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/vmalloc.h>
-#include <asm/kmap_types.h>
 #include <asm/unaligned.h>
 #include <asm/checksum.h>
 #ifndef INSIDE_KERNEL_TREE
 #include <linux/version.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
 #include <linux/crc-t10dif.h>
-#endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #include <linux/sched/task_stack.h>
 #endif
 #include <linux/namei.h>
 #include <linux/mount.h>
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-#include <linux/writeback.h>
-#endif
-
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
 #include <linux/t10-pi.h>
 #endif
@@ -76,93 +70,33 @@
 static DEFINE_SPINLOCK(scst_global_stpg_list_lock);
 static LIST_HEAD(scst_global_stpg_list);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_put_acg_work(void *p);
-#else
 static void scst_put_acg_work(struct work_struct *work);
-#endif
 static void scst_free_acn(struct scst_acn *acn, bool reassign);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 struct scsi_io_context {
 	void *data;
 	void (*done)(void *data, char *sense, int result, int resid);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	/*
+	 * Private sense buffer, only compiled in for kernels before v4.11.
+	 * See commit 772c8f6f3bbd ("Merge tag 'for-4.11/linus-merge-signed'
+	 * of git://git.kernel.dk/linux-block"): from that kernel on, both
+	 * scsi_init_rq and scsi_init_request (later renamed to
+	 * scsi_mq_init_request in e7008ff5c61a) initialize the scsi_request
+	 * sense buffer, so we don't need to (nor should) provide our own.
+	 */
 	char sense[SCST_SENSE_BUFFERSIZE];
+#endif
 };
 static struct kmem_cache *scsi_io_context_cache;
-#endif
 static struct workqueue_struct *scst_release_acg_wq;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) \
-    && (!defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE -0 < 5 * 256 + 3) \
-    && !defined(CONFIG_PPC)
-static int strncasecmp(const char *s1, const char *s2, size_t n)
-{
-	int c1, c2;
-
-	do {
-		c1 = tolower(*s1++);
-		c2 = tolower(*s2++);
-	} while ((--n > 0) && c1 == c2 && c1 != 0);
-	return c1 - c2;
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
-char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
-{
-	unsigned int len;
-	char *p;
-	va_list aq;
-
-	va_copy(aq, ap);
-	len = vsnprintf(NULL, 0, fmt, aq);
-	va_end(aq);
-
-	p = kmalloc(len + 1, gfp);
-	if (!p)
-		return NULL;
-
-	vsnprintf(p, len + 1, fmt, ap);
-
-	return p;
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) &&	\
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6 ||	\
-	 RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 1)
-/*
- * See also "lib: introduce common method to convert hex digits" (commit
- * 903788892ea0fc7fcaf7e8e5fac9a77379fc215b).
- */
-int hex_to_bin(char ch)
-{
-	if (ch >= '0' && ch <= '9')
-		return ch - '0';
-	ch = tolower(ch);
-	if (ch >= 'a' && ch <= 'f')
-		return ch - 'a' + 10;
-	return -1;
-}
-EXPORT_SYMBOL(hex_to_bin);
-#endif
-
 static int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	    int nents_to_copy, size_t copy_len,
-	    enum km_type d_km_type, enum km_type s_km_type);
-#else
 	    int nents_to_copy, size_t copy_len);
-#endif
 
 static void scst_free_descriptors(struct scst_cmd *cmd);
 static bool sg_cmp(struct scatterlist *dst_sg, struct scatterlist *src_sg,
-	    int nents_to_cmp, size_t cmp_len, int *miscompare_offs
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	    , enum km_type d_km_type, enum km_type s_km_type
-#endif
-	    );
+	    int nents_to_cmp, size_t cmp_len, int *miscompare_offs);
 
 const struct scst_opcode_descriptor scst_op_descr_inquiry = {
 	.od_opcode = INQUIRY,
@@ -492,6 +426,8 @@
 	const struct scst_sdbops *sdbops);
 static int get_cdb_info_var_len(struct scst_cmd *cmd,
 	const struct scst_sdbops *sdbops);
+static int get_cdb_info_dyn_runtime_attr(struct scst_cmd *cmd,
+	const struct scst_sdbops *sdbops);
 
 /*
  * +=====================================-============-======-
@@ -537,7 +473,7 @@
 	uint8_t info_lba_off;	/* LBA offset in cdb */
 	uint8_t info_lba_len;	/* LBA length in cdb */
 	uint8_t info_len_off;	/* length offset in cdb */
-	uint8_t info_len_len;	/* length length in cdb */
+	uint8_t info_len_len;	/* length of length in cdb */
 	uint8_t info_data_direction;
 				/*
 				 * init --> target: SCST_DATA_WRITE
@@ -545,6 +481,8 @@
 				 * target <--> init: SCST_DATA_READ|
 				 *		     SCST_DATA_WRITE
 				 */
+	/* If not zero, logarithm base 2 of the maximum data buffer length. */
+	uint8_t log2_max_buf_len;
 	uint32_t info_op_flags;	/* various flags of this opcode */
 	const char *info_op_name;/* op code SCSI full name */
 	int (*get_cdb_info)(struct scst_cmd *cmd, const struct scst_sdbops *sdbops);
@@ -871,6 +809,7 @@
 	 .info_data_direction = SCST_DATA_READ,
 	 .info_op_flags = SCST_IMPLICIT_HQ|SCST_REG_RESERVE_ALLOWED|
 		SCST_WRITE_EXCL_ALLOWED|SCST_EXCL_ACCESS_ALLOWED,
+	 .log2_max_buf_len = 3,
 	 .get_cdb_info = get_cdb_info_read_capacity},
 	{.ops = 0x25, .devkey = "      O         ",
 	 .info_op_name = "GET WINDOW",
@@ -1525,6 +1464,12 @@
 	 .info_op_flags = FLAG_NONE,
 	 .info_len_off = 6, .info_len_len = 4,
 	 .get_cdb_info = get_cdb_info_len_4},
+	{.ops = 0xAB, .devkey = " O              ",
+	 .info_op_name = "SERVICE ACTION IN(12)",
+	 .info_data_direction = SCST_DATA_READ,
+	 .info_op_flags = FLAG_NONE,
+	 .info_len_off = 6, .info_len_len = 4,
+	 .get_cdb_info = get_cdb_info_len_4},
 	{.ops = 0xAC, .devkey = "       O        ",
 	 .info_op_name = "ERASE(12)",
 	 .info_data_direction = SCST_DATA_NONE,
@@ -1654,7 +1599,7 @@
 	 .info_len_off = 6, .info_len_len = 4,
 	 .get_cdb_info = get_cdb_info_len_4},
 	{.ops = 0xBF, .devkey = "     O          ",
-	 .info_op_name = "SEND DVD STRUCTUE",
+	 .info_op_name = "SEND DVD STRUCTURE",
 	 .info_data_direction = SCST_DATA_WRITE,
 	 .info_op_flags = FLAG_NONE,
 	 .info_len_off = 8, .info_len_len = 4,
@@ -1665,6 +1610,18 @@
 	 .info_op_flags = FLAG_NONE,
 	 .info_len_off = 6, .info_len_len = 4,
 	 .get_cdb_info = get_cdb_info_len_4},
+	{.ops = 0xD1, .devkey = " O              ",
+	 .info_op_name = "READ DYN RUNTIME ATTR",
+	 .info_data_direction = SCST_DATA_READ,
+	 .info_op_flags = FLAG_NONE,
+	 .info_len_off = 6, .info_len_len = 4,
+	 .get_cdb_info = get_cdb_info_dyn_runtime_attr},
+	{.ops = 0xD2, .devkey = " O              ",
+	 .info_op_name = "WRITE DYN RUNTIME ATTR",
+	 .info_data_direction = SCST_DATA_WRITE,
+	 .info_op_flags = FLAG_NONE,
+	 .info_len_off = 6, .info_len_len = 4,
+	 .get_cdb_info = get_cdb_info_dyn_runtime_attr},
 	{.ops = 0xE7, .devkey = "        V       ",
 	 .info_op_name = "INIT ELEMENT STATUS WRANGE",
 	 .info_data_direction = SCST_DATA_NONE,
@@ -1829,15 +1786,15 @@
 static int scst_set_lun_not_supported_request_sense(struct scst_cmd *cmd,
 	int key, int asc, int ascq)
 {
-	int res;
 	int sense_len, len;
 	struct scatterlist *sg;
+	int res = 0;
 
 	TRACE_ENTRY();
 
 	if (cmd->status != 0) {
-		TRACE_MGMT_DBG("cmd %p already has status %x set", cmd,
-			cmd->status);
+		TRACE_MGMT_DBG("cmd %p already has status %x set",
+			       cmd, cmd->status);
 		res = -EEXIST;
 		goto out;
 	}
@@ -1849,6 +1806,12 @@
 	}
 
 	if (cmd->sg == NULL) {
+		if (cmd->bufflen == 0) {
+			int bufflen = cmd->cdb[4];
+
+			cmd->bufflen = bufflen ?: 18;
+		}
+
 		/*
 		 * If target driver preparing data buffer using tgt_alloc_data_buf()
 		 * callback, it is responsible to copy the sense to its buffer
@@ -1861,19 +1824,16 @@
 			goto go;
 		}
 
-		if (cmd->bufflen == 0)
-			cmd->bufflen = cmd->cdb[4];
-
 		cmd->sg = scst_alloc_sg(cmd->bufflen, GFP_ATOMIC, &cmd->sg_cnt);
 		if (cmd->sg == NULL) {
-			PRINT_ERROR("Unable to alloc sg for REQUEST SENSE"
-				"(sense %x/%x/%x)", key, asc, ascq);
+			PRINT_ERROR("Unable to alloc sg for REQUEST SENSE (sense %x/%x/%x)",
+				    key, asc, ascq);
 			res = 1;
 			goto out;
 		}
 
-		TRACE_MEM("sg %p alloced for sense for cmd %p (cnt %d, "
-			"len %d)", cmd->sg, cmd, cmd->sg_cnt, cmd->bufflen);
+		TRACE_MEM("sg %p (cnt %d, len %d) alloced for sense: cmd %p",
+			  cmd->sg, cmd->sg_cnt, cmd->bufflen, cmd);
 	}
 
 go:
@@ -1890,34 +1850,37 @@
 	cmd->data_direction = SCST_DATA_READ;
 	scst_set_resp_data_len(cmd, sense_len);
 
-	res = 0;
 	cmd->completed = 1;
 	cmd->resid_possible = 1;
 
 out:
 	TRACE_EXIT_RES(res);
+
 	return res;
 }
 
 static int scst_set_lun_not_supported_inquiry(struct scst_cmd *cmd)
 {
-	int res;
 	uint8_t *buf;
 	struct scatterlist *sg;
 	int len;
+	int res = 0;
 
 	TRACE_ENTRY();
 
 	if (cmd->status != 0) {
-		TRACE_MGMT_DBG("cmd %p already has status %x set", cmd,
-			cmd->status);
+		TRACE_MGMT_DBG("cmd %p already has status %x set",
+			       cmd, cmd->status);
 		res = -EEXIST;
 		goto out;
 	}
 
 	if (cmd->sg == NULL) {
-		if (cmd->bufflen == 0)
-			cmd->bufflen = min_t(int, 36, get_unaligned_be16(&cmd->cdb[3]));
+		if (cmd->bufflen == 0) {
+			int bufflen = get_unaligned_be16(&cmd->cdb[3]);
+
+			cmd->bufflen = bufflen ? min_t(int, 36, bufflen) : 36;
+		}
 
 		/*
 		 * If target driver preparing data buffer using tgt_alloc_data_buf()
@@ -1927,22 +1890,20 @@
 		if (cmd->tgt_i_data_buf_alloced && (cmd->tgt_i_sg != NULL)) {
 			cmd->sg = cmd->tgt_i_sg;
 			cmd->sg_cnt = cmd->tgt_i_sg_cnt;
-			TRACE_MEM("Tgt used for INQUIRY for not supported "
-				"LUN for cmd %p", cmd);
+			TRACE_MEM("Tgt used for INQUIRY (not supported LUN): cmd %p",
+				  cmd);
 			goto go;
 		}
 
 		cmd->sg = scst_alloc_sg(cmd->bufflen, GFP_ATOMIC, &cmd->sg_cnt);
 		if (cmd->sg == NULL) {
-			PRINT_ERROR("%s", "Unable to alloc sg for INQUIRY "
-				"for not supported LUN");
+			PRINT_ERROR("Unable to alloc sg for INQUIRY (not supported LUN)");
 			res = 1;
 			goto out;
 		}
 
-		TRACE_MEM("sg %p alloced for INQUIRY for not supported LUN for "
-			"cmd %p (cnt %d, len %d)", cmd->sg, cmd, cmd->sg_cnt,
-			cmd->bufflen);
+		TRACE_MEM("sg %p (cnt %d, len %d) allocated for INQUIRY (not supported LUN): cmd %p",
+			  cmd->sg, cmd->sg_cnt, cmd->bufflen, cmd);
 	}
 
 go:
@@ -1964,12 +1925,12 @@
 	cmd->data_direction = SCST_DATA_READ;
 	scst_set_resp_data_len(cmd, len);
 
-	res = 0;
 	cmd->completed = 1;
 	cmd->resid_possible = 1;
 
 out:
 	TRACE_EXIT_RES(res);
+
 	return res;
 }
 
@@ -3254,11 +3215,11 @@
 {
 	if (cmd->op_name)
 		return cmd->op_name;
-	else {
-		scnprintf(cmd->not_parsed_op_name,
-			sizeof(cmd->not_parsed_op_name), "0x%x", cmd->cdb[0]);
-		return cmd->not_parsed_op_name;
-	}
+
+	scnprintf(cmd->not_parsed_op_name,
+		sizeof(cmd->not_parsed_op_name), "0x%x", cmd->cdb[0]);
+
+	return cmd->not_parsed_op_name;
 }
 EXPORT_SYMBOL(scst_get_opcode_name);
 #endif
@@ -3985,18 +3946,10 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_hw_pending_work_fn(void *p)
-#else
 static void scst_hw_pending_work_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct scst_session *sess = (struct scst_session *)p;
-#else
 	struct scst_session *sess = container_of(work, struct scst_session,
 						 hw_pending_work.work);
-#endif
 	struct scst_tgt_template *tgtt = sess->tgt->tgtt;
 	struct scst_cmd *cmd;
 	unsigned long cur_time = jiffies;
@@ -4200,11 +4153,7 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_ext_blocking_done_fn(void *p);
-#else
 static void scst_ext_blocking_done_fn(struct work_struct *work);
-#endif
 
 static int scst_dif_none(struct scst_cmd *cmd);
 #ifdef CONFIG_SCST_DIF_INJECT_CORRUPTED_TAGS
@@ -4276,6 +4225,7 @@
 #ifdef CONFIG_SCST_PER_DEVICE_CMD_COUNT_LIMIT
 	atomic_set(&dev->dev_cmd_count, 0);
 #endif
+	atomic_set(&dev->cm_update_req_cnt, 0);
 	scst_init_mem_lim(&dev->dev_mem_lim);
 	spin_lock_init(&dev->dev_lock);
 	lockdep_register_key(&dev->dev_lock_key);
@@ -4285,11 +4235,7 @@
 	INIT_LIST_HEAD(&dev->dev_tgt_dev_list);
 	INIT_LIST_HEAD(&dev->dev_acg_dev_list);
 	INIT_LIST_HEAD(&dev->ext_blockers_list);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&dev->ext_blockers_work, scst_ext_blocking_done_fn, dev);
-#else
 	INIT_WORK(&dev->ext_blockers_work, scst_ext_blocking_done_fn);
-#endif
 	dev->dev_double_ua_possible = 1;
 	dev->queue_alg = SCST_QUEUE_ALG_1_UNRESTRICTED_REORDER;
 	dev->dev_numa_node_id = nodeid;
@@ -4476,7 +4422,7 @@
 	return res;
 }
 
-/* The activity supposed to be suspended and scst_mutex held */
+/* The caller must hold scst_mutex. */
 int scst_acg_add_lun(struct scst_acg *acg, struct kobject *parent,
 	struct scst_device *dev, uint64_t lun, unsigned int flags,
 	struct scst_acg_dev **out_acg_dev)
@@ -4489,7 +4435,7 @@
 
 	TRACE_ENTRY();
 
-	INIT_LIST_HEAD(&tmp_tgt_dev_list);
+	lockdep_assert_held(&scst_mutex);
 
 	res = scst_check_dif_compatibility(acg, dev);
 	if (res != 0)
@@ -4515,12 +4461,6 @@
 	list_add_tail(&acg_dev->acg_dev_list_entry, &acg->acg_dev_list);
 	list_add_tail(&acg_dev->dev_acg_dev_list_entry, &dev->dev_acg_dev_list);
 
-	if (!(flags & SCST_ADD_LUN_CM)) {
-		res = scst_cm_on_add_lun(acg_dev, lun, &flags);
-		if (res != 0)
-			goto out_free;
-	}
-
 	list_for_each_entry(sess, &acg->acg_sess_list, acg_sess_list_entry) {
 		res = scst_alloc_add_tgt_dev(sess, acg_dev, &tgt_dev);
 		if (res == -EPERM)
@@ -4532,6 +4472,12 @@
 			      &tmp_tgt_dev_list);
 	}
 
+	if (!(flags & SCST_ADD_LUN_CM)) {
+		res = scst_cm_on_add_lun(acg_dev, lun, &flags);
+		if (res != 0)
+			goto out_free;
+	}
+
 	res = scst_acg_dev_sysfs_create(acg_dev, parent);
 	if (res != 0)
 		goto out_on_del;
@@ -4887,16 +4833,10 @@
 	struct scst_acg		*acg;
 };
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_put_acg_work(void *p)
-{
-	struct scst_acg_put_work *put_work = p;
-#else
 static void scst_put_acg_work(struct work_struct *work)
 {
 	struct scst_acg_put_work *put_work =
 		container_of(work, typeof(*put_work), work);
-#endif
 	struct scst_acg *acg = put_work->acg;
 
 	kfree(put_work);
@@ -4914,11 +4854,7 @@
 		return;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	INIT_WORK(&put_work->work, scst_put_acg_work, put_work);
-#else
 	INIT_WORK(&put_work->work, scst_put_acg_work);
-#endif
 	put_work->acg = acg;
 
 	/*
@@ -5338,7 +5274,7 @@
 {
 	int res = 0;
 	struct scst_tgt_template *tgtt = sess->tgt->tgtt;
-	int ini_sg, ini_unchecked_isa_dma, ini_use_clustering;
+	int ini_sg, ini_unchecked_isa_dma = 0, ini_use_clustering;
 	struct scst_tgt_dev *tgt_dev;
 	struct scst_device *dev = acg_dev->dev;
 	struct list_head *head;
@@ -5395,7 +5331,9 @@
 		struct Scsi_Host *shost = dev->scsi_dev->host;
 
 		ini_sg = shost->sg_tablesize;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0)
 		ini_unchecked_isa_dma = shost->unchecked_isa_dma;
+#endif
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 21, 0)
 		ini_use_clustering = shost->use_clustering == ENABLE_CLUSTERING;
 #else
@@ -5821,7 +5759,7 @@
 	}
 
 	scst_sess_get(res->sess);
-	res->cpu_cmd_counter = scst_get();
+	scst_get_icmd(res);
 
 	TRACE(TRACE_SCSI, "New internal cmd %p (op %s)", res,
 		scst_get_opcode_name(res));
@@ -6000,22 +5938,68 @@
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) && !defined(RHEL_MAJOR)
-ssize_t kernel_write(struct file *file, const void *buf, size_t count,
-		     loff_t *pos)
+/**
+ * scst_file_size - returns the size of a regular file
+ * @path: Path of the file.
+ * @mode: If not NULL, receives the inode mode, also for non-regular files.
+ *
+ * Returns the size, -ENOTTY for a non-regular file, or filp_open()'s error.
+ */
+loff_t scst_file_size(const char *path, umode_t *mode)
 {
-	struct kvec iov = {
-		.iov_base = buf,
-		.iov_len = count
-	};
+	struct file *filp;
+	struct inode *inode;
+	loff_t res;
 
-	return scst_writev(file, &iov, 1, pos);
+	filp = filp_open(path, O_LARGEFILE | O_RDONLY, 0600);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+	inode = file_inode(filp);
+	if (mode)
+		*mode = inode->i_mode;
+	res = S_ISREG(inode->i_mode) ? i_size_read(file_inode(filp)) : -ENOTTY;
+	filp_close(filp, NULL);
+	return res;
 }
-EXPORT_SYMBOL(kernel_write);
-#endif
+EXPORT_SYMBOL(scst_file_size);
 
 /**
- * scst_writev - read data from a file into a kernel buffer
+ * scst_bdev_size - returns the size of a block device
+ * @path: Path of the block device.
+ *
+ * Returns the block device size or an error code.
+ */
+loff_t scst_bdev_size(const char *path)
+{
+	struct block_device *blkdev =
+		blkdev_get_by_path(path, FMODE_READ, (void *)__func__);
+	loff_t nbytes;
+
+	if (IS_ERR(blkdev))
+		return PTR_ERR(blkdev);
+	nbytes = i_size_read(blkdev->bd_inode);	/* sample before blkdev_put() */
+	blkdev_put(blkdev, FMODE_READ);
+	return nbytes;
+}
+EXPORT_SYMBOL(scst_bdev_size);
+
+/* Size of @path, which may name either a regular file or a block device. */
+loff_t scst_file_or_bdev_size(const char *path)
+{
+	umode_t file_mode = 0;	/* stays 0 if scst_file_size() cannot open @path */
+	loff_t size = scst_file_size(path, &file_mode);
+
+	if (S_ISREG(file_mode))
+		return size;
+	/* Mode still unset (e.g. the open failed) or a block device. */
+	if (file_mode == 0 || S_ISBLK(file_mode))
+		return scst_bdev_size(path);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(scst_file_or_bdev_size);
+
+/**
+ * scst_readv - read data from a file into a kernel buffer
  * @file: File to read from.
  * @vec:  Pointer to first element of struct kvec array.
  * @vlen: Number of elements of the kvec array.
@@ -6823,11 +6807,7 @@
 		goto out_finish;
 	}
 
-	c = sg_cmp(cmd->sg, cwr_cmd->sg, 0, 0, &miscompare_offs
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-		, KM_USER0, KM_USER1
-#endif
-	);
+	c = sg_cmp(cmd->sg, cwr_cmd->sg, 0, 0, &miscompare_offs);
 	if (!c) {
 		scst_set_cmd_error_and_inf(cwr_cmd,
 			SCST_LOAD_SENSE(scst_sense_miscompare_error), miscompare_offs);
@@ -6941,6 +6921,14 @@
 		goto out_done;
 	}
 
+	if (cmd->bufflen != scst_cmd_get_expected_transfer_len_data(cmd)) {
+		PRINT_ERROR("COMPARE AND WRITE: data buffer length mismatch (CDB %u <> ini %u)",
+			    cmd->bufflen,
+			    scst_cmd_get_expected_transfer_len_data(cmd));
+		scst_set_invalid_field_in_cdb(cmd, 13/*NLB*/, 0);
+		goto out_done;
+	}
+
 	/* ToDo: HWALIGN'ed kmem_cache */
 	cwrp = kzalloc(sizeof(*cwrp), GFP_KERNEL);
 	if (cwrp == NULL) {
@@ -6952,14 +6940,6 @@
 	cwrp->cwr_orig_cmd = cmd;
 	cwrp->cwr_finish_fn = scst_cwr_read_cmd_finished;
 
-	if (cmd->bufflen != scst_cmd_get_expected_transfer_len_data(cmd)) {
-		PRINT_ERROR("COMPARE AND WRITE: data buffer length mismatch (CDB %u <> ini %u)",
-			    cmd->bufflen,
-			    scst_cmd_get_expected_transfer_len_data(cmd));
-		scst_set_invalid_field_in_cdb(cmd, 13/*NLB*/, 0);
-		goto out_done;
-	}
-
 	/*
 	 * As required by SBC, DIF PI, if any, is not checked for the read part
 	 */
@@ -7071,14 +7051,12 @@
 				       sense, 15, 0, 0);
 		TRACE_DBG("RELEASE done: %x", rc);
 
-		if (scsi_status_is_good(rc)) {
+		if (scsi_status_is_good(rc))
 			break;
-		} else {
-			PRINT_ERROR("RELEASE failed: %d", rc);
-			PRINT_BUFFER("RELEASE sense", sense, sizeof(sense));
-			scst_check_internal_sense(dev, rc, sense,
-				sizeof(sense));
-		}
+
+		PRINT_ERROR("RELEASE failed: %d", rc);
+		PRINT_BUFFER("RELEASE sense", sense, sizeof(sense));
+		scst_check_internal_sense(dev, rc, sense, sizeof(sense));
 	}
 
 out:
@@ -7148,15 +7126,9 @@
 	INIT_LIST_HEAD(&sess->init_deferred_cmd_list);
 	INIT_LIST_HEAD(&sess->init_deferred_mcmd_list);
 	INIT_LIST_HEAD(&sess->sess_cm_list_id_list);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
 	INIT_DELAYED_WORK(&sess->sess_cm_list_id_cleanup_work,
 			  sess_cm_list_id_cleanup_work_fn);
 	INIT_DELAYED_WORK(&sess->hw_pending_work, scst_hw_pending_work_fn);
-#else
-	INIT_WORK(&sess->sess_cm_list_id_cleanup_work,
-		  sess_cm_list_id_cleanup_work_fn, sess);
-	INIT_WORK(&sess->hw_pending_work, scst_hw_pending_work_fn, sess);
-#endif
 	spin_lock_init(&sess->lat_stats_lock);
 
 	sess->initiator_name = kstrdup(initiator_name, gfp_mask);
@@ -7473,10 +7445,8 @@
 
 	scst_sess_put(cmd->sess);
 
-	if (likely(cmd->cpu_cmd_counter)) {
-		scst_put(cmd->cpu_cmd_counter);
-		cmd->cpu_cmd_counter = NULL;
-	}
+	if (likely(cmd->counted))
+		scst_put_cmd(cmd);
 
 	EXTRACHECKS_BUG_ON(cmd->pre_alloced && cmd->internal);
 
@@ -7716,10 +7686,8 @@
 
 	scst_sess_put(mcmd->sess);
 
-	if (mcmd->cpu_cmd_counter) {
-		scst_put(mcmd->cpu_cmd_counter);
-		mcmd->cpu_cmd_counter = NULL;
-	}
+	if (mcmd->counted)
+		scst_put_mcmd(mcmd);
 
 	mempool_free(mcmd, scst_mgmt_mempool);
 
@@ -7945,7 +7913,6 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 struct blk_kern_sg_work {
 	atomic_t bios_inflight;
 	struct sg_table sg_table;
@@ -7971,6 +7938,8 @@
 	return;
 }
 
+static inline void scst_free_bio(struct bio *bio);
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 static void blk_bio_map_kern_endio(struct bio *bio, int err)
 {
@@ -7994,53 +7963,17 @@
 				unsigned long flags;
 
 				local_irq_save(flags);	/* to protect KMs */
-				sg_copy(bw->src_sgl, bw->sg_table.sgl, 0, 0
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-					, KM_BIO_DST_IRQ, KM_BIO_SRC_IRQ
-#endif
-					);
+				sg_copy(bw->src_sgl, bw->sg_table.sgl, 0, 0);
 				local_irq_restore(flags);
 			}
 			blk_free_kern_sg_work(bw);
 		}
 	}
 
-	bio_put(bio);
+	scst_free_bio(bio);
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-/*
- * See also patch "block: Add blk_make_request(), takes bio, returns a
- * request" (commit 79eb63e9e5875b84341a3a05f8e6ae9cdb4bb6f6).
- */
-static struct request *blk_make_request(struct request_queue *q,
-					struct bio *bio,
-					gfp_t gfp_mask)
-{
-	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
-
-	if (unlikely(!rq))
-		return ERR_PTR(-ENOMEM);
-
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-
-	for ( ; bio; bio = bio->bi_next) {
-		struct bio *bounce_bio = bio;
-		int ret;
-
-		blk_queue_bounce(q, &bounce_bio);
-		ret = blk_rq_append_bio(q, rq, bounce_bio);
-		if (unlikely(ret)) {
-			blk_put_request(rq);
-			return ERR_PTR(ret);
-		}
-	}
-
-	return rq;
-}
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) */
-
 /*
  * Copy an sg-list. This function is related to bio_copy_kern() but duplicates
  * an sg-list instead of creating a bio out of a single kernel address range.
@@ -8078,7 +8011,11 @@
 	for_each_sg(new_sgl, sg, new_sgl_nents, i) {
 		struct page *pg;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0)
 		pg = alloc_page(q->bounce_gfp | gfp_mask);
+#else
+		pg = alloc_page(gfp_mask);
+#endif
 		if (pg == NULL)
 			goto err_free_table;
 
@@ -8094,11 +8031,7 @@
 		 * sgl might have the last element in sgl not marked as last in
 		 * SG chaining.
 		 */
-		sg_copy(new_sgl, sgl, 0, to_copy
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-			, KM_USER0, KM_USER1
-#endif
-			);
+		sg_copy(new_sgl, sgl, 0, to_copy);
 	}
 
 out:
@@ -8116,16 +8049,8 @@
 	goto out;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-static void bio_kmalloc_destructor(struct bio *bio)
-{
-	kfree(bio->bi_io_vec);
-	kfree(bio);
-}
-#endif
-
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
-static blk_mq_req_flags_t gfp_mask_to_flags(gfp_t gfp_mask)
+static blk_mq_req_flags_t scst_gfp_mask_to_flags(gfp_t gfp_mask)
 {
 	switch (gfp_mask) {
 	case GFP_KERNEL:
@@ -8140,6 +8065,124 @@
 }
 #endif
 
+/**
+ * scst_alloc_passthrough_request - Allocate a SCSI pass-through request.
+ * @q: Request queue.
+ * @rw: READ selects the *_IN request opcode; any other value selects *_OUT
+ *	(kernels before v4.11 pass @rw to blk_get_request() unchanged).
+ * @gfp_mask: GFP_KERNEL, GFP_ATOMIC or GFP_NOIO; converted with
+ *	scst_gfp_mask_to_flags() on kernel v4.18 and later.
+ *
+ * Return: a valid request pointer, NULL or an error pointer. NULL is only
+ * returned by the legacy block layer (pre-v5.0 kernels) if allocation fails.
+ */
+static inline struct request *
+scst_alloc_passthrough_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	return blk_get_request(q, rw, gfp_mask);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
+	return blk_get_request(q, rw == READ ? REQ_OP_SCSI_IN : REQ_OP_SCSI_OUT,
+			       gfp_mask);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+	return blk_get_request(q, rw == READ ? REQ_OP_SCSI_IN : REQ_OP_SCSI_OUT,
+			       scst_gfp_mask_to_flags(gfp_mask));
+#else
+	return blk_mq_alloc_request(q, rw == READ ? REQ_OP_DRV_IN : REQ_OP_DRV_OUT,
+				    scst_gfp_mask_to_flags(gfp_mask));
+#endif
+}
+
+/**
+ * scst_init_passthrough_request - Init a SCSI pass-through request.
+ * @rq: Request pointer.
+ *
+ * Empty on kernel v5.15+ and RHEL 9.1+; scsi_req_init() is used otherwise.
+ */
+static void
+scst_init_passthrough_request(struct request *rq)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(9, 1))
+	/* Nothing to do. */
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ||	\
+	defined(CONFIG_SUSE_KERNEL)
+	scsi_req_init(scsi_req(rq));
+#else
+	scsi_req_init(rq);
+#endif
+}
+
+/**
+ * scst_free_passthrough_request - Release a SCSI pass-through request.
+ * @rq: Request to release, e.g. one from scst_alloc_passthrough_request().
+ */
+static void
+scst_free_passthrough_request(struct request *rq)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(9, 1))
+	blk_mq_free_request(rq);
+#else
+	blk_put_request(rq);
+#endif
+}
+
+/**
+ * scst_alloc_bio - Allocate a bio with room for @nr_vecs bio_vecs.
+ * @nr_vecs: Number of bio_vecs to allocate.
+ * @gfp_mask: The GFP_* mask given to the slab allocator.
+ *
+ * Return: pointer to the new bio on success, NULL on failure. Free the bio
+ * with scst_free_bio().
+ */
+static inline struct bio *
+scst_alloc_bio(unsigned short nr_vecs, gfp_t gfp_mask)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(9, 1))
+	/*
+	 * The bio still has to be set up with bio_init() here. See also
+	 * commit 066ff571011d ("block: turn bio_kmalloc into a simple
+	 * kmalloc wrapper").
+	 */
+	struct bio *new_bio = bio_kmalloc(nr_vecs, gfp_mask);
+
+	if (!new_bio)
+		return NULL;
+	bio_init(new_bio, NULL, new_bio->bi_inline_vecs, nr_vecs, 0);
+	return new_bio;
+#else
+	return bio_kmalloc(gfp_mask, nr_vecs);
+#endif
+}
+
+/**
+ * scst_free_bio - Release a bio obtained from scst_alloc_bio().
+ * @bio: bio pointer.
+ */
+static inline void
+scst_free_bio(struct bio *bio)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ||		\
+	(defined(RHEL_RELEASE_CODE) &&				\
+	 RHEL_RELEASE_CODE -0 >= RHEL_RELEASE_VERSION(9, 1))
+	/*
+	 * Undo bio_init() and kfree() the bio. See also commit 066ff571011d
+	 * ("block: turn bio_kmalloc into a simple kmalloc wrapper").
+	 */
+	bio_uninit(bio);
+	kfree(bio);
+#else
+	bio_put(bio);
+#endif
+}
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) ||			\
 (defined(CONFIG_SUSE_KERNEL) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
 static struct request *blk_make_request(struct request_queue *q,
@@ -8148,25 +8191,11 @@
 {
 	struct request *rq;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
-	rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
-	rq = blk_get_request(q, bio_data_dir(bio) == READ ? REQ_OP_SCSI_IN :
-			     REQ_OP_SCSI_OUT, gfp_mask);
-#else
-	rq = blk_get_request(q, bio_data_dir(bio) == READ ? REQ_OP_SCSI_IN :
-			     REQ_OP_SCSI_OUT, gfp_mask_to_flags(gfp_mask));
-#endif
-
-	if (IS_ERR(rq))
+	rq = scst_alloc_passthrough_request(q, bio_data_dir(bio), gfp_mask);
+	if (IS_ERR_OR_NULL(rq))
 		return rq;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ||	\
-	defined(CONFIG_SUSE_KERNEL)
-	scsi_req_init(scsi_req(rq));
-#else
-	scsi_req_init(rq);
-#endif
+	scst_init_passthrough_request(rq);
 
 	for_each_bio(bio) {
 		int ret;
@@ -8177,10 +8206,6 @@
 
 		blk_queue_bounce(q, &bounce_bio);
 		ret = blk_rq_append_bio(rq, bounce_bio);
-		if (unlikely(ret)) {
-			blk_put_request(rq);
-			return ERR_PTR(ret);
-		}
 		/*
 		 * See also commit 0abc2a10389f ("block: fix
 		 * blk_rq_append_bio"). That commit has been backported to
@@ -8189,17 +8214,15 @@
 #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 11) &&	\
 	!defined(CONFIG_SUSE_KERNEL)
 		ret = blk_rq_append_bio(rq, bio);
-		if (unlikely(ret)) {
-			blk_put_request(rq);
-			return ERR_PTR(ret);
-		}
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0)
 		ret = blk_rq_append_bio(rq, &bio);
+#else
+		ret = blk_rq_append_bio(rq, bio);
+#endif
 		if (unlikely(ret)) {
-			blk_put_request(rq);
+			scst_free_passthrough_request(rq);
 			return ERR_PTR(ret);
 		}
-#endif
 	}
 
 	return rq;
@@ -8231,7 +8254,7 @@
 	 */
 	max_nr_vecs = min_t(int,
 		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
-		BIO_MAX_PAGES);
+		BIO_MAX_VECS);
 
 	TRACE_DBG("max_nr_vecs %d, nents %d, reading %d", max_nr_vecs,
 		nents, reading);
@@ -8272,23 +8295,14 @@
 			int rc;
 
 			if (need_new_bio) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-				bio = bio_alloc_bioset(gfp_mask, max_nr_vecs, NULL);
-				if (bio)
-					bio->bi_destructor =
-						bio_kmalloc_destructor;
-#else
-				bio = bio_kmalloc(gfp_mask, max_nr_vecs);
-#endif
+				bio = scst_alloc_bio(max_nr_vecs, gfp_mask);
 				if (bio == NULL) {
 					rq = ERR_PTR(-ENOMEM);
 					goto out_free_bios;
 				}
 
 				if (!reading)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-					bio->bi_rw |= 1 << BIO_RW;
-#elif (!defined(CONFIG_SUSE_KERNEL) &&			\
+#if (!defined(CONFIG_SUSE_KERNEL) && \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) || \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 					bio->bi_rw |= REQ_WRITE;
@@ -8364,7 +8378,7 @@
 	while (hbio != NULL) {
 		bio = hbio;
 		hbio = hbio->bi_next;
-		bio_put(bio);
+		scst_free_bio(bio);
 	}
 	goto out;
 }
@@ -8386,24 +8400,12 @@
 	struct request *rq;
 
 	if (!sgl) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
-		rq = blk_get_request(q, reading ? READ : WRITE, gfp);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
-		rq = blk_get_request(q, reading ? REQ_OP_SCSI_IN :
-				     REQ_OP_SCSI_OUT, gfp);
-#else
-		rq = blk_get_request(q, reading ? REQ_OP_SCSI_IN :
-				     REQ_OP_SCSI_OUT, gfp_mask_to_flags(gfp));
-#endif
-		if (unlikely(!rq))
+		rq = scst_alloc_passthrough_request(q, reading ? READ : WRITE,
+						    gfp);
+		if (IS_ERR_OR_NULL(rq))
 			return ERR_PTR(-ENOMEM);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ||	\
-	defined(CONFIG_SUSE_KERNEL)
-		scsi_req_init(scsi_req(rq));
-#else
-		scsi_req_init(rq);
-#endif
+		scst_init_passthrough_request(rq);
 		goto out;
 	}
 
@@ -8428,7 +8430,6 @@
 out:
 	return rq;
 }
-#endif
 
 /*
  * Can switch to the next dst_sg element, so, to copy to strictly only
@@ -8437,12 +8438,7 @@
  */
 static int sg_copy_elem(struct scatterlist **pdst_sg, size_t *pdst_len,
 			size_t *pdst_offs, struct scatterlist *src_sg,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-			size_t copy_len,
-			enum km_type d_km_type, enum km_type s_km_type)
-#else
 			size_t copy_len)
-#endif
 {
 	int res = 0;
 	struct scatterlist *dst_sg;
@@ -8462,19 +8458,10 @@
 		void *saddr, *daddr;
 		size_t n;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-		saddr = kmap_atomic(src_page +
-					 (src_offs >> PAGE_SHIFT), s_km_type) +
-				    (src_offs & ~PAGE_MASK);
-		daddr = kmap_atomic(dst_page +
-					(dst_offs >> PAGE_SHIFT), d_km_type) +
-				    (dst_offs & ~PAGE_MASK);
-#else
 		saddr = kmap_atomic(src_page + (src_offs >> PAGE_SHIFT)) +
 			(src_offs & ~PAGE_MASK);
 		daddr = kmap_atomic(dst_page + (dst_offs >> PAGE_SHIFT)) +
 			(dst_offs & ~PAGE_MASK);
-#endif
 
 		if (((src_offs & ~PAGE_MASK) == 0) &&
 		    ((dst_offs & ~PAGE_MASK) == 0) &&
@@ -8493,13 +8480,8 @@
 		dst_offs += n;
 		src_offs += n;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-		kunmap_atomic(saddr, s_km_type);
-		kunmap_atomic(daddr, d_km_type);
-#else
 		kunmap_atomic(saddr);
 		kunmap_atomic(daddr);
-#endif
 
 		res += n;
 		copy_len -= n;
@@ -8540,12 +8522,7 @@
  *    NULL. Returns number of bytes copied.
  */
 static int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	    int nents_to_copy, size_t copy_len,
-	    enum km_type d_km_type, enum km_type s_km_type)
-#else
 	    int nents_to_copy, size_t copy_len)
-#endif
 {
 	int res = 0;
 	size_t dst_len, dst_offs;
@@ -8561,11 +8538,7 @@
 
 	do {
 		int copied = sg_copy_elem(&dst_sg, &dst_len, &dst_offs,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-				src_sg, copy_len, d_km_type, s_km_type);
-#else
 				src_sg, copy_len);
-#endif
 		copy_len -= copied;
 		res += copied;
 		if ((copy_len == 0) || (dst_sg == NULL))
@@ -8582,12 +8555,24 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) &&	\
 	!defined(CONFIG_SUSE_KERNEL)
 static void scsi_end_async(struct request *req, int error)
 #else
-static void scsi_end_async(struct request *req, blk_status_t error)
+
+/*
+ * See also commit de671d6116b5 ("block: change request end_io handler to pass
+ * back a return value") # v6.1.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
+#define RQ_END_IO_RET void
+#else
+#define RQ_END_IO_RET enum rq_end_io_ret
+#endif
+
+static RQ_END_IO_RET scsi_end_async(struct request *req, blk_status_t error)
 #endif
 {
 	struct scsi_io_context *sioc = req->end_io_data;
@@ -8610,6 +8595,7 @@
 	if (sioc->done) {
 		int resid_len;
 		long result;
+		char *sense;
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
 		result = scsi_req(req)->result;
@@ -8624,30 +8610,31 @@
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 		resid_len = scsi_req(req)->resid_len;
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
-		resid_len = req->resid_len;
+		sense = SREQ_SENSE(scsi_req(req));
 #else
-		/*
-		 * A quote from commit c3a4d78c580d: "rq->data_len served two
-		 * purposes - the length of data buffer on issue and the
-		 * residual count on completion."
-		 */
-		resid_len = req->data_len;
+		resid_len = req->resid_len;
+		sense = sioc->sense;
 #endif
 
-		sioc->done(sioc->data, sioc->sense, result, resid_len);
+		sioc->done(sioc->data, sense, result, resid_len);
 	}
 
 	kmem_cache_free(scsi_io_context_cache, sioc);
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 1, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 21, 0) &&	\
 	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 8)
 	/* See also commit 92bc5a24844a ("block: remove __blk_put_request()") */
 	__blk_put_request(req->q, req);
 #else
-	blk_put_request(req);
+	scst_free_passthrough_request(req);
 #endif
 	return;
+#else
+	return RQ_END_IO_FREE;
+#endif
 }
 
 /**
@@ -8662,7 +8649,9 @@
 	int res = 0;
 	struct request_queue *q = cmd->dev->scsi_dev->request_queue;
 	struct request *rq;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)
+	struct scsi_cmnd *req;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 	struct scsi_request *req;
 #else
 	struct request *req;
@@ -8722,27 +8711,46 @@
 
 	req = scsi_req(rq);
 	req->cmd_len = cmd_len;
-	if (req->cmd_len <= BLK_MAX_CDB) {
-		memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
-		memcpy(req->cmd, cmd->cdb, cmd->cdb_len);
-	} else
-		req->cmd = cmd->cdb;
+	if (req->cmd_len > MAX_COMMAND_SIZE) {
+		PRINT_ERROR("SCSI command length %d exceeds the limit %d",
+				req->cmd_len, MAX_COMMAND_SIZE);
+		res = -EINVAL;
+#ifdef QUEUE_FLAG_BIDI
+		goto out_free_unmap;
+#else
+		goto out_free_sioc;
+#endif
+	}
 
-	req->sense = sioc->sense;
+	memset(SREQ_CP(req), 0, MAX_COMMAND_SIZE); /* ATAPI hates garbage after CDB */
+	memcpy(SREQ_CP(req), cmd->cdb, cmd->cdb_len);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	/*
+	 * See commit 772c8f6f3bbd ("Merge tag 'for-4.11/linus-merge-signed'
+	 * of git://git.kernel.dk/linux-block")
+	 *
+	 * Since v4.11, both scsi_init_rq and scsi_init_request (later renamed
+	 * to scsi_mq_init_request in e7008ff5c61a) initialize the scsi_request
+	 * sense buffer, so only older kernels need us to provide our own.
+	 */
+	SREQ_SENSE(req) = sioc->sense;
 	req->sense_len = sizeof(sioc->sense);
+#endif
+
 	rq->timeout = cmd->timeout;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
 	req->retries = cmd->retries;
 #else
 	rq->retries = cmd->retries;
 #endif
+	rq->end_io      = scsi_end_async;
 	rq->end_io_data = sioc;
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 35)
-	rq->cmd_flags |= REQ_FAILFAST_MASK;
-#endif
 
-	blk_execute_rq_nowait(rq->q, NULL, rq,
-		(cmd->queue_type == SCST_CMD_QUEUE_HEAD_OF_QUEUE), scsi_end_async);
+	rq->cmd_flags |= REQ_FAILFAST_MASK;
+
+	blk_execute_rq_nowait(rq,
+		(cmd->queue_type == SCST_CMD_QUEUE_HEAD_OF_QUEUE));
 out:
 	return res;
 
@@ -8769,7 +8777,7 @@
 	}
 	rq->bio = NULL;
 
-	blk_put_request(rq);
+	scst_free_passthrough_request(rq);
 #endif
 
 out_free_sioc:
@@ -8778,8 +8786,6 @@
 }
 EXPORT_SYMBOL(scst_scsi_exec_async);
 
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30) */
-
 /*
  * Can switch to the next dst_sg element, so, to cmp to strictly only
  * one dst_sg element, it must be either last in the chain, or
@@ -8788,9 +8794,6 @@
 static int sg_cmp_elem(struct scatterlist **pdst_sg, size_t *pdst_len,
 			size_t *pdst_offs, struct scatterlist *src_sg,
 			size_t cmp_len, int *miscompare_offs,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-			enum km_type d_km_type, enum km_type s_km_type,
-#endif
 			bool *cmp_res)
 {
 	int res = 0;
@@ -8814,17 +8817,10 @@
 		size_t n;
 		int rc;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-		saddr = kmap_atomic(src_page + (src_offs >> PAGE_SHIFT), s_km_type) +
-			(src_offs & ~PAGE_MASK);
-		daddr = kmap_atomic(dst_page + (dst_offs >> PAGE_SHIFT), d_km_type) +
-			(dst_offs & ~PAGE_MASK);
-#else
 		saddr = kmap_atomic(src_page + (src_offs >> PAGE_SHIFT)) +
 			(src_offs & ~PAGE_MASK);
 		daddr = kmap_atomic(dst_page + (dst_offs >> PAGE_SHIFT)) +
 			(dst_offs & ~PAGE_MASK);
-#endif
 
 		if (((src_offs & ~PAGE_MASK) == 0) &&
 		    ((dst_offs & ~PAGE_MASK) == 0) &&
@@ -8896,13 +8892,8 @@
 		dst_offs += n;
 		src_offs += n;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-		kunmap_atomic(saddr, s_km_type);
-		kunmap_atomic(daddr, d_km_type);
-#else
 		kunmap_atomic(saddr);
 		kunmap_atomic(daddr);
-#endif
 
 		res += n;
 		cmp_len -= n;
@@ -8928,13 +8919,8 @@
 	return res;
 
 out_unmap:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	kunmap_atomic(saddr, s_km_type);
-	kunmap_atomic(daddr, d_km_type);
-#else
 	kunmap_atomic(saddr);
 	kunmap_atomic(daddr);
-#endif
 	goto out;
 }
 
@@ -8957,11 +8943,7 @@
  * the same data in min(sg1_size, sg2_size) size will match!
  */
 static bool sg_cmp(struct scatterlist *dst_sg, struct scatterlist *src_sg,
-	    int nents_to_cmp, size_t cmp_len, int *miscompare_offs
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	    , enum km_type d_km_type, enum km_type s_km_type
-#endif
-	    )
+	    int nents_to_cmp, size_t cmp_len, int *miscompare_offs)
 {
 	bool res = true;
 	size_t dst_len, dst_offs;
@@ -8991,9 +8973,6 @@
 
 		compared = sg_cmp_elem(&dst_sg, &dst_len, &dst_offs,
 				src_sg, cmp_len, miscompare_offs,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-				d_km_type, s_km_type,
-#endif
 				&res);
 		if (!res) {
 			if (miscompare_offs != NULL) {
@@ -9032,9 +9011,6 @@
 	struct scatterlist *src_sg, *dst_sg;
 	struct scatterlist *src_sg_dif, *dst_sg_dif;
 	unsigned int to_copy, to_copy_dif;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	int atomic = scst_cmd_atomic(cmd);
-#endif
 
 	TRACE_ENTRY();
 
@@ -9078,24 +9054,12 @@
 		goto out;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	sg_copy(dst_sg, src_sg, 0, to_copy,
-		atomic ? KM_SOFTIRQ0 : KM_USER0,
-		atomic ? KM_SOFTIRQ1 : KM_USER1);
-#else
 	sg_copy(dst_sg, src_sg, 0, to_copy);
-#endif
 
 	if ((src_sg_dif == NULL) || (dst_sg_dif == NULL) || (to_copy_dif == 0))
 		goto out;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
-	sg_copy(dst_sg_dif, src_sg_dif, 0, to_copy_dif,
-		atomic ? KM_SOFTIRQ0 : KM_USER0,
-		atomic ? KM_SOFTIRQ1 : KM_USER1);
-#else
 	sg_copy(dst_sg_dif, src_sg_dif, 0, to_copy_dif);
-#endif
 
 out:
 	TRACE_EXIT();
@@ -9251,12 +9215,7 @@
 
 static __be16 scst_dif_crc_fn(const void *data, unsigned int len)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
 	return cpu_to_be16(crc_t10dif(data, len));
-#else
-	WARN_ON_ONCE(true);
-	return 0;
-#endif
 }
 
 static __be16 scst_dif_ip_fn(const void *data, unsigned int len)
@@ -11144,6 +11103,7 @@
 				SCST_REG_RESERVE_ALLOWED |
 				SCST_WRITE_EXCL_ALLOWED |
 				SCST_EXCL_ACCESS_ALLOWED;
+		cmd->log2_max_buf_len = 7;
 		break;
 	case SAI_GET_LBA_STATUS:
 		cmd->op_name = "GET LBA STATUS";
@@ -11575,13 +11535,12 @@
 
 	if (res != 0)
 		return res;
-	else {
+
 #ifdef CONFIG_SCST_DIF_INJECT_CORRUPTED_TAGS
-		EXTRACHECKS_BUG_ON(cmd->cdb[0] != READ_10);
-		cmd->cmd_corrupt_dif_tag = (cmd->cdb[6] & 0xE0) >> 5;
+	EXTRACHECKS_BUG_ON(cmd->cdb[0] != READ_10);
+	cmd->cmd_corrupt_dif_tag = (cmd->cdb[6] & 0xE0) >> 5;
 #endif
-		return scst_parse_rdprotect(cmd);
-	}
+	return scst_parse_rdprotect(cmd);
 }
 
 static int get_cdb_info_lba_4_len_2_wrprotect(struct scst_cmd *cmd,
@@ -11642,13 +11601,12 @@
 
 	if (res != 0)
 		return res;
-	else {
+
 #ifdef CONFIG_SCST_DIF_INJECT_CORRUPTED_TAGS
-		EXTRACHECKS_BUG_ON(cmd->cdb[0] != READ_16);
-		cmd->cmd_corrupt_dif_tag = (cmd->cdb[14] & 0xE0) >> 5;
+	EXTRACHECKS_BUG_ON(cmd->cdb[0] != READ_16);
+	cmd->cmd_corrupt_dif_tag = (cmd->cdb[14] & 0xE0) >> 5;
 #endif
-		return scst_parse_rdprotect(cmd);
-	}
+	return scst_parse_rdprotect(cmd);
 }
 
 static int get_cdb_info_lba_8_len_4_wrprotect(struct scst_cmd *cmd,
@@ -11737,6 +11695,16 @@
 	return get_cdb_info_write_same(cmd, sdbops, cmd->cdb[10] & 1 /*NDOB*/);
 }
 
+static int get_cdb_info_dyn_runtime_attr(struct scst_cmd *cmd,
+	const struct scst_sdbops *sdbops)
+{
+	/*
+	 * Read/write dyn runtime attr commands are non-standard, CDB len is 12
+	 */
+	cmd->cdb_len = 12;
+	return get_cdb_info_len_4(cmd, sdbops);
+}
+
 /**
  * scst_set_cmd_from_cdb_info() - Parse the SCSI CDB.
  * @cmd: SCSI command to parse.
@@ -11747,6 +11715,8 @@
 static int scst_set_cmd_from_cdb_info(struct scst_cmd *cmd,
 	const struct scst_sdbops *ptr)
 {
+	int res;
+
 	cmd->cdb_len = SCST_GET_CDB_LEN(cmd->cdb[0]);
 	cmd->cmd_naca = (cmd->cdb[cmd->cdb_len - 1] & CONTROL_BYTE_NACA_BIT);
 	cmd->cmd_linked = (cmd->cdb[cmd->cdb_len - 1] & CONTROL_BYTE_LINK_BIT);
@@ -11757,7 +11727,16 @@
 	cmd->lba_len = ptr->info_lba_len;
 	cmd->len_off = ptr->info_len_off;
 	cmd->len_len = ptr->info_len_len;
-	return (*ptr->get_cdb_info)(cmd, ptr);
+	cmd->log2_max_buf_len = ptr->log2_max_buf_len;
+	res = (*ptr->get_cdb_info)(cmd, ptr);
+	if (!cmd->log2_max_buf_len ||
+	    cmd->bufflen <= (1U << cmd->log2_max_buf_len))
+		return res;
+	PRINT_ERROR("Data buffer length %d is too big for SCSI command %s (max %d)",
+		    cmd->bufflen, scst_get_opcode_name(cmd),
+		    1U << cmd->log2_max_buf_len);
+	scst_set_invalid_field_in_cdb(cmd, cmd->len_off, 0);
+	return 1;
 }
 
 static int get_cdb_info_var_len(struct scst_cmd *cmd,
@@ -12077,7 +12056,7 @@
 			      ptr->devkey[9],	/* commdev */
 			      ptr->info_op_name);
 			TRACE_DBG("data direction %d, op flags 0x%x, lba off %d, "
-				"lba len %d, len off %d, len len %d",
+				"lba len %d, len off %d, len of len %d",
 				ptr->info_data_direction, ptr->info_op_flags,
 				ptr->info_lba_off, ptr->info_lba_len,
 				ptr->info_len_off, ptr->info_len_len);
@@ -12370,21 +12349,20 @@
 	 */
 
 	if (cmd->op_flags & SCST_TRANSFER_LEN_TYPE_FIXED && cmd->cdb[1] & 1) {
-		int block_size = cmd->dev->block_size;
-		uint64_t b, ob;
-		bool overflow;
+		uint32_t block_size = cmd->dev->block_size;
+		uint32_t block_shift = cmd->dev->block_shift;
+		bool overflow = shift_left_overflows(cmd->bufflen, block_shift) ||
+				shift_left_overflows(cmd->data_len, block_shift) ||
+				shift_left_overflows(cmd->out_bufflen, block_shift);
 
-		b = ((uint64_t)cmd->bufflen) * block_size;
-		ob = ((uint64_t)cmd->out_bufflen) * block_size;
-
-		overflow = (b > 0xFFFFFFFF) ||
-			   (ob > 0xFFFFFFFF);
+		BUILD_BUG_ON(sizeof(cmd->bufflen) != 4);
+		BUILD_BUG_ON(sizeof(cmd->out_bufflen) != 4);
 		if (unlikely(overflow)) {
 			PRINT_WARNING("bufflen %u, data_len %llu or out_bufflen"
 				      " %u too large for device %s (block size"
-				      " %u, b %llu, ob %llu)", cmd->bufflen,
+				      " %u)", cmd->bufflen,
 				      cmd->data_len, cmd->out_bufflen,
-				      cmd->dev->virt_name, block_size, b, ob);
+				      cmd->dev->virt_name, block_size);
 			PRINT_BUFFER("CDB", cmd->cdb, cmd->cdb_len);
 			scst_set_cmd_error(cmd, SCST_LOAD_SENSE(
 					scst_sense_block_out_range_error));
@@ -12392,12 +12370,9 @@
 			goto out;
 		}
 
-		cmd->bufflen = b;
-		cmd->out_bufflen = ob;
-
-		/* cmd->data_len is 64-bit, so can't overflow here */
-		BUILD_BUG_ON(sizeof(cmd->data_len) < 8);
-		cmd->data_len *= block_size;
+		cmd->bufflen <<= block_shift;
+		cmd->out_bufflen <<= block_shift;
+		cmd->data_len <<= block_shift;
 	}
 
 	if ((cmd->op_flags & (SCST_SMALL_TIMEOUT | SCST_LONG_TIMEOUT)) == 0)
@@ -12838,7 +12813,7 @@
 		sl = scst_set_sense(sense, sense_len, dev->d_sense,
 			SCST_LOAD_SENSE(scst_sense_reset_UA));
 		scst_dev_check_set_UA(dev, NULL, sense, sl);
-	} else if ((status_byte(result) == CHECK_CONDITION) &&
+	} else if ((result & 0xff) == SAM_STAT_CHECK_CONDITION &&
 		   scst_is_ua_sense(sense, sense_len))
 		scst_dev_check_set_UA(dev, NULL, sense, sense_len);
 
@@ -13719,7 +13694,7 @@
 			scst_check_internal_sense(dev, rc, sense_buffer,
 				sizeof(sense_buffer));
 #if 0
-			if ((status_byte(rc) == CHECK_CONDITION) &&
+			if ((rc & 0xff) == SAM_STAT_CHECK_CONDITION &&
 			    scst_sense_valid(sense_buffer)) {
 #else
 			/*
@@ -13759,9 +13734,11 @@
 					goto brk;
 				}
 				switch (driver_byte(rc)) {
+#if defined(DRIVER_BUSY) && defined(DRIVER_SOFT)
 				case DRIVER_BUSY:
 				case DRIVER_SOFT:
 					break;
+#endif
 				default:
 					goto brk;
 				}
@@ -14047,8 +14024,6 @@
 process_qerr:
 	scst_process_qerr(cmd);
 
-	scst_store_sense(cmd);
-
 	res = 0;
 
 out:
@@ -14495,11 +14470,11 @@
 	return;
 }
 
-/**
- ** We currently have only few saved parameters and it is impossible to get
- ** pointer on a bit field, so let's have a simple straightforward
- ** implementation.
- **/
+/*
+ * We currently have only few saved parameters and it is impossible to get
+ * pointer on a bit field, so let's have a simple straightforward
+ * implementation.
+ */
 
 #define SCST_TAS_LABEL		"TAS"
 #define SCST_QERR_LABEL		"QERR"
@@ -14878,16 +14853,10 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_ext_blocking_done_fn(void *p)
-{
-	struct scst_device *dev = p;
-#else
 static void scst_ext_blocking_done_fn(struct work_struct *work)
 {
 	struct scst_device *dev = container_of(work, struct scst_device,
 					ext_blockers_work);
-#endif
 
 	TRACE_ENTRY();
 
@@ -15098,20 +15067,6 @@
 }
 
 /* Abstract vfs_unlink() for different kernel versions (as possible) */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-void scst_vfs_unlink_and_put_nd(struct nameidata *nd)
-{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-	vfs_unlink(nd->dentry->d_parent->d_inode, nd->dentry);
-	dput(nd->dentry);
-	mntput(nd->mnt);
-#else
-	vfs_unlink(nd->path.dentry->d_parent->d_inode,
-		nd->path.dentry);
-	path_put(&nd->path);
-#endif
-}
-#endif
 
 void scst_vfs_unlink_and_put(struct path *path)
 {
@@ -15120,29 +15075,18 @@
 	(!defined(CONFIG_SUSE_KERNEL) || \
 	 LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0))
 	vfs_unlink(path->dentry->d_parent->d_inode, path->dentry);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)
 	vfs_unlink(path->dentry->d_parent->d_inode, path->dentry, NULL);
+#else
+	vfs_unlink(&init_user_ns, path->dentry->d_parent->d_inode, path->dentry,
+		   NULL);
 #endif
 	path_put(path);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-void scst_path_put(struct nameidata *nd)
-{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-	dput(nd->dentry);
-	mntput(nd->mnt);
-#else
-	path_put(&nd->path);
-#endif
-}
-EXPORT_SYMBOL(scst_path_put);
-#endif
-
 int scst_copy_file(const char *src, const char *dest)
 {
 	int res = 0;
-	struct inode *inode;
 	loff_t file_size, pos;
 	uint8_t *buf = NULL;
 	struct file *file_src = NULL, *file_dest = NULL;
@@ -15158,6 +15102,12 @@
 
 	TRACE_DBG("Copying '%s' into '%s'", src, dest);
 
+	file_size = scst_file_or_bdev_size(src);
+	if (file_size < 0) {
+		res = file_size;
+		goto out;
+	}
+
 	file_src = filp_open(src, O_RDONLY, 0);
 	if (IS_ERR(file_src)) {
 		res = PTR_ERR(file_src);
@@ -15173,20 +15123,6 @@
 		goto out_close;
 	}
 
-	inode = file_inode(file_src);
-
-	if (S_ISREG(inode->i_mode)) {
-		/* Nothing to do */
-	} else if (S_ISBLK(inode->i_mode)) {
-		inode = inode->i_bdev->bd_inode;
-	} else {
-		PRINT_ERROR("Invalid file mode 0x%x", inode->i_mode);
-		res = -EINVAL;
-		goto out_skip;
-	}
-
-	file_size = inode->i_size;
-
 	buf = vmalloc(file_size);
 	if (buf == NULL) {
 		res = -ENOMEM;
@@ -15232,27 +15168,15 @@
 int scst_remove_file(const char *name)
 {
 	int res = 0;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	struct nameidata nd;
-#else
 	struct path path;
-#endif
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	res = path_lookup(name, 0, &nd);
-	if (!res)
-		scst_vfs_unlink_and_put_nd(&nd);
-	else
-		TRACE_DBG("Unable to lookup file '%s' - error %d", name, res);
-#else
 	res = kern_path(name, 0, &path);
 	if (!res)
 		scst_vfs_unlink_and_put(&path);
 	else
 		TRACE_DBG("Unable to lookup file '%s' - error %d", name, res);
-#endif
 
 	TRACE_EXIT_RES(res);
 	return res;
@@ -15279,7 +15203,7 @@
 		res = PTR_ERR(file);
 		PRINT_ERROR("Unable to (re)create file '%s' - error %d",
 			name, res);
-		goto out_set_fs;
+		goto out_remove_file;
 	}
 
 	TRACE_DBG("Writing file '%s'", name);
@@ -15315,7 +15239,7 @@
 
 	filp_close(file, NULL);
 
-out_set_fs:
+out_remove_file:
 	if (res == 0)
 		scst_remove_file(name1);
 	else
@@ -15332,7 +15256,7 @@
 	filp_close(file, NULL);
 	if (res > 0)
 		res = -EIO;
-	goto out_set_fs;
+	goto out_remove_file;
 }
 EXPORT_SYMBOL_GPL(scst_write_file_transactional);
 
@@ -15341,13 +15265,18 @@
 {
 	int res;
 	struct file *file = NULL;
-	struct inode *inode;
 	loff_t file_size, pos;
 
 	TRACE_ENTRY();
 
 	TRACE_DBG("Loading file '%s'", file_name);
 
+	file_size = scst_file_or_bdev_size(file_name);
+	if (file_size < 0) {
+		res = file_size;
+		goto out;
+	}
+
 	file = filp_open(file_name, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		res = PTR_ERR(file);
@@ -15355,20 +15284,6 @@
 		goto out;
 	}
 
-	inode = file_inode(file);
-
-	if (S_ISREG(inode->i_mode)) {
-		/* Nothing to do */
-	} else if (S_ISBLK(inode->i_mode)) {
-		inode = inode->i_bdev->bd_inode;
-	} else {
-		PRINT_ERROR("Invalid file mode 0x%x", inode->i_mode);
-		res = -EINVAL;
-		goto out_close;
-	}
-
-	file_size = inode->i_size;
-
 	if (file_size > size) {
 		PRINT_ERROR("Supplied buffer (%d) too small (need %d)", size,
 			(int)file_size);
@@ -15495,9 +15410,6 @@
 	TRACE_BUFFER("scst_scsi_op_list", scst_scsi_op_list,
 		sizeof(scst_scsi_op_list));
 
-	scst_release_acg_wq = create_workqueue("scst_release_acg");
-	WARN_ON_ONCE(IS_ERR(scst_release_acg_wq));
-
 	TRACE_EXIT();
 	return;
 }
@@ -15506,37 +15418,45 @@
 {
 	int res = 0;
 
+	TRACE_ENTRY();
+
 	scst_scsi_op_list_init();
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
+	scst_release_acg_wq = alloc_workqueue("scst_release_acg", WQ_MEM_RECLAIM, 1);
+	if (unlikely(!scst_release_acg_wq)) {
+		PRINT_ERROR("Failed to allocate scst_release_acg_wq");
+		res = -ENOMEM;
+		goto out;
+	}
+
 	scsi_io_context_cache = kmem_cache_create("scst_scsi_io_context",
 					sizeof(struct scsi_io_context),
 					__alignof__(struct scsi_io_context),
 					SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN, NULL);
 	if (!scsi_io_context_cache) {
-		PRINT_ERROR("%s", "Can't init scsi io context cache");
+		PRINT_ERROR("Can't init scsi io context cache");
 		res = -ENOMEM;
-		goto out;
+		goto free_wq;
 	}
 
 out:
-#endif
 	TRACE_EXIT_RES(res);
 	return res;
+
+free_wq:
+	destroy_workqueue(scst_release_acg_wq);
+	goto out;
 }
 
 void scst_lib_exit(void)
 {
-	/* Wait until any ongoing acg->put_work has finished. */
-	flush_workqueue(scst_release_acg_wq);
+	/* All pending work items will be drained by destroy_workqueue() */
 	destroy_workqueue(scst_release_acg_wq);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
-	BUILD_BUG_ON(SCST_MAX_CDB_SIZE != BLK_MAX_CDB);
+	BUILD_BUG_ON(SCST_MAX_CDB_SIZE != MAX_COMMAND_SIZE);
 	BUILD_BUG_ON(SCST_SENSE_BUFFERSIZE < SCSI_SENSE_BUFFERSIZE);
 
 	kmem_cache_destroy(scsi_io_context_cache);
-#endif
 }
 
 #ifdef CONFIG_SCST_DEBUG

diff --git a/scst/scst/src/scst_main.c b/scst/scst/src/scst_main.c
index 1503447..d466ec7 100644
--- a/scst/scst/src/scst_main.c
+++ b/scst/scst/src/scst_main.c

@@ -47,12 +47,6 @@
 details.
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30) && \
-	!defined(CONFIG_SCST_STRICT_SERIALIZING)
-#warning CONFIG_SCST_STRICT_SERIALIZING has not been defined. \
-Pass-through dev handlers will not work.
-#endif
-
 /*
  ** SCST global variables. They are all uninitialized to have their layout in
  ** memory be exactly as specified. Otherwise compiler puts zero-initialized
@@ -112,16 +106,15 @@
 unsigned long scst_trace_flag;
 #endif
 
-unsigned long scst_flags;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 unsigned long scst_poll_ns = SCST_DEF_POLL_NS;
-#endif
 
 int scst_max_tasklet_cmd = SCST_DEF_MAX_TASKLET_CMD;
 
 struct scst_cmd_threads scst_main_cmd_threads;
 
+static bool percpu_ref_killed;
+struct percpu_ref scst_cmd_count;
+struct percpu_ref scst_mcmd_count;
 struct scst_percpu_info scst_percpu_infos[NR_CPUS];
 
 spinlock_t scst_mcmd_lock;
@@ -140,7 +133,6 @@
 /* protected by scst_cmd_threads_mutex */
 static struct list_head scst_cmd_threads_list;
 
-int scst_threads;
 static struct task_struct *scst_init_cmd_thread;
 static struct task_struct *scst_mgmt_thread;
 static struct task_struct *scst_mgmt_cmd_thread;
@@ -150,13 +142,11 @@
  * several threads simultaneously.
  */
 static struct mutex scst_suspend_mutex;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 #ifdef CONFIG_LOCKDEP
 static struct lock_class_key scst_suspend_key;
 struct lockdep_map scst_suspend_dep_map =
 	STATIC_LOCKDEP_MAP_INIT("scst_suspend_activity", &scst_suspend_key);
 #endif
-#endif
 
 /* Protected by scst_suspend_mutex */
 static int suspend_count;
@@ -165,34 +155,34 @@
 
 cpumask_t default_cpu_mask;
 
-static unsigned int scst_max_cmd_mem;
-unsigned int scst_max_dev_cmd_mem;
-int scst_forcibly_close_sessions;
-int scst_auto_cm_assignment = false;
-
 spinlock_t scst_measure_latency_lock;
 atomic_t scst_measure_latency;
 
+int scst_threads;
 module_param_named(scst_threads, scst_threads, int, S_IRUGO);
 MODULE_PARM_DESC(scst_threads, "SCSI target threads count");
 
+static unsigned int scst_max_cmd_mem;
 module_param_named(scst_max_cmd_mem, scst_max_cmd_mem, int, S_IRUGO);
 MODULE_PARM_DESC(scst_max_cmd_mem, "Maximum memory allowed to be consumed by "
 	"all SCSI commands of all devices at any given time in MB");
 
+unsigned int scst_max_dev_cmd_mem;
 module_param_named(scst_max_dev_cmd_mem, scst_max_dev_cmd_mem, int, S_IRUGO);
 MODULE_PARM_DESC(scst_max_dev_cmd_mem, "Maximum memory allowed to be consumed "
 	"by all SCSI commands of a device at any given time in MB");
 
-module_param_named(forcibly_close_sessions, scst_forcibly_close_sessions, int,
+bool scst_forcibly_close_sessions;
+module_param_named(forcibly_close_sessions, scst_forcibly_close_sessions, bool,
 		   S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(forcibly_close_sessions,
 "If enabled, close the sessions associated with an access control group (ACG)"
-" when an ACG is deleted via sysfs instead of returning -EBUSY");
+" when an ACG is deleted via sysfs instead of returning -EBUSY. (default: false)");
 
-module_param_named(auto_cm_assignment, scst_auto_cm_assignment, int,
+bool scst_auto_cm_assignment = true;
+module_param_named(auto_cm_assignment, scst_auto_cm_assignment, bool,
 		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(auto_cm_assignment, "Enables the copy managers auto registration");
+MODULE_PARM_DESC(auto_cm_assignment, "Enables the copy managers auto registration. (default: true)");
 
 struct scst_dev_type scst_null_devtype = {
 	.name = "none",
@@ -231,10 +221,6 @@
 		goto out;
 	}
 
-	if (vtt->detect)
-		PRINT_WARNING("detect() method is obsolete and scheduled for "
-			"removal (target driver %s)", vtt->name);
-
 	if (!vtt->release) {
 		PRINT_ERROR("Target driver %s must have "
 			"release() method.", vtt->name);
@@ -302,30 +288,12 @@
 	mutex_unlock(&scst_mutex2);
 	mutex_unlock(&scst_mutex);
 
-	TRACE_DBG("%s", "Calling target driver's detect()");
-	res = vtt->detect ? vtt->detect(vtt) : 0;
-	TRACE_DBG("Target driver's detect() returned %d", res);
-	if (res < 0) {
-		PRINT_ERROR("%s", "The detect() routine failed");
-		res = -EINVAL;
-		goto out_del;
-	}
-
 	PRINT_INFO("Target template %s registered successfully", vtt->name);
 
 out:
 	TRACE_EXIT_RES(res);
 	return res;
 
-out_del:
-	scst_tgtt_sysfs_del(vtt);
-
-	mutex_lock(&scst_mutex);
-
-	mutex_lock(&scst_mutex2);
-	list_del(&vtt->scst_template_list_entry);
-	mutex_unlock(&scst_mutex2);
-
 out_unlock:
 	mutex_unlock(&scst_mutex);
 	goto out;
@@ -796,13 +764,15 @@
 	return;
 }
 
+/*
+ * Returns the number of SCST non-management commands, management commands and
+ * activities that are in progress. Must only be called if both scst_cmd_count
+ * and scst_mcmd_count are in atomic mode.
+ */
 int scst_get_cmd_counter(void)
 {
-	int i, res = 0;
-
-	for (i = 0; i < ARRAY_SIZE(scst_percpu_infos); i++)
-		res += atomic_read(&scst_percpu_infos[i].cpu_cmd_count);
-	return res;
+	return percpu_ref_read(&scst_cmd_count) +
+		percpu_ref_read(&scst_mcmd_count);
 }
 
 static int scst_susp_wait(unsigned long timeout)
@@ -820,7 +790,7 @@
 		t = min(timeout, SCST_SUSP_WAIT_REPORT_TIMEOUT);
 
 	res = wait_event_interruptible_timeout(scst_dev_cmd_waitQ,
-			(scst_get_cmd_counter() == 0), t);
+			percpu_ref_killed, t);
 	if (res > 0) {
 		res = 0;
 		goto out;
@@ -836,13 +806,13 @@
 
 	if (timeout != SCST_SUSPEND_TIMEOUT_UNLIMITED) {
 		res = wait_event_interruptible_timeout(scst_dev_cmd_waitQ,
-			(scst_get_cmd_counter() == 0), timeout - t);
+			percpu_ref_killed, timeout - t);
 		if (res == 0)
 			res = -EBUSY;
 		else if (res > 0)
 			res = 0;
 	} else {
-		wait_event(scst_dev_cmd_waitQ, scst_get_cmd_counter() == 0);
+		wait_event(scst_dev_cmd_waitQ, percpu_ref_killed);
 		res = 0;
 	}
 
@@ -855,17 +825,18 @@
 }
 
 /*
- * scst_suspend_activity() - globally suspend any activity
+ * scst_suspend_activity() - globally suspend activity
  *
  * Description:
- *    Globally suspends any activity and doesn't return, until there are any
- *    active commands (state after SCST_CMD_STATE_INIT). Timeout parameter sets
- *    max time this function will wait for suspending or interrupted by a
- *    signal with the corresponding error status < 0. If timeout is
- *    SCST_SUSPEND_TIMEOUT_UNLIMITED, then it will wait virtually forever.
- *    On success returns 0.
+ *    Globally suspends SCSI command and SCSI management command processing and
+ *    waits until all active commands have finished (state after
+ *    SCST_CMD_STATE_INIT). The timeout parameter defines the maximum time this
+ *    function will wait until activity has been suspended. If this function is
+ *    interrupted by a signal, it returns a negative value. If the timeout value
+ *    is SCST_SUSPEND_TIMEOUT_UNLIMITED, then it will wait virtually forever.
+ *    Returns 0 upon success.
  *
- *    New arriving commands stay in the suspended state until
+ *    Newly arriving commands remain in the suspended state until
  *    scst_resume_activity() is called.
  */
 int scst_suspend_activity(unsigned long timeout)
@@ -876,9 +847,7 @@
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 	rwlock_acquire_read(&scst_suspend_dep_map, 0, 0, _RET_IP_);
-#endif
 
 	if (timeout != SCST_SUSPEND_TIMEOUT_UNLIMITED) {
 		res = mutex_lock_interruptible(&scst_suspend_mutex);
@@ -892,14 +861,9 @@
 	if (suspend_count > 1)
 		goto out_up;
 
-	set_bit(SCST_FLAG_SUSPENDING, &scst_flags);
-	set_bit(SCST_FLAG_SUSPENDED, &scst_flags);
-	/*
-	 * Assignment of SCST_FLAG_SUSPENDING and SCST_FLAG_SUSPENDED must be
-	 * ordered with cpu_cmd_count in scst_get(). Otherwise, lockless logic
-	 * of scst_get() users won't work.
-	 */
-	smp_mb__after_set_bit();
+	/* Cause scst_get_cmd() to fail. */
+	percpu_ref_killed = false;
+	percpu_ref_kill(&scst_cmd_count);
 
 	/*
 	 * See comment in scst_user.c::dev_user_task_mgmt_fn() for more
@@ -915,18 +879,17 @@
 			scst_get_cmd_counter());
 		rep = true;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 		lock_contended(&scst_suspend_dep_map, _RET_IP_);
-#endif
 	}
 
 	res = scst_susp_wait(timeout);
-	if (res != 0)
-		goto out_clear;
 
-	clear_bit(SCST_FLAG_SUSPENDING, &scst_flags);
-	/* See comment about smp_mb() above */
-	smp_mb__after_clear_bit();
+	/* Cause scst_get_mcmd() to fail. */
+	percpu_ref_killed = false;
+	percpu_ref_kill(&scst_mcmd_count);
+
+	if (res != 0)
+		goto out_resume;
 
 	if (scst_get_cmd_counter() != 0)
 		TRACE_MGMT_DBG("Waiting for %d active commands finally to "
@@ -954,21 +917,14 @@
 	mutex_unlock(&scst_suspend_mutex);
 
 out:
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 	if (res == 0)
 		lock_acquired(&scst_suspend_dep_map, _RET_IP_);
 	else
 		rwlock_release(&scst_suspend_dep_map, _RET_IP_);
-#endif
 
 	TRACE_EXIT_RES(res);
 	return res;
 
-out_clear:
-	clear_bit(SCST_FLAG_SUSPENDING, &scst_flags);
-	/* See comment about smp_mb() above */
-	smp_mb__after_clear_bit();
-
 out_resume:
 	__scst_resume_activity();
 	EXTRACHECKS_BUG_ON(suspend_count != 0);
@@ -994,7 +950,8 @@
 	if (suspend_count > 0)
 		goto out;
 
-	clear_bit(SCST_FLAG_SUSPENDED, &scst_flags);
+	percpu_ref_resurrect(&scst_mcmd_count);
+	percpu_ref_resurrect(&scst_cmd_count);
 
 	mutex_lock(&scst_cmd_threads_mutex);
 	list_for_each_entry(l, &scst_cmd_threads_list, lists_list_entry) {
@@ -1004,7 +961,7 @@
 
 	/*
 	 * Wait until scst_init_thread() either is waiting or has reexamined
-	 * scst_flags.
+	 * scst_cmd_count.
 	 */
 	spin_lock_irq(&scst_init_lock);
 	spin_unlock_irq(&scst_init_lock);
@@ -1037,9 +994,7 @@
 {
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 	rwlock_release(&scst_suspend_dep_map, _RET_IP_);
-#endif
 
 	mutex_lock(&scst_suspend_mutex);
 	__scst_resume_activity();
@@ -1379,8 +1334,10 @@
 	list_add_tail(&dev->dev_list_entry, &scst_dev_list);
 
 	res = scst_cm_on_dev_register(dev);
-	if (res != 0)
+	if (res != 0) {
+		sysfs_del = true;
 		goto out_unreg;
+	}
 
 	mutex_unlock(&scst_mutex);
 
@@ -1531,19 +1488,6 @@
 	if (res != 0)
 		goto out;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30) && \
-	!defined(CONFIG_SCST_STRICT_SERIALIZING)
-	if (dev_type->exec == NULL) {
-		PRINT_ERROR("Pass-through dev handlers (handler \"%s\") not "
-			"supported. Consider applying on your kernel patch "
-			"scst_exec_req_fifo-<kernel-version> or define "
-			"CONFIG_SCST_STRICT_SERIALIZING", dev_type->name);
-		res = -EINVAL;
-		goto out;
-	}
-#endif
-
-
 	res = mutex_lock_interruptible(&scst_mutex);
 	if (res != 0)
 		goto out;
@@ -2195,22 +2139,14 @@
 }
 EXPORT_SYMBOL_GPL(scst_get_setup_id);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-static int scst_add(struct class_device *cdev, struct class_interface *intf)
-#else
 static int scst_add(struct device *cdev, struct class_interface *intf)
-#endif
 {
 	struct scsi_device *scsidp;
 	int res = 0;
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-	scsidp = to_scsi_device(cdev->dev);
-#else
 	scsidp = to_scsi_device(cdev->parent);
-#endif
 
 	if ((scsidp->host->hostt->name == NULL) ||
 	    (strcmp(scsidp->host->hostt->name, SCST_LOCAL_NAME) != 0))
@@ -2220,21 +2156,13 @@
 	return res;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-static void scst_remove(struct class_device *cdev, struct class_interface *intf)
-#else
 static void scst_remove(struct device *cdev, struct class_interface *intf)
-#endif
 {
 	struct scsi_device *scsidp;
 
 	TRACE_ENTRY();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-	scsidp = to_scsi_device(cdev->dev);
-#else
 	scsidp = to_scsi_device(cdev->parent);
-#endif
 
 	if ((scsidp->host->hostt->name == NULL) ||
 	    (strcmp(scsidp->host->hostt->name, SCST_LOCAL_NAME) != 0))
@@ -2244,17 +2172,10 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-static struct class_interface scst_interface = {
-	.add = scst_add,
-	.remove = scst_remove,
-};
-#else
 static struct class_interface scst_interface = {
 	.add_dev = scst_add,
 	.remove_dev = scst_remove,
 };
-#endif
 
 static void __init scst_print_config(void)
 {
@@ -2323,6 +2244,13 @@
 		PRINT_INFO("%s", buf);
 }
 
+static void scst_suspended(struct percpu_ref *ref)
+{
+	WARN_ON_ONCE(ref != &scst_cmd_count && ref != &scst_mcmd_count);
+	percpu_ref_killed = true;
+	wake_up_all(&scst_dev_cmd_waitQ);
+}
+
 static int __init init_scst(void)
 {
 	int res, i;
@@ -2531,8 +2459,17 @@
 	if (res != 0)
 		goto out_destroy_sgv_pool;
 
+	res = percpu_ref_init(&scst_cmd_count, scst_suspended,
+			      PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+	if (res != 0)
+		goto out_unreg_interface;
+
+	res = percpu_ref_init(&scst_mcmd_count, scst_suspended,
+			      PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+	if (res != 0)
+		goto out_cmd_count;
+
 	for (i = 0; i < ARRAY_SIZE(scst_percpu_infos); i++) {
-		atomic_set(&scst_percpu_infos[i].cpu_cmd_count, 0);
 		spin_lock_init(&scst_percpu_infos[i].tasklet_lock);
 		INIT_LIST_HEAD(&scst_percpu_infos[i].tasklet_cmd_list);
 		tasklet_init(&scst_percpu_infos[i].tasklet,
@@ -2570,10 +2507,14 @@
 
 out_thread_free:
 	scst_stop_global_threads();
+	percpu_ref_exit(&scst_mcmd_count);
 
+out_cmd_count:
+	percpu_ref_exit(&scst_cmd_count);
+
+out_unreg_interface:
 	scsi_unregister_interface(&scst_interface);
 
-
 out_destroy_sgv_pool:
 	scst_sgv_pools_deinit();
 	scst_tg_cleanup();
@@ -2656,6 +2597,9 @@
 
 	scst_deinit_threads(&scst_main_cmd_threads);
 
+	percpu_ref_exit(&scst_mcmd_count);
+	percpu_ref_exit(&scst_cmd_count);
+
 	scsi_unregister_interface(&scst_interface);
 
 

diff --git a/scst/scst/src/scst_mem.c b/scst/scst/src/scst_mem.c
index a0bba76..fb438b5 100644
--- a/scst/scst/src/scst_mem.c
+++ b/scst/scst/src/scst_mem.c

@@ -52,13 +52,11 @@
 
 static struct sgv_pool *sgv_norm_clust_pool_main, *sgv_norm_pool_main, *sgv_dma_pool_main;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 #if defined(CONFIG_LOCKDEP) && !defined(CONFIG_SCST_PROC)
 static struct lock_class_key scst_pool_key;
 static struct lockdep_map scst_pool_dep_map =
 	STATIC_LOCKDEP_MAP_INIT("scst_pool_kref", &scst_pool_key);
 #endif
-#endif
 
 #ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
 static atomic_t sgv_pages_total = ATOMIC_INIT(0);
@@ -80,11 +78,7 @@
 static atomic_t sgv_other_total_alloc = ATOMIC_INIT(0);
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
-static struct shrinker *sgv_shrinker;
-#else
 static struct shrinker sgv_shrinker;
-#endif
 
 static struct kmem_cache *sgv_pool_cachep;
 
@@ -340,17 +334,9 @@
 	return freed;
 }
 #else /* if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-static int sgv_shrink(int nr, gfp_t gfpm)
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-static int sgv_shrink(struct shrinker *shrinker, int nr, gfp_t gfpm)
-#else
 static int sgv_shrink(struct shrinker *shrinker, struct shrink_control *sc)
-#endif
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
 	int nr = sc->nr_to_scan;
-#endif
 	int freed = 0;
 
 	TRACE_ENTRY();
@@ -366,19 +352,11 @@
 }
 #endif /* if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void sgv_purge_work_fn(void *p)
-#else
 static void sgv_purge_work_fn(struct work_struct *work)
-#endif
 {
 	unsigned long cur_time = jiffies;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct sgv_pool *pool = (struct sgv_pool *)p;
-#else
 	struct sgv_pool *pool = container_of(work, struct sgv_pool,
 					     sgv_purge_work.work);
-#endif
 
 	TRACE_ENTRY();
 
@@ -1379,12 +1357,7 @@
 	pool->caches[cache_num] = kmem_cache_create(
 		pool->cache_names[cache_num], size,
 		0, per_cpu ? SCST_SLAB_FLAGS :
-			     (SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN), NULL
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
-		, NULL);
-#else
-		);
-#endif
+		(SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN), NULL);
 	return;
 }
 
@@ -1458,11 +1431,7 @@
 	for (i = 0; i < pool->max_caches; i++)
 		INIT_LIST_HEAD(&pool->recycling_lists[i]);
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
 	INIT_DELAYED_WORK(&pool->sgv_purge_work, sgv_purge_work_fn);
-#else
-	INIT_WORK(&pool->sgv_purge_work, sgv_purge_work_fn, pool);
-#endif
 
 	spin_lock_bh(&sgv_pools_lock);
 	list_add_tail(&pool->sgv_pools_list_entry, &sgv_pools_list);
@@ -1487,11 +1456,8 @@
 
 out_free:
 	for (i = 0; i < pool->max_caches; i++) {
-		if (pool->caches[i]) {
-			kmem_cache_destroy(pool->caches[i]);
-			pool->caches[i] = NULL;
-		} else
-			break;
+		kmem_cache_destroy(pool->caches[i]);
+		pool->caches[i] = NULL;
 	}
 	goto out;
 }
@@ -1553,6 +1519,9 @@
 
 	TRACE_ENTRY();
 
+	if (unlikely(!pool))
+		goto out;
+
 	sgv_pool_flush(pool);
 
 	mutex_lock(&sgv_pools_mutex);
@@ -1568,13 +1537,13 @@
 	cancel_delayed_work_sync(&pool->sgv_purge_work);
 
 	for (i = 0; i < pool->max_caches; i++) {
-		if (pool->caches[i])
-			kmem_cache_destroy(pool->caches[i]);
+		kmem_cache_destroy(pool->caches[i]);
 		pool->caches[i] = NULL;
 	}
 
 	kmem_cache_free(sgv_pool_cachep, pool);
 
+out:
 	TRACE_EXIT();
 	return;
 }
@@ -1822,9 +1791,6 @@
 			goto out_free_per_cpu_dma;
 	}
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
-	sgv_shrinker = set_shrinker(DEFAULT_SEEKS, sgv_shrink);
-#else
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
 	sgv_shrinker.count_objects = sgv_can_be_shrunk;
 	sgv_shrinker.scan_objects = sgv_scan_shrink;
@@ -1832,8 +1798,10 @@
 	sgv_shrinker.shrink = sgv_shrink;
 #endif
 	sgv_shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&sgv_shrinker);
-#endif
+
+	res = register_shrinker(&sgv_shrinker, "scst-sgv");
+	if (unlikely(res))
+		goto out_free_per_cpu_dma;
 
 out:
 	TRACE_EXIT_RES(res);
@@ -1841,18 +1809,15 @@
 
 out_free_per_cpu_dma:
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_dma_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
 
 out_free_per_cpu_clust:
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_norm_clust_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
 
 out_free_per_cpu_norm:
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_norm_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
 
 	sgv_pool_destroy(sgv_dma_pool_main);
 
@@ -1866,7 +1831,7 @@
 	kmem_cache_destroy(sgv_pool_cachep);
 
 out_err:
-	res = -ENOMEM;
+	res = res ?: -ENOMEM;
 	goto out;
 }
 
@@ -1876,26 +1841,19 @@
 
 	TRACE_ENTRY();
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
-	remove_shrinker(sgv_shrinker);
-#else
 	unregister_shrinker(&sgv_shrinker);
-#endif
 
 	sgv_pool_destroy(sgv_dma_pool_main);
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_dma_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
 
 	sgv_pool_destroy(sgv_norm_pool_main);
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_norm_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
 
 	sgv_pool_destroy(sgv_norm_clust_pool_main);
 	for (i = 0; i < nr_cpu_ids; i++)
-		if (sgv_norm_clust_pool_per_cpu[i] != NULL)
-			sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
+		sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
 
 	for (i = 0; i < nr_cpu_ids; i++)
 		sgv_norm_pool_global[i] = NULL;
@@ -2051,10 +2009,13 @@
 	__ATTR(stats, S_IRUGO | S_IWUSR, sgv_sysfs_stat_show,
 		sgv_sysfs_stat_reset);
 
-static struct attribute *sgv_attrs[] = {
+static struct attribute *sgv_pool_attrs[] = {
 	&sgv_stat_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(sgv_pool);
+#endif
 
 static void sgv_kobj_release(struct kobject *kobj)
 {
@@ -2073,7 +2034,11 @@
 static struct kobj_type sgv_pool_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = sgv_kobj_release,
-	.default_attrs = sgv_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = sgv_pool_groups,
+#else
+	.default_attrs = sgv_pool_attrs,
+#endif
 };
 
 static int scst_sgv_sysfs_create(struct sgv_pool *pool)
@@ -2114,10 +2079,13 @@
 	__ATTR(global_stats, S_IRUGO | S_IWUSR, sgv_sysfs_global_stat_show,
 		sgv_sysfs_global_stat_reset);
 
-static struct attribute *sgv_default_attrs[] = {
+static struct attribute *sgv_def_attrs[] = {
 	&sgv_global_stat_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(sgv_def);
+#endif
 
 static void scst_sysfs_release(struct kobject *kobj)
 {
@@ -2127,7 +2095,11 @@
 static struct kobj_type sgv_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_sysfs_release,
-	.default_attrs = sgv_default_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = sgv_def_groups,
+#else
+	.default_attrs = sgv_def_attrs,
+#endif
 };
 
 /*

diff --git a/scst/scst/src/scst_mem.h b/scst/scst/src/scst_mem.h
index 6b64e1d..9b5439c 100644
--- a/scst/scst/src/scst_mem.h
+++ b/scst/scst/src/scst_mem.h

@@ -50,7 +50,7 @@
 	void *allocator_priv;
 	struct trans_tbl_ent *trans_tbl;
 	struct scatterlist *sg_entries;
-	struct scatterlist sg_entries_data[0];
+	struct scatterlist sg_entries_data[];
 };
 
 /*
@@ -108,11 +108,7 @@
 
 	struct sgv_pool_cache_acc cache_acc[SGV_POOL_ELEMENTS];
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
 	struct delayed_work sgv_purge_work;
-#else
-	struct work_struct sgv_purge_work;
-#endif
 
 	atomic_t big_alloc, big_pages, big_merged;
 	atomic_t other_alloc, other_pages, other_merged;

diff --git a/scst/scst/src/scst_pres.c b/scst/scst/src/scst_pres.c
index 96f3d77..ce86068 100644
--- a/scst/scst/src/scst_pres.c
+++ b/scst/scst/src/scst_pres.c

@@ -41,15 +41,6 @@
 #endif
 #include <linux/vmalloc.h>
 #include <asm/unaligned.h>
-#include <stdarg.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-#include <linux/mount.h>
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-#include <linux/writeback.h>
-#endif
 
 #ifdef INSIDE_KERNEL_TREE
 #include <scst/scst.h>
@@ -61,6 +52,14 @@
 #include "scst_priv.h"
 #include "scst_pres.h"
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 1))
+#include <stdarg.h>
+#else
+#include <linux/stdarg.h>
+#endif
+
 #define SCST_PR_ROOT_ENTRY	"pr"
 #define SCST_PR_FILE_SIGN	0xBBEEEEAAEEBBDD77LLU
 #define SCST_PR_FILE_VERSION	1LLU
@@ -71,7 +70,7 @@
 #define isblank(c)		((c) == ' ' || (c) == '\t')
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && defined(CONFIG_LOCKDEP)
+#if defined(CONFIG_LOCKDEP)
 #define scst_assert_pr_mutex_held(dev)					\
 	do {								\
 		if (dev->dev_list_entry.next &&				\
@@ -173,8 +172,9 @@
 			if (tolower(tid_a[i]) != tolower(tid_b[i]))
 				return false;
 		return true;
-	} else
-		len = TID_COMMON_SIZE;
+	}
+
+	len = TID_COMMON_SIZE;
 
 	return memcmp(tid_a, tid_b, len) == 0;
 
@@ -660,7 +660,6 @@
 {
 	int res = 0, rc;
 	struct file *file = NULL;
-	struct inode *inode;
 	char *buf = NULL;
 	loff_t file_size, pos, data_size;
 	uint64_t sign, version;
@@ -676,6 +675,12 @@
 
 	TRACE_PR("Loading persistent file '%s'", file_name);
 
+	file_size = scst_file_or_bdev_size(file_name);
+	if (file_size < 0) {
+		res = file_size;
+		goto out;
+	}
+
 	file = filp_open(file_name, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		res = PTR_ERR(file);
@@ -683,19 +688,6 @@
 		goto out;
 	}
 
-	inode = file_inode(file);
-
-	if (S_ISREG(inode->i_mode)) {
-		/* Nothing to do */
-	} else if (S_ISBLK(inode->i_mode)) {
-		inode = inode->i_bdev->bd_inode;
-	} else {
-		PRINT_ERROR("Invalid file mode 0x%x", inode->i_mode);
-		goto out_close;
-	}
-
-	file_size = inode->i_size;
-
 	/* Let's limit the file size by some reasonable number */
 	if ((file_size == 0) || (file_size >= 15*1024*1024)) {
 		PRINT_ERROR("Invalid PR file size %d", (int)file_size);
@@ -1017,19 +1009,7 @@
 
 write_error_close:
 	filp_close(file, NULL);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	{
-		struct nameidata nd;
-		int rc;
 
-		rc = path_lookup(dev->pr_file_name, 0,	&nd);
-		if (!rc)
-			scst_vfs_unlink_and_put_nd(&nd);
-		else
-			TRACE_PR("Unable to lookup '%s' - error %d",
-				dev->pr_file_name, rc);
-	}
-#else
 	{
 		struct path path;
 		int rc;
@@ -1041,7 +1021,6 @@
 			TRACE_PR("Unable to lookup '%s' - error %d",
 				dev->pr_file_name, rc);
 	}
-#endif
 	goto out;
 }
 

diff --git a/scst/scst/src/scst_priv.h b/scst/scst/src/scst_priv.h
index 21061c7..ce5b5eb 100644
--- a/scst/scst/src/scst_priv.h
+++ b/scst/scst/src/scst_priv.h

@@ -22,9 +22,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
 #include <linux/export.h>
-#endif
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_driver.h>
@@ -74,20 +72,6 @@
 
 #endif
 
-/**
- ** Bits for scst_flags
- **/
-
-/*
- * Set if new commands initialization is being suspended for a while.
- * Used to let TM commands execute while preparing the suspend, since
- * RESET or ABORT could be necessary to free SCSI commands.
- */
-#define SCST_FLAG_SUSPENDING		     0
-
-/* Set if new commands initialization is suspended for a while */
-#define SCST_FLAG_SUSPENDED		     1
-
 extern spinlock_t scst_measure_latency_lock;
 extern atomic_t scst_measure_latency;
 void scst_update_latency_stats(struct scst_cmd *cmd, int new_state);
@@ -168,8 +152,8 @@
 
 extern unsigned int scst_max_dev_cmd_mem;
 
-extern int scst_forcibly_close_sessions;
-extern int scst_auto_cm_assignment;
+extern bool scst_forcibly_close_sessions;
+extern bool scst_auto_cm_assignment;
 
 extern mempool_t *scst_mgmt_mempool;
 extern mempool_t *scst_mgmt_stub_mempool;
@@ -184,7 +168,6 @@
 extern struct kmem_cache *scst_tgtd_cachep;
 extern struct kmem_cache *scst_acgd_cachep;
 
-extern unsigned long scst_flags;
 extern struct list_head scst_template_list;
 extern struct list_head scst_dev_list;
 extern struct list_head scst_dev_type_list;
@@ -199,10 +182,8 @@
 #define SCST_DEF_MAX_TASKLET_CMD 10
 extern int scst_max_tasklet_cmd;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 #define SCST_DEF_POLL_NS 0
 extern unsigned long scst_poll_ns;
-#endif
 
 extern spinlock_t scst_init_lock;
 extern struct list_head scst_init_cmd_list;
@@ -217,8 +198,9 @@
 extern struct list_head scst_delayed_mgmt_cmd_list;
 extern wait_queue_head_t scst_mgmt_cmd_list_waitQ;
 
+extern struct percpu_ref scst_cmd_count;
+extern struct percpu_ref scst_mcmd_count;
 struct scst_percpu_info {
-	atomic_t cpu_cmd_count;
 	spinlock_t tasklet_lock;
 	struct list_head tasklet_cmd_list;
 	struct tasklet_struct tasklet;
@@ -244,9 +226,6 @@
 static inline bool scst_set_io_context(struct scst_cmd *cmd,
 				       struct io_context **old)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
-	return false;
-#else
 #ifdef CONFIG_SCST_TEST_IO_IN_SIRQ
 	return false;
 #else
@@ -273,7 +252,6 @@
 
 	return res;
 #endif
-#endif
 }
 
 static inline void scst_reset_io_context(struct scst_tgt_dev *tgt_dev,
@@ -439,34 +417,10 @@
 					 uint32_t flags,
 					 int lvblen)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-	return dlm_new_lockspace((char *)name, namelen, lockspace, flags,
-				 lvblen);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
-	return dlm_new_lockspace(name, namelen, lockspace, flags, lvblen);
-#else
 	return dlm_new_lockspace(name, NULL, flags, lvblen, NULL, NULL, NULL,
 				 lockspace);
-#endif
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-static inline int scst_exec_req(struct scsi_device *sdev,
-	const unsigned char *cmd, int cmd_len, int data_direction,
-	struct scatterlist *sgl, unsigned int bufflen, unsigned int nents,
-	int timeout, int retries, void *privdata,
-	void (*done)(void *, char *, int, int), gfp_t gfp)
-{
-#if defined(CONFIG_SCST_STRICT_SERIALIZING)
-	return scsi_execute_async(sdev, cmd, cmd_len, data_direction, (void *)sgl,
-		    bufflen, nents, timeout, retries, privdata, done, gfp);
-#else
-	WARN_ON(1);
-	return -1;
-#endif
-}
-#endif
-
 int scst_alloc_space(struct scst_cmd *cmd);
 
 int scst_lib_init(void);
@@ -512,11 +466,7 @@
 			   struct scst_tg_tgt *tg_tgt);
 
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
 extern const struct sysfs_ops scst_sysfs_ops;
-#else
-extern struct sysfs_ops scst_sysfs_ops;
-#endif
 int scst_sysfs_init(void);
 void scst_sysfs_cleanup(void);
 int scst_tgtt_sysfs_create(struct scst_tgt_template *tgtt);
@@ -671,51 +621,64 @@
 int scst_get_suspend_count(void);
 
 /*
- * Increases global SCST ref counters which prevent from entering into suspended
- * activities stage, so protects from any global management operations.
+ * Increase the global command count if it has not been 'killed'. Use this
+ * function to protect regular commands.
  */
-static inline atomic_t *scst_get(void)
+static inline bool __must_check scst_get_cmd(struct scst_cmd *cmd)
 {
-	atomic_t *a;
-
-	/*
-	 * Avoid that a high I/O load prevents activity to be suspended. See
-	 * also http://sourceforge.net/p/scst/mailman/message/34074831/.
-	 */
-	if (unlikely(test_bit(SCST_FLAG_SUSPENDING, &scst_flags)))
-		mdelay(100);
-
-	/*
-	 * We don't mind if we because of preemption inc counter from another
-	 * CPU as soon in the majority cases we will the correct one.
-	 */
-	a = &scst_percpu_infos[raw_smp_processor_id()].cpu_cmd_count;
-	atomic_inc(a);
-	TRACE_DBG("Incrementing cpu_cmd_count %p (new value %d)",
-		a, atomic_read(a));
-	/* See comment about smp_mb() in scst_suspend_activity() */
-	smp_mb__after_atomic_inc();
-
-	return a;
+	if (!percpu_ref_tryget_live(&scst_cmd_count))
+		return false;
+	cmd->counted = true;
+	return true;
 }
 
 /*
- * Decreases global SCST ref counters which prevent from entering into suspended
- * activities stage, so protects from any global management operations. On
- * all them zero, if suspending activities is waiting, it will be proceed.
+ * Increase the global management command count if it has not been 'killed'.
+ * Use this function to protect management commands.
  */
-static inline void scst_put(atomic_t *a)
+static inline bool __must_check scst_get_mcmd(struct scst_mgmt_cmd *mcmd)
 {
-	int f;
+	if (!percpu_ref_tryget_live(&scst_mcmd_count))
+		return false;
+	mcmd->counted = true;
+	return true;
+}
 
-	f = atomic_dec_and_test(a);
-	/* See comment about smp_mb() in scst_suspend_activity() */
-	if (unlikely(test_bit(SCST_FLAG_SUSPENDED, &scst_flags)) && f) {
-		TRACE_MGMT_DBG("%s", "Waking up scst_dev_cmd_waitQ");
-		wake_up_all(&scst_dev_cmd_waitQ);
-	}
-	TRACE_DBG("Decrementing cpu_cmd_count %p (new value %d)",
-	      a, atomic_read(a));
+/*
+ * Increase the global command count. Use this function to protect internal
+ * commands.
+ */
+static inline void scst_get_icmd(struct scst_cmd *cmd)
+{
+	percpu_ref_get(&scst_cmd_count);
+	cmd->counted = true;
+}
+
+/* Decrease the global SCST refcount which prevents suspending activity. */
+static inline void scst_put_cmd(struct scst_cmd *cmd)
+{
+	WARN_ON_ONCE(!cmd->counted);
+	cmd->counted = false;
+	percpu_ref_put(&scst_cmd_count);
+}
+
+static inline void scst_put_mcmd(struct scst_mgmt_cmd *mcmd)
+{
+	WARN_ON_ONCE(!mcmd->counted);
+	mcmd->counted = false;
+	percpu_ref_put(&scst_mcmd_count);
+}
+
+/* Whether or not activities are being suspended or have been suspended. */
+static inline bool scst_activity_suspended(void)
+{
+	return percpu_ref_is_dying(&scst_cmd_count);
+}
+
+/* Returns true if and only if regular commands have already been suspended. */
+static inline bool scst_mcmd_suspended(void)
+{
+	return percpu_ref_is_dying(&scst_mcmd_count);
 }
 
 int scst_get_cmd_counter(void);
@@ -765,9 +728,6 @@
 int scst_pr_init(struct scst_device *dev);
 void scst_pr_cleanup(struct scst_device *dev);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-void scst_vfs_unlink_and_put_nd(struct nameidata *nd);
-#endif
 void scst_vfs_unlink_and_put(struct path *path);
 
 int scst_copy_file(const char *src, const char *dest);
@@ -811,11 +771,7 @@
 enum scst_exec_res scst_cm_ext_copy_exec(struct scst_cmd *cmd);
 enum scst_exec_res scst_cm_rcv_copy_res_exec(struct scst_cmd *cmd);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-void sess_cm_list_id_cleanup_work_fn(void *p);
-#else
 void sess_cm_list_id_cleanup_work_fn(struct work_struct *work);
-#endif
 void scst_cm_free_pending_list_ids(struct scst_session *sess);
 
 bool scst_cm_check_block_all_devs(struct scst_cmd *cmd);

diff --git a/scst/scst/src/scst_sysfs.c b/scst/scst/src/scst_sysfs.c
index 0205fa7..8fed173 100644
--- a/scst/scst/src/scst_sysfs.c
+++ b/scst/scst/src/scst_sysfs.c

@@ -34,7 +34,6 @@
 #include "scst_pres.h"
 #include "scst_mem.h"
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 #ifdef CONFIG_LOCKDEP
 static struct lock_class_key scst_tgtt_key;
 static struct lockdep_map scst_tgtt_dep_map =
@@ -68,7 +67,6 @@
 static struct lockdep_map scst_tg_dep_map =
 	STATIC_LOCKDEP_MAP_INIT("scst_tg_kref", &scst_tg_key);
 #endif
-#endif
 
 static DECLARE_COMPLETION(scst_sysfs_root_release_completion);
 
@@ -317,37 +315,6 @@
 
 #endif /* defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) &&	\
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6 ||	\
-	 (RHEL_MAJOR -0 == 6 && RHEL_MINOR -0 < 6))
-/*
- ** Backported sysfs functions.
- **/
-
-static int sysfs_create_files(struct kobject *kobj,
-			      const struct attribute **ptr)
-{
-	int err = 0;
-	int i;
-
-	for (i = 0; ptr[i] && !err; i++)
-		err = sysfs_create_file(kobj, ptr[i]);
-	if (err)
-		while (--i >= 0)
-			sysfs_remove_file(kobj, ptr[i]);
-	return err;
-}
-
-static void sysfs_remove_files(struct kobject *kobj,
-			       const struct attribute **ptr)
-{
-	int i;
-
-	for (i = 0; ptr[i]; i++)
-		sysfs_remove_file(kobj, ptr[i]);
-}
-#endif
-
 /*
  ** Sysfs work
  **/
@@ -453,19 +420,15 @@
 
 		TRACE_DBG("Sysfs work %p", work);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 		if (work->dep_map) {
 			mutex_acquire(work->dep_map, 0, 0, _RET_IP_);
 			lock_acquired(work->dep_map, _RET_IP_);
 		}
-#endif
 
 		work->work_res = work->sysfs_work_fn(work);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 		if (work->dep_map)
 			mutex_release(work->dep_map, _RET_IP_);
-#endif
 
 		spin_lock(&sysfs_work_lock);
 		if (!work->read_only_action)
@@ -806,20 +769,12 @@
 		return -EIO;
 }
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34))
 const struct sysfs_ops scst_sysfs_ops = {
-#else
-struct sysfs_ops scst_sysfs_ops = {
-#endif
 	.show = scst_show,
 	.store = scst_store,
 };
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34))
 const struct sysfs_ops *scst_sysfs_get_sysfs_ops(void)
-#else
-struct sysfs_ops *scst_sysfs_get_sysfs_ops(void)
-#endif
 {
 	return &scst_sysfs_ops;
 }
@@ -1184,7 +1139,7 @@
 
 void scst_kobject_put_and_wait(struct kobject *kobj, const char *category,
 			       struct completion *c
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) && defined(CONFIG_LOCKDEP)
+#if defined(CONFIG_LOCKDEP)
 			       , struct lockdep_map *dep_map
 #endif
 			       )
@@ -1197,9 +1152,7 @@
 
 	kobject_put(kobj);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 	mutex_acquire(dep_map, 0, 0, _RET_IP_);
-#endif
 
 	if (wait_for_completion_timeout(c, HZ) > 0)
 		goto out_free;
@@ -1211,10 +1164,8 @@
 		   category, name ? : "(?)");
 
 out_free:
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
 	lock_acquired(dep_map, _RET_IP_);
 	mutex_release(dep_map, _RET_IP_);
-#endif
 
 	kfree(name);
 
@@ -1571,7 +1522,7 @@
 		"\n"
 		"where parameters are one or more "
 		"param_name=value pairs separated by ';'\n"
-		"\nThe following parameters available: read_only.\n";
+		"\nThe following parameters available: read_only\n";
 
 	return sprintf(buf, "%s", help);
 }
@@ -1960,10 +1911,7 @@
 {
 	int res;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	res = cpumask_scnprintf(buf, SCST_SYSFS_BLOCK_SIZE,
-		acg->acg_cpu_mask);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
 	res = cpumask_scnprintf(buf, SCST_SYSFS_BLOCK_SIZE,
 		&acg->acg_cpu_mask);
 #else
@@ -2944,11 +2892,18 @@
 	&scst_tgt_none_cmd_count_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_tgt);
+#endif
 
 static struct kobj_type tgt_ktype = {
 	.sysfs_ops	= &scst_sysfs_ops,
 	.release	= scst_tgt_release,
-	.default_attrs	= scst_tgt_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_tgt_groups,
+#else
+	.default_attrs  = scst_tgt_attrs,
+#endif
 };
 
 /*
@@ -3778,6 +3733,9 @@
 	&dev_block_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_dev);
+#endif
 
 static void scst_sysfs_dev_release(struct kobject *kobj)
 {
@@ -3905,7 +3863,11 @@
 static struct kobj_type scst_dev_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_sysfs_dev_release,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_dev_groups,
+#else
 	.default_attrs = scst_dev_attrs,
+#endif
 };
 
 /*
@@ -4270,6 +4232,9 @@
 	&tgt_dev_active_commands_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_tgt_dev);
+#endif
 
 static void scst_sysfs_tgt_dev_release(struct kobject *kobj)
 {
@@ -4288,7 +4253,11 @@
 static struct kobj_type scst_tgt_dev_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_sysfs_tgt_dev_release,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_tgt_dev_groups,
+#else
 	.default_attrs = scst_tgt_dev_attrs,
+#endif
 };
 
 int scst_tgt_dev_sysfs_create(struct scst_tgt_dev *tgt_dev)
@@ -4380,7 +4349,7 @@
 #else
 	uint64_t sum = 0, sumsq = 0;
 #endif
-	unsigned count = 0, numst = 0;
+	unsigned int count = 0, numst = 0;
 	u64 d_min_div_10, d_max_div_10, avg_div_10, stddev_div_10;
 	u32 d_min_mod_10, d_max_mod_10, avg_mod_10, stddev_mod_10;
 	char state_name[32];
@@ -4841,6 +4810,9 @@
 	&session_none_cmd_count_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_session);
+#endif
 
 static void scst_sysfs_session_release(struct kobject *kobj)
 {
@@ -4859,7 +4831,11 @@
 static struct kobj_type scst_session_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_sysfs_session_release,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_session_groups,
+#else
 	.default_attrs = scst_session_attrs,
+#endif
 };
 
 #define SCST_LAT_ATTRS(size)		\
@@ -5095,15 +5071,22 @@
 static struct kobj_attribute lun_options_attr =
 	__ATTR(read_only, S_IRUGO, scst_lun_rd_only_show, NULL);
 
-static struct attribute *lun_attrs[] = {
+static struct attribute *acg_dev_attrs[] = {
 	&lun_options_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(acg_dev);
+#endif
 
 static struct kobj_type acg_dev_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_acg_dev_release,
-	.default_attrs = lun_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = acg_dev_groups,
+#else
+	.default_attrs = acg_dev_attrs,
+#endif
 };
 
 /*
@@ -5663,11 +5646,9 @@
 	int res = 0;
 	struct scst_acg *acg = acn->acg;
 	struct kobj_attribute *attr = NULL;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	static struct lock_class_key __key;
 #endif
-#endif
 
 	TRACE_ENTRY();
 
@@ -5689,14 +5670,9 @@
 		goto out_free;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-	attr->attr.owner = THIS_MODULE;
-#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	attr->attr.key = &__key;
 #endif
-#endif
 
 	attr->attr.mode = S_IRUGO;
 	attr->show = scst_acn_file_show;
@@ -5819,15 +5795,22 @@
 static struct kobj_attribute scst_devt_type_attr =
 	__ATTR(type, S_IRUGO, scst_devt_type_show, NULL);
 
-static struct attribute *scst_devt_default_attrs[] = {
+static struct attribute *scst_devt_def_attrs[] = {
 	&scst_devt_type_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_devt_def);
+#endif
 
 static struct kobj_type scst_devt_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_devt_release,
-	.default_attrs = scst_devt_default_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_devt_def_groups,
+#else
+	.default_attrs = scst_devt_def_attrs,
+#endif
 };
 
 static char *scst_dev_params(struct scst_dev_type *devt)
@@ -7261,8 +7244,6 @@
 	__ATTR(max_tasklet_cmd, S_IRUGO | S_IWUSR, scst_max_tasklet_cmd_show,
 	       scst_max_tasklet_cmd_store);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
-
 static ssize_t scst_poll_us_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
 {
@@ -7310,8 +7291,6 @@
 	__ATTR(poll_us, S_IRUGO | S_IWUSR, scst_poll_us_show,
 	       scst_poll_us_store);
 
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
-
 static ssize_t scst_suspend_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
@@ -7614,14 +7593,12 @@
 	__ATTR(last_sysfs_mgmt_res, S_IRUGO,
 		scst_last_sysfs_mgmt_res_show, NULL);
 
-static struct attribute *scst_sysfs_root_default_attrs[] = {
+static struct attribute *scst_sysfs_root_def_attrs[] = {
 	&scst_measure_latency_attr.attr,
 	&scst_threads_attr.attr,
 	&scst_setup_id_attr.attr,
 	&scst_max_tasklet_cmd_attr.attr,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 	&scst_poll_us_attr.attr,
-#endif
 	&scst_suspend_attr.attr,
 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
 	&scst_main_trace_level_attr.attr,
@@ -7633,6 +7610,9 @@
 	&scst_last_sysfs_mgmt_res_attr.attr,
 	NULL,
 };
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+ATTRIBUTE_GROUPS(scst_sysfs_root_def);
+#endif
 
 static void scst_sysfs_root_release(struct kobject *kobj)
 {
@@ -7642,7 +7622,11 @@
 static struct kobj_type scst_sysfs_root_ktype = {
 	.sysfs_ops = &scst_sysfs_ops,
 	.release = scst_sysfs_root_release,
-	.default_attrs = scst_sysfs_root_default_attrs,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+	.default_groups = scst_sysfs_root_def_groups,
+#else
+	.default_attrs = scst_sysfs_root_def_attrs,
+#endif
 };
 
 /*

diff --git a/scst/scst/src/scst_targ.c b/scst/scst/src/scst_targ.c
index c9c3255..d57a706 100644
--- a/scst/scst/src/scst_targ.c
+++ b/scst/scst/src/scst_targ.c

@@ -59,7 +59,16 @@
 
 	i = &scst_percpu_infos[smp_processor_id()];
 
-	if (atomic_read(&i->cpu_cmd_count) <= scst_max_tasklet_cmd) {
+	/*
+	 * Commands are removed from the list they are on before being
+	 * processed. If both lists are empty that means that at most two
+	 * commands are being processed and hence that processing a
+	 * command in tasklet context is possible without making a CPU core
+	 * spend all its time in interrupt and tasklet context and thereby
+	 * starving threads scheduled on the same CPU core.
+	 */
+	if (list_empty_careful(&i->tasklet_cmd_list) &&
+	    list_empty_careful(&cmd->cmd_threads->active_cmd_list)) {
 		spin_lock_irqsave(&i->tasklet_lock, flags);
 		TRACE_DBG("Adding cmd %p to tasklet %d cmd list", cmd,
 			smp_processor_id());
@@ -69,8 +78,8 @@
 		tasklet_schedule(&i->tasklet);
 	} else {
 		spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
-		TRACE_DBG("Too many tasklet commands (%d), adding cmd %p to "
-			"active cmd list", atomic_read(&i->cpu_cmd_count), cmd);
+		TRACE_DBG("Too many tasklet commands, adding cmd %p to active cmd list",
+			  cmd);
 		list_add_tail(&cmd->cmd_list_entry,
 			&cmd->cmd_threads->active_cmd_list);
 		wake_up(&cmd->cmd_threads->cmd_list_waitQ);
@@ -565,7 +574,7 @@
 {
 	TRACE_ENTRY();
 
-	WARN_ON_ONCE(cmd->cpu_cmd_counter);
+	WARN_ON_ONCE(cmd->counted);
 
 	cmd->sess = sess;
 	scst_sess_get(sess);
@@ -2613,14 +2622,7 @@
 		  scsi_dev->host->host_no, scsi_dev->channel, scsi_dev->id,
 		  (u64)scsi_dev->lun);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-	rc = scst_exec_req(scsi_dev, cmd->cdb, cmd->cdb_len,
-			cmd->data_direction, cmd->sg, cmd->bufflen,
-			cmd->sg_cnt, cmd->timeout, cmd->retries, cmd,
-			scst_pass_through_cmd_done, cmd->cmd_gfp_mask);
-#else
 	rc = scst_scsi_exec_async(cmd, cmd, scst_pass_through_cmd_done);
-#endif
 	if (unlikely(rc != 0)) {
 		PRINT_ERROR("scst pass-through exec failed: %d", rc);
 		/* "Sectors" are hardcoded as 512 bytes in the kernel */
@@ -2671,11 +2673,6 @@
 	res = scst_do_real_exec(cmd);
 	if (likely(res == SCST_EXEC_COMPLETED)) {
 		scst_post_exec_sn(cmd, true);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-		if (cmd->dev->scsi_dev != NULL)
-			generic_unplug_device(
-				cmd->dev->scsi_dev->request_queue);
-#endif
 	} else
 		sBUG();
 
@@ -2924,11 +2921,6 @@
 
 	*active_cmd = cmd;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
-	if (ref_cmd->dev->scsi_dev != NULL)
-		generic_unplug_device(ref_cmd->dev->scsi_dev->request_queue);
-#endif
-
 	__scst_cmd_put(ref_cmd);
 	/* !! At this point sess, dev and tgt_dev can be already freed !! */
 
@@ -3672,11 +3664,11 @@
 		/*
 		 * Those counters protect from not getting too long processing
 		 * latency, so we should decrement them after cmd completed.
+		 *
+		 * @cmd processing for SCST device is complete.
 		 */
-		smp_mb__before_atomic_dec();
 		WARN_ON_ONCE(!cmd->owns_refcnt);
 		cmd->owns_refcnt = false;
-		atomic_dec(&cmd->tgt_dev->tgt_dev_cmd_count);
 		percpu_ref_put(&cmd->dev->refcnt);
 #ifdef CONFIG_SCST_PER_DEVICE_CMD_COUNT_LIMIT
 		atomic_dec(&cmd->dev->dev_cmd_count);
@@ -3883,6 +3875,22 @@
 		}
 	}
 
+	if (likely(cmd->tgt_dev != NULL)) {
+		/*
+		 * We must decrement @tgt_dev->tgt_dev_cmd_count
+		 * after scst_tgt_cmd_done() was called. Otherwise,
+		 * this may lead to a race condition between
+		 * scst_acg_repl_lun() and scst_tgt_cmd_done()'s
+		 * cmd processing.
+		 *
+		 * See also https://github.com/SCST-project/scst/pull/27
+		 *
+		 * @cmd processing for target device is complete.
+		 */
+		smp_mb__before_atomic_dec();
+		atomic_dec(&cmd->tgt_dev->tgt_dev_cmd_count);
+	}
+
 	atomic_dec(&sess->sess_cmd_count);
 
 	spin_lock_irq(&sess->sess_list_lock);
@@ -4128,14 +4136,12 @@
 	struct list_head *head;
 	struct scst_tgt_dev *tgt_dev;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 #if defined(CONFIG_SCST_EXTRACHECKS) && defined(CONFIG_PREEMPT_RCU) && \
 	defined(CONFIG_DEBUG_LOCK_ALLOC)
 	WARN_ON_ONCE(debug_locks &&
 		     !lockdep_is_held(&sess->tgt_dev_list_mutex) &&
 		     rcu_preempt_depth() == 0);
 #endif
-#endif
 
 	head = &sess->sess_tgt_dev_list[SESS_TGT_DEV_LIST_HASH_FN(lun)];
 	list_for_each_entry_rcu(tgt_dev, head, sess_tgt_dev_list_entry) {
@@ -4150,7 +4156,7 @@
  * scst_translate_lun() - Translate @cmd->lun into a tgt_dev pointer.
  * @cmd: SCSI command for which to translate the LUN number.
  *
- * Initialize the following @cmd members: cpu_cmd_counter, cmd_threads,
+ * Initialize the following @cmd members: counted, cmd_threads,
  * tgt_dev, cur_order_data, dev and devt.
  *
  * The caller must not hold any locks. May be called from IRQ context. The data
@@ -4168,9 +4174,7 @@
 
 	TRACE_ENTRY();
 
-	cmd->cpu_cmd_counter = scst_get();
-
-	if (likely(!test_bit(SCST_FLAG_SUSPENDED, &scst_flags))) {
+	if (likely(scst_get_cmd(cmd))) {
 		TRACE_DBG("Finding tgt_dev for cmd %p (lun %lld)", cmd,
 			(unsigned long long)cmd->lun);
 		res = -1;
@@ -4213,12 +4217,9 @@
 					cmd->sess->initiator_name, cmd->tgt->tgt_name);
 				scst_event_queue_lun_not_found(cmd);
 			}
-			scst_put(cmd->cpu_cmd_counter);
-			cmd->cpu_cmd_counter = NULL;
+			scst_put_cmd(cmd);
 		}
 	} else {
-		scst_put(cmd->cpu_cmd_counter);
-		cmd->cpu_cmd_counter = NULL;
 		TRACE_MGMT_DBG("%s", "FLAG SUSPENDED set, skipping");
 		res = 1;
 	}
@@ -4333,7 +4334,7 @@
 
 		scst_set_cmd_state(cmd, SCST_CMD_STATE_PARSE);
 
-		cnt = atomic_read(&tgt_dev->tgt_dev_cmd_count) - 1;
+		cnt = atomic_read(&tgt_dev->tgt_dev_cmd_count);
 		if (unlikely(cnt > dev->max_tgt_dev_commands)) {
 			TRACE(TRACE_FLOW_CONTROL,
 				"Too many pending commands (%d) in "
@@ -4344,8 +4345,7 @@
 		}
 
 #ifdef CONFIG_SCST_PER_DEVICE_CMD_COUNT_LIMIT
-		atomic_inc(&dev->dev_cmd_count);
-		cnt = atomic_read(&dev->dev_cmd_count);
+		cnt = atomic_inc_return(&dev->dev_cmd_count);
 		if (unlikely(cnt > SCST_MAX_DEV_COMMANDS)) {
 			if (!failure) {
 				TRACE(TRACE_FLOW_CONTROL,
@@ -4427,7 +4427,8 @@
 	 * There is no need for read barrier here, because we don't care where
 	 * this check will be done.
 	 */
-	susp = test_bit(SCST_FLAG_SUSPENDED, &scst_flags);
+	susp = scst_activity_suspended();
+
 	if (scst_init_poll_cnt > 0)
 		scst_init_poll_cnt--;
 
@@ -4488,13 +4489,13 @@
 	return;
 }
 
-static inline int test_init_cmd_list(void)
+/* Whether or not scst_init_thread() should stop waiting. */
+static inline bool test_init_cmd_list(void)
 {
-	int res = (!list_empty(&scst_init_cmd_list) &&
-		   !test_bit(SCST_FLAG_SUSPENDED, &scst_flags)) ||
-		  unlikely(kthread_should_stop()) ||
-		  (scst_init_poll_cnt > 0);
-	return res;
+	return (!list_empty(&scst_init_cmd_list) &&
+		!scst_activity_suspended()) ||
+		unlikely(kthread_should_stop()) ||
+		(scst_init_poll_cnt > 0);
 }
 
 int scst_init_thread(void *arg)
@@ -4542,7 +4543,6 @@
  */
 static void scst_ioctx_get(struct scst_cmd_threads *p_cmd_threads)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
 	mutex_lock(&p_cmd_threads->io_context_mutex);
 
 	WARN_ON(current->io_context);
@@ -4564,31 +4564,16 @@
 			 */
 			put_io_context(p_cmd_threads->io_context);
 		} else {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) && (LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 0)))
-#warning IO context sharing functionality disabled on 3.5 kernels due to bug in them. \
-See "http://lkml.org/lkml/2012/7/17/515" for more details.
-			static int q;
-
-			if (q == 0) {
-				q++;
-				PRINT_WARNING("IO context sharing functionality "
-					"disabled on 3.5 kernels due to bug in "
-					"them. See http://lkml.org/lkml/2012/7/17/515 "
-					"for more details.");
-			}
-#else
 			ioc_task_link(p_cmd_threads->io_context);
 			current->io_context = p_cmd_threads->io_context;
 			TRACE_DBG("Linked IO context %p "
 				"(p_cmd_threads %p)", p_cmd_threads->io_context,
 				p_cmd_threads);
-#endif
 		}
 		p_cmd_threads->io_context_refcnt++;
 	}
 
 	mutex_unlock(&p_cmd_threads->io_context_mutex);
-#endif
 
 	smp_wmb();
 	p_cmd_threads->io_context_ready = true;
@@ -4600,14 +4585,12 @@
  */
 static void scst_ioctx_put(struct scst_cmd_threads *p_cmd_threads)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
 	if (p_cmd_threads != &scst_main_cmd_threads) {
 		mutex_lock(&p_cmd_threads->io_context_mutex);
 		if (--p_cmd_threads->io_context_refcnt == 0)
 			p_cmd_threads->io_context = NULL;
 		mutex_unlock(&p_cmd_threads->io_context_mutex);
 	}
-#endif
 	return;
 }
 
@@ -4953,10 +4936,9 @@
 
 				if (++thr_cnt == 2)
 					break;
-				else {
-					spin_lock_irq(&thr->thr_cmd_list_lock);
-					thr_locked = true;
-				}
+
+				spin_lock_irq(&thr->thr_cmd_list_lock);
+				thr_locked = true;
 			}
 		} while (someth_done);
 
@@ -4967,7 +4949,6 @@
 			thr_locked = false;
 		}
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 		if (scst_poll_ns > 0) {
 			ktime_t end, kt;
 
@@ -4985,7 +4966,6 @@
 				kt = ktime_get();
 			} while (ktime_before(kt, end));
 		}
-#endif
 		spin_lock_irq(&p_cmd_threads->cmd_list_lock);
 		spin_lock(&thr->thr_cmd_list_lock);
 	}
@@ -5015,9 +4995,8 @@
 }
 
 /*
- * Returns 0 on success, or > 0 if SCST_FLAG_SUSPENDED set and
- * SCST_FLAG_SUSPENDING - not. No locks, protection is done by the
- * suspended activity.
+ * Returns 0 on success, or > 0 upon failure. No locks, protection is done by
+ * suspending activity.
  */
 static int scst_get_mgmt(struct scst_mgmt_cmd *mcmd)
 {
@@ -5025,12 +5004,7 @@
 
 	TRACE_ENTRY();
 
-	mcmd->cpu_cmd_counter = scst_get();
-
-	if (unlikely(test_bit(SCST_FLAG_SUSPENDED, &scst_flags) &&
-		     !test_bit(SCST_FLAG_SUSPENDING, &scst_flags))) {
-		scst_put(mcmd->cpu_cmd_counter);
-		mcmd->cpu_cmd_counter = NULL;
+	if (unlikely(!scst_get_mcmd(mcmd))) {
 		TRACE_MGMT_DBG("%s", "FLAG SUSPENDED set, skipping");
 		res = 1;
 		goto out;
@@ -5042,9 +5016,8 @@
 }
 
 /*
- * Returns 0 on success, < 0 if there is no device handler or
- * > 0 if SCST_FLAG_SUSPENDED set and SCST_FLAG_SUSPENDING - not.
- * No locks, protection is done by the suspended activity.
+ * Returns 0 on success, < 0 if there is no device handler or > 0 if activity
+ * has been suspended. No locks, protection is done by the suspended activity.
  */
 static int scst_mgmt_translate_lun(struct scst_mgmt_cmd *mcmd)
 {
@@ -5069,8 +5042,7 @@
 		mcmd->mcmd_tgt_dev = tgt_dev;
 		res = 0;
 	} else {
-		scst_put(mcmd->cpu_cmd_counter);
-		mcmd->cpu_cmd_counter = NULL;
+		scst_put_mcmd(mcmd);
 		res = -1;
 	}
 
@@ -5504,7 +5476,7 @@
 /* No locks. Returns 0, if mcmd should be processed further. */
 static int scst_set_mcmd_next_state(struct scst_mgmt_cmd *mcmd)
 {
-	int res;
+	int res = 0;
 
 	spin_lock_irq(&scst_mcmd_lock);
 
@@ -5513,7 +5485,6 @@
 	case SCST_MCMD_STATE_EXEC:
 		if (mcmd->cmd_done_wait_count == 0) {
 			mcmd->state = SCST_MCMD_STATE_AFFECTED_CMDS_DONE;
-			res = 0;
 		} else {
 			TRACE(TRACE_SCSI|TRACE_MGMT_DEBUG,
 				"cmd_done_wait_count(%d) not 0, "
@@ -5526,7 +5497,6 @@
 	case SCST_MCMD_STATE_AFFECTED_CMDS_DONE:
 		if (mcmd->cmd_finish_wait_count == 0) {
 			mcmd->state = SCST_MCMD_STATE_DONE;
-			res = 0;
 		} else {
 			TRACE(TRACE_SCSI|TRACE_MGMT_DEBUG,
 				"cmd_finish_wait_count(%d) not 0, "
@@ -5539,7 +5509,6 @@
 
 	case SCST_MCMD_STATE_DONE:
 		mcmd->state = SCST_MCMD_STATE_FINISHED;
-		res = 0;
 		break;
 
 	default:
@@ -5902,10 +5871,14 @@
 			res = scst_set_mcmd_next_state(mcmd);
 			goto out;
 		}
-		__scst_cmd_get(cmd);
 		tgt_dev = cmd->tgt_dev;
-		if (tgt_dev != NULL)
-			mcmd->cpu_cmd_counter = scst_get();
+		if (tgt_dev && !scst_get_mcmd(mcmd)) {
+			TRACE_MGMT_DBG("Suspended; skipping mcmd");
+			spin_unlock_irq(&sess->sess_list_lock);
+			res = 1;
+			goto ret;
+		}
+		__scst_cmd_get(cmd);
 		spin_unlock_irq(&sess->sess_list_lock);
 		TRACE_DBG("Cmd to abort %p for tag %llu found (tgt_dev %p)",
 			cmd, (unsigned long long)mcmd->tag, tgt_dev);
@@ -5963,6 +5936,7 @@
 out:
 	scst_event_queue_tm_fn_received(mcmd);
 
+ret:
 	TRACE_EXIT_RES(res);
 	return res;
 }
@@ -5977,10 +5951,8 @@
 	int arg = SG_SCSI_RESET_TARGET;
 
 	return scsi_ioctl_reset(sdev, (__force __user int *)&arg);
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 26)
-	return scsi_reset_provider(sdev, SCSI_TRY_RESET_TARGET);
 #else
-	return scsi_reset_provider(sdev, SCSI_TRY_RESET_BUS);
+	return scsi_reset_provider(sdev, SCSI_TRY_RESET_TARGET);
 #endif
 }
 
@@ -6522,21 +6494,21 @@
 
 	case SCST_CLEAR_ACA:
 		res = scst_clear_aca_mcmd(mcmd);
-		goto out_done;
+		if (unlikely(res))
+			break;
+
+		res = scst_set_mcmd_next_state(mcmd);
+		break;
 
 	default:
 		PRINT_ERROR("Unknown task management function %d", mcmd->fn);
 		scst_mgmt_cmd_set_status(mcmd, SCST_MGMT_STATUS_REJECTED);
-		goto out_done;
+		res = scst_set_mcmd_next_state(mcmd);
+		break;
 	}
 
-out:
 	TRACE_EXIT_RES(res);
 	return res;
-
-out_done:
-	res = scst_set_mcmd_next_state(mcmd);
-	goto out;
 }
 
 static void scst_call_task_mgmt_affected_cmds_done(struct scst_mgmt_cmd *mcmd)
@@ -6842,9 +6814,7 @@
 			rc = scst_process_mgmt_cmd(mcmd);
 			spin_lock_irq(&scst_mcmd_lock);
 			if (rc > 0) {
-				if (test_bit(SCST_FLAG_SUSPENDED, &scst_flags) &&
-				    !test_bit(SCST_FLAG_SUSPENDING,
-						&scst_flags)) {
+				if (scst_mcmd_suspended()) {
 					TRACE_MGMT_DBG("Adding mgmt cmd %p to "
 						"head of delayed mgmt cmd list",
 						mcmd);

diff --git a/scst/scst/src/scst_tg.c b/scst/scst/src/scst_tg.c
index 9a2ccc0..72feb67 100644
--- a/scst/scst/src/scst_tg.c
+++ b/scst/scst/src/scst_tg.c

@@ -50,15 +50,10 @@
 static DEFINE_MUTEX(scst_dg_mutex);
 static LIST_HEAD(scst_dev_group_list);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) || \
-	defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static int alua_invariant_check;
-#else
 static bool alua_invariant_check;
-#endif
 module_param(alua_invariant_check, bool, 0644);
 MODULE_PARM_DESC(alua_invariant_check,
-		 "Enables a run-time ALUA state invariant check.");
+		 "Enables a run-time ALUA state invariant check. (default: false)");
 
 /* Global SCST ALUA lock/unlock functions (scst_dg_mutex) */
 void scst_alua_lock(void)
@@ -181,19 +176,29 @@
 	return NULL;
 }
 
+static bool __scst_tg_have_tgt(struct scst_target_group *tg,
+			       const struct scst_tgt *tgt)
+{
+	struct scst_tg_tgt *tg_tgt;
+
+	list_for_each_entry(tg_tgt, &tg->tgt_list, entry)
+		if (tg_tgt->tgt == tgt)
+			return true;
+
+	return false;
+}
+
 /* Look up a target group by target port. */
 static struct scst_target_group *__lookup_tg_by_tgt(struct scst_dev_group *dg,
 						    const struct scst_tgt *tgt)
 {
 	struct scst_target_group *tg;
-	struct scst_tg_tgt *tg_tgt;
 
 	lockdep_assert_held(&scst_dg_mutex);
 
 	list_for_each_entry(tg, &dg->tg_list, entry)
-		list_for_each_entry(tg_tgt, &tg->tgt_list, entry)
-			if (tg_tgt->tgt == tgt)
-				return tg;
+		if (__scst_tg_have_tgt(tg, tgt))
+			return tg;
 
 	return NULL;
 }
@@ -404,24 +409,14 @@
 
 struct scst_alua_retry {
 	struct scst_cmd *alua_retry_cmd;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct work_struct alua_retry_work;
-#else
 	struct delayed_work alua_retry_work;
-#endif
 };
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_alua_transitioning_work_fn(void *p)
-{
-	struct scst_alua_retry *retry = p;
-#else
 static void scst_alua_transitioning_work_fn(struct work_struct *work)
 {
 	struct scst_alua_retry *retry =
 		container_of(work, struct scst_alua_retry,
 			     alua_retry_work.work);
-#endif
 	struct scst_cmd *cmd = retry->alua_retry_cmd;
 
 	TRACE_ENTRY();
@@ -493,13 +488,8 @@
 
 		/* No get is needed, because cmd is sync here */
 		retry->alua_retry_cmd = cmd;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-		INIT_WORK(&retry->alua_retry_work,
-			  scst_alua_transitioning_work_fn, retry);
-#else
 		INIT_DELAYED_WORK(&retry->alua_retry_work,
 				  scst_alua_transitioning_work_fn);
-#endif
 		cmd->already_transitioning = 1;
 		schedule_delayed_work(&retry->alua_retry_work, HZ/2);
 		res = SCST_ALUA_CHECK_DELAYED;
@@ -679,12 +669,7 @@
 	if (!tg_tgt)
 		goto out;
 	tg_tgt->tg = tg;
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 24)
 	kobject_init(&tg_tgt->kobj, &scst_tg_tgt_ktype);
-#else
-	kobject_init(&tg_tgt->kobj);
-	tg_tgt->kobj.ktype = &scst_tg_tgt_ktype;
-#endif
 	tg_tgt->name = kstrdup(name, GFP_KERNEL);
 	if (!tg_tgt->name)
 		goto out_put;
@@ -809,12 +794,7 @@
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
 		goto out;
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 24)
 	kobject_init(&tg->kobj, &scst_tg_ktype);
-#else
-	kobject_init(&tg->kobj);
-	tg->kobj.ktype = &scst_tg_ktype;
-#endif
 	tg->name = kstrdup(name, GFP_KERNEL);
 	if (!tg->name)
 		goto out_put;
@@ -1009,9 +989,9 @@
 	struct scst_dg_dev *dg_dev;
 	struct scst_device *dev;
 	struct scst_tgt_dev *tgt_dev;
-	struct scst_tg_tgt *tg_tgt;
 	struct scst_tgt *tgt;
 	bool invoke_callbacks;
+	bool tg_is_remote;
 
 	sBUG_ON(state >= ARRAY_SIZE(scst_alua_filter));
 	lockdep_assert_held(&scst_dg_mutex);
@@ -1019,21 +999,53 @@
 	if (tg->state == state)
 		return;
 
+	/*
+	 * If the target group contains a target entry whose target pointer
+	 * is NULL, that target is a remote one, so we shouldn't call the
+	 * on_alua_state_change_*() callbacks then.
+	 *
+	 * See also 29548a4a ("scst: Remove the on_alua_state_change_*()
+	 * callback functions") and d333ce82 ("Restore the
+	 * on_alua_state_change_*() callback functions").
+	 */
+	tg_is_remote = __scst_tg_have_tgt(tg, NULL);
+
 	list_for_each_entry(dg_dev, &tg->dg->dev_list, entry) {
 		invoke_callbacks = true;
 		dev = dg_dev->dev;
+
 		list_for_each_entry(tgt_dev, &dev->dev_tgt_dev_list,
 				    dev_tgt_dev_list_entry) {
 			tgt = tgt_dev->sess->tgt;
-			list_for_each_entry(tg_tgt, &tg->tgt_list, entry) {
-				if (tg_tgt->tgt == tgt) {
-					__scst_tgt_set_state(tg, tgt_dev, state,
-							     invoke_callbacks);
-					invoke_callbacks = false;
-					break;
-				}
+
+			if (__scst_tg_have_tgt(tg, tgt)) {
+				__scst_tgt_set_state(tg, tgt_dev, state,
+						     invoke_callbacks);
+				invoke_callbacks = false;
 			}
 		}
+
+		/*
+		 * There are several cases in which `invoke_callbacks` can
+		 * still be true here:
+		 * - The SCST device doesn't have any target devices yet,
+		 *   or has only ones that aren't included in the given
+		 *   target group (e.g. the default copy manager for
+		 *   blockio devices).
+		 * - The target group has remote targets.
+		 *
+		 * We should call the on_alua_state_change_*() callbacks
+		 * only in the first case.
+		 *
+		 * See also https://github.com/SCST-project/scst/issues/55.
+		 */
+		if (invoke_callbacks && !tg_is_remote) {
+			if (dev->handler->on_alua_state_change_start)
+				dev->handler->on_alua_state_change_start(dev, tg->state, state);
+
+			if (dev->handler->on_alua_state_change_finish)
+				dev->handler->on_alua_state_change_finish(dev, tg->state, state);
+		}
 	}
 
 	tg->state = state;
@@ -1077,7 +1089,6 @@
 	struct scst_dg_dev *dg_dev;
 	struct scst_device *dev;
 	struct scst_tgt_dev *tgt_dev;
-	struct scst_tg_tgt *tg_tgt;
 	struct scst_tgt *tgt;
 
 	lockdep_assert_held(&scst_dg_mutex);
@@ -1087,13 +1098,9 @@
 		list_for_each_entry(tgt_dev, &dev->dev_tgt_dev_list,
 				    dev_tgt_dev_list_entry) {
 			tgt = tgt_dev->sess->tgt;
-			list_for_each_entry(tg_tgt, &tg->tgt_list, entry) {
-				if (tg_tgt->tgt == tgt) {
-					scst_gen_aen_or_ua(tgt_dev,
-			SCST_LOAD_SENSE(scst_sense_asym_access_state_changed));
-					break;
-				}
-			}
+			if (__scst_tg_have_tgt(tg, tgt))
+				scst_gen_aen_or_ua(tgt_dev,
+					SCST_LOAD_SENSE(scst_sense_asym_access_state_changed));
 		}
 	}
 }
@@ -1316,12 +1323,7 @@
 	dg = kzalloc(sizeof(*dg), GFP_KERNEL);
 	if (!dg)
 		goto out;
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 24)
 	kobject_init(&dg->kobj, &scst_dg_ktype);
-#else
-	kobject_init(&dg->kobj);
-	dg->kobj.ktype = &scst_dg_ktype;
-#endif
 	dg->name = kstrdup(name, GFP_KERNEL);
 	if (!dg->name)
 		goto out_put;

diff --git a/scst/scst_local/Makefile b/scst/scst_local/Makefile
index e00aae2..4e7626f 100644
--- a/scst/scst_local/Makefile
+++ b/scst/scst_local/Makefile

@@ -43,7 +43,6 @@
 	  $(shell [ -n "$(PASS_CC_TO_MAKE)" ] && echo CC="$(CC)")	\
 	  $$([ -n "$(DEPMOD)" ] && echo "DEPMOD=$(DEPMOD)")		\
 	  CONFIG_MODULE_SIG_ALL= modules_install
-	chmod u+x $(INSTALL_DIR)/*.ko
 
 uninstall:
 	rm -f $(INSTALL_DIR)/scst_local.ko

diff --git a/scst/scst_local/scst_local.c b/scst/scst_local/scst_local.c
index 79ca63a..e9a0390 100644
--- a/scst/scst_local/scst_local.c
+++ b/scst/scst_local/scst_local.c

@@ -46,10 +46,6 @@
 #include <scst_debug.h>
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-#define SG_MAX_SINGLE_ALLOC	(PAGE_SIZE / sizeof(struct scatterlist))
-#endif
-
 #ifndef INSIDE_KERNEL_TREE
 #if defined(CONFIG_HIGHMEM4G) || defined(CONFIG_HIGHMEM64G)
 #warning HIGHMEM kernel configurations are not supported by this module, \
@@ -75,21 +71,7 @@
 static unsigned long scst_local_trace_flag = SCST_LOCAL_DEFAULT_LOG_FLAGS;
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19))
-/*
- * Provide some local definitions that are not provided for some earlier
- * kernels so we operate over a wider range of kernels
- *
- * Some time before 2.6.24 scsi_sg_count, scsi_sglist and scsi_bufflen were
- * not available. Make it available for 2.6.18 which is used still on some
- * distros, like CentOS etc.
- */
-#define scsi_sg_count(cmd) ((cmd)->use_sg)
-#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer)
-#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
-#endif
-
-#define SCST_LOCAL_VERSION "3.5.0"
+#define SCST_LOCAL_VERSION "3.7.0"
 static const char *scst_local_version_date = "20110901";
 
 /* Some statistics */
@@ -97,12 +79,7 @@
 static atomic_t num_dev_resets = ATOMIC_INIT(0);
 static atomic_t num_target_resets = ATOMIC_INIT(0);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
-    || defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static int scst_local_add_default_tgt = true;
-#else
 static bool scst_local_add_default_tgt = true;
-#endif
 module_param_named(add_default_tgt, scst_local_add_default_tgt, bool, S_IRUGO);
 MODULE_PARM_DESC(add_default_tgt, "add (default) or not on start default "
 	"target scst_local_tgt with default session scst_local_host");
@@ -163,31 +140,6 @@
 static void __scst_local_remove_target(struct scst_local_tgt *tgt);
 static void scst_local_remove_target(struct scst_local_tgt *tgt);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-
-/*
- * Maintains data that is needed during command processing ...
- * We have a single element scatterlist in here in case the scst_cmnd
- * we are given has a buffer, not a scatterlist, but we only need this for
- * kernels less than 2.6.25.
- */
-struct scst_local_tgt_specific {
-	struct scsi_cmnd *cmnd;
-	void (*done)(struct scsi_cmnd *);
-	struct scatterlist sgl;
-};
-
-/*
- * We use a pool of objects maintaind by the kernel so that it is less
- * likely to have to allocate them when we are in the data path.
- *
- * Note, we only need this for kernels in which we are likely to get non
- * scatterlist requests.
- */
-static struct kmem_cache *tgt_specific_pool;
-
-#endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)) */
-
 static atomic_t scst_local_sess_num = ATOMIC_INIT(0);
 
 static LIST_HEAD(scst_local_tgts_list);
@@ -483,6 +435,19 @@
  ** Session attributes
  **/
 
+static ssize_t host_no_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	struct scst_session *scst_sess =
+		container_of(kobj, struct scst_session, sess_kobj);
+	struct scst_local_sess *sess = scst_sess_get_tgt_priv(scst_sess);
+	struct Scsi_Host *host = sess->shost;
+
+	return host ? snprintf(buf, PAGE_SIZE, "%u\n", host->host_no) : -EINVAL;
+}
+
+static struct kobj_attribute scst_local_host_no_attr = __ATTR_RO(host_no);
+
 static ssize_t scst_local_transport_id_show(struct kobject *kobj,
 	struct kobj_attribute *attr, char *buf)
 {
@@ -571,6 +536,7 @@
 		scst_local_transport_id_store);
 
 static const struct attribute *scst_local_sess_attrs[] = {
+	&scst_local_host_no_attr.attr,
 	&scst_local_transport_id_attr.attr,
 	NULL,
 };
@@ -749,7 +715,7 @@
 	sess = to_scst_lcl_sess(scsi_get_device(scmd->device->host));
 
 	ret = scst_rx_mgmt_fn_tag(sess->scst_sess, SCST_ABORT_TASK,
-				  blk_mq_unique_tag(scmd->request),
+				  blk_mq_unique_tag(scsi_cmd_to_rq(scmd)),
 				  false, &dev_reset_completion);
 
 	/* Now wait for the completion ... */
@@ -793,7 +759,6 @@
 	return ret;
 }
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 25))
 static int scst_local_target_reset(struct scsi_cmnd *scmd)
 {
 	struct scst_local_sess *sess;
@@ -822,7 +787,6 @@
 	TRACE_EXIT_RES(ret);
 	return ret;
 }
-#endif
 
 static void scst_local_copy_sense(struct scsi_cmnd *cmnd, struct scst_cmd *scst_cmnd)
 {
@@ -874,19 +838,9 @@
  * This does the heavy lifting ... we pass all the commands on to the
  * target driver and have it do its magic ...
  */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 static int scst_local_queuecommand(struct Scsi_Host *host,
 				   struct scsi_cmnd *scmd)
-#else
-static int scst_local_queuecommand_lck(struct scsi_cmnd *scmd,
-				       void (*done)(struct scsi_cmnd *))
-	__acquires(&h->host_lock)
-	__releases(&h->host_lock)
-#endif
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	struct scst_local_tgt_specific *tgt_specific = NULL;
-#endif
 	struct scst_local_sess *sess;
 	struct scatterlist *sgl = NULL;
 	int sgl_count = 0;
@@ -899,39 +853,16 @@
 	TRACE_DBG("lun %lld, cmd: 0x%02X", (u64)scmd->device->lun,
 		  scmd->cmnd[0]);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-	/*
-	 * We save a pointer to the done routine in scmd->scsi_done and
-	 * we save that as tgt specific stuff below.
-	 */
-	scmd->scsi_done = done;
-#endif
-
 	sess = to_scst_lcl_sess(scsi_get_device(scmd->device->host));
 
 	if (sess->unregistering) {
 		scmd->result = DID_BAD_TARGET << 16;
-		scmd->scsi_done(scmd);
+		scsi_done(scmd);
 		return 0;
 	}
 
 	scsi_set_resid(scmd, 0);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	/*
-	 * Allocate a tgt_specific_structure. We need this in case we need
-	 * to construct a single element SGL.
-	 */
-	tgt_specific = kmem_cache_alloc(tgt_specific_pool, GFP_ATOMIC);
-	if (!tgt_specific) {
-		PRINT_ERROR("Unable to create tgt_specific (size %zu)",
-			sizeof(*tgt_specific));
-		return SCSI_MLQUEUE_HOST_BUSY;
-	}
-	tgt_specific->cmnd = scmd;
-	tgt_specific->done = done;
-#endif
-
 	/*
 	 * Tell the target that we have a command ... but first we need
 	 * to get the LUN into a format that SCST understand
@@ -948,7 +879,7 @@
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
-	scst_cmd_set_tag(scst_cmd, blk_mq_unique_tag(scmd->request));
+	scst_cmd_set_tag(scst_cmd, blk_mq_unique_tag(scsi_cmd_to_rq(scmd)));
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
 	if (scmd->device->tagged_supported && scmd->device->simple_tags)
 		scst_cmd_set_queue_type(scst_cmd, SCST_CMD_QUEUE_SIMPLE);
@@ -972,43 +903,11 @@
 	}
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	/*
-	 * If the command has a request, not a scatterlist, then convert it
-	 * to one. We use scsi_sg_count to isolate us from the changes from
-	 * version to version
-	 */
-	if (scsi_sg_count(scmd)) {
-		sgl = scsi_sglist(scmd);
-		sgl_count = scsi_sg_count(scmd);
-	} else {
-		/*
-		 * Build a one-element scatter list out of the buffer
-		 * We will not even get here if the kernel version we
-		 * are building on only supports scatterlists. See #if above.
-		 *
-		 * We use the sglist and bufflen function/macros to isolate
-		 * us from kernel version differences.
-		 */
-		if (scsi_sglist(scmd)) {
-			sg_init_one(&tgt_specific->sgl,
-				    scsi_sglist(scmd),
-				    scsi_bufflen(scmd));
-			sgl	  = &tgt_specific->sgl;
-			sgl_count = 1;
-		} else {
-			sgl = NULL;
-			sgl_count = 0;
-		}
-	}
-#else
 	sgl = scsi_sglist(scmd);
 	sgl_count = scsi_sg_count(scmd);
-#endif
 
 	if (scsi_bidi_cmnd(scmd)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 24) &&	\
-	LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && \
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && 		\
 	(!defined(RHEL_RELEASE_CODE) ||				\
 	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(8, 3))
 		/* Some of these symbols are only defined after 2.6.24 */
@@ -1037,11 +936,7 @@
 	}
 
 	/* Save the correct thing below depending on version */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	scst_cmd_set_tgt_priv(scst_cmd, tgt_specific);
-#else
 	scst_cmd_set_tgt_priv(scst_cmd, scmd);
-#endif
 
 	scst_cmd_init_done(scst_cmd, SCST_CONTEXT_THREAD);
 
@@ -1088,10 +983,7 @@
 	return scsi_change_queue_depth(sdev, depth);
 }
 
-#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) || \
-    defined(CONFIG_SUSE_KERNEL) || \
-    !(!defined(RHEL_RELEASE_CODE) || \
-     RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
+#else
 
 static int scst_local_change_queue_depth(struct scsi_device *sdev, int depth,
 	int reason)
@@ -1139,15 +1031,7 @@
 	return res;
 }
 
-#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) || defined(CONFIG_SUSE_KERNEL) || !(!defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1)) */
-
-static int scst_local_change_queue_depth(struct scsi_device *sdev, int qdepth)
-{
-	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
-	return sdev->queue_depth;
-}
-
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) || defined(CONFIG_SUSE_KERNEL) || !(!defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1)) */
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) */
 
 static int scst_local_slave_alloc(struct scsi_device *sdev)
 {
@@ -1242,18 +1126,10 @@
 	return;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_aen_work_fn(void *ctx)
-#else
 static void scst_aen_work_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct scst_local_sess *sess = ctx;
-#else
 	struct scst_local_sess *sess =
 		container_of(work, struct scst_local_sess, aen_work);
-#endif
 
 	TRACE_ENTRY();
 
@@ -1330,18 +1206,10 @@
 	return 0;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-static void scst_remove_work_fn(void *ctx)
-#else
 static void scst_remove_work_fn(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	struct scst_local_sess *sess = ctx;
-#else
 	struct scst_local_sess *sess =
 		container_of(work, struct scst_local_sess, remove_work);
-#endif
 
 	scst_local_remove_adapter(sess);
 }
@@ -1382,9 +1250,6 @@
 
 static int scst_local_targ_xmit_response(struct scst_cmd *scst_cmd)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	struct scst_local_tgt_specific *tgt_specific;
-#endif
 	struct scsi_cmnd *scmd = NULL;
 	void (*done)(struct scsi_cmnd *);
 
@@ -1400,13 +1265,11 @@
 	    (scst_cmd_get_data_direction(scst_cmd) & SCST_DATA_READ))
 		scst_copy_sg(scst_cmd, SCST_SG_COPY_TO_TARGET);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	tgt_specific = scst_cmd_get_tgt_priv(scst_cmd);
-	scmd = tgt_specific->cmnd;
-	done = tgt_specific->done;
-#else
 	scmd = scst_cmd_get_tgt_priv(scst_cmd);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
 	done = scmd->scsi_done;
+#else
+	done = scsi_done;
 #endif
 
 	/*
@@ -1441,21 +1304,6 @@
 	return SCST_TGT_RES_SUCCESS;
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-static void scst_local_targ_on_free_cmd(struct scst_cmd *scst_cmd)
-{
-	struct scst_local_tgt_specific *tgt_specific;
-
-	TRACE_ENTRY();
-
-	tgt_specific = scst_cmd_get_tgt_priv(scst_cmd);
-	kmem_cache_free(tgt_specific_pool, tgt_specific);
-
-	TRACE_EXIT();
-	return;
-}
-#endif
-
 static void scst_local_targ_task_mgmt_done(struct scst_mgmt_cmd *mgmt_cmd)
 {
 	struct completion *compl;
@@ -1499,11 +1347,7 @@
 
 static struct scst_tgt_template scst_local_targ_tmpl = {
 	.name			= "scst_local",
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
-	.sg_tablesize		= SG_MAX_SINGLE_ALLOC,
-#else
 	.sg_tablesize		= 0xffff,
-#endif
 	.xmit_response_atomic	= 1,
 	.multithreaded_init_done = 1,
 	.enabled_attr_not_needed = 1,
@@ -1520,9 +1364,6 @@
 	.close_session		= scst_local_close_session,
 	.pre_exec		= scst_local_targ_pre_exec,
 	.xmit_response		= scst_local_targ_xmit_response,
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	.on_free_cmd		= scst_local_targ_on_free_cmd,
-#endif
 	.task_mgmt_fn_done	= scst_local_targ_task_mgmt_done,
 	.report_aen		= scst_local_report_aen,
 	.get_initiator_port_transport_id = scst_local_get_initiator_port_transport_id,
@@ -1536,27 +1377,17 @@
 
 static struct scsi_host_template scst_lcl_ini_driver_template = {
 	.name				= SCST_LOCAL_NAME,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
-	.queuecommand			= scst_local_queuecommand_lck,
-#else
 	.queuecommand			= scst_local_queuecommand,
-#endif
 	.change_queue_depth		= scst_local_change_queue_depth,
 	.slave_alloc			= scst_local_slave_alloc,
 	.slave_configure		= scst_local_slave_configure,
 	.eh_abort_handler		= scst_local_abort,
 	.eh_device_reset_handler	= scst_local_device_reset,
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 25))
 	.eh_target_reset_handler	= scst_local_target_reset,
-#endif
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && \
 	LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 	.use_blk_tags			= true,
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) || \
-    defined(CONFIG_SUSE_KERNEL) || \
-    !(!defined(RHEL_RELEASE_CODE) || \
-     RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(6, 1))
 	.can_queue			= 2048,
 	/*
 	 * Set it low for the "Drop back to untagged" case in
@@ -1564,10 +1395,6 @@
 	 * default in slave_configure()
 	 */
 	.cmd_per_lun			= 3,
-#else
-	.can_queue			= 256,
-	.cmd_per_lun			= 32,
-#endif
 	.this_id			= -1,
 	.sg_tablesize			= 0xFFFF,
 	.max_sectors			= 0xffff,
@@ -1620,11 +1447,7 @@
 	 * kernels. If we don't,  max_cmd_size gets set to 4 (and we get
 	 * a compiler warning) so a scan never occurs.
 	 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
-	hpnt->max_cmd_len = 16;
-#else
 	hpnt->max_cmd_len = 260;
-#endif
 
 	ret = scsi_add_host(hpnt, &sess->dev);
 	if (ret) {
@@ -1639,7 +1462,18 @@
 	return ret;
 }
 
-static int scst_local_driver_remove(struct device *dev)
+/*
+ * See also commit fc7a6209d571 ("bus: Make remove callback return void")
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) &&		\
+	(!defined(RHEL_RELEASE_CODE) ||				\
+	 RHEL_RELEASE_CODE -0 < RHEL_RELEASE_VERSION(9, 2))
+#define DRIVER_REMOVE_RET int
+#else
+#define DRIVER_REMOVE_RET void
+#endif
+
+static DRIVER_REMOVE_RET scst_local_driver_remove(struct device *dev)
 {
 	struct scst_local_sess *sess;
 	struct Scsi_Host *shost = NULL;
@@ -1656,7 +1490,7 @@
 	scsi_host_put(shost);
 
 	TRACE_EXIT();
-	return 0;
+	return (DRIVER_REMOVE_RET)0;
 }
 
 static int scst_local_bus_match(struct device *dev,
@@ -1680,26 +1514,7 @@
 	.bus	= &scst_local_lld_bus,
 };
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
-static void scst_local_root_release(struct device *dev)
-{
-	TRACE_ENTRY();
-
-	TRACE_EXIT();
-	return;
-}
-
-static struct device scst_local_root = {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-	.bus_id		= "scst_local_root",
-#else
-	.init_name	= "scst_local_root",
-#endif
-	.release	= scst_local_root_release,
-};
-#else
 static struct device *scst_local_root;
-#endif
 
 static void scst_local_free_sess(struct scst_session *scst_sess)
 {
@@ -1731,15 +1546,7 @@
 	 * work.
 	 */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
-	/*
-	 * cancel_work_sync() was introduced in 2.6.22. We can only wait until
-	 * all scheduled work is done.
-	 */
-	flush_workqueue(aen_workqueue);
-#else
 	cancel_work_sync(&sess->aen_work);
-#endif
 
 	spin_lock(&sess->aen_lock);
 	WARN_ON_ONCE(!sess->unregistering);
@@ -1776,13 +1583,8 @@
 	/*
 	 * Init this stuff we need for scheduling AEN work
 	 */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20))
-	INIT_WORK(&sess->aen_work, scst_aen_work_fn, sess);
-	INIT_WORK(&sess->remove_work, scst_remove_work_fn, sess);
-#else
 	INIT_WORK(&sess->aen_work, scst_aen_work_fn);
 	INIT_WORK(&sess->remove_work, scst_remove_work_fn);
-#endif
 	spin_lock_init(&sess->aen_lock);
 	INIT_LIST_HEAD(&sess->aen_work_list);
 
@@ -1795,17 +1597,9 @@
 	}
 
 	sess->dev.bus     = &scst_local_lld_bus;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
-	sess->dev.parent  = &scst_local_root;
-#else
 	sess->dev.parent = scst_local_root;
-#endif
 	sess->dev.release = &scst_local_release_adapter;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
-	snprintf(sess->dev.bus_id, sizeof(sess->dev.bus_id), initiator_name);
-#else
 	sess->dev.init_name = kobject_name(&sess->scst_sess->sess_kobj);
-#endif
 
 	res = device_register(&sess->dev);
 	if (res != 0)
@@ -1959,44 +1753,11 @@
 #endif
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	/*
-	 * Allocate a pool of structures for tgt_specific structures.
-	 * We only need this if we could get non scatterlist requests
-	 */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
-	tgt_specific_pool = kmem_cache_create("scst_tgt_specific",
-				      sizeof(struct scst_local_tgt_specific),
-				      0, SCST_SLAB_FLAGS, NULL);
-#else
-	tgt_specific_pool = kmem_cache_create("scst_tgt_specific",
-				      sizeof(struct scst_local_tgt_specific),
-				      0, SCST_SLAB_FLAGS, NULL, NULL);
-#endif
-	if (!tgt_specific_pool) {
-		PRINT_ERROR("%s", "Unable to initialize tgt_specific_pool");
-		ret = -ENOMEM;
-		goto out;
-	}
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
-	ret = device_register(&scst_local_root);
-	if (ret < 0) {
-		PRINT_ERROR("Root device_register() error: %d", ret);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-		goto destroy_kmem;
-#else
-		goto out;
-#endif
-	}
-#else
 	scst_local_root = root_device_register(SCST_LOCAL_NAME);
 	if (IS_ERR(scst_local_root)) {
 		ret = PTR_ERR(scst_local_root);
 		goto out;
 	}
-#endif
 
 	ret = bus_register(&scst_local_lld_bus);
 	if (ret < 0) {
@@ -2058,16 +1819,8 @@
 	bus_unregister(&scst_local_lld_bus);
 
 dev_unreg:
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
-	device_unregister(&scst_local_root);
-#else
 	root_device_unregister(scst_local_root);
-#endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-destroy_kmem:
-	kmem_cache_destroy(tgt_specific_pool);
-#endif
 	goto out;
 }
 
@@ -2090,21 +1843,11 @@
 
 	driver_unregister(&scst_local_driver);
 	bus_unregister(&scst_local_lld_bus);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
-	device_unregister(&scst_local_root);
-#else
 	root_device_unregister(scst_local_root);
-#endif
 
 	/* Now unregister the target template */
 	scst_unregister_target_template(&scst_local_targ_tmpl);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
-	/* Free the non scatterlist pool we allocated */
-	if (tgt_specific_pool)
-		kmem_cache_destroy(tgt_specific_pool);
-#endif
-
 	/* To make lockdep happy */
 	up_write(&scst_local_exit_rwsem);
 

diff --git a/scst/scstadmin/Makefile b/scst/scstadmin/Makefile
index 3526d5e..5d962ea 100644
--- a/scst/scstadmin/Makefile
+++ b/scst/scstadmin/Makefile

@@ -4,9 +4,11 @@
 
 REVISION ?= $(shell if [ -e .svn ]; then				\
 		      svn info | sed -n 's/^Revision:[[:blank:]]*/./p';	\
-		else git log | grep -c ^commit;				\
+		    elif [ -e .git ]; then				\
+                      echo -n .;					\
+		      git log | grep -c ^commit;			\
 		    fi)
-VERSION = $(shell echo -n "$$(sed -n 's/^[[:blank:]]*\$$VERSION[[:blank:]]*=[[:blank:]]*[\"'"'"']\([0-9.]*\)[\"'"'"'];$$/\1/p' scstadmin/scst-*/lib/SCST/SCST.pm)")
+VERSION = $(shell echo -n "$$(sed -n 's/^[[:blank:]]*\$$VERSION[[:blank:]]*=[[:blank:]]*[\"'"'"']\([0-9.]*\)[\"'"'"'];$$/\1/p' scstadmin/scst-*/lib/SCST/SCST.pm)$(REVISION)")
 
 SCSTADMIN_DIR = $(shell if [ ! -h scstadmin ]; then		\
 			    rm -f scstadmin;			\
@@ -149,9 +151,7 @@
 	sed "s/@rpm_version@/$(VERSION)/g"				\
 		<$${name}.spec.in >$${name}.spec &&			\
 	MAKE="$(MAKE)"							\
-	rpmbuild --define="%_topdir $${rpmtopdir}"                      \
-		--define="%rpm_release $(REVISION)"                     \
-		-ba $${name}.spec &&														\
+	rpmbuild --define="%_topdir $${rpmtopdir}" -ba $${name}.spec &&	\
 	rm -f $${name}-$(VERSION).tar.bz2
 
 clean:

diff --git a/scst/scstadmin/default/scst b/scst/scstadmin/default/scst
index 4b466ad..6d90290 100644
--- a/scst/scstadmin/default/scst
+++ b/scst/scstadmin/default/scst

@@ -1,5 +1,2 @@
 # iscsi-scstd command-line options. See also man iscsi-scstd.
 # ISCSID_OPTIONS="-u0 -g0 -p3260"
-# For SCST specify all SCST target drivers in SCST_TARGET_MODULES.
-# An example:
-SCST_TARGET_MODULES="scst_local iscsi_scst ocs_fc_scst scst_user"

diff --git a/scst/scstadmin/examples/scst.conf.sysfs b/scst/scstadmin/examples/scst.conf.sysfs
index 873614a..b20dfb6 100644
--- a/scst/scstadmin/examples/scst.conf.sysfs
+++ b/scst/scstadmin/examples/scst.conf.sysfs

@@ -79,14 +79,14 @@
 }
 
 TARGET_DRIVER ib_srpt {
-	TARGET ib_srpt_target_0 {
+	TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 		enabled 1
 
 		LUN 0 disk1
 		LUN 1 disk2
 	}
 
-	TARGET ib_srpt_target_1 {
+	TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 		enabled 1
 
 		LUN 0 disk1
@@ -101,7 +101,7 @@
 		group_id 1
 		state active
 
-		TARGET ib_srpt_target_0 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 			rel_tgt_id 1
 		}
 	}
@@ -110,7 +110,7 @@
 		group_id 2
 		state offline
 
-		TARGET ib_srpt_target_1 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 			rel_tgt_id 2
 		}
 	}
@@ -123,7 +123,7 @@
 		group_id 1
 		state offline
 
-		TARGET ib_srpt_target_0 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 			rel_tgt_id 1
 		}
 	}
@@ -132,7 +132,7 @@
 		group_id 2
 		state active
 
-		TARGET ib_srpt_target_1 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 			rel_tgt_id 2
 		}
 	}

diff --git a/scst/scstadmin/init.d/scst b/scst/scstadmin/init.d/scst
index 960c404..9490e1b 100755
--- a/scst/scstadmin/init.d/scst
+++ b/scst/scstadmin/init.d/scst

@@ -69,8 +69,6 @@
 PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin
 SCST_DFLT=/etc/default/scst
 
-[ -x "$(which scstadmin)" ] || exit 5
-
 if [ -f $SCST_DFLT ]; then
     . $SCST_DFLT
 fi
@@ -253,12 +251,6 @@
             tmpout=/tmp/scstadmin-output-$$
             if scstadmin -config $SCST_CFG >$tmpout 2>&1; then
                 rm -f $tmpout
-                for m in $SCST_MODULES; do
-                    if [ "$m" = "ocs_fc_scst" ]; then
-                        for i in $(ls /sys/kernel/scst_tgt/targets/ocs_xe201/53:8a:95:f2:26*/enabled); do echo 1 > $i; done
-                        for h in $(ls /sys/class/fc_host/); do echo "1" > /sys/class/fc_host/$h/issue_lip; done
-                    fi
-                done
                 return 0
             else
                 cat $tmpout

diff --git a/scst/scstadmin/scstadmin.spec.in b/scst/scstadmin/scstadmin.spec.in
index e53308e..086aa6f 100644
--- a/scst/scstadmin/scstadmin.spec.in
+++ b/scst/scstadmin/scstadmin.spec.in

@@ -3,7 +3,7 @@
 
 Name:		scstadmin
 Version:        %{rpm_version}
-Release:        %{rpm_release}
+Release:        1
 Summary:	SCST configuration tool
 Group:		Productivity/Networking/Other
 License:	GPLv2

diff --git a/scst/scstadmin/scstadmin.sysfs/Makefile b/scst/scstadmin/scstadmin.sysfs/Makefile
index f23e189..c749bb4 100644
--- a/scst/scstadmin/scstadmin.sysfs/Makefile
+++ b/scst/scstadmin/scstadmin.sysfs/Makefile

@@ -15,21 +15,24 @@
 	install -m 755 $(TOOL) $(DESTDIR)$(SBINDIR)
 	regex="s|%INSTALLSITELIB%|$$(make -sC scst-$(MODULE_VERSION) print-INSTALLSITELIB | grep -v ^make)|"; echo "$${regex}"; sed -i "$${regex}" $(DESTDIR)$(SBINDIR)/$(TOOL)
 
-uninstall:
+makefile:
+	@cd ./scst-$(MODULE_VERSION) &&		\
+	perl Makefile.PL PREFIX=$(PREFIX)
+
+uninstall: makefile
 	-rm -f $(DESTDIR)$(SBINDIR)/$(TOOL)
 	$(MAKE) -C scst-$(MODULE_VERSION) uninstall
 
-perl-module:
+perl-module: makefile
 	@cd ./scst-$(MODULE_VERSION) &&		\
-	perl Makefile.PL PREFIX=$(PREFIX) &&	\
 	printf '\nprint-%%:\n\t@echo '"'"'$$($$*)'"'"'\n' >> Makefile
 	$(MAKE) -C scst-$(MODULE_VERSION)
 
-test:
+test: makefile
 	export PERL_TEST_DIFF=diff
 	$(MAKE) -C scst-$(MODULE_VERSION) test #TEST_VERBOSE=1
 
-clean:
+clean: makefile
 	-$(MAKE) -C scst-$(MODULE_VERSION) clean
 
 distclean: clean
@@ -38,4 +41,4 @@
 
 extraclean: distclean
 
-.PHONY: all install uninstall perl-module clean distclean extraclean
+.PHONY: all install uninstall makefile perl-module clean distclean extraclean

diff --git a/scst/scstadmin/scstadmin.sysfs/man5/scst.conf.5 b/scst/scstadmin/scstadmin.sysfs/man5/scst.conf.5
index 1bf2f52..edf733e 100644
--- a/scst/scstadmin/scstadmin.sysfs/man5/scst.conf.5
+++ b/scst/scstadmin/scstadmin.sysfs/man5/scst.conf.5

@@ -250,7 +250,7 @@
 .IP
 TARGET_DRIVER ib_srpt {
 .br
-	TARGET ib_srpt_target_0 {
+	TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 .br
 		enabled 1
 .IP
@@ -260,7 +260,7 @@
 .br
 	}
 .IP
-	TARGET ib_srpt_target_1 {
+	TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 .br
 		enabled 1
 .IP
@@ -306,7 +306,7 @@
 .br
 		state active
 .IP
-		TARGET ib_srpt_target_0 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 .br
 			rel_tgt_id 1
 .br
@@ -320,7 +320,7 @@
 .br
 		state offline
 .IP
-		TARGET ib_srpt_target_1 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 .br
 			rel_tgt_id 2
 .br
@@ -340,7 +340,7 @@
 .br
 		state offline
 .IP
-		TARGET ib_srpt_target_0 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34b {
 .br
 			rel_tgt_id 1
 .br
@@ -354,7 +354,7 @@
 .br
 		state active
 .IP
-		TARGET ib_srpt_target_1 {
+		TARGET fe80:0000:0000:0000:0002:c903:0005:f34c {
 .br
 			rel_tgt_id 2
 .br
@@ -368,7 +368,7 @@
 module parameter. Parameters for an SCST kernel module can be configured in
 /etc/modprobe.d/99-local.conf just like for any other kernel module. An example:
 .IP
-options ib_srpt use_node_guid_in_target_name=1
+options ib_srpt rdma_cm_port=5000
 .SH FILES
 .IP "/etc/scst.conf"
 The configuration file read by

diff --git a/scst/scstadmin/scstadmin.sysfs/scst-1.0.0/lib/SCST/SCST.pm b/scst/scstadmin/scstadmin.sysfs/scst-1.0.0/lib/SCST/SCST.pm
index 5a5ab48..df34d00 100644
--- a/scst/scstadmin/scstadmin.sysfs/scst-1.0.0/lib/SCST/SCST.pm
+++ b/scst/scstadmin/scstadmin.sysfs/scst-1.0.0/lib/SCST/SCST.pm

@@ -11,6 +11,7 @@
 use warnings;
 use 5.005;
 use Fcntl ':mode';
+use File::Spec;
 use IO::Handle;
 use IO::File;
 use Carp qw(cluck);
@@ -4635,17 +4636,7 @@
 }
 
 sub make_path {
-	my $path;
-
-	foreach my $element (@_) {
-		if ($path && rindex($path, '/') != length($path) - 1) {
-			$path .= '/';
-		}
-		cluck("make_path: invalid argument") if !valid($element);
-		$path .= $element;
-	}
-
-	return $path;
+	return File::Spec->catdir(@_);
 }
 
 ;1 __END__

diff --git a/scst/scstadmin/scstadmin.sysfs/scstadmin b/scst/scstadmin/scstadmin.sysfs/scstadmin
index 15aeeef..83c6089 100755
--- a/scst/scstadmin/scstadmin.sysfs/scstadmin
+++ b/scst/scstadmin/scstadmin.sysfs/scstadmin

@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 
-my $Version  = 'SCST Configurator v3.5.0';
+my $Version  = 'SCST Configurator v3.7.0';
 
 # Configures SCST
 #
@@ -21,7 +21,7 @@
      -h, -help, --help       : Show this information.
 
 General Operations
-     -config <file>          : Configure Actifio SCST given the specified <file>.
+     -config <file>          : Configure SCST given the specified <file>.
      -check_config <file>    : Checks the saved configuration <file>.
      -write_config <file>    : Writes the current configuration to <file>.
      -clear_config           : Clear all SCST configuration.
@@ -222,7 +222,6 @@
                                even deletions (DANGER!).
      -noprompt               : Do not prompt or pause. Use with caution!
      -cont_on_err            : Continue after an error occurred.
-	 -skipReads				 : Used with force to skip readingWorkingConfig on device attribute change
 
 Debugging (limited support)
      -debug                  : Debug mode - don\'t do anything destructive.
@@ -254,7 +253,7 @@
 use File::Spec;
 
 BEGIN {
-  my $site_lib = '/usr/share/perl5';
+  my $site_lib = '%INSTALLSITELIB%';
 	if ($site_lib =~ '^%') {
 		my $scstadmindir = dirname(abs_path($0));
 		$site_lib = File::Spec->catdir($scstadmindir, "scst-1.0.0",
@@ -305,11 +304,6 @@
 
 use vars qw($Version);
 
-# Temporary (hopefully) workaround so we don't need to 
-# add -force and -noprompt flags in multiple places in
-# psrv
-unshift(@ARGV, "-force", "-noprompt");
-
 &main();
 
 sub getArgs {
@@ -436,8 +430,6 @@
 	my $force;
 	my $dumpAttrs;
 
-	my $skipRead;
-
 	my $p = new Getopt::Long::Parser;
 
 	if (!$p->getoptions('config:s'		=> \$applyConfig,
@@ -565,7 +557,6 @@
 			    'noprompt'		=> \$_NOPROMPT_,
 			    'cont_on_err'       => \$_CONT_ON_ERR_,
 			    'force'		=> \$force,
-				'skipRead'	=> \$skipRead,
 			    'dumpAttrs'		=> \$dumpAttrs,
 			    'debug'             => \$_DEBUG_))
 	{
@@ -585,7 +576,6 @@
 	$nonkey = TRUE if (defined($nonkey));
 	$lip    = TRUE if (defined($lip));
 	$noLip  = TRUE if (defined($noLip));
-	$skipRead  = TRUE if (defined($skipRead));
 
 	my $query_mode = defined($listHandler) || defined($listDevice) || defined($listDeviceGroup) || defined($listTargetGroup) ||
 	  defined($listDriver) || defined($listTarget) || defined($listGroup) || defined($listSessions) ||
@@ -943,7 +933,6 @@
 		nonkey				=> $nonkey,
 		force				=> $force,
 		dumpAttrs			=> $dumpAttrs,
-		skipRead 			=> $skipRead,
 	    );
 	return \%args;
 }
@@ -1080,7 +1069,6 @@
 	my $nonkey			= $args->{nonkey};
 	my $force			= $args->{force};
 	my $dumpAttrs			= $args->{dumpAttrs};
-	my $skipRead			= $args->{skipRead};
 
 	$SCST = new SCST::SCST($_DEBUG_);
 
@@ -1095,7 +1083,7 @@
 			$rc = checkConfiguration();
 			condExit("Configuration has errors, aborting.") if ($rc);
 			last if ($force && prompt());
-			my $changes = applyConfiguration($force, $skipRead);
+			my $changes = applyConfiguration($force);
 			$rc = issueLip() if ($changes && $lip);
 			last SWITCH;
 		};
@@ -1759,86 +1747,54 @@
 #***********************************************************************************************
 # Returns 0 upon success and 1 upon error.
 sub writePeerConfiguration {
-	my $nonkey = shift;
+        my $nonkey = shift;
 	my $errorString;
 
-	my $io = new IO::File $CONFIGFILE, O_CREAT|O_WRONLY|O_TRUNC;
+        my $io = new IO::File $CONFIGFILE, O_CREAT|O_WRONLY|O_TRUNC;
 
-	if (!$io) {
-		print "Failed to save configuration to file '$CONFIGFILE': $!\n";
-		return 1;
-	}
+        if (!$io) {
+                print "Failed to save configuration to file '$CONFIGFILE': $!\n";
+                return 1;
+        }
 
-	print "Writing current configuration to file '$CONFIGFILE'.. ";
+        print "Writing current configuration to file '$CONFIGFILE'.. ";
 
+        print $io "# Automatically generated by $Version.\n\n";
 
-        # Prune devices (and targets) out of iscsi
-        $CURRENT{assign}->{iscsi} = {};
+        {
+                my ($attributes, $errorString) = $SCST->scstAttributes();
+                immediateExit($errorString);
 
-        # Accumulate all remaining devices in use
-        my %ALL_DEVICES=();
-        while (my ($assign, $targets) = each %{$CURRENT{assign}}) {
-            next if $assign eq "iscsi";
-            for my $target (keys %$targets) {
-                my $groups = $targets->{$target}->{GROUP};
-                for my $group (values %$groups) {
-                    my $luns = $group->{LUN};
-                    for my $lun (values %$luns) {
-                        my $device = $lun;
-                        ++$ALL_DEVICES{$device}
+                print $io serializeKeyAttr("", $attributes);
+                if ($nonkey) {
+                    my $nk = serializeNkAttr("", $attributes);
+                    if ($nk) {
+                        print $io "# Non-key attributes\n";
+                        print $io $nk;
                     }
                 }
-            }
+                print $io "\n";
         }
 
-        # Prune all devices not in use
-        for my $handler (keys %{$CURRENT{handler}}) {
-            $CURRENT{handler}->{$handler} = [grep exists $ALL_DEVICES{$_}, @{$CURRENT{handler}->{$handler}}]
-        }
-
-        for my $dgroup (keys %{$CURRENT{dgroups}}) {            
-            $CURRENT{dgroups}->{$dgroup}->{devices} = [grep exists $ALL_DEVICES{$_}, @{$CURRENT{dgroups}->{$dgroup}->{devices}}]
-        }
-
-	print $io "# Automatically generated by $Version.\n\n";
-
-	{
-		my $attributes;
-		($attributes, $errorString) = $SCST->scstAttributes();
-		immediateExit($errorString);
-
-		print $io serializeKeyAttr("", $attributes);
-		if ($nonkey) {
-		    my $nk = serializeNkAttr("", $attributes);
-		    if ($nk) {
-			print $io "# Non-key attributes\n";
-			print $io $nk;
-		    }
-		}
-		print $io "\n";
-	}
-
 	foreach my $handler (sort keys %{$CURRENT{'handler'}}) {
-		my $handler_buff = "";
-		my $handler_buff_nk = "";
+
+                my $handler_buff;
+                my $handler_buff_nk;
 		my $handler_attrs;
 		my $attributes;
 
-		($handler_attrs, $errorString) = $SCST->deviceCreateAttributes($handler);
-		($attributes, $errorString) = $SCST->handlerAttributes($handler);
-		$handler_buff = serializeKeyAttr("\t", $attributes);
-		$handler_buff_nk = serializeNkAttr("\t", $attributes) if ($nonkey);
+                ($handler_attrs, $errorString) = $SCST->deviceCreateAttributes($handler);
+                ($attributes, $errorString) = $SCST->handlerAttributes($handler);
 
-		my $devices = $CURRENT{'handler'}->{$handler};
+                $handler_buff = serializeKeyAttr("\t", $attributes);
+                $handler_buff_nk = serializeNkAttr("\t", $attributes) if ($nonkey);
 
-		my $device_buff = "";
-		foreach my $device (sort @{$devices}) {
+                my $devices = $CURRENT{'handler'}->{$handler};
 
-			my $attributes;
-			my $attribute_buff = "";
-			my $attribute_buff_nk = "";
+                my $device_buff;
+                foreach my $device (sort @{$devices}) {
 
-			($attributes, $errorString) = $SCST->deviceAttributes($device);
+                        my ($attributes, $errorString) = $SCST->deviceAttributes($device);
 
                         if ($handler eq 'vdisk_fileio') {
 				if ($device eq 'disk00') {
@@ -1852,61 +1808,68 @@
 				$attributes->{active}->{keys}->{0}->{value} = "0";
 			}
 
-			$attribute_buff = serializeKeyAttr("\t\t", $attributes, $handler_attrs);
-			$attribute_buff_nk = serializeNkAttr("\t\t", $attributes, $handler_attrs) if ($nonkey);
-			$attribute_buff .= "\n" if ($attribute_buff);
-			$attribute_buff_nk .= "\n" if ($attribute_buff_nk);
+                        my $attribute_buff;
+                        my $attribute_buff_nk;
 
-			if ($attribute_buff_nk) {
-				$attribute_buff .= "\t\t# Non-key attributes\n";
-				$attribute_buff .= $attribute_buff_nk;
-			}
+                        $attribute_buff = serializeKeyAttr("\t\t", $attributes, $handler_attrs);
+                        $attribute_buff_nk = serializeNkAttr("\t\t", $attributes, $handler_attrs) if ($nonkey);
+                        $attribute_buff .= "\n" if ($attribute_buff);
+                        $attribute_buff_nk .= "\n" if ($attribute_buff_nk);
 
-			$attribute_buff =~ s/\n+$/\n/;
+                        if ($attribute_buff_nk) {
+                                $attribute_buff .= "\t\t# Non-key attributes\n";
+                                $attribute_buff .= $attribute_buff_nk;
+                        }
 
-			if ($attribute_buff) {
-				$device_buff .= " {\n";
-				$device_buff .= $attribute_buff;
-				$device_buff .= "\t}\n\n";
-			} else {
-				$device_buff .= "\n";
-			}
-		}
+                        $attribute_buff =~ s/\n+$/\n/;
 
-		$device_buff =~ s/\n+$/\n/;
+                        if ($attribute_buff) {
+                                $device_buff .= " {\n";
+                                $device_buff .= $attribute_buff;
+                                $device_buff .= "\t}\n\n";
+                        } else {
+                                $device_buff .= "\n";
+                        }
+                }
 
-		$handler_buff .= $device_buff;
+                $device_buff =~ s/\n+$/\n/;
 
-		if ($handler_buff_nk) {
-			$handler_buff .= "\t# Non-key attributes\n";
-			$handler_buff .= $handler_buff_nk;
-		}
+                $handler_buff .= $device_buff;
 
-		if ($handler_buff) {
-			print $io "HANDLER $handler {\n";
-			print $io $handler_buff;
-			print $io "}\n\n";
-		}
-	}
-	foreach my $driver (sort keys %{$CURRENT{'assign'}}) {
-		my $driver_buff = "";
+                if ($handler_buff_nk) {
+                        $handler_buff .= "\t# Non-key attributes\n";
+                        $handler_buff .= $handler_buff_nk;
+                }
+
+                if ($handler_buff) {
+                        print $io "HANDLER $handler {\n";
+                        print $io $handler_buff;
+                        print $io "}\n\n";
+                }
+        }
+        foreach my $driver (sort keys %{$CURRENT{'assign'}}) {
+                my $driver_buff;
 
 		my $drv_attrs;
 		my $drv_attr_buff = "";
 		my $drv_attr_buff_nk = "";
 
-		($drv_attrs, $errorString) = $SCST->driverAttributes($driver);
-		$drv_attr_buff = serializeKeyAttr("\t", $drv_attrs);
-		$drv_attr_buff_nk = serializeNkAttr("\t", $drv_attrs) if ($nonkey);
-		$drv_attr_buff .= "\n" if ($drv_attr_buff);
-		$drv_attr_buff_nk .= "\n" if ($drv_attr_buff_nk);
+                ($drv_attrs, $errorString) = $SCST->driverAttributes($driver);
+                $drv_attr_buff = serializeKeyAttr("\t", $drv_attrs);
+                $drv_attr_buff_nk = serializeNkAttr("\t", $drv_attrs) if ($nonkey);
+                $drv_attr_buff .= "\n" if ($drv_attr_buff);
+                $drv_attr_buff_nk .= "\n" if ($drv_attr_buff_nk);
 
-		my $targets = $CURRENT{'assign'}->{$driver};
-		my $tgt_attrs;
-		($tgt_attrs, $errorString) = $SCST->targetCreateAttributes($driver);
+                my $targets = $CURRENT{'assign'}->{$driver};
+                my ($tgt_attrs, $errorString) = $SCST->targetCreateAttributes($driver);
 
-		my $target_buff = "";
-		foreach my $target (sort keys %{$targets}) {
+                my $target_buff;
+
+use Data::Dumper;
+#warn Dumper ($attributes, $handler_attrs, $handler);
+#warn Dumper ($handler_attrs);
+
+                foreach my $target (sort keys %{$targets}) {
                         if ($target eq $MYWWPN1) {
                                 $target_buff .= "\tTARGET $PEERWWPN1";
                         } elsif ($target eq $MYWWPN2) {
@@ -1916,329 +1879,328 @@
                         } elsif ($target eq $MYWWPN4) {
                                 $target_buff .= "\tTARGET $PEERWWPN4";
                         } else {
-        			$target_buff .= "\tTARGET $target";
+                                $target_buff .= "\tTARGET $target";
                         }
 
 			my $attributes;
 			my $attribute_buff = "";
 			my $attribute_buff_nk = "";
 
-			($attributes, $errorString) = $SCST->targetAttributes($driver, $target);
-			if (defined($$attributes{'hw_target'}) &&
-			  ($$attributes{'hw_target'}->{'value'} == TRUE)) {
-				$attribute_buff = "\t\tHW_TARGET\n\n";
-			}
+                        ($attributes, $errorString) = $SCST->targetAttributes($driver, $target);
 
-			$attribute_buff .= serializeKeyAttr("\t\t", $attributes, $tgt_attrs);
-			$attribute_buff_nk .= serializeNkAttr("\t\t", $attributes, $tgt_attrs) if ($nonkey);
-			$attribute_buff .= "\n" if ($attribute_buff);
-			$attribute_buff_nk .= "\n" if ($attribute_buff_nk);
+                        if (defined($$attributes{'hw_target'}) &&
+                          ($$attributes{'hw_target'}->{'value'} == TRUE)) {
+                                $attribute_buff = "\t\tHW_TARGET\n\n";
+                        }
 
-			my $luns = $CURRENT{'assign'}->{$driver}->{$target}->{'LUN'};
-			my $lun_attrs;
-			($lun_attrs, $errorString) = $SCST->lunCreateAttributes($driver, $target);
+                        $attribute_buff .= serializeKeyAttr("\t\t", $attributes, $tgt_attrs);
+                        $attribute_buff_nk .= serializeNkAttr("\t\t", $attributes, $tgt_attrs) if ($nonkey);
+                        $attribute_buff .= "\n" if ($attribute_buff);
+                        $attribute_buff_nk .= "\n" if ($attribute_buff_nk);
 
-			my $t_lun_buff = "";
-			foreach my $lun (sort numerically keys %{$luns}) {
-				my $lun_dev = $$luns{$lun};
+                        my $luns = $CURRENT{'assign'}->{$driver}->{$target}->{'LUN'};
+                        my ($lun_attrs, $errorString) = $SCST->lunCreateAttributes($driver, $target);
 
-				# Do not save copy_manager LUN definitions
-				# for LUNs associated with an SCST device
+                        my $t_lun_buff;
+                        foreach my $lun (sort numerically keys %{$luns}) {
+                                my $lun_dev = $$luns{$lun};
+
+                                # Do not save copy_manager LUN definitions
+                                # for LUNs associated with an SCST device
 				# handler.
 
-				next if ($driver eq 'copy_manager' &&
-					 isPassthroughDev($lun_dev));
+                                next if ($driver eq 'copy_manager' &&
+                                         isPassthroughDev($lun_dev));
 
-				$t_lun_buff .= "\t\tLUN $lun $lun_dev";
+                                $t_lun_buff .= "\t\tLUN $lun $lun_dev";
 
-				my $attributes;
-				($attributes, $errorString) = $SCST->lunAttributes($driver, $target, $lun);
-				my $l_attribute_buff =
-				    serializeKeyAttr("\t\t\t",
-						     $attributes,
-						     $lun_attrs);
-				my $l_attribute_buff_nk =
-				    serializeNkAttr("\t\t\t",
-						    $attributes,
-						    $lun_attrs) if ($nonkey);
+                                my ($attributes, $errorString) = $SCST->lunAttributes($driver, $target, $lun);
+                                my $l_attribute_buff =
+                                    serializeKeyAttr("\t\t\t",
+                                                     $attributes,
+                                                     $lun_attrs);
+                                my $l_attribute_buff_nk =
+                                    serializeNkAttr("\t\t\t",
+                                                    $attributes,
+                                                    $lun_attrs) if ($nonkey);
 
-				if ($l_attribute_buff_nk) {
-					$l_attribute_buff .= "\t\t\t# Non-key attributes\n";
-					$l_attribute_buff .= $l_attribute_buff_nk;
-				}
+                                if ($l_attribute_buff_nk) {
+                                        $l_attribute_buff .= "\t\t\t# Non-key attributes\n";
+                                        $l_attribute_buff .= $l_attribute_buff_nk;
+                                }
 
-				if ($l_attribute_buff) {
-					$t_lun_buff .= " {\n";
-					$t_lun_buff .= $l_attribute_buff;
-					$t_lun_buff .= "\t\t}\n\n";
-				} else {
-					$t_lun_buff .= "\n";
-				}
-			}
+                                if ($l_attribute_buff) {
+                                        $t_lun_buff .= " {\n";
+                                        $t_lun_buff .= $l_attribute_buff;
+                                        $t_lun_buff .= "\t\t}\n\n";
+                                } else {
+                                        $t_lun_buff .= "\n";
+                                }
+                        }
 
-			$t_lun_buff .= "\n" if ($t_lun_buff);
-			$t_lun_buff =~ s/\n+$/\n\n/;
+                        $t_lun_buff .= "\n" if ($t_lun_buff);
+                        $t_lun_buff =~ s/\n+$/\n\n/;
 
-			my $groups = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'};
+                        my $groups = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'};
 
-			my $group_buff = "";
-			foreach my $group (sort keys %{$groups}) {
+                        my $group_buff;
+                        foreach my $group (sort keys %{$groups}) {
 				my $lun_attrs;
 				my $ini_attrs;
 
-				($lun_attrs, $errorString) = $SCST->lunCreateAttributes($driver, $target, $group);
-				($ini_attrs, $errorString) = $SCST->initiatorCreateAttributes($driver, $target, $group);
-				$group_buff .= "\t\tGROUP $group";
+                                ($lun_attrs, $errorString) = $SCST->lunCreateAttributes($driver, $target, $group);
+                                ($ini_attrs, $errorString) = $SCST->initiatorCreateAttributes($driver, $target, $group);
 
-				my $luns = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'}->{$group}->{'LUN'};
+                                $group_buff .= "\t\tGROUP $group";
 
-				my $lun_buff = "";
-				foreach my $lun (sort numerically keys %{$luns}) {
-					my $lun_dev = $$luns{$lun};
+                                my $luns = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'}->{$group}->{'LUN'};
 
-					$lun_buff .= "\t\t\tLUN $lun $lun_dev";
+                                my $lun_buff;
+                                foreach my $lun (sort numerically keys %{$luns}) {
+                                        my $lun_dev = $$luns{$lun};
 
-					my $attributes;
-					($attributes, $errorString) = $SCST->lunAttributes($driver, $target, $lun, $group);
+                                        $lun_buff .= "\t\t\tLUN $lun $lun_dev";
 
-					my $l_attribute_buff
-					    = serializeKeyAttr("\t\t\t\t",
-							       $attributes,
-							       $lun_attrs);
-					my $l_attribute_buff_nk
-					    = serializeNkAttr("\t\t\t\t",
-							      $attributes,
-							      $lun_attrs)
-					    if ($nonkey);
+                                        my ($attributes, $errorString) = $SCST->lunAttributes($driver, $target, $lun, $group);
 
-					if ($l_attribute_buff_nk) {
-						$l_attribute_buff .= "\t\t\t\t# Non-key attributes\n";
-						$l_attribute_buff .= $l_attribute_buff_nk;
-					}
+                                        my $l_attribute_buff
+                                            = serializeKeyAttr("\t\t\t\t",
+                                                               $attributes,
+                                                               $lun_attrs);
+                                        my $l_attribute_buff_nk
+                                            = serializeNkAttr("\t\t\t\t",
+                                                              $attributes,
+                                                              $lun_attrs)
+                                            if ($nonkey);
 
-					if ($l_attribute_buff) {
-						$lun_buff .= " {\n";
-						$lun_buff .= $l_attribute_buff;
-						$lun_buff .= "\t\t\t}\n";
-					} else {
-						$lun_buff .= "\n";
-					}
+                                        if ($l_attribute_buff_nk) {
+                                                $l_attribute_buff .= "\t\t\t\t# Non-key attributes\n";
+                                                $l_attribute_buff .= $l_attribute_buff_nk;
+                                        }
 
-				}
+                                        if ($l_attribute_buff) {
+                                                $lun_buff .= " {\n";
+                                                $lun_buff .= $l_attribute_buff;
+                                                $lun_buff .= "\t\t\t}\n";
+                                        } else {
+                                                $lun_buff .= "\n";
+                                        }
 
-				my $inits = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'}->{$group}->{'INITIATORS'};
+                                }
 
-				my $init_buff = "";
-				foreach my $init (sort @{$inits}) {
-					$init_buff .= "\n\t\t\tINITIATOR " . escapeMeta($init);
+                                my $inits = $CURRENT{'assign'}->{$driver}->{$target}->{'GROUP'}->{$group}->{'INITIATORS'};
 
-					my $attributes;
-					($attributes, $errorString) = $SCST->initiatorAttributes($driver, $target, $group, $init);
+                                my $init_buff;
+                                foreach my $init (sort @{$inits}) {
+                                        $init_buff .= "\n\t\t\tINITIATOR " . escapeMeta($init);
 
-					my $i_attribute_buff
-					    = serializeKeyAttr("\t\t\t\t",
-							       $attributes,
-							       $ini_attrs);
-					my $i_attribute_buff_nk
-					    = serializeNkAttr("\t\t\t\t",
-							      $attributes,
-							      $ini_attrs)
-					    if ($nonkey);
+                                        my ($attributes, $errorString) = $SCST->initiatorAttributes($driver, $target, $group, $init);
 
-					if ($i_attribute_buff_nk) {
-						$i_attribute_buff .= "\t\t\t\t# Non-key attributes\n";
-						$i_attribute_buff .= $i_attribute_buff_nk;
-					}
+                                        my $i_attribute_buff
+                                            = serializeKeyAttr("\t\t\t\t",
+                                                               $attributes,
+                                                               $ini_attrs);
+                                        my $i_attribute_buff_nk
+                                            = serializeNkAttr("\t\t\t\t",
+                                                              $attributes,
+                                                              $ini_attrs)
+                                            if ($nonkey);
 
-					if ($i_attribute_buff) {
-						$init_buff .= " {\n";
-						$init_buff .= $i_attribute_buff;
-						$init_buff .= "\t\t\t}\n";
-					} else {
-						$init_buff .= "\n";
-					}
-				}
+                                        if ($i_attribute_buff_nk) {
+                                                $i_attribute_buff .= "\t\t\t\t# Non-key attributes\n";
+                                                $i_attribute_buff .= $i_attribute_buff_nk;
+                                        }
+
+                                        if ($i_attribute_buff) {
+                                                $init_buff .= " {\n";
+                                                $init_buff .= $i_attribute_buff;
+                                                $init_buff .= "\t\t\t}\n";
+                                        } else {
+                                                $init_buff .= "\n";
+                                        }
+                                }
 
 				my $grp_attributes;
-				($grp_attributes, $errorString) = $SCST->groupAttributes($driver, $target, $group);
-				my $g_attribute_buff
-				    = serializeKeyAttr("\t\t\t",
-						       $grp_attributes);
-				my $g_attribute_buff_nk
-				    = serializeNkAttr("\t\t\t",
-						      $grp_attributes)
-				    if ($nonkey);
+                                ($grp_attributes, $errorString) = $SCST->groupAttributes($driver, $target, $group);
+                                my $g_attribute_buff
+                                    = serializeKeyAttr("\t\t\t",
+                                                       $grp_attributes);
+                                my $g_attribute_buff_nk
+                                    = serializeNkAttr("\t\t\t",
+                                                      $grp_attributes)
+                                    if ($nonkey);
 
-				if ($lun_buff || $init_buff ||
-				    $g_attribute_buff || $g_attribute_buff_nk) {
-					$group_buff .= " {\n";
-					$group_buff .= $lun_buff;
-					$group_buff .= $init_buff;
-				}
+                                if ($lun_buff || $init_buff ||
+                                    $g_attribute_buff || $g_attribute_buff_nk) {
+                                        $group_buff .= " {\n";
+                                        $group_buff .= $lun_buff;
+                                        $group_buff .= $init_buff;
+                                }
 
-				if ($g_attribute_buff_nk) {
-					$g_attribute_buff .= "\n" if ($g_attribute_buff);
-					$g_attribute_buff .= "\t\t\t# Non-key attributes\n";
-					$g_attribute_buff .= $g_attribute_buff_nk;
-				}
+                                if ($g_attribute_buff_nk) {
+                                        $g_attribute_buff .= "\n" if ($g_attribute_buff);
+                                        $g_attribute_buff .= "\t\t\t# Non-key attributes\n";
+                                        $g_attribute_buff .= $g_attribute_buff_nk;
+                                }
 
-				if ($g_attribute_buff) {
-					$group_buff .= "\n";
-					$group_buff .= $g_attribute_buff;
-				}
+                                if ($g_attribute_buff) {
+                                        $group_buff .= "\n";
+                                        $group_buff .= $g_attribute_buff;
+                                }
 
-				if ($group_buff && ($lun_buff || $init_buff ||
-						    $g_attribute_buff || $g_attribute_buff_nk)) {
-					$group_buff .= "\t\t}\n\n";
-					$group_buff =~ s/\n+$/\n/;
-				}
+                                if ($group_buff && ($lun_buff || $init_buff ||
+                                                    $g_attribute_buff || $g_attribute_buff_nk)) {
+                                        $group_buff .= "\t\t}\n\n";
+                                        $group_buff =~ s/\n+$/\n/;
+                                }
 
-				$group_buff .= "\n" if ($group_buff);
-			}
+                                $group_buff .= "\n" if ($group_buff);
+                        }
 
-			if ($attribute_buff_nk) {
-				$attribute_buff .= "\t\t# Non-key attributes\n";
-				$attribute_buff .= $attribute_buff_nk;
-			}
+                        if ($attribute_buff_nk) {
+                                $attribute_buff .= "\t\t# Non-key attributes\n";
+                                $attribute_buff .= $attribute_buff_nk;
+                        }
 
-			if ($attribute_buff || $t_lun_buff || $group_buff ) {
-				$target_buff .= " {\n";
+                        if ($attribute_buff || $t_lun_buff || $group_buff ) {
+                                $target_buff .= " {\n";
 
-				$target_buff .= $attribute_buff;
-				$target_buff .= $t_lun_buff;
-				$target_buff .= $group_buff;
+                                $target_buff .= $attribute_buff;
+                                $target_buff .= $t_lun_buff;
+                                $target_buff .= $group_buff;
 
-				$target_buff =~ s/\n\n$/\n/;
-				$target_buff .= "\t}\n\n";
-			} else {
-				$target_buff .= "\n";
-			}
-		}
+                                $target_buff =~ s/\n\n$/\n/;
+                                $target_buff .= "\t}\n\n";
+                        } else {
+                                $target_buff .= "\n";
+                        }
+                }
 
-		if ($drv_attr_buff_nk) {
-			$drv_attr_buff .= "\t# Non-key attributes\n";
-			$drv_attr_buff .= $drv_attr_buff_nk;
-		}
+                if ($drv_attr_buff_nk) {
+                        $drv_attr_buff .= "\t# Non-key attributes\n";
+                        $drv_attr_buff .= $drv_attr_buff_nk;
+                }
 
-		$driver_buff .= $drv_attr_buff;
-		$driver_buff .= $target_buff;
-		$driver_buff =~ s/\n\n$/\n/;
+                $driver_buff .= $drv_attr_buff;
+                $driver_buff .= $target_buff;
+                $driver_buff =~ s/\n\n$/\n/;
 
-		if ($driver_buff) {
-			print $io "TARGET_DRIVER $driver {\n";
+                if ($driver_buff) {
+                        print $io "TARGET_DRIVER $driver {\n";
                         if($driver eq 'iscsi') {
                                 print $io "\tenabled 1\n";
                         } elsif ($driver eq 'copy_manager') {
-                                print $io "\tTARGET copy_manager_tgt\n";
+                                print $io "\tTARGET copy_manager_tgt\n";;
                         } else {
-			print $io $driver_buff;
+                                print $io $driver_buff;
                         }
-			print $io "}\n\n";
-		}
-	}
+                        print $io "}\n\n";
+                }
+        }
 
 	my $dga;
-	($dga, $errorString) = $SCST->aluaAttributes();
-	my $dga_buff = serializeKeyAttr("\t", $dga);
-	my $dga_buff_nk = serializeNkAttr("\t", $dga) if ($nonkey);
-	if ($dga_buff_nk) {
-		$dga_buff .= "\t# Non-key attributes\n";
-		$dga_buff .= $dga_buff_nk;
-	}
-	if ($dga_buff) {
-		print $io "ALUA {\n";
-		print $io $dga_buff;
-		print $io "}\n\n";
-	}
+        ($dga, $errorString) = $SCST->aluaAttributes();
+        my $dga_buff = serializeKeyAttr("\t", $dga);
+        my $dga_buff_nk = serializeNkAttr("\t", $dga) if ($nonkey);
+        if ($dga_buff_nk) {
+                $dga_buff .= "\t# Non-key attributes\n";
+                $dga_buff .= $dga_buff_nk;
+        }
+        if ($dga_buff) {
+                print $io "ALUA {\n";
+                print $io $dga_buff;
+                print $io "}\n\n";
+        }
 
-	foreach my $dgroup (sort keys %{$CURRENT{'dgroups'}}) {
-		my $dgroup_buff = "";
-		my $dgroup_attrs;
-		my $dgrp_attr_buff = "";
-		my $dgrp_attr_buff_nk = "";
+        foreach my $dgroup (sort keys %{$CURRENT{'dgroups'}}) {
+                my $dgroup_buff;
 
-		($dgroup_attrs, $errorString) = $SCST->deviceGroupAttributes($dgroup);
-		$dgrp_attr_buff = serializeKeyAttr("\t", $dgroup_attrs);
-		$dgrp_attr_buff_nk = serializeNkAttr("\t", $dgroup_attrs) if ($nonkey);
-		$dgrp_attr_buff .= "\n" if ($dgrp_attr_buff);
-		$dgrp_attr_buff_nk .= "\n" if ($dgrp_attr_buff_nk);
+                my ($dgroup_attrs, $errorString) = $SCST->deviceGroupAttributes($dgroup);
 
-		my $devices_buff = "";
+                my $dgrp_attr_buff;
+                my $dgrp_attr_buff_nk;
 
-		my $devices = $CURRENT{'dgroups'}->{$dgroup}->{'devices'};
+                $dgrp_attr_buff = serializeKeyAttr("\t", $dgroup_attrs);
+                $dgrp_attr_buff_nk = serializeNkAttr("\t", $dgroup_attrs) if ($nonkey);
+                $dgrp_attr_buff .= "\n" if ($dgrp_attr_buff);
+                $dgrp_attr_buff_nk .= "\n" if ($dgrp_attr_buff_nk);
 
-		foreach my $device (sort @{$devices}) {
-			$devices_buff .= "\tDEVICE $device\n";
-		}
+                my $devices_buff;
 
-		$devices_buff .= "\n" if ($devices_buff);
+                my $devices = $CURRENT{'dgroups'}->{$dgroup}->{'devices'};
 
-		my $tgroups = $CURRENT{'dgroups'}->{$dgroup}->{'tgroups'};
+                foreach my $device (sort @{$devices}) {
+                        $devices_buff .= "\tDEVICE $device\n";
+                }
 
-		my $tgroup_buff = "";
+                $devices_buff .= "\n" if ($devices_buff);
 
-		foreach my $tgroup (sort keys %{$tgroups}) {
-			$tgroup_buff .= "\tTARGET_GROUP $tgroup";
+                my $tgroups = $CURRENT{'dgroups'}->{$dgroup}->{'tgroups'};
 
-			my $attributes;
-			my $attribute_buff = "";
-			my $attribute_buff_nk = "";
+                my $tgroup_buff;
 
-			($attributes, $errorString) = $SCST->targetGroupAttributes($dgroup, $tgroup);
-			$attribute_buff .= serializeKeyAttr("\t\t", $attributes);
-			$attribute_buff_nk .= serializeNkAttr("\t\t", $attributes) if ($nonkey);
-			$attribute_buff .= "\n" if ($attribute_buff);
-			$attribute_buff_nk .= "\n" if ($attribute_buff_nk);
+                foreach my $tgroup (sort keys %{$tgroups}) {
+                        $tgroup_buff .= "\tTARGET_GROUP $tgroup";
 
-			my $tgts = $CURRENT{'dgroups'}->{$dgroup}->{'tgroups'}->{$tgroup}->{'targets'};
+                        my ($attributes, $errorString) = $SCST->targetGroupAttributes($dgroup, $tgroup);
 
-			my $tgt_buff = "";
+                        my $attribute_buff;
+                        my $attribute_buff_nk;
+
+                        $attribute_buff .= serializeKeyAttr("\t\t", $attributes);
+                        $attribute_buff_nk .= serializeNkAttr("\t\t", $attributes) if ($nonkey);
+                        $attribute_buff .= "\n" if ($attribute_buff);
+                        $attribute_buff_nk .= "\n" if ($attribute_buff_nk);
+
+                        my $tgts = $CURRENT{'dgroups'}->{$dgroup}->{'tgroups'}->{$tgroup}->{'targets'};
+
+                        my $tgt_buff;
                         if ($tgroup eq $PEERNAME) {
                                 $tgt_buff .= "\t\tTARGET $PEERWWPN1\n";
                                 $tgt_buff .= "\t\tTARGET $PEERWWPN2\n";
                                 $tgt_buff .= "\t\tTARGET $PEERWWPN3\n";
-                                $tgt_buff .= "\t\tTARGET $PEERWWPN4\n";			
-			}
+                                $tgt_buff .= "\t\tTARGET $PEERWWPN4\n";
+                        }
 
-			if ($attribute_buff_nk) {
-				$attribute_buff .= "\t\t# Non-key attributes\n";
-				$attribute_buff .= $attribute_buff_nk;
-			}
+                        if ($attribute_buff_nk) {
+                                $attribute_buff .= "\t\t# Non-key attributes\n";
+                                $attribute_buff .= $attribute_buff_nk;
+                        }
 
-			if ($attribute_buff || $tgt_buff) {
-				$tgroup_buff .= " {\n";
+                        if ($attribute_buff || $tgt_buff) {
+                                $tgroup_buff .= " {\n";
 
-				$tgroup_buff .= $attribute_buff;
-				$tgroup_buff .= $tgt_buff;
+                                $tgroup_buff .= $attribute_buff;
+                                $tgroup_buff .= $tgt_buff;
 
-				$tgroup_buff =~ s/\n\n$/\n/;
-				$tgroup_buff .= "\t}\n\n";
-			} else {
-				$tgroup_buff .= "\n";
-			}
-		}
+                                $tgroup_buff =~ s/\n\n$/\n/;
+                                $tgroup_buff .= "\t}\n\n";
+                        } else {
+                                $tgroup_buff .= "\n";
+                        }
+                }
 
-		if ($dgrp_attr_buff_nk) {
-			$dgrp_attr_buff .= "\t# Non-key attributes\n";
-			$dgrp_attr_buff .= $dgrp_attr_buff_nk;
-		}
+                if ($dgrp_attr_buff_nk) {
+                        $dgrp_attr_buff .= "\t# Non-key attributes\n";
+                        $dgrp_attr_buff .= $dgrp_attr_buff_nk;
+                }
 
-		$dgroup_buff .= $dgrp_attr_buff;
-		$dgroup_buff .= $devices_buff;
-		$dgroup_buff .= $tgroup_buff;
-		$dgroup_buff =~ s/\n\n$/\n/;
-		if ($dgroup_buff) {
-			print $io "DEVICE_GROUP $dgroup {\n";
-			print $io $dgroup_buff;
-			print $io "}\n\n";
-		}
-	}
+                $dgroup_buff .= $dgrp_attr_buff;
+                $dgroup_buff .= $devices_buff;
+                $dgroup_buff .= $tgroup_buff;
+                $dgroup_buff =~ s/\n\n$/\n/;
+                if ($dgroup_buff) {
+                        print $io "DEVICE_GROUP $dgroup {\n";
+                        print $io $dgroup_buff;
+                        print $io "}\n\n";
+                }
+        }
 
-	$io->flush;
-	$io->sync;
-	close $io;
+        $io->flush;
+        $io->sync;
+        close $io;
 
-	return 0;
+        return 0;
 }
 
 #***********************************************************************************************
@@ -2767,7 +2729,6 @@
 			$attribute_buff =~ s/\n+$/\n/;
 
 			if ($attribute_buff) {
-#                       print $io "HANDLER $temphandler {\n";
 				$device_buff .= " {\n";
 				$device_buff .= $attribute_buff;
 				$device_buff .= "\t}\n\n";
@@ -3334,7 +3295,6 @@
 
 sub applyConfiguration {
 	my $force = shift;
-	my $skipRead = shift;
 	my $changes = 0;
 
 	readConfigFile() if (!$CONFIG);
@@ -3349,7 +3309,7 @@
 	}
 
 	# Apply config additions
-	$changes += applyConfigDevices($CONFIG, $force, $skipRead);
+	$changes += applyConfigDevices($CONFIG, $force);
 	$changes += applyConfigAssignments($CONFIG, $force);
 	$changes += applyConfigAlua($CONFIG, $force);
 	$changes += applyConfigDeviceGroups($CONFIG, $force);
@@ -3378,7 +3338,6 @@
 sub applyConfigDevices {
 	my $config = shift;
 	my $deletions = shift;
-	my $skipRead = shift;
 	my $changes = 0;
 	my $errorString;
 
@@ -3435,10 +3394,8 @@
 							closeDevice($handler, $device, $deletions);
 							openDevice($handler, $device, $create_attrs);
 							$changes += 2;
-							if (!$skipRead){
-								my $rc = readWorkingConfig($deletions);
-								exit $rc if ($rc);
-							}
+							my $rc = readWorkingConfig($deletions);
+							exit $rc if ($rc);
 						} else {
 							print "\t  -> Use -force to re-open device with new attributes. ".
 							  "NOTE: This will disrupt all initiators using this device.\n";
@@ -3475,11 +3432,6 @@
 
 	}
 
-	if ($skipRead && $deletions && $changes) {
-		my $rc = readWorkingConfig($deletions);
-		exit $rc if ($rc);
-	}
-
 	return $changes;
 }
 
@@ -3584,7 +3536,8 @@
 
 				if (!defined($$config{'TARGET_DRIVER'}->{$driver}->{'TARGET'}->{$target}->{'GROUP'}->{$group})) {
 					if ($deletions) {
-						removeGroup($driver, $target, $group);
+						removeGroup($driver, $target,
+							    $group, TRUE);
 						$changes++;
 					} else {
 						print "\t-> Group '$group' is not in configuration. Use -force to remove.\n";

diff --git a/scst/srpt/Makefile b/scst/srpt/Makefile
index 5e398ea..5f2a97a 100644
--- a/scst/srpt/Makefile
+++ b/scst/srpt/Makefile

@@ -182,6 +182,12 @@
 	echo "$(call run_conftest,cm_listen,				\
 		-DIB_CM_LISTEN_TAKES_FOURTH_ARG)" >"$@"
 
+conftest/cm_listen_2/result-$(KVER).txt:				\
+	conftest/cm_listen_2/cm_listen_2.c				\
+	conftest/cm_listen_2/Kbuild
+	echo "$(call run_conftest,cm_listen_2,				\
+		-DIB_CM_LISTEN_TAKES_THIRD_ARG)" >"$@"
+
 conftest/create_cq/result-$(KVER).txt:					\
 	conftest/create_cq/create_cq.c					\
 	conftest/create_cq/Kbuild

diff --git a/scst/srpt/README b/scst/srpt/README
index 334fb5f..b574574 100644
--- a/scst/srpt/README
+++ b/scst/srpt/README

@@ -175,10 +175,8 @@
 Target names
 ------------
 
-The name assigned by the ib_srpt target driver to an SCST target is either
-ib_srpt_target_<n>, the node GUID of a HCA in hexadecimal form with a colon
-after every fourth digit or the port GID with a colon afer every fourth
-digit. The HCA node GUID and the port GIDs can be obtained via the
+The name assigned by the ib_srpt target driver to an SCST target is the port
+GID with a colon after every fourth digit. The port GIDs can be obtained via the
 ibv_devinfo command. An example:
 
 # ibv_devinfo -v | grep -E '[^a-z]port:|guid|GID'

diff --git a/scst/srpt/conftest/cm_listen_2/Kbuild b/scst/srpt/conftest/cm_listen_2/Kbuild
new file mode 100644
index 0000000..e997664
--- /dev/null
+++ b/scst/srpt/conftest/cm_listen_2/Kbuild

@@ -0,0 +1,3 @@
+LINUXINCLUDE := $(CONFTEST_CFLAGS) $(LINUXINCLUDE)
+
+obj-m += cm_listen_2.o

diff --git a/scst/srpt/conftest/cm_listen_2/cm_listen_2.c b/scst/srpt/conftest/cm_listen_2/cm_listen_2.c
new file mode 100644
index 0000000..927af0b
--- /dev/null
+++ b/scst/srpt/conftest/cm_listen_2/cm_listen_2.c

@@ -0,0 +1,11 @@
+#include <linux/module.h>
+#include <rdma/ib_cm.h>
+
+static int __init modinit(void)
+{
+	return ib_cm_listen(NULL, 0, 0);
+}
+
+module_init(modinit);
+
+MODULE_LICENSE("GPL");

diff --git a/scst/srpt/src/ib_srpt.c b/scst/srpt/src/ib_srpt.c
index 9f60e63..a0298d5 100644
--- a/scst/srpt/src/ib_srpt.c
+++ b/scst/srpt/src/ib_srpt.c

@@ -47,11 +47,7 @@
 #if !defined(INSIDE_KERNEL_TREE)
 #include <linux/version.h>
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
 #include <linux/atomic.h>
-#else
-#include <asm/atomic.h>
-#endif
 #include <rdma/ib_cache.h>
 #include "ib_srpt.h"
 #define LOG_PREFIX "ib_srpt" /* Prefix for SCST tracing macros. */
@@ -63,8 +59,8 @@
 
 /* Name of this kernel module. */
 #define DRV_NAME		"ib_srpt"
-#define DRV_VERSION		"3.5.0" "#" __stringify(OFED_FLAVOR)
-#define DRV_RELDATE		"21 December 2020"
+#define DRV_VERSION		"3.7.0" "#" __stringify(OFED_FLAVOR)
+#define DRV_RELDATE		"26 December 2022"
 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
 /* Flags to be used in SCST debug tracing statements. */
 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
@@ -112,12 +108,7 @@
 MODULE_PARM_DESC(srp_max_rsp_size,
 		 "Maximum size of SRP response messages in bytes.");
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
-	|| defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static int use_srq;
-#else
 static bool use_srq;
-#endif
 module_param(use_srq, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_srq, "Whether or not to use SRQ");
 
@@ -130,27 +121,6 @@
 module_param(srpt_sq_size, int, 0444);
 MODULE_PARM_DESC(srpt_sq_size, "Per-channel send queue (SQ) size.");
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
-	|| defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static int use_port_guid_in_session_name;
-#else
-static bool use_port_guid_in_session_name;
-#endif
-module_param(use_port_guid_in_session_name, bool, 0444);
-MODULE_PARM_DESC(use_port_guid_in_session_name,
-		 "Use target port ID in the session name such that"
-		 " redundant paths between multiport systems can be masked.");
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
-	|| defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5
-static int use_node_guid_in_target_name;
-#else
-static bool use_node_guid_in_target_name;
-#endif
-module_param(use_node_guid_in_target_name, bool, 0444);
-MODULE_PARM_DESC(use_node_guid_in_target_name,
-		 "Use HCA node GUID as SCST target name.");
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
 static int srpt_get_u64_x(char *buffer, struct kernel_param *kp)
 #else
@@ -164,10 +134,6 @@
 MODULE_PARM_DESC(srpt_service_guid,
 		 "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
 
-static unsigned int max_sge_delta;
-module_param(max_sge_delta, uint, 0444);
-MODULE_PARM_DESC(max_sge_delta, "Number to subtract from max_sge (obsolete).");
-
 /*
  * Note: changing any of the two constants below into SCST_CONTEXT_DIRECT is
  * dangerous because it might cause IB completions to be processed too late
@@ -357,15 +323,10 @@
 
 	switch (event->event) {
 	case IB_EVENT_COMM_EST:
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
 		if (ch->using_rdma_cm)
 			rdma_notify(ch->rdma_cm.cm_id, event->event);
 		else
 			ib_cm_notify(ch->ib_cm.cm_id, event->event);
-#else
-		/* Vanilla 2.6.19 kernel (or before) without OFED. */
-		pr_err("how to perform ib_cm_notify() on a vanilla 2.6.18 kernel ???\n");
-#endif
 		break;
 	case IB_EVENT_QP_LAST_WQE_REACHED:
 		pr_debug("%s-%d, state %s: received Last WQE event.\n",
@@ -1095,7 +1056,6 @@
 
 static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
 	/*
 	 * The pointer computations below will only be compiled correctly
 	 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
@@ -1103,7 +1063,6 @@
 	 */
 	BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) &&
 		     !__same_type(srp_cmd->add_data[0], (u8)0));
-#endif
 
 	/*
 	 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
@@ -2281,11 +2240,7 @@
 		goto out;
 
 retry:
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && \
-	!defined(RHEL_RELEASE_CODE)
-	ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
-			      ch->rq_size + sq_size);
-#elif !defined(IB_CREATE_CQ_HAS_INIT_ATTR)
+#if !defined(IB_CREATE_CQ_HAS_INIT_ATTR)
 	ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
 			      ch->rq_size + sq_size, ch->comp_vector);
 #else
@@ -2604,14 +2559,8 @@
 
 	WARN_ON_ONCE(irqs_disabled());
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
-	WARN_ON(!sdev || !req);
-	if (!sdev || !req)
-		return -EINVAL;
-#else
 	if (WARN_ON(!sdev || !req))
 		return -EINVAL;
-#endif
 
 	it_iu_len = be32_to_cpu(req->req_it_iu_len);
 
@@ -3887,18 +3836,9 @@
 	srpt_put_send_ioctx(ioctx);
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
-/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
-static void srpt_refresh_port_work(void *ctx)
-#else
 static void srpt_refresh_port_work(struct work_struct *work)
-#endif
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
-	struct srpt_port *sport = ctx;
-#else
 	struct srpt_port *sport = container_of(work, struct srpt_port, work);
-#endif
 
 	srpt_refresh_port(sport);
 }
@@ -4013,9 +3953,7 @@
 
 	if (!sport)
 		goto out;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
-	res = cpumask_scnprintf(buf, PAGE_SIZE, sport->comp_v_mask);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
 	res = cpumask_scnprintf(buf, PAGE_SIZE, &sport->comp_v_mask);
 #else
 	res = scnprintf(buf, PAGE_SIZE, "%*pb",
@@ -4046,11 +3984,7 @@
 	res = -ENOMEM;
 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 		goto out;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-	res = bitmap_parse(buf, count, cpumask_bits(mask), nr_cpumask_bits);
-#else
 	res = cpumask_parse(buf, mask);
-#endif
 	if (res)
 		goto free_mask;
 	res = -EINVAL;
@@ -4110,7 +4044,6 @@
 	if (!sport)
 		goto out;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) /* commit a3f5adaf4 */
 	switch (rdma_port_get_link_layer(sport->sdev->device, sport->port)) {
 	case IB_LINK_LAYER_INFINIBAND:
 		lln = "InfiniBand";
@@ -4122,7 +4055,6 @@
 	default:
 		break;
 	}
-#endif
 	res = sprintf(buf, "%s\n", lln);
 
 out:
@@ -4268,7 +4200,6 @@
 	return sprintf(buf, "%s\n", get_ch_state_name(ch->state));
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(RHEL_RELEASE_CODE)
 static ssize_t show_comp_vector(struct kobject *kobj,
 				struct kobj_attribute *attr, char *buf)
 {
@@ -4279,7 +4210,6 @@
 	ch = scst_sess_get_tgt_priv(sess);
 	return ch ? sprintf(buf, "%u\n", ch->comp_vector) : -ENOENT;
 }
-#endif
 
 static const struct kobj_attribute srpt_req_lim_attr =
 	__ATTR(req_lim,       S_IRUGO, show_req_lim,       NULL);
@@ -4287,18 +4217,14 @@
 	__ATTR(req_lim_delta, S_IRUGO, show_req_lim_delta, NULL);
 static const struct kobj_attribute srpt_ch_state_attr =
 	__ATTR(ch_state, S_IRUGO, show_ch_state, NULL);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(RHEL_RELEASE_CODE)
 static const struct kobj_attribute srpt_comp_vector_attr =
 	__ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
-#endif
 
 static const struct attribute *srpt_sess_attrs[] = {
 	&srpt_req_lim_attr.attr,
 	&srpt_req_lim_delta_attr.attr,
 	&srpt_ch_state_attr.attr,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(RHEL_RELEASE_CODE)
 	&srpt_comp_vector_attr.attr,
-#endif
 	NULL
 };
 
@@ -4397,9 +4323,7 @@
 	srq_attr.attr.max_wr = sdev->srq_size;
 	srq_attr.attr.max_sge = 1;
 	srq_attr.attr.srq_limit = 0;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
 	srq_attr.srq_type = IB_SRQT_BASIC;
-#endif
 
 	sdev->srq = use_srq ? ib_create_srq(sdev->pd, &srq_attr) :
 		ERR_PTR(-EOPNOTSUPP);
@@ -4448,15 +4372,7 @@
 		sport->sdev = sdev;
 		sport->port = i;
 		srpt_init_sport(sport, sdev->device);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
-		/*
-		 * A vanilla 2.6.19 or older kernel without backported OFED
-		 * kernel headers.
-		 */
-		INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
-#else
 		INIT_WORK(&sport->work, srpt_refresh_port_work);
-#endif
 		ret = srpt_refresh_port(sport);
 		if (ret) {
 			pr_err("MAD registration failed for %s-%d.\n",
@@ -4487,9 +4403,12 @@
 	 * in the system as service_id; therefore, the target_id will change
 	 * if this HCA is gone bad and replaced by different HCA
 	 */
-	ret = ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0
+	ret = ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)
 #ifdef IB_CM_LISTEN_TAKES_FOURTH_ARG
+			   , 0
 			   , NULL
+#elif defined(IB_CM_LISTEN_TAKES_THIRD_ARG)
+			   , 0
 #endif
 			   );
 	if (ret) {
@@ -4567,16 +4486,7 @@
 
 	/* Cancel any work queued by the just unregistered IB event handler. */
 	for (i = 0; i < sdev->device->phys_port_cnt; i++)
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
 		cancel_work_sync(&sdev->port[i].work);
-#else
-		/*
-		 * cancel_work_sync() was introduced in kernel 2.6.22. Older
-		 * kernels do not have a facility to cancel scheduled work, so
-		 * wait until the scheduled work finished.
-		 */
-		flush_scheduled_work();
-#endif
 
 	ib_destroy_cm_id(sdev->cm_id);
 
@@ -4667,7 +4577,7 @@
 		goto out;
 	}
 
-	srpt_wq = alloc_workqueue("srpt", WQ_SYSFS | WQ_NON_REENTRANT, 0);
+	srpt_wq = alloc_workqueue("srpt", WQ_SYSFS, 0);
 	if (!srpt_wq) {
 		pr_err("Couldn't allocate the ib_srpt workqueue\n");
 		ret = -ENOMEM;
@@ -4685,11 +4595,7 @@
 	if (rdma_cm_port) {
 		struct sockaddr_in addr;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) && \
-	(!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
-		rdma_cm_id = rdma_create_id(srpt_rdma_cm_handler, NULL,
-					    RDMA_PS_TCP);
-#elif !RDMA_CREATE_ID_TAKES_NET_ARG
+#if !RDMA_CREATE_ID_TAKES_NET_ARG
 		rdma_cm_id = rdma_create_id(srpt_rdma_cm_handler, NULL,
 					    RDMA_PS_TCP, IB_QPT_RC);
 #else

diff --git a/scst/srpt/src/ib_srpt.h b/scst/srpt/src/ib_srpt.h
index a6a1849..a52cc43 100644
--- a/scst/srpt/src/ib_srpt.h
+++ b/scst/srpt/src/ib_srpt.h

@@ -204,12 +204,6 @@
 	RDMA_COMPL_TIMEOUT_S = 80,
 };
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) &&			\
-	!defined(HAVE_IB_EVENT_GID_CHANGE)
-/* See also patch "IB/core: Add GID change event" (commit 761d90ed4). */
-enum { IB_EVENT_GID_CHANGE = 18 };
-#endif
-
 enum srpt_opcode {
 	SRPT_RECV,
 	SRPT_SEND,
@@ -445,9 +439,7 @@
 	struct list_head	list;
 	struct list_head	cmd_wait_list;
 	uint16_t		pkey;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(RHEL_RELEASE_CODE)
 	u16			comp_vector;
-#endif
 	bool			using_rdma_cm;
 	bool			processing_wait_list;
 	struct scst_session	*sess;

diff --git a/scst/usr/fileio/Makefile b/scst/usr/fileio/Makefile
index 54fc649..a5135e3 100644
--- a/scst/usr/fileio/Makefile
+++ b/scst/usr/fileio/Makefile

@@ -18,12 +18,6 @@
         PREFIX=/usr/local
 endif
 
-REVISION ?= $(shell if svn info >/dev/null 2>&1;                        \
-	then svn info | sed -n 's/^Revision:[[:blank:]]*/r/p';\
-	else git log | grep -c ^commit;                      \
-	fi)
-VERSION = $(shell echo -n "$$(sed -n 's/^\#define[[:blank:]]VERSION_STR[[:blank:]]*\"\([^-]*\).*\"/\1/p' ../include/version.h).$(REVISION)")
-
 SHELL=/bin/bash
 
 SRCS_F = fileio.c common.c debug.c crc32.c
@@ -38,12 +32,8 @@
 DEBUG_INC_DIR := ../include
 INSTALL_DIR := $(DESTDIR)$(PREFIX)/bin/scst
 
-RPM_INC_DIR = ./scst/include
-RPM_INC_DIR_EXTRA = ./include
-RPM_ARC_DIR_EXTRA = ../include
-
 CFLAGS += -O2 -Wall -Wextra -Wno-unused-parameter -Wstrict-prototypes \
-	-I$(SCST_INC_DIR) -I$(DEBUG_INC_DIR) -I$(RPM_INC_DIR) -I$(RPM_INC_DIR_EXTRA) -D_GNU_SOURCE -D__USE_FILE_OFFSET64 \
+	-I$(SCST_INC_DIR) -I$(DEBUG_INC_DIR) -D_GNU_SOURCE -D__USE_FILE_OFFSET64 \
 	-D__USE_LARGEFILE64
 PROGS = fileio_tgt
 LIBS = -lpthread
@@ -105,8 +95,6 @@
 
 extraclean: clean
 	rm -f *.orig *.rej
-	rm -f *.orig *.rej *.tar.bz2
-	@find . -type d -name rpmbuilddir | xargs rm -rf
 
 2release:
 	-$(MAKE) clean
@@ -118,25 +106,6 @@
 	-$(MAKE) clean
 
 release-archive:
-	../../scripts/generate-release-archive fileio_tgt $(VERSION)
-
-rpm-archive:
-	../../scripts/generate-release-archive fileio_tgt $(VERSION) \
-	"$$(../../scripts/list-source-files)" $(SCST_INC_DIR)/* $(RPM_ARC_DIR_EXTRA)
-
-rpm:
-	name=fileio_tgt &&                                              \
-	rpmtopdir="$$(if [ $$(id -u) = 0 ]; then echo /usr/src/packages;\
-		else echo $$PWD/rpmbuilddir; fi)" &&                    \
-	$(MAKE) rpm-archive &&                                          \
-	for d in BUILD RPMS SOURCES SPECS SRPMS; do                     \
-		mkdir -p $${rpmtopdir}/$$d;                                   \
-	done &&                                                         \
-	cp $${name}-$(VERSION).tar.bz2 $${rpmtopdir}/SOURCES &&         \
-	MAKE="$(MAKE)"                                                  \
-	rpmbuild --define="%_topdir $${rpmtopdir}"                      \
-		--define="%rpm_version $(VERSION)"                      \
-		-ba $${name}.spec &&                                    \
-	rm -f $${name}-$(VERSION).tar.bz2
+	../../scripts/generate-release-archive fileio_tgt "$$(sed -n 's/^#define[[:blank:]]VERSION_STR[[:blank:]]*\"\([^\"]*\)\".*/\1/p' ../include/version.h)"
 
 .PHONY: all install uninstall clean extraclean 2release 2debug 2perf

diff --git a/scst/usr/fileio/README b/scst/usr/fileio/README
index bacdd13..fa18414 100644
--- a/scst/usr/fileio/README
+++ b/scst/usr/fileio/README

@@ -1,7 +1,7 @@
 User space FILEIO handler
 =========================
 
-Version 3.5.0, 21 December 2020
+Version 3.7.0, 26 December 2022
 ----------------------------
 
 User space program fileio_tgt uses interface of SCST's scst_user dev

diff --git a/scst/usr/fileio/common.c b/scst/usr/fileio/common.c
index b2d242f..48d031c 100644
--- a/scst/usr/fileio/common.c
+++ b/scst/usr/fileio/common.c

@@ -792,7 +792,7 @@
 		struct scst_user_reply_cmd replies[MULTI_CMDS_CNT];
 		struct scst_user_get_multi multi_cmd;
 		struct scst_user_get_cmd cmds[MULTI_CMDS_CNT];
-	} multi;
+	} multi = {};
 
 	TRACE_ENTRY();
 

diff --git a/scst/usr/fileio/common.h b/scst/usr/fileio/common.h
index 6b0efdb..7c8df98 100644
--- a/scst/usr/fileio/common.h
+++ b/scst/usr/fileio/common.h

@@ -30,7 +30,7 @@
 /* 8 byte ASCII Vendor */
 #define VENDOR				"ACTIFIO"
 /* 4 byte ASCII Product Revision Level - left aligned */
-#define FIO_REV				"350 "
+#define FIO_REV				"370 "
 
 #define MAX_USN_LEN			(20+1) /* For '\0' */
 

diff --git a/scst/usr/fileio/fileio_tgt.spec b/scst/usr/fileio/fileio_tgt.spec
deleted file mode 100644
index 0cdabfc..0000000
--- a/scst/usr/fileio/fileio_tgt.spec
+++ /dev/null

@@ -1,53 +0,0 @@
-%define make %{expand:%%(echo ${MAKE:-make})}
-%define name fileio_tgt
-
-Name:          %{name}
-Version:       %{rpm_version}
-Release:       1
-Summary:       SCST file IO user-mode backend
-Group:         System/User
-License:       GPLv2
-URL:           http://scst.sourceforge.net
-Source:                %{name}-%{rpm_version}.tar.bz2
-BuildRoot:     %{_tpmpath}/%{name}-%{rpm_version}-%{name}-build
-
-%description
-User space program fileio_tgt uses interface of SCST's scst_user dev
-handler and allows to see how it works in various modes. Fileio_tgt
-provides mostly the same functionality as the kernel space SCST's
-scst_vdisk handler with the only exceptions that it supports O_DIRECT
-mode and doesn't support BLOCKIO one. O_DIRECT mode is basically the
-same as BLOCKIO, but also supports files, so for some loads it could be
-significantly faster, than the regular FILEIO access. All the words
-about BLOCKIO mode from SCST's README file apply to O_DIRECT mode as
-well.
-
-%prep
-%setup -q
-
-
-%build
-%{make} DESTDIR=%{buildroot}
-
-
-%install
-%{make} install INSTALL_DIR=%{buildroot}/opt/act/bin
-
-
-%clean
-rm -rf %{buildroot}
-
-
-%pre
-rm -rf %{buildroot}/opt/act/bin/*
-
-
-%files
-%defattr(-,root,root,-)
-%dir /opt/act/bin
-/opt/act/bin/fileio_tgt
-
-
-%changelog
-* Mon Apr 20 2020 Jim McCarthy <jim.mccarthy@actifio.com>
-* - Initial spec file.

diff --git a/scst/usr/include/version.h b/scst/usr/include/version.h
index df6d06f..5eca6d7 100644
--- a/scst/usr/include/version.h
+++ b/scst/usr/include/version.h

@@ -19,6 +19,6 @@
 #ifndef __VERSION_H
 #define __VERSION_H
 
-#define VERSION_STR "3.5.0"
+#define VERSION_STR "3.7.0"
 
 #endif /* __VERSION_H */

diff --git a/scst/www/Makefile b/scst/www/Makefile
new file mode 100644
index 0000000..069466a
--- /dev/null
+++ b/scst/www/Makefile

@@ -0,0 +1,5 @@
+# Run HTML Tidy in errors-and-warnings-only mode (-e) on every *.html file
+# below this directory; sed swaps a leading "-e" on an output line for the file name.
+.PHONY: check
+check:
+	find . -name '*.html' | while read -r f; do tidy -e < "$$f" | sed "s|^-e|$$f|"; done

diff --git a/scst/www/comparison.html b/scst/www/comparison.html
index 4b3374e..4bb50d8 100644
--- a/scst/www/comparison.html
+++ b/scst/www/comparison.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -55,7 +55,7 @@
 
 				<p><small>As on June 2011, briefly reviewed April 2013.</small></p>
 
-<table bgcolor="#F0F0F0" border="1" cellspacing="1" cellpadding="7" style="text-align:center" width="620">
+<table bgcolor="#F0F0F0" border="1" cellspacing="1" cellpadding="7" style="text-align:center" width="620" summary="">
 
 <tr>
 <td>
@@ -269,9 +269,6 @@
 									(not completed) </td>	<td> - </td>		<td> - </td>		<td> - </td>
 </tr>
 <tr>
-<td align="left"><b>IBM pSeries Virtual SCSI</b></td>			<td> + </td>		<td> + </td>		<td> - </td>		<td> Preliminary </td>
-</tr>
-<tr>
 <td align="left"><b>Local access to emulated backstorage devices
 <sup><A HREF="#6">6</A></sup></b></td>					<td> scst_local </td>	<td> - </td>		<td> - </td>		<td> tcm_loop </td>
 </tr>
@@ -546,7 +543,7 @@
 <!-- wrap ends here -->
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font color="#EC981F">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font color="#EC981F">Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	   Design by: <b><font color="#EC981F">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/contributing.html b/scst/www/contributing.html
index f784d44..df6f092 100644
--- a/scst/www/contributing.html
+++ b/scst/www/contributing.html

@@ -11,14 +11,15 @@
 <body>
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
+
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li id="current"><a href="contributing.html">Contributing</a></li>
@@ -26,6 +27,7 @@
 			<li><a href="users.html">Users</a></li>
 		</ul>
 	</div>
+
 	<div id="content-wrap">
 	  		<div id="main">
 				<h1>Contributing to SCST</h1>
@@ -183,7 +185,7 @@
 <!-- wrap ends here -->
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font color="#EC981F">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font color="#EC981F">Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	   Design by: <b><font color="#EC981F">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/downloads.html b/scst/www/downloads.html
index f48d8c5..bd5b724 100644
--- a/scst/www/downloads.html
+++ b/scst/www/downloads.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li id="current"><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -35,10 +35,9 @@
 			<div id="main">
 				<h1>SCST Downloads</h1>
 
-				<p>The latest stable version of SCST is 3.3.
-				  The latest updates for that version are
-				  available on the 3.3.x branch in the SVN
-				  repository.</p>
+				<p>The latest stable version of SCST is 3.7.
+				  Updates for that version are available on
+				  the 3.7.x branch in the Git repository.</p>
 
 				<p>Debian packages can be built by running
 				"make dpkg". RPMs can be built by running
@@ -48,30 +47,30 @@
 
 				<p>There is also a well done user space port, which you can find <a href="https://github.com/DavidButterfield/SCST-Usermode-Adaptation">here</a>.</p>
 
-				<p>The latest development version of SCST is 3.4. You can download it as well as target drivers and user space
-				utilities directly from the SCST SVN. You can access it using either
-				<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/trunk">web-based SVN repository viewer</a> or using anonymous access:</p>
+				<p>The latest development version of SCST is
+				3.8. That version, including target drivers and
+				user space utilities, can be downloaded
+				directly from the SCST Git repository.</p>
+				<p>You can access it either through the
+				<a href="https://github.com/SCST-project/scst">GitHub repository viewer</a> or by using 'git clone':</p>
+				<p><code>git clone https://github.com/SCST-project/scst.git</code></p>
 
+				<p>Also you can find in the SCST Git repository the latest updates for the stable branches.</p>
+
+				<p>History of the pre-Git SCST development is available in the SCST SVN repository, which is accessible using
+				<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/trunk">web-based SVN repository viewer</a>, or using anonymous access:</p>
 				<p><code>svn checkout svn://svn.code.sf.net/p/scst/svn/trunk scst-trunk</code></p>
 
-				<p>Also you can find in the SCST SVN the latest updates for the stable branches. More information about accessing SVN repository may be found
-				<a href="https://sourceforge.net/p/forge/documentation/svn/">here</a>. Or, alternatively, you can download it as a GNU tarball from
-				<a href="http://sourceforge.net/p/scst/svn/HEAD/tarball?path=/branches/3.3.x">here</a>.</p>
-
-				<p>History of the pre-SVN SCST development is available in SCST CVS repository, which is accessible using
-				<a href="http://scst.cvs.sourceforge.net">web-based CVS repository viewer</a>, or anonymous CVS access.</p>
-
 				<p class="post-footer align-right">
-					<a href="http://sourceforge.net/project/showfiles.php?group_id=110471" class="readmore">Download released versions</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
-					<a href="https://github.com/bvanassche/scst" class="readmore">Official git mirror</a>
+					<a href="https://github.com/SCST-project/scst/releases" class="readmore">Download released versions</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
 			</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/handler_fileio_tgt.html b/scst/www/handler_fileio_tgt.html
index 6e11239..758e817 100644
--- a/scst/www/handler_fileio_tgt.html
+++ b/scst/www/handler_fileio_tgt.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -66,18 +62,20 @@
 				All the words about BLOCKIO mode from SCST's README file apply to
 				O_DIRECT mode as well.</p>
 
-				<p>You can find the latest development version of this handler in the SCST SVN. See the download page how to setup
+				<p>You can find the latest development version
+				of this handler in the SCST Git
+				repository. See the download page for how to set up
 				access to it.</p>
 				<p class="post-footer align-right">
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
 	  		</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/images/sourceforge_badges/oss-community-choice-white.svg b/scst/www/images/sourceforge_badges/oss-community-choice-white.svg
new file mode 100644
index 0000000..76d73fd
--- /dev/null
+++ b/scst/www/images/sourceforge_badges/oss-community-choice-white.svg

@@ -0,0 +1 @@
+<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1500 1500"><defs><style>.cls-1{fill:#898989;}.cls-2{fill:#eae9ee;stroke:#c7c2bd;}.cls-2,.cls-3,.cls-5{stroke-miterlimit:10;}.cls-2,.cls-5{stroke-width:5.1px;}.cls-3{fill:#fff;stroke-width:25.5px;}.cls-3,.cls-5{stroke:#898989;}.cls-4{fill:#3f3f3f;}.cls-5{fill:url(#linear-gradient);}.cls-6{fill:#ff6700;}</style><linearGradient id="linear-gradient" x1="78.74" y1="1183.39" x2="1421.26" y2="1183.39" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#eaeaec"/><stop offset="1" stop-color="#fffefe"/></linearGradient></defs><polygon class="cls-1" points="1421.26 1032.79 78.74 1032.79 200.73 965.7 1299.27 965.7 1421.26 1032.79"/><circle class="cls-2" cx="750" cy="750" r="655.64"/><circle class="cls-3" cx="750" cy="750" r="575.63"/><path class="cls-4" d="M271.67,758.71a48.62,48.62,0,0,1,19-19.22,55.08,55.08,0,0,1,27.61-6.88q18.92,0,32.38,10t18,27.23H340.21a23,23,0,0,0-9-10.14A25.28,25.28,0,0,0,318,756.21q-12,0-19.37,8.32t-7.41,22.24q0,13.93,7.41,22.25T318,817.34a25.28,25.28,0,0,0,13.23-3.48,23,23,0,0,0,9-10.14h28.44q-4.53,17.25-18,27.16t-32.38,9.91a55.08,55.08,0,0,1-27.61-6.88,48.75,48.75,0,0,1-19-19.14q-6.81-12.25-6.81-28A57.08,57.08,0,0,1,271.67,758.71Z"/><path class="cls-4" d="M407.31,834.13a52,52,0,0,1-19.82-19.44,57.19,57.19,0,0,1,0-56.06,52.18,52.18,0,0,1,19.82-19.36,57.65,57.65,0,0,1,54.92,0,51.1,51.1,0,0,1,19.67,19.36,58,58,0,0,1-.08,56.06,51.64,51.64,0,0,1-19.67,19.44,57.49,57.49,0,0,1-54.84,0ZM455.12,809q7.63-8.47,7.64-22.4t-7.64-22.46q-7.65-8.4-20.35-8.4-12.87,0-20.5,8.32t-7.64,22.54q0,14.07,7.64,22.47t20.5,8.4Q447.48,817.49,455.12,809Z"/><path class="cls-4" d="M623.74,733.82V840H597.87v-63.7L574.11,840H553.23l-23.9-63.85V840H503.46V733.82H534l29.81,73.53,29.5-73.53Z"/><path class="cls-4" d="M762.64,733.82V840H736.76v-63.7L713,840H692.13l-23.91-63.85V840H642.35V733.82h30.56l29.81,73.53,29.5-73.53Z"/><path class="cls-4" 
d="M806.66,733.82v63.55q0,9.52,4.69,14.67t13.77,5.15q9.09,0,13.92-5.15t4.84-14.67V733.82h25.88v63.39q0,14.24-6.06,24.06a38.7,38.7,0,0,1-16.26,14.83,51.14,51.14,0,0,1-22.77,5,49.88,49.88,0,0,1-22.47-4.92,36.54,36.54,0,0,1-15.66-14.82q-5.74-9.92-5.75-24.14V733.82Z"/><path class="cls-4" d="M982.93,840H957.05l-43.27-65.51V840H887.91V733.82h25.87l43.27,65.82V733.82h25.88Z"/><path class="cls-4" d="M1027.56,733.82V840h-25.87V733.82Z"/><path class="cls-4" d="M1122.73,733.82v20.73h-28.14V840h-25.88V754.55h-28.14V733.82Z"/><path class="cls-4" d="M1226.82,733.82l-36.77,71.11V840h-25.87v-35.1l-36.77-71.11h29.36l20.57,44.48,20.43-44.48Z"/><path class="cls-4" d="M462.38,903.21a48.69,48.69,0,0,1,19-19.22A55.11,55.11,0,0,1,509,877.11q18.91,0,32.38,10t18,27.24H530.92a23,23,0,0,0-9-10.14,25.34,25.34,0,0,0-13.24-3.48q-12,0-19.37,8.32t-7.41,22.24q0,13.93,7.41,22.25t19.37,8.32a25.34,25.34,0,0,0,13.24-3.48,23,23,0,0,0,9-10.14h28.45q-4.54,17.25-18,27.16T509,985.29a55.21,55.21,0,0,1-27.61-6.88,48.77,48.77,0,0,1-19-19.15q-6.81-12.24-6.81-28A57.08,57.08,0,0,1,462.38,903.21Z"/><path class="cls-4" d="M667.24,878.32V984.53H641.37V940.81H601.12v43.72H575.25V878.32h25.87v41.61h40.25V878.32Z"/><path class="cls-4" d="M708.62,978.63a52,52,0,0,1-19.82-19.44,57.19,57.19,0,0,1,0-56.06,52.18,52.18,0,0,1,19.82-19.36,57.65,57.65,0,0,1,54.92,0,51.1,51.1,0,0,1,19.67,19.36,58,58,0,0,1-.07,56.06,51.71,51.71,0,0,1-19.67,19.44,57.51,57.51,0,0,1-54.85,0Zm47.81-25.11q7.64-8.47,7.64-22.4t-7.64-22.46q-7.63-8.4-20.35-8.4t-20.5,8.32q-7.65,8.32-7.64,22.54t7.64,22.47q7.63,8.4,20.5,8.4Q748.78,962,756.43,953.52Z"/><path class="cls-4" d="M830.64,878.32V984.53H804.77V878.32Z"/><path class="cls-4" 
d="M851.82,903.21a48.69,48.69,0,0,1,19-19.22,55.13,55.13,0,0,1,27.61-6.88q18.92,0,32.38,10t18,27.24H920.36a22.94,22.94,0,0,0-9-10.14,25.31,25.31,0,0,0-13.24-3.48q-12,0-19.36,8.32t-7.42,22.24q0,13.93,7.42,22.25t19.36,8.32a25.31,25.31,0,0,0,13.24-3.48,22.94,22.94,0,0,0,9-10.14h28.45q-4.54,17.25-18,27.16t-32.38,9.91a55.24,55.24,0,0,1-27.61-6.88,48.77,48.77,0,0,1-19-19.15q-6.8-12.24-6.8-28A57.19,57.19,0,0,1,851.82,903.21Z"/><path class="cls-4" d="M990.57,899.05v21.48h34.64v20H990.57v23.3h39.18v20.73H964.69V878.32h65.06v20.73Z"/><path class="cls-5" d="M1421.26,1032.78H78.74l33.72,166.56A168,168,0,0,0,277.13,1334h945.74a168,168,0,0,0,164.67-134.66Z"/><path class="cls-6" d="M345.4,1186.65c0-30-10.59-43.72-16.2-48.91a1.6,1.6,0,0,0-2.65,1.42c1.09,17-20.11,21.22-20.11,47.8v.16c0,16.19,12.16,29.4,27.12,29.4s27.11-13.21,27.11-29.4V1187c0-7.55-2.8-14.78-5.61-20.13-.62-1.1-2.18-.63-2,.32C358.18,1190.11,345.4,1204.26,345.4,1186.65Z"/><path class="cls-6" d="M313.14,1250a2.89,2.89,0,0,1-1.87-.79l-69.34-70a2.88,2.88,0,0,1,0-3.78l73.24-73.91a3.29,3.29,0,0,1,1.71-.63h21a2.46,2.46,0,0,1,2.34,1.58,2.58,2.58,0,0,1-.62,2.83l-68.73,69.5a3.54,3.54,0,0,0,0,5l54.39,55a2.91,2.91,0,0,1,0,3.78l-10.44,10.69a3.3,3.3,0,0,1-1.72.63ZM327,1262.29a2.45,2.45,0,0,1-2.33-1.58,2.54,2.54,0,0,1,.62-2.83l68.88-69.5a3.76,3.76,0,0,0,1.09-2.52,3.15,3.15,0,0,0-1.09-2.52l-54.54-55a2.87,2.87,0,0,1,0-3.77l10.59-10.69a2.58,2.58,0,0,1,1.87-.79,2.39,2.39,0,0,1,1.72.94L423,1184a2.68,2.68,0,0,1,0,3.78l-73.24,73.91a2.57,2.57,0,0,1-1.87.78H327Z"/><path class="cls-4" 
d="M499.22,1184.44a28,28,0,0,0-6.71-3.93,61,61,0,0,0-7.48-2.83c-2.65-.78-5.14-1.73-7.48-2.51a33.61,33.61,0,0,1-6.23-3.15,14.61,14.61,0,0,1-4.36-4.4,11.08,11.08,0,0,1-1.56-6.29,14.06,14.06,0,0,1,1.09-5.66,16,16,0,0,1,3.27-4.72,15,15,0,0,1,5.3-3.3,21.86,21.86,0,0,1,7.48-1.26,22.18,22.18,0,0,1,7.17,1.1,35.23,35.23,0,0,1,4.83,2.36,20.46,20.46,0,0,1,3.12,2.2c2.18,2,3.89,1.57,4.83-.16l1.71-3.3-.46-.47a31.13,31.13,0,0,0-9.51-6.45,30.54,30.54,0,0,0-11.69-2,26.79,26.79,0,0,0-10.28,1.88,23.58,23.58,0,0,0-7.64,4.88,21.33,21.33,0,0,0-4.68,7.08,21.94,21.94,0,0,0-1.55,8.33,20.17,20.17,0,0,0,1.87,9.12,18.5,18.5,0,0,0,4.83,6.13,27.56,27.56,0,0,0,6.7,3.93c2.34,1,4.83,1.89,7.48,2.68s5.14,1.73,7.48,2.51a25.49,25.49,0,0,1,6.23,3,14.79,14.79,0,0,1,4.37,4.41,12.18,12.18,0,0,1,1.55,6.6,17.94,17.94,0,0,1-1.24,7.08,15.64,15.64,0,0,1-3.74,5.66,19.94,19.94,0,0,1-6.08,3.77,22.05,22.05,0,0,1-8.26,1.42,23.42,23.42,0,0,1-6.08-.63,21.17,21.17,0,0,1-4.67-1.57,19.17,19.17,0,0,1-3.59-2.05c-.93-.79-1.87-1.57-2.65-2.2s-1.55-1.26-2-1.73a2.69,2.69,0,0,0-1.87-.94,2.86,2.86,0,0,0-2.34,1.25l-2,3.15.47.47a34.61,34.61,0,0,0,10.44,8c3.9,2.05,8.73,3,14.18,3a30.9,30.9,0,0,0,10.91-1.89,26,26,0,0,0,8.42-5.5,23.81,23.81,0,0,0,5.3-8.18,27.56,27.56,0,0,0,1.87-10.06,18.94,18.94,0,0,0-1.87-8.81A18.29,18.29,0,0,0,499.22,1184.44Z"/><path class="cls-4" 
d="M584.46,1151A35,35,0,0,0,572,1142.3a43.52,43.52,0,0,0-16-3,40.3,40.3,0,0,0-15.89,3.15,34.79,34.79,0,0,0-12.47,8.65,40.43,40.43,0,0,0-8.11,13.52,48.41,48.41,0,0,0-3,17.46,51.83,51.83,0,0,0,3,17.45,38.9,38.9,0,0,0,8.11,13.52,34.79,34.79,0,0,0,12.47,8.65,42.66,42.66,0,0,0,15.89,3,38.53,38.53,0,0,0,16-3.14,35.13,35.13,0,0,0,12.47-8.65,40.56,40.56,0,0,0,8.1-13.53,55.62,55.62,0,0,0,0-34.91A40.41,40.41,0,0,0,584.46,1151ZM586,1197.5a32.93,32.93,0,0,1-6.55,11.48,26.66,26.66,0,0,1-10.28,7.23,32.69,32.69,0,0,1-13.25,2.52,33.22,33.22,0,0,1-13.25-2.52,28.47,28.47,0,0,1-10.28-7.23,31,31,0,0,1-6.7-11.48,47.67,47.67,0,0,1-2.34-15.41,47.09,47.09,0,0,1,2.34-15.42,33.06,33.06,0,0,1,6.7-11.47,28.37,28.37,0,0,1,10.28-7.24,36.23,36.23,0,0,1,26.5,0,28.37,28.37,0,0,1,10.28,7.24,32.79,32.79,0,0,1,6.55,11.47,47.73,47.73,0,0,1,2.34,15.42A47,47,0,0,1,586,1197.5Z"/><path class="cls-4" d="M664.41,1192a31.49,31.49,0,0,1-1.56,10.38,24.72,24.72,0,0,1-4.68,8.18,20.18,20.18,0,0,1-7.48,5.5,24.49,24.49,0,0,1-10,1.89,25.27,25.27,0,0,1-10-2,22.23,22.23,0,0,1-7.48-5.51,23.35,23.35,0,0,1-4.67-8.18,37.75,37.75,0,0,1-1.56-10.37v-51.43h-7.79v51.43a38.61,38.61,0,0,0,2.18,12.89,32.43,32.43,0,0,0,6.23,10.54,28.93,28.93,0,0,0,10,7.07,35.26,35.26,0,0,0,26.18,0,29,29,0,0,0,10-7.07,32.64,32.64,0,0,0,6.24-10.54,38.61,38.61,0,0,0,2.18-12.89v-51.43h-7.79Z"/><path class="cls-4" d="M717.7,1186.17a35.71,35.71,0,0,0,7.64-2.2,28.6,28.6,0,0,0,7.48-5,21.9,21.9,0,0,0,4.83-7.08,23.53,23.53,0,0,0,1.71-9c0-7.39-2.49-13.05-7.32-16.83s-12-5.66-21.51-5.66H688.87v83.35h7.79v-76.9h13.87c7,0,12.31,1.42,15.9,4.09s5.3,6.6,5.3,12.11a18.21,18.21,0,0,1-1.4,7.23,14.58,14.58,0,0,1-4.21,5.51,19,19,0,0,1-6.86,3.61,34.32,34.32,0,0,1-9.35,1.26h-5c-2.49,0-2.8.79-2.8,2.2V1187h.93a52.69,52.69,0,0,1,6.39.47,5.33,5.33,0,0,1,1.72,1.57L737,1222a4.31,4.31,0,0,0,1.55,1.41,4.25,4.25,0,0,0,2,.48h7l-29-36.64A6.81,6.81,0,0,0,717.7,1186.17Z"/><path class="cls-4" 
d="M812.76,1208.35a2,2,0,0,0-.78.47,48.54,48.54,0,0,1-4.67,3.77,39.3,39.3,0,0,1-5.14,2.68,25.35,25.35,0,0,1-6.08,1.73,37.63,37.63,0,0,1-7.64.63,32.05,32.05,0,0,1-12.46-2.36,28.08,28.08,0,0,1-10-6.92,31.54,31.54,0,0,1-6.54-11.17,45,45,0,0,1-2.34-15.09,44.52,44.52,0,0,1,2.49-14.79,30.06,30.06,0,0,1,17-18.24,33.15,33.15,0,0,1,13.25-2.51,44,44,0,0,1,7,.63,24.88,24.88,0,0,1,5.45,1.41,33.37,33.37,0,0,1,4,1.89,16.87,16.87,0,0,1,3,1.88,12.32,12.32,0,0,1,2,1.42,3.72,3.72,0,0,0,1.87.79,2.44,2.44,0,0,0,2.18-1.1l2.19-3.31-.63-.63a45.84,45.84,0,0,0-5.61-4.4,27.33,27.33,0,0,0-6.08-3.14,41.57,41.57,0,0,0-15.27-2.52,43.07,43.07,0,0,0-16.52,3.14,38.28,38.28,0,0,0-12.78,8.81,43.94,43.94,0,0,0-8.26,13.53,47.45,47.45,0,0,0-3,17.29,51.9,51.9,0,0,0,2.81,17.46,38.68,38.68,0,0,0,8,13.52,34.19,34.19,0,0,0,12.31,8.65,41.83,41.83,0,0,0,15.74,3,33,33,0,0,0,9.19-1.1,29.48,29.48,0,0,0,8-2.36,31.13,31.13,0,0,0,6.7-3.77,42.74,42.74,0,0,0,5.61-5l.62-.63-3-3.14A2.14,2.14,0,0,0,812.76,1208.35Zm-.46-55.4a2.9,2.9,0,0,1-.22-.19A1.86,1.86,0,0,0,812.3,1153Zm-2.34-1.84h0c.39.31.78.59,1.13.84C810.74,1151.7,810.35,1151.42,810,1151.11Z"/><polygon class="cls-4" points="831.15 1223.92 881.49 1223.92 881.49 1217 838.95 1217 838.95 1184.92 874.32 1184.92 874.32 1178.15 838.95 1178.15 838.95 1147.17 881.49 1147.17 881.49 1140.41 831.15 1140.41 831.15 1223.92"/><path class="cls-4" 
d="M1165.58,1187.12a2.8,2.8,0,0,0,.94,2,3.81,3.81,0,0,0,2.34.79h10.28v18.87a41.15,41.15,0,0,1-7.32,2.67,38,38,0,0,1-9,1,29,29,0,0,1-11.22-2.21,25.62,25.62,0,0,1-8.57-6,27.91,27.91,0,0,1-5.61-9.44,36.5,36.5,0,0,1-2-12.89,37.16,37.16,0,0,1,1.87-12.11,25.77,25.77,0,0,1,5.3-9.28,22.89,22.89,0,0,1,8.42-6,27.67,27.67,0,0,1,11.06-2,42.52,42.52,0,0,1,6.7.47,42.05,42.05,0,0,1,5.14,1.26,17,17,0,0,1,4.21,1.89c1.25.78,2.49,1.41,3.74,2.2a5,5,0,0,0,2.81.94,4.09,4.09,0,0,0,3.43-2.2l4.51-7.07a41.34,41.34,0,0,0-12.46-7.87c-4.83-2-10.76-3-17.46-3a49.09,49.09,0,0,0-17.76,3.14,38.17,38.17,0,0,0-13.56,8.81,39,39,0,0,0-8.57,13.52,47.93,47.93,0,0,0-3,17.3,47.31,47.31,0,0,0,3.11,17.14,42,42,0,0,0,8.73,13.52,36.37,36.37,0,0,0,13.09,8.81,44.46,44.46,0,0,0,16.68,3.14,88,88,0,0,0,9.5-.47,44.59,44.59,0,0,0,8.26-1.57,51.4,51.4,0,0,0,7.33-2.83,34.6,34.6,0,0,0,6.54-3.93v-37.43h-27.43Z"/><path class="cls-4" d="M1091.87,1188.22a25.45,25.45,0,0,0,7.79-3.62,23.7,23.7,0,0,0,5.77-5.34,21.47,21.47,0,0,0,3.58-6.77,26.93,26.93,0,0,0,1.25-8,24.67,24.67,0,0,0-1.87-9.75,18.76,18.76,0,0,0-5.61-7.54,26.12,26.12,0,0,0-9.82-4.88,55.53,55.53,0,0,0-14.49-1.73h-25.25v83.35h15.43v-71.4h10c5.61,0,9.81,1.26,12.62,3.46s4.21,5.51,4.21,9.75a17.6,17.6,0,0,1-1.09,5.82,11.46,11.46,0,0,1-3.12,4.56,15.68,15.68,0,0,1-5.3,3,22.52,22.52,0,0,1-7.64,1.1h-3.58s-1.71,0-1.87,1.41v10.07h5.45c.94.16,1.87,1.57,2.34,2.2l17.3,26.73a5.6,5.6,0,0,0,2.34,2.36,7.36,7.36,0,0,0,3.43.79h13.87l-20.73-30.82A15.69,15.69,0,0,0,1091.87,1188.22Z"/><polygon class="cls-4" points="900.97 1223.92 916.4 1223.92 916.4 1189.95 947.25 1189.95 947.25 1177.53 916.4 1177.53 916.4 1152.84 953.02 1152.84 953.02 1140.57 900.97 1140.57 900.97 1223.92"/><path class="cls-4" d="M1258.78,1152.84v-12.43h-52v83.35h52v-12.43h-36.47v-21.07h25.09c3.43,0,3.58-2.67,3.58-2.67v-9.12h-28.83v-25.63Z"/><path class="cls-4" 
d="M1031.09,1151.74a41.11,41.11,0,0,0-13.24-9,47.26,47.26,0,0,0-34.29-.16,41.27,41.27,0,0,0-13.24,9,39,39,0,0,0-8.57,13.52,50.21,50.21,0,0,0,0,34,39.26,39.26,0,0,0,21.81,22.49,46.37,46.37,0,0,0,17.15,3.14,43.29,43.29,0,0,0,17.14-3.14,39.32,39.32,0,0,0,21.82-22.49,46.4,46.4,0,0,0,3-17,48.18,48.18,0,0,0-3-16.83A41.51,41.51,0,0,0,1031.09,1151.74Zm-6.07,42.61a27.92,27.92,0,0,1-5.15,9.28,23.8,23.8,0,0,1-8.26,5.82,30.17,30.17,0,0,1-21.81,0,23.89,23.89,0,0,1-8.26-5.82,25.77,25.77,0,0,1-5.3-9.28,41.16,41.16,0,0,1,0-24.53,25.77,25.77,0,0,1,5.3-9.28,24,24,0,0,1,8.26-5.82,30.17,30.17,0,0,1,21.81,0,22.6,22.6,0,0,1,8.26,5.82,27.92,27.92,0,0,1,5.15,9.28,41.16,41.16,0,0,1,0,24.53Z"/><path class="cls-6" d="M846.36,578.22c-1.85,11-3.72,22-5.54,33-.91,5.46-2,10.91-2.54,16.41-.37,3.5-2.08,4.75-5.33,5.5a353.22,353.22,0,0,1-159.3,0c-3.61-.82-4.91-2.45-5.44-6.07-2.21-15-4.78-30-7.25-45a5,5,0,0,0-2-3.39v50c-3-.82-5.2-1.1-7.11-2-12.61-5.89-25.21-11.8-37.66-18-1.88-.94-4.22-3.53-4.19-5.33.32-22.55.36-45.14,1.9-67.63A67.41,67.41,0,0,1,631,493.21a100.61,100.61,0,0,1,26.08-19.09,133.86,133.86,0,0,1,51.15-14.22c6.4-.49,13.31,1.42,19.53,3.56,18.23,6.29,36.65,6.3,54-1.11,9.13-3.91,17.19-3.19,25.56-1.35a259.18,259.18,0,0,1,35.3,10.11c28.15,10.59,45.34,31.25,48.39,61.07,2.48,24.21,1.72,48.75,2.25,73.15a5.5,5.5,0,0,1-1.86,4.14C878.22,618,864,624.08,847.89,628.84V578.31Z"/><path class="cls-6" d="M753.81,299c41.6.13,74.78,33.83,74.56,75.73-.2,41.15-34.24,74.78-75.36,74.46a75,75,0,0,1-74.84-75C678.19,332.1,711.59,298.88,753.81,299Z"/><path class="cls-6" 
d="M938.07,552.91V519.48l-1.32-.19c-1.13,6.51-2.4,13-3.37,19.54-2.3,15.34-2.24,15.35-17.52,18.29-2,.39-4,.63-7,1.09-.4-27.8-4.76-54.18-23.57-75.85-18.61-21.44-44-29.62-70.77-37.05,8.3-2.07,16.57-4.3,24.92-6.18a27.45,27.45,0,0,1,16.74,1.22c12.48,4.95,25.22,4.24,37.89-.27a26.13,26.13,0,0,1,11.2-1.46c15.59,1.73,30.51,5.5,43.4,15.28,12,9.11,17.55,21.66,18.68,36,1.17,14.9.91,29.92,1,44.89a6.4,6.4,0,0,1-2.68,4.84C957.05,544,948.16,548.11,938.07,552.91Z"/><path class="cls-6" d="M594.4,557a10.41,10.41,0,0,1-1.79.4c-16.58-2.64-16.56-2.64-19.27-19.37a89.88,89.88,0,0,0-4.48-18.43V552.9c-10.19-4.83-19.21-9-28.09-13.46a5.08,5.08,0,0,1-2.37-3.81c.29-16.2-.5-32.56,1.57-48.54,3.28-25.32,19.69-40.32,47.06-45.95,3.61-.74,7.2-1.61,10.83-2.21,5.47-.9,10.63-.92,16.15,1.31,12.46,5,25.19,4.42,37.9-.08a24.81,24.81,0,0,1,11.82-1.39A193.58,193.58,0,0,1,692.36,446C631.62,458.85,594.32,491.67,594.4,557Z"/><path class="cls-6" d="M924.74,377c0,24.17-12,42.26-30.84,50.13a50.9,50.9,0,0,1-54.79-10.35c-2.94-2.87-3.49-5.22-1.89-9.09a86.45,86.45,0,0,0,3.21-57.83c-1.23-4.37-.3-6.83,3.14-9.28,16.64-11.88,34.46-14.09,52.83-5.22C915.11,344.34,924,360.14,924.74,377Z"/><path class="cls-6" d="M632.55,431a50.45,50.45,0,0,1-30-91.1,50.8,50.8,0,0,1,59.81,0c3.73,2.7,5.11,5.14,3.66,10.14a84.81,84.81,0,0,0,3.1,57.2c1.93,4.67,1.07,7.32-2.43,10.49A51.35,51.35,0,0,1,632.55,431Z"/></svg>
\ No newline at end of file

diff --git a/scst/www/images/sourceforge_badges/oss-community-leader-white.svg b/scst/www/images/sourceforge_badges/oss-community-leader-white.svg
new file mode 100644
index 0000000..093268b
--- /dev/null
+++ b/scst/www/images/sourceforge_badges/oss-community-leader-white.svg

@@ -0,0 +1 @@
+<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1500 1500"><defs><style>.cls-1{fill:#898989;}.cls-2{fill:#eae9ee;stroke:#c7c2bd;}.cls-2,.cls-3,.cls-6{stroke-miterlimit:10;}.cls-2,.cls-6{stroke-width:5.1px;}.cls-3{fill:#fff;stroke-width:25.5px;}.cls-3,.cls-6{stroke:#898989;}.cls-4{fill:#3f3f3f;}.cls-5{fill:#ff6700;}.cls-6{fill:url(#linear-gradient);}</style><linearGradient id="linear-gradient" x1="1362.99" y1="1185.9" x2="123.78" y2="1185.9" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#fffefe"/><stop offset="1" stop-color="#eaeaec"/></linearGradient></defs><polygon class="cls-1" points="1421.26 1035.29 78.74 1035.29 251.77 968.2 1248.23 968.2 1421.26 1035.29"/><path class="cls-2" d="M1281.53,1079.45V420.55a11.09,11.09,0,0,0-5-9.44L755,81.65a9.25,9.25,0,0,0-10,0L223.45,411.11a11.09,11.09,0,0,0-5,9.44v658.9a11.09,11.09,0,0,0,5,9.44L745,1418.35a9.25,9.25,0,0,0,10,0l521.57-329.46A11.09,11.09,0,0,0,1281.53,1079.45Z"/><path class="cls-3" d="M1175.61,1018.91V491.3a8.91,8.91,0,0,0-4-7.56L754,219.94a7.39,7.39,0,0,0-8,0L328.38,483.74a8.91,8.91,0,0,0-4,7.56v527.61a8.92,8.92,0,0,0,4,7.56L746,1290.27a7.39,7.39,0,0,0,8,0l417.63-263.8A8.92,8.92,0,0,0,1175.61,1018.91Z"/><path class="cls-4" d="M389.22,821.84a37.09,37.09,0,0,1,14.5-14.68,42.11,42.11,0,0,1,21.1-5.26q14.44,0,24.74,7.63a36.66,36.66,0,0,1,13.76,20.8H441.58a17.53,17.53,0,0,0-6.88-7.74,19.34,19.34,0,0,0-10.11-2.66,18.89,18.89,0,0,0-14.8,6.36q-5.66,6.36-5.66,17t5.66,17a18.91,18.91,0,0,0,14.8,6.35A19.25,19.25,0,0,0,434.7,864a17.53,17.53,0,0,0,6.88-7.74h21.74A36.44,36.44,0,0,1,449.56,877q-10.29,7.57-24.74,7.57a42.11,42.11,0,0,1-21.1-5.26,37.27,37.27,0,0,1-14.5-14.62,46.62,46.62,0,0,1,0-42.83Z"/><path class="cls-4" 
d="M492.85,879.46a39.61,39.61,0,0,1-15.14-14.85,43.68,43.68,0,0,1,0-42.83A39.84,39.84,0,0,1,492.85,807a44.1,44.1,0,0,1,42,0,39.07,39.07,0,0,1,15,14.8,44.3,44.3,0,0,1-.06,42.83,39.44,39.44,0,0,1-15,14.85,43.93,43.93,0,0,1-41.91,0Zm36.53-19.18q5.83-6.48,5.84-17.11T529.38,826q-5.83-6.42-15.55-6.42t-15.66,6.36q-5.85,6.36-5.84,17.23t5.84,17.16q5.83,6.42,15.66,6.42T529.38,860.28Z"/><path class="cls-4" d="M658.21,802.82V884H638.45V835.3L620.3,884H604.35l-18.27-48.78V884H566.31V802.82h23.36L612.44,859,635,802.82Z"/><path class="cls-4" d="M764.34,802.82V884H744.57V835.3L726.42,884H710.47L692.2,835.19V884H672.43V802.82h23.36L718.56,859l22.54-56.18Z"/><path class="cls-4" d="M798,802.82v48.55q0,7.29,3.58,11.22t10.52,3.93q6.93,0,10.63-3.93t3.7-11.22V802.82h19.77v48.44q0,10.86-4.62,18.38A29.71,29.71,0,0,1,829.13,881a39.11,39.11,0,0,1-17.4,3.81A38.1,38.1,0,0,1,794.57,881a28,28,0,0,1-12-11.33q-4.4-7.58-4.39-18.44V802.82Z"/><path class="cls-4" d="M932.65,884H912.88l-33.06-50.05V884H860.05V802.82h19.77l33.06,50.29V802.82h19.77Z"/><path class="cls-4" d="M966.75,802.82V884H947V802.82Z"/><path class="cls-4" d="M1039.46,802.82v15.84H1018V884H998.19V818.66h-21.5V802.82Z"/><path class="cls-4" d="M1119,802.82l-28.09,54.33V884h-19.77V857.15L1043,802.82h22.43l15.72,34,15.61-34Z"/><path class="cls-4" d="M564.52,969h25.9v15.26H544.75V903.12h19.77Z"/><path class="cls-4" d="M619.66,919v16.41h26.48v15.26H619.66v17.8H649.6v15.84H599.9V903.12h49.7V919Z"/><path class="cls-4" d="M713,969.94H682.66l-4.85,14.33H657.12l29.36-81.15h22.89l29.36,81.15H717.81Zm-5.08-15.26L697.81,925l-9.94,29.71Z"/><path class="cls-4" d="M800.46,908.21a36.37,36.37,0,0,1,14.86,14.27,45.37,45.37,0,0,1,0,42.37,36.14,36.14,0,0,1-14.92,14.34A47.27,47.27,0,0,1,778,984.27H747.63V903.12H778Q790.87,903.12,800.46,908.21ZM794.22,961q6.24-6.13,6.24-17.34t-6.24-17.46Q788,920,776.76,920H767.4v47.16h9.36Q788,967.16,794.22,961Z"/><path class="cls-4" d="M851.44,919v16.41h26.47v15.26H851.44v17.8h29.94v15.84H831.67V903.12h49.71V919Z"/><path 
class="cls-4" d="M935.59,984.27l-16.87-30.63H914v30.63H894.21V903.12h33.18a36.55,36.55,0,0,1,16.35,3.35,23.16,23.16,0,0,1,10.12,9.19,25.73,25.73,0,0,1,3.35,13,24.19,24.19,0,0,1-4.56,14.45q-4.57,6.36-13.47,9l18.72,32.14ZM914,939.65h12.25c3.62,0,6.34-.88,8.15-2.66s2.72-4.28,2.72-7.51a9.72,9.72,0,0,0-2.72-7.28c-1.81-1.78-4.53-2.66-8.15-2.66H914Z"/><path class="cls-5" d="M802.26,700.12a12,12,0,0,0-9.27-9.68,14.15,14.15,0,0,0-5.66-.77H698.67c-1.57,0-3.17-.17-4.63.67-4.19.86-6.88,3.54-8.74,7.22-1.1,2.16-1,4.61-1.6,6.86q-6,21.45-12,42.84h143c-.66-4.91-2.49-9.53-3.7-14.3C808.22,722,804.55,711.24,802.26,700.12Z"/><path class="cls-5" d="M707.71,492.48q4.75,29.91,7.55,60.06c2.23,24.37,4.86,48.7,7.32,73.05,1.26,12.45,2.46,24.91,3.84,37.35.53,4.81,3.47,7.56,8.29,8.41a81.71,81.71,0,0,0,21,0c5-1,7.75-3.77,8.27-8.83q3.35-32.14,6.57-64.32c2.37-23.58,4.61-47.18,7.08-70.75.57-5.39-.34-4.42,5.24-4.48a18.58,18.58,0,0,0,18.24-13.45c1.27-4,.32-7.85-.23-11.72-3.06-21.56-6.27-43.09-9.3-64.65-1.24-8.75-7.8-15.11-16.21-15.33l-1.87-.1h0a1.89,1.89,0,0,0-2.22,0c-9.77,0-19.54-.12-29.31-.11-9,0-18,.13-26.94.2a7.54,7.54,0,0,0-4.91.94c-2.56,1.54-3.36.26-4.4-1.84-6.6-13.23-13.23-26.43-20-39.56-4-7.79-12.88-10.41-20.9-6.48a15.3,15.3,0,0,0-7.48,20.58c1.17,2.56,2.5,5.05,3.77,7.57q21.82,43.19,43.64,86.39C705.9,487.67,707.27,489.76,707.71,492.48Zm65-74.41-.57-.08C772,418,772.74,417.67,772.7,418.07Z"/><path class="cls-5" d="M652.66,502.14a12.93,12.93,0,0,0-11.5-11.8c-11.66-.65-23.33-.15-35-.26-3.24,0-6.51-.39-9.73.31-6.08.95-10.25,5.62-10.95,12-2.34,21.2-4.69,42.4-7.17,63.58-1.19,10.2,3.92,16.38,14.18,16.45,2.41,0,3,.89,3.23,3.07q4.71,41.2,9.59,82.38c.46,3.91,2.6,6,6.72,6.84a87.43,87.43,0,0,0,13.54.07c4.3-.54,6.58-2.62,7.22-6.71.25-1.55.29-3.14.48-4.71,3.08-25.77,6.22-51.53,9.18-77.31.33-2.9,1.22-3.66,4.1-3.65,8.84,0,14.4-6.56,13.4-15.29Q656.23,534.61,652.66,502.14Z"/><path class="cls-5" 
d="M902.22,501.62A12.68,12.68,0,0,0,891.3,490.4c-6.16-.75-12.33-.32-18.49-.31-8.75,0-17.5-.4-26.24.25a12.74,12.74,0,0,0-11.37,11.51q-3.8,33.23-7.45,66.46a12.4,12.4,0,0,0,10,13.53l.24.15.27,0h0a5.12,5.12,0,0,0,3.61.46c2.38-.26,3.25.45,3.5,3,.9,9.31,2.15,18.57,3.24,27.86,2.13,18.11,4.2,36.23,6.38,54.34.57,4.72,3,6.84,7.83,7.13a87.76,87.76,0,0,0,12.87-.07c4-.72,6.29-2.86,6.73-6.43,1-7.85,1.85-15.7,2.78-23.55q3.49-29.43,7-58.85c.25-2.14.53-3.55,3.49-3.52,9.62.1,14.9-6,13.83-15.77Q906,534.08,902.22,501.62Zm-60,79.93-.16,0a8.16,8.16,0,0,0,1.1-.09A4.34,4.34,0,0,1,842.18,581.55Z"/><path class="cls-5" d="M554.49,508.51c-.68-5.8-5-9.64-11-9.95H507c-6.26.29-10.58,4.56-10.89,10.77l-.09,1.16h0c-.19,1.1-.44,2.19-.56,3.3-1.93,16.92-3.74,33.86-5.79,50.78-1.09,9,3.17,14.42,12.23,14.58,2,0,2.79.62,3,2.6.35,3.94.89,7.85,1.35,11.77q3.49,30.09,7,60.18c.37,3.19,1.83,5.54,5.32,6a67.28,67.28,0,0,0,12.85.07c3.43-.14,5.31-2.18,5.84-5.31s.63-6.39.91-9.59l.12-1.18h0a6,6,0,0,0,.42-1.26c2.32-19.74,4.7-39.46,6.86-59.21.33-3,1-4.21,4.33-4.16,7.12.1,12-5.59,11.22-12.69Q557.84,537.45,554.49,508.51Zm-17,133.71a2.54,2.54,0,0,1,.17.45l0,.11A3.27,3.27,0,0,0,537.52,642.22Z"/><path class="cls-5" d="M997.6,566.18q-3.2-28.59-6.48-57.19c-.55-4.76-2.92-8.2-7.62-9.77a4.74,4.74,0,0,0-2.89-.77q-18.87,0-37.75.13c-5.84.73-9.68,4.45-10.34,10.25-2.12,18.59-4.11,37.21-6.3,55.79-1.08,9.09,3.13,14.35,12.26,14.52,2.06,0,2.75.73,2.92,2.65.37,4,.91,8.07,1.37,12.1q3.48,29.91,7,59.81c.44,3.76,2.17,5.46,6,6.1a69.9,69.9,0,0,0,12.19,0c3.81-.63,5.55-2.37,6-6.11,2.76-23.55,5.56-47.08,8.19-70.64.32-2.89,1-4,4.17-3.94C993.55,579.19,998.41,573.47,997.6,566.18Z"/><path class="cls-5" d="M741.5,404.9c2,.37,4.07.89,6.13.1h0c19.82-.14,33.73-23.11,25.44-42a30.38,30.38,0,0,0-57.87,17C717.26,393.1,729.08,404.29,741.5,404.9Z"/><path class="cls-5" d="M868.67,482.87a22,22,0,0,0,.34-44,22,22,0,1,0-.34,44Z"/><path class="cls-5" d="M619.07,482.88a22,22,0,1,0-22-21.93A22.06,22.06,0,0,0,619.07,482.88Z"/><path class="cls-5" 
d="M522.52,491.71a10,10,0,0,0,6.09-.07c10.6-1.55,17.65-11.87,15.51-22.71a19.31,19.31,0,0,0-23-14.83c-9.6,2.23-15.62,10.51-14.94,20.54C506.72,483.43,513.92,490.93,522.52,491.71Z"/><path class="cls-5" d="M958.45,491.63a9.87,9.87,0,0,0,6.08.08c10.35-.89,17.76-10.8,16.3-21.79-1.25-9.49-9.92-16.57-19.85-16.23a19.1,19.1,0,0,0-18.32,17.91C942.08,481.65,948.86,490.25,958.45,491.63Z"/><path class="cls-6" d="M1421.26,1035.29H78.74l33.72,166.55a168,168,0,0,0,164.67,134.67h945.74a168,168,0,0,0,164.67-134.67Z"/><path class="cls-5" d="M345.4,1189.15c0-30-10.59-43.72-16.2-48.91a1.6,1.6,0,0,0-2.65,1.42c1.09,17-20.11,21.23-20.11,47.8v.16c0,16.2,12.16,29.41,27.12,29.41s27.11-13.21,27.11-29.41v-.16c0-7.54-2.8-14.78-5.61-20.12-.62-1.11-2.18-.63-2,.31C358.18,1192.61,345.4,1206.76,345.4,1189.15Z"/><path class="cls-5" d="M313.14,1252.52a2.88,2.88,0,0,1-1.87-.78l-69.34-70a2.88,2.88,0,0,1,0-3.78l73.24-73.9a3.21,3.21,0,0,1,1.71-.63h21a2.45,2.45,0,0,1,2.34,1.57,2.57,2.57,0,0,1-.62,2.83l-68.73,69.51a3.54,3.54,0,0,0,0,5l54.39,55a2.89,2.89,0,0,1,0,3.77l-10.44,10.69a3.21,3.21,0,0,1-1.72.63ZM327,1264.79a2.46,2.46,0,0,1-2.33-1.57,2.54,2.54,0,0,1,.62-2.83l68.88-69.51a3.74,3.74,0,0,0,1.09-2.52,3.13,3.13,0,0,0-1.09-2.51l-54.54-55a2.88,2.88,0,0,1,0-3.78l10.59-10.69a2.58,2.58,0,0,1,1.87-.79,2.4,2.4,0,0,1,1.72,1l69.19,70a2.67,2.67,0,0,1,0,3.77l-73.24,73.91a2.58,2.58,0,0,1-1.87.79H327Z"/><path class="cls-4" 
d="M501.71,1153.77a1.56,1.56,0,0,1-1.56.94,2.94,2.94,0,0,1-1.87-1.1,18,18,0,0,0-3.12-2.36,18.59,18.59,0,0,0-5-2.36,21,21,0,0,0-7.48-1.1,23.8,23.8,0,0,0-7.8,1.26,17,17,0,0,0-5.61,3.46,13.77,13.77,0,0,0-3.42,5,15.77,15.77,0,0,0-1.25,6,13.38,13.38,0,0,0,1.71,6.92A14.5,14.5,0,0,0,471,1175a25.45,25.45,0,0,0,6.54,3.14c2.5.95,5,1.73,7.48,2.52a70.45,70.45,0,0,1,7.48,2.83,29.78,29.78,0,0,1,6.55,3.77,16.58,16.58,0,0,1,4.68,5.66,18.92,18.92,0,0,1,1.71,8.34,27.58,27.58,0,0,1-1.71,9.59,21.89,21.89,0,0,1-5,7.86,27.12,27.12,0,0,1-8.11,5.35,29.89,29.89,0,0,1-10.9,1.89,30.63,30.63,0,0,1-13.72-2.83,32.73,32.73,0,0,1-10.13-7.87l1.56-2.51A1.73,1.73,0,0,1,459,1212a1.88,1.88,0,0,1,1.25.63c.46.47,1.24,1.1,1.87,1.73.78.63,1.71,1.42,2.8,2.2a21.3,21.3,0,0,0,3.74,2.2,28.35,28.35,0,0,0,4.83,1.73,25.5,25.5,0,0,0,6.24.63,24,24,0,0,0,8.57-1.41,20.29,20.29,0,0,0,6.39-3.93,17.83,17.83,0,0,0,4.05-6,20.16,20.16,0,0,0,1.4-7.39,14,14,0,0,0-1.71-7.08,13.54,13.54,0,0,0-4.68-4.72,25.78,25.78,0,0,0-6.54-3.14c-2.5-.79-5-1.73-7.48-2.52s-5-1.73-7.48-2.67a24.54,24.54,0,0,1-6.55-3.77,19.12,19.12,0,0,1-4.67-5.82,20.43,20.43,0,0,1-1.72-8.81,19.69,19.69,0,0,1,1.56-7.86,21.22,21.22,0,0,1,4.52-6.76,24.5,24.5,0,0,1,7.32-4.72,28.28,28.28,0,0,1,10-1.73,29.44,29.44,0,0,1,11.37,2,31.39,31.39,0,0,1,9.2,6.14Z"/><path class="cls-4" 
d="M479.42,1227.36c-5.45,0-10.28-.94-14.18-3a34.29,34.29,0,0,1-10.44-8l-.47-.47,2-3.14a2.88,2.88,0,0,1,2.34-1.26,2.69,2.69,0,0,1,1.87.94c.47.48,1.24,1.1,2,1.73s1.72,1.42,2.65,2.2a19.17,19.17,0,0,0,3.59,2.05,20.18,20.18,0,0,0,4.67,1.57,23.42,23.42,0,0,0,6.08.63,22.26,22.26,0,0,0,8.26-1.41,20.16,20.16,0,0,0,6.08-3.78,15.56,15.56,0,0,0,3.74-5.66,17.91,17.91,0,0,0,1.24-7.08,12.18,12.18,0,0,0-1.55-6.6,14.65,14.65,0,0,0-4.37-4.4,25.13,25.13,0,0,0-6.23-3c-2.34-.79-4.83-1.73-7.48-2.52s-5.14-1.73-7.48-2.67a27.88,27.88,0,0,1-6.7-3.93,18.64,18.64,0,0,1-4.83-6.14,20.14,20.14,0,0,1-1.87-9.12,21.94,21.94,0,0,1,1.55-8.33,21.69,21.69,0,0,1,4.68-7.08,23.72,23.72,0,0,1,7.64-4.87,26.6,26.6,0,0,1,10.28-1.89,30.54,30.54,0,0,1,11.69,2,31,31,0,0,1,9.5,6.45l.47.47-1.71,3.31c-.94,1.73-2.65,2.2-4.83.15a21.19,21.19,0,0,0-3.12-2.2,36.42,36.42,0,0,0-4.83-2.36,22.18,22.18,0,0,0-7.17-1.1,21.86,21.86,0,0,0-7.48,1.26,15.27,15.27,0,0,0-5.3,3.3,16.14,16.14,0,0,0-3.27,4.72,14.06,14.06,0,0,0-1.09,5.66,11.06,11.06,0,0,0,1.56,6.29,14.5,14.5,0,0,0,4.36,4.4,33,33,0,0,0,6.23,3.15c2.34.79,4.83,1.73,7.48,2.52a59.38,59.38,0,0,1,7.48,2.83,28,28,0,0,1,6.71,3.93,18.15,18.15,0,0,1,4.83,6,18.94,18.94,0,0,1,1.87,8.81,27.56,27.56,0,0,1-1.87,10.06,23.81,23.81,0,0,1-5.3,8.18,26.17,26.17,0,0,1-8.42,5.51,30.92,30.92,0,0,1-10.91,1.88Zm-22.59-11.79a29.22,29.22,0,0,0,22.75,9.91,28.28,28.28,0,0,0,10.6-1.89,24.91,24.91,0,0,0,7.79-5,22.47,22.47,0,0,0,4.83-7.55,25.86,25.86,0,0,0,1.71-9.28,20.45,20.45,0,0,0-1.55-8,16.28,16.28,0,0,0-4.37-5.35,34.83,34.83,0,0,0-6.23-3.77c-2.34-.94-4.83-1.89-7.33-2.83-2.49-.79-5-1.73-7.48-2.52a38,38,0,0,1-6.7-3.3,16.13,16.13,0,0,1-4.83-4.87,13.4,13.4,0,0,1-1.87-7.4,18.17,18.17,0,0,1,1.25-6.44,14,14,0,0,1,3.58-5.35,15.23,15.23,0,0,1,5.92-3.62A22.08,22.08,0,0,1,483,1147a23.22,23.22,0,0,1,7.79,1.1,24.58,24.58,0,0,1,8.42,5c.93.78,1.24.78,1.24.78a.59.59,0,0,0,.63-.47l1.09-2a28.29,28.29,0,0,0-8.26-5.51A30.2,30.2,0,0,0,483,1144a26.45,26.45,0,0,0-9.66,1.73,22.3,22.3,0,0,0-7,4.4,18.94,18.94,0,0,0-5.76,14,18
.84,18.84,0,0,0,1.71,8.33,15.13,15.13,0,0,0,4.37,5.5,26.69,26.69,0,0,0,6.23,3.62c2.34.94,4.83,1.89,7.32,2.67s5,1.58,7.48,2.52a29.44,29.44,0,0,1,6.7,3.3,13.59,13.59,0,0,1,4.84,5,14.49,14.49,0,0,1,1.87,7.7,21.83,21.83,0,0,1-1.41,7.86,15.85,15.85,0,0,1-4.2,6.29,22.3,22.3,0,0,1-6.71,4.25,26.34,26.34,0,0,1-8.88,1.57,28.73,28.73,0,0,1-6.54-.78,31.4,31.4,0,0,1-5-1.73,16.35,16.35,0,0,1-3.9-2.36c-1.09-.79-2-1.58-2.8-2.2a23,23,0,0,1-1.87-1.73c-.47-.48-.78-.48-.78-.48a.84.84,0,0,0-.78.48Z"/><path class="cls-4" d="M594.43,1184.59a50,50,0,0,1-2.8,17.14,37.3,37.3,0,0,1-8,13.21,36.89,36.89,0,0,1-12.15,8.49,42.53,42.53,0,0,1-31.33,0,34.72,34.72,0,0,1-12.15-8.49,37.3,37.3,0,0,1-7.95-13.21,53.68,53.68,0,0,1,0-34.28,37.3,37.3,0,0,1,7.95-13.21,36.79,36.79,0,0,1,12.15-8.49,42.53,42.53,0,0,1,31.33,0,34.81,34.81,0,0,1,12.15,8.49,37.3,37.3,0,0,1,8,13.21A50,50,0,0,1,594.43,1184.59Zm-6.07,0a47.67,47.67,0,0,0-2.34-15.41,32.84,32.84,0,0,0-6.55-11.48,28.23,28.23,0,0,0-10.28-7.23,36.09,36.09,0,0,0-26.5,0,28.23,28.23,0,0,0-10.28,7.23,33.1,33.1,0,0,0-6.7,11.48,47,47,0,0,0-2.34,15.41,47.67,47.67,0,0,0,2.34,15.41,31,31,0,0,0,6.7,11.48,28.47,28.47,0,0,0,10.28,7.23,33.22,33.22,0,0,0,13.25,2.52,32.69,32.69,0,0,0,13.25-2.52,26.66,26.66,0,0,0,10.28-7.23A32.93,32.93,0,0,0,586,1200,47,47,0,0,0,588.36,1184.59Z"/><path class="cls-4" 
d="M555.94,1227.21a42.86,42.86,0,0,1-15.89-3,35,35,0,0,1-12.47-8.65,39,39,0,0,1-8.11-13.53,51.78,51.78,0,0,1-3-17.45,48.41,48.41,0,0,1,3-17.46,40.43,40.43,0,0,1,8.11-13.52,35,35,0,0,1,12.47-8.65,40.47,40.47,0,0,1,15.89-3.14,43.72,43.72,0,0,1,16,3,35.13,35.13,0,0,1,12.47,8.65,40.56,40.56,0,0,1,8.1,13.53,55.62,55.62,0,0,1,0,34.91,40.41,40.41,0,0,1-8.1,13.52,34.88,34.88,0,0,1-12.47,8.65A38.53,38.53,0,0,1,555.94,1227.21Zm0-83.35a37.73,37.73,0,0,0-15.27,3,34.33,34.33,0,0,0-19.48,21.07,51.78,51.78,0,0,0,0,33.65,39.13,39.13,0,0,0,7.64,12.9,34.69,34.69,0,0,0,11.84,8.17,42.47,42.47,0,0,0,30.54,0,34.82,34.82,0,0,0,11.85-8.17,36,36,0,0,0,7.63-12.74,51.78,51.78,0,0,0,0-33.65,39.28,39.28,0,0,0-7.63-12.9,34.82,34.82,0,0,0-11.85-8.17A34.4,34.4,0,0,0,555.94,1143.86Z"/><path class="cls-4" d="M640.72,1221.23a24.89,24.89,0,0,0,10.44-2A23,23,0,0,0,663.78,1205a33.41,33.41,0,0,0,1.72-10.69v-50.48h5.76v50.48a36.8,36.8,0,0,1-2.18,12.58,30.6,30.6,0,0,1-6.08,10.22,28.27,28.27,0,0,1-9.66,6.76,32.94,32.94,0,0,1-12.78,2.52,30.05,30.05,0,0,1-22.44-9.28,29.25,29.25,0,0,1-6.08-10.22,37.49,37.49,0,0,1-2.18-12.58v-50.48h5.92v50.32a34.22,34.22,0,0,0,1.72,10.7,25,25,0,0,0,4.83,8.49,23.78,23.78,0,0,0,7.79,5.66,23.33,23.33,0,0,0,10.6,2.2Z"/><path class="cls-4" 
d="M640.72,1227.36a31.88,31.88,0,0,1-13.09-2.51,28.82,28.82,0,0,1-10-7.08,32.33,32.33,0,0,1-6.23-10.54,38.61,38.61,0,0,1-2.18-12.89v-51.42H617v51.42a37.76,37.76,0,0,0,1.56,10.38,23.53,23.53,0,0,0,4.67,8.18,22.34,22.34,0,0,0,7.48,5.5,25.1,25.1,0,0,0,10,2,24.66,24.66,0,0,0,10-1.88,20.2,20.2,0,0,0,7.48-5.51,24.78,24.78,0,0,0,4.68-8.17,31.58,31.58,0,0,0,1.56-10.38v-51.58h7.79v51.42a38.61,38.61,0,0,1-2.18,12.89,32.53,32.53,0,0,1-6.24,10.54,28.88,28.88,0,0,1-10,7.08,31.91,31.91,0,0,1-13.09,2.51ZM611,1144.8v49.54a34.94,34.94,0,0,0,2,12.26,28.76,28.76,0,0,0,5.92,9.91,25.4,25.4,0,0,0,9.35,6.61,31,31,0,0,0,12.47,2.36,32.05,32.05,0,0,0,12.46-2.36,26.83,26.83,0,0,0,9.36-6.61,31.66,31.66,0,0,0,5.92-9.91,34.94,34.94,0,0,0,2-12.26V1144.8h-3.89v49.54a36.21,36.21,0,0,1-1.72,11,23.2,23.2,0,0,1-5.14,9,23.78,23.78,0,0,1-8.1,6,26.41,26.41,0,0,1-10.76,2.2,25.85,25.85,0,0,1-10.75-2.2,23.68,23.68,0,0,1-8.1-6,25.13,25.13,0,0,1-5-9,35.49,35.49,0,0,1-1.71-11V1144.8Z"/><path class="cls-4" d="M704.77,1184.12h4.83a35.85,35.85,0,0,0,9.66-1.26,19.42,19.42,0,0,0,7.17-3.77,16.33,16.33,0,0,0,4.52-5.82,17.48,17.48,0,0,0,1.56-7.71c0-5.82-1.87-10.06-5.61-12.89s-9.2-4.25-16.52-4.25H695.57v77.06h-5.76v-81.62h20.57c9.35,0,16.36,1.89,20.88,5.5,4.68,3.62,6.86,9,6.86,16a21.19,21.19,0,0,1-1.56,8.5,20.23,20.23,0,0,1-4.68,6.76,23.06,23.06,0,0,1-7.16,4.72,35.48,35.48,0,0,1-9.51,2.51,12,12,0,0,1,2.49,2.36l27.9,35.23h-5.15a4.44,4.44,0,0,1-1.55-.32,2.79,2.79,0,0,1-1.25-1.1l-25.87-33a6.4,6.4,0,0,0-2-1.73c-.77-.32-5.29-.47-6.85-.47,0-4.25.62-4.72,1.87-4.72Z"/><path class="cls-4" 
d="M747.62,1226.42h-7a4.37,4.37,0,0,1-2-.47,4.43,4.43,0,0,1-1.55-1.42l-25.87-33a5.11,5.11,0,0,0-1.72-1.57,50.3,50.3,0,0,0-6.39-.48h-.93v-4.08c0-1.42.31-2.21,2.8-2.21h5a34.73,34.73,0,0,0,9.35-1.25,19.21,19.21,0,0,0,6.86-3.62,14.47,14.47,0,0,0,4.21-5.5,18.29,18.29,0,0,0,1.4-7.24c0-5.5-1.72-9.43-5.3-12.11s-8.88-4.09-15.9-4.09H696.66v76.9h-7.79v-83.34h21.66q14.26,0,21.51,5.66c4.83,3.77,7.32,9.43,7.32,16.82a23.58,23.58,0,0,1-1.71,9,22,22,0,0,1-4.83,7.07,28.63,28.63,0,0,1-7.48,5,36.3,36.3,0,0,1-7.64,2.2,6.71,6.71,0,0,1,.94,1.1Zm-43.79-38.84c1.72,0,5.3.15,6.24.63a7.6,7.6,0,0,1,2.49,2l25.87,33a2.63,2.63,0,0,0,.93,1,2.37,2.37,0,0,0,1.09.31h3.12l-26.65-33.65a6.63,6.63,0,0,0-2.18-2l-2.34-1.42,2.81-.31a27.62,27.62,0,0,0,16.21-6.92,18.29,18.29,0,0,0,4.36-6.45,23.25,23.25,0,0,0,1.56-8.18c0-6.76-2.18-11.79-6.55-15.25s-11.22-5.19-20.26-5.19H690.9v79.41h3.89v-77h15.74c7.33,0,13.09,1.41,17,4.4q6.08,4.48,6.08,13.68a21.7,21.7,0,0,1-1.56,8,16.52,16.52,0,0,1-4.83,6.13,21.38,21.38,0,0,1-7.48,3.94,32.69,32.69,0,0,1-10,1.41h-4.83c-.46,0-.93,0-1.09,2.52Z"/><path class="cls-4" 
d="M814,1211.64a1.11,1.11,0,0,1,.94.47l2.33,2.51a40.22,40.22,0,0,1-5.45,4.88,33.89,33.89,0,0,1-6.55,3.62,54.47,54.47,0,0,1-7.63,2.36,43.29,43.29,0,0,1-9.2.78,37.91,37.91,0,0,1-15.42-3,32.65,32.65,0,0,1-12-8.49,39.22,39.22,0,0,1-7.8-13.21,50,50,0,0,1-2.8-17.14,46.2,46.2,0,0,1,3-17,39,39,0,0,1,8.1-13.21,36.61,36.61,0,0,1,12.47-8.49,41,41,0,0,1,16-3,43.77,43.77,0,0,1,8.11.63,43.11,43.11,0,0,1,6.85,1.89,26.4,26.4,0,0,1,5.92,3.14,46.36,46.36,0,0,1,5.46,4.25l-1.72,2.51a1.51,1.51,0,0,1-1.4.63,1.84,1.84,0,0,1-1.24-.63c-.47-.47-1.25-.94-2-1.57a24.6,24.6,0,0,0-3-1.89,13.26,13.26,0,0,0-4.21-1.88,36.11,36.11,0,0,0-5.61-1.58,33,33,0,0,0-7.17-.63,34.21,34.21,0,0,0-13.56,2.52,29.44,29.44,0,0,0-10.59,7.39,35.6,35.6,0,0,0-7,11.48,42.36,42.36,0,0,0-2.49,15.25,43.31,43.31,0,0,0,2.49,15.42,33.65,33.65,0,0,0,6.86,11.48,30.81,30.81,0,0,0,23.06,9.74,39.34,39.34,0,0,0,7.8-.62,24.68,24.68,0,0,0,6.23-1.73,28.76,28.76,0,0,0,5.3-2.83,28.29,28.29,0,0,0,4.83-3.94c.16-.15.31-.31.47-.31C813.54,1211.79,813.86,1211.64,814,1211.64Z"/><path class="cls-4" 
d="M788.3,1227.36a41.83,41.83,0,0,1-15.74-3,34.28,34.28,0,0,1-12.31-8.64,38.73,38.73,0,0,1-8-13.53,51.85,51.85,0,0,1-2.81-17.45,47.54,47.54,0,0,1,3-17.3,43.94,43.94,0,0,1,8.26-13.53,38.11,38.11,0,0,1,12.78-8.8A42.88,42.88,0,0,1,790,1142a41.57,41.57,0,0,1,15.27,2.52,28.2,28.2,0,0,1,6.08,3.14A46,46,0,0,1,817,1152l.63.63-2.19,3.3a2.44,2.44,0,0,1-2.18,1.1,3.63,3.63,0,0,1-1.87-.79,11.75,11.75,0,0,0-2-1.41,16.3,16.3,0,0,0-3-1.89,33.37,33.37,0,0,0-4-1.89,25.45,25.45,0,0,0-5.45-1.41,44,44,0,0,0-7-.63,33.15,33.15,0,0,0-13.25,2.52,30,30,0,0,0-17,18.24,44.42,44.42,0,0,0-2.49,14.78,45.13,45.13,0,0,0,2.34,15.1,31.58,31.58,0,0,0,6.54,11.16,27.83,27.83,0,0,0,10,6.92,32.05,32.05,0,0,0,12.46,2.36,37.63,37.63,0,0,0,7.64-.63,24.47,24.47,0,0,0,6.08-1.73,40.54,40.54,0,0,0,5.14-2.67,47.2,47.2,0,0,0,4.67-3.78,2.25,2.25,0,0,1,.78-.47,2.15,2.15,0,0,1,2.65.47l3,3.15-.62.63a42.66,42.66,0,0,1-5.61,5,31.13,31.13,0,0,1-6.7,3.77,29.48,29.48,0,0,1-8,2.36A32.62,32.62,0,0,1,788.3,1227.36Zm1.71-83.5a39,39,0,0,0-15.74,3,33.14,33.14,0,0,0-12.15,8.33,37.39,37.39,0,0,0-8,12.9,47.29,47.29,0,0,0-2.81,16.67,48.18,48.18,0,0,0,2.81,16.82,39.28,39.28,0,0,0,7.63,12.9,34.18,34.18,0,0,0,11.69,8.17,38.42,38.42,0,0,0,15,2.84,53.85,53.85,0,0,0,9-.79,27.75,27.75,0,0,0,7.48-2.36,26.76,26.76,0,0,0,6.24-3.62,38.73,38.73,0,0,0,4.67-4.09l-1.71-1.88a.29.29,0,0,0-.47,0c-.16,0-.31.16-.47.31a38.44,38.44,0,0,1-5,3.93,36.1,36.1,0,0,1-5.45,3,29.16,29.16,0,0,1-6.39,1.73,57.5,57.5,0,0,1-7.95.63,36.6,36.6,0,0,1-13.24-2.52,31.46,31.46,0,0,1-10.6-7.39,35.14,35.14,0,0,1-7-11.79,50.2,50.2,0,0,1,0-31.29,33.15,33.15,0,0,1,7.17-11.8,31,31,0,0,1,10.9-7.55,36.38,36.38,0,0,1,13.87-2.67,35.51,35.51,0,0,1,7.33.63,42.68,42.68,0,0,1,5.76,1.57,22.54,22.54,0,0,1,4.37,2,31.06,31.06,0,0,1,3,2c.78.63,1.56,1.1,2,1.57s.78.47.78.47c.47,0,.63-.15.63-.31l1.24-1.89a46.22,46.22,0,0,0-4.67-3.61,28.26,28.26,0,0,0-5.77-3,41,41,0,0,0-6.7-1.89,21.33,21.33,0,0,0-7.48-1.1Z"/><path class="cls-4" 
d="M880.55,1143.86v4.88H838v33h35.38v4.72H838v34h42.54v4.88H832.09v-81.46Z"/><path class="cls-4" d="M881.49,1226.42H831.15v-83.5h50.34v6.76H839v31h35.37v6.76H839v32.08h42.54ZM833,1224.53h46.6v-3H837.08v-36h35.37v-2.83H837.08v-34.91h42.54v-3H833Z"/><path class="cls-4" d="M953,1142.92v12.42H916.4V1180h30.85v12.42H916.4v34H901v-83.35h52Zm89.61,41.67a46.4,46.4,0,0,1-3,17,39.16,39.16,0,0,1-8.58,13.53,39.57,39.57,0,0,1-13.24,9,43.29,43.29,0,0,1-17.14,3.15,46.37,46.37,0,0,1-17.15-3.15,39.19,39.19,0,0,1-21.81-22.49,50.18,50.18,0,0,1,0-34,39.14,39.14,0,0,1,8.57-13.53,41.25,41.25,0,0,1,13.24-9,47.12,47.12,0,0,1,34.29.16,39.26,39.26,0,0,1,21.82,22.48A48.27,48.27,0,0,1,1042.63,1184.59Zm-15.74,0a37.73,37.73,0,0,0-1.87-12.27,27.87,27.87,0,0,0-5.15-9.27,22.41,22.41,0,0,0-8.25-5.82,30.06,30.06,0,0,0-21.82,0,23.76,23.76,0,0,0-8.26,5.82,25.73,25.73,0,0,0-5.3,9.27,41.19,41.19,0,0,0,0,24.54,25.73,25.73,0,0,0,5.3,9.27,23.76,23.76,0,0,0,8.26,5.82,30.06,30.06,0,0,0,21.82,0,23.72,23.72,0,0,0,8.25-5.82,27.87,27.87,0,0,0,5.15-9.27A37.73,37.73,0,0,0,1026.89,1184.59Zm47.84-1.89h3.58a22.28,22.28,0,0,0,7.64-1.1,15.51,15.51,0,0,0,5.3-3,11.39,11.39,0,0,0,3.12-4.56,17.6,17.6,0,0,0,1.09-5.82c0-4.24-1.41-7.54-4.21-9.74s-7-3.46-12.62-3.46h-10v71.39h-15.43v-83.35h25.25a55.53,55.53,0,0,1,14.49,1.73,26.12,26.12,0,0,1,9.82,4.88,18.79,18.79,0,0,1,5.61,7.55,24.6,24.6,0,0,1,1.87,9.75,27,27,0,0,1-1.25,8,21.52,21.52,0,0,1-3.58,6.76,23.94,23.94,0,0,1-5.77,5.35,25.2,25.2,0,0,1-7.79,3.61,15.81,15.81,0,0,1,5,4.72l20.73,30.82h-13.87a7.37,7.37,0,0,1-3.43-.78,5.66,5.66,0,0,1-2.34-2.36l-17.3-26.74c-.47-.63-1.4-2-2.34-2.2h-5.45v-10.06c.16-1.42,1.87-1.42,1.87-1.42Zm88,32.24a38.57,38.57,0,0,0,9-.94,41.18,41.18,0,0,0,7.32-2.68v-18.87h-10.28a3.8,3.8,0,0,1-2.34-.78,2.8,2.8,0,0,1-.94-2.05v-8.8H1193v37.42a34.6,34.6,0,0,1-6.54,3.93,50.23,50.23,0,0,1-7.33,2.83,45.43,45.43,0,0,1-8.26,1.58,90.69,90.69,0,0,1-9.5.47,44.27,44.27,0,0,1-16.68-3.15,36.47,36.47,0,0,1-13.09-8.8,42.19,42.19,0,0,1-8.73-13.53,47.31,47.31,0,0,1-3.11-17.14,47
.93,47.93,0,0,1,3-17.3,39,39,0,0,1,8.57-13.52,38.44,38.44,0,0,1,13.56-8.81,49.09,49.09,0,0,1,17.76-3.14c6.7,0,12.63.94,17.46,3a41.12,41.12,0,0,1,12.46,7.86l-4.51,7.08a4.11,4.11,0,0,1-3.43,2.2,5,5,0,0,1-2.81-1c-1.25-.78-2.49-1.41-3.74-2.2a17.37,17.37,0,0,0-4.21-1.88,38.68,38.68,0,0,0-5.14-1.26,41.35,41.35,0,0,0-6.7-.47,27.67,27.67,0,0,0-11.06,2,22.92,22.92,0,0,0-8.42,6,25.73,25.73,0,0,0-5.3,9.27,37.16,37.16,0,0,0-1.87,12.11,36.55,36.55,0,0,0,2,12.9,27.86,27.86,0,0,0,5.61,9.43,25.65,25.65,0,0,0,8.57,6,29.22,29.22,0,0,0,11.22,2.2Zm84.62-22.17h-25.09v21.07h36.47v12.42h-52v-83.34h52v12.42h-36.63V1181H1251v9.12s-.15,2.68-3.58,2.68Z"/></svg>
\ No newline at end of file

diff --git a/scst/www/images/sourceforge_badges/oss-open-source-excellence-white.svg b/scst/www/images/sourceforge_badges/oss-open-source-excellence-white.svg
new file mode 100644
index 0000000..1cca345
--- /dev/null
+++ b/scst/www/images/sourceforge_badges/oss-open-source-excellence-white.svg

@@ -0,0 +1 @@
+<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1500 1500"><defs><style>.cls-1{fill:#dbdbdb;}.cls-2{fill:#eae9ee;stroke:#c7c2bd;}.cls-2,.cls-3,.cls-4{stroke-miterlimit:10;}.cls-2,.cls-4{stroke-width:5.1px;}.cls-3{fill:#fff;stroke-width:25.5px;}.cls-3,.cls-4{stroke:#898989;}.cls-4{fill:url(#linear-gradient);}.cls-5{fill:#cacdcc;}.cls-6{fill:#ff6700;}.cls-7{fill:#3f3f3f;}</style><linearGradient id="linear-gradient" x1="117.03" y1="1013.6" x2="1382.97" y2="1013.6" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#eaeaec"/><stop offset="1" stop-color="#fffefe"/></linearGradient></defs><polygon class="cls-1" points="305.42 1239.51 13.18 1239.51 75.36 1098.66 13.18 946.07 305.42 946.07 305.42 1239.51"/><polygon class="cls-1" points="1194.58 957.81 1486.82 957.81 1424.64 1098.66 1486.82 1251.24 1194.58 1251.24 1194.58 957.81"/><path class="cls-2" d="M1338.35,117.61H161.65V1382.39L750,1171.49l588.35,210.9V117.61Z"/><path class="cls-3" d="M1287.9,216.42V1320.67l-517-185.34-20.9-7.49-20.9,7.49-517,185.34V157.33H1287.9Z"/><rect class="cls-4" x="117.03" y="876.32" width="1265.93" height="274.57"/><circle class="cls-5" cx="186.65" cy="1019.1" r="26.74"/><circle class="cls-5" cx="1313.35" cy="1019.1" r="26.74"/><path class="cls-6" d="M345.4,1016.31c0-30-10.59-43.71-16.2-48.9a1.6,1.6,0,0,0-2.65,1.41c1.09,17-20.11,21.23-20.11,47.81v.16c0,16.19,12.16,29.4,27.12,29.4s27.11-13.21,27.11-29.4v-.16c0-7.55-2.8-14.78-5.61-20.13-.62-1.1-2.18-.63-2,.32C358.18,1019.77,345.4,1033.93,345.4,1016.31Z"/><path class="cls-6" 
d="M313.14,1079.69a2.89,2.89,0,0,1-1.87-.79l-69.34-70a2.87,2.87,0,0,1,0-3.77l73.24-73.91a3.29,3.29,0,0,1,1.71-.63h21a2.46,2.46,0,0,1,2.34,1.57,2.57,2.57,0,0,1-.62,2.83l-68.73,69.51a3.54,3.54,0,0,0,0,5l54.39,55a2.91,2.91,0,0,1,0,3.78l-10.44,10.69a3.3,3.3,0,0,1-1.72.63ZM327,1092a2.45,2.45,0,0,1-2.33-1.57,2.54,2.54,0,0,1,.62-2.83L394.18,1018a3.72,3.72,0,0,0,1.09-2.51,3.14,3.14,0,0,0-1.09-2.52l-54.54-55a2.87,2.87,0,0,1,0-3.77l10.59-10.69a2.54,2.54,0,0,1,1.87-.79,2.39,2.39,0,0,1,1.72.94l69.19,70a2.6,2.6,0,0,1,.78,1.89,2.64,2.64,0,0,1-.78,1.89l-73.24,73.9a2.54,2.54,0,0,1-1.87.79H327Z"/><path class="cls-7" d="M501.71,980.93a1.57,1.57,0,0,1-1.56,1,3,3,0,0,1-1.87-1.1,17.47,17.47,0,0,0-3.12-2.36,18.29,18.29,0,0,0-5-2.36,20.82,20.82,0,0,0-7.48-1.1,24.07,24.07,0,0,0-7.8,1.25,17.12,17.12,0,0,0-5.61,3.46,13.89,13.89,0,0,0-3.42,5,15.72,15.72,0,0,0-1.25,6,13.38,13.38,0,0,0,1.71,6.92,14.5,14.5,0,0,0,4.68,4.56,25.46,25.46,0,0,0,6.54,3.15c2.5.94,5,1.73,7.48,2.51a72.69,72.69,0,0,1,7.48,2.83,29.81,29.81,0,0,1,6.55,3.78,16.48,16.48,0,0,1,4.68,5.66,18.84,18.84,0,0,1,1.71,8.33,27.63,27.63,0,0,1-1.71,9.6,22,22,0,0,1-5,7.86,27.12,27.12,0,0,1-8.11,5.35,30.1,30.1,0,0,1-10.9,1.88,30.49,30.49,0,0,1-13.72-2.83,32.85,32.85,0,0,1-10.13-7.86l1.56-2.52a1.72,1.72,0,0,1,1.56-.78,1.84,1.84,0,0,1,1.25.63c.46.47,1.24,1.1,1.87,1.73.78.62,1.71,1.41,2.8,2.2a21.94,21.94,0,0,0,3.74,2.2,28.35,28.35,0,0,0,4.83,1.73,25.5,25.5,0,0,0,6.24.63,23.8,23.8,0,0,0,8.57-1.42,20.11,20.11,0,0,0,6.39-3.93,17.63,17.63,0,0,0,4.05-6,20.12,20.12,0,0,0,1.4-7.39,13.93,13.93,0,0,0-1.71-7.07,13.46,13.46,0,0,0-4.68-4.72,25.46,25.46,0,0,0-6.54-3.15c-2.5-.78-5-1.72-7.48-2.51s-5-1.73-7.48-2.67a24.84,24.84,0,0,1-6.55-3.78A19,19,0,0,1,461,999.8a20.42,20.42,0,0,1-1.72-8.8,19.71,19.71,0,0,1,1.56-7.87,21.45,21.45,0,0,1,4.52-6.76,24.7,24.7,0,0,1,7.32-4.72,28.28,28.28,0,0,1,10-1.73A29.26,29.26,0,0,1,494.07,972a31.35,31.35,0,0,1,9.2,6.13Z"/><path class="cls-7" 
d="M479.42,1054.53a30.57,30.57,0,0,1-14.18-3,34.61,34.61,0,0,1-10.44-8l-.47-.47,2-3.15a2.88,2.88,0,0,1,2.34-1.26,2.71,2.71,0,0,1,1.87.95c.47.47,1.24,1.1,2,1.73s1.72,1.41,2.65,2.2a19.15,19.15,0,0,0,3.59,2,20.67,20.67,0,0,0,4.67,1.58,23.42,23.42,0,0,0,6.08.63,22.25,22.25,0,0,0,8.26-1.42,19.94,19.94,0,0,0,6.08-3.77,15.6,15.6,0,0,0,3.74-5.67,17.89,17.89,0,0,0,1.24-7.07,12.22,12.22,0,0,0-1.55-6.61,14.76,14.76,0,0,0-4.37-4.4,25.49,25.49,0,0,0-6.23-3c-2.34-.78-4.83-1.73-7.48-2.51s-5.14-1.73-7.48-2.68a27.88,27.88,0,0,1-6.7-3.93,18.5,18.5,0,0,1-4.83-6.13,20.17,20.17,0,0,1-1.87-9.12,22,22,0,0,1,1.55-8.34,21.54,21.54,0,0,1,4.68-7.07,23.58,23.58,0,0,1,7.64-4.88,26.6,26.6,0,0,1,10.28-1.88,30.35,30.35,0,0,1,11.69,2,31.13,31.13,0,0,1,9.51,6.45l.46.47-1.71,3.3c-.94,1.73-2.65,2.2-4.83.16a20.46,20.46,0,0,0-3.12-2.2,34.11,34.11,0,0,0-4.83-2.36,22.18,22.18,0,0,0-7.17-1.1,21.86,21.86,0,0,0-7.48,1.26,15,15,0,0,0-5.3,3.3,16,16,0,0,0-3.27,4.72A14.06,14.06,0,0,0,465.4,991a11.1,11.1,0,0,0,1.56,6.29,14.61,14.61,0,0,0,4.36,4.4,33.61,33.61,0,0,0,6.23,3.15c2.34.78,4.84,1.72,7.48,2.51a61,61,0,0,1,7.48,2.83,28,28,0,0,1,6.71,3.93,18.29,18.29,0,0,1,4.83,6,18.93,18.93,0,0,1,1.87,8.8,27.65,27.65,0,0,1-1.87,10.07,23.9,23.9,0,0,1-5.3,8.18,26,26,0,0,1-8.42,5.5,30.9,30.9,0,0,1-10.91,1.89Zm-22.59-11.8a29.13,29.13,0,0,0,22.75,9.91,28.28,28.28,0,0,0,10.6-1.89,24.56,24.56,0,0,0,7.79-5,22.37,22.37,0,0,0,4.83-7.55,25.8,25.8,0,0,0,1.71-9.27,20.45,20.45,0,0,0-1.55-8,16,16,0,0,0-4.37-5.35,34.86,34.86,0,0,0-6.23-3.78c-2.34-.94-4.83-1.88-7.33-2.83-2.49-.78-5-1.73-7.48-2.51a39.42,39.42,0,0,1-6.7-3.3,16.05,16.05,0,0,1-4.83-4.88,13.36,13.36,0,0,1-1.87-7.39,18.26,18.26,0,0,1,1.25-6.45,14,14,0,0,1,3.58-5.34,15,15,0,0,1,5.92-3.62,22.08,22.08,0,0,1,8.11-1.26,23.48,23.48,0,0,1,7.79,1.1,25,25,0,0,1,8.42,5c.93.79,1.24.79,1.24.79a.59.59,0,0,0,.63-.47l1.09-2a28.25,28.25,0,0,0-8.26-5.5A30,30,0,0,0,483,971.18a26.45,26.45,0,0,0-9.66,1.73,22.32,22.32,0,0,0-7,4.41,18.9,18.9,0,0,0-5.76,14,18.92,18.92,0,0,0,1.71,8.34,15.21,15.21,0,0,0
,4.37,5.5,27.41,27.41,0,0,0,6.23,3.62c2.34.94,4.83,1.88,7.32,2.67s5,1.57,7.48,2.52a29,29,0,0,1,6.7,3.3,13.64,13.64,0,0,1,4.84,5,14.5,14.5,0,0,1,1.87,7.71,21.87,21.87,0,0,1-1.41,7.86,15.92,15.92,0,0,1-4.2,6.29,22.28,22.28,0,0,1-6.71,4.24,26.09,26.09,0,0,1-8.88,1.58,28.71,28.71,0,0,1-6.54-.79,30.43,30.43,0,0,1-5-1.73,16,16,0,0,1-3.9-2.36c-1.09-.78-2-1.57-2.8-2.2a23,23,0,0,1-1.87-1.73c-.47-.47-.78-.47-.78-.47a.84.84,0,0,0-.78.47Z"/><path class="cls-7" d="M594.43,1011.75a50.08,50.08,0,0,1-2.8,17.15,36.41,36.41,0,0,1-20.1,21.7,42.66,42.66,0,0,1-31.33,0,34.74,34.74,0,0,1-12.15-8.5,37.25,37.25,0,0,1-7.95-13.2,53.71,53.71,0,0,1,0-34.29,36.34,36.34,0,0,1,20.1-21.7,42.53,42.53,0,0,1,31.33,0,34.94,34.94,0,0,1,12.15,8.49,37.3,37.3,0,0,1,8,13.21A50,50,0,0,1,594.43,1011.75Zm-6.07,0A47.67,47.67,0,0,0,586,996.34a32.93,32.93,0,0,0-6.55-11.48,28.47,28.47,0,0,0-10.28-7.23,36.09,36.09,0,0,0-26.5,0,28.47,28.47,0,0,0-10.28,7.23,33.2,33.2,0,0,0-6.7,11.48,47,47,0,0,0-2.34,15.41,47.73,47.73,0,0,0,2.34,15.42,30.89,30.89,0,0,0,6.7,11.47,28.37,28.37,0,0,0,10.28,7.24,33.22,33.22,0,0,0,13.25,2.51,32.68,32.68,0,0,0,13.25-2.51,26.57,26.57,0,0,0,10.28-7.24,32.79,32.79,0,0,0,6.55-11.47A47.09,47.09,0,0,0,588.36,1011.75Z"/><path class="cls-7" d="M555.94,1054.37a42.66,42.66,0,0,1-15.89-3,34.79,34.79,0,0,1-12.47-8.65,38.9,38.9,0,0,1-8.11-13.52,51.88,51.88,0,0,1-3-17.46,48.36,48.36,0,0,1,3-17.45,40.43,40.43,0,0,1,8.11-13.52,34.79,34.79,0,0,1,12.47-8.65A40.3,40.3,0,0,1,555.94,969a43.52,43.52,0,0,1,16,3,34.88,34.88,0,0,1,12.47,8.65,40.41,40.41,0,0,1,8.1,13.52,55.62,55.62,0,0,1,0,34.91,40.56,40.56,0,0,1-8.1,13.53,35.13,35.13,0,0,1-12.47,8.65A38.53,38.53,0,0,1,555.94,1054.37Zm0-83.34a37.89,37.89,0,0,0-15.27,3,34.31,34.31,0,0,0-19.48,21.08,51.78,51.78,0,0,0,0,33.65,39,39,0,0,0,7.64,12.89,34.85,34.85,0,0,0,11.84,8.18,42.61,42.61,0,0,0,30.54,0,35,35,0,0,0,11.85-8.18,36,36,0,0,0,7.63-12.73,51.81,51.81,0,0,0,0-33.66,39.13,39.13,0,0,0-7.63-12.89,35,35,0,0,0-11.85-8.18A34.53,34.53,0,0,0,555.94,971Z"/><path 
class="cls-7" d="M640.72,1048.39a24.73,24.73,0,0,0,10.44-2,23.78,23.78,0,0,0,7.79-5.66,24.1,24.1,0,0,0,4.83-8.49,33.43,33.43,0,0,0,1.72-10.7V971h5.76v50.47a36.8,36.8,0,0,1-2.18,12.58,30.55,30.55,0,0,1-6.08,10.23,28.27,28.27,0,0,1-9.66,6.76,33.12,33.12,0,0,1-12.78,2.51,30,30,0,0,1-22.44-9.27,29.2,29.2,0,0,1-6.08-10.23,37.45,37.45,0,0,1-2.18-12.58V971h5.92v50.32A34.16,34.16,0,0,0,617.5,1032a25,25,0,0,0,4.83,8.49,23.64,23.64,0,0,0,7.79,5.66,23.2,23.2,0,0,0,10.6,2.2Z"/><path class="cls-7" d="M640.72,1054.53a31.88,31.88,0,0,1-13.09-2.52,28.82,28.82,0,0,1-10-7.08,32.28,32.28,0,0,1-6.23-10.53,38.67,38.67,0,0,1-2.18-12.9V970.08H617v51.42a37.76,37.76,0,0,0,1.56,10.38,23.35,23.35,0,0,0,4.67,8.18,22.07,22.07,0,0,0,7.48,5.5,25.1,25.1,0,0,0,10,2,24.49,24.49,0,0,0,10-1.89,20.18,20.18,0,0,0,7.48-5.5,24.72,24.72,0,0,0,4.68-8.18,31.49,31.49,0,0,0,1.56-10.38V970.08h7.79v51.42a38.67,38.67,0,0,1-2.18,12.9,32.48,32.48,0,0,1-6.24,10.53,28.88,28.88,0,0,1-10,7.08,31.92,31.92,0,0,1-13.09,2.52ZM611,972v49.53a35,35,0,0,0,2,12.27,28.85,28.85,0,0,0,5.92,9.91,25.26,25.26,0,0,0,9.35,6.6,31,31,0,0,0,12.47,2.36,32.05,32.05,0,0,0,12.46-2.36,26.68,26.68,0,0,0,9.36-6.6,31.78,31.78,0,0,0,5.92-9.91,35,35,0,0,0,2-12.27V972h-3.89v49.53a36.16,36.16,0,0,1-1.72,11,23.24,23.24,0,0,1-5.14,9,23.75,23.75,0,0,1-8.1,6,26.24,26.24,0,0,1-10.76,2.2,25.69,25.69,0,0,1-10.75-2.2,23.66,23.66,0,0,1-8.1-6,25.17,25.17,0,0,1-5-9,35.4,35.4,0,0,1-1.71-11V972Z"/><path class="cls-7" d="M704.77,1011.28h4.83a35.85,35.85,0,0,0,9.66-1.26,19.42,19.42,0,0,0,7.17-3.77,16.25,16.25,0,0,0,4.52-5.82,17.44,17.44,0,0,0,1.56-7.7c0-5.82-1.87-10.07-5.61-12.9s-9.2-4.24-16.52-4.24H695.57v77.05h-5.76V971h20.57c9.35,0,16.36,1.88,20.88,5.5,4.68,3.62,6.86,9,6.86,16a21.17,21.17,0,0,1-1.56,8.49,20,20,0,0,1-4.68,6.76,22.71,22.71,0,0,1-7.16,4.72,35.48,35.48,0,0,1-9.51,2.52,11.75,11.75,0,0,1,2.49,2.36l27.9,35.22h-5.15a4.68,4.68,0,0,1-1.55-.31,2.79,2.79,0,0,1-1.25-1.1l-25.87-33a6.67,6.67,0,0,0-2-1.73,43,43,0,0,0-6.85-.47c0-4.25.62-4.72,1.87-4.72Z"/><path 
class="cls-7" d="M747.62,1053.58h-7a4.25,4.25,0,0,1-2-.47,4.31,4.31,0,0,1-1.55-1.41l-25.87-33a5.33,5.33,0,0,0-1.72-1.57,50.7,50.7,0,0,0-6.39-.47h-.93v-4.09c0-1.41.31-2.2,2.8-2.2h5a34.32,34.32,0,0,0,9.35-1.26,19.05,19.05,0,0,0,6.86-3.62,14.47,14.47,0,0,0,4.21-5.5,18.21,18.21,0,0,0,1.4-7.23c0-5.51-1.72-9.44-5.3-12.11s-8.88-4.09-15.9-4.09H696.66v76.9h-7.79V970.08h21.66q14.26,0,21.51,5.66c4.83,3.78,7.32,9.44,7.32,16.83a23.53,23.53,0,0,1-1.71,9,22,22,0,0,1-4.83,7.08,28.6,28.6,0,0,1-7.48,5,36.3,36.3,0,0,1-7.64,2.2,7.15,7.15,0,0,1,.94,1.1Zm-43.79-38.84c1.72,0,5.3.16,6.24.63a7.52,7.52,0,0,1,2.49,2l25.87,33a2.59,2.59,0,0,0,.93.94,2.26,2.26,0,0,0,1.09.32h3.12L716.92,1018a6.63,6.63,0,0,0-2.18-2l-2.34-1.42,2.81-.31a27.62,27.62,0,0,0,16.21-6.92,18.2,18.2,0,0,0,4.36-6.45,23.2,23.2,0,0,0,1.56-8.17c0-6.77-2.18-11.8-6.55-15.26s-11.22-5.19-20.26-5.19H690.9v79.42h3.89V974.64h15.74c7.33,0,13.09,1.42,17,4.41q6.08,4.47,6.08,13.68a21.74,21.74,0,0,1-1.56,8,16.52,16.52,0,0,1-4.83,6.13,21.2,21.2,0,0,1-7.48,3.93,32.68,32.68,0,0,1-10,1.42h-4.83c-.46,0-.93,0-1.09,2.51Z"/><path class="cls-7" 
d="M814,1038.8a1.13,1.13,0,0,1,.94.47l2.33,2.52a39.49,39.49,0,0,1-5.45,4.87,32.86,32.86,0,0,1-6.55,3.62,53.17,53.17,0,0,1-7.63,2.36,42.63,42.63,0,0,1-9.2.79,38.08,38.08,0,0,1-15.42-3,32.88,32.88,0,0,1-12-8.49,39.32,39.32,0,0,1-7.8-13.21,50,50,0,0,1-2.8-17.14,46.21,46.21,0,0,1,3-17,39.1,39.1,0,0,1,8.1-13.21A36.9,36.9,0,0,1,774,972.91a41.22,41.22,0,0,1,16-3,39.81,39.81,0,0,1,15,2.52,26.85,26.85,0,0,1,5.92,3.15,46.27,46.27,0,0,1,5.46,4.24l-1.72,2.52a1.51,1.51,0,0,1-1.4.63,1.84,1.84,0,0,1-1.24-.63c-.47-.47-1.25-1-2-1.57a24.6,24.6,0,0,0-3-1.89,13,13,0,0,0-4.21-1.89,35.1,35.1,0,0,0-5.61-1.57,33,33,0,0,0-7.17-.63,34.21,34.21,0,0,0-13.56,2.52,29.32,29.32,0,0,0-10.59,7.39,35.38,35.38,0,0,0-7,11.48,42.36,42.36,0,0,0-2.49,15.25,43.26,43.26,0,0,0,2.49,15.41,33.55,33.55,0,0,0,6.86,11.48,30.86,30.86,0,0,0,23.07,9.75,39.25,39.25,0,0,0,7.79-.63,23.88,23.88,0,0,0,6.23-1.73,28.76,28.76,0,0,0,5.3-2.83,28.22,28.22,0,0,0,4.83-3.93c.16-.16.31-.32.47-.32C813.54,1039,813.86,1038.8,814,1038.8Z"/><path class="cls-7" 
d="M788.3,1054.53a41.83,41.83,0,0,1-15.74-3,34.31,34.31,0,0,1-12.31-8.65,38.68,38.68,0,0,1-8-13.52,51.9,51.9,0,0,1-2.81-17.46,47.54,47.54,0,0,1,3-17.3,43.77,43.77,0,0,1,8.26-13.52,38.28,38.28,0,0,1,12.78-8.81A43.07,43.07,0,0,1,790,969.14a45.38,45.38,0,0,1,8.26.63,46.29,46.29,0,0,1,7,1.88,27.79,27.79,0,0,1,6.08,3.15,45.84,45.84,0,0,1,5.61,4.4l.63.63-2.19,3.3a2.42,2.42,0,0,1-2.18,1.1,3.69,3.69,0,0,1-1.87-.78,12.32,12.32,0,0,0-2-1.42,16.31,16.31,0,0,0-3-1.88,33.37,33.37,0,0,0-4-1.89,25.44,25.44,0,0,0-5.45-1.42,45.22,45.22,0,0,0-7-.63,33.15,33.15,0,0,0-13.25,2.52,30.06,30.06,0,0,0-17,18.24,44.51,44.51,0,0,0-2.49,14.78,45,45,0,0,0,2.34,15.1A31.63,31.63,0,0,0,766,1038a27.93,27.93,0,0,0,10,6.91,31.87,31.87,0,0,0,12.46,2.36,36.93,36.93,0,0,0,7.64-.63,24.47,24.47,0,0,0,6.08-1.73,38.1,38.1,0,0,0,5.14-2.67,49.88,49.88,0,0,0,4.68-3.77,1.89,1.89,0,0,1,.77-.47,2.13,2.13,0,0,1,2.65.47l3,3.14-.62.63a42,42,0,0,1-5.61,5,31.16,31.16,0,0,1-6.7,3.78,29.84,29.84,0,0,1-8,2.36A33,33,0,0,1,788.3,1054.53ZM790,971a39.16,39.16,0,0,0-15.74,3,33,33,0,0,0-12.15,8.34,37.25,37.25,0,0,0-8,12.89,47.33,47.33,0,0,0-2.81,16.67,48.24,48.24,0,0,0,2.81,16.83,39.13,39.13,0,0,0,7.63,12.89,34.35,34.35,0,0,0,11.69,8.18,38.42,38.42,0,0,0,15,2.83,52.82,52.82,0,0,0,9-.79,34.09,34.09,0,0,0,13.72-6,39.62,39.62,0,0,0,4.67-4.09l-1.71-1.89a.3.3,0,0,0-.47,0c-.16,0-.31.16-.47.32a39.32,39.32,0,0,1-5,3.93,37,37,0,0,1-5.45,3,30.32,30.32,0,0,1-6.39,1.73,59.1,59.1,0,0,1-7.95.63,36.81,36.81,0,0,1-13.24-2.52,31.6,31.6,0,0,1-10.6-7.39,35.29,35.29,0,0,1-7-11.8,50.2,50.2,0,0,1,0-31.29,33,33,0,0,1,7.17-11.79,30.89,30.89,0,0,1,10.9-7.55,36.38,36.38,0,0,1,13.87-2.67,35.54,35.54,0,0,1,7.33.62,44.14,44.14,0,0,1,5.76,1.58,22.55,22.55,0,0,1,4.37,2,32.86,32.86,0,0,1,3,2c.78.62,1.56,1.1,2,1.57s.77.47.77.47c.47,0,.63-.16.63-.31l1.24-1.89A45,45,0,0,0,810,977a28.77,28.77,0,0,0-5.77-3,42,42,0,0,0-6.7-1.88A21.33,21.33,0,0,0,790,971Z"/><path class="cls-7" d="M880.55,971v4.87H838v33h35.38v4.72H838v34h42.54v4.87H832.09V971Z"/><path class="cls-7" 
d="M881.49,1053.58H831.15v-83.5h50.34v6.76H839v31h35.37v6.76H839v32.08h42.54ZM833,1051.7h46.6v-3H837.08v-36h35.37v-2.83H837.08V975h42.54v-3H833Z"/><path class="cls-7" d="M953,970.08v12.43H916.4v24.68h30.85v12.43H916.4v34H901V970.24h52Zm89.61,41.67a46.42,46.42,0,0,1-3,17,39,39,0,0,1-8.58,13.52,39.59,39.59,0,0,1-13.24,9,43.29,43.29,0,0,1-17.14,3.14,46.37,46.37,0,0,1-17.15-3.14,39.26,39.26,0,0,1-21.81-22.49,50.21,50.21,0,0,1,0-34,39,39,0,0,1,8.57-13.52,41.27,41.27,0,0,1,13.24-9,47.26,47.26,0,0,1,34.29.16,39.24,39.24,0,0,1,21.82,22.49A48.17,48.17,0,0,1,1042.63,1011.75Zm-15.74,0a37.67,37.67,0,0,0-1.87-12.26,27.92,27.92,0,0,0-5.15-9.28,22.53,22.53,0,0,0-8.25-5.82,30.19,30.19,0,0,0-21.82,0,23.89,23.89,0,0,0-8.26,5.82,25.77,25.77,0,0,0-5.3,9.28,41.16,41.16,0,0,0,0,24.53,25.77,25.77,0,0,0,5.3,9.28,24.12,24.12,0,0,0,8.26,5.82,30.19,30.19,0,0,0,21.82,0,24,24,0,0,0,8.25-5.82A27.92,27.92,0,0,0,1025,1024,37.73,37.73,0,0,0,1026.89,1011.75Zm47.84-1.88h3.58a22.52,22.52,0,0,0,7.64-1.1,15.68,15.68,0,0,0,5.3-3,11.46,11.46,0,0,0,3.12-4.56,17.65,17.65,0,0,0,1.09-5.82c0-4.25-1.41-7.55-4.21-9.75s-7-3.46-12.62-3.46h-10v71.39h-15.43V970.24h25.25A55.53,55.53,0,0,1,1093,972a26.26,26.26,0,0,1,9.82,4.87,18.85,18.85,0,0,1,5.61,7.55,24.64,24.64,0,0,1,1.87,9.75,26.93,26.93,0,0,1-1.25,8,21.31,21.31,0,0,1-3.58,6.76,23.54,23.54,0,0,1-5.77,5.35,25.45,25.45,0,0,1-7.79,3.62,15.77,15.77,0,0,1,5,4.71l20.73,30.83h-13.87a7.36,7.36,0,0,1-3.43-.79,5.6,5.6,0,0,1-2.34-2.36l-17.3-26.73c-.47-.63-1.4-2-2.34-2.2h-5.45v-10.07c.16-1.41,1.87-1.41,1.87-1.41Zm88,32.23a38,38,0,0,0,9-.94,41.15,41.15,0,0,0,7.32-2.67v-18.87h-10.28a3.81,3.81,0,0,1-2.34-.79,2.8,2.8,0,0,1-.94-2V1008H1193v37.43a34.6,34.6,0,0,1-6.54,3.93,51.4,51.4,0,0,1-7.33,2.83,45.43,45.43,0,0,1-8.26,1.57,88,88,0,0,1-9.5.47,44.46,44.46,0,0,1-16.68-3.14,36.49,36.49,0,0,1-13.09-8.81,42,42,0,0,1-8.73-13.52,47.31,47.31,0,0,1-3.11-17.14,47.93,47.93,0,0,1,3-17.3,39,39,0,0,1,8.57-13.52,38.17,38.17,0,0,1,13.56-8.81,48.88,48.88,0,0,1,17.76-3.15c6.7,0,12.63.95,17.46,3a41
.51,41.51,0,0,1,12.46,7.86l-4.51,7.08a4.09,4.09,0,0,1-3.43,2.2,5,5,0,0,1-2.81-.94c-1.25-.79-2.49-1.42-3.74-2.2a17,17,0,0,0-4.21-1.89,42.05,42.05,0,0,0-5.14-1.26,42.52,42.52,0,0,0-6.7-.47,27.85,27.85,0,0,0-11.06,2,23,23,0,0,0-8.42,6,25.77,25.77,0,0,0-5.3,9.28,37.12,37.12,0,0,0-1.87,12.11,36.5,36.5,0,0,0,2,12.89,27.91,27.91,0,0,0,5.61,9.44,25.62,25.62,0,0,0,8.57,6,29,29,0,0,0,11.22,2.2Zm84.62-22.17h-25.09V1041h36.47v12.43h-52V970.08h52v12.43h-36.63v25.63H1251v9.12s-.15,2.67-3.58,2.67Z"/><path class="cls-7" d="M283.92,706.32A49.64,49.64,0,0,1,265,687.75a54.62,54.62,0,0,1,0-53.54,49.86,49.86,0,0,1,18.93-18.49,55,55,0,0,1,52.45,0,48.87,48.87,0,0,1,18.79,18.49,55.42,55.42,0,0,1-.07,53.54,49.34,49.34,0,0,1-18.79,18.57,54.9,54.9,0,0,1-52.38,0Zm45.66-24q7.31-8.08,7.3-21.38t-7.3-21.46q-7.29-8-19.43-8t-19.58,7.95q-7.31,8-7.3,21.53t7.3,21.46q7.29,8,19.58,8Q322.29,690.43,329.58,682.33Z"/><path class="cls-7" d="M447.57,659.29a29.73,29.73,0,0,1-12.43,11.77q-8.38,4.49-20.81,4.48H399V712H374.3V610.52h40q12.14,0,20.52,4.19a28.7,28.7,0,0,1,12.57,11.56,33.63,33.63,0,0,1,4.19,16.9A32.78,32.78,0,0,1,447.57,659.29ZM423,652.57q3.47-3.33,3.47-9.4T423,633.78q-3.47-3.31-10.55-3.32H399v25.43h13.44Q419.53,655.89,423,652.57Z"/><path class="cls-7" d="M487.74,630.31v20.52h33.09v19.08H487.74v22.25h37.42V712H463V610.52h62.13v19.79Z"/><path class="cls-7" d="M630.5,712H605.79l-41.32-62.57V712H539.76V610.52h24.71l41.32,62.85V610.52H630.5Z"/><path class="cls-7" 
d="M692.92,709.36a32.6,32.6,0,0,1-14.09-10.7,29.22,29.22,0,0,1-5.56-17h26.3q.57,5.64,3.9,8.6a12.58,12.58,0,0,0,8.67,3q5.49,0,8.67-2.53a8.45,8.45,0,0,0,3.18-7,8.28,8.28,0,0,0-2.53-6.21,20.55,20.55,0,0,0-6.21-4,100.5,100.5,0,0,0-10.48-3.61,112.76,112.76,0,0,1-16-6.07,29.56,29.56,0,0,1-10.69-9q-4.49-5.91-4.48-15.46,0-14.16,10.26-22.18t26.73-8q16.77,0,27,8t11,22.33H721.82a10.4,10.4,0,0,0-3.61-7.73,12.73,12.73,0,0,0-8.52-2.82,10.63,10.63,0,0,0-7.23,2.38,8.61,8.61,0,0,0-2.75,6.87,8.51,8.51,0,0,0,4.63,7.66q4.62,2.73,14.45,5.92a135.38,135.38,0,0,1,16,6.36,30.45,30.45,0,0,1,10.62,8.81q4.47,5.79,4.48,14.88a29.22,29.22,0,0,1-4.41,15.76,30.91,30.91,0,0,1-12.79,11.27q-8.38,4.19-19.8,4.19A52.34,52.34,0,0,1,692.92,709.36Z"/><path class="cls-7" d="M785.33,706.32a49.64,49.64,0,0,1-18.93-18.57,54.62,54.62,0,0,1,0-53.54,49.86,49.86,0,0,1,18.93-18.49,55,55,0,0,1,52.45,0,48.87,48.87,0,0,1,18.79,18.49,55.42,55.42,0,0,1-.07,53.54,49.34,49.34,0,0,1-18.79,18.57,54.9,54.9,0,0,1-52.38,0Zm45.66-24q7.29-8.08,7.3-21.38T831,639.49q-7.29-8-19.43-8T792,639.42q-7.31,8-7.3,21.53t7.3,21.46q7.29,8,19.58,8Q823.7,690.43,831,682.33Z"/><path class="cls-7" d="M900,610.52v60.69q0,9.1,4.48,14t13.15,4.92q8.67,0,13.29-4.92t4.63-14V610.52h24.71v60.54q0,13.59-5.78,23a37,37,0,0,1-15.54,14.16A48.82,48.82,0,0,1,917.19,713a47.64,47.64,0,0,1-21.46-4.7,35,35,0,0,1-15-14.16q-5.49-9.47-5.49-23.05V610.52Z"/><path class="cls-7" d="M1027.87,712l-21.1-38.3h-5.92V712H976.14V610.52h41.47q12,0,20.45,4.19a28.82,28.82,0,0,1,12.64,11.49,32.06,32.06,0,0,1,4.19,16.25,30.18,30.18,0,0,1-5.7,18.06q-5.71,7.95-16.84,11.28L1055.76,712Zm-27-55.78h15.32q6.78,0,10.18-3.33t3.4-9.39q0-5.78-3.4-9.1T1016.17,631h-15.32Z"/><path class="cls-7" 
d="M1071.22,634.29a46.44,46.44,0,0,1,18.13-18.35,52.61,52.61,0,0,1,26.37-6.58q18.08,0,30.93,9.54t17.19,26h-27.16a21.94,21.94,0,0,0-8.6-9.68,24.13,24.13,0,0,0-12.64-3.33q-11.42,0-18.5,8t-7.08,21.24q0,13.3,7.08,21.24t18.5,7.95a24.12,24.12,0,0,0,12.64-3.32,22,22,0,0,0,8.6-9.68h27.16q-4.33,16.47-17.19,25.93t-30.93,9.47a52.61,52.61,0,0,1-26.37-6.58,46.53,46.53,0,0,1-18.13-18.28,58.35,58.35,0,0,1,0-53.53Z"/><path class="cls-7" d="M1202.28,630.31v20.52h33.09v19.08h-33.09v22.25h37.43V712h-62.14V610.52h62.14v19.79Z"/><path class="cls-7" d="M348.8,754.41v20.52h33.09V794H348.8v22.25h37.43v19.8H324.09V734.62h62.14v19.79Z"/><path class="cls-7" d="M463,836.06,442.29,805l-18.21,31.07h-28l32.51-51.59-33.23-49.85h28.75l20.38,30.63,17.92-30.63h28l-32.22,51.15,33.52,50.29Z"/><path class="cls-7" d="M505,758.39A46.47,46.47,0,0,1,523.14,740a52.55,52.55,0,0,1,26.37-6.58q18.06,0,30.92,9.54t17.2,26H570.46a21.92,21.92,0,0,0-8.59-9.68A24.16,24.16,0,0,0,549.22,756q-11.41,0-18.5,8t-7.07,21.24q0,13.3,7.07,21.24t18.5,8a24.16,24.16,0,0,0,12.65-3.32,22,22,0,0,0,8.59-9.68h27.17q-4.33,16.47-17.2,25.93t-30.92,9.47a52.55,52.55,0,0,1-26.37-6.58A46.55,46.55,0,0,1,505,811.92a58.35,58.35,0,0,1,0-53.53Z"/><path class="cls-7" d="M636.07,754.41v20.52h33.09V794H636.07v22.25h37.42v19.8H611.36V734.62h62.13v19.79Z"/><path class="cls-7" d="M712.79,817h32.37v19.08H688.08V734.62h24.71Z"/><path class="cls-7" d="M780.27,817h32.37v19.08H755.56V734.62h24.71Z"/><path class="cls-7" d="M847.75,754.41v20.52h33.09V794H847.75v22.25h37.43v19.8H823V734.62h62.14v19.79Z"/><path class="cls-7" d="M990.52,836.06H965.81l-41.33-62.57v62.57H899.77V734.62h24.71l41.33,62.85V734.62h24.71Z"/><path class="cls-7" 
d="M1009.3,758.39A46.47,46.47,0,0,1,1027.44,740a52.55,52.55,0,0,1,26.37-6.58q18.06,0,30.92,9.54t17.2,26h-27.17a21.88,21.88,0,0,0-8.6-9.68,24.13,24.13,0,0,0-12.64-3.33q-11.41,0-18.5,8t-7.08,21.24q0,13.3,7.08,21.24t18.5,8a24.12,24.12,0,0,0,12.64-3.32,21.93,21.93,0,0,0,8.6-9.68h27.17q-4.34,16.47-17.2,25.93t-30.92,9.47a52.55,52.55,0,0,1-26.37-6.58,46.55,46.55,0,0,1-18.14-18.28,58.35,58.35,0,0,1,0-53.53Z"/><path class="cls-7" d="M1140.36,754.41v20.52h33.09V794h-33.09v22.25h37.43v19.8h-62.14V734.62h62.14v19.79Z"/><path class="cls-6" d="M721.49,266.64c13,9.82,22.79,21.29,28.48,35.88,6,15.41,4.15,30.05-5.22,43.6-4,5.81-8.69,11.17-12.94,16.83-6.22,8.27-10.66,17.25-10.95,30.09,4.82-6.19,8.22-11.05,12.12-15.48,9.53-10.85,19.65-21.2,28.87-32.31,25.76-31,26.58-63.79,2.86-96.36-1-1.36-2-2.71-4-5.44,3.07.85,4.78,1.09,6.28,1.78,34.34,15.75,52.11,49,44.26,82.78-3.14,13.48-10.79,24.26-20.71,33.43-8.52,7.88-18.07,14.68-26.28,22.85-9.17,9.13-17.17,19.43-26.07,28.84-2.16,2.29-5.81,4.31-8.84,4.42-13.3.47-26.63-.05-39.94.32-5.59.16-8.67-1.81-11.26-6.74-14.13-26.86-11-58.72,8.52-81.88a101.39,101.39,0,0,1,11.2-11.59C713.4,304.32,725,289.27,721.49,266.64Z"/><path class="cls-6" d="M682.68,463V433.67H804.15V463Z"/><path class="cls-6" d="M751.87,415.48c5.55-6.17,10.58-13,16.79-18.37,7.63-6.64,16.09-12.38,24.49-18.08C807,369.6,819.93,359.31,827,342.66c9.37,23.59-.68,59.22-19.87,72.94-1.12.8-2.45,1.83-3.69,1.84-16.79.12-33.58.08-50.37.08Z"/><path class="cls-6" d="M728.57,547.28V475h29.54v72.31Z"/></svg>
\ No newline at end of file

diff --git a/scst/www/images/sourceforge_badges/oss-sf-favorite-white.svg b/scst/www/images/sourceforge_badges/oss-sf-favorite-white.svg
new file mode 100644
index 0000000..5a4aa0b
--- /dev/null
+++ b/scst/www/images/sourceforge_badges/oss-sf-favorite-white.svg

@@ -0,0 +1 @@
+<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1500 1500"><defs><style>.cls-1{fill:#898989;}.cls-2{fill:#eae9ee;stroke:#c7c2bd;}.cls-2,.cls-5{stroke-miterlimit:10;stroke-width:5.1px;}.cls-3{fill:#fff;}.cls-4{fill:#3f3f3f;}.cls-5{stroke:#898989;fill:url(#linear-gradient);}.cls-6{fill:#ff6700;}</style><linearGradient id="linear-gradient" x1="1319.56" y1="1079.42" x2="89.91" y2="1079.42" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#fffefe"/><stop offset="1" stop-color="#eaeaec"/></linearGradient></defs><polygon class="cls-1" points="1421.26 928.81 78.74 928.81 200.73 861.72 1299.27 861.72 1421.26 928.81"/><path class="cls-2" d="M1373,738.52a24.45,24.45,0,0,1-1.84-35.31,68.92,68.92,0,0,0-18-108.11,24.45,24.45,0,0,1-9.94-33.93,68.92,68.92,0,0,0-42.49-101,24.45,24.45,0,0,1-17.51-30.72,68.93,68.93,0,0,0-64.67-88.49,24.44,24.44,0,0,1-24.13-25.84A68.93,68.93,0,0,0,1111,243.92a24.44,24.44,0,0,1-29.45-19.57,68.92,68.92,0,0,0-97.53-50,24.44,24.44,0,0,1-33.17-12.24A68.94,68.94,0,0,0,844.41,136a24.44,24.44,0,0,1-35.1-4.25,68.87,68.87,0,0,0-108-2.76,24.07,24.07,0,0,1-33.38,3.62,68.87,68.87,0,0,0-105.06,25.14,24.46,24.46,0,0,1-33.38,11.69A68.92,68.92,0,0,0,430.8,217.1,24.45,24.45,0,0,1,401,236.18a68.91,68.91,0,0,0-85,69.16,24.46,24.46,0,0,1-24.57,25.44,68.93,68.93,0,0,0-66.75,86.93,24.45,24.45,0,0,1-18,30.42,68.92,68.92,0,0,0-44.88,100,24.44,24.44,0,0,1-10.51,33.76,68.93,68.93,0,0,0-20.57,107.65,24.45,24.45,0,0,1-2.43,35.28,68.93,68.93,0,0,0,4.85,109.49,24.46,24.46,0,0,1,5.78,34.89,68.92,68.92,0,0,0,30,105.41,24.45,24.45,0,0,1,13.68,32.61,68.93,68.93,0,0,0,53.53,95.64A24.44,24.44,0,0,1,257,1131.41a68.92,68.92,0,0,0,74.17,80.69,24.47,24.47,0,0,1,26.88,23,68.93,68.93,0,0,0,90.8,61.39,24.44,24.44,0,0,1,31.45,16.15,68.92,68.92,0,0,0,102.52,38.75,24.45,24.45,0,0,1,34.33,8.46,68.93,68.93,0,0,0,108.7,14,24.45,24.45,0,0,1,35.36.3,68.92,68.92,0,0,0,109-11.45,24.46,24.46,0,0,1,34.48-7.88,68.92,
68.92,0,0,0,103.41-36.31,24.45,24.45,0,0,1,31.72-15.62,68.93,68.93,0,0,0,92.23-59.21,24.45,24.45,0,0,1,27.25-22.53,68.92,68.92,0,0,0,76.07-78.91,24.44,24.44,0,0,1,21.31-28.21,68.92,68.92,0,0,0,55.79-94.34,24.45,24.45,0,0,1,14.23-32.37,68.93,68.93,0,0,0,32.49-104.68,24.45,24.45,0,0,1,6.37-34.78A68.93,68.93,0,0,0,1373,738.52Z"/><circle class="cls-1" cx="750" cy="750" r="511.86" transform="translate(-310.66 750) rotate(-45)"/><circle class="cls-3" cx="750" cy="750" r="479.87" transform="translate(-310.66 750) rotate(-45)"/><path class="cls-4" d="M423.52,724.77v23.29H374.9v25.16h36.38v22.61H374.9v48.28H345.83V724.77Z"/><path class="cls-4" d="M513.11,823H468.57l-7.14,21.08H431l43.18-119.34h33.66L551,844.11H520.25Zm-7.48-22.44L490.84,756.9l-14.62,43.69Z"/><path class="cls-4" d="M676.14,724.77,633.81,844.11H597.43L555.1,724.77H586l29.58,90.1,29.75-90.1Z"/><path class="cls-4" d="M713.79,837.48a58.44,58.44,0,0,1-22.27-21.84,64.31,64.31,0,0,1,0-63,58.67,58.67,0,0,1,22.27-21.76,64.78,64.78,0,0,1,61.71,0,57.39,57.39,0,0,1,22.1,21.76,65.14,65.14,0,0,1-.08,63,58,58,0,0,1-22.1,21.84,64.62,64.62,0,0,1-61.63,0Zm53.72-28.22q8.6-9.53,8.59-25.16,0-15.81-8.59-25.24t-22.86-9.44q-14.45,0-23,9.35T713,784.1q0,15.81,8.58,25.24t23,9.44Q758.93,818.78,767.51,809.26Z"/><path class="cls-4" d="M882.69,844.11l-24.82-45.05h-7v45.05H821.83V724.77h48.79q14.1,0,24,4.93a34,34,0,0,1,14.88,13.51,37.81,37.81,0,0,1,4.93,19.13,35.5,35.5,0,0,1-6.72,21.25q-6.71,9.35-19.8,13.26l27.54,47.26ZM850.9,778.49h18q8,0,12-3.91t4-11.05q0-6.79-4-10.71t-12-3.91h-18Z"/><path class="cls-4" d="M961.74,724.77V844.11H932.67V724.77Z"/><path class="cls-4" d="M1068.67,724.77v23.29h-31.62v96.05H1008V748.06H976.36V724.77Z"/><path class="cls-4" d="M1112.36,748.06V772.2h38.93v22.44h-38.93v26.18h44v23.29h-73.1V724.77h73.1v23.29Z"/><path class="cls-5" d="M1421.26,928.81H78.74l33.72,166.55A168,168,0,0,0,277.13,1230h945.74a168,168,0,0,0,164.67-134.66Z"/><path class="cls-6" 
d="M345.4,1082.67c0-30-10.59-43.72-16.2-48.91a1.6,1.6,0,0,0-2.65,1.42c1.09,17-20.11,21.23-20.11,47.8v.16c0,16.2,12.16,29.41,27.12,29.41s27.11-13.21,27.11-29.41V1083c0-7.55-2.8-14.78-5.61-20.13-.62-1.1-2.18-.63-2,.32C358.18,1086.13,345.4,1100.28,345.4,1082.67Z"/><path class="cls-6" d="M313.14,1146a2.84,2.84,0,0,1-1.87-.79l-69.34-70a2.88,2.88,0,0,1,0-3.78l73.24-73.91a3.29,3.29,0,0,1,1.71-.63h21a2.46,2.46,0,0,1,2.34,1.58,2.58,2.58,0,0,1-.62,2.83l-68.73,69.5a3.55,3.55,0,0,0,0,5l54.39,55a2.91,2.91,0,0,1,0,3.78l-10.44,10.69a3.21,3.21,0,0,1-1.72.63ZM327,1158.31a2.45,2.45,0,0,1-2.33-1.58,2.54,2.54,0,0,1,.62-2.83l68.88-69.5a3.76,3.76,0,0,0,1.09-2.52,3.12,3.12,0,0,0-1.09-2.51l-54.54-55a2.88,2.88,0,0,1,0-3.78l10.59-10.69a2.58,2.58,0,0,1,1.87-.79,2.36,2.36,0,0,1,1.72.95L423,1080a2.64,2.64,0,0,1,.78,1.89,2.6,2.6,0,0,1-.78,1.89l-73.24,73.91a2.57,2.57,0,0,1-1.87.78H327Z"/><path class="cls-4" d="M501.71,1047.29a1.57,1.57,0,0,1-1.56.94,3,3,0,0,1-1.87-1.1,18,18,0,0,0-3.12-2.36,18.59,18.59,0,0,0-5-2.36,20.82,20.82,0,0,0-7.48-1.1,23.8,23.8,0,0,0-7.8,1.26,16.8,16.8,0,0,0-5.61,3.46,13.77,13.77,0,0,0-3.42,5,15.77,15.77,0,0,0-1.25,6,13.31,13.31,0,0,0,1.71,6.91,14.4,14.4,0,0,0,4.68,4.56,25.46,25.46,0,0,0,6.54,3.15c2.5.94,5,1.73,7.48,2.52a70.45,70.45,0,0,1,7.48,2.83,29.78,29.78,0,0,1,6.55,3.77,16.58,16.58,0,0,1,4.68,5.66,18.85,18.85,0,0,1,1.71,8.34,27.58,27.58,0,0,1-1.71,9.59,21.89,21.89,0,0,1-5,7.86,27.12,27.12,0,0,1-8.11,5.35,29.89,29.89,0,0,1-10.9,1.89,30.49,30.49,0,0,1-13.72-2.84,32.7,32.7,0,0,1-10.13-7.86l1.56-2.51a1.73,1.73,0,0,1,1.56-.79,1.88,1.88,0,0,1,1.25.63c.46.47,1.24,1.1,1.87,1.73.78.63,1.71,1.41,2.8,2.2a21.3,21.3,0,0,0,3.74,2.2,28.35,28.35,0,0,0,4.83,1.73,25.5,25.5,0,0,0,6.24.63,24,24,0,0,0,8.57-1.41,20.13,20.13,0,0,0,6.39-3.94,17.69,17.69,0,0,0,4.05-6,20.2,20.2,0,0,0,1.4-7.39,14,14,0,0,0-1.71-7.08,13.46,13.46,0,0,0-4.68-4.72,25.12,25.12,0,0,0-6.54-3.14c-2.5-.79-5-1.73-7.48-2.52s-5-1.73-7.48-2.67a24.3,24.3,0,0,1-6.55-3.78,19.08,19.08,0,0,1-4.67-5.81,20.43,20.43,0,0,1-1.72-8.81,
19.66,19.66,0,0,1,1.56-7.86,21.22,21.22,0,0,1,4.52-6.76,24.5,24.5,0,0,1,7.32-4.72,28.28,28.28,0,0,1,10-1.73,29.44,29.44,0,0,1,11.37,2,31.55,31.55,0,0,1,9.2,6.13Z"/><path class="cls-4" d="M479.42,1120.88c-5.45,0-10.28-.94-14.18-3a34.29,34.29,0,0,1-10.44-8l-.47-.47,2-3.14a2.88,2.88,0,0,1,2.34-1.26,2.69,2.69,0,0,1,1.87.94c.47.47,1.24,1.1,2,1.73s1.72,1.42,2.65,2.2a19.17,19.17,0,0,0,3.59,2.05,20.18,20.18,0,0,0,4.67,1.57,23.42,23.42,0,0,0,6.08.63,22.05,22.05,0,0,0,8.26-1.42,19.94,19.94,0,0,0,6.08-3.77,15.56,15.56,0,0,0,3.74-5.66,17.91,17.91,0,0,0,1.24-7.08,12.18,12.18,0,0,0-1.55-6.6,14.79,14.79,0,0,0-4.37-4.41,25.47,25.47,0,0,0-6.23-3c-2.34-.79-4.83-1.73-7.48-2.52s-5.14-1.73-7.48-2.67a27.88,27.88,0,0,1-6.7-3.93,18.64,18.64,0,0,1-4.83-6.14,20.14,20.14,0,0,1-1.87-9.12,21.94,21.94,0,0,1,1.55-8.33,21.48,21.48,0,0,1,4.68-7.08,23.56,23.56,0,0,1,7.64-4.87,26.6,26.6,0,0,1,10.28-1.89,30.54,30.54,0,0,1,11.69,2,31,31,0,0,1,9.5,6.45l.47.47-1.71,3.3c-.94,1.73-2.65,2.21-4.83.16a21.19,21.19,0,0,0-3.12-2.2,35.23,35.23,0,0,0-4.83-2.36,22.18,22.18,0,0,0-7.17-1.1,21.86,21.86,0,0,0-7.48,1.26,15.13,15.13,0,0,0-5.3,3.3,16.14,16.14,0,0,0-3.27,4.72,14.06,14.06,0,0,0-1.09,5.66,11.06,11.06,0,0,0,1.56,6.29,14.5,14.5,0,0,0,4.36,4.4,33.61,33.61,0,0,0,6.23,3.15c2.34.78,4.83,1.73,7.48,2.51a61,61,0,0,1,7.48,2.83,28,28,0,0,1,6.71,3.94,18.15,18.15,0,0,1,4.83,6,18.94,18.94,0,0,1,1.87,8.81,27.56,27.56,0,0,1-1.87,10.06,23.81,23.81,0,0,1-5.3,8.18,26,26,0,0,1-8.42,5.5,30.9,30.9,0,0,1-10.91,1.89Zm-22.59-11.79a29.17,29.17,0,0,0,22.75,9.9,28.48,28.48,0,0,0,10.6-1.88,24.76,24.76,0,0,0,7.79-5,22.32,22.32,0,0,0,4.83-7.54,25.86,25.86,0,0,0,1.71-9.28,20.45,20.45,0,0,0-1.55-8,16.17,16.17,0,0,0-4.37-5.35,34.83,34.83,0,0,0-6.23-3.77c-2.34-.95-4.83-1.89-7.33-2.83-2.49-.79-5-1.73-7.48-2.52a38,38,0,0,1-6.7-3.3,16.05,16.05,0,0,1-4.83-4.88,13.34,13.34,0,0,1-1.87-7.39,18.17,18.17,0,0,1,1.25-6.44,13.94,13.94,0,0,1,3.58-5.35,15.23,15.23,0,0,1,5.92-3.62,22.08,22.08,0,0,1,8.11-1.26,23.48,23.48,0,0,1,7.79,1.1,22.57,22.57,0,0,1,5.14
,2.52,21.33,21.33,0,0,1,3.28,2.52c.93.78,1.24.78,1.24.78a.59.59,0,0,0,.63-.47l1.09-2a28.29,28.29,0,0,0-8.26-5.51,30,30,0,0,0-10.91-1.88,26.24,26.24,0,0,0-9.66,1.73,22.3,22.3,0,0,0-7,4.4,18.94,18.94,0,0,0-5.76,14,18.85,18.85,0,0,0,1.71,8.34,15.13,15.13,0,0,0,4.37,5.5,26.69,26.69,0,0,0,6.23,3.62c2.34.94,4.83,1.89,7.32,2.67s5,1.58,7.48,2.52a29.44,29.44,0,0,1,6.7,3.3,13.64,13.64,0,0,1,4.84,5,14.53,14.53,0,0,1,1.87,7.71,21.83,21.83,0,0,1-1.41,7.86,15.85,15.85,0,0,1-4.2,6.29,22.3,22.3,0,0,1-6.71,4.25,26.34,26.34,0,0,1-8.88,1.57,28.16,28.16,0,0,1-6.54-.79,30.43,30.43,0,0,1-5-1.73,16,16,0,0,1-3.9-2.35c-1.09-.79-2-1.58-2.8-2.21a21.36,21.36,0,0,1-1.87-1.73c-.47-.47-.78-.47-.78-.47a.84.84,0,0,0-.78.47Z"/><path class="cls-4" d="M594.43,1078.11a50,50,0,0,1-2.8,17.14,36.34,36.34,0,0,1-20.1,21.7,42.53,42.53,0,0,1-31.33,0,34.85,34.85,0,0,1-12.15-8.49,37.3,37.3,0,0,1-7.95-13.21,53.68,53.68,0,0,1,0-34.28,37.21,37.21,0,0,1,7.95-13.21,36.79,36.79,0,0,1,12.15-8.49,42.53,42.53,0,0,1,31.33,0,34.68,34.68,0,0,1,12.15,8.49,37.21,37.21,0,0,1,8,13.21A50,50,0,0,1,594.43,1078.11Zm-6.07,0A47.67,47.67,0,0,0,586,1062.7a32.84,32.84,0,0,0-6.55-11.48,28.37,28.37,0,0,0-10.28-7.24,36.23,36.23,0,0,0-26.5,0,28.37,28.37,0,0,0-10.28,7.24,33.1,33.1,0,0,0-6.7,11.48,47,47,0,0,0-2.34,15.41,47.67,47.67,0,0,0,2.34,15.41,31,31,0,0,0,6.7,11.48,28.47,28.47,0,0,0,10.28,7.23,33.22,33.22,0,0,0,13.25,2.52,32.69,32.69,0,0,0,13.25-2.52,26.66,26.66,0,0,0,10.28-7.23,32.93,32.93,0,0,0,6.55-11.48A47,47,0,0,0,588.36,1078.11Z"/><path class="cls-4" 
d="M555.94,1120.72a42.86,42.86,0,0,1-15.89-3,35,35,0,0,1-12.47-8.65,39,39,0,0,1-8.11-13.53,51.83,51.83,0,0,1-3-17.45,48.41,48.41,0,0,1,3-17.46,40.43,40.43,0,0,1,8.11-13.52,34.79,34.79,0,0,1,12.47-8.65,40.48,40.48,0,0,1,15.89-3.15,43.72,43.72,0,0,1,16,3,35.13,35.13,0,0,1,12.47,8.65,40.61,40.61,0,0,1,8.1,13.52,55.65,55.65,0,0,1,0,34.92,40.51,40.51,0,0,1-8.1,13.52,35.13,35.13,0,0,1-12.47,8.65A38.53,38.53,0,0,1,555.94,1120.72Zm0-83.34a37.73,37.73,0,0,0-15.27,3,34.69,34.69,0,0,0-11.84,8.17,35.21,35.21,0,0,0-7.64,12.9,51.78,51.78,0,0,0,0,33.65,39.13,39.13,0,0,0,7.64,12.9,34.69,34.69,0,0,0,11.84,8.17,42.61,42.61,0,0,0,30.54,0,34.82,34.82,0,0,0,11.85-8.17,36,36,0,0,0,7.63-12.74,51.78,51.78,0,0,0,0-33.65,39.17,39.17,0,0,0-7.63-12.9,35,35,0,0,0-11.85-8.18A34.53,34.53,0,0,0,555.94,1037.38Z"/><path class="cls-4" d="M640.72,1114.75a24.73,24.73,0,0,0,10.44-2A23.64,23.64,0,0,0,659,1107a24,24,0,0,0,4.83-8.49,33.41,33.41,0,0,0,1.72-10.69v-50.48h5.76v50.48a36.8,36.8,0,0,1-2.18,12.58,30.6,30.6,0,0,1-6.08,10.22,28.27,28.27,0,0,1-9.66,6.76,33.12,33.12,0,0,1-12.78,2.52,30.05,30.05,0,0,1-22.44-9.28,29.25,29.25,0,0,1-6.08-10.22,37.49,37.49,0,0,1-2.18-12.58v-50.48h5.92v50.32a34.16,34.16,0,0,0,1.72,10.69,24.84,24.84,0,0,0,4.83,8.49,23.8,23.8,0,0,0,7.79,5.67,23.33,23.33,0,0,0,10.6,2.2Z"/><path class="cls-4" 
d="M640.72,1120.88a31.88,31.88,0,0,1-13.09-2.52,28.8,28.8,0,0,1-10-7.07,32.43,32.43,0,0,1-6.23-10.54,38.61,38.61,0,0,1-2.18-12.89v-51.42H617v51.42a37.88,37.88,0,0,0,1.56,10.38,23.39,23.39,0,0,0,4.67,8.17,22.23,22.23,0,0,0,7.48,5.51,25.1,25.1,0,0,0,10,2,24.49,24.49,0,0,0,10-1.89,20.07,20.07,0,0,0,7.48-5.5,24.72,24.72,0,0,0,4.68-8.18,31.49,31.49,0,0,0,1.56-10.38v-51.57h7.79v51.42a38.61,38.61,0,0,1-2.18,12.89,32.64,32.64,0,0,1-6.24,10.54,28.85,28.85,0,0,1-10,7.07,31.92,31.92,0,0,1-13.09,2.52ZM611,1038.32v49.54a34.94,34.94,0,0,0,2,12.26,28.76,28.76,0,0,0,5.92,9.91,25.26,25.26,0,0,0,9.35,6.6,30.82,30.82,0,0,0,12.47,2.36,31.87,31.87,0,0,0,12.46-2.36,26.68,26.68,0,0,0,9.36-6.6,31.66,31.66,0,0,0,5.92-9.91,34.94,34.94,0,0,0,2-12.26v-49.54h-3.89v49.54a36.1,36.1,0,0,1-1.72,11,23.24,23.24,0,0,1-5.14,9,23.75,23.75,0,0,1-8.1,6,26.25,26.25,0,0,1-10.76,2.21,25.69,25.69,0,0,1-10.75-2.21,23.66,23.66,0,0,1-8.1-6,25.17,25.17,0,0,1-5-9,35.38,35.38,0,0,1-1.71-11v-49.54Z"/><path class="cls-4" d="M704.77,1077.64h4.83a36.25,36.25,0,0,0,9.66-1.26,19.58,19.58,0,0,0,7.17-3.78,16.21,16.21,0,0,0,4.52-5.81,17.51,17.51,0,0,0,1.56-7.71c0-5.82-1.87-10.06-5.61-12.9s-9.2-4.24-16.52-4.24H695.57v77h-5.76v-81.61h20.57c9.35,0,16.36,1.89,20.88,5.5,4.68,3.62,6.86,9,6.86,16a21.11,21.11,0,0,1-1.56,8.49,20.17,20.17,0,0,1-4.68,6.77,22.86,22.86,0,0,1-7.16,4.71,35.12,35.12,0,0,1-9.51,2.52,11.75,11.75,0,0,1,2.49,2.36L745.6,1119h-5.15a4.44,4.44,0,0,1-1.55-.31,2.79,2.79,0,0,1-1.25-1.1l-25.87-33a6.67,6.67,0,0,0-2-1.73,43,43,0,0,0-6.85-.47c0-4.24.62-4.71,1.87-4.71Z"/><path class="cls-4" 
d="M747.62,1119.94h-7a4.37,4.37,0,0,1-2-.47,4.43,4.43,0,0,1-1.55-1.42l-25.87-33a5.26,5.26,0,0,0-1.72-1.58,52.69,52.69,0,0,0-6.39-.47h-.93v-4.09c0-1.41.31-2.2,2.8-2.2h5a34.32,34.32,0,0,0,9.35-1.26,19,19,0,0,0,6.86-3.61,14.58,14.58,0,0,0,4.21-5.51,18.21,18.21,0,0,0,1.4-7.23c0-5.5-1.72-9.44-5.3-12.11s-8.88-4.09-15.9-4.09H696.66v76.9h-7.79v-83.34h21.66q14.26,0,21.51,5.66c4.83,3.77,7.32,9.43,7.32,16.82a23.62,23.62,0,0,1-1.71,9,22,22,0,0,1-4.83,7.07,28.35,28.35,0,0,1-7.48,5,36.3,36.3,0,0,1-7.64,2.21,6.31,6.31,0,0,1,.94,1.1Zm-43.79-38.84c1.72,0,5.3.15,6.24.62a7.64,7.64,0,0,1,2.49,2l25.87,33a2.72,2.72,0,0,0,.93,1,2.37,2.37,0,0,0,1.09.31h3.12l-26.65-33.65a6.57,6.57,0,0,0-2.18-2.05l-2.34-1.41,2.81-.32a27.62,27.62,0,0,0,16.21-6.92,18.16,18.16,0,0,0,4.36-6.44,23.25,23.25,0,0,0,1.56-8.18c0-6.76-2.18-11.79-6.55-15.25s-11.22-5.19-20.26-5.19H690.9v79.41h3.89v-77h15.74c7.33,0,13.09,1.41,17,4.4q6.08,4.49,6.08,13.68a21.7,21.7,0,0,1-1.56,8,16.52,16.52,0,0,1-4.83,6.13,21.2,21.2,0,0,1-7.48,3.93,32.37,32.37,0,0,1-10,1.42h-4.83c-.46,0-.93,0-1.09,2.52Z"/><path class="cls-4" 
d="M814,1105.16a1.11,1.11,0,0,1,.94.47l2.33,2.51a40.22,40.22,0,0,1-5.45,4.88,33.32,33.32,0,0,1-6.55,3.61,53.17,53.17,0,0,1-7.63,2.36,42.63,42.63,0,0,1-9.2.79,37.91,37.91,0,0,1-15.42-3,32.76,32.76,0,0,1-12-8.49,39.32,39.32,0,0,1-7.8-13.21,50,50,0,0,1-2.8-17.14,46.2,46.2,0,0,1,3-17,39,39,0,0,1,8.1-13.21,36.61,36.61,0,0,1,12.47-8.49,41,41,0,0,1,16-3,40.09,40.09,0,0,1,15,2.51,26.85,26.85,0,0,1,5.92,3.15,47.35,47.35,0,0,1,5.46,4.24l-1.72,2.52a1.51,1.51,0,0,1-1.4.63,1.84,1.84,0,0,1-1.24-.63c-.47-.47-1.25-.94-2-1.57a24.6,24.6,0,0,0-3-1.89,13.26,13.26,0,0,0-4.21-1.89,36.18,36.18,0,0,0-5.61-1.57,33,33,0,0,0-7.17-.63,34.21,34.21,0,0,0-13.56,2.52,29.44,29.44,0,0,0-10.59,7.39,35.49,35.49,0,0,0-7,11.48,42.36,42.36,0,0,0-2.49,15.25,43.26,43.26,0,0,0,2.49,15.41,33.55,33.55,0,0,0,6.86,11.48,30.77,30.77,0,0,0,23.06,9.75,38.55,38.55,0,0,0,7.8-.63,23.88,23.88,0,0,0,6.23-1.73,31.82,31.82,0,0,0,10.13-6.76c.16-.15.31-.31.47-.31C813.54,1105.31,813.86,1105.16,814,1105.16Z"/><path class="cls-4" d="M788.3,1120.88a41.83,41.83,0,0,1-15.74-3,34.19,34.19,0,0,1-12.31-8.65,38.59,38.59,0,0,1-8-13.52,51.9,51.9,0,0,1-2.81-17.46,47.48,47.48,0,0,1,3-17.29,43.94,43.94,0,0,1,8.26-13.53,38.11,38.11,0,0,1,12.78-8.8,42.88,42.88,0,0,1,16.52-3.15,41.57,41.57,0,0,1,15.27,2.52,27.33,27.33,0,0,1,6.08,3.14,46,46,0,0,1,5.61,4.41l.63.62-2.19,3.31a2.44,2.44,0,0,1-2.18,1.1,3.72,3.72,0,0,1-1.87-.79,11.75,11.75,0,0,0-2-1.41,16.3,16.3,0,0,0-3-1.89,33.37,33.37,0,0,0-4-1.89,24.88,24.88,0,0,0-5.45-1.41,44,44,0,0,0-7-.63,33.15,33.15,0,0,0-13.25,2.51,30,30,0,0,0-17,18.25,44.42,44.42,0,0,0-2.49,14.78,45,45,0,0,0,2.34,15.09,31.54,31.54,0,0,0,6.54,11.17,27.83,27.83,0,0,0,10,6.92,32.05,32.05,0,0,0,12.46,2.36,37.63,37.63,0,0,0,7.64-.63,25.35,25.35,0,0,0,6.08-1.73,39.3,39.3,0,0,0,5.14-2.68,47.06,47.06,0,0,0,4.67-3.77,2.25,2.25,0,0,1,.78-.47,2.14,2.14,0,0,1,2.65.47l3,3.15-.62.62a42.74,42.74,0,0,1-5.61,5,31.13,31.13,0,0,1-6.7,3.77,29.48,29.48,0,0,1-8,2.36A32.62,32.62,0,0,1,788.3,1120.88Zm1.71-83.5a39,39,0,0,0-15.74,3,32.9,32.9,0,0,0
-12.15,8.33,37.3,37.3,0,0,0-8,12.9,47.27,47.27,0,0,0-2.81,16.66,48.2,48.2,0,0,0,2.81,16.83,39.28,39.28,0,0,0,7.63,12.9,34.18,34.18,0,0,0,11.69,8.17,38.42,38.42,0,0,0,15,2.83,53.89,53.89,0,0,0,9-.78,27.75,27.75,0,0,0,7.48-2.36,27.12,27.12,0,0,0,6.24-3.62,39.62,39.62,0,0,0,4.67-4.09l-1.71-1.88a.29.29,0,0,0-.47,0c-.16,0-.31.15-.47.31a38.44,38.44,0,0,1-5,3.93,37,37,0,0,1-5.45,3,29.73,29.73,0,0,1-6.39,1.73,59.1,59.1,0,0,1-7.95.63,36.81,36.81,0,0,1-13.24-2.52,31.46,31.46,0,0,1-10.6-7.39,35.14,35.14,0,0,1-7-11.79,50.23,50.23,0,0,1,0-31.3,33,33,0,0,1,7.17-11.79,31,31,0,0,1,10.9-7.55,36.38,36.38,0,0,1,13.87-2.67,35.51,35.51,0,0,1,7.33.63,42.68,42.68,0,0,1,5.76,1.57,23.15,23.15,0,0,1,4.37,2,32.86,32.86,0,0,1,3,2.05c.78.63,1.56,1.1,2,1.57s.78.47.78.47c.47,0,.63-.15.63-.31l1.24-1.89a46.36,46.36,0,0,0-4.67-3.62,28.72,28.72,0,0,0-5.77-3,41,41,0,0,0-6.7-1.89,21.33,21.33,0,0,0-7.48-1.1Z"/><path class="cls-4" d="M880.55,1037.38v4.87H838v33h35.38V1080H838v34h42.54v4.88H832.09v-81.46Z"/><path class="cls-4" d="M881.49,1119.94H831.15v-83.5h50.34v6.76H839v31h35.37v6.76H839V1113h42.54ZM833,1118.05h46.6v-3H837.08v-36h35.37v-2.83H837.08v-34.91h42.54v-3H833Z"/><path class="cls-4" 
d="M953,1036.44v12.42H916.4v24.69h30.85V1086H916.4v34H901v-83.35h52Zm89.61,41.67a46.4,46.4,0,0,1-3,17,39,39,0,0,1-8.58,13.52,39.45,39.45,0,0,1-13.24,9,43.29,43.29,0,0,1-17.14,3.14,46.37,46.37,0,0,1-17.15-3.14,39.26,39.26,0,0,1-21.81-22.49,50.21,50.21,0,0,1,0-34,39,39,0,0,1,8.57-13.52,41.25,41.25,0,0,1,13.24-9,47.19,47.19,0,0,1,34.29.15,39.32,39.32,0,0,1,21.82,22.49A48.18,48.18,0,0,1,1042.63,1078.11Zm-15.74,0a37.73,37.73,0,0,0-1.87-12.27,27.92,27.92,0,0,0-5.15-9.28,22.51,22.51,0,0,0-8.25-5.81,30.06,30.06,0,0,0-21.82,0,23.87,23.87,0,0,0-8.26,5.81,25.77,25.77,0,0,0-5.3,9.28,41.16,41.16,0,0,0,0,24.53,25.69,25.69,0,0,0,5.3,9.28,23.89,23.89,0,0,0,8.26,5.82,30.19,30.19,0,0,0,21.82,0,23.85,23.85,0,0,0,8.25-5.82,27.82,27.82,0,0,0,5.15-9.28A37.63,37.63,0,0,0,1026.89,1078.11Zm47.84-1.89h3.58a22.28,22.28,0,0,0,7.64-1.1,15.68,15.68,0,0,0,5.3-3,11.46,11.46,0,0,0,3.12-4.56,17.6,17.6,0,0,0,1.09-5.82c0-4.24-1.41-7.55-4.21-9.75s-7-3.46-12.62-3.46h-10v71.4h-15.43v-83.35h25.25a55.53,55.53,0,0,1,14.49,1.73,26.12,26.12,0,0,1,9.82,4.88,18.72,18.72,0,0,1,5.61,7.55,24.59,24.59,0,0,1,1.87,9.74,26.88,26.88,0,0,1-1.25,8,21.47,21.47,0,0,1-3.58,6.77,23.7,23.7,0,0,1-5.77,5.34,25.22,25.22,0,0,1-7.79,3.62,15.69,15.69,0,0,1,5,4.72l20.73,30.82h-13.87a7.24,7.24,0,0,1-3.43-.79,5.54,5.54,0,0,1-2.34-2.36l-17.3-26.73c-.47-.63-1.4-2-2.34-2.2h-5.45v-10.06c.16-1.42,1.87-1.42,1.87-1.42Zm88,32.24a38,38,0,0,0,9-1,41.15,41.15,0,0,0,7.32-2.67V1086h-10.28a3.75,3.75,0,0,1-2.34-.79,2.78,2.78,0,0,1-.94-2v-8.81H1193v37.43a34.6,34.6,0,0,1-6.54,3.93,50.23,50.23,0,0,1-7.33,2.83,44.59,44.59,0,0,1-8.26,1.57,87.94,87.94,0,0,1-9.5.48,44.27,44.27,0,0,1-16.68-3.15,36.37,36.37,0,0,1-13.09-8.81,42,42,0,0,1-8.73-13.52,47.31,47.31,0,0,1-3.11-17.14,47.93,47.93,0,0,1,3-17.3,39,39,0,0,1,8.57-13.52,38.31,38.31,0,0,1,13.56-8.81,49.09,49.09,0,0,1,17.76-3.14c6.7,0,12.63.94,17.46,3a41.34,41.34,0,0,1,12.46,7.87l-4.51,7.07a4.1,4.1,0,0,1-3.43,2.21,5,5,0,0,1-2.81-1c-1.25-.78-2.49-1.41-3.74-2.2a17.39,17.39,0,0,0-4.21-1.89,40.35,40.35,0,0,0-5.1
4-1.25,41.26,41.26,0,0,0-6.7-.48,27.67,27.67,0,0,0-11.06,2,22.89,22.89,0,0,0-8.42,6,25.77,25.77,0,0,0-5.3,9.28,37.16,37.16,0,0,0-1.87,12.11,36.46,36.46,0,0,0,2,12.89,27.91,27.91,0,0,0,5.61,9.44,25.65,25.65,0,0,0,8.57,6,29.22,29.22,0,0,0,11.22,2.2Zm84.62-22.18h-25.09v21.08h36.47v12.42h-52v-83.34h52v12.42h-36.63v25.63H1251v9.12s-.15,2.67-3.58,2.67Z"/><path class="cls-6" d="M692.21,576.09c0-16.6.08-33.2-.06-49.8,0-3.37.65-5,4.46-5.35,5.47-.53,9.07-4.55,12.4-8.5,7.68-9.13,12.88-19.73,17.54-30.59q4.55-10.62,8.55-21.47c2.48-6.74,6-12.78,11.74-17.12,13-9.88,19.31-23.45,21.75-39.07,1.55-9.89,2.54-19.87,3.53-29.83.31-3.15,1.52-4.25,4.6-4.23,8.65.06,18.33,7.2,21.72,16.85,5.21,14.82,4.41,29.64-1.28,44.15-3.62,9.23-8.13,18.13-11.44,27.46a63.62,63.62,0,0,0-3,16.73c-.38,5.4,1.16,6.49,6.56,6.42,11.45-.15,22.9-.67,34.34-.45,8.73.17,17.44,1.18,26.15,1.94a9,9,0,0,1,4.29,1.37c8.16,5.85,12.93,13.92,14.77,23.71.25,1.34-.49,2.92-1,4.32a6.79,6.79,0,0,1-1.5,2.35c-7.36,7.79-6.69,15.81-.77,24,.27.38.43.84.7,1.22,3.41,4.75,3.41,9.28-.95,13.46-7.72,7.4-8.62,15.83-3.84,25.12,3.51,6.84,1.8,12.6-3.52,17.68-1.18,1.13-2.21,2.42-3.39,3.55a17.61,17.61,0,0,0-5.53,13.8c.28,15.36-11.93,28.36-27.26,30-17.38,1.86-34.7.66-52.08.22-25.21-.65-49.31-7.23-73.81-11.7-2.79-.51-3.78-1.77-3.76-4.7C692.28,610.46,692.21,593.27,692.21,576.09Z"/><path class="cls-6" d="M676.67,577.51q0,26.13,0,52.27c0,6.94-2.84,9.75-9.81,9.77q-13,0-26,0c-6.43,0-9.71-3-9.73-9.32q-.09-52.63,0-105.25c0-5.79,3-9,8.75-9.2q14.21-.39,28.41,0c5.7.16,8.33,3.33,8.34,9.09Q676.71,551.19,676.67,577.51Z"/></svg>
\ No newline at end of file

diff --git a/scst/www/images/sourceforge_badges/oss-users-love-us-white.svg b/scst/www/images/sourceforge_badges/oss-users-love-us-white.svg
new file mode 100644
index 0000000..5b9ba8c
--- /dev/null
+++ b/scst/www/images/sourceforge_badges/oss-users-love-us-white.svg

@@ -0,0 +1 @@
+<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 1500 1500"><defs><style>.cls-1{fill:#898989;}.cls-2{fill:#eae9ee;stroke:#c7c2bd;}.cls-2,.cls-4{stroke-miterlimit:10;stroke-width:5.1px;}.cls-3{fill:#fff;}.cls-4{stroke:#898989;fill:url(#linear-gradient);}.cls-5{fill:#ff6700;}.cls-6{fill:#3f3f3f;}</style><linearGradient id="linear-gradient" x1="1380.15" y1="1141.02" x2="131.16" y2="1141.02" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#fffefe"/><stop offset="1" stop-color="#eaeaec"/></linearGradient></defs><polygon class="cls-1" points="1421.26 990.41 78.74 990.41 200.73 923.33 1299.27 923.33 1421.26 990.41"/><polygon class="cls-2" points="1374.25 832.18 1438.46 750 1374.25 667.82 1415 571.81 1331.71 509.05 1346.22 405.77 1249.53 366.7 1236.82 263.19 1133.3 250.47 1094.23 153.78 990.95 168.29 928.19 85 832.18 125.75 750 61.54 667.82 125.75 571.81 85 509.05 168.29 405.77 153.78 369.02 244.74 263.19 263.19 244.74 369.02 153.78 405.77 167.43 502.92 85 571.81 119.87 667.04 61.54 750 119.87 832.96 85 928.19 162.81 993.22 153.78 1094.23 245.77 1136.91 263.19 1236.82 363.09 1254.23 405.77 1346.22 506.78 1337.19 571.81 1415 667.04 1380.13 750 1438.46 832.96 1380.13 928.19 1415 993.22 1337.19 1094.23 1346.22 1139.44 1248.77 1236.82 1236.82 1248.77 1139.44 1346.22 1094.23 1336.65 987.23 1415 928.19 1374.25 832.18"/><circle class="cls-1" cx="750" cy="750" r="511.86" transform="translate(-310.66 750) rotate(-45)"/><circle class="cls-3" cx="750" cy="750" r="479.87" transform="translate(-310.66 750) rotate(-45)"/><path class="cls-4" d="M1421.26,990.41H78.74L112.46,1157a168,168,0,0,0,164.67,134.67h945.74A168,168,0,0,0,1387.54,1157Z"/><path class="cls-5" 
d="M345.4,1144.27c0-30-10.59-43.72-16.2-48.91a1.6,1.6,0,0,0-2.65,1.42c1.09,17-20.11,21.23-20.11,47.8v.16c0,16.2,12.16,29.41,27.12,29.41s27.11-13.21,27.11-29.41v-.16c0-7.54-2.8-14.78-5.61-20.12-.62-1.1-2.18-.63-2,.31C358.18,1147.73,345.4,1161.88,345.4,1144.27Z"/><path class="cls-5" d="M325.3,1192.55l-54.39-55a3.54,3.54,0,0,1,0-5L339.64,1063a2.57,2.57,0,0,0,.62-2.83,2.45,2.45,0,0,0-2.34-1.57h-21a3.21,3.21,0,0,0-1.71.63l-73.25,73.91a2.89,2.89,0,0,0,0,3.77l69.35,70a2.88,2.88,0,0,0,1.87.78,3.21,3.21,0,0,0,1.72-.63l10.44-10.69A2.89,2.89,0,0,0,325.3,1192.55Z"/><path class="cls-5" d="M353.82,1071.62a2.4,2.4,0,0,0-1.72-.95,2.58,2.58,0,0,0-1.87.79l-10.59,10.69a2.88,2.88,0,0,0,0,3.78l54.54,55a3.13,3.13,0,0,1,1.09,2.51,3.74,3.74,0,0,1-1.09,2.52l-68.88,69.51a2.54,2.54,0,0,0-.62,2.83,2.46,2.46,0,0,0,2.33,1.57v.16H347.9a2.58,2.58,0,0,0,1.87-.79L423,1145.37a2.67,2.67,0,0,0,0-3.77Z"/><path class="cls-6" d="M499.22,1142.07a28,28,0,0,0-6.71-3.93,59.38,59.38,0,0,0-7.48-2.83c-2.65-.79-5.14-1.73-7.48-2.52a33.59,33.59,0,0,1-6.23-3.14,14.64,14.64,0,0,1-4.36-4.41,11.06,11.06,0,0,1-1.56-6.29,14.09,14.09,0,0,1,1.09-5.66,16.14,16.14,0,0,1,3.27-4.72,15.27,15.27,0,0,1,5.3-3.3,21.86,21.86,0,0,1,7.48-1.26,22.18,22.18,0,0,1,7.17,1.1,36.42,36.42,0,0,1,4.83,2.36,22,22,0,0,1,3.12,2.2c2.18,2,3.89,1.58,4.83-.15l1.71-3.31-.47-.47a30.76,30.76,0,0,0-9.5-6.44,30.35,30.35,0,0,0-11.69-2,26.6,26.6,0,0,0-10.28,1.89,23.72,23.72,0,0,0-7.64,4.87,21.53,21.53,0,0,0-4.68,7.08,21.94,21.94,0,0,0-1.55,8.33,20.14,20.14,0,0,0,1.87,9.12,18.54,18.54,0,0,0,4.83,6.14,27.88,27.88,0,0,0,6.7,3.93c2.34.94,4.83,1.89,7.48,2.67s5.14,1.73,7.48,2.52a25.13,25.13,0,0,1,6.23,3,14.65,14.65,0,0,1,4.37,4.4,12.18,12.18,0,0,1,1.55,6.6,17.91,17.91,0,0,1-1.24,7.08,15.56,15.56,0,0,1-3.74,5.66,20.16,20.16,0,0,1-6.08,3.78,22.26,22.26,0,0,1-8.26,1.41,23.42,23.42,0,0,1-6.08-.63,20.18,20.18,0,0,1-4.67-1.57,19.73,19.73,0,0,1-3.59-2c-.93-.79-1.87-1.58-2.65-2.21a24.81,24.81,0,0,1-2-1.73,2.69,2.69,0,0,0-1.87-.94,2.88,2.88,0,0,0-2.34,1.26l-2,3.14.47.48a34
.41,34.41,0,0,0,10.44,8c3.9,2,8.73,3,14.18,3a30.92,30.92,0,0,0,10.91-1.88,26.17,26.17,0,0,0,8.42-5.51,23.81,23.81,0,0,0,5.3-8.18,27.56,27.56,0,0,0,1.87-10.06,18.94,18.94,0,0,0-1.87-8.81A18.25,18.25,0,0,0,499.22,1142.07Z"/><path class="cls-6" d="M584.46,1108.57a35.13,35.13,0,0,0-12.47-8.65,43.72,43.72,0,0,0-16-3,40.47,40.47,0,0,0-15.89,3.14,35,35,0,0,0-12.47,8.65,40.7,40.7,0,0,0-8.11,13.52,48.41,48.41,0,0,0-3,17.46,51.78,51.78,0,0,0,3,17.45,39.19,39.19,0,0,0,8.11,13.53,35,35,0,0,0,12.47,8.65,42.86,42.86,0,0,0,15.89,3,38.53,38.53,0,0,0,16-3.15,34.88,34.88,0,0,0,12.47-8.65,40.41,40.41,0,0,0,8.1-13.52,55.62,55.62,0,0,0,0-34.91A40.56,40.56,0,0,0,584.46,1108.57Zm-28.52,67.78a33.22,33.22,0,0,1-13.25-2.52,28.35,28.35,0,0,1-10.28-7.23,30.93,30.93,0,0,1-6.7-11.48,47.67,47.67,0,0,1-2.34-15.41,47,47,0,0,1,2.34-15.41,33.1,33.1,0,0,1,6.7-11.48,28.35,28.35,0,0,1,10.28-7.23,36.09,36.09,0,0,1,26.5,0,28.35,28.35,0,0,1,10.28,7.23A32.84,32.84,0,0,1,586,1124.3a47.67,47.67,0,0,1,2.34,15.41,47,47,0,0,1-2.34,15.41,32.84,32.84,0,0,1-6.55,11.48,26.55,26.55,0,0,1-10.28,7.23A32.69,32.69,0,0,1,555.94,1176.35Z"/><path class="cls-6" d="M664.41,1149.62a31.58,31.58,0,0,1-1.56,10.38,24.78,24.78,0,0,1-4.68,8.17,20.2,20.2,0,0,1-7.48,5.51,24.66,24.66,0,0,1-10,1.88,25.1,25.1,0,0,1-10-2,22.34,22.34,0,0,1-7.48-5.5,23.53,23.53,0,0,1-4.67-8.18,37.76,37.76,0,0,1-1.56-10.38V1098h-7.79v51.42a38.61,38.61,0,0,0,2.18,12.89,32.33,32.33,0,0,0,6.23,10.54,28.82,28.82,0,0,0,10,7.08,35.39,35.39,0,0,0,26.18,0,28.88,28.88,0,0,0,10-7.08,32.53,32.53,0,0,0,6.24-10.54,38.61,38.61,0,0,0,2.18-12.89V1098h-7.79Z"/><path class="cls-6" 
d="M717.7,1143.8a36.3,36.3,0,0,0,7.64-2.2,28.63,28.63,0,0,0,7.48-5,22,22,0,0,0,4.83-7.07,23.58,23.58,0,0,0,1.71-9c0-7.39-2.49-13-7.32-16.82s-12-5.66-21.51-5.66H688.87v83.34h7.79v-76.9h13.87c7,0,12.31,1.42,15.9,4.09s5.3,6.61,5.3,12.11a18.26,18.26,0,0,1-1.4,7.24,14.47,14.47,0,0,1-4.21,5.5,19.21,19.21,0,0,1-6.86,3.62,34.73,34.73,0,0,1-9.35,1.25h-5c-2.49,0-2.8.79-2.8,2.21v4.08h.93a50.3,50.3,0,0,1,6.39.48,5.11,5.11,0,0,1,1.72,1.57l25.87,33a4.43,4.43,0,0,0,1.55,1.42,4.37,4.37,0,0,0,2,.47h7l-29-36.64A6.71,6.71,0,0,0,717.7,1143.8Z"/><path class="cls-6" d="M812.76,1166a2.25,2.25,0,0,0-.78.47,47.2,47.2,0,0,1-4.67,3.78,40.54,40.54,0,0,1-5.14,2.67,24.47,24.47,0,0,1-6.08,1.73,37.63,37.63,0,0,1-7.64.63,32.05,32.05,0,0,1-12.46-2.36,27.83,27.83,0,0,1-10-6.92,31.58,31.58,0,0,1-6.54-11.16,45.13,45.13,0,0,1-2.34-15.1,44.42,44.42,0,0,1,2.49-14.78,30,30,0,0,1,17-18.24,33.15,33.15,0,0,1,13.25-2.52,44.13,44.13,0,0,1,7,.63,25.45,25.45,0,0,1,5.45,1.41,33.37,33.37,0,0,1,4,1.89,16.3,16.3,0,0,1,3,1.89,11.37,11.37,0,0,1,2,1.41,3.63,3.63,0,0,0,1.87.79,2.44,2.44,0,0,0,2.18-1.1l2.19-3.3-.63-.63a46,46,0,0,0-5.61-4.41,28.2,28.2,0,0,0-6.08-3.14,41.57,41.57,0,0,0-15.27-2.52,42.88,42.88,0,0,0-16.52,3.15,38,38,0,0,0-12.78,8.81,43.77,43.77,0,0,0-8.26,13.52,47.54,47.54,0,0,0-3,17.3,51.85,51.85,0,0,0,2.81,17.45,38.73,38.73,0,0,0,8,13.53,34.28,34.28,0,0,0,12.31,8.64,41.83,41.83,0,0,0,15.74,3,32.62,32.62,0,0,0,9.19-1.1,29.48,29.48,0,0,0,8-2.36,30.89,30.89,0,0,0,6.7-3.77,42.66,42.66,0,0,0,5.61-5l.62-.63-3-3.15A2.15,2.15,0,0,0,812.76,1166Zm-1.18-56,.41.35a2.07,2.07,0,0,0,.32.28,2.47,2.47,0,0,1-.33-.28A5,5,0,0,0,811.58,1110Z"/><polygon class="cls-6" points="831.15 1181.54 881.49 1181.54 881.49 1174.62 838.95 1174.62 838.95 1142.54 874.32 1142.54 874.32 1135.78 838.95 1135.78 838.95 1104.8 881.49 1104.8 881.49 1098.04 831.15 1098.04 831.15 1181.54"/><path class="cls-6" 
d="M1165.58,1144.74a2.8,2.8,0,0,0,.94,2,3.8,3.8,0,0,0,2.34.78h10.28v18.87a41.18,41.18,0,0,1-7.32,2.68,38,38,0,0,1-9,.94,29.22,29.22,0,0,1-11.22-2.2,25.65,25.65,0,0,1-8.57-6,27.86,27.86,0,0,1-5.61-9.43,36.55,36.55,0,0,1-2-12.9,37.16,37.16,0,0,1,1.87-12.11,25.73,25.73,0,0,1,5.3-9.27,22.92,22.92,0,0,1,8.42-6,27.85,27.85,0,0,1,11.06-2,41.35,41.35,0,0,1,6.7.47,38.68,38.68,0,0,1,5.14,1.26,17.37,17.37,0,0,1,4.21,1.88c1.25.79,2.49,1.42,3.74,2.2a5,5,0,0,0,2.81,1,4.11,4.11,0,0,0,3.43-2.2l4.51-7.08a41.12,41.12,0,0,0-12.46-7.86c-4.83-2-10.76-3-17.46-3a49.09,49.09,0,0,0-17.76,3.14,38.44,38.44,0,0,0-13.56,8.81,39.1,39.1,0,0,0-8.57,13.52,47.93,47.93,0,0,0-3,17.3,47.27,47.27,0,0,0,3.11,17.14,42.19,42.19,0,0,0,8.73,13.53,36.47,36.47,0,0,0,13.09,8.8,44.27,44.27,0,0,0,16.68,3.15,90.69,90.69,0,0,0,9.5-.47,45.43,45.43,0,0,0,8.26-1.58,50.23,50.23,0,0,0,7.33-2.83,34.6,34.6,0,0,0,6.54-3.93v-37.42h-27.43Z"/><path class="cls-6" d="M1258.78,1110.46V1098h-52v83.34h52V1169h-36.47v-21.07h25.09c3.43,0,3.58-2.68,3.58-2.68v-9.12h-28.83v-25.63Z"/><path class="cls-6" d="M1091.87,1145.84a25.2,25.2,0,0,0,7.79-3.61,23.94,23.94,0,0,0,5.77-5.35,21.42,21.42,0,0,0,3.58-6.76,27,27,0,0,0,1.25-8,24.64,24.64,0,0,0-1.87-9.75,18.79,18.79,0,0,0-5.61-7.55,26.27,26.27,0,0,0-9.82-4.88,55.53,55.53,0,0,0-14.49-1.73h-25.25v83.35h15.43v-71.39h10c5.61,0,9.81,1.25,12.62,3.46s4.21,5.5,4.21,9.75a17.59,17.59,0,0,1-1.09,5.81,11.39,11.39,0,0,1-3.12,4.56,15.51,15.51,0,0,1-5.3,3,22.28,22.28,0,0,1-7.64,1.1h-3.58s-1.71,0-1.87,1.42v10.06h5.45c.94.16,1.87,1.57,2.34,2.2l17.3,26.74a5.66,5.66,0,0,0,2.34,2.36,7.37,7.37,0,0,0,3.43.78h13.87l-20.73-30.82A15.81,15.81,0,0,0,1091.87,1145.84Z"/><path class="cls-6" 
d="M1031.09,1109.36a41.25,41.25,0,0,0-13.24-9,47.12,47.12,0,0,0-34.29-.16,41.25,41.25,0,0,0-13.24,9,39.14,39.14,0,0,0-8.57,13.53,50.18,50.18,0,0,0,0,34,39.19,39.19,0,0,0,21.81,22.49,46.37,46.37,0,0,0,17.15,3.15,43.29,43.29,0,0,0,17.14-3.15,39.57,39.57,0,0,0,13.24-9,39.16,39.16,0,0,0,8.58-13.53,46.4,46.4,0,0,0,3-17,48.27,48.27,0,0,0-3-16.83A41.51,41.51,0,0,0,1031.09,1109.36ZM1025,1152a27.87,27.87,0,0,1-5.15,9.27,23.67,23.67,0,0,1-8.26,5.82,30,30,0,0,1-21.81,0,23.76,23.76,0,0,1-8.26-5.82,25.73,25.73,0,0,1-5.3-9.27,41.19,41.19,0,0,1,0-24.54,25.73,25.73,0,0,1,5.3-9.27,23.76,23.76,0,0,1,8.26-5.82,30,30,0,0,1,21.81,0,22.37,22.37,0,0,1,8.26,5.82,27.87,27.87,0,0,1,5.15,9.27,41.19,41.19,0,0,1,0,24.54Z"/><polygon class="cls-6" points="900.97 1181.54 916.4 1181.54 916.4 1147.57 947.25 1147.57 947.25 1135.15 916.4 1135.15 916.4 1110.46 953.02 1110.46 953.02 1098.19 900.97 1098.19 900.97 1181.54"/><path class="cls-6" d="M548.17,684.53v63.55q0,9.52,4.69,14.67t13.77,5.14q9.07,0,13.92-5.14t4.84-14.67V684.53h25.87v63.39q0,14.24-6,24.06a38.72,38.72,0,0,1-16.27,14.83,51.08,51.08,0,0,1-22.77,5,49.8,49.8,0,0,1-22.46-4.92,36.6,36.6,0,0,1-15.66-14.82q-5.76-9.91-5.75-24.14V684.53Z"/><path class="cls-6" d="M647,788a33.92,33.92,0,0,1-14.75-11.2A30.5,30.5,0,0,1,626.39,759h27.54q.6,5.89,4.08,9a13.13,13.13,0,0,0,9.08,3.1q5.74,0,9.08-2.65a8.84,8.84,0,0,0,3.33-7.33,8.73,8.73,0,0,0-2.65-6.51,21.43,21.43,0,0,0-6.51-4.23,106,106,0,0,0-11-3.79,119.49,119.49,0,0,1-16.79-6.35,31.12,31.12,0,0,1-11.2-9.38q-4.68-6.21-4.69-16.19,0-14.83,10.75-23.23t28-8.39q17.55,0,28.29,8.39t11.5,23.38h-28a11,11,0,0,0-3.78-8.1,13.39,13.39,0,0,0-8.93-3,11.16,11.16,0,0,0-7.57,2.5,9,9,0,0,0-2.87,7.19q0,5.15,4.84,8t15.13,6.2a139.76,139.76,0,0,1,16.72,6.66,31.68,31.68,0,0,1,11.12,9.23q4.68,6,4.69,15.58A30.63,30.63,0,0,1,702,775.61a32.31,32.31,0,0,1-13.39,11.8q-8.79,4.4-20.73,4.39A54.65,54.65,0,0,1,647,788Z"/><path class="cls-6" d="M748.34,705.26v21.48H783v20H748.34V770h39.18v20.73H722.46V684.53h65.06v20.73Z"/><path 
class="cls-6" d="M858.48,790.74l-22.09-40.09h-6.2v40.09H804.32V684.53h43.42q12.56,0,21.41,4.39a30.19,30.19,0,0,1,13.24,12,33.59,33.59,0,0,1,4.39,17,31.64,31.64,0,0,1-6,18.91q-6,8.32-17.63,11.8l24.51,42.06Zm-28.29-58.4h16q7.11,0,10.66-3.48t3.56-9.83q0-6.06-3.56-9.54T846.23,706h-16Z"/><path class="cls-6" d="M920.51,788a34,34,0,0,1-14.75-11.2A30.57,30.57,0,0,1,899.94,759h27.53q.62,5.89,4.09,9a13.11,13.11,0,0,0,9.08,3.1q5.75,0,9.07-2.65a8.84,8.84,0,0,0,3.33-7.33,8.72,8.72,0,0,0-2.64-6.51,21.55,21.55,0,0,0-6.51-4.23,106,106,0,0,0-11-3.79,119,119,0,0,1-16.79-6.35,31.12,31.12,0,0,1-11.2-9.38q-4.7-6.21-4.69-16.19,0-14.83,10.74-23.23T939,683q17.55,0,28.29,8.39t11.5,23.38h-28a11,11,0,0,0-3.78-8.1,13.37,13.37,0,0,0-8.93-3,11.14,11.14,0,0,0-7.56,2.5c-1.92,1.67-2.87,4.06-2.87,7.19q0,5.15,4.84,8t15.13,6.2a139.52,139.52,0,0,1,16.71,6.66,31.71,31.71,0,0,1,11.13,9.23q4.68,6,4.69,15.58a30.55,30.55,0,0,1-4.62,16.49,32.25,32.25,0,0,1-13.39,11.8q-8.77,4.4-20.73,4.39A54.65,54.65,0,0,1,920.51,788Z"/><path class="cls-6" d="M477.67,915.27h33.89v20H451.79V829h25.88Z"/><path class="cls-6" d="M546.73,929.34a52,52,0,0,1-19.82-19.44,57.19,57.19,0,0,1,0-56.06,52.18,52.18,0,0,1,19.82-19.36,57.67,57.67,0,0,1,54.93,0,51.07,51.07,0,0,1,19.66,19.36,58,58,0,0,1-.07,56.06,51.64,51.64,0,0,1-19.67,19.44,57.51,57.51,0,0,1-54.85,0Zm47.81-25.11q7.65-8.47,7.64-22.4t-7.64-22.47Q586.91,851,574.19,851t-20.5,8.32q-7.63,8.33-7.64,22.54t7.64,22.47q7.63,8.4,20.5,8.4T594.54,904.23Z"/><path class="cls-6" d="M742.59,829,704.92,935.24H672.54L634.86,829H662.4l26.33,80.19L715.2,829Z"/><path class="cls-6" d="M779.2,849.76v21.48h34.65v20H779.2v23.3h39.19v20.73H753.33V829h65.06v20.73Z"/><path class="cls-6" d="M892.67,829v63.55q0,9.52,4.69,14.67t13.77,5.14q9.07,0,13.92-5.14t4.84-14.67V829h25.88v63.39q0,14.24-6,24.06a38.78,38.78,0,0,1-16.27,14.83,51.14,51.14,0,0,1-22.77,5,49.88,49.88,0,0,1-22.47-4.92,36.6,36.6,0,0,1-15.66-14.82q-5.74-9.91-5.75-24.14V829Z"/><path class="cls-6" 
d="M991.47,932.52a34,34,0,0,1-14.75-11.2,30.57,30.57,0,0,1-5.82-17.85h27.53q.6,5.89,4.09,9a13.1,13.1,0,0,0,9.07,3.1c3.84,0,6.86-.88,9.08-2.65a8.84,8.84,0,0,0,3.33-7.33,8.69,8.69,0,0,0-2.65-6.51,21.37,21.37,0,0,0-6.5-4.23,106.89,106.89,0,0,0-11-3.79,119.68,119.68,0,0,1-16.8-6.35,31.09,31.09,0,0,1-11.19-9.38q-4.7-6.21-4.69-16.19,0-14.83,10.74-23.23t28-8.39q17.55,0,28.29,8.39t11.5,23.38h-28a11,11,0,0,0-3.78-8.1,13.37,13.37,0,0,0-8.93-3,11.12,11.12,0,0,0-7.56,2.5c-1.92,1.67-2.88,4.06-2.88,7.19q0,5.15,4.84,8t15.14,6.2a140.21,140.21,0,0,1,16.71,6.66,31.59,31.59,0,0,1,11.12,9.23q4.69,6,4.69,15.58a30.54,30.54,0,0,1-4.61,16.49,32.25,32.25,0,0,1-13.39,11.8q-8.77,4.4-20.73,4.39A54.65,54.65,0,0,1,991.47,932.52Z"/><path class="cls-5" d="M712.11,453.34c4.47,9.38,8.14,18.05,12.62,26.3,13.53,24.89,27.37,49.62,41.1,74.42.95,1.72,1.93,3.44,3.15,5.62,5.25-2.89,10.28-5.65,15.31-8.43,24-13.24,47.74-26.89,72-39.63,35.19-18.5,33.19-58.48,12.62-78.29-13.41-12.91-29.37-15.22-46.7-10.13-5.21,1.53-10.17,3.92-15.65,6.07-1.84-2.81-3.77-5.85-5.79-8.84-10.59-15.72-25.46-23.24-44.24-21.35C738,400.93,724.89,411,717,427.89a7.28,7.28,0,0,1-4.52,3.34c-4.19.93-8.52,1.22-13.24,1.82a25.61,25.61,0,0,1,.55-3.45c7-23.92,22.67-39.19,46.8-44.83,24.32-5.69,45.39,1,62.19,19.68,2.84,3.16,4.9,3.84,9.13,2.69,24.25-6.58,46.09-1.47,63.6,16.64s21.88,39.84,14.89,64.09c-4.77,16.55-15.53,28-30,36.15-30.1,16.91-60.38,33.51-90.62,50.16-8.39,4.63-15.79,2.34-20.49-6.23q-21.3-38.87-42.48-77.78c-4.11-7.56-8.47-15.07-11.73-23-4.41-10.75-4.18-10.25,7.11-13.39A18.87,18.87,0,0,1,712.11,453.34Z"/><path class="cls-5" 
d="M758.12,534c-3.79-6.85-7-12.22-9.69-17.8-.58-1.18.23-3.13.62-4.67,1.63-6.4,3.72-12.71,5-19.18,3.13-16.47-9.44-34.78-26.49-40.3a38.62,38.62,0,0,0-45.38,16.79c-2.43,4.11-4.2,8.62-6.51,13.46a38.09,38.09,0,0,1-4.79-1.27c-22.89-9.18-46.92,2.85-53.13,26.66-4.79,18.35,3.78,37.25,21.72,44.69,19.75,8.2,40,15.12,60.1,22.56,7.6,2.81,15.24,5.49,23.24,8.37,4.94-13.45,9.77-26.63,15-41,3.58,6.52,6.69,12.08,9.65,17.72a4.09,4.09,0,0,1-.22,2.81C744,572,741,581.1,737.3,590c-4.12,10-8.62,12.06-18.71,8.39-29.48-10.72-59.24-20.78-88.16-32.85-18.76-7.83-28.39-24.33-29.86-44.46a54.8,54.8,0,0,1,61.78-58.59c3.64.46,5.21-.88,7.11-3.62,13-18.82,31.3-27,53.8-24,21.57,2.94,36.25,15.62,44.14,35.78,4.47,11.41,3.61,23.25.33,34.79C765.12,514.65,761.67,523.58,758.12,534Z"/></svg>
\ No newline at end of file

diff --git a/scst/www/index.html b/scst/www/index.html
index dd1f9ce..eeeefea 100644
--- a/scst/www/index.html
+++ b/scst/www/index.html

@@ -13,14 +13,15 @@
 <body>
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id="slogan">Generic SCSI Target Subsystem for Linux</h2>
 	</div>
+
 	<div id="menu">
 		<ul>
 			<li id="current"><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -28,6 +29,7 @@
 			<li><a href="users.html">Users</a></li>
 		</ul>
 	</div>
+
 	<div id="content-wrap">
 	  		<div id="main">
 				<h1>Generic SCSI Target Subsystem for Linux</h1>
@@ -149,7 +151,7 @@
 				<h1>Documentation</h1>
 				<p><a href="scst_pg.html">HTML</a></p>
 				<p><a href="scst_pg.pdf">PDF</a></p>
-				<p><a href="http://events.linuxfoundation.org/sites/events/files/slides/lcna15_bvanassche.pdf">SCST overview slides</a></p>
+				<p><a href="https://events.static.linuxfound.org/sites/events/files/slides/lcna15_bvanassche.pdf">SCST overview slides</a></p>
 				<p><a href="http://monklinux.blogspot.com/2012/02/scst-configuration-how-to-using-gentoo.html">Gentoo HOWTO</a></p>
 				<p><a href="iscsi-scst-howto.txt">HOWTO For iSCSI-SCST</a></p>
 				<p><a href="SCST_Gentoo_HOWTO.txt">Gentoo HOWTO For iSCSI-SCST</a></p>
@@ -159,7 +161,7 @@
 				<h1>Articles</h1>
 				<p>By Marc Smith:</p>
 				<p><a href="http://marcitland.blogspot.com/2011/03/accelerating-vdi-using-scst-and-ssds.html">Accelerating VDI Using SCST and SSDs</a></p>
-				<p><a href="http://marcitland.blogspot.com/2013/04/building-using-highly-available-esos.html">Building &amp Using a Highly Available ESOS Disk Array</a></p>
+				<p><a href="http://marcitland.blogspot.com/2013/04/building-using-highly-available-esos.html">Building &amp; Using a Highly Available ESOS Disk Array</a></p>
 				<p><a href="http://marcitland.blogspot.com/2014/07/open-storage-dual-controller-oss-disk.html">Open Storage: Dual-Controller OSS Disk Array</a></p>
 				<h1>SCST 0.9.6 graphs</h1>
 				<p><a href="images/init_scst.png">init_scst</a></p>
@@ -172,10 +174,10 @@
 					scst-devel mailing list</a><br><br>
 					See <a href="http://sourceforge.net/mail/?group_id=110471">mailing lists page</a> for more info about SCST mailing
 					lists.</p>
-				<h1></h1>
+				<h1>&nbsp;</h1>
 				<p><a href="http://sourceforge.net/donate/index.php?group_id=110471">
 				<img src="http://images.sourceforge.net/images/project-support.jpg" width="88" height="32" border="0" alt="Support This Project"> </a></p>
-				<h1></h1>
+				<h1>&nbsp;</h1>
 				<p><a href="http://validator.w3.org/check?uri=referer">
 					<img src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01 Transitional" height="31" width="88"></a>
   				</p>
@@ -187,7 +189,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/mc_s.html b/scst/www/mc_s.html
index 19e0d75..89b8fc5 100644
--- a/scst/www/mc_s.html
+++ b/scst/www/mc_s.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -246,7 +246,7 @@
 <!-- wrap ends here -->
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font color="#EC981F">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font color="#EC981F">Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	   Design by: <b><font color="#EC981F">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/scst_admin.html b/scst/www/scst_admin.html
index 516a620..4ca49fc 100644
--- a/scst/www/scst_admin.html
+++ b/scst/www/scst_admin.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -67,15 +63,15 @@
 
 				<p class="post-footer align-right">
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/scstvslio.html b/scst/www/scstvslio.html
index 1cf828b..e8d6e41 100644
--- a/scst/www/scstvslio.html
+++ b/scst/www/scstvslio.html

@@ -12,15 +12,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -99,7 +99,7 @@
 <!-- wrap ends here -->
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font color="#EC981F">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font color="#EC981F">Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	   Design by: <b><font color="#EC981F">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/scstvsstgt.html b/scst/www/scstvsstgt.html
index 8c30562..4fd682a 100644
--- a/scst/www/scstvsstgt.html
+++ b/scst/www/scstvsstgt.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -79,7 +79,7 @@
 <!-- wrap ends here -->
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020 <b><font color="#EC981F">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021 <b><font color="#EC981F">Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	   Design by: <b><font color="#EC981F">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_emulex.html b/scst/www/target_emulex.html
index 8a8e357..d75b5e7 100644
--- a/scst/www/target_emulex.html
+++ b/scst/www/target_emulex.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -72,7 +68,7 @@
 				<p class="post-footer align-right">
 					<a href="https://www.broadcom.com/products/storage/fibre-channel-host-bus-adapters/onecore-storage-sdk" class="readmore">Emulex OneCore Storage Drivers</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
@@ -81,7 +77,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_fcoe.html b/scst/www/target_fcoe.html
index 570e00d..193c853 100644
--- a/scst/www/target_fcoe.html
+++ b/scst/www/target_fcoe.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -57,16 +53,17 @@
 				<h1>FCoE target</h1>
 				<p><img src="images/t_fcoe.gif" width="100" height="120" class="float-left" alt="SCST Fcoe">
 				SCST Fibre Channel over Ethernet (FCoE) target is developed by Open-FCoE team and Joe Eykholt.
-				Since February 2010 the main development place of it is SCST SVN repository.
+				Since December 2021 development of this driver
+				happens in the SCST Git repository.
 				</p>
-				<p>You can download the latest development version from the SCST SVN repository. See the download
+				<p>You can download the latest development version from the SCST Git repository. See the download
 				page how to setup access to it.
 				</p><br><br><br><br>
 				<p class="post-footer align-right">
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 					<a href="http://www.open-fcoe.org/" class="readmore">Open-FCoE Site</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
@@ -75,7 +72,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_ibmvscsi.html b/scst/www/target_ibmvscsi.html
deleted file mode 100644
index 6dafd58..0000000
--- a/scst/www/target_ibmvscsi.html
+++ /dev/null

@@ -1,96 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta name="Keywords" content="SRP target, RDMA target">
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="author" content="Daniel Fernandes">
-<meta name="Robots" content="index,follow">
-<link rel="stylesheet" href="images/Orange.css" type="text/css">
-<title>SRP Target Driver</title>
-</head>
-
-<body>
-<!-- wrap starts here -->
-<div id="wrap">
-	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
-		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
-	</div>
-
-	<div id="menu">
-		<ul>
-			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
-			<li id="current"><a href="targets.html">Drivers</a></li>
-			<li><a href="downloads.html">Downloads</a></li>
-			<li><a href="contributing.html">Contributing</a></li>
-			<li><a href="comparison.html">Comparison</a></li>
-			<li><a href="users.html">Users</a></li>
-		</ul>
-	</div>
-
-	<!-- content-wrap starts here -->
-	<div id="content-wrap">
-			<div id="sidebar">
-				<h1>Target Drivers</h1>
-				<ul class="sidemenu">
-					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
-					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
-					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
-					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
-					<li><a href="target_fcoe.html">FCoE Target</a></li>
-					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
-				</ul>
-				<h1>User utilities</h1>
-				<ul class="sidemenu">
-					<li><a href="scst_admin.html">SCST Admin Utility</a></li>
-					<li><a href="handler_fileio_tgt.html">FILEIO_TGT handler</a></li>
-				</ul>
-			</div>
-
-	  		<div id="main">
-				<h1>IBM Virtual SCSI Target</h1>
-				<p>The virtual SCSI (VSCSI) protocol as defined in <a href="http://www.power.org" class="readmore">Power Architecture Standard</a>
-				is a protocol that allows one logical partition (LPAR) to access SCSI targets provided by another LPAR.
-				The LPAR that provides one or more SCSI targets is called the VIO server or
-				VIOS. The ibmvstgt driver is a VIOS driver that makes it possible to access
-				exported target devices via the VSCSI protocol.</p>
-				<p>This driver is based on ibmvstgt driver, but comparing to the original ibmvstgt has a number of important fixes and improvements.
-				The port was made by Bart Van Assche.</p>
-				<p>You can download it from the SCST SVN repository. See the download page how to setup access to it.</p>
-				<br><br><br>
-				<p class="post-footer align-right">
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
-				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
-	  		</div>
-	<!-- content-wrap ends here -->
-	</div>
-
-<!-- wrap ends here -->
-</div>
-<!-- footer starts here -->
-<div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
-	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
-</div>
-<!-- footer ends here -->
-<!-- Piwik -->
-<script type="text/javascript">
-var pkBaseURL = (("https:" == document.location.protocol) ? "https://apps.sourceforge.net/piwik/scst/" : "http://apps.sourceforge.net/piwik/scst/");
-document.write(unescape("%3Cscript src='" + pkBaseURL + "piwik.js' type='text/javascript'%3E%3C/script%3E"));
-</script><script type="text/javascript">
-piwik_action_name = '';
-piwik_idsite = 1;
-piwik_url = pkBaseURL + "piwik.php";
-piwik_log(piwik_action_name, piwik_idsite, piwik_url);
-</script>
-<object><noscript><p><img src="http://apps.sourceforge.net/piwik/scst/piwik.php?idsite=1" alt="piwik"></p></noscript></object>
-<!-- End Piwik Tag -->
-</body>
-</html>

diff --git a/scst/www/target_iscsi.html b/scst/www/target_iscsi.html
index 6fee660..a2472fd 100644
--- a/scst/www/target_iscsi.html
+++ b/scst/www/target_iscsi.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
-			<li><a href="http://scst.sourceforge.net">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="index.html">Home</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -66,8 +62,9 @@
 				the SCST core. You can also use a migration tool developed by Scalable Informatics Inc., which will
 				convert your IET machine to an iSCSI-SCST machine. See README for more details.</p>
 
-				<p>You can find the latest development version of this driver in the SCST SVN. See the download page how to setup
-				access to it.</p>
+				<p>You can find the latest development version
+				of this driver in the SCST Git repository. See
+				the download page for how to set up access to it.</p>
 
 				<h1>Certification</h1>
 
@@ -82,14 +79,14 @@
 					<a href="http://community.mellanox.com/docs/DOC-1479" class="readmore">iSER HOWTO</a>
 					<a href="SCST_Gentoo_HOWTO.txt" class="readmore">Gentoo HOWTO</a>
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
 	  		</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_local.html b/scst/www/target_local.html
index 7125d70..7ba802a 100644
--- a/scst/www/target_local.html
+++ b/scst/www/target_local.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -69,13 +65,13 @@
 				<p>This driver was made by Richard Sharpe.</p>
 
 				<p>You can download
-				the latest development version from the SCST SVN repository. See the
+				the latest development version from the SCST Git repository. See the
 				download page how to setup access to it. </p><br><br><br>
 				<p class="post-footer align-right">
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
@@ -84,7 +80,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_lsi.html b/scst/www/target_lsi.html
index ad6a95b..2574cca 100644
--- a/scst/www/target_lsi.html
+++ b/scst/www/target_lsi.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -52,26 +48,6 @@
 					<li><a href="handler_fileio_tgt.html">FILEIO_TGT handler</a></li>
 				</ul>
 			</div>
-
-	  		<div id="main">
-				<h1>Target driver for LSI/MPT adapters</h1>
-				<p><img src="images/t_lsi.gif" width="100" height="120" class="float-left" alt="SCST LSI">
-				Target driver for LSI/MPT adapters was originally developed by Hu Gang, then Erik Habbinga has continued the development. </p>
-
-				<p>It supports parallel SCSI (SPI), including Wide SCSI, and Fibre Channel, but also should work with SAS. This driver is on the
-				alpha stage and available for download from the SCST SVN repository. See the download page how to setup access to it.
-				</p>
-
-				<p>Recently Theodore Vaida updated it for the latest hardware generation, including 12G support. You can download current version
-				from <a href="https://github.com/exactassembly/meta-xa-stm">Github</a>.</p>
-
-				<br><br><br>
-				<p class="post-footer align-right">
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
-					<a href="https://github.com/exactassembly/meta-xa-stm" class="readmore">Github</a>
-				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
-	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
 
@@ -79,7 +55,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_mvsas.html b/scst/www/target_mvsas.html
deleted file mode 100644
index 842ee8a..0000000
--- a/scst/www/target_mvsas.html
+++ /dev/null

@@ -1,93 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta name="Keywords" content="SCSI target, SAS target, Marvell target">
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="author" content="Daniel Fernandes">
-<meta name="Robots" content="index,follow">
-<link rel="stylesheet" href="images/Orange.css" type="text/css">
-<title>Marvell SAS Target Driver</title>
-</head>
-
-<body>
-<!-- wrap starts here -->
-<div id="wrap">
-	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
-		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
-	</div>
-
-	<div id="menu">
-		<ul>
-			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
-			<li id="current"><a href="targets.html">Drivers</a></li>
-			<li><a href="downloads.html">Downloads</a></li>
-			<li><a href="contributing.html">Contributing</a></li>
-			<li><a href="comparison.html">Comparison</a></li>
-			<li><a href="users.html">Users</a></li>
-		</ul>
-	</div>
-
-	<!-- content-wrap starts here -->
-	<div id="content-wrap">
-			<div id="sidebar">
-				<h1>Target Drivers</h1>
-				<ul class="sidemenu">
-					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
-					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
-					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
-					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
-					<li><a href="target_fcoe.html">FCoE Target</a></li>
-					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
-				</ul>
-				<h1>User utilities</h1>
-				<ul class="sidemenu">
-					<li><a href="scst_admin.html">SCST Admin Utility</a></li>
-					<li><a href="handler_fileio_tgt.html">FILEIO_TGT handler</a></li>
-				</ul>
-			</div>
-
-	  		<div id="main">
-				<h1>Target driver for Marvell SAS adapters</h1>
-				<p><img src="images/t_sas.gif" width="100" height="120" class="float-left" alt="SCST Marvell SAS">
-				<p>Target driver for Marvell SAS adapters is developed by Marvell and Andy Yan. It is fully functional
-				SAS target driver.</p>
-
-				<p>It is on the beta stage. You can download it from the SCST SVN repository. See the download page how
-				to setup access to it. </p><br><br><br>
-				<p class="post-footer align-right">
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
-				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
-	  		</div>
-	<!-- content-wrap ends here -->
-	</div>
-
-<!-- wrap ends here -->
-</div>
-<!-- footer starts here -->
-<div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names">Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
-	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
-</div>
-<!-- footer ends here -->
-<!-- Piwik -->
-<script type="text/javascript">
-var pkBaseURL = (("https:" == document.location.protocol) ? "https://apps.sourceforge.net/piwik/scst/" : "http://apps.sourceforge.net/piwik/scst/");
-document.write(unescape("%3Cscript src='" + pkBaseURL + "piwik.js' type='text/javascript'%3E%3C/script%3E"));
-</script><script type="text/javascript">
-piwik_action_name = '';
-piwik_idsite = 1;
-piwik_url = pkBaseURL + "piwik.php";
-piwik_log(piwik_action_name, piwik_idsite, piwik_url);
-</script>
-<object><noscript><p><img src="http://apps.sourceforge.net/piwik/scst/piwik.php?idsite=1" alt="piwik"></p></noscript></object>
-<!-- End Piwik Tag -->
-</body>
-</html>

diff --git a/scst/www/target_old.html b/scst/www/target_old.html
deleted file mode 100644
index 04faa80..0000000
--- a/scst/www/target_old.html
+++ /dev/null

@@ -1,129 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta name="Keywords" content="SCST Old Unsupported Target Drivers">
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="author" content="Daniel Fernandes">
-<meta name="Robots" content="index,follow">
-<link rel="stylesheet" href="images/Orange.css" type="text/css">
-<title>Old Unsupported SCST Target Drivers</title>
-</head>
-
-<body>
-<!-- wrap starts here -->
-<div id="wrap">
-	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
-		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
-	</div>
-
-	<div id="menu">
-		<ul>
-			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
-			<li id="current"><a href="targets.html">Drivers</a></li>
-			<li><a href="downloads.html">Downloads</a></li>
-			<li><a href="contributing.html">Contributing</a></li>
-			<li><a href="comparison.html">Comparison</a></li>
-			<li><a href="users.html">Users</a></li>
-		</ul>
-	</div>
-
-	<!-- content-wrap starts here -->
-	<div id="content-wrap">
-			<div id="sidebar">
-				<h1>Target Drivers</h1>
-				<ul class="sidemenu">
-					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
-					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
-					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
-					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
-					<li><a href="target_fcoe.html">FCoE Target</a></li>
-					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
-				</ul>
-				<h1>User utilities</h1>
-				<ul class="sidemenu">
-					<li><a href="scst_admin.html">SCST Admin Utility</a></li>
-					<li><a href="handler_fileio_tgt.html">FILEIO_TGT handler</a></li>
-				</ul>
-			</div>
-
-	  		<div id="main">
-	  			<h1>Target driver for QLogic ISP chipsets</h1>
-				<p><img src="images/t_unsupported.gif" width="100" height="120" class="float-left" alt="SCST Unsupported">
-				This is an SCST driver for ISP QLogic chipsets commonly used in many SCSI and FC host bus adapters.
-				It is based on Matthew Jacob's (<a href="http://www.feral.com">http://www.feral.com</a>)
-				multiplatform driver for ISP chipsets. Update for SCST was made by Stanislaw Gruszka for Open-E Inc.</p>
-
-				<p>The latest release is 1.0.2. It supports kernel versions between 2.6.16 and 2.6.32.</p>
-				<p>This driver is obsoleted in favor of qla2x00t.</p>
-				<br>
-
-
-				<h1>Old target driver for QLogic qla2x00t adapters for 2.4 kernels</h1>
-				<p><img src="images/t_unsupported.gif" width="100" height="120" class="float-left" alt="SCST Unsupported">
-				Old target driver for QLogic qla2x00t adapters is capable to work on 2.4 kernels.
-				It has all required features and looks to be quite stable. It is designed to work in conjunction with the
-				initiator driver, which is intended to perform all the initialization and shutdown tasks. In the current release as
-				a base for the initiator driver was taken Red Hat's driver from the stock 2.4.20 kernel. Then it was patched to
-				enable the target mode and provide all necessary callbacks, and it's still able to work as initiator only. Mode,
-				when a host acts as the initiator and the target simultaneously, is also supported. This driver is obsoleted in
-				favor of 2.6-based driver.</p>
-				<p>The latest version is 0.9.3.4. Requires Linux kernel versions 2.4.20 or higher and SCST version 0.9.3-pre4 or
-				higher. If you are lucky, it works also on 2.6 kernels, see README file for details. Tested on i386 only, but
-				should work on any other supported by Linux platform.</p>
-				<p>Currently it is <strong>not supported</strong> and listed here for historical reasons only.</p>
-				<br>
-
-				<h1>Target drivers for Adaptec 7xxx and QLogic QLA12xx adapters</h1>
-				<p><img src="images/t_unsupported.gif" width="100" height="120" class="float-left" alt="SCST Unsupported">
-				Target drivers for Adaptec 7xxx and QLogic QLA12xx adapters have been developed by Hu Gang and they available for
-				download from <a href="http://bj.soulinfo.com/~hugang/scst/tgt/">http://bj.soulinfo.com/~hugang/scst/tgt/</a>.
-				These drivers are not completed, but looks to be a good starting point if you are going to use one of these adapters.
-				SCST team don't have the appropriate hardware, therefore have not tested and don't support these drivers.
-				Send all questions to <strong>Hu Gang < hugang at soulinfo com ></strong>. If some of these drivers don't compile for
-				you, try again with SCST version 0.9.3-pre2.</p><br><br>
-				<br>
-
-				<h1>Patches for UNH-iSCSI Target 1.5.03 and 1.6.00 to SCST</h1>
-				<p><img src="images/t_unsupported.gif" width="100" height="120" class="float-left" alt="SCST Unsupported">
-				SCST is much more advanced, than the internal mid-level of <a href="http://sourceforge.net/projects/unh-iscsi">
-				UNH-iSCSI target driver</a>. With SCST the iSCSI target benefits from all its features and gets ability to use all
-				its advantages, like high performance and scalability, SMP support, required SCSI functionality emulation, etc.</p>
-
-				<p>Since the interface between SCST and the target drivers is based on work, done by UNH IOL, it was relatively
-				simple to update UNH-iSCSI target to work over SCST. Mostly it was "search and replace" job. The built-in
-				scsi_target remains available as a compile-time option.</p>
-
-				<p>Requires Linux kernel versions 2.4.20 or higher or 2.6.7 or higher and SCST version 0.9.2 or higher.</p>
-				<p>Currently it is <strong>not supported</strong> and listed here for historical reasons only.</p>
-	<!-- content-wrap ends here -->
-			</div>
-	</div>
-<!-- wrap ends here -->
-</div>
-<!-- footer starts here -->
-<div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
-	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
-</div>
-<!-- footer ends here -->
-<!-- Piwik -->
-<script type="text/javascript">
-var pkBaseURL = (("https:" == document.location.protocol) ? "https://apps.sourceforge.net/piwik/scst/" : "http://apps.sourceforge.net/piwik/scst/");
-document.write(unescape("%3Cscript src='" + pkBaseURL + "piwik.js' type='text/javascript'%3E%3C/script%3E"));
-</script><script type="text/javascript">
-piwik_action_name = '';
-piwik_idsite = 1;
-piwik_url = pkBaseURL + "piwik.php";
-piwik_log(piwik_action_name, piwik_idsite, piwik_url);
-</script>
-<object><noscript><p><img src="http://apps.sourceforge.net/piwik/scst/piwik.php?idsite=1" alt="piwik"></p></noscript></object>
-<!-- End Piwik Tag -->
-</body>
-</html>

diff --git a/scst/www/target_qla2x00t.html b/scst/www/target_qla2x00t.html
index 1bb7d79..195e3d0 100644
--- a/scst/www/target_qla2x00t.html
+++ b/scst/www/target_qla2x00t.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -78,10 +74,10 @@
 					<a href="qla2x00t-howto.html" class="readmore">HOWTO</a>
 					<a href="https://blog.it-kb.ru/2018/03/12/configure-the-server-with-qlogic-fc-hba-on-debian-linux-9-and-scst-fc-target-as-storage-for-csv-volumes-in-the-hyper-v-cluster-for-highly-available-virtual-machines/" class="readmore"> Russian HOWTO</a>
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/trunk" class="readmore">SCST SVN trunk</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 					<a href="scst-qla2xxx-unified-20180330.tgz" class="readmore">QLA git snapshot</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
@@ -90,7 +86,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/target_srp.html b/scst/www/target_srp.html
index 826abf3..784404d 100644
--- a/scst/www/target_srp.html
+++ b/scst/www/target_srp.html

@@ -13,15 +13,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -55,18 +51,29 @@
 
 	  		<div id="main">
 				<h1>Infiniband SCSI RDMA protocol (SRP) target driver</h1>
-				<p><img src="images/t_rdma.gif" width="100" height="120" class="float-left" alt="SCST SRP">
-				SCSI RDMA Protocol (SRP) target driver has been developed by Vu Pham. Since March
-				2008 the main development place of the SRP target driver is SCST SVN repository.
-				It is maintained by Bart Van Assche.</p>
-				<p>This driver is mainline Linux kernel ready and going to be pushed to it
-				together with other SCST patches.</p>
+				<p><img src="images/t_rdma.gif" width="100"
+				height="120" class="float-left" alt="SCST
+				SRP">The first version of the SCSI RDMA
+				Protocol (SRP) target driver has been written
+				by Vu Pham. Since March 2008 the main
+				development place of the SRP target driver is
+				the SCST Git repository. The current maintainer is
+				Bart Van Assche.</p>
+				<p>The original version of this driver only
+				supported <a href="https://en.wikipedia.org/wiki/InfiniBand">InfiniBand</a>
+				networks, hence the "ib" in ib_srpt. The
+				current version supports InfiniBand,
+				<a href="https://en.wikipedia.org/wiki/RDMA_over_Converged_Ethernet">RoCE</a>
+				and <a href="https://en.wikipedia.org/wiki/IWARP">iWARP</a>.</p>
+				<p>A version of this driver is available in
+				  the upstream Linux kernel. See
+				  also <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/infiniband/ulp/srpt">drivers/infiniband/ulp/srpt</a>.</p>
 				<br><br><br>
 				<p class="post-footer align-right">
 					<a href="downloads.html" class="readmore">Download</a>
-					<a href="http://sourceforge.net/p/scst/svn/HEAD/tree/" class="readmore">SCST SVN Repository</a>
+					<a href="https://github.com/SCST-project/scst" class="readmore">SCST Git Repository</a>
 				</p>
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	<!-- content-wrap ends here -->
 	</div>
@@ -75,7 +82,7 @@
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/targets.html b/scst/www/targets.html
index e58949b..4f219be 100644
--- a/scst/www/targets.html
+++ b/scst/www/targets.html

@@ -6,22 +6,22 @@
 <meta name="author" content="Daniel Fernandes">
 <meta name="Robots" content="index,follow">
 <link rel="stylesheet" href="images/Orange.css" type="text/css">
-<title>SCST SCSI Target Drivers &amp Utilities</title>
+<title>SCST SCSI Target Drivers &amp; Utilities</title>
 </head>
 
 <body>
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li id="current"><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -38,13 +38,9 @@
 					<li><a href="target_iscsi.html">ISCSI-SCST with iSER</a></li>
 					<li><a href="target_qla2x00t.html">QLogic FC qla2x00t</a></li>
 					<li><a href="target_srp.html">SCSI RDMA Protocol (SRP)</a></li>
-					<li><a href="target_mvsas.html">Marvell SAS adapters</a></li>
 					<li><a href="target_emulex.html">Emulex FC/FCoE</a></li>
-					<li><a href="target_lsi.html">LSI/MPT adapters</a></li>
 					<li><a href="target_fcoe.html">FCoE Target</a></li>
 					<li><a href="target_local.html">Local Target Driver</a></li>
-					<li><a href="target_ibmvscsi.html">IBM pSeries Virtual SCSI</a></li>
-					<li><a href="target_old.html">Old Unsupported</a></li>
 				</ul>
 				<h1>User utilities</h1>
 				<ul class="sidemenu">
@@ -61,22 +57,19 @@
 						<li><span>iSCSI with iSER</span></li>
 						<li><span>Fibre Channel QLogic qla2xxx series</span></li>
 						<li><span>Infiniband SCSI RDMA Protocol (SRP)</span></li>
-						<li><span>Marvell SAS adapters</span></li>
 						<li><span>Emulex FC/FCoE</span></li>
-						<li><span>LSI/MPT adapters (parallel SCSI, including Wide Ultra320, SAS, Fibre Channel)</span></li>
 						<li><span>FCoE</span></li>
 						<li><span>Local access</span></li>
-						<li><span>IBM pSeries Virtual SCSI</span></li>
 						<li><span>...</span></li>
 					</ul>
 
-				<table border=0><tr><td height="300px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 300px">&nbsp;</td></tr></table>
 	  		</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/scst/www/users.html b/scst/www/users.html
index 7b01897..4599a52 100644
--- a/scst/www/users.html
+++ b/scst/www/users.html

@@ -15,15 +15,15 @@
 <!-- wrap starts here -->
 <div id="wrap">
 	<div id="header">
-		<div class="logoimg"></div><h1 id="logo"><span class="orange"></span></h1>
+		<div class="logoimg"></div><h1 id="logo"><span class="orange">&nbsp;</span></h1>
 		<h2 id=slogan>Generic SCSI Target Subsystem for Linux</h2>
 	</div>
 
 	<div id="menu">
 		<ul>
 			<li><a href="index.html">Home</a></li>
-			<li><a href="http://www.sourceforge.net/projects/scst">Main</a></li>
-			<li><a href="http://sourceforge.net/news/?group_id=110471">News</a></li>
+			<li><a href="https://github.com/SCST-project/scst">Main</a></li>
+			<li><a href="https://github.com/SCST-project/scst/releases">News</a></li>
 			<li><a href="targets.html">Drivers</a></li>
 			<li><a href="downloads.html">Downloads</a></li>
 			<li><a href="contributing.html">Contributing</a></li>
@@ -41,7 +41,7 @@
 
 				<span class="companysubtitles">Companies developed SCST target drivers for their adapters</span>
 
-				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;">
+				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;" summary="">
 				<tr>
 					<td class="companybox" width="33%">
 						<a href="http://qlogic.com"><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a0/QLogic_Logo2.png/300px-QLogic_Logo2.png" style="height: 80px" alt="QLogic"></a></td>
@@ -62,7 +62,7 @@
 
 				<span class="companysubtitles">Companies using SCST in their products and solutions</span>
 
-				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;">
+				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;" summary="">
 				<tr>
 					<td class="companybox" width="33%">
 						<a href="https://www.onestopsystems.com/"><img src="https://www.onestopsystems.com/sites/default/files/onestopsystems-logo.png" style="width: 180px; height: 60px" alt="One Stop Systems"></a></td>
@@ -139,7 +139,7 @@
 
 				<span class="companysubtitles">Companies using SCST for their internal storage infrastructure</span>
 
-				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;">
+				<table width="100%" border="0" cellpadding="0" cellspacing="0" style="margin: 10px 15px;" summary="">
 				<tr>
 					<td class="companybox" width="33%">
 						<a href="http://www.datacrunch.net"><img src="http://www.datacrunch.net/images/general/dc_logo.png" alt="DataCrunch Company"></a></td>
@@ -157,13 +157,13 @@
 				SCST-powered product or solution, we will be proud to write on our pages that SCST engine has successfully
 				passed the certification tests. This is the least appreciation your company can do for SCST.</p>
 
-				<table border=0><tr><td height="15px">&nbsp;</td></tr></table>
+				<table border=0 summary=""><tr><td style="height: 15px">&nbsp;</td></tr></table>
 	  		</div>
 	</div>
 </div>
 <!-- footer starts here -->
 <div id="footer">
-	<p>&copy; Copyright 2004 - 2020<b><font class="names"> Vladislav Bolkhovitin &amp others</font></b>&nbsp;&nbsp;
+	<p>&copy; Copyright 2004 - 2021<b><font class="names"> Vladislav Bolkhovitin, Bart Van Assche &amp; others</font></b>&nbsp;&nbsp;
 	Design by: <b><font class="names">Daniel Fernandes</font></b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</p>
 </div>
 <!-- footer ends here -->

diff --git a/zfs/.editorconfig b/zfs/.editorconfig
new file mode 100644
index 0000000..0f4b96a
--- /dev/null
+++ b/zfs/.editorconfig

@@ -0,0 +1,10 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.{c,h}]
+tab_width = 8
+indent_style = tab

diff --git a/zfs/.github/CONTRIBUTING.md b/zfs/.github/CONTRIBUTING.md
deleted file mode 100644
index 2b47d45..0000000
--- a/zfs/.github/CONTRIBUTING.md
+++ /dev/null

@@ -1,292 +0,0 @@
-# Contributing to ZFS on Linux
-<p align="center"><img src="http://zfsonlinux.org/images/zfs-linux.png"/></p>
-
-*First of all, thank you for taking the time to contribute!*
-
-By using the following guidelines, you can help us make ZFS on Linux even
-better.
-
-## Table Of Contents
-[What should I know before I get
-started?](#what-should-i-know-before-i-get-started)
-
-  * [Get ZFS](#get-zfs)
-  * [Debug ZFS](#debug-zfs)
-  * [Where can I ask for help?](#where-can-I-ask-for-help)
-
-[How Can I Contribute?](#how-can-i-contribute)
-
-  * [Reporting Bugs](#reporting-bugs)
-  * [Suggesting Enhancements](#suggesting-enhancements)
-  * [Pull Requests](#pull-requests)
-  * [Testing](#testing)
-
-[Style Guides](#style-guides)
-
-  * [Coding Conventions](#coding-conventions)
-  * [Commit Message Formats](#commit-message-formats)
-    * [New Changes](#new-changes)
-    * [OpenZFS Patch Ports](#openzfs-patch-ports)
-    * [Coverity Defect Fixes](#coverity-defect-fixes)
-    * [Signed Off By](#signed-off-by)
-
-Helpful resources
-
-  * [ZFS on Linux wiki](https://github.com/zfsonlinux/zfs/wiki)
-  * [OpenZFS Documentation](http://open-zfs.org/wiki/Developer_resources)
-  * [Git and GitHub for beginners](https://github.com/zfsonlinux/zfs/wiki/Git-and-GitHub-for-beginners)
-
-## What should I know before I get started?
-
-### Get ZFS
-You can build zfs packages by following [these
-instructions](https://github.com/zfsonlinux/zfs/wiki/Building-ZFS),
-or install stable packages from [your distribution's
-repository](https://github.com/zfsonlinux/zfs/wiki/Getting-Started).
-
-### Debug ZFS
-A variety of methods and tools are available to aid ZFS developers.
-It's strongly recommended that when developing a patch the `--enable-debug`
-configure option should be set. This will enable additional correctness
-checks and all the ASSERTs to help quickly catch potential issues.
-
-In addition, there are numerous utilities and debugging files which
-provide visibility into the inner workings of ZFS.  The most useful
-of these tools are discussed in detail on the [debugging ZFS wiki
-page](https://github.com/zfsonlinux/zfs/wiki/Debugging).
-
-### Where can I ask for help?
-[The zfs-discuss mailing list or IRC](http://list.zfsonlinux.org)
-are the best places to ask for help. Please do not file support requests
-on the GitHub issue tracker.
-
-## How Can I Contribute?
-
-### Reporting Bugs
-*Please* contact us via the [zfs-discuss mailing
-list or IRC](http://list.zfsonlinux.org) if you aren't
-certain that you are experiencing a bug.
-
-If you run into an issue, please search our [issue
-tracker](https://github.com/zfsonlinux/zfs/issues) *first* to ensure the
-issue hasn't been reported before. Open a new issue only if you haven't
-found anything similar to your issue.
-
-You can open a new issue and search existing issues using the public [issue
-tracker](https://github.com/zfsonlinux/zfs/issues).
-
-#### When opening a new issue, please include the following information at the top of the issue:
-* What distribution (with version) you are using.
-* The spl and zfs versions you are using, installation method (repository
-or manual compilation).
-* Describe the issue you are experiencing.
-* Describe how to reproduce the issue.
-* Including any warning/errors/backtraces from the system logs.
-
-When a new issue is opened, it is not uncommon for developers to request
-additional information.
-
-In general, the more detail you share about a problem the quicker a
-developer can resolve it. For example, providing a simple test case is always
-exceptionally helpful.
-
-Be prepared to work with the developers investigating your issue. Your
-assistance is crucial in providing a quick solution. They may ask for
-information like:
-
-* Your pool configuration as reported by `zdb` or `zpool status`.
-* Your hardware configuration, such as
-  * Number of CPUs.
-  * Amount of memory.
-  * Whether your system has ECC memory.
-  * Whether it is running under a VMM/Hypervisor.
-  * Kernel version.
-  * Values of the spl/zfs module parameters.
-* Stack traces which may be logged to `dmesg`.
-
-### Suggesting Enhancements
-ZFS on Linux is a widely deployed production filesystem which is under
-active development. The team's primary focus is on fixing known issues,
-improving performance, and adding compelling new features.
-
-You can view the list of proposed features
-by filtering the issue tracker by the ["Feature"
-label](https://github.com/zfsonlinux/zfs/issues?q=is%3Aopen+is%3Aissue+label%3AFeature).
-If you have an idea for a feature first check this list. If your idea already
-appears then add a +1 to the top most comment, this helps us gauge interest
-in that feature.
-
-Otherwise, open a new issue and describe your proposed feature.  Why is this
-feature needed?  What problem does it solve?
-
-### Pull Requests
-* All pull requests must be based on the current master branch and apply
-without conflicts.
-* Please attempt to limit pull requests to a single commit which resolves
-one specific issue.
-* Make sure your commit messages are in the correct format. See the
-[Commit Message Formats](#commit-message-formats) section for more information.
-* When updating a pull request squash multiple commits by performing a
-[rebase](https://git-scm.com/docs/git-rebase) (squash).
-* For large pull requests consider structuring your changes as a stack of
-logically independent patches which build on each other.  This makes large
-changes easier to review and approve which speeds up the merging process.
-* Try to keep pull requests simple. Simple code with comments is much easier
-to review and approve.
-* Test cases should be provided when appropriate.
-* If your pull request improves performance, please include some benchmarks.
-* The pull request must pass all required [ZFS
-Buildbot](http://build.zfsonlinux.org/) builders before
-being accepted. If you are experiencing intermittent TEST
-builder failures, you may be experiencing a [test suite
-issue](https://github.com/zfsonlinux/zfs/issues?q=is%3Aissue+is%3Aopen+label%3A%22Test+Suite%22).
-There are also various [buildbot options](https://github.com/zfsonlinux/zfs/wiki/Buildbot-Options)
-to control how changes are tested.
-* All proposed changes must be approved by a ZFS on Linux organization member.
-
-### Testing
-All help is appreciated! If you're in a position to run the latest code
-consider helping us by reporting any functional problems, performance
-regressions or other suspected issues. By running the latest code to a wide
-range of realistic workloads, configurations and architectures we're better
-able quickly identify and resolve potential issues.
-
-Users can also run the [ZFS Test
-Suite](https://github.com/zfsonlinux/zfs/tree/master/tests) on their systems
-to verify ZFS is behaving as intended.
-
-## Style Guides
-
-### Coding Conventions
-We currently use [C  Style  and  Coding  Standards  for
-SunOS](http://www.cis.upenn.edu/%7Elee/06cse480/data/cstyle.ms.pdf) as our
-coding convention.
-
-### Commit Message Formats
-#### New Changes
-Commit messages for new changes must meet the following guidelines:
-* In 72 characters or less, provide a summary of the change as the
-first line in the commit message.
-* A body which provides a description of the change. If necessary,
-please summarize important information such as why the proposed
-approach was chosen or a brief description of the bug you are resolving.
-Each line of the body must be 72 characters or less.
-* The last line must be a `Signed-off-by:` tag. See the
-[Signed Off By](#signed-off-by) section for more information.
-
-An example commit message for new changes is provided below.
-
-```
-This line is a brief summary of your change
-
-Please provide at least a couple sentences describing the
-change. If necessary, please summarize decisions such as
-why the proposed approach was chosen or what bug you are
-attempting to solve.
-
-Signed-off-by: Contributor <contributor@email.com>
-```
-
-#### OpenZFS Patch Ports
-If you are porting OpenZFS patches, the commit message must meet
-the following guidelines:
-* The first line must be the summary line from the most important OpenZFS commit being ported.
-It must begin with `OpenZFS dddd, dddd - ` where `dddd` are OpenZFS issue numbers.
-* Provides a `Authored by:` line to attribute each patch for each original author.
-* Provides the `Reviewed by:` and `Approved by:` lines from each original
-OpenZFS commit.
-* Provides a `Ported-by:` line with the developer's name followed by
-their email for each OpenZFS commit.
-* Provides a `OpenZFS-issue:` line with link for each original illumos
-issue.
-* Provides a `OpenZFS-commit:` line with link for each original OpenZFS commit.
-* If necessary, provide some porting notes to describe any deviations from
-the original OpenZFS commits.
-
-An example OpenZFS patch port commit message for a single patch is provided
-below.
-```
-OpenZFS 1234 - Summary from the original OpenZFS commit
-
-Authored by: Original Author <original@email.com>
-Reviewed by: Reviewer One <reviewer1@email.com>
-Reviewed by: Reviewer Two <reviewer2@email.com>
-Approved by: Approver One <approver1@email.com>
-Ported-by: ZFS Contributor <contributor@email.com>
-
-Provide some porting notes here if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/1234
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/abcd1234
-```
-
-If necessary, multiple OpenZFS patches can be combined in a single port.
-This is useful when you are porting a new patch and its subsequent bug
-fixes. An example commit message is provided below.
-```
-OpenZFS 1234, 5678 - Summary of most important OpenZFS commit
-
-1234 Summary from original OpenZFS commit for 1234
-
-Authored by: Original Author <original@email.com>
-Reviewed by: Reviewer Two <reviewer2@email.com>
-Approved by: Approver One <approver1@email.com>
-Ported-by: ZFS Contributor <contributor@email.com>
-
-Provide some porting notes here for 1234 if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/1234
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/abcd1234
-
-5678 Summary from original OpenZFS commit for 5678
-
-Authored by: Original Author2 <original2@email.com>
-Reviewed by: Reviewer One <reviewer1@email.com>
-Approved by: Approver Two <approver2@email.com>
-Ported-by: ZFS Contributor <contributor@email.com>
-
-Provide some porting notes here for 5678 if necessary.
-
-OpenZFS-issue: https://www.illumos.org/issues/5678
-OpenZFS-commit: https://github.com/openzfs/openzfs/commit/efgh5678
-```
-
-#### Coverity Defect Fixes
-If you are submitting a fix to a
-[Coverity defect](https://scan.coverity.com/projects/zfsonlinux-zfs),
-the commit message should meet the following guidelines:
-* Provides a subject line in the format of
-`Fix coverity defects: CID dddd, dddd...` where `dddd` represents
-each CID fixed by the commit.
-* Provides a body which lists each Coverity defect and how it was corrected.
-* The last line must be a `Signed-off-by:` tag. See the
-[Signed Off By](#signed-off-by) section for more information.
-
-An example Coverity defect fix commit message is provided below.
-```
-Fix coverity defects: CID 12345, 67890
-
-CID 12345: Logically dead code (DEADCODE)
-
-Removed the if(var != 0) block because the condition could never be
-satisfied.
-
-CID 67890: Resource Leak (RESOURCE_LEAK)
-
-Ensure free is called after allocating memory in function().
-
-Signed-off-by: Contributor <contributor@email.com>
-```
-
-#### Signed Off By
-A line tagged as `Signed-off-by:` must contain the developer's
-name followed by their email. This is the developer's certification
-that they have the right to submit the patch for inclusion into
-the code base and indicates agreement to the [Developer's Certificate
-of Origin](https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin).
-Code without a proper signoff cannot be merged.
-
-Git can append the `Signed-off-by` line to your commit messages. Simply
-provide the `-s` or `--signoff` option when performing a `git commit`.
-For more information about writing commit messages, visit [How to Write
-a Git Commit Message](https://chris.beams.io/posts/git-commit/).

diff --git a/zfs/.github/ISSUE_TEMPLATE.md b/zfs/.github/ISSUE_TEMPLATE.md
deleted file mode 100644
index e77ab39..0000000
--- a/zfs/.github/ISSUE_TEMPLATE.md
+++ /dev/null

@@ -1,48 +0,0 @@
-<!-- Please fill out the following template, which will help other contributors address your issue. -->
-
-<!--
-Thank you for reporting an issue.
-
-*IMPORTANT* - Please search our issue tracker *before* making a new issue.
-If you cannot find a similar issue, then create a new issue.
-https://github.com/zfsonlinux/zfs/issues 
-
-*IMPORTANT* - This issue tracker is for *bugs* and *issues* only.
-Please search the wiki and the mailing list archives before asking 
-questions on the mailing list.
-https://github.com/zfsonlinux/zfs/wiki/Mailing-Lists
-
-Please fill in as much of the template as possible.
--->
-
-### System information
-<!--  add version after "|" character -->
-Type | Version/Name
- --- | --- 
-Distribution Name	| 
-Distribution Version	| 
-Linux Kernel	| 
-Architecture	| 
-ZFS Version	| 
-SPL Version	| 
-<!-- 
-Commands to find ZFS/SPL versions:
-modinfo zfs | grep -iw version
-modinfo spl | grep -iw version 
--->
-
-### Describe the problem you're observing
-
-### Describe how to reproduce the problem
-
-### Include any warning/errors/backtraces from the system logs
-<!-- 
-*IMPORTANT* - Please mark logs and text output from terminal commands 
-or else Github will not display them correctly. 
-An example is provided below.
-
-Example:
-```
-this is an example how log text should be marked (wrap it with ```)
-```
--->

diff --git a/zfs/.github/PULL_REQUEST_TEMPLATE.md b/zfs/.github/PULL_REQUEST_TEMPLATE.md
deleted file mode 100644
index 699ca90..0000000
--- a/zfs/.github/PULL_REQUEST_TEMPLATE.md
+++ /dev/null

@@ -1,40 +0,0 @@
-<!--- Please fill out the following template, which will help other contributors review your Pull Request. -->
-
-<!--- Provide a general summary of your changes in the Title above -->
-
-<!---
-Documentation on ZFS Buildbot options can be found at
-https://github.com/zfsonlinux/zfs/wiki/Buildbot-Options
--->
-
-### Motivation and Context
-<!--- Why is this change required? What problem does it solve? -->
-<!--- If it fixes an open issue, please link to the issue here. -->
-
-### Description
-<!--- Describe your changes in detail -->
-
-### How Has This Been Tested?
-<!--- Please describe in detail how you tested your changes. -->
-<!--- Include details of your testing environment, and the tests you ran to -->
-<!--- see how your change affects other areas of the code, etc. -->
-<!--- If your change is a performance enhancement, please provide benchmarks here. -->
-
-### Types of changes
-<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
-- [ ] Bug fix (non-breaking change which fixes an issue)
-- [ ] New feature (non-breaking change which adds functionality)
-- [ ] Performance enhancement (non-breaking change which improves efficiency)
-- [ ] Code cleanup (non-breaking change which makes code smaller or more readable)
-- [ ] Breaking change (fix or feature that would cause existing functionality to change)
-- [ ] Documentation (a change to man pages or other documentation)
-
-### Checklist:
-<!--- Go over all the following points, and put an `x` in all the boxes that apply. -->
-<!--- If you're unsure about any of these, don't hesitate to ask. We're here to help! -->
-- [ ] My code follows the ZFS on Linux [code style requirements](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md#coding-conventions).
-- [ ] I have updated the documentation accordingly.
-- [ ] I have read the [**contributing** document](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md).
-- [ ] I have added [tests](https://github.com/zfsonlinux/zfs/tree/master/tests) to cover my changes.
-- [ ] All new and existing tests passed.
-- [ ] All commit messages are properly formatted and contain [`Signed-off-by`](https://github.com/zfsonlinux/zfs/blob/master/.github/CONTRIBUTING.md#signed-off-by).

diff --git a/zfs/.github/codecov.yml b/zfs/.github/codecov.yml
deleted file mode 100644
index 9ae9626..0000000
--- a/zfs/.github/codecov.yml
+++ /dev/null

@@ -1,22 +0,0 @@
-codecov:
-  notify:
-    require_ci_to_pass: false	# always post
-    after_n_builds: 2		# user and kernel
-
-coverage:
-  precision: 2			# 2 digits of precision
-  range: "50...90"		# red -> yellow -> green
-
-  status:
-    project:
-      default:
-        threshold: 1%		# allow 1% coverage variance
-
-    patch:
-      default:
-        threshold: 1%		# allow 1% coverage variance
-
-comment:
-  layout: "reach, diff, flags, footer"
-  behavior: once		# update if exists; post new; skip if deleted
-  require_changes: yes		# only post when coverage changes

diff --git a/zfs/.github/suppressions.txt b/zfs/.github/suppressions.txt
deleted file mode 100644
index f9508a2..0000000
--- a/zfs/.github/suppressions.txt
+++ /dev/null

@@ -1,3 +0,0 @@
-preprocessorErrorDirective:./module/zfs/vdev_raidz_math_avx512f.c:243
-preprocessorErrorDirective:./module/zfs/vdev_raidz_math_sse2.c:266
-

diff --git a/zfs/.gitignore b/zfs/.gitignore
index 57867bf..056bbb8 100644
--- a/zfs/.gitignore
+++ b/zfs/.gitignore

@@ -62,4 +62,9 @@
 *.patch
 *.orig
 *.log
+*.tmp
 venv
+
+*.so
+*.so.debug
+*.so.full

diff --git a/zfs/.gitmodules b/zfs/.gitmodules
index d400f10..9eaa2b0 100644
--- a/zfs/.gitmodules
+++ b/zfs/.gitmodules

@@ -1,3 +1,3 @@
 [submodule "scripts/zfs-images"]
 	path = scripts/zfs-images
-	url = https://github.com/zfsonlinux/zfs-images
+	url = https://github.com/openzfs/zfs-images

diff --git a/zfs/.travis.yml b/zfs/.travis.yml
deleted file mode 100644
index 620c043..0000000
--- a/zfs/.travis.yml
+++ /dev/null

@@ -1,38 +0,0 @@
-language: c
-sudo: required
-env:
-  global:
-    # Travis limits maximum log size, we have to cut tests output 
-    - ZFS_TEST_TRAVIS_LOG_MAX_LENGTH=800
-  matrix:
-    # tags are mainly in ascending order
-    - ZFS_TEST_TAGS='acl,atime,bootfs,cachefile,casenorm,chattr,checksum,clean_mirror,compression,ctime,delegate,devices,events,exec,fault,features,grow_pool,zdb,zfs,zfs_bookmark,zfs_change-key,zfs_clone,zfs_copies,zfs_create,zfs_diff,zfs_get,zfs_inherit,zfs_load-key,zfs_rename'
-    - ZFS_TEST_TAGS='cache,history,hkdf,inuse,zfs_property,zfs_receive,zfs_reservation,zfs_send,zfs_set,zfs_share,zfs_snapshot,zfs_unload-key,zfs_unmount,zfs_unshare,zfs_upgrade,zpool,zpool_add,zpool_attach,zpool_clear,zpool_create,zpool_destroy,zpool_detach'
-    - ZFS_TEST_TAGS='grow_replicas,mv_files,cli_user,zfs_mount,zfs_promote,zfs_rollback,zpool_events,zpool_expand,zpool_export,zpool_get,zpool_history,zpool_import,zpool_labelclear,zpool_offline,zpool_online,zpool_remove,zpool_reopen,zpool_replace,zpool_scrub,zpool_set,zpool_status,zpool_sync,zpool_upgrade'
-    - ZFS_TEST_TAGS='zfs_destroy,large_files,largest_pool,link_count,migration,mmap,mmp,mount,nestedfs,no_space,nopwrite,online_offline,pool_names,poolversion,privilege,quota,raidz,redundancy,rsend'
-    - ZFS_TEST_TAGS='inheritance,refquota,refreserv,rename_dirs,replacement,reservation,rootpool,scrub_mirror,slog,snapshot,snapused,sparse,threadsappend,tmpfile,truncate,upgrade,userquota,vdev_zaps,write_dirs,xattr,zvol,libzfs'
-before_install:
-    - sudo apt-get -qq update
-    - sudo apt-get install --yes -qq build-essential autoconf libtool gawk alien fakeroot linux-headers-$(uname -r)
-    - sudo apt-get install --yes -qq zlib1g-dev uuid-dev libattr1-dev libblkid-dev libselinux-dev libudev-dev libssl-dev
-    # packages for tests
-    - sudo apt-get install --yes -qq parted lsscsi ksh attr acl nfs-kernel-server fio
-install:
-    - git clone --depth=1 https://github.com/zfsonlinux/spl
-    - cd spl
-    - git checkout master
-    - sh autogen.sh
-    - ./configure
-    - make --no-print-directory -s pkg-utils pkg-kmod
-    - sudo dpkg -i *.deb
-    - cd ..
-    - sh autogen.sh
-    - ./configure
-    - make --no-print-directory -s pkg-utils pkg-kmod
-    - sudo dpkg -i *.deb
-script:
-    - travis_wait 50 /usr/share/zfs/zfs-tests.sh -v -T $ZFS_TEST_TAGS
-after_failure:
-    - find /var/tmp/test_results/current/log -type f -name '*' -printf "%f\n" -exec cut -c -$ZFS_TEST_TRAVIS_LOG_MAX_LENGTH {} \;
-after_success:
-    - find /var/tmp/test_results/current/log -type f -name '*' -printf "%f\n" -exec cut -c -$ZFS_TEST_TRAVIS_LOG_MAX_LENGTH {} \;

diff --git a/zfs/AUTHORS b/zfs/AUTHORS
index 5b183bd..aab8bf2 100644
--- a/zfs/AUTHORS
+++ b/zfs/AUTHORS

@@ -182,6 +182,7 @@
     Keith M Wesolowski <wesolows@foobazco.org>
     Kevin Tanguy <kevin.tanguy@ovh.net>
     KireinaHoro <i@jsteward.moe>
+    Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl>
     Kohsuke Kawaguchi <kk@kohsuke.org>
     Kyle Blatter <kyleblatter@llnl.gov>
     Kyle Fuller <inbox@kylefuller.co.uk>
@@ -210,6 +211,7 @@
     Michael Gebetsroither <michael@mgeb.org>
     Michael Kjorling <michael@kjorling.se>
     Michael Martin <mgmartin.mgm@gmail.com>
+    Michael Niewöhner <foss@mniewoehner.de>
     Mike Gerdts <mike.gerdts@joyent.com>
     Mike Harsch <mike@harschsystems.com>
     Mike Leddy <mike.leddy@gmail.com>
@@ -258,6 +260,7 @@
     Saso Kiselkov <saso.kiselkov@nexenta.com>
     Scot W. Stevenson <scot.stevenson@gmail.com>
     Sean Eric Fagan <sef@ixsystems.com>
+    Sebastian Gottschall <s.gottschall@dd-wrt.com>
     Sen Haerens <sen@senhaerens.be>
     Serapheim Dimitropoulos <serapheim@delphix.com>
     Seth Forshee <seth.forshee@canonical.com>

diff --git a/zfs/CODE_OF_CONDUCT.md b/zfs/CODE_OF_CONDUCT.md
index d314a66..51c9ef1 100644
--- a/zfs/CODE_OF_CONDUCT.md
+++ b/zfs/CODE_OF_CONDUCT.md

@@ -1,2 +1,2 @@
-The [OpenZFS Code of Conduct](http://www.open-zfs.org/wiki/Code_of_Conduct)
-applies to spaces associated with the ZFS on Linux project, including GitHub.
+The [OpenZFS Code of Conduct](https://openzfs.org/wiki/Code_of_Conduct)
+applies to spaces associated with the OpenZFS project, including GitHub.

diff --git a/zfs/COPYRIGHT b/zfs/COPYRIGHT
index 1eda895..85556b5 100644
--- a/zfs/COPYRIGHT
+++ b/zfs/COPYRIGHT

@@ -19,11 +19,11 @@
   * AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman
   * AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
   * PBKDF2 Implementation: lib/libzfs/THIRDPARTYLICENSE.openssl
-  * SPL Implementation: module/spl/THIRDPARTYLICENSE.gplv2
-  * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
-  * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
-  * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
-  * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
+  * SPL Implementation: module/os/linux/spl/THIRDPARTYLICENSE.gplv2
+  * GCM Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+  * GCM Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
+  * GHASH Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+  * GHASH Implementation: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
 
 This product includes software developed by the OpenSSL Project for use
 in the OpenSSL Toolkit (http://www.openssl.org/)

diff --git a/zfs/META b/zfs/META
index 9f43b0d..6e199fa 100644
--- a/zfs/META
+++ b/zfs/META

@@ -1,10 +1,10 @@
 Meta:          1
 Name:          zfs
 Branch:        1.0
-Version:       0.8.6
+Version:       2.1.14
 Release:       1
 Release-Tags:  relext
 License:       CDDL
-Author:        OpenZFS on Linux
-Linux-Maximum: 5.9
-Linux-Minimum: 2.6.32
+Author:        OpenZFS
+Linux-Maximum: 6.5
+Linux-Minimum: 3.10

diff --git a/zfs/Makefile.am b/zfs/Makefile.am
index 70d9fd7..36d8cd2 100644
--- a/zfs/Makefile.am
+++ b/zfs/Makefile.am

@@ -1,12 +1,17 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 ACLOCAL_AMFLAGS = -I config
 
-include config/rpm.am
-include config/deb.am
-include config/tgz.am
+SUBDIRS = include
+if BUILD_LINUX
+SUBDIRS += rpm
+endif
 
-SUBDIRS = include rpm
 if CONFIG_USER
-SUBDIRS += udev etc man scripts lib tests cmd contrib
+SUBDIRS += man scripts lib tests cmd etc contrib
+if BUILD_LINUX
+SUBDIRS += udev
+endif
 endif
 if CONFIG_KERNEL
 SUBDIRS += module
@@ -14,15 +19,18 @@
 extradir = $(prefix)/src/zfs-$(VERSION)
 extra_HEADERS = zfs.release.in zfs_config.h.in
 
+if BUILD_LINUX
 kerneldir = $(prefix)/src/zfs-$(VERSION)/$(LINUX_VERSION)
 nodist_kernel_HEADERS = zfs.release zfs_config.h module/$(LINUX_SYMBOLS)
 endif
+endif
 
 AUTOMAKE_OPTIONS = foreign
 EXTRA_DIST  = autogen.sh copy-builtin
 EXTRA_DIST += config/config.awk config/rpm.am config/deb.am config/tgz.am
-EXTRA_DIST += META AUTHORS COPYRIGHT LICENSE NEWS NOTICE README.md
-EXTRA_DIST += CODE_OF_CONDUCT.md
+EXTRA_DIST += AUTHORS CODE_OF_CONDUCT.md COPYRIGHT LICENSE META NEWS NOTICE
+EXTRA_DIST += README.md RELEASES.md
+EXTRA_DIST += module/lua/README.zfs module/os/linux/spl/README.md
 
 # Include all the extra licensing information for modules
 EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE
@@ -31,20 +39,29 @@
 EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip
 EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
 EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip
-EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2
-EXTRA_DIST += module/spl/THIRDPARTYLICENSE.gplv2.descrip
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
+EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip
+EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2
+EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
 EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash
 EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash.descrip
 
 @CODE_COVERAGE_RULES@
 
-.PHONY: gitrev
-gitrev:
-	-${top_srcdir}/scripts/make_gitrev.sh
+GITREV = include/zfs_gitrev.h
 
-BUILT_SOURCES = gitrev
+PHONY = gitrev
+gitrev:
+	$(AM_V_GEN)$(top_srcdir)/scripts/make_gitrev.sh $(GITREV)
+
+all: gitrev
 
 # Double-colon rules are allowed; there are multiple independent definitions.
+maintainer-clean-local::
+	-$(RM) $(GITREV)
+
 distclean-local::
 	-$(RM) -R autom4te*.cache build
 	-find . \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS \
@@ -61,11 +78,12 @@
 	-[ -x ${top_builddir}/scripts/zfs-tests.sh ] && \
 	    ${top_builddir}/scripts/zfs-tests.sh -c
 
-dist-hook: gitrev
-	cp ${top_srcdir}/include/zfs_gitrev.h $(distdir)/include; \
-	sed -i 's/Release:[[:print:]]*/Release:      $(RELEASE)/' \
+dist-hook:
+	$(AM_V_GEN)$(top_srcdir)/scripts/make_gitrev.sh -D $(distdir) $(GITREV)
+	$(SED) ${ac_inplace} -e 's/Release:[[:print:]]*/Release:      $(RELEASE)/' \
 		$(distdir)/META
 
+if BUILD_LINUX
 # For compatibility, create a matching spl-x.y.z directly which contains
 # symlinks to the updated header and object file locations.  These
 # compatibility links will be removed in the next major release.
@@ -82,75 +100,112 @@
 	ln -fs zfs_config.h spl_config.h && \
 	ln -fs zfs.release spl.release
 endif
+endif
 
-codecheck: cstyle shellcheck flake8 mancheck testscheck vcscheck
+PHONY += codecheck
+codecheck: cstyle shellcheck checkbashisms flake8 mancheck testscheck vcscheck zstdcheck
 
+PHONY += checkstyle
 checkstyle: codecheck commitcheck
 
+PHONY += commitcheck
 commitcheck:
 	@if git rev-parse --git-dir > /dev/null 2>&1; then \
 		${top_srcdir}/scripts/commitcheck.sh; \
 	fi
 
+if HAVE_PARALLEL
+cstyle_line = -print0 | parallel -X0 ${top_srcdir}/scripts/cstyle.pl -cpP {}
+else
+cstyle_line = -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} +
+endif
+PHONY += cstyle
 cstyle:
-	@find ${top_srcdir} -name build -prune -o -name '*.[hc]' \
-		! -name 'zfs_config.*' ! -name '*.mod.c' -type f \
-		-exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+
+	@find ${top_srcdir} -name build -prune \
+		-o -type f -name '*.[hc]' \
+		! -name 'zfs_config.*' ! -name '*.mod.c' \
+		! -name 'opt_global.h' ! -name '*_if*.h' \
+		! -name 'zstd_compat_wrapper.h' \
+		! -path './module/zstd/lib/*' \
+		$(cstyle_line)
 
-shellcheck:
-	@if type shellcheck > /dev/null 2>&1; then \
-		shellcheck --exclude=SC1090 --format=gcc \
-			$$(find ${top_srcdir}/scripts/*.sh -type f) \
-			$$(find ${top_srcdir}/cmd/zed/zed.d/*.sh -type f) \
-			$$(find ${top_srcdir}/cmd/zpool/zpool.d/* -executable); \
-	else \
-		echo "skipping shellcheck because shellcheck is not installed"; \
-	fi
+filter_executable = -exec test -x '{}' \; -print
 
+SHELLCHECKDIRS = cmd contrib etc scripts tests
+SHELLCHECKSCRIPTS = autogen.sh
+
+PHONY += checkabi storeabi
+
+checklibabiversion:
+	libabiversion=`abidw -v | $(SED) 's/[^0-9]//g'`; \
+	if test $$libabiversion -lt "200"; then \
+        /bin/echo -e "\n" \
+        "*** Please use libabigail 2.0.0 version or newer;\n" \
+        "*** otherwise results are not consistent!\n" \
+        "(or see https://github.com/openzfs/libabigail-docker )\n"; \
+        exit 1; \
+    fi;
+
+checkabi: checklibabiversion lib
+	$(MAKE) -C lib checkabi
+
+storeabi: checklibabiversion lib
+	$(MAKE) -C lib storeabi
+
+PHONY += mancheck
 mancheck:
-	@if type mandoc > /dev/null 2>&1; then \
-		find ${top_srcdir}/man/man8 -type f -name 'zfs.8' \
-			-o -name 'zpool.8' -o -name 'zdb.8' \
-			-o -name 'zgenhostid.8' | \
-			xargs mandoc -Tlint -Werror; \
-	else \
-		echo "skipping mancheck because mandoc is not installed"; \
-	fi
+	${top_srcdir}/scripts/mancheck.sh ${top_srcdir}/man ${top_srcdir}/tests/test-runner/man
 
+if BUILD_LINUX
+stat_fmt = -c '%A %n'
+else
+stat_fmt = -f '%Sp %N'
+endif
+
+PHONY += testscheck
 testscheck:
 	@find ${top_srcdir}/tests/zfs-tests -type f \
-		\( -name '*.ksh' -not -executable \) -o \
-		\( -name '*.kshlib' -executable \) -o \
-		\( -name '*.shlib' -executable \) -o \
-		\( -name '*.cfg' -executable \) | \
-		xargs -r stat -c '%A %n' | \
+		\( -name '*.ksh' -not ${filter_executable} \) -o \
+		\( -name '*.kshlib' ${filter_executable} \) -o \
+		\( -name '*.shlib' ${filter_executable} \) -o \
+		\( -name '*.cfg' ${filter_executable} \) | \
+		xargs -r stat ${stat_fmt} | \
 		awk '{c++; print} END {if(c>0) exit 1}'
 
+PHONY += vcscheck
 vcscheck:
 	@if git rev-parse --git-dir > /dev/null 2>&1; then \
 		git ls-files . --exclude-standard --others | \
 		awk '{c++; print} END {if(c>0) exit 1}' ; \
 	fi
 
+PHONY += zstdcheck
+zstdcheck:
+	@$(MAKE) -C module/zstd checksymbols
+
+PHONY += lint
 lint: cppcheck paxcheck
 
-cppcheck:
-	@if type cppcheck > /dev/null 2>&1; then \
-		cppcheck --quiet --force --error-exitcode=2 --inline-suppr \
-			--suppressions-list=.github/suppressions.txt \
-			-UHAVE_SSE2 -UHAVE_AVX512F -UHAVE_UIO_ZEROCOPY \
-			${top_srcdir}; \
+CPPCHECKDIRS = cmd lib module
+PHONY += cppcheck
+cppcheck: $(CPPCHECKDIRS)
+	@if test -n "$(CPPCHECK)"; then \
+		set -e ; for dir in $(CPPCHECKDIRS) ; do \
+			$(MAKE) -C $$dir cppcheck ; \
+		done \
 	else \
 		echo "skipping cppcheck because cppcheck is not installed"; \
 	fi
 
+PHONY += paxcheck
 paxcheck:
 	@if type scanelf > /dev/null 2>&1; then \
-		${top_srcdir}/scripts/paxcheck.sh ${top_srcdir}; \
+		${top_srcdir}/scripts/paxcheck.sh ${top_builddir}; \
 	else \
 		echo "skipping paxcheck because scanelf is not installed"; \
 	fi
 
+PHONY += flake8
 flake8:
 	@if type flake8 > /dev/null 2>&1; then \
 		flake8 ${top_srcdir}; \
@@ -158,17 +213,34 @@
 		echo "skipping flake8 because flake8 is not installed"; \
 	fi
 
+PHONY += ctags
 ctags:
 	$(RM) tags
-	find $(top_srcdir) -name .git -prune -o -name '*.[hc]' | xargs ctags
+	find $(top_srcdir) -name '.?*' -prune \
+		-o -type f -name '*.[hcS]' -print | xargs ctags -a
 
+PHONY += etags
 etags:
 	$(RM) TAGS
-	find $(top_srcdir) -name .pc -prune -o -name '*.[hc]' | xargs etags -a
+	find $(top_srcdir) -name '.?*' -prune \
+		-o -type f -name '*.[hcS]' -print | xargs etags -a
 
+PHONY += cscopelist
+cscopelist:
+	find $(top_srcdir) -name '.?*' -prune \
+		-o -type f -name '*.[hc]' -print >cscope.files
+
+PHONY += tags
 tags: ctags etags
 
+PHONY += pkg pkg-dkms pkg-kmod pkg-utils
 pkg: @DEFAULT_PACKAGE@
 pkg-dkms: @DEFAULT_PACKAGE@-dkms
 pkg-kmod: @DEFAULT_PACKAGE@-kmod
 pkg-utils: @DEFAULT_PACKAGE@-utils
+
+include config/rpm.am
+include config/deb.am
+include config/tgz.am
+
+.PHONY: $(PHONY)

diff --git a/zfs/NEWS b/zfs/NEWS
index bbdc2b6..3907ce5 100644
--- a/zfs/NEWS
+++ b/zfs/NEWS

@@ -1,3 +1,3 @@
 Descriptions of all releases can be found on github:
 
-https://github.com/zfsonlinux/zfs/releases
+https://github.com/openzfs/zfs/releases

diff --git a/zfs/README.md b/zfs/README.md
index 59d167f..3318895 100644
--- a/zfs/README.md
+++ b/zfs/README.md

@@ -1,31 +1,35 @@
-![img](http://zfsonlinux.org/images/zfs-linux.png)
+![img](https://openzfs.github.io/openzfs-docs/_static/img/logo/480px-Open-ZFS-Secondary-Logo-Colour-halfsize.png)
 
-ZFS on Linux is an advanced file system and volume manager which was originally
+OpenZFS is an advanced file system and volume manager which was originally
 developed for Solaris and is now maintained by the OpenZFS community.
+This repository contains the code for running OpenZFS on Linux and FreeBSD.
 
-[![codecov](https://codecov.io/gh/zfsonlinux/zfs/branch/master/graph/badge.svg)](https://codecov.io/gh/zfsonlinux/zfs)
-[![coverity](https://scan.coverity.com/projects/1973/badge.svg)](https://scan.coverity.com/projects/zfsonlinux-zfs)
+[![codecov](https://codecov.io/gh/openzfs/zfs/branch/master/graph/badge.svg)](https://codecov.io/gh/openzfs/zfs)
+[![coverity](https://scan.coverity.com/projects/1973/badge.svg)](https://scan.coverity.com/projects/openzfs-zfs)
 
 # Official Resources
 
-  * [Site](http://zfsonlinux.org)
-  * [Wiki](https://github.com/zfsonlinux/zfs/wiki)
-  * [Mailing lists](https://github.com/zfsonlinux/zfs/wiki/Mailing-Lists)
-  * [OpenZFS site](http://open-zfs.org/)
+  * [Documentation](https://openzfs.github.io/openzfs-docs/) - for using and developing this repo
+  * [ZoL Site](https://zfsonlinux.org) - Linux release info & links
+  * [Mailing lists](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
+  * [OpenZFS site](https://openzfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)
 
 # Installation
 
-Full documentation for installing ZoL on your favorite Linux distribution can
-be found at [our site](http://zfsonlinux.org/).
+Full documentation for installing OpenZFS on your favorite operating system can
+be found at the [Getting Started Page](https://openzfs.github.io/openzfs-docs/Getting%20Started/index.html).
 
 # Contribute & Develop
 
 We have a separate document with [contribution guidelines](./.github/CONTRIBUTING.md).
 
+We have a [Code of Conduct](./CODE_OF_CONDUCT.md).
+
 # Release
 
-ZFS on Linux is released under a CDDL license.  
+OpenZFS is released under a CDDL license.
 For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
 
 # Supported Kernels
-  * The `META` file contains the officially recognized supported kernel versions.
+  * The `META` file contains the officially recognized supported Linux kernel versions.
+  * Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.

diff --git a/zfs/RELEASES.md b/zfs/RELEASES.md
new file mode 100644
index 0000000..55bfdb8
--- /dev/null
+++ b/zfs/RELEASES.md

@@ -0,0 +1,37 @@
+OpenZFS uses the MAJOR.MINOR.PATCH versioning scheme described here:
+
+  * MAJOR - Incremented at the discretion of the OpenZFS developers to indicate
+    a particularly noteworthy feature or change. An increase in MAJOR number
+    does not indicate any incompatible on-disk format change. The ability
+    to import a ZFS pool is controlled by the feature flags enabled on the
+    pool and the feature flags supported by the installed OpenZFS version.
+    Increasing the MAJOR version is expected to be an infrequent occurrence.
+
+  * MINOR - Incremented to indicate new functionality such as a new feature
+    flag, pool/dataset property, zfs/zpool sub-command, new user/kernel
+    interface, etc. MINOR releases may introduce incompatible changes to the
+    user space library APIs (libzfs.so). Existing user/kernel interfaces are
+    considered to be stable to maximize compatibility between OpenZFS releases.
+    Additions to the user/kernel interface are backwards compatible.
+
+  * PATCH - Incremented when applying documentation updates, important bug
+    fixes, minor performance improvements, and kernel compatibility patches.
+    The user space library APIs and user/kernel interface are considered to
+    be stable. PATCH releases for a MAJOR.MINOR are published as needed.
+
+Two release branches are maintained for OpenZFS:
+
+  * OpenZFS LTS - A designated MAJOR.MINOR release with periodic PATCH
+    releases that incorporate important changes backported from newer OpenZFS
+    releases. This branch is intended for use in environments using an
+    LTS, enterprise, or similarly managed kernel (RHEL, Ubuntu LTS, Debian).
+    Minor changes to support these distribution kernels will be applied as
+    needed. New kernel versions released after the OpenZFS LTS release are
+    not supported. LTS releases will receive patches for at least 2 years.
+    The current LTS release is OpenZFS 2.1.
+
+  * OpenZFS current - Tracks the newest MAJOR.MINOR release. This branch
+    includes support for the latest OpenZFS features and recently released
+    kernels.  When a new MINOR release is tagged the previous MINOR release
+    will no longer be maintained (unless it is an LTS release). New MINOR
+    releases are planned to occur roughly annually.

diff --git a/zfs/TEST b/zfs/TEST
index ebe6ef9..376d6eb 100644
--- a/zfs/TEST
+++ b/zfs/TEST

@@ -48,64 +48,3 @@
 #TEST_ZFSSTRESS_VDEV="/var/tmp/vdev"
 #TEST_ZFSSTRESS_DIR="/$TEST_ZFSSTRESS_POOL/$TEST_ZFSSTRESS_FS"
 #TEST_ZFSSTRESS_OPTIONS=""
-
-### per-builder customization
-#
-# BB_NAME=builder-name <distribution-version-architecture-type>
-# - distribution=Amazon,Debian,Fedora,RHEL,SUSE,Ubuntu
-# - version=x.y
-# - architecture=x86_64,i686,arm,aarch64
-# - type=build,test
-#
-case "$BB_NAME" in
-Amazon*)
-    # ZFS enabled xfstests fails to build
-    TEST_XFSTESTS_SKIP="yes"
-    ;;
-CentOS-7*)
-    # ZFS enabled xfstests fails to build
-    TEST_XFSTESTS_SKIP="yes"
-    ;;
-CentOS-6*)
-    ;;
-Debian*)
-    ;;
-Fedora*)
-    ;;
-RHEL*)
-    ;;
-SUSE*)
-    ;;
-Ubuntu-16.04*)
-    # ZFS enabled xfstests fails to build
-    TEST_XFSTESTS_SKIP="yes"
-    ;;
-Ubuntu*)
-    ;;
-*)
-    ;;
-esac
-
-###
-#
-# Run ztest longer on the "coverage" builders to gain more code coverage
-# data out of ztest, libzpool, etc.
-#
-case "$BB_NAME" in
-*coverage*)
-    TEST_ZTEST_TIMEOUT=3600
-    ;;
-*)
-    TEST_ZTEST_TIMEOUT=900
-    ;;
-esac
-
-###
-#
-# Disable the following test suites on 32-bit systems.
-#
-if [ $(getconf LONG_BIT) = "32" ]; then
-    TEST_ZTEST_SKIP="yes"
-    TEST_XFSTESTS_SKIP="yes"
-    TEST_ZFSSTRESS_SKIP="yes"
-fi

diff --git a/zfs/cmd/Makefile.am b/zfs/cmd/Makefile.am
index 4e5e896..62da9e4 100644
--- a/zfs/cmd/Makefile.am
+++ b/zfs/cmd/Makefile.am

@@ -1,8 +1,27 @@
-SUBDIRS  = zfs zpool zdb zhack zinject zstreamdump ztest
-SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid
+include $(top_srcdir)/config/Shellcheck.am
+
+SUBDIRS  = zfs zpool zdb zhack zinject zstream ztest
+SUBDIRS += fsck_zfs vdev_id raidz_test zfs_ids_to_path
+SUBDIRS += zpool_influxdb
+
+CPPCHECKDIRS  = zfs zpool zdb zhack zinject zstream ztest
+CPPCHECKDIRS += raidz_test zfs_ids_to_path zpool_influxdb
+
+# TODO: #12084: SHELLCHECKDIRS = fsck_zfs vdev_id zpool
+SHELLCHECKDIRS = fsck_zfs zpool
 
 if USING_PYTHON
 SUBDIRS += arcstat arc_summary dbufstat
 endif
 
-SUBDIRS += mount_zfs zed zvol_id zvol_wait zfstool
+if BUILD_LINUX
+SUBDIRS += mount_zfs zed zgenhostid zvol_id zvol_wait zfstool
+CPPCHECKDIRS += mount_zfs zed zgenhostid zvol_id
+SHELLCHECKDIRS += zed
+endif
+
+PHONY = cppcheck
+cppcheck: $(CPPCHECKDIRS)
+	set -e ; for dir in $(CPPCHECKDIRS) ; do \
+		$(MAKE) -C $$dir cppcheck ; \
+	done

diff --git a/zfs/cmd/arc_summary/.gitignore b/zfs/cmd/arc_summary/.gitignore
new file mode 100644
index 0000000..50ba15f
--- /dev/null
+++ b/zfs/cmd/arc_summary/.gitignore

@@ -0,0 +1 @@
+arc_summary

diff --git a/zfs/cmd/arc_summary/Makefile.am b/zfs/cmd/arc_summary/Makefile.am
index 7d83624..f419f07 100644
--- a/zfs/cmd/arc_summary/Makefile.am
+++ b/zfs/cmd/arc_summary/Makefile.am

@@ -1,11 +1,8 @@
-EXTRA_DIST = arc_summary2 arc_summary3
+bin_SCRIPTS = arc_summary
 
-if USING_PYTHON_2
-dist_bin_SCRIPTS = arc_summary2
-install-exec-hook:
-	mv $(DESTDIR)$(bindir)/arc_summary2 $(DESTDIR)$(bindir)/arc_summary
-else
-dist_bin_SCRIPTS = arc_summary3
-install-exec-hook:
-	mv $(DESTDIR)$(bindir)/arc_summary3 $(DESTDIR)$(bindir)/arc_summary
-endif
+CLEANFILES = arc_summary
+EXTRA_DIST = arc_summary3
+SCRIPT = arc_summary3
+
+arc_summary: $(SCRIPT)
+	cp $< $@

diff --git a/zfs/cmd/arc_summary/arc_summary2 b/zfs/cmd/arc_summary/arc_summary2
deleted file mode 100755
index 1326d9e..0000000
--- a/zfs/cmd/arc_summary/arc_summary2
+++ /dev/null

@@ -1,1081 +0,0 @@
-#!/usr/bin/env python2
-#
-# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $
-#
-# Copyright (c) 2008 Ben Rockwood <benr@cuddletech.com>,
-# Copyright (c) 2010 Martin Matuska <mm@FreeBSD.org>,
-# Copyright (c) 2010-2011 Jason J. Hellenthal <jhell@DataIX.net>,
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-#
-# If you are having troubles when using this script from cron(8) please try
-# adjusting your PATH before reporting problems.
-#
-# Note some of this code uses older code (eg getopt instead of argparse,
-# subprocess.Popen() instead of subprocess.run()) because we need to support
-# some very old versions of Python.
-#
-
-"""Print statistics on the ZFS Adjustable Replacement Cache (ARC)
-
-Provides basic information on the ARC, its efficiency, the L2ARC (if present),
-the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the
-in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
-"""
-
-import getopt
-import os
-import sys
-import time
-import errno
-
-from subprocess import Popen, PIPE
-from decimal import Decimal as D
-
-show_tunable_descriptions = False
-alternate_tunable_layout = False
-
-
-def handle_Exception(ex_cls, ex, tb):
-    if ex is IOError:
-        if ex.errno == errno.EPIPE:
-            sys.exit()
-
-    if ex is KeyboardInterrupt:
-        sys.exit()
-
-
-sys.excepthook = handle_Exception
-
-
-def get_Kstat():
-    """Collect information on the ZFS subsystem from the /proc virtual
-    file system. The name "kstat" is a holdover from the Solaris utility
-    of the same name.
-    """
-
-    def load_proc_kstats(fn, namespace):
-        """Collect information on a specific subsystem of the ARC"""
-
-        kstats = [line.strip() for line in open(fn)]
-        del kstats[0:2]
-        for kstat in kstats:
-            kstat = kstat.strip()
-            name, _, value = kstat.split()
-            Kstat[namespace + name] = D(value)
-
-    Kstat = {}
-    load_proc_kstats('/proc/spl/kstat/zfs/arcstats',
-                     'kstat.zfs.misc.arcstats.')
-    load_proc_kstats('/proc/spl/kstat/zfs/zfetchstats',
-                     'kstat.zfs.misc.zfetchstats.')
-    load_proc_kstats('/proc/spl/kstat/zfs/vdev_cache_stats',
-                     'kstat.zfs.misc.vdev_cache_stats.')
-
-    return Kstat
-
-
-def fBytes(b=0):
-    """Return human-readable representation of a byte value in
-    powers of 2 (eg "KiB" for "kibibytes", etc) to two decimal
-    points. Values smaller than one KiB are returned without
-    decimal points.
-    """
-
-    prefixes = [
-        [2**80, "YiB"],   # yobibytes (yotta)
-        [2**70, "ZiB"],   # zebibytes (zetta)
-        [2**60, "EiB"],   # exbibytes (exa)
-        [2**50, "PiB"],   # pebibytes (peta)
-        [2**40, "TiB"],   # tebibytes (tera)
-        [2**30, "GiB"],   # gibibytes (giga)
-        [2**20, "MiB"],   # mebibytes (mega)
-        [2**10, "KiB"]]   # kibibytes (kilo)
-
-    if b >= 2**10:
-
-        for limit, unit in prefixes:
-
-            if b >= limit:
-                value = b / limit
-                break
-
-        result = "%0.2f\t%s" % (value, unit)
-
-    else:
-
-        result = "%d\tBytes" % b
-
-    return result
-
-
-def fHits(hits=0):
-    """Create a human-readable representation of the number of hits.
-    The single-letter symbols used are SI to avoid the confusion caused
-    by the different "short scale" and "long scale" representations in
-    English, which use the same words for different values. See
-    https://en.wikipedia.org/wiki/Names_of_large_numbers and
-    https://physics.nist.gov/cuu/Units/prefixes.html
-    """
-
-    numbers = [
-            [10**24, 'Y'],  # yotta (septillion)
-            [10**21, 'Z'],  # zetta (sextillion)
-            [10**18, 'E'],  # exa   (quintrillion)
-            [10**15, 'P'],  # peta  (quadrillion)
-            [10**12, 'T'],  # tera  (trillion)
-            [10**9, 'G'],   # giga  (billion)
-            [10**6, 'M'],   # mega  (million)
-            [10**3, 'k']]   # kilo  (thousand)
-
-    if hits >= 1000:
-
-        for limit, symbol in numbers:
-
-            if hits >= limit:
-                value = hits/limit
-                break
-
-        result = "%0.2f%s" % (value, symbol)
-
-    else:
-
-        result = "%d" % hits
-
-    return result
-
-
-def fPerc(lVal=0, rVal=0, Decimal=2):
-    """Calculate percentage value and return in human-readable format"""
-
-    if rVal > 0:
-        return str("%0." + str(Decimal) + "f") % (100 * (lVal / rVal)) + "%"
-    else:
-        return str("%0." + str(Decimal) + "f") % 100 + "%"
-
-
-def get_arc_summary(Kstat):
-    """Collect general data on the ARC"""
-
-    output = {}
-    memory_throttle_count = Kstat[
-        "kstat.zfs.misc.arcstats.memory_throttle_count"
-        ]
-
-    if memory_throttle_count > 0:
-        output['health'] = 'THROTTLED'
-    else:
-        output['health'] = 'HEALTHY'
-
-    output['memory_throttle_count'] = fHits(memory_throttle_count)
-
-    # ARC Misc.
-    deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
-    mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
-    evict_skip = Kstat["kstat.zfs.misc.arcstats.evict_skip"]
-
-    # ARC Misc.
-    output["arc_misc"] = {}
-    output["arc_misc"]["deleted"] = fHits(deleted)
-    output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
-    output["arc_misc"]['evict_skips'] = fHits(evict_skip)
-
-    # ARC Sizing
-    arc_size = Kstat["kstat.zfs.misc.arcstats.size"]
-    mru_size = Kstat["kstat.zfs.misc.arcstats.mru_size"]
-    mfu_size = Kstat["kstat.zfs.misc.arcstats.mfu_size"]
-    meta_limit = Kstat["kstat.zfs.misc.arcstats.arc_meta_limit"]
-    meta_size = Kstat["kstat.zfs.misc.arcstats.arc_meta_used"]
-    dnode_limit = Kstat["kstat.zfs.misc.arcstats.arc_dnode_limit"]
-    dnode_size = Kstat["kstat.zfs.misc.arcstats.dnode_size"]
-    target_max_size = Kstat["kstat.zfs.misc.arcstats.c_max"]
-    target_min_size = Kstat["kstat.zfs.misc.arcstats.c_min"]
-    target_size = Kstat["kstat.zfs.misc.arcstats.c"]
-
-    target_size_ratio = (target_max_size / target_min_size)
-
-    # ARC Sizing
-    output['arc_sizing'] = {}
-    output['arc_sizing']['arc_size'] = {
-        'per': fPerc(arc_size, target_max_size),
-        'num': fBytes(arc_size),
-    }
-    output['arc_sizing']['target_max_size'] = {
-        'ratio': target_size_ratio,
-        'num': fBytes(target_max_size),
-    }
-    output['arc_sizing']['target_min_size'] = {
-        'per': fPerc(target_min_size, target_max_size),
-        'num': fBytes(target_min_size),
-    }
-    output['arc_sizing']['target_size'] = {
-        'per': fPerc(target_size, target_max_size),
-        'num': fBytes(target_size),
-    }
-    output['arc_sizing']['meta_limit'] = {
-        'per': fPerc(meta_limit, target_max_size),
-        'num': fBytes(meta_limit),
-    }
-    output['arc_sizing']['meta_size'] = {
-        'per': fPerc(meta_size, meta_limit),
-        'num': fBytes(meta_size),
-    }
-    output['arc_sizing']['dnode_limit'] = {
-        'per': fPerc(dnode_limit, meta_limit),
-        'num': fBytes(dnode_limit),
-    }
-    output['arc_sizing']['dnode_size'] = {
-        'per': fPerc(dnode_size, dnode_limit),
-        'num': fBytes(dnode_size),
-    }
-
-    # ARC Hash Breakdown
-    output['arc_hash_break'] = {}
-    output['arc_hash_break']['hash_chain_max'] = Kstat[
-        "kstat.zfs.misc.arcstats.hash_chain_max"
-        ]
-    output['arc_hash_break']['hash_chains'] = Kstat[
-        "kstat.zfs.misc.arcstats.hash_chains"
-        ]
-    output['arc_hash_break']['hash_collisions'] = Kstat[
-        "kstat.zfs.misc.arcstats.hash_collisions"
-        ]
-    output['arc_hash_break']['hash_elements'] = Kstat[
-        "kstat.zfs.misc.arcstats.hash_elements"
-        ]
-    output['arc_hash_break']['hash_elements_max'] = Kstat[
-        "kstat.zfs.misc.arcstats.hash_elements_max"
-        ]
-
-    output['arc_size_break'] = {}
-    output['arc_size_break']['recently_used_cache_size'] = {
-        'per': fPerc(mru_size, mru_size + mfu_size),
-        'num': fBytes(mru_size),
-    }
-    output['arc_size_break']['frequently_used_cache_size'] = {
-        'per': fPerc(mfu_size, mru_size + mfu_size),
-        'num': fBytes(mfu_size),
-    }
-
-    # ARC Hash Breakdown
-    hash_chain_max = Kstat["kstat.zfs.misc.arcstats.hash_chain_max"]
-    hash_chains = Kstat["kstat.zfs.misc.arcstats.hash_chains"]
-    hash_collisions = Kstat["kstat.zfs.misc.arcstats.hash_collisions"]
-    hash_elements = Kstat["kstat.zfs.misc.arcstats.hash_elements"]
-    hash_elements_max = Kstat["kstat.zfs.misc.arcstats.hash_elements_max"]
-
-    output['arc_hash_break'] = {}
-    output['arc_hash_break']['elements_max'] = fHits(hash_elements_max)
-    output['arc_hash_break']['elements_current'] = {
-        'per': fPerc(hash_elements, hash_elements_max),
-        'num': fHits(hash_elements),
-        }
-    output['arc_hash_break']['collisions'] = fHits(hash_collisions)
-    output['arc_hash_break']['chain_max'] = fHits(hash_chain_max)
-    output['arc_hash_break']['chains'] = fHits(hash_chains)
-
-    return output
-
-
-def _arc_summary(Kstat):
-    """Print information on the ARC"""
-
-    # ARC Sizing
-    arc = get_arc_summary(Kstat)
-
-    sys.stdout.write("ARC Summary: (%s)\n" % arc['health'])
-
-    sys.stdout.write("\tMemory Throttle Count:\t\t\t%s\n" %
-                     arc['memory_throttle_count'])
-    sys.stdout.write("\n")
-
-    # ARC Misc.
-    sys.stdout.write("ARC Misc:\n")
-    sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
-    sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
-                     arc['arc_misc']['mutex_miss'])
-    sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
-                     arc['arc_misc']['evict_skips'])
-    sys.stdout.write("\n")
-
-    # ARC Sizing
-    sys.stdout.write("ARC Size:\t\t\t\t%s\t%s\n" % (
-        arc['arc_sizing']['arc_size']['per'],
-        arc['arc_sizing']['arc_size']['num']
-        )
-    )
-    sys.stdout.write("\tTarget Size: (Adaptive)\t\t%s\t%s\n" % (
-        arc['arc_sizing']['target_size']['per'],
-        arc['arc_sizing']['target_size']['num'],
-        )
-    )
-
-    sys.stdout.write("\tMin Size (Hard Limit):\t\t%s\t%s\n" % (
-        arc['arc_sizing']['target_min_size']['per'],
-        arc['arc_sizing']['target_min_size']['num'],
-        )
-    )
-
-    sys.stdout.write("\tMax Size (High Water):\t\t%d:1\t%s\n" % (
-        arc['arc_sizing']['target_max_size']['ratio'],
-        arc['arc_sizing']['target_max_size']['num'],
-        )
-    )
-
-    sys.stdout.write("\nARC Size Breakdown:\n")
-    sys.stdout.write("\tRecently Used Cache Size:\t%s\t%s\n" % (
-        arc['arc_size_break']['recently_used_cache_size']['per'],
-        arc['arc_size_break']['recently_used_cache_size']['num'],
-        )
-    )
-    sys.stdout.write("\tFrequently Used Cache Size:\t%s\t%s\n" % (
-        arc['arc_size_break']['frequently_used_cache_size']['per'],
-        arc['arc_size_break']['frequently_used_cache_size']['num'],
-        )
-    )
-    sys.stdout.write("\tMetadata Size (Hard Limit):\t%s\t%s\n" % (
-        arc['arc_sizing']['meta_limit']['per'],
-        arc['arc_sizing']['meta_limit']['num'],
-        )
-    )
-    sys.stdout.write("\tMetadata Size:\t\t\t%s\t%s\n" % (
-        arc['arc_sizing']['meta_size']['per'],
-        arc['arc_sizing']['meta_size']['num'],
-        )
-    )
-    sys.stdout.write("\tDnode Size (Hard Limit):\t%s\t%s\n" % (
-        arc['arc_sizing']['dnode_limit']['per'],
-        arc['arc_sizing']['dnode_limit']['num'],
-        )
-    )
-    sys.stdout.write("\tDnode Size:\t\t\t%s\t%s\n" % (
-        arc['arc_sizing']['dnode_size']['per'],
-        arc['arc_sizing']['dnode_size']['num'],
-        )
-    )
-
-    sys.stdout.write("\n")
-
-    # ARC Hash Breakdown
-    sys.stdout.write("ARC Hash Breakdown:\n")
-    sys.stdout.write("\tElements Max:\t\t\t\t%s\n" %
-                     arc['arc_hash_break']['elements_max'])
-    sys.stdout.write("\tElements Current:\t\t%s\t%s\n" % (
-        arc['arc_hash_break']['elements_current']['per'],
-        arc['arc_hash_break']['elements_current']['num'],
-        )
-    )
-    sys.stdout.write("\tCollisions:\t\t\t\t%s\n" %
-                     arc['arc_hash_break']['collisions'])
-    sys.stdout.write("\tChain Max:\t\t\t\t%s\n" %
-                     arc['arc_hash_break']['chain_max'])
-    sys.stdout.write("\tChains:\t\t\t\t\t%s\n" %
-                     arc['arc_hash_break']['chains'])
-
-
-def get_arc_efficiency(Kstat):
-    """Collect information on the efficiency of the ARC"""
-
-    output = {}
-
-    arc_hits = Kstat["kstat.zfs.misc.arcstats.hits"]
-    arc_misses = Kstat["kstat.zfs.misc.arcstats.misses"]
-    demand_data_hits = Kstat["kstat.zfs.misc.arcstats.demand_data_hits"]
-    demand_data_misses = Kstat["kstat.zfs.misc.arcstats.demand_data_misses"]
-    demand_metadata_hits = Kstat[
-        "kstat.zfs.misc.arcstats.demand_metadata_hits"
-        ]
-    demand_metadata_misses = Kstat[
-        "kstat.zfs.misc.arcstats.demand_metadata_misses"
-        ]
-    mfu_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mfu_ghost_hits"]
-    mfu_hits = Kstat["kstat.zfs.misc.arcstats.mfu_hits"]
-    mru_ghost_hits = Kstat["kstat.zfs.misc.arcstats.mru_ghost_hits"]
-    mru_hits = Kstat["kstat.zfs.misc.arcstats.mru_hits"]
-    prefetch_data_hits = Kstat["kstat.zfs.misc.arcstats.prefetch_data_hits"]
-    prefetch_data_misses = Kstat[
-        "kstat.zfs.misc.arcstats.prefetch_data_misses"
-        ]
-    prefetch_metadata_hits = Kstat[
-        "kstat.zfs.misc.arcstats.prefetch_metadata_hits"
-        ]
-    prefetch_metadata_misses = Kstat[
-        "kstat.zfs.misc.arcstats.prefetch_metadata_misses"
-        ]
-
-    anon_hits = arc_hits - (
-        mfu_hits + mru_hits + mfu_ghost_hits + mru_ghost_hits
-        )
-    arc_accesses_total = (arc_hits + arc_misses)
-    demand_data_total = (demand_data_hits + demand_data_misses)
-    prefetch_data_total = (prefetch_data_hits + prefetch_data_misses)
-    real_hits = (mfu_hits + mru_hits)
-
-    output["total_accesses"] = fHits(arc_accesses_total)
-    output["cache_hit_ratio"] = {
-        'per': fPerc(arc_hits, arc_accesses_total),
-        'num': fHits(arc_hits),
-    }
-    output["cache_miss_ratio"] = {
-        'per': fPerc(arc_misses, arc_accesses_total),
-        'num': fHits(arc_misses),
-    }
-    output["actual_hit_ratio"] = {
-        'per': fPerc(real_hits, arc_accesses_total),
-        'num': fHits(real_hits),
-    }
-    output["data_demand_efficiency"] = {
-        'per': fPerc(demand_data_hits, demand_data_total),
-        'num': fHits(demand_data_total),
-    }
-
-    if prefetch_data_total > 0:
-        output["data_prefetch_efficiency"] = {
-            'per': fPerc(prefetch_data_hits, prefetch_data_total),
-            'num': fHits(prefetch_data_total),
-        }
-
-    if anon_hits > 0:
-        output["cache_hits_by_cache_list"] = {}
-        output["cache_hits_by_cache_list"]["anonymously_used"] = {
-            'per': fPerc(anon_hits, arc_hits),
-            'num': fHits(anon_hits),
-        }
-
-    output["most_recently_used"] = {
-        'per': fPerc(mru_hits, arc_hits),
-        'num': fHits(mru_hits),
-    }
-    output["most_frequently_used"] = {
-        'per': fPerc(mfu_hits, arc_hits),
-        'num': fHits(mfu_hits),
-    }
-    output["most_recently_used_ghost"] = {
-        'per': fPerc(mru_ghost_hits, arc_hits),
-        'num': fHits(mru_ghost_hits),
-    }
-    output["most_frequently_used_ghost"] = {
-        'per': fPerc(mfu_ghost_hits, arc_hits),
-        'num': fHits(mfu_ghost_hits),
-    }
-
-    output["cache_hits_by_data_type"] = {}
-    output["cache_hits_by_data_type"]["demand_data"] = {
-        'per': fPerc(demand_data_hits, arc_hits),
-        'num': fHits(demand_data_hits),
-    }
-    output["cache_hits_by_data_type"]["prefetch_data"] = {
-        'per': fPerc(prefetch_data_hits, arc_hits),
-        'num': fHits(prefetch_data_hits),
-    }
-    output["cache_hits_by_data_type"]["demand_metadata"] = {
-        'per': fPerc(demand_metadata_hits, arc_hits),
-        'num': fHits(demand_metadata_hits),
-    }
-    output["cache_hits_by_data_type"]["prefetch_metadata"] = {
-        'per': fPerc(prefetch_metadata_hits, arc_hits),
-        'num': fHits(prefetch_metadata_hits),
-    }
-
-    output["cache_misses_by_data_type"] = {}
-    output["cache_misses_by_data_type"]["demand_data"] = {
-        'per': fPerc(demand_data_misses, arc_misses),
-        'num': fHits(demand_data_misses),
-    }
-    output["cache_misses_by_data_type"]["prefetch_data"] = {
-        'per': fPerc(prefetch_data_misses, arc_misses),
-        'num': fHits(prefetch_data_misses),
-    }
-    output["cache_misses_by_data_type"]["demand_metadata"] = {
-        'per': fPerc(demand_metadata_misses, arc_misses),
-        'num': fHits(demand_metadata_misses),
-    }
-    output["cache_misses_by_data_type"]["prefetch_metadata"] = {
-        'per': fPerc(prefetch_metadata_misses, arc_misses),
-        'num': fHits(prefetch_metadata_misses),
-    }
-
-    return output
-
-
-def _arc_efficiency(Kstat):
-    """Print information on the efficiency of the ARC"""
-
-    arc = get_arc_efficiency(Kstat)
-
-    sys.stdout.write("ARC Total accesses:\t\t\t\t\t%s\n" %
-                     arc['total_accesses'])
-    sys.stdout.write("\tCache Hit Ratio:\t\t%s\t%s\n" % (
-        arc['cache_hit_ratio']['per'],
-        arc['cache_hit_ratio']['num'],
-        )
-    )
-    sys.stdout.write("\tCache Miss Ratio:\t\t%s\t%s\n" % (
-        arc['cache_miss_ratio']['per'],
-        arc['cache_miss_ratio']['num'],
-        )
-    )
-
-    sys.stdout.write("\tActual Hit Ratio:\t\t%s\t%s\n" % (
-        arc['actual_hit_ratio']['per'],
-        arc['actual_hit_ratio']['num'],
-        )
-    )
-
-    sys.stdout.write("\n")
-    sys.stdout.write("\tData Demand Efficiency:\t\t%s\t%s\n" % (
-        arc['data_demand_efficiency']['per'],
-        arc['data_demand_efficiency']['num'],
-        )
-    )
-
-    if 'data_prefetch_efficiency' in arc:
-        sys.stdout.write("\tData Prefetch Efficiency:\t%s\t%s\n" % (
-            arc['data_prefetch_efficiency']['per'],
-            arc['data_prefetch_efficiency']['num'],
-            )
-        )
-    sys.stdout.write("\n")
-
-    sys.stdout.write("\tCACHE HITS BY CACHE LIST:\n")
-    if 'cache_hits_by_cache_list' in arc:
-        sys.stdout.write("\t  Anonymously Used:\t\t%s\t%s\n" % (
-            arc['cache_hits_by_cache_list']['anonymously_used']['per'],
-            arc['cache_hits_by_cache_list']['anonymously_used']['num'],
-            )
-        )
-    sys.stdout.write("\t  Most Recently Used:\t\t%s\t%s\n" % (
-        arc['most_recently_used']['per'],
-        arc['most_recently_used']['num'],
-        )
-    )
-    sys.stdout.write("\t  Most Frequently Used:\t\t%s\t%s\n" % (
-        arc['most_frequently_used']['per'],
-        arc['most_frequently_used']['num'],
-        )
-    )
-    sys.stdout.write("\t  Most Recently Used Ghost:\t%s\t%s\n" % (
-        arc['most_recently_used_ghost']['per'],
-        arc['most_recently_used_ghost']['num'],
-        )
-    )
-    sys.stdout.write("\t  Most Frequently Used Ghost:\t%s\t%s\n" % (
-        arc['most_frequently_used_ghost']['per'],
-        arc['most_frequently_used_ghost']['num'],
-        )
-    )
-
-    sys.stdout.write("\n\tCACHE HITS BY DATA TYPE:\n")
-    sys.stdout.write("\t  Demand Data:\t\t\t%s\t%s\n" % (
-        arc["cache_hits_by_data_type"]['demand_data']['per'],
-        arc["cache_hits_by_data_type"]['demand_data']['num'],
-        )
-    )
-    sys.stdout.write("\t  Prefetch Data:\t\t%s\t%s\n" % (
-        arc["cache_hits_by_data_type"]['prefetch_data']['per'],
-        arc["cache_hits_by_data_type"]['prefetch_data']['num'],
-        )
-    )
-    sys.stdout.write("\t  Demand Metadata:\t\t%s\t%s\n" % (
-        arc["cache_hits_by_data_type"]['demand_metadata']['per'],
-        arc["cache_hits_by_data_type"]['demand_metadata']['num'],
-        )
-    )
-    sys.stdout.write("\t  Prefetch Metadata:\t\t%s\t%s\n" % (
-        arc["cache_hits_by_data_type"]['prefetch_metadata']['per'],
-        arc["cache_hits_by_data_type"]['prefetch_metadata']['num'],
-        )
-    )
-
-    sys.stdout.write("\n\tCACHE MISSES BY DATA TYPE:\n")
-    sys.stdout.write("\t  Demand Data:\t\t\t%s\t%s\n" % (
-        arc["cache_misses_by_data_type"]['demand_data']['per'],
-        arc["cache_misses_by_data_type"]['demand_data']['num'],
-        )
-    )
-    sys.stdout.write("\t  Prefetch Data:\t\t%s\t%s\n" % (
-        arc["cache_misses_by_data_type"]['prefetch_data']['per'],
-        arc["cache_misses_by_data_type"]['prefetch_data']['num'],
-        )
-    )
-    sys.stdout.write("\t  Demand Metadata:\t\t%s\t%s\n" % (
-        arc["cache_misses_by_data_type"]['demand_metadata']['per'],
-        arc["cache_misses_by_data_type"]['demand_metadata']['num'],
-        )
-    )
-    sys.stdout.write("\t  Prefetch Metadata:\t\t%s\t%s\n" % (
-        arc["cache_misses_by_data_type"]['prefetch_metadata']['per'],
-        arc["cache_misses_by_data_type"]['prefetch_metadata']['num'],
-        )
-    )
-
-
-def get_l2arc_summary(Kstat):
-    """Collection information on the L2ARC"""
-
-    output = {}
-
-    l2_abort_lowmem = Kstat["kstat.zfs.misc.arcstats.l2_abort_lowmem"]
-    l2_cksum_bad = Kstat["kstat.zfs.misc.arcstats.l2_cksum_bad"]
-    l2_evict_lock_retry = Kstat["kstat.zfs.misc.arcstats.l2_evict_lock_retry"]
-    l2_evict_reading = Kstat["kstat.zfs.misc.arcstats.l2_evict_reading"]
-    l2_feeds = Kstat["kstat.zfs.misc.arcstats.l2_feeds"]
-    l2_free_on_write = Kstat["kstat.zfs.misc.arcstats.l2_free_on_write"]
-    l2_hdr_size = Kstat["kstat.zfs.misc.arcstats.l2_hdr_size"]
-    l2_hits = Kstat["kstat.zfs.misc.arcstats.l2_hits"]
-    l2_io_error = Kstat["kstat.zfs.misc.arcstats.l2_io_error"]
-    l2_misses = Kstat["kstat.zfs.misc.arcstats.l2_misses"]
-    l2_rw_clash = Kstat["kstat.zfs.misc.arcstats.l2_rw_clash"]
-    l2_size = Kstat["kstat.zfs.misc.arcstats.l2_size"]
-    l2_asize = Kstat["kstat.zfs.misc.arcstats.l2_asize"]
-    l2_writes_done = Kstat["kstat.zfs.misc.arcstats.l2_writes_done"]
-    l2_writes_error = Kstat["kstat.zfs.misc.arcstats.l2_writes_error"]
-    l2_writes_sent = Kstat["kstat.zfs.misc.arcstats.l2_writes_sent"]
-
-    l2_access_total = (l2_hits + l2_misses)
-    output['l2_health_count'] = (l2_writes_error + l2_cksum_bad + l2_io_error)
-
-    output['l2_access_total'] = l2_access_total
-    output['l2_size'] = l2_size
-    output['l2_asize'] = l2_asize
-
-    if l2_size > 0 and l2_access_total > 0:
-
-        if output['l2_health_count'] > 0:
-            output["health"] = "DEGRADED"
-        else:
-            output["health"] = "HEALTHY"
-
-        output["low_memory_aborts"] = fHits(l2_abort_lowmem)
-        output["free_on_write"] = fHits(l2_free_on_write)
-        output["rw_clashes"] = fHits(l2_rw_clash)
-        output["bad_checksums"] = fHits(l2_cksum_bad)
-        output["io_errors"] = fHits(l2_io_error)
-
-        output["l2_arc_size"] = {}
-        output["l2_arc_size"]["adative"] = fBytes(l2_size)
-        output["l2_arc_size"]["actual"] = {
-            'per': fPerc(l2_asize, l2_size),
-            'num': fBytes(l2_asize)
-            }
-        output["l2_arc_size"]["head_size"] = {
-            'per': fPerc(l2_hdr_size, l2_size),
-            'num': fBytes(l2_hdr_size),
-        }
-
-        output["l2_arc_evicts"] = {}
-        output["l2_arc_evicts"]['lock_retries'] = fHits(l2_evict_lock_retry)
-        output["l2_arc_evicts"]['reading'] = fHits(l2_evict_reading)
-
-        output['l2_arc_breakdown'] = {}
-        output['l2_arc_breakdown']['value'] = fHits(l2_access_total)
-        output['l2_arc_breakdown']['hit_ratio'] = {
-            'per': fPerc(l2_hits, l2_access_total),
-            'num': fHits(l2_hits),
-        }
-        output['l2_arc_breakdown']['miss_ratio'] = {
-            'per': fPerc(l2_misses, l2_access_total),
-            'num': fHits(l2_misses),
-        }
-        output['l2_arc_breakdown']['feeds'] = fHits(l2_feeds)
-
-        output['l2_arc_buffer'] = {}
-
-        output['l2_arc_writes'] = {}
-        output['l2_writes_done'] = l2_writes_done
-        output['l2_writes_sent'] = l2_writes_sent
-        if l2_writes_done != l2_writes_sent:
-            output['l2_arc_writes']['writes_sent'] = {
-                'value': "FAULTED",
-                'num': fHits(l2_writes_sent),
-            }
-            output['l2_arc_writes']['done_ratio'] = {
-                'per': fPerc(l2_writes_done, l2_writes_sent),
-                'num': fHits(l2_writes_done),
-            }
-            output['l2_arc_writes']['error_ratio'] = {
-                'per': fPerc(l2_writes_error, l2_writes_sent),
-                'num': fHits(l2_writes_error),
-            }
-        else:
-            output['l2_arc_writes']['writes_sent'] = {
-                'per': fPerc(100),
-                'num': fHits(l2_writes_sent),
-            }
-
-    return output
-
-
-def _l2arc_summary(Kstat):
-    """Print information on the L2ARC"""
-
-    arc = get_l2arc_summary(Kstat)
-
-    if arc['l2_size'] > 0 and arc['l2_access_total'] > 0:
-        sys.stdout.write("L2 ARC Summary: ")
-        if arc['l2_health_count'] > 0:
-            sys.stdout.write("(DEGRADED)\n")
-        else:
-            sys.stdout.write("(HEALTHY)\n")
-        sys.stdout.write("\tLow Memory Aborts:\t\t\t%s\n" %
-                         arc['low_memory_aborts'])
-        sys.stdout.write("\tFree on Write:\t\t\t\t%s\n" % arc['free_on_write'])
-        sys.stdout.write("\tR/W Clashes:\t\t\t\t%s\n" % arc['rw_clashes'])
-        sys.stdout.write("\tBad Checksums:\t\t\t\t%s\n" % arc['bad_checksums'])
-        sys.stdout.write("\tIO Errors:\t\t\t\t%s\n" % arc['io_errors'])
-        sys.stdout.write("\n")
-
-        sys.stdout.write("L2 ARC Size: (Adaptive)\t\t\t\t%s\n" %
-                         arc["l2_arc_size"]["adative"])
-        sys.stdout.write("\tCompressed:\t\t\t%s\t%s\n" % (
-            arc["l2_arc_size"]["actual"]["per"],
-            arc["l2_arc_size"]["actual"]["num"],
-            )
-        )
-        sys.stdout.write("\tHeader Size:\t\t\t%s\t%s\n" % (
-            arc["l2_arc_size"]["head_size"]["per"],
-            arc["l2_arc_size"]["head_size"]["num"],
-            )
-        )
-        sys.stdout.write("\n")
-
-        if arc["l2_arc_evicts"]['lock_retries'] != '0' or \
-           arc["l2_arc_evicts"]["reading"] != '0':
-            sys.stdout.write("L2 ARC Evicts:\n")
-            sys.stdout.write("\tLock Retries:\t\t\t\t%s\n" %
-                             arc["l2_arc_evicts"]['lock_retries'])
-            sys.stdout.write("\tUpon Reading:\t\t\t\t%s\n" %
-                             arc["l2_arc_evicts"]["reading"])
-            sys.stdout.write("\n")
-
-        sys.stdout.write("L2 ARC Breakdown:\t\t\t\t%s\n" %
-                         arc['l2_arc_breakdown']['value'])
-        sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
-            arc['l2_arc_breakdown']['hit_ratio']['per'],
-            arc['l2_arc_breakdown']['hit_ratio']['num'],
-            )
-        )
-
-        sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
-            arc['l2_arc_breakdown']['miss_ratio']['per'],
-            arc['l2_arc_breakdown']['miss_ratio']['num'],
-            )
-        )
-
-        sys.stdout.write("\tFeeds:\t\t\t\t\t%s\n" %
-                         arc['l2_arc_breakdown']['feeds'])
-        sys.stdout.write("\n")
-
-        sys.stdout.write("L2 ARC Writes:\n")
-        if arc['l2_writes_done'] != arc['l2_writes_sent']:
-            sys.stdout.write("\tWrites Sent: (%s)\t\t\t\t%s\n" % (
-                arc['l2_arc_writes']['writes_sent']['value'],
-                arc['l2_arc_writes']['writes_sent']['num'],
-                )
-            )
-            sys.stdout.write("\t  Done Ratio:\t\t\t%s\t%s\n" % (
-                arc['l2_arc_writes']['done_ratio']['per'],
-                arc['l2_arc_writes']['done_ratio']['num'],
-                )
-            )
-            sys.stdout.write("\t  Error Ratio:\t\t\t%s\t%s\n" % (
-                arc['l2_arc_writes']['error_ratio']['per'],
-                arc['l2_arc_writes']['error_ratio']['num'],
-                )
-            )
-        else:
-            sys.stdout.write("\tWrites Sent:\t\t\t%s\t%s\n" % (
-                arc['l2_arc_writes']['writes_sent']['per'],
-                arc['l2_arc_writes']['writes_sent']['num'],
-                )
-            )
-
-
-def get_dmu_summary(Kstat):
-    """Collect information on the DMU"""
-
-    output = {}
-
-    zfetch_hits = Kstat["kstat.zfs.misc.zfetchstats.hits"]
-    zfetch_misses = Kstat["kstat.zfs.misc.zfetchstats.misses"]
-
-    zfetch_access_total = (zfetch_hits + zfetch_misses)
-    output['zfetch_access_total'] = zfetch_access_total
-
-    if zfetch_access_total > 0:
-        output['dmu'] = {}
-        output['dmu']['efficiency'] = {}
-        output['dmu']['efficiency']['value'] = fHits(zfetch_access_total)
-        output['dmu']['efficiency']['hit_ratio'] = {
-            'per': fPerc(zfetch_hits, zfetch_access_total),
-            'num': fHits(zfetch_hits),
-        }
-        output['dmu']['efficiency']['miss_ratio'] = {
-            'per': fPerc(zfetch_misses, zfetch_access_total),
-            'num': fHits(zfetch_misses),
-        }
-
-    return output
-
-
-def _dmu_summary(Kstat):
-    """Print information on the DMU"""
-
-    arc = get_dmu_summary(Kstat)
-
-    if arc['zfetch_access_total'] > 0:
-        sys.stdout.write("DMU Prefetch Efficiency:\t\t\t\t\t%s\n" %
-                         arc['dmu']['efficiency']['value'])
-        sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
-            arc['dmu']['efficiency']['hit_ratio']['per'],
-            arc['dmu']['efficiency']['hit_ratio']['num'],
-            )
-        )
-        sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
-            arc['dmu']['efficiency']['miss_ratio']['per'],
-            arc['dmu']['efficiency']['miss_ratio']['num'],
-            )
-        )
-
-        sys.stdout.write("\n")
-
-
-def get_vdev_summary(Kstat):
-    """Collect information on the VDEVs"""
-
-    output = {}
-
-    vdev_cache_delegations = \
-        Kstat["kstat.zfs.misc.vdev_cache_stats.delegations"]
-    vdev_cache_misses = Kstat["kstat.zfs.misc.vdev_cache_stats.misses"]
-    vdev_cache_hits = Kstat["kstat.zfs.misc.vdev_cache_stats.hits"]
-    vdev_cache_total = (vdev_cache_misses + vdev_cache_hits +
-                        vdev_cache_delegations)
-
-    output['vdev_cache_total'] = vdev_cache_total
-
-    if vdev_cache_total > 0:
-        output['summary'] = fHits(vdev_cache_total)
-        output['hit_ratio'] = {
-            'per': fPerc(vdev_cache_hits, vdev_cache_total),
-            'num': fHits(vdev_cache_hits),
-        }
-        output['miss_ratio'] = {
-            'per': fPerc(vdev_cache_misses, vdev_cache_total),
-            'num': fHits(vdev_cache_misses),
-        }
-        output['delegations'] = {
-            'per': fPerc(vdev_cache_delegations, vdev_cache_total),
-            'num': fHits(vdev_cache_delegations),
-        }
-
-    return output
-
-
-def _vdev_summary(Kstat):
-    """Print information on the VDEVs"""
-
-    arc = get_vdev_summary(Kstat)
-
-    if arc['vdev_cache_total'] > 0:
-        sys.stdout.write("VDEV Cache Summary:\t\t\t\t%s\n" % arc['summary'])
-        sys.stdout.write("\tHit Ratio:\t\t\t%s\t%s\n" % (
-            arc['hit_ratio']['per'],
-            arc['hit_ratio']['num'],
-        ))
-        sys.stdout.write("\tMiss Ratio:\t\t\t%s\t%s\n" % (
-            arc['miss_ratio']['per'],
-            arc['miss_ratio']['num'],
-        ))
-        sys.stdout.write("\tDelegations:\t\t\t%s\t%s\n" % (
-            arc['delegations']['per'],
-            arc['delegations']['num'],
-        ))
-
-
-def _tunable_summary(Kstat):
-    """Print information on tunables, including descriptions if requested"""
-
-    global show_tunable_descriptions
-    global alternate_tunable_layout
-
-    names = os.listdir("/sys/module/zfs/parameters/")
-
-    values = {}
-    for name in names:
-        with open("/sys/module/zfs/parameters/" + name) as f:
-            value = f.read()
-        values[name] = value.strip()
-
-    descriptions = {}
-
-    if show_tunable_descriptions:
-
-        command = ["/sbin/modinfo", "zfs", "-0"]
-
-        try:
-            p = Popen(command, stdin=PIPE, stdout=PIPE,
-                      stderr=PIPE, shell=False, close_fds=True)
-            p.wait()
-
-            # By default, Python 2 returns a string as the first element of the
-            # tuple from p.communicate(), while Python 3 returns bytes which
-            # must be decoded first. The better way to do this would be with
-            # subprocess.run() or at least .check_output(), but this fails on
-            # CentOS 6 because of its old version of Python 2
-            desc = bytes.decode(p.communicate()[0])
-            description_list = desc.strip().split('\0')
-
-            if p.returncode == 0:
-                for tunable in description_list:
-                    if tunable[0:5] == 'parm:':
-                        tunable = tunable[5:].strip()
-                        name, description = tunable.split(':', 1)
-                        if not description:
-                            description = "Description unavailable"
-                        descriptions[name] = description
-            else:
-                sys.stderr.write("%s: '%s' exited with code %i\n" %
-                                 (sys.argv[0], command[0], p.returncode))
-                sys.stderr.write("Tunable descriptions will be disabled.\n")
-        except OSError as e:
-            sys.stderr.write("%s: Cannot run '%s': %s\n" %
-                             (sys.argv[0], command[0], e.strerror))
-            sys.stderr.write("Tunable descriptions will be disabled.\n")
-
-    sys.stdout.write("ZFS Tunables:\n")
-    names.sort()
-
-    if alternate_tunable_layout:
-        fmt = "\t%s=%s\n"
-    else:
-        fmt = "\t%-50s%s\n"
-
-    for name in names:
-
-        if not name:
-            continue
-
-        if show_tunable_descriptions and name in descriptions:
-            sys.stdout.write("\t# %s\n" % descriptions[name])
-
-        sys.stdout.write(fmt % (name, values[name]))
-
-
-unSub = [
-    _arc_summary,
-    _arc_efficiency,
-    _l2arc_summary,
-    _dmu_summary,
-    _vdev_summary,
-    _tunable_summary
-]
-
-
-def zfs_header():
-    """Print title string with date"""
-
-    daydate = time.strftime('%a %b %d %H:%M:%S %Y')
-
-    sys.stdout.write('\n'+'-'*72+'\n')
-    sys.stdout.write('ZFS Subsystem Report\t\t\t\t%s' % daydate)
-    sys.stdout.write('\n')
-
-
-def usage():
-    """Print usage information"""
-
-    sys.stdout.write("Usage: arc_summary [-h] [-a] [-d] [-p PAGE]\n\n")
-    sys.stdout.write("\t -h, --help           : "
-                     "Print this help message and exit\n")
-    sys.stdout.write("\t -a, --alternate      : "
-                     "Show an alternate sysctl layout\n")
-    sys.stdout.write("\t -d, --description    : "
-                     "Show the sysctl descriptions\n")
-    sys.stdout.write("\t -p PAGE, --page=PAGE : "
-                     "Select a single output page to display,\n")
-    sys.stdout.write("\t                        "
-                     "should be an integer between 1 and " +
-                     str(len(unSub)) + "\n\n")
-    sys.stdout.write("Examples:\n")
-    sys.stdout.write("\tarc_summary -a\n")
-    sys.stdout.write("\tarc_summary -p 4\n")
-    sys.stdout.write("\tarc_summary -ad\n")
-    sys.stdout.write("\tarc_summary --page=2\n")
-
-
-def main():
-    """Main function"""
-
-    global show_tunable_descriptions
-    global alternate_tunable_layout
-
-    try:
-        opts, args = getopt.getopt(
-            sys.argv[1:],
-            "adp:h", ["alternate", "description", "page=", "help"]
-        )
-    except getopt.error as e:
-        sys.stderr.write("Error: %s\n" % e.msg)
-        usage()
-        sys.exit(1)
-
-    args = {}
-    for opt, arg in opts:
-        if opt in ('-a', '--alternate'):
-            args['a'] = True
-        if opt in ('-d', '--description'):
-            args['d'] = True
-        if opt in ('-p', '--page'):
-            args['p'] = arg
-        if opt in ('-h', '--help'):
-            usage()
-            sys.exit(0)
-
-    Kstat = get_Kstat()
-
-    alternate_tunable_layout = 'a' in args
-    show_tunable_descriptions = 'd' in args
-
-    pages = []
-
-    if 'p' in args:
-        try:
-            pages.append(unSub[int(args['p']) - 1])
-        except IndexError:
-            sys.stderr.write('the argument to -p must be between 1 and ' +
-                             str(len(unSub)) + '\n')
-            sys.exit(1)
-    else:
-        pages = unSub
-
-    zfs_header()
-    for page in pages:
-        page(Kstat)
-        sys.stdout.write("\n")
-
-
-if __name__ == '__main__':
-    main()

diff --git a/zfs/cmd/arc_summary/arc_summary3 b/zfs/cmd/arc_summary/arc_summary3
index e9890bf..9d0c2d3 100755
--- a/zfs/cmd/arc_summary/arc_summary3
+++ b/zfs/cmd/arc_summary/arc_summary3

@@ -32,7 +32,7 @@
 Provides basic information on the ARC, its efficiency, the L2ARC (if present),
 the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
 the in-source documentation and code at
-https://github.com/zfsonlinux/zfs/blob/master/module/zfs/arc.c for details.
+https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
 The original introduction to arc_summary can be found at
 http://cuddletech.com/?p=454
 """
@@ -42,13 +42,17 @@
 import subprocess
 import sys
 import time
+import errno
 
-DESCRIPTION = 'Print ARC and other statistics for ZFS on Linux'
+# We can't use env -S portably, and we need python3 -u to handle pipes in
+# the shell abruptly closing the way we want to, so...
+import io
+if isinstance(sys.__stderr__.buffer, io.BufferedWriter):
+    os.execv(sys.executable, [sys.executable, "-u"] + sys.argv)
+
+DESCRIPTION = 'Print ARC and other statistics for OpenZFS'
 INDENT = ' '*8
 LINE_LENGTH = 72
-PROC_PATH = '/proc/spl/kstat/zfs/'
-SPL_PATH = '/sys/module/spl/parameters/'
-TUNABLES_PATH = '/sys/module/zfs/parameters/'
 DATE_FORMAT = '%a %b %d %H:%M:%S %Y'
 TITLE = 'ZFS Subsystem Report'
 
@@ -61,7 +65,6 @@
                  'dmu': 'dmu_tx',
                  'l2arc': 'arcstats',  # L2ARC stuff lives in arcstats
                  'vdev': 'vdev_cache_stats',
-                 'xuio': 'xuio_stats',
                  'zfetch': 'zfetchstats',
                  'zil': 'zil'}
 
@@ -83,6 +86,164 @@
 ARGS = parser.parse_args()
 
 
+if sys.platform.startswith('freebsd'):
+    # Requires py36-sysctl on FreeBSD
+    import sysctl
+
+    VDEV_CACHE_SIZE = 'vdev.cache_size'
+
+    def is_value(ctl):
+        return ctl.type != sysctl.CTLTYPE_NODE
+
+    def namefmt(ctl, base='vfs.zfs.'):
+        # base is removed from the name
+        cut = len(base)
+        return ctl.name[cut:]
+
+    def load_kstats(section):
+        base = 'kstat.zfs.misc.{section}.'.format(section=section)
+        fmt = lambda kstat: '{name} : {value}'.format(name=namefmt(kstat, base),
+                                                      value=kstat.value)
+        kstats = sysctl.filter(base)
+        return [fmt(kstat) for kstat in kstats if is_value(kstat)]
+
+    def get_params(base):
+        ctls = sysctl.filter(base)
+        return {namefmt(ctl): str(ctl.value) for ctl in ctls if is_value(ctl)}
+
+    def get_tunable_params():
+        return get_params('vfs.zfs')
+
+    def get_vdev_params():
+        return get_params('vfs.zfs.vdev')
+
+    def get_version_impl(request):
+        # FreeBSD reports versions for zpl and spa instead of zfs and spl.
+        name = {'zfs': 'zpl',
+                'spl': 'spa'}[request]
+        mib = 'vfs.zfs.version.{}'.format(name)
+        version = sysctl.filter(mib)[0].value
+        return '{} version {}'.format(name, version)
+
+    def get_descriptions(_request):
+        ctls = sysctl.filter('vfs.zfs')
+        return {namefmt(ctl): ctl.description for ctl in ctls if is_value(ctl)}
+
+
+elif sys.platform.startswith('linux'):
+    KSTAT_PATH = '/proc/spl/kstat/zfs'
+    SPL_PATH = '/sys/module/spl/parameters'
+    TUNABLES_PATH = '/sys/module/zfs/parameters'
+
+    VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
+
+    def load_kstats(section):
+        path = os.path.join(KSTAT_PATH, section)
+        with open(path) as f:
+            return list(f)[2:] # Get rid of header
+
+    def get_params(basepath):
+        """Collect information on the Solaris Porting Layer (SPL) or the
+        tunables, depending on the PATH given. Does not check if PATH is
+        legal.
+        """
+        result = {}
+        for name in os.listdir(basepath):
+            path = os.path.join(basepath, name)
+            with open(path) as f:
+                value = f.read()
+                result[name] = value.strip()
+        return result
+
+    def get_spl_params():
+        return get_params(SPL_PATH)
+
+    def get_tunable_params():
+        return get_params(TUNABLES_PATH)
+
+    def get_vdev_params():
+        return get_params(TUNABLES_PATH)
+
+    def get_version_impl(request):
+        # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
+        # the version information. We switch to /sys/module/{spl,zfs}/version
+        # to make sure we get what is really loaded in the kernel
+        try:
+            with open("/sys/module/{}/version".format(request)) as f:
+                return f.read().strip()
+        except:
+            return "(unknown)"
+
+    def get_descriptions(request):
+        """Get the descriptions of the Solaris Porting Layer (SPL) or the
+        tunables, return with minimal formatting.
+        """
+
+        if request not in ('spl', 'zfs'):
+            print('ERROR: description of "{0}" requested)'.format(request))
+            sys.exit(1)
+
+        descs = {}
+        target_prefix = 'parm:'
+
+        # We would prefer to do this with /sys/modules -- see the discussion at
+        # get_version() -- but there isn't a way to get the descriptions from
+        # there, so we fall back on modinfo
+        command = ["/sbin/modinfo", request, "-0"]
+
+        info = ''
+
+        try:
+
+            info = subprocess.run(command, stdout=subprocess.PIPE,
+                                  check=True, universal_newlines=True)
+            raw_output = info.stdout.split('\0')
+
+        except subprocess.CalledProcessError:
+            print("Error: Descriptions not available",
+                  "(can't access kernel module)")
+            sys.exit(1)
+
+        for line in raw_output:
+
+            if not line.startswith(target_prefix):
+                continue
+
+            line = line[len(target_prefix):].strip()
+            name, raw_desc = line.split(':', 1)
+            desc = raw_desc.rsplit('(', 1)[0]
+
+            if desc == '':
+                desc = '(No description found)'
+
+            descs[name.strip()] = desc.strip()
+
+        return descs
+
+def handle_unraisableException(exc_type, exc_value=None, exc_traceback=None,
+                               err_msg=None, object=None):
+   handle_Exception(exc_type, object, exc_traceback)
+
+def handle_Exception(ex_cls, ex, tb):
+    if ex_cls is KeyboardInterrupt:
+        sys.exit()
+
+    if ex_cls is BrokenPipeError:
+        # It turns out that while sys.exit() triggers an exception
+        # not handled message on Python 3.8+, os._exit() does not.
+        os._exit(0)
+
+    if ex_cls is OSError:
+      if ex.errno == errno.ENOTCONN:
+        sys.exit()
+
+    raise ex
+
+if hasattr(sys,'unraisablehook'): # Python 3.8+
+    sys.unraisablehook = handle_unraisableException
+sys.excepthook = handle_Exception
+
+
 def cleanup_line(single_line):
     """Format a raw line of data from /proc and isolate the name value
     part, returning a tuple with each. Currently, this gets rid of the
@@ -238,139 +399,48 @@
     if ARGS.alt:
         result = '{0}{1}={2}'.format(INDENT, name, value)
     else:
-        spc = LINE_LENGTH-(len(INDENT)+len(value))
-        result = '{0}{1:<{spc}}{2}'.format(INDENT, name, value, spc=spc)
+        # Right-align the value within the line length if it fits,
+        # otherwise just separate it from the name by a single space.
+        fit = LINE_LENGTH - len(INDENT) - len(name)
+        overflow = len(value) + 1
+        w = max(fit, overflow)
+        result = '{0}{1}{2:>{w}}'.format(INDENT, name, value, w=w)
 
     return result
 
 
 def get_kstats():
-    """Collect information on the ZFS subsystem from the /proc Linux virtual
-    file system. The step does not perform any further processing, giving us
-    the option to only work on what is actually needed. The name "kstat" is a
-    holdover from the Solaris utility of the same name.
+    """Collect information on the ZFS subsystem. The step does not perform any
+    further processing, giving us the option to only work on what is actually
+    needed. The name "kstat" is a holdover from the Solaris utility of the same
+    name.
     """
 
     result = {}
-    secs = SECTION_PATHS.values()
 
-    for section in secs:
-
-        with open(PROC_PATH+section, 'r') as proc_location:
-            lines = [line for line in proc_location]
-
-        del lines[0:2]  # Get rid of header
-        result[section] = lines
+    for section in SECTION_PATHS.values():
+        if section not in result:
+            result[section] = load_kstats(section)
 
     return result
 
 
-def get_spl_tunables(PATH):
-    """Collect information on the Solaris Porting Layer (SPL) or the
-    tunables, depending on the PATH given. Does not check if PATH is
-    legal.
-    """
-
-    result = {}
-    parameters = os.listdir(PATH)
-
-    for name in parameters:
-
-        with open(PATH+name, 'r') as para_file:
-            value = para_file.read()
-            result[name] = value.strip()
-
-    return result
-
-
-def get_descriptions(request):
-    """Get the descriptions of the Solaris Porting Layer (SPL) or the
-    tunables, return with minimal formatting.
-    """
-
-    if request not in ('spl', 'zfs'):
-        print('ERROR: description of "{0}" requested)'.format(request))
-        sys.exit(1)
-
-    descs = {}
-    target_prefix = 'parm:'
-
-    # We would prefer to do this with /sys/modules -- see the discussion at
-    # get_version() -- but there isn't a way to get the descriptions from
-    # there, so we fall back on modinfo
-    command = ["/sbin/modinfo", request, "-0"]
-
-    # The recommended way to do this is with subprocess.run(). However,
-    # some installed versions of Python are < 3.5, so we offer them
-    # the option of doing it the old way (for now)
-    info = ''
-
-    try:
-
-        if 'run' in dir(subprocess):
-            info = subprocess.run(command, stdout=subprocess.PIPE,
-                                  universal_newlines=True)
-            raw_output = info.stdout.split('\0')
-        else:
-            info = subprocess.check_output(command, universal_newlines=True)
-            raw_output = info.split('\0')
-
-    except subprocess.CalledProcessError:
-        print("Error: Descriptions not available (can't access kernel module)")
-        sys.exit(1)
-
-    for line in raw_output:
-
-        if not line.startswith(target_prefix):
-            continue
-
-        line = line[len(target_prefix):].strip()
-        name, raw_desc = line.split(':', 1)
-        desc = raw_desc.rsplit('(', 1)[0]
-
-        if desc == '':
-            desc = '(No description found)'
-
-        descs[name.strip()] = desc.strip()
-
-    return descs
-
-
 def get_version(request):
     """Get the version number of ZFS or SPL on this machine for header.
     Returns an error string, but does not raise an error, if we can't
-    get the ZFS/SPL version via modinfo.
+    get the ZFS/SPL version.
     """
 
     if request not in ('spl', 'zfs'):
         error_msg = '(ERROR: "{0}" requested)'.format(request)
         return error_msg
 
-    # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
-    # the version information. We switch to /sys/module/{spl,zfs}/version
-    # to make sure we get what is really loaded in the kernel
-    command = ["cat", "/sys/module/{0}/version".format(request)]
-    req = request.upper()
-    version = "(Can't get {0} version)".format(req)
-
-    # The recommended way to do this is with subprocess.run(). However,
-    # some installed versions of Python are < 3.5, so we offer them
-    # the option of doing it the old way (for now)
-    info = ''
-    if 'run' in dir(subprocess):
-        info = subprocess.run(command, stdout=subprocess.PIPE,
-                              universal_newlines=True)
-        version = info.stdout.strip()
-    else:
-        info = subprocess.check_output(command, universal_newlines=True)
-        version = info.strip()
-
-    return version
+    return get_version_impl(request)
 
 
 def print_header():
     """Print the initial heading with date and time as well as info on the
-    Linux and ZFS versions. This is not called for the graph.
+    kernel and ZFS versions. This is not called for the graph.
     """
 
     # datetime is now recommended over time but we keep the exact formatting
@@ -534,6 +604,20 @@
     prt_i1('Deleted:', f_hits(arc_stats['deleted']))
     prt_i1('Mutex misses:', f_hits(arc_stats['mutex_miss']))
     prt_i1('Eviction skips:', f_hits(arc_stats['evict_skip']))
+    prt_i1('Eviction skips due to L2 writes:',
+           f_hits(arc_stats['evict_l2_skip']))
+    prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
+    prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
+    prt_i2('L2 eligible MFU evictions:',
+           f_perc(arc_stats['evict_l2_eligible_mfu'],
+           arc_stats['evict_l2_eligible']),
+           f_bytes(arc_stats['evict_l2_eligible_mfu']))
+    prt_i2('L2 eligible MRU evictions:',
+           f_perc(arc_stats['evict_l2_eligible_mru'],
+           arc_stats['evict_l2_eligible']),
+           f_bytes(arc_stats['evict_l2_eligible_mru']))
+    prt_i1('L2 ineligible evictions:',
+           f_bytes(arc_stats['evict_l2_ineligible']))
     print()
 
 
@@ -594,9 +678,9 @@
     print()
     print('Cache hits by data type:')
     dt_todo = (('Demand data:', arc_stats['demand_data_hits']),
-               ('Demand prefetch data:', arc_stats['prefetch_data_hits']),
+               ('Prefetch data:', arc_stats['prefetch_data_hits']),
                ('Demand metadata:', arc_stats['demand_metadata_hits']),
-               ('Demand prefetch metadata:',
+               ('Prefetch metadata:',
                 arc_stats['prefetch_metadata_hits']))
 
     for title, value in dt_todo:
@@ -605,10 +689,10 @@
     print()
     print('Cache misses by data type:')
     dm_todo = (('Demand data:', arc_stats['demand_data_misses']),
-               ('Demand prefetch data:',
+               ('Prefetch data:',
                 arc_stats['prefetch_data_misses']),
                ('Demand metadata:', arc_stats['demand_metadata_misses']),
-               ('Demand prefetch metadata:',
+               ('Prefetch metadata:',
                 arc_stats['prefetch_metadata_misses']))
 
     for title, value in dm_todo:
@@ -672,6 +756,21 @@
     prt_i2('Header size:',
            f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
            f_bytes(arc_stats['l2_hdr_size']))
+    prt_i2('MFU allocated size:',
+           f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']),
+           f_bytes(arc_stats['l2_mfu_asize']))
+    prt_i2('MRU allocated size:',
+           f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']),
+           f_bytes(arc_stats['l2_mru_asize']))
+    prt_i2('Prefetch allocated size:',
+           f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']),
+           f_bytes(arc_stats['l2_prefetch_asize']))
+    prt_i2('Data (buffer content) allocated size:',
+           f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']),
+           f_bytes(arc_stats['l2_bufc_data_asize']))
+    prt_i2('Metadata (buffer content) allocated size:',
+           f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']),
+           f_bytes(arc_stats['l2_bufc_metadata_asize']))
 
     print()
     prt_1('L2ARC breakdown:', f_hits(l2_access_total))
@@ -691,13 +790,13 @@
         prt_i2('Done ratio:',
                f_perc(arc_stats['l2_writes_done'],
                       arc_stats['l2_writes_sent']),
-               f_bytes(arc_stats['l2_writes_done']))
+               f_hits(arc_stats['l2_writes_done']))
         prt_i2('Error ratio:',
                f_perc(arc_stats['l2_writes_error'],
                       arc_stats['l2_writes_sent']),
-               f_bytes(arc_stats['l2_writes_error']))
+               f_hits(arc_stats['l2_writes_error']))
     else:
-        prt_i2('Writes sent:', '100 %', f_bytes(arc_stats['l2_writes_sent']))
+        prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
 
     print()
     print('L2ARC evicts:')
@@ -711,7 +810,11 @@
     and/or descriptions. This does not use kstats.
     """
 
-    spls = get_spl_tunables(SPL_PATH)
+    if sys.platform.startswith('freebsd'):
+        # No SPL support in FreeBSD
+        return
+
+    spls = get_spl_params()
     keylist = sorted(spls.keys())
     print('Solaris Porting Layer (SPL):')
 
@@ -737,7 +840,7 @@
     descriptions. This does not use kstasts.
     """
 
-    tunables = get_spl_tunables(TUNABLES_PATH)
+    tunables = get_tunable_params()
     keylist = sorted(tunables.keys())
     print('Tunables:')
 
@@ -763,11 +866,11 @@
 
     # Currently [Nov 2017] the VDEV cache is disabled, because it is actually
     # harmful. When this is the case, we just skip the whole entry. See
-    # https://github.com/zfsonlinux/zfs/blob/master/module/zfs/vdev_cache.c
+    # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
     # for details
-    tunables = get_spl_tunables(TUNABLES_PATH)
+    tunables = get_vdev_params()
 
-    if tunables['zfs_vdev_cache_size'] == '0':
+    if tunables[VDEV_CACHE_SIZE] == '0':
         print('VDEV cache disabled, skipping section\n')
         return
 
@@ -789,7 +892,7 @@
 
 def section_zil(kstats_dict):
     """Collect information on the ZFS Intent Log. Some of the information
-    taken from https://github.com/zfsonlinux/zfs/blob/master/include/sys/zil.h
+    taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
     """
 
     zil_stats = isolate_section('zil', kstats_dict)

diff --git a/zfs/cmd/arcstat/.gitignore b/zfs/cmd/arcstat/.gitignore
new file mode 100644
index 0000000..6d6cd1a
--- /dev/null
+++ b/zfs/cmd/arcstat/.gitignore

@@ -0,0 +1 @@
+arcstat

diff --git a/zfs/cmd/arcstat/Makefile.am b/zfs/cmd/arcstat/Makefile.am
index 8166778..d1ba989 100644
--- a/zfs/cmd/arcstat/Makefile.am
+++ b/zfs/cmd/arcstat/Makefile.am

@@ -1,13 +1,5 @@
-dist_bin_SCRIPTS = arcstat
+include $(top_srcdir)/config/Substfiles.am
 
-#
-# The arcstat script is compatible with both Python 2.6 and 3.4.
-# As such the python 3 shebang can be replaced at install time when
-# targeting a python 2 system.  This allows us to maintain a single
-# version of the source.
-#
-if USING_PYTHON_2
-install-exec-hook:
-	sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \
-	    $(DESTDIR)$(bindir)/arcstat
-endif
+bin_SCRIPTS = arcstat
+
+SUBSTFILES += $(bin_SCRIPTS)

diff --git a/zfs/cmd/arcstat/arcstat b/zfs/cmd/arcstat/arcstat
deleted file mode 100755
index 0034999..0000000
--- a/zfs/cmd/arcstat/arcstat
+++ /dev/null

@@ -1,470 +0,0 @@
-#!/usr/bin/env python3
-#
-# Print out ZFS ARC Statistics exported via kstat(1)
-# For a definition of fields, or usage, use arctstat.pl -v
-#
-# This script is a fork of the original arcstat.pl (0.1) by
-# Neelakanth Nadgir, originally published on his Sun blog on
-# 09/18/2007
-#     http://blogs.sun.com/realneel/entry/zfs_arc_statistics
-#
-# This version aims to improve upon the original by adding features
-# and fixing bugs as needed.  This version is maintained by
-# Mike Harsch and is hosted in a public open source repository:
-#    http://github.com/mharsch/arcstat
-#
-# Comments, Questions, or Suggestions are always welcome.
-# Contact the maintainer at ( mike at harschsystems dot com )
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Fields have a fixed width. Every interval, we fill the "v"
-# hash with its corresponding value (v[field]=value) using calculate().
-# @hdr is the array of fields that needs to be printed, so we
-# just iterate over this array and print the values using our pretty printer.
-#
-# This script must remain compatible with Python 2.6+ and Python 3.4+.
-#
-
-import sys
-import time
-import getopt
-import re
-import copy
-
-from decimal import Decimal
-from signal import signal, SIGINT, SIGWINCH, SIG_DFL
-
-cols = {
-    # HDR:        [Size, Scale, Description]
-    "time":       [8, -1, "Time"],
-    "hits":       [4, 1000, "ARC reads per second"],
-    "miss":       [4, 1000, "ARC misses per second"],
-    "read":       [4, 1000, "Total ARC accesses per second"],
-    "hit%":       [4, 100, "ARC Hit percentage"],
-    "miss%":      [5, 100, "ARC miss percentage"],
-    "dhit":       [4, 1000, "Demand hits per second"],
-    "dmis":       [4, 1000, "Demand misses per second"],
-    "dh%":        [3, 100, "Demand hit percentage"],
-    "dm%":        [3, 100, "Demand miss percentage"],
-    "phit":       [4, 1000, "Prefetch hits per second"],
-    "pmis":       [4, 1000, "Prefetch misses per second"],
-    "ph%":        [3, 100, "Prefetch hits percentage"],
-    "pm%":        [3, 100, "Prefetch miss percentage"],
-    "mhit":       [4, 1000, "Metadata hits per second"],
-    "mmis":       [4, 1000, "Metadata misses per second"],
-    "mread":      [5, 1000, "Metadata accesses per second"],
-    "mh%":        [3, 100, "Metadata hit percentage"],
-    "mm%":        [3, 100, "Metadata miss percentage"],
-    "arcsz":      [5, 1024, "ARC Size"],
-    "c":          [4, 1024, "ARC Target Size"],
-    "mfu":        [4, 1000, "MFU List hits per second"],
-    "mru":        [4, 1000, "MRU List hits per second"],
-    "mfug":       [4, 1000, "MFU Ghost List hits per second"],
-    "mrug":       [4, 1000, "MRU Ghost List hits per second"],
-    "eskip":      [5, 1000, "evict_skip per second"],
-    "mtxmis":     [6, 1000, "mutex_miss per second"],
-    "dread":      [5, 1000, "Demand accesses per second"],
-    "pread":      [5, 1000, "Prefetch accesses per second"],
-    "l2hits":     [6, 1000, "L2ARC hits per second"],
-    "l2miss":     [6, 1000, "L2ARC misses per second"],
-    "l2read":     [6, 1000, "Total L2ARC accesses per second"],
-    "l2hit%":     [6, 100, "L2ARC access hit percentage"],
-    "l2miss%":    [7, 100, "L2ARC access miss percentage"],
-    "l2asize":    [7, 1024, "Actual (compressed) size of the L2ARC"],
-    "l2size":     [6, 1024, "Size of the L2ARC"],
-    "l2bytes":    [7, 1024, "bytes read per second from the L2ARC"],
-    "grow":       [4, 1000, "ARC Grow disabled"],
-    "need":       [4, 1024, "ARC Reclaim need"],
-    "free":       [4, 1024, "ARC Free memory"],
-}
-
-v = {}
-hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis",
-       "mm%", "arcsz", "c"]
-xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "dread",
-        "pread", "read"]
-sint = 1               # Default interval is 1 second
-count = 1              # Default count is 1
-hdr_intr = 20          # Print header every 20 lines of output
-opfile = None
-sep = "  "              # Default separator is 2 spaces
-version = "0.4"
-l2exist = False
-cmd = ("Usage: arcstat [-hvx] [-f fields] [-o file] [-s string] [interval "
-       "[count]]\n")
-cur = {}
-d = {}
-out = None
-kstat = None
-
-
-def detailed_usage():
-    sys.stderr.write("%s\n" % cmd)
-    sys.stderr.write("Field definitions are as follows:\n")
-    for key in cols:
-        sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
-    sys.stderr.write("\n")
-
-    sys.exit(0)
-
-
-def usage():
-    sys.stderr.write("%s\n" % cmd)
-    sys.stderr.write("\t -h : Print this help message\n")
-    sys.stderr.write("\t -v : List all possible field headers and definitions"
-                     "\n")
-    sys.stderr.write("\t -x : Print extended stats\n")
-    sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n")
-    sys.stderr.write("\t -o : Redirect output to the specified file\n")
-    sys.stderr.write("\t -s : Override default field separator with custom "
-                     "character or string\n")
-    sys.stderr.write("\nExamples:\n")
-    sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
-    sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
-    sys.stderr.write("\tarcstat -v\n")
-    sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n")
-    sys.stderr.write("\n")
-
-    sys.exit(1)
-
-
-def kstat_update():
-    global kstat
-
-    k = [line.strip() for line in open('/proc/spl/kstat/zfs/arcstats')]
-
-    if not k:
-        sys.exit(1)
-
-    del k[0:2]
-    kstat = {}
-
-    for s in k:
-        if not s:
-            continue
-
-        name, unused, value = s.split()
-        kstat[name] = Decimal(value)
-
-
-def snap_stats():
-    global cur
-    global kstat
-
-    prev = copy.deepcopy(cur)
-    kstat_update()
-
-    cur = kstat
-    for key in cur:
-        if re.match(key, "class"):
-            continue
-        if key in prev:
-            d[key] = cur[key] - prev[key]
-        else:
-            d[key] = cur[key]
-
-
-def prettynum(sz, scale, num=0):
-    suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
-    index = 0
-    save = 0
-
-    # Special case for date field
-    if scale == -1:
-        return "%s" % num
-
-    # Rounding error, return 0
-    elif 0 < num < 1:
-        num = 0
-
-    while num > scale and index < 5:
-        save = num
-        num = num / scale
-        index += 1
-
-    if index == 0:
-        return "%*d" % (sz, num)
-
-    if (save / scale) < 10:
-        return "%*.1f%s" % (sz - 1, num, suffix[index])
-    else:
-        return "%*d%s" % (sz - 1, num, suffix[index])
-
-
-def print_values():
-    global hdr
-    global sep
-    global v
-
-    for col in hdr:
-        sys.stdout.write("%s%s" % (
-            prettynum(cols[col][0], cols[col][1], v[col]),
-            sep
-        ))
-    sys.stdout.write("\n")
-    sys.stdout.flush()
-
-
-def print_header():
-    global hdr
-    global sep
-
-    for col in hdr:
-        sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
-    sys.stdout.write("\n")
-
-
-def get_terminal_lines():
-    try:
-        import fcntl
-        import termios
-        import struct
-        data = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, '1234')
-        sz = struct.unpack('hh', data)
-        return sz[0]
-    except Exception:
-        pass
-
-
-def update_hdr_intr():
-    global hdr_intr
-
-    lines = get_terminal_lines()
-    if lines and lines > 3:
-        hdr_intr = lines - 3
-
-
-def resize_handler(signum, frame):
-    update_hdr_intr()
-
-
-def init():
-    global sint
-    global count
-    global hdr
-    global xhdr
-    global opfile
-    global sep
-    global out
-    global l2exist
-
-    desired_cols = None
-    xflag = False
-    hflag = False
-    vflag = False
-    i = 1
-
-    try:
-        opts, args = getopt.getopt(
-            sys.argv[1:],
-            "xo:hvs:f:",
-            [
-                "extended",
-                "outfile",
-                "help",
-                "verbose",
-                "separator",
-                "columns"
-            ]
-        )
-    except getopt.error as msg:
-        sys.stderr.write("Error: %s\n" % str(msg))
-        usage()
-        opts = None
-
-    for opt, arg in opts:
-        if opt in ('-x', '--extended'):
-            xflag = True
-        if opt in ('-o', '--outfile'):
-            opfile = arg
-            i += 1
-        if opt in ('-h', '--help'):
-            hflag = True
-        if opt in ('-v', '--verbose'):
-            vflag = True
-        if opt in ('-s', '--separator'):
-            sep = arg
-            i += 1
-        if opt in ('-f', '--columns'):
-            desired_cols = arg
-            i += 1
-        i += 1
-
-    argv = sys.argv[i:]
-    sint = Decimal(argv[0]) if argv else sint
-    count = int(argv[1]) if len(argv) > 1 else count
-
-    if len(argv) > 1:
-        sint = Decimal(argv[0])
-        count = int(argv[1])
-
-    elif len(argv) > 0:
-        sint = Decimal(argv[0])
-        count = 0
-
-    if hflag or (xflag and desired_cols):
-        usage()
-
-    if vflag:
-        detailed_usage()
-
-    if xflag:
-        hdr = xhdr
-
-    update_hdr_intr()
-
-    # check if L2ARC exists
-    snap_stats()
-    l2_size = cur.get("l2_size")
-    if l2_size:
-        l2exist = True
-
-    if desired_cols:
-        hdr = desired_cols.split(",")
-
-        invalid = []
-        incompat = []
-        for ele in hdr:
-            if ele not in cols:
-                invalid.append(ele)
-            elif not l2exist and ele.startswith("l2"):
-                sys.stdout.write("No L2ARC Here\n%s\n" % ele)
-                incompat.append(ele)
-
-        if len(invalid) > 0:
-            sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
-            usage()
-
-        if len(incompat) > 0:
-            sys.stderr.write("Incompatible field specified! -- %s\n" %
-                             incompat)
-            usage()
-
-    if opfile:
-        try:
-            out = open(opfile, "w")
-            sys.stdout = out
-
-        except IOError:
-            sys.stderr.write("Cannot open %s for writing\n" % opfile)
-            sys.exit(1)
-
-
-def calculate():
-    global d
-    global v
-    global l2exist
-
-    v = dict()
-    v["time"] = time.strftime("%H:%M:%S", time.localtime())
-    v["hits"] = d["hits"] / sint
-    v["miss"] = d["misses"] / sint
-    v["read"] = v["hits"] + v["miss"]
-    v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0
-    v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0
-
-    v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint
-    v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint
-
-    v["dread"] = v["dhit"] + v["dmis"]
-    v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0
-    v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0
-
-    v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint
-    v["pmis"] = (d["prefetch_data_misses"] +
-                 d["prefetch_metadata_misses"]) / sint
-
-    v["pread"] = v["phit"] + v["pmis"]
-    v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0
-    v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0
-
-    v["mhit"] = (d["prefetch_metadata_hits"] +
-                 d["demand_metadata_hits"]) / sint
-    v["mmis"] = (d["prefetch_metadata_misses"] +
-                 d["demand_metadata_misses"]) / sint
-
-    v["mread"] = v["mhit"] + v["mmis"]
-    v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0
-    v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0
-
-    v["arcsz"] = cur["size"]
-    v["c"] = cur["c"]
-    v["mfu"] = d["mfu_hits"] / sint
-    v["mru"] = d["mru_hits"] / sint
-    v["mrug"] = d["mru_ghost_hits"] / sint
-    v["mfug"] = d["mfu_ghost_hits"] / sint
-    v["eskip"] = d["evict_skip"] / sint
-    v["mtxmis"] = d["mutex_miss"] / sint
-
-    if l2exist:
-        v["l2hits"] = d["l2_hits"] / sint
-        v["l2miss"] = d["l2_misses"] / sint
-        v["l2read"] = v["l2hits"] + v["l2miss"]
-        v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0
-
-        v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0
-        v["l2asize"] = cur["l2_asize"]
-        v["l2size"] = cur["l2_size"]
-        v["l2bytes"] = d["l2_read_bytes"] / sint
-
-    v["grow"] = 0 if cur["arc_no_grow"] else 1
-    v["need"] = cur["arc_need_free"]
-    v["free"] = cur["arc_sys_free"]
-
-
-def main():
-    global sint
-    global count
-    global hdr_intr
-
-    i = 0
-    count_flag = 0
-
-    init()
-    if count > 0:
-        count_flag = 1
-
-    signal(SIGINT, SIG_DFL)
-    signal(SIGWINCH, resize_handler)
-    while True:
-        if i == 0:
-            print_header()
-
-        snap_stats()
-        calculate()
-        print_values()
-
-        if count_flag == 1:
-            if count <= 1:
-                break
-            count -= 1
-
-        i = 0 if i >= hdr_intr else i + 1
-        time.sleep(sint)
-
-    if out:
-        out.close()
-
-
-if __name__ == '__main__':
-    main()

diff --git a/zfs/cmd/arcstat/arcstat.in b/zfs/cmd/arcstat/arcstat.in
new file mode 100755
index 0000000..0128fd8
--- /dev/null
+++ b/zfs/cmd/arcstat/arcstat.in

@@ -0,0 +1,554 @@
+#!/usr/bin/env @PYTHON_SHEBANG@
+#
+# Print out ZFS ARC Statistics exported via kstat(1)
+# For a definition of fields, or usage, use arcstat -v
+#
+# This script was originally a fork of the original arcstat.pl (0.1)
+# by Neelakanth Nadgir, originally published on his Sun blog on
+# 09/18/2007
+#     http://blogs.sun.com/realneel/entry/zfs_arc_statistics
+#
+# A new version aimed to improve upon the original by adding features
+# and fixing bugs as needed.  This version was maintained by Mike
+# Harsch and was hosted in a public open source repository:
+#    http://github.com/mharsch/arcstat
+#
+# but has since moved to the illumos-gate repository.
+#
+# This Python port was written by John Hixson for FreeNAS, introduced
+# in commit e2c29f:
+#    https://github.com/freenas/freenas
+#
+# and has been improved by many people since.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Fields have a fixed width. Every interval, calculate() fills the "v"
+# dict with the value of each field (v[field] = value).
+# "hdr" is the list of fields that need to be printed, so we just
+# iterate over that list and print the values using our pretty printer.
+#
+# This script must remain compatible with Python 3.6+.
+#
+
+import sys
+import time
+import getopt
+import re
+import copy
+
+from signal import signal, SIGINT, SIGWINCH, SIG_DFL
+
+
+# Table of every column arcstat can print, keyed by the header name.
+#   Size        - field width; print_header() pads the name to it and
+#                 print_values() hands it to prettynum() as "sz".
+#   Scale       - base prettynum() divides by when picking a unit suffix;
+#                 -1 means "print the value verbatim" (used for "time").
+#   Description - text shown by "arcstat -v" (see detailed_usage()).
+cols = {
+    # HDR:        [Size, Scale, Description]
+    "time":       [8, -1, "Time"],
+    # calculate() fills "hits" from d["hits"]; "read" is the hits+misses sum.
+    "hits":       [4, 1000, "ARC hits per second"],
+    "miss":       [4, 1000, "ARC misses per second"],
+    "read":       [4, 1000, "Total ARC accesses per second"],
+    "hit%":       [4, 100, "ARC hit percentage"],
+    "miss%":      [5, 100, "ARC miss percentage"],
+    "dhit":       [4, 1000, "Demand hits per second"],
+    "dmis":       [4, 1000, "Demand misses per second"],
+    "dh%":        [3, 100, "Demand hit percentage"],
+    "dm%":        [3, 100, "Demand miss percentage"],
+    "phit":       [4, 1000, "Prefetch hits per second"],
+    "pmis":       [4, 1000, "Prefetch misses per second"],
+    "ph%":        [3, 100, "Prefetch hits percentage"],
+    "pm%":        [3, 100, "Prefetch miss percentage"],
+    "mhit":       [4, 1000, "Metadata hits per second"],
+    "mmis":       [4, 1000, "Metadata misses per second"],
+    "mread":      [5, 1000, "Metadata accesses per second"],
+    "mh%":        [3, 100, "Metadata hit percentage"],
+    "mm%":        [3, 100, "Metadata miss percentage"],
+    "arcsz":      [5, 1024, "ARC size"],
+    "size":       [4, 1024, "ARC size"],
+    "c":          [4, 1024, "ARC target size"],
+    "mfu":        [4, 1000, "MFU list hits per second"],
+    "mru":        [4, 1000, "MRU list hits per second"],
+    "mfug":       [4, 1000, "MFU ghost list hits per second"],
+    "mrug":       [4, 1000, "MRU ghost list hits per second"],
+    "eskip":      [5, 1000, "evict_skip per second"],
+    "el2skip":    [7, 1000, "evict skip, due to l2 writes, per second"],
+    "el2cach":    [7, 1024, "Size of L2 cached evictions per second"],
+    "el2el":      [5, 1024, "Size of L2 eligible evictions per second"],
+    "el2mfu":     [6, 1024, "Size of L2 eligible MFU evictions per second"],
+    "el2mru":     [6, 1024, "Size of L2 eligible MRU evictions per second"],
+    "el2inel":    [7, 1024, "Size of L2 ineligible evictions per second"],
+    "mtxmis":     [6, 1000, "mutex_miss per second"],
+    "dread":      [5, 1000, "Demand accesses per second"],
+    "pread":      [5, 1000, "Prefetch accesses per second"],
+    "l2hits":     [6, 1000, "L2ARC hits per second"],
+    "l2miss":     [6, 1000, "L2ARC misses per second"],
+    "l2read":     [6, 1000, "Total L2ARC accesses per second"],
+    "l2hit%":     [6, 100, "L2ARC access hit percentage"],
+    "l2miss%":    [7, 100, "L2ARC access miss percentage"],
+    "l2pref":     [6, 1024, "L2ARC prefetch allocated size"],
+    "l2mfu":      [5, 1024, "L2ARC MFU allocated size"],
+    "l2mru":      [5, 1024, "L2ARC MRU allocated size"],
+    "l2data":     [6, 1024, "L2ARC data allocated size"],
+    "l2meta":     [6, 1024, "L2ARC metadata allocated size"],
+    "l2pref%":    [7, 100, "L2ARC prefetch percentage"],
+    "l2mfu%":     [6, 100, "L2ARC MFU percentage"],
+    "l2mru%":     [6, 100, "L2ARC MRU percentage"],
+    "l2data%":    [7, 100, "L2ARC data percentage"],
+    "l2meta%":    [7, 100, "L2ARC metadata percentage"],
+    "l2asize":    [7, 1024, "Actual (compressed) size of the L2ARC"],
+    "l2size":     [6, 1024, "Size of the L2ARC"],
+    "l2bytes":    [7, 1024, "Bytes read per second from the L2ARC"],
+    # NOTE(review): calculate() stores 0 when arc_no_grow is set and 1
+    # otherwise, so a printed 1 means growth is allowed; the description
+    # below reads as the opposite -- confirm the intended wording.
+    "grow":       [4, 1000, "ARC grow disabled"],
+    "need":       [4, 1024, "ARC reclaim need"],
+    "free":       [4, 1024, "ARC free memory"],
+    "avail":      [5, 1024, "ARC available memory"],
+    "waste":      [5, 1024, "Wasted memory due to round up to pagesize"],
+}
+
+# Module-wide state shared by the functions below.
+
+# Values for the current interval, keyed by column name; rebuilt by
+# calculate() on every pass.
+v = {}
+# Columns printed by default.
+hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis",
+       "mm%", "size", "c", "avail"]
+# Columns printed when -x is given (init() assigns this list to hdr).
+xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "dread",
+        "pread", "read"]
+sint = 1               # Default interval is 1 second
+count = 1              # Default count is 1
+hdr_intr = 20          # Print header every 20 lines of output
+opfile = None          # Output file name given with -o, if any
+sep = "  "              # Default separator is 2 spaces
+version = "0.4"
+l2exist = False        # Set by init() when the l2_size kstat is non-zero
+cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
+       "[count]]\n")
+cur = {}               # Most recent kstat snapshot (see snap_stats())
+d = {}                 # Per-key difference between the last two snapshots
+out = None             # File object opened by init() for opfile
+kstat = None           # Raw name -> int mapping filled by kstat_update()
+pretty_print = True    # Cleared by -p to print unscaled values
+
+
+if sys.platform.startswith('freebsd'):
+    # Requires py-sysctl on FreeBSD
+    import sysctl
+
+    def kstat_update():
+        """Reload the global kstat dict from the arcstats sysctl subtree.
+
+        Exits with status 1 when the subtree yields no leaf entries.
+        """
+        global kstat
+
+        prefix = 'kstat.zfs.misc.arcstats'
+        leaves = [node for node in sysctl.filter(prefix)
+                  if node.type != sysctl.CTLTYPE_NODE]
+
+        if not leaves:
+            sys.exit(1)
+
+        # Number of characters to drop from each name: the prefix plus the
+        # dot that separates it from the statistic name.
+        skip = len(prefix) + 1
+
+        kstat = {}
+        for leaf in leaves:
+            if leaf:
+                stat, raw = leaf.name, leaf.value
+                kstat[stat[skip:]] = int(raw)
+
+elif sys.platform.startswith('linux'):
+    def kstat_update():
+        """Reload the global kstat dict from /proc/spl/kstat/zfs/arcstats.
+
+        Every non-empty line after the first two is split into three
+        whitespace-separated fields; the first is used as the key and the
+        third, converted to int, as the value.  Exits with status 1 when
+        the file is empty.
+        """
+        global kstat
+
+        # This runs once per interval, so close the file deterministically
+        # instead of leaving the handle to the garbage collector.
+        with open('/proc/spl/kstat/zfs/arcstats') as f:
+            k = [line.strip() for line in f]
+
+        if not k:
+            sys.exit(1)
+
+        # The first two lines are not statistics.
+        del k[0:2]
+        kstat = {}
+
+        for s in k:
+            if not s:
+                continue
+
+            name, unused, value = s.split()
+            kstat[name] = int(value)
+
+
+def detailed_usage():
+    """Write the usage line and all field definitions to stderr; exit 0."""
+    err = sys.stderr
+
+    err.write("%s\n" % cmd)
+    err.write("Field definitions are as follows:\n")
+    for name, spec in cols.items():
+        # spec is [size, scale, description]; only the description is shown.
+        err.write("%11s : %s\n" % (name, spec[2]))
+    err.write("\n")
+
+    sys.exit(0)
+
+
+def usage():
+    """Write the option summary and examples to stderr, then exit 1."""
+    help_text = (
+        "%s\n" % cmd,
+        "\t -h : Print this help message\n",
+        "\t -a : Print all possible stats\n",
+        ("\t -v : List all possible field headers and definitions"
+         "\n"),
+        "\t -x : Print extended stats\n",
+        "\t -f : Specify specific fields to print (see -v)\n",
+        "\t -o : Redirect output to the specified file\n",
+        ("\t -s : Override default field separator with custom "
+         "character or string\n"),
+        "\t -p : Disable auto-scaling of numerical fields\n",
+        "\nExamples:\n",
+        "\tarcstat -o /tmp/a.log 2 10\n",
+        "\tarcstat -s \",\" -o /tmp/a.log 2 10\n",
+        "\tarcstat -v\n",
+        "\tarcstat -f time,hit%,dh%,ph%,mh% 1\n",
+        "\n",
+    )
+    for chunk in help_text:
+        sys.stderr.write(chunk)
+
+    sys.exit(1)
+
+
+def snap_stats():
+    """Take a new kstat snapshot and record per-key deltas in d.
+
+    The previous snapshot is kept aside, kstat_update() refreshes the
+    global kstat dict, and cur is pointed at it.  For each key, d[key]
+    becomes cur[key] - prev[key], or cur[key] itself when the key was not
+    in the previous snapshot (as on the very first call).
+    """
+    global cur
+    global kstat
+
+    prev = copy.deepcopy(cur)
+    kstat_update()
+
+    cur = kstat
+    for key in cur:
+        # re.match() takes the pattern first and the subject second.  The
+        # arguments used to be swapped, which used each key as a pattern
+        # against the literal "class" and therefore skipped the "c" key.
+        # Presumably the intent is to skip a non-numeric "class" entry.
+        if re.match("class", key):
+            continue
+        if key in prev:
+            d[key] = cur[key] - prev[key]
+        else:
+            d[key] = cur[key]
+
+
+def prettynum(sz, scale, num=0):
+    """Format num for a column that is sz characters wide.
+
+    A scale of -1 returns num converted with "%s" and no padding.
+    Otherwise num is divided by scale until it no longer exceeds it (at
+    most five times) and the matching unit letter is appended.  One
+    decimal is shown when the scaled value is below 10.
+    """
+    units = ' KMGTPEZ'
+
+    # Special case for date field
+    if scale == -1:
+        return "%s" % num
+
+    # Rounding error, return 0
+    if 0 < num < 1:
+        num = 0
+
+    steps = 0
+    before = 0
+    while steps < 5 and abs(num) > scale:
+        before, num = num, num / scale
+        steps += 1
+
+    if not steps:
+        return "%*d" % (sz, num)
+
+    # One character of the width is reserved for the unit letter.
+    template = "%*.1f%s" if abs(before / scale) < 10 else "%*d%s"
+    return template % (sz - 1, num, units[steps])
+
+
+def print_values():
+    """Write one line holding the current value of every column in hdr.
+
+    Values go through prettynum() unless pretty_print is off, in which
+    case str() is used.  stdout is flushed after the line.
+    """
+    global hdr
+    global sep
+    global v
+    global pretty_print
+
+    if pretty_print:
+        cells = [prettynum(cols[name][0], cols[name][1], v[name])
+                 for name in hdr]
+    else:
+        cells = [str(v[name]) for name in hdr]
+
+    sys.stdout.write(sep.join(cells))
+    sys.stdout.write("\n")
+    sys.stdout.flush()
+
+
+def print_header():
+    """Write the header line naming every column in hdr.
+
+    Names are right-aligned to the column width when pretty_print is on
+    and written as-is otherwise.
+    """
+    global hdr
+    global sep
+    global pretty_print
+
+    if pretty_print:
+        titles = ["%*s" % (cols[name][0], name) for name in hdr]
+    else:
+        titles = [name for name in hdr]
+
+    sys.stdout.write(sep.join(titles))
+    sys.stdout.write("\n")
+
+
+def get_terminal_lines():
+    """Return the first TIOCGWINSZ field for stdout, or None on any error.
+
+    Any exception is swallowed on purpose, including a failed import or
+    stdout not being a terminal; the caller treats None as "unknown".
+    """
+    try:
+        import fcntl
+        import termios
+        import struct
+        winsz = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, '1234')
+        first, _second = struct.unpack('hh', winsz)
+    except Exception:
+        return None
+    return first
+
+
+def update_hdr_intr():
+    """Set hdr_intr to the terminal height minus three, when it is known.
+
+    hdr_intr is left alone if the height is unavailable or not above 3.
+    """
+    global hdr_intr
+
+    rows = get_terminal_lines()
+    if not rows or rows <= 3:
+        return
+    hdr_intr = rows - 3
+
+
+def resize_handler(signum, frame):
+    # Signal handler (main() installs it for SIGWINCH): recompute how often
+    # the header is printed.  signum and frame are unused.
+    update_hdr_intr()
+
+
+def init():
+    """Parse the command line and set up the global configuration.
+
+    Sets sint, count, hdr, opfile, sep, out, l2exist and pretty_print from
+    the options and the optional "interval [count]" positionals, takes a
+    first kstat snapshot to detect an L2ARC, validates any -f column list
+    and, for -o, redirects sys.stdout to the named file.
+
+    Exits through usage() on a bad option, a non-integer interval or
+    count, -h, -x combined with -f, or an invalid/incompatible column;
+    exits through detailed_usage() for -v; exits with status 1 when the
+    output file cannot be opened.
+    """
+    global sint
+    global count
+    global hdr
+    global xhdr
+    global opfile
+    global sep
+    global out
+    global l2exist
+    global pretty_print
+
+    desired_cols = None
+    aflag = False
+    xflag = False
+    hflag = False
+    vflag = False
+
+    try:
+        # Long options that take a value need a trailing "=", otherwise
+        # getopt treats them as plain flags.
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "axo:hvs:f:p",
+            [
+                "all",
+                "extended",
+                "outfile=",
+                "help",
+                "verbose",
+                "separator=",
+                "columns=",
+                "parsable"
+            ]
+        )
+    except getopt.error as msg:
+        sys.stderr.write("Error: %s\n" % str(msg))
+        usage()  # does not return
+
+    for opt, arg in opts:
+        if opt in ('-a', '--all'):
+            aflag = True
+        if opt in ('-x', '--extended'):
+            xflag = True
+        if opt in ('-o', '--outfile'):
+            opfile = arg
+        if opt in ('-h', '--help'):
+            hflag = True
+        if opt in ('-v', '--verbose'):
+            vflag = True
+        if opt in ('-s', '--separator'):
+            sep = arg
+        if opt in ('-f', '--columns'):
+            desired_cols = arg
+        if opt in ('-p', '--parsable'):
+            pretty_print = False
+
+    # Use the positionals getopt already separated out.  Counting argv
+    # slots by hand goes wrong for bundled flags ("-xp 1 5") and for
+    # values attached to their option ("-o/tmp/a.log").
+    try:
+        sint = int(args[0]) if args else sint
+        # An interval without a count means "run until interrupted" (0).
+        count = int(args[1]) if len(args) > 1 else (0 if args else 1)
+    except ValueError:
+        sys.stderr.write("Error: interval and count must be integers\n")
+        usage()
+
+    if hflag or (xflag and desired_cols):
+        usage()
+
+    if vflag:
+        detailed_usage()
+
+    if xflag:
+        hdr = xhdr
+
+    update_hdr_intr()
+
+    # check if L2ARC exists
+    snap_stats()
+    l2_size = cur.get("l2_size")
+    if l2_size:
+        l2exist = True
+
+    if desired_cols:
+        hdr = desired_cols.split(",")
+
+        invalid = []
+        incompat = []
+        for ele in hdr:
+            if ele not in cols:
+                invalid.append(ele)
+            elif not l2exist and ele.startswith("l2"):
+                sys.stdout.write("No L2ARC Here\n%s\n" % ele)
+                incompat.append(ele)
+
+        if len(invalid) > 0:
+            sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
+            usage()
+
+        if len(incompat) > 0:
+            sys.stderr.write("Incompatible field specified! -- %s\n" %
+                             incompat)
+            usage()
+
+    if aflag:
+        if l2exist:
+            hdr = cols.keys()
+        else:
+            # Without an L2ARC the l2* values are never computed.
+            hdr = [col for col in cols.keys() if not col.startswith("l2")]
+
+    if opfile:
+        try:
+            out = open(opfile, "w")
+            sys.stdout = out
+
+        except IOError:
+            sys.stderr.write("Cannot open %s for writing\n" % opfile)
+            sys.exit(1)
+
+
+def calculate():
+    """Rebuild v, the per-column values for the current interval.
+
+    Rates are the deltas in d divided by the interval sint; sizes and
+    flags are read straight from the latest snapshot cur.  Percentages
+    are reported as 0 when their denominator is 0.  The l2* columns are
+    only filled in when l2exist is set.
+    """
+    global d
+    global v
+    global l2exist
+
+    v = dict()
+    v["time"] = time.strftime("%H:%M:%S", time.localtime())
+    v["hits"] = d["hits"] / sint
+    v["miss"] = d["misses"] / sint
+    v["read"] = v["hits"] + v["miss"]
+    v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0
+    v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0
+
+    v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint
+    v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint
+
+    v["dread"] = v["dhit"] + v["dmis"]
+    v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0
+    v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0
+
+    v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint
+    v["pmis"] = (d["prefetch_data_misses"] +
+                 d["prefetch_metadata_misses"]) / sint
+
+    v["pread"] = v["phit"] + v["pmis"]
+    v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0
+    v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0
+
+    v["mhit"] = (d["prefetch_metadata_hits"] +
+                 d["demand_metadata_hits"]) / sint
+    v["mmis"] = (d["prefetch_metadata_misses"] +
+                 d["demand_metadata_misses"]) / sint
+
+    v["mread"] = v["mhit"] + v["mmis"]
+    v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0
+    v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0
+
+    # "arcsz" and "size" are two names for the same value.
+    v["arcsz"] = cur["size"]
+    v["size"] = cur["size"]
+    v["c"] = cur["c"]
+    v["mfu"] = d["mfu_hits"] / sint
+    v["mru"] = d["mru_hits"] / sint
+    v["mrug"] = d["mru_ghost_hits"] / sint
+    v["mfug"] = d["mfu_ghost_hits"] / sint
+    v["eskip"] = d["evict_skip"] / sint
+    v["el2skip"] = d["evict_l2_skip"] / sint
+    v["el2cach"] = d["evict_l2_cached"] / sint
+    v["el2el"] = d["evict_l2_eligible"] / sint
+    v["el2mfu"] = d["evict_l2_eligible_mfu"] / sint
+    v["el2mru"] = d["evict_l2_eligible_mru"] / sint
+    v["el2inel"] = d["evict_l2_ineligible"] / sint
+    v["mtxmis"] = d["mutex_miss"] / sint
+
+    if l2exist:
+        v["l2hits"] = d["l2_hits"] / sint
+        v["l2miss"] = d["l2_misses"] / sint
+        v["l2read"] = v["l2hits"] + v["l2miss"]
+        v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0
+
+        v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0
+        v["l2asize"] = cur["l2_asize"]
+        v["l2size"] = cur["l2_size"]
+        v["l2bytes"] = d["l2_read_bytes"] / sint
+
+        v["l2pref"] = cur["l2_prefetch_asize"]
+        v["l2mfu"] = cur["l2_mfu_asize"]
+        v["l2mru"] = cur["l2_mru_asize"]
+        v["l2data"] = cur["l2_bufc_data_asize"]
+        v["l2meta"] = cur["l2_bufc_metadata_asize"]
+
+        # l2exist is derived from l2_size, not l2_asize, so guard the
+        # division the same way the other percentages above are guarded.
+        for name in ("l2pref", "l2mfu", "l2mru", "l2data", "l2meta"):
+            v[name + "%"] = (100 * v[name] / v["l2asize"]
+                             if v["l2asize"] > 0 else 0)
+
+    v["grow"] = 0 if cur["arc_no_grow"] else 1
+    v["need"] = cur["arc_need_free"]
+    v["free"] = cur["memory_free_bytes"]
+    v["avail"] = cur["memory_available_bytes"]
+    v["waste"] = cur["abd_chunk_waste_size"]
+
+
+def main():
+    """Run the sampling loop: snapshot, calculate, print, sleep.
+
+    A positive count limits the number of lines printed; otherwise the
+    loop runs until the process is interrupted.  The header is printed
+    again once more than hdr_intr lines have followed the previous one.
+    """
+    global sint
+    global count
+    global hdr_intr
+
+    init()
+    bounded = count > 0
+    since_header = 0
+
+    signal(SIGINT, SIG_DFL)
+    signal(SIGWINCH, resize_handler)
+    while True:
+        if since_header == 0:
+            print_header()
+
+        snap_stats()
+        calculate()
+        print_values()
+
+        if bounded:
+            if count <= 1:
+                break
+            count -= 1
+
+        if since_header >= hdr_intr:
+            since_header = 0
+        else:
+            since_header += 1
+        time.sleep(sint)
+
+    if out:
+        out.close()
+
+
+if __name__ == '__main__':
+    main()

diff --git a/zfs/cmd/dbufstat/.gitignore b/zfs/cmd/dbufstat/.gitignore
new file mode 100644
index 0000000..2c2e913
--- /dev/null
+++ b/zfs/cmd/dbufstat/.gitignore

@@ -0,0 +1 @@
+dbufstat

diff --git a/zfs/cmd/dbufstat/Makefile.am b/zfs/cmd/dbufstat/Makefile.am
index a3f0c6e..e672a01 100644
--- a/zfs/cmd/dbufstat/Makefile.am
+++ b/zfs/cmd/dbufstat/Makefile.am

@@ -1,13 +1,5 @@
-dist_bin_SCRIPTS = dbufstat
+include $(top_srcdir)/config/Substfiles.am
 
-#
-# The dbufstat script is compatible with both Python 2.6 and 3.4.
-# As such the python 3 shebang can be replaced at install time when
-# targeting a python 2 system.  This allows us to maintain a single
-# version of the source.
-#
-if USING_PYTHON_2
-install-exec-hook:
-	sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \
-	    $(DESTDIR)$(bindir)/dbufstat
-endif
+bin_SCRIPTS = dbufstat
+
+SUBSTFILES += $(bin_SCRIPTS)

diff --git a/zfs/cmd/dbufstat/dbufstat b/zfs/cmd/dbufstat/dbufstat
deleted file mode 100755
index 4a57d81..0000000
--- a/zfs/cmd/dbufstat/dbufstat
+++ /dev/null

@@ -1,669 +0,0 @@
-#!/usr/bin/env python3
-#
-# Print out statistics for all cached dmu buffers.  This information
-# is available through the dbufs kstat and may be post-processed as
-# needed by the script.
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright (C) 2013 Lawrence Livermore National Security, LLC.
-# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
-#
-# This script must remain compatible with Python 2.6+ and Python 3.4+.
-#
-
-import sys
-import getopt
-import errno
-import re
-
-bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"]
-bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize",
-         "meta", "state", "dbholds", "dbc", "list", "atype", "flags",
-         "count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2",
-         "l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype",
-         "data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"]
-bincompat = ["cached", "direct", "indirect", "bonus", "spill"]
-
-dhdr = ["pool", "objset", "object", "dtype", "cached"]
-dxhdr = ["pool", "objset", "object", "dtype", "btype", "data_bs", "meta_bs",
-         "bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct",
-         "indirect", "bonus", "spill"]
-dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds",
-             "dbc", "list", "atype", "flags", "count", "asize", "access",
-             "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
-             "l2_comp", "aholds"]
-
-thdr = ["pool", "objset", "dtype", "cached"]
-txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect",
-         "bonus", "spill"]
-tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state",
-             "dbc", "dbholds", "list", "atype", "flags", "count", "asize",
-             "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
-             "l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs",
-             "bsize", "lvls", "dholds", "blocks", "dsize"]
-
-cols = {
-    # hdr:        [size, scale, description]
-    "pool":       [15,   -1, "pool name"],
-    "objset":     [6,    -1, "dataset identification number"],
-    "object":     [10,   -1, "object number"],
-    "level":      [5,    -1, "indirection level of buffer"],
-    "blkid":      [8,    -1, "block number of buffer"],
-    "offset":     [12, 1024, "offset in object of buffer"],
-    "dbsize":     [7,  1024, "size of buffer"],
-    "meta":       [4,    -1, "is this buffer metadata?"],
-    "state":      [5,    -1, "state of buffer (read, cached, etc)"],
-    "dbholds":    [7,  1000, "number of holds on buffer"],
-    "dbc":        [3,    -1, "in dbuf cache"],
-    "list":       [4,    -1, "which ARC list contains this buffer"],
-    "atype":      [7,    -1, "ARC header type (data or metadata)"],
-    "flags":      [9,    -1, "ARC read flags"],
-    "count":      [5,    -1, "ARC data count"],
-    "asize":      [7,  1024, "size of this ARC buffer"],
-    "access":     [10,   -1, "time this ARC buffer was last accessed"],
-    "mru":        [5,  1000, "hits while on the ARC's MRU list"],
-    "gmru":       [5,  1000, "hits while on the ARC's MRU ghost list"],
-    "mfu":        [5,  1000, "hits while on the ARC's MFU list"],
-    "gmfu":       [5,  1000, "hits while on the ARC's MFU ghost list"],
-    "l2":         [5,  1000, "hits while on the L2ARC"],
-    "l2_dattr":   [8,    -1, "L2ARC disk address/offset"],
-    "l2_asize":   [8,  1024, "L2ARC alloc'd size (depending on compression)"],
-    "l2_comp":    [21,   -1, "L2ARC compression algorithm for buffer"],
-    "aholds":     [6,  1000, "number of holds on this ARC buffer"],
-    "dtype":      [27,   -1, "dnode type"],
-    "btype":      [27,   -1, "bonus buffer type"],
-    "data_bs":    [7,  1024, "data block size"],
-    "meta_bs":    [7,  1024, "metadata block size"],
-    "bsize":      [6,  1024, "bonus buffer size"],
-    "lvls":       [6,    -1, "number of indirection levels"],
-    "dholds":     [6,  1000, "number of holds on dnode"],
-    "blocks":     [8,  1000, "number of allocated blocks"],
-    "dsize":      [12, 1024, "size of dnode"],
-    "cached":     [6,  1024, "bytes cached for all blocks"],
-    "direct":     [6,  1024, "bytes cached for direct blocks"],
-    "indirect":   [8,  1024, "bytes cached for indirect blocks"],
-    "bonus":      [5,  1024, "bytes cached for bonus buffer"],
-    "spill":      [5,  1024, "bytes cached for spill block"],
-}
-
-hdr = None
-xhdr = None
-sep = "  "  # Default separator is 2 spaces
-cmd = ("Usage: dbufstat [-bdhnrtvx] [-i file] [-f fields] [-o file] "
-       "[-s string] [-F filter]\n")
-raw = 0
-
-
-def print_incompat_helper(incompat):
-    cnt = 0
-    for key in sorted(incompat):
-        if cnt is 0:
-            sys.stderr.write("\t")
-        elif cnt > 8:
-            sys.stderr.write(",\n\t")
-            cnt = 0
-        else:
-            sys.stderr.write(", ")
-
-        sys.stderr.write("%s" % key)
-        cnt += 1
-
-    sys.stderr.write("\n\n")
-
-
-def detailed_usage():
-    sys.stderr.write("%s\n" % cmd)
-
-    sys.stderr.write("Field definitions incompatible with '-b' option:\n")
-    print_incompat_helper(bincompat)
-
-    sys.stderr.write("Field definitions incompatible with '-d' option:\n")
-    print_incompat_helper(dincompat)
-
-    sys.stderr.write("Field definitions incompatible with '-t' option:\n")
-    print_incompat_helper(tincompat)
-
-    sys.stderr.write("Field definitions are as follows:\n")
-    for key in sorted(cols.keys()):
-        sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
-    sys.stderr.write("\n")
-
-    sys.exit(0)
-
-
-def usage():
-    sys.stderr.write("%s\n" % cmd)
-    sys.stderr.write("\t -b : Print table of information for each dbuf\n")
-    sys.stderr.write("\t -d : Print table of information for each dnode\n")
-    sys.stderr.write("\t -h : Print this help message\n")
-    sys.stderr.write("\t -n : Exclude header from output\n")
-    sys.stderr.write("\t -r : Print raw values\n")
-    sys.stderr.write("\t -t : Print table of information for each dnode type"
-                     "\n")
-    sys.stderr.write("\t -v : List all possible field headers and definitions"
-                     "\n")
-    sys.stderr.write("\t -x : Print extended stats\n")
-    sys.stderr.write("\t -i : Redirect input from the specified file\n")
-    sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n")
-    sys.stderr.write("\t -o : Redirect output to the specified file\n")
-    sys.stderr.write("\t -s : Override default field separator with custom "
-                     "character or string\n")
-    sys.stderr.write("\t -F : Filter output by value or regex\n")
-    sys.stderr.write("\nExamples:\n")
-    sys.stderr.write("\tdbufstat -d -o /tmp/d.log\n")
-    sys.stderr.write("\tdbufstat -t -s \",\" -o /tmp/t.log\n")
-    sys.stderr.write("\tdbufstat -v\n")
-    sys.stderr.write("\tdbufstat -d -f pool,object,objset,dsize,cached\n")
-    sys.stderr.write("\tdbufstat -bx -F dbc=1,objset=54,pool=testpool\n")
-    sys.stderr.write("\n")
-
-    sys.exit(1)
-
-
-def prettynum(sz, scale, num=0):
-    global raw
-
-    suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
-    index = 0
-    save = 0
-
-    if raw or scale == -1:
-        return "%*s" % (sz, num)
-
-    # Rounding error, return 0
-    elif 0 < num < 1:
-        num = 0
-
-    while num > scale and index < 5:
-        save = num
-        num = num / scale
-        index += 1
-
-    if index == 0:
-        return "%*d" % (sz, num)
-
-    if (save / scale) < 10:
-        return "%*.1f%s" % (sz - 1, num, suffix[index])
-    else:
-        return "%*d%s" % (sz - 1, num, suffix[index])
-
-
-def print_values(v):
-    global hdr
-    global sep
-
-    try:
-        for col in hdr:
-            sys.stdout.write("%s%s" % (
-                prettynum(cols[col][0], cols[col][1], v[col]), sep))
-        sys.stdout.write("\n")
-    except IOError as e:
-        if e.errno == errno.EPIPE:
-            sys.exit(1)
-
-
-def print_header():
-    global hdr
-    global sep
-
-    try:
-        for col in hdr:
-            sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
-        sys.stdout.write("\n")
-    except IOError as e:
-        if e.errno == errno.EPIPE:
-            sys.exit(1)
-
-
-def get_typestring(t):
-    ot_strings = [
-                    "DMU_OT_NONE",
-                    # general:
-                    "DMU_OT_OBJECT_DIRECTORY",
-                    "DMU_OT_OBJECT_ARRAY",
-                    "DMU_OT_PACKED_NVLIST",
-                    "DMU_OT_PACKED_NVLIST_SIZE",
-                    "DMU_OT_BPOBJ",
-                    "DMU_OT_BPOBJ_HDR",
-                    # spa:
-                    "DMU_OT_SPACE_MAP_HEADER",
-                    "DMU_OT_SPACE_MAP",
-                    # zil:
-                    "DMU_OT_INTENT_LOG",
-                    # dmu:
-                    "DMU_OT_DNODE",
-                    "DMU_OT_OBJSET",
-                    # dsl:
-                    "DMU_OT_DSL_DIR",
-                    "DMU_OT_DSL_DIR_CHILD_MAP",
-                    "DMU_OT_DSL_DS_SNAP_MAP",
-                    "DMU_OT_DSL_PROPS",
-                    "DMU_OT_DSL_DATASET",
-                    # zpl:
-                    "DMU_OT_ZNODE",
-                    "DMU_OT_OLDACL",
-                    "DMU_OT_PLAIN_FILE_CONTENTS",
-                    "DMU_OT_DIRECTORY_CONTENTS",
-                    "DMU_OT_MASTER_NODE",
-                    "DMU_OT_UNLINKED_SET",
-                    # zvol:
-                    "DMU_OT_ZVOL",
-                    "DMU_OT_ZVOL_PROP",
-                    # other; for testing only!
-                    "DMU_OT_PLAIN_OTHER",
-                    "DMU_OT_UINT64_OTHER",
-                    "DMU_OT_ZAP_OTHER",
-                    # new object types:
-                    "DMU_OT_ERROR_LOG",
-                    "DMU_OT_SPA_HISTORY",
-                    "DMU_OT_SPA_HISTORY_OFFSETS",
-                    "DMU_OT_POOL_PROPS",
-                    "DMU_OT_DSL_PERMS",
-                    "DMU_OT_ACL",
-                    "DMU_OT_SYSACL",
-                    "DMU_OT_FUID",
-                    "DMU_OT_FUID_SIZE",
-                    "DMU_OT_NEXT_CLONES",
-                    "DMU_OT_SCAN_QUEUE",
-                    "DMU_OT_USERGROUP_USED",
-                    "DMU_OT_USERGROUP_QUOTA",
-                    "DMU_OT_USERREFS",
-                    "DMU_OT_DDT_ZAP",
-                    "DMU_OT_DDT_STATS",
-                    "DMU_OT_SA",
-                    "DMU_OT_SA_MASTER_NODE",
-                    "DMU_OT_SA_ATTR_REGISTRATION",
-                    "DMU_OT_SA_ATTR_LAYOUTS",
-                    "DMU_OT_SCAN_XLATE",
-                    "DMU_OT_DEDUP",
-                    "DMU_OT_DEADLIST",
-                    "DMU_OT_DEADLIST_HDR",
-                    "DMU_OT_DSL_CLONES",
-                    "DMU_OT_BPOBJ_SUBOBJ"]
-    otn_strings = {
-                    0x80: "DMU_OTN_UINT8_DATA",
-                    0xc0: "DMU_OTN_UINT8_METADATA",
-                    0x81: "DMU_OTN_UINT16_DATA",
-                    0xc1: "DMU_OTN_UINT16_METADATA",
-                    0x82: "DMU_OTN_UINT32_DATA",
-                    0xc2: "DMU_OTN_UINT32_METADATA",
-                    0x83: "DMU_OTN_UINT64_DATA",
-                    0xc3: "DMU_OTN_UINT64_METADATA",
-                    0x84: "DMU_OTN_ZAP_DATA",
-                    0xc4: "DMU_OTN_ZAP_METADATA",
-                    0xa0: "DMU_OTN_UINT8_ENC_DATA",
-                    0xe0: "DMU_OTN_UINT8_ENC_METADATA",
-                    0xa1: "DMU_OTN_UINT16_ENC_DATA",
-                    0xe1: "DMU_OTN_UINT16_ENC_METADATA",
-                    0xa2: "DMU_OTN_UINT32_ENC_DATA",
-                    0xe2: "DMU_OTN_UINT32_ENC_METADATA",
-                    0xa3: "DMU_OTN_UINT64_ENC_DATA",
-                    0xe3: "DMU_OTN_UINT64_ENC_METADATA",
-                    0xa4: "DMU_OTN_ZAP_ENC_DATA",
-                    0xe4: "DMU_OTN_ZAP_ENC_METADATA"}
-
-    # If "-rr" option is used, don't convert to string representation
-    if raw > 1:
-        return "%i" % t
-
-    try:
-        if t < len(ot_strings):
-            return ot_strings[t]
-        else:
-            return otn_strings[t]
-    except (IndexError, KeyError):
-        return "(UNKNOWN)"
-
-
-def get_compstring(c):
-    comp_strings = ["ZIO_COMPRESS_INHERIT", "ZIO_COMPRESS_ON",
-                    "ZIO_COMPRESS_OFF",     "ZIO_COMPRESS_LZJB",
-                    "ZIO_COMPRESS_EMPTY",   "ZIO_COMPRESS_GZIP_1",
-                    "ZIO_COMPRESS_GZIP_2",  "ZIO_COMPRESS_GZIP_3",
-                    "ZIO_COMPRESS_GZIP_4",  "ZIO_COMPRESS_GZIP_5",
-                    "ZIO_COMPRESS_GZIP_6",  "ZIO_COMPRESS_GZIP_7",
-                    "ZIO_COMPRESS_GZIP_8",  "ZIO_COMPRESS_GZIP_9",
-                    "ZIO_COMPRESS_ZLE",     "ZIO_COMPRESS_LZ4",
-                    "ZIO_COMPRESS_FUNCTION"]
-
-    # If "-rr" option is used, don't convert to string representation
-    if raw > 1:
-        return "%i" % c
-
-    try:
-        return comp_strings[c]
-    except IndexError:
-        return "%i" % c
-
-
-def parse_line(line, labels):
-    global hdr
-
-    new = dict()
-    val = None
-    for col in hdr:
-        # These are "special" fields computed in the update_dict
-        # function, prevent KeyError exception on labels[col] for these.
-        if col not in ['bonus', 'cached', 'direct', 'indirect', 'spill']:
-            val = line[labels[col]]
-
-        if col in ['pool', 'flags']:
-            new[col] = str(val)
-        elif col in ['dtype', 'btype']:
-            new[col] = get_typestring(int(val))
-        elif col in ['l2_comp']:
-            new[col] = get_compstring(int(val))
-        else:
-            new[col] = int(val)
-
-    return new
-
-
-def update_dict(d, k, line, labels):
-    pool = line[labels['pool']]
-    objset = line[labels['objset']]
-    key = line[labels[k]]
-
-    dbsize = int(line[labels['dbsize']])
-    blkid = int(line[labels['blkid']])
-    level = int(line[labels['level']])
-
-    if pool not in d:
-        d[pool] = dict()
-
-    if objset not in d[pool]:
-        d[pool][objset] = dict()
-
-    if key not in d[pool][objset]:
-        d[pool][objset][key] = parse_line(line, labels)
-        d[pool][objset][key]['bonus'] = 0
-        d[pool][objset][key]['cached'] = 0
-        d[pool][objset][key]['direct'] = 0
-        d[pool][objset][key]['indirect'] = 0
-        d[pool][objset][key]['spill'] = 0
-
-    d[pool][objset][key]['cached'] += dbsize
-
-    if blkid == -1:
-        d[pool][objset][key]['bonus'] += dbsize
-    elif blkid == -2:
-        d[pool][objset][key]['spill'] += dbsize
-    else:
-        if level == 0:
-            d[pool][objset][key]['direct'] += dbsize
-        else:
-            d[pool][objset][key]['indirect'] += dbsize
-
-    return d
-
-
-def skip_line(vals, filters):
-    '''
-    Determines if a line should be skipped during printing
-    based on a set of filters
-    '''
-    if len(filters) == 0:
-        return False
-
-    for key in vals:
-        if key in filters:
-            val = prettynum(cols[key][0], cols[key][1], vals[key]).strip()
-            # we want a full match here
-            if re.match("(?:" + filters[key] + r")\Z", val) is None:
-                return True
-
-    return False
-
-
-def print_dict(d, filters, noheader):
-    if not noheader:
-        print_header()
-    for pool in list(d.keys()):
-        for objset in list(d[pool].keys()):
-            for v in list(d[pool][objset].values()):
-                if not skip_line(v, filters):
-                    print_values(v)
-
-
-def dnodes_build_dict(filehandle):
-    labels = dict()
-    dnodes = dict()
-
-    # First 3 lines are header information, skip the first two
-    for i in range(2):
-        next(filehandle)
-
-    # The third line contains the labels and index locations
-    for i, v in enumerate(next(filehandle).split()):
-        labels[v] = i
-
-    # The rest of the file is buffer information
-    for line in filehandle:
-        update_dict(dnodes, 'object', line.split(), labels)
-
-    return dnodes
-
-
-def types_build_dict(filehandle):
-    labels = dict()
-    types = dict()
-
-    # First 3 lines are header information, skip the first two
-    for i in range(2):
-        next(filehandle)
-
-    # The third line contains the labels and index locations
-    for i, v in enumerate(next(filehandle).split()):
-        labels[v] = i
-
-    # The rest of the file is buffer information
-    for line in filehandle:
-        update_dict(types, 'dtype', line.split(), labels)
-
-    return types
-
-
-def buffers_print_all(filehandle, filters, noheader):
-    labels = dict()
-
-    # First 3 lines are header information, skip the first two
-    for i in range(2):
-        next(filehandle)
-
-    # The third line contains the labels and index locations
-    for i, v in enumerate(next(filehandle).split()):
-        labels[v] = i
-
-    if not noheader:
-        print_header()
-
-    # The rest of the file is buffer information
-    for line in filehandle:
-        vals = parse_line(line.split(), labels)
-        if not skip_line(vals, filters):
-            print_values(vals)
-
-
-def main():
-    global hdr
-    global sep
-    global raw
-
-    desired_cols = None
-    bflag = False
-    dflag = False
-    hflag = False
-    ifile = None
-    ofile = None
-    tflag = False
-    vflag = False
-    xflag = False
-    nflag = False
-    filters = dict()
-
-    try:
-        opts, args = getopt.getopt(
-            sys.argv[1:],
-            "bdf:hi:o:rs:tvxF:n",
-            [
-                "buffers",
-                "dnodes",
-                "columns",
-                "help",
-                "infile",
-                "outfile",
-                "separator",
-                "types",
-                "verbose",
-                "extended",
-                "filter"
-            ]
-        )
-    except getopt.error:
-        usage()
-        opts = None
-
-    for opt, arg in opts:
-        if opt in ('-b', '--buffers'):
-            bflag = True
-        if opt in ('-d', '--dnodes'):
-            dflag = True
-        if opt in ('-f', '--columns'):
-            desired_cols = arg
-        if opt in ('-h', '--help'):
-            hflag = True
-        if opt in ('-i', '--infile'):
-            ifile = arg
-        if opt in ('-o', '--outfile'):
-            ofile = arg
-        if opt in ('-r', '--raw'):
-            raw += 1
-        if opt in ('-s', '--separator'):
-            sep = arg
-        if opt in ('-t', '--types'):
-            tflag = True
-        if opt in ('-v', '--verbose'):
-            vflag = True
-        if opt in ('-x', '--extended'):
-            xflag = True
-        if opt in ('-n', '--noheader'):
-            nflag = True
-        if opt in ('-F', '--filter'):
-            fils = [x.strip() for x in arg.split(",")]
-
-            for fil in fils:
-                f = [x.strip() for x in fil.split("=")]
-
-                if len(f) != 2:
-                    sys.stderr.write("Invalid filter '%s'.\n" % fil)
-                    sys.exit(1)
-
-                if f[0] not in cols:
-                    sys.stderr.write("Invalid field '%s' in filter.\n" % f[0])
-                    sys.exit(1)
-
-                if f[0] in filters:
-                    sys.stderr.write("Field '%s' specified multiple times in "
-                                     "filter.\n" % f[0])
-                    sys.exit(1)
-
-                try:
-                    re.compile("(?:" + f[1] + r")\Z")
-                except re.error:
-                    sys.stderr.write("Invalid regex for field '%s' in "
-                                     "filter.\n" % f[0])
-                    sys.exit(1)
-
-                filters[f[0]] = f[1]
-
-    if hflag or (xflag and desired_cols):
-        usage()
-
-    if vflag:
-        detailed_usage()
-
-    # Ensure at most only one of b, d, or t flags are set
-    if (bflag and dflag) or (bflag and tflag) or (dflag and tflag):
-        usage()
-
-    if bflag:
-        hdr = bxhdr if xflag else bhdr
-    elif tflag:
-        hdr = txhdr if xflag else thdr
-    else:  # Even if dflag is False, it's the default if none set
-        dflag = True
-        hdr = dxhdr if xflag else dhdr
-
-    if desired_cols:
-        hdr = desired_cols.split(",")
-
-        invalid = []
-        incompat = []
-        for ele in hdr:
-            if ele not in cols:
-                invalid.append(ele)
-            elif ((bflag and bincompat and ele in bincompat) or
-                  (dflag and dincompat and ele in dincompat) or
-                  (tflag and tincompat and ele in tincompat)):
-                    incompat.append(ele)
-
-        if len(invalid) > 0:
-            sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
-            usage()
-
-        if len(incompat) > 0:
-            sys.stderr.write("Incompatible field specified! -- %s\n" %
-                             incompat)
-            usage()
-
-    if ofile:
-        try:
-            tmp = open(ofile, "w")
-            sys.stdout = tmp
-
-        except IOError:
-            sys.stderr.write("Cannot open %s for writing\n" % ofile)
-            sys.exit(1)
-
-    if not ifile:
-        ifile = '/proc/spl/kstat/zfs/dbufs'
-
-    if ifile is not "-":
-        try:
-            tmp = open(ifile, "r")
-            sys.stdin = tmp
-        except IOError:
-            sys.stderr.write("Cannot open %s for reading\n" % ifile)
-            sys.exit(1)
-
-    if bflag:
-        buffers_print_all(sys.stdin, filters, nflag)
-
-    if dflag:
-        print_dict(dnodes_build_dict(sys.stdin), filters, nflag)
-
-    if tflag:
-        print_dict(types_build_dict(sys.stdin), filters, nflag)
-
-
-if __name__ == '__main__':
-    main()

diff --git a/zfs/cmd/dbufstat/dbufstat.in b/zfs/cmd/dbufstat/dbufstat.in
new file mode 100755
index 0000000..b716a0c
--- /dev/null
+++ b/zfs/cmd/dbufstat/dbufstat.in

@@ -0,0 +1,684 @@
+#!/usr/bin/env @PYTHON_SHEBANG@
+#
+# Print out statistics for all cached dmu buffers.  This information
+# is available through the dbufs kstat and may be post-processed as
+# needed by the script.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (C) 2013 Lawrence Livermore National Security, LLC.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+#
+# This script must remain compatible with Python 3.6+.
+#
+
+import sys
+import getopt
+import errno
+import re
+
+bhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize"]
+bxhdr = ["pool", "objset", "object", "level", "blkid", "offset", "dbsize",
+         "meta", "state", "dbholds", "dbc", "list", "atype", "flags",
+         "count", "asize", "access", "mru", "gmru", "mfu", "gmfu", "l2",
+         "l2_dattr", "l2_asize", "l2_comp", "aholds", "dtype", "btype",
+         "data_bs", "meta_bs", "bsize", "lvls", "dholds", "blocks", "dsize"]
+bincompat = ["cached", "direct", "indirect", "bonus", "spill"]
+
+dhdr = ["pool", "objset", "object", "dtype", "cached"]
+dxhdr = ["pool", "objset", "object", "dtype", "btype", "data_bs", "meta_bs",
+         "bsize", "lvls", "dholds", "blocks", "dsize", "cached", "direct",
+         "indirect", "bonus", "spill"]
+dincompat = ["level", "blkid", "offset", "dbsize", "meta", "state", "dbholds",
+             "dbc", "list", "atype", "flags", "count", "asize", "access",
+             "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
+             "l2_comp", "aholds"]
+
+thdr = ["pool", "objset", "dtype", "cached"]
+txhdr = ["pool", "objset", "dtype", "cached", "direct", "indirect",
+         "bonus", "spill"]
+tincompat = ["object", "level", "blkid", "offset", "dbsize", "meta", "state",
+             "dbc", "dbholds", "list", "atype", "flags", "count", "asize",
+             "access", "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr",
+             "l2_asize", "l2_comp", "aholds", "btype", "data_bs", "meta_bs",
+             "bsize", "lvls", "dholds", "blocks", "dsize"]
+
+cols = {
+    # hdr:        [size, scale, description]
+    "pool":       [15,   -1, "pool name"],
+    "objset":     [6,    -1, "dataset identification number"],
+    "object":     [10,   -1, "object number"],
+    "level":      [5,    -1, "indirection level of buffer"],
+    "blkid":      [8,    -1, "block number of buffer"],
+    "offset":     [12, 1024, "offset in object of buffer"],
+    "dbsize":     [7,  1024, "size of buffer"],
+    "meta":       [4,    -1, "is this buffer metadata?"],
+    "state":      [5,    -1, "state of buffer (read, cached, etc)"],
+    "dbholds":    [7,  1000, "number of holds on buffer"],
+    "dbc":        [3,    -1, "in dbuf cache"],
+    "list":       [4,    -1, "which ARC list contains this buffer"],
+    "atype":      [7,    -1, "ARC header type (data or metadata)"],
+    "flags":      [9,    -1, "ARC read flags"],
+    "count":      [5,    -1, "ARC data count"],
+    "asize":      [7,  1024, "size of this ARC buffer"],
+    "access":     [10,   -1, "time this ARC buffer was last accessed"],
+    "mru":        [5,  1000, "hits while on the ARC's MRU list"],
+    "gmru":       [5,  1000, "hits while on the ARC's MRU ghost list"],
+    "mfu":        [5,  1000, "hits while on the ARC's MFU list"],
+    "gmfu":       [5,  1000, "hits while on the ARC's MFU ghost list"],
+    "l2":         [5,  1000, "hits while on the L2ARC"],
+    "l2_dattr":   [8,    -1, "L2ARC disk address/offset"],
+    "l2_asize":   [8,  1024, "L2ARC alloc'd size (depending on compression)"],
+    "l2_comp":    [21,   -1, "L2ARC compression algorithm for buffer"],
+    "aholds":     [6,  1000, "number of holds on this ARC buffer"],
+    "dtype":      [27,   -1, "dnode type"],
+    "btype":      [27,   -1, "bonus buffer type"],
+    "data_bs":    [7,  1024, "data block size"],
+    "meta_bs":    [7,  1024, "metadata block size"],
+    "bsize":      [6,  1024, "bonus buffer size"],
+    "lvls":       [6,    -1, "number of indirection levels"],
+    "dholds":     [6,  1000, "number of holds on dnode"],
+    "blocks":     [8,  1000, "number of allocated blocks"],
+    "dsize":      [12, 1024, "size of dnode"],
+    "cached":     [6,  1024, "bytes cached for all blocks"],
+    "direct":     [6,  1024, "bytes cached for direct blocks"],
+    "indirect":   [8,  1024, "bytes cached for indirect blocks"],
+    "bonus":      [5,  1024, "bytes cached for bonus buffer"],
+    "spill":      [5,  1024, "bytes cached for spill block"],
+}
+
+hdr = None
+xhdr = None
+sep = "  "  # Default separator is 2 spaces
+cmd = ("Usage: dbufstat [-bdhnrtvx] [-i file] [-f fields] [-o file] "
+       "[-s string] [-F filter]\n")
+raw = 0
+
+
+if sys.platform.startswith("freebsd"):
+    import io
+    # Requires py-sysctl on FreeBSD
+    import sysctl
+
+    def default_ifile():
+        # Load the dbufs kstat via sysctl into stdin; "-" means "read stdin".
+        dbufs = sysctl.filter("kstat.zfs.misc.dbufs")[0].value
+        sys.stdin = io.StringIO(dbufs)
+        return "-"
+else:
+    def default_ifile():
+        return "/proc/spl/kstat/zfs/dbufs"
+
+
+def print_incompat_helper(incompat):
+    cnt = 0
+    for key in sorted(incompat):
+        if cnt == 0:
+            sys.stderr.write("\t")
+        elif cnt > 8:
+            sys.stderr.write(",\n\t")
+            cnt = 0
+        else:
+            sys.stderr.write(", ")
+
+        sys.stderr.write("%s" % key)
+        cnt += 1
+
+    sys.stderr.write("\n\n")
+
+
+def detailed_usage():
+    sys.stderr.write("%s\n" % cmd)
+
+    sys.stderr.write("Field definitions incompatible with '-b' option:\n")
+    print_incompat_helper(bincompat)
+
+    sys.stderr.write("Field definitions incompatible with '-d' option:\n")
+    print_incompat_helper(dincompat)
+
+    sys.stderr.write("Field definitions incompatible with '-t' option:\n")
+    print_incompat_helper(tincompat)
+
+    sys.stderr.write("Field definitions are as follows:\n")
+    for key in sorted(cols.keys()):
+        sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
+    sys.stderr.write("\n")
+
+    sys.exit(0)
+
+
+def usage():
+    sys.stderr.write("%s\n" % cmd)
+    sys.stderr.write("\t -b : Print table of information for each dbuf\n")
+    sys.stderr.write("\t -d : Print table of information for each dnode\n")
+    sys.stderr.write("\t -h : Print this help message\n")
+    sys.stderr.write("\t -n : Exclude header from output\n")
+    sys.stderr.write("\t -r : Print raw values\n")
+    sys.stderr.write("\t -t : Print table of information for each dnode type"
+                     "\n")
+    sys.stderr.write("\t -v : List all possible field headers and definitions"
+                     "\n")
+    sys.stderr.write("\t -x : Print extended stats\n")
+    sys.stderr.write("\t -i : Redirect input from the specified file\n")
+    sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n")
+    sys.stderr.write("\t -o : Redirect output to the specified file\n")
+    sys.stderr.write("\t -s : Override default field separator with custom "
+                     "character or string\n")
+    sys.stderr.write("\t -F : Filter output by value or regex\n")
+    sys.stderr.write("\nExamples:\n")
+    sys.stderr.write("\tdbufstat -d -o /tmp/d.log\n")
+    sys.stderr.write("\tdbufstat -t -s \",\" -o /tmp/t.log\n")
+    sys.stderr.write("\tdbufstat -v\n")
+    sys.stderr.write("\tdbufstat -d -f pool,object,objset,dsize,cached\n")
+    sys.stderr.write("\tdbufstat -bx -F dbc=1,objset=54,pool=testpool\n")
+    sys.stderr.write("\n")
+
+    sys.exit(1)
+
+
+def prettynum(sz, scale, num=0):
+    global raw
+
+    suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
+    index = 0
+    save = 0
+
+    if raw or scale == -1:
+        return "%*s" % (sz, num)
+
+    # Rounding error, return 0
+    elif 0 < num < 1:
+        num = 0
+
+    while num > scale and index < 5:
+        save = num
+        num = num / scale
+        index += 1
+
+    if index == 0:
+        return "%*d" % (sz, num)
+
+    if (save / scale) < 10:
+        return "%*.1f%s" % (sz - 1, num, suffix[index])
+    else:
+        return "%*d%s" % (sz - 1, num, suffix[index])
+
+
+def print_values(v):
+    global hdr
+    global sep
+
+    try:
+        for col in hdr:
+            sys.stdout.write("%s%s" % (
+                prettynum(cols[col][0], cols[col][1], v[col]), sep))
+        sys.stdout.write("\n")
+    except IOError as e:
+        if e.errno == errno.EPIPE:
+            sys.exit(1)
+
+
+def print_header():
+    global hdr
+    global sep
+
+    try:
+        for col in hdr:
+            sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
+        sys.stdout.write("\n")
+    except IOError as e:
+        if e.errno == errno.EPIPE:
+            sys.exit(1)
+
+
+def get_typestring(t):
+    ot_strings = [
+                    "DMU_OT_NONE",
+                    # general:
+                    "DMU_OT_OBJECT_DIRECTORY",
+                    "DMU_OT_OBJECT_ARRAY",
+                    "DMU_OT_PACKED_NVLIST",
+                    "DMU_OT_PACKED_NVLIST_SIZE",
+                    "DMU_OT_BPOBJ",
+                    "DMU_OT_BPOBJ_HDR",
+                    # spa:
+                    "DMU_OT_SPACE_MAP_HEADER",
+                    "DMU_OT_SPACE_MAP",
+                    # zil:
+                    "DMU_OT_INTENT_LOG",
+                    # dmu:
+                    "DMU_OT_DNODE",
+                    "DMU_OT_OBJSET",
+                    # dsl:
+                    "DMU_OT_DSL_DIR",
+                    "DMU_OT_DSL_DIR_CHILD_MAP",
+                    "DMU_OT_DSL_DS_SNAP_MAP",
+                    "DMU_OT_DSL_PROPS",
+                    "DMU_OT_DSL_DATASET",
+                    # zpl:
+                    "DMU_OT_ZNODE",
+                    "DMU_OT_OLDACL",
+                    "DMU_OT_PLAIN_FILE_CONTENTS",
+                    "DMU_OT_DIRECTORY_CONTENTS",
+                    "DMU_OT_MASTER_NODE",
+                    "DMU_OT_UNLINKED_SET",
+                    # zvol:
+                    "DMU_OT_ZVOL",
+                    "DMU_OT_ZVOL_PROP",
+                    # other; for testing only!
+                    "DMU_OT_PLAIN_OTHER",
+                    "DMU_OT_UINT64_OTHER",
+                    "DMU_OT_ZAP_OTHER",
+                    # new object types:
+                    "DMU_OT_ERROR_LOG",
+                    "DMU_OT_SPA_HISTORY",
+                    "DMU_OT_SPA_HISTORY_OFFSETS",
+                    "DMU_OT_POOL_PROPS",
+                    "DMU_OT_DSL_PERMS",
+                    "DMU_OT_ACL",
+                    "DMU_OT_SYSACL",
+                    "DMU_OT_FUID",
+                    "DMU_OT_FUID_SIZE",
+                    "DMU_OT_NEXT_CLONES",
+                    "DMU_OT_SCAN_QUEUE",
+                    "DMU_OT_USERGROUP_USED",
+                    "DMU_OT_USERGROUP_QUOTA",
+                    "DMU_OT_USERREFS",
+                    "DMU_OT_DDT_ZAP",
+                    "DMU_OT_DDT_STATS",
+                    "DMU_OT_SA",
+                    "DMU_OT_SA_MASTER_NODE",
+                    "DMU_OT_SA_ATTR_REGISTRATION",
+                    "DMU_OT_SA_ATTR_LAYOUTS",
+                    "DMU_OT_SCAN_XLATE",
+                    "DMU_OT_DEDUP",
+                    "DMU_OT_DEADLIST",
+                    "DMU_OT_DEADLIST_HDR",
+                    "DMU_OT_DSL_CLONES",
+                    "DMU_OT_BPOBJ_SUBOBJ"]
+    otn_strings = {
+                    0x80: "DMU_OTN_UINT8_DATA",
+                    0xc0: "DMU_OTN_UINT8_METADATA",
+                    0x81: "DMU_OTN_UINT16_DATA",
+                    0xc1: "DMU_OTN_UINT16_METADATA",
+                    0x82: "DMU_OTN_UINT32_DATA",
+                    0xc2: "DMU_OTN_UINT32_METADATA",
+                    0x83: "DMU_OTN_UINT64_DATA",
+                    0xc3: "DMU_OTN_UINT64_METADATA",
+                    0x84: "DMU_OTN_ZAP_DATA",
+                    0xc4: "DMU_OTN_ZAP_METADATA",
+                    0xa0: "DMU_OTN_UINT8_ENC_DATA",
+                    0xe0: "DMU_OTN_UINT8_ENC_METADATA",
+                    0xa1: "DMU_OTN_UINT16_ENC_DATA",
+                    0xe1: "DMU_OTN_UINT16_ENC_METADATA",
+                    0xa2: "DMU_OTN_UINT32_ENC_DATA",
+                    0xe2: "DMU_OTN_UINT32_ENC_METADATA",
+                    0xa3: "DMU_OTN_UINT64_ENC_DATA",
+                    0xe3: "DMU_OTN_UINT64_ENC_METADATA",
+                    0xa4: "DMU_OTN_ZAP_ENC_DATA",
+                    0xe4: "DMU_OTN_ZAP_ENC_METADATA"}
+
+    # If "-rr" option is used, don't convert to string representation
+    if raw > 1:
+        return "%i" % t
+
+    try:
+        if t < len(ot_strings):
+            return ot_strings[t]
+        else:
+            return otn_strings[t]
+    except (IndexError, KeyError):
+        return "(UNKNOWN)"
+
+
+def get_compstring(c):
+    comp_strings = ["ZIO_COMPRESS_INHERIT", "ZIO_COMPRESS_ON",
+                    "ZIO_COMPRESS_OFF",     "ZIO_COMPRESS_LZJB",
+                    "ZIO_COMPRESS_EMPTY",   "ZIO_COMPRESS_GZIP_1",
+                    "ZIO_COMPRESS_GZIP_2",  "ZIO_COMPRESS_GZIP_3",
+                    "ZIO_COMPRESS_GZIP_4",  "ZIO_COMPRESS_GZIP_5",
+                    "ZIO_COMPRESS_GZIP_6",  "ZIO_COMPRESS_GZIP_7",
+                    "ZIO_COMPRESS_GZIP_8",  "ZIO_COMPRESS_GZIP_9",
+                    "ZIO_COMPRESS_ZLE",     "ZIO_COMPRESS_LZ4",
+                    "ZIO_COMPRESS_ZSTD",    "ZIO_COMPRESS_FUNCTION"]
+
+    # If "-rr" option is used, don't convert to string representation
+    if raw > 1:
+        return "%i" % c
+
+    try:
+        return comp_strings[c]
+    except IndexError:
+        return "%i" % c
+
+
+def parse_line(line, labels):
+    """Convert one split kstat row into a dict keyed by the columns in hdr."""
+    global hdr
+
+    new = dict()
+    for col in hdr:
+        # 'bonus', 'cached', ... are computed by update_dict and have no
+        # kstat column; skip them rather than converting a stale/None value.
+        if col in ['bonus', 'cached', 'direct', 'indirect', 'spill']:
+            continue
+        val = line[labels[col]]
+        if col in ['pool', 'flags']:
+            new[col] = str(val)
+        elif col in ['dtype', 'btype']:
+            new[col] = get_typestring(int(val))
+        elif col in ['l2_comp']:
+            new[col] = get_compstring(int(val))
+        else:
+            new[col] = int(val)
+
+    return new
+
+
+def update_dict(d, k, line, labels):
+    pool = line[labels['pool']]
+    objset = line[labels['objset']]
+    key = line[labels[k]]
+
+    dbsize = int(line[labels['dbsize']])
+    blkid = int(line[labels['blkid']])
+    level = int(line[labels['level']])
+
+    if pool not in d:
+        d[pool] = dict()
+
+    if objset not in d[pool]:
+        d[pool][objset] = dict()
+
+    if key not in d[pool][objset]:
+        d[pool][objset][key] = parse_line(line, labels)
+        d[pool][objset][key]['bonus'] = 0
+        d[pool][objset][key]['cached'] = 0
+        d[pool][objset][key]['direct'] = 0
+        d[pool][objset][key]['indirect'] = 0
+        d[pool][objset][key]['spill'] = 0
+
+    d[pool][objset][key]['cached'] += dbsize
+
+    if blkid == -1:
+        d[pool][objset][key]['bonus'] += dbsize
+    elif blkid == -2:
+        d[pool][objset][key]['spill'] += dbsize
+    else:
+        if level == 0:
+            d[pool][objset][key]['direct'] += dbsize
+        else:
+            d[pool][objset][key]['indirect'] += dbsize
+
+    return d
+
+
+def skip_line(vals, filters):
+    '''
+    Determines if a line should be skipped during printing
+    based on a set of filters
+    '''
+    if len(filters) == 0:
+        return False
+
+    for key in vals:
+        if key in filters:
+            val = prettynum(cols[key][0], cols[key][1], vals[key]).strip()
+            # we want a full match here
+            if re.match("(?:" + filters[key] + r")\Z", val) is None:
+                return True
+
+    return False
+
+
+def print_dict(d, filters, noheader):
+    if not noheader:
+        print_header()
+    for pool in list(d.keys()):
+        for objset in list(d[pool].keys()):
+            for v in list(d[pool][objset].values()):
+                if not skip_line(v, filters):
+                    print_values(v)
+
+
+def dnodes_build_dict(filehandle):
+    labels = dict()
+    dnodes = dict()
+
+    # First 3 lines are header information, skip the first two
+    for i in range(2):
+        next(filehandle)
+
+    # The third line contains the labels and index locations
+    for i, v in enumerate(next(filehandle).split()):
+        labels[v] = i
+
+    # The rest of the file is buffer information
+    for line in filehandle:
+        update_dict(dnodes, 'object', line.split(), labels)
+
+    return dnodes
+
+
+def types_build_dict(filehandle):
+    labels = dict()
+    types = dict()
+
+    # First 3 lines are header information, skip the first two
+    for i in range(2):
+        next(filehandle)
+
+    # The third line contains the labels and index locations
+    for i, v in enumerate(next(filehandle).split()):
+        labels[v] = i
+
+    # The rest of the file is buffer information
+    for line in filehandle:
+        update_dict(types, 'dtype', line.split(), labels)
+
+    return types
+
+
+def buffers_print_all(filehandle, filters, noheader):
+    labels = dict()
+
+    # First 3 lines are header information, skip the first two
+    for i in range(2):
+        next(filehandle)
+
+    # The third line contains the labels and index locations
+    for i, v in enumerate(next(filehandle).split()):
+        labels[v] = i
+
+    if not noheader:
+        print_header()
+
+    # The rest of the file is buffer information
+    for line in filehandle:
+        vals = parse_line(line.split(), labels)
+        if not skip_line(vals, filters):
+            print_values(vals)
+
+
+def main():
+    """Parse the command line, set up input/output and print the report."""
+    global hdr, sep, raw
+
+    desired_cols = None
+    bflag = False
+    dflag = False
+    hflag = False
+    ifile = None
+    ofile = None
+    tflag = False
+    vflag = False
+    xflag = False
+    nflag = False
+    filters = dict()
+
+    try:
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "bdf:hi:o:rs:tvxF:n",
+            [
+                "buffers",
+                "dnodes",
+                "columns=",  # trailing "=": option takes an argument
+                "help",
+                "infile=",
+                "outfile=",
+                "raw",
+                "separator=",
+                "types",
+                "verbose",
+                "extended",
+                "filter=",
+                "noheader"
+            ]
+        )
+    except getopt.error:
+        usage()  # prints the help text and exits
+
+    for opt, arg in opts:
+        if opt in ('-b', '--buffers'):
+            bflag = True
+        if opt in ('-d', '--dnodes'):
+            dflag = True
+        if opt in ('-f', '--columns'):
+            desired_cols = arg
+        if opt in ('-h', '--help'):
+            hflag = True
+        if opt in ('-i', '--infile'):
+            ifile = arg
+        if opt in ('-o', '--outfile'):
+            ofile = arg
+        if opt in ('-r', '--raw'):
+            raw += 1
+        if opt in ('-s', '--separator'):
+            sep = arg
+        if opt in ('-t', '--types'):
+            tflag = True
+        if opt in ('-v', '--verbose'):
+            vflag = True
+        if opt in ('-x', '--extended'):
+            xflag = True
+        if opt in ('-n', '--noheader'):
+            nflag = True
+        if opt in ('-F', '--filter'):
+            fils = [x.strip() for x in arg.split(",")]
+
+            for fil in fils:
+                f = [x.strip() for x in fil.split("=")]
+
+                if len(f) != 2:
+                    sys.stderr.write("Invalid filter '%s'.\n" % fil)
+                    sys.exit(1)
+
+                if f[0] not in cols:
+                    sys.stderr.write("Invalid field '%s' in filter.\n" % f[0])
+                    sys.exit(1)
+
+                if f[0] in filters:
+                    sys.stderr.write("Field '%s' specified multiple times in "
+                                     "filter.\n" % f[0])
+                    sys.exit(1)
+
+                try:
+                    re.compile("(?:" + f[1] + r")\Z")
+                except re.error:
+                    sys.stderr.write("Invalid regex for field '%s' in "
+                                     "filter.\n" % f[0])
+                    sys.exit(1)
+
+                filters[f[0]] = f[1]
+
+    if hflag or (xflag and desired_cols):
+        usage()
+
+    if vflag:
+        detailed_usage()
+
+    # Ensure at most only one of b, d, or t flags are set
+    if (bflag and dflag) or (bflag and tflag) or (dflag and tflag):
+        usage()
+
+    if bflag:
+        hdr = bxhdr if xflag else bhdr
+    elif tflag:
+        hdr = txhdr if xflag else thdr
+    else:  # Even if dflag is False, it's the default if none set
+        dflag = True
+        hdr = dxhdr if xflag else dhdr
+
+    if desired_cols:
+        hdr = desired_cols.split(",")
+
+        invalid = []
+        incompat = []
+        for ele in hdr:
+            if ele not in cols:
+                invalid.append(ele)
+            elif ((bflag and bincompat and ele in bincompat) or
+                  (dflag and dincompat and ele in dincompat) or
+                  (tflag and tincompat and ele in tincompat)):
+                    incompat.append(ele)
+
+        if len(invalid) > 0:
+            sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
+            usage()
+
+        if len(incompat) > 0:
+            sys.stderr.write("Incompatible field specified! -- %s\n" %
+                             incompat)
+            usage()
+
+    if ofile:
+        try:
+            tmp = open(ofile, "w")
+            sys.stdout = tmp
+
+        except IOError:
+            sys.stderr.write("Cannot open %s for writing\n" % ofile)
+            sys.exit(1)
+
+    if not ifile:
+        ifile = default_ifile()
+
+    if ifile != "-":
+        try:
+            tmp = open(ifile, "r")
+            sys.stdin = tmp
+        except IOError:
+            sys.stderr.write("Cannot open %s for reading\n" % ifile)
+            sys.exit(1)
+
+    if bflag:
+        buffers_print_all(sys.stdin, filters, nflag)
+
+    if dflag:
+        print_dict(dnodes_build_dict(sys.stdin), filters, nflag)
+
+    if tflag:
+        print_dict(types_build_dict(sys.stdin), filters, nflag)
+
+
+if __name__ == '__main__':
+    main()

diff --git a/zfs/cmd/fsck_zfs/.gitignore b/zfs/cmd/fsck_zfs/.gitignore
new file mode 100644
index 0000000..0edf030
--- /dev/null
+++ b/zfs/cmd/fsck_zfs/.gitignore

@@ -0,0 +1 @@
+/fsck.zfs

diff --git a/zfs/cmd/fsck_zfs/Makefile.am b/zfs/cmd/fsck_zfs/Makefile.am
index 2380f56..f8139f1 100644
--- a/zfs/cmd/fsck_zfs/Makefile.am
+++ b/zfs/cmd/fsck_zfs/Makefile.am

@@ -1 +1,6 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 dist_sbin_SCRIPTS = fsck.zfs
+
+SUBSTFILES += $(dist_sbin_SCRIPTS)

diff --git a/zfs/cmd/fsck_zfs/fsck.zfs b/zfs/cmd/fsck_zfs/fsck.zfs
deleted file mode 100755
index 129a7f3..0000000
--- a/zfs/cmd/fsck_zfs/fsck.zfs
+++ /dev/null

@@ -1,9 +0,0 @@
-#!/bin/sh
-#
-# fsck.zfs: A fsck helper to accommodate distributions that expect
-# to be able to execute a fsck on all filesystem types.  Currently
-# this script does nothing but it could be extended to act as a
-# compatibility wrapper for 'zpool scrub'.
-#
-
-exit 0

diff --git a/zfs/cmd/fsck_zfs/fsck.zfs.in b/zfs/cmd/fsck_zfs/fsck.zfs.in
new file mode 100755
index 0000000..3709690
--- /dev/null
+++ b/zfs/cmd/fsck_zfs/fsck.zfs.in

@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+# fsck.zfs: A fsck helper to accommodate distributions that expect
+# to be able to execute a fsck on all filesystem types.
+#
+# This script simply bubbles up some already-known-about errors,
+# see fsck.zfs(8)
+#
+# The exit status is the bitwise OR, over all dataset operands, of:
+#    4  the dataset's pool is DEGRADED
+#    8  the pool is FAULTED and the dataset has a zfs entry in /etc/fstab,
+#       or the pool could not be found
+# It is 16 when no arguments are given.
+#
+
+if [ "$#" = "0" ]; then
+	echo "Usage: $0 [options] dataset…" >&2
+	exit 16
+fi
+
+ret=0
+for dataset in "$@"; do
+	case "$dataset" in
+		-*)
+			# Options are accepted but ignored.
+			continue
+			;;
+		*)
+			;;
+	esac
+
+	# The pool name is everything before the first '/'.
+	pool="${dataset%%/*}"
+
+	case "$(@sbindir@/zpool list -Ho health "$pool")" in
+		DEGRADED)
+			ret=$(( ret | 4 ))
+			;;
+		FAULTED)
+			# awk exits 1 when a non-comment, non-blank /etc/fstab line
+			# names this dataset with type "zfs".  The name is passed via
+			# the environment rather than spliced into the awk program, so
+			# quotes or backslashes in it cannot alter or break the program.
+			FSCK_ZFS_DATASET="$dataset" awk '
+				!/^([[:space:]]*#.*)?$/ &&
+				    ($1 "") == ENVIRON["FSCK_ZFS_DATASET"] &&
+				    $3 == "zfs" { exit 1 }
+			' /etc/fstab || ret=$(( ret | 8 ))
+			;;
+		"")
+			# Pool not found, error printed by zpool(8)
+			ret=$(( ret | 8 ))
+			;;
+		*)
+			;;
+	esac
+done
+
+exit "$ret"

diff --git a/zfs/cmd/mount_zfs/Makefile.am b/zfs/cmd/mount_zfs/Makefile.am
index 7adedd6..3957602 100644
--- a/zfs/cmd/mount_zfs/Makefile.am
+++ b/zfs/cmd/mount_zfs/Makefile.am

@@ -1,9 +1,5 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 #
 # Ignore the prefix for the mount helper.  It must be installed in /sbin/
 # because this path is hardcoded in the mount(8) for security reasons.
@@ -17,5 +13,10 @@
 	mount_zfs.c
 
 mount_zfs_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzfs/libzfs.la
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+mount_zfs_LDADD += $(LTLIBINTL)
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/mount_zfs/mount_zfs.c b/zfs/cmd/mount_zfs/mount_zfs.c
index a37dd6f..55968ac 100644
--- a/zfs/cmd/mount_zfs/mount_zfs.c
+++ b/zfs/cmd/mount_zfs/mount_zfs.c

@@ -42,247 +42,46 @@
 
 libzfs_handle_t *g_zfs;
 
-typedef struct option_map {
-	const char *name;
-	unsigned long mntmask;
-	unsigned long zfsmask;
-} option_map_t;
-
-static const option_map_t option_map[] = {
-	/* Canonicalized filesystem independent options from mount(8) */
-	{ MNTOPT_NOAUTO,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_DEFAULTS,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_NODEVICES,	MS_NODEV,	ZS_COMMENT	},
-	{ MNTOPT_DIRSYNC,	MS_DIRSYNC,	ZS_COMMENT	},
-	{ MNTOPT_NOEXEC,	MS_NOEXEC,	ZS_COMMENT	},
-	{ MNTOPT_GROUP,		MS_GROUP,	ZS_COMMENT	},
-	{ MNTOPT_NETDEV,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_NOFAIL,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_NOSUID,	MS_NOSUID,	ZS_COMMENT	},
-	{ MNTOPT_OWNER,		MS_OWNER,	ZS_COMMENT	},
-	{ MNTOPT_REMOUNT,	MS_REMOUNT,	ZS_COMMENT	},
-	{ MNTOPT_RO,		MS_RDONLY,	ZS_COMMENT	},
-	{ MNTOPT_RW,		MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_SYNC,		MS_SYNCHRONOUS,	ZS_COMMENT	},
-	{ MNTOPT_USER,		MS_USERS,	ZS_COMMENT	},
-	{ MNTOPT_USERS,		MS_USERS,	ZS_COMMENT	},
-	/* acl flags passed with util-linux-2.24 mount command */
-	{ MNTOPT_ACL,		MS_POSIXACL,	ZS_COMMENT	},
-	{ MNTOPT_NOACL,		MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_POSIXACL,	MS_POSIXACL,	ZS_COMMENT	},
-#ifdef MS_NOATIME
-	{ MNTOPT_NOATIME,	MS_NOATIME,	ZS_COMMENT	},
-#endif
-#ifdef MS_NODIRATIME
-	{ MNTOPT_NODIRATIME,	MS_NODIRATIME,	ZS_COMMENT	},
-#endif
-#ifdef MS_RELATIME
-	{ MNTOPT_RELATIME,	MS_RELATIME,	ZS_COMMENT	},
-#endif
-#ifdef MS_STRICTATIME
-	{ MNTOPT_STRICTATIME,	MS_STRICTATIME,	ZS_COMMENT	},
-#endif
-#ifdef MS_LAZYTIME
-	{ MNTOPT_LAZYTIME,	MS_LAZYTIME,	ZS_COMMENT	},
-#endif
-	{ MNTOPT_CONTEXT,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_FSCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_DEFCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_ROOTCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
-#ifdef MS_I_VERSION
-	{ MNTOPT_IVERSION,	MS_I_VERSION,	ZS_COMMENT	},
-#endif
-#ifdef MS_MANDLOCK
-	{ MNTOPT_NBMAND,	MS_MANDLOCK,	ZS_COMMENT	},
-#endif
-	/* Valid options not found in mount(8) */
-	{ MNTOPT_BIND,		MS_BIND,	ZS_COMMENT	},
-#ifdef MS_REC
-	{ MNTOPT_RBIND,		MS_BIND|MS_REC,	ZS_COMMENT	},
-#endif
-	{ MNTOPT_COMMENT,	MS_COMMENT,	ZS_COMMENT	},
-#ifdef MS_NOSUB
-	{ MNTOPT_NOSUB,		MS_NOSUB,	ZS_COMMENT	},
-#endif
-#ifdef MS_SILENT
-	{ MNTOPT_QUIET,		MS_SILENT,	ZS_COMMENT	},
-#endif
-	/* Custom zfs options */
-	{ MNTOPT_XATTR,		MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_NOXATTR,	MS_COMMENT,	ZS_COMMENT	},
-	{ MNTOPT_ZFSUTIL,	MS_COMMENT,	ZS_ZFSUTIL	},
-	{ NULL,			0,		0		} };
-
 /*
- * Break the mount option in to a name/value pair.  The name is
- * validated against the option map and mount flags set accordingly.
+ * Opportunistically convert a target string into a pool name. If the
+ * string does not represent a block device with a valid zfs label
+ * then it is passed through without modification.
  */
-static int
-parse_option(char *mntopt, unsigned long *mntflags,
-    unsigned long *zfsflags, int sloppy)
+static void
+parse_dataset(const char *target, char **dataset)
 {
-	const option_map_t *opt;
-	char *ptr, *name, *value = NULL;
-	int error = 0;
-
-	name = strdup(mntopt);
-	if (name == NULL)
-		return (ENOMEM);
-
-	for (ptr = name; ptr && *ptr; ptr++) {
-		if (*ptr == '=') {
-			*ptr = '\0';
-			value = ptr+1;
-			VERIFY3P(value, !=, NULL);
-			break;
-		}
-	}
-
-	for (opt = option_map; opt->name != NULL; opt++) {
-		if (strncmp(name, opt->name, strlen(name)) == 0) {
-			*mntflags |= opt->mntmask;
-			*zfsflags |= opt->zfsmask;
-			error = 0;
-			goto out;
-		}
-	}
-
-	if (!sloppy)
-		error = ENOENT;
-out:
-	/* If required further process on the value may be done here */
-	free(name);
-	return (error);
-}
-
-/*
- * Translate the mount option string in to MS_* mount flags for the
- * kernel vfs.  When sloppy is non-zero unknown options will be ignored
- * otherwise they are considered fatal are copied in to badopt.
- */
-static int
-parse_options(char *mntopts, unsigned long *mntflags, unsigned long *zfsflags,
-    int sloppy, char *badopt, char *mtabopt)
-{
-	int error = 0, quote = 0, flag = 0, count = 0;
-	char *ptr, *opt, *opts;
-
-	opts = strdup(mntopts);
-	if (opts == NULL)
-		return (ENOMEM);
-
-	*mntflags = 0;
-	opt = NULL;
-
 	/*
-	 * Scan through all mount options which must be comma delimited.
-	 * We must be careful to notice regions which are double quoted
-	 * and skip commas in these regions.  Each option is then checked
-	 * to determine if it is a known option.
+	 * Prior to util-linux 2.36.2, if a file or directory in the
+	 * current working directory was named 'dataset' then mount(8)
+	 * would prepend the current working directory to the dataset.
+	 * Check for it and strip the prepended path when it is added.
 	 */
-	for (ptr = opts; ptr && !flag; ptr++) {
-		if (opt == NULL)
-			opt = ptr;
-
-		if (*ptr == '"')
-			quote = !quote;
-
-		if (quote)
-			continue;
-
-		if (*ptr == '\0')
-			flag = 1;
-
-		if ((*ptr == ',') || (*ptr == '\0')) {
-			*ptr = '\0';
-
-			error = parse_option(opt, mntflags, zfsflags, sloppy);
-			if (error) {
-				strcpy(badopt, opt);
-				goto out;
-
-			}
-
-			if (!(*mntflags & MS_REMOUNT) &&
-			    !(*zfsflags & ZS_ZFSUTIL)) {
-				if (count > 0)
-					strlcat(mtabopt, ",", MNT_LINE_MAX);
-
-				strlcat(mtabopt, opt, MNT_LINE_MAX);
-				count++;
-			}
-
-			opt = NULL;
-		}
-	}
-
-out:
-	free(opts);
-	return (error);
-}
-
-/*
- * Return the pool/dataset to mount given the name passed to mount.  This
- * is expected to be of the form pool/dataset, however may also refer to
- * a block device if that device contains a valid zfs label.
- */
-static char *
-parse_dataset(char *dataset)
-{
 	char cwd[PATH_MAX];
-	struct stat64 statbuf;
-	int error;
-	int len;
-
-	/*
-	 * We expect a pool/dataset to be provided, however if we're
-	 * given a device which is a member of a zpool we attempt to
-	 * extract the pool name stored in the label.  Given the pool
-	 * name we can mount the root dataset.
-	 */
-	error = stat64(dataset, &statbuf);
-	if (error == 0) {
-		nvlist_t *config;
-		char *name;
-		int fd;
-
-		fd = open(dataset, O_RDONLY);
-		if (fd < 0)
-			goto out;
-
-		error = zpool_read_label(fd, &config, NULL);
-		(void) close(fd);
-		if (error)
-			goto out;
-
-		error = nvlist_lookup_string(config,
-		    ZPOOL_CONFIG_POOL_NAME, &name);
-		if (error) {
-			nvlist_free(config);
-		} else {
-			dataset = strdup(name);
-			nvlist_free(config);
-			return (dataset);
-		}
+	if (getcwd(cwd, PATH_MAX) == NULL) {
+		perror("getcwd");
+		return;
 	}
-out:
-	/*
-	 * If a file or directory in your current working directory is
-	 * named 'dataset' then mount(8) will prepend your current working
-	 * directory to the dataset.  There is no way to prevent this
-	 * behavior so we simply check for it and strip the prepended
-	 * patch when it is added.
-	 */
-	if (getcwd(cwd, PATH_MAX) == NULL)
-		return (dataset);
+	int len = strlen(cwd);
+	if (strncmp(cwd, target, len) == 0)
+		target += len;
 
-	len = strlen(cwd);
+	/* Assume pool/dataset is more likely */
+	strlcpy(*dataset, target, PATH_MAX);
 
-	/* Do not add one when cwd already ends in a trailing '/' */
-	if (strncmp(cwd, dataset, len) == 0)
-		return (dataset + len + (cwd[len-1] != '/'));
+	int fd = open(target, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return;
 
-	return (dataset);
+	nvlist_t *cfg = NULL;
+	if (zpool_read_label(fd, &cfg, NULL) == 0) {
+		char *nm = NULL;
+		if (!nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &nm))
+			strlcpy(*dataset, nm, PATH_MAX);
+		nvlist_free(cfg);
+	}
+
+	if (close(fd))
+		perror("close");
 }
 
 /*
@@ -326,8 +125,8 @@
 	if (!fp) {
 		(void) fprintf(stderr, gettext(
 		    "filesystem '%s' was mounted, but /etc/mtab "
-		    "could not be opened due to error %d\n"),
-		    dataset, errno);
+		    "could not be opened due to error: %s\n"),
+		    dataset, strerror(errno));
 		return (MOUNT_FILEIO);
 	}
 
@@ -335,8 +134,8 @@
 	if (error) {
 		(void) fprintf(stderr, gettext(
 		    "filesystem '%s' was mounted, but /etc/mtab "
-		    "could not be updated due to error %d\n"),
-		    dataset, errno);
+		    "could not be updated due to error: %s\n"),
+		    dataset, strerror(errno));
 		return (MOUNT_FILEIO);
 	}
 
@@ -345,34 +144,6 @@
 	return (MOUNT_SUCCESS);
 }
 
-static void
-append_mntopt(const char *name, const char *val, char *mntopts,
-    char *mtabopt, boolean_t quote)
-{
-	char tmp[MNT_LINE_MAX];
-
-	snprintf(tmp, MNT_LINE_MAX, quote ? ",%s=\"%s\"" : ",%s=%s", name, val);
-
-	if (mntopts)
-		strlcat(mntopts, tmp, MNT_LINE_MAX);
-
-	if (mtabopt)
-		strlcat(mtabopt, tmp, MNT_LINE_MAX);
-}
-
-static void
-zfs_selinux_setcontext(zfs_handle_t *zhp, zfs_prop_t zpt, const char *name,
-    char *mntopts, char *mtabopt)
-{
-	char context[ZFS_MAXPROPLEN];
-
-	if (zfs_prop_get(zhp, zpt, context, sizeof (context),
-	    NULL, NULL, 0, B_FALSE) == 0) {
-		if (strcmp(context, "none") != 0)
-			append_mntopt(name, context, mntopts, mtabopt, B_TRUE);
-	}
-}
-
 int
 main(int argc, char **argv)
 {
@@ -383,12 +154,13 @@
 	char badopt[MNT_LINE_MAX] = { '\0' };
 	char mtabopt[MNT_LINE_MAX] = { '\0' };
 	char mntpoint[PATH_MAX];
-	char *dataset;
+	char dataset[PATH_MAX], *pdataset = dataset;
 	unsigned long mntflags = 0, zfsflags = 0, remount = 0;
 	int sloppy = 0, fake = 0, verbose = 0, nomtab = 0, zfsutil = 0;
 	int error, c;
 
 	(void) setlocale(LC_ALL, "");
+	(void) setlocale(LC_NUMERIC, "C");
 	(void) textdomain(TEXT_DOMAIN);
 
 	opterr = 0;
@@ -413,10 +185,11 @@
 			break;
 		case 'h':
 		case '?':
-			(void) fprintf(stderr, gettext("Invalid option '%c'\n"),
-			    optopt);
+			if (optopt)
+				(void) fprintf(stderr,
+				    gettext("Invalid option '%c'\n"), optopt);
 			(void) fprintf(stderr, gettext("Usage: mount.zfs "
-			    "[-sfnv] [-o options] <dataset> <mountpoint>\n"));
+			    "[-sfnvh] [-o options] <dataset> <mountpoint>\n"));
 			return (MOUNT_USAGE);
 		}
 	}
@@ -438,18 +211,18 @@
 		return (MOUNT_USAGE);
 	}
 
-	dataset = parse_dataset(argv[0]);
+	parse_dataset(argv[0], &pdataset);
 
 	/* canonicalize the mount point */
 	if (realpath(argv[1], mntpoint) == NULL) {
 		(void) fprintf(stderr, gettext("filesystem '%s' cannot be "
-		    "mounted at '%s' due to canonicalization error %d.\n"),
-		    dataset, argv[1], errno);
+		    "mounted at '%s' due to canonicalization error: %s\n"),
+		    dataset, argv[1], strerror(errno));
 		return (MOUNT_SYSERR);
 	}
 
 	/* validate mount options and set mntflags */
-	error = parse_options(mntopts, &mntflags, &zfsflags, sloppy,
+	error = zfs_parse_mount_options(mntopts, &mntflags, &zfsflags, sloppy,
 	    badopt, mtabopt);
 	if (error) {
 		switch (error) {
@@ -473,13 +246,6 @@
 		}
 	}
 
-	if (verbose)
-		(void) fprintf(stdout, gettext("mount.zfs:\n"
-		    "  dataset:    \"%s\"\n  mountpoint: \"%s\"\n"
-		    "  mountflags: 0x%lx\n  zfsflags:   0x%lx\n"
-		    "  mountopts:  \"%s\"\n  mtabopts:   \"%s\"\n"),
-		    dataset, mntpoint, mntflags, zfsflags, mntopts, mtabopt);
-
 	if (mntflags & MS_REMOUNT) {
 		nomtab = 1;
 		remount = 1;
@@ -502,33 +268,11 @@
 		return (MOUNT_USAGE);
 	}
 
-	/*
-	 * Checks to see if the ZFS_PROP_SELINUX_CONTEXT exists
-	 * if it does, create a tmp variable in case it's needed
-	 * checks to see if the selinux context is set to the default
-	 * if it is, allow the setting of the other context properties
-	 * this is needed because the 'context' property overrides others
-	 * if it is not the default, set the 'context' property
-	 */
-	if (zfs_prop_get(zhp, ZFS_PROP_SELINUX_CONTEXT, prop, sizeof (prop),
-	    NULL, NULL, 0, B_FALSE) == 0) {
-		if (strcmp(prop, "none") == 0) {
-			zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_FSCONTEXT,
-			    MNTOPT_FSCONTEXT, mntopts, mtabopt);
-			zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_DEFCONTEXT,
-			    MNTOPT_DEFCONTEXT, mntopts, mtabopt);
-			zfs_selinux_setcontext(zhp,
-			    ZFS_PROP_SELINUX_ROOTCONTEXT, MNTOPT_ROOTCONTEXT,
-			    mntopts, mtabopt);
-		} else {
-			append_mntopt(MNTOPT_CONTEXT, prop,
-			    mntopts, mtabopt, B_TRUE);
-		}
+	if (!zfsutil || sloppy ||
+	    libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
+		zfs_adjust_mount_options(zhp, mntpoint, mntopts, mtabopt);
 	}
 
-	/* A hint used to determine an auto-mounted snapshot mount point */
-	append_mntopt(MNTOPT_MNTPOINT, mntpoint, mntopts, NULL, B_FALSE);
-
 	/* treat all snapshots as legacy mount points */
 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT)
 		(void) strlcpy(prop, ZFS_MOUNTPOINT_LEGACY, ZFS_MAXPROPLEN);
@@ -545,12 +289,11 @@
 	if (zfs_version == 0) {
 		fprintf(stderr, gettext("unable to fetch "
 		    "ZFS version for filesystem '%s'\n"), dataset);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_SYSERR);
 	}
 
-	zfs_close(zhp);
-	libzfs_fini(g_zfs);
-
 	/*
 	 * Legacy mount points may only be mounted using 'mount', never using
 	 * 'zfs mount'.  However, since 'zfs mount' actually invokes 'mount'
@@ -568,6 +311,8 @@
 		    "Use 'zfs set mountpoint=%s' or 'mount -t zfs %s %s'.\n"
 		    "See zfs(8) for more information.\n"),
 		    dataset, mntpoint, dataset, mntpoint);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_USAGE);
 	}
 
@@ -578,14 +323,38 @@
 		    "Use 'zfs set mountpoint=%s' or 'zfs mount %s'.\n"
 		    "See zfs(8) for more information.\n"),
 		    dataset, "legacy", dataset);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_USAGE);
 	}
 
+	if (verbose)
+		(void) fprintf(stdout, gettext("mount.zfs:\n"
+		    "  dataset:    \"%s\"\n  mountpoint: \"%s\"\n"
+		    "  mountflags: 0x%lx\n  zfsflags:   0x%lx\n"
+		    "  mountopts:  \"%s\"\n  mtabopts:   \"%s\"\n"),
+		    dataset, mntpoint, mntflags, zfsflags, mntopts, mtabopt);
+
 	if (!fake) {
-		error = mount(dataset, mntpoint, MNTTYPE_ZFS,
-		    mntflags, mntopts);
+		if (zfsutil && !sloppy &&
+		    !libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
+			error = zfs_mount_at(zhp, mntopts, mntflags, mntpoint);
+			if (error) {
+				(void) fprintf(stderr, "zfs_mount_at() failed: "
+				    "%s", libzfs_error_description(g_zfs));
+				zfs_close(zhp);
+				libzfs_fini(g_zfs);
+				return (MOUNT_SYSERR);
+			}
+		} else {
+			error = mount(dataset, mntpoint, MNTTYPE_ZFS,
+			    mntflags, mntopts);
+		}
 	}
 
+	zfs_close(zhp);
+	libzfs_fini(g_zfs);
+
 	if (error) {
 		switch (errno) {
 		case ENOENT:
@@ -620,7 +389,7 @@
 				    "mount the filesystem again.\n"), dataset);
 				return (MOUNT_SYSERR);
 			}
-			/* fallthru */
+			fallthrough;
 #endif
 		default:
 			(void) fprintf(stderr, gettext("filesystem "

diff --git a/zfs/cmd/raidz_test/Makefile.am b/zfs/cmd/raidz_test/Makefile.am
index a394a0d..983ff25 100644
--- a/zfs/cmd/raidz_test/Makefile.am
+++ b/zfs/cmd/raidz_test/Makefile.am

@@ -4,11 +4,7 @@
 AM_CFLAGS += $(FRAME_LARGER_THAN)
 
 # Unconditionally enable ASSERTs
-AM_CPPFLAGS += -DDEBUG -UNDEBUG
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 
 bin_PROGRAMS = raidz_test
 
@@ -18,6 +14,9 @@
 	raidz_bench.c
 
 raidz_test_LDADD = \
-	$(top_builddir)/lib/libzpool/libzpool.la
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la
 
-raidz_test_LDADD += -lm -ldl
+raidz_test_LDADD += -lm
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/raidz_test/raidz_bench.c b/zfs/cmd/raidz_test/raidz_bench.c
index 4863b8d..f44d6fb 100644
--- a/zfs/cmd/raidz_test/raidz_bench.c
+++ b/zfs/cmd/raidz_test/raidz_bench.c

@@ -31,8 +31,6 @@
 #include <sys/vdev_raidz_impl.h>
 #include <stdio.h>
 
-#include <sys/time.h>
-
 #include "raidz_test.h"
 
 #define	GEN_BENCH_MEMORY	(((uint64_t)1ULL)<<32)
@@ -83,8 +81,17 @@
 			/* create suitable raidz_map */
 			ncols = rto_opts.rto_dcols + fn + 1;
 			zio_bench.io_size = 1ULL << ds;
-			rm_bench = vdev_raidz_map_alloc(&zio_bench,
-			    BENCH_ASHIFT, ncols, fn+1);
+
+			if (rto_opts.rto_expand) {
+				rm_bench = vdev_raidz_map_alloc_expanded(
+				    zio_bench.io_abd,
+				    zio_bench.io_size, zio_bench.io_offset,
+				    rto_opts.rto_ashift, ncols+1, ncols,
+				    fn+1, rto_opts.rto_expand_offset);
+			} else {
+				rm_bench = vdev_raidz_map_alloc(&zio_bench,
+				    BENCH_ASHIFT, ncols, fn+1);
+			}
 
 			/* estimate iteration count */
 			iter_cnt = GEN_BENCH_MEMORY;
@@ -113,7 +120,7 @@
 	}
 }
 
-void
+static void
 run_gen_bench(void)
 {
 	char **impl_name;
@@ -163,8 +170,16 @@
 			    (1ULL << BENCH_ASHIFT))
 				continue;
 
-			rm_bench = vdev_raidz_map_alloc(&zio_bench,
-			    BENCH_ASHIFT, ncols, PARITY_PQR);
+			if (rto_opts.rto_expand) {
+				rm_bench = vdev_raidz_map_alloc_expanded(
+				    zio_bench.io_abd,
+				    zio_bench.io_size, zio_bench.io_offset,
+				    BENCH_ASHIFT, ncols+1, ncols,
+				    PARITY_PQR, rto_opts.rto_expand_offset);
+			} else {
+				rm_bench = vdev_raidz_map_alloc(&zio_bench,
+				    BENCH_ASHIFT, ncols, PARITY_PQR);
+			}
 
 			/* estimate iteration count */
 			iter_cnt = (REC_BENCH_MEMORY);
@@ -197,7 +212,7 @@
 	}
 }
 
-void
+static void
 run_rec_bench(void)
 {
 	char **impl_name;

diff --git a/zfs/cmd/raidz_test/raidz_test.c b/zfs/cmd/raidz_test/raidz_test.c
index a050703..9a8be54 100644
--- a/zfs/cmd/raidz_test/raidz_test.c
+++ b/zfs/cmd/raidz_test/raidz_test.c

@@ -77,16 +77,20 @@
 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
 		    "  (-a) zio ashift                   : %zu\n"
 		    "  (-o) zio offset                   : 1 << %zu\n"
+		    "  (-e) expanded map                 : %s\n"
+		    "  (-r) reflow offset                : %llx\n"
 		    "  (-d) number of raidz data columns : %zu\n"
 		    "  (-s) size of DATA                 : 1 << %zu\n"
 		    "  (-S) sweep parameters             : %s \n"
 		    "  (-v) verbose                      : %s \n\n",
-		    opts->rto_ashift,			/* -a */
-		    ilog2(opts->rto_offset),		/* -o */
-		    opts->rto_dcols,			/* -d */
-		    ilog2(opts->rto_dsize),		/* -s */
-		    opts->rto_sweep ? "yes" : "no",	/* -S */
-		    verbose);				/* -v */
+		    opts->rto_ashift,				/* -a */
+		    ilog2(opts->rto_offset),			/* -o */
+		    opts->rto_expand ? "yes" : "no",		/* -e */
+		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
+		    opts->rto_dcols,				/* -d */
+		    ilog2(opts->rto_dsize),			/* -s */
+		    opts->rto_sweep ? "yes" : "no",		/* -S */
+		    verbose);					/* -v */
 	}
 }
 
@@ -104,6 +108,8 @@
 	    "\t[-S parameter sweep (default: %s)]\n"
 	    "\t[-t timeout for parameter sweep test]\n"
 	    "\t[-B benchmark all raidz implementations]\n"
+	    "\t[-e use expanded raidz map (default: %s)]\n"
+	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
 	    "\t[-v increase verbosity (default: %zu)]\n"
 	    "\t[-h (print help)]\n"
 	    "\t[-T test the test, see if failure would be detected]\n"
@@ -114,6 +120,8 @@
 	    o->rto_dcols,				/* -d */
 	    ilog2(o->rto_dsize),			/* -s */
 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
+	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
+	    (u_longlong_t)o->rto_expand_offset,		/* -r */
 	    o->rto_v);					/* -d */
 
 	exit(requested ? 0 : 1);
@@ -128,7 +136,7 @@
 
 	bcopy(&rto_opts_defaults, o, sizeof (*o));
 
-	while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
+	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
 		value = 0;
 
 		switch (opt) {
@@ -136,6 +144,12 @@
 			value = strtoull(optarg, NULL, 0);
 			o->rto_ashift = MIN(13, MAX(9, value));
 			break;
+		case 'e':
+			o->rto_expand = 1;
+			break;
+		case 'r':
+			o->rto_expand_offset = strtoull(optarg, NULL, 0);
+			break;
 		case 'o':
 			value = strtoull(optarg, NULL, 0);
 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
@@ -179,25 +193,34 @@
 	}
 }
 
-#define	DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
-#define	DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
+#define	DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd)
+#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size)
 
-#define	CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
-#define	CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
+#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
+#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
 
 static int
 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
 {
-	int i, ret = 0;
+	int r, i, ret = 0;
 
 	VERIFY(parity >= 1 && parity <= 3);
 
-	for (i = 0; i < parity; i++) {
-		if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i))
-		    != 0) {
-			ret++;
-			LOG_OPT(D_DEBUG, opts,
-			    "\nParity block [%d] different!\n", i);
+	for (r = 0; r < rm->rm_nrows; r++) {
+		raidz_row_t * const rr = rm->rm_row[r];
+		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
+		for (i = 0; i < parity; i++) {
+			if (CODE_COL_SIZE(rrg, i) == 0) {
+				VERIFY0(CODE_COL_SIZE(rr, i));
+				continue;
+			}
+
+			if (abd_cmp(CODE_COL(rr, i),
+			    CODE_COL(rrg, i)) != 0) {
+				ret++;
+				LOG_OPT(D_DEBUG, opts,
+				    "\nParity block [%d] different!\n", i);
+			}
 		}
 	}
 	return (ret);
@@ -206,16 +229,26 @@
 static int
 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
 {
-	int i, ret = 0;
-	int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
+	int r, i, dcols, ret = 0;
 
-	for (i = 0; i < dcols; i++) {
-		if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i))
-		    != 0) {
-			ret++;
+	for (r = 0; r < rm->rm_nrows; r++) {
+		raidz_row_t *rr = rm->rm_row[r];
+		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
+		dcols = opts->rm_golden->rm_row[0]->rr_cols -
+		    raidz_parity(opts->rm_golden);
+		for (i = 0; i < dcols; i++) {
+			if (DATA_COL_SIZE(rrg, i) == 0) {
+				VERIFY0(DATA_COL_SIZE(rr, i));
+				continue;
+			}
 
-			LOG_OPT(D_DEBUG, opts,
-			    "\nData block [%d] different!\n", i);
+			if (abd_cmp(DATA_COL(rrg, i),
+			    DATA_COL(rr, i)) != 0) {
+				ret++;
+
+				LOG_OPT(D_DEBUG, opts,
+				    "\nData block [%d] different!\n", i);
+			}
 		}
 	}
 	return (ret);
@@ -236,12 +269,13 @@
 static void
 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
 {
-	int i;
-	raidz_col_t *col;
-
-	for (i = 0; i < cnt; i++) {
-		col = &rm->rm_col[tgts[i]];
-		abd_iterate_func(col->rc_abd, 0, col->rc_size, init_rand, NULL);
+	for (int r = 0; r < rm->rm_nrows; r++) {
+		raidz_row_t *rr = rm->rm_row[r];
+		for (int i = 0; i < cnt; i++) {
+			raidz_col_t *col = &rr->rr_col[tgts[i]];
+			abd_iterate_func(col->rc_abd, 0, col->rc_size,
+			    init_rand, NULL);
+		}
 	}
 }
 
@@ -288,10 +322,22 @@
 
 	VERIFY0(vdev_raidz_impl_set("original"));
 
-	opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
-	    opts->rto_ashift, total_ncols, parity);
-	rm_test = vdev_raidz_map_alloc(zio_test,
-	    opts->rto_ashift, total_ncols, parity);
+	if (opts->rto_expand) {
+		opts->rm_golden =
+		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
+		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
+		    opts->rto_ashift, total_ncols+1, total_ncols,
+		    parity, opts->rto_expand_offset);
+		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
+		    zio_test->io_size, zio_test->io_offset,
+		    opts->rto_ashift, total_ncols+1, total_ncols,
+		    parity, opts->rto_expand_offset);
+	} else {
+		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
+		    opts->rto_ashift, total_ncols, parity);
+		rm_test = vdev_raidz_map_alloc(zio_test,
+		    opts->rto_ashift, total_ncols, parity);
+	}
 
 	VERIFY(opts->zio_golden);
 	VERIFY(opts->rm_golden);
@@ -312,6 +358,187 @@
 	return (err);
 }
 
+/*
+ * Build a multi-row raidz_map_t for the 'size'-byte I/O at 'offset'.
+ * If reflow is not in progress, reflow_offset should be UINT64_MAX.
+ * A row entirely before reflow_offset comes from the new location
+ * (physical_cols children); any other row, including one straddling
+ * reflow_offset, comes from the old location (physical_cols - 1).
+ *
+ * NOTE: Until raidz expansion is implemented this function is only
+ * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
+ */
+raidz_map_t *
+vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
+    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
+    uint64_t nparity, uint64_t reflow_offset)
+{
+	/* The zio's size in units of the vdev's minimum sector size. */
+	uint64_t s = size >> ashift;
+	uint64_t q, r, bc, devidx, asize = 0, tot;
+
+	/*
+	 * "Quotient": The number of data sectors for this stripe on all but
+	 * the "big column" child vdevs that also contain "remainder" data.
+	 * AKA "full rows"
+	 */
+	q = s / (logical_cols - nparity);
+
+	/*
+	 * "Remainder": The number of partial stripe data sectors in this I/O.
+	 * This will add a sector to some, but not all, child vdevs.
+	 */
+	r = s - q * (logical_cols - nparity);
+
+	/* The number of "big columns" - those which contain remainder data. */
+	bc = (r == 0 ? 0 : r + nparity);
+
+	/*
+	 * The total number of data and parity sectors associated with
+	 * this I/O.
+	 */
+	tot = s + nparity * (q + (r == 0 ? 0 : 1));
+
+	/* How many rows contain data (not skip) */
+	uint64_t rows = howmany(tot, logical_cols);
+	int cols = MIN(tot, logical_cols);
+
+	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
+	    KM_SLEEP);
+	rm->rm_nrows = rows;
+
+	for (uint64_t row = 0; row < rows; row++) {
+		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
+		    rr_col[cols]), KM_SLEEP);
+		rm->rm_row[row] = rr;
+
+		/* The starting RAIDZ (parent) vdev sector of the row. */
+		uint64_t b = (offset >> ashift) + row * logical_cols;
+
+		/*
+		 * If we are in the middle of a reflow, and any part of this
+		 * row has not been copied, then use the old location of
+		 * this row.
+		 */
+		int row_phys_cols = physical_cols;
+		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
+			row_phys_cols--;
+
+		/* starting child of this row */
+		uint64_t child_id = b % row_phys_cols;
+		/* The starting byte offset on each child vdev. */
+		uint64_t child_offset = (b / row_phys_cols) << ashift;
+
+		/*
+		 * We set cols to the entire width of the block, even
+		 * if this row is shorter.  This is needed because parity
+		 * generation (for Q and R) needs to know the entire width,
+		 * because it treats the short row as though it was
+		 * full-width (and the "phantom" sectors were zero-filled).
+		 *
+		 * Another approach to this would be to set cols shorter
+		 * (to just the number of columns that we might do i/o to)
+		 * and have another mechanism to tell the parity generation
+		 * about the "entire width".  Reconstruction (at least
+		 * vdev_raidz_reconstruct_general()) would also need to
+		 * know about the "entire width".
+		 */
+		rr->rr_cols = cols;
+		rr->rr_bigcols = bc;
+		rr->rr_missingdata = 0;
+		rr->rr_missingparity = 0;
+		rr->rr_firstdatacol = nparity;
+		rr->rr_abd_empty = NULL;
+		rr->rr_nempty = 0;
+
+		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
+			if (child_id >= row_phys_cols) {
+				child_id -= row_phys_cols;
+				child_offset += 1ULL << ashift;
+			}
+			rr->rr_col[c].rc_devidx = child_id;
+			rr->rr_col[c].rc_offset = child_offset;
+			rr->rr_col[c].rc_orig_data = NULL;
+			rr->rr_col[c].rc_error = 0;
+			rr->rr_col[c].rc_tried = 0;
+			rr->rr_col[c].rc_skipped = 0;
+			rr->rr_col[c].rc_need_orig_restore = B_FALSE;
+
+			uint64_t dc = c - rr->rr_firstdatacol;
+			if (c < rr->rr_firstdatacol) {
+				rr->rr_col[c].rc_size = 1ULL << ashift;
+				rr->rr_col[c].rc_abd =
+				    abd_alloc_linear(rr->rr_col[c].rc_size,
+				    B_TRUE);
+			} else if (row == rows - 1 && bc != 0 && c >= bc) {
+				/*
+				 * Past the end; this is for parity generation.
+				 */
+				rr->rr_col[c].rc_size = 0;
+				rr->rr_col[c].rc_abd = NULL;
+			} else {
+				/*
+				 * "data column" (col excluding parity): its
+				 * sectors are contiguous in abd.
+				 */
+				uint64_t off;
+
+				if (c < bc || r == 0) {
+					off = dc * rows + row;
+				} else {
+					off = r * rows +
+					    (dc - r) * (rows - 1) + row;
+				}
+				rr->rr_col[c].rc_size = 1ULL << ashift;
+				rr->rr_col[c].rc_abd = abd_get_offset_struct(
+				    &rr->rr_col[c].rc_abdstruct,
+				    abd, off << ashift, 1ULL << ashift);
+			}
+
+			asize += rr->rr_col[c].rc_size;
+		}
+		/*
+		 * If all data stored spans all columns, there's a danger that
+		 * parity will always be on the same device and, since parity
+		 * isn't read during normal operation, that that device's I/O
+		 * bandwidth won't be used effectively. We therefore switch
+		 * the parity every 1MB.
+		 *
+		 * ...at least that was, ostensibly, the theory. As a practical
+		 * matter unless we juggle the parity between all devices
+		 * evenly, we won't see any benefit. Further, occasional writes
+		 * that aren't a multiple of the LCM of the number of children
+		 * and the minimum stripe width are sufficient to avoid pessimal
+		 * behavior. Unfortunately, this decision created an implicit
+		 * on-disk format requirement that we need to support for all
+		 * eternity, but only for single-parity RAID-Z.
+		 *
+		 * If we intend to skip a sector in the zeroth column for
+		 * padding we must make sure to note this swap. We will never
+		 * intend to skip the first column since at least one data and
+		 * one parity column must appear in each row.
+		 */
+		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
+		    (offset & (1ULL << 20))) {
+			ASSERT(rr->rr_cols >= 2);
+			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
+			devidx = rr->rr_col[0].rc_devidx;
+			uint64_t o = rr->rr_col[0].rc_offset;
+			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
+			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
+			rr->rr_col[1].rc_devidx = devidx;
+			rr->rr_col[1].rc_offset = o;
+		}
+
+	}
+	ASSERT3U(asize, ==, tot << ashift);
+
+	/* init RAIDZ parity ops */
+	rm->rm_ops = vdev_raidz_math_get_ops();
+
+	return (rm);
+}
+
 static raidz_map_t *
 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
 {
@@ -330,8 +557,15 @@
 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
 	init_zio_abd(*zio);
 
-	rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
-	    total_ncols, parity);
+	if (opts->rto_expand) {
+		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
+		    (*zio)->io_size, (*zio)->io_offset,
+		    opts->rto_ashift, total_ncols+1, total_ncols,
+		    parity, opts->rto_expand_offset);
+	} else {
+		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
+		    total_ncols, parity);
+	}
 	VERIFY(rm);
 
 	/* Make sure code columns are destroyed */
@@ -420,7 +654,7 @@
 	if (fn < RAIDZ_REC_PQ) {
 		/* can reconstruct 1 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 
 			/* Check if should stop */
@@ -445,10 +679,11 @@
 	} else if (fn < RAIDZ_REC_PQR) {
 		/* can reconstruct 2 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
-				if (x1 >= rm->rm_cols - raidz_parity(rm))
+				if (x1 >= rm->rm_row[0]->rr_cols -
+				    raidz_parity(rm))
 					continue;
 
 				/* Check if should stop */
@@ -475,14 +710,15 @@
 	} else {
 		/* can reconstruct 3 failed data disk */
 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
-			if (x0 >= rm->rm_cols - raidz_parity(rm))
+			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
 				continue;
 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
-				if (x1 >= rm->rm_cols - raidz_parity(rm))
+				if (x1 >= rm->rm_row[0]->rr_cols -
+				    raidz_parity(rm))
 					continue;
 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
-					if (x2 >=
-					    rm->rm_cols - raidz_parity(rm))
+					if (x2 >= rm->rm_row[0]->rr_cols -
+					    raidz_parity(rm))
 						continue;
 
 					/* Check if should stop */
@@ -700,6 +936,8 @@
 		opts->rto_dcols = dcols_v[d];
 		opts->rto_offset = (1 << ashift_v[a]) * rand();
 		opts->rto_dsize = size_v[s];
+		opts->rto_expand = rto_opts.rto_expand;
+		opts->rto_expand_offset = rto_opts.rto_expand_offset;
 		opts->rto_v = 0; /* be quiet */
 
 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
@@ -732,6 +970,7 @@
 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
 }
 
+
 int
 main(int argc, char **argv)
 {
@@ -757,7 +996,7 @@
 
 	process_options(argc, argv);
 
-	kernel_init(FREAD);
+	kernel_init(SPA_MODE_READ);
 
 	/* setup random data because rand() is not reentrant */
 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

diff --git a/zfs/cmd/raidz_test/raidz_test.h b/zfs/cmd/raidz_test/raidz_test.h
index a7fd26b..0f7f4ce 100644
--- a/zfs/cmd/raidz_test/raidz_test.h
+++ b/zfs/cmd/raidz_test/raidz_test.h

@@ -38,18 +38,21 @@
 	"avx512bw",
 	"aarch64_neon",
 	"aarch64_neonx2",
+	"powerpc_altivec",
 	NULL
 };
 
 typedef struct raidz_test_opts {
 	size_t rto_ashift;
-	size_t rto_offset;
+	uint64_t rto_offset;
 	size_t rto_dcols;
 	size_t rto_dsize;
 	size_t rto_v;
 	size_t rto_sweep;
 	size_t rto_sweep_timeout;
 	size_t rto_benchmark;
+	size_t rto_expand;
+	uint64_t rto_expand_offset;
 	size_t rto_sanity;
 	size_t rto_gdb;
 
@@ -68,6 +71,8 @@
 	.rto_v = 0,
 	.rto_sweep = 0,
 	.rto_benchmark = 0,
+	.rto_expand = 0,
+	.rto_expand_offset = -1ULL,
 	.rto_sanity = 0,
 	.rto_gdb = 0,
 	.rto_should_stop = B_FALSE
@@ -112,4 +117,7 @@
 
 void run_raidz_benchmark(void);
 
+struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
+    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+
 #endif /* RAIDZ_TEST_H */

diff --git a/zfs/cmd/vdev_id/Makefile.am b/zfs/cmd/vdev_id/Makefile.am
index fb815fa..4071c6d 100644
--- a/zfs/cmd/vdev_id/Makefile.am
+++ b/zfs/cmd/vdev_id/Makefile.am

@@ -1 +1,3 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 dist_udev_SCRIPTS = vdev_id

diff --git a/zfs/cmd/vdev_id/vdev_id b/zfs/cmd/vdev_id/vdev_id
index a79aed3..7b5aab1 100755
--- a/zfs/cmd/vdev_id/vdev_id
+++ b/zfs/cmd/vdev_id/vdev_id

@@ -80,6 +80,34 @@
 # channel 86:00.0 0         B
 
 # #
+# # Example vdev_id.conf - multipath / multijbod-daisychaining
+# #
+#
+# multipath yes
+# multijbod yes
+#
+# #       PCI_ID  HBA PORT  CHANNEL NAME
+# channel 85:00.0 1         A
+# channel 85:00.0 0         B
+# channel 86:00.0 1         A
+# channel 86:00.0 0         B
+
+# #
+# # Example vdev_id.conf - multipath / mixed
+# #
+#
+# multipath yes
+# slot mix
+#
+# #       PCI_ID  HBA PORT  CHANNEL NAME
+# channel 85:00.0 3         A
+# channel 85:00.0 2         B
+# channel 86:00.0 3         A
+# channel 86:00.0 2         B
+# channel af:00.0 0         C
+# channel af:00.0 1         C
+
+# #
 # # Example vdev_id.conf - alias
 # #
 #
@@ -92,9 +120,10 @@
 CONFIG=/etc/zfs/vdev_id.conf
 PHYS_PER_PORT=
 DEV=
-MULTIPATH=
 TOPOLOGY=
 BAY=
+ENCL_ID=""
+UNIQ_ENCL_ID=""
 
 usage() {
 	cat << EOF
@@ -107,66 +136,148 @@
   -e    Create enclose device symlinks only (/dev/by-enclosure)
   -g    Storage network topology [default="$TOPOLOGY"]
   -m    Run in multipath mode
+  -j    Run in multijbod mode
   -p    number of phy's per switch port [default=$PHYS_PER_PORT]
   -h    show this summary
 EOF
-	exit 0
+	exit 1
+	# exit with error to avoid processing usage message by a udev rule
 }
 
 map_slot() {
-	local LINUX_SLOT=$1
-	local CHANNEL=$2
-	local MAPPED_SLOT=
+	LINUX_SLOT=$1	# compared with field 2 of "slot" config lines
+	CHANNEL=$2	# compared with field 4 (an empty field 4 also matches)
 
-	MAPPED_SLOT=`awk "\\$1 == \"slot\" && \\$2 == ${LINUX_SLOT} && \
-			\\$4 ~ /^${CHANNEL}$|^$/ { print \\$3; exit }" $CONFIG`
+	MAPPED_SLOT=$(awk -v linux_slot="$LINUX_SLOT" -v channel="$CHANNEL" \
+			'$1 == "slot" && $2 == linux_slot && \
+			($4 ~ "^"channel"$" || $4 ~ /^$/) { print $3; exit}' "$CONFIG")
 	if [ -z "$MAPPED_SLOT" ] ; then
 		MAPPED_SLOT=$LINUX_SLOT
 	fi
-	printf "%d" ${MAPPED_SLOT}
+	printf "%d" "${MAPPED_SLOT}"
 }
 
 map_channel() {
-	local MAPPED_CHAN=
-	local PCI_ID=$1
-	local PORT=$2
+	MAPPED_CHAN=
+	PCI_ID=$1	# only consulted for the sas_direct and scsi topologies
+	PORT=$2		# port number looked up in the "channel" config lines
 
 	case $TOPOLOGY in
 		"sas_switch")
-		MAPPED_CHAN=`awk "\\$1 == \"channel\" && \\$2 == ${PORT} \
-			{ print \\$3; exit }" $CONFIG`
+		MAPPED_CHAN=$(awk -v port="$PORT" \
+			'$1 == "channel" && $2 == port \
+			{ print $3; exit }' "$CONFIG")
 		;;
 		"sas_direct"|"scsi")
-		MAPPED_CHAN=`awk "\\$1 == \"channel\" && \
-			\\$2 == \"${PCI_ID}\" && \\$3 == ${PORT} \
-			{ print \\$4; exit }" $CONFIG`
+		MAPPED_CHAN=$(awk -v pciID="$PCI_ID" -v port="$PORT" \
+			'$1 == "channel" && $2 == pciID && $3 == port \
+			{ print $4; exit }' "$CONFIG")	# first match wins
 		;;
 	esac
-	printf "%s" ${MAPPED_CHAN}
+	printf "%s" "${MAPPED_CHAN}"
+}
+
+get_encl_id() {
+	# $1 is a whitespace-separated list of entry names from
+	# /sys/class/enclosure.  Let the shell's word splitting walk the
+	# list instead of indexing positional parameters by hand.
+	#
+	# For each entry, read its "id" attribute and append the value
+	# to the global ENCL_ID list.
+	for d in $1 ; do
+		id=$(cat "/sys/class/enclosure/${d}/id")
+		ENCL_ID="${ENCL_ID} $id"
+	done
+}
+
+get_uniq_encl_id() {
+	# Copy ids from ENCL_ID into UNIQ_ENCL_ID, skipping any id that
+	# is already present, so only the first occurrence is kept.
+	for uuid in ${ENCL_ID}; do
+		found=0
+
+		# Linear scan of the ids accepted so far.
+		for count in ${UNIQ_ENCL_ID}; do
+			[ $count = $uuid ] || continue
+			found=1
+			break
+		done
+
+		[ $found -ne 0 ] || UNIQ_ENCL_ID="${UNIQ_ENCL_ID} $uuid"
+	done
+}
+
+# map_jbod explainer: The bsg driver knows the difference between a SAS
+# expander and a fanout expander. Use the hostX instance along with the
+# top-level (whole enclosure) expander instances in /sys/class/enclosure,
+# match a field in an array of expanders, and use the index of the
+# matched array field as the enclosure instance, thereby making JBOD IDs
+# dynamic. This avoids reliance on high-overhead userspace commands like
+# multipath and lsscsi and instead uses existing sysfs data. The $HOSTCHAN
+# variable is derived from devpath gymnastics in the sas_handler() function.
+map_jbod() {
+	DEV=$1
+	DEVEXP=$(ls -l "/sys/block/$DEV/device/" | grep enclos | awk -F/ '{print $(NF-1) }')
+
+	# Use "set --" to load the enclosure entry names as positional parameters
+	set -- $(ls -l /sys/class/enclosure | grep -v "^total" | awk '{print $9}')
+	# Remember how many entries there are, and the list itself
+	JBOD_COUNT=$#
+	JBOD_ITEM=$*
+
+	# Collect every enclosure's id from /sys/class/enclosure/<dev>/id
+	get_encl_id "$JBOD_ITEM"
+	# The same id can be collected more than once (one entry per path),
+	# so reduce the list to its unique ids.
+	get_uniq_encl_id
+
+	# The 1-based position of the matching unique id is the JBOD number
+	j=0
+	for count in ${UNIQ_ENCL_ID}; do
+		i=1
+		j=$((j + 1))
+		while [ $i -le $JBOD_COUNT ] ; do
+			d=$(eval echo '$'{$i})
+			id=$(cat "/sys/class/enclosure/${d}/id")
+			if [ "$d" = "$DEVEXP" ] && [ "$id" = "$count" ] ; then
+				MAPPED_JBOD=$j
+				break
+			fi
+			i=$((i + 1))
+		done
+	done
+
+	printf "%d" "${MAPPED_JBOD:-0}"	# 0 when no enclosure matched
 }
 
 sas_handler() {
 	if [ -z "$PHYS_PER_PORT" ] ; then
-		PHYS_PER_PORT=`awk "\\$1 == \"phys_per_port\" \
-			{print \\$2; exit}" $CONFIG`
+		PHYS_PER_PORT=$(awk '$1 == "phys_per_port" \
+			{print $2; exit}' $CONFIG)
 	fi
 	PHYS_PER_PORT=${PHYS_PER_PORT:-4}
-	if ! echo $PHYS_PER_PORT | grep -q -E '^[0-9]+$' ; then
+
+	if ! echo "$PHYS_PER_PORT" | grep -q -E '^[0-9]+$' ; then
 		echo "Error: phys_per_port value $PHYS_PER_PORT is non-numeric"
 		exit 1
 	fi
 
 	if [ -z "$MULTIPATH_MODE" ] ; then
-		MULTIPATH_MODE=`awk "\\$1 == \"multipath\" \
-			{print \\$2; exit}" $CONFIG`
+		MULTIPATH_MODE=$(awk '$1 == "multipath" \
+			{print $2; exit}' $CONFIG)
+	fi
+
+	if [ -z "$MULTIJBOD_MODE" ] ; then
+		MULTIJBOD_MODE=$(awk '$1 == "multijbod" \
+			{print $2; exit}' $CONFIG)
 	fi
 
 	# Use first running component device if we're handling a dm-mpath device
 	if [ "$MULTIPATH_MODE" = "yes" ] ; then
 		# If udev didn't tell us the UUID via DM_NAME, check /dev/mapper
 		if [ -z "$DM_NAME" ] ; then
-			DM_NAME=`ls -l --full-time /dev/mapper |
-				awk "/\/$DEV$/{print \\$9}"`
+			DM_NAME=$(ls -l --full-time /dev/mapper |
+				grep "$DEV"$ | awk '{print $9}')
 		fi
 
 		# For raw disks udev exports DEVTYPE=partition when
@@ -176,28 +287,50 @@
 		# we have to append the -part suffix directly in the
 		# helper.
 		if [ "$DEVTYPE" != "partition" ] ; then
-			PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+			# Match p[number], remove the 'p' and prepend "-part"
+			PART=$(echo "$DM_NAME" |
+				awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
 		fi
 
 		# Strip off partition information.
-		DM_NAME=`echo $DM_NAME | sed 's/p[0-9][0-9]*$//'`
+		DM_NAME=$(echo "$DM_NAME" | sed 's/p[0-9][0-9]*$//')
 		if [ -z "$DM_NAME" ] ; then
 			return
 		fi
 
-		# Get the raw scsi device name from multipath -ll. Strip off
-		# leading pipe symbols to make field numbering consistent.
-		DEV=`multipath -ll $DM_NAME |
-			awk '/running/{gsub("^[|]"," "); print $3 ; exit}'`
+		# Utilize DM device name to gather subordinate block devices
+		# using sysfs to avoid userspace utilities
+
+		# If our DEVNAME is something like /dev/dm-177, then we may be
+		# able to get our DMDEV from it.
+		DMDEV=$(echo $DEVNAME | sed 's;/dev/;;g')
+		if [ ! -e /sys/block/$DMDEV/slaves/* ] ; then
+			# It's not there, try looking in /dev/mapper
+			DMDEV=$(ls -l --full-time /dev/mapper | grep $DM_NAME |
+			awk '{gsub("../", " "); print $NF}')
+		fi
+
+		# Use sysfs pointers in /sys/block/dm-X/slaves because using
+		# userspace tools creates lots of overhead and should be avoided
+		# whenever possible. Use awk to isolate lowest instance of
+		# sd device member in dm device group regardless of string
+		# length.
+		DEV=$(ls "/sys/block/$DMDEV/slaves" | awk '
+			{ len=sprintf ("%20s",length($0)); gsub(/ /,0,str); a[NR]=len "_" $0; }
+			END {
+				asort(a)
+				print substr(a[1],22)
+			}')
+
 		if [ -z "$DEV" ] ; then
 			return
 		fi
 	fi
 
-	if echo $DEV | grep -q ^/devices/ ; then
+	if echo "$DEV" | grep -q ^/devices/ ; then
 		sys_path=$DEV
 	else
-		sys_path=`udevadm info -q path -p /sys/block/$DEV 2>/dev/null`
+		sys_path=$(udevadm info -q path -p "/sys/block/$DEV" 2>/dev/null)
 	fi
 
 	# Use positional parameters as an ad-hoc array
@@ -207,84 +340,104 @@
 
 	# Get path up to /sys/.../hostX
 	i=1
-	while [ $i -le $num_dirs ] ; do
-		d=$(eval echo \${$i})
+
+	while [ $i -le "$num_dirs" ] ; do
+		d=$(eval echo '$'{$i})
 		scsi_host_dir="$scsi_host_dir/$d"
-		echo $d | grep -q -E '^host[0-9]+$' && break
-		i=$(($i + 1))
+		echo "$d" | grep -q -E '^host[0-9]+$' && break
+		i=$((i + 1))
 	done
 
-	if [ $i = $num_dirs ] ; then
+	# Lets grab the SAS host channel number and save it for JBOD sorting later
+	HOSTCHAN=$(echo "$d" | awk -F/ '{ gsub("host","",$NF); print $NF}')
+
+	if [ $i = "$num_dirs" ] ; then
 		return
 	fi
 
-	PCI_ID=$(eval echo \${$(($i -1))} | awk -F: '{print $2":"$3}')
+	PCI_ID=$(eval echo '$'{$((i -1))} | awk -F: '{print $2":"$3}')
 
 	# In sas_switch mode, the directory four levels beneath
 	# /sys/.../hostX contains symlinks to phy devices that reveal
 	# the switch port number.  In sas_direct mode, the phy links one
 	# directory down reveal the HBA port.
 	port_dir=$scsi_host_dir
+
 	case $TOPOLOGY in
-		"sas_switch") j=$(($i + 4)) ;;
-		"sas_direct") j=$(($i + 1)) ;;
+		"sas_switch") j=$((i + 4)) ;;
+		"sas_direct") j=$((i + 1)) ;;
 	esac
 
-	i=$(($i + 1))
+	i=$((i + 1))
+
 	while [ $i -le $j ] ; do
-		port_dir="$port_dir/$(eval echo \${$i})"
-		i=$(($i + 1))
+		port_dir="$port_dir/$(eval echo '$'{$i})"
+		i=$((i + 1))
 	done
 
-	PHY=`ls -d $port_dir/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}'`
+	PHY=$(ls -vd "$port_dir"/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}')
 	if [ -z "$PHY" ] ; then
 		PHY=0
 	fi
-	PORT=$(( $PHY / $PHYS_PER_PORT ))
+	PORT=$((PHY / PHYS_PER_PORT))
 
 	# Look in /sys/.../sas_device/end_device-X for the bay_identifier
 	# attribute.
 	end_device_dir=$port_dir
-	while [ $i -lt $num_dirs ] ; do
-		d=$(eval echo \${$i})
+
+	while [ $i -lt "$num_dirs" ] ; do
+		d=$(eval echo '$'{$i})
 		end_device_dir="$end_device_dir/$d"
-		if echo $d | grep -q '^end_device' ; then
+		if echo "$d" | grep -q '^end_device' ; then
 			end_device_dir="$end_device_dir/sas_device/$d"
 			break
 		fi
-		i=$(($i + 1))
+		i=$((i + 1))
 	done
 
+	# Add 'mix' slot type for environments where dm-multipath devices
+	# include end-devices connected via SAS expanders or direct connection
+	# to SAS HBA. A mixed connectivity environment such as pool devices
+	# contained in a SAS JBOD and spare drives or log devices directly
+	# connected in a server backplane without expanders in the I/O path.
 	SLOT=
+
 	case $BAY in
 	"bay")
-		SLOT=`cat $end_device_dir/bay_identifier 2>/dev/null`
+		SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
+		;;
+	"mix")
+		if [ $(cat "$end_device_dir/bay_identifier" 2>/dev/null) ] ; then
+			SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
+		else
+			SLOT=$(cat "$end_device_dir/phy_identifier" 2>/dev/null)
+		fi
 		;;
 	"phy")
-		SLOT=`cat $end_device_dir/phy_identifier 2>/dev/null`
+		SLOT=$(cat "$end_device_dir/phy_identifier" 2>/dev/null)
 		;;
 	"port")
-		d=$(eval echo \${$i})
-		SLOT=`echo $d | sed -e 's/^.*://'`
+		d=$(eval echo '$'{$i})
+		SLOT=$(echo "$d" | sed -e 's/^.*://')
 		;;
 	"id")
-		i=$(($i + 1))
-		d=$(eval echo \${$i})
-		SLOT=`echo $d | sed -e 's/^.*://'`
+		i=$((i + 1))
+		d=$(eval echo '$'{$i})
+		SLOT=$(echo "$d" | sed -e 's/^.*://')
 		;;
 	"lun")
-		i=$(($i + 2))
-		d=$(eval echo \${$i})
-		SLOT=`echo $d | sed -e 's/^.*://'`
+		i=$((i + 2))
+		d=$(eval echo '$'{$i})
+		SLOT=$(echo "$d" | sed -e 's/^.*://')
 		;;
 	"ses")
 		# look for this SAS path in all SCSI Enclosure Services
 		# (SES) enclosures
-		sas_address=`cat $end_device_dir/sas_address 2>/dev/null`
-		enclosures=`lsscsi -g | \
-			sed -n -e '/enclosu/s/^.* \([^ ][^ ]*\) *$/\1/p'`
+		sas_address=$(cat "$end_device_dir/sas_address" 2>/dev/null)
+		enclosures=$(lsscsi -g | \
+			sed -n -e '/enclosu/s/^.* \([^ ][^ ]*\) *$/\1/p')
 		for enclosure in $enclosures; do
-			set -- $(sg_ses -p aes $enclosure | \
+			set -- $(sg_ses -p aes "$enclosure" | \
 				awk "/device slot number:/{slot=\$12} \
 					/SAS address: $sas_address/\
 					{print slot}")
@@ -299,42 +452,55 @@
 		return
 	fi
 
-	CHAN=`map_channel $PCI_ID $PORT`
-	SLOT=`map_slot $SLOT $CHAN`
-	if [ -z "$CHAN" ] ; then
-		return
+	if [ "$MULTIJBOD_MODE" = "yes" ] ; then
+		CHAN=$(map_channel "$PCI_ID" "$PORT")
+		SLOT=$(map_slot "$SLOT" "$CHAN")
+		JBOD=$(map_jbod "$DEV")
+
+		if [ -z "$CHAN" ] ; then
+			return
+		fi
+		echo "${CHAN}"-"${JBOD}"-"${SLOT}${PART}"
+	else
+		CHAN=$(map_channel "$PCI_ID" "$PORT")
+		SLOT=$(map_slot "$SLOT" "$CHAN")
+
+		if [ -z "$CHAN" ] ; then
+			return
+		fi
+		echo "${CHAN}${SLOT}${PART}"
 	fi
-	echo ${CHAN}${SLOT}${PART}
 }
 
 scsi_handler() {
 	if [ -z "$FIRST_BAY_NUMBER" ] ; then
-		FIRST_BAY_NUMBER=`awk "\\$1 == \"first_bay_number\" \
-			{print \\$2; exit}" $CONFIG`
+		FIRST_BAY_NUMBER=$(awk '$1 == "first_bay_number" \
+			{print $2; exit}' $CONFIG)
 	fi
 	FIRST_BAY_NUMBER=${FIRST_BAY_NUMBER:-0}
 
 	if [ -z "$PHYS_PER_PORT" ] ; then
-		PHYS_PER_PORT=`awk "\\$1 == \"phys_per_port\" \
-			{print \\$2; exit}" $CONFIG`
+		PHYS_PER_PORT=$(awk '$1 == "phys_per_port" \
+			{print $2; exit}' $CONFIG)
 	fi
 	PHYS_PER_PORT=${PHYS_PER_PORT:-4}
-	if ! echo $PHYS_PER_PORT | grep -q -E '^[0-9]+$' ; then
+
+	if ! echo "$PHYS_PER_PORT" | grep -q -E '^[0-9]+$' ; then
 		echo "Error: phys_per_port value $PHYS_PER_PORT is non-numeric"
 		exit 1
 	fi
 
 	if [ -z "$MULTIPATH_MODE" ] ; then
-		MULTIPATH_MODE=`awk "\\$1 == \"multipath\" \
-			{print \\$2; exit}" $CONFIG`
+		MULTIPATH_MODE=$(awk '$1 == "multipath" \
+			{print $2; exit}' $CONFIG)
 	fi
 
 	# Use first running component device if we're handling a dm-mpath device
 	if [ "$MULTIPATH_MODE" = "yes" ] ; then
 		# If udev didn't tell us the UUID via DM_NAME, check /dev/mapper
 		if [ -z "$DM_NAME" ] ; then
-			DM_NAME=`ls -l --full-time /dev/mapper |
-				awk "/\/$DEV$/{print \\$9}"`
+			DM_NAME=$(ls -l --full-time /dev/mapper |
+				grep "$DEV"$ | awk '{print $9}')
 		fi
 
 		# For raw disks udev exports DEVTYPE=partition when
@@ -344,28 +510,30 @@
 		# we have to append the -part suffix directly in the
 		# helper.
 		if [ "$DEVTYPE" != "partition" ] ; then
-			PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+			# Match p[number], remove the 'p' and prepend "-part"
+			PART=$(echo "$DM_NAME" |
+			    awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
 		fi
 
 		# Strip off partition information.
-		DM_NAME=`echo $DM_NAME | sed 's/p[0-9][0-9]*$//'`
+		DM_NAME=$(echo "$DM_NAME" | sed 's/p[0-9][0-9]*$//')
 		if [ -z "$DM_NAME" ] ; then
 			return
 		fi
 
 		# Get the raw scsi device name from multipath -ll. Strip off
 		# leading pipe symbols to make field numbering consistent.
-		DEV=`multipath -ll $DM_NAME |
-			awk '/running/{gsub("^[|]"," "); print $3 ; exit}'`
+		DEV=$(multipath -ll "$DM_NAME" |
+			awk '/running/{gsub("^[|]"," "); print $3 ; exit}')
 		if [ -z "$DEV" ] ; then
 			return
 		fi
 	fi
 
-	if echo $DEV | grep -q ^/devices/ ; then
+	if echo "$DEV" | grep -q ^/devices/ ; then
 		sys_path=$DEV
 	else
-		sys_path=`udevadm info -q path -p /sys/block/$DEV 2>/dev/null`
+		sys_path=$(udevadm info -q path -p "/sys/block/$DEV" 2>/dev/null)
 	fi
 
 	# expect sys_path like this, for example:
@@ -378,44 +546,47 @@
 
 	# Get path up to /sys/.../hostX
 	i=1
-	while [ $i -le $num_dirs ] ; do
-		d=$(eval echo \${$i})
+
+	while [ $i -le "$num_dirs" ] ; do
+		d=$(eval echo '$'{$i})
 		scsi_host_dir="$scsi_host_dir/$d"
-		echo $d | grep -q -E '^host[0-9]+$' && break
-		i=$(($i + 1))
+
+		echo "$d" | grep -q -E '^host[0-9]+$' && break
+		i=$((i + 1))
 	done
 
-	if [ $i = $num_dirs ] ; then
+	if [ $i = "$num_dirs" ] ; then
 		return
 	fi
 
-	PCI_ID=$(eval echo \${$(($i -1))} | awk -F: '{print $2":"$3}')
+	PCI_ID=$(eval echo '$'{$((i -1))} | awk -F: '{print $2":"$3}')
 
 	# In scsi mode, the directory two levels beneath
 	# /sys/.../hostX reveals the port and slot.
 	port_dir=$scsi_host_dir
-	j=$(($i + 2))
+	j=$((i + 2))
 
-	i=$(($i + 1))
+	i=$((i + 1))
 	while [ $i -le $j ] ; do
-		port_dir="$port_dir/$(eval echo \${$i})"
-		i=$(($i + 1))
+		port_dir="$port_dir/$(eval echo '$'{$i})"
+		i=$((i + 1))
 	done
 
-	set -- $(echo $port_dir | sed -e 's/^.*:\([^:]*\):\([^:]*\)$/\1 \2/')
+	set -- $(echo "$port_dir" | sed -e 's/^.*:\([^:]*\):\([^:]*\)$/\1 \2/')
 	PORT=$1
-	SLOT=$(($2 + $FIRST_BAY_NUMBER))
+	SLOT=$(($2 + FIRST_BAY_NUMBER))
 
 	if [ -z "$SLOT" ] ; then
 		return
 	fi
 
-	CHAN=`map_channel $PCI_ID $PORT`
-	SLOT=`map_slot $SLOT $CHAN`
+	CHAN=$(map_channel "$PCI_ID" "$PORT")
+	SLOT=$(map_slot "$SLOT" "$CHAN")
+
 	if [ -z "$CHAN" ] ; then
 		return
 	fi
-	echo ${CHAN}${SLOT}${PART}
+	echo "${CHAN}${SLOT}${PART}"
 }
 
 # Figure out the name for the enclosure symlink
@@ -425,8 +596,10 @@
 	# DEVPATH=/sys/devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/subsystem/devices/0:0:0:0/scsi_generic/sg0
 
 	# Get the enclosure ID ("0:0:0:0")
-	ENC=$(basename $(readlink -m "/sys/$DEVPATH/../.."))
-	if [ ! -d /sys/class/enclosure/$ENC ] ; then
+	ENC="${DEVPATH%/*}"
+	ENC="${ENC%/*}"
+	ENC="${ENC##*/}"
+	if [ ! -d "/sys/class/enclosure/$ENC" ] ; then
 		# Not an enclosure, bail out
 		return
 	fi
@@ -434,25 +607,26 @@
 	# Get the long sysfs device path to our enclosure. Looks like:
 	# /devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/port-0:0/ ... /enclosure/0:0:0:0
 
-	ENC_DEVICE=$(readlink /sys/class/enclosure/$ENC)
+	ENC_DEVICE=$(readlink "/sys/class/enclosure/$ENC")
 
 	# Grab the full path to the hosts port dir:
 	# /devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/port-0:0
-	PORT_DIR=$(echo $ENC_DEVICE | grep -Eo '.+host[0-9]+/port-[0-9]+:[0-9]+')
+	PORT_DIR=$(echo "$ENC_DEVICE" | grep -Eo '.+host[0-9]+/port-[0-9]+:[0-9]+')
 
 	# Get the port number
-	PORT_ID=$(echo $PORT_DIR | grep -Eo "[0-9]+$")
+	PORT_ID=$(echo "$PORT_DIR" | grep -Eo "[0-9]+$")
 
 	# The PCI directory is two directories up from the port directory
 	# /sys/devices/pci0000:00/0000:00:03.0/0000:05:00.0
-	PCI_ID_LONG=$(basename $(readlink -m "/sys/$PORT_DIR/../.."))
+	PCI_ID_LONG="$(readlink -m "/sys/$PORT_DIR/../..")"
+	PCI_ID_LONG="${PCI_ID_LONG##*/}"
 
 	# Strip down the PCI address from 0000:05:00.0 to 05:00.0
-	PCI_ID=$(echo "$PCI_ID_LONG" | sed -r 's/^[0-9]+://g')
+	PCI_ID="${PCI_ID_LONG#[0-9]*:}"
 
 	# Name our device according to vdev_id.conf (like "L0" or "U1").
 	NAME=$(awk "/channel/{if (\$1 == \"channel\" && \$2 == \"$PCI_ID\" && \
-		\$3 == \"$PORT_ID\") {print \$4int(count[\$4])}; count[\$4]++}" $CONFIG)
+		\$3 == \"$PORT_ID\") {print \$4\$3}}" $CONFIG)
 
 	echo "${NAME}"
 }
@@ -487,10 +661,12 @@
 	#          digits as partitions, causing alias creation to fail. This
 	#          ambiguity seems unavoidable, so devices using this facility
 	#          must not use such names.
-	local DM_PART=
-	if echo $DM_NAME | grep -q -E 'p[0-9][0-9]*$' ; then
+	DM_PART=
+	if echo "$DM_NAME" | grep -q -E 'p[0-9][0-9]*$' ; then
 		if [ "$DEVTYPE" != "partition" ] ; then
-			DM_PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'`
+			# Match p[number], remove the 'p' and prepend "-part"
+			DM_PART=$(echo "$DM_NAME" |
+			    awk 'match($0,/p[0-9]+$/) {print "-part"substr($0,RSTART+1,RLENGTH-1)}')
 		fi
 	fi
 
@@ -498,21 +674,25 @@
 	for link in $DEVLINKS ; do
 		# Remove partition information to match key of top-level device.
 		if [ -n "$DM_PART" ] ; then
-			link=`echo $link | sed 's/p[0-9][0-9]*$//'`
+			link=$(echo "$link" | sed 's/p[0-9][0-9]*$//')
 		fi
 		# Check both the fully qualified and the base name of link.
-		for l in $link `basename $link` ; do
-			alias=`awk "\\$1 == \"alias\" && \\$3 == \"${l}\" \
-					{ print \\$2; exit }" $CONFIG`
-			if [ -n "$alias" ] ; then
-				echo ${alias}${DM_PART}
-				return
+		for l in $link ${link##*/} ; do
+			if [ ! -z "$l" ]; then
+				alias=$(awk -v var="$l" '($1 == "alias") && \
+					($3 == var) \
+					{ print $2; exit }' $CONFIG)
+				if [ -n "$alias" ] ; then
+					echo "${alias}${DM_PART}"
+					return
+				fi
 			fi
 		done
 	done
 }
 
-while getopts 'c:d:eg:mp:h' OPTION; do
+# main
+while getopts 'c:d:eg:jmp:h' OPTION; do
 	case ${OPTION} in
 	c)
 		CONFIG=${OPTARG}
@@ -525,7 +705,9 @@
 	# create the enclosure device symlinks only.  We also need
 	# "enclosure_symlinks yes" set in vdev_id.config to actually create the
 	# symlink.
-	ENCLOSURE_MODE=$(awk '{if ($1 == "enclosure_symlinks") print $2}' $CONFIG)
+	ENCLOSURE_MODE=$(awk '{if ($1 == "enclosure_symlinks") \
+		print $2}' "$CONFIG")
+
 	if [ "$ENCLOSURE_MODE" != "yes" ] ; then
 		exit 0
 	fi
@@ -536,6 +718,9 @@
 	p)
 		PHYS_PER_PORT=${OPTARG}
 		;;
+	j)
+		MULTIJBOD_MODE=yes
+		;;
 	m)
 		MULTIPATH_MODE=yes
 		;;
@@ -545,34 +730,35 @@
 	esac
 done
 
-if [ ! -r $CONFIG ] ; then
-	exit 0
+if [ ! -r "$CONFIG" ] ; then
+	echo "Error: Config file \"$CONFIG\" not found"
+	exit 1
 fi
 
-if [ -z "$DEV" -a -z "$ENCLOSURE_MODE" ] ; then
+if [ -z "$DEV" ] && [ -z "$ENCLOSURE_MODE" ] ; then
 	echo "Error: missing required option -d"
 	exit 1
 fi
 
 if [ -z "$TOPOLOGY" ] ; then
-	TOPOLOGY=`awk "\\$1 == \"topology\" {print \\$2; exit}" $CONFIG`
+	TOPOLOGY=$(awk '($1 == "topology") {print $2; exit}' "$CONFIG")
 fi
 
 if [ -z "$BAY" ] ; then
-	BAY=`awk "\\$1 == \"slot\" {print \\$2; exit}" $CONFIG`
+	BAY=$(awk '($1 == "slot") {print $2; exit}' "$CONFIG")
 fi
 
 TOPOLOGY=${TOPOLOGY:-sas_direct}
 
 # Should we create /dev/by-enclosure symlinks?
-if [ "$ENCLOSURE_MODE" = "yes" -a "$TOPOLOGY" = "sas_direct" ] ; then
+if [ "$ENCLOSURE_MODE" = "yes" ] && [ "$TOPOLOGY" = "sas_direct" ] ; then
 	ID_ENCLOSURE=$(enclosure_handler)
 	if [ -z "$ID_ENCLOSURE" ] ; then
 		exit 0
 	fi
 
 	# Just create the symlinks to the enclosure devices and then exit.
-	ENCLOSURE_PREFIX=$(awk '/enclosure_symlinks_prefix/{print $2}' $CONFIG)
+	ENCLOSURE_PREFIX=$(awk '/enclosure_symlinks_prefix/{print $2}' "$CONFIG")
 	if [ -z "$ENCLOSURE_PREFIX" ] ; then
 		ENCLOSURE_PREFIX="enc"
 	fi
@@ -582,16 +768,16 @@
 fi
 
 # First check if an alias was defined for this device.
-ID_VDEV=`alias_handler`
+ID_VDEV=$(alias_handler)
 
 if [ -z "$ID_VDEV" ] ; then
 	BAY=${BAY:-bay}
 	case $TOPOLOGY in
 		sas_direct|sas_switch)
-			ID_VDEV=`sas_handler`
+			ID_VDEV=$(sas_handler)
 			;;
 		scsi)
-			ID_VDEV=`scsi_handler`
+			ID_VDEV=$(scsi_handler)
 			;;
 		*)
 			echo "Error: unknown topology $TOPOLOGY"

diff --git a/zfs/cmd/zdb/Makefile.am b/zfs/cmd/zdb/Makefile.am
index 1fa7ec6..c5858c2 100644
--- a/zfs/cmd/zdb/Makefile.am
+++ b/zfs/cmd/zdb/Makefile.am

@@ -1,11 +1,7 @@
 include $(top_srcdir)/config/Rules.am
 
 # Unconditionally enable debugging for zdb
-AM_CPPFLAGS += -DDEBUG -UNDEBUG
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 
 sbin_PROGRAMS = zdb
 
@@ -15,5 +11,8 @@
 	zdb.h
 
 zdb_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzpool/libzpool.la
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zdb/zdb.c b/zfs/cmd/zdb/zdb.c
index 0182c79..bcd520d 100644
--- a/zfs/cmd/zdb/zdb.c
+++ b/zfs/cmd/zdb/zdb.c

@@ -21,17 +21,21 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Nexenta Systems, Inc.
  * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
  * Copyright (c) 2015, 2017, Intel Corporation.
- * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2020 Datto Inc.
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ *     under sponsorship from the FreeBSD Foundation.
+ * Copyright (c) 2021 Allan Jude
  */
 
 #include <stdio.h>
 #include <unistd.h>
-#include <stdio_ext.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <sys/zfs_context.h>
@@ -51,23 +55,28 @@
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_pool.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/dbuf.h>
 #include <sys/zil.h>
 #include <sys/zil_impl.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
+#include <sys/dmu_send.h>
 #include <sys/dmu_traverse.h>
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/zfs_fuid.h>
 #include <sys/arc.h>
+#include <sys/arc_impl.h>
 #include <sys/ddt.h>
 #include <sys/zfeature.h>
 #include <sys/abd.h>
 #include <sys/blkptr.h>
 #include <sys/dsl_crypt.h>
 #include <sys/dsl_scan.h>
+#include <sys/btree.h>
 #include <zfs_comutil.h>
+#include <sys/zstd/zstd.h>
 
 #include <libnvpair.h>
 #include <libzutil.h>
@@ -98,25 +107,650 @@
 
 extern int reference_tracking_enable;
 extern int zfs_recover;
-extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
+extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
 extern int zfs_vdev_async_read_max_active;
 extern boolean_t spa_load_verify_dryrun;
+extern boolean_t spa_mode_readable_spacemaps;
 extern int zfs_reconstruct_indirect_combinations_max;
+extern uint_t zfs_btree_verify_intensity;
 
 static const char cmdname[] = "zdb";
 uint8_t dump_opt[256];
 
 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
 
-uint64_t *zopt_object = NULL;
-static unsigned zopt_objects = 0;
+uint64_t *zopt_metaslab = NULL;
+static unsigned zopt_metaslab_args = 0;
+
+typedef struct zopt_object_range {
+	uint64_t zor_obj_start;	/* NOTE(review): presumably range start */
+	uint64_t zor_obj_end;	/* NOTE(review): presumably range end */
+	uint64_t zor_flags;	/* NOTE(review): presumably ZOR_FLAG_* bits */
+} zopt_object_range_t;
+zopt_object_range_t *zopt_object_ranges = NULL;
+static unsigned zopt_object_args = 0;
+
+static int flagbits[256];
+
+#define	ZOR_FLAG_PLAIN_FILE	0x0001
+#define	ZOR_FLAG_DIRECTORY	0x0002
+#define	ZOR_FLAG_SPACE_MAP	0x0004
+#define	ZOR_FLAG_ZAP		0x0008
+#define	ZOR_FLAG_ALL_TYPES	-1
+#define	ZOR_SUPPORTED_FLAGS	(ZOR_FLAG_PLAIN_FILE	| \
+				ZOR_FLAG_DIRECTORY	| \
+				ZOR_FLAG_SPACE_MAP	| \
+				ZOR_FLAG_ZAP)
+
+#define	ZDB_FLAG_CHECKSUM	0x0001
+#define	ZDB_FLAG_DECOMPRESS	0x0002
+#define	ZDB_FLAG_BSWAP		0x0004
+#define	ZDB_FLAG_GBH		0x0008
+#define	ZDB_FLAG_INDIRECT	0x0010
+#define	ZDB_FLAG_RAW		0x0020
+#define	ZDB_FLAG_PRINT_BLKPTR	0x0040
+#define	ZDB_FLAG_VERBOSE	0x0080
+
 uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
 static int leaked_objects = 0;
 static range_tree_t *mos_refd_objs;
 
-static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
+static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
+    boolean_t);
 static void mos_obj_refd(uint64_t);
 static void mos_obj_refd_multiple(uint64_t);
+static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
+    dmu_tx_t *tx);
+
+typedef struct sublivelist_verify {
+	/* FREEs not yet matched to an ALLOC, within one sub-livelist */
+	zfs_btree_t sv_pair;
+
+	/* ALLOCs without a matching FREE; accumulates across sub-livelists */
+	zfs_btree_t sv_leftover;
+} sublivelist_verify_t;
+
+/*
+ * B-tree comparator for block pointers stored in a sublivelist tree.
+ *
+ * Entries are ordered by, in turn:
+ *   1. the vdev id of blk_dva[0]
+ *   2. the offset of blk_dva[0]
+ *   3. blk_birth
+ *
+ * Returns -1, 0 or +1 when larg sorts before, equal to or after rarg.
+ * Only the first DVA and the birth txg take part in the comparison.
+ */
+static int
+livelist_compare(const void *larg, const void *rarg)
+{
+	const blkptr_t *lbp = larg;
+	const blkptr_t *rbp = rarg;
+
+	/* Primary key: the vdev id of the first DVA. */
+	uint64_t lvdev = DVA_GET_VDEV(&lbp->blk_dva[0]);
+	uint64_t rvdev = DVA_GET_VDEV(&rbp->blk_dva[0]);
+	if (lvdev != rvdev)
+		return (lvdev < rvdev ? -1 : +1);
+
+	/* Secondary key: the offset of the first DVA. */
+	uint64_t loff = DVA_GET_OFFSET(&lbp->blk_dva[0]);
+	uint64_t roff = DVA_GET_OFFSET(&rbp->blk_dva[0]);
+	if (loff != roff)
+		return (loff < roff ? -1 : +1);
+
+	/*
+	 * FREE/ALLOC pairs are stored without being cancelled, so two
+	 * entries may share the same DVA. Break the tie by birth txg.
+	 */
+	if (lbp->blk_birth != rbp->blk_birth)
+		return (lbp->blk_birth < rbp->blk_birth ? -1 : +1);
+	return (0);
+}
+
+typedef struct sublivelist_verify_block {
+	dva_t svb_dva;	/* one DVA of a block left ALLOCed in a livelist */
+
+	/*
+	 * We need this to check if the block marked as allocated
+	 * in the livelist was freed (and potentially reallocated)
+	 * in the metaslab spacemaps at a later TXG.
+	 */
+	uint64_t svb_allocated_txg;
+} sublivelist_verify_block_t;
+
+static void zdb_print_blkptr(const blkptr_t *bp, int flags);
+
+typedef struct sublivelist_verify_block_refcnt {
+	/* block pointer entry in livelist being verified */
+	blkptr_t svbr_blk;
+
+	/*
+	 * Refcount gets incremented to 1 when we encounter the first
+	 * FREE entry for the svbr_blk block pointer and a node for it
+	 * is created in our ZDB verification/tracking metadata.
+	 *
+	 * As we encounter more FREE entries we increment this counter
+	 * and similarly decrement it whenever we find the respective
+	 * ALLOC entries for this block.
+	 *
+	 * When the refcount gets to 0 it means that all the FREE and
+	 * ALLOC entries of this block have paired up and we no longer
+	 * need to track it in our verification logic (e.g. the node
+	 * containing this struct in our verification data structure
+	 * should be freed).
+	 *
+	 * [refer to sublivelist_verify_blkptr() for the actual code]
+	 */
+	uint32_t svbr_refcnt;
+} sublivelist_verify_block_refcnt_t;
+
+/* Compare two refcount nodes by block pointer only; svbr_refcnt is ignored. */
+static int
+sublivelist_block_refcnt_compare(const void *larg, const void *rarg)
+{
+	const sublivelist_verify_block_refcnt_t *a = larg, *b = rarg;
+	return (livelist_compare(&a->svbr_blk, &b->svbr_blk));
+}
+
+/*
+ * bpobj iteration callback: account for one livelist entry.
+ *
+ * A FREE entry bumps the refcount of its node in sv->sv_pair (creating the
+ * node on first sight). An ALLOC entry either cancels one pending FREE, or,
+ * when no FREE is pending, records the block's DVAs (up to the first empty
+ * one) in sv->sv_leftover along with its birth txg. Always returns 0.
+ */
+static int
+sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
+    dmu_tx_t *tx)
+{
+	ASSERT3P(tx, ==, NULL);
+	struct sublivelist_verify *sv = arg;
+	zfs_btree_index_t where;
+
+	/* svbr_refcnt is ignored by the comparator; 1 suits a first FREE. */
+	sublivelist_verify_block_refcnt_t current = {
+	    .svbr_blk = *bp,
+	    .svbr_refcnt = 1,
+	};
+	sublivelist_verify_block_refcnt_t *pair =
+	    zfs_btree_find(&sv->sv_pair, &current, &where);
+
+	if (free) {
+		/* Either the first FREE for this block, or one more. */
+		if (pair == NULL)
+			zfs_btree_add(&sv->sv_pair, &current);
+		else
+			pair->svbr_refcnt++;
+		return (0);
+	}
+
+	if (pair != NULL) {
+		/* This ALLOC cancels a FREE; drop the node once all match. */
+		if (--pair->svbr_refcnt == 0)
+			zfs_btree_remove_idx(&sv->sv_pair, &where);
+		return (0);
+	}
+
+	/* No pending FREE: the block is currently marked as allocated. */
+	for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
+		const dva_t *dva = &bp->blk_dva[i];
+		if (DVA_IS_EMPTY(dva))
+			break;
+
+		sublivelist_verify_block_t svb = {
+		    .svb_dva = *dva,
+		    .svb_allocated_txg = bp->blk_birth
+		};
+		if (zfs_btree_find(&sv->sv_leftover, &svb, &where) == NULL)
+			zfs_btree_add_idx(&sv->sv_leftover, &svb, &where);
+	}
+
+	return (0);
+}
+
+/*
+ * Deadlist callback: pair up the FREE/ALLOC entries of one sub-livelist in
+ * sv->sv_pair and report every FREE left unmatched; returns the bpobj error.
+ */
+static int
+sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
+{
+	struct sublivelist_verify *sv = args;
+	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
+	    sizeof (sublivelist_verify_block_refcnt_t));
+	int err = bpobj_iterate_nofree(&dle->dle_bpobj,
+	    sublivelist_verify_blkptr, sv, NULL);
+
+	sublivelist_verify_block_refcnt_t *e;
+	zfs_btree_index_t *cookie = NULL;
+	while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
+		char blkbuf[BP_SPRINTF_LEN];
+		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
+		    &e->svbr_blk, B_TRUE);
+		(void) printf("\tERROR: %u unmatched FREE(s): %s\n",
+		    e->svbr_refcnt, blkbuf);
+	}
+	zfs_btree_destroy(&sv->sv_pair);
+	return (err);
+}
+
+/*
+ * B-tree comparator for sublivelist_verify_block_t nodes. Orders by the
+ * vdev, then the offset, then the asize of svb_dva; the txg is ignored.
+ */
+static int
+livelist_block_compare(const void *larg, const void *rarg)
+{
+	const sublivelist_verify_block_t *lb = larg;
+	const sublivelist_verify_block_t *rb = rarg;
+	const dva_t *ld = &lb->svb_dva;
+	const dva_t *rd = &rb->svb_dva;
+
+	if (DVA_GET_VDEV(ld) != DVA_GET_VDEV(rd))
+		return (DVA_GET_VDEV(ld) < DVA_GET_VDEV(rd) ? -1 : +1);
+
+	if (DVA_GET_OFFSET(ld) != DVA_GET_OFFSET(rd))
+		return (DVA_GET_OFFSET(ld) < DVA_GET_OFFSET(rd) ? -1 : +1);
+
+	if (DVA_GET_ASIZE(ld) != DVA_GET_ASIZE(rd))
+		return (DVA_GET_ASIZE(ld) < DVA_GET_ASIZE(rd) ? -1 : +1);
+
+	return (0);
+}
+
+/*
+ * Walk every sub-livelist of dl, reporting errors as they are found and
+ * accumulating the still-unfreed ALLOCs in the caller's sv->sv_leftover.
+ */
+static void
+livelist_verify(dsl_deadlist_t *dl, void *arg)
+{
+	/* arg is the caller's sublivelist_verify_t, passed through untouched */
+	dsl_deadlist_iterate(dl, sublivelist_verify_func, arg);
+}
+
+/*
+ * Verify a single livelist entry, throwing away the leftover-ALLOC tree
+ * (and every other intermediate structure) once the check is done.
+ */
+/* ARGSUSED */
+static int
+sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
+{
+	sublivelist_verify_t scratch;
+	zfs_btree_create(&scratch.sv_leftover, livelist_block_compare,
+	    sizeof (sublivelist_verify_block_t));
+	int rc = sublivelist_verify_func(&scratch, dle);
+	zfs_btree_clear(&scratch.sv_leftover);
+	zfs_btree_destroy(&scratch.sv_leftover);
+	return (rc);
+}
+
+typedef struct metaslab_verify {
+	/*
+	 * Tree containing all the leftover ALLOCs from the livelists
+	 * that are part of this metaslab.
+	 */
+	zfs_btree_t mv_livelist_allocs;
+
+	/*
+	 * Metaslab identity; its offsets span [mv_start, mv_end).
+	 */
+	uint64_t mv_vdid;
+	uint64_t mv_msid;
+	uint64_t mv_start;
+	uint64_t mv_end;
+
+	/*
+	 * What's currently allocated for this metaslab.
+	 */
+	range_tree_t *mv_allocated;
+} metaslab_verify_t;
+
+typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
+
+typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
+    void *arg);
+
+typedef struct unflushed_iter_cb_arg {
+	spa_t *uic_spa;		/* pool whose log space maps are walked */
+	uint64_t uic_txg;	/* sls_txg of the log being iterated */
+	void *uic_arg;		/* opaque argument forwarded to uic_cb */
+	zdb_log_sm_cb_t uic_cb;	/* callback invoked for each entry */
+} unflushed_iter_cb_arg_t;
+
+static int
+iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
+{
+	const unflushed_iter_cb_arg_t *ctx = arg;	/* packed by caller */
+	return ((*ctx->uic_cb)(ctx->uic_spa, sme, ctx->uic_txg, ctx->uic_arg));
+}
+
+/*
+ * Call cb(spa, entry, txg, arg) for every entry of every log space map of
+ * the pool, holding SCL_CONFIG as reader. No-op if the feature is inactive.
+ */
+static void
+iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
+{
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return;
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+	    sls != NULL; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+		unflushed_iter_cb_arg_t uic = {
+			.uic_spa = spa, .uic_txg = sls->sls_txg,
+			.uic_arg = arg, .uic_cb = cb
+		};
+		space_map_t *sm = NULL;
+		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
+		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
+		VERIFY0(space_map_iterate(sm, space_map_length(sm),
+		    iterate_through_spacemap_logs_cb, &uic));
+		space_map_close(sm);
+	}
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+/*
+ * Cross-check a FREEd spacemap range against mv->mv_livelist_allocs and
+ * report each livelist ALLOC in it that was allocated at or before txg.
+ */
+static void
+verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
+    uint64_t offset, uint64_t size)
+{
+	/* Zeroed: the DVA_SET_*() macros only fill in their own bit-fields. */
+	sublivelist_verify_block_t svb = {{{0}}};
+	DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
+	DVA_SET_OFFSET(&svb.svb_dva, offset);
+	DVA_SET_ASIZE(&svb.svb_dva, size);
+	zfs_btree_index_t where;
+	uint64_t end_offset = offset + size;
+
+	/* Exact match first, then any entry in range (may be condensed). */
+	sublivelist_verify_block_t *found =
+	    zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
+	if (found == NULL)
+		found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
+	for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
+	    DVA_GET_OFFSET(&found->svb_dva) < end_offset;
+	    found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+		if (found->svb_allocated_txg <= txg) {
+			(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
+			    "from TXG %llx FREED at TXG %llx\n",
+			    (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
+			    (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
+			    (u_longlong_t)found->svb_allocated_txg,
+			    (u_longlong_t)txg);
+		}
+	}
+}
+
+/*
+ * space_map_iterate() callback that replays one spacemap entry into
+ * mv->mv_allocated, reporting double ALLOCs and double FREEs instead of
+ * applying them. Every non-ALLOC entry is additionally cross-checked
+ * against the livelist ALLOCs of the metaslab. Always returns 0.
+ */
+static int
+metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
+{
+	metaslab_verify_t *mv = arg;
+	const uint64_t offset = sme->sme_offset;
+	const uint64_t size = sme->sme_run;
+	const uint64_t txg = sme->sme_txg;
+	const boolean_t allocated =
+	    range_tree_contains(mv->mv_allocated, offset, size);
+
+	if (sme->sme_type == SM_ALLOC) {
+		if (!allocated) {
+			range_tree_add(mv->mv_allocated, offset, size);
+			return (0);
+		}
+		(void) printf("ERROR: DOUBLE ALLOC: "
+		    "%llu [%llx:%llx] "
+		    "%llu:%llu LOG_SM\n",
+		    (u_longlong_t)txg, (u_longlong_t)offset,
+		    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+		    (u_longlong_t)mv->mv_msid);
+		return (0);
+	}
+
+	if (allocated) {
+		range_tree_remove(mv->mv_allocated, offset, size);
+	} else {
+		(void) printf("ERROR: DOUBLE FREE: "
+		    "%llu [%llx:%llx] "
+		    "%llu:%llu LOG_SM\n",
+		    (u_longlong_t)txg, (u_longlong_t)offset,
+		    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+		    (u_longlong_t)mv->mv_msid);
+	}
+
+	/* A freed range must not be listed as allocated in the livelist. */
+	verify_livelist_allocs(mv, txg, offset, size);
+	return (0);
+}
+
+/*
+ * Log spacemap callback: hand sme to metaslab_spacemap_validation_cb() only
+ * when it belongs to the metaslab described by mv, i.e. it sits on the same
+ * concrete vdev, maps to the same metaslab id, and its txg is not below
+ * metaslab_unflushed_txg() of that metaslab. Anything else is skipped by
+ * returning 0.
+ */
+static int
+spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
+    uint64_t txg, void *arg)
+{
+	metaslab_verify_t *mv = arg;
+	const uint64_t vdid = sme->sme_vdev;
+	const uint64_t offset = sme->sme_offset;
+	vdev_t *vd = vdev_lookup_top(spa, vdid);
+
+	/* Skip indirect vdevs, and vdevs other than the one being verified. */
+	if (!vdev_is_concrete(vd) || vdid != mv->mv_vdid)
+		return (0);
+
+	/* Skip other metaslabs, and entries older than the unflushed txg. */
+	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+	if (ms->ms_id != mv->mv_msid || txg < metaslab_unflushed_txg(ms))
+		return (0);
+
+	ASSERT3U(txg, ==, sme->sme_txg);
+	return (metaslab_spacemap_validation_cb(sme, mv));
+}
+
+/* Check every log spacemap entry that belongs to mv's metaslab. */
+static void
+spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
+{
+	iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
+}
+
+/* Check the metaslab's own spacemap; a NULL sm means nothing to check. */
+static void
+spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
+{
+	if (sm != NULL)
+		VERIFY0(space_map_iterate(sm, space_map_length(sm),
+		    metaslab_spacemap_validation_cb, mv));
+}
+
+static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);
+
+/*
+ * Move the blocks of sv->sv_leftover that lie within the metaslab described
+ * by mv (vdev mv_vdid, offsets [mv_start, mv_end)) to mv_livelist_allocs.
+ * Blocks straddling a metaslab boundary are reported and stay in sv_leftover.
+ */
+static void
+mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
+{
+	zfs_btree_index_t where;
+	sublivelist_verify_block_t *svb;
+	ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
+
+	/* First pass: copy the matching blocks into the metaslab's tree. */
+	for (svb = zfs_btree_first(&sv->sv_leftover, &where); svb != NULL;
+	    svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
+		const dva_t *dva = &svb->svb_dva;
+		const uint64_t vdid = DVA_GET_VDEV(dva);
+		const uint64_t blk_start = DVA_GET_OFFSET(dva);
+		const uint64_t blk_size = DVA_GET_ASIZE(dva);
+		const uint64_t blk_end = blk_start + blk_size;
+
+		if (vdid != mv->mv_vdid)
+			continue;
+		if (blk_start < mv->mv_start) {
+			if (blk_end > mv->mv_start) {
+				(void) printf("ERROR: Found block that crosses "
+				    "metaslab boundary: <%llu:%llx:%llx>\n",
+				    (u_longlong_t)vdid,
+				    (u_longlong_t)blk_start,
+				    (u_longlong_t)blk_size);
+			}
+			continue;
+		}
+		if (blk_start >= mv->mv_end)
+			continue;
+		if (blk_end > mv->mv_end) {
+			(void) printf("ERROR: Found block that crosses "
+			    "metaslab boundary: <%llu:%llx:%llx>\n",
+			    (u_longlong_t)vdid, (u_longlong_t)blk_start,
+			    (u_longlong_t)blk_size);
+			continue;
+		}
+		zfs_btree_add(&mv->mv_livelist_allocs, svb);
+	}
+
+	/* Second pass: drop from sv_leftover what was just transferred. */
+	for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
+	    svb != NULL;
+	    svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+		zfs_btree_remove(&sv->sv_leftover, svb);
+	}
+}
+
+/*
+ * [Livelist Check]
+ * Iterate through all the sublivelists and:
+ * - report leftover frees (**)
+ * - record leftover ALLOCs together with their TXG [see Cross Check]
+ *
+ * (**) Note: Double ALLOCs are valid in datasets that have dedup
+ *      enabled. Similarly double FREEs are allowed as well but
+ *      only if they pair up with a corresponding ALLOC entry once
+ *      we are done with our sublivelist iteration.
+ *
+ * [Spacemap Check]
+ * for each metaslab:
+ * - iterate over spacemap and then the metaslab's entries in the
+ *   spacemap log, then report any double FREEs and ALLOCs (do not
+ *   blow up).
+ *
+ * [Cross Check]
+ * After finishing the Livelist Check phase and while being in the
+ * Spacemap Check phase, we find all the recorded leftover ALLOCs
+ * of the livelist check that are part of the metaslab that we are
+ * currently looking at in the Spacemap Check. We report any entries
+ * that are marked as ALLOCs in the livelists but have been actually
+ * freed (and potentially allocated again) after their TXG stamp in
+ * the spacemaps. Also report any ALLOCs from the livelists that
+ * belong to indirect vdevs (e.g. their vdev completed removal).
+ *
+ * Note that this will miss Log Spacemap entries that cancelled each other
+ * out before being flushed to the metaslab, so we are not guaranteed
+ * to match all erroneous ALLOCs.
+ */
+static void
+livelist_metaslab_validate(spa_t *spa)
+{
+	(void) printf("Verifying deleted livelist entries\n");
+
+	sublivelist_verify_t sv;
+	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
+	    sizeof (sublivelist_verify_block_t));
+	iterate_deleted_livelists(spa, livelist_verify, &sv);
+
+	(void) printf("Verifying metaslab entries\n");
+	vdev_t *rvd = spa->spa_root_vdev;
+	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *vd = rvd->vdev_child[c];
+
+		if (!vdev_is_concrete(vd))
+			continue;
+
+		for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
+			metaslab_t *m = vd->vdev_ms[mid];
+
+			(void) fprintf(stderr,
+			    "\rverifying concrete vdev %llu, "
+			    "metaslab %llu of %llu ...",
+			    (u_longlong_t)vd->vdev_id,
+			    (u_longlong_t)mid,
+			    (u_longlong_t)vd->vdev_ms_count);
+
+			uint64_t shift, start;
+			range_seg_type_t type =
+			    metaslab_calculate_range_tree_type(vd, m,
+			    &start, &shift);
+			metaslab_verify_t mv;
+			mv.mv_allocated = range_tree_create(NULL,
+			    type, NULL, start, shift);
+			mv.mv_vdid = vd->vdev_id;
+			mv.mv_msid = m->ms_id;
+			mv.mv_start = m->ms_start;
+			mv.mv_end = m->ms_start + m->ms_size;
+			zfs_btree_create(&mv.mv_livelist_allocs,
+			    livelist_block_compare,
+			    sizeof (sublivelist_verify_block_t));
+
+			mv_populate_livelist_allocs(&mv, &sv);
+
+			spacemap_check_ms_sm(m->ms_sm, &mv);
+			spacemap_check_sm_log(spa, &mv);
+
+			range_tree_vacate(mv.mv_allocated, NULL, NULL);
+			range_tree_destroy(mv.mv_allocated);
+			zfs_btree_clear(&mv.mv_livelist_allocs);
+			zfs_btree_destroy(&mv.mv_livelist_allocs);
+		}
+	}
+	(void) fprintf(stderr, "\n");
+
+	/*
+	 * If there are any segments in the leftover tree after we walked
+	 * through all the metaslabs in the concrete vdevs then this means
+	 * that we have segments in the livelists that belong to indirect
+	 * vdevs and are marked as allocated.
+	 */
+	if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
+		zfs_btree_destroy(&sv.sv_leftover);
+		return;
+	}
+	(void) printf("ERROR: Found livelist blocks marked as allocated "
+	    "for indirect vdevs:\n");
+
+	zfs_btree_index_t *where = NULL;
+	sublivelist_verify_block_t *svb;
+	while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
+	    NULL) {
+		int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
+		ASSERT3U(vdev_id, <, rvd->vdev_children);
+		vdev_t *vd = rvd->vdev_child[vdev_id];
+		ASSERT(!vdev_is_concrete(vd));
+		(void) printf("<%d:%llx:%llx> TXG %llx\n",
+		    vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+		    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
+		    (u_longlong_t)svb->svb_allocated_txg);
+	}
+	(void) printf("\n");
+	zfs_btree_destroy(&sv.sv_leftover);
+}
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -138,31 +772,45 @@
 usage(void)
 {
 	(void) fprintf(stderr,
-	    "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
+	    "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
 	    "[-I <inflight I/Os>]\n"
 	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
-	    "\t\t[<poolname> [<object> ...]]\n"
-	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset>\n"
-	    "\t\t[<object> ...]\n"
+	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
+	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
+	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
+	    "\t%s [-v] <bookmark>\n"
 	    "\t%s -C [-A] [-U <cache>]\n"
 	    "\t%s -l [-Aqu] <device>\n"
 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 	    "\t%s -O <dataset> <path>\n"
+	    "\t%s -r <dataset> <path> <destination>\n"
 	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 	    "\t%s -E [-A] word0:word1:...:word15\n"
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 	    "<poolname>\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
-	    cmdname, cmdname);
+	    cmdname, cmdname, cmdname, cmdname);
 
 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
 	(void) fprintf(stderr, "    If dataset name is specified, only that "
 	    "dataset is dumped\n");
-	(void) fprintf(stderr, "    If object numbers are specified, only "
-	    "those objects are dumped\n\n");
+	(void) fprintf(stderr,  "    If object numbers or object number "
+	    "ranges are specified, only those\n"
+	    "    objects or ranges are dumped.\n\n");
+	(void) fprintf(stderr,
+	    "    Object ranges take the form <start>:<end>[:<flags>]\n"
+	    "        start    Starting object number\n"
+	    "        end      Ending object number, or -1 for no upper bound\n"
+	    "        flags    Optional flags to select object types:\n"
+	    "            A     All objects (this is the default)\n"
+	    "            d     ZFS directories\n"
+	    "            f     ZFS files \n"
+	    "            m     SPA space maps\n"
+	    "            z     ZAPs\n"
+	    "            -     Negate effect of next flag\n\n");
 	(void) fprintf(stderr, "    Options to control amount of output:\n");
 	(void) fprintf(stderr, "        -b block statistics\n");
 	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
@@ -182,12 +830,15 @@
 	(void) fprintf(stderr, "        -m metaslabs\n");
 	(void) fprintf(stderr, "        -M metaslab groups\n");
 	(void) fprintf(stderr, "        -O perform object lookups by path\n");
+	(void) fprintf(stderr, "        -r copy an object by path to file\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n");
 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
 	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 	(void) fprintf(stderr, "        -v verbose (applies to all "
-	    "others)\n\n");
+	    "others)\n");
+	(void) fprintf(stderr, "        -y perform livelist and metaslab "
+	    "validation on any livelists being deleted\n\n");
 	(void) fprintf(stderr, "    Below options are intended for use "
 	    "with other options:\n");
 	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
@@ -219,6 +870,7 @@
 	    "work with dataset)\n");
 	(void) fprintf(stderr, "        -Y attempt all reconstruction "
 	    "combinations for split blocks\n");
+	(void) fprintf(stderr, "        -Z show ZSTD headers \n");
 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 	    "to make only that option verbose\n");
 	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -421,6 +1073,57 @@
 static void
 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
 {
+	uint64_t *arr;
+	uint64_t oursize;
+	/* Prints uint64_t words in hex; only if dump_opt['d'] is at least 6. */
+	if (dump_opt['d'] < 6)
+		return;
+
+	if (data == NULL) {
+		dmu_object_info_t doi;
+
+		VERIFY0(dmu_object_info(os, object, &doi));
+		size = doi.doi_max_offset;
+		/*
+		 * Cap at 1 mebibyte to prevent allocation failures and
+		 * nigh-infinite printing if the object is extremely large.
+		 */
+		oursize = MIN(size, 1 << 20);
+		arr = kmem_alloc(oursize, KM_SLEEP);
+
+		int err = dmu_read(os, object, 0, oursize, arr, 0);
+		if (err != 0) {
+			(void) printf("got error %u from dmu_read\n", err);
+			kmem_free(arr, oursize);
+			return;
+		}
+	} else {
+		/* Caller owns the buffer; still cap how much gets printed. */
+		oursize = MIN(size, 1 << 20);
+		arr = data;
+	}
+
+	if (size == 0) {
+		/* Don't leak the (zero-length) buffer allocated above. */
+		if (data == NULL)
+			kmem_free(arr, oursize);
+		(void) printf("\t\t[]\n");
+		return;
+	}
+
+	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
+	for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
+		if (i % 4 != 0)
+			(void) printf(", %0llx", (u_longlong_t)arr[i]);
+		else
+			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
+	}
+	if (oursize != size)
+		(void) printf(", ... ");
+	(void) printf("]\n");
+
+	if (data == NULL)
+		kmem_free(arr, oursize);
 }
 
 /*ARGSUSED*/
@@ -448,7 +1151,21 @@
 		(void) zap_lookup(os, object, attr.za_name,
 		    attr.za_integer_length, attr.za_num_integers, prop);
 		if (attr.za_integer_length == 1) {
-			(void) printf("%s", (char *)prop);
+			if (strcmp(attr.za_name,
+			    DSL_CRYPTO_KEY_MASTER_KEY) == 0 ||
+			    strcmp(attr.za_name,
+			    DSL_CRYPTO_KEY_HMAC_KEY) == 0 ||
+			    strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 ||
+			    strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 ||
+			    strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) {
+				uint8_t *u8 = prop;
+
+				for (i = 0; i < attr.za_num_integers; i++) {
+					(void) printf("%02x", u8[i]);
+				}
+			} else {
+				(void) printf("%s", (char *)prop);
+			}
 		} else {
 			for (i = 0; i < attr.za_num_integers; i++) {
 				switch (attr.za_integer_length) {
@@ -499,12 +1216,16 @@
 		(void) printf("\t\tcomp = %s\n", comp);
 		(void) printf("\t\tuncomp = %s\n", uncomp);
 	}
-	if (size >= sizeof (*bpop)) {
+	if (size >= BPOBJ_SIZE_V2) {
 		(void) printf("\t\tsubobjs = %llu\n",
 		    (u_longlong_t)bpop->bpo_subobjs);
 		(void) printf("\t\tnum_subobjs = %llu\n",
 		    (u_longlong_t)bpop->bpo_num_subobjs);
 	}
+	if (size >= sizeof (*bpop)) {
+		(void) printf("\t\tnum_freed = %llu\n",
+		    (u_longlong_t)bpop->bpo_num_freed);
+	}
 
 	if (dump_opt['d'] < 5)
 		return;
@@ -519,7 +1240,8 @@
 			(void) printf("got error %u from dmu_read\n", err);
 			break;
 		}
-		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
+		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp,
+		    BP_GET_FREE(&bp));
 		(void) printf("\t%s\n", blkbuf);
 	}
 }
@@ -760,6 +1482,12 @@
 }
 
 static int
+get_log_spacemap_refcount(spa_t *spa)
+{
+	return (avl_numnodes(&spa->spa_sm_logs_by_txg)); /* # of log SMs */
+}
+
+static int
 verify_spacemap_refcounts(spa_t *spa)
 {
 	uint64_t expected_refcount = 0;
@@ -773,6 +1501,7 @@
 	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
 	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
 	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
+	actual_refcount += get_log_spacemap_refcount(spa);
 
 	if (expected_refcount != actual_refcount) {
 		(void) printf("space map refcount mismatch: expected %lld != "
@@ -816,11 +1545,20 @@
 		    sizeof (word), &word, DMU_READ_PREFETCH));
 
 		if (sm_entry_is_debug(word)) {
-			(void) printf("\t    [%6llu] %s: txg %llu pass %llu\n",
-			    (u_longlong_t)entry_id,
-			    ddata[SM_DEBUG_ACTION_DECODE(word)],
-			    (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
-			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+			uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
+			uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
+			if (de_txg == 0) {
+				(void) printf(
+				    "\t    [%6llu] PADDING\n",
+				    (u_longlong_t)entry_id);
+			} else {
+				(void) printf(
+				    "\t    [%6llu] %s: txg %llu pass %llu\n",
+				    (u_longlong_t)entry_id,
+				    ddata[SM_DEBUG_ACTION_DECODE(word)],
+				    (u_longlong_t)de_txg,
+				    (u_longlong_t)de_sync_pass);
+			}
 			entry_id++;
 			continue;
 		}
@@ -871,7 +1609,7 @@
 			alloc -= entry_run;
 		entry_id++;
 	}
-	if ((uint64_t)alloc != space_map_allocated(sm)) {
+	if (alloc != space_map_allocated(sm)) {
 		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
 		    "with space map summary (%lld)\n",
 		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
@@ -883,16 +1621,16 @@
 {
 	char maxbuf[32];
 	range_tree_t *rt = msp->ms_allocatable;
-	avl_tree_t *t = &msp->ms_allocatable_by_size;
+	zfs_btree_t *t = &msp->ms_allocatable_by_size;
 	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
 
 	/* max sure nicenum has enough space */
 	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 
-	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
+	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));
 
 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
-	    "segments", avl_numnodes(t), "maxsize", maxbuf,
+	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
 	    "freepct", free_pct);
 	(void) printf("\tIn-memory histogram:\n");
 	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
@@ -935,25 +1673,51 @@
 		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 	}
 
-	ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+	if (vd->vdev_ops == &vdev_draid_ops)
+		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
+	else
+		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);
+
 	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
+
+	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
+		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
+		    (u_longlong_t)metaslab_unflushed_txg(msp));
+	}
 }
 
 static void
 print_vdev_metaslab_header(vdev_t *vd)
 {
 	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
-	const char *bias_str;
+	const char *bias_str = "";
+	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
+		bias_str = VDEV_ALLOC_BIAS_LOG;
+	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
+		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
+	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
+		bias_str = VDEV_ALLOC_BIAS_DEDUP;
+	}
 
-	bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
-	    VDEV_ALLOC_BIAS_LOG :
-	    (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
-	    (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
-	    vd->vdev_islog ? "log" : "";
+	uint64_t ms_flush_data_obj = 0;
+	if (vd->vdev_top_zap != 0) {
+		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
+		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
+		    sizeof (uint64_t), 1, &ms_flush_data_obj);
+		if (error != ENOENT) {
+			ASSERT0(error);
+		}
+	}
 
-	(void) printf("\tvdev %10llu   %s\n"
-	    "\t%-10s%5llu   %-19s   %-15s   %-12s\n",
-	    (u_longlong_t)vd->vdev_id, bias_str,
+	(void) printf("\tvdev %10llu   %s",
+	    (u_longlong_t)vd->vdev_id, bias_str);
+
+	if (ms_flush_data_obj != 0) {
+		(void) printf("   ms_unflushed_phys object %llu",
+		    (u_longlong_t)ms_flush_data_obj);
+	}
+
+	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
 	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
 	    "offset", "spacemap", "free");
 	(void) printf("\t%15s   %19s   %15s   %12s\n",
@@ -1083,24 +1847,24 @@
 
 	(void) printf("\nMetaslabs:\n");
 
-	if (!dump_opt['d'] && zopt_objects > 0) {
-		c = zopt_object[0];
+	if (!dump_opt['d'] && zopt_metaslab_args > 0) {
+		c = zopt_metaslab[0];
 
 		if (c >= children)
 			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
 
-		if (zopt_objects > 1) {
+		if (zopt_metaslab_args > 1) {
 			vd = rvd->vdev_child[c];
 			print_vdev_metaslab_header(vd);
 
-			for (m = 1; m < zopt_objects; m++) {
-				if (zopt_object[m] < vd->vdev_ms_count)
+			for (m = 1; m < zopt_metaslab_args; m++) {
+				if (zopt_metaslab[m] < vd->vdev_ms_count)
 					dump_metaslab(
-					    vd->vdev_ms[zopt_object[m]]);
+					    vd->vdev_ms[zopt_metaslab[m]]);
 				else
 					(void) fprintf(stderr, "bad metaslab "
 					    "number %llu\n",
-					    (u_longlong_t)zopt_object[m]);
+					    (u_longlong_t)zopt_metaslab[m]);
 			}
 			(void) printf("\n");
 			return;
@@ -1120,6 +1884,27 @@
 }
 
 static void
+dump_log_spacemaps(spa_t *spa)
+{
+	avl_tree_t *logs = &spa->spa_sm_logs_by_txg;
+	/* Nothing to print unless the log spacemap feature is active. */
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return;
+	(void) printf("\nLog Space Maps in Pool:\n");
+	for (spa_log_sm_t *log = avl_first(logs); log != NULL;
+	    log = AVL_NEXT(logs, log)) {
+		space_map_t *map = NULL;
+		VERIFY0(space_map_open(&map, spa_meta_objset(spa),
+		    log->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
+		(void) printf("Log Spacemap object %llu txg %llu\n",
+		    (u_longlong_t)log->sls_sm_obj, (u_longlong_t)log->sls_txg);
+		dump_spacemap(spa->spa_meta_objset, map);
+		space_map_close(map);
+	}
+	(void) printf("\n");
+}
+
+static void
 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
 {
 	const ddt_phys_t *ddp = dde->dde_phys;
@@ -1405,7 +2190,69 @@
 }
 
 static void
-snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
+snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
+    const blkptr_t *bp)
+{
+	abd_t *pabd;
+	void *buf;
+	zio_t *zio;
+	zfs_zstdhdr_t zstd_hdr;
+	int error;
+
+	/* Only non-hole ZSTD-compressed bps carry a header to report. */
+	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD || BP_IS_HOLE(bp))
+		return;
+
+	if (BP_IS_EMBEDDED(bp)) {
+		buf = malloc(SPA_MAXBLOCKSIZE);
+		if (buf == NULL) {
+			(void) fprintf(stderr, "out of memory\n");
+			exit(1);
+		}
+		decode_embedded_bp_compressed(bp, buf);
+		memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
+		free(buf);
+		zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
+		zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
+		(void) snprintf(blkbuf + strlen(blkbuf),
+		    buflen - strlen(blkbuf),
+		    " ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
+		    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
+		    zfs_get_hdrlevel(&zstd_hdr));
+		return;
+	}
+
+	pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
+	zio = zio_root(spa, NULL, NULL, 0);
+
+	/* Decrypt but don't decompress so we can read the compression header */
+	zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL,
+	    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS,
+	    NULL));
+	error = zio_wait(zio);
+	if (error) {
+		(void) fprintf(stderr, "read failed: %d\n", error);
+		abd_free(pabd);	/* don't leak the ABD on failure */
+		return;
+	}
+	buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp));
+	memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
+	zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
+	zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
+
+	(void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf),
+	    " ZSTD:size=%u:version=%u:level=%u:NORMAL",
+	    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
+	    zfs_get_hdrlevel(&zstd_hdr));
+
+	abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
+	/* Release the ABD allocated above now that the copy is returned. */
+	abd_free(pabd);
+}
+
+static void
+snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
+    boolean_t bp_freed)
 {
 	const dva_t *dva = bp->blk_dva;
 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
@@ -1413,6 +2260,10 @@
 
 	if (dump_opt['b'] >= 6) {
 		snprintf_blkptr(blkbuf, buflen, bp);
+		if (bp_freed) {
+			(void) snprintf(blkbuf + strlen(blkbuf),
+			    buflen - strlen(blkbuf), " %s", "FREE");
+		}
 		return;
 	}
 
@@ -1450,6 +2301,9 @@
 		    (u_longlong_t)BP_GET_FILL(bp),
 		    (u_longlong_t)bp->blk_birth,
 		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
+		if (bp_freed)
+			(void) snprintf(blkbuf + strlen(blkbuf),
+			    buflen - strlen(blkbuf), " %s", "FREE");
 		(void) snprintf(blkbuf + strlen(blkbuf),
 		    buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx",
 		    (u_longlong_t)bp->blk_cksum.zc_word[0],
@@ -1460,7 +2314,7 @@
 }
 
 static void
-print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
+print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
     const dnode_phys_t *dnp)
 {
 	char blkbuf[BP_SPRINTF_LEN];
@@ -1483,7 +2337,9 @@
 		}
 	}
 
-	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
+	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
+	if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
+		snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
 	(void) printf("%s\n", blkbuf);
 }
 
@@ -1496,7 +2352,7 @@
 	if (bp->blk_birth == 0)
 		return (0);
 
-	print_indirect(bp, zb, dnp);
+	print_indirect(spa, bp, zb, dnp);
 
 	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
 		arc_flags_t flags = ARC_FLAG_WAIT;
@@ -1505,6 +2361,7 @@
 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 		arc_buf_t *buf;
 		uint64_t fill = 0;
+		ASSERT(!BP_IS_REDACTED(bp));
 
 		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
@@ -1717,12 +2574,12 @@
 
 /* ARGSUSED */
 static int
-dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
 {
 	char blkbuf[BP_SPRINTF_LEN];
 
 	ASSERT(bp->blk_birth != 0);
-	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
+	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed);
 	(void) printf("\t%s\n", blkbuf);
 	return (0);
 }
@@ -1747,14 +2604,28 @@
 	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
 		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
 		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
-		(void) printf("    %*s: object %llu, %llu local blkptrs, "
-		    "%llu subobjs in object, %llu, %s (%s/%s comp)\n",
-		    indent * 8, name,
-		    (u_longlong_t)bpo->bpo_object,
-		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
-		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
-		    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
-		    bytes, comp, uncomp);
+		if (bpo->bpo_havefreed) {
+			(void) printf("    %*s: object %llu, %llu local "
+			    "blkptrs, %llu freed, %llu subobjs in object %llu, "
+			    "%s (%s/%s comp)\n",
+			    indent * 8, name,
+			    (u_longlong_t)bpo->bpo_object,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
+			    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
+			    bytes, comp, uncomp);
+		} else {
+			(void) printf("    %*s: object %llu, %llu local "
+			    "blkptrs, %llu subobjs in object %llu, "
+			    "%s (%s/%s comp)\n",
+			    indent * 8, name,
+			    (u_longlong_t)bpo->bpo_object,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
+			    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
+			    bytes, comp, uncomp);
+		}
 
 		for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
 			uint64_t subobj;
@@ -1774,11 +2645,22 @@
 			bpobj_close(&subbpo);
 		}
 	} else {
-		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
-		    indent * 8, name,
-		    (u_longlong_t)bpo->bpo_object,
-		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
-		    bytes);
+		if (bpo->bpo_havefreed) {
+			(void) printf("    %*s: object %llu, %llu blkptrs, "
+			    "%llu freed, %s\n",
+			    indent * 8, name,
+			    (u_longlong_t)bpo->bpo_object,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
+			    bytes);
+		} else {
+			(void) printf("    %*s: object %llu, %llu blkptrs, "
+			    "%s\n",
+			    indent * 8, name,
+			    (u_longlong_t)bpo->bpo_object,
+			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+			    bytes);
+		}
 	}
 
 	if (dump_opt['d'] < 5)
@@ -1791,6 +2673,128 @@
 	}
 }
 
+/*
+ * Print the bookmark called name, which has the form <dataset>#<bookmark>.
+ * If print_redact is set and the bookmark has a redaction object, also
+ * print a summary of its redaction list; if print_list is set as well,
+ * print every entry of that list.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * dsl_bookmark_lookup() or dmu_read().
+ */
+static int
+dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact,
+    boolean_t print_list)
+{
+	objset_t *mos = dp->dp_spa->spa_meta_objset;
+	zfs_bookmark_phys_t prop;
+	int err = dsl_bookmark_lookup(dp, name, NULL, &prop);
+
+	if (err != 0)
+		return (err);
+
+	(void) printf("\t#%s: ", strchr(name, '#') + 1);
+	(void) printf("{guid: %llx creation_txg: %llu creation_time: "
+	    "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid,
+	    (u_longlong_t)prop.zbm_creation_txg,
+	    (u_longlong_t)prop.zbm_creation_time,
+	    (u_longlong_t)prop.zbm_redaction_obj);
+
+	/* Callers may only ask for the entry list together with the summary. */
+	IMPLY(print_list, print_redact);
+	if (!print_redact || prop.zbm_redaction_obj == 0)
+		return (0);
+
+	redaction_list_t *rl;
+	VERIFY0(dsl_redaction_list_hold_obj(dp,
+	    prop.zbm_redaction_obj, FTAG, &rl));
+	redaction_list_phys_t *rlp = rl->rl_phys;
+
+	/* Both "last" fields at UINT64_MAX are reported as complete. */
+	(void) printf("\tRedacted:\n\t\tProgress: ");
+	if (rlp->rlp_last_object == UINT64_MAX &&
+	    rlp->rlp_last_blkid == UINT64_MAX) {
+		(void) printf("complete\n");
+	} else {
+		(void) printf("%llu %llu (incomplete)\n",
+		    (u_longlong_t)rlp->rlp_last_object,
+		    (u_longlong_t)rlp->rlp_last_blkid);
+	}
+	(void) printf("\t\tSnapshots: [");
+	for (unsigned int snap = 0; snap < rlp->rlp_num_snaps; snap++) {
+		(void) printf("%s%0llu", snap > 0 ? ", " : "",
+		    (u_longlong_t)rlp->rlp_snaps[snap]);
+	}
+	(void) printf("]\n\t\tLength: %llu\n",
+	    (u_longlong_t)rlp->rlp_num_entries);
+
+	/* With no entries to list, drop the hold and stop here. */
+	if (!print_list || rlp->rlp_num_entries == 0) {
+		if (print_list)
+			(void) printf("\t\tRedaction List: []\n\n");
+		dsl_redaction_list_rele(rl, FTAG);
+		return (0);
+	}
+
+	/* Read doi_max_offset bytes of the redaction object into memory. */
+	dmu_object_info_t doi;
+	VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi));
+	uint64_t size = doi.doi_max_offset;
+	redact_block_phys_t *rbp_buf = kmem_alloc(size, KM_SLEEP);
+
+	err = dmu_read(mos, prop.zbm_redaction_obj, 0, size,
+	    rbp_buf, 0);
+	if (err != 0) {
+		dsl_redaction_list_rele(rl, FTAG);
+		kmem_free(rbp_buf, size);
+		return (err);
+	}
+
+	/* Entries after the first are preceded by ",\n\t\t". */
+	(void) printf("\t\tRedaction List: [");
+	for (size_t i = 0; i < rlp->rlp_num_entries; i++) {
+		redact_block_phys_t *rbp = &rbp_buf[i];
+
+		(void) printf("%s{object: %llx, offset: %llx, "
+		    "blksz: %x, count: %llx}", i == 0 ? "" : ",\n\t\t",
+		    (u_longlong_t)rbp->rbp_object,
+		    (u_longlong_t)rbp->rbp_blkid,
+		    (uint_t)(redact_block_get_size(rbp)),
+		    (u_longlong_t)redact_block_get_count(rbp));
+	}
+	(void) printf("]\n\n");
+
+	dsl_redaction_list_rele(rl, FTAG);
+	kmem_free(rbp_buf, size);
+	return (0);
+}
+
+/* At verbosity >= 4, print each bookmark of os through dump_bookmark(). */
+static void
+dump_bookmarks(objset_t *os, int verbosity)
+{
+	dsl_dataset_t *ds = dmu_objset_ds(os);
+	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
+	objset_t *mos = os->os_spa->spa_meta_objset;
+	zap_attribute_t entry;
+	zap_cursor_t cursor;
+	if (verbosity < 4)
+		return;
+	dsl_pool_config_enter(dp, FTAG);
+	zap_cursor_init(&cursor, mos, ds->ds_bookmarks_obj);
+	while (zap_cursor_retrieve(&cursor, &entry) == 0) {
+		char osname[ZFS_MAX_DATASET_NAME_LEN];
+		char buf[ZFS_MAX_DATASET_NAME_LEN];
+		dmu_objset_name(os, osname);
+		VERIFY3S(0, <=, snprintf(buf, sizeof (buf), "%s#%s", osname,
+		    entry.za_name));
+		(void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6);
+		zap_cursor_advance(&cursor);
+	}
+	zap_cursor_fini(&cursor);
+	dsl_pool_config_exit(dp, FTAG);
+}
+
 static void
 bpobj_count_refd(bpobj_t *bpo)
 {
@@ -1818,36 +2822,59 @@
 	}
 }
 
-static void
-dump_deadlist(dsl_deadlist_t *dl)
+static int
+dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle)
 {
-	dsl_deadlist_entry_t *dle;
-	uint64_t unused;
+	spa_t *spa = arg;
+	uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
+	if (dle->dle_bpobj.bpo_object != empty_bpobj)
+		bpobj_count_refd(&dle->dle_bpobj);
+	return (0);
+}
+
+/*
+ * dsl_deadlist_iterate() callback: print one entry's mintxg and bpobj
+ * object number, via dump_full_bpobj() when dump_opt['d'] >= 5.
+ */
+static int
+dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle)
+{
+	char buf[128];
+	ASSERT(arg == NULL);
+	/* %llu takes an unsigned argument, hence the u_longlong_t casts. */
+	(void) snprintf(buf, sizeof (buf), "mintxg %llu -> obj %llu",
+	    (u_longlong_t)dle->dle_mintxg,
+	    (u_longlong_t)dle->dle_bpobj.bpo_object);
+	if (dump_opt['d'] >= 5)
+		dump_full_bpobj(&dle->dle_bpobj, buf, 0);
+	else
+		(void) printf("%s\n", buf);
+	return (0);
+}
+
+static void
+dump_blkptr_list(dsl_deadlist_t *dl, char *name)
+{
 	char bytes[32];
 	char comp[32];
 	char uncomp[32];
-	uint64_t empty_bpobj =
-	    dmu_objset_spa(dl->dl_os)->spa_dsl_pool->dp_empty_bpobj;
-
-	/* force the tree to be loaded */
-	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
+	char entries[32];
+	spa_t *spa = dmu_objset_spa(dl->dl_os);
+	uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
 
 	if (dl->dl_oldfmt) {
 		if (dl->dl_bpobj.bpo_object != empty_bpobj)
 			bpobj_count_refd(&dl->dl_bpobj);
 	} else {
 		mos_obj_refd(dl->dl_object);
-		for (dle = avl_first(&dl->dl_tree); dle;
-		    dle = AVL_NEXT(&dl->dl_tree, dle)) {
-			if (dle->dle_bpobj.bpo_object != empty_bpobj)
-				bpobj_count_refd(&dle->dle_bpobj);
-		}
+		dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa);
 	}
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
+	CTASSERT(sizeof (entries) >= NN_NUMBUF_SZ);
 
 	if (dump_opt['d'] < 3)
 		return;
@@ -1860,30 +2887,65 @@
 	zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
 	zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
 	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
-	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
-	    bytes, comp, uncomp);
+	zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries));
+	(void) printf("\n    %s: %s (%s/%s comp), %s entries\n",
+	    name, bytes, comp, uncomp, entries);
 
 	if (dump_opt['d'] < 4)
 		return;
 
 	(void) printf("\n");
 
-	for (dle = avl_first(&dl->dl_tree); dle;
-	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
-		if (dump_opt['d'] >= 5) {
-			char buf[128];
-			(void) snprintf(buf, sizeof (buf),
-			    "mintxg %llu -> obj %llu",
-			    (longlong_t)dle->dle_mintxg,
-			    (longlong_t)dle->dle_bpobj.bpo_object);
+	dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL);
+}
 
-			dump_full_bpobj(&dle->dle_bpobj, buf, 0);
-		} else {
-			(void) printf("mintxg %llu -> obj %llu\n",
-			    (longlong_t)dle->dle_mintxg,
-			    (longlong_t)dle->dle_bpobj.bpo_object);
-		}
+static int
+verify_dd_livelist(objset_t *os)
+{
+	uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp;
+	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
+	dsl_dir_t  *dd = os->os_dsl_dataset->ds_dir;
+
+	ASSERT(!dmu_objset_is_snapshot(os));
+	if (!dsl_deadlist_is_open(&dd->dd_livelist))
+		return (0);
+
+	/* Iterate through the livelist to check for duplicates */
+	dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight,
+	    NULL);
+
+	dsl_pool_config_enter(dp, FTAG);
+	dsl_deadlist_space(&dd->dd_livelist, &ll_used,
+	    &ll_comp, &ll_uncomp);
+
+	dsl_dataset_t *origin_ds;
+	ASSERT(dsl_pool_config_held(dp));
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds));
+	VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset,
+	    &used, &comp, &uncomp));
+	dsl_dataset_rele(origin_ds, FTAG);
+	dsl_pool_config_exit(dp, FTAG);
+	/*
+	 * It's possible that the dataset's uncomp space is larger than the
+	 * livelist's because livelists do not track embedded block pointers.
+	 */
+	if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) {
+		char nice_used[32], nice_comp[32], nice_uncomp[32];
+		(void) printf("Discrepancy in space accounting:\n");
+		zdb_nicenum(used, nice_used, sizeof (nice_used));
+		zdb_nicenum(comp, nice_comp, sizeof (nice_comp));
+		zdb_nicenum(uncomp, nice_uncomp, sizeof (nice_uncomp));
+		(void) printf("dir: used %s, comp %s, uncomp %s\n",
+		    nice_used, nice_comp, nice_uncomp);
+		zdb_nicenum(ll_used, nice_used, sizeof (nice_used));
+		zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp));
+		zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp));
+		(void) printf("livelist: used %s, comp %s, uncomp %s\n",
+		    nice_used, nice_comp, nice_uncomp);
+		return (1);
 	}
+	return (0);
 }
 
 static avl_tree_t idx_tree;
@@ -1893,19 +2955,26 @@
 static sa_attr_type_t *sa_attr_table = NULL;
 
 static int
-open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
+open_objset(const char *path, void *tag, objset_t **osp)
 {
 	int err;
 	uint64_t sa_attrs = 0;
 	uint64_t version = 0;
 
 	VERIFY3P(sa_os, ==, NULL);
-	err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp);
+	/*
+	 * We can't own an objset if it's redacted.  Therefore, we do this
+	 * dance: hold the objset, then acquire a long hold on its dataset, then
+	 * release the pool (which is held as part of holding the objset).
+	 */
+	err = dmu_objset_hold(path, tag, osp);
 	if (err != 0) {
-		(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
-		    strerror(err));
+		(void) fprintf(stderr, "failed to hold dataset '%s': %s\n",
+		    path, strerror(err));
 		return (err);
 	}
+	dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
+	dsl_pool_rele(dmu_objset_pool(*osp), tag);
 
 	if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
 		(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
@@ -1919,13 +2988,14 @@
 		if (err != 0) {
 			(void) fprintf(stderr, "sa_setup failed: %s\n",
 			    strerror(err));
-			dmu_objset_disown(*osp, B_FALSE, tag);
+			dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
+			dsl_dataset_rele(dmu_objset_ds(*osp), tag);
 			*osp = NULL;
 		}
 	}
 	sa_os = *osp;
 
-	return (0);
+	return (err);
 }
 
 static void
@@ -1934,7 +3004,8 @@
 	VERIFY3P(os, ==, sa_os);
 	if (os->os_sa != NULL)
 		sa_tear_down(os);
-	dmu_objset_disown(os, B_FALSE, tag);
+	dsl_dataset_long_rele(dmu_objset_ds(os), tag);
+	dsl_dataset_rele(dmu_objset_ds(os), tag);
 	sa_attr_table = NULL;
 	sa_os = NULL;
 }
@@ -2031,13 +3102,22 @@
 	(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
 	    sa_xattr_size, sa_xattr_entries);
 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
+		boolean_t can_print = !dump_opt['P'];
 		uchar_t *value;
 		uint_t cnt, idx;
 
 		(void) printf("\t\t%s = ", nvpair_name(elem));
 		nvpair_value_byte_array(elem, &value, &cnt);
+
 		for (idx = 0; idx < cnt; ++idx) {
-			if (isprint(value[idx]))
+			if (!isprint(value[idx])) {
+				can_print = B_FALSE;
+				break;
+			}
+		}
+
+		for (idx = 0; idx < cnt; ++idx) {
+			if (can_print)
 				(void) putchar(value[idx]);
 			else
 				(void) printf("\\%3.3o", value[idx]);
@@ -2049,6 +3129,28 @@
 	free(sa_xattr_packed);
 }
 
+/* Print the symlink target held in hdl's ZPL_SYMLINK attribute, if any. */
+static void
+dump_znode_symlink(sa_handle_t *hdl)
+{
+	char linktarget[MAXPATHLEN];
+	int sa_symlink_size = 0;
+
+	if (sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size) != 0 ||
+	    sa_symlink_size == 0)
+		return;
+	/* Leave room for the NUL terminator stored below. */
+	if (sa_symlink_size >= sizeof (linktarget)) {
+		(void) printf("symlink size %d is too large\n",
+		    sa_symlink_size);
+		return;
+	}
+	linktarget[sa_symlink_size] = '\0';
+	if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK],
+	    &linktarget, sa_symlink_size) == 0)
+		(void) printf("\ttarget	%s\n", linktarget);
+}
+
 /*ARGSUSED*/
 static void
 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
@@ -2113,6 +3215,9 @@
 		}
 		(void) printf("\tpath	%s\n", path);
 	}
+
+	if (S_ISLNK(mode))
+		dump_znode_symlink(hdl);
 	dump_uidgid(os, uid, gid);
 	(void) printf("\tatime	%s", ctime(&z_atime));
 	(void) printf("\tmtime	%s", ctime(&z_mtime));
@@ -2211,9 +3316,49 @@
 	dump_unknown,		/* Unknown type, must be last	*/
 };
 
+/*
+ * Decide whether an object of type obj_type is selected by flags, a mask
+ * of ZOR_FLAG_* bits.  Directories, plain files, space maps and ZAP
+ * objects each have their own bit.  Returns B_TRUE on a match and
+ * B_FALSE otherwise.
+ */
+static boolean_t
+match_object_type(dmu_object_type_t obj_type, uint64_t flags)
+{
+	uint64_t wanted;
+
+	switch (obj_type) {
+	case DMU_OT_DIRECTORY_CONTENTS:
+		wanted = ZOR_FLAG_DIRECTORY;
+		break;
+	case DMU_OT_PLAIN_FILE_CONTENTS:
+		wanted = ZOR_FLAG_PLAIN_FILE;
+		break;
+	case DMU_OT_SPACE_MAP:
+		wanted = ZOR_FLAG_SPACE_MAP;
+		break;
+	default:
+		/* Any type that zdb_ot_name() calls "zap" uses the ZAP bit. */
+		if (strcmp(zdb_ot_name(obj_type), "zap") == 0) {
+			wanted = ZOR_FLAG_ZAP;
+			break;
+		}
+		/*
+		 * If all bits except some of the supported flags are
+		 * set, the user combined the all-types flag (A) with
+		 * a negated flag to exclude some types (e.g. A-f to
+		 * show all object types except plain files).
+		 */
+		return ((flags | ZOR_SUPPORTED_FLAGS) == ZOR_FLAG_ALL_TYPES ?
+		    B_TRUE : B_FALSE);
+	}
+
+	return ((flags & wanted) != 0 ? B_TRUE : B_FALSE);
+}
+
 static void
-dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
-    uint64_t *dnode_slots_used)
+dump_object(objset_t *os, uint64_t object, int verbosity,
+    boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags)
 {
 	dmu_buf_t *db = NULL;
 	dmu_object_info_t doi;
@@ -2270,6 +3415,13 @@
 		}
 	}
 
+	/*
+	 * Default to showing all object types if no flags were specified.
+	 */
+	if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES &&
+	    !match_object_type(doi.doi_type, flags))
+		goto out;
+
 	if (dnode_slots_used)
 		*dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
 
@@ -2290,7 +3442,25 @@
 		    " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
 	}
 
-	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
+	if (doi.doi_compress == ZIO_COMPRESS_INHERIT &&
+	    ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) {
+		const char *compname = NULL;
+		if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION,
+		    ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel),
+		    &compname) == 0) {
+			(void) snprintf(aux + strlen(aux),
+			    sizeof (aux) - strlen(aux), " (Z=inherit=%s)",
+			    compname);
+		} else {
+			(void) snprintf(aux + strlen(aux),
+			    sizeof (aux) - strlen(aux),
+			    " (Z=inherit=%s-unknown)",
+			    ZDB_COMPRESS_NAME(os->os_compress));
+		}
+	} else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) {
+		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
+		    " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress));
+	} else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
 		    " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
 	}
@@ -2332,7 +3502,7 @@
 			(void) printf("\t\t(object encrypted)\n");
 		}
 
-		*print_header = 1;
+		*print_header = B_TRUE;
 	}
 
 	if (verbosity >= 5)
@@ -2373,6 +3543,7 @@
 		}
 	}
 
+out:
 	if (db != NULL)
 		dmu_buf_rele(db, FTAG);
 	if (dnode_held)
@@ -2403,6 +3574,7 @@
 	mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
 	mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
 	mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
+	mos_obj_refd(ds->ds_bookmarks_obj);
 
 	if (!dsl_dataset_is_snapshot(ds)) {
 		count_dir_mos_objects(ds->ds_dir);
@@ -2412,8 +3584,112 @@
 static const char *objset_types[DMU_OST_NUMTYPES] = {
 	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
 
+/*
+ * Parse a string denoting a range of object IDs of the form
+ * <start>[:<end>[:flags]], and store the results in zor.  A bare
+ * <start> sets both ends of the range and leaves zor->zor_flags untouched.
+ * Return 0 on success. On error, return 1 and update the msg
+ * pointer to point to a descriptive error message.
+ */
+static int
+parse_object_range(char *range, zopt_object_range_t *zor, char **msg)
+{
+	uint64_t flags = 0;
+	char *p, *s, *dup, *flagstr;
+	int i, rc = 0;
+
+	if (strchr(range, ':') == NULL) {
+		zor->zor_obj_start = strtoull(range, &p, 0);
+		if (*p != '\0') {
+			*msg = "Invalid characters in object ID";
+			rc = 1;
+		}
+		zor->zor_obj_end = zor->zor_obj_start;
+		return (rc);
+	}
+
+	if (strchr(range, ':') == range) {
+		*msg = "Invalid leading colon";
+		return (1);
+	}
+
+	if (range[strlen(range) - 1] == ':') {
+		*msg = "Invalid trailing colon";
+		return (1);
+	}
+
+	/* strtok() modifies its input, so tokenize a private copy. */
+	dup = strdup(range);
+	if (dup == NULL) {
+		*msg = "Out of memory";
+		return (1);
+	}
+	s = strtok(dup, ":");
+	zor->zor_obj_start = strtoull(s, &p, 0);
+
+	if (*p != '\0') {
+		*msg = "Invalid characters in start object ID";
+		rc = 1;
+		goto out;
+	}
+
+	s = strtok(NULL, ":");
+	zor->zor_obj_end = strtoull(s, &p, 0);
+
+	if (*p != '\0') {
+		*msg = "Invalid characters in end object ID";
+		rc = 1;
+		goto out;
+	}
+
+	if (zor->zor_obj_start > zor->zor_obj_end) {
+		*msg = "Start object ID may not exceed end object ID";
+		rc = 1;
+		goto out;
+	}
+
+	flagstr = strtok(NULL, ":");
+	if (flagstr == NULL) {
+		zor->zor_flags = ZOR_FLAG_ALL_TYPES;
+		goto out;
+	} else if (strtok(NULL, ":") != NULL) {
+		*msg = "Invalid colon-delimited field after flags";
+		rc = 1;
+		goto out;
+	}
+
+	for (i = 0; flagstr[i]; i++) {
+		int bit;
+		boolean_t negation = (flagstr[i] == '-');
+
+		if (negation) {
+			i++;
+			if (flagstr[i] == '\0') {
+				*msg = "Invalid trailing negation operator";
+				rc = 1;
+				goto out;
+			}
+		}
+		bit = flagbits[(uchar_t)flagstr[i]];
+		if (bit == 0) {
+			*msg = "Invalid flag";
+			rc = 1;
+			goto out;
+		}
+		if (negation)
+			flags &= ~bit;
+		else
+			flags |= bit;
+	}
+	zor->zor_flags = flags;
+
+out:
+	free(dup);
+	return (rc);
+}
+
 static void
-dump_dir(objset_t *os)
+dump_objset(objset_t *os)
 {
 	dmu_objset_stats_t dds = { 0 };
 	uint64_t object, object_count;
@@ -2423,12 +3699,15 @@
 	char osname[ZFS_MAX_DATASET_NAME_LEN];
 	const char *type = "UNKNOWN";
 	int verbosity = dump_opt['d'];
-	int print_header = 1;
+	boolean_t print_header;
 	unsigned i;
 	int error;
 	uint64_t total_slots_used = 0;
 	uint64_t max_slot_used = 0;
 	uint64_t dnode_slots;
+	uint64_t obj_start;
+	uint64_t obj_end;
+	uint64_t flags;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
@@ -2437,6 +3716,8 @@
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 
+	print_header = B_TRUE;
+
 	if (dds.dds_type < DMU_OST_NUMTYPES)
 		type = objset_types[dds.dds_type];
 
@@ -2470,10 +3751,26 @@
 	    numbuf, (u_longlong_t)usedobjs, blkbuf,
 	    (dds.dds_inconsistent) ? " (inconsistent)" : "");
 
-	if (zopt_objects != 0) {
-		for (i = 0; i < zopt_objects; i++)
-			dump_object(os, zopt_object[i], verbosity,
-			    &print_header, NULL);
+	for (i = 0; i < zopt_object_args; i++) {
+		obj_start = zopt_object_ranges[i].zor_obj_start;
+		obj_end = zopt_object_ranges[i].zor_obj_end;
+		flags = zopt_object_ranges[i].zor_flags;
+
+		object = obj_start;
+		if (object == 0 || obj_start == obj_end)
+			dump_object(os, object, verbosity, &print_header, NULL,
+			    flags);
+		else
+			object--;
+
+		while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) &&
+		    object <= obj_end) {
+			dump_object(os, object, verbosity, &print_header, NULL,
+			    flags);
+		}
+	}
+
+	if (zopt_object_args > 0) {
 		(void) printf("\n");
 		return;
 	}
@@ -2483,39 +3780,49 @@
 
 	if (dmu_objset_ds(os) != NULL) {
 		dsl_dataset_t *ds = dmu_objset_ds(os);
-		dump_deadlist(&ds->ds_deadlist);
+		dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
+		if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
+		    !dmu_objset_is_snapshot(os)) {
+			dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist");
+			if (verify_dd_livelist(os) != 0)
+				fatal("livelist is incorrect");
+		}
 
 		if (dsl_dataset_remap_deadlist_exists(ds)) {
 			(void) printf("ds_remap_deadlist:\n");
-			dump_deadlist(&ds->ds_remap_deadlist);
+			dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist");
 		}
 		count_ds_mos_objects(ds);
 	}
 
+	if (dmu_objset_ds(os) != NULL)
+		dump_bookmarks(os, verbosity);
+
 	if (verbosity < 2)
 		return;
 
 	if (BP_IS_HOLE(os->os_rootbp))
 		return;
 
-	dump_object(os, 0, verbosity, &print_header, NULL);
+	dump_object(os, 0, verbosity, &print_header, NULL, 0);
 	object_count = 0;
 	if (DMU_USERUSED_DNODE(os) != NULL &&
 	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
-		    NULL);
+		    NULL, 0);
 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
-		    NULL);
+		    NULL, 0);
 	}
 
 	if (DMU_PROJECTUSED_DNODE(os) != NULL &&
 	    DMU_PROJECTUSED_DNODE(os)->dn_type != 0)
 		dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity,
-		    &print_header, NULL);
+		    &print_header, NULL, 0);
 
 	object = 0;
 	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
-		dump_object(os, object, verbosity, &print_header, &dnode_slots);
+		dump_object(os, object, verbosity, &print_header, &dnode_slots,
+		    0);
 		object_count++;
 		total_slots_used += dnode_slots;
 		max_slot_used = object + dnode_slots - 1;
@@ -2804,10 +4111,10 @@
 	const cksum_record_t *l = (cksum_record_t *)x1;
 	const cksum_record_t *r = (cksum_record_t *)x2;
 	int arraysize = ARRAY_SIZE(l->cksum.zc_word);
-	int difference;
+	int difference = 0;
 
 	for (int i = 0; i < arraysize; i++) {
-		difference = AVL_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
+		difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
 		if (difference)
 			break;
 	}
@@ -2901,6 +4208,270 @@
 }
 
 static void
+print_l2arc_header(void)
+{
+	/* Banner that introduces the L2ARC device header section. */
+	static const char rule[] = "------------------------------------";
+
+	(void) printf("%s\nL2ARC device header\n%s\n", rule, rule);
+}
+
+static void
+print_l2arc_log_blocks(void)
+{
+	/* Banner that introduces the L2ARC log block section. */
+	static const char rule[] = "------------------------------------";
+
+	(void) printf("%s\nL2ARC device log blocks\n%s\n", rule, rule);
+}
+
+/*
+ * Print the entries of one L2ARC log block.  log_entries is the number of
+ * entries to print from le, and i is the log block number shown in the
+ * lb[] column.  Entries are numbered starting from 1 in the le[] column.
+ */
+static void
+dump_l2arc_log_entries(uint64_t log_entries,
+    l2arc_log_ent_phys_t *le, uint64_t i)
+{
+	for (int j = 0; j < log_entries; j++) {
+		l2arc_log_ent_phys_t *ent = &le[j];
+		dva_t *dva = &ent->le_dva;
+
+		/* First line: block/entry numbers and the decoded DVA. */
+		(void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, "
+		    "vdev: %llu, offset: %llu\n",
+		    (u_longlong_t)i, j + 1,
+		    (u_longlong_t)DVA_GET_ASIZE(dva),
+		    (u_longlong_t)DVA_GET_VDEV(dva),
+		    (u_longlong_t)DVA_GET_OFFSET(dva));
+
+		/* Remaining lines: one field per line, decoded from le_prop. */
+		(void) printf("|\t\t\t\tbirth: %llu\n",
+		    (u_longlong_t)ent->le_birth);
+		(void) printf("|\t\t\t\tlsize: %llu\n",
+		    (u_longlong_t)L2BLK_GET_LSIZE(ent->le_prop));
+		(void) printf("|\t\t\t\tpsize: %llu\n",
+		    (u_longlong_t)L2BLK_GET_PSIZE(ent->le_prop));
+		(void) printf("|\t\t\t\tcompr: %llu\n",
+		    (u_longlong_t)L2BLK_GET_COMPRESS(ent->le_prop));
+		(void) printf("|\t\t\t\tcomplevel: %llu\n",
+		    (u_longlong_t)ent->le_complevel);
+		(void) printf("|\t\t\t\ttype: %llu\n",
+		    (u_longlong_t)L2BLK_GET_TYPE(ent->le_prop));
+		(void) printf("|\t\t\t\tprotected: %llu\n",
+		    (u_longlong_t)L2BLK_GET_PROTECTED(ent->le_prop));
+		(void) printf("|\t\t\t\tprefetch: %llu\n",
+		    (u_longlong_t)L2BLK_GET_PREFETCH(ent->le_prop));
+		(void) printf("|\t\t\t\taddress: %llu\n",
+		    (u_longlong_t)ent->le_daddr);
+		(void) printf("|\t\t\t\tARC state: %llu\n",
+		    (u_longlong_t)L2BLK_GET_STATE(ent->le_prop));
+		(void) printf("|\n");
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Print the fields of a single L2ARC log block pointer, one per line.
+ * The size and algorithm fields are decoded from lbp_prop.
+ */
+static void
+dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
+{
+	/* Fields stored directly in the pointer. */
+	(void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps.lbp_daddr);
+	(void) printf("|\t\tpayload_asize: %llu\n",
+	    (u_longlong_t)lbps.lbp_payload_asize);
+	(void) printf("|\t\tpayload_start: %llu\n",
+	    (u_longlong_t)lbps.lbp_payload_start);
+
+	/* Fields packed into lbp_prop. */
+	(void) printf("|\t\tlsize: %llu\n",
+	    (u_longlong_t)L2BLK_GET_LSIZE(lbps.lbp_prop));
+	(void) printf("|\t\tasize: %llu\n",
+	    (u_longlong_t)L2BLK_GET_PSIZE(lbps.lbp_prop));
+	(void) printf("|\t\tcompralgo: %llu\n",
+	    (u_longlong_t)L2BLK_GET_COMPRESS(lbps.lbp_prop));
+	(void) printf("|\t\tcksumalgo: %llu\n",
+	    (u_longlong_t)L2BLK_GET_CHECKSUM(lbps.lbp_prop));
+	(void) printf("|\n\n");
+}
+
+/*
+ * Walk the chain of L2ARC log blocks on the cache device open at fd,
+ * starting from the log block pointers stored in the device header l2dhdr.
+ *
+ * Each block is read, checksum-verified, decompressed when its pointer
+ * names a compression algorithm, and checked for the log block magic.
+ * The walk stops at the first block that fails any of these steps.  The
+ * number of blocks that passed and their total aligned size are
+ * accumulated in rebuild->dh_lb_count and rebuild->dh_lb_asize.
+ *
+ * All output is suppressed when dump_opt['q'] is set.  Per-block detail
+ * is printed when dump_opt['l'] > 1 and per-entry detail when
+ * dump_opt['l'] > 2.
+ */
+static void
+dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr,
+    l2arc_dev_hdr_phys_t *rebuild)
+{
+	l2arc_log_blk_phys_t this_lb;
+	uint64_t asize;
+	l2arc_log_blkptr_t lbps[2];
+	abd_t *abd;
+	zio_cksum_t cksum;
+	int failed = 0;
+	l2arc_dev_t dev;
+
+	if (!dump_opt['q'])
+		print_l2arc_log_blocks();
+	bcopy((&l2dhdr)->dh_start_lbps, lbps, sizeof (lbps));
+
+	/* Only the fields consulted by the validity checks are filled in. */
+	dev.l2ad_evict = l2dhdr.dh_evict;
+	dev.l2ad_start = l2dhdr.dh_start;
+	dev.l2ad_end = l2dhdr.dh_end;
+
+	if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) {
+		/* no log blocks to read */
+		if (!dump_opt['q']) {
+			(void) printf("No log blocks to read\n");
+			(void) printf("\n");
+		}
+		return;
+	} else {
+		dev.l2ad_hand = lbps[0].lbp_daddr +
+		    L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
+	}
+
+	dev.l2ad_first = !!(l2dhdr.dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
+
+	for (;;) {
+		if (!l2arc_log_blkptr_valid(&dev, &lbps[0]))
+			break;
+
+		/* L2BLK_GET_PSIZE returns aligned size for log blocks */
+		asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
+		if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) {
+			if (!dump_opt['q']) {
+				(void) printf("Error while reading next log "
+				    "block\n\n");
+			}
+			break;
+		}
+
+		fletcher_4_native_varsize(&this_lb, asize, &cksum);
+		if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
+			failed++;
+			if (!dump_opt['q']) {
+				(void) printf("Invalid cksum\n");
+				dump_l2arc_log_blkptr(lbps[0]);
+			}
+			break;
+		}
+
+		if (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop) !=
+		    ZIO_COMPRESS_OFF) {
+			/*
+			 * Decompress in place: copy the raw block into a
+			 * scratch abd and decompress it back into this_lb.
+			 * A failure leaves this_lb undefined, so stop here
+			 * instead of letting the magic check below report a
+			 * misleading error.
+			 */
+			abd = abd_alloc_for_io(asize, B_TRUE);
+			abd_copy_from_buf_off(abd, &this_lb, 0, asize);
+			int derr = zio_decompress_data(L2BLK_GET_COMPRESS(
+			    (&lbps[0])->lbp_prop), abd, &this_lb,
+			    asize, sizeof (this_lb), NULL);
+			abd_free(abd);
+			if (derr != 0) {
+				if (!dump_opt['q']) {
+					(void) printf("L2ARC block "
+					    "decompression failed\n\n");
+				}
+				break;
+			}
+		}
+
+		if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
+			byteswap_uint64_array(&this_lb, sizeof (this_lb));
+		if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
+			if (!dump_opt['q'])
+				(void) printf("Invalid log block magic\n\n");
+			break;
+		}
+
+		rebuild->dh_lb_count++;
+		rebuild->dh_lb_asize += asize;
+		if (dump_opt['l'] > 1 && !dump_opt['q']) {
+			(void) printf("lb[%4llu]\tmagic: %llu\n",
+			    (u_longlong_t)rebuild->dh_lb_count,
+			    (u_longlong_t)this_lb.lb_magic);
+			dump_l2arc_log_blkptr(lbps[0]);
+		}
+
+		if (dump_opt['l'] > 2 && !dump_opt['q'])
+			dump_l2arc_log_entries(l2dhdr.dh_log_entries,
+			    this_lb.lb_entries,
+			    rebuild->dh_lb_count);
+
+		if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
+		    lbps[0].lbp_payload_start, dev.l2ad_evict) &&
+		    !dev.l2ad_first)
+			break;
+
+		/* Advance: the block just read names the one after next. */
+		lbps[0] = lbps[1];
+		lbps[1] = this_lb.lb_prev_lbp;
+	}
+
+	if (!dump_opt['q']) {
+		(void) printf("log_blk_count:\t %llu with valid cksum\n",
+		    (u_longlong_t)rebuild->dh_lb_count);
+		(void) printf("\t\t %d with invalid cksum\n", failed);
+		(void) printf("log_blk_asize:\t %llu\n\n",
+		    (u_longlong_t)rebuild->dh_lb_asize);
+	}
+}
+
+/*
+ * Read the L2ARC device header located at VDEV_LABEL_START_SIZE on fd,
+ * print it unless dump_opt['q'] is set, then walk the log blocks it
+ * points to.
+ *
+ * Returns 1 when the header accounts for more log blocks (by count or by
+ * aligned size) than the walk found, and 0 otherwise.  A missing or
+ * unrecognized header is reported on stdout but also returns 0.
+ */
+static int
+dump_l2arc_header(int fd)
+{
+	l2arc_dev_hdr_phys_t l2dhdr, rebuild;
+	int found = 0;
+
+	bzero(&l2dhdr, sizeof (l2dhdr));
+	bzero(&rebuild, sizeof (rebuild));
+
+	if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
+	    VDEV_LABEL_START_SIZE) == sizeof (l2dhdr)) {
+		/* Accept a header written with the opposite byte order. */
+		if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC))
+			byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr));
+
+		found = (l2dhdr.dh_magic == L2ARC_DEV_HDR_MAGIC);
+	}
+
+	if (!found) {
+		(void) printf("L2ARC device header not found\n\n");
+		/* Do not return an error here for backward compatibility */
+		return (0);
+	}
+
+	if (!dump_opt['q']) {
+		print_l2arc_header();
+
+		(void) printf("    magic: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_magic);
+		(void) printf("    version: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_version);
+		(void) printf("    pool_guid: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_spa_guid);
+		(void) printf("    flags: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_flags);
+		(void) printf("    start_lbps[0]: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_start_lbps[0].lbp_daddr);
+		(void) printf("    start_lbps[1]: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_start_lbps[1].lbp_daddr);
+		(void) printf("    log_blk_ent: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_log_entries);
+		(void) printf("    start: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_start);
+		(void) printf("    end: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_end);
+		(void) printf("    evict: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_evict);
+		(void) printf("    lb_asize_refcount: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_lb_asize);
+		(void) printf("    lb_count_refcount: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_lb_count);
+		(void) printf("    trim_action_time: %llu\n",
+		    (u_longlong_t)l2dhdr.dh_trim_action_time);
+		(void) printf("    trim_state: %llu\n\n",
+		    (u_longlong_t)l2dhdr.dh_trim_state);
+	}
+
+	dump_l2arc_log_blocks(fd, l2dhdr, &rebuild);
+
+	/*
+	 * dump_l2arc_log_blocks() emulates l2arc_rebuild() but has no memory
+	 * pressure valve.  On a system that is low on memory l2arc_rebuild()
+	 * may stop early, so the header's dh_lb_asize and dh_lb_count can
+	 * legitimately be lower than what zdb finds on the device; that is
+	 * normal and is not an error.  The opposite must never happen: a
+	 * header that reports more than dump_l2arc_log_blocks() found means
+	 * there is a leak in the accounting of log blocks.
+	 */
+	return (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize ||
+	    l2dhdr.dh_lb_count > rebuild.dh_lb_count);
+}
+
+static void
 dump_config_from_label(zdb_label_t *label, size_t buflen, int l)
 {
 	if (dump_opt['q'])
@@ -2966,10 +4537,10 @@
  * for the last one.
  */
 static int
-dump_path_impl(objset_t *os, uint64_t obj, char *name)
+dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
 {
 	int err;
-	int header = 1;
+	boolean_t header = B_TRUE;
 	uint64_t child_obj;
 	char *s;
 	dmu_buf_t *db;
@@ -3016,10 +4587,15 @@
 	switch (doi.doi_type) {
 	case DMU_OT_DIRECTORY_CONTENTS:
 		if (s != NULL && *(s + 1) != '\0')
-			return (dump_path_impl(os, child_obj, s + 1));
-		/*FALLTHROUGH*/
+			return (dump_path_impl(os, child_obj, s + 1, retobj));
+		fallthrough;
 	case DMU_OT_PLAIN_FILE_CONTENTS:
-		dump_object(os, child_obj, dump_opt['v'], &header, NULL);
+		if (retobj != NULL) {
+			*retobj = child_obj;
+		} else {
+			dump_object(os, child_obj, dump_opt['v'], &header,
+			    NULL, 0);
+		}
 		return (0);
 	default:
 		(void) fprintf(stderr, "object %llu has non-file/directory "
@@ -3034,13 +4610,13 @@
  * Dump the blocks for the object specified by path inside the dataset.
  */
 static int
-dump_path(char *ds, char *path)
+dump_path(char *ds, char *path, uint64_t *retobj)
 {
 	int err;
 	objset_t *os;
 	uint64_t root_obj;
 
-	err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
+	err = open_objset(ds, FTAG, &os);
 	if (err != 0)
 		return (err);
 
@@ -3048,27 +4624,105 @@
 	if (err != 0) {
 		(void) fprintf(stderr, "can't lookup root znode: %s\n",
 		    strerror(err));
-		dmu_objset_disown(os, B_FALSE, FTAG);
+		close_objset(os, FTAG);
 		return (EINVAL);
 	}
 
 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
 
-	err = dump_path_impl(os, root_obj, path);
+	err = dump_path_impl(os, root_obj, path, retobj);
 
 	close_objset(os, FTAG);
 	return (err);
 }
 
 static int
+zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
+{
+	/*
+	 * Copy the contents of object srcobj in objset os to the regular
+	 * file destfile, which is created or truncated.
+	 *
+	 * Returns 0 on success.  Returns EINVAL when the object's ZPL_SIZE
+	 * attribute is 0, ENOMEM when the copy buffer cannot be allocated,
+	 * EIO on a short write, and otherwise the error from the failing
+	 * SA, DMU, open() or write() call.
+	 */
+	int err = 0;
+	uint64_t size, readsize, oursize, offset;
+	ssize_t writesize;
+	sa_handle_t *hdl;
+
+	(void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
+	    destfile);
+
+	/* The object's size is read from its ZPL_SIZE system attribute. */
+	VERIFY3P(os, ==, sa_os);
+	if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
+		(void) printf("Failed to get handle for SA znode\n");
+		return (err);
+	}
+	if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) {
+		(void) sa_handle_destroy(hdl);
+		return (err);
+	}
+	(void) sa_handle_destroy(hdl);
+
+	(void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
+	    size);
+	if (size == 0) {
+		return (EINVAL);
+	}
+
+	int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (fd == -1)
+		return (errno);
+
+	/*
+	 * We cap the size at 1 mebibyte here to prevent
+	 * allocation failures and nigh-infinite printing if the
+	 * object is extremely large.
+	 */
+	oursize = MIN(size, 1 << 20);
+	offset = 0;
+	char *buf = kmem_alloc(oursize, KM_NOSLEEP);
+	if (buf == NULL) {
+		(void) close(fd);
+		return (ENOMEM);
+	}
+
+	/* Copy the object in chunks of at most oursize bytes. */
+	while (offset < size) {
+		readsize = MIN(size - offset, 1 << 20);
+		err = dmu_read(os, srcobj, offset, readsize, buf, 0);
+		if (err != 0) {
+			(void) printf("got error %u from dmu_read\n", err);
+			break;
+		}
+		if (dump_opt['v'] > 3) {
+			(void) printf("Read offset=%" PRIu64 " size=%" PRIu64
+			    " error=%d\n", offset, readsize, err);
+		}
+
+		writesize = write(fd, buf, readsize);
+		if (writesize < 0) {
+			err = errno;
+			break;
+		} else if (writesize != readsize) {
+			/* Incomplete write */
+			(void) fprintf(stderr, "Short write, only wrote %llu of"
+			    " %" PRIu64 " bytes, exiting...\n",
+			    (u_longlong_t)writesize, readsize);
+			/* Do not report a truncated copy as success. */
+			err = EIO;
+			break;
+		}
+
+		offset += readsize;
+	}
+
+	/* Single exit path: every failure above releases both resources. */
+	(void) close(fd);
+	kmem_free(buf, oursize);
+
+	return (err);
+}
+
+static int
 dump_label(const char *dev)
 {
 	char path[MAXPATHLEN];
 	zdb_label_t labels[VDEV_LABELS];
-	uint64_t psize, ashift;
+	uint64_t psize, ashift, l2cache;
 	struct stat64 statbuf;
 	boolean_t config_found = B_FALSE;
 	boolean_t error = B_FALSE;
+	boolean_t read_l2arc_header = B_FALSE;
 	avl_tree_t config_tree;
 	avl_tree_t uberblock_tree;
 	void *node, *cookie;
@@ -3110,7 +4764,7 @@
 		exit(1);
 	}
 
-	if (S_ISBLK(statbuf.st_mode) && ioctl(fd, BLKFLSBUF) != 0)
+	if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0)
 		(void) printf("failed to invalidate cache '%s' : %s\n", path,
 		    strerror(errno));
 
@@ -3161,6 +4815,15 @@
 			if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
 				size = buflen;
 
+			/* If the device is a cache device clear the header. */
+			if (!read_l2arc_header) {
+				if (nvlist_lookup_uint64(config,
+				    ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
+				    l2cache == POOL_STATE_L2CACHE) {
+					read_l2arc_header = B_TRUE;
+				}
+			}
+
 			fletcher_4_native_varsize(buf, size, &cksum);
 			rec = cksum_record_insert(&config_tree, &cksum, l);
 
@@ -3211,6 +4874,12 @@
 		nvlist_free(label->config_nv);
 	}
 
+	/*
+	 * Dump the L2ARC header, if existent.
+	 */
+	if (read_l2arc_header)
+		error |= dump_l2arc_header(fd);
+
 	cookie = NULL;
 	while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
 		umem_free(node, sizeof (cksum_record_t));
@@ -3229,17 +4898,18 @@
 }
 
 static uint64_t dataset_feature_count[SPA_FEATURES];
+static uint64_t global_feature_count[SPA_FEATURES];
 static uint64_t remap_deadlist_count = 0;
 
 /*ARGSUSED*/
 static int
-dump_one_dir(const char *dsname, void *arg)
+dump_one_objset(const char *dsname, void *arg)
 {
 	int error;
 	objset_t *os;
 	spa_feature_t f;
 
-	error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
+	error = open_objset(dsname, FTAG, &os);
 	if (error != 0)
 		return (0);
 
@@ -3255,7 +4925,22 @@
 		remap_deadlist_count++;
 	}
 
-	dump_dir(os);
+	for (dsl_bookmark_node_t *dbn =
+	    avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
+	    dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
+		mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
+		if (dbn->dbn_phys.zbm_redaction_obj != 0)
+			global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
+		if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
+			global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
+	}
+
+	if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) &&
+	    !dmu_objset_is_snapshot(os)) {
+		global_feature_count[SPA_FEATURE_LIVELIST]++;
+	}
+
+	dump_objset(os);
 	close_objset(os, FTAG);
 	fuid_table_destroy();
 	return (0);
@@ -3292,6 +4977,7 @@
 };
 
 #define	ZB_TOTAL	DN_MAX_LEVELS
+#define	SPA_MAX_FOR_16M	(SPA_MAXBLOCKSHIFT+1)
 
 typedef struct zdb_cb {
 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
@@ -3299,6 +4985,15 @@
 	uint64_t	zcb_checkpoint_size;
 	uint64_t	zcb_dedup_asize;
 	uint64_t	zcb_dedup_blocks;
+	uint64_t	zcb_psize_count[SPA_MAX_FOR_16M];
+	uint64_t	zcb_lsize_count[SPA_MAX_FOR_16M];
+	uint64_t	zcb_asize_count[SPA_MAX_FOR_16M];
+	uint64_t	zcb_psize_len[SPA_MAX_FOR_16M];
+	uint64_t	zcb_lsize_len[SPA_MAX_FOR_16M];
+	uint64_t	zcb_asize_len[SPA_MAX_FOR_16M];
+	uint64_t	zcb_psize_total;
+	uint64_t	zcb_lsize_total;
+	uint64_t	zcb_asize_total;
 	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
 	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
 	    [BPE_PAYLOAD_SIZE + 1];
@@ -3322,6 +5017,172 @@
 	return ((off1 >> ms_shift) == (off2 >> ms_shift));
 }
 
+/*
+ * Used to simplify reporting of the histogram data.
+ * dump_size_histograms() fills one of these per histogram it prints.
+ */
+typedef struct one_histo {
+	char *name;		/* title printed in the table header */
+	uint64_t *count;	/* per-bin block counts, indexed by bin */
+	uint64_t *len;		/* per-bin summed sizes, indexed by bin */
+	uint64_t cumulative;	/* running sum of len[] while printing rows */
+} one_histo_t;
+
+/*
+ * The number of separate histograms processed for psize, lsize and asize.
+ */
+#define	NUM_HISTO 3
+
+/*
+ * Print the psize, lsize and asize block size histograms collected in zcb
+ * as one fixed-column table.  There is one row per power-of-two block
+ * size, from 2^SPA_MINBLOCKSHIFT up to (but excluding) 2^SPA_MAX_FOR_16M,
+ * and each row shows the count, length and cumulative length for all
+ * three histograms side by side.
+ *
+ * Numbers go through zdb_nicenum().  By default the table is printed in
+ * nicenumber format (e.g. 123K); if the '-P' parameter is specified the
+ * columns are tab-separated and the full raw (parseable) numbers are
+ * printed instead.
+ */
+static void
+dump_size_histograms(zdb_cb_t *zcb)
+{
+	/* Scratch buffer that receives each number formatted as a string. */
+	char numbuf[32];
+
+	/* Non-zero when tab-separated (parseable) output was requested. */
+	const int parseable = (dump_opt['P'] != 0);
+
+	/* One descriptor per histogram, in the order they are printed. */
+	one_histo_t parm_histo[NUM_HISTO] = {
+		{ "psize", zcb->zcb_psize_count, zcb->zcb_psize_len, 0 },
+		{ "lsize", zcb->zcb_lsize_count, zcb->zcb_lsize_len, 0 },
+		{ "asize", zcb->zcb_asize_count, zcb->zcb_asize_len, 0 },
+	};
+
+	(void) printf("\nBlock Size Histogram\n");
+
+	/*
+	 * First title line: "block" followed by the histogram names.
+	 */
+	(void) printf(parseable ? "\n%s\t" : "\n%7s   ", "block");
+	for (int h = 0; h < NUM_HISTO; h++) {
+		const int last = (h == NUM_HISTO - 1);
+
+		if (last) {
+			/* Don't print trailing spaces */
+			(void) printf(parseable ? "  %s" : "%s",
+			    parm_histo[h].name);
+		} else {
+			/* Names are left aligned in the nicenum layout */
+			(void) printf(parseable ?
+			    "%s\t\t\t" : "%-7s              ",
+			    parm_histo[h].name);
+		}
+	}
+	(void) printf("\n");
+
+	/*
+	 * Second title line: "size" followed by the three column titles,
+	 * repeated once per histogram.
+	 */
+	(void) printf(parseable ? "%s\t" : "%7s ", "size");
+	for (int h = 0; h < NUM_HISTO; h++) {
+		(void) printf(parseable ? "%s\t%s\t%s\t" : "%7s%7s%7s",
+		    "Count", "Size", "Cum.");
+	}
+	(void) printf("\n");
+
+	/*
+	 * One row per bin.
+	 */
+	for (int bin = SPA_MINBLOCKSHIFT; bin < SPA_MAX_FOR_16M; bin++) {
+		/* Leading column: the block size this bin stands for. */
+		zdb_nicenum((1ULL << bin), numbuf, sizeof (numbuf));
+		(void) printf(parseable ? "%s" : "%7s:", numbuf);
+
+		/* Then count, length and running total per histogram. */
+		for (int h = 0; h < NUM_HISTO; h++) {
+			one_histo_t *histo = &parm_histo[h];
+
+			histo->cumulative += histo->len[bin];
+
+			const uint64_t cells[3] = {
+				histo->count[bin],
+				histo->len[bin],
+				histo->cumulative,
+			};
+			for (int c = 0; c < 3; c++) {
+				zdb_nicenum(cells[c], numbuf, sizeof (numbuf));
+				(void) printf(parseable ? "\t%s" : "%7s",
+				    numbuf);
+			}
+		}
+		(void) printf("\n");
+	}
+}
+
 static void
 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
     dmu_object_type_t type)
@@ -3415,6 +5276,28 @@
 		    [BPE_GET_PSIZE(bp)]++;
 		return;
 	}
+	/*
+	 * The binning histogram bins by powers of two up to
+	 * SPA_MAXBLOCKSIZE rather than creating bins for
+	 * every possible blocksize found in the pool.
+	 */
+	int bin = highbit64(BP_GET_PSIZE(bp)) - 1;
+
+	zcb->zcb_psize_count[bin]++;
+	zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp);
+	zcb->zcb_psize_total += BP_GET_PSIZE(bp);
+
+	bin = highbit64(BP_GET_LSIZE(bp)) - 1;
+
+	zcb->zcb_lsize_count[bin]++;
+	zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp);
+	zcb->zcb_lsize_total += BP_GET_LSIZE(bp);
+
+	bin = highbit64(BP_GET_ASIZE(bp)) - 1;
+
+	zcb->zcb_asize_count[bin]++;
+	zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
+	zcb->zcb_asize_total += BP_GET_ASIZE(bp);
 
 	if (dump_opt['L'])
 		return;
@@ -3453,8 +5336,6 @@
 	zdb_cb_t *zcb = zio->io_private;
 	zbookmark_phys_t *zb = &zio->io_bookmark;
 
-	abd_free(zio->io_abd);
-
 	mutex_enter(&spa->spa_scrub_lock);
 	spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
 	cv_broadcast(&spa->spa_scrub_io_cv);
@@ -3481,6 +5362,8 @@
 		    blkbuf);
 	}
 	mutex_exit(&spa->spa_scrub_lock);
+
+	abd_free(zio->io_abd);
 }
 
 static int
@@ -3491,7 +5374,7 @@
 	dmu_object_type_t type;
 	boolean_t is_metadata;
 
-	if (bp == NULL)
+	if (zb->zb_level == ZB_DNODE_LEVEL)
 		return (0);
 
 	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
@@ -3506,7 +5389,7 @@
 		    blkbuf);
 	}
 
-	if (BP_IS_HOLE(bp))
+	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
 		return (0);
 
 	type = BP_GET_TYPE(bp);
@@ -3586,6 +5469,35 @@
 };
 
 /* ARGSUSED */
+static int
+load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme,
+    uint64_t txg, void *arg)
+{
+	/*
+	 * Spacemap log callback: replay one log entry into
+	 * svr->svr_allocd_segs, where arg is the spa_vdev_removal_t.
+	 * Always returns 0.
+	 */
+	spa_vdev_removal_t *svr = arg;
+
+	/* skip vdevs we don't care about */
+	if (svr->svr_vdev_id != sme->sme_vdev)
+		return (0);
+
+	vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev);
+	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
+
+	/* Entries older than the metaslab's unflushed txg are ignored. */
+	if (txg < metaslab_unflushed_txg(ms))
+		return (0);
+
+	if (sme->sme_type != SM_ALLOC) {
+		range_tree_remove(svr->svr_allocd_segs, sme->sme_offset,
+		    sme->sme_run);
+	} else {
+		range_tree_add(svr->svr_allocd_segs, sme->sme_offset,
+		    sme->sme_run);
+	}
+
+	return (0);
+}
+
+/* ARGSUSED */
 static void
 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
@@ -3633,49 +5545,47 @@
 	vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 
+	ASSERT0(range_tree_space(svr->svr_allocd_segs));
+
+	range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 	for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
 		metaslab_t *msp = vd->vdev_ms[msi];
 
-		if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
-			break;
-
-		ASSERT0(range_tree_space(svr->svr_allocd_segs));
-
-		if (msp->ms_sm != NULL) {
-			VERIFY0(space_map_load(msp->ms_sm,
-			    svr->svr_allocd_segs, SM_ALLOC));
-
-			/*
-			 * Clear everything past what has been synced unless
-			 * it's past the spacemap, because we have not allocated
-			 * mappings for it yet.
-			 */
-			uint64_t vim_max_offset =
-			    vdev_indirect_mapping_max_offset(vim);
-			uint64_t sm_end = msp->ms_sm->sm_start +
-			    msp->ms_sm->sm_size;
-			if (sm_end > vim_max_offset)
-				range_tree_clear(svr->svr_allocd_segs,
-				    vim_max_offset, sm_end - vim_max_offset);
-		}
-
-		zcb->zcb_removing_size +=
-		    range_tree_space(svr->svr_allocd_segs);
-		range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
+		ASSERT0(range_tree_space(allocs));
+		if (msp->ms_sm != NULL)
+			VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC));
+		range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs);
 	}
+	range_tree_destroy(allocs);
+
+	iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr);
+
+	/*
+	 * Clear everything past what has been synced,
+	 * because we have not allocated mappings for
+	 * it yet.
+	 */
+	range_tree_clear(svr->svr_allocd_segs,
+	    vdev_indirect_mapping_max_offset(vim),
+	    vd->vdev_asize - vdev_indirect_mapping_max_offset(vim));
+
+	zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs);
+	range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
 
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 }
 
 /* ARGSUSED */
 static int
-increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
 	zdb_cb_t *zcb = arg;
 	spa_t *spa = zcb->zcb_spa;
 	vdev_t *vd;
 	const dva_t *dva = &bp->blk_dva[0];
 
+	ASSERT(!bp_freed);
 	ASSERT(!dump_opt['L']);
 	ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
 
@@ -3873,6 +5783,82 @@
 	}
 }
 
+/*
+ * Spacemap log callback: apply one log entry to the running byte total
+ * that arg points to (an int64_t).  Allocations add sme_run, frees
+ * subtract it.  Entries on non-concrete vdevs, and entries older than the
+ * metaslab's unflushed txg, are ignored.  Always returns 0.
+ */
+static int
+count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme,
+    uint64_t txg, void *arg)
+{
+	int64_t *ualloc_space = arg;
+	vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev);
+
+	if (!vdev_is_concrete(vd))
+		return (0);
+
+	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
+
+	if (txg < metaslab_unflushed_txg(ms))
+		return (0);
+
+	if (sme->sme_type != SM_ALLOC)
+		*ualloc_space -= sme->sme_run;
+	else
+		*ualloc_space += sme->sme_run;
+
+	return (0);
+}
+
+/*
+ * Return the net allocated space recorded in the spacemap logs, as summed
+ * by count_unflushed_space_cb().  Returns 0 without reading the logs when
+ * dump_opt['L'] is set.
+ */
+static int64_t
+get_unflushed_alloc_space(spa_t *spa)
+{
+	int64_t ualloc_space = 0;
+
+	if (!dump_opt['L']) {
+		iterate_through_spacemap_logs(spa, count_unflushed_space_cb,
+		    &ualloc_space);
+	}
+	return (ualloc_space);
+}
+
+/*
+ * Spacemap log callback: replay one log entry into the owning metaslab's
+ * ms_allocatable tree.  arg points to the maptype_t being loaded; entries
+ * of that type are added to the tree and entries of the other type are
+ * removed from it.  Always returns 0.
+ */
+static int
+load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg)
+{
+	maptype_t *uic_maptype = arg;
+	vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev);
+
+	/* skip indirect vdevs */
+	if (!vdev_is_concrete(vd))
+		return (0);
+
+	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+
+	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
+	ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE);
+
+	/* Entries older than the metaslab's unflushed txg are ignored. */
+	if (txg < metaslab_unflushed_txg(ms))
+		return (0);
+
+	if (sme->sme_type != *uic_maptype) {
+		range_tree_remove(ms->ms_allocatable, sme->sme_offset,
+		    sme->sme_run);
+	} else {
+		range_tree_add(ms->ms_allocatable, sme->sme_offset,
+		    sme->sme_run);
+	}
+
+	return (0);
+}
+
+/*
+ * Replay every spacemap log entry through load_unflushed_cb() so that each
+ * metaslab's ms_allocatable tree also reflects entries of the given
+ * maptype that are recorded only in the logs.
+ */
+static void
+load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype)
+{
+	/* maptype is passed by address; the callback only reads it. */
+	iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype);
+}
+
 static void
 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
 {
@@ -3896,7 +5882,7 @@
 			    (longlong_t)vd->vdev_ms_count);
 
 			mutex_enter(&msp->ms_lock);
-			metaslab_unload(msp);
+			range_tree_vacate(msp->ms_allocatable, NULL, NULL);
 
 			/*
 			 * We don't want to spend the CPU manipulating the
@@ -3913,6 +5899,8 @@
 			mutex_exit(&msp->ms_lock);
 		}
 	}
+
+	load_unflushed_to_ms_allocatables(spa, maptype);
 }
 
 /*
@@ -3927,7 +5915,7 @@
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 
 	mutex_enter(&msp->ms_lock);
-	metaslab_unload(msp);
+	range_tree_vacate(msp->ms_allocatable, NULL, NULL);
 
 	/*
 	 * We don't want to spend the CPU manipulating the
@@ -3985,9 +5973,11 @@
 		 * metaslabs.  We want to set them up for
 		 * zio_claim().
 		 */
+		vdev_metaslab_group_create(vd);
 		VERIFY0(vdev_metaslab_init(vd, 0));
 
-		vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
+		vdev_indirect_mapping_t *vim __maybe_unused =
+		    vd->vdev_indirect_mapping;
 		uint64_t vim_idx = 0;
 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 
@@ -4023,6 +6013,7 @@
 	 */
 	spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
 	spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+	spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;
 
 	zcb->zcb_vd_obsolete_counts =
 	    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
@@ -4156,7 +6147,6 @@
 	vdev_t *rvd = spa->spa_root_vdev;
 	for (unsigned c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
-		ASSERTV(metaslab_group_t *mg = vd->vdev_mg);
 
 		if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
 			leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
@@ -4164,7 +6154,9 @@
 
 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 			metaslab_t *msp = vd->vdev_ms[m];
-			ASSERT3P(mg, ==, msp->ms_group);
+			ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
+			    spa_embedded_log_class(spa)) ?
+			    vd->vdev_log_mg : vd->vdev_mg);
 
 			/*
 			 * ms_allocatable has been overloaded
@@ -4186,7 +6178,6 @@
 				range_tree_vacate(msp->ms_allocatable,
 				    zdb_leak, vd);
 			}
-
 			if (msp->ms_loaded) {
 				msp->ms_loaded = B_FALSE;
 			}
@@ -4216,6 +6207,100 @@
 	return (0);
 }
 
+/*
+ * Iterate over livelists which have been destroyed by the user but
+ * are still present in the MOS, waiting to be freed.
+ *
+ * The livelists are found through the DMU_POOL_DELETED_CLONES entry of
+ * the pool directory.  Each one is opened as a deadlist, passed to func
+ * together with arg, and closed again.  Nothing is done when the entry
+ * cannot be looked up.
+ */
+static void
+iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg)
+{
+	objset_t *mos = spa->spa_meta_objset;
+	uint64_t zap_obj;
+	int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
+	if (err != 0) {
+		/*
+		 * ENOENT means the pool directory has no such entry.  Any
+		 * other error is unexpected, but zap_obj was never set in
+		 * that case either, so it must not be used even when
+		 * assertions are compiled out.
+		 */
+		ASSERT(err == ENOENT);
+		return;
+	}
+
+	zap_cursor_t zc;
+	zap_attribute_t attr;
+	dsl_deadlist_t ll;
+	/* NULL out os prior to dsl_deadlist_open in case it's garbage */
+	ll.dl_os = NULL;
+	for (zap_cursor_init(&zc, mos, zap_obj);
+	    zap_cursor_retrieve(&zc, &attr) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		/* Each ZAP entry's first integer is the livelist object. */
+		dsl_deadlist_open(&ll, mos, attr.za_first_integer);
+		func(&ll, arg);
+		dsl_deadlist_close(&ll);
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*
+ * Adapter that gives count_block_cb() the four-argument callback
+ * signature that carries a bp_freed flag.  The flag is asserted to be
+ * false and otherwise unused; the remaining arguments are forwarded and
+ * count_block_cb()'s return value is returned.
+ */
+static int
+bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	ASSERT(!bp_freed);
+	return (count_block_cb(arg, bp, tx));
+}
+
+/*
+ * Deadlist entry callback: count the blocks of one sub-livelist that have
+ * been allocated but not freed.  args is the zdb_cb_t handed on to
+ * count_block_cb().  Always returns 0.
+ */
+static int
+livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle)
+{
+	bplist_t live_bps;
+
+	bplist_create(&live_bps);
+
+	/* determine which blocks have been alloc'd but not freed */
+	VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &live_bps,
+	    NULL, NULL));
+
+	/* count those blocks */
+	(void) bplist_iterate(&live_bps, count_block_cb, args, NULL);
+
+	bplist_destroy(&live_bps);
+	return (0);
+}
+
+/*
+ * Run livelist_entry_count_blocks_cb() on every entry of the livelist ll,
+ * passing arg through to it.
+ */
+static void
+livelist_count_blocks(dsl_deadlist_t *ll, void *arg)
+{
+	dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg);
+}
+
+/*
+ * Count the blocks in the livelists that have been destroyed by the user
+ * but haven't yet been freed.  zbc is passed through to
+ * livelist_count_blocks() for each such livelist.
+ */
+static void
+deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc)
+{
+	iterate_deleted_livelists(spa, livelist_count_blocks, zbc);
+}
+
+/*
+ * Per-livelist callback used by deleted_livelists_dump_mos(): counts the
+ * livelist towards SPA_FEATURE_LIVELIST, prints it under the title
+ * "Deleted Livelist" and runs sublivelist_verify_lightweight() on each of
+ * its entries.  arg must be NULL.
+ */
+static void
+dump_livelist_cb(dsl_deadlist_t *ll, void *arg)
+{
+	ASSERT3P(arg, ==, NULL);
+	global_feature_count[SPA_FEATURE_LIVELIST]++;
+	dump_blkptr_list(ll, "Deleted Livelist");
+	dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL);
+}
+
+/*
+ * Print out, register object references to, and increment feature counts for
+ * livelists that have been destroyed by the user but haven't yet been freed.
+ *
+ * The ZAP object named by the pool directory's DMU_POOL_DELETED_CLONES
+ * entry is registered with mos_obj_refd(), then every livelist it lists is
+ * handed to dump_livelist_cb().  Nothing is done when the entry cannot be
+ * looked up.
+ */
+static void
+deleted_livelists_dump_mos(spa_t *spa)
+{
+	uint64_t zap_obj;
+	objset_t *mos = spa->spa_meta_objset;
+	int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
+	if (err != 0) {
+		/*
+		 * ENOENT means the pool directory has no such entry.  On
+		 * any other error zap_obj is still uninitialized, so it
+		 * must not be registered or iterated.
+		 */
+		ASSERT(err == ENOENT);
+		return;
+	}
+	mos_obj_refd(zap_obj);
+	iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
+}
+
 static int
 dump_block_stats(spa_t *spa)
 {
@@ -4255,11 +6340,11 @@
 	 * If there's a deferred-free bplist, process that first.
 	 */
 	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
-	    count_block_cb, &zcb, NULL);
+	    bpobj_count_block_cb, &zcb, NULL);
 
 	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
-		    count_block_cb, &zcb, NULL);
+		    bpobj_count_block_cb, &zcb, NULL);
 	}
 
 	zdb_claim_removing(spa, &zcb);
@@ -4270,12 +6355,16 @@
 		    &zcb, NULL));
 	}
 
+	deleted_livelists_count_blocks(spa, &zcb);
+
 	if (dump_opt['c'] > 1)
 		flags |= TRAVERSE_PREFETCH_DATA;
 
 	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
 	zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
 	zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
+	zcb.zcb_totalasize +=
+	    metaslab_class_get_alloc(spa_embedded_log_class(spa));
 	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
 	err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
 
@@ -4323,8 +6412,10 @@
 
 	total_alloc = norm_alloc +
 	    metaslab_class_get_alloc(spa_log_class(spa)) +
+	    metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
 	    metaslab_class_get_alloc(spa_special_class(spa)) +
-	    metaslab_class_get_alloc(spa_dedup_class(spa));
+	    metaslab_class_get_alloc(spa_dedup_class(spa)) +
+	    get_unflushed_alloc_space(spa);
 	total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
 	    zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
 
@@ -4367,7 +6458,7 @@
 	(void) printf("\t%-16s %14llu     used: %5.2f%%\n", "Normal class:",
 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
 
-	if (spa_special_class(spa)->mc_rotor != NULL) {
+	if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) {
 		uint64_t alloc = metaslab_class_get_alloc(
 		    spa_special_class(spa));
 		uint64_t space = metaslab_class_get_space(
@@ -4378,7 +6469,7 @@
 		    100.0 * alloc / space);
 	}
 
-	if (spa_dedup_class(spa)->mc_rotor != NULL) {
+	if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) {
 		uint64_t alloc = metaslab_class_get_alloc(
 		    spa_dedup_class(spa));
 		uint64_t space = metaslab_class_get_space(
@@ -4389,6 +6480,17 @@
 		    100.0 * alloc / space);
 	}
 
+	if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) {
+		uint64_t alloc = metaslab_class_get_alloc(
+		    spa_embedded_log_class(spa));
+		uint64_t space = metaslab_class_get_space(
+		    spa_embedded_log_class(spa));
+
+		(void) printf("\t%-16s %14llu     used: %5.2f%%\n",
+		    "Embedded log class", (u_longlong_t)alloc,
+		    100.0 * alloc / space);
+	}
+
 	for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
 		if (zcb.zcb_embedded_blocks[i] == 0)
 			continue;
@@ -4522,6 +6624,11 @@
 				}
 			}
 		}
+
+		/* Output a table summarizing block sizes in the pool */
+		if (dump_opt['b'] >= 2) {
+			dump_size_histograms(&zcb);
+		}
 	}
 
 	(void) printf("\n");
@@ -4553,7 +6660,8 @@
 	avl_index_t where;
 	zdb_ddt_entry_t *zdde, zdde_search;
 
-	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
+	    BP_IS_EMBEDDED(bp))
 		return (0);
 
 	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@@ -4801,6 +6909,7 @@
 {
 	int error = 0;
 	char *poolname, *bogus_name = NULL;
+	boolean_t freecfg = B_FALSE;
 
 	/* If the target is not a pool, the extract the pool name */
 	char *path_start = strchr(target, '/');
@@ -4819,6 +6928,7 @@
 			    "spa_get_stats() failed with error %d\n",
 			    poolname, error);
 		}
+		freecfg = B_TRUE;
 	}
 
 	if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1)
@@ -4828,6 +6938,8 @@
 	error = spa_import(bogus_name, cfg, NULL,
 	    ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
 	    ZFS_IMPORT_SKIP_MMP);
+	if (freecfg)
+		nvlist_free(cfg);
 	if (error != 0) {
 		fatal("Tried to import pool \"%s\" but spa_import() failed "
 		    "with error %d\n", bogus_name, error);
@@ -4974,7 +7086,7 @@
 		for (uint64_t c = ckpoint_rvd->vdev_children;
 		    c < current_rvd->vdev_children; c++) {
 			vdev_t *current_vd = current_rvd->vdev_child[c];
-			ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
+			VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL);
 		}
 	}
 
@@ -5056,7 +7168,6 @@
 
 	spa_t *checkpoint_spa;
 	char *checkpoint_pool;
-	nvlist_t *config = NULL;
 	int error = 0;
 
 	/*
@@ -5064,7 +7175,7 @@
 	 * name) so we can do verification on it against the current state
 	 * of the pool.
 	 */
-	checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
+	checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL,
 	    NULL);
 	ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
 
@@ -5196,11 +7307,24 @@
 }
 
 static void
+mos_leak_vdev_top_zap(vdev_t *vd)
+{
+	uint64_t ms_flush_data_obj;
+	int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
+	    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
+	    sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj);
+	if (error == ENOENT)
+		return;
+	ASSERT0(error);
+
+	mos_obj_refd(ms_flush_data_obj);
+}
+
+static void
 mos_leak_vdev(vdev_t *vd)
 {
 	mos_obj_refd(vd->vdev_dtl_object);
 	mos_obj_refd(vd->vdev_ms_array);
-	mos_obj_refd(vd->vdev_top_zap);
 	mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
 	mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
 	mos_obj_refd(vd->vdev_leaf_zap);
@@ -5218,11 +7342,33 @@
 		mos_obj_refd(space_map_object(ms->ms_sm));
 	}
 
+	if (vd->vdev_top_zap != 0) {
+		mos_obj_refd(vd->vdev_top_zap);
+		mos_leak_vdev_top_zap(vd);
+	}
+
 	for (uint64_t c = 0; c < vd->vdev_children; c++) {
 		mos_leak_vdev(vd->vdev_child[c]);
 	}
 }
 
+/*
+ * Mark as referenced the log space map ZAP (DMU_POOL_LOG_SPACEMAP_ZAP in the
+ * pool directory) together with the space map object of every entry in
+ * spa->spa_sm_logs_by_txg.  A missing ZAP entry (ENOENT) means there is
+ * nothing to register.
+ */
+static void
+mos_leak_log_spacemaps(spa_t *spa)
+{
+	uint64_t spacemap_zap;
+	int error = zap_lookup(spa_meta_objset(spa),
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP,
+	    sizeof (spacemap_zap), 1, &spacemap_zap);
+	if (error == ENOENT)
+		return;
+	if (error != 0) {
+		/*
+		 * The lookup failed, so spacemap_zap was never filled in.
+		 * Flag the unexpected error, but do not fall through and
+		 * register garbage where the assertion does not abort.
+		 */
+		ASSERT0(error);
+		return;
+	}
+
+	mos_obj_refd(spacemap_zap);
+	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls))
+		mos_obj_refd(sls->sls_sm_obj);
+}
+
 static int
 dump_mos_leaks(spa_t *spa)
 {
@@ -5254,6 +7400,10 @@
 	mos_obj_refd(spa->spa_l2cache.sav_object);
 	mos_obj_refd(spa->spa_spares.sav_object);
 
+	if (spa->spa_syncing_log_sm != NULL)
+		mos_obj_refd(spa->spa_syncing_log_sm->sm_object);
+	mos_leak_log_spacemaps(spa);
+
 	mos_obj_refd(spa->spa_condensing_indirect_phys.
 	    scip_next_mapping_object);
 	mos_obj_refd(spa->spa_condensing_indirect_phys.
@@ -5265,6 +7415,7 @@
 		mos_obj_refd(vim->vim_phys->vimp_counts_object);
 		vdev_indirect_mapping_close(vim);
 	}
+	deleted_livelists_dump_mos(spa);
 
 	if (dp->dp_origin_snap != NULL) {
 		dsl_dataset_t *ds;
@@ -5274,12 +7425,12 @@
 		    dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
 		    FTAG, &ds));
 		count_ds_mos_objects(ds);
-		dump_deadlist(&ds->ds_deadlist);
+		dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
 		dsl_dataset_rele(ds, FTAG);
 		dsl_pool_config_exit(dp, FTAG);
 
 		count_ds_mos_objects(dp->dp_origin_snap);
-		dump_deadlist(&dp->dp_origin_snap->ds_deadlist);
+		dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist");
 	}
 	count_dir_mos_objects(dp->dp_mos_dir);
 	if (dp->dp_free_dir != NULL)
@@ -5331,12 +7482,92 @@
 	return (rv);
 }
 
+/*
+ * Running tallies maintained by log_spacemap_obsolete_stats_cb() while the
+ * log space maps are walked.
+ */
+typedef struct log_sm_obsolete_stats_arg {
+	/* txg of the log currently being tallied; 0 until the first entry */
+	uint64_t lsos_current_txg;
+
+	/* entries seen / entries counted as valid, across all logs */
+	uint64_t lsos_total_entries;
+	uint64_t lsos_valid_entries;
+
+	/* the same two tallies for the current log only; reset per log */
+	uint64_t lsos_sm_entries;
+	uint64_t lsos_valid_sm_entries;
+} log_sm_obsolete_stats_arg_t;
+
+/*
+ * Per-entry callback that tallies log space map entries into the
+ * log_sm_obsolete_stats_arg_t passed as arg.
+ *
+ * Whenever an entry arrives with a larger txg than the log being tallied,
+ * the finished log's line is printed and the per-log counters start over.
+ * An entry counts as valid when its top-level vdev is concrete and txg is
+ * not older than the unflushed txg of the metaslab covering its offset.
+ *
+ * Always returns 0.
+ */
+static int
+log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme,
+    uint64_t txg, void *arg)
+{
+	log_sm_obsolete_stats_arg_t *stats = arg;
+	uint64_t entry_offset = sme->sme_offset;
+	uint64_t top_vdev_id = sme->sme_vdev;
+
+	if (stats->lsos_current_txg == 0) {
+		/* very first entry: adopt its txg as the current log */
+		stats->lsos_current_txg = txg;
+	} else if (stats->lsos_current_txg < txg) {
+		/* a new log has begun: report the finished one, start over */
+		(void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
+		    (u_longlong_t)stats->lsos_valid_sm_entries,
+		    (u_longlong_t)stats->lsos_sm_entries,
+		    (u_longlong_t)stats->lsos_current_txg);
+		stats->lsos_current_txg = txg;
+		stats->lsos_sm_entries = 0;
+		stats->lsos_valid_sm_entries = 0;
+	}
+	ASSERT3U(stats->lsos_current_txg, ==, txg);
+
+	/* every entry is counted, valid or not */
+	stats->lsos_total_entries++;
+	stats->lsos_sm_entries++;
+
+	vdev_t *vd = vdev_lookup_top(spa, top_vdev_id);
+	if (!vdev_is_concrete(vd))
+		return (0);
+
+	metaslab_t *ms = vd->vdev_ms[entry_offset >> vd->vdev_ms_shift];
+	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
+
+	if (txg >= metaslab_unflushed_txg(ms)) {
+		stats->lsos_valid_entries++;
+		stats->lsos_valid_sm_entries++;
+	}
+	return (0);
+}
+
+/*
+ * Print, for each log space map and in total, how many entries are still
+ * valid.  Does nothing unless SPA_FEATURE_LOG_SPACEMAP is active on spa.
+ */
+static void
+dump_log_spacemap_obsolete_stats(spa_t *spa)
+{
+	log_sm_obsolete_stats_arg_t stats = { 0 };
+
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return;
+
+	(void) printf("Log Space Map Obsolete Entry Statistics:\n");
+
+	iterate_through_spacemap_logs(spa,
+	    log_spacemap_obsolete_stats_cb, &stats);
+
+	/*
+	 * The callback prints a log's line only once the next log starts,
+	 * so the most recent log still has to be reported here.
+	 */
+	(void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
+	    (u_longlong_t)stats.lsos_valid_sm_entries,
+	    (u_longlong_t)stats.lsos_sm_entries,
+	    (u_longlong_t)stats.lsos_current_txg);
+
+	/* grand total over all logs */
+	(void) printf("%-8llu valid entries out of %-8llu - total\n\n",
+	    (u_longlong_t)stats.lsos_valid_entries,
+	    (u_longlong_t)stats.lsos_total_entries);
+}
+
 static void
 dump_zpool(spa_t *spa)
 {
 	dsl_pool_t *dp = spa_get_dsl(spa);
 	int rc = 0;
 
+	if (dump_opt['y']) {
+		livelist_metaslab_validate(spa);
+	}
+
 	if (dump_opt['S']) {
 		dump_simulated_ddt(spa);
 		return;
@@ -5360,11 +7591,16 @@
 		dump_metaslabs(spa);
 	if (dump_opt['M'])
 		dump_metaslab_groups(spa);
+	if (dump_opt['d'] > 2 || dump_opt['m']) {
+		dump_log_spacemaps(spa);
+		dump_log_spacemap_obsolete_stats(spa);
+	}
 
 	if (dump_opt['d'] || dump_opt['i']) {
 		spa_feature_t f;
-		mos_refd_objs = range_tree_create(NULL, NULL);
-		dump_dir(dp->dp_meta_objset);
+		mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
+		    0);
+		dump_objset(dp->dp_meta_objset);
 
 		if (dump_opt['d'] >= 3) {
 			dsl_pool_t *dp = spa->spa_dsl_pool;
@@ -5389,7 +7625,14 @@
 			}
 			dump_dtl(spa->spa_root_vdev, 0);
 		}
-		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
+
+		for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
+			global_feature_count[f] = UINT64_MAX;
+		global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
+		global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
+		global_feature_count[SPA_FEATURE_LIVELIST] = 0;
+
+		(void) dmu_objset_find(spa_name(spa), dump_one_objset,
 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
 
 		if (rc == 0 && !dump_opt['L'])
@@ -5398,21 +7641,31 @@
 		for (f = 0; f < SPA_FEATURES; f++) {
 			uint64_t refcount;
 
+			uint64_t *arr;
 			if (!(spa_feature_table[f].fi_flags &
-			    ZFEATURE_FLAG_PER_DATASET) ||
-			    !spa_feature_is_enabled(spa, f)) {
-				ASSERT0(dataset_feature_count[f]);
-				continue;
+			    ZFEATURE_FLAG_PER_DATASET)) {
+				if (global_feature_count[f] == UINT64_MAX)
+					continue;
+				if (!spa_feature_is_enabled(spa, f)) {
+					ASSERT0(global_feature_count[f]);
+					continue;
+				}
+				arr = global_feature_count;
+			} else {
+				if (!spa_feature_is_enabled(spa, f)) {
+					ASSERT0(dataset_feature_count[f]);
+					continue;
+				}
+				arr = dataset_feature_count;
 			}
 			if (feature_get_refcount(spa, &spa_feature_table[f],
 			    &refcount) == ENOTSUP)
 				continue;
-			if (dataset_feature_count[f] != refcount) {
+			if (arr[f] != refcount) {
 				(void) printf("%s feature refcount mismatch: "
-				    "%lld datasets != %lld refcount\n",
+				    "%lld consumers != %lld refcount\n",
 				    spa_feature_table[f].fi_uname,
-				    (longlong_t)dataset_feature_count[f],
-				    (longlong_t)refcount);
+				    (longlong_t)arr[f], (longlong_t)refcount);
 				rc = 2;
 			} else {
 				(void) printf("Verified %s feature refcount "
@@ -5422,9 +7675,8 @@
 			}
 		}
 
-		if (rc == 0) {
+		if (rc == 0)
 			rc = verify_device_removal_feature_counts(spa);
-		}
 	}
 
 	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
@@ -5458,9 +7710,10 @@
 #define	ZDB_FLAG_VERBOSE	0x0080
 
 static int flagbits[256];
+static char flagbitstr[16];
 
 static void
-zdb_print_blkptr(blkptr_t *bp, int flags)
+zdb_print_blkptr(const blkptr_t *bp, int flags)
 {
 	char blkbuf[BP_SPRINTF_LEN];
 
@@ -5586,6 +7839,26 @@
 	return (NULL);
 }
 
+/*
+ * Translate a dataset object number into its name.
+ *
+ * Holds the dataset objset_id in spa's DSL pool and has dsl_dataset_name()
+ * write the name into outstr.  The pool config is entered for the duration
+ * of the lookup.
+ * NOTE(review): outstr is presumably expected to hold at least
+ * ZFS_MAX_DATASET_NAME_LEN bytes, as the caller in main() provides --
+ * confirm against dsl_dataset_name().
+ *
+ * Returns 0 on success.  If the hold fails, a message is written to stderr
+ * and the error from dsl_dataset_hold_obj() is returned.
+ */
+static int
+name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr)
+{
+	dsl_pool_t *pool = spa->spa_dsl_pool;
+	dsl_dataset_t *ds;
+	int error;
+
+	dsl_pool_config_enter(pool, FTAG);
+	error = dsl_dataset_hold_obj(pool, objset_id, NULL, &ds);
+	if (error == 0) {
+		dsl_dataset_name(ds, outstr);
+		dsl_dataset_rele(ds, NULL);
+	} else {
+		(void) fprintf(stderr, "failed to hold objset %llu: %s\n",
+		    (u_longlong_t)objset_id, strerror(error));
+	}
+	dsl_pool_config_exit(pool, FTAG);
+
+	return (error);
+}
+
 static boolean_t
 zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize)
 {
@@ -5605,6 +7878,83 @@
 
 #define	ZIO_COMPRESS_MASK(alg)	(1ULL << (ZIO_COMPRESS_##alg))
 
+/*
+ * Brute-force decompression of the physical block in pabd into lbuf.
+ *
+ * The compression function is unknown, so candidates are tried in turn:
+ * LZ4 and LZJB first, then every other function except ON, OFF, INHERIT
+ * and EMPTY (and ZLE, if ZDB_NO_ZLE is set in the environment).  When
+ * lsize equals psize the logical size is unknown too, and every size from
+ * lsize + SPA_MINBLOCKSIZE up to SPA_MAXBLOCKSIZE is tried in
+ * SPA_MINBLOCKSIZE steps; otherwise only the given lsize is tried.
+ *
+ * pabd   - compressed data, psize bytes
+ * buf    - unused; the decompressed data is always left in lbuf
+ * lbuf   - output buffer (presumably SPA_MAXBLOCKSIZE bytes, matching the
+ *          scratch buffer allocated here -- confirm against the caller)
+ * flags  - ZDB_FLAG_VERBOSE prints each attempt to stderr
+ *
+ * lsize is passed by value, so the logical size at which decompression
+ * succeeded is not reported back to the caller.
+ *
+ * Returns B_TRUE if no function/size combination worked, B_FALSE if one
+ * did.
+ */
+static boolean_t
+zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
+    uint64_t psize, int flags)
+{
+	/*
+	 * buf is a by-value pointer; assigning to it here could never reach
+	 * the caller, which points its own buf at lbuf before calling.
+	 */
+	(void) buf;
+	boolean_t exceeded = B_FALSE;
+	/*
+	 * We don't know how the data was compressed, so just try
+	 * every decompress function at every inflated blocksize.
+	 */
+	void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+	/* zero-terminated list of candidate functions, in trial order */
+	int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 };
+	int *cfuncp = cfuncs;
+	uint64_t maxlsize = SPA_MAXBLOCKSIZE;
+	uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) |
+	    ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) |
+	    (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0);
+	*cfuncp++ = ZIO_COMPRESS_LZ4;
+	*cfuncp++ = ZIO_COMPRESS_LZJB;
+	mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB);
+	for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++)
+		if (((1ULL << c) & mask) == 0)
+			*cfuncp++ = c;
+
+	/*
+	 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this
+	 * could take a while and we should let the user know
+	 * we are not stuck.  On the other hand, printing progress
+	 * info gets old after a while.  User can specify 'v' flag
+	 * to see the progression.
+	 */
+	if (lsize == psize)
+		lsize += SPA_MINBLOCKSIZE;
+	else
+		maxlsize = lsize;
+	for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) {
+		for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
+			if (flags & ZDB_FLAG_VERBOSE) {
+				(void) fprintf(stderr,
+				    "Trying %05llx -> %05llx (%s)\n",
+				    (u_longlong_t)psize,
+				    (u_longlong_t)lsize,
+				    zio_compress_table[*cfuncp].ci_name);
+			}
+
+			/*
+			 * We randomize lbuf2, and decompress to both
+			 * lbuf and lbuf2. This way, we will know if
+			 * decompression fills exactly to lsize.
+			 */
+			VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));
+
+			if (zio_decompress_data(*cfuncp, pabd,
+			    lbuf, psize, lsize, NULL) == 0 &&
+			    zio_decompress_data(*cfuncp, pabd,
+			    lbuf2, psize, lsize, NULL) == 0 &&
+			    bcmp(lbuf, lbuf2, lsize) == 0)
+				break;
+		}
+		/* a non-zero entry means the inner loop stopped on a match */
+		if (*cfuncp != 0)
+			break;
+	}
+	umem_free(lbuf2, SPA_MAXBLOCKSIZE);
+
+	if (lsize > maxlsize) {
+		exceeded = B_TRUE;
+	}
+	if (*cfuncp == ZIO_COMPRESS_ZLE) {
+		printf("\nZLE decompression was selected. If you "
+		    "suspect the results are wrong,\ntry avoiding ZLE "
+		    "by setting and exporting ZDB_NO_ZLE=\"true\"\n");
+	}
+
+	return (exceeded);
+}
+
 /*
  * Read a block from a pool and print it out.  The syntax of the
  * block descriptor is:
@@ -5639,7 +7989,7 @@
 	void *lbuf, *buf;
 	char *s, *p, *dup, *vdev, *flagstr, *sizes;
 	int i, error;
-	boolean_t borrowed = B_FALSE;
+	boolean_t borrowed = B_FALSE, found = B_FALSE;
 
 	dup = strdup(thing);
 	s = strtok(dup, ":");
@@ -5659,41 +8009,57 @@
 		s = "offset must be a multiple of sector size";
 	if (s) {
 		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
-		free(flagstr);
-		free(dup);
-		return;
+		goto done;
 	}
 
 	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
-		for (i = 0; flagstr[i]; i++) {
+		for (i = 0; i < strlen(flagstr); i++) {
 			int bit = flagbits[(uchar_t)flagstr[i]];
 
 			if (bit == 0) {
-				(void) printf("***Invalid flag: %c\n",
-				    flagstr[i]);
+				(void) printf("***Ignoring flag: %c\n",
+				    (uchar_t)flagstr[i]);
 				continue;
 			}
+			found = B_TRUE;
 			flags |= bit;
 
-			/* If it's not something with an argument, keep going */
-			if ((bit & (ZDB_FLAG_CHECKSUM |
-			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
-				continue;
-
 			p = &flagstr[i + 1];
-			if (bit == ZDB_FLAG_PRINT_BLKPTR) {
-				blkptr_offset = strtoull(p, &p, 16);
-				i = p - &flagstr[i + 1];
-			}
 			if (*p != ':' && *p != '\0') {
-				(void) printf("***Invalid flag arg: '%s'\n", s);
-				free(flagstr);
-				free(dup);
-				return;
+				int j = 0, nextbit = flagbits[(uchar_t)*p];
+				char *end, offstr[8] = { 0 };
+				if ((bit == ZDB_FLAG_PRINT_BLKPTR) &&
+				    (nextbit == 0)) {
+					/* look ahead to isolate the offset */
+					while (nextbit == 0 &&
+					    strchr(flagbitstr, *p) == NULL) {
+						offstr[j] = *p;
+						j++;
+						if (i + j > strlen(flagstr))
+							break;
+						p++;
+						nextbit = flagbits[(uchar_t)*p];
+					}
+					blkptr_offset = strtoull(offstr, &end,
+					    16);
+					i += j;
+				} else if (nextbit == 0) {
+					(void) printf("***Ignoring flag arg:"
+					    " '%c'\n", (uchar_t)*p);
+				}
 			}
 		}
 	}
-	free(flagstr);
+	if (blkptr_offset % sizeof (blkptr_t)) {
+		printf("Block pointer offset 0x%llx "
+		    "must be divisible by 0x%x\n",
+		    (longlong_t)blkptr_offset, (int)sizeof (blkptr_t));
+		goto done;
+	}
+	if (found == B_FALSE && strlen(flagstr) > 0) {
+		printf("Invalid flag arg: '%s'\n", flagstr);
+		goto done;
+	}
 
 	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
 	if (vd == NULL) {
@@ -5746,10 +8112,9 @@
 		 */
 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
 		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
-		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
-		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
-		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
-		    NULL, NULL));
+		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
+		    ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
+		    ZIO_FLAG_OPTIONAL, NULL, NULL));
 	}
 
 	error = zio_wait(zio);
@@ -5760,80 +8125,43 @@
 		goto out;
 	}
 
+	uint64_t orig_lsize = lsize;
+	buf = lbuf;
 	if (flags & ZDB_FLAG_DECOMPRESS) {
-		/*
-		 * We don't know how the data was compressed, so just try
-		 * every decompress function at every inflated blocksize.
-		 */
-		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
-		int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 };
-		int *cfuncp = cfuncs;
-		uint64_t maxlsize = SPA_MAXBLOCKSIZE;
-		uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) |
-		    ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) |
-		    (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0);
-		*cfuncp++ = ZIO_COMPRESS_LZ4;
-		*cfuncp++ = ZIO_COMPRESS_LZJB;
-		mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB);
-		for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++)
-			if (((1ULL << c) & mask) == 0)
-				*cfuncp++ = c;
-
-		/*
-		 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this
-		 * could take a while and we should let the user know
-		 * we are not stuck.  On the other hand, printing progress
-		 * info gets old after a while.  User can specify 'v' flag
-		 * to see the progression.
-		 */
-		if (lsize == psize)
-			lsize += SPA_MINBLOCKSIZE;
-		else
-			maxlsize = lsize;
-		for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) {
-			for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
-				if (flags & ZDB_FLAG_VERBOSE) {
-					(void) fprintf(stderr,
-					    "Trying %05llx -> %05llx (%s)\n",
-					    (u_longlong_t)psize,
-					    (u_longlong_t)lsize,
-					    zio_compress_table[*cfuncp].\
-					    ci_name);
-				}
-
-				/*
-				 * We randomize lbuf2, and decompress to both
-				 * lbuf and lbuf2. This way, we will know if
-				 * decompression fill exactly to lsize.
-				 */
-				VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));
-
-				if (zio_decompress_data(*cfuncp, pabd,
-				    lbuf, psize, lsize) == 0 &&
-				    zio_decompress_data(*cfuncp, pabd,
-				    lbuf2, psize, lsize) == 0 &&
-				    bcmp(lbuf, lbuf2, lsize) == 0)
-					break;
-			}
-			if (*cfuncp != 0)
-				break;
-		}
-		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
-
-		if (lsize > maxlsize) {
+		boolean_t failed = zdb_decompress_block(pabd, buf, lbuf,
+		    lsize, psize, flags);
+		if (failed) {
 			(void) printf("Decompress of %s failed\n", thing);
 			goto out;
 		}
-		buf = lbuf;
-		if (*cfuncp == ZIO_COMPRESS_ZLE) {
-			printf("\nZLE decompression was selected. If you "
-			    "suspect the results are wrong,\ntry avoiding ZLE "
-			    "by setting and exporting ZDB_NO_ZLE=\"true\"\n");
-		}
 	} else {
 		buf = abd_borrow_buf_copy(pabd, lsize);
 		borrowed = B_TRUE;
 	}
+	/*
+	 * Try to detect invalid block pointer.  If invalid, try
+	 * decompressing.
+	 */
+	if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) &&
+	    !(flags & ZDB_FLAG_DECOMPRESS)) {
+		const blkptr_t *b = (const blkptr_t *)(void *)
+		    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
+		if (zfs_blkptr_verify(spa, b, B_FALSE, BLK_VERIFY_ONLY) ==
+		    B_FALSE) {
+			abd_return_buf_copy(pabd, buf, lsize);
+			borrowed = B_FALSE;
+			buf = lbuf;
+			boolean_t failed = zdb_decompress_block(pabd, buf,
+			    lbuf, lsize, psize, flags);
+			b = (const blkptr_t *)(void *)
+			    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
+			if (failed || zfs_blkptr_verify(spa, b, B_FALSE,
+			    BLK_VERIFY_LOG) == B_FALSE) {
+				printf("invalid block pointer at this DVA\n");
+				goto out;
+			}
+		}
+	}
 
 	if (flags & ZDB_FLAG_PRINT_BLKPTR)
 		zdb_print_blkptr((blkptr_t *)(void *)
@@ -5841,8 +8169,8 @@
 	else if (flags & ZDB_FLAG_RAW)
 		zdb_dump_block_raw(buf, lsize, flags);
 	else if (flags & ZDB_FLAG_INDIRECT)
-		zdb_dump_indirect((blkptr_t *)buf, lsize / sizeof (blkptr_t),
-		    flags);
+		zdb_dump_indirect((blkptr_t *)buf,
+		    orig_lsize / sizeof (blkptr_t), flags);
 	else if (flags & ZDB_FLAG_GBH)
 		zdb_dump_gbh(buf, flags);
 	else
@@ -5855,7 +8183,7 @@
 	 */
 	if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) &&
 	    !(flags & ZDB_FLAG_GBH)) {
-		zio_t *czio, *cio;
+		zio_t *czio;
 		(void) printf("\n");
 		for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL;
 		    ck < ZIO_CHECKSUM_FUNCTIONS; ck++) {
@@ -5871,12 +8199,11 @@
 			czio->io_bp = bp;
 
 			if (vd == vd->vdev_top) {
-				cio = zio_read(czio, spa, bp, pabd, psize,
+				zio_nowait(zio_read(czio, spa, bp, pabd, psize,
 				    NULL, NULL,
 				    ZIO_PRIORITY_SYNC_READ,
 				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
-				    ZIO_FLAG_DONT_RETRY, NULL);
-				zio_nowait(cio);
+				    ZIO_FLAG_DONT_RETRY, NULL));
 			} else {
 				zio_nowait(zio_vdev_child_io(czio, bp, vd,
 				    offset, pabd, psize, ZIO_TYPE_READ,
@@ -5915,6 +8242,8 @@
 out:
 	abd_free(pabd);
 	umem_free(lbuf, SPA_MAXBLOCKSIZE);
+done:
+	free(flagstr);
 	free(dup);
 }
 
@@ -5952,6 +8281,23 @@
 	free(buf);
 }
 
+/*
+ * Check for a valid hex or decimal numeric string.
+ *
+ * An optional "0x"/"0X" prefix is skipped; every remaining character must
+ * be a hexadecimal digit, and there must be at least one of them.  Note
+ * that hex digits are accepted even without the prefix (e.g. "abc").
+ *
+ * Returns B_TRUE if str qualifies, B_FALSE otherwise (including for the
+ * empty string and for a bare prefix).
+ */
+static boolean_t
+zdb_numeric(char *str)
+{
+	size_t len = strlen(str);
+	size_t i = 0;
+
+	if (len == 0)
+		return (B_FALSE);
+	if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0)
+		i = 2;
+	/* a bare prefix carries no digits and is not a number */
+	if (i == len)
+		return (B_FALSE);
+	for (; i < len; i++) {
+		/* <ctype.h> needs an unsigned char value; plain char may be < 0 */
+		if (!isxdigit((unsigned char)str[i]))
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -5964,13 +8310,15 @@
 	int error = 0;
 	char **searchdirs = NULL;
 	int nsearch = 0;
-	char *target, *target_pool;
+	char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN];
 	nvlist_t *policy = NULL;
 	uint64_t max_txg = UINT64_MAX;
+	int64_t objset_id = -1;
+	uint64_t object;
 	int flags = ZFS_IMPORT_MISSING_LOG;
 	int rewind = ZPOOL_NEVER_REWIND;
-	char *spa_config_path_env;
-	boolean_t target_is_spa = B_TRUE;
+	char *spa_config_path_env, *objset_str;
+	boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE;
 	nvlist_t *cfg = NULL;
 
 	(void) setrlimit(RLIMIT_NOFILE, &rl);
@@ -5987,8 +8335,15 @@
 	if (spa_config_path_env != NULL)
 		spa_config_path = spa_config_path_env;
 
+	/*
+	 * For performance reasons, we set this tunable down. We do so before
+	 * the arg parsing section so that the user can override this value if
+	 * they choose.
+	 */
+	zfs_btree_verify_intensity = 3;
+
 	while ((c = getopt(argc, argv,
-	    "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XY")) != -1) {
+	    "AbcCdDeEFGhiI:klLmMNo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
 		switch (c) {
 		case 'b':
 		case 'c':
@@ -6002,11 +8357,15 @@
 		case 'l':
 		case 'm':
 		case 'M':
+		case 'N':
 		case 'O':
+		case 'r':
 		case 'R':
 		case 's':
 		case 'S':
 		case 'u':
+		case 'y':
+		case 'Z':
 			dump_opt[c]++;
 			dump_all = 0;
 			break;
@@ -6096,6 +8455,7 @@
 	 * ZDB does not typically re-read blocks; therefore limit the ARC
 	 * to 256 MB, which can be used entirely for metadata.
 	 */
+	zfs_arc_min = zfs_arc_meta_min = 2ULL << SPA_MAXBLOCKSHIFT;
 	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
 #endif
 
@@ -6117,13 +8477,18 @@
 	 */
 	spa_load_verify_dryrun = B_TRUE;
 
-	kernel_init(FREAD);
+	/*
+	 * ZDB should have ability to read spacemaps.
+	 */
+	spa_mode_readable_spacemaps = B_TRUE;
+
+	kernel_init(SPA_MODE_READ);
 
 	if (dump_all)
 		verbose = MAX(verbose, 1);
 
 	for (c = 0; c < 256; c++) {
-		if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
+		if (dump_all && strchr("AeEFklLNOPrRSXy", c) == NULL)
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;
@@ -6134,7 +8499,6 @@
 
 	argc -= optind;
 	argv += optind;
-
 	if (argc < 2 && dump_opt['R'])
 		usage();
 
@@ -6160,13 +8524,24 @@
 		if (argc != 2)
 			usage();
 		dump_opt['v'] = verbose + 3;
-		return (dump_path(argv[0], argv[1]));
+		return (dump_path(argv[0], argv[1], NULL));
+	}
+	if (dump_opt['r']) {
+		target_is_spa = B_FALSE;
+		if (argc != 3)
+			usage();
+		dump_opt['v'] = verbose;
+		error = dump_path(argv[0], argv[1], &object);
 	}
 
 	if (dump_opt['X'] || dump_opt['F'])
 		rewind = ZPOOL_DO_REWIND |
 		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
 
+	/* -N implies -d */
+	if (dump_opt['N'] && dump_opt['d'] == 0)
+		dump_opt['d'] = dump_opt['N'];
+
 	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
 	    nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
 	    nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
@@ -6185,6 +8560,34 @@
 		targetlen = strlen(target);
 		if (targetlen && target[targetlen - 1] == '/')
 			target[targetlen - 1] = '\0';
+		/*
+		 * See if an objset ID was supplied (-d <pool>/<objset ID>).
+		 * To disambiguate tank/100, consider the 100 as objsetID
+		 * if -N was given, otherwise 100 is an objsetID iff
+		 * tank/100 as a named dataset fails on lookup.
+		 */
+		objset_str = strchr(target, '/');
+		if (objset_str && strlen(objset_str) > 1 &&
+		    zdb_numeric(objset_str + 1)) {
+			char *endptr;
+			errno = 0;
+			objset_str++;
+			objset_id = strtoull(objset_str, &endptr, 0);
+			/* dataset 0 is the same as opening the pool */
+			if (errno == 0 && endptr != objset_str &&
+			    objset_id != 0) {
+				if (dump_opt['N'])
+					dataset_lookup = B_TRUE;
+			}
+			/* normal dataset name not an objset ID */
+			if (endptr == objset_str) {
+				objset_id = -1;
+			}
+		} else if (objset_str && !zdb_numeric(objset_str + 1) &&
+		    dump_opt['N']) {
+			printf("Supply a numeric objset ID with -N\n");
+			exit(1);
+		}
 	} else {
 		target_pool = target;
 	}
@@ -6221,6 +8624,11 @@
 		}
 	}
 
+	if (searchdirs != NULL) {
+		umem_free(searchdirs, nsearch * sizeof (char *));
+		searchdirs = NULL;
+	}
+
 	/*
 	 * import_checkpointed_state makes the assumption that the
 	 * target pool that we pass it is already part of the spa
@@ -6239,6 +8647,11 @@
 			target = checkpoint_target;
 	}
 
+	if (cfg != NULL) {
+		nvlist_free(cfg);
+		cfg = NULL;
+	}
+
 	if (target_pool != target)
 		free(target_pool);
 
@@ -6254,7 +8667,7 @@
 				    checkpoint_pool, error);
 			}
 
-		} else if (target_is_spa || dump_opt['R']) {
+		} else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
 			zdb_set_skip_mmp(target);
 			error = spa_open_rewind(target, &spa, FTAG, policy,
 			    NULL);
@@ -6277,11 +8690,67 @@
 					    FTAG, policy, NULL);
 				}
 			}
+		} else if (strpbrk(target, "#") != NULL) {
+			dsl_pool_t *dp;
+			error = dsl_pool_hold(target, FTAG, &dp);
+			if (error != 0) {
+				fatal("can't dump '%s': %s", target,
+				    strerror(error));
+			}
+			error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
+			dsl_pool_rele(dp, FTAG);
+			if (error != 0) {
+				fatal("can't dump '%s': %s", target,
+				    strerror(error));
+			}
+			return (error);
 		} else {
+			target_pool = strdup(target);
+			if (strpbrk(target, "/@") != NULL)
+				*strpbrk(target_pool, "/@") = '\0';
+
 			zdb_set_skip_mmp(target);
-			error = open_objset(target, DMU_OST_ANY, FTAG, &os);
+			/*
+			 * If -N was supplied, the user has indicated that
+			 * zdb -d <pool>/<objsetID> is in effect.  Otherwise
+			 * we first assume that the dataset string is the
+			 * dataset name.  If dmu_objset_hold fails with the
+			 * dataset string, and we have an objset_id, retry the
+			 * lookup with the objsetID.
+			 */
+			boolean_t retry = B_TRUE;
+retry_lookup:
+			if (dataset_lookup == B_TRUE) {
+				/*
+				 * Use the supplied id to get the name
+				 * for open_objset.
+				 */
+				error = spa_open(target_pool, &spa, FTAG);
+				if (error == 0) {
+					error = name_from_objset_id(spa,
+					    objset_id, dsname);
+					spa_close(spa, FTAG);
+					if (error == 0)
+						target = dsname;
+				}
+			}
+			if (error == 0) {
+				if (objset_id > 0 && retry) {
+					int err = dmu_objset_hold(target, FTAG,
+					    &os);
+					if (err) {
+						dataset_lookup = B_TRUE;
+						retry = B_FALSE;
+						goto retry_lookup;
+					} else {
+						dmu_objset_rele(os, FTAG);
+					}
+				}
+				error = open_objset(target, FTAG, &os);
+			}
 			if (error == 0)
 				spa = dmu_objset_spa(os);
+			free(target_pool);
 		}
 	}
 	nvlist_free(policy);
@@ -6299,22 +8768,45 @@
 
 	argv++;
 	argc--;
-	if (!dump_opt['R']) {
-		if (argc > 0) {
-			zopt_objects = argc;
-			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
-			for (unsigned i = 0; i < zopt_objects; i++) {
+	if (dump_opt['r']) {
+		error = zdb_copy_object(os, object, argv[1]);
+	} else if (!dump_opt['R']) {
+		flagbits['d'] = ZOR_FLAG_DIRECTORY;
+		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
+		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
+		flagbits['z'] = ZOR_FLAG_ZAP;
+		flagbits['A'] = ZOR_FLAG_ALL_TYPES;
+
+		if (argc > 0 && dump_opt['d']) {
+			zopt_object_args = argc;
+			zopt_object_ranges = calloc(zopt_object_args,
+			    sizeof (zopt_object_range_t));
+			for (unsigned i = 0; i < zopt_object_args; i++) {
+				int err;
+				char *msg = NULL;
+
+				err = parse_object_range(argv[i],
+				    &zopt_object_ranges[i], &msg);
+				if (err != 0)
+					fatal("Bad object or range: '%s': %s\n",
+					    argv[i], msg ? msg : "");
+			}
+		} else if (argc > 0 && dump_opt['m']) {
+			zopt_metaslab_args = argc;
+			zopt_metaslab = calloc(zopt_metaslab_args,
+			    sizeof (uint64_t));
+			for (unsigned i = 0; i < zopt_metaslab_args; i++) {
 				errno = 0;
-				zopt_object[i] = strtoull(argv[i], NULL, 0);
-				if (zopt_object[i] == 0 && errno != 0)
-					fatal("bad number %s: %s",
-					    argv[i], strerror(errno));
+				zopt_metaslab[i] = strtoull(argv[i], NULL, 0);
+				if (zopt_metaslab[i] == 0 && errno != 0)
+					fatal("bad number %s: %s", argv[i],
+					    strerror(errno));
 			}
 		}
 		if (os != NULL) {
-			dump_dir(os);
-		} else if (zopt_objects > 0 && !dump_opt['m']) {
-			dump_dir(spa->spa_meta_objset);
+			dump_objset(os);
+		} else if (zopt_object_args > 0 && !dump_opt['m']) {
+			dump_objset(spa->spa_meta_objset);
 		} else {
 			dump_zpool(spa);
 		}
@@ -6338,10 +8830,11 @@
 			free(checkpoint_target);
 	}
 
-	if (os != NULL)
+	if (os != NULL) {
 		close_objset(os, FTAG);
-	else
+	} else {
 		spa_close(spa, FTAG);
+	}
 
 	fuid_table_destroy();
 

diff --git a/zfs/cmd/zdb/zdb_il.c b/zfs/cmd/zdb/zdb_il.c
index c12178e..553765b 100644
--- a/zfs/cmd/zdb/zdb_il.c
+++ b/zfs/cmd/zdb/zdb_il.c

@@ -62,9 +62,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_create(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_create(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_create_t *lr = arg;
+	const lr_create_t *lr = arg;
 	time_t crtime = lr->lr_crtime[0];
 	char *name, *link;
 	lr_attr_t *lrattr;
@@ -98,9 +98,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_remove(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_remove(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_remove_t *lr = arg;
+	const lr_remove_t *lr = arg;
 
 	(void) printf("%sdoid %llu, name %s\n", tab_prefix,
 	    (u_longlong_t)lr->lr_doid, (char *)(lr + 1));
@@ -108,9 +108,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_link(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_link(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_link_t *lr = arg;
+	const lr_link_t *lr = arg;
 
 	(void) printf("%sdoid %llu, link_obj %llu, name %s\n", tab_prefix,
 	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
@@ -119,9 +119,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_rename(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_rename_t *lr = arg;
+	const lr_rename_t *lr = arg;
 	char *snm = (char *)(lr + 1);
 	char *tnm = snm + strlen(snm) + 1;
 
@@ -148,11 +148,11 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_write(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_write_t *lr = arg;
+	const lr_write_t *lr = arg;
 	abd_t *data;
-	blkptr_t *bp = &lr->lr_blkptr;
+	const blkptr_t *bp = &lr->lr_blkptr;
 	zbookmark_phys_t zb;
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);
 	int error;
@@ -211,9 +211,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_truncate(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_truncate(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_truncate_t *lr = arg;
+	const lr_truncate_t *lr = arg;
 
 	(void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", tab_prefix,
 	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
@@ -222,9 +222,9 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_setattr(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_setattr(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_setattr_t *lr = arg;
+	const lr_setattr_t *lr = arg;
 	time_t atime = (time_t)lr->lr_atime[0];
 	time_t mtime = (time_t)lr->lr_mtime[0];
 
@@ -268,15 +268,15 @@
 
 /* ARGSUSED */
 static void
-zil_prt_rec_acl(zilog_t *zilog, int txtype, void *arg)
+zil_prt_rec_acl(zilog_t *zilog, int txtype, const void *arg)
 {
-	lr_acl_t *lr = arg;
+	const lr_acl_t *lr = arg;
 
 	(void) printf("%sfoid %llu, aclcnt %llu\n", tab_prefix,
 	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
 }
 
-typedef void (*zil_prt_rec_func_t)(zilog_t *, int, void *);
+typedef void (*zil_prt_rec_func_t)(zilog_t *, int, const void *);
 typedef struct zil_rec_info {
 	zil_prt_rec_func_t	zri_print;
 	const char		*zri_name;
@@ -309,7 +309,7 @@
 
 /* ARGSUSED */
 static int
-print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
+print_log_record(zilog_t *zilog, const lr_t *lr, void *arg, uint64_t claim_txg)
 {
 	int txtype;
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);
@@ -343,7 +343,8 @@
 
 /* ARGSUSED */
 static int
-print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+print_log_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
+    uint64_t claim_txg)
 {
 	char blkbuf[BP_SPRINTF_LEN + 10];
 	int verbose = MAX(dump_opt['d'], dump_opt['i']);

diff --git a/zfs/cmd/zed/Makefile.am b/zfs/cmd/zed/Makefile.am
index fb479f9..7b66299 100644
--- a/zfs/cmd/zed/Makefile.am
+++ b/zfs/cmd/zed/Makefile.am

@@ -1,10 +1,10 @@
-SUBDIRS = zed.d
-
 include $(top_srcdir)/config/Rules.am
+include $(top_srcdir)/config/Shellcheck.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CFLAGS += $(LIBUDEV_CFLAGS) $(LIBUUID_CFLAGS)
+
+SUBDIRS = zed.d
+SHELLCHECKDIRS = $(SUBDIRS)
 
 sbin_PROGRAMS = zed
 
@@ -40,9 +40,14 @@
 zed_SOURCES = $(ZED_SRC) $(FMA_SRC)
 
 zed_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libuutil/libuutil.la \
-	$(top_builddir)/lib/libzfs/libzfs.la
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libuutil/libuutil.la
 
-zed_LDADD += -lrt
+zed_LDADD += -lrt $(LIBATOMIC_LIBS) $(LIBUDEV_LIBS) $(LIBUUID_LIBS)
 zed_LDFLAGS = -pthread
+
+EXTRA_DIST = agents/README.md
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zed/agents/fmd_api.c b/zfs/cmd/zed/agents/fmd_api.c
index 607b387..f6cb763 100644
--- a/zfs/cmd/zed/agents/fmd_api.c
+++ b/zfs/cmd/zed/agents/fmd_api.c

@@ -599,6 +599,7 @@
 	sev.sigev_notify_function = _timer_notify;
 	sev.sigev_notify_attributes = NULL;
 	sev.sigev_value.sival_ptr = ftp;
+	sev.sigev_signo = 0;
 
 	timer_create(CLOCK_REALTIME, &sev, &ftp->ft_tid);
 	timer_settime(ftp->ft_tid, 0, &its, NULL);

diff --git a/zfs/cmd/zed/agents/zfs_agents.c b/zfs/cmd/zed/agents/zfs_agents.c
index 006e0ab..e148ae5 100644
--- a/zfs/cmd/zed/agents/zfs_agents.c
+++ b/zfs/cmd/zed/agents/zfs_agents.c

@@ -13,6 +13,7 @@
 /*
  * Copyright (c) 2016, Intel Corporation.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
+ * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
  */
 
 #include <libnvpair.h>
@@ -79,6 +80,7 @@
 	char *path = NULL;
 	uint_t c, children;
 	nvlist_t **child;
+	uint64_t vdev_guid;
 
 	/*
 	 * First iterate over any children.
@@ -99,7 +101,7 @@
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
-				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
+				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
 				return (B_TRUE);
 			}
 		}
@@ -108,7 +110,7 @@
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
-				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
+				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
 				return (B_TRUE);
 			}
 		}
@@ -125,6 +127,21 @@
 		    &gsp->gs_vdev_expandtime);
 		return (B_TRUE);
 	}
+	/*
+	 * Otherwise, on a vdev guid match, grab the devid and expansion
+	 * time. The devid might be missing on removal since it's not part
+	 * of blkid cache and L2ARC VDEV does not contain pool guid in its
+	 * blkid, so this is a special case for L2ARC VDEV.
+	 */
+	else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
+	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
+	    gsp->gs_vdev_guid == vdev_guid) {
+		(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
+		    &gsp->gs_devid);
+		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+		    &gsp->gs_vdev_expandtime);
+		return (B_TRUE);
+	}
 
 	return (B_FALSE);
 }
@@ -147,13 +164,13 @@
 	/*
 	 * if a match was found then grab the pool guid
 	 */
-	if (gsp->gs_vdev_guid) {
+	if (gsp->gs_vdev_guid && gsp->gs_devid) {
 		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &gsp->gs_pool_guid);
 	}
 
 	zpool_close(zhp);
-	return (gsp->gs_vdev_guid != 0);
+	return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
 }
 
 void
@@ -177,10 +194,12 @@
 	}
 
 	/*
-	 * On ZFS on Linux, we don't get the expected FM_RESOURCE_REMOVED
-	 * ereport from vdev_disk layer after a hot unplug. Fortunately we
-	 * get a EC_DEV_REMOVE from our disk monitor and it is a suitable
+	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
+	 * from the vdev_disk layer after a hot unplug. Fortunately we do
+	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
 	 * proxy so we remap it here for the benefit of the diagnosis engine.
+	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
+	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
 	 */
 	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
 	    (strcmp(subclass, ESC_DISK) == 0) &&
@@ -192,11 +211,13 @@
 		uint64_t pool_guid = 0, vdev_guid = 0;
 		guid_search_t search = { 0 };
 		device_type_t devtype = DEVICE_TYPE_PRIMARY;
+		char *devid = NULL;
 
 		class = "resource.fs.zfs.removed";
 		subclass = "";
 
 		(void) nvlist_add_string(payload, FM_CLASS, class);
+		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
 
@@ -206,15 +227,25 @@
 		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
 
 		/*
+		 * If devid is missing but vdev_guid is available, find devid
+		 * and pool_guid from vdev_guid.
 		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
 		 * ZFS_EV_POOL_GUID may be missing so find them.
 		 */
-		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
-		    &search.gs_devid);
-		(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
-		pool_guid = search.gs_pool_guid;
-		vdev_guid = search.gs_vdev_guid;
-		devtype = search.gs_vdev_type;
+		if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
+			if (devid == NULL)
+				search.gs_vdev_guid = vdev_guid;
+			else
+				search.gs_devid = devid;
+			zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+			if (devid == NULL)
+				devid = search.gs_devid;
+			if (pool_guid == 0)
+				pool_guid = search.gs_pool_guid;
+			if (vdev_guid == 0)
+				vdev_guid = search.gs_vdev_guid;
+			devtype = search.gs_vdev_type;
+		}
 
 		/*
 		 * We want to avoid reporting "remove" events coming from
@@ -226,7 +257,9 @@
 		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
 			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
 			    "for recently expanded device '%s'", EC_DEV_REMOVE,
-			    search.gs_devid);
+			    devid);
+			fnvlist_free(payload);
+			free(event);
 			goto out;
 		}
 
@@ -383,6 +416,7 @@
 		list_destroy(&agent_events);
 		zed_log_die("Failed to initialize agents");
 	}
+	pthread_setname_np(g_agents_tid, "agents");
 }
 
 void

diff --git a/zfs/cmd/zed/agents/zfs_diagnosis.c b/zfs/cmd/zed/agents/zfs_diagnosis.c
index 0b27f67..9f646f9 100644
--- a/zfs/cmd/zed/agents/zfs_diagnosis.c
+++ b/zfs/cmd/zed/agents/zfs_diagnosis.c

@@ -35,6 +35,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/fm/protocol.h>
 #include <sys/fm/fs/zfs.h>
+#include <sys/zio.h>
 
 #include "zfs_agents.h"
 #include "fmd_api.h"
@@ -773,6 +774,8 @@
 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) {
 		char *failmode = NULL;
 		boolean_t checkremove = B_FALSE;
+		uint32_t pri = 0;
+		int32_t flags = 0;
 
 		/*
 		 * If this is a checksum or I/O error, then toss it into the
@@ -795,6 +798,23 @@
 				checkremove = B_TRUE;
 		} else if (fmd_nvl_class_match(hdl, nvl,
 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
+			/*
+			 * We ignore ereports for checksum errors generated by
+			 * scrub/resilver I/O to avoid potentially further
+			 * degrading the pool while it's being repaired.
+			 */
+			if (((nvlist_lookup_uint32(nvl,
+			    FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) &&
+			    (pri == ZIO_PRIORITY_SCRUB ||
+			    pri == ZIO_PRIORITY_REBUILD)) ||
+			    ((nvlist_lookup_int32(nvl,
+			    FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) &&
+			    (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) {
+				fmd_hdl_debug(hdl, "ignoring '%s' for "
+				    "scrub/resilver I/O", class);
+				return;
+			}
+
 			if (zcp->zc_data.zc_serd_checksum[0] == '\0') {
 				zfs_serd_name(zcp->zc_data.zc_serd_checksum,
 				    pool_guid, vdev_guid, "checksum");

diff --git a/zfs/cmd/zed/agents/zfs_mod.c b/zfs/cmd/zed/agents/zfs_mod.c
index d980794..8dd75e0 100644
--- a/zfs/cmd/zed/agents/zfs_mod.c
+++ b/zfs/cmd/zed/agents/zfs_mod.c

@@ -63,13 +63,10 @@
  * If the device could not be replaced, then the second online attempt will
  * trigger the FMA fault that we skipped earlier.
  *
- * ZFS on Linux porting notes:
- *	Linux udev provides a disk insert for both the disk and the partition
- *
+ * On Linux udev provides a disk insert for both the disk and the partition.
  */
 
 #include <ctype.h>
-#include <devid.h>
 #include <fcntl.h>
 #include <libnvpair.h>
 #include <libzfs.h>
@@ -186,14 +183,16 @@
 	nvlist_t *nvroot, *newvd;
 	pendingdev_t *device;
 	uint64_t wholedisk = 0ULL;
-	uint64_t offline = 0ULL;
+	uint64_t offline = 0ULL, faulted = 0ULL;
 	uint64_t guid = 0ULL;
+	uint64_t is_spare = 0;
 	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
 	char devpath[PATH_MAX];
 	int ret;
-	int is_dm = 0;
-	int is_sd = 0;
+	int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
+	boolean_t is_sd = B_FALSE;
+	boolean_t is_mpath_wholedisk = B_FALSE;
 	uint_t c;
 	vdev_stat_t *vs;
 
@@ -214,15 +213,74 @@
 	    &enc_sysfs_path);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
+	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
+
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
+	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_IS_SPARE, &is_spare);
 
-	if (offline)
-		return;  /* don't intervene if it was taken offline */
+	/*
+	 * Special case:
+	 *
+	 * We've seen times where a disk won't have a ZPOOL_CONFIG_PHYS_PATH
+	 * entry in their config. For example, on this force-faulted disk:
+	 *
+	 *	children[0]:
+	 *	   type: 'disk'
+	 *	   id: 0
+	 *	   guid: 14309659774640089719
+	 *        path: '/dev/disk/by-vdev/L28'
+	 *        whole_disk: 0
+	 *        DTL: 654
+	 *        create_txg: 4
+	 *        com.delphix:vdev_zap_leaf: 1161
+	 *        faulted: 1
+	 *        aux_state: 'external'
+	 *	children[1]:
+	 *        type: 'disk'
+	 *        id: 1
+	 *        guid: 16002508084177980912
+	 *        path: '/dev/disk/by-vdev/L29'
+	 *        devid: 'dm-uuid-mpath-35000c500a61d68a3'
+	 *        phys_path: 'L29'
+	 *        vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+	 *        whole_disk: 0
+	 *        DTL: 1028
+	 *        create_txg: 4
+	 *        com.delphix:vdev_zap_leaf: 131
+	 *
+	 * If the disk's path is a /dev/disk/by-vdev/ path, then we can infer
+	 * the ZPOOL_CONFIG_PHYS_PATH from the by-vdev disk name.
+	 */
+	if (physpath == NULL && path != NULL) {
+		/* If path begins with "/dev/disk/by-vdev/" ... */
+		if (strncmp(path, DEV_BYVDEV_PATH,
+		    strlen(DEV_BYVDEV_PATH)) == 0) {
+			/* physpath = the text after "/dev/disk/by-vdev/" */
+			physpath = &path[strlen(DEV_BYVDEV_PATH)];
+		}
+	}
 
-	is_dm = zfs_dev_is_dm(path);
+	/*
+	 * We don't want to autoreplace offlined disks.  However, we do want to
+	 * replace force-faulted disks (`zpool offline -f`).  Force-faulted
+	 * disks have both offline=1 and faulted=1 in the nvlist.
+	 */
+	if (offline && !faulted) {
+		zed_log_msg(LOG_INFO, "%s: %s is offline, skip autoreplace",
+		    __func__, path);
+		return;
+	}
+
+	is_mpath_wholedisk = is_mpath_whole_disk(path);
 	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
-	    " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
-	    physpath ? physpath : "NULL", wholedisk, is_dm,
+	    " %s blank disk, %s mpath blank disk, %s labeled, enc sysfs '%s', "
+	    "(guid %llu)",
+	    zpool_get_name(zhp), path,
+	    physpath ? physpath : "NULL",
+	    wholedisk ? "is" : "not",
+	    is_mpath_wholedisk? "is" : "not",
+	    labeled ? "is" : "not",
+	    enc_sysfs_path,
 	    (long long unsigned int)guid);
 
 	/*
@@ -249,15 +307,18 @@
 		}
 	}
 
+	if (is_spare)
+		online_flag |= ZFS_ONLINE_SPARE;
+
 	/*
 	 * Attempt to online the device.
 	 */
-	if (zpool_vdev_online(zhp, fullpath,
-	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
+	if (zpool_vdev_online(zhp, fullpath, online_flag, &newstate) == 0 &&
 	    (newstate == VDEV_STATE_HEALTHY ||
 	    newstate == VDEV_STATE_DEGRADED)) {
-		zed_log_msg(LOG_INFO, "  zpool_vdev_online: vdev %s is %s",
-		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
+		zed_log_msg(LOG_INFO,
+		    "  zpool_vdev_online: vdev '%s' ('%s') is "
+		    "%s", fullpath, physpath, (newstate == VDEV_STATE_HEALTHY) ?
 		    "HEALTHY" : "DEGRADED");
 		return;
 	}
@@ -267,18 +328,19 @@
 	 * testing)
 	 */
 	if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
-		is_sd = 1;
+		is_sd = B_TRUE;
 
 	/*
 	 * If the pool doesn't have the autoreplace property set, then use
 	 * vdev online to trigger a FMA fault by posting an ereport.
 	 */
 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
-	    !(wholedisk || is_dm) || (physpath == NULL)) {
+	    !(wholedisk || is_mpath_wholedisk) || (physpath == NULL)) {
 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
 		    &newstate);
 		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
-		    "not a whole disk for '%s'", fullpath);
+		    "not a blank disk for '%s' ('%s')", fullpath,
+		    physpath);
 		return;
 	}
 
@@ -290,7 +352,7 @@
 	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
 	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
 
-	if (realpath(rawpath, devpath) == NULL && !is_dm) {
+	if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
 		zed_log_msg(LOG_INFO, "  realpath: %s failed (%s)",
 		    rawpath, strerror(errno));
 
@@ -306,12 +368,14 @@
 	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
 	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
+		zed_log_msg(LOG_INFO, "  not autoreplacing since disk isn't in "
+		    "a bad state (currently %d)", vs->vs_state);
 		return;
 	}
 
 	nvlist_lookup_string(vdev, "new_devid", &new_devid);
 
-	if (is_dm) {
+	if (is_mpath_wholedisk) {
 		/* Don't label device mapper or multipath disks. */
 	} else if (!labeled) {
 		/*
@@ -438,7 +502,15 @@
 		return;
 	}
 
-	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
+	/*
+	 * Prefer sequential resilvering when supported (mirrors and dRAID),
+	 * otherwise fallback to a traditional healing resilver.
+	 */
+	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_TRUE);
+	if (ret != 0) {
+		ret = zpool_vdev_attach(zhp, fullpath, path, nvroot,
+		    B_TRUE, B_FALSE);
+	}
 
 	zed_log_msg(LOG_INFO, "  zpool_vdev_replace: %s with %s (%s)",
 	    fullpath, path, (ret == 0) ? "no errors" :
@@ -458,7 +530,9 @@
 	boolean_t		dd_islabeled;
 	uint64_t		dd_pool_guid;
 	uint64_t		dd_vdev_guid;
+	uint64_t		dd_new_vdev_guid;
 	const char		*dd_new_devid;
+	uint64_t		dd_num_spares;
 } dev_data_t;
 
 static void
@@ -468,6 +542,8 @@
 	char *path = NULL;
 	uint_t c, children;
 	nvlist_t **child;
+	uint64_t guid = 0;
+	uint64_t isspare = 0;
 
 	/*
 	 * First iterate over any children.
@@ -493,19 +569,16 @@
 	}
 
 	/* once a vdev was matched and processed there is nothing left to do */
-	if (dp->dd_found)
+	if (dp->dd_found && dp->dd_num_spares == 0)
 		return;
+	(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &guid);
 
 	/*
 	 * Match by GUID if available otherwise fallback to devid or physical
 	 */
 	if (dp->dd_vdev_guid != 0) {
-		uint64_t guid;
-
-		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
-		    &guid) != 0 || guid != dp->dd_vdev_guid) {
+		if (guid != dp->dd_vdev_guid)
 			return;
-		}
 		zed_log_msg(LOG_INFO, "  zfs_iter_vdev: matched on %llu", guid);
 		dp->dd_found = B_TRUE;
 
@@ -515,10 +588,23 @@
 		 * illumos, substring matching is not required to accommodate
 		 * the partition suffix. An exact match will be present in
 		 * the dp->dd_compare value.
+		 * If the attached disk already contains a vdev GUID, it means
+		 * the disk is not clean. In such a scenario, the physical path
+		 * would be a match that makes the disk faulted when trying to
+		 * online it. So, we would only want to proceed if either GUID
+		 * matches with the last attached disk or the disk is in clean
+		 * state.
 		 */
 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
-		    strcmp(dp->dd_compare, path) != 0)
+		    strcmp(dp->dd_compare, path) != 0) {
 			return;
+		}
+		if (dp->dd_new_vdev_guid != 0 && dp->dd_new_vdev_guid != guid) {
+			zed_log_msg(LOG_INFO, "  %s: no match (GUID:%llu"
+			    " != vdev GUID:%llu)", __func__,
+			    dp->dd_new_vdev_guid, guid);
+			return;
+		}
 
 		zed_log_msg(LOG_INFO, "  zfs_iter_vdev: matched %s on %s",
 		    dp->dd_prop, path);
@@ -531,10 +617,14 @@
 		}
 	}
 
+	if (dp->dd_found == B_TRUE && nvlist_lookup_uint64(nvl,
+	    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
+		dp->dd_num_spares++;
+
 	(dp->dd_func)(zhp, nvl, dp->dd_islabeled);
 }
 
-void
+static void
 zfs_enable_ds(void *arg)
 {
 	unavailpool_t *pool = (unavailpool_t *)arg;
@@ -566,6 +656,8 @@
 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
 			zfs_iter_vdev(zhp, nvl, data);
 		}
+	} else {
+		zed_log_msg(LOG_INFO, "%s: no config\n", __func__);
 	}
 
 	/*
@@ -589,7 +681,9 @@
 	}
 
 	zpool_close(zhp);
-	return (dp->dd_found);	/* cease iteration after a match */
+
+	/* cease iteration after a match */
+	return (dp->dd_found && dp->dd_num_spares == 0);
 }
 
 /*
@@ -598,7 +692,7 @@
  */
 static boolean_t
 devphys_iter(const char *physical, const char *devid, zfs_process_func_t func,
-    boolean_t is_slice)
+    boolean_t is_slice, uint64_t new_vdev_guid)
 {
 	dev_data_t data = { 0 };
 
@@ -608,6 +702,73 @@
 	data.dd_found = B_FALSE;
 	data.dd_islabeled = is_slice;
 	data.dd_new_devid = devid;	/* used by auto replace code */
+	data.dd_new_vdev_guid = new_vdev_guid;
+
+	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
+
+	return (data.dd_found);
+}
+
+/*
+ * Given a device identifier, find any vdevs with a matching by-vdev
+ * path.  Normally we shouldn't need this as the comparison would be
+ * made earlier in the devphys_iter().  For example, if we were replacing
+ * /dev/disk/by-vdev/L28, normally devphys_iter() would match the
+ * ZPOOL_CONFIG_PHYS_PATH of "L28" from the old disk config to "L28"
+ * of the new disk config.  However, we've seen cases where
+ * ZPOOL_CONFIG_PHYS_PATH was not in the config for the old disk.  Here's
+ * an example of a real 2-disk mirror pool where one disk was force
+ * faulted:
+ *
+ *       com.delphix:vdev_zap_top: 129
+ *           children[0]:
+ *               type: 'disk'
+ *               id: 0
+ *               guid: 14309659774640089719
+ *               path: '/dev/disk/by-vdev/L28'
+ *               whole_disk: 0
+ *               DTL: 654
+ *               create_txg: 4
+ *               com.delphix:vdev_zap_leaf: 1161
+ *               faulted: 1
+ *               aux_state: 'external'
+ *           children[1]:
+ *               type: 'disk'
+ *               id: 1
+ *               guid: 16002508084177980912
+ *               path: '/dev/disk/by-vdev/L29'
+ *               devid: 'dm-uuid-mpath-35000c500a61d68a3'
+ *               phys_path: 'L29'
+ *               vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+ *               whole_disk: 0
+ *               DTL: 1028
+ *               create_txg: 4
+ *               com.delphix:vdev_zap_leaf: 131
+ *
+ * So in the case above, the only thing we could compare is the path.
+ *
+ * We can do this because we assume by-vdev paths are authoritative as physical
+ * paths.  We could not assume this for normal paths like /dev/sda since the
+ * physical location /dev/sda points to could change over time.
+ */
+static boolean_t
+by_vdev_path_iter(const char *by_vdev_path, const char *devid,
+    zfs_process_func_t func, boolean_t is_slice)
+{
+	dev_data_t data = { 0 };
+
+	data.dd_compare = by_vdev_path;
+	data.dd_func = func;
+	data.dd_prop = ZPOOL_CONFIG_PATH;
+	data.dd_found = B_FALSE;
+	data.dd_islabeled = is_slice;
+	data.dd_new_devid = devid;
+
+	if (strncmp(by_vdev_path, DEV_BYVDEV_PATH,
+	    strlen(DEV_BYVDEV_PATH)) != 0) {
+		/* by_vdev_path doesn't start with "/dev/disk/by-vdev/" */
+		return (B_FALSE);
+	}
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
 
@@ -636,6 +797,27 @@
 }
 
 /*
+ * Given a device guid, find any vdevs with a matching guid.
+ */
+static boolean_t
+guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
+    zfs_process_func_t func, boolean_t is_slice)
+{
+	/* Members not named in the initializer start out zeroed. */
+	dev_data_t search = {
+		.dd_func = func,
+		.dd_found = B_FALSE,
+		.dd_pool_guid = pool_guid,
+		.dd_vdev_guid = vdev_guid,
+		.dd_islabeled = is_slice,
+		.dd_new_devid = devid,
+	};
+
+	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);
+	return (search.dd_found);
+}
+
+/*
  * Handle a EC_DEV_ADD.ESC_DISK event.
  *
  * illumos
@@ -657,16 +839,21 @@
 static int
 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 {
-	char *devpath = NULL, *devid;
+	char *devpath = NULL, *devid = NULL;
+	uint64_t pool_guid = 0, vdev_guid = 0;
 	boolean_t is_slice;
 
 	/*
-	 * Expecting a devid string and an optional physical location
+	 * Expecting a devid string and an optional physical location and guid
 	 */
-	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
+	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0) {
+		zed_log_msg(LOG_INFO, "%s: no dev identifier\n", __func__);
 		return (-1);
+	}
 
 	(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
+	(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
+	(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
 
 	is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
 
@@ -677,12 +864,28 @@
 	 * Iterate over all vdevs looking for a match in the following order:
 	 * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
 	 * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
-	 *
-	 * For disks, we only want to pay attention to vdevs marked as whole
-	 * disks or are a multipath device.
+	 * 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
+	 * 4. ZPOOL_CONFIG_PATH for /dev/disk/by-vdev devices only (since
+	 *    by-vdev paths represent physical paths).
 	 */
-	if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
-		(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
+	if (devid_iter(devid, zfs_process_add, is_slice))
+		return (0);
+	if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
+	    is_slice, vdev_guid))
+		return (0);
+	if (vdev_guid != 0)
+		(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
+		    is_slice);
+
+	if (devpath != NULL) {
+		/* Can we match a /dev/disk/by-vdev/ path? */
+		char by_vdev_path[MAXPATHLEN];
+		snprintf(by_vdev_path, sizeof (by_vdev_path),
+		    "/dev/disk/by-vdev/%s", devpath);
+		if (by_vdev_path_iter(by_vdev_path, devid, zfs_process_add,
+		    is_slice))
+			return (0);
+	}
 
 	return (0);
 }
@@ -714,21 +917,96 @@
 	return (0);
 }
 
+/*
+ * Given a path to a vdev, lookup the vdev's physical size from its
+ * config nvlist.
+ *
+ * Returns the vdev's physical size in bytes on success, 0 on error.
+ */
+static uint64_t
+vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+	nvlist_t *nvl = NULL;
+	boolean_t avail_spare, l2cache, log;
+	vdev_stat_t *vs = NULL;
+	uint_t c;
+
+	nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+	if (!nvl)
+		return (0);
+
+	/* Missing stats are reported as size 0 rather than aborting zed. */
+	if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) != 0 || vs == NULL) {
+		zed_log_msg(LOG_INFO, "%s: no vdev stats for '%s'", __func__,
+		    vdev_path);
+		return (0);
+	}
+
+	return (vs->vs_pspace);
+}
+
+/*
+ * Given a path to a vdev, lookup if the vdev is a "whole disk" in the
+ * config nvlist.  "whole disk" means that ZFS was passed a whole disk
+ * at pool creation time, which it partitioned up and has full control over.
+ * Thus a partition with wholedisk=1 set tells us that zfs created the
+ * partition at creation time.  A partition without whole disk set would have
+ * been created externally (like with fdisk) and passed to ZFS.
+ *
+ * Returns the whole disk value (either 0 or 1).
+ */
+static uint64_t
+vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+	boolean_t avail_spare, l2cache, log;
+	uint64_t wholedisk = 0;
+	nvlist_t *vdev_nvl;
+
+	/* wholedisk keeps its 0 default when the vdev cannot be found. */
+	vdev_nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache,
+	    &log);
+	if (vdev_nvl != NULL)
+		(void) nvlist_lookup_uint64(vdev_nvl,
+		    ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
+	return (wholedisk);
+}
+
+/*
+ * Evaluates to true if the device grew by roughly 1% or more.
+ */
+#define	DEVICE_GREW(oldsize, newsize) \
+		    (((newsize) > (oldsize)) && \
+		    (((newsize) / ((newsize) - (oldsize))) <= 100))
+
 static int
 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 {
-	char *devname = data;
 	boolean_t avail_spare, l2cache;
+	nvlist_t *udev_nvl = data;
 	nvlist_t *tgt;
 	int error;
 
+	char *tmp_devname, devname[MAXPATHLEN] = "";
+	uint64_t guid;
+
+	if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
+		sprintf(devname, "%llu", (u_longlong_t)guid);
+	} else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
+	    &tmp_devname) == 0) {
+		strlcpy(devname, tmp_devname, MAXPATHLEN);
+		zfs_append_partition(devname, MAXPATHLEN);
+	} else {
+		zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
+	}
+
 	zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
 	    devname, zpool_get_name(zhp));
 
 	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
 	    &avail_spare, &l2cache, NULL)) != NULL) {
 		char *path, fullpath[MAXPATHLEN];
-		uint64_t wholedisk;
+		uint64_t wholedisk = 0;
 
 		error = nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path);
 		if (error) {
@@ -736,10 +1014,8 @@
 			return (0);
 		}
 
-		error = nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
 		    &wholedisk);
-		if (error)
-			wholedisk = 0;
 
 		if (wholedisk) {
 			path = strrchr(path, '/');
@@ -773,12 +1049,75 @@
 			vdev_state_t newstate;
 
 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
-				error = zpool_vdev_online(zhp, fullpath, 0,
-				    &newstate);
-				zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
-				    "setting device '%s' to ONLINE state "
-				    "in pool '%s': %d", fullpath,
-				    zpool_get_name(zhp), error);
+				/*
+				 * If this disk size has not changed, then
+				 * there's no need to do an autoexpand.  To
+				 * check we look at the disk's size in its
+				 * config, and compare it to the disk size
+				 * that udev is reporting.
+				 */
+				uint64_t udev_size = 0, conf_size = 0,
+				    wholedisk = 0, udev_parent_size = 0;
+
+				/*
+				 * Get the size of our disk that udev is
+				 * reporting.
+				 */
+				if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
+				    &udev_size) != 0) {
+					udev_size = 0;
+				}
+
+				/*
+				 * Get the size of our disk's parent device
+				 * from udev (where sda1's parent is sda).
+				 */
+				if (nvlist_lookup_uint64(udev_nvl,
+				    DEV_PARENT_SIZE, &udev_parent_size) != 0) {
+					udev_parent_size = 0;
+				}
+
+				conf_size = vdev_size_from_config(zhp,
+				    fullpath);
+
+				wholedisk = vdev_whole_disk_from_config(zhp,
+				    fullpath);
+
+				/*
+				 * Only attempt an autoexpand if the vdev size
+				 * changed.  There are two different cases
+				 * to consider.
+				 *
+				 * 1. wholedisk=1
+				 * If you do a 'zpool create' on a whole disk
+				 * (like /dev/sda), then zfs will create
+				 * partitions on the disk (like /dev/sda1).  In
+				 * that case, wholedisk=1 will be set in the
+				 * partition's nvlist config.  So zed will need
+				 * to see if your parent device (/dev/sda)
+				 * expanded in size, and if so, then attempt
+				 * the autoexpand.
+				 *
+				 * 2. wholedisk=0
+				 * If you do a 'zpool create' on an existing
+				 * partition, or a device that doesn't allow
+				 * partitions, then wholedisk=0, and you will
+				 * simply need to check if the device itself
+				 * expanded in size.
+				 */
+				if (DEVICE_GREW(conf_size, udev_size) ||
+				    (wholedisk && DEVICE_GREW(conf_size,
+				    udev_parent_size))) {
+					error = zpool_vdev_online(zhp, fullpath,
+					    0, &newstate);
+
+					zed_log_msg(LOG_INFO,
+					    "%s: autoexpanding '%s' from %llu"
+					    " to %llu bytes in pool '%s': %d",
+					    __func__, fullpath, conf_size,
+					    MAX(udev_size, udev_parent_size),
+					    zpool_get_name(zhp), error);
+				}
 			}
 		}
 		zpool_close(zhp);
@@ -806,10 +1145,11 @@
 		strlcpy(name, devname, MAXPATHLEN);
 		zfs_append_partition(name, MAXPATHLEN);
 	} else {
+		sprintf(name, "unknown");
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
 	}
 
-	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
+	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
 		    "found", name);
 		return (1);
@@ -895,7 +1235,7 @@
  * For now, each agent has its own libzfs instance
  */
 int
-zfs_slm_init()
+zfs_slm_init(void)
 {
 	if ((g_zfshdl = libzfs_init()) == NULL)
 		return (-1);
@@ -913,6 +1253,7 @@
 		return (-1);
 	}
 
+	pthread_setname_np(g_zfs_tid, "enum-pools");
 	list_create(&g_device_list, sizeof (struct pendingdev),
 	    offsetof(struct pendingdev, pd_node));
 
@@ -920,7 +1261,7 @@
 }
 
 void
-zfs_slm_fini()
+zfs_slm_fini(void)
 {
 	unavailpool_t *pool;
 	pendingdev_t *device;

diff --git a/zfs/cmd/zed/agents/zfs_retire.c b/zfs/cmd/zed/agents/zfs_retire.c
index c37b56a..b2b28ef 100644
--- a/zfs/cmd/zed/agents/zfs_retire.c
+++ b/zfs/cmd/zed/agents/zfs_retire.c

@@ -40,6 +40,7 @@
 #include <sys/fm/fs/zfs.h>
 #include <libzfs.h>
 #include <string.h>
+#include <libgen.h>
 
 #include "zfs_agents.h"
 #include "fmd_api.h"
@@ -74,6 +75,8 @@
 	uint64_t	cb_guid;
 	zpool_handle_t	*cb_zhp;
 	nvlist_t	*cb_vdev;
+	uint64_t	cb_vdev_guid;
+	uint64_t	cb_num_spares;
 } find_cbdata_t;
 
 static int
@@ -139,6 +142,64 @@
 	return (NULL);
 }
 
+/* zpool_iter() callback: request removal of the spare cb_vdev_guid names. */
+static int
+remove_spares(zpool_handle_t *zhp, void *data)
+{
+	find_cbdata_t *cbp = data;
+	nvlist_t *config, *nvroot, **spares;
+	uint64_t spareguid = 0;
+	uint_t nspares, c;
+	vdev_stat_t *vs;
+	char *devname;
+
+	/* Nothing to do for a pool with no vdev tree or no spares. */
+	config = zpool_get_config(zhp, NULL);
+	if (nvlist_lookup_nvlist(config,
+	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0 ||
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) != 0) {
+		zpool_close(zhp);
+		return (0);
+	}
+
+	for (uint_t i = 0; i < nspares; i++) {
+		if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID,
+		    &spareguid) != 0 || spareguid != cbp->cb_vdev_guid)
+			continue;
+		/* Without vdev stats the state is unknown; leave it alone. */
+		if (nvlist_lookup_uint64_array(spares[i],
+		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) != 0)
+			break;
+		/* zpool_vdev_name() returns an allocated string; free it. */
+		devname = zpool_vdev_name(NULL, zhp, spares[i], B_FALSE);
+		if (vs->vs_state != VDEV_STATE_REMOVED &&
+		    zpool_vdev_remove_wanted(zhp, devname) == 0)
+			cbp->cb_num_spares++;
+		free(devname);
+		break;
+	}
+
+	/* The handle is closed here on every path; the result is always 0. */
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Run remove_spares() through zpool_iter() for the spare identified by
+ * vdev_guid.  Returns the number of removal requests that succeeded.
+ */
+static int
+find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid)
+{
+	find_cbdata_t cb;
+
+	cb.cb_vdev_guid = vdev_guid;
+	cb.cb_num_spares = 0;
+	(void) zpool_iter(zhdl, remove_spares, &cb);
+	return ((int)cb.cb_num_spares);
+}
+
 /*
  * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
  */
@@ -219,12 +280,18 @@
 	 * replace it.
 	 */
 	for (s = 0; s < nspares; s++) {
-		char *spare_name;
+		boolean_t rebuild = B_FALSE;
+		char *spare_name, *type;
 
 		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
 		    &spare_name) != 0)
 			continue;
 
+		/* prefer sequential resilvering for distributed spares */
+		if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE,
+		    &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
+			rebuild = B_TRUE;
+
 		/* if set, add the "ashift" pool property to the spare nvlist */
 		if (source != ZPROP_SRC_DEFAULT)
 			(void) nvlist_add_uint64(spares[s],
@@ -237,7 +304,7 @@
 		    dev_name, basename(spare_name));
 
 		if (zpool_vdev_attach(zhp, dev_name, spare_name,
-		    replacement, B_TRUE) == 0) {
+		    replacement, B_TRUE, rebuild) == 0) {
 			free(dev_name);
 			nvlist_free(replacement);
 			return (B_TRUE);
@@ -308,6 +375,8 @@
 	libzfs_handle_t *zhdl = zdp->zrd_hdl;
 	boolean_t fault_device, degrade_device;
 	boolean_t is_repair;
+	boolean_t l2arc = B_FALSE;
+	boolean_t spare = B_FALSE;
 	char *scheme;
 	nvlist_t *vdev = NULL;
 	char *uuid;
@@ -316,22 +385,49 @@
 	boolean_t is_disk;
 	vdev_aux_t aux;
 	uint64_t state = 0;
+	vdev_stat_t *vs;
+	unsigned int c;
 
 	fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
 
+	nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, &state);
+
 	/*
 	 * If this is a resource notifying us of device removal then simply
 	 * check for an available spare and continue unless the device is a
 	 * l2arc vdev, in which case we just offline it.
 	 */
-	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
+	if (strcmp(class, "resource.fs.zfs.removed") == 0 ||
+	    (strcmp(class, "resource.fs.zfs.statechange") == 0 &&
+	    (state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
 		char *devtype;
 		char *devname;
 
+		if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+		    &devtype) == 0) {
+			if (strcmp(devtype, VDEV_TYPE_SPARE) == 0)
+				spare = B_TRUE;
+			else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0)
+				l2arc = B_TRUE;
+		}
+
+		if (nvlist_lookup_uint64(nvl,
+		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
+			return;
+
+		if (vdev_guid == 0) {
+			fmd_hdl_debug(hdl, "Got a zero GUID");
+			return;
+		}
+
+		if (spare) {
+			int nspares = find_and_remove_spares(zhdl, vdev_guid);
+			fmd_hdl_debug(hdl, "%d spares removed", nspares);
+			return;
+		}
+
 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
-		    &pool_guid) != 0 ||
-		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
-		    &vdev_guid) != 0)
+		    &pool_guid) != 0)
 			return;
 
 		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
@@ -340,13 +436,30 @@
 
 		devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
 
-		/* Can't replace l2arc with a spare: offline the device */
-		if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
-		    &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) {
-			fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
-			zpool_vdev_offline(zhp, devname, B_TRUE);
-		} else if (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
-		    replace_with_spare(hdl, zhp, vdev) == B_FALSE) {
+		nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
+		    (uint64_t **)&vs, &c);
+
+		/*
+		 * If the REMOVED state is requested for a vdev that is
+		 * already removed, it's a loopback event from
+		 * spa_async_remove(). Just ignore it.
+		 */
+		if (vs->vs_state == VDEV_STATE_REMOVED &&
+		    state == VDEV_STATE_REMOVED)
+			return;
+
+		/* Remove the vdev since device is unplugged */
+		int remove_status = 0;
+		if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
+			remove_status = zpool_vdev_remove_wanted(zhp, devname);
+			fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
+			    ", err:%d", devname, libzfs_errno(zhdl));
+		}
+
+		/* Replace the vdev with a spare if its not a l2arc */
+		if (!l2arc && !remove_status &&
+		    (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+		    replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
 			/* Could not handle with spare */
 			fmd_hdl_debug(hdl, "no spare for '%s'", devname);
 		}
@@ -360,12 +473,11 @@
 		return;
 
 	/*
-	 * Note: on zfsonlinux statechange events are more than just
+	 * Note: on Linux statechange events are more than just
 	 * healthy ones so we need to confirm the actual state value.
 	 */
 	if (strcmp(class, "resource.fs.zfs.statechange") == 0 &&
-	    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE,
-	    &state) == 0 && state == VDEV_STATE_HEALTHY) {
+	    state == VDEV_STATE_HEALTHY) {
 		zfs_vdev_repair(hdl, nvl);
 		return;
 	}
@@ -496,6 +608,7 @@
 		 * Attempt to substitute a hot spare.
 		 */
 		(void) replace_with_spare(hdl, zhp, vdev);
+
 		zpool_close(zhp);
 	}
 

diff --git a/zfs/cmd/zed/zed.c b/zfs/cmd/zed/zed.c
index bba8b8f..e45176c 100644
--- a/zfs/cmd/zed/zed.c
+++ b/zfs/cmd/zed/zed.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -60,8 +60,8 @@
 		zed_log_die("Failed to initialize sigset");
 
 	sa.sa_flags = SA_RESTART;
-	sa.sa_handler = SIG_IGN;
 
+	sa.sa_handler = SIG_IGN;
 	if (sigaction(SIGPIPE, &sa, NULL) < 0)
 		zed_log_die("Failed to ignore SIGPIPE");
 
@@ -75,6 +75,10 @@
 	sa.sa_handler = _hup_handler;
 	if (sigaction(SIGHUP, &sa, NULL) < 0)
 		zed_log_die("Failed to register SIGHUP handler");
+
+	(void) sigaddset(&sa.sa_mask, SIGCHLD);
+	if (pthread_sigmask(SIG_BLOCK, &sa.sa_mask, NULL) < 0)
+		zed_log_die("Failed to block SIGCHLD");
 }
 
 /*
@@ -212,22 +216,20 @@
 int
 main(int argc, char *argv[])
 {
-	struct zed_conf *zcp;
+	struct zed_conf zcp;
 	uint64_t saved_eid;
 	int64_t saved_etime[2];
 
 	zed_log_init(argv[0]);
 	zed_log_stderr_open(LOG_NOTICE);
-	zcp = zed_conf_create();
-	zed_conf_parse_opts(zcp, argc, argv);
-	if (zcp->do_verbose)
+	zed_conf_init(&zcp);
+	zed_conf_parse_opts(&zcp, argc, argv);
+	if (zcp.do_verbose)
 		zed_log_stderr_open(LOG_INFO);
 
 	if (geteuid() != 0)
 		zed_log_die("Must be run as root");
 
-	zed_conf_parse_file(zcp);
-
 	zed_file_close_from(STDERR_FILENO + 1);
 
 	(void) umask(0);
@@ -235,47 +237,72 @@
 	if (chdir("/") < 0)
 		zed_log_die("Failed to change to root directory");
 
-	if (zed_conf_scan_dir(zcp) < 0)
+	if (zed_conf_scan_dir(&zcp) < 0)
 		exit(EXIT_FAILURE);
 
-	if (!zcp->do_foreground) {
+	if (!zcp.do_foreground) {
 		_start_daemonize();
 		zed_log_syslog_open(LOG_DAEMON);
 	}
 	_setup_sig_handlers();
 
-	if (zcp->do_memlock)
+	if (zcp.do_memlock)
 		_lock_memory();
 
-	if ((zed_conf_write_pid(zcp) < 0) && (!zcp->do_force))
+	if ((zed_conf_write_pid(&zcp) < 0) && (!zcp.do_force))
 		exit(EXIT_FAILURE);
 
-	if (!zcp->do_foreground)
+	if (!zcp.do_foreground)
 		_finish_daemonize();
 
 	zed_log_msg(LOG_NOTICE,
 	    "ZFS Event Daemon %s-%s (PID %d)",
 	    ZFS_META_VERSION, ZFS_META_RELEASE, (int)getpid());
 
-	if (zed_conf_open_state(zcp) < 0)
+	if (zed_conf_open_state(&zcp) < 0)
 		exit(EXIT_FAILURE);
 
-	if (zed_conf_read_state(zcp, &saved_eid, saved_etime) < 0)
+	if (zed_conf_read_state(&zcp, &saved_eid, saved_etime) < 0)
 		exit(EXIT_FAILURE);
 
-	zed_event_init(zcp);
-	zed_event_seek(zcp, saved_eid, saved_etime);
+idle:
+	/*
+	 * If -I is specified, attempt to open /dev/zfs repeatedly until
+	 * successful.
+	 */
+	do {
+		if (!zed_event_init(&zcp))
+			break;
+		/* Wait for some time and try again. tunable? */
+		sleep(30);
+	} while (!_got_exit && zcp.do_idle);
+
+	if (_got_exit)
+		goto out;
+
+	zed_event_seek(&zcp, saved_eid, saved_etime);
 
 	while (!_got_exit) {
+		int rv;
 		if (_got_hup) {
 			_got_hup = 0;
-			(void) zed_conf_scan_dir(zcp);
+			(void) zed_conf_scan_dir(&zcp);
 		}
-		zed_event_service(zcp);
+		rv = zed_event_service(&zcp);
+
+		/* ENODEV: When kernel module is unloaded (osx) */
+		if (rv != 0)
+			break;
 	}
+
 	zed_log_msg(LOG_NOTICE, "Exiting");
-	zed_event_fini(zcp);
-	zed_conf_destroy(zcp);
+	zed_event_fini(&zcp);
+
+	if (zcp.do_idle && !_got_exit)
+		goto idle;
+
+out:
+	zed_conf_destroy(&zcp);
 	zed_log_fini();
 	exit(EXIT_SUCCESS);
 }

diff --git a/zfs/cmd/zed/zed.d/Makefile.am b/zfs/cmd/zed/zed.d/Makefile.am
index 716db2b..1905a92 100644
--- a/zfs/cmd/zed/zed.d/Makefile.am
+++ b/zfs/cmd/zed/zed.d/Makefile.am

@@ -1,8 +1,8 @@
 include $(top_srcdir)/config/Rules.am
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
 
-EXTRA_DIST = \
-	README \
-	history_event-zfs-list-cacher.sh.in
+EXTRA_DIST += README
 
 zedconfdir = $(sysconfdir)/zfs/zed.d
 
@@ -21,27 +21,26 @@
 	scrub_finish-notify.sh \
 	statechange-led.sh \
 	statechange-notify.sh \
+	statechange-slot_off.sh \
 	vdev_clear-led.sh \
 	vdev_attach-led.sh \
 	pool_import-led.sh \
-	resilver_finish-start-scrub.sh
+	resilver_finish-start-scrub.sh \
+	trim_finish-notify.sh
 
 nodist_zedexec_SCRIPTS = history_event-zfs-list-cacher.sh
 
-$(nodist_zedexec_SCRIPTS): %: %.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@runstatedir\@,$(runstatedir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
+SUBSTFILES += $(nodist_zedexec_SCRIPTS)
 
 zedconfdefaults = \
 	all-syslog.sh \
 	data-notify.sh \
+	history_event-zfs-list-cacher.sh \
 	resilver_finish-notify.sh \
 	scrub_finish-notify.sh \
 	statechange-led.sh \
 	statechange-notify.sh \
+	statechange-slot_off.sh \
 	vdev_clear-led.sh \
 	vdev_attach-led.sh \
 	pool_import-led.sh \
@@ -55,3 +54,6 @@
 	    ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
 	done
 	chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc"
+
+# False positive: 1>&"${ZED_FLOCK_FD}" looks suspiciously similar to a >&filename bash extension
+CHECKBASHISMS_IGNORE = -e 'should be >word 2>&1' -e '&"$${ZED_FLOCK_FD}"'

diff --git a/zfs/cmd/zed/zed.d/all-debug.sh b/zfs/cmd/zed/zed.d/all-debug.sh
index 14b39ca..824c9fe 100755
--- a/zfs/cmd/zed/zed.d/all-debug.sh
+++ b/zfs/cmd/zed/zed.d/all-debug.sh

@@ -12,15 +12,11 @@
 
 zed_exit_if_ignoring_this_event
 
-lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock"
+zed_lock "${ZED_DEBUG_LOG}"
+{
+	printenv | sort
+	echo
+} 1>&"${ZED_FLOCK_FD}"
+zed_unlock "${ZED_DEBUG_LOG}"
 
-umask 077
-zed_lock "${lockfile}"
-exec >> "${ZED_DEBUG_LOG}"
-
-printenv | sort
-echo
-
-exec >&-
-zed_unlock "${lockfile}"
 exit 0

diff --git a/zfs/cmd/zed/zed.d/all-syslog.sh b/zfs/cmd/zed/zed.d/all-syslog.sh
index cb92865..ea108c4 100755
--- a/zfs/cmd/zed/zed.d/all-syslog.sh
+++ b/zfs/cmd/zed/zed.d/all-syslog.sh

@@ -1,14 +1,51 @@
 #!/bin/sh
 #
+# Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+#
 # Log the zevent via syslog.
+#
 
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"
 
 zed_exit_if_ignoring_this_event
 
-zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \
-    "${ZEVENT_POOL_GUID:+"pool_guid=${ZEVENT_POOL_GUID}"}" \
-    "${ZEVENT_VDEV_PATH:+"vdev_path=${ZEVENT_VDEV_PATH}"}" \
-    "${ZEVENT_VDEV_STATE_STR:+"vdev_state=${ZEVENT_VDEV_STATE_STR}"}"
+# build a string of name=value pairs for this event
+msg="eid=${ZEVENT_EID} class=${ZEVENT_SUBCLASS}"
+
+if [ "${ZED_SYSLOG_DISPLAY_GUIDS}" = "1" ]; then
+    [ -n "${ZEVENT_POOL_GUID}" ] && msg="${msg} pool_guid=${ZEVENT_POOL_GUID}"
+    [ -n "${ZEVENT_VDEV_GUID}" ] && msg="${msg} vdev_guid=${ZEVENT_VDEV_GUID}"
+else
+    [ -n "${ZEVENT_POOL}" ] && msg="${msg} pool='${ZEVENT_POOL}'"
+    [ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=${ZEVENT_VDEV_PATH##*/}"
+fi
+
+# log pool state if state is anything other than 'ACTIVE'
+[ -n "${ZEVENT_POOL_STATE_STR}" ] && [ "$ZEVENT_POOL_STATE" -ne 0 ] && \
+    msg="${msg} pool_state=${ZEVENT_POOL_STATE_STR}"
+
+# Log the following payload nvpairs if they are present
+[ -n "${ZEVENT_VDEV_STATE_STR}" ]  && msg="${msg} vdev_state=${ZEVENT_VDEV_STATE_STR}"
+[ -n "${ZEVENT_CKSUM_ALGORITHM}" ] && msg="${msg} algorithm=${ZEVENT_CKSUM_ALGORITHM}"
+[ -n "${ZEVENT_ZIO_SIZE}" ]        && msg="${msg} size=${ZEVENT_ZIO_SIZE}"
+[ -n "${ZEVENT_ZIO_OFFSET}" ]      && msg="${msg} offset=${ZEVENT_ZIO_OFFSET}"
+[ -n "${ZEVENT_ZIO_PRIORITY}" ]    && msg="${msg} priority=${ZEVENT_ZIO_PRIORITY}"
+[ -n "${ZEVENT_ZIO_ERR}" ]         && msg="${msg} err=${ZEVENT_ZIO_ERR}"
+[ -n "${ZEVENT_ZIO_FLAGS}" ]       && msg="${msg} flags=$(printf '0x%x' "${ZEVENT_ZIO_FLAGS}")"
+
+# log delays longer than 10 milliseconds (the delay value is in nanoseconds)
+[ -n "${ZEVENT_ZIO_DELAY}" ] && [ "$ZEVENT_ZIO_DELAY" -gt 10000000 ] && \
+    msg="${msg} delay=$((ZEVENT_ZIO_DELAY / 1000000))ms"
+
+# list the bookmark data together
+# shellcheck disable=SC2153
+[ -n "${ZEVENT_ZIO_OBJSET}" ] && \
+    msg="${msg} bookmark=${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}:${ZEVENT_ZIO_LEVEL}:${ZEVENT_ZIO_BLKID}"
+
+zed_log_msg "${msg}"
+
 exit 0

diff --git a/zfs/cmd/zed/zed.d/data-notify.sh b/zfs/cmd/zed/zed.d/data-notify.sh
index 639b459..792d30a 100755
--- a/zfs/cmd/zed/zed.d/data-notify.sh
+++ b/zfs/cmd/zed/zed.d/data-notify.sh

@@ -25,7 +25,7 @@
 
 umask 077
 note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
-note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+note_pathname="$(mktemp)"
 {
     echo "ZFS has detected a data error:"
     echo

diff --git a/zfs/cmd/zed/zed.d/generic-notify.sh b/zfs/cmd/zed/zed.d/generic-notify.sh
index e438031..9cf657e 100755
--- a/zfs/cmd/zed/zed.d/generic-notify.sh
+++ b/zfs/cmd/zed/zed.d/generic-notify.sh

@@ -23,7 +23,7 @@
 
 # Rate-limit the notification based in part on the filename.
 #
-rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")"
+rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};${0##*/}"
 rate_limit_interval="${ZED_NOTIFY_INTERVAL_SECS}"
 zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3
 
@@ -31,7 +31,7 @@
 pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}"
 host_str=" on $(hostname)"
 note_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}"
-note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+note_pathname="$(mktemp)"
 {
     echo "ZFS has posted the following event:"
     echo

diff --git a/zfs/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/zfs/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in
index 053b441..db40fa3 100755
--- a/zfs/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in
+++ b/zfs/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in

@@ -3,9 +3,8 @@
 # Track changes to enumerated pools for use in early-boot
 set -ef
 
-FSLIST_DIR="@sysconfdir@/zfs/zfs-list.cache"
-FSLIST_TMP="@runstatedir@/zfs-list.cache.new"
-FSLIST="${FSLIST_DIR}/${ZEVENT_POOL}"
+FSLIST="@sysconfdir@/zfs/zfs-list.cache/${ZEVENT_POOL}"
+FSLIST_TMP="@runstatedir@/zfs-list.cache@${ZEVENT_POOL}"
 
 # If the pool specific cache file is not writeable, abort
 [ -w "${FSLIST}" ] || exit 0
@@ -13,21 +12,21 @@
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"
 
-zed_exit_if_ignoring_this_event
-zed_check_cmd "${ZFS}" sort diff grep
+[ "$ZEVENT_SUBCLASS" != "history_event" ] && exit 0
+zed_check_cmd "${ZFS}" sort diff
 
 # If we are acting on a snapshot, we have nothing to do
-printf '%s' "${ZEVENT_HISTORY_DSNAME}" | grep '@' && exit 0
+[ "${ZEVENT_HISTORY_DSNAME%@*}" = "${ZEVENT_HISTORY_DSNAME}" ] || exit 0
 
-# We obtain a lock on zfs-list to avoid any simultaneous writes.
+# We lock the output file to avoid simultaneous writes.
 # If we run into trouble, log and drop the lock
 abort_alter() {
-  zed_log_msg "Error updating zfs-list.cache!"
-  zed_unlock zfs-list
+  zed_log_msg "Error updating zfs-list.cache for ${ZEVENT_POOL}!"
+  zed_unlock "${FSLIST}"
 }
 
 finished() {
-  zed_unlock zfs-list
+  zed_unlock "${FSLIST}"
   trap - EXIT
   exit 0
 }
@@ -37,7 +36,7 @@
       ;;
 
     export)
-        zed_lock zfs-list
+        zed_lock "${FSLIST}"
         trap abort_alter EXIT
         echo > "${FSLIST}"
         finished
@@ -63,7 +62,7 @@
       ;;
 esac
 
-zed_lock zfs-list
+zed_lock "${FSLIST}"
 trap abort_alter EXIT
 
 PROPS="name,mountpoint,canmount,atime,relatime,devices,exec\
@@ -79,7 +78,7 @@
 sort "${FSLIST_TMP}" -o "${FSLIST_TMP}"
 
 # Don't modify the file if it hasn't changed
-diff -q "${FSLIST_TMP}" "${FSLIST}" || mv "${FSLIST_TMP}" "${FSLIST}"
+diff -q "${FSLIST_TMP}" "${FSLIST}" || cat "${FSLIST_TMP}" > "${FSLIST}"
 rm -f "${FSLIST_TMP}"
 
 finished

diff --git a/zfs/cmd/zed/zed.d/resilver_finish-start-scrub.sh b/zfs/cmd/zed/zed.d/resilver_finish-start-scrub.sh
index 6f9c0b3..c7cfd1d 100755
--- a/zfs/cmd/zed/zed.d/resilver_finish-start-scrub.sh
+++ b/zfs/cmd/zed/zed.d/resilver_finish-start-scrub.sh

@@ -5,10 +5,12 @@
 # Exit codes:
 # 1: Internal error
 # 2: Script wasn't enabled in zed.rc
+# 3: Scrubs are automatically started for sequential resilvers
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"
 
 [ "${ZED_SCRUB_AFTER_RESILVER}" = "1" ] || exit 2
+[ "${ZEVENT_RESILVER_TYPE}" != "sequential" ] || exit 3
 [ -n "${ZEVENT_POOL}" ] || exit 1
 [ -n "${ZEVENT_SUBCLASS}" ] || exit 1
 zed_check_cmd "${ZPOOL}" || exit 1

diff --git a/zfs/cmd/zed/zed.d/scrub_finish-notify.sh b/zfs/cmd/zed/zed.d/scrub_finish-notify.sh
index 2145a10..5c0124b 100755
--- a/zfs/cmd/zed/zed.d/scrub_finish-notify.sh
+++ b/zfs/cmd/zed/zed.d/scrub_finish-notify.sh

@@ -41,7 +41,7 @@
 
 umask 077
 note_subject="ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on $(hostname)"
-note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+note_pathname="$(mktemp)"
 {
     echo "ZFS has finished a ${action}:"
     echo

diff --git a/zfs/cmd/zed/zed.d/statechange-led.sh b/zfs/cmd/zed/zed.d/statechange-led.sh
index e656e12..26e6064 100755
--- a/zfs/cmd/zed/zed.d/statechange-led.sh
+++ b/zfs/cmd/zed/zed.d/statechange-led.sh

@@ -1,21 +1,21 @@
 #!/bin/sh
 #
-# Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes.
+# Turn off/on vdevs' enclosure fault LEDs when their pool's state changes.
 #
-# Turn the VDEV's fault LED on if it becomes FAULTED, DEGRADED or UNAVAIL.
-# Turn the LED off when it's back ONLINE again.
+# Turn a vdev's fault LED on if it becomes FAULTED, DEGRADED or UNAVAIL.
+# Turn its LED off when it's back ONLINE again.
 #
 # This script run in two basic modes:
 #
 # 1. If $ZEVENT_VDEV_ENC_SYSFS_PATH and $ZEVENT_VDEV_STATE_STR are set, then
-# only set the LED for that particular VDEV. This is the case for statechange
+# only set the LED for that particular vdev. This is the case for statechange
 # events and some vdev_* events.
 #
-# 2. If those vars are not set, then check the state of all VDEVs in the pool
+# 2. If those vars are not set, then check the state of all vdevs in the pool
 # and set the LEDs accordingly.  This is the case for pool_import events.
 #
 # Note that this script requires that your enclosure be supported by the
-# Linux SCSI enclosure services (ses) driver.  The script will do nothing
+# Linux SCSI Enclosure Services (SES) driver.  The script will do nothing
 # if you have no enclosure, or if your enclosure isn't supported.
 #
 # Exit codes:
@@ -29,7 +29,8 @@
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"
 
-if [ ! -d /sys/class/enclosure ] ; then
+if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
+	# No JBOD enclosure or NVMe slots
 	exit 1
 fi
 
@@ -59,6 +60,10 @@
 	file="$1"
 	val="$2"
 
+	if [ -z "$val" ]; then
+		return 0
+	fi
+
 	if [ ! -e "$file" ] ; then
 		return 3
 	fi
@@ -66,11 +71,11 @@
 	# If another process is accessing the LED when we attempt to update it,
 	# the update will be lost so retry until the LED actually changes or we
 	# timeout.
-	for _ in $(seq 1 5); do
+	for _ in 1 2 3 4 5; do
 		# We want to check the current state first, since writing to the
 		# 'fault' entry always causes a SES command, even if the
 		# current state is already what you want.
-		current=$(cat "${file}")
+		read -r current < "${file}"
 
 		# On some enclosures if you write 1 to fault, and read it back,
 		# it will return 2.  Treat all non-zero values as 1 for
@@ -85,27 +90,84 @@
 		else
 			break
 		fi
-        done
+	done
+}
+
+# Fault LEDs for JBODs and NVMe drives are handled a little differently.
+#
+# On JBODs the fault LED is called 'fault' and on a path like this:
+#
+#   /sys/class/enclosure/0:0:1:0/SLOT 10/fault
+#
+# On NVMe it's called 'attention' and on a path like this:
+#
+#   /sys/bus/pci/slot/0/attention
+#
+# This function returns the full path to the fault LED file for a given
+# enclosure/slot directory.
+#
+path_to_led()
+{
+	dir=$1
+	if [ -f "$dir/fault" ] ; then
+		echo "$dir/fault"
+	elif [ -f "$dir/attention" ] ; then
+		echo "$dir/attention"
+	fi
 }
 
 state_to_val()
 {
 	state="$1"
-	if [ "$state" = "FAULTED" ] || [ "$state" = "DEGRADED" ] || \
-	   [ "$state" = "UNAVAIL" ] ; then
-		echo 1
-	elif [ "$state" = "ONLINE" ] ; then
-		echo 0
-	fi
+	case "$state" in
+		FAULTED|DEGRADED|UNAVAIL)
+			echo 1
+			;;
+		ONLINE)
+			echo 0
+			;;
+	esac
 }
 
-# process_pool ([pool])
 #
-# Iterate through a pool (or pools) and set the VDEV's enclosure slot LEDs to
-# the VDEV's state.
+# Given a nvme name like 'nvme0n1', pass back its slot directory
+# like "/sys/bus/pci/slots/0"
+#
+nvme_dev_to_slot()
+{
+	dev="$1"
+
+	# Get the address "0000:01:00.0"
+	address=$(cat "/sys/class/block/$dev/device/address")
+
+	# For each /sys/bus/pci/slots subdir that is an actual number
+	# (rather than weird directories like "1-3/").
+	for slot in /sys/bus/pci/slots/* ; do
+		case "${slot##*/}" in
+			''|*[!0-9]*) continue ;;
+		esac
+		# The slot's address is a shortened form of the device's:
+		#
+		# address=           "0000:01:00.0"
+		# this_address =     "0000:01:00"
+		#
+		# so compare it as a literal prefix.  An unreadable (empty)
+		# slot address would be a prefix of anything: skip it.
+		this_address=$(cat "$slot/address")
+		[ -n "$this_address" ] || continue
+		case "$address" in
+			"$this_address"*) echo "$slot" ; break ;;
+		esac
+	done
+}
+
+
+# process_pool (pool)
+#
+# Iterate through a pool and set the vdevs' enclosure slot LEDs to
+# those vdevs' state.
 #
 # Arguments
-#   pool:	Optional pool name.  If not specified, iterate though all pools.
+#   pool:	Pool name.
 #
 # Return
 #  0 on success, 3 on missing sysfs path
@@ -113,19 +175,27 @@
 process_pool()
 {
 	pool="$1"
+
+	# The output will be the vdevs only (from "grep '/dev/'"):
+	#
+	#    U45     ONLINE       0     0     0   /dev/sdk          0
+	#    U46     ONLINE       0     0     0   /dev/sdm          0
+	#    U47     ONLINE       0     0     0   /dev/sdn          0
+	#    U50     ONLINE       0     0     0  /dev/sdbn          0
+	#
+	ZPOOL_SCRIPTS_AS_ROOT=1 $ZPOOL status -c upath,fault_led "$pool" | grep '/dev/' | (
 	rc=0
-
-	# Lookup all the current LED values and paths in parallel
-	#shellcheck disable=SC2016
-	cmd='echo led_token=$(cat "$VDEV_ENC_SYSFS_PATH/fault"),"$VDEV_ENC_SYSFS_PATH",'
-	out=$($ZPOOL status -vc "$cmd" "$pool" | grep 'led_token=')
-
-	#shellcheck disable=SC2034
-	echo "$out" | while read -r vdev state read write chksum therest; do
+	while read -r vdev state _ _ _ therest; do
 		# Read out current LED value and path
-		tmp=$(echo "$therest" | sed 's/^.*led_token=//g')
-		vdev_enc_sysfs_path=$(echo "$tmp" | awk -F ',' '{print $2}')
-		current_val=$(echo "$tmp" | awk -F ',' '{print $1}')
+		# Get dev name (like 'sda')
+		dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
+		vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
+		if [ ! -d "$vdev_enc_sysfs_path" ] ; then
+			# This is not a JBOD disk, but it could be a PCI NVMe drive
+			vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
+		fi
+
+		current_val=$(echo "$therest" | awk '{print $NF}')
 
 		if [ "$current_val" != "0" ] ; then
 			current_val=1
@@ -136,40 +206,33 @@
 			continue
 		fi
 
-		if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
-			#shellcheck disable=SC2030
-			rc=1
-			zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
-			continue;
+		led_path=$(path_to_led "$vdev_enc_sysfs_path")
+		if [ ! -e "$led_path" ] ; then
+			rc=3
+			zed_log_msg "vdev $vdev '$led_path' doesn't exist"
+			continue
 		fi
 
 		val=$(state_to_val "$state")
 
 		if [ "$current_val" = "$val" ] ; then
 			# LED is already set correctly
-			continue;
+			continue
 		fi
 
-		if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
-			rc=1
+		if ! check_and_set_led "$led_path" "$val"; then
+			rc=3
 		fi
-
 	done
-
-	#shellcheck disable=SC2031
-	if [ "$rc" = "0" ] ; then
-		return 0
-	else
-		# We didn't see a sysfs entry that we wanted to set
-		return 3
-	fi
+	exit "$rc"; )
 }
 
 if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; then
-	# Got a statechange for an individual VDEV
+	# Got a statechange for an individual vdev
 	val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
 	vdev=$(basename "$ZEVENT_VDEV_PATH")
-	check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
+	ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
+	check_and_set_led "$ledpath" "$val"
 else
 	# Process the entire pool
 	poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")

diff --git a/zfs/cmd/zed/zed.d/statechange-notify.sh b/zfs/cmd/zed/zed.d/statechange-notify.sh
index f46080a..8d7a531 100755
--- a/zfs/cmd/zed/zed.d/statechange-notify.sh
+++ b/zfs/cmd/zed/zed.d/statechange-notify.sh

@@ -15,7 +15,7 @@
 # Send notification in response to a fault induced statechange
 #
 # ZEVENT_SUBCLASS: 'statechange'
-# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED'
+# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED', 'REMOVED', or 'UNAVAIL'
 #
 # Exit codes:
 #   0: notification sent
@@ -31,13 +31,14 @@
 
 if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \
         && [ "${ZEVENT_VDEV_STATE_STR}" != "DEGRADED" ] \
-        && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ]; then
+        && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ] \
+        && [ "${ZEVENT_VDEV_STATE_STR}" != "UNAVAIL" ]; then
     exit 3
 fi
 
 umask 077
-note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)"
-note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+note_subject="ZFS device fault for pool ${ZEVENT_POOL} on $(hostname)"
+note_pathname="$(mktemp)"
 {
     if [ "${ZEVENT_VDEV_STATE_STR}" = "FAULTED" ] ; then
         echo "The number of I/O errors associated with a ZFS device exceeded"
@@ -64,7 +65,7 @@
     [ -n "${ZEVENT_VDEV_GUID}" ] && echo "  vguid: ${ZEVENT_VDEV_GUID}"
     [ -n "${ZEVENT_VDEV_DEVID}" ] && echo "  devid: ${ZEVENT_VDEV_DEVID}"
 
-    echo "   pool: ${ZEVENT_POOL_GUID}"
+    echo "   pool: ${ZEVENT_POOL} (${ZEVENT_POOL_GUID})"
 
 } > "${note_pathname}"
 

diff --git a/zfs/cmd/zed/zed.d/statechange-slot_off.sh b/zfs/cmd/zed/zed.d/statechange-slot_off.sh
new file mode 100755
index 0000000..150012a
--- /dev/null
+++ b/zfs/cmd/zed/zed.d/statechange-slot_off.sh

@@ -0,0 +1,64 @@
+#!/bin/sh
+# shellcheck disable=SC2154
+#
+# Turn off disk's enclosure slot if it becomes FAULTED.
+#
+# Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos
+# as they flip between FAULTED and ONLINE.  If
+# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets
+# FAULTED, then power down the slot via sysfs:
+#
+# /sys/class/enclosure/<enclosure>/<slot>/power_status
+#
+# We assume the user will be responsible for turning the slot back on again.
+#
+# Note that this script requires that your enclosure be supported by the
+# Linux SCSI Enclosure Services (SES) driver.  The script will do nothing
+# if you have no enclosure, or if your enclosure isn't supported.
+#
+# Exit codes:
+#   0: slot successfully powered off
+#   1: enclosure not available
+#   2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled
+#   3: vdev was not FAULTED
+#   4: The enclosure sysfs path passed from ZFS does not exist
+#   5: Enclosure slot didn't actually turn off after we told it to
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+if [ ! -d /sys/class/enclosure ] ; then
+	# No JBOD enclosure
+	exit 1
+fi
+
+if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then
+	exit 2
+fi
+
+if [ "$ZEVENT_VDEV_STATE_STR" != "FAULTED" ] ; then
+	exit 3
+fi
+
+# Slot directory names can contain spaces ("SLOT 10"): keep the path quoted.
+power_status="$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"
+if [ ! -f "$power_status" ] ; then
+	exit 4
+fi
+
+# Turn off the slot and wait for sysfs to report that the slot is off.
+# It can take ~400ms on some enclosures and multiple retries may be needed.
+for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ; do
+	echo "off" | tee "$power_status"
+	for _ in 1 2 3 4 5 ; do
+		if [ "$(cat "$power_status")" = "off" ] ; then
+			break 2
+		fi
+		sleep 0.1
+	done
+done
+if [ "$(cat "$power_status")" != "off" ] ; then
+	exit 5
+fi
+
+zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"

diff --git a/zfs/cmd/zed/zed.d/trim_finish-notify.sh b/zfs/cmd/zed/zed.d/trim_finish-notify.sh
new file mode 100755
index 0000000..8fdb645
--- /dev/null
+++ b/zfs/cmd/zed/zed.d/trim_finish-notify.sh

@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Send notification in response to a TRIM_FINISH. The event
+# will be received for each vdev in the pool which was trimmed.
+#
+# Exit codes:
+#   0: notification sent
+#   1: notification failed
+#   2: notification not configured
+#   9: internal error
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+[ -n "${ZEVENT_POOL}" ] || exit 9
+[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
+
+zed_check_cmd "${ZPOOL}" || exit 9
+
+umask 077
+note_subject="ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on $(hostname)"
+note_pathname="$(mktemp)"
+{
+    echo "ZFS has finished a trim:"
+    echo
+    echo "   eid: ${ZEVENT_EID}"
+    echo " class: ${ZEVENT_SUBCLASS}"
+    echo "  host: $(hostname)"
+    echo "  time: ${ZEVENT_TIME_STRING}"
+
+    "${ZPOOL}" status -t "${ZEVENT_POOL}"
+
+} > "${note_pathname}"
+
+zed_notify "${note_subject}" "${note_pathname}"; rv=$?
+rm -f "${note_pathname}"
+exit "${rv}"

diff --git a/zfs/cmd/zed/zed.d/zed-functions.sh b/zfs/cmd/zed/zed.d/zed-functions.sh
index a6e6085..567f7ae 100644
--- a/zfs/cmd/zed/zed.d/zed-functions.sh
+++ b/zfs/cmd/zed/zed.d/zed-functions.sh

@@ -77,7 +77,7 @@
 zed_log_err()
 {
     logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "error:" \
-        "$(basename -- "$0"):""${ZEVENT_EID:+" eid=${ZEVENT_EID}:"}" "$@"
+        "${0##*/}:""${ZEVENT_EID:+" eid=${ZEVENT_EID}:"}" "$@"
 }
 
 
@@ -126,10 +126,8 @@
 
     # Obtain a lock on the file bound to the given file descriptor.
     #
-    eval "exec ${fd}> '${lockfile}'"
-    err="$(flock --exclusive "${fd}" 2>&1)"
-    # shellcheck disable=SC2181
-    if [ $? -ne 0 ]; then
+    eval "exec ${fd}>> '${lockfile}'"
+    if ! err="$(flock --exclusive "${fd}" 2>&1)"; then
         zed_log_err "failed to lock \"${lockfile}\": ${err}"
     fi
 
@@ -165,9 +163,7 @@
     fi
 
     # Release the lock and close the file descriptor.
-    err="$(flock --unlock "${fd}" 2>&1)"
-    # shellcheck disable=SC2181
-    if [ $? -ne 0 ]; then
+    if ! err="$(flock --unlock "${fd}" 2>&1)"; then
         zed_log_err "failed to unlock \"${lockfile}\": ${err}"
     fi
     eval "exec ${fd}>&-"
@@ -202,6 +198,14 @@
     [ "${rv}" -eq 0 ] && num_success=$((num_success + 1))
     [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1))
 
+    zed_notify_slack_webhook "${subject}" "${pathname}"; rv=$?
+    [ "${rv}" -eq 0 ] && num_success=$((num_success + 1))
+    [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1))
+
+    zed_notify_pushover "${subject}" "${pathname}"; rv=$?
+    [ "${rv}" -eq 0 ] && num_success=$((num_success + 1))
+    [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1))
+
     [ "${num_success}" -gt 0 ] && return 0
     [ "${num_failure}" -gt 0 ] && return 1
     return 2
@@ -220,6 +224,8 @@
 # ZED_EMAIL_OPTS.  This undergoes the following keyword substitutions:
 # - @ADDRESS@ is replaced with the space-delimited recipient email address(es)
 # - @SUBJECT@ is replaced with the notification subject
+#   If @SUBJECT@ is omitted here, a "Subject: ..." header will be added to the notification.
+#
 #
 # Arguments
 #   subject: notification subject
@@ -237,7 +243,7 @@
 #
 zed_notify_email()
 {
-    local subject="$1"
+    local subject="${1:-"ZED notification"}"
     local pathname="${2:-"/dev/null"}"
 
     : "${ZED_EMAIL_PROG:="mail"}"
@@ -254,19 +260,30 @@
     [ -n "${subject}" ] || return 1
     if [ ! -r "${pathname}" ]; then
         zed_log_err \
-                "$(basename "${ZED_EMAIL_PROG}") cannot read \"${pathname}\""
+                "${ZED_EMAIL_PROG##*/} cannot read \"${pathname}\""
         return 1
     fi
 
-    ZED_EMAIL_OPTS="$(echo "${ZED_EMAIL_OPTS}" \
+    # construct cmdline options
+    ZED_EMAIL_OPTS_PARSED="$(echo "${ZED_EMAIL_OPTS}" \
         | sed   -e "s/@ADDRESS@/${ZED_EMAIL_ADDR}/g" \
                 -e "s/@SUBJECT@/${subject}/g")"
 
-    # shellcheck disable=SC2086
-    eval "${ZED_EMAIL_PROG}" ${ZED_EMAIL_OPTS} < "${pathname}" >/dev/null 2>&1
+    # pipe message to email prog
+    # shellcheck disable=SC2086,SC2248
+    {
+        # no subject passed as option?
+        if [ "${ZED_EMAIL_OPTS%@SUBJECT@*}" = "${ZED_EMAIL_OPTS}" ] ; then
+            # inject subject header
+            printf "Subject: %s\n" "${subject}"
+        fi
+        # output message
+        cat "${pathname}"
+    } |
+    eval ${ZED_EMAIL_PROG} ${ZED_EMAIL_OPTS_PARSED} >/dev/null 2>&1
     rv=$?
     if [ "${rv}" -ne 0 ]; then
-        zed_log_err "$(basename "${ZED_EMAIL_PROG}") exit=${rv}"
+        zed_log_err "${ZED_EMAIL_PROG##*/} exit=${rv}"
         return 1
     fi
     return 0
@@ -359,6 +376,158 @@
 }
 
 
+# zed_notify_slack_webhook (subject, pathname)
+#
+# Notification via Slack Webhook <https://api.slack.com/incoming-webhooks>.
+# The Webhook URL (ZED_SLACK_WEBHOOK_URL) identifies this client to the
+# Slack channel.
+#
+# Requires awk, curl, and sed executables to be installed in the standard PATH.
+#
+# References
+#   https://api.slack.com/incoming-webhooks
+#
+# Arguments
+#   subject: notification subject
+#   pathname: pathname containing the notification message (OPTIONAL)
+#
+# Globals
+#   ZED_SLACK_WEBHOOK_URL
+#
+# Return
+#   0: notification sent
+#   1: notification failed
+#   2: not configured
+#
+zed_notify_slack_webhook()
+{
+    [ -n "${ZED_SLACK_WEBHOOK_URL}" ] || return 2
+
+    local subject="$1"
+    local pathname="${2:-"/dev/null"}"
+    local msg_body
+    local msg_json
+    local msg_out
+    local msg_err
+    local url="${ZED_SLACK_WEBHOOK_URL}"
+
+    [ -n "${subject}" ] || return 1
+    if [ ! -r "${pathname}" ]; then
+        zed_log_err "slack webhook cannot read \"${pathname}\""
+        return 1
+    fi
+
+    zed_check_cmd "awk" "curl" "sed" || return 1
+
+    # Escape the following characters in the subject and message body for
+    # JSON: newline, backslash, double quote, horizontal tab, form feed,
+    # and carriage return.  The subject is prepended as a bold ("*...*")
+    # first line so that it goes through the same escaping as the body;
+    # an unescaped quote in the subject would otherwise corrupt the JSON.
+    #
+    msg_body="$({ printf '*%s*\n' "${subject}"; cat "${pathname}"; } | awk '
+        { ORS="\\n" } { gsub(/\\/, "\\\\"); gsub(/"/, "\\\""); gsub(/\t/, "\\t");
+        gsub(/\f/, "\\f"); gsub(/\r/, "\\r"); print }')"
+
+    # Construct the JSON message for posting.
+    #
+    msg_json="$(printf '{"text": "%s"}' "${msg_body}")"
+
+    # Send the POST request and check for errors.
+    #
+    msg_out="$(curl -X POST "${url}" \
+        --header "Content-Type: application/json" --data-binary "${msg_json}" \
+        2>/dev/null)"; rv=$?
+    if [ "${rv}" -ne 0 ]; then
+        zed_log_err "curl exit=${rv}"
+        return 1
+    fi
+    msg_err="$(echo "${msg_out}" \
+        | sed -n -e 's/.*"error" *:.*"message" *: *"\([^"]*\)".*/\1/p')"
+    if [ -n "${msg_err}" ]; then
+        zed_log_err "slack webhook \"${msg_err}\""
+        return 1
+    fi
+    return 0
+}
+
+# zed_notify_pushover (subject, pathname)
+#
+# Send a notification via Pushover <https://pushover.net/>.
+# The access token (ZED_PUSHOVER_TOKEN) identifies this client to the
+# Pushover server. The user token (ZED_PUSHOVER_USER) defines the user or
+# group to which the notification will be sent.
+#
+# Requires curl and sed executables to be installed in the standard PATH.
+#
+# References
+#   https://pushover.net/api
+#
+# Arguments
+#   subject: notification subject
+#   pathname: pathname containing the notification message (OPTIONAL)
+#
+# Globals
+#   ZED_PUSHOVER_TOKEN
+#   ZED_PUSHOVER_USER
+#
+# Return
+#   0: notification sent
+#   1: notification failed
+#   2: not configured
+#
+zed_notify_pushover()
+{
+    local subject="$1"
+    local pathname="${2:-"/dev/null"}"
+    local msg_body
+    local msg_out
+    local msg_err
+    local url="https://api.pushover.net/1/messages.json"
+
+    [ -n "${ZED_PUSHOVER_TOKEN}" ] && [ -n "${ZED_PUSHOVER_USER}" ] || return 2
+
+    if [ ! -r "${pathname}" ]; then
+        zed_log_err "pushover cannot read \"${pathname}\""
+        return 1
+    fi
+
+    zed_check_cmd "curl" "sed" || return 1
+
+    # Read the message body in.
+    #
+    msg_body="$(cat "${pathname}")"
+
+    if [ -z "${msg_body}" ]
+    then
+        msg_body=$subject
+        subject=""
+    fi
+
+    # Send the POST request and check for errors.
+    #
+    msg_out="$( \
+        curl \
+        --form-string "token=${ZED_PUSHOVER_TOKEN}" \
+        --form-string "user=${ZED_PUSHOVER_USER}" \
+        --form-string "message=${msg_body}" \
+        --form-string "title=${subject}" \
+        "${url}" \
+        2>/dev/null \
+        )"; rv=$?
+    if [ "${rv}" -ne 0 ]; then
+        zed_log_err "curl exit=${rv}"
+        return 1
+    fi
+    msg_err="$(echo "${msg_out}" \
+        | sed -n -e 's/.*"errors" *:.*\[\(.*\)\].*/\1/p')"
+    if [ -n "${msg_err}" ]; then
+        zed_log_err "pushover \"${msg_err}"\"
+        return 1
+    fi
+    return 0
+}
+
+
 # zed_rate_limit (tag, [interval])
 #
 # Check whether an event of a given type [tag] has already occurred within the
@@ -433,10 +602,8 @@
 		return
 	fi
 
-	guid=$(printf "%llu" "$1")
-	if [ -n "$guid" ] ; then
-		$ZPOOL get -H -ovalue,name guid | awk '$1=='"$guid"' {print $2}'
-	fi
+	guid="$(printf "%u" "$1")"
+	$ZPOOL get -H -ovalue,name guid | awk '$1 == '"$guid"' {print $2; exit}'
 }
 
 # zed_exit_if_ignoring_this_event

diff --git a/zfs/cmd/zed/zed.d/zed.rc b/zfs/cmd/zed/zed.d/zed.rc
index 0ef7068..1dfd434 100644
--- a/zfs/cmd/zed/zed.d/zed.rc
+++ b/zfs/cmd/zed/zed.d/zed.rc

@@ -13,9 +13,9 @@
 # Email address of the zpool administrator for receipt of notifications;
 #   multiple addresses can be specified if they are delimited by whitespace.
 # Email will only be sent if ZED_EMAIL_ADDR is defined.
-# Disabled by default; uncomment to enable.
+# Enabled by default; comment to disable.
 #
-#ZED_EMAIL_ADDR="root"
+ZED_EMAIL_ADDR="root"
 
 ##
 # Name or path of executable responsible for sending notifications via email;
@@ -30,6 +30,7 @@
 # The string @SUBJECT@ will be replaced with the notification subject;
 #   this should be protected with quotes to prevent word-splitting.
 # Email will only be sent if ZED_EMAIL_ADDR is defined.
+# If @SUBJECT@ is omitted here, a "Subject: ..." header will be added to the notification.
 #
 #ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"
 
@@ -75,14 +76,39 @@
 #ZED_PUSHBULLET_CHANNEL_TAG=""
 
 ##
+# Slack Webhook URL.
+# This allows posting to the given channel and includes an access token.
+#   <https://api.slack.com/incoming-webhooks>
+# Disabled by default; uncomment to enable.
+#
+#ZED_SLACK_WEBHOOK_URL=""
+
+##
+# Pushover token.
+# This defines the application from which the notification will be sent.
+#   <https://pushover.net/api#registration>
+# Disabled by default; uncomment to enable.
+# ZED_PUSHOVER_USER, below, must also be configured.
+#
+#ZED_PUSHOVER_TOKEN=""
+
+##
+# Pushover user key.
+# This defines which user or group will receive Pushover notifications.
+#  <https://pushover.net/api#identifiers>
+# Disabled by default; uncomment to enable.
+# ZED_PUSHOVER_TOKEN, above, must also be configured.
+#ZED_PUSHOVER_USER=""
+
+##
 # Default directory for zed state files.
 #
 #ZED_RUNDIR="/var/run"
 
 ##
 # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED.  This works for
-# device mapper and multipath devices as well.  Your enclosure must be
-# supported by the Linux SES driver for this to work.
+# device mapper and multipath devices as well.  This works with JBOD enclosures
+# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
 #
 ZED_USE_ENCLOSURE_LEDS=1
 
@@ -110,5 +136,15 @@
 # Otherwise, if ZED_SYSLOG_SUBCLASS_EXCLUDE is set, the
 # matching subclasses are excluded from logging.
 #ZED_SYSLOG_SUBCLASS_INCLUDE="checksum|scrub_*|vdev.*"
-#ZED_SYSLOG_SUBCLASS_EXCLUDE="statechange|config_*|history_event"
+ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
 
+##
+# Use GUIDs instead of names when logging pool and vdevs
+# Disabled by default, 1 to enable and 0 to disable.
+#ZED_SYSLOG_DISPLAY_GUIDS=1
+
+##
+# Power off the drive's slot in the enclosure if it becomes FAULTED.  This can
+# help silence misbehaving drives.  This assumes your drive enclosure fully
+# supports slot power control via sysfs.
+#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1

diff --git a/zfs/cmd/zed/zed.h b/zfs/cmd/zed/zed.h
index 3ac0e63..94f13c2 100644
--- a/zfs/cmd/zed/zed.h
+++ b/zfs/cmd/zed/zed.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -16,11 +16,6 @@
 #define	ZED_H
 
 /*
- * Absolute path for the default zed configuration file.
- */
-#define	ZED_CONF_FILE		SYSCONFDIR "/zfs/zed.conf"
-
-/*
  * Absolute path for the default zed pid file.
  */
 #define	ZED_PID_FILE		RUNSTATEDIR "/zed.pid"
@@ -36,16 +31,6 @@
 #define	ZED_ZEDLET_DIR		SYSCONFDIR "/zfs/zed.d"
 
 /*
- * Reserved for future use.
- */
-#define	ZED_MAX_EVENTS		0
-
-/*
- * Reserved for future use.
- */
-#define	ZED_MIN_EVENTS		0
-
-/*
  * String prefix for ZED variables passed via environment variables.
  */
 #define	ZED_VAR_PREFIX		"ZED_"

diff --git a/zfs/cmd/zed/zed_conf.c b/zfs/cmd/zed/zed_conf.c
index 8667136..5993510 100644
--- a/zfs/cmd/zed/zed_conf.c
+++ b/zfs/cmd/zed/zed_conf.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -22,6 +22,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -32,43 +33,26 @@
 #include "zed_strings.h"
 
 /*
- * Return a new configuration with default values.
+ * Initialise the configuration with default values.
  */
-struct zed_conf *
-zed_conf_create(void)
+void
+zed_conf_init(struct zed_conf *zcp)
 {
-	struct zed_conf *zcp;
+	memset(zcp, 0, sizeof (*zcp));
 
-	zcp = calloc(1, sizeof (*zcp));
-	if (!zcp)
-		goto nomem;
+	/* zcp->zfs_hdl opened in zed_event_init() */
+	/* zcp->zedlets created in zed_conf_scan_dir() */
 
-	zcp->syslog_facility = LOG_DAEMON;
-	zcp->min_events = ZED_MIN_EVENTS;
-	zcp->max_events = ZED_MAX_EVENTS;
-	zcp->pid_fd = -1;
-	zcp->zedlets = NULL;		/* created via zed_conf_scan_dir() */
-	zcp->state_fd = -1;		/* opened via zed_conf_open_state() */
-	zcp->zfs_hdl = NULL;		/* opened via zed_event_init() */
-	zcp->zevent_fd = -1;		/* opened via zed_event_init() */
+	zcp->pid_fd = -1;		/* opened in zed_conf_write_pid() */
+	zcp->state_fd = -1;		/* opened in zed_conf_open_state() */
+	zcp->zevent_fd = -1;		/* opened in zed_event_init() */
 
-	if (!(zcp->conf_file = strdup(ZED_CONF_FILE)))
-		goto nomem;
+	zcp->max_jobs = 16;
 
-	if (!(zcp->pid_file = strdup(ZED_PID_FILE)))
-		goto nomem;
-
-	if (!(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)))
-		goto nomem;
-
-	if (!(zcp->state_file = strdup(ZED_STATE_FILE)))
-		goto nomem;
-
-	return (zcp);
-
-nomem:
-	zed_log_die("Failed to create conf: %s", strerror(errno));
-	return (NULL);
+	if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
+	    !(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
+	    !(zcp->state_file = strdup(ZED_STATE_FILE)))
+		zed_log_die("Failed to create conf: %s", strerror(errno));
 }
 
 /*
@@ -79,9 +63,6 @@
 void
 zed_conf_destroy(struct zed_conf *zcp)
 {
-	if (!zcp)
-		return;
-
 	if (zcp->state_fd >= 0) {
 		if (close(zcp->state_fd) < 0)
 			zed_log_msg(LOG_WARNING,
@@ -102,10 +83,6 @@
 			    zcp->pid_file, strerror(errno));
 		zcp->pid_fd = -1;
 	}
-	if (zcp->conf_file) {
-		free(zcp->conf_file);
-		zcp->conf_file = NULL;
-	}
 	if (zcp->pid_file) {
 		free(zcp->pid_file);
 		zcp->pid_file = NULL;
@@ -122,7 +99,6 @@
 		zed_strings_destroy(zcp->zedlets);
 		zcp->zedlets = NULL;
 	}
-	free(zcp);
 }
 
 /*
@@ -132,44 +108,52 @@
  * otherwise, output to stderr and exit with a failure status.
  */
 static void
-_zed_conf_display_help(const char *prog, int got_err)
+_zed_conf_display_help(const char *prog, boolean_t got_err)
 {
+	struct opt { const char *o, *d, *v; };
+
 	FILE *fp = got_err ? stderr : stdout;
-	int w1 = 4;			/* width of leading whitespace */
-	int w2 = 8;			/* width of L-justified option field */
+
+	struct opt *oo;
+	struct opt iopts[] = {
+		{ .o = "-h", .d = "Display help" },
+		{ .o = "-L", .d = "Display license information" },
+		{ .o = "-V", .d = "Display version information" },
+		{},
+	};
+	struct opt nopts[] = {
+		{ .o = "-v", .d = "Be verbose" },
+		{ .o = "-f", .d = "Force daemon to run" },
+		{ .o = "-F", .d = "Run daemon in the foreground" },
+		{ .o = "-I",
+		    .d = "Idle daemon until kernel module is (re)loaded" },
+		{ .o = "-M", .d = "Lock all pages in memory" },
+		{ .o = "-P", .d = "$PATH for ZED to use (only used by ZTS)" },
+		{ .o = "-Z", .d = "Zero state file" },
+		{},
+	};
+	struct opt vopts[] = {
+		{ .o = "-d DIR", .d = "Read enabled ZEDLETs from DIR.",
+		    .v = ZED_ZEDLET_DIR },
+		{ .o = "-p FILE", .d = "Write daemon's PID to FILE.",
+		    .v = ZED_PID_FILE },
+		{ .o = "-s FILE", .d = "Write daemon's state to FILE.",
+		    .v = ZED_STATE_FILE },
+		{ .o = "-j JOBS", .d = "Start at most JOBS at once.",
+		    .v = "16" },
+		{},
+	};
 
 	fprintf(fp, "Usage: %s [OPTION]...\n", (prog ? prog : "zed"));
 	fprintf(fp, "\n");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-h",
-	    "Display help.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-L",
-	    "Display license information.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-V",
-	    "Display version information.");
+	for (oo = iopts; oo->o; ++oo)
+		fprintf(fp, "    %*s %s\n", -8, oo->o, oo->d);
 	fprintf(fp, "\n");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-v",
-	    "Be verbose.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-f",
-	    "Force daemon to run.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-F",
-	    "Run daemon in the foreground.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-M",
-	    "Lock all pages in memory.");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-P",
-	    "$PATH for ZED to use (only used by ZTS).");
-	fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-Z",
-	    "Zero state file.");
+	for (oo = nopts; oo->o; ++oo)
+		fprintf(fp, "    %*s %s\n", -8, oo->o, oo->d);
 	fprintf(fp, "\n");
-#if 0
-	fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-c FILE",
-	    "Read configuration from FILE.", ZED_CONF_FILE);
-#endif
-	fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-d DIR",
-	    "Read enabled ZEDLETs from DIR.", ZED_ZEDLET_DIR);
-	fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-p FILE",
-	    "Write daemon's PID to FILE.", ZED_PID_FILE);
-	fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-s FILE",
-	    "Write daemon's state to FILE.", ZED_STATE_FILE);
+	for (oo = vopts; oo->o; ++oo)
+		fprintf(fp, "    %*s %s [%s]\n", -8, oo->o, oo->d, oo->v);
 	fprintf(fp, "\n");
 
 	exit(got_err ? EXIT_FAILURE : EXIT_SUCCESS);
@@ -181,20 +165,14 @@
 static void
 _zed_conf_display_license(void)
 {
-	const char **pp;
-	const char *text[] = {
-	    "The ZFS Event Daemon (ZED) is distributed under the terms of the",
-	    "  Common Development and Distribution License (CDDL-1.0)",
-	    "  <http://opensource.org/licenses/CDDL-1.0>.",
-	    "",
+	printf(
+	    "The ZFS Event Daemon (ZED) is distributed under the terms of the\n"
+	    "  Common Development and Distribution License (CDDL-1.0)\n"
+	    "  <http://opensource.org/licenses/CDDL-1.0>.\n"
+	    "\n"
 	    "Developed at Lawrence Livermore National Laboratory"
-	    " (LLNL-CODE-403049).",
-	    "",
-	    NULL
-	};
-
-	for (pp = text; *pp; pp++)
-		printf("%s\n", *pp);
+	    " (LLNL-CODE-403049).\n"
+	    "\n");
 
 	exit(EXIT_SUCCESS);
 }
@@ -229,16 +207,19 @@
 
 	if (path[0] == '/') {
 		*resultp = strdup(path);
-	} else if (!getcwd(buf, sizeof (buf))) {
-		zed_log_die("Failed to get current working dir: %s",
-		    strerror(errno));
-	} else if (strlcat(buf, "/", sizeof (buf)) >= sizeof (buf)) {
-		zed_log_die("Failed to copy path: %s", strerror(ENAMETOOLONG));
-	} else if (strlcat(buf, path, sizeof (buf)) >= sizeof (buf)) {
-		zed_log_die("Failed to copy path: %s", strerror(ENAMETOOLONG));
 	} else {
+		if (!getcwd(buf, sizeof (buf)))
+			zed_log_die("Failed to get current working dir: %s",
+			    strerror(errno));
+
+		if (strlcat(buf, "/", sizeof (buf)) >= sizeof (buf) ||
+		    strlcat(buf, path, sizeof (buf)) >= sizeof (buf))
+			zed_log_die("Failed to copy path: %s",
+			    strerror(ENAMETOOLONG));
+
 		*resultp = strdup(buf);
 	}
+
 	if (!*resultp)
 		zed_log_die("Failed to copy path: %s", strerror(ENOMEM));
 }
@@ -249,8 +230,9 @@
 void
 zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
 {
-	const char * const opts = ":hLVc:d:p:P:s:vfFMZ";
+	const char * const opts = ":hLVd:p:P:s:vfFMZIj:";
 	int opt;
+	unsigned long raw;
 
 	if (!zcp || !argv || !argv[0])
 		zed_log_die("Failed to parse options: Internal error");
@@ -260,7 +242,7 @@
 	while ((opt = getopt(argc, argv, opts)) != -1) {
 		switch (opt) {
 		case 'h':
-			_zed_conf_display_help(argv[0], EXIT_SUCCESS);
+			_zed_conf_display_help(argv[0], B_FALSE);
 			break;
 		case 'L':
 			_zed_conf_display_license();
@@ -268,12 +250,12 @@
 		case 'V':
 			_zed_conf_display_version();
 			break;
-		case 'c':
-			_zed_conf_parse_path(&zcp->conf_file, optarg);
-			break;
 		case 'd':
 			_zed_conf_parse_path(&zcp->zedlet_dir, optarg);
 			break;
+		case 'I':
+			zcp->do_idle = 1;
+			break;
 		case 'p':
 			_zed_conf_parse_path(&zcp->pid_file, optarg);
 			break;
@@ -298,40 +280,37 @@
 		case 'Z':
 			zcp->do_zero = 1;
 			break;
+		case 'j':
+			errno = 0;
+			raw = strtoul(optarg, NULL, 0);
+			if (errno == ERANGE || raw > INT16_MAX) {
+				zed_log_die("%lu is too many jobs", raw);
+			} else if (raw == 0) {
+				zed_log_die("0 jobs makes no sense");
+			} else {
+				zcp->max_jobs = raw;
+			}
+			break;
 		case '?':
 		default:
 			if (optopt == '?')
-				_zed_conf_display_help(argv[0], EXIT_SUCCESS);
+				_zed_conf_display_help(argv[0], B_FALSE);
 
-			fprintf(stderr, "%s: %s '-%c'\n\n", argv[0],
-			    "Invalid option", optopt);
-			_zed_conf_display_help(argv[0], EXIT_FAILURE);
+			fprintf(stderr, "%s: Invalid option '-%c'\n\n",
+			    argv[0], optopt);
+			_zed_conf_display_help(argv[0], B_TRUE);
 			break;
 		}
 	}
 }
 
 /*
- * Parse the configuration file into the configuration [zcp].
- *
- * FIXME: Not yet implemented.
- */
-void
-zed_conf_parse_file(struct zed_conf *zcp)
-{
-	if (!zcp)
-		zed_log_die("Failed to parse config: %s", strerror(EINVAL));
-}
-
-/*
  * Scan the [zcp] zedlet_dir for files to exec based on the event class.
  * Files must be executable by user, but not writable by group or other.
  * Dotfiles are ignored.
  *
  * Return 0 on success with an updated set of zedlets,
  * or -1 on error with errno set.
- *
- * FIXME: Check if zedlet_dir and all parent dirs are secure.
  */
 int
 zed_conf_scan_dir(struct zed_conf *zcp)
@@ -447,8 +426,6 @@
 int
 zed_conf_write_pid(struct zed_conf *zcp)
 {
-	const mode_t dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
-	const mode_t filemode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 	char buf[PATH_MAX];
 	int n;
 	char *p;
@@ -476,7 +453,7 @@
 	if (p)
 		*p = '\0';
 
-	if ((mkdirp(buf, dirmode) < 0) && (errno != EEXIST)) {
+	if ((mkdirp(buf, 0755) < 0) && (errno != EEXIST)) {
 		zed_log_msg(LOG_ERR, "Failed to create directory \"%s\": %s",
 		    buf, strerror(errno));
 		goto err;
@@ -486,7 +463,7 @@
 	 */
 	mask = umask(0);
 	umask(mask | 022);
-	zcp->pid_fd = open(zcp->pid_file, (O_RDWR | O_CREAT), filemode);
+	zcp->pid_fd = open(zcp->pid_file, O_RDWR | O_CREAT | O_CLOEXEC, 0644);
 	umask(mask);
 	if (zcp->pid_fd < 0) {
 		zed_log_msg(LOG_ERR, "Failed to open PID file \"%s\": %s",
@@ -523,7 +500,7 @@
 		errno = ERANGE;
 		zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
-	} else if (zed_file_write_n(zcp->pid_fd, buf, n) != n) {
+	} else if (write(zcp->pid_fd, buf, n) != n) {
 		zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 	} else if (fdatasync(zcp->pid_fd) < 0) {
@@ -551,7 +528,6 @@
 zed_conf_open_state(struct zed_conf *zcp)
 {
 	char dirbuf[PATH_MAX];
-	mode_t dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
 	int n;
 	char *p;
 	int rv;
@@ -573,7 +549,7 @@
 	if (p)
 		*p = '\0';
 
-	if ((mkdirp(dirbuf, dirmode) < 0) && (errno != EEXIST)) {
+	if ((mkdirp(dirbuf, 0755) < 0) && (errno != EEXIST)) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to create directory \"%s\": %s",
 		    dirbuf, strerror(errno));
@@ -591,7 +567,7 @@
 		(void) unlink(zcp->state_file);
 
 	zcp->state_fd = open(zcp->state_file,
-	    (O_RDWR | O_CREAT), (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH));
+	    O_RDWR | O_CREAT | O_CLOEXEC, 0644);
 	if (zcp->state_fd < 0) {
 		zed_log_msg(LOG_WARNING, "Failed to open state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));

diff --git a/zfs/cmd/zed/zed_conf.h b/zfs/cmd/zed/zed_conf.h
index 7d6b63b..0b30a15 100644
--- a/zfs/cmd/zed/zed_conf.h
+++ b/zfs/cmd/zed/zed_conf.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -20,42 +20,39 @@
 #include "zed_strings.h"
 
 struct zed_conf {
-	unsigned	do_force:1;		/* true if force enabled */
-	unsigned	do_foreground:1;	/* true if run in foreground */
-	unsigned	do_memlock:1;		/* true if locking memory */
-	unsigned	do_verbose:1;		/* true if verbosity enabled */
-	unsigned	do_zero:1;		/* true if zeroing state */
-	int		syslog_facility;	/* syslog facility value */
-	int		min_events;		/* RESERVED FOR FUTURE USE */
-	int		max_events;		/* RESERVED FOR FUTURE USE */
-	char		*conf_file;		/* abs path to config file */
 	char		*pid_file;		/* abs path to pid file */
-	int		pid_fd;			/* fd to pid file for lock */
 	char		*zedlet_dir;		/* abs path to zedlet dir */
-	zed_strings_t	*zedlets;		/* names of enabled zedlets */
 	char		*state_file;		/* abs path to state file */
-	int		state_fd;		/* fd to state file */
+
 	libzfs_handle_t	*zfs_hdl;		/* handle to libzfs */
-	int		zevent_fd;		/* fd for access to zevents */
+	zed_strings_t	*zedlets;		/* names of enabled zedlets */
 	char		*path;		/* custom $PATH for zedlets to use */
+
+	int		pid_fd;			/* fd to pid file for lock */
+	int		state_fd;		/* fd to state file */
+	int		zevent_fd;		/* fd for access to zevents */
+
+	int16_t max_jobs;		/* max zedlets to run at one time */
+
+	boolean_t	do_force:1;		/* true if force enabled */
+	boolean_t	do_foreground:1;	/* true if run in foreground */
+	boolean_t	do_memlock:1;		/* true if locking memory */
+	boolean_t	do_verbose:1;		/* true if verbosity enabled */
+	boolean_t	do_zero:1;		/* true if zeroing state */
+	boolean_t	do_idle:1;		/* true if idle enabled */
 };
 
-struct zed_conf *zed_conf_create(void);
-
+void zed_conf_init(struct zed_conf *zcp);
 void zed_conf_destroy(struct zed_conf *zcp);
 
 void zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv);
 
-void zed_conf_parse_file(struct zed_conf *zcp);
-
 int zed_conf_scan_dir(struct zed_conf *zcp);
 
 int zed_conf_write_pid(struct zed_conf *zcp);
 
 int zed_conf_open_state(struct zed_conf *zcp);
-
 int zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[]);
-
 int zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[]);
 
 #endif	/* !ZED_CONF_H */

diff --git a/zfs/cmd/zed/zed_disk_event.c b/zfs/cmd/zed/zed_disk_event.c
index 174d245..3c8e2fb 100644
--- a/zfs/cmd/zed/zed_disk_event.c
+++ b/zfs/cmd/zed/zed_disk_event.c

@@ -72,10 +72,14 @@
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
 	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
+	if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == B_TRUE)
+		zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
 	if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
+	if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
+		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
@@ -128,6 +132,20 @@
 
 		numval *= strtoull(value, NULL, 10);
 		(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
+
+		/*
+		 * If the device has a parent, then get the parent block
+		 * device's size as well.  For example, /dev/sda1's parent
+		 * is /dev/sda.
+		 */
+		struct udev_device *parent_dev = udev_device_get_parent(dev);
+		if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
+		    != NULL) {
+			uint64_t numval = DEV_BSIZE;
+
+			numval *= strtoull(value, NULL, 10);
+			(void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
+		}
 	}
 
 	/*
@@ -167,7 +185,7 @@
 	while (1) {
 		struct udev_device *dev;
 		const char *action, *type, *part, *sectors;
-		const char *bus, *uuid;
+		const char *bus, *uuid, *devpath;
 		const char *class, *subclass;
 		nvlist_t *nvl;
 		boolean_t is_zfs = B_FALSE;
@@ -206,6 +224,12 @@
 		 * if this is a disk and it is partitioned, then the
 		 * zfs label will reside in a DEVTYPE=partition and
 		 * we can skip passing this event
+		 *
+		 * Special case: Blank disks are sometimes reported with
+		 * an erroneous 'atari' partition, and should not be
+		 * excluded from being used as an autoreplace disk:
+		 *
+		 * https://github.com/openzfs/zfs/issues/13497
 		 */
 		type = udev_device_get_property_value(dev, "DEVTYPE");
 		part = udev_device_get_property_value(dev,
@@ -213,9 +237,23 @@
 		if (type != NULL && type[0] != '\0' &&
 		    strcmp(type, "disk") == 0 &&
 		    part != NULL && part[0] != '\0') {
-			/* skip and wait for partition event */
-			udev_device_unref(dev);
-			continue;
+			const char *devname =
+			    udev_device_get_property_value(dev, "DEVNAME");
+
+			if (strcmp(part, "atari") == 0) {
+				zed_log_msg(LOG_INFO,
+				    "%s: %s is reporting an atari partition, "
+				    "but we're going to assume it's a false "
+				    "positive and still use it (issue #13497)",
+				    __func__, devname);
+			} else {
+				zed_log_msg(LOG_INFO,
+				    "%s: skip %s since it has a %s partition "
+				    "already", __func__, devname, part);
+				/* skip and wait for partition event */
+				udev_device_unref(dev);
+				continue;
+			}
 		}
 
 		/*
@@ -227,6 +265,11 @@
 			sectors = udev_device_get_sysattr_value(dev, "size");
 		if (sectors != NULL &&
 		    strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
+			zed_log_msg(LOG_INFO,
+			    "%s: %s sectors %s < %llu (minimum)",
+			    __func__,
+			    udev_device_get_property_value(dev, "DEVNAME"),
+			    sectors, MINIMUM_SECTORS);
 			udev_device_unref(dev);
 			continue;
 		}
@@ -236,10 +279,19 @@
 		 * device id string is required in the message schema
 		 * for matching with vdevs. Preflight here for expected
 		 * udev information.
+		 *
+		 * Special case:
+		 * NVMe devices don't have ID_BUS set (at least on RHEL 7-8),
+		 * but they are valid for autoreplace.  Add a special case for
+		 * them by searching for "/nvme/" in the udev DEVPATH:
+		 *
+		 * DEVPATH=/devices/pci0000:00/0000:00:1e.0/nvme/nvme2/nvme2n1
 		 */
 		bus = udev_device_get_property_value(dev, "ID_BUS");
 		uuid = udev_device_get_property_value(dev, "DM_UUID");
-		if (!is_zfs && (bus == NULL && uuid == NULL)) {
+		devpath = udev_device_get_devpath(dev);
+		if (!is_zfs && (bus == NULL && uuid == NULL &&
+		    strstr(devpath, "/nvme/") == NULL)) {
 			zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
 			    "source", udev_device_get_devnode(dev));
 			udev_device_unref(dev);
@@ -350,7 +402,7 @@
 }
 
 int
-zed_disk_event_init()
+zed_disk_event_init(void)
 {
 	int fd, fflags;
 
@@ -379,13 +431,14 @@
 		return (-1);
 	}
 
+	pthread_setname_np(g_mon_tid, "udev monitor");
 	zed_log_msg(LOG_INFO, "zed_disk_event_init");
 
 	return (0);
 }
 
 void
-zed_disk_event_fini()
+zed_disk_event_fini(void)
 {
 	/* cancel monitor thread at recvmsg() */
 	(void) pthread_cancel(g_mon_tid);
@@ -403,13 +456,13 @@
 #include "zed_disk_event.h"
 
 int
-zed_disk_event_init()
+zed_disk_event_init(void)
 {
 	return (0);
 }
 
 void
-zed_disk_event_fini()
+zed_disk_event_fini(void)
 {
 }
 

diff --git a/zfs/cmd/zed/zed_event.c b/zfs/cmd/zed/zed_event.c
index 2a7ff16..9eaad0e 100644
--- a/zfs/cmd/zed/zed_event.c
+++ b/zfs/cmd/zed/zed_event.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -15,7 +15,7 @@
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <libzfs.h>			/* FIXME: Replace with libzfs_core. */
+#include <libzfs_core.h>
 #include <paths.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -28,6 +28,7 @@
 #include "zed.h"
 #include "zed_conf.h"
 #include "zed_disk_event.h"
+#include "zed_event.h"
 #include "zed_exec.h"
 #include "zed_file.h"
 #include "zed_log.h"
@@ -40,25 +41,36 @@
 /*
  * Open the libzfs interface.
  */
-void
+int
 zed_event_init(struct zed_conf *zcp)
 {
 	if (!zcp)
 		zed_log_die("Failed zed_event_init: %s", strerror(EINVAL));
 
 	zcp->zfs_hdl = libzfs_init();
-	if (!zcp->zfs_hdl)
+	if (!zcp->zfs_hdl) {
+		if (zcp->do_idle)
+			return (-1);
 		zed_log_die("Failed to initialize libzfs");
+	}
 
-	zcp->zevent_fd = open(ZFS_DEV, O_RDWR);
-	if (zcp->zevent_fd < 0)
+	zcp->zevent_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
+	if (zcp->zevent_fd < 0) {
+		if (zcp->do_idle)
+			return (-1);
 		zed_log_die("Failed to open \"%s\": %s",
 		    ZFS_DEV, strerror(errno));
+	}
 
 	zfs_agent_init(zcp->zfs_hdl);
 
-	if (zed_disk_event_init() != 0)
+	if (zed_disk_event_init() != 0) {
+		if (zcp->do_idle)
+			return (-1);
 		zed_log_die("Failed to initialize disk events");
+	}
+
+	return (0);
 }
 
 /*
@@ -84,6 +96,47 @@
 		libzfs_fini(zcp->zfs_hdl);
 		zcp->zfs_hdl = NULL;
 	}
+
+	zed_exec_fini();
+}
+
+static void
+_bump_event_queue_length(void)
+{
+	int zzlm = -1, wr;
+	char qlen_buf[12] = {0}; /* parameter is int => max "-2147483648\n" */
+	long int qlen;
+
+	zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR);
+	if (zzlm < 0)
+		goto done;
+
+	if (read(zzlm, qlen_buf, sizeof (qlen_buf)) < 0)
+		goto done;
+	qlen_buf[sizeof (qlen_buf) - 1] = '\0';
+
+	errno = 0;
+	qlen = strtol(qlen_buf, NULL, 10);
+	if (errno == ERANGE)
+		goto done;
+
+	/* Double the limit, saturating at INT_MAX without overflowing long. */
+	if (qlen <= 0)
+		qlen = 512; /* default zfs_zevent_len_max value */
+	else if (qlen > INT_MAX / 2)
+		qlen = INT_MAX;
+	else
+		qlen *= 2;
+	wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen);
+
+	if (pwrite(zzlm, qlen_buf, wr, 0) < 0)
+		goto done;
+
+	zed_log_msg(LOG_WARNING, "Bumping queue length to %ld", qlen);
+
+done:
+	if (zzlm > -1)
+		(void) close(zzlm);
 }
 
 /*
@@ -124,10 +177,7 @@
 
 		if (n_dropped > 0) {
 			zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped);
-			/*
-			 * FIXME: Increase max size of event nvlist in
-			 *   /sys/module/zfs/parameters/zfs_zevent_len_max ?
-			 */
+			_bump_event_queue_length();
 		}
 		if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) {
 			zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid");
@@ -199,7 +249,7 @@
  *
  * All environment variables in [zsp] should be added through this function.
  */
-static int
+static __attribute__((format(printf, 5, 6))) int
 _zed_event_add_var(uint64_t eid, zed_strings_t *zsp,
     const char *prefix, const char *name, const char *fmt, ...)
 {
@@ -574,8 +624,6 @@
  * Convert the nvpair [nvp] to a string which is added to the environment
  * of the child process.
  * Return 0 on success, -1 on error.
- *
- * FIXME: Refactor with cmd/zpool/zpool_main.c:zpool_do_events_nvprint()?
  */
 static void
 _zed_event_add_nvpair(uint64_t eid, zed_strings_t *zsp, nvpair_t *nvp)
@@ -674,23 +722,11 @@
 		_zed_event_add_var(eid, zsp, prefix, name,
 		    "%llu", (u_longlong_t)i64);
 		break;
-	case DATA_TYPE_NVLIST:
-		_zed_event_add_var(eid, zsp, prefix, name,
-		    "%s", "_NOT_IMPLEMENTED_");			/* FIXME */
-		break;
 	case DATA_TYPE_STRING:
 		(void) nvpair_value_string(nvp, &str);
 		_zed_event_add_var(eid, zsp, prefix, name,
 		    "%s", (str ? str : "<NULL>"));
 		break;
-	case DATA_TYPE_BOOLEAN_ARRAY:
-		_zed_event_add_var(eid, zsp, prefix, name,
-		    "%s", "_NOT_IMPLEMENTED_");			/* FIXME */
-		break;
-	case DATA_TYPE_BYTE_ARRAY:
-		_zed_event_add_var(eid, zsp, prefix, name,
-		    "%s", "_NOT_IMPLEMENTED_");			/* FIXME */
-		break;
 	case DATA_TYPE_INT8_ARRAY:
 		_zed_event_add_int8_array(eid, zsp, prefix, nvp);
 		break;
@@ -718,9 +754,11 @@
 	case DATA_TYPE_STRING_ARRAY:
 		_zed_event_add_string_array(eid, zsp, prefix, nvp);
 		break;
+	case DATA_TYPE_NVLIST:
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
 	case DATA_TYPE_NVLIST_ARRAY:
-		_zed_event_add_var(eid, zsp, prefix, name,
-		    "%s", "_NOT_IMPLEMENTED_");			/* FIXME */
+		_zed_event_add_var(eid, zsp, prefix, name, "_NOT_IMPLEMENTED_");
 		break;
 	default:
 		errno = EINVAL;
@@ -872,7 +910,7 @@
 /*
  * Service the next zevent, blocking until one is available.
  */
-void
+int
 zed_event_service(struct zed_conf *zcp)
 {
 	nvlist_t *nvl;
@@ -890,20 +928,17 @@
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR, "Failed to service zevent: %s",
 		    strerror(errno));
-		return;
+		return (EINVAL);
 	}
 	rv = zpool_events_next(zcp->zfs_hdl, &nvl, &n_dropped, ZEVENT_NONE,
 	    zcp->zevent_fd);
 
 	if ((rv != 0) || !nvl)
-		return;
+		return (errno);
 
 	if (n_dropped > 0) {
 		zed_log_msg(LOG_WARNING, "Missed %d events", n_dropped);
-		/*
-		 * FIXME: Increase max size of event nvlist in
-		 * /sys/module/zfs/parameters/zfs_zevent_len_max ?
-		 */
+		_bump_event_queue_length();
 	}
 	if (nvlist_lookup_uint64(nvl, "eid", &eid) != 0) {
 		zed_log_msg(LOG_WARNING, "Failed to lookup zevent eid");
@@ -941,12 +976,12 @@
 
 		_zed_event_add_time_strings(eid, zsp, etime);
 
-		zed_exec_process(eid, class, subclass,
-		    zcp->zedlet_dir, zcp->zedlets, zsp, zcp->zevent_fd);
+		zed_exec_process(eid, class, subclass, zcp, zsp);
 
 		zed_conf_write_state(zcp, eid, etime);
 
 		zed_strings_destroy(zsp);
 	}
 	nvlist_free(nvl);
+	return (0);
 }

diff --git a/zfs/cmd/zed/zed_event.h b/zfs/cmd/zed/zed_event.h
index 9f37b80..5606f14 100644
--- a/zfs/cmd/zed/zed_event.h
+++ b/zfs/cmd/zed/zed_event.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -17,13 +17,13 @@
 
 #include <stdint.h>
 
-void zed_event_init(struct zed_conf *zcp);
+int zed_event_init(struct zed_conf *zcp);
 
 void zed_event_fini(struct zed_conf *zcp);
 
 int zed_event_seek(struct zed_conf *zcp, uint64_t saved_eid,
     int64_t saved_etime[]);
 
-void zed_event_service(struct zed_conf *zcp);
+int zed_event_service(struct zed_conf *zcp);
 
 #endif	/* !ZED_EVENT_H */

diff --git a/zfs/cmd/zed/zed_exec.c b/zfs/cmd/zed/zed_exec.c
index 0370371..03dcd03 100644
--- a/zfs/cmd/zed/zed_exec.c
+++ b/zfs/cmd/zed/zed_exec.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -18,16 +18,55 @@
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stddef.h>
+#include <sys/avl.h>
+#include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>
-#include "zed_file.h"
+#include <pthread.h>
+#include <signal.h>
+
+#include "zed_exec.h"
 #include "zed_log.h"
 #include "zed_strings.h"
 
 #define	ZEVENT_FILENO	3
 
+struct launched_process_node {
+	avl_node_t node;
+	pid_t pid;
+	uint64_t eid;
+	char *name;
+};
+
+static int
+_launched_process_node_compare(const void *x1, const void *x2)
+{
+	const struct launched_process_node *lhs = x1;
+	const struct launched_process_node *rhs = x2;
+
+	assert(x1 != NULL);
+	assert(x2 != NULL);
+
+	/*
+	 * Order tracked children by pid.  The pids are compared
+	 * explicitly (not subtracted), so the result is always
+	 * exactly -1, 0 or 1.
+	 */
+	if (lhs->pid == rhs->pid)
+		return (0);
+
+	return (lhs->pid < rhs->pid ? -1 : 1);
+}
+
+static pthread_t _reap_children_tid = (pthread_t)-1;
+static volatile boolean_t _reap_children_stop;
+static avl_tree_t _launched_processes;
+static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER;
+static int16_t _launched_processes_limit;
+
 /*
  * Create an environment string array for passing to execve() using the
  * NAME=VALUE strings in container [zsp].
@@ -78,20 +117,26 @@
  */
 static void
 _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
-    char *env[], int zfd)
+    char *env[], int zfd, boolean_t in_foreground)
 {
 	char path[PATH_MAX];
 	int n;
 	pid_t pid;
 	int fd;
-	pid_t wpid;
-	int status;
+	struct launched_process_node *node;
+	sigset_t mask;
+	struct timespec launch_timeout =
+		{ .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000, };
 
 	assert(dir != NULL);
 	assert(prog != NULL);
 	assert(env != NULL);
 	assert(zfd >= 0);
 
+	while (__atomic_load_n(&_launched_processes_limit,
+	    __ATOMIC_SEQ_CST) <= 0)
+		(void) nanosleep(&launch_timeout, NULL);
+
 	n = snprintf(path, sizeof (path), "%s/%s", dir, prog);
 	if ((n < 0) || (n >= sizeof (path))) {
 		zed_log_msg(LOG_WARNING,
@@ -99,100 +144,179 @@
 		    prog, eid, strerror(ENAMETOOLONG));
 		return;
 	}
+	(void) pthread_mutex_lock(&_launched_processes_lock);
 	pid = fork();
 	if (pid < 0) {
+		(void) pthread_mutex_unlock(&_launched_processes_lock);
 		zed_log_msg(LOG_WARNING,
 		    "Failed to fork \"%s\" for eid=%llu: %s",
 		    prog, eid, strerror(errno));
 		return;
 	} else if (pid == 0) {
+		(void) sigemptyset(&mask);
+		(void) sigprocmask(SIG_SETMASK, &mask, NULL);
+
 		(void) umask(022);
-		if ((fd = open("/dev/null", O_RDWR)) != -1) {
+		if (in_foreground && /* we're already devnulled if daemonised */
+		    (fd = open("/dev/null", O_RDWR | O_CLOEXEC)) != -1) {
 			(void) dup2(fd, STDIN_FILENO);
 			(void) dup2(fd, STDOUT_FILENO);
 			(void) dup2(fd, STDERR_FILENO);
 		}
 		(void) dup2(zfd, ZEVENT_FILENO);
-		zed_file_close_from(ZEVENT_FILENO + 1);
 		execle(path, prog, NULL, env);
 		_exit(127);
 	}
 
 	/* parent process */
 
+	node = calloc(1, sizeof (*node));
+	if (node) {
+		node->pid = pid;
+		node->eid = eid;
+		node->name = strdup(prog);
+
+		avl_add(&_launched_processes, node);
+	}
+	(void) pthread_mutex_unlock(&_launched_processes_lock);
+
+	__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
 	zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
 	    prog, eid, pid);
+}
 
-	/* FIXME: Timeout rogue child processes with sigalarm? */
+static void
+_nop(int sig)
+{}
 
-	/*
-	 * Wait for child process using WNOHANG to limit
-	 * the time spent waiting to 10 seconds (10,000ms).
-	 */
-	for (n = 0; n < 1000; n++) {
-		wpid = waitpid(pid, &status, WNOHANG);
-		if (wpid == (pid_t)-1) {
-			if (errno == EINTR)
-				continue;
-			zed_log_msg(LOG_WARNING,
-			    "Failed to wait for \"%s\" eid=%llu pid=%d",
-			    prog, eid, pid);
-			break;
-		} else if (wpid == 0) {
-			struct timespec t;
+static void *
+_reap_children(void *arg)
+{
+	struct launched_process_node node, *pnode;
+	pid_t pid;
+	int status;
+	struct rusage usage;
+	struct sigaction sa = {};
 
-			/* child still running */
-			t.tv_sec = 0;
-			t.tv_nsec = 10000000;	/* 10ms */
-			(void) nanosleep(&t, NULL);
-			continue;
-		}
+	(void) sigfillset(&sa.sa_mask);
+	(void) sigdelset(&sa.sa_mask, SIGCHLD);
+	(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
 
-		if (WIFEXITED(status)) {
-			zed_log_msg(LOG_INFO,
-			    "Finished \"%s\" eid=%llu pid=%d exit=%d",
-			    prog, eid, pid, WEXITSTATUS(status));
-		} else if (WIFSIGNALED(status)) {
-			zed_log_msg(LOG_INFO,
-			    "Finished \"%s\" eid=%llu pid=%d sig=%d/%s",
-			    prog, eid, pid, WTERMSIG(status),
-			    strsignal(WTERMSIG(status)));
+	(void) sigemptyset(&sa.sa_mask);
+	sa.sa_handler = _nop;
+	sa.sa_flags = SA_NOCLDSTOP;
+	(void) sigaction(SIGCHLD, &sa, NULL);
+
+	for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
+		(void) pthread_mutex_lock(&_launched_processes_lock);
+		pid = wait4(0, &status, WNOHANG, &usage);
+
+		if (pid == 0 || pid == (pid_t)-1) {
+			(void) pthread_mutex_unlock(&_launched_processes_lock);
+			if (pid == 0 || errno == ECHILD)
+				pause();
+			else if (errno != EINTR)
+				zed_log_msg(LOG_WARNING,
+				    "Failed to wait for children: %s",
+				    strerror(errno));
 		} else {
-			zed_log_msg(LOG_INFO,
-			    "Finished \"%s\" eid=%llu pid=%d status=0x%X",
-			    prog, eid, (unsigned int) status);
+			memset(&node, 0, sizeof (node));
+			node.pid = pid;
+			pnode = avl_find(&_launched_processes, &node, NULL);
+			if (pnode) {
+				memcpy(&node, pnode, sizeof (node));
+
+				avl_remove(&_launched_processes, pnode);
+				free(pnode);
+			}
+			(void) pthread_mutex_unlock(&_launched_processes_lock);
+			__atomic_add_fetch(&_launched_processes_limit, 1,
+			    __ATOMIC_SEQ_CST);
+
+			usage.ru_utime.tv_sec += usage.ru_stime.tv_sec;
+			usage.ru_utime.tv_usec += usage.ru_stime.tv_usec;
+			usage.ru_utime.tv_sec +=
+			    usage.ru_utime.tv_usec / (1000 * 1000);
+			usage.ru_utime.tv_usec %= 1000 * 1000;
+
+			if (WIFEXITED(status)) {
+				zed_log_msg(LOG_INFO,
+				    "Finished \"%s\" eid=%llu pid=%d "
+				    "time=%llu.%06us exit=%d",
+				    node.name, node.eid, pid,
+				    (unsigned long long) usage.ru_utime.tv_sec,
+				    (unsigned int) usage.ru_utime.tv_usec,
+				    WEXITSTATUS(status));
+			} else if (WIFSIGNALED(status)) {
+				zed_log_msg(LOG_INFO,
+				    "Finished \"%s\" eid=%llu pid=%d "
+				    "time=%llu.%06us sig=%d/%s",
+				    node.name, node.eid, pid,
+				    (unsigned long long) usage.ru_utime.tv_sec,
+				    (unsigned int) usage.ru_utime.tv_usec,
+				    WTERMSIG(status),
+				    strsignal(WTERMSIG(status)));
+			} else {
+				zed_log_msg(LOG_INFO,
+				    "Finished \"%s\" eid=%llu pid=%d "
+				    "time=%llu.%06us status=0x%X",
+				    node.name, node.eid, pid,
+				    (unsigned long long) usage.ru_utime.tv_sec,
+				    (unsigned int) usage.ru_utime.tv_usec,
+				    (unsigned int) status);
+			}
+
+			free(node.name);
 		}
-		break;
 	}
 
-	/*
-	 * kill child process after 10 seconds
-	 */
-	if (wpid == 0) {
-		zed_log_msg(LOG_WARNING, "Killing hung \"%s\" pid=%d",
-		    prog, pid);
-		(void) kill(pid, SIGKILL);
+	return (NULL);
+}
+
+void
+zed_exec_fini(void)
+{
+	struct launched_process_node *node;
+	void *ck = NULL;
+
+	if (_reap_children_tid == (pthread_t)-1)
+		return;
+
+	_reap_children_stop = B_TRUE;
+	(void) pthread_kill(_reap_children_tid, SIGCHLD);
+	(void) pthread_join(_reap_children_tid, NULL);
+
+	while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) {
+		free(node->name);
+		free(node);
 	}
+	avl_destroy(&_launched_processes);
+
+	(void) pthread_mutex_destroy(&_launched_processes_lock);
+	(void) pthread_mutex_init(&_launched_processes_lock, NULL);
+
+	_reap_children_tid = (pthread_t)-1;
 }
 
 /*
  * Process the event [eid] by synchronously invoking all zedlets with a
  * matching class prefix.
  *
- * Each executable in [zedlets] from the directory [dir] is matched against
- * the event's [class], [subclass], and the "all" class (which matches
- * all events).  Every zedlet with a matching class prefix is invoked.
+ * Each executable in [zcp->zedlets] from the directory [zcp->zedlet_dir]
+ * is matched against the event's [class], [subclass], and the "all" class
+ * (which matches all events).
+ * Every zedlet with a matching class prefix is invoked.
  * The NAME=VALUE strings in [envs] will be passed to the zedlet as
  * environment variables.
  *
- * The file descriptor [zfd] is the zevent_fd used to track the
+ * The file descriptor [zcp->zevent_fd] is the zevent_fd used to track the
  * current cursor location within the zevent nvlist.
  *
  * Return 0 on success, -1 on error.
  */
 int
 zed_exec_process(uint64_t eid, const char *class, const char *subclass,
-    const char *dir, zed_strings_t *zedlets, zed_strings_t *envs, int zfd)
+    struct zed_conf *zcp, zed_strings_t *envs)
 {
 	const char *class_strings[4];
 	const char *allclass = "all";
@@ -201,9 +325,22 @@
 	char **e;
 	int n;
 
-	if (!dir || !zedlets || !envs || zfd < 0)
+	if (!zcp->zedlet_dir || !zcp->zedlets || !envs || zcp->zevent_fd < 0)
 		return (-1);
 
+	if (_reap_children_tid == (pthread_t)-1) {
+		_launched_processes_limit = zcp->max_jobs;
+
+		if (pthread_create(&_reap_children_tid, NULL,
+		    _reap_children, NULL) != 0)
+			return (-1);
+		pthread_setname_np(_reap_children_tid, "reap ZEDLETs");
+
+		avl_create(&_launched_processes, _launched_process_node_compare,
+		    sizeof (struct launched_process_node),
+		    offsetof(struct launched_process_node, node));
+	}
+
 	csp = class_strings;
 
 	if (class)
@@ -219,11 +356,13 @@
 
 	e = _zed_exec_create_env(envs);
 
-	for (z = zed_strings_first(zedlets); z; z = zed_strings_next(zedlets)) {
+	for (z = zed_strings_first(zcp->zedlets); z;
+	    z = zed_strings_next(zcp->zedlets)) {
 		for (csp = class_strings; *csp; csp++) {
 			n = strlen(*csp);
 			if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
-				_zed_exec_fork_child(eid, dir, z, e, zfd);
+				_zed_exec_fork_child(eid, zcp->zedlet_dir,
+				    z, e, zcp->zevent_fd, zcp->do_foreground);
 		}
 	}
 	free(e);

diff --git a/zfs/cmd/zed/zed_exec.h b/zfs/cmd/zed/zed_exec.h
index 69179c9..e4c8d86 100644
--- a/zfs/cmd/zed/zed_exec.h
+++ b/zfs/cmd/zed/zed_exec.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -16,9 +16,12 @@
 #define	ZED_EXEC_H
 
 #include <stdint.h>
+#include "zed_strings.h"
+#include "zed_conf.h"
+
+void zed_exec_fini(void);
 
 int zed_exec_process(uint64_t eid, const char *class, const char *subclass,
-    const char *dir, zed_strings_t *zedlets, zed_strings_t *envs,
-    int zevent_fd);
+    struct zed_conf *zcp, zed_strings_t *envs);
 
 #endif	/* !ZED_EXEC_H */

diff --git a/zfs/cmd/zed/zed_file.c b/zfs/cmd/zed/zed_file.c
index 3a1a661..b62f68b 100644
--- a/zfs/cmd/zed/zed_file.c
+++ b/zfs/cmd/zed/zed_file.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -12,73 +12,18 @@
  * You may not use this file except in compliance with the license.
  */
 
+#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <string.h>
-#include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include "zed_file.h"
 #include "zed_log.h"
 
 /*
- * Read up to [n] bytes from [fd] into [buf].
- * Return the number of bytes read, 0 on EOF, or -1 on error.
- */
-ssize_t
-zed_file_read_n(int fd, void *buf, size_t n)
-{
-	unsigned char *p;
-	size_t n_left;
-	ssize_t n_read;
-
-	p = buf;
-	n_left = n;
-	while (n_left > 0) {
-		if ((n_read = read(fd, p, n_left)) < 0) {
-			if (errno == EINTR)
-				continue;
-			else
-				return (-1);
-
-		} else if (n_read == 0) {
-			break;
-		}
-		n_left -= n_read;
-		p += n_read;
-	}
-	return (n - n_left);
-}
-
-/*
- * Write [n] bytes from [buf] out to [fd].
- * Return the number of bytes written, or -1 on error.
- */
-ssize_t
-zed_file_write_n(int fd, void *buf, size_t n)
-{
-	const unsigned char *p;
-	size_t n_left;
-	ssize_t n_written;
-
-	p = buf;
-	n_left = n;
-	while (n_left > 0) {
-		if ((n_written = write(fd, p, n_left)) < 0) {
-			if (errno == EINTR)
-				continue;
-			else
-				return (-1);
-
-		}
-		n_left -= n_written;
-		p += n_written;
-	}
-	return (n);
-}
-
-/*
  * Set an exclusive advisory lock on the open file descriptor [fd].
  * Return 0 on success, 1 if a conflicting lock is held by another process,
  * or -1 on error (with errno set).
@@ -159,6 +104,13 @@
 	return (lock.l_pid);
 }
 
+
+#if __APPLE__
+#define	PROC_SELF_FD "/dev/fd"
+#else /* Linux-compatible layout */
+#define	PROC_SELF_FD "/proc/self/fd"
+#endif
+
 /*
  * Close all open file descriptors greater than or equal to [lowfd].
  * Any errors encountered while closing file descriptors are ignored.
@@ -166,51 +118,24 @@
 void
 zed_file_close_from(int lowfd)
 {
-	const int maxfd_def = 256;
-	int errno_bak;
-	struct rlimit rl;
-	int maxfd;
+	int errno_bak = errno;
+	int maxfd = 0;
 	int fd;
+	DIR *fddir;
+	struct dirent *fdent;
 
-	errno_bak = errno;
-
-	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
-		maxfd = maxfd_def;
-	} else if (rl.rlim_max == RLIM_INFINITY) {
-		maxfd = maxfd_def;
+	if ((fddir = opendir(PROC_SELF_FD)) != NULL) {
+		while ((fdent = readdir(fddir)) != NULL) {
+			fd = atoi(fdent->d_name);
+			if (fd >= maxfd && fd != dirfd(fddir))
+				maxfd = fd + 1; /* exclusive bound */
+		}
+		(void) closedir(fddir);
 	} else {
-		maxfd = rl.rlim_max;
+		maxfd = sysconf(_SC_OPEN_MAX);
 	}
 	for (fd = lowfd; fd < maxfd; fd++)
 		(void) close(fd);
 
 	errno = errno_bak;
 }
-
-/*
- * Set the CLOEXEC flag on file descriptor [fd] so it will be automatically
- * closed upon successful execution of one of the exec functions.
- * Return 0 on success, or -1 on error.
- *
- * FIXME: No longer needed?
- */
-int
-zed_file_close_on_exec(int fd)
-{
-	int flags;
-
-	if (fd < 0) {
-		errno = EBADF;
-		return (-1);
-	}
-	flags = fcntl(fd, F_GETFD);
-	if (flags == -1)
-		return (-1);
-
-	flags |= FD_CLOEXEC;
-
-	if (fcntl(fd, F_SETFD, flags) == -1)
-		return (-1);
-
-	return (0);
-}

diff --git a/zfs/cmd/zed/zed_file.h b/zfs/cmd/zed/zed_file.h
index 05f360d..7e3a0ef 100644
--- a/zfs/cmd/zed/zed_file.h
+++ b/zfs/cmd/zed/zed_file.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -18,10 +18,6 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-ssize_t zed_file_read_n(int fd, void *buf, size_t n);
-
-ssize_t zed_file_write_n(int fd, void *buf, size_t n);
-
 int zed_file_lock(int fd);
 
 int zed_file_unlock(int fd);
@@ -30,6 +26,4 @@
 
 void zed_file_close_from(int fd);
 
-int zed_file_close_on_exec(int fd);
-
 #endif	/* !ZED_FILE_H */

diff --git a/zfs/cmd/zed/zed_log.c b/zfs/cmd/zed/zed_log.c
index 5a3f2db..0c4ab6f 100644
--- a/zfs/cmd/zed/zed_log.c
+++ b/zfs/cmd/zed/zed_log.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).

diff --git a/zfs/cmd/zed/zed_log.h b/zfs/cmd/zed/zed_log.h
index a03a4f5..ed88ad4 100644
--- a/zfs/cmd/zed/zed_log.h
+++ b/zfs/cmd/zed/zed_log.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).

diff --git a/zfs/cmd/zed/zed_strings.c b/zfs/cmd/zed/zed_strings.c
index 51b872a..52a86e9 100644
--- a/zfs/cmd/zed/zed_strings.c
+++ b/zfs/cmd/zed/zed_strings.c

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -108,7 +108,7 @@
  * If [key] is specified, it will be used to index the node; otherwise,
  * the string [val] will be used.
  */
-zed_strings_node_t *
+static zed_strings_node_t *
 _zed_strings_node_create(const char *key, const char *val)
 {
 	zed_strings_node_t *np;

diff --git a/zfs/cmd/zed/zed_strings.h b/zfs/cmd/zed/zed_strings.h
index 37a84ca..8046395 100644
--- a/zfs/cmd/zed/zed_strings.h
+++ b/zfs/cmd/zed/zed_strings.h

@@ -1,9 +1,9 @@
 /*
- * This file is part of the ZFS Event Daemon (ZED)
- * for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+ * This file is part of the ZFS Event Daemon (ZED).
+ *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
- * Refer to the ZoL git commit log for authoritative copyright attribution.
+ * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).

diff --git a/zfs/cmd/zfs/Makefile.am b/zfs/cmd/zfs/Makefile.am
index 8b6ddaa..1ead457 100644
--- a/zfs/cmd/zfs/Makefile.am
+++ b/zfs/cmd/zfs/Makefile.am

@@ -1,9 +1,5 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 sbin_PROGRAMS = zfs
 
 zfs_SOURCES = \
@@ -15,7 +11,15 @@
 	zfs_projectutil.h
 
 zfs_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libuutil/libuutil.la \
-	$(top_builddir)/lib/libzfs/libzfs.la \
-	$(top_builddir)/lib/libzfs_core/libzfs_core.la
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libuutil/libuutil.la
+
+zfs_LDADD += $(LTLIBINTL)
+
+if BUILD_FREEBSD
+zfs_LDADD += -lgeom -ljail
+endif
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zfs/zfs_main.c b/zfs/cmd/zfs/zfs_main.c
index d099b5e..5d7f8a2 100644
--- a/zfs/cmd/zfs/zfs_main.c
+++ b/zfs/cmd/zfs/zfs_main.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2013 Steven Hartland.  All rights reserved.
@@ -30,10 +30,12 @@
  * Copyright (c) 2019 Datto Inc.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
  * Copyright 2019 Joyent, Inc.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
  */
 
 #include <assert.h>
 #include <ctype.h>
+#include <sys/debug.h>
 #include <errno.h>
 #include <getopt.h>
 #include <libgen.h>
@@ -51,7 +53,6 @@
 #include <grp.h>
 #include <pwd.h>
 #include <signal.h>
-#include <sys/debug.h>
 #include <sys/list.h>
 #include <sys/mkdev.h>
 #include <sys/mntent.h>
@@ -69,7 +70,6 @@
 #include <zfs_prop.h>
 #include <zfs_deleg.h>
 #include <libzutil.h>
-#include <libuutil.h>
 #ifdef HAVE_IDMAP
 #include <aclutils.h>
 #include <directory.h>
@@ -114,12 +114,18 @@
 static int zfs_do_diff(int argc, char **argv);
 static int zfs_do_bookmark(int argc, char **argv);
 static int zfs_do_channel_program(int argc, char **argv);
-static int zfs_do_remap(int argc, char **argv);
 static int zfs_do_load_key(int argc, char **argv);
 static int zfs_do_unload_key(int argc, char **argv);
 static int zfs_do_change_key(int argc, char **argv);
 static int zfs_do_project(int argc, char **argv);
 static int zfs_do_version(int argc, char **argv);
+static int zfs_do_redact(int argc, char **argv);
+static int zfs_do_wait(int argc, char **argv);
+
+#ifdef __FreeBSD__
+static int zfs_do_jail(int argc, char **argv);
+static int zfs_do_unjail(int argc, char **argv);
+#endif
 
 /*
  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@@ -168,13 +174,16 @@
 	HELP_HOLDS,
 	HELP_RELEASE,
 	HELP_DIFF,
-	HELP_REMAP,
 	HELP_BOOKMARK,
 	HELP_CHANNEL_PROGRAM,
 	HELP_LOAD_KEY,
 	HELP_UNLOAD_KEY,
 	HELP_CHANGE_KEY,
-	HELP_VERSION
+	HELP_VERSION,
+	HELP_REDACT,
+	HELP_JAIL,
+	HELP_UNJAIL,
+	HELP_WAIT,
 } zfs_help_t;
 
 typedef struct zfs_command {
@@ -235,10 +244,16 @@
 	{ "holds",	zfs_do_holds,		HELP_HOLDS		},
 	{ "release",	zfs_do_release,		HELP_RELEASE		},
 	{ "diff",	zfs_do_diff,		HELP_DIFF		},
-	{ "remap",	zfs_do_remap,		HELP_REMAP		},
 	{ "load-key",	zfs_do_load_key,	HELP_LOAD_KEY		},
 	{ "unload-key",	zfs_do_unload_key,	HELP_UNLOAD_KEY		},
 	{ "change-key",	zfs_do_change_key,	HELP_CHANGE_KEY		},
+	{ "redact",	zfs_do_redact,		HELP_REDACT		},
+	{ "wait",	zfs_do_wait,		HELP_WAIT		},
+
+#ifdef __FreeBSD__
+	{ "jail",	zfs_do_jail,		HELP_JAIL		},
+	{ "unjail",	zfs_do_unjail,		HELP_UNJAIL		},
+#endif
 };
 
 #define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
@@ -253,9 +268,9 @@
 		return (gettext("\tclone [-p] [-o property=value] ... "
 		    "<snapshot> <filesystem|volume>\n"));
 	case HELP_CREATE:
-		return (gettext("\tcreate [-p] [-o property=value] ... "
+		return (gettext("\tcreate [-Pnpuv] [-o property=value] ... "
 		    "<filesystem>\n"
-		    "\tcreate [-ps] [-b blocksize] [-o property=value] ... "
+		    "\tcreate [-Pnpsv] [-b blocksize] [-o property=value] ... "
 		    "-V <size> <volume>\n"));
 	case HELP_DESTROY:
 		return (gettext("\tdestroy [-fnpRrv] <filesystem|volume>\n"
@@ -280,30 +295,34 @@
 		    "[filesystem|volume|snapshot] ...\n"));
 	case HELP_MOUNT:
 		return (gettext("\tmount\n"
-		    "\tmount [-lvO] [-o opts] <-a | filesystem>\n"));
+		    "\tmount [-flvO] [-o opts] <-a | filesystem>\n"));
 	case HELP_PROMOTE:
 		return (gettext("\tpromote <clone-filesystem>\n"));
 	case HELP_RECEIVE:
-		return (gettext("\treceive [-vnsFhu] "
+		return (gettext("\treceive [-vMnsFhu] "
 		    "[-o <property>=<value>] ... [-x <property>] ...\n"
 		    "\t    <filesystem|volume|snapshot>\n"
-		    "\treceive [-vnsFhu] [-o <property>=<value>] ... "
+		    "\treceive [-vMnsFhu] [-o <property>=<value>] ... "
 		    "[-x <property>] ... \n"
 		    "\t    [-d | -e] <filesystem>\n"
 		    "\treceive -A <filesystem|volume>\n"));
 	case HELP_RENAME:
 		return (gettext("\trename [-f] <filesystem|volume|snapshot> "
 		    "<filesystem|volume|snapshot>\n"
-		    "\trename [-f] -p <filesystem|volume> <filesystem|volume>\n"
+		    "\trename -p [-f] <filesystem|volume> <filesystem|volume>\n"
+		    "\trename -u [-f] <filesystem> <filesystem>\n"
 		    "\trename -r <snapshot> <snapshot>\n"));
 	case HELP_ROLLBACK:
 		return (gettext("\trollback [-rRf] <snapshot>\n"));
 	case HELP_SEND:
 		return (gettext("\tsend [-DnPpRvLecwhbB] [-[i|I] snapshot] "
 		    "<snapshot>\n"
-		    "\tsend [-nvPLecw] [-i snapshot|bookmark] "
+		    "\tsend [-DnVvPLecw] [-i snapshot|bookmark] "
 		    "<filesystem|volume|snapshot>\n"
-		    "\tsend [-nvPe] -t <receive_resume_token>\n"));
+		    "\tsend [-DnPpVvLec] [-i bookmark|snapshot] "
+		    "--redact <bookmark> <snapshot>\n"
+		    "\tsend [-nVvPe] -t <receive_resume_token>\n"
+		    "\tsend [-PnVv] --saved filesystem\n"));
 	case HELP_SET:
 		return (gettext("\tset <property=value> ... "
 		    "<filesystem|volume|snapshot> ...\n"));
@@ -313,7 +332,7 @@
 		return (gettext("\tsnapshot [-r] [-o property=value] ... "
 		    "<filesystem|volume>@<snap> ...\n"));
 	case HELP_UNMOUNT:
-		return (gettext("\tunmount [-f] "
+		return (gettext("\tunmount [-fu] "
 		    "<-a | filesystem|mountpoint>\n"));
 	case HELP_UNSHARE:
 		return (gettext("\tunshare "
@@ -342,16 +361,16 @@
 		return (gettext("\tuserspace [-Hinp] [-o field[,...]] "
 		    "[-s field] ...\n"
 		    "\t    [-S field] ... [-t type[,...]] "
-		    "<filesystem|snapshot>\n"));
+		    "<filesystem|snapshot|path>\n"));
 	case HELP_GROUPSPACE:
 		return (gettext("\tgroupspace [-Hinp] [-o field[,...]] "
 		    "[-s field] ...\n"
 		    "\t    [-S field] ... [-t type[,...]] "
-		    "<filesystem|snapshot>\n"));
+		    "<filesystem|snapshot|path>\n"));
 	case HELP_PROJECTSPACE:
 		return (gettext("\tprojectspace [-Hp] [-o field[,...]] "
 		    "[-s field] ... \n"
-		    "\t    [-S field] ... <filesystem|snapshot>\n"));
+		    "\t    [-S field] ... <filesystem|snapshot|path>\n"));
 	case HELP_PROJECT:
 		return (gettext("\tproject [-d|-r] <directory|file ...>\n"
 		    "\tproject -c [-0] [-d|-r] [-p id] <directory|file ...>\n"
@@ -366,10 +385,9 @@
 	case HELP_DIFF:
 		return (gettext("\tdiff [-FHt] <snapshot> "
 		    "[snapshot|filesystem]\n"));
-	case HELP_REMAP:
-		return (gettext("\tremap <filesystem | volume>\n"));
 	case HELP_BOOKMARK:
-		return (gettext("\tbookmark <snapshot> <bookmark>\n"));
+		return (gettext("\tbookmark <snapshot|bookmark> "
+		    "<newbookmark>\n"));
 	case HELP_CHANNEL_PROGRAM:
 		return (gettext("\tprogram [-jn] [-t <instruction limit>] "
 		    "[-m <memory limit (b)>]\n"
@@ -382,11 +400,20 @@
 		    "<-a | filesystem|volume>\n"));
 	case HELP_CHANGE_KEY:
 		return (gettext("\tchange-key [-l] [-o keyformat=<value>]\n"
-		    "\t    [-o keylocation=<value>] [-o pbkfd2iters=<value>]\n"
+		    "\t    [-o keylocation=<value>] [-o pbkdf2iters=<value>]\n"
 		    "\t    <filesystem|volume>\n"
 		    "\tchange-key -i [-l] <filesystem|volume>\n"));
 	case HELP_VERSION:
 		return (gettext("\tversion\n"));
+	case HELP_REDACT:
+		return (gettext("\tredact <snapshot> <bookmark> "
+		    "<redaction_snapshot> ...\n"));
+	case HELP_JAIL:
+		return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
+	case HELP_UNJAIL:
+		return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
+	case HELP_WAIT:
+		return (gettext("\twait [-t <activity>] <filesystem>\n"));
 	}
 
 	abort();
@@ -415,7 +442,7 @@
 	return (data);
 }
 
-void *
+static void *
 safe_realloc(void *data, size_t size)
 {
 	void *newp;
@@ -544,6 +571,8 @@
 		(void) fprintf(fp, "YES       NO   <size> | none\n");
 		(void) fprintf(fp, "\t%-15s ", "written@<snap>");
 		(void) fprintf(fp, " NO       NO   <size>\n");
+		(void) fprintf(fp, "\t%-15s ", "written#<bookmark>");
+		(void) fprintf(fp, " NO       NO   <size>\n");
 
 		(void) fprintf(fp, gettext("\nSizes are specified in bytes "
 		    "with standard units such as K, M, G, etc.\n"));
@@ -699,6 +728,32 @@
 	pt_header = NULL;
 }
 
+/* Is fd the /dev/null or /dev/zero char device?  B_FALSE if fstat fails. */
+#ifdef __linux__
+static boolean_t
+is_dev_nullzero(int fd)
+{
+	struct stat st;
+	return (fstat(fd, &st) == 0 && S_ISCHR(st.st_mode) &&
+	    major(st.st_rdev) == 1 && (minor(st.st_rdev) == 3 /* null */ ||
+	    minor(st.st_rdev) == 5 /* zero */));
+}
+#endif
+
+static void
+note_dev_error(int err, int fd)
+{
+#ifdef __linux__
+	/* Only EINVAL on a /dev/null or /dev/zero target earns the hint. */
+	if (err != EINVAL || !is_dev_nullzero(fd))
+		return;
+	(void) fprintf(stderr, gettext("Error: Writing directly to "
+	    "/dev/{null,zero} files on certain kernels is not currently "
+	    "implemented.\n(As a workaround, try \"zfs send [...] | cat > "
+	    "/dev/null\")\n"));
+#endif
+}
+
 static int
 zfs_mount_and_share(libzfs_handle_t *hdl, const char *dataset, zfs_type_t type)
 {
@@ -728,7 +783,7 @@
 	 */
 	if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type, B_FALSE) &&
 	    zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON) {
-		if (geteuid() != 0) {
+		if (zfs_mount_delegation_check()) {
 			(void) fprintf(stderr, gettext("filesystem "
 			    "successfully created, but it may only be "
 			    "mounted by root\n"));
@@ -742,6 +797,7 @@
 			    "successfully created, but not shared\n"));
 			ret = 1;
 		}
+		zfs_commit_all_shares();
 	}
 
 	zfs_close(zhp);
@@ -861,8 +917,109 @@
 }
 
 /*
- * zfs create [-p] [-o prop=value] ... fs
- * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size
+ * Return a default volblocksize for the pool which always uses more than
+ * half of the data sectors.  This primarily applies to dRAID which always
+ * writes full stripe widths.
+ */
+static uint64_t
+default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
+{
+	uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
+	nvlist_t *tree, **vdevs;
+	uint_t nvdevs;
+
+	nvlist_t *config = zpool_get_config(zhp, NULL);
+
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 ||
+	    nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
+	    &vdevs, &nvdevs) != 0) {
+		return (ZVOL_DEFAULT_BLOCKSIZE);
+	}
+	/* Track the largest minimum allocation size across the child vdevs. */
+	for (uint_t i = 0; i < nvdevs; i++) {
+		nvlist_t *nv = vdevs[i];
+		uint64_t ashift, ndata, nparity;
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &ashift) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA,
+		    &ndata) == 0) {
+			/* dRAID minimum allocation width */
+			asize = MAX(asize, ndata * (1ULL << ashift));
+		} else if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+		    &nparity) == 0) {
+			/* raidz minimum allocation width */
+			if (nparity == 1)
+				asize = MAX(asize, 2 * (1ULL << ashift));
+			else
+				asize = MAX(asize, 4 * (1ULL << ashift));
+		} else {
+			/* mirror or (non-redundant) leaf vdev */
+			asize = MAX(asize, 1ULL << ashift);
+		}
+	}
+
+	/*
+	 * Calculate the target volblocksize such that more than half
+	 * of the asize is used. The following table is for 4k sectors.
+	 *
+	 * n   asize   blksz  used  |   n   asize   blksz  used
+	 * -------------------------+---------------------------------
+	 * 1   4,096   8,192  100%  |   9  36,864  32,768   88%
+	 * 2   8,192   8,192  100%  |  10  40,960  32,768   80%
+	 * 3  12,288   8,192   66%  |  11  45,056  32,768   72%
+	 * 4  16,384  16,384  100%  |  12  49,152  32,768   66%
+	 * 5  20,480  16,384   80%  |  13  53,248  32,768   61%
+	 * 6  24,576  16,384   66%  |  14  57,344  32,768   57%
+	 * 7  28,672  16,384   57%  |  15  61,440  32,768   53%
+	 * 8  32,768  32,768  100%  |  16  65,536  65,536  100%
+	 *
+	 * This is primarily a concern for dRAID which always allocates
+	 * a full stripe width.  For dRAID the default stripe width is
+	 * n=8 in which case the volblocksize is set to 32k. Ignoring
+	 * compression there are no unused sectors.  This same reasoning
+	 * applies to raidz[2,3] so target 4 sectors to minimize waste.
+	 */
+	uint64_t tgt_volblocksize = ZVOL_DEFAULT_BLOCKSIZE;
+	while (tgt_volblocksize * 2 <= asize)
+		tgt_volblocksize *= 2;
+
+	const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE);
+	if (nvlist_lookup_uint64(props, prop, &volblocksize) == 0) {
+
+		/* Issue a warning when a non-optimal size is requested. */
+		if (volblocksize < ZVOL_DEFAULT_BLOCKSIZE) {
+			(void) fprintf(stderr, gettext("Warning: "
+			    "volblocksize (%llu) is less than the default "
+			    "minimum block size (%llu).\nTo reduce wasted "
+			    "space a volblocksize of %llu is recommended.\n"),
+			    (u_longlong_t)volblocksize,
+			    (u_longlong_t)ZVOL_DEFAULT_BLOCKSIZE,
+			    (u_longlong_t)tgt_volblocksize);
+		} else if (volblocksize < tgt_volblocksize) {
+			(void) fprintf(stderr, gettext("Warning: "
+			    "volblocksize (%llu) is much less than the "
+			    "minimum allocation\nunit (%llu), which wastes "
+			    "at least %llu%% of space. To reduce wasted "
+			    "space,\nuse a larger volblocksize (%llu is "
+			    "recommended), fewer dRAID data disks\n"
+			    "per group, or smaller sector size (ashift).\n"),
+			    (u_longlong_t)volblocksize, (u_longlong_t)asize,
+			    (u_longlong_t)((100 * (asize - volblocksize)) /
+			    asize), (u_longlong_t)tgt_volblocksize);
+		}
+	} else {
+		volblocksize = tgt_volblocksize;
+		fnvlist_add_uint64(props, prop, volblocksize);
+	}
+
+	return (volblocksize);
+}
+
+/*
+ * zfs create [-Pnpv] [-o prop=value] ... fs
+ * zfs create [-Pnpsv] [-b blocksize] [-o prop=value] ... -V vol size
  *
  * Create a new dataset.  This command can be used to create filesystems
  * and volumes.  Snapshot creation is handled by 'zfs snapshot'.
@@ -874,25 +1031,42 @@
  * SPA_VERSION_REFRESERVATION, we set a refreservation instead.
  *
  * The '-p' flag creates all the non-existing ancestors of the target first.
+ *
+ * The '-n' flag is no-op (dry run) mode.  This will perform a user-space sanity
+ * check of arguments and properties, but does not check for permissions,
+ * available space, etc.
+ *
+ * The '-u' flag prevents the newly created file system from being mounted.
+ *
+ * The '-v' flag is for verbose output.
+ *
+ * The '-P' flag is used for parseable output.  It implies '-v'.
  */
 static int
 zfs_do_create(int argc, char **argv)
 {
 	zfs_type_t type = ZFS_TYPE_FILESYSTEM;
+	zpool_handle_t *zpool_handle = NULL;
+	nvlist_t *real_props = NULL;
 	uint64_t volsize = 0;
 	int c;
 	boolean_t noreserve = B_FALSE;
 	boolean_t bflag = B_FALSE;
 	boolean_t parents = B_FALSE;
+	boolean_t dryrun = B_FALSE;
+	boolean_t nomount = B_FALSE;
+	boolean_t verbose = B_FALSE;
+	boolean_t parseable = B_FALSE;
 	int ret = 1;
 	nvlist_t *props;
 	uint64_t intval;
+	char *strval;
 
 	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
 		nomem();
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) {
+	while ((c = getopt(argc, argv, ":PV:b:nso:puv")) != -1) {
 		switch (c) {
 		case 'V':
 			type = ZFS_TYPE_VOLUME;
@@ -908,6 +1082,10 @@
 				nomem();
 			volsize = intval;
 			break;
+		case 'P':
+			verbose = B_TRUE;
+			parseable = B_TRUE;
+			break;
 		case 'p':
 			parents = B_TRUE;
 			break;
@@ -925,6 +1103,9 @@
 			    intval) != 0)
 				nomem();
 			break;
+		case 'n':
+			dryrun = B_TRUE;
+			break;
 		case 'o':
 			if (!parseprop(props, optarg))
 				goto error;
@@ -932,6 +1113,12 @@
 		case 's':
 			noreserve = B_TRUE;
 			break;
+		case 'u':
+			nomount = B_TRUE;
+			break;
+		case 'v':
+			verbose = B_TRUE;
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing size "
 			    "argument\n"));
@@ -948,6 +1135,11 @@
 		    "used when creating a volume\n"));
 		goto badusage;
 	}
+	if (nomount && type != ZFS_TYPE_FILESYSTEM) {
+		(void) fprintf(stderr, gettext("'-u' can only be "
+		    "used when creating a filesystem\n"));
+		goto badusage;
+	}
 
 	argc -= optind;
 	argv += optind;
@@ -963,14 +1155,9 @@
 		goto badusage;
 	}
 
-	if (type == ZFS_TYPE_VOLUME && !noreserve) {
-		zpool_handle_t *zpool_handle;
-		nvlist_t *real_props = NULL;
-		uint64_t spa_version;
+	if (dryrun || type == ZFS_TYPE_VOLUME) {
+		char msg[ZFS_MAX_DATASET_NAME_LEN * 2];
 		char *p;
-		zfs_prop_t resv_prop;
-		char *strval;
-		char msg[1024];
 
 		if ((p = strchr(argv[0], '/')) != NULL)
 			*p = '\0';
@@ -979,6 +1166,51 @@
 			*p = '/';
 		if (zpool_handle == NULL)
 			goto error;
+
+		(void) snprintf(msg, sizeof (msg),
+		    dryrun ? gettext("cannot verify '%s'") :
+		    gettext("cannot create '%s'"), argv[0]);
+		if (props && (real_props = zfs_valid_proplist(g_zfs, type,
+		    props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) {
+			zpool_close(zpool_handle);
+			goto error;
+		}
+	}
+
+	if (type == ZFS_TYPE_VOLUME) {
+		const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE);
+		uint64_t volblocksize = default_volblocksize(zpool_handle,
+		    real_props);
+
+		if (volblocksize != ZVOL_DEFAULT_BLOCKSIZE &&
+		    nvlist_lookup_string(props, prop, &strval) != 0) {
+			if (asprintf(&strval, "%llu",
+			    (u_longlong_t)volblocksize) == -1)
+				nomem();
+			nvlist_add_string(props, prop, strval);
+			free(strval);
+		}
+
+		/*
+		 * If volsize is not a multiple of volblocksize, round it
+		 * up to the nearest multiple of the volblocksize.
+		 */
+		if (volsize % volblocksize) {
+			volsize = P2ROUNDUP_TYPED(volsize, volblocksize,
+			    uint64_t);
+
+			if (nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_VOLSIZE), volsize) != 0) {
+				nvlist_free(props);
+				nomem();
+			}
+		}
+	}
+
+	if (type == ZFS_TYPE_VOLUME && !noreserve) {
+		uint64_t spa_version;
+		zfs_prop_t resv_prop;
+
 		spa_version = zpool_get_prop_int(zpool_handle,
 		    ZPOOL_PROP_VERSION, NULL);
 		if (spa_version >= SPA_VERSION_REFRESERVATION)
@@ -986,18 +1218,8 @@
 		else
 			resv_prop = ZFS_PROP_RESERVATION;
 
-		(void) snprintf(msg, sizeof (msg),
-		    gettext("cannot create '%s'"), argv[0]);
-		if (props && (real_props = zfs_valid_proplist(g_zfs, type,
-		    props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) {
-			zpool_close(zpool_handle);
-			goto error;
-		}
-
 		volsize = zvol_volsize_to_reservation(zpool_handle, volsize,
 		    real_props);
-		nvlist_free(real_props);
-		zpool_close(zpool_handle);
 
 		if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop),
 		    &strval) != 0) {
@@ -1008,6 +1230,10 @@
 			}
 		}
 	}
+	if (zpool_handle != NULL) {
+		zpool_close(zpool_handle);
+		nvlist_free(real_props);
+	}
 
 	if (parents && zfs_name_valid(argv[0], type)) {
 		/*
@@ -1019,8 +1245,50 @@
 			ret = 0;
 			goto error;
 		}
-		if (zfs_create_ancestors(g_zfs, argv[0]) != 0)
-			goto error;
+		if (verbose) {
+			(void) printf(parseable ? "create_ancestors\t%s\n" :
+			    dryrun ?  "would create ancestors of %s\n" :
+			    "create ancestors of %s\n", argv[0]);
+		}
+		if (!dryrun) {
+			if (zfs_create_ancestors(g_zfs, argv[0]) != 0) {
+				goto error;
+			}
+		}
+	}
+
+	if (verbose) {
+		nvpair_t *nvp = NULL;
+		(void) printf(parseable ? "create\t%s\n" :
+		    dryrun ? "would create %s\n" : "create %s\n", argv[0]);
+		while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
+			uint64_t uval;
+			char *sval;
+
+			switch (nvpair_type(nvp)) {
+			case DATA_TYPE_UINT64:
+				VERIFY0(nvpair_value_uint64(nvp, &uval));
+				(void) printf(parseable ?
+				    "property\t%s\t%llu\n" : "\t%s=%llu\n",
+				    nvpair_name(nvp), (u_longlong_t)uval);
+				break;
+			case DATA_TYPE_STRING:
+				VERIFY0(nvpair_value_string(nvp, &sval));
+				(void) printf(parseable ?
+				    "property\t%s\t%s\n" : "\t%s=%s\n",
+				    nvpair_name(nvp), sval);
+				break;
+			default:
+				(void) fprintf(stderr, "property '%s' "
+				    "has illegal type %d\n",
+				    nvpair_name(nvp), nvpair_type(nvp));
+				abort();
+			}
+		}
+	}
+	if (dryrun) {
+		ret = 0;
+		goto error;
 	}
 
 	/* pass to libzfs */
@@ -1032,6 +1300,11 @@
 		log_history = B_FALSE;
 	}
 
+	if (nomount) {
+		ret = 0;
+		goto error;
+	}
+
 	ret = zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET);
 error:
 	nvlist_free(props);
@@ -1503,6 +1776,13 @@
 			return (-1);
 		}
 
+		/*
+		 * Unfortunately, zfs_bookmark() doesn't honor the
+		 * casesensitivity setting.  However, we can't simply
+		 * remove this check, because lzc_destroy_bookmarks()
+		 * ignores non-existent bookmarks, so this is necessary
+		 * to get a proper error message.
+		 */
 		if (!zfs_bookmark_exists(argv[0])) {
 			(void) fprintf(stderr, gettext("bookmark '%s' "
 			    "does not exist.\n"), argv[0]);
@@ -2200,7 +2480,7 @@
 
 	/* upgrade */
 	if (version < cb->cb_version) {
-		char verstr[16];
+		char verstr[24];
 		(void) snprintf(verstr, sizeof (verstr),
 		    "%llu", (u_longlong_t)cb->cb_version);
 		if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) {
@@ -2347,11 +2627,13 @@
 
 /*
  * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...]
- *               [-S field [-S field]...] [-t type[,...]] filesystem | snapshot
+ *               [-S field [-S field]...] [-t type[,...]]
+ *               filesystem | snapshot | path
  * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...]
- *                [-S field [-S field]...] [-t type[,...]] filesystem | snapshot
+ *                [-S field [-S field]...] [-t type[,...]]
+ *                filesystem | snapshot | path
  * zfs projectspace [-Hp] [-o field[,...]] [-s field [-s field]...]
- *                [-S field [-S field]...] filesystem | snapshot
+ *                [-S field [-S field]...] filesystem | snapshot | path
  *
  *	-H      Scripted mode; elide headers and separate columns by tabs.
  *	-i	Translate SID to POSIX ID.
@@ -3057,7 +3339,7 @@
 		} while (delim != NULL);
 	}
 
-	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM |
+	if ((zhp = zfs_path_to_zhandle(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM |
 	    ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (1);
 	if (zhp->zfs_head_type != ZFS_TYPE_FILESYSTEM) {
@@ -3192,6 +3474,8 @@
 	boolean_t first = B_TRUE;
 	boolean_t right_justify;
 
+	color_start(ANSI_BOLD);
+
 	for (; pl != NULL; pl = pl->pl_next) {
 		if (!first) {
 			(void) printf("  ");
@@ -3218,10 +3502,32 @@
 			(void) printf("%-*s", (int)pl->pl_width, header);
 	}
 
+	color_end();
+
 	(void) printf("\n");
 }
 
 /*
+ * Decides on the color that the avail value should be printed in.
+ * > 80% used = yellow
+ * > 90% used = red
+ */
+static const char *
+zfs_list_avail_color(zfs_handle_t *zhp)
+{
+	uint64_t used = zfs_prop_get_int(zhp, ZFS_PROP_USED);
+	uint64_t avail = zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE);
+	/* Whole-number percentage of (used + avail) that is still free. */
+	int pct_free = (int)((double)avail / MAX(avail + used, 1) * 100);
+
+	if (pct_free <= 10)
+		return (ANSI_RED);
+	if (pct_free <= 20)
+		return (ANSI_YELLOW);
+	return (NULL);
+}
+
+/*
  * Given a dataset and a list of fields, print out all the properties according
  * to the described layout.
  */
@@ -3284,6 +3590,22 @@
 		}
 
 		/*
+		 * zfs_list_avail_color() needs ZFS_PROP_AVAILABLE + USED
+		 * - so we need another for() search for the USED part
+		 * - when no colors wanted, we can skip the whole thing
+		 */
+		if (use_color() && pl->pl_prop == ZFS_PROP_AVAILABLE) {
+			zprop_list_t *pl2 = cb->cb_proplist;
+			for (; pl2 != NULL; pl2 = pl2->pl_next) {
+				if (pl2->pl_prop == ZFS_PROP_USED) {
+					color_start(zfs_list_avail_color(zhp));
+					/* found it, no need for more loops */
+					break;
+				}
+			}
+		}
+
+		/*
 		 * If this is being called in scripted mode, or if this is the
 		 * last column and it is left-justified, don't include a width
 		 * format specifier.
@@ -3294,6 +3616,9 @@
 			(void) printf("%*s", (int)pl->pl_width, propstr);
 		else
 			(void) printf("%-*s", (int)pl->pl_width, propstr);
+
+		if (pl->pl_prop == ZFS_PROP_AVAILABLE)
+			color_end();
 	}
 
 	(void) printf("\n");
@@ -3470,36 +3795,40 @@
 }
 
 /*
- * zfs rename [-f] <fs | snap | vol> <fs | snap | vol>
+ * zfs rename [-fu] <fs | snap | vol> <fs | snap | vol>
  * zfs rename [-f] -p <fs | vol> <fs | vol>
- * zfs rename -r <snap> <snap>
+ * zfs rename [-u] -r <snap> <snap>
  *
  * Renames the given dataset to another of the same type.
  *
  * The '-p' flag creates all the non-existing ancestors of the target first.
+ * The '-u' flag prevents file systems from being remounted during rename.
  */
 /* ARGSUSED */
 static int
 zfs_do_rename(int argc, char **argv)
 {
 	zfs_handle_t *zhp;
+	renameflags_t flags = { 0 };
 	int c;
 	int ret = 0;
-	boolean_t recurse = B_FALSE;
+	int types;
 	boolean_t parents = B_FALSE;
-	boolean_t force_unmount = B_FALSE;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "prf")) != -1) {
+	while ((c = getopt(argc, argv, "pruf")) != -1) {
 		switch (c) {
 		case 'p':
 			parents = B_TRUE;
 			break;
 		case 'r':
-			recurse = B_TRUE;
+			flags.recursive = B_TRUE;
+			break;
+		case 'u':
+			flags.nounmount = B_TRUE;
 			break;
 		case 'f':
-			force_unmount = B_TRUE;
+			flags.forceunmount = B_TRUE;
 			break;
 		case '?':
 		default:
@@ -3528,20 +3857,32 @@
 		usage(B_FALSE);
 	}
 
-	if (recurse && parents) {
+	if (flags.recursive && parents) {
 		(void) fprintf(stderr, gettext("-p and -r options are mutually "
 		    "exclusive\n"));
 		usage(B_FALSE);
 	}
 
-	if (recurse && strchr(argv[0], '@') == 0) {
+	if (flags.nounmount && parents) {
+		(void) fprintf(stderr, gettext("-u and -p options are mutually "
+		    "exclusive\n"));
+		usage(B_FALSE);
+	}
+
+	if (flags.recursive && strchr(argv[0], '@') == 0) {
 		(void) fprintf(stderr, gettext("source dataset for recursive "
 		    "rename must be a snapshot\n"));
 		usage(B_FALSE);
 	}
 
-	if ((zhp = zfs_open(g_zfs, argv[0], parents ? ZFS_TYPE_FILESYSTEM |
-	    ZFS_TYPE_VOLUME : ZFS_TYPE_DATASET)) == NULL)
+	if (flags.nounmount)
+		types = ZFS_TYPE_FILESYSTEM;
+	else if (parents)
+		types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
+	else
+		types = ZFS_TYPE_DATASET;
+
+	if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL)
 		return (1);
 
 	/* If we were asked and the name looks good, try to create ancestors. */
@@ -3551,7 +3892,7 @@
 		return (1);
 	}
 
-	ret = (zfs_rename(zhp, argv[1], recurse, force_unmount) != 0);
+	ret = (zfs_rename(zhp, argv[1], flags) != 0);
 
 	zfs_close(zhp);
 	return (ret);
@@ -3598,6 +3939,82 @@
 	return (ret);
 }
 
+/*
+ * zfs redact <snapshot> <bookmark> <redaction_snapshot> ...
+ *
+ * Hands the snapshot, the bookmark name and the set of redaction snapshots
+ * to lzc_redact(), explains any failure on stderr and returns its error code.
+ */
+static int
+zfs_do_redact(int argc, char **argv)
+{
+	argv++;
+	argc--;
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("too few arguments\n"));
+		usage(B_FALSE);
+	}
+
+	char *snap = argv[0];
+	char *bookname = argv[1];
+	nvlist_t *rsnapnv = fnvlist_alloc();
+
+	/* Every operand after the bookmark names a redaction snapshot. */
+	for (int i = 2; i < argc; i++)
+		fnvlist_add_boolean(rsnapnv, argv[i]);
+
+	int err = lzc_redact(snap, bookname, rsnapnv);
+	fnvlist_free(rsnapnv);
+
+	switch (err) {
+	case 0:
+		break;
+	case ENOENT:
+		(void) fprintf(stderr,
+		    gettext("provided snapshot %s does not exist\n"), snap);
+		break;
+	case EEXIST:
+		(void) fprintf(stderr, gettext("specified redaction bookmark "
+		    "(%s) provided already exists\n"), bookname);
+		break;
+	case ENAMETOOLONG:
+		(void) fprintf(stderr, gettext("provided bookmark name cannot "
+		    "be used, final name would be too long\n"));
+		break;
+	case E2BIG:
+		(void) fprintf(stderr, gettext("too many redaction snapshots "
+		    "specified\n"));
+		break;
+	case EINVAL:
+		if (strchr(bookname, '#') != NULL)
+			(void) fprintf(stderr, gettext(
+			    "redaction bookmark name must not contain '#'\n"));
+		else
+			(void) fprintf(stderr, gettext(
+			    "redaction snapshot must be descendent of "
+			    "snapshot being redacted\n"));
+		break;
+	case EALREADY:
+		(void) fprintf(stderr, gettext("attempted to redact redacted "
+		    "dataset or with respect to redacted dataset\n"));
+		break;
+	case ENOTSUP:
+		(void) fprintf(stderr, gettext("redaction bookmarks feature "
+		    "not enabled\n"));
+		break;
+	case EXDEV:
+		(void) fprintf(stderr, gettext("potentially invalid redaction "
+		    "snapshot; full dataset names required\n"));
+		break;
+	default:
+		/* Report err itself; errno may have changed since the call. */
+		(void) fprintf(stderr, gettext("internal error: %s\n"),
+		    strerror(err));
+	}
+
+	return (err);
+}
+
 /*
  * zfs rollback [-rRf] <snapshot>
  *
@@ -4009,6 +4426,7 @@
 	return (-1);
 }
 
+
 /*
  * Send a backup stream to stdout.
  */
@@ -4023,13 +4441,16 @@
 	sendflags_t flags = { 0 };
 	int c, err;
 	nvlist_t *dbgnv = NULL;
-	boolean_t extraverbose = B_FALSE;
+	char *redactbook = NULL;
 
 	struct option long_options[] = {
 		{"replicate",	no_argument,		NULL, 'R'},
+		{"skip-missing",	no_argument,		NULL, 's'},
+		{"redact",	required_argument,	NULL, 'd'},
 		{"props",	no_argument,		NULL, 'p'},
 		{"parsable",	no_argument,		NULL, 'P'},
 		{"dedup",	no_argument,		NULL, 'D'},
+		{"proctitle",	no_argument,		NULL, 'V'},
 		{"verbose",	no_argument,		NULL, 'v'},
 		{"dryrun",	no_argument,		NULL, 'n'},
 		{"large-block",	no_argument,		NULL, 'L'},
@@ -4039,13 +4460,14 @@
 		{"raw",		no_argument,		NULL, 'w'},
 		{"backup",	no_argument,		NULL, 'b'},
 		{"holds",	no_argument,		NULL, 'h'},
-		{"Block Diff",  no_argument,            NULL, 'B'},
+		{"saved",	no_argument,		NULL, 'S'},
+		{"Block-diff",  no_argument,            NULL, 'B'},
 		{0, 0, 0, 0}
 	};
 
 	/* check options */
-	while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLeht:cwbB", long_options,
-	    NULL)) != -1) {
+	while ((c = getopt_long(argc, argv, ":i:I:RsDpvnPLeht:cwbd:SB",
+	    long_options, NULL)) != -1) {
 		switch (c) {
 		case 'i':
 			if (fromname)
@@ -4061,6 +4483,12 @@
 		case 'R':
 			flags.replicate = B_TRUE;
 			break;
+		case 's':
+			flags.skipmissing = B_TRUE;
+			break;
+		case 'd':
+			redactbook = optarg;
+			break;
 		case 'p':
 			flags.props = B_TRUE;
 			break;
@@ -4072,19 +4500,22 @@
 			break;
 		case 'P':
 			flags.parsable = B_TRUE;
-			flags.verbose = B_TRUE;
 			break;
-		case 'B':
-			flags.block_diff = B_TRUE;
+		case 'V':
+			flags.progressastitle = B_TRUE;
 			break;
+        case 'B':
+            flags.block_diff = B_TRUE;
+            break;
 		case 'v':
-			if (flags.verbose)
-				extraverbose = B_TRUE;
-			flags.verbose = B_TRUE;
+			flags.verbosity++;
 			flags.progress = B_TRUE;
 			break;
 		case 'D':
-			flags.dedup = B_TRUE;
+			(void) fprintf(stderr,
+			    gettext("WARNING: deduplicated send is no "
+			    "longer supported.  A regular,\n"
+			    "non-deduplicated stream will be generated.\n\n"));
 			break;
 		case 'n':
 			flags.dryrun = B_TRUE;
@@ -4107,6 +4538,9 @@
 			flags.embed_data = B_TRUE;
 			flags.largeblock = B_TRUE;
 			break;
+		case 'S':
+			flags.saved = B_TRUE;
+			break;
 		case ':':
 			/*
 			 * If a parameter was not passed, optopt contains the
@@ -4148,29 +4582,22 @@
 		}
 	}
 
-	if (flags.dedup) {
-		(void) fprintf(stderr,
-		    gettext("WARNING: deduplicated send is "
-		    "deprecated, and will be removed in a\n"
-		    "future release. (In the future, the flag will be "
-		    "accepted, but a\n"
-		    "regular, non-deduplicated stream will be "
-		    "generated.)\n\n"));
-	}
+	if ((flags.parsable || flags.progressastitle) && flags.verbosity == 0)
+		flags.verbosity = 1;
 
 	argc -= optind;
 	argv += optind;
 
 	if (resume_token != NULL) {
 		if (fromname != NULL || flags.replicate || flags.props ||
-		    flags.backup || flags.dedup) {
+		    flags.backup || flags.holds ||
+		    flags.saved || redactbook != NULL) {
 			(void) fprintf(stderr,
 			    gettext("invalid flags combined with -t\n"));
 			usage(B_FALSE);
 		}
-		if (argc != 0) {
-			(void) fprintf(stderr, gettext("no additional "
-			    "arguments are permitted with -t\n"));
+		if (argc > 0) {
+			(void) fprintf(stderr, gettext("too many arguments\n"));
 			usage(B_FALSE);
 		}
 	} else {
@@ -4185,6 +4612,29 @@
 		}
 	}
 
+	if (flags.saved) {
+		if (fromname != NULL || flags.replicate || flags.props ||
+		    flags.doall || flags.backup ||
+		    flags.holds || flags.largeblock || flags.embed_data ||
+		    flags.compress || flags.raw || redactbook != NULL) {
+			(void) fprintf(stderr, gettext("incompatible flags "
+			    "combined with saved send flag\n"));
+			usage(B_FALSE);
+		}
+		if (strchr(argv[0], '@') != NULL) {
+			(void) fprintf(stderr, gettext("saved send must "
+			    "specify the dataset with partially-received "
+			    "state\n"));
+			usage(B_FALSE);
+		}
+	}
+
+	if (flags.raw && redactbook != NULL) {
+		(void) fprintf(stderr,
+		    gettext("Error: raw sends may not be redacted.\n"));
+		return (1);
+	}
+
 	if (!flags.dryrun && isatty(STDOUT_FILENO)) {
 		(void) fprintf(stderr,
 		    gettext("Error: Stream can not be written to a terminal.\n"
@@ -4192,71 +4642,108 @@
 		return (1);
 	}
 
-	if (resume_token != NULL) {
-		return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO,
-		    resume_token));
-	}
-
-	/*
-	 * Special case sending a filesystem, or from a bookmark.
-	 */
-	if (strchr(argv[0], '@') == NULL ||
-	    (fromname && strchr(fromname, '#') != NULL)) {
-		char frombuf[ZFS_MAX_DATASET_NAME_LEN];
-
-		if (flags.replicate || flags.doall || flags.props ||
-		    flags.backup || flags.dedup || flags.holds ||
-                    flags.block_diff ||
-		    (strchr(argv[0], '@') == NULL &&
-		    (flags.dryrun || flags.verbose || flags.progress))) {
-			(void) fprintf(stderr, gettext("Error: "
-			    "Unsupported flag with filesystem or bookmark.\n"));
-			return (1);
-		}
-
+	if (flags.saved) {
 		zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET);
 		if (zhp == NULL)
 			return (1);
 
+		err = zfs_send_saved(zhp, &flags, STDOUT_FILENO,
+		    resume_token);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
+		zfs_close(zhp);
+		return (err != 0);
+	} else if (resume_token != NULL) {
+		err = zfs_send_resume(g_zfs, &flags, STDOUT_FILENO,
+		    resume_token);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
+		return (err);
+	}
+
+	if (flags.skipmissing && !flags.replicate) {
+		(void) fprintf(stderr,
+		    gettext("skip-missing flag can only be used in "
+		    "conjunction with replicate\n"));
+		usage(B_FALSE);
+	}
+
+	/*
+	 * For everything except -R and -I, use the new, cleaner code path.
+	 */
+	if (!(flags.replicate || flags.doall)) {
+		char frombuf[ZFS_MAX_DATASET_NAME_LEN];
+
+		if (fromname != NULL && (strchr(fromname, '#') == NULL &&
+		    strchr(fromname, '@') == NULL)) {
+			/*
+			 * Neither bookmark or snapshot was specified.  Print a
+			 * warning, and assume snapshot.
+			 */
+			(void) fprintf(stderr, "Warning: incremental source "
+			    "didn't specify type, assuming snapshot. Use '@' "
+			    "or '#' prefix to avoid ambiguity.\n");
+			(void) snprintf(frombuf, sizeof (frombuf), "@%s",
+			    fromname);
+			fromname = frombuf;
+		}
 		if (fromname != NULL &&
 		    (fromname[0] == '#' || fromname[0] == '@')) {
 			/*
 			 * Incremental source name begins with # or @.
 			 * Default to same fs as target.
 			 */
+			char tmpbuf[ZFS_MAX_DATASET_NAME_LEN];
+			(void) strlcpy(tmpbuf, fromname, sizeof (tmpbuf));
 			(void) strlcpy(frombuf, argv[0], sizeof (frombuf));
 			cp = strchr(frombuf, '@');
 			if (cp != NULL)
 				*cp = '\0';
-			(void) strlcat(frombuf, fromname, sizeof (frombuf));
+			(void) strlcat(frombuf, tmpbuf, sizeof (frombuf));
 			fromname = frombuf;
 		}
-		err = zfs_send_one(zhp, fromname, STDOUT_FILENO, flags);
+
+		zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET);
+		if (zhp == NULL)
+			return (1);
+		err = zfs_send_one(zhp, fromname, STDOUT_FILENO, &flags,
+		    redactbook);
 		zfs_close(zhp);
+		if (err != 0)
+			note_dev_error(errno, STDOUT_FILENO);
 		return (err != 0);
 	}
 
-	cp = strchr(argv[0], '@');
+	if (fromname != NULL && strchr(fromname, '#')) {
+		(void) fprintf(stderr,
+		    gettext("Error: multiple snapshots cannot be "
+		    "sent from a bookmark.\n"));
+		return (1);
+	}
+
+	if (redactbook != NULL) {
+		(void) fprintf(stderr, gettext("Error: multiple snapshots "
+		    "cannot be sent redacted.\n"));
+		return (1);
+	}
+
+	if ((cp = strchr(argv[0], '@')) == NULL) {
+		(void) fprintf(stderr, gettext("Error: "
+		    "Unsupported flag with filesystem or bookmark.\n"));
+		return (1);
+	}
 	*cp = '\0';
 	toname = cp + 1;
 	zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
 	if (zhp == NULL)
 		return (1);
 
-	if (flags.dedup && flags.block_diff) {
-		(void) fprintf(stderr,
-		    gettext("Error: block diffs are not supported with dedup "
-			    "option.\n"));
-		return (1);
-	}
-
-	if ((zhp->zfs_type != ZFS_TYPE_VOLUME) && (flags.block_diff)) {
-		(void) fprintf(stderr,
-		    gettext("Error: block diffs are available for volumes "
-			    "only.\n"));
-		return (1);
-	}
-
+    	if ((zhp->zfs_type != ZFS_TYPE_VOLUME) && (flags.block_diff)) {
+        	(void) fprintf(stderr,
+            		gettext("Error: block diffs are available for volumes "
+                		"only.\n"));
+        	return (1);
+    	}
 	/*
 	 * If they specified the full path to the snapshot, chop off
 	 * everything except the short name of the snapshot, but special
@@ -4293,9 +4780,9 @@
 		flags.doall = B_TRUE;
 
 	err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0,
-	    extraverbose ? &dbgnv : NULL);
+	    flags.verbosity >= 3 ? &dbgnv : NULL);
 
-	if (extraverbose && dbgnv != NULL) {
+	if (flags.verbosity >= 3 && dbgnv != NULL) {
 		/*
 		 * dump_nvlist prints to stdout, but that's been
 		 * redirected to a file.  Make it print to stderr
@@ -4306,6 +4793,7 @@
 		nvlist_free(dbgnv);
 	}
 	zfs_close(zhp);
+	note_dev_error(errno, STDOUT_FILENO);
 
 	return (err != 0);
 }
@@ -4325,7 +4813,7 @@
 		nomem();
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":o:x:dehnuvFsA")) != -1) {
+	while ((c = getopt(argc, argv, ":o:x:dehMnuvFsA")) != -1) {
 		switch (c) {
 		case 'o':
 			if (!parseprop(props, optarg)) {
@@ -4360,6 +4848,9 @@
 		case 'h':
 			flags.skipholds = B_TRUE;
 			break;
+		case 'M':
+			flags.forceunmount = B_TRUE;
+			break;
 		case 'n':
 			flags.dryrun = B_TRUE;
 			break;
@@ -4496,7 +4987,6 @@
 #define	ZFS_DELEG_PERM_RELEASE		"release"
 #define	ZFS_DELEG_PERM_DIFF		"diff"
 #define	ZFS_DELEG_PERM_BOOKMARK		"bookmark"
-#define	ZFS_DELEG_PERM_REMAP		"remap"
 #define	ZFS_DELEG_PERM_LOAD_KEY		"load-key"
 #define	ZFS_DELEG_PERM_CHANGE_KEY	"change-key"
 
@@ -4524,7 +5014,6 @@
 	{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
 	{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
 	{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
-	{ ZFS_DELEG_PERM_REMAP, ZFS_DELEG_NOTE_REMAP },
 	{ ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY },
 	{ ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY },
 
@@ -4991,10 +5480,17 @@
 					break;
 				}
 
-				if (nice_name != NULL)
+				if (nice_name != NULL) {
 					(void) strlcpy(
 					    node->who_perm.who_ug_name,
 					    nice_name, 256);
+				} else {
+					/* User or group unknown */
+					(void) snprintf(
+					    node->who_perm.who_ug_name,
+					    sizeof (node->who_perm.who_ug_name),
+					    "(unknown: %d)", rid);
+				}
 			}
 
 			uu_avl_insert(avl, node, idx);
@@ -5526,9 +6022,9 @@
 
 				if (p != NULL)
 					rid = p->pw_uid;
-				else {
+				else if (*endch != '\0') {
 					(void) snprintf(errbuf, 256, gettext(
-					    "invalid user %s"), curr);
+					    "invalid user %s\n"), curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			} else if (opts->group) {
@@ -5540,9 +6036,9 @@
 
 				if (g != NULL)
 					rid = g->gr_gid;
-				else {
+				else if (*endch != '\0') {
 					(void) snprintf(errbuf, 256, gettext(
-					    "invalid group %s"),  curr);
+					    "invalid group %s\n"),  curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			} else {
@@ -5568,7 +6064,7 @@
 					rid = g->gr_gid;
 				} else {
 					(void) snprintf(errbuf, 256, gettext(
-					    "invalid user/group %s"), curr);
+					    "invalid user/group %s\n"), curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			}
@@ -6154,7 +6650,7 @@
 		/*
 		 *  1. collect holds data, set format options
 		 */
-		ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit,
+		ret = zfs_for_each(1, argv + i, flags, types, NULL, NULL, limit,
 		    holds_callback, &cb);
 		if (ret != 0)
 			++errors;
@@ -6322,9 +6818,9 @@
 
 		(void) fprintf(stderr, gettext("cannot share '%s': "
 		    "legacy share\n"), zfs_get_name(zhp));
-		(void) fprintf(stderr, gettext("use share(1M) to "
-		    "share this filesystem, or set "
-		    "sharenfs property on\n"));
+		(void) fprintf(stderr, gettext("use exports(5) or "
+		    "smb.conf(5) to share this filesystem, or set "
+		    "the sharenfs or sharesmb property\n"));
 		return (1);
 	}
 
@@ -6339,7 +6835,7 @@
 
 		(void) fprintf(stderr, gettext("cannot %s '%s': "
 		    "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
-		(void) fprintf(stderr, gettext("use %s(1M) to "
+		(void) fprintf(stderr, gettext("use %s(8) to "
 		    "%s this filesystem\n"), cmdname, cmdname);
 		return (1);
 	}
@@ -6372,7 +6868,18 @@
 		    zfs_get_name(zhp));
 		return (1);
 	} else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
-		return (0);
+		/*
+		 * When performing a 'zfs mount -a', we skip any mounts for
+		 * datasets that have 'noauto' set. Sharing a dataset with
+		 * 'noauto' set is only allowed if it's mounted.
+		 */
+		if (op == OP_MOUNT)
+			return (0);
+		if (op == OP_SHARE && !zfs_is_mounted(zhp, NULL)) {
+			/* also purge it from existing exports */
+			zfs_unshareall_bypath(zhp, mountpoint);
+			return (0);
+		}
 	}
 
 	/*
@@ -6409,6 +6916,17 @@
 		return (1);
 	}
 
+	if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE)) {
+		if (!explicit)
+			return (0);
+
+		(void) fprintf(stderr, gettext("cannot %s '%s': "
+		    "Dataset is not complete, was created by receiving "
+		    "a redacted zfs send stream.\n"), cmdname,
+		    zfs_get_name(zhp));
+		return (1);
+	}
+
 	/*
 	 * At this point, we have verified that the mountpoint and/or
 	 * shareopts are appropriate for auto management. If the
@@ -6476,25 +6994,8 @@
 			return (1);
 		}
 
-		if (zfs_mount(zhp, options, flags) != 0) {
-			/*
-			 * Check if a mount sneaked in after we checked
-			 */
-			if (!explicit &&
-			    libzfs_errno(g_zfs) == EZFS_MOUNTFAILED) {
-				usleep(10 * MILLISEC);
-				libzfs_mnttab_cache(g_zfs, B_FALSE);
-
-				if (zfs_is_mounted(zhp, NULL)) {
-					(void) fprintf(stderr, gettext(
-					    "Ignoring previous 'already "
-					    "mounted' error for '%s'\n"),
-					    zfs_get_name(zhp));
-					return (0);
-				}
-			}
+		if (zfs_mount(zhp, options, flags) != 0)
 			return (1);
-		}
 		break;
 	}
 
@@ -6511,9 +7012,6 @@
 	time_t now = time(NULL);
 	char info[32];
 
-	/* report 1..n instead of 0..n-1 */
-	++current;
-
 	/* display header if we're here for the first time */
 	if (current == 1) {
 		set_progress_header(gettext("Mounting ZFS filesystems"));
@@ -6584,7 +7082,7 @@
 	int flags = 0;
 
 	/* check options */
-	while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al"))
+	while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:Of" : "al"))
 	    != -1) {
 		switch (c) {
 		case 'a':
@@ -6612,6 +7110,9 @@
 		case 'O':
 			flags |= MS_OVERLAY;
 			break;
+		case 'f':
+			flags |= MS_FORCE;
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
@@ -6676,6 +7177,8 @@
 		zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
 		    share_mount_one_cb, &share_mount_state,
 		    op == OP_MOUNT && !(flags & MS_CRYPT));
+		zfs_commit_all_shares();
+
 		ret = share_mount_state.sm_status;
 
 		for (int i = 0; i < cb.cb_used; i++)
@@ -6728,6 +7231,7 @@
 		} else {
 			ret = share_mount_one(zhp, op, flags, NULL, B_TRUE,
 			    options);
+			zfs_commit_all_shares();
 			zfs_close(zhp);
 		}
 	}
@@ -6794,19 +7298,6 @@
 	ino_t path_inode;
 
 	/*
-	 * Search for the path in /proc/self/mounts. Rather than looking for the
-	 * specific path, which can be fooled by non-standard paths (i.e. ".."
-	 * or "//"), we stat() the path and search for the corresponding
-	 * (major,minor) device pair.
-	 */
-	if (stat64(path, &statbuf) != 0) {
-		(void) fprintf(stderr, gettext("cannot %s '%s': %s\n"),
-		    cmdname, path, strerror(errno));
-		return (1);
-	}
-	path_inode = statbuf.st_ino;
-
-	/*
 	 * Search for the given (major,minor) pair in the mount table.
 	 */
 
@@ -6814,12 +7305,7 @@
 	if (freopen(MNTTAB, "r", mnttab_file) == NULL)
 		return (ENOENT);
 
-	while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) {
-		if (entry.mnt_major == major(statbuf.st_dev) &&
-		    entry.mnt_minor == minor(statbuf.st_dev))
-			break;
-	}
-	if (ret != 0) {
+	if (getextmntent(path, &entry, &statbuf) != 0) {
 		if (op == OP_SHARE) {
 			(void) fprintf(stderr, gettext("cannot %s '%s': not "
 			    "currently mounted\n"), cmdname, path);
@@ -6832,6 +7318,7 @@
 			    strerror(errno));
 		return (ret != 0);
 	}
+	path_inode = statbuf.st_ino;
 
 	if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
 		(void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS "
@@ -6874,6 +7361,7 @@
 			    "not currently shared\n"), path);
 		} else {
 			ret = zfs_unshareall_bypath(zhp, path);
+			zfs_commit_all_shares();
 		}
 	} else {
 		char mtpt_prop[ZFS_MAXPROPLEN];
@@ -6915,13 +7403,16 @@
 	char sharesmb[ZFS_MAXPROPLEN];
 
 	/* check options */
-	while ((c = getopt(argc, argv, op == OP_SHARE ? ":a" : "af")) != -1) {
+	while ((c = getopt(argc, argv, op == OP_SHARE ? ":a" : "afu")) != -1) {
 		switch (c) {
 		case 'a':
 			do_all = 1;
 			break;
 		case 'f':
-			flags = MS_FORCE;
+			flags |= MS_FORCE;
+			break;
+		case 'u':
+			flags |= MS_CRYPT;
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
@@ -7041,6 +7532,7 @@
 				if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) ==
 				    ZFS_CANMOUNT_NOAUTO)
 					continue;
+				break;
 			default:
 				break;
 			}
@@ -7069,8 +7561,9 @@
 			nomem();
 
 		while ((node = uu_avl_walk_next(walk)) != NULL) {
-			uu_avl_remove(tree, node);
+			const char *mntarg = NULL;
 
+			uu_avl_remove(tree, node);
 			switch (op) {
 			case OP_SHARE:
 				if (zfs_unshareall_bytype(node->un_zhp,
@@ -7080,7 +7573,7 @@
 
 			case OP_MOUNT:
 				if (zfs_unmount(node->un_zhp,
-				    node->un_zhp->zfs_name, flags) != 0)
+				    mntarg, flags) != 0)
 					ret = 1;
 				break;
 			}
@@ -7090,6 +7583,9 @@
 			free(node);
 		}
 
+		if (op == OP_SHARE)
+			zfs_commit_shares(protocol);
+
 		uu_avl_walk_end(walk);
 		uu_avl_destroy(tree);
 		uu_avl_pool_destroy(pool);
@@ -7140,8 +7636,8 @@
 				    "unshare '%s': legacy share\n"),
 				    zfs_get_name(zhp));
 				(void) fprintf(stderr, gettext("use "
-				    "unshare(1M) to unshare this "
-				    "filesystem\n"));
+				    "exports(5) or smb.conf(5) to unshare "
+				    "this filesystem\n"));
 				ret = 1;
 			} else if (!zfs_is_shared(zhp)) {
 				(void) fprintf(stderr, gettext("cannot "
@@ -7159,7 +7655,7 @@
 				    "unmount '%s': legacy "
 				    "mountpoint\n"), zfs_get_name(zhp));
 				(void) fprintf(stderr, gettext("use "
-				    "umount(1M) to unmount this "
+				    "umount(8) to unmount this "
 				    "filesystem\n"));
 				ret = 1;
 			} else if (!zfs_is_mounted(zhp, NULL)) {
@@ -7181,8 +7677,8 @@
 }
 
 /*
- * zfs unmount -a
- * zfs unmount filesystem
+ * zfs unmount [-fu] -a
+ * zfs unmount [-fu] filesystem
  *
  * Unmount all filesystems, or a specific ZFS filesystem.
  */
@@ -7205,21 +7701,6 @@
 }
 
 static int
-disable_command_idx(char *command)
-{
-	for (int i = 0; i < NCOMMAND; i++) {
-		if (command_table[i].name == NULL)
-			continue;
-
-		if (strcmp(command, command_table[i].name) == 0) {
-			command_table[i].name = NULL;
-			return (0);
-		}
-	}
-	return (1);
-}
-
-static int
 find_command_idx(char *command, int *idx)
 {
 	int i;
@@ -7248,7 +7729,7 @@
 	int c;
 	struct sigaction sa;
 
-	while ((c = getopt(argc, argv, "FHt")) != -1) {
+	while ((c = getopt(argc, argv, "FHth")) != -1) {
 		switch (c) {
 		case 'F':
 			flags |= ZFS_DIFF_CLASSIFY;
@@ -7259,6 +7740,9 @@
 		case 't':
 			flags |= ZFS_DIFF_TIMESTAMP;
 			break;
+		case 'h':
+			flags |= ZFS_DIFF_NO_MANGLE;
+			break;
 		default:
 			(void) fprintf(stderr,
 			    gettext("invalid option '%c'\n"), optopt);
@@ -7322,66 +7806,18 @@
 	return (err != 0);
 }
 
-
 /*
- * zfs remap <filesystem | volume>
+ * zfs bookmark <fs@source>|<fs#source> <fs#bookmark>
  *
- * N.B. The remap command has been disabled and may be removed in the future.
- *
- * Remap the indirect blocks in the given filesystem or volume so that they no
- * longer reference blocks on previously removed vdevs and we can eventually
- * shrink the size of the indirect mapping objects for the previously removed
- * vdevs. Note that remapping all blocks might not be possible and that
- * references from snapshots will still exist and cannot be remapped.
- *
- * This functionality is no longer particularly useful now that the removal
- * code can map large chunks.  Furthermore, explaining what this command
- * does and why it may be useful requires a detailed understanding of the
- * internals of device removal.  These are details users should not be
- * bothered with.  If required, the remap command can be re-enabled by
- * setting the ZFS_REMAP_ENABLED environment variable.
- *
- * > ZFS_REMAP_ENABLED=yes zfs remap <filesystem | volume>
- */
-static int
-zfs_do_remap(int argc, char **argv)
-{
-	const char *fsname;
-	int err = 0;
-	int c;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "")) != -1) {
-		switch (c) {
-		case '?':
-			(void) fprintf(stderr,
-			    gettext("invalid option '%c'\n"), optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	if (argc != 2) {
-		(void) fprintf(stderr, gettext("wrong number of arguments\n"));
-		usage(B_FALSE);
-	}
-
-	fsname = argv[1];
-	err = zfs_remap_indirects(g_zfs, fsname);
-
-	return (err);
-}
-
-/*
- * zfs bookmark <fs@snap> <fs#bmark>
- *
- * Creates a bookmark with the given name from the given snapshot.
+ * Creates a bookmark with the given name from the source snapshot
+ * or creates a copy of an existing source bookmark.
  */
 static int
 zfs_do_bookmark(int argc, char **argv)
 {
-	char snapname[ZFS_MAX_DATASET_NAME_LEN];
-	char bookname[ZFS_MAX_DATASET_NAME_LEN];
-	zfs_handle_t *zhp;
+	char *source, *bookname;
+	char expbuf[ZFS_MAX_DATASET_NAME_LEN];
+	int source_type;
 	nvlist_t *nvl;
 	int ret = 0;
 	int c;
@@ -7401,7 +7837,7 @@
 
 	/* check number of arguments */
 	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
+		(void) fprintf(stderr, gettext("missing source argument\n"));
 		goto usage;
 	}
 	if (argc < 2) {
@@ -7409,50 +7845,72 @@
 		goto usage;
 	}
 
-	if (strchr(argv[0], '@') == NULL) {
+	source = argv[0];
+	bookname = argv[1];
+
+	if (strchr(source, '@') == NULL && strchr(source, '#') == NULL) {
 		(void) fprintf(stderr,
-		    gettext("invalid snapshot name '%s': "
-		    "must contain a '@'\n"), argv[0]);
+		    gettext("invalid source name '%s': "
+		    "must contain a '@' or '#'\n"), source);
 		goto usage;
 	}
-	if (strchr(argv[1], '#') == NULL) {
+	if (strchr(bookname, '#') == NULL) {
 		(void) fprintf(stderr,
 		    gettext("invalid bookmark name '%s': "
-		    "must contain a '#'\n"), argv[1]);
+		    "must contain a '#'\n"), bookname);
 		goto usage;
 	}
 
-	if (argv[0][0] == '@') {
-		/*
-		 * Snapshot name begins with @.
-		 * Default to same fs as bookmark.
-		 */
-		(void) strlcpy(snapname, argv[1], sizeof (snapname));
-		*strchr(snapname, '#') = '\0';
-		(void) strlcat(snapname, argv[0], sizeof (snapname));
-	} else {
-		(void) strlcpy(snapname, argv[0], sizeof (snapname));
-	}
-	if (argv[1][0] == '#') {
-		/*
-		 * Bookmark name begins with #.
-		 * Default to same fs as snapshot.
-		 */
-		(void) strlcpy(bookname, argv[0], sizeof (bookname));
-		*strchr(bookname, '@') = '\0';
-		(void) strlcat(bookname, argv[1], sizeof (bookname));
-	} else {
-		(void) strlcpy(bookname, argv[1], sizeof (bookname));
+	/*
+	 * Expand source or bookname to a full path.  At most one of them may
+	 * be given as a short name (starting with '@' or '#'); the short one
+	 * inherits the dataset name of the other argument.
+	 */
+	{
+		char **expand;
+		char *source_short, *bookname_short;
+		source_short = strpbrk(source, "@#");
+		bookname_short = strpbrk(bookname, "#");
+		if (source_short == source &&
+		    bookname_short == bookname) {
+			/* Both are short names: no dataset to inherit. */
+			(void) fprintf(stderr, gettext(
+			    "either source or bookmark must be specified as "
+			    "full dataset paths\n"));
+			goto usage;
+		} else if (source_short != source &&
+		    bookname_short != bookname) {
+			/* Both are already full paths: nothing to expand. */
+			expand = NULL;
+		} else if (source_short != source) {
+			(void) strlcpy(expbuf, source, sizeof (expbuf));
+			expand = &bookname;
+		} else if (bookname_short != bookname) {
+			(void) strlcpy(expbuf, bookname, sizeof (expbuf));
+			expand = &source;
+		} else {
+			abort();
+		}
+		if (expand != NULL) {
+			char *sep = strpbrk(expbuf, "@#");
+
+			/*
+			 * strlcpy() truncates silently.  If the separator did
+			 * not fit into expbuf, the dataset name is too long
+			 * and there is nothing to cut at.
+			 */
+			if (sep == NULL) {
+				(void) fprintf(stderr, gettext(
+				    "dataset name is too long\n"));
+				goto usage;
+			}
+			*sep = '\0'; /* dataset name in buf */
+			(void) strlcat(expbuf, *expand, sizeof (expbuf));
+			*expand = expbuf;
+		}
 	}
 
-	zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT);
+	/* determine source type */
+	switch (*strpbrk(source, "@#")) {
+		case '@': source_type = ZFS_TYPE_SNAPSHOT; break;
+		case '#': source_type = ZFS_TYPE_BOOKMARK; break;
+		default: abort();
+	}
+
+	/* test the source exists */
+	zfs_handle_t *zhp;
+	zhp = zfs_open(g_zfs, source, source_type);
 	if (zhp == NULL)
 		goto usage;
 	zfs_close(zhp);
 
-
 	nvl = fnvlist_alloc();
-	fnvlist_add_string(nvl, bookname, snapname);
+	fnvlist_add_string(nvl, bookname, source);
 	ret = lzc_bookmark(nvl, NULL);
 	fnvlist_free(nvl);
 
@@ -7468,6 +7926,10 @@
 		case EXDEV:
 			err_msg = "bookmark is in a different pool";
 			break;
+		case ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR:
+			err_msg = "source is not an ancestor of the "
+			    "new bookmark's dataset";
+			break;
 		case EEXIST:
 			err_msg = "bookmark exists";
 			break;
@@ -8126,6 +8588,90 @@
 	return (ret);
 }
 
+/*
+ * zfs wait [-t <activity>[,...]] <filesystem>
+ *
+ * Waits for background activity on the given filesystem to finish.  The
+ * only activity name accepted by -t here is "deleteq"; without -t every
+ * activity type is waited for.
+ *
+ * Returns 1 if the filesystem cannot be opened, otherwise the last value
+ * returned by zfs_wait_status() (0 on success).
+ */
+static int
+zfs_do_wait(int argc, char **argv)
+{
+	boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
+	int error = 0, i;
+	int c;
+
+	/* By default, wait for all types of activity. */
+	for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
+		enabled[i] = B_TRUE;
+
+	while ((c = getopt(argc, argv, "t:")) != -1) {
+		switch (c) {
+		case 't':
+		{
+			/* The index into this table is the activity number. */
+			static char *col_subopts[] = { "deleteq", NULL };
+			char *value;
+
+			/* Reset activities array */
+			bzero(&enabled, sizeof (enabled));
+			while (*optarg != '\0') {
+				int activity = getsubopt(&optarg, col_subopts,
+				    &value);
+
+				if (activity < 0) {
+					(void) fprintf(stderr,
+					    gettext("invalid activity '%s'\n"),
+					    value);
+					usage(B_FALSE);
+				}
+
+				enabled[activity] = B_TRUE;
+			}
+			break;
+		}
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+	/* Exactly one filesystem argument is required. */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing 'filesystem' "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		return (1);
+
+	/*
+	 * Keep making passes over the enabled activities until a pass ends
+	 * with an error, with 'missing' set, or without any activity having
+	 * reported that it waited.
+	 */
+	for (;;) {
+		boolean_t missing = B_FALSE;
+		boolean_t any_waited = B_FALSE;
+
+		/* NOTE(review): this 'i' shadows the function-scope 'i'. */
+		for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) {
+			boolean_t waited;
+
+			if (!enabled[i])
+				continue;
+
+			/*
+			 * 'missing' is presumably set when the dataset no
+			 * longer exists -- confirm in zfs_wait_status().
+			 */
+			error = zfs_wait_status(zhp, i, &missing, &waited);
+			if (error != 0 || missing)
+				break;
+
+			any_waited = (any_waited || waited);
+		}
+
+		if (error != 0 || missing || !any_waited)
+			break;
+	}
+
+	zfs_close(zhp);
+
+	return (error);
+}
+
 /*
  * Display version message
  */
@@ -8147,6 +8693,7 @@
 	char **newargv;
 
 	(void) setlocale(LC_ALL, "");
+	(void) setlocale(LC_NUMERIC, "C");
 	(void) textdomain(TEXT_DOMAIN);
 
 	opterr = 0;
@@ -8180,13 +8727,6 @@
 		cmdname = "snapshot";
 
 	/*
-	 * The 'remap' command has been disabled and may be removed in the
-	 * future.  See the comment above zfs_do_remap() for details.
-	 */
-	if (!libzfs_envvar_is_set("ZFS_REMAP_ENABLED"))
-		disable_command_idx("remap");
-
-	/*
 	 * Special case '-?'
 	 */
 	if ((strcmp(cmdname, "-?") == 0) ||
@@ -8210,6 +8750,8 @@
 
 	libzfs_print_on_error(g_zfs, B_TRUE);
 
+	zfs_setproctitle_init(argc, argv, environ);
+
 	/*
 	 * Many commands modify input strings for string parsing reasons.
 	 * We create a copy to protect the original argv.
@@ -8257,3 +8799,67 @@
 
 	return (ret);
 }
+
+#ifdef __FreeBSD__
+#include <sys/jail.h>
+#include <jail.h>
+/*
+ * Attach/detach the given dataset to/from the given jail.
+ *
+ * Expects exactly three arguments: argv[0] (not examined here), argv[1]
+ * the jail id or name as understood by jail_getid(), and argv[2] the
+ * filesystem.  'attach' is passed through to zfs_jail() and selects
+ * attach (B_TRUE) or detach (B_FALSE).
+ *
+ * Returns 0 on success and 1 if the filesystem cannot be opened or
+ * zfs_jail() fails.  Argument errors are reported through usage().
+ */
+/* ARGSUSED */
+static int
+zfs_do_jail_impl(int argc, char **argv, boolean_t attach)
+{
+	zfs_handle_t *zhp;
+	int jailid, ret;
+
+	/* check number of arguments */
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing argument(s)\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	/* jail_getid() yields a negative value when it cannot resolve it */
+	jailid = jail_getid(argv[1]);
+	if (jailid < 0) {
+		(void) fprintf(stderr, gettext("invalid jail id or name\n"));
+		usage(B_FALSE);
+	}
+
+	zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		return (1);
+
+	/* normalize the zfs_jail() result to a 0/1 exit status */
+	ret = (zfs_jail(zhp, jailid, attach) != 0);
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * zfs jail jailid filesystem
+ *
+ * Attach the given dataset to the given jail.  Thin wrapper that calls
+ * zfs_do_jail_impl() with attach set to B_TRUE and returns its result.
+ */
+/* ARGSUSED */
+static int
+zfs_do_jail(int argc, char **argv)
+{
+	return (zfs_do_jail_impl(argc, argv, B_TRUE));
+}
+/*
+ * zfs unjail jailid filesystem
+ *
+ * Detach the given dataset from the given jail.  Thin wrapper that calls
+ * zfs_do_jail_impl() with attach set to B_FALSE and returns its result.
+ */
+/* ARGSUSED */
+static int
+zfs_do_unjail(int argc, char **argv)
+{
+	return (zfs_do_jail_impl(argc, argv, B_FALSE));
+}
+#endif

diff --git a/zfs/cmd/zfs/zfs_project.c b/zfs/cmd/zfs/zfs_project.c
index 341cc00..2484975 100644
--- a/zfs/cmd/zfs/zfs_project.c
+++ b/zfs/cmd/zfs/zfs_project.c

@@ -207,7 +207,6 @@
 zfs_project_handle_dir(const char *name, zfs_project_control_t *zpc,
     list_t *head)
 {
-	char fullname[PATH_MAX];
 	struct dirent *ent;
 	DIR *dir;
 	int ret = 0;
@@ -227,21 +226,28 @@
 	zpc->zpc_ignore_noent = B_TRUE;
 	errno = 0;
 	while (!ret && (ent = readdir(dir)) != NULL) {
+		char *fullname;
+
 		/* skip "." and ".." */
 		if (strcmp(ent->d_name, ".") == 0 ||
 		    strcmp(ent->d_name, "..") == 0)
 			continue;
 
-		if (strlen(ent->d_name) + strlen(name) >=
-		    sizeof (fullname) + 1) {
+		if (strlen(ent->d_name) + strlen(name) + 1 >= PATH_MAX) {
 			errno = ENAMETOOLONG;
 			break;
 		}
 
-		sprintf(fullname, "%s/%s", name, ent->d_name);
+		if (asprintf(&fullname, "%s/%s", name, ent->d_name) == -1) {
+			errno = ENOMEM;
+			break;
+		}
+
 		ret = zfs_project_handle_one(fullname, zpc);
 		if (!ret && zpc->zpc_recursive && ent->d_type == DT_DIR)
 			zfs_project_item_alloc(head, fullname);
+
+		free(fullname);
 	}
 
 	if (errno && !ret) {

diff --git a/zfs/cmd/zfs_ids_to_path/.gitignore b/zfs/cmd/zfs_ids_to_path/.gitignore
new file mode 100644
index 0000000..f95f853
--- /dev/null
+++ b/zfs/cmd/zfs_ids_to_path/.gitignore

@@ -0,0 +1 @@
+zfs_ids_to_path

diff --git a/zfs/cmd/zfs_ids_to_path/Makefile.am b/zfs/cmd/zfs_ids_to_path/Makefile.am
new file mode 100644
index 0000000..5494267
--- /dev/null
+++ b/zfs/cmd/zfs_ids_to_path/Makefile.am

@@ -0,0 +1,11 @@
+include $(top_srcdir)/config/Rules.am
+
+sbin_PROGRAMS = zfs_ids_to_path
+
+zfs_ids_to_path_SOURCES = \
+	zfs_ids_to_path.c
+
+zfs_ids_to_path_LDADD = \
+        $(abs_top_builddir)/lib/libzfs/libzfs.la
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zfs_ids_to_path/zfs_ids_to_path.c b/zfs/cmd/zfs_ids_to_path/zfs_ids_to_path.c
new file mode 100644
index 0000000..1d3bb6b
--- /dev/null
+++ b/zfs/cmd/zfs_ids_to_path/zfs_ids_to_path.c

@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+#include <libintl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <libzfs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+libzfs_handle_t *g_zfs;
+
+/*
+ * Print the command synopsis to stderr and terminate the process with
+ * exit status 'err'.  Does not return.
+ */
+static void
+usage(int err)
+{
+	fprintf(stderr, "Usage: zfs_ids_to_path [-v] <pool> <objset id> "
+	    "<object id>\n");
+	exit(err);
+}
+
+/*
+ * zfs_ids_to_path [-v] <pool> <objset id> <object id>
+ *
+ * Looks up the path of the given object in the given objset of the pool
+ * and prints it on stdout.  With -v the lookup goes through
+ * zpool_obj_to_path_ds() instead of zpool_obj_to_path().
+ *
+ * Exit status: 0 on success; 1 for an unknown option or a wrong number
+ * of arguments; 2 or 3 when the objset or object id cannot be parsed;
+ * 4 when libzfs cannot be initialized; 5 when the pool cannot be opened.
+ */
+int
+main(int argc, char **argv)
+{
+	boolean_t verbose = B_FALSE;
+	int c;
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		default:
+			/*
+			 * getopt() has already printed a diagnostic for the
+			 * unknown option; do not run with it ignored.
+			 */
+			usage(1);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 3) {
+		(void) fprintf(stderr, "Incorrect number of arguments: %d\n",
+		    argc);
+		usage(1);
+	}
+
+	/* argv[0] is the pool name, argv[1] the objset id, argv[2] the object */
+	uint64_t objset, object;
+	if (sscanf(argv[1], "%llu", (u_longlong_t *)&objset) != 1) {
+		(void) fprintf(stderr, "Invalid objset id: %s\n", argv[1]);
+		usage(2);
+	}
+	if (sscanf(argv[2], "%llu", (u_longlong_t *)&object) != 1) {
+		(void) fprintf(stderr, "Invalid object id: %s\n", argv[2]);
+		usage(3);
+	}
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
+		return (4);
+	}
+	zpool_handle_t *pool = zpool_open(g_zfs, argv[0]);
+	if (pool == NULL) {
+		fprintf(stderr, "Could not open pool %s\n", argv[0]);
+		libzfs_fini(g_zfs);
+		return (5);
+	}
+
+	/* Twice PATH_MAX: presumably room for a dataset name plus a path. */
+	char pathname[PATH_MAX * 2];
+	if (verbose) {
+		zpool_obj_to_path_ds(pool, objset, object, pathname,
+		    sizeof (pathname));
+	} else {
+		zpool_obj_to_path(pool, objset, object, pathname,
+		    sizeof (pathname));
+	}
+	printf("%s\n", pathname);
+	zpool_close(pool);
+	libzfs_fini(g_zfs);
+	return (0);
+}

diff --git a/zfs/cmd/zfstool/Makefile.am b/zfs/cmd/zfstool/Makefile.am
index 72ac092..419f2e2 100644
--- a/zfs/cmd/zfstool/Makefile.am
+++ b/zfs/cmd/zfstool/Makefile.am

@@ -12,6 +12,7 @@
 	zst.h \
 	bitmap.c \
 	bitmap.h \
+	zfstlog.h \
 	fc_bitmap.h
 
 zfstool_LDADD = \

diff --git a/zfs/cmd/zfstool/bitmap.c b/zfs/cmd/zfstool/bitmap.c
index 2b30a65..d3fe0c0 100644
--- a/zfs/cmd/zfstool/bitmap.c
+++ b/zfs/cmd/zfstool/bitmap.c

@@ -15,6 +15,10 @@
 extern int verbosity;
 
 #define COMPRESSION_LEVEL 6
+void bitmap_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
+bool is_buffer_zeroed(unsigned char *buffer, uint64_t size);
+int pwrite_data(const int fd, const void *buf, size_t size, off_t write_offset);
+int bitmap_write_sparse(const int fd, uint64_t bitmap_size_bytes, unsigned char *buf);
 
 ram_bitmap_t *bitmap_create(uint64_t volsize, unsigned int grainsize)
 {
@@ -122,8 +126,7 @@
         }
 }
 
-void bitmap_fwrite(const void *ptr, size_t size, size_t nmemb,
-    FILE *stream)
+void bitmap_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
 {
         if (fwrite(ptr, size, nmemb, stream) != nmemb)
         {
@@ -238,7 +241,7 @@
 {
         int exists = 0;
         struct stat sb;
-        size_t nread;
+        // size_t nread;
         FILE *bm;
         unsigned char * compressed_bitmap;
 
@@ -268,7 +271,8 @@
 			exit(-1);
 		}
                 fseek(bm, 0, size);
-                nread = fread(&free_bitmap->bmhdr, sizeof(free_bitmap->bmhdr), 1, bm);
+                fread(&free_bitmap->bmhdr, sizeof(free_bitmap->bmhdr), 1, bm);
+                // nread = fread(&free_bitmap->bmhdr, sizeof(free_bitmap->bmhdr), 1, bm);
 	} else {
 		bm = fopen(holemap, "w+b");
 		if(!bm) {

diff --git a/zfs/cmd/zfstool/test.bash b/zfs/cmd/zfstool/test.bash
index f8a8af2..78d21b1 100644
--- a/zfs/cmd/zfstool/test.bash
+++ b/zfs/cmd/zfstool/test.bash

@@ -1,5 +1,6 @@
 #!/bin/bash
 
+# Copyright 2014-2023 Google LLC
 # test bitmap generation based on ZFS send stream
 
 usage()
@@ -10,7 +11,7 @@
 	-h - help
 	-H - test holes
 	-E - test embedded data feature
-	-n number - number of test interations, default 1
+	-n number - number of test iterations, default 1
 	-d - debug mode, leave the bitmap and the pool artifacts for further inspection (forces n = 1)
 	-V size (bytes) - volume size; default 10M
 	-b size (bytes) - blocks size; default 64k
@@ -28,7 +29,7 @@
 test_embedded=0
 volsize=$((10*1024*1024))
 blocksize=$((64*1024))
-grainsize=$((128*1024))
+grainsize=$((64*1024))
 verbose=0
 poolname="test_pool"
 devspec="/tmp/vd1 /tmp/vd2"

diff --git a/zfs/cmd/zfstool/zfstlog.h b/zfs/cmd/zfstool/zfstlog.h
new file mode 100644
index 0000000..b98cc07
--- /dev/null
+++ b/zfs/cmd/zfstool/zfstlog.h

@@ -0,0 +1,24 @@
+#ifndef ZFSTLOG_H
+#define ZFSTLOG_H
+
+/* Emits "[file:line] (function) message" on stderr when verbosity >= level. */
+extern void debug_print(int level, const char * file, const char * function, int line, const char * fmt, ...);
+
+#define LOGLEV_ERR	0
+#define LOGLEV_INFO 	1
+#define LOGLEV_DEBUG 	2
+
+/* NOTE(review): LOG_TRACE() uses LOGLEV_ERR, so it prints whenever verbosity >= 0 - confirm intended. */
+#ifndef NODEBUG_LOG
+#define LOG_TRACE()	debug_print(LOGLEV_ERR, __FILE__, __FUNCTION__, __LINE__, "")
+#define LOG_ERR(fmt, ...) debug_print(LOGLEV_ERR, __FILE__, __FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
+#define LOG_INFO(fmt, ...) debug_print(LOGLEV_INFO, __FILE__, __FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
+#define LOG_DEBUG(fmt, ...) debug_print(LOGLEV_DEBUG,__FILE__, __FUNCTION__, __LINE__, fmt, ##__VA_ARGS__)
+#else
+#define LOG_TRACE()	((void)0)
+#define LOG_ERR(fmt, ...)	((void)0)
+#define LOG_INFO(fmt, ...)	((void)0)
+#define LOG_DEBUG(fmt, ...)	((void)0)
+#endif
+
+#endif

diff --git a/zfs/cmd/zfstool/zfstool.c b/zfs/cmd/zfstool/zfstool.c
index 1377305..a4d624c 100644
--- a/zfs/cmd/zfstool/zfstool.c
+++ b/zfs/cmd/zfstool/zfstool.c

@@ -1,7 +1,5 @@
-/* ============================================================
- * Copyright (c) 2014 Actifio Inc. All Rights Reserved
- */
-
+// Copyright 2023 Google LLC
+//
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -10,6 +8,16 @@
 
 #include "bitmap.h"
 
+#include "zfstlog.h"
+
+extern int usage(void);
+extern int post_process_bitmap_pair_zst(ram_bitmap_pair_t *bitmap_pair, const char* clone1_holes_file);
+extern ram_bitmap_pair_t * generate_bitmap_zst(const char *parent, const char *child, int separate_bitmaps, const char* clone1_holes_file);
+extern int create_bitmap_zst(const char *parent, const char *child, const char *bitmap, int separate_bitmaps, const char *clone1_holes_file);
+extern ram_bitmap_t * generate_bitmap_raw(const char *parent, const char *child, uint64_t byte_offset, uint64_t byte_length);
+extern int verify_bitmaps(const char *parent, const char *child, uint64_t byte_offset, uint64_t byte_length, int separate_bitmaps, const char* clone1_holes);
+extern void show_bitmap(char *bitmap_file, uint64_t offset);
+
 #ifndef ZFS_MAXNAMELEN
 #define ZFS_MAXNAMELEN  (MAXNAMELEN - 1)
 #endif
@@ -20,6 +28,19 @@
 int debug = 0;
 unsigned int grainsize = GRAINSIZE;
 
+/* Format a message and print it to stderr, tagged with its origin, when verbosity >= level. */
+void debug_print(int level, const char * file, const char * function, int line, const char * fmt, ...)
+{
+	char msg[2048];
+	va_list args;
+	if (verbosity < level)
+		return;		/* message is filtered out at this verbosity */
+	va_start(args, fmt);
+	vsnprintf(msg, sizeof(msg), fmt, args);
+	va_end(args);
+	fprintf(stderr,"[%s:%3d] (%s) %s\n", file, line, function, msg);
+}
+
 int usage(void)
 {
     printf("Bitmap generation usage:\n");
@@ -75,6 +96,7 @@
 static int
 write_callback(int type, void *arg1, void *arg2)
 {
+    LOG_TRACE();
     struct drr_write *drrw = (struct drr_write *)arg1;
     zst_callback_arg_t *arg = (zst_callback_arg_t *)arg2;
 
@@ -82,17 +104,19 @@
 
     /* skip non-zvol object */
     if (drrw->drr_object != ZVOL_OBJECT)
+    {
+        LOG_ERR("drrw->drr_object != ZVOL_OBJECT");
         return (0);
+    }
 
     if (drrw->drr_offset + drrw->drr_logical_size > arg->child_volsize) {
-        fprintf(stderr, "Write [0x%" PRIx64 ":0x%" PRIx64 ") outside zvol "
-                "byte range [0x0:0x%" PRIx64 ")\n",
+        LOG_INFO( "Write [0x%" PRIx64 ":0x%" PRIx64 ") outside zvol "
+                "byte range [0x0:0x%" PRIx64 ")",
                 drrw->drr_offset, drrw->drr_logical_size, arg->child_volsize);
         return (EINVAL);
     }
 
-    if (verbosity > 1)
-        fprintf(stderr, "Write record found [0x%" PRIx64 ":0x%" PRIx64 ")\n",
+    LOG_INFO( "Write record found [0x%" PRIx64 ":0x%" PRIx64 ")",
                 drrw->drr_offset, drrw->drr_offset + drrw->drr_logical_size);
 
     /* make a call to change the bitmap */
@@ -104,14 +128,15 @@
 static int
 write_embedded_callback(int type, void *arg1, void *arg2)
 {
+    LOG_TRACE();
     struct drr_write_embedded *drrwe = (struct drr_write_embedded *)arg1;
     zst_callback_arg_t *arg = (zst_callback_arg_t *)arg2;
 
     assert(type == DRR_WRITE_EMBEDDED);
 
     if (drrwe->drr_offset + drrwe->drr_length > arg->child_volsize) {
-        fprintf(stderr, "Write [%" PRIx64 ":%" PRIx64 ") outside zvol "
-                "byte range [0:%" PRIx64 ")\n",
+        LOG_INFO( "Write [%" PRIx64 ":%" PRIx64 ") outside zvol "
+                "byte range [0:%" PRIx64 ")",
                 drrwe->drr_offset, drrwe->drr_length, arg->child_volsize);
         return (EINVAL);
     }
@@ -120,8 +145,7 @@
     if (drrwe->drr_object != ZVOL_OBJECT)
         return (0);
 
-    if (verbosity)
-        fprintf(stderr, "Write record found [%" PRIx64 ":%" PRIx64 ")\n",
+    LOG_INFO( "Write record found [%" PRIx64 ":%" PRIx64 ")",
                 drrwe->drr_offset, drrwe->drr_offset + drrwe->drr_length);
 
     /* make a call to change the bitmap */
@@ -133,6 +157,7 @@
 static int
 free_callback(int type, void *arg1, void *arg2)
 {
+    LOG_TRACE();
     struct drr_free *drrf = (struct drr_free *)arg1;
     zst_callback_arg_t *arg = (zst_callback_arg_t *)arg2;
 
@@ -143,12 +168,11 @@
         return (0);
 
     if (drrf->drr_length == ZVOL_OBJLEN_MARKER) {
-        if (verbosity > 1)
-            fprintf(stderr, "Truncate record found; offset 0x%" PRIx64 "\n",
+        LOG_INFO( "Truncate record found; offset 0x%" PRIx64 ,
                     drrf->drr_offset);
         if (drrf->drr_offset > arg->child_volsize) {
-            fprintf(stderr, "Offset 0x%" PRIx64 " in trancate record is "
-                    "greater than child_volsize 0x%" PRIx64 "\n",
+            LOG_INFO( "Offset 0x%" PRIx64 " in trancate record is "
+                    "greater than child_volsize 0x%" PRIx64 ,
                     drrf->drr_offset, arg->child_volsize);
             return (EINVAL);
         }
@@ -160,9 +184,8 @@
         return (0);
     }
 
-    if (verbosity > 1)
-        fprintf(stderr, "Free record found [0x%" PRIx64 ":0x%" PRIx64 ")\n",
-                drrf->drr_offset, drrf->drr_offset + drrf->drr_length);
+    LOG_INFO( "Free record found [0x%" PRIx64 ":0x%" PRIx64 ")",
+            drrf->drr_offset, drrf->drr_offset + drrf->drr_length);
 
     /*
      * it is necessary to keep track of new holes that can be ligitimately
@@ -170,24 +193,21 @@
      */
 
     if (drrf->drr_offset + drrf->drr_length > arg->child_volsize) {
-        if (verbosity > 1)
-            fprintf(stderr, "\tThis free record goes outside the larger zvol "
+        LOG_INFO( "\tThis free record goes outside the larger zvol "
                     "byte range [0x0:0x%"PRIx64"), perhaps due to previously "
-                    "encountered truncate\n", arg->child_volsize);
+                    "encountered truncate", arg->child_volsize);
         if (drrf->drr_offset >= arg->child_volsize) {
-            if (verbosity > 1)
-                fprintf(stderr, "\tThis free record if entirely outside "
+            LOG_INFO( "\tThis free record if entirely outside "
                         "zvol byte range [0x0:0x%"PRIx64"), perhaps due to "
-                        "previously encountered truncate\n",
+                        "previously encountered truncate",
                         arg->child_volsize);
             return (0);
         }
         /*
-         * adjust the lenght such that the change is within the zvol byte range
+         * adjust the length such that the change is within the zvol byte range
          */
         drrf->drr_length = arg->child_volsize - drrf->drr_offset;
-        if (verbosity > 1)
-            fprintf(stderr, "Adjusted record to [0x%" PRIx64 ":0x%" PRIx64 ")\n",
+        LOG_INFO( "Adjusted record to [0x%" PRIx64 ":0x%" PRIx64 ")",
                     drrf->drr_offset, drrf->drr_offset + drrf->drr_length);
     }
 
@@ -198,12 +218,10 @@
      * bitmap by writing free bits into the diff_bitmap.
      */
     if (arg->bitmap_pair->free_bitmap) {
-	if (verbosity > 1)
-	    fprintf(stderr, "Generating separate bitmap diffs for free region\n");
+	LOG_INFO( "Generating separate bitmap diffs for free region");
 	bitmap_addbit(arg->bitmap_pair->free_bitmap, drrf->drr_offset, drrf->drr_length);
     } else {
-	if (verbosity > 1)
-	    fprintf(stderr, "Generating consolidated bitmap diffs for free region\n");
+	 LOG_INFO( "Generating consolidated bitmap diffs for free region");
 	bitmap_addbit(arg->bitmap_pair->diff_bitmap, drrf->drr_offset, drrf->drr_length);
     }
 
@@ -346,14 +364,14 @@
 
     /* init libzfs handle */
     if ((g_zfs = libzfs_init()) == NULL) {
-        fprintf(stderr, "Failed to initialize libzfs handle\n");
+        LOG_ERR( "Failed to initialize libzfs handle");
         exit(-1);
     }
 
     /* must be zvols */
     if ((parent_zhp = zfs_open(g_zfs, parent, ZFS_TYPE_VOLUME)) == NULL ||
         (child_zhp = zfs_open(g_zfs, child, ZFS_TYPE_VOLUME)) == NULL) {
-        fprintf(stderr, "Parent %s or child %s is not a volume\n",
+        LOG_ERR( "Parent %s or child %s is not a volume",
                 parent, child);
         exit(-1);
     }
@@ -381,13 +399,13 @@
     /* must be a snapshot name */
     if (NULL == strchr(parent_origin, '@') ||
         NULL == strchr(child_origin, '@')) {
-        fprintf(stderr, "Origin of parent %s or child %s (%s/%s) is not a "
-                "snapshot\n", parent, child, parent_origin, child_origin);
+        LOG_INFO( "Origin of parent %s or child %s (%s/%s) is not a "
+                "snapshot", parent, child, parent_origin, child_origin);
     }
 
     if (parent_volsize > child_volsize) {
-        fprintf(stderr, "Parent volume size 0x%" PRIx64 " cannot be larger "
-                "than child volume size 0x%" PRIx64 "\n",
+        LOG_ERR( "Parent volume size 0x%" PRIx64 " cannot be larger "
+                "than child volume size 0x%" PRIx64 ,
                 parent_volsize, child_volsize);
         exit(-1);
     }
@@ -395,7 +413,7 @@
     /* create the bitmap */
     bitmap_pair = bitmap_pair_create(child_volsize, grainsize);
     if (bitmap_pair == NULL) {
-        fprintf(stderr, "Failed to create bitmaps, not enough memory\n");
+        LOG_ERR( "Failed to create bitmaps, not enough memory");
         exit(-1);
     }
 
@@ -413,20 +431,22 @@
     /* run zfs send and get the output in a pipe */
     (void) snprintf(buffer, sizeof(buffer), "zfs send -B -i %s %s",
                     parent_origin, child_origin);
+    LOG_DEBUG("Running zfs command: %s", buffer);
+
     diff_fp = popen(buffer, "r");
     if (NULL == diff_fp) {
-        fprintf(stderr, "failed to run %s\n", buffer);
+        LOG_ERR( "failed to run %s", buffer);
         exit(-1);
     }
     diff_fd = fileno(diff_fp);
     if (diff_fd == -1) {
-        fprintf(stderr, "invaid file descriptor from popen(%s)\n", buffer);
+        LOG_ERR( "invalid file descriptor from popen(%s)", buffer);
         exit(-1);
     }
 
     /* init send stream traversal handle */
     if ((hdl = zst_init(diff_fd)) == NULL) {
-        fprintf(stderr, "zst_init() failed, not enough memory\n");
+        LOG_ERR( "zst_init() failed, not enough memory");
         exit(-1);
     }
 
@@ -443,19 +463,19 @@
 
     for (i = 0; i < 3; i++) {
         if (zst_register_callback(hdl, &zc[i]) < 0) {
-            fprintf(stderr, "zst_register_callback() failed\n");
+            LOG_ERR( "zst_register_callback() failed");
             exit(-1);
         }
     }
 
     /* invoke traversal */
     if ((rc = zst_traverse(hdl))) {
-        fprintf(stderr, "zst_traverse() error: %d\n", rc);
+        LOG_ERR( "zst_traverse() error: %d", rc);
     }
 
     /* fini handle */
     if (zst_fini(hdl) < 0) {
-        fprintf(stderr, "zsf_fini() error\n");
+        LOG_ERR( "zsf_fini() error");
         exit(-1);
     }
 
@@ -473,16 +493,16 @@
 	 * if using the separate bitmap optimization
 	 */
 	if (blocksize % grainsize) {
-	    fprintf(stderr,
+	    LOG_ERR(
 		    "cannot use separate bitmaps when blocksize %" PRIu64 
-		    " is not a multiple of grainsize %d\n",
+		    " is not a multiple of grainsize %d",
 		    blocksize, grainsize);
 	    exit(-1);
 	}
 
 	rc = post_process_bitmap_pair_zst(bitmap_pair, clone1_holes_file);
 	if (rc) {
-	    fprintf(stderr, "bitmap post-process error\n");
+	    LOG_ERR( "bitmap post-process error");
 	    exit(-1);
 	}
     }
@@ -499,19 +519,21 @@
 
     if ((bitmap_pair = generate_bitmap_zst(parent, child,
 					   separate_bitmaps,
-					   clone1_holes_file)) == NULL)
+					   clone1_holes_file)) == NULL){
+    	LOG_ERR("generate_bitmap_zst failed");
         return (-1);
+    }
 
     if (separate_bitmaps) {
-        /*
-        * get path for the new holes file - 'bitmap-path'.holes\0
-        * write out the new holes
-        */
-        char *new_path = malloc(strlen(bitmap)+strlen(".holes")+1);
-        strcpy(new_path, bitmap);
-        strcat(new_path, ".holes");
+	/*
+	 * get path for the new holes file - 'bitmap-path'.holes\0
+	 * write out the new holes
+	 */
+	char *new_path = malloc(strlen(bitmap)+strlen(".holes")+1);
+	strcpy(new_path, bitmap);
+	strcat(new_path, ".holes");
         holemap_write(bitmap_pair->free_bitmap, new_path);
-        free(new_path);
+	free(new_path);
     }
     
     bitmap_write(bitmap_pair->diff_bitmap, bitmap);
@@ -541,14 +563,14 @@
 
     /* init libzfs handle */
     if ((g_zfs = libzfs_init()) == NULL) {
-        fprintf(stderr, "Failed to initialize libzfs handle\n");
+        LOG_ERR( "Failed to initialize libzfs handle");
         exit(-1);
     }
 
     /* must be zvols */
     if ((parent_zhp = zfs_open(g_zfs, parent, ZFS_TYPE_VOLUME)) == NULL ||
         (child_zhp = zfs_open(g_zfs, child, ZFS_TYPE_VOLUME)) == NULL) {
-        fprintf(stderr, "Parent %s or child %s is not a volume\n",
+        LOG_ERR( "Parent %s or child %s is not a volume",
                 parent, child);
         exit(-1);
     }
@@ -567,17 +589,17 @@
     libzfs_fini(g_zfs);
 
     if (parent_volsize > child_volsize) {
-        fprintf(stderr, "Parent volume size 0x%" PRIx64 " cannot be larger "
-                "than child volume size 0x%" PRIx64 "\n",
+        LOG_ERR( "Parent volume size 0x%" PRIx64 " cannot be larger "
+                "than child volume size 0x%" PRIx64 "",
                 parent_volsize, child_volsize);
         exit(-1);
     }
 
     /* volume sizes should be integer number of grains */
     if (parent_volsize % grainsize || child_volsize % grainsize) {
-        fprintf(stderr, "Parent volume size 0x%" PRIx64
+        LOG_ERR( "Parent volume size 0x%" PRIx64
                 "or child volume size 0x%" PRIx64
-                "is not a multiple of grain size 0x%x\n",
+                "is not a multiple of grain size 0x%x",
                 parent_volsize, child_volsize, grainsize);
         exit(-1);
     }
@@ -585,20 +607,24 @@
     /* create the bitmap */
     diff_bitmap = bitmap_create(child_volsize, grainsize);
     if (diff_bitmap == NULL) {
-        fprintf(stderr, "Failed to create bitmap, not enough memory\n");
+        LOG_ERR( "Failed to create bitmap, not enough memory");
         return (NULL);
     }
     /* open parent and child */
-    sprintf(buffer, "/dev/zvol/%s", parent);
+    snprintf(buffer, sizeof(buffer), "/dev/zvol/%s", parent);
     if ((fd[PARENT] = open(buffer, O_RDONLY)) < 0) {
-	sprintf(buffer, "parent %s open() failed", parent);
-	perror(buffer);
+        char errbuf[256];
+	memset(errbuf,0,sizeof(errbuf));
+	strerror_r(errno, errbuf, sizeof(errbuf)-1);
+	LOG_ERR("%s: parent %s open() failed", errbuf, parent);
         exit(-1);
     }
-    sprintf(buffer, "/dev/zvol/%s", child);
+    snprintf(buffer, sizeof(buffer), "/dev/zvol/%s", child);
     if ((fd[CHILD] = open(buffer, O_RDONLY)) < 0) {
-	sprintf(buffer, "child %s open() failed", child);
-        perror(buffer);
+        char errbuf[256];
+	memset(errbuf,0,sizeof(errbuf));
+	strerror_r(errno, errbuf, sizeof(errbuf)-1);
+	LOG_ERR("%s: child %s open() failed", errbuf, child);
         exit(-1);
     }
 
@@ -620,8 +646,7 @@
                     break;
             }
             if (rd < grainsize) {
-                fprintf(stderr, "read() of %s failed",
-                        (a == PARENT) ? "parent" : "child");
+                LOG_ERR( "read() of %s failed", (a == PARENT) ? "parent" : "child");
                 exit(-1);
             }
         }
@@ -651,7 +676,7 @@
                     break;
             }
             if (rd < grainsize) {
-                fprintf(stderr, "read() of child failed");
+                LOG_ERR( "read() of child failed");
                 exit(-1);
             }
 
@@ -737,7 +762,7 @@
 	    printf("Bitmaps are saved for further analysis in /tmp/bitmap.*\n");
 	    bitmap_write(bmap_pair_zst->diff_bitmap, "/tmp/bitmap.diff");
 	    if (bmap_pair_zst->free_bitmap)
-		holemap_write(bmap_pair_zst->free_bitmap, "/tmp/bitmap.hole");
+		bitmap_write(bmap_pair_zst->free_bitmap, "/tmp/bitmap.hole");
 	    bitmap_write(bmap_raw, "/tmp/bitmap.raw");
     }
 
@@ -795,7 +820,7 @@
             break;
         case '?':
             usage();
-            fprintf(stderr, "invalid argument %c\n", optopt);
+            LOG_ERR( "invalid argument %c\n", optopt);
             return (-1);
         }
     }
@@ -813,7 +838,7 @@
 	for (mask = grainsize - 1; (mask && (mask & 1)); mask >>= 1)
 	    ;
 	if (mask) {
-	    fprintf(stderr, "grainsize %u is not a power of two\n", grainsize);
+	    LOG_ERR( "grainsize %u is not a power of two\n", grainsize);
 	}
     } while (0);
 	

diff --git a/zfs/cmd/zfstool/zst.c b/zfs/cmd/zfstool/zst.c
index 9751b02..64e96df 100644
--- a/zfs/cmd/zfstool/zst.c
+++ b/zfs/cmd/zfstool/zst.c

@@ -1,12 +1,13 @@
-/* ============================================================
- * Copyright (c) 2014 Actifio Inc. All Rights Reserved
- */
-
+// Copyright 2014-2023 Google LLC
+//
 #include <stdio.h>
 #include <zfs_fletcher.h>
 #include <sys/zfs_ioctl.h>
 
 #include "zst.h"
+#include "zfstlog.h"
+
+
 
 /* private type definitions */
 struct zst_handle {
@@ -88,6 +89,7 @@
 sread(int fd, char *buffer, size_t length)
 {
     ssize_t n = 0;
+    int retries=4;
 
     while (n < length) {
         ssize_t io = read(fd, buffer, length);
@@ -95,7 +97,13 @@
         if (io < 0)
             return (-1);
         else if (io == 0)
-            return (n);
+	{
+	    if(--retries <= 0)
+                return (n);
+	    else
+		usleep(1000);
+	    LOG_DEBUG("Retrying read: %s", strerror(errno));
+	}
         else
             n += io;
     }
@@ -109,6 +117,8 @@
 {
     ssize_t io = sread(hdl->fd, hdl->buf, length);
 
+    LOG_DEBUG("Read %lu bytes should have read %lu", io, length);
+
     if (io != length)
         return NULL;
 
@@ -122,16 +132,17 @@
 get_begin_section(zst_handle_t *hdl, struct drr_begin *drrb)
 {
     if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
-        fprintf(stderr, "Non-native stream format\n");
+        LOG_ERR("Non-native stream format");
         hdl->err = EINVAL;
         return (hdl->err);
     }
     if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
-        fprintf(stderr, "Invalid stream format\n");
+        LOG_ERR("Invalid stream format");
         hdl->err = EINVAL;
         return (hdl->err);
     }
 
+    LOG_DEBUG("hdl->zc_array[DRR_BEGIN].cb=%d", hdl->zc_array[DRR_BEGIN].cb);
     if (hdl->zc_array[DRR_BEGIN].cb) {
         zst_callback_descr_t *d = &hdl->zc_array[DRR_BEGIN];
         return ((*d->cb)(DRR_BEGIN, (void *)drrb, d->arg));
@@ -144,7 +155,7 @@
 get_object_section(zst_handle_t *hdl, struct drr_object *drro)
 {
     int rc = 0;
-    void *data = NULL;
+    // void *data = NULL;
 
     if (drro->drr_type == DMU_OT_NONE ||
         !DMU_OT_IS_VALID(drro->drr_type) ||
@@ -159,7 +170,8 @@
     }
 
     if (drro->drr_bonuslen) {
-        data = get_next_section(hdl, P2ROUNDUP(drro->drr_bonuslen, 8));
+        // data = 
+	get_next_section(hdl, P2ROUNDUP(drro->drr_bonuslen, 8));
         if (hdl->err != 0)
             return (hdl->err);
     }
@@ -278,7 +290,10 @@
     zio_cksum_t pcksum = {{0}};
 
     if (hdl == NULL)
+    {
+	    LOG_ERR( "invalid handle");
         return (EINVAL);
+    }
 
     /*
      * Go through the records invoking the registered callbacks
@@ -290,10 +305,13 @@
     while ((hdl->err == 0) &&
            ((drr = get_next_section(hdl, sizeof (*drr))) != NULL)) {
 
+        LOG_INFO( "type=%d",drr->drr_type);
+	
         switch (drr->drr_type) {
         case DRR_BEGIN:
         {
             struct drr_begin drrb = drr->drr_u.drr_begin;
+            LOG_INFO( "DRR_BEGIN");
             hdl->err = get_begin_section(hdl, &drrb);
             break;
         }
@@ -305,6 +323,7 @@
              * value, because the stored checksum is of
              * everything before the DRR_END record.
              */
+            LOG_INFO( "DRR_END");
             if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
                 hdl->err = ECKSUM;
             if (hdl->zc_array[DRR_END].cb) {
@@ -317,6 +336,7 @@
         case DRR_OBJECT:
         {
             struct drr_object drro = drr->drr_u.drr_object;
+            LOG_INFO( "DRR_OBJECT");
             hdl->err = get_object_section(hdl, &drro);
             break;
         }
@@ -324,12 +344,14 @@
         {
             struct drr_freeobjects drrfo =
                 drr->drr_u.drr_freeobjects;
+            LOG_INFO( "DRR_FREEOBJECT");
             hdl->err = get_freeobjects_section(hdl, &drrfo);
             break;
         }
         case DRR_WRITE:
         {
             struct drr_write drrw = drr->drr_u.drr_write;
+            LOG_INFO( "DRR_WRITE");
             hdl->err = get_write_section(hdl, &drrw);
             break;
         }
@@ -337,6 +359,7 @@
         {
             struct drr_write_embedded drrwe =
                 drr->drr_u.drr_write_embedded;
+            LOG_INFO( "DRR_EMBEDDED");
             hdl->err = get_write_embedded_section(hdl, &drrwe);
             break;
         }
@@ -344,23 +367,28 @@
         {
             struct drr_write_byref drrwbr =
                 drr->drr_u.drr_write_byref;
+            LOG_INFO( "DRR_BYREF");
             hdl->err = get_write_byref_section(hdl, &drrwbr);
             break;
         }
         case DRR_FREE:
         {
             struct drr_free drrf = drr->drr_u.drr_free;
+            LOG_INFO( "DRR_FREE");
             hdl->err = get_free_section(hdl, &drrf);
             break;
         }
         case DRR_SPILL:
         {
             struct drr_spill drrs = drr->drr_u.drr_spill;
+            LOG_INFO( "DRR_SPILL");
             hdl->err = get_spill_section(hdl, &drrs);
             break;
         }
         default:
-            hdl->err = EINVAL;
+            LOG_INFO( "default");
+         //   hdl->err = EINVAL;
+              hdl->err = 0;
         }
 
         pcksum = hdl->cksum;
@@ -369,8 +397,7 @@
 done:
     /* checked for well-formed stream */
     if (drr == NULL || hdl->err) {
-        fprintf(stderr, "Incomplete/invalid stream format, status %d\n",
-                hdl->err);
+        LOG_ERR( "Incomplete/invalid stream format, status %d (%d)", hdl->err, (drr) ? drr->drr_type : 9999999);
         return (-1);
     }
 

diff --git a/zfs/cmd/zfstool/zst.h b/zfs/cmd/zfstool/zst.h
index e4af919..b2b2756 100644
--- a/zfs/cmd/zfstool/zst.h
+++ b/zfs/cmd/zfstool/zst.h

@@ -1,7 +1,5 @@
-/* ============================================================
- * Copyright (c) 2014 Actifio Inc. All Rights Reserved
- */
-
+// Copyright 2014-2023 Google LLC
+//
 #ifndef ZST_H
 #define ZST_H
 

diff --git a/zfs/cmd/zgenhostid/.gitignore b/zfs/cmd/zgenhostid/.gitignore
new file mode 100644
index 0000000..072246c
--- /dev/null
+++ b/zfs/cmd/zgenhostid/.gitignore

@@ -0,0 +1 @@
+/zgenhostid

diff --git a/zfs/cmd/zgenhostid/Makefile.am b/zfs/cmd/zgenhostid/Makefile.am
index 69c99ca..4526a90 100644
--- a/zfs/cmd/zgenhostid/Makefile.am
+++ b/zfs/cmd/zgenhostid/Makefile.am

@@ -1 +1,7 @@
-dist_bin_SCRIPTS = zgenhostid
+include $(top_srcdir)/config/Rules.am
+
+sbin_PROGRAMS = zgenhostid
+
+zgenhostid_SOURCES = zgenhostid.c
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zgenhostid/zgenhostid b/zfs/cmd/zgenhostid/zgenhostid
deleted file mode 100755
index db690ec..0000000
--- a/zfs/cmd/zgenhostid/zgenhostid
+++ /dev/null

@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# Emulate genhostid(1) available on RHEL/CENTOS, for use on distros
-# which do not provide that utility.
-#
-# Usage:
-#    zgenhostid
-#    zgenhostid <value>
-#
-# If /etc/hostid already exists and is size > 0, the script exits immediately
-# and changes nothing.  Unlike genhostid, this generates an error message.
-#
-# The first form generates a random hostid and stores it in /etc/hostid.
-# The second form checks that the provided value is between 0x1 and 0xFFFFFFFF
-# and if so, stores it in /etc/hostid.  This form is not supported by
-# genhostid(1).
-
-hostid_file=/etc/hostid
-
-function usage {
-	echo "$0 [value]"
-	echo "If $hostid_file is not present, store a hostid in it." >&2
-	echo "The optional value must be an 8-digit hex number between" >&2
-	echo "1 and 2^32-1.  If no value is provided, a random one will" >&2
-	echo "be generated.  The value must be unique among your systems." >&2
-}
-
-# hostid(1) ignores contents of /etc/hostid if size < 4 bytes.  It would
-# be better if this checked size >= 4 bytes but it the method must be
-# widely portable.
-if [ -s $hostid_file ]; then
-	echo "$hostid_file already exists.  No change made." >&2
-	exit 1
-fi
-
-if [ -n "$1" ]; then
-	host_id=$1
-else
-	# $RANDOM goes from 0..32k-1
-	number=$((((RANDOM % 4) * 32768 + RANDOM) * 32768 + RANDOM))
-	host_id=$(printf "%08x" $number)
-fi
-
-if egrep -o '^0{8}$' <<< $host_id >/dev/null 2>&1; then
-	usage
-	exit 2
-fi
-
-if ! egrep -o '^[a-fA-F0-9]{8}$' <<< $host_id >/dev/null 2>&1; then
-	usage
-	exit 3
-fi
-
-a=${host_id:6:2}
-b=${host_id:4:2}
-c=${host_id:2:2}
-d=${host_id:0:2}
-
-echo -ne \\x$a\\x$b\\x$c\\x$d > $hostid_file
-
-exit 0

diff --git a/zfs/cmd/zgenhostid/zgenhostid.c b/zfs/cmd/zgenhostid/zgenhostid.c
new file mode 100644
index 0000000..4a4ca80
--- /dev/null
+++ b/zfs/cmd/zgenhostid/zgenhostid.c

@@ -0,0 +1,142 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020, Georgy Yakovlev.  All rights reserved.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+
+/* Print the help text to stderr and terminate with EXIT_FAILURE; never returns. */
+static void
+usage(void)
+{
+	(void) fputs(
+	    "usage: zgenhostid [-fh] [-o path] [value]\n\n"
+	    "  -f\t\t force hostid file write\n"
+	    "  -h\t\t print this usage and exit\n"
+	    "  -o <filename>\t write hostid to this file\n\n"
+	    "If hostid file is not present, store a hostid in it.\n"
+	    "The optional value should be an 8-digit hex number between"
+	    " 1 and 2^32-1.\n"
+	    "If the value is 0 or no value is provided, a random one"
+	    " will be generated.\n"
+	    "The value must be unique among your systems.\n", stderr);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Store a 4-byte native-endian hostid, given as [value] or random, in a file.
+ */
+int
+main(int argc, char **argv)
+{
+	/* default file path, can be optionally set by user */
+	const char *path = "/etc/hostid";
+	/* holds converted user input or lrand48() generated value */
+	unsigned long input_i = 0;
+
+	int opt;
+	int force_fwrite = 0;
+	while ((opt = getopt_long(argc, argv, "fo:h?", 0, 0)) != -1) {
+		switch (opt) {
+		case 'f':
+			force_fwrite = 1;
+			break;
+		case 'o':
+			path = optarg;
+			break;
+		case 'h':
+		case '?':
+			usage();
+		}
+	}
+
+	char *in_s = argv[optind];
+	if (in_s != NULL) {
+		/* increment pointer by 2 if string is 0x prefixed */
+		if (strncasecmp("0x", in_s, 2) == 0)
+			in_s += 2;
+
+		/* need to be exactly 8 characters */
+		const char *hex = "0123456789abcdefABCDEF";
+		if (strlen(in_s) != 8 || strspn(in_s, hex) != 8) {
+			fprintf(stderr, "%s\n", strerror(ERANGE));
+			usage();
+		}
+
+		errno = 0;	/* strtoul() does not clear errno itself */
+		input_i = strtoul(in_s, NULL, 16);
+		if (errno != 0) {
+			perror("strtoul");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	struct stat st;
+	if (force_fwrite == 0 && stat(path, &st) == 0 && S_ISREG(st.st_mode)) {
+		fprintf(stderr, "%s: %s\n", path, strerror(EEXIST));
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * generate if not provided by user
+	 * also handle unlikely zero return from lrand48()
+	 */
+	while (input_i == 0) {
+		srand48(getpid() ^ time(NULL));
+		input_i = lrand48();
+	}
+
+	FILE *fp = fopen(path, "wb");
+	if (!fp) {
+		perror("fopen");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * we need just 4 bytes in native endianness
+	 * not using sethostid() because it may be missing or just a stub
+	 */
+	uint32_t hostid = input_i;
+	if (fwrite(&hostid, 1, 4, fp) != 4) {
+		perror("fwrite");
+		exit(EXIT_FAILURE);
+	}
+
+	/* data is still buffered; a failed write only shows up at fclose() */
+	if (fclose(fp) != 0) {
+		perror("fclose");
+		exit(EXIT_FAILURE);
+	}
+	exit(EXIT_SUCCESS);
+}

diff --git a/zfs/cmd/zhack/Makefile.am b/zfs/cmd/zhack/Makefile.am
index 6e3e706..23f03ff 100644
--- a/zfs/cmd/zhack/Makefile.am
+++ b/zfs/cmd/zhack/Makefile.am

@@ -1,8 +1,7 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# Unconditionally enable debugging for zhack
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 
 sbin_PROGRAMS = zhack
 
@@ -10,5 +9,8 @@
 	zhack.c
 
 zhack_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzpool/libzpool.la
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zhack/zhack.c b/zfs/cmd/zhack/zhack.c
index 57e497f..0826312 100644
--- a/zfs/cmd/zhack/zhack.c
+++ b/zfs/cmd/zhack/zhack.c

@@ -103,8 +103,8 @@
 
 /* ARGSUSED */
 static int
-space_delta_cb(dmu_object_type_t bonustype, void *data,
-    uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
+space_delta_cb(dmu_object_type_t bonustype, const void *data,
+    zfs_file_info_t *zoi)
 {
 	/*
 	 * Is it a valid type of object to track?
@@ -126,7 +126,8 @@
 	nvlist_t *props;
 	int error;
 
-	kernel_init(readonly ? FREAD : (FREAD | FWRITE));
+	kernel_init(readonly ? SPA_MODE_READ :
+	    (SPA_MODE_READ | SPA_MODE_WRITE));
 
 	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
 
@@ -149,6 +150,7 @@
 	zfeature_checks_disable = B_TRUE;
 	error = spa_import(target, config, props,
 	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
+	fnvlist_free(config);
 	zfeature_checks_disable = B_FALSE;
 	if (error == EEXIST)
 		error = 0;

diff --git a/zfs/cmd/zinject/Makefile.am b/zfs/cmd/zinject/Makefile.am
index ab7f4de..40f382c 100644
--- a/zfs/cmd/zinject/Makefile.am
+++ b/zfs/cmd/zinject/Makefile.am

@@ -1,9 +1,5 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 sbin_PROGRAMS = zinject
 
 zinject_SOURCES = \
@@ -12,5 +8,8 @@
 	zinject.h
 
 zinject_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzfs/libzfs.la
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zinject/translate.c b/zfs/cmd/zinject/translate.c
index d4795d0..4939c0b 100644
--- a/zfs/cmd/zinject/translate.c
+++ b/zfs/cmd/zinject/translate.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  */
 
 #include <libzfs.h>
@@ -85,8 +85,6 @@
     struct stat64 *statbuf)
 {
 	struct extmnttab mp;
-	FILE *fp;
-	int match;
 	const char *rel;
 	char fullpath[MAXPATHLEN];
 
@@ -99,35 +97,7 @@
 		return (-1);
 	}
 
-	if (strlen(fullpath) >= MAXPATHLEN) {
-		(void) fprintf(stderr, "invalid object; pathname too long\n");
-		return (-1);
-	}
-
-	if (stat64(fullpath, statbuf) != 0) {
-		(void) fprintf(stderr, "cannot open '%s': %s\n",
-		    fullpath, strerror(errno));
-		return (-1);
-	}
-
-#ifdef HAVE_SETMNTENT
-	if ((fp = setmntent(MNTTAB, "r")) == NULL) {
-#else
-	if ((fp = fopen(MNTTAB, "r")) == NULL) {
-#endif
-		(void) fprintf(stderr, "cannot open %s\n", MNTTAB);
-		return (-1);
-	}
-
-	match = 0;
-	while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
-		if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
-			match = 1;
-			break;
-		}
-	}
-
-	if (!match) {
+	if (getextmntent(fullpath, &mp, statbuf) != 0) {
 		(void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
 		    fullpath);
 		return (-1);
@@ -418,7 +388,7 @@
 		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 		break;
 	case TYPE_LABEL_PAD2:
-		record->zi_start = offsetof(vdev_label_t, vl_pad2);
+		record->zi_start = offsetof(vdev_label_t, vl_be);
 		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 		break;
 	}

diff --git a/zfs/cmd/zinject/zinject.c b/zfs/cmd/zinject/zinject.c
index 1795bfd..bf97b0d 100644
--- a/zfs/cmd/zinject/zinject.c
+++ b/zfs/cmd/zinject/zinject.c

@@ -159,8 +159,6 @@
 libzfs_handle_t *g_zfs;
 int zfs_fd;
 
-#define	ECKSUM	EBADE
-
 static const char *errtable[TYPE_INVAL] = {
 	"data",
 	"dnode",
@@ -340,7 +338,7 @@
 	zfs_cmd_t zc = {"\0"};
 	int ret;
 
-	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
+	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
 		    &zc.zc_inject_record, data)) != 0)
 			return (ret);
@@ -508,7 +506,7 @@
 
 	zc.zc_guid = (uint64_t)id;
 
-	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
 		    id, strerror(errno));
 		return (1);
@@ -541,7 +539,7 @@
 
 	zc.zc_guid = (uint64_t)id;
 
-	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
 		    id, strerror(errno));
 		return (1);
@@ -565,7 +563,7 @@
 	zc.zc_inject_record = *record;
 	zc.zc_guid = flags;
 
-	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
+	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
 		(void) fprintf(stderr, "failed to add handler: %s\n",
 		    errno == EDOM ? "block level exceeds max level of object" :
 		    strerror(errno));
@@ -615,7 +613,7 @@
 	return (0);
 }
 
-int
+static int
 perform_action(const char *pool, zinject_record_t *record, int cmd)
 {
 	zfs_cmd_t zc = {"\0"};
@@ -625,7 +623,7 @@
 	zc.zc_guid = record->zi_guid;
 	zc.zc_cookie = cmd;
 
-	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
 
 	return (1);

diff --git a/zfs/cmd/zpool/Makefile.am b/zfs/cmd/zpool/Makefile.am
index c03da94..fa494c0 100644
--- a/zfs/cmd/zpool/Makefile.am
+++ b/zfs/cmd/zpool/Makefile.am

@@ -1,8 +1,9 @@
 include $(top_srcdir)/config/Rules.am
+include $(top_srcdir)/config/Shellcheck.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CFLAGS += $(LIBBLKID_CFLAGS) $(LIBUUID_CFLAGS)
+
+DEFAULT_INCLUDES += -I$(srcdir)
 
 sbin_PROGRAMS = zpool
 
@@ -13,19 +14,37 @@
 	zpool_util.h \
 	zpool_vdev.c
 
-zpool_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libuutil/libuutil.la \
-	$(top_builddir)/lib/libzfs/libzfs.la
+if BUILD_FREEBSD
+zpool_SOURCES += os/freebsd/zpool_vdev_os.c
+endif
 
-zpool_LDADD += -lm $(LIBBLKID)
+if BUILD_LINUX
+zpool_SOURCES += os/linux/zpool_vdev_os.c
+endif
+
+zpool_LDADD = \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libuutil/libuutil.la \
+	$(abs_top_builddir)/lib/libzutil/libzutil.la
+
+zpool_LDADD += $(LTLIBINTL)
+
+if BUILD_FREEBSD
+zpool_LDADD += -lgeom
+endif
+zpool_LDADD += -lm $(LIBBLKID_LIBS) $(LIBUUID_LIBS)
+
+include $(top_srcdir)/config/CppCheck.am
 
 zpoolconfdir = $(sysconfdir)/zfs/zpool.d
 zpoolexecdir = $(zfsexecdir)/zpool.d
 
-EXTRA_DIST = zpool.d/README
+EXTRA_DIST = zpool.d/README compatibility.d
 
 dist_zpoolexec_SCRIPTS = \
+	zpool.d/dm-deps \
 	zpool.d/enc \
 	zpool.d/encdev \
 	zpool.d/fault_led \
@@ -40,7 +59,6 @@
 	zpool.d/serial \
 	zpool.d/ses \
 	zpool.d/size \
-	zpool.d/slaves \
 	zpool.d/slot \
 	zpool.d/smart \
 	zpool.d/smartx \
@@ -70,6 +88,7 @@
 	zpool.d/test_ended
 
 zpoolconfdefaults = \
+	dm-deps \
 	enc \
 	encdev \
 	fault_led \
@@ -84,7 +103,6 @@
 	serial \
 	ses \
 	size \
-	slaves \
 	slot \
 	smart \
 	smartx \
@@ -113,6 +131,52 @@
 	test_progress \
 	test_ended
 
+zpoolcompatdir = $(pkgdatadir)/compatibility.d
+
+dist_zpoolcompat_DATA = \
+	compatibility.d/compat-2018 \
+	compatibility.d/compat-2019 \
+	compatibility.d/compat-2020 \
+	compatibility.d/compat-2021 \
+	compatibility.d/freebsd-11.0 \
+	compatibility.d/freebsd-11.2 \
+	compatibility.d/freebsd-11.3 \
+	compatibility.d/freenas-9.10.2 \
+	compatibility.d/grub2 \
+	compatibility.d/openzfsonosx-1.7.0 \
+	compatibility.d/openzfsonosx-1.8.1 \
+	compatibility.d/openzfsonosx-1.9.3 \
+	compatibility.d/openzfs-2.0-freebsd \
+	compatibility.d/openzfs-2.0-linux \
+	compatibility.d/openzfs-2.1-freebsd \
+	compatibility.d/openzfs-2.1-linux \
+	compatibility.d/zol-0.6.1 \
+	compatibility.d/zol-0.6.4 \
+	compatibility.d/zol-0.6.5 \
+	compatibility.d/zol-0.7 \
+	compatibility.d/zol-0.8
+
+# canonical <- alias symbolic link pairs
+# eg: "2018" is a link to "compat-2018"
+zpoolcompatlinks = \
+	"compat-2018		2018" \
+	"compat-2019		2019" \
+	"compat-2020		2020" \
+	"compat-2021		2021" \
+	"freebsd-11.0		freebsd-11.1" \
+	"freebsd-11.0		freenas-11.0" \
+	"freebsd-11.2		freenas-11.2" \
+	"freebsd-11.3		freebsd-11.4" \
+	"freebsd-11.3		freebsd-12.0" \
+	"freebsd-11.3		freebsd-12.1" \
+	"freebsd-11.3		freebsd-12.2" \
+	"freebsd-11.3		freenas-11.3" \
+	"freenas-11.0		freenas-11.1" \
+	"openzfsonosx-1.9.3	openzfsonosx-1.9.4" \
+	"openzfs-2.0-freebsd	truenas-12.0" \
+	"zol-0.7		ubuntu-18.04" \
+	"zol-0.8		ubuntu-20.04"
+
 install-data-hook:
 	$(MKDIR_P) "$(DESTDIR)$(zpoolconfdir)"
 	for f in $(zpoolconfdefaults); do \
@@ -120,3 +184,6 @@
 	       -L "$(DESTDIR)$(zpoolconfdir)/$${f}" || \
 	    ln -s "$(zpoolexecdir)/$${f}" "$(DESTDIR)$(zpoolconfdir)"; \
 	done
+	# Create every alias symlink inside the installed compatibility.d directory.
+	# The loop variable is expanded unquoted on purpose: each list entry is a
+	# "canonical alias" pair that must split into the two arguments of ln.
+	# Use && so ln never runs in the wrong directory when cd fails, and abort
+	# on the first failure instead of only reporting the last iteration.
+	for l in $(zpoolcompatlinks); do \
+		(cd "$(DESTDIR)$(zpoolcompatdir)" && ln -sf $${l}) || exit 1; \
+	done

diff --git a/zfs/cmd/zpool/compatibility.d/compat-2018 b/zfs/cmd/zpool/compatibility.d/compat-2018
new file mode 100644
index 0000000..7be44e1
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/compat-2018

@@ -0,0 +1,12 @@
+# Features supported by all Tier 1 platforms as of 2018
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/compat-2019 b/zfs/cmd/zpool/compatibility.d/compat-2019
new file mode 100644
index 0000000..c105cc7
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/compat-2019

@@ -0,0 +1,15 @@
+# Features supported by all Tier 1 platforms as of 2019
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+sha512
+skein
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/compat-2020 b/zfs/cmd/zpool/compatibility.d/compat-2020
new file mode 100644
index 0000000..8d46a57
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/compat-2020

@@ -0,0 +1,15 @@
+# Features supported by all Tier 1 platforms as of 2020
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+sha512
+skein
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/compat-2021 b/zfs/cmd/zpool/compatibility.d/compat-2021
new file mode 100644
index 0000000..f45c82d
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/compat-2021

@@ -0,0 +1,19 @@
+# Features supported by all Tier 1 platforms as of 2021
+async_destroy
+bookmarks
+device_removal
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/compatibility.d/freebsd-11.0 b/zfs/cmd/zpool/compatibility.d/freebsd-11.0
new file mode 100644
index 0000000..8718559
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/freebsd-11.0

@@ -0,0 +1,15 @@
+# Features supported by FreeBSD 11.0
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+sha512
+skein
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/freebsd-11.2 b/zfs/cmd/zpool/compatibility.d/freebsd-11.2
new file mode 100644
index 0000000..14d2d57
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/freebsd-11.2

@@ -0,0 +1,18 @@
+# Features supported by FreeBSD 11.2
+async_destroy
+bookmarks
+device_removal
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+sha512
+skein
+spacemap_histogram
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/compatibility.d/freebsd-11.3 b/zfs/cmd/zpool/compatibility.d/freebsd-11.3
new file mode 100644
index 0000000..802cc36
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/freebsd-11.3

@@ -0,0 +1,19 @@
+# Features supported by FreeBSD 11.3
+async_destroy
+bookmarks
+device_removal
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/compatibility.d/freenas-9.10.2 b/zfs/cmd/zpool/compatibility.d/freenas-9.10.2
new file mode 100644
index 0000000..10789c9
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/freenas-9.10.2

@@ -0,0 +1,13 @@
+# Features supported by FreeNAS 9.10.2
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/grub2 b/zfs/cmd/zpool/compatibility.d/grub2
new file mode 100644
index 0000000..4e8f213
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/grub2

@@ -0,0 +1,12 @@
+# Features which are supported by GRUB2
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/openzfs-2.0-freebsd b/zfs/cmd/zpool/compatibility.d/openzfs-2.0-freebsd
new file mode 100644
index 0000000..e7ee2f2
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfs-2.0-freebsd

@@ -0,0 +1,33 @@
+# Features supported by OpenZFS 2.0 on FreeBSD
+allocation_classes
+async_destroy
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+livelist
+log_spacemap
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+redacted_datasets
+redaction_bookmarks
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint
+zstd_compress

diff --git a/zfs/cmd/zpool/compatibility.d/openzfs-2.0-linux b/zfs/cmd/zpool/compatibility.d/openzfs-2.0-linux
new file mode 100644
index 0000000..ac0f5c8
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfs-2.0-linux

@@ -0,0 +1,34 @@
+# Features supported by OpenZFS 2.0 on Linux
+allocation_classes
+async_destroy
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+livelist
+log_spacemap
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+redacted_datasets
+redaction_bookmarks
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint
+zstd_compress

diff --git a/zfs/cmd/zpool/compatibility.d/openzfs-2.1-freebsd b/zfs/cmd/zpool/compatibility.d/openzfs-2.1-freebsd
new file mode 100644
index 0000000..9fde997
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfs-2.1-freebsd

@@ -0,0 +1,34 @@
+# Features supported by OpenZFS 2.1 on FreeBSD
+allocation_classes
+async_destroy
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+draid
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+livelist
+log_spacemap
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+redacted_datasets
+redaction_bookmarks
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint
+zstd_compress

diff --git a/zfs/cmd/zpool/compatibility.d/openzfs-2.1-linux b/zfs/cmd/zpool/compatibility.d/openzfs-2.1-linux
new file mode 100644
index 0000000..c3ff176
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfs-2.1-linux

@@ -0,0 +1,35 @@
+# Features supported by OpenZFS 2.1 on Linux
+allocation_classes
+async_destroy
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+draid
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+livelist
+log_spacemap
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+redacted_datasets
+redaction_bookmarks
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint
+zstd_compress

diff --git a/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.7.0 b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.7.0
new file mode 100644
index 0000000..4ae87c9
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.7.0

@@ -0,0 +1,16 @@
+# Features supported by OpenZFSonOSX 1.7.0
+async_destroy
+bookmarks
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+sha512
+skein
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1 b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1
new file mode 100644
index 0000000..162ff32
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1

@@ -0,0 +1,21 @@
+# Features supported by OpenZFSonOSX 1.8.1
+async_destroy
+bookmarks
+device_removal
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.9.3 b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.9.3
new file mode 100644
index 0000000..b0b28ec
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/openzfsonosx-1.9.3

@@ -0,0 +1,27 @@
+# Features supported by OpenZFSonOSX 1.9.3
+allocation_classes
+async_destroy
+bookmark_v2
+bookmarks
+device_removal
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/compatibility.d/zol-0.6.1 b/zfs/cmd/zpool/compatibility.d/zol-0.6.1
new file mode 100644
index 0000000..9bc963d
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/zol-0.6.1

@@ -0,0 +1,4 @@
+# Features supported by ZFSonLinux v0.6.1
+async_destroy
+empty_bpobj
+lz4_compress

diff --git a/zfs/cmd/zpool/compatibility.d/zol-0.6.4 b/zfs/cmd/zpool/compatibility.d/zol-0.6.4
new file mode 100644
index 0000000..82a2698
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/zol-0.6.4

@@ -0,0 +1,10 @@
+# Features supported by ZFSonLinux v0.6.4
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+hole_birth
+lz4_compress
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/zol-0.6.5 b/zfs/cmd/zpool/compatibility.d/zol-0.6.5
new file mode 100644
index 0000000..cb9a94d
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/zol-0.6.5

@@ -0,0 +1,12 @@
+# Features supported by ZFSonLinux v0.6.5
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+spacemap_histogram

diff --git a/zfs/cmd/zpool/compatibility.d/zol-0.7 b/zfs/cmd/zpool/compatibility.d/zol-0.7
new file mode 100644
index 0000000..22a0293
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/zol-0.7

@@ -0,0 +1,18 @@
+# Features supported by ZFSonLinux v0.7
+async_destroy
+bookmarks
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+lz4_compress
+multi_vdev_crash_dump
+sha512
+skein
+spacemap_histogram
+userobj_accounting

diff --git a/zfs/cmd/zpool/compatibility.d/zol-0.8 b/zfs/cmd/zpool/compatibility.d/zol-0.8
new file mode 100644
index 0000000..762848e
--- /dev/null
+++ b/zfs/cmd/zpool/compatibility.d/zol-0.8

@@ -0,0 +1,27 @@
+# Features supported by ZFSonLinux v0.8
+allocation_classes
+async_destroy
+bookmark_v2
+bookmarks
+device_removal
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+large_dnode
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+project_quota
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+zpool_checkpoint

diff --git a/zfs/cmd/zpool/os/freebsd/zpool_vdev_os.c b/zfs/cmd/zpool/os/freebsd/zpool_vdev_os.c
new file mode 100644
index 0000000..aa66d29
--- /dev/null
+++ b/zfs/cmd/zpool/os/freebsd/zpool_vdev_os.c

@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2017 Intel Corporation.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ */
+
+/*
+ * Functions to convert between a list of vdevs and an nvlist representing the
+ * configuration.  Each entry in the list can be one of:
+ *
+ * 	Device vdevs
+ * 		disk=(path=..., devid=...)
+ * 		file=(path=...)
+ *
+ * 	Group vdevs
+ * 		raidz[1|2]=(...)
+ * 		mirror=(...)
+ *
+ * 	Hot spares
+ *
+ * While the underlying implementation supports it, group vdevs cannot contain
+ * other group vdevs.  All userland verification of devices is contained within
+ * this file.  If successful, the nvlist returned can be passed directly to the
+ * kernel; we've done as much verification as possible in userland.
+ *
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
+ * The only function exported by this file is 'make_root_vdev'.  The
+ * function performs several passes:
+ *
+ * 	1. Construct the vdev specification.  Performs syntax validation and
+ *         makes sure each device is valid.
+ * 	2. Check for devices in use.  Using libdiskmgt, makes sure that no
+ *         devices are also in use.  Some can be overridden using the 'force'
+ *         flag, others cannot.
+ * 	3. Check for replication errors if the 'force' flag is not specified.
+ *         validates that the replication level is consistent across the
+ *         entire pool.
+ * 	4. Call libzfs to label any whole disks with an EFI label.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libzutil.h>
+#include <limits.h>
+#include <sys/spa.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <paths.h>
+#include <sys/stat.h>
+#include <sys/disk.h>
+#include <sys/mntent.h>
+#include <libgeom.h>
+
+#include "zpool_util.h"
+#include <sys/zfs_context.h>
+
+/*
+ * Run check_file() on the device node for 'name'.
+ *
+ * A name that already starts with _PATH_DEV is used as given; any other
+ * name has _PATH_DEV prepended first.  The result of check_file() is
+ * returned unchanged.  The iswholedisk argument is accepted but not
+ * consulted by this implementation.
+ */
+int
+check_device(const char *name, boolean_t force, boolean_t isspare,
+    boolean_t iswholedisk)
+{
+	char devpath[MAXPATHLEN];
+	const size_t prefix_len = sizeof (_PATH_DEV) - 1;
+
+	if (strncmp(name, _PATH_DEV, prefix_len) == 0)
+		strlcpy(devpath, name, sizeof (devpath));
+	else
+		snprintf(devpath, sizeof (devpath), "%s%s", _PATH_DEV, name);
+
+	return (check_file(devpath, force, isspare));
+}
+
+/*
+ * FreeBSD variant: no sector size database is consulted here.  Always
+ * reports "no entry found" and leaves *sector_size untouched.
+ */
+boolean_t
+check_sector_size_database(char *path, int *sector_size)
+{
+	return (B_FALSE);
+}
+
+/*
+ * Post-upgrade hook (going by its name; the callers are not visible here).
+ *
+ * If the pool's bootfs property can be read and is anything other than "-",
+ * print a reminder that the boot code may need to be updated.
+ */
+void
+after_zpool_upgrade(zpool_handle_t *zhp)
+{
+	char bootfs[ZPOOL_MAXPROPLEN];
+	int rc;
+
+	rc = zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
+	    sizeof (bootfs), NULL, B_FALSE);
+
+	/* Nothing to report if the lookup failed or bootfs reads as "-". */
+	if (rc != 0 || strcmp(bootfs, "-") == 0)
+		return;
+
+	(void) printf(gettext("Pool '%s' has the bootfs "
+	    "property set, you might need to update\nthe boot "
+	    "code. See gptzfsboot(8) and loader.efi(8) for "
+	    "details.\n"), zpool_get_name(zhp));
+}

diff --git a/zfs/cmd/zpool/os/linux/zpool_vdev_os.c b/zfs/cmd/zpool/os/linux/zpool_vdev_os.c
new file mode 100644
index 0000000..da87aa7
--- /dev/null
+++ b/zfs/cmd/zpool/os/linux/zpool_vdev_os.c

@@ -0,0 +1,412 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2017 Intel Corporation.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ */
+
+/*
+ * Functions to convert between a list of vdevs and an nvlist representing the
+ * configuration.  Each entry in the list can be one of:
+ *
+ * 	Device vdevs
+ * 		disk=(path=..., devid=...)
+ * 		file=(path=...)
+ *
+ * 	Group vdevs
+ * 		raidz[1|2]=(...)
+ * 		mirror=(...)
+ *
+ * 	Hot spares
+ *
+ * While the underlying implementation supports it, group vdevs cannot contain
+ * other group vdevs.  All userland verification of devices is contained within
+ * this file.  If successful, the nvlist returned can be passed directly to the
+ * kernel; we've done as much verification as possible in userland.
+ *
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
+ * The only function exported by this file is 'make_root_vdev'.  The
+ * function performs several passes:
+ *
+ * 	1. Construct the vdev specification.  Performs syntax validation and
+ *         makes sure each device is valid.
+ * 	2. Check for devices in use.  Using libblkid to make sure that no
+ *         devices are also in use.  Some can be overridden using the 'force'
+ *         flag, others cannot.
+ * 	3. Check for replication errors if the 'force' flag is not specified.
+ *         validates that the replication level is consistent across the
+ *         entire pool.
+ * 	4. Call libzfs to label any whole disks with an EFI label.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libzutil.h>
+#include <limits.h>
+#include <sys/spa.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "zpool_util.h"
+#include <sys/zfs_context.h>
+
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <sys/efi_partition.h>
+#include <sys/stat.h>
+#include <sys/vtoc.h>
+#include <sys/mntent.h>
+#include <uuid/uuid.h>
+#include <blkid/blkid.h>
+
+/*
+ * One row of the sector size override table: a fixed-width identification
+ * string and the sector size to report for a device that matches it.
+ */
+typedef struct vdev_disk_db_entry
+{
+	/* Matched with memcmp() over all 24 bytes; no room for a NUL. */
+	char id[24];
+	/* Value handed back through *sector_size when id matches. */
+	int sector_size;
+} vdev_disk_db_entry_t;
+
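+/*
+ * Database of block devices that lie about physical sector sizes.  Each
+ * identification string must be precisely 24 characters long: it is
+ * compared byte-for-byte against 24 bytes of the device's INQUIRY
+ * response, so a shorter string would never match (a false negative).
+ */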
+static vdev_disk_db_entry_t vdev_disk_database[] = {
+	{"ATA     ADATA SSD S396 3", 8192},
+	{"ATA     APPLE SSD SM128E", 8192},
+	{"ATA     APPLE SSD SM256E", 8192},
+	{"ATA     APPLE SSD SM512E", 8192},
+	{"ATA     APPLE SSD SM768E", 8192},
+	{"ATA     C400-MTFDDAC064M", 8192},
+	{"ATA     C400-MTFDDAC128M", 8192},
+	{"ATA     C400-MTFDDAC256M", 8192},
+	{"ATA     C400-MTFDDAC512M", 8192},
+	{"ATA     Corsair Force 3 ", 8192},
+	{"ATA     Corsair Force GS", 8192},
+	{"ATA     INTEL SSDSA2CT04", 8192},
+	{"ATA     INTEL SSDSA2BZ10", 8192},
+	{"ATA     INTEL SSDSA2BZ20", 8192},
+	{"ATA     INTEL SSDSA2BZ30", 8192},
+	{"ATA     INTEL SSDSA2CW04", 8192},
+	{"ATA     INTEL SSDSA2CW08", 8192},
+	{"ATA     INTEL SSDSA2CW12", 8192},
+	{"ATA     INTEL SSDSA2CW16", 8192},
+	{"ATA     INTEL SSDSA2CW30", 8192},
+	{"ATA     INTEL SSDSA2CW60", 8192},
+	{"ATA     INTEL SSDSC2CT06", 8192},
+	{"ATA     INTEL SSDSC2CT12", 8192},
+	{"ATA     INTEL SSDSC2CT18", 8192},
+	{"ATA     INTEL SSDSC2CT24", 8192},
+	{"ATA     INTEL SSDSC2CW06", 8192},
+	{"ATA     INTEL SSDSC2CW12", 8192},
+	{"ATA     INTEL SSDSC2CW18", 8192},
+	{"ATA     INTEL SSDSC2CW24", 8192},
+	{"ATA     INTEL SSDSC2CW48", 8192},
+	{"ATA     KINGSTON SH100S3", 8192},
+	{"ATA     KINGSTON SH103S3", 8192},
+	{"ATA     M4-CT064M4SSD2  ", 8192},
+	{"ATA     M4-CT128M4SSD2  ", 8192},
+	{"ATA     M4-CT256M4SSD2  ", 8192},
+	{"ATA     M4-CT512M4SSD2  ", 8192},
+	{"ATA     OCZ-AGILITY2    ", 8192},
+	{"ATA     OCZ-AGILITY3    ", 8192},
+	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
+	{"ATA     OCZ-VERTEX3     ", 8192},
+	{"ATA     OCZ-VERTEX3 LT  ", 8192},
+	{"ATA     OCZ-VERTEX3 MI  ", 8192},
+	{"ATA     OCZ-VERTEX4     ", 8192},
+	{"ATA     SAMSUNG MZ7WD120", 8192},
+	{"ATA     SAMSUNG MZ7WD240", 8192},
+	{"ATA     SAMSUNG MZ7WD480", 8192},
+	{"ATA     SAMSUNG MZ7WD960", 8192},
+	{"ATA     SAMSUNG SSD 830 ", 8192},
+	{"ATA     Samsung SSD 840 ", 8192},
+	{"ATA     SanDisk SSD U100", 8192},
+	{"ATA     TOSHIBA THNSNH06", 8192},
+	{"ATA     TOSHIBA THNSNH12", 8192},
+	{"ATA     TOSHIBA THNSNH25", 8192},
+	{"ATA     TOSHIBA THNSNH51", 8192},
+	{"ATA     APPLE SSD TS064C", 4096},
+	{"ATA     APPLE SSD TS128C", 4096},
+	{"ATA     APPLE SSD TS256C", 4096},
+	{"ATA     APPLE SSD TS512C", 4096},
+	{"ATA     INTEL SSDSA2M040", 4096},
+	{"ATA     INTEL SSDSA2M080", 4096},
+	{"ATA     INTEL SSDSA2M160", 4096},
+	{"ATA     INTEL SSDSC2MH12", 4096},
+	{"ATA     INTEL SSDSC2MH25", 4096},
+	{"ATA     OCZ CORE_SSD    ", 4096},
+	{"ATA     OCZ-VERTEX      ", 4096},
+	{"ATA     SAMSUNG MCCOE32G", 4096},
+	{"ATA     SAMSUNG MCCOE64G", 4096},
+	{"ATA     SAMSUNG SSD PM80", 4096},
+	/* Flash drives optimized for 4KB IOs on larger pages */
+	{"ATA     INTEL SSDSC2BA10", 4096},
+	{"ATA     INTEL SSDSC2BA20", 4096},
+	{"ATA     INTEL SSDSC2BA40", 4096},
+	{"ATA     INTEL SSDSC2BA80", 4096},
+	{"ATA     INTEL SSDSC2BB08", 4096},
+	{"ATA     INTEL SSDSC2BB12", 4096},
+	{"ATA     INTEL SSDSC2BB16", 4096},
+	{"ATA     INTEL SSDSC2BB24", 4096},
+	{"ATA     INTEL SSDSC2BB30", 4096},
+	{"ATA     INTEL SSDSC2BB40", 4096},
+	{"ATA     INTEL SSDSC2BB48", 4096},
+	{"ATA     INTEL SSDSC2BB60", 4096},
+	{"ATA     INTEL SSDSC2BB80", 4096},
+	{"ATA     INTEL SSDSC2BW24", 4096},
+	{"ATA     INTEL SSDSC2BW48", 4096},
+	{"ATA     INTEL SSDSC2BP24", 4096},
+	{"ATA     INTEL SSDSC2BP48", 4096},
+	{"NA      SmrtStorSDLKAE9W", 4096},
+	{"NVMe    Amazon EC2 NVMe ", 4096},
+	/* Imported from Open Solaris */
+	{"ATA     MARVELL SD88SA02", 4096},
+	/* Advanced format Hard drives */
+	{"ATA     Hitachi HDS5C303", 4096},
+	{"ATA     SAMSUNG HD204UI ", 4096},
+	{"ATA     ST2000DL004 HD20", 4096},
+	{"ATA     WDC WD10EARS-00M", 4096},
+	{"ATA     WDC WD10EARS-00S", 4096},
+	{"ATA     WDC WD10EARS-00Z", 4096},
+	{"ATA     WDC WD15EARS-00M", 4096},
+	{"ATA     WDC WD15EARS-00S", 4096},
+	{"ATA     WDC WD15EARS-00Z", 4096},
+	{"ATA     WDC WD20EARS-00M", 4096},
+	{"ATA     WDC WD20EARS-00S", 4096},
+	{"ATA     WDC WD20EARS-00Z", 4096},
+	{"ATA     WDC WD1600BEVT-0", 4096},
+	{"ATA     WDC WD2500BEVT-0", 4096},
+	{"ATA     WDC WD3200BEVT-0", 4096},
+	{"ATA     WDC WD5000BEVT-0", 4096},
+};
+
+
+/*
+ * Size of the INQUIRY reply buffer; the same value is placed in the
+ * INQUIRY command block and used as the transfer length.
+ */
+#define	INQ_REPLY_LEN	96
+/* Number of bytes in the INQUIRY command block. */
+#define	INQ_CMD_LEN	6
+
+/* Number of entries in vdev_disk_database. */
+static const int vdev_disk_database_size =
+	sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
+
+/*
+ * Look up the device at 'path' in vdev_disk_database.
+ *
+ * An INQUIRY command is sent through the SG_IO ioctl and the 24 bytes at
+ * offset 8 of the reply are compared against the id of every database
+ * entry.  On a match the entry's sector size is stored in *sector_size and
+ * B_TRUE is returned.  B_FALSE is returned, with *sector_size untouched,
+ * when the device cannot be opened, the ioctl fails, the ioctl reports a
+ * non-OK status, or no entry matches.
+ */
+boolean_t
+check_sector_size_database(char *path, int *sector_size)
+{
+	unsigned char inq_buff[INQ_REPLY_LEN];
+	unsigned char sense_buffer[32];
+	unsigned char inq_cmd_blk[INQ_CMD_LEN] =
+	    {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
+	sg_io_hdr_t io_hdr;
+	int error;
+	int fd;
+	int i;
+
+	/*
+	 * Zero the reply buffer so that a short transfer cannot leave
+	 * uninitialized stack bytes to be compared against the database.
+	 * No database id contains a NUL, so zeroed bytes never match.
+	 */
+	memset(inq_buff, 0, sizeof (inq_buff));
+
+	/* Prepare INQUIRY command */
+	memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = sizeof (inq_cmd_blk);
+	io_hdr.mx_sb_len = sizeof (sense_buffer);
+	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	io_hdr.dxfer_len = INQ_REPLY_LEN;
+	io_hdr.dxferp = inq_buff;
+	io_hdr.cmdp = inq_cmd_blk;
+	io_hdr.sbp = sense_buffer;
+	io_hdr.timeout = 10;		/* 10 milliseconds is ample time */
+
+	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
+		return (B_FALSE);
+
+	error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
+
+	/* The descriptor is only needed for the single ioctl above. */
+	(void) close(fd);
+
+	if (error < 0)
+		return (B_FALSE);
+
+	if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+		return (B_FALSE);
+
+	/* Linear scan; the first entry whose id matches wins. */
+	for (i = 0; i < vdev_disk_database_size; i++) {
+		if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
+			continue;
+
+		*sector_size = vdev_disk_database[i].sector_size;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Check 'path' for an existing signature using the blkid cache.  Returns 0
+ * when usable, -1 for an unforced non-ZFS type, or check_file()'s result.
+ */
+static int
+check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
+{
+	char *fstype = blkid_get_tag_value(cache, "TYPE", path);
+	int rc = 0;
+
+	/* libblkid reported no TYPE tag, so nothing is in the way. */
+	if (fstype == NULL)
+		return (rc);
+
+	if (strcmp(fstype, "zfs_member") == 0) {
+		/*
+		 * A ZFS member was detected; check_file() decides whether
+		 * the device is safe.  The one safe case is a spare device
+		 * shared between multiple pools.
+		 */
+		rc = check_file(path, force, isspare);
+	} else if (!force) {
+		/* Any other detected type is a conflict unless forced. */
+		vdev_error(gettext("%s contains a filesystem of "
+		    "type '%s'\n"), path, fstype);
+		rc = -1;
+	}
+
+	free(fstype);
+
+	return (rc);
+}
+
+/*
+ * Validate that a disk, including all of its partitions, is safe to use.
+ * Returns 0 when the disk may be used and a non-zero value otherwise.
+ *
+ * For EFI labeled disks this can be done relatively easily with libefi: the
+ * partition numbers are extracted from the label and used to generate the
+ * expected /dev/ paths, and each partition can then be checked for conflicts.
+ *
+ * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
+ * but due to the lack of readily available libraries this scanning is
+ * not implemented.  Instead only the device path as given is checked.
+ */
+static int
+check_disk(const char *path, blkid_cache cache, int force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	struct dk_gpt *vtoc;
+	char slice_path[MAXPATHLEN];
+	int err = 0;
+	int fd, i;
+	int flags = O_RDONLY|O_DIRECT;
+
+	if (!iswholedisk)
+		return (check_slice(path, cache, force, isspare));
+
+	/* only spares can be shared, other devices require exclusive access */
+	if (!isspare)
+		flags |= O_EXCL;
+
+	if ((fd = open(path, flags)) < 0) {
+		char *value = blkid_get_tag_value(cache, "TYPE", path);
+		(void) fprintf(stderr, gettext("%s is in use and contains "
+		    "a %s filesystem.\n"), path, value ? value : "unknown");
+		free(value);
+		return (-1);
+	}
+
+	/*
+	 * Expected to fail for non-EFI labeled disks.  Just check the device
+	 * as given and do not attempt to detect and scan partitions.
+	 */
+	err = efi_alloc_and_read(fd, &vtoc);
+	if (err) {
+		(void) close(fd);
+		return (check_slice(path, cache, force, isspare));
+	}
+
+	/*
+	 * The primary efi partition label is damaged however the secondary
+	 * label at the end of the device is intact.  Rather than use this
+	 * label we should play it safe and treat this as a non efi device.
+	 */
+	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
+		efi_free(vtoc);
+		(void) close(fd);
+
+		if (force) {
+			/* Partitions will now be created using the backup */
+			return (0);
+		} else {
+			vdev_error(gettext("%s contains a corrupt primary "
+			    "EFI label.\n"), path);
+			return (-1);
+		}
+	}
+
+	for (i = 0; i < vtoc->efi_nparts; i++) {
+		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
+		    uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
+			continue;
+
+		/* The cast keeps a negative char out of isdigit() (UB). */
+		if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+			(void) snprintf(slice_path, sizeof (slice_path),
+			    "%s%s%d", path, "-part", i + 1);
+		else
+			(void) snprintf(slice_path, sizeof (slice_path),
+			    "%s%s%d", path, isdigit((unsigned char)
+			    path[strlen(path) - 1]) ? "p" : "", i + 1);
+
+		err = check_slice(slice_path, cache, force, isspare);
+		if (err)
+			break;
+	}
+
+	efi_free(vtoc);
+	(void) close(fd);
+
+	return (err);
+}
+
+/* Run check_disk() on 'path' with a blkid cache held for the call. */
+int
+check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	blkid_cache bcache;
+	int rc;
+
+	if (blkid_get_cache(&bcache, NULL) != 0) {
+		(void) fprintf(stderr, gettext("unable to access the blkid "
+		    "cache.\n"));
+		return (-1);
+	}
+
+	/* Release the cache before handing back check_disk()'s result. */
+	rc = check_disk(path, bcache, force, isspare, iswholedisk);
+	blkid_put_cache(bcache);
+	return (rc);
+}
+
+void
+after_zpool_upgrade(zpool_handle_t *zhp)
+{	/* intentionally empty: nothing is done with 'zhp' here */
+}

diff --git a/zfs/cmd/zpool/zpool.d/dm-deps b/zfs/cmd/zpool/zpool.d/dm-deps
new file mode 100755
index 0000000..42af6a8
--- /dev/null
+++ b/zfs/cmd/zpool/zpool.d/dm-deps

@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Show device mapper dependent / underlying devices.  This is useful for
+# looking up the /dev/sd* devices associated with a dm or multipath device.
+#
+
+if [ "$1" = "-h" ] ; then
+	echo "Show device mapper dependent (underlying) devices."
+	exit
+fi
+
+dev="$VDEV_PATH"
+# If the VDEV path is a symlink, resolve it to a real device
+if [ -L "$dev" ] ; then
+	dev=$(readlink "$dev")
+fi
+
+dev="${dev##*/}"
+val=""
+if [ -d "/sys/class/block/$dev/slaves" ] ; then
+	# ls -C: output in columns, no newlines, two spaces (change to one)
+	# shellcheck disable=SC2012
+	val=$(ls -C "/sys/class/block/$dev/slaves" | tr -s '[:space:]' ' ')
+	val="${val% }"	# tr turned the final newline into a trailing space
+fi
+
+echo "dm-deps=$val"

diff --git a/zfs/cmd/zpool/zpool.d/iostat b/zfs/cmd/zpool/zpool.d/iostat
index f6452fb..19be475 100755
--- a/zfs/cmd/zpool/zpool.d/iostat
+++ b/zfs/cmd/zpool/zpool.d/iostat

@@ -9,7 +9,7 @@
 iostat-1s:	Do a single 1-second iostat sample and show values.
 iostat-10s:	Do a single 10-second iostat sample and show values."
 
-script=$(basename "$0")
+script="${0##*/}"
 if [ "$1" = "-h" ] ; then
 	echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
 	exit
@@ -17,14 +17,14 @@
 
 if [ "$script" = "iostat-1s" ] ; then
 	# Do a single one-second sample
-	extra="1 1"
+	interval=1
 	# Don't show summary stats
-	y="-y"
+	brief="yes"
 elif [ "$script" = "iostat-10s" ] ; then
 	# Do a single ten-second sample
-	extra="10 1"
+	interval=10
 	# Don't show summary stats
-	y="-y"
+	brief="yes"
 fi
 
 if [ -f "$VDEV_UPATH" ] ; then
@@ -32,7 +32,19 @@
 	exit
 fi
 
-out=$(eval "iostat $y -k -x $VDEV_UPATH $extra")
+if [ "$(uname)" = "FreeBSD" ]; then
+	out=$(iostat -dKx \
+		${interval:+"-w $interval"} \
+		${interval:+"-c 1"} \
+		"$VDEV_UPATH" | tail -n 2)
+else
+	out=$(iostat -kx \
+		${brief:+"-y"} \
+		${interval:+"$interval"} \
+		${interval:+"1"} \
+		"$VDEV_UPATH" | grep -v '^$' | tail -n 2)
+fi
+
 
 # Sample output (we want the last two lines):
 #
@@ -46,16 +58,16 @@
 #
 
 # Get the column names
-cols=$(echo "$out" | grep Device)
+cols=$(echo "$out" | head -n 1)
 
 # Get the values and tab separate them to make them cut-able.
-vals="$(echo "$out" | grep -A1 Device | tail -n 1 | sed -r 's/[[:blank:]]+/\t/g')"
+vals=$(echo "$out" | tail -n 1 | tr -s '[:space:]' '\t')
 
 i=0
 for col in $cols ; do
 	i=$((i+1))
 	# Skip the first column since it's just the device name
-	if [ "$col" = "Device:" ] ; then
+	if [ $i -eq 1 ]; then
 		continue
 	fi
 

diff --git a/zfs/cmd/zpool/zpool.d/lsblk b/zfs/cmd/zpool/zpool.d/lsblk
index 1cdef40..919783a 100755
--- a/zfs/cmd/zpool/zpool.d/lsblk
+++ b/zfs/cmd/zpool/zpool.d/lsblk

@@ -48,7 +48,7 @@
 vendor:	Show the disk vendor.
 lsblk:	Show the disk size, vendor, and model number."
 
-script=$(basename "$0")
+script="${0##*/}"
 
 if [ "$1" = "-h" ] ; then
         echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-

diff --git a/zfs/cmd/zpool/zpool.d/media b/zfs/cmd/zpool/zpool.d/media
index 05bc159..660f78b 100755
--- a/zfs/cmd/zpool/zpool.d/media
+++ b/zfs/cmd/zpool/zpool.d/media

@@ -4,19 +4,23 @@
 #
 
 if [ "$1" = "-h" ] ; then
-	echo "Show whether a vdev is a file, hdd, or ssd."
+	echo "Show whether a vdev is a file, hdd, ssd, or iscsi."
 	exit
 fi
 
 if [ -b "$VDEV_UPATH" ]; then
-	device=$(basename "$VDEV_UPATH")
-	val=$(cat "/sys/block/$device/queue/rotational" 2>/dev/null)
-	if [ "$val" = "0" ]; then
-		MEDIA="ssd"
-	fi
+	device="${VDEV_UPATH##*/}"
+	read -r val 2>/dev/null < "/sys/block/$device/queue/rotational"
+	case "$val" in
+		0) MEDIA="ssd" ;;
+		1) MEDIA="hdd" ;;
+	esac
 
-	if [ "$val" = "1" ]; then
-		MEDIA="hdd"
+	vpd_pg83="/sys/block/$device/device/vpd_pg83"
+	if [ -f "$vpd_pg83" ]; then
+		if grep -q --binary "iqn." "$vpd_pg83"; then
+			MEDIA="iscsi"
+		fi
 	fi
 else
 	if [ -f "$VDEV_UPATH" ]; then

diff --git a/zfs/cmd/zpool/zpool.d/ses b/zfs/cmd/zpool/zpool.d/ses
index f6b7520..b51fe31 100755
--- a/zfs/cmd/zpool/zpool.d/ses
+++ b/zfs/cmd/zpool/zpool.d/ses

@@ -11,7 +11,7 @@
 locate_led:	Show value of the disk enclosure slot locate LED.
 ses:		Show disk's enc, enc device, slot, and fault/locate LED values."
 
-script=$(basename "$0")
+script="${0##*/}"
 if [ "$1" = "-h" ] ; then
 	echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
 	exit
@@ -41,7 +41,13 @@
 		val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
 		;;
 	fault_led)
-		val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+		# JBODs fault LED is called 'fault', NVMe fault LED is called
+		# 'attention'.
+		if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
+			val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+		elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
+			val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
+		fi
 		;;
 	locate_led)
 		val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)

diff --git a/zfs/cmd/zpool/zpool.d/slaves b/zfs/cmd/zpool/zpool.d/slaves
deleted file mode 100755
index 9c16d6c..0000000
--- a/zfs/cmd/zpool/zpool.d/slaves
+++ /dev/null

@@ -1,32 +0,0 @@
-#!/bin/sh
-#
-# Show device mapper slave devices.  This is useful for looking up the
-# /dev/sd* devices associated with a dm or multipath device.  For example:
-#
-# $ ls /sys/block/dm-113/slaves/
-# sddt  sdjw
-#
-
-if [ "$1" = "-h" ] ; then
-	echo "Show device mapper slave devices."
-	exit
-fi
-
-dev="$VDEV_PATH"
-
-# If the VDEV path is a symlink, resolve it to a real device
-if [ -L "$dev" ] ; then
-	dev=$(readlink "$dev")
-fi
-
-dev=$(basename "$dev")
-val=""
-if [ -d "/sys/class/block/$dev/slaves" ] ; then
-	# ls -C: output in columns, no newlines
-	val=$(ls -C "/sys/class/block/$dev/slaves")
-
-	# ls -C will print two spaces between files; change to one space.
-	val=$(echo "$val" | sed -r 's/[[:blank:]]+/ /g')
-fi
-
-echo "slaves=$val"

diff --git a/zfs/cmd/zpool/zpool.d/smart b/zfs/cmd/zpool/zpool.d/smart
index bd18e9d..b95256d 100755
--- a/zfs/cmd/zpool/zpool.d/smart
+++ b/zfs/cmd/zpool/zpool.d/smart

@@ -53,7 +53,7 @@
 	num_files=$(find "$dir" -maxdepth 1 -type f | wc -l)
 	mod=$((pid % num_files))
 	i=0
-	find "$dir" -type f -printf "%f\n" | while read -r file ; do
+	find "$dir" -type f -printf '%f\n' | while read -r file ; do
 		if [ "$mod" = "$i" ] ; then
 			echo "$file"
 			break
@@ -62,24 +62,22 @@
 	done
 }
 
-script=$(basename "$0")
+script="${0##*/}"
 
 if [ "$1" = "-h" ] ; then
         echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
         exit
 fi
 
-smartctl_path=$(command -v smartctl)
-
-if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ] || [ -n "$samples" ] ; then
+if [ -b "$VDEV_UPATH" ] && PATH="/usr/sbin:$PATH" command -v smartctl > /dev/null || [ -n "$samples" ] ; then
 	if [ -n "$samples" ] ; then
 		# cat a smartctl output text file instead of running smartctl
 		# on a vdev (only used for developer testing).
-		file=$(get_filename_from_dir $samples)
+		file=$(get_filename_from_dir "$samples")
 		echo "file=$file"
 		raw_out=$(cat "$samples/$file")
 	else
-		raw_out=$(eval "sudo $smartctl_path -a $VDEV_UPATH")
+		raw_out=$(sudo smartctl -a "$VDEV_UPATH")
 	fi
 
 	# What kind of drive are we?  Look for the right line in smartctl:
@@ -230,11 +228,11 @@
 with_vals=$(echo "$out" | grep -E "$scripts")
 if [ -n "$with_vals" ]; then
 	echo "$with_vals"
-	without_vals=$(echo "$scripts" | tr "|" "\n" |
+	without_vals=$(echo "$scripts" | tr '|' '\n' |
 		grep -v -E "$(echo "$with_vals" |
 		awk -F "=" '{print $1}')" | awk '{print $0"="}')
 else
-	without_vals=$(echo "$scripts" | tr "|" "\n" | awk '{print $0"="}')
+	without_vals=$(echo "$scripts" | tr '|' '\n' | awk '{print $0"="}')
 fi
 
 if [ -n "$without_vals" ]; then

diff --git a/zfs/cmd/zpool/zpool_iter.c b/zfs/cmd/zpool/zpool_iter.c
index 9927a9d..abfa2b7 100644
--- a/zfs/cmd/zpool/zpool_iter.c
+++ b/zfs/cmd/zpool/zpool_iter.c

@@ -56,6 +56,7 @@
 
 struct zpool_list {
 	boolean_t	zl_findall;
+	boolean_t	zl_literal;
 	uu_avl_t	*zl_avl;
 	uu_avl_pool_t	*zl_pool;
 	zprop_list_t	**zl_proplist;
@@ -88,7 +89,9 @@
 	uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
 	if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
 		if (zlp->zl_proplist &&
-		    zpool_expand_proplist(zhp, zlp->zl_proplist) != 0) {
+		    zpool_expand_proplist(zhp, zlp->zl_proplist,
+		    zlp->zl_literal)
+		    != 0) {
 			zpool_close(zhp);
 			free(node);
 			return (-1);
@@ -110,7 +113,8 @@
  * line.
  */
 zpool_list_t *
-pool_list_get(int argc, char **argv, zprop_list_t **proplist, int *err)
+pool_list_get(int argc, char **argv, zprop_list_t **proplist,
+    boolean_t literal, int *err)
 {
 	zpool_list_t *zlp;
 
@@ -128,6 +132,8 @@
 
 	zlp->zl_proplist = proplist;
 
+	zlp->zl_literal = literal;
+
 	if (argc == 0) {
 		(void) zpool_iter(g_zfs, add_pool, zlp);
 		zlp->zl_findall = B_TRUE;
@@ -242,12 +248,12 @@
  */
 int
 for_each_pool(int argc, char **argv, boolean_t unavail,
-    zprop_list_t **proplist, zpool_iter_f func, void *data)
+    zprop_list_t **proplist, boolean_t literal, zpool_iter_f func, void *data)
 {
 	zpool_list_t *list;
 	int ret = 0;
 
-	if ((list = pool_list_get(argc, argv, proplist, &ret)) == NULL)
+	if ((list = pool_list_get(argc, argv, proplist, literal, &ret)) == NULL)
 		return (1);
 
 	if (pool_list_iter(list, unavail, func, data) != 0)
@@ -258,51 +264,6 @@
 	return (ret);
 }
 
-static int
-for_each_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, pool_vdev_iter_f func,
-    void *data)
-{
-	nvlist_t **child;
-	uint_t c, children;
-	int ret = 0;
-	int i;
-	char *type;
-
-	const char *list[] = {
-	    ZPOOL_CONFIG_SPARES,
-	    ZPOOL_CONFIG_L2CACHE,
-	    ZPOOL_CONFIG_CHILDREN
-	};
-
-	for (i = 0; i < ARRAY_SIZE(list); i++) {
-		if (nvlist_lookup_nvlist_array(nv, list[i], &child,
-		    &children) == 0) {
-			for (c = 0; c < children; c++) {
-				uint64_t ishole = 0;
-
-				(void) nvlist_lookup_uint64(child[c],
-				    ZPOOL_CONFIG_IS_HOLE, &ishole);
-
-				if (ishole)
-					continue;
-
-				ret |= for_each_vdev_cb(zhp, child[c], func,
-				    data);
-			}
-		}
-	}
-
-	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
-		return (ret);
-
-	/* Don't run our function on root vdevs */
-	if (strcmp(type, VDEV_TYPE_ROOT) != 0) {
-		ret |= func(zhp, nv, data);
-	}
-
-	return (ret);
-}
-
 /*
  * This is the equivalent of for_each_pool() for vdevs.  It iterates thorough
  * all vdevs in the pool, ignoring root vdevs and holes, calling func() on
@@ -321,7 +282,7 @@
 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &nvroot) == 0);
 	}
-	return (for_each_vdev_cb(zhp, nvroot, func, data));
+	return (for_each_vdev_cb((void *) zhp, nvroot, func, data));
 }
 
 /*
@@ -488,19 +449,25 @@
 	/* Setup our custom environment variables */
 	rc = asprintf(&env[1], "VDEV_PATH=%s",
 	    data->path ? data->path : "");
-	if (rc == -1)
+	if (rc == -1) {
+		env[1] = NULL;
 		goto out;
+	}
 
 	rc = asprintf(&env[2], "VDEV_UPATH=%s",
 	    data->upath ? data->upath : "");
-	if (rc == -1)
+	if (rc == -1) {
+		env[2] = NULL;
 		goto out;
+	}
 
 	rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s",
 	    data->vdev_enc_sysfs_path ?
 	    data->vdev_enc_sysfs_path : "");
-	if (rc == -1)
+	if (rc == -1) {
+		env[3] = NULL;
 		goto out;
+	}
 
 	/* Run the command */
 	rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines,
@@ -519,8 +486,7 @@
 
 	/* Start with i = 1 since env[0] was statically allocated */
 	for (i = 1; i < ARRAY_SIZE(env); i++)
-		if (env[i] != NULL)
-			free(env[i]);
+		free(env[i]);
 }
 
 /*
@@ -592,7 +558,7 @@
 
 /* For each vdev in the pool run a command */
 static int
-for_each_vdev_run_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_vcdl)
+for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl)
 {
 	vdev_cmd_data_list_t *vcdl = cb_vcdl;
 	vdev_cmd_data_t *data;
@@ -600,6 +566,7 @@
 	char *vname = NULL;
 	char *vdev_enc_sysfs_path = NULL;
 	int i, match = 0;
+	zpool_handle_t *zhp = zhp_data;
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
 		return (1);
@@ -616,7 +583,7 @@
 		}
 	}
 
-	/* Check for whitelisted vdevs here, if any */
+	/* Check for selected vdevs here, if any */
 	for (i = 0; i < vcdl->vdev_names_count; i++) {
 		vname = zpool_vdev_name(g_zfs, zhp, nv, vcdl->cb_name_flags);
 		if (strcmp(vcdl->vdev_names[i], vname) == 0) {
@@ -627,7 +594,7 @@
 		free(vname);
 	}
 
-	/* If we whitelisted vdevs, and this isn't one of them, then bail out */
+	/* If we selected vdevs, and this isn't one of them, then bail out */
 	if (!match && vcdl->vdev_names_count)
 		return (0);
 
@@ -711,7 +678,7 @@
 	vcdl->g_zfs = g_zfs;
 
 	/* Gather our list of all vdevs in all pools */
-	for_each_pool(argc, argv, B_TRUE, NULL,
+	for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
 	    all_pools_for_each_vdev_gather_cb, vcdl);
 
 	/* Run command on all vdevs in all pools */

diff --git a/zfs/cmd/zpool/zpool_main.c b/zfs/cmd/zpool/zpool_main.c
index b4c98e0..a06af9a 100644
--- a/zfs/cmd/zpool/zpool_main.c
+++ b/zfs/cmd/zpool/zpool_main.c

@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2012 by Frederik Wessels. All rights reserved.
  * Copyright (c) 2012 by Cyril Plisko. All rights reserved.
  * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
@@ -31,6 +31,8 @@
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright [2021] Hewlett Packard Enterprise Development LP
  */
 
 #include <assert.h>
@@ -43,10 +45,12 @@
 #include <libintl.h>
 #include <libuutil.h>
 #include <locale.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
+#include <time.h>
 #include <unistd.h>
 #include <pwd.h>
 #include <zone.h>
@@ -120,6 +124,11 @@
 
 static int zpool_do_version(int, char **);
 
+static int zpool_do_wait(int, char **);
+
+static zpool_compat_status_t zpool_do_load_compat(
+    const char *, boolean_t *);
+
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
  * debugging facilities.
@@ -170,7 +179,8 @@
 	HELP_SYNC,
 	HELP_REGUID,
 	HELP_REOPEN,
-	HELP_VERSION
+	HELP_VERSION,
+	HELP_WAIT
 } zpool_help_t;
 
 
@@ -311,6 +321,8 @@
 	{ "get",	zpool_do_get,		HELP_GET		},
 	{ "set",	zpool_do_set,		HELP_SET		},
 	{ "sync",	zpool_do_sync,		HELP_SYNC		},
+	{ NULL },
+	{ "wait",	zpool_do_wait,		HELP_WAIT		},
 };
 
 #define	NCOMMAND	(ARRAY_SIZE(command_table))
@@ -330,7 +342,7 @@
 		return (gettext("\tadd [-fgLnP] [-o property=value] "
 		    "<pool> <vdev> ...\n"));
 	case HELP_ATTACH:
-		return (gettext("\tattach [-f] [-o property=value] "
+		return (gettext("\tattach [-fsw] [-o property=value] "
 		    "<pool> <device> <new-device>\n"));
 	case HELP_CLEAR:
 		return (gettext("\tclear [-nF] <pool> [device]\n"));
@@ -339,7 +351,7 @@
 		    "\t    [-O file-system-property=value] ... \n"
 		    "\t    [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
 	case HELP_CHECKPOINT:
-		return (gettext("\tcheckpoint [--discard] <pool> ...\n"));
+		return (gettext("\tcheckpoint [-d [-w]] <pool> ...\n"));
 	case HELP_DESTROY:
 		return (gettext("\tdestroy [-f] <pool>\n"));
 	case HELP_DETACH:
@@ -373,21 +385,21 @@
 	case HELP_ONLINE:
 		return (gettext("\tonline [-e] <pool> <device> ...\n"));
 	case HELP_REPLACE:
-		return (gettext("\treplace [-f] [-o property=value] "
+		return (gettext("\treplace [-fsw] [-o property=value] "
 		    "<pool> <device> [new-device]\n"));
 	case HELP_REMOVE:
-		return (gettext("\tremove [-nps] <pool> <device> ...\n"));
+		return (gettext("\tremove [-npsw] <pool> <device> ...\n"));
 	case HELP_REOPEN:
 		return (gettext("\treopen [-n] <pool>\n"));
 	case HELP_INITIALIZE:
-		return (gettext("\tinitialize [-c | -s] <pool> "
+		return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
 		    "[<device> ...]\n"));
 	case HELP_SCRUB:
-		return (gettext("\tscrub [-s | -p] <pool> ...\n"));
+		return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
 	case HELP_RESILVER:
 		return (gettext("\tresilver <pool> ...\n"));
 	case HELP_TRIM:
-		return (gettext("\ttrim [-d] [-r <rate>] [-c | -s] <pool> "
+		return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> "
 		    "[<device> ...]\n"));
 	case HELP_STATUS:
 		return (gettext("\tstatus [-c [script1,script2,...]] "
@@ -414,6 +426,9 @@
 		return (gettext("\tsync [pool] ...\n"));
 	case HELP_VERSION:
 		return (gettext("\tversion\n"));
+	case HELP_WAIT:
+		return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
+		    "<pool> [interval]\n"));
 	}
 
 	abort();
@@ -434,7 +449,8 @@
 		char *path = zpool_vdev_name(g_zfs, zhp, nvroot,
 		    VDEV_NAME_PATH);
 
-		if (strcmp(path, VDEV_TYPE_INDIRECT) != 0)
+		if (strcmp(path, VDEV_TYPE_INDIRECT) != 0 &&
+		    strcmp(path, VDEV_TYPE_HOLE) != 0)
 			fnvlist_add_boolean(res, path);
 
 		free(path);
@@ -474,7 +490,7 @@
  * that command.  Otherwise, iterate over the entire command table and display
  * a complete usage message.
  */
-void
+static void
 usage(boolean_t requested)
 {
 	FILE *fp = requested ? stdout : stderr;
@@ -517,7 +533,7 @@
 		(void) fprintf(fp, "YES   disabled | enabled | active\n");
 
 		(void) fprintf(fp, gettext("\nThe feature@ properties must be "
-		    "appended with a feature name.\nSee zpool-features(5).\n"));
+		    "appended with a feature name.\nSee zpool-features(7).\n"));
 	}
 
 	/*
@@ -532,12 +548,14 @@
 }
 
 /*
- * zpool initialize [-c | -s] <pool> [<vdev> ...]
+ * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...]
  * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
  * if none specified.
  *
  *	-c	Cancel. Ends active initializing.
  *	-s	Suspend. Initializing can then be restarted with no flags.
+ *	-u	Uninitialize. Clears initialization state.
+ *	-w	Wait. Blocks until initializing has completed.
  */
 int
 zpool_do_initialize(int argc, char **argv)
@@ -547,15 +565,19 @@
 	zpool_handle_t *zhp;
 	nvlist_t *vdevs;
 	int err = 0;
+	boolean_t wait = B_FALSE;
 
 	struct option long_options[] = {
 		{"cancel",	no_argument,		NULL, 'c'},
 		{"suspend",	no_argument,		NULL, 's'},
+		{"uninit",	no_argument,		NULL, 'u'},
+		{"wait",	no_argument,		NULL, 'w'},
 		{0, 0, 0, 0}
 	};
 
 	pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
-	while ((c = getopt_long(argc, argv, "cs", long_options, NULL)) != -1) {
+	while ((c = getopt_long(argc, argv, "csuw", long_options,
+	    NULL)) != -1) {
 		switch (c) {
 		case 'c':
 			if (cmd_type != POOL_INITIALIZE_START &&
@@ -575,6 +597,18 @@
 			}
 			cmd_type = POOL_INITIALIZE_SUSPEND;
 			break;
+		case 'u':
+			if (cmd_type != POOL_INITIALIZE_START &&
+			    cmd_type != POOL_INITIALIZE_UNINIT) {
+				(void) fprintf(stderr, gettext("-u cannot be "
+				    "combined with other options\n"));
+				usage(B_FALSE);
+			}
+			cmd_type = POOL_INITIALIZE_UNINIT;
+			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			if (optopt != 0) {
 				(void) fprintf(stderr,
@@ -597,6 +631,12 @@
 		return (-1);
 	}
 
+	if (wait && (cmd_type != POOL_INITIALIZE_START)) {
+		(void) fprintf(stderr, gettext("-w cannot be used with -c, -s"
+		    " or -u\n"));
+		usage(B_FALSE);	/* -w is valid only when starting */
+	}
+
 	poolname = argv[0];
 	zhp = zpool_open(g_zfs, poolname);
 	if (zhp == NULL)
@@ -615,7 +655,10 @@
 		}
 	}
 
-	err = zpool_initialize(zhp, cmd_type, vdevs);
+	if (wait)
+		err = zpool_initialize_wait(zhp, cmd_type, vdevs);
+	else
+		err = zpool_initialize(zhp, cmd_type, vdevs);
 
 	fnvlist_free(vdevs);
 	zpool_close(zhp);
@@ -643,9 +686,16 @@
 	}
 
 	for (c = 0; c < children; c++) {
-		uint64_t is_log = B_FALSE;
+		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
 		char *class = "";
 
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &is_hole);
+
+		if (is_hole == B_TRUE) {
+			continue;
+		}
+
 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 		    &is_log);
 		if (is_log)
@@ -666,6 +716,54 @@
 	}
 }
 
+/*
+ * Dry-run helper: print a "cache" heading followed by the name of every
+ * ZPOOL_CONFIG_L2CACHE child of 'nv'.  Prints nothing if there are none.
+ */
+static void
+print_cache_list(nvlist_t *nv, int indent)
+{
+	nvlist_t **devs;
+	uint_t ndevs, n;
+
+	/* A failed lookup and an empty array are treated alike. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &devs, &ndevs) != 0 || ndevs == 0)
+		return;
+
+	(void) printf("\t%*s%s\n", indent, "", "cache");
+	for (n = 0; n < ndevs; n++) {
+		char *name = zpool_vdev_name(g_zfs, NULL, devs[n], 0);
+
+		(void) printf("\t%*s%s\n", indent + 2, "", name);
+		free(name);
+	}
+}
+
+/*
+ * Dry-run helper: print a "spares" heading followed by the name of every
+ * ZPOOL_CONFIG_SPARES child of 'nv'.  Prints nothing if there are none.
+ */
+static void
+print_spare_list(nvlist_t *nv, int indent)
+{
+	nvlist_t **devs;
+	uint_t ndevs, n;
+
+	/* A failed lookup and an empty array are treated alike. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &devs, &ndevs) != 0 || ndevs == 0)
+		return;
+
+	(void) printf("\t%*s%s\n", indent, "", "spares");
+	for (n = 0; n < ndevs; n++) {
+		char *name = zpool_vdev_name(g_zfs, NULL, devs[n], 0);
+
+		(void) printf("\t%*s%s\n", indent + 2, "", name);
+		free(name);
+	}
+}
+
 static boolean_t
 prop_list_contains_feature(nvlist_t *proplist)
 {
@@ -701,6 +799,8 @@
 
 	if (poolprop) {
 		const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION);
+		const char *cname =
+		    zpool_prop_to_name(ZPOOL_PROP_COMPATIBILITY);
 
 		if ((prop = zpool_name_to_prop(propname)) == ZPOOL_PROP_INVAL &&
 		    !zpool_prop_feature(propname)) {
@@ -723,6 +823,22 @@
 			return (2);
 		}
 
+		/*
+		 * if version is specified, only "legacy" compatibility
+		 * may be requested
+		 */
+		if ((prop == ZPOOL_PROP_COMPATIBILITY &&
+		    strcmp(propval, ZPOOL_COMPAT_LEGACY) != 0 &&
+		    nvlist_exists(proplist, vname)) ||
+		    (prop == ZPOOL_PROP_VERSION &&
+		    nvlist_exists(proplist, cname) &&
+		    strcmp(fnvlist_lookup_string(proplist, cname),
+		    ZPOOL_COMPAT_LEGACY) != 0)) {
+			(void) fprintf(stderr, gettext("when 'version' is "
+			    "specified, the 'compatibility' feature may only "
+			    "be set to '" ZPOOL_COMPAT_LEGACY "'\n"));
+			return (2);
+		}
 
 		if (zpool_prop_feature(propname))
 			normnm = propname;
@@ -895,16 +1011,16 @@
 
 	if (dryrun) {
 		nvlist_t *poolnvroot;
-		nvlist_t **l2child;
-		uint_t l2children, c;
+		nvlist_t **l2child, **sparechild;
+		uint_t l2children, sparechildren, c;
 		char *vname;
-		boolean_t hadcache = B_FALSE;
+		boolean_t hadcache = B_FALSE, hadspare = B_FALSE;
 
 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &poolnvroot) == 0);
 
 		(void) printf(gettext("would update '%s' to the following "
-		    "configuration:\n"), zpool_get_name(zhp));
+		    "configuration:\n\n"), zpool_get_name(zhp));
 
 		/* print original main pool and new tree */
 		print_vdev_tree(zhp, poolname, poolnvroot, 0, "",
@@ -912,20 +1028,35 @@
 		print_vdev_tree(zhp, NULL, nvroot, 0, "", name_flags);
 
 		/* print other classes: 'dedup', 'special', and 'log' */
-		print_vdev_tree(zhp, "dedup", poolnvroot, 0,
-		    VDEV_ALLOC_BIAS_DEDUP, name_flags);
-		print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_DEDUP,
-		    name_flags);
+		if (zfs_special_devs(poolnvroot, VDEV_ALLOC_BIAS_DEDUP)) {
+			print_vdev_tree(zhp, "dedup", poolnvroot, 0,
+			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
+			print_vdev_tree(zhp, NULL, nvroot, 0,
+			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
+		} else if (zfs_special_devs(nvroot, VDEV_ALLOC_BIAS_DEDUP)) {
+			print_vdev_tree(zhp, "dedup", nvroot, 0,
+			    VDEV_ALLOC_BIAS_DEDUP, name_flags);
+		}
 
-		print_vdev_tree(zhp, "special", poolnvroot, 0,
-		    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
-		print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_SPECIAL,
-		    name_flags);
+		if (zfs_special_devs(poolnvroot, VDEV_ALLOC_BIAS_SPECIAL)) {
+			print_vdev_tree(zhp, "special", poolnvroot, 0,
+			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
+			print_vdev_tree(zhp, NULL, nvroot, 0,
+			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
+		} else if (zfs_special_devs(nvroot, VDEV_ALLOC_BIAS_SPECIAL)) {
+			print_vdev_tree(zhp, "special", nvroot, 0,
+			    VDEV_ALLOC_BIAS_SPECIAL, name_flags);
+		}
 
-		print_vdev_tree(zhp, "logs", poolnvroot, 0, VDEV_ALLOC_BIAS_LOG,
-		    name_flags);
-		print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_LOG,
-		    name_flags);
+		if (num_logs(poolnvroot) > 0) {
+			print_vdev_tree(zhp, "logs", poolnvroot, 0,
+			    VDEV_ALLOC_BIAS_LOG, name_flags);
+			print_vdev_tree(zhp, NULL, nvroot, 0,
+			    VDEV_ALLOC_BIAS_LOG, name_flags);
+		} else if (num_logs(nvroot) > 0) {
+			print_vdev_tree(zhp, "logs", nvroot, 0,
+			    VDEV_ALLOC_BIAS_LOG, name_flags);
+		}
 
 		/* Do the same for the caches */
 		if (nvlist_lookup_nvlist_array(poolnvroot, ZPOOL_CONFIG_L2CACHE,
@@ -950,6 +1081,29 @@
 				free(vname);
 			}
 		}
+		/* And finally the spares */
+		if (nvlist_lookup_nvlist_array(poolnvroot, ZPOOL_CONFIG_SPARES,
+		    &sparechild, &sparechildren) == 0 && sparechildren > 0) {
+			hadspare = B_TRUE;
+			(void) printf(gettext("\tspares\n"));
+			for (c = 0; c < sparechildren; c++) {
+				vname = zpool_vdev_name(g_zfs, NULL,
+				    sparechild[c], name_flags);
+				(void) printf("\t  %s\n", vname);
+				free(vname);
+			}
+		}
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &sparechild, &sparechildren) == 0 && sparechildren > 0) {
+			if (!hadspare)
+				(void) printf(gettext("\tspares\n"));
+			for (c = 0; c < sparechildren; c++) {
+				vname = zpool_vdev_name(g_zfs, NULL,
+				    sparechild[c], name_flags);
+				(void) printf("\t  %s\n", vname);
+				free(vname);
+			}
+		}
 
 		ret = 0;
 	} else {
@@ -964,7 +1118,7 @@
 }
 
 /*
- * zpool remove  <pool> <vdev> ...
+ * zpool remove [-npsw] <pool> <vdev> ...
  *
  * Removes the given vdev from the pool.
  */
@@ -978,9 +1132,10 @@
 	int c;
 	boolean_t noop = B_FALSE;
 	boolean_t parsable = B_FALSE;
+	boolean_t wait = B_FALSE;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "nps")) != -1) {
+	while ((c = getopt(argc, argv, "npsw")) != -1) {
 		switch (c) {
 		case 'n':
 			noop = B_TRUE;
@@ -991,6 +1146,9 @@
 		case 's':
 			stop = B_TRUE;
 			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -1024,6 +1182,11 @@
 		}
 		if (zpool_vdev_remove_cancel(zhp) != 0)
 			ret = 1;
+		if (wait) {
+			(void) fprintf(stderr, gettext("invalid option "
+			    "combination: -w cannot be used with -s\n"));
+			usage(B_FALSE);
+		}
 	} else {
 		if (argc < 2) {
 			(void) fprintf(stderr, gettext("missing device\n"));
@@ -1055,6 +1218,9 @@
 					ret = 1;
 			}
 		}
+
+		if (ret == 0 && wait)
+			ret = zpool_wait(zhp, ZPOOL_WAIT_REMOVE);
 	}
 	zpool_close(zhp);
 
@@ -1062,6 +1228,26 @@
 }
 
 /*
+ * Return 1 if a vdev is active (being used in a pool)
+ * Return 0 if a vdev is inactive (offlined or faulted, or not in active pool)
+ *
+ * This is useful for checking if a disk in an active pool is offlined or
+ * faulted.
+ */
+static int
+vdev_is_active(char *vdev_path)
+{
+	/* Name the access mode explicitly; POSIX requires one for open(). */
+	int fd = open(vdev_path, O_RDONLY | O_EXCL);
+
+	if (fd < 0)
+		return (1);	/* can't open O_EXCL - disk is active */
+
+	(void) close(fd);
+	return (0);	/* disk is inactive in the pool */
+}
+
+/*
  * zpool labelclear [-f] <vdev>
  *
  *	-f	Force clearing the label for the vdevs which are members of
@@ -1142,7 +1328,7 @@
 	 * fatal when the device does not support BLKFLSBUF as would be the
 	 * case for a file vdev.
 	 */
-	if ((ioctl(fd, BLKFLSBUF) != 0) && (errno != ENOTTY))
+	if ((zfs_dev_flush(fd) != 0) && (errno != ENOTTY))
 		(void) fprintf(stderr, gettext("failed to invalidate "
 		    "cache for %s: %s\n"), vdev, strerror(errno));
 
@@ -1170,9 +1356,23 @@
 	case POOL_STATE_ACTIVE:
 	case POOL_STATE_SPARE:
 	case POOL_STATE_L2CACHE:
+		/*
+		 * We allow the user to call 'zpool labelclear -f'
+		 * on an offlined disk in an active pool. We can check if
+		 * the disk is online by calling vdev_is_active().
+		 */
+		if (force && !vdev_is_active(vdev))
+			break;
+
 		(void) fprintf(stderr, gettext(
-		    "%s is a member (%s) of pool \"%s\"\n"),
+		    "%s is a member (%s) of pool \"%s\""),
 		    vdev, zpool_pool_state_to_name(state), name);
+
+		if (force) {
+			(void) fprintf(stderr, gettext(
+			    ". Offline the disk first to clear its label."));
+		}
+		printf("\n");
 		ret = 1;
 		goto errout;
 
@@ -1243,13 +1443,15 @@
 {
 	boolean_t force = B_FALSE;
 	boolean_t dryrun = B_FALSE;
-	boolean_t enable_all_pool_feat = B_TRUE;
+	boolean_t enable_pool_features = B_TRUE;
+
 	int c;
 	nvlist_t *nvroot = NULL;
 	char *poolname;
 	char *tname = NULL;
 	int ret = 1;
 	char *altroot = NULL;
+	char *compat = NULL;
 	char *mountpoint = NULL;
 	nvlist_t *fsprops = NULL;
 	nvlist_t *props = NULL;
@@ -1265,7 +1467,7 @@
 			dryrun = B_TRUE;
 			break;
 		case 'd':
-			enable_all_pool_feat = B_FALSE;
+			enable_pool_features = B_FALSE;
 			break;
 		case 'R':
 			altroot = optarg;
@@ -1303,11 +1505,14 @@
 				ver = strtoull(propval, &end, 10);
 				if (*end == '\0' &&
 				    ver < SPA_VERSION_FEATURES) {
-					enable_all_pool_feat = B_FALSE;
+					enable_pool_features = B_FALSE;
 				}
 			}
 			if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT)
 				altroot = propval;
+			if (zpool_name_to_prop(optarg) ==
+			    ZPOOL_PROP_COMPATIBILITY)
+				compat = propval;
 			break;
 		case 'O':
 			if ((propval = strchr(optarg, '=')) == NULL) {
@@ -1495,14 +1700,33 @@
 		    VDEV_ALLOC_BIAS_SPECIAL, 0);
 		print_vdev_tree(NULL, "logs", nvroot, 0,
 		    VDEV_ALLOC_BIAS_LOG, 0);
+		print_cache_list(nvroot, 0);
+		print_spare_list(nvroot, 0);
 
 		ret = 0;
 	} else {
 		/*
-		 * Hand off to libzfs.
+		 * Load in feature set.
+		 * Note: if compatibility property not given, we'll have
+		 * NULL, which means 'all features'.
 		 */
-		spa_feature_t i;
-		for (i = 0; i < SPA_FEATURES; i++) {
+		boolean_t requested_features[SPA_FEATURES];
+		if (zpool_do_load_compat(compat, requested_features) !=
+		    ZPOOL_COMPATIBILITY_OK)
+			goto errout;
+
+		/*
+		 * props contains list of features to enable.
+		 * For each feature:
+		 *  - remove it if feature@name=disabled
+		 *  - leave it there if feature@name=enabled
+		 *  - add it if:
+		 *    - enable_pool_features (ie: no '-d' or '-o version')
+		 *    - it's supported by the kernel module
+		 *    - it's in the requested feature set
+		 *  - warn if it's enabled but not in compat
+		 */
+		for (spa_feature_t i = 0; i < SPA_FEATURES; i++) {
 			char propname[MAXPATHLEN];
 			char *propval;
 			zfeature_info_t *feat = &spa_feature_table[i];
@@ -1510,17 +1734,22 @@
 			(void) snprintf(propname, sizeof (propname),
 			    "feature@%s", feat->fi_uname);
 
-			/*
-			 * Only features contained in props will be enabled:
-			 * remove from the nvlist every ZFS_FEATURE_DISABLED
-			 * value and add every missing ZFS_FEATURE_ENABLED if
-			 * enable_all_pool_feat is set.
-			 */
 			if (!nvlist_lookup_string(props, propname, &propval)) {
 				if (strcmp(propval, ZFS_FEATURE_DISABLED) == 0)
 					(void) nvlist_remove_all(props,
 					    propname);
-			} else if (enable_all_pool_feat) {
+				if (strcmp(propval,
+				    ZFS_FEATURE_ENABLED) == 0 &&
+				    !requested_features[i])
+					(void) fprintf(stderr, gettext(
+					    "Warning: feature \"%s\" enabled "
+					    "but is not in specified "
+					    "'compatibility' feature set.\n"),
+					    feat->fi_uname);
+			} else if (
+			    enable_pool_features &&
+			    feat->fi_zfs_mod_supported &&
+			    requested_features[i]) {
 				ret = add_prop_list(propname,
 				    ZFS_FEATURE_ENABLED, &props, B_TRUE);
 				if (ret != 0)
@@ -1534,8 +1763,10 @@
 			zfs_handle_t *pool = zfs_open(g_zfs,
 			    tname ? tname : poolname, ZFS_TYPE_FILESYSTEM);
 			if (pool != NULL) {
-				if (zfs_mount(pool, NULL, 0) == 0)
+				if (zfs_mount(pool, NULL, 0) == 0) {
 					ret = zfs_shareall(pool);
+					zfs_commit_all_shares();
+				}
 				zfs_close(pool);
 			}
 		} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
@@ -1636,7 +1867,7 @@
 /*
  * Export one pool
  */
-int
+static int
 zpool_export_one(zpool_handle_t *zhp, void *data)
 {
 	export_cbdata_t *cb = data;
@@ -1707,7 +1938,7 @@
 		}
 
 		return (for_each_pool(argc, argv, B_TRUE, NULL,
-		    zpool_export_one, &cb));
+		    B_FALSE, zpool_export_one, &cb));
 	}
 
 	/* check arguments */
@@ -1716,7 +1947,8 @@
 		usage(B_FALSE);
 	}
 
-	ret = for_each_pool(argc, argv, B_TRUE, NULL, zpool_export_one, &cb);
+	ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, zpool_export_one,
+	    &cb);
 
 	return (ret);
 }
@@ -1873,7 +2105,7 @@
 			 * Mark empty values with dashes to make output
 			 * awk-able.
 			 */
-			if (is_blank_str(val))
+			if (val == NULL || is_blank_str(val))
 				val = "-";
 
 			printf("%*s", vcdl->uniq_cols_width[j], val);
@@ -1998,14 +2230,36 @@
 }
 
 /*
+ * Return the color associated with a health string.  This includes returning
+ * NULL for no color change.
+ */
+static char *
+health_str_to_color(const char *health)
+{
+	/* Stays NULL (no color change) unless a group below matches. */
+	char *color = NULL;
+
+	if (strcmp(health, gettext("FAULTED")) == 0 ||
+	    strcmp(health, gettext("SUSPENDED")) == 0 ||
+	    strcmp(health, gettext("UNAVAIL")) == 0) {
+		/* First group of health strings: shown in red. */
+		color = ANSI_RED;
+	} else if (strcmp(health, gettext("OFFLINE")) == 0 ||
+	    strcmp(health, gettext("DEGRADED")) == 0 ||
+	    strcmp(health, gettext("REMOVED")) == 0) {
+		/* Second group of health strings: shown in yellow. */
+		color = ANSI_YELLOW;
+	}
+
+	return (color);
+}
+
+/*
  * Print out configuration state as requested by status_callback.
  */
 static void
 print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
-    nvlist_t *nv, int depth, boolean_t isspare)
+    nvlist_t *nv, int depth, boolean_t isspare, vdev_rebuild_stat_t *vrs)
 {
 	nvlist_t **child, *root;
-	uint_t c, children;
+	uint_t c, i, vsc, children;
 	pool_scan_stat_t *ps = NULL;
 	vdev_stat_t *vs;
 	char rbuf[6], wbuf[6], cbuf[6];
@@ -2015,13 +2269,14 @@
 	const char *state;
 	char *type;
 	char *path = NULL;
+	char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL;
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0)
 		children = 0;
 
 	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
-	    (uint64_t **)&vs, &c) == 0);
+	    (uint64_t **)&vs, &vsc) == 0);
 
 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 
@@ -2029,34 +2284,54 @@
 		return;
 
 	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
+
 	if (isspare) {
 		/*
 		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
 		 * online drives.
 		 */
 		if (vs->vs_aux == VDEV_AUX_SPARED)
-			state = "INUSE";
+			state = gettext("INUSE");
 		else if (vs->vs_state == VDEV_STATE_HEALTHY)
-			state = "AVAIL";
+			state = gettext("AVAIL");
 	}
 
-	(void) printf("\t%*s%-*s  %-8s", depth, "", cb->cb_namewidth - depth,
+	printf_color(health_str_to_color(state),
+	    "\t%*s%-*s  %-8s", depth, "", cb->cb_namewidth - depth,
 	    name, state);
 
 	if (!isspare) {
+		if (vs->vs_read_errors)
+			rcolor = ANSI_RED;
+
+		if (vs->vs_write_errors)
+			wcolor = ANSI_RED;
+
+		if (vs->vs_checksum_errors)
+			ccolor = ANSI_RED;
+
 		if (cb->cb_literal) {
-			printf(" %5llu %5llu %5llu",
-			    (u_longlong_t)vs->vs_read_errors,
-			    (u_longlong_t)vs->vs_write_errors,
+			printf(" ");
+			printf_color(rcolor, "%5llu",
+			    (u_longlong_t)vs->vs_read_errors);
+			printf(" ");
+			printf_color(wcolor, "%5llu",
+			    (u_longlong_t)vs->vs_write_errors);
+			printf(" ");
+			printf_color(ccolor, "%5llu",
 			    (u_longlong_t)vs->vs_checksum_errors);
 		} else {
 			zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
 			zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
 			zfs_nicenum(vs->vs_checksum_errors, cbuf,
 			    sizeof (cbuf));
-			printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+			printf(" ");
+			printf_color(rcolor, "%5s", rbuf);
+			printf(" ");
+			printf_color(wcolor, "%5s", wbuf);
+			printf(" ");
+			printf_color(ccolor, "%5s", cbuf);
 		}
-
 		if (cb->cb_print_slow_ios) {
 			if (children == 0)  {
 				/* Only leafs vdevs have slow IOs */
@@ -2071,16 +2346,15 @@
 			else
 				printf(" %5s", rbuf);
 		}
-
 	}
 
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
 	    &notpresent) == 0) {
 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-		(void) printf("  was %s", path);
+		(void) printf("  %s %s", gettext("was"), path);
 	} else if (vs->vs_aux != 0) {
 		(void) printf("  ");
-
+		color_start(ANSI_RED);
 		switch (vs->vs_aux) {
 		case VDEV_AUX_OPEN_FAILED:
 			(void) printf(gettext("cannot open"));
@@ -2102,6 +2376,10 @@
 			(void) printf(gettext("unsupported feature(s)"));
 			break;
 
+		case VDEV_AUX_ASHIFT_TOO_BIG:
+			(void) printf(gettext("unsupported minimum blocksize"));
+			break;
+
 		case VDEV_AUX_SPARED:
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
 			    &spare_cb.cb_guid) == 0);
@@ -2148,10 +2426,22 @@
 			(void) printf(gettext("all children offline"));
 			break;
 
+		case VDEV_AUX_BAD_LABEL:
+			(void) printf(gettext("invalid label"));
+			break;
+
 		default:
 			(void) printf(gettext("corrupted data"));
 			break;
 		}
+		color_end();
+	} else if (children == 0 && !isspare &&
+	    getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL &&
+	    VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
+	    vs->vs_configured_ashift < vs->vs_physical_ashift) {
+		(void) printf(
+		    gettext("  block size: %dB configured, %dB native"),
+		    1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift);
 	}
 
 	/* The root vdev has the scrub/resilver stats */
@@ -2160,7 +2450,14 @@
 	(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
 	    (uint64_t **)&ps, &c);
 
-	if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0) {
+	/*
+	 * If you force fault a drive that's resilvering, its scan stats can
+	 * get frozen in time, giving the false impression that it's
+	 * being resilvered.  That's why we check the state to see if the vdev
+	 * is healthy before reporting "resilvering" or "repairing".
+	 */
+	if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0 &&
+	    vs->vs_state == VDEV_STATE_HEALTHY) {
 		if (vs->vs_scan_processed != 0) {
 			(void) printf(gettext("  (%s)"),
 			    (ps->pss_func == POOL_SCAN_RESILVER) ?
@@ -2170,6 +2467,14 @@
 		}
 	}
 
+	/* The top-level vdevs have the rebuild stats */
+	if (vrs != NULL && vrs->vrs_state == VDEV_REBUILD_ACTIVE &&
+	    children == 0 && vs->vs_state == VDEV_STATE_HEALTHY) {
+		if (vs->vs_rebuild_processed != 0) {
+			(void) printf(gettext("  (resilvering)"));
+		}
+	}
+
 	if (cb->vcdl != NULL) {
 		if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
 			printf("  ");
@@ -2177,7 +2482,7 @@
 		}
 	}
 
-	/* Display vdev initialization and trim status for leaves */
+	/* Display vdev initialization and trim status for leaves. */
 	if (children == 0) {
 		print_status_initialize(vs, cb->cb_print_vdev_init);
 		print_status_trim(vs, cb->cb_print_vdev_trim);
@@ -2199,11 +2504,17 @@
 		if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS))
 			continue;
 
+		/* Provide vdev_rebuild_stats to children if available */
+		if (vrs == NULL) {
+			(void) nvlist_lookup_uint64_array(nv,
+			    ZPOOL_CONFIG_REBUILD_STATS,
+			    (uint64_t **)&vrs, &i);
+		}
+
 		vname = zpool_vdev_name(g_zfs, zhp, child[c],
 		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
-
 		print_status_config(zhp, cb, vname, child[c], depth + 2,
-		    isspare);
+		    isspare, vrs);
 		free(vname);
 	}
 }
@@ -2268,6 +2579,10 @@
 			(void) printf(gettext("all children offline"));
 			break;
 
+		case VDEV_AUX_BAD_LABEL:
+			(void) printf(gettext("invalid label"));
+			break;
+
 		default:
 			(void) printf(gettext("corrupted data"));
 			break;
@@ -2372,7 +2687,7 @@
 		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 		if (cb->cb_print_status)
 			print_status_config(zhp, cb, name, child[c], 2,
-			    B_FALSE);
+			    B_FALSE, NULL);
 		else
 			print_import_config(cb, name, child[c], 2);
 		free(name);
@@ -2382,8 +2697,8 @@
 /*
  * Display the status for the given pool.
  */
-static void
-show_import(nvlist_t *config)
+static int
+show_import(nvlist_t *config, boolean_t report_error)
 {
 	uint64_t pool_state;
 	vdev_stat_t *vs;
@@ -2415,6 +2730,13 @@
 
 	reason = zpool_import_status(config, &msgid, &errata);
 
+	/*
+	 * If we're importing using a cachefile, then we won't report any
+	 * errors unless we are in the scan phase of the import.
+	 */
+	if (reason != ZPOOL_STATUS_OK && !report_error)
+		return (reason);
+
 	(void) printf(gettext("   pool: %s\n"), name);
 	(void) printf(gettext("     id: %llu\n"), (u_longlong_t)guid);
 	(void) printf(gettext("  state: %s"), health);
@@ -2426,14 +2748,16 @@
 	case ZPOOL_STATUS_MISSING_DEV_R:
 	case ZPOOL_STATUS_MISSING_DEV_NR:
 	case ZPOOL_STATUS_BAD_GUID_SUM:
-		(void) printf(gettext(" status: One or more devices are "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
 		    "missing from the system.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_LABEL_R:
 	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
-		(void) printf(gettext(" status: One or more devices contains "
-		    "corrupted data.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices contains"
+		    " corrupted data.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_DATA:
@@ -2442,81 +2766,123 @@
 		break;
 
 	case ZPOOL_STATUS_OFFLINE_DEV:
-		(void) printf(gettext(" status: One or more devices "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices "
 		    "are offlined.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_POOL:
-		(void) printf(gettext(" status: The pool metadata is "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool metadata is "
 		    "corrupted.\n"));
 		break;
 
 	case ZPOOL_STATUS_VERSION_OLDER:
-		(void) printf(gettext(" status: The pool is formatted using a "
-		    "legacy on-disk version.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool is formatted using "
+		    "a legacy on-disk version.\n"));
 		break;
 
 	case ZPOOL_STATUS_VERSION_NEWER:
-		(void) printf(gettext(" status: The pool is formatted using an "
-		    "incompatible version.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool is formatted using "
+		    "an incompatible version.\n"));
 		break;
 
 	case ZPOOL_STATUS_FEAT_DISABLED:
-		(void) printf(gettext(" status: Some supported features are "
-		    "not enabled on the pool.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Some supported "
+		    "features are not enabled on the pool.\n\t"
+		    "(Note that they may be intentionally disabled "
+		    "if the\n\t'compatibility' property is set.)\n"));
+		break;
+
+	case ZPOOL_STATUS_COMPATIBILITY_ERR:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Error reading or parsing "
+		    "the file(s) indicated by the 'compatibility'\n"
+		    "property.\n"));
+		break;
+
+	case ZPOOL_STATUS_INCOMPATIBLE_FEAT:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more features "
+		    "are enabled on the pool despite not being\n"
+		    "requested by the 'compatibility' property.\n"));
 		break;
 
 	case ZPOOL_STATUS_UNSUP_FEAT_READ:
-		(void) printf(gettext("status: The pool uses the following "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool uses the following "
 		    "feature(s) not supported on this system:\n"));
+		color_start(ANSI_YELLOW);
 		zpool_print_unsup_feat(config);
+		color_end();
 		break;
 
 	case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
-		(void) printf(gettext("status: The pool can only be accessed "
-		    "in read-only mode on this system. It\n\tcannot be "
-		    "accessed in read-write mode because it uses the "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool can only be "
+		    "accessed in read-only mode on this system. It\n\tcannot be"
+		    " accessed in read-write mode because it uses the "
 		    "following\n\tfeature(s) not supported on this system:\n"));
+		color_start(ANSI_YELLOW);
 		zpool_print_unsup_feat(config);
+		color_end();
 		break;
 
 	case ZPOOL_STATUS_HOSTID_ACTIVE:
-		(void) printf(gettext(" status: The pool is currently "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool is currently "
 		    "imported by another system.\n"));
 		break;
 
 	case ZPOOL_STATUS_HOSTID_REQUIRED:
-		(void) printf(gettext(" status: The pool has the "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool has the "
 		    "multihost property on.  It cannot\n\tbe safely imported "
 		    "when the system hostid is not set.\n"));
 		break;
 
 	case ZPOOL_STATUS_HOSTID_MISMATCH:
-		(void) printf(gettext(" status: The pool was last accessed by "
-		    "another system.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool was last accessed "
+		    "by another system.\n"));
 		break;
 
 	case ZPOOL_STATUS_FAULTED_DEV_R:
 	case ZPOOL_STATUS_FAULTED_DEV_NR:
-		(void) printf(gettext(" status: One or more devices are "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
 		    "faulted.\n"));
 		break;
 
 	case ZPOOL_STATUS_BAD_LOG:
-		(void) printf(gettext(" status: An intent log record cannot be "
-		    "read.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("An intent log record cannot "
+		    "be read.\n"));
 		break;
 
 	case ZPOOL_STATUS_RESILVERING:
-		(void) printf(gettext(" status: One or more devices were being "
-		    "resilvered.\n"));
+	case ZPOOL_STATUS_REBUILDING:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices were "
+		    "being resilvered.\n"));
 		break;
 
 	case ZPOOL_STATUS_ERRATA:
-		(void) printf(gettext(" status: Errata #%d detected.\n"),
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Errata #%d detected.\n"),
 		    errata);
 		break;
 
+	case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
+		    "configured to use a non-native block size.\n"
+		    "\tExpect reduced performance.\n"));
+		break;
+
 	default:
 		/*
 		 * No other status can be seen when importing pools.
@@ -2534,6 +2900,12 @@
 			    "imported using its name or numeric identifier, "
 			    "though\n\tsome features will not be available "
 			    "without an explicit 'zpool upgrade'.\n"));
+		} else if (reason == ZPOOL_STATUS_COMPATIBILITY_ERR) {
+			(void) printf(gettext(" action: The pool can be "
+			    "imported using its name or numeric\n\tidentifier, "
+			    "though the file(s) indicated by its "
+			    "'compatibility'\n\tproperty cannot be parsed at "
+			    "this time.\n"));
 		} else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) {
 			(void) printf(gettext(" action: The pool can be "
 			    "imported using its name or numeric "
@@ -2608,13 +2980,15 @@
 			    "backup.\n"));
 			break;
 		case ZPOOL_STATUS_UNSUP_FEAT_READ:
-			(void) printf(gettext("action: The pool cannot be "
+			printf_color(ANSI_BOLD, gettext("action: "));
+			printf_color(ANSI_YELLOW, gettext("The pool cannot be "
 			    "imported. Access the pool on a system that "
 			    "supports\n\tthe required feature(s), or recreate "
 			    "the pool from backup.\n"));
 			break;
 		case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
-			(void) printf(gettext("action: The pool cannot be "
+			printf_color(ANSI_BOLD, gettext("action: "));
+			printf_color(ANSI_YELLOW, gettext("The pool cannot be "
 			    "imported in read-write mode. Import the pool "
 			    "with\n"
 			    "\t\"-o readonly=on\", access the pool on a system "
@@ -2675,9 +3049,11 @@
 			    "the '-f' flag.\n"));
 	}
 
-	if (msgid != NULL)
-		(void) printf(gettext("   see: http://zfsonlinux.org/msg/%s\n"),
+	if (msgid != NULL) {
+		(void) printf(gettext(
+		    "   see: https://openzfs.github.io/openzfs-docs/msg/%s\n"),
 		    msgid);
+	}
 
 	(void) printf(gettext(" config:\n\n"));
 
@@ -2697,6 +3073,7 @@
 		    "be part of this pool, though their\n\texact "
 		    "configuration cannot be determined.\n"));
 	}
+	return (0);
 }
 
 static boolean_t
@@ -2835,6 +3212,121 @@
 	return (ret);
 }
 
+/*
+ * Post-process the candidate configs in 'pools' and import or list them.
+ *
+ * Configs whose destroyed state does not match 'do_destroyed' are skipped,
+ * and import->policy is attached to every remaining config as
+ * ZPOOL_LOAD_POLICY.  What happens next depends on the request:
+ *
+ *  - !pool_specified && do_all: every remaining config is passed to
+ *    do_import().
+ *  - !pool_specified && !do_all: every remaining config is passed to
+ *    show_import().  While reading from a cachefile outside of the scan
+ *    phase (import->cachefile set, import->scan clear) errors are not
+ *    reported by show_import() but returned to the caller instead.
+ *  - pool_specified: the config matching import->poolname, or import->guid
+ *    when no pool name is set, is passed to do_import() together with
+ *    'new_name'.  'orig_name' is only used in the "no such pool" message.
+ *
+ * Returns 0 if no error was recorded and non-zero otherwise.  Finding no
+ * pools at all when none was specified only prints a message.
+ */
+static int
+import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags,
+    char *orig_name, char *new_name,
+    boolean_t do_destroyed, boolean_t pool_specified, boolean_t do_all,
+    importargs_t *import)
+{
+	nvlist_t *config = NULL;
+	nvlist_t *found_config = NULL;
+	uint64_t pool_state;
+
+	/*
+	 * At this point we have a list of import candidate configs. Even if
+	 * we were searching by pool name or guid, we still need to
+	 * post-process the list to deal with pool state and possible
+	 * duplicate names.
+	 */
+	int err = 0;
+	nvpair_t *elem = NULL;
+	boolean_t first = B_TRUE;
+	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+
+		verify(nvpair_value_nvlist(elem, &config) == 0);
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+		    &pool_state) == 0);
+		if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
+			continue;
+		if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
+			continue;
+
+		verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
+		    import->policy) == 0);
+
+		if (!pool_specified) {
+			/* Separate consecutive listings with a blank line. */
+			if (first)
+				first = B_FALSE;
+			else if (!do_all)
+				(void) printf("\n");
+
+			if (do_all) {
+				err |= do_import(config, NULL, mntopts,
+				    props, flags);
+			} else {
+				/*
+				 * If we're importing from cachefile, then
+				 * we don't want to report errors until we
+				 * are in the scan phase of the import. If
+				 * we get an error, then we return that error
+				 * to invoke the scan phase.
+				 *
+				 * The result is accumulated rather than
+				 * assigned, so that a healthy pool later in
+				 * the list cannot hide an earlier failure.
+				 */
+				if (import->cachefile && !import->scan)
+					err |= show_import(config, B_FALSE);
+				else
+					(void) show_import(config, B_TRUE);
+			}
+		} else if (import->poolname != NULL) {
+			char *name;
+
+			/*
+			 * We are searching for a pool based on name.
+			 */
+			verify(nvlist_lookup_string(config,
+			    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
+
+			if (strcmp(name, import->poolname) == 0) {
+				if (found_config != NULL) {
+					(void) fprintf(stderr, gettext(
+					    "cannot import '%s': more than "
+					    "one matching pool\n"),
+					    import->poolname);
+					(void) fprintf(stderr, gettext(
+					    "import by numeric ID instead\n"));
+					err = B_TRUE;
+				}
+				found_config = config;
+			}
+		} else {
+			uint64_t guid;
+
+			/*
+			 * Search for a pool by guid.
+			 */
+			verify(nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+
+			if (guid == import->guid)
+				found_config = config;
+		}
+	}
+
+	/*
+	 * If we were searching for a specific pool, verify that we found a
+	 * pool, and then do the import.
+	 */
+	if (pool_specified && err == 0) {
+		if (found_config == NULL) {
+			(void) fprintf(stderr, gettext("cannot import '%s': "
+			    "no such pool available\n"), orig_name);
+			err = B_TRUE;
+		} else {
+			err |= do_import(found_config, new_name,
+			    mntopts, props, flags);
+		}
+	}
+
+	/*
+	 * If we were just looking for pools, report an error if none were
+	 * found.
+	 */
+	if (!pool_specified && first)
+		(void) fprintf(stderr,
+		    gettext("no pools available to import\n"));
+	return (err);
+}
+
 typedef struct target_exists_args {
 	const char	*poolname;
 	uint64_t	poolguid;
@@ -2876,28 +3368,36 @@
  *       -d         Discard the checkpoint from a checkpointed
  *       --discard  pool.
  *
+ *       -w         Wait for discarding a checkpoint to complete.
+ *       --wait
+ *
  * Checkpoints the specified pool, by taking a "snapshot" of its
  * current state. A pool can only have one checkpoint at a time.
  */
 int
 zpool_do_checkpoint(int argc, char **argv)
 {
-	boolean_t discard;
+	boolean_t discard, wait;
 	char *pool;
 	zpool_handle_t *zhp;
 	int c, err;
 
 	struct option long_options[] = {
 		{"discard", no_argument, NULL, 'd'},
+		{"wait", no_argument, NULL, 'w'},
 		{0, 0, 0, 0}
 	};
 
 	discard = B_FALSE;
-	while ((c = getopt_long(argc, argv, ":d", long_options, NULL)) != -1) {
+	wait = B_FALSE;
+	while ((c = getopt_long(argc, argv, ":dw", long_options, NULL)) != -1) {
 		switch (c) {
 		case 'd':
 			discard = B_TRUE;
 			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -2905,6 +3405,12 @@
 		}
 	}
 
+	if (wait && !discard) {
+		(void) fprintf(stderr, gettext("--wait only valid when "
+		    "--discard also specified\n"));
+		usage(B_FALSE);
+	}
+
 	argc -= optind;
 	argv += optind;
 
@@ -2930,10 +3436,13 @@
 		return (1);
 	}
 
-	if (discard)
+	if (discard) {
 		err = (zpool_discard_checkpoint(zhp) != 0);
-	else
+		if (err == 0 && wait)
+			err = zpool_wait(zhp, ZPOOL_WAIT_CKPT_DISCARD);
+	} else {
 		err = (zpool_checkpoint(zhp) != 0);
+	}
 
 	zpool_close(zhp);
 
@@ -2945,51 +3454,54 @@
 /*
  * zpool import [-d dir] [-D]
  *       import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
- *              [-d dir | -c cachefile] [-f] -a
+ *              [-d dir | -c cachefile | -s] [-f] -a
  *       import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
- *              [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
+ *              [-d dir | -c cachefile | -s] [-f] [-n] [-F] <pool | id>
+ *              [newpool]
  *
- *	 -c	Read pool information from a cachefile instead of searching
- *		devices.
+ *	-c	Read pool information from a cachefile instead of searching
+ *		devices. If importing from a cachefile config fails, then
+ *		fall back to searching for devices only in the directories that
+ *		exist in the cachefile.
  *
- *       -d	Scan in a specific directory, other than /dev/.  More than
+ *	-d	Scan in a specific directory, other than /dev/.  More than
  *		one directory can be specified using multiple '-d' options.
  *
- *       -D     Scan for previously destroyed pools or import all or only
- *              specified destroyed pools.
+ *	-D	Scan for previously destroyed pools or import all or only
+ *		specified destroyed pools.
  *
- *       -R	Temporarily import the pool, with all mountpoints relative to
+ *	-R	Temporarily import the pool, with all mountpoints relative to
  *		the given root.  The pool will remain exported when the machine
  *		is rebooted.
  *
- *       -V	Import even in the presence of faulted vdevs.  This is an
- *       	intentionally undocumented option for testing purposes, and
- *       	treats the pool configuration as complete, leaving any bad
+ *	-V	Import even in the presence of faulted vdevs.  This is an
+ *		intentionally undocumented option for testing purposes, and
+ *		treats the pool configuration as complete, leaving any bad
  *		vdevs in the FAULTED state. In other words, it does verbatim
  *		import.
  *
- *       -f	Force import, even if it appears that the pool is active.
+ *	-f	Force import, even if it appears that the pool is active.
  *
- *       -F     Attempt rewind if necessary.
+ *	-F	Attempt rewind if necessary.
  *
- *       -n     See if rewind would work, but don't actually rewind.
+ *	-n	See if rewind would work, but don't actually rewind.
  *
- *       -N     Import the pool but don't mount datasets.
+ *	-N	Import the pool but don't mount datasets.
  *
- *       -T     Specify a starting txg to use for import. This option is
- *       	intentionally undocumented option for testing purposes.
+ *	-T	Specify a starting txg to use for import. This is an
+ *		intentionally undocumented option for testing purposes.
  *
- *       -a	Import all pools found.
+ *	-a	Import all pools found.
  *
- *       -l	Load encryption keys while importing.
+ *	-l	Load encryption keys while importing.
  *
- *       -o	Set property=value and/or temporary mount options (without '=').
+ *	-o	Set property=value and/or temporary mount options (without '=').
  *
- *	 -s	Scan using the default search path, the libblkid cache will
- *	        not be consulted.
+ *	-s	Scan using the default search path, the libblkid cache will
+ *		not be consulted.
  *
- *       --rewind-to-checkpoint
- *       	Import the pool and revert back to the checkpoint.
+ *	--rewind-to-checkpoint
+ *		Import the pool and revert back to the checkpoint.
  *
  * The import command scans for pools to import, and import pools based on pool
  * name and GUID.  The pool can also be renamed as part of the import process.
@@ -3006,15 +3518,11 @@
 	boolean_t do_all = B_FALSE;
 	boolean_t do_destroyed = B_FALSE;
 	char *mntopts = NULL;
-	nvpair_t *elem;
-	nvlist_t *config;
 	uint64_t searchguid = 0;
 	char *searchname = NULL;
 	char *propval;
-	nvlist_t *found_config;
 	nvlist_t *policy = NULL;
 	nvlist_t *props = NULL;
-	boolean_t first;
 	int flags = ZFS_IMPORT_NORMAL;
 	uint32_t rewind_policy = ZPOOL_NO_REWIND;
 	boolean_t dryrun = B_FALSE;
@@ -3022,7 +3530,8 @@
 	boolean_t xtreme_rewind = B_FALSE;
 	boolean_t do_scan = B_FALSE;
 	boolean_t pool_exists = B_FALSE;
-	uint64_t pool_state, txg = -1ULL;
+	boolean_t pool_specified = B_FALSE;
+	uint64_t txg = -1ULL;
 	char *cachefile = NULL;
 	importargs_t idata = { 0 };
 	char *endptr;
@@ -3144,6 +3653,11 @@
 		usage(B_FALSE);
 	}
 
+	if (cachefile && do_scan) {
+		(void) fprintf(stderr, gettext("-c is incompatible with -s\n"));
+		usage(B_FALSE);
+	}
+
 	if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) {
 		(void) fprintf(stderr, gettext("-l is incompatible with -N\n"));
 		usage(B_FALSE);
@@ -3224,7 +3738,7 @@
 			searchname = argv[0];
 			searchguid = 0;
 		}
-		found_config = NULL;
+		pool_specified = B_TRUE;
 
 		/*
 		 * User specified a name or guid.  Ensure it's unique.
@@ -3303,98 +3817,35 @@
 		return (1);
 	}
 
+	err = import_pools(pools, props, mntopts, flags,
+	    argc >= 1 ? argv[0] : NULL,
+	    argc >= 2 ? argv[1] : NULL,
+	    do_destroyed, pool_specified, do_all, &idata);
+
 	/*
-	 * At this point we have a list of import candidate configs. Even if
-	 * we were searching by pool name or guid, we still need to
-	 * post-process the list to deal with pool state and possible
-	 * duplicate names.
+	 * If we're using the cachefile and we failed to import, then
+	 * fall back to scanning the directory for pools that match
+	 * those in the cachefile.
 	 */
-	err = 0;
-	elem = NULL;
-	first = B_TRUE;
-	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+	if (err != 0 && cachefile != NULL) {
+		(void) printf(gettext("cachefile import failed, retrying\n"));
 
-		verify(nvpair_value_nvlist(elem, &config) == 0);
+		/*
+		 * We use the scan flag to gather the directories that exist
+		 * in the cachefile. If we need to fall back to searching for
+		 * the pool config, we will only search devices in these
+		 * directories.
+		 */
+		idata.scan = B_TRUE;
+		nvlist_free(pools);
+		pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);
 
-		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-		    &pool_state) == 0);
-		if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
-			continue;
-		if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
-			continue;
-
-		verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
-		    policy) == 0);
-
-		if (argc == 0) {
-			if (first)
-				first = B_FALSE;
-			else if (!do_all)
-				(void) printf("\n");
-
-			if (do_all) {
-				err |= do_import(config, NULL, mntopts,
-				    props, flags);
-			} else {
-				show_import(config);
-			}
-		} else if (searchname != NULL) {
-			char *name;
-
-			/*
-			 * We are searching for a pool based on name.
-			 */
-			verify(nvlist_lookup_string(config,
-			    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
-
-			if (strcmp(name, searchname) == 0) {
-				if (found_config != NULL) {
-					(void) fprintf(stderr, gettext(
-					    "cannot import '%s': more than "
-					    "one matching pool\n"), searchname);
-					(void) fprintf(stderr, gettext(
-					    "import by numeric ID instead\n"));
-					err = B_TRUE;
-				}
-				found_config = config;
-			}
-		} else {
-			uint64_t guid;
-
-			/*
-			 * Search for a pool by guid.
-			 */
-			verify(nvlist_lookup_uint64(config,
-			    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
-
-			if (guid == searchguid)
-				found_config = config;
-		}
+		err = import_pools(pools, props, mntopts, flags,
+		    argc >= 1 ? argv[0] : NULL,
+		    argc >= 2 ? argv[1] : NULL,
+		    do_destroyed, pool_specified, do_all, &idata);
 	}
 
-	/*
-	 * If we were searching for a specific pool, verify that we found a
-	 * pool, and then do the import.
-	 */
-	if (argc != 0 && err == 0) {
-		if (found_config == NULL) {
-			(void) fprintf(stderr, gettext("cannot import '%s': "
-			    "no such pool available\n"), argv[0]);
-			err = B_TRUE;
-		} else {
-			err |= do_import(found_config, argc == 1 ? NULL :
-			    argv[1], mntopts, props, flags);
-		}
-	}
-
-	/*
-	 * If we were just looking for pools, report an error if none were
-	 * found.
-	 */
-	if (argc == 0 && first)
-		(void) fprintf(stderr,
-		    gettext("no pools available to import\n"));
-
 error:
 	nvlist_free(props);
 	nvlist_free(pools);
@@ -3441,7 +3892,8 @@
 	argv += optind;
 
 	/* if argc == 0 we will execute zpool_sync_one on all pools */
-	ret = for_each_pool(argc, argv, B_FALSE, NULL, zpool_sync_one, &force);
+	ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, zpool_sync_one,
+	    &force);
 
 	return (ret);
 }
@@ -3575,7 +4027,7 @@
  * If force_column_width is set, use it for the column width.  If not set, use
  * the default column width.
  */
-void
+static void
 print_iostat_labels(iostat_cbdata_t *cb, unsigned int force_column_width,
     const name_and_columns_t labels[][IOSTAT_MAX_LABELS])
 {
@@ -3647,7 +4099,7 @@
  *     sdc         -      -      0      0      5    473  val1  val2
  * ----------  -----  -----  -----  -----  -----  -----  ----  ----
  */
-void
+static void
 print_cmd_columns(vdev_cmd_data_list_t *vcdl, int use_dashes)
 {
 	int i, j;
@@ -3667,7 +4119,7 @@
 			for (j = 0; j < vcdl->uniq_cols_width[i]; j++)
 				printf("-");
 		} else {
-			printf("%*s", vcdl->uniq_cols_width[i],
+			printf_color(ANSI_BOLD, "%*s", vcdl->uniq_cols_width[i],
 			    vcdl->uniq_cols[i]);
 		}
 	}
@@ -3765,6 +4217,8 @@
 	unsigned int namewidth;
 	const char *title;
 
+	color_start(ANSI_BOLD);
+
 	if (cb->cb_flags & IOS_ANYHISTO_M) {
 		title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)];
 	} else if (cb->cb_vdev_names_count) {
@@ -3798,6 +4252,8 @@
 	if (cb->vcdl != NULL)
 		print_cmd_columns(cb->vcdl, 1);
 
+	color_end();
+
 	printf("\n");
 }
 
@@ -3807,6 +4263,37 @@
 	print_iostat_header_impl(cb, 0, NULL);
 }
 
+/*
+ * Prints a size string (e.g. 120M) with the suffix ("M") colored
+ * by order of magnitude. Uses column_size to add padding.
+ */
+static void
+print_stat_color(const char *statbuf, unsigned int column_size)
+{
+	fputs("  ", stdout);
+	size_t len = strlen(statbuf);
+	while (len < column_size) {
+		fputc(' ', stdout);
+		column_size--;
+	}
+	if (*statbuf == '0') {
+		color_start(ANSI_GRAY);
+		fputc('0', stdout);
+	} else {
+		for (; *statbuf; statbuf++) {
+			if (*statbuf == 'K') color_start(ANSI_GREEN);
+			else if (*statbuf == 'M') color_start(ANSI_YELLOW);
+			else if (*statbuf == 'G') color_start(ANSI_RED);
+			else if (*statbuf == 'T') color_start(ANSI_BOLD_BLUE);
+			else if (*statbuf == 'P') color_start(ANSI_MAGENTA);
+			else if (*statbuf == 'E') color_start(ANSI_CYAN);
+			fputc(*statbuf, stdout);
+			if (--column_size <= 0)
+				break;
+		}
+	}
+	color_end();
+}
 
 /*
  * Display a single statistic.
@@ -3822,7 +4309,7 @@
 	if (scripted)
 		printf("\t%s", buf);
 	else
-		printf("  %*s", column_size, buf);
+		print_stat_color(buf, column_size);
 }
 
 /*
@@ -4259,11 +4746,11 @@
 	uint64_t tdelta;
 	double scale;
 
-	calcvs = safe_malloc(sizeof (*calcvs));
-
 	if (strcmp(name, VDEV_TYPE_INDIRECT) == 0)
 		return (ret);
 
+	calcvs = safe_malloc(sizeof (*calcvs));
+
 	if (oldnv != NULL) {
 		verify(nvlist_lookup_uint64_array(oldnv,
 		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0);
@@ -4392,7 +4879,7 @@
 			continue;
 
 		vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
-		    cb->cb_name_flags);
+		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 		ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 		    newchild[c], cb, depth + 2);
 		free(vname);
@@ -4435,7 +4922,7 @@
 			}
 
 			vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
-			    cb->cb_name_flags);
+			    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 			ret += print_vdev_stats(zhp, vname, oldnv ?
 			    oldchild[c] : NULL, newchild[c], cb, depth + 2);
 			free(vname);
@@ -4497,7 +4984,7 @@
 /*
  * Callback to print out the iostats for the given pool.
  */
-int
+static int
 print_iostat(zpool_handle_t *zhp, void *data)
 {
 	iostat_cbdata_t *cb = data;
@@ -4590,7 +5077,7 @@
 	/*
 	 * Determine if the last argument is an integer or a pool name
 	 */
-	if (argc > 0 && isnumber(argv[argc - 1])) {
+	if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
 		char *end;
 
 		errno = 0;
@@ -4598,8 +5085,8 @@
 
 		if (*end == '\0' && errno == 0) {
 			if (interval == 0) {
-				(void) fprintf(stderr, gettext("interval "
-				    "cannot be zero\n"));
+				(void) fprintf(stderr, gettext(
+				    "interval cannot be zero\n"));
 				usage(B_FALSE);
 			}
 			/*
@@ -4620,7 +5107,7 @@
 	 * If the last argument is also an integer, then we have both a count
 	 * and an interval.
 	 */
-	if (argc > 0 && isnumber(argv[argc - 1])) {
+	if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
 		char *end;
 
 		errno = 0;
@@ -4629,8 +5116,8 @@
 
 		if (*end == '\0' && errno == 0) {
 			if (interval == 0) {
-				(void) fprintf(stderr, gettext("interval "
-				    "cannot be zero\n"));
+				(void) fprintf(stderr, gettext(
+				    "interval cannot be zero\n"));
 				usage(B_FALSE);
 			}
 
@@ -4732,11 +5219,12 @@
  * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise.
  */
 static int
-is_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_data)
+is_vdev_cb(void *zhp_data, nvlist_t *nv, void *cb_data)
 {
 	iostat_cbdata_t *cb = cb_data;
 	char *name = NULL;
 	int ret = 0;
+	zpool_handle_t *zhp = zhp_data;
 
 	name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags);
 
@@ -4786,7 +5274,7 @@
 
 		/* Is this name a vdev in our pools? */
 		ret = for_each_pool(pool_count, &pool_name, B_TRUE, NULL,
-		    is_vdev, cb);
+		    B_FALSE, is_vdev, cb);
 		if (!ret) {
 			/* No match */
 			break;
@@ -4814,7 +5302,8 @@
 static int
 is_pool(char *name)
 {
-	return (for_each_pool(0, NULL, B_TRUE, NULL,  is_pool_cb, name));
+	return (for_each_pool(0, NULL, B_TRUE, NULL, B_FALSE, is_pool_cb,
+	    name));
 }
 
 /* Are all our argv[] strings pool names?  If so return 1, 0 otherwise. */
@@ -4910,6 +5399,24 @@
 }
 
 /*
+ * Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or
+ * if we were unable to determine its size.
+ */
+static int
+terminal_height(void)
+{
+	struct winsize win;
+
+	if (isatty(STDOUT_FILENO) == 0)
+		return (-1);
+
+	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) != -1 && win.ws_row > 0)
+		return (win.ws_row);
+
+	return (-1);
+}
+
+/*
  * Run one of the zpool status/iostat -c scripts with the help (-h) option and
  * print the result.
  *
@@ -4954,7 +5461,13 @@
 	if ((dir = opendir(dirpath)) != NULL) {
 		/* print all the files and directories within directory */
 		while ((ent = readdir(dir)) != NULL) {
-			sprintf(fullpath, "%s/%s", dirpath, ent->d_name);
+			if (snprintf(fullpath, sizeof (fullpath), "%s/%s",
+			    dirpath, ent->d_name) >= sizeof (fullpath)) {
+				(void) fprintf(stderr,
+				    gettext("internal error: "
+				    "ZPOOL_SCRIPTS_PATH too large.\n"));
+				exit(1);
+			}
 
 			/* Print the scripts */
 			if (stat(fullpath, &dir_stat) == 0)
@@ -4993,22 +5506,48 @@
 /*
  * Set the minimum pool/vdev name column width.  The width must be at least 10,
  * but may be as large as the column width - 42 so it still fits on one line.
+ * NOTE: 42 is the width of the default capacity/operations/bandwidth output
  */
 static int
 get_namewidth_iostat(zpool_handle_t *zhp, void *data)
 {
 	iostat_cbdata_t *cb = data;
-	int width, columns;
+	int width, available_width;
 
-	width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags,
-	    cb->cb_verbose);
-	columns = get_columns();
+	/*
+	 * get_namewidth() returns the maximum width of any name in that column
+	 * for any pool/vdev/device line that will be output.
+	 */
+	width = get_namewidth(zhp, cb->cb_namewidth,
+	    cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
 
+	/*
+	 * The width we are calculating is the width of the header and also the
+	 * padding width for names that are less than maximum width.  The stats
+	 * take up 42 characters, so the width available for names is:
+	 */
+	available_width = get_columns() - 42;
+
+	/*
+	 * If the maximum width fits on a screen, then great!  Make everything
+	 * line up by justifying all lines to the same width.  If that max
+	 * width is larger than what's available, the name plus stats won't fit
+	 * on one line, and justifying to that width would cause every line to
+	 * wrap on the screen.  We only want lines with long names to wrap.
+	 * Limit the padding to what won't wrap.
+	 */
+	if (width > available_width)
+		width = available_width;
+
+	/*
+	 * And regardless of whatever the screen width is (get_columns can
+	 * return 0 if the width is not known or less than 42 for a narrow
+	 * terminal) have the width be a minimum of 10.
+	 */
 	if (width < 10)
 		width = 10;
-	if (width > columns - 42)
-		width = columns - 42;
 
+	/* Save the calculated width */
 	cb->cb_namewidth = width;
 
 	return (0);
@@ -5049,7 +5588,6 @@
 	int npools;
 	float interval = 0;
 	unsigned long count = 0;
-	struct winsize win;
 	int winheight = 24;
 	zpool_list_t *list;
 	boolean_t verbose = B_FALSE;
@@ -5223,7 +5761,7 @@
 	 * Construct the list of all interesting pools.
 	 */
 	ret = 0;
-	if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL)
+	if ((list = pool_list_get(argc, argv, NULL, parsable, &ret)) == NULL)
 		return (1);
 
 	if (pool_list_count(list) == 0 && argc != 0) {
@@ -5337,25 +5875,19 @@
 				cb.vcdl = NULL;
 			}
 
-			/*
-			 * Are we connected to TTY? If not, headers_once
-			 * should be true, to avoid breaking scripts.
-			 */
-			if (isatty(fileno(stdout)) == 0)
-				headers_once = B_TRUE;
 
 			/*
 			 * Check terminal size so we can print headers
 			 * even when terminal window has its height
 			 * changed.
 			 */
-			if (headers_once == B_FALSE) {
-				if (ioctl(1, TIOCGWINSZ, &win) != -1 &&
-				    win.ws_row > 0)
-					winheight = win.ws_row;
-				else
-					headers_once = B_TRUE;
-			}
+			winheight = terminal_height();
+			/*
+			 * Are we connected to TTY? If not, headers_once
+			 * should be true, to avoid breaking scripts.
+			 */
+			if (winheight < 0)
+				headers_once = B_TRUE;
 
 			/*
 			 * If it's the first time and we're not skipping it,
@@ -5563,6 +6095,7 @@
 	size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL);
 
 	switch (prop) {
+	case ZPOOL_PROP_SIZE:
 	case ZPOOL_PROP_EXPANDSZ:
 	case ZPOOL_PROP_CHECKPOINT:
 	case ZPOOL_PROP_DEDUPRATIO:
@@ -5595,7 +6128,7 @@
 		break;
 	case ZPOOL_PROP_HEALTH:
 		width = 8;
-		snprintf(propval, sizeof (propval), "%-*s", (int)width, str);
+		(void) strlcpy(propval, str, sizeof (propval));
 		break;
 	default:
 		zfs_nicenum_format(value, propval, sizeof (propval), format);
@@ -5614,7 +6147,7 @@
  * print static default line per vdev
  * not compatible with '-o' <proplist> option
  */
-void
+static void
 print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
     list_cbdata_t *cb, int depth, boolean_t isspare)
 {
@@ -5658,8 +6191,12 @@
 		 * 'toplevel' boolean value is passed to the print_one_column()
 		 * to indicate that the value is valid.
 		 */
-		print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL, scripted,
-		    toplevel, format);
+		if (vs->vs_pspace)
+			print_one_column(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL,
+			    scripted, B_TRUE, format);
+		else
+			print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL,
+			    scripted, toplevel, format);
 		print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL,
 		    scripted, toplevel, format);
 		print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc,
@@ -5710,7 +6247,7 @@
 			continue;
 
 		vname = zpool_vdev_name(g_zfs, zhp, child[c],
-		    cb->cb_name_flags);
+		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 		print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE);
 		free(vname);
 	}
@@ -5744,7 +6281,7 @@
 				printed = B_TRUE;
 			}
 			vname = zpool_vdev_name(g_zfs, zhp, child[c],
-			    cb->cb_name_flags);
+			    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 			print_list_stats(zhp, vname, child[c], cb, depth + 2,
 			    B_FALSE);
 			free(vname);
@@ -5781,7 +6318,7 @@
 /*
  * Generic callback function to list a pool.
  */
-int
+static int
 list_callback(zpool_handle_t *zhp, void *data)
 {
 	list_cbdata_t *cbp = data;
@@ -5810,8 +6347,8 @@
 	list_cbdata_t *cb = data;
 	int width;
 
-	width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags,
-	    cb->cb_verbose);
+	width = get_namewidth(zhp, cb->cb_namewidth,
+	    cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
 
 	if (width < 9)
 		width = 9;
@@ -5903,7 +6440,7 @@
 
 	for (;;) {
 		if ((list = pool_list_get(argc, argv, &cb.cb_proplist,
-		    &ret)) == NULL)
+		    cb.cb_literal, &ret)) == NULL)
 			return (1);
 
 		if (pool_list_count(list) == 0)
@@ -5945,6 +6482,8 @@
 zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 {
 	boolean_t force = B_FALSE;
+	boolean_t rebuild = B_FALSE;
+	boolean_t wait = B_FALSE;
 	int c;
 	nvlist_t *nvroot;
 	char *poolname, *old_disk, *new_disk;
@@ -5954,7 +6493,7 @@
 	int ret;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "fo:")) != -1) {
+	while ((c = getopt(argc, argv, "fo:sw")) != -1) {
 		switch (c) {
 		case 'f':
 			force = B_TRUE;
@@ -5972,6 +6511,12 @@
 			    (add_prop_list(optarg, propval, &props, B_TRUE)))
 				usage(B_FALSE);
 			break;
+		case 's':
+			rebuild = B_TRUE;
+			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -6053,7 +6598,12 @@
 		return (1);
 	}
 
-	ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
+	ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing,
+	    rebuild);
+
+	if (ret == 0 && wait)
+		ret = zpool_wait(zhp,
+		    replacing ? ZPOOL_WAIT_REPLACE : ZPOOL_WAIT_RESILVER);
 
 	nvlist_free(props);
 	nvlist_free(nvroot);
@@ -6063,9 +6613,12 @@
 }
 
 /*
- * zpool replace [-f] <pool> <device> <new_device>
+ * zpool replace [-fsw] [-o property=value] <pool> <device> <new_device>
  *
  *	-f	Force attach, even if <new_device> appears to be in use.
+ *	-s	Use sequential instead of healing reconstruction for resilver.
+ *	-o	Set property=value.
+ *	-w	Wait for replacing to complete before returning
  *
  * Replace <device> with <new_device>.
  */
@@ -6077,10 +6630,12 @@
 }
 
 /*
- * zpool attach [-f] [-o property=value] <pool> <device> <new_device>
+ * zpool attach [-fsw] [-o property=value] <pool> <device> <new_device>
  *
  *	-f	Force attach, even if <new_device> appears to be in use.
+ *	-s	Use sequential instead of healing reconstruction for resilver.
  *	-o	Set property=value.
+ *	-w	Wait for resilvering to complete before returning
  *
  * Attach <new_device> to the mirror containing <device>.  If <device> is not
  * part of a mirror, then <device> will be transformed into a mirror of
@@ -6285,6 +6840,10 @@
 			    "following layout:\n\n"), newpool);
 			print_vdev_tree(NULL, newpool, config, 0, "",
 			    flags.name_flags);
+			print_vdev_tree(NULL, "dedup", config, 0,
+			    VDEV_ALLOC_BIAS_DEDUP, 0);
+			print_vdev_tree(NULL, "special", config, 0,
+			    VDEV_ALLOC_BIAS_SPECIAL, 0);
 		}
 	}
 
@@ -6374,6 +6933,17 @@
 		return (1);
 
 	for (i = 1; i < argc; i++) {
+		vdev_state_t oldstate;
+		boolean_t avail_spare, l2cache;
+		nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
+		    &l2cache, NULL);
+		if (tgt == NULL) {
+			ret = 1;
+			continue;
+		}
+		uint_t vsc;
+		oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt,
+		    ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state;
 		if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
 			if (newstate != VDEV_STATE_HEALTHY) {
 				(void) printf(gettext("warning: device '%s' "
@@ -6387,6 +6957,17 @@
 					(void) printf(gettext("use 'zpool "
 					    "replace' to replace devices "
 					    "that are no longer present\n"));
+				if ((flags & ZFS_ONLINE_EXPAND)) {
+					(void) printf(gettext("%s: failed "
+					    "to expand usable space on "
+					    "unhealthy device '%s'\n"),
+					    (oldstate >= VDEV_STATE_DEGRADED ?
+					    "error" : "warning"), argv[i]);
+					if (oldstate >= VDEV_STATE_DEGRADED) {
+						ret = 1;
+						break;
+					}
+				}
 			}
 		} else {
 			ret = 1;
@@ -6637,7 +7218,7 @@
 	argv += optind;
 
 	/* if argc == 0 we will execute zpool_reopen_one on all pools */
-	ret = for_each_pool(argc, argv, B_TRUE, NULL, zpool_reopen_one,
+	ret = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE, zpool_reopen_one,
 	    &scrub_restart);
 
 	return (ret);
@@ -6645,8 +7226,6 @@
 
 typedef struct scrub_cbdata {
 	int	cb_type;
-	int	cb_argc;
-	char	**cb_argv;
 	pool_scrub_cmd_t cb_scrub_cmd;
 } scrub_cbdata_t;
 
@@ -6676,7 +7255,7 @@
 	return (B_FALSE);
 }
 
-int
+static int
 scrub_callback(zpool_handle_t *zhp, void *data)
 {
 	scrub_cbdata_t *cb = data;
@@ -6703,23 +7282,33 @@
 	return (err != 0);
 }
 
+static int
+wait_callback(zpool_handle_t *zhp, void *data)
+{
+	zpool_wait_activity_t *act = data;
+	return (zpool_wait(zhp, *act));
+}
+
 /*
- * zpool scrub [-s | -p] <pool> ...
+ * zpool scrub [-s | -p] [-w] <pool> ...
  *
  *	-s	Stop.  Stops any in-progress scrub.
  *	-p	Pause. Pause in-progress scrub.
+ *	-w	Wait.  Blocks until scrub has completed.
  */
 int
 zpool_do_scrub(int argc, char **argv)
 {
 	int c;
 	scrub_cbdata_t cb;
+	boolean_t wait = B_FALSE;
+	int error;
 
 	cb.cb_type = POOL_SCAN_SCRUB;
 	cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "sp")) != -1) {
+	while ((c = getopt(argc, argv, "spw")) != -1) {
 		switch (c) {
 		case 's':
 			cb.cb_type = POOL_SCAN_NONE;
@@ -6727,6 +7316,9 @@
 		case 'p':
 			cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
 			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -6741,8 +7333,13 @@
 		usage(B_FALSE);
 	}
 
-	cb.cb_argc = argc;
-	cb.cb_argv = argv;
+	if (wait && (cb.cb_type == POOL_SCAN_NONE ||
+	    cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) {
+		(void) fprintf(stderr, gettext("invalid option combination: "
+		    "-w cannot be used with -p or -s\n"));
+		usage(B_FALSE);
+	}
+
 	argc -= optind;
 	argv += optind;
 
@@ -6751,7 +7348,16 @@
 		usage(B_FALSE);
 	}
 
-	return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
+	error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
+	    scrub_callback, &cb);
+
+	if (wait && !error) {
+		zpool_wait_activity_t act = ZPOOL_WAIT_SCRUB;
+		error = for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
+		    wait_callback, &act);
+	}
+
+	return (error);
 }
 
 /*
@@ -6767,8 +7373,6 @@
 
 	cb.cb_type = POOL_SCAN_RESILVER;
 	cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
-	cb.cb_argc = argc;
-	cb.cb_argv = argv;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "")) != -1) {
@@ -6788,7 +7392,8 @@
 		usage(B_FALSE);
 	}
 
-	return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
+	return (for_each_pool(argc, argv, B_TRUE, NULL, B_FALSE,
+	    scrub_callback, &cb));
 }
 
 /*
@@ -6799,6 +7404,7 @@
  *	-r <rate>	Sets the TRIM rate in bytes (per second). Supports
  *			adding a multiplier suffix such as 'k' or 'm'.
  *	-s		Suspend. TRIM can then be restarted with no flags.
+ *	-w		Wait. Blocks until trimming has completed.
  */
 int
 zpool_do_trim(int argc, char **argv)
@@ -6808,15 +7414,17 @@
 		{"secure",	no_argument,		NULL,	'd'},
 		{"rate",	required_argument,	NULL,	'r'},
 		{"suspend",	no_argument,		NULL,	's'},
+		{"wait",	no_argument,		NULL,	'w'},
 		{0, 0, 0, 0}
 	};
 
 	pool_trim_func_t cmd_type = POOL_TRIM_START;
 	uint64_t rate = 0;
 	boolean_t secure = B_FALSE;
+	boolean_t wait = B_FALSE;
 
 	int c;
-	while ((c = getopt_long(argc, argv, "cdr:s", long_options, NULL))
+	while ((c = getopt_long(argc, argv, "cdr:sw", long_options, NULL))
 	    != -1) {
 		switch (c) {
 		case 'c':
@@ -6857,6 +7465,9 @@
 			}
 			cmd_type = POOL_TRIM_SUSPEND;
 			break;
+		case 'w':
+			wait = B_TRUE;
+			break;
 		case '?':
 			if (optopt != 0) {
 				(void) fprintf(stderr,
@@ -6879,6 +7490,12 @@
 		return (-1);
 	}
 
+	if (wait && (cmd_type != POOL_TRIM_START)) {
+		(void) fprintf(stderr, gettext("-w cannot be used with -c or "
+		    "-s\n"));
+		usage(B_FALSE);
+	}
+
 	char *poolname = argv[0];
 	zpool_handle_t *zhp = zpool_open(g_zfs, poolname);
 	if (zhp == NULL)
@@ -6887,6 +7504,7 @@
 	trimflags_t trim_flags = {
 		.secure = secure,
 		.rate = rate,
+		.wait = wait,
 	};
 
 	nvlist_t *vdevs = fnvlist_alloc();
@@ -6913,21 +7531,44 @@
 }
 
 /*
+ * Converts a total number of seconds to a human readable string broken
+ * down into days/hours/minutes/seconds.
+ */
+static void
+secs_to_dhms(uint64_t total, char *buf)
+{
+	uint64_t days = total / 60 / 60 / 24;
+	uint64_t hours = (total / 60 / 60) % 24;
+	uint64_t mins = (total / 60) % 60;
+	uint64_t secs = (total % 60);
+
+	if (days > 0) {
+		(void) sprintf(buf, "%llu days %02llu:%02llu:%02llu",
+		    (u_longlong_t)days, (u_longlong_t)hours,
+		    (u_longlong_t)mins, (u_longlong_t)secs);
+	} else {
+		(void) sprintf(buf, "%02llu:%02llu:%02llu",
+		    (u_longlong_t)hours, (u_longlong_t)mins,
+		    (u_longlong_t)secs);
+	}
+}
+
+/*
  * Print out detailed scrub status.
  */
 static void
-print_scan_status(pool_scan_stat_t *ps)
+print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 {
 	time_t start, end, pause;
-	uint64_t total_secs_left;
-	uint64_t elapsed, secs_left, mins_left, hours_left, days_left;
 	uint64_t pass_scanned, scanned, pass_issued, issued, total;
-	uint64_t scan_rate, issue_rate;
+	uint64_t elapsed, scan_rate, issue_rate;
 	double fraction_done;
 	char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
-	char srate_buf[7], irate_buf[7];
+	char srate_buf[7], irate_buf[7], time_buf[32];
 
-	(void) printf(gettext("  scan: "));
+	printf("  ");
+	printf_color(ANSI_BOLD, gettext("scan:"));
+	printf(" ");
 
 	/* If there's never been a scan, there's not much to say. */
 	if (ps == NULL || ps->pss_func == POOL_SCAN_NONE ||
@@ -6942,38 +7583,31 @@
 
 	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
 
-	assert(ps->pss_func == POOL_SCAN_SCRUB ||
-	    ps->pss_func == POOL_SCAN_RESILVER);
+	int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
+	int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
+	assert(is_resilver || is_scrub);
 
 	/* Scan is finished or canceled. */
 	if (ps->pss_state == DSS_FINISHED) {
-		total_secs_left = end - start;
-		days_left = total_secs_left / 60 / 60 / 24;
-		hours_left = (total_secs_left / 60 / 60) % 24;
-		mins_left = (total_secs_left / 60) % 60;
-		secs_left = (total_secs_left % 60);
+		secs_to_dhms(end - start, time_buf);
 
-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub repaired %s "
-			    "in %llu days %02llu:%02llu:%02llu "
-			    "with %llu errors on %s"), processed_buf,
-			    (u_longlong_t)days_left, (u_longlong_t)hours_left,
-			    (u_longlong_t)mins_left, (u_longlong_t)secs_left,
-			    (u_longlong_t)ps->pss_errors, ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+			    "in %s with %llu errors on %s"), processed_buf,
+			    time_buf, (u_longlong_t)ps->pss_errors,
+			    ctime(&end));
+		} else if (is_resilver) {
 			(void) printf(gettext("resilvered %s "
-			    "in %llu days %02llu:%02llu:%02llu "
-			    "with %llu errors on %s"), processed_buf,
-			    (u_longlong_t)days_left, (u_longlong_t)hours_left,
-			    (u_longlong_t)mins_left, (u_longlong_t)secs_left,
-			    (u_longlong_t)ps->pss_errors, ctime(&end));
+			    "in %s with %llu errors on %s"), processed_buf,
+			    time_buf, (u_longlong_t)ps->pss_errors,
+			    ctime(&end));
 		}
 		return;
 	} else if (ps->pss_state == DSS_CANCELED) {
-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub canceled on %s"),
 			    ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		} else if (is_resilver) {
 			(void) printf(gettext("resilver canceled on %s"),
 			    ctime(&end));
 		}
@@ -6983,7 +7617,7 @@
 	assert(ps->pss_state == DSS_SCANNING);
 
 	/* Scan is in progress. Resilvers can't be paused. */
-	if (ps->pss_func == POOL_SCAN_SCRUB) {
+	if (is_scrub) {
 		if (pause == 0) {
 			(void) printf(gettext("scrub in progress since %s"),
 			    ctime(&start));
@@ -6993,7 +7627,7 @@
 			(void) printf(gettext("\tscrub started on %s"),
 			    ctime(&start));
 		}
-	} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+	} else if (is_resilver) {
 		(void) printf(gettext("resilver in progress since %s"),
 		    ctime(&start));
 	}
@@ -7014,13 +7648,9 @@
 
 	scan_rate = pass_scanned / elapsed;
 	issue_rate = pass_issued / elapsed;
-	total_secs_left = (issue_rate != 0 && total >= issued) ?
+	uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
 	    ((total - issued) / issue_rate) : UINT64_MAX;
-
-	days_left = total_secs_left / 60 / 60 / 24;
-	hours_left = (total_secs_left / 60 / 60) % 24;
-	mins_left = (total_secs_left / 60) % 60;
-	secs_left = (total_secs_left % 60);
+	secs_to_dhms(total_secs_left, time_buf);
 
 	/* format all of the numbers we will be reporting */
 	zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
@@ -7039,21 +7669,105 @@
 		    scanned_buf, issued_buf, total_buf);
 	}
 
-	if (ps->pss_func == POOL_SCAN_RESILVER) {
+	if (is_resilver) {
 		(void) printf(gettext("\t%s resilvered, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
-	} else if (ps->pss_func == POOL_SCAN_SCRUB) {
+	} else if (is_scrub) {
 		(void) printf(gettext("\t%s repaired, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
 	}
 
 	if (pause == 0) {
+		/*
+		 * Provide an estimate iff:
+		 * 1) the time remaining is valid, and
+		 * 2) the issue rate exceeds 10 MB/s, and
+		 * 3) it's either:
+		 *    a) a resilver which has started repairs, or
+		 *    b) a scrub which has entered the issue phase.
+		 */
 		if (total_secs_left != UINT64_MAX &&
-		    issue_rate >= 10 * 1024 * 1024) {
-			(void) printf(gettext(", %llu days "
-			    "%02llu:%02llu:%02llu to go\n"),
-			    (u_longlong_t)days_left, (u_longlong_t)hours_left,
-			    (u_longlong_t)mins_left, (u_longlong_t)secs_left);
+		    issue_rate >= 10 * 1024 * 1024 &&
+		    ((is_resilver && ps->pss_processed > 0) ||
+		    (is_scrub && issued > 0))) {
+			(void) printf(gettext(", %s to go\n"), time_buf);
+		} else {
+			(void) printf(gettext(", no estimated "
+			    "completion time\n"));
+		}
+	} else {
+		(void) printf(gettext("\n"));
+	}
+}
+
+static void
+print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
+{
+	if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
+		return;
+
+	printf("  ");
+	printf_color(ANSI_BOLD, gettext("scan:"));
+	printf(" ");
+
+	uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
+	uint64_t bytes_issued = vrs->vrs_bytes_issued;
+	uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
+	uint64_t bytes_est = vrs->vrs_bytes_est;
+	uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
+	    (vrs->vrs_pass_time_ms + 1)) * 1000;
+	uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
+	    (vrs->vrs_pass_time_ms + 1)) * 1000;
+	double scan_pct = MIN((double)bytes_scanned * 100 /
+	    (bytes_est + 1), 100);
+
+	/* Format all of the numbers we will be reporting */
+	char bytes_scanned_buf[7], bytes_issued_buf[7];
+	char bytes_rebuilt_buf[7], bytes_est_buf[7];
+	char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
+	zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
+	    sizeof (bytes_scanned_buf));
+	zfs_nicebytes(bytes_issued, bytes_issued_buf,
+	    sizeof (bytes_issued_buf));
+	zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
+	    sizeof (bytes_rebuilt_buf));
+	zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
+	zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
+	zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
+
+	time_t start = vrs->vrs_start_time;
+	time_t end = vrs->vrs_end_time;
+
+	/* Rebuild is finished or canceled. */
+	if (vrs->vrs_state == VDEV_REBUILD_COMPLETE) {
+		secs_to_dhms(vrs->vrs_scan_time_ms / 1000, time_buf);
+		(void) printf(gettext("resilvered (%s) %s in %s "
+		    "with %llu errors on %s"), vdev_name, bytes_rebuilt_buf,
+		    time_buf, (u_longlong_t)vrs->vrs_errors, ctime(&end));
+		return;
+	} else if (vrs->vrs_state == VDEV_REBUILD_CANCELED) {
+		(void) printf(gettext("resilver (%s) canceled on %s"),
+		    vdev_name, ctime(&end));
+		return;
+	} else if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
+		(void) printf(gettext("resilver (%s) in progress since %s"),
+		    vdev_name, ctime(&start));
+	}
+
+	assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
+
+	secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
+	    MAX(scan_rate, 1), time_buf);
+
+	(void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
+	    "%s total\n"), bytes_scanned_buf, scan_rate_buf,
+	    bytes_issued_buf, issue_rate_buf, bytes_est_buf);
+	(void) printf(gettext("\t%s resilvered, %.2f%% done"),
+	    bytes_rebuilt_buf, scan_pct);
+
+	if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
+		if (scan_rate >= 10 * 1024 * 1024) {
+			(void) printf(gettext(", %s to go\n"), time_buf);
 		} else {
 			(void) printf(gettext(", no estimated "
 			    "completion time\n"));
@@ -7064,9 +7778,38 @@
 }
 
 /*
- * As we don't scrub checkpointed blocks, we want to warn the
- * user that we skipped scanning some blocks if a checkpoint exists
- * or existed at any time during the scan.
+ * Print the rebuild status of each top-level vdev that has rebuild stats.
+ */
+static void
+print_rebuild_status(zpool_handle_t *zhp, nvlist_t *nvroot)
+{
+	nvlist_t **vdevs;
+	uint_t nvdevs, nstats;
+	vdev_rebuild_stat_t *stats;
+
+	/* No children array means there is nothing to report. */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &vdevs, &nvdevs) != 0)
+		return;
+
+	for (uint_t v = 0; v < nvdevs; v++) {
+		if (nvlist_lookup_uint64_array(vdevs[v],
+		    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&stats,
+		    &nstats) != 0)
+			continue;
+		char *label = zpool_vdev_name(g_zfs, zhp, vdevs[v],
+		    VDEV_NAME_TYPE_ID);
+		print_rebuild_status_impl(stats, label);
+		free(label);
+	}
+}
+
+/*
+ * As we don't scrub checkpointed blocks, we want to warn the user that we
+ * skipped scanning some blocks if a checkpoint exists or existed at any
+ * time during the scan.  If a sequential instead of healing reconstruction
+ * was performed then the blocks were reconstructed.  However, their checksums
+ * have not been verified so we still print the warning.
  */
 static void
 print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs)
@@ -7098,6 +7841,95 @@
 }
 
 /*
+ * Walk the children of 'nvroot' looking at their rebuild stats.  B_TRUE is
+ * returned as soon as an active rebuild is seen, B_FALSE otherwise.  If
+ * 'rebuild_end_time' is non-NULL it is set to the latest end time found,
+ * or to 0 when a rebuild is active or no child has rebuild stats.
+ */
+static boolean_t
+check_rebuilding(nvlist_t *nvroot, uint64_t *rebuild_end_time)
+{
+	nvlist_t **vdevs;
+	uint_t nvdevs, nstats;
+	vdev_rebuild_stat_t *stats;
+	boolean_t active = B_FALSE;
+	uint64_t latest = 0;
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &vdevs, &nvdevs) != 0)
+		nvdevs = 0;
+
+	for (uint_t v = 0; v < nvdevs && !active; v++) {
+		if (nvlist_lookup_uint64_array(vdevs[v],
+		    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&stats,
+		    &nstats) != 0)
+			continue;
+
+		if (stats->vrs_end_time > latest)
+			latest = stats->vrs_end_time;
+		if (stats->vrs_state == VDEV_REBUILD_ACTIVE)
+			active = B_TRUE;
+	}
+
+	/* No end time is reported while a rebuild is still running. */
+	if (active)
+		latest = 0;
+
+	if (rebuild_end_time != NULL)
+		*rebuild_end_time = latest;
+
+	return (active);
+}
+
+/*
+ * Print the scan status: the scrub status when the scan stats describe a
+ * scrub, then the resilver or rebuild status, then the checkpoint warning.
+ */
+static void
+print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
+{
+	pool_scan_stat_t *ps = NULL;
+	pool_checkpoint_stat_t *pcs = NULL;
+	uint64_t resilver_end = 0, rebuild_end = 0;
+	boolean_t scrub = B_FALSE, resilver = B_FALSE, resilvering = B_FALSE;
+	uint_t n;
+
+	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
+	    (uint64_t **)&ps, &n) == 0) {
+		scrub = (ps->pss_func == POOL_SCAN_SCRUB);
+		resilver = (ps->pss_func == POOL_SCAN_RESILVER);
+		if (resilver) {
+			resilver_end = ps->pss_end_time;
+			resilvering = (ps->pss_state == DSS_SCANNING);
+		}
+	}
+
+	boolean_t rebuilding = check_rebuilding(nvroot, &rebuild_end);
+	boolean_t rebuild = (rebuilding || rebuild_end > 0);
+
+	/* A scrub is reported whenever the scan stats describe one. */
+	if (scrub)
+		print_scan_scrub_resilver_status(ps);
+
+	/*
+	 * An active resilver wins, then an active rebuild.  With neither
+	 * active, report whichever of the two ended strictly later.
+	 */
+	boolean_t last_was_resilver = (!rebuilding && resilver &&
+	    resilver_end != 0 && resilver_end > rebuild_end);
+	boolean_t last_was_rebuild = (!resilvering && rebuild &&
+	    rebuild_end != 0 && rebuild_end > resilver_end);
+	if (resilvering || last_was_resilver)
+		print_scan_scrub_resilver_status(ps);
+	else if (rebuilding || last_was_rebuild)
+		print_rebuild_status(zhp, nvroot);
+
+	(void) nvlist_lookup_uint64_array(nvroot,
+	    ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &n);
+	print_checkpoint_scan_warning(ps, pcs);
+}
+
+/*
  * Print out detailed removal status.
  */
 static void
@@ -7125,7 +7957,7 @@
 	vdev_name = zpool_vdev_name(g_zfs, zhp,
 	    child[prs->prs_removing_vdev], B_TRUE);
 
-	(void) printf(gettext("remove: "));
+	printf_color(ANSI_BOLD, gettext("remove: "));
 
 	start = prs->prs_start_time;
 	end = prs->prs_end_time;
@@ -7181,8 +8013,8 @@
 		 * do not print estimated time if hours_left is more than
 		 * 30 days
 		 */
-		(void) printf(gettext("    %s copied out of %s at %s/s, "
-		    "%.2f%% done"),
+		(void) printf(gettext(
+		    "\t%s copied out of %s at %s/s, %.2f%% done"),
 		    examined_buf, total_buf, rate_buf, 100 * fraction_done);
 		if (hours_left < (30 * 24)) {
 			(void) printf(gettext(", %lluh%um to go\n"),
@@ -7192,12 +8024,13 @@
 			    ", (copy is slow, no estimated time)\n"));
 		}
 	}
+	free(vdev_name);
 
 	if (prs->prs_mapping_memory > 0) {
 		char mem_buf[7];
 		zfs_nicenum(prs->prs_mapping_memory, mem_buf, sizeof (mem_buf));
-		(void) printf(gettext("    %s memory used for "
-		    "removed device mappings\n"),
+		(void) printf(gettext(
+		    "\t%s memory used for removed device mappings\n"),
 		    mem_buf);
 	}
 }
@@ -7282,7 +8115,7 @@
 	for (i = 0; i < nspares; i++) {
 		name = zpool_vdev_name(g_zfs, zhp, spares[i],
 		    cb->cb_name_flags);
-		print_status_config(zhp, cb, name, spares[i], 2, B_TRUE);
+		print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, NULL);
 		free(name);
 	}
 }
@@ -7302,7 +8135,8 @@
 	for (i = 0; i < nl2cache; i++) {
 		name = zpool_vdev_name(g_zfs, zhp, l2cache[i],
 		    cb->cb_name_flags);
-		print_status_config(zhp, cb, name, l2cache[i], 2, B_FALSE);
+		print_status_config(zhp, cb, name, l2cache[i], 2,
+		    B_FALSE, NULL);
 		free(name);
 	}
 }
@@ -7352,7 +8186,7 @@
  *        pool: tank
  *	status: DEGRADED
  *	reason: One or more devices ...
- *         see: http://zfsonlinux.org/msg/ZFS-xxxx-01
+ *         see: https://openzfs.github.io/openzfs-docs/msg/ZFS-xxxx-01
  *	config:
  *		mirror		DEGRADED
  *                c1t0d0	OK
@@ -7361,7 +8195,7 @@
  * When given the '-v' option, we print out the complete config.  If the '-e'
  * option is specified, then we print out error rate information as well.
  */
-int
+static int
 status_callback(zpool_handle_t *zhp, void *data)
 {
 	status_cbdata_t *cbp = data;
@@ -7385,7 +8219,9 @@
 	if (cbp->cb_explain &&
 	    (reason == ZPOOL_STATUS_OK ||
 	    reason == ZPOOL_STATUS_VERSION_OLDER ||
-	    reason == ZPOOL_STATUS_FEAT_DISABLED)) {
+	    reason == ZPOOL_STATUS_FEAT_DISABLED ||
+	    reason == ZPOOL_STATUS_COMPATIBILITY_ERR ||
+	    reason == ZPOOL_STATUS_INCOMPATIBLE_FEAT)) {
 		if (!cbp->cb_allpools) {
 			(void) printf(gettext("pool '%s' is healthy\n"),
 			    zpool_get_name(zhp));
@@ -7406,38 +8242,52 @@
 
 	health = zpool_get_state_str(zhp);
 
-	(void) printf(gettext("  pool: %s\n"), zpool_get_name(zhp));
-	(void) printf(gettext(" state: %s\n"), health);
+	printf("  ");
+	printf_color(ANSI_BOLD, gettext("pool:"));
+	printf(" %s\n", zpool_get_name(zhp));
+	printf(" ");
+	printf_color(ANSI_BOLD, gettext("state: "));
+
+	printf_color(health_str_to_color(health), "%s", health);
+
+	printf("\n");
 
 	switch (reason) {
 	case ZPOOL_STATUS_MISSING_DEV_R:
-		(void) printf(gettext("status: One or more devices could not "
-		    "be opened.  Sufficient replicas exist for\n\tthe pool to "
-		    "continue functioning in a degraded state.\n"));
-		(void) printf(gettext("action: Attach the missing device and "
-		    "online it using 'zpool online'.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices could "
+		    "not be opened.  Sufficient replicas exist for\n\tthe pool "
+		    "to continue functioning in a degraded state.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Attach the missing device "
+		    "and online it using 'zpool online'.\n"));
 		break;
 
 	case ZPOOL_STATUS_MISSING_DEV_NR:
-		(void) printf(gettext("status: One or more devices could not "
-		    "be opened.  There are insufficient\n\treplicas for the "
-		    "pool to continue functioning.\n"));
-		(void) printf(gettext("action: Attach the missing device and "
-		    "online it using 'zpool online'.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices could "
+		    "not be opened.  There are insufficient\n\treplicas for the"
+		    " pool to continue functioning.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Attach the missing device "
+		    "and online it using 'zpool online'.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_LABEL_R:
-		(void) printf(gettext("status: One or more devices could not "
-		    "be used because the label is missing or\n\tinvalid.  "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices could "
+		    "not be used because the label is missing or\n\tinvalid.  "
 		    "Sufficient replicas exist for the pool to continue\n\t"
 		    "functioning in a degraded state.\n"));
-		(void) printf(gettext("action: Replace the device using "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Replace the device using "
 		    "'zpool replace'.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
-		(void) printf(gettext("status: One or more devices could not "
-		    "be used because the label is missing \n\tor invalid.  "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices could "
+		    "not be used because the label is missing \n\tor invalid.  "
 		    "There are insufficient replicas for the pool to "
 		    "continue\n\tfunctioning.\n"));
 		zpool_explain_recover(zpool_get_handle(zhp),
@@ -7445,175 +8295,255 @@
 		break;
 
 	case ZPOOL_STATUS_FAILING_DEV:
-		(void) printf(gettext("status: One or more devices has "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices has "
 		    "experienced an unrecoverable error.  An\n\tattempt was "
 		    "made to correct the error.  Applications are "
 		    "unaffected.\n"));
-		(void) printf(gettext("action: Determine if the device needs "
-		    "to be replaced, and clear the errors\n\tusing "
-		    "'zpool clear' or replace the device with 'zpool "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Determine if the "
+		    "device needs to be replaced, and clear the errors\n\tusing"
+		    " 'zpool clear' or replace the device with 'zpool "
 		    "replace'.\n"));
 		break;
 
 	case ZPOOL_STATUS_OFFLINE_DEV:
-		(void) printf(gettext("status: One or more devices has "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices has "
 		    "been taken offline by the administrator.\n\tSufficient "
 		    "replicas exist for the pool to continue functioning in "
 		    "a\n\tdegraded state.\n"));
-		(void) printf(gettext("action: Online the device using "
-		    "'zpool online' or replace the device with\n\t'zpool "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Online the device "
+		    "using 'zpool online' or replace the device with\n\t'zpool "
 		    "replace'.\n"));
 		break;
 
 	case ZPOOL_STATUS_REMOVED_DEV:
-		(void) printf(gettext("status: One or more devices has "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices has "
 		    "been removed by the administrator.\n\tSufficient "
 		    "replicas exist for the pool to continue functioning in "
 		    "a\n\tdegraded state.\n"));
-		(void) printf(gettext("action: Online the device using "
-		    "'zpool online' or replace the device with\n\t'zpool "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Online the device "
+		    "using 'zpool online' or replace the device with\n\t'zpool "
 		    "replace'.\n"));
 		break;
 
 	case ZPOOL_STATUS_RESILVERING:
-		(void) printf(gettext("status: One or more devices is "
+	case ZPOOL_STATUS_REBUILDING:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices is "
 		    "currently being resilvered.  The pool will\n\tcontinue "
 		    "to function, possibly in a degraded state.\n"));
-		(void) printf(gettext("action: Wait for the resilver to "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Wait for the resilver to "
 		    "complete.\n"));
 		break;
 
+	case ZPOOL_STATUS_REBUILD_SCRUB:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices have "
+		    "been sequentially resilvered, scrubbing\n\tthe pool "
+		    "is recommended.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Use 'zpool scrub' to "
+		    "verify all data checksums.\n"));
+		break;
+
 	case ZPOOL_STATUS_CORRUPT_DATA:
-		(void) printf(gettext("status: One or more devices has "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices has "
 		    "experienced an error resulting in data\n\tcorruption.  "
 		    "Applications may be affected.\n"));
-		(void) printf(gettext("action: Restore the file in question "
-		    "if possible.  Otherwise restore the\n\tentire pool from "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Restore the file in question"
+		    " if possible.  Otherwise restore the\n\tentire pool from "
 		    "backup.\n"));
 		break;
 
 	case ZPOOL_STATUS_CORRUPT_POOL:
-		(void) printf(gettext("status: The pool metadata is corrupted "
-		    "and the pool cannot be opened.\n"));
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool metadata is "
+		    "corrupted and the pool cannot be opened.\n"));
 		zpool_explain_recover(zpool_get_handle(zhp),
 		    zpool_get_name(zhp), reason, config);
 		break;
 
 	case ZPOOL_STATUS_VERSION_OLDER:
-		(void) printf(gettext("status: The pool is formatted using a "
-		    "legacy on-disk format.  The pool can\n\tstill be used, "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool is formatted using "
+		    "a legacy on-disk format.  The pool can\n\tstill be used, "
 		    "but some features are unavailable.\n"));
-		(void) printf(gettext("action: Upgrade the pool using 'zpool "
-		    "upgrade'.  Once this is done, the\n\tpool will no longer "
-		    "be accessible on software that does not support\n\t"
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Upgrade the pool using "
+		    "'zpool upgrade'.  Once this is done, the\n\tpool will no "
+		    "longer be accessible on software that does not support\n\t"
 		    "feature flags.\n"));
 		break;
 
 	case ZPOOL_STATUS_VERSION_NEWER:
-		(void) printf(gettext("status: The pool has been upgraded to a "
-		    "newer, incompatible on-disk version.\n\tThe pool cannot "
-		    "be accessed on this system.\n"));
-		(void) printf(gettext("action: Access the pool from a system "
-		    "running more recent software, or\n\trestore the pool from "
-		    "backup.\n"));
-		break;
-
-	case ZPOOL_STATUS_FEAT_DISABLED:
-		(void) printf(gettext("status: Some supported features are not "
-		    "enabled on the pool. The pool can\n\tstill be used, but "
-		    "some features are unavailable.\n"));
-		(void) printf(gettext("action: Enable all features using "
-		    "'zpool upgrade'. Once this is done,\n\tthe pool may no "
-		    "longer be accessible by software that does not support\n\t"
-		    "the features. See zpool-features(5) for details.\n"));
-		break;
-
-	case ZPOOL_STATUS_UNSUP_FEAT_READ:
-		(void) printf(gettext("status: The pool cannot be accessed on "
-		    "this system because it uses the\n\tfollowing feature(s) "
-		    "not supported on this system:\n"));
-		zpool_print_unsup_feat(config);
-		(void) printf("\n");
-		(void) printf(gettext("action: Access the pool from a system "
-		    "that supports the required feature(s),\n\tor restore the "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool has been upgraded "
+		    "to a newer, incompatible on-disk version.\n\tThe pool "
+		    "cannot be accessed on this system.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Access the pool from a "
+		    "system running more recent software, or\n\trestore the "
 		    "pool from backup.\n"));
 		break;
 
+	case ZPOOL_STATUS_FEAT_DISABLED:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Some supported and "
+		    "requested features are not enabled on the pool.\n\t"
+		    "The pool can still be used, but some features are "
+		    "unavailable.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Enable all features using "
+		    "'zpool upgrade'. Once this is done,\n\tthe pool may no "
+		    "longer be accessible by software that does not support\n\t"
+		    "the features. See zpool-features(7) for details.\n"));
+		break;
+
+	case ZPOOL_STATUS_COMPATIBILITY_ERR:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("This pool has a "
+		    "compatibility list specified, but it could not be\n\t"
+		    "read/parsed at this time. The pool can still be used, "
+		    "but this\n\tshould be investigated.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Check the value of the "
+		    "'compatibility' property against the\n\t"
+		    "appropriate file in " ZPOOL_SYSCONF_COMPAT_D " or "
+		    ZPOOL_DATA_COMPAT_D ".\n"));
+		break;
+
+	case ZPOOL_STATUS_INCOMPATIBLE_FEAT:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more features "
+		    "are enabled on the pool despite not being\n\t"
+		    "requested by the 'compatibility' property.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Consider setting "
+		    "'compatibility' to an appropriate value, or\n\t"
+		    "adding needed features to the relevant file in\n\t"
+		    ZPOOL_SYSCONF_COMPAT_D " or " ZPOOL_DATA_COMPAT_D ".\n"));
+		break;
+
+	case ZPOOL_STATUS_UNSUP_FEAT_READ:
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed "
+		    "on this system because it uses the\n\tfollowing feature(s)"
+		    " not supported on this system:\n"));
+		zpool_print_unsup_feat(config);
+		(void) printf("\n");
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Access the pool from a "
+		    "system that supports the required feature(s),\n\tor "
+		    "restore the pool from backup.\n"));
+		break;
+
 	case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
-		(void) printf(gettext("status: The pool can only be accessed "
-		    "in read-only mode on this system. It\n\tcannot be "
-		    "accessed in read-write mode because it uses the "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool can only be "
+		    "accessed in read-only mode on this system. It\n\tcannot be"
+		    " accessed in read-write mode because it uses the "
 		    "following\n\tfeature(s) not supported on this system:\n"));
 		zpool_print_unsup_feat(config);
 		(void) printf("\n");
-		(void) printf(gettext("action: The pool cannot be accessed in "
-		    "read-write mode. Import the pool with\n"
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("The pool cannot be accessed "
+		    "in read-write mode. Import the pool with\n"
 		    "\t\"-o readonly=on\", access the pool from a system that "
 		    "supports the\n\trequired feature(s), or restore the "
 		    "pool from backup.\n"));
 		break;
 
 	case ZPOOL_STATUS_FAULTED_DEV_R:
-		(void) printf(gettext("status: One or more devices are "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
 		    "faulted in response to persistent errors.\n\tSufficient "
 		    "replicas exist for the pool to continue functioning "
 		    "in a\n\tdegraded state.\n"));
-		(void) printf(gettext("action: Replace the faulted device, "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Replace the faulted device, "
 		    "or use 'zpool clear' to mark the device\n\trepaired.\n"));
 		break;
 
 	case ZPOOL_STATUS_FAULTED_DEV_NR:
-		(void) printf(gettext("status: One or more devices are "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
 		    "faulted in response to persistent errors.  There are "
 		    "insufficient replicas for the pool to\n\tcontinue "
 		    "functioning.\n"));
-		(void) printf(gettext("action: Destroy and re-create the pool "
-		    "from a backup source.  Manually marking the device\n"
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Destroy and re-create the "
+		    "pool from a backup source.  Manually marking the device\n"
 		    "\trepaired using 'zpool clear' may allow some data "
 		    "to be recovered.\n"));
 		break;
 
 	case ZPOOL_STATUS_IO_FAILURE_MMP:
-		(void) printf(gettext("status: The pool is suspended because "
-		    "multihost writes failed or were delayed;\n\tanother "
-		    "system could import the pool undetected.\n"));
-		(void) printf(gettext("action: Make sure the pool's devices "
-		    "are connected, then reboot your system and\n\timport the "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("The pool is suspended "
+		    "because multihost writes failed or were delayed;\n\t"
+		    "another system could import the pool undetected.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Make sure the pool's devices"
+		    " are connected, then reboot your system and\n\timport the "
 		    "pool.\n"));
 		break;
 
 	case ZPOOL_STATUS_IO_FAILURE_WAIT:
 	case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
-		(void) printf(gettext("status: One or more devices are "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("One or more devices are "
 		    "faulted in response to IO failures.\n"));
-		(void) printf(gettext("action: Make sure the affected devices "
-		    "are connected, then run 'zpool clear'.\n"));
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Make sure the affected "
+		    "devices are connected, then run 'zpool clear'.\n"));
 		break;
 
 	case ZPOOL_STATUS_BAD_LOG:
-		(void) printf(gettext("status: An intent log record "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("An intent log record "
 		    "could not be read.\n"
 		    "\tWaiting for administrator intervention to fix the "
 		    "faulted pool.\n"));
-		(void) printf(gettext("action: Either restore the affected "
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Either restore the affected "
 		    "device(s) and run 'zpool online',\n"
 		    "\tor ignore the intent log records by running "
 		    "'zpool clear'.\n"));
 		break;
 
+	case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
+		(void) printf(gettext("status: One or more devices are "
+		    "configured to use a non-native block size.\n"
+		    "\tExpect reduced performance.\n"));
+		(void) printf(gettext("action: Replace affected devices with "
+		    "devices that support the\n\tconfigured block size, or "
+		    "migrate data to a properly configured\n\tpool.\n"));
+		break;
+
 	case ZPOOL_STATUS_HOSTID_MISMATCH:
-		(void) printf(gettext("status: Mismatch between pool hostid "
-		    "and system hostid on imported pool.\n\tThis pool was "
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Mismatch between pool hostid"
+		    " and system hostid on imported pool.\n\tThis pool was "
 		    "previously imported into a system with a different "
 		    "hostid,\n\tand then was verbatim imported into this "
 		    "system.\n"));
-		(void) printf(gettext("action: Export this pool on all systems "
-		    "on which it is imported.\n"
+		printf_color(ANSI_BOLD, gettext("action: "));
+		printf_color(ANSI_YELLOW, gettext("Export this pool on all "
+		    "systems on which it is imported.\n"
 		    "\tThen import it to correct the mismatch.\n"));
 		break;
 
 	case ZPOOL_STATUS_ERRATA:
-		(void) printf(gettext("status: Errata #%d detected.\n"),
+		printf_color(ANSI_BOLD, gettext("status: "));
+		printf_color(ANSI_YELLOW, gettext("Errata #%d detected.\n"),
 		    errata);
 
 		switch (errata) {
@@ -7621,16 +8551,18 @@
 			break;
 
 		case ZPOOL_ERRATA_ZOL_2094_SCRUB:
-			(void) printf(gettext("action: To correct the issue "
-			    "run 'zpool scrub'.\n"));
+			printf_color(ANSI_BOLD, gettext("action: "));
+			printf_color(ANSI_YELLOW, gettext("To correct the issue"
+			    " run 'zpool scrub'.\n"));
 			break;
 
 		case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
 			(void) printf(gettext("\tExisting encrypted datasets "
 			    "contain an on-disk incompatibility\n\twhich "
 			    "needs to be corrected.\n"));
-			(void) printf(gettext("action: To correct the issue "
-			    "backup existing encrypted datasets to new\n\t"
+			printf_color(ANSI_BOLD, gettext("action: "));
+			printf_color(ANSI_YELLOW, gettext("To correct the issue"
+			    " backup existing encrypted datasets to new\n\t"
 			    "encrypted datasets and destroy the old ones. "
 			    "'zfs mount -o ro' can\n\tbe used to temporarily "
 			    "mount existing encrypted datasets readonly.\n"));
@@ -7641,13 +8573,14 @@
 			    "and bookmarks contain an on-disk\n\tincompat"
 			    "ibility. This may cause on-disk corruption if "
 			    "they are used\n\twith 'zfs recv'.\n"));
-			(void) printf(gettext("action: To correct the issue, "
-			    "enable the bookmark_v2 feature. No additional\n\t"
-			    "action is needed if there are no encrypted "
-			    "snapshots or bookmarks.\n\tIf preserving the "
-			    "encrypted snapshots and bookmarks is required, "
-			    "use\n\ta non-raw send to backup and restore them. "
-			    "Alternately, they may be\n\tremoved to resolve "
+			printf_color(ANSI_BOLD, gettext("action: "));
+			printf_color(ANSI_YELLOW, gettext("To correct the "
+			    "issue, enable the bookmark_v2 feature. No "
+			    "additional\n\taction is needed if there are no "
+			    "encrypted snapshots or bookmarks.\n\tIf "
+			    "preserving the encrypted snapshots and bookmarks "
+			    "is required, use\n\ta non-raw send to backup and "
+			    "restore them. Alternately, they may be\n\tremoved "
+			    "to resolve the incompatibility.\n"));
 			break;
 
@@ -7667,28 +8600,29 @@
 		assert(reason == ZPOOL_STATUS_OK);
 	}
 
-	if (msgid != NULL)
-		(void) printf(gettext("   see: http://zfsonlinux.org/msg/%s\n"),
+	if (msgid != NULL) {
+		printf("   ");
+		printf_color(ANSI_BOLD, gettext("see:"));
+		printf(gettext(
+		    " https://openzfs.github.io/openzfs-docs/msg/%s\n"),
 		    msgid);
+	}
 
 	if (config != NULL) {
 		uint64_t nerr;
 		nvlist_t **spares, **l2cache;
 		uint_t nspares, nl2cache;
 		pool_checkpoint_stat_t *pcs = NULL;
-		pool_scan_stat_t *ps = NULL;
 		pool_removal_stat_t *prs = NULL;
 
+		print_scan_status(zhp, nvroot);
+
+		(void) nvlist_lookup_uint64_array(nvroot,
+		    ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
+		print_removal_status(zhp, prs);
+
 		(void) nvlist_lookup_uint64_array(nvroot,
 		    ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
-		(void) nvlist_lookup_uint64_array(nvroot,
-		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
-		(void) nvlist_lookup_uint64_array(nvroot,
-		    ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
-
-		print_scan_status(ps);
-		print_checkpoint_scan_warning(ps, pcs);
-		print_removal_status(zhp, prs);
 		print_checkpoint_status(pcs);
 
 		cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0,
@@ -7696,13 +8630,16 @@
 		if (cbp->cb_namewidth < 10)
 			cbp->cb_namewidth = 10;
 
+		color_start(ANSI_BOLD);
 		(void) printf(gettext("config:\n\n"));
 		(void) printf(gettext("\t%-*s  %-8s %5s %5s %5s"),
 		    cbp->cb_namewidth, "NAME", "STATE", "READ", "WRITE",
 		    "CKSUM");
+		color_end();
 
-		if (cbp->cb_print_slow_ios)
-			(void) printf(" %5s", gettext("SLOW"));
+		if (cbp->cb_print_slow_ios) {
+			printf_color(ANSI_BOLD, " %5s", gettext("SLOW"));
+		}
 
 		if (cbp->vcdl != NULL)
 			print_cmd_columns(cbp->vcdl, 0);
@@ -7710,7 +8647,7 @@
 		printf("\n");
 
 		print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0,
-		    B_FALSE);
+		    B_FALSE, NULL);
 
 		print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP);
 		print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL);
@@ -7889,7 +8826,7 @@
 			cb.vcdl = all_pools_for_each_vdev_run(argc, argv, cmd,
 			    NULL, NULL, 0, 0);
 
-		ret = for_each_pool(argc, argv, B_TRUE, NULL,
+		ret = for_each_pool(argc, argv, B_TRUE, NULL, cb.cb_literal,
 		    status_callback, &cb);
 
 		if (cb.vcdl != NULL)
@@ -7954,6 +8891,11 @@
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 	    &oldversion) == 0);
 
+	char compat[ZFS_MAXPROPLEN];
+	if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compat,
+	    ZFS_MAXPROPLEN, NULL, B_FALSE) != 0)
+		compat[0] = '\0';
+
 	assert(SPA_VERSION_IS_SUPPORTED(oldversion));
 	assert(oldversion < version);
 
@@ -7968,6 +8910,13 @@
 		return (1);
 	}
 
+	if (strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0) {
+		(void) fprintf(stderr, gettext("Upgrade not performed because "
+		    "'compatibility' property set to '"
+		    ZPOOL_COMPAT_LEGACY "'.\n"));
+		return (1);
+	}
+
 	ret = zpool_upgrade(zhp, version);
 	if (ret != 0)
 		return (ret);
@@ -7993,11 +8942,25 @@
 	boolean_t firstff = B_TRUE;
 	nvlist_t *enabled = zpool_get_features(zhp);
 
+	char compat[ZFS_MAXPROPLEN];
+	if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compat,
+	    ZFS_MAXPROPLEN, NULL, B_FALSE) != 0)
+		compat[0] = '\0';
+
+	boolean_t requested_features[SPA_FEATURES];
+	if (zpool_do_load_compat(compat, requested_features) !=
+	    ZPOOL_COMPATIBILITY_OK)
+		return (-1);
+
 	count = 0;
 	for (i = 0; i < SPA_FEATURES; i++) {
 		const char *fname = spa_feature_table[i].fi_uname;
 		const char *fguid = spa_feature_table[i].fi_guid;
-		if (!nvlist_exists(enabled, fguid)) {
+
+		if (!spa_feature_table[i].fi_zfs_mod_supported)
+			continue;
+
+		if (!nvlist_exists(enabled, fguid) && requested_features[i]) {
 			char *propname;
 			verify(-1 != asprintf(&propname, "feature@%s", fname));
 			ret = zpool_set_prop(zhp, propname,
@@ -8030,7 +8993,7 @@
 	upgrade_cbdata_t *cbp = arg;
 	nvlist_t *config;
 	uint64_t version;
-	boolean_t printnl = B_FALSE;
+	boolean_t modified_pool = B_FALSE;
 	int ret;
 
 	config = zpool_get_config(zhp, NULL);
@@ -8044,7 +9007,7 @@
 		ret = upgrade_version(zhp, cbp->cb_version);
 		if (ret != 0)
 			return (ret);
-		printnl = B_TRUE;
+		modified_pool = B_TRUE;
 
 		/*
 		 * If they did "zpool upgrade -a", then we could
@@ -8064,12 +9027,13 @@
 
 		if (count > 0) {
 			cbp->cb_first = B_FALSE;
-			printnl = B_TRUE;
+			modified_pool = B_TRUE;
 		}
 	}
 
-	if (printnl) {
-		(void) printf(gettext("\n"));
+	if (modified_pool) {
+		(void) printf("\n");
+		(void) after_zpool_upgrade(zhp);
 	}
 
 	return (0);
@@ -8095,7 +9059,10 @@
 			    "be upgraded to use feature flags.  After "
 			    "being upgraded, these pools\nwill no "
 			    "longer be accessible by software that does not "
-			    "support feature\nflags.\n\n"));
+			    "support feature\nflags.\n\n"
+			    "Note that setting a pool's 'compatibility' "
+			    "feature to '" ZPOOL_COMPAT_LEGACY "' will\n"
+			    "inhibit upgrades.\n\n"));
 			(void) printf(gettext("VER  POOL\n"));
 			(void) printf(gettext("---  ------------\n"));
 			cbp->cb_first = B_FALSE;
@@ -8127,6 +9094,10 @@
 		for (i = 0; i < SPA_FEATURES; i++) {
 			const char *fguid = spa_feature_table[i].fi_guid;
 			const char *fname = spa_feature_table[i].fi_uname;
+
+			if (!spa_feature_table[i].fi_zfs_mod_supported)
+				continue;
+
 			if (!nvlist_exists(enabled, fguid)) {
 				if (cbp->cb_first) {
 					(void) printf(gettext("\nSome "
@@ -8136,8 +9107,12 @@
 					    "pool may become incompatible with "
 					    "software\nthat does not support "
 					    "the feature. See "
-					    "zpool-features(5) for "
-					    "details.\n\n"));
+					    "zpool-features(7) for "
+					    "details.\n\n"
+					    "Note that the pool "
+					    "'compatibility' feature can be "
+					    "used to inhibit\nfeature "
+					    "upgrades.\n\n"));
 					(void) printf(gettext("POOL  "
 					    "FEATURE\n"));
 					(void) printf(gettext("------"
@@ -8171,7 +9146,7 @@
 static int
 upgrade_one(zpool_handle_t *zhp, void *data)
 {
-	boolean_t printnl = B_FALSE;
+	boolean_t modified_pool = B_FALSE;
 	upgrade_cbdata_t *cbp = data;
 	uint64_t cur_version;
 	int ret;
@@ -8199,7 +9174,7 @@
 	}
 
 	if (cur_version != cbp->cb_version) {
-		printnl = B_TRUE;
+		modified_pool = B_TRUE;
 		ret = upgrade_version(zhp, cbp->cb_version);
 		if (ret != 0)
 			return (ret);
@@ -8212,16 +9187,17 @@
 			return (ret);
 
 		if (count != 0) {
-			printnl = B_TRUE;
+			modified_pool = B_TRUE;
 		} else if (cur_version == SPA_VERSION) {
 			(void) printf(gettext("Pool '%s' already has all "
-			    "supported features enabled.\n"),
+			    "supported and requested features enabled.\n"),
 			    zpool_get_name(zhp));
 		}
 	}
 
-	if (printnl) {
-		(void) printf(gettext("\n"));
+	if (modified_pool) {
+		(void) printf("\n");
+		(void) after_zpool_upgrade(zhp);
 	}
 
 	return (0);
@@ -8316,6 +9292,8 @@
 		    "---------------\n");
 		for (i = 0; i < SPA_FEATURES; i++) {
 			zfeature_info_t *fi = &spa_feature_table[i];
+			if (!fi->fi_zfs_mod_supported)
+				continue;
 			const char *ro =
 			    (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
 			    " (read-only compatible)" : "";
@@ -8376,8 +9354,8 @@
 				(void) printf(gettext("All pools are already "
 				    "formatted using feature flags.\n\n"));
 				(void) printf(gettext("Every feature flags "
-				    "pool already has all supported features "
-				    "enabled.\n"));
+				    "pool already has all supported and "
+				    "requested features enabled.\n"));
 			} else {
 				(void) printf(gettext("All pools are already "
 				    "formatted with version %llu or higher.\n"),
@@ -8403,12 +9381,12 @@
 
 		if (cb.cb_first) {
 			(void) printf(gettext("Every feature flags pool has "
-			    "all supported features enabled.\n"));
+			    "all supported and requested features enabled.\n"));
 		} else {
 			(void) printf(gettext("\n"));
 		}
 	} else {
-		ret = for_each_pool(argc, argv, B_FALSE, NULL,
+		ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE,
 		    upgrade_one, &cb);
 	}
 
@@ -8432,7 +9410,7 @@
 	    &records, &numrecords) == 0);
 	for (i = 0; i < numrecords; i++) {
 		nvlist_t *rec = records[i];
-		char tbuf[30] = "";
+		char tbuf[64] = "";
 
 		if (nvlist_exists(rec, ZPOOL_HIST_TIME)) {
 			time_t tsec;
@@ -8444,6 +9422,14 @@
 			(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
 		}
 
+		if (nvlist_exists(rec, ZPOOL_HIST_ELAPSED_NS)) {
+			uint64_t elapsed_ns = fnvlist_lookup_int64(records[i],
+			    ZPOOL_HIST_ELAPSED_NS);
+			(void) snprintf(tbuf + strlen(tbuf),
+			    sizeof (tbuf) - strlen(tbuf),
+			    " (%lldms)", (long long)elapsed_ns / 1000 / 1000);
+		}
+
 		if (nvlist_exists(rec, ZPOOL_HIST_CMD)) {
 			(void) printf("%s %s", tbuf,
 			    fnvlist_lookup_string(rec, ZPOOL_HIST_CMD));
@@ -8494,6 +9480,12 @@
 				dump_nvlist(fnvlist_lookup_nvlist(rec,
 				    ZPOOL_HIST_OUTPUT_NVL), 8);
 			}
+			if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_SIZE)) {
+				(void) printf("    output nvlist omitted; "
+				    "original size: %lldKB\n",
+				    (longlong_t)fnvlist_lookup_int64(rec,
+				    ZPOOL_HIST_OUTPUT_SIZE) / 1024);
+			}
 			if (nvlist_exists(rec, ZPOOL_HIST_ERRNO)) {
 				(void) printf("    errno: %lld\n",
 				    (longlong_t)fnvlist_lookup_int64(rec,
@@ -8591,7 +9583,7 @@
 	argc -= optind;
 	argv += optind;
 
-	ret = for_each_pool(argc, argv, B_FALSE,  NULL, get_history_one,
+	ret = for_each_pool(argc, argv, B_FALSE, NULL, B_FALSE, get_history_one,
 	    &cbdata);
 
 	if (argc == 0 && cbdata.first == B_TRUE) {
@@ -9154,7 +10146,7 @@
 		cb.cb_proplist = &fake_name;
 	}
 
-	ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
+	ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, cb.cb_literal,
 	    get_callback, &cb);
 
 	if (cb.cb_proplist == &fake_name)
@@ -9171,12 +10163,69 @@
 	boolean_t cb_any_successful;
 } set_cbdata_t;
 
-int
+static int
 set_callback(zpool_handle_t *zhp, void *data)
 {
 	int error;
 	set_cbdata_t *cb = (set_cbdata_t *)data;
 
+	/* Check if we have out-of-bounds features */
+	if (strcmp(cb->cb_propname, ZPOOL_CONFIG_COMPATIBILITY) == 0) {
+		boolean_t features[SPA_FEATURES];
+		if (zpool_do_load_compat(cb->cb_value, features) !=
+		    ZPOOL_COMPATIBILITY_OK)
+			return (-1);
+
+		nvlist_t *enabled = zpool_get_features(zhp);
+		spa_feature_t i;
+		for (i = 0; i < SPA_FEATURES; i++) {
+			const char *fguid = spa_feature_table[i].fi_guid;
+			if (nvlist_exists(enabled, fguid) && !features[i])
+				break;
+		}
+		if (i < SPA_FEATURES)
+			(void) fprintf(stderr, gettext("Warning: one or "
+			    "more features already enabled on pool '%s'\n"
+			    "are not present in this compatibility set.\n"),
+			    zpool_get_name(zhp));
+	}
+
+	/* if we're setting a feature, check it's in compatibility set */
+	if (zpool_prop_feature(cb->cb_propname) &&
+	    strcmp(cb->cb_value, ZFS_FEATURE_ENABLED) == 0) {
+		char *fname = strchr(cb->cb_propname, '@') + 1;
+		spa_feature_t f;
+
+		if (zfeature_lookup_name(fname, &f) == 0) {
+			char compat[ZFS_MAXPROPLEN];
+			if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY,
+			    compat, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0)
+				compat[0] = '\0';
+
+			boolean_t features[SPA_FEATURES];
+			if (zpool_do_load_compat(compat, features) !=
+			    ZPOOL_COMPATIBILITY_OK) {
+				(void) fprintf(stderr, gettext("Error: "
+				    "cannot enable feature '%s' on pool '%s'\n"
+				    "because the pool's 'compatibility' "
+				    "property cannot be parsed.\n"),
+				    fname, zpool_get_name(zhp));
+				return (-1);
+			}
+
+			if (!features[f]) {
+				(void) fprintf(stderr, gettext("Error: "
+				    "cannot enable feature '%s' on pool '%s'\n"
+				    "as it is not specified in this pool's "
+				    "current compatibility set.\n"
+				    "Consider setting 'compatibility' to a "
+				    "less restrictive set, or to 'off'.\n"),
+				    fname, zpool_get_name(zhp));
+				return (-1);
+			}
+		}
+	}
+
 	error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value);
 
 	if (!error)
@@ -9224,12 +10273,446 @@
 	*(cb.cb_value) = '\0';
 	cb.cb_value++;
 
-	error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL,
+	error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, B_FALSE,
 	    set_callback, &cb);
 
 	return (error);
 }
 
+/* Add up the total number of bytes left to initialize/trim across all vdevs */
+static uint64_t
+vdev_activity_remaining(nvlist_t *nv, zpool_wait_activity_t activity)
+{
+	uint64_t bytes_remaining;
+	nvlist_t **child;
+	uint_t c, children;	/* c: stats array length, later child index */
+	vdev_stat_t *vs;
+
+	assert(activity == ZPOOL_WAIT_INITIALIZE ||
+	    activity == ZPOOL_WAIT_TRIM);
+	/* The stats array is required here: the lookup is wrapped in verify(). */
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+	/* Only a vdev whose state is ACTIVE contributes (estimate - done). */
+	if (activity == ZPOOL_WAIT_INITIALIZE &&
+	    vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE)
+		bytes_remaining = vs->vs_initialize_bytes_est -
+		    vs->vs_initialize_bytes_done;
+	else if (activity == ZPOOL_WAIT_TRIM &&
+	    vs->vs_trim_state == VDEV_TRIM_ACTIVE)
+		bytes_remaining = vs->vs_trim_bytes_est -
+		    vs->vs_trim_bytes_done;
+	else
+		bytes_remaining = 0;
+	/* A failed children lookup is treated as having no children. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		children = 0;
+	/* Recurse into every child and add its remaining bytes to ours. */
+	for (c = 0; c < children; c++)
+		bytes_remaining += vdev_activity_remaining(child[c], activity);
+
+	return (bytes_remaining);
+}
+
+/* Add up the total number of bytes left to rebuild across top-level vdevs */
+static uint64_t
+vdev_activity_top_remaining(nvlist_t *nv)
+{
+	uint64_t bytes_remaining = 0;
+	nvlist_t **child;
+	uint_t children;
+	int error;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		children = 0;
+	/* Only the direct children of 'nv' are examined; no recursion. */
+	for (uint_t c = 0; c < children; c++) {
+		vdev_rebuild_stat_t *vrs;
+		uint_t i;	/* receives the stats array length; unused */
+		/* A child without rebuild stats contributes nothing. */
+		error = nvlist_lookup_uint64_array(child[c],
+		    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i);
+		if (error == 0) {
+			if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
+				bytes_remaining += (vrs->vrs_bytes_est -
+				    vrs->vrs_bytes_rebuilt);
+			}
+		}
+	}
+
+	return (bytes_remaining);
+}
+
+/* Return B_TRUE if 'nv' or any vdev below it is a spare or replacing vdev. */
+static boolean_t
+vdev_any_spare_replacing(nvlist_t *nv)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	char *vdev_type;
+
+	/* A failed type lookup leaves vdev_type unset, so never compare it. */
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &vdev_type) == 0 &&
+	    (strcmp(vdev_type, VDEV_TYPE_REPLACING) == 0 ||
+	    strcmp(vdev_type, VDEV_TYPE_SPARE) == 0 ||
+	    strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0)) {
+		return (B_TRUE);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		children = 0;
+
+	for (c = 0; c < children; c++) {
+		if (vdev_any_spare_replacing(child[c]))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+typedef struct wait_data {
+	char *wd_poolname;		/* pool named on the command line */
+	boolean_t wd_scripted;		/* -H: no headers, tab-separated */
+	boolean_t wd_exact;		/* -p: print exact byte counts */
+	boolean_t wd_headers_once;	/* option 'n': never reprint headers */
+	boolean_t wd_should_exit;	/* tells the status thread to stop */
+	/* Which activities to wait for */
+	boolean_t wd_enabled[ZPOOL_WAIT_NUM_ACTIVITIES];
+	float wd_interval;		/* seconds between status rows */
+	pthread_cond_t wd_cv;		/* signaled with wd_should_exit */
+	pthread_mutex_t wd_mutex;	/* held around wd_should_exit, wd_cv */
+} wait_data_t;
+
+/*
+ * Print to stdout one row with a column for each enabled activity, giving the
+ * bytes of work left for it. 'row' is the caller's running row number; it
+ * decides when the column headers are printed. 'zhp' is the pool reported on.
+ */
+static void
+print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row)
+{
+	nvlist_t *config, *nvroot;
+	uint_t c;
+	int i;
+	pool_checkpoint_stat_t *pcs = NULL;
+	pool_scan_stat_t *pss = NULL;
+	pool_removal_stat_t *prs = NULL;
+	char *headers[] = {"DISCARD", "FREE", "INITIALIZE", "REPLACE",
+	    "REMOVE", "RESILVER", "SCRUB", "TRIM"};
+	int col_widths[ZPOOL_WAIT_NUM_ACTIVITIES];
+
+	/* Calculate the width of each column */
+	for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) {
+		/*
+		 * Make sure we have enough space in the col for pretty-printed
+		 * numbers and for the column header, and then leave a couple
+		 * spaces between cols for readability.
+		 */
+		col_widths[i] = MAX(strlen(headers[i]), 6) + 2;
+	}
+
+	/* Print header if appropriate (a height of 1 would be modulo by 0) */
+	int term_height = terminal_height();
+	boolean_t reprint_header = (!wd->wd_headers_once && term_height > 1 &&
+	    row % (term_height - 1) == 0);
+	if (!wd->wd_scripted && (row == 0 || reprint_header)) {
+		for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) {
+			if (wd->wd_enabled[i])
+				(void) printf("%*s", col_widths[i], headers[i]);
+		}
+		(void) printf("\n");
+	}
+
+	/* Bytes of work remaining in each activity */
+	int64_t bytes_rem[ZPOOL_WAIT_NUM_ACTIVITIES] = {0};
+
+	bytes_rem[ZPOOL_WAIT_FREE] =
+	    zpool_get_prop_int(zhp, ZPOOL_PROP_FREEING, NULL);
+
+	config = zpool_get_config(zhp, NULL);
+	nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+
+	(void) nvlist_lookup_uint64_array(nvroot,
+	    ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
+	if (pcs != NULL && pcs->pcs_state == CS_CHECKPOINT_DISCARDING)
+		bytes_rem[ZPOOL_WAIT_CKPT_DISCARD] = pcs->pcs_space;
+
+	(void) nvlist_lookup_uint64_array(nvroot,
+	    ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
+	if (prs != NULL && prs->prs_state == DSS_SCANNING)
+		bytes_rem[ZPOOL_WAIT_REMOVE] = prs->prs_to_copy -
+		    prs->prs_copied;
+
+	(void) nvlist_lookup_uint64_array(nvroot,
+	    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&pss, &c);
+	if (pss != NULL && pss->pss_state == DSS_SCANNING &&
+	    pss->pss_pass_scrub_pause == 0) {
+		int64_t rem = pss->pss_to_examine - pss->pss_issued;
+		if (pss->pss_func == POOL_SCAN_SCRUB)
+			bytes_rem[ZPOOL_WAIT_SCRUB] = rem;
+		else
+			bytes_rem[ZPOOL_WAIT_RESILVER] = rem;
+	} else if (check_rebuilding(nvroot, NULL)) {
+		bytes_rem[ZPOOL_WAIT_RESILVER] =
+		    vdev_activity_top_remaining(nvroot);
+	}
+
+	bytes_rem[ZPOOL_WAIT_INITIALIZE] =
+	    vdev_activity_remaining(nvroot, ZPOOL_WAIT_INITIALIZE);
+	bytes_rem[ZPOOL_WAIT_TRIM] =
+	    vdev_activity_remaining(nvroot, ZPOOL_WAIT_TRIM);
+
+	/*
+	 * A replace finishes after resilvering finishes, so the amount of work
+	 * left for a replace is the same as for resilvering.
+	 *
+	 * It isn't quite correct to say that if we have any 'spare' or
+	 * 'replacing' vdevs and a resilver is happening, then a replace is in
+	 * progress, like we do here. When a hot spare is used, the faulted vdev
+	 * is not removed after the hot spare is resilvered, so parent 'spare'
+	 * vdev is not removed either. So we could have a 'spare' vdev, but be
+	 * resilvering for a different reason. However, we use it as a heuristic
+	 * because we don't have access to the DTLs, which could tell us whether
+	 * or not we have really finished resilvering a hot spare.
+	 */
+	if (vdev_any_spare_replacing(nvroot))
+		bytes_rem[ZPOOL_WAIT_REPLACE] = bytes_rem[ZPOOL_WAIT_RESILVER];
+
+	if (timestamp_fmt != NODATE)
+		print_timestamp(timestamp_fmt);
+
+	for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) {
+		char buf[64];
+		if (!wd->wd_enabled[i])
+			continue;
+
+		if (wd->wd_exact)
+			(void) snprintf(buf, sizeof (buf), "%" PRIi64,
+			    bytes_rem[i]);
+		else
+			zfs_nicenum(bytes_rem[i], buf, sizeof (buf));
+
+		if (wd->wd_scripted)
+			(void) printf(i == 0 ? "%s" : "\t%s", buf);
+		else
+			(void) printf(" %*s", col_widths[i] - 1, buf);
+	}
+	(void) printf("\n");
+	(void) fflush(stdout);
+}
+
+static void *
+wait_status_thread(void *arg)
+{
+	wait_data_t *wd = (wait_data_t *)arg;
+	zpool_handle_t *zhp;
+	/* Thread body: print a status row every wd_interval seconds. */
+	if ((zhp = zpool_open(g_zfs, wd->wd_poolname)) == NULL)
+		return (void *)(1);
+
+	for (int row = 0; ; row++) {
+		boolean_t missing;
+		struct timespec timeout;
+		int ret = 0;
+		(void) clock_gettime(CLOCK_REALTIME, &timeout);
+		/* Stop if the refresh fails or reports the pool as missing. */
+		if (zpool_refresh_stats(zhp, &missing) != 0 || missing ||
+		    zpool_props_refresh(zhp) != 0) {
+			zpool_close(zhp);
+			return (void *)(uintptr_t)(missing ? 0 : 1);
+		}
+
+		print_wait_status_row(wd, zhp, row);
+		/* Absolute deadline: the time sampled above plus wd_interval. */
+		timeout.tv_sec += floor(wd->wd_interval);
+		long nanos = timeout.tv_nsec +
+		    (wd->wd_interval - floor(wd->wd_interval)) * NANOSEC;
+		if (nanos >= NANOSEC) {
+			timeout.tv_sec++;
+			timeout.tv_nsec = nanos - NANOSEC;
+		} else {
+			timeout.tv_nsec = nanos;
+		}
+		pthread_mutex_lock(&wd->wd_mutex);
+		if (!wd->wd_should_exit)	/* else ret stays 0 and we exit */
+			ret = pthread_cond_timedwait(&wd->wd_cv, &wd->wd_mutex,
+			    &timeout);
+		pthread_mutex_unlock(&wd->wd_mutex);
+		if (ret == 0) {
+			break; /* signaled by main thread */
+		} else if (ret != ETIMEDOUT) {
+			(void) fprintf(stderr, gettext("pthread_cond_timedwait "
+			    "failed: %s\n"), strerror(ret));
+			zpool_close(zhp);
+			return (void *)(uintptr_t)(1);
+		}
+	}
+
+	zpool_close(zhp);
+	return (void *)(0);	/* the nonzero returns above mean failure */
+}
+
+/* 'zpool wait': block until pool activities finish; returns 0 on success. */
+int
+zpool_do_wait(int argc, char **argv)
+{
+	boolean_t verbose = B_FALSE;
+	int c, i;
+	char *value;
+	unsigned long count;
+	pthread_t status_thr;
+	int error = 0;
+	zpool_handle_t *zhp;
+
+	wait_data_t wd;
+	wd.wd_scripted = B_FALSE;
+	wd.wd_exact = B_FALSE;
+	wd.wd_headers_once = B_FALSE;
+	wd.wd_should_exit = B_FALSE;
+
+	pthread_mutex_init(&wd.wd_mutex, NULL);
+	pthread_cond_init(&wd.wd_cv, NULL);
+
+	/* By default, wait for all types of activity. */
+	for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++)
+		wd.wd_enabled[i] = B_TRUE;
+
+	while ((c = getopt(argc, argv, "HnpT:t:")) != -1) {
+		switch (c) {
+		case 'H':
+			wd.wd_scripted = B_TRUE;
+			break;
+		case 'n':
+			wd.wd_headers_once = B_TRUE;
+			break;
+		case 'p':
+			wd.wd_exact = B_TRUE;
+			break;
+		case 'T':
+			get_timestamp_arg(*optarg);
+			break;
+		case 't':
+		{
+			static char *col_subopts[] = { "discard", "free",
+			    "initialize", "replace", "remove", "resilver",
+			    "scrub", "trim", NULL };
+
+			/* Reset activities array */
+			bzero(&wd.wd_enabled, sizeof (wd.wd_enabled));
+			while (*optarg != '\0') {
+				int activity = getsubopt(&optarg, col_subopts,
+				    &value);
+
+				if (activity < 0) {
+					(void) fprintf(stderr,
+					    gettext("invalid activity '%s'\n"),
+					    value);
+					usage(B_FALSE);
+				}
+
+				wd.wd_enabled[activity] = B_TRUE;
+			}
+			break;
+		}
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	get_interval_count(&argc, argv, &wd.wd_interval, &count);
+	if (count != 0) {
+		/* This subcmd only accepts an interval, not a count */
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	if (wd.wd_interval != 0)
+		verbose = B_TRUE;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing 'pool' argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	wd.wd_poolname = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, wd.wd_poolname)) == NULL)
+		return (1);
+
+	if (verbose) {
+		/*
+		 * We use a separate thread for printing status updates because
+		 * the main thread will call lzc_wait(), which blocks as long
+		 * as an activity is in progress, which can be a long time.
+		 */
+		if ((error = pthread_create(&status_thr, NULL,
+		    wait_status_thread, &wd)) != 0) {
+			(void) fprintf(stderr, gettext("failed to create "
+			    "status thread: %s\n"), strerror(error));
+			zpool_close(zhp);
+			return (1);
+		}
+	}
+
+	/*
+	 * Loop over all activities that we are supposed to wait for until none
+	 * of them are in progress. Note that this means we can end up waiting
+	 * for more activities to complete than just those that were in progress
+	 * when we began waiting; if an activity we are interested in begins
+	 * while we are waiting for another activity, we will wait for both to
+	 * complete before exiting.
+	 */
+	for (;;) {
+		boolean_t missing = B_FALSE;
+		boolean_t any_waited = B_FALSE;
+
+		for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) {
+			boolean_t waited;
+
+			if (!wd.wd_enabled[i])
+				continue;
+
+			error = zpool_wait_status(zhp, i, &missing, &waited);
+			if (error != 0 || missing)
+				break;
+
+			any_waited = (any_waited || waited);
+		}
+
+		if (error != 0 || missing || !any_waited)
+			break;
+	}
+
+	zpool_close(zhp);
+
+	if (verbose) {
+		uintptr_t status;
+		pthread_mutex_lock(&wd.wd_mutex);
+		wd.wd_should_exit = B_TRUE;
+		pthread_cond_signal(&wd.wd_cv);
+		pthread_mutex_unlock(&wd.wd_mutex);
+		(void) pthread_join(status_thr, (void *)&status);
+		if (status != 0)
+			error = status;
+	}
+
+	pthread_mutex_destroy(&wd.wd_mutex);
+	pthread_cond_destroy(&wd.wd_cv);
+	return (error);
+}
+
 static int
 find_command_idx(char *command, int *idx)
 {
@@ -9259,6 +10742,36 @@
 	return (0);
 }
 
+/*
+ * Call zpool_load_compat(); print its report to stderr on error or warning.
+ */
+static zpool_compat_status_t
+zpool_do_load_compat(const char *compat, boolean_t *list)
+{
+	char report[1024];	/* message buffer handed to zpool_load_compat */
+
+	zpool_compat_status_t ret;
+
+	ret = zpool_load_compat(compat, list, report, 1024);
+	switch (ret) {
+
+	case ZPOOL_COMPATIBILITY_OK:
+		break;
+	/* Failures: print the report and return the status unchanged. */
+	case ZPOOL_COMPATIBILITY_NOFILES:
+	case ZPOOL_COMPATIBILITY_BADFILE:
+	case ZPOOL_COMPATIBILITY_BADTOKEN:
+		(void) fprintf(stderr, "Error: %s\n", report);
+		break;
+	/* A warning is printed, then reported to the caller as OK. */
+	case ZPOOL_COMPATIBILITY_WARNTOKEN:
+		(void) fprintf(stderr, "Warning: %s\n", report);
+		ret = ZPOOL_COMPATIBILITY_OK;
+		break;
+	}
+	return (ret);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -9268,6 +10781,7 @@
 	char **newargv;
 
 	(void) setlocale(LC_ALL, "");
+	(void) setlocale(LC_NUMERIC, "C");
 	(void) textdomain(TEXT_DOMAIN);
 	srand(time(NULL));
 

diff --git a/zfs/cmd/zpool/zpool_util.c b/zfs/cmd/zpool/zpool_util.c
index c26c0eb..1c1eb02 100644
--- a/zfs/cmd/zpool/zpool_util.c
+++ b/zfs/cmd/zpool/zpool_util.c

@@ -99,20 +99,6 @@
 }
 
 /*
- * Return 1 if "str" is a number string, 0 otherwise.  Works for integer and
- * floating point numbers.
- */
-int
-isnumber(char *str)
-{
-	for (; *str; str++)
-		if (!(isdigit(*str) || (*str == '.')))
-			return (0);
-
-	return (1);
-}
-
-/*
  * Find highest one bit set.
  * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
  */

diff --git a/zfs/cmd/zpool/zpool_util.h b/zfs/cmd/zpool/zpool_util.h
index ccc2fac..da75866 100644
--- a/zfs/cmd/zpool/zpool_util.h
+++ b/zfs/cmd/zpool/zpool_util.h

@@ -27,6 +27,7 @@
 
 #include <libnvpair.h>
 #include <libzfs.h>
+#include <libzutil.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -42,7 +43,6 @@
 void zpool_no_memory(void);
 uint_t num_logs(nvlist_t *nv);
 uint64_t array64_max(uint64_t array[], unsigned int len);
-int isnumber(char *str);
 int highbit64(uint64_t i);
 int lowbit64(uint64_t i);
 
@@ -65,15 +65,14 @@
  * Pool list functions
  */
 int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **,
-    zpool_iter_f, void *);
+    boolean_t, zpool_iter_f, void *);
 
 /* Vdev list functions */
-typedef int (*pool_vdev_iter_f)(zpool_handle_t *, nvlist_t *, void *);
 int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data);
 
 typedef struct zpool_list zpool_list_t;
 
-zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *);
+zpool_list_t *pool_list_get(int, char **, zprop_list_t **, boolean_t, int *);
 void pool_list_update(zpool_list_t *);
 int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
 void pool_list_free(zpool_list_t *);
@@ -104,7 +103,7 @@
 	char *cmd;		/* Command to run */
 	unsigned int count;	/* Number of vdev_cmd_data items (vdevs) */
 
-	/* vars to whitelist only certain vdevs, if requested */
+	/* fields used to select only certain vdevs, if requested */
 	libzfs_handle_t *g_zfs;
 	char **vdev_names;
 	int vdev_names_count;
@@ -125,6 +124,13 @@
 
 void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
 
+int check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk);
+boolean_t check_sector_size_database(char *path, int *sector_size);
+void vdev_error(const char *fmt, ...);
+int check_file(const char *file, boolean_t force, boolean_t isspare);
+void after_zpool_upgrade(zpool_handle_t *zhp);
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/cmd/zpool/zpool_vdev.c b/zfs/cmd/zpool/zpool_vdev.c
index 527fca0..3d83da6 100644
--- a/zfs/cmd/zpool/zpool_vdev.c
+++ b/zfs/cmd/zpool/zpool_vdev.c

@@ -64,7 +64,6 @@
 
 #include <assert.h>
 #include <ctype.h>
-#include <devid.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libintl.h>
@@ -72,19 +71,12 @@
 #include <libzutil.h>
 #include <limits.h>
 #include <sys/spa.h>
-#include <scsi/scsi.h>
-#include <scsi/sg.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <sys/efi_partition.h>
-#include <sys/stat.h>
-#include <sys/vtoc.h>
-#include <sys/mntent.h>
-#include <uuid/uuid.h>
-#include <blkid/blkid.h>
 #include "zpool_util.h"
 #include <sys/zfs_context.h>
+#include <sys/stat.h>
 
 /*
  * For any given vdev specification, we can have multiple errors.  The
@@ -94,191 +86,8 @@
 boolean_t error_seen;
 boolean_t is_force;
 
-typedef struct vdev_disk_db_entry
-{
-	char id[24];
-	int sector_size;
-} vdev_disk_db_entry_t;
-
-/*
- * Database of block devices that lie about physical sector sizes.  The
- * identification string must be precisely 24 characters to avoid false
- * negatives
- */
-static vdev_disk_db_entry_t vdev_disk_database[] = {
-	{"ATA     ADATA SSD S396 3", 8192},
-	{"ATA     APPLE SSD SM128E", 8192},
-	{"ATA     APPLE SSD SM256E", 8192},
-	{"ATA     APPLE SSD SM512E", 8192},
-	{"ATA     APPLE SSD SM768E", 8192},
-	{"ATA     C400-MTFDDAC064M", 8192},
-	{"ATA     C400-MTFDDAC128M", 8192},
-	{"ATA     C400-MTFDDAC256M", 8192},
-	{"ATA     C400-MTFDDAC512M", 8192},
-	{"ATA     Corsair Force 3 ", 8192},
-	{"ATA     Corsair Force GS", 8192},
-	{"ATA     INTEL SSDSA2CT04", 8192},
-	{"ATA     INTEL SSDSA2BZ10", 8192},
-	{"ATA     INTEL SSDSA2BZ20", 8192},
-	{"ATA     INTEL SSDSA2BZ30", 8192},
-	{"ATA     INTEL SSDSA2CW04", 8192},
-	{"ATA     INTEL SSDSA2CW08", 8192},
-	{"ATA     INTEL SSDSA2CW12", 8192},
-	{"ATA     INTEL SSDSA2CW16", 8192},
-	{"ATA     INTEL SSDSA2CW30", 8192},
-	{"ATA     INTEL SSDSA2CW60", 8192},
-	{"ATA     INTEL SSDSC2CT06", 8192},
-	{"ATA     INTEL SSDSC2CT12", 8192},
-	{"ATA     INTEL SSDSC2CT18", 8192},
-	{"ATA     INTEL SSDSC2CT24", 8192},
-	{"ATA     INTEL SSDSC2CW06", 8192},
-	{"ATA     INTEL SSDSC2CW12", 8192},
-	{"ATA     INTEL SSDSC2CW18", 8192},
-	{"ATA     INTEL SSDSC2CW24", 8192},
-	{"ATA     INTEL SSDSC2CW48", 8192},
-	{"ATA     KINGSTON SH100S3", 8192},
-	{"ATA     KINGSTON SH103S3", 8192},
-	{"ATA     M4-CT064M4SSD2  ", 8192},
-	{"ATA     M4-CT128M4SSD2  ", 8192},
-	{"ATA     M4-CT256M4SSD2  ", 8192},
-	{"ATA     M4-CT512M4SSD2  ", 8192},
-	{"ATA     OCZ-AGILITY2    ", 8192},
-	{"ATA     OCZ-AGILITY3    ", 8192},
-	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
-	{"ATA     OCZ-VERTEX3     ", 8192},
-	{"ATA     OCZ-VERTEX3 LT  ", 8192},
-	{"ATA     OCZ-VERTEX3 MI  ", 8192},
-	{"ATA     OCZ-VERTEX4     ", 8192},
-	{"ATA     SAMSUNG MZ7WD120", 8192},
-	{"ATA     SAMSUNG MZ7WD240", 8192},
-	{"ATA     SAMSUNG MZ7WD480", 8192},
-	{"ATA     SAMSUNG MZ7WD960", 8192},
-	{"ATA     SAMSUNG SSD 830 ", 8192},
-	{"ATA     Samsung SSD 840 ", 8192},
-	{"ATA     SanDisk SSD U100", 8192},
-	{"ATA     TOSHIBA THNSNH06", 8192},
-	{"ATA     TOSHIBA THNSNH12", 8192},
-	{"ATA     TOSHIBA THNSNH25", 8192},
-	{"ATA     TOSHIBA THNSNH51", 8192},
-	{"ATA     APPLE SSD TS064C", 4096},
-	{"ATA     APPLE SSD TS128C", 4096},
-	{"ATA     APPLE SSD TS256C", 4096},
-	{"ATA     APPLE SSD TS512C", 4096},
-	{"ATA     INTEL SSDSA2M040", 4096},
-	{"ATA     INTEL SSDSA2M080", 4096},
-	{"ATA     INTEL SSDSA2M160", 4096},
-	{"ATA     INTEL SSDSC2MH12", 4096},
-	{"ATA     INTEL SSDSC2MH25", 4096},
-	{"ATA     OCZ CORE_SSD    ", 4096},
-	{"ATA     OCZ-VERTEX      ", 4096},
-	{"ATA     SAMSUNG MCCOE32G", 4096},
-	{"ATA     SAMSUNG MCCOE64G", 4096},
-	{"ATA     SAMSUNG SSD PM80", 4096},
-	/* Flash drives optimized for 4KB IOs on larger pages */
-	{"ATA     INTEL SSDSC2BA10", 4096},
-	{"ATA     INTEL SSDSC2BA20", 4096},
-	{"ATA     INTEL SSDSC2BA40", 4096},
-	{"ATA     INTEL SSDSC2BA80", 4096},
-	{"ATA     INTEL SSDSC2BB08", 4096},
-	{"ATA     INTEL SSDSC2BB12", 4096},
-	{"ATA     INTEL SSDSC2BB16", 4096},
-	{"ATA     INTEL SSDSC2BB24", 4096},
-	{"ATA     INTEL SSDSC2BB30", 4096},
-	{"ATA     INTEL SSDSC2BB40", 4096},
-	{"ATA     INTEL SSDSC2BB48", 4096},
-	{"ATA     INTEL SSDSC2BB60", 4096},
-	{"ATA     INTEL SSDSC2BB80", 4096},
-	{"ATA     INTEL SSDSC2BW24", 4096},
-	{"ATA     INTEL SSDSC2BW48", 4096},
-	{"ATA     INTEL SSDSC2BP24", 4096},
-	{"ATA     INTEL SSDSC2BP48", 4096},
-	{"NA      SmrtStorSDLKAE9W", 4096},
-	{"NVMe    Amazon EC2 NVMe ", 4096},
-	/* Imported from Open Solaris */
-	{"ATA     MARVELL SD88SA02", 4096},
-	/* Advanced format Hard drives */
-	{"ATA     Hitachi HDS5C303", 4096},
-	{"ATA     SAMSUNG HD204UI ", 4096},
-	{"ATA     ST2000DL004 HD20", 4096},
-	{"ATA     WDC WD10EARS-00M", 4096},
-	{"ATA     WDC WD10EARS-00S", 4096},
-	{"ATA     WDC WD10EARS-00Z", 4096},
-	{"ATA     WDC WD15EARS-00M", 4096},
-	{"ATA     WDC WD15EARS-00S", 4096},
-	{"ATA     WDC WD15EARS-00Z", 4096},
-	{"ATA     WDC WD20EARS-00M", 4096},
-	{"ATA     WDC WD20EARS-00S", 4096},
-	{"ATA     WDC WD20EARS-00Z", 4096},
-	{"ATA     WDC WD1600BEVT-0", 4096},
-	{"ATA     WDC WD2500BEVT-0", 4096},
-	{"ATA     WDC WD3200BEVT-0", 4096},
-	{"ATA     WDC WD5000BEVT-0", 4096},
-	/* Virtual disks: Assume zvols with default volblocksize */
-#if 0
-	{"ATA     QEMU HARDDISK   ", 8192},
-	{"IET     VIRTUAL-DISK    ", 8192},
-	{"OI      COMSTAR         ", 8192},
-	{"SUN     COMSTAR         ", 8192},
-	{"NETAPP  LUN             ", 8192},
-#endif
-};
-
-static const int vdev_disk_database_size =
-	sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
-
-#define	INQ_REPLY_LEN	96
-#define	INQ_CMD_LEN	6
-
-static boolean_t
-check_sector_size_database(char *path, int *sector_size)
-{
-	unsigned char inq_buff[INQ_REPLY_LEN];
-	unsigned char sense_buffer[32];
-	unsigned char inq_cmd_blk[INQ_CMD_LEN] =
-	    {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
-	sg_io_hdr_t io_hdr;
-	int error;
-	int fd;
-	int i;
-
-	/* Prepare INQUIRY command */
-	memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
-	io_hdr.interface_id = 'S';
-	io_hdr.cmd_len = sizeof (inq_cmd_blk);
-	io_hdr.mx_sb_len = sizeof (sense_buffer);
-	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-	io_hdr.dxfer_len = INQ_REPLY_LEN;
-	io_hdr.dxferp = inq_buff;
-	io_hdr.cmdp = inq_cmd_blk;
-	io_hdr.sbp = sense_buffer;
-	io_hdr.timeout = 10;		/* 10 milliseconds is ample time */
-
-	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
-		return (B_FALSE);
-
-	error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
-
-	(void) close(fd);
-
-	if (error < 0)
-		return (B_FALSE);
-
-	if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
-		return (B_FALSE);
-
-	for (i = 0; i < vdev_disk_database_size; i++) {
-		if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
-			continue;
-
-		*sector_size = vdev_disk_database[i].sector_size;
-		return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
 /*PRINTFLIKE1*/
-static void
+void
 vdev_error(const char *fmt, ...)
 {
 	va_list ap;
@@ -303,7 +112,7 @@
  * Check that a file is valid.  All we can do in this case is check that it's
  * not in use by another pool, and not in use by swap.
  */
-static int
+int
 check_file(const char *file, boolean_t force, boolean_t isspare)
 {
 	char  *name;
@@ -367,150 +176,6 @@
 	return (ret);
 }
 
-static int
-check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
-{
-	int err;
-	char *value;
-
-	/* No valid type detected device is safe to use */
-	value = blkid_get_tag_value(cache, "TYPE", path);
-	if (value == NULL)
-		return (0);
-
-	/*
-	 * If libblkid detects a ZFS device, we check the device
-	 * using check_file() to see if it's safe.  The one safe
-	 * case is a spare device shared between multiple pools.
-	 */
-	if (strcmp(value, "zfs_member") == 0) {
-		err = check_file(path, force, isspare);
-	} else {
-		if (force) {
-			err = 0;
-		} else {
-			err = -1;
-			vdev_error(gettext("%s contains a filesystem of "
-			    "type '%s'\n"), path, value);
-		}
-	}
-
-	free(value);
-
-	return (err);
-}
-
-/*
- * Validate that a disk including all partitions are safe to use.
- *
- * For EFI labeled disks this can done relatively easily with the libefi
- * library.  The partition numbers are extracted from the label and used
- * to generate the expected /dev/ paths.  Each partition can then be
- * checked for conflicts.
- *
- * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
- * but due to the lack of a readily available libraries this scanning is
- * not implemented.  Instead only the device path as given is checked.
- */
-static int
-check_disk(const char *path, blkid_cache cache, int force,
-    boolean_t isspare, boolean_t iswholedisk)
-{
-	struct dk_gpt *vtoc;
-	char slice_path[MAXPATHLEN];
-	int err = 0;
-	int fd, i;
-	int flags = O_RDONLY|O_DIRECT;
-
-	if (!iswholedisk)
-		return (check_slice(path, cache, force, isspare));
-
-	/* only spares can be shared, other devices require exclusive access */
-	if (!isspare)
-		flags |= O_EXCL;
-
-	if ((fd = open(path, flags)) < 0) {
-		char *value = blkid_get_tag_value(cache, "TYPE", path);
-		(void) fprintf(stderr, gettext("%s is in use and contains "
-		    "a %s filesystem.\n"), path, value ? value : "unknown");
-		free(value);
-		return (-1);
-	}
-
-	/*
-	 * Expected to fail for non-EFI labeled disks.  Just check the device
-	 * as given and do not attempt to detect and scan partitions.
-	 */
-	err = efi_alloc_and_read(fd, &vtoc);
-	if (err) {
-		(void) close(fd);
-		return (check_slice(path, cache, force, isspare));
-	}
-
-	/*
-	 * The primary efi partition label is damaged however the secondary
-	 * label at the end of the device is intact.  Rather than use this
-	 * label we should play it safe and treat this as a non efi device.
-	 */
-	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
-		efi_free(vtoc);
-		(void) close(fd);
-
-		if (force) {
-			/* Partitions will now be created using the backup */
-			return (0);
-		} else {
-			vdev_error(gettext("%s contains a corrupt primary "
-			    "EFI label.\n"), path);
-			return (-1);
-		}
-	}
-
-	for (i = 0; i < vtoc->efi_nparts; i++) {
-
-		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
-		    uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
-			continue;
-
-		if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
-			(void) snprintf(slice_path, sizeof (slice_path),
-			    "%s%s%d", path, "-part", i+1);
-		else
-			(void) snprintf(slice_path, sizeof (slice_path),
-			    "%s%s%d", path, isdigit(path[strlen(path)-1]) ?
-			    "p" : "", i+1);
-
-		err = check_slice(slice_path, cache, force, isspare);
-		if (err)
-			break;
-	}
-
-	efi_free(vtoc);
-	(void) close(fd);
-
-	return (err);
-}
-
-static int
-check_device(const char *path, boolean_t force,
-    boolean_t isspare, boolean_t iswholedisk)
-{
-	blkid_cache cache;
-	int error;
-
-	error = blkid_get_cache(&cache, NULL);
-	if (error != 0) {
-		(void) fprintf(stderr, gettext("unable to access the blkid "
-		    "cache.\n"));
-		return (-1);
-	}
-
-	error = check_disk(path, cache, force, isspare, iswholedisk);
-	blkid_put_cache(cache);
-
-	return (error);
-}
-
 /*
  * This may be a shorthand device path or it could be total gibberish.
  * Check to see if it is a known device available in zfs_vdev_paths.
@@ -554,6 +219,9 @@
 	uint_t i, nspares;
 	boolean_t inuse;
 
+	if (zpool_is_draid_spare(path))
+		return (B_TRUE);
+
 	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
 		return (B_FALSE);
 
@@ -599,9 +267,10 @@
  *	/dev/xxx	Complete disk path
  *	/xxx		Full path to file
  *	xxx		Shorthand for <zfs_vdev_paths>/xxx
+ *	draid*		Virtual dRAID spare
  */
 static nvlist_t *
-make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
+make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
 {
 	char path[MAXPATHLEN];
 	struct stat64 statbuf;
@@ -641,6 +310,17 @@
 
 		/* After whole disk check restore original passed path */
 		strlcpy(path, arg, sizeof (path));
+	} else if (zpool_is_draid_spare(arg)) {
+		if (!is_primary) {
+			(void) fprintf(stderr,
+			    gettext("cannot open '%s': dRAID spares can only "
+			    "be used to replace primary vdevs\n"), arg);
+			return (NULL);
+		}
+
+		wholedisk = B_TRUE;
+		strlcpy(path, arg, sizeof (path));
+		type = VDEV_TYPE_DRAID_SPARE;
 	} else {
 		err = is_shorthand_path(arg, path, sizeof (path),
 		    &statbuf, &wholedisk);
@@ -669,17 +349,19 @@
 		}
 	}
 
-	/*
-	 * Determine whether this is a device or a file.
-	 */
-	if (wholedisk || S_ISBLK(statbuf.st_mode)) {
-		type = VDEV_TYPE_DISK;
-	} else if (S_ISREG(statbuf.st_mode)) {
-		type = VDEV_TYPE_FILE;
-	} else {
-		(void) fprintf(stderr, gettext("cannot use '%s': must be a "
-		    "block device or regular file\n"), path);
-		return (NULL);
+	if (type == NULL) {
+		/*
+		 * Determine whether this is a device or a file.
+		 */
+		if (wholedisk || S_ISBLK(statbuf.st_mode)) {
+			type = VDEV_TYPE_DISK;
+		} else if (S_ISREG(statbuf.st_mode)) {
+			type = VDEV_TYPE_FILE;
+		} else {
+			fprintf(stderr, gettext("cannot use '%s': must "
+			    "be a block device or regular file\n"), path);
+			return (NULL);
+		}
 	}
 
 	/*
@@ -690,10 +372,7 @@
 	verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
-	verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
-	if (is_log)
-		verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS,
-		    VDEV_ALLOC_BIAS_LOG) == 0);
+
 	if (strcmp(type, VDEV_TYPE_DISK) == 0)
 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    (uint64_t)wholedisk) == 0);
@@ -764,11 +443,16 @@
 
 #define	ZPOOL_FUZZ	(16 * 1024 * 1024)
 
+/*
+ * N.B. For the purposes of comparing replication levels dRAID can be
+ * considered functionally equivalent to raidz.
+ */
 static boolean_t
 is_raidz_mirror(replication_level_t *a, replication_level_t *b,
     replication_level_t **raidz, replication_level_t **mirror)
 {
-	if (strcmp(a->zprl_type, "raidz") == 0 &&
+	if ((strcmp(a->zprl_type, "raidz") == 0 ||
+	    strcmp(a->zprl_type, "draid") == 0) &&
 	    strcmp(b->zprl_type, "mirror") == 0) {
 		*raidz = a;
 		*mirror = b;
@@ -778,6 +462,22 @@
 }
 
 /*
+ * Comparison for determining if dRAID and raidz were passed in either order.
+ */
+static boolean_t
+is_raidz_draid(replication_level_t *a, replication_level_t *b)
+{
+	if ((strcmp(a->zprl_type, "raidz") == 0 ||
+	    strcmp(a->zprl_type, "draid") == 0) &&
+	    (strcmp(b->zprl_type, "raidz") == 0 ||
+	    strcmp(b->zprl_type, "draid") == 0)) {
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Given a list of toplevel vdevs, return the current replication level.  If
  * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
  * an error message will be displayed for each self-inconsistent vdev.
@@ -843,7 +543,8 @@
 			rep.zprl_type = type;
 			rep.zprl_children = 0;
 
-			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
+			    strcmp(type, VDEV_TYPE_DRAID) == 0) {
 				verify(nvlist_lookup_uint64(nv,
 				    ZPOOL_CONFIG_NPARITY,
 				    &rep.zprl_parity) == 0);
@@ -956,7 +657,7 @@
 				 */
 				if (!dontreport &&
 				    (vdev_size != -1LL &&
-				    (labs(size - vdev_size) >
+				    (llabs(size - vdev_size) >
 				    ZPOOL_FUZZ))) {
 					if (ret != NULL)
 						free(ret);
@@ -1009,6 +710,29 @@
 					else
 						return (NULL);
 				}
+			} else if (is_raidz_draid(&lastrep, &rep)) {
+				/*
+				 * Accept raidz and draid when they can
+				 * handle the same number of disk failures.
+				 */
+				if (lastrep.zprl_parity != rep.zprl_parity) {
+					if (ret != NULL)
+						free(ret);
+					ret = NULL;
+					if (fatal)
+						vdev_error(gettext(
+						    "mismatched replication "
+						    "level: %s and %s vdevs "
+						    "with different "
+						    "redundancy, %llu vs. "
+						    "%llu are present\n"),
+						    lastrep.zprl_type,
+						    rep.zprl_type,
+						    lastrep.zprl_parity,
+						    rep.zprl_parity);
+					else
+						return (NULL);
+				}
 			} else if (strcmp(lastrep.zprl_type, rep.zprl_type) !=
 			    0) {
 				if (ret != NULL)
@@ -1273,6 +997,10 @@
 		if (fd == -1) {
 			if (errno == EBUSY)
 				is_exclusive = 1;
+#ifdef __FreeBSD__
+			if (errno == EPERM)
+				is_exclusive = 1;
+#endif
 		} else {
 			(void) close(fd);
 		}
@@ -1431,31 +1159,87 @@
 	return (anyinuse);
 }
 
+/*
+ * Returns the parity level extracted from a raidz or draid type.
+ * If the parity cannot be determined zero is returned.
+ */
+static int
+get_parity(const char *type)
+{
+	long parity = 0;
+	const char *p;
+
+	if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0) {
+		p = type + strlen(VDEV_TYPE_RAIDZ);
+
+		if (*p == '\0') {
+			/* when unspecified default to single parity */
+			return (1);
+		} else if (*p == '0') {
+			/* no zero prefixes allowed */
+			return (0);
+		} else {
+			/* 1-3, no suffixes allowed */
+			char *end;
+			errno = 0;
+			parity = strtol(p, &end, 10);
+			if (errno != 0 || *end != '\0' ||
+			    parity < 1 || parity > VDEV_RAIDZ_MAXPARITY) {
+				return (0);
+			}
+		}
+	} else if (strncmp(type, VDEV_TYPE_DRAID,
+	    strlen(VDEV_TYPE_DRAID)) == 0) {
+		p = type + strlen(VDEV_TYPE_DRAID);
+
+		if (*p == '\0' || *p == ':') {
+			/* when unspecified default to single parity */
+			return (1);
+		} else if (*p == '0') {
+			/* no zero prefixes allowed */
+			return (0);
+		} else {
+			/* 1-3, allowed suffixes: '\0' or ':' */
+			char *end;
+			errno = 0;
+			parity = strtol(p, &end, 10);
+			if (errno != 0 ||
+			    parity < 1 || parity > VDEV_DRAID_MAXPARITY ||
+			    (*end != '\0' && *end != ':')) {
+				return (0);
+			}
+		}
+	}
+
+	return ((int)parity);
+}
+
+/*
+ * Assign the minimum and maximum number of devices allowed for
+ * the specified type.  On error NULL is returned, otherwise the
+ * type prefix is returned (raidz, mirror, etc).
+ */
 static const char *
 is_grouping(const char *type, int *mindev, int *maxdev)
 {
-	if (strncmp(type, "raidz", 5) == 0) {
-		const char *p = type + 5;
-		char *end;
-		long nparity;
+	int nparity;
 
-		if (*p == '\0') {
-			nparity = 1;
-		} else if (*p == '0') {
-			return (NULL); /* no zero prefixes allowed */
-		} else {
-			errno = 0;
-			nparity = strtol(p, &end, 10);
-			if (errno != 0 || nparity < 1 || nparity >= 255 ||
-			    *end != '\0')
-				return (NULL);
-		}
-
+	if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
+	    strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0) {
+		nparity = get_parity(type);
+		if (nparity == 0)
+			return (NULL);
 		if (mindev != NULL)
 			*mindev = nparity + 1;
 		if (maxdev != NULL)
 			*maxdev = 255;
-		return (VDEV_TYPE_RAIDZ);
+
+		if (strncmp(type, VDEV_TYPE_RAIDZ,
+		    strlen(VDEV_TYPE_RAIDZ)) == 0) {
+			return (VDEV_TYPE_RAIDZ);
+		} else {
+			return (VDEV_TYPE_DRAID);
+		}
 	}
 
 	if (maxdev != NULL)
@@ -1496,18 +1280,175 @@
 }
 
 /*
+ * Extract the configuration parameters encoded in the dRAID type and
+ * use them to generate a dRAID configuration.  The expected format is:
+ *
+ * draid[<parity>][:<data><d|D>][:<children><c|C>][:<spares><s|S>]
+ *
+ * The intent is to be able to generate a good configuration when no
+ * additional information is provided.  The only mandatory component
+ * of the 'type' is the 'draid' prefix.  If a value is not provided
+ * then reasonable defaults are used.  The optional components may
+ * appear in any order but the d/s/c suffix is required.
+ *
+ * Valid inputs:
+ * - data:     number of data devices per group (1-255)
+ * - parity:   number of parity blocks per group (1-3)
+ * - spares:   number of distributed spares (0-100)
+ * - children: total number of devices (1-255)
+ *
+ * Examples:
+ * - zpool create tank draid <devices...>
+ * - zpool create tank draid2:8d:51c:2s <devices...>
+ */
+static int
+draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
+{
+	uint64_t nparity = 1;
+	uint64_t nspares = 0;
+	uint64_t ndata = UINT64_MAX;
+	uint64_t ngroups = 1;
+	long value;
+
+	if (strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0)
+		return (EINVAL);
+
+	nparity = (uint64_t)get_parity(type);
+	if (nparity == 0)
+		return (EINVAL);
+
+	char *p = (char *)type;
+	while ((p = strchr(p, ':')) != NULL) {
+		char *end;
+
+		p = p + 1;
+		errno = 0;
+
+		if (!isdigit(p[0])) {
+			(void) fprintf(stderr, gettext("invalid dRAID "
+			    "syntax; expected [:<number><c|d|s>] not '%s'\n"),
+			    type);
+			return (EINVAL);
+		}
+
+		/* Expected non-zero value with c/d/s suffix */
+		value = strtol(p, &end, 10);
+		char suffix = tolower(*end);
+		if (errno != 0 ||
+		    (suffix != 'c' && suffix != 'd' && suffix != 's')) {
+			(void) fprintf(stderr, gettext("invalid dRAID "
+			    "syntax; expected [:<number><c|d|s>] not '%s'\n"),
+			    type);
+			return (EINVAL);
+		}
+
+		if (suffix == 'c') {
+			if ((uint64_t)value != children) {
+				fprintf(stderr,
+				    gettext("invalid number of dRAID children; "
+				    "%llu required but %llu provided\n"),
+				    (u_longlong_t)value,
+				    (u_longlong_t)children);
+				return (EINVAL);
+			}
+		} else if (suffix == 'd') {
+			ndata = (uint64_t)value;
+		} else if (suffix == 's') {
+			nspares = (uint64_t)value;
+		} else {
+			verify(0); /* Unreachable */
+		}
+	}
+
+	/*
+	 * When a specific number of data disks is not provided limit a
+	 * redundancy group to 8 data disks.  This value was selected to
+	 * provide a reasonable tradeoff between capacity and performance.
+	 */
+	if (ndata == UINT64_MAX) {
+		if (children > nspares + nparity) {
+			ndata = MIN(children - nspares - nparity, 8);
+		} else {
+			fprintf(stderr, gettext("request number of "
+			    "distributed spares %llu and parity level %llu\n"
+			    "leaves no disks available for data\n"),
+			    (u_longlong_t)nspares, (u_longlong_t)nparity);
+			return (EINVAL);
+		}
+	}
+
+	/* Verify the maximum allowed group size is never exceeded. */
+	if (ndata == 0 || (ndata + nparity > children - nspares)) {
+		fprintf(stderr, gettext("requested number of dRAID data "
+		    "disks per group %llu is too high,\nat most %llu disks "
+		    "are available for data\n"), (u_longlong_t)ndata,
+		    (u_longlong_t)(children - nspares - nparity));
+		return (EINVAL);
+	}
+
+	if (nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) {
+		fprintf(stderr,
+		    gettext("invalid dRAID parity level %llu; must be "
+		    "between 1 and %d\n"), (u_longlong_t)nparity,
+		    VDEV_DRAID_MAXPARITY);
+		return (EINVAL);
+	}
+
+	/*
+	 * Verify the requested number of spares can be satisfied.
+	 * An arbitrary limit of 100 distributed spares is applied.
+	 */
+	if (nspares > 100 || nspares > (children - (ndata + nparity))) {
+		fprintf(stderr,
+		    gettext("invalid number of dRAID spares %llu; additional "
+		    "disks would be required\n"), (u_longlong_t)nspares);
+		return (EINVAL);
+	}
+
+	/* Verify the requested number of children is sufficient. */
+	if (children < (ndata + nparity + nspares)) {
+		fprintf(stderr, gettext("%llu disks were provided, but at "
+		    "least %llu disks are required for this config\n"),
+		    (u_longlong_t)children,
+		    (u_longlong_t)(ndata + nparity + nspares));
+	}
+
+	if (children > VDEV_DRAID_MAX_CHILDREN) {
+		fprintf(stderr, gettext("%llu disks were provided, but "
+		    "dRAID only supports up to %u disks"),
+		    (u_longlong_t)children, VDEV_DRAID_MAX_CHILDREN);
+	}
+
+	/*
+	 * Calculate the minimum number of groups required to fill a slice.
+	 * This is the LCM of the stripe width (ndata + nparity) and the
+	 * number of data drives (children - nspares), divided by stripe width.
+	 */
+	while (ngroups * (ndata + nparity) % (children - nspares) != 0)
+		ngroups++;
+
+	/* Store the basic dRAID configuration. */
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity);
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, ndata);
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
+
+	return (0);
+}
+
+/*
  * Construct a syntactically valid vdev specification,
  * and ensure that all devices and files exist and can be opened.
  * Note: we don't bother freeing anything in the error paths
  * because the program is just going to exit anyway.
  */
-nvlist_t *
+static nvlist_t *
 construct_spec(nvlist_t *props, int argc, char **argv)
 {
 	nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
 	int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
-	const char *type;
-	uint64_t is_log, is_special, is_dedup;
+	const char *type, *fulltype;
+	boolean_t is_log, is_special, is_dedup, is_spare;
 	boolean_t seen_logs;
 
 	top = NULL;
@@ -1517,18 +1458,20 @@
 	nspares = 0;
 	nlogs = 0;
 	nl2cache = 0;
-	is_log = is_special = is_dedup = B_FALSE;
+	is_log = is_special = is_dedup = is_spare = B_FALSE;
 	seen_logs = B_FALSE;
 	nvroot = NULL;
 
 	while (argc > 0) {
+		fulltype = argv[0];
 		nv = NULL;
 
 		/*
-		 * If it's a mirror or raidz, the subsequent arguments are
-		 * its leaves -- until we encounter the next mirror or raidz.
+		 * If it's a mirror, raidz, or draid the subsequent arguments
+		 * are its leaves -- until we encounter the next mirror,
+		 * raidz or draid.
 		 */
-		if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
+		if ((type = is_grouping(fulltype, &mindev, &maxdev)) != NULL) {
 			nvlist_t **child = NULL;
 			int c, children = 0;
 
@@ -1540,6 +1483,7 @@
 					    "specified only once\n"));
 					goto spec_out;
 				}
+				is_spare = B_TRUE;
 				is_log = is_special = is_dedup = B_FALSE;
 			}
 
@@ -1553,8 +1497,7 @@
 				}
 				seen_logs = B_TRUE;
 				is_log = B_TRUE;
-				is_special = B_FALSE;
-				is_dedup = B_FALSE;
+				is_special = is_dedup = is_spare = B_FALSE;
 				argc--;
 				argv++;
 				/*
@@ -1566,8 +1509,7 @@
 
 			if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
 				is_special = B_TRUE;
-				is_log = B_FALSE;
-				is_dedup = B_FALSE;
+				is_log = is_dedup = is_spare = B_FALSE;
 				argc--;
 				argv++;
 				continue;
@@ -1575,8 +1517,7 @@
 
 			if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
 				is_dedup = B_TRUE;
-				is_log = B_FALSE;
-				is_special = B_FALSE;
+				is_log = is_special = is_spare = B_FALSE;
 				argc--;
 				argv++;
 				continue;
@@ -1590,7 +1531,8 @@
 					    "specified only once\n"));
 					goto spec_out;
 				}
-				is_log = is_special = is_dedup = B_FALSE;
+				is_log = is_special = B_FALSE;
+				is_dedup = is_spare = B_FALSE;
 			}
 
 			if (is_log || is_special || is_dedup) {
@@ -1608,13 +1550,15 @@
 			for (c = 1; c < argc; c++) {
 				if (is_grouping(argv[c], NULL, NULL) != NULL)
 					break;
+
 				children++;
 				child = realloc(child,
 				    children * sizeof (nvlist_t *));
 				if (child == NULL)
 					zpool_no_memory();
 				if ((nv = make_leaf_vdev(props, argv[c],
-				    B_FALSE)) == NULL) {
+				    !(is_log || is_special || is_dedup ||
+				    is_spare))) == NULL) {
 					for (c = 0; c < children - 1; c++)
 						nvlist_free(child[c]);
 					free(child);
@@ -1663,10 +1607,11 @@
 				    type) == 0);
 				verify(nvlist_add_uint64(nv,
 				    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
-				if (is_log)
+				if (is_log) {
 					verify(nvlist_add_string(nv,
 					    ZPOOL_CONFIG_ALLOCATION_BIAS,
 					    VDEV_ALLOC_BIAS_LOG) == 0);
+				}
 				if (is_special) {
 					verify(nvlist_add_string(nv,
 					    ZPOOL_CONFIG_ALLOCATION_BIAS,
@@ -1682,6 +1627,15 @@
 					    ZPOOL_CONFIG_NPARITY,
 					    mindev - 1) == 0);
 				}
+				if (strcmp(type, VDEV_TYPE_DRAID) == 0) {
+					if (draid_config_by_type(nv,
+					    fulltype, children) != 0) {
+						for (c = 0; c < children; c++)
+							nvlist_free(child[c]);
+						free(child);
+						goto spec_out;
+					}
+				}
 				verify(nvlist_add_nvlist_array(nv,
 				    ZPOOL_CONFIG_CHILDREN, child,
 				    children) == 0);
@@ -1695,12 +1649,19 @@
 			 * We have a device.  Pass off to make_leaf_vdev() to
 			 * construct the appropriate nvlist describing the vdev.
 			 */
-			if ((nv = make_leaf_vdev(props, argv[0],
-			    is_log)) == NULL)
+			if ((nv = make_leaf_vdev(props, argv[0], !(is_log ||
+			    is_special || is_dedup || is_spare))) == NULL)
 				goto spec_out;
 
-			if (is_log)
+			verify(nvlist_add_uint64(nv,
+			    ZPOOL_CONFIG_IS_LOG, is_log) == 0);
+			if (is_log) {
+				verify(nvlist_add_string(nv,
+				    ZPOOL_CONFIG_ALLOCATION_BIAS,
+				    VDEV_ALLOC_BIAS_LOG) == 0);
 				nlogs++;
+			}
+
 			if (is_special) {
 				verify(nvlist_add_string(nv,
 				    ZPOOL_CONFIG_ALLOCATION_BIAS,

diff --git a/zfs/cmd/zpool_influxdb/.gitignore b/zfs/cmd/zpool_influxdb/.gitignore
new file mode 100644
index 0000000..bd765d1
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/.gitignore

@@ -0,0 +1 @@
+/zpool_influxdb

diff --git a/zfs/cmd/zpool_influxdb/Makefile.am b/zfs/cmd/zpool_influxdb/Makefile.am
new file mode 100644
index 0000000..a592175
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/Makefile.am

@@ -0,0 +1,13 @@
+include $(top_srcdir)/config/Rules.am
+
+zfsexec_PROGRAMS = zpool_influxdb
+
+zpool_influxdb_SOURCES = \
+	zpool_influxdb.c
+
+zpool_influxdb_LDADD = \
+	$(top_builddir)/lib/libspl/libspl.la \
+	$(top_builddir)/lib/libnvpair/libnvpair.la \
+	$(top_builddir)/lib/libzfs/libzfs.la
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zpool_influxdb/README.md b/zfs/cmd/zpool_influxdb/README.md
new file mode 100644
index 0000000..864d674
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/README.md

@@ -0,0 +1,294 @@
+# Influxdb Metrics for ZFS Pools
+The _zpool_influxdb_ program produces
+[influxdb](https://github.com/influxdata/influxdb) line protocol
+compatible metrics from zpools. In the UNIX tradition, _zpool_influxdb_
+does one thing: read statistics from a pool and print them to
+stdout. In many ways, this is a metrics-friendly output of
+statistics normally observed via the `zpool` command.
+
+## Usage
+When run without arguments, _zpool_influxdb_ runs once, reading data
+from all imported pools, and prints to stdout.
+```shell
+zpool_influxdb [options] [poolname]
+```
+If no poolname is specified, then all pools are sampled.
+
+| option | short option | description |
+|---|---|---|
+| --execd | -e | For use with telegraf's `execd` plugin. When [enter] is pressed, the pools are sampled. To exit, use [ctrl+D] |
+| --no-histogram | -n | Do not print histogram information |
+| --signed-int | -i | Use signed integer data type (default=unsigned) |
+| --sum-histogram-buckets | -s | Sum histogram bucket values |
+| --tags key=value[,key=value...] | -t | Add tags to data points. No tag sanity checking is performed. |
+| --help | -h | Print a short usage message |
+
+#### Histogram Bucket Values
+The histogram data collected by ZFS is stored as independent bucket values.
+This works well out-of-the-box with an influxdb data source and grafana's
+heatmap visualization. The influxdb query for a grafana heatmap
+visualization looks like:
+```
+field(disk_read) last() non_negative_derivative(1s)
+```
+
+Another method for storing histogram data sums the values for lower-value
+buckets. For example, a latency bucket tagged "le=10" includes the values
+in the bucket "le=1".
+This method is often used for prometheus histograms.
+The `zpool_influxdb --sum-histogram-buckets` option presents the data from ZFS
+as summed values.
+
+## Measurements
+The following measurements are collected:
+
+| measurement | description | zpool equivalent |
+|---|---|---|
+| zpool_stats | general size and data | zpool list |
+| zpool_scan_stats | scrub, rebuild, and resilver statistics (omitted if no scan has been requested) | zpool status |
+| zpool_vdev_stats | per-vdev statistics | zpool iostat -q |
+| zpool_io_size | per-vdev I/O size histogram | zpool iostat -r |
+| zpool_latency | per-vdev I/O latency histogram | zpool iostat -w |
+| zpool_vdev_queue | per-vdev instantaneous queue depth | zpool iostat -q |
+
+### zpool_stats Description
+zpool_stats contains top-level summary statistics for the pool.
+Performance counters measure the I/Os to the pool's devices.
+
+#### zpool_stats Tags
+
+| label | description |
+|---|---|
+| name | pool name |
+| path | for leaf vdevs, the pathname |
+| state | pool state, as shown by _zpool status_ |
+| vdev | vdev name (root = entire pool) |
+
+#### zpool_stats Fields
+
+| field | units | description |
+|---|---|---|
+| alloc | bytes | allocated space |
+| free | bytes | unallocated space |
+| size | bytes | total pool size |
+| read_bytes | bytes | bytes read since pool import |
+| read_errors | count | number of read errors |
+| read_ops | count | number of read operations |
+| write_bytes | bytes | bytes written since pool import |
+| write_errors | count | number of write errors |
+| write_ops | count | number of write operations |
+
+### zpool_scan_stats Description
+Once a pool has been scrubbed, resilvered, or rebuilt, the zpool_scan_stats
+contain information about the status and performance of the operation.
+Otherwise, the zpool_scan_stats do not exist in the kernel, and therefore
+cannot be reported by this collector.
+
+#### zpool_scan_stats Tags
+
+| label | description |
+|---|---|
+| name | pool name |
+| function | name of the scan function running or recently completed |
+| state | scan state, as shown by _zpool status_ |
+
+#### zpool_scan_stats Fields
+
+| field | units | description |
+|---|---|---|
+| errors | count | number of errors encountered by scan |
+| examined | bytes | total data examined during scan |
+| to_examine | bytes | prediction of total bytes to be scanned |
+| pass_examined | bytes | data examined during current scan pass |
+| issued | bytes | size of I/Os issued to disks |
+| pass_issued | bytes | size of I/Os issued to disks for current pass |
+| processed | bytes | data reconstructed during scan |
+| to_process | bytes | total bytes to be repaired |
+| rate | bytes/sec | examination rate |
+| start_ts | epoch timestamp | start timestamp for scan |
+| pause_ts | epoch timestamp | timestamp for a scan pause request |
+| end_ts | epoch timestamp | completion timestamp for scan |
+| paused_t | seconds | elapsed time while paused |
+| remaining_t | seconds | estimate of time remaining for scan |
+
+### zpool_vdev_stats Description
+The ZFS I/O (ZIO) scheduler uses five queues to schedule I/Os to each vdev.
+These queues are further divided into active and pending states.
+An I/O is pending prior to being issued to the vdev. An active
+I/O has been issued to the vdev. The scheduler and its tunable
+parameters are described at the
+[ZFS documentation for ZIO Scheduler](
+https://openzfs.github.io/openzfs-docs/Performance%20and%20Tuning/ZIO%20Scheduler.html).
+The ZIO scheduler reports the queue depths as gauges where the value
+represents an instantaneous snapshot of the queue depth at
+the sample time. Therefore, it is not unusual to see all zeroes
+for an idle pool.
+
+#### zpool_vdev_stats Tags
+| label | description |
+|---|---|
+| name | pool name |
+| vdev | vdev name (root = entire pool) |
+
+#### zpool_vdev_stats Fields
+| field | units | description |
+|---|---|---|
+| sync_r_active_queue | entries | synchronous read active queue depth |
+| sync_w_active_queue | entries | synchronous write active queue depth |
+| async_r_active_queue | entries | asynchronous read active queue depth |
+| async_w_active_queue | entries | asynchronous write active queue depth |
+| async_scrub_active_queue | entries | asynchronous scrub active queue depth |
+| sync_r_pend_queue | entries | synchronous read pending queue depth |
+| sync_w_pend_queue | entries | synchronous write pending queue depth |
+| async_r_pend_queue | entries | asynchronous read pending queue depth |
+| async_w_pend_queue | entries | asynchronous write pending queue depth |
+| async_scrub_pend_queue | entries | asynchronous scrub pending queue depth |
+
+### zpool_latency Histogram
+ZFS tracks the latency of each I/O in the ZIO pipeline. This latency can
+be useful for observing latency-related issues that are not easily observed
+using the averaged latency statistics.
+
+The histogram fields show cumulative values from lowest to highest.
+The largest bucket is tagged "le=+Inf", representing the total count
+of I/Os by type and vdev.
+
+#### zpool_latency Histogram Tags
+| label | description |
+|---|---|
+| le | bucket for histogram, latency is less than or equal to bucket value in seconds |
+| name | pool name |
+| path | for leaf vdevs, the device path name, otherwise omitted |
+| vdev | vdev name (root = entire pool) |
+
+#### zpool_latency Histogram Fields
+| field | units | description |
+|---|---|---|
+| total_read | operations | read operations of all types |
+| total_write | operations | write operations of all types |
+| disk_read | operations | disk read operations |
+| disk_write | operations | disk write operations |
+| sync_read | operations | ZIO sync reads |
+| sync_write | operations | ZIO sync writes |
+| async_read | operations | ZIO async reads|
+| async_write | operations | ZIO async writes |
+| scrub | operations | ZIO scrub/scan reads |
+| trim | operations | ZIO trim (aka unmap) writes |
+
+### zpool_io_size Histogram
+ZFS tracks I/O throughout the ZIO pipeline. The size of each I/O is used
+to create a histogram of the size by I/O type and vdev. For example, a
+4KiB write to a mirrored pool will show a 4KiB write to the top-level vdev
+(root) and a 4KiB write to each of the mirror leaf vdevs.
+
+The ZIO pipeline can aggregate I/O operations. For example, a contiguous
+series of writes can be aggregated into a single, larger I/O to the leaf
+vdev. The independent I/O operations reflect the logical operations and
+the aggregated I/O operations reflect the physical operations.
+
+The histogram fields show cumulative values from lowest to highest.
+The largest bucket is tagged "le=+Inf", representing the total count
+of I/Os by type and vdev.
+
+Note: trim I/Os can be larger than 16MiB, but the larger sizes are
+accounted in the 16MiB bucket.
+
+#### zpool_io_size Histogram Tags
+| label | description |
+|---|---|
+| le | bucket for histogram, I/O size is less than or equal to bucket value in bytes |
+| name | pool name |
+| path | for leaf vdevs, the device path name, otherwise omitted |
+| vdev | vdev name (root = entire pool) |
+
+#### zpool_io_size Histogram Fields
+| field | units | description |
+|---|---|---|
+| sync_read_ind | blocks | independent sync reads |
+| sync_write_ind | blocks | independent sync writes |
+| async_read_ind | blocks | independent async reads |
+| async_write_ind | blocks | independent async writes |
+| scrub_read_ind | blocks | independent scrub/scan reads |
+| trim_write_ind | blocks | independent trim (aka unmap) writes |
+| sync_read_agg | blocks | aggregated sync reads |
+| sync_write_agg | blocks | aggregated sync writes |
+| async_read_agg | blocks | aggregated async reads |
+| async_write_agg | blocks | aggregated async writes |
+| scrub_read_agg | blocks | aggregated scrub/scan reads |
+| trim_write_agg | blocks | aggregated trim (aka unmap) writes |
+
+#### About unsigned integers
+Telegraf v1.6.2 and later support unsigned 64-bit integers which more
+closely matches the uint64_t values used by ZFS. By default, zpool_influxdb
+uses ZFS' uint64_t values and influxdb line protocol unsigned integer type.
+If you are using old telegraf or influxdb where unsigned integers are not
+available, use the `--signed-int` option.
+
+## Using _zpool_influxdb_
+
+The simplest method is to use the execd input agent in telegraf. For older
+versions of telegraf which lack execd, the exec input agent can be used.
+For convenience, one of the sample config files below can be placed in the
+telegraf config-directory (often /etc/telegraf/telegraf.d). Telegraf can
+be restarted to read the config-directory files.
+
+### Example telegraf execd configuration
+```toml
+# # Read metrics from zpool_influxdb
+[[inputs.execd]]
+#   ## default installation location for zpool_influxdb command
+  command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
+
+    ## Define how the process is signaled on each collection interval.
+    ## Valid values are:
+    ##   "none"    : Do not signal anything. (Recommended for service inputs)
+    ##               The process must output metrics by itself.
+    ##   "STDIN"   : Send a newline on STDIN. (Recommended for gather inputs)
+    ##   "SIGHUP"  : Send a HUP signal. Not available on Windows. (not recommended)
+    ##   "SIGUSR1" : Send a USR1 signal. Not available on Windows.
+    ##   "SIGUSR2" : Send a USR2 signal. Not available on Windows.
+  signal = "STDIN"
+
+  ## Delay before the process is restarted after an unexpected termination
+  restart_delay = "10s"
+
+    ## Data format to consume.
+    ## Each data format has its own unique set of configuration options, read
+    ## more about them here:
+    ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"
+```
+
+### Example telegraf exec configuration
+```toml
+# # Read metrics from zpool_influxdb
+[[inputs.exec]]
+#   ## default installation location for zpool_influxdb command
+  commands = ["/usr/libexec/zfs/zpool_influxdb"]
+  data_format = "influx"
+```
+
+## Caveat Emptor
+* Like the _zpool_ command, _zpool_influxdb_ takes a reader
+  lock on spa_config for each imported pool. If this lock blocks,
+  then the command will also block indefinitely and might be
+  unkillable. This is not a normal condition, but can occur if
+  there are bugs in the kernel modules.
+  For this reason, care should be taken:
+  * avoid spawning many of these commands hoping that one might
+    finish
+  * avoid frequent updates or short sample time
+    intervals, because the locks can interfere with the performance
+    of other instances of _zpool_ or _zpool_influxdb_
+
+## Other collectors
+There are a few other collectors for zpool statistics roaming around
+the Internet. Many attempt to screen-scrape `zpool` output in various
+ways. The screen-scrape method works poorly for `zpool` output because
+of its human-friendly nature. Also, they suffer from the same caveats
+as this implementation. This implementation is optimized for directly
+collecting the metrics and is much more efficient than the screen-scrapers.
+
+## Feedback Encouraged
+Pull requests and issues are greatly appreciated at
+https://github.com/openzfs/zfs

diff --git a/zfs/cmd/zpool_influxdb/dashboards/README.md b/zfs/cmd/zpool_influxdb/dashboards/README.md
new file mode 100644
index 0000000..2fdbe49
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/dashboards/README.md

@@ -0,0 +1,3 @@
+### Dashboards for zpool_influxdb
+This directory contains a collection of dashboards related to ZFS with data
+collected from the zpool_influxdb collector.

diff --git a/zfs/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json b/zfs/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json
new file mode 100644
index 0000000..70260ae
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/dashboards/grafana/ZFS-pool-latency-heatmaps-influxdb.json

@@ -0,0 +1,1667 @@
+{
+  "__inputs": [
+    {
+      "name": "DS_MACBOOK-INFLUX",
+      "label": "macbook-influx",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "influxdb",
+      "pluginName": "InfluxDB"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "6.7.3"
+    },
+    {
+      "type": "panel",
+      "id": "heatmap",
+      "name": "Heatmap",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "influxdb",
+      "name": "InfluxDB",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "jdbranham-diagram-panel",
+      "name": "Diagram",
+      "version": "1.4.5"
+    },
+    {
+      "type": "panel",
+      "id": "text",
+      "name": "Text",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "$$hashKey": "object:1627",
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "description": "Top-level ZFS pool latency by ZIO type",
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 1,
+  "id": null,
+  "iteration": 1590445168391,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 5,
+      "panels": [],
+      "title": "Total Reads and Writes",
+      "type": "row"
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the total reads of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 0,
+        "y": 1
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 2,
+      "legend": {
+        "show": true
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "total_read"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Total Reads",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the total writes of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 12,
+        "y": 1
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 3,
+      "legend": {
+        "show": true
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "total_write"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Total Writes",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "collapsed": false,
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 10
+      },
+      "id": 8,
+      "panels": [],
+      "title": "ZIO Scheduler Queues for Read Operations",
+      "type": "row"
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the synchronous reads of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 0,
+        "y": 11
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 6,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "sync_read"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Sync Read Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the asynchronous reads of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 5,
+        "y": 11
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 9,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "async_read"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Async Read Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the scrub or scan reads of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 10,
+        "y": 11
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 10,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "scrub"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Scrub/Scan Read Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the actual disk reads of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 9,
+        "x": 15,
+        "y": 11
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 11,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "disk_read"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Disk Read Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "collapsed": false,
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 19
+      },
+      "id": 13,
+      "panels": [],
+      "title": "ZIO Scheduler Queues for Write Operations",
+      "type": "row"
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the synchronous writes of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 0,
+        "y": 20
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 14,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "sync_write"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Sync Write Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the asynchronous writes of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 5,
+        "y": 20
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 15,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "async_write"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Async Write Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the trim or unmap operations of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 5,
+        "x": 10,
+        "y": 20
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 16,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "trim"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Trim Write Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "cards": {
+        "cardPadding": null,
+        "cardRound": null
+      },
+      "color": {
+        "cardColor": "#b4ff00",
+        "colorScale": "sqrt",
+        "colorScheme": "interpolateOranges",
+        "exponent": 0.5,
+        "mode": "spectrum"
+      },
+      "dataFormat": "tsbuckets",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "description": "Latency histogram for the disk write operations of a ZFS pool",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 9,
+        "x": 15,
+        "y": 20
+      },
+      "heatmap": {},
+      "hideZeroBuckets": false,
+      "highlightCards": true,
+      "id": 17,
+      "legend": {
+        "show": false
+      },
+      "reverseYBuckets": false,
+      "targets": [
+        {
+          "alias": "$tag_le",
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "le"
+              ],
+              "type": "tag"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "measurement": "zpool_latency",
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "disk_write"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "last"
+              },
+              {
+                "params": [
+                  "1s"
+                ],
+                "type": "non_negative_derivative"
+              }
+            ]
+          ],
+          "tags": [
+            {
+              "key": "host",
+              "operator": "=~",
+              "value": "/^$hostname$/"
+            },
+            {
+              "condition": "AND",
+              "key": "name",
+              "operator": "=~",
+              "value": "/^$poolname$/"
+            }
+          ]
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Disk Write Queue",
+      "tooltip": {
+        "show": true,
+        "showHistogram": true
+      },
+      "type": "heatmap",
+      "xAxis": {
+        "show": true
+      },
+      "xBucketNumber": null,
+      "xBucketSize": null,
+      "yAxis": {
+        "decimals": 0,
+        "format": "s",
+        "logBase": 1,
+        "max": null,
+        "min": null,
+        "show": true,
+        "splitFactor": null
+      },
+      "yBucketBound": "auto",
+      "yBucketNumber": null,
+      "yBucketSize": null
+    },
+    {
+      "collapsed": false,
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 28
+      },
+      "id": 19,
+      "panels": [],
+      "title": "About",
+      "type": "row"
+    },
+    {
+      "content": "I/O requests that are satisfied by accessing pool devices are managed by the ZIO scheduler.\nThe total latency is measured from the start of the I/O to completion by the disk.\nLatency through each queue is shown prior to its submission to the disk queue.\n\nThis view is useful for observing the effects of tuning the ZIO scheduler min and max values\n(see zfs(4) and [ZFS on Linux Module Parameters](https://openzfs.github.io/openzfs-docs/Performance%20and%20tuning/ZFS%20on%20Linux%20Module%20Parameters.html)):\n+ *zfs_vdev_max_active* controls the ZIO scheduler's disk queue depth (do not confuse with the block device's nr_requests)\n+ *zfs_vdev_sync_read_min_active* and *zfs_vdev_sync_read_max_active* control the synchronous queue for reads: most reads are sync\n+ *zfs_vdev_sync_write_min_active* and *zfs_vdev_sync_write_max_active* control the synchronous queue for writes: \nusually metadata or user data depending on the \"sync\" property setting or I/Os that are requested to be flushed\n+ *zfs_vdev_async_read_min_active* and *zfs_vdev_async_read_max_active* control the asynchronous queue for reads: usually prefetches\n+ *zfs_vdev_async_write_min_active* and *zfs_vdev_async_write_max_active* control the asynchronous queue for writes: \nusually the bulk of all writes at transaction group (txg) commit\n+ *zfs_vdev_scrub_min_active* and *zfs_vdev_scrub_max_active* control the scan reads: usually scrub or resilver\n\n",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 15,
+        "w": 16,
+        "x": 0,
+        "y": 29
+      },
+      "id": 21,
+      "mode": "markdown",
+      "targets": [
+        {
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "mean"
+              }
+            ]
+          ],
+          "tags": []
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "About ZFS Pool All Queues Read/Write Latency Histograms",
+      "type": "text"
+    },
+    {
+      "colors": [
+        "rgba(50, 172, 45, 0.97)",
+        "rgba(237, 129, 40, 0.89)",
+        "rgba(245, 54, 54, 0.9)"
+      ],
+      "composites": [],
+      "content": "graph LR\nIO((I/O request)) --> SR(sync read queue)\nIO --> SW(sync write queue)\nIO --> AR(async read queue)\nIO --> AW(async write queue)\nIO --> SCRUB(scrub queue)\nIO --> TRIM(trim queue)\nSR --> DISKQ(disk queue)\nSW --> DISKQ\nAR --> DISKQ\nAW --> DISKQ\nSCRUB --> DISKQ\nTRIM --> DISKQ\nDISKQ --> DISK((disk))\n",
+      "datasource": "${DS_MACBOOK-INFLUX}",
+      "decimals": 2,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "format": "none",
+      "graphId": "diagram_23",
+      "gridPos": {
+        "h": 15,
+        "w": 7,
+        "x": 16,
+        "y": 29
+      },
+      "id": 23,
+      "init": {
+        "arrowMarkerAbsolute": true,
+        "cloneCssStyles": true,
+        "flowchart": {
+          "htmlLabels": true,
+          "useMaxWidth": true
+        },
+        "gantt": {
+          "barGap": 4,
+          "barHeight": 20,
+          "fontFamily": "\"Open-Sans\", \"sans-serif\"",
+          "fontSize": 11,
+          "gridLineStartPadding": 35,
+          "leftPadding": 75,
+          "numberSectionStyles": 3,
+          "titleTopMargin": 25,
+          "topPadding": 50
+        },
+        "logLevel": 3,
+        "securityLevel": "loose",
+        "sequence": {
+          "actorMargin": 50,
+          "bottomMarginAdj": 1,
+          "boxMargin": 10,
+          "boxTextMargin": 5,
+          "diagramMarginX": 50,
+          "diagramMarginY": 10,
+          "height": 65,
+          "messageMargin": 35,
+          "mirrorActors": true,
+          "noteMargin": 10,
+          "useMaxWidth": true,
+          "width": 150
+        },
+        "startOnLoad": false,
+        "theme": "dark"
+      },
+      "legend": {
+        "avg": true,
+        "current": true,
+        "gradient": {
+          "enabled": true,
+          "show": true
+        },
+        "max": true,
+        "min": true,
+        "show": false,
+        "total": true
+      },
+      "mappingType": 1,
+      "mappingTypes": [
+        {
+          "$$hashKey": "object:155",
+          "name": "value to text",
+          "value": 1
+        },
+        {
+          "$$hashKey": "object:156",
+          "name": "range to text",
+          "value": 2
+        }
+      ],
+      "maxDataPoints": 100,
+      "maxWidth": false,
+      "mermaidServiceUrl": "",
+      "metricCharacterReplacements": [],
+      "moddedSeriesVal": 0,
+      "mode": "content",
+      "nullPointMode": "connected",
+      "seriesOverrides": [],
+      "style": "",
+      "styleValues": {},
+      "targets": [
+        {
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "hide": true,
+          "orderByTime": "ASC",
+          "policy": "default",
+          "refId": "A",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "mean"
+              }
+            ]
+          ],
+          "tags": []
+        }
+      ],
+      "themes": [
+        "default",
+        "dark",
+        "forest",
+        "neutral"
+      ],
+      "thresholds": "0,10",
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Panel Title",
+      "type": "jdbranham-diagram-panel",
+      "valueMaps": [
+        {
+          "$$hashKey": "object:151",
+          "op": "=",
+          "text": "N/A",
+          "value": "null"
+        }
+      ],
+      "valueName": "avg",
+      "valueOptions": [
+        "avg",
+        "min",
+        "max",
+        "total",
+        "current"
+      ]
+    }
+  ],
+  "refresh": false,
+  "schemaVersion": 22,
+  "style": "dark",
+  "tags": [
+    "ZFS",
+    "Latency",
+    "Histogram"
+  ],
+  "templating": {
+    "list": [
+      {
+        "allValue": null,
+        "current": {},
+        "datasource": "${DS_MACBOOK-INFLUX}",
+        "definition": "show tag values from \"zpool_latency\" with key = \"host\"",
+        "hide": 0,
+        "includeAll": false,
+        "index": -1,
+        "label": null,
+        "multi": false,
+        "name": "hostname",
+        "options": [],
+        "query": "show tag values from \"zpool_latency\" with key = \"host\"",
+        "refresh": 1,
+        "regex": "/([-a-zA-Z-0-9]+)/",
+        "skipUrlSync": false,
+        "sort": 5,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "allValue": null,
+        "current": {},
+        "datasource": "${DS_MACBOOK-INFLUX}",
+        "definition": "show tag values from \"zpool_latency\" with key = \"name\"  where \"host\" =~ /^$hostname/",
+        "hide": 0,
+        "includeAll": false,
+        "index": -1,
+        "label": null,
+        "multi": false,
+        "name": "poolname",
+        "options": [],
+        "query": "show tag values from \"zpool_latency\" with key = \"name\"  where \"host\" =~ /^$hostname/",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 5,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "2020-05-25T21:34:30.137Z",
+    "to": "2020-05-25T21:39:54.445Z"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ]
+  },
+  "timezone": "",
+  "title": "ZFS Pool Latency Heatmaps Influxdb",
+  "uid": "TbB4-DkGz",
+  "variables": {
+    "list": []
+  },
+  "version": 2
+}

diff --git a/zfs/cmd/zpool_influxdb/telegraf.d/README.md b/zfs/cmd/zpool_influxdb/telegraf.d/README.md
new file mode 100644
index 0000000..74f411a
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/telegraf.d/README.md

@@ -0,0 +1,7 @@
+This directory contains sample telegraf configurations for
+adding `zpool_influxdb` as an input plugin. Depending on your
+telegraf configuration, the installation can be as simple as
+copying one of these to the `/etc/telegraf/telegraf.d` directory
+and restarting telegraf with `systemctl restart telegraf`.
+
+See the telegraf docs for more information on input plugins.

diff --git a/zfs/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf b/zfs/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf
new file mode 100644
index 0000000..a2efa61
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/telegraf.d/exec_zpool_influxdb.conf

@@ -0,0 +1,15 @@
+# # Read metrics from zpool_influxdb
+[[inputs.exec]]
+#   ## default installation location for zpool_influxdb command
+  commands = ["/usr/local/libexec/zfs/zpool_influxdb"]
+#   ## Timeout for each command to complete.
+#   timeout = "5s"
+#
+#   ## measurement name suffix (for separating different commands)
+#   name_suffix = "_mycollector"
+#
+#   ## Data format to consume.
+#   ## Each data format has its own unique set of configuration options, read
+#   ## more about them here:
+#   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"

diff --git a/zfs/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf b/zfs/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf
new file mode 100644
index 0000000..90737b8
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/telegraf.d/execd_zpool_influxdb.conf

@@ -0,0 +1,23 @@
+# # Read metrics from zpool_influxdb
+[[inputs.execd]]
+#   ## default installation location for zpool_influxdb command
+  command = ["/usr/local/libexec/zfs/zpool_influxdb", "--execd"]
+
+    ## Define how the process is signaled on each collection interval.
+    ## Valid values are:
+    ##   "none"    : Do not signal anything. (Recommended for service inputs)
+    ##               The process must output metrics by itself.
+    ##   "STDIN"   : Send a newline on STDIN. (Recommended for gather inputs)
+    ##   "SIGHUP"  : Send a HUP signal. Not available on Windows. (not recommended)
+    ##   "SIGUSR1" : Send a USR1 signal. Not available on Windows.
+    ##   "SIGUSR2" : Send a USR2 signal. Not available on Windows.
+  signal = "STDIN"
+
+  ## Delay before the process is restarted after an unexpected termination
+  restart_delay = "10s"
+
+    ## Data format to consume.
+    ## Each data format has its own unique set of configuration options, read
+    ## more about them here:
+    ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "influx"

diff --git a/zfs/cmd/zpool_influxdb/zpool_influxdb.c b/zfs/cmd/zpool_influxdb/zpool_influxdb.c
new file mode 100644
index 0000000..d0b6e17
--- /dev/null
+++ b/zfs/cmd/zpool_influxdb/zpool_influxdb.c

@@ -0,0 +1,843 @@
+/*
+ * Gather top-level ZFS pool and resilver/scan statistics and print using
+ * influxdb line protocol
+ * usage: [options] [pool_name]
+ * where options are:
+ *   --execd, -e           run in telegraf execd input plugin mode, [CR] on
+ *                         stdin causes a sample to be printed and wait for
+ *                         the next [CR]
+ *   --no-histograms, -n   don't print histogram data (reduces cardinality
+ *                         if you don't care about histograms)
+ *   --sum-histogram-buckets, -s sum histogram bucket values
+ *
+ * To integrate into telegraf use one of:
+ * 1. the `inputs.execd` plugin with the `--execd` option
+ * 2. the `inputs.exec` plugin to simply run with no options
+ *
+ * NOTE: libzfs is an unstable interface. YMMV.
+ *
+ * The design goals of this software include:
+ * + be as lightweight as possible
+ * + reduce the number of external dependencies as far as possible, hence
+ *   there is no dependency on a client library for managing the metric
+ *   collection -- info is printed, KISS
+ * + broken pools or kernel bugs can cause this process to hang in an
+ *   unkillable state. For this reason, it is best to keep the damage limited
+ *   to a small process like zpool_influxdb rather than a larger collector.
+ *
+ * Copyright 2018-2020 Richard Elling
+ *
+ * This software is dual-licensed MIT and CDDL.
+ *
+ * The MIT License (MIT)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License Version 1.0 (CDDL-1.0).
+ * You can obtain a copy of the license from the top-level file
+ * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
+ * You may not use this file except in compliance with the license.
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * CDDL HEADER END
+ */
+#include <string.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <libzfs_impl.h>
+
+/*
+ * influxdb measurement names; each is printed as the first token of the
+ * lines emitted by the corresponding print_*() function in this file
+ */
+#define	POOL_MEASUREMENT	"zpool_stats"
+#define	SCAN_MEASUREMENT	"zpool_scan_stats"
+#define	VDEV_MEASUREMENT	"zpool_vdev_stats"
+#define	POOL_LATENCY_MEASUREMENT	"zpool_latency"
+#define	POOL_QUEUE_MEASUREMENT	"zpool_vdev_queue"
+/* histogram buckets below this index are folded into the first one printed */
+#define	MIN_LAT_INDEX	10  /* minimum latency index 10 = 1024ns */
+#define	POOL_IO_SIZE_MEASUREMENT	"zpool_io_size"
+/* histogram buckets below this index are folded into the first one printed */
+#define	MIN_SIZE_INDEX	9  /* minimum size index 9 = 512 bytes */
+
+/*
+ * global options
+ * NOTE(review): execd_mode, no_histograms and sum_histogram_buckets
+ * presumably mirror the --execd, --no-histograms and
+ * --sum-histogram-buckets options described in the file header; the
+ * option parsing is not visible here -- confirm.
+ */
+int execd_mode = 0;
+int no_histograms = 0;
+/* non-zero: histogram fields are running sums across printed buckets */
+int sum_histogram_buckets = 0;
+/* type character print_kv() appends after every integer value */
+char metric_data_type = 'u';
+/* print_kv() ANDs every value with this mask before printing it */
+uint64_t metric_value_mask = UINT64_MAX;
+/* printed as the final token of every emitted line */
+uint64_t timestamp = 0;
+/* bookkeeping for the "out of sync" diagnostic in print_scan_status() */
+int complained_about_sync = 0;
+/* printed verbatim immediately after the measurement name on every line */
+char *tags = "";
+
+/*
+ * common signature of the per-vdev printers, which take
+ * (nvlist, pool name, parent vdev name) and return 0 on success
+ */
+typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);
+
+/*
+ * influxdb line protocol rules for escaping are important because the
+ * zpool name can include characters that need to be escaped
+ *
+ * Returns a newly allocated copy of s in which every space, comma, equals
+ * sign and backslash is preceded by a backslash.
+ *
+ * The result buffer is sized from the input itself (worst case: every
+ * character needs escaping) rather than from a fixed dataset-name limit,
+ * because callers in this file also pass vdev paths, which can be longer
+ * than a dataset name.
+ *
+ * Exits the process if memory cannot be allocated.
+ * caller is responsible for freeing result
+ */
+static char *
+escape_string(char *s)
+{
+	char *c, *d;
+	/* each input byte yields at most two output bytes, plus the NUL */
+	char *t = (char *)malloc(strlen(s) * 2 + 1);
+	if (t == NULL) {
+		fprintf(stderr, "error: cannot allocate memory\n");
+		exit(1);
+	}
+
+	for (c = s, d = t; *c != '\0'; c++, d++) {
+		switch (*c) {
+		case ' ':
+		case ',':
+		case '=':
+		case '\\':
+			/* emit the escape, then fall into the plain copy */
+			*d++ = '\\';
+			fallthrough;
+		default:
+			*d = *c;
+		}
+	}
+	*d = '\0';
+	return (t);
+}
+
+/*
+ * Write a single influxdb field to stdout in the form key=value<type>.
+ * The uint64_t value is ANDed with metric_value_mask first, and the
+ * metric_data_type character is appended directly after the number.
+ */
+static void
+print_kv(char *key, uint64_t value)
+{
+	u_longlong_t masked = (u_longlong_t)value & metric_value_mask;
+
+	printf("%s=%llu%c", key, masked, metric_data_type);
+}
+
+/*
+ * print_scan_status() prints the details as often seen in the "zpool status"
+ * output. However, unlike the zpool command, which is intended for humans,
+ * this output is suitable for long-term tracking in influxdb.
+ *
+ * nvroot is searched for ZPOOL_CONFIG_SCAN_STATS; pool_name is printed
+ * as-is in the "name" tag (no escaping is done here).
+ *
+ * Returns 0 when a line was printed or when there are no scan stats, and
+ * 1 when the scan state or function is outside the ranges this program
+ * was compiled with.
+ */
+static int
+print_scan_status(nvlist_t *nvroot, const char *pool_name)
+{
+	uint_t c;
+	int64_t elapsed;
+	uint64_t examined, pass_exam, paused_time, paused_ts, rate;
+	uint64_t remaining_time;
+	pool_scan_stat_t *ps = NULL;
+	double pct_done;
+	char *state[DSS_NUM_STATES] = {
+	    "none", "scanning", "finished", "canceled"};
+	char *func;
+
+	(void) nvlist_lookup_uint64_array(nvroot,
+	    ZPOOL_CONFIG_SCAN_STATS,
+	    (uint64_t **)&ps, &c);
+
+	/*
+	 * ignore if there are no stats
+	 */
+	if (ps == NULL)
+		return (0);
+
+	/*
+	 * return error if state is bogus
+	 *
+	 * NOTE(review): complained_about_sync is only advanced inside the
+	 * branch below, so the message is printed once per process rather
+	 * than on every 1000th failure -- confirm which was intended.
+	 */
+	if (ps->pss_state >= DSS_NUM_STATES ||
+	    ps->pss_func >= POOL_SCAN_FUNCS) {
+		if (complained_about_sync % 1000 == 0) {
+			/* newline-terminated so later stderr output is not glued on */
+			fprintf(stderr, "error: cannot decode scan stats: "
+			    "ZFS is out of sync with compiled zpool_influxdb\n");
+			complained_about_sync++;
+		}
+		return (1);
+	}
+
+	switch (ps->pss_func) {
+	case POOL_SCAN_NONE:
+		func = "none_requested";
+		break;
+	case POOL_SCAN_SCRUB:
+		func = "scrub";
+		break;
+	case POOL_SCAN_RESILVER:
+		func = "resilver";
+		break;
+#ifdef POOL_SCAN_REBUILD
+	case POOL_SCAN_REBUILD:
+		func = "rebuild";
+		break;
+#endif
+	default:
+		func = "scan";
+	}
+
+	/* overall progress; examined is forced to at least 1 */
+	examined = ps->pss_examined ? ps->pss_examined : 1;
+	pct_done = 0.0;
+	if (ps->pss_to_examine > 0)
+		pct_done = 100.0 * examined / ps->pss_to_examine;
+
+#ifdef EZFS_SCRUB_PAUSED
+	paused_ts = ps->pss_pass_scrub_pause;
+	paused_time = ps->pss_pass_scrub_spent_paused;
+#else
+	paused_ts = 0;
+	paused_time = 0;
+#endif
+
+	/* calculations for this pass */
+	if (ps->pss_state == DSS_SCANNING) {
+		elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
+		    (int64_t)paused_time;
+		elapsed = (elapsed > 0) ? elapsed : 1;
+		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
+		rate = pass_exam / elapsed;
+		rate = (rate > 0) ? rate : 1;
+		/*
+		 * time left = amount still to examine / examine rate.
+		 * The subtraction must happen before the division, and is
+		 * clamped so that examined > to_examine cannot wrap the
+		 * unsigned result around to a huge value.
+		 */
+		if (ps->pss_to_examine > examined) {
+			remaining_time =
+			    (ps->pss_to_examine - examined) / rate;
+		} else {
+			remaining_time = 0;
+		}
+	} else {
+		elapsed =
+		    (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
+		    (int64_t)paused_time;
+		elapsed = (elapsed > 0) ? elapsed : 1;
+		pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
+		rate = pass_exam / elapsed;
+		remaining_time = 0;
+	}
+	/* never report a zero rate */
+	rate = rate ? rate : 1;
+
+	/* influxdb line protocol format: "tags metrics timestamp" */
+	printf("%s%s,function=%s,name=%s,state=%s ",
+	    SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
+	print_kv("end_ts", ps->pss_end_time);
+	print_kv(",errors", ps->pss_errors);
+	print_kv(",examined", examined);
+	print_kv(",issued", ps->pss_issued);
+	print_kv(",pass_examined", pass_exam);
+	print_kv(",pass_issued", ps->pss_pass_issued);
+	print_kv(",paused_ts", paused_ts);
+	print_kv(",paused_t", paused_time);
+	printf(",pct_done=%.2f", pct_done);
+	print_kv(",processed", ps->pss_processed);
+	print_kv(",rate", rate);
+	print_kv(",remaining_t", remaining_time);
+	print_kv(",start_ts", ps->pss_start_time);
+	print_kv(",to_examine", ps->pss_to_examine);
+	print_kv(",to_process", ps->pss_to_process);
+	printf(" %llu\n", (u_longlong_t)timestamp);
+	return (0);
+}
+
+/*
+ * Build a vdev name that corresponds to the top-level vdev names
+ * printed by `zpool status`.
+ *
+ * With no parent the name is just the vdev type; otherwise it is
+ * "<parent_name>/<type>-<id>". A type that cannot be looked up is
+ * reported as "unknown" and a missing id as UINT64_MAX.
+ *
+ * The result lives in a static buffer that is overwritten by the next
+ * call, so it must not be freed.
+ */
+static char *
+get_vdev_name(nvlist_t *nvroot, const char *parent_name)
+{
+	static char vdev_name[256];
+	char *type = NULL;
+	uint64_t id = 0;
+
+	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &type) != 0)
+		type = "unknown";
+	if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &id) != 0)
+		id = UINT64_MAX;
+
+	if (parent_name != NULL) {
+		(void) snprintf(vdev_name, sizeof (vdev_name), "%s/%s-%llu",
+		    parent_name, type, (u_longlong_t)id);
+		return (vdev_name);
+	}
+
+	(void) snprintf(vdev_name, sizeof (vdev_name), "%s", type);
+	return (vdev_name);
+}
+
+/*
+ * Build the influxdb tag text that describes this vdev.
+ *
+ * The hierarchical vdev name, separated by '/', is always present as
+ * "vdev=<type>" (no parent) or "vdev=<parent>/<type>-<id>". When the
+ * vdev has an associated path, which is typical of leaf vdevs, a
+ * "path=<path>," tag is placed in front of it.
+ * It would be nice to have the devid instead of the path, but under
+ * Linux we cannot be sure a devid will exist and we'd rather have
+ * something than nothing, so we'll use path instead.
+ *
+ * Every component is passed through escape_string(). The result lives
+ * in a static buffer that is overwritten by the next call, so it must
+ * not be freed.
+ */
+static char *
+get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
+{
+	static char vdev_desc[2 * MAXPATHLEN];
+	char vdev_value[MAXPATHLEN];
+	char *type = NULL;
+	char *path = NULL;
+	char *esc_parent, *esc_type, *esc_path;
+	uint64_t id = 0;
+
+	/* fall back to placeholders for anything the nvlist lacks */
+	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &type) != 0)
+		type = "unknown";
+	if (nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &id) != 0)
+		id = UINT64_MAX;
+	if (nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &path) != 0)
+		path = NULL;
+
+	/* first the "vdev=" tag ... */
+	if (parent_name != NULL) {
+		esc_parent = escape_string((char *)parent_name);
+		esc_type = escape_string(type);
+		(void) snprintf(vdev_value, sizeof (vdev_value),
+		    "vdev=%s/%s-%llu", esc_parent, esc_type,
+		    (u_longlong_t)id);
+		free(esc_parent);
+		free(esc_type);
+	} else {
+		esc_type = escape_string(type);
+		(void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s",
+		    esc_type);
+		free(esc_type);
+	}
+
+	/* ... then prefix the "path=" tag when a path is known */
+	if (path != NULL) {
+		esc_path = escape_string(path);
+		(void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
+		    esc_path, vdev_value);
+		free(esc_path);
+	} else {
+		(void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
+		    vdev_value);
+	}
+	return (vdev_desc);
+}
+
+/*
+ * vdev summary stats are a combination of the data shown by
+ * `zpool status` and `zpool list -v`
+ *
+ * Prints one POOL_MEASUREMENT line for the vdev described by nvroot,
+ * tagged with the pool name, the vdev state name and the vdev
+ * description from get_vdev_desc().
+ *
+ * Returns 0 on success, or 1 (printing nothing) when nvroot carries no
+ * ZPOOL_CONFIG_VDEV_STATS array.
+ */
+static int
+print_summary_stats(nvlist_t *nvroot, const char *pool_name,
+    const char *parent_name)
+{
+	uint_t c;
+	vdev_stat_t *vs;
+	const char *state_name;
+	char *vdev_desc = get_vdev_desc(nvroot, parent_name);
+
+	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) != 0)
+		return (1);
+
+	state_name = zpool_state_to_name((vdev_state_t)vs->vs_state,
+	    (vdev_aux_t)vs->vs_aux);
+
+	/* tag set */
+	printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
+	    pool_name, state_name, vdev_desc);
+
+	/* space accounting; "free" is derived as size minus allocated */
+	print_kv("alloc", vs->vs_alloc);
+	print_kv(",free", vs->vs_space - vs->vs_alloc);
+	print_kv(",size", vs->vs_space);
+
+	/* read and write counters */
+	print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
+	print_kv(",read_errors", vs->vs_read_errors);
+	print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
+	print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
+	print_kv(",write_errors", vs->vs_write_errors);
+	print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
+
+	print_kv(",checksum_errors", vs->vs_checksum_errors);
+	print_kv(",fragmentation", vs->vs_fragmentation);
+	printf(" %llu\n", (u_longlong_t)timestamp);
+	return (0);
+}
+
+/*
+ * vdev latency stats are histograms stored as nvlist arrays of uint64.
+ * Latency stats include the ZIO scheduler classes plus lower-level
+ * vdev latencies.
+ *
+ * In many cases, the top-level "root" view obscures the underlying
+ * top-level vdev operations. For example, if a pool has a log, special,
+ * or cache device, then each can behave very differently. It is useful
+ * to see how each is responding.
+ *
+ * One line is printed per bucket from MIN_LAT_INDEX upwards, tagged
+ * with le=<2^bucket nanoseconds, expressed in seconds>; the final
+ * bucket is tagged le=+Inf. Buckets below MIN_LAT_INDEX are not printed
+ * but are added into the first printed bucket.
+ *
+ * Returns 0 on success, 6 when nvroot has no ZPOOL_CONFIG_VDEV_STATS_EX
+ * list, and 3 when any one of the histograms is missing from it.
+ */
+static int
+print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
+    const char *parent_name)
+{
+	uint_t nbuckets, last = 0;
+	nvlist_t *stats_ex;
+	char *desc = NULL;
+
+	/* short_names become part of the metric name and are influxdb-ready */
+	struct lat_lookup {
+	    char *name;
+	    char *short_name;
+	    uint64_t sum;
+	    uint64_t *array;
+	};
+	struct lat_lookup lat_type[] = {
+	    {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,   "total_read", 0},
+	    {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,   "total_write", 0},
+	    {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,  "disk_read", 0},
+	    {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,  "disk_write", 0},
+	    {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,  "sync_read", 0},
+	    {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,  "sync_write", 0},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
+	    {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,   "scrub", 0},
+#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
+	    {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO,    "trim", 0},
+#endif
+	    {NULL,	NULL}
+	};
+	struct lat_lookup *lat;
+
+	if (nvlist_lookup_nvlist(nvroot,
+	    ZPOOL_CONFIG_VDEV_STATS_EX, &stats_ex) != 0)
+		return (6);
+
+	desc = get_vdev_desc(nvroot, parent_name);
+
+	/* fetch every histogram before printing anything */
+	for (lat = lat_type; lat->name != NULL; lat++) {
+		if (nvlist_lookup_uint64_array(stats_ex,
+		    lat->name, &lat->array, &nbuckets) != 0) {
+			fprintf(stderr, "error: can't get %s\n",
+			    lat->name);
+			return (3);
+		}
+		/* index of the last bucket; the arrays are all the same size */
+		last = nbuckets - 1;
+	}
+
+	for (int bucket = 0; bucket <= last; bucket++) {
+		if (bucket < MIN_LAT_INDEX) {
+			/* not printed, only folded into the running sums */
+			for (lat = lat_type; lat->name != NULL; lat++)
+				lat->sum += lat->array[bucket];
+			continue;
+		}
+
+		/* tag set; the final bucket is the open-ended one */
+		if (bucket == last) {
+			printf("%s%s,le=+Inf,name=%s,%s ",
+			    POOL_LATENCY_MEASUREMENT, tags, pool_name,
+			    desc);
+		} else {
+			printf("%s%s,le=%0.6f,name=%s,%s ",
+			    POOL_LATENCY_MEASUREMENT, tags,
+			    (float)(1ULL << bucket) * 1e-9,
+			    pool_name, desc);
+		}
+
+		for (lat = lat_type; lat->name != NULL; lat++) {
+			/*
+			 * The first printed bucket keeps what the skipped
+			 * buckets accumulated; later buckets start from
+			 * zero unless cumulative sums were requested.
+			 */
+			if (bucket > MIN_LAT_INDEX && !sum_histogram_buckets)
+				lat->sum = 0;
+			lat->sum += lat->array[bucket];
+			print_kv(lat->short_name, lat->sum);
+			if (lat[1].name != NULL)
+				printf(",");
+		}
+		printf(" %llu\n", (u_longlong_t)timestamp);
+	}
+	return (0);
+}
+
+/*
+ * vdev request size stats are histograms stored as nvlist arrays of uint64.
+ * Request size stats include the ZIO scheduler classes plus lower-level
+ * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
+ *
+ * In many cases, the top-level "root" view obscures the underlying
+ * top-level vdev operations. For example, if a pool has a log, special,
+ * or cache device, then each can behave very differently. It is useful
+ * to see how each is responding.
+ *
+ * One line is printed per bucket from MIN_SIZE_INDEX upwards, tagged
+ * with le=<2^bucket>; the final bucket is tagged le=+Inf. Buckets below
+ * MIN_SIZE_INDEX are not printed but are added into the first printed
+ * bucket.
+ *
+ * Returns 0 on success, 6 when nvroot has no ZPOOL_CONFIG_VDEV_STATS_EX
+ * list, and 3 when any one of the histograms is missing from it.
+ */
+static int
+print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
+    const char *parent_name)
+{
+	uint_t nbuckets, last = 0;
+	nvlist_t *stats_ex;
+	char *desc = NULL;
+
+	/* short_names become the field name */
+	struct size_lookup {
+	    char *name;
+	    char *short_name;
+	    uint64_t sum;
+	    uint64_t *array;
+	};
+	struct size_lookup size_type[] = {
+	    {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,   "sync_read_ind"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO,   "sync_write_ind"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO,  "async_read_ind"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO,  "async_write_ind"},
+	    {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,    "scrub_read_ind"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,   "sync_read_agg"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO,   "sync_write_agg"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO,  "async_read_agg"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,  "async_write_agg"},
+	    {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,    "scrub_read_agg"},
+#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
+	    {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO,    "trim_write_ind"},
+	    {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO,    "trim_write_agg"},
+#endif
+	    {NULL,	NULL}
+	};
+	struct size_lookup *sz;
+
+	if (nvlist_lookup_nvlist(nvroot,
+	    ZPOOL_CONFIG_VDEV_STATS_EX, &stats_ex) != 0)
+		return (6);
+
+	desc = get_vdev_desc(nvroot, parent_name);
+
+	/* fetch every histogram before printing anything */
+	for (sz = size_type; sz->name != NULL; sz++) {
+		if (nvlist_lookup_uint64_array(stats_ex, sz->name,
+		    &sz->array, &nbuckets) != 0) {
+			fprintf(stderr, "error: can't get %s\n",
+			    sz->name);
+			return (3);
+		}
+		/* index of the last bucket; the arrays are all the same size */
+		last = nbuckets - 1;
+	}
+
+	for (int bucket = 0; bucket <= last; bucket++) {
+		if (bucket < MIN_SIZE_INDEX) {
+			/* not printed, only folded into the running sums */
+			for (sz = size_type; sz->name != NULL; sz++)
+				sz->sum += sz->array[bucket];
+			continue;
+		}
+
+		/* tag set; the final bucket is the open-ended one */
+		if (bucket == last) {
+			printf("%s%s,le=+Inf,name=%s,%s ",
+			    POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
+			    desc);
+		} else {
+			printf("%s%s,le=%llu,name=%s,%s ",
+			    POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
+			    pool_name, desc);
+		}
+
+		for (sz = size_type; sz->name != NULL; sz++) {
+			/*
+			 * The first printed bucket keeps what the skipped
+			 * buckets accumulated; later buckets start from
+			 * zero unless cumulative sums were requested.
+			 */
+			if (bucket > MIN_SIZE_INDEX && !sum_histogram_buckets)
+				sz->sum = 0;
+			sz->sum += sz->array[bucket];
+			print_kv(sz->short_name, sz->sum);
+			if (sz[1].name != NULL)
+				printf(",");
+		}
+		printf(" %llu\n", (u_longlong_t)timestamp);
+	}
+	return (0);
+}
+
+/*
+ * ZIO scheduler queue stats are stored as gauges. This is unfortunate
+ * because the values can change very rapidly and any point-in-time
+ * value will quickly be obsoleted. It is also not easy to downsample.
+ * Thus only the top-level queue stats might be beneficial... maybe.
+ *
+ * Prints one POOL_QUEUE_MEASUREMENT line for the vdev described by
+ * nvroot, with one field per active/pending queue.
+ *
+ * Returns 0 on success, 6 when nvroot has no ZPOOL_CONFIG_VDEV_STATS_EX
+ * list, and 3 when a queue value is missing. In the last case the
+ * fields printed so far are left on stdout without a terminating
+ * timestamp or newline.
+ */
+static int
+print_queue_stats(nvlist_t *nvroot, const char *pool_name,
+    const char *parent_name)
+{
+	nvlist_t *stats_ex;
+	uint64_t qval;
+	char *desc;
+	int i = 0;
+
+	/* short_names are used for the field name */
+	struct queue_lookup {
+	    char *name;
+	    char *short_name;
+	};
+	struct queue_lookup queue_type[] = {
+	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,	"sync_r_active"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,	"sync_w_active"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,	"async_r_active"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,	"async_w_active"},
+	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,	"async_scrub_active"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,	"sync_r_pend"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,	"sync_w_pend"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,	"async_r_pend"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,	"async_w_pend"},
+	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,	"async_scrub_pend"},
+	    {NULL,	NULL}
+	};
+
+	if (nvlist_lookup_nvlist(nvroot,
+	    ZPOOL_CONFIG_VDEV_STATS_EX, &stats_ex) != 0)
+		return (6);
+
+	/* tag set */
+	desc = get_vdev_desc(nvroot, parent_name);
+	printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
+	    desc);
+
+	/* fields, comma-separated; the separator follows each non-final one */
+	while (queue_type[i].name != NULL) {
+		if (nvlist_lookup_uint64(stats_ex,
+		    queue_type[i].name, &qval) != 0) {
+			fprintf(stderr, "error: can't get %s\n",
+			    queue_type[i].name);
+			return (3);
+		}
+		print_kv(queue_type[i].short_name, qval);
+		i++;
+		if (queue_type[i].name != NULL)
+			printf(",");
+	}
+	printf(" %llu\n", (u_longlong_t)timestamp);
+	return (0);
+}
+
+/*
+ * Print the queue statistics of the root vdev, which stand for the pool as
+ * a whole, as one line tagged "vdev=root".
+ *
+ * Returns 0 on success, 6 if the extended stats nvlist is missing, or 3 if
+ * one of the queue values cannot be found.
+ */
+static int
+print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
+{
+	/* the short_name of each entry becomes part of the metric name */
+	struct queue_lookup {
+	    char *name;
+	    char *short_name;
+	};
+	struct queue_lookup queue_type[] = {
+	    {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
+	    {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
+	    {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
+	    {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
+	    {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
+	    {NULL, NULL}
+	};
+	struct queue_lookup *q;
+	nvlist_t *nv_ex;
+	uint64_t value;
+
+	if (nvlist_lookup_nvlist(nvroot, ZPOOL_CONFIG_VDEV_STATS_EX,
+	    &nv_ex) != 0)
+		return (6);
+
+	printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
+	    pool_name);
+
+	for (q = queue_type; q->name != NULL; q++) {
+		if (nvlist_lookup_uint64(nv_ex, q->name, &value) != 0) {
+			fprintf(stderr, "error: can't get %s\n", q->name);
+			return (3);
+		}
+		/* every field except the first is preceded by a comma */
+		if (q != queue_type)
+			printf(",");
+		print_kv(q->short_name, value);
+	}
+
+	printf(" %llu\n", (u_longlong_t)timestamp);
+	return (0);
+}
+
+/*
+ * Apply a stats printer to a vdev and, when descend is non-zero, to every
+ * vdev beneath it.
+ *
+ * The printer's result for nvroot itself is returned to the caller; a
+ * non-zero result also stops the descent. Results from child vdevs are
+ * ignored, so one failing child does not suppress its siblings.
+ */
+static int
+print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
+    const char *pool_name, const char *parent_name, int descend)
+{
+	nvlist_t **kids;
+	uint_t nkids, idx;
+	char vdev_name[256];
+	int rc;
+
+	rc = func(nvroot, pool_name, parent_name);
+	if (rc != 0)
+		return (rc);
+
+	if (!descend)
+		return (0);
+
+	/* leaf vdevs have no children array; nothing more to do */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (0);
+
+	/* this vdev's name becomes the parent name of its children */
+	(void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name),
+	    sizeof (vdev_name));
+
+	for (idx = 0; idx < nkids; idx++) {
+		(void) print_recursive_stats(func, kids[idx], pool_name,
+		    vdev_name, descend);
+	}
+	return (0);
+}
+
+/*
+ * call-back to print the stats from the pool config
+ *
+ * data, when not NULL, is the name of the only pool to report; every other
+ * pool is closed and skipped. The global timestamp is set to the current
+ * wall-clock time in nanoseconds before anything is printed.
+ *
+ * The pool handle is closed on every path. Returns 0 on success (or when
+ * the pool was skipped), 1 if the stats cannot be refreshed, 2 if the
+ * config has no vdev tree, 3 if the root vdev has no stats, otherwise the
+ * error of the first printer that failed.
+ *
+ * Note: if the pool is broken, this can hang indefinitely and perhaps in an
+ * unkillable state.
+ */
+static int
+print_stats(zpool_handle_t *zhp, void *data)
+{
+	uint_t c;
+	int err;
+	boolean_t missing;
+	nvlist_t *config, *nvroot;
+	vdev_stat_t *vs;
+	struct timespec tv;
+	char *pool_name;
+
+	/* if not this pool return quickly */
+	if (data &&
+	    strncmp(data, zhp->zpool_name, ZFS_MAX_DATASET_NAME_LEN) != 0) {
+		zpool_close(zhp);
+		return (0);
+	}
+
+	if (zpool_refresh_stats(zhp, &missing) != 0) {
+		zpool_close(zhp);
+		return (1);
+	}
+
+	config = zpool_get_config(zhp, NULL);
+	/* fall back to one-second resolution if the clock cannot be read */
+	if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
+		timestamp = (uint64_t)time(NULL) * 1000000000;
+	else
+		timestamp =
+		    ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;
+
+	if (nvlist_lookup_nvlist(
+	    config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
+		zpool_close(zhp);
+		return (2);
+	}
+	/* vs and c are not used further; this only checks the stats exist */
+	if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) != 0) {
+		zpool_close(zhp);
+		return (3);
+	}
+
+	pool_name = escape_string(zhp->zpool_name);
+	err = print_recursive_stats(print_summary_stats, nvroot,
+	    pool_name, NULL, 1);
+	/* if any of these return an error, skip the rest */
+	if (err == 0)
+		err = print_top_level_vdev_stats(nvroot, pool_name);
+
+	if (no_histograms == 0) {
+		if (err == 0)
+			err = print_recursive_stats(print_vdev_latency_stats,
+			    nvroot, pool_name, NULL, 1);
+		if (err == 0)
+			err = print_recursive_stats(print_vdev_size_stats,
+			    nvroot, pool_name, NULL, 1);
+		/* queue stats are printed for the root vdev only */
+		if (err == 0)
+			err = print_recursive_stats(print_queue_stats, nvroot,
+			    pool_name, NULL, 0);
+	}
+	if (err == 0)
+		err = print_scan_status(nvroot, pool_name);
+
+	free(pool_name);
+	zpool_close(zhp);
+	return (err);
+}
+
+/*
+ * Print the command-line synopsis to stderr and exit with EXIT_FAILURE.
+ * name is the program name to show, normally argv[0].
+ */
+static void
+usage(char *name)
+{
+	fprintf(stderr, "usage: %s [--execd][--no-histograms]"
+	    "[--sum-histogram-buckets] [--signed-int] "
+	    "[--tags key=value[,key=value...]] [poolname]\n", name);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Parse the options, then print the stats of every pool (or only the pool
+ * named by the first non-option argument).
+ *
+ * Without --execd the stats are printed once. With --execd they are printed
+ * once for every line read from stdin, flushing stdout after each pass,
+ * until stdin reaches end-of-file.
+ *
+ * Returns the result of the last zpool_iter() call, or 8 if none was made.
+ */
+int
+main(int argc, char *argv[])
+{
+	int opt;
+	int ret = 8;
+	char *line = NULL;
+	size_t len = 0, tagslen = 0;
+	libzfs_handle_t *g_zfs;
+	struct option long_options[] = {
+	    {"execd", no_argument, NULL, 'e'},
+	    {"help", no_argument, NULL, 'h'},
+	    {"no-histograms", no_argument, NULL, 'n'},
+	    {"signed-int", no_argument, NULL, 'i'},
+	    {"sum-histogram-buckets", no_argument, NULL, 's'},
+	    {"tags", required_argument, NULL, 't'},
+	    {0, 0, 0, 0}
+	};
+	while ((opt = getopt_long(
+	    argc, argv, "ehinst:", long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'e':
+			execd_mode = 1;
+			break;
+		case 'i':
+			metric_data_type = 'i';
+			metric_value_mask = INT64_MAX;
+			break;
+		case 'n':
+			no_histograms = 1;
+			break;
+		case 's':
+			sum_histogram_buckets = 1;
+			break;
+		case 't':
+			/*
+			 * tagslen is non-zero only after an earlier -t
+			 * allocated tags here; release that buffer so a
+			 * repeated option does not leak it.
+			 */
+			if (tagslen != 0)
+				free(tags);
+			/* room for the leading ',' and the trailing NUL */
+			tagslen = strlen(optarg) + 2;
+			tags = calloc(tagslen, 1);
+			if (tags == NULL) {
+				fprintf(stderr,
+				    "error: cannot allocate memory "
+				    "for tags\n");
+				exit(1);
+			}
+			(void) snprintf(tags, tagslen, ",%s", optarg);
+			break;
+		default:
+			/* also reached for -h/--help; usage() exits */
+			usage(argv[0]);
+		}
+	}
+
+	if ((g_zfs = libzfs_init()) == NULL) {
+		fprintf(stderr,
+		    "error: cannot initialize libzfs. "
+		    "Is the zfs module loaded or zrepl running?\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (execd_mode == 0) {
+		ret = zpool_iter(g_zfs, print_stats, argv[optind]);
+	} else {
+		/* each input line triggers one full report */
+		while (getline(&line, &len, stdin) != -1) {
+			ret = zpool_iter(g_zfs, print_stats, argv[optind]);
+			fflush(stdout);
+		}
+		free(line);
+	}
+
+	libzfs_fini(g_zfs);
+	return (ret);
+}

diff --git a/zfs/cmd/zstream/.gitignore b/zfs/cmd/zstream/.gitignore
new file mode 100644
index 0000000..fd1240d
--- /dev/null
+++ b/zfs/cmd/zstream/.gitignore

@@ -0,0 +1 @@
+zstream

diff --git a/zfs/cmd/zstream/Makefile.am b/zfs/cmd/zstream/Makefile.am
new file mode 100644
index 0000000..8e81302
--- /dev/null
+++ b/zfs/cmd/zstream/Makefile.am

@@ -0,0 +1,20 @@
+# Automake rules for the zstream utility, installed into $(sbindir).
+include $(top_srcdir)/config/Rules.am
+
+sbin_PROGRAMS = zstream
+
+# zstream.c holds main() and dispatches to the dump, redup and token
+# subcommands implemented in the other source files.
+zstream_SOURCES = \
+	zstream.c \
+	zstream.h \
+	zstream_dump.c \
+	zstream_redup.c \
+	zstream_token.c
+
+zstream_LDADD = \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Install a zstreamdump symlink next to zstream; main() in zstream.c runs
+# the dump subcommand when the program is invoked under that name.
+install-exec-hook:
+	cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump

diff --git a/zfs/cmd/zstream/zstream.c b/zfs/cmd/zstream/zstream.c
new file mode 100644
index 0000000..523ae06
--- /dev/null
+++ b/zfs/cmd/zstream/zstream.c

@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2020 by Datto Inc. All rights reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <stddef.h>
+#include <libzfs.h>
+#include "zstream.h"
+
+/*
+ * Write the list of zstream subcommands to stderr and terminate the
+ * process with exit status 1. This function does not return.
+ */
+void
+zstream_usage(void)
+{
+	static const char usage_text[] =
+	    "usage: zstream command args ...\n"
+	    "Available commands are:\n"
+	    "\n"
+	    "\tzstream dump [-vCd] FILE\n"
+	    "\t... | zstream dump [-vCd]\n"
+	    "\n"
+	    "\tzstream token resume_token\n"
+	    "\n"
+	    "\tzstream redup [-v] FILE | ...\n";
+
+	(void) fputs(usage_text, stderr);
+	exit(1);
+}
+
+/*
+ * Entry point: select the subcommand to run.
+ *
+ * When the program is invoked under the name "zstreamdump" it behaves as
+ * "zstream dump" and receives the arguments unshifted. Otherwise argv[1]
+ * names the subcommand, which is called with argc/argv shifted by one so
+ * that the subcommand name becomes its argv[0]. A missing or unknown
+ * subcommand prints the usage text and exits.
+ */
+int
+main(int argc, char *argv[])
+{
+	/* argv[0] may be absent when the caller passes an empty argv */
+	if (argc < 1 || argv[0] == NULL)
+		zstream_usage();
+
+	char *progname = strrchr(argv[0], '/');
+	progname = progname ? (progname + 1) : argv[0];
+	if (strcmp(progname, "zstreamdump") == 0)
+		return (zstream_do_dump(argc, argv));
+
+	if (argc < 2)
+		zstream_usage();
+
+	char *subcommand = argv[1];
+
+	if (strcmp(subcommand, "dump") == 0)
+		return (zstream_do_dump(argc - 1, argv + 1));
+	if (strcmp(subcommand, "token") == 0)
+		return (zstream_do_token(argc - 1, argv + 1));
+	if (strcmp(subcommand, "redup") == 0)
+		return (zstream_do_redup(argc - 1, argv + 1));
+
+	zstream_usage();
+	/* zstream_usage() exits; keep an explicit status for the compiler */
+	return (1);
+}

diff --git a/zfs/cmd/zstream/zstream.h b/zfs/cmd/zstream/zstream.h
new file mode 100644
index 0000000..319fecb
--- /dev/null
+++ b/zfs/cmd/zstream/zstream.h

@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#ifndef	_ZSTREAM_H
+#define	_ZSTREAM_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Subcommand entry points called from main() in zstream.c. Each one gets
+ * the argument vector starting at the subcommand name and its return value
+ * is used as the process exit status.
+ */
+extern int zstream_do_redup(int, char *[]);
+extern int zstream_do_dump(int, char *[]);
+extern int zstream_do_token(int, char *[]);
+/* Print the usage text to stderr and exit with status 1. */
+extern void zstream_usage(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZSTREAM_H */

diff --git a/zfs/cmd/zstream/zstream_dump.c b/zfs/cmd/zstream/zstream_dump.c
new file mode 100644
index 0000000..f0e83fb
--- /dev/null
+++ b/zfs/cmd/zstream/zstream_dump.c

@@ -0,0 +1,812 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Portions Copyright 2012 Martin Matuska <martin@matuska.org>
+ */
+
+/*
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ */
+
+#include <ctype.h>
+#include <libnvpair.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+
+#include <sys/dmu.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio.h>
+#include <zfs_fletcher.h>
+#include "zstream.h"
+
+/*
+ * If dump mode is enabled, the number of bytes to print per line
+ */
+#define	BYTES_PER_LINE	16
+/*
+ * If dump mode is enabled, the number of bytes to group together, separated
+ * by newlines or spaces
+ */
+#define	DUMP_GROUPING	4
+
+/* Number of bytes consumed from the stream so far; advanced by ssread(). */
+uint64_t total_stream_len = 0;
+/* Stream being dumped: the named file, or stdin when no file is given. */
+FILE *send_stream = 0;
+/* Set once the BEGIN magic shows the stream has the opposite byte order. */
+boolean_t do_byteswap = B_FALSE;
+/* Checksums are computed and verified unless the -C option clears this. */
+boolean_t do_cksum = B_TRUE;
+
+/*
+ * malloc() wrapper that never returns NULL: when the allocation fails it
+ * reports the requested size on stderr and aborts the process.
+ */
+static void *
+safe_malloc(size_t size)
+{
+	void *mem;
+
+	if ((mem = malloc(size)) != NULL)
+		return (mem);
+
+	(void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n",
+	    size);
+	abort();
+	/* NOTREACHED */
+	return (NULL);
+}
+
+/*
+ * ssread - send stream read.
+ *
+ * Read exactly len bytes from send_stream into buf as a single item. On
+ * success the bytes are folded into the running fletcher-4 checksum (when
+ * checksumming is enabled, byte-swapped if the stream requires it) and
+ * total_stream_len is advanced by len.
+ *
+ * Returns the fread() item count: 1 when all len bytes were read, 0 when
+ * they were not, in which case neither the checksum nor the length is
+ * updated.
+ */
+static size_t
+ssread(void *buf, size_t len, zio_cksum_t *cksum)
+{
+	size_t nitems = fread(buf, len, 1, send_stream);
+
+	if (nitems == 0)
+		return (0);
+
+	if (do_cksum && do_byteswap)
+		fletcher_4_incremental_byteswap(buf, len, cksum);
+	else if (do_cksum)
+		fletcher_4_incremental_native(buf, len, cksum);
+
+	total_stream_len += len;
+	return (nitems);
+}
+
+/*
+ * Read one replay record header into *drr and verify its checksum.
+ *
+ * The header is read in two parts: everything up to the trailing checksum
+ * field, then the checksum field itself. The running checksum as it stood
+ * between the two reads is what the stored value must match. A stored
+ * checksum of all zeroes is accepted without comparison, as is any value
+ * when checksumming is disabled.
+ *
+ * Returns sizeof (*drr) on success, 0 on a failed read or a mismatch.
+ */
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+	const size_t body_len = sizeof (*drr) - sizeof (zio_cksum_t);
+	zio_cksum_t *stored = &drr->drr_u.drr_checksum.drr_checksum;
+	zio_cksum_t saved_cksum;
+
+	/* the checksum must be the last field of the record */
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+
+	if (ssread(drr, body_len, cksum) == 0)
+		return (0);
+
+	/* remember the checksum of everything before the stored value */
+	saved_cksum = *cksum;
+	if (ssread(stored, sizeof (zio_cksum_t), cksum) == 0)
+		return (0);
+
+	if (!do_cksum || ZIO_CHECKSUM_IS_ZERO(stored) ||
+	    ZIO_CHECKSUM_EQUAL(saved_cksum, *stored))
+		return (sizeof (*drr));
+
+	fprintf(stderr, "invalid checksum\n");
+	(void) printf("Incorrect checksum in record header.\n");
+	(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+	    (longlong_t)saved_cksum.zc_word[0],
+	    (longlong_t)saved_cksum.zc_word[1],
+	    (longlong_t)saved_cksum.zc_word[2],
+	    (longlong_t)saved_cksum.zc_word[3]);
+	return (0);
+}
+
+/*
+ * Print part of a block in ASCII characters
+ *
+ * Writes length bytes of subbuf to stdout, replacing every non-printable
+ * byte with '.', inserting a space after each DUMP_GROUPING bytes and
+ * ending the line with a newline.
+ */
+static void
+print_ascii_block(char *subbuf, int length)
+{
+	int i;
+
+	for (i = 0; i < length; i++) {
+		/*
+		 * isprint() is only defined for values representable as
+		 * unsigned char (or EOF); a plain char may be negative.
+		 */
+		unsigned char byte = (unsigned char)subbuf[i];
+
+		if (i != 0 && i % DUMP_GROUPING == 0) {
+			(void) printf(" ");
+		}
+		(void) printf("%c", isprint(byte) ? byte : '.');
+	}
+	(void) printf("\n");
+}
+
+/*
+ * print_block - Dump the contents of a modified block to STDOUT
+ *
+ * Each output line shows up to BYTES_PER_LINE bytes as two-digit hex
+ * values, grouped DUMP_GROUPING at a time, followed by the same bytes as
+ * ASCII in a column that starts at a fixed offset.
+ *
+ * Assume that buf has capacity evenly divisible by BYTES_PER_LINE
+ */
+static void
+print_block(char *buf, int length)
+{
+	/*
+	 * Column where the ASCII part begins: 3 characters per byte (two
+	 * hex digits and a space), one extra space per group, plus 2.
+	 */
+	const int ascii_start = BYTES_PER_LINE * 3 +
+	    BYTES_PER_LINE / DUMP_GROUPING + 2;
+	int line_off;
+
+	for (line_off = 0; line_off < length; line_off += BYTES_PER_LINE) {
+		int remaining = length - line_off;
+		int line_len = MIN(BYTES_PER_LINE, remaining);
+		int col = 0;	/* characters printed on this line so far */
+		int k;
+
+		for (k = 0; k < line_len; k++) {
+			int pos = line_off + k;
+			unsigned char byte = buf[pos];
+
+			/* a space opens every group of DUMP_GROUPING bytes */
+			if (pos % DUMP_GROUPING == 0)
+				col += printf(" ");
+
+			col += printf("%02x ", byte);
+		}
+
+		/* pad out to the ASCII column, then print the characters */
+		(void) printf("%*s", ascii_start - col, " ");
+		print_ascii_block(buf + line_off, line_len);
+	}
+}
+
+/*
+ * Format an array of bytes into str as lowercase hexadecimal digits, two
+ * per byte, followed by a terminating NUL. Nothing is printed. str must
+ * have buf_len * 2 + 1 bytes of space.
+ */
+static void
+sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
+{
+	static const char hexdigits[] = "0123456789abcdef";
+	uint_t i;
+
+	for (i = 0; i < buf_len; i++) {
+		*str++ = hexdigits[buf[i] >> 4];
+		*str++ = hexdigits[buf[i] & 0xf];
+	}
+
+	*str = '\0';
+}
+
+/*
+ * Make sure the payload buffer can hold needed bytes. When it cannot, the
+ * old buffer is freed and a new one of exactly needed bytes is allocated
+ * (its previous contents are not preserved). Returns the buffer to use and
+ * keeps *capacity in step with its size.
+ */
+static char *
+grow_payload_buf(char *buf, uint64_t *capacity, uint64_t needed)
+{
+	if (needed > *capacity) {
+		free(buf);
+		buf = safe_malloc(needed);
+		*capacity = needed;
+	}
+	return (buf);
+}
+
+/*
+ * Implementation of "zstream dump" (and of the zstreamdump alias).
+ *
+ * Reads a send stream from the file named by the first non-option argument,
+ * or from stdin, and prints a description of it to stdout followed by a
+ * summary of record counts and sizes.
+ *
+ * Options:
+ *	-C	do not compute or verify checksums
+ *	-v	print one line per record; given twice, also print each
+ *		record's checksum
+ *	-d	like -v -v, and also hex-dump every payload
+ *
+ * Returns 0 once the stream has been read; exits with status 1 on an
+ * unusable input or an invalid record.
+ */
+int
+zstream_do_dump(int argc, char *argv[])
+{
+	char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
+	/* current size of buf; record payloads may require it to grow */
+	uint64_t buf_capacity = SPA_MAXBLOCKSIZE;
+	uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
+	uint64_t total_payload_size = 0;
+	uint64_t total_overhead_size = 0;
+	uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 };
+	char salt[ZIO_DATA_SALT_LEN * 2 + 1];
+	char iv[ZIO_DATA_IV_LEN * 2 + 1];
+	char mac[ZIO_DATA_MAC_LEN * 2 + 1];
+	uint64_t total_records = 0;
+	uint64_t payload_size;
+	/*
+	 * Feature flags of the most recent BEGIN record. They have to be
+	 * saved because all the drr* pointers below alias the same union,
+	 * so the BEGIN fields are overwritten by the next record read.
+	 */
+	uint64_t featureflags = 0;
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
+	struct drr_end *drre = &thedrr.drr_u.drr_end;
+	struct drr_object *drro = &thedrr.drr_u.drr_object;
+	struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects;
+	struct drr_write *drrw = &thedrr.drr_u.drr_write;
+	struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
+	struct drr_free *drrf = &thedrr.drr_u.drr_free;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+	struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
+	struct drr_redact *drrr = &thedrr.drr_u.drr_redact;
+	struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
+	int c;
+	boolean_t verbose = B_FALSE;
+	boolean_t very_verbose = B_FALSE;
+	boolean_t first = B_TRUE;
+	/*
+	 * dump flag controls whether the contents of any modified data blocks
+	 * are printed to the console during processing of the stream. Warning:
+	 * for large streams, this can obviously lead to massive prints.
+	 */
+	boolean_t dump = B_FALSE;
+	int err;
+	zio_cksum_t zc = { { 0 } };
+	zio_cksum_t pcksum = { { 0 } };
+
+	while ((c = getopt(argc, argv, ":vCd")) != -1) {
+		switch (c) {
+		case 'C':
+			do_cksum = B_FALSE;
+			break;
+		case 'v':
+			if (verbose)
+				very_verbose = B_TRUE;
+			verbose = B_TRUE;
+			break;
+		case 'd':
+			dump = B_TRUE;
+			verbose = B_TRUE;
+			very_verbose = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			zstream_usage();
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			zstream_usage();
+			break;
+		}
+	}
+
+	if (argc > optind) {
+		const char *filename = argv[optind];
+		send_stream = fopen(filename, "r");
+		if (send_stream == NULL) {
+			(void) fprintf(stderr,
+			    "Error while opening file '%s': %s\n",
+			    filename, strerror(errno));
+			exit(1);
+		}
+	} else {
+		if (isatty(STDIN_FILENO)) {
+			(void) fprintf(stderr,
+			    "Error: The send stream is a binary format "
+			    "and can not be read from a\n"
+			    "terminal.  Standard input must be redirected, "
+			    "or a file must be\n"
+			    "specified as a command-line argument.\n");
+			exit(1);
+		}
+		send_stream = stdin;
+	}
+
+	fletcher_4_init();
+	while (read_hdr(drr, &zc)) {
+
+		/*
+		 * If this is the first DMU record being processed, check for
+		 * the magic bytes and figure out the endian-ness based on them.
+		 */
+		if (first) {
+			if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+				do_byteswap = B_TRUE;
+				if (do_cksum) {
+					ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+					/*
+					 * recalculate header checksum now
+					 * that we know it needs to be
+					 * byteswapped.
+					 */
+					fletcher_4_incremental_byteswap(drr,
+					    sizeof (dmu_replay_record_t), &zc);
+				}
+			} else if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
+				(void) fprintf(stderr, "Invalid stream "
+				    "(bad magic number)\n");
+				exit(1);
+			}
+			first = B_FALSE;
+		}
+		if (do_byteswap) {
+			drr->drr_type = BSWAP_32(drr->drr_type);
+			drr->drr_payloadlen =
+			    BSWAP_32(drr->drr_payloadlen);
+		}
+
+		/*
+		 * At this point, the leading fields of the replay record
+		 * (drr_type and drr_payloadlen) have been byte-swapped if
+		 * necessary, but the rest of the data structure (the
+		 * union of type-specific structures) is still in its
+		 * original state.
+		 */
+		if (drr->drr_type >= DRR_NUMTYPES) {
+			(void) printf("INVALID record found: type 0x%x\n",
+			    drr->drr_type);
+			(void) printf("Aborting.\n");
+			exit(1);
+		}
+
+		drr_record_count[drr->drr_type]++;
+		total_overhead_size += sizeof (*drr);
+		total_records++;
+		payload_size = 0;
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+			if (do_byteswap) {
+				drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+				drrb->drr_versioninfo =
+				    BSWAP_64(drrb->drr_versioninfo);
+				drrb->drr_creation_time =
+				    BSWAP_64(drrb->drr_creation_time);
+				drrb->drr_type = BSWAP_32(drrb->drr_type);
+				drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+				drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+				drrb->drr_fromguid =
+				    BSWAP_64(drrb->drr_fromguid);
+			}
+
+			/* keep the flags for the records that follow */
+			featureflags =
+			    DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+
+			(void) printf("BEGIN record\n");
+			(void) printf("\thdrtype = %lld\n",
+			    DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo));
+			(void) printf("\tfeatures = %llx\n",
+			    DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo));
+			(void) printf("\tmagic = %llx\n",
+			    (u_longlong_t)drrb->drr_magic);
+			(void) printf("\tcreation_time = %llx\n",
+			    (u_longlong_t)drrb->drr_creation_time);
+			(void) printf("\ttype = %u\n", drrb->drr_type);
+			(void) printf("\tflags = 0x%x\n", drrb->drr_flags);
+			(void) printf("\ttoguid = %llx\n",
+			    (u_longlong_t)drrb->drr_toguid);
+			(void) printf("\tfromguid = %llx\n",
+			    (u_longlong_t)drrb->drr_fromguid);
+			(void) printf("\ttoname = %s\n", drrb->drr_toname);
+			(void) printf("\tpayloadlen = %u\n",
+			    drr->drr_payloadlen);
+			if (verbose)
+				(void) printf("\n");
+
+			if (drr->drr_payloadlen != 0) {
+				nvlist_t *nv;
+				/* unsigned, like the field it comes from */
+				uint32_t sz = drr->drr_payloadlen;
+
+				buf = grow_payload_buf(buf, &buf_capacity, sz);
+				(void) ssread(buf, sz, &zc);
+				if (ferror(send_stream))
+					perror("fread");
+				err = nvlist_unpack(buf, sz, &nv, 0);
+				if (err) {
+					/* err is the code; errno is unrelated */
+					(void) fprintf(stderr,
+					    "nvlist_unpack: %s\n",
+					    strerror(err));
+				} else {
+					nvlist_print(stdout, nv);
+					nvlist_free(nv);
+				}
+				payload_size = sz;
+			}
+			break;
+
+		case DRR_END:
+			if (do_byteswap) {
+				drre->drr_checksum.zc_word[0] =
+				    BSWAP_64(drre->drr_checksum.zc_word[0]);
+				drre->drr_checksum.zc_word[1] =
+				    BSWAP_64(drre->drr_checksum.zc_word[1]);
+				drre->drr_checksum.zc_word[2] =
+				    BSWAP_64(drre->drr_checksum.zc_word[2]);
+				drre->drr_checksum.zc_word[3] =
+				    BSWAP_64(drre->drr_checksum.zc_word[3]);
+			}
+			/*
+			 * We compare against the *previous* checksum
+			 * value, because the stored checksum is of
+			 * everything before the DRR_END record.
+			 */
+			if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum,
+			    pcksum)) {
+				(void) printf("Expected checksum differs from "
+				    "checksum in stream.\n");
+				(void) printf("Expected checksum = "
+				    "%llx/%llx/%llx/%llx\n",
+				    (long long unsigned int)pcksum.zc_word[0],
+				    (long long unsigned int)pcksum.zc_word[1],
+				    (long long unsigned int)pcksum.zc_word[2],
+				    (long long unsigned int)pcksum.zc_word[3]);
+			}
+			(void) printf("END checksum = %llx/%llx/%llx/%llx\n",
+			    (long long unsigned int)
+			    drre->drr_checksum.zc_word[0],
+			    (long long unsigned int)
+			    drre->drr_checksum.zc_word[1],
+			    (long long unsigned int)
+			    drre->drr_checksum.zc_word[2],
+			    (long long unsigned int)
+			    drre->drr_checksum.zc_word[3]);
+
+			/* the next stream starts with a fresh checksum */
+			ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+			break;
+
+		case DRR_OBJECT:
+			if (do_byteswap) {
+				drro->drr_object = BSWAP_64(drro->drr_object);
+				drro->drr_type = BSWAP_32(drro->drr_type);
+				drro->drr_bonustype =
+				    BSWAP_32(drro->drr_bonustype);
+				drro->drr_blksz = BSWAP_32(drro->drr_blksz);
+				drro->drr_bonuslen =
+				    BSWAP_32(drro->drr_bonuslen);
+				drro->drr_raw_bonuslen =
+				    BSWAP_32(drro->drr_raw_bonuslen);
+				drro->drr_toguid = BSWAP_64(drro->drr_toguid);
+				drro->drr_maxblkid =
+				    BSWAP_64(drro->drr_maxblkid);
+			}
+
+			if (featureflags & DMU_BACKUP_FEATURE_RAW &&
+			    drro->drr_bonuslen > drro->drr_raw_bonuslen) {
+				(void) fprintf(stderr,
+				    "Warning: Object %llu has bonuslen = "
+				    "%u > raw_bonuslen = %u\n\n",
+				    (u_longlong_t)drro->drr_object,
+				    drro->drr_bonuslen, drro->drr_raw_bonuslen);
+			}
+
+			payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+
+			if (verbose) {
+				(void) printf("OBJECT object = %llu type = %u "
+				    "bonustype = %u blksz = %u bonuslen = %u "
+				    "dn_slots = %u raw_bonuslen = %u "
+				    "flags = %u maxblkid = %llu "
+				    "indblkshift = %u nlevels = %u "
+				    "nblkptr = %u\n",
+				    (u_longlong_t)drro->drr_object,
+				    drro->drr_type,
+				    drro->drr_bonustype,
+				    drro->drr_blksz,
+				    drro->drr_bonuslen,
+				    drro->drr_dn_slots,
+				    drro->drr_raw_bonuslen,
+				    drro->drr_flags,
+				    (u_longlong_t)drro->drr_maxblkid,
+				    drro->drr_indblkshift,
+				    drro->drr_nlevels,
+				    drro->drr_nblkptr);
+			}
+			if (drro->drr_bonuslen > 0) {
+				buf = grow_payload_buf(buf, &buf_capacity,
+				    payload_size);
+				(void) ssread(buf, payload_size, &zc);
+				if (dump)
+					print_block(buf, payload_size);
+			}
+			break;
+
+		case DRR_FREEOBJECTS:
+			if (do_byteswap) {
+				drrfo->drr_firstobj =
+				    BSWAP_64(drrfo->drr_firstobj);
+				drrfo->drr_numobjs =
+				    BSWAP_64(drrfo->drr_numobjs);
+				drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid);
+			}
+			if (verbose) {
+				(void) printf("FREEOBJECTS firstobj = %llu "
+				    "numobjs = %llu\n",
+				    (u_longlong_t)drrfo->drr_firstobj,
+				    (u_longlong_t)drrfo->drr_numobjs);
+			}
+			break;
+
+		case DRR_WRITE:
+			if (do_byteswap) {
+				drrw->drr_object = BSWAP_64(drrw->drr_object);
+				drrw->drr_type = BSWAP_32(drrw->drr_type);
+				drrw->drr_offset = BSWAP_64(drrw->drr_offset);
+				drrw->drr_logical_size =
+				    BSWAP_64(drrw->drr_logical_size);
+				drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
+				drrw->drr_key.ddk_prop =
+				    BSWAP_64(drrw->drr_key.ddk_prop);
+				drrw->drr_compressed_size =
+				    BSWAP_64(drrw->drr_compressed_size);
+			}
+
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+
+			/*
+			 * If this is verbose and/or dump output,
+			 * print info on the modified block
+			 */
+			if (verbose) {
+				sprintf_bytes(salt, drrw->drr_salt,
+				    ZIO_DATA_SALT_LEN);
+				sprintf_bytes(iv, drrw->drr_iv,
+				    ZIO_DATA_IV_LEN);
+				sprintf_bytes(mac, drrw->drr_mac,
+				    ZIO_DATA_MAC_LEN);
+
+				(void) printf("WRITE object = %llu type = %u "
+				    "checksum type = %u compression type = %u "
+				    "flags = %u offset = %llu "
+				    "logical_size = %llu "
+				    "compressed_size = %llu "
+				    "payload_size = %llu props = %llx "
+				    "salt = %s iv = %s mac = %s\n",
+				    (u_longlong_t)drrw->drr_object,
+				    drrw->drr_type,
+				    drrw->drr_checksumtype,
+				    drrw->drr_compressiontype,
+				    drrw->drr_flags,
+				    (u_longlong_t)drrw->drr_offset,
+				    (u_longlong_t)drrw->drr_logical_size,
+				    (u_longlong_t)drrw->drr_compressed_size,
+				    (u_longlong_t)payload_size,
+				    (u_longlong_t)drrw->drr_key.ddk_prop,
+				    salt,
+				    iv,
+				    mac);
+			}
+
+			/*
+			 * Read the contents of the block in from STDIN to buf
+			 */
+			buf = grow_payload_buf(buf, &buf_capacity,
+			    payload_size);
+			(void) ssread(buf, payload_size, &zc);
+			/*
+			 * If in dump mode
+			 */
+			if (dump) {
+				print_block(buf, payload_size);
+			}
+			break;
+
+		case DRR_WRITE_BYREF:
+			if (do_byteswap) {
+				drrwbr->drr_object =
+				    BSWAP_64(drrwbr->drr_object);
+				drrwbr->drr_offset =
+				    BSWAP_64(drrwbr->drr_offset);
+				drrwbr->drr_length =
+				    BSWAP_64(drrwbr->drr_length);
+				drrwbr->drr_toguid =
+				    BSWAP_64(drrwbr->drr_toguid);
+				drrwbr->drr_refguid =
+				    BSWAP_64(drrwbr->drr_refguid);
+				drrwbr->drr_refobject =
+				    BSWAP_64(drrwbr->drr_refobject);
+				drrwbr->drr_refoffset =
+				    BSWAP_64(drrwbr->drr_refoffset);
+				drrwbr->drr_key.ddk_prop =
+				    BSWAP_64(drrwbr->drr_key.ddk_prop);
+			}
+			if (verbose) {
+				(void) printf("WRITE_BYREF object = %llu "
+				    "checksum type = %u props = %llx "
+				    "offset = %llu length = %llu "
+				    "toguid = %llx refguid = %llx "
+				    "refobject = %llu refoffset = %llu\n",
+				    (u_longlong_t)drrwbr->drr_object,
+				    drrwbr->drr_checksumtype,
+				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
+				    (u_longlong_t)drrwbr->drr_offset,
+				    (u_longlong_t)drrwbr->drr_length,
+				    (u_longlong_t)drrwbr->drr_toguid,
+				    (u_longlong_t)drrwbr->drr_refguid,
+				    (u_longlong_t)drrwbr->drr_refobject,
+				    (u_longlong_t)drrwbr->drr_refoffset);
+			}
+			break;
+
+		case DRR_FREE:
+			if (do_byteswap) {
+				drrf->drr_object = BSWAP_64(drrf->drr_object);
+				drrf->drr_offset = BSWAP_64(drrf->drr_offset);
+				drrf->drr_length = BSWAP_64(drrf->drr_length);
+			}
+			if (verbose) {
+				(void) printf("FREE object = %llu "
+				    "offset = %llu length = %lld\n",
+				    (u_longlong_t)drrf->drr_object,
+				    (u_longlong_t)drrf->drr_offset,
+				    (longlong_t)drrf->drr_length);
+			}
+			break;
+		case DRR_SPILL:
+			if (do_byteswap) {
+				drrs->drr_object = BSWAP_64(drrs->drr_object);
+				drrs->drr_length = BSWAP_64(drrs->drr_length);
+				drrs->drr_compressed_size =
+				    BSWAP_64(drrs->drr_compressed_size);
+				drrs->drr_type = BSWAP_32(drrs->drr_type);
+			}
+
+			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+
+			if (verbose) {
+				sprintf_bytes(salt, drrs->drr_salt,
+				    ZIO_DATA_SALT_LEN);
+				sprintf_bytes(iv, drrs->drr_iv,
+				    ZIO_DATA_IV_LEN);
+				sprintf_bytes(mac, drrs->drr_mac,
+				    ZIO_DATA_MAC_LEN);
+
+				(void) printf("SPILL block for object = %llu "
+				    "length = %llu flags = %u "
+				    "compression type = %u "
+				    "compressed_size = %llu "
+				    "payload_size = %llu "
+				    "salt = %s iv = %s mac = %s\n",
+				    (u_longlong_t)drrs->drr_object,
+				    (u_longlong_t)drrs->drr_length,
+				    drrs->drr_flags,
+				    drrs->drr_compressiontype,
+				    (u_longlong_t)drrs->drr_compressed_size,
+				    (u_longlong_t)payload_size,
+				    salt,
+				    iv,
+				    mac);
+			}
+			buf = grow_payload_buf(buf, &buf_capacity,
+			    payload_size);
+			(void) ssread(buf, payload_size, &zc);
+			if (dump) {
+				print_block(buf, payload_size);
+			}
+			break;
+		case DRR_WRITE_EMBEDDED:
+			if (do_byteswap) {
+				drrwe->drr_object =
+				    BSWAP_64(drrwe->drr_object);
+				drrwe->drr_offset =
+				    BSWAP_64(drrwe->drr_offset);
+				drrwe->drr_length =
+				    BSWAP_64(drrwe->drr_length);
+				drrwe->drr_toguid =
+				    BSWAP_64(drrwe->drr_toguid);
+				drrwe->drr_lsize =
+				    BSWAP_32(drrwe->drr_lsize);
+				drrwe->drr_psize =
+				    BSWAP_32(drrwe->drr_psize);
+			}
+			if (verbose) {
+				(void) printf("WRITE_EMBEDDED object = %llu "
+				    "offset = %llu length = %llu "
+				    "toguid = %llx comp = %u etype = %u "
+				    "lsize = %u psize = %u\n",
+				    (u_longlong_t)drrwe->drr_object,
+				    (u_longlong_t)drrwe->drr_offset,
+				    (u_longlong_t)drrwe->drr_length,
+				    (u_longlong_t)drrwe->drr_toguid,
+				    drrwe->drr_compression,
+				    drrwe->drr_etype,
+				    drrwe->drr_lsize,
+				    drrwe->drr_psize);
+			}
+			/* the embedded data is padded to a multiple of 8 */
+			payload_size = P2ROUNDUP(drrwe->drr_psize, 8);
+			buf = grow_payload_buf(buf, &buf_capacity,
+			    payload_size);
+			(void) ssread(buf, payload_size, &zc);
+			if (dump) {
+				print_block(buf, payload_size);
+			}
+			break;
+		case DRR_OBJECT_RANGE:
+			if (do_byteswap) {
+				drror->drr_firstobj =
+				    BSWAP_64(drror->drr_firstobj);
+				drror->drr_numslots =
+				    BSWAP_64(drror->drr_numslots);
+				drror->drr_toguid = BSWAP_64(drror->drr_toguid);
+			}
+			if (verbose) {
+				sprintf_bytes(salt, drror->drr_salt,
+				    ZIO_DATA_SALT_LEN);
+				sprintf_bytes(iv, drror->drr_iv,
+				    ZIO_DATA_IV_LEN);
+				sprintf_bytes(mac, drror->drr_mac,
+				    ZIO_DATA_MAC_LEN);
+
+				(void) printf("OBJECT_RANGE firstobj = %llu "
+				    "numslots = %llu flags = %u "
+				    "salt = %s iv = %s mac = %s\n",
+				    (u_longlong_t)drror->drr_firstobj,
+				    (u_longlong_t)drror->drr_numslots,
+				    drror->drr_flags,
+				    salt,
+				    iv,
+				    mac);
+			}
+			break;
+		case DRR_REDACT:
+			if (do_byteswap) {
+				drrr->drr_object = BSWAP_64(drrr->drr_object);
+				drrr->drr_offset = BSWAP_64(drrr->drr_offset);
+				drrr->drr_length = BSWAP_64(drrr->drr_length);
+				drrr->drr_toguid = BSWAP_64(drrr->drr_toguid);
+			}
+			if (verbose) {
+				(void) printf("REDACT object = %llu offset = "
+				    "%llu length = %llu\n",
+				    (u_longlong_t)drrr->drr_object,
+				    (u_longlong_t)drrr->drr_offset,
+				    (u_longlong_t)drrr->drr_length);
+			}
+			break;
+		case DRR_NUMTYPES:
+			/* should never be reached */
+			exit(1);
+		}
+		if (drr->drr_type != DRR_BEGIN && very_verbose) {
+			(void) printf("    checksum = %llx/%llx/%llx/%llx\n",
+			    (longlong_t)drrc->drr_checksum.zc_word[0],
+			    (longlong_t)drrc->drr_checksum.zc_word[1],
+			    (longlong_t)drrc->drr_checksum.zc_word[2],
+			    (longlong_t)drrc->drr_checksum.zc_word[3]);
+		}
+		/* DRR_END compares against the checksum up to this point */
+		pcksum = zc;
+		drr_byte_count[drr->drr_type] += payload_size;
+		total_payload_size += payload_size;
+	}
+	free(buf);
+	fletcher_4_fini();
+
+	/*
+	 * Print final summary. OBJECT_RANGE and REDACT records have no line
+	 * of their own; they are only included in the overall totals.
+	 */
+
+	(void) printf("SUMMARY:\n");
+	(void) printf("\tTotal DRR_BEGIN records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_BEGIN],
+	    (u_longlong_t)drr_byte_count[DRR_BEGIN]);
+	(void) printf("\tTotal DRR_END records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_END],
+	    (u_longlong_t)drr_byte_count[DRR_END]);
+	(void) printf("\tTotal DRR_OBJECT records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_OBJECT],
+	    (u_longlong_t)drr_byte_count[DRR_OBJECT]);
+	(void) printf("\tTotal DRR_FREEOBJECTS records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_FREEOBJECTS],
+	    (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]);
+	(void) printf("\tTotal DRR_WRITE records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE],
+	    (u_longlong_t)drr_byte_count[DRR_WRITE]);
+	(void) printf("\tTotal DRR_WRITE_BYREF records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE_BYREF],
+	    (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]);
+	(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %llu (%llu "
+	    "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED],
+	    (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]);
+	(void) printf("\tTotal DRR_FREE records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_FREE],
+	    (u_longlong_t)drr_byte_count[DRR_FREE]);
+	(void) printf("\tTotal DRR_SPILL records = %llu (%llu bytes)\n",
+	    (u_longlong_t)drr_record_count[DRR_SPILL],
+	    (u_longlong_t)drr_byte_count[DRR_SPILL]);
+	(void) printf("\tTotal records = %llu\n",
+	    (u_longlong_t)total_records);
+	(void) printf("\tTotal payload size = %llu (0x%llx)\n",
+	    (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size);
+	(void) printf("\tTotal header overhead = %llu (0x%llx)\n",
+	    (u_longlong_t)total_overhead_size,
+	    (u_longlong_t)total_overhead_size);
+	(void) printf("\tTotal stream length = %llu (0x%llx)\n",
+	    (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len);
+	return (0);
+}

diff --git a/zfs/cmd/zstream/zstream_redup.c b/zfs/cmd/zstream/zstream_redup.c
new file mode 100644
index 0000000..15dd8a1
--- /dev/null
+++ b/zfs/cmd/zstream/zstream_redup.c

@@ -0,0 +1,470 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#include <assert.h>
+#include <cityhash.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libzfs_impl.h>
+#include <libzfs.h>
+#include <libzutil.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <umem.h>
+#include <unistd.h>
+#include <sys/debug.h>
+#include <sys/stat.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio_checksum.h>
+#include "zfs_fletcher.h"
+#include "zstream.h"
+
+
+#define	MAX_RDT_PHYSMEM_PERCENT		20
+#define	SMALLEST_POSSIBLE_MAX_RDT_MB		128
+
+/*
+ * One chained hash-table entry.  rdt_insert() creates an entry for each
+ * WRITE record seen, keyed by (guid, object, offset), so that a later
+ * WRITE_BYREF record can find where the referenced WRITE record lives in
+ * the input stream.
+ */
+typedef struct redup_entry {
+	struct redup_entry	*rde_next;	/* next entry in this bucket */
+	uint64_t rde_guid;		/* key: drr_toguid of the WRITE */
+	uint64_t rde_object;		/* key: drr_object of the WRITE */
+	uint64_t rde_offset;		/* key: drr_offset of the WRITE */
+	uint64_t rde_stream_offset;	/* input offset of the record header */
+} redup_entry_t;
+
+/*
+ * Hash table used to resolve WRITE_BYREF records back to the WRITE records
+ * they reference.
+ */
+typedef struct redup_table {
+	redup_entry_t	**redup_hash_array;	/* bucket chain heads */
+	umem_cache_t	*ddecache;	/* cache redup_entry_t come from */
+	uint64_t	ddt_count;	/* number of entries inserted */
+	int		numhashbits;	/* low hash bits used as bucket index */
+} redup_table_t;
+
+/*
+ * Return the 1-based position of the most significant set bit of i,
+ * or 0 when i has no bits set.
+ */
+int
+highbit64(uint64_t i)
+{
+	int width = NBBY * sizeof (uint64_t);
+
+	/* __builtin_clzll() is undefined for 0, so handle that case first */
+	return (i != 0 ? width - __builtin_clzll(i) : 0);
+}
+
+/*
+ * Allocate n zero-filled bytes.  Never returns NULL: if the allocation
+ * fails, an error is printed to stderr and the process exits with status 1.
+ */
+static void *
+safe_calloc(size_t n)
+{
+	void *rv = calloc(1, n);
+	if (rv == NULL) {
+		/* %zu reports the full size_t instead of a truncated int */
+		(void) fprintf(stderr,
+		    "Error: could not allocate %zu bytes of memory\n", n);
+		exit(1);
+	}
+	return (rv);
+}
+
+/*
+ * fread() wrapper that reads a single item of the given size.  Returns the
+ * fread() result (1 on success, 0 if the item could not be read).  If the
+ * stream is in an error state after a failed read, a message is printed
+ * and the process exits with status 1.
+ */
+static int
+sfread(void *buf, size_t size, FILE *fp)
+{
+	size_t nitems = fread(buf, size, 1, fp);
+
+	if (nitems != 0)
+		return ((int)nitems);
+
+	if (ferror(fp)) {
+		(void) fprintf(stderr, "Error while reading file: %s\n",
+		    strerror(errno));
+		exit(1);
+	}
+	return (0);
+}
+
+/*
+ * Safe version of pread(): reads exactly count bytes from fd at the given
+ * offset into buf, or exits.
+ *
+ * pread() may legitimately return fewer bytes than requested or fail with
+ * EINTR, so the read is retried until the request is complete.  A real I/O
+ * error, or hitting end-of-file before count bytes were read, prints a
+ * message and exits with status 1.
+ */
+static void
+spread(int fd, void *buf, size_t count, off_t offset)
+{
+	char *dst = buf;
+	size_t done = 0;
+
+	while (done < count) {
+		ssize_t n = pread(fd, dst + done, count - done,
+		    offset + (off_t)done);
+		if (n == -1) {
+			if (errno == EINTR)
+				continue;
+			(void) fprintf(stderr,
+			    "Error while reading file: %s\n",
+			    strerror(errno));
+			exit(1);
+		}
+		if (n == 0) {
+			/* end-of-file before the request was satisfied */
+			(void) fprintf(stderr,
+			    "Error while reading file: short read\n");
+			exit(1);
+		}
+		done += (size_t)n;
+	}
+}
+
+/*
+ * Write all len bytes of buf to fd, continuing after short writes and
+ * retrying when interrupted.  Returns 0 on success, otherwise the errno
+ * value of the failing write().
+ */
+static int
+redup_write_all(int fd, const void *buf, size_t len)
+{
+	const char *src = buf;
+
+	while (len > 0) {
+		ssize_t n = write(fd, src, len);
+		if (n == -1) {
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+		src += n;
+		len -= (size_t)n;
+	}
+	return (0);
+}
+
+/*
+ * Emit one record (header plus optional payload) on outfd while folding it
+ * into the running fletcher-4 checksum *zc.
+ *
+ * For every record except DRR_BEGIN, the trailing checksum field of the
+ * header must be zero on entry; it is set to the checksum of everything
+ * that precedes it before the header is written.
+ *
+ * Returns 0 on success, or an errno value if writing to outfd failed.
+ */
+static int
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
+{
+	int err;
+
+	/* the checksum must be the last field of the record header */
+	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
+	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	/* the stored checksum field itself is part of the running checksum */
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+	err = redup_write_all(outfd, drr, sizeof (*drr));
+	if (err != 0)
+		return (err);
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		err = redup_write_all(outfd, payload, payload_len);
+		if (err != 0)
+			return (err);
+	}
+	return (0);
+}
+
+/*
+ * Add an entry mapping (guid, object, offset) to stream_offset.  The new
+ * entry is pushed onto the front of its bucket's chain; no check is made
+ * for an existing entry with the same key.
+ */
+static void
+rdt_insert(redup_table_t *rdt,
+    uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
+{
+	uint64_t hash = cityhash4(guid, object, offset, 0);
+	uint64_t bucket = BF64_GET(hash, 0, rdt->numhashbits);
+	redup_entry_t **headp = &rdt->redup_hash_array[bucket];
+	redup_entry_t *entry = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
+
+	entry->rde_guid = guid;
+	entry->rde_object = object;
+	entry->rde_offset = offset;
+	entry->rde_stream_offset = stream_offset;
+
+	/* link in at the head of the chain */
+	entry->rde_next = *headp;
+	*headp = entry;
+
+	rdt->ddt_count++;
+}
+
+/*
+ * Find the entry for (guid, object, offset) and store its stream offset in
+ * *stream_offsetp.  The entry is expected to exist; a miss trips an
+ * assertion and leaves *stream_offsetp untouched.
+ */
+static void
+rdt_lookup(redup_table_t *rdt,
+    uint64_t guid, uint64_t object, uint64_t offset,
+    uint64_t *stream_offsetp)
+{
+	uint64_t hash = cityhash4(guid, object, offset, 0);
+	uint64_t bucket = BF64_GET(hash, 0, rdt->numhashbits);
+	redup_entry_t *cur = rdt->redup_hash_array[bucket];
+
+	while (cur != NULL) {
+		boolean_t match = (cur->rde_guid == guid &&
+		    cur->rde_object == object &&
+		    cur->rde_offset == offset);
+
+		if (match) {
+			*stream_offsetp = cur->rde_stream_offset;
+			return;
+		}
+		cur = cur->rde_next;
+	}
+	assert(!"could not find expected redup table entry");
+}
+
+/*
+ * Make sure *bufp can hold at least need bytes.  If it cannot, it is
+ * replaced by a larger zero-filled buffer; the old contents are discarded.
+ * Exits if the request cannot be represented or satisfied.
+ */
+static void
+redup_reserve(char **bufp, size_t *bufszp, uint64_t need)
+{
+	if (need <= *bufszp)
+		return;
+	/* guard against truncation where size_t is narrower than 64 bits */
+	if (need != (uint64_t)(size_t)need) {
+		(void) fprintf(stderr,
+		    "Error: record payload is too large\n");
+		exit(1);
+	}
+	free(*bufp);
+	*bufp = safe_calloc((size_t)need);
+	*bufszp = (size_t)need;
+}
+
+/*
+ * Convert a dedup stream (generated by "zfs send -D") to a
+ * non-deduplicated stream.  The entire infd will be converted, including
+ * any substreams in a stream package (generated by "zfs send -RD"). The
+ * infd must be seekable.
+ *
+ * Every WRITE record's position in the input is remembered in a hash table.
+ * Each WRITE_BYREF record is replaced by a copy of the WRITE record it
+ * refers to (re-read from infd with pread()), retargeted at the BYREF
+ * record's object/offset/toguid.  All record checksums are regenerated.
+ *
+ * infd is wrapped in a stdio stream that is closed before returning, which
+ * closes infd as well.  Any read or write error terminates the process
+ * with exit status 1.  If verbose is set, a summary is printed to stderr.
+ */
+static void
+zfs_redup_stream(int infd, int outfd, boolean_t verbose)
+{
+	size_t bufsz = SPA_MAXBLOCKSIZE;
+	dmu_replay_record_t thedrr = { 0 };
+	dmu_replay_record_t *drr = &thedrr;
+	redup_table_t rdt;
+	zio_cksum_t stream_cksum;
+	uint64_t numbuckets;
+	uint64_t num_records = 0;
+	uint64_t num_write_byref_records = 0;
+
+	/* defined start value even if the stream lacks a BEGIN record */
+	ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+#ifdef _ILP32
+	uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
+#else
+	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
+	uint64_t max_rde_size =
+	    MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
+	    SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
+#endif
+
+	numbuckets = max_rde_size / (sizeof (redup_entry_t));
+
+	/*
+	 * numbuckets must be a power of 2.  Increase number to
+	 * a power of 2 if necessary.
+	 */
+	if (!ISP2(numbuckets))
+		numbuckets = 1ULL << highbit64(numbuckets);
+
+	rdt.redup_hash_array =
+	    safe_calloc(numbuckets * sizeof (redup_entry_t *));
+	rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	rdt.numhashbits = highbit64(numbuckets) - 1;
+	rdt.ddt_count = 0;
+
+	char *buf = safe_calloc(bufsz);
+	FILE *ofp = fdopen(infd, "r");
+	if (ofp == NULL) {
+		(void) fprintf(stderr, "Error while reading file: %s\n",
+		    strerror(errno));
+		exit(1);
+	}
+	/*
+	 * off_t/ftello() rather than long/ftell() so that offsets beyond
+	 * 2GB are representable where long is 32 bits.
+	 */
+	off_t offset = ftello(ofp);
+	while (sfread(drr, sizeof (*drr), ofp) != 0) {
+		num_records++;
+
+		/*
+		 * We need to regenerate the checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			bzero(&drr->drr_u.drr_checksum.drr_checksum,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+
+		uint64_t payload_size = 0;
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			struct drr_begin *drrb = &drr->drr_u.drr_begin;
+			int fflags;
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			assert(drrb->drr_magic == DMU_BACKUP_MAGIC);
+
+			/* clear the DEDUP feature flag for this stream */
+			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+			fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
+			    DMU_BACKUP_FEATURE_DEDUPPROPS);
+			/* cppcheck-suppress syntaxError */
+			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
+
+			/* unsigned, so a huge length cannot turn negative */
+			uint32_t sz = drr->drr_payloadlen;
+			if (sz != 0) {
+				redup_reserve(&buf, &bufsz, sz);
+				(void) sfread(buf, sz, ofp);
+			}
+			payload_size = sz;
+			break;
+		}
+
+		case DRR_END:
+		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
+			/*
+			 * Use the recalculated checksum, unless this is
+			 * the END record of a stream package, which has
+			 * no checksum.
+			 */
+			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
+				drre->drr_checksum = stream_cksum;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			struct drr_object *drro = &drr->drr_u.drr_object;
+
+			if (drro->drr_bonuslen > 0) {
+				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+				redup_reserve(&buf, &bufsz, payload_size);
+				(void) sfread(buf, payload_size, ofp);
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
+			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+			redup_reserve(&buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, ofp);
+			break;
+		}
+
+		case DRR_WRITE_BYREF:
+		{
+			/* copy: *drr is overwritten by the spread() below */
+			struct drr_write_byref drrwb =
+			    drr->drr_u.drr_write_byref;
+
+			num_write_byref_records++;
+
+			/*
+			 * Look up in hash table by drrwb->drr_refguid,
+			 * drr_refobject, drr_refoffset.  Replace this
+			 * record with the found WRITE record, but with
+			 * drr_object,drr_offset,drr_toguid replaced with ours.
+			 */
+			uint64_t stream_offset = 0;
+			rdt_lookup(&rdt, drrwb.drr_refguid,
+			    drrwb.drr_refobject, drrwb.drr_refoffset,
+			    &stream_offset);
+
+			spread(infd, drr, sizeof (*drr), stream_offset);
+
+			assert(drr->drr_type == DRR_WRITE);
+			struct drr_write *drrw = &drr->drr_u.drr_write;
+			assert(drrw->drr_toguid == drrwb.drr_refguid);
+			assert(drrw->drr_object == drrwb.drr_refobject);
+			assert(drrw->drr_offset == drrwb.drr_refoffset);
+
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			redup_reserve(&buf, &bufsz, payload_size);
+			spread(infd, buf, payload_size,
+			    stream_offset + sizeof (*drr));
+
+			drrw->drr_toguid = drrwb.drr_toguid;
+			drrw->drr_object = drrwb.drr_object;
+			drrw->drr_offset = drrwb.drr_offset;
+			break;
+		}
+
+		case DRR_WRITE:
+		{
+			struct drr_write *drrw = &drr->drr_u.drr_write;
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			redup_reserve(&buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, ofp);
+
+			/* offset is where this record's header started */
+			rdt_insert(&rdt, drrw->drr_toguid,
+			    drrw->drr_object, drrw->drr_offset, offset);
+			break;
+		}
+
+		case DRR_WRITE_EMBEDDED:
+		{
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
+			payload_size =
+			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
+			redup_reserve(&buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, ofp);
+			break;
+		}
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+		case DRR_OBJECT_RANGE:
+			break;
+
+		default:
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
+			    drr->drr_type);
+			/* should never happen, so assert */
+			assert(B_FALSE);
+		}
+
+		if (feof(ofp)) {
+			fprintf(stderr, "Error: unexpected end-of-file\n");
+			exit(1);
+		}
+		if (ferror(ofp)) {
+			fprintf(stderr, "Error while reading file: %s\n",
+			    strerror(errno));
+			exit(1);
+		}
+
+		/*
+		 * We need to recalculate the checksum, and it needs to be
+		 * initially zero to do that.  BEGIN records don't have
+		 * a checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			bzero(&drr->drr_u.drr_checksum.drr_checksum,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+		/*
+		 * A failed write leaves a truncated output stream, so report
+		 * it and fail instead of silently stopping.
+		 */
+		int werr = dump_record(drr, buf, payload_size,
+		    &stream_cksum, outfd);
+		if (werr != 0) {
+			(void) fprintf(stderr,
+			    "Error while writing file: %s\n",
+			    strerror(werr));
+			exit(1);
+		}
+		if (drr->drr_type == DRR_END) {
+			/*
+			 * Typically the END record is either the last
+			 * thing in the stream, or it is followed
+			 * by a BEGIN record (which also zeros the checksum).
+			 * However, a stream package ends with two END
+			 * records.  The last END record's checksum starts
+			 * from zero.
+			 */
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+		}
+		offset = ftello(ofp);
+	}
+
+	if (verbose) {
+		char mem_str[16];
+		zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
+		    mem_str, sizeof (mem_str));
+		fprintf(stderr, "converted stream with %llu total records, "
+		    "including %llu dedup records, using %sB memory.\n",
+		    (unsigned long long)num_records,
+		    (unsigned long long)num_write_byref_records,
+		    mem_str);
+	}
+
+	umem_cache_destroy(rdt.ddecache);
+	free(rdt.redup_hash_array);
+	free(buf);
+	/* also closes infd, which ofp was created from */
+	(void) fclose(ofp);
+}
+
+/*
+ * Entry point for the redup subcommand.  Accepts an optional -v flag and
+ * exactly one file name, and writes the converted stream for that file to
+ * standard output, which must not be a terminal.
+ *
+ * Returns 0 on success and 1 if standard output is a terminal.  Failure to
+ * open the file exits with status 1.
+ */
+int
+zstream_do_redup(int argc, char *argv[])
+{
+	boolean_t verbose = B_FALSE;
+	int c;
+
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			zstream_usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* NOTE(review): zstream_usage() presumably does not return - confirm */
+	if (argc != 1)
+		zstream_usage();
+
+	const char *filename = argv[0];
+
+	if (isatty(STDOUT_FILENO)) {
+		(void) fprintf(stderr,
+		    "Error: Stream can not be written to a terminal.\n"
+		    "You must redirect standard output.\n");
+		return (1);
+	}
+
+	int fd = open(filename, O_RDONLY);
+	if (fd == -1) {
+		(void) fprintf(stderr,
+		    "Error while opening file '%s': %s\n",
+		    filename, strerror(errno));
+		exit(1);
+	}
+
+	fletcher_4_init();
+	zfs_redup_stream(fd, STDOUT_FILENO, verbose);
+	fletcher_4_fini();
+
+	/*
+	 * No close(fd) here: zfs_redup_stream() wraps fd with fdopen() and
+	 * fclose()s that stream before returning, which already closed fd.
+	 * Closing it again would be a double close.
+	 */
+
+	return (0);
+}

diff --git a/zfs/cmd/zstream/zstream_token.c b/zfs/cmd/zstream/zstream_token.c
new file mode 100644
index 0000000..36a76a4
--- /dev/null
+++ b/zfs/cmd/zstream/zstream_token.c

@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Portions Copyright 2012 Martin Matuska <martin@matuska.org>
+ */
+
+/*
+ * Copyright (c) 2020 by Datto Inc. All rights reserved.
+ */
+
+#include <ctype.h>
+#include <libnvpair.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+
+#include <libzfs.h>
+#include <libzfs_core.h>
+
+#include <sys/dmu.h>
+#include <sys/zfs_ioctl.h>
+#include "zstream.h"
+
+/*
+ * Entry point for the token subcommand: decodes the resume token given in
+ * argv[1] and prints the resulting nvlist with dump_nvlist().
+ *
+ * Returns 0 on success, or 1 if libzfs could not be initialized or the
+ * token could not be parsed.
+ */
+int
+zstream_do_token(int argc, char *argv[])
+{
+	char *resume_token = NULL;
+
+	/* NOTE(review): zstream_usage() presumably does not return - confirm */
+	if (argc < 2) {
+		(void) fprintf(stderr, "Need to pass the resume token\n");
+		zstream_usage();
+	}
+
+	resume_token = argv[1];
+
+	libzfs_handle_t *hdl = libzfs_init();
+	if (hdl == NULL) {
+		/* the handle is used below, so it must be valid */
+		(void) fprintf(stderr,
+		    "Unable to initialize the ZFS library\n");
+		return (1);
+	}
+
+	nvlist_t *resume_nvl =
+	    zfs_send_resume_token_to_nvlist(hdl, resume_token);
+
+	if (resume_nvl == NULL) {
+		(void) fprintf(stderr,
+		    "Unable to parse resume token: %s\n",
+		    libzfs_error_description(hdl));
+		libzfs_fini(hdl);
+		return (1);
+	}
+
+	dump_nvlist(resume_nvl, 5);
+	nvlist_free(resume_nvl);
+
+	libzfs_fini(hdl);
+	return (0);
+}

diff --git a/zfs/cmd/zstreamdump/.gitignore b/zfs/cmd/zstreamdump/.gitignore
deleted file mode 100644
index ca44a52..0000000
--- a/zfs/cmd/zstreamdump/.gitignore
+++ /dev/null

@@ -1 +0,0 @@
-zstreamdump

diff --git a/zfs/cmd/zstreamdump/Makefile.am b/zfs/cmd/zstreamdump/Makefile.am
deleted file mode 100644
index f80b501..0000000
--- a/zfs/cmd/zstreamdump/Makefile.am
+++ /dev/null

@@ -1,14 +0,0 @@
-include $(top_srcdir)/config/Rules.am
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
-sbin_PROGRAMS = zstreamdump
-
-zstreamdump_SOURCES = \
-	zstreamdump.c
-
-zstreamdump_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzfs/libzfs.la

diff --git a/zfs/cmd/zstreamdump/zstreamdump.c b/zfs/cmd/zstreamdump/zstreamdump.c
deleted file mode 100644
index 6b960c2..0000000
--- a/zfs/cmd/zstreamdump/zstreamdump.c
+++ /dev/null

@@ -1,777 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- *
- * Portions Copyright 2012 Martin Matuska <martin@matuska.org>
- */
-
-/*
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
- */
-
-#include <ctype.h>
-#include <libnvpair.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <stddef.h>
-
-#include <sys/dmu.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zio.h>
-#include <zfs_fletcher.h>
-
-/*
- * If dump mode is enabled, the number of bytes to print per line
- */
-#define	BYTES_PER_LINE	16
-/*
- * If dump mode is enabled, the number of bytes to group together, separated
- * by newlines or spaces
- */
-#define	DUMP_GROUPING	4
-
-uint64_t total_stream_len = 0;
-FILE *send_stream = 0;
-boolean_t do_byteswap = B_FALSE;
-boolean_t do_cksum = B_TRUE;
-
-static void
-usage(void)
-{
-	(void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n");
-	(void) fprintf(stderr, "\t -v -- verbose\n");
-	(void) fprintf(stderr, "\t -C -- suppress checksum verification\n");
-	(void) fprintf(stderr, "\t -d -- dump contents of blocks modified, "
-	    "implies verbose\n");
-	exit(1);
-}
-
-static void *
-safe_malloc(size_t size)
-{
-	void *rv = malloc(size);
-	if (rv == NULL) {
-		(void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n",
-		    size);
-		abort();
-	}
-	return (rv);
-}
-
-/*
- * ssread - send stream read.
- *
- * Read while computing incremental checksum
- */
-static size_t
-ssread(void *buf, size_t len, zio_cksum_t *cksum)
-{
-	size_t outlen;
-
-	if ((outlen = fread(buf, len, 1, send_stream)) == 0)
-		return (0);
-
-	if (do_cksum) {
-		if (do_byteswap)
-			fletcher_4_incremental_byteswap(buf, len, cksum);
-		else
-			fletcher_4_incremental_native(buf, len, cksum);
-	}
-	total_stream_len += len;
-	return (outlen);
-}
-
-static size_t
-read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
-{
-	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
-	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
-	size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
-	if (r == 0)
-		return (0);
-	zio_cksum_t saved_cksum = *cksum;
-	r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
-	    sizeof (zio_cksum_t), cksum);
-	if (r == 0)
-		return (0);
-	if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
-	    !ZIO_CHECKSUM_EQUAL(saved_cksum,
-	    drr->drr_u.drr_checksum.drr_checksum)) {
-		fprintf(stderr, "invalid checksum\n");
-		(void) printf("Incorrect checksum in record header.\n");
-		(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
-		    (longlong_t)saved_cksum.zc_word[0],
-		    (longlong_t)saved_cksum.zc_word[1],
-		    (longlong_t)saved_cksum.zc_word[2],
-		    (longlong_t)saved_cksum.zc_word[3]);
-		return (0);
-	}
-	return (sizeof (*drr));
-}
-
-/*
- * Print part of a block in ASCII characters
- */
-static void
-print_ascii_block(char *subbuf, int length)
-{
-	int i;
-
-	for (i = 0; i < length; i++) {
-		char char_print = isprint(subbuf[i]) ? subbuf[i] : '.';
-		if (i != 0 && i % DUMP_GROUPING == 0) {
-			(void) printf(" ");
-		}
-		(void) printf("%c", char_print);
-	}
-	(void) printf("\n");
-}
-
-/*
- * print_block - Dump the contents of a modified block to STDOUT
- *
- * Assume that buf has capacity evenly divisible by BYTES_PER_LINE
- */
-static void
-print_block(char *buf, int length)
-{
-	int i;
-	/*
-	 * Start printing ASCII characters at a constant offset, after
-	 * the hex prints. Leave 3 characters per byte on a line (2 digit
-	 * hex number plus 1 space) plus spaces between characters and
-	 * groupings.
-	 */
-	int ascii_start = BYTES_PER_LINE * 3 +
-	    BYTES_PER_LINE / DUMP_GROUPING + 2;
-
-	for (i = 0; i < length; i += BYTES_PER_LINE) {
-		int j;
-		int this_line_length = MIN(BYTES_PER_LINE, length - i);
-		int print_offset = 0;
-
-		for (j = 0; j < this_line_length; j++) {
-			int buf_offset = i + j;
-
-			/*
-			 * Separate every DUMP_GROUPING bytes by a space.
-			 */
-			if (buf_offset % DUMP_GROUPING == 0) {
-				print_offset += printf(" ");
-			}
-
-			/*
-			 * Print the two-digit hex value for this byte.
-			 */
-			unsigned char hex_print = buf[buf_offset];
-			print_offset += printf("%02x ", hex_print);
-		}
-
-		(void) printf("%*s", ascii_start - print_offset, " ");
-
-		print_ascii_block(buf + i, this_line_length);
-	}
-}
-
-/*
- * Print an array of bytes to stdout as hexadecimal characters. str must
- * have buf_len * 2 + 1 bytes of space.
- */
-static void
-sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
-{
-	int i, n;
-
-	for (i = 0; i < buf_len; i++) {
-		n = sprintf(str, "%02x", buf[i] & 0xff);
-		str += n;
-	}
-
-	str[0] = '\0';
-}
-
-int
-main(int argc, char *argv[])
-{
-	char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
-	uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
-	uint64_t total_payload_size = 0;
-	uint64_t total_overhead_size = 0;
-	uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 };
-	char salt[ZIO_DATA_SALT_LEN * 2 + 1];
-	char iv[ZIO_DATA_IV_LEN * 2 + 1];
-	char mac[ZIO_DATA_MAC_LEN * 2 + 1];
-	uint64_t total_records = 0;
-	uint64_t payload_size;
-	dmu_replay_record_t thedrr;
-	dmu_replay_record_t *drr = &thedrr;
-	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
-	struct drr_end *drre = &thedrr.drr_u.drr_end;
-	struct drr_object *drro = &thedrr.drr_u.drr_object;
-	struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects;
-	struct drr_write *drrw = &thedrr.drr_u.drr_write;
-	struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
-	struct drr_free *drrf = &thedrr.drr_u.drr_free;
-	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
-	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
-	struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
-	struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
-	int c;
-	boolean_t verbose = B_FALSE;
-	boolean_t very_verbose = B_FALSE;
-	boolean_t first = B_TRUE;
-	/*
-	 * dump flag controls whether the contents of any modified data blocks
-	 * are printed to the console during processing of the stream. Warning:
-	 * for large streams, this can obviously lead to massive prints.
-	 */
-	boolean_t dump = B_FALSE;
-	int err;
-	zio_cksum_t zc = { { 0 } };
-	zio_cksum_t pcksum = { { 0 } };
-
-	while ((c = getopt(argc, argv, ":vCd")) != -1) {
-		switch (c) {
-		case 'C':
-			do_cksum = B_FALSE;
-			break;
-		case 'v':
-			if (verbose)
-				very_verbose = B_TRUE;
-			verbose = B_TRUE;
-			break;
-		case 'd':
-			dump = B_TRUE;
-			verbose = B_TRUE;
-			very_verbose = B_TRUE;
-			break;
-		case ':':
-			(void) fprintf(stderr,
-			    "missing argument for '%c' option\n", optopt);
-			usage();
-			break;
-		case '?':
-			(void) fprintf(stderr, "invalid option '%c'\n",
-			    optopt);
-			usage();
-			break;
-		}
-	}
-
-	if (isatty(STDIN_FILENO)) {
-		(void) fprintf(stderr,
-		    "Error: Backup stream can not be read "
-		    "from a terminal.\n"
-		    "You must redirect standard input.\n");
-		exit(1);
-	}
-
-	fletcher_4_init();
-	send_stream = stdin;
-	while (read_hdr(drr, &zc)) {
-
-		/*
-		 * If this is the first DMU record being processed, check for
-		 * the magic bytes and figure out the endian-ness based on them.
-		 */
-		if (first) {
-			if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
-				do_byteswap = B_TRUE;
-				if (do_cksum) {
-					ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
-					/*
-					 * recalculate header checksum now
-					 * that we know it needs to be
-					 * byteswapped.
-					 */
-					fletcher_4_incremental_byteswap(drr,
-					    sizeof (dmu_replay_record_t), &zc);
-				}
-			} else if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
-				(void) fprintf(stderr, "Invalid stream "
-				    "(bad magic number)\n");
-				exit(1);
-			}
-			first = B_FALSE;
-		}
-		if (do_byteswap) {
-			drr->drr_type = BSWAP_32(drr->drr_type);
-			drr->drr_payloadlen =
-			    BSWAP_32(drr->drr_payloadlen);
-		}
-
-		/*
-		 * At this point, the leading fields of the replay record
-		 * (drr_type and drr_payloadlen) have been byte-swapped if
-		 * necessary, but the rest of the data structure (the
-		 * union of type-specific structures) is still in its
-		 * original state.
-		 */
-		if (drr->drr_type >= DRR_NUMTYPES) {
-			(void) printf("INVALID record found: type 0x%x\n",
-			    drr->drr_type);
-			(void) printf("Aborting.\n");
-			exit(1);
-		}
-
-		drr_record_count[drr->drr_type]++;
-		total_overhead_size += sizeof (*drr);
-		total_records++;
-		payload_size = 0;
-
-		switch (drr->drr_type) {
-		case DRR_BEGIN:
-			if (do_byteswap) {
-				drrb->drr_magic = BSWAP_64(drrb->drr_magic);
-				drrb->drr_versioninfo =
-				    BSWAP_64(drrb->drr_versioninfo);
-				drrb->drr_creation_time =
-				    BSWAP_64(drrb->drr_creation_time);
-				drrb->drr_type = BSWAP_32(drrb->drr_type);
-				drrb->drr_flags = BSWAP_32(drrb->drr_flags);
-				drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
-				drrb->drr_fromguid =
-				    BSWAP_64(drrb->drr_fromguid);
-			}
-
-			(void) printf("BEGIN record\n");
-			(void) printf("\thdrtype = %lld\n",
-			    DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo));
-			(void) printf("\tfeatures = %llx\n",
-			    DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo));
-			(void) printf("\tmagic = %llx\n",
-			    (u_longlong_t)drrb->drr_magic);
-			(void) printf("\tcreation_time = %llx\n",
-			    (u_longlong_t)drrb->drr_creation_time);
-			(void) printf("\ttype = %u\n", drrb->drr_type);
-			(void) printf("\tflags = 0x%x\n", drrb->drr_flags);
-			(void) printf("\ttoguid = %llx\n",
-			    (u_longlong_t)drrb->drr_toguid);
-			(void) printf("\tfromguid = %llx\n",
-			    (u_longlong_t)drrb->drr_fromguid);
-			(void) printf("\ttoname = %s\n", drrb->drr_toname);
-			if (verbose)
-				(void) printf("\n");
-
-			if (drr->drr_payloadlen != 0) {
-				nvlist_t *nv;
-				int sz = drr->drr_payloadlen;
-
-				if (sz > SPA_MAXBLOCKSIZE) {
-					free(buf);
-					buf = safe_malloc(sz);
-				}
-				(void) ssread(buf, sz, &zc);
-				if (ferror(send_stream))
-					perror("fread");
-				err = nvlist_unpack(buf, sz, &nv, 0);
-				if (err) {
-					perror(strerror(err));
-				} else {
-					nvlist_print(stdout, nv);
-					nvlist_free(nv);
-				}
-				payload_size = sz;
-			}
-			break;
-
-		case DRR_END:
-			if (do_byteswap) {
-				drre->drr_checksum.zc_word[0] =
-				    BSWAP_64(drre->drr_checksum.zc_word[0]);
-				drre->drr_checksum.zc_word[1] =
-				    BSWAP_64(drre->drr_checksum.zc_word[1]);
-				drre->drr_checksum.zc_word[2] =
-				    BSWAP_64(drre->drr_checksum.zc_word[2]);
-				drre->drr_checksum.zc_word[3] =
-				    BSWAP_64(drre->drr_checksum.zc_word[3]);
-			}
-			/*
-			 * We compare against the *previous* checksum
-			 * value, because the stored checksum is of
-			 * everything before the DRR_END record.
-			 */
-			if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum,
-			    pcksum)) {
-				(void) printf("Expected checksum differs from "
-				    "checksum in stream.\n");
-				(void) printf("Expected checksum = "
-				    "%llx/%llx/%llx/%llx\n",
-				    (long long unsigned int)pcksum.zc_word[0],
-				    (long long unsigned int)pcksum.zc_word[1],
-				    (long long unsigned int)pcksum.zc_word[2],
-				    (long long unsigned int)pcksum.zc_word[3]);
-			}
-			(void) printf("END checksum = %llx/%llx/%llx/%llx\n",
-			    (long long unsigned int)
-			    drre->drr_checksum.zc_word[0],
-			    (long long unsigned int)
-			    drre->drr_checksum.zc_word[1],
-			    (long long unsigned int)
-			    drre->drr_checksum.zc_word[2],
-			    (long long unsigned int)
-			    drre->drr_checksum.zc_word[3]);
-
-			ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
-			break;
-
-		case DRR_OBJECT:
-			if (do_byteswap) {
-				drro->drr_object = BSWAP_64(drro->drr_object);
-				drro->drr_type = BSWAP_32(drro->drr_type);
-				drro->drr_bonustype =
-				    BSWAP_32(drro->drr_bonustype);
-				drro->drr_blksz = BSWAP_32(drro->drr_blksz);
-				drro->drr_bonuslen =
-				    BSWAP_32(drro->drr_bonuslen);
-				drro->drr_raw_bonuslen =
-				    BSWAP_32(drro->drr_raw_bonuslen);
-				drro->drr_toguid = BSWAP_64(drro->drr_toguid);
-				drro->drr_maxblkid =
-				    BSWAP_64(drro->drr_maxblkid);
-			}
-
-			payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
-
-			if (verbose) {
-				(void) printf("OBJECT object = %llu type = %u "
-				    "bonustype = %u blksz = %u bonuslen = %u "
-				    "dn_slots = %u raw_bonuslen = %u "
-				    "flags = %u maxblkid = %llu "
-				    "indblkshift = %u nlevels = %u "
-				    "nblkptr = %u\n",
-				    (u_longlong_t)drro->drr_object,
-				    drro->drr_type,
-				    drro->drr_bonustype,
-				    drro->drr_blksz,
-				    drro->drr_bonuslen,
-				    drro->drr_dn_slots,
-				    drro->drr_raw_bonuslen,
-				    drro->drr_flags,
-				    (u_longlong_t)drro->drr_maxblkid,
-				    drro->drr_indblkshift,
-				    drro->drr_nlevels,
-				    drro->drr_nblkptr);
-			}
-			if (drro->drr_bonuslen > 0) {
-				(void) ssread(buf, payload_size, &zc);
-				if (dump)
-					print_block(buf, payload_size);
-			}
-			break;
-
-		case DRR_FREEOBJECTS:
-			if (do_byteswap) {
-				drrfo->drr_firstobj =
-				    BSWAP_64(drrfo->drr_firstobj);
-				drrfo->drr_numobjs =
-				    BSWAP_64(drrfo->drr_numobjs);
-				drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid);
-			}
-			if (verbose) {
-				(void) printf("FREEOBJECTS firstobj = %llu "
-				    "numobjs = %llu\n",
-				    (u_longlong_t)drrfo->drr_firstobj,
-				    (u_longlong_t)drrfo->drr_numobjs);
-			}
-			break;
-
-		case DRR_WRITE:
-			if (do_byteswap) {
-				drrw->drr_object = BSWAP_64(drrw->drr_object);
-				drrw->drr_type = BSWAP_32(drrw->drr_type);
-				drrw->drr_offset = BSWAP_64(drrw->drr_offset);
-				drrw->drr_logical_size =
-				    BSWAP_64(drrw->drr_logical_size);
-				drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
-				drrw->drr_key.ddk_prop =
-				    BSWAP_64(drrw->drr_key.ddk_prop);
-				drrw->drr_compressed_size =
-				    BSWAP_64(drrw->drr_compressed_size);
-			}
-
-			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
-
-			/*
-			 * If this is verbose and/or dump output,
-			 * print info on the modified block
-			 */
-			if (verbose) {
-				sprintf_bytes(salt, drrw->drr_salt,
-				    ZIO_DATA_SALT_LEN);
-				sprintf_bytes(iv, drrw->drr_iv,
-				    ZIO_DATA_IV_LEN);
-				sprintf_bytes(mac, drrw->drr_mac,
-				    ZIO_DATA_MAC_LEN);
-
-				(void) printf("WRITE object = %llu type = %u "
-				    "checksum type = %u compression type = %u "
-				    "flags = %u offset = %llu "
-				    "logical_size = %llu "
-				    "compressed_size = %llu "
-				    "payload_size = %llu props = %llx "
-				    "salt = %s iv = %s mac = %s\n",
-				    (u_longlong_t)drrw->drr_object,
-				    drrw->drr_type,
-				    drrw->drr_checksumtype,
-				    drrw->drr_compressiontype,
-				    drrw->drr_flags,
-				    (u_longlong_t)drrw->drr_offset,
-				    (u_longlong_t)drrw->drr_logical_size,
-				    (u_longlong_t)drrw->drr_compressed_size,
-				    (u_longlong_t)payload_size,
-				    (u_longlong_t)drrw->drr_key.ddk_prop,
-				    salt,
-				    iv,
-				    mac);
-			}
-
-			/*
-			 * Read the contents of the block in from STDIN to buf
-			 */
-			(void) ssread(buf, payload_size, &zc);
-			/*
-			 * If in dump mode
-			 */
-			if (dump) {
-				print_block(buf, payload_size);
-			}
-			break;
-
-		case DRR_WRITE_BYREF:
-			if (do_byteswap) {
-				drrwbr->drr_object =
-				    BSWAP_64(drrwbr->drr_object);
-				drrwbr->drr_offset =
-				    BSWAP_64(drrwbr->drr_offset);
-				drrwbr->drr_length =
-				    BSWAP_64(drrwbr->drr_length);
-				drrwbr->drr_toguid =
-				    BSWAP_64(drrwbr->drr_toguid);
-				drrwbr->drr_refguid =
-				    BSWAP_64(drrwbr->drr_refguid);
-				drrwbr->drr_refobject =
-				    BSWAP_64(drrwbr->drr_refobject);
-				drrwbr->drr_refoffset =
-				    BSWAP_64(drrwbr->drr_refoffset);
-				drrwbr->drr_key.ddk_prop =
-				    BSWAP_64(drrwbr->drr_key.ddk_prop);
-			}
-			if (verbose) {
-				(void) printf("WRITE_BYREF object = %llu "
-				    "checksum type = %u props = %llx "
-				    "offset = %llu length = %llu "
-				    "toguid = %llx refguid = %llx "
-				    "refobject = %llu refoffset = %llu\n",
-				    (u_longlong_t)drrwbr->drr_object,
-				    drrwbr->drr_checksumtype,
-				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
-				    (u_longlong_t)drrwbr->drr_offset,
-				    (u_longlong_t)drrwbr->drr_length,
-				    (u_longlong_t)drrwbr->drr_toguid,
-				    (u_longlong_t)drrwbr->drr_refguid,
-				    (u_longlong_t)drrwbr->drr_refobject,
-				    (u_longlong_t)drrwbr->drr_refoffset);
-			}
-			break;
-
-		case DRR_FREE:
-			if (do_byteswap) {
-				drrf->drr_object = BSWAP_64(drrf->drr_object);
-				drrf->drr_offset = BSWAP_64(drrf->drr_offset);
-				drrf->drr_length = BSWAP_64(drrf->drr_length);
-			}
-			if (verbose) {
-				(void) printf("FREE object = %llu "
-				    "offset = %llu length = %lld\n",
-				    (u_longlong_t)drrf->drr_object,
-				    (u_longlong_t)drrf->drr_offset,
-				    (longlong_t)drrf->drr_length);
-			}
-			break;
-		case DRR_SPILL:
-			if (do_byteswap) {
-				drrs->drr_object = BSWAP_64(drrs->drr_object);
-				drrs->drr_length = BSWAP_64(drrs->drr_length);
-				drrs->drr_compressed_size =
-				    BSWAP_64(drrs->drr_compressed_size);
-				drrs->drr_type = BSWAP_32(drrs->drr_type);
-			}
-
-			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
-
-			if (verbose) {
-				sprintf_bytes(salt, drrs->drr_salt,
-				    ZIO_DATA_SALT_LEN);
-				sprintf_bytes(iv, drrs->drr_iv,
-				    ZIO_DATA_IV_LEN);
-				sprintf_bytes(mac, drrs->drr_mac,
-				    ZIO_DATA_MAC_LEN);
-
-				(void) printf("SPILL block for object = %llu "
-				    "length = %llu flags = %u "
-				    "compression type = %u "
-				    "compressed_size = %llu "
-				    "payload_size = %llu "
-				    "salt = %s iv = %s mac = %s\n",
-				    (u_longlong_t)drrs->drr_object,
-				    (u_longlong_t)drrs->drr_length,
-				    drrs->drr_flags,
-				    drrs->drr_compressiontype,
-				    (u_longlong_t)drrs->drr_compressed_size,
-				    (u_longlong_t)payload_size,
-				    salt,
-				    iv,
-				    mac);
-			}
-			(void) ssread(buf, payload_size, &zc);
-			if (dump) {
-				print_block(buf, payload_size);
-			}
-			break;
-		case DRR_WRITE_EMBEDDED:
-			if (do_byteswap) {
-				drrwe->drr_object =
-				    BSWAP_64(drrwe->drr_object);
-				drrwe->drr_offset =
-				    BSWAP_64(drrwe->drr_offset);
-				drrwe->drr_length =
-				    BSWAP_64(drrwe->drr_length);
-				drrwe->drr_toguid =
-				    BSWAP_64(drrwe->drr_toguid);
-				drrwe->drr_lsize =
-				    BSWAP_32(drrwe->drr_lsize);
-				drrwe->drr_psize =
-				    BSWAP_32(drrwe->drr_psize);
-			}
-			if (verbose) {
-				(void) printf("WRITE_EMBEDDED object = %llu "
-				    "offset = %llu length = %llu "
-				    "toguid = %llx comp = %u etype = %u "
-				    "lsize = %u psize = %u\n",
-				    (u_longlong_t)drrwe->drr_object,
-				    (u_longlong_t)drrwe->drr_offset,
-				    (u_longlong_t)drrwe->drr_length,
-				    (u_longlong_t)drrwe->drr_toguid,
-				    drrwe->drr_compression,
-				    drrwe->drr_etype,
-				    drrwe->drr_lsize,
-				    drrwe->drr_psize);
-			}
-			(void) ssread(buf,
-			    P2ROUNDUP(drrwe->drr_psize, 8), &zc);
-			if (dump) {
-				print_block(buf,
-				    P2ROUNDUP(drrwe->drr_psize, 8));
-			}
-			payload_size = P2ROUNDUP(drrwe->drr_psize, 8);
-			break;
-		case DRR_OBJECT_RANGE:
-			if (do_byteswap) {
-				drror->drr_firstobj =
-				    BSWAP_64(drror->drr_firstobj);
-				drror->drr_numslots =
-				    BSWAP_64(drror->drr_numslots);
-				drror->drr_toguid = BSWAP_64(drror->drr_toguid);
-			}
-			if (verbose) {
-				sprintf_bytes(salt, drror->drr_salt,
-				    ZIO_DATA_SALT_LEN);
-				sprintf_bytes(iv, drror->drr_iv,
-				    ZIO_DATA_IV_LEN);
-				sprintf_bytes(mac, drror->drr_mac,
-				    ZIO_DATA_MAC_LEN);
-
-				(void) printf("OBJECT_RANGE firstobj = %llu "
-				    "numslots = %llu flags = %u "
-				    "salt = %s iv = %s mac = %s\n",
-				    (u_longlong_t)drror->drr_firstobj,
-				    (u_longlong_t)drror->drr_numslots,
-				    drror->drr_flags,
-				    salt,
-				    iv,
-				    mac);
-			}
-			break;
-		case DRR_NUMTYPES:
-			/* should never be reached */
-			exit(1);
-		}
-		if (drr->drr_type != DRR_BEGIN && very_verbose) {
-			(void) printf("    checksum = %llx/%llx/%llx/%llx\n",
-			    (longlong_t)drrc->drr_checksum.zc_word[0],
-			    (longlong_t)drrc->drr_checksum.zc_word[1],
-			    (longlong_t)drrc->drr_checksum.zc_word[2],
-			    (longlong_t)drrc->drr_checksum.zc_word[3]);
-		}
-		pcksum = zc;
-		drr_byte_count[drr->drr_type] += payload_size;
-		total_payload_size += payload_size;
-	}
-	free(buf);
-	fletcher_4_fini();
-
-	/* Print final summary */
-
-	(void) printf("SUMMARY:\n");
-	(void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_BEGIN],
-	    (u_longlong_t)drr_byte_count[DRR_BEGIN]);
-	(void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_END],
-	    (u_longlong_t)drr_byte_count[DRR_END]);
-	(void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_OBJECT],
-	    (u_longlong_t)drr_byte_count[DRR_OBJECT]);
-	(void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_FREEOBJECTS],
-	    (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]);
-	(void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_WRITE],
-	    (u_longlong_t)drr_byte_count[DRR_WRITE]);
-	(void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_WRITE_BYREF],
-	    (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]);
-	(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu "
-	    "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED],
-	    (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]);
-	(void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_FREE],
-	    (u_longlong_t)drr_byte_count[DRR_FREE]);
-	(void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n",
-	    (u_longlong_t)drr_record_count[DRR_SPILL],
-	    (u_longlong_t)drr_byte_count[DRR_SPILL]);
-	(void) printf("\tTotal records = %lld\n",
-	    (u_longlong_t)total_records);
-	(void) printf("\tTotal payload size = %lld (0x%llx)\n",
-	    (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size);
-	(void) printf("\tTotal header overhead = %lld (0x%llx)\n",
-	    (u_longlong_t)total_overhead_size,
-	    (u_longlong_t)total_overhead_size);
-	(void) printf("\tTotal stream length = %lld (0x%llx)\n",
-	    (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len);
-	return (0);
-}

diff --git a/zfs/cmd/ztest/Makefile.am b/zfs/cmd/ztest/Makefile.am
index 55af416..d5e335e 100644
--- a/zfs/cmd/ztest/Makefile.am
+++ b/zfs/cmd/ztest/Makefile.am

@@ -7,11 +7,7 @@
 AM_CFLAGS += $(FRAME_LARGER_THAN)
 
 # Unconditionally enable ASSERTs
-AM_CPPFLAGS += -DDEBUG -UNDEBUG
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 
 sbin_PROGRAMS = ztest
 
@@ -19,8 +15,11 @@
 	ztest.c
 
 ztest_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzpool/libzpool.la
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
 
 ztest_LDADD += -lm
 ztest_LDFLAGS = -pthread
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/ztest/ztest.c b/zfs/cmd/ztest/ztest.c
index 77cab28..b7dc3fc 100644
--- a/zfs/cmd/ztest/ztest.c
+++ b/zfs/cmd/ztest/ztest.c

@@ -104,6 +104,7 @@
 #include <sys/zio.h>
 #include <sys/zil.h>
 #include <sys/zil_impl.h>
+#include <sys/vdev_draid.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_file.h>
 #include <sys/vdev_initialize.h>
@@ -116,14 +117,14 @@
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_scan.h>
 #include <sys/zio_checksum.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/zfeature.h>
 #include <sys/dsl_userhold.h>
 #include <sys/abd.h>
 #include <stdio.h>
-#include <stdio_ext.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <getopt.h>
 #include <signal.h>
 #include <umem.h>
 #include <ctype.h>
@@ -133,7 +134,7 @@
 #include <libnvpair.h>
 #include <libzutil.h>
 #include <sys/crypto/icp.h>
-#ifdef __GLIBC__
+#if (__GLIBC__ && !__UCLIBC__)
 #include <execinfo.h> /* for backtrace() */
 #endif
 
@@ -158,6 +159,9 @@
 	ZTEST_VDEV_CLASS_RND
 };
 
+#define	ZO_GVARS_MAX_ARGLEN	((size_t)64)
+#define	ZO_GVARS_MAX_COUNT	((size_t)10)
+
 typedef struct ztest_shared_opts {
 	char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
 	char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
@@ -168,8 +172,11 @@
 	size_t zo_vdev_size;
 	int zo_ashift;
 	int zo_mirrors;
-	int zo_raidz;
-	int zo_raidz_parity;
+	int zo_raid_children;
+	int zo_raid_parity;
+	char zo_raid_type[8];
+	int zo_draid_data;
+	int zo_draid_spares;
 	int zo_datasets;
 	int zo_threads;
 	uint64_t zo_passtime;
@@ -182,30 +189,64 @@
 	int zo_mmp_test;
 	int zo_special_vdevs;
 	int zo_dump_dbgmsg;
+	int zo_gvars_count;
+	char zo_gvars[ZO_GVARS_MAX_COUNT][ZO_GVARS_MAX_ARGLEN];
 } ztest_shared_opts_t;
 
+/* Default values for command line options. */
+#define	DEFAULT_POOL "ztest"
+#define	DEFAULT_VDEV_DIR "/tmp"
+#define	DEFAULT_VDEV_COUNT 5
+#define	DEFAULT_VDEV_SIZE (SPA_MINDEVSIZE * 4)	/* 256m default size */
+#define	DEFAULT_VDEV_SIZE_STR "256M"
+#define	DEFAULT_ASHIFT SPA_MINBLOCKSHIFT
+#define	DEFAULT_MIRRORS 2
+#define	DEFAULT_RAID_CHILDREN 4
+#define	DEFAULT_RAID_PARITY 1
+#define	DEFAULT_DRAID_DATA 4
+#define	DEFAULT_DRAID_SPARES 1
+#define	DEFAULT_DATASETS_COUNT 7
+#define	DEFAULT_THREADS 23
+#define	DEFAULT_RUN_TIME 300 /* 300 seconds */
+#define	DEFAULT_RUN_TIME_STR "300 sec"
+#define	DEFAULT_PASS_TIME 60 /* 60 seconds */
+#define	DEFAULT_PASS_TIME_STR "60 sec"
+#define	DEFAULT_KILL_RATE 70 /* 70% kill rate */
+#define	DEFAULT_KILLRATE_STR "70%"
+#define	DEFAULT_INITS 1
+#define	DEFAULT_MAX_LOOPS 50 /* max loops during spa_freeze() */
+#define	DEFAULT_FORCE_GANGING (64 << 10)
+#define	DEFAULT_FORCE_GANGING_STR "64K"
+
+/* Simplifying assumption: -1 is not a valid default. */
+#define	NO_DEFAULT -1
+
 static const ztest_shared_opts_t ztest_opts_defaults = {
-	.zo_pool = "ztest",
-	.zo_dir = "/tmp",
+	.zo_pool = DEFAULT_POOL,
+	.zo_dir = DEFAULT_VDEV_DIR,
 	.zo_alt_ztest = { '\0' },
 	.zo_alt_libpath = { '\0' },
-	.zo_vdevs = 5,
-	.zo_ashift = SPA_MINBLOCKSHIFT,
-	.zo_mirrors = 2,
-	.zo_raidz = 4,
-	.zo_raidz_parity = 1,
-	.zo_vdev_size = SPA_MINDEVSIZE * 4,	/* 256m default size */
-	.zo_datasets = 7,
-	.zo_threads = 23,
-	.zo_passtime = 60,		/* 60 seconds */
-	.zo_killrate = 70,		/* 70% kill rate */
+	.zo_vdevs = DEFAULT_VDEV_COUNT,
+	.zo_ashift = DEFAULT_ASHIFT,
+	.zo_mirrors = DEFAULT_MIRRORS,
+	.zo_raid_children = DEFAULT_RAID_CHILDREN,
+	.zo_raid_parity = DEFAULT_RAID_PARITY,
+	.zo_raid_type = VDEV_TYPE_RAIDZ,
+	.zo_vdev_size = DEFAULT_VDEV_SIZE,
+	.zo_draid_data = DEFAULT_DRAID_DATA,	/* data drives */
+	.zo_draid_spares = DEFAULT_DRAID_SPARES, /* distributed spares */
+	.zo_datasets = DEFAULT_DATASETS_COUNT,
+	.zo_threads = DEFAULT_THREADS,
+	.zo_passtime = DEFAULT_PASS_TIME,
+	.zo_killrate = DEFAULT_KILL_RATE,
 	.zo_verbose = 0,
 	.zo_mmp_test = 0,
-	.zo_init = 1,
-	.zo_time = 300,			/* 5 minutes */
-	.zo_maxloops = 50,		/* max loops during spa_freeze() */
-	.zo_metaslab_force_ganging = 64 << 10,
+	.zo_init = DEFAULT_INITS,
+	.zo_time = DEFAULT_RUN_TIME,
+	.zo_maxloops = DEFAULT_MAX_LOOPS, /* max loops during spa_freeze() */
+	.zo_metaslab_force_ganging = DEFAULT_FORCE_GANGING,
 	.zo_special_vdevs = ZTEST_VDEV_CLASS_RND,
+	.zo_gvars_count = 0,
 };
 
 extern uint64_t metaslab_force_ganging;
@@ -233,7 +274,7 @@
 
 #define	BT_MAGIC	0x123456789abcdefULL
 #define	MAXFAULTS(zs) \
-	(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)
+	(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1)
 
 enum ztest_io_type {
 	ZTEST_IO_WRITE_TAG,
@@ -360,7 +401,6 @@
 ztest_func_t ztest_spa_prop_get_set;
 ztest_func_t ztest_spa_create_destroy;
 ztest_func_t ztest_fault_inject;
-ztest_func_t ztest_ddt_repair;
 ztest_func_t ztest_dmu_snapshot_hold;
 ztest_func_t ztest_mmp_enable_disable;
 ztest_func_t ztest_scrub;
@@ -415,7 +455,6 @@
 	ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
-	ZTI_INIT(ztest_ddt_repair, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
@@ -559,7 +598,7 @@
 static void sig_handler(int signo)
 {
 	struct sigaction action;
-#ifdef __GLIBC__ /* backtrace() is a GNU extension */
+#if (__GLIBC__ && !__UCLIBC__) /* backtrace() is a GNU extension */
 	int nptrs;
 	void *buffer[BACKTRACE_SZ];
 
@@ -651,7 +690,12 @@
 	} else if (end[0] == '.') {
 		double fval = strtod(buf, &end);
 		fval *= pow(2, str2shift(end));
-		if (fval > UINT64_MAX) {
+		/*
+		 * UINT64_MAX is not exactly representable as a double.
+		 * The closest representation is UINT64_MAX + 1, so we
+		 * use a >= comparison instead of > for the bounds check.
+		 */
+		if (fval >= (double)UINT64_MAX) {
 			(void) fprintf(stderr, "ztest: value too large: %s\n",
 			    buf);
 			usage(B_FALSE);
@@ -669,66 +713,172 @@
 	return (val);
 }
 
+typedef struct ztest_option {
+	const char	short_opt;
+	const char	*long_opt;
+	const char	*long_opt_param;
+	const char	*comment;
+	unsigned int	default_int;
+	char		*default_str;
+} ztest_option_t;
+
+/*
+ * The following option_table is used for generating the usage info as well as
+ * the long and short option information for calling getopt_long().
+ */
+static ztest_option_t option_table[] = {
+	{ 'v',	"vdevs", "INTEGER", "Number of vdevs", DEFAULT_VDEV_COUNT,
+	    NULL},
+	{ 's',	"vdev-size", "INTEGER", "Size of each vdev",
+	    NO_DEFAULT, DEFAULT_VDEV_SIZE_STR},
+	{ 'a',	"alignment-shift", "INTEGER",
+	    "Alignment shift; use 0 for random", DEFAULT_ASHIFT, NULL},
+	{ 'm',	"mirror-copies", "INTEGER", "Number of mirror copies",
+	    DEFAULT_MIRRORS, NULL},
+	{ 'r',	"raid-disks", "INTEGER", "Number of raidz/draid disks",
+	    DEFAULT_RAID_CHILDREN, NULL},
+	{ 'R',	"raid-parity", "INTEGER", "Raid parity",
+	    DEFAULT_RAID_PARITY, NULL},
+	{ 'K',	"raid-kind", "raidz|draid|random", "Raid kind",
+	    NO_DEFAULT, "random"},
+	{ 'D',	"draid-data", "INTEGER", "Number of draid data drives",
+	    DEFAULT_DRAID_DATA, NULL},
+	{ 'S',	"draid-spares", "INTEGER", "Number of draid spares",
+	    DEFAULT_DRAID_SPARES, NULL},
+	{ 'd',	"datasets", "INTEGER", "Number of datasets",
+	    DEFAULT_DATASETS_COUNT, NULL},
+	{ 't',	"threads", "INTEGER", "Number of ztest threads",
+	    DEFAULT_THREADS, NULL},
+	{ 'g',	"gang-block-threshold", "INTEGER",
+	    "Metaslab gang block threshold",
+	    NO_DEFAULT, DEFAULT_FORCE_GANGING_STR},
+	{ 'i',	"init-count", "INTEGER", "Number of times to initialize pool",
+	    DEFAULT_INITS, NULL},
+	{ 'k',	"kill-percentage", "INTEGER", "Kill percentage",
+	    NO_DEFAULT, DEFAULT_KILLRATE_STR},
+	{ 'p',	"pool-name", "STRING", "Pool name",
+	    NO_DEFAULT, DEFAULT_POOL},
+	{ 'f',	"vdev-file-directory", "PATH", "File directory for vdev files",
+	    NO_DEFAULT, DEFAULT_VDEV_DIR},
+	{ 'M',	"multi-host", NULL,
+	    "Multi-host; simulate pool imported on remote host",
+	    NO_DEFAULT, NULL},
+	{ 'E',	"use-existing-pool", NULL,
+	    "Use existing pool instead of creating new one", NO_DEFAULT, NULL},
+	{ 'T',	"run-time", "INTEGER", "Total run time",
+	    NO_DEFAULT, DEFAULT_RUN_TIME_STR},
+	{ 'P',	"pass-time", "INTEGER", "Time per pass",
+	    NO_DEFAULT, DEFAULT_PASS_TIME_STR},
+	{ 'F',	"freeze-loops", "INTEGER", "Max loops in spa_freeze()",
+	    DEFAULT_MAX_LOOPS, NULL},
+	{ 'B',	"alt-ztest", "PATH", "Alternate ztest path",
+	    NO_DEFAULT, NULL},
+	{ 'C',	"vdev-class-state", "on|off|random", "vdev class state",
+	    NO_DEFAULT, "random"},
+	{ 'o',	"option", "\"OPTION=INTEGER\"",
+	    "Set global variable to an unsigned 32-bit integer value",
+	    NO_DEFAULT, NULL},
+	{ 'G',	"dump-debug-msg", NULL,
+	    "Dump zfs_dbgmsg buffer before exiting due to an error",
+	    NO_DEFAULT, NULL},
+	{ 'V',	"verbose", NULL,
+	    "Verbose (use multiple times for ever more verbosity)",
+	    NO_DEFAULT, NULL},
+	{ 'h',	"help",	NULL, "Show this help",
+	    NO_DEFAULT, NULL},
+	{0, 0, 0, 0, 0, 0}
+};
+
+static struct option *long_opts = NULL;
+static char *short_opts = NULL;
+
+static void
+init_options(void)
+{
+	ASSERT3P(long_opts, ==, NULL);
+	ASSERT3P(short_opts, ==, NULL);
+
+	int count = sizeof (option_table) / sizeof (option_table[0]);
+	long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL);
+
+	short_opts = umem_alloc(sizeof (char) * 2 * count, UMEM_NOFAIL);
+	int short_opt_index = 0;
+
+	for (int i = 0; i < count; i++) {
+		long_opts[i].val = option_table[i].short_opt;
+		long_opts[i].name = option_table[i].long_opt;
+		long_opts[i].has_arg = option_table[i].long_opt_param != NULL
+		    ? required_argument : no_argument;
+		long_opts[i].flag = NULL;
+		short_opts[short_opt_index++] = option_table[i].short_opt;
+		if (option_table[i].long_opt_param != NULL) {
+			short_opts[short_opt_index++] = ':';
+		}
+	}
+}
+
+static void
+fini_options(void)
+{
+	int count = sizeof (option_table) / sizeof (option_table[0]);
+
+	umem_free(long_opts, sizeof (struct option) * count);
+	umem_free(short_opts, sizeof (char) * 2 * count);
+
+	long_opts = NULL;
+	short_opts = NULL;
+}
+
 static void
 usage(boolean_t requested)
 {
-	const ztest_shared_opts_t *zo = &ztest_opts_defaults;
-
-	char nice_vdev_size[NN_NUMBUF_SZ];
-	char nice_force_ganging[NN_NUMBUF_SZ];
+	char option[80];
 	FILE *fp = requested ? stdout : stderr;
 
-	nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size));
-	nicenum(zo->zo_metaslab_force_ganging, nice_force_ganging,
-	    sizeof (nice_force_ganging));
+	(void) fprintf(fp, "Usage: %s [OPTIONS...]\n", DEFAULT_POOL);
+	for (int i = 0; option_table[i].short_opt != 0; i++) {
+		if (option_table[i].long_opt_param != NULL) {
+			(void) sprintf(option, "  -%c --%s=%s",
+			    option_table[i].short_opt,
+			    option_table[i].long_opt,
+			    option_table[i].long_opt_param);
+		} else {
+			(void) sprintf(option, "  -%c --%s",
+			    option_table[i].short_opt,
+			    option_table[i].long_opt);
+		}
+		(void) fprintf(fp, "  %-40s%s", option,
+		    option_table[i].comment);
 
-	(void) fprintf(fp, "Usage: %s\n"
-	    "\t[-v vdevs (default: %llu)]\n"
-	    "\t[-s size_of_each_vdev (default: %s)]\n"
-	    "\t[-a alignment_shift (default: %d)] use 0 for random\n"
-	    "\t[-m mirror_copies (default: %d)]\n"
-	    "\t[-r raidz_disks (default: %d)]\n"
-	    "\t[-R raidz_parity (default: %d)]\n"
-	    "\t[-d datasets (default: %d)]\n"
-	    "\t[-t threads (default: %d)]\n"
-	    "\t[-g gang_block_threshold (default: %s)]\n"
-	    "\t[-i init_count (default: %d)] initialize pool i times\n"
-	    "\t[-k kill_percentage (default: %llu%%)]\n"
-	    "\t[-p pool_name (default: %s)]\n"
-	    "\t[-f dir (default: %s)] file directory for vdev files\n"
-	    "\t[-M] Multi-host simulate pool imported on remote host\n"
-	    "\t[-V] verbose (use multiple times for ever more blather)\n"
-	    "\t[-E] use existing pool instead of creating new one\n"
-	    "\t[-T time (default: %llu sec)] total run time\n"
-	    "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
-	    "\t[-P passtime (default: %llu sec)] time per pass\n"
-	    "\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
-	    "\t[-C vdev class state (default: random)] special=on|off|random\n"
-	    "\t[-o variable=value] ... set global variable to an unsigned\n"
-	    "\t    32-bit integer value\n"
-	    "\t[-G dump zfs_dbgmsg buffer before exiting due to an error\n"
-	    "\t[-h] (print help)\n"
-	    "",
-	    zo->zo_pool,
-	    (u_longlong_t)zo->zo_vdevs,			/* -v */
-	    nice_vdev_size,				/* -s */
-	    zo->zo_ashift,				/* -a */
-	    zo->zo_mirrors,				/* -m */
-	    zo->zo_raidz,				/* -r */
-	    zo->zo_raidz_parity,			/* -R */
-	    zo->zo_datasets,				/* -d */
-	    zo->zo_threads,				/* -t */
-	    nice_force_ganging,				/* -g */
-	    zo->zo_init,				/* -i */
-	    (u_longlong_t)zo->zo_killrate,		/* -k */
-	    zo->zo_pool,				/* -p */
-	    zo->zo_dir,					/* -f */
-	    (u_longlong_t)zo->zo_time,			/* -T */
-	    (u_longlong_t)zo->zo_maxloops,		/* -F */
-	    (u_longlong_t)zo->zo_passtime);
+		if (option_table[i].long_opt_param != NULL) {
+			if (option_table[i].default_str != NULL) {
+				(void) fprintf(fp, " (default: %s)",
+				    option_table[i].default_str);
+			} else if (option_table[i].default_int != NO_DEFAULT) {
+				(void) fprintf(fp, " (default: %u)",
+				    option_table[i].default_int);
+			}
+		}
+		(void) fprintf(fp, "\n");
+	}
 	exit(requested ? 0 : 1);
 }
 
+static uint64_t
+ztest_random(uint64_t range)
+{
+	uint64_t r;
+
+	ASSERT3S(ztest_fd_rand, >=, 0);
+
+	if (range == 0)
+		return (0);
+
+	if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
+		fatal(1, "short read from /dev/urandom");
+
+	return (r % range);
+}
 
 static void
 ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo)
@@ -778,11 +928,14 @@
 	int opt;
 	uint64_t value;
 	char altdir[MAXNAMELEN] = { 0 };
+	char raid_kind[8] = { "random" };
 
 	bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
 
-	while ((opt = getopt(argc, argv,
-	    "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) {
+	init_options();
+
+	while ((opt = getopt_long(argc, argv, short_opts, long_opts,
+	    NULL)) != EOF) {
 		value = 0;
 		switch (opt) {
 		case 'v':
@@ -791,6 +944,8 @@
 		case 'm':
 		case 'r':
 		case 'R':
+		case 'D':
+		case 'S':
 		case 'd':
 		case 't':
 		case 'g':
@@ -815,10 +970,19 @@
 			zo->zo_mirrors = value;
 			break;
 		case 'r':
-			zo->zo_raidz = MAX(1, value);
+			zo->zo_raid_children = MAX(1, value);
 			break;
 		case 'R':
-			zo->zo_raidz_parity = MIN(MAX(value, 1), 3);
+			zo->zo_raid_parity = MIN(MAX(value, 1), 3);
+			break;
+		case 'K':
+			(void) strlcpy(raid_kind, optarg, sizeof (raid_kind));
+			break;
+		case 'D':
+			zo->zo_draid_data = MAX(1, value);
+			break;
+		case 'S':
+			zo->zo_draid_spares = MAX(1, value);
 			break;
 		case 'd':
 			zo->zo_datasets = MAX(1, value);
@@ -877,8 +1041,21 @@
 			ztest_parse_name_value(optarg, zo);
 			break;
 		case 'o':
-			if (set_global_var(optarg) != 0)
+			if (zo->zo_gvars_count >= ZO_GVARS_MAX_COUNT) {
+				(void) fprintf(stderr,
+				    "max global var count (%zu) exceeded\n",
+				    ZO_GVARS_MAX_COUNT);
 				usage(B_FALSE);
+			}
+			char *v = zo->zo_gvars[zo->zo_gvars_count];
+			if (strlcpy(v, optarg, ZO_GVARS_MAX_ARGLEN) >=
+			    ZO_GVARS_MAX_ARGLEN) {
+				(void) fprintf(stderr,
+				    "global var option '%s' is too long\n",
+				    optarg);
+				usage(B_FALSE);
+			}
+			zo->zo_gvars_count++;
 			break;
 		case 'G':
 			zo->zo_dump_dbgmsg = 1;
@@ -893,7 +1070,56 @@
 		}
 	}
 
-	zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1);
+	fini_options();
+
+	/* When raid choice is 'random' add a draid pool 50% of the time */
+	if (strcmp(raid_kind, "random") == 0) {
+		(void) strlcpy(raid_kind, (ztest_random(2) == 0) ?
+		    "draid" : "raidz", sizeof (raid_kind));
+
+		if (ztest_opts.zo_verbose >= 3)
+			(void) printf("choosing RAID type '%s'\n", raid_kind);
+	}
+
+	if (strcmp(raid_kind, "draid") == 0) {
+		uint64_t min_devsize;
+
+		/* With fewer disks use 256M, otherwise 128M is OK */
+		min_devsize = (ztest_opts.zo_raid_children < 16) ?
+		    (256ULL << 20) : (128ULL << 20);
+
+		/* No top-level mirrors with dRAID for now */
+		zo->zo_mirrors = 0;
+
+		/* Use more appropriate defaults for dRAID */
+		if (zo->zo_vdevs == ztest_opts_defaults.zo_vdevs)
+			zo->zo_vdevs = 1;
+		if (zo->zo_raid_children ==
+		    ztest_opts_defaults.zo_raid_children)
+			zo->zo_raid_children = 16;
+		if (zo->zo_ashift < 12)
+			zo->zo_ashift = 12;
+		if (zo->zo_vdev_size < min_devsize)
+			zo->zo_vdev_size = min_devsize;
+
+		if (zo->zo_draid_data + zo->zo_raid_parity >
+		    zo->zo_raid_children - zo->zo_draid_spares) {
+			(void) fprintf(stderr, "error: too few draid "
+			    "children (%d) for stripe width (%d)\n",
+			    zo->zo_raid_children,
+			    zo->zo_draid_data + zo->zo_raid_parity);
+			usage(B_FALSE);
+		}
+
+		(void) strlcpy(zo->zo_raid_type, VDEV_TYPE_DRAID,
+		    sizeof (zo->zo_raid_type));
+
+	} else /* using raidz */ {
+		ASSERT0(strcmp(raid_kind, "raidz"));
+
+		zo->zo_raid_parity = MIN(zo->zo_raid_parity,
+		    zo->zo_raid_children - 1);
+	}
 
 	zo->zo_vdevtime =
 	    (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
@@ -910,13 +1136,13 @@
 		cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 		realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 
-		VERIFY(NULL != realpath(getexecname(), cmd));
+		VERIFY3P(NULL, !=, realpath(getexecname(), cmd));
 		if (0 != access(altdir, F_OK)) {
 			ztest_dump_core = B_FALSE;
 			fatal(B_TRUE, "invalid alternate ztest path: %s",
 			    altdir);
 		}
-		VERIFY(NULL != realpath(altdir, realaltdir));
+		VERIFY3P(NULL, !=, realpath(altdir, realaltdir));
 
 		/*
 		 * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest".
@@ -958,28 +1184,12 @@
 	 * See comment above spa_write_cachefile().
 	 */
 	mutex_enter(&spa_namespace_lock);
-	spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
+	spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE);
 	mutex_exit(&spa_namespace_lock);
 
 	(void) kill(getpid(), SIGKILL);
 }
 
-static uint64_t
-ztest_random(uint64_t range)
-{
-	uint64_t r;
-
-	ASSERT3S(ztest_fd_rand, >=, 0);
-
-	if (range == 0)
-		return (0);
-
-	if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
-		fatal(1, "short read from /dev/urandom");
-
-	return (r % range);
-}
-
 /* ARGSUSED */
 static void
 ztest_record_enospc(const char *s)
@@ -995,12 +1205,27 @@
 	return (ztest_opts.zo_ashift);
 }
 
+static boolean_t
+ztest_is_draid_spare(const char *name)
+{
+	uint64_t spare_id = 0, parity = 0, vdev_id = 0;
+
+	if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
+	    (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
+	    (u_longlong_t *)&spare_id) == 3) {
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
 static nvlist_t *
 make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 {
 	char *pathbuf;
 	uint64_t vdev;
 	nvlist_t *file;
+	boolean_t draid_spare = B_FALSE;
 
 	pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 
@@ -1022,9 +1247,11 @@
 			    ztest_dev_template, ztest_opts.zo_dir,
 			    pool == NULL ? ztest_opts.zo_pool : pool, vdev);
 		}
+	} else {
+		draid_spare = ztest_is_draid_spare(path);
 	}
 
-	if (size != 0) {
+	if (size != 0 && !draid_spare) {
 		int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
 		if (fd == -1)
 			fatal(1, "can't open %s", path);
@@ -1033,20 +1260,21 @@
 		(void) close(fd);
 	}
 
-	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
-	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+	file = fnvlist_alloc();
+	fnvlist_add_string(file, ZPOOL_CONFIG_TYPE,
+	    draid_spare ? VDEV_TYPE_DRAID_SPARE : VDEV_TYPE_FILE);
+	fnvlist_add_string(file, ZPOOL_CONFIG_PATH, path);
+	fnvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift);
 	umem_free(pathbuf, MAXPATHLEN);
 
 	return (file);
 }
 
 static nvlist_t *
-make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
+make_vdev_raid(char *path, char *aux, char *pool, size_t size,
     uint64_t ashift, int r)
 {
-	nvlist_t *raidz, **child;
+	nvlist_t *raid, **child;
 	int c;
 
 	if (r < 2)
@@ -1056,20 +1284,40 @@
 	for (c = 0; c < r; c++)
 		child[c] = make_vdev_file(path, aux, pool, size, ashift);
 
-	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_RAIDZ) == 0);
-	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
-	    ztest_opts.zo_raidz_parity) == 0);
-	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
-	    child, r) == 0);
+	raid = fnvlist_alloc();
+	fnvlist_add_string(raid, ZPOOL_CONFIG_TYPE,
+	    ztest_opts.zo_raid_type);
+	fnvlist_add_uint64(raid, ZPOOL_CONFIG_NPARITY,
+	    ztest_opts.zo_raid_parity);
+	fnvlist_add_nvlist_array(raid, ZPOOL_CONFIG_CHILDREN, child, r);
+
+	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) {
+		uint64_t ndata = ztest_opts.zo_draid_data;
+		uint64_t nparity = ztest_opts.zo_raid_parity;
+		uint64_t nspares = ztest_opts.zo_draid_spares;
+		uint64_t children = ztest_opts.zo_raid_children;
+		uint64_t ngroups = 1;
+
+		/*
+		 * Calculate the minimum number of groups required to fill a
+		 * slice: ngroups * stripe width (data + parity) must equal the
+		 * LCM of the stripe width and (children - spares).
+		 */
+		while (ngroups * (ndata + nparity) % (children - nspares) != 0)
+			ngroups++;
+
+		/* Store the basic dRAID configuration. */
+		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata);
+		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
+		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
+	}
 
 	for (c = 0; c < r; c++)
-		nvlist_free(child[c]);
+		fnvlist_free(child[c]);
 
 	umem_free(child, r * sizeof (nvlist_t *));
 
-	return (raidz);
+	return (raid);
 }
 
 static nvlist_t *
@@ -1080,21 +1328,19 @@
 	int c;
 
 	if (m < 1)
-		return (make_vdev_raidz(path, aux, pool, size, ashift, r));
+		return (make_vdev_raid(path, aux, pool, size, ashift, r));
 
 	child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
 
 	for (c = 0; c < m; c++)
-		child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
+		child[c] = make_vdev_raid(path, aux, pool, size, ashift, r);
 
-	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_MIRROR) == 0);
-	VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
-	    child, m) == 0);
+	mirror = fnvlist_alloc();
+	fnvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, VDEV_TYPE_MIRROR);
+	fnvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, child, m);
 
 	for (c = 0; c < m; c++)
-		nvlist_free(child[c]);
+		fnvlist_free(child[c]);
 
 	umem_free(child, m * sizeof (nvlist_t *));
 
@@ -1109,7 +1355,7 @@
 	int c;
 	boolean_t log;
 
-	ASSERT(t > 0);
+	ASSERT3S(t, >, 0);
 
 	log = (class != NULL && strcmp(class, "log") == 0);
 
@@ -1118,23 +1364,22 @@
 	for (c = 0; c < t; c++) {
 		child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
 		    r, m);
-		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-		    log) == 0);
+		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, log);
 
 		if (class != NULL && class[0] != '\0') {
 			ASSERT(m > 1 || log);   /* expecting a mirror */
-			VERIFY(nvlist_add_string(child[c],
-			    ZPOOL_CONFIG_ALLOCATION_BIAS, class) == 0);
+			fnvlist_add_string(child[c],
+			    ZPOOL_CONFIG_ALLOCATION_BIAS, class);
 		}
 	}
 
-	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
-	VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
-	    child, t) == 0);
+	root = fnvlist_alloc();
+	fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
+	fnvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
+	    child, t);
 
 	for (c = 0; c < t; c++)
-		nvlist_free(child[c]);
+		fnvlist_free(child[c]);
 
 	umem_free(child, t * sizeof (nvlist_t *));
 
@@ -1165,7 +1410,7 @@
 static int
 ztest_random_blocksize(void)
 {
-	ASSERT(ztest_spa->spa_max_ashift != 0);
+	ASSERT3U(ztest_spa->spa_max_ashift, !=, 0);
 
 	/*
 	 * Choose a block size >= the ashift.
@@ -1223,7 +1468,7 @@
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *tvd;
 
-	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_READER), !=, 0);
 
 	do {
 		top = ztest_random(rvd->vdev_children);
@@ -1291,12 +1536,12 @@
 	nvlist_t *props = NULL;
 	int error;
 
-	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
+	props = fnvlist_alloc();
+	fnvlist_add_uint64(props, zpool_prop_to_name(prop), value);
 
 	error = spa_prop_set(spa, props);
 
-	nvlist_free(props);
+	fnvlist_free(props);
 
 	if (error == ENOSPC) {
 		ztest_record_enospc(FTAG);
@@ -1330,7 +1575,11 @@
 		VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
 		    crypto_args, &dcp));
 		err = spa_keystore_load_wkey(ddname, dcp, B_FALSE);
-		dsl_crypto_params_free(dcp, B_FALSE);
+		/*
+		 * Note: if there was an error loading, the wkey was not
+		 * consumed, and needs to be freed.
+		 */
+		dsl_crypto_params_free(dcp, (err != 0));
 		fnvlist_free(crypto_args);
 
 		if (err == EINVAL) {
@@ -1370,8 +1619,8 @@
 static void
 ztest_rll_destroy(rll_t *rll)
 {
-	ASSERT(rll->rll_writer == NULL);
-	ASSERT(rll->rll_readers == 0);
+	ASSERT3P(rll->rll_writer, ==, NULL);
+	ASSERT0(rll->rll_readers);
 	mutex_destroy(&rll->rll_lock);
 	cv_destroy(&rll->rll_cv);
 }
@@ -1400,11 +1649,11 @@
 	mutex_enter(&rll->rll_lock);
 
 	if (rll->rll_writer) {
-		ASSERT(rll->rll_readers == 0);
+		ASSERT0(rll->rll_readers);
 		rll->rll_writer = NULL;
 	} else {
-		ASSERT(rll->rll_readers != 0);
-		ASSERT(rll->rll_writer == NULL);
+		ASSERT3S(rll->rll_readers, >, 0);
+		ASSERT3P(rll->rll_writer, ==, NULL);
 		rll->rll_readers--;
 	}
 
@@ -1510,7 +1759,7 @@
 	error = dmu_tx_assign(tx, txg_how);
 	if (error) {
 		if (error == ERESTART) {
-			ASSERT(txg_how == TXG_NOWAIT);
+			ASSERT3U(txg_how, ==, TXG_NOWAIT);
 			dmu_tx_wait(tx);
 		} else {
 			ASSERT3U(error, ==, ENOSPC);
@@ -1520,36 +1769,11 @@
 		return (0);
 	}
 	txg = dmu_tx_get_txg(tx);
-	ASSERT(txg != 0);
+	ASSERT3U(txg, !=, 0);
 	return (txg);
 }
 
 static void
-ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
-{
-	uint64_t *ip = buf;
-	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
-
-	while (ip < ip_end)
-		*ip++ = value;
-}
-
-#ifndef NDEBUG
-static boolean_t
-ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
-{
-	uint64_t *ip = buf;
-	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
-	uint64_t diff = 0;
-
-	while (ip < ip_end)
-		diff |= (value - *ip++);
-
-	return (diff == 0);
-}
-#endif
-
-static void
 ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
     uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
     uint64_t crtxg)
@@ -1607,7 +1831,7 @@
  * helps ensure that all dnode traversal code properly skips the
  * interior regions of large dnodes.
  */
-void
+static void
 ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
     objset_t *os, uint64_t gen)
 {
@@ -1626,7 +1850,7 @@
  * Verify that the unused area of a bonus buffer is filled with the
  * expected tokens.
  */
-void
+static void
 ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
     objset_t *os, uint64_t gen)
 {
@@ -1768,8 +1992,8 @@
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
-	ASSERT(lr->lr_doid == ZTEST_DIROBJ);
-	ASSERT(name[0] != '\0');
+	ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ);
+	ASSERT3S(name[0], !=, '\0');
 
 	tx = dmu_tx_create(os);
 
@@ -1785,7 +2009,7 @@
 	if (txg == 0)
 		return (ENOSPC);
 
-	ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
+	ASSERT3U(dmu_objset_zil(os)->zl_replay, ==, !!lr->lr_foid);
 	bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize);
 
 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
@@ -1817,13 +2041,13 @@
 		return (error);
 	}
 
-	ASSERT(lr->lr_foid != 0);
+	ASSERT3U(lr->lr_foid, !=, 0);
 
 	if (lr->lrz_type != DMU_OT_ZAP_OTHER)
-		VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
+		VERIFY0(dmu_object_set_blocksize(os, lr->lr_foid,
 		    lr->lrz_blocksize, lr->lrz_ibshift, tx));
 
-	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 	bbt = ztest_bt_bonus(db);
 	dmu_buf_will_dirty(db, tx);
 	ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL,
@@ -1831,7 +2055,7 @@
 	ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen);
 	dmu_buf_rele(db, FTAG);
 
-	VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
+	VERIFY0(zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
 	    &lr->lr_foid, tx));
 
 	(void) ztest_log_create(zd, tx, lr);
@@ -1855,16 +2079,16 @@
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
-	ASSERT(lr->lr_doid == ZTEST_DIROBJ);
-	ASSERT(name[0] != '\0');
+	ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ);
+	ASSERT3S(name[0], !=, '\0');
 
-	VERIFY3U(0, ==,
+	VERIFY0(
 	    zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
-	ASSERT(object != 0);
+	ASSERT3U(object, !=, 0);
 
 	ztest_object_lock(zd, object, RL_WRITER);
 
-	VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
+	VERIFY0(dmu_object_info(os, object, &doi));
 
 	tx = dmu_tx_create(os);
 
@@ -1878,12 +2102,12 @@
 	}
 
 	if (doi.doi_type == DMU_OT_ZAP_OTHER) {
-		VERIFY3U(0, ==, zap_destroy(os, object, tx));
+		VERIFY0(zap_destroy(os, object, tx));
 	} else {
-		VERIFY3U(0, ==, dmu_object_free(os, object, tx));
+		VERIFY0(dmu_object_free(os, object, tx));
 	}
 
-	VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
+	VERIFY0(zap_remove(os, lr->lr_doid, name, tx));
 
 	(void) ztest_log_remove(zd, tx, lr, object);
 
@@ -1935,7 +2159,7 @@
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
 	rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
 
-	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
 	dmu_object_info_from_db(db, &doi);
 
@@ -1969,7 +2193,8 @@
 		 * but not always, because we also want to verify correct
 		 * behavior when the data was not recently read into cache.
 		 */
-		ASSERT(offset % doi.doi_data_block_size == 0);
+		ASSERT(doi.doi_data_block_size);
+		ASSERT0(offset % doi.doi_data_block_size);
 		if (ztest_random(4) != 0) {
 			int prefetch = ztest_random(2) ?
 			    DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
@@ -2050,8 +2275,8 @@
 		return (ENOSPC);
 	}
 
-	VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
-	    lr->lr_length, tx) == 0);
+	VERIFY0(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
+	    lr->lr_length, tx));
 
 	(void) ztest_log_truncate(zd, tx, lr);
 
@@ -2079,7 +2304,7 @@
 
 	ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
 
-	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_bonus(tx, lr->lr_foid);
@@ -2098,9 +2323,9 @@
 	dnodesize = bbt->bt_dnodesize;
 
 	if (zd->zd_zilog->zl_replay) {
-		ASSERT(lr->lr_size != 0);
-		ASSERT(lr->lr_mode != 0);
-		ASSERT(lrtxg != 0);
+		ASSERT3U(lr->lr_size, !=, 0);
+		ASSERT3U(lr->lr_mode, !=, 0);
+		ASSERT3U(lrtxg, !=, 0);
 	} else {
 		/*
 		 * Randomly change the size and increment the generation.
@@ -2108,7 +2333,7 @@
 		lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
 		    sizeof (*bbt);
 		lr->lr_mode = bbt->bt_gen + 1;
-		ASSERT(lrtxg == 0);
+		ASSERT0(lrtxg);
 	}
 
 	/*
@@ -2182,8 +2407,8 @@
 }
 
 static int
-ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
-    zio_t *zio)
+ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	ztest_ds_t *zd = arg;
 	objset_t *os = zd->zd_os;
@@ -2230,13 +2455,13 @@
 
 		error = dmu_read(os, object, offset, size, buf,
 		    DMU_READ_NO_PREFETCH);
-		ASSERT(error == 0);
+		ASSERT0(error);
 	} else {
 		size = doi.doi_data_block_size;
 		if (ISP2(size)) {
 			offset = P2ALIGN(offset, size);
 		} else {
-			ASSERT(offset < size);
+			ASSERT3U(offset, <, size);
 			offset = 0;
 		}
 
@@ -2252,8 +2477,8 @@
 			zgd->zgd_db = db;
 			zgd->zgd_bp = bp;
 
-			ASSERT(db->db_offset == offset);
-			ASSERT(db->db_size == size);
+			ASSERT3U(db->db_offset, ==, offset);
+			ASSERT3U(db->db_size, ==, size);
 
 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
 			    ztest_get_done, zgd);
@@ -2282,7 +2507,7 @@
 	return (lr);
 }
 
-void
+static void
 ztest_lr_free(void *lr, size_t lrsize, char *name)
 {
 	size_t namesize = name ? strlen(name) + 1 : 0;
@@ -2307,20 +2532,20 @@
 		error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
 		    sizeof (uint64_t), 1, &od->od_object);
 		if (error) {
-			ASSERT(error == ENOENT);
-			ASSERT(od->od_object == 0);
+			ASSERT3S(error, ==, ENOENT);
+			ASSERT0(od->od_object);
 			missing++;
 		} else {
 			dmu_buf_t *db;
 			ztest_block_tag_t *bbt;
 			dmu_object_info_t doi;
 
-			ASSERT(od->od_object != 0);
-			ASSERT(missing == 0);	/* there should be no gaps */
+			ASSERT3U(od->od_object, !=, 0);
+			ASSERT0(missing);	/* there should be no gaps */
 
 			ztest_object_lock(zd, od->od_object, RL_READER);
-			VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
-			    od->od_object, FTAG, &db));
+			VERIFY0(dmu_bonus_hold(zd->zd_os, od->od_object,
+			    FTAG, &db));
 			dmu_object_info_from_db(db, &doi);
 			bbt = ztest_bt_bonus(db);
 			ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
@@ -2363,7 +2588,7 @@
 		lr->lr_crtime[0] = time(NULL);
 
 		if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
-			ASSERT(missing == 0);
+			ASSERT0(missing);
 			od->od_object = 0;
 			missing++;
 		} else {
@@ -2371,7 +2596,7 @@
 			od->od_type = od->od_crtype;
 			od->od_blocksize = od->od_crblocksize;
 			od->od_gen = od->od_crgen;
-			ASSERT(od->od_object != 0);
+			ASSERT3U(od->od_object, !=, 0);
 		}
 
 		ztest_lr_free(lr, sizeof (*lr), od->od_name);
@@ -2522,7 +2747,7 @@
 	uint64_t blocksize;
 	void *data;
 
-	VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
+	VERIFY0(dmu_object_info(zd->zd_os, object, &doi));
 	blocksize = doi.doi_data_block_size;
 	data = umem_alloc(blocksize, UMEM_NOFAIL);
 
@@ -2660,7 +2885,7 @@
 	 * will verify that the log really does contain this record.
 	 */
 	mutex_enter(&zilog->zl_lock);
-	ASSERT(zd->zd_shared != NULL);
+	ASSERT3P(zd->zd_shared, !=, NULL);
 	ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
 	zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
 	mutex_exit(&zilog->zl_lock);
@@ -2698,7 +2923,7 @@
 	zil_close(zd->zd_zilog);
 
 	/* zfsvfs_setup() */
-	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
+	VERIFY3P(zil_open(os, ztest_get_data), ==, zd->zd_zilog);
 	zil_replay(os, zd, ztest_replay_vector);
 
 	(void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
@@ -2727,7 +2952,7 @@
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
 	VERIFY3U(ENOENT, ==,
 	    spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
-	nvlist_free(nvroot);
+	fnvlist_free(nvroot);
 
 	/*
 	 * Attempt to create using a bad mirror.
@@ -2735,7 +2960,7 @@
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1);
 	VERIFY3U(ENOENT, ==,
 	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
-	nvlist_free(nvroot);
+	fnvlist_free(nvroot);
 
 	/*
 	 * Attempt to create an existing pool.  It shouldn't matter
@@ -2745,7 +2970,7 @@
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
 	VERIFY3U(EEXIST, ==,
 	    spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
-	nvlist_free(nvroot);
+	fnvlist_free(nvroot);
 
 	/*
 	 * We open a reference to the spa and then we try to export it
@@ -2758,7 +2983,7 @@
 	 *	For the case that there is another ztest thread doing
 	 *	an export concurrently.
 	 */
-	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
+	VERIFY0(spa_open(zo->zo_pool, &spa, FTAG));
 	int error = spa_destroy(zo->zo_pool);
 	if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) {
 		fatal(0, "spa_destroy(%s) returned unexpected value %d",
@@ -2832,6 +3057,10 @@
 	if (ztest_opts.zo_mmp_test)
 		return;
 
+	/* dRAID added after feature flags, skip upgrade test. */
+	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0)
+		return;
+
 	mutex_enter(&ztest_vdev_lock);
 	name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
 
@@ -2841,13 +3070,13 @@
 	(void) spa_destroy(name);
 
 	nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
-	    NULL, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
+	    NULL, ztest_opts.zo_raid_children, ztest_opts.zo_mirrors, 1);
 
 	/*
 	 * If we're configuring a RAIDZ device then make sure that the
 	 * initial version is capable of supporting that feature.
 	 */
-	switch (ztest_opts.zo_raidz_parity) {
+	switch (ztest_opts.zo_raid_parity) {
 	case 0:
 	case 1:
 		initial_version = SPA_VERSION_INITIAL;
@@ -2871,11 +3100,11 @@
 	props = fnvlist_alloc();
 	fnvlist_add_uint64(props,
 	    zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
-	VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0);
+	VERIFY0(spa_create(name, nvroot, props, NULL, NULL));
 	fnvlist_free(nvroot);
 	fnvlist_free(props);
 
-	VERIFY3S(spa_open(name, &spa, FTAG), ==, 0);
+	VERIFY0(spa_open(name, &spa, FTAG));
 	VERIFY3U(spa_version(spa), ==, version);
 	newversion = ztest_random_spa_version(version + 1);
 
@@ -2890,7 +3119,7 @@
 	    zpool_prop_to_name(ZPOOL_PROP_VERSION)));
 	spa_close(spa, FTAG);
 
-	strfree(name);
+	kmem_strfree(name);
 	mutex_exit(&ztest_vdev_lock);
 }
 
@@ -2967,24 +3196,12 @@
 	return (NULL);
 }
 
-/*
- * Find the first available hole which can be used as a top-level.
- */
-int
-find_vdev_hole(spa_t *spa)
+static int
+spa_num_top_vdevs(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
-	int c;
-
-	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
-
-	for (c = 0; c < rvd->vdev_children; c++) {
-		vdev_t *cvd = rvd->vdev_child[c];
-
-		if (cvd->vdev_ishole)
-			break;
-	}
-	return (c);
+	ASSERT3U(spa_config_held(spa, SCL_VDEV, RW_READER), ==, SCL_VDEV);
+	return (rvd->vdev_children);
 }
 
 /*
@@ -3005,11 +3222,12 @@
 		return;
 
 	mutex_enter(&ztest_vdev_lock);
-	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
+	    ztest_opts.zo_raid_children;
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
-	ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
+	ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
 
 	/*
 	 * If we have slogs then remove them 1/4 of the time.
@@ -3020,7 +3238,7 @@
 		/*
 		 * find the first real slog in log allocation class
 		 */
-		mg =  spa_log_class(spa)->mc_rotor;
+		mg =  spa_log_class(spa)->mc_allocator[0].mca_rotor;
 		while (!mg->mg_vd->vdev_islog)
 			mg = mg->mg_next;
 
@@ -3059,10 +3277,11 @@
 		 */
 		nvroot = make_vdev_root(NULL, NULL, NULL,
 		    ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ?
-		    "log" : NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+		    "log" : NULL, ztest_opts.zo_raid_children, zs->zs_mirrors,
+		    1);
 
 		error = spa_vdev_add(spa, nvroot);
-		nvlist_free(nvroot);
+		fnvlist_free(nvroot);
 
 		switch (error) {
 		case 0:
@@ -3113,17 +3332,18 @@
 		return;
 	}
 
-	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) *
+	    ztest_opts.zo_raid_children;
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
+	ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
-	    class, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+	    class, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
 
 	error = spa_vdev_add(spa, nvroot);
-	nvlist_free(nvroot);
+	fnvlist_free(nvroot);
 
 	if (error == ENOSPC)
 		ztest_record_enospc("spa_vdev_add");
@@ -3169,7 +3389,7 @@
 	char *aux;
 	char *path;
 	uint64_t guid = 0;
-	int error;
+	int error, ignore_err = 0;
 
 	if (ztest_opts.zo_mmp_test)
 		return;
@@ -3192,7 +3412,13 @@
 		/*
 		 * Pick a random device to remove.
 		 */
-		guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
+		vdev_t *svd = sav->sav_vdevs[ztest_random(sav->sav_count)];
+
+		/* dRAID spares cannot be removed; try anyways to see ENOTSUP */
+		if (strstr(svd->vdev_path, VDEV_TYPE_DRAID) != NULL)
+			ignore_err = ENOTSUP;
+
+		guid = svd->vdev_guid;
 	} else {
 		/*
 		 * Find an unused device we can add.
@@ -3230,7 +3456,7 @@
 		default:
 			fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
 		}
-		nvlist_free(nvroot);
+		fnvlist_free(nvroot);
 	} else {
 		/*
 		 * Remove an existing device.  Sometimes, dirty its
@@ -3249,7 +3475,9 @@
 		case ZFS_ERR_DISCARDING_CHECKPOINT:
 			break;
 		default:
-			fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
+			if (error != ignore_err)
+				fatal(0, "spa_vdev_remove(%llu) = %d", guid,
+				    error);
 		}
 	}
 
@@ -3278,7 +3506,7 @@
 	mutex_enter(&ztest_vdev_lock);
 
 	/* ensure we have a usable config; mirrors of raidz aren't supported */
-	if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
+	if (zs->zs_mirrors < 3 || ztest_opts.zo_raid_children > 1) {
 		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
@@ -3290,12 +3518,11 @@
 
 	/* generate a config from the existing config */
 	mutex_enter(&spa->spa_props_lock);
-	VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
-	    &tree) == 0);
+	tree = fnvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE);
 	mutex_exit(&spa->spa_props_lock);
 
-	VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
-	    &children) == 0);
+	VERIFY0(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children));
 
 	schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
 	for (c = 0; c < children; c++) {
@@ -3304,37 +3531,35 @@
 		uint_t mchildren;
 
 		if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
-			VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME,
-			    0) == 0);
-			VERIFY(nvlist_add_string(schild[schildren],
-			    ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
-			VERIFY(nvlist_add_uint64(schild[schildren],
-			    ZPOOL_CONFIG_IS_HOLE, 1) == 0);
+			schild[schildren] = fnvlist_alloc();
+			fnvlist_add_string(schild[schildren],
+			    ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE);
+			fnvlist_add_uint64(schild[schildren],
+			    ZPOOL_CONFIG_IS_HOLE, 1);
 			if (lastlogid == 0)
 				lastlogid = schildren;
 			++schildren;
 			continue;
 		}
 		lastlogid = 0;
-		VERIFY(nvlist_lookup_nvlist_array(child[c],
-		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
-		VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
+		VERIFY0(nvlist_lookup_nvlist_array(child[c],
+		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren));
+		schild[schildren++] = fnvlist_dup(mchild[0]);
 	}
 
 	/* OK, create a config that can be used to split */
-	VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_ROOT) == 0);
-	VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
-	    lastlogid != 0 ? lastlogid : schildren) == 0);
+	split = fnvlist_alloc();
+	fnvlist_add_string(split, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
+	fnvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
+	    lastlogid != 0 ? lastlogid : schildren);
 
-	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
+	config = fnvlist_alloc();
+	fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split);
 
 	for (c = 0; c < schildren; c++)
-		nvlist_free(schild[c]);
+		fnvlist_free(schild[c]);
 	free(schild);
-	nvlist_free(split);
+	fnvlist_free(split);
 
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
@@ -3342,7 +3567,7 @@
 	error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
 	(void) pthread_rwlock_unlock(&ztest_name_lock);
 
-	nvlist_free(config);
+	fnvlist_free(config);
 
 	if (error == 0) {
 		(void) printf("successful split - results:\n");
@@ -3378,6 +3603,7 @@
 	int replacing;
 	int oldvd_has_siblings = B_FALSE;
 	int newvd_is_spare = B_FALSE;
+	int newvd_is_dspare = B_FALSE;
 	int oldvd_is_log;
 	int error, expected_error;
 
@@ -3388,7 +3614,7 @@
 	newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 
 	mutex_enter(&ztest_vdev_lock);
-	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
 
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 
@@ -3400,8 +3626,7 @@
 	 */
 	if (ztest_device_removal_active) {
 		spa_config_exit(spa, SCL_ALL, FTAG);
-		mutex_exit(&ztest_vdev_lock);
-		return;
+		goto out;
 	}
 
 	/*
@@ -3426,16 +3651,19 @@
 
 	/* pick a child from the mirror */
 	if (zs->zs_mirrors >= 1) {
-		ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
-		ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
-		oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz];
+		ASSERT3P(oldvd->vdev_ops, ==, &vdev_mirror_ops);
+		ASSERT3U(oldvd->vdev_children, >=, zs->zs_mirrors);
+		oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raid_children];
 	}
 
 	/* pick a child out of the raidz group */
-	if (ztest_opts.zo_raidz > 1) {
-		ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
-		ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz);
-		oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz];
+	if (ztest_opts.zo_raid_children > 1) {
+		if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0)
+			ASSERT3P(oldvd->vdev_ops, ==, &vdev_raidz_ops);
+		else
+			ASSERT3P(oldvd->vdev_ops, ==, &vdev_draid_ops);
+		ASSERT3U(oldvd->vdev_children, ==, ztest_opts.zo_raid_children);
+		oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raid_children];
 	}
 
 	/*
@@ -3444,7 +3672,7 @@
 	 */
 	while (oldvd->vdev_children != 0) {
 		oldvd_has_siblings = B_TRUE;
-		ASSERT(oldvd->vdev_children >= 2);
+		ASSERT3U(oldvd->vdev_children, >=, 2);
 		oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
 	}
 
@@ -3482,6 +3710,10 @@
 	if (sav->sav_count != 0 && ztest_random(3) == 0) {
 		newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
 		newvd_is_spare = B_TRUE;
+
+		if (newvd->vdev_ops == &vdev_draid_spare_ops)
+			newvd_is_dspare = B_TRUE;
+
 		(void) strcpy(newpath, newvd->vdev_path);
 	} else {
 		(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
@@ -3515,6 +3747,9 @@
 	 * If newvd is already part of the pool, it should fail with EBUSY.
 	 *
 	 * If newvd is too small, it should fail with EOVERFLOW.
+	 *
+	 * If newvd is a distributed spare and it's being attached to a
+	 * dRAID which is not its parent it should fail with ENOTSUP.
 	 */
 	if (pvd->vdev_ops != &vdev_mirror_ops &&
 	    pvd->vdev_ops != &vdev_root_ops && (!replacing ||
@@ -3527,10 +3762,12 @@
 		expected_error = replacing ? 0 : EBUSY;
 	else if (vdev_lookup_by_path(rvd, newpath) != NULL)
 		expected_error = EBUSY;
-	else if (newsize < oldsize)
+	else if (!newvd_is_dspare && newsize < oldsize)
 		expected_error = EOVERFLOW;
 	else if (ashift > oldvd->vdev_top->vdev_ashift)
 		expected_error = EDOM;
+	else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
+		expected_error = ENOTSUP;
 	else
 		expected_error = 0;
 
@@ -3542,9 +3779,18 @@
 	root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
 	    ashift, NULL, 0, 0, 1);
 
-	error = spa_vdev_attach(spa, oldguid, root, replacing);
+	/*
+	 * When supported select either a healing or sequential resilver.
+	 */
+	boolean_t rebuilding = B_FALSE;
+	if (pvd->vdev_ops == &vdev_mirror_ops ||
+	    pvd->vdev_ops ==  &vdev_root_ops) {
+		rebuilding = !!ztest_random(2);
+	}
 
-	nvlist_free(root);
+	error = spa_vdev_attach(spa, oldguid, root, replacing, rebuilding);
+
+	fnvlist_free(root);
 
 	/*
 	 * If our parent was the replacing vdev, but the replace completed,
@@ -3562,10 +3808,11 @@
 		expected_error = error;
 
 	if (error == ZFS_ERR_CHECKPOINT_EXISTS ||
-	    error == ZFS_ERR_DISCARDING_CHECKPOINT)
+	    error == ZFS_ERR_DISCARDING_CHECKPOINT ||
+	    error == ZFS_ERR_RESILVER_IN_PROGRESS ||
+	    error == ZFS_ERR_REBUILD_IN_PROGRESS)
 		expected_error = error;
 
-	/* XXX workaround 6690467 */
 	if (error != expected_error && expected_error != EBUSY) {
 		fatal(0, "attach (%s %llu, %s %llu, %d) "
 		    "returned %d, expected %d",
@@ -3644,22 +3891,22 @@
 /*
  * Callback function which expands the physical size of the vdev.
  */
-vdev_t *
+static vdev_t *
 grow_vdev(vdev_t *vd, void *arg)
 {
-	ASSERTV(spa_t *spa = vd->vdev_spa);
+	spa_t *spa __maybe_unused = vd->vdev_spa;
 	size_t *newsize = arg;
 	size_t fsize;
 	int fd;
 
-	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+	ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
 		return (vd);
 
 	fsize = lseek(fd, 0, SEEK_END);
-	VERIFY(ftruncate(fd, *newsize) == 0);
+	VERIFY0(ftruncate(fd, *newsize));
 
 	if (ztest_opts.zo_verbose >= 6) {
 		(void) printf("%s grew from %lu to %lu bytes\n",
@@ -3673,7 +3920,7 @@
  * Callback function which expands a given vdev by calling vdev_online().
  */
 /* ARGSUSED */
-vdev_t *
+static vdev_t *
 online_vdev(vdev_t *vd, void *arg)
 {
 	spa_t *spa = vd->vdev_spa;
@@ -3683,7 +3930,7 @@
 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
 	int error;
 
-	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+	ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	/* Calling vdev_online will initialize the new metaslabs */
@@ -3733,7 +3980,7 @@
  * If a NULL callback is passed, then we just return back the first
  * leaf vdev we encounter.
  */
-vdev_t *
+static vdev_t *
 vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
 {
 	uint_t c;
@@ -3814,7 +4061,7 @@
 		mutex_exit(&ztest_checkpoint_lock);
 		return;
 	}
-	ASSERT(psize > 0);
+	ASSERT3U(psize, >, 0);
 	newsize = psize + MAX(psize / 8, SPA_MAXBLOCKSIZE);
 	ASSERT3U(newsize, >, psize);
 
@@ -3916,8 +4163,8 @@
 	/*
 	 * Create the objects common to all ztest datasets.
 	 */
-	VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
-	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+	VERIFY0(zap_create_claim(os, ZTEST_DIROBJ,
+	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx));
 }
 
 static int
@@ -4153,7 +4400,7 @@
 	/*
 	 * Verify that we can hold an objset that is also owned.
 	 */
-	VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
+	VERIFY0(dmu_objset_hold(name, FTAG, &os2));
 	dmu_objset_rele(os2, FTAG);
 
 	/*
@@ -4186,7 +4433,7 @@
 /*
  * Cleanup non-standard snapshots and clones.
  */
-void
+static void
 ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
 {
 	char *snap1name;
@@ -4465,7 +4712,7 @@
 	bigobj = od[0].od_object;
 	packobj = od[1].od_object;
 	chunksize = od[0].od_gen;
-	ASSERT(chunksize == od[1].od_gen);
+	ASSERT3U(chunksize, ==, od[1].od_gen);
 
 	/*
 	 * Prefetch a random chunk of the big object.
@@ -4560,8 +4807,8 @@
 		/* LINTED */
 		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
 
-		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
-		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
+		ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize);
+		ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize);
 
 		if (pack->bw_txg > txg)
 			fatal(0, "future leak: got %llx, open txg is %llx",
@@ -4602,7 +4849,7 @@
 			    (u_longlong_t)bigsize,
 			    (u_longlong_t)txg);
 		}
-		VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx));
+		VERIFY0(dmu_free_range(os, bigobj, bigoff, bigsize, tx));
 	} else {
 		if (ztest_opts.zo_verbose >= 7) {
 			(void) printf("writing offset %llx size %llx"
@@ -4623,13 +4870,13 @@
 		void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
 		void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
 
-		VERIFY(0 == dmu_read(os, packobj, packoff,
+		VERIFY0(dmu_read(os, packobj, packoff,
 		    packsize, packcheck, DMU_READ_PREFETCH));
-		VERIFY(0 == dmu_read(os, bigobj, bigoff,
+		VERIFY0(dmu_read(os, bigobj, bigoff,
 		    bigsize, bigcheck, DMU_READ_PREFETCH));
 
-		ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
-		ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
+		ASSERT0(bcmp(packbuf, packcheck, packsize));
+		ASSERT0(bcmp(bigbuf, bigcheck, bigsize));
 
 		umem_free(packcheck, packsize);
 		umem_free(bigcheck, bigsize);
@@ -4640,7 +4887,7 @@
 	umem_free(od, size);
 }
 
-void
+static void
 compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
     uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
 {
@@ -4663,8 +4910,8 @@
 		/* LINTED */
 		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
 
-		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
-		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
+		ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize);
+		ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize);
 
 		if (pack->bw_txg > txg)
 			fatal(0, "future leak: got %llx, open txg is %llx",
@@ -4749,12 +4996,12 @@
 	packobj = od[1].od_object;
 	blocksize = od[0].od_blocksize;
 	chunksize = blocksize;
-	ASSERT(chunksize == od[1].od_gen);
+	ASSERT3U(chunksize, ==, od[1].od_gen);
 
-	VERIFY(dmu_object_info(os, bigobj, &doi) == 0);
+	VERIFY0(dmu_object_info(os, bigobj, &doi));
 	VERIFY(ISP2(doi.doi_data_block_size));
-	VERIFY(chunksize == doi.doi_data_block_size);
-	VERIFY(chunksize >= 2 * sizeof (bufwad_t));
+	VERIFY3U(chunksize, ==, doi.doi_data_block_size);
+	VERIFY3U(chunksize, >=, 2 * sizeof (bufwad_t));
 
 	/*
 	 * Pick a random index and compute the offsets into packobj and bigobj.
@@ -4771,7 +5018,7 @@
 	packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
 	bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
 
-	VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));
+	VERIFY0(dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));
 
 	bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
 
@@ -4905,13 +5152,13 @@
 			void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
 			void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
 
-			VERIFY(0 == dmu_read(os, packobj, packoff,
+			VERIFY0(dmu_read(os, packobj, packoff,
 			    packsize, packcheck, DMU_READ_PREFETCH));
-			VERIFY(0 == dmu_read(os, bigobj, bigoff,
+			VERIFY0(dmu_read(os, bigobj, bigoff,
 			    bigsize, bigcheck, DMU_READ_PREFETCH));
 
-			ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
-			ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
+			ASSERT0(bcmp(packbuf, packcheck, packsize));
+			ASSERT0(bcmp(bigbuf, bigcheck, bigsize));
 
 			umem_free(packcheck, packsize);
 			umem_free(bigcheck, bigsize);
@@ -5040,19 +5287,19 @@
 		goto out;
 	for (i = 0; i < 2; i++) {
 		value[i] = i;
-		VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t),
+		VERIFY0(zap_add(os, object, hc[i], sizeof (uint64_t),
 		    1, &value[i], tx));
 	}
 	for (i = 0; i < 2; i++) {
 		VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i],
 		    sizeof (uint64_t), 1, &value[i], tx));
-		VERIFY3U(0, ==,
+		VERIFY0(
 		    zap_length(os, object, hc[i], &zl_intsize, &zl_ints));
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, 1);
 	}
 	for (i = 0; i < 2; i++) {
-		VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx));
+		VERIFY0(zap_remove(os, object, hc[i], tx));
 	}
 	dmu_tx_commit(tx);
 
@@ -5075,17 +5322,17 @@
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, 1);
 
-		VERIFY(zap_lookup(os, object, txgname, zl_intsize,
-		    zl_ints, &last_txg) == 0);
+		VERIFY0(zap_lookup(os, object, txgname, zl_intsize,
+		    zl_ints, &last_txg));
 
-		VERIFY(zap_length(os, object, propname, &zl_intsize,
-		    &zl_ints) == 0);
+		VERIFY0(zap_length(os, object, propname, &zl_intsize,
+		    &zl_ints));
 
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, ints);
 
-		VERIFY(zap_lookup(os, object, propname, zl_intsize,
-		    zl_ints, value) == 0);
+		VERIFY0(zap_lookup(os, object, propname, zl_intsize,
+		    zl_ints, value));
 
 		for (i = 0; i < ints; i++) {
 			ASSERT3U(value[i], ==, last_txg + object + i);
@@ -5113,9 +5360,9 @@
 	for (i = 0; i < ints; i++)
 		value[i] = txg + object + i;
 
-	VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t),
+	VERIFY0(zap_update(os, object, txgname, sizeof (uint64_t),
 	    1, &txg, tx));
-	VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t),
+	VERIFY0(zap_update(os, object, propname, sizeof (uint64_t),
 	    ints, value, tx));
 
 	dmu_tx_commit(tx);
@@ -5139,8 +5386,8 @@
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0)
 		goto out;
-	VERIFY3U(0, ==, zap_remove(os, object, txgname, tx));
-	VERIFY3U(0, ==, zap_remove(os, object, propname, tx));
+	VERIFY0(zap_remove(os, object, txgname, tx));
+	VERIFY0(zap_remove(os, object, propname, tx));
 	dmu_tx_commit(tx);
 out:
 	umem_free(od, sizeof (ztest_od_t));
@@ -5242,7 +5489,7 @@
 
 	count = -1ULL;
 	VERIFY0(zap_count(os, object, &count));
-	ASSERT(count != -1ULL);
+	ASSERT3S(count, !=, -1ULL);
 
 	/*
 	 * Select an operation: length, lookup, add, update, remove.
@@ -5294,7 +5541,7 @@
 		break;
 
 	case 3:
-		VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0);
+		VERIFY0(zap_update(os, object, name, wsize, wc, data, tx));
 		break;
 
 	case 4:
@@ -5328,7 +5575,7 @@
 	ztest_cb_data_t *data = arg;
 	uint64_t synced_txg;
 
-	VERIFY(data != NULL);
+	VERIFY3P(data, !=, NULL);
 	VERIFY3S(data->zcd_expected_err, ==, error);
 	VERIFY(!data->zcd_called);
 
@@ -5454,7 +5701,7 @@
 	/*
 	 * Read existing data to make sure there isn't a future leak.
 	 */
-	VERIFY(0 == dmu_read(os, od->od_object, 0, sizeof (uint64_t),
+	VERIFY0(dmu_read(os, od->od_object, 0, sizeof (uint64_t),
 	    &old_txg, DMU_READ_PREFETCH));
 
 	if (old_txg > txg)
@@ -5591,9 +5838,6 @@
 
 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
 
-	(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
-	    ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
-
 	(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));
 
 	VERIFY0(spa_prop_get(ztest_spa, &props));
@@ -5601,7 +5845,7 @@
 	if (ztest_opts.zo_verbose >= 6)
 		dump_nvlist(props, 4);
 
-	nvlist_free(props);
+	fnvlist_free(props);
 
 	(void) pthread_rwlock_unlock(&ztest_name_lock);
 }
@@ -5789,11 +6033,11 @@
 	}
 
 	maxfaults = MAXFAULTS(zs);
-	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children;
 	mirror_save = zs->zs_mirrors;
 	mutex_exit(&ztest_vdev_lock);
 
-	ASSERT(leaves >= 1);
+	ASSERT3U(leaves, >=, 1);
 
 	/*
 	 * While ztest is running the number of leaves will not change.  This
@@ -5855,7 +6099,7 @@
 		    vd0->vdev_resilver_txg != 0)) {
 			/*
 			 * Make vd0 explicitly claim to be unreadable,
-			 * or unwriteable, or reach behind its back
+			 * or unwritable, or reach behind its back
 			 * and close the underlying fd.  We can do this if
 			 * maxfaults == 0 because we'll fail and reexecute,
 			 * and we can do it if maxfaults >= 2 because we'll
@@ -5869,8 +6113,8 @@
 			    (long long)vd0->vdev_id, (int)maxfaults);
 
 			if (vf != NULL && ztest_random(3) == 0) {
-				(void) close(vf->vf_vnode->v_fd);
-				vf->vf_vnode->v_fd = -1;
+				(void) close(vf->vf_file->f_fd);
+				vf->vf_file->f_fd = -1;
 			} else if (ztest_random(2) == 0) {
 				vd0->vdev_cant_read = B_TRUE;
 			} else {
@@ -5922,7 +6166,7 @@
 			if (islog)
 				(void) pthread_rwlock_wrlock(&ztest_name_lock);
 
-			VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
+			VERIFY3U(vdev_offline(spa, guid0, flags), !=, EBUSY);
 
 			if (islog)
 				(void) pthread_rwlock_unlock(&ztest_name_lock);
@@ -5962,24 +6206,26 @@
 		 * on two different leaf devices, because ZFS can not
 		 * tolerate that (if maxfaults==1).
 		 *
-		 * We divide each leaf into chunks of size
-		 * (# leaves * SPA_MAXBLOCKSIZE * 4).  Within each chunk
-		 * there is a series of ranges to which we can inject errors.
-		 * Each range can accept errors on only a single leaf vdev.
-		 * The error injection ranges are separated by ranges
-		 * which we will not inject errors on any device (DMZs).
-		 * Each DMZ must be large enough such that a single block
-		 * can not straddle it, so that a single block can not be
-		 * a target in two different injection ranges (on different
-		 * leaf vdevs).
+		 * To achieve this we divide each leaf device into
+		 * chunks of size (# leaves * SPA_MAXBLOCKSIZE * 4).
+		 * Each chunk is further divided into error-injection
+		 * ranges (can accept errors) and clear ranges (we do
+		 * not inject errors in those). Each error-injection
+		 * range can accept errors only for a single leaf vdev.
+		 * Error-injection ranges are separated by clear ranges.
 		 *
 		 * For example, with 3 leaves, each chunk looks like:
 		 *    0 to  32M: injection range for leaf 0
-		 *  32M to  64M: DMZ - no injection allowed
+		 *  32M to  64M: clear range - no injection allowed
 		 *  64M to  96M: injection range for leaf 1
-		 *  96M to 128M: DMZ - no injection allowed
+		 *  96M to 128M: clear range - no injection allowed
 		 * 128M to 160M: injection range for leaf 2
-		 * 160M to 192M: DMZ - no injection allowed
+		 * 160M to 192M: clear range - no injection allowed
+		 *
+		 * Each clear range must be large enough such that a
+		 * single block cannot straddle it. This way a block
+		 * can't be a target in two different injection ranges
+		 * (on different leaf vdevs).
 		 */
 		offset = ztest_random(fsize / (leaves << bshift)) *
 		    (leaves << bshift) + (leaf << bshift) +
@@ -6035,139 +6281,9 @@
 }
 
 /*
- * Verify that DDT repair works as expected.
- */
-void
-ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
-{
-	ztest_shared_t *zs = ztest_shared;
-	spa_t *spa = ztest_spa;
-	objset_t *os = zd->zd_os;
-	ztest_od_t *od;
-	uint64_t object, blocksize, txg, pattern;
-	enum zio_checksum checksum = spa_dedup_checksum(spa);
-	dmu_buf_t *db;
-	dmu_tx_t *tx;
-
-	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
-	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
-
-	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
-		umem_free(od, sizeof (ztest_od_t));
-		return;
-	}
-
-	/*
-	 * Take the name lock as writer to prevent anyone else from changing
-	 * the pool and dataset properties we need to maintain during this test.
-	 */
-	(void) pthread_rwlock_wrlock(&ztest_name_lock);
-
-	if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
-	    B_FALSE) != 0 ||
-	    ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
-	    B_FALSE) != 0) {
-		(void) pthread_rwlock_unlock(&ztest_name_lock);
-		umem_free(od, sizeof (ztest_od_t));
-		return;
-	}
-
-	dmu_objset_stats_t dds;
-	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
-	dmu_objset_fast_stat(os, &dds);
-	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
-
-	object = od[0].od_object;
-	blocksize = od[0].od_blocksize;
-	pattern = zs->zs_guid ^ dds.dds_guid;
-
-	/*
-	 * The numbers of copies written must always be greater than or
-	 * equal to the threshold set by the dedupditto property.  This
-	 * is initialized in ztest_run() and then randomly changed by
-	 * ztest_spa_prop_get_set(), these function will never set it
-	 * larger than 2 * ZIO_DEDUPDITTO_MIN.
-	 */
-	int copies = 2 * ZIO_DEDUPDITTO_MIN;
-
-	/*
-	 * The block size is limited by DMU_MAX_ACCESS (64MB) which
-	 * caps the maximum transaction size.  A block size of up to
-	 * SPA_OLD_MAXBLOCKSIZE is allowed which results in a maximum
-	 * transaction size of: 128K * 200 (copies) = ~25MB
-	 *
-	 * The actual block size is checked here, rather than requested
-	 * above, because the way ztest_od_init() is implemented it does
-	 * not guarantee the block size requested will be used.
-	 */
-	if (blocksize > SPA_OLD_MAXBLOCKSIZE) {
-		(void) pthread_rwlock_unlock(&ztest_name_lock);
-		umem_free(od, sizeof (ztest_od_t));
-		return;
-	}
-
-	ASSERT(object != 0);
-
-	tx = dmu_tx_create(os);
-	dmu_tx_hold_write(tx, object, 0, copies * blocksize);
-	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
-	if (txg == 0) {
-		(void) pthread_rwlock_unlock(&ztest_name_lock);
-		umem_free(od, sizeof (ztest_od_t));
-		return;
-	}
-
-	/*
-	 * Write all the copies of our block.
-	 */
-	for (int i = 0; i < copies; i++) {
-		uint64_t offset = i * blocksize;
-		int error = dmu_buf_hold(os, object, offset, FTAG, &db,
-		    DMU_READ_NO_PREFETCH);
-		if (error != 0) {
-			fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
-			    os, (long long)object, (long long) offset, error);
-		}
-		ASSERT(db->db_offset == offset);
-		ASSERT(db->db_size == blocksize);
-		ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
-		    ztest_pattern_match(db->db_data, db->db_size, 0ULL));
-		dmu_buf_will_fill(db, tx);
-		ztest_pattern_set(db->db_data, db->db_size, pattern);
-		dmu_buf_rele(db, FTAG);
-	}
-
-	dmu_tx_commit(tx);
-	txg_wait_synced(spa_get_dsl(spa), txg);
-
-	/*
-	 * Find out what block we got.
-	 */
-	VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
-	blkptr_t blk = *((dmu_buf_impl_t *)db)->db_blkptr;
-	dmu_buf_rele(db, FTAG);
-
-	/*
-	 * Damage the block.  Dedup-ditto will save us when we read it later.
-	 */
-	uint64_t psize = BP_GET_PSIZE(&blk);
-	abd_t *abd = abd_alloc_linear(psize, B_TRUE);
-	ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
-
-	(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
-	    abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
-
-	abd_free(abd);
-
-	(void) pthread_rwlock_unlock(&ztest_name_lock);
-	umem_free(od, sizeof (ztest_od_t));
-}
-
-/*
  * By design ztest will never inject uncorrectable damage in to the pool.
  * Issue a scrub, wait for it to complete, and verify there is never any
- * any persistent damage.
+ * persistent damage.
  *
  * Only after a full scrub has been completed is it safe to start injecting
  * data corruption.  See the comment in zfs_fault_inject().
@@ -6397,6 +6513,75 @@
 }
 
 static int
+ztest_set_global_vars(void)
+{
+	for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) {
+		char *kv = ztest_opts.zo_gvars[i];
+		VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN);
+		VERIFY3U(strlen(kv), >, 0);
+		int err = set_global_var(kv);
+		if (ztest_opts.zo_verbose > 0) {
+			(void) printf("setting global var %s ... %s\n", kv,
+			    err ? "failed" : "ok");
+		}
+		if (err != 0) {
+			(void) fprintf(stderr,
+			    "failed to set global var '%s'\n", kv);
+			return (err);
+		}
+	}
+	return (0);
+}
+
+/* Build a NULL-terminated { "-o", gvar, ... } argument list for zdb. */
+static char **
+ztest_global_vars_to_zdb_args(void)
+{
+	char **args = calloc(2*ztest_opts.zo_gvars_count + 1, sizeof (char *));
+	char **cur = args;
+	VERIFY3P(args, !=, NULL);
+	for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) {
+		/* Borrowed, not copied: the caller frees only the array. */
+		*cur++ = "-o";
+		*cur++ = ztest_opts.zo_gvars[i];
+	}
+	ASSERT3P(cur, ==, &args[2*ztest_opts.zo_gvars_count]);
+	*cur = NULL;
+	return (args);
+}
+
+/*
+ * Join the elements of strings, a NULL-terminated array, placing sep
+ * between consecutive elements.  An empty array yields an empty string.
+ * The result is malloc()ed; the caller releases it with free().
+ */
+static char *
+join_strings(char **strings, const char *sep)
+{
+	size_t seplen = strlen(sep);
+	size_t totallen = 0;
+	for (char **sp = strings; *sp != NULL; sp++)
+		totallen += strlen(*sp) + seplen;
+	if (totallen > 0) {
+		/* No separator follows the final element. */
+		ASSERT3U(totallen, >=, seplen);
+		totallen -= seplen;
+	}
+
+	size_t buflen = totallen + 1; /* trailing 0 byte */
+	char *o = malloc(buflen);
+	VERIFY3P(o, !=, NULL);
+	o[0] = '\0';
+	for (char **sp = strings; *sp != NULL; sp++) {
+		VERIFY3U(strlcat(o, *sp, buflen), <, buflen);
+		if (sp[1] != NULL)
+			VERIFY3U(strlcat(o, sep, buflen), <, buflen);
+	}
+	ASSERT3U(strlen(o), ==, totallen);
+	return (o);
+}
+
+static int
 ztest_check_path(char *path)
 {
 	struct stat s;
@@ -6421,7 +6606,7 @@
 		return;
 	}
 
-	VERIFY(realpath(getexecname(), bin) != NULL);
+	VERIFY3P(realpath(getexecname(), bin), !=, NULL);
 	if (strstr(bin, "/ztest/")) {
 		strstr(bin, "/ztest/")[0] = '\0'; /* In-tree */
 		strcat(bin, "/zdb/zdb");
@@ -6451,7 +6636,7 @@
 			eligible[eligible_idx++] = cvd;
 		}
 	}
-	VERIFY(eligible_idx > 0);
+	VERIFY3S(eligible_idx, >, 0);
 
 	uint64_t child_no = ztest_random(eligible_idx);
 	return (ztest_random_concrete_vdev_leaf(eligible[child_no]));
@@ -6485,7 +6670,7 @@
 	char *path = strdup(rand_vd->vdev_path);
 	boolean_t active = rand_vd->vdev_initialize_thread != NULL;
 
-	zfs_dbgmsg("vd %px, guid %llu", rand_vd, guid);
+	zfs_dbgmsg("vd %px, guid %llu", rand_vd, (u_longlong_t)guid);
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS);
@@ -6557,7 +6742,7 @@
 	char *path = strdup(rand_vd->vdev_path);
 	boolean_t active = rand_vd->vdev_trim_thread != NULL;
 
-	zfs_dbgmsg("vd %p, guid %llu", rand_vd, guid);
+	zfs_dbgmsg("vd %p, guid %llu", rand_vd, (u_longlong_t)guid);
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	uint64_t cmd = ztest_random(POOL_TRIM_FUNCS);
@@ -6624,13 +6809,21 @@
 
 	ztest_get_zdb_bin(bin, len);
 
-	(void) sprintf(zdb,
-	    "%s -bcc%s%s -G -d -Y -U %s %s",
+	char **set_gvars_args = ztest_global_vars_to_zdb_args();
+	char *set_gvars_args_joined = join_strings(set_gvars_args, " ");
+	free(set_gvars_args);
+
+	size_t would = snprintf(zdb, len,
+	    "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s",
 	    bin,
 	    ztest_opts.zo_verbose >= 3 ? "s" : "",
 	    ztest_opts.zo_verbose >= 4 ? "v" : "",
-	    spa_config_path,
+	    set_gvars_args_joined,
+	    ztest_opts.zo_dir,
 	    pool);
+	ASSERT3U(would, <, len);
+
+	free(set_gvars_args_joined);
 
 	if (ztest_opts.zo_verbose >= 5)
 		(void) printf("Executing %s\n", strstr(zdb, "zdb "));
@@ -6693,7 +6886,7 @@
 	/*
 	 * Get the pool's configuration and guid.
 	 */
-	VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
+	VERIFY0(spa_open(oldname, &spa, FTAG));
 
 	/*
 	 * Kick off a scrub to tickle scrub/export races.
@@ -6709,7 +6902,7 @@
 	/*
 	 * Export it.
 	 */
-	VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE));
+	VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE));
 
 	ztest_walk_pool_directory("pools after export");
 
@@ -6717,8 +6910,8 @@
 	 * Try to import it.
 	 */
 	newconfig = spa_tryimport(config);
-	ASSERT(newconfig != NULL);
-	nvlist_free(newconfig);
+	ASSERT3P(newconfig, !=, NULL);
+	fnvlist_free(newconfig);
 
 	/*
 	 * Import it under the new name.
@@ -6750,11 +6943,11 @@
 	/*
 	 * Verify that we can open and close the pool using the new name.
 	 */
-	VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
-	ASSERT(pool_guid == spa_guid(spa));
+	VERIFY0(spa_open(newname, &spa, FTAG));
+	ASSERT3U(pool_guid, ==, spa_guid(spa));
 	spa_close(spa, FTAG);
 
-	nvlist_free(config);
+	fnvlist_free(config);
 }
 
 static void
@@ -6958,7 +7151,7 @@
 	 * That's because zap_count() returns the open-context value,
 	 * while dmu_objset_space() returns the rootbp fill count.
 	 */
-	VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
+	VERIFY0(zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
 	dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
 	ASSERT3U(dirobjs + 1, ==, usedobjs);
 }
@@ -7065,6 +7258,150 @@
 	return (0);
 }
 
+static void
+ztest_freeze(void)
+{
+	ztest_ds_t *zd = &ztest_ds[0];
+	spa_t *spa;
+	int numloops = 0;
+
+	if (ztest_opts.zo_verbose >= 3)
+		(void) printf("testing spa_freeze()...\n");
+
+	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
+	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
+	VERIFY0(ztest_dataset_open(0));
+	ztest_spa = spa;
+
+	/*
+	 * Force the first log block to be transactionally allocated.
+	 * We have to do this before we freeze the pool -- otherwise
+	 * the log chain won't be anchored.
+	 */
+	while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
+		ztest_dmu_object_alloc_free(zd, 0);
+		zil_commit(zd->zd_zilog, 0);
+	}
+
+	txg_wait_synced(spa_get_dsl(spa), 0);
+
+	/*
+	 * Freeze the pool.  This stops spa_sync() from doing anything,
+	 * so that the only way to record changes from now on is the ZIL.
+	 */
+	spa_freeze(spa);
+
+	/*
+	 * Because it is hard to predict how much space a write will actually
+	 * require beforehand, we leave ourselves some fudge space to write over
+	 * capacity.
+	 */
+	uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2;
+
+	/*
+	 * Run tests that generate log records but don't alter the pool config
+	 * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
+	 * We do a txg_wait_synced() after each iteration to force the txg
+	 * to increase well beyond the last synced value in the uberblock.
+	 * The ZIL should be OK with that.
+	 *
+	 * Run a random number of times less than zo_maxloops and ensure we do
+	 * not run out of space on the pool.
+	 */
+	while (ztest_random(10) != 0 &&
+	    numloops++ < ztest_opts.zo_maxloops &&
+	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
+		ztest_od_t od;
+		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
+		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
+		ztest_io(zd, od.od_object,
+		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
+		txg_wait_synced(spa_get_dsl(spa), 0);
+	}
+
+	/*
+	 * Commit all of the changes we just generated.
+	 */
+	zil_commit(zd->zd_zilog, 0);
+	txg_wait_synced(spa_get_dsl(spa), 0);
+
+	/*
+	 * Close our dataset and close the pool.
+	 */
+	ztest_dataset_close(0);
+	spa_close(spa, FTAG);
+	kernel_fini();
+
+	/*
+	 * Open and close the pool and dataset to induce log replay.
+	 */
+	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
+	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
+	ASSERT3U(spa_freeze_txg(spa), ==, UINT64_MAX);
+	VERIFY0(ztest_dataset_open(0));
+	ztest_spa = spa;
+	txg_wait_synced(spa_get_dsl(spa), 0);
+	ztest_dataset_close(0);
+	ztest_reguid(NULL, 0);
+
+	spa_close(spa, FTAG);
+	kernel_fini();
+}
+
+static void
+ztest_import_impl(ztest_shared_t *zs)
+{
+	importargs_t args = { 0 };
+	nvlist_t *cfg = NULL;
+	int nsearch = 1;
+	char *searchdirs[nsearch];
+	int flags = ZFS_IMPORT_MISSING_LOG;
+
+	searchdirs[0] = ztest_opts.zo_dir;
+	args.paths = nsearch;
+	args.path = searchdirs;
+	args.can_be_active = B_FALSE;
+
+	VERIFY0(zpool_find_config(NULL, ztest_opts.zo_pool, &cfg, &args,
+	    &libzpool_config_ops));
+	VERIFY0(spa_import(ztest_opts.zo_pool, cfg, NULL, flags));
+	fnvlist_free(cfg);
+}
+
+/*
+ * Import a storage pool with the given name.
+ */
+static void
+ztest_import(ztest_shared_t *zs)
+{
+	spa_t *spa;
+
+	mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
+	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
+
+	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
+
+	ztest_import_impl(zs);
+
+	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
+	zs->zs_metaslab_sz =
+	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+	spa_close(spa, FTAG);
+
+	kernel_fini();
+
+	if (!ztest_opts.zo_mmp_test) {
+		ztest_run_zdb(ztest_opts.zo_pool);
+		ztest_freeze();
+		ztest_run_zdb(ztest_opts.zo_pool);
+	}
+
+	(void) pthread_rwlock_destroy(&ztest_name_lock);
+	mutex_destroy(&ztest_vdev_lock);
+	mutex_destroy(&ztest_checkpoint_lock);
+}
+
 /*
  * Kick off threads to run tests on all datasets in parallel.
  */
@@ -7104,10 +7441,19 @@
 	    offsetof(ztest_cb_data_t, zcd_node));
 
 	/*
-	 * Open our pool.
+	 * Open our pool.  It may need to be imported first depending on
+	 * what tests were running when the previous pass was terminated.
 	 */
-	kernel_init(FREAD | FWRITE);
-	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
+	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
+	error = spa_open(ztest_opts.zo_pool, &spa, FTAG);
+	if (error) {
+		VERIFY3S(error, ==, ENOENT);
+		ztest_import_impl(zs);
+		VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
+		zs->zs_metaslab_sz =
+		    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+	}
+
 	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
@@ -7122,8 +7468,6 @@
 	zs->zs_guid = dds.dds_guid;
 	dmu_objset_disown(os, B_TRUE, FTAG);
 
-	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
-
 	/*
 	 * Create a thread to periodically resume suspended I/O.
 	 */
@@ -7288,96 +7632,6 @@
 }
 
 static void
-ztest_freeze(void)
-{
-	ztest_ds_t *zd = &ztest_ds[0];
-	spa_t *spa;
-	int numloops = 0;
-
-	if (ztest_opts.zo_verbose >= 3)
-		(void) printf("testing spa_freeze()...\n");
-
-	kernel_init(FREAD | FWRITE);
-	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
-	VERIFY3U(0, ==, ztest_dataset_open(0));
-	ztest_spa = spa;
-
-	/*
-	 * Force the first log block to be transactionally allocated.
-	 * We have to do this before we freeze the pool -- otherwise
-	 * the log chain won't be anchored.
-	 */
-	while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
-		ztest_dmu_object_alloc_free(zd, 0);
-		zil_commit(zd->zd_zilog, 0);
-	}
-
-	txg_wait_synced(spa_get_dsl(spa), 0);
-
-	/*
-	 * Freeze the pool.  This stops spa_sync() from doing anything,
-	 * so that the only way to record changes from now on is the ZIL.
-	 */
-	spa_freeze(spa);
-
-	/*
-	 * Because it is hard to predict how much space a write will actually
-	 * require beforehand, we leave ourselves some fudge space to write over
-	 * capacity.
-	 */
-	uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2;
-
-	/*
-	 * Run tests that generate log records but don't alter the pool config
-	 * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
-	 * We do a txg_wait_synced() after each iteration to force the txg
-	 * to increase well beyond the last synced value in the uberblock.
-	 * The ZIL should be OK with that.
-	 *
-	 * Run a random number of times less than zo_maxloops and ensure we do
-	 * not run out of space on the pool.
-	 */
-	while (ztest_random(10) != 0 &&
-	    numloops++ < ztest_opts.zo_maxloops &&
-	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
-		ztest_od_t od;
-		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
-		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
-		ztest_io(zd, od.od_object,
-		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
-		txg_wait_synced(spa_get_dsl(spa), 0);
-	}
-
-	/*
-	 * Commit all of the changes we just generated.
-	 */
-	zil_commit(zd->zd_zilog, 0);
-	txg_wait_synced(spa_get_dsl(spa), 0);
-
-	/*
-	 * Close our dataset and close the pool.
-	 */
-	ztest_dataset_close(0);
-	spa_close(spa, FTAG);
-	kernel_fini();
-
-	/*
-	 * Open and close the pool and dataset to induce log replay.
-	 */
-	kernel_init(FREAD | FWRITE);
-	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
-	ASSERT(spa_freeze_txg(spa) == UINT64_MAX);
-	VERIFY3U(0, ==, ztest_dataset_open(0));
-	ztest_spa = spa;
-	txg_wait_synced(spa_get_dsl(spa), 0);
-	ztest_dataset_close(0);
-	ztest_reguid(NULL, 0);
-
-	spa_close(spa, FTAG);
-	kernel_fini();
-}
-
-void
 print_time(hrtime_t t, char *timebuf)
 {
 	hrtime_t s = t / NANOSEC;
@@ -7407,68 +7661,18 @@
 {
 	nvlist_t *props;
 
-	VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+	props = fnvlist_alloc();
 
 	if (ztest_random(2) == 0)
 		return (props);
 
-	VERIFY0(nvlist_add_uint64(props,
-	    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1));
+	fnvlist_add_uint64(props,
+	    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1);
 
 	return (props);
 }
 
 /*
- * Import a storage pool with the given name.
- */
-static void
-ztest_import(ztest_shared_t *zs)
-{
-	importargs_t args = { 0 };
-	spa_t *spa;
-	nvlist_t *cfg = NULL;
-	int nsearch = 1;
-	char *searchdirs[nsearch];
-	char *name = ztest_opts.zo_pool;
-	int flags = ZFS_IMPORT_MISSING_LOG;
-	int error;
-
-	mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
-	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
-
-	kernel_init(FREAD | FWRITE);
-
-	searchdirs[0] = ztest_opts.zo_dir;
-	args.paths = nsearch;
-	args.path = searchdirs;
-	args.can_be_active = B_FALSE;
-
-	error = zpool_find_config(NULL, name, &cfg, &args,
-	    &libzpool_config_ops);
-	if (error)
-		(void) fatal(0, "No pools found\n");
-
-	VERIFY0(spa_import(name, cfg, NULL, flags));
-	VERIFY0(spa_open(name, &spa, FTAG));
-	zs->zs_metaslab_sz =
-	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
-	spa_close(spa, FTAG);
-
-	kernel_fini();
-
-	if (!ztest_opts.zo_mmp_test) {
-		ztest_run_zdb(ztest_opts.zo_pool);
-		ztest_freeze();
-		ztest_run_zdb(ztest_opts.zo_pool);
-	}
-
-	(void) pthread_rwlock_destroy(&ztest_name_lock);
-	mutex_destroy(&ztest_vdev_lock);
-	mutex_destroy(&ztest_checkpoint_lock);
-}
-
-/*
  * Create a storage pool with the given name and initial vdev size.
  * Then test spa_freeze() functionality.
  */
@@ -7483,7 +7687,7 @@
 	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
 	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
 
-	kernel_init(FREAD | FWRITE);
+	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
 
 	/*
 	 * Create the storage pool.
@@ -7493,7 +7697,7 @@
 	zs->zs_splits = 0;
 	zs->zs_mirrors = ztest_opts.zo_mirrors;
 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
-	    NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+	    NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
 	props = make_random_props();
 
 	/*
@@ -7501,23 +7705,35 @@
 	 * in which case ztest_fault_inject() temporarily takes away
 	 * the only valid replica.
 	 */
-	VERIFY0(nvlist_add_uint64(props,
+	fnvlist_add_uint64(props,
 	    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
-	    MAXFAULTS(zs) ? ZIO_FAILURE_MODE_PANIC : ZIO_FAILURE_MODE_WAIT));
+	    MAXFAULTS(zs) ? ZIO_FAILURE_MODE_PANIC : ZIO_FAILURE_MODE_WAIT);
 
 	for (i = 0; i < SPA_FEATURES; i++) {
 		char *buf;
+
+		if (!spa_feature_table[i].fi_zfs_mod_supported)
+			continue;
+
+		/*
+		 * 75% chance of using the log space map feature. We want ztest
+		 * to exercise both the code paths that use the log space map
+		 * feature and the ones that don't.
+		 */
+		if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
+			continue;
+
 		VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
 		    spa_feature_table[i].fi_uname));
-		VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
+		fnvlist_add_uint64(props, buf, 0);
 		free(buf);
 	}
 
 	VERIFY0(spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
-	nvlist_free(nvroot);
-	nvlist_free(props);
+	fnvlist_free(nvroot);
+	fnvlist_free(props);
 
-	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
+	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
 	spa_close(spa, FTAG);
@@ -7567,9 +7783,9 @@
 
 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
-	ASSERT(hdr != MAP_FAILED);
+	ASSERT3P(hdr, !=, MAP_FAILED);
 
-	VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t)));
+	VERIFY0(ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t)));
 
 	hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
 	hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
@@ -7580,7 +7796,7 @@
 	hdr->zh_ds_count = ztest_opts.zo_datasets;
 
 	size = shared_data_size(hdr);
-	VERIFY3U(0, ==, ftruncate(ztest_fd_data, size));
+	VERIFY0(ftruncate(ztest_fd_data, size));
 
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 }
@@ -7594,14 +7810,14 @@
 
 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
 	    PROT_READ, MAP_SHARED, ztest_fd_data, 0);
-	ASSERT(hdr != MAP_FAILED);
+	ASSERT3P(hdr, !=, MAP_FAILED);
 
 	size = shared_data_size(hdr);
 
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 	hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),
 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
-	ASSERT(hdr != MAP_FAILED);
+	ASSERT3P(hdr, !=, MAP_FAILED);
 	buf = (uint8_t *)hdr;
 
 	offset = hdr->zh_hdr_size;
@@ -7640,12 +7856,13 @@
 		(void) setrlimit(RLIMIT_NOFILE, &rl);
 
 		(void) close(ztest_fd_rand);
-		VERIFY(11 >= snprintf(fd_data_str, 12, "%d", ztest_fd_data));
-		VERIFY(0 == setenv("ZTEST_FD_DATA", fd_data_str, 1));
+		VERIFY3S(11, >=,
+		    snprintf(fd_data_str, 12, "%d", ztest_fd_data));
+		VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1));
 
 		(void) enable_extended_FILE_stdio(-1, -1);
 		if (libpath != NULL)
-			VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1));
+			VERIFY0(setenv("LD_LIBRARY_PATH", libpath, 1));
 		(void) execv(cmd, emptyargv);
 		ztest_dump_core = B_FALSE;
 		fatal(B_TRUE, "exec failed: %s", cmd);
@@ -7729,7 +7946,7 @@
 	char numbuf[NN_NUMBUF_SZ];
 	char *cmd;
 	boolean_t hasalt;
-	int f;
+	int f, err;
 	char *fd_data_str = getenv("ZTEST_FD_DATA");
 	struct sigaction action;
 
@@ -7796,9 +8013,18 @@
 	}
 	ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count);
 
+	err = ztest_set_global_vars();
+	if (err != 0 && !fd_data_str) {
+		/* error message done by ztest_set_global_vars */
+		exit(EXIT_FAILURE);
+	} else {
+		/* children should not be spawned if setting gvars fails */
+		VERIFY3S(err, ==, 0);
+	}
+
 	/* Override location of zpool.cache */
-	VERIFY(asprintf((char **)&spa_config_path, "%s/zpool.cache",
-	    ztest_opts.zo_dir) != -1);
+	VERIFY3S(asprintf((char **)&spa_config_path, "%s/zpool.cache",
+	    ztest_opts.zo_dir), !=, -1);
 
 	ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
 	    UMEM_NOFAIL);
@@ -7820,10 +8046,12 @@
 
 	if (ztest_opts.zo_verbose >= 1) {
 		(void) printf("%llu vdevs, %d datasets, %d threads,"
-		    " %llu seconds...\n",
+		    " %d %s disks, %llu seconds...\n\n",
 		    (u_longlong_t)ztest_opts.zo_vdevs,
 		    ztest_opts.zo_datasets,
 		    ztest_opts.zo_threads,
+		    ztest_opts.zo_raid_children,
+		    ztest_opts.zo_raid_type,
 		    (u_longlong_t)ztest_opts.zo_time);
 	}
 

diff --git a/zfs/cmd/zvol_id/Makefile.am b/zfs/cmd/zvol_id/Makefile.am
index d131c63..bb7e31a 100644
--- a/zfs/cmd/zvol_id/Makefile.am
+++ b/zfs/cmd/zvol_id/Makefile.am

@@ -1,14 +1,12 @@
 include $(top_srcdir)/config/Rules.am
 
 # Disable GCC stack protection for zvol_id.  This is a kludge and should be
-# removed once https://github.com/zfsonlinux/zfs/issues/569 is resolved.
+# removed once https://github.com/openzfs/zfs/issues/569 is resolved.
 AM_CFLAGS += -fno-stack-protector
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 udev_PROGRAMS = zvol_id
 
 zvol_id_SOURCES = \
 	zvol_id_main.c
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/cmd/zvol_id/zvol_id_main.c b/zfs/cmd/zvol_id/zvol_id_main.c
index 4a2d74c..22f2e84 100644
--- a/zfs/cmd/zvol_id/zvol_id_main.c
+++ b/zfs/cmd/zvol_id/zvol_id_main.c

@@ -38,40 +38,39 @@
 static int
 ioctl_get_msg(char *var, int fd)
 {
-	int error = 0;
+	int ret;
 	char msg[ZFS_MAX_DATASET_NAME_LEN];
 
-	error = ioctl(fd, BLKZNAME, msg);
-	if (error < 0) {
-		return (error);
+	ret = ioctl(fd, BLKZNAME, msg);
+	if (ret < 0) {
+		return (ret);
 	}
 
 	snprintf(var, ZFS_MAX_DATASET_NAME_LEN, "%s", msg);
-	return (error);
+	return (ret);
 }
 
 int
 main(int argc, char **argv)
 {
-	int fd, error = 0;
+	int fd = -1, ret = 0, status = EXIT_FAILURE;
 	char zvol_name[ZFS_MAX_DATASET_NAME_LEN];
 	char *zvol_name_part = NULL;
 	char *dev_name;
 	struct stat64 statbuf;
 	int dev_minor, dev_part;
 	int i;
-	int rc;
 
 	if (argc < 2) {
-		printf("Usage: %s /dev/zvol_device_node\n", argv[0]);
-		return (EINVAL);
+		fprintf(stderr, "Usage: %s /dev/zvol_device_node\n", argv[0]);
+		goto fail;
 	}
 
 	dev_name = argv[1];
-	error = stat64(dev_name, &statbuf);
-	if (error != 0) {
-		printf("Unable to access device file: %s\n", dev_name);
-		return (errno);
+	ret = stat64(dev_name, &statbuf);
+	if (ret != 0) {
+		fprintf(stderr, "Unable to access device file: %s\n", dev_name);
+		goto fail;
 	}
 
 	dev_minor = minor(statbuf.st_rdev);
@@ -79,23 +78,23 @@
 
 	fd = open(dev_name, O_RDONLY);
 	if (fd < 0) {
-		printf("Unable to open device file: %s\n", dev_name);
-		return (errno);
+		fprintf(stderr, "Unable to open device file: %s\n", dev_name);
+		goto fail;
 	}
 
-	error = ioctl_get_msg(zvol_name, fd);
-	if (error < 0) {
-		printf("ioctl_get_msg failed:%s\n", strerror(errno));
-		return (errno);
+	ret = ioctl_get_msg(zvol_name, fd);
+	if (ret < 0) {
+		fprintf(stderr, "ioctl_get_msg failed: %s\n", strerror(errno));
+		goto fail;
 	}
 	if (dev_part > 0)
-		rc = asprintf(&zvol_name_part, "%s-part%d", zvol_name,
+		ret = asprintf(&zvol_name_part, "%s-part%d", zvol_name,
 		    dev_part);
 	else
-		rc = asprintf(&zvol_name_part, "%s", zvol_name);
+		ret = asprintf(&zvol_name_part, "%s", zvol_name);
 
-	if (rc == -1 || zvol_name_part == NULL)
-		goto error;
+	if (ret == -1 || zvol_name_part == NULL)
+		goto fail;
 
 	for (i = 0; i < strlen(zvol_name_part); i++) {
 		if (isblank(zvol_name_part[i]))
@@ -103,8 +102,13 @@
 	}
 
 	printf("%s\n", zvol_name_part);
-	free(zvol_name_part);
-error:
-	close(fd);
-	return (error);
+	status = EXIT_SUCCESS;
+
+fail:
+	if (zvol_name_part)
+		free(zvol_name_part);
+	if (fd >= 0)
+		close(fd);
+
+	return (status);
 }

diff --git a/zfs/cmd/zvol_wait/Makefile.am b/zfs/cmd/zvol_wait/Makefile.am
index 564031c..2e5bf33 100644
--- a/zfs/cmd/zvol_wait/Makefile.am
+++ b/zfs/cmd/zvol_wait/Makefile.am

@@ -1 +1,3 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 dist_bin_SCRIPTS = zvol_wait

diff --git a/zfs/cmd/zvol_wait/zvol_wait b/zfs/cmd/zvol_wait/zvol_wait
index e5df82d..0b2a8a3 100755
--- a/zfs/cmd/zvol_wait/zvol_wait
+++ b/zfs/cmd/zvol_wait/zvol_wait

@@ -9,41 +9,44 @@
 }
 
 filter_out_zvols_with_links() {
-	while read -r zvol; do
-		if [ ! -L "/dev/zvol/$zvol" ]; then
+	echo "$zvols" | tr ' ' '+' | while read -r zvol; do
+		if ! [ -L "/dev/zvol/$zvol" ]; then
 			echo "$zvol"
 		fi
-	done
+	done | tr '+' ' '
 }
 
 filter_out_deleted_zvols() {
-	while read -r zvol; do
-		if zfs list "$zvol" >/dev/null 2>&1; then
-			echo "$zvol"
-		fi
-	done
+	OIFS="$IFS"
+	IFS="
+"
+	# shellcheck disable=SC2086
+	zfs list -H -o name $zvols 2>/dev/null
+	IFS="$OIFS"
 }
 
 list_zvols() {
-	zfs list -t volume -H -o name,volmode,receive_resume_token |
-		while read -r zvol_line; do
-		name=$(echo "$zvol_line" | awk '{print $1}')
-		volmode=$(echo "$zvol_line" | awk '{print $2}')
-		token=$(echo "$zvol_line" | awk '{print $3}')
-		#
-		# /dev links are not created for zvols with volmode = "none".
-		#
+	read -r default_volmode < /sys/module/zfs/parameters/zvol_volmode
+	zfs list -t volume -H -o \
+	    name,volmode,receive_resume_token,redact_snaps,keystatus |
+	    while IFS="	" read -r name volmode token redacted keystatus; do # IFS=\t here!
+
+		# /dev links are not created for zvols with volmode = "none",
+		# redacted zvols, or encrypted zvols for which the key has not
+		# been loaded.
 		[ "$volmode" = "none" ] && continue
-		#
-		# We also also ignore partially received zvols if it is
+		[ "$volmode" = "default" ] && [ "$default_volmode" = "3" ] &&
+		    continue
+		[ "$redacted" = "-" ] || continue
+		[ "$keystatus" = "unavailable" ] && continue
+
+		# We also ignore partially received zvols if it is
 		# not an incremental receive, as those won't even have a block
 		# device minor node created yet.
-		#
 		if [ "$token" != "-" ]; then
-			#
+
 			# Incremental receives create an invisible clone that
 			# is not automatically displayed by zfs list.
-			#
 			if ! zfs list "$name/%recv" >/dev/null 2>&1; then
 				continue
 			fi
@@ -71,7 +74,7 @@
 	while [ "$inner_loop" -lt 30 ]; do
 		inner_loop=$((inner_loop + 1))
 
-		zvols="$(echo "$zvols" | filter_out_zvols_with_links)"
+		zvols="$(filter_out_zvols_with_links)"
 
 		zvols_count=$(count_zvols)
 		if [ "$zvols_count" -eq 0 ]; then
@@ -91,7 +94,7 @@
 		echo "No progress since last loop."
 		echo "Checking if any zvols were deleted."
 
-		zvols=$(echo "$zvols" | filter_out_deleted_zvols)
+		zvols=$(filter_out_deleted_zvols)
 		zvols_count=$(count_zvols)
 
 		if [ "$old_zvols_count" -ne "$zvols_count" ]; then
@@ -106,6 +109,13 @@
 			exit 0
 		fi
 	fi
+
+	#
+	# zvols_count made some progress - let's stay in this loop.
+	#
+	if [ "$old_zvols_count" -gt "$zvols_count" ]; then
+		outer_loop=$((outer_loop - 1))
+	fi
 done
 
 echo "Timed out waiting on zvol links"

diff --git a/zfs/config/Abigail.am b/zfs/config/Abigail.am
new file mode 100644
index 0000000..94687b9
--- /dev/null
+++ b/zfs/config/Abigail.am

@@ -0,0 +1,33 @@
+#
+# checkabi compares each library against its stored .abi file; storeabi
+# regenerates those files.  Both stop at the first failing library.
+# The following abidiff options are applied when performing an ABI check:
+#
+# --no-unreferenced-symbols: Exclude symbols which are not referenced by
+# any debug information.  Without this _init() and _fini() are incorrectly
+# reported on CentOS7 for libuutil.so.
+# --headers-dir1: Limit ABI checks to public OpenZFS headers, otherwise
+# changes in public system headers are also reported.
+# --suppressions: Honor a suppressions file for each library to provide
+# a mechanism for suppressing harmless warnings.
+#
+
+PHONY += checkabi storeabi
+
+checkabi:
+	for lib in $(lib_LTLIBRARIES) ; do \
+		abidiff --no-unreferenced-symbols \
+		    --headers-dir1 ../../include \
+		    --suppressions $${lib%.la}.suppr \
+		    $${lib%.la}.abi .libs/$${lib%.la}.so || exit $$? ; \
+	done
+
+storeabi:
+	cd .libs && \
+	for lib in $(lib_LTLIBRARIES) ; do \
+		abidw --no-show-locs \
+		--no-corpus-path \
+		--no-comp-dir-path \
+		--type-id-style hash \
+		$${lib%.la}.so > ../$${lib%.la}.abi || exit $$? ; \
+	done

diff --git a/zfs/config/CppCheck.am b/zfs/config/CppCheck.am
new file mode 100644
index 0000000..e53013b
--- /dev/null
+++ b/zfs/config/CppCheck.am

@@ -0,0 +1,11 @@
+# Default rules for running cppcheck against the user space components.
+# The check is skipped when configure did not find cppcheck, since an empty
+# $(CPPCHECK) would turn the leading "-j" into make's ignore-errors prefix.
+
+PHONY += cppcheck
+
+CPPCHECKFLAGS  = --std=c99 --quiet --max-configs=1 --error-exitcode=2
+CPPCHECKFLAGS += --inline-suppr -U_KERNEL
+
+cppcheck:
+	if test -n "$(CPPCHECK)"; then $(CPPCHECK) -j$(CPU_COUNT) $(CPPCHECKFLAGS) $(DEFAULT_INCLUDES) $(SOURCES); else echo "skipping cppcheck because cppcheck is not installed"; fi

diff --git a/zfs/config/Rules.am b/zfs/config/Rules.am
index 1e569d3..3b24e36 100644
--- a/zfs/config/Rules.am
+++ b/zfs/config/Rules.am

@@ -3,28 +3,62 @@
 # should include these rules and override or extend them as needed.
 #
 
-DEFAULT_INCLUDES = -include ${top_builddir}/zfs_config.h
+PHONY =
+DEFAULT_INCLUDES = \
+	-include $(top_builddir)/zfs_config.h \
+	-I$(top_builddir)/include \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/module/icp/include \
+	-I$(top_srcdir)/lib/libspl/include
+
+if BUILD_LINUX
+DEFAULT_INCLUDES += \
+	-I$(top_srcdir)/lib/libspl/include/os/linux
+endif
+
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += \
+	-I$(top_srcdir)/lib/libspl/include/os/freebsd
+endif
 
 AM_LIBTOOLFLAGS = --silent
 
-AM_CFLAGS  = -std=gnu99 -Wall -Wstrict-prototypes -fno-strict-aliasing
+AM_CFLAGS  = -std=gnu99 -Wall -Wstrict-prototypes -Wmissing-prototypes
+AM_CFLAGS += -fno-strict-aliasing
 AM_CFLAGS += $(NO_OMIT_FRAME_POINTER)
+AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
 AM_CFLAGS += $(DEBUG_CFLAGS)
 AM_CFLAGS += $(ASAN_CFLAGS)
-AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
+AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) $(NO_FORMAT_ZERO_LENGTH)
+if BUILD_FREEBSD
+AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion
+AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h
+AM_CFLAGS += -I/usr/include -I/usr/local/include
+endif
 
 AM_CPPFLAGS  = -D_GNU_SOURCE
 AM_CPPFLAGS += -D_REENTRANT
 AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
-AM_CPPFLAGS += -DHAVE_LARGE_STACKS=1
-AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\"
 AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
 AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
 AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
 AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\"
+AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\"
 AM_CPPFLAGS += $(DEBUG_CPPFLAGS)
 AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS)
+if BUILD_LINUX
+AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\"
+endif
+if BUILD_FREEBSD
+AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-freebsd-user\"
+endif
 
 AM_LDFLAGS  = $(DEBUG_LDFLAGS)
 AM_LDFLAGS += $(ASAN_LDFLAGS)
+
+if BUILD_FREEBSD
+AM_LDFLAGS += -fstack-protector-strong -shared
+AM_LDFLAGS += -Wl,-x -Wl,--fatal-warnings -Wl,--warn-shared-textrel
+AM_LDFLAGS += -lm
+endif

diff --git a/zfs/config/Shellcheck.am b/zfs/config/Shellcheck.am
new file mode 100644
index 0000000..6b805b7
--- /dev/null
+++ b/zfs/config/Shellcheck.am

@@ -0,0 +1,22 @@
+.PHONY: shellcheck
+shellcheck: $(SCRIPTS) $(SHELLCHECKSCRIPTS)
+if HAVE_SHELLCHECK
+	[ -z "$(SCRIPTS)$(SHELLCHECKSCRIPTS)" ] && exit; shellcheck $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") --exclude=SC1090,SC1091$(SHELLCHECK_IGNORE) --format=gcc $(SCRIPTS) $(SHELLCHECKSCRIPTS)
+else
+	@[ -z "$(SCRIPTS)$(SHELLCHECKSCRIPTS)" ] && exit; echo "skipping shellcheck of" $(SCRIPTS) $(SHELLCHECKSCRIPTS) "because shellcheck is not installed"
+endif
+	@set -e; for dir in $(SHELLCHECKDIRS); do $(MAKE) -C $$dir shellcheck; done
+
+
+# command -v *is* specified by POSIX and every shell in existence supports it
+.PHONY: checkbashisms
+checkbashisms: $(SCRIPTS) $(SHELLCHECKSCRIPTS)
+if HAVE_CHECKBASHISMS
+	[ -z "$(SCRIPTS)$(SHELLCHECKSCRIPTS)" ] && exit; ! if [ -z "$(SHELLCHECK_SHELL)" ]; then \
+	    checkbashisms -npx $(SCRIPTS) $(SHELLCHECKSCRIPTS); else \
+	    for f in $(SCRIPTS) $(SHELLCHECKSCRIPTS); do echo $$f >&3; { echo '#!/bin/$(SHELLCHECK_SHELL)'; cat $$f; } | checkbashisms -npx; done; \
+	fi 3>&2 2>&1 | grep -vFe "'command' with option other than -p" -e 'command -v' $(CHECKBASHISMS_IGNORE) >&2
+else
+	@[ -z "$(SCRIPTS)$(SHELLCHECKSCRIPTS)" ] && exit; echo "skipping checkbashisms of" $(SCRIPTS) $(SHELLCHECKSCRIPTS) "because checkbashisms is not installed"
+endif
+	@set -e; for dir in $(SHELLCHECKDIRS); do $(MAKE) -C $$dir checkbashisms; done

diff --git a/zfs/config/Substfiles.am b/zfs/config/Substfiles.am
new file mode 100644
index 0000000..911903e
--- /dev/null
+++ b/zfs/config/Substfiles.am

@@ -0,0 +1,36 @@
+subst_sed_cmd = \
+	-e 's|@bindir[@]|$(bindir)|g' \
+	-e 's|@sbindir[@]|$(sbindir)|g' \
+	-e 's|@datadir[@]|$(datadir)|g' \
+	-e 's|@sysconfdir[@]|$(sysconfdir)|g' \
+	-e 's|@runstatedir[@]|$(runstatedir)|g' \
+	-e 's|@initconfdir[@]|$(initconfdir)|g' \
+	-e 's|@initdir[@]|$(initdir)|g' \
+	-e 's|@mounthelperdir[@]|$(mounthelperdir)|g' \
+	-e 's|@systemdgeneratordir[@]|$(systemdgeneratordir)|g' \
+	-e 's|@systemdunitdir[@]|$(systemdunitdir)|g' \
+	-e 's|@udevdir[@]|$(udevdir)|g' \
+	-e 's|@udevruledir[@]|$(udevruledir)|g' \
+	-e 's|@zfsexecdir[@]|$(zfsexecdir)|g' \
+	-e 's|@PYTHON[@]|$(PYTHON)|g' \
+	-e 's|@PYTHON_SHEBANG[@]|$(PYTHON_SHEBANG)|g' \
+	-e 's|@DEFAULT_INIT_NFS_SERVER[@]|$(DEFAULT_INIT_NFS_SERVER)|g' \
+	-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g' \
+	-e 's|@LIBFETCH_DYNAMIC[@]|$(LIBFETCH_DYNAMIC)|g' \
+	-e 's|@LIBFETCH_SONAME[@]|$(LIBFETCH_SONAME)|g'
+
+SUBSTFILES =
+CLEANFILES = $(SUBSTFILES)
+EXTRA_DIST = $(SUBSTFILES:=.in)
+
+$(SUBSTFILES):%:%.in Makefile
+	$(AM_V_GEN)set -e; \
+	$(MKDIR_P) $$(dirname $@); \
+	$(RM) $@~; \
+	$(SED) $(subst_sed_cmd) $< >$@~; \
+	if grep -E '@[a-zA-Z0-9_]+@' $@~ >&2; then \
+		echo "Undefined substitution" >&2; \
+		exit 1; \
+	else test $$? -eq 1; fi; \
+	test -x $< && chmod +x $@~; \
+	mv -f $@~ $@

diff --git a/zfs/config/always-arch.m4 b/zfs/config/always-arch.m4
index c3e6b4a..25e8c96 100644
--- a/zfs/config/always-arch.m4
+++ b/zfs/config/always-arch.m4

@@ -1,22 +1,41 @@
 dnl #
-dnl # Set the target arch for libspl atomic implementation and the icp
+dnl # Set the target cpu architecture.  This allows the
+dnl # following syntax to be used in a Makefile.am.
+dnl #
+dnl # ifeq ($(TARGET_CPU),x86_64)
+dnl # ...
+dnl # endif
+dnl #
+dnl # if TARGET_CPU_POWERPC
+dnl # ...
+dnl # else
+dnl # ...
+dnl # endif
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
-	AC_MSG_CHECKING(for target asm dir)
-	TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
-
-	case $TARGET_ARCH in
-	i386|x86_64)
-		TARGET_ASM_DIR=asm-${TARGET_ARCH}
+	case $target_cpu in
+	i?86)
+		TARGET_CPU=i386
 		;;
-	*)
-		TARGET_ASM_DIR=asm-generic
+	amd64|x86_64)
+		TARGET_CPU=x86_64
+		;;
+	powerpc*)
+		TARGET_CPU=powerpc
+		;;
+	aarch64*)
+		TARGET_CPU=aarch64
+		;;
+	sparc64)
+		TARGET_CPU=sparc64
 		;;
 	esac
 
-	AC_SUBST([TARGET_ASM_DIR])
-	AM_CONDITIONAL([TARGET_ASM_X86_64], test $TARGET_ASM_DIR = asm-x86_64)
-	AM_CONDITIONAL([TARGET_ASM_I386], test $TARGET_ASM_DIR = asm-i386)
-	AM_CONDITIONAL([TARGET_ASM_GENERIC], test $TARGET_ASM_DIR = asm-generic)
-	AC_MSG_RESULT([$TARGET_ASM_DIR])
+	AC_SUBST(TARGET_CPU)
+	dnl # Quote TARGET_CPU because it stays unset for CPUs not matched above.
+	AM_CONDITIONAL([TARGET_CPU_I386], [test "x$TARGET_CPU" = xi386])
+	AM_CONDITIONAL([TARGET_CPU_X86_64], [test "x$TARGET_CPU" = xx86_64])
+	AM_CONDITIONAL([TARGET_CPU_POWERPC], [test "x$TARGET_CPU" = xpowerpc])
+	AM_CONDITIONAL([TARGET_CPU_AARCH64], [test "x$TARGET_CPU" = xaarch64])
+	AM_CONDITIONAL([TARGET_CPU_SPARC64], [test "x$TARGET_CPU" = xsparc64])
 ])

diff --git a/zfs/config/always-compiler-options.m4 b/zfs/config/always-compiler-options.m4
index e187f6f..0f66db5 100644
--- a/zfs/config/always-compiler-options.m4
+++ b/zfs/config/always-compiler-options.m4

@@ -22,7 +22,7 @@
 	AS_IF([ test "$enable_asan" = "yes" ], [
 		AC_MSG_CHECKING([whether $CC supports -fsanitize=address])
 		saved_cflags="$CFLAGS"
-		CFLAGS="$CFLAGS -fsanitize=address"
+		CFLAGS="$CFLAGS -Werror -fsanitize=address"
 		AC_LINK_IFELSE([
 			AC_LANG_SOURCE([[ int main() { return 0; } ]])
 		], [
@@ -52,7 +52,7 @@
 	AC_MSG_CHECKING([whether $CC supports -Wframe-larger-than=<size>])
 
 	saved_flags="$CFLAGS"
-	CFLAGS="$CFLAGS -Wframe-larger-than=4096"
+	CFLAGS="$CFLAGS -Werror -Wframe-larger-than=4096"
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
 		FRAME_LARGER_THAN="-Wframe-larger-than=4096"
@@ -73,7 +73,7 @@
 	AC_MSG_CHECKING([whether $CC supports -Wno-format-truncation])
 
 	saved_flags="$CFLAGS"
-	CFLAGS="$CFLAGS -Wno-format-truncation"
+	CFLAGS="$CFLAGS -Werror -Wno-format-truncation"
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
 		NO_FORMAT_TRUNCATION=-Wno-format-truncation
@@ -87,57 +87,97 @@
 	AC_SUBST([NO_FORMAT_TRUNCATION])
 ])
 
-
 dnl #
-dnl # Check if gcc supports -Wno-bool-compare option.
+dnl # Check if gcc supports -Wno-format-zero-length option.
 dnl #
-dnl # We actually invoke gcc with the -Wbool-compare option
-dnl # and infer the 'no-' version does or doesn't exist based upon
-dnl # the results.  This is required because when checking any of
-dnl # no- prefixed options gcc always returns success.
-dnl #
-AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_BOOL_COMPARE], [
-	AC_MSG_CHECKING([whether $CC supports -Wno-bool-compare])
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [
+	AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length])
 
 	saved_flags="$CFLAGS"
-	CFLAGS="$CFLAGS -Wbool-compare"
+	CFLAGS="$CFLAGS -Werror -Wno-format-zero-length"
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
-		NO_BOOL_COMPARE=-Wno-bool-compare
+		NO_FORMAT_ZERO_LENGTH=-Wno-format-zero-length
 		AC_MSG_RESULT([yes])
 	], [
-		NO_BOOL_COMPARE=
+		NO_FORMAT_ZERO_LENGTH=
 		AC_MSG_RESULT([no])
 	])
 
 	CFLAGS="$saved_flags"
-	AC_SUBST([NO_BOOL_COMPARE])
+	AC_SUBST([NO_FORMAT_ZERO_LENGTH])
 ])
 
 dnl #
-dnl # Check if gcc supports -Wno-unused-but-set-variable option.
+dnl # Check if gcc supports -Wno-clobbered option.
 dnl #
-dnl # We actually invoke gcc with the -Wunused-but-set-variable option
+dnl # We actually invoke gcc with the -Wclobbered option
 dnl # and infer the 'no-' version does or doesn't exist based upon
 dnl # the results.  This is required because when checking any of
 dnl # no- prefixed options gcc always returns success.
 dnl #
-AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE], [
-	AC_MSG_CHECKING([whether $CC supports -Wno-unused-but-set-variable])
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED], [
+	AC_MSG_CHECKING([whether $CC supports -Wno-clobbered])
 
 	saved_flags="$CFLAGS"
-	CFLAGS="$CFLAGS -Wunused-but-set-variable"
+	CFLAGS="$CFLAGS -Werror -Wclobbered"
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
-		NO_UNUSED_BUT_SET_VARIABLE=-Wno-unused-but-set-variable
+		NO_CLOBBERED=-Wno-clobbered
 		AC_MSG_RESULT([yes])
 	], [
-		NO_UNUSED_BUT_SET_VARIABLE=
+		NO_CLOBBERED=
 		AC_MSG_RESULT([no])
 	])
 
 	CFLAGS="$saved_flags"
-	AC_SUBST([NO_UNUSED_BUT_SET_VARIABLE])
+	AC_SUBST([NO_CLOBBERED])
+])
+
+dnl #
+dnl # Check if gcc supports -Wimplicit-fallthrough option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [
+	AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough])
+
+	saved_flags="$CFLAGS"
+	CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough"
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough
+		AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1,
+			[Define if compiler supports -Wimplicit-fallthrough])
+		AC_MSG_RESULT([yes])
+	], [
+		IMPLICIT_FALLTHROUGH=
+		AC_MSG_RESULT([no])
+	])
+
+	CFLAGS="$saved_flags"
+	AC_SUBST([IMPLICIT_FALLTHROUGH])
+])
+
+dnl #
+dnl # Check if cc supports -Winfinite-recursion option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION], [
+	AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion])
+
+	saved_flags="$CFLAGS"
+	CFLAGS="$CFLAGS -Werror -Winfinite-recursion"
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		INFINITE_RECURSION=-Winfinite-recursion
+		AC_DEFINE([HAVE_INFINITE_RECURSION], 1,
+			[Define if compiler supports -Winfinite-recursion])
+		AC_MSG_RESULT([yes])
+	], [
+		INFINITE_RECURSION=
+		AC_MSG_RESULT([no])
+	])
+
+	CFLAGS="$saved_flags"
+	AC_SUBST([INFINITE_RECURSION])
 ])
 
 dnl #
@@ -147,7 +187,7 @@
 	AC_MSG_CHECKING([whether $CC supports -fno-omit-frame-pointer])
 
 	saved_flags="$CFLAGS"
-	CFLAGS="$CFLAGS -fno-omit-frame-pointer"
+	CFLAGS="$CFLAGS -Werror -fno-omit-frame-pointer"
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
 		NO_OMIT_FRAME_POINTER=-fno-omit-frame-pointer
@@ -160,3 +200,55 @@
 	CFLAGS="$saved_flags"
 	AC_SUBST([NO_OMIT_FRAME_POINTER])
 ])
+
+dnl #
+dnl # Check if cc supports -fno-ipa-sra option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [
+	AC_MSG_CHECKING([whether $CC supports -fno-ipa-sra])
+
+	saved_flags="$CFLAGS"
+	CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		NO_IPA_SRA=-fno-ipa-sra
+		AC_MSG_RESULT([yes])
+	], [
+		NO_IPA_SRA=
+		AC_MSG_RESULT([no])
+	])
+
+	CFLAGS="$saved_flags"
+	AC_SUBST([NO_IPA_SRA])
+])
+
+dnl #
+dnl # Check if kernel cc supports -fno-ipa-sra option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
+	AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
+
+	saved_cc="$CC"
+	saved_flags="$CFLAGS"
+	CC="gcc"
+	CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
+
+	AS_IF([ test -n "$KERNEL_CC" ], [
+		CC="$KERNEL_CC"
+	])
+	AS_IF([ test -n "$KERNEL_LLVM" ], [
+		CC="clang"
+	])
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		KERNEL_NO_IPA_SRA=-fno-ipa-sra
+		AC_MSG_RESULT([yes])
+	], [
+		KERNEL_NO_IPA_SRA=
+		AC_MSG_RESULT([no])
+	])
+
+	CC="$saved_cc"
+	CFLAGS="$saved_flags"
+	AC_SUBST([KERNEL_NO_IPA_SRA])
+])

diff --git a/zfs/config/always-cppcheck.m4 b/zfs/config/always-cppcheck.m4
new file mode 100644
index 0000000..c7c134a
--- /dev/null
+++ b/zfs/config/always-cppcheck.m4

@@ -0,0 +1,6 @@
+dnl #
+dnl # Check if cppcheck is available.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CPPCHECK], [
+	AC_CHECK_PROG([CPPCHECK], [cppcheck], [cppcheck])
+])

diff --git a/zfs/config/always-parallel.m4 b/zfs/config/always-parallel.m4
new file mode 100644
index 0000000..c1f1ae7
--- /dev/null
+++ b/zfs/config/always-parallel.m4

@@ -0,0 +1,8 @@
+dnl #
+dnl # Check if GNU parallel is available.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PARALLEL], [
+	AC_CHECK_PROG([PARALLEL], [parallel], [yes])
+
+	AM_CONDITIONAL([HAVE_PARALLEL], [test "x$PARALLEL" = "xyes"])
+])

diff --git a/zfs/config/always-python.m4 b/zfs/config/always-python.m4
index c1c0759..5a20081 100644
--- a/zfs/config/always-python.m4
+++ b/zfs/config/always-python.m4

@@ -1,21 +1,18 @@
 dnl #
 dnl # The majority of the python scripts are written to be compatible
-dnl # with Python 2.6 and Python 3.4.  Therefore, they may be installed
-dnl # and used with either interpreter.  This option is intended to
+dnl # with Python 3.6. This option is intended
 dnl # to provide a method to specify the default system version, and
 dnl # set the PYTHON environment variable accordingly.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [
 	AC_ARG_WITH([python],
-		AC_HELP_STRING([--with-python[=VERSION]],
+		AS_HELP_STRING([--with-python[=VERSION]],
 		[default system python version @<:@default=check@:>@]),
 		[with_python=$withval],
 		[with_python=check])
 
 	AS_CASE([$with_python],
-		[check], [AC_CHECK_PROGS([PYTHON], [python3 python2], [:])],
-		[2*], [PYTHON="python${with_python}"],
-		[*python2*], [PYTHON="${with_python}"],
+		[check], [AC_CHECK_PROGS([PYTHON], [python3], [:])],
 		[3*], [PYTHON="python${with_python}"],
 		[*python3*], [PYTHON="${with_python}"],
 		[no], [PYTHON=":"],
@@ -23,31 +20,25 @@
 	)
 
 	dnl #
-	dnl # Minimum supported Python versions for utilities:
-	dnl # Python 2.6 or Python 3.4
+	dnl # Minimum supported Python versions for utilities: Python 3.6
 	dnl #
 	AM_PATH_PYTHON([], [], [:])
 	AS_IF([test -z "$PYTHON_VERSION"], [
-		PYTHON_VERSION=$(basename $PYTHON | tr -cd 0-9.)
+		PYTHON_VERSION=$(echo ${PYTHON##*/} | tr -cd 0-9.)
 	])
 	PYTHON_MINOR=${PYTHON_VERSION#*\.}
 
 	AS_CASE([$PYTHON_VERSION],
-		[2.*], [
-			AS_IF([test $PYTHON_MINOR -lt 6],
-				[AC_MSG_ERROR("Python >= 2.6 is required")])
-		],
 		[3.*], [
-			AS_IF([test $PYTHON_MINOR -lt 4],
-				[AC_MSG_ERROR("Python >= 3.4 is required")])
+			AS_IF([test $PYTHON_MINOR -lt 6],
+				[AC_MSG_ERROR("Python >= 3.6 is required")])
 		],
 		[:|2|3], [],
 		[PYTHON_VERSION=3]
 	)
 
 	AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :])
-	AM_CONDITIONAL([USING_PYTHON_2], [test "x${PYTHON_VERSION%%\.*}" = x2])
-	AM_CONDITIONAL([USING_PYTHON_3], [test "x${PYTHON_VERSION%%\.*}" = x3])
+	AC_SUBST([PYTHON_SHEBANG], [python3])
 
 	dnl #
 	dnl # Request that packages be built for a specific Python version.

diff --git a/zfs/config/always-pyzfs.m4 b/zfs/config/always-pyzfs.m4
index f620a8f..9b123b1 100644
--- a/zfs/config/always-pyzfs.m4
+++ b/zfs/config/always-pyzfs.m4

@@ -6,7 +6,7 @@
 dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS.
 dnl #
 AC_DEFUN([ZFS_AC_PYTHON_MODULE], [
-	PYTHON_NAME=$(basename $PYTHON)
+	PYTHON_NAME=${PYTHON##*/}
 	AC_MSG_CHECKING([for $PYTHON_NAME module: $1])
 	AS_IF([$PYTHON -c "import $1" 2>/dev/null], [
 		AC_MSG_RESULT(yes)
@@ -18,11 +18,11 @@
 ])
 
 dnl #
-dnl # Determines if pyzfs can be built, requires Python 2.7 or later.
+dnl # Determines if pyzfs can be built, requires Python 3.6 or later.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [
 	AC_ARG_ENABLE([pyzfs],
-		AC_HELP_STRING([--enable-pyzfs],
+		AS_HELP_STRING([--enable-pyzfs],
 		[install libzfs_core python bindings @<:@default=check@:>@]),
 		[enable_pyzfs=$enableval],
 		[enable_pyzfs=check])
@@ -47,12 +47,36 @@
 	AC_SUBST(DEFINE_PYZFS)
 
 	dnl #
-	dnl # Require python-devel libraries
+	dnl # Autodetection disables pyzfs if kernel or srpm config
+	dnl #
+	AS_IF([test "x$enable_pyzfs" = xcheck], [
+		AS_IF([test "x$ZFS_CONFIG" = xkernel -o "x$ZFS_CONFIG" = xsrpm ], [
+				enable_pyzfs=no
+				AC_MSG_NOTICE([Disabling pyzfs for kernel/srpm config])
+		])
+	])
+
+	dnl #
+	dnl # Python "packaging" (or, failing that, "distlib") module is required to build and install pyzfs
+	dnl #
+	AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [
+		ZFS_AC_PYTHON_MODULE([packaging], [], [
+			ZFS_AC_PYTHON_MODULE([distlib], [], [
+				AS_IF([test "x$enable_pyzfs" = xyes], [
+					AC_MSG_ERROR("Python $PYTHON_VERSION packaging and distlib modules are not installed")
+				], [test "x$enable_pyzfs" != xno], [
+					enable_pyzfs=no
+				])
+			])
+		])
+	])
+
+	dnl #
+	dnl # Require python3-devel libraries
 	dnl #
 	AS_IF([test "x$enable_pyzfs" = xcheck  -o "x$enable_pyzfs" = xyes], [
 		AS_CASE([$PYTHON_VERSION],
-			[3.*], [PYTHON_REQUIRED_VERSION=">= '3.4.0'"],
-			[2.*], [PYTHON_REQUIRED_VERSION=">= '2.7.0'"],
+			[3.*], [PYTHON_REQUIRED_VERSION=">= '3.6.0'"],
 			[AC_MSG_ERROR("Python $PYTHON_VERSION unknown")]
 		)
 

diff --git a/zfs/config/always-sed.m4 b/zfs/config/always-sed.m4
new file mode 100644
index 0000000..3d7ae28
--- /dev/null
+++ b/zfs/config/always-sed.m4

@@ -0,0 +1,16 @@
+dnl #
+dnl # Set the flags used for sed in-place edits.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_SED], [
+	AC_REQUIRE([AC_PROG_SED])dnl
+	AC_CACHE_CHECK([for sed --in-place], [ac_cv_inplace], [
+		tmpfile=$(mktemp conftest.XXXXXX)
+		echo foo >$tmpfile
+		AS_IF([$SED --in-place 's#foo#bar#' $tmpfile 2>/dev/null],
+		      [ac_cv_inplace="--in-place"],
+		      [$SED -i '' 's#foo#bar#' $tmpfile 2>/dev/null],
+		      [ac_cv_inplace="-i ''"],
+		      [AC_MSG_ERROR([$SED does not support in-place])])
+	])
+	AC_SUBST([ac_inplace], [$ac_cv_inplace])
+])

diff --git a/zfs/config/always-shellcheck.m4 b/zfs/config/always-shellcheck.m4
new file mode 100644
index 0000000..2a9a099
--- /dev/null
+++ b/zfs/config/always-shellcheck.m4

@@ -0,0 +1,10 @@
+dnl #
+dnl # Check if shellcheck and/or checkbashisms are available.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_SHELLCHECK], [
+	AC_CHECK_PROG([SHELLCHECK], [shellcheck], [yes])
+	AC_CHECK_PROG([CHECKBASHISMS], [checkbashisms], [yes])
+
+	AM_CONDITIONAL([HAVE_SHELLCHECK], [test "x$SHELLCHECK" = "xyes"])
+	AM_CONDITIONAL([HAVE_CHECKBASHISMS], [test "x$CHECKBASHISMS" = "xyes"])
+])

diff --git a/zfs/config/always-system.m4 b/zfs/config/always-system.m4
new file mode 100644
index 0000000..3225a52
--- /dev/null
+++ b/zfs/config/always-system.m4

@@ -0,0 +1,26 @@
+dnl #
+dnl # Set the target system
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_SYSTEM], [
+	AC_MSG_CHECKING([for system type ($host_os)])
+	case $host_os in
+		*linux*)
+			AC_DEFINE([SYSTEM_LINUX], [1],
+				[True if ZFS is to be compiled for a Linux system])
+			ac_system="Linux"
+			;;
+		*freebsd*)
+			AC_DEFINE([SYSTEM_FREEBSD], [1],
+				[True if ZFS is to be compiled for a FreeBSD system])
+			ac_system="FreeBSD"
+			;;
+		*)
+			ac_system="unknown"
+			;;
+	esac
+	AC_MSG_RESULT([$ac_system])
+	AC_SUBST([ac_system])
+
+	AM_CONDITIONAL([BUILD_LINUX], [test "x$ac_system" = "xLinux"])
+	AM_CONDITIONAL([BUILD_FREEBSD], [test "x$ac_system" = "xFreeBSD"])
+])

diff --git a/zfs/config/ax_code_coverage.m4 b/zfs/config/ax_code_coverage.m4
index 5cdfe14..3e3c666 100644
--- a/zfs/config/ax_code_coverage.m4
+++ b/zfs/config/ax_code_coverage.m4

@@ -142,7 +142,7 @@
 ']
 		[CODE_COVERAGE_RULES_CAPTURE='
 	$(code_coverage_v_lcov_cap)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --capture --output-file "$(CODE_COVERAGE_OUTPUT_FILE).tmp" --test-name "$(call code_coverage_sanitize,$(PACKAGE_NAME)-$(PACKAGE_VERSION))" --no-checksum --compat-libtool $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_OPTIONS)
-	$(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" "/tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS)
+	$(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS)
 	-@rm -f $(CODE_COVERAGE_OUTPUT_FILE).tmp
 	$(code_coverage_v_genhtml)LANG=C $(GENHTML) $(code_coverage_quiet) $(addprefix --prefix ,$(CODE_COVERAGE_DIRECTORY)) --output-directory "$(CODE_COVERAGE_OUTPUT_DIRECTORY)" --title "$(PACKAGE_NAME)-$(PACKAGE_VERSION) Code Coverage" --legend --show-details "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_GENHTML_OPTIONS)
 	@echo "file://$(abs_builddir)/$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html"
@@ -219,7 +219,11 @@
 $(if $(CODE_COVERAGE_BRANCH_COVERAGE),\
 --rc genhtml_branch_coverage=$(CODE_COVERAGE_BRANCH_COVERAGE))
 CODE_COVERAGE_GENHTML_OPTIONS ?= $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT)
-CODE_COVERAGE_IGNORE_PATTERN ?=
+
+# Add any folders you want to ignore here
+# Ignore tmp and tests themselves
+CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*"
+CODE_COVERAGE_IGNORE_PATTERN += "*/module/zstd/lib/*"
 
 GITIGNOREFILES ?=
 GITIGNOREFILES += $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_DIRECTORY)

diff --git a/zfs/config/ax_count_cpus.m4 b/zfs/config/ax_count_cpus.m4
new file mode 100644
index 0000000..5db8925
--- /dev/null
+++ b/zfs/config/ax_count_cpus.m4

@@ -0,0 +1,101 @@
+# ===========================================================================
+#      https://www.gnu.org/software/autoconf-archive/ax_count_cpus.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_COUNT_CPUS([ACTION-IF-DETECTED],[ACTION-IF-NOT-DETECTED])
+#
+# DESCRIPTION
+#
+#   Attempt to count the number of logical processor cores (including
+#   virtual and HT cores) currently available to use on the machine and
+#   place detected value in CPU_COUNT variable.
+#
+#   On successful detection, ACTION-IF-DETECTED is executed if present. If
+#   the detection fails, then ACTION-IF-NOT-DETECTED is triggered. The
+#   default ACTION-IF-NOT-DETECTED is to set CPU_COUNT to 1.
+#
+# LICENSE
+#
+#   Copyright (c) 2014,2016 Karlson2k (Evgeny Grin) <k2k@narod.ru>
+#   Copyright (c) 2012 Brian Aker <brian@tangent.org>
+#   Copyright (c) 2008 Michael Paul Bailey <jinxidoru@byu.net>
+#   Copyright (c) 2008 Christophe Tournayre <turn3r@users.sourceforge.net>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 22
+
+  AC_DEFUN([AX_COUNT_CPUS],[dnl
+      AC_REQUIRE([AC_CANONICAL_HOST])dnl
+      AC_REQUIRE([AC_PROG_EGREP])dnl
+      AC_MSG_CHECKING([the number of available CPUs])
+      CPU_COUNT="0"
+
+      # Try generic methods
+
+      # 'getconf' is POSIX utility, but '_NPROCESSORS_ONLN' and
+      # 'NPROCESSORS_ONLN' are platform-specific
+      command -v getconf >/dev/null 2>&1 && \
+        CPU_COUNT=`getconf _NPROCESSORS_ONLN 2>/dev/null || getconf NPROCESSORS_ONLN 2>/dev/null` || CPU_COUNT="0"
+      AS_IF([[test "$CPU_COUNT" -gt "0" 2>/dev/null || ! command -v nproc >/dev/null 2>&1]],[[: # empty]],[dnl
+        # 'nproc' is part of GNU Coreutils and is widely available
+        CPU_COUNT=`OMP_NUM_THREADS='' nproc 2>/dev/null` || CPU_COUNT=`nproc 2>/dev/null` || CPU_COUNT="0"
+      ])dnl
+
+      AS_IF([[test "$CPU_COUNT" -gt "0" 2>/dev/null]],[[: # empty]],[dnl
+        # Try platform-specific preferred methods
+        AS_CASE([[$host_os]],dnl
+          [[*linux*]],[[CPU_COUNT=`lscpu -p 2>/dev/null | $EGREP -e '^@<:@0-9@:>@+,' -c` || CPU_COUNT="0"]],dnl
+          [[*darwin*]],[[CPU_COUNT=`sysctl -n hw.logicalcpu 2>/dev/null` || CPU_COUNT="0"]],dnl
+          [[freebsd*]],[[command -v sysctl >/dev/null 2>&1 && CPU_COUNT=`sysctl -n kern.smp.cpus 2>/dev/null` || CPU_COUNT="0"]],dnl
+          [[netbsd*]], [[command -v sysctl >/dev/null 2>&1 && CPU_COUNT=`sysctl -n hw.ncpuonline 2>/dev/null` || CPU_COUNT="0"]],dnl
+          [[solaris*]],[[command -v psrinfo >/dev/null 2>&1 && CPU_COUNT=`psrinfo 2>/dev/null | $EGREP -e '^@<:@0-9@:>@.*on-line' -c 2>/dev/null` || CPU_COUNT="0"]],dnl
+          [[mingw*]],[[CPU_COUNT=`ls -qpU1 /proc/registry/HKEY_LOCAL_MACHINE/HARDWARE/DESCRIPTION/System/CentralProcessor/ 2>/dev/null | $EGREP -e '^@<:@0-9@:>@+/' -c` || CPU_COUNT="0"]],dnl
+          [[msys*]],[[CPU_COUNT=`ls -qpU1 /proc/registry/HKEY_LOCAL_MACHINE/HARDWARE/DESCRIPTION/System/CentralProcessor/ 2>/dev/null | $EGREP -e '^@<:@0-9@:>@+/' -c` || CPU_COUNT="0"]],dnl
+          [[cygwin*]],[[CPU_COUNT=`ls -qpU1 /proc/registry/HKEY_LOCAL_MACHINE/HARDWARE/DESCRIPTION/System/CentralProcessor/ 2>/dev/null | $EGREP -e '^@<:@0-9@:>@+/' -c` || CPU_COUNT="0"]]dnl
+        )dnl
+      ])dnl
+
+      AS_IF([[test "$CPU_COUNT" -gt "0" 2>/dev/null || ! command -v sysctl >/dev/null 2>&1]],[[: # empty]],[dnl
+        # Try less preferred generic method
+        # 'hw.ncpu' exist on many platforms, but not on GNU/Linux
+        CPU_COUNT=`sysctl -n hw.ncpu 2>/dev/null` || CPU_COUNT="0"
+      ])dnl
+
+      AS_IF([[test "$CPU_COUNT" -gt "0" 2>/dev/null]],[[: # empty]],[dnl
+      # Try platform-specific fallback methods
+      # They can be less accurate and slower than preferred methods
+        AS_CASE([[$host_os]],dnl
+          [[*linux*]],[[CPU_COUNT=`$EGREP -e '^processor' -c /proc/cpuinfo 2>/dev/null` || CPU_COUNT="0"]],dnl
+          [[*darwin*]],[[CPU_COUNT=`system_profiler SPHardwareDataType 2>/dev/null | $EGREP -i -e 'number of cores:'|cut -d : -f 2 -s|tr -d ' '` || CPU_COUNT="0"]],dnl
+          [[freebsd*]],[[CPU_COUNT=`dmesg 2>/dev/null| $EGREP -e '^cpu@<:@0-9@:>@+: '|sort -u|$EGREP -e '^' -c` || CPU_COUNT="0"]],dnl
+          [[netbsd*]], [[CPU_COUNT=`command -v cpuctl >/dev/null 2>&1 && cpuctl list 2>/dev/null| $EGREP -e '^@<:@0-9@:>@+ .* online ' -c` || \
+                           CPU_COUNT=`dmesg 2>/dev/null| $EGREP -e '^cpu@<:@0-9@:>@+ at'|sort -u|$EGREP -e '^' -c` || CPU_COUNT="0"]],dnl
+          [[solaris*]],[[command -v kstat >/dev/null 2>&1 && CPU_COUNT=`kstat -m cpu_info -s state -p 2>/dev/null | $EGREP -c -e 'on-line'` || \
+                           CPU_COUNT=`kstat -m cpu_info 2>/dev/null | $EGREP -c -e 'module: cpu_info'` || CPU_COUNT="0"]],dnl
+          [[mingw*]],[AS_IF([[CPU_COUNT=`reg query 'HKLM\\Hardware\\Description\\System\\CentralProcessor' 2>/dev/null | $EGREP -e '\\\\@<:@0-9@:>@+$' -c`]],dnl
+                        [[: # empty]],[[test "$NUMBER_OF_PROCESSORS" -gt "0" 2>/dev/null && CPU_COUNT="$NUMBER_OF_PROCESSORS"]])],dnl
+          [[msys*]],[[test "$NUMBER_OF_PROCESSORS" -gt "0" 2>/dev/null && CPU_COUNT="$NUMBER_OF_PROCESSORS"]],dnl
+          [[cygwin*]],[[test "$NUMBER_OF_PROCESSORS" -gt "0" 2>/dev/null && CPU_COUNT="$NUMBER_OF_PROCESSORS"]]dnl
+        )dnl
+      ])dnl
+
+      AS_IF([[test "x$CPU_COUNT" != "x0" && test "$CPU_COUNT" -gt 0 2>/dev/null]],[dnl
+          AC_MSG_RESULT([[$CPU_COUNT]])
+          m4_ifvaln([$1],[$1],)dnl
+        ],[dnl
+          m4_ifval([$2],[dnl
+            AS_UNSET([[CPU_COUNT]])
+            AC_MSG_RESULT([[unable to detect]])
+            $2
+          ], [dnl
+            CPU_COUNT="1"
+            AC_MSG_RESULT([[unable to detect (assuming 1)]])
+          ])dnl
+        ])dnl
+      ])dnl

diff --git a/zfs/config/ax_python_devel.m4 b/zfs/config/ax_python_devel.m4
index c51b45b..9eef450 100644
--- a/zfs/config/ax_python_devel.m4
+++ b/zfs/config/ax_python_devel.m4

@@ -103,8 +103,7 @@
 	if test "$ac_supports_python_ver" != "True"; then
 		if test -z "$PYTHON_NOVERSIONCHECK"; then
 			AC_MSG_RESULT([no])
-			m4_ifvaln([$2],[$2],[
-				AC_MSG_FAILURE([
+			AC_MSG_FAILURE([
 This version of the AC@&t@_PYTHON_DEVEL macro
 doesn't work properly with versions of Python before
 2.1.0. You may need to re-run configure, setting the
@@ -113,7 +112,6 @@
 Moreover, to disable this check, set PYTHON_NOVERSIONCHECK
 to something else than an empty string.
 ])
-			])
 		else
 			AC_MSG_RESULT([skip at user request])
 		fi
@@ -122,25 +120,47 @@
 	fi
 
 	#
-	# if the macro parameter ``version'' is set, honour it
+	# If the macro parameter ``version'' is set, honour it.
+	# A Python shim class, VPy, is used to implement correct version comparisons via
+	# string expressions, since e.g. a naive textual ">= 2.7.3" won't work for
+	# Python 2.7.10 (the ".1" being evaluated as less than ".3").
 	#
 	if test -n "$1"; then
 		AC_MSG_CHECKING([for a version of Python $1])
-		ac_supports_python_ver=`$PYTHON -c "import sys; \
-			ver = sys.version.split ()[[0]]; \
+                cat << EOF > ax_python_devel_vpy.py
+class VPy:  # version of the running interpreter for comparison with version strings
+    def vtup(self, s):  # dotted version string to a tuple of ints with rc treated as a dot
+        return tuple(map(int, s.strip().replace("rc", ".").split(".")))
+    def __init__(self):
+        import sys
+        self.vpy = tuple(sys.version_info)@<:@:3@:>@  # major minor micro only so equality with an X.Y.Z string can hold
+    def __eq__(self, s):
+        return self.vpy == self.vtup(s)
+    def __ne__(self, s):
+        return self.vpy != self.vtup(s)
+    def __lt__(self, s):
+        return self.vpy < self.vtup(s)
+    def __gt__(self, s):
+        return self.vpy > self.vtup(s)
+    def __le__(self, s):
+        return self.vpy <= self.vtup(s)
+    def __ge__(self, s):
+        return self.vpy >= self.vtup(s)
+EOF
+		ac_supports_python_ver=`$PYTHON -c "import ax_python_devel_vpy; \
+                        ver = ax_python_devel_vpy.VPy(); \
 			print (ver $1)"`
+                rm -rf ax_python_devel_vpy*.py* __pycache__/ax_python_devel_vpy*.py*
 		if test "$ac_supports_python_ver" = "True"; then
-		   AC_MSG_RESULT([yes])
+			AC_MSG_RESULT([yes])
 		else
 			AC_MSG_RESULT([no])
-			m4_ifvaln([$2],[$2],[
-				AC_MSG_ERROR([this package requires Python $1.
+			AC_MSG_ERROR([this package requires Python $1.
 If you have it installed, but it isn't the default Python
 interpreter in your system path, please pass the PYTHON_VERSION
 variable to configure. See ``configure --help'' for reference.
 ])
-				PYTHON_VERSION=""
-			])
+			PYTHON_VERSION=""
 		fi
 	fi
 
@@ -148,8 +168,7 @@
 	# Check if you have distutils, else fail
 	#
 	AC_MSG_CHECKING([for the distutils Python package])
-	ac_distutils_result=`$PYTHON -c "import distutils" 2>&1`
-	if test $? -eq 0; then
+	if ac_distutils_result=`$PYTHON -c "import distutils" 2>&1`; then
 		AC_MSG_RESULT([yes])
 	else
 		AC_MSG_RESULT([no])
@@ -204,7 +223,7 @@
 				ac_python_version=$PYTHON_VERSION
 			else
 				ac_python_version=`$PYTHON -c "import sys; \
-					print (sys.version[[:3]])"`
+					print ('.'.join(sys.version.split('.')[[:2]]))"`
 			fi
 		fi
 

diff --git a/zfs/config/config.rpath b/zfs/config/config.rpath
old mode 100644
new mode 100755
index 7b9da3c..be202c1
--- a/zfs/config/config.rpath
+++ b/zfs/config/config.rpath

@@ -1 +1,684 @@
-# `make distclean` deletes files with size 0. This text is to avoid that.
+#! /bin/sh
+# Output a system dependent set of variables, describing how to set the
+# run time search path of shared libraries in an executable.
+#
+#   Copyright 1996-2019 Free Software Foundation, Inc.
+#   Taken from GNU libtool, 2001
+#   Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+#   This file is free software; the Free Software Foundation gives
+#   unlimited permission to copy and/or distribute it, with or without
+#   modifications, as long as this notice is preserved.
+#
+# The first argument passed to this file is the canonical host specification,
+#    CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or
+#    CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# The environment variables CC, GCC, LDFLAGS, LD, with_gnu_ld
+# should be set by the caller.
+#
+# The set of defined variables is at the end of this script.
+
+# Known limitations:
+# - On IRIX 6.5 with CC="cc", the run time search path must not be longer
+#   than 256 bytes, otherwise the compiler driver will dump core. The only
+#   known workaround is to choose shorter directory names for the build
+#   directory and/or the installation directory.
+
+# All known linkers require a '.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+shrext=.so
+
+host="$1"
+host_cpu=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+# Code taken from libtool.m4's _LT_CC_BASENAME.
+
+for cc_temp in $CC""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`echo "$cc_temp" | sed -e 's%^.*/%%'`
+
+# Code taken from libtool.m4's _LT_COMPILER_PIC.
+
+wl=
+if test "$GCC" = yes; then
+  wl='-Wl,'
+else
+  case "$host_os" in
+    aix*)
+      wl='-Wl,'
+      ;;
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      ;;
+    hpux9* | hpux10* | hpux11*)
+      wl='-Wl,'
+      ;;
+    irix5* | irix6* | nonstopux*)
+      wl='-Wl,'
+      ;;
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+        ecc*)
+          wl='-Wl,'
+          ;;
+        icc* | ifort*)
+          wl='-Wl,'
+          ;;
+        lf95*)
+          wl='-Wl,'
+          ;;
+        nagfor*)
+          wl='-Wl,-Wl,,'
+          ;;
+        pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+          wl='-Wl,'
+          ;;
+        ccc*)
+          wl='-Wl,'
+          ;;
+        xl* | bgxl* | bgf* | mpixl*)
+          wl='-Wl,'
+          ;;
+        como)
+          wl='-lopt='
+          ;;
+        *)
+          case `$CC -V 2>&1 | sed 5q` in
+            *Sun\ F* | *Sun*Fortran*)
+              wl=
+              ;;
+            *Sun\ C*)
+              wl='-Wl,'
+              ;;
+          esac
+          ;;
+      esac
+      ;;
+    newsos6)
+      ;;
+    *nto* | *qnx*)
+      ;;
+    osf3* | osf4* | osf5*)
+      wl='-Wl,'
+      ;;
+    rdos*)
+      ;;
+    solaris*)
+      case $cc_basename in
+        f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+          wl='-Qoption ld '
+          ;;
+        *)
+          wl='-Wl,'
+          ;;
+      esac
+      ;;
+    sunos4*)
+      wl='-Qoption ld '
+      ;;
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      wl='-Wl,'
+      ;;
+    sysv4*MP*)
+      ;;
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      wl='-Wl,'
+      ;;
+    unicos*)
+      wl='-Wl,'
+      ;;
+    uts4*)
+      ;;
+  esac
+fi
+
+# Code taken from libtool.m4's _LT_LINKER_SHLIBS.
+
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+
+case "$host_os" in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+  # Set some defaults for GNU ld with shared library support. These
+  # are reset later if shared libraries are not supported. Putting them
+  # here allows them to be overridden if necessary.
+  # Unlike libtool, we use -rpath here, not --rpath, since the documented
+  # option of GNU ld is called -rpath, not --rpath.
+  hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+  case "$host_os" in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+        ld_shlibs=no
+      fi
+      ;;
+    amigaos*)
+      case "$host_cpu" in
+        powerpc)
+          ;;
+        m68k)
+          hardcode_libdir_flag_spec='-L$libdir'
+          hardcode_minus_L=yes
+          ;;
+      esac
+      ;;
+    beos*)
+      if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    cygwin* | mingw* | pw32* | cegcc*)
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      hardcode_libdir_flag_spec='-L$libdir'
+      if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    haiku*)
+      ;;
+    interix[3-9]*)
+      hardcode_direct=no
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      ;;
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    netbsd*)
+      ;;
+    solaris*)
+      if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then
+        ld_shlibs=no
+      elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
+          ld_shlibs=no
+          ;;
+        *)
+          if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+            hardcode_libdir_flag_spec='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`'
+          else
+            ld_shlibs=no
+          fi
+          ;;
+      esac
+      ;;
+    sunos4*)
+      hardcode_direct=yes
+      ;;
+    *)
+      if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+  esac
+  if test "$ld_shlibs" = no; then
+    hardcode_libdir_flag_spec=
+  fi
+else
+  case "$host_os" in
+    aix3*)
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L=yes
+      if test "$GCC" = yes; then
+        # Neither direct hardcoding nor static linking is supported with a
+        # broken collect2.
+        hardcode_direct=unsupported
+      fi
+      ;;
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+        # On IA64, the linker does run time linking by default, so we don't
+        # have to do anything special.
+        aix_use_runtimelinking=no
+      else
+        aix_use_runtimelinking=no
+        # Test if we are trying to use run time linking or normal
+        # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+        # need to do runtime linking.
+        case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+          for ld_flag in $LDFLAGS; do
+            if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+              aix_use_runtimelinking=yes
+              break
+            fi
+          done
+          ;;
+        esac
+      fi
+      hardcode_direct=yes
+      hardcode_libdir_separator=':'
+      if test "$GCC" = yes; then
+        case $host_os in aix4.[012]|aix4.[012].*)
+          collect2name=`${CC} -print-prog-name=collect2`
+          if test -f "$collect2name" && \
+            strings "$collect2name" | grep resolve_lib_name >/dev/null
+          then
+            # We have reworked collect2
+            :
+          else
+            # We have old collect2
+            hardcode_direct=unsupported
+            hardcode_minus_L=yes
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_libdir_separator=
+          fi
+          ;;
+        esac
+      fi
+      # Begin _LT_AC_SYS_LIBPATH_AIX.
+      echo 'int main () { return 0; }' > conftest.c
+      ${CC} ${LDFLAGS} conftest.c -o conftest
+      aix_libpath=`dump -H conftest 2>/dev/null | sed -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
+}'`
+      if test -z "$aix_libpath"; then
+        aix_libpath=`dump -HX64 conftest 2>/dev/null | sed -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
+}'`
+      fi
+      if test -z "$aix_libpath"; then
+        aix_libpath="/usr/lib:/lib"
+      fi
+      rm -f conftest.c conftest
+      # End _LT_AC_SYS_LIBPATH_AIX.
+      if test "$aix_use_runtimelinking" = yes; then
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+      else
+        if test "$host_cpu" = ia64; then
+          hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+        else
+          hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+        fi
+      fi
+      ;;
+    amigaos*)
+      case "$host_cpu" in
+        powerpc)
+          ;;
+        m68k)
+          hardcode_libdir_flag_spec='-L$libdir'
+          hardcode_minus_L=yes
+          ;;
+      esac
+      ;;
+    bsdi[45]*)
+      ;;
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      hardcode_libdir_flag_spec=' '
+      libext=lib
+      ;;
+    darwin* | rhapsody*)
+      hardcode_direct=no
+      if { case $cc_basename in ifort*) true;; *) test "$GCC" = yes;; esac; }; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    dgux*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      ;;
+    freebsd2.[01]*)
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      ;;
+    freebsd* | dragonfly*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      ;;
+    hpux9*)
+      hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_direct=yes
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L=yes
+      ;;
+    hpux10*)
+      if test "$with_gnu_ld" = no; then
+        hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+        hardcode_libdir_separator=:
+        hardcode_direct=yes
+        # hardcode_minus_L: Not really in the search PATH,
+        # but as the default location of the library.
+        hardcode_minus_L=yes
+      fi
+      ;;
+    hpux11*)
+      if test "$with_gnu_ld" = no; then
+        hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+        hardcode_libdir_separator=:
+        case $host_cpu in
+          hppa*64*|ia64*)
+            hardcode_direct=no
+            ;;
+          *)
+            hardcode_direct=yes
+            # hardcode_minus_L: Not really in the search PATH,
+            # but as the default location of the library.
+            hardcode_minus_L=yes
+            ;;
+        esac
+      fi
+      ;;
+    irix5* | irix6* | nonstopux*)
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    netbsd*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      ;;
+    newsos6)
+      hardcode_direct=yes
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    *nto* | *qnx*)
+      ;;
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+        hardcode_direct=yes
+        if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+          hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+        else
+          case "$host_os" in
+            openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+              hardcode_libdir_flag_spec='-R$libdir'
+              ;;
+            *)
+              hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+              ;;
+          esac
+        fi
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    os2*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      ;;
+    osf3*)
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    osf4* | osf5*)
+      if test "$GCC" = yes; then
+        hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      else
+        # Both cc and cxx compiler support -rpath directly
+        hardcode_libdir_flag_spec='-rpath $libdir'
+      fi
+      hardcode_libdir_separator=:
+      ;;
+    solaris*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      ;;
+    sunos4*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      ;;
+    sysv4)
+      case $host_vendor in
+        sni)
+          hardcode_direct=yes # is this really true???
+          ;;
+        siemens)
+          hardcode_direct=no
+          ;;
+        motorola)
+          hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+          ;;
+      esac
+      ;;
+    sysv4.3*)
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+        ld_shlibs=yes
+      fi
+      ;;
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      ;;
+    sysv5* | sco3.2v5* | sco5v6*)
+      hardcode_libdir_flag_spec='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`'
+      hardcode_libdir_separator=':'
+      ;;
+    uts4*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      ;;
+    *)
+      ld_shlibs=no
+      ;;
+  esac
+fi
+
+# Check dynamic linker characteristics
+# Code taken from libtool.m4's _LT_SYS_DYNAMIC_LINKER.
+# Unlike libtool.m4, here we don't care about _all_ names of the library, but
+# only about the one the linker finds when passed -lNAME. This is the last
+# element of library_names_spec in libtool.m4, or possibly two of them if the
+# linker has special search rules.
+library_names_spec=      # the last element of library_names_spec in libtool.m4
+libname_spec='lib$name'
+case "$host_os" in
+  aix3*)
+    library_names_spec='$libname.a'
+    ;;
+  aix[4-9]*)
+    library_names_spec='$libname$shrext'
+    ;;
+  amigaos*)
+    case "$host_cpu" in
+      powerpc*)
+        library_names_spec='$libname$shrext' ;;
+      m68k)
+        library_names_spec='$libname.a' ;;
+    esac
+    ;;
+  beos*)
+    library_names_spec='$libname$shrext'
+    ;;
+  bsdi[45]*)
+    library_names_spec='$libname$shrext'
+    ;;
+  cygwin* | mingw* | pw32* | cegcc*)
+    shrext=.dll
+    library_names_spec='$libname.dll.a $libname.lib'
+    ;;
+  darwin* | rhapsody*)
+    shrext=.dylib
+    library_names_spec='$libname$shrext'
+    ;;
+  dgux*)
+    library_names_spec='$libname$shrext'
+    ;;
+  freebsd[23].*)
+    library_names_spec='$libname$shrext$versuffix'
+    ;;
+  freebsd* | dragonfly*)
+    library_names_spec='$libname$shrext'
+    ;;
+  gnu*)
+    library_names_spec='$libname$shrext'
+    ;;
+  haiku*)
+    library_names_spec='$libname$shrext'
+    ;;
+  hpux9* | hpux10* | hpux11*)
+    case $host_cpu in
+      ia64*)
+        shrext=.so
+        ;;
+      hppa*64*)
+        shrext=.sl
+        ;;
+      *)
+        shrext=.sl
+        ;;
+    esac
+    library_names_spec='$libname$shrext'
+    ;;
+  interix[3-9]*)
+    library_names_spec='$libname$shrext'
+    ;;
+  irix5* | irix6* | nonstopux*)
+    library_names_spec='$libname$shrext'
+    case "$host_os" in
+      irix5* | nonstopux*)
+        libsuff= shlibsuff=
+        ;;
+      *)
+        case $LD in
+          *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= ;;
+          *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 ;;
+          *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 ;;
+          *) libsuff= shlibsuff= ;;
+        esac
+        ;;
+    esac
+    ;;
+  linux*oldld* | linux*aout* | linux*coff*)
+    ;;
+  linux* | k*bsd*-gnu | kopensolaris*-gnu)
+    library_names_spec='$libname$shrext'
+    ;;
+  knetbsd*-gnu)
+    library_names_spec='$libname$shrext'
+    ;;
+  netbsd*)
+    library_names_spec='$libname$shrext'
+    ;;
+  newsos6)
+    library_names_spec='$libname$shrext'
+    ;;
+  *nto* | *qnx*)
+    library_names_spec='$libname$shrext'
+    ;;
+  openbsd*)
+    library_names_spec='$libname$shrext$versuffix'
+    ;;
+  os2*)
+    libname_spec='$name'
+    shrext=.dll
+    library_names_spec='$libname.a'
+    ;;
+  osf3* | osf4* | osf5*)
+    library_names_spec='$libname$shrext'
+    ;;
+  rdos*)
+    ;;
+  solaris*)
+    library_names_spec='$libname$shrext'
+    ;;
+  sunos4*)
+    library_names_spec='$libname$shrext$versuffix'
+    ;;
+  sysv4 | sysv4.3*)
+    library_names_spec='$libname$shrext'
+    ;;
+  sysv4*MP*)
+    library_names_spec='$libname$shrext'
+    ;;
+  sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+    library_names_spec='$libname$shrext'
+    ;;
+  tpf*)
+    library_names_spec='$libname$shrext'
+    ;;
+  uts4*)
+    library_names_spec='$libname$shrext'
+    ;;
+esac
+
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
+escaped_wl=`echo "X$wl" | sed -e 's/^X//' -e "$sed_quote_subst"`
+shlibext=`echo "$shrext" | sed -e 's,^\.,,'`
+escaped_libname_spec=`echo "X$libname_spec" | sed -e 's/^X//' -e "$sed_quote_subst"`
+escaped_library_names_spec=`echo "X$library_names_spec" | sed -e 's/^X//' -e "$sed_quote_subst"`
+escaped_hardcode_libdir_flag_spec=`echo "X$hardcode_libdir_flag_spec" | sed -e 's/^X//' -e "$sed_quote_subst"`
+
+LC_ALL=C sed -e 's/^\([a-zA-Z0-9_]*\)=/acl_cv_\1=/' <<EOF
+
+# How to pass a linker flag through the compiler.
+wl="$escaped_wl"
+
+# Static library suffix (normally "a").
+libext="$libext"
+
+# Shared library suffix (normally "so").
+shlibext="$shlibext"
+
+# Format of library name prefix.
+libname_spec="$escaped_libname_spec"
+
+# Library names that the linker finds when passed -lNAME.
+library_names_spec="$escaped_library_names_spec"
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec="$escaped_hardcode_libdir_flag_spec"
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator="$hardcode_libdir_separator"
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct="$hardcode_direct"
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L="$hardcode_minus_L"
+
+EOF

diff --git a/zfs/config/deb.am b/zfs/config/deb.am
index 83059a9..2dcd30e 100644
--- a/zfs/config/deb.am
+++ b/zfs/config/deb.am

@@ -1,3 +1,5 @@
+PHONY += deb-kmod deb-dkms deb-utils deb deb-local
+
 deb-local:
 	@(if test "${HAVE_DPKGBUILD}" = "no"; then \
 		echo -e "\n" \
@@ -12,7 +14,19 @@
 	"*** package for your distribution which provides ${ALIEN},\n" \
 	"*** re-run configure, and try again.\n"; \
 		exit 1; \
-	fi)
+	fi; \
+## Alien 8.95.1, 8.95.2 and 8.95.3 are known to be broken, so stop here
+## with an explanation instead of letting deb generation fail later on.
+## The shell case below matches exactly those three versions.
+	case "${ALIEN_MAJOR}.${ALIEN_MINOR}.${ALIEN_POINT}" in \
+	(8.95.1 | 8.95.2 | 8.95.3) \
+		/bin/echo -e "\n" \
+	"*** Installed version of ${ALIEN} is known to be broken;\n" \
+	"*** attempting to generate debs will fail! See\n" \
+	"*** https://github.com/openzfs/zfs/issues/11650 for details.\n"; \
+		exit 1 \
+		;; \
+	esac)
 
 deb-kmod: deb-local rpm-kmod
 	name=${PACKAGE}; \
@@ -33,17 +47,17 @@
 	fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \
 	$(RM) $$pkg1
 
-deb-utils: deb-local rpm-utils
+deb-utils: deb-local rpm-utils-initramfs
 	name=${PACKAGE}; \
 	version=${VERSION}-${RELEASE}; \
 	arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
 	debarch=`$(DPKG) --print-architecture`; \
 	pkg1=$${name}-$${version}.$${arch}.rpm; \
-	pkg2=libnvpair1-$${version}.$${arch}.rpm; \
-	pkg3=libuutil1-$${version}.$${arch}.rpm; \
-	pkg4=libzfs2-$${version}.$${arch}.rpm; \
-	pkg5=libzpool2-$${version}.$${arch}.rpm; \
-	pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \
+	pkg2=libnvpair3-$${version}.$${arch}.rpm; \
+	pkg3=libuutil3-$${version}.$${arch}.rpm; \
+	pkg4=libzfs5-$${version}.$${arch}.rpm; \
+	pkg5=libzpool5-$${version}.$${arch}.rpm; \
+	pkg6=libzfs5-devel-$${version}.$${arch}.rpm; \
 	pkg7=$${name}-test-$${version}.$${arch}.rpm; \
 	pkg8=$${name}-dracut-$${version}.noarch.rpm; \
 	pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
@@ -51,16 +65,16 @@
 ## Arguments need to be passed to dh_shlibdeps. Alien provides no mechanism
 ## to do this, so we install a shim onto the path which calls the real
 ## dh_shlibdeps with the required arguments.
-	path_prepend=`mktemp -d /tmp/intercept.XXX`; \
-	echo "#$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
+	path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \
+	echo "#!$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
 	echo "`which dh_shlibdeps` -- \
-	 -xlibuutil1linux -xlibnvpair1linux -xlibzfs2linux -xlibzpool2linux" \
+	 -xlibuutil3linux -xlibnvpair3linux -xlibzfs5linux -xlibzpool5linux" \
 	 >> $${path_prepend}/dh_shlibdeps; \
 ## These -x arguments are passed to dpkg-shlibdeps, which exclude the
 ## Debianized packages from the auto-generated dependencies of the new debs,
 ## which should NOT be mixed with the alien-generated debs created here
 	chmod +x $${path_prepend}/dh_shlibdeps; \
-	env PATH=$${path_prepend}:$${PATH} \
+	env "PATH=$${path_prepend}:$${PATH}" \
 	fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch \
 	    $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
 	    $$pkg8 $$pkg9 $$pkg10 || exit 1; \

diff --git a/zfs/config/find_system_library.m4 b/zfs/config/find_system_library.m4
index 9d22bcf..310b441 100644
--- a/zfs/config/find_system_library.m4
+++ b/zfs/config/find_system_library.m4

@@ -4,70 +4,95 @@
 dnl requires ax_save_flags.m4 from autoconf-archive
 dnl requires ax_restore_flags.m4 from autoconf-archive
 
-dnl FIND_SYSTEM_LIBRARY(VARIABLE-PREFIX, MODULE, HEADER, HEADER-PREFIXES, LIBRARY, FUNCTIONS, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+dnl ZFS_AC_FIND_SYSTEM_LIBRARY(VARIABLE-PREFIX, MODULE, HEADER, HEADER-PREFIXES, LIBRARY, FUNCTIONS, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
 
-AC_DEFUN([FIND_SYSTEM_LIBRARY], [
+AC_DEFUN([ZFS_AC_FIND_SYSTEM_LIBRARY], [
     AC_REQUIRE([PKG_PROG_PKG_CONFIG])
 
+    _header_found=
     _library_found=
+    _pc_found=
 
-    PKG_CHECK_MODULES([$1], [$2], [_library_found=1], [
-        AS_IF([test -f /usr/include/[$3]], [
-            AC_SUBST([$1][_CFLAGS], [])
-            AC_SUBST([$1][_LIBS], ["-l[$5]]")
-            _library_found=1
-        ],[ AS_IF([test -f /usr/local/include/[$3]], [
-            AC_SUBST([$1][_CFLAGS], ["-I/usr/local/include"])
-            AC_SUBST([$1][_LIBS], ["-L/usr/local -l[$5]]")
-            _library_found=1
-        ],[dnl ELSE
-            m4_foreach([prefix], [$4], [
-                AS_IF([test "x$_library_found" != "x1"], [
-                    AS_IF([test -f [/usr/include/]prefix[/][$3]], [
-                        AC_SUBST([$1][_CFLAGS], ["[-I/usr/include/]prefix["]])
-                        AC_SUBST([$1][_LIBS], ["-l[$5]]")
-                        _library_found=1
-                    ],[ AS_IF([test -f [/usr/local/include/]prefix[/][$3]], [
-                        AC_SUBST([$1][_CFLAGS], ["[-I/usr/local/include/]prefix["]])
-                        AC_SUBST([$1][_LIBS], ["-L/usr/local -l[$5]"])
-                        _library_found=1
-                    ])])
-                ])
-            ])
-        ])])
+    AS_IF([test -n "$2"], [PKG_CHECK_MODULES([$1], [$2], [
+	_header_found=1
+	_library_found=1
+	_pc_found=1
+    ], [:])])
 
-        AS_IF([test -z "$_library_found"], [
-            AC_MSG_WARN([cannot find [$2] via pkg-config or in the standard locations])
-        ])
+    # set _header_found/_library_found if the user passed in CFLAGS/LIBS
+    AS_IF([test "x$[$1][_CFLAGS]" != x], [_header_found=1])
+    AS_IF([test "x$[$1][_LIBS]" != x], [_library_found=1])
+
+    AX_SAVE_FLAGS
+
+    orig_CFLAGS="$CFLAGS"
+
+    for _prefixdir in /usr /usr/local
+    do
+	AS_VAR_PUSHDEF([header_cache], [ac_cv_header_$3])
+	AS_IF([test "x$_prefixdir" != "x/usr"], [
+	    [$1][_CFLAGS]="-I$lt_sysroot$_prefixdir/include"
+	    AS_IF([test "x$_library_found" = x], [
+		[$1][_LIBS]="-L$lt_sysroot$_prefixdir/lib"
+	    ])
+	])
+	CFLAGS="$orig_CFLAGS $[$1][_CFLAGS]"
+	AS_UNSET([header_cache])
+	AC_CHECK_HEADER([$3], [
+	    _header_found=1
+	    break
+	], [AS_IF([test "x$_header_found" = "x1"], [
+	    # if pkg-config or the user set CFLAGS, fail if the header is unusable
+	    AC_MSG_FAILURE([header [$3] for library [$5] is not usable])
+	])], [AC_INCLUDES_DEFAULT])
+	# search for header under HEADER-PREFIXES
+	m4_foreach_w([prefix], [$4], [
+	    [$1][_CFLAGS]=["-I$lt_sysroot$_prefixdir/include/]prefix["]
+	    CFLAGS="$orig_CFLAGS $[$1][_CFLAGS]"
+	    AS_UNSET([header_cache])
+	    AC_CHECK_HEADER([$3], [
+		_header_found=1
+		break
+	    ], [], [AC_INCLUDES_DEFAULT])
+	])
+	AS_VAR_POPDEF([header_cache])
+    done
+
+    AS_IF([test "x$_header_found" = "x1"], [
+	AS_IF([test "x$_library_found" = x], [
+	    [$1][_LIBS]="$[$1]_LIBS -l[$5]"
+	])
+	LDFLAGS="$LDFLAGS $[$1][_LIBS]"
+
+	_libcheck=1
+	m4_ifval([$6],
+	    [m4_foreach_w([func], [$6], [AC_CHECK_LIB([$5], func, [:], [_libcheck=])])],
+	    [AC_CHECK_LIB([$5], [main], [:], [_libcheck=])])
+
+	AS_IF([test "x$_libcheck" = "x1"], [_library_found=1],
+	    [test "x$_library_found" = "x1"], [
+	    # if pkg-config or the user set LIBS, fail if the library is unusable
+	    AC_MSG_FAILURE([library [$5] is not usable])
+	])
+    ], [test "x$_library_found" = "x1"], [
+	# if the user set LIBS, fail if we didn't find the header
+	AC_MSG_FAILURE([cannot find header [$3] for library [$5]])
     ])
 
-    dnl do some further sanity checks
+    AX_RESTORE_FLAGS
 
-    AS_IF([test -n "$_library_found"], [
-        AX_SAVE_FLAGS
-
-        CPPFLAGS="$CPPFLAGS $(echo $[$1][_CFLAGS] | sed 's/-include */-include-/g; s/^/ /; s/ [^-][^ ]*//g; s/ -[^Ii][^ ]*//g; s/-include-/-include /g; s/^ //;')"
-        CFLAGS="$CFLAGS $[$1][_CFLAGS]"
-        LDFLAGS="$LDFLAGS $[$1][_LIBS]"
-
-        AC_CHECK_HEADER([$3], [], [
-            AC_MSG_WARN([header [$3] for library [$2] is not usable])
-            _library_found=
-        ])
-
-        m4_foreach([func], [$6], [
-            AC_CHECK_LIB([$5], func, [], [
-                AC_MSG_WARN([cannot find ]func[ in library [$5]])
-                _library_found=
-            ])
-        ])
-        
-        AX_RESTORE_FLAGS
-    ])
-
-    AS_IF([test -n "$_library_found"], [
-        :;$7
+    AS_IF([test "x$_header_found" = "x1" && test "x$_library_found" = "x1"], [
+	AC_SUBST([$1]_CFLAGS)
+	AC_SUBST([$1]_LIBS)
+	AS_IF([test "x$_pc_found" = "x1"], [
+	    AC_SUBST([$1]_PC, [$2])
+	])
+	AC_DEFINE([HAVE_][$1], [1], [Define if you have [$5]])
+	$7
     ],[dnl ELSE
-        :;$8
+	AC_SUBST([$1]_CFLAGS, [])
+	AC_SUBST([$1]_LIBS, [])
+	AC_MSG_WARN([cannot find [$5] via pkg-config or in the standard locations])
+	$8
     ])
 ])

diff --git a/zfs/config/iconv.m4 b/zfs/config/iconv.m4
index fc915fd..99b339a 100644
--- a/zfs/config/iconv.m4
+++ b/zfs/config/iconv.m4

@@ -29,9 +29,9 @@
   AC_REQUIRE([AM_ICONV_LINKFLAGS_BODY])
 
   dnl Add $INCICONV to CPPFLAGS before performing the following checks,
-  dnl because if the user has installed libiconv and not disabled its use
-  dnl via --without-libiconv-prefix, he wants to use it. The first
-  dnl AC_LINK_IFELSE will then fail, the second AC_LINK_IFELSE will succeed.
+  dnl so that if libiconv is installed, it will be used (unless disabled
+  dnl via --without-libiconv-prefix).  The first AC_LINK_IFELSE will
+  dnl then fail, the second AC_LINK_IFELSE will succeed.
   am_save_CPPFLAGS="$CPPFLAGS"
   AC_LIB_APPENDTOVAR([CPPFLAGS], [$INCICONV])
 

diff --git a/zfs/config/kernel-acl-refcount.m4 b/zfs/config/kernel-acl-refcount.m4
deleted file mode 100644
index 43e3c44..0000000
--- a/zfs/config/kernel-acl-refcount.m4
+++ /dev/null

@@ -1,20 +0,0 @@
-dnl #
-dnl # 4.16 kernel: check if struct posix_acl acl.a_refcount is a refcount_t.
-dnl # It's an atomic_t on older kernels.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_ACL_HAS_REFCOUNT], [
-	AC_MSG_CHECKING([whether posix_acl has refcount_t])
-	ZFS_LINUX_TRY_COMPILE([
-		#include <linux/backing-dev.h>
-		#include <linux/refcount.h>
-		#include <linux/posix_acl.h>
-	],[
-		struct posix_acl acl;
-		refcount_t *r __attribute__ ((unused)) = &acl.a_refcount;
-	],[
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_ACL_REFCOUNT, 1, [posix_acl has refcount_t])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-acl.m4 b/zfs/config/kernel-acl.m4
index 68a7287..be08c3c 100644
--- a/zfs/config/kernel-acl.m4
+++ b/zfs/config/kernel-acl.m4

@@ -11,7 +11,7 @@
 	], [
 		struct posix_acl *tmp = posix_acl_alloc(1, 0);
 		posix_acl_release(tmp);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [
@@ -50,7 +50,7 @@
 		struct posix_acl *acl = posix_acl_alloc(1, 0);
 		set_cached_acl(ip, ACL_TYPE_ACCESS, acl);
 		forget_cached_acl(ip, ACL_TYPE_ACCESS);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [
@@ -66,11 +66,10 @@
 
 dnl #
 dnl # 3.1 API change,
-dnl # posix_acl_chmod_masq() is not exported anymore and posix_acl_chmod()
-dnl # was introduced to replace it.
+dnl # posix_acl_chmod() was added as the preferred interface.
 dnl #
 dnl # 3.14 API change,
-dnl # posix_acl_chmod() is changed to __posix_acl_chmod()
+dnl # posix_acl_chmod() was changed to __posix_acl_chmod()
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD], [
 	ZFS_LINUX_TEST_SRC([posix_acl_chmod], [
@@ -89,14 +88,6 @@
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [
-	AC_MSG_CHECKING([whether posix_acl_chmod exists])
-	ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, [posix_acl_chmod() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-
 	AC_MSG_CHECKING([whether __posix_acl_chmod exists])
 	ZFS_LINUX_TEST_RESULT([__posix_acl_chmod], [
 		AC_MSG_RESULT(yes)
@@ -104,12 +95,21 @@
 		    [__posix_acl_chmod() exists])
 	],[
 		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether posix_acl_chmod exists])
+		ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1,
+			    [posix_acl_chmod() exists])
+		],[
+			ZFS_LINUX_TEST_ERROR([posix_acl_chmod()])
+		])
 	])
 ])
 
 dnl #
 dnl # 3.1 API change,
-dnl # posix_acl_equiv_mode now wants an umode_t* instead of a mode_t*
+dnl # posix_acl_equiv_mode now wants a umode_t * instead of a mode_t *
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [
 	ZFS_LINUX_TEST_SRC([posix_acl_equiv_mode], [
@@ -117,7 +117,7 @@
 		#include <linux/posix_acl.h>
 	],[
 		umode_t tmp;
-		posix_acl_equiv_mode(NULL,&tmp);
+		posix_acl_equiv_mode(NULL, &tmp);
 	])
 ])
 
@@ -125,10 +125,8 @@
 	AC_MSG_CHECKING([whether posix_acl_equiv_mode() wants umode_t])
 	ZFS_LINUX_TEST_RESULT([posix_acl_equiv_mode], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T, 1,
-		    [posix_acl_equiv_mode wants umode_t*])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([posix_acl_equiv_mode()])
 	])
 ])
 
@@ -161,126 +159,15 @@
 ])
 
 dnl #
-dnl # 2.6.27 API change,
-dnl # Check if inode_operations contains the function permission
-dnl # and expects the nameidata structure to have been removed.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION], [
-	ZFS_LINUX_TEST_SRC([inode_operations_permission], [
-		#include <linux/fs.h>
-
-		int permission_fn(struct inode *inode, int mask) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.permission = permission_fn,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION], [
-	AC_MSG_CHECKING([whether iops->permission() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_permission], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 2.6.26 API change,
-dnl # Check if inode_operations contains the function permission
-dnl # and expects the nameidata structure to be passed.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [
-	ZFS_LINUX_TEST_SRC([inode_operations_permission_with_nameidata], [
-		#include <linux/fs.h>
-		#include <linux/sched.h>
-
-		int permission_fn(struct inode *inode, int mask,
-		    struct nameidata *nd) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.permission = permission_fn,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA], [
-	AC_MSG_CHECKING([whether iops->permission() wants nameidata])
-	ZFS_LINUX_TEST_RESULT([inode_operations_permission_with_nameidata], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_PERMISSION, 1, [iops->permission() exists])
-		AC_DEFINE(HAVE_PERMISSION_WITH_NAMEIDATA, 1,
-		    [iops->permission() with nameidata exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 2.6.32 API change,
-dnl # Check if inode_operations contains the function check_acl
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL], [
-	ZFS_LINUX_TEST_SRC([inode_operations_check_acl], [
-		#include <linux/fs.h>
-
-		int check_acl_fn(struct inode *inode, int mask) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.check_acl = check_acl_fn,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL], [
-	AC_MSG_CHECKING([whether iops->check_acl() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_check_acl], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 2.6.38 API change,
-dnl # The function check_acl gained a new parameter: flags
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [
-	ZFS_LINUX_TEST_SRC([inode_operations_check_acl_with_flags], [
-		#include <linux/fs.h>
-
-		int check_acl_fn(struct inode *inode, int mask,
-		    unsigned int flags) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.check_acl = check_acl_fn,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS], [
-	AC_MSG_CHECKING([whether iops->check_acl() wants flags])
-	ZFS_LINUX_TEST_RESULT([inode_operations_check_acl_with_flags], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CHECK_ACL, 1, [iops->check_acl() exists])
-		AC_DEFINE(HAVE_CHECK_ACL_WITH_FLAGS, 1,
-		    [iops->check_acl() wants flags])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
 dnl # 3.1 API change,
 dnl # Check if inode_operations contains the function get_acl
 dnl #
+dnl # 5.15 API change,
+dnl # Added the bool rcu argument to get_acl for rcu path walk.
+dnl #
+dnl # 6.2 API change,
+dnl # get_acl() was renamed to get_inode_acl()
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [
 	ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [
 		#include <linux/fs.h>
@@ -293,6 +180,30 @@
 			.get_acl = get_acl_fn,
 		};
 	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_get_acl_rcu], [
+		#include <linux/fs.h>
+
+		struct posix_acl *get_acl_fn(struct inode *inode, int type,
+		    bool rcu) { return NULL; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.get_acl = get_acl_fn,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_get_inode_acl], [
+		#include <linux/fs.h>
+
+		struct posix_acl *get_inode_acl_fn(struct inode *inode, int type,
+		    bool rcu) { return NULL; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.get_inode_acl = get_inode_acl_fn,
+		};
+	],[])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [
@@ -301,7 +212,17 @@
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_GET_ACL, 1, [iops->get_acl() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_RESULT([inode_operations_get_acl_rcu], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_GET_ACL_RCU, 1, [iops->get_acl() takes rcu])
+		],[
+			ZFS_LINUX_TEST_RESULT([inode_operations_get_inode_acl], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_GET_INODE_ACL, 1, [has iops->get_inode_acl()])
+			],[
+				ZFS_LINUX_TEST_ERROR([iops->get_acl() or iops->get_inode_acl()])
+			])
+		])
 	])
 ])
 
@@ -309,7 +230,52 @@
 dnl # 3.14 API change,
 dnl # Check if inode_operations contains the function set_acl
 dnl #
+dnl # 5.12 API change,
+dnl # set_acl() added a user_namespace* parameter first
+dnl #
+dnl # 6.2 API change,
+dnl # set_acl() second parameter changed to a struct dentry *
+dnl #
+dnl # 6.3 API change,
+dnl # set_acl() first parameter changed to struct mnt_idmap *
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [
+	ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [
+		#include <linux/fs.h>
+
+		int set_acl_fn(struct mnt_idmap *idmap,
+		    struct dentry *dent, struct posix_acl *acl,
+		    int type) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.set_acl = set_acl_fn,
+		};
+	],[])
+	ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [
+		#include <linux/fs.h>
+
+		int set_acl_fn(struct user_namespace *userns,
+		    struct dentry *dent, struct posix_acl *acl,
+		    int type) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.set_acl = set_acl_fn,
+		};
+	],[])
+	ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns], [
+		#include <linux/fs.h>
+
+		int set_acl_fn(struct user_namespace *userns,
+		    struct inode *inode, struct posix_acl *acl,
+		    int type) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.set_acl = set_acl_fn,
+		};
+	],[])
 	ZFS_LINUX_TEST_SRC([inode_operations_set_acl], [
 		#include <linux/fs.h>
 
@@ -325,11 +291,31 @@
 
 AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [
 	AC_MSG_CHECKING([whether iops->set_acl() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
+	ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+		AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+			AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1,
+			    [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *])
+		],[
+			ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+				AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
+				    [iops->set_acl() takes 4 args, arg2 is struct dentry *])
+			],[
+				ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
+				],[
+					ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
+				])
+			])
+		])
 	])
 ])
 
@@ -390,10 +376,6 @@
 	ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD
 	ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T
 	ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS
-	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION
-	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA
-	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL
-	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS
 	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL
 	ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL
 	ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE
@@ -406,10 +388,6 @@
 	ZFS_AC_KERNEL_POSIX_ACL_CHMOD
 	ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T
 	ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS
-	ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION
-	ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA
-	ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL
-	ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS
 	ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL
 	ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL
 	ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE

diff --git a/zfs/config/kernel-add-disk.m4 b/zfs/config/kernel-add-disk.m4
new file mode 100644
index 0000000..86d81ea
--- /dev/null
+++ b/zfs/config/kernel-add-disk.m4

@@ -0,0 +1,24 @@
+dnl #
+dnl # 5.16 API change
+dnl # add_disk grew a must-check return code
+dnl #
+dnl # The add_disk_ret test source below assigns the result of add_disk()
+dnl # to an int; ZFS_AC_KERNEL_ADD_DISK reports the outcome of that test.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [
+	ZFS_LINUX_TEST_SRC([add_disk_ret], [
+		#include <linux/blkdev.h>
+	], [
+		struct gendisk *disk = NULL;
+		int error __attribute__ ((unused)) = add_disk(disk);
+	])
+])
+
+dnl #
+dnl # Report the add_disk_ret test: the first ZFS_LINUX_TEST_RESULT branch
+dnl # answers yes and defines HAVE_ADD_DISK_RET, the second answers no.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_ADD_DISK], [
+	AC_MSG_CHECKING([whether add_disk() returns int])
+	ZFS_LINUX_TEST_RESULT([add_disk_ret], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_ADD_DISK_RET], [1], [add_disk() returns int])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-automount.m4 b/zfs/config/kernel-automount.m4
index 93e14fa..f7bb63c 100644
--- a/zfs/config/kernel-automount.m4
+++ b/zfs/config/kernel-automount.m4

@@ -12,15 +12,14 @@
 		struct dentry_operations dops __attribute__ ((unused)) = {
 			.d_automount = d_automount,
 		};
-	],[])
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_AUTOMOUNT], [
 	AC_MSG_CHECKING([whether dops->d_automount() exists])
 	ZFS_LINUX_TEST_RESULT([dentry_operations_d_automount], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_AUTOMOUNT, 1, [dops->automount() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([dops->d_automount()])
 	])
 ])

diff --git a/zfs/config/kernel-bdev-logical-size.m4 b/zfs/config/kernel-bdev-logical-size.m4
deleted file mode 100644
index 0de9afd..0000000
--- a/zfs/config/kernel-bdev-logical-size.m4
+++ /dev/null

@@ -1,26 +0,0 @@
-dnl #
-dnl # 2.6.30 API change
-dnl # bdev_hardsect_size() replaced with bdev_logical_block_size().  While
-dnl # it has been true for a while that there was no strict 1:1 mapping
-dnl # between physical sector size and logical block size this change makes
-dnl # it explicit.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE], [
-	ZFS_LINUX_TEST_SRC([bdev_logical_block_size], [
-		#include <linux/blkdev.h>
-	],[
-		struct block_device *bdev = NULL;
-		bdev_logical_block_size(bdev);
-	], [$NO_UNUSED_BUT_SET_VARIABLE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE], [
-	AC_MSG_CHECKING([whether bdev_logical_block_size() is available])
-	ZFS_LINUX_TEST_RESULT([bdev_logical_block_size], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BDEV_LOGICAL_BLOCK_SIZE, 1,
-		    [bdev_logical_block_size() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bdev-physical-size.m4 b/zfs/config/kernel-bdev-physical-size.m4
deleted file mode 100644
index 94d8172..0000000
--- a/zfs/config/kernel-bdev-physical-size.m4
+++ /dev/null

@@ -1,40 +0,0 @@
-dnl #
-dnl # 2.6.30 API change
-dnl #
-dnl # The bdev_physical_block_size() interface was added to provide a way
-dnl # to determine the smallest write which can be performed without a
-dnl # read-modify-write operation.  From the kernel documentation:
-dnl #
-dnl # What:          /sys/block/<disk>/queue/physical_block_size
-dnl # Date:          May 2009
-dnl # Contact:       Martin K. Petersen <martin.petersen@oracle.com>
-dnl # Description:
-dnl #                This is the smallest unit the storage device can write
-dnl #                without resorting to read-modify-write operation.  It is
-dnl #                usually the same as the logical block size but may be
-dnl #                bigger.  One example is SATA drives with 4KB sectors
-dnl #                that expose a 512-byte logical block size to the
-dnl #                operating system.
-dnl #
-dnl # Unfortunately, this interface isn't entirely reliable because
-dnl # drives are sometimes known to misreport this value.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE], [
-	ZFS_LINUX_TEST_SRC([bdev_physical_block_size], [
-		#include <linux/blkdev.h>
-	],[
-		struct block_device *bdev = NULL;
-		bdev_physical_block_size(bdev);
-	], [$NO_UNUSED_BUT_SET_VARIABLE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE], [
-	AC_MSG_CHECKING([whether bdev_physical_block_size() is available])
-	ZFS_LINUX_TEST_RESULT([bdev_physical_block_size], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BDEV_PHYSICAL_BLOCK_SIZE, 1,
-		    [bdev_physical_block_size() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bdi.m4 b/zfs/config/kernel-bdi.m4
index 5151633..9758863 100644
--- a/zfs/config/kernel-bdi.m4
+++ b/zfs/config/kernel-bdi.m4

@@ -8,7 +8,9 @@
 	], [
 		char *name = "bdi";
 		atomic_long_t zfs_bdi_seq;
-		int error __attribute__((unused)) =
+		int error __attribute__((unused));
+		atomic_long_set(&zfs_bdi_seq, 0);
+		error =
 		    super_setup_bdi_name(&sb, "%.28s-%ld", name,
 		    atomic_long_inc_return(&zfs_bdi_seq));
 	])
@@ -72,11 +74,7 @@
 				AC_DEFINE(HAVE_3ARGS_BDI_SETUP_AND_REGISTER, 1,
 				    [bdi_setup_and_register() wants 3 args])
 			], [
-				dnl #
-				dnl # 2.6.32 - 2.6.33, bdi_setup_and_register()
-				dnl # is not exported.
-				dnl #
-				AC_MSG_RESULT(no)
+				ZFS_LINUX_TEST_ERROR([bdi_setup])
 			])
 		])
 	])

diff --git a/zfs/config/kernel-bio-bvec-iter.m4 b/zfs/config/kernel-bio-bvec-iter.m4
deleted file mode 100644
index f9a99ce..0000000
--- a/zfs/config/kernel-bio-bvec-iter.m4
+++ /dev/null

@@ -1,23 +0,0 @@
-dnl #
-dnl # 3.14 API change,
-dnl # Immutable biovecs. A number of fields of struct bio are moved to
-dnl # struct bvec_iter.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [
-	ZFS_LINUX_TEST_SRC([bio_bvec_iter], [
-		#include <linux/bio.h>
-	],[
-		struct bio bio;
-		bio.bi_iter.bi_sector = 0;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [
-	AC_MSG_CHECKING([whether bio has bi_iter])
-	ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_BVEC_ITER, 1, [bio has bi_iter])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bio-end-io-t-args.m4 b/zfs/config/kernel-bio-end-io-t-args.m4
deleted file mode 100644
index 80a1fbe..0000000
--- a/zfs/config/kernel-bio-end-io-t-args.m4
+++ /dev/null

@@ -1,50 +0,0 @@
-dnl #
-dnl # 4.3 API change
-dnl # Error argument dropped from bio_endio in favor of newly introduced
-dnl # bio->bi_error. This also replaces bio->bi_flags value BIO_UPTODATE.
-dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451
-dnl # ("block: add a bi_error field to struct bio").
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [
-	ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [
-		#include <linux/bio.h>
-		void wanted_end_io(struct bio *bio) { return; }
-		bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io;
-	], [])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [
-	AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg])
-	ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_BIO_END_IO_T, 1,
-		    [bio_end_io_t wants 1 arg])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 4.13 API change
-dnl # The bio->bi_error field was replaced with bio->bi_status which is an
-dnl # enum which describes all possible error types.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [
-	ZFS_LINUX_TEST_SRC([bio_bi_status], [
-		#include <linux/bio.h>
-	], [
-		struct bio bio __attribute__ ((unused));
-		blk_status_t status __attribute__ ((unused)) = BLK_STS_OK;
-		bio.bi_status = status;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [
-	AC_MSG_CHECKING([whether bio->bi_status exists])
-	ZFS_LINUX_TEST_RESULT([bio_bi_status], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_BI_STATUS, 1, [bio->bi_status exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bio-failfast.m4 b/zfs/config/kernel-bio-failfast.m4
deleted file mode 100644
index 0c636f0..0000000
--- a/zfs/config/kernel-bio-failfast.m4
+++ /dev/null

@@ -1,56 +0,0 @@
-dnl #
-dnl # Preferred interface for setting FAILFAST on a bio:
-dnl #   2.6.28-2.6.35: BIO_RW_FAILFAST_{DEV|TRANSPORT|DRIVER}
-dnl #       >= 2.6.36: REQ_FAILFAST_{DEV|TRANSPORT|DRIVER}
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD], [
-	ZFS_LINUX_TEST_SRC([bio_failfast_dtd], [
-		#include <linux/bio.h>
-	],[
-		int flags __attribute__ ((unused));
-		flags = ((1 << BIO_RW_FAILFAST_DEV) |
-			 (1 << BIO_RW_FAILFAST_TRANSPORT) |
-			 (1 << BIO_RW_FAILFAST_DRIVER));
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST_DTD], [
-	AC_MSG_CHECKING([whether BIO_RW_FAILFAST_* are defined])
-	ZFS_LINUX_TEST_RESULT([bio_failfast_dtd], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_RW_FAILFAST_DTD, 1,
-		    [BIO_RW_FAILFAST_* are defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK], [
-	ZFS_LINUX_TEST_SRC([bio_failfast_mask], [
-		#include <linux/bio.h>
-	],[
-		int flags __attribute__ ((unused));
-		flags = REQ_FAILFAST_MASK;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_REQ_FAILFAST_MASK], [
-	AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined])
-	ZFS_LINUX_TEST_RESULT([bio_failfast_mask], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_REQ_FAILFAST_MASK, 1,
-		    [REQ_FAILFAST_MASK is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_FAILFAST], [
-	ZFS_AC_KERNEL_SRC_BIO_FAILFAST_DTD
-	ZFS_AC_KERNEL_SRC_REQ_FAILFAST_MASK
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_FAILFAST], [
-	ZFS_AC_KERNEL_BIO_FAILFAST_DTD
-	ZFS_AC_KERNEL_REQ_FAILFAST_MASK
-])

diff --git a/zfs/config/kernel-bio-op.m4 b/zfs/config/kernel-bio-op.m4
deleted file mode 100644
index 1f2d237..0000000
--- a/zfs/config/kernel-bio-op.m4
+++ /dev/null

@@ -1,102 +0,0 @@
-dnl #
-dnl # Linux 4.8 API,
-dnl #
-dnl # The bio_op() helper was introduced as a replacement for explicitly
-dnl # checking the bio->bi_rw flags.  The following checks are used to
-dnl # detect if a specific operation is supported.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [
-	ZFS_LINUX_TEST_SRC([req_op_discard], [
-		#include <linux/blk_types.h>
-	],[
-		int op __attribute__ ((unused)) = REQ_OP_DISCARD;
-	])
-
-	ZFS_LINUX_TEST_SRC([req_op_secure_erase], [
-		#include <linux/blk_types.h>
-	],[
-		int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE;
-	])
-
-	ZFS_LINUX_TEST_SRC([req_op_flush], [
-		#include <linux/blk_types.h>
-	],[
-		int op __attribute__ ((unused)) = REQ_OP_FLUSH;
-	])
-
-	ZFS_LINUX_TEST_SRC([bio_bi_opf], [
-		#include <linux/bio.h>
-	],[
-		struct bio bio __attribute__ ((unused));
-		bio.bi_opf = 0;
-	])
-
-	ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [
-		#include <linux/bio.h>
-	],[
-		struct bio *bio __attribute__ ((unused)) = NULL;
-		bio_set_op_attrs(bio, 0, 0);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [
-	AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined])
-	ZFS_LINUX_TEST_RESULT([req_op_discard], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_REQ_OP_DISCARD, 1,
-		    [REQ_OP_DISCARD is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [
-	AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined])
-	ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1,
-		    [REQ_OP_SECURE_ERASE is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-
-AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [
-	AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined])
-	ZFS_LINUX_TEST_RESULT([req_op_flush], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, [REQ_OP_FLUSH is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [
-	AC_MSG_CHECKING([whether bio->bi_opf is defined])
-	ZFS_LINUX_TEST_RESULT([bio_bi_opf], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS], [
-	AC_MSG_CHECKING([whether bio_set_op_attrs is available])
-	ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_SET_OP_ATTRS, 1,
-		    [bio_set_op_attrs is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_OPS], [
-	ZFS_AC_KERNEL_REQ_OP_DISCARD
-	ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE
-	ZFS_AC_KERNEL_REQ_OP_FLUSH
-	ZFS_AC_KERNEL_BIO_BI_OPF
-	ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS
-])

diff --git a/zfs/config/kernel-bio-rw-barrier.m4 b/zfs/config/kernel-bio-rw-barrier.m4
deleted file mode 100644
index f667d48..0000000
--- a/zfs/config/kernel-bio-rw-barrier.m4
+++ /dev/null

@@ -1,30 +0,0 @@
-dnl #
-dnl # Interface for issuing a discard bio:
-dnl # 2.6.28-2.6.35: BIO_RW_BARRIER
-dnl # 2.6.36-3.x:    REQ_BARRIER
-dnl #
-dnl #
-dnl # Since REQ_BARRIER is a preprocessor definition, there is no need for an
-dnl # autotools check for it. Also, REQ_BARRIER existed in the request layer
-dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the
-dnl # request layer and bio layer flags, so it would be wrong to assume that
-dnl # the APIs are mutually exclusive contrary to the typical case.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER], [
-	ZFS_LINUX_TEST_SRC([bio_rw_barrier], [
-		#include <linux/bio.h>
-	],[
-		int flags __attribute__ ((unused));
-		flags = BIO_RW_BARRIER;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [
-	AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined])
-	ZFS_LINUX_TEST_RESULT([bio_rw_barrier], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_RW_BARRIER, 1, [BIO_RW_BARRIER is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bio-rw-discard.m4 b/zfs/config/kernel-bio-rw-discard.m4
deleted file mode 100644
index 34a8927..0000000
--- a/zfs/config/kernel-bio-rw-discard.m4
+++ /dev/null

@@ -1,30 +0,0 @@
-dnl #
-dnl # Interface for issuing a discard bio:
-dnl # 2.6.28-2.6.35: BIO_RW_DISCARD
-dnl # 2.6.36-3.x:    REQ_DISCARD
-dnl #
-dnl #
-dnl # Since REQ_DISCARD is a preprocessor definition, there is no need for an
-dnl # autotools check for it. Also, REQ_DISCARD existed in the request layer
-dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the
-dnl # request layer and bio layer flags, so it would be wrong to assume that
-dnl # the APIs are mutually exclusive contrary to the typical case.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD], [
-	ZFS_LINUX_TEST_SRC([bio_rw_discard], [
-		#include <linux/bio.h>
-	],[
-		int flags __attribute__ ((unused));
-		flags = BIO_RW_DISCARD;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [
-	AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined])
-	ZFS_LINUX_TEST_RESULT([bio_rw_discard], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_RW_DISCARD, 1, [BIO_RW_DISCARD is defined])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bio-tryget.m4 b/zfs/config/kernel-bio-tryget.m4
deleted file mode 100644
index 49546ab..0000000
--- a/zfs/config/kernel-bio-tryget.m4
+++ /dev/null

@@ -1,37 +0,0 @@
-dnl #
-dnl # Linux 5.5 API,
-dnl #
-dnl # The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by
-dnl # blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched().
-dnl # As a side effect the function was converted to GPL-only.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKG_TRYGET], [
-	ZFS_LINUX_TEST_SRC([blkg_tryget], [
-		#include <linux/blk-cgroup.h>
-		#include <linux/bio.h>
-		#include <linux/fs.h>
-	],[
-		struct blkcg_gq blkg __attribute__ ((unused)) = {};
-		bool rc __attribute__ ((unused));
-		rc = blkg_tryget(&blkg);
-	], [], [$ZFS_META_LICENSE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKG_TRYGET], [
-	AC_MSG_CHECKING([whether blkg_tryget() is available])
-	ZFS_LINUX_TEST_RESULT([blkg_tryget], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLKG_TRYGET, 1, [blkg_tryget() is available])
-
-		AC_MSG_CHECKING([whether blkg_tryget() is GPL-only])
-		ZFS_LINUX_TEST_RESULT([blkg_tryget_license], [
-			AC_MSG_RESULT(no)
-		],[
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BLKG_TRYGET_GPL_ONLY, 1,
-			    [blkg_tryget() GPL-only])
-		])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-bio.m4 b/zfs/config/kernel-bio.m4
new file mode 100644
index 0000000..18620ca
--- /dev/null
+++ b/zfs/config/kernel-bio.m4

@@ -0,0 +1,552 @@
+dnl #
+dnl # Compile probes for the REQ_* request flags; each probe simply assigns
+dnl # the flag to an int after including <linux/bio.h>.
+dnl #
+dnl # 2.6.36 API change: REQ_FAILFAST_{DEV|TRANSPORT|DRIVER}, REQ_DISCARD
+dnl # and REQ_FLUSH.
+dnl #
+dnl # 4.8 - 4.9 API: REQ_FLUSH was renamed to REQ_PREFLUSH.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ], [
+	ZFS_LINUX_TEST_SRC([req_failfast_mask], [
+		#include <linux/bio.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = REQ_FAILFAST_MASK;
+	])
+
+	ZFS_LINUX_TEST_SRC([req_discard], [
+		#include <linux/bio.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = REQ_DISCARD;
+	])
+
+	ZFS_LINUX_TEST_SRC([req_flush], [
+		#include <linux/bio.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = REQ_FLUSH;
+	])
+
+	ZFS_LINUX_TEST_SRC([req_preflush], [
+		#include <linux/bio.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = REQ_PREFLUSH;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK], [
+	AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined])
+	ZFS_LINUX_TEST_RESULT([req_failfast_mask], [
+		AC_MSG_RESULT([yes])
+	], [
+		ZFS_LINUX_TEST_ERROR([REQ_FAILFAST_MASK])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_DISCARD], [
+	AC_MSG_CHECKING([whether REQ_DISCARD is defined])
+	ZFS_LINUX_TEST_RESULT([req_discard], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_DISCARD], [1], [REQ_DISCARD is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FLUSH], [
+	AC_MSG_CHECKING([whether REQ_FLUSH is defined])
+	ZFS_LINUX_TEST_RESULT([req_flush], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_FLUSH], [1], [REQ_FLUSH is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_PREFLUSH], [
+	AC_MSG_CHECKING([whether REQ_PREFLUSH is defined])
+	ZFS_LINUX_TEST_RESULT([req_preflush], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_PREFLUSH], [1], [REQ_PREFLUSH is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 4.8 API,
+dnl #
+dnl # bio_op() replaced explicit checks of the bio->bi_rw flags.  These probes
+dnl # detect REQ_OP_DISCARD, REQ_OP_SECURE_ERASE, REQ_OP_FLUSH, the bio->bi_opf
+dnl # member and bio_set_op_attrs().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [
+	ZFS_LINUX_TEST_SRC([req_op_discard], [
+		#include <linux/blk_types.h>
+	],[
+		int op __attribute__ ((unused)) = REQ_OP_DISCARD;
+	])
+
+	ZFS_LINUX_TEST_SRC([req_op_secure_erase], [
+		#include <linux/blk_types.h>
+	],[
+		int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE;
+	])
+
+	ZFS_LINUX_TEST_SRC([req_op_flush], [
+		#include <linux/blk_types.h>
+	],[
+		int op __attribute__ ((unused)) = REQ_OP_FLUSH;
+	])
+
+	ZFS_LINUX_TEST_SRC([bio_bi_opf], [
+		#include <linux/bio.h>
+	],[
+		struct bio bio __attribute__ ((unused));
+		bio.bi_opf = 0;
+	])
+
+	ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [
+		#include <linux/bio.h>
+	],[
+		struct bio *bio __attribute__ ((unused)) = NULL;
+		bio_set_op_attrs(bio, 0, 0);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD], [
+	AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined])
+	ZFS_LINUX_TEST_RESULT([req_op_discard], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_OP_DISCARD], [1], [REQ_OP_DISCARD is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE], [
+	AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined])
+	ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_OP_SECURE_ERASE], [1],
+		    [REQ_OP_SECURE_ERASE is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH], [
+	AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined])
+	ZFS_LINUX_TEST_RESULT([req_op_flush], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_REQ_OP_FLUSH], [1], [REQ_OP_FLUSH is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [
+	AC_MSG_CHECKING([whether bio->bi_opf is defined])
+	ZFS_LINUX_TEST_RESULT([bio_bi_opf], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_BI_OPF], [1], [bio->bi_opf is defined])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_OP_ATTRS], [
+	AC_MSG_CHECKING([whether bio_set_op_attrs is available])
+	ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_SET_OP_ATTRS], [1],
+		    [bio_set_op_attrs is available])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 4.14 API,
+dnl #
+dnl # The bio_set_dev() helper macro was introduced as part of the transition
+dnl # to having struct gendisk in struct bio.
+dnl #
+dnl # Linux 5.0 API,
+dnl #
+dnl # The bio_set_dev() helper macro was updated to depend internally on the
+dnl # bio_associate_blkg() symbol, which is exported GPL-only.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [
+	ZFS_LINUX_TEST_SRC([bio_set_dev], [
+		#include <linux/bio.h>
+		#include <linux/fs.h>
+	],[
+		struct block_device *bdev = NULL;
+		struct bio *bio = NULL;
+		bio_set_dev(bio, bdev);
+	], [], [ZFS_META_LICENSE])
+])
+
+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # bio_set_dev() is no longer a helper macro but an inline function, so the
+dnl # function it calls internally can no longer be overridden by our code.
+dnl # The probe fails with #error unless bio_set_dev is a preprocessor macro.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO], [
+	ZFS_LINUX_TEST_SRC([bio_set_dev_macro], [
+		#include <linux/bio.h>
+		#include <linux/fs.h>
+	],[
+		#ifndef bio_set_dev
+		#error Not a macro
+		#endif
+	], [], [ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
+	AC_MSG_CHECKING([whether bio_set_dev() is available])
+	ZFS_LINUX_TEST_RESULT([bio_set_dev], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_SET_DEV], [1], [bio_set_dev() is available])
+		dnl # A passing bio_set_dev_license result answers no, a failing one yes.
+		AC_MSG_CHECKING([whether bio_set_dev() is GPL-only])
+		ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [
+			AC_MSG_RESULT([no])
+		], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE([HAVE_BIO_SET_DEV_GPL_ONLY], [1],
+			    [bio_set_dev() GPL-only])
+		])
+		dnl # Record whether bio_set_dev is still a preprocessor macro.
+		AC_MSG_CHECKING([whether bio_set_dev() is a macro])
+		ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE([HAVE_BIO_SET_DEV_MACRO], [1],
+			    [bio_set_dev() is a macro])
+		], [
+			AC_MSG_RESULT([no])
+		])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # 4.3 API change
+dnl # The error argument was dropped from bio_endio in favor of the newly
+dnl # introduced bio->bi_error, replacing bio->bi_flags value BIO_UPTODATE.
+dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451
+dnl # ("block: add a bi_error field to struct bio").
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [
+	ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [
+		#include <linux/bio.h>
+		void wanted_end_io(struct bio *bio) { return; }
+		bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io;
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [
+	AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg])
+	ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_1ARG_BIO_END_IO_T], [1],
+		    [bio_end_io_t wants 1 arg])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # 4.13 API change
+dnl # The bio->bi_error field was replaced with bio->bi_status, an enum
+dnl # describing all possible error types; the probe stores BLK_STS_OK in it.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [
+	ZFS_LINUX_TEST_SRC([bio_bi_status], [
+		#include <linux/bio.h>
+	], [
+		struct bio bio __attribute__ ((unused));
+		blk_status_t status __attribute__ ((unused)) = BLK_STS_OK;
+		bio.bi_status = status;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [
+	AC_MSG_CHECKING([whether bio->bi_status exists])
+	ZFS_LINUX_TEST_RESULT([bio_bi_status], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_BI_STATUS], [1], [bio->bi_status exists])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # 3.14 API change,
+dnl # Immutable biovecs.  A number of struct bio fields were moved into
+dnl # struct bvec_iter; the probe writes bio.bi_iter.bi_sector to detect this.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [
+	ZFS_LINUX_TEST_SRC([bio_bvec_iter], [
+		#include <linux/bio.h>
+	],[
+		struct bio bio;
+		bio.bi_iter.bi_sector = 0;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [
+	AC_MSG_CHECKING([whether bio has bi_iter])
+	ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_BVEC_ITER], [1], [bio has bi_iter])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # 4.8 API change
+dnl # The rw argument was removed from submit_bio/submit_bio_wait; callers
+dnl # set bio->bi_rw instead.  The probe calls submit_bio() with only a bio.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO], [
+	ZFS_LINUX_TEST_SRC([submit_bio], [
+		#include <linux/bio.h>
+	],[
+		struct bio *bio = NULL;
+		(void) submit_bio(bio);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_SUBMIT_BIO], [
+	AC_MSG_CHECKING([whether submit_bio() wants 1 arg])
+	ZFS_LINUX_TEST_RESULT([submit_bio], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_1ARG_SUBMIT_BIO], [1], [submit_bio() wants 1 arg])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # 2.6.34 API change
+dnl # current->bio_list; the probe assigns a NULL struct bio_list pointer.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_CURRENT_BIO_LIST], [
+	ZFS_LINUX_TEST_SRC([current_bio_list], [
+		#include <linux/sched.h>
+	], [
+		current->bio_list = (struct bio_list *) NULL;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_CURRENT_BIO_LIST], [
+	AC_MSG_CHECKING([whether current->bio_list exists])
+	ZFS_LINUX_TEST_RESULT([current_bio_list], [
+		AC_MSG_RESULT([yes])
+	], [
+		ZFS_LINUX_TEST_ERROR([bio_list])
+	])
+])
+
+dnl #
+dnl # Linux 5.5 API,
+dnl #
+dnl # The Linux 5.5 kernel changed percpu_ref_tryget(), which blkg_tryget()
+dnl # inlines, to use rcu_read_lock() instead of rcu_read_lock_sched().
+dnl # As a side effect the function was converted to GPL-only.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKG_TRYGET], [
+	ZFS_LINUX_TEST_SRC([blkg_tryget], [
+		#include <linux/blk-cgroup.h>
+		#include <linux/bio.h>
+		#include <linux/fs.h>
+	],[
+		struct blkcg_gq blkg __attribute__ ((unused)) = {};
+		bool rc __attribute__ ((unused));
+		rc = blkg_tryget(&blkg);
+	], [], [ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKG_TRYGET], [
+	AC_MSG_CHECKING([whether blkg_tryget() is available])
+	ZFS_LINUX_TEST_RESULT([blkg_tryget], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BLKG_TRYGET], [1], [blkg_tryget() is available])
+		dnl # A passing blkg_tryget_license result answers no, a failing one yes.
+		AC_MSG_CHECKING([whether blkg_tryget() is GPL-only])
+		ZFS_LINUX_TEST_RESULT([blkg_tryget_license], [
+			AC_MSG_RESULT([no])
+		], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE([HAVE_BLKG_TRYGET_GPL_ONLY], [1],
+			    [blkg_tryget() GPL-only])
+		])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 5.12 API,
+dnl #
+dnl # The Linux 5.12 kernel added a bi_bdev member to struct bio, and
+dnl # bio->bi_disk moved to bio->bi_bdev->bd_disk.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BDEV_DISK], [
+	ZFS_LINUX_TEST_SRC([bio_bdev_disk], [
+		#include <linux/blk_types.h>
+		#include <linux/blkdev.h>
+	],[
+		struct bio *b = NULL;
+		struct gendisk *d = b->bi_bdev->bd_disk;
+		blk_register_queue(d);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_BDEV_DISK], [
+	AC_MSG_CHECKING([whether bio->bi_bdev->bd_disk exists])
+	ZFS_LINUX_TEST_RESULT([bio_bdev_disk], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_BDEV_DISK], [1], [bio->bi_bdev->bd_disk exists])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # The Linux 5.16 API for submit_bio changed the return type to void
+dnl # instead of int; the probe targets block_device_operations->submit_bio.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID], [
+	ZFS_LINUX_TEST_SRC([bio_bdev_submit_bio_void], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device_operations *bdev = NULL;
+		__attribute__((unused)) void(*f)(struct bio *) = bdev->submit_bio;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID], [
+	AC_MSG_CHECKING(
+		[whether block_device_operations->submit_bio() returns void])
+	ZFS_LINUX_TEST_RESULT([bio_bdev_submit_bio_void], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID], [1],
+			[block_device_operations->submit_bio() returns void])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # The Linux 5.16 API moved struct blkcg_gq into linux/blk-cgroup.h, a
+dnl # header that has existed since 2015.  This test checks for that header
+dnl # so it can be included conditionally where it exists while remaining
+dnl # backward compatible with kernels that pre-date its introduction.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER], [
+	ZFS_LINUX_TEST_SRC([blk_cgroup_header], [
+		#include <linux/blk-cgroup.h>
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [
+	AC_MSG_CHECKING([whether linux/blk-cgroup.h exists])
+	ZFS_LINUX_TEST_RESULT([blk_cgroup_header], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_LINUX_BLK_CGROUP_HEADER], [1],
+			[linux/blk-cgroup.h exists])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 5.18 API
+dnl #
+dnl # In 07888c665b405b1cd3577ddebfeb74f4717a84c4
+dnl # ("block: pass a block_device and opf to bio_alloc")
+dnl #   bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+dnl # became
+dnl #   bio_alloc(struct block_device *bdev, unsigned short nr_vecs,
+dnl #       unsigned int opf, gfp_t gfp_mask)
+dnl # however "NULL/0 can be passed, both for the passthrough case on a raw
+dnl # request_queue and to temporarily avoid refactoring some nasty code."
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG], [
+	ZFS_LINUX_TEST_SRC([bio_alloc_4arg], [
+		#include <linux/bio.h>
+	],[
+		gfp_t gfp_mask = 0;
+		unsigned short nr_iovecs = 0;
+		struct block_device *bdev = NULL;
+		unsigned int opf = 0;
+
+		struct bio *__attribute__((unused)) allocated = bio_alloc(bdev, nr_iovecs, opf, gfp_mask);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [
+	AC_MSG_CHECKING([whether bio_alloc() wants 4 args])
+	ZFS_LINUX_TEST_RESULT([bio_alloc_4arg], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_BIO_ALLOC_4ARG], [1], [bio_alloc() takes 4 arguments])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+dnl # Invokes every ZFS_AC_KERNEL_SRC_* macro defined in kernel-bio.m4.
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [
+	ZFS_AC_KERNEL_SRC_REQ
+	ZFS_AC_KERNEL_SRC_BIO_OPS
+	ZFS_AC_KERNEL_SRC_BIO_SET_DEV
+	ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS
+	ZFS_AC_KERNEL_SRC_BIO_BI_STATUS
+	ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER
+	ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO
+	ZFS_AC_KERNEL_SRC_BIO_CURRENT_BIO_LIST
+	ZFS_AC_KERNEL_SRC_BLKG_TRYGET
+	ZFS_AC_KERNEL_SRC_BIO_BDEV_DISK
+	ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID
+	ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO
+	ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER
+	ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG
+])
+dnl # Invokes the result-check macros of kernel-bio.m4, REQ_* flags first.
+AC_DEFUN([ZFS_AC_KERNEL_BIO], [
+	ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK
+	ZFS_AC_KERNEL_BIO_REQ_DISCARD
+	ZFS_AC_KERNEL_BIO_REQ_FLUSH
+	ZFS_AC_KERNEL_BIO_REQ_PREFLUSH
+	dnl # REQ_OP_* operations, bio->bi_opf and bio_set_op_attrs checks
+	ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD
+	ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE
+	ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH
+	ZFS_AC_KERNEL_BIO_BI_OPF
+	ZFS_AC_KERNEL_BIO_SET_OP_ATTRS
+	dnl # remaining bio, blkg and block_device_operations checks
+	ZFS_AC_KERNEL_BIO_SET_DEV
+	ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
+	ZFS_AC_KERNEL_BIO_BI_STATUS
+	ZFS_AC_KERNEL_BIO_BVEC_ITER
+	ZFS_AC_KERNEL_BIO_SUBMIT_BIO
+	ZFS_AC_KERNEL_BIO_CURRENT_BIO_LIST
+	ZFS_AC_KERNEL_BLKG_TRYGET
+	ZFS_AC_KERNEL_BIO_BDEV_DISK
+	ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID
+	ZFS_AC_KERNEL_BLK_CGROUP_HEADER
+	ZFS_AC_KERNEL_BIO_ALLOC_4ARG
+])

diff --git a/zfs/config/kernel-bio_max_segs.m4 b/zfs/config/kernel-bio_max_segs.m4
new file mode 100644
index 0000000..a90d754
--- /dev/null
+++ b/zfs/config/kernel-bio_max_segs.m4

@@ -0,0 +1,23 @@
+dnl #
+dnl # 5.12 API change removes BIO_MAX_PAGES in favor of bio_max_segs(),
+dnl # which internally handles capping the value at the upper bound that
+dnl # BIO_MAX_PAGES used to express.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS], [
+	ZFS_LINUX_TEST_SRC([bio_max_segs], [
+		#include <linux/bio.h>
+	],[
+		bio_max_segs(1);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_MAX_SEGS], [
+	AC_MSG_CHECKING([whether bio_max_segs() exists])
+	ZFS_LINUX_TEST_RESULT([bio_max_segs], [
+		AC_MSG_RESULT([yes])
+
+		AC_DEFINE([HAVE_BIO_MAX_SEGS], [1], [bio_max_segs() is implemented])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-bio_set_dev.m4 b/zfs/config/kernel-bio_set_dev.m4
deleted file mode 100644
index b8e13f3..0000000
--- a/zfs/config/kernel-bio_set_dev.m4
+++ /dev/null

@@ -1,40 +0,0 @@
-dnl #
-dnl # Linux 4.14 API,
-dnl #
-dnl # The bio_set_dev() helper macro was introduced as part of the transition
-dnl # to have struct gendisk in struct bio. 
-dnl #
-dnl # Linux 5.0 API,
-dnl #
-dnl # The bio_set_dev() helper macro was updated to internally depend on
-dnl # bio_associate_blkg() symbol which is exported GPL-only.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [
-	ZFS_LINUX_TEST_SRC([bio_set_dev], [
-		#include <linux/bio.h>
-		#include <linux/fs.h>
-	],[
-		struct block_device *bdev = NULL;
-		struct bio *bio = NULL;
-		bio_set_dev(bio, bdev);
-	], [], [$ZFS_META_LICENSE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
-	AC_MSG_CHECKING([whether bio_set_dev() is available])
-	ZFS_LINUX_TEST_RESULT([bio_set_dev], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() is available])
-
-		AC_MSG_CHECKING([whether bio_set_dev() is GPL-only])
-		ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [
-			AC_MSG_RESULT(no)
-		],[
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1,
-			    [bio_set_dev() GPL-only])
-		])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-bdi.m4 b/zfs/config/kernel-blk-queue-bdi.m4
deleted file mode 100644
index 28241c4..0000000
--- a/zfs/config/kernel-blk-queue-bdi.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 2.6.32 - 4.11, statically allocated bdi in request_queue
-dnl # 4.12 - x.y, dynamically allocated bdi in request_queue
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [
-	ZFS_LINUX_TEST_SRC([blk_queue_bdi], [
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue q;
-		struct backing_dev_info bdi;
-		q.backing_dev_info = &bdi;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
-	AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
-	ZFS_LINUX_TEST_RESULT([blk_queue_bdi], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
-		    [blk queue backing_dev_info is dynamic])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-discard.m4 b/zfs/config/kernel-blk-queue-discard.m4
deleted file mode 100644
index 85a2935..0000000
--- a/zfs/config/kernel-blk-queue-discard.m4
+++ /dev/null

@@ -1,72 +0,0 @@
-dnl #
-dnl # 2.6.32 - 4.x API,
-dnl #   blk_queue_discard()
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [
-	ZFS_LINUX_TEST_SRC([blk_queue_discard], [
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
-		int value __attribute__ ((unused));
-		value = blk_queue_discard(q);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [
-	AC_MSG_CHECKING([whether blk_queue_discard() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_discard], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1,
-		    [blk_queue_discard() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 4.8 - 4.x API,
-dnl #   blk_queue_secure_erase()
-dnl #
-dnl # 2.6.36 - 4.7 API,
-dnl #   blk_queue_secdiscard()
-dnl #
-dnl # 2.6.x - 2.6.35 API,
-dnl #   Unsupported by kernel
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [
-	ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
-		int value __attribute__ ((unused));
-		value = blk_queue_secure_erase(q);
-	])
-
-	ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
-		int value __attribute__ ((unused));
-		value = blk_queue_secdiscard(q);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [
-	AC_MSG_CHECKING([whether blk_queue_secure_erase() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1,
-		    [blk_queue_secure_erase() is available])
-	],[
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether blk_queue_secdiscard() is available])
-		ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1,
-			    [blk_queue_secdiscard() is available])
-		],[
-			AC_MSG_RESULT(no)
-		])
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-flags.m4 b/zfs/config/kernel-blk-queue-flags.m4
deleted file mode 100644
index 9d4dfc1..0000000
--- a/zfs/config/kernel-blk-queue-flags.m4
+++ /dev/null

@@ -1,56 +0,0 @@
-dnl #
-dnl # API change
-dnl # https://github.com/torvalds/linux/commit/8814ce8
-dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [
-	ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [
-		#include <linux/kernel.h>
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue *q = NULL;
-		blk_queue_flag_set(0, q);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [
-	AC_MSG_CHECKING([whether blk_queue_flag_set() exists])
-	ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1,
-		    [blk_queue_flag_set() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [
-	ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [
-		#include <linux/kernel.h>
-		#include <linux/blkdev.h>
-	],[
-		struct request_queue *q = NULL;
-		blk_queue_flag_clear(0, q);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [
-	AC_MSG_CHECKING([whether blk_queue_flag_clear() exists])
-	ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1,
-		    [blk_queue_flag_clear() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS], [
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAGS], [
-	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET
-	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR
-])

diff --git a/zfs/config/kernel-blk-queue-flush.m4 b/zfs/config/kernel-blk-queue-flush.m4
deleted file mode 100644
index b546d94..0000000
--- a/zfs/config/kernel-blk-queue-flush.m4
+++ /dev/null

@@ -1,69 +0,0 @@
-dnl #
-dnl # 2.6.36 API change
-dnl # In 2.6.36 kernels the blk_queue_ordered() interface has been
-dnl # replaced by the simpler blk_queue_flush().  However, while the
-dnl # old interface was available to all the new one is GPL-only.
-dnl # Thus in addition to detecting if this function is available
-dnl # we determine if it is GPL-only.  If the GPL-only interface is
-dnl # there we implement our own compatibility function, otherwise
-dnl # we use the function.  The hope is that long term this function
-dnl # will be opened up.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
-	ZFS_LINUX_TEST_SRC([blk_queue_flush], [
-		#include <linux/blkdev.h>
-	], [
-		struct request_queue *q = NULL;
-		(void) blk_queue_flush(q, REQ_FLUSH);
-	], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE])
-
-	ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [
-		#include <linux/kernel.h>
-		#include <linux/blkdev.h>
-	], [
-		struct request_queue *q = NULL;
-		blk_queue_write_cache(q, true, true);
-	], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [
-	AC_MSG_CHECKING([whether blk_queue_flush() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_flush], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1,
-		    [blk_queue_flush() is available])
-
-		AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only])
-		ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [
-			AC_MSG_RESULT(no)
-		],[
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1,
-			    [blk_queue_flush() is GPL-only])
-		])
-	],[
-		AC_MSG_RESULT(no)
-	])
-
-	dnl #
-	dnl # 4.7 API change
-	dnl # Replace blk_queue_flush with blk_queue_write_cache
-	dnl #
-	AC_MSG_CHECKING([whether blk_queue_write_cache() exists])
-	ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1,
-		    [blk_queue_write_cache() exists])
-
-		AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only])
-		ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [
-			AC_MSG_RESULT(no)
-		],[
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1,
-			    [blk_queue_write_cache() is GPL-only])
-		])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-max-hw-sectors.m4 b/zfs/config/kernel-blk-queue-max-hw-sectors.m4
deleted file mode 100644
index 7387f84..0000000
--- a/zfs/config/kernel-blk-queue-max-hw-sectors.m4
+++ /dev/null

@@ -1,23 +0,0 @@
-dnl #
-dnl # 2.6.34 API change
-dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [
-	ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [
-		#include <linux/blkdev.h>
-	], [
-		struct request_queue *q = NULL;
-		(void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
-	], [$NO_UNUSED_BUT_SET_VARIABLE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
-	AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_max_hw_sectors], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1,
-		    [blk_queue_max_hw_sectors() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-max-segments.m4 b/zfs/config/kernel-blk-queue-max-segments.m4
deleted file mode 100644
index 1e4092d..0000000
--- a/zfs/config/kernel-blk-queue-max-segments.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 2.6.34 API change
-dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments()
-dnl # and blk_queue_max_phys_segments().
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [
-	ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [
-		#include <linux/blkdev.h>
-	], [
-		struct request_queue *q = NULL;
-		(void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
-	], [$NO_UNUSED_BUT_SET_VARIABLE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
-	AC_MSG_CHECKING([whether blk_queue_max_segments() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_max_segments], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1,
-		    [blk_queue_max_segments() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blk-queue-unplug.m4 b/zfs/config/kernel-blk-queue-unplug.m4
deleted file mode 100644
index f5d1814..0000000
--- a/zfs/config/kernel-blk-queue-unplug.m4
+++ /dev/null

@@ -1,54 +0,0 @@
-dnl #
-dnl # 2.6.32-2.6.35 API - The BIO_RW_UNPLUG enum can be used as a hint
-dnl # to unplug the queue.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG], [
-	ZFS_LINUX_TEST_SRC([blk_queue_bio_rw_unplug], [
-		#include <linux/blkdev.h>
-	],[
-		enum bio_rw_flags rw __attribute__ ((unused)) = BIO_RW_UNPLUG;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_UNPLUG], [
-	AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_bio_rw_unplug], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG, 1,
-		    [BIO_RW_UNPLUG is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_PLUG], [
-	ZFS_LINUX_TEST_SRC([blk_plug], [
-		#include <linux/blkdev.h>
-	],[
-		struct blk_plug plug __attribute__ ((unused));
-
-		blk_start_plug(&plug);
-		blk_finish_plug(&plug);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_PLUG], [
-	AC_MSG_CHECKING([whether struct blk_plug is available])
-	ZFS_LINUX_TEST_RESULT([blk_plug], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BLK_PLUG, 1,
-		    [struct blk_plug is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG], [
-	ZFS_AC_KERNEL_SRC_BIO_RW_UNPLUG
-	ZFS_AC_KERNEL_SRC_BLK_PLUG
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [
-	ZFS_AC_KERNEL_BIO_RW_UNPLUG
-	ZFS_AC_KERNEL_BLK_PLUG
-])

diff --git a/zfs/config/kernel-blk-queue.m4 b/zfs/config/kernel-blk-queue.m4
new file mode 100644
index 0000000..6f42b98
--- /dev/null
+++ b/zfs/config/kernel-blk-queue.m4

@@ -0,0 +1,386 @@
+dnl #
+dnl # 2.6.39 API change,
+dnl # blk_start_plug() and blk_finish_plug()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG], [
+	ZFS_LINUX_TEST_SRC([blk_plug], [
+		#include <linux/blkdev.h>
+	],[
+		struct blk_plug plug __attribute__ ((unused));
+
+		blk_start_plug(&plug);
+		blk_finish_plug(&plug);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [
+	AC_MSG_CHECKING([whether struct blk_plug is available])
+	ZFS_LINUX_TEST_RESULT([blk_plug], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([blk_plug])
+	])
+])
+
+dnl #
+dnl # 2.6.32 - 4.11: statically allocated bdi in request_queue
+dnl # 4.12: dynamically allocated bdi in request_queue
+dnl #
+dnl # The probe stores a pointer in q.backing_dev_info, which only compiles
+dnl # when that member is itself a pointer.  The request_queue is written
+dnl # but never read, so it is marked unused to keep the probe free of
+dnl # set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [
+	ZFS_LINUX_TEST_SRC([blk_queue_bdi], [
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue q __attribute__ ((unused));
+		struct backing_dev_info bdi;
+		q.backing_dev_info = &bdi;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
+	AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
+	ZFS_LINUX_TEST_RESULT([blk_queue_bdi], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
+		    [blk queue backing_dev_info is dynamic])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.9: added blk_queue_update_readahead(),
+dnl # 5.15: renamed to disk_update_readahead()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD], [
+	ZFS_LINUX_TEST_SRC([blk_queue_update_readahead], [
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue q;
+		blk_queue_update_readahead(&q);
+	])
+
+	ZFS_LINUX_TEST_SRC([disk_update_readahead], [
+		#include <linux/blkdev.h>
+	],[
+		struct gendisk disk;
+		disk_update_readahead(&disk);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [
+	AC_MSG_CHECKING([whether blk_queue_update_readahead() exists])
+	ZFS_LINUX_TEST_RESULT([blk_queue_update_readahead], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_UPDATE_READAHEAD, 1,
+		    [blk_queue_update_readahead() exists])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether disk_update_readahead() exists])
+		ZFS_LINUX_TEST_RESULT([disk_update_readahead], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_DISK_UPDATE_READAHEAD, 1,
+			    [disk_update_readahead() exists])
+		],[
+			AC_MSG_RESULT(no)
+		])
+	])
+])
+
+dnl #
+dnl # 5.19: bdev_max_discard_sectors() available
+dnl # 2.6.32: blk_queue_discard() available
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [
+	ZFS_LINUX_TEST_SRC([bdev_max_discard_sectors], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		unsigned int error __attribute__ ((unused));
+
+		error = bdev_max_discard_sectors(bdev);
+	])
+
+	ZFS_LINUX_TEST_SRC([blk_queue_discard], [
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue r;
+		struct request_queue *q = &r;
+		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
+		value = blk_queue_discard(q);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [
+	AC_MSG_CHECKING([whether bdev_max_discard_sectors() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_max_discard_sectors], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_MAX_DISCARD_SECTORS, 1,
+		    [bdev_max_discard_sectors() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blk_queue_discard() is available])
+		ZFS_LINUX_TEST_RESULT([blk_queue_discard], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1,
+			    [blk_queue_discard() is available])
+		],[
+			ZFS_LINUX_TEST_ERROR([blk_queue_discard])
+		])
+	])
+])
+
+dnl #
+dnl # 5.19: bdev_max_secure_erase_sectors() available
+dnl # 4.8: blk_queue_secure_erase() available
+dnl # 2.6.36: blk_queue_secdiscard() available
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [
+	ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		unsigned int error __attribute__ ((unused));
+
+		error = bdev_max_secure_erase_sectors(bdev);
+	])
+
+	ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue r;
+		struct request_queue *q = &r;
+		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
+		value = blk_queue_secure_erase(q);
+	])
+
+	ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue r;
+		struct request_queue *q = &r;
+		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
+		value = blk_queue_secdiscard(q);
+	])
+])
+
+dnl #
+dnl # Report which secure-erase probe compiled, trying the newest interface
+dnl # first: bdev_max_secure_erase_sectors(), then blk_queue_secure_erase(),
+dnl # then blk_queue_secdiscard().  The HAVE_* macro of the first probe that
+dnl # compiled is defined; ZFS_LINUX_TEST_ERROR is invoked when none did.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [
+	AC_MSG_CHECKING([whether bdev_max_secure_erase_sectors() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_max_secure_erase_sectors], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS, 1,
+		    [bdev_max_secure_erase_sectors() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blk_queue_secure_erase() is available])
+		ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1,
+			    [blk_queue_secure_erase() is available])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether blk_queue_secdiscard() is available])
+			ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1,
+				    [blk_queue_secdiscard() is available])
+			],[
+				ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase])
+			])
+		])
+	])
+])
+
+dnl #
+dnl # 4.16 API change,
+dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [
+	ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [
+		#include <linux/kernel.h>
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue *q = NULL;
+		blk_queue_flag_set(0, q);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [
+	AC_MSG_CHECKING([whether blk_queue_flag_set() exists])
+	ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1,
+		    [blk_queue_flag_set() exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [
+	ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [
+		#include <linux/kernel.h>
+		#include <linux/blkdev.h>
+	],[
+		struct request_queue *q = NULL;
+		blk_queue_flag_clear(0, q);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [
+	AC_MSG_CHECKING([whether blk_queue_flag_clear() exists])
+	ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1,
+		    [blk_queue_flag_clear() exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.36 API change,
+dnl # Added the blk_queue_flush() interface.  While the previous interface
+dnl # was available to all, the new one is GPL-only.  Thus, in addition to
+dnl # detecting if this function is available, we determine if it is
+dnl # GPL-only.  If the GPL-only interface is there we implement our own
+dnl # compatibility function; otherwise we use the function.  The hope
+dnl # is that long term this function will be opened up.
+dnl #
+dnl # 4.7 API change,
+dnl # Replace blk_queue_flush with blk_queue_write_cache
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
+	ZFS_LINUX_TEST_SRC([blk_queue_flush], [
+		#include <linux/blkdev.h>
+	], [
+		struct request_queue *q __attribute__ ((unused)) = NULL;
+		(void) blk_queue_flush(q, REQ_FLUSH);
+	], [], [ZFS_META_LICENSE])
+
+	ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [
+		#include <linux/kernel.h>
+		#include <linux/blkdev.h>
+	], [
+		struct request_queue *q __attribute__ ((unused)) = NULL;
+		blk_queue_write_cache(q, true, true);
+	], [], [ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [
+	AC_MSG_CHECKING([whether blk_queue_flush() is available])
+	ZFS_LINUX_TEST_RESULT([blk_queue_flush], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1,
+		    [blk_queue_flush() is available])
+
+		AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only])
+		ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [
+			AC_MSG_RESULT(no)
+		],[
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1,
+			    [blk_queue_flush() is GPL-only])
+		])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	dnl #
+	dnl # 4.7 API change
+	dnl # Replace blk_queue_flush with blk_queue_write_cache
+	dnl #
+	AC_MSG_CHECKING([whether blk_queue_write_cache() exists])
+	ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1,
+		    [blk_queue_write_cache() exists])
+
+		AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only])
+		ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [
+			AC_MSG_RESULT(no)
+		],[
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1,
+			    [blk_queue_write_cache() is GPL-only])
+		])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.34 API change
+dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [
+	ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [
+		#include <linux/blkdev.h>
+	], [
+		struct request_queue *q __attribute__ ((unused)) = NULL;
+		(void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
+	AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available])
+	ZFS_LINUX_TEST_RESULT([blk_queue_max_hw_sectors], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([blk_queue_max_hw_sectors])
+	])
+])
+
+dnl #
+dnl # 2.6.34 API change
+dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments()
+dnl # and blk_queue_max_phys_segments().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [
+	ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [
+		#include <linux/blkdev.h>
+	], [
+		struct request_queue *q __attribute__ ((unused)) = NULL;
+		(void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
+	AC_MSG_CHECKING([whether blk_queue_max_segments() is available])
+	ZFS_LINUX_TEST_RESULT([blk_queue_max_segments], [
+		AC_MSG_RESULT(yes)
+	], [
+		ZFS_LINUX_TEST_ERROR([blk_queue_max_segments])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
+	ZFS_AC_KERNEL_BLK_QUEUE_PLUG
+	ZFS_AC_KERNEL_BLK_QUEUE_BDI
+	ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD
+	ZFS_AC_KERNEL_BLK_QUEUE_DISCARD
+	ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE
+	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET
+	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR
+	ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
+	ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
+	ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
+])

diff --git a/zfs/config/kernel-blkdev-change.m4 b/zfs/config/kernel-blkdev-change.m4
deleted file mode 100644
index acaf12b..0000000
--- a/zfs/config/kernel-blkdev-change.m4
+++ /dev/null

@@ -1,62 +0,0 @@
-dnl #
-dnl # check_disk_change() was removed in 5.10
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE], [
-	ZFS_LINUX_TEST_SRC([check_disk_change], [
-		#include <linux/fs.h>
-		#include <linux/blkdev.h>
-	], [
-		struct block_device *bdev = NULL;
-		bool error;
-
-		error = check_disk_change(bdev);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
-	AC_MSG_CHECKING([whether check_disk_change() exists])
-	ZFS_LINUX_TEST_RESULT([check_disk_change], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CHECK_DISK_CHANGE, 1,
-		    [check_disk_change() exists])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 5.10 API, check_disk_change() is removed, in favor of
-dnl # bdev_check_media_change(), which doesn't force revalidation
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
-	ZFS_LINUX_TEST_SRC([bdev_check_media_change], [
-		#include <linux/fs.h>
-		#include <linux/blkdev.h>
-	], [
-		struct block_device *bdev = NULL;
-		int error;
-
-		error = bdev_check_media_change(bdev);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
-	AC_MSG_CHECKING([whether bdev_disk_changed() exists])
-	ZFS_LINUX_TEST_RESULT([bdev_check_media_change], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BDEV_CHECK_MEDIA_CHANGE, 1,
-		    [bdev_check_media_change() exists])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_CHANGE], [
-	ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE
-	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHANGE], [
-	ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE
-	ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
-])

diff --git a/zfs/config/kernel-blkdev-get-by-path.m4 b/zfs/config/kernel-blkdev-get-by-path.m4
deleted file mode 100644
index 62e4f5b..0000000
--- a/zfs/config/kernel-blkdev-get-by-path.m4
+++ /dev/null

@@ -1,25 +0,0 @@
-dnl #
-dnl # 2.6.38 API change
-dnl # open_bdev_exclusive() changed to blkdev_get_by_path()
-dnl # close_bdev_exclusive() changed to blkdev_put()
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [
-	ZFS_LINUX_TEST_SRC([blkdev_get_by_path], [
-		#include <linux/fs.h>
-		#include <linux/blkdev.h>
-	], [
-		blkdev_get_by_path(NULL, 0, NULL);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
-	AC_MSG_CHECKING([whether blkdev_get_by_path() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([blkdev_get_by_path],
-	    [blkdev_get_by_path], [fs/block_dev.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH, 1,
-		    [blkdev_get_by_path() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blkdev-reread-part.m4 b/zfs/config/kernel-blkdev-reread-part.m4
deleted file mode 100644
index 011eeae..0000000
--- a/zfs/config/kernel-blkdev-reread-part.m4
+++ /dev/null

@@ -1,26 +0,0 @@
-dnl #
-dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the
-dnl # 3.10.0 CentOS 7.x enterprise kernels.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART], [
-	ZFS_LINUX_TEST_SRC([blkdev_reread_part], [
-		#include <linux/fs.h>
-		#include <linux/blkdev.h>
-	], [
-		struct block_device *bdev = NULL;
-		int error;
-
-		error = blkdev_reread_part(bdev);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [
-	AC_MSG_CHECKING([whether blkdev_reread_part() is available])
-	ZFS_LINUX_TEST_RESULT([blkdev_reread_part], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1,
-		    [blkdev_reread_part() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-blkdev.m4 b/zfs/config/kernel-blkdev.m4
new file mode 100644
index 0000000..e04a2bd
--- /dev/null
+++ b/zfs/config/kernel-blkdev.m4

@@ -0,0 +1,611 @@
+dnl #
+dnl # 2.6.38 API change,
+dnl # Added blkdev_get_by_path()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [
+	ZFS_LINUX_TEST_SRC([blkdev_get_by_path], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		const char *path = "path";
+		fmode_t mode = 0;
+		void *holder = NULL;
+
+		bdev = blkdev_get_by_path(path, mode, holder);
+	])
+])
+
+dnl #
+dnl # 6.5.x API change,
+dnl # blkdev_get_by_path() takes 4 args
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
+	ZFS_LINUX_TEST_SRC([blkdev_get_by_path_4arg], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		const char *path = "path";
+		fmode_t mode = 0;
+		void *holder = NULL;
+		struct blk_holder_ops h;
+
+		bdev = blkdev_get_by_path(path, mode, holder, &h);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
+	AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
+	ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 4 args])
+		ZFS_LINUX_TEST_RESULT([blkdev_get_by_path_4arg], [
+			AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH_4ARG, 1,
+				[blkdev_get_by_path() exists and takes 4 args])
+			AC_MSG_RESULT(yes)
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
+		])
+	])
+])
+
+dnl #
+dnl # 6.5.x API change
+dnl # blk_mode_t was added as a type to supersede some places where fmode_t
+dnl # is used.  The probe only declares a blk_mode_t variable, so it
+dnl # compiles exactly when the type is known to the included headers.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T], [
+	ZFS_LINUX_TEST_SRC([blk_mode_t], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		blk_mode_t m __attribute__ ((unused)) = (blk_mode_t)0;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
+	AC_MSG_CHECKING([whether blk_mode_t is defined])
+	ZFS_LINUX_TEST_RESULT([blk_mode_t], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_MODE_T, 1, [blk_mode_t is defined])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.38 API change,
+dnl # Added blkdev_put()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT], [
+	ZFS_LINUX_TEST_SRC([blkdev_put], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		fmode_t mode = 0;
+
+		blkdev_put(bdev, mode);
+	])
+])
+
+dnl #
+dnl # 6.5.x API change.
+dnl # blkdev_put() takes (void* holder) as arg 2
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [
+	ZFS_LINUX_TEST_SRC([blkdev_put_holder], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		void *holder = NULL;
+
+		blkdev_put(bdev, holder);
+	])
+])
+
+dnl #
+dnl # Report which blkdev_put() prototype compiled: the one taking a mode as
+dnl # arg 2, or the 6.5.x one taking a void* holder as arg 2, in which case
+dnl # HAVE_BLKDEV_PUT_HOLDER is defined.  ZFS_LINUX_TEST_ERROR is invoked
+dnl # when neither probe compiled.
+dnl #
+dnl # Every AC_MSG_CHECKING is closed by an AC_MSG_RESULT before the next
+dnl # check starts, so the configure output stays one check per line.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
+	AC_MSG_CHECKING([whether blkdev_put() exists])
+	ZFS_LINUX_TEST_RESULT([blkdev_put], [
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
+		ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1,
+				[blkdev_put() accepts void* as arg 2])
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_put()])
+		])
+	])
+])
+
+dnl #
+dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the
+dnl # 3.10.0 CentOS 7.x enterprise kernels.
+dnl #
+dnl # The probe only assigns the return value, so the variable is marked
+dnl # unused to keep the snippet free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART], [
+	ZFS_LINUX_TEST_SRC([blkdev_reread_part], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		int error __attribute__ ((unused));
+
+		error = blkdev_reread_part(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [
+	AC_MSG_CHECKING([whether blkdev_reread_part() exists])
+	ZFS_LINUX_TEST_RESULT([blkdev_reread_part], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1,
+		    [blkdev_reread_part() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # check_disk_change() was removed in 5.10
+dnl #
+dnl # The probe only assigns the return value, so the variable is marked
+dnl # unused to keep the snippet free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE], [
+	ZFS_LINUX_TEST_SRC([check_disk_change], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		bool error __attribute__ ((unused));
+
+		error = check_disk_change(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
+	AC_MSG_CHECKING([whether check_disk_change() exists])
+	ZFS_LINUX_TEST_RESULT([check_disk_change], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_CHECK_DISK_CHANGE, 1,
+		    [check_disk_change() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 6.5.x API change
+dnl # disk_check_media_change() was added
+dnl #
+dnl # The probe passes bdev->bd_disk to the function and only assigns the
+dnl # return value, so the variable is marked unused to keep the snippet
+dnl # free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+	ZFS_LINUX_TEST_SRC([disk_check_media_change], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		bool error __attribute__ ((unused));
+
+		error = disk_check_media_change(bdev->bd_disk);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+	AC_MSG_CHECKING([whether disk_check_media_change() exists])
+	ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
+		    [disk_check_media_change() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # bdev_kobj() is introduced from 5.12
+dnl #
+dnl # The probe only assigns the returned kobject pointer, so the variable
+dnl # is marked unused to keep the snippet free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ], [
+	ZFS_LINUX_TEST_SRC([bdev_kobj], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+		#include <linux/kobject.h>
+	], [
+		struct block_device *bdev = NULL;
+		struct kobject *disk_kobj __attribute__ ((unused));
+		disk_kobj = bdev_kobj(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ], [
+	AC_MSG_CHECKING([whether bdev_kobj() exists])
+	ZFS_LINUX_TEST_RESULT([bdev_kobj], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_KOBJ, 1,
+		    [bdev_kobj() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # part_to_dev() was removed in 5.12
+dnl #
+dnl # The probe only assigns the returned device pointer, so the variable
+dnl # is marked unused to keep the snippet free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV], [
+	ZFS_LINUX_TEST_SRC([part_to_dev], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct hd_struct *p = NULL;
+		struct device *pdev __attribute__ ((unused));
+		pdev = part_to_dev(p);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV], [
+	AC_MSG_CHECKING([whether part_to_dev() exists])
+	ZFS_LINUX_TEST_RESULT([part_to_dev], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_PART_TO_DEV, 1,
+		    [part_to_dev() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.10 API, check_disk_change() is removed, in favor of
+dnl # bdev_check_media_change(), which doesn't force revalidation
+dnl #
+dnl # The probe only assigns the return value, so the variable is marked
+dnl # unused to keep the snippet free of set-but-unused warnings.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
+	ZFS_LINUX_TEST_SRC([bdev_check_media_change], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		int error __attribute__ ((unused));
+
+		error = bdev_check_media_change(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
+	AC_MSG_CHECKING([whether bdev_check_media_change() exists])
+	ZFS_LINUX_TEST_RESULT([bdev_check_media_change], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_CHECK_MEDIA_CHANGE, 1,
+		    [bdev_check_media_change() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 2.6.22 API change
+dnl # Single argument invalidate_bdev()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV], [
+	ZFS_LINUX_TEST_SRC([invalidate_bdev], [
+		#include <linux/buffer_head.h>
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		invalidate_bdev(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_INVALIDATE_BDEV], [
+	AC_MSG_CHECKING([whether invalidate_bdev() exists])
+	ZFS_LINUX_TEST_RESULT([invalidate_bdev], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([invalidate_bdev()])
+	])
+])
+
+dnl #
+dnl # 5.11 API, lookup_bdev() takes dev_t argument.
+dnl # 2.6.27 API, lookup_bdev() was first exported.
+dnl # 4.4.0-6.21 API, lookup_bdev() on Ubuntu takes mode argument.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [
+	ZFS_LINUX_TEST_SRC([lookup_bdev_devt], [
+		#include <linux/blkdev.h>
+	], [
+		int error __attribute__ ((unused));
+		const char path[] = "/example/path";
+		dev_t dev;
+
+		error = lookup_bdev(path, &dev);
+	])
+
+	ZFS_LINUX_TEST_SRC([lookup_bdev_1arg], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused));
+		const char path[] = "/example/path";
+
+		bdev = lookup_bdev(path);
+	])
+
+	ZFS_LINUX_TEST_SRC([lookup_bdev_mode], [
+		#include <linux/fs.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused));
+		const char path[] = "/example/path";
+
+		bdev = lookup_bdev(path, FMODE_READ);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [
+	AC_MSG_CHECKING([whether lookup_bdev() wants dev_t arg])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_devt],
+	    [lookup_bdev], [fs/block_dev.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_DEVT_LOOKUP_BDEV, 1,
+		    [lookup_bdev() wants dev_t arg])
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg],
+		    [lookup_bdev], [fs/block_dev.c], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1,
+			    [lookup_bdev() wants 1 arg])
+		], [
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether lookup_bdev() wants mode arg])
+			ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_mode],
+			    [lookup_bdev], [fs/block_dev.c], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_MODE_LOOKUP_BDEV, 1,
+				    [lookup_bdev() wants mode arg])
+			], [
+				ZFS_LINUX_TEST_ERROR([lookup_bdev()])
+			])
+		])
+	])
+])
+
+dnl #
+dnl # 2.6.30 API change
+dnl #
+dnl # The bdev_physical_block_size() interface was added to provide a way
+dnl # to determine the smallest write which can be performed without a
+dnl # read-modify-write operation.
+dnl #
+dnl # Unfortunately, this interface isn't entirely reliable because
+dnl # drives are sometimes known to misreport this value.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE], [
+	ZFS_LINUX_TEST_SRC([bdev_physical_block_size], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		bdev_physical_block_size(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE], [
+	AC_MSG_CHECKING([whether bdev_physical_block_size() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_physical_block_size], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([bdev_physical_block_size()])
+	])
+])
+
+dnl #
+dnl # 2.6.30 API change
+dnl # Added bdev_logical_block_size().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_LOGICAL_BLOCK_SIZE], [
+	ZFS_LINUX_TEST_SRC([bdev_logical_block_size], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		bdev_logical_block_size(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_LOGICAL_BLOCK_SIZE], [
+	AC_MSG_CHECKING([whether bdev_logical_block_size() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_logical_block_size], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([bdev_logical_block_size()])
+	])
+])
+
+dnl #
+dnl # 5.11 API change
+dnl # Added bdev_whole() helper.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE], [
+	ZFS_LINUX_TEST_SRC([bdev_whole], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		bdev = bdev_whole(bdev);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE], [
+	AC_MSG_CHECKING([whether bdev_whole() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_whole], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_WHOLE, 1, [bdev_whole() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.20 API change,
+dnl # Removed bdevname(), snprintf(.., %pg) should be used.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME], [
+	ZFS_LINUX_TEST_SRC([bdevname], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		char path[BDEVNAME_SIZE];
+
+		(void) bdevname(bdev, path);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
+	AC_MSG_CHECKING([whether bdevname() exists])
+	ZFS_LINUX_TEST_RESULT([bdevname], [
+		AC_DEFINE(HAVE_BDEVNAME, 1, [bdevname() is available])
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.19 API: blkdev_issue_secure_erase()
+dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
+	ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		sector_t sector = 0;
+		sector_t nr_sects = 0;
+		int error __attribute__ ((unused));
+
+		error = blkdev_issue_secure_erase(bdev,
+		    sector, nr_sects, GFP_KERNEL);
+	])
+
+	ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		sector_t sector = 0;
+		sector_t nr_sects = 0;
+		unsigned long flags = 0;
+		int error __attribute__ ((unused));
+
+		error = blkdev_issue_discard(bdev,
+		    sector, nr_sects, GFP_KERNEL, flags);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
+	AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
+	ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLKDEV_ISSUE_SECURE_ERASE, 1,
+		    [blkdev_issue_secure_erase() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
+		ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
+			    [blkdev_issue_discard() is available])
+		],[
+			ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
+		])
+	])
+])
+
+dnl #
+dnl # 5.13 API change
+dnl # blkdev_get_by_path() no longer handles ERESTARTSYS
+dnl #
+dnl # Unfortunately we're forced to rely solely on the kernel version
+dnl # number in order to determine the expected behavior.  This was an
+dnl # internal change to blkdev_get_by_dev(), see commit a8ed1a0607.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
+	AC_MSG_CHECKING([whether blkdev_get_by_path() handles ERESTARTSYS])
+	AS_VERSION_COMPARE([$LINUX_VERSION], [5.13.0], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLKDEV_GET_ERESTARTSYS, 1,
+			[blkdev_get_by_path() handles ERESTARTSYS])
+	],[
+		AC_MSG_RESULT(no)
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 6.5.x API change
+dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
+	ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
+		#include <linux/blkdev.h>
+	],[
+		blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
+	])
+])
+
+dnl #
+dnl # Report whether the blk_sts_resv_conflict probe compiled and, if so,
+dnl # define HAVE_BLK_STS_RESV_CONFLICT.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
+	AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
+	ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
+		AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1,
+		    [BLK_STS_RESV_CONFLICT is defined])
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
+	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
+	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
+	ZFS_AC_KERNEL_SRC_BLKDEV_PUT
+	ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
+	ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART
+	ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV
+	ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_LOGICAL_BLOCK_SIZE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE
+	ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
+	ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
+	ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
+	ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
+	ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH
+	ZFS_AC_KERNEL_BLKDEV_PUT
+	ZFS_AC_KERNEL_BLKDEV_REREAD_PART
+	ZFS_AC_KERNEL_BLKDEV_INVALIDATE_BDEV
+	ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV
+	ZFS_AC_KERNEL_BLKDEV_BDEV_LOGICAL_BLOCK_SIZE
+	ZFS_AC_KERNEL_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE
+	ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE
+	ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
+	ZFS_AC_KERNEL_BLKDEV_BDEVNAME
+	ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
+	ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
+	ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
+	ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
+	ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
+])

diff --git a/zfs/config/kernel-block-device-operations.m4 b/zfs/config/kernel-block-device-operations.m4
index c3d5eec..d13c133 100644
--- a/zfs/config/kernel-block-device-operations.m4
+++ b/zfs/config/kernel-block-device-operations.m4

@@ -6,23 +6,24 @@
 		#include <linux/blkdev.h>
 
 		unsigned int blk_check_events(struct gendisk *disk,
-		    unsigned int clearing) { return (0); }
+		    unsigned int clearing) {
+			(void) disk, (void) clearing;
+			return (0);
+		}
 
 		static const struct block_device_operations
 		    bops __attribute__ ((unused)) = {
 			.check_events	= blk_check_events,
 		};
-	], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+	], [], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
 	AC_MSG_CHECKING([whether bops->check_events() exists])
 	ZFS_LINUX_TEST_RESULT([block_device_operations_check_events], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1,
-		    [bops->check_events() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([bops->check_events()])
 	])
 ])
 
@@ -33,7 +34,10 @@
 	ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [
 		#include <linux/blkdev.h>
 
-		void blk_release(struct gendisk *g, fmode_t mode) { return; }
+		void blk_release(struct gendisk *g, fmode_t mode) {
+			(void) g, (void) mode;
+			return;
+		}
 
 		static const struct block_device_operations
 		    bops __attribute__ ((unused)) = {
@@ -42,15 +46,74 @@
 			.ioctl		= NULL,
 			.compat_ioctl	= NULL,
 		};
-	], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+	], [], [])
+])
+
+dnl #
+dnl # 5.9.x API change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [
+		#include <linux/blkdev.h>
+
+		void blk_release(struct gendisk *g) {
+			(void) g;
+			return;
+		}
+
+		static const struct block_device_operations
+		    bops __attribute__ ((unused)) = {
+			.open		= NULL,
+			.release	= blk_release,
+			.ioctl		= NULL,
+			.compat_ioctl	= NULL,
+		};
+	], [], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
-	AC_MSG_CHECKING([whether bops->release() is void])
+	AC_MSG_CHECKING([whether bops->release() is void and takes 2 args])
 	ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
-		          [bops->release() returns void])
+	],[
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether bops->release() is void and takes 1 arg])
+		ZFS_LINUX_TEST_RESULT([block_device_operations_release_void_1arg], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [1],
+				[Define if release() in block_device_operations takes 1 arg])
+		],[
+			ZFS_LINUX_TEST_ERROR([bops->release()])
+		])
+	])
+])
+
+dnl #
+dnl # 5.13 API change
+dnl # block_device_operations->revalidate_disk() was removed
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_revalidate_disk], [
+		#include <linux/blkdev.h>
+
+		int blk_revalidate_disk(struct gendisk *disk) {
+			(void) disk;
+			return(0);
+		}
+
+		static const struct block_device_operations
+		    bops __attribute__ ((unused)) = {
+			.revalidate_disk	= blk_revalidate_disk,
+		};
+	], [], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
+	AC_MSG_CHECKING([whether bops->revalidate_disk() exists])
+	ZFS_LINUX_TEST_RESULT([block_device_operations_revalidate_disk], [
+		AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [1],
+			[Define if revalidate_disk() in block_device_operations])
+		AC_MSG_RESULT(yes)
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -59,9 +122,12 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS], [
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 ])

diff --git a/zfs/config/kernel-commit-metadata.m4 b/zfs/config/kernel-commit-metadata.m4
index 9bc3b66..7df9b98 100644
--- a/zfs/config/kernel-commit-metadata.m4
+++ b/zfs/config/kernel-commit-metadata.m4

@@ -18,9 +18,7 @@
 	AC_MSG_CHECKING([whether eops->commit_metadata() exists])
 	ZFS_LINUX_TEST_RESULT([export_operations_commit_metadata], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_COMMIT_METADATA, 1,
-		    [eops->commit_metadata() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([eops->commit_metadata()])
 	])
 ])

diff --git a/zfs/config/kernel-config-defined.m4 b/zfs/config/kernel-config-defined.m4
index fe778e6..54837d7 100644
--- a/zfs/config/kernel-config-defined.m4
+++ b/zfs/config/kernel-config-defined.m4

@@ -19,49 +19,47 @@
 		])
 	])
 
-	ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_SRC_CONFIG_MODULES
+	ZFS_AC_KERNEL_SRC_CONFIG_BLOCK
 	ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC
 	ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS
-	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE
 	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE
+	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE
 
 	AC_MSG_CHECKING([for kernel config option compatibility])
 	ZFS_LINUX_TEST_COMPILE_ALL([config])
 	AC_MSG_RESULT([done])
 
-	ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_CONFIG_MODULES
+	ZFS_AC_KERNEL_CONFIG_BLOCK
 	ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
 	ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS
-	ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE
 	ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE
+	ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE
 ])
 
 dnl #
-dnl # Check configured THREAD_SIZE
+dnl # Check CONFIG_BLOCK
 dnl #
-dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
-dnl # the default thread stack size was increased to 16K from 8K.  Therefore,
-dnl # on newer kernels and some architectures stack usage optimizations can be
-dnl # conditionally applied to improve performance without negatively impacting
-dnl # stability.
+dnl # Verify the kernel has CONFIG_BLOCK support enabled.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE], [
-	ZFS_LINUX_TEST_SRC([config_thread_size], [
-		#include <linux/module.h>
-	],[
-		#if (THREAD_SIZE < 16384)
-		#error "THREAD_SIZE is less than 16K"
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_BLOCK], [
+	ZFS_LINUX_TEST_SRC([config_block], [
+		#if !defined(CONFIG_BLOCK)
+		#error CONFIG_BLOCK not defined
 		#endif
-	])
+	],[])
 ])
 
-AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
-	AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
-	ZFS_LINUX_TEST_RESULT([config_thread_size], [
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_BLOCK], [
+	AC_MSG_CHECKING([whether CONFIG_BLOCK is defined])
+	ZFS_LINUX_TEST_RESULT([config_block], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
 	],[
 		AC_MSG_RESULT([no])
+		AC_MSG_ERROR([
+	*** This kernel does not include the required block device support.
+	*** Rebuild the kernel with CONFIG_BLOCK=y set.])
 	])
 ])
 
@@ -86,7 +84,7 @@
 		mutex_init(&lock);
 		mutex_lock(&lock);
 		mutex_unlock(&lock);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
@@ -104,6 +102,61 @@
 ])
 
 dnl #
+dnl # Check CONFIG_MODULES
+dnl #
+dnl # Verify the kernel has CONFIG_MODULES support enabled.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_MODULES], [
+	ZFS_LINUX_TEST_SRC([config_modules], [
+		#if !defined(CONFIG_MODULES)
+		#error CONFIG_MODULES not defined
+		#endif
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_MODULES], [
+	AC_MSG_CHECKING([whether CONFIG_MODULES is defined])
+	AS_IF([test "x$enable_linux_builtin" != xyes], [
+		ZFS_LINUX_TEST_RESULT([config_modules], [
+			AC_MSG_RESULT([yes])
+		],[
+			AC_MSG_RESULT([no])
+			AC_MSG_ERROR([
+		*** This kernel does not include the required loadable module
+		*** support!
+		***
+		*** To build OpenZFS as a loadable Linux kernel module
+		*** enable loadable module support by setting
+		*** `CONFIG_MODULES=y` in the kernel configuration and run
+		*** `make modules_prepare` in the Linux source tree.
+		***
+		*** If you don't intend to enable loadable kernel module
+		*** support, please compile OpenZFS as a Linux kernel built-in.
+		***
+		*** Prepare the Linux source tree by running `make prepare`,
+		*** use the OpenZFS `--enable-linux-builtin` configure option,
+		*** copy the OpenZFS sources into the Linux source tree using
+		*** `./copy-builtin <linux source directory>`,
+		*** set `CONFIG_ZFS=y` in the kernel configuration and compile
+		*** kernel as usual.
+			])
+		])
+	], [
+		ZFS_LINUX_TRY_COMPILE([], [], [
+			AC_MSG_RESULT([not needed])
+		],[
+			AC_MSG_RESULT([error])
+			AC_MSG_ERROR([
+		*** This kernel is unable to compile object files.
+		***
+		*** Please make sure you prepared the Linux source tree
+		*** by running `make prepare` there.
+			])
+		])
+	])
+])
+
+dnl #
 dnl # Check CONFIG_TRIM_UNUSED_KSYMS
 dnl #
 dnl # Verify the kernel has CONFIG_TRIM_UNUSED_KSYMS disabled.

diff --git a/zfs/config/kernel-copy-from-user-inatomic.m4 b/zfs/config/kernel-copy-from-user-inatomic.m4
new file mode 100644
index 0000000..fec354b
--- /dev/null
+++ b/zfs/config/kernel-copy-from-user-inatomic.m4

@@ -0,0 +1,29 @@
+dnl #
+dnl # On certain architectures `__copy_from_user_inatomic`
+dnl # is a GPL exported variable and cannot be used by OpenZFS.
+dnl #
+
+dnl #
+dnl # Checking if `__copy_from_user_inatomic` is available.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC], [
+	ZFS_LINUX_TEST_SRC([__copy_from_user_inatomic], [
+		#include <linux/uaccess.h>
+	], [
+		int result __attribute__ ((unused)) = __copy_from_user_inatomic(NULL, NULL, 0);
+	], [], [ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC], [
+	AC_MSG_CHECKING([whether __copy_from_user_inatomic is available])
+	ZFS_LINUX_TEST_RESULT([__copy_from_user_inatomic_license], [
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+		AC_MSG_ERROR([
+	*** The `__copy_from_user_inatomic()` Linux kernel function is
+	*** incompatible with the CDDL license and will prevent the module
+	*** linking stage from succeeding.  OpenZFS cannot be compiled.
+		])
+	])
+])

diff --git a/zfs/config/kernel-cpu_has_feature.m4 b/zfs/config/kernel-cpu_has_feature.m4
new file mode 100644
index 0000000..608faf0
--- /dev/null
+++ b/zfs/config/kernel-cpu_has_feature.m4

@@ -0,0 +1,29 @@
+dnl #
+dnl # cpu_has_feature() may reference GPL-only cpu_feature_keys on powerpc
+dnl #
+
+dnl #
+dnl # Checking if cpu_has_feature is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE], [
+	ZFS_LINUX_TEST_SRC([cpu_has_feature], [
+		#include <linux/version.h>
+		#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		#include <asm/cpu_has_feature.h>
+		#else
+		#include <asm/cputable.h>
+		#endif
+	], [
+		return cpu_has_feature(CPU_FTR_ALTIVEC) ? 0 : 1;
+	], [], [ZFS_META_LICENSE])
+])
+AC_DEFUN([ZFS_AC_KERNEL_CPU_HAS_FEATURE], [
+	AC_MSG_CHECKING([whether cpu_has_feature() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([cpu_has_feature_license], [
+		AC_MSG_RESULT([no])
+	],[
+		dnl # Negative test result: define the GPL-only marker and report yes.
+		AC_DEFINE([HAVE_CPU_HAS_FEATURE_GPL_ONLY], [1], [cpu_has_feature() is GPL-only])
+		AC_MSG_RESULT([yes])
+	])
+])

diff --git a/zfs/config/kernel-create-nameidata.m4 b/zfs/config/kernel-create-nameidata.m4
deleted file mode 100644
index c43ca5b..0000000
--- a/zfs/config/kernel-create-nameidata.m4
+++ /dev/null

@@ -1,33 +0,0 @@
-dnl #
-dnl # 3.6 API change
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA], [
-	ZFS_LINUX_TEST_SRC([create_nameidata], [
-		#include <linux/fs.h>
-		#include <linux/sched.h>
-
-		#ifdef HAVE_MKDIR_UMODE_T
-		int inode_create(struct inode *inode ,struct dentry *dentry,
-		    umode_t umode, struct nameidata *nidata) { return 0; }
-		#else
-		int inode_create(struct inode *inode,struct dentry *dentry,
-		    int umode, struct nameidata * nidata) { return 0; }
-		#endif
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.create		= inode_create,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_CREATE_NAMEIDATA], [
-	AC_MSG_CHECKING([whether iops->create() passes nameidata])
-	ZFS_LINUX_TEST_RESULT([create_nameidata], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CREATE_NAMEIDATA, 1,
-		    [iops->create() passes nameidata])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-ctl-table-name.m4 b/zfs/config/kernel-ctl-table-name.m4
deleted file mode 100644
index 16f2ad5..0000000
--- a/zfs/config/kernel-ctl-table-name.m4
+++ /dev/null

@@ -1,22 +0,0 @@
-dnl #
-dnl # 2.6.33 API change,
-dnl # Removed .ctl_name from struct ctl_table.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CTL_NAME], [
-	ZFS_LINUX_TEST_SRC([ctl_name], [
-		#include <linux/sysctl.h>
-	],[
-		struct ctl_table ctl __attribute__ ((unused));
-		ctl.ctl_name = 0;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_CTL_NAME], [
-	AC_MSG_CHECKING([whether struct ctl_table has ctl_name])
-	ZFS_LINUX_TEST_RESULT([ctl_name], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CTL_NAME, 1, [struct ctl_table has ctl_name])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-current_bio_tail.m4 b/zfs/config/kernel-current_bio_tail.m4
deleted file mode 100644
index 9dfc3e6..0000000
--- a/zfs/config/kernel-current_bio_tail.m4
+++ /dev/null

@@ -1,39 +0,0 @@
-dnl #
-dnl # 2.6.34 API change
-dnl # current->bio_tail and current->bio_list were struct bio pointers prior to
-dnl # Linux 2.6.34. They were refactored into a struct bio_list pointer called
-dnl # current->bio_list in Linux 2.6.34.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL], [
-	ZFS_LINUX_TEST_SRC([current_bio_tail], [
-		#include <linux/sched.h>
-	], [
-		current->bio_tail = (struct bio **) NULL;
-	])
-
-	ZFS_LINUX_TEST_SRC([current_bio_list], [
-		#include <linux/sched.h>
-	], [
-		current->bio_list = (struct bio_list *) NULL;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [
-	AC_MSG_CHECKING([whether current->bio_tail exists])
-	ZFS_LINUX_TEST_RESULT([current_bio_tail], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CURRENT_BIO_TAIL, 1,
-		    [current->bio_tail exists])
-	],[
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether current->bio_list exists])
-		ZFS_LINUX_TEST_RESULT([current_bio_list], [
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_CURRENT_BIO_LIST, 1,
-			    [current->bio_list exists])
-		],[
-			ZFS_LINUX_TEST_ERROR([bio_list])
-		])
-	])
-])

diff --git a/zfs/config/kernel-dentry-alias.m4 b/zfs/config/kernel-dentry-alias.m4
new file mode 100644
index 0000000..f0ddb8d
--- /dev/null
+++ b/zfs/config/kernel-dentry-alias.m4

@@ -0,0 +1,30 @@
+dnl #
+dnl # 3.18 API change
+dnl # Dentry aliases are in the d_u member of struct dentry
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [
+	ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [
+		#include <linux/fs.h>
+		#include <linux/dcache.h>
+		#include <linux/list.h>
+	], [
+		struct inode *inode __attribute__ ((unused)) = NULL;
+		struct dentry *dentry __attribute__ ((unused)) = NULL;
+		hlist_for_each_entry(dentry, &inode->i_dentry,
+		    d_u.d_alias) {
+			d_drop(dentry);
+		}
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [
+	AC_MSG_CHECKING([whether dentry aliases are in d_u member])
+	ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [
+		dnl # Positive test result: define the feature macro and report yes.
+		AC_DEFINE([HAVE_DENTRY_D_U_ALIASES], [1], [dentry aliases are in d_u member])
+		AC_MSG_RESULT([yes])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+

diff --git a/zfs/config/kernel-dentry-operations.m4 b/zfs/config/kernel-dentry-operations.m4
index 2dfd2ac..dd470d7 100644
--- a/zfs/config/kernel-dentry-operations.m4
+++ b/zfs/config/kernel-dentry-operations.m4

@@ -38,10 +38,8 @@
 	ZFS_LINUX_TEST_RESULT_SYMBOL([d_obtain_alias],
 	    [d_obtain_alias], [fs/dcache.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1,
-		          [d_obtain_alias() is available])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([d_obtain_alias()])
 	])
 ])
 
@@ -66,7 +64,7 @@
 		AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
 		    [d_prune_aliases() is available])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([d_prune_aliases()])
 	])
 ])
 
@@ -87,9 +85,8 @@
 	ZFS_LINUX_TEST_RESULT_SYMBOL([d_set_d_op],
 	    [d_set_d_op], [fs/dcache.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_D_SET_D_OP, 1, [d_set_d_op() is available])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([d_set_d_op])
 	])
 ])
 
@@ -146,7 +143,7 @@
 		AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1,
 		    [dentry uses const struct dentry_operations])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([const dentry_operations])
 	])
 ])
 
@@ -167,9 +164,8 @@
 	AC_MSG_CHECKING([whether super_block has s_d_op])
 	ZFS_LINUX_TEST_RESULT([super_block_s_d_op], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_S_D_OP, 1, [struct super_block has s_d_op])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([super_block s_d_op])
 	])
 ])
 

diff --git a/zfs/config/kernel-discard-granularity.m4 b/zfs/config/kernel-discard-granularity.m4
index c830d9a..61326e6 100644
--- a/zfs/config/kernel-discard-granularity.m4
+++ b/zfs/config/kernel-discard-granularity.m4

@@ -15,9 +15,7 @@
 	AC_MSG_CHECKING([whether ql->discard_granularity is available])
 	ZFS_LINUX_TEST_RESULT([discard_granularity], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_DISCARD_GRANULARITY, 1,
-		    [ql->discard_granularity is available])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([ql->discard_granularity])
 	])
 ])

diff --git a/zfs/config/kernel-evict-inode.m4 b/zfs/config/kernel-evict-inode.m4
index cd91c66..66f1049 100644
--- a/zfs/config/kernel-evict-inode.m4
+++ b/zfs/config/kernel-evict-inode.m4

@@ -19,6 +19,6 @@
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_EVICT_INODE, 1, [sops->evict_inode() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([evict_inode])
 	])
 ])

diff --git a/zfs/config/kernel-fallocate.m4 b/zfs/config/kernel-fallocate.m4
index 302957a..815602d 100644
--- a/zfs/config/kernel-fallocate.m4
+++ b/zfs/config/kernel-fallocate.m4

@@ -1,10 +1,13 @@
 dnl #
+dnl # Linux 2.6.38 - 3.x API
 dnl # The fallocate callback was moved from the inode_operations
 dnl # structure to the file_operations structure.
 dnl #
+dnl #
+dnl # Linux 3.15+
+dnl # fallocate learned a new flag, FALLOC_FL_ZERO_RANGE
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [
-
-	dnl # Linux 2.6.38 - 3.x API
 	ZFS_LINUX_TEST_SRC([file_fallocate], [
 		#include <linux/fs.h>
 
@@ -16,35 +19,26 @@
 			.fallocate = test_fallocate,
 		};
 	], [])
-
-	dnl # Linux 2.6.x - 2.6.37 API
-	ZFS_LINUX_TEST_SRC([inode_fallocate], [
-		#include <linux/fs.h>
-
-		long test_fallocate(struct inode *inode, int mode,
-		    loff_t offset, loff_t len) { return 0; }
-
-		static const struct inode_operations
-		    fops __attribute__ ((unused)) = {
-			.fallocate = test_fallocate,
-		};
-	], [])
+	ZFS_LINUX_TEST_SRC([falloc_fl_zero_range], [
+		#include <linux/falloc.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = FALLOC_FL_ZERO_RANGE;
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [
 	AC_MSG_CHECKING([whether fops->fallocate() exists])
 	ZFS_LINUX_TEST_RESULT([file_fallocate], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
+		AC_MSG_CHECKING([whether FALLOC_FL_ZERO_RANGE exists])
+		ZFS_LINUX_TEST_RESULT([falloc_fl_zero_range], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_FALLOC_FL_ZERO_RANGE, 1, [FALLOC_FL_ZERO_RANGE is defined])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	],[
-		AC_MSG_RESULT(no)
-	])
-
-	AC_MSG_CHECKING([whether iops->fallocate() exists])
-	ZFS_LINUX_TEST_RESULT([inode_fallocate], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_INODE_FALLOCATE, 1, [fops->fallocate() exists])
-	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([file_fallocate])
 	])
 ])

diff --git a/zfs/config/kernel-filemap-splice-read.m4 b/zfs/config/kernel-filemap-splice-read.m4
new file mode 100644
index 0000000..4c83b31
--- /dev/null
+++ b/zfs/config/kernel-filemap-splice-read.m4

@@ -0,0 +1,25 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ], [
+	dnl #
+	dnl # Kernel 6.5 - generic_file_splice_read was removed in favor
+	dnl # of copy_splice_read for the .splice_read member of the
+	dnl # file_operations struct.
+	dnl #
+	ZFS_LINUX_TEST_SRC([has_copy_splice_read], [
+		#include <linux/fs.h>
+
+		struct file_operations fops __attribute__((unused)) = {
+			.splice_read = copy_splice_read,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_COPY_SPLICE_READ], [
+	AC_MSG_CHECKING([whether copy_splice_read() exists])
+	ZFS_LINUX_TEST_RESULT([has_copy_splice_read], [
+		dnl # Positive test result: define the feature macro and report yes.
+		AC_DEFINE([HAVE_COPY_SPLICE_READ], [1], [copy_splice_read exists])
+		AC_MSG_RESULT([yes])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-filemap.m4 b/zfs/config/kernel-filemap.m4
new file mode 100644
index 0000000..7459281
--- /dev/null
+++ b/zfs/config/kernel-filemap.m4

@@ -0,0 +1,26 @@
+dnl #
+dnl # filemap_range_has_page was not available till 4.13
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
+	ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
+		#include <linux/fs.h>
+	],[
+		struct address_space *mapping = NULL;
+		loff_t lstart = 0;
+		loff_t lend = 0;
+		bool ret __attribute__ ((unused));
+
+		ret = filemap_range_has_page(mapping, lstart, lend);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
+	AC_MSG_CHECKING([whether filemap_range_has_page() is available])
+	ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
+		dnl # Positive test result: define the feature macro and report yes.
+		AC_DEFINE([HAVE_FILEMAP_RANGE_HAS_PAGE], [1], [filemap_range_has_page() is available])
+		AC_MSG_RESULT([yes])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-flush_dcache_page.m4 b/zfs/config/kernel-flush_dcache_page.m4
new file mode 100644
index 0000000..2340c38
--- /dev/null
+++ b/zfs/config/kernel-flush_dcache_page.m4

@@ -0,0 +1,26 @@
+dnl #
+dnl # Starting from Linux 5.13, flush_dcache_page() is an inline
+dnl # function and may indirectly reference GPL-only cpu_feature_keys on
+dnl # powerpc
+dnl #
+
+dnl #
+dnl # Checking if flush_dcache_page is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE], [
+	ZFS_LINUX_TEST_SRC([flush_dcache_page], [
+		#include <asm/cacheflush.h>
+	], [
+		flush_dcache_page(0);
+	], [], [ZFS_META_LICENSE])
+])
+AC_DEFUN([ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE], [
+	AC_MSG_CHECKING([whether flush_dcache_page() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([flush_dcache_page_license], [
+		AC_MSG_RESULT([no])
+	],[
+		dnl # Negative test result: define the GPL-only marker and report yes.
+		AC_DEFINE([HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY], [1], [flush_dcache_page() is GPL-only])
+		AC_MSG_RESULT([yes])
+	])
+])

diff --git a/zfs/config/kernel-fmode-t.m4 b/zfs/config/kernel-fmode-t.m4
index bc0001b..5f111e2 100644
--- a/zfs/config/kernel-fmode-t.m4
+++ b/zfs/config/kernel-fmode-t.m4

@@ -14,8 +14,7 @@
 	AC_MSG_CHECKING([whether kernel defines fmode_t])
 	ZFS_LINUX_TEST_RESULT([type_fmode_t], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_FMODE_T, 1, [kernel defines fmode_t])
 	],[
-		AC_MSG_RESULT([no])
+		ZFS_LINUX_TEST_ERROR([type_fmode_t])
 	])
 ])

diff --git a/zfs/config/kernel-follow-down-one.m4 b/zfs/config/kernel-follow-down-one.m4
index 94e4aeb..38c460d 100644
--- a/zfs/config/kernel-follow-down-one.m4
+++ b/zfs/config/kernel-follow-down-one.m4

@@ -16,9 +16,7 @@
 	AC_MSG_CHECKING([whether follow_down_one() is available])
 	ZFS_LINUX_TEST_RESULT([follow_down_one], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FOLLOW_DOWN_ONE, 1,
-		    [follow_down_one() is available])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([follow_down_one()])
 	])
 ])

diff --git a/zfs/config/kernel-fpu.m4 b/zfs/config/kernel-fpu.m4
index 3c79334..e79632b 100644
--- a/zfs/config/kernel-fpu.m4
+++ b/zfs/config/kernel-fpu.m4

@@ -1,7 +1,19 @@
-dnl # 
+dnl #
 dnl # Handle differences in kernel FPU code.
 dnl #
 dnl # Kernel
+dnl # 5.19:	The asm/fpu/internal.h header was removed, it has been
+dnl #		effectively empty since the 5.16 kernel.
+dnl #
+dnl # 5.16:	XCR code put into asm/fpu/xcr.h
+dnl #		HAVE_KERNEL_FPU_XCR_HEADER
+dnl #
+dnl #		XSTATE_XSAVE and XSTATE_XRESTORE aren't accessible any more
+dnl #		HAVE_KERNEL_FPU_XSAVE_INTERNAL
+dnl #
+dnl # 5.11:	kernel_fpu_begin() is an inlined function now, so don't check
+dnl #		for it inside the kernel symbols.
+dnl #
 dnl # 5.0:	Wrappers have been introduced to save/restore the FPU state.
 dnl #		This change was made to the 4.19.38 and 4.14.120 LTS kernels.
 dnl #		HAVE_KERNEL_FPU_INTERNAL
@@ -24,9 +36,31 @@
 	],[
 		AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1,
 		    [kernel has asm/fpu/api.h])
-		AC_MSG_RESULT(asm/fpu/api.h)
+		fpu_headers="asm/fpu/api.h"
+
+		ZFS_LINUX_TRY_COMPILE([
+			#include <linux/module.h>
+			#include <asm/fpu/xcr.h>
+		],[
+		],[
+			AC_DEFINE(HAVE_KERNEL_FPU_XCR_HEADER, 1,
+			    [kernel has asm/fpu/xcr.h])
+			fpu_headers="$fpu_headers asm/fpu/xcr.h"
+		])
+
+		ZFS_LINUX_TRY_COMPILE([
+			#include <linux/module.h>
+			#include <asm/fpu/internal.h>
+		],[
+		],[
+			AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL_HEADER, 1,
+			    [kernel has asm/fpu/internal.h])
+			fpu_headers="$fpu_headers asm/fpu/internal.h"
+		])
+
+		AC_MSG_RESULT([$fpu_headers])
 	],[
-		AC_MSG_RESULT(i387.h & xcr.h)
+		AC_MSG_RESULT([i387.h & xcr.h])
 	])
 ])
 
@@ -42,7 +76,7 @@
 	], [
 		kernel_fpu_begin();
 		kernel_fpu_end();
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 
 	ZFS_LINUX_TEST_SRC([__kernel_fpu], [
 		#include <linux/types.h>
@@ -55,7 +89,7 @@
 	], [
 		__kernel_fpu_begin();
 		__kernel_fpu_end();
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 
 	ZFS_LINUX_TEST_SRC([fpu_internal], [
 		#if defined(__x86_64) || defined(__x86_64__) || \
@@ -72,7 +106,9 @@
 		#include <linux/types.h>
 		#ifdef HAVE_KERNEL_FPU_API_HEADER
 		#include <asm/fpu/api.h>
+		#ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER
 		#include <asm/fpu/internal.h>
+		#endif
 		#else
 		#include <asm/i387.h>
 		#include <asm/xcr.h>
@@ -92,6 +128,38 @@
 		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
 		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
 	])
+
+	ZFS_LINUX_TEST_SRC([fpu_xsave_internal], [
+		#include <linux/sched.h>
+		#if defined(__x86_64) || defined(__x86_64__) || \
+		    defined(__i386) || defined(__i386__)
+		#if !defined(__x86)
+		#define __x86
+		#endif
+		#endif
+
+		#if !defined(__x86)
+		#error Unsupported architecture
+		#endif
+
+		#include <linux/types.h>
+		#ifdef HAVE_KERNEL_FPU_API_HEADER
+		#include <asm/fpu/api.h>
+		#ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER
+		#include <asm/fpu/internal.h>
+		#endif
+		#else
+		#include <asm/i387.h>
+		#include <asm/xcr.h>
+		#endif
+
+	],[
+		struct fpu *fpu = &current->thread.fpu;
+		union fpregs_state *st = &fpu->fpstate->regs;
+		struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
+		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
+		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_FPU], [
@@ -99,8 +167,7 @@
 	dnl # Legacy kernel
 	dnl #
 	AC_MSG_CHECKING([whether kernel fpu is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([kernel_fpu_license],
-	    [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [
+	ZFS_LINUX_TEST_RESULT([kernel_fpu_license], [
 		AC_MSG_RESULT(kernel_fpu_*)
 		AC_DEFINE(HAVE_KERNEL_FPU, 1,
 		    [kernel has kernel_fpu_* functions])
@@ -124,7 +191,13 @@
 				AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
 				    [kernel fpu internal])
 			],[
+				ZFS_LINUX_TEST_RESULT([fpu_xsave_internal], [
+				    AC_MSG_RESULT(internal with internal XSAVE)
+				    AC_DEFINE(HAVE_KERNEL_FPU_XSAVE_INTERNAL, 1,
+					[kernel fpu and XSAVE internal])
+			    ],[
 				AC_MSG_RESULT(unavailable)
+			    ])
 			])
 		])
 	])

diff --git a/zfs/config/kernel-fst-mount.m4 b/zfs/config/kernel-fst-mount.m4
index cec1ed4..576f5f0 100644
--- a/zfs/config/kernel-fst-mount.m4
+++ b/zfs/config/kernel-fst-mount.m4

@@ -24,8 +24,7 @@
         AC_MSG_CHECKING([whether fst->mount() exists])
         ZFS_LINUX_TEST_RESULT([file_system_type_mount], [
                 AC_MSG_RESULT(yes)
-                AC_DEFINE(HAVE_FST_MOUNT, 1, [fst->mount() exists])
         ],[
-                AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([fst->mount()])
         ])
 ])

diff --git a/zfs/config/kernel-fsync.m4 b/zfs/config/kernel-fsync.m4
index 0494e31..d198191 100644
--- a/zfs/config/kernel-fsync.m4
+++ b/zfs/config/kernel-fsync.m4

@@ -2,18 +2,6 @@
 dnl # Check file_operations->fsync interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [
-	ZFS_LINUX_TEST_SRC([fsync_with_dentry], [
-		#include <linux/fs.h>
-
-		int test_fsync(struct file *f, struct dentry *dentry, int x)
-		    { return 0; }
-
-		static const struct file_operations
-		    fops __attribute__ ((unused)) = {
-			.fsync = test_fsync,
-		};
-	],[])
-
 	ZFS_LINUX_TEST_SRC([fsync_without_dentry], [
 		#include <linux/fs.h>
 
@@ -40,38 +28,26 @@
 
 AC_DEFUN([ZFS_AC_KERNEL_FSYNC], [
 	dnl #
-	dnl # Linux 2.6.x - 2.6.34 API
+	dnl # Linux 2.6.35 - Linux 3.0 API
 	dnl #
-	AC_MSG_CHECKING([whether fops->fsync() wants dentry])
-	ZFS_LINUX_TEST_RESULT([fsync_with_dentry], [
+	AC_MSG_CHECKING([whether fops->fsync() wants no dentry])
+	ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_FSYNC_WITH_DENTRY, 1,
-		    [fops->fsync() with dentry])
+		AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1,
+		    [fops->fsync() without dentry])
 	],[
 		AC_MSG_RESULT([no])
 
 		dnl #
-		dnl # Linux 2.6.35 - Linux 3.0 API
+		dnl # Linux 3.1 - 3.x API
 		dnl #
-		AC_MSG_CHECKING([whether fops->fsync() wants no dentry])
-		ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [
-			AC_MSG_RESULT([yes])
-			AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1,
-			    [fops->fsync() without dentry])
+		AC_MSG_CHECKING([whether fops->fsync() wants range])
+		ZFS_LINUX_TEST_RESULT([fsync_range], [
+			AC_MSG_RESULT([range])
+			AC_DEFINE(HAVE_FSYNC_RANGE, 1,
+			    [fops->fsync() with range])
 		],[
-			AC_MSG_RESULT([no])
-
-			dnl #
-			dnl # Linux 3.1 - 3.x API
-			dnl #
-			AC_MSG_CHECKING([whether fops->fsync() wants range])
-			ZFS_LINUX_TEST_RESULT([fsync_range], [
-				AC_MSG_RESULT([range])
-				AC_DEFINE(HAVE_FSYNC_RANGE, 1,
-				    [fops->fsync() with range])
-			],[
-				ZFS_LINUX_TEST_ERROR([fops->fsync])
-			])
+			ZFS_LINUX_TEST_ERROR([fops->fsync])
 		])
 	])
 ])

diff --git a/zfs/config/kernel-generic_fillattr.m4 b/zfs/config/kernel-generic_fillattr.m4
new file mode 100644
index 0000000..02dee4d
--- /dev/null
+++ b/zfs/config/kernel-generic_fillattr.m4

@@ -0,0 +1,47 @@
+dnl #
+dnl # 5.12 API
+dnl #
+dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
+dnl # as the first arg, to support idmapped mounts.
+dnl #
+dnl # 6.3 API
+dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
+	ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
+		#include <linux/fs.h>
+	],[
+		struct user_namespace *userns = NULL;
+		struct inode *in = NULL;
+		struct kstat *k = NULL;
+		generic_fillattr(userns, in, k);
+	])
+
+	ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap], [
+		#include <linux/fs.h>
+	],[
+		struct mnt_idmap *idmap = NULL;
+		struct inode *in = NULL;
+		struct kstat *k = NULL;
+		generic_fillattr(idmap, in, k);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
+	AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, [generic_fillattr requires struct mnt_idmap*])
+	],[
+		dnl # Close the mnt_idmap check before starting the user_namespace fallback.
+		AC_MSG_RESULT([no])
+		AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, [generic_fillattr requires struct user_namespace*])
+		],[
+			AC_MSG_RESULT([no])
+		])
+	])
+])
+

diff --git a/zfs/config/kernel-generic_io_acct.m4 b/zfs/config/kernel-generic_io_acct.m4
index 423b3e5..a6a1090 100644
--- a/zfs/config/kernel-generic_io_acct.m4
+++ b/zfs/config/kernel-generic_io_acct.m4

@@ -2,6 +2,53 @@
 dnl # Check for generic io accounting interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [
+	ZFS_LINUX_TEST_SRC([bdev_io_acct_63], [
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		struct bio *bio = NULL;
+		unsigned long passed_time = 0;
+		unsigned long start_time;
+
+		start_time = bdev_start_io_acct(bdev, bio_op(bio),
+		    passed_time);
+		bdev_end_io_acct(bdev, bio_op(bio), bio_sectors(bio), start_time);
+	])
+
+	ZFS_LINUX_TEST_SRC([bdev_io_acct_old], [
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		struct bio *bio = NULL;
+		unsigned long passed_time = 0;
+		unsigned long start_time;
+
+		start_time = bdev_start_io_acct(bdev, bio_sectors(bio),
+		    bio_op(bio), passed_time);
+		bdev_end_io_acct(bdev, bio_op(bio), start_time);
+	])
+
+	ZFS_LINUX_TEST_SRC([disk_io_acct], [
+		#include <linux/blkdev.h>
+	], [
+		struct gendisk *disk = NULL;
+		struct bio *bio = NULL;
+		unsigned long start_time;
+
+		start_time = disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio));
+		disk_end_io_acct(disk, bio_op(bio), start_time);
+	])
+
+	ZFS_LINUX_TEST_SRC([bio_io_acct], [
+		#include <linux/blkdev.h>
+	], [
+		struct bio *bio = NULL;
+		unsigned long start_time;
+
+		start_time = bio_start_io_acct(bio);
+		bio_end_io_acct(bio, start_time);
+	])
+
 	ZFS_LINUX_TEST_SRC([generic_acct_3args], [
 		#include <linux/bio.h>
 
@@ -29,36 +76,88 @@
 
 AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
 	dnl #
-	dnl # 3.19 API addition
+	dnl # Linux 6.3, and then backports thereof, changed
+	dnl # the signatures on bdev_start_io_acct/bdev_end_io_acct
 	dnl #
-	dnl # torvalds/linux@394ffa50 allows us to increment iostat
-	dnl # counters without generic_make_request().
-	dnl #
-	AC_MSG_CHECKING([whether generic IO accounting wants 3 args])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
-	    [generic_start_io_acct], [block/bio.c], [
+	AC_MSG_CHECKING([whether 6.3+ bdev_*_io_acct() are available])
+	ZFS_LINUX_TEST_RESULT([bdev_io_acct_63], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
-		    [generic_start_io_acct()/generic_end_io_acct() available])
+		AC_DEFINE(HAVE_BDEV_IO_ACCT_63, 1, [bdev_*_io_acct() available])
 	], [
 		AC_MSG_RESULT(no)
 
 		dnl #
-		dnl # Linux 4.14 API,
+		dnl # 5.19 API,
 		dnl #
-		dnl # generic_start_io_acct/generic_end_io_acct now require
-		dnl # request_queue to be provided. No functional changes,
-		dnl # but preparation for inflight accounting.
+		dnl # disk_start_io_acct() and disk_end_io_acct() have been replaced by
+		dnl # bdev_start_io_acct() and bdev_end_io_acct().
 		dnl #
-		AC_MSG_CHECKING([whether generic IO accounting wants 4 args])
-		ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
-		    [generic_start_io_acct], [block/bio.c], [
+		AC_MSG_CHECKING([whether pre-6.3 bdev_*_io_acct() are available])
+		ZFS_LINUX_TEST_RESULT([bdev_io_acct_old], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
-			    [generic_start_io_acct()/generic_end_io_acct() ]
-			    [4 arg available])
+			AC_DEFINE(HAVE_BDEV_IO_ACCT_OLD, 1, [bdev_*_io_acct() available])
 		], [
 			AC_MSG_RESULT(no)
+			dnl #
+			dnl # 5.12 API,
+			dnl #
+			dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported
+			dnl # so use disk_start_io_acct() and disk_end_io_acct() instead
+			dnl #
+			AC_MSG_CHECKING([whether generic disk_*_io_acct() are available])
+			ZFS_LINUX_TEST_RESULT([disk_io_acct], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available])
+			], [
+				AC_MSG_RESULT(no)
+
+				dnl #
+				dnl # 5.7 API,
+				dnl #
+				dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers.
+				dnl #
+				AC_MSG_CHECKING([whether generic bio_*_io_acct() are available])
+				ZFS_LINUX_TEST_RESULT([bio_io_acct], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available])
+				], [
+					AC_MSG_RESULT(no)
+
+					dnl #
+					dnl # 4.14 API,
+					dnl #
+					dnl # generic_start_io_acct/generic_end_io_acct now require
+					dnl # request_queue to be provided. No functional changes,
+					dnl # but preparation for inflight accounting.
+					dnl #
+					AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args])
+					ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
+					    [generic_start_io_acct], [block/bio.c], [
+						AC_MSG_RESULT(yes)
+						AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
+						    [generic_*_io_acct() 4 arg available])
+					], [
+						AC_MSG_RESULT(no)
+
+						dnl #
+						dnl # 3.19 API addition
+						dnl #
+						dnl # torvalds/linux@394ffa50 allows us to increment
+						dnl # iostat counters without generic_make_request().
+						dnl #
+						AC_MSG_CHECKING(
+						    [whether generic_*_io_acct wants 3 args])
+						ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
+						    [generic_start_io_acct], [block/bio.c], [
+							AC_MSG_RESULT(yes)
+							AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
+							    [generic_*_io_acct() 3 arg available])
+						], [
+							AC_MSG_RESULT(no)
+						])
+					])
+				])
+			])
 		])
 	])
 ])

diff --git a/zfs/config/kernel-genhd-flags.m4 b/zfs/config/kernel-genhd-flags.m4
new file mode 100644
index 0000000..af6a8a0
--- /dev/null
+++ b/zfs/config/kernel-genhd-flags.m4

@@ -0,0 +1,58 @@
+dnl #
+dnl # 5.17 API change,
+dnl #
+dnl # GENHD_FL_EXT_DEVT flag removed
+dnl # GENHD_FL_NO_PART_SCAN renamed GENHD_FL_NO_PART
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENHD_FLAGS], [
+
+	ZFS_LINUX_TEST_SRC([genhd_fl_ext_devt], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_EXT_DEVT;
+	])
+
+	ZFS_LINUX_TEST_SRC([genhd_fl_no_part], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_NO_PART;
+	])
+
+	ZFS_LINUX_TEST_SRC([genhd_fl_no_part_scan], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_NO_PART_SCAN;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [
+
+	AC_MSG_CHECKING([whether GENHD_FL_EXT_DEVT flag is available])
+	ZFS_LINUX_TEST_RESULT([genhd_fl_ext_devt], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, GENHD_FL_EXT_DEVT,
+		    [GENHD_FL_EXT_DEVT flag is available])
+	], [
+		AC_MSG_RESULT(no)
+		AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, 0,
+		    [GENHD_FL_EXT_DEVT flag is not available])
+	])
+
+	AC_MSG_CHECKING([whether GENHD_FL_NO_PART flag is available])
+	ZFS_LINUX_TEST_RESULT([genhd_fl_no_part], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART,
+		    [GENHD_FL_NO_PART flag is available])
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether GENHD_FL_NO_PART_SCAN flag is available])
+		ZFS_LINUX_TEST_RESULT([genhd_fl_no_part_scan], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART_SCAN,
+			    [GENHD_FL_NO_PART_SCAN flag is available])
+		], [
+			ZFS_LINUX_TEST_ERROR([GENHD_FL_NO_PART|GENHD_FL_NO_PART_SCAN])
+		])
+	])
+])

diff --git a/zfs/config/kernel-get-disk-and-module.m4 b/zfs/config/kernel-get-disk-and-module.m4
deleted file mode 100644
index 51cf774..0000000
--- a/zfs/config/kernel-get-disk-and-module.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 4.16 API change
-dnl # Verify if get_disk_and_module() symbol is available.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE], [
-	ZFS_LINUX_TEST_SRC([get_disk_and_module], [
-		#include <linux/genhd.h>
-	], [
-		struct gendisk *disk = NULL;
-		(void) get_disk_and_module(disk);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE], [
-	AC_MSG_CHECKING([whether get_disk_and_module() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([get_disk_and_module],
-	    [get_disk_and_module], [block/genhd.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GET_DISK_AND_MODULE,
-		    1, [get_disk_and_module() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-get-disk-ro.m4 b/zfs/config/kernel-get-disk-ro.m4
index 1e2abb4..acfcb69 100644
--- a/zfs/config/kernel-get-disk-ro.m4
+++ b/zfs/config/kernel-get-disk-ro.m4

@@ -5,17 +5,16 @@
 	ZFS_LINUX_TEST_SRC([get_disk_ro], [
 		#include <linux/blkdev.h>
 	],[
-		struct gendisk *disk = NULL;
+		struct gendisk *disk __attribute__ ((unused)) = NULL;
 		(void) get_disk_ro(disk);
-	], [$NO_UNUSED_BUT_SET_VARIABLE])
+	], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [
 	AC_MSG_CHECKING([whether get_disk_ro() is available])
 	ZFS_LINUX_TEST_RESULT([get_disk_ro], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GET_DISK_RO, 1, [blk_disk_ro() is available])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([get_disk_ro()])
 	])
 ])

diff --git a/zfs/config/kernel-global_page_state.m4 b/zfs/config/kernel-global_page_state.m4
index badb5e5..76f2bba 100644
--- a/zfs/config/kernel-global_page_state.m4
+++ b/zfs/config/kernel-global_page_state.m4

@@ -55,7 +55,7 @@
 AC_DEFUN([ZFS_AC_KERNEL_ENUM_MEMBER], [
 	AC_MSG_CHECKING([whether enum $2 contains $1])
 	AS_IF([AC_TRY_COMMAND(
-	    "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | egrep -qx $1)],[
+	    "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | grep -Eqx $1)],[
 		AC_MSG_RESULT([yes])
 		AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1,
 		    [enum $2 contains $1])

diff --git a/zfs/config/kernel-group-info.m4 b/zfs/config/kernel-group-info.m4
index 0fee1d3..6941d62 100644
--- a/zfs/config/kernel-group-info.m4
+++ b/zfs/config/kernel-group-info.m4

@@ -6,8 +6,8 @@
 	ZFS_LINUX_TEST_SRC([group_info_gid], [
 		#include <linux/cred.h>
 	],[
-		struct group_info *gi = groups_alloc(1);
-		gi->gid[0] = KGIDT_INIT(0);
+		struct group_info gi __attribute__ ((unused)) = {};
+		gi.gid[0] = KGIDT_INIT(0);
 	])
 ])
 

diff --git a/zfs/config/kernel-hotplug.m4 b/zfs/config/kernel-hotplug.m4
new file mode 100644
index 0000000..e796a6d
--- /dev/null
+++ b/zfs/config/kernel-hotplug.m4

@@ -0,0 +1,31 @@
+dnl #
+dnl # 4.6 API change
+dnl # Added CPU hotplug APIs
+dnl #
+dnl # Compile test: exercise the multi-instance cpuhp state calls.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HOTPLUG], [
+	ZFS_LINUX_TEST_SRC([cpu_hotplug], [
+		#include <linux/cpuhotplug.h>
+	],[
+		enum cpuhp_state state = CPUHP_ONLINE;
+		int (*fp)(unsigned int, struct hlist_node *) = NULL;
+		cpuhp_state_add_instance_nocalls(0, (struct hlist_node *)NULL);
+		cpuhp_state_remove_instance_nocalls(0, (struct hlist_node *)NULL);
+		cpuhp_setup_state_multi(state, "", fp, fp);
+		cpuhp_remove_multi_state(0);
+	])
+])
+
+dnl #
+dnl # Define HAVE_CPU_HOTPLUG when the cpu_hotplug compile test succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_CPU_HOTPLUG], [
+	AC_MSG_CHECKING([whether CPU hotplug APIs exist])
+	ZFS_LINUX_TEST_RESULT([cpu_hotplug], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_CPU_HOTPLUG, 1, [CPU hotplug APIs exist])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-inode-create.m4 b/zfs/config/kernel-inode-create.m4
new file mode 100644
index 0000000..9e9e431
--- /dev/null
+++ b/zfs/config/kernel-inode-create.m4

@@ -0,0 +1,80 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg is changed to struct mnt_idmap *
+	dnl #
+	ZFS_LINUX_TEST_SRC([create_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_create(struct mnt_idmap *idmap,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t umode, bool flag) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.create         = inode_create,
+		};
+	],[])
+
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([create_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_create(struct user_namespace *userns,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t umode, bool flag) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.create		= inode_create,
+		};
+	],[])
+
+	dnl #
+	dnl # 3.6 API change
+	dnl #
+	ZFS_LINUX_TEST_SRC([create_flags], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_create(struct inode *inode ,struct dentry *dentry,
+		    umode_t umode, bool flag) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.create		= inode_create,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
+	AC_MSG_CHECKING([whether iops->create() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([create_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_CREATE_IDMAP, 1,
+		   [iops->create() takes struct mnt_idmap*])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([create_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
+			   [iops->create() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->create() passes flags])
+			ZFS_LINUX_TEST_RESULT([create_flags], [
+				AC_MSG_RESULT(yes)
+			],[
+				ZFS_LINUX_TEST_ERROR([iops->create()])
+			])
+		])
+	])
+])

diff --git a/zfs/config/kernel-inode-getattr.m4 b/zfs/config/kernel-inode-getattr.m4
index 48391d6..c8bfb07 100644
--- a/zfs/config/kernel-inode-getattr.m4
+++ b/zfs/config/kernel-inode-getattr.m4

@@ -1,8 +1,49 @@
-dnl #
-dnl # Linux 4.11 API
-dnl # See torvalds/linux@a528d35
-dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
+	dnl #
+	dnl # Linux 6.3 API
+	dnl # The first arg of getattr I/O operations handler type
+	dnl # is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [
+		#include <linux/fs.h>
+
+		int test_getattr(
+		    struct mnt_idmap *idmap,
+		    const struct path *p, struct kstat *k,
+		    u32 request_mask, unsigned int query_flags)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.getattr = test_getattr,
+		};
+	],[])
+
+	dnl #
+	dnl # Linux 5.12 API
+	dnl # The getattr I/O operations handler type was extended to require
+	dnl # a struct user_namespace* as its first arg, to support idmapped
+	dnl # mounts.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_getattr_userns], [
+		#include <linux/fs.h>
+
+		int test_getattr(
+			struct user_namespace *userns,
+		    const struct path *p, struct kstat *k,
+		    u32 request_mask, unsigned int query_flags)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.getattr = test_getattr,
+		};
+	],[])
+
+	dnl #
+	dnl # Linux 4.11 API
+	dnl # See torvalds/linux@a528d35
+	dnl #
 	ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [
 		#include <linux/fs.h>
 
@@ -33,21 +74,50 @@
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
-	AC_MSG_CHECKING([whether iops->getattr() takes a path])
-	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
+	dnl #
+	dnl # Kernel 6.3 test
+	dnl #
+	AC_MSG_CHECKING([whether iops->getattr() takes mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
-		    [iops->getattr() takes a path])
+		AC_DEFINE(HAVE_IDMAP_IOPS_GETATTR, 1,
+		    [iops->getattr() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
-		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+		dnl #
+		dnl # Kernel 5.12 test
+		dnl #
+		AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
+		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
-			    [iops->getattr() takes a vfsmount])
+			AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
+			    [iops->getattr() takes struct user_namespace*])
 		],[
 			AC_MSG_RESULT(no)
+
+			dnl #
+			dnl # Kernel 4.11 test
+			dnl #
+			AC_MSG_CHECKING([whether iops->getattr() takes a path])
+			ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
+					[iops->getattr() takes a path])
+			],[
+				AC_MSG_RESULT(no)
+
+				dnl #
+				dnl # Kernel < 4.11 test
+				dnl #
+				AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
+				ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
+						[iops->getattr() takes a vfsmount])
+				],[
+					AC_MSG_RESULT(no)
+				])
+			])
 		])
 	])
 ])

diff --git a/zfs/config/kernel-inode-lookup.m4 b/zfs/config/kernel-inode-lookup.m4
new file mode 100644
index 0000000..1a56e69
--- /dev/null
+++ b/zfs/config/kernel-inode-lookup.m4

@@ -0,0 +1,26 @@
+dnl #
+dnl # 3.6 API change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS], [
+	ZFS_LINUX_TEST_SRC([lookup_flags], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		struct dentry *inode_lookup(struct inode *inode,
+		    struct dentry *dentry, unsigned int flags) { return NULL; }
+
+		static const struct inode_operations iops
+		    __attribute__ ((unused)) = {
+			.lookup	= inode_lookup,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_FLAGS], [
+	AC_MSG_CHECKING([whether iops->lookup() passes flags])
+	ZFS_LINUX_TEST_RESULT([lookup_flags], [
+		AC_MSG_RESULT(yes)
+	],[
+		ZFS_LINUX_TEST_ERROR([iops->lookup()])
+	])
+])

diff --git a/zfs/config/kernel-inode-setattr.m4 b/zfs/config/kernel-inode-setattr.m4
new file mode 100644
index 0000000..45755b4
--- /dev/null
+++ b/zfs/config/kernel-inode-setattr.m4

@@ -0,0 +1,87 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [
+	dnl #
+	dnl # Linux 6.3 API
+	dnl # The first arg of setattr I/O operations handler type
+	dnl # is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct mnt_idmap *idmap,
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+
+	dnl #
+	dnl # Linux 5.12 API
+	dnl # The setattr I/O operations handler type was extended to require
+	dnl # a struct user_namespace* as its first arg, to support idmapped
+	dnl # mounts.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct user_namespace *userns,
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [
+	dnl #
+	dnl # Kernel 6.3 test
+	dnl #
+	AC_MSG_CHECKING([whether iops->setattr() takes mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([inode_operations_setattr_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IDMAP_IOPS_SETATTR, 1,
+		    [iops->setattr() takes struct mnt_idmap*])
+	],[
+		AC_MSG_RESULT(no)
+		dnl #
+		dnl # Kernel 5.12 test
+		dnl #
+		AC_MSG_CHECKING([whether iops->setattr() takes user_namespace])
+		ZFS_LINUX_TEST_RESULT([inode_operations_setattr_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_USERNS_IOPS_SETATTR, 1,
+			    [iops->setattr() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->setattr() exists])
+			ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_IOPS_SETATTR, 1,
+					[iops->setattr() exists])
+			],[
+				AC_MSG_RESULT(no)
+			])
+		])
+	])
+])

diff --git a/zfs/config/kernel-inode-times.m4 b/zfs/config/kernel-inode-times.m4
index 8a79c29..9c016c7 100644
--- a/zfs/config/kernel-inode-times.m4
+++ b/zfs/config/kernel-inode-times.m4

@@ -10,6 +10,7 @@
 		struct timespec64 ts;
 		struct inode ip;
 
+		memset(&ts, 0, sizeof(ts));
 		ts = timestamp_truncate(ts, &ip);
 	])
 

diff --git a/zfs/config/kernel-insert-inode-locked.m4 b/zfs/config/kernel-insert-inode-locked.m4
index 4990399..348aff9 100644
--- a/zfs/config/kernel-insert-inode-locked.m4
+++ b/zfs/config/kernel-insert-inode-locked.m4

@@ -15,9 +15,7 @@
 	ZFS_LINUX_TEST_RESULT_SYMBOL([insert_inode_locked],
 	    [insert_inode_locked], [fs/inode.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_INSERT_INODE_LOCKED, 1,
-		    [insert_inode_locked() is available])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([insert_inode_locked()])
 	])
 ])

diff --git a/zfs/config/kernel-invalidate-bdev-args.m4 b/zfs/config/kernel-invalidate-bdev-args.m4
deleted file mode 100644
index 93b80e6..0000000
--- a/zfs/config/kernel-invalidate-bdev-args.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 2.6.22 API change
-dnl # Unused destroy_dirty_buffers arg removed from prototype.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV], [
-	ZFS_LINUX_TEST_SRC([invalidate_bdev], [
-		#include <linux/buffer_head.h>
-		#include <linux/blkdev.h>
-	],[
-		struct block_device *bdev = NULL;
-		invalidate_bdev(bdev);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV], [
-	AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg])
-	ZFS_LINUX_TEST_RESULT([invalidate_bdev], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_INVALIDATE_BDEV, 1,
-		    [invalidate_bdev() wants 1 arg])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-is_owner_or_cap.m4 b/zfs/config/kernel-is_owner_or_cap.m4
index ab80724..4e9c002 100644
--- a/zfs/config/kernel-is_owner_or_cap.m4
+++ b/zfs/config/kernel-is_owner_or_cap.m4

@@ -4,6 +4,10 @@
 dnl # This is used for permission checks in the xattr and file attribute call
 dnl # paths.
 dnl #
+dnl # 5.12 API change,
+dnl # inode_owner_or_capable() now takes struct user_namespace *
+dnl # to support idmapped mounts
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
 	ZFS_LINUX_TEST_SRC([inode_owner_or_capable], [
 		#include <linux/fs.h>
@@ -12,13 +16,19 @@
 		(void) inode_owner_or_capable(ip);
 	])
 
-
-	ZFS_LINUX_TEST_SRC([is_owner_or_cap], [
+	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_userns], [
 		#include <linux/fs.h>
-		#include <linux/sched.h>
 	],[
 		struct inode *ip = NULL;
-		(void) is_owner_or_cap(ip);
+		(void) inode_owner_or_capable(&init_user_ns, ip);
+	])
+
+	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/mnt_idmapping.h>
+	],[
+		struct inode *ip = NULL;
+		(void) inode_owner_or_capable(&nop_mnt_idmap, ip);
 	])
 ])
 
@@ -28,16 +38,26 @@
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE, 1,
 		    [inode_owner_or_capable() exists])
-	],[
+	], [
 		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING([whether is_owner_or_cap() exists])
 
-		ZFS_LINUX_TEST_RESULT([is_owner_or_cap], [
+		AC_MSG_CHECKING(
+		    [whether inode_owner_or_capable() takes user_ns])
+		ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_IS_OWNER_OR_CAP, 1,
-			    [is_owner_or_cap() exists])
+			AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_USERNS, 1,
+			    [inode_owner_or_capable() takes user_ns])
 		],[
-			ZFS_LINUX_TEST_ERROR([capability])
+			AC_MSG_RESULT(no)
+			AC_MSG_CHECKING(
+			    [whether inode_owner_or_capable() takes mnt_idmap])
+			ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_mnt_idmap], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP, 1,
+				    [inode_owner_or_capable() takes mnt_idmap])
+			], [
+				ZFS_LINUX_TEST_ERROR([capability])
+			])
 		])
 	])
 ])

diff --git a/zfs/config/kernel-kmap-atomic-args.m4 b/zfs/config/kernel-kmap-atomic-args.m4
index d09e93d..1172505 100644
--- a/zfs/config/kernel-kmap-atomic-args.m4
+++ b/zfs/config/kernel-kmap-atomic-args.m4

@@ -16,9 +16,7 @@
 	AC_MSG_CHECKING([whether kmap_atomic wants 1 args])
 	ZFS_LINUX_TEST_RESULT([kmap_atomic], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_KMAP_ATOMIC, 1,
-		    [kmap_atomic wants 1 args])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([kmap_atomic()])
 	])
 ])

diff --git a/zfs/config/kernel-kmem-cache.m4 b/zfs/config/kernel-kmem-cache.m4
index 7576e6c..0e9fe9e 100644
--- a/zfs/config/kernel-kmem-cache.m4
+++ b/zfs/config/kernel-kmem-cache.m4

@@ -1,47 +1,4 @@
 dnl #
-dnl # 2.6.35 API change,
-dnl # The cachep->gfpflags member was renamed cachep->allocflags.  These are
-dnl # private allocation flags which are applied when allocating a new slab
-dnl # in kmem_getpages().  Unfortunately there is no public API for setting
-dnl # non-default flags.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS], [
-	ZFS_LINUX_TEST_SRC([kmem_cache_allocflags], [
-		#include <linux/slab.h>
-	],[
-		struct kmem_cache cachep __attribute__ ((unused));
-		cachep.allocflags = GFP_KERNEL;
-	])
-
-	ZFS_LINUX_TEST_SRC([kmem_cache_gfpflags], [
-		#include <linux/slab.h>
-	],[
-		struct kmem_cache cachep __attribute__ ((unused));
-		cachep.gfpflags = GFP_KERNEL;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS], [
-	AC_MSG_CHECKING([whether struct kmem_cache has allocflags])
-	ZFS_LINUX_TEST_RESULT([kmem_cache_allocflags], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1,
-		    [struct kmem_cache has allocflags])
-	],[
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether struct kmem_cache has gfpflags])
-		ZFS_LINUX_TEST_RESULT([kmem_cache_gfpflags], [
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1,
-			    [struct kmem_cache has gfpflags])
-		],[
-			AC_MSG_RESULT(no)
-		])
-	])
-])
-
-dnl #
 dnl # grsecurity API change,
 dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by
 dnl # kmem_cache_create_usercopy().
@@ -76,11 +33,9 @@
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE], [
-	ZFS_AC_KERNEL_SRC_KMEM_CACHE_ALLOCFLAGS
 	ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE], [
-	ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS
 	ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY
 ])

diff --git a/zfs/config/kernel-kmem-vmalloc.m4 b/zfs/config/kernel-kmem-vmalloc.m4
deleted file mode 100644
index a46ebc6..0000000
--- a/zfs/config/kernel-kmem-vmalloc.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 5.8 API,
-dnl # __vmalloc PAGE_KERNEL removal
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL], [
-	ZFS_LINUX_TEST_SRC([__vmalloc], [
-		#include <linux/mm.h>
-		#include <linux/vmalloc.h>
-	],[
-		void *p __attribute__ ((unused));
-
-		p = __vmalloc(0, GFP_KERNEL, PAGE_KERNEL);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL], [
-	AC_MSG_CHECKING([whether __vmalloc(ptr, flags, pageflags) is available])
-	ZFS_LINUX_TEST_RESULT([__vmalloc], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_VMALLOC_PAGE_KERNEL, 1, [__vmalloc page flags exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-kmem.m4 b/zfs/config/kernel-kmem.m4
index cc055e5..03c2a41 100644
--- a/zfs/config/kernel-kmem.m4
+++ b/zfs/config/kernel-kmem.m4

@@ -56,3 +56,64 @@
 	AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
 	AC_MSG_RESULT([$enable_debug_kmem_tracking])
 ])
+
+dnl #
+dnl # 4.12 API,
+dnl # Added kvmalloc allocation strategy
+dnl #
+dnl # Compile test: call kvmalloc() with a size and GFP flags.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [
+	ZFS_LINUX_TEST_SRC([kvmalloc], [
+		#include <linux/mm.h>
+		#include <linux/slab.h>
+	],[
+		void *p __attribute__ ((unused));
+
+		p = kvmalloc(0, GFP_KERNEL);
+	])
+])
+
+dnl #
+dnl # Define HAVE_KVMALLOC when the kvmalloc compile test succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [
+	AC_MSG_CHECKING([whether kvmalloc(ptr, flags) is available])
+	ZFS_LINUX_TEST_RESULT([kvmalloc], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.8 API,
+dnl # __vmalloc PAGE_KERNEL removal
+dnl #
+dnl # Compile test: call __vmalloc() with the older three-argument form.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL], [
+	ZFS_LINUX_TEST_SRC([__vmalloc], [
+		#include <linux/mm.h>
+		#include <linux/vmalloc.h>
+	],[
+		void *p __attribute__ ((unused));
+
+		p = __vmalloc(0, GFP_KERNEL, PAGE_KERNEL);
+	])
+])
+
+dnl #
+dnl # Define HAVE_VMALLOC_PAGE_KERNEL when the three-argument __vmalloc()
+dnl # compile test succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL], [
+	AC_MSG_CHECKING([whether __vmalloc(ptr, flags, pageflags) is available])
+	ZFS_LINUX_TEST_RESULT([__vmalloc], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_VMALLOC_PAGE_KERNEL, 1, [__vmalloc page flags exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-kstrtoul.m4 b/zfs/config/kernel-kstrtoul.m4
index ef3c984..8e4b542 100644
--- a/zfs/config/kernel-kstrtoul.m4
+++ b/zfs/config/kernel-kstrtoul.m4

@@ -1,8 +1,6 @@
 dnl #
 dnl # 2.6.39 API change
-dnl #
-dnl # If kstrtoul() doesn't exist, fallback to use strict_strtoul() which has
-dnl # existed since 2.6.25.
+dnl # Added kstrtoul()
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_KSTRTOUL], [
 	ZFS_LINUX_TEST_SRC([kstrtoul], [
@@ -18,6 +16,6 @@
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([kstrtoul()])
 	])
 ])

diff --git a/zfs/config/kernel-kthread.m4 b/zfs/config/kernel-kthread.m4
new file mode 100644
index 0000000..f5b824d
--- /dev/null
+++ b/zfs/config/kernel-kthread.m4

@@ -0,0 +1,68 @@
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT], [
+	dnl #
+	dnl # 5.17 API,
+	dnl # cead18552660702a4a46f58e65188fe5f36e9dfe ("exit: Rename complete_and_exit to kthread_complete_and_exit")
+	dnl #
+	dnl # Also moves the definition from include/linux/kernel.h to include/linux/kthread.h
+	dnl #
+	AC_MSG_CHECKING([whether kthread_complete_and_exit() is available])
+	ZFS_LINUX_TEST_RESULT([kthread_complete_and_exit], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(SPL_KTHREAD_COMPLETE_AND_EXIT, kthread_complete_and_exit, [kthread_complete_and_exit() available])
+	], [
+		AC_MSG_RESULT(no)
+		AC_DEFINE(SPL_KTHREAD_COMPLETE_AND_EXIT, complete_and_exit, [using complete_and_exit() instead])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [
+	dnl #
+	dnl # 5.17 API: enum pid_type * as new 4th dequeue_signal() argument,
+	dnl # 5768d8906bc23d512b1a736c1e198aa833a6daa4 ("signal: Requeue signals in the appropriate queue")
+	dnl #
+	dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info);
+	dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type);
+	dnl #
+	AC_MSG_CHECKING([whether dequeue_signal() takes 4 arguments])
+	ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, [dequeue_signal() takes 4 arguments])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT], [
+	ZFS_LINUX_TEST_SRC([kthread_complete_and_exit], [
+		#include <linux/kthread.h>
+	], [
+		struct completion *completion = NULL;
+		long code = 0;
+
+		kthread_complete_and_exit(completion, code);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [
+	ZFS_LINUX_TEST_SRC([kthread_dequeue_signal], [
+		#include <linux/sched/signal.h>
+	], [
+		struct task_struct *task = NULL;
+		sigset_t *mask = NULL;
+		kernel_siginfo_t *info = NULL;
+		enum pid_type *type = NULL;
+		int error __attribute__ ((unused));
+
+		error = dequeue_signal(task, mask, info, type);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD], [
+	ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT
+	ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD], [
+	ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT
+	ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG
+])

diff --git a/zfs/config/kernel-kuid-helpers.m4 b/zfs/config/kernel-kuid-helpers.m4
index 4bc4e03..38a439f 100644
--- a/zfs/config/kernel-kuid-helpers.m4
+++ b/zfs/config/kernel-kuid-helpers.m4

@@ -18,9 +18,7 @@
 	AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist])
 	ZFS_LINUX_TEST_RESULT([i_uid_read], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_KUID_HELPERS, 1,
-		    [i_(uid|gid)_(read|write) exist])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([i_uid_read])
 	])
 ])

diff --git a/zfs/config/kernel-kuidgid.m4 b/zfs/config/kernel-kuidgid.m4
index 15bf981..b7e4414 100644
--- a/zfs/config/kernel-kuidgid.m4
+++ b/zfs/config/kernel-kuidgid.m4

@@ -1,34 +1,21 @@
 dnl #
-dnl # User namespaces, use kuid_t in place of uid_t
-dnl # where available. Not strictly a user namespaces thing
-dnl # but it should prevent surprises
+dnl # 3.8 API change,
+dnl # User namespaces, use kuid_t in place of uid_t where available.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_KUIDGID_T], [
-	ZFS_LINUX_TEST_SRC([kuidgid_t_init], [
+	ZFS_LINUX_TEST_SRC([kuidgid_t], [
 		#include <linux/uidgid.h>
 	], [
 		kuid_t userid __attribute__ ((unused)) = KUIDT_INIT(0);
 		kgid_t groupid __attribute__ ((unused)) = KGIDT_INIT(0);
 	])
-
-	ZFS_LINUX_TEST_SRC([kuidgid_t], [
-		#include <linux/uidgid.h>
-	], [
-		kuid_t userid __attribute__ ((unused)) = 0;
-		kgid_t groupid __attribute__ ((unused)) = 0;
-	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_KUIDGID_T], [
 	AC_MSG_CHECKING([whether kuid_t/kgid_t is available])
-	ZFS_LINUX_TEST_RESULT([kuidgid_t_init], [
-		ZFS_LINUX_TEST_RESULT([kuidgid_t], [
-			AC_MSG_RESULT(yes; optional)
-		],[
-			AC_MSG_RESULT(yes; mandatory)
-			AC_DEFINE(HAVE_KUIDGID_T, 1, [kuid_t/kgid_t in use])
-		])
+	ZFS_LINUX_TEST_RESULT([kuidgid_t], [
+		AC_MSG_RESULT(yes)
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([kuid_t/kgid_t])
 	])
 ])

diff --git a/zfs/config/kernel-lookup-bdev.m4 b/zfs/config/kernel-lookup-bdev.m4
deleted file mode 100644
index 69df60f..0000000
--- a/zfs/config/kernel-lookup-bdev.m4
+++ /dev/null

@@ -1,40 +0,0 @@
-dnl #
-dnl # 2.6.27, lookup_bdev() was exported.
-dnl # 4.4.0-6.21 - x.y on Ubuntu, lookup_bdev() takes 2 arguments.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_BDEV], [
-	ZFS_LINUX_TEST_SRC([lookup_bdev_1arg], [
-		#include <linux/fs.h>
-		#include <linux/blkdev.h>
-	], [
-		lookup_bdev(NULL);
-	])
-
-	ZFS_LINUX_TEST_SRC([lookup_bdev_2args], [
-		#include <linux/fs.h>
-	], [
-		lookup_bdev(NULL, FMODE_READ);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_BDEV], [
-	AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg],
-	    [lookup_bdev], [fs/block_dev.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1,
-		    [lookup_bdev() wants 1 arg])
-	], [
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether lookup_bdev() wants 2 args])
-		ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_2args],
-		    [lookup_bdev], [fs/block_dev.c], [
-			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_2ARGS_LOOKUP_BDEV, 1,
-			    [lookup_bdev() wants 2 args])
-		], [
-			AC_MSG_RESULT(no)
-		])
-	])
-])

diff --git a/zfs/config/kernel-lookup-nameidata.m4 b/zfs/config/kernel-lookup-nameidata.m4
deleted file mode 100644
index 865b8af..0000000
--- a/zfs/config/kernel-lookup-nameidata.m4
+++ /dev/null

@@ -1,29 +0,0 @@
-dnl #
-dnl # 3.6 API change
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA], [
-	ZFS_LINUX_TEST_SRC([lookup_nameidata], [
-		#include <linux/fs.h>
-		#include <linux/sched.h>
-
-		struct dentry *inode_lookup(struct inode *inode,
-		    struct dentry *dentry, struct nameidata *nidata)
-		    { return NULL; }
-
-		static const struct inode_operations iops
-		    __attribute__ ((unused)) = {
-			.lookup	= inode_lookup,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_LOOKUP_NAMEIDATA], [
-	AC_MSG_CHECKING([whether iops->lookup() passes nameidata])
-	ZFS_LINUX_TEST_RESULT([lookup_nameidata], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_LOOKUP_NAMEIDATA, 1,
-		    [iops->lookup() passes nameidata])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-make-request-fn.m4 b/zfs/config/kernel-make-request-fn.m4
index 6dba8cc..f17416a 100644
--- a/zfs/config/kernel-make-request-fn.m4
+++ b/zfs/config/kernel-make-request-fn.m4

@@ -2,14 +2,6 @@
 dnl # Check for make_request_fn interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
-	ZFS_LINUX_TEST_SRC([make_request_fn_int], [
-		#include <linux/blkdev.h>
-		int make_request(struct request_queue *q,
-		    struct bio *bio) { return (0); }
-	],[
-		blk_queue_make_request(NULL, &make_request);
-	])
-
 	ZFS_LINUX_TEST_SRC([make_request_fn_void], [
 		#include <linux/blkdev.h>
 		void make_request(struct request_queue *q,
@@ -35,12 +27,35 @@
 		q = blk_alloc_queue(make_request, NUMA_NO_NODE);
 	])
 
+	ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn_rh], [
+		#include <linux/blkdev.h>
+		blk_qc_t make_request(struct request_queue *q,
+		    struct bio *bio) { return (BLK_QC_T_NONE); }
+	],[
+		struct request_queue *q __attribute__ ((unused));
+		q = blk_alloc_queue_rh(make_request, NUMA_NO_NODE);
+	])
+
 	ZFS_LINUX_TEST_SRC([block_device_operations_submit_bio], [
 		#include <linux/blkdev.h>
 	],[
 		struct block_device_operations o;
 		o.submit_bio = NULL;
 	])
+
+	ZFS_LINUX_TEST_SRC([blk_alloc_disk], [
+		#include <linux/blkdev.h>
+	],[
+		struct gendisk *disk  __attribute__ ((unused));
+		disk = blk_alloc_disk(NUMA_NO_NODE);
+	])
+
+	ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
+		#include <linux/blkdev.h>
+	],[
+		struct gendisk *disk  __attribute__ ((unused));
+		blk_cleanup_disk(disk);
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
@@ -55,7 +70,35 @@
 
 		AC_DEFINE(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS, 1,
 		    [submit_bio is member of struct block_device_operations])
-		],[
+
+		dnl #
+		dnl # Linux 5.14 API Change:
+		dnl # blk_alloc_queue() + alloc_disk() combo replaced by
+		dnl # a single call to blk_alloc_disk().
+		dnl #
+		AC_MSG_CHECKING([whether blk_alloc_disk() exists])
+		ZFS_LINUX_TEST_RESULT([blk_alloc_disk], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE([HAVE_BLK_ALLOC_DISK], 1, [blk_alloc_disk() exists])
+
+			dnl #
+			dnl # 6.0 API change (developed as 5.20),
+			dnl # Removed blk_cleanup_disk(), put_disk() should be used.
+			dnl #
+			AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
+			ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
+				    [blk_cleanup_disk() exists])
+			], [
+				AC_MSG_RESULT(no)
+			])
+		], [
+			AC_MSG_RESULT(no)
+		])
+	],[
+		AC_MSG_RESULT(no)
+
 		dnl # Checked as part of the blk_alloc_queue_request_fn test
 		dnl #
 		dnl # Linux 5.7 API Change
@@ -65,62 +108,68 @@
 		ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn], [
 			AC_MSG_RESULT(yes)
 
-			dnl # Checked as part of the blk_alloc_queue_request_fn test
+			dnl # This is currently always the case.
 			AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t])
 			AC_MSG_RESULT(yes)
 
 			AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN, 1,
-				[blk_alloc_queue() expects request function])
+			    [blk_alloc_queue() expects request function])
 			AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
-				[make_request_fn() return type])
+			    [make_request_fn() return type])
 			AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
-				[Noting that make_request_fn() returns blk_qc_t])
+			    [Noting that make_request_fn() returns blk_qc_t])
 		],[
-			AC_MSG_RESULT(no)
-
 			dnl #
-			dnl # Linux 3.2 API Change
-			dnl # make_request_fn returns void.
+			dnl # CentOS Stream 4.18.0-257 API Change
+			dnl # The Linux 5.7 blk_alloc_queue() change was back-
+			dnl # ported and the symbol renamed blk_alloc_queue_rh().
+			dnl # As of this kernel version they're not providing
+			dnl # any compatibility code in the kernel for this.
 			dnl #
-			AC_MSG_CHECKING([whether make_request_fn() returns void])
-			ZFS_LINUX_TEST_RESULT([make_request_fn_void], [
+			ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn_rh], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(MAKE_REQUEST_FN_RET, void,
-					[make_request_fn() return type])
-				AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1,
-					[Noting that make_request_fn() returns void])
+
+				dnl # This is currently always the case.
+				AC_MSG_CHECKING([whether make_request_fn_rh() returns blk_qc_t])
+				AC_MSG_RESULT(yes)
+
+				AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH, 1,
+				    [blk_alloc_queue_rh() expects request function])
+				AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
+				    [make_request_fn() return type])
+				AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
+				    [Noting that make_request_fn() returns blk_qc_t])
 			],[
 				AC_MSG_RESULT(no)
 
 				dnl #
-				dnl # Linux 4.4 API Change
-				dnl # make_request_fn returns blk_qc_t.
+				dnl # Linux 3.2 API Change
+				dnl # make_request_fn returns void.
 				dnl #
 				AC_MSG_CHECKING(
-					[whether make_request_fn() returns blk_qc_t])
-				ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [
+				    [whether make_request_fn() returns void])
+				ZFS_LINUX_TEST_RESULT([make_request_fn_void], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
-						[make_request_fn() return type])
-					AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
-						[Noting that make_request_fn() ]
-						[returns blk_qc_t])
+					AC_DEFINE(MAKE_REQUEST_FN_RET, void,
+					    [make_request_fn() return type])
+					AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1,
+					    [Noting that make_request_fn() returns void])
 				],[
 					AC_MSG_RESULT(no)
 
 					dnl #
-					dnl # Legacy API
-					dnl # make_request_fn returns int.
+					dnl # Linux 4.4 API Change
+					dnl # make_request_fn returns blk_qc_t.
 					dnl #
 					AC_MSG_CHECKING(
-						[whether make_request_fn() returns int])
-					ZFS_LINUX_TEST_RESULT([make_request_fn_int], [
+					    [whether make_request_fn() returns blk_qc_t])
+					ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [
 						AC_MSG_RESULT(yes)
-						AC_DEFINE(MAKE_REQUEST_FN_RET, int,
-							[make_request_fn() return type])
-						AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT,
-							1, [Noting that make_request_fn() ]
-							[returns int])
+						AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
+						    [make_request_fn() return type])
+						AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
+						    [Noting that make_request_fn() ]
+						    [returns blk_qc_t])
 					],[
 						ZFS_LINUX_TEST_ERROR([make_request_fn])
 					])

diff --git a/zfs/config/kernel-mkdir-umode-t.m4 b/zfs/config/kernel-mkdir-umode-t.m4
deleted file mode 100644
index f4dde29..0000000
--- a/zfs/config/kernel-mkdir-umode-t.m4
+++ /dev/null

@@ -1,32 +0,0 @@
-dnl #
-dnl # 3.3 API change
-dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
-dnl # umode_t type rather than an int.  The expectation is that any backport
-dnl # would also change all three prototypes.  However, if it turns out that
-dnl # some distribution doesn't backport the whole thing this could be
-dnl # broken apart into three separate checks.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T], [
-	ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
-		#include <linux/fs.h>
-
-		int mkdir(struct inode *inode, struct dentry *dentry,
-		    umode_t umode) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.mkdir = mkdir,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [
-	AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t])
-	ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
-		    [iops->create()/mkdir()/mknod() take umode_t])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-mkdir.m4 b/zfs/config/kernel-mkdir.m4
new file mode 100644
index 0000000..7407a79
--- /dev/null
+++ b/zfs/config/kernel-mkdir.m4

@@ -0,0 +1,94 @@
+dnl #
+dnl # Supported mkdir() interfaces checked newest to oldest.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
+	dnl # Test callbacks are static: -Wmissing-prototypes must not fail them.
+	dnl #
+	dnl # 6.3 API change: mkdir() takes struct mnt_idmap * as the first arg
+	dnl #
+	ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [
+		#include <linux/fs.h>
+
+		static int mkdir(struct mnt_idmap *idmap,
+		    struct inode *inode, struct dentry *dentry,
+		    umode_t umode) { return 0; }
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+
+	dnl #
+	dnl # 5.12 API change
+	dnl # The struct user_namespace arg was added as the first argument to
+	dnl # mkdir()
+	dnl #
+	ZFS_LINUX_TEST_SRC([mkdir_user_namespace], [
+		#include <linux/fs.h>
+
+		static int mkdir(struct user_namespace *userns,
+		    struct inode *inode, struct dentry *dentry,
+		    umode_t umode) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+
+	dnl #
+	dnl # 3.3 API change
+	dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
+	dnl # umode_t type rather than an int.  The expectation is that any backport
+	dnl # would also change all three prototypes.  However, if it turns out that
+	dnl # some distribution doesn't backport the whole thing this could be
+	dnl # broken apart into three separate checks.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
+		#include <linux/fs.h>
+
+		static int mkdir(struct inode *inode, struct dentry *dentry,
+		    umode_t umode) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # mkdir() takes struct mnt_idmap * as the first arg
+	dnl #
+	AC_MSG_CHECKING([whether iops->mkdir() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([mkdir_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1,
+		    [iops->mkdir() takes struct mnt_idmap*])
+	],[
+		dnl # Answer the mnt_idmap check before the next one starts.
+		AC_MSG_RESULT(no)
+
+		dnl # 5.12 API change: struct user_namespace * was added as
+		dnl # the first argument to mkdir() of the iops structure.
+		AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
+			    [iops->mkdir() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
+			ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
+				    [iops->mkdir() takes umode_t])
+			],[
+				ZFS_LINUX_TEST_ERROR([mkdir()])
+			])
+		])
+	])
+])

diff --git a/zfs/config/kernel-mknod.m4 b/zfs/config/kernel-mknod.m4
new file mode 100644
index 0000000..1494ec1
--- /dev/null
+++ b/zfs/config/kernel-mknod.m4

@@ -0,0 +1,56 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
+	dnl # Test callbacks are static: -Wmissing-prototypes must not fail them.
+	dnl #
+	dnl # 6.3 API change: the first arg is now struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([mknod_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		static int tmp_mknod(struct mnt_idmap *idmap,
+		    struct inode *inode, struct dentry *dentry,
+		    umode_t u, dev_t d) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mknod = tmp_mknod,
+		};
+	],[])
+
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([mknod_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		static int tmp_mknod(struct user_namespace *userns,
+		    struct inode *inode, struct dentry *dentry,
+		    umode_t u, dev_t d) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mknod = tmp_mknod,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
+	AC_MSG_CHECKING([whether iops->mknod() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([mknod_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_MKNOD_IDMAP, 1,
+		    [iops->mknod() takes struct mnt_idmap*])
+	],[
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([mknod_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
+			    [iops->mknod() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+		])
+	])
+])

diff --git a/zfs/config/kernel-mod-param.m4 b/zfs/config/kernel-mod-param.m4
deleted file mode 100644
index e00f19d..0000000
--- a/zfs/config/kernel-mod-param.m4
+++ /dev/null

@@ -1,33 +0,0 @@
-dnl #
-dnl # Grsecurity kernel API change
-dnl # constified parameters of module_param_call() methods
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST], [
-	ZFS_LINUX_TEST_SRC([module_param_call], [
-		#include <linux/module.h>
-		#include <linux/moduleparam.h>
-
-		int param_get(char *b, const struct kernel_param *kp)
-		{
-			return (0);
-		}
-
-		int param_set(const char *b, const struct kernel_param *kp)
-		{
-			return (0);
-		}
-
-		module_param_call(p, param_set, param_get, NULL, 0644);
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [
-	AC_MSG_CHECKING([whether module_param_call() is hardened])
-	ZFS_LINUX_TEST_RESULT([module_param_call], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(MODULE_PARAM_CALL_CONST, 1,
-		    [hardened module_param_call])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-open-bdev-exclusive.m4 b/zfs/config/kernel-open-bdev-exclusive.m4
deleted file mode 100644
index 2e46b88..0000000
--- a/zfs/config/kernel-open-bdev-exclusive.m4
+++ /dev/null

@@ -1,23 +0,0 @@
-dnl #
-dnl # 2.6.28 API change
-dnl # open/close_bdev_excl() renamed to open/close_bdev_exclusive()
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE], [
-	ZFS_LINUX_TEST_SRC([open_bdev_exclusive], [
-		#include <linux/fs.h>
-	], [
-		open_bdev_exclusive(NULL, 0, NULL);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE], [
-	AC_MSG_CHECKING([whether open_bdev_exclusive() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([open_bdev_exclusive],
-	    [open_bdev_exclusive], [fs/block_dev.c], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_OPEN_BDEV_EXCLUSIVE, 1,
-		    [open_bdev_exclusive() is available])
-	], [
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-pagemap-folio_wait_bit.m4 b/zfs/config/kernel-pagemap-folio_wait_bit.m4
new file mode 100644
index 0000000..12d8841
--- /dev/null
+++ b/zfs/config/kernel-pagemap-folio_wait_bit.m4

@@ -0,0 +1,26 @@
+dnl #
+dnl # Linux 5.16 no longer allows directly calling wait_on_page_bit, and
+dnl # instead requires you to call folio-specific functions. In this case,
+dnl # wait_on_page_bit(pg, PG_writeback) becomes
+dnl # folio_wait_bit(page_folio(pg), PG_writeback)
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT], [
+	ZFS_LINUX_TEST_SRC([pagemap_has_folio_wait_bit], [
+		#include <linux/pagemap.h>
+	],[
+		static struct folio *f = NULL;
+
+		folio_wait_bit(f, PG_writeback);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT], [
+	AC_MSG_CHECKING([whether folio_wait_bit() exists])
+	ZFS_LINUX_TEST_RESULT([pagemap_has_folio_wait_bit], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_PAGEMAP_FOLIO_WAIT_BIT, 1,
+			[folio_wait_bit() exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-pde-data.m4 b/zfs/config/kernel-pde-data.m4
index 928c5ef..4fc665d 100644
--- a/zfs/config/kernel-pde-data.m4
+++ b/zfs/config/kernel-pde-data.m4

@@ -1,21 +1,22 @@
 dnl #
-dnl # 3.10 API change,
-dnl # PDE is replaced by PDE_DATA
+dnl # 5.17 API: PDE_DATA() renamed to pde_data(),
+dnl # 359745d78351c6f5442435f81549f0207ece28aa ("proc: remove PDE_DATA() completely")
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_PDE_DATA], [
 	ZFS_LINUX_TEST_SRC([pde_data], [
 		#include <linux/proc_fs.h>
 	], [
-		PDE_DATA(NULL);
+		pde_data(NULL);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [
-	AC_MSG_CHECKING([whether PDE_DATA() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([pde_data], [PDE_DATA], [], [
+	AC_MSG_CHECKING([whether pde_data() is lowercase])
+	ZFS_LINUX_TEST_RESULT([pde_data], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_PDE_DATA, 1, [PDE_DATA is available])
-	],[
+		AC_DEFINE(SPL_PDE_DATA, pde_data, [pde_data() is pde_data()])
+	], [
 		AC_MSG_RESULT(no)
+		AC_DEFINE(SPL_PDE_DATA, PDE_DATA, [pde_data() is PDE_DATA()])
 	])
 ])

diff --git a/zfs/config/kernel-percpu.m4 b/zfs/config/kernel-percpu.m4
new file mode 100644
index 0000000..5125dd5
--- /dev/null
+++ b/zfs/config/kernel-percpu.m4

@@ -0,0 +1,87 @@
+dnl #
+dnl # 3.18 API change,
+dnl # The function percpu_counter_init now must be passed a GFP mask.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [
+	dnl # 'error' is marked unused: it is assigned but never read.
+	ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [
+		#include <linux/gfp.h>
+		#include <linux/percpu_counter.h>
+	],[
+		struct percpu_counter counter;
+		int error __attribute__ ((unused));
+		error = percpu_counter_init(&counter, 0, GFP_KERNEL);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [
+	AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t])
+	ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1,
+		    [percpu_counter_init() wants gfp_t])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 4.13 API change,
+dnl # __percpu_counter_add() was renamed to percpu_counter_add_batch().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH], [
+	ZFS_LINUX_TEST_SRC([percpu_counter_add_batch], [
+		#include <linux/percpu_counter.h>
+	],[
+		struct percpu_counter counter;
+
+		percpu_counter_add_batch(&counter, 1, 1);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH], [
+	AC_MSG_CHECKING([whether percpu_counter_add_batch() is defined])
+	ZFS_LINUX_TEST_RESULT([percpu_counter_add_batch], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1,
+		    [percpu_counter_add_batch() is defined])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # 5.10 API change,
+dnl # The "count" was moved into ref->data, from ref
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA], [
+	ZFS_LINUX_TEST_SRC([percpu_ref_count_in_data], [
+		#include <linux/percpu-refcount.h>
+	],[
+		struct percpu_ref_data d;
+
+		atomic_long_set(&d.count, 1L);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA], [
+	dnl # Define ZFS_PERCPU_REF_COUNT_IN_DATA when the test program builds.
+	AC_MSG_CHECKING([whether count is inside percpu_ref.data])
+	ZFS_LINUX_TEST_RESULT([percpu_ref_count_in_data], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(ZFS_PERCPU_REF_COUNT_IN_DATA, 1, [count is located in percpu_ref.data])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [
+	ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT
+	ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH
+	ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [
+	ZFS_AC_KERNEL_PERCPU_COUNTER_INIT
+	ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH
+	ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA
+])

diff --git a/zfs/config/kernel-put-link.m4 b/zfs/config/kernel-put-link.m4
index f03df9e..4234861 100644
--- a/zfs/config/kernel-put-link.m4
+++ b/zfs/config/kernel-put-link.m4

@@ -27,7 +27,7 @@
 	dnl #
 	dnl # 4.5 API change
 	dnl # get_link() uses delayed done, there is no put_link() interface.
-	dnl # This check intially uses the inode_operations_get_link result
+	dnl # This check initially uses the inode_operations_get_link result
 	dnl #
 	ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [
 		AC_DEFINE(HAVE_PUT_LINK_DELAYED, 1, [iops->put_link() delayed])

diff --git a/zfs/config/kernel-readpages.m4 b/zfs/config/kernel-readpages.m4
new file mode 100644
index 0000000..be65a0d
--- /dev/null
+++ b/zfs/config/kernel-readpages.m4

@@ -0,0 +1,25 @@
+dnl #
+dnl # Linux 5.18 removes address_space_operations ->readpages in favour of
+dnl # ->readahead
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READPAGES], [
+	ZFS_LINUX_TEST_SRC([vfs_has_readpages], [
+		#include <linux/fs.h>
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.readpages = NULL,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_VFS_READPAGES], [
+	AC_MSG_CHECKING([whether aops->readpages exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_readpages], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_READPAGES, 1,
+			[address_space_operations->readpages exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-reclaim_state.m4 b/zfs/config/kernel-reclaim_state.m4
new file mode 100644
index 0000000..9936b3c
--- /dev/null
+++ b/zfs/config/kernel-reclaim_state.m4

@@ -0,0 +1,26 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
+	dnl #
+	dnl # 6.4 API change
+	dnl # The reclaimed_slab of struct reclaim_state
+	dnl # is renamed to reclaimed
+	dnl #
+	ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
+		#include <linux/swap.h>
+		static const struct reclaim_state
+		    rs  __attribute__ ((unused)) = {
+		    .reclaimed = 100,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
+	AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
+	ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
+		   [struct reclaim_state has reclaimed])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+

diff --git a/zfs/config/kernel-register_sysctl_table.m4 b/zfs/config/kernel-register_sysctl_table.m4
new file mode 100644
index 0000000..a5e934f
--- /dev/null
+++ b/zfs/config/kernel-register_sysctl_table.m4

@@ -0,0 +1,27 @@
+dnl #
+dnl # Linux 6.5 removes register_sysctl_table
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
+	dnl # Test program: pass a sentinel-only table to register_sysctl_table().
+	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
+		#include <linux/sysctl.h>
+
+		static struct ctl_table dummy_table[] = {
+			{}
+		};
+	],[
+		struct ctl_table_header *h
+			__attribute__ ((unused)) = register_sysctl_table(dummy_table);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
+	AC_MSG_CHECKING([whether register_sysctl_table exists])
+	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_REGISTER_SYSCTL_TABLE, 1,
+			[register_sysctl_table exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-rename.m4 b/zfs/config/kernel-rename.m4
index f707391..b33cd0b 100644
--- a/zfs/config/kernel-rename.m4
+++ b/zfs/config/kernel-rename.m4

@@ -1,10 +1,10 @@
-dnl #
-dnl # 4.9 API change,
-dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
-dnl # flags.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [
-	ZFS_LINUX_TEST_SRC([inode_operations_rename], [
+AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
+	dnl #
+	dnl # 4.9 API change,
+	dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
+	dnl # flags.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [
 		#include <linux/fs.h>
 		int rename_fn(struct inode *sip, struct dentry *sdp,
 			struct inode *tip, struct dentry *tdp,
@@ -15,15 +15,66 @@
 			.rename = rename_fn,
 		};
 	],[])
+
+	dnl #
+	dnl # 5.12 API change,
+	dnl #
+	dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument
+	dnl # of the rename() and other inode_operations members.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [
+		#include <linux/fs.h>
+		int rename_fn(struct user_namespace *user_ns, struct inode *sip,
+			struct dentry *sdp, struct inode *tip, struct dentry *tdp,
+			unsigned int flags) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.rename = rename_fn,
+		};
+	],[])
+
+	dnl #
+	dnl # 6.3 API change - the first arg is now struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [
+		#include <linux/fs.h>
+		int rename_fn(struct mnt_idmap *idmap, struct inode *sip,
+			struct dentry *sdp, struct inode *tip, struct dentry *tdp,
+			unsigned int flags) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.rename = rename_fn,
+		};
+	],[])
 ])
 
-AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [
-	AC_MSG_CHECKING([whether iops->rename() wants flags])
-	ZFS_LINUX_TEST_RESULT([inode_operations_rename], [
+AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
+	AC_MSG_CHECKING([whether iops->rename() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([inode_operations_rename_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
-		    [iops->rename() wants flags])
+		AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1,
+		    [iops->rename() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
+			    [iops->rename() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->rename() wants flags])
+			ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
+					[iops->rename() wants flags])
+			],[
+				AC_MSG_RESULT(no)
+			])
+		])
 	])
 ])

diff --git a/zfs/config/kernel-revalidate-disk-size.m4 b/zfs/config/kernel-revalidate-disk-size.m4
new file mode 100644
index 0000000..13cb92a
--- /dev/null
+++ b/zfs/config/kernel-revalidate-disk-size.m4

@@ -0,0 +1,46 @@
+dnl #
+dnl # 5.11 API change
+dnl # revalidate_disk_size() has been removed entirely.
+dnl #
+dnl # 5.10 API change
+dnl # revalidate_disk() was replaced by revalidate_disk_size()
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REVALIDATE_DISK], [
+
+	ZFS_LINUX_TEST_SRC([revalidate_disk_size], [
+		#include <linux/blkdev.h>
+	], [
+		struct gendisk *disk = NULL;
+		(void) revalidate_disk_size(disk, false);
+	])
+
+	ZFS_LINUX_TEST_SRC([revalidate_disk], [
+		#include <linux/blkdev.h>
+	], [
+		struct gendisk *disk = NULL;
+		(void) revalidate_disk(disk);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REVALIDATE_DISK], [
+
+	AC_MSG_CHECKING([whether revalidate_disk_size() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([revalidate_disk_size],
+		[revalidate_disk_size], [block/genhd.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_REVALIDATE_DISK_SIZE, 1,
+		    [revalidate_disk_size() is available])
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether revalidate_disk() is available])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([revalidate_disk],
+		    [revalidate_disk], [block/genhd.c], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_REVALIDATE_DISK, 1,
+			    [revalidate_disk() is available])
+		], [
+			AC_MSG_RESULT(no)
+		])
+	])
+])

diff --git a/zfs/config/kernel-rwsem.m4 b/zfs/config/kernel-rwsem.m4
index 67c5cf9..d3a64a8 100644
--- a/zfs/config/kernel-rwsem.m4
+++ b/zfs/config/kernel-rwsem.m4

@@ -1,32 +1,4 @@
 dnl #
-dnl # 3.1 API Change
-dnl #
-dnl # The rw_semaphore.wait_lock member was changed from spinlock_t to
-dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW], [
-	ZFS_LINUX_TEST_SRC([rwsem_spinlock_is_raw], [
-		#include <linux/rwsem.h>
-	],[
-		struct rw_semaphore dummy_semaphore __attribute__ ((unused));
-		raw_spinlock_t dummy_lock __attribute__ ((unused)) =
-		    __RAW_SPIN_LOCK_INITIALIZER(dummy_lock);
-		dummy_semaphore.wait_lock = dummy_lock;
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [
-	AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw])
-	ZFS_LINUX_TEST_RESULT([rwsem_spinlock_is_raw], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(RWSEM_SPINLOCK_IS_RAW, 1,
-		    [struct rw_semaphore member wait_lock is raw_spinlock_t])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
 dnl # 3.16 API Change
 dnl #
 dnl # rwsem-spinlock "->activity" changed to "->count"
@@ -78,13 +50,11 @@
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [
-	ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW
 	ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY
 	ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [
-	ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW
 	ZFS_AC_KERNEL_RWSEM_ACTIVITY
 	ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT
 ])

diff --git a/zfs/config/kernel-sched.m4 b/zfs/config/kernel-sched.m4
index 4a7db97..17e49fb 100644
--- a/zfs/config/kernel-sched.m4
+++ b/zfs/config/kernel-sched.m4

@@ -14,10 +14,9 @@
 AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], [
 	AC_MSG_CHECKING([whether header linux/sched/rt.h exists])
 	ZFS_LINUX_TEST_RESULT([sched_rt_header], [
-		AC_DEFINE(HAVE_SCHED_RT_HEADER, 1, [linux/sched/rt.h exists])
 		AC_MSG_RESULT(yes)
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([sched_rt_header])
 	])
 ])
 

diff --git a/zfs/config/kernel-security-inode-init.m4 b/zfs/config/kernel-security-inode-init.m4
index 0dea7e3..4e4bfd2 100644
--- a/zfs/config/kernel-security-inode-init.m4
+++ b/zfs/config/kernel-security-inode-init.m4

@@ -1,37 +1,4 @@
 dnl #
-dnl # 2.6.39 API change
-dnl # The security_inode_init_security() function now takes an additional
-dnl # qstr argument which must be passed in from the dentry if available.
-dnl # Passing a NULL is safe when no qstr is available the relevant
-dnl # security checks will just be skipped.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS], [
-	ZFS_LINUX_TEST_SRC([security_inode_init_security_6args], [
-		#include <linux/security.h>
-	],[
-		struct inode *ip __attribute__ ((unused)) = NULL;
-		struct inode *dip __attribute__ ((unused)) = NULL;
-		const struct qstr *str __attribute__ ((unused)) = NULL;
-		char *name __attribute__ ((unused)) = NULL;
-		void *value __attribute__ ((unused)) = NULL;
-		size_t len __attribute__ ((unused)) = 0;
-
-		security_inode_init_security(ip, dip, str, &name, &value, &len);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS], [
-	AC_MSG_CHECKING([whether security_inode_init_security wants 6 args])
-	ZFS_LINUX_TEST_RESULT([security_inode_init_security_6args], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY, 1,
-		    [security_inode_init_security wants 6 args])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
 dnl # 3.2 API change
 dnl # The security_inode_init_security() API has been changed to include
 dnl # a filesystem specific callback to write security extended attributes.
@@ -55,19 +22,15 @@
 	AC_MSG_CHECKING([whether security_inode_init_security wants callback])
 	ZFS_LINUX_TEST_RESULT([security_inode_init_security], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY, 1,
-		    [security_inode_init_security wants callback])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([security_inode_init_security callback])
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SECURITY_INODE], [
-	ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_6ARGS
 	ZFS_AC_KERNEL_SRC_SECURITY_INODE_INIT_SECURITY_CALLBACK
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SECURITY_INODE], [
-	ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_6ARGS
 	ZFS_AC_KERNEL_SECURITY_INODE_INIT_SECURITY_CALLBACK
 ])

diff --git a/zfs/config/kernel-set-nlink.m4 b/zfs/config/kernel-set-nlink.m4
index 63a5a8c..fa4f928 100644
--- a/zfs/config/kernel-set-nlink.m4
+++ b/zfs/config/kernel-set-nlink.m4

@@ -1,6 +1,6 @@
 dnl #
-dnl # Linux v3.2-rc1 API change
-dnl # SHA: bfe8684869601dacfcb2cd69ef8cfd9045f62170
+dnl # Linux 3.2 API change
+dnl # set_nlink()
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_NLINK], [
 	ZFS_LINUX_TEST_SRC([set_nlink], [
@@ -16,8 +16,7 @@
 	AC_MSG_CHECKING([whether set_nlink() is available])
 	ZFS_LINUX_TEST_RESULT([set_nlink], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_SET_NLINK, 1, [set_nlink() is available])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([set_nlink()])
 	])
 ])

diff --git a/zfs/config/kernel-setattr-prepare.m4 b/zfs/config/kernel-setattr-prepare.m4
index 45408c4..e02d626 100644
--- a/zfs/config/kernel-setattr-prepare.m4
+++ b/zfs/config/kernel-setattr-prepare.m4

@@ -1,27 +1,74 @@
-dnl #
-dnl # 4.9 API change
-dnl # The inode_change_ok() function has been renamed setattr_prepare()
-dnl # and updated to take a dentry rather than an inode.
-dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
+	dnl #
+	dnl # 4.9 API change
+	dnl # The inode_change_ok() function has been renamed setattr_prepare()
+	dnl # and updated to take a dentry rather than an inode.
+	dnl #
 	ZFS_LINUX_TEST_SRC([setattr_prepare], [
 		#include <linux/fs.h>
 	], [
 		struct dentry *dentry = NULL;
 		struct iattr *attr = NULL;
 		int error __attribute__ ((unused)) =
-		    setattr_prepare(dentry, attr);
+			setattr_prepare(dentry, attr);
+	])
+
+	dnl #
+	dnl # 5.12 API change
+	dnl # The setattr_prepare() function has been changed to accept a new argument
+	dnl # for struct user_namespace*
+	dnl #
+	ZFS_LINUX_TEST_SRC([setattr_prepare_userns], [
+		#include <linux/fs.h>
+	], [
+		struct dentry *dentry = NULL;
+		struct iattr *attr = NULL;
+		struct user_namespace *userns = NULL;
+		int error __attribute__ ((unused)) =
+			setattr_prepare(userns, dentry, attr);
+	])
+
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg of setattr_prepare() is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([setattr_prepare_mnt_idmap], [
+		#include <linux/fs.h>
+	], [
+		struct dentry *dentry = NULL;
+		struct iattr *attr = NULL;
+		struct mnt_idmap *idmap = NULL;
+		int error __attribute__ ((unused)) =
+			setattr_prepare(idmap, dentry, attr);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
-	AC_MSG_CHECKING([whether setattr_prepare() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+	dnl # Probe the newest setattr_prepare() signature first: 6.3 (mnt_idmap),
+	dnl # then 5.12 (user_namespace), then 4.9 (dentry, attr).  A failed
+	dnl # probe reports "no" before the next check is announced.
+	AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_mnt_idmap],
 	    [setattr_prepare], [fs/attr.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_SETATTR_PREPARE, 1,
-		    [setattr_prepare() is available])
+		AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1, [setattr_prepare() accepts mnt_idmap])
 	], [
 		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
+		    [setattr_prepare], [fs/attr.c], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1, [setattr_prepare() accepts user_namespace])
+		], [
+			AC_MSG_RESULT(no)
+			AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
+			ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+				[setattr_prepare], [fs/attr.c], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1, [setattr_prepare() is available, doesn't accept user_namespace])
+			], [
+				AC_MSG_RESULT(no)
+			])
+		])
 	])
 ])

diff --git a/zfs/config/kernel-sget-args.m4 b/zfs/config/kernel-sget-args.m4
index 1358139..afa62c7 100644
--- a/zfs/config/kernel-sget-args.m4
+++ b/zfs/config/kernel-sget-args.m4

@@ -19,8 +19,7 @@
 	AC_MSG_CHECKING([whether sget() wants 5 args])
 	ZFS_LINUX_TEST_RESULT([sget_5args], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_5ARG_SGET, 1, [sget() wants 5 args])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([sget()])
 	])
 ])

diff --git a/zfs/config/kernel-show-options.m4 b/zfs/config/kernel-show-options.m4
index 9e426bc..93bd5fb 100644
--- a/zfs/config/kernel-show-options.m4
+++ b/zfs/config/kernel-show-options.m4

@@ -19,9 +19,7 @@
 	AC_MSG_CHECKING([whether sops->show_options() wants dentry])
 	ZFS_LINUX_TEST_RESULT([super_operations_show_options], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_SHOW_OPTIONS_WITH_DENTRY, 1,
-		    [sops->show_options() with dentry])
 	],[
-		AC_MSG_RESULT([no])
+		ZFS_LINUX_TEST_ERROR([sops->show_options()])
 	])
 ])

diff --git a/zfs/config/kernel-shrink.m4 b/zfs/config/kernel-shrink.m4
index 45b4b5d..0c70215 100644
--- a/zfs/config/kernel-shrink.m4
+++ b/zfs/config/kernel-shrink.m4

@@ -13,7 +13,6 @@
 
 		static const struct super_block
 		    sb __attribute__ ((unused)) = {
-			.s_shrink.shrink = shrink,
 			.s_shrink.seeks = DEFAULT_SEEKS,
 			.s_shrink.batch = 0,
 		};
@@ -24,101 +23,8 @@
 	AC_MSG_CHECKING([whether super_block has s_shrink])
 	ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_SHRINK, 1, [struct super_block has s_shrink])
-
 	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-dnl #
-dnl # 3.3 API change
-dnl # The super_block structure was changed to use an hlist_node instead
-dnl # of a list_head for the .s_instance linkage.
-dnl #
-dnl # This was done in part to resolve a race in the iterate_supers_type()
-dnl # function which was introduced in Linux 3.0 kernel.  The iterator
-dnl # was supposed to provide a safe way to call an arbitrary function on
-dnl # all super blocks of a specific type.  Unfortunately, because a
-dnl # list_head was used it was possible for iterate_supers_type() to
-dnl # get stuck spinning a super block which was just deactivated.
-dnl #
-dnl # This can occur because when the list head is removed from the
-dnl # fs_supers list it is reinitialized to point to itself.  If the
-dnl # iterate_supers_type() function happened to be processing the
-dnl # removed list_head it will get stuck spinning on that list_head.
-dnl #
-dnl # To resolve the issue for existing 3.0 - 3.2 kernels we detect when
-dnl # a list_head is used.  Then to prevent the spinning from occurring
-dnl # the .next pointer is set to the fs_supers list_head which ensures
-dnl # the iterate_supers_type() function will always terminate.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [
-	ZFS_LINUX_TEST_SRC([super_block_s_instances_list_head], [
-		#include <linux/fs.h>
-	],[
-		struct super_block sb __attribute__ ((unused));
-		INIT_LIST_HEAD(&sb.s_instances);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD], [
-	AC_MSG_CHECKING([whether super_block has s_instances list_head])
-	ZFS_LINUX_TEST_RESULT([super_block_s_instances_list_head], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_S_INSTANCES_LIST_HEAD, 1,
-		    [struct super_block has s_instances list_head])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS], [
-	ZFS_LINUX_TEST_SRC([nr_cached_objects], [
-		#include <linux/fs.h>
-
-		int nr_cached_objects(struct super_block *sb) { return 0; }
-
-		static const struct super_operations
-		    sops __attribute__ ((unused)) = {
-			.nr_cached_objects = nr_cached_objects,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [
-	AC_MSG_CHECKING([whether sops->nr_cached_objects() exists])
-	ZFS_LINUX_TEST_RESULT([nr_cached_objects], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_NR_CACHED_OBJECTS, 1,
-		    [sops->nr_cached_objects() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS], [
-	ZFS_LINUX_TEST_SRC([free_cached_objects], [
-		#include <linux/fs.h>
-
-		void free_cached_objects(struct super_block *sb, int x)
-		    { return; }
-
-		static const struct super_operations
-		    sops __attribute__ ((unused)) = {
-			.free_cached_objects = free_cached_objects,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_FREE_CACHED_OBJECTS], [
-	AC_MSG_CHECKING([whether sops->free_cached_objects() exists])
-	ZFS_LINUX_TEST_RESULT([free_cached_objects], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FREE_CACHED_OBJECTS, 1,
-		    [sops->free_cached_objects() exists])
-	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([sb->s_shrink()])
 	])
 ])
 
@@ -148,30 +54,22 @@
 	])
 ])
 
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [
+	ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [
+		#include <linux/mm.h>
+		unsigned long shrinker_cb(struct shrinker *shrink,
+		    struct shrink_control *sc) { return 0; }
+	],[
+		struct shrinker cache_shrinker = {
+			.count_objects = shrinker_cb,
+			.scan_objects = shrinker_cb,
+			.seeks = DEFAULT_SEEKS,
+		};
+		register_shrinker(&cache_shrinker, "vararg-reg-shrink-test");
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
-	ZFS_LINUX_TEST_SRC([shrinker_cb_2arg], [
-		#include <linux/mm.h>
-		int shrinker_cb(int nr_to_scan, gfp_t gfp_mask) { return 0; }
-	],[
-		struct shrinker cache_shrinker = {
-			.shrink = shrinker_cb,
-			.seeks = DEFAULT_SEEKS,
-		};
-		register_shrinker(&cache_shrinker);
-	])
-
-	ZFS_LINUX_TEST_SRC([shrinker_cb_3arg], [
-		#include <linux/mm.h>
-		int shrinker_cb(struct shrinker *shrink, int nr_to_scan,
-		    gfp_t gfp_mask) { return 0; }
-	],[
-		struct shrinker cache_shrinker = {
-			.shrink = shrinker_cb,
-			.seeks = DEFAULT_SEEKS,
-		};
-		register_shrinker(&cache_shrinker);
-	])
-
 	ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [
 		#include <linux/mm.h>
 		int shrinker_cb(struct shrinker *shrink,
@@ -200,59 +98,49 @@
 
 AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
 	dnl #
-	dnl # 2.6.23 to 2.6.34 API change
-	dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask)
+	dnl # 6.0 API change
+	dnl # register_shrinker() becomes a var-arg function that takes
+	dnl # a printf-style format string as args > 0
 	dnl #
-	AC_MSG_CHECKING([whether old 2-argument shrinker exists])
-	ZFS_LINUX_TEST_RESULT([shrinker_cb_2arg], [
+	AC_MSG_CHECKING([whether new var-arg register_shrinker() exists])
+	ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_2ARGS_OLD_SHRINKER_CALLBACK, 1,
-		    [old shrinker callback wants 2 args])
+		AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1,
+		    [register_shrinker is vararg])
+
+		dnl # We assume that the split shrinker callback exists if the
+		dnl # vararg register_shrinker() exists, because the latter is
+		dnl # a much more recent addition, and the macro test for the
+		dnl # var-arg version only works if the callback is split
+		AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
+			[cs->count_objects exists])
 	],[
 		AC_MSG_RESULT(no)
-
 		dnl #
-		dnl # 2.6.35 - 2.6.39 API change
-		dnl # ->shrink(struct shrinker *,
-		dnl #          int nr_to_scan, gfp_t gfp_mask)
+		dnl # 3.0 - 3.11 API change
+		dnl # cs->shrink(struct shrinker *, struct shrink_control *sc)
 		dnl #
-		AC_MSG_CHECKING([whether old 3-argument shrinker exists])
-		ZFS_LINUX_TEST_RESULT([shrinker_cb_3arg], [
+		AC_MSG_CHECKING([whether new 2-argument shrinker exists])
+		ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_3ARGS_SHRINKER_CALLBACK, 1,
-				[old shrinker callback wants 3 args])
+			AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1,
+				[new shrinker callback wants 2 args])
 		],[
 			AC_MSG_RESULT(no)
 
 			dnl #
-			dnl # 3.0 - 3.11 API change
-			dnl # ->shrink(struct shrinker *,
-			dnl #          struct shrink_control *sc)
+			dnl # 3.12 API change,
+			dnl # cs->shrink() is logically split in to
+			dnl # cs->count_objects() and cs->scan_objects()
 			dnl #
-			AC_MSG_CHECKING(
-			    [whether new 2-argument shrinker exists])
-			ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [
-				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_2ARGS_NEW_SHRINKER_CALLBACK, 1,
-					[new shrinker callback wants 2 args])
-			],[
-				AC_MSG_RESULT(no)
-
-				dnl #
-				dnl # 3.12 API change,
-				dnl # ->shrink() is logically split in to
-				dnl # ->count_objects() and ->scan_objects()
-				dnl #
-				AC_MSG_CHECKING(
-				    [whether ->count_objects callback exists])
-				ZFS_LINUX_TEST_RESULT(
-				    [shrinker_cb_shrink_control_split], [
+			AC_MSG_CHECKING([if cs->count_objects callback exists])
+			ZFS_LINUX_TEST_RESULT(
+				[shrinker_cb_shrink_control_split],[
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK,
-						1, [->count_objects exists])
-				],[
+					AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
+						[cs->count_objects exists])
+			],[
 					ZFS_LINUX_TEST_ERROR([shrinker])
-				])
 			])
 		])
 	])
@@ -280,25 +168,20 @@
 		AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1,
 		    [struct shrink_control exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([shrink_control])
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [
 	ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK
-	ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_INSTANCES_LIST_HEAD
-	ZFS_AC_KERNEL_SRC_NR_CACHED_OBJECTS
-	ZFS_AC_KERNEL_SRC_FREE_CACHED_OBJECTS
 	ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID
 	ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK
 	ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT
+	ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [
 	ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK
-	ZFS_AC_KERNEL_SUPER_BLOCK_S_INSTANCES_LIST_HEAD
-	ZFS_AC_KERNEL_NR_CACHED_OBJECTS
-	ZFS_AC_KERNEL_FREE_CACHED_OBJECTS
 	ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID
 	ZFS_AC_KERNEL_SHRINKER_CALLBACK
 	ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT

diff --git a/zfs/config/kernel-siginfo.m4 b/zfs/config/kernel-siginfo.m4
new file mode 100644
index 0000000..6ddb0dc
--- /dev/null
+++ b/zfs/config/kernel-siginfo.m4

@@ -0,0 +1,21 @@
+dnl #
+dnl # 4.20 API change: kernel_siginfo_t was added.  The SRC macro registers a
+dnl # probe declaring one; the result macro defines HAVE_SIGINFO on success.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SIGINFO], [
+	ZFS_LINUX_TEST_SRC([siginfo], [
+		#include <linux/signal_types.h>
+	],[
+		kernel_siginfo_t info __attribute__ ((unused));
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SIGINFO], [
+	AC_MSG_CHECKING([whether kernel_siginfo_t typedef exists])
+	ZFS_LINUX_TEST_RESULT([siginfo], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_SIGINFO, 1, [kernel_siginfo_t exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-signal-stop.m4 b/zfs/config/kernel-signal-stop.m4
new file mode 100644
index 0000000..6cb86e7
--- /dev/null
+++ b/zfs/config/kernel-signal-stop.m4

@@ -0,0 +1,21 @@
+dnl #
+dnl # 4.4 API change: kernel_signal_stop() was added.  HAVE_SIGNAL_STOP is
+dnl # defined when the probe that calls it succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SIGNAL_STOP], [
+	ZFS_LINUX_TEST_SRC([signal_stop], [
+		#include <linux/sched/signal.h>
+	],[
+		kernel_signal_stop();
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SIGNAL_STOP], [
+	AC_MSG_CHECKING([whether kernel_signal_stop() exists])
+	ZFS_LINUX_TEST_RESULT([signal_stop], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_SIGNAL_STOP, 1, [kernel_signal_stop() exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-special-state.m4 b/zfs/config/kernel-special-state.m4
new file mode 100644
index 0000000..aa60aab
--- /dev/null
+++ b/zfs/config/kernel-special-state.m4

@@ -0,0 +1,21 @@
+dnl #
+dnl # Linux 4.17 introduced set_special_state().  Probe for it and define
+dnl # HAVE_SET_SPECIAL_STATE when the probe succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE], [
+	ZFS_LINUX_TEST_SRC([set_special_state], [
+		#include <linux/sched.h>
+	], [
+		set_special_state(TASK_STOPPED);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SET_SPECIAL_STATE], [
+	AC_MSG_CHECKING([whether set_special_state() exists])
+	ZFS_LINUX_TEST_RESULT([set_special_state], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_SET_SPECIAL_STATE], [1], [set_special_state() exists])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-stdarg.m4 b/zfs/config/kernel-stdarg.m4
new file mode 100644
index 0000000..5bc8dd8
--- /dev/null
+++ b/zfs/config/kernel-stdarg.m4

@@ -0,0 +1,32 @@
+dnl #
+dnl # Starting with Linux 5.15 the kernel build no longer passes -isystem,
+dnl # so the external <stdarg.h> cannot be included; the kernel ships
+dnl # <linux/stdarg.h> instead.  Probe that this header exists and that it
+dnl # supplies va_start, va_end, va_arg and va_copy, so the inclusion of
+dnl # <stdarg.h> can be adjusted according to the result.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG], [
+	ZFS_LINUX_TEST_SRC([has_standalone_linux_stdarg], [
+		#include <linux/stdarg.h>
+
+		#if !(defined(va_start) && defined(va_end) && \
+		    defined(va_arg) && defined(va_copy))
+		#error "<linux/stdarg.h> is invalid"
+		#endif
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG], [
+	dnl #
+	dnl # Linux 5.15 provides its own stdarg.h and no longer permits
+	dnl # including compiler headers; report which case applies.
+	dnl #
+	AC_MSG_CHECKING([whether standalone <linux/stdarg.h> exists])
+	ZFS_LINUX_TEST_RESULT([has_standalone_linux_stdarg], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE([HAVE_STANDALONE_LINUX_STDARG], [1],
+		    [standalone <linux/stdarg.h> exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-submit_bio.m4 b/zfs/config/kernel-submit_bio.m4
deleted file mode 100644
index cf80e9b..0000000
--- a/zfs/config/kernel-submit_bio.m4
+++ /dev/null

@@ -1,24 +0,0 @@
-dnl #
-dnl # 4.8 API change
-dnl # The rw argument has been removed from submit_bio/submit_bio_wait.
-dnl # Callers are now expected to set bio->bi_rw instead of passing it in.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_SUBMIT_BIO], [
-	ZFS_LINUX_TEST_SRC([submit_bio], [
-		#include <linux/bio.h>
-	],[
-		blk_qc_t blk_qc;
-		struct bio *bio = NULL;
-		blk_qc = submit_bio(bio);
-	])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_SUBMIT_BIO], [
-	AC_MSG_CHECKING([whether submit_bio() wants 1 arg])
-	ZFS_LINUX_TEST_RESULT([submit_bio], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_SUBMIT_BIO, 1, [submit_bio() wants 1 arg])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-symlink.m4 b/zfs/config/kernel-symlink.m4
new file mode 100644
index 0000000..a0333ed
--- /dev/null
+++ b/zfs/config/kernel-symlink.m4

@@ -0,0 +1,53 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
+	dnl #
+	dnl # Linux 6.3: the leading argument of iops->symlink()
+	dnl # became struct mnt_idmap*.
+	dnl #
+	ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+		int tmp_symlink(struct mnt_idmap *idmap,
+		    struct inode *inode, struct dentry *dentry,
+		    const char *path) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.symlink = tmp_symlink,
+		};
+	], [])
+	dnl #
+	dnl # Linux 5.12: a struct user_namespace* argument was
+	dnl # prepended to this callback's parameter list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([symlink_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int tmp_symlink(struct user_namespace *userns,
+		    struct inode *inode, struct dentry *dentry,
+		    const char *path) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.symlink = tmp_symlink,
+		};
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
+	dnl # Probe mnt_idmap (6.3) first, then user_namespace (5.12); each check reports its own result.
+	AC_MSG_CHECKING([whether iops->symlink() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([symlink_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1, [iops->symlink() takes struct mnt_idmap*])
+	],[
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([symlink_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1, [iops->symlink() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+		])
+	])
+])

diff --git a/zfs/config/kernel-sysfs.m4 b/zfs/config/kernel-sysfs.m4
new file mode 100644
index 0000000..bbc77c8
--- /dev/null
+++ b/zfs/config/kernel-sysfs.m4

@@ -0,0 +1,37 @@
+dnl #
+dnl # Linux 5.2 / 5.18 API change
+dnl #
+dnl # struct kobj_type.default_groups was introduced (5.2) by
+dnl # aa30f47cf666111f6bbfd15f290a27e8a7b9d854 ("kobject: Add support for default attribute groups to kobj_type"),
+dnl # and struct kobj_type.default_attrs was finally removed (5.18) by
+dnl # cdb4f26a63c391317e335e6e683a614358e70aeb ("kobject: kobj_type: remove default_attrs").
+dnl #
+dnl # On kernels carrying both members (5.2-5.17) default_groups is preferred;
+dnl # the two are otherwise equivalent, so only default_groups is probed.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS], [
+	ZFS_LINUX_TEST_SRC([sysfs_default_groups], [
+		#include <linux/kobject.h>
+	], [
+		struct kobj_type __attribute__ ((unused)) ktype = {
+			.default_groups = (const struct attribute_group **)NULL };
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS], [
+	AC_MSG_CHECKING([whether struct kobj_type.default_groups exists])
+	ZFS_LINUX_TEST_RESULT([sysfs_default_groups], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_SYSFS_DEFAULT_GROUPS, [1], [struct kobj_type has default_groups])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS], [
+	ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SYSFS], [
+	ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS
+])

diff --git a/zfs/config/kernel-tmpfile.m4 b/zfs/config/kernel-tmpfile.m4
index f510bfe..cc18b8f 100644
--- a/zfs/config/kernel-tmpfile.m4
+++ b/zfs/config/kernel-tmpfile.m4

@@ -3,23 +3,84 @@
 dnl # Add support for i_op->tmpfile
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
-	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg is now struct mnt_idmap *
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [
 		#include <linux/fs.h>
-		int tmpfile(struct inode *inode, struct dentry *dentry,
+		int tmpfile(struct mnt_idmap *idmap,
+		    struct inode *inode, struct file *file,
 		    umode_t mode) { return 0; }
 		static struct inode_operations
 		    iops __attribute__ ((unused)) = {
 			.tmpfile = tmpfile,
 		};
 	],[])
+	dnl #
+	dnl # 6.1 API change: use struct file instead of struct dentry
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [
+		#include <linux/fs.h>
+		int tmpfile(struct user_namespace *userns,
+		    struct inode *inode, struct file *file,
+		    umode_t mode) { return 0; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.tmpfile = tmpfile,
+		};
+	],[])
+	dnl #
+	dnl # 5.11 API change
+	dnl # add support for userns parameter to tmpfile
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry_userns], [
+		#include <linux/fs.h>
+		int tmpfile(struct user_namespace *userns,
+		    struct inode *inode, struct dentry *dentry,
+		    umode_t mode) { return 0; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.tmpfile = tmpfile,
+		};
+	],[])
+	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry], [
+			#include <linux/fs.h>
+			int tmpfile(struct inode *inode, struct dentry *dentry,
+			    umode_t mode) { return 0; }
+			static struct inode_operations
+			    iops __attribute__ ((unused)) = {
+				.tmpfile = tmpfile,
+			};
+	],[])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
 	AC_MSG_CHECKING([whether i_op->tmpfile() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
+	ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
-	],[
-		AC_MSG_RESULT(no)
+		AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap])
+	], [
+		ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+			AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
+		],[
+			ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+				AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
+				AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
+			],[
+				ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+					AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
+				],[
+					ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
+				])
+			])
+		])
 	])
 ])

diff --git a/zfs/config/kernel-truncate-range.m4 b/zfs/config/kernel-truncate-range.m4
deleted file mode 100644
index 8fdbb10..0000000
--- a/zfs/config/kernel-truncate-range.m4
+++ /dev/null

@@ -1,27 +0,0 @@
-dnl #
-dnl # 3.5.0 API change
-dnl # torvalds/linux@17cf28afea2a1112f240a3a2da8af883be024811 removed
-dnl # truncate_range(). The file hole punching functionality is now
-dnl # provided by fallocate()
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE], [
-	ZFS_LINUX_TEST_SRC([inode_operations_truncate_range], [
-		#include <linux/fs.h>
-		void truncate_range(struct inode *inode, loff_t start,
-		                    loff_t end) { return; }
-		static struct inode_operations iops __attribute__ ((unused)) = {
-			.truncate_range	= truncate_range,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_TRUNCATE_RANGE], [
-	AC_MSG_CHECKING([whether iops->truncate_range() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_truncate_range], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_INODE_TRUNCATE_RANGE, 1,
-		          [iops->truncate_range() exists])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])

diff --git a/zfs/config/kernel-truncate-setsize.m4 b/zfs/config/kernel-truncate-setsize.m4
index e719c14..76c82ef 100644
--- a/zfs/config/kernel-truncate-setsize.m4
+++ b/zfs/config/kernel-truncate-setsize.m4

@@ -15,9 +15,7 @@
 	ZFS_LINUX_TEST_RESULT_SYMBOL([truncate_setsize],
 	    [truncate_setsize], [mm/truncate.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_TRUNCATE_SETSIZE, 1,
-		    [truncate_setsize() is available])
 	], [
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([truncate_setsize])
 	])
 ])

diff --git a/zfs/config/kernel-userns-capabilities.m4 b/zfs/config/kernel-userns-capabilities.m4
index 5dcbc03..0265036 100644
--- a/zfs/config/kernel-userns-capabilities.m4
+++ b/zfs/config/kernel-userns-capabilities.m4

@@ -14,7 +14,33 @@
 	AC_MSG_CHECKING([whether ns_capable exists])
 	ZFS_LINUX_TEST_RESULT([ns_capable], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_NS_CAPABLE, 1, [ns_capable exists])
+	],[
+		ZFS_LINUX_TEST_ERROR([ns_capable()])
+	])
+])
+
+dnl #
+dnl # Linux 4.10 exported has_capability().
+dnl # Register a probe that calls it with a task pointer and a capability.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_HAS_CAPABILITY], [
+	ZFS_LINUX_TEST_SRC([has_capability], [
+		#include <linux/capability.h>
+	], [
+		bool permitted __attribute__ ((unused));
+		int cap = 0;
+		struct task_struct *task = NULL;
+
+		permitted = has_capability(task, cap);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_HAS_CAPABILITY], [
+	AC_MSG_CHECKING([whether has_capability() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([has_capability],
+	    [has_capability], [kernel/capability.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_HAS_CAPABILITY, 1, [has_capability() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -22,9 +48,7 @@
 
 dnl #
 dnl # 2.6.39 API change
-dnl # struct user_namespace was added to struct cred_t as
-dnl # cred->user_ns member
-dnl # Note that current_user_ns() was added in 2.6.28.
+dnl # struct user_namespace was added to struct cred_t as cred->user_ns member
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_CRED_USER_NS], [
 	ZFS_LINUX_TEST_SRC([cred_user_ns], [
@@ -39,9 +63,8 @@
 	AC_MSG_CHECKING([whether cred_t->user_ns exists])
 	ZFS_LINUX_TEST_RESULT([cred_user_ns], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_CRED_USER_NS, 1, [cred_t->user_ns exists])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([cred_t->user_ns()])
 	])
 ])
 
@@ -63,21 +86,21 @@
 	AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist])
 	ZFS_LINUX_TEST_RESULT([kuid_has_mapping], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_KUID_HAS_MAPPING, 1,
-		    [kuid_has_mapping/kgid_has_mapping exist])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([kuid_has_mapping()])
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES], [
 	ZFS_AC_KERNEL_SRC_NS_CAPABLE
+	ZFS_AC_KERNEL_SRC_HAS_CAPABILITY
 	ZFS_AC_KERNEL_SRC_CRED_USER_NS
 	ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_USERNS_CAPABILITIES], [
 	ZFS_AC_KERNEL_NS_CAPABLE
+	ZFS_AC_KERNEL_HAS_CAPABILITY
 	ZFS_AC_KERNEL_CRED_USER_NS
 	ZFS_AC_KERNEL_KUID_HAS_MAPPING
 ])

diff --git a/zfs/config/kernel-usleep_range.m4 b/zfs/config/kernel-usleep_range.m4
index 5bf051a..06eb381 100644
--- a/zfs/config/kernel-usleep_range.m4
+++ b/zfs/config/kernel-usleep_range.m4

@@ -17,8 +17,7 @@
 	AC_MSG_CHECKING([whether usleep_range() is available])
 	ZFS_LINUX_TEST_RESULT([usleep_range], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_USLEEP_RANGE, 1, [usleep_range is available])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([usleep_range()])
 	])
 ])

diff --git a/zfs/config/kernel-vfs-filemap_dirty_folio.m4 b/zfs/config/kernel-vfs-filemap_dirty_folio.m4
new file mode 100644
index 0000000..729ca67
--- /dev/null
+++ b/zfs/config/kernel-vfs-filemap_dirty_folio.m4

@@ -0,0 +1,30 @@
+dnl #
+dnl # Linux 5.18 uses filemap_dirty_folio in lieu of
+dnl # __set_page_dirty_nobuffers; probe it via the aops dirty_folio slot.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO], [
+	ZFS_LINUX_TEST_SRC([vfs_has_filemap_dirty_folio], [
+		#include <linux/pagemap.h>
+		#include <linux/writeback.h>
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.dirty_folio = filemap_dirty_folio,
+		};
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO], [
+	dnl #
+	dnl # Report whether the filemap_dirty_folio probe (Linux 5.18)
+	dnl # succeeded and record the answer for the C sources.
+	dnl #
+	AC_MSG_CHECKING([whether filemap_dirty_folio exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_filemap_dirty_folio], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE([HAVE_VFS_FILEMAP_DIRTY_FOLIO], [1],
+		    [filemap_dirty_folio exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-vfs-fsync.m4 b/zfs/config/kernel-vfs-fsync.m4
index 18a60d2..159efca 100644
--- a/zfs/config/kernel-vfs-fsync.m4
+++ b/zfs/config/kernel-vfs-fsync.m4

@@ -14,8 +14,7 @@
 	AC_MSG_CHECKING([whether vfs_fsync() wants 2 args])
 	ZFS_LINUX_TEST_RESULT([vfs_fsync_2args], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_2ARGS_VFS_FSYNC, 1, [vfs_fsync() wants 2 args])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([vfs_fsync()])
 	])
 ])

diff --git a/zfs/config/kernel-vfs-iov_iter.m4 b/zfs/config/kernel-vfs-iov_iter.m4
new file mode 100644
index 0000000..ff560ff
--- /dev/null
+++ b/zfs/config/kernel-vfs-iov_iter.m4

@@ -0,0 +1,226 @@
+dnl #
+dnl # Check for available iov_iter functionality.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
+	ZFS_LINUX_TEST_SRC([iov_iter_types], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		int type __attribute__ ((unused)) = ITER_KVEC;
+	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_advance], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t advance = 512;
+
+		iov_iter_advance(&iter, advance);
+	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_revert], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t revert = 512;
+
+		iov_iter_revert(&iter, revert);
+	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_fault_in_readable], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t size = 512;
+		int error __attribute__ ((unused));
+
+		error = iov_iter_fault_in_readable(&iter, size);
+	])
+
+	ZFS_LINUX_TEST_SRC([fault_in_iov_iter_readable], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t size = 512;
+		int error __attribute__ ((unused));
+
+		error = fault_in_iov_iter_readable(&iter, size);
+	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_count], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t bytes __attribute__ ((unused));
+
+		bytes = iov_iter_count(&iter);
+	])
+
+	ZFS_LINUX_TEST_SRC([copy_to_iter], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		char buf[512] = { 0 };
+		size_t size = 512;
+		size_t bytes __attribute__ ((unused));
+
+		bytes = copy_to_iter((const void *)&buf, size, &iter);
+	])
+
+	ZFS_LINUX_TEST_SRC([copy_from_iter], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		char buf[512] = { 0 };
+		size_t size = 512;
+		size_t bytes __attribute__ ((unused));
+
+		bytes = copy_from_iter((void *)&buf, size, &iter);
+	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_type], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
+	])
+
+	ZFS_LINUX_TEST_SRC([iter_iov], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		__attribute__((unused)) const struct iovec *iov = iter_iov(&iter);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
+	enable_vfs_iov_iter="yes"
+
+	AC_MSG_CHECKING([whether iov_iter types are available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_types], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_TYPES, 1,
+		    [iov_iter types are available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	AC_MSG_CHECKING([whether iov_iter_advance() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_advance], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_ADVANCE, 1,
+		    [iov_iter_advance() is available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	AC_MSG_CHECKING([whether iov_iter_revert() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_revert], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_REVERT, 1,
+		    [iov_iter_revert() is available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	AC_MSG_CHECKING([whether iov_iter_fault_in_readable() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_fault_in_readable], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_FAULT_IN_READABLE, 1,
+		    [iov_iter_fault_in_readable() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available])
+		ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1,
+			    [fault_in_iov_iter_readable() is available])
+		],[
+			AC_MSG_RESULT(no)
+			enable_vfs_iov_iter="no"
+		])
+	])
+
+	AC_MSG_CHECKING([whether iov_iter_count() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_count], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_COUNT, 1,
+		    [iov_iter_count() is available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	AC_MSG_CHECKING([whether copy_to_iter() is available])
+	ZFS_LINUX_TEST_RESULT([copy_to_iter], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_COPY_TO_ITER, 1,
+		    [copy_to_iter() is available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	AC_MSG_CHECKING([whether copy_from_iter() is available])
+	ZFS_LINUX_TEST_RESULT([copy_from_iter], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_COPY_FROM_ITER, 1,
+		    [copy_from_iter() is available])
+	],[
+		AC_MSG_RESULT(no)
+		enable_vfs_iov_iter="no"
+	])
+
+	dnl #
+	dnl # This checks for iov_iter_type() in linux/uio.h. It is not
+	dnl # required, however, and the module will be compiled without it
+	dnl # using direct access of the member attribute
+	dnl #
+	AC_MSG_CHECKING([whether iov_iter_type() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_type], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_TYPE, 1,
+		    [iov_iter_type() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	dnl #
+	dnl # As of the 4.9 kernel support is provided for iovecs, kvecs,
+	dnl # bvecs and pipes in the iov_iter structure.  As long as the
+	dnl # other support interfaces are all available the iov_iter can
+	dnl # be correctly used in the uio structure.
+	dnl #
+	AS_IF([test "x$enable_vfs_iov_iter" = "xyes"], [
+		AC_DEFINE(HAVE_VFS_IOV_ITER, 1,
+		    [All required iov_iter interfaces are available])
+	])
+
+	dnl #
+	dnl # Kernel 6.5 introduces the iter_iov() function that returns the
+	dnl # __iov member of an iov_iter*. The iov member was renamed to this
+	dnl # __iov member, and is intended to be accessed via the helper
+	dnl # function now.
+	dnl #
+	AC_MSG_CHECKING([whether iter_iov() is available])
+	ZFS_LINUX_TEST_RESULT([iter_iov], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ITER_IOV, 1,
+		    [iter_iov() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])

diff --git a/zfs/config/kernel-vfs-read_folio.m4 b/zfs/config/kernel-vfs-read_folio.m4
new file mode 100644
index 0000000..9ca0faf
--- /dev/null
+++ b/zfs/config/kernel-vfs-read_folio.m4

@@ -0,0 +1,32 @@
+dnl #
+dnl # Linux 5.19 uses read_folio in lieu of readpage
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO], [
+	ZFS_LINUX_TEST_SRC([vfs_has_read_folio], [
+		#include <linux/fs.h>
+
+		static int
+		test_read_folio(struct file *file, struct folio *folio) {
+			(void) file; (void) folio;
+			return (0);
+		}
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.read_folio	= test_read_folio,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_VFS_READ_FOLIO], [
+	dnl #
+	dnl # Linux 5.19 uses read_folio in lieu of readpage
+	dnl #
+	AC_MSG_CHECKING([whether read_folio exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_read_folio], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_READ_FOLIO, 1, [read_folio exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-vfs-set_page_dirty.m4 b/zfs/config/kernel-vfs-set_page_dirty.m4
new file mode 100644
index 0000000..90cb28f
--- /dev/null
+++ b/zfs/config/kernel-vfs-set_page_dirty.m4

@@ -0,0 +1,34 @@
+dnl #
+dnl # Linux 5.14 adds a change to require set_page_dirty to be manually
+dnl # wired up in struct address_space_operations. Determine if this needs
+dnl # to be done. This patch set also introduced __set_page_dirty_nobuffers
+dnl # declaration in linux/pagemap.h, so these tests look for the presence
+dnl # of that function to tell the compiler to assign set_page_dirty in
+dnl # module/os/linux/zfs/zpl_file.c
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS], [
+	ZFS_LINUX_TEST_SRC([vfs_has_set_page_dirty_nobuffers], [
+		#include <linux/pagemap.h>
+		#include <linux/fs.h>
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.set_page_dirty = __set_page_dirty_nobuffers,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS], [
+	dnl #
+	dnl # Linux 5.14 change requires .set_page_dirty to be assigned
+	dnl # explicitly in struct address_space_operations
+	dnl #
+	AC_MSG_CHECKING([whether __set_page_dirty_nobuffers exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_set_page_dirty_nobuffers], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS, 1,
+			[__set_page_dirty_nobuffers exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/kernel-writepage_t.m4 b/zfs/config/kernel-writepage_t.m4
new file mode 100644
index 0000000..3a0cffd
--- /dev/null
+++ b/zfs/config/kernel-writepage_t.m4

@@ -0,0 +1,26 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The writepage_t function type now has its first argument as
+	dnl # struct folio* instead of struct page*
+	dnl #
+	ZFS_LINUX_TEST_SRC([writepage_t_folio], [
+		#include <linux/writeback.h>
+		int putpage(struct folio *folio,
+		    struct writeback_control *wbc, void *data)
+		{ return 0; }
+		writepage_t func = putpage;
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [
+	AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*])
+	ZFS_LINUX_TEST_RESULT([writepage_t_folio], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1,
+		   [int (*writepage_t)() takes struct folio*])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+

diff --git a/zfs/config/kernel-xattr-handler.m4 b/zfs/config/kernel-xattr-handler.m4
index ed84c63..6b8a08d 100644
--- a/zfs/config/kernel-xattr-handler.m4
+++ b/zfs/config/kernel-xattr-handler.m4

@@ -28,10 +28,8 @@
 	AC_MSG_CHECKING([whether super_block uses const struct xattr_handler])
 	ZFS_LINUX_TEST_RESULT([const_xattr_handler], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_CONST_XATTR_HANDLER, 1,
-		    [super_block uses const struct xattr_handler])
 	],[
-		AC_MSG_RESULT([no])
+		ZFS_LINUX_TEST_ERROR([const xattr_handler])
 	])
 ])
 
@@ -103,11 +101,13 @@
 		};
 	],[])
 
-	ZFS_LINUX_TEST_SRC([xattr_handler_get_inode], [
+	ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [
 		#include <linux/xattr.h>
 
-		int get(struct inode *ip, const char *name,
-		    void *buffer, size_t size) { return 0; }
+		int get(const struct xattr_handler *handler,
+		    struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer,
+		    size_t size, int flags) { return 0; }
 		static const struct xattr_handler
 		    xops __attribute__ ((unused)) = {
 			.get = get,
@@ -156,16 +156,17 @@
 				    [xattr_handler->get() wants dentry])
 			],[
 				dnl #
-				dnl # Legacy 2.6.32 API
+				dnl # Android API change,
+				dnl # The xattr_handler->get() callback was
+				dnl # changed to take dentry, inode and flags.
 				dnl #
 				AC_MSG_RESULT(no)
 				AC_MSG_CHECKING(
-				    [whether xattr_handler->get() wants inode])
-				ZFS_LINUX_TEST_RESULT(
-				    [xattr_handler_get_inode], [
+				    [whether xattr_handler->get() wants dentry and inode and flags])
+				ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_XATTR_GET_INODE, 1,
-					    [xattr_handler->get() wants inode])
+					AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1,
+					    [xattr_handler->get() wants dentry and inode and flags])
 				],[
 					ZFS_LINUX_TEST_ERROR([xattr get()])
 				])
@@ -178,6 +179,36 @@
 dnl # Supported xattr handler set() interfaces checked newest to oldest.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
+	ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [
+		#include <linux/xattr.h>
+
+		int set(const struct xattr_handler *handler,
+			struct mnt_idmap *idmap,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *buffer,
+			size_t size, int flags)
+			{ return 0; }
+		static const struct xattr_handler
+			xops __attribute__ ((unused)) = {
+			.set = set,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
+		#include <linux/xattr.h>
+
+		int set(const struct xattr_handler *handler,
+			struct user_namespace *mnt_userns,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *buffer,
+			size_t size, int flags)
+			{ return 0; }
+		static const struct xattr_handler
+			xops __attribute__ ((unused)) = {
+			.set = set,
+		};
+	],[])
+
 	ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [
 		#include <linux/xattr.h>
 
@@ -216,73 +247,71 @@
 			.set = set,
 		};
 	],[])
-
-	ZFS_LINUX_TEST_SRC([xattr_handler_set_inode], [
-		#include <linux/xattr.h>
-
-		int set(struct inode *ip, const char *name,
-		    const void *buffer, size_t size, int flags)
-		    { return 0; }
-		static const struct xattr_handler
-		    xops __attribute__ ((unused)) = {
-			.set = set,
-		};
-	],[])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
 	dnl #
-	dnl # 4.7 API change,
-	dnl # The xattr_handler->set() callback was changed to take both
-	dnl # dentry and inode.
+	dnl # 5.12 API change,
+	dnl # The xattr_handler->set() callback was changed to 8 arguments, and
+	dnl # struct user_namespace* was inserted as arg #2
 	dnl #
-	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
-	ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
+	dnl # 6.3 API change,
+	dnl # The xattr_handler->set() callback 2nd arg is now struct mnt_idmap *
+	dnl #
+	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([xattr_handler_set_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
-		    [xattr_handler->set() wants both dentry and inode])
-	],[
-		dnl #
-		dnl # 4.4 API change,
-		dnl # The xattr_handler->set() callback was changed to take a
-		dnl # xattr_handler, and handler_flags argument was removed and
-		dnl # should be accessed by handler->flags.
-		dnl #
-		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING(
-		    [whether xattr_handler->set() wants xattr_handler])
-		ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
+		AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1,
+		    [xattr_handler->set() takes mnt_idmap])
+	], [
+		AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
+		ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
-			    [xattr_handler->set() wants xattr_handler])
+			AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
+			    [xattr_handler->set() takes user_namespace])
 		],[
 			dnl #
-			dnl # 2.6.33 API change,
-			dnl # The xattr_handler->set() callback was changed
-			dnl # to take a dentry instead of an inode, and a
-			dnl # handler_flags argument was added.
+			dnl # 4.7 API change,
+			dnl # The xattr_handler->set() callback was changed to take both
+			dnl # dentry and inode.
 			dnl #
 			AC_MSG_RESULT(no)
-			AC_MSG_CHECKING(
-			    [whether xattr_handler->set() wants dentry])
-			ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+			AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
+			ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
-				    [xattr_handler->set() wants dentry])
+				AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
+				    [xattr_handler->set() wants both dentry and inode])
 			],[
 				dnl #
-				dnl # Legacy 2.6.32 API
+				dnl # 4.4 API change,
+				dnl # The xattr_handler->set() callback was changed to take a
+				dnl # xattr_handler, and handler_flags argument was removed and
+				dnl # should be accessed by handler->flags.
 				dnl #
 				AC_MSG_RESULT(no)
 				AC_MSG_CHECKING(
-				    [whether xattr_handler->set() wants inode])
-				ZFS_LINUX_TEST_RESULT(
-				    [xattr_handler_set_inode], [
+				    [whether xattr_handler->set() wants xattr_handler])
+				ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_XATTR_SET_INODE, 1,
-					    [xattr_handler->set() wants inode])
+					AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
+					    [xattr_handler->set() wants xattr_handler])
 				],[
-					ZFS_LINUX_TEST_ERROR([xattr set()])
+					dnl #
+					dnl # 2.6.33 API change,
+					dnl # The xattr_handler->set() callback was changed
+					dnl # to take a dentry instead of an inode, and a
+					dnl # handler_flags argument was added.
+					dnl #
+					AC_MSG_RESULT(no)
+					AC_MSG_CHECKING(
+					    [whether xattr_handler->set() wants dentry])
+					ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+						AC_MSG_RESULT(yes)
+						AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
+						    [xattr_handler->set() wants dentry])
+					],[
+						ZFS_LINUX_TEST_ERROR([xattr set()])
+					])
 				])
 			])
 		])
@@ -327,18 +356,6 @@
 			.list = list,
 		};
 	],[])
-
-	ZFS_LINUX_TEST_SRC([xattr_handler_list_inode], [
-		#include <linux/xattr.h>
-
-		size_t list(struct inode *ip, char *lst,
-		    size_t list_size, const char *name,
-		    size_t name_len) { return 0; }
-		static const struct xattr_handler
-		    xops __attribute__ ((unused)) = {
-			.list = list,
-		};
-	],[])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [
@@ -379,20 +396,7 @@
 				AC_DEFINE(HAVE_XATTR_LIST_DENTRY, 1,
 				    [xattr_handler->list() wants dentry])
 			],[
-				dnl #
-				dnl # Legacy 2.6.32 API
-				dnl #
-				AC_MSG_RESULT(no)
-				AC_MSG_CHECKING(
-				    [whether xattr_handler->list() wants inode])
-				ZFS_LINUX_TEST_RESULT(
-				    [xattr_handler_list_inode], [
-					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_XATTR_LIST_INODE, 1,
-					    [xattr_handler->list() wants inode])
-				],[
-					ZFS_LINUX_TEST_ERROR([xattr list()])
-				])
+				ZFS_LINUX_TEST_ERROR([xattr list()])
 			])
 		])
 	])
@@ -420,7 +424,7 @@
 		AC_DEFINE(HAVE_POSIX_ACL_FROM_XATTR_USERNS, 1,
 		    [posix_acl_from_xattr() needs user_ns])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([posix_acl_from_xattr()])
 	])
 ])
 

diff --git a/zfs/config/kernel-zero_page.m4 b/zfs/config/kernel-zero_page.m4
new file mode 100644
index 0000000..1461781
--- /dev/null
+++ b/zfs/config/kernel-zero_page.m4

@@ -0,0 +1,27 @@
+dnl #
+dnl # ZERO_PAGE() is an alias for empty_zero_page. On certain architectures
+dnl # this is a GPL-only exported variable.
+dnl #
+
+dnl #
+dnl # Checking if ZERO_PAGE is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_ZERO_PAGE], [
+	ZFS_LINUX_TEST_SRC([zero_page], [
+		#include <asm/pgtable.h>
+	], [
+		struct page *p __attribute__ ((unused));
+		p = ZERO_PAGE(0);
+	], [], [ZFS_META_LICENSE])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_ZERO_PAGE], [
+	AC_MSG_CHECKING([whether ZERO_PAGE() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([zero_page_license], [
+		AC_MSG_RESULT(no)
+	], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ZERO_PAGE_GPL_ONLY, 1,
+		    [ZERO_PAGE() is GPL-only])
+	])
+])

diff --git a/zfs/config/kernel-zlib.m4 b/zfs/config/kernel-zlib.m4
index d554d11..752d388 100644
--- a/zfs/config/kernel-zlib.m4
+++ b/zfs/config/kernel-zlib.m4

@@ -21,6 +21,6 @@
 		AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1,
 		    [zlib_deflate_workspacesize() wants 2 args])
 	],[
-		AC_MSG_RESULT(no)
+		ZFS_LINUX_TEST_ERROR([zlib_deflate_workspacesize()])
 	])
 ])

diff --git a/zfs/config/kernel.m4 b/zfs/config/kernel.m4
index ae9b3f2..b81200f 100644
--- a/zfs/config/kernel.m4
+++ b/zfs/config/kernel.m4

@@ -2,30 +2,32 @@
 dnl # Default ZFS kernel configuration
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
-	dnl # Setup the kernel build environment.
-	ZFS_AC_KERNEL
-	ZFS_AC_QAT
+	AM_COND_IF([BUILD_LINUX], [
+		dnl # Setup the kernel build environment.
+		ZFS_AC_KERNEL
+		ZFS_AC_QAT
 
-	dnl # Sanity checks for module building and CONFIG_* defines
-	ZFS_AC_KERNEL_TEST_MODULE
-	ZFS_AC_KERNEL_CONFIG_DEFINED
+		dnl # Sanity checks for module building and CONFIG_* defines
+		ZFS_AC_KERNEL_CONFIG_DEFINED
+		ZFS_AC_MODULE_SYMVERS
 
-	dnl # Sequential ZFS_LINUX_TRY_COMPILE tests
-	ZFS_AC_KERNEL_FPU_HEADER
-	ZFS_AC_KERNEL_OBJTOOL_HEADER
-	ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T
-	ZFS_AC_KERNEL_MISC_MINOR
-	ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
+		dnl # Sequential ZFS_LINUX_TRY_COMPILE tests
+		ZFS_AC_KERNEL_FPU_HEADER
+		ZFS_AC_KERNEL_OBJTOOL_HEADER
+		ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T
+		ZFS_AC_KERNEL_MISC_MINOR
+		ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
 
-	dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests
-	ZFS_AC_KERNEL_TEST_SRC
-	ZFS_AC_KERNEL_TEST_RESULT
+		dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests
+		ZFS_AC_KERNEL_TEST_SRC
+		ZFS_AC_KERNEL_TEST_RESULT
 
-	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
-		KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ"
+		AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
+			KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ"
+		])
+
+		AC_SUBST(KERNEL_MAKE)
 	])
-
-	AC_SUBST(KERNEL_MAKE)
 ])
 
 dnl #
@@ -38,7 +40,6 @@
 	ZFS_AC_KERNEL_SRC_OBJTOOL
 	ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE
 	ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE
-	ZFS_AC_KERNEL_SRC_CTL_NAME
 	ZFS_AC_KERNEL_SRC_PDE_DATA
 	ZFS_AC_KERNEL_SRC_FALLOCATE
 	ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
@@ -46,6 +47,7 @@
 	ZFS_AC_KERNEL_SRC_SCHED
 	ZFS_AC_KERNEL_SRC_USLEEP_RANGE
 	ZFS_AC_KERNEL_SRC_KMEM_CACHE
+	ZFS_AC_KERNEL_SRC_KVMALLOC
 	ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL
 	ZFS_AC_KERNEL_SRC_WAIT
 	ZFS_AC_KERNEL_SRC_INODE_TIMES
@@ -53,42 +55,21 @@
 	ZFS_AC_KERNEL_SRC_GROUP_INFO_GID
 	ZFS_AC_KERNEL_SRC_RW
 	ZFS_AC_KERNEL_SRC_TIMER_SETUP
-	ZFS_AC_KERNEL_SRC_CURRENT_BIO_TAIL
 	ZFS_AC_KERNEL_SRC_SUPER_USER_NS
 	ZFS_AC_KERNEL_SRC_PROC_OPERATIONS
-	ZFS_AC_KERNEL_SRC_SUBMIT_BIO
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS
-	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
-	ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART
-	ZFS_AC_KERNEL_SRC_INVALIDATE_BDEV
-	ZFS_AC_KERNEL_SRC_LOOKUP_BDEV
-	ZFS_AC_KERNEL_SRC_BDEV_OPEN_EXCLUSIVE
-	ZFS_AC_KERNEL_SRC_BDEV_LOGICAL_BLOCK_SIZE
-	ZFS_AC_KERNEL_SRC_BDEV_PHYSICAL_BLOCK_SIZE
-	ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER
-	ZFS_AC_KERNEL_SRC_BIO_FAILFAST
-	ZFS_AC_KERNEL_SRC_BIO_SET_DEV
-	ZFS_AC_KERNEL_SRC_BIO_OPS
-	ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS
-	ZFS_AC_KERNEL_SRC_BIO_BI_STATUS
-	ZFS_AC_KERNEL_SRC_BIO_RW_BARRIER
-	ZFS_AC_KERNEL_SRC_BIO_RW_DISCARD
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAGS
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
-	ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
-	ZFS_AC_KERNEL_SRC_BLKG_TRYGET
-	ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE
+	ZFS_AC_KERNEL_SRC_BIO
+	ZFS_AC_KERNEL_SRC_BLKDEV
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE
+	ZFS_AC_KERNEL_SRC_GENHD_FLAGS
+	ZFS_AC_KERNEL_SRC_REVALIDATE_DISK
 	ZFS_AC_KERNEL_SRC_GET_DISK_RO
 	ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY
 	ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_SRC_XATTR
 	ZFS_AC_KERNEL_SRC_ACL
+	ZFS_AC_KERNEL_SRC_INODE_SETATTR
 	ZFS_AC_KERNEL_SRC_INODE_GETATTR
 	ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION
@@ -100,13 +81,12 @@
 	ZFS_AC_KERNEL_SRC_EVICT_INODE
 	ZFS_AC_KERNEL_SRC_DIRTY_INODE
 	ZFS_AC_KERNEL_SRC_SHRINKER
-	ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T
-	ZFS_AC_KERNEL_SRC_LOOKUP_NAMEIDATA
-	ZFS_AC_KERNEL_SRC_CREATE_NAMEIDATA
+	ZFS_AC_KERNEL_SRC_MKDIR
+	ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS
+	ZFS_AC_KERNEL_SRC_CREATE
 	ZFS_AC_KERNEL_SRC_GET_LINK
 	ZFS_AC_KERNEL_SRC_PUT_LINK
 	ZFS_AC_KERNEL_SRC_TMPFILE
-	ZFS_AC_KERNEL_SRC_TRUNCATE_RANGE
 	ZFS_AC_KERNEL_SRC_AUTOMOUNT
 	ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE
 	ZFS_AC_KERNEL_SRC_COMMIT_METADATA
@@ -114,6 +94,7 @@
 	ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
 	ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
 	ZFS_AC_KERNEL_SRC_DENTRY
+	ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
 	ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SRC_SECURITY_INODE
 	ZFS_AC_KERNEL_SRC_FST_MOUNT
@@ -121,12 +102,17 @@
 	ZFS_AC_KERNEL_SRC_SET_NLINK
 	ZFS_AC_KERNEL_SRC_SGET
 	ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE
+	ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO
+	ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_SRC_VFS_GETATTR
 	ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_SRC_VFS_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO
+	ZFS_AC_KERNEL_SRC_VFS_READPAGES
+	ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
+	ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
 	ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
@@ -135,8 +121,7 @@
 	ZFS_AC_KERNEL_SRC_FMODE_T
 	ZFS_AC_KERNEL_SRC_KUIDGID_T
 	ZFS_AC_KERNEL_SRC_KUID_HELPERS
-	ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST
-	ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS
+	ZFS_AC_KERNEL_SRC_RENAME
 	ZFS_AC_KERNEL_SRC_CURRENT_TIME
 	ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL
@@ -144,7 +129,33 @@
 	ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC
 	ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
 	ZFS_AC_KERNEL_SRC_KSTRTOUL
-	ZFS_AC_KERNEL_SRC_BLKDEV_CHANGE
+	ZFS_AC_KERNEL_SRC_PERCPU
+	ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
+	ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR
+	ZFS_AC_KERNEL_SRC_MKNOD
+	ZFS_AC_KERNEL_SRC_SYMLINK
+	ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
+	ZFS_AC_KERNEL_SRC_SIGNAL_STOP
+	ZFS_AC_KERNEL_SRC_SIGINFO
+	ZFS_AC_KERNEL_SRC_SYSFS
+	ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
+	ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG
+	ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT
+	ZFS_AC_KERNEL_SRC_ADD_DISK
+	ZFS_AC_KERNEL_SRC_KTHREAD
+	ZFS_AC_KERNEL_SRC_ZERO_PAGE
+	ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_SRC_FILEMAP
+	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
+	ZFS_AC_KERNEL_SRC_RECLAIMED
+	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
+	case "$host_cpu" in
+		powerpc*)
+			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
+			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
+			;;
+	esac
 
 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -158,7 +169,6 @@
 	ZFS_AC_KERNEL_ACCESS_OK_TYPE
 	ZFS_AC_KERNEL_GLOBAL_PAGE_STATE
 	ZFS_AC_KERNEL_OBJTOOL
-	ZFS_AC_KERNEL_CTL_NAME
 	ZFS_AC_KERNEL_PDE_DATA
 	ZFS_AC_KERNEL_FALLOCATE
 	ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
@@ -166,6 +176,7 @@
 	ZFS_AC_KERNEL_SCHED
 	ZFS_AC_KERNEL_USLEEP_RANGE
 	ZFS_AC_KERNEL_KMEM_CACHE
+	ZFS_AC_KERNEL_KVMALLOC
 	ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL
 	ZFS_AC_KERNEL_WAIT
 	ZFS_AC_KERNEL_INODE_TIMES
@@ -173,42 +184,21 @@
 	ZFS_AC_KERNEL_GROUP_INFO_GID
 	ZFS_AC_KERNEL_RW
 	ZFS_AC_KERNEL_TIMER_SETUP
-	ZFS_AC_KERNEL_CURRENT_BIO_TAIL
 	ZFS_AC_KERNEL_SUPER_USER_NS
 	ZFS_AC_KERNEL_PROC_OPERATIONS
-	ZFS_AC_KERNEL_SUBMIT_BIO
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS
-	ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH
-	ZFS_AC_KERNEL_BLKDEV_REREAD_PART
-	ZFS_AC_KERNEL_INVALIDATE_BDEV
-	ZFS_AC_KERNEL_LOOKUP_BDEV
-	ZFS_AC_KERNEL_BDEV_OPEN_EXCLUSIVE
-	ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE
-	ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE
-	ZFS_AC_KERNEL_BIO_BVEC_ITER
-	ZFS_AC_KERNEL_BIO_FAILFAST
-	ZFS_AC_KERNEL_BIO_SET_DEV
-	ZFS_AC_KERNEL_BIO_OPS
-	ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
-	ZFS_AC_KERNEL_BIO_BI_STATUS
-	ZFS_AC_KERNEL_BIO_RW_BARRIER
-	ZFS_AC_KERNEL_BIO_RW_DISCARD
-	ZFS_AC_KERNEL_BLKG_TRYGET
-	ZFS_AC_KERNEL_BLK_QUEUE_BDI
-	ZFS_AC_KERNEL_BLK_QUEUE_DISCARD
-	ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE
-	ZFS_AC_KERNEL_BLK_QUEUE_FLAGS
-	ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
-	ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
-	ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
-	ZFS_AC_KERNEL_BLK_QUEUE_PLUG
-	ZFS_AC_KERNEL_GET_DISK_AND_MODULE
+	ZFS_AC_KERNEL_BIO
+	ZFS_AC_KERNEL_BLKDEV
+	ZFS_AC_KERNEL_BLK_QUEUE
+	ZFS_AC_KERNEL_GENHD_FLAGS
+	ZFS_AC_KERNEL_REVALIDATE_DISK
 	ZFS_AC_KERNEL_GET_DISK_RO
 	ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_DISCARD_GRANULARITY
 	ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_XATTR
 	ZFS_AC_KERNEL_ACL
+	ZFS_AC_KERNEL_INODE_SETATTR
 	ZFS_AC_KERNEL_INODE_GETATTR
 	ZFS_AC_KERNEL_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_INODE_SET_IVERSION
@@ -220,13 +210,12 @@
 	ZFS_AC_KERNEL_EVICT_INODE
 	ZFS_AC_KERNEL_DIRTY_INODE
 	ZFS_AC_KERNEL_SHRINKER
-	ZFS_AC_KERNEL_MKDIR_UMODE_T
-	ZFS_AC_KERNEL_LOOKUP_NAMEIDATA
-	ZFS_AC_KERNEL_CREATE_NAMEIDATA
+	ZFS_AC_KERNEL_MKDIR
+	ZFS_AC_KERNEL_LOOKUP_FLAGS
+	ZFS_AC_KERNEL_CREATE
 	ZFS_AC_KERNEL_GET_LINK
 	ZFS_AC_KERNEL_PUT_LINK
 	ZFS_AC_KERNEL_TMPFILE
-	ZFS_AC_KERNEL_TRUNCATE_RANGE
 	ZFS_AC_KERNEL_AUTOMOUNT
 	ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE
 	ZFS_AC_KERNEL_COMMIT_METADATA
@@ -234,6 +223,7 @@
 	ZFS_AC_KERNEL_SETATTR_PREPARE
 	ZFS_AC_KERNEL_INSERT_INODE_LOCKED
 	ZFS_AC_KERNEL_DENTRY
+	ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
 	ZFS_AC_KERNEL_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SECURITY_INODE
 	ZFS_AC_KERNEL_FST_MOUNT
@@ -241,12 +231,17 @@
 	ZFS_AC_KERNEL_SET_NLINK
 	ZFS_AC_KERNEL_SGET
 	ZFS_AC_KERNEL_LSEEK_EXECUTE
+	ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO
+	ZFS_AC_KERNEL_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_VFS_GETATTR
 	ZFS_AC_KERNEL_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_VFS_ITERATE
 	ZFS_AC_KERNEL_VFS_DIRECT_IO
+	ZFS_AC_KERNEL_VFS_READPAGES
+	ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
+	ZFS_AC_KERNEL_VFS_IOV_ITER
 	ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_MAKE_REQUEST_FN
@@ -255,8 +250,7 @@
 	ZFS_AC_KERNEL_FMODE_T
 	ZFS_AC_KERNEL_KUIDGID_T
 	ZFS_AC_KERNEL_KUID_HELPERS
-	ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST
-	ZFS_AC_KERNEL_RENAME_WANTS_FLAGS
+	ZFS_AC_KERNEL_RENAME
 	ZFS_AC_KERNEL_CURRENT_TIME
 	ZFS_AC_KERNEL_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_IN_COMPAT_SYSCALL
@@ -264,7 +258,33 @@
 	ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC
 	ZFS_AC_KERNEL_TOTALHIGH_PAGES
 	ZFS_AC_KERNEL_KSTRTOUL
-	ZFS_AC_KERNEL_BLKDEV_CHANGE
+	ZFS_AC_KERNEL_PERCPU
+	ZFS_AC_KERNEL_CPU_HOTPLUG
+	ZFS_AC_KERNEL_GENERIC_FILLATTR
+	ZFS_AC_KERNEL_MKNOD
+	ZFS_AC_KERNEL_SYMLINK
+	ZFS_AC_KERNEL_BIO_MAX_SEGS
+	ZFS_AC_KERNEL_SIGNAL_STOP
+	ZFS_AC_KERNEL_SIGINFO
+	ZFS_AC_KERNEL_SYSFS
+	ZFS_AC_KERNEL_SET_SPECIAL_STATE
+	ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG
+	ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT
+	ZFS_AC_KERNEL_ADD_DISK
+	ZFS_AC_KERNEL_KTHREAD
+	ZFS_AC_KERNEL_ZERO_PAGE
+	ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_FILEMAP
+	ZFS_AC_KERNEL_WRITEPAGE_T
+	ZFS_AC_KERNEL_RECLAIMED
+	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_COPY_SPLICE_READ
+	case "$host_cpu" in
+		powerpc*)
+			ZFS_AC_KERNEL_CPU_HAS_FEATURE
+			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
+			;;
+	esac
 ])
 
 dnl #
@@ -298,6 +318,35 @@
 dnl #
 dnl # Detect the kernel to be built against
 dnl #
+dnl # Most modern Linux distributions have separate locations for bare
+dnl # source (source) and prebuilt (build) files. Additionally, there are
+dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)`
+dnl # pointing to them. The directory search order is now:
+dnl # 
+dnl # - `configure` command line values if both `--with-linux` and
+dnl #   `--with-linux-obj` were defined
+dnl # 
+dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed
+dnl #   to have the same value as `--with-linux`
+dnl # 
+dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined
+dnl #   autodetection is used:
+dnl # 
+dnl #   - `/lib/modules/$(uname -r)/{source,build}` respectively, if they exist.
+dnl # 
+dnl #   - If only `/lib/modules/$(uname -r)/build` exists, it is assumed
+dnl #     to be both source and build directory.
+dnl # 
+dnl #   - The first directory in `/lib/modules` with the highest version
+dnl #     number according to `sort -V` which contains both `source` and
+dnl #     `build` symlinks/directories. If module directory contains only
+dnl #     `build` component, it is assumed to be both source and build
+dnl #     directory.
+dnl # 
+dnl #   - Last resort: the first directory matching `/usr/src/kernels/*`
+dnl #     and `/usr/src/linux-*` with the highest version number according
+dnl #     to `sort -V` is assumed to be both source and build directory.
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL], [
 	AC_ARG_WITH([linux],
 		AS_HELP_STRING([--with-linux=PATH],
@@ -309,25 +358,52 @@
 		[Path to kernel build objects]),
 		[kernelbuild="$withval"])
 
-	AC_MSG_CHECKING([kernel source directory])
-	AS_IF([test -z "$kernelsrc"], [
-		AS_IF([test -e "/lib/modules/$(uname -r)/source"], [
-			headersdir="/lib/modules/$(uname -r)/source"
-			sourcelink=$(readlink -f "$headersdir")
+	AC_MSG_CHECKING([kernel source and build directories])
+	AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [
+		kernelbuild="$kernelsrc"
+	], [test -z "$kernelsrc"], [
+		AS_IF([test -e "/lib/modules/$(uname -r)/source" && \
+		       test -e "/lib/modules/$(uname -r)/build"], [
+			src="/lib/modules/$(uname -r)/source"
+			build="/lib/modules/$(uname -r)/build"
 		], [test -e "/lib/modules/$(uname -r)/build"], [
-			headersdir="/lib/modules/$(uname -r)/build"
-			sourcelink=$(readlink -f "$headersdir")
+			build="/lib/modules/$(uname -r)/build"
+			src="$build"
 		], [
-			sourcelink=$(ls -1d /usr/src/kernels/* \
-			             /usr/src/linux-* \
-			             2>/dev/null | grep -v obj | tail -1)
+			src=
+
+			for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do
+				if test -e "$d/source" && test -e "$d/build"; then
+					src="$d/source"
+					build="$d/build"
+					break
+				fi
+
+				if test -e "$d/build"; then
+					src="$d/build"
+					build="$d/build"
+					break
+				fi
+			done
+
+			# the least reliable method
+			if test -z "$src"; then
+				src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \
+				      2>/dev/null | grep -v obj | sort -Vr | head -1)
+				build="$src"
+			fi
 		])
 
-		AS_IF([test -n "$sourcelink" && test -e ${sourcelink}], [
-			kernelsrc=`readlink -f ${sourcelink}`
+		AS_IF([test -n "$src" && test -e "$src"], [
+			kernelsrc=$(readlink -e "$src")
 		], [
 			kernelsrc="[Not found]"
 		])
+		AS_IF([test -n "$build" && test -e "$build"], [
+			kernelbuild=$(readlink -e "$build")
+		], [
+			kernelbuild="[Not found]"
+		])
 	], [
 		AS_IF([test "$kernelsrc" = "NONE"], [
 			kernsrcver=NONE
@@ -335,48 +411,33 @@
 		withlinux=yes
 	])
 
+	AC_MSG_RESULT([done])
+	AC_MSG_CHECKING([kernel source directory])
 	AC_MSG_RESULT([$kernelsrc])
-	AS_IF([test ! -d "$kernelsrc"], [
+	AC_MSG_CHECKING([kernel build directory])
+	AC_MSG_RESULT([$kernelbuild])
+	AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [
 		AC_MSG_ERROR([
 	*** Please make sure the kernel devel package for your distribution
 	*** is installed and then try again.  If that fails, you can specify the
-	*** location of the kernel source with the '--with-linux=PATH' option.])
+	*** location of the kernel source and build with the '--with-linux=PATH' and
+	*** '--with-linux-obj=PATH' options respectively.])
 	])
 
-	AC_MSG_CHECKING([kernel build directory])
-	AS_IF([test -z "$kernelbuild"], [
-		AS_IF([test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"], [
-			kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
-		], [test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}], [
-			kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
-		], [test -d ${kernelsrc}-obj/${target_cpu}/default], [
-			kernelbuild=${kernelsrc}-obj/${target_cpu}/default
-		], [test -d `dirname ${kernelsrc}`/build-${target_cpu}], [
-			kernelbuild=`dirname ${kernelsrc}`/build-${target_cpu}
-		], [
-			kernelbuild=${kernelsrc}
-		])
-	])
-	AC_MSG_RESULT([$kernelbuild])
-
 	AC_MSG_CHECKING([kernel source version])
 	utsrelease1=$kernelbuild/include/linux/version.h
 	utsrelease2=$kernelbuild/include/linux/utsrelease.h
 	utsrelease3=$kernelbuild/include/generated/utsrelease.h
-	AS_IF([test -r $utsrelease1 && fgrep -q UTS_RELEASE $utsrelease1], [
-		utsrelease=linux/version.h
-	], [test -r $utsrelease2 && fgrep -q UTS_RELEASE $utsrelease2], [
-		utsrelease=linux/utsrelease.h
-	], [test -r $utsrelease3 && fgrep -q UTS_RELEASE $utsrelease3], [
-		utsrelease=generated/utsrelease.h
+	AS_IF([test -r $utsrelease1 && grep -qF UTS_RELEASE $utsrelease1], [
+		utsrelease=$utsrelease1
+	], [test -r $utsrelease2 && grep -qF UTS_RELEASE $utsrelease2], [
+		utsrelease=$utsrelease2
+	], [test -r $utsrelease3 && grep -qF UTS_RELEASE $utsrelease3], [
+		utsrelease=$utsrelease3
 	])
 
-	AS_IF([test "$utsrelease"], [
-		kernsrcver=`(echo "#include <$utsrelease>";
-		             echo "kernsrcver=UTS_RELEASE") |
-		             ${CPP} -I $kernelbuild/include - |
-		             grep "^kernsrcver=" | cut -d \" -f 2`
-
+	AS_IF([test -n "$utsrelease"], [
+		kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease)
 		AS_IF([test -z "$kernsrcver"], [
 			AC_MSG_RESULT([Not found])
 			AC_MSG_ERROR([
@@ -398,6 +459,13 @@
 
 	AC_MSG_RESULT([$kernsrcver])
 
+	AS_VERSION_COMPARE([$kernsrcver], [$ZFS_META_KVER_MIN], [
+		 AC_MSG_ERROR([
+	*** Cannot build against kernel version $kernsrcver.
+	*** The minimum supported kernel version is $ZFS_META_KVER_MIN.
+		])
+	])
+
 	LINUX=${kernelsrc}
 	LINUX_OBJ=${kernelbuild}
 	LINUX_VERSION=${kernsrcver}
@@ -405,8 +473,6 @@
 	AC_SUBST(LINUX)
 	AC_SUBST(LINUX_OBJ)
 	AC_SUBST(LINUX_VERSION)
-
-	ZFS_AC_MODULE_SYMVERS
 ])
 
 dnl #
@@ -502,27 +568,6 @@
 ])
 
 dnl #
-dnl # Basic toolchain sanity check.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_TEST_MODULE], [
-	AC_MSG_CHECKING([whether modules can be built])
-	ZFS_LINUX_TRY_COMPILE([], [], [
-		AC_MSG_RESULT([yes])
-	],[
-		AC_MSG_RESULT([no])
-		if test "x$enable_linux_builtin" != xyes; then
-			AC_MSG_ERROR([
-	*** Unable to build an empty module.
-			])
-		else
-			AC_MSG_ERROR([
-	*** Unable to build an empty module.
-	*** Please run 'make scripts' inside the kernel source tree.])
-		fi
-	])
-])
-
-dnl #
 dnl # ZFS_LINUX_CONFTEST_H
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFTEST_H], [
@@ -576,7 +621,9 @@
 dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY])
 dnl #
 m4_define([ZFS_LINUX_TEST_PROGRAM], [
+#include <linux/module.h>
 $1
+
 int
 main (void)
 {
@@ -584,6 +631,11 @@
 	;
 	return 0;
 }
+
+MODULE_DESCRIPTION("conftest");
+MODULE_AUTHOR(ZFS_META_AUTHOR);
+MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+MODULE_LICENSE($3);
 ])
 
 dnl #
@@ -609,10 +661,18 @@
 dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST}
 dnl #
 AC_DEFUN([ZFS_LINUX_COMPILE], [
+	AC_ARG_VAR([KERNEL_CC], [C compiler for
+		building kernel modules])
+	AC_ARG_VAR([KERNEL_LD], [Linker for
+		building kernel modules])
+	AC_ARG_VAR([KERNEL_LLVM], [Binary option to
+		build kernel modules with LLVM/CLANG toolchain])
 	AC_TRY_COMMAND([
 	    KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6"
-	    make modules -k -j$TEST_JOBS -C $LINUX_OBJ $ARCH_UM
-	    M=$PWD/$1 &>$1/build.log])
+	    make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC}
+	    ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM}
+	    CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES
+	    -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1])
 	AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4])
 ])
 
@@ -723,19 +783,21 @@
 dnl # $4 - extra cflags
 dnl # $5 - check license-compatibility
 dnl #
+dnl # Check if the test source is buildable at all and then if it is
+dnl # license compatible.
+dnl #
 dnl # N.B because all of the test cases are compiled in parallel they
 dnl # must never depend on the results of previous tests.  Each test
 dnl # needs to be entirely independent.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_SRC], [
-	ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]])], [$1])
+	ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]],
+	    [["Dual BSD/GPL"]])], [$1])
 	ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4])
 
 	AS_IF([ test -n "$5" ], [
-		ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[
-			#include <linux/module.h>
-			MODULE_LICENSE("$5");
-			$2]], [[$3]])], [$1_license])
+		ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM(
+		    [[$2]], [[$3]], [[$5]])], [$1_license])
 		ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4])
 	])
 ])
@@ -825,11 +887,13 @@
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [
 	AS_IF([test "x$enable_linux_builtin" = "xyes"], [
 		ZFS_LINUX_COMPILE_IFELSE(
-		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
+		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.o], [$3], [$4])
 	], [
 		ZFS_LINUX_COMPILE_IFELSE(
-		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
+		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.ko], [$3], [$4])
 	])
 ])
@@ -894,8 +958,47 @@
 dnl # provided via the fifth parameter
 dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [
-	ZFS_LINUX_COMPILE_IFELSE(
-	    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
-	    [test -f build/conftest/conftest.ko],
-	    [$3], [$4], [$5])
+	AS_IF([test "x$enable_linux_builtin" = "xyes"], [
+		ZFS_LINUX_COMPILE_IFELSE(
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
+		    [[ZFS_META_LICENSE]])],
+		    [test -f build/conftest/conftest.o], [$3], [$4], [$5])
+	], [
+		ZFS_LINUX_COMPILE_IFELSE(
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
+		    [[ZFS_META_LICENSE]])],
+		    [test -f build/conftest/conftest.ko], [$3], [$4], [$5])
+	])
+])
+
+dnl #
+dnl # AS_VERSION_COMPARE_LE
+dnl # like AS_VERSION_COMPARE, but runs $3 if (and only if) $1 <= $2
+dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater])
+dnl #
+AC_DEFUN([AS_VERSION_COMPARE_LE], [
+	AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4])
+])
+
+dnl #
+dnl # ZFS_LINUX_REQUIRE_API
+dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is
+dnl # at least some specified version.
+dnl #
+AC_DEFUN([ZFS_LINUX_REQUIRE_API], [
+	AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [
+		AC_MSG_ERROR([
+		*** None of the expected "$1" interfaces were detected. This
+		*** interface is expected for kernels version "$2" and above.
+		*** This may be because your kernel version is newer than what is
+		*** supported, or you are using a patched custom kernel with
+		*** incompatible modifications.  Newer kernels may have incompatible
+		*** APIs.
+		***
+		*** ZFS Version: $ZFS_META_ALIAS
+		*** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX
+		])
+	], [
+		AC_MSG_RESULT(no)
+	])
 ])

diff --git a/zfs/config/lib-link.m4 b/zfs/config/lib-link.m4
index 01766c3..041f976 100644
--- a/zfs/config/lib-link.m4
+++ b/zfs/config/lib-link.m4

@@ -67,8 +67,8 @@
   AC_LIB_LINKFLAGS_BODY([$1], [$2])
 
   dnl Add $INC[]NAME to CPPFLAGS before performing the following checks,
-  dnl because if the user has installed lib[]Name and not disabled its use
-  dnl via --without-lib[]Name-prefix, he wants to use it.
+  dnl so that if lib[]Name is installed, it will be used (unless
+  dnl disabled via --without-lib[]Name-prefix).
   ac_save_CPPFLAGS="$CPPFLAGS"
   AC_LIB_APPENDTOVAR([CPPFLAGS], [$INC]NAME)
 

diff --git a/zfs/config/lib-prefix.m4 b/zfs/config/lib-prefix.m4
index 8adb17b..f7db237 100644
--- a/zfs/config/lib-prefix.m4
+++ b/zfs/config/lib-prefix.m4

@@ -8,10 +8,9 @@
 
 dnl AC_LIB_PREFIX adds to the CPPFLAGS and LDFLAGS the flags that are needed
 dnl to access previously installed libraries. The basic assumption is that
-dnl a user will want packages to use other packages he previously installed
-dnl with the same --prefix option.
-dnl This macro is not needed if only AC_LIB_LINKFLAGS is used to locate
-dnl libraries, but is otherwise very convenient.
+dnl packages should use other packages that are installed with the same
+dnl --prefix option.  This macro is not needed if only AC_LIB_LINKFLAGS is
+dnl used to locate libraries, but is otherwise very convenient.
 AC_DEFUN([AC_LIB_PREFIX],
 [
   AC_BEFORE([$0], [AC_LIB_LINKFLAGS])

diff --git a/zfs/config/mount-helper.m4 b/zfs/config/mount-helper.m4
index 0a6c767..e559b9a 100644
--- a/zfs/config/mount-helper.m4
+++ b/zfs/config/mount-helper.m4

@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_MOUNT_HELPER], [
 	AC_ARG_WITH(mounthelperdir,
-		AC_HELP_STRING([--with-mounthelperdir=DIR],
+		AS_HELP_STRING([--with-mounthelperdir=DIR],
 		[install mount.zfs in dir [[/sbin]]]),
 		mounthelperdir=$withval,mounthelperdir=/sbin)
 

diff --git a/zfs/config/rpm.am b/zfs/config/rpm.am
index 51a20b3..c8aaf72 100644
--- a/zfs/config/rpm.am
+++ b/zfs/config/rpm.am

@@ -6,6 +6,12 @@
 # Build targets for RPM packages.
 ###############################################################################
 
+PHONY += srpm srpms srpm-kmod srpm-dkms srpm-utils
+PHONY += rpm rpms rpm-kmod rpm-dkms rpm-utils rpm-utils-initramfs
+PHONY += srpm-common rpm-common rpm-local
+
+srpm-kmod srpm-dkms srpm-utils: dist
+
 srpm-kmod:
 	$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}-kmod" \
 		def='${SRPM_DEFINE_COMMON} ${SRPM_DEFINE_KMOD}' srpm-common
@@ -29,10 +35,22 @@
 	$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}-dkms" \
 		def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_DKMS}' rpm-common
 
+# The rpm-utils and rpm-utils-initramfs targets are identical except for the
+# zfs-initramfs package: rpm-utils never includes it, rpm-utils-initramfs
+# includes it if detected at configure time. The zfs-initramfs package does
+# not work on any known RPM-based distribution and the resulting RPM is only
+# used to create a Debian package. The rpm-utils-initramfs target is not
+# intended to be specified by the user directly, it is provided as a
+# dependency of the deb-utils target.
+
 rpm-utils: srpm-utils
 	$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}" \
 		def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_UTIL}' rpm-common
 
+rpm-utils-initramfs: srpm-utils
+	$(MAKE) $(AM_MAKEFLAGS) pkg="${PACKAGE}" \
+		def='${RPM_DEFINE_COMMON} ${RPM_DEFINE_UTIL} ${RPM_DEFINE_INITRAMFS}' rpm-common
+
 rpm: rpm-kmod rpm-dkms rpm-utils
 rpms: rpm-kmod rpm-dkms rpm-utils
 
@@ -52,9 +70,10 @@
 	cp ${RPM_SPEC_DIR}/$(rpmspec) $(rpmbuild)/SPECS && \
 	mkdir -p $(rpmbuild)/SOURCES && \
 	cp $(top_srcdir)/scripts/kmodtool $(rpmbuild)/SOURCES && \
+	cp $(top_srcdir)/scripts/signmod $(rpmbuild)/SOURCES && \
 	cp $(distdir).tar.gz $(rpmbuild)/SOURCES)
 
-srpm-common: dist
+srpm-common:
 	@(dist=`$(RPM) --eval %{?dist}`; \
 	rpmpkg=$(pkg)-$(VERSION)-$(RELEASE)$$dist*src.rpm; \
 	rpmspec=$(pkg).spec; \

diff --git a/zfs/config/suppressed-warnings.txt b/zfs/config/suppressed-warnings.txt
deleted file mode 100644
index 621e3cd..0000000
--- a/zfs/config/suppressed-warnings.txt
+++ /dev/null

@@ -1,7 +0,0 @@
-#
-# Expected warnings which should be suppressed by buildbot
-#
-None : ^libtool: install: warning: relinking `.*'$
-None : ^libtool: install: warning: remember to run `libtool --finish .*'$
-None : ^libtool: install: warning: `.*' has not been installed in `.*'$
-None : ^warning: File listed twice:.*

diff --git a/zfs/config/tgz.am b/zfs/config/tgz.am
index 0657d04..2499ba4 100644
--- a/zfs/config/tgz.am
+++ b/zfs/config/tgz.am

@@ -1,3 +1,5 @@
+PHONY += tgz tgz-kmod tgz-utils tgz-local
+
 tgz-local:
 	@(if test "${HAVE_ALIEN}" = "no"; then \
 		echo -e "\n" \
@@ -8,17 +10,14 @@
 	fi)
 
 tgz-kmod: tgz-local rpm-kmod
-if CONFIG_KERNEL
 	name=${PACKAGE}; \
 	version=${VERSION}-${RELEASE}; \
 	arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
 	pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
 	fakeroot $(ALIEN) --scripts --to-tgz $$pkg1; \
 	$(RM) $$pkg1
-endif
 
 tgz-utils: tgz-local rpm-utils
-if CONFIG_USER
 	name=${PACKAGE}; \
 	version=${VERSION}-${RELEASE}; \
 	arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
@@ -27,6 +26,5 @@
 	pkg3=$${name}-test-$${version}.$${arch}.rpm; \
 	fakeroot $(ALIEN) --scripts --to-tgz $$pkg1 $$pkg2 $$pkg3; \
 	$(RM) $$pkg1 $$pkg2 $$pkg3
-endif
 
 tgz: tgz-kmod tgz-utils

diff --git a/zfs/config/toolchain-simd.m4 b/zfs/config/toolchain-simd.m4
index e86eb7f..061576f 100644
--- a/zfs/config/toolchain-simd.m4
+++ b/zfs/config/toolchain-simd.m4

@@ -3,7 +3,7 @@
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
 	case "$host_cpu" in
-		x86_64 | x86 | i686)
+		amd64 | x86_64 | x86 | i686)
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
@@ -24,6 +24,9 @@
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
 			;;
 	esac
 ])
@@ -422,3 +425,66 @@
 		AC_MSG_RESULT([no])
 	])
 ])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVE])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsave %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVE], 1, [Define if host toolchain supports XSAVE])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVEOPT])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsaveopt %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVEOPT], 1, [Define if host toolchain supports XSAVEOPT])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVES])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsaves %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVES], 1, [Define if host toolchain supports XSAVES])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])

diff --git a/zfs/config/user-clock_gettime.m4 b/zfs/config/user-clock_gettime.m4
new file mode 100644
index 0000000..c96024d
--- /dev/null
+++ b/zfs/config/user-clock_gettime.m4

@@ -0,0 +1,12 @@
+dnl #
+dnl # Check if librt is required for clock_gettime.
+dnl # clock_gettime is generally available in libc on modern systems.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_CLOCK_GETTIME], [
+	AC_CHECK_FUNC([clock_gettime], [], [
+	    AC_CHECK_LIB([rt], [clock_gettime], [
+		AC_SUBST([LIBCLOCK_GETTIME], [-lrt])], [
+		AC_MSG_FAILURE([*** clock_gettime is missing in libc and librt])
+	    ])
+	])
+])

diff --git a/zfs/config/user-dracut.m4 b/zfs/config/user-dracut.m4
index 95f800b..b970529 100644
--- a/zfs/config/user-dracut.m4
+++ b/zfs/config/user-dracut.m4

@@ -1,7 +1,7 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_DRACUT], [
 	AC_MSG_CHECKING(for dracut directory)
 	AC_ARG_WITH([dracutdir],
-		AC_HELP_STRING([--with-dracutdir=DIR],
+		AS_HELP_STRING([--with-dracutdir=DIR],
 		[install dracut helpers @<:@default=check@:>@]),
 		[dracutdir=$withval],
 		[dracutdir=check])

diff --git a/zfs/config/user-gettext.m4 b/zfs/config/user-gettext.m4
index 89d1d45..824318e 100644
--- a/zfs/config/user-gettext.m4
+++ b/zfs/config/user-gettext.m4

@@ -2,7 +2,5 @@
 dnl # Check if libintl and possibly libiconv are needed for gettext() functionality
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_GETTEXT], [
-    AM_ICONV
     AM_GNU_GETTEXT([external])
-    LIBS="$LIBS $LTLIBINTL $LTLIBICONV"
 ])

diff --git a/zfs/config/user-libaio.m4 b/zfs/config/user-libaio.m4
index d7a7cb5..95c144d 100644
--- a/zfs/config/user-libaio.m4
+++ b/zfs/config/user-libaio.m4

@@ -2,13 +2,5 @@
 dnl # Check for libaio - only used for libaiot test cases.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_LIBAIO], [
-	LIBAIO=
-
-	AC_CHECK_HEADER([libaio.h], [
-	    user_libaio=yes
-	    AC_SUBST([LIBAIO], ["-laio"])
-	    AC_DEFINE([HAVE_LIBAIO], 1, [Define if you have libaio])
-	], [
-	    user_libaio=no
-	])
+	ZFS_AC_FIND_SYSTEM_LIBRARY(LIBAIO, [], [libaio.h], [], [aio], [], [user_libaio=yes], [user_libaio=no])
 ])

diff --git a/zfs/config/user-libatomic.m4 b/zfs/config/user-libatomic.m4
new file mode 100644
index 0000000..d15069f
--- /dev/null
+++ b/zfs/config/user-libatomic.m4

@@ -0,0 +1,28 @@
+dnl #
+dnl # If -latomic exists and atomic.c doesn't link without it,
+dnl # it's needed for __atomic intrinsics.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBATOMIC], [
+	AC_MSG_CHECKING([whether -latomic is required])
+
+	saved_libs="$LIBS"
+	LIBS="$LIBS -latomic"
+	LIBATOMIC_LIBS=""
+
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])], [
+		LIBS="$saved_libs"
+		saved_cflags="$CFLAGS"
+		CFLAGS="$CFLAGS -isystem lib/libspl/include"
+		AC_LINK_IFELSE([AC_LANG_PROGRAM([#include "lib/libspl/atomic.c"], [])], [], [LIBATOMIC_LIBS="-latomic"])
+		CFLAGS="$saved_cflags"
+	])
+
+	if test -n "$LIBATOMIC_LIBS"; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+	fi
+
+	LIBS="$saved_libs"
+	AC_SUBST([LIBATOMIC_LIBS])
+])

diff --git a/zfs/config/user-libblkid.m4 b/zfs/config/user-libblkid.m4
index 88e6f99..f2016dc 100644
--- a/zfs/config/user-libblkid.m4
+++ b/zfs/config/user-libblkid.m4

@@ -3,11 +3,7 @@
 dnl # has existed in blkid since 2008.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_LIBBLKID], [
-	LIBBLKID=
-
-	AC_CHECK_HEADER([blkid/blkid.h], [], [AC_MSG_FAILURE([
-	*** blkid.h missing, libblkid-devel package required])])
-
-	AC_SUBST([LIBBLKID], ["-lblkid"])
-	AC_DEFINE([HAVE_LIBBLKID], 1, [Define if you have libblkid])
+	ZFS_AC_FIND_SYSTEM_LIBRARY(LIBBLKID, [blkid], [blkid/blkid.h], [], [blkid], [], [], [
+		AC_MSG_FAILURE([
+		*** blkid.h missing, libblkid-devel package required])])
 ])

diff --git a/zfs/config/user-libcrypto.m4 b/zfs/config/user-libcrypto.m4
new file mode 100644
index 0000000..7293e1b
--- /dev/null
+++ b/zfs/config/user-libcrypto.m4

@@ -0,0 +1,8 @@
+dnl #
+dnl # Check for libcrypto. Used for userspace password derivation via PBKDF2.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBCRYPTO], [
+	ZFS_AC_FIND_SYSTEM_LIBRARY(LIBCRYPTO, [libcrypto], [openssl/evp.h], [], [crypto], [PKCS5_PBKDF2_HMAC_SHA1], [], [
+		AC_MSG_FAILURE([
+		*** evp.h missing, libssl-devel package required])])
+])

diff --git a/zfs/config/user-libexec.m4 b/zfs/config/user-libexec.m4
index 31bcea3..5379c25 100644
--- a/zfs/config/user-libexec.m4
+++ b/zfs/config/user-libexec.m4

@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_ZFSEXEC], [
 	AC_ARG_WITH(zfsexecdir,
-		AC_HELP_STRING([--with-zfsexecdir=DIR],
+		AS_HELP_STRING([--with-zfsexecdir=DIR],
 		[install scripts [[@<:@libexecdir@:>@/zfs]]]),
 		[zfsexecdir=$withval],
 		[zfsexecdir="${libexecdir}/zfs"])

diff --git a/zfs/config/user-libfetch.m4 b/zfs/config/user-libfetch.m4
new file mode 100644
index 0000000..d961c6c
--- /dev/null
+++ b/zfs/config/user-libfetch.m4

@@ -0,0 +1,71 @@
+dnl #
+dnl # Check for a libfetch - either fetch(3) or libcurl.
+dnl #
+dnl # There are two configuration dimensions:
+dnl #   * fetch(3) vs libcurl
+dnl #   * static vs dynamic
+dnl #
+dnl # fetch(3) is only dynamic.
+dnl # We use sover 6, which first appeared in FreeBSD 8.0-RELEASE.
+dnl #
+dnl # libcurl development packages include curl-config(1) – we want:
+dnl #   * HTTPS support
+dnl #   * version at least 7.16 (October 2006), for sover 4
+dnl #   * to decide if it's static or not
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBFETCH], [
+	AC_MSG_CHECKING([for libfetch])
+	LIBFETCH_LIBS=
+	LIBFETCH_IS_FETCH=0
+	LIBFETCH_IS_LIBCURL=0
+	LIBFETCH_DYNAMIC=0
+	LIBFETCH_SONAME=
+	have_libfetch=
+
+	saved_libs="$LIBS"
+	LIBS="$LIBS -lfetch"
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+		#include <sys/param.h>
+		#include <stdio.h>
+		#include <fetch.h>
+	]], [fetchGetURL("", "");])], [
+		have_libfetch=1
+		LIBFETCH_IS_FETCH=1
+		LIBFETCH_DYNAMIC=1
+		LIBFETCH_SONAME="libfetch.so.6"
+		LIBFETCH_LIBS="-ldl"
+		AC_MSG_RESULT([fetch(3)])
+	], [])
+	LIBS="$saved_libs"
+
+	if test -z "$have_libfetch"; then
+		if curl-config --protocols 2>/dev/null | grep -q HTTPS &&
+		    test "$(printf "%u" "0x$(curl-config --vernum)")" -ge "$(printf "%u" "0x071000")"; then
+			have_libfetch=1
+			LIBFETCH_IS_LIBCURL=1
+			if test "$(curl-config --built-shared)" = "yes"; then
+				LIBFETCH_DYNAMIC=1
+				LIBFETCH_SONAME="libcurl.so.4"
+				LIBFETCH_LIBS="-ldl"
+				AC_MSG_RESULT([libcurl])
+			else
+				LIBFETCH_LIBS="$(curl-config --libs)"
+				AC_MSG_RESULT([libcurl (static)])
+			fi
+
+			CFLAGS="$CFLAGS $(curl-config --cflags)"
+		fi
+	fi
+
+	if test -z "$have_libfetch"; then
+		AC_MSG_RESULT([none])
+	fi
+
+	AC_SUBST([LIBFETCH_LIBS])
+	AC_SUBST([LIBFETCH_DYNAMIC])
+	AC_SUBST([LIBFETCH_SONAME])
+	AC_DEFINE_UNQUOTED([LIBFETCH_IS_FETCH], [$LIBFETCH_IS_FETCH], [libfetch is fetch(3)])
+	AC_DEFINE_UNQUOTED([LIBFETCH_IS_LIBCURL], [$LIBFETCH_IS_LIBCURL], [libfetch is libcurl])
+	AC_DEFINE_UNQUOTED([LIBFETCH_DYNAMIC], [$LIBFETCH_DYNAMIC], [whether the chosen libfetch is to be loaded at run-time])
+	AC_DEFINE_UNQUOTED([LIBFETCH_SONAME], ["$LIBFETCH_SONAME"], [soname of chosen libfetch])
+])

diff --git a/zfs/config/user-libssl.m4 b/zfs/config/user-libssl.m4
deleted file mode 100644
index f682451..0000000
--- a/zfs/config/user-libssl.m4
+++ /dev/null

@@ -1,12 +0,0 @@
-dnl #
-dnl # Check for libssl. Used for userspace password derivation via PBKDF2.
-dnl #
-AC_DEFUN([ZFS_AC_CONFIG_USER_LIBSSL], [
-	LIBSSL=
-
-	AC_CHECK_HEADER([openssl/evp.h], [], [AC_MSG_FAILURE([
-	*** evp.h missing, libssl-devel package required])])
-
-	AC_SUBST([LIBSSL], ["-lssl -lcrypto"])
-	AC_DEFINE([HAVE_LIBSSL], 1, [Define if you have libssl])
-])

diff --git a/zfs/config/user-libtirpc.m4 b/zfs/config/user-libtirpc.m4
index 19c02c9..aa7ab4a 100644
--- a/zfs/config/user-libtirpc.m4
+++ b/zfs/config/user-libtirpc.m4

@@ -19,7 +19,7 @@
         ])
 
 	AS_IF([test "x$have_xdr" = "x"], [
-            FIND_SYSTEM_LIBRARY(LIBTIRPC, [libtirpc], [rpc/xdr.h], [tirpc], [tirpc], [xdrmem_create], [], [
+            ZFS_AC_FIND_SYSTEM_LIBRARY(LIBTIRPC, [libtirpc], [rpc/xdr.h], [tirpc], [tirpc], [xdrmem_create], [], [
 		AS_IF([test "x$with_tirpc" = "xyes"], [
 		    AC_MSG_FAILURE([--with-tirpc was given, but libtirpc is not available, try installing libtirpc-devel])
 		],[dnl ELSE

diff --git a/zfs/config/user-libudev.m4 b/zfs/config/user-libudev.m4
index 9b74549..8c3c1d7 100644
--- a/zfs/config/user-libudev.m4
+++ b/zfs/config/user-libudev.m4

@@ -2,18 +2,16 @@
 dnl # Check for libudev - needed for vdev auto-online and auto-replace
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_LIBUDEV], [
-	LIBUDEV=
+	ZFS_AC_FIND_SYSTEM_LIBRARY(LIBUDEV, [libudev], [libudev.h], [], [udev], [], [user_libudev=yes], [user_libudev=no])
 
-	AC_CHECK_HEADER([libudev.h], [
-	    user_libudev=yes
-	    AC_SUBST([LIBUDEV], ["-ludev"])
-	    AC_DEFINE([HAVE_LIBUDEV], 1, [Define if you have libudev])
-	], [
-	    user_libudev=no
+	AS_IF([test "x$user_libudev" = xyes], [
+	    AX_SAVE_FLAGS
+
+	    CFLAGS="$CFLAGS $LIBUDEV_CFLAGS"
+	    LIBS="$LIBUDEV_LIBS $LIBS"
+
+	    AC_CHECK_FUNCS([udev_device_get_is_initialized])
+
+	    AX_RESTORE_FLAGS
 	])
-
-	AC_SEARCH_LIBS([udev_device_get_is_initialized], [udev], [
-	    AC_DEFINE([HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED], 1, [
-	    Define if udev_device_get_is_initialized is available])], [])
-
 ])

diff --git a/zfs/config/user-libuuid.m4 b/zfs/config/user-libuuid.m4
index f0da671..0cfa83c 100644
--- a/zfs/config/user-libuuid.m4
+++ b/zfs/config/user-libuuid.m4

@@ -2,17 +2,7 @@
 dnl # Check for libuuid
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_LIBUUID], [
-	LIBUUID=
-
-	AC_CHECK_HEADER([uuid/uuid.h], [], [AC_MSG_FAILURE([
-	*** uuid/uuid.h missing, libuuid-devel package required])])
-
-	AC_SEARCH_LIBS([uuid_generate], [uuid], [], [AC_MSG_FAILURE([
-	*** uuid_generate() missing, libuuid-devel package required])])
-
-	AC_SEARCH_LIBS([uuid_is_null], [uuid], [], [AC_MSG_FAILURE([
-	*** uuid_is_null() missing, libuuid-devel package required])])
-
-	AC_SUBST([LIBUUID], ["-luuid"])
-	AC_DEFINE([HAVE_LIBUUID], 1, [Define if you have libuuid])
+	ZFS_AC_FIND_SYSTEM_LIBRARY(LIBUUID, [uuid], [uuid/uuid.h], [], [uuid], [uuid_generate uuid_is_null], [], [
+	    AC_MSG_FAILURE([*** libuuid-devel package required])
+	])
 ])

diff --git a/zfs/config/user-makedev.m4 b/zfs/config/user-makedev.m4
index 4383681..8986107 100644
--- a/zfs/config/user-makedev.m4
+++ b/zfs/config/user-makedev.m4

@@ -3,13 +3,12 @@
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_MAKEDEV_IN_SYSMACROS], [
 	AC_MSG_CHECKING([makedev() is declared in sys/sysmacros.h])
-	AC_TRY_COMPILE(
-	[
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 		#include <sys/sysmacros.h>
-	],[
+	]], [[
 		int k;
 		k = makedev(0,0);
-	],[
+	]])],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_MAKEDEV_IN_SYSMACROS, 1,
 		    [makedev() is declared in sys/sysmacros.h])
@@ -23,13 +22,12 @@
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV], [
 	AC_MSG_CHECKING([makedev() is declared in sys/mkdev.h])
-	AC_TRY_COMPILE(
-	[
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 		#include <sys/mkdev.h>
-	],[
+	]], [[
 		int k;
 		k = makedev(0,0);
-	],[
+	]])],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_MAKEDEV_IN_MKDEV, 1,
 		    [makedev() is declared in sys/mkdev.h])

diff --git a/zfs/config/user-pam.m4 b/zfs/config/user-pam.m4
new file mode 100644
index 0000000..9db3580
--- /dev/null
+++ b/zfs/config/user-pam.m4

@@ -0,0 +1,38 @@
+AC_DEFUN([ZFS_AC_CONFIG_USER_PAM], [
+	AC_ARG_ENABLE([pam],
+		AS_HELP_STRING([--enable-pam],
+		[install pam_zfs_key module [[default: check]]]),
+		[enable_pam=$enableval],
+		[enable_pam=check])
+
+	AC_ARG_WITH(pammoduledir,
+		AS_HELP_STRING([--with-pammoduledir=DIR],
+		[install pam module in dir [[$libdir/security]]]),
+		[pammoduledir="$withval"],[pammoduledir=$libdir/security])
+
+	AC_ARG_WITH(pamconfigsdir,
+		AS_HELP_STRING([--with-pamconfigsdir=DIR],
+		[install pam-config files in dir [DATADIR/pam-configs]]),
+		[pamconfigsdir="$withval"],
+		[pamconfigsdir='${datadir}/pam-configs'])
+
+	AS_IF([test "x$enable_pam" != "xno"], [
+		AC_CHECK_HEADERS([security/pam_modules.h], [
+			enable_pam=yes
+		], [
+			AS_IF([test "x$enable_pam" = "xyes"], [
+				AC_MSG_FAILURE([
+	*** security/pam_modules.h missing, libpam0g-dev package required
+				])
+			],[
+				enable_pam=no
+			])
+		])
+	])
+	AS_IF([test "x$enable_pam" = "xyes"], [
+		DEFINE_PAM='--with pam'
+	])
+	AC_SUBST(DEFINE_PAM)
+	AC_SUBST(pammoduledir)
+	AC_SUBST(pamconfigsdir)
+])

diff --git a/zfs/config/user-systemd.m4 b/zfs/config/user-systemd.m4
index 3e6a4a2..63f02ad 100644
--- a/zfs/config/user-systemd.m4
+++ b/zfs/config/user-systemd.m4

@@ -1,27 +1,27 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_SYSTEMD], [
 	AC_ARG_ENABLE(systemd,
-		AC_HELP_STRING([--enable-systemd],
+		AS_HELP_STRING([--enable-systemd],
 		[install systemd unit/preset files [[default: yes]]]),
 		[enable_systemd=$enableval],
 		[enable_systemd=check])
 
 	AC_ARG_WITH(systemdunitdir,
-		AC_HELP_STRING([--with-systemdunitdir=DIR],
+		AS_HELP_STRING([--with-systemdunitdir=DIR],
 		[install systemd unit files in dir [[/usr/lib/systemd/system]]]),
 		systemdunitdir=$withval,systemdunitdir=/usr/lib/systemd/system)
 
 	AC_ARG_WITH(systemdpresetdir,
-		AC_HELP_STRING([--with-systemdpresetdir=DIR],
+		AS_HELP_STRING([--with-systemdpresetdir=DIR],
 		[install systemd preset files in dir [[/usr/lib/systemd/system-preset]]]),
 		systemdpresetdir=$withval,systemdpresetdir=/usr/lib/systemd/system-preset)
 
 	AC_ARG_WITH(systemdmodulesloaddir,
-		AC_HELP_STRING([--with-systemdmodulesloaddir=DIR],
+		AS_HELP_STRING([--with-systemdmodulesloaddir=DIR],
 		[install systemd module load files into dir [[/usr/lib/modules-load.d]]]),
 		systemdmodulesloaddir=$withval,systemdmodulesloaddir=/usr/lib/modules-load.d)
 
 	AC_ARG_WITH(systemdgeneratordir,
-		AC_HELP_STRING([--with-systemdgeneratordir=DIR],
+		AS_HELP_STRING([--with-systemdgeneratordir=DIR],
 		[install systemd generators in dir [[/usr/lib/systemd/system-generators]]]),
 		systemdgeneratordir=$withval,systemdgeneratordir=/usr/lib/systemd/system-generators)
 

diff --git a/zfs/config/user-sysvinit.m4 b/zfs/config/user-sysvinit.m4
index 65dcc38..b6b63f1 100644
--- a/zfs/config/user-sysvinit.m4
+++ b/zfs/config/user-sysvinit.m4

@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_SYSVINIT], [
 	AC_ARG_ENABLE(sysvinit,
-		AC_HELP_STRING([--enable-sysvinit],
+		AS_HELP_STRING([--enable-sysvinit],
 		[install SysV init scripts [default: yes]]),
 		[],enable_sysvinit=yes)
 

diff --git a/zfs/config/user-udev.m4 b/zfs/config/user-udev.m4
index 65dc79f..e6120fc 100644
--- a/zfs/config/user-udev.m4
+++ b/zfs/config/user-udev.m4

@@ -1,7 +1,7 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_UDEV], [
 	AC_MSG_CHECKING(for udev directories)
 	AC_ARG_WITH(udevdir,
-		AC_HELP_STRING([--with-udevdir=DIR],
+		AS_HELP_STRING([--with-udevdir=DIR],
 		[install udev helpers @<:@default=check@:>@]),
 		[udevdir=$withval],
 		[udevdir=check])
@@ -18,7 +18,7 @@
 	])
 
 	AC_ARG_WITH(udevruledir,
-		AC_HELP_STRING([--with-udevruledir=DIR],
+		AS_HELP_STRING([--with-udevruledir=DIR],
 		[install udev rules [[UDEVDIR/rules.d]]]),
 		[udevruledir=$withval],
 		[udevruledir="${udevdir}/rules.d"])

diff --git a/zfs/config/user-zlib.m4 b/zfs/config/user-zlib.m4
index 82c0962..1f37928 100644
--- a/zfs/config/user-zlib.m4
+++ b/zfs/config/user-zlib.m4

@@ -2,20 +2,7 @@
 dnl # Check for zlib
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_ZLIB], [
-	ZLIB=
-
-	AC_CHECK_HEADER([zlib.h], [], [AC_MSG_FAILURE([
-	*** zlib.h missing, zlib-devel package required])])
-
-	AC_SEARCH_LIBS([compress2], [z], [], [AC_MSG_FAILURE([
-	*** compress2() missing, zlib-devel package required])])
-
-	AC_SEARCH_LIBS([uncompress], [z], [], [AC_MSG_FAILURE([
-	*** uncompress() missing, zlib-devel package required])])
-
-	AC_SEARCH_LIBS([crc32], [z], [], [AC_MSG_FAILURE([
-	*** crc32() missing, zlib-devel package required])])
-
-	AC_SUBST([ZLIB], ["-lz"])
-	AC_DEFINE([HAVE_ZLIB], 1, [Define if you have zlib])
+	ZFS_AC_FIND_SYSTEM_LIBRARY(ZLIB, [zlib], [zlib.h], [], [z], [compress2 uncompress crc32], [], [
+	    AC_MSG_FAILURE([*** zlib-devel package required])
+	])
 ])

diff --git a/zfs/config/user.m4 b/zfs/config/user.m4
index 3d97e9a..670820b 100644
--- a/zfs/config/user.m4
+++ b/zfs/config/user.m4

@@ -4,25 +4,34 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER], [
 	ZFS_AC_CONFIG_USER_GETTEXT
 	ZFS_AC_CONFIG_USER_MOUNT_HELPER
-	ZFS_AC_CONFIG_USER_UDEV
-	ZFS_AC_CONFIG_USER_SYSTEMD
 	ZFS_AC_CONFIG_USER_SYSVINIT
 	ZFS_AC_CONFIG_USER_DRACUT
+	AM_COND_IF([BUILD_FREEBSD], [
+		PKG_INSTALLDIR(['${prefix}/libdata/pkgconfig'])], [
+		PKG_INSTALLDIR
+	])
 	ZFS_AC_CONFIG_USER_ZLIB
-	ZFS_AC_CONFIG_USER_LIBUUID
+	AM_COND_IF([BUILD_LINUX], [
+		ZFS_AC_CONFIG_USER_UDEV
+		ZFS_AC_CONFIG_USER_SYSTEMD
+		ZFS_AC_CONFIG_USER_LIBUUID
+		ZFS_AC_CONFIG_USER_LIBBLKID
+	])
 	ZFS_AC_CONFIG_USER_LIBTIRPC
-	ZFS_AC_CONFIG_USER_LIBBLKID
 	ZFS_AC_CONFIG_USER_LIBUDEV
-	ZFS_AC_CONFIG_USER_LIBSSL
+	ZFS_AC_CONFIG_USER_LIBCRYPTO
 	ZFS_AC_CONFIG_USER_LIBAIO
+	ZFS_AC_CONFIG_USER_LIBATOMIC
+	ZFS_AC_CONFIG_USER_LIBFETCH
+	ZFS_AC_CONFIG_USER_CLOCK_GETTIME
+	ZFS_AC_CONFIG_USER_PAM
 	ZFS_AC_CONFIG_USER_RUNSTATEDIR
 	ZFS_AC_CONFIG_USER_MAKEDEV_IN_SYSMACROS
 	ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV
 	ZFS_AC_CONFIG_USER_ZFSEXEC
-
 	ZFS_AC_TEST_FRAMEWORK
 
-	AC_CHECK_FUNCS([mlockall strlcat strlcpy])
+	AC_CHECK_FUNCS([issetugid mlockall strlcat strlcpy])
 ])
 
 dnl #

diff --git a/zfs/config/zfs-build.m4 b/zfs/config/zfs-build.m4
index 92aa603..25987f9 100644
--- a/zfs/config/zfs-build.m4
+++ b/zfs/config/zfs-build.m4

@@ -11,6 +11,7 @@
 	DEBUG_CPPFLAGS="-DDEBUG -UNDEBUG"
 	DEBUG_LDFLAGS=""
 	DEBUG_ZFS="_with_debug"
+	WITH_DEBUG="true"
 	AC_DEFINE(ZFS_DEBUG, 1, [zfs debugging enabled])
 
 	KERNEL_DEBUG_CFLAGS="-Werror"
@@ -22,6 +23,7 @@
 	DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG"
 	DEBUG_LDFLAGS=""
 	DEBUG_ZFS="_without_debug"
+	WITH_DEBUG=""
 
 	KERNEL_DEBUG_CFLAGS=""
 	KERNEL_DEBUG_CPPFLAGS="-UDEBUG -DNDEBUG"
@@ -32,6 +34,9 @@
 dnl # - Enable all ASSERTs (-DDEBUG)
 dnl # - Promote all compiler warnings to errors (-Werror)
 dnl #
+dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics
+dnl # can ensue.)
+dnl #
 AC_DEFUN([ZFS_AC_DEBUG], [
 	AC_MSG_CHECKING([whether assertion support will be enabled])
 	AC_ARG_ENABLE([debug],
@@ -47,10 +52,25 @@
 		[ZFS_AC_DEBUG_DISABLE],
 		[AC_MSG_ERROR([Unknown option $enable_debug])])
 
+	AS_CASE(["x$enable_invariants"],
+		["xyes"],
+		[],
+		["xno"],
+		[],
+		[ZFS_AC_DEBUG_INVARIANTS_DETECT])
+
+	AS_CASE(["x$enable_invariants"],
+		["xyes"],
+		[ZFS_AC_DEBUG_ENABLE],
+		["xno"],
+		[],
+		[AC_MSG_ERROR([Unknown option $enable_invariants])])
+
 	AC_SUBST(DEBUG_CFLAGS)
 	AC_SUBST(DEBUG_CPPFLAGS)
 	AC_SUBST(DEBUG_LDFLAGS)
 	AC_SUBST(DEBUG_ZFS)
+	AC_SUBST(WITH_DEBUG)
 
 	AC_SUBST(KERNEL_DEBUG_CFLAGS)
 	AC_SUBST(KERNEL_DEBUG_CPPFLAGS)
@@ -59,9 +79,9 @@
 ])
 
 AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [
-	DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline"
+	DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA"
 
-	KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline"
+	KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA"
 	KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y"
 
 	DEBUGINFO_ZFS="_with_debuginfo"
@@ -110,7 +130,7 @@
 		[enable_debug_kmem=no])
 
 	AS_IF([test "x$enable_debug_kmem" = xyes], [
-		KERNEL_DEBUG_CPPFLAGS+=" -DDEBUG_KMEM"
+		KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM"
 		DEBUG_KMEM_ZFS="_with_debug_kmem"
 	], [
 		DEBUG_KMEM_ZFS="_without_debug_kmem"
@@ -140,7 +160,7 @@
 		[enable_debug_kmem_tracking=no])
 
 	AS_IF([test "x$enable_debug_kmem_tracking" = xyes], [
-		KERNEL_DEBUG_CPPFLAGS+=" -DDEBUG_KMEM_TRACKING"
+		KERNEL_DEBUG_CPPFLAGS="${KERNEL_DEBUG_CPPFLAGS} -DDEBUG_KMEM_TRACKING"
 		DEBUG_KMEM_TRACKING_ZFS="_with_debug_kmem_tracking"
 	], [
 		DEBUG_KMEM_TRACKING_ZFS="_without_debug_kmem_tracking"
@@ -152,17 +172,62 @@
 	AC_MSG_RESULT([$enable_debug_kmem_tracking])
 ])
 
+AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD], [
+	AS_IF([sysctl -n kern.conftxt | grep -Fqx $'options\tINVARIANTS'],
+		[enable_invariants="yes"],
+		[enable_invariants="no"])
+])
+
+AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS_DETECT], [
+	AM_COND_IF([BUILD_FREEBSD],
+		[ZFS_AC_DEBUG_INVARIANTS_DETECT_FREEBSD],
+		[enable_invariants="no"])
+])
+
+dnl #
+dnl # Enable INVARIANTS features in the FreeBSD kernel module.  By default the
+dnl # setting is detected from the running kernel.  It must be enabled when
+dnl # building for a FreeBSD kernel with "options INVARIANTS" in its KERNCONF
+dnl # and must not be enabled when the INVARIANTS option is absent.
+dnl #
+AC_DEFUN([ZFS_AC_DEBUG_INVARIANTS], [
+	AC_MSG_CHECKING([whether FreeBSD kernel INVARIANTS checks are enabled])
+	AC_ARG_ENABLE([invariants],
+		[AS_HELP_STRING([--enable-invariants],
+		[Enable FreeBSD kernel INVARIANTS checks [[default: detect]]])],
+		[], [ZFS_AC_DEBUG_INVARIANTS_DETECT])
+
+	AS_IF([test "x$enable_invariants" = xyes],
+		[WITH_INVARIANTS="true"],
+		[WITH_INVARIANTS=""])
+	AC_SUBST(WITH_INVARIANTS)
+
+	AC_MSG_RESULT([$enable_invariants])
+])
+
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
-	ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE
-	ZFS_AC_CONFIG_ALWAYS_CC_NO_BOOL_COMPARE
+	AX_COUNT_CPUS([])
+	AC_SUBST(CPU_COUNT)
+
+	ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED
+	ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION
+	ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH
 	ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION
+	ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
+	ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
+	ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA
 	ZFS_AC_CONFIG_ALWAYS_CC_ASAN
 	ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
+	ZFS_AC_CONFIG_ALWAYS_SYSTEM
 	ZFS_AC_CONFIG_ALWAYS_ARCH
 	ZFS_AC_CONFIG_ALWAYS_PYTHON
 	ZFS_AC_CONFIG_ALWAYS_PYZFS
+	ZFS_AC_CONFIG_ALWAYS_SED
+	ZFS_AC_CONFIG_ALWAYS_CPPCHECK
+	ZFS_AC_CONFIG_ALWAYS_SHELLCHECK
+	ZFS_AC_CONFIG_ALWAYS_PARALLEL
 ])
 
 AC_DEFUN([ZFS_AC_CONFIG], [
@@ -170,20 +235,13 @@
         dnl # Remove the previous build test directory.
         rm -Rf build
 
-	AC_ARG_VAR([TEST_JOBS],
-	    [simultaneous jobs during configure (defaults to $(nproc))])
-	if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then
-		TEST_JOBS=$(nproc)
-	fi
-	AC_SUBST(TEST_JOBS)
-
 	ZFS_CONFIG=all
 	AC_ARG_WITH([config],
 		AS_HELP_STRING([--with-config=CONFIG],
 		[Config file 'kernel|user|all|srpm']),
 		[ZFS_CONFIG="$withval"])
 	AC_ARG_ENABLE([linux-builtin],
-		[AC_HELP_STRING([--enable-linux-builtin],
+		[AS_HELP_STRING([--enable-linux-builtin],
 		[Configure for builtin in-tree kernel modules @<:@default=no@:>@])],
 		[],
 		[enable_linux_builtin=no])
@@ -194,6 +252,14 @@
 
 	ZFS_AC_CONFIG_ALWAYS
 
+	AM_COND_IF([BUILD_LINUX], [
+		AC_ARG_VAR([TEST_JOBS], [simultaneous jobs during configure])
+		if test "x$ac_cv_env_TEST_JOBS_set" != "xset"; then
+			TEST_JOBS=$CPU_COUNT
+		fi
+		AC_SUBST(TEST_JOBS)
+	])
+
 	case "$ZFS_CONFIG" in
 		kernel) ZFS_AC_CONFIG_KERNEL ;;
 		user)	ZFS_AC_CONFIG_USER   ;;
@@ -216,6 +282,7 @@
 	    [test "x$qatsrc" != x ])
 	AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ])
 	AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ])
+	AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes])
 ])
 
 dnl #
@@ -253,12 +320,14 @@
 	])
 
 	RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1"'
-	RPM_DEFINE_COMMON+=' --define "$(DEBUG_KMEM_ZFS) 1"'
-	RPM_DEFINE_COMMON+=' --define "$(DEBUG_KMEM_TRACKING_ZFS) 1"'
-	RPM_DEFINE_COMMON+=' --define "$(DEBUGINFO_ZFS) 1"'
-	RPM_DEFINE_COMMON+=' --define "$(ASAN_ZFS) 1"'
+	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUGINFO_ZFS) 1"'
+	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_ZFS) 1"'
+	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_TRACKING_ZFS) 1"'
+	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(ASAN_ZFS) 1"'
 
-	RPM_DEFINE_UTIL=' --define "_initconfdir $(DEFAULT_INITCONF_DIR)"'
+	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"'
+
+	RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"'
 
 	dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since
 	dnl # their values may not be set when running:
@@ -266,19 +335,19 @@
 	dnl #	./configure --with-config=srpm
 	dnl #
 	AS_IF([test -n "$dracutdir" ], [
-		RPM_DEFINE_UTIL='--define "_dracutdir $(dracutdir)"'
+		RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_dracutdir $(dracutdir)"'
 	])
 	AS_IF([test -n "$udevdir" ], [
-		RPM_DEFINE_UTIL+=' --define "_udevdir $(udevdir)"'
+		RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevdir $(udevdir)"'
 	])
 	AS_IF([test -n "$udevruledir" ], [
-		RPM_DEFINE_UTIL+=' --define "_udevdir $(udevruledir)"'
+		RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
 	])
-	RPM_DEFINE_UTIL+=' $(DEFINE_INITRAMFS)'
-	RPM_DEFINE_UTIL+=' $(DEFINE_SYSTEMD)'
-	RPM_DEFINE_UTIL+=' $(DEFINE_PYZFS)'
-	RPM_DEFINE_UTIL+=' $(DEFINE_PYTHON_VERSION)'
-	RPM_DEFINE_UTIL+=' $(DEFINE_PYTHON_PKG_VERSION)'
+	RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
+	RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
+	RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
+	RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_VERSION)'
+	RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYTHON_PKG_VERSION)'
 
 	dnl # Override default lib directory on Debian/Ubuntu systems.  The
 	dnl # provided /usr/lib/rpm/platform/<arch>/macros files do not
@@ -290,14 +359,23 @@
 	dnl #
 	AS_IF([test "$DEFAULT_PACKAGE" = "deb"], [
 		MULTIARCH_LIBDIR="lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)"
-		RPM_DEFINE_UTIL+=' --define "_lib $(MULTIARCH_LIBDIR)"'
+		RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_lib $(MULTIARCH_LIBDIR)"'
 		AC_SUBST(MULTIARCH_LIBDIR)
 	])
 
-	RPM_DEFINE_KMOD='--define "kernels $(LINUX_VERSION)"'
-	RPM_DEFINE_KMOD+=' --define "ksrc $(LINUX)"'
-	RPM_DEFINE_KMOD+=' --define "kobj $(LINUX_OBJ)"'
-	RPM_DEFINE_KMOD+=' --define "_wrong_version_format_terminate_build 0"'
+	dnl # Make RPM_DEFINE_KMOD additions conditional on CONFIG_KERNEL,
+	dnl # since the values will not be set otherwise. The spec files
+	dnl # provide defaults for them.
+	dnl #
+	RPM_DEFINE_KMOD='--define "_wrong_version_format_terminate_build 0"'
+	AM_COND_IF([CONFIG_KERNEL], [
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernels $(LINUX_VERSION)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "ksrc $(LINUX)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kobj $(LINUX_OBJ)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_cc KERNEL_CC=$(KERNEL_CC)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_ld KERNEL_LD=$(KERNEL_LD)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_llvm KERNEL_LLVM=$(KERNEL_LLVM)"'
+	])
 
 	RPM_DEFINE_DKMS=''
 
@@ -385,6 +463,9 @@
 	AC_MSG_CHECKING([whether $ALIEN is available])
 	AS_IF([tmp=$($ALIEN --version 2>/dev/null)], [
 		ALIEN_VERSION=$(echo $tmp | $AWK '{ print $[3] }')
+		ALIEN_MAJOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[1] }')
+		ALIEN_MINOR=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[2] }')
+		ALIEN_POINT=$(echo ${ALIEN_VERSION} | $AWK -F'.' '{ print $[3] }')
 		HAVE_ALIEN=yes
 		AC_MSG_RESULT([$HAVE_ALIEN ($ALIEN_VERSION)])
 	],[
@@ -395,6 +476,9 @@
 	AC_SUBST(HAVE_ALIEN)
 	AC_SUBST(ALIEN)
 	AC_SUBST(ALIEN_VERSION)
+	AC_SUBST(ALIEN_MAJOR)
+	AC_SUBST(ALIEN_MINOR)
+	AC_SUBST(ALIEN_POINT)
 ])
 
 dnl #
@@ -402,32 +486,46 @@
 dnl # package type for 'make pkg': (rpm | deb | tgz)
 dnl #
 AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
-	AC_MSG_CHECKING([linux distribution])
-	if test -f /etc/toss-release ; then
-		VENDOR=toss ;
-	elif test -f /etc/fedora-release ; then
-		VENDOR=fedora ;
-	elif test -f /etc/redhat-release ; then
-		VENDOR=redhat ;
-	elif test -f /etc/gentoo-release ; then
-		VENDOR=gentoo ;
-	elif test -f /etc/arch-release ; then
-		VENDOR=arch ;
-	elif test -f /etc/SuSE-release ; then
-		VENDOR=sles ;
-	elif test -f /etc/slackware-version ; then
-		VENDOR=slackware ;
-	elif test -f /etc/lunar.release ; then
-		VENDOR=lunar ;
-	elif test -f /etc/lsb-release ; then
-		VENDOR=ubuntu ;
-	elif test -f /etc/debian_version ; then
-		VENDOR=debian ;
-	elif test -f /etc/alpine-release ; then
-		VENDOR=alpine ;
-	else
-		VENDOR= ;
-	fi
+	AC_MSG_CHECKING([os distribution])
+	AC_ARG_WITH([vendor],
+		[AS_HELP_STRING([--with-vendor],
+			[Distribution vendor @<:@default=check@:>@])],
+		[with_vendor=$withval],
+		[with_vendor=check])
+	AS_IF([test "x$with_vendor" = "xcheck"],[
+		if test -f /etc/toss-release ; then
+			VENDOR=toss ;
+		elif test -f /etc/fedora-release ; then
+			VENDOR=fedora ;
+		elif test -f /etc/redhat-release ; then
+			VENDOR=redhat ;
+		elif test -f /etc/gentoo-release ; then
+			VENDOR=gentoo ;
+		elif test -f /etc/arch-release ; then
+			VENDOR=arch ;
+		elif test -f /etc/SuSE-release ; then
+			VENDOR=sles ;
+		elif test -f /etc/slackware-version ; then
+			VENDOR=slackware ;
+		elif test -f /etc/lunar.release ; then
+			VENDOR=lunar ;
+		elif test -f /etc/lsb-release ; then
+			VENDOR=ubuntu ;
+		elif test -f /etc/debian_version ; then
+			VENDOR=debian ;
+		elif test -f /etc/alpine-release ; then
+			VENDOR=alpine ;
+		elif test -f /bin/freebsd-version ; then
+			VENDOR=freebsd ;
+		elif test -f /etc/openEuler-release ; then
+			VENDOR=openeuler ;
+		else
+			VENDOR= ;
+		fi],
+		[ test "x${with_vendor}" != x],[
+			VENDOR="$with_vendor" ],
+		[ VENDOR= ; ]
+	)
 	AC_MSG_RESULT([$VENDOR])
 	AC_SUBST(VENDOR)
 
@@ -444,58 +542,65 @@
 		lunar)      DEFAULT_PACKAGE=tgz  ;;
 		ubuntu)     DEFAULT_PACKAGE=deb  ;;
 		debian)     DEFAULT_PACKAGE=deb  ;;
+		freebsd)    DEFAULT_PACKAGE=pkg  ;;
+		openeuler)  DEFAULT_PACKAGE=rpm  ;;
 		*)          DEFAULT_PACKAGE=rpm  ;;
 	esac
 	AC_MSG_RESULT([$DEFAULT_PACKAGE])
 	AC_SUBST(DEFAULT_PACKAGE)
 
-	DEFAULT_INIT_DIR=$sysconfdir/init.d
 	AC_MSG_CHECKING([default init directory])
-	AC_MSG_RESULT([$DEFAULT_INIT_DIR])
-	AC_SUBST(DEFAULT_INIT_DIR)
-
-	AC_MSG_CHECKING([default init script type])
 	case "$VENDOR" in
-		toss)       DEFAULT_INIT_SCRIPT=redhat ;;
-		redhat)     DEFAULT_INIT_SCRIPT=redhat ;;
-		fedora)     DEFAULT_INIT_SCRIPT=fedora ;;
-		gentoo)     DEFAULT_INIT_SCRIPT=openrc ;;
-		alpine)     DEFAULT_INIT_SCRIPT=openrc ;;
-		arch)       DEFAULT_INIT_SCRIPT=lsb    ;;
-		sles)       DEFAULT_INIT_SCRIPT=lsb    ;;
-		slackware)  DEFAULT_INIT_SCRIPT=lsb    ;;
-		lunar)      DEFAULT_INIT_SCRIPT=lunar  ;;
-		ubuntu)     DEFAULT_INIT_SCRIPT=lsb    ;;
-		debian)     DEFAULT_INIT_SCRIPT=lsb    ;;
-		*)          DEFAULT_INIT_SCRIPT=lsb    ;;
+		freebsd)    initdir=$sysconfdir/rc.d  ;;
+		*)          initdir=$sysconfdir/init.d;;
 	esac
-	AC_MSG_RESULT([$DEFAULT_INIT_SCRIPT])
-	AC_SUBST(DEFAULT_INIT_SCRIPT)
+	AC_MSG_RESULT([$initdir])
+	AC_SUBST(initdir)
+
+	AC_MSG_CHECKING([default shell])
+	case "$VENDOR" in
+		gentoo)     DEFAULT_INIT_SHELL="/sbin/openrc-run";;
+		alpine)     DEFAULT_INIT_SHELL="/sbin/openrc-run";;
+		*)          DEFAULT_INIT_SHELL="/bin/sh"         ;;
+	esac
+
+	AC_MSG_RESULT([$DEFAULT_INIT_SHELL])
+	AC_SUBST(DEFAULT_INIT_SHELL)
+
+	AC_MSG_CHECKING([default nfs server init script])
+	AS_IF([test "$VENDOR" = "debian"],
+		[DEFAULT_INIT_NFS_SERVER="nfs-kernel-server"],
+		[DEFAULT_INIT_NFS_SERVER="nfs"]
+	)
+	AC_MSG_RESULT([$DEFAULT_INIT_NFS_SERVER])
+	AC_SUBST(DEFAULT_INIT_NFS_SERVER)
 
 	AC_MSG_CHECKING([default init config directory])
 	case "$VENDOR" in
-		alpine)     DEFAULT_INITCONF_DIR=/etc/conf.d    ;;
-		gentoo)     DEFAULT_INITCONF_DIR=/etc/conf.d    ;;
-		toss)       DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
-		redhat)     DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
-		fedora)     DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
-		sles)       DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
-		ubuntu)     DEFAULT_INITCONF_DIR=/etc/default   ;;
-		debian)     DEFAULT_INITCONF_DIR=/etc/default   ;;
-		*)          DEFAULT_INITCONF_DIR=/etc/default   ;;
+		alpine)     initconfdir=/etc/conf.d    ;;
+		gentoo)     initconfdir=/etc/conf.d    ;;
+		toss)       initconfdir=/etc/sysconfig ;;
+		redhat)     initconfdir=/etc/sysconfig ;;
+		fedora)     initconfdir=/etc/sysconfig ;;
+		sles)       initconfdir=/etc/sysconfig ;;
+		openeuler)  initconfdir=/etc/sysconfig ;;
+		ubuntu)     initconfdir=/etc/default   ;;
+		debian)     initconfdir=/etc/default   ;;
+		freebsd)    initconfdir=$sysconfdir/rc.conf.d;;
+		*)          initconfdir=/etc/default   ;;
 	esac
-	AC_MSG_RESULT([$DEFAULT_INITCONF_DIR])
-	AC_SUBST(DEFAULT_INITCONF_DIR)
+	AC_MSG_RESULT([$initconfdir])
+	AC_SUBST(initconfdir)
 
 	AC_MSG_CHECKING([whether initramfs-tools is available])
 	if test -d /usr/share/initramfs-tools ; then
-		DEFINE_INITRAMFS='--define "_initramfs 1"'
+		RPM_DEFINE_INITRAMFS='--define "_initramfs 1"'
 		AC_MSG_RESULT([yes])
 	else
-		DEFINE_INITRAMFS=''
+		RPM_DEFINE_INITRAMFS=''
 		AC_MSG_RESULT([no])
 	fi
-	AC_SUBST(DEFINE_INITRAMFS)
+	AC_SUBST(RPM_DEFINE_INITRAMFS)
 ])
 
 dnl #
@@ -503,7 +608,9 @@
 dnl #
 AC_DEFUN([ZFS_AC_PACKAGE], [
 	ZFS_AC_DEFAULT_PACKAGE
-	ZFS_AC_RPM
-	ZFS_AC_DPKG
-	ZFS_AC_ALIEN
+	AS_IF([test "x$VENDOR" != "xfreebsd"], [
+		ZFS_AC_RPM
+		ZFS_AC_DPKG
+		ZFS_AC_ALIEN
+	])
 ])

diff --git a/zfs/config/zfs-meta.m4 b/zfs/config/zfs-meta.m4
index b3c1bef..20064a0 100644
--- a/zfs/config/zfs-meta.m4
+++ b/zfs/config/zfs-meta.m4

@@ -73,14 +73,14 @@
 		if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
 			_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
 			_alias=$(git describe --match=${_match} 2>/dev/null)
-			_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+			_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
 			if test -n "${_release}"; then
 				ZFS_META_RELEASE=${_release}
 				_zfs_ac_meta_type="git describe"
 			else
 				_match="${ZFS_META_NAME}-${ZFS_META_VERSION}-${ZFS_META_RELEASE}"
 	                        _alias=$(git describe --match=${_match} 2>/dev/null)
-	                        _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+				_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
 				if test -n "${_release}"; then
 					ZFS_META_RELEASE=${_release}
 					_zfs_ac_meta_type="git describe"

diff --git a/zfs/configure.ac b/zfs/configure.ac
index cf8cfdf..abf0a4a 100644
--- a/zfs/configure.ac
+++ b/zfs/configure.ac

@@ -36,7 +36,7 @@
 ZFS_AC_META
 AC_CONFIG_AUX_DIR([config])
 AC_CONFIG_MACRO_DIR([config])
-AC_CANONICAL_SYSTEM
+AC_CANONICAL_TARGET
 AM_MAINTAINER_MODE
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 AM_INIT_AUTOMAKE([subdir-objects])
@@ -45,9 +45,10 @@
 	awk -f ${ac_srcdir}/config/config.awk zfs_config.h.tmp >zfs_config.h &&
 	rm zfs_config.h.tmp) || exit 1])
 
+LT_INIT
 AC_PROG_INSTALL
 AC_PROG_CC
-AC_PROG_LIBTOOL
+AC_PROG_LN_S
 PKG_PROG_PKG_CONFIG
 AM_PROG_AS
 AM_PROG_CC_C_O
@@ -55,127 +56,164 @@
 _AM_PROG_TAR(pax)
 
 ZFS_AC_LICENSE
-ZFS_AC_PACKAGE
 ZFS_AC_CONFIG
+ZFS_AC_PACKAGE
 ZFS_AC_DEBUG
 ZFS_AC_DEBUGINFO
 ZFS_AC_DEBUG_KMEM
 ZFS_AC_DEBUG_KMEM_TRACKING
+ZFS_AC_DEBUG_INVARIANTS
 
 AC_CONFIG_FILES([
 	Makefile
-	udev/Makefile
-	udev/rules.d/Makefile
+	cmd/Makefile
+	cmd/arc_summary/Makefile
+	cmd/arcstat/Makefile
+	cmd/dbufstat/Makefile
+	cmd/fsck_zfs/Makefile
+	cmd/mount_zfs/Makefile
+	cmd/raidz_test/Makefile
+	cmd/vdev_id/Makefile
+	cmd/zdb/Makefile
+	cmd/zed/Makefile
+	cmd/zed/zed.d/Makefile
+	cmd/zfs/Makefile
+	cmd/zfs_ids_to_path/Makefile
+	cmd/zgenhostid/Makefile
+	cmd/zhack/Makefile
+	cmd/zinject/Makefile
+	cmd/zpool/Makefile
+	cmd/zstream/Makefile
+	cmd/ztest/Makefile
+	cmd/zvol_id/Makefile
+	cmd/zvol_wait/Makefile
+	cmd/zpool_influxdb/Makefile
+    cmd/zfstool/Makefile
+	contrib/Makefile
+	contrib/bash_completion.d/Makefile
+	contrib/bpftrace/Makefile
+	contrib/dracut/02zfsexpandknowledge/Makefile
+	contrib/dracut/90zfs/Makefile
+	contrib/dracut/Makefile
+	contrib/initramfs/Makefile
+	contrib/initramfs/conf.d/Makefile
+	contrib/initramfs/conf-hooks.d/Makefile
+	contrib/initramfs/hooks/Makefile
+	contrib/initramfs/scripts/Makefile
+	contrib/initramfs/scripts/local-top/Makefile
+	contrib/pam_zfs_key/Makefile
+	contrib/pyzfs/Makefile
+	contrib/pyzfs/setup.py
+	contrib/zcp/Makefile
 	etc/Makefile
 	etc/default/Makefile
 	etc/init.d/Makefile
-	etc/zfs/Makefile
-	etc/systemd/Makefile
-	etc/systemd/system/Makefile
-	etc/systemd/system-generators/Makefile
-	etc/sudoers.d/Makefile
 	etc/modules-load.d/Makefile
-	man/Makefile
-	man/man1/Makefile
-	man/man5/Makefile
-	man/man8/Makefile
+	etc/sudoers.d/Makefile
+	etc/systemd/Makefile
+	etc/systemd/system-generators/Makefile
+	etc/systemd/system/Makefile
+	etc/zfs/Makefile
+	include/Makefile
+	include/os/Makefile
+	include/os/freebsd/Makefile
+	include/os/freebsd/linux/Makefile
+	include/os/freebsd/spl/Makefile
+	include/os/freebsd/spl/acl/Makefile
+	include/os/freebsd/spl/rpc/Makefile
+	include/os/freebsd/spl/sys/Makefile
+	include/os/freebsd/zfs/Makefile
+	include/os/freebsd/zfs/sys/Makefile
+	include/os/linux/Makefile
+	include/os/linux/kernel/Makefile
+	include/os/linux/kernel/linux/Makefile
+	include/os/linux/spl/Makefile
+	include/os/linux/spl/rpc/Makefile
+	include/os/linux/spl/sys/Makefile
+	include/os/linux/zfs/Makefile
+	include/os/linux/zfs/sys/Makefile
+	include/sys/Makefile
+	include/sys/crypto/Makefile
+	include/sys/fm/Makefile
+	include/sys/fm/fs/Makefile
+	include/sys/fs/Makefile
+	include/sys/lua/Makefile
+	include/sys/sysevent/Makefile
+	include/sys/zstd/Makefile
 	lib/Makefile
-	lib/libspl/Makefile
-	lib/libspl/asm-generic/Makefile
-	lib/libspl/asm-i386/Makefile
-	lib/libspl/asm-x86_64/Makefile
-	lib/libspl/include/Makefile
-	lib/libspl/include/ia32/Makefile
-	lib/libspl/include/ia32/sys/Makefile
-	lib/libspl/include/rpc/Makefile
-	lib/libspl/include/sys/Makefile
-	lib/libspl/include/sys/dktp/Makefile
-	lib/libspl/include/util/Makefile
 	lib/libavl/Makefile
 	lib/libefi/Makefile
 	lib/libicp/Makefile
 	lib/libnvpair/Makefile
-	lib/libzutil/Makefile
+	lib/libshare/Makefile
+	lib/libspl/Makefile
+	lib/libspl/include/Makefile
+	lib/libspl/include/ia32/Makefile
+	lib/libspl/include/ia32/sys/Makefile
+	lib/libspl/include/os/Makefile
+	lib/libspl/include/os/freebsd/Makefile
+	lib/libspl/include/os/freebsd/sys/Makefile
+	lib/libspl/include/os/linux/Makefile
+	lib/libspl/include/os/linux/sys/Makefile
+	lib/libspl/include/rpc/Makefile
+	lib/libspl/include/sys/Makefile
+	lib/libspl/include/sys/dktp/Makefile
+	lib/libspl/include/util/Makefile
 	lib/libtpool/Makefile
 	lib/libunicode/Makefile
 	lib/libuutil/Makefile
-	lib/libzpool/Makefile
-	lib/libzfs/libzfs.pc
-	lib/libzfs/libzfs_core.pc
 	lib/libzfs/Makefile
+	lib/libzfs/libzfs.pc
+	lib/libzfsbootenv/Makefile
+	lib/libzfsbootenv/libzfsbootenv.pc
 	lib/libzfs_core/Makefile
-	lib/libshare/Makefile
-	cmd/Makefile
-	cmd/zdb/Makefile
-	cmd/zhack/Makefile
-	cmd/zfs/Makefile
-	cmd/zinject/Makefile
-	cmd/zpool/Makefile
-	cmd/zstreamdump/Makefile
-	cmd/ztest/Makefile
-	cmd/mount_zfs/Makefile
-	cmd/fsck_zfs/Makefile
-	cmd/zvol_id/Makefile
-	cmd/vdev_id/Makefile
-	cmd/arcstat/Makefile
-	cmd/dbufstat/Makefile
-	cmd/arc_summary/Makefile
-	cmd/zed/Makefile
-	cmd/zed/zed.d/Makefile
-	cmd/zfstool/Makefile
-	cmd/raidz_test/Makefile
-	cmd/zgenhostid/Makefile
-	cmd/zvol_wait/Makefile
-	contrib/Makefile
-	contrib/bash_completion.d/Makefile
-	contrib/dracut/Makefile
-	contrib/dracut/02zfsexpandknowledge/Makefile
-	contrib/dracut/90zfs/Makefile
-	contrib/initramfs/Makefile
-	contrib/initramfs/hooks/Makefile
-	contrib/initramfs/scripts/Makefile
-	contrib/initramfs/scripts/local-top/Makefile
-	contrib/pyzfs/Makefile
-	contrib/pyzfs/setup.py
-	contrib/zcp/Makefile
+	lib/libzfs_core/libzfs_core.pc
+	lib/libzpool/Makefile
+	lib/libzstd/Makefile
+	lib/libzutil/Makefile
+	man/Makefile
+	module/Kbuild
 	module/Makefile
 	module/avl/Makefile
+	module/icp/Makefile
+	module/lua/Makefile
 	module/nvpair/Makefile
+	module/os/linux/spl/Makefile
+	module/os/linux/zfs/Makefile
+	module/spl/Makefile
 	module/unicode/Makefile
 	module/zcommon/Makefile
 	module/zfs/Makefile
-	module/lua/Makefile
-	module/icp/Makefile
-	module/spl/Makefile
-	include/Makefile
-	include/linux/Makefile
-	include/spl/Makefile
-	include/spl/rpc/Makefile
-	include/spl/sys/Makefile
-	include/sys/Makefile
-	include/sys/fs/Makefile
-	include/sys/fm/Makefile
-	include/sys/fm/fs/Makefile
-	include/sys/crypto/Makefile
-	include/sys/sysevent/Makefile
-	include/sys/lua/Makefile
+	module/zstd/Makefile
+	rpm/Makefile
+	rpm/generic/Makefile
+	rpm/generic/zfs-dkms.spec
+	rpm/generic/zfs-kmod.spec
+	rpm/generic/zfs.spec
+	rpm/redhat/Makefile
+	rpm/redhat/zfs-dkms.spec
+	rpm/redhat/zfs-kmod.spec
+	rpm/redhat/zfs.spec
 	scripts/Makefile
 	tests/Makefile
+	tests/runfiles/Makefile
 	tests/test-runner/Makefile
 	tests/test-runner/bin/Makefile
 	tests/test-runner/include/Makefile
 	tests/test-runner/man/Makefile
-	tests/runfiles/Makefile
 	tests/zfs-tests/Makefile
 	tests/zfs-tests/callbacks/Makefile
 	tests/zfs-tests/cmd/Makefile
+	tests/zfs-tests/cmd/badsend/Makefile
+	tests/zfs-tests/cmd/btree_test/Makefile
 	tests/zfs-tests/cmd/chg_usr_exec/Makefile
-	tests/zfs-tests/cmd/user_ns_exec/Makefile
 	tests/zfs-tests/cmd/devname2devid/Makefile
+	tests/zfs-tests/cmd/draid/Makefile
 	tests/zfs-tests/cmd/dir_rd_update/Makefile
 	tests/zfs-tests/cmd/file_check/Makefile
 	tests/zfs-tests/cmd/file_trunc/Makefile
 	tests/zfs-tests/cmd/file_write/Makefile
+	tests/zfs-tests/cmd/get_diff/Makefile
 	tests/zfs-tests/cmd/largest_file/Makefile
 	tests/zfs-tests/cmd/libzfs_input_check/Makefile
 	tests/zfs-tests/cmd/mkbusy/Makefile
@@ -184,6 +222,8 @@
 	tests/zfs-tests/cmd/mktree/Makefile
 	tests/zfs-tests/cmd/mmap_exec/Makefile
 	tests/zfs-tests/cmd/mmap_libaio/Makefile
+	tests/zfs-tests/cmd/mmap_seek/Makefile
+	tests/zfs-tests/cmd/mmap_sync/Makefile
 	tests/zfs-tests/cmd/mmapwrite/Makefile
 	tests/zfs-tests/cmd/nvlist_to_lua/Makefile
 	tests/zfs-tests/cmd/randfree_file/Makefile
@@ -191,27 +231,35 @@
 	tests/zfs-tests/cmd/readmmap/Makefile
 	tests/zfs-tests/cmd/rename_dir/Makefile
 	tests/zfs-tests/cmd/rm_lnkcnt_zero_file/Makefile
+	tests/zfs-tests/cmd/send_doall/Makefile
+	tests/zfs-tests/cmd/stride_dd/Makefile
 	tests/zfs-tests/cmd/threadsappend/Makefile
+	tests/zfs-tests/cmd/user_ns_exec/Makefile
 	tests/zfs-tests/cmd/xattrtest/Makefile
 	tests/zfs-tests/include/Makefile
 	tests/zfs-tests/tests/Makefile
 	tests/zfs-tests/tests/functional/Makefile
 	tests/zfs-tests/tests/functional/acl/Makefile
+	tests/zfs-tests/tests/functional/acl/off/Makefile
 	tests/zfs-tests/tests/functional/acl/posix/Makefile
+	tests/zfs-tests/tests/functional/acl/posix-sa/Makefile
+	tests/zfs-tests/tests/functional/alloc_class/Makefile
 	tests/zfs-tests/tests/functional/arc/Makefile
 	tests/zfs-tests/tests/functional/atime/Makefile
 	tests/zfs-tests/tests/functional/bootfs/Makefile
+	tests/zfs-tests/tests/functional/btree/Makefile
 	tests/zfs-tests/tests/functional/cache/Makefile
 	tests/zfs-tests/tests/functional/cachefile/Makefile
 	tests/zfs-tests/tests/functional/casenorm/Makefile
-	tests/zfs-tests/tests/functional/checksum/Makefile
 	tests/zfs-tests/tests/functional/channel_program/Makefile
 	tests/zfs-tests/tests/functional/channel_program/lua_core/Makefile
 	tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile
 	tests/zfs-tests/tests/functional/chattr/Makefile
+	tests/zfs-tests/tests/functional/checksum/Makefile
 	tests/zfs-tests/tests/functional/clean_mirror/Makefile
 	tests/zfs-tests/tests/functional/cli_root/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zdb/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zfs/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile
@@ -220,15 +268,15 @@
 	tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zfs_jail/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile
-	tests/zfs-tests/tests/functional/cli_root/zfs/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_program/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_property/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile
-	tests/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_reservation/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_rollback/Makefile
@@ -241,6 +289,8 @@
 	tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_clear/Makefile
@@ -256,13 +306,12 @@
 	tests/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/Makefile
-	tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_online/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_remove/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile
-	tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_replace/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_set/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_split/Makefile
@@ -271,6 +320,8 @@
 	tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile
 	tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/blockfiles/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zpool_wait/Makefile
+	tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/Makefile
 	tests/zfs-tests/tests/functional/cli_user/Makefile
 	tests/zfs-tests/tests/functional/cli_user/misc/Makefile
 	tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile
@@ -279,27 +330,31 @@
 	tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile
 	tests/zfs-tests/tests/functional/compression/Makefile
 	tests/zfs-tests/tests/functional/cp_files/Makefile
+	tests/zfs-tests/tests/functional/crtime/Makefile
 	tests/zfs-tests/tests/functional/ctime/Makefile
 	tests/zfs-tests/tests/functional/deadman/Makefile
 	tests/zfs-tests/tests/functional/delegate/Makefile
 	tests/zfs-tests/tests/functional/devices/Makefile
 	tests/zfs-tests/tests/functional/events/Makefile
 	tests/zfs-tests/tests/functional/exec/Makefile
+	tests/zfs-tests/tests/functional/fallocate/Makefile
 	tests/zfs-tests/tests/functional/fault/Makefile
+	tests/zfs-tests/tests/functional/features/Makefile
 	tests/zfs-tests/tests/functional/features/async_destroy/Makefile
 	tests/zfs-tests/tests/functional/features/large_dnode/Makefile
-	tests/zfs-tests/tests/functional/features/Makefile
 	tests/zfs-tests/tests/functional/grow/Makefile
 	tests/zfs-tests/tests/functional/history/Makefile
 	tests/zfs-tests/tests/functional/hkdf/Makefile
 	tests/zfs-tests/tests/functional/inheritance/Makefile
 	tests/zfs-tests/tests/functional/inuse/Makefile
 	tests/zfs-tests/tests/functional/io/Makefile
+	tests/zfs-tests/tests/functional/l2arc/Makefile
 	tests/zfs-tests/tests/functional/large_files/Makefile
 	tests/zfs-tests/tests/functional/largest_pool/Makefile
-	tests/zfs-tests/tests/functional/link_count/Makefile
 	tests/zfs-tests/tests/functional/libzfs/Makefile
 	tests/zfs-tests/tests/functional/limits/Makefile
+	tests/zfs-tests/tests/functional/link_count/Makefile
+	tests/zfs-tests/tests/functional/log_spacemap/Makefile
 	tests/zfs-tests/tests/functional/migration/Makefile
 	tests/zfs-tests/tests/functional/mmap/Makefile
 	tests/zfs-tests/tests/functional/mmp/Makefile
@@ -309,8 +364,9 @@
 	tests/zfs-tests/tests/functional/no_space/Makefile
 	tests/zfs-tests/tests/functional/nopwrite/Makefile
 	tests/zfs-tests/tests/functional/online_offline/Makefile
-	tests/zfs-tests/tests/functional/pool_names/Makefile
+	tests/zfs-tests/tests/functional/pam/Makefile
 	tests/zfs-tests/tests/functional/pool_checkpoint/Makefile
+	tests/zfs-tests/tests/functional/pool_names/Makefile
 	tests/zfs-tests/tests/functional/poolversion/Makefile
 	tests/zfs-tests/tests/functional/privilege/Makefile
 	tests/zfs-tests/tests/functional/procfs/Makefile
@@ -318,6 +374,7 @@
 	tests/zfs-tests/tests/functional/pyzfs/Makefile
 	tests/zfs-tests/tests/functional/quota/Makefile
 	tests/zfs-tests/tests/functional/raidz/Makefile
+	tests/zfs-tests/tests/functional/redacted_send/Makefile
 	tests/zfs-tests/tests/functional/redundancy/Makefile
 	tests/zfs-tests/tests/functional/refquota/Makefile
 	tests/zfs-tests/tests/functional/refreserv/Makefile
@@ -325,29 +382,29 @@
 	tests/zfs-tests/tests/functional/rename_dirs/Makefile
 	tests/zfs-tests/tests/functional/replacement/Makefile
 	tests/zfs-tests/tests/functional/reservation/Makefile
-	tests/zfs-tests/tests/functional/resilver/Makefile
 	tests/zfs-tests/tests/functional/rootpool/Makefile
 	tests/zfs-tests/tests/functional/rsend/Makefile
 	tests/zfs-tests/tests/functional/scrub_mirror/Makefile
+	tests/zfs-tests/tests/functional/simd/Makefile
 	tests/zfs-tests/tests/functional/slog/Makefile
 	tests/zfs-tests/tests/functional/snapshot/Makefile
 	tests/zfs-tests/tests/functional/snapused/Makefile
 	tests/zfs-tests/tests/functional/sparse/Makefile
 	tests/zfs-tests/tests/functional/suid/Makefile
-	tests/zfs-tests/tests/functional/alloc_class/Makefile
 	tests/zfs-tests/tests/functional/threadsappend/Makefile
 	tests/zfs-tests/tests/functional/tmpfile/Makefile
 	tests/zfs-tests/tests/functional/trim/Makefile
 	tests/zfs-tests/tests/functional/truncate/Makefile
+	tests/zfs-tests/tests/functional/upgrade/Makefile
 	tests/zfs-tests/tests/functional/user_namespace/Makefile
 	tests/zfs-tests/tests/functional/userquota/Makefile
-	tests/zfs-tests/tests/functional/upgrade/Makefile
 	tests/zfs-tests/tests/functional/vdev_zaps/Makefile
 	tests/zfs-tests/tests/functional/write_dirs/Makefile
 	tests/zfs-tests/tests/functional/xattr/Makefile
+	tests/zfs-tests/tests/functional/zpool_influxdb/Makefile
 	tests/zfs-tests/tests/functional/zvol/Makefile
-	tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile
 	tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/Makefile
+	tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile
 	tests/zfs-tests/tests/functional/zvol/zvol_misc/Makefile
 	tests/zfs-tests/tests/functional/zvol/zvol_swap/Makefile
 	tests/zfs-tests/tests/perf/Makefile
@@ -355,15 +412,8 @@
 	tests/zfs-tests/tests/perf/regression/Makefile
 	tests/zfs-tests/tests/perf/scripts/Makefile
 	tests/zfs-tests/tests/stress/Makefile
-	rpm/Makefile
-	rpm/redhat/Makefile
-	rpm/redhat/zfs.spec
-	rpm/redhat/zfs-kmod.spec
-	rpm/redhat/zfs-dkms.spec
-	rpm/generic/Makefile
-	rpm/generic/zfs.spec
-	rpm/generic/zfs-kmod.spec
-	rpm/generic/zfs-dkms.spec
+	udev/Makefile
+	udev/rules.d/Makefile
 	zfs.release
 ])
 

diff --git a/zfs/contrib/Makefile.am b/zfs/contrib/Makefile.am
index 9a82f82..5ec13ec 100644
--- a/zfs/contrib/Makefile.am
+++ b/zfs/contrib/Makefile.am

@@ -1,2 +1,12 @@
-SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
-DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
+include $(top_srcdir)/config/Shellcheck.am
+
+SUBDIRS = bash_completion.d pyzfs zcp
+if BUILD_LINUX
+SUBDIRS += bpftrace dracut initramfs
+endif
+if PAM_ZFS_ENABLED
+SUBDIRS += pam_zfs_key
+endif
+DIST_SUBDIRS = bash_completion.d bpftrace dracut initramfs pam_zfs_key pyzfs zcp
+
+SHELLCHECKDIRS = bash_completion.d bpftrace dracut initramfs

diff --git a/zfs/contrib/bash_completion.d/.gitignore b/zfs/contrib/bash_completion.d/.gitignore
new file mode 100644
index 0000000..0fd9cc6
--- /dev/null
+++ b/zfs/contrib/bash_completion.d/.gitignore

@@ -0,0 +1 @@
+/zfs

diff --git a/zfs/contrib/bash_completion.d/Makefile.am b/zfs/contrib/bash_completion.d/Makefile.am
index 4f13af6..8c8d1ac 100644
--- a/zfs/contrib/bash_completion.d/Makefile.am
+++ b/zfs/contrib/bash_completion.d/Makefile.am

@@ -1,5 +1,13 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 bashcompletiondir = $(sysconfdir)/bash_completion.d
 
 noinst_DATA = zfs
 
-EXTRA_DIST = $(noinst_DATA)
+EXTRA_DIST += $(noinst_DATA)
+SUBSTFILES += $(noinst_DATA)
+
+SHELLCHECKSCRIPTS = $(noinst_DATA)
+SHELLCHECK_SHELL = bash
+SHELLCHECK_IGNORE = ,SC2207

diff --git a/zfs/contrib/bash_completion.d/zfs b/zfs/contrib/bash_completion.d/zfs
deleted file mode 100644
index 914db43..0000000
--- a/zfs/contrib/bash_completion.d/zfs
+++ /dev/null

@@ -1,391 +0,0 @@
-# Copyright (c) 2013, Aneurin Price <aneurin.price@gmail.com>
-
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-if [[ -w /dev/zfs ]]; then
-    __ZFS_CMD="zfs"
-    __ZPOOL_CMD="zpool"
-else
-    __ZFS_CMD="sudo zfs"
-    __ZPOOL_CMD="sudo zpool"
-fi
-
-__zfs_get_commands()
-{
-    $__ZFS_CMD 2>&1 | awk '/^\t[a-z]/ {print $1}' | cut -f1 -d '|' | uniq
-}
-
-__zfs_get_properties()
-{
-    $__ZFS_CMD get 2>&1 | awk '$2 == "YES" || $2 == "NO" {print $1}'; echo all name space
-}
-
-__zfs_get_editable_properties()
-{
-    $__ZFS_CMD get 2>&1 | awk '$2 == "YES" {print $1"="}'
-}
-
-__zfs_get_inheritable_properties()
-{
-    $__ZFS_CMD get 2>&1 | awk '$3 == "YES" {print $1}'
-}
-
-__zfs_list_datasets()
-{
-    $__ZFS_CMD list -H -o name -t filesystem,volume
-}
-
-__zfs_list_filesystems()
-{
-    $__ZFS_CMD list -H -o name -t filesystem
-}
-
-__zfs_match_snapshot()
-{
-    local base_dataset=${cur%@*}
-    if [[ $base_dataset != $cur ]]
-    then
-        $__ZFS_CMD list -H -o name -t snapshot -d 1 $base_dataset
-    else
-        $__ZFS_CMD list -H -o name -t filesystem,volume | awk '{print $1"@"}'
-    fi
-}
-
-__zfs_match_explicit_snapshot()
-{
-    local base_dataset=${cur%@*}
-    if [[ $base_dataset != $cur ]]
-    then
-        $__ZFS_CMD list -H -o name -t snapshot -d 1 $base_dataset
-    fi
-}
-
-__zfs_match_multiple_snapshots()
-{
-    local existing_opts=$(expr "$cur" : '\(.*\)[%,]')
-    if [[ $existing_opts ]]
-    then
-        local base_dataset=${cur%@*}
-        if [[ $base_dataset != $cur ]]
-        then
-            local cur=${cur##*,}
-            if [[ $cur =~ ^%|%.*% ]]
-            then
-                # correct range syntax is start%end
-                return 1
-            fi
-            local range_start=$(expr "$cur" : '\(.*%\)')
-            $__ZFS_CMD list -H -o name -t snapshot -d 1 $base_dataset | sed 's$.*@$'$range_start'$g'
-        fi
-    else
-        __zfs_match_explicit_snapshot; __zfs_list_datasets
-    fi
-}
-
-__zfs_list_volumes()
-{
-    $__ZFS_CMD list -H -o name -t volume
-}
-
-__zfs_argument_chosen()
-{
-    local word property
-    for word in $(seq $((COMP_CWORD-1)) -1 2)
-    do
-        local prev="${COMP_WORDS[$word]}"
-        if [[ ${COMP_WORDS[$word-1]} != -[tos] ]]
-        then
-            if [[ "$prev" == [^,]*,* ]] || [[ "$prev" == *[@:]* ]]
-            then
-                return 0
-            fi
-            for property in $@
-            do
-                if [[ $prev == "$property" ]]
-                then
-                    return 0
-                fi
-            done
-        fi
-    done
-    return 1
-}
-
-__zfs_complete_ordered_arguments()
-{
-    local list1=$1
-    local list2=$2
-    local cur=$3
-    local extra=$4
-    if __zfs_argument_chosen $list1
-    then
-        COMPREPLY=($(compgen -W "$list2 $extra" -- "$cur"))
-    else
-        COMPREPLY=($(compgen -W "$list1 $extra" -- "$cur"))
-    fi
-}
-
-__zfs_complete_multiple_options()
-{
-    local options=$1
-    local cur=$2
-
-    COMPREPLY=($(compgen -W "$options" -- "${cur##*,}"))
-    local existing_opts=$(expr "$cur" : '\(.*,\)')
-    if [[ $existing_opts ]] 
-    then
-        COMPREPLY=( "${COMPREPLY[@]/#/${existing_opts}}" )
-    fi
-}
-
-__zfs_complete_switch()
-{
-    local options=$1
-    if [[ ${cur:0:1} == - ]]
-    then
-        COMPREPLY=($(compgen -W "-{$options}" -- "$cur"))
-        return 0
-    else
-        return 1
-    fi
-}
-
-__zfs_complete()
-{
-    local cur prev cmd cmds
-    COMPREPLY=()
-    # Don't split on colon
-    _get_comp_words_by_ref -n : -c cur -p prev -w COMP_WORDS -i COMP_CWORD
-    cmd="${COMP_WORDS[1]}"
-
-    if [[ ${prev##*/} == zfs ]]
-    then
-        cmds=$(__zfs_get_commands)
-        COMPREPLY=($(compgen -W "$cmds -?" -- "$cur"))
-        return 0
-    fi
-
-    case "${cmd}" in
-        clone)
-            case "${prev}" in
-                -o)
-                    COMPREPLY=($(compgen -W "$(__zfs_get_editable_properties)" -- "$cur"))
-                    ;;
-                *)
-                    if ! __zfs_complete_switch "o,p"
-                    then
-                        if __zfs_argument_chosen
-                        then
-                            COMPREPLY=($(compgen -W "$(__zfs_list_datasets)" -- "$cur"))
-                        else
-                            COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
-                        fi
-                    fi
-                    ;;
-            esac
-            ;;
-        get)
-            case "${prev}" in
-                -d)
-                    COMPREPLY=($(compgen -W "" -- "$cur"))
-                    ;;
-                -t)
-                    __zfs_complete_multiple_options "filesystem volume snapshot all" "$cur"
-                    ;;
-                -s)
-                    __zfs_complete_multiple_options "local default inherited temporary none" "$cur"
-                    ;;
-                -o)
-                    __zfs_complete_multiple_options "name property value source received all" "$cur"
-                    ;;
-                *)
-                    if ! __zfs_complete_switch "H,r,p,d,o,t,s"
-                    then
-                        if __zfs_argument_chosen $(__zfs_get_properties)
-                        then
-                            COMPREPLY=($(compgen -W "$(__zfs_match_explicit_snapshot) $(__zfs_list_datasets)" -- "$cur"))
-                        else
-                            __zfs_complete_multiple_options "$(__zfs_get_properties)" "$cur"
-                        fi
-                    fi
-                    ;;
-            esac
-            ;;
-        inherit)
-            if ! __zfs_complete_switch "r"
-            then
-                __zfs_complete_ordered_arguments "$(__zfs_get_inheritable_properties)" "$(__zfs_match_explicit_snapshot) $(__zfs_list_datasets)" $cur
-            fi
-            ;;
-        list)
-            case "${prev}" in
-                -d)
-                    COMPREPLY=($(compgen -W "" -- "$cur"))
-                    ;;
-                -t)
-                    __zfs_complete_multiple_options "filesystem volume snapshot all" "$cur"
-                    ;;
-                -o)
-                    __zfs_complete_multiple_options "$(__zfs_get_properties)" "$cur"
-                    ;;
-                -s|-S)
-                    COMPREPLY=($(compgen -W "$(__zfs_get_properties)" -- "$cur"))
-                    ;;
-                *)
-                    if ! __zfs_complete_switch "H,r,d,o,t,s,S"
-                    then
-                        COMPREPLY=($(compgen -W "$(__zfs_match_explicit_snapshot) $(__zfs_list_datasets)" -- "$cur"))
-                    fi
-                    ;;
-            esac
-            ;;
-        promote)
-            COMPREPLY=($(compgen -W "$(__zfs_list_filesystems)" -- "$cur"))
-            ;;
-        rollback)
-            if ! __zfs_complete_switch "r,R,f"
-            then
-                COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
-            fi
-            ;;
-        send)
-            if ! __zfs_complete_switch "d,n,P,p,R,v,i,I"
-            then
-                COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
-            fi
-            ;;
-        snapshot)
-            case "${prev}" in
-                -o)
-                    COMPREPLY=($(compgen -W "$(__zfs_get_editable_properties)" -- "$cur"))
-                    ;;
-                *)
-                    if ! __zfs_complete_switch "o,r"
-                    then
-                        COMPREPLY=($(compgen -W "$(__zfs_list_datasets | awk '{print $1"@"}')" -- "$cur"))
-                    fi
-                    ;;
-            esac
-            ;;
-        set)
-            __zfs_complete_ordered_arguments "$(__zfs_get_editable_properties)" "$(__zfs_match_explicit_snapshot) $(__zfs_list_datasets)" $cur
-            ;;
-        upgrade)
-            case "${prev}" in
-                -a|-V|-v)
-                    COMPREPLY=($(compgen -W "" -- "$cur"))
-                    ;;
-                *)
-                    if ! __zfs_complete_switch "a,V,v,r"
-                    then
-                        COMPREPLY=($(compgen -W "$(__zfs_list_filesystems)" -- "$cur"))
-                    fi
-                    ;;
-            esac
-            ;;
-        destroy)
-            if ! __zfs_complete_switch "d,f,n,p,R,r,v"
-            then
-                __zfs_complete_multiple_options "$(__zfs_match_multiple_snapshots)" $cur
-            fi
-            ;;
-        *)
-            COMPREPLY=($(compgen -W "$(__zfs_match_explicit_snapshot) $(__zfs_list_datasets)" -- "$cur"))
-            ;;
-    esac
-    __ltrim_colon_completions "$cur"
-    return 0
-}
-
-__zpool_get_commands()
-{
-    $__ZPOOL_CMD 2>&1 | awk '/^\t[a-z]/ {print $1}' | uniq
-}
-
-__zpool_get_properties()
-{
-    $__ZPOOL_CMD get 2>&1 | awk '$2 == "YES" || $2 == "NO" {print $1}'; echo all
-}
-
-__zpool_get_editable_properties()
-{
-    $__ZPOOL_CMD get 2>&1 | awk '$2 == "YES" {print $1"="}'
-}
-
-__zpool_list_pools()
-{
-    $__ZPOOL_CMD list -H -o name
-}
-
-__zpool_complete()
-{
-    local cur prev cmd cmds
-    COMPREPLY=()
-    cur="${COMP_WORDS[COMP_CWORD]}"
-    prev="${COMP_WORDS[COMP_CWORD-1]}"
-    cmd="${COMP_WORDS[1]}"
-
-    if [[ ${prev##*/} == zpool ]]
-    then
-        cmds=$(__zpool_get_commands)
-        COMPREPLY=($(compgen -W "$cmds" -- "$cur"))
-        return 0
-    fi
-
-    case "${cmd}" in
-        get)
-            __zfs_complete_ordered_arguments "$(__zpool_get_properties)" "$(__zpool_list_pools)" $cur
-            return 0
-            ;;
-        import)
-            if [[ $prev == -d ]]
-            then
-                _filedir -d
-            else
-                COMPREPLY=($(compgen -W "$(__zpool_list_pools) -d" -- "$cur"))
-            fi
-            return 0
-            ;;
-        set)
-            __zfs_complete_ordered_arguments "$(__zpool_get_editable_properties)" "$(__zpool_list_pools)" $cur
-            return 0
-            ;;
-        add|attach|clear|create|detach|offline|online|remove|replace)
-            local pools="$(__zpool_list_pools)"
-            if __zfs_argument_chosen $pools
-            then
-                _filedir
-            else
-                COMPREPLY=($(compgen -W "$pools" -- "$cur"))
-            fi
-            return 0
-            ;;
-        *)
-            COMPREPLY=($(compgen -W "$(__zpool_list_pools)" -- "$cur"))
-            return 0
-            ;;
-    esac
-
-}
-
-complete -F __zfs_complete zfs
-complete -F __zpool_complete zpool

diff --git a/zfs/contrib/bash_completion.d/zfs.in b/zfs/contrib/bash_completion.d/zfs.in
new file mode 100644
index 0000000..f97fa51
--- /dev/null
+++ b/zfs/contrib/bash_completion.d/zfs.in

@@ -0,0 +1,484 @@
+# Copyright (c) 2010-2016, Aneurin Price <aneurin.price@gmail.com>
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+__ZFS_CMD="@sbindir@/zfs"
+__ZPOOL_CMD="@sbindir@/zpool"
+
+# Disable bash's built-in hostname completion, as this makes it impossible to
+# provide completions containing an @-sign, which is necessary for completing
+# snapshot names. If bash_completion is in use, this will already be disabled
+# and replaced with better completions anyway.
+shopt -u hostcomplete
+
+__zfs_get_commands()
+{
+    $__ZFS_CMD 2>&1 | awk '/^\t[a-z]/ {print $1}' | cut -f1 -d '|' | uniq
+}
+
+__zfs_get_properties()
+{
+    $__ZFS_CMD get 2>&1 | awk '$2 == "YES" || $2 == "NO" {print $1}'; echo all name space
+}
+
+__zfs_get_editable_properties()
+{
+    $__ZFS_CMD get 2>&1 | awk '$2 == "YES" {print $1"="}'
+}
+
+__zfs_get_inheritable_properties()
+{
+    $__ZFS_CMD get 2>&1 | awk '$3 == "YES" {print $1}'
+}
+
+__zfs_list_datasets()
+{
+    $__ZFS_CMD list -H -o name -s name -t filesystem,volume "$@"
+}
+
+__zfs_list_filesystems()
+{
+    $__ZFS_CMD list -H -o name -s name -t filesystem
+}
+
+__zfs_match_snapshot()
+{
+    local base_dataset="${cur%@*}"
+    if [ "$base_dataset" != "$cur" ]
+    then
+        $__ZFS_CMD list -H -o name -s name -t snapshot -d 1 "$base_dataset"
+    else
+        if [ "$cur" != "" ] && __zfs_list_datasets "$cur" &> /dev/null
+        then
+            $__ZFS_CMD list -H -o name -s name -t filesystem,volume -r "$cur" | tail -n +2
+            # We output the base dataset name even though we might be
+            # completing a command that can only take a snapshot, because it
+            # prevents bash from considering the completion finished when it
+            # ends in the bare @.
+            echo "$cur"
+            echo "$cur@"
+        else
+            local datasets
+            datasets="$(__zfs_list_datasets)"
+            # As above
+            echo "$datasets"
+            if [[ "$cur" == */ ]]
+            then
+                # If the current command ends with a slash, then the only way
+                # it can be completed with a single tab press (ie. in this pass)
+                # is if it has exactly one child, so that's the only time we
+                # need to offer a suggestion with an @ appended.
+                local num_children
+                # This is actually off by one as zfs list includes the named
+                # dataset in addition to its children
+                num_children=$(__zfs_list_datasets -d 1 "${cur%/}" 2> /dev/null | wc -l)
+                if [[ $num_children != 2 ]]
+                then
+                    return 0
+                fi
+            fi
+            echo "$datasets" | awk '{print $1 "@"}'
+        fi
+    fi
+}
+
+__zfs_match_snapshot_or_bookmark()
+{   # Print completion candidates for the word in $cur, one per line.
+    local base_dataset="${cur%[#@]*}"   # $cur minus its last '@...' or '#...' suffix, if any
+    if [ "$base_dataset" != "$cur" ]
+    then
+        if [[ $cur == *@* ]]
+        then
+            $__ZFS_CMD list -H -o name -s name -t snapshot -d 1 "$base_dataset"
+        else
+            $__ZFS_CMD list -H -o name -s name -t bookmark -d 1 "$base_dataset"
+        fi
+    else   # no '@' or '#' typed yet: list datasets, plus "$cur@" / "$cur#" when $cur names one
+        $__ZFS_CMD list -H -o name -s name -t filesystem,volume
+        if [ -n "$cur" ] && $__ZFS_CMD list -H -o name -s name -t filesystem,volume "$cur" &> /dev/null   # -n: non-empty word (was -e, a file-existence test)
+        then
+            echo "$cur@"
+            echo "$cur#"
+        fi
+    fi
+}
+
+__zfs_match_multiple_snapshots()
+{   # Print candidates for a snapshot list/range argument ("a,b" or "start%end") held in $cur.
+    local existing_opts
+    existing_opts="$(expr "$cur" : '\(.*\)[%,]')"   # text before the last '%' or ',' in $cur; empty if neither is present
+    if [ -n "$existing_opts" ]   # non-empty string test (was -e, which checks for a file of that name)
+    then
+        local base_dataset="${cur%@*}"
+        if [ "$base_dataset" != "$cur" ]
+        then
+            local cur="${cur##*,}"   # keep only the text after the last comma
+            if [[ $cur =~ ^%|%.*% ]]
+            then
+                # correct range syntax is start%end
+                return 1
+            fi
+            local range_start
+            range_start="$(expr "$cur" : '\(.*%\)')"   # "start%" prefix when a range is being typed, else empty
+            # shellcheck disable=SC2016
+            $__ZFS_CMD list -H -o name -s name -t snapshot -d 1 "$base_dataset" | sed 's$.*@$'"$range_start"'$g'
+        fi
+    else   # nothing before a ',' or '%' yet: fall back to plain snapshot/bookmark matching
+        __zfs_match_snapshot_or_bookmark
+    fi
+}
+
+__zfs_list_volumes()
+{
+    $__ZFS_CMD list -H -o name -s name -t volume
+}
+
+__zfs_argument_chosen()
+{
+    local word property
+    for word in $(seq $((COMP_CWORD-1)) -1 2)
+    do
+        local prev="${COMP_WORDS[$word]}"
+        if [[ ${COMP_WORDS[$word-1]} != -[tos] ]]
+        then
+            if [[ "$prev" == [^,]*,* ]] || [[ "$prev" == *[@:\#]* ]]
+            then
+                return 0
+            fi
+            for property in "$@"
+            do
+                if [[ $prev == "$property"* ]]
+                then
+                    return 0
+                fi
+            done
+        fi
+    done
+    return 1
+}
+
+__zfs_complete_ordered_arguments()
+{
+    local list1=$1
+    local list2=$2
+    local cur=$3
+    local extra=$4
+    # shellcheck disable=SC2086
+    if __zfs_argument_chosen $list1
+    then
+        COMPREPLY=($(compgen -W "$list2 $extra" -- "$cur"))
+    else
+        COMPREPLY=($(compgen -W "$list1 $extra" -- "$cur"))
+    fi
+}
+
+__zfs_complete_multiple_options()
+{
+    local options=$1
+    local cur=$2
+    local existing_opts
+
+    COMPREPLY=($(compgen -W "$options" -- "${cur##*,}"))
+    existing_opts=$(expr "$cur" : '\(.*,\)')
+    if [[ $existing_opts ]]
+    then
+        COMPREPLY=( "${COMPREPLY[@]/#/${existing_opts}}" )
+    fi
+}
+
+__zfs_complete_switch()
+{   # $1: comma-separated option letters (e.g. "o,p"). Returns 0 and fills COMPREPLY when $cur starts with '-'.
+    local options=$1
+    if [[ ${cur:0:1} == - ]]
+    then
+        COMPREPLY=($(compgen -W "-${options//,/ -}" -- "$cur"))   # "o,p" -> "-o -p"; also correct for a single letter, where "-{r}" was not brace-expanded
+        return 0
+    else
+        return 1   # $cur is not a switch; the caller completes it another way
+    fi
+}
+
+__zfs_complete_nospace()
+{
+    # Google indicates that there may still be bash versions out there that
+    # don't have compopt.
+    if type compopt &> /dev/null
+    then
+        compopt -o nospace
+    fi
+}
+
+__zfs_complete()
+{
+    local cur prev cmd cmds
+    COMPREPLY=()
+    if type _get_comp_words_by_ref &> /dev/null
+    then
+        # Don't split on colon
+        _get_comp_words_by_ref -n : -c cur -p prev -w COMP_WORDS -i COMP_CWORD
+    else
+        cur="${COMP_WORDS[COMP_CWORD]}"
+        prev="${COMP_WORDS[COMP_CWORD-1]}"
+    fi
+    cmd="${COMP_WORDS[1]}"
+
+    if [[ ${prev##*/} == zfs ]]
+    then
+        cmds=$(__zfs_get_commands)
+        COMPREPLY=($(compgen -W "$cmds -?" -- "$cur"))
+        return 0
+    fi
+
+    case "${cmd}" in
+        bookmark)
+            if __zfs_argument_chosen
+            then
+                COMPREPLY=($(compgen -W "${prev%@*}# ${prev/@/#}" -- "$cur"))
+            else
+                COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+            fi
+            ;;
+        clone)
+            case "${prev}" in
+                -o)
+                    COMPREPLY=($(compgen -W "$(__zfs_get_editable_properties)" -- "$cur"))
+                    __zfs_complete_nospace
+                    ;;
+                *)
+                    if ! __zfs_complete_switch "o,p"
+                    then
+                        if __zfs_argument_chosen
+                        then
+                            COMPREPLY=($(compgen -W "$(__zfs_list_datasets)" -- "$cur"))
+                        else
+                            COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                        fi
+                    fi
+                    ;;
+            esac
+            ;;
+        get)
+            case "${prev}" in
+                -d)
+                    COMPREPLY=($(compgen -W "" -- "$cur"))
+                    ;;
+                -t)
+                    __zfs_complete_multiple_options "filesystem volume snapshot bookmark all" "$cur"
+                    ;;
+                -s)
+                    __zfs_complete_multiple_options "local default inherited temporary received none" "$cur"
+                    ;;
+                -o)
+                    __zfs_complete_multiple_options "name property value source received all" "$cur"
+                    ;;
+                *)
+                    if ! __zfs_complete_switch "H,r,p,d,o,t,s"
+                    then
+                        # shellcheck disable=SC2046
+                        if __zfs_argument_chosen $(__zfs_get_properties)
+                        then
+                            COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                        else
+                            __zfs_complete_multiple_options "$(__zfs_get_properties)" "$cur"
+                        fi
+                    fi
+                    ;;
+            esac
+            ;;
+        inherit)
+            if ! __zfs_complete_switch "r"
+            then
+                __zfs_complete_ordered_arguments "$(__zfs_get_inheritable_properties)" "$(__zfs_match_snapshot)" "$cur"
+            fi
+            ;;
+        list)
+            case "${prev}" in
+                -d)
+                    COMPREPLY=($(compgen -W "" -- "$cur"))
+                    ;;
+                -t)
+                    __zfs_complete_multiple_options "filesystem volume snapshot bookmark all" "$cur"
+                    ;;
+                -o)
+                    __zfs_complete_multiple_options "$(__zfs_get_properties)" "$cur"
+                    ;;
+                -s|-S)
+                    COMPREPLY=($(compgen -W "$(__zfs_get_properties)" -- "$cur"))
+                    ;;
+                *)
+                    if ! __zfs_complete_switch "H,r,d,o,t,s,S"
+                    then
+                        COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                    fi
+                    ;;
+            esac
+            ;;
+        promote)
+            COMPREPLY=($(compgen -W "$(__zfs_list_filesystems)" -- "$cur"))
+            ;;
+        rollback)
+            if ! __zfs_complete_switch "r,R,f"
+            then
+                COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+            fi
+            ;;
+        send)
+            if ! __zfs_complete_switch "D,n,P,p,R,v,e,L,i,I"
+            then
+                if __zfs_argument_chosen
+                then
+                    COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                else
+                    if [[ $prev == -*i* ]]
+                    then
+                        COMPREPLY=($(compgen -W "$(__zfs_match_snapshot_or_bookmark)" -- "$cur"))
+                    else
+                        COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                    fi
+                fi
+            fi
+            ;;
+        snapshot)
+            case "${prev}" in
+                -o)
+                    COMPREPLY=($(compgen -W "$(__zfs_get_editable_properties)" -- "$cur"))
+                    __zfs_complete_nospace
+                    ;;
+                *)
+                    if ! __zfs_complete_switch "o,r"
+                    then
+                        COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+                        __zfs_complete_nospace
+                    fi
+                    ;;
+            esac
+            ;;
+        set)
+            __zfs_complete_ordered_arguments "$(__zfs_get_editable_properties)" "$(__zfs_match_snapshot)" "$cur"
+            __zfs_complete_nospace
+            ;;
+        upgrade)
+            case "${prev}" in
+                -a|-V|-v)
+                    COMPREPLY=($(compgen -W "" -- "$cur"))
+                    ;;
+                *)
+                    if ! __zfs_complete_switch "a,V,v,r"
+                    then
+                        COMPREPLY=($(compgen -W "$(__zfs_list_filesystems)" -- "$cur"))
+                    fi
+                    ;;
+            esac
+            ;;
+        destroy)
+            if ! __zfs_complete_switch "d,f,n,p,R,r,v"
+            then
+                __zfs_complete_multiple_options "$(__zfs_match_multiple_snapshots)" "$cur"
+                __zfs_complete_nospace
+            fi
+            ;;
+        *)
+            COMPREPLY=($(compgen -W "$(__zfs_match_snapshot)" -- "$cur"))
+            ;;
+    esac
+    if type __ltrim_colon_completions &> /dev/null
+    then
+        __ltrim_colon_completions "$cur"
+    fi
+    return 0
+}
+
+__zpool_get_commands()
+{
+    $__ZPOOL_CMD 2>&1 | awk '/^\t[a-z]/ {print $1}' | uniq
+}
+
+__zpool_get_properties()
+{
+    $__ZPOOL_CMD get 2>&1 | awk '$2 == "YES" || $2 == "NO" {print $1}'; echo all
+}
+
+__zpool_get_editable_properties()
+{
+    $__ZPOOL_CMD get 2>&1 | awk '$2 == "YES" {print $1"="}'
+}
+
+__zpool_list_pools()
+{
+    $__ZPOOL_CMD list -H -o name
+}
+
+__zpool_complete()
+{
+    local cur prev cmd cmds pools
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
+    cmd="${COMP_WORDS[1]}"
+
+    if [[ ${prev##*/} == zpool ]]
+    then
+        cmds=$(__zpool_get_commands)
+        COMPREPLY=($(compgen -W "$cmds" -- "$cur"))
+        return 0
+    fi
+
+    case "${cmd}" in
+        get)
+            __zfs_complete_ordered_arguments "$(__zpool_get_properties)" "$(__zpool_list_pools)" "$cur"
+            return 0
+            ;;
+        import)
+            if [[ $prev == -d ]]
+            then
+                _filedir -d
+            else
+                COMPREPLY=($(compgen -W "$(__zpool_list_pools) -d" -- "$cur"))
+            fi
+            return 0
+            ;;
+        set)
+            __zfs_complete_ordered_arguments "$(__zpool_get_editable_properties)" "$(__zpool_list_pools)" "$cur"
+            __zfs_complete_nospace
+            return 0
+            ;;
+        add|attach|clear|create|detach|offline|online|remove|replace)
+            pools="$(__zpool_list_pools)"
+            # shellcheck disable=SC2086
+            if __zfs_argument_chosen $pools
+            then
+                _filedir
+            else
+                COMPREPLY=($(compgen -W "$pools" -- "$cur"))
+            fi
+            return 0
+            ;;
+        *)
+            COMPREPLY=($(compgen -W "$(__zpool_list_pools)" -- "$cur"))
+            return 0
+            ;;
+    esac
+
+}
+
+complete -F __zfs_complete zfs
+complete -F __zpool_complete zpool

diff --git a/zfs/contrib/bpftrace/Makefile.am b/zfs/contrib/bpftrace/Makefile.am
new file mode 100644
index 0000000..05e4f1c
--- /dev/null
+++ b/zfs/contrib/bpftrace/Makefile.am

@@ -0,0 +1,7 @@
+include $(top_srcdir)/config/Shellcheck.am
+
+EXTRA_DIST = \
+	taskqlatency.bt \
+	zfs-trace.sh
+
+SHELLCHECKSCRIPTS = zfs-trace.sh

diff --git a/zfs/contrib/bpftrace/taskqlatency.bt b/zfs/contrib/bpftrace/taskqlatency.bt
new file mode 100644
index 0000000..598f988
--- /dev/null
+++ b/zfs/contrib/bpftrace/taskqlatency.bt

@@ -0,0 +1,54 @@
+#include <sys/taskq.h>
+
+kprobe:trace_zfs_taskq_ent__birth
+{       // Birth: record the current nsecs for this entry, keyed by (taskq name, tqent_id).
+        $tqent = (struct taskq_ent *)arg0;
+
+        $tqent_id = $tqent->tqent_id;
+        $tq_name = str($tqent->tqent_taskq->tq_name);
+
+        @birth[$tq_name, $tqent_id] = nsecs;
+}
+
+kprobe:trace_zfs_taskq_ent__start
+{       // Start (unconditional): stash the entry's id and taskq name per thread id, then stamp the start time.
+        $tqent = (struct taskq_ent *)arg0;
+
+        @tqent_id[tid] = $tqent->tqent_id;
+        @tq_name[tid] = str($tqent->tqent_taskq->tq_name);
+
+        @start[@tq_name[tid], @tqent_id[tid]] = nsecs;
+}
+
+kprobe:trace_zfs_taskq_ent__start
+/ @birth[@tq_name[tid], @tqent_id[tid]] /
+{       // Start (only when a birth timestamp exists): histogram of the birth-to-start delay, nsecs/1000 = microseconds.
+        @queue_lat_us[@tq_name[tid]] =  // NOTE(review): reads the per-tid maps filled by the probe above; confirm same-point probes run in source order
+                hist((nsecs - @birth[@tq_name[tid], @tqent_id[tid]])/1000);
+        delete(@birth[@tq_name[tid], @tqent_id[tid]]);  // each birth timestamp is consumed once
+}
+
+kprobe:trace_zfs_taskq_ent__finish
+/ @start[@tq_name[tid], @tqent_id[tid]] /
+{       // Finish (only when a start timestamp exists): histogram of start-to-finish time in microseconds.
+        $tqent = (struct taskq_ent *)arg0;
+
+        @exec_lat_us[@tq_name[tid], ksym($tqent->tqent_func)] =  // keyed by taskq name and the symbol of tqent_func
+                hist((nsecs - @start[@tq_name[tid], @tqent_id[tid]])/1000);
+        delete(@start[@tq_name[tid], @tqent_id[tid]]);
+}
+
+kprobe:trace_zfs_taskq_ent__finish
+{       // Finish (unconditional): drop the per-thread name/id stashed at start; the probe above still needs them, so this comes second.
+        delete(@tq_name[tid]);
+        delete(@tqent_id[tid]);
+}
+
+END
+{       // On exit, clear the four bookkeeping maps; @queue_lat_us and @exec_lat_us are left alone (presumably so only they get printed).
+        clear(@birth);
+        clear(@start);
+
+        clear(@tq_name);
+        clear(@tqent_id);
+}

diff --git a/zfs/contrib/bpftrace/zfs-trace.sh b/zfs/contrib/bpftrace/zfs-trace.sh
new file mode 100755
index 0000000..0165335
--- /dev/null
+++ b/zfs/contrib/bpftrace/zfs-trace.sh

@@ -0,0 +1,11 @@
+#!/bin/sh
+# Wrapper that execs bpftrace with the zfs_config.h and include paths under /usr/src/zfs-<version>, forwarding all arguments.
+read -r ZVER < /sys/module/zfs/version  # first line of the zfs module's version file
+ZVER="${ZVER%%-*}"  # keep only the part before the first '-'
+KVER=$(uname -r)  # running kernel release; selects the per-kernel directory holding zfs_config.h
+
+exec bpftrace \
+	--include "/usr/src/zfs-$ZVER/$KVER/zfs_config.h" \
+	-I "/usr/src/zfs-$ZVER/include" \
+	-I "/usr/src/zfs-$ZVER/include/spl" \
+	"$@"

diff --git a/zfs/contrib/dracut/02zfsexpandknowledge/Makefile.am b/zfs/contrib/dracut/02zfsexpandknowledge/Makefile.am
index 6e553e8..b1bbb6b 100644
--- a/zfs/contrib/dracut/02zfsexpandknowledge/Makefile.am
+++ b/zfs/contrib/dracut/02zfsexpandknowledge/Makefile.am

@@ -1,24 +1,8 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 pkgdracutdir = $(dracutdir)/modules.d/02zfsexpandknowledge
 pkgdracut_SCRIPTS = \
 	module-setup.sh
 
-EXTRA_DIST = \
-	$(top_srcdir)/contrib/dracut/02zfsexpandknowledge/module-setup.sh.in
-
-$(pkgdracut_SCRIPTS):%:%.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@datadir\@,$(datadir),g' \
-		-e 's,@dracutdir\@,$(dracutdir),g' \
-		-e 's,@udevdir\@,$(udevdir),g' \
-		-e 's,@udevruledir\@,$(udevruledir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-clean-local::
-	-$(RM) $(pkgdracut_SCRIPTS)
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(pkgdracut_SCRIPTS)
+SUBSTFILES += $(pkgdracut_SCRIPTS)

diff --git a/zfs/contrib/dracut/02zfsexpandknowledge/module-setup.sh.in b/zfs/contrib/dracut/02zfsexpandknowledge/module-setup.sh.in
index c22141f..df8df31 100755
--- a/zfs/contrib/dracut/02zfsexpandknowledge/module-setup.sh.in
+++ b/zfs/contrib/dracut/02zfsexpandknowledge/module-setup.sh.in

@@ -1,9 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 get_devtype() {
   local typ
-  typ=$(udevadm info --query=property --name="$1" | grep "^ID_FS_TYPE=" | sed 's|^ID_FS_TYPE=||')
-  if [ "$typ" = "" ] ; then
+  typ=$(udevadm info --query=property --name="$1" | sed -n 's|^ID_FS_TYPE=||p')
+  if [ -z "$typ" ] ; then
      typ=$(blkid -c /dev/null "$1" -o value -s TYPE)
   fi
   echo "$typ"
@@ -14,22 +14,16 @@
   local poolconfigtemp
   local poolconfigoutput
   local pooldev
-  local prefix
   local resolved
-  poolconfigtemp=`mktemp`
-  @sbindir@/zpool list -v -H -P "$1" > "$poolconfigtemp" 2>&1
-  if [ "$?" != "0" ] ; then
-    poolconfigoutput=$(cat "$poolconfigtemp")
+  poolconfigtemp="$(mktemp)"
+  if ! @sbindir@/zpool list -v -H -P "$1" > "$poolconfigtemp" 2>&1 ; then
+    poolconfigoutput="$(cat "$poolconfigtemp")"
     dinfo "zfsexpandknowledge: pool $1 cannot be listed: $poolconfigoutput"
   else
-    cat "$poolconfigtemp" |  awk -F '\t' '/\t\/dev/ { print $2 }' | \
-    while read pooldev ; do
-        if [ -n "$pooldev" -a -e "$pooldev" ] ; then
-          if [ -h "$pooldev" ] ; then
-              resolved=`readlink -f "$pooldev"`
-          else
-              resolved="$pooldev"
-          fi
+    awk -F '\t' '/\t\/dev/ { print $2 }' "$poolconfigtemp" | \
+    while read -r pooldev ; do
+        if [ -e "$pooldev" ] ; then
+          resolved="$(readlink -f "$pooldev")"
           dinfo "zfsexpandknowledge: pool $1 has device $pooldev (which resolves to $resolved)"
           echo "$resolved"
         fi
@@ -40,25 +34,19 @@
 
 find_zfs_block_devices() {
     local dev
-    local blockdev
     local mp
     local fstype
-    local pool
-    local key
-    local n
-    local poolconfigoutput
-    numfields=`head -1 /proc/self/mountinfo | awk '{print NF}'`
-    if [ "$numfields" == "10" ] ; then
-        fields="n n n n mp n n fstype dev n"
+    local _
+    numfields="$(awk '{print NF; exit}' /proc/self/mountinfo)"
+    if [ "$numfields" = "10" ] ; then
+        fields="_ _ _ _ mp _ _ fstype dev _"
     else
-        fields="n n n n mp n n n fstype dev n"
+        fields="_ _ _ _ mp _ _ _ fstype dev _"
     fi
-    while read $fields ; do
-       if [ "$fstype" != "zfs" ]; then continue ; fi
-       if [ "$mp" == "$1" ]; then
-           pool=$(echo "$dev" | cut -d / -f 1)
-           get_pool_devices "$pool"
-       fi
+    # shellcheck disable=SC2086
+    while read -r ${fields?} ; do
+       [ "$fstype" = "zfs" ] || continue
+       [ "$mp" = "$1" ] && get_pool_devices "${dev%%/*}"
     done < /proc/self/mountinfo
 }
 
@@ -69,18 +57,23 @@
 }
 
 check() {
+    # https://github.com/dracutdevs/dracut/pull/1711 provides a zfs_devs
+    # function to detect the physical devices backing zfs pools. If this
+    # function exists in the version of dracut this module is being called
+    # from, then it does not need to run.
+    type zfs_devs >/dev/null 2>&1 && return 1
+
     local mp
     local dev
     local blockdevs
     local fstype
     local majmin
-    local _slavedev
-    local _slavedevname
-    local _slavedevtype
-    local _slavemajmin
-    local _dev
+    local _depdev
+    local _depdevname
+    local _depdevtype
 
-if [[ $hostonly ]]; then
+# shellcheck disable=SC2154
+if [ -n "$hostonly" ]; then
 
     for mp in \
         "/" \
@@ -100,23 +93,22 @@
         mountpoint "$mp" >/dev/null 2>&1 || continue
         blockdevs=$(find_zfs_block_devices "$mp")
         if [ -z "$blockdevs" ] ; then continue ; fi
-        dinfo "zfsexpandknowledge: block devices backing ZFS dataset $mp: $blockdevs"
+        dinfo "zfsexpandknowledge: block devices backing ZFS dataset $mp: ${blockdevs//$'\n'/ }"
         for dev in $blockdevs
         do
             array_contains "$dev" "${host_devs[@]}" || host_devs+=("$dev")
             fstype=$(get_devtype "$dev")
             host_fs_types["$dev"]="$fstype"
             majmin=$(get_maj_min "$dev")
-            if [[ -d /sys/dev/block/$majmin/slaves ]] ; then
-                for _slavedev in /sys/dev/block/$majmin/slaves/*; do
-                    [[ -f $_slavedev/dev ]] || continue
-                    _slavedev=/dev/$(basename "$_slavedev")
-                    _slavedevname=$(udevadm info --query=property --name="$_slavedev" | grep "^DEVNAME=" | sed 's|^DEVNAME=||')
-                    _slavedevtype=$(get_devtype "$_slavedevname")
-                    _slavemajmin=$(get_maj_min "$_slavedevname")
-                    dinfo "zfsexpandknowledge: slave block device backing ZFS dataset $mp: $_slavedevname"
-                    array_contains "$_slavedevname" "${host_devs[@]}" || host_devs+=("$_slavedevname")
-                    host_fs_types["$_slavedevname"]="$_slavedevtype"
+            if [ -d "/sys/dev/block/$majmin/slaves" ] ; then
+                for _depdev in "/sys/dev/block/$majmin/slaves"/*; do
+                    [ -f "$_depdev/dev" ] || continue
+                    _depdev="/dev/${_depdev##*/}"
+                    _depdevname=$(udevadm info --query=property --name="$_depdev" | sed -n 's|^DEVNAME=||p')
+                    _depdevtype=$(get_devtype "$_depdevname")
+                    dinfo "zfsexpandknowledge: underlying block device backing ZFS dataset $mp: ${_depdevname//$'\n'/ }"
+                    array_contains "$_depdevname" "${host_devs[@]}" || host_devs+=("$_depdevname")
+                    host_fs_types["$_depdevname"]="$_depdevtype"
                 done
             fi
         done

diff --git a/zfs/contrib/dracut/90zfs/.gitignore b/zfs/contrib/dracut/90zfs/.gitignore
index 85c23f7..cb84212 100644
--- a/zfs/contrib/dracut/90zfs/.gitignore
+++ b/zfs/contrib/dracut/90zfs/.gitignore

@@ -1,9 +1,2 @@
-export-zfs.sh
-module-setup.sh
-mount-zfs.sh
-parse-zfs.sh
-zfs-generator.sh
-zfs-lib.sh
-zfs-load-key.sh
-zfs-needshutdown.sh
-zfs-env-bootfs.service
+*.sh
+*.service

diff --git a/zfs/contrib/dracut/90zfs/Makefile.am b/zfs/contrib/dracut/90zfs/Makefile.am
index 1680230..5a68e01 100644
--- a/zfs/contrib/dracut/90zfs/Makefile.am
+++ b/zfs/contrib/dracut/90zfs/Makefile.am

@@ -1,3 +1,6 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 pkgdracutdir = $(dracutdir)/modules.d/90zfs
 pkgdracut_SCRIPTS = \
 	export-zfs.sh \
@@ -7,32 +10,16 @@
 	zfs-generator.sh \
 	zfs-load-key.sh \
 	zfs-needshutdown.sh \
-	zfs-lib.sh
+	zfs-lib.sh \
+	import-opts-generator.sh
 
 pkgdracut_DATA = \
-	zfs-env-bootfs.service
+	zfs-env-bootfs.service \
+	zfs-nonroot-necessities.service \
+	zfs-snapshot-bootfs.service \
+	zfs-rollback-bootfs.service
 
-EXTRA_DIST = \
-	$(top_srcdir)/contrib/dracut/90zfs/export-zfs.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/module-setup.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/mount-zfs.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/parse-zfs.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/zfs-generator.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/zfs-load-key.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/zfs-needshutdown.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/zfs-lib.sh.in \
-	$(top_srcdir)/contrib/dracut/90zfs/zfs-env-bootfs.service.in
+SUBSTFILES += $(pkgdracut_SCRIPTS) $(pkgdracut_DATA)
 
-$(pkgdracut_SCRIPTS) $(pkgdracut_DATA) :%:%.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@udevdir\@,$(udevdir),g' \
-		-e 's,@udevruledir\@,$(udevruledir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		-e 's,@systemdunitdir\@,$(systemdunitdir),g' \
-		-e 's,@mounthelperdir\@,$(mounthelperdir),g' \
-		$< >'$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(pkgdracut_SCRIPTS) $(pkgdracut_DATA)
+# sleep is the external /bin/sleep, and every implementation of it accepts fractional durations, so ignore these warnings
+CHECKBASHISMS_IGNORE = -e 'sleep only takes one integer' -e 'sleep 0.'

diff --git a/zfs/contrib/dracut/90zfs/export-zfs.sh.in b/zfs/contrib/dracut/90zfs/export-zfs.sh.in
index 8926503..9e05ee0 100755
--- a/zfs/contrib/dracut/90zfs/export-zfs.sh.in
+++ b/zfs/contrib/dracut/90zfs/export-zfs.sh.in

@@ -1,14 +1,12 @@
 #!/bin/sh
 
-. /lib/dracut-zfs-lib.sh
-
 _do_zpool_export() {
 	ret=0
 	errs=""
 	final="${1}"
 
 	info "ZFS: Exporting ZFS storage pools..."
-	errs=$(export_all -F 2>&1)
+	errs=$(zpool export -aF 2>&1)
 	ret=$?
 	[ -z "${errs}" ] || echo "${errs}" | vwarn
 	if [ "x${ret}" != "x0" ]; then

diff --git a/zfs/contrib/dracut/90zfs/import-opts-generator.sh.in b/zfs/contrib/dracut/90zfs/import-opts-generator.sh.in
new file mode 100755
index 0000000..8bc8c9b
--- /dev/null
+++ b/zfs/contrib/dracut/90zfs/import-opts-generator.sh.in

@@ -0,0 +1,5 @@
+#!/bin/sh
+# Prints one ZPOOL_IMPORT_OPTS=<value> line on stdout after sourcing the dracut ZFS library.
+. /lib/dracut-zfs-lib.sh  # presumably this is what sets ZPOOL_IMPORT_OPTS -- TODO confirm against zfs-lib.sh
+
+echo ZPOOL_IMPORT_OPTS="$ZPOOL_IMPORT_OPTS"  # the value is emitted as-is, without added quoting

diff --git a/zfs/contrib/dracut/90zfs/module-setup.sh.in b/zfs/contrib/dracut/90zfs/module-setup.sh.in
index 4efc4b0..a247e2a 100755
--- a/zfs/contrib/dracut/90zfs/module-setup.sh.in
+++ b/zfs/contrib/dracut/90zfs/module-setup.sh.in

@@ -1,15 +1,14 @@
-#!/bin/bash
+#!/usr/bin/env bash
+# shellcheck disable=SC2154
 
 check() {
 	# We depend on udev-rules being loaded
 	[ "${1}" = "-d" ] && return 0
 
 	# Verify the zfs tool chain
-	for tool in "@sbindir@/zpool" "@sbindir@/zfs" "@mounthelperdir@/mount.zfs" ; do
-		test -x "$tool" || return 1
+	for tool in "zgenhostid" "zpool" "zfs" "mount.zfs"; do
+		command -v "${tool}" >/dev/null || return 1
 	done
-	# Verify grep exists
-	which grep >/dev/null 2>&1 || return 1
 
 	return 0
 }
@@ -20,94 +19,98 @@
 }
 
 installkernel() {
-	instmods zfs
-	instmods zcommon
-	instmods znvpair
-	instmods zavl
-	instmods zunicode
-	instmods zlua
-	instmods icp
-	instmods spl
-	instmods zlib_deflate
-	instmods zlib_inflate
+	instmods -c zfs
 }
 
 install() {
-	inst_rules @udevruledir@/90-zfs.rules
-	inst_rules @udevruledir@/69-vdev.rules
-	inst_rules @udevruledir@/60-zvol.rules
-	dracut_install hostid
-	dracut_install grep
-	dracut_install @sbindir@/zfs
-	dracut_install @sbindir@/zpool
-	# Workaround for zfsonlinux/zfs#4749 by ensuring libgcc_s.so(.1) is included
-	if [[ -n "$(ldd @sbindir@/zpool | grep -F 'libgcc_s.so')" ]]; then
-		# Dracut will have already tracked and included it
-		:;
-	elif command -v gcc-config 2>&1 1>/dev/null; then
-		# On systems with gcc-config (Gentoo, Funtoo, etc.):
-		# Use the current profile to resolve the appropriate path
-		dracut_install "/usr/lib/gcc/$(s=$(gcc-config -c); echo ${s%-*}/${s##*-})/libgcc_s.so.1"
-	elif [[ -n "$(ls /usr/lib/libgcc_s.so* 2>/dev/null)" ]]; then
-		# Try a simple path first
-		dracut_install /usr/lib/libgcc_s.so*
-	else
-		# Fallback: Guess the path and include all matches
-		dracut_install /usr/lib/gcc/*/*/libgcc_s.so*
+	inst_rules 90-zfs.rules 69-vdev.rules 60-zvol.rules
+
+	inst_multiple \
+		zgenhostid \
+		zfs \
+		zpool \
+		mount.zfs \
+		hostid \
+		grep \
+		awk \
+		tr \
+		cut \
+		head ||
+		{ dfatal "Failed to install essential binaries"; exit 1; }
+
+	# Adapted from https://github.com/zbm-dev/zfsbootmenu
+	if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so'; then
+		# On systems with gcc-config (Gentoo, Funtoo, etc.), use it to find libgcc_s
+		if command -v gcc-config >/dev/null; then
+			inst_simple "/usr/lib/gcc/$(s=$(gcc-config -c); echo "${s%-*}/${s##*-}")/libgcc_s.so.1" ||
+				{ dfatal "Unable to install libgcc_s.so"; exit 1; }
+			# Otherwise, use dracut's library installation function to find the right one
+		elif ! inst_libdir_file "libgcc_s.so*"; then
+			# If all else fails, just try looking for some gcc arch directory
+			inst_simple /usr/lib/gcc/*/*/libgcc_s.so* ||
+				{ dfatal "Unable to install libgcc_s.so"; exit 1; }
+		fi
 	fi
-	dracut_install @mounthelperdir@/mount.zfs
-	dracut_install @udevdir@/vdev_id
-	dracut_install awk
-	dracut_install basename
-	dracut_install cut
-	dracut_install head
-	dracut_install @udevdir@/zvol_id
+
 	inst_hook cmdline 95 "${moddir}/parse-zfs.sh"
-	if [ -n "$systemdutildir" ] ; then
-		inst_script "${moddir}/zfs-generator.sh" "$systemdutildir"/system-generators/dracut-zfs-generator
+	if [ -n "${systemdutildir}" ]; then
+		inst_script "${moddir}/zfs-generator.sh" "${systemdutildir}/system-generators/dracut-zfs-generator"
 	fi
 	inst_hook pre-mount 90 "${moddir}/zfs-load-key.sh"
 	inst_hook mount 98 "${moddir}/mount-zfs.sh"
 	inst_hook cleanup 99 "${moddir}/zfs-needshutdown.sh"
 	inst_hook shutdown 20 "${moddir}/export-zfs.sh"
 
-	inst_simple "${moddir}/zfs-lib.sh" "/lib/dracut-zfs-lib.sh"
-	if [ -e @sysconfdir@/zfs/zpool.cache ]; then
-		inst @sysconfdir@/zfs/zpool.cache
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/zfs/zpool.cache
-	fi
+	inst_script "${moddir}/zfs-lib.sh" "/lib/dracut-zfs-lib.sh"
 
-	if [ -e @sysconfdir@/zfs/vdev_id.conf ]; then
-		inst @sysconfdir@/zfs/vdev_id.conf
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/zfs/vdev_id.conf
-	fi
+	# -H ensures they are marked host-only
+	# -o ensures there is no error upon absence of these files
+	inst_multiple -o -H \
+		"@sysconfdir@/zfs/zpool.cache" \
+		"@sysconfdir@/zfs/vdev_id.conf"
 
 	# Synchronize initramfs and system hostid
-	AA=`hostid | cut -b 1,2`
-	BB=`hostid | cut -b 3,4`
-	CC=`hostid | cut -b 5,6`
-	DD=`hostid | cut -b 7,8`
-	echo -ne "\\x${DD}\\x${CC}\\x${BB}\\x${AA}" > "${initdir}/etc/hostid"
+	if ! inst_simple -H @sysconfdir@/hostid; then
+		if HOSTID="$(hostid 2>/dev/null)" && [ "${HOSTID}" != "00000000" ]; then
+			zgenhostid -o "${initdir}@sysconfdir@/hostid" "${HOSTID}"
+			mark_hostonly @sysconfdir@/hostid
+		fi
+	fi
 
 	if dracut_module_included "systemd"; then
-		mkdir -p "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"
-		for _item in scan cache ; do
-			dracut_install @systemdunitdir@/zfs-import-$_item.service
-			if ! [ -L "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"/zfs-import-$_item.service ]; then
-				ln -s ../zfs-import-$_item.service "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"/zfs-import-$_item.service
-				type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-import-$_item.service
-			fi
+		inst_simple "${systemdsystemunitdir}/zfs-import.target"
+		systemctl -q --root "${initdir}" add-wants initrd.target zfs-import.target
+
+		inst_simple "${moddir}/zfs-env-bootfs.service" "${systemdsystemunitdir}/zfs-env-bootfs.service"
+		systemctl -q --root "${initdir}" add-wants zfs-import.target zfs-env-bootfs.service
+
+		inst_simple "${moddir}/zfs-nonroot-necessities.service" "${systemdsystemunitdir}/zfs-nonroot-necessities.service"
+		systemctl -q --root "${initdir}" add-requires initrd-root-fs.target zfs-nonroot-necessities.service
+
+		for _service in \
+			"zfs-import-scan.service" \
+			"zfs-import-cache.service"; do
+			inst_simple "${systemdsystemunitdir}/${_service}"
+			systemctl -q --root "${initdir}" add-wants zfs-import.target "${_service}"
+
+			# Add user-provided unit overrides
+			# - /etc/systemd/system/zfs-import-{scan,cache}.service
+			# - /etc/systemd/system/zfs-import-{scan,cache}.service.d/overrides.conf
+			# -H ensures they are marked host-only
+			# -o ensures there is no error upon absence of these files
+			inst_multiple -o -H \
+				"${systemdsystemconfdir}/${_service}" \
+				"${systemdsystemconfdir}/${_service}.d/"*.conf
+
 		done
-		inst "${moddir}"/zfs-env-bootfs.service "${systemdsystemunitdir}"/zfs-env-bootfs.service
-		ln -s ../zfs-env-bootfs.service "${initdir}/${systemdsystemunitdir}/zfs-import.target.wants"/zfs-env-bootfs.service
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-env-bootfs.service
-		dracut_install systemd-ask-password
-		dracut_install systemd-tty-ask-password-agent
-		mkdir -p "${initdir}/$systemdsystemunitdir/initrd.target.wants"
-		dracut_install @systemdunitdir@/zfs-import.target
-		if ! [ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target ]; then
-			ln -s ../zfs-import.target "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target
-			type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-import.target
-		fi
+
+		for _service in \
+			"zfs-snapshot-bootfs.service" \
+			"zfs-rollback-bootfs.service"; do
+			inst_simple "${moddir}/${_service}" "${systemdsystemunitdir}/${_service}"
+			systemctl -q --root "${initdir}" add-wants initrd.target "${_service}"
+		done
+
+		inst_simple "${moddir}/import-opts-generator.sh" "${systemdutildir}/system-environment-generators/zfs-import-opts.sh"
 	fi
 }

diff --git a/zfs/contrib/dracut/90zfs/mount-zfs.sh.in b/zfs/contrib/dracut/90zfs/mount-zfs.sh.in
index 73300a9..b0eb614 100755
--- a/zfs/contrib/dracut/90zfs/mount-zfs.sh.in
+++ b/zfs/contrib/dracut/90zfs/mount-zfs.sh.in

@@ -1,50 +1,75 @@
 #!/bin/sh
+# shellcheck disable=SC2034,SC2154
 
 . /lib/dracut-zfs-lib.sh
 
-ZFS_DATASET=""
-ZFS_POOL=""
-
-case "${root}" in
-	zfs:*) ;;
-	*) return ;;
-esac
+decode_root_args || return 0
 
 GENERATOR_FILE=/run/systemd/generator/sysroot.mount
 GENERATOR_EXTENSION=/run/systemd/generator/sysroot.mount.d/zfs-enhancement.conf
 
-if [ -e "$GENERATOR_FILE" ] && [ -e "$GENERATOR_EXTENSION" ] ; then
-	# If the ZFS sysroot.mount flag exists, the initial RAM disk configured
-	# it to mount ZFS on root.  In that case, we bail early.  This flag
-	# file gets created by the zfs-generator program upon successful run.
-	info "ZFS: There is a sysroot.mount and zfs-generator has extended it."
-	info "ZFS: Delegating root mount to sysroot.mount."
-	# Let us tell the initrd to run on shutdown.
-	# We have a shutdown hook to run
-	# because we imported the pool.
+if [ -e "$GENERATOR_FILE" ] && [ -e "$GENERATOR_EXTENSION" ]; then
+	# We're under systemd and dracut-zfs-generator ran to completion.
+	info "ZFS: Delegating root mount to sysroot.mount at al."
+
 	# We now prevent Dracut from running this thing again.
-	for zfsmounthook in "$hookdir"/mount/*zfs* ; do
-		if [ -f "$zfsmounthook" ] ; then
-			rm -f "$zfsmounthook"
-		fi
-	done
+	rm -f "$hookdir"/mount/*zfs*
 	return
 fi
+
 info "ZFS: No sysroot.mount exists or zfs-generator did not extend it."
 info "ZFS: Mounting root with the traditional mount-zfs.sh instead."
 
+# ask_for_password tries prompt cmd
+#   tries: number of attempts; prompt: text shown to the user; cmd: shell command string run via eval.
+# Wraps around plymouth ask-for-password and adds fallback to tty password ask
+# if plymouth is not present. Returns the status left in $ret (0 once cmd has succeeded).
+ask_for_password() {
+    tries="$1"
+    prompt="$2"
+    cmd="$3"
+
+    {
+        flock -s 9  # shared lock on fd 9, which the redirection closing this group opens on /.console_lock
+
+        # Prompt for password with plymouth, if installed and running.
+        if plymouth --ping 2>/dev/null; then
+            plymouth ask-for-password \
+                --prompt "$prompt" --number-of-tries="$tries" | \
+                eval "$cmd"
+            ret=$?  # status of the pipeline's last stage, i.e. the eval'd cmd
+        else
+            i=1
+            while [ "$i" -le "$tries" ]; do  # NOTE(review): with tries < 1 the body never runs and ret is never assigned here
+                printf "%s [%i/%i]:" "$prompt" "$i" "$tries" >&2
+                eval "$cmd" && ret=0 && break
+                ret=$?  # reached only when cmd failed; keeps its exit status
+                i=$((i+1))
+                printf '\n' >&2
+            done
+            unset i
+        fi
+    } 9>/.console_lock
+
+    [ "$ret" -ne 0 ] && echo "Wrong password" >&2
+    return "$ret"
+}
+
+
 # Delay until all required block devices are present.
 modprobe zfs 2>/dev/null
 udevadm settle
 
+ZFS_DATASET=
+ZFS_POOL=
+
 if [ "${root}" = "zfs:AUTO" ] ; then
-	ZFS_DATASET="$(find_bootfs)"
-	if [ $? -ne 0 ] ; then
+	if ! ZFS_DATASET="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"; then
+		# shellcheck disable=SC2086
 		zpool import -N -a ${ZPOOL_IMPORT_OPTS}
-		ZFS_DATASET="$(find_bootfs)"
-		if [ $? -ne 0 ] ; then
+		if ! ZFS_DATASET="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"; then
 			warn "ZFS: No bootfs attribute found in importable pools."
-			export_all -F
+			zpool export -aF
 
 			rootok=0
 			return 1
@@ -53,34 +78,43 @@
 	info "ZFS: Using ${ZFS_DATASET} as root."
 fi
 
-ZFS_DATASET="${ZFS_DATASET:-${root#zfs:}}"
+ZFS_DATASET="${ZFS_DATASET:-${root}}"
 ZFS_POOL="${ZFS_DATASET%%/*}"
 
-if import_pool "${ZFS_POOL}" ; then
-	# Load keys if we can or if we need to
-	if [ $(zpool list -H -o feature@encryption $(echo "${ZFS_POOL}" | awk -F\/ '{print $1}')) = 'active' ]; then
-		# if the root dataset has encryption enabled
-		ENCRYPTIONROOT="$(zfs get -H -o value encryptionroot "${ZFS_DATASET}")"
-		if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
-			KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")"
-			# if the key needs to be loaded
-			if [ "$KEYSTATUS" = "unavailable" ]; then
-				# decrypt them
-				ask_for_password \
-					--tries 5 \
-					--prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \
-					--cmd "zfs load-key '${ENCRYPTIONROOT}'"
-			fi
+
+if ! zpool get -Ho value name "${ZFS_POOL}" > /dev/null 2>&1; then
+    info "ZFS: Importing pool ${ZFS_POOL}..."
+    # shellcheck disable=SC2086
+    if ! zpool import -N ${ZPOOL_IMPORT_OPTS} "${ZFS_POOL}"; then
+        warn "ZFS: Unable to import pool ${ZFS_POOL}"
+        rootok=0
+        return 1
+    fi
+fi
+
+# Load keys if we can or if we need to
+# TODO: for_relevant_root_children like in zfs-load-key.sh.in
+if [ "$(zpool get -Ho value feature@encryption "${ZFS_POOL}")" = 'active' ]; then
+	# if the root dataset has encryption enabled
+	ENCRYPTIONROOT="$(zfs get -Ho value encryptionroot "${ZFS_DATASET}")"
+	if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
+		KEYSTATUS="$(zfs get -Ho value keystatus "${ENCRYPTIONROOT}")"
+		# if the key needs to be loaded
+		if [ "$KEYSTATUS" = "unavailable" ]; then
+			# decrypt them
+			ask_for_password \
+				5 \
+				"Encrypted ZFS password for ${ENCRYPTIONROOT}: " \
+				"zfs load-key '${ENCRYPTIONROOT}'"
 		fi
 	fi
-	# Let us tell the initrd to run on shutdown.
-	# We have a shutdown hook to run
-	# because we imported the pool.
-	info "ZFS: Mounting dataset ${ZFS_DATASET}..."
-	if mount_dataset "${ZFS_DATASET}" ; then
-		ROOTFS_MOUNTED=yes
-		return 0
-	fi
 fi
 
-rootok=0
+# Let us tell the initrd to run on shutdown.
+# We have a shutdown hook to run
+# because we imported the pool.
+info "ZFS: Mounting dataset ${ZFS_DATASET}..."
+if ! mount_dataset "${ZFS_DATASET}"; then
+  rootok=0
+  return 1
+fi

diff --git a/zfs/contrib/dracut/90zfs/parse-zfs.sh.in b/zfs/contrib/dracut/90zfs/parse-zfs.sh.in
index eccfdc6..f7d1f1c 100755
--- a/zfs/contrib/dracut/90zfs/parse-zfs.sh.in
+++ b/zfs/contrib/dracut/90zfs/parse-zfs.sh.in

@@ -1,16 +1,14 @@
 #!/bin/sh
+# shellcheck disable=SC2034,SC2154
 
-. /lib/dracut-lib.sh
+# shellcheck source=zfs-lib.sh.in
+. /lib/dracut-zfs-lib.sh
 
 # Let the command line override our host id.
 spl_hostid=$(getarg spl_hostid=)
 if [ -n "${spl_hostid}" ] ; then
 	info "ZFS: Using hostid from command line: ${spl_hostid}"
-	AA=$(echo "${spl_hostid}" | cut -b 1,2)
-	BB=$(echo "${spl_hostid}" | cut -b 3,4)
-	CC=$(echo "${spl_hostid}" | cut -b 5,6)
-	DD=$(echo "${spl_hostid}" | cut -b 7,8)
-	echo -ne "\\x${DD}\\x${CC}\\x${BB}\\x${AA}" >/etc/hostid
+	zgenhostid -f "${spl_hostid}"
 elif [ -f "/etc/hostid" ] ; then
 	info "ZFS: Using hostid from /etc/hostid: $(hostid)"
 else
@@ -18,49 +16,20 @@
 	warn "ZFS: Pools may not import correctly."
 fi
 
-wait_for_zfs=0
-case "${root}" in
-	""|zfs|zfs:)
-		# We'll take root unset, root=zfs, or root=zfs:
-		# No root set, so we want to read the bootfs attribute.  We
-		# can't do that until udev settles so we'll set dummy values
-		# and hope for the best later on.
-		root="zfs:AUTO"
-		rootok=1
-		wait_for_zfs=1
+if decode_root_args; then
+	if [ "$root" = "zfs:AUTO" ]; then
+		info "ZFS: Boot dataset autodetected from bootfs=."
+	else
+		info "ZFS: Boot dataset is ${root}."
+	fi
 
-		info "ZFS: Enabling autodetection of bootfs after udev settles."
-		;;
-
-	ZFS\=*|zfs:*|zfs:FILESYSTEM\=*|FILESYSTEM\=*)
-		# root is explicit ZFS root.  Parse it now.  We can handle
-		# a root=... param in any of the following formats:
-		# root=ZFS=rpool/ROOT
-		# root=zfs:rpool/ROOT
-		# root=zfs:FILESYSTEM=rpool/ROOT
-		# root=FILESYSTEM=rpool/ROOT
-		# root=ZFS=pool+with+space/ROOT+WITH+SPACE (translates to root=ZFS=pool with space/ROOT WITH SPACE)
-
-		# Strip down to just the pool/fs
-		root="${root#zfs:}"
-		root="${root#FILESYSTEM=}"
-		root="zfs:${root#ZFS=}"
-		# switch + with spaces because kernel cmdline does not allow us to quote parameters
-		root=$(printf '%s\n' "$root" | sed "s/+/ /g")
-		rootok=1
-		wait_for_zfs=1
-
-		info "ZFS: Set ${root} as bootfs."
-		;;
-esac
-
-# Make sure Dracut is happy that we have a root and will wait for ZFS
-# modules to settle before mounting.
-if [ ${wait_for_zfs} -eq 1 ]; then
-	ln -s /dev/null /dev/root 2>/dev/null
-	initqueuedir="${hookdir}/initqueue/finished"
-	test -d "${initqueuedir}" || {
-		initqueuedir="${hookdir}/initqueue-finished"
-	}
-	echo '[ -e /dev/zfs ]' > "${initqueuedir}/zfs.sh"
+	rootok=1
+	# Make sure Dracut is happy that we have a root and will wait for ZFS
+	# modules to settle before mounting.
+	if [ -n "${wait_for_zfs}" ]; then
+		ln -s null /dev/root
+		echo '[ -e /dev/zfs ]' > "${hookdir}/initqueue/finished/zfs.sh"
+	fi
+else
+	info "ZFS: no ZFS-on-root."
 fi

diff --git a/zfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in b/zfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in
index 3cdf691..7ebab4c 100644
--- a/zfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in
+++ b/zfs/contrib/dracut/90zfs/zfs-env-bootfs.service.in

@@ -1,6 +1,5 @@
 [Unit]
-Description=Set BOOTFS environment for dracut
-Documentation=man:zpool(8)
+Description=Set BOOTFS and BOOTFSFLAGS environment variables for dracut
 DefaultDependencies=no
 After=zfs-import-cache.service
 After=zfs-import-scan.service
@@ -8,7 +7,17 @@
 
 [Service]
 Type=oneshot
-ExecStart=/bin/sh -c "/bin/systemctl set-environment BOOTFS=$(@sbindir@/zpool list -H -o bootfs | grep -m1 -v '^-$')"
+ExecStart=/bin/sh -c '                                                                         \
+    . /lib/dracut-zfs-lib.sh;                                                                  \
+    decode_root_args || exit 0;                                                                \
+    [ "$root" = "zfs:AUTO" ] && root="$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"; \
+    rootflags="$(getarg rootflags=)";                                                          \
+    case ",$rootflags," in                                                                     \
+        *,zfsutil,*) ;;                                                                        \
+        ,,) rootflags=zfsutil ;;                                                               \
+        *)  rootflags="zfsutil,$rootflags" ;;                                                  \
+    esac;                                                                                      \
+    exec systemctl set-environment BOOTFS="$root" BOOTFSFLAGS="$rootflags"'
 
 [Install]
 WantedBy=zfs-import.target

diff --git a/zfs/contrib/dracut/90zfs/zfs-generator.sh.in b/zfs/contrib/dracut/90zfs/zfs-generator.sh.in
index 0b8a8aa..4e1eb74 100755
--- a/zfs/contrib/dracut/90zfs/zfs-generator.sh.in
+++ b/zfs/contrib/dracut/90zfs/zfs-generator.sh.in

@@ -1,6 +1,8 @@
-#!/bin/bash
+#!/bin/sh
+# shellcheck disable=SC2016,SC1004,SC2154
 
-echo "zfs-generator: starting" >> /dev/kmsg
+grep -wq debug /proc/cmdline && debug=1
+[ -n "$debug" ] && echo "zfs-generator: starting" >> /dev/kmsg
 
 GENERATOR_DIR="$1"
 [ -n "$GENERATOR_DIR" ] || {
@@ -8,54 +10,33 @@
     exit 1
 }
 
-[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh
-[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh
+# shellcheck source=zfs-lib.sh.in
+. /lib/dracut-zfs-lib.sh
+decode_root_args || exit 0
 
-type getarg >/dev/null 2>&1 || {
-    echo "zfs-generator: loading Dracut library from $dracutlib" >> /dev/kmsg
-    . "$dracutlib"
-}
+[ -n "$debug" ] && echo "zfs-generator: writing extension for sysroot.mount to $GENERATOR_DIR/sysroot.mount.d/zfs-enhancement.conf" >> /dev/kmsg
 
-[ -z "$root" ]       && root=$(getarg root=)
-[ -z "$rootfstype" ] && rootfstype=$(getarg rootfstype=)
-[ -z "$rootflags" ]  && rootflags=$(getarg rootflags=)
 
-# If root is not ZFS= or zfs: or rootfstype is not zfs
-# then we are not supposed to handle it.
-[ "${root##zfs:}" = "${root}" -a "${root##ZFS=}" = "${root}" -a "$rootfstype" != "zfs" ] && exit 0
-
-rootfstype=zfs
-if echo "${rootflags}" | grep -Eq '^zfsutil$|^zfsutil,|,zfsutil$|,zfsutil,' ; then
-    true
-elif test -n "${rootflags}" ; then
-    rootflags="zfsutil,${rootflags}"
-else
-    rootflags=zfsutil
-fi
-
-echo "zfs-generator: writing extension for sysroot.mount to $GENERATOR_DIR"/sysroot.mount.d/zfs-enhancement.conf >> /dev/kmsg
-
-[ -d "$GENERATOR_DIR" ] || mkdir "$GENERATOR_DIR"
-[ -d "$GENERATOR_DIR"/sysroot.mount.d ] || mkdir "$GENERATOR_DIR"/sysroot.mount.d
+mkdir -p "$GENERATOR_DIR"/sysroot.mount.d "$GENERATOR_DIR"/dracut-pre-mount.service.d
 
 {
     echo "[Unit]"
     echo "Before=initrd-root-fs.target"
     echo "After=zfs-import.target"
+    echo
     echo "[Mount]"
-    if [ "${root}" = "zfs:AUTO" ] ; then
-      echo "PassEnvironment=BOOTFS"
-      echo 'What=${BOOTFS}'
-    else
-      root="${root##zfs:}"
-      root="${root##ZFS=}"
-      echo "What=${root}"
-    fi
-    echo "Type=${rootfstype}"
-    echo "Options=${rootflags}"
+    echo "PassEnvironment=BOOTFS BOOTFSFLAGS"
+    echo 'What=${BOOTFS}'
+    echo "Type=zfs"
+    echo 'Options=${BOOTFSFLAGS}'
 } > "$GENERATOR_DIR"/sysroot.mount.d/zfs-enhancement.conf
+ln -fs ../sysroot.mount "$GENERATOR_DIR"/initrd-root-fs.target.requires/sysroot.mount
 
-[ -d "$GENERATOR_DIR"/initrd-root-fs.target.requires ] || mkdir -p "$GENERATOR_DIR"/initrd-root-fs.target.requires
-ln -s ../sysroot.mount "$GENERATOR_DIR"/initrd-root-fs.target.requires/sysroot.mount
+{
+    echo "[Unit]"
+    echo "After=zfs-import.target"
+} > "$GENERATOR_DIR"/dracut-pre-mount.service.d/zfs-enhancement.conf
 
-echo "zfs-generator: finished" >> /dev/kmsg
\ No newline at end of file
+[ -n "$debug" ] && echo "zfs-generator: finished" >> /dev/kmsg
+
+exit 0

diff --git a/zfs/contrib/dracut/90zfs/zfs-lib.sh.in b/zfs/contrib/dracut/90zfs/zfs-lib.sh.in
index 44021c6..171616b 100755
--- a/zfs/contrib/dracut/90zfs/zfs-lib.sh.in
+++ b/zfs/contrib/dracut/90zfs/zfs-lib.sh.in

@@ -1,174 +1,119 @@
 #!/bin/sh
+# shellcheck disable=SC2034
 
-command -v getarg >/dev/null || . /lib/dracut-lib.sh
-command -v getargbool >/dev/null || {
-    # Compatibility with older Dracut versions.
-    # With apologies to the Dracut developers.
-    getargbool() {
-        if ! [ -z "$_b" ]; then
-                unset _b
-        fi
-        _default="$1"; shift
-        _b=$(getarg "$@")
-        [ $? -ne 0 ] &&  [ -z "$_b" ] && _b="$_default"
-        if [ -n "$_b" ]; then
-            [ "$_b" = "0" ] && return 1
-            [ "$_b" = "no" ] && return 1
-            [ "$_b" = "off" ] && return 1
-        fi
-        return 0
-    }
-}
+command -v getarg >/dev/null || . /lib/dracut-lib.sh || . /usr/lib/dracut/modules.d/99base/dracut-lib.sh
 
-OLDIFS="${IFS}"
-NEWLINE="
-"
+TAB="	"
 
-ZPOOL_IMPORT_OPTS=""
-if getargbool 0 zfs_force -y zfs.force -y zfsforce ; then
+ZPOOL_IMPORT_OPTS=
+if getargbool 0 zfs_force -y zfs.force -y zfsforce; then
     warn "ZFS: Will force-import pools if necessary."
-    ZPOOL_IMPORT_OPTS="${ZPOOL_IMPORT_OPTS} -f"
+    ZPOOL_IMPORT_OPTS=-f
 fi
 
-# find_bootfs
-#   returns the first dataset with the bootfs attribute.
-find_bootfs() {
-    IFS="${NEWLINE}"
-    for dataset in $(zpool list -H -o bootfs); do
-        case "${dataset}" in
-            "" | "-")
-                continue
-                ;;
-            "no pools available")
-                IFS="${OLDIFS}"
-                return 1
-                ;;
-            *)
-                IFS="${OLDIFS}"
-                echo "${dataset}"
-                return 0
-                ;;
-        esac
-    done
-
-    IFS="${OLDIFS}"
-    return 1
-}
-
-# import_pool POOL
-#   imports the given zfs pool if it isn't imported already.
-import_pool() {
-        pool="${1}"
-
-    if ! zpool list -H "${pool}" > /dev/null 2>&1; then
-        info "ZFS: Importing pool ${pool}..."
-        if ! zpool import -N ${ZPOOL_IMPORT_OPTS} "${pool}" ; then
-            warn "ZFS: Unable to import pool ${pool}"
-            return 1
-        fi
-    fi
-
-    return 0
+_mount_dataset_cb() {
+    mount -o zfsutil -t zfs "${1}" "${NEWROOT}${2}"
 }
 
 # mount_dataset DATASET
 #   mounts the given zfs dataset.
 mount_dataset() {
-        dataset="${1}"
+    dataset="${1}"
     mountpoint="$(zfs get -H -o value mountpoint "${dataset}")"
+    ret=0
 
     # We need zfsutil for non-legacy mounts and not for legacy mounts.
     if [ "${mountpoint}" = "legacy" ] ; then
-        mount -t zfs "${dataset}" "${NEWROOT}"
+        mount -t zfs "${dataset}" "${NEWROOT}" || ret=$?
     else
-        mount -o zfsutil -t zfs "${dataset}" "${NEWROOT}"
-    fi
+        mount -o zfsutil -t zfs "${dataset}" "${NEWROOT}" || ret=$?
 
-    return $?
-}
-
-# export_all OPTS
-#   exports all imported zfs pools.
-export_all() {
-        opts="${@}"
-    ret=0
-
-    IFS="${NEWLINE}"
-    for pool in $(zpool list -H -o name) ; do
-        if zpool list -H "${pool}" > /dev/null 2>&1; then
-            zpool export "${pool}" ${opts} || ret=$?
+        if [ "$ret" = "0" ]; then
+            for_relevant_root_children "${dataset}" _mount_dataset_cb || ret=$?
         fi
-    done
-    IFS="${OLDIFS}"
+    fi
 
     return ${ret}
 }
 
-# ask_for_password
-#
-# Wraps around plymouth ask-for-password and adds fallback to tty password ask
-# if plymouth is not present.
-#
-# --cmd command
-#   Command to execute. Required.
-# --prompt prompt
-#   Password prompt. Note that function already adds ':' at the end.
-#   Recommended.
-# --tries n
-#   How many times repeat command on its failure.  Default is 3.
-# --ply-[cmd|prompt|tries]
-#   Command/prompt/tries specific for plymouth password ask only.
-# --tty-[cmd|prompt|tries]
-#   Command/prompt/tries specific for tty password ask only.
-# --tty-echo-off
-#   Turn off input echo before tty command is executed and turn on after.
-#   It's useful when password is read from stdin.
-ask_for_password() {
-    ply_tries=3
-    tty_tries=3
-    while [ "$#" -gt 0 ]; do
-        case "$1" in
-            --cmd) ply_cmd="$2"; tty_cmd="$2"; shift;;
-            --ply-cmd) ply_cmd="$2"; shift;;
-            --tty-cmd) tty_cmd="$2"; shift;;
-            --prompt) ply_prompt="$2"; tty_prompt="$2"; shift;;
-            --ply-prompt) ply_prompt="$2"; shift;;
-            --tty-prompt) tty_prompt="$2"; shift;;
-            --tries) ply_tries="$2"; tty_tries="$2"; shift;;
-            --ply-tries) ply_tries="$2"; shift;;
-            --tty-tries) tty_tries="$2"; shift;;
-            --tty-echo-off) tty_echo_off=yes;;
-        esac
-        shift
-    done
+# for_relevant_root_children DATASET EXEC
+#   Runs "EXEC dataset mountpoint" for all children of DATASET that are needed for system bringup
+#   Used by zfs-nonroot-necessities.service and friends, too!
+for_relevant_root_children() {
+    dataset="${1}"
+    exec="${2}"
 
-    { flock -s 9;
-        # Prompt for password with plymouth, if installed and running.
-        if type plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then
-            plymouth ask-for-password \
-                --prompt "$ply_prompt" --number-of-tries="$ply_tries" \
-                --command="$ply_cmd"
-            ret=$?
-        else
-            if [ "$tty_echo_off" = yes ]; then
-                stty_orig="$(stty -g)"
-                stty -echo
-            fi
+    zfs list -t filesystem -Ho name,mountpoint,canmount -r "${dataset}" |
+        (
+            _ret=0
+            while IFS="${TAB}" read -r dataset mountpoint canmount; do
+                [ "$canmount" != "on" ] && continue
 
-            i=1
-            while [ "$i" -le "$tty_tries" ]; do
-                [ -n "$tty_prompt" ] && \
-                    printf "%s [%i/%i]:" "$tty_prompt" "$i" "$tty_tries" >&2
-                eval "$tty_cmd" && ret=0 && break
-                ret=$?
-                i=$((i+1))
-                [ -n "$tty_prompt" ] && printf '\n' >&2
+                case "$mountpoint" in
+                    /etc|/bin|/lib|/lib??|/libx32|/usr)
+                        # If these aren't mounted we may not be able to get to the real init at all, or pollute the dataset holding the rootfs
+                        "${exec}" "${dataset}" "${mountpoint}" || _ret=$?
+                        ;;
+                    *)
+                        # Up to the real init to remount everything else it might need
+                        ;;
+                esac
             done
-            unset i
-            [ "$tty_echo_off" = yes ] && stty "$stty_orig"
-        fi
-    } 9>/.console_lock
+            exit ${_ret}
+        )
+}
 
-    [ $ret -ne 0 ] && echo "Wrong password" >&2
-    return $ret
+# Parse root=, rootfstype=, return them decoded and normalised to zfs:AUTO for auto, plain dset for explicit
+#
+# Returns true if the root filesystem is ZFS, false if we shouldn't handle it
+#
+# Supported values:
+#   root=
+#   root=zfs
+#   root=zfs:
+#   root=zfs:AUTO
+#
+#   root=ZFS=data/set
+#   root=zfs:data/set
+#   root=zfs:ZFS=data/set (as a side-effect; allowed but undocumented)
+#
+#   rootfstype=zfs AND root=data/set <=> root=data/set
+#   rootfstype=zfs AND root=         <=> root=zfs:AUTO
+#
+# '+'es in explicit dataset decoded to ' 's.
+decode_root_args() {
+    if [ -n "$rootfstype" ]; then
+        [ "$rootfstype" = zfs ]
+        return
+    fi
+
+    root=$(getarg root=)
+    rootfstype=$(getarg rootfstype=)
+
+    # shellcheck disable=SC2249
+    case "$root" in
+        ""|zfs|zfs:|zfs:AUTO)
+            root=zfs:AUTO
+            rootfstype=zfs
+            return 0
+            ;;
+
+        ZFS=*|zfs:*)
+            root="${root#zfs:}"
+            root="${root#ZFS=}"
+            root=$(echo "$root" | tr '+' ' ')
+            rootfstype=zfs
+            return 0
+            ;;
+    esac
+
+    if [ "$rootfstype" = "zfs" ]; then
+        case "$root" in
+            "") root=zfs:AUTO ;;
+            *)  root=$(echo "$root" | tr '+' ' ') ;;
+        esac
+        return 0
+    fi
+
+    return 1
 }

diff --git a/zfs/contrib/dracut/90zfs/zfs-load-key.sh.in b/zfs/contrib/dracut/90zfs/zfs-load-key.sh.in
index 85e55c5..d916f43 100755
--- a/zfs/contrib/dracut/90zfs/zfs-load-key.sh.in
+++ b/zfs/contrib/dracut/90zfs/zfs-load-key.sh.in

@@ -1,58 +1,64 @@
 #!/bin/sh
+# shellcheck disable=SC2154
 
 # only run this on systemd systems, we handle the decrypt in mount-zfs.sh in the mount hook otherwise
-[ -e /bin/systemctl ] || return 0
+[ -e /bin/systemctl ] || [ -e /usr/bin/systemctl ] || return 0
 
-# This script only gets executed on systemd systems, see mount-zfs.sh for non-systemd systems
+# shellcheck source=zfs-lib.sh.in
+. /lib/dracut-zfs-lib.sh
 
-# import the libs now that we know the pool imported
-[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh
-[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh
-# shellcheck source=./lib-zfs.sh.in
-. "$dracutlib"
-
-# load the kernel command line vars
-[ -z "$root" ] && root="$(getarg root=)"
-# If root is not ZFS= or zfs: or rootfstype is not zfs then we are not supposed to handle it.
-[ "${root##zfs:}" = "${root}" ] && [ "${root##ZFS=}" = "${root}" ] && [ "$rootfstype" != "zfs" ] && exit 0
+decode_root_args || return 0
 
 # There is a race between the zpool import and the pre-mount hooks, so we wait for a pool to be imported
-while true; do
-    zpool list -H | grep -q -v '^$' && break
-    [ "$(systemctl is-failed zfs-import-cache.service)" = 'failed' ] && exit 1
-    [ "$(systemctl is-failed zfs-import-scan.service)" = 'failed' ] && exit 1
+while ! systemctl is-active --quiet zfs-import.target; do
+    systemctl is-failed --quiet zfs-import-cache.service zfs-import-scan.service && return 1
     sleep 0.1s
 done
 
-# run this after import as zfs-import-cache/scan service is confirmed good
-# we do not overwrite the ${root} variable, but create a new one, BOOTFS, to hold the dataset
-if [ "${root}" = "zfs:AUTO" ] ; then
-    BOOTFS="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')"
-else
-    BOOTFS="${root##zfs:}"
-    BOOTFS="${BOOTFS##ZFS=}"
+BOOTFS="$root"
+if [ "$BOOTFS" = "zfs:AUTO" ]; then
+    BOOTFS="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"
 fi
 
-# if pool encryption is active and the zfs command understands '-o encryption'
-if [ "$(zpool list -H -o feature@encryption $(echo "${BOOTFS}" | awk -F\/ '{print $1}'))" = 'active' ]; then
-    # if the root dataset has encryption enabled
-    ENCRYPTIONROOT=$(zfs get -H -o value encryptionroot "${BOOTFS}")
-    # where the key is stored (in a file or loaded via prompt)
-    KEYLOCATION=$(zfs get -H -o value keylocation "${ENCRYPTIONROOT}")
-    if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
-        KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")"
-        # continue only if the key needs to be loaded
-        [ "$KEYSTATUS" = "unavailable" ] || exit 0
-        # if key is stored in a file, do not prompt
-        if ! [ "${KEYLOCATION}" = "prompt" ]; then
-            zfs load-key "${ENCRYPTIONROOT}"
-        else
-            # decrypt them
-            TRY_COUNT=5
-            while [ $TRY_COUNT -gt 0 ]; do
-                systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break
-                TRY_COUNT=$((TRY_COUNT - 1))
+[ "$(zpool get -Ho value feature@encryption "${BOOTFS%%/*}")" = 'active' ] || return 0
+
+_load_key_cb() {
+    dataset="$1"
+
+    ENCRYPTIONROOT="$(zfs get -Ho value encryptionroot "${dataset}")"
+    [ "${ENCRYPTIONROOT}" = "-" ] && return 0
+
+    [ "$(zfs get -Ho value keystatus "${ENCRYPTIONROOT}")" = "unavailable" ] || return 0
+
+    KEYLOCATION="$(zfs get -Ho value keylocation "${ENCRYPTIONROOT}")"
+    case "${KEYLOCATION%%://*}" in
+        prompt)
+            for _ in 1 2 3; do
+                systemd-ask-password --no-tty "Encrypted ZFS password for ${dataset}" | zfs load-key "${ENCRYPTIONROOT}" && break
             done
-        fi
-    fi
-fi
+            ;;
+        http*)
+            systemctl start network-online.target
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+        file)
+            KEYFILE="${KEYLOCATION#file://}"
+            [ -r "${KEYFILE}" ] || udevadm settle
+            [ -r "${KEYFILE}" ] || {
+                info "ZFS: Waiting for key ${KEYFILE} for ${ENCRYPTIONROOT}..."
+                for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
+                    sleep 0.5s
+                    [ -r "${KEYFILE}" ] && break
+                done
+            }
+            [ -r "${KEYFILE}" ] || warn "ZFS: Key ${KEYFILE} for ${ENCRYPTIONROOT} hasn't appeared. Trying anyway."
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+        *)
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+    esac
+}
+
+_load_key_cb "$BOOTFS"
+for_relevant_root_children "$BOOTFS" _load_key_cb

diff --git a/zfs/contrib/dracut/90zfs/zfs-needshutdown.sh.in b/zfs/contrib/dracut/90zfs/zfs-needshutdown.sh.in
index e3d1b59..7fb825b 100755
--- a/zfs/contrib/dracut/90zfs/zfs-needshutdown.sh.in
+++ b/zfs/contrib/dracut/90zfs/zfs-needshutdown.sh.in

@@ -1,8 +1,8 @@
 #!/bin/sh
 
-type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
+command -v getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
 
-if zpool list 2>&1 | grep -q 'no pools available' ; then
+if [ -z "$(zpool get -Ho value name)" ]; then
     info "ZFS: No active pools, no need to export anything."
 else
     info "ZFS: There is an active pool, will export it."

diff --git a/zfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in b/zfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in
new file mode 100644
index 0000000..8f420c7
--- /dev/null
+++ b/zfs/contrib/dracut/90zfs/zfs-nonroot-necessities.service.in

@@ -0,0 +1,20 @@
+[Unit]
+Before=initrd-root-fs.target
+After=sysroot.mount
+DefaultDependencies=no
+ConditionEnvironment=BOOTFS
+
+[Service]
+Type=oneshot
+PassEnvironment=BOOTFS
+ExecStart=/bin/sh -c '                                                \
+    . /lib/dracut-zfs-lib.sh;                                         \
+    _zfs_nonroot_necessities_cb() {                                   \
+        @sbindir@/zfs mount | grep -m1 -q "^$1 " && return 0;         \
+        echo "Mounting $1 on /sysroot$2";                             \
+        mount -o zfsutil -t zfs "$1" "/sysroot$2";                    \
+    };                                                                \
+    for_relevant_root_children "${BOOTFS}" _zfs_nonroot_necessities_cb'
+
+[Install]
+RequiredBy=initrd-root-fs.target

diff --git a/zfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in b/zfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in
new file mode 100644
index 0000000..68fdcb1
--- /dev/null
+++ b/zfs/contrib/dracut/90zfs/zfs-rollback-bootfs.service.in

@@ -0,0 +1,13 @@
+[Unit]
+Description=Rollback bootfs just before it is mounted
+Requisite=zfs-import.target
+After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
+Before=dracut-mount.service
+DefaultDependencies=no
+ConditionKernelCommandLine=bootfs.rollback
+ConditionEnvironment=BOOTFS
+
+[Service]
+Type=oneshot
+ExecStart=/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.rollback)"; exec @sbindir@/zfs rollback -Rf "$BOOTFS@${SNAPNAME:-%v}"'
+RemainAfterExit=yes

diff --git a/zfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in b/zfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in
new file mode 100644
index 0000000..a675b5b
--- /dev/null
+++ b/zfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in

@@ -0,0 +1,13 @@
+[Unit]
+Description=Snapshot bootfs just before it is mounted
+Requisite=zfs-import.target
+After=zfs-import.target dracut-pre-mount.service
+Before=dracut-mount.service
+DefaultDependencies=no
+ConditionKernelCommandLine=bootfs.snapshot
+ConditionEnvironment=BOOTFS
+
+[Service]
+Type=oneshot
+ExecStart=-/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$BOOTFS@${SNAPNAME:-%v}"'
+RemainAfterExit=yes

diff --git a/zfs/contrib/dracut/Makefile.am b/zfs/contrib/dracut/Makefile.am
index 1065e5e..8c9a6be 100644
--- a/zfs/contrib/dracut/Makefile.am
+++ b/zfs/contrib/dracut/Makefile.am

@@ -1,3 +1,6 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 SUBDIRS = 02zfsexpandknowledge 90zfs
+SHELLCHECKDIRS = $(SUBDIRS)
 
 EXTRA_DIST = README.dracut.markdown

diff --git a/zfs/contrib/dracut/README.dracut.markdown b/zfs/contrib/dracut/README.dracut.markdown
index b5fb288..b7cd8c8 100644
--- a/zfs/contrib/dracut/README.dracut.markdown
+++ b/zfs/contrib/dracut/README.dracut.markdown

@@ -1,201 +1,50 @@
-How to setup a zfs root filesystem using dracut
------------------------------------------------
+## Basic setup
+1. Install `zfs-dracut`
+2. Set `mountpoint=/` for your root dataset (for compatibility, `legacy` also works, but is not recommended for new installations):
+    ```sh
+    zfs set mountpoint=/ pool/dataset
+    ```
+3. Either (a) set `bootfs=` on the pool to the dataset:
+    ```sh
+    zpool set bootfs=pool/dataset pool
+    ```
+4. Or (b) append `root=zfs:pool/dataset` to your kernel cmdline.
+5. Re-generate your initrd and update it in your boot bundle
 
-1) Install the zfs-dracut package.  This package adds a zfs dracut module
-to the /usr/share/dracut/modules.d/ directory which allows dracut to
-create an initramfs which is zfs aware.
+Encrypted datasets have keys loaded automatically or prompted for.
 
-2) Set the bootfs property for the bootable dataset in the pool.  Then set
-the dataset mountpoint property to '/'.
+If the root dataset contains children with `mountpoint=`s of `/etc`, `/bin`, `/lib*`, or `/usr`, they're mounted too.
 
-    $ zpool set bootfs=pool/dataset pool
-    $ zfs set mountpoint=/ pool/dataset
+For complete documentation, see `dracut.zfs(7)`.
 
-Alternately, legacy mountpoints can be used by setting the 'root=' option
-on the kernel line of your grub.conf/menu.lst configuration file.  Then
-set the dataset mountpoint property to 'legacy'.
+## cmdline
+1. `root=`                    | Root dataset is…                                         |
+   ---------------------------|----------------------------------------------------------|
+   *(empty)*                  | the first `bootfs=` after `zpool import -aN`             |
+   `zfs:AUTO`, `zfs:`, `zfs`  | *(as above, but overriding other autoselection methods)* |
+   `ZFS=pool/dataset`         | `pool/dataset`                                           |
+   `zfs:pool/dataset`         | *(as above)*                                             |
 
-    $ grub.conf/menu.lst: kernel ... root=ZFS=pool/dataset
-    $ zfs set mountpoint=legacy pool/dataset
+   All `+`es are replaced with spaces (i.e. to boot from `root pool/data set`, pass `root=zfs:root+pool/data+set`).
 
-3) To set zfs module options put them in /etc/modprobe.d/zfs.conf file.
-The complete list of zfs module options is available by running the
-_modinfo zfs_ command.  Commonly set options include: zfs_arc_min,
-zfs_arc_max, zfs_prefetch_disable, and zfs_vdev_max_pending.
+   The dataset can be at any depth, including being the pool's root dataset (i.e. `root=zfs:pool`).
 
-4) Finally, create your new initramfs by running dracut.
+   `rootfstype=zfs` is equivalent to `root=zfs:AUTO`, `rootfstype=zfs root=pool/dataset` is equivalent to `root=zfs:pool/dataset`.
 
-    $ dracut --force /path/to/initramfs kernel_version
+2. `spl_hostid`: passed to `zgenhostid -f`, useful to override the `/etc/hostid` file baked into the initrd.
 
-Kernel Command Line
--------------------
+3. `bootfs.snapshot`, `bootfs.snapshot=snapshot-name`: enables `zfs-snapshot-bootfs.service`,
+   which creates a snapshot `$root_dataset@$(uname -r)` (or, in the second form, `$root_dataset@snapshot-name`)
+   after pool import but before the rootfs is mounted.
+   Failure to create the snapshot is noted, but booting continues.
 
-The initramfs' behavior is influenced by the following kernel command line
-parameters passed in from the boot loader:
+4. `bootfs.rollback`, `bootfs.rollback=snapshot-name`: enables `zfs-rollback-bootfs.service`,
+   which `-Rf` rolls back to `$root_dataset@$(uname -r)` (or, in the second form, `$root_dataset@snapshot-name`)
+   after pool import but before the rootfs is mounted.
+   Failure to roll back drops to the rescue shell.
+   This has obvious potential for data loss: make sure your persistent data is not below the rootfs and you don't care about any intermediate snapshots.
 
-* `root=...`: If not set, importable pools are searched for a bootfs
-attribute.  If an explicitly set root is desired, you may use
-`root=ZFS:pool/dataset`
+5. If both `bootfs.snapshot` and `bootfs.rollback` are set, `bootfs.rollback` is ordered *after* `bootfs.snapshot`.
 
-* `zfs_force=0`: If set to 1, the initramfs will run `zpool import -f` when
-attempting to import pools if the required pool isn't automatically imported
-by the zfs module.  This can save you a trip to a bootcd if hostid has
-changed, but is dangerous and can lead to zpool corruption, particularly in
-cases where storage is on a shared fabric such as iSCSI where multiple hosts
-can access storage devices concurrently.  _Please understand the implications
-of force-importing a pool before enabling this option!_
-
-* `spl_hostid`: By default, the hostid used by the SPL module is read from
-/etc/hostid inside the initramfs.  This file is placed there from the host
-system when the initramfs is built which effectively ties the ramdisk to the
-host which builds it.  If a different hostid is desired, one may be set in
-this attribute and will override any file present in the ramdisk.  The
-format should be hex exactly as found in the `/etc/hostid` file, IE
-`spl_hostid=0x00bab10c`.
-
-Note that changing the hostid between boots will most likely lead to an
-un-importable pool since the last importing hostid won't match.  In order
-to recover from this, you may use the `zfs_force` option or boot from a
-different filesystem and `zpool import -f` then `zpool export` the pool
-before rebooting with the new hostid.
-
-How it Works
-============
-
-The Dracut module consists of the following files (less Makefile's):
-
-* `module-setup.sh`: Script run by the initramfs builder to create the
-ramdisk.  Contains instructions on which files are required by the modules
-and z* programs.  Also triggers inclusion of `/etc/hostid` and the zpool
-cache.  This file is not included in the initramfs.
-
-* `90-zfs.rules`: udev rules which trigger loading of the ZFS modules at boot.
-
-* `zfs-lib.sh`: Utility functions used by the other files.
-
-* `parse-zfs.sh`: Run early in the initramfs boot process to parse kernel
-command line and determine if ZFS is the active root filesystem.
-
-* `mount-zfs.sh`: Run later in initramfs boot process after udev has settled
-to mount the root dataset.
-
-* `export-zfs.sh`: Run on shutdown after dracut has restored the initramfs
-and pivoted to it, allowing for a clean unmount and export of the ZFS root.
-
-`zfs-lib.sh`
-------------
-
-This file provides a few handy functions for working with ZFS. Those
-functions are used by the `mount-zfs.sh` and `export-zfs.sh` files.
-However, they could be used by any other file as well, as long as the file
-sources `/lib/dracut-zfs-lib.sh`.
-
-`module-setup.sh`
------------------
-
-This file is run by the Dracut script within the live system, not at boot
-time.  It's not included in the final initramfs.  Functions in this script
-describe which files are needed by ZFS at boot time.
-
-Currently all the various z* and spl modules are included, a dependency is
-asserted on udev-rules, and the various zfs, zpool, etc. helpers are included.
-Dracut provides library functions which automatically gather the shared libs
-necessary to run each of these binaries, so statically built binaries are
-not required.
-
-The zpool and zvol udev rules files are copied from where they are
-installed by the ZFS build.  __PACKAGERS TAKE NOTE__: If you move
-`/etc/udev/rules/60-z*.rules`, you'll need to update this file to match.
-
-Currently this file also includes `/etc/hostid` and `/etc/zfs/zpool.cache`
-which means the generated ramdisk is specific to the host system which built
-it.  If a generic initramfs is required, it may be preferable to omit these
-files and specify the `spl_hostid` from the boot loader instead.
-
-`parse-zfs.sh`
---------------
-
-Run during the cmdline phase of the initramfs boot process, this script
-performs some basic sanity checks on kernel command line parameters to
-determine if booting from ZFS is likely to be what is desired.  Dracut
-requires this script to adjust the `root` variable if required and to set
-`rootok=1` if a mountable root filesystem is available.  Unfortunately this
-script must run before udev is settled and kernel modules are known to be
-loaded, so accessing the zpool and zfs commands is unsafe.
-
-If the root=ZFS... parameter is set on the command line, then it's at least
-certain that ZFS is what is desired, though this script is unable to
-determine if ZFS is in fact available.  This script will alter the `root`
-parameter to replace several historical forms of specifying the pool and
-dataset name with the canonical form of `zfs:pool/dataset`.
-
-If no root= parameter is set, the best this script can do is guess that
-ZFS is desired.  At present, no other known filesystems will work with no
-root= parameter, though this might possibly interfere with using the
-compiled-in default root in the kernel image.  It's considered unlikely
-that would ever be the case when an initramfs is in use, so this script
-sets `root=zfs:AUTO` and hopes for the best.
-
-Once the root=... (or lack thereof) parameter is parsed, a dummy symlink
-is created from `/dev/root` -> `/dev/null` to satisfy parts of the Dracut
-process which check for presence of a single root device node.
-
-Finally, an initqueue/finished hook is registered which causes the initqueue
-phase of Dracut to wait for `/dev/zfs` to become available before attempting
-to mount anything.
-
-`mount-zfs.sh`
---------------
-
-This script is run after udev has settled and all tasks in the initqueue
-have succeeded.  This ensures that `/dev/zfs` is available and that the
-various ZFS modules are successfully loaded.  As it is now safe to call
-zpool and friends, we can proceed to find the bootfs attribute if necessary.
-
-If the root parameter was explicitly set on the command line, no parsing is
-necessary.  The list of imported pools is checked to see if the desired pool
-is already imported.  If it's not, and attempt is made to import the pool
-explicitly, though no force is attempted.  Finally the specified dataset
-is mounted on `$NEWROOT`, first using the `-o zfsutil` option to handle
-non-legacy mounts, then if that fails, without zfsutil to handle legacy
-mount points.
-
-If no root parameter was specified, this script attempts to find a pool with
-its bootfs attribute set.  First, already-imported pools are scanned and if
-an appropriate pool is found, no additional pools are imported.  If no pool
-with bootfs is found, any additional pools in the system are imported with
-`zpool import -N -a`, and the scan for bootfs is tried again.  If no bootfs
-is found with all pools imported, all pools are re-exported, and boot fails.
-Assuming a bootfs is found, an attempt is made to mount it to `$NEWROOT`,
-first with, then without the zfsutil option as above.
-
-Ordinarily pools are imported _without_ the force option which may cause
-boot to fail if the hostid has changed or a pool has been physically moved
-between servers.  The `zfs_force` kernel parameter is provided which when
-set to `1` causes `zpool import` to be run with the `-f` flag.  Forcing pool
-import can lead to serious data corruption and loss of pools, so this option
-should be used with extreme caution.  Note that even with this flag set, if
-the required zpool was auto-imported by the kernel module, no additional
-`zpool import` commands are run, so nothing is forced.
-
-`export-zfs.sh`
----------------
-
-Normally the zpool containing the root dataset cannot be exported on
-shutdown as it is still in use by the init process. To work around this,
-Dracut is able to restore the initramfs on shutdown and pivot to it.
-All remaining process are then running from a ramdisk, allowing for a
-clean unmount and export of the ZFS root. The theory of operation is
-described in detail in the [Dracut manual](https://www.kernel.org/pub/linux/utils/boot/dracut/dracut.html#_dracut_on_shutdown).
-
-This script will try to export all remaining zpools after Dracut has
-pivoted to the initramfs. If an initial regular export is not successful,
-Dracut will call this script once more with the `final` option,
-in which case a forceful export is attempted.
-
-Other Dracut modules include similar shutdown scripts and Dracut
-invokes these scripts round-robin until they succeed. In particular,
-the `90dm` module installs a script which tries to close and remove
-all device mapper targets. Thus, if there are ZVOLs containing
-dm-crypt volumes or if the zpool itself is backed by a dm-crypt
-volume, the shutdown scripts will try to untangle this.
+6. `zfs_force`, `zfs.force`, `zfsforce`: add `-f` to all `zpool import` invocations.
+   May be useful. Use with caution.

diff --git a/zfs/contrib/initramfs/Makefile.am b/zfs/contrib/initramfs/Makefile.am
index 849b1d8..931ceb1 100644
--- a/zfs/contrib/initramfs/Makefile.am
+++ b/zfs/contrib/initramfs/Makefile.am

@@ -1,23 +1,12 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 initrddir = /usr/share/initramfs-tools
 
-initrd_SCRIPTS = \
-	conf.d/zfs conf-hooks.d/zfs hooks/zfs scripts/zfs scripts/local-top/zfs
+dist_initrd_SCRIPTS = \
+       zfsunlock
 
-SUBDIRS = hooks scripts
+SUBDIRS = conf.d conf-hooks.d hooks scripts
+SHELLCHECKDIRS = hooks scripts
 
 EXTRA_DIST = \
-	$(top_srcdir)/contrib/initramfs/conf.d/zfs \
-	$(top_srcdir)/contrib/initramfs/conf-hooks.d/zfs \
-	$(top_srcdir)/contrib/initramfs/README.initramfs.markdown
-
-install-initrdSCRIPTS: $(EXTRA_DIST)
-	for d in conf.d conf-hooks.d scripts/local-top; do \
-		$(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \
-		cp $(top_srcdir)/contrib/initramfs/$$d/zfs \
-		    $(DESTDIR)$(initrddir)/$$d/; \
-	done
-	for d in hooks scripts; do \
-		$(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \
-		cp $(top_builddir)/contrib/initramfs/$$d/zfs \
-		    $(DESTDIR)$(initrddir)/$$d/; \
-	done
+	README.initramfs.markdown

diff --git a/zfs/contrib/initramfs/README.initramfs.markdown b/zfs/contrib/initramfs/README.initramfs.markdown
index fa19f00..be3ec71 100644
--- a/zfs/contrib/initramfs/README.initramfs.markdown
+++ b/zfs/contrib/initramfs/README.initramfs.markdown

@@ -1,94 +1,84 @@
-DESCRIPTION
-  These scripts are intended to be used with initramfs-tools, which is a similar
-  software product to "dracut" (which is used in RedHat based distributions),
-  and is mainly used by Debian GNU/Linux and derivatives to create an initramfs
-  so that the system can be booted off a ZFS filesystem. If you have no need or
-  interest in this, then it can safely be ignored.
+## Description
 
-  These script were written with the primary intention of being portable and
-  usable on as many systems as possible.
+These scripts are intended to be used with `initramfs-tools`, which is a
+similar software product to `dracut` (which is used in Red Hat based
+distributions), and is mainly used by Debian GNU/Linux and derivatives.
 
-  This is, in practice, usually not possible. But the intention is there.
-  And it is a good one.
+These scripts share some common functionality with the SysV init scripts,
+primarily the `/etc/zfs/zfs-functions` script.
 
-  They have been tested successfully on:
+## Configuration
 
-    * Debian GNU/Linux Wheezy
-    * Debian GNU/Linux Jessie
+### Root pool/filesystem
 
-  It uses some functionality common with the SYSV init scripts, primarily
-  the "/etc/zfs/zfs-functions" script.
+Different distributions have their own standard on what to specify on the
+kernel command line to boot off a ZFS filesystem.
 
-FUNCTIONALITY
-  * Supports booting of a ZFS snapshot.
-    Do this by cloning the snapshot into a dataset. If this, the resulting
-    dataset, already exists, destroy it. Then mount it as the root filesystem.
-    * If snapshot does not exist, use base dataset (the part before '@')
-      as boot filesystem instead.
-    * Clone with 'mountpoint=none' and 'canmount=noauto' - we mount manually
-      and explicitly.
-    * Allow rollback of snapshots instead of clone it and boot from the clone.
-    * If no snapshot is specified on the 'root=' kernel command line, but
-      there is an '@', then get a list of snapshots below that filesystem
-      and ask the user which to use.
+This script supports the following kernel command line argument combinations
+(in this order - first match wins):
 
-  * Support all currently used kernel command line arguments
-    * Core options:
-      All the different distributions have their own standard on what to specify
-      on the kernel command line to boot of a ZFS filesystem.
+* `rpool=<pool>`
+* `bootfs=<pool>/<dataset>`
+* `rpool=<pool> bootfs=<pool>/<dataset>`
+* `-B zfs-bootfs=<pool>/<fs>`
+* `root=<pool>/<dataset>`
+* `root=ZFS=<pool>/<dataset>`
+* `root=zfs:AUTO`
+* `root=zfs:<pool>/<dataset>`
+* `rpool=rpool`
 
-      Supports the following kernel command line argument combinations
-      (in this order - first match win):
-      * rpool=<pool>			(tries to finds bootfs automatically)
-      * bootfs=<pool>/<dataset>		(uses this for rpool - first part)
-      * rpool=<pool> bootfs=<pool>/<dataset>
-      * -B zfs-bootfs=<pool>/<fs>	(uses this for rpool - first part)
-      * rpool=rpool			(default if none of the above is used)
-      * root=<pool>/<dataset>		(uses this for rpool - first part)
-      * root=ZFS=<pool>/<dataset>	(uses this for rpool - first part, without 'ZFS=')
-      * root=zfs:AUTO			(tries to detect both pool and rootfs
-      * root=zfs:<pool>/<dataset>	(uses this for rpool - first part, without 'zfs:')
+If a pool is specified, it will be used.  Otherwise, in `AUTO` mode, all pools
+will be searched.  Pools may be excluded from the search by listing them in
+`ZFS_POOL_EXCEPTIONS` in `/etc/default/zfs`.
 
-      Option <dataset> could also be <snapshot>
-    * Extra (control) options:
-      * zfsdebug=(on,yes,1)   Show extra debugging information
-      * zfsforce=(on,yes,1)   Force import the pool
-      * rollback=(on,yes,1)   Rollback (instead of clone) the snapshot
+Pools will be imported as follows:
 
-  * 'Smarter' way to import pools. Don't just try cache file or /dev.
-    * Try to use /dev/disk/by-vdev (if /etc/zfs/vdev_id.conf exists),
-    * Try /dev/mapper (to be able to use LUKS backed pools as well as
-      multi-path devices).
-    * /dev/disk/by-id and any other /dev/disk/by-* directory that may exist.
-    * Use /dev as a last ditch attempt.
-    * Fallback to using the cache file if that exist if nothing else worked.
-    * Only try to import pool if it haven't already been imported
-      * This will negate the need to force import a pool that have not been
-        exported cleanly.
-      * Support exclusion of pools to import by setting ZFS_POOL_EXCEPTIONS
-         in /etc/default/zfs.
+* Try `/dev/disk/by-vdev` if it exists; see `/etc/zfs/vdev_id.conf`.
+* Try `/dev/disk/by-id` and any other `/dev/disk/by-*` directories.
+* Try `/dev`.
+* Use the cache file if nothing else worked.
 
-    Controlling in which order devices is searched for is controlled by
-    ZPOOL_IMPORT_PATH variable set in /etc/defaults/zfs.
+This order may be modified by setting `ZPOOL_IMPORT_PATH` in
+`/etc/default/zfs`.
 
-  * Support additional configuration variable ZFS_INITRD_ADDITIONAL_DATASETS
-    to mount additional filesystems not located under your root dataset.
+If a dataset is specified, it will be used as the root filesystem.  Otherwise,
+this script will attempt to find a root filesystem automatically (in the
+specified pool or all pools, as described above).
 
-    For example, if the root fs is specified as 'rpool/ROOT/rootfs', it will
-    automatically and without specific configuration mount any filesystems
-    below this on the mount point specified in the 'mountpoint' property.
-    Such as 'rpool/root/rootfs/var', 'rpool/root/rootfs/usr' etc)
+Filesystems below the root filesystem will be automatically mounted with no
+additional configuration necessary.  For example, if the root filesystem is
+`rpool/ROOT/rootfs`, `rpool/ROOT/rootfs/var`, `rpool/ROOT/rootfs/usr`, etc.
+will be mounted (if they exist).
 
-    However, if one prefer to have separate filesystems, not located below
-    the root fs (such as 'rpool/var', 'rpool/ROOT/opt' etc), special
-    configuration needs to be done. This is what the variable, set in
-    /etc/defaults/zfs file, needs to be configured. The 'mountpoint'
-    property needs to be correct for this to work though.
+### Snapshots
 
-  * Allows mounting a rootfs with mountpoint=legacy set.
+The `<dataset>` can be a snapshot.  In this case, the snapshot will be cloned
+and the clone used as the root filesystem.  Note:
 
-  * Include /etc/modprobe.d/{zfs,spl}.conf in the initrd if it/they exist.
+* If the snapshot does not exist, the base dataset (the part before `@`) is
+  used as the boot filesystem instead.
+* If the resulting clone dataset already exists, it is destroyed.
+* The clone is created with `mountpoint=none` and `canmount=noauto`.  The root
+  filesystem is mounted manually by the initramfs script.
+* If no snapshot is specified on the `root=` kernel command line, but
+  there is an `@`, the user will be prompted to choose a snapshot to use.
 
-  * Include the udev rule to use by-vdev for pool imports.
+### Extra options
 
-  * Include the /etc/default/zfs file to the initrd.
+The following kernel command line arguments are supported:
+
+* `zfsdebug=(on,yes,1)`: Show extra debugging information
+* `zfsforce=(on,yes,1)`: Force import the pool
+* `rollback=(on,yes,1)`: Rollback to (instead of clone) the snapshot
+
+### Unlocking a ZFS encrypted root over SSH
+
+To use this feature:
+
+1. Install the `dropbear-initramfs` package.  You may wish to uninstall the
+   `cryptsetup-initramfs` package to avoid warnings.
+2. Add your SSH key(s) to `/etc/dropbear-initramfs/authorized_keys`.  Note
+   that Dropbear does not support ed25519 keys; use RSA (2048-bit or more)
+   instead.
+3. Rebuild the initramfs with your keys: `update-initramfs -u`
+4. During system boot, log in via SSH and run: `zfsunlock`

diff --git a/zfs/contrib/initramfs/conf-hooks.d/Makefile.am b/zfs/contrib/initramfs/conf-hooks.d/Makefile.am
new file mode 100644
index 0000000..f84ba5c
--- /dev/null
+++ b/zfs/contrib/initramfs/conf-hooks.d/Makefile.am

@@ -0,0 +1,4 @@
+confhooksddir = /usr/share/initramfs-tools/conf-hooks.d
+
+dist_confhooksd_DATA = \
+	zfs

diff --git a/zfs/contrib/initramfs/conf.d/Makefile.am b/zfs/contrib/initramfs/conf.d/Makefile.am
new file mode 100644
index 0000000..5ef27e0
--- /dev/null
+++ b/zfs/contrib/initramfs/conf.d/Makefile.am

@@ -0,0 +1,4 @@
+confddir = /usr/share/initramfs-tools/conf.d
+
+dist_confd_DATA = \
+	zfs

diff --git a/zfs/contrib/initramfs/hooks/.gitignore b/zfs/contrib/initramfs/hooks/.gitignore
index 73304bc..4e1604e 100644
--- a/zfs/contrib/initramfs/hooks/.gitignore
+++ b/zfs/contrib/initramfs/hooks/.gitignore

@@ -1 +1,2 @@
 zfs
+zfsunlock

diff --git a/zfs/contrib/initramfs/hooks/Makefile.am b/zfs/contrib/initramfs/hooks/Makefile.am
index 3d8ef62..0cd1aaf 100644
--- a/zfs/contrib/initramfs/hooks/Makefile.am
+++ b/zfs/contrib/initramfs/hooks/Makefile.am

@@ -1,23 +1,10 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 hooksdir = /usr/share/initramfs-tools/hooks
 
 hooks_SCRIPTS = \
-	zfs
+	zfs \
+	zfsunlock
 
-EXTRA_DIST = \
-	$(top_srcdir)/contrib/initramfs/hooks/zfs.in
-
-$(hooks_SCRIPTS):%:%.in
-	-$(SED) -e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		-e 's,@udevdir\@,$(udevdir),g' \
-		-e 's,@udevruledir\@,$(udevruledir),g' \
-		-e 's,@mounthelperdir\@,$(mounthelperdir),g' \
-		$< >'$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-clean-local::
-	-$(RM) $(hooks_SCRIPTS)
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(hooks_SCRIPTS)
+SUBSTFILES += $(hooks_SCRIPTS)

diff --git a/zfs/contrib/initramfs/hooks/zfs.in b/zfs/contrib/initramfs/hooks/zfs.in
index 15f23c9..546841e 100755
--- a/zfs/contrib/initramfs/hooks/zfs.in
+++ b/zfs/contrib/initramfs/hooks/zfs.in

@@ -1,108 +1,57 @@
 #!/bin/sh
 #
-# Add ZoL filesystem capabilities to an initrd, usually for a native ZFS root.
+# Add OpenZFS filesystem capabilities to an initrd, usually for a native ZFS root.
 #
 
-# This hook installs udev rules for ZoL.
-PREREQ="udev"
-
-# These prerequisites are provided by the zfsutils package. The zdb utility is
-# not strictly required, but it can be useful at the initramfs recovery prompt.
-COPY_EXEC_LIST="@sbindir@/zdb @sbindir@/zpool @sbindir@/zfs"
-COPY_EXEC_LIST="$COPY_EXEC_LIST @mounthelperdir@/mount.zfs @udevdir@/vdev_id"
-COPY_EXEC_LIST="$COPY_EXEC_LIST @udevdir@/zvol_id"
-COPY_FILE_LIST="/etc/hostid @sysconfdir@/zfs/zpool.cache"
-COPY_FILE_LIST="$COPY_FILE_LIST @DEFAULT_INITCONF_DIR@/zfs"
-COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/zfs-functions"
-COPY_FILE_LIST="$COPY_FILE_LIST @sysconfdir@/zfs/vdev_id.conf"
-COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/60-zvol.rules"
-COPY_FILE_LIST="$COPY_FILE_LIST @udevruledir@/69-vdev.rules"
-
-# These prerequisites are provided by the base system.
-COPY_EXEC_LIST="$COPY_EXEC_LIST /usr/bin/dirname /bin/hostname /sbin/blkid"
-COPY_EXEC_LIST="$COPY_EXEC_LIST /usr/bin/env"
-
-# Explicitly specify all kernel modules because automatic dependency resolution
-# is unreliable on many systems.
-BASE_MODULES="zlib_deflate spl zavl zcommon znvpair zunicode zlua zfs icp"
-CRPT_MODULES="sun-ccm sun-gcm sun-ctr"
-MANUAL_ADD_MODULES_LIST="$BASE_MODULES"
-
-# Generic result code.
-RC=0
-
-case $1 in
-prereqs)
-	echo "$PREREQ"
-	exit 0
-	;;
-esac
-
-for ii in $COPY_EXEC_LIST
-do
-	if [ ! -x "$ii" ]
-	then
-		echo "Error: $ii is not executable."
-		RC=2
-	fi
-done
-
-if [ "$RC" -ne 0 ]
-then
-	exit "$RC"
+if [ "$1" = "prereqs" ]; then
+	echo "udev"
+	exit
 fi
 
 . /usr/share/initramfs-tools/hook-functions
 
-mkdir -p "$DESTDIR/etc/"
-
-# ZDB uses pthreads for some functions, but the library dependency is not
-# automatically detected. The `find` utility and extended `cp` options are
-# used here because libgcc_s.so could be in a subdirectory of /lib for
-# multi-arch installations.
-cp --target-directory="$DESTDIR" --parents $(find /lib/ -type f -name libgcc_s.so.1)
-
-for ii in $COPY_EXEC_LIST
-do
-	copy_exec "$ii"
+for req in "@sbindir@/zpool" "@sbindir@/zfs" "@mounthelperdir@/mount.zfs"; do
+	copy_exec "$req" || {
+		echo "$req not available!" >&2
+		exit 2
+	}
 done
 
-for ii in $COPY_FILE_LIST
-do
-	dir=$(dirname "$ii")
-	[ -d "$dir" ] && mkdir -p "$DESTDIR/$dir"
-	[ -f "$ii" ] && cp -p "$ii" "$DESTDIR/$ii"
+copy_exec "@sbindir@/zdb"
+copy_exec "@udevdir@/vdev_id"
+copy_exec "@udevdir@/zvol_id"
+if command -v systemd-ask-password > /dev/null; then
+	copy_exec "$(command -v systemd-ask-password)"
+fi
+
+# We use pthreads, but initramfs-tools from Debian buster doesn't automatically
+# copy this indirect dependency: this can be removed when buster finally dies.
+find /lib/ -type f -name "libgcc_s.so.[1-9]" | while read -r libgcc; do
+	copy_exec "$libgcc"
 done
 
-for ii in $MANUAL_ADD_MODULES_LIST
-do
-	manual_add_modules "$ii"
-done
+# shellcheck disable=SC2050
+if [ @LIBFETCH_DYNAMIC@ -gt 0 ]; then
+	find /lib/ -name "@LIBFETCH_SONAME@" | while read -r libfetch; do
+		copy_exec "$libfetch"
+	done
+fi
 
-if [ -f "/etc/hostname" ]
-then
-	cp -p "/etc/hostname" "$DESTDIR/etc/"
+copy_file config "/etc/hostid"
+copy_file cache  "@sysconfdir@/zfs/zpool.cache"
+copy_file config "@initconfdir@/zfs"
+copy_file config "@sysconfdir@/zfs/zfs-functions"
+copy_file config "@sysconfdir@/zfs/vdev_id.conf"
+copy_file rule   "@udevruledir@/60-zvol.rules"
+copy_file rule   "@udevruledir@/69-vdev.rules"
+
+manual_add_modules zfs
+
+if [ -f "/etc/hostname" ]; then
+	copy_file config "/etc/hostname"
 else
-	hostname >"$DESTDIR/etc/hostname"
+	hostname="$(mktemp -t hostname.XXXXXXXXXX)"
+	hostname > "$hostname"
+	copy_file config "$hostname" "/etc/hostname"
+	rm -f "$hostname"
 fi
-
-for ii in zfs zfs.conf spl spl.conf
-do
-	if [ -f "/etc/modprobe.d/$ii" ]; then
-		if [ ! -d "$DESTDIR/etc/modprobe.d" ]; then
-			mkdir -p $DESTDIR/etc/modprobe.d
-		fi
-		cp -p "/etc/modprobe.d/$ii" $DESTDIR/etc/modprobe.d/
-	fi
-done
-
-# With pull request #1476 (not yet merged) comes a verbose warning
-# if /usr/bin/net doesn't exist or isn't executable. Just create
-# a dummy...
-[ ! -d "$DESTDIR/usr/bin" ] && mkdir -p "$DESTDIR/usr/bin"
-if [ ! -x "$DESTDIR/usr/bin/net" ]; then
-    touch "$DESTDIR/usr/bin/net"
-    chmod +x "$DESTDIR/usr/bin/net"
-fi
-
-exit 0

diff --git a/zfs/contrib/initramfs/hooks/zfsunlock.in b/zfs/contrib/initramfs/hooks/zfsunlock.in
new file mode 100644
index 0000000..4776087
--- /dev/null
+++ b/zfs/contrib/initramfs/hooks/zfsunlock.in

@@ -0,0 +1,10 @@
+#!/bin/sh
+
+if [ "$1" = "prereqs" ]; then
+	echo "dropbear"
+	exit
+fi
+
+. /usr/share/initramfs-tools/hook-functions
+
+copy_exec /usr/share/initramfs-tools/zfsunlock /usr/bin/zfsunlock

diff --git a/zfs/contrib/initramfs/scripts/Makefile.am b/zfs/contrib/initramfs/scripts/Makefile.am
index 2a14209..5bcbfb9 100644
--- a/zfs/contrib/initramfs/scripts/Makefile.am
+++ b/zfs/contrib/initramfs/scripts/Makefile.am

@@ -1,6 +1,12 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 scriptsdir = /usr/share/initramfs-tools/scripts
 
-dist_scripts_DATA = \
+dist_scripts_SCRIPTS = \
 	zfs
 
 SUBDIRS = local-top
+
+SHELLCHECK_IGNORE = ,SC2295
+SHELLCHECKDIRS = $(SUBDIRS)
+SHELLCHECK_SHELL = sh

diff --git a/zfs/contrib/initramfs/scripts/local-top/Makefile.am b/zfs/contrib/initramfs/scripts/local-top/Makefile.am
index c820325..897f9b2 100644
--- a/zfs/contrib/initramfs/scripts/local-top/Makefile.am
+++ b/zfs/contrib/initramfs/scripts/local-top/Makefile.am

@@ -1,3 +1,6 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 localtopdir = /usr/share/initramfs-tools/scripts/local-top
 
-EXTRA_DIST = zfs
+dist_localtop_SCRIPTS = \
+        zfs

diff --git a/zfs/contrib/initramfs/scripts/local-top/zfs b/zfs/contrib/initramfs/scripts/local-top/zfs
index e8e5cd2..6b80e9f 100755
--- a/zfs/contrib/initramfs/scripts/local-top/zfs
+++ b/zfs/contrib/initramfs/scripts/local-top/zfs

@@ -1,18 +1,11 @@
 #!/bin/sh
-PREREQ="mdadm mdrun multipath"
+# shellcheck disable=SC2154
 
-prereqs()
-{
-        echo "$PREREQ"
-}
 
-case $1 in
-# get pre-requisites
-prereqs)
-        prereqs
+if [ "$1" = "prereqs" ]; then
+        echo mdadm mdrun multipath
         exit 0
-        ;;
-esac
+fi
 
 
 #
@@ -20,10 +13,10 @@
 #
 message()
 {
-        if [ -x /bin/plymouth ] && plymouth --ping; then
-                plymouth message --text="$@"
+        if plymouth --ping 2>/dev/null; then
+                plymouth message --text="$*"
         else
-                echo "$@" >&2
+                echo "$*" >&2
         fi
         return 0
 }

diff --git a/zfs/contrib/initramfs/scripts/zfs b/zfs/contrib/initramfs/scripts/zfs
index dbc4e25..3c51b53 100644
--- a/zfs/contrib/initramfs/scripts/zfs
+++ b/zfs/contrib/initramfs/scripts/zfs

@@ -5,6 +5,8 @@
 #
 # Enable this by passing boot=zfs on the kernel command line.
 #
+# $quiet, $root, $rpool, $bootfs come from the cmdline:
+# shellcheck disable=SC2154
 
 # Source the common functions
 . /etc/zfs/zfs-functions
@@ -15,8 +17,8 @@
 # See "4.5 Disable root prompt on the initramfs" of Securing Debian Manual:
 # https://www.debian.org/doc/manuals/securing-debian-howto/ch4.en.html
 shell() {
-	if type panic > /dev/null 2>&1; then
-		panic $@
+	if command -v panic > /dev/null 2>&1; then
+		panic
 	else
 		/bin/sh
 	fi
@@ -26,22 +28,23 @@
 # pools and mounting any filesystems.
 pre_mountroot()
 {
-	if type run_scripts > /dev/null 2>&1 && \
-	    [ -f "/scripts/local-top" -o -d "/scripts/local-top" ]
+	if command -v run_scripts > /dev/null 2>&1
 	then
-		[ "$quiet" != "y" ] && \
-		    zfs_log_begin_msg "Running /scripts/local-top"
-		run_scripts /scripts/local-top
-		[ "$quiet" != "y" ] && zfs_log_end_msg
-	fi
+		if [ -f "/scripts/local-top" ] || [ -d "/scripts/local-top" ]
+		then
+			[ "$quiet" != "y" ] && \
+			    zfs_log_begin_msg "Running /scripts/local-top"
+			run_scripts /scripts/local-top
+			[ "$quiet" != "y" ] && zfs_log_end_msg
+		fi
 
-	if type run_scripts > /dev/null 2>&1 && \
-	    [ -f "/scripts/local-premount" -o -d "/scripts/local-premount" ]
-	then
-		[ "$quiet" != "y" ] && \
-		    zfs_log_begin_msg "Running /scripts/local-premount"
-		run_scripts /scripts/local-premount
-		[ "$quiet" != "y" ] && zfs_log_end_msg
+	  if [ -f "/scripts/local-premount" ] || [ -d "/scripts/local-premount" ]
+	  then
+			[ "$quiet" != "y" ] && \
+			    zfs_log_begin_msg "Running /scripts/local-premount"
+			run_scripts /scripts/local-premount
+			[ "$quiet" != "y" ] && zfs_log_end_msg
+		fi
 	fi
 }
 
@@ -57,10 +60,10 @@
 # Get a ZFS filesystem property value.
 get_fs_value()
 {
-	local fs="$1"
-	local value=$2
+	fs="$1"
+	value=$2
 
-	"${ZFS}" get -H -ovalue $value "$fs" 2> /dev/null
+	"${ZFS}" get -H -ovalue "$value" "$fs" 2> /dev/null
 }
 
 # Find the 'bootfs' property on pool $1.
@@ -68,7 +71,7 @@
 # pool by exporting it again.
 find_rootfs()
 {
-	local pool="$1"
+	pool="$1"
 
 	# If 'POOL_IMPORTED' isn't set, no pool imported and therefore
 	# we won't be able to find a root fs.
@@ -84,7 +87,7 @@
 
 	# Make sure it's not '-' and that it starts with /.
 	if [ "${ZFS_BOOTFS}" != "-" ] && \
-		$(get_fs_value "${ZFS_BOOTFS}" mountpoint | grep -q '^/$')
+		get_fs_value "${ZFS_BOOTFS}" mountpoint | grep -q '^/$'
 	then
 		# Keep it mounted
 		POOL_IMPORTED=1
@@ -93,23 +96,17 @@
 
 	# Not boot fs here, export it and later try again..
 	"${ZPOOL}" export "$pool"
-	POOL_IMPORTED=""
-
+	POOL_IMPORTED=
+	ZFS_BOOTFS=
 	return 1
 }
 
 # Support function to get a list of all pools, separated with ';'
 find_pools()
 {
-	local CMD="$*"
-	local pools pool
-
-	pools=$($CMD 2> /dev/null | \
-		grep -E "pool:|^[a-zA-Z0-9]" | \
-		sed 's@.*: @@' | \
-		while read pool; do \
-		    echo -n "$pool;"
-		done)
+	pools=$("$@" 2> /dev/null | \
+		sed -Ee '/pool:|^[a-zA-Z0-9]/!d' -e 's@.*: @@' | \
+		tr '\n' ';')
 
 	echo "${pools%%;}" # Return without the last ';'.
 }
@@ -117,8 +114,6 @@
 # Get a list of all available pools
 get_pools()
 {
-	local available_pools npools
-
 	if [ -n "${ZFS_POOL_IMPORT}" ]; then
 		echo "$ZFS_POOL_IMPORT"
 		return 0
@@ -159,9 +154,8 @@
 	# Filter out any exceptions...
 	if [ -n "$ZFS_POOL_EXCEPTIONS" ]
 	then
-		local found=""
-		local apools=""
-		local pool exception
+		found=""
+		apools=""
 		OLD_IFS="$IFS" ; IFS=";"
 
 		for pool in $available_pools
@@ -194,26 +188,25 @@
 # Import given pool $1
 import_pool()
 {
-	local pool="$1"
-	local dirs dir
+	pool="$1"
 
 	# Verify that the pool isn't already imported
 	# Make as sure as we can to not require '-f' to import.
-	"${ZPOOL}" get name,guid -o value -H 2>/dev/null | grep -Fxq "$pool" && return 0
+	"${ZPOOL}" get -H -o value name,guid 2>/dev/null | grep -Fxq "$pool" && return 0
 
 	# For backwards compatibility, make sure that ZPOOL_IMPORT_PATH is set
 	# to something we can use later with the real import(s). We want to
 	# make sure we find all by* dirs, BUT by-vdev should be first (if it
 	# exists).
-	if [ -n "$USE_DISK_BY_ID" -a -z "$ZPOOL_IMPORT_PATH" ]
+	if [ -n "$USE_DISK_BY_ID" ] && [ -z "$ZPOOL_IMPORT_PATH" ]
 	then
-		dirs="$(for dir in $(echo /dev/disk/by-*)
+		dirs="$(for dir in /dev/disk/by-*
 		do
 			# Ignore by-vdev here - we want it first!
 			echo "$dir" | grep -q /by-vdev && continue
 			[ ! -d "$dir" ] && continue
 
-			echo -n "$dir:"
+			printf "%s" "$dir:"
 		done | sed 's,:$,,g')"
 
 		if [ -d "/dev/disk/by-vdev" ]
@@ -277,7 +270,9 @@
 # with more logging etc.
 load_module_initrd()
 {
-	if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" > 0 ]
+	[ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY"
+
+	if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null
 	then
 		if [ "$quiet" != "y" ]; then
 			zfs_log_begin_msg "Sleeping for" \
@@ -288,9 +283,9 @@
 	fi
 
 	# Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear.
-	if type wait_for_udev > /dev/null 2>&1 ; then
+	if command -v wait_for_udev > /dev/null 2>&1 ; then
 		wait_for_udev 10
-	elif type wait_for_dev > /dev/null 2>&1 ; then
+	elif command -v wait_for_dev > /dev/null 2>&1 ; then
 		wait_for_dev
 	fi
 
@@ -300,7 +295,7 @@
 	# Load the module
 	load_module "zfs" || return 1
 
-	if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" > 0 ]
+	if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null
 	then
 		if [ "$quiet" != "y" ]; then
 			zfs_log_begin_msg "Sleeping for" \
@@ -316,12 +311,10 @@
 # Mount a given filesystem
 mount_fs()
 {
-	local fs="$1"
-	local mountpoint
+	fs="$1"
 
 	# Check that the filesystem exists
-	"${ZFS}" list -oname -tfilesystem -H "${fs}" > /dev/null 2>&1
-	[ "$?" -ne 0 ] && return 1
+	"${ZFS}" list -oname -tfilesystem -H "${fs}" > /dev/null 2>&1 ||  return 1
 
 	# Skip filesystems with canmount=off.  The root fs should not have
 	# canmount=off, but ignore it for backwards compatibility just in case.
@@ -333,34 +326,30 @@
 
 	# Need the _original_ datasets mountpoint!
 	mountpoint=$(get_fs_value "$fs" mountpoint)
-	if [ "$mountpoint" = "legacy" -o "$mountpoint" = "none" ]; then
+	ZFS_CMD="mount -o zfsutil -t zfs"
+	if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
 		# Can't use the mountpoint property. Might be one of our
 		# clones. Check the 'org.zol:mountpoint' property set in
 		# clone_snap() if that's usable.
-		mountpoint=$(get_fs_value "$fs" org.zol:mountpoint)
-		if [ "$mountpoint" = "legacy" -o \
-		    "$mountpoint" = "none" -o \
-		    "$mountpoint" = "-" ]
+		mountpoint1=$(get_fs_value "$fs" org.zol:mountpoint)
+		if [ "$mountpoint1" = "legacy" ] ||
+		   [ "$mountpoint1" = "none" ] ||
+		   [ "$mountpoint1" = "-" ]
 		then
 			if [ "$fs" != "${ZFS_BOOTFS}" ]; then
 				# We don't have a proper mountpoint and this
 				# isn't the root fs.
 				return 0
-			else
-				# Last hail-mary: Hope 'rootmnt' is set!
-				mountpoint=""
 			fi
-		fi
-
-		if [ "$mountpoint" = "legacy" ]; then
-			ZFS_CMD="mount -t zfs"
+			# Don't use mount.zfs -o zfsutil for legacy mountpoint
+			if [ "$mountpoint" = "legacy" ]; then
+				ZFS_CMD="mount -t zfs"
+			fi
+			# Last hail-mary: Hope 'rootmnt' is set!
+			mountpoint=""
 		else
-			# If it's not a legacy filesystem, it can only be a
-			# native one...
-			ZFS_CMD="mount -o zfsutil -t zfs"
+			mountpoint="$mountpoint1"
 		fi
-	else
-		ZFS_CMD="mount -o zfsutil -t zfs"
 	fi
 
 	# Possibly decrypt a filesystem using native encryption.
@@ -396,46 +385,48 @@
 # Unlock a ZFS native encrypted filesystem.
 decrypt_fs()
 {
-	local fs="$1"
-	
+	fs="$1"
+
 	# If pool encryption is active and the zfs command understands '-o encryption'
-	if [ "$(zpool list -H -o feature@encryption $(echo "${fs}" | awk -F\/ '{print $1}'))" = 'active' ]; then
+	if [ "$(zpool list -H -o feature@encryption "${fs%%/*}")" = 'active' ]; then
 
 		# Determine dataset that holds key for root dataset
 		ENCRYPTIONROOT="$(get_fs_value "${fs}" encryptionroot)"
 		KEYLOCATION="$(get_fs_value "${ENCRYPTIONROOT}" keylocation)"
 
+		echo "${ENCRYPTIONROOT}" > /run/zfs_fs_name
+
 		# If root dataset is encrypted...
 		if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
 			KEYSTATUS="$(get_fs_value "${ENCRYPTIONROOT}" keystatus)"
 			# Continue only if the key needs to be loaded
 			[ "$KEYSTATUS" = "unavailable" ] || return 0
-			TRY_COUNT=3
 
-			# If key is stored in a file, do not prompt
+			# Do not prompt if key is stored noninteractively.
 			if ! [ "${KEYLOCATION}" = "prompt" ]; then
 				$ZFS load-key "${ENCRYPTIONROOT}"
 
 			# Prompt with plymouth, if active
-			elif [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then
-				while [ $TRY_COUNT -gt 0 ]; do
+			elif /bin/plymouth --ping 2>/dev/null; then
+				echo "plymouth" > /run/zfs_console_askpwd_cmd
+				for _ in 1 2 3; do
 					plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \
 						$ZFS load-key "${ENCRYPTIONROOT}" && break
-					TRY_COUNT=$((TRY_COUNT - 1))
 				done
 
-			# Prompt with systemd, if active 
+			# Prompt with systemd, if active
 			elif [ -e /run/systemd/system ]; then
-				while [ $TRY_COUNT -gt 0 ]; do
-					systemd-ask-password "Encrypted ZFS password for ${ENCRYPTIONROOT}" --no-tty | \
+				echo "systemd-ask-password" > /run/zfs_console_askpwd_cmd
+				for _ in 1 2 3; do
+					systemd-ask-password --no-tty "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \
 						$ZFS load-key "${ENCRYPTIONROOT}" && break
-					TRY_COUNT=$((TRY_COUNT - 1))
 				done
 
 			# Prompt with ZFS tty, otherwise
 			else
-				# Setting "printk" temporarily to "7" will allow prompt even if kernel option "quiet"
-				storeprintk="$(awk '{print $1}' /proc/sys/kernel/printk)"
+				# Temporarily setting "printk" to "7" allows the prompt to appear even when the "quiet" kernel option has been used
+				echo "load-key" > /run/zfs_console_askpwd_cmd
+				read -r storeprintk _ < /proc/sys/kernel/printk
 				echo 7 > /proc/sys/kernel/printk
 				$ZFS load-key "${ENCRYPTIONROOT}"
 				echo "$storeprintk" > /proc/sys/kernel/printk
@@ -449,7 +440,7 @@
 # Destroy a given filesystem.
 destroy_fs()
 {
-	local fs="$1"
+	fs="$1"
 
 	[ "$quiet" != "y" ] && \
 	    zfs_log_begin_msg "Destroying '$fs'"
@@ -484,9 +475,9 @@
 # mounted with a 'zfs mount -a' in the init/systemd scripts).
 clone_snap()
 {
-	local snap="$1"
-	local destfs="$2"
-	local mountpoint="$3"
+	snap="$1"
+	destfs="$2"
+	mountpoint="$3"
 
 	[ "$quiet" != "y" ] && zfs_log_begin_msg "Cloning '$snap' to '$destfs'"
 
@@ -511,7 +502,7 @@
 		echo "Error: $ZFS_ERROR"
 		echo ""
 		echo "Failed to clone snapshot."
-		echo "Make sure that the any problems are corrected and then make sure"
+		echo "Make sure that any problems are corrected and then make sure"
 		echo "that the dataset '$destfs' exists and is bootable."
 		shell
 	else
@@ -524,7 +515,7 @@
 # Rollback a given snapshot.
 rollback_snap()
 {
-	local snap="$1"
+	snap="$1"
 
 	[ "$quiet" != "y" ] && zfs_log_begin_msg "Rollback $snap"
 
@@ -554,9 +545,7 @@
 # to the user to choose from.
 ask_user_snap()
 {
-	local fs="$1"
-	local i=1
-	local SNAP snapnr snap debug
+	fs="$1"
 
 	# We need to temporarily disable debugging. Set 'debug' so we
 	# remember to enabled it again.
@@ -569,16 +558,25 @@
 	# Because we need the resulting snapshot, which is sent on
 	# stdout to the caller, we use stderr for our questions.
 	echo "What snapshot do you want to boot from?" > /dev/stderr
-	while read snap; do
-	    echo "  $i: ${snap}" > /dev/stderr
-	    eval `echo SNAP_$i=$snap`
-	    i=$((i + 1))
-	done <<EOT
-$("${ZFS}" list -H -oname -tsnapshot -r "${fs}")
-EOT
+	# shellcheck disable=SC2046
+	IFS="
+" set -- $("${ZFS}" list -H -oname -tsnapshot -r "${fs}")
 
-	echo -n "  Snap nr [1-$((i-1))]? " > /dev/stderr
-	read snapnr
+	i=1
+	for snap in "$@"; do
+		echo "  $i: $snap"
+		i=$((i + 1))
+	done > /dev/stderr
+
+	# expr instead of test here because [ a -lt 0 ] errors out,
+	# but expr falls back to lexicographical, which works out right
+	snapnr=0
+	while expr "$snapnr" "<" 1 > /dev/null ||
+	    expr "$snapnr" ">" "$#" > /dev/null
+	do
+		printf "%s" "Snap nr [1-$#]? " > /dev/stderr
+		read -r snapnr
+	done
 
 	# Re-enable debugging.
 	if [ -n "${debug}" ]; then
@@ -586,16 +584,16 @@
 		set -x
 	fi
 
-	echo "$(eval echo "$"SNAP_$snapnr)"
+	eval echo '$'"$snapnr"
 }
 
 setup_snapshot_booting()
 {
-	local snap="$1"
-	local s destfs subfs mountpoint retval=0 filesystems fs
+	snap="$1"
+	retval=0
 
-	# Make sure that the snapshot specified actually exist.
-	if [ ! $(get_fs_value "${snap}" type) ]
+	# Make sure that the snapshot specified actually exists.
+	if [ ! "$(get_fs_value "${snap}" type)" ]
 	then
 		# Snapshot does not exist (...@<null> ?)
 		# ask the user for a snapshot to use.
@@ -612,7 +610,7 @@
 	then
 		# If the destination dataset for the clone
 		# already exists, destroy it. Recursively
-		if [ $(get_fs_value "${rootfs}_${snapname}" type) ]; then
+		if [ "$(get_fs_value "${rootfs}_${snapname}" type)" ]; then
 			filesystems=$("${ZFS}" list -oname -tfilesystem -H \
 			    -r -Sname "${ZFS_BOOTFS}")
 			for fs in $filesystems; do
@@ -647,8 +645,8 @@
 			# with clone_snap(). If legacy or none, then use
 			# the sub fs value.
 			mountpoint=$(get_fs_value "${s%%@*}" mountpoint)
-			if [ "$mountpoint" = "legacy" -o \
-			    "$mountpoint" = "none" ]
+			if [ "$mountpoint" = "legacy" ] || \
+			   [ "$mountpoint" = "none" ]
 			then
 				if [ -n "${subfs}" ]; then
 					mountpoint="${subfs}"
@@ -673,8 +671,6 @@
 # This is the main function.
 mountroot()
 {
-	local snaporig snapsub destfs pool POOLS
-
 	# ----------------------------------------------------------------
 	# I N I T I A L   S E T U P
 
@@ -708,7 +704,8 @@
 
 	# ------------
 	# Look for the cache file (if any).
-	[ ! -f ${ZPOOL_CACHE} ] && unset ZPOOL_CACHE
+	[ -f "${ZPOOL_CACHE}" ] || unset ZPOOL_CACHE
+	[ -s "${ZPOOL_CACHE}" ] || unset ZPOOL_CACHE
 
 	# ------------
 	# Compatibility: 'ROOT' is for Debian GNU/Linux (etc),
@@ -737,7 +734,7 @@
 	# No longer set in the defaults file, but it could have been set in
 	# get_pools() in some circumstances. If it's something, but not 'yes',
 	# it's no good to us.
-	[ -n "$USE_DISK_BY_ID" -a "$USE_DISK_BY_ID" != 'yes' ] && \
+	[ -n "$USE_DISK_BY_ID" ] && [ "$USE_DISK_BY_ID" != 'yes' ] && \
 	    unset USE_DISK_BY_ID
 
 	# ----------------------------------------------------------------
@@ -783,12 +780,12 @@
 	# ------------
 	# If we have 'ROOT' (see above), but not 'ZFS_BOOTFS', then use
 	# 'ROOT'
-	[ -n "$ROOT" -a -z "${ZFS_BOOTFS}" ] && ZFS_BOOTFS="$ROOT"
+	[ -n "$ROOT" ] && [ -z "${ZFS_BOOTFS}" ] && ZFS_BOOTFS="$ROOT"
 
 	# ------------
 	# Check for the `-B zfs-bootfs=%s/%u,...` kind of parameter.
 	# NOTE: Only use the pool name and dataset. The rest is not
-	#       supported by ZoL (whatever it's for).
+	#       supported by OpenZFS (whatever it's for).
 	if [ -z "$ZFS_RPOOL" ]
 	then
 		# The ${zfs-bootfs} variable is set at the kernel command
@@ -798,17 +795,18 @@
 		#
 		# Reassign the variable by dumping the environment and
 		# stripping the zfs-bootfs= prefix.  Let the shell handle
-		# quoting through the eval command.
+		# quoting through the eval command:
+		# shellcheck disable=SC2046
 		eval ZFS_RPOOL=$(set | sed -n -e 's,^zfs-bootfs=,,p')
 	fi
 
 	# ------------
 	# No root fs or pool specified - do auto detect.
-	if [ -z "$ZFS_RPOOL" -a -z "${ZFS_BOOTFS}" ]
+	if [ -z "$ZFS_RPOOL" ] && [ -z "${ZFS_BOOTFS}" ]
 	then
 		# Do auto detect. Do this by 'cheating' - set 'root=zfs:AUTO'
 		# which will be caught later
-		ROOT=zfs:AUTO
+		ROOT='zfs:AUTO'
 	fi
 
 	# ----------------------------------------------------------------
@@ -819,6 +817,11 @@
 	then
 		# Try to detect both pool and root fs.
 
+		# If we got here, that means we don't have a hint as to
+		# the root dataset, but with root=zfs:AUTO on the cmdline,
+		# ZFS_BOOTFS says "zfs:AUTO" here and interferes with checks later
+		ZFS_BOOTFS=
+
 		[ "$quiet" != "y" ] && \
 		    zfs_log_begin_msg "Attempting to import additional pools."
 
@@ -836,8 +839,8 @@
 		do
 			[ -z "$pool" ] && continue
 
-			import_pool "$pool"
-			find_rootfs "$pool"
+			IFS="$OLD_IFS" import_pool "$pool"
+			IFS="$OLD_IFS" find_rootfs "$pool" && break
 		done
 		IFS="$OLD_IFS"
 
@@ -853,7 +856,7 @@
 	fi
 
 	# Import the pool (if not already done so in the AUTO check above).
-	if [ -n "$ZFS_RPOOL" -a -z "${POOL_IMPORTED}" ]
+	if [ -n "$ZFS_RPOOL" ] && [ -z "${POOL_IMPORTED}" ]
 	then
 		[ "$quiet" != "y" ] && \
 		    zfs_log_begin_msg "Importing ZFS root pool '$ZFS_RPOOL'"
@@ -875,44 +878,20 @@
 		echo ""
 		echo "No pool imported. Manually import the root pool"
 		echo "at the command prompt and then exit."
-		echo "Hint: Try:  zpool import -R ${rootmnt} -N ${ZFS_RPOOL}"
+		echo "Hint: Try:  zpool import -N ${ZFS_RPOOL}"
 		shell
 	fi
 
 	# In case the pool was specified as guid, resolve guid to name
-	pool="$("${ZPOOL}" get name,guid -o name,value -H | \
+	pool="$("${ZPOOL}" get -H -o name,value name,guid | \
 	    awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')"
 	if [ -n "$pool" ]; then
 		# If $ZFS_BOOTFS contains guid, replace the guid portion with $pool
 		ZFS_BOOTFS=$(echo "$ZFS_BOOTFS" | \
-			sed -e "s/$("${ZPOOL}" get guid -o value "$pool" -H)/$pool/g")
+			sed -e "s/$("${ZPOOL}" get -H -o value guid "$pool")/$pool/g")
 		ZFS_RPOOL="${pool}"
 	fi
 
-	# Set the no-op scheduler on the disks containing the vdevs of
-	# the root pool. For single-queue devices, this scheduler is
-	# "noop", for multi-queue devices, it is "none".
-	# ZFS already does this for wholedisk vdevs (for all pools), so this
-	# is only important for partitions.
-	"${ZPOOL}" status -L "${ZFS_RPOOL}" 2> /dev/null |
-	    awk '/^\t / && !/(mirror|raidz)/ {
-	        dev=$1;
-	        sub(/[0-9]+$/, "", dev);
-	        print dev
-	    }' |
-	while read -r i
-	do
-		SCHEDULER=/sys/block/$i/queue/scheduler
-		if [ -e "${SCHEDULER}" ]
-		then
-			# Query to see what schedulers are available
-			case "$(cat "${SCHEDULER}")" in
-				*noop*) echo noop > "${SCHEDULER}" ;;
-				*none*) echo none > "${SCHEDULER}" ;;
-			esac
-		fi
-	done
-
 
 	# ----------------------------------------------------------------
 	# P R E P A R E   R O O T   F I L E S Y S T E M
@@ -958,12 +937,22 @@
 
 	# Go through the complete list (recursively) of all filesystems below
 	# the real root dataset
-	filesystems=$("${ZFS}" list -oname -tfilesystem -H -r "${ZFS_BOOTFS}")
-	for fs in $filesystems $ZFS_INITRD_ADDITIONAL_DATASETS
-	do
+	filesystems="$("${ZFS}" list -oname -tfilesystem -H -r "${ZFS_BOOTFS}")"
+	OLD_IFS="$IFS" ; IFS="
+"
+	for fs in $filesystems; do
+		IFS="$OLD_IFS" mount_fs "$fs"
+	done
+	IFS="$OLD_IFS"
+	for fs in $ZFS_INITRD_ADDITIONAL_DATASETS; do
 		mount_fs "$fs"
 	done
 
+	touch /run/zfs_unlock_complete
+	if [ -e /run/zfs_unlock_complete_notify ]; then
+		read -r < /run/zfs_unlock_complete_notify
+	fi
+
 	# ------------
 	# Debugging information
 	if [ -n "${ZFS_DEBUG}" ]
@@ -979,8 +968,8 @@
 		echo
 
 		echo "=> waiting for ENTER before continuing because of 'zfsdebug=1'. "
-		echo -n "   'c' for shell, 'r' for reboot, 'ENTER' to continue. "
-		read b
+		printf "%s" "   'c' for shell, 'r' for reboot, 'ENTER' to continue. "
+		read -r b
 
 		[ "$b" = "c" ] && /bin/sh
 		[ "$b" = "r" ] && reboot -f
@@ -990,12 +979,14 @@
 
 	# ------------
 	# Run local bottom script
-	if type run_scripts > /dev/null 2>&1 && \
-	    [ -f "/scripts/local-bottom" -o -d "/scripts/local-bottom" ]
+	if command -v run_scripts > /dev/null 2>&1
 	then
-		[ "$quiet" != "y" ] && \
-		    zfs_log_begin_msg "Running /scripts/local-bottom"
-		run_scripts /scripts/local-bottom
-		[ "$quiet" != "y" ] && zfs_log_end_msg
+		if [ -f "/scripts/local-bottom" ] || [ -d "/scripts/local-bottom" ]
+		then
+			[ "$quiet" != "y" ] && \
+			    zfs_log_begin_msg "Running /scripts/local-bottom"
+			run_scripts /scripts/local-bottom
+			[ "$quiet" != "y" ] && zfs_log_end_msg
+		fi
 	fi
 }

diff --git a/zfs/contrib/initramfs/zfsunlock b/zfs/contrib/initramfs/zfsunlock
new file mode 100755
index 0000000..cf8e452
--- /dev/null
+++ b/zfs/contrib/initramfs/zfsunlock

@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# Interactive unlock helper for encrypted ZFS filesystems during an
+# initramfs boot.  It waits for /run/zfs_fs_name to appear, then keeps
+# asking for the passphrase of the filesystem named in that file until
+# /run/zfs_unlock_complete exists (the boot script's mountroot() touches
+# that file once all filesystems are mounted).
+#
+# NOTE(review): /run/zfs_fs_name and /run/zfs_console_askpwd_cmd are
+# presumably written by the initramfs boot script while it prompts on the
+# console -- confirm against that script.
+
+set -eu
+if [ ! -e /run/zfs_fs_name ]; then
+	echo "Wait for the root pool to be imported or press Ctrl-C to exit."
+fi
+while [ ! -e /run/zfs_fs_name ]; do
+	# Nothing needed unlocking: the boot finished without us.
+	if [ -e /run/zfs_unlock_complete ]; then
+		exit 0
+	fi
+	sleep 1
+done
+echo
+echo "Unlocking encrypted ZFS filesystems..."
+echo "Enter the password or press Ctrl-C to exit."
+echo
+zfs_fs_name=""
+# mountroot() blocks reading this FIFO (when it exists) after unlocking
+# completes, so create it before the first key is loaded.
+if [ ! -e /run/zfs_unlock_complete_notify ]; then
+	mkfifo /run/zfs_unlock_complete_notify
+fi
+while [ ! -e /run/zfs_unlock_complete ]; do
+	zfs_fs_name=$(cat /run/zfs_fs_name)
+	zfs_console_askpwd_cmd=$(cat /run/zfs_console_askpwd_cmd)
+	# A wrong password makes load-key fail; "|| true" keeps "set -e" from
+	# ending the script so the user can retry.
+	systemd-ask-password "Encrypted ZFS password for ${zfs_fs_name}:" | \
+		/sbin/zfs load-key "$zfs_fs_name" || true
+	if [ "$(/sbin/zfs get -H -ovalue keystatus "$zfs_fs_name" 2> /dev/null)" = "available" ]; then
+		echo "Password for $zfs_fs_name accepted."
+		# Find the first process whose ps line matches the recorded
+		# command (excluding this awk itself) and stop it.
+		zfs_console_askpwd_pid=$(ps | awk '!'"/awk/ && /$zfs_console_askpwd_cmd/ { print \$1; exit }")
+		if [ -n "$zfs_console_askpwd_pid" ]; then
+			# The process may exit between ps and kill; under
+			# "set -e" an unguarded kill failure would abort this
+			# script after the key was already accepted.
+			kill "$zfs_console_askpwd_pid" 2> /dev/null || true
+		fi
+		# Wait for another filesystem to unlock.
+		while [ "$(cat /run/zfs_fs_name)" = "$zfs_fs_name" ] && [ ! -e /run/zfs_unlock_complete ]; do
+			sleep 1
+		done
+	else
+		echo "Wrong password.  Try again."
+	fi
+done
+echo "Unlocking complete.  Resuming boot sequence..."
+echo "Please reconnect in a while."
+# Release mountroot(), which is waiting to read from the FIFO.
+echo "ok" > /run/zfs_unlock_complete_notify

diff --git a/zfs/contrib/intel_qat/patch/0001-cryptohash.diff b/zfs/contrib/intel_qat/patch/0001-cryptohash.diff
new file mode 100644
index 0000000..2d87c8f
--- /dev/null
+++ b/zfs/contrib/intel_qat/patch/0001-cryptohash.diff

@@ -0,0 +1,17 @@
+cryptohash.h was dropped and merged with crypto/sha.h in the 5.8 kernel. Details in:
+https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=228c4f265c6eb60eaa4ed0edb3bf7c113173576c
+
+---
+diff --git a/quickassist/utilities/osal/src/linux/kernel_space/OsalCryptoInterface.c b/quickassist/utilities/osal/src/linux/kernel_space/OsalCryptoInterface.c
+index 4c389da..e602377 100644
+--- a/quickassist/utilities/osal/src/linux/kernel_space/OsalCryptoInterface.c
++++ b/quickassist/utilities/osal/src/linux/kernel_space/OsalCryptoInterface.c
+@@ -66,7 +66,7 @@
+ 
+ #include "Osal.h"
+ #include <linux/crypto.h>
+-#include <linux/cryptohash.h>
++#include <crypto/sha.h>
+ #include <linux/version.h>
+ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29))
+ #include <crypto/internal/hash.h>

diff --git a/zfs/contrib/intel_qat/patch/0001-pci_aer.diff b/zfs/contrib/intel_qat/patch/0001-pci_aer.diff
new file mode 100644
index 0000000..7516ac4
--- /dev/null
+++ b/zfs/contrib/intel_qat/patch/0001-pci_aer.diff

@@ -0,0 +1,20 @@
+In kernel 5.7 the pci_cleanup_aer_uncorrect_error_status() function was
+renamed with the following commit:
+
+git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=894020fdd88c1e9a74c60b67c0f19f1c7696ba2f
+
+This simply updates the function call with the proper name (pci_aer_clear_nonfatal_status()).
+
+---
+diff --git a/quickassist/qat/drivers/crypto/qat/qat_common/adf_aer.c b/quickassist/qat/drivers/crypto/qat/qat_common/adf_aer.c
+index a6ce6df..545bb79 100644
+--- a/quickassist/qat/drivers/crypto/qat/qat_common/adf_aer.c
++++ b/quickassist/qat/drivers/crypto/qat/qat_common/adf_aer.c
+@@ -304,7 +304,7 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
+ 		pr_err("QAT: Can't find acceleration device\n");
+ 		return PCI_ERS_RESULT_DISCONNECT;
+ 	}
+-	pci_cleanup_aer_uncorrect_error_status(pdev);
++	pci_aer_clear_nonfatal_status(pdev);
+ 	if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
+ 		return PCI_ERS_RESULT_DISCONNECT;

diff --git a/zfs/contrib/intel_qat/patch/0001-timespec.diff b/zfs/contrib/intel_qat/patch/0001-timespec.diff
new file mode 100644
index 0000000..04fb053
--- /dev/null
+++ b/zfs/contrib/intel_qat/patch/0001-timespec.diff

@@ -0,0 +1,35 @@
+This patch attempts to expose timespec and getnstimeofday which were
+explicitly hidden in the 5.6 kernel with the introduction of the
+following commits:
+
+git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c766d1472c70d25ad475cf56042af1652e792b23
+git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=412c53a680a97cb1ae2c0ab60230e193bee86387
+
+Code received from users@dpdk.org, issue tracked under QATE-59888.
+
+---
+diff --git a/quickassist/lookaside/access_layer/src/sample_code/performance/framework/linux/kernel_space/cpa_sample_code_utils.c b/quickassist/lookaside/access_layer/src/sample_code/performance/framework/linux/kernel_space/cpa_sample_code_utils.c
+index 4639834..523e376 100644
+--- a/quickassist/lookaside/access_layer/src/sample_code/performance/framework/linux/kernel_space/cpa_sample_code_utils.c
++++ b/quickassist/lookaside/access_layer/src/sample_code/performance/framework/linux/kernel_space/cpa_sample_code_utils.c
+@@ -107,6 +107,8 @@ atomic_t arrived;
+ extern struct device perf_device;
+ #endif
+ 
++#define timespec timespec64
++#define getnstimeofday ktime_get_real_ts64
+ 
+ /* Define a number for timeout */
+ #define SAMPLE_CODE_MAX_LONG (0x7FFFFFFF)
+diff --git a/quickassist/qat/compat/qat_compat.h b/quickassist/qat/compat/qat_compat.h
+index 2a02eaf..3515092 100644
+--- a/quickassist/qat/compat/qat_compat.h
++++ b/quickassist/qat/compat/qat_compat.h
+@@ -466,4 +466,7 @@ static inline void pci_ignore_hotplug(struct pci_dev *dev)
+ #if (RHEL_RELEASE_CODE && RHEL_RELEASE_VERSION(7, 3) <= RHEL_RELEASE_CODE)
+ #define QAT_KPT_CAP_DISCOVERY
+ #endif
++
++#define timespec timespec64
++#define getnstimeofday ktime_get_real_ts64
+ #endif /* _QAT_COMPAT_H_ */

diff --git a/zfs/contrib/intel_qat/patch/LICENSE b/zfs/contrib/intel_qat/patch/LICENSE
new file mode 100644
index 0000000..8e12726
--- /dev/null
+++ b/zfs/contrib/intel_qat/patch/LICENSE

@@ -0,0 +1,30 @@
+BSD LICENSE
+
+Copyright (c) Intel Corporation.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+  * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

diff --git a/zfs/contrib/intel_qat/readme.md b/zfs/contrib/intel_qat/readme.md
new file mode 100644
index 0000000..7e45d39
--- /dev/null
+++ b/zfs/contrib/intel_qat/readme.md

@@ -0,0 +1,27 @@
+# Intel_QAT easy install script
+
+This contrib contains community compatibility patches to get Intel QAT working on the following kernel versions:
+- 5.6
+- 5.7
+- 5.8
+
+These patches are based on the following Intel QAT version:
+[1.7.l.4.10.0-00014](https://01.org/sites/default/files/downloads/qat1.7.l.4.10.0-00014.tar.gz)
+
+When using QAT with the above kernel versions, the following patches need to be applied using:
+patch -p1 < _$PATCH_
+_Where $PATCH refers to the path of the patch in question_
+
+### 5.6
+/patch/0001-timespec.diff
+
+### 5.7
+/patch/0001-pci_aer.diff
+
+### 5.8
+/patch/0001-cryptohash.diff
+
+
+_Patches are supplied by [Storage Performance Development Kit (SPDK)](https://github.com/spdk/spdk)_
+
+

diff --git a/zfs/contrib/pam_zfs_key/Makefile.am b/zfs/contrib/pam_zfs_key/Makefile.am
new file mode 100644
index 0000000..f0f2550
--- /dev/null
+++ b/zfs/contrib/pam_zfs_key/Makefile.am

@@ -0,0 +1,19 @@
+# Automake rules for the pam_zfs_key PAM module.
+include $(top_srcdir)/config/Rules.am
+
+AM_CFLAGS += $(LIBCRYPTO_CFLAGS)
+
+# The module is a libtool library installed into the PAM module directory.
+pammodule_LTLIBRARIES = pam_zfs_key.la
+
+pam_zfs_key_la_SOURCES = pam_zfs_key.c
+
+# In-tree ZFS libraries first, then the external PAM and crypto libraries.
+pam_zfs_key_la_LIBADD = \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libuutil/libuutil.la \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	-lpam $(LIBCRYPTO_LIBS)
+
+pam_zfs_key_la_LDFLAGS = -version-info 1:0:0 -avoid-version -module -shared
+
+# PAM configuration snippet shipped alongside the module.
+dist_pamconfigs_DATA = zfs_key

diff --git a/zfs/contrib/pam_zfs_key/pam_zfs_key.c b/zfs/contrib/pam_zfs_key/pam_zfs_key.c
new file mode 100644
index 0000000..3137037
--- /dev/null
+++ b/zfs/contrib/pam_zfs_key/pam_zfs_key.c

@@ -0,0 +1,806 @@
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the <organization> nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2020, Felix Dörre
+ * All rights reserved.
+ */
+
+#include <sys/dsl_crypt.h>
+#include <sys/byteorder.h>
+#include <libzfs.h>
+
+#include <syslog.h>
+
+#include <sys/zio_crypt.h>
+#include <openssl/evp.h>
+
+#define	PAM_SM_AUTH
+#define	PAM_SM_PASSWORD
+#define	PAM_SM_SESSION
+#include <security/pam_modules.h>
+
+#if	defined(__linux__)
+#include <security/pam_ext.h>
+#elif	defined(__FreeBSD__)
+#include <security/pam_appl.h>
+/*
+ * Minimal pam_syslog() stand-in for FreeBSD: forwards the message to
+ * vsyslog() and ignores the PAM handle.
+ */
+static void
+pam_syslog(pam_handle_t *pamh, int loglevel, const char *fmt, ...)
+{
+	va_list ap;
+
+	(void) pamh;
+	va_start(ap, fmt);
+	vsyslog(loglevel, fmt, ap);
+	va_end(ap);
+}
+#endif
+
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/wait.h>
+#include <pwd.h>
+
+#include <sys/mman.h>
+
+static const char PASSWORD_VAR_NAME[] = "pam_zfs_key_authtok";
+
+static libzfs_handle_t *g_zfs;
+
+static void destroy_pw(pam_handle_t *pamh, void *data, int errcode);
+
+typedef struct {
+	size_t len;
+	char *value;
+} pw_password_t;
+
+/*
+ * Allocate a password container with a zero-filled buffer of len bytes.
+ * mlock() is requested on the buffer; its result is not checked.
+ * Returns NULL if either allocation fails.
+ */
+static pw_password_t *
+alloc_pw_size(size_t len)
+{
+	pw_password_t *holder = malloc(sizeof (pw_password_t));
+	if (holder == NULL)
+		return (NULL);
+
+	/*
+	 * The use of malloc() triggers a spurious gcc 11 -Wmaybe-uninitialized
+	 * warning in the mlock() function call below, so use calloc().
+	 */
+	char *buf = calloc(len, 1);
+	if (buf == NULL) {
+		free(holder);
+		return (NULL);
+	}
+
+	holder->len = len;
+	holder->value = buf;
+	mlock(holder->value, holder->len);
+	return (holder);
+}
+
+/*
+ * Allocate a password container holding a copy of the NUL-terminated
+ * string source (terminator included in len).  source must not be NULL.
+ * Returns NULL on allocation failure.
+ */
+static pw_password_t *
+alloc_pw_string(const char *source)
+{
+	size_t len = strlen(source) + 1;
+
+	/* Reuse the common allocate + mlock path instead of duplicating it. */
+	pw_password_t *pw = alloc_pw_size(len);
+	if (pw == NULL) {
+		return (NULL);
+	}
+	memcpy(pw->value, source, len);
+	return (pw);
+}
+
+/*
+ * Wipe, unlock and release a password container.  Accepts NULL.
+ */
+static void
+pw_free(pw_password_t *pw)
+{
+	if (pw == NULL) {
+		return;
+	}
+	if (pw->value != NULL) {
+		/*
+		 * Clear through a volatile pointer: a plain bzero()/memset()
+		 * right before free() is a dead store that the compiler is
+		 * allowed to remove, which would leave the secret in freed
+		 * heap memory.
+		 */
+		volatile char *p = pw->value;
+		for (size_t i = 0; i < pw->len; i++) {
+			p[i] = 0;
+		}
+		munlock(pw->value, pw->len);
+		free(pw->value);
+	}
+	free(pw);
+}
+
+/*
+ * Obtain the authentication token from the PAM stack and return a private
+ * copy of it, or NULL (after logging) when no token can be had.
+ */
+static pw_password_t *
+pw_fetch(pam_handle_t *pamh)
+{
+	const char *token;
+	int rc = pam_get_authtok(pamh, PAM_AUTHTOK, &token, NULL);
+
+	if (rc != PAM_SUCCESS) {
+		pam_syslog(pamh, LOG_ERR,
+		    "couldn't get password from PAM stack");
+		return (NULL);
+	}
+	if (token == NULL) {
+		pam_syslog(pamh, LOG_ERR,
+		    "token from PAM stack is null");
+		return (NULL);
+	}
+
+	return (alloc_pw_string(token));
+}
+
+/*
+ * Fetch the password and stash it as PAM module data under
+ * PASSWORD_VAR_NAME, with destroy_pw registered as its cleanup callback.
+ * Returns the stored password, or NULL on failure.
+ */
+static const pw_password_t *
+pw_fetch_lazy(pam_handle_t *pamh)
+{
+	pw_password_t *fetched = pw_fetch(pamh);
+	if (fetched == NULL)
+		return (NULL);
+
+	if (pam_set_data(pamh, PASSWORD_VAR_NAME, fetched, destroy_pw) ==
+	    PAM_SUCCESS)
+		return (fetched);
+
+	/* PAM did not take ownership, so release the copy ourselves. */
+	pw_free(fetched);
+	pam_syslog(pamh, LOG_ERR, "pam_set_data failed");
+	return (NULL);
+}
+
+/*
+ * Return the password previously stored as PAM module data; when none has
+ * been stored yet, fetch and store it first.  Returns NULL on any other
+ * pam_get_data() result.
+ */
+static const pw_password_t *
+pw_get(pam_handle_t *pamh)
+{
+	const pw_password_t *stored = NULL;
+
+	switch (pam_get_data(pamh, PASSWORD_VAR_NAME,
+	    (const void**)(&stored))) {
+	case PAM_SUCCESS:
+		return (stored);
+	case PAM_NO_MODULE_DATA:
+		return (pw_fetch_lazy(pamh));
+	default:
+		pam_syslog(pamh, LOG_ERR, "password not available");
+		return (NULL);
+	}
+}
+
+/*
+ * Replace the stored password module data with NULL.
+ * Returns 0 on success, -1 (after logging) on failure.
+ */
+static int
+pw_clear(pam_handle_t *pamh)
+{
+	if (pam_set_data(pamh, PASSWORD_VAR_NAME, NULL, NULL) == PAM_SUCCESS)
+		return (0);
+
+	pam_syslog(pamh, LOG_ERR, "clearing password failed");
+	return (-1);
+}
+
+/*
+ * Cleanup callback registered with pam_set_data(): frees the stored
+ * password, if any.  pamh and errcode are unused.
+ */
+static void
+destroy_pw(pam_handle_t *pamh, void *data, int errcode)
+{
+	(void) pamh;
+	(void) errcode;
+
+	if (data == NULL)
+		return;
+	pw_free((pw_password_t *)data);
+}
+
+/*
+ * Initialise the global libzfs handle g_zfs.
+ * Returns 0 on success, otherwise the errno observed after libzfs_init()
+ * failed (the failure is logged).
+ */
+static int
+pam_zfs_init(pam_handle_t *pamh)
+{
+	g_zfs = libzfs_init();
+	if (g_zfs != NULL)
+		return (0);
+
+	int saved_errno = errno;
+	pam_syslog(pamh, LOG_ERR, "Zfs initialization error: %s",
+	    libzfs_error_init(saved_errno));
+	return (saved_errno);
+}
+
+/*
+ * Release the global libzfs handle opened by pam_zfs_init().
+ * Safe to call when no handle is open.
+ */
+static void
+pam_zfs_free(void)
+{
+	if (g_zfs == NULL) {
+		return;
+	}
+	libzfs_fini(g_zfs);
+	/* Do not leave a dangling handle behind for later calls. */
+	g_zfs = NULL;
+}
+
+/*
+ * Derive a WRAPPING_KEY_LEN-byte wrapping key from passphrase with
+ * PBKDF2-HMAC-SHA1.
+ *
+ * When nvlist is non-NULL a fresh random salt is read from /dev/urandom
+ * and, together with DEFAULT_PBKDF2_ITERATIONS, recorded in nvlist.
+ * When nvlist is NULL the salt and iteration count are read from the
+ * properties of dataset ds.
+ *
+ * Returns the key (release with pw_free()), or NULL on failure.
+ */
+static pw_password_t *
+prepare_passphrase(pam_handle_t *pamh, zfs_handle_t *ds,
+    const char *passphrase, nvlist_t *nvlist)
+{
+	pw_password_t *key = alloc_pw_size(WRAPPING_KEY_LEN);
+	if (!key) {
+		return (NULL);
+	}
+	uint64_t salt;
+	uint64_t iters;
+	if (nvlist != NULL) {
+		/* O_CLOEXEC matches how the counter file is opened. */
+		int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
+		if (fd < 0) {
+			pw_free(key);
+			return (NULL);
+		}
+		size_t bytes_read = 0;
+		char *buf = (char *)&salt;
+		const size_t bytes = sizeof (uint64_t);
+		while (bytes_read < bytes) {
+			ssize_t len = read(fd, buf + bytes_read,
+			    bytes - bytes_read);
+			/* An interrupted read is not an error; retry it. */
+			if (len < 0 && errno == EINTR) {
+				continue;
+			}
+			/*
+			 * Treat end-of-file (0) like an error as well:
+			 * otherwise the loop would never terminate.
+			 */
+			if (len <= 0) {
+				pam_syslog(pamh, LOG_ERR,
+				    "failed to read salt from /dev/urandom");
+				close(fd);
+				pw_free(key);
+				return (NULL);
+			}
+			bytes_read += (size_t)len;
+		}
+		close(fd);
+
+		if (nvlist_add_uint64(nvlist,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt)) {
+			pam_syslog(pamh, LOG_ERR,
+			    "failed to add salt to nvlist");
+			pw_free(key);
+			return (NULL);
+		}
+		iters = DEFAULT_PBKDF2_ITERATIONS;
+		if (nvlist_add_uint64(nvlist, zfs_prop_to_name(
+		    ZFS_PROP_PBKDF2_ITERS), iters)) {
+			pam_syslog(pamh, LOG_ERR,
+			    "failed to add iters to nvlist");
+			pw_free(key);
+			return (NULL);
+		}
+	} else {
+		salt = zfs_prop_get_int(ds, ZFS_PROP_PBKDF2_SALT);
+		iters = zfs_prop_get_int(ds, ZFS_PROP_PBKDF2_ITERS);
+	}
+
+	/*
+	 * NOTE(review): LE_64 presumably normalises the salt's byte order
+	 * so the derived key does not depend on host endianness -- confirm.
+	 */
+	salt = LE_64(salt);
+	if (!PKCS5_PBKDF2_HMAC_SHA1((char *)passphrase,
+	    strlen(passphrase), (uint8_t *)&salt,
+	    sizeof (uint64_t), iters, WRAPPING_KEY_LEN,
+	    (uint8_t *)key->value)) {
+		pam_syslog(pamh, LOG_ERR, "pbkdf failed");
+		pw_free(key);
+		return (NULL);
+	}
+	return (key);
+}
+
+/*
+ * Report whether the encryption key of filesystem ds_name is loaded.
+ * Returns 1 if the keystatus property is anything other than
+ * ZFS_KEYSTATUS_UNAVAILABLE, 0 if it is unavailable, and -1 if the
+ * dataset cannot be opened.
+ */
+static int
+is_key_loaded(pam_handle_t *pamh, const char *ds_name)
+{
+	zfs_handle_t *zhp = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
+
+	if (zhp == NULL) {
+		pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
+		return (-1);
+	}
+
+	int status = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+	zfs_close(zhp);
+
+	if (status == ZFS_KEYSTATUS_UNAVAILABLE)
+		return (0);
+	return (1);
+}
+
+/*
+ * Re-wrap the key of filesystem ds_name with a key derived from
+ * passphrase, setting keylocation=prompt and keyformat=passphrase.
+ * Returns 0 on success, -1 on failure (logged).
+ */
+static int
+change_key(pam_handle_t *pamh, const char *ds_name,
+    const char *passphrase)
+{
+	zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
+	if (ds == NULL) {
+		pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
+		return (-1);
+	}
+
+	int rc = -1;
+	int err;
+	nvlist_t *props = fnvlist_alloc();
+	/* Also stores the new salt and iteration count in props. */
+	pw_password_t *key = prepare_passphrase(pamh, ds, passphrase, props);
+	if (key == NULL)
+		goto out;
+
+	if (nvlist_add_string(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+	    "prompt")) {
+		pam_syslog(pamh, LOG_ERR, "nvlist_add failed for keylocation");
+		goto out;
+	}
+	if (nvlist_add_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+	    ZFS_KEYFORMAT_PASSPHRASE)) {
+		pam_syslog(pamh, LOG_ERR, "nvlist_add failed for keyformat");
+		goto out;
+	}
+
+	err = lzc_change_key(ds_name, DCP_CMD_NEW_KEY, props,
+	    (uint8_t *)key->value, WRAPPING_KEY_LEN);
+	if (err) {
+		pam_syslog(pamh, LOG_ERR, "change_key failed: %d", err);
+		goto out;
+	}
+	rc = 0;
+
+out:
+	/* Single cleanup path shared by every exit after zfs_open(). */
+	if (key != NULL)
+		pw_free(key);
+	nvlist_free(props);
+	zfs_close(ds);
+	return (rc);
+}
+
+/*
+ * Load the key of filesystem ds_name from passphrase and mount it.
+ * Returns 0 on success, -1 on failure (logged).
+ */
+static int
+decrypt_mount(pam_handle_t *pamh, const char *ds_name,
+    const char *passphrase)
+{
+	zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
+	if (ds == NULL) {
+		pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
+		return (-1);
+	}
+
+	int rc = -1;
+	/* NULL nvlist: use the salt and iterations stored on the dataset. */
+	pw_password_t *key = prepare_passphrase(pamh, ds, passphrase, NULL);
+	if (key != NULL) {
+		int err = lzc_load_key(ds_name, B_FALSE,
+		    (uint8_t *)key->value, WRAPPING_KEY_LEN);
+		/* The derived key is not needed past this point. */
+		pw_free(key);
+		if (err) {
+			pam_syslog(pamh, LOG_ERR, "load_key failed: %d", err);
+		} else if ((err = zfs_mount(ds, NULL, 0)) != 0) {
+			pam_syslog(pamh, LOG_ERR, "mount failed: %d", err);
+		} else {
+			rc = 0;
+		}
+	}
+
+	zfs_close(ds);
+	return (rc);
+}
+
+/*
+ * Unmount filesystem ds_name and then unload its key.
+ * Returns 0 on success, -1 on failure (logged); the key is not unloaded
+ * when the unmount fails.
+ */
+static int
+unmount_unload(pam_handle_t *pamh, const char *ds_name)
+{
+	zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
+	if (ds == NULL) {
+		pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
+		return (-1);
+	}
+
+	int rc = -1;
+	int err = zfs_unmount(ds, NULL, 0);
+	if (err) {
+		pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", err);
+	} else if ((err = lzc_unload_key(ds_name)) != 0) {
+		pam_syslog(pamh, LOG_ERR, "unload_key failed with: %d", err);
+	} else {
+		rc = 0;
+	}
+
+	zfs_close(ds);
+	return (rc);
+}
+
+/*
+ * Per-invocation module configuration, filled in by zfs_key_config_load()
+ * and released by zfs_key_config_free().
+ */
+typedef struct {
+	/* Parent dataset of the home datasets; default "rpool/home", */
+	/* overridden by the "homes=" module argument.  Heap-allocated. */
+	char *homes_prefix;
+	/* Directory holding the per-uid session counters; overridden by */
+	/* the "runstatedir=" module argument.  Heap-allocated. */
+	char *runstatedir;
+	/* Copy of the user's passwd home directory; set only when the */
+	/* "prop_mountpoint" module argument is given, otherwise NULL. */
+	char *homedir;
+	/* Dataset name found by mountpoint lookup, otherwise NULL. */
+	char *dsname;
+	/* uid taken from the user's passwd entry. */
+	uid_t uid;
+	/* User name as returned by pam_get_user(); not freed by */
+	/* zfs_key_config_free(). */
+	const char *username;
+	/* 1 by default; cleared by the "nounmount" module argument. */
+	int unmount_and_unload;
+} zfs_key_config_t;
+
+/*
+ * Populate config from built-in defaults, the PAM user's passwd entry and
+ * the module arguments in argv:
+ *   homes=<dataset>       parent dataset of the home datasets
+ *   runstatedir=<path>    directory for the session counters
+ *   nounmount             keep the dataset mounted and the key loaded
+ *   prop_mountpoint       look the dataset up by the user's home directory
+ *
+ * Returns 0 on success.  On failure returns -1 with nothing left
+ * allocated in config, so the caller must not call zfs_key_config_free().
+ */
+static int
+zfs_key_config_load(pam_handle_t *pamh, zfs_key_config_t *config,
+    int argc, const char **argv)
+{
+	config->homes_prefix = strdup("rpool/home");
+	if (config->homes_prefix == NULL) {
+		pam_syslog(pamh, LOG_ERR, "strdup failure");
+		return (-1);
+	}
+	config->runstatedir = strdup(RUNSTATEDIR "/pam_zfs_key");
+	if (config->runstatedir == NULL) {
+		pam_syslog(pamh, LOG_ERR, "strdup failure");
+		free(config->homes_prefix);
+		return (-1);
+	}
+	const char *name;
+	if (pam_get_user(pamh, &name, NULL) != PAM_SUCCESS) {
+		pam_syslog(pamh, LOG_ERR,
+		    "couldn't get username from PAM stack");
+		free(config->runstatedir);
+		free(config->homes_prefix);
+		return (-1);
+	}
+	struct passwd *entry = getpwnam(name);
+	if (!entry) {
+		free(config->runstatedir);
+		free(config->homes_prefix);
+		return (-1);
+	}
+	config->uid = entry->pw_uid;
+	config->username = name;
+	config->unmount_and_unload = 1;
+	config->dsname = NULL;
+	config->homedir = NULL;
+	for (int c = 0; c < argc; c++) {
+		/* Which string field this argument replaces, if any. */
+		char **slot = NULL;
+		const char *value = NULL;
+
+		if (strncmp(argv[c], "homes=", 6) == 0) {
+			slot = &config->homes_prefix;
+			value = argv[c] + 6;
+		} else if (strncmp(argv[c], "runstatedir=", 12) == 0) {
+			slot = &config->runstatedir;
+			value = argv[c] + 12;
+		} else if (strcmp(argv[c], "nounmount") == 0) {
+			config->unmount_and_unload = 0;
+		} else if (strcmp(argv[c], "prop_mountpoint") == 0) {
+			slot = &config->homedir;
+			value = entry->pw_dir;
+		}
+		if (slot == NULL) {
+			continue;
+		}
+
+		/*
+		 * Copy first and only then release the old value, so a
+		 * repeated argument does not leak and an allocation failure
+		 * is reported instead of leaving a NULL field behind.
+		 */
+		char *copy = strdup(value);
+		if (copy == NULL) {
+			pam_syslog(pamh, LOG_ERR, "strdup failure");
+			free(config->homedir);
+			free(config->runstatedir);
+			free(config->homes_prefix);
+			config->homedir = NULL;
+			config->runstatedir = NULL;
+			config->homes_prefix = NULL;
+			return (-1);
+		}
+		free(*slot);
+		*slot = copy;
+	}
+	return (0);
+}
+
+/*
+ * Free every heap-allocated string owned by config.  The structure itself
+ * and config->username are not freed.
+ */
+static void
+zfs_key_config_free(zfs_key_config_t *config)
+{
+	char *owned[] = {
+		config->homes_prefix,
+		config->runstatedir,
+		config->homedir,
+		config->dsname,
+	};
+
+	for (size_t i = 0; i < sizeof (owned) / sizeof (owned[0]); i++)
+		free(owned[i]);
+}
+
+/*
+ * zfs_iter_filesystems() callback: looks for the filesystem whose
+ * mountpoint property equals the home directory in the zfs_key_config_t
+ * passed as data.  On a match the dataset name is stored in
+ * target->dsname and 1 is returned; otherwise 0 is returned.
+ * The handle zhp is always closed here.
+ */
+static int
+find_dsname_by_prop_value(zfs_handle_t *zhp, void *data)
+{
+	zfs_type_t type = zfs_get_type(zhp);
+	zfs_key_config_t *target = data;
+	char mountpoint[ZFS_MAXPROPLEN];
+
+	/* Skip any datasets whose type does not match */
+	if ((type & ZFS_TYPE_FILESYSTEM) == 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/*
+	 * Skip any datasets whose mountpoint is unavailable or does not
+	 * match.  The lookup result must be checked: on failure the buffer
+	 * is not guaranteed to hold a string and must not be compared.
+	 */
+	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0 ||
+	    strcmp(target->homedir, mountpoint) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/* Drop any earlier result so a second match cannot leak it. */
+	free(target->dsname);
+	target->dsname = strdup(zfs_get_name(zhp));
+	zfs_close(zhp);
+	return (1);
+}
+
+/*
+ * Work out the name of the user's dataset.
+ *
+ * With a home directory configured, the filesystems directly below
+ * homes_prefix are searched for one whose mountpoint equals it.
+ * Otherwise the name is "<homes_prefix>/<username>".
+ *
+ * Returns a heap-allocated name owned by the caller, or NULL when no
+ * dataset can be determined.
+ */
+static char *
+zfs_key_config_get_dataset(zfs_key_config_t *config)
+{
+	if (config->homes_prefix == NULL)
+		return (NULL);
+
+	if (config->homedir != NULL) {
+		zfs_handle_t *parent = zfs_open(g_zfs, config->homes_prefix,
+		    ZFS_TYPE_FILESYSTEM);
+		if (parent == NULL) {
+			pam_syslog(NULL, LOG_ERR, "dataset %s not found",
+			    config->homes_prefix);
+			return (NULL);
+		}
+
+		(void) zfs_iter_filesystems(parent, find_dsname_by_prop_value,
+		    config);
+		zfs_close(parent);
+
+		/* Hand ownership of the found name over to the caller. */
+		char *found = config->dsname;
+		config->dsname = NULL;
+		return (found);
+	}
+
+	const size_t cap = ZFS_MAX_DATASET_NAME_LEN;
+	const size_t needed = strlen(config->homes_prefix) + 1
+	    + strlen(config->username);
+	if (needed > cap)
+		return (NULL);
+
+	char *name = malloc(cap + 1);
+	if (name == NULL)
+		return (NULL);
+
+	(void) snprintf(name, cap + 1, "%s/%s", config->homes_prefix,
+	    config->username);
+	return (name);
+}
+
+/*
+ * Add delta to the session counter kept in "<runstatedir>/<uid>" and
+ * return the new value (never below 0).  The runtime directory is created
+ * if needed and forced to root:root, mode 0700; the counter file is held
+ * under an exclusive flock() while it is updated.
+ *
+ * Returns -1 on any failure (logged where an errno is available).
+ */
+static int
+zfs_key_config_modify_session_counter(pam_handle_t *pamh,
+    zfs_key_config_t *config, int delta)
+{
+	const char *runtime_path = config->runstatedir;
+	if (runtime_path == NULL) {
+		pam_syslog(pamh, LOG_ERR, "No runtime path configured");
+		return (-1);
+	}
+	if (mkdir(runtime_path, S_IRWXU) != 0 && errno != EEXIST) {
+		pam_syslog(pamh, LOG_ERR, "Can't create runtime path: %d",
+		    errno);
+		return (-1);
+	}
+	if (chown(runtime_path, 0, 0) != 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",
+		    errno);
+		return (-1);
+	}
+	if (chmod(runtime_path, S_IRWXU) != 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't chmod runtime path: %d",
+		    errno);
+		return (-1);
+	}
+
+	char *counter_path;
+	/* uid_t has no portable printf conversion; cast it for %u. */
+	if (asprintf(&counter_path, "%s/%u", runtime_path,
+	    (unsigned int)config->uid) == -1)
+		return (-1);
+
+	const int fd = open(counter_path,
+	    O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
+	    S_IRUSR | S_IWUSR);
+	free(counter_path);
+	if (fd < 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", errno);
+		return (-1);
+	}
+	if (flock(fd, LOCK_EX) != 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't lock counter file: %d", errno);
+		close(fd);
+		return (-1);
+	}
+
+	/* Read the current value; an empty file parses as 0. */
+	char counter[20];
+	char *pos = counter;
+	size_t remaining = sizeof (counter) - 1;
+	ssize_t ret;
+	counter[sizeof (counter) - 1] = 0;
+	while (remaining > 0 && (ret = read(fd, pos, remaining)) != 0) {
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			/* Do not compute a new value from partial data. */
+			pam_syslog(pamh, LOG_ERR,
+			    "Can't read counter file: %d", errno);
+			close(fd);
+			return (-1);
+		}
+		remaining -= (size_t)ret;
+		pos += ret;
+	}
+	*pos = 0;
+	long int counter_value = strtol(counter, NULL, 10);
+	counter_value += delta;
+	if (counter_value < 0) {
+		counter_value = 0;
+	}
+
+	/* Rewrite the file from the start with the new value. */
+	if (lseek(fd, 0, SEEK_SET) == (off_t)-1 || ftruncate(fd, 0) != 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't truncate counter file: %d",
+		    errno);
+		close(fd);
+		return (-1);
+	}
+	snprintf(counter, sizeof (counter), "%ld", counter_value);
+	remaining = strlen(counter);
+	pos = counter;
+	while (remaining > 0) {
+		ret = write(fd, pos, remaining);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0) {
+			/* The stored count is now unreliable; report it. */
+			pam_syslog(pamh, LOG_ERR,
+			    "Can't write counter file: %d", errno);
+			close(fd);
+			return (-1);
+		}
+		remaining -= (size_t)ret;
+		pos += ret;
+	}
+	/* Closing the descriptor also drops the flock(). */
+	close(fd);
+	return ((int)counter_value);
+}
+
+/*
+ * PAM authentication hook.  Performs no check of its own: it only
+ * captures the password from the PAM stack and stores it as module data.
+ * Returns PAM_AUTH_ERR when the password cannot be captured.
+ */
+__attribute__((visibility("default")))
+PAM_EXTERN int
+pam_sm_authenticate(pam_handle_t *pamh, int flags,
+    int argc, const char **argv)
+{
+	const pw_password_t *captured = pw_fetch_lazy(pamh);
+
+	return (captured != NULL ? PAM_SUCCESS : PAM_AUTH_ERR);
+}
+
+/*
+ * PAM credential hook.  This module manages no credentials, so the call
+ * always succeeds; all arguments are ignored.
+ */
+__attribute__((visibility("default")))
+PAM_EXTERN int
+pam_sm_setcred(pam_handle_t *pamh, int flags,
+    int argc, const char **argv)
+{
+	return (PAM_SUCCESS);
+}
+
+/*
+ * PAM password-change hook.
+ *
+ * Every call verifies that the user's dataset exists and that its key is
+ * currently loaded.  When flags contains PAM_UPDATE_AUTHTOK the dataset
+ * key is additionally re-wrapped with the new password and the stored
+ * password is cleared.
+ *
+ * Returns PAM_SUCCESS on success or for users with uid < 1000 (nothing to
+ * do), PAM_PERM_DENIED when not running as root or when the key is not
+ * loaded, and PAM_SERVICE_ERR on any other failure.
+ */
+__attribute__((visibility("default")))
+PAM_EXTERN int
+pam_sm_chauthtok(pam_handle_t *pamh, int flags,
+    int argc, const char **argv)
+{
+	if (geteuid() != 0) {
+		pam_syslog(pamh, LOG_ERR,
+		    "Cannot change the ZFS key when not being root.");
+		return (PAM_PERM_DENIED);
+	}
+	zfs_key_config_t config;
+	if (zfs_key_config_load(pamh, &config, argc, argv) == -1) {
+		return (PAM_SERVICE_ERR);
+	}
+	/* NOTE(review): uid < 1000 presumably means system accounts. */
+	if (config.uid < 1000) {
+		zfs_key_config_free(&config);
+		return (PAM_SUCCESS);
+	}
+	/* One libzfs session serves both the check and the key change. */
+	if (pam_zfs_init(pamh) != 0) {
+		zfs_key_config_free(&config);
+		return (PAM_SERVICE_ERR);
+	}
+
+	int ret = PAM_SERVICE_ERR;
+	char *dataset = zfs_key_config_get_dataset(&config);
+	if (dataset == NULL) {
+		goto out;
+	}
+
+	int key_loaded = is_key_loaded(pamh, dataset);
+	if (key_loaded == -1) {
+		goto out;
+	}
+	if (!key_loaded) {
+		/* The key can only be re-wrapped while it is loaded. */
+		pam_syslog(pamh, LOG_ERR,
+		    "key not loaded, refusing to change the password");
+		ret = PAM_PERM_DENIED;
+		goto out;
+	}
+
+	if ((flags & PAM_UPDATE_AUTHTOK) != 0) {
+		const pw_password_t *token = pw_get(pamh);
+		if (token == NULL) {
+			goto out;
+		}
+		if (change_key(pamh, dataset, token->value) == -1) {
+			goto out;
+		}
+		if (pw_clear(pamh) == -1) {
+			goto out;
+		}
+	}
+	ret = PAM_SUCCESS;
+
+out:
+	/* Common cleanup for every exit after libzfs was initialised. */
+	free(dataset);
+	pam_zfs_free();
+	zfs_key_config_free(&config);
+	return (ret);
+}
+
+/*
+ * PAM session-open hook.
+ *
+ * Increments the per-user session counter and, when the counter reads
+ * exactly 1 afterwards, calls decrypt_mount() on the configured dataset
+ * with the password obtained from pw_get(); the password is then dropped
+ * with pw_clear().
+ *
+ * Returns:
+ *   PAM_SUCCESS      not running as root (only logged), config.uid is
+ *                    below 1000, the counter is not 1 after the increment,
+ *                    or the dataset was handled successfully
+ *   PAM_SESSION_ERR  the configuration could not be loaded or no password
+ *                    is available
+ *   PAM_SERVICE_ERR  any other failure
+ *
+ * The symbol carries the same default-visibility attribute as the other
+ * pam_sm_* entry points so that it stays exported when the module is built
+ * with hidden visibility.
+ */
+__attribute__((visibility("default")))
+PAM_EXTERN int
+pam_sm_open_session(pam_handle_t *pamh, int flags,
+    int argc, const char **argv)
+{
+	if (geteuid() != 0) {
+		pam_syslog(pamh, LOG_ERR,
+		    "Cannot zfs_mount when not being root.");
+		return (PAM_SUCCESS);
+	}
+	zfs_key_config_t config;
+	if (zfs_key_config_load(pamh, &config, argc, argv) != 0) {
+		return (PAM_SESSION_ERR);
+	}
+
+	if (config.uid < 1000) {
+		zfs_key_config_free(&config);
+		return (PAM_SUCCESS);
+	}
+
+	/*
+	 * NOTE(review): the counter is bumped before the mount is attempted
+	 * and is not rolled back on the error paths below; any value other
+	 * than 1 is treated as "nothing to do".  Confirm both are intended.
+	 */
+	int counter = zfs_key_config_modify_session_counter(pamh, &config, 1);
+	if (counter != 1) {
+		zfs_key_config_free(&config);
+		return (PAM_SUCCESS);
+	}
+
+	const pw_password_t *token = pw_get(pamh);
+	if (token == NULL) {
+		zfs_key_config_free(&config);
+		return (PAM_SESSION_ERR);
+	}
+	if (pam_zfs_init(pamh) != 0) {
+		zfs_key_config_free(&config);
+		return (PAM_SERVICE_ERR);
+	}
+	char *dataset = zfs_key_config_get_dataset(&config);
+	if (dataset == NULL) {
+		pam_zfs_free();
+		zfs_key_config_free(&config);
+		return (PAM_SERVICE_ERR);
+	}
+	int mounted = decrypt_mount(pamh, dataset, token->value);
+	free(dataset);
+	pam_zfs_free();
+	zfs_key_config_free(&config);
+	/* pw_clear() is only reached when decrypt_mount() succeeded. */
+	if (mounted == -1 || pw_clear(pamh) == -1) {
+		return (PAM_SERVICE_ERR);
+	}
+	return (PAM_SUCCESS);
+}
+
+/*
+ * PAM session-close hook.
+ *
+ * Decrements the per-user session counter.  When the counter reads 0
+ * afterwards and config.unmount_and_unload is set, unmount_unload() is
+ * called on the configured dataset.
+ *
+ * Returns:
+ *   PAM_SUCCESS      not running as root (only logged), config.uid is
+ *                    below 1000, the counter is not 0 after the decrement,
+ *                    unmount_and_unload is not set, or unmount_unload()
+ *                    succeeded
+ *   PAM_SERVICE_ERR  pam_zfs_init() failed
+ *   PAM_SESSION_ERR  any other failure
+ */
+__attribute__((visibility("default")))
+PAM_EXTERN int
+pam_sm_close_session(pam_handle_t *pamh, int flags,
+    int argc, const char **argv)
+{
+	zfs_key_config_t config;
+	char *dataset;
+	int remaining, rc;
+
+	if (geteuid() != 0) {
+		pam_syslog(pamh, LOG_ERR,
+		    "Cannot zfs_mount when not being root.");
+		return (PAM_SUCCESS);
+	}
+	if (zfs_key_config_load(pamh, &config, argc, argv) != 0) {
+		return (PAM_SESSION_ERR);
+	}
+	if (config.uid < 1000) {
+		zfs_key_config_free(&config);
+		return (PAM_SUCCESS);
+	}
+
+	/* The counter is always updated, even if nothing gets unmounted. */
+	remaining = zfs_key_config_modify_session_counter(pamh, &config, -1);
+	if (remaining != 0 || !config.unmount_and_unload) {
+		zfs_key_config_free(&config);
+		return (PAM_SUCCESS);
+	}
+
+	if (pam_zfs_init(pamh) != 0) {
+		zfs_key_config_free(&config);
+		return (PAM_SERVICE_ERR);
+	}
+	dataset = zfs_key_config_get_dataset(&config);
+	if (dataset == NULL) {
+		pam_zfs_free();
+		zfs_key_config_free(&config);
+		return (PAM_SESSION_ERR);
+	}
+	rc = unmount_unload(pamh, dataset);
+	free(dataset);
+	pam_zfs_free();
+	zfs_key_config_free(&config);
+	return (rc == -1 ? PAM_SESSION_ERR : PAM_SUCCESS);
+}

diff --git a/zfs/contrib/pam_zfs_key/zfs_key b/zfs/contrib/pam_zfs_key/zfs_key
new file mode 100644
index 0000000..e3ed5c4
--- /dev/null
+++ b/zfs/contrib/pam_zfs_key/zfs_key

@@ -0,0 +1,13 @@
+Name: Unlock zfs datasets for user
+Default: yes
+Priority: 128
+Auth-Type: Additional
+Auth:
+	optional	pam_zfs_key.so
+Session-Interactive-Only: yes
+Session-Type: Additional
+Session:
+	optional	pam_zfs_key.so
+Password-Type: Additional
+Password:
+	optional	pam_zfs_key.so

diff --git a/zfs/contrib/pyzfs/README b/zfs/contrib/pyzfs/README
index 52983e5..bd22409 100644
--- a/zfs/contrib/pyzfs/README
+++ b/zfs/contrib/pyzfs/README

@@ -25,4 +25,4 @@
 variable on a memory backed filesystem.
 
 Package documentation: http://pyzfs.readthedocs.org
-Package development: https://github.com/zfsonlinux/zfs
+Package development: https://github.com/openzfs/zfs

diff --git a/zfs/contrib/pyzfs/libzfs_core/__init__.py b/zfs/contrib/pyzfs/libzfs_core/__init__.py
index a195b05..25ea3e4 100644
--- a/zfs/contrib/pyzfs/libzfs_core/__init__.py
+++ b/zfs/contrib/pyzfs/libzfs_core/__init__.py

@@ -32,7 +32,7 @@
 in which the error code is produced.
 
 To submit an issue or contribute to development of this package
-please visit its `GitHub repository <https://github.com/zfsonlinux/zfs>`_.
+please visit its `GitHub repository <https://github.com/openzfs/zfs>`_.
 
 .. data:: MAXNAMELEN
 
@@ -73,7 +73,6 @@
     lzc_receive_with_cmdprops,
     lzc_receive_with_header,
     lzc_release,
-    lzc_remap,
     lzc_reopen,
     lzc_rollback,
     lzc_rollback_to,
@@ -129,7 +128,6 @@
     'lzc_receive_with_cmdprops',
     'lzc_receive_with_header',
     'lzc_release',
-    'lzc_remap',
     'lzc_reopen',
     'lzc_rollback',
     'lzc_rollback_to',

diff --git a/zfs/contrib/pyzfs/libzfs_core/_constants.py b/zfs/contrib/pyzfs/libzfs_core/_constants.py
index 55de55d..32402ae 100644
--- a/zfs/contrib/pyzfs/libzfs_core/_constants.py
+++ b/zfs/contrib/pyzfs/libzfs_core/_constants.py

@@ -19,14 +19,31 @@
 """
 
 from __future__ import absolute_import, division, print_function
+import errno
+import sys
+
+
+# Compat for platform-specific errnos
+if sys.platform.startswith('freebsd'):
+    ECHRNG = errno.ENXIO
+    ECKSUM = 97  # EINTEGRITY
+    ETIME = errno.ETIMEDOUT
+else:
+    ECHRNG = errno.ECHRNG
+    ECKSUM = errno.EBADE
+    ETIME = errno.ETIME
 
 
 # https://stackoverflow.com/a/1695250
-def enum(*sequential, **named):
-    enums = dict(((b, a) for a, b in enumerate(sequential)), **named)
+def enum_with_offset(offset, sequential, named):
+    enums = dict(((b, a + offset) for a, b in enumerate(sequential)), **named)
     return type('Enum', (), enums)
 
 
+def enum(*sequential, **named):
+    return enum_with_offset(0, sequential, named)
+
+
 #: Maximum length of any ZFS name.
 MAXNAMELEN = 255
 #: Default channel program limits
@@ -60,12 +77,38 @@
     'ZIO_CRYPT_AES_256_GCM'
 )
 # ZFS-specific error codes
-ZFS_ERR_CHECKPOINT_EXISTS = 1024
-ZFS_ERR_DISCARDING_CHECKPOINT = 1025
-ZFS_ERR_NO_CHECKPOINT = 1026
-ZFS_ERR_DEVRM_IN_PROGRESS = 1027
-ZFS_ERR_VDEV_TOO_BIG = 1028
-ZFS_ERR_WRONG_PARENT = 1033
-
+zfs_errno = enum_with_offset(1024, [
+        'ZFS_ERR_CHECKPOINT_EXISTS',
+        'ZFS_ERR_DISCARDING_CHECKPOINT',
+        'ZFS_ERR_NO_CHECKPOINT',
+        'ZFS_ERR_DEVRM_IN_PROGRESS',
+        'ZFS_ERR_VDEV_TOO_BIG',
+        'ZFS_ERR_IOC_CMD_UNAVAIL',
+        'ZFS_ERR_IOC_ARG_UNAVAIL',
+        'ZFS_ERR_IOC_ARG_REQUIRED',
+        'ZFS_ERR_IOC_ARG_BADTYPE',
+        'ZFS_ERR_WRONG_PARENT',
+        'ZFS_ERR_FROM_IVSET_GUID_MISSING',
+        'ZFS_ERR_FROM_IVSET_GUID_MISMATCH',
+        'ZFS_ERR_SPILL_BLOCK_FLAG_MISSING',
+        'ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE',
+        'ZFS_ERR_EXPORT_IN_PROGRESS',
+        'ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR',
+        'ZFS_ERR_STREAM_TRUNCATED',
+        'ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH',
+        'ZFS_ERR_RESILVER_IN_PROGRESS',
+        'ZFS_ERR_REBUILD_IN_PROGRESS',
+        'ZFS_ERR_BADPROP',
+    ],
+    {}
+)
+# compat before we used the enum helper for these values
+ZFS_ERR_CHECKPOINT_EXISTS = zfs_errno.ZFS_ERR_CHECKPOINT_EXISTS
+assert (ZFS_ERR_CHECKPOINT_EXISTS == 1024)
+ZFS_ERR_DISCARDING_CHECKPOINT = zfs_errno.ZFS_ERR_DISCARDING_CHECKPOINT
+ZFS_ERR_NO_CHECKPOINT = zfs_errno.ZFS_ERR_NO_CHECKPOINT
+ZFS_ERR_DEVRM_IN_PROGRESS = zfs_errno.ZFS_ERR_DEVRM_IN_PROGRESS
+ZFS_ERR_VDEV_TOO_BIG = zfs_errno.ZFS_ERR_VDEV_TOO_BIG
+ZFS_ERR_WRONG_PARENT = zfs_errno.ZFS_ERR_WRONG_PARENT
 
 # vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4

diff --git a/zfs/contrib/pyzfs/libzfs_core/_error_translation.py b/zfs/contrib/pyzfs/libzfs_core/_error_translation.py
index b888fd7..f494461 100644
--- a/zfs/contrib/pyzfs/libzfs_core/_error_translation.py
+++ b/zfs/contrib/pyzfs/libzfs_core/_error_translation.py

@@ -33,13 +33,17 @@
 import string
 from . import exceptions as lzc_exc
 from ._constants import (
+    ECHRNG,
+    ECKSUM,
+    ETIME,
     MAXNAMELEN,
     ZFS_ERR_CHECKPOINT_EXISTS,
     ZFS_ERR_DISCARDING_CHECKPOINT,
     ZFS_ERR_NO_CHECKPOINT,
     ZFS_ERR_DEVRM_IN_PROGRESS,
     ZFS_ERR_VDEV_TOO_BIG,
-    ZFS_ERR_WRONG_PARENT
+    ZFS_ERR_WRONG_PARENT,
+    zfs_errno
 )
 
 
@@ -55,6 +59,8 @@
         raise lzc_exc.ParentNotFound(name)
     if ret == ZFS_ERR_WRONG_PARENT:
         raise lzc_exc.WrongParent(_fs_name(name))
+    if ret == zfs_errno.ZFS_ERR_BADPROP:
+        raise lzc_exc.PropertyInvalid(name)
     raise _generic_exception(ret, name, "Failed to create filesystem")
 
 
@@ -147,21 +153,36 @@
 
 
 def lzc_bookmark_translate_errors(ret, errlist, bookmarks):
+
     if ret == 0:
         return
 
     def _map(ret, name):
+        source = bookmarks[name]
         if ret == errno.EINVAL:
             if name:
-                snap = bookmarks[name]
                 pool_names = map(_pool_name, bookmarks.keys())
-                if not _is_valid_bmark_name(name):
-                    return lzc_exc.BookmarkNameInvalid(name)
-                elif not _is_valid_snap_name(snap):
-                    return lzc_exc.SnapshotNameInvalid(snap)
-                elif _fs_name(name) != _fs_name(snap):
-                    return lzc_exc.BookmarkMismatch(name)
-                elif any(x != _pool_name(name) for x in pool_names):
+
+                # use _validate* functions for MAXNAMELEN check
+                try:
+                    _validate_bmark_name(name)
+                except lzc_exc.ZFSError as e:
+                    return e
+
+                try:
+                    _validate_snap_name(source)
+                    source_is_snap = True
+                except lzc_exc.ZFSError:
+                    source_is_snap = False
+                try:
+                    _validate_bmark_name(source)
+                    source_is_bmark = True
+                except lzc_exc.ZFSError:
+                    source_is_bmark = False
+                if not source_is_snap and not source_is_bmark:
+                    return lzc_exc.BookmarkSourceInvalid(source)
+
+                if any(x != _pool_name(name) for x in pool_names):
                     return lzc_exc.PoolsDiffer(name)
             else:
                 invalid_names = [
@@ -174,6 +195,8 @@
             return lzc_exc.SnapshotNotFound(name)
         if ret == errno.ENOTSUP:
             return lzc_exc.BookmarkNotSupported(name)
+        if ret == zfs_errno.ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR:
+            return lzc_exc.BookmarkMismatch(source)
         return _generic_exception(ret, name, "Failed to create bookmark")
 
     _handle_err_list(
@@ -399,6 +422,8 @@
             def _map(ret, name):
                 if ret == errno.EINVAL:
                     return lzc_exc.PropertyInvalid(name)
+                if ret == zfs_errno.ZFS_ERR_BADPROP:
+                    return lzc_exc.PropertyInvalid(name)
                 return _generic_exception(ret, name, "Failed to set property")
             _handle_err_list(
                 errno.EINVAL, properrs, [snapname],
@@ -444,10 +469,14 @@
         raise lzc_exc.ReadOnlyPool(_pool_name(snapname))
     if ret == errno.EAGAIN:
         raise lzc_exc.SuspendedPool(_pool_name(snapname))
-    if ret == errno.EBADE:  # ECKSUM
+    if ret == ECKSUM:
         raise lzc_exc.BadStream()
     if ret == ZFS_ERR_WRONG_PARENT:
         raise lzc_exc.WrongParent(_fs_name(snapname))
+    if ret == zfs_errno.ZFS_ERR_STREAM_TRUNCATED:
+        raise lzc_exc.StreamTruncated()
+    if ret == zfs_errno.ZFS_ERR_BADPROP:
+        raise lzc_exc.PropertyInvalid(snapname)
 
     raise lzc_exc.StreamIOError(ret)
 
@@ -532,7 +561,7 @@
         return
     if ret == errno.ENOENT:
         raise lzc_exc.PoolNotFound(name)
-    if ret == errno.ETIME:
+    if ret == ETIME:
         raise lzc_exc.ZCPTimeout()
     if ret == errno.ENOMEM:
         raise lzc_exc.ZCPMemoryError()
@@ -540,7 +569,7 @@
         raise lzc_exc.ZCPSpaceError()
     if ret == errno.EPERM:
         raise lzc_exc.ZCPPermissionError()
-    if ret == errno.ECHRNG:
+    if ret == ECHRNG:
         raise lzc_exc.ZCPRuntimeError(error)
     if ret == errno.EINVAL:
         if error is None:
@@ -550,18 +579,6 @@
     raise _generic_exception(ret, name, "Failed to execute channel program")
 
 
-def lzc_remap_translate_error(ret, name):
-    if ret == 0:
-        return
-    if ret == errno.ENOENT:
-        raise lzc_exc.DatasetNotFound(name)
-    if ret == errno.EINVAL:
-        _validate_fs_name(name)
-    if ret == errno.ENOTSUP:
-        return lzc_exc.FeatureNotSupported(name)
-    raise _generic_exception(ret, name, "Failed to remap dataset")
-
-
 def lzc_pool_checkpoint_translate_error(ret, name, discard=False):
     if ret == 0:
         return

diff --git a/zfs/contrib/pyzfs/libzfs_core/_libzfs_core.py b/zfs/contrib/pyzfs/libzfs_core/_libzfs_core.py
index ed3ea32..fcfa5be 100644
--- a/zfs/contrib/pyzfs/libzfs_core/_libzfs_core.py
+++ b/zfs/contrib/pyzfs/libzfs_core/_libzfs_core.py

@@ -319,14 +319,15 @@
     Create bookmarks.
 
     :param bookmarks: a dict that maps names of wanted bookmarks to names of
-        existing snapshots.
+        existing snapshots or bookmarks.
     :type bookmarks: dict of bytes to bytes
     :raises BookmarkFailure: if any of the bookmarks can not be created for any
         reason.
 
     The bookmarks `dict` maps from name of the bookmark
     (e.g. :file:`{pool}/{fs}#{bmark}`) to the name of the snapshot
-    (e.g. :file:`{pool}/{fs}@{snap}`).  All the bookmarks and snapshots must
+    (e.g. :file:`{pool}/{fs}@{snap}`) or existing bookmark
+    (e.g. :file:`{pool}/{fs}#{bmark}`). All the bookmarks and snapshots must
     be in the same pool.
     '''
     errlist = {}
@@ -1563,22 +1564,6 @@
 
 
 @_uncommitted()
-def lzc_remap(name):
-    '''
-    Remaps the ZFS dataset.
-
-    :param bytes name: the name of the dataset to remap.
-    :raises NameInvalid: if the dataset name is invalid.
-    :raises NameTooLong: if the dataset name is too long.
-    :raises DatasetNotFound: if the dataset does not exist.
-    :raises FeatureNotSupported: if the pool containing the dataset does not
-        have the *obsolete_counts* feature enabled.
-    '''
-    ret = _lib.lzc_remap(name)
-    errors.lzc_remap_translate_error(ret, name)
-
-
-@_uncommitted()
 def lzc_pool_checkpoint(name):
     '''
     Creates a checkpoint for the specified pool.

diff --git a/zfs/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py b/zfs/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py
index ce2d9d6..1b46a08 100644
--- a/zfs/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py
+++ b/zfs/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py

@@ -127,7 +127,6 @@
     int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
     int lzc_sync(const char *, nvlist_t *, nvlist_t **);
     int lzc_unload_key(const char *);
-    int lzc_remap(const char *);
     int lzc_pool_checkpoint(const char *);
     int lzc_pool_checkpoint_discard(const char *);
     int lzc_rename(const char *, const char *);

diff --git a/zfs/contrib/pyzfs/libzfs_core/exceptions.py b/zfs/contrib/pyzfs/libzfs_core/exceptions.py
index f8a7754..e484b07 100644
--- a/zfs/contrib/pyzfs/libzfs_core/exceptions.py
+++ b/zfs/contrib/pyzfs/libzfs_core/exceptions.py

@@ -21,12 +21,16 @@
 
 import errno
 from ._constants import (
+    ECHRNG,
+    ECKSUM,
+    ETIME,
     ZFS_ERR_CHECKPOINT_EXISTS,
     ZFS_ERR_DISCARDING_CHECKPOINT,
     ZFS_ERR_NO_CHECKPOINT,
     ZFS_ERR_DEVRM_IN_PROGRESS,
     ZFS_ERR_VDEV_TOO_BIG,
-    ZFS_ERR_WRONG_PARENT
+    ZFS_ERR_WRONG_PARENT,
+    zfs_errno
 )
 
 
@@ -227,7 +231,15 @@
 
 class BookmarkMismatch(ZFSError):
     errno = errno.EINVAL
-    message = "Bookmark is not in snapshot's filesystem"
+    message = "source is not an ancestor of the new bookmark's dataset"
+
+    def __init__(self, name):
+        self.name = name
+
+
+class BookmarkSourceInvalid(ZFSError):
+    errno = errno.EINVAL
+    message = "Bookmark source is not a valid snapshot or existing bookmark"
 
     def __init__(self, name):
         self.name = name
@@ -316,7 +328,7 @@
 
 
 class BadStream(ZFSError):
-    errno = errno.EBADE
+    errno = ECKSUM
     message = "Bad backup stream"
 
 
@@ -340,6 +352,11 @@
     message = "Incompatible embedded feature with encrypted receive"
 
 
+class StreamTruncated(ZFSError):
+    errno = zfs_errno.ZFS_ERR_STREAM_TRUNCATED
+    message = "incomplete stream"
+
+
 class ReceivePropertyFailure(MultipleOperationsFailure):
     message = "Receiving of properties failed for one or more reasons"
 
@@ -524,7 +541,7 @@
 
 
 class ZCPRuntimeError(ZCPError):
-    errno = errno.ECHRNG
+    errno = ECHRNG
     message = "Channel programs encountered a runtime error"
 
     def __init__(self, details):
@@ -537,7 +554,7 @@
 
 
 class ZCPTimeout(ZCPError):
-    errno = errno.ETIME
+    errno = ETIME
     message = "Channel program timed out"
 
 

diff --git a/zfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py b/zfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py
index 8279cef..d949d88 100644
--- a/zfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py
+++ b/zfs/contrib/pyzfs/libzfs_core/test/test_libzfs_core.py

@@ -154,8 +154,8 @@
 
 @contextlib.contextmanager
 def dev_null():
-    with os_open('/dev/null', os.O_WRONLY) as fd:
-        yield fd
+    with tempfile.TemporaryFile(suffix='.zstream') as fd:
+        yield fd.fileno()
 
 
 @contextlib.contextmanager
@@ -252,9 +252,9 @@
 
 
 def snap_always_unmounted_before_destruction():
-    # Apparently ZoL automatically unmounts the snapshot
+    # Apparently OpenZFS automatically unmounts the snapshot
     # only if it is mounted at its default .zfs/snapshot
-    # mountpoint.
+    # mountpoint under Linux.
     return (
         platform.system() != 'Linux', 'snapshot is not auto-unmounted')
 
@@ -1032,17 +1032,37 @@
         bmarks = [ZFSTest.pool.makeName(
             b'fs1#bmark1'), ZFSTest.pool.makeName(b'fs2#bmark1')]
         bmark_dict = {x: y for x, y in zip(bmarks, snaps)}
-
         lzc.lzc_snapshot(snaps)
         lzc.lzc_bookmark(bmark_dict)
         lzc.lzc_destroy_snaps(snaps, defer=False)
 
     @skipUnlessBookmarksSupported
+    def test_bookmark_copying(self):
+        snaps = [ZFSTest.pool.makeName(s) for s in [
+            b'fs1@snap1', b'fs1@snap2', b'fs2@snap1']]
+        bmarks = [ZFSTest.pool.makeName(x) for x in [
+            b'fs1#bmark1', b'fs1#bmark2', b'fs2#bmark1']]
+        bmarks_copies = [ZFSTest.pool.makeName(x) for x in [
+            b'fs1#bmark1_copy', b'fs1#bmark2_copy', b'fs2#bmark1_copy']]
+        bmark_dict = {x: y for x, y in zip(bmarks, snaps)}
+        bmark_copies_dict = {x: y for x, y in zip(bmarks_copies, bmarks)}
+
+        for snap in snaps:
+            lzc.lzc_snapshot([snap])
+        lzc.lzc_bookmark(bmark_dict)
+
+        lzc.lzc_bookmark(bmark_copies_dict)
+        lzc.lzc_destroy_bookmarks(bmarks_copies)
+
+        lzc.lzc_destroy_bookmarks(bmarks)
+        lzc.lzc_destroy_snaps(snaps, defer=False)
+
+    @skipUnlessBookmarksSupported
     def test_bookmarks_empty(self):
         lzc.lzc_bookmark({})
 
     @skipUnlessBookmarksSupported
-    def test_bookmarks_mismatching_name(self):
+    def test_bookmarks_foreign_source(self):
         snaps = [ZFSTest.pool.makeName(b'fs1@snap1')]
         bmarks = [ZFSTest.pool.makeName(b'fs2#bmark1')]
         bmark_dict = {x: y for x, y in zip(bmarks, snaps)}
@@ -1107,7 +1127,7 @@
             self.assertIsInstance(e, lzc_exc.NameTooLong)
 
     @skipUnlessBookmarksSupported
-    def test_bookmarks_mismatching_names(self):
+    def test_bookmarks_foreign_sources(self):
         snaps = [ZFSTest.pool.makeName(
             b'fs1@snap1'), ZFSTest.pool.makeName(b'fs2@snap1')]
         bmarks = [ZFSTest.pool.makeName(
@@ -1122,7 +1142,7 @@
             self.assertIsInstance(e, lzc_exc.BookmarkMismatch)
 
     @skipUnlessBookmarksSupported
-    def test_bookmarks_partially_mismatching_names(self):
+    def test_bookmarks_partially_foreign_sources(self):
         snaps = [ZFSTest.pool.makeName(
             b'fs1@snap1'), ZFSTest.pool.makeName(b'fs2@snap1')]
         bmarks = [ZFSTest.pool.makeName(
@@ -1154,33 +1174,48 @@
 
     @skipUnlessBookmarksSupported
     def test_bookmarks_missing_snap(self):
+        fss = [ZFSTest.pool.makeName(b'fs1'), ZFSTest.pool.makeName(b'fs2')]
         snaps = [ZFSTest.pool.makeName(
             b'fs1@snap1'), ZFSTest.pool.makeName(b'fs2@snap1')]
         bmarks = [ZFSTest.pool.makeName(
             b'fs1#bmark1'), ZFSTest.pool.makeName(b'fs2#bmark1')]
         bmark_dict = {x: y for x, y in zip(bmarks, snaps)}
 
-        lzc.lzc_snapshot(snaps[0:1])
+        lzc.lzc_snapshot(snaps[0:1])  # only create fs1@snap1
+
         with self.assertRaises(lzc_exc.BookmarkFailure) as ctx:
             lzc.lzc_bookmark(bmark_dict)
 
         for e in ctx.exception.errors:
             self.assertIsInstance(e, lzc_exc.SnapshotNotFound)
 
+        # no new bookmarks are created if one or more sources do not exist
+        for fs in fss:
+            fsbmarks = lzc.lzc_get_bookmarks(fs)
+            self.assertEqual(len(fsbmarks), 0)
+
     @skipUnlessBookmarksSupported
     def test_bookmarks_missing_snaps(self):
+        fss = [ZFSTest.pool.makeName(b'fs1'), ZFSTest.pool.makeName(b'fs2')]
         snaps = [ZFSTest.pool.makeName(
             b'fs1@snap1'), ZFSTest.pool.makeName(b'fs2@snap1')]
         bmarks = [ZFSTest.pool.makeName(
             b'fs1#bmark1'), ZFSTest.pool.makeName(b'fs2#bmark1')]
         bmark_dict = {x: y for x, y in zip(bmarks, snaps)}
 
+        # do not create any snapshots
+
         with self.assertRaises(lzc_exc.BookmarkFailure) as ctx:
             lzc.lzc_bookmark(bmark_dict)
 
         for e in ctx.exception.errors:
             self.assertIsInstance(e, lzc_exc.SnapshotNotFound)
 
+        # no new bookmarks are created if one or more sources do not exist
+        for fs in fss:
+            fsbmarks = lzc.lzc_get_bookmarks(fs)
+            self.assertEqual(len(fsbmarks), 0)
+
     @skipUnlessBookmarksSupported
     def test_bookmarks_for_the_same_snap(self):
         snap = ZFSTest.pool.makeName(b'fs1@snap1')
@@ -2679,7 +2714,7 @@
             lzc.lzc_send(src, None, stream.fileno())
             stream.seek(0)
             stream.truncate(1024 * 3)
-            with self.assertRaises(lzc_exc.BadStream):
+            with self.assertRaises(lzc_exc.StreamTruncated):
                 lzc.lzc_receive_resumable(dst, stream.fileno())
             # Resume token code from zfs_send_resume_token_to_nvlist()
             # XXX: if used more than twice move this code into an external func
@@ -2736,7 +2771,7 @@
             lzc.lzc_send(snap2, snap1, stream.fileno())
             stream.seek(0)
             stream.truncate(1024 * 3)
-            with self.assertRaises(lzc_exc.BadStream):
+            with self.assertRaises(lzc_exc.StreamTruncated):
                 lzc.lzc_receive_resumable(dst2, stream.fileno())
             # Resume token code from zfs_send_resume_token_to_nvlist()
             # format: <version>-<cksum>-<packed-size>-<compressed-payload>
@@ -3632,31 +3667,6 @@
             with self.assertRaises(lzc_exc.EncryptionKeyNotLoaded):
                 lzc.lzc_unload_key(fs)
 
-    def test_remap_missing_fs(self):
-        name = b"nonexistent"
-
-        with self.assertRaises(lzc_exc.DatasetNotFound):
-            lzc.lzc_remap(name)
-
-    def test_remap_invalid_fs(self):
-        ds = ZFSTest.pool.makeName(b"fs1")
-        snap = ds + b"@snap1"
-
-        lzc.lzc_snapshot([snap])
-        with self.assertRaises(lzc_exc.NameInvalid):
-            lzc.lzc_remap(snap)
-
-    def test_remap_too_long_fs_name(self):
-        name = ZFSTest.pool.makeTooLongName()
-
-        with self.assertRaises(lzc_exc.NameTooLong):
-            lzc.lzc_remap(name)
-
-    def test_remap(self):
-        name = ZFSTest.pool.makeName(b"fs1")
-
-        lzc.lzc_remap(name)
-
     def test_checkpoint(self):
         pool = ZFSTest.pool.getRoot().getName()
 

diff --git a/zfs/contrib/pyzfs/setup.py.in b/zfs/contrib/pyzfs/setup.py.in
index bd8ffc7..934b318 100644
--- a/zfs/contrib/pyzfs/setup.py.in
+++ b/zfs/contrib/pyzfs/setup.py.in

@@ -29,13 +29,12 @@
         "Development Status :: 4 - Beta",
         "Intended Audience :: Developers",
         "License :: OSI Approved :: Apache Software License",
-        "Programming Language :: Python :: 2",
-        "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.4",
-        "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
         "Topic :: System :: Filesystems",
         "Topic :: Software Development :: Libraries",
     ],
@@ -53,7 +52,7 @@
     setup_requires=[
         "cffi",
     ],
-    python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,<4',
+    python_requires='>=3.6,<4',
     zip_safe=False,
     test_suite="libzfs_core.test",
 )

diff --git a/zfs/copy-builtin b/zfs/copy-builtin
index 84f469f..18cc741 100755
--- a/zfs/copy-builtin
+++ b/zfs/copy-builtin

@@ -1,6 +1,6 @@
-#!/bin/bash
+#!/bin/sh
 
-set -e
+set -ef
 
 usage()
 {
@@ -9,107 +9,42 @@
 }
 
 [ "$#" -eq 1 ] || usage
-KERNEL_DIR="$(readlink --canonicalize-existing "$1")"
-
-MODULES=()
-MODULES+="spl"
-for MODULE_DIR in module/*
-do
-	[ -d "$MODULE_DIR" ] || continue
-	[ "spl" = "${MODULE_DIR##*/}" ] && continue
-	MODULES+=("${MODULE_DIR##*/}")
-done
+KERNEL_DIR="$1"
 
 if ! [ -e 'zfs_config.h' ]
 then
-	echo >&2
-	echo "    $0: you did not run configure, or you're not in the ZFS source directory." >&2
-	echo "    $0: run configure with --with-linux=$KERNEL_DIR and --enable-linux-builtin." >&2
-	echo >&2
-	exit 1
-fi
+	echo "$0: you did not run configure, or you're not in the ZFS source directory."
+	echo "$0: run configure with --with-linux=$KERNEL_DIR and --enable-linux-builtin."
 
-make clean || true
-scripts/make_gitrev.sh || true
+	exit 1
+fi >&2
+
+make clean ||:
+make gitrev
 
 rm -rf "$KERNEL_DIR/include/zfs" "$KERNEL_DIR/fs/zfs"
-cp --recursive include "$KERNEL_DIR/include/zfs"
-cp --recursive module "$KERNEL_DIR/fs/zfs"
+cp -R include "$KERNEL_DIR/include/zfs"
+cp -R module "$KERNEL_DIR/fs/zfs"
 cp zfs_config.h "$KERNEL_DIR/include/zfs/"
-rm "$KERNEL_DIR/include/zfs/.gitignore"
 
-for MODULE in "${MODULES[@]}"
-do
-	sed -i '/obj =/d' "$KERNEL_DIR/fs/zfs/$MODULE/Makefile"
-	sed -i '/src =/d' "$KERNEL_DIR/fs/zfs/$MODULE/Makefile"
-	sed -i "s|-I$PWD/module/|-I\$(srctree)/fs/zfs/|" "$KERNEL_DIR/fs/zfs/$MODULE/Makefile"
-done
-
-cat > "$KERNEL_DIR/fs/zfs/Kconfig" <<"EOF"
+cat > "$KERNEL_DIR/fs/zfs/Kconfig" <<EOF
 config ZFS
 	tristate "ZFS filesystem support"
 	depends on EFI_PARTITION
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	help
-	  This is the ZFS filesystem from the ZFS On Linux project.
+	  This is the ZFS filesystem from the OpenZFS project.
 
-	  See http://zfsonlinux.org/
+	  See https://github.com/openzfs/zfs
 
 	  To compile this file system support as a module, choose M here.
 
 	  If unsure, say N.
 EOF
 
-{
-	cat <<-"EOF"
-	ZFS_MODULE_CFLAGS  = -I$(srctree)/include/zfs
-	ZFS_MODULE_CFLAGS += -I$(srctree)/include/zfs/spl
-	ZFS_MODULE_CFLAGS += -include $(srctree)/include/zfs/zfs_config.h
-	ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
-	ZFS_MODULE_CPPFLAGS  = -D_KERNEL
-	ZFS_MODULE_CPPFLAGS += -UDEBUG -DNDEBUG
-	export ZFS_MODULE_CFLAGS ZFS_MODULE_CPPFLAGS
+sed -i '/source "fs\/ext2\/Kconfig\"/i\source "fs/zfs/Kconfig"' "$KERNEL_DIR/fs/Kconfig"
+echo 'obj-$(CONFIG_ZFS) += zfs/' >> "$KERNEL_DIR/fs/Makefile"
 
-	obj-$(CONFIG_ZFS) :=
-	EOF
-
-	for MODULE in "${MODULES[@]}"
-	do
-		echo 'obj-$(CONFIG_ZFS) += ' "$MODULE/"
-	done
-} > "$KERNEL_DIR/fs/zfs/Kbuild"
-
-add_after()
-{
-	local FILE="$1"
-	local MARKER="$2"
-	local NEW="$3"
-	local LINE
-
-	while IFS='' read -r LINE
-	do
-		echo "$LINE"
-
-		if [ -n "$MARKER" -a "$LINE" = "$MARKER" ]
-		then
-			echo "$NEW"
-			MARKER=''
-			if IFS='' read -r LINE
-			then
-				[ "$LINE" != "$NEW" ] && echo "$LINE"
-			fi
-		fi
-	done < "$FILE" > "$FILE.new"
-
-	mv "$FILE.new" "$FILE"
-}
-
-add_after "$KERNEL_DIR/fs/Kconfig" 'if BLOCK' 'source "fs/zfs/Kconfig"'
-add_after "$KERNEL_DIR/fs/Makefile" 'endif' 'obj-$(CONFIG_ZFS) += zfs/'
-
-echo >&2
-echo "    $0: done." >&2
-echo "    $0: now you can build the kernel with ZFS support." >&2
-echo "    $0: make sure you enable ZFS support (CONFIG_ZFS) before building." >&2
-echo >&2
+echo "$0: done. now you can build the kernel with ZFS support." >&2
+echo "$0: make sure you enable ZFS support (CONFIG_ZFS) before building." >&2

diff --git a/zfs/etc/Makefile.am b/zfs/etc/Makefile.am
index 67ef94a..aa9ff18 100644
--- a/zfs/etc/Makefile.am
+++ b/zfs/etc/Makefile.am

@@ -1,2 +1,9 @@
-SUBDIRS = default zfs sudoers.d $(ZFS_INIT_SYSTEMD) $(ZFS_INIT_SYSV) $(ZFS_MODULE_LOAD)
+include $(top_srcdir)/config/Shellcheck.am
+
+SUBDIRS = zfs sudoers.d
+SHELLCHECKDIRS = zfs
+if BUILD_LINUX
+SHELLCHECKDIRS += default $(ZFS_INIT_SYSV)
+SUBDIRS += default $(ZFS_INIT_SYSTEMD) $(ZFS_INIT_SYSV) $(ZFS_MODULE_LOAD)
+endif
 DIST_SUBDIRS = default init.d zfs systemd modules-load.d sudoers.d

diff --git a/zfs/etc/default/Makefile.am b/zfs/etc/default/Makefile.am
index f35abd8..f061692 100644
--- a/zfs/etc/default/Makefile.am
+++ b/zfs/etc/default/Makefile.am

@@ -1,12 +1,10 @@
-initconfdir = $(DEFAULT_INITCONF_DIR)
-initconf_SCRIPTS = zfs
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
 
-EXTRA_DIST = \
-	$(top_srcdir)/etc/default/zfs.in
+initconf_DATA = zfs
 
-$(initconf_SCRIPTS):%:%.in Makefile
-	  $(SED) \
-		 -e 's,@sysconfdir\@,$(sysconfdir),g' \
-		 $< >'$@'
+SUBSTFILES += $(initconf_DATA)
 
-CLEANFILES = $(initconf_SCRIPTS)
+SHELLCHECKSCRIPTS = $(initconf_DATA)
+SHELLCHECK_SHELL = sh
+SHELLCHECK_IGNORE = ,SC2034

diff --git a/zfs/etc/default/zfs.in b/zfs/etc/default/zfs.in
index 42fc116..77cc604 100644
--- a/zfs/etc/default/zfs.in
+++ b/zfs/etc/default/zfs.in

@@ -1,4 +1,4 @@
-# ZoL userland configuration.
+# OpenZFS userland configuration.
 
 # NOTE: This file is intended for sysv init and initramfs.
 # Changing some of these settings may not make any difference on
@@ -9,6 +9,12 @@
 # To enable a boolean setting, set it to yes, on, true, or 1.
 # Anything else will be interpreted as unset.
 
+# Run `zfs load-key` during system start?
+ZFS_LOAD_KEY='yes'
+
+# Run `zfs unload-key` during system stop?
+ZFS_UNLOAD_KEY='no'
+
 # Run `zfs mount -a` during system start?
 ZFS_MOUNT='yes'
 
@@ -52,13 +58,6 @@
 # This is a space separated list.
 #ZFS_POOL_EXCEPTIONS="test2"
 
-# List of pools that SHOULD be imported at boot by the initramfs
-# instead of trying to import all available pools.  If this is set
-# then ZFS_POOL_EXCEPTIONS is ignored.
-# Only applicable for Debian GNU/Linux {dkms,initramfs}.
-# This is a semi-colon separated list.
-#ZFS_POOL_IMPORT="pool1;pool2"
-
 # Should the datasets be mounted verbosely?
 # A mount counter will be used when mounting if set to 'yes'.
 VERBOSE_MOUNT='no'
@@ -105,34 +104,6 @@
 # Only applicable for Debian GNU/Linux {dkms,initramfs}.
 ZFS_DKMS_DISABLE_STRIP='no'
 
-# Wait for this many seconds in the initrd pre_mountroot?
-# This delays startup and should be '0' on most systems.
-# Only applicable for Debian GNU/Linux {dkms,initramfs}.
-ZFS_INITRD_PRE_MOUNTROOT_SLEEP='0'
-
-# Wait for this many seconds in the initrd mountroot?
-# This delays startup and should be '0' on most systems. This might help on
-# systems which have their ZFS root on a USB disk that takes just a little
-# longer to be available
-# Only applicable for Debian GNU/Linux {dkms,initramfs}.
-ZFS_INITRD_POST_MODPROBE_SLEEP='0'
-
-# List of additional datasets to mount after the root dataset is mounted?
-#
-# The init script will use the mountpoint specified in the 'mountpoint'
-# property value in the dataset to determine where it should be mounted.
-#
-# This is a space separated list, and will be mounted in the order specified,
-# so if one filesystem depends on a previous mountpoint, make sure to put
-# them in the right order.
-#
-# It is not necessary to add filesystems below the root fs here. It is
-# taken care of by the initrd script automatically. These are only for
-# additional filesystems needed. Such as /opt, /usr/local which is not
-# located under the root fs.
-# Example: If root FS is 'rpool/ROOT/rootfs', this would make sense.
-#ZFS_INITRD_ADDITIONAL_DATASETS="rpool/ROOT/usr rpool/ROOT/var"
-
 # Optional arguments for the ZFS Event Daemon (ZED).
 # See zed(8) for more information on available options.
 #ZED_ARGS="-M"

diff --git a/zfs/etc/init.d/.gitignore b/zfs/etc/init.d/.gitignore
index 43a673d..b3402f8 100644
--- a/zfs/etc/init.d/.gitignore
+++ b/zfs/etc/init.d/.gitignore

@@ -1,4 +1,5 @@
 zfs-import
+zfs-load-key
 zfs-mount
 zfs-share
 zfs-zed

diff --git a/zfs/etc/init.d/Makefile.am b/zfs/etc/init.d/Makefile.am
index 19fa76a..658623f 100644
--- a/zfs/etc/init.d/Makefile.am
+++ b/zfs/etc/init.d/Makefile.am

@@ -1,37 +1,10 @@
-initdir = $(DEFAULT_INIT_DIR)
-init_SCRIPTS = zfs-import zfs-mount zfs-share zfs-zed
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
 
-initconfdir = $(DEFAULT_INITCONF_DIR)
+EXTRA_DIST += README.md
 
-EXTRA_DIST = \
-	$(top_srcdir)/etc/init.d/zfs-share.in \
-	$(top_srcdir)/etc/init.d/zfs-import.in \
-	$(top_srcdir)/etc/init.d/zfs-mount.in \
-	$(top_srcdir)/etc/init.d/zfs-zed.in
+init_SCRIPTS = zfs-import zfs-load-key zfs-mount zfs-share zfs-zed
 
-$(init_SCRIPTS):%:%.in Makefile
-	-(if [ -e /etc/debian_version ]; then \
-		NFS_SRV=nfs-kernel-server; \
-	  else \
-		NFS_SRV=nfs; \
-	  fi; \
-	  if [ -e /sbin/openrc-run ]; then \
-		SHELL=/sbin/openrc-run; \
-	  else \
-		SHELL=/bin/sh; \
-	  fi; \
-	  $(SED) \
-		 -e 's,@bindir\@,$(bindir),g' \
-		 -e 's,@sbindir\@,$(sbindir),g' \
-		 -e 's,@udevdir\@,$(udevdir),g' \
-		 -e 's,@udevruledir\@,$(udevruledir),g' \
-		 -e 's,@sysconfdir\@,$(sysconfdir),g' \
-		 -e 's,@initconfdir\@,$(initconfdir),g' \
-		 -e 's,@initdir\@,$(initdir),g' \
-		 -e 's,@runstatedir\@,$(runstatedir),g' \
-		 -e "s,@SHELL\@,$$SHELL,g" \
-		 -e "s,@NFS_SRV\@,$$NFS_SRV,g" \
-		 $< >'$@'; \
-		chmod +x '$@')
+SUBSTFILES += $(init_SCRIPTS)
 
-CLEANFILES = $(init_SCRIPTS)
+SHELLCHECK_SHELL = dash # local variables

diff --git a/zfs/etc/init.d/README.md b/zfs/etc/init.d/README.md
index ad7c053..f417b24 100644
--- a/zfs/etc/init.d/README.md
+++ b/zfs/etc/init.d/README.md

@@ -16,7 +16,7 @@
 
 SUPPORT
   If you find that they don't work for your platform, please report this
-  at the ZFS On Linux issue tracker at https://github.com/zfsonlinux/zfs/issues.
+  at the OpenZFS issue tracker at https://github.com/openzfs/zfs/issues.
 
   Please include:
 
@@ -42,14 +42,16 @@
   To setup the init script links in /etc/rc?.d manually on a Debian GNU/Linux
   (or derived) system, run the following commands (the order is important!):
 
-    update-rc.d zfs-import start 07 S .       stop 07 0 1 6 .
-    update-rc.d zfs-mount  start 02 2 3 4 5 . stop 06 0 1 6 .
-    update-rc.d zfs-zed    start 07 2 3 4 5 . stop 08 0 1 6 .
-    update-rc.d zfs-share  start 27 2 3 4 5 . stop 05 0 1 6 .
+    update-rc.d zfs-import   start 07 S .       stop 07 0 1 6 .
+    update-rc.d zfs-load-key start 02 2 3 4 5 . stop 06 0 1 6 .
+    update-rc.d zfs-mount    start 02 2 3 4 5 . stop 06 0 1 6 .
+    update-rc.d zfs-zed      start 07 2 3 4 5 . stop 08 0 1 6 .
+    update-rc.d zfs-share    start 27 2 3 4 5 . stop 05 0 1 6 .
 
   To do the same on RedHat, Fedora and/or CentOS:
 
     chkconfig zfs-import
+    chkconfig zfs-load-key
     chkconfig zfs-mount
     chkconfig zfs-zed
     chkconfig zfs-share
@@ -57,6 +59,7 @@
   On Gentoo:
 
     rc-update add zfs-import boot
+    rc-update add zfs-load-key boot
     rc-update add zfs-mount boot
     rc-update add zfs-zed default
     rc-update add zfs-share default

diff --git a/zfs/etc/init.d/zfs-import.in b/zfs/etc/init.d/zfs-import.in
old mode 100644
new mode 100755
index 47c957b..130174f
--- a/zfs/etc/init.d/zfs-import.in
+++ b/zfs/etc/init.d/zfs-import.in

@@ -1,4 +1,4 @@
-#!@SHELL@
+#!@DEFAULT_INIT_SHELL@
 #
 # zfs-import    This script will import ZFS pools
 #
@@ -26,10 +26,8 @@
 #
 # Released under the 2-clause BSD license.
 #
-# The original script that acted as a template for this script came from
-# the Debian GNU/Linux kFreeBSD ZFS packages (which did not include a
-# licensing stansa) in the commit dated Mar 24, 2011:
-#   https://github.com/zfsonlinux/pkg-zfs/commit/80a3ae582b59c0250d7912ba794dca9e669e605a
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
 
 # Source the common init script
 . @sysconfdir@/zfs/zfs-functions
@@ -56,16 +54,12 @@
 # Support function to get a list of all pools, separated with ';'
 find_pools()
 {
-	local CMD="$*"
 	local pools
 
-	pools=$($CMD 2> /dev/null | \
-		grep -E "pool:|^[a-zA-Z0-9]" | \
-		sed 's@.*: @@' | \
+	pools=$("$@" 2> /dev/null | \
+		sed -Ee '/pool:|^[a-zA-Z0-9]/!d' -e 's@.*: @@' | \
 		sort | \
-		while read pool; do \
-		    echo -n "$pool;"
-		done)
+		tr '\n' ';')
 
 	echo "${pools%%;}" # Return without the last ';'.
 }
@@ -77,10 +71,11 @@
 	local exception dir ZPOOL_IMPORT_PATH RET=0 r=1
 
 	# In case not shutdown cleanly.
+	# shellcheck disable=SC2154
 	[ -n "$init" ] && rm -f /etc/dfs/sharetab
 
 	# Just simplify code later on.
-	if [ -n "$USE_DISK_BY_ID" -a "$USE_DISK_BY_ID" != 'yes' ]
+	if [ -n "$USE_DISK_BY_ID" ] && [ "$USE_DISK_BY_ID" != 'yes' ]
 	then
 		# It's something, but not 'yes' so it's no good to us.
 		unset USE_DISK_BY_ID
@@ -153,7 +148,7 @@
 	# to something we can use later with the real import(s). We want to
 	# make sure we find all by* dirs, BUT by-vdev should be first (if it
 	# exists).
-	if [ -n "$USE_DISK_BY_ID" -a -z "$ZPOOL_IMPORT_PATH" ]
+	if [ -n "$USE_DISK_BY_ID" ] && [ -z "$ZPOOL_IMPORT_PATH" ]
 	then
 		local dirs
 		dirs="$(for dir in $(echo /dev/disk/by-*)
@@ -162,7 +157,7 @@
 			echo "$dir" | grep -q /by-vdev && continue
 			[ ! -d "$dir" ] && continue
 
-			echo -n "$dir:"
+			printf "%s" "$dir:"
 		done | sed 's,:$,,g')"
 
 		if [ -d "/dev/disk/by-vdev" ]
@@ -219,6 +214,7 @@
 		# Import by using ZPOOL_IMPORT_PATH (either set above or in
 		# the config file) _or_ with the 'built in' default search
 		# paths. This is the preferred way.
+		# shellcheck disable=SC2086
 		"$ZPOOL" import -N ${ZPOOL_IMPORT_OPTS} "$pool" 2> /dev/null
 		r="$?" ; RET=$((RET + r))
 		if [ "$r" -eq 0 ]
@@ -231,7 +227,7 @@
 		# using the cache file soon and that might succeed.
 		[ ! -f "$ZPOOL_CACHE" ] && zfs_log_end_msg "$RET"
 
-		if [ "$r" -gt 0 -a -f "$ZPOOL_CACHE" ]
+		if [ "$r" -gt 0 ] && [ -f "$ZPOOL_CACHE" ]
 		then
 			# Failed to import without a cache file. Try WITH...
 			if [ -z "$init" ] && check_boolean "$VERBOSE_MOUNT"
@@ -240,6 +236,7 @@
 				zfs_log_progress_msg " using cache file"
 			fi
 
+			# shellcheck disable=SC2086
 			"$ZPOOL" import -c "$ZPOOL_CACHE" -N ${ZPOOL_IMPORT_OPTS} \
 				"$pool" 2> /dev/null
 			r="$?" ; RET=$((RET + r))
@@ -254,7 +251,7 @@
 	[ -n "$init" ] && zfs_log_end_msg "$RET"
 
 	IFS="$OLD_IFS"
-	[ -n "$already_imported" -a -z "$available_pools" ] && return 0
+	[ -n "$already_imported" ] && [ -z "$available_pools" ] && return 0
 
 	return "$RET"
 }

diff --git a/zfs/etc/init.d/zfs-load-key.in b/zfs/etc/init.d/zfs-load-key.in
new file mode 100755
index 0000000..2f8deff
--- /dev/null
+++ b/zfs/etc/init.d/zfs-load-key.in

@@ -0,0 +1,131 @@
+#!@DEFAULT_INIT_SHELL@
+#
+# zfs-load-key  This script will load/unload the zfs filesystems keys.
+#
+# chkconfig:    2345 06 99
+# description:  This script will load or unload the ZFS filesystem keys during
+#               system boot/shutdown. Only filesystems with a key path set in
+#               the keylocation property are handled. See zfs(8) for details.
+# probe: true
+#
+### BEGIN INIT INFO
+# Provides:          zfs-load-key
+# Required-Start:    $local_fs zfs-import
+# Required-Stop:     $local_fs zfs-import
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# X-Start-Before:    zfs-mount
+# X-Stop-After:      zfs-zed
+# Short-Description: Load ZFS keys for filesystems and volumes
+# Description: Run the `zfs load-key` or `zfs unload-key` commands.
+### END INIT INFO
+#
+# Released under the 2-clause BSD license.
+#
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
+
+# Source the common init script
+. @sysconfdir@/zfs/zfs-functions
+
+# ----------------------------------------------------
+
+do_depend()
+{
+	# bootmisc will log to /var which may be a different zfs than root.
+	before bootmisc logger zfs-mount
+
+	after zfs-import sysfs
+	keyword -lxc -openvz -prefix -vserver
+}
+
+# Load unavailable keys of encryption roots unless keylocation=prompt/none
+do_load_keys()
+{
+	zfs_log_begin_msg "Load ZFS filesystem(s) keys"
+
+	"$ZFS" list -Ho name,encryptionroot,keystatus,keylocation |
+	    while IFS="	" read -r name encryptionroot keystatus keylocation; do
+		if [ "$encryptionroot" != "-" ] &&
+			[ "$name" = "$encryptionroot" ] &&
+			[ "$keystatus" = "unavailable" ] &&
+			[ "$keylocation" != "prompt" ] &&
+			[ "$keylocation" != "none" ]
+		then
+			zfs_action "Load key for $encryptionroot" \
+			    "$ZFS" load-key "$encryptionroot"
+		fi
+	done
+
+	zfs_log_end_msg 0
+
+	return 0
+}
+
+# Unload available keys of encryption roots; sed reverses the list like tac
+do_unload_keys()
+{
+	zfs_log_begin_msg "Unload ZFS filesystem(s) key"
+
+	"$ZFS" list -Ho name,encryptionroot,keystatus | sed '1!G;h;$!d' |
+	    while IFS="	" read -r name encryptionroot keystatus; do
+		if [ "$encryptionroot" != "-" ] &&
+			[ "$name" = "$encryptionroot" ] &&
+			[ "$keystatus" = "available" ]
+		then
+			zfs_action "Unload key for $encryptionroot" \
+			    "$ZFS" unload-key "$encryptionroot"
+		fi
+	done
+
+	zfs_log_end_msg 0
+
+	return 0
+}
+
+do_start()
+{
+	check_boolean "$ZFS_LOAD_KEY" || exit 0
+
+	check_module_loaded "zfs" || exit 0
+
+	do_load_keys
+}
+
+do_stop()
+{
+	check_boolean "$ZFS_UNLOAD_KEY" || exit 0
+
+	check_module_loaded "zfs" || exit 0
+
+	do_unload_keys
+}
+
+# ----------------------------------------------------
+
+if [ ! -e /sbin/openrc-run ]
+then
+	case "$1" in
+		start)
+			do_start
+			;;
+		stop)
+			do_stop
+			;;
+		force-reload|condrestart|reload|restart|status)
+			# no-op
+			;;
+		*)
+			[ -n "$1" ] && echo "Error: Unknown command $1."
+			echo "Usage: $0 {start|stop}"
+			exit 3
+			;;
+	esac
+
+	exit $?
+else
+	# Create wrapper functions since Gentoo doesn't use the case part.
+	depend() { do_depend; }
+	start() { do_start; }
+	stop() { do_stop; }
+fi

diff --git a/zfs/etc/init.d/zfs-mount.in b/zfs/etc/init.d/zfs-mount.in
old mode 100644
new mode 100755
index fa954e0..000619b
--- a/zfs/etc/init.d/zfs-mount.in
+++ b/zfs/etc/init.d/zfs-mount.in

@@ -1,4 +1,4 @@
-#!@SHELL@
+#!@DEFAULT_INIT_SHELL@
 #
 # zfs-mount     This script will mount/umount the zfs filesystems.
 #
@@ -23,10 +23,8 @@
 #
 # Released under the 2-clause BSD license.
 #
-# The original script that acted as a template for this script came from
-# the Debian GNU/Linux kFreeBSD ZFS packages (which did not include a
-# licensing stansa) in the commit dated Mar 24, 2011:
-#   https://github.com/zfsonlinux/pkg-zfs/commit/80a3ae582b59c0250d7912ba794dca9e669e605a
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
 
 # Source the common init script
 . @sysconfdir@/zfs/zfs-functions
@@ -34,9 +32,8 @@
 # ----------------------------------------------------
 
 chkroot() {
-	while read line; do
-		set -- $line
-		if [ "$2" = "/" ]; then
+	while read -r _ mp _; do
+		if [ "$mp" = "/" ]; then
 			return 0
 		fi
 	done < /proc/self/mounts
@@ -65,7 +62,7 @@
 # Mount all datasets/filesystems
 do_mount()
 {
-	local verbose overlay i mntpt val
+	local verbose overlay i mntpt
 
 	check_boolean "$VERBOSE_MOUNT" && verbose=v
 	check_boolean "$DO_OVERLAY_MOUNTS" && overlay=O
@@ -83,11 +80,11 @@
 
 	read_mtab  "^/dev/(zd|zvol)"
 	read_fstab "^/dev/(zd|zvol)"
-	i=0; var=$(eval echo FSTAB_$i)
-	while [ -n "$(eval echo "$""$var")" ]
+	i=0; var="FSTAB_0"
+	while [ -n "$(eval echo "\$$var")" ]
 	do
-		mntpt=$(eval echo "$""$var")
-		dev=$(eval echo "$"FSTAB_dev_$i)
+		mntpt=$(eval echo "\$$var")
+		dev=$(eval echo "\$FSTAB_dev_$i")
 		if ! in_mtab "$mntpt" && ! is_mounted "$mntpt" && [ -e "$dev" ]
 		then
 			check_boolean "$VERBOSE_MOUNT" && \
@@ -96,15 +93,15 @@
 		fi
 
 		i=$((i + 1))
-		var=$(eval echo FSTAB_$i)
+		var=$(eval echo "FSTAB_$i")
 	done
 
 	read_mtab  "[[:space:]]zfs[[:space:]]"
 	read_fstab "[[:space:]]zfs[[:space:]]"
-	i=0; var=$(eval echo FSTAB_$i)
-	while [ -n "$(eval echo "$""$var")" ]
+	i=0; var=$(eval echo "FSTAB_$i")
+	while [ -n "$(eval echo "\$$var")" ]
 	do
-		mntpt=$(eval echo "$""$var")
+		mntpt=$(eval echo "\$$var")
 		if ! in_mtab "$mntpt" && ! is_mounted "$mntpt"
 		then
 			check_boolean "$VERBOSE_MOUNT" && \
@@ -113,7 +110,7 @@
 		fi
 
 		i=$((i + 1))
-		var=$(eval echo FSTAB_$i)
+		var=$(eval echo "FSTAB_$i")
 	done
 	check_boolean "$VERBOSE_MOUNT" && zfs_log_end_msg 0
 
@@ -136,11 +133,11 @@
 
 	read_mtab  "^/dev/(zd|zvol)"
 	read_fstab "^/dev/(zd|zvol)"
-	i=0; var=$(eval echo FSTAB_$i)
-	while [ -n "$(eval echo "$""$var")" ]
+	i=0; var="FSTAB_0"
+	while [ -n "$(eval echo "\$$var")" ]
 	do
-		mntpt=$(eval echo "$""$var")
-		dev=$(eval echo "$"FSTAB_dev_$i)
+		mntpt=$(eval echo "\$$var")
+		dev=$(eval echo "\$FSTAB_dev_$i")
 		if in_mtab "$mntpt"
 		then
 			check_boolean "$VERBOSE_MOUNT" && \
@@ -149,15 +146,15 @@
 		fi
 
 		i=$((i + 1))
-		var=$(eval echo FSTAB_$i)
+		var=$(eval echo "FSTAB_$i")
 	done
 
 	read_mtab  "[[:space:]]zfs[[:space:]]"
 	read_fstab "[[:space:]]zfs[[:space:]]"
-	i=0; var=$(eval echo FSTAB_$i)
-	while [ -n "$(eval echo "$""$var")" ]
+	i=0; var="FSTAB_0"
+	while [ -n "$(eval echo "\$$var")" ]
 	do
-		mntpt=$(eval echo "$""$var")
+		mntpt=$(eval echo "\$$var")
 		if in_mtab "$mntpt"; then
 			check_boolean "$VERBOSE_MOUNT" && \
 			    zfs_log_progress_msg "$mntpt "
@@ -165,7 +162,7 @@
 		fi
 
 		i=$((i + 1))
-		var=$(eval echo FSTAB_$i)
+		var=$(eval echo "FSTAB_$i")
 	done
 	check_boolean "$VERBOSE_MOUNT" && zfs_log_end_msg 0
 

diff --git a/zfs/etc/init.d/zfs-share.in b/zfs/etc/init.d/zfs-share.in
old mode 100644
new mode 100755
index bdbadf6..ef628fe
--- a/zfs/etc/init.d/zfs-share.in
+++ b/zfs/etc/init.d/zfs-share.in

@@ -1,4 +1,4 @@
-#!@SHELL@
+#!@DEFAULT_INIT_SHELL@
 #
 # zfs-share     This script will network share zfs filesystems and volumes.
 #
@@ -13,8 +13,8 @@
 # Required-Stop:     $local_fs $network $remote_fs zfs-mount
 # Default-Start:     2 3 4 5
 # Default-Stop:      0 1 6
-# Should-Start:      iscsi iscsitarget istgt scst @NFS_SRV@ samba samba4 zfs-mount zfs-zed
-# Should-Stop:       iscsi iscsitarget istgt scst @NFS_SRV@ samba samba4 zfs-mount zfs-zed
+# Should-Start:      iscsi iscsitarget istgt scst @DEFAULT_INIT_NFS_SERVER@ samba samba4 zfs-mount zfs-zed
+# Should-Stop:       iscsi iscsitarget istgt scst @DEFAULT_INIT_NFS_SERVER@ samba samba4 zfs-mount zfs-zed
 # Short-Description: Network share ZFS datasets and volumes.
 # Description:       Run the `zfs share -a` or `zfs unshare -a` commands
 #                    for controlling iSCSI, NFS, or CIFS network shares.
@@ -22,10 +22,8 @@
 #
 # Released under the 2-clause BSD license.
 #
-# The original script that acted as a template for this script came from
-# the Debian GNU/Linux kFreeBSD ZFS packages (which did not include a
-# licensing stansa) in the commit dated Mar 24, 2011:
-#   https://github.com/zfsonlinux/pkg-zfs/commit/80a3ae582b59c0250d7912ba794dca9e669e605a
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
 
 # Source the common init script
 . @sysconfdir@/zfs/zfs-functions

diff --git a/zfs/etc/init.d/zfs-zed.in b/zfs/etc/init.d/zfs-zed.in
old mode 100644
new mode 100755
index fe3c225..e5256cb
--- a/zfs/etc/init.d/zfs-zed.in
+++ b/zfs/etc/init.d/zfs-zed.in

@@ -1,4 +1,4 @@
-#!@SHELL@
+#!@DEFAULT_INIT_SHELL@
 #
 # zfs-zed
 #
@@ -21,10 +21,8 @@
 #
 # Released under the 2-clause BSD license.
 #
-# The original script that acted as a template for this script came from
-# the Debian GNU/Linux kFreeBSD ZFS packages (which did not include a
-# licensing stansa) in the commit dated Mar 24, 2011:
-#   https://github.com/zfsonlinux/pkg-zfs/commit/80a3ae582b59c0250d7912ba794dca9e669e605a
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
 
 # Source the common init script
 . @sysconfdir@/zfs/zfs-functions
@@ -32,6 +30,7 @@
 ZED_NAME="zed"
 ZED_PIDFILE="@runstatedir@/$ZED_NAME.pid"
 
+# shellcheck disable=SC2034
 extra_started_commands="reload"
 
 # Exit if the package is not installed
@@ -57,24 +56,20 @@
 
 do_stop()
 {
-	local pools RET
+	local pools
 	check_module_loaded "zfs" || exit 0
 
 	zfs_action "Stopping ZFS Event Daemon" zfs_daemon_stop \
-	   "$ZED_PIDFILE" "$ZED" "$ZED_NAME"
-	if [ "$?" -eq "0" ]
+	   "$ZED_PIDFILE" "$ZED" "$ZED_NAME" || return "$?"
+
+	# Let's see if we have any pools imported
+	pools=$("$ZPOOL" list -H -oname)
+	if [ -z "$pools" ]
 	then
-		# Let's see if we have any pools imported
-		pools=$("$ZPOOL" list -H -oname)
-		if [ -z "$pools" ]
-		then
-			# No pools imported, it is/should be safe/possible to
-			# unload modules.
-			zfs_action "Unloading modules" rmmod zfs zunicode \
-			    zavl zcommon znvpair zlua spl
-			return "$?"
-		fi
-	else
+		# No pools imported, it is/should be safe/possible to
+		# unload modules.
+		zfs_action "Unloading modules" rmmod zfs zunicode \
+		    zavl zcommon znvpair zlua spl
 		return "$?"
 	fi
 }

diff --git a/zfs/etc/modules-load.d/Makefile.am b/zfs/etc/modules-load.d/Makefile.am
index 47762b7..8a29557 100644
--- a/zfs/etc/modules-load.d/Makefile.am
+++ b/zfs/etc/modules-load.d/Makefile.am

@@ -1,14 +1,2 @@
-modulesload_DATA = \
+dist_modulesload_DATA = \
 	zfs.conf
-
-EXTRA_DIST = \
-	$(top_srcdir)/etc/modules-load.d/zfs.conf.in
-
-$(modulesload_DATA):%:%.in
-	-$(SED) \
-		-e '' \
-		$< >'$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(modulesload_DATA)

diff --git a/zfs/etc/modules-load.d/zfs.conf.in b/zfs/etc/modules-load.d/zfs.conf
similarity index 100%
rename from zfs/etc/modules-load.d/zfs.conf.in
rename to zfs/etc/modules-load.d/zfs.conf


diff --git a/zfs/etc/sudoers.d/Makefile.am b/zfs/etc/sudoers.d/Makefile.am
index ca9186a..6f7ac8d 100644
--- a/zfs/etc/sudoers.d/Makefile.am
+++ b/zfs/etc/sudoers.d/Makefile.am

@@ -2,4 +2,4 @@
 sudoersd_DATA = zfs
 
 EXTRA_DIST = \
-	$(top_srcdir)/etc/sudoers.d/zfs
+	zfs

diff --git a/zfs/etc/sudoers.d/zfs b/zfs/etc/sudoers.d/zfs
index f66ebad..82a25ba 100644
--- a/zfs/etc/sudoers.d/zfs
+++ b/zfs/etc/sudoers.d/zfs

@@ -3,6 +3,7 @@
 ## to read basic SMART health statistics for a pool.
 ##
 ## CAUTION: Any syntax error introduced here will break sudo.
+## Editing with 'visudo' is recommended: visudo -f /etc/sudoers.d/zfs
 ##
 
 # ALL ALL = (root) NOPASSWD: /usr/sbin/smartctl -a /dev/[hsv]d[a-z0-9]*

diff --git a/zfs/etc/systemd/Makefile.am b/zfs/etc/systemd/Makefile.am
index 7b47b93..66232a5 100644
--- a/zfs/etc/systemd/Makefile.am
+++ b/zfs/etc/systemd/Makefile.am

@@ -1 +1,4 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 SUBDIRS = system system-generators
+SHELLCHECKDIRS = system-generators

diff --git a/zfs/etc/systemd/system-generators/Makefile.am b/zfs/etc/systemd/system-generators/Makefile.am
index b4df013..e5920bf 100644
--- a/zfs/etc/systemd/system-generators/Makefile.am
+++ b/zfs/etc/systemd/system-generators/Makefile.am

@@ -1,16 +1,14 @@
-systemdgenerator_SCRIPTS = \
+include $(top_srcdir)/config/Rules.am
+
+systemdgenerator_PROGRAMS = \
 	zfs-mount-generator
 
-EXTRA_DIST = \
-	$(top_srcdir)/etc/systemd/system-generators/zfs-mount-generator.in
+zfs_mount_generator_SOURCES = \
+	zfs-mount-generator.c
 
-$(systemdgenerator_SCRIPTS): %: %.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@runstatedir\@,$(runstatedir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
+zfs_mount_generator_LDADD = \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la
 
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(systemdgenerator_SCRIPTS)
+zfs_mount_generator_LDFLAGS = -pthread
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/etc/systemd/system-generators/zfs-mount-generator.c b/zfs/etc/systemd/system-generators/zfs-mount-generator.c
new file mode 100644
index 0000000..f4c6c26
--- /dev/null
+++ b/zfs/etc/systemd/system-generators/zfs-mount-generator.c

@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2017 Antonio Russo <antonio.e.russo@gmail.com>
+ * Copyright (c) 2020 InsanePrawn <insane.prawny@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <time.h>
+#include <regex.h>
+#include <search.h>
+#include <dirent.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <libzfs.h>
+
+/*
+ * For debugging only.
+ *
+ * Free statics with trivial life-times,
+ * but saved line filenames are replaced with a static string.
+ */
+#define	FREE_STATICS false
+
+#define	nitems(arr) (sizeof (arr) / sizeof (*arr))
+#define	STRCMP ((int(*)(const void *, const void *))&strcmp)
+
+
+#define	PROGNAME "zfs-mount-generator"
+#define	FSLIST SYSCONFDIR "/zfs/zfs-list.cache"
+#define	ZFS SBINDIR "/zfs"
+
+#define	OUTPUT_HEADER \
+	"# Automatically generated by " PROGNAME "\n" \
+	"\n"
+
+/*
+ * Starts like the one in libzfs_util.c but also matches "//"
+ * and captures until the end, since we actually use it for path extraction
+ */
+#define	URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$"
+static regex_t uri_regex;
+
+static const char *destdir = "/tmp";
+static int destdir_fd = -1;
+
+static void *known_pools = NULL; /* tsearch() of C strings */
+static void *noauto_files = NULL; /* tsearch() of C strings */
+
+/* Returns malloc()ed prepend + systemd-escaped input + append, or NULL. */
+static char *
+systemd_escape(const char *input, const char *prepend, const char *append)
+{
+	size_t len = strlen(input);
+	size_t applen = strlen(append);
+	size_t prelen = strlen(prepend);
+	char *ret = malloc(4 * len + prelen + applen + 1);
+	if (!ret) {
+		fprintf(stderr, PROGNAME "[%d]: "
+		    "out of memory to escape \"%s%s%s\"!\n",
+		    getpid(), prepend, input, append);
+		return (NULL);
+	}
+
+	memcpy(ret, prepend, prelen);
+	char *out = ret + prelen;
+
+	const char *cur = input;
+	if (*cur == '.') {
+		memcpy(out, "\\x2e", 4);
+		out += 4;
+		++cur;
+	}
+	for (; *cur; ++cur) {
+		if (*cur == '/')
+			*(out++) = '-';
+		else if (strchr(
+		    "0123456789"
+		    "abcdefghijklmnopqrstuvwxyz"
+		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    ":_.", *cur))
+			*(out++) = *cur;
+		else {
+			/* unsigned: a negative char prints \xffffffXX */
+			out += sprintf(out, "\\x%02x", (unsigned char)*cur);
+		}
+	}
+
+	memcpy(out, append, applen + 1);
+	return (ret);
+}
+
+static void
+simplify_path(char *path)
+{
+	char *out = path;
+	for (char *cur = path; *cur; ++cur) {
+		if (*cur == '/') {
+			while (*(cur + 1) == '/')
+				++cur;
+			*(out++) = '/';
+		} else
+			*(out++) = *cur;
+	}
+
+	*(out++) = '\0';
+}
+
+static bool
+strendswith(const char *what, const char *suff)
+{
+	size_t what_l = strlen(what);
+	size_t suff_l = strlen(suff);
+
+	return ((what_l >= suff_l) &&
+	    (strcmp(what + what_l - suff_l, suff) == 0));
+}
+
+/* Assumes already-simplified path; input is restored before returning */
+static char *
+systemd_escape_path(char *input, const char *prepend, const char *append)
+{
+	if (strcmp(input, "/") == 0) {
+		char *ret;
+		if (asprintf(&ret, "%s-%s", prepend, append) == -1) {
+			fprintf(stderr, PROGNAME "[%d]: "
+			    "out of memory to escape \"%s%s%s\"!\n",
+			    getpid(), prepend, input, append);
+			ret = NULL;
+		}
+		return (ret);
+	} else {
+		/*
+		 * path_is_normalized() (flattened for absolute paths here),
+		 * required for proper escaping
+		 */
+		if (strstr(input, "/./") || strstr(input, "/../") ||
+		    strendswith(input, "/.") || strendswith(input, "/.."))
+			return (NULL);
+
+
+		if (input[0] == '/')
+			++input;
+
+		char *back = &input[strlen(input) - 1];
+		bool deslash = *back == '/';
+		if (deslash)
+			*back = '\0';
+
+		char *ret = systemd_escape(input, prepend, append);
+
+		if (deslash)
+			*back = '/';
+		return (ret);
+	}
+}
+
+static FILE *
+fopenat(int dirfd, const char *pathname, int flags,
+    const char *stream_mode, mode_t mode)
+{
+	int fd = openat(dirfd, pathname, flags, mode);
+	if (fd < 0)
+		return (NULL);
+
+	return (fdopen(fd, stream_mode));
+}
+
+static int
+line_worker(char *line, const char *cachefile)
+{
+	int ret = 0;
+	void *tofree_all[8];
+	void **tofree = tofree_all;
+
+	char *toktmp;
+	/* BEGIN CSTYLED */
+	const char *dataset                     = strtok_r(line, "\t", &toktmp);
+	      char *p_mountpoint                = strtok_r(NULL, "\t", &toktmp);
+	const char *p_canmount                  = strtok_r(NULL, "\t", &toktmp);
+	const char *p_atime                     = strtok_r(NULL, "\t", &toktmp);
+	const char *p_relatime                  = strtok_r(NULL, "\t", &toktmp);
+	const char *p_devices                   = strtok_r(NULL, "\t", &toktmp);
+	const char *p_exec                      = strtok_r(NULL, "\t", &toktmp);
+	const char *p_readonly                  = strtok_r(NULL, "\t", &toktmp);
+	const char *p_setuid                    = strtok_r(NULL, "\t", &toktmp);
+	const char *p_nbmand                    = strtok_r(NULL, "\t", &toktmp);
+	const char *p_encroot                   = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	      char *p_keyloc                    = strtok_r(NULL, "\t", &toktmp) ?: strdupa("none");
+	const char *p_systemd_requires          = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	const char *p_systemd_requiresmountsfor = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	const char *p_systemd_before            = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	const char *p_systemd_after             = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	      char *p_systemd_wantedby          = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
+	      char *p_systemd_requiredby        = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
+	const char *p_systemd_nofail            = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	const char *p_systemd_ignore            = strtok_r(NULL, "\t", &toktmp) ?: "-";
+	/* END CSTYLED */
+
+	const char *pool = dataset;
+	if ((toktmp = strchr(pool, '/')) != NULL)
+		pool = strndupa(pool, toktmp - pool);
+
+	if (p_nbmand == NULL) {
+		fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n",
+		    getpid(), dataset);
+		goto err;
+	}
+
+	/* Minimal pre-requisites to mount a ZFS dataset */
+	const char *after = "zfs-import.target";
+	const char *wants = "zfs-import.target";
+	const char *bindsto = NULL;
+	char *wantedby = NULL;
+	char *requiredby = NULL;
+	bool noauto = false;
+	bool wantedby_append = true;
+
+	/*
+	 * zfs-import.target is not needed if the pool is already imported.
+	 * This avoids a dependency loop on root-on-ZFS systems:
+	 *   systemd-random-seed.service After (via RequiresMountsFor)
+	 *   var-lib.mount After
+	 *   zfs-import.target After
+	 *   zfs-import-{cache,scan}.service After
+	 *   cryptsetup.service After
+	 *   systemd-random-seed.service
+	 */
+	if (tfind(pool, &known_pools, STRCMP)) {
+		after = "";
+		wants = "";
+	}
+
+	if (strcmp(p_systemd_after, "-") == 0)
+		p_systemd_after = NULL;
+	if (strcmp(p_systemd_before, "-") == 0)
+		p_systemd_before = NULL;
+	if (strcmp(p_systemd_requires, "-") == 0)
+		p_systemd_requires = NULL;
+	if (strcmp(p_systemd_requiresmountsfor, "-") == 0)
+		p_systemd_requiresmountsfor = NULL;
+
+
+	if (strcmp(p_encroot, "-") != 0) {
+		char *keyloadunit = *(tofree++) =
+		    systemd_escape(p_encroot, "zfs-load-key@", ".service");
+		if (keyloadunit == NULL)
+			goto err;
+
+		if (strcmp(dataset, p_encroot) == 0) {
+			const char *keymountdep = NULL;
+			bool is_prompt = false;
+			bool need_network = false;
+
+			regmatch_t uri_matches[3];
+			if (regexec(&uri_regex, p_keyloc,
+			    nitems(uri_matches), uri_matches, 0) == 0) {
+				p_keyloc[uri_matches[1].rm_eo] = '\0';
+				p_keyloc[uri_matches[2].rm_eo] = '\0';
+				const char *scheme =
+				    &p_keyloc[uri_matches[1].rm_so];
+				const char *path =
+				    &p_keyloc[uri_matches[2].rm_so];
+
+				if (strcmp(scheme, "https") == 0 ||
+				    strcmp(scheme, "http") == 0)
+					need_network = true;
+				else
+					keymountdep = path;
+			} else {
+				if (strcmp(p_keyloc, "prompt") != 0)
+					fprintf(stderr, PROGNAME "[%d]: %s: "
+					    "unknown non-URI keylocation=%s\n",
+					    getpid(), dataset, p_keyloc);
+
+				is_prompt = true;
+			}
+
+
+			/* Generate the key-load .service unit */
+			FILE *keyloadunit_f = fopenat(destdir_fd, keyloadunit,
+			    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w",
+			    0644);
+			if (!keyloadunit_f) {
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "couldn't open %s under %s: %s\n",
+				    getpid(), dataset, keyloadunit, destdir,
+				    strerror(errno));
+				goto err;
+			}
+
+			fprintf(keyloadunit_f,
+			    OUTPUT_HEADER
+			    "[Unit]\n"
+			    "Description=Load ZFS key for %s\n"
+			    "SourcePath=" FSLIST "/%s\n"
+			    "Documentation=man:zfs-mount-generator(8)\n"
+			    "DefaultDependencies=no\n"
+			    "Wants=%s\n"
+			    "After=%s\n",
+			    dataset, cachefile, wants, after);
+
+			if (need_network)
+				fprintf(keyloadunit_f,
+				    "Wants=network-online.target\n"
+				    "After=network-online.target\n");
+
+			if (p_systemd_requires)
+				fprintf(keyloadunit_f,
+				    "Requires=%s\n", p_systemd_requires);
+
+			if (p_systemd_requiresmountsfor)
+				fprintf(keyloadunit_f,
+				    "RequiresMountsFor=%s\n",
+				    p_systemd_requiresmountsfor);
+			if (keymountdep)
+				fprintf(keyloadunit_f,
+				    "RequiresMountsFor='%s'\n", keymountdep);
+
+			/* BEGIN CSTYLED */
+			fprintf(keyloadunit_f,
+			    "\n"
+			    "[Service]\n"
+			    "Type=oneshot\n"
+			    "RemainAfterExit=yes\n"
+			    "# This avoids a dependency loop involving systemd-journald.socket if this\n"
+			    "# dataset is a parent of the root filesystem.\n"
+			    "StandardOutput=null\n"
+			    "StandardError=null\n"
+			    "ExecStart=/bin/sh -euc '"
+			        "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"unavailable\" ] || exit 0;",
+			    dataset);
+			if (is_prompt)
+				fprintf(keyloadunit_f,
+				    "for i in 1 2 3; do "
+				        "systemd-ask-password --id=\"zfs:%s\" \"Enter passphrase for %s:\" |"
+				        "" ZFS " load-key \"%s\" && exit 0;"
+				    "done;"
+				    "exit 1",
+				    dataset, dataset, dataset);
+			else
+				fprintf(keyloadunit_f,
+				    "exec " ZFS " load-key \"%s\"",
+				    dataset);
+
+			fprintf(keyloadunit_f,
+				"'\n"
+				"ExecStop=/bin/sh -euc '"
+				    "[ \"$$(" ZFS " get -H -o value keystatus \"%s\")\" = \"available\" ] || exit 0;"
+				    "exec " ZFS " unload-key \"%s\""
+				"'\n",
+				dataset, dataset);
+			/* END CSTYLED */
+
+			(void) fclose(keyloadunit_f);
+		}
+
+		/* Update dependencies for the mount file to want this */
+		bindsto = keyloadunit;
+		if (after[0] == '\0')
+			after = keyloadunit;
+		else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1)
+			after = *(tofree++) = toktmp;
+		else {
+			fprintf(stderr, PROGNAME "[%d]: %s: "
+			    "out of memory to generate after=\"%s %s\"!\n",
+			    getpid(), dataset, after, keyloadunit);
+			goto err;
+		}
+	}
+
+
+	/* Skip generation of the mount unit if org.openzfs.systemd:ignore=on */
+	if (strcmp(p_systemd_ignore, "-") == 0 ||
+	    strcmp(p_systemd_ignore, "off") == 0) {
+		/* ok */
+	} else if (strcmp(p_systemd_ignore, "on") == 0)
+		goto end;
+	else {
+		fprintf(stderr, PROGNAME "[%d]: %s: "
+		    "invalid org.openzfs.systemd:ignore=%s\n",
+		    getpid(), dataset, p_systemd_ignore);
+		goto err;
+	}
+
+	/* Check for canmount */
+	if (strcmp(p_canmount, "on") == 0) {
+		/* ok */
+	} else if (strcmp(p_canmount, "noauto") == 0)
+		noauto = true;
+	else if (strcmp(p_canmount, "off") == 0)
+		goto end;
+	else {
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n",
+		    getpid(), dataset, p_canmount);
+		goto err;
+	}
+
+	/* Check for legacy and blank mountpoints */
+	if (strcmp(p_mountpoint, "legacy") == 0 ||
+	    strcmp(p_mountpoint, "none") == 0)
+		goto end;
+	else if (p_mountpoint[0] != '/') {
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n",
+		    getpid(), dataset, p_mountpoint);
+		goto err;
+	}
+
+	/* Escape the mountpoint per systemd policy */
+	simplify_path(p_mountpoint);
+	const char *mountfile = systemd_escape_path(p_mountpoint, "", ".mount");
+	if (mountfile == NULL) {
+		fprintf(stderr,
+		    PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n",
+		    getpid(), dataset, p_mountpoint);
+		goto err;
+	}
+
+
+	/*
+	 * Parse options, cf. lib/libzfs/libzfs_mount.c:zfs_add_options
+	 *
+	 * The longest string achievable here is
+	 * ",atime,strictatime,nodev,noexec,rw,nosuid,nomand".
+	 */
+	char opts[64] = "";
+
+	/* atime */
+	if (strcmp(p_atime, "on") == 0) {
+		/* relatime */
+		if (strcmp(p_relatime, "on") == 0)
+			strcat(opts, ",atime,relatime");
+		else if (strcmp(p_relatime, "off") == 0)
+			strcat(opts, ",atime,strictatime");
+		else
+			fprintf(stderr,
+			    PROGNAME "[%d]: %s: invalid relatime=%s\n",
+			    getpid(), dataset, p_relatime);
+	} else if (strcmp(p_atime, "off") == 0) {
+		strcat(opts, ",noatime");
+	} else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid atime=%s\n",
+		    getpid(), dataset, p_atime);
+
+	/* devices */
+	if (strcmp(p_devices, "on") == 0)
+		strcat(opts, ",dev");
+	else if (strcmp(p_devices, "off") == 0)
+		strcat(opts, ",nodev");
+	else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid devices=%s\n",
+		    getpid(), dataset, p_devices);
+
+	/* exec */
+	if (strcmp(p_exec, "on") == 0)
+		strcat(opts, ",exec");
+	else if (strcmp(p_exec, "off") == 0)
+		strcat(opts, ",noexec");
+	else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid exec=%s\n",
+		    getpid(), dataset, p_exec);
+
+	/* readonly */
+	if (strcmp(p_readonly, "on") == 0)
+		strcat(opts, ",ro");
+	else if (strcmp(p_readonly, "off") == 0)
+		strcat(opts, ",rw");
+	else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid readonly=%s\n",
+		    getpid(), dataset, p_readonly);
+
+	/* setuid */
+	if (strcmp(p_setuid, "on") == 0)
+		strcat(opts, ",suid");
+	else if (strcmp(p_setuid, "off") == 0)
+		strcat(opts, ",nosuid");
+	else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid setuid=%s\n",
+		    getpid(), dataset, p_setuid);
+
+	/* nbmand */
+	if (strcmp(p_nbmand, "on") == 0)
+		strcat(opts, ",mand");
+	else if (strcmp(p_nbmand, "off") == 0)
+		strcat(opts, ",nomand");
+	else
+		fprintf(stderr, PROGNAME "[%d]: %s: invalid nbmand=%s\n",
+		    getpid(), dataset, p_setuid);
+
+	if (strcmp(p_systemd_wantedby, "-") != 0) {
+		noauto = true;
+
+		if (strcmp(p_systemd_wantedby, "none") != 0)
+			wantedby = p_systemd_wantedby;
+	}
+
+	if (strcmp(p_systemd_requiredby, "-") != 0) {
+		noauto = true;
+
+		if (strcmp(p_systemd_requiredby, "none") != 0)
+			requiredby = p_systemd_requiredby;
+	}
+
+	/*
+	 * For datasets with canmount=on, a dependency is created for
+	 * local-fs.target by default. To avoid regressions, this dependency
+	 * is reduced to "wants" rather than "requires" when nofail!=off.
+	 * **THIS MAY CHANGE**
+	 * noauto=on disables this behavior completely.
+	 */
+	if (!noauto) {
+		if (strcmp(p_systemd_nofail, "off") == 0)
+			requiredby = strdupa("local-fs.target");
+		else {
+			wantedby = strdupa("local-fs.target");
+			wantedby_append = strcmp(p_systemd_nofail, "on") != 0;
+		}
+	}
+
+	/*
+	 * Handle existing files:
+	 * 1.	We never overwrite existing files, although we may delete
+	 * 	files if we're sure they were created by us. (see 5.)
+	 * 2.	We handle files differently based on canmount.
+	 * 	Units with canmount=on always have precedence over noauto.
+	 * 	This is enforced by processing these units before all others.
+	 * 	It is important to use p_canmount and not noauto here,
+	 * 	since we categorise by canmount while other properties,
+	 * 	e.g. org.openzfs.systemd:wanted-by, also modify noauto.
+	 * 3.	If no unit file exists for a noauto dataset, we create one.
+	 * 	Additionally, we use noauto_files to track the unit file names
+	 * 	(which are the systemd-escaped mountpoints) of all (exclusively)
+	 * 	noauto datasets that had a file created.
+	 * 4.	If the file to be created is found in the tracking tree,
+	 * 	we do NOT create it.
+	 * 5.	If a file exists for a noauto dataset,
+	 * 	we check whether the file name is in the array.
+	 * 	If it is, we have multiple noauto datasets for the same
+	 * 	mountpoint. In such cases, we remove the file for safety.
+	 * 	We leave the file name in the tracking array to avoid
+	 * 	further noauto datasets creating a file for this path again.
+	 */
+
+	struct stat stbuf;
+	bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0;
+	bool is_known = tfind(mountfile, &noauto_files, STRCMP) != NULL;
+
+	*(tofree++) = (void *)mountfile;
+	if (already_exists) {
+		if (is_known) {
+			/* If it's in noauto_files, we must be noauto too */
+
+			/* See 5 */
+			errno = 0;
+			(void) unlinkat(destdir_fd, mountfile, 0);
+
+			/* See 2 */
+			fprintf(stderr, PROGNAME "[%d]: %s: "
+			    "removing duplicate noauto unit %s%s%s\n",
+			    getpid(), dataset, mountfile,
+			    errno ? "" : " failed: ",
+			    errno ? "" : strerror(errno));
+		} else {
+			/* Don't log for canmount=noauto */
+			if (strcmp(p_canmount, "on") == 0)
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "%s already exists. Skipping.\n",
+				    getpid(), dataset, mountfile);
+		}
+
+		/* File exists: skip current dataset */
+		goto end;
+	} else {
+		if (is_known) {
+			/* See 4 */
+			goto end;
+		} else if (strcmp(p_canmount, "noauto") == 0) {
+			if (tsearch(mountfile, &noauto_files, STRCMP) == NULL)
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "out of memory for noauto datasets! "
+				    "Not tracking %s.\n",
+				    getpid(), dataset, mountfile);
+			else
+				/* mountfile escaped to noauto_files */
+				*(--tofree) = NULL;
+		}
+	}
+
+
+	FILE *mountfile_f = fopenat(destdir_fd, mountfile,
+	    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644);
+	if (!mountfile_f) {
+		fprintf(stderr,
+		    PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n",
+		    getpid(), dataset, mountfile, destdir, strerror(errno));
+		goto err;
+	}
+
+	fprintf(mountfile_f,
+	    OUTPUT_HEADER
+	    "[Unit]\n"
+	    "SourcePath=" FSLIST "/%s\n"
+	    "Documentation=man:zfs-mount-generator(8)\n"
+	    "\n"
+	    "Before=",
+	    cachefile);
+
+	if (p_systemd_before)
+		fprintf(mountfile_f, "%s ", p_systemd_before);
+	fprintf(mountfile_f, "zfs-mount.service"); /* Ensures we don't race */
+	if (requiredby)
+		fprintf(mountfile_f, " %s", requiredby);
+	if (wantedby && wantedby_append)
+		fprintf(mountfile_f, " %s", wantedby);
+
+	fprintf(mountfile_f,
+	    "\n"
+	    "After=");
+	if (p_systemd_after)
+		fprintf(mountfile_f, "%s ", p_systemd_after);
+	fprintf(mountfile_f, "%s\n", after);
+
+	fprintf(mountfile_f, "Wants=%s\n", wants);
+
+	if (bindsto)
+		fprintf(mountfile_f, "BindsTo=%s\n", bindsto);
+	if (p_systemd_requires)
+		fprintf(mountfile_f, "Requires=%s\n", p_systemd_requires);
+	if (p_systemd_requiresmountsfor)
+		fprintf(mountfile_f,
+		    "RequiresMountsFor=%s\n", p_systemd_requiresmountsfor);
+
+	fprintf(mountfile_f,
+	    "\n"
+	    "[Mount]\n"
+	    "Where=%s\n"
+	    "What=%s\n"
+	    "Type=zfs\n"
+	    "Options=defaults%s,zfsutil\n",
+	    p_mountpoint, dataset, opts);
+
+	(void) fclose(mountfile_f);
+
+	if (!requiredby && !wantedby)
+		goto end;
+
+	/* Finally, create the appropriate dependencies */
+	char *linktgt;
+	if (asprintf(&linktgt, "../%s", mountfile) == -1) {
+		fprintf(stderr, PROGNAME "[%d]: %s: "
+		    "out of memory for dependents of %s!\n",
+		    getpid(), dataset, mountfile);
+		goto err;
+	}
+	*(tofree++) = linktgt;
+
+	char *dependencies[][2] = {
+		{"wants", wantedby},
+		{"requires", requiredby},
+		{}
+	};
+	for (__typeof__(&*dependencies) dep = &*dependencies; **dep; ++dep) {
+		if (!(*dep)[1])
+			continue;
+
+		for (char *reqby = strtok_r((*dep)[1], " ", &toktmp);
+		    reqby;
+		    reqby = strtok_r(NULL, " ", &toktmp)) {
+			char *depdir;
+			if (asprintf(
+			    &depdir, "%s.%s", reqby, (*dep)[0]) == -1) {
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "out of memory for dependent dir name "
+				    "\"%s.%s\"!\n",
+				    getpid(), dataset, reqby, (*dep)[0]);
+				continue;
+			}
+
+			(void) mkdirat(destdir_fd, depdir, 0755);
+			int depdir_fd = openat(destdir_fd, depdir,
+			    O_PATH | O_DIRECTORY | O_CLOEXEC);
+			if (depdir_fd < 0) {
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "couldn't open %s under %s: %s\n",
+				    getpid(), dataset, depdir, destdir,
+				    strerror(errno));
+				free(depdir);
+				continue;
+			}
+
+			if (symlinkat(linktgt, depdir_fd, mountfile) == -1)
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "couldn't symlink at "
+				    "%s under %s under %s: %s\n",
+				    getpid(), dataset, mountfile,
+				    depdir, destdir, strerror(errno));
+
+			(void) close(depdir_fd);
+			free(depdir);
+		}
+	}
+
+end:
+	if (tofree >= tofree_all + nitems(tofree_all)) {
+		/*
+		 * This won't happen as-is:
+		 * we've got 8 slots and allocate 4 things at most.
+		 */
+		fprintf(stderr,
+		    PROGNAME "[%d]: %s: need to free %zu > %zu!\n",
+		    getpid(), dataset, tofree - tofree_all, nitems(tofree_all));
+		ret = tofree - tofree_all;
+	}
+
+	while (tofree-- != tofree_all)
+		free(*tofree);
+	return (ret);
+err:
+	ret = 1;
+	goto end;
+}
+
+
+/*
+ * zpool_iter() callback: remember the name of an imported pool.
+ *
+ * A heap copy of the pool's name is inserted into the known_pools tree.
+ * The handle is always closed before returning.
+ *
+ * Returns 0 on success, or ENOMEM if the name could not be duplicated
+ * or could not be inserted into the tree.
+ */
+static int
+pool_enumerator(zpool_handle_t *pool, void *data __attribute__((unused)))
+{
+	/* Assume failure until the name is safely stored in the tree */
+	int err = ENOMEM;
+
+	/*
+	 * The kernel guarantees pool names are unique,
+	 * so no duplicate copy can be leaked by the insertion below
+	 */
+	char *pool_name = strdup(zpool_get_name(pool));
+
+	if (pool_name != NULL &&
+	    tsearch(pool_name, &known_pools, STRCMP) != NULL)
+		err = 0;
+	else
+		free(pool_name);
+
+	zpool_close(pool);
+	return (err);
+}
+
+/*
+ * Entry point of the generator.
+ *
+ * Every file under FSLIST is read line by line and each line is handed to
+ * line_worker(). Lines whose third tab-separated field (canmount) starts
+ * with "on" are processed immediately; all other lines are queued and
+ * processed only after every cache file has been read.
+ *
+ * Accepted invocations: no arguments (the default destdir is kept), or
+ * one or three arguments, of which only the first (the destination
+ * directory) is used.
+ *
+ * Exit status: the OR of all line_worker() results; 1 on a wrong argument
+ * count, an unopenable destination directory or a regex that fails to
+ * compile; 0 if FSLIST cannot be opened.
+ */
+int
+main(int argc, char **argv)
+{
+	struct timespec time_init = {};
+	clock_gettime(CLOCK_MONOTONIC_RAW, &time_init);
+
+	/* Send diagnostics to the kernel log if /dev/kmsg can be opened */
+	{
+		int kmfd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
+		if (kmfd >= 0) {
+			(void) dup2(kmfd, STDERR_FILENO);
+			(void) close(kmfd);
+
+			setlinebuf(stderr);
+		}
+	}
+
+	switch (argc) {
+	case 1:
+		/* Use default */
+		break;
+	case 2:
+	case 4:
+		destdir = argv[1];
+		break;
+	default:
+		fprintf(stderr,
+		    PROGNAME "[%d]: wrong argument count: %d\n",
+		    getpid(), argc - 1);
+		_exit(1);
+	}
+
+	{
+		destdir_fd = open(destdir, O_PATH | O_DIRECTORY | O_CLOEXEC);
+		if (destdir_fd < 0) {
+			fprintf(stderr, PROGNAME "[%d]: "
+			    "can't open destination directory %s: %s\n",
+			    getpid(), destdir, strerror(errno));
+			_exit(1);
+		}
+	}
+
+	/* A missing cache directory is not an error: nothing to generate */
+	DIR *fslist_dir = opendir(FSLIST);
+	if (!fslist_dir) {
+		if (errno != ENOENT)
+			fprintf(stderr,
+			    PROGNAME "[%d]: couldn't open " FSLIST ": %s\n",
+			    getpid(), strerror(errno));
+		_exit(0);
+	}
+
+	/* Collect imported pool names into known_pools; failure is non-fatal */
+	{
+		libzfs_handle_t *libzfs = libzfs_init();
+		if (libzfs) {
+			if (zpool_iter(libzfs, pool_enumerator, NULL) != 0)
+				fprintf(stderr, PROGNAME "[%d]: "
+				    "error listing pools, ignoring\n",
+				    getpid());
+			libzfs_fini(libzfs);
+		} else
+			fprintf(stderr, PROGNAME "[%d]: "
+			    "couldn't start libzfs, ignoring\n",
+			    getpid());
+	}
+
+	{
+		int regerr = regcomp(&uri_regex, URI_REGEX_S, 0);
+		if (regerr != 0) {
+			fprintf(stderr,
+			    PROGNAME "[%d]: invalid regex: %d\n",
+			    getpid(), regerr);
+			_exit(1);
+		}
+	}
+
+	/*
+	 * Debug output is enabled by a non-zero $ZFS_DEBUG or, if that is
+	 * unset, by "debug" appearing anywhere in the kernel command line.
+	 * The getline() buffer is shared with the cache file loop below.
+	 */
+	bool debug = false;
+	char *line = NULL;
+	size_t linelen = 0;
+	{
+		const char *dbgenv = getenv("ZFS_DEBUG");
+		if (dbgenv)
+			debug = atoi(dbgenv);
+		else {
+			FILE *cmdline = fopen("/proc/cmdline", "re");
+			if (cmdline != NULL) {
+				if (getline(&line, &linelen, cmdline) >= 0)
+					debug = strstr(line, "debug");
+				(void) fclose(cmdline);
+			}
+		}
+
+		if (debug && !isatty(STDOUT_FILENO))
+			(void) dup2(STDERR_FILENO, STDOUT_FILENO);
+	}
+
+	struct timespec time_start = {};
+	if (debug)
+		clock_gettime(CLOCK_MONOTONIC_RAW, &time_start);
+
+	/* Lines whose canmount is not "on", processed after all others */
+	struct line {
+		char *line;
+		const char *fname;
+		struct line *next;
+	} *lines_canmount_not_on = NULL;
+
+	int ret = 0;
+	struct dirent *cachent;
+	while ((cachent = readdir(fslist_dir)) != NULL) {
+		if (strcmp(cachent->d_name, ".") == 0 ||
+		    strcmp(cachent->d_name, "..") == 0)
+			continue;
+
+		FILE *cachefile = fopenat(dirfd(fslist_dir), cachent->d_name,
+		    O_RDONLY | O_CLOEXEC, "r", 0);
+		if (!cachefile) {
+			fprintf(stderr, PROGNAME "[%d]: "
+			    "couldn't open %s under " FSLIST ": %s\n",
+			    getpid(), cachent->d_name, strerror(errno));
+			continue;
+		}
+
+		/*
+		 * With FREE_STATICS a static placeholder is used, so no
+		 * per-file name copy is allocated for the deferred lines.
+		 */
+		const char *filename = FREE_STATICS ? "(elided)" : NULL;
+
+		ssize_t read;
+		while ((read = getline(&line, &linelen, cachefile)) >= 0) {
+			/*
+			 * Strip the trailing newline, if there is one: the
+			 * last line of a file need not be newline-terminated
+			 * and cutting its final byte blindly would truncate
+			 * the last field.
+			 */
+			if (read > 0 && line[read - 1] == '\n')
+				line[read - 1] = '\0';
+
+			/* Skip two tab-separated fields to reach canmount */
+			char *canmount = line;
+			canmount += strcspn(canmount, "\t");
+			canmount += strspn(canmount, "\t");
+			canmount += strcspn(canmount, "\t");
+			canmount += strspn(canmount, "\t");
+			bool canmount_on = strncmp(canmount, "on", 2) == 0;
+
+			if (canmount_on)
+				ret |= line_worker(line, cachent->d_name);
+			else {
+				if (filename == NULL)
+					filename =
+					    strdup(cachent->d_name) ?: "(?)";
+
+				struct line *l = calloc(1, sizeof (*l));
+				char *nl = strdup(line);
+				if (l == NULL || nl == NULL) {
+					fprintf(stderr, PROGNAME "[%d]: "
+					    "out of memory for \"%s\" in %s\n",
+					    getpid(), line, cachent->d_name);
+					free(l);
+					free(nl);
+					continue;
+				}
+				/* Push onto the head of the deferred list */
+				l->line = nl;
+				l->fname = filename;
+				l->next = lines_canmount_not_on;
+				lines_canmount_not_on = l;
+			}
+		}
+
+		(void) fclose(cachefile);
+	}
+	free(line);
+
+	/* Now process everything that was deferred */
+	while (lines_canmount_not_on) {
+		struct line *l = lines_canmount_not_on;
+		lines_canmount_not_on = l->next;
+
+		ret |= line_worker(l->line, l->fname);
+		if (FREE_STATICS) {
+			free(l->line);
+			free(l);
+		}
+	}
+
+	if (debug) {
+		struct timespec time_end = {};
+		clock_gettime(CLOCK_MONOTONIC_RAW, &time_end);
+
+		struct rusage usage;
+		getrusage(RUSAGE_SELF, &usage);
+		printf(
+		    "\n"
+		    PROGNAME ": "
+		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
+		    (unsigned long long) usage.ru_utime.tv_sec,
+		    (unsigned int) usage.ru_utime.tv_usec,
+		    (unsigned long long) usage.ru_stime.tv_sec,
+		    (unsigned int) usage.ru_stime.tv_usec,
+		    usage.ru_maxrss * 1024);
+
+		/* time_end := time_end - time_start ("real") */
+		if (time_start.tv_nsec > time_end.tv_nsec) {
+			time_end.tv_nsec =
+			    1000000000 + time_end.tv_nsec - time_start.tv_nsec;
+			time_end.tv_sec -= 1;
+		} else
+			time_end.tv_nsec -= time_start.tv_nsec;
+		time_end.tv_sec -= time_start.tv_sec;
+
+		/* time_start := time_start - time_init ("init") */
+		if (time_init.tv_nsec > time_start.tv_nsec) {
+			time_start.tv_nsec =
+			    1000000000 + time_start.tv_nsec - time_init.tv_nsec;
+			time_start.tv_sec -= 1;
+		} else
+			time_start.tv_nsec -= time_init.tv_nsec;
+		time_start.tv_sec -= time_init.tv_sec;
+
+		/* time_init := init + real ("total"), carrying nanoseconds */
+		time_init.tv_nsec = time_start.tv_nsec + time_end.tv_nsec;
+		time_init.tv_sec =
+		    time_start.tv_sec + time_end.tv_sec +
+		    time_init.tv_nsec / 1000000000;
+		time_init.tv_nsec %= 1000000000;
+
+		printf(PROGNAME ": "
+		    "total=%llu.%09llus = "
+		    "init=%llu.%09llus + real=%llu.%09llus\n",
+		    (unsigned long long) time_init.tv_sec,
+		    (unsigned long long) time_init.tv_nsec,
+		    (unsigned long long) time_start.tv_sec,
+		    (unsigned long long) time_start.tv_nsec,
+		    (unsigned long long) time_end.tv_sec,
+		    (unsigned long long) time_end.tv_nsec);
+
+		/* _exit() below does not flush stdio buffers */
+		fflush(stdout);
+	}
+
+	if (FREE_STATICS) {
+		closedir(fslist_dir);
+		tdestroy(noauto_files, free);
+		tdestroy(known_pools, free);
+		regfree(&uri_regex);
+	}
+	_exit(ret);
+}

diff --git a/zfs/etc/systemd/system-generators/zfs-mount-generator.in b/zfs/etc/systemd/system-generators/zfs-mount-generator.in
deleted file mode 100755
index 28439f4..0000000
--- a/zfs/etc/systemd/system-generators/zfs-mount-generator.in
+++ /dev/null

@@ -1,473 +0,0 @@
-#!/bin/sh
-
-# zfs-mount-generator - generates systemd mount units for zfs
-# Copyright (c) 2017 Antonio Russo <antonio.e.russo@gmail.com>
-# Copyright (c) 2020 InsanePrawn <insane.prawny@gmail.com>
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-set -e
-
-FSLIST="@sysconfdir@/zfs/zfs-list.cache"
-
-[ -d "${FSLIST}" ] || exit 0
-
-do_fail() {
-  printf 'zfs-mount-generator: %s\n' "$*" > /dev/kmsg
-  exit 1
-}
-
-# test if $1 is in space-separated list $2
-is_known() {
-  query="$1"
-  IFS=' '
-  for element in $2 ; do
-    if [ "$query" = "$element" ] ; then
-      return 0
-    fi
-  done
-  return 1
-}
-
-# create dependency on unit file $1
-# of type $2, i.e. "wants" or "requires"
-# in the target units from space-separated list $3
-create_dependencies() {
-  unitfile="$1"
-  suffix="$2"
-  IFS=' '
-  for target in $3 ; do
-    target_dir="${dest_norm}/${target}.${suffix}/"
-    mkdir -p "${target_dir}"
-    ln -s "../${unitfile}" "${target_dir}"
-  done
-}
-
-# see systemd.generator
-if [ $# -eq 0 ] ; then
-  dest_norm="/tmp"
-elif [ $# -eq 3 ] ; then
-  dest_norm="${1}"
-else
-  do_fail "zero or three arguments required"
-fi
-
-pools=$(zpool list -H -o name || true)
-
-# All needed information about each ZFS is available from
-# zfs list -H -t filesystem -o <properties>
-# cached in $FSLIST, and each line is processed by the following function:
-# See the list below for the properties and their order
-
-process_line() {
-
-  # zfs list -H -o name,...
-  # fields are tab separated
-  IFS="$(printf '\t')"
-  # shellcheck disable=SC2086
-  set -- $1
-
-  dataset="${1}"
-  pool="${dataset%%/*}"
-  p_mountpoint="${2}"
-  p_canmount="${3}"
-  p_atime="${4}"
-  p_relatime="${5}"
-  p_devices="${6}"
-  p_exec="${7}"
-  p_readonly="${8}"
-  p_setuid="${9}"
-  p_nbmand="${10}"
-  p_encroot="${11}"
-  p_keyloc="${12}"
-  p_systemd_requires="${13}"
-  p_systemd_requiresmountsfor="${14}"
-  p_systemd_before="${15}"
-  p_systemd_after="${16}"
-  p_systemd_wantedby="${17}"
-  p_systemd_requiredby="${18}"
-  p_systemd_nofail="${19}"
-  p_systemd_ignore="${20}"
-
-  # Minimal pre-requisites to mount a ZFS dataset
-  # By ordering before zfs-mount.service, we avoid race conditions.
-  after="zfs-import.target"
-  before="zfs-mount.service"
-  wants="zfs-import.target"
-  requires=""
-  requiredmounts=""
-  bindsto=""
-  wantedby=""
-  requiredby=""
-  noauto="off"
-
-  # If the pool is already imported, zfs-import.target is not needed.  This
-  # avoids a dependency loop on root-on-ZFS systems:
-  # systemd-random-seed.service After (via RequiresMountsFor) var-lib.mount
-  # After zfs-import.target After zfs-import-{cache,scan}.service After
-  # cryptsetup.service After systemd-random-seed.service.
-  #
-  # Pools are newline-separated and may contain spaces in their names.
-  # There is no better portable way to set IFS to just a newline.  Using
-  # $(printf '\n') doesn't work because $(...) strips trailing newlines.
-  IFS="
-"
-  for p in $pools ; do
-    if [ "$p" = "$pool" ] ; then
-      after=""
-      wants=""
-      break
-    fi
-  done
-
-  if [ -n "${p_systemd_after}" ] && \
-      [ "${p_systemd_after}" != "-" ] ; then
-    after="${p_systemd_after} ${after}"
-  fi
-
-  if [ -n "${p_systemd_before}" ] && \
-      [ "${p_systemd_before}" != "-" ] ; then
-    before="${p_systemd_before} ${before}"
-  fi
-
-  if [ -n "${p_systemd_requires}" ] && \
-      [ "${p_systemd_requires}" != "-" ] ; then
-    requires="Requires=${p_systemd_requires}"
-  fi
-
-  if [ -n "${p_systemd_requiresmountsfor}" ] && \
-      [ "${p_systemd_requiresmountsfor}" != "-" ] ; then
-    requiredmounts="RequiresMountsFor=${p_systemd_requiresmountsfor}"
-  fi
-
-  # Handle encryption
-  if [ -n "${p_encroot}" ] &&
-      [ "${p_encroot}" != "-" ] ; then
-    keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service"
-    if [ "${p_encroot}" = "${dataset}" ] ; then
-      keymountdep=""
-      if [ "${p_keyloc%%://*}" = "file" ] ; then
-        if [ -n "${requiredmounts}" ] ; then
-          keymountdep="${requiredmounts} '${p_keyloc#file://}'"
-        else
-          keymountdep="RequiresMountsFor='${p_keyloc#file://}'"
-        fi
-        keyloadscript="@sbindir@/zfs load-key \"${dataset}\""
-      elif [ "${p_keyloc}" = "prompt" ] ; then
-        keyloadscript="\
-count=0;\
-while [ \$\$count -lt 3 ];do\
-  systemd-ask-password --id=\"zfs:${dataset}\"\
-    \"Enter passphrase for ${dataset}:\"|\
-    @sbindir@/zfs load-key \"${dataset}\" && exit 0;\
-  count=\$\$((count + 1));\
-done;\
-exit 1"
-      else
-        printf 'zfs-mount-generator: (%s) invalid keylocation\n' \
-          "${dataset}" >/dev/kmsg
-      fi
-      keyloadcmd="\
-/bin/sh -c '\
-set -eu;\
-keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";\
-[ \"\$\$keystatus\" = \"unavailable\" ] || exit 0;\
-${keyloadscript}'"
-      keyunloadcmd="\
-/bin/sh -c '\
-set -eu;\
-keystatus=\"\$\$(@sbindir@/zfs get -H -o value keystatus \"${dataset}\")\";\
-[ \"\$\$keystatus\" = \"available\" ] || exit 0;\
-@sbindir@/zfs unload-key \"${dataset}\"'"
-
-
-
-      # Generate the key-load .service unit
-      #
-      # Note: It is tempting to use a `<<EOF` style here-document for this, but
-      #   bash requires a writable /tmp or $TMPDIR for that. This is not always
-      #   available early during boot.
-      #
-      echo \
-"# Automatically generated by zfs-mount-generator
-
-[Unit]
-Description=Load ZFS key for ${dataset}
-SourcePath=${cachefile}
-Documentation=man:zfs-mount-generator(8)
-DefaultDependencies=no
-Wants=${wants}
-After=${after}
-${requires}
-${keymountdep}
-
-[Service]
-Type=oneshot
-RemainAfterExit=yes
-# This avoids a dependency loop involving systemd-journald.socket if this
-# dataset is a parent of the root filesystem.
-StandardOutput=null
-StandardError=null
-ExecStart=${keyloadcmd}
-ExecStop=${keyunloadcmd}"   > "${dest_norm}/${keyloadunit}"
-    fi
-    # Update the dependencies for the mount file to want the
-    # key-loading unit.
-    wants="${wants}"
-    bindsto="BindsTo=${keyloadunit}"
-    after="${after} ${keyloadunit}"
-  fi
-
-  # Prepare the .mount unit
-
-  # skip generation of the mount unit if org.openzfs.systemd:ignore is "on"
-  if [ -n "${p_systemd_ignore}" ] ; then
-    if [ "${p_systemd_ignore}" = "on" ] ; then
-      return
-    elif [ "${p_systemd_ignore}" = "-" ] \
-      || [ "${p_systemd_ignore}" = "off" ] ; then
-      : # This is OK
-    else
-      do_fail "invalid org.openzfs.systemd:ignore for ${dataset}"
-    fi
-  fi
-
-  # Check for canmount=off .
-  if [ "${p_canmount}" = "off" ] ; then
-    return
-  elif [ "${p_canmount}" = "noauto" ] ; then
-    noauto="on"
-  elif [ "${p_canmount}" = "on" ] ; then
-    : # This is OK
-  else
-    do_fail "invalid canmount for ${dataset}"
-  fi
-
-  # Check for legacy and blank mountpoints.
-  if [ "${p_mountpoint}" = "legacy" ] ; then
-    return
-  elif [ "${p_mountpoint}" = "none" ] ; then
-    return
-  elif [ "${p_mountpoint%"${p_mountpoint#?}"}" != "/" ] ; then
-    do_fail "invalid mountpoint for ${dataset}"
-  fi
-
-  # Escape the mountpoint per systemd policy.
-  mountfile="$(systemd-escape --path --suffix=mount "${p_mountpoint}")"
-
-  # Parse options
-  # see lib/libzfs/libzfs_mount.c:zfs_add_options
-  opts=""
-
-  # atime
-  if [ "${p_atime}" = on ] ; then
-    # relatime
-    if [ "${p_relatime}" = on ] ; then
-      opts="${opts},atime,relatime"
-    elif [ "${p_relatime}" = off ] ; then
-      opts="${opts},atime,strictatime"
-    else
-      printf 'zfs-mount-generator: (%s) invalid relatime\n' \
-        "${dataset}" >/dev/kmsg
-    fi
-  elif [ "${p_atime}" = off ] ; then
-    opts="${opts},noatime"
-  else
-    printf 'zfs-mount-generator: (%s) invalid atime\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  # devices
-  if [ "${p_devices}" = on ] ; then
-    opts="${opts},dev"
-  elif [ "${p_devices}" = off ] ; then
-    opts="${opts},nodev"
-  else
-    printf 'zfs-mount-generator: (%s) invalid devices\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  # exec
-  if [ "${p_exec}" = on ] ; then
-    opts="${opts},exec"
-  elif [ "${p_exec}" = off ] ; then
-    opts="${opts},noexec"
-  else
-    printf 'zfs-mount-generator: (%s) invalid exec\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  # readonly
-  if [ "${p_readonly}" = on ] ; then
-    opts="${opts},ro"
-  elif [ "${p_readonly}" = off ] ; then
-    opts="${opts},rw"
-  else
-    printf 'zfs-mount-generator: (%s) invalid readonly\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  # setuid
-  if [ "${p_setuid}" = on ] ; then
-    opts="${opts},suid"
-  elif [ "${p_setuid}" = off ] ; then
-    opts="${opts},nosuid"
-  else
-    printf 'zfs-mount-generator: (%s) invalid setuid\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  # nbmand
-  if [ "${p_nbmand}" = on ]  ; then
-    opts="${opts},mand"
-  elif [ "${p_nbmand}" = off ] ; then
-    opts="${opts},nomand"
-  else
-    printf 'zfs-mount-generator: (%s) invalid nbmand\n' \
-      "${dataset}" >/dev/kmsg
-  fi
-
-  if [ -n "${p_systemd_wantedby}" ] && \
-      [ "${p_systemd_wantedby}" != "-" ] ; then
-    noauto="on"
-    if [ "${p_systemd_wantedby}" = "none" ] ; then
-      wantedby=""
-    else
-      wantedby="${p_systemd_wantedby}"
-      before="${before} ${wantedby}"
-    fi
-  fi
-
-  if [ -n "${p_systemd_requiredby}" ] && \
-      [ "${p_systemd_requiredby}" != "-" ] ; then
-    noauto="on"
-    if [ "${p_systemd_requiredby}" = "none" ] ; then
-      requiredby=""
-    else
-      requiredby="${p_systemd_requiredby}"
-      before="${before} ${requiredby}"
-    fi
-  fi
-
-  # For datasets with canmount=on, a dependency is created for
-  # local-fs.target by default. To avoid regressions, this dependency
-  # is reduced to "wants" rather than "requires" when nofail is not "off".
-  # **THIS MAY CHANGE**
-  # noauto=on disables this behavior completely.
-  if [ "${noauto}" != "on" ] ; then
-    if [ "${p_systemd_nofail}" = "off" ] ; then
-      requiredby="local-fs.target"
-      before="${before} local-fs.target"
-    else
-      wantedby="local-fs.target"
-      if [ "${p_systemd_nofail}" != "on" ] ; then
-        before="${before} local-fs.target"
-      fi
-    fi
-  fi
-
-  # Handle existing files:
-  # 1.  We never overwrite existing files, although we may delete
-  #     files if we're sure they were created by us. (see 5.)
-  # 2.  We handle files differently based on canmount. Units with canmount=on
-  #     always have precedence over noauto. This is enforced by the sort pipe
-  #     in the loop around this function.
-  #     It is important to use $p_canmount and not $noauto here, since we
-  #     sort by canmount while other properties also modify $noauto, e.g.
-  #     org.openzfs.systemd:wanted-by.
-  # 3.  If no unit file exists for a noauto dataset, we create one.
-  #     Additionally, we use $noauto_files to track the unit file names
-  #     (which are the systemd-escaped mountpoints) of all (exclusively)
-  #     noauto datasets that had a file created.
-  # 4.  If the file to be created is found in the tracking variable,
-  #     we do NOT create it.
-  # 5.  If a file exists for a noauto dataset, we check whether the file
-  #     name is in the variable. If it is, we have multiple noauto datasets
-  #     for the same mountpoint. In such cases, we remove the file for safety.
-  #     To avoid further noauto datasets creating a file for this path again,
-  #     we leave the file name in the tracking variable.
-  if [ -e "${dest_norm}/${mountfile}" ] ; then
-    if is_known "$mountfile" "$noauto_files" ; then
-      # if it's in $noauto_files, we must be noauto too. See 2.
-      printf 'zfs-mount-generator: removing duplicate noauto %s\n' \
-        "${mountfile}" >/dev/kmsg
-      # See 5.
-      rm "${dest_norm}/${mountfile}"
-    else
-      # don't log for canmount=noauto
-      if [  "${p_canmount}" = "on" ] ; then
-        printf 'zfs-mount-generator: %s already exists. Skipping.\n' \
-          "${mountfile}" >/dev/kmsg
-      fi
-    fi
-    # file exists; Skip current dataset.
-    return
-  else
-    if is_known "${mountfile}" "${noauto_files}" ; then
-      # See 4.
-      return
-    elif [ "${p_canmount}" = "noauto" ] ; then
-      noauto_files="${mountfile} ${noauto_files}"
-    fi
-  fi
-
-  # Create the .mount unit file.
-  #
-  # (Do not use `<<EOF`-style here-documents for this, see warning above)
-  #
-  echo \
-"# Automatically generated by zfs-mount-generator
-
-[Unit]
-SourcePath=${cachefile}
-Documentation=man:zfs-mount-generator(8)
-
-Before=${before}
-After=${after}
-Wants=${wants}
-${bindsto}
-${requires}
-${requiredmounts}
-
-[Mount]
-Where=${p_mountpoint}
-What=${dataset}
-Type=zfs
-Options=defaults${opts},zfsutil" > "${dest_norm}/${mountfile}"
-
-  # Finally, create the appropriate dependencies
-  create_dependencies "${mountfile}" "wants" "$wantedby"
-  create_dependencies "${mountfile}" "requires" "$requiredby"
-
-}
-
-for cachefile in "${FSLIST}/"* ; do
-  # Disable glob expansion to protect against special characters when parsing.
-  set -f
-  # Sort cachefile's lines by canmount, "on" before "noauto"
-  # and feed each line into process_line
-  sort -t "$(printf '\t')" -k 3 -r "${cachefile}" | \
-  ( # subshell is necessary for `sort|while read` and $noauto_files
-    noauto_files=""
-    while read -r fs ; do
-      process_line "${fs}"
-    done
-  )
-done

diff --git a/zfs/etc/systemd/system/.gitignore b/zfs/etc/systemd/system/.gitignore
index efada54..4813c65 100644
--- a/zfs/etc/systemd/system/.gitignore
+++ b/zfs/etc/systemd/system/.gitignore

@@ -1,3 +1,4 @@
 *.service
 *.target
 *.preset
+*.timer

diff --git a/zfs/etc/systemd/system/Makefile.am b/zfs/etc/systemd/system/Makefile.am
index 4e14467..35f833d 100644
--- a/zfs/etc/systemd/system/Makefile.am
+++ b/zfs/etc/systemd/system/Makefile.am

@@ -1,3 +1,5 @@
+include $(top_srcdir)/config/Substfiles.am
+
 systemdpreset_DATA = \
 	50-zfs.preset
 
@@ -10,31 +12,14 @@
 	zfs-volume-wait.service \
 	zfs-import.target \
 	zfs-volumes.target \
-	zfs.target
+	zfs.target \
+	zfs-scrub-monthly@.timer \
+	zfs-scrub-weekly@.timer \
+	zfs-scrub@.service
 
-EXTRA_DIST = \
-	$(top_srcdir)/etc/systemd/system/zfs-zed.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-import-cache.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-import-scan.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-mount.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-share.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-import.target.in \
-	$(top_srcdir)/etc/systemd/system/zfs-volume-wait.service.in \
-	$(top_srcdir)/etc/systemd/system/zfs-volumes.target.in \
-	$(top_srcdir)/etc/systemd/system/zfs.target.in \
-	$(top_srcdir)/etc/systemd/system/50-zfs.preset.in
-
-$(systemdunit_DATA) $(systemdpreset_DATA):%:%.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@runstatedir\@,$(runstatedir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
+SUBSTFILES += $(systemdpreset_DATA) $(systemdunit_DATA)
 
 install-data-hook:
 	$(MKDIR_P) "$(DESTDIR)$(systemdunitdir)"
 	ln -sf /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-import.service"
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(systemdunit_DATA) $(systemdpreset_DATA)
+	ln -sf /dev/null "$(DESTDIR)$(systemdunitdir)/zfs-load-key.service"

diff --git a/zfs/etc/systemd/system/zfs-import-cache.service.in b/zfs/etc/systemd/system/zfs-import-cache.service.in
index 47c5b07..e841ca5 100644
--- a/zfs/etc/systemd/system/zfs-import-cache.service.in
+++ b/zfs/etc/systemd/system/zfs-import-cache.service.in

@@ -5,16 +5,16 @@
 Requires=systemd-udev-settle.service
 After=systemd-udev-settle.service
 After=cryptsetup.target
-After=multipathd.target
+After=multipathd.service
 After=systemd-remount-fs.service
 Before=zfs-import.target
-ConditionPathExists=@sysconfdir@/zfs/zpool.cache
+ConditionFileNotEmpty=@sysconfdir@/zfs/zpool.cache
 ConditionPathIsDirectory=/sys/module/zfs
 
 [Service]
 Type=oneshot
 RemainAfterExit=yes
-ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN
+ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN $ZPOOL_IMPORT_OPTS
 
 [Install]
 WantedBy=zfs-import.target

diff --git a/zfs/etc/systemd/system/zfs-import-scan.service.in b/zfs/etc/systemd/system/zfs-import-scan.service.in
index 6520f32..598ef50 100644
--- a/zfs/etc/systemd/system/zfs-import-scan.service.in
+++ b/zfs/etc/systemd/system/zfs-import-scan.service.in

@@ -5,15 +5,15 @@
 Requires=systemd-udev-settle.service
 After=systemd-udev-settle.service
 After=cryptsetup.target
-After=multipathd.target
+After=multipathd.service
 Before=zfs-import.target
-ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
+ConditionFileNotEmpty=!@sysconfdir@/zfs/zpool.cache
 ConditionPathIsDirectory=/sys/module/zfs
 
 [Service]
 Type=oneshot
 RemainAfterExit=yes
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none
+ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS
 
 [Install]
 WantedBy=zfs-import.target

diff --git a/zfs/etc/systemd/system/zfs-scrub-monthly@.timer.in b/zfs/etc/systemd/system/zfs-scrub-monthly@.timer.in
new file mode 100644
index 0000000..9030684
--- /dev/null
+++ b/zfs/etc/systemd/system/zfs-scrub-monthly@.timer.in

@@ -0,0 +1,12 @@
+[Unit]
+Description=Monthly zpool scrub timer for %i
+Documentation=man:zpool-scrub(8)
+
+[Timer]
+OnCalendar=monthly
+Persistent=true
+RandomizedDelaySec=1h
+Unit=zfs-scrub@%i.service
+
+[Install]
+WantedBy=timers.target

diff --git a/zfs/etc/systemd/system/zfs-scrub-weekly@.timer.in b/zfs/etc/systemd/system/zfs-scrub-weekly@.timer.in
new file mode 100644
index 0000000..ede6995
--- /dev/null
+++ b/zfs/etc/systemd/system/zfs-scrub-weekly@.timer.in

@@ -0,0 +1,12 @@
+[Unit]
+Description=Weekly zpool scrub timer for %i
+Documentation=man:zpool-scrub(8)
+
+[Timer]
+OnCalendar=weekly
+Persistent=true
+RandomizedDelaySec=1h
+Unit=zfs-scrub@%i.service
+
+[Install]
+WantedBy=timers.target

diff --git a/zfs/etc/systemd/system/zfs-scrub@.service.in b/zfs/etc/systemd/system/zfs-scrub@.service.in
new file mode 100644
index 0000000..bebe91d
--- /dev/null
+++ b/zfs/etc/systemd/system/zfs-scrub@.service.in

@@ -0,0 +1,14 @@
+[Unit]
+Description=zpool scrub on %i
+Documentation=man:zpool-scrub(8)
+Requires=zfs.target
+After=zfs.target
+ConditionACPower=true
+ConditionPathIsDirectory=/sys/module/zfs
+
+[Service]
+ExecStart=/bin/sh -c '\
+if @sbindir@/zpool status %i | grep "scrub in progress"; then\
+exec @sbindir@/zpool wait -t scrub %i;\
+else exec @sbindir@/zpool scrub -w %i; fi'
+ExecStop=-/bin/sh -c '@sbindir@/zpool scrub -p %i 2>/dev/null || true'

diff --git a/zfs/etc/systemd/system/zfs-share.service.in b/zfs/etc/systemd/system/zfs-share.service.in
index 5f4ba41..7450775 100644
--- a/zfs/etc/systemd/system/zfs-share.service.in
+++ b/zfs/etc/systemd/system/zfs-share.service.in

@@ -8,11 +8,11 @@
 After=zfs-mount.service
 PartOf=nfs-server.service nfs-kernel-server.service
 PartOf=smb.service
+ConditionPathIsDirectory=/sys/module/zfs
 
 [Service]
 Type=oneshot
 RemainAfterExit=yes
-ExecStartPre=-/bin/rm -f /etc/dfs/sharetab
 ExecStart=@sbindir@/zfs share -a
 
 [Install]

diff --git a/zfs/etc/systemd/system/zfs-volume-wait.service.in b/zfs/etc/systemd/system/zfs-volume-wait.service.in
index 75bd9fc..4c77724 100644
--- a/zfs/etc/systemd/system/zfs-volume-wait.service.in
+++ b/zfs/etc/systemd/system/zfs-volume-wait.service.in

@@ -3,6 +3,7 @@
 DefaultDependencies=no
 After=systemd-udev-settle.service
 After=zfs-import.target
+ConditionPathIsDirectory=/sys/module/zfs
 
 [Service]
 Type=oneshot

diff --git a/zfs/etc/systemd/system/zfs-zed.service.in b/zfs/etc/systemd/system/zfs-zed.service.in
index f431362..be80025 100644
--- a/zfs/etc/systemd/system/zfs-zed.service.in
+++ b/zfs/etc/systemd/system/zfs-zed.service.in

@@ -1,10 +1,11 @@
 [Unit]
 Description=ZFS Event Daemon (zed)
 Documentation=man:zed(8)
+ConditionPathIsDirectory=/sys/module/zfs
 
 [Service]
 ExecStart=@sbindir@/zed -F
-Restart=on-abort
+Restart=always
 
 [Install]
 Alias=zed.service

diff --git a/zfs/etc/zfs/Makefile.am b/zfs/etc/zfs/Makefile.am
index 81567a4..1fc57e1 100644
--- a/zfs/etc/zfs/Makefile.am
+++ b/zfs/etc/zfs/Makefile.am

@@ -1,34 +1,19 @@
+include $(top_srcdir)/config/Substfiles.am
+include $(top_srcdir)/config/Shellcheck.am
+
 pkgsysconfdir = $(sysconfdir)/zfs
 
-pkgsysconf_DATA = \
+dist_pkgsysconf_DATA = \
 	vdev_id.conf.alias.example \
 	vdev_id.conf.sas_direct.example \
 	vdev_id.conf.sas_switch.example \
 	vdev_id.conf.multipath.example \
 	vdev_id.conf.scsi.example
-pkgsysconf_SCRIPTS = \
+
+pkgsysconf_DATA = \
 	zfs-functions
 
-EXTRA_DIST = $(pkgsysconf_DATA) \
-	zfs-functions.in
+SUBSTFILES += $(pkgsysconf_DATA)
 
-$(pkgsysconf_SCRIPTS):%:%.in Makefile
-	-(if [ -e /etc/debian_version ]; then \
-		NFS_SRV=nfs-kernel-server; \
-	  else \
-		NFS_SRV=nfs; \
-	  fi; \
-	  if [ -e /sbin/openrc-run ]; then \
-		SHELL=/sbin/openrc-run; \
-	  else \
-		SHELL=/bin/sh; \
-	  fi; \
-	  $(SED) \
-		 -e 's,@sbindir\@,$(sbindir),g' \
-		 -e 's,@sysconfdir\@,$(sysconfdir),g' \
-		 -e 's,@initconfdir\@,$(initconfdir),g' \
-		 $< >'$@'; \
-	  [ '$@' = 'zfs-functions' ] || \
-		chmod +x '$@')
-
-CLEANFILES = $(pkgsysconf_SCRIPTS)
+SHELLCHECKSCRIPTS = $(pkgsysconf_DATA)
+SHELLCHECK_SHELL = dash # local variables

diff --git a/zfs/etc/zfs/zfs-functions.in b/zfs/etc/zfs/zfs-functions.in
index 043f1b0..30441dc 100644
--- a/zfs/etc/zfs/zfs-functions.in
+++ b/zfs/etc/zfs/zfs-functions.in

@@ -1,26 +1,24 @@
-# This is a script with common functions etc used by zfs-import, zfs-mount,
-# zfs-share and zfs-zed.
+# This is a script with common functions etc used by zfs-import, zfs-load-key,
+# zfs-mount, zfs-share and zfs-zed.
 #
 # It is _NOT_ to be called independently
 #
 # Released under the 2-clause BSD license.
 #
-# The original script that acted as a template for this script came from
-# the Debian GNU/Linux kFreeBSD ZFS packages (which did not include a
-# licensing stansa) in the commit dated Mar 24, 2011:
-#   https://github.com/zfsonlinux/pkg-zfs/commit/80a3ae582b59c0250d7912ba794dca9e669e605a
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
 
 PATH=/sbin:/bin:/usr/bin:/usr/sbin
 
 # Source function library
 if [ -f /etc/rc.d/init.d/functions ]; then
-	# RedHat and derivates
+	# RedHat and derivatives
 	. /etc/rc.d/init.d/functions
 elif [ -L /etc/init.d/functions.sh ]; then
 	# Gentoo
 	. /etc/init.d/functions.sh
 elif [ -f /lib/lsb/init-functions ]; then
-	# LSB, Debian GNU/Linux and derivates
+	# LSB, Debian, and derivatives
 	. /lib/lsb/init-functions
 fi
 
@@ -46,7 +44,7 @@
 		fi
 	}
 
-	zfs_log_begin_msg() { echo -n "$1 "; }
+	zfs_log_begin_msg() { printf "%s" "$1 "; }
 	zfs_log_end_msg() {
 		zfs_set_ifs "$OLD_IFS"
 		if [ "$1" -eq 0 ]; then
@@ -63,17 +61,17 @@
 		echo
 		zfs_set_ifs "$TMP_IFS"
 	}
-	zfs_log_progress_msg() { echo -n $"$1"; }
+	zfs_log_progress_msg() { printf "%s" "$""$1"; }
 elif type einfo > /dev/null 2>&1 ; then
 	# Gentoo functions
 	zfs_log_begin_msg() { ebegin "$1"; }
 	zfs_log_end_msg() { eend "$1"; }
 	zfs_log_failure_msg() { eend "$1"; }
-#	zfs_log_progress_msg() { echo -n "$1"; }
-	zfs_log_progress_msg() { echo -n; }
+#	zfs_log_progress_msg() { printf "%s" "$1"; }
+	zfs_log_progress_msg() { :; }
 else
 	# Unknown - simple substitutes.
-	zfs_log_begin_msg() { echo -n "$1"; }
+	zfs_log_begin_msg() { printf "%s" "$1"; }
 	zfs_log_end_msg() {
 		ret=$1
 		if [ "$ret" -ge 1 ]; then
@@ -84,7 +82,7 @@
 		return "$ret"
 	}
 	zfs_log_failure_msg() { echo "$1"; }
-	zfs_log_progress_msg() { echo -n "$1"; }
+	zfs_log_progress_msg() { printf "%s" "$1"; }
 fi
 
 # Paths to what we need
@@ -94,10 +92,12 @@
 ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
 
 # Sensible defaults
+ZFS_LOAD_KEY='yes'
+ZFS_UNLOAD_KEY='no'
 ZFS_MOUNT='yes'
 ZFS_UNMOUNT='yes'
-
-export ZFS ZED ZPOOL ZPOOL_CACHE ZFS_MOUNT ZFS_UNMOUNT
+ZFS_SHARE='yes'
+ZFS_UNSHARE='yes'
 
 # Source zfs configuration, overriding the defaults
 if [ -f @initconfdir@/zfs ]; then
@@ -106,6 +106,9 @@
 
 # ----------------------------------------------------
 
+export ZFS ZED ZPOOL ZPOOL_CACHE ZFS_LOAD_KEY ZFS_UNLOAD_KEY ZFS_MOUNT ZFS_UNMOUNT \
+    ZFS_SHARE ZFS_UNSHARE
+
 zfs_action()
 {
 	local MSG="$1";	shift
@@ -134,27 +137,28 @@
 {
 	local PIDFILE="$1";	shift
 	local DAEMON_BIN="$1";	shift
-	local DAEMON_ARGS="$*"
 
 	if type start-stop-daemon > /dev/null 2>&1 ; then
 		# LSB functions
 		start-stop-daemon --start --quiet --pidfile "$PIDFILE" \
 		    --exec "$DAEMON_BIN" --test > /dev/null || return 1
 
-	        start-stop-daemon --start --quiet --exec "$DAEMON_BIN" -- \
-		    $DAEMON_ARGS || return 2
+		# Pass the remaining arguments through to the daemon unchanged.
+		start-stop-daemon --start --quiet --exec "$DAEMON_BIN" -- \
+		    "$@" || return 2
 
-		# On Debian GNU/Linux, there's a 'sendsigs' script that will
+		# On Debian, there's a 'sendsigs' script that will
 		# kill basically everything quite early and zed is stopped
 		# much later than that. We don't want zed to be among them,
 		# so add the zed pid to list of pids to ignore.
-		if [ -f "$PIDFILE" -a -d /run/sendsigs.omit.d ]
+		if [ -f "$PIDFILE" ] && [ -d /run/sendsigs.omit.d ]
 		then
 			ln -sf "$PIDFILE" /run/sendsigs.omit.d/zed
 		fi
 	elif type daemon > /dev/null 2>&1 ; then
-	        # Fedora/RedHat functions
-		daemon --pidfile "$PIDFILE" "$DAEMON_BIN" $DAEMON_ARGS
+		# Fedora/RedHat functions
+		# Pass the remaining arguments through to the daemon unchanged.
+		daemon --pidfile "$PIDFILE" "$DAEMON_BIN" "$@"
 		return $?
 	else
 		# Unsupported
@@ -180,15 +184,17 @@
 		# LSB functions
 		start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 \
 		    --pidfile "$PIDFILE" --name "$DAEMON_NAME"
-		[ "$?" = 0 ] && rm -f "$PIDFILE"
+		ret="$?"
+		[ "$ret" = 0 ] && rm -f "$PIDFILE"
 
-		return $?
+		return "$ret"
 	elif type killproc > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
 		killproc -p "$PIDFILE" "$DAEMON_NAME"
-		[ "$?" = 0 ] && rm -f "$PIDFILE"
+		ret="$?"
+		[ "$ret" = 0 ] && rm -f "$PIDFILE"
 
-		return $?
+		return "$ret"
 	else
 		# Unsupported
 		return 3
@@ -232,7 +238,7 @@
 		return $?
 	elif type killproc > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
-                killproc -p "$PIDFILE" "$DAEMON_NAME" -HUP
+		killproc -p "$PIDFILE" "$DAEMON_NAME" -HUP
 		return $?
 	else
 		# Unsupported
@@ -285,6 +291,7 @@
 		# HOWEVER, only do this if we're called at the boot up
 		# (from init), not if we're running interactively (as in
 		# from the shell - we know what we're doing).
+		# shellcheck disable=SC2154
 		[ -n "$init" ] && exit 3
 	fi
 
@@ -299,6 +306,7 @@
 
 get_root_pool()
 {
+	# shellcheck disable=SC2046
 	set -- $(mount | grep ' on / ')
 	[ "$5" = "zfs" ] && echo "${1%%/*}"
 }
@@ -336,10 +344,11 @@
 read_mtab()
 {
 	local match="$1"
-	local fs mntpnt fstype opts rest TMPFILE
+	local fs mntpnt fstype opts rest
 
 	# Unset all MTAB_* variables
-	unset $(env | grep ^MTAB_ | sed 's,=.*,,')
+	# shellcheck disable=SC2046
+	unset $(env | sed -e '/^MTAB_/!d' -e 's,=.*,,')
 
 	while read -r fs mntpnt fstype opts rest; do
 		if echo "$fs $mntpnt $fstype $opts" | grep -qE "$match"; then
@@ -350,16 +359,15 @@
 			# * We need to use the external echo, because the
 			#   internal one would interpret the backslash code
 			#   (incorrectly), giving us a  instead.
-			mntpnt=$(/bin/echo "$mntpnt" | sed "s,\\\0,\\\00,g")
-			fs=$(/bin/echo "$fs" | sed "s,\\\0,\\\00,")
+			mntpnt=$(/bin/echo "$mntpnt" | sed 's,\\0,\\00,g')
+			fs=$(/bin/echo "$fs" | sed 's,\\0,\\00,')
 
 			# Remove 'unwanted' characters.
-			mntpnt=$(printf '%b\n' "$mntpnt" | sed -e 's,/,,g' \
-			    -e 's,-,,g' -e 's,\.,,g' -e 's, ,,g')
-			fs=$(printf '%b\n' "$fs")
+			mntpnt=$(printf '%b' "$mntpnt" | tr -d '/. -')
+			fs=$(printf '%b' "$fs")
 
 			# Set the variable.
-			eval export MTAB_$mntpnt=\"$fs\"
+			eval export "MTAB_$mntpnt=\"$fs\""
 		fi
 	done < /proc/self/mounts
 }
@@ -368,11 +376,10 @@
 {
 	local mntpnt="$1"
 	# Remove 'unwanted' characters.
-	mntpnt=$(printf '%b\n' "$mntpnt" | sed -e 's,/,,g' \
-	    -e 's,-,,g' -e 's,\.,,g' -e 's, ,,g')   
+	mntpnt=$(printf '%b' "$mntpnt" | tr -d '/. -')
 	local var
 
-	var="$(eval echo MTAB_$mntpnt)"
+	var="$(eval echo "MTAB_$mntpnt")"
 	[ "$(eval echo "$""$var")" != "" ]
 	return "$?"
 }
@@ -381,21 +388,22 @@
 read_fstab()
 {
 	local match="$1"
-	local i var TMPFILE
+	local i var
 
 	# Unset all FSTAB_* variables
-	unset $(env | grep ^FSTAB_ | sed 's,=.*,,')
+	# shellcheck disable=SC2046
+	unset $(env | sed -e '/^FSTAB_/!d' -e 's,=.*,,')
 
 	i=0
 	while read -r fs mntpnt fstype opts; do
-		echo "$fs" | egrep -qE '^#|^$' && continue
-		echo "$mntpnt" | egrep -qE '^none|^swap' && continue
-		echo "$fstype" | egrep -qE '^swap' && continue
+		echo "$fs" | grep -qE '^#|^$' && continue
+		echo "$mntpnt" | grep -qE '^none|^swap' && continue
+		echo "$fstype" | grep -qE '^swap' && continue
 
 		if echo "$fs $mntpnt $fstype $opts" | grep -qE "$match"; then
-			eval export FSTAB_dev_$i="$fs"
-			fs=$(printf '%b\n' "$fs" | sed 's,/,_,g')
-			eval export FSTAB_$i="$mntpnt"
+			eval export "FSTAB_dev_$i=$fs"
+			fs=$(printf '%b' "$fs" | tr '/' '_')
+			eval export "FSTAB_$i=$mntpnt"
 
 			i=$((i + 1))
 		fi
@@ -406,7 +414,7 @@
 {
 	local var
 
-	var="$(eval echo FSTAB_$1)"
+	var="$(eval echo "FSTAB_$1")"
 	[ "${var}" != "" ]
 	return $?
 }
@@ -414,19 +422,11 @@
 is_mounted()
 {
 	local mntpt="$1"
-	local line
+	local mp
 
-	mount | \
-	    while read line; do
-		if echo "$line" | grep -q " on $mntpt "; then
-		    # returns:
-		    #   0 on unsuccessful match
-		    #   1 on a successful match
-		    return 1
-		fi
-	    done
+	while read -r _ mp _; do
+		[ "$mp" = "$mntpt" ] && return 0
+	done < /proc/self/mounts
 
-	# The negation will flip the subshell return result where the default
-	# return value is 0 when a match is not found.
-	return $(( !$? ))
+	return 1
 }

diff --git a/zfs/include/Makefile.am b/zfs/include/Makefile.am
index bac47d9..17286ec 100644
--- a/zfs/include/Makefile.am
+++ b/zfs/include/Makefile.am

@@ -1,25 +1,25 @@
-SUBDIRS = linux spl sys
+SUBDIRS = sys os
 
 COMMON_H = \
-	$(top_srcdir)/include/zfeature_common.h \
-	$(top_srcdir)/include/zfs_comutil.h \
-	$(top_srcdir)/include/zfs_deleg.h \
-	$(top_srcdir)/include/zfs_fletcher.h \
-	$(top_srcdir)/include/zfs_namecheck.h \
-	$(top_srcdir)/include/zfs_prop.h
+	cityhash.h \
+	zfeature_common.h \
+	zfs_comutil.h \
+	zfs_deleg.h \
+	zfs_fletcher.h \
+	zfs_namecheck.h \
+	zfs_prop.h
 
 USER_H = \
-	$(top_srcdir)/include/libnvpair.h \
-	$(top_srcdir)/include/libuutil_common.h \
-	$(top_srcdir)/include/libuutil.h \
-	$(top_srcdir)/include/libuutil_impl.h \
-	$(top_srcdir)/include/libzfs.h \
-	$(top_srcdir)/include/libzfs_core.h \
-	$(top_srcdir)/include/libzfs_impl.h \
-	$(top_srcdir)/include/libzutil.h \
-	$(top_srcdir)/include/thread_pool.h
-
-EXTRA_DIST = $(COMMON_H) $(USER_H)
+	libnvpair.h \
+	libuutil_common.h \
+	libuutil.h \
+	libuutil_impl.h \
+	libzfs.h \
+	libzfsbootenv.h \
+	libzfs_core.h \
+	libzfs_impl.h \
+	libzutil.h \
+	thread_pool.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs
@@ -27,6 +27,8 @@
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include
 kernel_HEADERS = $(COMMON_H)
 endif
+endif

diff --git a/zfs/include/sys/cityhash.h b/zfs/include/cityhash.h
similarity index 100%
rename from zfs/include/sys/cityhash.h
rename to zfs/include/cityhash.h


diff --git a/zfs/include/libzfs.h b/zfs/include/libzfs.h
index cfcaf2a..f747a30 100644
--- a/zfs/include/libzfs.h
+++ b/zfs/include/libzfs.h

@@ -21,13 +21,14 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright Joyent, Inc.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2016, Intel Corporation.
  * Copyright 2016 Nexenta Systems, Inc.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 #ifndef	_LIBZFS_H
@@ -79,7 +80,7 @@
 	EZFS_NODEVICE,		/* no such device in pool */
 	EZFS_BADDEV,		/* invalid device to add */
 	EZFS_NOREPLICAS,	/* no valid replicas */
-	EZFS_RESILVERING,	/* currently resilvering */
+	EZFS_RESILVERING,	/* resilvering (healing reconstruction) */
 	EZFS_BADVERSION,	/* unsupported version */
 	EZFS_POOLUNAVAIL,	/* pool is currently unavailable */
 	EZFS_DEVOVERFLOW,	/* too many devices in one vdev */
@@ -88,8 +89,8 @@
 	EZFS_ZONED,		/* used improperly in local zone */
 	EZFS_MOUNTFAILED,	/* failed to mount dataset */
 	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
-	EZFS_UNSHARENFSFAILED,	/* unshare(1M) failed */
-	EZFS_SHARENFSFAILED,	/* share(1M) failed */
+	EZFS_UNSHARENFSFAILED,	/* failed to unshare over nfs */
+	EZFS_SHARENFSFAILED,	/* failed to share over nfs */
 	EZFS_PERM,		/* permission denied */
 	EZFS_NOSPC,		/* out of space */
 	EZFS_FAULT,		/* bad address */
@@ -148,6 +149,8 @@
 	EZFS_TRIM_NOTSUP,	/* device does not support trim */
 	EZFS_NO_RESILVER_DEFER,	/* pool doesn't support resilver_defer */
 	EZFS_EXPORT_IN_PROGRESS,	/* currently exporting the pool */
+	EZFS_REBUILDING,	/* resilvering (sequential reconstruction) */
+	EZFS_CKSUM,		/* insufficient replicas */
 	EZFS_UNKNOWN
 } zfs_error_t;
 
@@ -194,6 +197,10 @@
 typedef struct zpool_handle zpool_handle_t;
 typedef struct libzfs_handle libzfs_handle_t;
 
+extern int zpool_wait(zpool_handle_t *, zpool_wait_activity_t);
+extern int zpool_wait_status(zpool_handle_t *, zpool_wait_activity_t,
+    boolean_t *, boolean_t *);
+
 /*
  * Library initialization
  */
@@ -251,10 +258,10 @@
 
 typedef struct splitflags {
 	/* do not split, but return the config that would be split off */
-	int dryrun : 1;
+	unsigned int dryrun : 1;
 
 	/* after splitting, import the pool */
-	int import : 1;
+	unsigned int import : 1;
 	int name_flags;
 } splitflags_t;
 
@@ -265,6 +272,9 @@
 	/* request a secure trim, requires support from device */
 	boolean_t secure;
 
+	/* after starting trim, block until trim completes */
+	boolean_t wait;
+
 	/* trim at the requested rate in bytes/second */
 	uint64_t rate;
 } trimflags_t;
@@ -275,6 +285,8 @@
 extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
 extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
     nvlist_t *);
+extern int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t,
+    nvlist_t *);
 extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,
     trimflags_t *);
 
@@ -288,13 +300,14 @@
     vdev_state_t *);
 extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
 extern int zpool_vdev_attach(zpool_handle_t *, const char *,
-    const char *, nvlist_t *, int);
+    const char *, nvlist_t *, int, boolean_t);
 extern int zpool_vdev_detach(zpool_handle_t *, const char *);
 extern int zpool_vdev_remove(zpool_handle_t *, const char *);
 extern int zpool_vdev_remove_cancel(zpool_handle_t *);
 extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *);
 extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
     splitflags_t);
+extern int zpool_vdev_remove_wanted(zpool_handle_t *, const char *);
 
 extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
 extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
@@ -304,7 +317,7 @@
     boolean_t *, boolean_t *);
 extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
     boolean_t *, boolean_t *, boolean_t *);
-extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
+extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *);
 extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path);
 
 const char *zpool_get_state_str(zpool_handle_t *);
@@ -317,6 +330,7 @@
     size_t proplen, zprop_source_t *, boolean_t literal);
 extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
     zprop_source_t *);
+extern int zpool_props_refresh(zpool_handle_t *);
 
 extern const char *zpool_prop_to_name(zpool_prop_t);
 extern const char *zpool_prop_values(zpool_prop_t);
@@ -377,6 +391,11 @@
 	ZPOOL_STATUS_RESILVERING,	/* device being resilvered */
 	ZPOOL_STATUS_OFFLINE_DEV,	/* device offline */
 	ZPOOL_STATUS_REMOVED_DEV,	/* removed device */
+	ZPOOL_STATUS_REBUILDING,	/* device being rebuilt */
+	ZPOOL_STATUS_REBUILD_SCRUB,	/* recommend scrubbing the pool */
+	ZPOOL_STATUS_NON_NATIVE_ASHIFT,	/* (e.g. 512e dev with ashift of 9) */
+	ZPOOL_STATUS_COMPATIBILITY_ERR,	/* bad 'compatibility' property */
+	ZPOOL_STATUS_INCOMPATIBLE_FEAT,	/* feature set outside compatibility */
 
 	/*
 	 * Finally, the following indicates a healthy pool.
@@ -431,14 +450,17 @@
     int);
 extern int zpool_events_clear(libzfs_handle_t *, int *);
 extern int zpool_events_seek(libzfs_handle_t *, uint64_t, int);
+extern void zpool_obj_to_path_ds(zpool_handle_t *, uint64_t, uint64_t, char *,
+    size_t);
 extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
-    size_t len);
+    size_t);
 extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
 extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
 extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
     nvlist_t *);
 extern int zpool_checkpoint(zpool_handle_t *);
 extern int zpool_discard_checkpoint(zpool_handle_t *);
+extern boolean_t zpool_is_draid_spare(const char *);
 
 /*
  * Basic handle manipulations.  These functions do not create or destroy the
@@ -497,6 +519,9 @@
 extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
 extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
 
+extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
+    boolean_t *, boolean_t *);
+
 /*
  * zfs encryption management
  */
@@ -537,7 +562,7 @@
 /*
  * zpool property management
  */
-extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
+extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **, boolean_t);
 extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *,
     size_t);
 extern const char *zpool_prop_default_string(zpool_prop_t);
@@ -623,23 +648,38 @@
 extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
     nvlist_t *props);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
-extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t);
+
+typedef struct renameflags {
+	/* recursive rename */
+	unsigned int recursive : 1;
+
+	/* don't unmount file systems */
+	unsigned int nounmount : 1;
+
+	/* force unmount file systems */
+	unsigned int forceunmount : 1;
+} renameflags_t;
+
+extern int zfs_rename(zfs_handle_t *, const char *, renameflags_t);
 
 typedef struct sendflags {
-	/* print informational messages (ie, -v was specified) */
-	boolean_t verbose;
+	/* Amount of extra information to print. */
+	int verbosity;
 
 	/* recursive send  (ie, -R) */
 	boolean_t replicate;
 
+	/* for recursive send, skip sending missing snapshots */
+	boolean_t skipmissing;
+
 	/* for incrementals, do all intermediate snapshots */
 	boolean_t doall;
 
 	/* if dataset is a clone, do incremental from its origin */
 	boolean_t fromorigin;
 
-	/* do deduplication */
-	boolean_t dedup;
+	/* field no longer used, maintained for backwards compatibility */
+	boolean_t pad;
 
 	/* send properties (ie, -p) */
 	boolean_t props;
@@ -653,6 +693,9 @@
 	/* show progress (ie. -v) */
 	boolean_t progress;
 
+	/* show progress as process title (ie. -V) */
+	boolean_t progressastitle;
+
 	/* large blocks (>128K) are permitted */
 	boolean_t largeblock;
 
@@ -662,7 +705,7 @@
 	/* compressed WRITE records are permitted */
 	boolean_t compress;
 
-	/* produce block-level zvol diff stream (no data) */
+	/* produce block-level zvol diff stream (no data) */
 	boolean_t block_diff;
 
 	/* raw encrypted records are permitted */
@@ -673,15 +716,21 @@
 
 	/* include snapshot holds in send stream */
 	boolean_t holds;
+
+	/* stream represents a partially received dataset */
+	boolean_t saved;
 } sendflags_t;
 
 typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
 
 extern int zfs_send(zfs_handle_t *, const char *, const char *,
     sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
-extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags);
+extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t *,
+    const char *);
+extern int zfs_send_progress(zfs_handle_t *, int, uint64_t *, uint64_t *);
 extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd,
     const char *);
+extern int zfs_send_saved(zfs_handle_t *, sendflags_t *, int, const char *);
 extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl,
     const char *token);
 
@@ -742,15 +791,22 @@
 
 	/* skip receive of snapshot holds */
 	boolean_t skipholds;
+
+	/* mount the filesystem unless nomount is specified */
+	boolean_t domount;
+
+	/* force unmount while recv snapshot (private) */
+	boolean_t forceunmount;
 } recvflags_t;
 
 extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
     recvflags_t *, int, avl_tree_t *);
 
 typedef enum diff_flags {
-	ZFS_DIFF_PARSEABLE = 0x1,
-	ZFS_DIFF_TIMESTAMP = 0x2,
-	ZFS_DIFF_CLASSIFY = 0x4
+	ZFS_DIFF_PARSEABLE = 1 << 0,
+	ZFS_DIFF_TIMESTAMP = 1 << 1,
+	ZFS_DIFF_CLASSIFY = 1 << 2,
+	ZFS_DIFF_NO_MANGLE = 1 << 3
 } diff_flags_t;
 
 extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *,
@@ -762,7 +818,8 @@
 extern const char *zfs_type_to_name(zfs_type_t);
 extern void zfs_refresh_properties(zfs_handle_t *);
 extern int zfs_name_valid(const char *, zfs_type_t);
-extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, const char *,
+    zfs_type_t);
 extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
 extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
     zfs_type_t);
@@ -775,9 +832,17 @@
 extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
 extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
 extern int zfs_mount(zfs_handle_t *, const char *, int);
+extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *);
 extern int zfs_unmount(zfs_handle_t *, const char *, int);
 extern int zfs_unmountall(zfs_handle_t *, int);
 
+#if defined(__linux__)
+extern int zfs_parse_mount_options(char *mntopts, unsigned long *mntflags,
+    unsigned long *zfsflags, int sloppy, char *badopt, char *mtabopt);
+extern void zfs_adjust_mount_options(zfs_handle_t *zhp, const char *mntpoint,
+    char *mntopts, char *mtabopt);
+#endif
+
 /*
  * Share support functions.
  */
@@ -802,6 +867,10 @@
 extern int zfs_unshareall(zfs_handle_t *);
 extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
     void *, void *, int, zfs_share_op_t);
+extern void zfs_commit_nfs_shares(void);
+extern void zfs_commit_smb_shares(void);
+extern void zfs_commit_all_shares(void);
+extern void zfs_commit_shares(const char *);
 
 extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
 
@@ -812,15 +881,15 @@
 #define	STDERR_VERBOSE	0x02
 #define	NO_DEFAULT_PATH	0x04 /* Don't use $PATH to lookup the command */
 
-int libzfs_run_process(const char *, char **, int flags);
-int libzfs_run_process_get_stdout(const char *path, char *argv[], char *env[],
-    char **lines[], int *lines_cnt);
-int libzfs_run_process_get_stdout_nopath(const char *path, char *argv[],
-    char *env[], char **lines[], int *lines_cnt);
+int libzfs_run_process(const char *, char **, int);
+int libzfs_run_process_get_stdout(const char *, char *[], char *[],
+    char **[], int *);
+int libzfs_run_process_get_stdout_nopath(const char *, char *[], char *[],
+    char **[], int *);
 
-void libzfs_free_str_array(char **strs, int count);
+void libzfs_free_str_array(char **, int);
 
-int libzfs_envvar_is_set(char *envvar);
+int libzfs_envvar_is_set(char *);
 
 /*
  * Utility functions for zfs version
@@ -839,6 +908,8 @@
  * Label manipulation.
  */
 extern int zpool_clear_label(int);
+extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *);
+extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **);
 
 /*
  * Management interfaces for SMB ACL files
@@ -856,7 +927,33 @@
 extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
 extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
 
-extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *);
+/*
+ * Parse a features file for -o compatibility
+ */
+typedef enum {
+	ZPOOL_COMPATIBILITY_OK,
+	ZPOOL_COMPATIBILITY_WARNTOKEN,
+	ZPOOL_COMPATIBILITY_BADTOKEN,
+	ZPOOL_COMPATIBILITY_BADFILE,
+	ZPOOL_COMPATIBILITY_NOFILES
+} zpool_compat_status_t;
+
+extern zpool_compat_status_t zpool_load_compat(const char *,
+    boolean_t *, char *, size_t);
+
+#ifdef __FreeBSD__
+
+/*
+ * Attach/detach the given filesystem to/from the given jail.
+ */
+extern int zfs_jail(zfs_handle_t *zhp, int jailid, int attach);
+
+/*
+ * Set loader options for next boot.
+ */
+extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *);
+
+#endif /* __FreeBSD__ */
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/libzfs_core.h b/zfs/include/libzfs_core.h
index 74a64d1..05757de 100644
--- a/zfs/include/libzfs_core.h
+++ b/zfs/include/libzfs_core.h

@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2017 Datto Inc.
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
@@ -49,7 +49,6 @@
 	LZC_DATSET_TYPE_ZVOL
 };
 
-int lzc_remap(const char *fsname);
 int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
 int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
     uint_t);
@@ -58,6 +57,7 @@
 int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
 int lzc_bookmark(nvlist_t *, nvlist_t **);
 int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
+int lzc_get_bookmark_props(const char *, nvlist_t **);
 int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
 int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
 int lzc_unload_key(const char *);
@@ -66,6 +66,7 @@
     nvlist_t **);
 int lzc_trim(const char *, pool_trim_func_t, uint64_t, boolean_t,
     nvlist_t *, nvlist_t **);
+int lzc_redact(const char *, const char *, nvlist_t *);
 
 int lzc_snaprange_space(const char *, const char *, uint64_t *);
 
@@ -78,6 +79,8 @@
 	LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
 	LZC_SEND_FLAG_COMPRESS = 1 << 2,
 	LZC_SEND_FLAG_RAW = 1 << 3,
+	LZC_SEND_FLAG_SAVED = 1 << 4,
+	LZC_SEND_FLAG_BLOCKDIFF = 1 << 7,
 };
 
 int lzc_send(const char *, const char *, int, enum lzc_send_flags);
@@ -87,6 +90,10 @@
 
 struct dmu_replay_record;
 
+int lzc_send_redacted(const char *, const char *, int, enum lzc_send_flags,
+    const char *);
+int lzc_send_resume_redacted(const char *, const char *, int,
+    enum lzc_send_flags, uint64_t, uint64_t, const char *);
 int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
     int);
 int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
@@ -100,6 +107,11 @@
     uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int,
     const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
     uint64_t *, nvlist_t **);
+int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
+int lzc_send_space_resume_redacted(const char *, const char *,
+    enum lzc_send_flags, uint64_t, uint64_t, uint64_t, const char *,
+    int, uint64_t *);
+uint64_t lzc_send_progress(int);
 
 boolean_t lzc_exists(const char *);
 
@@ -120,6 +132,12 @@
 int lzc_pool_checkpoint(const char *);
 int lzc_pool_checkpoint_discard(const char *);
 
+int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
+int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
+int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
+
+int lzc_set_bootenv(const char *, const nvlist_t *);
+int lzc_get_bootenv(const char *, nvlist_t **);
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/libzfs_impl.h b/zfs/include/libzfs_impl.h
index d561498..043ff9c 100644
--- a/zfs/include/libzfs_impl.h
+++ b/zfs/include/libzfs_impl.h

@@ -1,5 +1,5 @@
 /*
- * CDDL HEADER SART
+ * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
@@ -21,8 +21,9 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2018 Datto Inc.
+ * Copyright 2020 Joyent, Inc.
  */
 
 #ifndef	_LIBZFS_IMPL_H
@@ -33,6 +34,7 @@
 #include <sys/nvpair.h>
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
+#include <regex.h>
 
 #include <libuutil.h>
 #include <libzfs.h>
@@ -47,7 +49,6 @@
 	int libzfs_error;
 	int libzfs_fd;
 	FILE *libzfs_mnttab;
-	FILE *libzfs_sharetab;
 	zpool_handle_t *libzfs_pool_handles;
 	uu_avl_pool_t *libzfs_ns_avlpool;
 	uu_avl_t *libzfs_ns_avl;
@@ -57,8 +58,6 @@
 	char libzfs_desc[1024];
 	int libzfs_printerr;
 	int libzfs_storeerr; /* stuff error messages into buffer */
-	void *libzfs_sharehdl; /* libshare handle */
-	uint_t libzfs_shareflags;
 	boolean_t libzfs_mnttab_enable;
 	/*
 	 * We need a lock to handle the case where parallel mount
@@ -71,11 +70,12 @@
 	int libzfs_pool_iter;
 	char libzfs_chassis_id[256];
 	boolean_t libzfs_prop_debug;
-	boolean_t libzfs_dedup_warning_printed;
+	regex_t libzfs_urire;
+	uint64_t libzfs_max_nvlist;
+	void *libfetch;
+	char *libfetch_load_error;
 };
 
-#define	ZFSSHARE_MISS	0x01	/* Didn't find entry in cache */
-
 struct zfs_handle {
 	libzfs_handle_t *zfs_hdl;
 	zpool_handle_t *zpool_hdl;
@@ -125,6 +125,14 @@
 	SHARED_SMB = 0x4
 } zfs_share_type_t;
 
+typedef int (*zfs_uri_handler_fn_t)(struct libzfs_handle *, const char *,
+    const char *, zfs_keyformat_t, boolean_t, uint8_t **, size_t *);
+
+typedef struct zfs_uri_handler {
+	const char *zuh_scheme;
+	zfs_uri_handler_fn_t zuh_handler;
+} zfs_uri_handler_t;
+
 #define	CONFIG_BUF_MINSIZE	262144
 
 int zfs_error(libzfs_handle_t *, int, const char *);
@@ -138,6 +146,7 @@
 
 int zfs_standard_error(libzfs_handle_t *, int, const char *);
 int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
 int zpool_standard_error(libzfs_handle_t *, int, const char *);
 int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
 
@@ -159,6 +168,10 @@
  * changelist_gather() flag to force it to iterate on mounted datasets only
  */
 #define	CL_GATHER_ITER_MOUNTED	2
+/*
+ * Use this changelist_gather() flag to prevent unmounting of file systems.
+ */
+#define	CL_GATHER_DONT_UNMOUNT	4
 
 typedef struct prop_changelist prop_changelist_t;
 
@@ -195,17 +208,60 @@
 
 void namespace_clear(libzfs_handle_t *);
 
-/*
- * libshare (sharemgr) interfaces used internally.
- */
-
-extern int zfs_init_libshare(libzfs_handle_t *, int);
-extern void zfs_uninit_libshare(libzfs_handle_t *);
 extern int zfs_parse_options(char *, zfs_share_proto_t);
 
 extern int zfs_unshare_proto(zfs_handle_t *,
     const char *, zfs_share_proto_t *);
 
+typedef struct {
+	zfs_prop_t p_prop;
+	char *p_name;
+	int p_share_err;
+	int p_unshare_err;
+} proto_table_t;
+
+typedef struct differ_info {
+	zfs_handle_t *zhp;
+	char *fromsnap;
+	char *frommnt;
+	char *tosnap;
+	char *tomnt;
+	char *ds;
+	char *dsmnt;
+	char *tmpsnap;
+	char errbuf[1024];
+	boolean_t isclone;
+	boolean_t scripted;
+	boolean_t classify;
+	boolean_t timestamped;
+	boolean_t no_mangle;
+	uint64_t shares;
+	int zerr;
+	int cleanupfd;
+	int outputfd;
+	int datafd;
+} differ_info_t;
+
+extern proto_table_t proto_table[PROTO_END];
+
+extern int do_mount(zfs_handle_t *zhp, const char *mntpt, char *opts,
+    int flags);
+extern int do_unmount(const char *mntpt, int flags);
+extern int zfs_mount_delegation_check(void);
+extern int zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto);
+extern int unshare_one(libzfs_handle_t *hdl, const char *name,
+    const char *mountpoint, zfs_share_proto_t proto);
+extern boolean_t zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
+    zprop_source_t *source, int flags);
+extern zfs_share_type_t is_shared(const char *mountpoint,
+    zfs_share_proto_t proto);
+extern int libzfs_load_module(void);
+extern int zpool_relabel_disk(libzfs_handle_t *hdl, const char *path,
+    const char *msg);
+extern int find_shares_object(differ_info_t *di);
+extern void libzfs_set_pipe_max(int infd);
+extern void zfs_commit_proto(zfs_share_proto_t *);
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/libzfsbootenv.h b/zfs/include/libzfsbootenv.h
new file mode 100644
index 0000000..b078b60
--- /dev/null
+++ b/zfs/include/libzfsbootenv.h

@@ -0,0 +1,41 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _LIBZFSBOOTENV_H
+#define	_LIBZFSBOOTENV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum lzbe_flags {
+	lzbe_add,	/* add data to existing nvlist */
+	lzbe_replace	/* replace current nvlist */
+} lzbe_flags_t;
+
+extern int lzbe_nvlist_get(const char *, const char *, void **);
+extern int lzbe_nvlist_set(const char *, const char *, void *);
+extern void lzbe_nvlist_free(void *);
+extern int lzbe_add_pair(void *, const char *, const char *, void *, size_t);
+extern int lzbe_remove_pair(void *, const char *);
+extern int lzbe_set_boot_device(const char *, lzbe_flags_t, const char *);
+extern int lzbe_get_boot_device(const char *, char **);
+extern int lzbe_bootenv_print(const char *, const char *, FILE *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBZFSBOOTENV_H */

diff --git a/zfs/include/libzutil.h b/zfs/include/libzutil.h
index 69d1e6b..15024a4 100644
--- a/zfs/include/libzutil.h
+++ b/zfs/include/libzutil.h

@@ -79,15 +79,10 @@
 extern int zpool_read_label(int, nvlist_t **, int *);
 extern int zpool_label_disk_wait(const char *, int);
 
-#ifdef HAVE_LIBUDEV
 struct udev_device;
 
 extern int zfs_device_get_devid(struct udev_device *, char *, size_t);
 extern int zfs_device_get_physical(struct udev_device *, char *, size_t);
-#else
-#define	zfs_device_get_devid(dev, bufptr, buflen)	(ENODATA)
-#define	zfs_device_get_physical(dev, bufptr, buflen)	(ENODATA)
-#endif
 
 extern void update_vdev_config_dev_strs(nvlist_t *);
 
@@ -102,20 +97,19 @@
 extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen);
 
 extern char *zfs_strip_partition(char *);
-extern char *zfs_strip_partition_path(char *);
+extern char *zfs_strip_path(char *);
 
 extern int zfs_strcmp_pathname(const char *, const char *, int);
 
-extern int zfs_dev_is_dm(const char *);
-extern int zfs_dev_is_whole_disk(const char *);
+extern boolean_t zfs_dev_is_dm(const char *);
+extern boolean_t zfs_dev_is_whole_disk(const char *);
+extern int zfs_dev_flush(int);
 extern char *zfs_get_underlying_path(const char *);
 extern char *zfs_get_enclosure_sysfs_path(const char *);
 
-#ifdef HAVE_LIBUDEV
 extern boolean_t is_mpath_whole_disk(const char *);
-#else
-#define	is_mpath_whole_disk(path) (B_FALSE)
-#endif
+
+extern boolean_t zfs_isnumber(const char *);
 
 /*
  * Formats for iostat numbers.  Examples: "12K", "30ms", "4B", "2321234", "-".
@@ -142,6 +136,7 @@
 extern void zfs_nicenum_format(uint64_t, char *, size_t,
     enum zfs_nicenum_format);
 extern void zfs_nicetime(uint64_t, char *, size_t);
+extern void zfs_niceraw(uint64_t, char *, size_t);
 
 #define	nicenum(num, buf, size)	zfs_nicenum(num, buf, size)
 
@@ -149,6 +144,49 @@
 extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***,
     uint_t *);
 
+struct zfs_cmd;
+int zfs_ioctl_fd(int fd, unsigned long request, struct zfs_cmd *zc);
+
+/*
+ * List of colors to use
+ */
+#define	ANSI_BLACK	"\033[0;30m"
+#define	ANSI_RED	"\033[0;31m"
+#define	ANSI_GREEN	"\033[0;32m"
+#define	ANSI_YELLOW	"\033[0;33m"
+#define	ANSI_BLUE	"\033[0;34m"
+#define	ANSI_BOLD_BLUE	"\033[1;34m" /* light blue */
+#define	ANSI_MAGENTA	"\033[0;35m"
+#define	ANSI_CYAN	"\033[0;36m"
+#define	ANSI_GRAY	"\033[0;37m"
+
+#define	ANSI_RESET	"\033[0m"
+#define	ANSI_BOLD	"\033[1m"
+
+int use_color(void);
+void color_start(const char *color);
+void color_end(void);
+int printf_color(const char *color, char *format, ...);
+
+#ifdef __linux__
+extern char **environ;
+_LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]);
+_LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...);
+#else
+#define	zfs_setproctitle(fmt, ...)	setproctitle(fmt, ##__VA_ARGS__)
+#define	zfs_setproctitle_init(x, y, z)	((void)0)
+#endif
+
+/*
+ * These functions are used by the ZFS libraries and cmd/zpool code, but are
+ * not exported in the ABI.
+ */
+typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *);
+int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
+    void *data);
+int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func,
+    void *data);
+void update_vdevs_config_dev_sysfs_path(nvlist_t *config);
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/linux/Makefile.am b/zfs/include/linux/Makefile.am
deleted file mode 100644
index 2455759..0000000
--- a/zfs/include/linux/Makefile.am
+++ /dev/null

@@ -1,29 +0,0 @@
-COMMON_H =
-
-KERNEL_H = \
-	$(top_srcdir)/include/linux/dcache_compat.h \
-	$(top_srcdir)/include/linux/xattr_compat.h \
-	$(top_srcdir)/include/linux/vfs_compat.h \
-	$(top_srcdir)/include/linux/blkdev_compat.h \
-	$(top_srcdir)/include/linux/utsname_compat.h \
-	$(top_srcdir)/include/linux/kmap_compat.h \
-	$(top_srcdir)/include/linux/simd.h \
-	$(top_srcdir)/include/linux/simd_x86.h \
-	$(top_srcdir)/include/linux/simd_aarch64.h \
-	$(top_srcdir)/include/linux/mod_compat.h \
-	$(top_srcdir)/include/linux/page_compat.h \
-	$(top_srcdir)/include/linux/compiler_compat.h
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
-
-if CONFIG_USER
-libzfsdir = $(includedir)/libzfs/linux
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
-endif
-
-if CONFIG_KERNEL
-kerneldir = @prefix@/src/zfs-$(VERSION)/include/linux
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
-endif

diff --git a/zfs/include/linux/blkdev_compat.h b/zfs/include/linux/blkdev_compat.h
deleted file mode 100644
index 9c8b9a2..0000000
--- a/zfs/include/linux/blkdev_compat.h
+++ /dev/null

@@ -1,724 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- * LLNL-CODE-403049.
- */
-
-#ifndef _ZFS_BLKDEV_H
-#define	_ZFS_BLKDEV_H
-
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <linux/backing-dev.h>
-#include <linux/hdreg.h>
-#include <linux/msdos_fs.h>	/* for SECTOR_* */
-
-#ifndef HAVE_FMODE_T
-typedef unsigned __bitwise__ fmode_t;
-#endif /* HAVE_FMODE_T */
-
-#ifndef HAVE_BLK_QUEUE_FLAG_SET
-static inline void
-blk_queue_flag_set(unsigned int flag, struct request_queue *q)
-{
-	queue_flag_set(flag, q);
-}
-#endif
-
-#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR
-static inline void
-blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
-{
-	queue_flag_clear(flag, q);
-}
-#endif
-
-/*
- * 4.7 - 4.x API,
- * The blk_queue_write_cache() interface has replaced blk_queue_flush()
- * interface.  However, the new interface is GPL-only thus we implement
- * our own trivial wrapper when the GPL-only version is detected.
- *
- * 2.6.36 - 4.6 API,
- * The blk_queue_flush() interface has replaced blk_queue_ordered()
- * interface.  However, while the old interface was available to all the
- * new one is GPL-only.   Thus if the GPL-only version is detected we
- * implement our own trivial helper.
- *
- * 2.6.x - 2.6.35
- * Legacy blk_queue_ordered() interface.
- */
-static inline void
-blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
-{
-#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
-	if (wc)
-		blk_queue_flag_set(QUEUE_FLAG_WC, q);
-	else
-		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
-	if (fua)
-		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
-	else
-		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
-#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
-	blk_queue_write_cache(q, wc, fua);
-#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
-	if (wc)
-		q->flush_flags |= REQ_FLUSH;
-	if (fua)
-		q->flush_flags |= REQ_FUA;
-#elif defined(HAVE_BLK_QUEUE_FLUSH)
-	blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
-#else
-	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
-#endif
-}
-
-/*
- * Most of the blk_* macros were removed in 2.6.36.  Ostensibly this was
- * done to improve readability and allow easier grepping.  However, from
- * a portability stand point the macros are helpful.  Therefore the needed
- * macros are redefined here if they are missing from the kernel.
- */
-#ifndef blk_fs_request
-#define	blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
-#endif
-
-/*
- * 2.6.34 API change,
- * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
- */
-#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS
-#define	blk_queue_max_hw_sectors __blk_queue_max_hw_sectors
-static inline void
-__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
-{
-	blk_queue_max_sectors(q, max_hw_sectors);
-}
-#endif
-
-/*
- * 2.6.34 API change,
- * The blk_queue_max_segments() function consolidates
- * blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
- */
-#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS
-#define	blk_queue_max_segments __blk_queue_max_segments
-static inline void
-__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
-{
-	blk_queue_max_phys_segments(q, max_segments);
-	blk_queue_max_hw_segments(q, max_segments);
-}
-#endif
-
-static inline void
-blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
-{
-#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
-	q->backing_dev_info->ra_pages = ra_pages;
-#else
-	q->backing_dev_info.ra_pages = ra_pages;
-#endif
-}
-
-#ifndef HAVE_GET_DISK_AND_MODULE
-static inline struct kobject *
-get_disk_and_module(struct gendisk *disk)
-{
-	return (get_disk(disk));
-}
-#endif
-
-#ifndef HAVE_GET_DISK_RO
-static inline int
-get_disk_ro(struct gendisk *disk)
-{
-	int policy = 0;
-
-	if (disk->part[0])
-		policy = disk->part[0]->policy;
-
-	return (policy);
-}
-#endif /* HAVE_GET_DISK_RO */
-
-#ifdef HAVE_BIO_BVEC_ITER
-#define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
-#define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
-#define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
-#define	BIO_BI_SKIP(bio)	(bio)->bi_iter.bi_bvec_done
-#define	bio_for_each_segment4(bv, bvp, b, i)	\
-	bio_for_each_segment((bv), (b), (i))
-typedef struct bvec_iter bvec_iterator_t;
-#else
-#define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
-#define	BIO_BI_SIZE(bio)	(bio)->bi_size
-#define	BIO_BI_IDX(bio)		(bio)->bi_idx
-#define	BIO_BI_SKIP(bio)	(0)
-#define	bio_for_each_segment4(bv, bvp, b, i)	\
-	bio_for_each_segment((bvp), (b), (i))
-typedef int bvec_iterator_t;
-#endif
-
-/*
- * Portable helper for correctly setting the FAILFAST flags.  The
- * correct usage has changed 3 times from 2.6.12 to 2.6.38.
- */
-static inline void
-bio_set_flags_failfast(struct block_device *bdev, int *flags)
-{
-#ifdef CONFIG_BUG
-	/*
-	 * Disable FAILFAST for loopback devices because of the
-	 * following incorrect BUG_ON() in loop_make_request().
-	 * This support is also disabled for md devices because the
-	 * test suite layers md devices on top of loopback devices.
-	 * This may be removed when the loopback driver is fixed.
-	 *
-	 *   BUG_ON(!lo || (rw != READ && rw != WRITE));
-	 */
-	if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
-	    (MAJOR(bdev->bd_dev) == MD_MAJOR))
-		return;
-
-#ifdef BLOCK_EXT_MAJOR
-	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
-		return;
-#endif /* BLOCK_EXT_MAJOR */
-#endif /* CONFIG_BUG */
-
-#if defined(HAVE_BIO_RW_FAILFAST_DTD)
-	/* BIO_RW_FAILFAST_* preferred interface from 2.6.28 - 2.6.35 */
-	*flags |= (
-	    (1 << BIO_RW_FAILFAST_DEV) |
-	    (1 << BIO_RW_FAILFAST_TRANSPORT) |
-	    (1 << BIO_RW_FAILFAST_DRIVER));
-#elif defined(HAVE_REQ_FAILFAST_MASK)
-	/*
-	 * REQ_FAILFAST_* preferred interface from 2.6.36 - 2.6.xx,
-	 * the BIO_* and REQ_* flags were unified under REQ_* flags.
-	 */
-	*flags |= REQ_FAILFAST_MASK;
-#else
-#error "Undefined block IO FAILFAST interface."
-#endif
-}
-
-/*
- * Maximum disk label length, it may be undefined for some kernels.
- */
-#ifndef DISK_NAME_LEN
-#define	DISK_NAME_LEN	32
-#endif /* DISK_NAME_LEN */
-
-#ifdef HAVE_BIO_BI_STATUS
-static inline int
-bi_status_to_errno(blk_status_t status)
-{
-	switch (status)	{
-	case BLK_STS_OK:
-		return (0);
-	case BLK_STS_NOTSUPP:
-		return (EOPNOTSUPP);
-	case BLK_STS_TIMEOUT:
-		return (ETIMEDOUT);
-	case BLK_STS_NOSPC:
-		return (ENOSPC);
-	case BLK_STS_TRANSPORT:
-		return (ENOLINK);
-	case BLK_STS_TARGET:
-		return (EREMOTEIO);
-	case BLK_STS_NEXUS:
-		return (EBADE);
-	case BLK_STS_MEDIUM:
-		return (ENODATA);
-	case BLK_STS_PROTECTION:
-		return (EILSEQ);
-	case BLK_STS_RESOURCE:
-		return (ENOMEM);
-	case BLK_STS_AGAIN:
-		return (EAGAIN);
-	case BLK_STS_IOERR:
-		return (EIO);
-	default:
-		return (EIO);
-	}
-}
-
-static inline blk_status_t
-errno_to_bi_status(int error)
-{
-	switch (error) {
-	case 0:
-		return (BLK_STS_OK);
-	case EOPNOTSUPP:
-		return (BLK_STS_NOTSUPP);
-	case ETIMEDOUT:
-		return (BLK_STS_TIMEOUT);
-	case ENOSPC:
-		return (BLK_STS_NOSPC);
-	case ENOLINK:
-		return (BLK_STS_TRANSPORT);
-	case EREMOTEIO:
-		return (BLK_STS_TARGET);
-	case EBADE:
-		return (BLK_STS_NEXUS);
-	case ENODATA:
-		return (BLK_STS_MEDIUM);
-	case EILSEQ:
-		return (BLK_STS_PROTECTION);
-	case ENOMEM:
-		return (BLK_STS_RESOURCE);
-	case EAGAIN:
-		return (BLK_STS_AGAIN);
-	case EIO:
-		return (BLK_STS_IOERR);
-	default:
-		return (BLK_STS_IOERR);
-	}
-}
-#endif /* HAVE_BIO_BI_STATUS */
-
-/*
- * 4.3 API change
- * The bio_endio() prototype changed slightly.  These are helper
- * macro's to ensure the prototype and invocation are handled.
- */
-#ifdef HAVE_1ARG_BIO_END_IO_T
-#ifdef HAVE_BIO_BI_STATUS
-#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno(bio->bi_status)
-#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
-#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
-static inline void
-bio_set_bi_status(struct bio *bio, int error)
-{
-	ASSERT3S(error, <=, 0);
-	bio->bi_status = errno_to_bi_status(-error);
-	bio_endio(bio);
-}
-#else
-#define	BIO_END_IO_ERROR(bio)		(-(bio->bi_error))
-#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
-#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
-static inline void
-bio_set_bi_error(struct bio *bio, int error)
-{
-	ASSERT3S(error, <=, 0);
-	bio->bi_error = error;
-	bio_endio(bio);
-}
-#endif /* HAVE_BIO_BI_STATUS */
-
-#else
-#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
-#define	BIO_END_IO(bio, error)		bio_endio(bio, error);
-#endif /* HAVE_1ARG_BIO_END_IO_T */
-
-/*
- * 2.6.38 - 2.6.x API,
- *   blkdev_get_by_path()
- *   blkdev_put()
- *
- * 2.6.28 - 2.6.37 API,
- *   open_bdev_exclusive()
- *   close_bdev_exclusive()
- *
- * 2.6.12 - 2.6.27 API,
- *   open_bdev_excl()
- *   close_bdev_excl()
- *
- * Used to exclusively open a block device from within the kernel.
- */
-#if defined(HAVE_BLKDEV_GET_BY_PATH)
-#define	vdev_bdev_open(path, md, hld)	blkdev_get_by_path(path, \
-					    (md) | FMODE_EXCL, hld)
-#define	vdev_bdev_close(bdev, md)	blkdev_put(bdev, (md) | FMODE_EXCL)
-#elif defined(HAVE_OPEN_BDEV_EXCLUSIVE)
-#define	vdev_bdev_open(path, md, hld)	open_bdev_exclusive(path, md, hld)
-#define	vdev_bdev_close(bdev, md)	close_bdev_exclusive(bdev, md)
-#else
-#define	vdev_bdev_open(path, md, hld)	open_bdev_excl(path, md, hld)
-#define	vdev_bdev_close(bdev, md)	close_bdev_excl(bdev)
-#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */
-
-/*
- * 4.1 - x.y.z API,
- * 3.10.0 CentOS 7.x API,
- *   blkdev_reread_part()
- *
- * For older kernels trigger a re-reading of the partition table by calling
- * check_disk_change() which calls flush_disk() to invalidate the device.
- *
- * For newer kernels (as of 5.10), bdev_check_media_chage is used, in favor of
- * check_disk_change(), with the modification that invalidation is no longer
- * forced.
- */
-#ifdef HAVE_CHECK_DISK_CHANGE
-#define	zfs_check_media_change(bdev)	check_disk_change(bdev)
-#ifdef HAVE_BLKDEV_REREAD_PART
-#define	vdev_bdev_reread_part(bdev)	blkdev_reread_part(bdev)
-#else
-#define	vdev_bdev_reread_part(bdev)	check_disk_change(bdev)
-#endif /* HAVE_BLKDEV_REREAD_PART */
-#else
-#ifdef HAVE_BDEV_CHECK_MEDIA_CHANGE
-static inline int
-zfs_check_media_change(struct block_device *bdev)
-{
-	struct gendisk *gd = bdev->bd_disk;
-	const struct block_device_operations *bdo = gd->fops;
-
-	if (!bdev_check_media_change(bdev))
-		return (0);
-
-	/*
-	 * Force revalidation, to mimic the old behavior of
-	 * check_disk_change()
-	 */
-	if (bdo->revalidate_disk)
-		bdo->revalidate_disk(gd);
-
-	return (0);
-}
-#define	vdev_bdev_reread_part(bdev)	zfs_check_media_change(bdev)
-#else
-/*
- * This is encountered if check_disk_change() and bdev_check_media_change()
- * are not available in the kernel - likely due to an API change that needs
- * to be chased down.
- */
-#error "Unsupported kernel: no usable disk change check"
-#endif /* HAVE_BDEV_CHECK_MEDIA_CHANGE */
-#endif /* HAVE_CHECK_DISK_CHANGE */
-
-/*
- * 2.6.22 API change
- * The function invalidate_bdev() lost it's second argument because
- * it was unused.
- */
-#ifdef HAVE_1ARG_INVALIDATE_BDEV
-#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev)
-#else
-#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev, 1)
-#endif /* HAVE_1ARG_INVALIDATE_BDEV */
-
-/*
- * 2.6.27 API change
- * The function was exported for use, prior to this it existed but the
- * symbol was not exported.
- *
- * 4.4.0-6.21 API change for Ubuntu
- * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
- */
-#ifdef HAVE_1ARG_LOOKUP_BDEV
-#define	vdev_lookup_bdev(path)	lookup_bdev(path)
-#else
-#ifdef HAVE_2ARGS_LOOKUP_BDEV
-#define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
-#else
-#define	vdev_lookup_bdev(path)	ERR_PTR(-ENOTSUP)
-#endif /* HAVE_2ARGS_LOOKUP_BDEV */
-#endif /* HAVE_1ARG_LOOKUP_BDEV */
-
-/*
- * 2.6.30 API change
- * To ensure good performance preferentially use the physical block size
- * for proper alignment.  The physical size is supposed to be the internal
- * sector size used by the device.  This is often 4096 byte for AF devices,
- * while a smaller 512 byte logical size is supported for compatibility.
- *
- * Unfortunately, many drives still misreport their physical sector size.
- * For devices which are known to lie you may need to manually set this
- * at pool creation time with 'zpool create -o ashift=12 ...'.
- *
- * When the physical block size interface isn't available, we fall back to
- * the logical block size interface and then the older hard sector size.
- */
-#ifdef HAVE_BDEV_PHYSICAL_BLOCK_SIZE
-#define	vdev_bdev_block_size(bdev)	bdev_physical_block_size(bdev)
-#else
-#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE
-#define	vdev_bdev_block_size(bdev)	bdev_logical_block_size(bdev)
-#else
-#define	vdev_bdev_block_size(bdev)	bdev_hardsect_size(bdev)
-#endif /* HAVE_BDEV_LOGICAL_BLOCK_SIZE */
-#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE */
-
-#ifndef HAVE_BIO_SET_OP_ATTRS
-/*
- * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
- */
-static inline void
-bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
-{
-	bio->bi_rw |= rw | flags;
-}
-#endif
-
-/*
- * bio_set_flush - Set the appropriate flags in a bio to guarantee
- * data are on non-volatile media on completion.
- *
- * 2.6.X - 2.6.36 API,
- *   WRITE_BARRIER - Tells the block layer to commit all previously submitted
- *   writes to stable storage before this one is started and that the current
- *   write is on stable storage upon completion.  Also prevents reordering
- *   on both sides of the current operation.
- *
- * 2.6.37 - 4.8 API,
- *   Introduce  WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
- *   replacement for WRITE_BARRIER to allow expressing richer semantics
- *   to the block layer.  It's up to the block layer to implement the
- *   semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
- *
- * 4.8 - 4.9 API,
- *   REQ_FLUSH was renamed to REQ_PREFLUSH.  For consistency with previous
- *   ZoL releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
- *
- * 4.10 API,
- *   The read/write flags and their modifiers, including WRITE_FLUSH,
- *   WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
- *   torvalds/linux@70fd7614 and replaced by direct flag modification
- *   of the REQ_ flags in bio->bi_opf.  Use REQ_PREFLUSH.
- */
-static inline void
-bio_set_flush(struct bio *bio)
-{
-#if defined(REQ_PREFLUSH)	/* >= 4.10 */
-	bio_set_op_attrs(bio, 0, REQ_PREFLUSH);
-#elif defined(WRITE_FLUSH_FUA)	/* >= 2.6.37 and <= 4.9 */
-	bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
-#elif defined(WRITE_BARRIER)	/* < 2.6.37 */
-	bio_set_op_attrs(bio, 0, WRITE_BARRIER);
-#else
-#error	"Allowing the build will cause bio_set_flush requests to be ignored."
-#endif
-}
-
-/*
- * 4.8 - 4.x API,
- *   REQ_OP_FLUSH
- *
- * 4.8-rc0 - 4.8-rc1,
- *   REQ_PREFLUSH
- *
- * 2.6.36 - 4.7 API,
- *   REQ_FLUSH
- *
- * 2.6.x - 2.6.35 API,
- *   HAVE_BIO_RW_BARRIER
- *
- * Used to determine if a cache flush has been requested.  This check has
- * been left intentionally broad in order to cover both a legacy flush
- * and the new preflush behavior introduced in Linux 4.8.  This is correct
- * in all cases but may have a performance impact for some kernels.  It
- * has the advantage of minimizing kernel specific changes in the zvol code.
- *
- */
-static inline boolean_t
-bio_is_flush(struct bio *bio)
-{
-#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
-	return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
-#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
-	return (bio->bi_opf & REQ_PREFLUSH);
-#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
-	return (bio->bi_rw & REQ_PREFLUSH);
-#elif defined(REQ_FLUSH)
-	return (bio->bi_rw & REQ_FLUSH);
-#elif defined(HAVE_BIO_RW_BARRIER)
-	return (bio->bi_rw & (1 << BIO_RW_BARRIER));
-#else
-#error	"Allowing the build will cause flush requests to be ignored."
-#endif
-}
-
-/*
- * 4.8 - 4.x API,
- *   REQ_FUA flag moved to bio->bi_opf
- *
- * 2.6.x - 4.7 API,
- *   REQ_FUA
- */
-static inline boolean_t
-bio_is_fua(struct bio *bio)
-{
-#if defined(HAVE_BIO_BI_OPF)
-	return (bio->bi_opf & REQ_FUA);
-#elif defined(REQ_FUA)
-	return (bio->bi_rw & REQ_FUA);
-#else
-#error	"Allowing the build will cause fua requests to be ignored."
-#endif
-}
-
-/*
- * 4.8 - 4.x API,
- *   REQ_OP_DISCARD
- *
- * 2.6.36 - 4.7 API,
- *   REQ_DISCARD
- *
- * 2.6.28 - 2.6.35 API,
- *   BIO_RW_DISCARD
- *
- * In all cases the normal I/O path is used for discards.  The only
- * difference is how the kernel tags individual I/Os as discards.
- *
- * Note that 2.6.32 era kernels provide both BIO_RW_DISCARD and REQ_DISCARD,
- * where BIO_RW_DISCARD is the correct interface.  Therefore, it is important
- * that the HAVE_BIO_RW_DISCARD check occur before the REQ_DISCARD check.
- */
-static inline boolean_t
-bio_is_discard(struct bio *bio)
-{
-#if defined(HAVE_REQ_OP_DISCARD)
-	return (bio_op(bio) == REQ_OP_DISCARD);
-#elif defined(HAVE_BIO_RW_DISCARD)
-	return (bio->bi_rw & (1 << BIO_RW_DISCARD));
-#elif defined(REQ_DISCARD)
-	return (bio->bi_rw & REQ_DISCARD);
-#else
-/* potentially triggering the DMU_MAX_ACCESS assertion.  */
-#error	"Allowing the build will cause discard requests to become writes."
-#endif
-}
-
-/*
- * 4.8 - 4.x API,
- *   REQ_OP_SECURE_ERASE
- *
- * 2.6.36 - 4.7 API,
- *   REQ_SECURE
- *
- * 2.6.x - 2.6.35 API,
- *   Unsupported by kernel
- */
-static inline boolean_t
-bio_is_secure_erase(struct bio *bio)
-{
-#if defined(HAVE_REQ_OP_SECURE_ERASE)
-	return (bio_op(bio) == REQ_OP_SECURE_ERASE);
-#elif defined(REQ_SECURE)
-	return (bio->bi_rw & REQ_SECURE);
-#else
-	return (0);
-#endif
-}
-
-/*
- * 2.6.33 API change
- * Discard granularity and alignment restrictions may now be set.  For
- * older kernels which do not support this it is safe to skip it.
- */
-#ifdef HAVE_DISCARD_GRANULARITY
-static inline void
-blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
-{
-	q->limits.discard_granularity = dg;
-}
-#else
-#define	blk_queue_discard_granularity(x, dg)	((void)0)
-#endif /* HAVE_DISCARD_GRANULARITY */
-
-/*
- * 2.6.32 - 4.x API,
- *   blk_queue_discard()
- */
-#if !defined(HAVE_BLK_QUEUE_DISCARD)
-#define	blk_queue_discard(q)			(0);
-#endif
-
-/*
- * 4.8 - 4.x API,
- *   blk_queue_secure_erase()
- *
- * 2.6.36 - 4.7 API,
- *   blk_queue_secdiscard()
- *
- * 2.6.x - 2.6.35 API,
- *   Unsupported by kernel
- */
-static inline int
-blk_queue_discard_secure(struct request_queue *q)
-{
-#if defined(HAVE_BLK_QUEUE_SECURE_ERASE)
-	return (blk_queue_secure_erase(q));
-#elif defined(HAVE_BLK_QUEUE_SECDISCARD)
-	return (blk_queue_secdiscard(q));
-#else
-	return (0);
-#endif
-}
-
-/*
- * A common holder for vdev_bdev_open() is used to relax the exclusive open
- * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
- * allow them to open the device multiple times.  Other kernel callers and
- * user space processes which don't pass this value will get EBUSY.  This is
- * currently required for the correct operation of hot spares.
- */
-#define	VDEV_HOLDER			((void *)0x2401de7)
-
-static inline void
-blk_generic_start_io_acct(struct request_queue *q, int rw,
-    unsigned long sectors, struct hd_struct *part)
-{
-#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
-	generic_start_io_acct(rw, sectors, part);
-#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
-	generic_start_io_acct(q, rw, sectors, part);
-#endif
-}
-
-static inline void
-blk_generic_end_io_acct(struct request_queue *q, int rw,
-    struct hd_struct *part, unsigned long start_time)
-{
-#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
-	generic_end_io_acct(rw, part, start_time);
-#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
-	generic_end_io_acct(q, rw, part, start_time);
-#endif
-}
-
-#ifndef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-static inline struct request_queue *
-blk_generic_alloc_queue(make_request_fn make_request, int node_id)
-{
-#if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
-	return (blk_alloc_queue(make_request, node_id));
-#else
-	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);
-	if (q != NULL)
-		blk_queue_make_request(q, make_request);
-
-	return (q);
-#endif
-}
-#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
-
-#endif /* _ZFS_BLKDEV_H */

diff --git a/zfs/include/linux/compiler_compat.h b/zfs/include/linux/compiler_compat.h
deleted file mode 100644
index 921d32f..0000000
--- a/zfs/include/linux/compiler_compat.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2018 Lawrence Livermore National Security, LLC.
- */
-
-#ifndef _ZFS_COMPILER_COMPAT_H
-#define	_ZFS_COMPILER_COMPAT_H
-
-#include <linux/compiler.h>
-
-#if !defined(READ_ONCE)
-#define	READ_ONCE(x)		ACCESS_ONCE(x)
-#endif
-
-#endif	/* _ZFS_COMPILER_COMPAT_H */

diff --git a/zfs/include/linux/dcache_compat.h b/zfs/include/linux/dcache_compat.h
deleted file mode 100644
index bdaa5db..0000000
--- a/zfs/include/linux/dcache_compat.h
+++ /dev/null

@@ -1,83 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- */
-
-#ifndef _ZFS_DCACHE_H
-#define	_ZFS_DCACHE_H
-
-#include <linux/dcache.h>
-
-#define	dname(dentry)	((char *)((dentry)->d_name.name))
-#define	dlen(dentry)	((int)((dentry)->d_name.len))
-
-#ifndef HAVE_D_MAKE_ROOT
-#define	d_make_root(inode)	d_alloc_root(inode)
-#endif /* HAVE_D_MAKE_ROOT */
-
-/*
- * 2.6.30 API change,
- * The const keyword was added to the 'struct dentry_operations' in
- * the dentry structure.  To handle this we define an appropriate
- * dentry_operations_t typedef which can be used.
- */
-#ifdef HAVE_CONST_DENTRY_OPERATIONS
-typedef const struct dentry_operations	dentry_operations_t;
-#else
-typedef struct dentry_operations	dentry_operations_t;
-#endif
-
-/*
- * 2.6.38 API change,
- * Added d_set_d_op() helper function which sets some flags in
- * dentry->d_flags based on which operations are defined.
- */
-#ifndef HAVE_D_SET_D_OP
-static inline void
-d_set_d_op(struct dentry *dentry, dentry_operations_t *op)
-{
-	dentry->d_op = op;
-}
-#endif /* HAVE_D_SET_D_OP */
-
-/*
- * 2.6.38 API addition,
- * Added d_clear_d_op() helper function which clears some flags and the
- * registered dentry->d_op table.  This is required because d_set_d_op()
- * issues a warning when the dentry operations table is already set.
- * For the .zfs control directory to work properly we must be able to
- * override the default operations table and register custom .d_automount
- * and .d_revalidate callbacks.
- */
-static inline void
-d_clear_d_op(struct dentry *dentry)
-{
-#ifdef HAVE_D_SET_D_OP
-	dentry->d_op = NULL;
-	dentry->d_flags &= ~(
-	    DCACHE_OP_HASH | DCACHE_OP_COMPARE |
-	    DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
-#endif /* HAVE_D_SET_D_OP */
-}
-
-#endif /* _ZFS_DCACHE_H */

diff --git a/zfs/include/linux/kmap_compat.h b/zfs/include/linux/kmap_compat.h
deleted file mode 100644
index b9c7f5b..0000000
--- a/zfs/include/linux/kmap_compat.h
+++ /dev/null

@@ -1,48 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
- */
-
-#ifndef _ZFS_KMAP_H
-#define	_ZFS_KMAP_H
-
-#include <linux/highmem.h>
-#include <linux/uaccess.h>
-
-#ifdef HAVE_1ARG_KMAP_ATOMIC
-/* 2.6.37 API change */
-#define	zfs_kmap_atomic(page, km_type)		kmap_atomic(page)
-#define	zfs_kunmap_atomic(addr, km_type)	kunmap_atomic(addr)
-#else
-#define	zfs_kmap_atomic(page, km_type)		kmap_atomic(page, km_type)
-#define	zfs_kunmap_atomic(addr, km_type)	kunmap_atomic(addr, km_type)
-#endif
-
-/* 5.0 API change - no more 'type' argument for access_ok() */
-#ifdef HAVE_ACCESS_OK_TYPE
-#define	zfs_access_ok(type, addr, size)	access_ok(type, addr, size)
-#else
-#define	zfs_access_ok(type, addr, size)	access_ok(addr, size)
-#endif
-
-#endif	/* _ZFS_KMAP_H */

diff --git a/zfs/include/linux/mod_compat.h b/zfs/include/linux/mod_compat.h
deleted file mode 100644
index 32aea44..0000000
--- a/zfs/include/linux/mod_compat.h
+++ /dev/null

@@ -1,39 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2016 Gvozden Neskovic <neskovic@gmail.com>.
- */
-
-#ifndef _MOD_COMPAT_H
-#define	_MOD_COMPAT_H
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-/* Grsecurity kernel API change */
-#ifdef MODULE_PARAM_CALL_CONST
-typedef const struct kernel_param zfs_kernel_param_t;
-#else
-typedef struct kernel_param zfs_kernel_param_t;
-#endif
-
-#endif	/* _MOD_COMPAT_H */

diff --git a/zfs/include/linux/simd.h b/zfs/include/linux/simd.h
deleted file mode 100644
index bb5f0f0..0000000
--- a/zfs/include/linux/simd.h
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (C) 2019 Lawrence Livermore National Security, LLC.
- */
-
-#ifndef _SIMD_H
-#define	_SIMD_H
-
-#if defined(__x86)
-#include <linux/simd_x86.h>
-
-#elif defined(__aarch64__)
-#include <linux/simd_aarch64.h>
-#else
-
-#define	kfpu_allowed()		0
-#define	kfpu_begin()		do {} while (0)
-#define	kfpu_end()		do {} while (0)
-#define	kfpu_init()		0
-#define	kfpu_fini()		((void) 0)
-
-#endif
-#endif /* _SIMD_H */

diff --git a/zfs/include/linux/simd_aarch64.h b/zfs/include/linux/simd_aarch64.h
deleted file mode 100644
index 7ba308d..0000000
--- a/zfs/include/linux/simd_aarch64.h
+++ /dev/null

@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (C) 2016 Romain Dolbeau <romain@dolbeau.org>.
- */
-
-/*
- * USER API:
- *
- * Kernel fpu methods:
- *	kfpu_allowed()
- *	kfpu_begin()
- *	kfpu_end()
- *	kfpu_init()
- *	kfpu_fini()
- */
-
-#ifndef _SIMD_AARCH64_H
-#define	_SIMD_AARCH64_H
-
-#include <sys/isa_defs.h>
-
-#if defined(__aarch64__)
-
-#include <sys/types.h>
-
-#if defined(_KERNEL)
-#include <asm/neon.h>
-#define	kfpu_allowed()		1
-#define	kfpu_begin()		kernel_neon_begin()
-#define	kfpu_end()		kernel_neon_end()
-#define	kfpu_init()		0
-#define	kfpu_fini()		((void) 0)
-#else
-/*
- * fpu dummy methods for userspace
- */
-#define	kfpu_allowed()		1
-#define	kfpu_begin()		do {} while (0)
-#define	kfpu_end()		do {} while (0)
-#define	kfpu_init()		0
-#define	kfpu_fini()		((void) 0)
-#endif /* defined(_KERNEL) */
-
-#endif /* __aarch64__ */
-
-#endif /* _SIMD_AARCH64_H */

diff --git a/zfs/include/linux/simd_x86.h b/zfs/include/linux/simd_x86.h
deleted file mode 100644
index bf44f6b..0000000
--- a/zfs/include/linux/simd_x86.h
+++ /dev/null

@@ -1,942 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
- */
-
-/*
- * USER API:
- *
- * Kernel fpu methods:
- *	kfpu_allowed()
- *	kfpu_begin()
- *	kfpu_end()
- *	kfpu_init()
- *	kfpu_fini()
- *
- * SIMD support:
- *
- * Following functions should be called to determine whether CPU feature
- * is supported. All functions are usable in kernel and user space.
- * If a SIMD algorithm is using more than one instruction set
- * all relevant feature test functions should be called.
- *
- * Supported features:
- *	zfs_sse_available()
- *	zfs_sse2_available()
- *	zfs_sse3_available()
- *	zfs_ssse3_available()
- *	zfs_sse4_1_available()
- *	zfs_sse4_2_available()
- *
- *	zfs_avx_available()
- *	zfs_avx2_available()
- *
- *	zfs_bmi1_available()
- *	zfs_bmi2_available()
- *
- *	zfs_avx512f_available()
- *	zfs_avx512cd_available()
- *	zfs_avx512er_available()
- *	zfs_avx512pf_available()
- *	zfs_avx512bw_available()
- *	zfs_avx512dq_available()
- *	zfs_avx512vl_available()
- *	zfs_avx512ifma_available()
- *	zfs_avx512vbmi_available()
- *
- * NOTE(AVX-512VL):	If using AVX-512 instructions with 128Bit registers
- *			also add zfs_avx512vl_available() to feature check.
- */
-
-#ifndef _SIMD_X86_H
-#define	_SIMD_X86_H
-
-#include <sys/isa_defs.h>
-
-/* only for __x86 */
-#if defined(__x86)
-
-#include <sys/types.h>
-
-#if defined(_KERNEL)
-#include <asm/cpufeature.h>
-#else
-#include <cpuid.h>
-#endif
-
-#if defined(_KERNEL)
-
-/*
- * Disable the WARN_ON_FPU() macro to prevent additional dependencies
- * when providing the kfpu_* functions.  Relevant warnings are included
- * as appropriate and are unconditionally enabled.
- */
-#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
-#undef CONFIG_X86_DEBUG_FPU
-#endif
-
-#if defined(HAVE_KERNEL_FPU_API_HEADER)
-#include <asm/fpu/api.h>
-#include <asm/fpu/internal.h>
-#else
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#endif
-
-/*
- * The following cases are for kernels which export either the
- * kernel_fpu_* or __kernel_fpu_* functions.
- */
-#if defined(KERNEL_EXPORTS_X86_FPU)
-
-#define	kfpu_allowed()		1
-#define	kfpu_init()		0
-#define	kfpu_fini()		((void) 0)
-
-#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
-#define	kfpu_begin()		\
-{				\
-	preempt_disable();	\
-	__kernel_fpu_begin();	\
-}
-#define	kfpu_end()		\
-{				\
-	__kernel_fpu_end();	\
-	preempt_enable();	\
-}
-
-#elif defined(HAVE_KERNEL_FPU)
-#define	kfpu_begin()		kernel_fpu_begin()
-#define	kfpu_end()		kernel_fpu_end()
-
-#else
-/*
- * This case is unreachable.  When KERNEL_EXPORTS_X86_FPU is defined then
- * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
- */
-#error "Unreachable kernel configuration"
-#endif
-
-#else /* defined(KERNEL_EXPORTS_X86_FPU) */
-
-/*
- * When the kernel_fpu_* symbols are unavailable then provide our own
- * versions which allow the FPU to be safely used.
- */
-#if defined(HAVE_KERNEL_FPU_INTERNAL)
-
-#include <linux/mm.h>
-
-extern union fpregs_state **zfs_kfpu_fpregs;
-
-/*
- * Initialize per-cpu variables to store FPU state.
- */
-static inline void
-kfpu_fini(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		if (zfs_kfpu_fpregs[cpu] != NULL) {
-			free_pages((unsigned long)zfs_kfpu_fpregs[cpu],
-			    get_order(sizeof (union fpregs_state)));
-		}
-	}
-
-	kfree(zfs_kfpu_fpregs);
-}
-
-static inline int
-kfpu_init(void)
-{
-	zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
-	    sizeof (union fpregs_state *), GFP_KERNEL);
-	if (zfs_kfpu_fpregs == NULL)
-		return (-ENOMEM);
-
-	/*
-	 * The fxsave and xsave operations require 16-/64-byte alignment of
-	 * the target memory. Since kmalloc() provides no alignment
-	 * guarantee instead use alloc_pages_node().
-	 */
-	unsigned int order = get_order(sizeof (union fpregs_state));
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct page *page = alloc_pages_node(cpu_to_node(cpu),
-		    GFP_KERNEL | __GFP_ZERO, order);
-		if (page == NULL) {
-			kfpu_fini();
-			return (-ENOMEM);
-		}
-
-		zfs_kfpu_fpregs[cpu] = page_address(page);
-	}
-
-	return (0);
-}
-
-#define	kfpu_allowed()		1
-#define	ex_handler_fprestore	ex_handler_default
-
-/*
- * FPU save and restore instructions.
- */
-#define	__asm			__asm__ __volatile__
-#define	kfpu_fxsave(addr)	__asm("fxsave %0" : "=m" (*(addr)))
-#define	kfpu_fxsaveq(addr)	__asm("fxsaveq %0" : "=m" (*(addr)))
-#define	kfpu_fnsave(addr)	__asm("fnsave %0; fwait" : "=m" (*(addr)))
-#define	kfpu_fxrstor(addr)	__asm("fxrstor %0" : : "m" (*(addr)))
-#define	kfpu_fxrstorq(addr)	__asm("fxrstorq %0" : : "m" (*(addr)))
-#define	kfpu_frstor(addr)	__asm("frstor %0" : : "m" (*(addr)))
-#define	kfpu_fxsr_clean(rval)	__asm("fnclex; emms; fildl %P[addr]" \
-				    : : [addr] "m" (rval));
-
-static inline void
-kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
-{
-	uint32_t low, hi;
-	int err;
-
-	low = mask;
-	hi = mask >> 32;
-	XSTATE_XSAVE(addr, low, hi, err);
-	WARN_ON_ONCE(err);
-}
-
-static inline void
-kfpu_save_fxsr(struct fxregs_state *addr)
-{
-	if (IS_ENABLED(CONFIG_X86_32))
-		kfpu_fxsave(addr);
-	else
-		kfpu_fxsaveq(addr);
-}
-
-static inline void
-kfpu_save_fsave(struct fregs_state *addr)
-{
-	kfpu_fnsave(addr);
-}
-
-static inline void
-kfpu_begin(void)
-{
-	/*
-	 * Preemption and interrupts must be disabled for the critical
-	 * region where the FPU state is being modified.
-	 */
-	preempt_disable();
-	local_irq_disable();
-
-	/*
-	 * The current FPU registers need to be preserved by kfpu_begin()
-	 * and restored by kfpu_end().  They are stored in a dedicated
-	 * per-cpu variable, not in the task struct, this allows any user
-	 * FPU state to be correctly preserved and restored.
-	 */
-	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
-
-	if (static_cpu_has(X86_FEATURE_XSAVE)) {
-		kfpu_save_xsave(&state->xsave, ~0);
-	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
-		kfpu_save_fxsr(&state->fxsave);
-	} else {
-		kfpu_save_fsave(&state->fsave);
-	}
-}
-
-static inline void
-kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
-{
-	uint32_t low, hi;
-
-	low = mask;
-	hi = mask >> 32;
-	XSTATE_XRESTORE(addr, low, hi);
-}
-
-static inline void
-kfpu_restore_fxsr(struct fxregs_state *addr)
-{
-	/*
-	 * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
-	 * restores the _x87 FOP, FIP, and FDP registers when an exception
-	 * is pending.  Clean the _x87 state to force the restore.
-	 */
-	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
-		kfpu_fxsr_clean(addr);
-
-	if (IS_ENABLED(CONFIG_X86_32)) {
-		kfpu_fxrstor(addr);
-	} else {
-		kfpu_fxrstorq(addr);
-	}
-}
-
-static inline void
-kfpu_restore_fsave(struct fregs_state *addr)
-{
-	kfpu_frstor(addr);
-}
-
-static inline void
-kfpu_end(void)
-{
-	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
-
-	if (static_cpu_has(X86_FEATURE_XSAVE)) {
-		kfpu_restore_xsave(&state->xsave, ~0);
-	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
-		kfpu_restore_fxsr(&state->fxsave);
-	} else {
-		kfpu_restore_fsave(&state->fsave);
-	}
-
-	local_irq_enable();
-	preempt_enable();
-}
-
-#else
-
-/*
- * FPU support is unavailable.
- */
-#define	kfpu_allowed()		0
-#define	kfpu_begin()		do {} while (0)
-#define	kfpu_end()		do {} while (0)
-#define	kfpu_init()		0
-#define	kfpu_fini()		((void) 0)
-
-#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
-#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
-
-#else /* defined(_KERNEL) */
-/*
- * FPU dummy methods for user space.
- */
-#define	kfpu_allowed()		1
-#define	kfpu_begin()		do {} while (0)
-#define	kfpu_end()		do {} while (0)
-#endif /* defined(_KERNEL) */
-
-/*
- * CPUID feature tests for user-space. Linux kernel provides an interface for
- * CPU feature testing.
- */
-#if !defined(_KERNEL)
-
-/*
- * x86 registers used implicitly by CPUID
- */
-typedef enum cpuid_regs {
-	EAX = 0,
-	EBX,
-	ECX,
-	EDX,
-	CPUID_REG_CNT = 4
-} cpuid_regs_t;
-
-/*
- * List of instruction sets identified by CPUID
- */
-typedef enum cpuid_inst_sets {
-	SSE = 0,
-	SSE2,
-	SSE3,
-	SSSE3,
-	SSE4_1,
-	SSE4_2,
-	OSXSAVE,
-	AVX,
-	AVX2,
-	BMI1,
-	BMI2,
-	AVX512F,
-	AVX512CD,
-	AVX512DQ,
-	AVX512BW,
-	AVX512IFMA,
-	AVX512VBMI,
-	AVX512PF,
-	AVX512ER,
-	AVX512VL,
-	AES,
-	PCLMULQDQ,
-    MOVBE
-} cpuid_inst_sets_t;
-
-/*
- * Instruction set descriptor.
- */
-typedef struct cpuid_feature_desc {
-	uint32_t leaf;		/* CPUID leaf */
-	uint32_t subleaf;	/* CPUID sub-leaf */
-	uint32_t flag;		/* bit mask of the feature */
-	cpuid_regs_t reg;	/* which CPUID return register to test */
-} cpuid_feature_desc_t;
-
-#define	_AVX512F_BIT		(1U << 16)
-#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
-#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
-#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
-#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
-#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf  */
-#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
-#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
-#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
-#define	_AES_BIT		(1U << 25)
-#define	_PCLMULQDQ_BIT		(1U << 1)
-#define	_MOVBE_BIT		(1U << 22)
-
-/*
- * Descriptions of supported instruction sets
- */
-static const cpuid_feature_desc_t cpuid_features[] = {
-	[SSE]		= {1U, 0U,	1U << 25,	EDX	},
-	[SSE2]		= {1U, 0U,	1U << 26,	EDX	},
-	[SSE3]		= {1U, 0U,	1U << 0,	ECX	},
-	[SSSE3]		= {1U, 0U,	1U << 9,	ECX	},
-	[SSE4_1]	= {1U, 0U,	1U << 19,	ECX	},
-	[SSE4_2]	= {1U, 0U,	1U << 20,	ECX	},
-	[OSXSAVE]	= {1U, 0U,	1U << 27,	ECX	},
-	[AVX]		= {1U, 0U,	1U << 28,	ECX	},
-	[AVX2]		= {7U, 0U,	1U << 5,	EBX	},
-	[BMI1]		= {7U, 0U,	1U << 3,	EBX	},
-	[BMI2]		= {7U, 0U,	1U << 8,	EBX	},
-	[AVX512F]	= {7U, 0U, _AVX512F_BIT,	EBX	},
-	[AVX512CD]	= {7U, 0U, _AVX512CD_BIT,	EBX	},
-	[AVX512DQ]	= {7U, 0U, _AVX512DQ_BIT,	EBX	},
-	[AVX512BW]	= {7U, 0U, _AVX512BW_BIT,	EBX	},
-	[AVX512IFMA]	= {7U, 0U, _AVX512IFMA_BIT,	EBX	},
-	[AVX512VBMI]	= {7U, 0U, _AVX512VBMI_BIT,	ECX	},
-	[AVX512PF]	= {7U, 0U, _AVX512PF_BIT,	EBX	},
-	[AVX512ER]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
-	[AVX512VL]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
-	[AES]		= {1U, 0U, _AES_BIT,		ECX	},
-	[PCLMULQDQ]	= {1U, 0U, _PCLMULQDQ_BIT,	ECX	},
-	[MOVBE]		= {1U, 0U, _MOVBE_BIT,		ECX	},
-};
-
-/*
- * Check if OS supports AVX and AVX2 by checking XCR0
- * Only call this function if CPUID indicates that AVX feature is
- * supported by the CPU, otherwise it might be an illegal instruction.
- */
-static inline uint64_t
-xgetbv(uint32_t index)
-{
-	uint32_t eax, edx;
-	/* xgetbv - instruction byte code */
-	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
-	    : "=a" (eax), "=d" (edx)
-	    : "c" (index));
-
-	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
-}
-
-/*
- * Check if CPU supports a feature
- */
-static inline boolean_t
-__cpuid_check_feature(const cpuid_feature_desc_t *desc)
-{
-	uint32_t r[CPUID_REG_CNT];
-
-	if (__get_cpuid_max(0, NULL) >= desc->leaf) {
-		/*
-		 * __cpuid_count is needed to properly check
-		 * for AVX2. It is a macro, so return parameters
-		 * are passed by value.
-		 */
-		__cpuid_count(desc->leaf, desc->subleaf,
-		    r[EAX], r[EBX], r[ECX], r[EDX]);
-		return ((r[desc->reg] & desc->flag) == desc->flag);
-	}
-	return (B_FALSE);
-}
-
-#define	CPUID_FEATURE_CHECK(name, id)				\
-static inline boolean_t						\
-__cpuid_has_ ## name(void)					\
-{								\
-	return (__cpuid_check_feature(&cpuid_features[id]));	\
-}
-
-/*
- * Define functions for user-space CPUID features testing
- */
-CPUID_FEATURE_CHECK(sse, SSE);
-CPUID_FEATURE_CHECK(sse2, SSE2);
-CPUID_FEATURE_CHECK(sse3, SSE3);
-CPUID_FEATURE_CHECK(ssse3, SSSE3);
-CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
-CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
-CPUID_FEATURE_CHECK(avx, AVX);
-CPUID_FEATURE_CHECK(avx2, AVX2);
-CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
-CPUID_FEATURE_CHECK(bmi1, BMI1);
-CPUID_FEATURE_CHECK(bmi2, BMI2);
-CPUID_FEATURE_CHECK(avx512f, AVX512F);
-CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
-CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
-CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
-CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
-CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
-CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
-CPUID_FEATURE_CHECK(avx512er, AVX512ER);
-CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
-CPUID_FEATURE_CHECK(aes, AES);
-CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
-CPUID_FEATURE_CHECK(movbe, MOVBE);
-
-#endif /* !defined(_KERNEL) */
-
-/*
- * Detect register set support
- */
-static inline boolean_t
-__simd_state_enabled(const uint64_t state)
-{
-	boolean_t has_osxsave;
-	uint64_t xcr0;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_OSXSAVE)
-	has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
-#else
-	has_osxsave = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_osxsave = __cpuid_has_osxsave();
-#endif
-
-	if (!has_osxsave)
-		return (B_FALSE);
-
-	xcr0 = xgetbv(0);
-	return ((xcr0 & state) == state);
-}
-
-#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
-#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)
-
-#define	__ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
-#define	__zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
-
-
-/*
- * Check if SSE instruction set is available
- */
-static inline boolean_t
-zfs_sse_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_XMM));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_sse());
-#endif
-}
-
-/*
- * Check if SSE2 instruction set is available
- */
-static inline boolean_t
-zfs_sse2_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_XMM2));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_sse2());
-#endif
-}
-
-/*
- * Check if SSE3 instruction set is available
- */
-static inline boolean_t
-zfs_sse3_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_XMM3));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_sse3());
-#endif
-}
-
-/*
- * Check if SSSE3 instruction set is available
- */
-static inline boolean_t
-zfs_ssse3_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_SSSE3));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_ssse3());
-#endif
-}
-
-/*
- * Check if SSE4.1 instruction set is available
- */
-static inline boolean_t
-zfs_sse4_1_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_sse4_1());
-#endif
-}
-
-/*
- * Check if SSE4.2 instruction set is available
- */
-static inline boolean_t
-zfs_sse4_2_available(void)
-{
-#if defined(_KERNEL)
-	return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
-#elif !defined(_KERNEL)
-	return (__cpuid_has_sse4_2());
-#endif
-}
-
-/*
- * Check if AVX instruction set is available
- */
-static inline boolean_t
-zfs_avx_available(void)
-{
-	boolean_t has_avx;
-#if defined(_KERNEL)
-	has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
-#elif !defined(_KERNEL)
-	has_avx = __cpuid_has_avx();
-#endif
-
-	return (has_avx && __ymm_enabled());
-}
-
-/*
- * Check if AVX2 instruction set is available
- */
-static inline boolean_t
-zfs_avx2_available(void)
-{
-	boolean_t has_avx2;
-#if defined(_KERNEL)
-	has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
-#elif !defined(_KERNEL)
-	has_avx2 = __cpuid_has_avx2();
-#endif
-
-	return (has_avx2 && __ymm_enabled());
-}
-
-/*
- * Check if BMI1 instruction set is available
- */
-static inline boolean_t
-zfs_bmi1_available(void)
-{
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_BMI1)
-	return (!!boot_cpu_has(X86_FEATURE_BMI1));
-#else
-	return (B_FALSE);
-#endif
-#elif !defined(_KERNEL)
-	return (__cpuid_has_bmi1());
-#endif
-}
-
-/*
- * Check if BMI2 instruction set is available
- */
-static inline boolean_t
-zfs_bmi2_available(void)
-{
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_BMI2)
-	return (!!boot_cpu_has(X86_FEATURE_BMI2));
-#else
-	return (B_FALSE);
-#endif
-#elif !defined(_KERNEL)
-	return (__cpuid_has_bmi2());
-#endif
-}
-
-/*
- * Check if AES instruction set is available
- */
-static inline boolean_t
-zfs_aes_available(void)
-{
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AES)
-	return (!!boot_cpu_has(X86_FEATURE_AES));
-#else
-	return (B_FALSE);
-#endif
-#elif !defined(_KERNEL)
-	return (__cpuid_has_aes());
-#endif
-}
-
-/*
- * Check if PCLMULQDQ instruction set is available
- */
-static inline boolean_t
-zfs_pclmulqdq_available(void)
-{
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_PCLMULQDQ)
-	return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
-#else
-	return (B_FALSE);
-#endif
-#elif !defined(_KERNEL)
-	return (__cpuid_has_pclmulqdq());
-#endif
-}
-
-/*
- * Check if MOVBE instruction is available
- */
-static inline boolean_t
-zfs_movbe_available(void)
-{
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_MOVBE)
-	return (!!boot_cpu_has(X86_FEATURE_MOVBE));
-#else
-	return (B_FALSE);
-#endif
-#elif !defined(_KERNEL)
-	return (__cpuid_has_movbe());
-#endif
-}
-
-/*
- * AVX-512 family of instruction sets:
- *
- * AVX512F	Foundation
- * AVX512CD	Conflict Detection Instructions
- * AVX512ER	Exponential and Reciprocal Instructions
- * AVX512PF	Prefetch Instructions
- *
- * AVX512BW	Byte and Word Instructions
- * AVX512DQ	Double-word and Quadword Instructions
- * AVX512VL	Vector Length Extensions
- *
- * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
- * AVX512VBMI	Vector Byte Manipulation Instructions
- */
-
-
-/* Check if AVX512F instruction set is available */
-static inline boolean_t
-zfs_avx512f_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512F)
-	has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512f();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512CD instruction set is available */
-static inline boolean_t
-zfs_avx512cd_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512CD)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512CD);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512cd();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512ER instruction set is available */
-static inline boolean_t
-zfs_avx512er_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512ER)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512ER);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512er();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512PF instruction set is available */
-static inline boolean_t
-zfs_avx512pf_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512PF)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512PF);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512pf();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512BW instruction set is available */
-static inline boolean_t
-zfs_avx512bw_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512BW)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512BW);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512bw();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512DQ instruction set is available */
-static inline boolean_t
-zfs_avx512dq_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512DQ)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512DQ);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512dq();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512VL instruction set is available */
-static inline boolean_t
-zfs_avx512vl_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512VL)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512VL);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512vl();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512IFMA instruction set is available */
-static inline boolean_t
-zfs_avx512ifma_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512IFMA)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512IFMA);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512ifma();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-/* Check if AVX512VBMI instruction set is available */
-static inline boolean_t
-zfs_avx512vbmi_available(void)
-{
-	boolean_t has_avx512 = B_FALSE;
-
-#if defined(_KERNEL)
-#if defined(X86_FEATURE_AVX512VBMI)
-	has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
-	    boot_cpu_has(X86_FEATURE_AVX512VBMI);
-#else
-	has_avx512 = B_FALSE;
-#endif
-#elif !defined(_KERNEL)
-	has_avx512 = __cpuid_has_avx512f() &&
-	    __cpuid_has_avx512vbmi();
-#endif
-
-	return (has_avx512 && __zmm_enabled());
-}
-
-#endif /* defined(__x86) */
-
-#endif /* _SIMD_X86_H */

diff --git a/zfs/include/linux/vfs_compat.h b/zfs/include/linux/vfs_compat.h
deleted file mode 100644
index 28b4541..0000000
--- a/zfs/include/linux/vfs_compat.h
+++ /dev/null

@@ -1,646 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- * Copyright (C) 2015 Jörg Thalheim.
- */
-
-#ifndef _ZFS_VFS_H
-#define	_ZFS_VFS_H
-
-#include <sys/taskq.h>
-#include <sys/cred.h>
-#include <linux/backing-dev.h>
-#include <linux/compat.h>
-
-/*
- * 2.6.28 API change,
- * Added insert_inode_locked() helper function, prior to this most callers
- * used insert_inode_hash().  The older method doesn't check for collisions
- * in the inode_hashtable but it still acceptable for use.
- */
-#ifndef HAVE_INSERT_INODE_LOCKED
-static inline int
-insert_inode_locked(struct inode *ip)
-{
-	insert_inode_hash(ip);
-	return (0);
-}
-#endif /* HAVE_INSERT_INODE_LOCKED */
-
-/*
- * 2.6.35 API change,
- * Add truncate_setsize() if it is not exported by the Linux kernel.
- *
- * Truncate the inode and pages associated with the inode. The pages are
- * unmapped and removed from cache.
- */
-#ifndef HAVE_TRUNCATE_SETSIZE
-static inline void
-truncate_setsize(struct inode *ip, loff_t new)
-{
-	struct address_space *mapping = ip->i_mapping;
-
-	i_size_write(ip, new);
-
-	unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
-	truncate_inode_pages(mapping, new);
-	unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
-}
-#endif /* HAVE_TRUNCATE_SETSIZE */
-
-/*
- * 2.6.32 - 2.6.33, bdi_setup_and_register() is not available.
- * 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments.
- * 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments.
- * 4.12 - x.y, super_setup_bdi_name() new interface.
- */
-#if defined(HAVE_SUPER_SETUP_BDI_NAME)
-extern atomic_long_t zfs_bdi_seq;
-
-static inline int
-zpl_bdi_setup(struct super_block *sb, char *name)
-{
-	return super_setup_bdi_name(sb, "%.28s-%ld", name,
-	    atomic_long_inc_return(&zfs_bdi_seq));
-}
-static inline void
-zpl_bdi_destroy(struct super_block *sb)
-{
-}
-#elif defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER)
-static inline int
-zpl_bdi_setup(struct super_block *sb, char *name)
-{
-	struct backing_dev_info *bdi;
-	int error;
-
-	bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP);
-	error = bdi_setup_and_register(bdi, name);
-	if (error) {
-		kmem_free(bdi, sizeof (struct backing_dev_info));
-		return (error);
-	}
-
-	sb->s_bdi = bdi;
-
-	return (0);
-}
-static inline void
-zpl_bdi_destroy(struct super_block *sb)
-{
-	struct backing_dev_info *bdi = sb->s_bdi;
-
-	bdi_destroy(bdi);
-	kmem_free(bdi, sizeof (struct backing_dev_info));
-	sb->s_bdi = NULL;
-}
-#elif defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER)
-static inline int
-zpl_bdi_setup(struct super_block *sb, char *name)
-{
-	struct backing_dev_info *bdi;
-	int error;
-
-	bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP);
-	error = bdi_setup_and_register(bdi, name, BDI_CAP_MAP_COPY);
-	if (error) {
-		kmem_free(sb->s_bdi, sizeof (struct backing_dev_info));
-		return (error);
-	}
-
-	sb->s_bdi = bdi;
-
-	return (0);
-}
-static inline void
-zpl_bdi_destroy(struct super_block *sb)
-{
-	struct backing_dev_info *bdi = sb->s_bdi;
-
-	bdi_destroy(bdi);
-	kmem_free(bdi, sizeof (struct backing_dev_info));
-	sb->s_bdi = NULL;
-}
-#else
-extern atomic_long_t zfs_bdi_seq;
-
-static inline int
-zpl_bdi_setup(struct super_block *sb, char *name)
-{
-	struct backing_dev_info *bdi;
-	int error;
-
-	bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP);
-	bdi->name = name;
-	bdi->capabilities = BDI_CAP_MAP_COPY;
-
-	error = bdi_init(bdi);
-	if (error) {
-		kmem_free(bdi, sizeof (struct backing_dev_info));
-		return (error);
-	}
-
-	error = bdi_register(bdi, NULL, "%.28s-%ld", name,
-	    atomic_long_inc_return(&zfs_bdi_seq));
-	if (error) {
-		bdi_destroy(bdi);
-		kmem_free(bdi, sizeof (struct backing_dev_info));
-		return (error);
-	}
-
-	sb->s_bdi = bdi;
-
-	return (0);
-}
-static inline void
-zpl_bdi_destroy(struct super_block *sb)
-{
-	struct backing_dev_info *bdi = sb->s_bdi;
-
-	bdi_destroy(bdi);
-	kmem_free(bdi, sizeof (struct backing_dev_info));
-	sb->s_bdi = NULL;
-}
-#endif
-
-/*
- * 4.14 adds SB_* flag definitions, define them to MS_* equivalents
- * if not set.
- */
-#ifndef	SB_RDONLY
-#define	SB_RDONLY	MS_RDONLY
-#endif
-
-#ifndef	SB_SILENT
-#define	SB_SILENT	MS_SILENT
-#endif
-
-#ifndef	SB_ACTIVE
-#define	SB_ACTIVE	MS_ACTIVE
-#endif
-
-#ifndef	SB_POSIXACL
-#define	SB_POSIXACL	MS_POSIXACL
-#endif
-
-#ifndef	SB_MANDLOCK
-#define	SB_MANDLOCK	MS_MANDLOCK
-#endif
-
-#ifndef	SB_NOATIME
-#define	SB_NOATIME	MS_NOATIME
-#endif
-
-/*
- * 2.6.38 API change,
- * LOOKUP_RCU flag introduced to distinguish rcu-walk from ref-walk cases.
- */
-#ifndef LOOKUP_RCU
-#define	LOOKUP_RCU	0x0
-#endif /* LOOKUP_RCU */
-
-/*
- * 3.2-rc1 API change,
- * Add set_nlink() if it is not exported by the Linux kernel.
- *
- * i_nlink is read-only in Linux 3.2, but it can be set directly in
- * earlier kernels.
- */
-#ifndef HAVE_SET_NLINK
-static inline void
-set_nlink(struct inode *inode, unsigned int nlink)
-{
-	inode->i_nlink = nlink;
-}
-#endif /* HAVE_SET_NLINK */
-
-/*
- * 3.3 API change,
- * The VFS .create, .mkdir and .mknod callbacks were updated to take a
- * umode_t type rather than an int.  To cleanly handle both definitions
- * the zpl_umode_t type is introduced and set accordingly.
- */
-#ifdef HAVE_MKDIR_UMODE_T
-typedef	umode_t		zpl_umode_t;
-#else
-typedef	int		zpl_umode_t;
-#endif
-
-/*
- * 3.5 API change,
- * The clear_inode() function replaces end_writeback() and introduces an
- * ordering change regarding when the inode_sync_wait() occurs.  See the
- * configure check in config/kernel-clear-inode.m4 for full details.
- */
-#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE)
-#define	clear_inode(ip)		end_writeback(ip)
-#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */
-
-/*
- * 3.6 API change,
- * The sget() helper function now takes the mount flags as an argument.
- */
-#ifdef HAVE_5ARG_SGET
-#define	zpl_sget(type, cmp, set, fl, mtd)	sget(type, cmp, set, fl, mtd)
-#else
-#define	zpl_sget(type, cmp, set, fl, mtd)	sget(type, cmp, set, mtd)
-#endif /* HAVE_5ARG_SGET */
-
-#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(HAVE_LSEEK_EXECUTE)
-static inline loff_t
-lseek_execute(
-	struct file *filp,
-	struct inode *inode,
-	loff_t offset,
-	loff_t maxsize)
-{
-	if (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET))
-		return (-EINVAL);
-
-	if (offset > maxsize)
-		return (-EINVAL);
-
-	if (offset != filp->f_pos) {
-		spin_lock(&filp->f_lock);
-		filp->f_pos = offset;
-		filp->f_version = 0;
-		spin_unlock(&filp->f_lock);
-	}
-
-	return (offset);
-}
-#endif /* SEEK_HOLE && SEEK_DATA && !HAVE_LSEEK_EXECUTE */
-
-#if defined(CONFIG_FS_POSIX_ACL)
-/*
- * These functions safely approximates the behavior of posix_acl_release()
- * which cannot be used because it calls the GPL-only symbol kfree_rcu().
- * The in-kernel version, which can access the RCU, frees the ACLs after
- * the grace period expires.  Because we're unsure how long that grace
- * period may be this implementation conservatively delays for 60 seconds.
- * This is several orders of magnitude larger than expected grace period.
- * At 60 seconds the kernel will also begin issuing RCU stall warnings.
- */
-
-#include <linux/posix_acl.h>
-
-#if defined(HAVE_POSIX_ACL_RELEASE) && !defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
-#define	zpl_posix_acl_release(arg)		posix_acl_release(arg)
-#else
-void zpl_posix_acl_release_impl(struct posix_acl *);
-
-static inline void
-zpl_posix_acl_release(struct posix_acl *acl)
-{
-	if ((acl == NULL) || (acl == ACL_NOT_CACHED))
-		return;
-#ifdef HAVE_ACL_REFCOUNT
-	if (refcount_dec_and_test(&acl->a_refcount))
-		zpl_posix_acl_release_impl(acl);
-#else
-	if (atomic_dec_and_test(&acl->a_refcount))
-		zpl_posix_acl_release_impl(acl);
-#endif
-}
-#endif /* HAVE_POSIX_ACL_RELEASE */
-
-#ifdef HAVE_SET_CACHED_ACL_USABLE
-#define	zpl_set_cached_acl(ip, ty, n)		set_cached_acl(ip, ty, n)
-#define	zpl_forget_cached_acl(ip, ty)		forget_cached_acl(ip, ty)
-#else
-static inline void
-zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer)
-{
-	struct posix_acl *older = NULL;
-
-	spin_lock(&ip->i_lock);
-
-	if ((newer != ACL_NOT_CACHED) && (newer != NULL))
-		posix_acl_dup(newer);
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		older = ip->i_acl;
-		rcu_assign_pointer(ip->i_acl, newer);
-		break;
-	case ACL_TYPE_DEFAULT:
-		older = ip->i_default_acl;
-		rcu_assign_pointer(ip->i_default_acl, newer);
-		break;
-	}
-
-	spin_unlock(&ip->i_lock);
-
-	zpl_posix_acl_release(older);
-}
-
-static inline void
-zpl_forget_cached_acl(struct inode *ip, int type)
-{
-	zpl_set_cached_acl(ip, type, (struct posix_acl *)ACL_NOT_CACHED);
-}
-#endif /* HAVE_SET_CACHED_ACL_USABLE */
-
-#ifndef HAVE___POSIX_ACL_CHMOD
-#ifdef HAVE_POSIX_ACL_CHMOD
-#define	__posix_acl_chmod(acl, gfp, mode)	posix_acl_chmod(acl, gfp, mode)
-#define	__posix_acl_create(acl, gfp, mode)	posix_acl_create(acl, gfp, mode)
-#else
-static inline int
-__posix_acl_chmod(struct posix_acl **acl, int flags, umode_t umode)
-{
-	struct posix_acl *oldacl = *acl;
-	mode_t mode = umode;
-	int error;
-
-	*acl = posix_acl_clone(*acl, flags);
-	zpl_posix_acl_release(oldacl);
-
-	if (!(*acl))
-		return (-ENOMEM);
-
-	error = posix_acl_chmod_masq(*acl, mode);
-	if (error) {
-		zpl_posix_acl_release(*acl);
-		*acl = NULL;
-	}
-
-	return (error);
-}
-
-static inline int
-__posix_acl_create(struct posix_acl **acl, int flags, umode_t *umodep)
-{
-	struct posix_acl *oldacl = *acl;
-	mode_t mode = *umodep;
-	int error;
-
-	*acl = posix_acl_clone(*acl, flags);
-	zpl_posix_acl_release(oldacl);
-
-	if (!(*acl))
-		return (-ENOMEM);
-
-	error = posix_acl_create_masq(*acl, &mode);
-	*umodep = mode;
-
-	if (error < 0) {
-		zpl_posix_acl_release(*acl);
-		*acl = NULL;
-	}
-
-	return (error);
-}
-#endif /* HAVE_POSIX_ACL_CHMOD */
-#endif /* HAVE___POSIX_ACL_CHMOD */
-
-#ifdef HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T
-typedef umode_t zpl_equivmode_t;
-#else
-typedef mode_t zpl_equivmode_t;
-#endif /* HAVE_POSIX_ACL_EQUIV_MODE_UMODE_T */
-
-/*
- * 4.8 API change,
- * posix_acl_valid() now must be passed a namespace, the namespace from
- * from super block associated with the given inode is used for this purpose.
- */
-#ifdef HAVE_POSIX_ACL_VALID_WITH_NS
-#define	zpl_posix_acl_valid(ip, acl)  posix_acl_valid(ip->i_sb->s_user_ns, acl)
-#else
-#define	zpl_posix_acl_valid(ip, acl)  posix_acl_valid(acl)
-#endif
-
-#endif /* CONFIG_FS_POSIX_ACL */
-
-/*
- * 2.6.38 API change,
- * The is_owner_or_cap() function was renamed to inode_owner_or_capable().
- */
-#ifdef HAVE_INODE_OWNER_OR_CAPABLE
-#define	zpl_inode_owner_or_capable(ip)		inode_owner_or_capable(ip)
-#else
-#define	zpl_inode_owner_or_capable(ip)		is_owner_or_cap(ip)
-#endif /* HAVE_INODE_OWNER_OR_CAPABLE */
-
-/*
- * 3.19 API change
- * struct access f->f_dentry->d_inode was replaced by accessor function
- * file_inode(f)
- */
-#ifndef HAVE_FILE_INODE
-static inline struct inode *file_inode(const struct file *f)
-{
-	return (f->f_dentry->d_inode);
-}
-#endif /* HAVE_FILE_INODE */
-
-/*
- * 4.1 API change
- * struct access file->f_path.dentry was replaced by accessor function
- * file_dentry(f)
- */
-#ifndef HAVE_FILE_DENTRY
-static inline struct dentry *file_dentry(const struct file *f)
-{
-	return (f->f_path.dentry);
-}
-#endif /* HAVE_FILE_DENTRY */
-
-#ifdef HAVE_KUID_HELPERS
-static inline uid_t zfs_uid_read_impl(struct inode *ip)
-{
-#ifdef HAVE_SUPER_USER_NS
-	return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid));
-#else
-	return (from_kuid(kcred->user_ns, ip->i_uid));
-#endif
-}
-
-static inline uid_t zfs_uid_read(struct inode *ip)
-{
-	return (zfs_uid_read_impl(ip));
-}
-
-static inline gid_t zfs_gid_read_impl(struct inode *ip)
-{
-#ifdef HAVE_SUPER_USER_NS
-	return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid));
-#else
-	return (from_kgid(kcred->user_ns, ip->i_gid));
-#endif
-}
-
-static inline gid_t zfs_gid_read(struct inode *ip)
-{
-	return (zfs_gid_read_impl(ip));
-}
-
-static inline void zfs_uid_write(struct inode *ip, uid_t uid)
-{
-#ifdef HAVE_SUPER_USER_NS
-	ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid);
-#else
-	ip->i_uid = make_kuid(kcred->user_ns, uid);
-#endif
-}
-
-static inline void zfs_gid_write(struct inode *ip, gid_t gid)
-{
-#ifdef HAVE_SUPER_USER_NS
-	ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid);
-#else
-	ip->i_gid = make_kgid(kcred->user_ns, gid);
-#endif
-}
-
-#else
-static inline uid_t zfs_uid_read(struct inode *ip)
-{
-	return (ip->i_uid);
-}
-
-static inline gid_t zfs_gid_read(struct inode *ip)
-{
-	return (ip->i_gid);
-}
-
-static inline void zfs_uid_write(struct inode *ip, uid_t uid)
-{
-	ip->i_uid = uid;
-}
-
-static inline void zfs_gid_write(struct inode *ip, gid_t gid)
-{
-	ip->i_gid = gid;
-}
-#endif
-
-/*
- * 2.6.38 API change
- */
-#ifdef HAVE_FOLLOW_DOWN_ONE
-#define	zpl_follow_down_one(path)		follow_down_one(path)
-#define	zpl_follow_up(path)			follow_up(path)
-#else
-#define	zpl_follow_down_one(path)		follow_down(path)
-#define	zpl_follow_up(path)			follow_up(path)
-#endif
-
-/*
- * 4.9 API change
- */
-#ifndef HAVE_SETATTR_PREPARE
-static inline int
-setattr_prepare(struct dentry *dentry, struct iattr *ia)
-{
-	return (inode_change_ok(dentry->d_inode, ia));
-}
-#endif
-
-/*
- * 4.11 API change
- * These macros are defined by kernel 4.11.  We define them so that the same
- * code builds under kernels < 4.11 and >= 4.11.  The macros are set to 0 so
- * that it will create obvious failures if they are accidentally used when built
- * against a kernel >= 4.11.
- */
-
-#ifndef STATX_BASIC_STATS
-#define	STATX_BASIC_STATS	0
-#endif
-
-#ifndef AT_STATX_SYNC_AS_STAT
-#define	AT_STATX_SYNC_AS_STAT	0
-#endif
-
-/*
- * 4.11 API change
- * 4.11 takes struct path *, < 4.11 takes vfsmount *
- */
-
-#ifdef HAVE_VFSMOUNT_IOPS_GETATTR
-#define	ZPL_GETATTR_WRAPPER(func)					\
-static int								\
-func(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)	\
-{									\
-	struct path path = { .mnt = mnt, .dentry = dentry };		\
-	return func##_impl(&path, stat, STATX_BASIC_STATS,		\
-	    AT_STATX_SYNC_AS_STAT);					\
-}
-#elif defined(HAVE_PATH_IOPS_GETATTR)
-#define	ZPL_GETATTR_WRAPPER(func)					\
-static int								\
-func(const struct path *path, struct kstat *stat, u32 request_mask,	\
-    unsigned int query_flags)						\
-{									\
-	return (func##_impl(path, stat, request_mask, query_flags));	\
-}
-#else
-#error
-#endif
-
-/*
- * 4.9 API change
- * Preferred interface to get the current FS time.
- */
-#if !defined(HAVE_CURRENT_TIME)
-static inline struct timespec
-current_time(struct inode *ip)
-{
-	return (timespec_trunc(current_kernel_time(), ip->i_sb->s_time_gran));
-}
-#endif
-
-/*
- * 4.16 API change
- * Added iversion interface for managing inode version field.
- */
-#ifdef HAVE_INODE_SET_IVERSION
-#include <linux/iversion.h>
-#else
-static inline void
-inode_set_iversion(struct inode *ip, u64 val)
-{
-	ip->i_version = val;
-}
-#endif
-
-/*
- * Returns true when called in the context of a 32-bit system call.
- */
-static inline int
-zpl_is_32bit_api(void)
-{
-#ifdef CONFIG_COMPAT
-#ifdef HAVE_IN_COMPAT_SYSCALL
-	return (in_compat_syscall());
-#else
-	return (is_compat_task());
-#endif
-#else
-	return (BITS_PER_LONG == 32);
-#endif
-}
-
-#endif /* _ZFS_VFS_H */

diff --git a/zfs/include/linux/xattr_compat.h b/zfs/include/linux/xattr_compat.h
deleted file mode 100644
index b1c4293..0000000
--- a/zfs/include/linux/xattr_compat.h
+++ /dev/null

@@ -1,251 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- */
-
-#ifndef _ZFS_XATTR_H
-#define	_ZFS_XATTR_H
-
-#include <linux/posix_acl_xattr.h>
-
-/*
- * 2.6.35 API change,
- * The const keyword was added to the 'struct xattr_handler' in the
- * generic Linux super_block structure.  To handle this we define an
- * appropriate xattr_handler_t typedef which can be used.  This was
- * the preferred solution because it keeps the code clean and readable.
- */
-#ifdef HAVE_CONST_XATTR_HANDLER
-typedef const struct xattr_handler	xattr_handler_t;
-#else
-typedef struct xattr_handler		xattr_handler_t;
-#endif
-
-/*
- * 3.7 API change,
- * Preferred XATTR_NAME_* definitions introduced, these are mapped to
- * the previous definitions for older kernels.
- */
-#ifndef XATTR_NAME_POSIX_ACL_DEFAULT
-#define	XATTR_NAME_POSIX_ACL_DEFAULT	POSIX_ACL_XATTR_DEFAULT
-#endif
-
-#ifndef XATTR_NAME_POSIX_ACL_ACCESS
-#define	XATTR_NAME_POSIX_ACL_ACCESS	POSIX_ACL_XATTR_ACCESS
-#endif
-
-/*
- * 4.5 API change,
- */
-#if defined(HAVE_XATTR_LIST_SIMPLE)
-#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
-static bool								\
-fn(struct dentry *dentry)						\
-{									\
-	return (!!__ ## fn(dentry->d_inode, NULL, 0, NULL, 0));		\
-}
-/*
- * 4.4 API change,
- */
-#elif defined(HAVE_XATTR_LIST_DENTRY)
-#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
-static size_t								\
-fn(struct dentry *dentry, char *list, size_t list_size,			\
-    const char *name, size_t name_len, int type)			\
-{									\
-	return (__ ## fn(dentry->d_inode,				\
-	    list, list_size, name, name_len));				\
-}
-/*
- * 2.6.33 API change,
- */
-#elif defined(HAVE_XATTR_LIST_HANDLER)
-#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
-static size_t								\
-fn(const struct xattr_handler *handler, struct dentry *dentry,		\
-    char *list, size_t list_size, const char *name, size_t name_len)	\
-{									\
-	return (__ ## fn(dentry->d_inode,				\
-	    list, list_size, name, name_len));				\
-}
-/*
- * 2.6.32 API
- */
-#elif defined(HAVE_XATTR_LIST_INODE)
-#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
-static size_t								\
-fn(struct inode *ip, char *list, size_t list_size,			\
-    const char *name, size_t name_len)					\
-{									\
-	return (__ ## fn(ip, list, list_size, name, name_len));		\
-}
-#endif
-
-/*
- * 4.7 API change,
- * The xattr_handler->get() callback was changed to take a both dentry and
- * inode, because the dentry might not be attached to an inode yet.
- */
-#if defined(HAVE_XATTR_GET_DENTRY_INODE)
-#define	ZPL_XATTR_GET_WRAPPER(fn)					\
-static int								\
-fn(const struct xattr_handler *handler, struct dentry *dentry,		\
-    struct inode *inode, const char *name, void *buffer, size_t size)	\
-{									\
-	return (__ ## fn(inode, name, buffer, size));			\
-}
-/*
- * 4.4 API change,
- * The xattr_handler->get() callback was changed to take a xattr_handler,
- * and handler_flags argument was removed and should be accessed by
- * handler->flags.
- */
-#elif defined(HAVE_XATTR_GET_HANDLER)
-#define	ZPL_XATTR_GET_WRAPPER(fn)					\
-static int								\
-fn(const struct xattr_handler *handler, struct dentry *dentry,		\
-    const char *name, void *buffer, size_t size)			\
-{									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size));		\
-}
-/*
- * 2.6.33 API change,
- * The xattr_handler->get() callback was changed to take a dentry
- * instead of an inode, and a handler_flags argument was added.
- */
-#elif defined(HAVE_XATTR_GET_DENTRY)
-#define	ZPL_XATTR_GET_WRAPPER(fn)					\
-static int								\
-fn(struct dentry *dentry, const char *name, void *buffer, size_t size,	\
-    int unused_handler_flags)						\
-{									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size));		\
-}
-/*
- * 2.6.32 API
- */
-#elif defined(HAVE_XATTR_GET_INODE)
-#define	ZPL_XATTR_GET_WRAPPER(fn)					\
-static int								\
-fn(struct inode *ip, const char *name, void *buffer, size_t size)	\
-{									\
-	return (__ ## fn(ip, name, buffer, size));			\
-}
-#endif
-
-/*
- * 4.7 API change,
- * The xattr_handler->set() callback was changed to take a both dentry and
- * inode, because the dentry might not be attached to an inode yet.
- */
-#if defined(HAVE_XATTR_SET_DENTRY_INODE)
-#define	ZPL_XATTR_SET_WRAPPER(fn)					\
-static int								\
-fn(const struct xattr_handler *handler, struct dentry *dentry,		\
-    struct inode *inode, const char *name, const void *buffer,		\
-    size_t size, int flags)						\
-{									\
-	return (__ ## fn(inode, name, buffer, size, flags));		\
-}
-/*
- * 4.4 API change,
- * The xattr_handler->set() callback was changed to take a xattr_handler,
- * and handler_flags argument was removed and should be accessed by
- * handler->flags.
- */
-#elif defined(HAVE_XATTR_SET_HANDLER)
-#define	ZPL_XATTR_SET_WRAPPER(fn)					\
-static int								\
-fn(const struct xattr_handler *handler, struct dentry *dentry,		\
-    const char *name, const void *buffer, size_t size, int flags)	\
-{									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size, flags));	\
-}
-/*
- * 2.6.33 API change,
- * The xattr_handler->set() callback was changed to take a dentry
- * instead of an inode, and a handler_flags argument was added.
- */
-#elif defined(HAVE_XATTR_SET_DENTRY)
-#define	ZPL_XATTR_SET_WRAPPER(fn)					\
-static int								\
-fn(struct dentry *dentry, const char *name, const void *buffer,		\
-    size_t size, int flags, int unused_handler_flags)			\
-{									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size, flags));	\
-}
-/*
- * 2.6.32 API
- */
-#elif defined(HAVE_XATTR_SET_INODE)
-#define	ZPL_XATTR_SET_WRAPPER(fn)					\
-static int								\
-fn(struct inode *ip, const char *name, const void *buffer,		\
-    size_t size, int flags)						\
-{									\
-	return (__ ## fn(ip, name, buffer, size, flags));		\
-}
-#endif
-
-#ifdef HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY
-#define	zpl_security_inode_init_security(ip, dip, qstr, nm, val, len)	\
-	security_inode_init_security(ip, dip, qstr, nm, val, len)
-#else
-#define	zpl_security_inode_init_security(ip, dip, qstr, nm, val, len)	\
-	security_inode_init_security(ip, dip, nm, val, len)
-#endif /* HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY */
-
-/*
- * Linux 3.7 API change. posix_acl_{from,to}_xattr gained the user_ns
- * parameter.  All callers are expected to pass the &init_user_ns which
- * is available through the init credential (kcred).
- */
-#ifdef HAVE_POSIX_ACL_FROM_XATTR_USERNS
-static inline struct posix_acl *
-zpl_acl_from_xattr(const void *value, int size)
-{
-	return (posix_acl_from_xattr(kcred->user_ns, value, size));
-}
-
-static inline int
-zpl_acl_to_xattr(struct posix_acl *acl, void *value, int size)
-{
-	return (posix_acl_to_xattr(kcred->user_ns, acl, value, size));
-}
-
-#else
-
-static inline struct posix_acl *
-zpl_acl_from_xattr(const void *value, int size)
-{
-	return (posix_acl_from_xattr(value, size));
-}
-
-static inline int
-zpl_acl_to_xattr(struct posix_acl *acl, void *value, int size)
-{
-	return (posix_acl_to_xattr(acl, value, size));
-}
-#endif /* HAVE_POSIX_ACL_FROM_XATTR_USERNS */
-
-#endif /* _ZFS_XATTR_H */

diff --git a/zfs/include/os/Makefile.am b/zfs/include/os/Makefile.am
new file mode 100644
index 0000000..7eab1ab
--- /dev/null
+++ b/zfs/include/os/Makefile.am

@@ -0,0 +1,6 @@
+if BUILD_LINUX
+SUBDIRS = linux
+endif
+if BUILD_FREEBSD
+SUBDIRS = freebsd
+endif

diff --git a/zfs/include/os/freebsd/Makefile.am b/zfs/include/os/freebsd/Makefile.am
new file mode 100644
index 0000000..3c87d4a
--- /dev/null
+++ b/zfs/include/os/freebsd/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = linux spl zfs

diff --git a/zfs/include/os/freebsd/linux/Makefile.am b/zfs/include/os/freebsd/linux/Makefile.am
new file mode 100644
index 0000000..00cff7f
--- /dev/null
+++ b/zfs/include/os/freebsd/linux/Makefile.am

@@ -0,0 +1,5 @@
+KERNEL_H = \
+	compiler.h \
+	types.h
+
+noinst_HEADERS = $(KERNEL_H)

diff --git a/zfs/include/os/freebsd/linux/compiler.h b/zfs/include/os/freebsd/linux/compiler.h
new file mode 100644
index 0000000..79f9b2d
--- /dev/null
+++ b/zfs/include/os/freebsd/linux/compiler.h

@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iXsystems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
+ * Copyright (c) 2015 François Tigeot
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef	_LINUX_COMPILER_H_
+#define	_LINUX_COMPILER_H_
+
+#include <sys/cdefs.h>
+
+#define	__user
+#define	__kernel
+#define	__safe
+#define	__force
+#define	__nocast
+#define	__iomem
+#define	__chk_user_ptr(x)		((void)0)
+#define	__chk_io_ptr(x)			((void)0)
+#define	__builtin_warning(x, y...)	(1)
+#define	__acquires(x)
+#define	__releases(x)
+#define	__acquire(x)			do { } while (0)
+#define	__release(x)			do { } while (0)
+#define	__cond_lock(x, c)		(c)
+#define	__bitwise
+#define	__devinitdata
+#define	__deprecated
+#define	__init
+#define	__initconst
+#define	__devinit
+#define	__devexit
+#define	__exit
+#define	__rcu
+#define	__percpu
+#define	__weak __weak_symbol
+#define	__malloc
+#define	___stringify(...)		#__VA_ARGS__
+#define	__stringify(...)		___stringify(__VA_ARGS__)
+#define	__attribute_const__		__attribute__((__const__))
+#undef __always_inline
+#define	__always_inline			inline
+#define	noinline			__noinline
+#define	____cacheline_aligned		__aligned(CACHE_LINE_SIZE)
+#define	fallthrough			__attribute__((__fallthrough__))
+
+#if !defined(_KERNEL) && !defined(_STANDALONE)
+#define	likely(x)			__builtin_expect(!!(x), 1)
+#define	unlikely(x)			__builtin_expect(!!(x), 0)
+#endif
+#define	typeof(x)			__typeof(x)
+
+#define	uninitialized_var(x)		x = x
+#define	__maybe_unused			__unused
+#define	__always_unused			__unused
+#define	__must_check			__result_use_check
+
+#define	__printf(a, b)			__printflike(a, b)
+
+#define	barrier()			__asm__ __volatile__("": : :"memory")
+#define	___PASTE(a, b) a##b
+#define	__PASTE(a, b) ___PASTE(a, b)
+
+#define	ACCESS_ONCE(x)			(*(volatile __typeof(x) *)&(x))
+
+#define	WRITE_ONCE(x, v) do {		\
+	barrier();			\
+	ACCESS_ONCE(x) = (v);		\
+	barrier();			\
+} while (0)
+
+#define	lockless_dereference(p) READ_ONCE(p)
+
+#define	_AT(T, X)	((T)(X))
+
+#endif	/* _LINUX_COMPILER_H_ */

diff --git a/zfs/include/os/freebsd/linux/types.h b/zfs/include/os/freebsd/linux/types.h
new file mode 100644
index 0000000..d290317
--- /dev/null
+++ b/zfs/include/os/freebsd/linux/types.h

@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iXsystems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef	_LINUX_TYPES_H_
+#define	_LINUX_TYPES_H_
+
+#include <linux/compiler.h>
+
+
+#ifndef __bitwise__
+#ifdef __CHECKER__
+#define	__bitwise__ __attribute__((bitwise))
+#else
+#define	__bitwise__
+#endif
+#endif
+
+typedef uint16_t __le16;
+typedef uint16_t __be16;
+typedef uint32_t __le32;
+typedef uint32_t __be32;
+typedef uint64_t __le64;
+typedef uint64_t __be64;
+
+typedef unsigned gfp_t;
+typedef off_t loff_t;
+typedef vm_paddr_t resource_size_t;
+typedef uint16_t __bitwise__ __sum16;
+typedef unsigned long pgoff_t;
+typedef unsigned __poll_t;
+
+typedef uint64_t u64;
+typedef u64 phys_addr_t;
+
+typedef size_t __kernel_size_t;
+
+#define	DECLARE_BITMAP(n, bits)						\
+	unsigned long n[howmany(bits, sizeof (long) * 8)]
+
+typedef unsigned long irq_hw_number_t;
+
+struct rcu_head {
+	void *raw[2];
+} __aligned(sizeof (void *));
+
+typedef void (*rcu_callback_t)(struct rcu_head *head);
+typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+typedef int linux_task_fn_t(void *data);
+
+#endif	/* _LINUX_TYPES_H_ */

diff --git a/zfs/include/os/freebsd/spl/Makefile.am b/zfs/include/os/freebsd/spl/Makefile.am
new file mode 100644
index 0000000..b321825
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = acl rpc sys

diff --git a/zfs/include/os/freebsd/spl/acl/Makefile.am b/zfs/include/os/freebsd/spl/acl/Makefile.am
new file mode 100644
index 0000000..5c0698d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/acl/Makefile.am

@@ -0,0 +1,4 @@
+KERNEL_H = \
+	acl_common.h
+
+noinst_HEADERS = $(KERNEL_H)

diff --git a/zfs/include/os/freebsd/spl/acl/acl_common.h b/zfs/include/os/freebsd/spl/acl/acl_common.h
new file mode 100644
index 0000000..44f5bed
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/acl/acl_common.h

@@ -0,0 +1,67 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#ifndef	_ACL_COMMON_H
+#define	_ACL_COMMON_H
+
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef struct trivial_acl {
+	uint32_t	allow0;		/* allow mask for bits only in owner */
+	uint32_t	deny1;		/* deny mask for bits not in owner */
+	uint32_t	deny2;		/* deny mask for bits not in group */
+	uint32_t	owner;		/* allow mask matching mode */
+	uint32_t	group;		/* allow mask matching mode */
+	uint32_t	everyone;	/* allow mask matching mode */
+} trivial_acl_t;
+
+int acltrivial(const char *);
+void adjust_ace_pair(ace_t *pair, mode_t mode);
+void adjust_ace_pair_common(void *, size_t, size_t, mode_t);
+int ace_trivial_common(void *, int,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *,
+    uint32_t *mask));
+#ifndef _KERNEL
+acl_t *acl_alloc(acl_type_t);
+void acl_free(acl_t *aclp);
+int acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir,
+    uid_t owner, gid_t group);
+#endif	/* !_KERNEL */
+int cmp2acls(void *a, void *b);
+int acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count);
+void acl_trivial_access_masks(mode_t mode, boolean_t isdir,
+    trivial_acl_t *masks);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _ACL_COMMON_H */

diff --git a/zfs/include/os/freebsd/spl/rpc/Makefile.am b/zfs/include/os/freebsd/spl/rpc/Makefile.am
new file mode 100644
index 0000000..f6faf4b
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/rpc/Makefile.am

@@ -0,0 +1,4 @@
+# noinst_HEADERS: shipped in the distribution, never installed.
+KERNEL_H = xdr.h
+
+noinst_HEADERS = $(KERNEL_H)

diff --git a/zfs/include/os/freebsd/spl/rpc/xdr.h b/zfs/include/os/freebsd/spl/rpc/xdr.h
new file mode 100644
index 0000000..c98466e
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/rpc/xdr.h

@@ -0,0 +1,71 @@
+/*
+ * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
+ * unrestricted use provided that this legend is included on all tape
+ * media and as a part of the software program in whole or part.  Users
+ * may copy or modify Sun RPC without charge, but are not authorized
+ * to license or distribute it to anyone else except as part of a product or
+ * program developed by the user.
+ *
+ * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
+ * WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
+ *
+ * Sun RPC is provided with no support and without any obligation on the
+ * part of Sun Microsystems, Inc. to assist in its use, correction,
+ * modification or enhancement.
+ *
+ * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
+ * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
+ * OR ANY PART THEREOF.
+ *
+ * In no event will Sun Microsystems, Inc. be liable for any lost revenue
+ * or profits or other special, indirect and consequential damages, even if
+ * Sun has been advised of the possibility of such damages.
+ *
+ * Sun Microsystems, Inc.
+ * 2550 Garcia Avenue
+ * Mountain View, California  94043
+ */
+
+#ifndef	_OPENSOLARIS_RPC_XDR_H_
+#define	_OPENSOLARIS_RPC_XDR_H_
+
+#include <rpc/types.h>
+#include_next <rpc/xdr.h>
+
+#if !defined(_KERNEL) && !defined(_STANDALONE)
+
+#include <assert.h>
+
+/*
+ * Control method for memory streams, taken from sys/xdr/xdr_mem.c.
+ *
+ * FreeBSD's userland XDR implements no control method (only the kernel
+ * does), but the OpenSolaris nvpair code depends on one, so provide it here.
+ */
+static __inline bool_t
+xdrmem_control(XDR *xdrs, int request, void *info)
+{
+	xdr_bytesrec *rec = (xdr_bytesrec *)info;
+
+	/* XDR_GET_BYTES_AVAIL is the only request handled here. */
+	if (request != XDR_GET_BYTES_AVAIL) {
+		assert(!"unexpected request");
+		return (FALSE);
+	}
+
+	/* Report x_handy as the available bytes of a last record. */
+	rec->xc_is_last_record = TRUE;
+	rec->xc_num_avail = xdrs->x_handy;
+	return (TRUE);
+}
+
+#undef XDR_CONTROL
+#define	XDR_CONTROL(xdrs, req, op)					\
+	(((xdrs)->x_ops->x_control != NULL) ?				\
+	    (*(xdrs)->x_ops->x_control)((xdrs), (req), (op)) :		\
+	    xdrmem_control((xdrs), (req), (op)))
+
+#endif	/* !_KERNEL && !_STANDALONE */
+
+#endif	/* !_OPENSOLARIS_RPC_XDR_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/Makefile.am b/zfs/include/os/freebsd/spl/sys/Makefile.am
new file mode 100644
index 0000000..7488e56
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/Makefile.am

@@ -0,0 +1,74 @@
+# Headers only: Automake distributes Makefile.am itself, so it is not listed.
+KERNEL_H = \
+	acl_impl.h \
+	acl.h \
+	atomic.h \
+	byteorder.h \
+	callb.h \
+	ccompat.h \
+	ccompile.h \
+	cmn_err.h \
+	condvar.h \
+	cred.h \
+	ctype.h \
+	debug.h \
+	dirent.h \
+	disp.h \
+	dkio.h \
+	extdirent.h \
+	fcntl.h \
+	file.h \
+	freebsd_rwlock.h \
+	idmap.h \
+	inttypes.h \
+	isa_defs.h \
+	kmem_cache.h \
+	kmem.h \
+	kstat.h \
+	list_impl.h \
+	list.h \
+	lock.h \
+	misc.h \
+	mod_os.h \
+	mode.h \
+	mount.h \
+	mutex.h \
+	param.h \
+	policy.h \
+	proc.h \
+	processor.h \
+	procfs_list.h \
+	random.h \
+	rwlock.h \
+	sdt.h \
+	sid.h \
+	sig.h \
+	simd_x86.h \
+	simd.h \
+	spl_condvar.h \
+	string.h \
+	strings.h \
+	sunddi.h \
+	sysmacros.h \
+	systeminfo.h \
+	systm.h \
+	taskq.h \
+	thread.h \
+	time.h \
+	timer.h \
+	trace_zfs.h \
+	trace.h \
+	types.h \
+	types32.h \
+	uio.h \
+	uuid.h \
+	vfs.h \
+	vm.h \
+	vmsystm.h \
+	vnode_impl.h \
+	vnode.h \
+	wmsum.h \
+	zmod.h \
+	zone.h
+
+noinst_HEADERS = $(KERNEL_H)

diff --git a/zfs/include/os/freebsd/spl/sys/acl.h b/zfs/include/os/freebsd/spl/sys/acl.h
new file mode 100644
index 0000000..ee50b0a
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/acl.h

@@ -0,0 +1,216 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ *
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2017 RackTop Systems.
+ */
+
+#ifndef _SYS_ACL_H
+#define	_SYS_ACL_H
+
+#include <sys/types.h>
+#include <sys/acl_impl.h>
+
+/*
+ * When compiling OpenSolaris kernel code, this file is included instead of the
+ * FreeBSD one.  Include the original sys/acl.h as well.
+ */
+#undef _SYS_ACL_H
+#include_next <sys/acl.h>
+#define	_SYS_ACL_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	MAX_ACL_ENTRIES		(1024)	/* max entries of each type */
+typedef struct {
+	int		a_type;		/* the type of ACL entry */
+	uid_t		a_id;		/* the entry in -uid or gid */
+	o_mode_t	a_perm;		/* the permission field */
+} aclent_t;
+
+typedef struct ace {
+	uid_t		a_who;		/* uid or gid */
+	uint32_t	a_access_mask;	/* read,write,... */
+	uint16_t	a_flags;	/* see below */
+	uint16_t	a_type;		/* allow or deny */
+} ace_t;
+
+/*
+ * The following are Defined types for an aclent_t.
+ */
+#define	USER_OBJ	(0x01)		/* object owner */
+#define	USER		(0x02)		/* additional users */
+#define	GROUP_OBJ	(0x04)		/* owning group of the object */
+#define	GROUP		(0x08)		/* additional groups */
+#define	CLASS_OBJ	(0x10)		/* file group class and mask entry */
+#define	OTHER_OBJ	(0x20)		/* other entry for the object */
+#define	ACL_DEFAULT	(0x1000)	/* default flag */
+/* default object owner */
+#define	DEF_USER_OBJ	(ACL_DEFAULT | USER_OBJ)
+/* default additional users */
+#define	DEF_USER	(ACL_DEFAULT | USER)
+/* default owning group */
+#define	DEF_GROUP_OBJ	(ACL_DEFAULT | GROUP_OBJ)
+/* default additional groups */
+#define	DEF_GROUP	(ACL_DEFAULT | GROUP)
+/* default mask entry */
+#define	DEF_CLASS_OBJ	(ACL_DEFAULT | CLASS_OBJ)
+/* default other entry */
+#define	DEF_OTHER_OBJ	(ACL_DEFAULT | OTHER_OBJ)
+
+/*
+ * The following are defined for ace_t.
+ */
+#define	ACE_READ_DATA		0x00000001
+#define	ACE_LIST_DIRECTORY	0x00000001
+#define	ACE_WRITE_DATA		0x00000002
+#define	ACE_ADD_FILE		0x00000002
+#define	ACE_APPEND_DATA		0x00000004
+#define	ACE_ADD_SUBDIRECTORY	0x00000004
+#define	ACE_READ_NAMED_ATTRS	0x00000008
+#define	ACE_WRITE_NAMED_ATTRS	0x00000010
+#define	ACE_EXECUTE		0x00000020
+#define	ACE_DELETE_CHILD	0x00000040
+#define	ACE_READ_ATTRIBUTES	0x00000080
+#define	ACE_WRITE_ATTRIBUTES	0x00000100
+#define	ACE_DELETE		0x00010000
+#define	ACE_READ_ACL		0x00020000
+#define	ACE_WRITE_ACL		0x00040000
+#define	ACE_WRITE_OWNER		0x00080000
+#define	ACE_SYNCHRONIZE		0x00100000
+
+#define	ACE_FILE_INHERIT_ACE		0x0001
+#define	ACE_DIRECTORY_INHERIT_ACE	0x0002
+#define	ACE_NO_PROPAGATE_INHERIT_ACE	0x0004
+#define	ACE_INHERIT_ONLY_ACE		0x0008
+#define	ACE_SUCCESSFUL_ACCESS_ACE_FLAG	0x0010
+#define	ACE_FAILED_ACCESS_ACE_FLAG	0x0020
+#define	ACE_IDENTIFIER_GROUP		0x0040
+#define	ACE_INHERITED_ACE		0x0080
+#define	ACE_OWNER			0x1000
+#define	ACE_GROUP			0x2000
+#define	ACE_EVERYONE			0x4000
+
+#define	ACE_ACCESS_ALLOWED_ACE_TYPE	0x0000
+#define	ACE_ACCESS_DENIED_ACE_TYPE	0x0001
+#define	ACE_SYSTEM_AUDIT_ACE_TYPE	0x0002
+#define	ACE_SYSTEM_ALARM_ACE_TYPE	0x0003
+
+#define	ACL_AUTO_INHERIT		0x0001
+#define	ACL_PROTECTED			0x0002
+#define	ACL_DEFAULTED			0x0004
+#define	ACL_FLAGS_ALL			(ACL_AUTO_INHERIT|ACL_PROTECTED| \
+    ACL_DEFAULTED)
+
+/*
+ * These are only applicable in a CIFS context.
+ */
+#define	ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE		0x04
+#define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE		0x05
+#define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE		0x06
+#define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE		0x07
+#define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE		0x08
+#define	ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE		0x09
+#define	ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE		0x0A
+#define	ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE	0x0B
+#define	ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE	0x0C
+#define	ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE		0x0D
+#define	ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE		0x0E
+#define	ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE	0x0F
+#define	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE	0x10
+
+#define	ACE_ALL_TYPES	0x001F
+
+typedef struct ace_object {
+	uid_t		a_who;		/* uid or gid */
+	uint32_t	a_access_mask;	/* read,write,... */
+	uint16_t	a_flags;	/* see below */
+	uint16_t	a_type;		/* allow or deny */
+	uint8_t		a_obj_type[16];	/* obj type */
+	uint8_t		a_inherit_obj_type[16];  /* inherit obj */
+} ace_object_t;
+
+#define	ACE_ALL_PERMS	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
+    ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
+    ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
+    ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
+
+#define	ACE_ALL_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA| \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS|ACE_WRITE_ACL| \
+    ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD)
+
+#define	ACE_READ_PERMS	(ACE_READ_DATA|ACE_READ_ACL|ACE_READ_ATTRIBUTES| \
+    ACE_READ_NAMED_ATTRS)
+
+#define	ACE_WRITE_PERMS	(ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES| \
+    ACE_WRITE_NAMED_ATTRS)
+
+#define	ACE_MODIFY_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
+    ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
+    ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_SYNCHRONIZE)
+/*
+ * The following flags are supported by both NFSv4 ACLs and ace_t.
+ */
+#define	ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \
+    ACE_DIRECTORY_INHERIT_ACE | \
+    ACE_NO_PROPAGATE_INHERIT_ACE | \
+    ACE_INHERIT_ONLY_ACE | \
+    ACE_INHERITED_ACE | \
+    ACE_IDENTIFIER_GROUP)
+
+#define	ACE_TYPE_FLAGS		(ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \
+    ACE_IDENTIFIER_GROUP)
+#define	ACE_INHERIT_FLAGS	(ACE_FILE_INHERIT_ACE|ACE_INHERITED_ACE| \
+    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE)
+
+/* cmd args to acl(2) for aclent_t  */
+#define	GETACL			1
+#define	SETACL			2
+#define	GETACLCNT		3
+
+/* cmd's to manipulate ace acls. */
+#define	ACE_GETACL		4
+#define	ACE_SETACL		5
+#define	ACE_GETACLCNT		6
+
+/* minimal acl entries from GETACLCNT */
+#define	MIN_ACL_ENTRIES		4
+
+extern void aces_from_acl(ace_t *aces, int *nentries, const struct acl *aclp);
+extern int acl_from_aces(struct acl *aclp, const ace_t *aces, int nentries);
+extern void ksort(caddr_t, int, int, int (*)(void *, void *));
+extern int cmp2acls(void *, void *);
+
+extern int acl(const char *path, int cmd, int cnt, void *buf);
+extern int facl(int fd, int cmd, int cnt, void *buf);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_ACL_H */

diff --git a/zfs/include/os/freebsd/spl/sys/acl_impl.h b/zfs/include/os/freebsd/spl/sys/acl_impl.h
new file mode 100644
index 0000000..1efbd6d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/acl_impl.h

@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ACL_IMPL_H
+#define	_SYS_ACL_IMPL_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * acl flags
+ *
+ * ACL_AUTO_INHERIT, ACL_PROTECTED and ACL_DEFAULTED
+ * flags can also be stored in this field.
+ */
+#define	ACL_IS_TRIVIAL	0x10000
+#define	ACL_IS_DIR	0x20000
+
+typedef enum acl_type {
+	ACLENT_T = 0,
+	ACE_T = 1
+} zfs_acl_type_t;
+
+struct acl_info {
+	zfs_acl_type_t acl_type;	/* style of acl */
+	int acl_cnt;			/* number of acl entries */
+	int acl_entry_size;		/* sizeof acl entry */
+	int acl_flags;			/* special flags about acl */
+	void *acl_aclp;			/* the acl */
+};
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_ACL_IMPL_H */

diff --git a/zfs/include/os/freebsd/spl/sys/atomic.h b/zfs/include/os/freebsd/spl/sys/atomic.h
new file mode 100644
index 0000000..01b13fc
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/atomic.h

@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_ATOMIC_H_
+#define	_OPENSOLARIS_SYS_ATOMIC_H_
+
+#ifndef _STANDALONE
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#define	atomic_sub_64	atomic_subtract_64
+
+#if defined(__i386__) && (defined(_KERNEL) || defined(KLD_MODULE))
+#define	I386_HAVE_ATOMIC64
+#endif
+
+#if defined(__i386__) || defined(__amd64__) || defined(__arm__)
+/* No spurious failures from fcmpset. */
+#define	STRONG_FCMPSET
+#endif
+
+#if !defined(__LP64__) && !defined(__mips_n32) && \
+	!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64) && \
+	!defined(HAS_EMULATED_ATOMIC64)
+extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
+extern void atomic_dec_64(volatile uint64_t *target);
+extern uint64_t atomic_swap_64(volatile uint64_t *a, uint64_t value);
+extern uint64_t atomic_load_64(volatile uint64_t *a);
+extern uint64_t atomic_add_64_nv(volatile uint64_t *target, int64_t delta);
+extern uint64_t atomic_cas_64(volatile uint64_t *target, uint64_t cmp,
+    uint64_t newval);
+#endif
+
+#define	membar_consumer()		atomic_thread_fence_acq()
+#define	membar_producer()		atomic_thread_fence_rel()
+
+static __inline uint32_t
+atomic_add_32_nv(volatile uint32_t *target, int32_t delta)
+{
+	return (atomic_fetchadd_32(target, delta) + delta);
+}
+
+static __inline uint_t
+atomic_add_int_nv(volatile uint_t *target, int delta)
+{
+	return (atomic_add_32_nv(target, delta));
+}
+
+static __inline void
+atomic_inc_32(volatile uint32_t *target)
+{
+	atomic_add_32(target, 1);
+}
+
+static __inline uint32_t
+atomic_inc_32_nv(volatile uint32_t *target)
+{
+	return (atomic_add_32_nv(target, 1));
+}
+
+static __inline void
+atomic_dec_32(volatile uint32_t *target)
+{
+	atomic_subtract_32(target, 1);
+}
+
+static __inline uint32_t
+atomic_dec_32_nv(volatile uint32_t *target)
+{
+	return (atomic_add_32_nv(target, -1));
+}
+
+#ifndef __sparc64__
+static inline uint32_t
+atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval)
+{
+#ifndef STRONG_FCMPSET
+	const uint32_t wanted = cmp;
+
+	/* Retry while fcmpset fails with cmp still equal to the wanted value. */
+	while (!atomic_fcmpset_32(target, &cmp, newval) && cmp == wanted)
+		continue;
+#else
+	/* fcmpset has no spurious failures here, so one attempt suffices. */
+	(void) atomic_fcmpset_32(target, &cmp, newval);
+#endif
+	return (cmp);
+}
+#endif	/* !__sparc64__ */
+
+#if defined(__LP64__) || defined(__mips_n32) || \
+	defined(ARM_HAVE_ATOMIC64) || defined(I386_HAVE_ATOMIC64) || \
+	defined(HAS_EMULATED_ATOMIC64)
+static __inline void
+atomic_dec_64(volatile uint64_t *target)
+{
+	atomic_subtract_64(target, 1);
+}
+
+static inline uint64_t
+atomic_add_64_nv(volatile uint64_t *target, int64_t delta)
+{
+	return (atomic_fetchadd_64(target, delta) + delta);
+}
+
+#ifndef __sparc64__
+static inline uint64_t
+atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval)
+{
+#ifndef STRONG_FCMPSET
+	const uint64_t wanted = cmp;
+
+	/* Retry while fcmpset fails with cmp still equal to the wanted value. */
+	while (!atomic_fcmpset_64(target, &cmp, newval) && cmp == wanted)
+		continue;
+#else
+	/* fcmpset has no spurious failures here, so one attempt suffices. */
+	(void) atomic_fcmpset_64(target, &cmp, newval);
+#endif
+	return (cmp);
+}
+#endif	/* !__sparc64__ */
+#endif
+
+static __inline void
+atomic_inc_64(volatile uint64_t *target)
+{
+	atomic_add_64(target, 1);
+}
+
+static __inline uint64_t
+atomic_inc_64_nv(volatile uint64_t *target)
+{
+	return (atomic_add_64_nv(target, 1));
+}
+
+static __inline uint64_t
+atomic_dec_64_nv(volatile uint64_t *target)
+{
+	return (atomic_add_64_nv(target, -1));
+}
+
+/*
+ * Pointer compare-and-swap, delegating to the fixed-width atomic_cas_*
+ * routine selected at compile time.
+ */
+static __inline void *
+atomic_cas_ptr(volatile void *target, void *cmp,  void *newval)
+{
+#if !defined(COMPAT_32BIT) && defined(__LP64__)
+	return ((void *)atomic_cas_64((volatile uint64_t *)target,
+	    (uint64_t)cmp, (uint64_t)newval));
+#else
+	return ((void *)atomic_cas_32((volatile uint32_t *)target,
+	    (uint32_t)cmp, (uint32_t)newval));
+#endif	/* !defined(COMPAT_32BIT) && defined(__LP64__) */
+}
+
+#else /* _STANDALONE */
+/*
+ * atomic_add_64 may or may not already be defined at this point; a plain,
+ * fully parenthesized add is always right for the boot loader.
+ */
+#undef atomic_add_64
+#define	atomic_add_64(ptr, val) (*(ptr) += (val))
+#endif /* !_STANDALONE */
+
+#endif	/* !_OPENSOLARIS_SYS_ATOMIC_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/byteorder.h b/zfs/include/os/freebsd/spl/sys/byteorder.h
new file mode 100644
index 0000000..0b3d01e
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/byteorder.h

@@ -0,0 +1,109 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _OPENSOLARIS_SYS_BYTEORDER_H_
+#define	_OPENSOLARIS_SYS_BYTEORDER_H_
+
+#include <sys/endian.h>
+
+/*
+ * Macros to reverse byte order
+ */
+#define	BSWAP_8(x)	((x) & 0xff)
+#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+#define	BMASK_8(x)	((x) & 0xff)
+#define	BMASK_16(x)	((x) & 0xffff)
+#define	BMASK_32(x)	((x) & 0xffffffff)
+#define	BMASK_64(x)	(x)
+
+/*
+ * Macros to convert from a specific byte order to/from native byte order
+ */
+#if BYTE_ORDER == _BIG_ENDIAN
+#define	BE_8(x)		BMASK_8(x)
+#define	BE_16(x)	BMASK_16(x)
+#define	BE_32(x)	BMASK_32(x)
+#define	BE_64(x)	BMASK_64(x)
+#define	LE_8(x)		BSWAP_8(x)
+#define	LE_16(x)	BSWAP_16(x)
+#define	LE_32(x)	BSWAP_32(x)
+#define	LE_64(x)	BSWAP_64(x)
+#else
+#define	LE_8(x)		BMASK_8(x)
+#define	LE_16(x)	BMASK_16(x)
+#define	LE_32(x)	BMASK_32(x)
+#define	LE_64(x)	BMASK_64(x)
+#define	BE_8(x)		BSWAP_8(x)
+#define	BE_16(x)	BSWAP_16(x)
+#define	BE_32(x)	BSWAP_32(x)
+#define	BE_64(x)	BSWAP_64(x)
+#endif
+
+#if !defined(_STANDALONE)
+#if BYTE_ORDER == _BIG_ENDIAN
+#define	htonll(x)	BMASK_64(x)
+#define	ntohll(x)	BMASK_64(x)
+#else /* BYTE_ORDER != _BIG_ENDIAN */
+#ifndef __LP64__
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#else	/* __LP64__ */
+#define	htonll(x)	BSWAP_64(x)
+#define	ntohll(x)	BSWAP_64(x)
+#endif	/* !__LP64__ */
+#endif	/* BYTE_ORDER == _BIG_ENDIAN */
+#endif	/* !_STANDALONE */
+
+#define	BE_IN32(xa)	htonl(*((uint32_t *)(void *)(xa)))
+
+#endif /* _OPENSOLARIS_SYS_BYTEORDER_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/callb.h b/zfs/include/os/freebsd/spl/sys/callb.h
new file mode 100644
index 0000000..cc67b02
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/callb.h

@@ -0,0 +1,213 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_CALLB_H
+#define	_SYS_CALLB_H
+
+#include <sys/condvar.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * definitions of callback classes (c_class)
+ *
+ * Callbacks belong in the same class if (1) their callback routines
+ * do the same kind of processing (ideally, using the same callback function)
+ * and (2) they can/should be executed at the same time in a cpr
+ * suspend/resume operation.
+ *
+ * Note: The DAEMON class, in particular, is for stopping kernel threads
+ * and nothing else.  The CALLB_* macros below should be used to deal
+ * with kernel threads, and the callback function should be callb_generic_cpr.
+ * Another idiosyncrasy of the DAEMON class is that if a suspend operation
+ * fails, some of the callback functions may be called with the RESUME
+ * code which were never called with SUSPEND.  Not a problem currently,
+ * but see bug 4201851.
+ */
+#define	CB_CL_CPR_DAEMON	0
+#define	CB_CL_CPR_VM		1
+#define	CB_CL_CPR_CALLOUT	2
+#define	CB_CL_CPR_OBP		3
+#define	CB_CL_CPR_FB		4
+#define	CB_CL_PANIC		5
+#define	CB_CL_CPR_RPC		6
+#define	CB_CL_CPR_PROMPRINTF	7
+#define	CB_CL_UADMIN		8
+#define	CB_CL_CPR_PM		9
+#define	CB_CL_HALT		10
+#define	CB_CL_CPR_DMA		11
+#define	CB_CL_CPR_POST_USER	12
+#define	CB_CL_UADMIN_PRE_VFS    13
+#define	CB_CL_MDBOOT		CB_CL_UADMIN
+#define	CB_CL_ENTER_DEBUGGER	14
+#define	CB_CL_CPR_POST_KERNEL	15
+#define	CB_CL_CPU_DEEP_IDLE	16
+#define	NCBCLASS		17 /* CHANGE ME if classes are added/removed */
+
+/*
+ * CB_CL_CPR_DAEMON class specific definitions are given below:
+ */
+
+/*
+ * code for CPR callb_execute_class
+ */
+#define	CB_CODE_CPR_CHKPT	0
+#define	CB_CODE_CPR_RESUME	1
+
+typedef	void *		callb_id_t;
+/*
+ * Per kernel thread structure for CPR daemon callbacks.
+ * Must be protected by either an existing lock in the daemon or
+ * a new lock created for such a purpose.
+ */
+typedef struct callb_cpr {
+	kmutex_t	*cc_lockp;	/* lock to protect this struct */
+	char		cc_events;	/* various events for CPR */
+	callb_id_t	cc_id;		/* callb id address */
+	kcondvar_t	cc_callb_cv;	/* cv for callback waiting */
+	kcondvar_t	cc_stop_cv;	/* cv to checkpoint block */
+} callb_cpr_t;
+
+/*
+ * cc_events definitions
+ */
+#define	CALLB_CPR_START		1	/* a checkpoint request's started */
+#define	CALLB_CPR_SAFE		2	/* thread is safe for CPR */
+#define	CALLB_CPR_ALWAYS_SAFE	4	/* thread is ALWAYS safe for CPR */
+
+/*
+ * Used when checking that all kernel threads are stopped.
+ */
+#define	CALLB_MAX_RETRY		3	/* when waiting for kthread to sleep */
+#define	CALLB_THREAD_DELAY	10	/* ticks allowed to reach sleep */
+#define	CPR_KTHREAD_TIMEOUT_SEC	90	/* secs before callback times out -- */
+					/* due to pwr mgmt of disks, make -- */
+					/* big enough for worst spinup time */
+
+/*
+ *
+ * CALLB_CPR_INIT macro is used by kernel threads to add their entry to
+ * the callback table and perform other initialization.  It automatically
+ * adds the thread as being in the callback class CB_CL_CPR_DAEMON.
+ *
+ *	cp    - ptr to the callb_cpr_t structure for this kernel thread
+ *
+ *	lockp - pointer to mutex protecting the callb_cpr_t struct
+ *
+ *	func  - pointer to the callback function for this kernel thread.
+ *		It has the prototype boolean_t <func>(void *arg, int code)
+ *		where: arg	- ptr to the callb_cpr_t structure
+ *		       code	- not used for this type of callback
+ *		returns: B_TRUE if successful; B_FALSE if unsuccessful.
+ *
+ *	name  - a string giving the name of the kernel thread
+ *
+ * Note: lockp is the lock to protect the callb_cpr_t (cp) structure
+ * later on.  No lock held is needed for this initialization.
+ */
+#define	CALLB_CPR_INIT(cp, lockp, func, name)	{			\
+		strlcpy(curthread->td_name, (name),			\
+		    sizeof (curthread->td_name));			\
+		bzero((caddr_t)(cp), sizeof (callb_cpr_t));		\
+		(cp)->cc_lockp = lockp;					\
+		(cp)->cc_id = callb_add(func, (void *)(cp),		\
+			CB_CL_CPR_DAEMON, name);			\
+		cv_init(&(cp)->cc_callb_cv, NULL, CV_DEFAULT, NULL);	\
+		cv_init(&(cp)->cc_stop_cv, NULL, CV_DEFAULT, NULL);	\
+	}
+
+#ifndef __lock_lint
+#define	CALLB_CPR_ASSERT(cp)	ASSERT(MUTEX_HELD((cp)->cc_lockp));
+#else
+#define	CALLB_CPR_ASSERT(cp)
+#endif
+/*
+ * Some threads (like the idle threads) do not adhere to the callback
+ * protocol and are always considered safe.  Such threads must never exit.
+ * They register their presence by calling this macro during their
+ * initialization.
+ *
+ * Args:
+ *	t	- thread pointer of the client kernel thread
+ *	name	- a string giving the name of the kernel thread
+ */
+#define	CALLB_CPR_INIT_SAFE(t, name) {					\
+		(void) callb_add_thread(callb_generic_cpr_safe,		\
+		(void *) &callb_cprinfo_safe, CB_CL_CPR_DAEMON,		\
+		    name, t);						\
+	}
+/*
+ * The lock to protect cp's content must be held before
+ * calling the following two macros.
+ *
+ * Any code region between CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END
+ * is safe for checkpoint/resume.
+ */
+#define	CALLB_CPR_SAFE_BEGIN(cp) { 			\
+		CALLB_CPR_ASSERT(cp)			\
+		(cp)->cc_events |= CALLB_CPR_SAFE;	\
+		if ((cp)->cc_events & CALLB_CPR_START)	\
+			cv_signal(&(cp)->cc_callb_cv);	\
+	}
+#define	CALLB_CPR_SAFE_END(cp, lockp) {				\
+		CALLB_CPR_ASSERT(cp)				\
+		while ((cp)->cc_events & CALLB_CPR_START)	\
+			cv_wait(&(cp)->cc_stop_cv, lockp);	\
+		(cp)->cc_events &= ~CALLB_CPR_SAFE;		\
+	}
+/*
+ * cv_destroy is nop right now but may be needed in the future.
+ */
+#define	CALLB_CPR_EXIT(cp) {				\
+		CALLB_CPR_ASSERT(cp)			\
+		(cp)->cc_events |= CALLB_CPR_SAFE;	\
+		if ((cp)->cc_events & CALLB_CPR_START)	\
+			cv_signal(&(cp)->cc_callb_cv);	\
+		mutex_exit((cp)->cc_lockp);		\
+		(void) callb_delete((cp)->cc_id);	\
+		cv_destroy(&(cp)->cc_callb_cv);		\
+		cv_destroy(&(cp)->cc_stop_cv);		\
+	}
+
+extern callb_cpr_t callb_cprinfo_safe;
+extern callb_id_t callb_add(boolean_t  (*)(void *, int), void *, int, char *);
+extern callb_id_t callb_add_thread(boolean_t (*)(void *, int),
+    void *, int, char *, kthread_id_t);
+extern int	callb_delete(callb_id_t);
+extern void	callb_execute(callb_id_t, int);
+extern void	*callb_execute_class(int, int);
+extern boolean_t callb_generic_cpr(void *, int);
+extern boolean_t callb_generic_cpr_safe(void *, int);
+extern boolean_t callb_is_stopped(kthread_id_t, caddr_t *);
+extern void	callb_lock_table(void);
+extern void	callb_unlock_table(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_CALLB_H */

diff --git a/zfs/include/os/freebsd/spl/sys/ccompat.h b/zfs/include/os/freebsd/spl/sys/ccompat.h
new file mode 100644
index 0000000..59abe92
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/ccompat.h

@@ -0,0 +1,153 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS_CCOMPAT_H
+#define	_SYS_CCOMPAT_H
+
+/*
+ * vm_page compatibility for kernels older than 1300051: vm_page_valid() and
+ * vm_page_none_valid() operate directly on the page's valid field and
+ * vm_page_do_sunbusy() expands to nothing.  On newer kernels only
+ * vm_page_do_sunbusy() is defined here, as a call to vm_page_sunbusy().
+ */
+#if  __FreeBSD_version < 1300051
+/* Fully parenthesized so the assignment stays one expression wherever used. */
+#define	vm_page_valid(m) ((m)->valid = VM_PAGE_BITS_ALL)
+#define	vm_page_do_sunbusy(m)
+#define	vm_page_none_valid(m) ((m)->valid == 0)
+#else
+#define	vm_page_do_sunbusy(m) vm_page_sunbusy(m)
+#endif
+
+/* VOP_UNLOCK1(x) hides the extra 0 argument passed to VOP_UNLOCK() before 1300074. */
+#if  __FreeBSD_version < 1300074
+#define	VOP_UNLOCK1(x)	VOP_UNLOCK(x, 0)
+#else
+#define	VOP_UNLOCK1(x)	VOP_UNLOCK(x)
+#endif
+
+/* Before 1300064 VN_IS_DOOMED() is supplied here as a test of VI_DOOMED in v_iflag. */
+#if  __FreeBSD_version < 1300064
+#define	VN_IS_DOOMED(vp)	((vp)->v_iflag & VI_DOOMED)
+#endif
+
+/* Before 1300068 VFS_VOP_VECTOR_REGISTER() expands to nothing. */
+#if  __FreeBSD_version < 1300068
+#define	VFS_VOP_VECTOR_REGISTER(x)
+#endif
+
+/* getnewvnode_reserve_() hides the count argument (1) passed before 1300076. */
+#if  __FreeBSD_version >= 1300076
+#define	getnewvnode_reserve_()	getnewvnode_reserve()
+#else
+#define	getnewvnode_reserve_()	getnewvnode_reserve(1)
+#endif
+
+/*
+ * Before 1300102 the seqc assertions/write markers expand to nothing and
+ * MNTK_FPLOOKUP is 0; the VFS_SMR_* macros are stubbed out only if nothing
+ * else has defined them already.
+ */
+#if  __FreeBSD_version < 1300102
+#define	ASSERT_VOP_IN_SEQC(zp)
+#define	MNTK_FPLOOKUP 0
+#define	vn_seqc_write_begin(vp)
+#define	vn_seqc_write_end(vp)
+
+#ifndef VFS_SMR_DECLARE
+#define	VFS_SMR_DECLARE
+#endif
+#ifndef VFS_SMR_ZONE_SET
+#define	VFS_SMR_ZONE_SET(zone)
+#endif
+#endif
+
+/*
+ * List node.  next is the following node (NULL at the tail); pprev holds the
+ * address of whichever pointer refers to this node, i.e. the previous node's
+ * next field or the list head's first field (see hlist_add_head()).
+ */
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+/* List head: a single pointer to the first node, NULL when the list is empty. */
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+/* Integer counter wrapper operated on by the atomic_*() helpers in this file. */
+typedef struct {
+	volatile int counter;
+} atomic_t;
+
+	/* BEGIN CSTYLED */
+/* Iterate p over every node of the list, starting at (head)->first. */
+#define	hlist_for_each(p, head)                                      \
+	for (p = (head)->first; p; p = (p)->next)
+
+/* Map a node pointer back to the structure of the given type embedding it. */
+#define	hlist_entry(ptr, type, field)   container_of(ptr, type, field)
+
+/*
+ * Given ptr, the address of the field named member inside an object of the
+ * given type, yield the address of the containing object by subtracting
+ * offsetof(type, member).  The typed temporary __p makes the compiler check
+ * that ptr is compatible with the member's type.
+ */
+#define	container_of(ptr, type, member)                         \
+({                                                              \
+        const __typeof(((type *)0)->member) *__p = (ptr);       \
+        (type *)((uintptr_t)__p - offsetof(type, member));      \
+})
+	/* END CSTYLED */
+
+/*
+ * Insert n at the front of list h.  The node that used to be first, if any,
+ * has its back-link redirected to n->next before the head is updated with
+ * WRITE_ONCE(); n's back-link then points at the head's first field.
+ */
+static inline void
+hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *old_first = h->first;
+
+	n->next = old_first;
+	if (old_first != NULL)
+		old_first->pprev = &n->next;
+	WRITE_ONCE(h->first, n);
+	n->pprev = &h->first;
+}
+
+/*
+ * Unlink n from its list: the pointer that referred to n (*n->pprev) is made
+ * to refer to n's successor, and the successor's back-link is updated to that
+ * same pointer.  n's own next/pprev fields are left untouched.
+ */
+static inline void
+hlist_del(struct hlist_node *n)
+{
+	struct hlist_node **slot = n->pprev;
+	struct hlist_node *succ = n->next;
+
+	WRITE_ONCE(*slot, succ);
+	if (succ != NULL)
+		succ->pprev = slot;
+}
+	/* BEGIN CSTYLED */
+/*
+ * READ_ONCE(x): read x through ACCESS_ONCE(), with a barrier() before and
+ * after the access.  The statement expression evaluates to the value read.
+ */
+#define	READ_ONCE(x) ({			\
+	__typeof(x) __var = ({		\
+		barrier();		\
+		ACCESS_ONCE(x);		\
+	});				\
+	barrier();			\
+	__var;				\
+})
+
+/* Initializer, definition helper and run-time reset for an empty list head. */
+#define	HLIST_HEAD_INIT { }
+#define	HLIST_HEAD(name) struct hlist_head name = HLIST_HEAD_INIT
+/* Parenthesized so the assignment is a single expression at every use site. */
+#define	INIT_HLIST_HEAD(head) ((head)->first = NULL)
+
+/* Reset a node so that both of its link fields are NULL. */
+#define	INIT_HLIST_NODE(node)			\
+	do {					\
+		(node)->next = NULL;		\
+		(node)->pprev = NULL;		\
+	} while (0)
+
+/* END CSTYLED */
+/* Return the current value of v->counter, read via READ_ONCE(). */
+static inline int
+atomic_read(const atomic_t *v)
+{
+	int snapshot = READ_ONCE(v->counter);
+
+	return (snapshot);
+}
+
+/*
+ * Add 1 to v->counter with atomic_fetchadd_int() and return that call's
+ * result plus one (presumably the post-increment value, assuming fetchadd
+ * yields the value held before the addition).
+ */
+static inline int
+atomic_inc(atomic_t *v)
+{
+	return (atomic_fetchadd_int(&v->counter, 1) + 1);
+}
+
+/*
+ * Add -1 to v->counter with atomic_fetchadd_int() and return that call's
+ * result minus one (presumably the post-decrement value, assuming fetchadd
+ * yields the value held before the addition).
+ */
+static inline int
+atomic_dec(atomic_t *v)
+{
+	return (atomic_fetchadd_int(&v->counter, -1) - 1);
+}
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/ccompile.h b/zfs/include/os/freebsd/spl/sys/ccompile.h
new file mode 100644
index 0000000..7109d42
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/ccompile.h

@@ -0,0 +1,284 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_CCOMPILE_H
+#define	_SYS_CCOMPILE_H
+
+/*
+ * This file contains definitions designed to enable different compilers
+ * to be used harmoniously on Solaris systems.
+ */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allow for version tests for compiler bugs and features.
+ */
+/*
+ * __GNUC_VERSION packs major/minor/patchlevel into one integer
+ * (major * 10000 + minor * 100 + patchlevel, e.g. 4.8.5 -> 40805) and is 0
+ * when __GNUC__ is not defined.
+ */
+#if defined(__GNUC__)
+#define	__GNUC_VERSION	\
+	(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#else
+#define	__GNUC_VERSION	0
+#endif
+
+#if defined(__ATTRIBUTE_IMPLEMENTED) || defined(__GNUC__)
+
+#if 0
+/*
+ * analogous to lint's PRINTFLIKEn
+ */
+#define	__sun_attr___PRINTFLIKE__(__n)	\
+		__attribute__((__format__(printf, __n, (__n)+1)))
+#define	__sun_attr___VPRINTFLIKE__(__n)	\
+		__attribute__((__format__(printf, __n, 0)))
+
+#define	__sun_attr___KPRINTFLIKE__	__sun_attr___PRINTFLIKE__
+#define	__sun_attr___KVPRINTFLIKE__	__sun_attr___VPRINTFLIKE__
+#else
+/*
+ * Currently the openzfs codebase has a lot of formatting errors
+ * which are not picked up in the linux build because they're not
+ * doing formatting checks. LLVM's kprintf implementation doesn't
+ * actually do format checks!
+ *
+ * For FreeBSD these break under gcc! LLVM shim'ed cmn_err as a
+ * format attribute but also didn't check anything.  If one
+ * replaces it with the above, all of the format issues
+ * in the codebase show up.
+ *
+ * Once those format string issues are addressed, the above
+ * should be flipped on once again.
+ */
+#define	__sun_attr___PRINTFLIKE__(__n)
+#define	__sun_attr___VPRINTFLIKE__(__n)
+#define	__sun_attr___KPRINTFLIKE__(__n)
+#define	__sun_attr___KVPRINTFLIKE__(__n)
+
+#endif
+
+/*
+ * This one's pretty obvious -- the function never returns
+ */
+#define	__sun_attr___noreturn__ __attribute__((__noreturn__))
+
+/*
+ * This is an appropriate label for functions that do not
+ * modify their arguments, e.g. strlen()
+ */
+#define	__sun_attr___pure__	__attribute__((__pure__))
+
+/*
+ * This is a stronger form of __pure__. Can be used for functions
+ * that do not modify their arguments and don't depend on global
+ * memory.
+ */
+#define	__sun_attr___const__	__attribute__((__const__))
+
+/*
+ * structure packing like #pragma pack(1)
+ */
+#define	__sun_attr___packed__	__attribute__((__packed__))
+
+/*
+ * __sun_attr__((X)) expands to ___sun_attr_inner (X), which pastes X onto
+ * the __sun_attr_ prefix and thereby selects the matching __sun_attr_X
+ * definition.  This is why callers pass the attribute name wrapped in an
+ * extra pair of parentheses.
+ */
+#define	___sun_attr_inner(__a)	__sun_attr_##__a
+#define	__sun_attr__(__a)	___sun_attr_inner __a
+
+#else	/* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */
+
+#define	__sun_attr__(__a)
+
+#endif	/* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */
+
+/*
+ * Shorthand versions for readability
+ */
+
+/* Each shorthand forwards to __sun_attr__() with the attribute name in (). */
+#define	__PRINTFLIKE(__n)	__sun_attr__((__PRINTFLIKE__(__n)))
+#define	__VPRINTFLIKE(__n)	__sun_attr__((__VPRINTFLIKE__(__n)))
+#define	__KPRINTFLIKE(__n)	__sun_attr__((__KPRINTFLIKE__(__n)))
+#define	__KVPRINTFLIKE(__n)	__sun_attr__((__KVPRINTFLIKE__(__n)))
+/* __NORETURN is only provided by this header for _KERNEL/_STANDALONE builds. */
+#if	defined(_KERNEL) || defined(_STANDALONE)
+#define	__NORETURN		__sun_attr__((__noreturn__))
+#endif /* _KERNEL || _STANDALONE */
+#define	__CONST			__sun_attr__((__const__))
+#define	__PURE			__sun_attr__((__pure__))
+
+/* An INVARIANTS build turns on ZFS_DEBUG (and with it assertions). */
+#if defined(INVARIANTS) && !defined(ZFS_DEBUG)
+#define	ZFS_DEBUG
+#undef 	NDEBUG
+#endif
+
+/* Module-metadata and parameter macros expand to nothing in this build. */
+#define	EXPORT_SYMBOL(x)
+#define	MODULE_AUTHOR(s)
+#define	MODULE_DESCRIPTION(s)
+#define	MODULE_LICENSE(s)
+#define	module_param(a, b, c)
+#define	module_param_call(a, b, c, d, e)
+#define	module_param_named(a, b, c, d)
+#define	MODULE_PARM_DESC(a, b)
+#define	asm __asm
+/*
+ * Keep NDEBUG and ZFS_DEBUG mutually exclusive: ZFS_DEBUG removes NDEBUG,
+ * and NDEBUG is defined whenever ZFS_DEBUG is not.
+ */
+#ifdef ZFS_DEBUG
+#undef NDEBUG
+#endif
+#if !defined(ZFS_DEBUG) && !defined(NDEBUG)
+#define	NDEBUG
+#endif
+
+/* Fallback value for systems whose headers do not define EINTEGRITY. */
+#ifndef EINTEGRITY
+#define	EINTEGRITY 97 /* EINTEGRITY is new in 13 */
+#endif
+
+/*
+ * These are bespoke errnos used in ZFS. We map them to their closest FreeBSD
+ * equivalents. This gives us more useful error messages from strerror(3).
+ */
+#define	ECKSUM	EINTEGRITY
+#define	EFRAGS	ENOSPC
+
+/* Similar for ENOTACTIVE */
+#define	ENOTACTIVE	ECANCELED
+
+/* Further error names mapped onto existing errno values in the same way. */
+#define	EREMOTEIO EREMOTE
+#define	ECHRNG ENXIO
+#define	ETIME ETIMEDOUT
+
+#ifndef LOCORE
+#ifndef HAVE_RPC_TYPES
+typedef int bool_t;
+typedef int enum_t;
+#endif
+#endif
+
+#ifndef __cplusplus
+#define	__init
+#define	__exit
+#endif
+
+#if defined(_KERNEL) || defined(_STANDALONE)
+#define	param_set_charp(a, b) (0)
+#define	ATTR_UID AT_UID
+#define	ATTR_GID AT_GID
+#define	ATTR_MODE AT_MODE
+#define	ATTR_XVATTR	AT_XVATTR
+#define	ATTR_CTIME	AT_CTIME
+#define	ATTR_MTIME	AT_MTIME
+#define	ATTR_ATIME	AT_ATIME
+/*
+ * vmem_* requests are served by the kmem allocator entry points.  Note that
+ * under _STANDALONE vmem_alloc maps to kmem_zalloc, the same target as
+ * vmem_zalloc.
+ */
+#if defined(_STANDALONE)
+#define	vmem_free kmem_free
+#define	vmem_zalloc kmem_zalloc
+#define	vmem_alloc kmem_zalloc
+#else
+#define	vmem_free zfs_kmem_free
+/* (flags) is parenthesized so a compound flags expression is ORed as a whole. */
+#define	vmem_zalloc(size, flags) zfs_kmem_alloc(size, (flags) | M_ZERO)
+#define	vmem_alloc zfs_kmem_alloc
+#endif
+#define	MUTEX_NOLOCKDEP 0
+#define	RW_NOLOCKDEP 0
+
+#else
+#define	FALSE 0
+#define	TRUE 1
+	/*
+	 * XXX We really need to consolidate on standard
+	 * error codes in the common code
+	 */
+#define	ENOSTR ENOTCONN
+#define	ENODATA EINVAL
+
+
+#define	__BSD_VISIBLE 1
+#ifndef	IN_BASE
+#define	__POSIX_VISIBLE 201808
+#define	__XSI_VISIBLE 1000
+#endif
+#define	ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
+#define	mmap64 mmap
+/* Note: this file can be used on linux/macOS when bootstrapping tools. */
+#if defined(__FreeBSD__)
+#define	open64 open
+#define	pwrite64 pwrite
+#define	ftruncate64 ftruncate
+#define	lseek64 lseek
+#define	pread64 pread
+#define	stat64 stat
+#define	lstat64 lstat
+#define	statfs64 statfs
+#define	readdir64 readdir
+#define	dirent64 dirent
+#endif
+/*
+ * Power-of-two helpers.  The results below assume align (or a) is a power
+ * of two:
+ *   P2ALIGN(x, align)      x rounded down to a multiple of align
+ *   P2CROSS(x, y, align)   non-zero if x and y differ in a bit at or above
+ *                          align, i.e. lie in different align-sized blocks
+ *   P2ROUNDUP(x, align)    x rounded up to a multiple of align
+ *   P2PHASE(x, align)      x modulo align
+ *   P2NPHASE(x, align)     (-x) modulo align
+ *   ISP2(x)                non-zero if x has at most one bit set (0 included)
+ *   IS_P2ALIGNED(v, a)     non-zero if v is a multiple of a
+ *   P2BOUNDARY(off, len, align)
+ *                          non-zero if off and off + len - 1 lie in different
+ *                          align-sized blocks
+ */
+#define	P2ALIGN(x, align)		((x) & -(align))
+#define	P2CROSS(x, y, align)	(((x) ^ (y)) > (align) - 1)
+#define	P2ROUNDUP(x, align)		((((x) - 1) | ((align) - 1)) + 1)
+#define	P2PHASE(x, align)		((x) & ((align) - 1))
+#define	P2NPHASE(x, align)		(-(x) & ((align) - 1))
+#define	ISP2(x)			(((x) & ((x) - 1)) == 0)
+#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
+#define	P2BOUNDARY(off, len, align) \
+	(((off) ^ ((off) + (len) - 1)) > (align) - 1)
+
+/*
+ * Typed version of the P2* macros.  These macros should be used to ensure
+ * that the result is correctly calculated based on the data type of (x),
+ * which is passed in as the last argument, regardless of the data
+ * type of the alignment.  For example, if (x) is of type uint64_t,
+ * and we want to round it up to a page boundary using "PAGESIZE" as
+ * the alignment, we can do either
+ *
+ * P2ROUNDUP(x, (uint64_t)PAGESIZE)
+ * or
+ * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
+ */
+#define	P2ALIGN_TYPED(x, align, type)   \
+	((type)(x) & -(type)(align))
+#define	P2PHASE_TYPED(x, align, type)   \
+	((type)(x) & ((type)(align) - 1))
+#define	P2NPHASE_TYPED(x, align, type)  \
+	(-(type)(x) & ((type)(align) - 1))
+#define	P2ROUNDUP_TYPED(x, align, type) \
+	((((type)(x) - 1) | ((type)(align) - 1)) + 1)
+#define	P2END_TYPED(x, align, type)     \
+	(-(~(type)(x) & -(type)(align)))
+#define	P2PHASEUP_TYPED(x, align, phase, type)  \
+	((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
+#define	P2CROSS_TYPED(x, y, align, type)        \
+	(((type)(x) ^ (type)(y)) > (type)(align) - 1)
+#define	P2SAMEHIGHBIT_TYPED(x, y, type) \
+	(((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
+
+#define	DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
+#define	RLIM64_INFINITY RLIM_INFINITY
+#ifndef HAVE_ERESTART
+#define	ERESTART EAGAIN
+#endif
+#define	ABS(a)	((a) < 0 ? -(a) : (a))
+
+#endif
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_CCOMPILE_H */

diff --git a/zfs/include/os/freebsd/spl/sys/cmn_err.h b/zfs/include/os/freebsd/spl/sys/cmn_err.h
new file mode 100644
index 0000000..ba4cff3
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/cmn_err.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CMN_ERR_H
+#define	_SYS_CMN_ERR_H
+
+#if !defined(_ASM)
+#include <sys/_stdarg.h>
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* Common error handling severity levels */
+
+#define	CE_CONT		0	/* continuation		*/
+#define	CE_NOTE		1	/* notice		*/
+#define	CE_WARN		2	/* warning		*/
+#define	CE_PANIC	3	/* panic		*/
+#define	CE_IGNORE	4	/* print nothing	*/
+
+#ifndef _ASM
+
+/*
+ * Message and panic entry points.  The number passed to __KPRINTFLIKE() or
+ * __KVPRINTFLIKE() is the position of the format-string parameter, and
+ * __NORETURN tags panic()/vpanic().  None of these annotation macros is
+ * defined in this header; they must already be in scope when it is included.
+ */
+
+/*PRINTFLIKE2*/
+extern void cmn_err(int, const char *, ...)
+    __KPRINTFLIKE(2);
+
+extern void vzcmn_err(zoneid_t, int, const char *, __va_list)
+    __KVPRINTFLIKE(3);
+
+extern void vcmn_err(int, const char *, __va_list)
+    __KVPRINTFLIKE(2);
+
+/*PRINTFLIKE3*/
+extern void zcmn_err(zoneid_t, int, const char *, ...)
+    __KPRINTFLIKE(3);
+
+extern void vzprintf(zoneid_t, const char *, __va_list)
+    __KVPRINTFLIKE(2);
+
+/*PRINTFLIKE2*/
+extern void zprintf(zoneid_t, const char *, ...)
+    __KPRINTFLIKE(2);
+
+extern void vuprintf(const char *, __va_list)
+    __KVPRINTFLIKE(1);
+
+/*PRINTFLIKE1*/
+extern void panic(const char *, ...)
+    __KPRINTFLIKE(1) __NORETURN;
+
+extern void vpanic(const char *, __va_list)
+    __KVPRINTFLIKE(1) __NORETURN;
+
+#endif /* !_ASM */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_CMN_ERR_H */

diff --git a/zfs/include/os/freebsd/spl/sys/condvar.h b/zfs/include/os/freebsd/spl/sys/condvar.h
new file mode 100644
index 0000000..9b1893b
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/condvar.h

@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2013 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_CONDVAR_H_
+#define	_OPENSOLARIS_SYS_CONDVAR_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <sys/spl_condvar.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+#include <sys/errno.h>
+
+/*
+ * cv_timedwait() is similar to cv_wait() except that it additionally expects
+ * a timeout value specified in ticks.  When woken by cv_signal() or
+ * cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is
+ * returned.
+ *
+ * cv_timedwait_sig() behaves the same as cv_timedwait() but blocks
+ * interruptibly and can be woken by a signal (EINTR, ERESTART).  When
+ * this occurs 0 is returned.
+ *
+ * cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait()
+ * and cv_timedwait_sig() which should be used when waiting for outstanding
+ * IO to complete.  They are responsible for updating the iowait accounting
+ * when this is supported by the platform.
+ *
+ * cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution
+ * versions of cv_timedwait() and cv_timedwait_sig().  They expect the timeout
+ * to be specified as a hrtime_t allowing for timeouts of less than a tick.
+ *
+ * N.B. The return values differ slightly from the illumos implementation
+ * which returns the time remaining, instead of 1, when woken.  They both
+ * return -1 on timeout. Consumers which need to know the time remaining
+ * are responsible for tracking it themselves.
+ */
+
+/*
+ * Convert a non-negative nanosecond count to sbintime_t.
+ *
+ * Whole seconds are converted exactly as (seconds * SBT_1S); the sub-second
+ * remainder is scaled by ceil(2^63 / 10^9) and shifted right by 31, rounding
+ * up.
+ */
+static __inline sbintime_t
+zfs_nstosbt(int64_t _ns)
+{
+	sbintime_t sb = 0;
+
+#ifdef KASSERT
+	KASSERT(_ns >= 0, ("Negative values illegal for nstosbt: %jd", _ns));
+#endif
+	/*
+	 * Split off whole seconds as soon as there is at least one.  The
+	 * scaled multiply below is carried out in 64-bit unsigned arithmetic
+	 * and wraps once _ns reaches 2 * 10^9 (about 2^64 / 9223372037), so
+	 * the remainder fed to it must stay below one second.  Testing
+	 * against SBT_1S (one second in sbintime units, not in nanoseconds)
+	 * would let values between 2 * 10^9 and SBT_1S through unsplit.
+	 */
+	if (_ns >= 1000000000) {
+		sb = (_ns / 1000000000) * SBT_1S;
+		_ns = _ns % 1000000000;
+	}
+	/* 9223372037 = ceil(2^63 / 1000000000) */
+	sb += ((_ns * 9223372037ull) + 0x7fffffff) >> 31;
+	return (sb);
+}
+
+
+/* The kcondvar_t name used by callers is the native struct cv. */
+typedef struct cv	kcondvar_t;
+#define	CALLOUT_FLAG_ABSOLUTE C_ABSOLUTE
+
+/* Condition-variable types; zfs_cv_init() below asserts CV_DEFAULT. */
+typedef enum {
+	CV_DEFAULT,
+	CV_DRIVER
+} kcv_type_t;
+
+/*
+ * zfs_cv_init(cv, name, type, arg) initializes cv through the two-argument
+ * cv_init().  The description string comes from the stringified cv
+ * expression (#cv): leading characters are skipped up to the first lowercase
+ * letter, and the whole string is used if it contains none.  The name and
+ * arg parameters are accepted but not used; type must be CV_DEFAULT.
+ *
+ * NOTE(review): the inner cv_init((cv), _name) is only left unexpanded by
+ * the preprocessor while the four-argument cv_init() wrapper below is itself
+ * being expanded, so this macro looks intended to be reached through that
+ * wrapper rather than invoked directly -- confirm.
+ */
+#define	zfs_cv_init(cv, name, type, arg)	do {			\
+	const char *_name;						\
+	ASSERT((type) == CV_DEFAULT);					\
+	for (_name = #cv; *_name != '\0'; _name++) {			\
+		if (*_name >= 'a' && *_name <= 'z')			\
+			break;						\
+	}								\
+	if (*_name == '\0')						\
+		_name = #cv;						\
+	cv_init((cv), _name);						\
+} while (0)
+#define	cv_init(cv, name, type, arg)	zfs_cv_init(cv, name, type, arg)
+
+
+/*
+ * Wait on cvp via _cv_wait_sig().  Returns 1 when that call reports 0 and
+ * 0 for any other result.
+ */
+static inline int
+cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
+{
+	int error = _cv_wait_sig(cvp, &mp->lock_object);
+
+	return (error == 0);
+}
+
+/*
+ * Wait on cvp until woken or until the tick value timo is reached.  timo is
+ * compared against ddi_get_lbolt(), so it is an absolute tick count on that
+ * clock.  Returns -1 if timo has already passed or the wait timed out
+ * (EWOULDBLOCK), and 1 otherwise.
+ */
+static inline int
+cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t timo)
+{
+	clock_t ticks_left = timo - ddi_get_lbolt();
+
+	if (ticks_left <= 0)
+		return (-1);
+	/* Any result other than EWOULDBLOCK is reported as a wakeup. */
+	if (_cv_timedwait_sbt(cvp, &mp->lock_object, tick_sbt * ticks_left,
+	    0, C_HARDCLOCK) == EWOULDBLOCK)
+		return (-1);
+	return (1);
+}
+
+/*
+ * Signal-interruptible form of cv_timedwait().  Returns -1 if timo has
+ * already passed or the wait timed out (EWOULDBLOCK), 0 if the wait was
+ * interrupted (EINTR or ERESTART), and 1 otherwise.
+ */
+static inline int
+cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t timo)
+{
+	clock_t ticks_left = timo - ddi_get_lbolt();
+	int rc;
+
+	if (ticks_left <= 0)
+		return (-1);
+
+	rc = _cv_timedwait_sig_sbt(cvp, &mp->lock_object,
+	    tick_sbt * ticks_left, 0, C_HARDCLOCK);
+	if (rc == EWOULDBLOCK)
+		return (-1);
+
+	return ((rc == EINTR || rc == ERESTART) ? 0 : 1);
+}
+
+/*
+ * The _io and _idle wait variants are plain aliases of the corresponding
+ * ordinary waits in this header; no separate accounting is done here.
+ */
+#define	cv_timedwait_io		cv_timedwait
+#define	cv_timedwait_idle	cv_timedwait
+#define	cv_timedwait_sig_io	cv_timedwait_sig
+#define	cv_wait_io		cv_wait
+#define	cv_wait_io_sig		cv_wait_sig
+#define	cv_wait_idle		cv_wait
+#define	cv_timedwait_io_hires	cv_timedwait_hires
+#define	cv_timedwait_idle_hires cv_timedwait_hires
+
+/*
+ * High-resolution timed wait.  tim and res are in hrtime_t units; when flag
+ * is 0, tim is taken relative to gethrtime(), otherwise it is used as given.
+ * Returns -1 if the deadline has already passed or the wait timed out
+ * (EWOULDBLOCK), and 1 otherwise.
+ */
+static inline int
+cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+	hrtime_t now, deadline;
+	int rc;
+
+	ASSERT(tim >= res);
+
+	now = gethrtime();
+	deadline = (flag == 0) ? tim + now : tim;
+	if (now >= deadline)
+		return (-1);
+
+	/* The deadline is handed to the kernel as an absolute time. */
+	rc = cv_timedwait_sbt(cvp, mp, zfs_nstosbt(deadline),
+	    zfs_nstosbt(res), C_ABSOLUTE);
+	if (rc == EWOULDBLOCK)
+		return (-1);
+
+	KASSERT(rc == 0, ("unexpected rc value %d", rc));
+	return (1);
+}
+
+/*
+ * Signal-interruptible form of cv_timedwait_hires().  Returns -1 if the
+ * deadline has already passed or the wait timed out (EWOULDBLOCK), 0 if the
+ * wait was interrupted (EINTR or ERESTART), and 1 otherwise.
+ */
+static inline int
+cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag)
+{
+	hrtime_t now, deadline;
+	int rc;
+
+	ASSERT(tim >= res);
+
+	now = gethrtime();
+	/* flag == 0 means tim is relative to now. */
+	deadline = (flag == 0) ? tim + now : tim;
+	if (now >= deadline)
+		return (-1);
+
+	rc = cv_timedwait_sig_sbt(cvp, mp, zfs_nstosbt(deadline),
+	    zfs_nstosbt(res), C_ABSOLUTE);
+
+	if (rc == EWOULDBLOCK)
+		return (-1);
+	if (rc == EINTR || rc == ERESTART)
+		return (0);
+
+	KASSERT(rc == 0, ("unexpected rc value %d", rc));
+	return (1);
+}
+
+#endif	/* _OPENSOLARIS_SYS_CONDVAR_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/cred.h b/zfs/include/os/freebsd/spl/sys/cred.h
new file mode 100644
index 0000000..db986af
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/cred.h

@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#ifndef _SYS_CRED_H
+#define	_SYS_CRED_H
+
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * The credential is an opaque kernel private data structure defined in
+ * <sys/cred_impl.h>.
+ */
+
+/* cred_t is the native struct ucred. */
+typedef struct ucred cred_t;
+
+/* Credential of the current thread; parenthesized like kcred below. */
+#define	CRED()		(curthread->td_ucred)
+
+/*
+ * kcred is used when you need all privileges.
+ */
+#define	kcred	(thread0.td_ucred)
+
+/*
+ * Accessors.  The KUID/KGID conversions are identity mappings; the crget*()
+ * macros read the corresponding struct ucred field directly, and the zone id
+ * is taken from the credential's prison (cr_prison->pr_id).
+ */
+#define	KUID_TO_SUID(x)		(x)
+#define	KGID_TO_SGID(x)		(x)
+#define	crgetuid(cr)		((cr)->cr_uid)
+#define	crgetruid(cr)		((cr)->cr_ruid)
+#define	crgetgid(cr)		((cr)->cr_gid)
+#define	crgetgroups(cr)		((cr)->cr_groups)
+#define	crgetngroups(cr)	((cr)->cr_ngroups)
+#define	crgetzoneid(cr)		((cr)->cr_prison->pr_id)
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_CRED_H */

diff --git a/zfs/include/os/freebsd/spl/sys/ctype.h b/zfs/include/os/freebsd/spl/sys/ctype.h
new file mode 100644
index 0000000..f225858
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/ctype.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+#ifndef _SPL_SYS_CTYPE_H_
+#define	_SPL_SYS_CTYPE_H_
+#include_next <sys/ctype.h>
+
+/*
+ * Character-class macros.  isalnum() relies on isalpha()/isdigit() being
+ * in scope already; the others are plain range tests on ASCII code points.
+ */
+#define	isalnum(ch)	(isalpha(ch) || isdigit(ch))
+/*
+ * Cast explicitly to unsigned char rather than going through a uchar()
+ * helper, which this header neither defines nor includes.
+ */
+#define	iscntrl(C)	\
+	((unsigned char)(C) <= 0x1f || (unsigned char)(C) == 0x7f)
+#define	isgraph(C)	((C) >= 0x21 && (C) <= 0x7E)
+/* BEGIN CSTYLED */
+#define	ispunct(C)				\
+	(((C) >= 0x21 && (C) <= 0x2F) ||	\
+	 ((C) >= 0x3A && (C) <= 0x40) ||	\
+	 ((C) >= 0x5B && (C) <= 0x60) ||	\
+	 ((C) >= 0x7B && (C) <= 0x7E))
+/* END CSTYLED */
+
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/debug.h b/zfs/include/os/freebsd/spl/sys/debug.h
new file mode 100644
index 0000000..1f820bc
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/debug.h

@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Available Solaris debug functions.  All of the ASSERT() macros will be
+ * compiled out when NDEBUG is defined, this is the default behavior for
+ * the SPL.  To enable assertions use the --enable-debug with configure.
+ * The VERIFY() functions are never compiled out and cannot be disabled.
+ *
+ * PANIC()	- Panic the node and print message.
+ * ASSERT()	- Assert X is true, if not panic.
+ * ASSERT3B()	- Assert boolean X OP Y is true, if not panic.
+ * ASSERT3S()	- Assert signed X OP Y is true, if not panic.
+ * ASSERT3U()	- Assert unsigned X OP Y is true, if not panic.
+ * ASSERT3P()	- Assert pointer X OP Y is true, if not panic.
+ * ASSERT0()	- Assert value is zero, if not panic.
+ * VERIFY()	- Verify X is true, if not panic.
+ * VERIFY3B()	- Verify boolean X OP Y is true, if not panic.
+ * VERIFY3S()	- Verify signed X OP Y is true, if not panic.
+ * VERIFY3U()	- Verify unsigned X OP Y is true, if not panic.
+ * VERIFY3P()	- Verify pointer X OP Y is true, if not panic.
+ * VERIFY0()	- Verify value is zero, if not panic.
+ */
+
+#ifndef _SPL_DEBUG_H
+#define	_SPL_DEBUG_H
+
+
+/*
+ * Common DEBUG functionality.
+ */
+/*
+ * spl_panic() takes the reporting location plus a printf-style message.  It
+ * is declared to return int so that it can appear as an operand of && and ||
+ * in the VERIFY()/IMPLY()/EQUIV() expressions below.
+ */
+int spl_panic(const char *file, const char *func, int line,
+    const char *fmt, ...);
+void spl_dumpstack(void);
+
+/* Branch-prediction hints built on __builtin_expect(). */
+#ifndef expect
+#define	expect(expr, value) (__builtin_expect((expr), (value)))
+#endif
+#define	likely(expr)   expect((expr) != 0, 1)
+#define	unlikely(expr) expect((expr) != 0, 0)
+
+/* BEGIN CSTYLED */
+/* PANIC(): call spl_panic() with the current file, function and line. */
+#define	PANIC(fmt, a...)						\
+	spl_panic(__FILE__, __FUNCTION__, __LINE__, fmt, ## a)
+
+/*
+ * VERIFY(cond): if cond is false, call spl_panic() with the stringified
+ * condition.  Written as a single void expression, so cond is evaluated
+ * exactly once.
+ */
+#define	VERIFY(cond)										\
+	(void) (unlikely(!(cond)) &&							\
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__,			\
+	    "%s", "VERIFY(" #cond ") failed\n"))
+
+/*
+ * VERIFY3B(LEFT, OP, RIGHT): evaluate both operands once as boolean_t and
+ * call spl_panic() with the expression text and both values if
+ * (LEFT OP RIGHT) does not hold.
+ */
+#define	VERIFY3B(LEFT, OP, RIGHT)	do {					\
+		const boolean_t _verify3_left = (boolean_t)(LEFT);	\
+		const boolean_t _verify3_right = (boolean_t)(RIGHT);\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%d " #OP " %d)\n",						\
+		    (boolean_t) (_verify3_left),					\
+		    (boolean_t) (_verify3_right));					\
+	} while (0)
+
+/*
+ * VERIFY3S(LEFT, OP, RIGHT): evaluate both operands once as int64_t and
+ * call spl_panic() with the expression text and both values if
+ * (LEFT OP RIGHT) does not hold.
+ */
+#define	VERIFY3S(LEFT, OP, RIGHT)	do {					\
+		const int64_t _verify3_left = (int64_t)(LEFT);		\
+		const int64_t _verify3_right = (int64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%lld " #OP " %lld)\n",					\
+		    (long long) (_verify3_left),					\
+		    (long long) (_verify3_right));					\
+	} while (0)
+
+/*
+ * VERIFY3U(LEFT, OP, RIGHT): evaluate both operands once as uint64_t and
+ * call spl_panic() with the expression text and both values if
+ * (LEFT OP RIGHT) does not hold.
+ */
+#define	VERIFY3U(LEFT, OP, RIGHT)	do {					\
+		const uint64_t _verify3_left = (uint64_t)(LEFT);	\
+		const uint64_t _verify3_right = (uint64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%llu " #OP " %llu)\n",					\
+		    (unsigned long long) (_verify3_left),			\
+		    (unsigned long long) (_verify3_right));			\
+	} while (0)
+
+/*
+ * VERIFY3P(LEFT, OP, RIGHT): evaluate both operands once as uintptr_t and
+ * call spl_panic() with the expression text and both pointer values if
+ * (LEFT OP RIGHT) does not hold.
+ *
+ * The values are printed with plain %p: "%px" is a Linux printk extension,
+ * and a printf that does not implement it prints the pointer followed by a
+ * literal 'x'.
+ */
+#define	VERIFY3P(LEFT, OP, RIGHT)	do {				\
+		const uintptr_t _verify3_left = (uintptr_t)(LEFT);	\
+		const uintptr_t _verify3_right = (uintptr_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%p " #OP " %p)\n",				\
+		    (void *) (_verify3_left),				\
+		    (void *) (_verify3_right));				\
+	} while (0)
+
+/*
+ * VERIFY0(RIGHT): evaluate RIGHT once as int64_t and call spl_panic() with
+ * the expression text and the value if it is not zero.
+ */
+#define	VERIFY0(RIGHT)	do {								\
+		const int64_t _verify3_left = (int64_t)(0);			\
+		const int64_t _verify3_right = (int64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left == _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(0 == " #RIGHT ") "						\
+		    "failed (0 == %lld)\n",							\
+		    (long long) (_verify3_right));					\
+	} while (0)
+/* CTASSERT_GLOBAL() is an alias for CTASSERT(). */
+#define        CTASSERT_GLOBAL(x)              CTASSERT(x)
+
+/*
+ * Debugging disabled (--disable-debug)
+ */
+#ifdef NDEBUG
+
+/* Every assertion expands to ((void)0); its arguments are not evaluated. */
+#define	ASSERT(x)		((void)0)
+#define	ASSERT3B(x,y,z)		((void)0)
+#define	ASSERT3S(x,y,z)		((void)0)
+#define	ASSERT3U(x,y,z)		((void)0)
+#define	ASSERT3P(x,y,z)		((void)0)
+#define	ASSERT0(x)		((void)0)
+#define	IMPLY(A, B)		((void)0)
+#define	EQUIV(A, B)		((void)0)
+
+/*
+ * Debugging enabled (--enable-debug)
+ */
+#else
+
+/* Each ASSERT* is the corresponding VERIFY* macro. */
+#define	ASSERT3B	VERIFY3B
+#define	ASSERT3S	VERIFY3S
+#define	ASSERT3U	VERIFY3U
+#define	ASSERT3P	VERIFY3P
+#define	ASSERT0		VERIFY0
+#define	ASSERT		VERIFY
+/* IMPLY(A, B): panic when A is true and B is false. */
+#define	IMPLY(A, B) \
+	((void)(likely((!(A)) || (B)) || \
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
+	    "(" #A ") implies (" #B ")")))
+/* EQUIV(A, B): panic when exactly one of A and B is true. */
+#define	EQUIV(A, B) \
+	((void)(likely(!!(A) == !!(B)) || \
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
+	    "(" #A ") is equivalent to (" #B ")")))
+/* END CSTYLED */
+
+#endif /* NDEBUG */
+
+#endif /* _SPL_DEBUG_H */

diff --git a/zfs/include/os/freebsd/spl/sys/dirent.h b/zfs/include/os/freebsd/spl/sys/dirent.h
new file mode 100644
index 0000000..2403766
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/dirent.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_DIRENT_H_
+#define	_OPENSOLARIS_SYS_DIRENT_H_
+
+#include <sys/types.h>
+
+#include_next <sys/dirent.h>
+
+typedef	struct dirent	dirent64_t;
+typedef ino_t		ino64_t;
+
+#define	dirent64	dirent
+
+#define	d_ino	d_fileno
+
+#define	DIRENT64_RECLEN(len)	_GENERIC_DIRLEN(len)
+
+#endif	/* !_OPENSOLARIS_SYS_DIRENT_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/disp.h b/zfs/include/os/freebsd/spl/sys/disp.h
new file mode 100644
index 0000000..2be1b76
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/disp.h

@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013 Andriy Gapon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_DISP_H_
+#define	_OPENSOLARIS_SYS_DISP_H_
+
+#include <sys/proc.h>
+
+#define	kpreempt(x)	kern_yield(PRI_USER)	/* argument x is unused */
+
+#endif	/* _OPENSOLARIS_SYS_DISP_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/dkio.h b/zfs/include/os/freebsd/spl/sys/dkio.h
new file mode 100644
index 0000000..aed54ba
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/dkio.h

@@ -0,0 +1,494 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * $FreeBSD$
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _OPENSOLARIS_SYS_DKIO_H_
+#define	_OPENSOLARIS_SYS_DKIO_H_
+
+#include <sys/types.h>	/* Needed for NDKMAP define */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#if defined(_SUNOS_VTOC_16)
+#define	NDKMAP		16		/* # of logical partitions */
+#define	DK_LABEL_LOC	1		/* location of disk label */
+#elif defined(_SUNOS_VTOC_8)
+#define	NDKMAP		8		/* # of logical partitions */
+#define	DK_LABEL_LOC	0		/* location of disk label */
+#else
+#error "No VTOC format defined."
+#endif
+
+/*
+ * Structures and definitions for disk io control commands
+ */
+
+/*
+ * Structures used as data by ioctl calls.
+ */
+
+#define	DK_DEVLEN	16		/* device name max length, including */
+					/* unit # & NUL (e.g. "xyc1") */
+
+/*
+ * Used for controller info
+ */
+struct dk_cinfo {
+	char	dki_cname[DK_DEVLEN];	/* controller name (no unit #) */
+	ushort_t dki_ctype;		/* controller type */
+	ushort_t dki_flags;		/* flags */
+	ushort_t dki_cnum;		/* controller number */
+	uint_t	dki_addr;		/* controller address */
+	uint_t	dki_space;		/* controller bus type */
+	uint_t	dki_prio;		/* interrupt priority */
+	uint_t	dki_vec;		/* interrupt vector */
+	char	dki_dname[DK_DEVLEN];	/* drive name (no unit #) */
+	uint_t	dki_unit;		/* unit number */
+	ushort_t dki_partition;		/* partition number */
+	ushort_t dki_maxtransfer;	/* max. transfer size in DEV_BSIZE */
+};
+
+/*
+ * Controller types
+ */
+#define	DKC_UNKNOWN	0
+#define	DKC_CDROM	1	/* CD-ROM, SCSI or otherwise */
+#define	DKC_WDC2880	2
+#define	DKC_XXX_0	3	/* unassigned */
+#define	DKC_XXX_1	4	/* unassigned */
+#define	DKC_DSD5215	5
+#define	DKC_ACB4000	7
+#define	DKC_MD21	8
+#define	DKC_XXX_2	9	/* unassigned */
+#define	DKC_NCRFLOPPY	10
+#define	DKC_SMSFLOPPY	12
+#define	DKC_SCSI_CCS	13	/* SCSI CCS compatible */
+#define	DKC_INTEL82072	14	/* native floppy chip */
+#define	DKC_MD		16	/* meta-disk (virtual-disk) driver */
+#define	DKC_INTEL82077	19	/* 82077 floppy disk controller */
+#define	DKC_DIRECT	20	/* Intel direct attached device i.e. IDE */
+#define	DKC_PCMCIA_MEM	21	/* PCMCIA memory disk-like type */
+#define	DKC_PCMCIA_ATA	22	/* PCMCIA AT Attached type */
+#define	DKC_VBD		23	/* virtual block device */
+
+/*
+ * Sun reserves up through 1023
+ */
+
+#define	DKC_CUSTOMER_BASE	1024
+
+/*
+ * Flags
+ */
+#define	DKI_BAD144	0x01	/* use DEC std 144 bad sector fwding */
+#define	DKI_MAPTRK	0x02	/* controller does track mapping */
+#define	DKI_FMTTRK	0x04	/* formats only full track at a time */
+#define	DKI_FMTVOL	0x08	/* formats only full volume at a time */
+#define	DKI_FMTCYL	0x10	/* formats only full cylinders at a time */
+#define	DKI_HEXUNIT	0x20	/* unit number is printed as 3 hex digits */
+#define	DKI_PCMCIA_PFD	0x40	/* PCMCIA pseudo-floppy memory card */
+
+/*
+ * partition headers:  section 1
+ * Returned in struct dk_allmap by ioctl DKIOC[SG]APART (dkio(7I))
+ */
+struct dk_map {
+	uint64_t	dkl_cylno;	/* starting cylinder */
+	uint64_t	dkl_nblk;	/* number of blocks;  if == 0, */
+					/* partition is undefined */
+};
+
+/*
+ * Used for all partitions
+ */
+struct dk_allmap {
+	struct dk_map	dka_map[NDKMAP];
+};
+
+#if defined(_SYSCALL32)
+struct dk_allmap32 {
+	struct dk_map32	dka_map[NDKMAP];
+};
+#endif /* _SYSCALL32 */
+
+/*
+ * Definition of a disk's geometry
+ */
+struct dk_geom {
+	unsigned short	dkg_ncyl;	/* # of data cylinders */
+	unsigned short	dkg_acyl;	/* # of alternate cylinders */
+	unsigned short	dkg_bcyl;	/* cyl offset (for fixed head area) */
+	unsigned short	dkg_nhead;	/* # of heads */
+	unsigned short	dkg_obs1;	/* obsolete */
+	unsigned short	dkg_nsect;	/* # of data sectors per track */
+	unsigned short	dkg_intrlv;	/* interleave factor */
+	unsigned short	dkg_obs2;	/* obsolete */
+	unsigned short	dkg_obs3;	/* obsolete */
+	unsigned short	dkg_apc;	/* alternates per cyl (SCSI only) */
+	unsigned short	dkg_rpm;	/* revolutions per minute */
+	unsigned short	dkg_pcyl;	/* # of physical cylinders */
+	unsigned short	dkg_write_reinstruct;	/* # sectors to skip, writes */
+	unsigned short	dkg_read_reinstruct;	/* # sectors to skip, reads */
+	unsigned short	dkg_extra[7];	/* for compatible expansion */
+};
+
+/*
+ * These defines are for historic compatibility with old drivers.
+ */
+#define	dkg_bhead	dkg_obs1	/* used to be head offset */
+#define	dkg_gap1	dkg_obs2	/* used to be gap1 */
+#define	dkg_gap2	dkg_obs3	/* used to be gap2 */
+
+/*
+ * Disk io control commands
+ * Warning: some other ioctls with the DIOC prefix exist elsewhere.
+ * The Generic DKIOC numbers are from	0   -  50.
+ *	The Floppy Driver uses		51  - 100.
+ *	The Hard Disk (except SCSI)	101 - 106.	(these are obsolete)
+ *	The CDROM Driver		151 - 200.
+ *	The USCSI ioctl			201 - 250.
+ */
+#define	DKIOC		(0x04 << 8)
+
+/*
+ * The following ioctls are generic in nature and need to be
+ * supported as appropriate by all disk drivers
+ */
+#define	DKIOCGGEOM	(DKIOC|1)		/* Get geometry */
+#define	DKIOCINFO	(DKIOC|3)		/* Get info */
+#define	DKIOCEJECT	(DKIOC|6)		/* Generic 'eject' */
+#define	DKIOCGVTOC	(DKIOC|11)		/* Get VTOC */
+#define	DKIOCSVTOC	(DKIOC|12)		/* Set VTOC & Write to Disk */
+
+/*
+ * Disk Cache Controls.  These ioctls should be supported by
+ * all disk drivers.
+ *
+ * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl
+ * argument, but it should be passed as NULL to allow for future
+ * reinterpretation.  From user-mode, this ioctl request is synchronous.
+ *
+ * When invoked from within the kernel, the arg can be NULL to indicate
+ * a synchronous request or can be the address of a struct dk_callback
+ * to request an asynchronous callback when the flush request is complete.
+ * In this case, the flag to the ioctl must include FKIOCTL and the
+ * dkc_callback field of the pointed to struct must be non-null or the
+ * request is made synchronously.
+ *
+ * In the callback case: if the ioctl returns 0, a callback WILL be performed.
+ * If the ioctl returns non-zero, a callback will NOT be performed.
+ * NOTE: In some cases, the callback may be done BEFORE the ioctl call
+ * returns.  The caller's locking strategy should be prepared for this case.
+ */
+#define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
+
+struct dk_callback {
+	void (*dkc_callback)(void *dkc_cookie, int error);
+	void *dkc_cookie;
+	int dkc_flag;
+};
+
+/* bit flag definitions for dkc_flag */
+#define	FLUSH_VOLATILE		0x1	/* Bit 0: if set, only flush */
+					/* volatile cache; otherwise, flush */
+					/* volatile and non-volatile cache */
+
+#define	DKIOCGETWCE		(DKIOC|36)	/* Get current write cache */
+						/* enablement status */
+#define	DKIOCSETWCE		(DKIOC|37)	/* Enable/Disable write cache */
+
+/*
+ * The following ioctls are used by Sun drivers to communicate
+ * with their associated format routines. Support of these ioctls
+ * is not required of foreign drivers
+ */
+#define	DKIOCSGEOM	(DKIOC|2)		/* Set geometry */
+#define	DKIOCSAPART	(DKIOC|4)		/* Set all partitions */
+#define	DKIOCGAPART	(DKIOC|5)		/* Get all partitions */
+#define	DKIOCG_PHYGEOM	(DKIOC|32)		/* get physical geometry */
+#define	DKIOCG_VIRTGEOM	(DKIOC|33)		/* get virtual geometry */
+
+/*
+ * The following ioctls are for removable media support
+ */
+#define	DKIOCLOCK	(DKIOC|7)	/* Generic 'lock' */
+#define	DKIOCUNLOCK	(DKIOC|8)	/* Generic 'unlock' */
+#define	DKIOCSTATE	(DKIOC|13)	/* Inquire insert/eject state */
+#define	DKIOCREMOVABLE	(DKIOC|16)	/* is media removable */
+
+
+/*
+ * ioctl for hotpluggable devices
+ */
+#define	DKIOCHOTPLUGGABLE	(DKIOC|35)	/* is hotpluggable */
+
+/*
+ * Ioctl to force driver to re-read the alternate partition and rebuild
+ * the internal defect map.
+ */
+#define	DKIOCADDBAD	(DKIOC|20)	/* Re-read the alternate map (IDE) */
+#define	DKIOCGETDEF	(DKIOC|21)	/* read defect list (IDE)	   */
+
+/*
+ * Used by applications to get disk defect information from IDE
+ * drives.
+ */
+#ifdef _SYSCALL32
+struct defect_header32 {
+	int		head;
+	caddr32_t	buffer;
+};
+#endif /* _SYSCALL32 */
+
+struct defect_header {
+	int		head;
+	caddr_t		buffer;
+};
+
+#define	DKIOCPARTINFO	(DKIOC|22)	/* Get partition or slice parameters */
+
+/*
+ * Used by applications to get partition or slice information
+ */
+#ifdef _SYSCALL32
+struct part_info32 {
+	uint32_t	p_start;
+	int		p_length;
+};
+#endif /* _SYSCALL32 */
+
+struct part_info {
+	uint64_t	p_start;
+	int		p_length;
+};
+
+/* The following ioctls are for Optical Memory Device */
+#define	DKIOC_EBP_ENABLE  (DKIOC|40)	/* enable bypass erase on write */
+#define	DKIOC_EBP_DISABLE (DKIOC|41)	/* disable bypass erase on write */
+
+/*
+ * This state enum is the argument passed to the DKIOCSTATE ioctl.
+ */
+enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE };
+
+#define	DKIOCGMEDIAINFO	(DKIOC|42)	/* get information about the media */
+
+/*
+ * ioctls to read/write mboot info.
+ */
+#define	DKIOCGMBOOT	(DKIOC|43)	/* get mboot info */
+#define	DKIOCSMBOOT	(DKIOC|44)	/* set mboot info */
+
+/*
+ * ioctl to get the device temperature.
+ */
+#define	DKIOCGTEMPERATURE	(DKIOC|45)	/* get temperature */
+
+/*
+ * Used for providing the temperature.
+ */
+
+struct	dk_temperature	{
+	uint_t		dkt_flags;	/* Flags */
+	short		dkt_cur_temp;	/* Current disk temperature */
+	short		dkt_ref_temp;	/* reference disk temperature */
+};
+
+#define	DKT_BYPASS_PM		0x1
+#define	DKT_INVALID_TEMP	0xFFFF
+
+
+/*
+ * Media types or profiles known
+ */
+#define	DK_UNKNOWN		0x00	/* Media inserted - type unknown */
+
+
+/*
+ * SFF 8090 Specification Version 3, media types 0x01 - 0xfffe are retained to
+ * maintain compatibility with SFF 8090.  The following defines identify the
+ * optical media types.
+ */
+#define	DK_REMOVABLE_DISK	0x02 /* Removable Disk */
+#define	DK_MO_ERASABLE		0x03 /* MO Erasable */
+#define	DK_MO_WRITEONCE		0x04 /* MO Write once */
+#define	DK_AS_MO		0x05 /* AS MO */
+#define	DK_CDROM		0x08 /* CDROM */
+#define	DK_CDR			0x09 /* CD-R */
+#define	DK_CDRW			0x0A /* CD-RW */
+#define	DK_DVDROM		0x10 /* DVD-ROM */
+#define	DK_DVDR			0x11 /* DVD-R */
+#define	DK_DVDRAM		0x12 /* DVD-RAM or DVD-RW */
+
+/*
+ * Media types for other rewritable magnetic media
+ */
+#define	DK_FIXED_DISK		0x10001	/* Fixed disk SCSI or otherwise */
+#define	DK_FLOPPY		0x10002 /* Floppy media */
+#define	DK_ZIP			0x10003 /* IOMEGA ZIP media */
+#define	DK_JAZ			0x10004 /* IOMEGA JAZ media */
+
+#define	DKIOCSETEFI	(DKIOC|17)		/* Set EFI info */
+#define	DKIOCGETEFI	(DKIOC|18)		/* Get EFI info */
+
+#define	DKIOCPARTITION	(DKIOC|9)		/* Get partition info */
+
+/*
+ * Ioctls to get/set volume capabilities related to Logical Volume Managers.
+ * They include the ability to get/set capabilities and to issue a read to a
+ * specific underlying device of a replicated device.
+ */
+
+#define	DKIOCGETVOLCAP	(DKIOC | 25)	/* Get volume capabilities */
+#define	DKIOCSETVOLCAP	(DKIOC | 26)	/* Set volume capabilities */
+#define	DKIOCDMR	(DKIOC | 27)	/* Issue a directed read */
+
+typedef uint_t volcapinfo_t;
+
+typedef uint_t volcapset_t;
+
+#define	DKV_ABR_CAP 0x00000001		/* Support Appl. Based Recovery */
+#define	DKV_DMR_CAP 0x00000002		/* Support Directed Mirror Read */
+
+typedef struct volcap {
+	volcapinfo_t vc_info;	/* Capabilities available */
+	volcapset_t vc_set;	/* Capabilities set */
+} volcap_t;
+
+#define	VOL_SIDENAME 256
+
+typedef struct vol_directed_rd {
+	int		vdr_flags;
+	offset_t	vdr_offset;
+	size_t		vdr_nbytes;
+	size_t		vdr_bytesread;
+	void		*vdr_data;
+	int		vdr_side;
+	char		vdr_side_name[VOL_SIDENAME];
+} vol_directed_rd_t;
+
+#define	DKV_SIDE_INIT		(-1)
+#define	DKV_DMR_NEXT_SIDE	0x00000001
+#define	DKV_DMR_DONE		0x00000002
+#define	DKV_DMR_ERROR		0x00000004
+#define	DKV_DMR_SUCCESS		0x00000008
+#define	DKV_DMR_SHORT		0x00000010
+
+#ifdef _MULTI_DATAMODEL
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+typedef struct vol_directed_rd32 {
+	int32_t		vdr_flags;
+	offset_t	vdr_offset;	/* 64-bit element on 32-bit alignment */
+	size32_t	vdr_nbytes;
+	size32_t	vdr_bytesread;
+	caddr32_t	vdr_data;
+	int32_t		vdr_side;
+	char		vdr_side_name[VOL_SIDENAME];
+} vol_directed_rd32_t;
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+#endif	/* _MULTI_DATAMODEL */
+
+/*
+ * This ioctl is used to fetch a disk's device type, vendor ID,
+ * model number/product ID, firmware revision and serial number together.
+ *
+ * Currently there are two device types - DKD_ATA_TYPE which means the
+ * disk is driven by cmdk/ata or dad/uata driver, and DKD_SCSI_TYPE
+ * which means the disk is driven by sd/scsi hba driver.
+ */
+#define	DKIOC_GETDISKID	(DKIOC|46)
+
+/* These two labels are for dkd_dtype of dk_disk_id_t */
+#define	DKD_ATA_TYPE	0x01 /* ATA disk or legacy mode SATA disk */
+#define	DKD_SCSI_TYPE	0x02 /* SCSI disk or native mode SATA disk */
+
+#define	DKD_ATA_MODEL	40	/* model number length */
+#define	DKD_ATA_FWVER	8	/* firmware revision length */
+#define	DKD_ATA_SERIAL	20	/* serial number length */
+
+#define	DKD_SCSI_VENDOR	8	/* vendor ID length */
+#define	DKD_SCSI_PRODUCT 16	/* product ID length */
+#define	DKD_SCSI_REVLEVEL 4	/* revision level length */
+#define	DKD_SCSI_SERIAL 12	/* serial number length */
+
+/*
+ * The argument type for DKIOC_GETDISKID ioctl.
+ */
+typedef struct dk_disk_id {
+	uint_t	dkd_dtype;
+	union {
+		struct {
+			char dkd_amodel[DKD_ATA_MODEL];		/* 40 bytes */
+			char dkd_afwver[DKD_ATA_FWVER];		/* 8 bytes */
+			char dkd_aserial[DKD_ATA_SERIAL];	/* 20 bytes */
+		} ata_disk_id;
+		struct {
+			char dkd_svendor[DKD_SCSI_VENDOR];	/* 8 bytes */
+			char dkd_sproduct[DKD_SCSI_PRODUCT];	/* 16 bytes */
+			char dkd_sfwver[DKD_SCSI_REVLEVEL];	/* 4 bytes */
+			char dkd_sserial[DKD_SCSI_SERIAL];	/* 12 bytes */
+		} scsi_disk_id;
+	} disk_id;
+} dk_disk_id_t;
+
+/*
+ * This ioctl is used to update the firmware of a device.
+ */
+#define	DKIOC_UPDATEFW		(DKIOC|47)
+
+/* The argument type for DKIOC_UPDATEFW ioctl */
+typedef struct dk_updatefw {
+	caddr_t		dku_ptrbuf;	/* pointer to firmware buf */
+	uint_t		dku_size;	/* firmware buf length */
+	uint8_t		dku_type;	/* firmware update type */
+} dk_updatefw_t;
+
+#ifdef _SYSCALL32
+typedef struct dk_updatefw_32 {
+	caddr32_t	dku_ptrbuf;	/* pointer to firmware buf */
+	uint_t		dku_size;	/* firmware buf length */
+	uint8_t		dku_type;	/* firmware update type */
+} dk_updatefw_32_t;
+#endif /* _SYSCALL32 */
+
+/*
+ * firmware update type - temporary or permanent use
+ */
+#define	FW_TYPE_TEMP	0x0		/* temporary use */
+#define	FW_TYPE_PERM	0x1		/* permanent use */
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _OPENSOLARIS_SYS_DKIO_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/extdirent.h b/zfs/include/os/freebsd/spl/sys/extdirent.h
new file mode 100644
index 0000000..b22e8e8
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/extdirent.h

@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_EXTDIRENT_H
+#define	_SYS_EXTDIRENT_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/dirent.h>
+
+/*
+ * Extended file-system independent directory entry.  This style of
+ * dirent provides additional informational flag bits for each
+ * directory entry.  This dirent will be returned instead of the
+ * standard dirent if a VOP_READDIR() requests dirent flags via
+ * V_RDDIR_ENTFLAGS, and if the file system supports the flags.
+ */
+typedef struct edirent {
+	ino64_t		ed_ino;		/* "inode number" of entry */
+	off64_t		ed_off;		/* offset of disk directory entry */
+	uint32_t	ed_eflags;	/* per-entry flags */
+	unsigned short	ed_reclen;	/* length of this record */
+	char		ed_name[1];	/* name of file */
+} edirent_t;
+
+#define	EDIRENT_RECLEN(namelen)	\
+	((offsetof(edirent_t, ed_name[0]) + 1 + (namelen) + 7) & ~ 7)
+#define	EDIRENT_NAMELEN(reclen)	\
+	((reclen) - (offsetof(edirent_t, ed_name[0])))
+
+/*
+ * Extended entry flags
+ *	Extended entries include a bitfield of extra information
+ *	regarding that entry.
+ */
+#define	ED_CASE_CONFLICT  0x10  /* Ignoring case, entry is not unique */
+
+/*
+ * Extended flags accessor macro
+ */
+#define	ED_CASE_CONFLICTS(x)	((x)->ed_eflags & ED_CASE_CONFLICT)
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_EXTDIRENT_H */

diff --git a/zfs/include/os/freebsd/spl/sys/fcntl.h b/zfs/include/os/freebsd/spl/sys/fcntl.h
new file mode 100644
index 0000000..4301d6e
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/fcntl.h

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SPL_SYS_FCNTL_H_
+#define	_SPL_SYS_FCNTL_H_
+
+#include_next <sys/fcntl.h>
+
+#define	O_LARGEFILE	0	/* zero, so OR-ing it in changes nothing */
+#define	O_RSYNC		0	/* zero, so OR-ing it in changes nothing */
+
+#ifndef O_DSYNC
+#define	O_DSYNC		0	/* fallback, only if not already defined */
+#endif
+
+#endif	/* _SPL_SYS_FCNTL_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/file.h b/zfs/include/os/freebsd/spl/sys/file.h
new file mode 100644
index 0000000..51e59b1
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/file.h

@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_FILE_H_
+#define	_OPENSOLARIS_SYS_FILE_H_
+
+#include <sys/refcount.h>
+#include_next <sys/file.h>
+
+#define	FKIOCTL	0x80000000	/* ioctl addresses are from kernel */
+
+typedef	struct file	file_t;
+
+#include <sys/capsicum.h>
+
+/* Resolve fd via fget() for curthread; NULL if fget() returns nonzero. */
+static __inline file_t *
+getf_caps(int fd, cap_rights_t *rightsp)
+{
+	file_t *filep;
+
+	return (fget(curthread, fd, rightsp, &filep) == 0 ?
+	    filep : NULL);
+}
+
+#endif	/* !_OPENSOLARIS_SYS_FILE_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/freebsd_rwlock.h b/zfs/include/os/freebsd/spl/sys/freebsd_rwlock.h
new file mode 100644
index 0000000..b760f8c
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/freebsd_rwlock.h

@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_FREEBSD_RWLOCK_H_
+#define	_OPENSOLARIS_SYS_FREEBSD_RWLOCK_H_
+
+#include_next <sys/rwlock.h>
+
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/idmap.h b/zfs/include/os/freebsd/spl/sys/idmap.h
new file mode 100644
index 0000000..39eeb90
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/idmap.h

@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_IDMAP_H
+#define	_SYS_IDMAP_H
+
+
+/* Idmap status codes */
+#define	IDMAP_SUCCESS			0
+#define	IDMAP_NEXT			1
+#define	IDMAP_ERR_OTHER			-10000
+#define	IDMAP_ERR_INTERNAL		-9999
+#define	IDMAP_ERR_MEMORY		-9998
+#define	IDMAP_ERR_NORESULT		-9997
+#define	IDMAP_ERR_NOTUSER		-9996
+#define	IDMAP_ERR_NOTGROUP		-9995
+#define	IDMAP_ERR_NOTSUPPORTED		-9994
+#define	IDMAP_ERR_W2U_NAMERULE		-9993
+#define	IDMAP_ERR_U2W_NAMERULE		-9992
+#define	IDMAP_ERR_CACHE			-9991
+#define	IDMAP_ERR_DB			-9990
+#define	IDMAP_ERR_ARG			-9989
+#define	IDMAP_ERR_SID			-9988
+#define	IDMAP_ERR_IDTYPE		-9987
+#define	IDMAP_ERR_RPC_HANDLE		-9986
+#define	IDMAP_ERR_RPC			-9985
+#define	IDMAP_ERR_CLIENT_HANDLE		-9984
+#define	IDMAP_ERR_BUSY			-9983
+#define	IDMAP_ERR_PERMISSION_DENIED	-9982
+#define	IDMAP_ERR_NOMAPPING		-9981
+#define	IDMAP_ERR_NEW_ID_ALLOC_REQD	-9980
+#define	IDMAP_ERR_DOMAIN		-9979
+#define	IDMAP_ERR_SECURITY		-9978
+#define	IDMAP_ERR_NOTFOUND		-9977
+#define	IDMAP_ERR_DOMAIN_NOTFOUND	-9976
+#define	IDMAP_ERR_UPDATE_NOTALLOWED	-9975
+#define	IDMAP_ERR_CFG			-9974
+#define	IDMAP_ERR_CFG_CHANGE		-9973
+#define	IDMAP_ERR_NOTMAPPED_WELLKNOWN	-9972
+#define	IDMAP_ERR_RETRIABLE_NET_ERR	-9971
+#define	IDMAP_ERR_W2U_NAMERULE_CONFLICT	-9970
+#define	IDMAP_ERR_U2W_NAMERULE_CONFLICT	-9969
+#define	IDMAP_ERR_BAD_UTF8		-9968
+#define	IDMAP_ERR_NONE_GENERATED	-9967
+#define	IDMAP_ERR_PROP_UNKNOWN		-9966
+#define	IDMAP_ERR_NS_LDAP_OP_FAILED	-9965
+#define	IDMAP_ERR_NS_LDAP_PARTIAL	-9964
+#define	IDMAP_ERR_NS_LDAP_CFG		-9963
+#define	IDMAP_ERR_NS_LDAP_BAD_WINNAME	-9962
+#define	IDMAP_ERR_NO_ACTIVEDIRECTORY	-9961
+
+/* Reserved GIDs for some well-known SIDs */
+#define	IDMAP_WK_LOCAL_SYSTEM_GID	2147483648U /* 0x80000000 */
+#define	IDMAP_WK_CREATOR_GROUP_GID	2147483649U
+#define	IDMAP_WK__MAX_GID		2147483649U
+
+/* Reserved UIDs for some well-known SIDs */
+#define	IDMAP_WK_CREATOR_OWNER_UID	2147483648U
+#define	IDMAP_WK__MAX_UID		2147483648U
+
+/* Reserved SIDs */
+#define	IDMAP_WK_CREATOR_SID_AUTHORITY	"S-1-3"
+
+/*
+ * Max door RPC size for ID mapping (can't be too large relative to the
+ * default user-land thread stack size, since clnt_door_call()
+ * alloca()s).  See libidmap:idmap_init().
+ */
+#define	IDMAP_MAX_DOOR_RPC		(256 * 1024)
+
+#define	IDMAP_SENTINEL_PID		UINT32_MAX
+#define	IDMAP_ID_IS_EPHEMERAL(pid)	\
+	(((pid) > INT32_MAX) && ((pid) != IDMAP_SENTINEL_PID))
+
+#endif /* _SYS_IDMAP_H */

diff --git a/zfs/include/os/freebsd/spl/sys/inttypes.h b/zfs/include/os/freebsd/spl/sys/inttypes.h
new file mode 100644
index 0000000..651685d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/inttypes.h

@@ -0,0 +1 @@
+/* do not delete */

diff --git a/zfs/include/os/freebsd/spl/sys/isa_defs.h b/zfs/include/os/freebsd/spl/sys/isa_defs.h
new file mode 100644
index 0000000..399d510
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/isa_defs.h

@@ -0,0 +1,712 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_ISA_DEFS_H
+#define	_SYS_ISA_DEFS_H
+#include <sys/endian.h>
+
+/*
+ * This header file serves to group a set of well known defines and to
+ * set these for each instruction set architecture.  These defines may
+ * be divided into two groups;  characteristics of the processor and
+ * implementation choices for Solaris on a processor.
+ *
+ * Processor Characteristics:
+ *
+ * _LITTLE_ENDIAN / _BIG_ENDIAN:
+ *	The natural byte order of the processor.  A pointer to an int points
+ *	to the least/most significant byte of that int.
+ *
+ * _STACK_GROWS_UPWARD / _STACK_GROWS_DOWNWARD:
+ *	The processor specific direction of stack growth.  A push onto the
+ *	stack increases/decreases the stack pointer, so it stores data at
+ *	successively higher/lower addresses.  (Stackless machines ignored
+ *	without regrets).
+ *
+ * _LONG_LONG_HTOL / _LONG_LONG_LTOH:
+ *	A pointer to a long long points to the most/least significant long
+ *	within that long long.
+ *
+ * _BIT_FIELDS_HTOL / _BIT_FIELDS_LTOH:
+ *	The C compiler assigns bit fields from the high/low to the low/high end
+ *	of an int (most to least significant vs. least to most significant).
+ *
+ * _IEEE_754:
+ *	The processor (or supported implementations of the processor)
+ *	supports the ieee-754 floating point standard.  No other floating
+ *	point standards are supported (or significant).  Any other supported
+ *	floating point formats are expected to be cased on the ISA processor
+ *	symbol.
+ *
+ * _CHAR_IS_UNSIGNED / _CHAR_IS_SIGNED:
+ *	The C Compiler implements objects of type `char' as `unsigned' or
+ *	`signed' respectively.  This is really an implementation choice of
+ *	the compiler writer, but it is specified in the ABI and tends to
+ *	be uniform across compilers for an instruction set architecture.
+ *	Hence, it has the properties of a processor characteristic.
+ *
+ * _CHAR_ALIGNMENT / _SHORT_ALIGNMENT / _INT_ALIGNMENT / _LONG_ALIGNMENT /
+ * _LONG_LONG_ALIGNMENT / _DOUBLE_ALIGNMENT / _LONG_DOUBLE_ALIGNMENT /
+ * _POINTER_ALIGNMENT / _FLOAT_ALIGNMENT:
+ *	The ABI defines alignment requirements of each of the primitive
+ *	object types.  Some, if not all, may be hardware requirements as
+ * 	well.  The values are expressed in "byte-alignment" units.
+ *
+ * _MAX_ALIGNMENT:
+ *	The most stringent alignment requirement as specified by the ABI.
+ *	Equal to the maximum of all the above _XXX_ALIGNMENT values.
+ *
+ * _ALIGNMENT_REQUIRED:
+ *	True or false (1 or 0) whether or not the hardware requires the ABI
+ *	alignment.
+ *
+ * _LONG_LONG_ALIGNMENT_32
+ *	The 32-bit ABI supported by a 64-bit kernel may have different
+ *	alignment requirements for primitive object types.  The value of this
+ *	identifier is expressed in "byte-alignment" units.
+ *
+ * _HAVE_CPUID_INSN
+ *	This indicates that the architecture supports the 'cpuid'
+ *	instruction as defined by Intel.  (Intel allows other vendors
+ *	to extend the instruction for their own purposes.)
+ *
+ *
+ * Implementation Choices:
+ *
+ * _ILP32 / _LP64:
+ *	This specifies the compiler data type implementation as specified in
+ *	the relevant ABI.  The choice between these is strongly influenced
+ *	by the underlying hardware, but is not absolutely tied to it.
+ *	Currently only two data type models are supported:
+ *
+ *	_ILP32:
+ *		Int/Long/Pointer are 32 bits.  This is the historical UNIX
+ *		and Solaris implementation.  Due to its historical standing,
+ *		this is the default case.
+ *
+ *	_LP64:
+ *		Long/Pointer are 64 bits, Int is 32 bits.  This is the chosen
+ *		implementation for 64-bit ABIs such as SPARC V9.
+ *
+ *	_I32LPx:
+ *		A compilation environment where 'int' is 32-bit, and
+ *		longs and pointers are simply the same size.
+ *
+ *	In all cases, Char is 8 bits and Short is 16 bits.
+ *
+ * _SUNOS_VTOC_8 / _SUNOS_VTOC_16 / _SVR4_VTOC_16:
+ *	This specifies the form of the disk VTOC (or label):
+ *
+ *	_SUNOS_VTOC_8:
+ *		This is a VTOC form which is upwardly compatible with the
+ *		SunOS 4.x disk label and allows 8 partitions per disk.
+ *
+ *	_SUNOS_VTOC_16:
+ *		In this format the incore vtoc image matches the ondisk
+ *		version.  It allows 16 slices per disk, and is not
+ *		compatible with the SunOS 4.x disk label.
+ *
+ *	Note that these are not the only two VTOC forms possible and
+ *	additional forms may be added.  One possible form would be the
+ *	SVr4 VTOC form.  The symbol for that is reserved now, although
+ *	it is not implemented.
+ *
+ *	_SVR4_VTOC_16:
+ *		This VTOC form is compatible with the System V Release 4
+ *		VTOC (as implemented on the SVr4 Intel and 3b ports) with
+ *		16 partitions per disk.
+ *
+ *
+ * _DMA_USES_PHYSADDR / _DMA_USES_VIRTADDR
+ *	This describes the type of addresses used by system DMA:
+ *
+ *	_DMA_USES_PHYSADDR:
+ *		This type of DMA, used in the x86 implementation,
+ *		requires physical addresses for DMA buffers.  The 24-bit
+ *		addresses used by some legacy boards is the source of the
+ *		"low-memory" (<16MB) requirement for some devices using DMA.
+ *
+ *	_DMA_USES_VIRTADDR:
+ *		This method of DMA allows the use of virtual addresses for
+ *		DMA transfers.
+ *
+ * _FIRMWARE_NEEDS_FDISK / _NO_FDISK_PRESENT
+ *      This indicates the presence/absence of an fdisk table.
+ *
+ *      _FIRMWARE_NEEDS_FDISK
+ *              The fdisk table is required by system firmware.  If present,
+ *              it allows a disk to be subdivided into multiple fdisk
+ *              partitions, each of which is equivalent to a separate,
+ *              virtual disk.  This enables the co-existence of multiple
+ *              operating systems on a shared hard disk.
+ *
+ *      _NO_FDISK_PRESENT
+ *              If the fdisk table is absent, it is assumed that the entire
+ *              media is allocated for a single operating system.
+ *
+ * _HAVE_TEM_FIRMWARE
+ *	Defined if this architecture has the (fallback) option of
+ *	using prom_* calls for doing I/O if a suitable kernel driver
+ *	is not available to do it.
+ *
+ * _DONT_USE_1275_GENERIC_NAMES
+ *		Controls whether or not device tree node names should
+ *		comply with the IEEE 1275 "Generic Names" Recommended
+ *		Practice. With _DONT_USE_1275_GENERIC_NAMES, device-specific
+ *		names identifying the particular device will be used.
+ *
+ * __i386_COMPAT
+ *	This indicates whether the i386 ABI is supported as a *non-native*
+ *	mode for the platform.  When this symbol is defined:
+ *	-	32-bit xstat-style system calls are enabled
+ *	-	32-bit xmknod-style system calls are enabled
+ *	-	32-bit system calls use i386 sizes -and- alignments
+ *
+ *	Note that this is NOT defined for the i386 native environment!
+ *
+ * __x86
+ *	This is ONLY a synonym for defined(__i386) || defined(__amd64)
+ *	which is useful only insofar as these two architectures share
+ *	common attributes.  Analogous to __sparc.
+ *
+ * _PSM_MODULES
+ *	This indicates whether or not the implementation uses PSM
+ *	modules for processor support, reading /etc/mach from inside
+ *	the kernel to extract a list.
+ *
+ * _RTC_CONFIG
+ *	This indicates whether or not the implementation uses /etc/rtc_config
+ *	to configure the real-time clock in the kernel.
+ *
+ * _UNIX_KRTLD
+ *	This indicates that the implementation uses a dynamically
+ *	linked unix + krtld to form the core kernel image at boot
+ *	time, or (in the absence of this symbol) a prelinked kernel image.
+ *
+ * _OBP
+ *	This indicates the firmware interface is OBP.
+ *
+ * _SOFT_HOSTID
+ *	This indicates that the implementation obtains the hostid
+ *	from the file /etc/hostid, rather than from hardware.
+ */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * The following set of definitions characterize Solaris on AMD's
+ * 64-bit systems.
+ */
+#if defined(__x86_64) || defined(__amd64)
+
+#if !defined(__amd64)
+#define	__amd64		/* preferred guard */
+#endif
+
+#if !defined(__x86)
+#define	__x86
+#endif
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_SIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_ALIGNMENT			8
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_DOUBLE_ALIGNMENT		8
+#define	_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_LONG_DOUBLE_ALIGNMENT		16
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	16
+#define	_POINTER_ALIGNMENT		8
+#define	_MAX_ALIGNMENT			16
+#define	_ALIGNMENT_REQUIRED		1
+
+/*
+ * Different alignment constraints for the i386 ABI in compatibility mode
+ */
+#define	_LONG_LONG_ALIGNMENT_32		4
+
+/*
+ * Define the appropriate "implementation choices".
+ */
+#if !defined(_LP64)
+#error "_LP64 not defined"
+#endif
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+#define	_MULTI_DATAMODEL
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	__i386_COMPAT
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_SOFT_HOSTID
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+/*
+ * The feature test macro __i386 is generic for all processors implementing
+ * the Intel 386 instruction set or a superset of it.  Specifically, this
+ * includes all members of the 386, 486, and Pentium family of processors.
+ */
+#elif defined(__i386) || defined(__i386__)
+
+#if !defined(__i386)
+#define	__i386
+#endif
+
+#if !defined(__x86)
+#define	__x86
+#endif
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_SIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_ALIGNMENT			4
+#define	_LONG_LONG_ALIGNMENT		4
+#define	_DOUBLE_ALIGNMENT		4
+#define	_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_LONG_DOUBLE_ALIGNMENT		4
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_POINTER_ALIGNMENT		4
+#define	_MAX_ALIGNMENT			4
+#define	_ALIGNMENT_REQUIRED		0
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices".
+ */
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_SOFT_HOSTID
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+#elif defined(__aarch64__)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_UNSIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_ALIGNMENT			8
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_DOUBLE_ALIGNMENT		8
+#define	_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_LONG_DOUBLE_ALIGNMENT		16
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	16
+#define	_POINTER_ALIGNMENT		8
+#define	_MAX_ALIGNMENT			16
+#define	_ALIGNMENT_REQUIRED		1
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices"
+ */
+#if !defined(_LP64)
+#error "_LP64 not defined"
+#endif
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+#elif defined(__riscv)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_UNSIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_ALIGNMENT			8
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_DOUBLE_ALIGNMENT		8
+#define	_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_LONG_DOUBLE_ALIGNMENT		16
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	16
+#define	_POINTER_ALIGNMENT		8
+#define	_MAX_ALIGNMENT			16
+#define	_ALIGNMENT_REQUIRED		1
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices"
+ */
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+#elif defined(__arm__)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_SIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_ALIGNMENT			4
+#define	_LONG_LONG_ALIGNMENT		4
+#define	_DOUBLE_ALIGNMENT		4
+#define	_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_LONG_DOUBLE_ALIGNMENT		4
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_POINTER_ALIGNMENT		4
+#define	_MAX_ALIGNMENT			4
+#define	_ALIGNMENT_REQUIRED		0
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices".
+ */
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+#elif defined(__mips__)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_LTOH
+#define	_BIT_FIELDS_LTOH
+#define	_IEEE_754
+#define	_CHAR_IS_SIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#if defined(__mips_n64)	/* 8-byte long/pointer alignment below */
+#define	_LONG_ALIGNMENT			8
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_DOUBLE_ALIGNMENT		8
+#define	_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_LONG_DOUBLE_ALIGNMENT		8
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_POINTER_ALIGNMENT		8
+#define	_MAX_ALIGNMENT			8
+#define	_ALIGNMENT_REQUIRED		0
+
+#define	_LONG_LONG_ALIGNMENT_32		_INT_ALIGNMENT
+/*
+ * Define the appropriate "implementation choices": n64 requires _LP64.
+ */
+#if !defined(_LP64)
+#error "_LP64 not defined"
+#endif
+#else	/* !__mips_n64 */
+#define	_LONG_ALIGNMENT			4
+#define	_LONG_LONG_ALIGNMENT		4
+#define	_DOUBLE_ALIGNMENT		4
+#define	_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_LONG_DOUBLE_ALIGNMENT		4
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	4
+#define	_POINTER_ALIGNMENT		4
+#define	_MAX_ALIGNMENT			4
+#define	_ALIGNMENT_REQUIRED		0
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices": default to _ILP32.
+ */
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+#endif	/* __mips_n64 */
+#define	_SUNOS_VTOC_16
+#define	_DMA_USES_PHYSADDR
+#define	_FIRMWARE_NEEDS_FDISK
+#define	_PSM_MODULES
+#define	_RTC_CONFIG
+#define	_DONT_USE_1275_GENERIC_NAMES
+#define	_HAVE_CPUID_INSN
+
+#elif defined(__powerpc__)
+
+#if defined(__BIG_ENDIAN__)
+#define	_BIT_FIELDS_HTOL
+#else
+#define	_BIT_FIELDS_LTOH
+#endif
+
+#if !defined(__powerpc)
+#define	__powerpc
+#endif
+
+#if defined(__powerpc64__)
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_MULTI_DATAMODEL
+#else
+#define	_LONG_LONG_ALIGNMENT		4
+#endif
+#define	_LONG_LONG_ALIGNMENT_32		4
+#define	_ALIGNMENT_REQUIRED		1
+
+#define	_SUNOS_VTOC_16	1
+
+/*
+ * The following set of definitions characterize the Solaris on SPARC systems.
+ *
+ * The symbol __sparc indicates any of the SPARC family of processor
+ * architectures.  This includes SPARC V7, SPARC V8 and SPARC V9.
+ *
+ * The symbol __sparcv8 indicates the 32-bit SPARC V8 architecture as defined
+ * by Version 8 of the SPARC Architecture Manual.  (SPARC V7 is close enough
+ * to SPARC V8 for the former to be subsumed into the latter definition.)
+ *
+ * The symbol __sparcv9 indicates the 64-bit SPARC V9 architecture as defined
+ * by Version 9 of the SPARC Architecture Manual.
+ *
+ * The symbols __sparcv8 and __sparcv9 are mutually exclusive, and are only
+ * relevant when the symbol __sparc is defined.
+ */
+/*
+ * XXX Due to the existence of 5110166, "defined(__sparcv9)" needs to be added
+ * to support backwards builds.  This workaround should be removed in s10_71.
+ */
+#elif defined(__sparc) || defined(__sparcv9) || defined(__sparc__)
+#if !defined(__sparc)
+#define	__sparc
+#endif
+
+/*
+ * You can be 32-bit or 64-bit, but not both at the same time.
+ */
+#if defined(__sparcv8) && defined(__sparcv9)
+#error	"SPARC Versions 8 and 9 are mutually exclusive choices"
+#endif
+
+/*
+ * Existing compilers do not set __sparcv8.  Years will transpire before
+ * the compilers can be depended on to set the feature test macro. In
+ * the interim, we'll set it here on the basis of historical behaviour;
+ * if you haven't asked for SPARC V9, then you must've meant SPARC V8.
+ */
+#if !defined(__sparcv9) && !defined(__sparcv8)
+#define	__sparcv8
+#endif
+
+/*
+ * Define the appropriate "processor characteristics" shared between
+ * all Solaris on SPARC systems.
+ */
+#define	_STACK_GROWS_DOWNWARD
+#define	_LONG_LONG_HTOL
+#define	_BIT_FIELDS_HTOL
+#define	_IEEE_754
+#define	_CHAR_IS_SIGNED
+#define	_BOOL_ALIGNMENT			1
+#define	_CHAR_ALIGNMENT			1
+#define	_SHORT_ALIGNMENT		2
+#define	_INT_ALIGNMENT			4
+#define	_FLOAT_ALIGNMENT		4
+#define	_FLOAT_COMPLEX_ALIGNMENT	4
+#define	_LONG_LONG_ALIGNMENT		8
+#define	_DOUBLE_ALIGNMENT		8
+#define	_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_ALIGNMENT_REQUIRED		1
+
+/*
+ * Define the appropriate "implementation choices" shared between versions.
+ */
+#define	_SUNOS_VTOC_8
+#define	_DMA_USES_VIRTADDR
+#define	_NO_FDISK_PRESENT
+#define	_HAVE_TEM_FIRMWARE
+#define	_OBP
+
+/*
+ * The following set of definitions characterize the implementation of
+ * 32-bit Solaris on SPARC V8 systems.
+ */
+#if defined(__sparcv8)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_LONG_ALIGNMENT			4
+#define	_LONG_DOUBLE_ALIGNMENT		8
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	8
+#define	_POINTER_ALIGNMENT		4
+#define	_MAX_ALIGNMENT			8
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices"
+ */
+#define	_ILP32
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+
+/*
+ * The following set of definitions characterize the implementation of
+ * 64-bit Solaris on SPARC V9 systems.
+ */
+#elif defined(__sparcv9)
+
+/*
+ * Define the appropriate "processor characteristics"
+ */
+#define	_LONG_ALIGNMENT			8
+#define	_LONG_DOUBLE_ALIGNMENT		16
+#define	_LONG_DOUBLE_COMPLEX_ALIGNMENT	16
+#define	_POINTER_ALIGNMENT		8
+#define	_MAX_ALIGNMENT			16
+
+#define	_LONG_LONG_ALIGNMENT_32		_LONG_LONG_ALIGNMENT
+
+/*
+ * Define the appropriate "implementation choices"
+ */
+#if !defined(_LP64)
+#error "_LP64 not defined"
+#endif
+#if !defined(_I32LPx)
+#define	_I32LPx
+#endif
+#define	_MULTI_DATAMODEL
+
+#else
+#error	"unknown SPARC version"
+#endif
+
+/*
+ * #error is strictly ansi-C, but works as well as anything for K&R systems.
+ */
+#else
+#error "ISA not supported"
+#endif
+
+#if defined(_ILP32) && defined(_LP64)
+#error "Both _ILP32 and _LP64 are defined"
+#endif
+
+#if BYTE_ORDER == _BIG_ENDIAN	/* NOTE(review): presumably <sys/endian.h> */
+#define	_ZFS_BIG_ENDIAN
+#elif BYTE_ORDER == _LITTLE_ENDIAN
+#define	_ZFS_LITTLE_ENDIAN
+#else	/* BYTE_ORDER matched neither constant */
+#error "unknown byte order"
+#endif	/* BYTE_ORDER */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ISA_DEFS_H */

diff --git a/zfs/include/os/freebsd/spl/sys/kmem.h b/zfs/include/os/freebsd/spl/sys/kmem.h
new file mode 100644
index 0000000..dc3b4f5
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/kmem.h

@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_KMEM_H_
+#define	_OPENSOLARIS_SYS_KMEM_H_
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/vmem.h>
+#include <sys/counter.h>
+
+#include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+MALLOC_DECLARE(M_SOLARIS);
+
+#define	POINTER_IS_VALID(p)	(!((uintptr_t)(p) & 0x3))
+#define	POINTER_INVALIDATE(pp)	(*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1))
+
+#define	KM_SLEEP		M_WAITOK	/* same as KM_PUSHPAGE */
+#define	KM_PUSHPAGE		M_WAITOK	/* same as KM_SLEEP */
+#define	KM_NOSLEEP		M_NOWAIT
+#define	KM_NORMALPRI		0	/* adds no flag bits */
+#define	KMC_NODEBUG		UMA_ZONE_NODUMP
+
+typedef struct vmem vmem_t;
+
+extern char	*kmem_asprintf(const char *, ...);
+extern char *kmem_vasprintf(const char *fmt, va_list ap);
+
+typedef struct kmem_cache {	/* handle used by kmem_cache_*() */
+	char		kc_name[32];	/* cache name, 32-byte buffer */
+#if !defined(KMEM_DEBUG)
+	uma_zone_t	kc_zone;	/* UMA zone (non-KMEM_DEBUG builds) */
+#else	/* KMEM_DEBUG */
+	size_t		kc_size;	/* replaces kc_zone under KMEM_DEBUG */
+#endif	/* !KMEM_DEBUG */
+	int		(*kc_constructor)(void *, void *, int);	/* ctor */
+	void		(*kc_destructor)(void *, void *);	/* dtor */
+	void		*kc_private;	/* opaque; presumably hook argument */
+} kmem_cache_t;
+
+extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
+extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
+
+void *zfs_kmem_alloc(size_t size, int kmflags);
+void zfs_kmem_free(void *buf, size_t size);
+uint64_t kmem_size(void);
+kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
+    int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
+    void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
+void kmem_cache_destroy(kmem_cache_t *cache);
+void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
+void kmem_cache_free(kmem_cache_t *cache, void *buf);
+boolean_t kmem_cache_reap_active(void);
+void kmem_cache_reap_soon(kmem_cache_t *);
+void kmem_reap(void);
+int kmem_debugging(void);
+void *calloc(size_t n, size_t s);
+
+/* Aliases and wrappers; kmem_zalloc() only adds M_ZERO to the flags. */
+#define	kmem_cache_reap_now kmem_cache_reap_soon
+#define	freemem				vm_free_count()
+#define	minfree				vm_cnt.v_free_min
+#define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
+#define	kmem_zalloc(size, kmflags)				\
+	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
+#define	kmem_free(buf, size)		zfs_kmem_free((buf), (size))
+
+#endif	/* _KERNEL */
+
+#ifdef _STANDALONE
+/*
+ * At the moment, we just need it for the type. We redirect the alloc/free
+ * routines to the usual Free and Malloc in that environment.
+ */
+typedef int kmem_cache_t;
+#endif /* _STANDALONE */
+
+#endif	/* _OPENSOLARIS_SYS_KMEM_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/kmem_cache.h b/zfs/include/os/freebsd/spl/sys/kmem_cache.h
new file mode 100644
index 0000000..9eec3b4
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/kmem_cache.h

@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+#ifndef _SPL_KMEM_CACHE_H
+#define	_SPL_KMEM_CACHE_H
+
+#ifdef _KERNEL
+#include <sys/taskq.h>	/* NOTE(review): does this declare kmem_cache_t? */
+
+/* Values a kmem move callback returns; see spl_kmem_cache_set_move() */
+typedef enum kmem_cbrc {
+	KMEM_CBRC_YES		= 0,	/* Object moved */
+	KMEM_CBRC_NO		= 1,	/* Object not moved */
+	KMEM_CBRC_LATER		= 2,	/* Object not moved, try again later */
+	KMEM_CBRC_DONT_NEED	= 3,	/* Neither object is needed */
+	KMEM_CBRC_DONT_KNOW	= 4,	/* Object unknown */
+} kmem_cbrc_t;
+
+extern void spl_kmem_cache_set_move(kmem_cache_t *,
+    kmem_cbrc_t (*)(void *, void *, size_t, void *));
+
+#define	kmem_cache_set_move(skc, move)	spl_kmem_cache_set_move(skc, move)
+
+#endif /* _KERNEL */
+
+#endif /* _SPL_KMEM_CACHE_H */

diff --git a/zfs/include/os/freebsd/spl/sys/kstat.h b/zfs/include/os/freebsd/spl/sys/kstat.h
new file mode 100644
index 0000000..947dfee
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/kstat.h

@@ -0,0 +1,230 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_KSTAT_H
+#define	_SPL_KSTAT_H
+
+#include <sys/types.h>
+#ifndef _STANDALONE
+#include <sys/sysctl.h>
+#endif
+struct list_head {};
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+#define	KSTAT_STRLEN		255
+#define	KSTAT_RAW_MAX		(128*1024)
+
+/*
+ * For reference valid classes are:
+ * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc
+ */
+
+#define	KSTAT_TYPE_RAW		0 /* can be anything; ks_ndata >= 1 */
+#define	KSTAT_TYPE_NAMED	1 /* name/value pair; ks_ndata >= 1 */
+#define	KSTAT_TYPE_INTR		2 /* interrupt stats; ks_ndata == 1 */
+#define	KSTAT_TYPE_IO		3 /* I/O stats; ks_ndata == 1 */
+#define	KSTAT_TYPE_TIMER	4 /* event timer; ks_ndata >= 1 */
+#define	KSTAT_NUM_TYPES		5
+
+#define	KSTAT_DATA_CHAR		0
+#define	KSTAT_DATA_INT32	1
+#define	KSTAT_DATA_UINT32	2
+#define	KSTAT_DATA_INT64	3
+#define	KSTAT_DATA_UINT64	4
+#define	KSTAT_DATA_LONG		5
+#define	KSTAT_DATA_ULONG	6
+#define	KSTAT_DATA_STRING	7
+#define	KSTAT_NUM_DATAS		8
+
+#define	KSTAT_INTR_HARD		0
+#define	KSTAT_INTR_SOFT		1
+#define	KSTAT_INTR_WATCHDOG	2
+#define	KSTAT_INTR_SPURIOUS	3
+#define	KSTAT_INTR_MULTSVC	4
+#define	KSTAT_NUM_INTRS		5
+
+#define	KSTAT_FLAG_VIRTUAL	0x01
+#define	KSTAT_FLAG_VAR_SIZE	0x02
+#define	KSTAT_FLAG_WRITABLE	0x04
+#define	KSTAT_FLAG_PERSISTENT	0x08
+#define	KSTAT_FLAG_DORMANT	0x10
+#define	KSTAT_FLAG_INVALID	0x20
+#define	KSTAT_FLAG_LONGSTRINGS	0x40
+#define	KSTAT_FLAG_NO_HEADERS	0x80
+
+#define	KS_MAGIC		0x9d9d9d9d
+
+/* Dynamic updates */
+#define	KSTAT_READ		0
+#define	KSTAT_WRITE		1
+
+struct kstat_s;
+typedef struct kstat_s kstat_t;
+
+typedef int kid_t;				/* unique kstat id */
+typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */
+
+struct seq_file {	/* buffer handle taken by seq_printf() */
+	char *sf_buf;	/* presumably the output buffer -- TODO confirm */
+	size_t sf_size;	/* presumably sf_buf's size in bytes -- confirm */
+};
+
+void seq_printf(struct seq_file *m, const char *fmt, ...);
+
+
+typedef struct kstat_module {
+	char ksm_name[KSTAT_STRLEN+1];		/* module name */
+	struct list_head ksm_module_list;	/* module linkage */
+	struct list_head ksm_kstat_list;	/* list of kstat entries */
+	struct proc_dir_entry *ksm_proc;	/* proc entry */
+} kstat_module_t;
+
+typedef struct kstat_raw_ops {	/* ops table for raw-type kstats */
+	int (*headers)(char *buf, size_t size);	/* header hook, char buffer */
+	int (*seq_headers)(struct seq_file *);	/* header hook, seq_file */
+	int (*data)(char *buf, size_t size, void *data); /* data hook */
+	void *(*addr)(kstat_t *ksp, loff_t index); /* index -> pointer */
+} kstat_raw_ops_t;
+
+struct kstat_s {
+	int		ks_magic;		/* magic value */
+	kid_t		ks_kid;			/* unique kstat ID */
+	hrtime_t	ks_crtime;		/* creation time */
+	hrtime_t	ks_snaptime;		/* last access time */
+	char		ks_module[KSTAT_STRLEN+1]; /* provider module name */
+	int		ks_instance;		/* provider module instance */
+	char		ks_name[KSTAT_STRLEN+1]; /* kstat name */
+	char		ks_class[KSTAT_STRLEN+1]; /* kstat class */
+	uchar_t		ks_type;		/* kstat data type */
+	uchar_t		ks_flags;		/* kstat flags */
+	void		*ks_data;		/* kstat type-specific data */
+	uint_t		ks_ndata;		/* # of data records */
+	size_t		ks_data_size;		/* size of kstat data section */
+	kstat_update_t	*ks_update;		/* dynamic updates */
+	void		*ks_private;		/* private data */
+	void		*ks_private1;		/* private data */
+	kmutex_t	ks_private_lock;	/* kstat private data lock */
+	kmutex_t	*ks_lock;		/* kstat data lock */
+	struct list_head ks_list;		/* kstat linkage */
+	kstat_module_t	*ks_owner;		/* kstat module linkage */
+	kstat_raw_ops_t	ks_raw_ops;		/* ops table for raw type */
+	char		*ks_raw_buf;		/* buf used for raw ops */
+	size_t		ks_raw_bufsize;		/* size of raw ops buffer */
+#ifndef _STANDALONE
+	struct sysctl_ctx_list ks_sysctl_ctx;
+	struct sysctl_oid *ks_sysctl_root;
+#endif /* _STANDALONE */
+};
+
+typedef struct kstat_named_s {	/* name plus typed value */
+	char	name[KSTAT_STRLEN];	/* counter name; note: no +1 */
+	uchar_t	data_type;		/* presumably a KSTAT_DATA_* value */
+	union {
+		char c[16];	/* 16 bytes: room for a 128-bit int */
+		int32_t	i32;	/* 32-bit signed int */
+		uint32_t ui32;	/* 32-bit unsigned int */
+		int64_t i64;	/* 64-bit signed int */
+		uint64_t ui64;	/* 64-bit unsigned int */
+		long l;		/* native signed long */
+		ulong_t ul;	/* native unsigned long */
+		struct {
+			union {
+				char *ptr;	/* NUL-terminated string */
+				char __pad[8];	/* pads addr to 8 bytes */
+			} addr;
+			uint32_t len;		/* # bytes for strlen + '\0' */
+		} string;
+	} value;
+} kstat_named_t;
+
+#define	KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr)
+#define	KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len)
+
+typedef struct kstat_intr {
+	uint_t intrs[KSTAT_NUM_INTRS];
+} kstat_intr_t;
+
+typedef struct kstat_io {
+	u_longlong_t	nread;		/* number of bytes read */
+	u_longlong_t	nwritten;	/* number of bytes written */
+	uint_t		reads;		/* number of read operations */
+	uint_t		writes;		/* number of write operations */
+	hrtime_t	wtime;		/* cumulative wait (pre-service) time */
+	hrtime_t	wlentime;	/* cumulative wait len*time product */
+	hrtime_t	wlastupdate;	/* last time wait queue changed */
+	hrtime_t	rtime;		/* cumulative run (service) time */
+	hrtime_t	rlentime;	/* cumulative run length*time product */
+	hrtime_t	rlastupdate;	/* last time run queue changed */
+	uint_t		wcnt;		/* count of elements in wait state */
+	uint_t		rcnt;		/* count of elements in run state */
+} kstat_io_t;
+
+typedef struct kstat_timer {
+	char		name[KSTAT_STRLEN+1]; /* event name */
+	u_longlong_t	num_events;	 /* number of events */
+	hrtime_t	elapsed_time;	 /* cumulative elapsed time */
+	hrtime_t	min_time;	 /* shortest event duration */
+	hrtime_t	max_time;	 /* longest event duration */
+	hrtime_t	start_time;	 /* previous event start time */
+	hrtime_t	stop_time;	 /* previous event stop time */
+} kstat_timer_t;
+
+int spl_kstat_init(void);
+void spl_kstat_fini(void);
+
+extern void __kstat_set_raw_ops(kstat_t *ksp,
+    int (*headers)(char *buf, size_t size),
+    int (*data)(char *buf, size_t size, void *data),
+    void* (*addr)(kstat_t *ksp, loff_t index));
+
+extern void __kstat_set_seq_raw_ops(kstat_t *ksp,
+    int (*headers)(struct seq_file *),
+    int (*data)(char *buf, size_t size, void *data),
+    void* (*addr)(kstat_t *ksp, loff_t index));
+
+
+extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
+    const char *ks_name, const char *ks_class, uchar_t ks_type,
+    uint_t ks_ndata, uchar_t ks_flags);
+
+extern void __kstat_install(kstat_t *ksp);
+extern void __kstat_delete(kstat_t *ksp);
+
+#define	kstat_set_seq_raw_ops(k, h, d, a) \
+    __kstat_set_seq_raw_ops(k, h, d, a)
+#define	kstat_set_raw_ops(k, h, d, a) \
+    __kstat_set_raw_ops(k, h, d, a)
+#ifndef _STANDALONE
+#define	kstat_create(m, i, n, c, t, s, f) \
+    __kstat_create(m, i, n, c, t, s, f)
+
+#define	kstat_install(k)		__kstat_install(k)
+#define	kstat_delete(k)			__kstat_delete(k)
+#else	/* _STANDALONE: stubs, arguments are not evaluated */
+#define	kstat_create(m, i, n, c, t, s, f)	((kstat_t *)0)
+#define	kstat_install(k)		/* expands to nothing */
+#define	kstat_delete(k)			/* expands to nothing */
+#endif	/* !_STANDALONE */
+
+#endif  /* _SPL_KSTAT_H */

diff --git a/zfs/include/os/freebsd/spl/sys/list.h b/zfs/include/os/freebsd/spl/sys/list.h
new file mode 100644
index 0000000..6db92ed
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/list.h

@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_LIST_H
+#define	_SYS_LIST_H
+
+#include <sys/list_impl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Short type names for the structures that <sys/list_impl.h> defines.
+ */
+typedef struct list_node list_node_t;
+typedef struct list list_t;
+
+/*
+ * List API declarations; the implementations are not part of this header.
+ * Elements are passed and returned as untyped (void *) pointers.
+ * NOTE(review): the two size_t arguments of list_create() presumably
+ * correspond to list_size and list_offset in struct list -- confirm
+ * against the implementation.
+ */
+void list_create(list_t *, size_t, size_t);
+void list_destroy(list_t *);
+
+/* Insertion and removal. */
+void list_insert_after(list_t *, void *, void *);
+void list_insert_before(list_t *, void *, void *);
+void list_insert_head(list_t *, void *);
+void list_insert_tail(list_t *, void *);
+void list_remove(list_t *, void *);
+void *list_remove_head(list_t *);
+void *list_remove_tail(list_t *);
+void list_move_tail(list_t *, list_t *);
+
+/* Traversal and emptiness query. */
+void *list_head(list_t *);
+void *list_tail(list_t *);
+void *list_next(list_t *, void *);
+void *list_prev(list_t *, void *);
+int list_is_empty(list_t *);
+
+/* Operations on a bare list_node_t rather than on a whole list. */
+void list_link_init(list_node_t *);
+void list_link_replace(list_node_t *, list_node_t *);
+
+int list_link_active(list_node_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LIST_H */

diff --git a/zfs/include/os/freebsd/spl/sys/list_impl.h b/zfs/include/os/freebsd/spl/sys/list_impl.h
new file mode 100644
index 0000000..a6614f9
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/list_impl.h

@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_LIST_IMPL_H
+#define	_SYS_LIST_IMPL_H
+
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Link element of a doubly linked list: one pointer to the next node and
+ * one to the previous node.
+ */
+struct list_node {
+	struct list_node *list_next;
+	struct list_node *list_prev;
+};
+
+/*
+ * List header.  list_head is an embedded list_node (not a pointer).
+ * NOTE(review): list_size / list_offset are presumably the size of each
+ * element and the byte offset of its list_node -- confirm against the
+ * list implementation.
+ */
+struct list {
+	size_t	list_size;
+	size_t	list_offset;
+	struct list_node list_head;
+};
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LIST_IMPL_H */

diff --git a/zfs/include/os/freebsd/spl/sys/lock.h b/zfs/include/os/freebsd/spl/sys/lock.h
new file mode 100644
index 0000000..7d5dc26
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/lock.h

@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_LOCK_H_
+#define	_OPENSOLARIS_SYS_LOCK_H_
+
+#include_next <sys/lock.h>
+
+/*
+ * LO_ALLMASK: bitwise OR of the lock-object flag bits listed below,
+ * including the whole LO_CLASSMASK field.
+ */
+#define	LO_ALLMASK	(LO_INITIALIZED | LO_WITNESS | LO_QUIET |	\
+    LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE |	\
+    LO_DUPOK | LO_CLASSMASK | LO_NOPROFILE)
+/*
+ * LO_EXPECTED: a specific flag combination with the class field set to
+ * the value 2 (2 << LO_CLASSSHIFT).
+ * NOTE(review): which lock class index 2 denotes is not visible here --
+ * confirm against the native <sys/lock.h>.
+ */
+#define	LO_EXPECTED	(LO_INITIALIZED | LO_WITNESS | LO_RECURSABLE |	\
+    LO_SLEEPABLE | LO_UPGRADABLE | LO_DUPOK | (2 << LO_CLASSSHIFT))
+
+#endif	/* _OPENSOLARIS_SYS_LOCK_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/misc.h b/zfs/include/os/freebsd/spl/sys/misc.h
new file mode 100644
index 0000000..3481507
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/misc.h

@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_MISC_H_
+#define	_OPENSOLARIS_SYS_MISC_H_
+
+#include <sys/limits.h>
+#include <sys/filio.h>
+
+/* MAXUID is an alias for UID_MAX. */
+#define	MAXUID	UID_MAX
+
+/* Two distinct flag bits (0x1 and 0x2). */
+#define	_ACL_ACLENT_ENABLED	0x1
+#define	_ACL_ACE_ENABLED	0x2
+
+/*
+ * Three consecutive values starting at INT_MIN.
+ * NOTE(review): presumably chosen so they cannot collide with real ioctl
+ * command numbers -- confirm.
+ */
+#define	_FIOFFS		(INT_MIN)
+#define	_FIOGDIO	(INT_MIN+1)
+#define	_FIOSDIO	(INT_MIN+2)
+
+/* F_SEEK_* are aliases for FIOSEEKDATA / FIOSEEKHOLE. */
+#define	F_SEEK_DATA	FIOSEEKDATA
+#define	F_SEEK_HOLE	FIOSEEKHOLE
+
+/*
+ * uname-style identification strings.  All members are pointers except
+ * version, which is an inline 32-byte buffer.
+ */
+struct opensolaris_utsname {
+	char	*sysname;
+	char	*nodename;
+	char	*release;
+	char	version[32];
+	char	*machine;
+};
+
+/* 11-byte character array defined elsewhere. */
+extern char hw_serial[11];
+
+/* I/O accounting hooks expand to nothing in this build. */
+#define	task_io_account_read(n)
+#define	task_io_account_write(n)
+
+#endif	/* _OPENSOLARIS_SYS_MISC_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/mod_os.h b/zfs/include/os/freebsd/spl/sys/mod_os.h
new file mode 100644
index 0000000..46ea2d1
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/mod_os.h

@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_MOD_H
+#define	_SPL_MOD_H
+
+#include <sys/sysctl.h>
+
+/* Module metadata macros expand to nothing here. */
+#define	ZFS_MODULE_DESCRIPTION(s)
+#define	ZFS_MODULE_AUTHOR(s)
+#define	ZFS_MODULE_LICENSE(s)
+#define	ZFS_MODULE_VERSION(s)
+
+/* Symbol-export and module-parameter macros also expand to nothing. */
+#define	EXPORT_SYMBOL(x)
+#define	module_param(a, b, c)
+#define	MODULE_PARM_DESC(a, b)
+
+/* Permission names mapped onto sysctl CTLFLAG_* values. */
+#define	ZMOD_RW CTLFLAG_RWTUN
+#define	ZMOD_RD CTLFLAG_RDTUN
+
+/* BEGIN CSTYLED */
+/*
+ * ZFS_MODULE_PARAM: declare the sysctl node _vfs_<scope_prefix> and add a
+ * SYSCTL_<type> entry called `name` under it, backed by the variable
+ * <name_prefix><name>, with `perm` as its flags and `desc` as its
+ * description.
+ */
+#define	ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc) \
+    SYSCTL_DECL(_vfs_ ## scope_prefix); \
+    SYSCTL_##type(_vfs_ ## scope_prefix, OID_AUTO, name, perm, &name_prefix ## name, 0, desc)
+
+/* Argument list for handler functions: same as SYSCTL_HANDLER_ARGS. */
+#define	ZFS_MODULE_PARAM_ARGS	SYSCTL_HANDLER_ARGS
+
+/*
+ * Declare `parent` and add a SYSCTL_PROC entry under it.  `args` is a
+ * comma-separated list that is OR-ed onto the flags by its first element
+ * and supplies the remaining SYSCTL_PROC arguments; see the *_args
+ * macros in this header.
+ */
+#define	ZFS_MODULE_PARAM_CALL_IMPL(parent, name, perm, args, desc) \
+    SYSCTL_DECL(parent); \
+    SYSCTL_PROC(parent, OID_AUTO, name, CTLFLAG_MPSAFE | perm | args, desc)
+
+/*
+ * Handler-backed parameter: the argument list is produced by expanding
+ * <func>_args(<name_prefix><name>).  The parameter named `_` is not used.
+ */
+#define	ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, func, _, perm, desc) \
+    ZFS_MODULE_PARAM_CALL_IMPL(_vfs_ ## scope_prefix, name, perm, func ## _args(name_prefix ## name), desc)
+
+/* Identical to ZFS_MODULE_PARAM_CALL here. */
+#define	ZFS_MODULE_VIRTUAL_PARAM_CALL ZFS_MODULE_PARAM_CALL
+
+/*
+ * <func>_args(var) helpers consumed by ZFS_MODULE_PARAM_CALL.  Each one
+ * expands to the list: CTLTYPE_* value, pointer argument, integer
+ * argument (always 0), handler function, format string.  Some entries
+ * pass NULL instead of &var and therefore ignore `var`.
+ */
+#define	param_set_arc_long_args(var) \
+    CTLTYPE_ULONG, &var, 0, param_set_arc_long, "LU"
+
+#define	param_set_arc_min_args(var) \
+    CTLTYPE_ULONG, &var, 0, param_set_arc_min, "LU"
+
+#define	param_set_arc_max_args(var) \
+    CTLTYPE_ULONG, &var, 0, param_set_arc_max, "LU"
+
+#define	param_set_arc_int_args(var) \
+    CTLTYPE_INT, &var, 0, param_set_arc_int, "I"
+
+/* The three deadman entries pass NULL rather than &var. */
+#define	param_set_deadman_failmode_args(var) \
+    CTLTYPE_STRING, NULL, 0, param_set_deadman_failmode, "A"
+
+#define	param_set_deadman_synctime_args(var) \
+    CTLTYPE_ULONG, NULL, 0, param_set_deadman_synctime, "LU"
+
+#define	param_set_deadman_ziotime_args(var) \
+    CTLTYPE_ULONG, NULL, 0, param_set_deadman_ziotime, "LU"
+
+#define	param_set_multihost_interval_args(var) \
+    CTLTYPE_ULONG, &var, 0, param_set_multihost_interval, "LU"
+
+#define	param_set_slop_shift_args(var) \
+    CTLTYPE_INT, &var, 0, param_set_slop_shift, "I"
+
+#define	param_set_min_auto_ashift_args(var) \
+    CTLTYPE_U64, &var, 0, param_set_min_auto_ashift, "QU"
+
+#define	param_set_max_auto_ashift_args(var) \
+    CTLTYPE_U64, &var, 0, param_set_max_auto_ashift, "QU"
+
+/* Passes NULL; the handler is named fletcher_4_param (no _set suffix). */
+#define	fletcher_4_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
+
+#include <sys/kernel.h>
+/*
+ * module_init(fn): emit a static wrapper, wrap_<fn>, that ignores its
+ * argument and calls fn() (any return value is discarded), then register
+ * the wrapper through SYSINIT under the name zfs_<fn> at
+ * SI_SUB_LAST / SI_ORDER_FIRST.  The expansion ends without a semicolon,
+ * so the invocation supplies it.
+ */
+#define	module_init(fn)							\
+static void								\
+wrap_ ## fn(void *dummy __unused)					\
+{									\
+	fn();								\
+}									\
+SYSINIT(zfs_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
+
+/*
+ * module_init_early(fn): same shape as module_init() -- a static
+ * wrap_<fn> wrapper that calls fn() -- but registered through SYSINIT at
+ * SI_SUB_INT_CONFIG_HOOKS / SI_ORDER_FIRST instead of SI_SUB_LAST.
+ * Because both macros name the wrapper wrap_<fn>, the same fn cannot be
+ * given to both in one translation unit.
+ */
+#define	module_init_early(fn)						\
+static void								\
+wrap_ ## fn(void *dummy __unused)					\
+{									\
+	fn();								\
+}									\
+SYSINIT(zfs_ ## fn, SI_SUB_INT_CONFIG_HOOKS, SI_ORDER_FIRST, wrap_ ## fn, NULL)
+
+/*
+ * module_exit(fn): emit a static wrap_<fn> wrapper that ignores its
+ * argument and calls fn(), then register it through SYSUNINIT under the
+ * name zfs_<fn> at SI_SUB_LAST / SI_ORDER_FIRST.
+ */
+#define	module_exit(fn)							\
+static void								\
+wrap_ ## fn(void *dummy __unused)					\
+{									\
+	fn();								\
+}									\
+SYSUNINIT(zfs_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
+/* END CSTYLED */
+
+#endif /* _SPL_MOD_H */

diff --git a/zfs/include/os/freebsd/spl/sys/mode.h b/zfs/include/os/freebsd/spl/sys/mode.h
new file mode 100644
index 0000000..651685d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/mode.h

@@ -0,0 +1 @@
+/* do not delete */

diff --git a/zfs/include/os/freebsd/spl/sys/mount.h b/zfs/include/os/freebsd/spl/sys/mount.h
new file mode 100644
index 0000000..42614e4
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/mount.h

@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_MOUNT_H_
+#define	_OPENSOLARIS_SYS_MOUNT_H_
+
+#include <sys/param.h>
+#include_next <sys/mount.h>
+#ifdef BUILDING_ZFS
+#include <sys/vfs.h>
+#endif
+/* MS_* mount flag names mapped onto the corresponding MNT_* values. */
+#define	MS_FORCE	MNT_FORCE
+#define	MS_REMOUNT	MNT_UPDATE
+
+/* fid_t is a short name for struct fid. */
+typedef	struct fid		fid_t;
+
+#endif	/* !_OPENSOLARIS_SYS_MOUNT_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/mutex.h b/zfs/include/os/freebsd/spl/sys/mutex.h
new file mode 100644
index 0000000..e757d12
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/mutex.h

@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_MUTEX_H_
+#define	_OPENSOLARIS_SYS_MUTEX_H_
+
+/*
+ * kmutex_t is a struct sx.
+ * NOTE(review): the typedef sits ahead of the #includes, presumably so
+ * that headers pulled in below can already use the name -- confirm.
+ */
+typedef struct sx	kmutex_t;
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include_next <sys/sdt.h>
+#include_next <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+
+/*
+ * Mutex types.  MUTEX_DEFAULT is the only enumerator, and mutex_init()
+ * asserts that callers pass it.
+ */
+typedef enum {
+	MUTEX_DEFAULT = 0	/* kernel default mutex */
+} kmutex_type_t;
+
+/*
+ * Ownership predicates built on mutex_owned().  MUTEX_NOT_HELD() is also
+ * true whenever panicstr is non-zero, regardless of ownership.
+ */
+#define	MUTEX_HELD(x)		(mutex_owned(x))
+#define	MUTEX_NOT_HELD(x)	(!mutex_owned(x) || panicstr)
+
+/*
+ * Flags handed to sx_init_flags() by mutex_init().  SX_NOWITNESS is
+ * included unless OPENSOLARIS_WITNESS is defined.
+ */
+#ifndef OPENSOLARIS_WITNESS
+#define	MUTEX_FLAGS	(SX_DUPOK | SX_NEW | SX_NOWITNESS)
+#else
+#define	MUTEX_FLAGS	(SX_DUPOK | SX_NEW)
+#endif
+
+/*
+ * mutex_init(lock, desc, type, arg): initialise `lock` with
+ * sx_init_flags() using MUTEX_FLAGS.  `type` must be MUTEX_DEFAULT
+ * (asserted); `desc` and `arg` are not used.
+ *
+ * The lock's name is derived from the stringified `lock` expression:
+ * leading characters are skipped up to the first one in 'a'..'z', so
+ * "&zp->z_lock" is named "zp->z_lock".  If the string contains no
+ * lowercase letter at all, the whole string is used as the name.
+ */
+#define	mutex_init(lock, desc, type, arg)	do {			\
+	const char *_name = #lock;					\
+	ASSERT((type) == MUTEX_DEFAULT);				\
+	while (*_name != '\0' && (*_name < 'a' || *_name > 'z'))	\
+		_name++;						\
+	if (*_name == '\0')						\
+		_name = #lock;						\
+	sx_init_flags((lock), _name, MUTEX_FLAGS);			\
+} while (0)
+/*
+ * Remaining mutex operations, each a direct mapping onto the exclusive
+ * sx_* call.  mutex_enter_nested() ignores its `type` argument and
+ * behaves exactly like mutex_enter().
+ */
+#define	mutex_destroy(lock)	sx_destroy(lock)
+#define	mutex_enter(lock)	sx_xlock(lock)
+#define	mutex_enter_nested(lock, type)	sx_xlock(lock)
+#define	mutex_tryenter(lock)	sx_try_xlock(lock)
+#define	mutex_exit(lock)	sx_xunlock(lock)
+#define	mutex_owned(lock)	sx_xlocked(lock)
+#define	mutex_owner(lock)	sx_xholder(lock)
+#endif	/* _OPENSOLARIS_SYS_MUTEX_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/param.h b/zfs/include/os/freebsd/spl/sys/param.h
new file mode 100644
index 0000000..92724e3
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/param.h

@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2007 John Birrell <jb@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _COMPAT_OPENSOLARIS_SYS_PARAM_H_
+#define	_COMPAT_OPENSOLARIS_SYS_PARAM_H_
+
+#include <sys/types.h>
+#include_next <sys/param.h>
+/* PAGESIZE is an alias for PAGE_SIZE. */
+#define	PAGESIZE	PAGE_SIZE
+/* ptob(x): widen x to uint64_t, then shift left by PAGE_SHIFT. */
+#define	ptob(x)		((uint64_t)(x) << PAGE_SHIFT)
+#ifdef _KERNEL
+#include <sys/systm.h>
+#include <sys/libkern.h>
+#endif
+#endif	/* _COMPAT_OPENSOLARIS_SYS_PARAM_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/policy.h b/zfs/include/os/freebsd/spl/sys/policy.h
new file mode 100644
index 0000000..909ae38
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/policy.h

@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_POLICY_H_
+#define	_OPENSOLARIS_SYS_POLICY_H_
+
+#include <sys/param.h>
+#include <sys/xvattr.h>
+#include <sys/vnode.h>
+struct mount;
+struct vattr;
+struct znode;
+
+/*
+ * secpolicy_*() declarations; the implementations are not part of this
+ * header.  Most take the caller's credential (cred_t *) plus the object
+ * being checked.  Note that secpolicy_zfs() names its credential
+ * parameter `crd` while the others use `cr`.
+ * NOTE(review): the int-returning checks presumably return 0 when the
+ * operation is permitted and an errno value otherwise -- confirm against
+ * the implementation.
+ */
+int	secpolicy_nfs(cred_t *cr);
+int	secpolicy_zfs(cred_t *crd);
+int	secpolicy_zfs_proc(cred_t *cr, proc_t *proc);
+int	secpolicy_sys_config(cred_t *cr, int checkonly);
+int	secpolicy_zinject(cred_t *cr);
+int	secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp);
+int	secpolicy_basic_link(vnode_t *vp, cred_t *cr);
+int	secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner);
+int	secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner);
+int	secpolicy_vnode_stky_modify(cred_t *cr);
+int	secpolicy_vnode_remove(vnode_t *vp, cred_t *cr);
+int	secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner,
+	    accmode_t accmode);
+int	secpolicy_vnode_access2(cred_t *cr, vnode_t *vp, uid_t owner,
+	    accmode_t curmode, accmode_t wantmode);
+int	secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner);
+int	secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner);
+/* The unlocked_access argument is a callback taking (void *, int, cred_t *). */
+int	secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap,
+	    const struct vattr *ovap, int flags,
+	    int unlocked_access(void *, int, cred_t *), void *node);
+int	secpolicy_vnode_create_gid(cred_t *cr);
+int	secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid);
+int	secpolicy_vnode_setid_retain(struct znode *zp, cred_t *cr,
+	    boolean_t issuidroot);
+void	secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr);
+int	secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap,
+	    const struct vattr *ovap, cred_t *cr);
+int	secpolicy_fs_owner(struct mount *vfsp, cred_t *cr);
+int	secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp);
+void	secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp);
+int	secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr,
+	    vtype_t vtype);
+int	secpolicy_smb(cred_t *cr);
+
+
+/*
+ * spl_priv_check_cred(a, b): calls priv_check_cred() with two arguments
+ * when __FreeBSD_version >= 1300005; on older versions a third argument
+ * of 0 is appended.
+ */
+#if __FreeBSD_version >= 1300005
+#define	spl_priv_check_cred(a, b) priv_check_cred((a), (b))
+#else
+#define	spl_priv_check_cred(a, b) priv_check_cred((a), (b), 0)
+#endif
+#endif	/* _OPENSOLARIS_SYS_POLICY_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/proc.h b/zfs/include/os/freebsd/spl/sys/proc.h
new file mode 100644
index 0000000..8583df5
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/proc.h

@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_PROC_H_
+#define	_OPENSOLARIS_SYS_PROC_H_
+
+#include <sys/param.h>
+#include <sys/kthread.h>
+#include_next <sys/proc.h>
+#include <sys/stdint.h>
+#include <sys/smp.h>
+#include <sys/sched.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/unistd.h>
+#include <sys/kmem.h>
+#include <sys/malloc.h>
+
+#ifdef _KERNEL
+/*
+ * Names mapped onto FreeBSD equivalents: CPU is curcpu, the three
+ * *clsyspri priorities map to PRIBIO / PRIBIO / PVM, and both max_ncpus
+ * and boot_max_ncpus evaluate to mp_maxid + 1.
+ */
+#define	CPU		curcpu
+#define	minclsyspri	PRIBIO
+#define	defclsyspri minclsyspri
+#define	maxclsyspri	PVM
+#define	max_ncpus	(mp_maxid + 1)
+#define	boot_max_ncpus	(mp_maxid + 1)
+
+/* The only thread state do_thread_create() accepts (asserted there). */
+#define	TS_RUN	0
+
+/* p0 names proc0. */
+#define	p0	proc0
+
+/* t_tid member accesses are rewritten to td_tid. */
+#define	t_tid	td_tid
+
+/* Thread and process types all alias struct thread / struct proc. */
+typedef	short		pri_t;
+typedef	struct thread	_kthread;
+typedef	struct thread	kthread_t;
+typedef struct thread	*kthread_id_t;
+typedef struct proc	proc_t;
+
+/* Process used by do_thread_create() when the caller passes &p0. */
+extern proc_t *system_proc;
+
+/*
+ * Create a kernel thread named `name` that runs proc(arg), set its
+ * priority to `pri`, and put it on a run queue.
+ *
+ * stk must be NULL, len must be 0 and state must be TS_RUN (all three
+ * are asserted).  stksize is converted to pages (stksize / PAGE_SIZE)
+ * for kproc_kthread_add().  If pp is &p0 the thread is added to
+ * system_proc, otherwise to pp; "zfskern" is passed as the process name.
+ *
+ * Returns the thread pointer filled in by kproc_kthread_add().  The
+ * pointer starts out NULL and is returned as-is when
+ * kproc_kthread_add() reports an error.
+ */
+static __inline kthread_t *
+do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
+    size_t len, proc_t *pp, int state, pri_t pri, const char *name)
+{
+	kthread_t *newtd = NULL;
+	proc_t **procp = (pp == &p0) ? &system_proc : &pp;
+
+	/* Only the plain "runnable thread, default stack" form is supported. */
+	ASSERT(stk == NULL);
+	ASSERT(len == 0);
+	ASSERT(state == TS_RUN);
+
+	/* The thread is created stopped (RFSTOPPED) and scheduled below. */
+	if (kproc_kthread_add(proc, arg, procp, &newtd, RFSTOPPED,
+	    stksize / PAGE_SIZE, "zfskern", "%s", name) != 0)
+		return (newtd);
+
+	thread_lock(newtd);
+	sched_prio(newtd, pri);
+	sched_add(newtd, SRQ_BORING);
+	/*
+	 * NOTE(review): the unlock is compiled only for older kernels,
+	 * presumably because sched_add() releases the thread lock itself
+	 * from 1300068 on -- confirm.
+	 */
+#if __FreeBSD_version < 1300068
+	thread_unlock(newtd);
+#endif
+	return (newtd);
+}
+
+/*
+ * Thread creation front ends for do_thread_create().
+ * thread_create_named() takes the thread name as its first argument;
+ * thread_create() uses the stringified `proc` argument as the name.
+ * thread_exit() maps to kthread_exit().
+ */
+#define	thread_create_named(name, stk, stksize, proc, arg, len,	\
+    pp, state, pri) \
+	do_thread_create(stk, stksize, proc, arg, len, pp, state, pri, name)
+#define	thread_create(stk, stksize, proc, arg, len, pp, state, pri) \
+	do_thread_create(stk, stksize, proc, arg, len, pp, state, pri, #proc)
+#define	thread_exit()	kthread_exit()
+
+/*
+ * Declared here, defined elsewhere.
+ * NOTE(review): presumably read from / write to the address space of the
+ * given process (buffer, length, target address) -- confirm.
+ */
+int	uread(proc_t *, void *, size_t, uintptr_t);
+int	uwrite(proc_t *, void *, size_t, uintptr_t);
+
+/*
+ * Report whether `p` is the process of the calling context, i.e. whether
+ * it is the same pointer as curproc.
+ */
+static inline boolean_t
+zfs_proc_is_caller(proc_t *p)
+{
+	return (curproc == p);
+}
+
+#endif	/* _KERNEL */
+#endif	/* _OPENSOLARIS_SYS_PROC_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/processor.h b/zfs/include/os/freebsd/spl/sys/processor.h
new file mode 100644
index 0000000..5314984
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/processor.h

@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ *	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
+ *	  All Rights Reserved
+ *
+ */
+
+/*
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ *
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_PROCESSOR_H
+#define	_SYS_PROCESSOR_H
+
+#include <sys/types.h>
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Definitions for p_online, processor_info & lgrp system calls.
+ */
+
+/*
+ * Type for an lgrpid
+ */
+typedef uint16_t lgrpid_t;
+
+/*
+ * Type for processor name (CPU number).
+ */
+typedef	int	processorid_t;
+/* chipid_t is likewise a plain int. */
+typedef int	chipid_t;
+
+/* getcpuid() evaluates to curcpu. */
+#define	getcpuid() curcpu
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _SYS_PROCESSOR_H */

diff --git a/zfs/include/os/freebsd/spl/sys/procfs_list.h b/zfs/include/os/freebsd/spl/sys/procfs_list.h
new file mode 100644
index 0000000..4bc6037
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/procfs_list.h

@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#ifndef	_SPL_PROCFS_LIST_H
+#define	_SPL_PROCFS_LIST_H
+
+#ifndef _STANDALONE
+
+#include <sys/kstat.h>
+#include <sys/mutex.h>
+
+
+/*
+ * procfs list manipulation
+ */
+
+/*
+ * procfs list object: a list_t together with a kmutex_t, an id counter,
+ * and three callbacks whose signatures match the show / show_header /
+ * clear arguments of procfs_list_install().
+ * NOTE(review): pl_lock presumably protects pl_list and pl_next_id, and
+ * pl_node_offset is presumably the byte offset of the embedded
+ * procfs_list_node_t within each element -- confirm against the
+ * implementation.
+ */
+typedef struct procfs_list procfs_list_t;
+struct procfs_list {
+	void		*pl_private;
+	void		*pl_next_data;
+	kmutex_t	pl_lock;
+	list_t		pl_list;
+	uint64_t	pl_next_id;
+	int		(*pl_show)(struct seq_file *f, void *p);
+	int		(*pl_show_header)(struct seq_file *f);
+	int		(*pl_clear)(procfs_list_t *procfs_list);
+	size_t		pl_node_offset;
+};
+
+/*
+ * Per-element bookkeeping: a list linkage node plus a 64-bit id.
+ */
+typedef struct procfs_list_node {
+	list_node_t	pln_link;
+	uint64_t	pln_id;
+} procfs_list_node_t;
+
+/*
+ * procfs list API declarations; implementations live elsewhere.
+ * procfs_list_install() takes the module / submodule / name strings, a
+ * mode, the list object, the show, show_header and clear callbacks, and
+ * a size_t node offset.
+ * NOTE(review): procfs_list_node_off is presumably stored in
+ * pl_node_offset -- confirm.
+ */
+void procfs_list_install(const char *module,
+    const char *submodule,
+    const char *name,
+    mode_t mode,
+    procfs_list_t *procfs_list,
+    int (*show)(struct seq_file *f, void *p),
+    int (*show_header)(struct seq_file *f),
+    int (*clear)(procfs_list_t *procfs_list),
+    size_t procfs_list_node_off);
+void procfs_list_uninstall(procfs_list_t *procfs_list);
+void procfs_list_destroy(procfs_list_t *procfs_list);
+void procfs_list_add(procfs_list_t *procfs_list, void *p);
+
+#else
+typedef int procfs_list_t;
+#endif /* !_STANDALONE */
+
+#endif	/* _SPL_PROCFS_LIST_H */

diff --git a/zfs/include/os/freebsd/spl/sys/random.h b/zfs/include/os/freebsd/spl/sys/random.h
new file mode 100644
index 0000000..7583166
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/random.h

@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_RANDOM_H_
+#define	_OPENSOLARIS_SYS_RANDOM_H_
+
+#include_next <sys/random.h>
+#if  __FreeBSD_version >= 1300108
+#include <sys/prng.h>
+#endif
+
+static inline int
+random_get_bytes(uint8_t *p, size_t s)
+{
+	arc4rand(p, (int)s, 0);	/* note: s is narrowed to int here */
+	return (0);		/* unconditionally reports success */
+}
+
+static inline int
+random_get_pseudo_bytes(uint8_t *p, size_t s)
+{
+	arc4rand(p, (int)s, 0);	/* same call as random_get_bytes() */
+	return (0);		/* unconditionally reports success */
+}
+
+static inline uint32_t
+random_in_range(uint32_t range)
+{
+#if defined(_KERNEL) && __FreeBSD_version >= 1300108
+	return (prng32_bounded(range));	/* range is not ASSERTed here */
+#else
+	uint32_t r;
+
+	ASSERT(range != 0);
+
+	if (range == 1)		/* only 0 is possible; skip the RNG call */
+		return (0);
+
+	(void) random_get_pseudo_bytes((uint8_t *)&r, sizeof (r));
+
+	return (r % range);	/* reduce to [0, range) by remainder */
+#endif
+}
+
+#endif	/* !_OPENSOLARIS_SYS_RANDOM_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/rwlock.h b/zfs/include/os/freebsd/spl/sys/rwlock.h
new file mode 100644
index 0000000..10107a9
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/rwlock.h

@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_RWLOCK_H_
+#define	_OPENSOLARIS_SYS_RWLOCK_H_
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+
+typedef enum {
+	RW_DEFAULT = 4		/* kernel default rwlock */
+} krw_type_t;
+
+
+typedef enum {
+	RW_NONE		= 0,
+	RW_WRITER	= 1,
+	RW_READER	= 2
+} krw_t;
+
+typedef	struct sx	krwlock_t;
+
+#ifndef OPENSOLARIS_WITNESS
+#define	RW_FLAGS	(SX_DUPOK | SX_NOWITNESS)
+#else
+#define	RW_FLAGS	(SX_DUPOK)
+#endif
+
+#define	RW_READ_HELD(x)		(rw_read_held((x)))
+#define	RW_WRITE_HELD(x)	(rw_write_held((x)))
+#define	RW_LOCK_HELD(x)		(rw_lock_held((x)))
+#define	RW_ISWRITER(x)		(rw_iswriter(x))
+/* BEGIN CSTYLED */
+#define	rw_init(lock, desc, type, arg)	do {				\
+	const char *_name;						\
+	ASSERT((type) == 0 || (type) == RW_DEFAULT);			\
+	KASSERT(((lock)->lock_object.lo_flags & LO_ALLMASK) !=		\
+	    LO_EXPECTED, ("lock %s already initialized", #lock));	\
+	bzero((lock), sizeof(struct sx));				\
+	for (_name = #lock; *_name != '\0'; _name++) {			\
+		if (*_name >= 'a' && *_name <= 'z')			\
+			break;						\
+	}								\
+	if (*_name == '\0')						\
+		_name = #lock;						\
+	sx_init_flags((lock), _name, RW_FLAGS);				\
+} while (0)
+#define	rw_destroy(lock)	sx_destroy(lock)
+#define	rw_enter(lock, how)	do {					\
+	if ((how) == RW_READER)						\
+		sx_slock(lock);						\
+	else /* if ((how) == RW_WRITER) */				\
+		sx_xlock(lock);						\
+	} while (0)
+
+#define	rw_tryenter(lock, how)			   \
+	((how) == RW_READER ? sx_try_slock(lock) : sx_try_xlock(lock))
+#define	rw_exit(lock)		sx_unlock(lock)
+#define	rw_downgrade(lock)	sx_downgrade(lock)
+#define	rw_tryupgrade(lock)	sx_try_upgrade(lock)
+#define	rw_read_held(lock)					  \
+	((lock)->sx_lock != SX_LOCK_UNLOCKED &&	  \
+	 ((lock)->sx_lock & SX_LOCK_SHARED))
+#define	rw_write_held(lock)	sx_xlocked(lock)
+#define	rw_lock_held(lock)	(rw_read_held(lock) || rw_write_held(lock))
+#define	rw_iswriter(lock)	sx_xlocked(lock)
+#define	rw_owner(lock)		sx_xholder(lock)
+
+/* END CSTYLED */
+#endif	/* _OPENSOLARIS_SYS_RWLOCK_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/sdt.h b/zfs/include/os/freebsd/spl/sys/sdt.h
new file mode 100644
index 0000000..496fc58
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/sdt.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_SDT_H_
+#define	_OPENSOLARIS_SYS_SDT_H_
+
+#include_next <sys/sdt.h>
+/* BEGIN CSTYLED */
+#ifdef KDTRACE_HOOKS
+SDT_PROBE_DECLARE(sdt, , , set__error);
+
+#define	SET_ERROR(err)	/* fire the set-error probe, then yield err */ \
+	((sdt_sdt___set__error->id ?	/* NB: err is evaluated twice */ \
+	(*sdt_probe_func)(sdt_sdt___set__error->id, \
+	    (uintptr_t)(err), 0, 0, 0, 0) : 0), (err))
+#else
+#define	SET_ERROR(err) (err)
+#endif
+
+#endif	/* _OPENSOLARIS_SYS_SDT_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/sid.h b/zfs/include/os/freebsd/spl/sys/sid.h
new file mode 100644
index 0000000..f249d05
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/sid.h

@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_SID_H_
+#define	_OPENSOLARIS_SYS_SID_H_
+#include <sys/idmap.h>
+
+typedef struct ksiddomain {
+	char	*kd_name;	/* Domain part of SID */
+	uint_t	kd_len;
+} ksiddomain_t;
+typedef void	ksid_t;
+
+static __inline ksiddomain_t *
+ksid_lookupdomain(const char *domain)
+{
+	/* Buffer size for the copy: the string plus its terminating NUL. */
+	const size_t name_size = strlen(domain) + 1;
+	ksiddomain_t *dom = kmem_alloc(sizeof (ksiddomain_t), KM_SLEEP);
+
+	dom->kd_name = kmem_alloc(name_size, KM_SLEEP);
+	(void) strcpy(dom->kd_name, domain);
+	/* Remember the size so ksiddomain_rele() can free the buffer. */
+	dom->kd_len = (uint_t)name_size;
+	return (dom);
+}
+
+static __inline void
+ksiddomain_rele(ksiddomain_t *kd)
+{
+
+	kmem_free(kd->kd_name, kd->kd_len);	/* kd_len is the alloc size */
+	kmem_free(kd, sizeof (*kd));
+}
+
+#endif	/* _OPENSOLARIS_SYS_SID_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/sig.h b/zfs/include/os/freebsd/spl/sys/sig.h
new file mode 100644
index 0000000..a4d440d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/sig.h

@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2008 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_SIG_H_
+#define	_OPENSOLARIS_SYS_SIG_H_
+
+#ifndef _STANDALONE
+
+#include_next <sys/signal.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/debug.h>
+
+#define	FORREAL		0
+#define	JUSTLOOKING	1
+
+static __inline int
+issig(int why)
+{
+	struct thread *self = curthread;
+	struct proc *owner;
+	int pending;
+
+	ASSERT(why == FORREAL || why == JUSTLOOKING);
+	/* SIGPENDING() is false for this thread: report no signal. */
+	if (!SIGPENDING(self))
+		return (0);
+	/* A JUSTLOOKING caller is answered without taking any locks. */
+	if (why == JUSTLOOKING)
+		return (1);
+	owner = self->td_proc;
+	PROC_LOCK(owner);
+	mtx_lock(&owner->p_sigacts->ps_mtx);
+	pending = cursig(self);
+	mtx_unlock(&owner->p_sigacts->ps_mtx);
+	PROC_UNLOCK(owner);
+	return (pending != 0);
+}
+
+#endif /* !_STANDALONE */
+
+#endif	/* _OPENSOLARIS_SYS_SIG_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/simd.h b/zfs/include/os/freebsd/spl/sys/simd.h
new file mode 100644
index 0000000..53503e8
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/simd.h

@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+#ifndef _FREEBSD_SIMD_H
+#define	_FREEBSD_SIMD_H
+#if defined(__amd64__) || defined(__i386__)
+#include <sys/simd_x86.h>
+#else
+
+#define	kfpu_allowed()		0
+#define	kfpu_initialize(tsk)	do {} while (0)
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		(0)
+#define	kfpu_fini()		do {} while (0)
+#endif
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/simd_x86.h b/zfs/include/os/freebsd/spl/sys/simd_x86.h
new file mode 100644
index 0000000..480bfd2
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/simd_x86.h

@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/types.h>
+#include <sys/cdefs.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+
+#include <machine/pcb.h>
+#include <x86/x86_var.h>
+#include <x86/specialreg.h>
+
+#define	kfpu_init()		(0)
+#define	kfpu_fini()		do {} while (0)
+#define	kfpu_allowed()		1
+#define	kfpu_initialize(tsk)	do {} while (0)
+
+#define	kfpu_begin() do {	/* do/while (0): one statement */	\
+	if (__predict_false(!is_fpu_kern_thread(0)))		\
+		fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);\
+} while (0)
+
+#define	kfpu_end()	do {	/* do/while (0): one statement */	\
+	if (__predict_false(curpcb->pcb_flags & PCB_FPUNOSAVE))	\
+		fpu_kern_leave(curthread, NULL);	\
+} while (0)
+
+/*
+ * Check if OS supports AVX and AVX2 by checking XCR0
+ * Only call this function if CPUID indicates that AVX feature is
+ * supported by the CPU, otherwise it might be an illegal instruction.
+ */
+static inline uint64_t
+xgetbv(uint32_t index)
+{
+	uint32_t eax, edx;
+	/* XGETBV as raw opcode bytes: index in via ecx, result in edx:eax */
+	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
+	    : "=a" (eax), "=d" (edx)
+	    : "c" (index));
+
+	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
+}
+
+
+/*
+ * Detect register set support
+ */
+static inline boolean_t
+__simd_state_enabled(const uint64_t state)
+{
+	/*
+	 * XCR0 is consulted only when cpu_feature2 has CPUID2_OSXSAVE set.
+	 * Without that bit, no extended register state is reported as
+	 * enabled.
+	 */
+	if ((cpu_feature2 & CPUID2_OSXSAVE) == 0)
+		return (B_FALSE);
+
+	/* Every bit requested in 'state' must also be set in XCR0. */
+	return ((xgetbv(0) & state) == state);
+}
+
+#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
+#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)
+
+#define	__ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
+#define	__zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
+
+
+/*
+ * Report whether cpu_feature carries the SSE bit (CPUID_SSE)
+ */
+static inline boolean_t
+zfs_sse_available(void)
+{
+	return ((cpu_feature & CPUID_SSE) != 0);
+}
+
+/*
+ * Report whether cpu_feature carries the SSE2 bit (CPUID_SSE2)
+ */
+static inline boolean_t
+zfs_sse2_available(void)
+{
+	return ((cpu_feature & CPUID_SSE2) != 0);
+}
+
+/*
+ * Report whether cpu_feature2 carries the SSE3 bit (CPUID2_SSE3)
+ */
+static inline boolean_t
+zfs_sse3_available(void)
+{
+	return ((cpu_feature2 & CPUID2_SSE3) != 0);
+}
+
+/*
+ * Report whether cpu_feature2 carries the SSSE3 bit (CPUID2_SSSE3)
+ */
+static inline boolean_t
+zfs_ssse3_available(void)
+{
+	return ((cpu_feature2 & CPUID2_SSSE3) != 0);
+}
+
+/*
+ * Report whether cpu_feature2 carries the SSE4.1 bit (CPUID2_SSE41)
+ */
+static inline boolean_t
+zfs_sse4_1_available(void)
+{
+	return ((cpu_feature2 & CPUID2_SSE41) != 0);
+}
+
+/*
+ * Report whether cpu_feature2 carries the SSE4.2 bit (CPUID2_SSE42)
+ */
+static inline boolean_t
+zfs_sse4_2_available(void)
+{
+	return ((cpu_feature2 & CPUID2_SSE42) != 0);
+}
+
+/*
+ * AVX: needs CPUID2_AVX in cpu_feature2 and __ymm_enabled()
+ */
+static inline boolean_t
+zfs_avx_available(void)
+{
+	/* Without the AVX bit the XCR0 check is never reached. */
+	if ((cpu_feature2 & CPUID2_AVX) == 0)
+		return (B_FALSE);
+
+	return (__ymm_enabled());
+}
+
+/*
+ * AVX2: needs CPUID_STDEXT_AVX2 in cpu_stdext_feature and __ymm_enabled()
+ */
+static inline boolean_t
+zfs_avx2_available(void)
+{
+	/* Without the AVX2 bit the XCR0 check is never reached. */
+	if ((cpu_stdext_feature & CPUID_STDEXT_AVX2) == 0)
+		return (B_FALSE);
+
+	return (__ymm_enabled());
+}
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F	Foundation
+ * AVX512CD	Conflict Detection Instructions
+ * AVX512ER	Exponential and Reciprocal Instructions
+ * AVX512PF	Prefetch Instructions
+ *
+ * AVX512BW	Byte and Word Instructions
+ * AVX512DQ	Double-word and Quadword Instructions
+ * AVX512VL	Vector Length Extensions
+ *
+ * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI	Vector Byte Manipulation Instructions
+ */
+
+
+/* Check if AVX512F instruction set is available */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+	/* Foundation bit missing from cpu_stdext_feature: no AVX-512. */
+	if ((cpu_stdext_feature & CPUID_STDEXT_AVX512F) == 0)
+		return (B_FALSE);
+
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512CD instruction set is available */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+	/* cpu_stdext_feature bits that must all be set: F and CD. */
+	const uint32_t wanted = CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512CD;
+
+	if ((cpu_stdext_feature & wanted) != wanted)
+		return (B_FALSE);
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512ER instruction set is available */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+	/* Both the Foundation bit and the ER bit must be advertised. */
+	const uint32_t wanted = CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512ER;
+
+	if ((cpu_stdext_feature & wanted) != wanted)
+		return (B_FALSE);
+	return (__zmm_enabled());	/* and XCR0 must enable ZMM state */
+}
+
+/* Check if AVX512PF instruction set is available */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+	/* cpu_stdext_feature bits that must all be set: F and PF. */
+	const uint32_t wanted = CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512PF;
+
+	if ((cpu_stdext_feature & wanted) != wanted)
+		return (B_FALSE);
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512BW instruction set is available */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+	/* Like the other AVX-512 subsets, BW also requires Foundation. */
+	if ((cpu_stdext_feature & CPUID_STDEXT_AVX512F) == 0 ||
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512BW) == 0)
+		return (B_FALSE);
+	return (__zmm_enabled());	/* and XCR0 must enable ZMM state */
+}
+
+/* Check if AVX512DQ instruction set is available */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+	/* cpu_stdext_feature bits that must all be set: F and DQ. */
+	const uint32_t wanted = CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512DQ;
+
+	if ((cpu_stdext_feature & wanted) != wanted)
+		return (B_FALSE);
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512VL instruction set is available */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+	/* cpu_stdext_feature bits that must all be set: F and VL. */
+	const uint32_t wanted = CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL;
+
+	if ((cpu_stdext_feature & wanted) != wanted)
+		return (B_FALSE);
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512IFMA instruction set is available */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+	/* Either bit missing from cpu_stdext_feature means unavailable. */
+	if ((cpu_stdext_feature & CPUID_STDEXT_AVX512F) == 0 ||
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512IFMA) == 0)
+		return (B_FALSE);
+
+	return (__zmm_enabled());
+}
+
+/* Check if AVX512VBMI instruction set is available */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+	/* VBMI is a cpu_stdext_feature2 bit; BMI1 is an unrelated feature. */
+	if ((cpu_stdext_feature & CPUID_STDEXT_AVX512F) == 0 ||
+	    (cpu_stdext_feature2 & CPUID_STDEXT2_AVX512VBMI) == 0)
+		return (B_FALSE);
+
+	return (__zmm_enabled());	/* and XCR0 must enable ZMM state */
+}

diff --git a/zfs/include/os/freebsd/spl/sys/spl_condvar.h b/zfs/include/os/freebsd/spl/sys/spl_condvar.h
new file mode 100644
index 0000000..7405f64
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/spl_condvar.h

@@ -0,0 +1,81 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SPL_SYS_CONDVAR_H_
+#define	_SPL_SYS_CONDVAR_H_
+
+#ifndef	LOCORE
+#include <sys/queue.h>
+
+struct lock_object;
+struct thread;
+
+TAILQ_HEAD(cv_waitq, thread);
+
+/*
+ * Condition variable.  The waiters count is protected by the mutex that
+ * protects the condition; that is, the mutex that is passed to cv_wait*()
+ * and is held across calls to cv_signal() and cv_broadcast().  It is an
+ * optimization to avoid looking up the sleep queue if there are no waiters.
+ */
+struct cv {
+	const char	*cv_description;
+	int		cv_waiters;
+};
+
+void	cv_init(struct cv *cvp, const char *desc);
+void	cv_destroy(struct cv *cvp);
+
+void	_cv_wait(struct cv *cvp, struct lock_object *lock);
+void	_cv_wait_unlock(struct cv *cvp, struct lock_object *lock);
+int	_cv_wait_sig(struct cv *cvp, struct lock_object *lock);
+int	_cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock,
+	    sbintime_t sbt, sbintime_t pr, int flags);
+int	_cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
+	    sbintime_t sbt, sbintime_t pr, int flags);
+
+void	cv_signal(struct cv *cvp);
+void	cv_broadcastpri(struct cv *cvp, int pri);
+
+#define	cv_wait(cvp, lock)						\
+	_cv_wait((cvp), &(lock)->lock_object)
+#define	cv_wait_unlock(cvp, lock)					\
+	_cv_wait_unlock((cvp), &(lock)->lock_object)
+#define	cv_timedwait_sbt(cvp, lock, sbt, pr, flags)			\
+	_cv_timedwait_sbt((cvp), &(lock)->lock_object, (sbt), (pr), (flags))
+#define	cv_timedwait_sig_sbt(cvp, lock, sbt, pr, flags)			\
+	_cv_timedwait_sig_sbt((cvp), &(lock)->lock_object, (sbt), (pr), (flags))
+
+#define	cv_broadcast(cvp)	cv_broadcastpri(cvp, 0)
+
+#define	cv_wmesg(cvp)		((cvp)->cv_description)
+
+#endif	/* !LOCORE */
+#endif	/* _SPL_SYS_CONDVAR_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/string.h b/zfs/include/os/freebsd/spl/sys/string.h
new file mode 100644
index 0000000..859b402
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/string.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_STRING_H_
+#define	_OPENSOLARIS_SYS_STRING_H_
+
+#include <sys/libkern.h>
+
+char	*strpbrk(const char *, const char *);
+void	 strident_canon(char *, size_t);
+void	 kmem_strfree(char *);
+char	*kmem_strdup(const char *s);
+
+#endif	/* _OPENSOLARIS_SYS_STRING_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/strings.h b/zfs/include/os/freebsd/spl/sys/strings.h
new file mode 100644
index 0000000..651685d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/strings.h

@@ -0,0 +1 @@
+/* do not delete */

diff --git a/zfs/include/os/freebsd/spl/sys/sunddi.h b/zfs/include/os/freebsd/spl/sys/sunddi.h
new file mode 100644
index 0000000..bfbc3e1
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/sunddi.h

@@ -0,0 +1,68 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SUNDDI_H
+#define	_SPL_SUNDDI_H
+
+#include <sys/cred.h>
+#include <sys/uio.h>
+#include <sys/mutex.h>
+#include <sys/u8_textprep.h>
+#ifdef BUILDING_ZFS
+#include <sys/vnode.h>
+#endif
+
+typedef int ddi_devid_t;
+
+#define	DDI_DEV_T_NONE				((dev_t)-1)
+#define	DDI_DEV_T_ANY				((dev_t)-2)
+#define	DI_MAJOR_T_UNKNOWN			((major_t)0)
+
+#define	DDI_PROP_DONTPASS			0x0001
+#define	DDI_PROP_CANSLEEP			0x0002
+
+#define	DDI_SUCCESS				0
+#define	DDI_FAILURE				-1
+
+#define	ddi_prop_lookup_string(x1, x2, x3, x4, x5)	(*x5 = NULL)
+#define	ddi_prop_free(x)				(void)0
+#define	ddi_root_node()					(void)0
+
+extern int ddi_strtoul(const char *, char **, int, unsigned long *);
+extern int ddi_strtol(const char *, char **, int, long *);
+extern int ddi_strtoull(const char *, char **, int, unsigned long long *);
+extern int ddi_strtoll(const char *, char **, int, long long *);
+
+extern int ddi_copyin(const void *from, void *to, size_t len, int flags);
+extern int ddi_copyout(const void *from, void *to, size_t len, int flags);
+extern void ddi_sysevent_init(void);
+
+
+int ddi_soft_state_init(void **statep, size_t size, size_t nitems);
+void ddi_soft_state_fini(void **statep);
+
+void *ddi_get_soft_state(void *state, int item);
+int ddi_soft_state_zalloc(void *state, int item);
+void ddi_soft_state_free(void *state, int item);
+
+#endif /* _SPL_SUNDDI_H */

diff --git a/zfs/include/os/freebsd/spl/sys/sysmacros.h b/zfs/include/os/freebsd/spl/sys/sysmacros.h
new file mode 100644
index 0000000..7e3ab89
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/sysmacros.h

@@ -0,0 +1,410 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SYSMACROS_H
+#define	_SYS_SYSMACROS_H
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/isa_defs.h>
+#include <sys/libkern.h>
+#include <sys/zone.h>
+#include <sys/condvar.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Some macros for units conversion
+ */
+/*
+ * Disk blocks (sectors) and bytes.
+ */
+#define	dtob(DD)	((DD) << DEV_BSHIFT)
+#define	btod(BB)	(((BB) + DEV_BSIZE - 1) >> DEV_BSHIFT)
+#define	btodt(BB)	((BB) >> DEV_BSHIFT)
+#define	lbtod(BB)	(((offset_t)(BB) + DEV_BSIZE - 1) >> DEV_BSHIFT)
+
+/* common macros */
+#ifndef MIN
+#define	MIN(a, b)	((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define	MAX(a, b)	((a) < (b) ? (b) : (a))
+#endif
+#ifndef ABS
+#define	ABS(a)		((a) < 0 ? -(a) : (a))
+#endif
+#ifndef	SIGNOF
+#define	SIGNOF(a)	((a) < 0 ? -1 : (a) > 0)
+#endif
+#ifndef	ARRAY_SIZE
+#define	ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
+#endif
+#ifndef	DIV_ROUND_UP
+#define	DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
+#endif
+
+#ifdef _STANDALONE
+#define	boot_ncpus 1
+#else /* _STANDALONE */
+#define	boot_ncpus mp_ncpus
+#endif /* _STANDALONE */
+#define	kpreempt_disable() critical_enter()
+#define	kpreempt_enable() critical_exit()
+#define	CPU_SEQID curcpu
+#define	CPU_SEQID_UNSTABLE curcpu
+#define	is_system_labeled()		0
+/*
+ * Convert a single byte to/from binary-coded decimal (BCD).
+ */
+extern unsigned char byte_to_bcd[256];
+extern unsigned char bcd_to_byte[256];
+
+#define	BYTE_TO_BCD(x)	byte_to_bcd[(x) & 0xff]
+#define	BCD_TO_BYTE(x)	bcd_to_byte[(x) & 0xff]
+
+/*
+ * WARNING: The device number macros defined here should not be used by device
+ * drivers or user software. Device drivers should use the device functions
+ * defined in the DDI/DKI interface (see also ddi.h). Application software
+ * should make use of the library routines available in makedev(3). A set of
+ * new device macros are provided to operate on the expanded device number
+ * format supported in SVR4. Macro versions of the DDI device functions are
+ * provided for use by kernel proper routines only. Macro routines bmajor(),
+ * major(), minor(), emajor(), eminor(), and makedev() will be removed or
+ * their definitions changed at the next major release following SVR4.
+ */
+
+#define	O_BITSMAJOR	7	/* # of SVR3 major device bits */
+#define	O_BITSMINOR	8	/* # of SVR3 minor device bits */
+#define	O_MAXMAJ	0x7f	/* SVR3 max major value */
+#define	O_MAXMIN	0xff	/* SVR3 max minor value */
+
+
+#define	L_BITSMAJOR32	14	/* # of SVR4 major device bits */
+#define	L_BITSMINOR32	18	/* # of SVR4 minor device bits */
+#define	L_MAXMAJ32	0x3fff	/* SVR4 max major value */
+#define	L_MAXMIN32	0x3ffff	/* MAX minor for 3b2 software drivers. */
+				/* For 3b2 hardware devices the minor is */
+				/* restricted to 256 (0-255) */
+
+#ifdef _LP64
+#define	L_BITSMAJOR	32	/* # of major device bits in 64-bit Solaris */
+#define	L_BITSMINOR	32	/* # of minor device bits in 64-bit Solaris */
+#define	L_MAXMAJ	0xfffffffful	/* max major value */
+#define	L_MAXMIN	0xfffffffful	/* max minor value */
+#else
+#define	L_BITSMAJOR	L_BITSMAJOR32
+#define	L_BITSMINOR	L_BITSMINOR32
+#define	L_MAXMAJ	L_MAXMAJ32
+#define	L_MAXMIN	L_MAXMIN32
+#endif
+
+/*
+ * These are versions of the kernel routines for compressing and
+ * expanding long device numbers that don't return errors.
+ */
+#if (L_BITSMAJOR32 == L_BITSMAJOR) && (L_BITSMINOR32 == L_BITSMINOR)
+
+#define	DEVCMPL(x)	(x)
+#define	DEVEXPL(x)	(x)
+
+#else
+
+#define	DEVCMPL(x)	\
+	(dev32_t)((((x) >> L_BITSMINOR) > L_MAXMAJ32 || \
+	    ((x) & L_MAXMIN) > L_MAXMIN32) ? NODEV32 : \
+	    ((((x) >> L_BITSMINOR) << L_BITSMINOR32) | ((x) & L_MAXMIN32)))
+
+#define	DEVEXPL(x)	\
+	(((x) == NODEV32) ? NODEV : \
+	makedevice(((x) >> L_BITSMINOR32) & L_MAXMAJ32, (x) & L_MAXMIN32))
+
+#endif /* L_BITSMAJOR32 ... */
+
+/* convert to old (SVR3.2) dev format */
+
+#define	cmpdev(x) \
+	(o_dev_t)((((x) >> L_BITSMINOR) > O_MAXMAJ || \
+	    ((x) & L_MAXMIN) > O_MAXMIN) ? NODEV : \
+	    ((((x) >> L_BITSMINOR) << O_BITSMINOR) | ((x) & O_MAXMIN)))
+
+/* convert to new (SVR4) dev format */
+
+#define	expdev(x) \
+	(dev_t)(((dev_t)(((x) >> O_BITSMINOR) & O_MAXMAJ) << L_BITSMINOR) | \
+	    ((x) & O_MAXMIN))
+
+/*
+ * Macro for checking power of 2 address alignment.
+ */
+#define	IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
+
+/*
+ * Macros for counting and rounding.
+ */
+#define	howmany(x, y)	(((x)+((y)-1))/(y))
+#define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))
+
+/*
+ * Macro to determine if value is a power of 2
+ */
+#define	ISP2(x)		(((x) & ((x) - 1)) == 0)
+
+/*
+ * Macros for various sorts of alignment and rounding.  The "align" must
+ * be a power of 2.  Often times it is a block, sector, or page.
+ */
+
+/*
+ * return x rounded down to an align boundary
+ * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
+ * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
+ * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
+ * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
+ */
+#define	P2ALIGN(x, align)		((x) & -(align))
+
+/*
+ * return x % (mod) align
+ * eg, P2PHASE(0x1234, 0x100) == 0x34 (x-0x12*align)
+ * eg, P2PHASE(0x5600, 0x100) == 0x00 (x-0x56*align)
+ */
+#define	P2PHASE(x, align)		((x) & ((align) - 1))
+
+/*
+ * return how much space is left in this block (but if it's perfectly
+ * aligned, return 0).
+ * eg, P2NPHASE(0x1234, 0x100) == 0xcc (0x13*align-x)
+ * eg, P2NPHASE(0x5600, 0x100) == 0x00 (0x56*align-x)
+ */
+#define	P2NPHASE(x, align)		(-(x) & ((align) - 1))
+
+/*
+ * return x rounded up to an align boundary
+ * eg, P2ROUNDUP(0x1234, 0x100) == 0x1300 (0x13*align)
+ * eg, P2ROUNDUP(0x5600, 0x100) == 0x5600 (0x56*align)
+ */
+#define	P2ROUNDUP(x, align)		(-(-(x) & -(align)))
+
+/*
+ * return the address just past the end of the block that x is in
+ * eg, P2END(0x1234, 0x100) == 0x1300 (0x13*align)
+ * eg, P2END(0x5600, 0x100) == 0x5700 (0x57*align)
+ */
+#define	P2END(x, align)			(-(~(x) & -(align)))
+
+/*
+ * return x rounded up to the next phase (offset) within align.
+ * phase should be < align.
+ * eg, P2PHASEUP(0x1234, 0x100, 0x10) == 0x1310 (0x13*align + phase)
+ * eg, P2PHASEUP(0x5600, 0x100, 0x10) == 0x5610 (0x56*align + phase)
+ */
+#define	P2PHASEUP(x, align, phase)	((phase) - (((phase) - (x)) & -(align)))
+
+/*
+ * return TRUE if adding len to off would cause it to cross an align
+ * boundary.
+ * eg, P2BOUNDARY(0x1234, 0xe0, 0x100) == TRUE (0x1234 + 0xe0 == 0x1314)
+ * eg, P2BOUNDARY(0x1234, 0x50, 0x100) == FALSE (0x1234 + 0x50 == 0x1284)
+ */
+#define	P2BOUNDARY(off, len, align) \
+	(((off) ^ ((off) + (len) - 1)) > (align) - 1)
+
+/*
+ * Return TRUE if they have the same highest bit set.
+ * eg, P2SAMEHIGHBIT(0x1234, 0x1001) == TRUE (the high bit is 0x1000)
+ * eg, P2SAMEHIGHBIT(0x1234, 0x3010) == FALSE (high bit of 0x3010 is 0x2000)
+ */
+#define	P2SAMEHIGHBIT(x, y)		(((x) ^ (y)) < ((x) & (y)))
+
+/*
+ * Typed version of the P2* macros.  These macros should be used to ensure
+ * that the result is correctly calculated based on the data type of (x),
+ * which is passed in as the last argument, regardless of the data
+ * type of the alignment.  For example, if (x) is of type uint64_t,
+ * and we want to round it up to a page boundary using "PAGESIZE" as
+ * the alignment, we can do either
+ *	P2ROUNDUP(x, (uint64_t)PAGESIZE)
+ * or
+ *	P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
+ */
+#define	P2ALIGN_TYPED(x, align, type)	\
+	((type)(x) & -(type)(align))
+#define	P2PHASE_TYPED(x, align, type)	\
+	((type)(x) & ((type)(align) - 1))
+#define	P2NPHASE_TYPED(x, align, type)	\
+	(-(type)(x) & ((type)(align) - 1))
+#define	P2ROUNDUP_TYPED(x, align, type)	\
+	(-(-(type)(x) & -(type)(align)))
+#define	P2END_TYPED(x, align, type)	\
+	(-(~(type)(x) & -(type)(align)))
+#define	P2PHASEUP_TYPED(x, align, phase, type)	\
+	((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
+#define	P2CROSS_TYPED(x, y, align, type)	\
+	(((type)(x) ^ (type)(y)) > (type)(align) - 1)
+#define	P2SAMEHIGHBIT_TYPED(x, y, type) \
+	(((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
+
+/*
+ * Macros to atomically increment/decrement a variable.  mutex and var
+ * must be pointers.
+ */
+#define	INCR_COUNT(var, mutex) mutex_enter(mutex), (*(var))++, mutex_exit(mutex)
+#define	DECR_COUNT(var, mutex) mutex_enter(mutex), (*(var))--, mutex_exit(mutex)
+
+/*
+ * Macros to declare bitfields - the order in the parameter list is
+ * Low to High - that is, declare bit 0 first.  We only support 8-bit bitfields
+ * because if a field crosses a byte boundary it's not likely to be meaningful
+ * without reassembly in its nonnative endianness.
+ */
+#if defined(_BIT_FIELDS_LTOH)
+#define	DECL_BITFIELD2(_a, _b)				\
+	uint8_t _a, _b
+#define	DECL_BITFIELD3(_a, _b, _c)			\
+	uint8_t _a, _b, _c
+#define	DECL_BITFIELD4(_a, _b, _c, _d)			\
+	uint8_t _a, _b, _c, _d
+#define	DECL_BITFIELD5(_a, _b, _c, _d, _e)		\
+	uint8_t _a, _b, _c, _d, _e
+#define	DECL_BITFIELD6(_a, _b, _c, _d, _e, _f)		\
+	uint8_t _a, _b, _c, _d, _e, _f
+#define	DECL_BITFIELD7(_a, _b, _c, _d, _e, _f, _g)	\
+	uint8_t _a, _b, _c, _d, _e, _f, _g
+#define	DECL_BITFIELD8(_a, _b, _c, _d, _e, _f, _g, _h)	\
+	uint8_t _a, _b, _c, _d, _e, _f, _g, _h
+#elif defined(_BIT_FIELDS_HTOL)
+#define	DECL_BITFIELD2(_a, _b)				\
+	uint8_t _b, _a
+#define	DECL_BITFIELD3(_a, _b, _c)			\
+	uint8_t _c, _b, _a
+#define	DECL_BITFIELD4(_a, _b, _c, _d)			\
+	uint8_t _d, _c, _b, _a
+#define	DECL_BITFIELD5(_a, _b, _c, _d, _e)		\
+	uint8_t _e, _d, _c, _b, _a
+#define	DECL_BITFIELD6(_a, _b, _c, _d, _e, _f)		\
+	uint8_t _f, _e, _d, _c, _b, _a
+#define	DECL_BITFIELD7(_a, _b, _c, _d, _e, _f, _g)	\
+	uint8_t _g, _f, _e, _d, _c, _b, _a
+#define	DECL_BITFIELD8(_a, _b, _c, _d, _e, _f, _g, _h)	\
+	uint8_t _h, _g, _f, _e, _d, _c, _b, _a
+#else
+#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
+#endif  /* _BIT_FIELDS_LTOH */
+
+#if !defined(_KMEMUSER) && !defined(offsetof)
+
+/* avoid any possibility of clashing with <stddef.h> version */
+
+#define	offsetof(type, field)	__offsetof(type, field)
+#endif
+
+/*
+ * Find highest one bit set.
+ *      Returns bit number + 1 of highest bit that is set, otherwise returns 0.
+ * High order bit is 31 (or 63 in _LP64 kernel).
+ */
+static __inline int
+highbit(ulong_t i)
+{
+#if defined(HAVE_INLINE_FLSL)
+	return (flsl(i));
+#else
+	int pos, width;
+
+	/* No bit set at all. */
+	if (i == 0)
+		return (0);
+	/* Positions are reported 1-based, so start from 1. */
+	pos = 1;
+#ifdef _LP64
+	/* The upper 32-bit half is only tested when _LP64 is defined. */
+	if ((i & 0xffffffff00000000ul) != 0) {
+		i >>= 32;
+		pos += 32;
+	}
+#endif
+	/*
+	 * Binary search over the low 32 bits: test the upper half of each
+	 * 32-, 16-, 8-, 4- and 2-bit window and shift it down when it is set.
+	 */
+	for (width = 16; width != 0; width >>= 1) {
+		if (((i >> width) & ((1ul << width) - 1)) != 0) {
+			i >>= width;
+			pos += width;
+		}
+	}
+	return (pos);
+#endif
+}
+
+/*
+ * Find highest one bit set.
+ *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
+ */
+static __inline int
+highbit64(uint64_t i)
+{
+#if defined(HAVE_INLINE_FLSLL)
+	return (flsll(i));
+#else
+	int pos, width;
+
+	/* No bit set at all. */
+	if (i == 0)
+		return (0);
+
+	/*
+	 * Binary search.  Each pass looks at the upper `width' bits of the
+	 * 2*width-bit window that still holds the highest set bit; when they
+	 * are non-zero they are shifted down and `width' positions are
+	 * credited.  Positions are 1-based, hence the starting value of 1.
+	 */
+	pos = 1;
+	for (width = 32; width != 0; width >>= 1) {
+		uint64_t upper = (i >> width) & ((1ULL << width) - 1);
+
+		if (upper != 0) {
+			i >>= width;
+			pos += width;
+		}
+	}
+
+	return (pos);
+#endif
+}
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_SYSMACROS_H */

diff --git a/zfs/include/os/freebsd/spl/sys/systeminfo.h b/zfs/include/os/freebsd/spl/sys/systeminfo.h
new file mode 100644
index 0000000..4028cd7
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/systeminfo.h

@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SYSTEMINFO_H_
+#define	_SYS_SYSTEMINFO_H_
+
+#define	HW_HOSTID_LEN	11
+
+#endif	/* !_SYS_SYSTEMINFO_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/systm.h b/zfs/include/os/freebsd/spl/sys/systm.h
new file mode 100644
index 0000000..98ee955
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/systm.h

@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_SYSTM_H_
+#define	_OPENSOLARIS_SYS_SYSTM_H_
+
+#include <sys/endian.h>
+#include_next <sys/systm.h>
+
+#include <sys/string.h>
+
+#define	PAGESIZE	PAGE_SIZE
+#define	PAGEOFFSET	(PAGESIZE - 1)
+#define	PAGEMASK	(~PAGEOFFSET)
+
+#define	delay(x)	pause("soldelay", (x))
+
+#endif	/* _OPENSOLARIS_SYS_SYSTM_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/taskq.h b/zfs/include/os/freebsd/spl/sys/taskq.h
new file mode 100644
index 0000000..3040549
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/taskq.h

@@ -0,0 +1,124 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_TASKQ_H
+#define	_SYS_TASKQ_H
+
+#ifdef _KERNEL
+
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/taskqueue.h>
+#include <sys/thread.h>
+#include <sys/ck.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	TASKQ_NAMELEN	31
+
+typedef struct taskq {
+	struct taskqueue	*tq_queue;	/* wrapped struct taskqueue */
+} taskq_t;
+
+typedef uintptr_t taskqid_t;	/* 0 is TASKQID_INVALID */
+typedef void (task_func_t)(void *);	/* callback taking one opaque pointer */
+
+typedef struct taskq_ent {
+	struct task	 tqent_task;	/* embedded struct task */
+	struct timeout_task tqent_timeout_task;	/* embedded timeout task */
+	task_func_t	*tqent_func;	/* callback pointer */
+	void		*tqent_arg;	/* presumably passed to tqent_func */
+	taskqid_t tqent_id;
+	CK_LIST_ENTRY(taskq_ent) tqent_hash;	/* CK list linkage */
+	uint8_t tqent_type;
+	uint8_t tqent_registered;
+	uint8_t tqent_cancelled;
+	volatile uint32_t tqent_rc;	/* NOTE(review): refcount? confirm */
+} taskq_ent_t;
+
+/*
+ * Public flags for taskq_create(): bit range 0-15
+ */
+#define	TASKQ_PREPOPULATE	0x0001	/* Prepopulate with threads and data */
+#define	TASKQ_CPR_SAFE		0x0002	/* Use CPR safe protocol */
+#define	TASKQ_DYNAMIC		0x0004	/* Use dynamic thread scheduling */
+#define	TASKQ_THREADS_CPU_PCT	0x0008	/* number of threads as % of ncpu */
+#define	TASKQ_DC_BATCH		0x0010	/* Taskq uses SDC in batch mode */
+
+/*
+ * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be the same as
+ * KM_SLEEP/KM_NOSLEEP.
+ */
+#define	TQ_SLEEP	0x00	/* Can block for memory */
+#define	TQ_NOSLEEP	0x01	/* cannot block for memory; may fail */
+#define	TQ_NOQUEUE	0x02	/* Do not enqueue if can't dispatch */
+#define	TQ_NOALLOC	0x04	/* cannot allocate memory; may fail */
+#define	TQ_FRONT	0x08	/* Put task at the front of the queue */
+
+#define	TASKQID_INVALID		((taskqid_t)0)
+
+#define	taskq_init_ent(x)
+extern taskq_t *system_taskq;
+/* Global dynamic task queue for long delay */
+extern taskq_t *system_delay_taskq;
+
+extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
+extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
+    uint_t, clock_t);
+extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+    taskq_ent_t *);
+extern int taskq_empty_ent(taskq_ent_t *);
+taskq_t	*taskq_create(const char *, int, pri_t, int, int, uint_t);
+taskq_t	*taskq_create_instance(const char *, int, int, pri_t, int, int, uint_t);
+taskq_t	*taskq_create_proc(const char *, int, pri_t, int, int,
+    struct proc *, uint_t);
+taskq_t	*taskq_create_sysdc(const char *, int, int, int,
+    struct proc *, uint_t, uint_t);
+void	nulltask(void *);
+extern void taskq_destroy(taskq_t *);
+extern void taskq_wait_id(taskq_t *, taskqid_t);
+extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
+extern void taskq_wait(taskq_t *);
+extern int taskq_cancel_id(taskq_t *, taskqid_t);
+extern int taskq_member(taskq_t *, kthread_t *);
+extern taskq_t *taskq_of_curthread(void);
+void	taskq_suspend(taskq_t *);
+int	taskq_suspended(taskq_t *);
+void	taskq_resume(taskq_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _KERNEL */
+
+#ifdef _STANDALONE
+typedef int taskq_ent_t;
+#define	taskq_init_ent(x)
+#endif /* _STANDALONE */
+
+#endif	/* _SYS_TASKQ_H */

diff --git a/zfs/include/os/freebsd/spl/sys/thread.h b/zfs/include/os/freebsd/spl/sys/thread.h
new file mode 100644
index 0000000..4fb1a54
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/thread.h

@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_THREAD_H_
+#define	_SPL_THREAD_H_
+
+#define	getcomm() curthread->td_name	/* td_name of the current thread */
+#define	getpid() curthread->td_tid	/* NB: expands to the thread's td_tid */
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/time.h b/zfs/include/os/freebsd/spl/sys/time.h
new file mode 100644
index 0000000..fbc679a
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/time.h

@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_TIME_H_
+#define	_OPENSOLARIS_SYS_TIME_H_
+#pragma once
+#include_next <sys/time.h>
+#include <sys/debug.h>
+#ifndef _SYS_KERNEL_H_
+extern int hz;
+#endif
+
+#define	SEC		1
+#define	MILLISEC	1000UL
+#define	MICROSEC	1000000UL
+#define	NANOSEC	1000000000UL
+#define	TIME_MAX	LLONG_MAX
+
+#define	MSEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MILLISEC))
+#define	NSEC2MSEC(n)	((n) / (NANOSEC / MILLISEC))
+
+#define	USEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MICROSEC))
+#define	NSEC2USEC(n)	((n) / (NANOSEC / MICROSEC))
+
+#define	NSEC2SEC(n)	((n) / (NANOSEC / SEC))
+#define	SEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / SEC))
+
+typedef longlong_t	hrtime_t;
+
+#if defined(__i386__) || defined(__powerpc__)
+#define	TIMESPEC_OVERFLOW(ts)						\
+	((ts)->tv_sec < INT32_MIN || (ts)->tv_sec > INT32_MAX)
+#else
+#define	TIMESPEC_OVERFLOW(ts)						\
+	((ts)->tv_sec < INT64_MIN || (ts)->tv_sec > INT64_MAX)
+#endif
+
+#define	SEC_TO_TICK(sec)	((sec) * hz)
+#define	NSEC_TO_TICK(nsec)	((nsec) / (NANOSEC / hz))
+
+static __inline hrtime_t
+gethrtime(void)
+{
+	struct timespec now;
+
+	/* Fold nanouptime()'s sec/nsec pair into one nanosecond count. */
+	nanouptime(&now);
+
+	return ((hrtime_t)now.tv_sec * NANOSEC + now.tv_nsec);
+}
+
+#define	gethrestime_sec()	(time_second)
+#define	gethrestime(ts)		getnanotime(ts)
+#define	gethrtime_waitfree()	gethrtime()
+
+extern int nsec_per_tick;	/* nanoseconds per clock tick */
+
+#define	ddi_get_lbolt64()				\
+	(int64_t)(((getsbinuptime() >> 16) * hz) >> 16)
+#define	ddi_get_lbolt()		(clock_t)ddi_get_lbolt64()
+
+#else
+
+static __inline hrtime_t
+gethrtime(void)
+{
+	struct timespec now;	/* filled from CLOCK_UPTIME below */
+	clock_gettime(CLOCK_UPTIME, &now);
+	return ((u_int64_t)now.tv_sec * NANOSEC + now.tv_nsec);
+}
+#endif	/* !_OPENSOLARIS_SYS_TIME_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/timer.h b/zfs/include/os/freebsd/spl/sys/timer.h
new file mode 100644
index 0000000..d4694bb
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/timer.h

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_TIMER_H_
+#define	_SPL_TIMER_H_
+#define	ddi_time_after(a, b) ((a) > (b))
+#define	ddi_time_after64(a, b) ((a) > (b))
+/* pause_sbt() for `wakeup' us; 3rd argument is (wakeupepsilon - wakeup). */
+#define	usleep_range(wakeup, wakeupepsilon) pause_sbt("usleep_range", \
+	ustosbt(wakeup), ustosbt((wakeupepsilon) - (wakeup)), 0)
+
+#define	schedule() pause("schedule", 1)
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/trace.h b/zfs/include/os/freebsd/spl/sys/trace.h
new file mode 100644
index 0000000..d9639d2
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/trace.h

@@ -0,0 +1 @@
+/* keep me */

diff --git a/zfs/include/os/freebsd/spl/sys/trace_zfs.h b/zfs/include/os/freebsd/spl/sys/trace_zfs.h
new file mode 100644
index 0000000..d9639d2
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/trace_zfs.h

@@ -0,0 +1 @@
+/* keep me */

diff --git a/zfs/include/os/freebsd/spl/sys/types.h b/zfs/include/os/freebsd/spl/sys/types.h
new file mode 100644
index 0000000..ecb91fd
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/types.h

@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_SYS_TYPES_H_
+#define	_SPL_SYS_TYPES_H_
+
+#pragma once
+/*
+ * This is a bag of dirty hacks to keep things compiling.
+ */
+#include_next <sys/types.h>
+
+#ifdef __ILP32__
+typedef __uint64_t u_longlong_t;
+typedef __int64_t longlong_t;
+#else
+typedef unsigned long long	u_longlong_t;
+typedef long long		longlong_t;
+#endif
+#include <sys/stdint.h>
+
+#define	_CLOCK_T_DECLARED
+
+#include <sys/types32.h>
+#include <sys/_stdarg.h>
+#include <linux/types.h>
+
+#define	MAXNAMELEN	256
+
+
+
+typedef	void zfs_kernel_param_t;
+
+typedef	struct timespec	timestruc_t;
+typedef	struct timespec	timespec_t;
+typedef struct timespec inode_timespec_t;
+/* BEGIN CSTYLED */
+typedef u_int		uint_t;
+typedef u_char		uchar_t;
+typedef u_short		ushort_t;
+typedef u_long		ulong_t;
+typedef	int		minor_t;
+/* END CSTYLED */
+#ifndef	_OFF64_T_DECLARED
+#define	_OFF64_T_DECLARED
+typedef off_t		off64_t;
+#endif
+typedef id_t		taskid_t;
+typedef id_t		projid_t;
+typedef id_t		poolid_t;
+typedef uint_t		zoneid_t;
+typedef id_t		ctid_t;
+typedef	mode_t		o_mode_t;
+typedef	uint64_t	pgcnt_t;
+
+#define	B_FALSE	0
+#define	B_TRUE	1
+
+typedef	short		index_t;
+typedef	off_t		offset_t;
+#ifndef _PTRDIFF_T_DECLARED
+typedef	__ptrdiff_t		ptrdiff_t;	/* pointer difference */
+#define	_PTRDIFF_T_DECLARED
+#endif
+typedef	int64_t		rlim64_t;
+typedef	int		major_t;
+
+#else
+#ifdef NEED_SOLARIS_BOOLEAN
+#if defined(__XOPEN_OR_POSIX)
+typedef enum { _B_FALSE, _B_TRUE }	boolean_t;
+#else
+typedef enum { B_FALSE, B_TRUE }	boolean_t;
+#endif /* defined(__XOPEN_OR_POSIX) */
+#endif
+
+typedef	u_longlong_t	u_offset_t;
+typedef	u_longlong_t	len_t;
+
+typedef	longlong_t	diskaddr_t;
+
+#include <sys/debug.h>
+#endif	/* !_SPL_SYS_TYPES_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/types32.h b/zfs/include/os/freebsd/spl/sys/types32.h
new file mode 100644
index 0000000..907b667
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/types32.h

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_TYPES32_H
+#define	_SPL_TYPES32_H
+
+typedef uint32_t	caddr32_t;
+typedef int32_t	daddr32_t;
+typedef int32_t	time32_t;
+typedef uint32_t	size32_t;
+
+#endif  /* _SPL_TYPES32_H */

diff --git a/zfs/include/os/freebsd/spl/sys/uio.h b/zfs/include/os/freebsd/spl/sys/uio.h
new file mode 100644
index 0000000..b71f2f2
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/uio.h

@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_UIO_H_
+#define	_OPENSOLARIS_SYS_UIO_H_
+
+#ifndef _STANDALONE
+
+#include_next <sys/uio.h>
+#include <sys/_uio.h>
+#include <sys/debug.h>
+
+typedef	struct iovec	iovec_t;
+typedef	enum uio_seg	zfs_uio_seg_t;
+typedef	enum uio_rw	zfs_uio_rw_t;
+
+/*
+ * Wrapper type holding a single pointer to the native struct uio.
+ */
+typedef struct zfs_uio {
+	struct uio	*uio;	/* the wrapped native uio */
+} zfs_uio_t;
+
+/*
+ * Accessors for the struct uio wrapped by a zfs_uio_t pointer "u".
+ * GET_UIO_STRUCT() yields the wrapped pointer itself; the others expand to a
+ * plain member expression on it, so they can be both read and assigned to.
+ * The expansions are not enclosed in an outer pair of parentheses.
+ */
+#define	GET_UIO_STRUCT(u)	(u)->uio
+#define	zfs_uio_segflg(u)	GET_UIO_STRUCT(u)->uio_segflg
+#define	zfs_uio_offset(u)	GET_UIO_STRUCT(u)->uio_offset
+#define	zfs_uio_resid(u)	GET_UIO_STRUCT(u)->uio_resid
+#define	zfs_uio_iovcnt(u)	GET_UIO_STRUCT(u)->uio_iovcnt
+#define	zfs_uio_iovlen(u, idx)	GET_UIO_STRUCT(u)->uio_iov[(idx)].iov_len
+#define	zfs_uio_iovbase(u, idx)	GET_UIO_STRUCT(u)->uio_iov[(idx)].iov_base
+#define	zfs_uio_td(u)		GET_UIO_STRUCT(u)->uio_td
+#define	zfs_uio_rw(u)		GET_UIO_STRUCT(u)->uio_rw
+/* Expands to nothing; both arguments are discarded. */
+#define	zfs_uio_fault_disable(u, set)
+/* Always evaluates to 0; neither argument is used. */
+#define	zfs_uio_prefaultpages(size, u)	(0)
+
+/*
+ * Store "off" as the uio_offset of the struct uio wrapped by "uio".
+ */
+static inline void
+zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
+{
+	GET_UIO_STRUCT(uio)->uio_offset = off;
+}
+
+/*
+ * Advance the struct uio wrapped by "uio" by "size" bytes: uio_offset is
+ * increased by "size" and uio_resid is decreased by the same amount.
+ */
+static inline void
+zfs_uio_advance(zfs_uio_t *uio, size_t size)
+{
+	struct uio *native = GET_UIO_STRUCT(uio);
+
+	native->uio_offset += size;
+	native->uio_resid -= size;
+}
+
+/*
+ * Point the wrapper "uio" at the native struct uio "uio_s".  Only the
+ * pointer is stored; nothing is copied.
+ */
+static __inline void
+zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s)
+{
+	uio->uio = uio_s;
+}
+
+int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio);
+
+#endif /* !_STANDALONE */
+
+#endif	/* !_OPENSOLARIS_SYS_UIO_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/uuid.h b/zfs/include/os/freebsd/spl/sys/uuid.h
new file mode 100644
index 0000000..26d46e8
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/uuid.h

@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_UUID_H
+#define	_SYS_UUID_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * The copyright in this file is taken from the original Leach
+ * & Salz UUID specification, from which this implementation
+ * is derived.
+ */
+
+/*
+ * Copyright (c) 1990- 1993, 1996 Open Software Foundation, Inc.
+ * Copyright (c) 1989 by Hewlett-Packard Company, Palo Alto, Ca. &
+ * Digital Equipment Corporation, Maynard, Mass.  Copyright (c) 1998
+ * Microsoft.  To anyone who acknowledges that this file is provided
+ * "AS IS" without any express or implied warranty: permission to use,
+ * copy, modify, and distribute this file for any purpose is hereby
+ * granted without fee, provided that the above copyright notices and
+ * this notice appears in all source code copies, and that none of the
+ * names of Open Software Foundation, Inc., Hewlett-Packard Company,
+ * or Digital Equipment Corporation be used in advertising or
+ * publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Neither Open Software
+ * Foundation, Inc., Hewlett-Packard Company, Microsoft, nor Digital
+ * Equipment Corporation makes any representations about the
+ * suitability of this software for any purpose.
+ */
+
+#include <sys/types.h>
+#include <sys/byteorder.h>
+
+/*
+ * Six-byte node identifier.
+ */
+typedef struct {
+	uint8_t		nodeID[6];
+} uuid_node_t;
+
+/*
+ * The uuid type used throughout when referencing uuids themselves.
+ * The members add up to 16 bytes (4 + 2 + 2 + 1 + 1 + 6).  Only the first
+ * three members are wider than one byte.
+ */
+typedef struct uuid {
+	uint32_t	time_low;
+	uint16_t	time_mid;
+	uint16_t	time_hi_and_version;
+	uint8_t		clock_seq_hi_and_reserved;
+	uint8_t		clock_seq_low;
+	uint8_t		node_addr[6];
+} uuid_t;
+
+#define	UUID_PRINTABLE_STRING_LENGTH 37
+
+/*
+ * Convert a uuid to/from little-endian format
+ */
+#define	UUID_LE_CONVERT(dest, src)					\
+{									\
+	(dest) = (src);							\
+	(dest).time_low = LE_32((dest).time_low);			\
+	(dest).time_mid = LE_16((dest).time_mid);			\
+	(dest).time_hi_and_version = LE_16((dest).time_hi_and_version);	\
+}
+
+/*
+ * Stub: always returns 0 and never examines "uuid".
+ */
+static __inline int
+uuid_is_null(const caddr_t uuid)
+{
+	return (0);
+}
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_UUID_H */

diff --git a/zfs/include/os/freebsd/spl/sys/vfs.h b/zfs/include/os/freebsd/spl/sys/vfs.h
new file mode 100644
index 0000000..a432f6c
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/vfs.h

@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_VFS_H_
+#define	_OPENSOLARIS_SYS_VFS_H_
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#define	rootdir	rootvnode
+
+struct thread;
+struct vnode;
+typedef	struct mount	vfs_t;
+
+typedef	int	umode_t;
+
+#define	vfs_flag	mnt_flag
+#define	vfs_data	mnt_data
+#define	vfs_count	mnt_ref
+#define	vfs_fsid	mnt_stat.f_fsid
+#define	vfs_bsize	mnt_stat.f_bsize
+#define	vfs_resource	mnt_stat.f_mntfromname
+
+#define	v_flag		v_vflag
+#define	v_vfsp		v_mount
+
+#define	VFS_RDONLY	MNT_RDONLY
+#define	VFS_NOSETUID	MNT_NOSUID
+#define	VFS_NOEXEC	MNT_NOEXEC
+
+#define	fs_vscan(vp, cr, async)	(0)
+
+#define	VROOT		VV_ROOT
+
+#define	XU_NGROUPS	16
+
+/*
+ * Structure defining a mount option for a filesystem.
+ * option names are found in mntent.h
+ */
+typedef struct mntopt {
+	char	*mo_name;	/* option name */
+	char	**mo_cancel;	/* list of options cancelled by this one */
+	char	*mo_arg;	/* argument string for this option */
+	int	mo_flags;	/* flags for this mount option */
+	void	*mo_data;	/* filesystem specific data */
+} mntopt_t;
+
+/*
+ * Flags that apply to mount options
+ */
+
+#define	MO_SET		0x01		/* option is set */
+#define	MO_NODISPLAY	0x02		/* option not listed in mnttab */
+#define	MO_HASVALUE	0x04		/* option takes a value */
+#define	MO_IGNORE	0x08		/* option ignored by parser */
+#define	MO_DEFAULT	MO_SET		/* option is on by default */
+#define	MO_TAG		0x10		/* flags a tag set by user program */
+#define	MO_EMPTY	0x20		/* empty space in option table */
+
+#define	VFS_NOFORCEOPT	0x01		/* honor MO_IGNORE (don't set option) */
+#define	VFS_DISPLAY	0x02		/* Turn off MO_NODISPLAY bit for opt */
+#define	VFS_NODISPLAY	0x04		/* Turn on MO_NODISPLAY bit for opt */
+#define	VFS_CREATEOPT	0x08		/* Create the opt if it's not there */
+
+/*
+ * Structure holding mount option strings for the mounted file system.
+ */
+typedef struct mntopts {
+	uint_t		mo_count;		/* number of entries in table */
+	mntopt_t	*mo_list;		/* list of mount options */
+} mntopts_t;
+
+void vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
+    int flags __unused);
+void vfs_clearmntopt(vfs_t *vfsp, const char *name);
+int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
+int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
+    char *fspath, char *fspec, int fsflags);
+
+typedef	uint64_t	vfs_feature_t;
+
+#define	VFSFT_XVATTR		0x100000001	/* Supports xvattr for attrs */
+#define	VFSFT_CASEINSENSITIVE	0x100000002	/* Supports case-insensitive */
+#define	VFSFT_NOCASESENSITIVE	0x100000004	/* NOT case-sensitive */
+#define	VFSFT_DIRENTFLAGS	0x100000008	/* Supports dirent flags */
+#define	VFSFT_ACLONCREATE	0x100000010	/* Supports ACL on create */
+#define	VFSFT_ACEMASKONACCESS	0x100000020	/* Can use ACEMASK for access */
+#define	VFSFT_SYSATTR_VIEWS	0x100000040	/* Supports sysattr view i/f */
+#define	VFSFT_ACCESS_FILTER	0x100000080	/* dirents filtered by access */
+#define	VFSFT_REPARSE		0x100000100	/* Supports reparse point */
+#define	VFSFT_ZEROCOPY_SUPPORTED	0x100000200
+				/* Supports loaning/returning cache buffers */
+
+#define	vfs_set_feature(vfsp, feature)		do { } while (0)
+#define	vfs_clear_feature(vfsp, feature)	do { } while (0)
+#define	vfs_has_feature(vfsp, feature)		(0)
+
+#include <sys/mount.h>
+#endif	/* _OPENSOLARIS_SYS_VFS_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/vm.h b/zfs/include/os/freebsd/spl/sys/vm.h
new file mode 100644
index 0000000..7b3830b
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/vm.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_VM_H_
+#define	_OPENSOLARIS_SYS_VM_H_
+
+#include <sys/sf_buf.h>
+
+extern const int zfs_vm_pagerret_bad;
+extern const int zfs_vm_pagerret_error;
+extern const int zfs_vm_pagerret_ok;
+extern const int zfs_vm_pagerput_sync;
+extern const int zfs_vm_pagerput_inval;
+
+void	zfs_vmobject_assert_wlocked(vm_object_t object);
+void	zfs_vmobject_wlock(vm_object_t object);
+void	zfs_vmobject_wunlock(vm_object_t object);
+
+/*
+ * Version shims.  From __FreeBSD_version 1300081 on, the *_12() lock helpers
+ * expand to nothing.  On older versions they forward to the
+ * zfs_vmobject_*() functions declared above, and the
+ * vm_page_grab*_unlocked() names are mapped onto vm_page_grab() and
+ * vm_page_grab_valid().
+ */
+#if __FreeBSD_version >= 1300081
+#define	zfs_vmobject_assert_wlocked_12(x)
+#define	zfs_vmobject_wlock_12(x)
+#define	zfs_vmobject_wunlock_12(x)
+#else
+#define	zfs_vmobject_assert_wlocked_12(x)		\
+	zfs_vmobject_assert_wlocked((x))
+#define	zfs_vmobject_wlock_12(x)				\
+	zfs_vmobject_wlock(x)
+#define	zfs_vmobject_wunlock_12(x)				\
+	zfs_vmobject_wunlock(x)
+#define	vm_page_grab_unlocked(obj, idx, flags)	\
+	vm_page_grab((obj), (idx), (flags))
+#define	vm_page_grab_valid_unlocked(m, obj, idx, flags)	\
+	vm_page_grab_valid((m), (obj), (idx), (flags))
+#endif
+/*
+ * Allocate an sf_buf for page "pp" (flags argument 0), hand it back through
+ * "sfp", and return the address reported by sf_buf_kva() for it, cast to
+ * caddr_t.
+ */
+static inline caddr_t
+zfs_map_page(vm_page_t pp, struct sf_buf **sfp)
+{
+	struct sf_buf *sf = sf_buf_alloc(pp, 0);
+
+	*sfp = sf;
+	return ((caddr_t)sf_buf_kva(sf));
+}
+
+/*
+ * Free the sf_buf "sf", e.g. one obtained through zfs_map_page().
+ */
+static inline void
+zfs_unmap_page(struct sf_buf *sf)
+{
+	sf_buf_free(sf);
+}
+
+#endif	/* _OPENSOLARIS_SYS_VM_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/vmsystm.h b/zfs/include/os/freebsd/spl/sys/vmsystm.h
new file mode 100644
index 0000000..0db34bb
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/vmsystm.h

@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SPL_VMSYSTM_H_
+#define	_SPL_VMSYSTM_H_
+
+#define	xcopyout copyout
+
+#endif

diff --git a/zfs/include/os/freebsd/spl/sys/vnode.h b/zfs/include/os/freebsd/spl/sys/vnode.h
new file mode 100644
index 0000000..b7ac12f
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/vnode.h

@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_VNODE_H_
+#define	_OPENSOLARIS_SYS_VNODE_H_
+
+struct vnode;
+struct vattr;
+struct xucred;
+
+typedef struct flock	flock64_t;
+typedef	struct vnode	vnode_t;
+typedef	struct vattr	vattr_t;
+typedef enum vtype vtype_t;
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include_next <sys/sdt.h>
+#include <sys/namei.h>
+enum symfollow { NO_FOLLOW = NOFOLLOW };
+
+#define	NOCRED	((struct ucred *)0)	/* no credential available */
+#define	F_FREESP	11 	/* Free file space */
+
+#include <sys/proc.h>
+#include <sys/vnode_impl.h>
+#ifndef IN_BASE
+#include_next <sys/vnode.h>
+#endif
+#include <sys/mount.h>
+#include <sys/cred.h>
+#include <sys/fcntl.h>
+#include <sys/refcount.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/syscallsubr.h>
+#include <sys/vm.h>
+#include <vm/vm_object.h>
+
+typedef	struct vop_vector	vnodeops_t;
+#define	VOP_FID		VOP_VPTOFH
+#define	vop_fid		vop_vptofh
+#define	vop_fid_args	vop_vptofh_args
+#define	a_fid		a_fhp
+
+#define	rootvfs		(rootvnode == NULL ? NULL : rootvnode->v_mount)
+
+#ifndef IN_BASE
+/*
+ * Returns non-zero when the mount that "vp" belongs to has MNT_RDONLY set
+ * in its mnt_flag, and zero otherwise.
+ */
+static __inline int
+vn_is_readonly(vnode_t *vp)
+{
+	struct mount *mp = vp->v_mount;
+
+	return (mp->mnt_flag & MNT_RDONLY);
+}
+#endif
+#define	vn_vfswlock(vp)		(0)
+#define	vn_vfsunlock(vp)	do { } while (0)
+#define	vn_ismntpt(vp)	   \
+	((vp)->v_type == VDIR && (vp)->v_mountedhere != NULL)
+#define	vn_mountedvfs(vp)	((vp)->v_mountedhere)
+#define	vn_has_cached_data(vp)	\
+	((vp)->v_object != NULL && \
+	(vp)->v_object->resident_page_count > 0)
+
+/*
+ * Run vm_object_page_clean() over the VM object of "vp" when that object is
+ * reported as possibly dirty.
+ *
+ * vp   - vnode whose v_object is used; v_object is not checked for NULL
+ *        here.  NOTE(review): presumably callers test
+ *        vn_has_cached_data() first -- confirm.
+ * sync - when true, OBJPC_SYNC is passed as the flags argument; otherwise
+ *        the flags argument is 0.
+ */
+static __inline void
+vn_flush_cached_data(vnode_t *vp, boolean_t sync)
+{
+	/* Only the dirtiness test is versioned; both arms open the same block. */
+#if __FreeBSD_version > 1300054
+	if (vm_object_mightbedirty(vp->v_object)) {
+#else
+	if (vp->v_object->flags & OBJ_MIGHTBEDIRTY) {
+#endif
+		int flags = sync ? OBJPC_SYNC : 0;
+		/* Shared vnode lock first, then the object lock; undone in reverse. */
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		zfs_vmobject_wlock(vp->v_object);
+		vm_object_page_clean(vp->v_object, 0, 0, flags);
+		zfs_vmobject_wunlock(vp->v_object);
+		VOP_UNLOCK(vp);
+	}
+}
+
+#define	vn_exists(vp)		do { } while (0)
+#define	vn_invalid(vp)		do { } while (0)
+#define	vn_renamepath(tdvp, svp, tnm, lentnm)	do { } while (0)
+#define	vn_free(vp)		do { } while (0)
+#define	vn_matchops(vp, vops)	((vp)->v_op == &(vops))
+
+#define	VN_HOLD(v)	vref(v)
+#define	VN_RELE(v)	vrele(v)
+#define	VN_URELE(v)	vput(v)
+
+#define	vnevent_create(vp, ct)			do { } while (0)
+#define	vnevent_link(vp, ct)			do { } while (0)
+#define	vnevent_remove(vp, dvp, name, ct)	do { } while (0)
+#define	vnevent_rmdir(vp, dvp, name, ct)	do { } while (0)
+#define	vnevent_rename_src(vp, dvp, name, ct)	do { } while (0)
+#define	vnevent_rename_dest(vp, dvp, name, ct)	do { } while (0)
+#define	vnevent_rename_dest_dir(vp, ct)		do { } while (0)
+
+#define	specvp(vp, rdev, type, cr)	(VN_HOLD(vp), (vp))
+#define	MANDLOCK(vp, mode)	(0)
+
+/*
+ * We will use va_spare in place of Solaris' va_mask.
+ * This field is initialized in zfs_setattr().
+ */
+#define	va_mask		va_spare
+/* TODO: va_fileid is shorter than va_nodeid !!! */
+#define	va_nodeid	va_fileid
+/* TODO: This field needs conversion! */
+#define	va_nblocks	va_bytes
+#define	va_blksize	va_blocksize
+
+#define	MAXOFFSET_T	OFF_MAX
+#define	EXCL		0
+
+#define	FCREAT		O_CREAT
+#define	FTRUNC		O_TRUNC
+#define	FEXCL		O_EXCL
+#ifndef FDSYNC
+#define	FDSYNC		FFSYNC
+#endif
+#define	FRSYNC		FFSYNC
+#define	FSYNC		FFSYNC
+#define	FOFFMAX		0x00
+#define	FIGNORECASE	0x00
+
+/*
+ * Attributes of interest to the caller of setattr or getattr.
+ */
+#define	AT_MODE		0x00002
+#define	AT_UID		0x00004
+#define	AT_GID		0x00008
+#define	AT_FSID		0x00010
+#define	AT_NODEID	0x00020
+#define	AT_NLINK	0x00040
+#define	AT_SIZE		0x00080
+#define	AT_ATIME	0x00100
+#define	AT_MTIME	0x00200
+#define	AT_CTIME	0x00400
+#define	AT_RDEV		0x00800
+#define	AT_BLKSIZE	0x01000
+#define	AT_NBLOCKS	0x02000
+/*			0x04000 */	/* unused */
+#define	AT_SEQ		0x08000
+/*
+ * If AT_XVATTR is set then there are additional bits to process in
+ * the xvattr_t's attribute bitmap.  If this is not set then the bitmap
+ * MUST be ignored.  Note that this bit must be set/cleared explicitly.
+ * That is, setting AT_ALL will NOT set AT_XVATTR.
+ */
+#define	AT_XVATTR	0x10000
+
+#define	AT_ALL		(AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\
+			AT_NLINK|AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|\
+			AT_RDEV|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
+
+#define	AT_STAT		(AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|AT_NLINK|\
+			AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|AT_RDEV)
+
+#define	AT_TIMES	(AT_ATIME|AT_MTIME|AT_CTIME)
+
+#define	AT_NOSET	(AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|\
+			AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
+
+#ifndef IN_BASE
+/*
+ * Rebuild vap->va_mask from the contents of *vap.  An AT_* bit is set for
+ * each of uid, gid, size, atime, mtime and mode whose field differs from
+ * VNOVAL, and AT_XVATTR is set when va_flags differs from VNOVAL.  Any
+ * previous mask value is discarded.
+ */
+static __inline void
+vattr_init_mask(vattr_t *vap)
+{
+	int bits = 0;
+
+	if (vap->va_uid != (uid_t)VNOVAL)
+		bits |= AT_UID;
+	if (vap->va_gid != (gid_t)VNOVAL)
+		bits |= AT_GID;
+	if (vap->va_size != (u_quad_t)VNOVAL)
+		bits |= AT_SIZE;
+	if (vap->va_atime.tv_sec != VNOVAL)
+		bits |= AT_ATIME;
+	if (vap->va_mtime.tv_sec != VNOVAL)
+		bits |= AT_MTIME;
+	if (vap->va_mode != (uint16_t)VNOVAL)
+		bits |= AT_MODE;
+	if (vap->va_flags != VNOVAL)
+		bits |= AT_XVATTR;
+
+	/* Publish the accumulated bits in one store. */
+	vap->va_mask = bits;
+}
+#endif
+
+#define		RLIM64_INFINITY 0
+
+/*
+ * Rename "from" to "to" by calling kern_renameat() for curthread, with
+ * AT_FDCWD supplied alongside each path.  "seg" is asserted to be
+ * UIO_SYSSPACE and is passed through unchanged.  Returns whatever
+ * kern_renameat() returns.
+ */
+static __inline int
+vn_rename(char *from, char *to, enum uio_seg seg)
+{
+	int error;
+
+	ASSERT(seg == UIO_SYSSPACE);
+	error = kern_renameat(curthread, AT_FDCWD, from, AT_FDCWD, to, seg);
+
+	return (error);
+}
+
+#include <sys/vfs.h>
+
+#endif	/* _OPENSOLARIS_SYS_VNODE_H_ */

diff --git a/zfs/include/os/freebsd/spl/sys/vnode_impl.h b/zfs/include/os/freebsd/spl/sys/vnode_impl.h
new file mode 100644
index 0000000..c82b1fc
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/vnode_impl.h

@@ -0,0 +1,268 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 RackTop Systems.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_VNODE_IMPL_H
+#define	_SYS_VNODE_IMPL_H
+
+
+#define	IS_DEVVP(vp)	\
+	((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
+
+#define	V_XATTRDIR	0x0000	/* attribute unnamed directory */
+
+#define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
+
+/*
+ * The xvattr structure is really a variable length structure that
+ * is made up of:
+ * - The classic vattr_t (xva_vattr)
+ * - a 32 bit quantity (xva_mapsize) that specifies the size of the
+ *   attribute bitmaps in 32 bit words.
+ * - A pointer to the returned attribute bitmap (needed because the
+ *   previous element, the requested attribute bitmap, is variable length).
+ * - The requested attribute bitmap, which is an array of 32 bit words.
+ *   Callers use the XVA_SET_REQ() macro to set the bits corresponding to
+ *   the attributes that are being requested.
+ * - The returned attribute bitmap, which is an array of 32 bit words.
+ *   File systems that support optional attributes use the XVA_SET_RTN()
+ *   macro to set the bits corresponding to the attributes that are being
+ *   returned.
+ * - The xoptattr_t structure which contains the attribute values
+ *
+ * xva_mapsize determines how many words in the attribute bitmaps.
+ * Immediately following the attribute bitmaps is the xoptattr_t.
+ * xva_getxoptattr() is used to get the pointer to the xoptattr_t
+ * section.
+ */
+
+#define	XVA_MAPSIZE	3		/* Size of attr bitmaps */
+#define	XVA_MAGIC	0x78766174	/* Magic # for verification */
+
+/*
+ * The xvattr structure is an extensible structure which permits optional
+ * attributes to be requested/returned.  File systems may or may not support
+ * optional attributes.  They do so at their own discretion but if they do
+ * support optional attributes, they must register the VFSFT_XVATTR feature
+ * so that the optional attributes can be set/retrieved.
+ *
+ * The fields of the xvattr structure are:
+ *
+ * xva_vattr - The first element of an xvattr is a legacy vattr structure
+ * which includes the common attributes.  If AT_XVATTR is set in the va_mask
+ * then the entire structure is treated as an xvattr.  If AT_XVATTR is not
+ * set, then only the xva_vattr structure can be used.
+ *
+ * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
+ *
+ * xva_mapsize - Size of requested and returned attribute bitmaps.
+ *
+ * xva_rtnattrmapp - Pointer to xva_rtnattrmap[].  We need this since the
+ * size of the array before it, xva_reqattrmap[], could change which means
+ * the location of xva_rtnattrmap[] could change.  This will allow unbundled
+ * file systems to find the location of xva_rtnattrmap[] when the sizes change.
+ *
+ * xva_reqattrmap[] - Array of requested attributes.  Attributes are
+ * represented by a specific bit in a specific element of the attribute
+ * map array.  Callers set the bits corresponding to the attributes
+ * that the caller wants to get/set.
+ *
+ * xva_rtnattrmap[] - Array of attributes that the file system was able to
+ * process.  Not all file systems support all optional attributes.  This map
+ * informs the caller which attributes the underlying file system was able
+ * to set/get.  (Same structure as the requested attributes array in terms
+ * of each attribute  corresponding to specific bits and array elements.)
+ *
+ * xva_xoptattrs - Structure containing values of optional attributes.
+ * These values are only valid if the corresponding bits in xva_reqattrmap
+ * are set and the underlying file system supports those attributes.
+ */
+
+
+
+/*
+ * Attribute bits used in the extensible attribute's (xva's) attribute
+ * bitmaps.  Note that the bitmaps are made up of a variable length number
+ * of 32-bit words.  The convention is to use XAT{n}_{attrname} where "n"
+ * is the element in the bitmap (starting at 1).  This convention is for
+ * the convenience of the maintainer to keep track of which element each
+ * attribute belongs to.
+ *
+ * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY.  CONSUMERS
+ * MUST USE THE XAT_* DEFINES.
+ */
+#define	XAT0_INDEX	0LL		/* Index into bitmap for XAT0 attrs */
+#define	XAT0_CREATETIME	0x00000001	/* Create time of file */
+#define	XAT0_ARCHIVE	0x00000002	/* Archive */
+#define	XAT0_SYSTEM	0x00000004	/* System */
+#define	XAT0_READONLY	0x00000008	/* Readonly */
+#define	XAT0_HIDDEN	0x00000010	/* Hidden */
+#define	XAT0_NOUNLINK	0x00000020	/* Nounlink */
+#define	XAT0_IMMUTABLE	0x00000040	/* immutable */
+#define	XAT0_APPENDONLY	0x00000080	/* appendonly */
+#define	XAT0_NODUMP	0x00000100	/* nodump */
+#define	XAT0_OPAQUE	0x00000200	/* opaque */
+#define	XAT0_AV_QUARANTINED	0x00000400	/* anti-virus quarantine */
+#define	XAT0_AV_MODIFIED	0x00000800	/* anti-virus modified */
+#define	XAT0_AV_SCANSTAMP	0x00001000	/* anti-virus scanstamp */
+#define	XAT0_REPARSE	0x00002000	/* FS reparse point */
+#define	XAT0_GEN	0x00004000	/* object generation number */
+#define	XAT0_OFFLINE	0x00008000	/* offline */
+#define	XAT0_SPARSE	0x00010000	/* sparse */
+
+/* Support for XAT_* optional attributes */
+#define	XVA_MASK		0xffffffff	/* Used to mask off 32 bits */
+#define	XVA_SHFT		32		/* Used to shift index */
+
+/*
+ * Used to pry out the index and attribute bits from the XAT_* attributes
+ * defined below.  Note that we're masking things down to 32 bits then
+ * casting to uint32_t.
+ */
+#define	XVA_INDEX(attr)		((uint32_t)(((attr) >> XVA_SHFT) & XVA_MASK))
+#define	XVA_ATTRBIT(attr)	((uint32_t)((attr) & XVA_MASK))
+
+/*
+ * The following defines present a "flat namespace" so that consumers don't
+ * need to keep track of which element belongs to which bitmap entry.
+ *
+ * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER
+ */
+#define	XAT_CREATETIME		((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME)
+#define	XAT_ARCHIVE		((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE)
+#define	XAT_SYSTEM		((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM)
+#define	XAT_READONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY)
+#define	XAT_HIDDEN		((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN)
+#define	XAT_NOUNLINK		((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK)
+#define	XAT_IMMUTABLE		((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE)
+#define	XAT_APPENDONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY)
+#define	XAT_NODUMP		((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP)
+#define	XAT_OPAQUE		((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE)
+#define	XAT_AV_QUARANTINED	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED)
+#define	XAT_AV_MODIFIED		((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED)
+#define	XAT_AV_SCANSTAMP	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP)
+#define	XAT_REPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE)
+#define	XAT_GEN			((XAT0_INDEX << XVA_SHFT) | XAT0_GEN)
+#define	XAT_OFFLINE		((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE)
+#define	XAT_SPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE)
+
+/*
+ * The returned attribute map array (xva_rtnattrmap[]) is located past the
+ * requested attribute map array (xva_reqattrmap[]).  Its location changes
+ * when the array sizes change.  We use a separate pointer in a known location
+ * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[].  This is
+ * set in xva_init()
+ */
+#define	XVA_RTNATTRMAP(xvap)	((xvap)->xva_rtnattrmapp)
+
+#define	MODEMASK	07777		/* mode bits plus permission bits */
+#define	PERMMASK	00777		/* permission bits */
+
+/*
+ * VOP_ACCESS flags
+ */
+#define	V_ACE_MASK	0x1	/* mask represents  NFSv4 ACE permissions */
+
+/*
+ * Flags for vnode operations.
+ */
+enum rm		{ RMFILE, RMDIRECTORY };	/* rm or rmdir (remove) */
+enum create	{ CRCREAT, CRMKNOD, CRMKDIR };	/* reason for create */
+
+/*
+ * Structure used by various vnode operations to determine
+ * the context (pid, host, identity) of a caller.
+ *
+ * The cc_caller_id is used to identify one or more callers who invoke
+ * operations, possibly on behalf of others.  For example, the NFS
+ * server could have its own cc_caller_id which can be detected by
+ * vnode/vfs operations or (FEM) monitors on those operations.  New
+ * caller IDs are generated by fs_new_caller_id().
+ */
+typedef struct caller_context {
+	pid_t		cc_pid;		/* Process ID of the caller */
+	int		cc_sysid;	/* System ID, used for remote calls */
+	u_longlong_t	cc_caller_id;	/* Identifier for (set of) caller(s) */
+	ulong_t		cc_flags;
+} caller_context_t;
+
+struct taskq;
+
+/*
+ * Flags for VOP_LOOKUP
+ *
+ * Defined in file.h, but also possible, FIGNORECASE and FSEARCH
+ *
+ */
+#define	LOOKUP_DIR		0x01	/* want parent dir vp */
+#define	LOOKUP_XATTR		0x02	/* look up extended attr dir */
+#define	CREATE_XATTR_DIR	0x04	/* Create extended attr dir */
+#define	LOOKUP_HAVE_SYSATTR_DIR	0x08	/* Already created virtual GFS dir */
+
+/*
+ * Flags for VOP_READDIR
+ */
+#define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
+#define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */
+
+/*
+ * Public vnode manipulation functions.
+ */
+
+void	vn_rele_async(struct vnode *vp, struct taskq *taskq);
+
+/*
+ * Statement-like wrapper around vn_rele_async().  The body is wrapped in
+ * do/while (0) so that "VN_RELE_ASYNC(vp, tq);" expands to exactly one
+ * statement and remains valid inside an unbraced if/else.
+ */
+#define	VN_RELE_ASYNC(vp, taskq)	do { \
+	vn_rele_async(vp, taskq); \
+} while (0)
+
+/*
+ * Flags to VOP_SETATTR/VOP_GETATTR.
+ */
+#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
+#define	ATTR_EXEC	0x02	/* invocation from exec(2) */
+#define	ATTR_COMM	0x04	/* yield common vp attributes */
+#define	ATTR_HINT	0x08	/* information returned will be `hint' */
+#define	ATTR_REAL	0x10	/* yield attributes of the real vp */
+#define	ATTR_NOACLCHECK	0x20	/* Don't check ACL when checking permissions */
+#define	ATTR_TRIGGER	0x40	/* Mount first if vnode is a trigger mount */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_VNODE_H */

diff --git a/zfs/include/os/freebsd/spl/sys/wmsum.h b/zfs/include/os/freebsd/spl/sys/wmsum.h
new file mode 100644
index 0000000..9fdd190
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/wmsum.h

@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * wmsum counters are a reduced version of aggsum counters, optimized for
+ * write-mostly scenarios.  They do not provide optimized read functions,
+ * but instead allow much cheaper add function.  The primary usage is
+ * infrequently read statistic counters, not requiring exact precision.
+ *
+ * The FreeBSD implementation is directly mapped into counter(9) KPI.
+ */
+
+#ifndef	_SYS_WMSUM_H
+#define	_SYS_WMSUM_H
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/malloc.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	wmsum_t	counter_u64_t
+
+/*
+ * Allocate the counter(9) object backing *ws (M_WAITOK allocation) and
+ * add the initial value to it.
+ */
+static inline void
+wmsum_init(wmsum_t *ws, uint64_t value)
+{
+	wmsum_t sum;
+
+	sum = counter_u64_alloc(M_WAITOK);
+	*ws = sum;
+	counter_u64_add(sum, value);
+}
+
+/*
+ * Free the counter(9) object referenced by *ws.
+ */
+static inline void
+wmsum_fini(wmsum_t *ws)
+{
+	wmsum_t sum = *ws;
+
+	counter_u64_free(sum);
+}
+
+/*
+ * Return the current value of the counter, as reported by
+ * counter_u64_fetch().
+ */
+static inline uint64_t
+wmsum_value(wmsum_t *ws)
+{
+	uint64_t total = counter_u64_fetch(*ws);
+
+	return (total);
+}
+
+/*
+ * Add a signed delta to the counter.
+ */
+static inline void
+wmsum_add(wmsum_t *ws, int64_t delta)
+{
+	wmsum_t sum = *ws;
+
+	counter_u64_add(sum, delta);
+}
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_WMSUM_H */

diff --git a/zfs/include/os/freebsd/spl/sys/zmod.h b/zfs/include/os/freebsd/spl/sys/zmod.h
new file mode 100644
index 0000000..c606b1d
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/zmod.h

@@ -0,0 +1,66 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZMOD_H
+#define	_ZMOD_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * zmod - RFC-1950-compatible decompression routines
+ *
+ * This file provides the public interfaces to zmod, an in-kernel RFC 1950
+ * decompression library.  More information about the implementation of these
+ * interfaces can be found in the usr/src/uts/common/zmod/ directory.
+ */
+
+#define	Z_OK		0
+#define	Z_STREAM_END	1
+#define	Z_NEED_DICT	2
+#define	Z_ERRNO		(-1)
+#define	Z_STREAM_ERROR	(-2)
+#define	Z_DATA_ERROR	(-3)
+#define	Z_MEM_ERROR	(-4)
+#define	Z_BUF_ERROR	(-5)
+#define	Z_VERSION_ERROR	(-6)
+
+#define	Z_NO_COMPRESSION	0
+#define	Z_BEST_SPEED		1
+#define	Z_BEST_COMPRESSION	9
+#define	Z_DEFAULT_COMPRESSION	(-1)
+
+extern int z_uncompress(void *, size_t *, const void *, size_t);
+extern int z_compress(void *, size_t *, const void *, size_t);
+extern int z_compress_level(void *, size_t *, const void *, size_t, int);
+extern const char *z_strerror(int);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZMOD_H */

diff --git a/zfs/include/os/freebsd/spl/sys/zone.h b/zfs/include/os/freebsd/spl/sys/zone.h
new file mode 100644
index 0000000..dd088de
--- /dev/null
+++ b/zfs/include/os/freebsd/spl/sys/zone.h

@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_ZONE_H_
+#define	_OPENSOLARIS_SYS_ZONE_H_
+
+#include <sys/jail.h>
+
+/*
+ * Macros to help with zone visibility restrictions.
+ */
+
+#define	GLOBAL_ZONEID	0
+
+/*
+ * Is proc in the global zone?
+ */
+#define	INGLOBALZONE(proc)	(!jailed((proc)->p_ucred))
+
+/*
+ * Attach the given dataset to the given jail.
+ */
+extern int zone_dataset_attach(struct ucred *, const char *, int);
+
+/*
+ * Detach the given dataset from the given jail.
+ */
+extern int zone_dataset_detach(struct ucred *, const char *, int);
+
+/*
+ * Returns true if the named pool/dataset is visible in the current zone.
+ */
+extern int zone_dataset_visible(const char *, int *);
+
+/*
+ * Safely get the hostid of the specified zone (defaults to machine's hostid
+ * if the specified zone doesn't emulate a hostid).  Passing NULL retrieves
+ * the global zone's (i.e., physical system's) hostid.
+ */
+extern uint32_t zone_get_hostid(void *);
+
+#endif	/* !_OPENSOLARIS_SYS_ZONE_H_ */

diff --git a/zfs/include/os/freebsd/zfs/Makefile.am b/zfs/include/os/freebsd/zfs/Makefile.am
new file mode 100644
index 0000000..081839c
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = sys

diff --git a/zfs/include/os/freebsd/zfs/sys/Makefile.am b/zfs/include/os/freebsd/zfs/sys/Makefile.am
new file mode 100644
index 0000000..392bb4a
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/Makefile.am

@@ -0,0 +1,15 @@
+KERNEL_H = \
+	freebsd_crypto.h \
+	sha2.h \
+	vdev_os.h \
+	zfs_bootenv_os.h \
+	zfs_context_os.h \
+	zfs_ctldir.h \
+	zfs_dir.h \
+	zfs_ioctl_compat.h \
+	zfs_vfsops_os.h \
+	zfs_vnops_os.h \
+	zfs_znode_impl.h \
+	zpl.h
+
+noinst_HEADERS = $(KERNEL_H)

diff --git a/zfs/include/os/freebsd/zfs/sys/freebsd_crypto.h b/zfs/include/os/freebsd/zfs/sys/freebsd_crypto.h
new file mode 100644
index 0000000..e240f5b
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/freebsd_crypto.h

@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Sean Eric Fagan <sef@ixsystems.com>
+ * Portions Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Portions of this file were taken from GELI's implementation of hmac.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ZFS_FREEBSD_CRYPTO_H
+#define	_ZFS_FREEBSD_CRYPTO_H
+
+#include <sys/errno.h>
+#include <sys/mutex.h>
+#include <opencrypto/cryptodev.h>
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha512.h>
+
+#define	SUN_CKM_AES_CCM	"CKM_AES_CCM"
+#define	SUN_CKM_AES_GCM	"CKM_AES_GCM"
+#define	SUN_CKM_SHA512_HMAC	"CKM_SHA512_HMAC"
+
+#define	CRYPTO_KEY_RAW	1
+
+#define	CRYPTO_BITS2BYTES(n) ((n) == 0 ? 0 : (((n) - 1) >> 3) + 1)
+#define	CRYPTO_BYTES2BITS(n) ((n) << 3)
+
+struct zio_crypt_info;
+
+/*
+ * Session state passed to freebsd_crypt_newsession(),
+ * freebsd_crypt_freesession() and freebsd_crypt_uio().
+ */
+typedef struct freebsd_crypt_session {
+	struct mtx		fs_lock;	/* NOTE(review): presumably guards fs_done; confirm */
+	crypto_session_t	fs_sid;	/* opencrypto session handle */
+	boolean_t	fs_done;	/* NOTE(review): looks like a completion flag; confirm */
+} freebsd_crypt_session_t;
+
+/*
+ * Unused types to minimize code differences.
+ */
+typedef void *crypto_mechanism_t;
+typedef void *crypto_ctx_template_t;
+/*
+ * Unlike the ICP crypto_key type, this only
+ * supports <data, length> (the equivalent of
+ * CRYPTO_KEY_RAW).
+ */
+typedef struct crypto_key {
+	int	ck_format;	/* Unused, but minimizes code diff */
+	void	*ck_data;
+	size_t	ck_length;
+} crypto_key_t;
+
+typedef struct hmac_ctx {
+	SHA512_CTX	innerctx;
+	SHA512_CTX	outerctx;
+} *crypto_context_t;
+
+/*
+ * The only algorithm ZFS uses for hashing is SHA512_HMAC.
+ */
+void crypto_mac(const crypto_key_t *key, const void *in_data,
+	size_t in_data_size, void *out_data, size_t out_data_size);
+void crypto_mac_init(struct hmac_ctx *ctx, const crypto_key_t *key);
+void crypto_mac_update(struct hmac_ctx *ctx, const void *data,
+	size_t data_size);
+void crypto_mac_final(struct hmac_ctx *ctx, void *out_data,
+	size_t out_data_size);
+
+int freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
+    struct zio_crypt_info *, crypto_key_t *);
+void freebsd_crypt_freesession(freebsd_crypt_session_t *sessp);
+
+int freebsd_crypt_uio(boolean_t, freebsd_crypt_session_t *,
+	struct zio_crypt_info *, zfs_uio_t *, crypto_key_t *, uint8_t *,
+	size_t, size_t);
+
+#endif /* _ZFS_FREEBSD_CRYPTO_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/sha2.h b/zfs/include/os/freebsd/zfs/sys/sha2.h
new file mode 100644
index 0000000..e3923e4
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/sha2.h

@@ -0,0 +1,200 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
+
+#ifndef _SYS_SHA2_H
+#define	_SYS_SHA2_H
+
+#include <sys/types.h>		/* for uint_* */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	SHA2_HMAC_MIN_KEY_LEN	1	/* SHA2-HMAC min key length in bytes */
+#define	SHA2_HMAC_MAX_KEY_LEN	INT_MAX	/* SHA2-HMAC max key length in bytes */
+
+#define	SHA256_DIGEST_LENGTH	32	/* SHA256 digest length in bytes */
+#define	SHA384_DIGEST_LENGTH	48	/* SHA384 digest length in bytes */
+#define	SHA512_DIGEST_LENGTH	64	/* SHA512 digest length in bytes */
+
+/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
+#define	SHA512_224_DIGEST_LENGTH	28	/* SHA512/224 digest length */
+#define	SHA512_256_DIGEST_LENGTH	32	/* SHA512/256 digest length */
+
+#define	SHA256_HMAC_BLOCK_SIZE	64	/* SHA256-HMAC block size */
+#define	SHA512_HMAC_BLOCK_SIZE	128	/* SHA512-HMAC block size */
+
+#define	SHA256			0
+#define	SHA256_HMAC		1
+#define	SHA256_HMAC_GEN		2
+#define	SHA384			3
+#define	SHA384_HMAC		4
+#define	SHA384_HMAC_GEN		5
+#define	SHA512			6
+#define	SHA512_HMAC		7
+#define	SHA512_HMAC_GEN		8
+#define	SHA512_224		9
+#define	SHA512_256		10
+
+/*
+ * SHA2 context.
+ * The contents of this structure are a private interface between the
+ * Init/Update/Final calls of the functions defined below.
+ * Callers must never attempt to read or write any of the fields
+ * in this structure directly.
+ */
+
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha384.h>
+#include <crypto/sha2/sha512.h>
+#include <crypto/sha2/sha512t.h>
+typedef struct 	{
+	uint32_t algotype;		/* Algorithm Type */
+	union {
+		SHA256_CTX SHA256_ctx;
+		SHA384_CTX SHA384_ctx;
+		SHA512_CTX SHA512_ctx;
+	};
+} SHA2_CTX;
+
+extern void SHA256Init(SHA256_CTX *);
+
+extern void SHA256Update(SHA256_CTX *, const void *, size_t);
+
+extern void SHA256Final(void *, SHA256_CTX *);
+
+extern void SHA384Init(SHA384_CTX *);
+
+extern void SHA384Update(SHA384_CTX *, const void *, size_t);
+
+extern void SHA384Final(void *, SHA384_CTX *);
+
+extern void SHA512Init(SHA512_CTX *);
+
+extern void SHA512Update(SHA512_CTX *, const void *, size_t);
+
+extern void SHA512Final(void *, SHA512_CTX *);
+
+
+/*
+ * Initialise the context member that matches the requested mechanism and
+ * record the mechanism in c->algotype, which SHA2Update() and SHA2Final()
+ * switch on.  Only SHA256, SHA384, SHA512 and SHA512_256 are handled; any
+ * other value panics.
+ */
+static inline void
+SHA2Init(uint64_t mech, SHA2_CTX *c)
+{
+	switch (mech) {
+	case SHA256:
+		SHA256_Init(&c->SHA256_ctx);
+		break;
+	case SHA384:
+		SHA384_Init(&c->SHA384_ctx);
+		break;
+	case SHA512:
+		SHA512_Init(&c->SHA512_ctx);
+		break;
+	case SHA512_256:
+		/* SHA512/256 is initialised in the SHA512 context member. */
+		SHA512_256_Init(&c->SHA512_ctx);
+		break;
+	default:
+		panic("unknown mechanism %ju", (uintmax_t)mech);
+	}
+	c->algotype = (uint32_t)mech;
+}
+
+/*
+ * Feed s bytes at p into the hash selected by c->algotype (set by
+ * SHA2Init()).  Panics if algotype is not one of the mechanisms handled
+ * below.
+ */
+static inline void
+SHA2Update(SHA2_CTX *c, const void *p, size_t s)
+{
+	switch (c->algotype) {
+	case SHA256:
+		SHA256_Update(&c->SHA256_ctx, p, s);
+		break;
+	case SHA384:
+		SHA384_Update(&c->SHA384_ctx, p, s);
+		break;
+	case SHA512:
+		SHA512_Update(&c->SHA512_ctx, p, s);
+		break;
+	case SHA512_256:
+		SHA512_256_Update(&c->SHA512_ctx, p, s);
+		break;
+	default:
+		/* algotype is uint32_t, so print it as unsigned. */
+		panic("unknown mechanism %u", c->algotype);
+	}
+}
+
+/*
+ * Finish the hash selected by c->algotype (set by SHA2Init()) and write
+ * the digest to p.  Panics if algotype is not one of the mechanisms
+ * handled below.
+ */
+static inline void
+SHA2Final(void *p, SHA2_CTX *c)
+{
+	switch (c->algotype) {
+	case SHA256:
+		SHA256_Final(p, &c->SHA256_ctx);
+		break;
+	case SHA384:
+		SHA384_Final(p, &c->SHA384_ctx);
+		break;
+	case SHA512:
+		SHA512_Final(p, &c->SHA512_ctx);
+		break;
+	case SHA512_256:
+		SHA512_256_Final(p, &c->SHA512_ctx);
+		break;
+	default:
+		/* algotype is uint32_t, so print it as unsigned. */
+		panic("unknown mechanism %u", c->algotype);
+	}
+}
+
+#ifdef _SHA2_IMPL
+/*
+ * The following types/functions are all private to the implementation
+ * of the SHA2 functions and must not be used by consumers of the interface
+ */
+
+/*
+ * List of supported mechanisms in this module.
+ *
+ * It is important to note that in the module, division or modulus calculations
+ * are used on the enumerated type to determine which mechanism is being used;
+ * therefore, changing the order or additional mechanisms should be done
+ * carefully
+ */
+typedef enum sha2_mech_type {
+	SHA256_MECH_INFO_TYPE,		/* SUN_CKM_SHA256 */
+	SHA256_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC */
+	SHA256_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC_GENERAL */
+	SHA384_MECH_INFO_TYPE,		/* SUN_CKM_SHA384 */
+	SHA384_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC */
+	SHA384_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC_GENERAL */
+	SHA512_MECH_INFO_TYPE,		/* SUN_CKM_SHA512 */
+	SHA512_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC */
+	SHA512_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC_GENERAL */
+	SHA512_224_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_224 */
+	SHA512_256_MECH_INFO_TYPE	/* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#endif /* _SHA2_IMPL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/vdev_os.h b/zfs/include/os/freebsd/zfs/sys/vdev_os.h
new file mode 100644
index 0000000..59da954
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/vdev_os.h

@@ -0,0 +1,33 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _SYS_VDEV_OS_H
+#define	_SYS_VDEV_OS_H
+
+extern int vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size);
+extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs,
+    uint64_t *count);
+
+#endif

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_bootenv_os.h b/zfs/include/os/freebsd/zfs/sys/zfs_bootenv_os.h
new file mode 100644
index 0000000..80c71a6
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_bootenv_os.h

@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_OS_H
+#define	_ZFS_BOOTENV_OS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	BOOTENV_OS		BE_FREEBSD_VENDOR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_OS_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_context_os.h b/zfs/include/os/freebsd/zfs/sys/zfs_context_os.h
new file mode 100644
index 0000000..a32eb52
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_context_os.h

@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef ZFS_CONTEXT_OS_H_
+#define	ZFS_CONTEXT_OS_H_
+
+#include <sys/condvar.h>
+#include <sys/rwlock.h>
+#include <sys/sig.h>
+#include_next <sys/sdt.h>
+#include <sys/misc.h>
+#include <sys/kdb.h>
+#include <sys/pathname.h>
+#include <sys/conf.h>
+#include <sys/types.h>
+#include <sys/ccompat.h>
+#include <linux/types.h>
+
+#if KSTACK_PAGES * PAGE_SIZE >= 16384
+#define	HAVE_LARGE_STACKS	1
+#endif
+
+#define	cond_resched()		kern_yield(PRI_USER)
+
+#define	taskq_create_sysdc(a, b, d, e, p, dc, f) \
+	    (taskq_create(a, b, maxclsyspri, d, e, f))
+
+#define	tsd_create(keyp, destructor)    do {                 \
+		*(keyp) = osd_thread_register((destructor));         \
+		KASSERT(*(keyp) > 0, ("cannot register OSD"));       \
+} while (0)
+
+#define	tsd_destroy(keyp)	osd_thread_deregister(*(keyp))
+#define	tsd_get(key)	osd_thread_get(curthread, (key))
+#define	tsd_set(key, value)	osd_thread_set(curthread, (key), (value))
+#define	fm_panic	panic
+
+extern int zfs_debug_level;
+extern struct mtx zfs_debug_mtx;
+/*
+ * Debug logging.  The low 8 bits of lvl are compared against
+ * zfs_debug_level; when the message is enabled it is printed, prefixed
+ * with the calling function, line number and lvl, while zfs_debug_mtx is
+ * held.  If bit 0x100 is set in lvl, kdb_backtrace() is called as well.
+ */
+#define	ZFS_LOG(lvl, ...) do { \
+	if (((lvl) & 0xff) <= zfs_debug_level) { \
+		mtx_lock(&zfs_debug_mtx); \
+		printf("%s:%u[%d]: ", \
+		    __func__, __LINE__, (lvl)); \
+		printf(__VA_ARGS__); \
+		printf("\n"); \
+		if ((lvl) & 0x100) \
+			kdb_backtrace(); \
+		mtx_unlock(&zfs_debug_mtx); \
+	} \
+} while (0)
+
+#define	MSEC_TO_TICK(msec)	(howmany((hrtime_t)(msec) * hz, MILLISEC))
+extern int hz;
+extern int tick;
+typedef int fstrans_cookie_t;
+#define	spl_fstrans_mark() (0)
+#define	spl_fstrans_unmark(x) ((x) = 0)	/* resets the caller's cookie to 0 */
+#define	signal_pending(x) SIGPENDING(x)
+#define	current curthread
+#define	thread_join(x)
+typedef struct opensolaris_utsname	utsname_t;
+extern utsname_t *utsname(void);
+extern int spa_import_rootpool(const char *name, bool checkpointrewind);
+#endif

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_ctldir.h b/zfs/include/os/freebsd/zfs/sys/zfs_ctldir.h
new file mode 100644
index 0000000..da02863
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_ctldir.h

@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_ZFS_CTLDIR_H
+#define	_ZFS_CTLDIR_H
+
+#include <sys/vnode.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	ZFS_CTLDIR_NAME		".zfs"
+
+#define	zfs_has_ctldir(zdp)	\
+	((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
+	((zdp)->z_zfsvfs->z_ctldir != NULL))
+#define	zfs_show_ctldir(zdp)	\
+	(zfs_has_ctldir(zdp) && \
+	((zdp)->z_zfsvfs->z_show_ctldir))
+
+void zfsctl_create(zfsvfs_t *);
+void zfsctl_destroy(zfsvfs_t *);
+int zfsctl_root(zfsvfs_t *, int, vnode_t **);
+void zfsctl_init(void);
+void zfsctl_fini(void);
+boolean_t zfsctl_is_node(vnode_t *);
+int zfsctl_snapshot_unmount(const char *snapname, int flags);
+int zfsctl_rename_snapshot(const char *from, const char *to);
+int zfsctl_destroy_snapshot(const char *snapname, int force);
+int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
+
+int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
+
+#define	ZFSCTL_INO_ROOT		0x1
+#define	ZFSCTL_INO_SNAPDIR	0x2
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_CTLDIR_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_dir.h b/zfs/include/os/freebsd/zfs/sys/zfs_dir.h
new file mode 100644
index 0000000..4197e11
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_dir.h

@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_FS_ZFS_DIR_H
+#define	_SYS_FS_ZFS_DIR_H
+
+#include <sys/pathname.h>
+#include <sys/dmu.h>
+#include <sys/zfs_znode.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* zfs_dirent_lock() flags */
+#define	ZNEW		0x0001		/* entry should not exist */
+#define	ZEXISTS		0x0002		/* entry should exist */
+#define	ZSHARED		0x0004		/* shared access (zfs_dirlook()) */
+#define	ZXATTR		0x0008		/* we want the xattr dir */
+#define	ZRENAMING	0x0010		/* znode is being renamed */
+#define	ZCILOOK		0x0020		/* case-insensitive lookup requested */
+#define	ZCIEXACT	0x0040		/* c-i requires c-s match (rename) */
+#define	ZHAVELOCK	0x0080		/* z_name_lock is already held */
+
+/* mknode flags */
+#define	IS_ROOT_NODE	0x01		/* create a root node */
+#define	IS_XATTR	0x02		/* create an extended attribute node */
+
+extern int zfs_dirent_lookup(znode_t *, const char *, znode_t **, int);
+extern int zfs_link_create(znode_t *, const char *, znode_t *, dmu_tx_t *, int);
+extern int zfs_link_destroy(znode_t *, const char *, znode_t *, dmu_tx_t *, int,
+    boolean_t *);
+extern int zfs_dirlook(znode_t *, const char *name, znode_t **);
+extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
+    uint_t, znode_t **, zfs_acl_ids_t *);
+extern void zfs_rmnode(znode_t *);
+extern boolean_t zfs_dirempty(znode_t *);
+extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
+extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
+extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
+extern int zfs_get_xattrdir(znode_t *, znode_t **, cred_t *, int);
+extern int zfs_make_xattrdir(znode_t *, vattr_t *, znode_t **, cred_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_FS_ZFS_DIR_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_ioctl_compat.h b/zfs/include/os/freebsd/zfs/sys/zfs_ioctl_compat.h
new file mode 100644
index 0000000..91bc48e
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_ioctl_compat.h

@@ -0,0 +1,160 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2014 Xin Li <delphij@FreeBSD.org>.  All rights reserved.
+ * Copyright 2013 Martin Matuska <mm@FreeBSD.org>.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_ZFS_IOCTL_COMPAT_H
+#define	_SYS_ZFS_IOCTL_COMPAT_H
+
+#include <sys/cred.h>
+#include <sys/dmu.h>
+#include <sys/zio.h>
+#include <sys/dsl_deleg.h>
+#include <sys/zfs_ioctl.h>
+
+#ifdef _KERNEL
+#include <sys/nvpair.h>
+#endif  /* _KERNEL */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Backwards ioctl compatibility
+ */
+
+/* ioctl versions for vfs.zfs.version.ioctl */
+#define	ZFS_IOCVER_UNDEF	(-1)
+#define	ZFS_IOCVER_NONE		0
+#define	ZFS_IOCVER_DEADMAN	1
+#define	ZFS_IOCVER_LZC		2
+#define	ZFS_IOCVER_ZCMD		3
+#define	ZFS_IOCVER_EDBP		4
+#define	ZFS_IOCVER_RESUME	5
+#define	ZFS_IOCVER_INLANES	6
+#define	ZFS_IOCVER_PAD		7
+#define	ZFS_IOCVER_LEGACY	ZFS_IOCVER_PAD
+#define	ZFS_IOCVER_OZFS		15
+
+/* compatibility conversion flag */
+#define	ZFS_CMD_COMPAT_NONE	0
+#define	ZFS_CMD_COMPAT_V15	1
+#define	ZFS_CMD_COMPAT_V28	2
+#define	ZFS_CMD_COMPAT_DEADMAN	3
+#define	ZFS_CMD_COMPAT_LZC	4
+#define	ZFS_CMD_COMPAT_ZCMD	5
+#define	ZFS_CMD_COMPAT_EDBP	6
+#define	ZFS_CMD_COMPAT_RESUME	7
+#define	ZFS_CMD_COMPAT_INLANES	8
+#define	ZFS_CMD_COMPAT_LEGACY	9
+
+#define	ZFS_IOC_COMPAT_PASS	254
+#define	ZFS_IOC_COMPAT_FAIL	255
+
+#define	ZFS_IOCREQ(ioreq)	((ioreq) & 0xff)
+
+typedef struct zfs_iocparm {
+	uint32_t	zfs_ioctl_version;
+	uint64_t	zfs_cmd;
+	uint64_t	zfs_cmd_size;
+} zfs_iocparm_t;
+
+
+#define	LEGACY_MAXPATHLEN 1024
+#define	LEGACY_MAXNAMELEN 256
+
+/*
+ * Note: this struct must have the same layout in 32-bit and 64-bit, so
+ * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit
+ * kernel.  Therefore, we add padding to it so that no "hidden" padding
+ * is automatically added on 64-bit (but not on 32-bit).
+ */
+typedef struct zfs_cmd_legacy {
+	char		zc_name[LEGACY_MAXPATHLEN];	/* pool|dataset name */
+	uint64_t	zc_nvlist_src;		/* really (char *) */
+	uint64_t	zc_nvlist_src_size;
+	uint64_t	zc_nvlist_dst;		/* really (char *) */
+	uint64_t	zc_nvlist_dst_size;
+	boolean_t	zc_nvlist_dst_filled;	/* put an nvlist in dst? */
+	int		zc_pad2;
+
+	/*
+	 * The following members are for legacy ioctls which haven't been
+	 * converted to the new method.
+	 */
+	uint64_t	zc_history;		/* really (char *) */
+	char		zc_value[LEGACY_MAXPATHLEN * 2];
+	char		zc_string[LEGACY_MAXNAMELEN];
+	uint64_t	zc_guid;
+	uint64_t	zc_nvlist_conf;		/* really (char *) */
+	uint64_t	zc_nvlist_conf_size;
+	uint64_t	zc_cookie;
+	uint64_t	zc_objset_type;
+	uint64_t	zc_perm_action;
+	uint64_t	zc_history_len;
+	uint64_t	zc_history_offset;
+	uint64_t	zc_obj;
+	uint64_t	zc_iflags;		/* internal to zfs(7fs) */
+	zfs_share_t	zc_share;
+	uint64_t	zc_jailid;
+	dmu_objset_stats_t zc_objset_stats;
+	dmu_replay_record_t zc_begin_record;
+	zinject_record_t zc_inject_record;
+	uint32_t	zc_defer_destroy;
+	uint32_t	zc_flags;
+	uint64_t	zc_action_handle;
+	int		zc_cleanup_fd;
+	uint8_t		zc_simple;
+	uint8_t		zc_pad3[3];
+	boolean_t	zc_resumable;
+	uint32_t	zc_pad4;
+	uint64_t	zc_sendobj;
+	uint64_t	zc_fromobj;
+	uint64_t	zc_createtxg;
+	zfs_stat_t	zc_stat;
+} zfs_cmd_legacy_t;
+
+
+#ifdef _KERNEL
+int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int);
+void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int);
+nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int,
+    const int);
+nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int,
+    const int);
+#endif	/* _KERNEL */
+int zfs_ioctl_legacy_to_ozfs(int request);
+int zfs_ioctl_ozfs_to_legacy(int request);
+void zfs_cmd_legacy_to_ozfs(zfs_cmd_legacy_t *src, zfs_cmd_t *dst);
+void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int);
+void zfs_cmd_ozfs_to_legacy(zfs_cmd_t *src, zfs_cmd_legacy_t *dst);
+
+void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ZFS_IOCTL_COMPAT_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/zfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
new file mode 100644
index 0000000..a263b48
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h

@@ -0,0 +1,318 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * All rights reserved.
+ */
+
+#ifndef	_SYS_FS_ZFS_VFSOPS_H
+#define	_SYS_FS_ZFS_VFSOPS_H
+
+#if __FreeBSD_version >= 1300125
+#define	TEARDOWN_RMS
+#endif
+
+#if __FreeBSD_version >= 1300109
+#define	TEARDOWN_INACTIVE_RMS
+#endif
+
+#include <sys/dataset_kstats.h>
+#include <sys/list.h>
+#include <sys/vfs.h>
+#include <sys/zil.h>
+#include <sys/sa.h>
+#include <sys/rrwlock.h>
+#ifdef TEARDOWN_INACTIVE_RMS
+#include <sys/rmlock.h>
+#endif
+#include <sys/zfs_ioctl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef TEARDOWN_RMS
+typedef struct rmslock zfs_teardown_lock_t;
+#else
+#define	zfs_teardown_lock_t		rrmlock_t
+#endif
+
+#ifdef TEARDOWN_INACTIVE_RMS
+typedef struct rmslock zfs_teardown_inactive_lock_t;
+#else
+#define	zfs_teardown_inactive_lock_t krwlock_t
+#endif
+
+typedef struct zfsvfs zfsvfs_t;
+struct znode;
+
+struct zfsvfs {
+	vfs_t		*z_vfs;		/* generic fs struct */
+	zfsvfs_t	*z_parent;	/* parent fs */
+	objset_t	*z_os;		/* objset reference */
+	uint64_t	z_flags;	/* super_block flags */
+	uint64_t	z_root;		/* id of root znode */
+	uint64_t	z_unlinkedobj;	/* id of unlinked zapobj */
+	uint64_t	z_max_blksz;	/* maximum block size for files */
+	uint64_t	z_fuid_obj;	/* fuid table object number */
+	uint64_t	z_fuid_size;	/* fuid table size */
+	avl_tree_t	z_fuid_idx;	/* fuid tree keyed by index */
+	avl_tree_t	z_fuid_domain;	/* fuid tree keyed by domain */
+	krwlock_t	z_fuid_lock;	/* fuid lock */
+	boolean_t	z_fuid_loaded;	/* fuid tables are loaded */
+	boolean_t	z_fuid_dirty;   /* need to sync fuid table ? */
+	struct zfs_fuid_info	*z_fuid_replay; /* fuid info for replay */
+	zilog_t		*z_log;		/* intent log pointer */
+	uint_t		z_acl_type;	/* type of acl usable on this fs */
+	uint_t		z_acl_mode;	/* acl chmod/mode behavior */
+	uint_t		z_acl_inherit;	/* acl inheritance behavior */
+	zfs_case_t	z_case;		/* case-sense */
+	boolean_t	z_utf8;		/* utf8-only */
+	int		z_norm;		/* normalization flags */
+	boolean_t	z_atime;	/* enable atimes mount option */
+	boolean_t	z_unmounted;	/* unmounted */
+	zfs_teardown_lock_t z_teardown_lock;
+	zfs_teardown_inactive_lock_t z_teardown_inactive_lock;
+	list_t		z_all_znodes;	/* all vnodes in the fs */
+	uint64_t	z_nr_znodes;	/* number of znodes in the fs */
+	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
+	struct zfsctl_root	*z_ctldir;	/* .zfs directory pointer */
+	boolean_t	z_show_ctldir;	/* expose .zfs in the root dir */
+	boolean_t	z_issnap;	/* true if this is a snapshot */
+	boolean_t	z_vscan;	/* virus scan on/off */
+	boolean_t	z_use_fuids;	/* version allows fuids */
+	boolean_t	z_replay;	/* set during ZIL replay */
+	boolean_t	z_use_sa;	/* version allows system attributes */
+	boolean_t	z_xattr_sa;	/* allow xattrs to be stored as SA */
+	boolean_t	z_use_namecache; /* make use of FreeBSD name cache */
+	uint8_t		z_xattr;	/* xattr type in use */
+	uint64_t	z_version;	/* ZPL version */
+	uint64_t	z_shares_dir;	/* hidden shares dir */
+	dataset_kstats_t	z_kstat;	/* fs kstats */
+	kmutex_t	z_lock;
+	uint64_t	z_userquota_obj;
+	uint64_t	z_groupquota_obj;
+	uint64_t	z_userobjquota_obj;
+	uint64_t	z_groupobjquota_obj;
+	uint64_t	z_projectquota_obj;
+	uint64_t	z_projectobjquota_obj;
+	uint64_t	z_replay_eof;	/* New end of file - replay only */
+	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
+#define	ZFS_OBJ_MTX_SZ	64
+	kmutex_t	z_hold_mtx[ZFS_OBJ_MTX_SZ];	/* znode hold locks */
+	struct task	z_unlinked_drain_task;
+};
+
+#ifdef TEARDOWN_RMS
+#define	ZFS_TEARDOWN_INIT(zfsvfs)		\
+	rms_init(&(zfsvfs)->z_teardown_lock, "zfs teardown")
+
+#define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
+	rms_destroy(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
+	rms_try_rlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
+	rms_rlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag)	\
+	rms_runlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, tag)	\
+	rms_wlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_EXIT_WRITE(zfsvfs)		\
+	rms_wunlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_EXIT(zfsvfs, tag)		\
+	rms_unlock(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_READ_HELD(zfsvfs)		\
+	rms_rowned(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_WRITE_HELD(zfsvfs)		\
+	rms_wowned(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_HELD(zfsvfs)		\
+	rms_owned_any(&(zfsvfs)->z_teardown_lock)
+#else
+#define	ZFS_TEARDOWN_INIT(zfsvfs)		\
+	rrm_init(&(zfsvfs)->z_teardown_lock, B_FALSE)
+
+#define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
+	rrm_destroy(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
+	rw_tryenter(&(zfsvfs)->z_teardown_lock, RW_READER)
+
+#define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
+	rrm_enter_read(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag)	\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, tag)	\
+	rrm_enter(&(zfsvfs)->z_teardown_lock, RW_WRITER, tag)
+
+#define	ZFS_TEARDOWN_EXIT_WRITE(zfsvfs)		\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+
+#define	ZFS_TEARDOWN_EXIT(zfsvfs, tag)		\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_READ_HELD(zfsvfs)		\
+	RRM_READ_HELD(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_WRITE_HELD(zfsvfs)		\
+	RRM_WRITE_HELD(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_HELD(zfsvfs)		\
+	RRM_LOCK_HELD(&(zfsvfs)->z_teardown_lock)
+#endif
+
+#ifdef TEARDOWN_INACTIVE_RMS
+#define	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs)		\
+	rms_init(&(zfsvfs)->z_teardown_inactive_lock, "zfs teardown inactive")
+
+#define	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs)		\
+	rms_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)	\
+	rms_try_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs)	\
+	rms_rlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs)		\
+	rms_runlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs)	\
+	rms_wlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs)	\
+	rms_wunlock(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)	\
+	rms_wowned(&(zfsvfs)->z_teardown_inactive_lock)
+#else
+#define	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs)		\
+	rw_init(&(zfsvfs)->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL)
+
+#define	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs)		\
+	rw_destroy(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)	\
+	rw_tryenter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+
+#define	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs)	\
+	rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_READER)
+
+#define	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs)		\
+	rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs)	\
+	rw_enter(&(zfsvfs)->z_teardown_inactive_lock, RW_WRITER)
+
+#define	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs)	\
+	rw_exit(&(zfsvfs)->z_teardown_inactive_lock)
+
+#define	ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)	\
+	RW_WRITE_HELD(&(zfsvfs)->z_teardown_inactive_lock)
+#endif
+
+#define	ZSB_XATTR	0x0001		/* Enable user xattrs */
+/*
+ * Normal filesystems (those not under .zfs/snapshot) have a total
+ * file ID size limited to 12 bytes (including the length field) due to
+ * NFSv2 protocol's limitation of 32 bytes for a filehandle.  For historical
+ * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
+ * (although the NFSv3 protocol actually permits a maximum of 64 bytes).  It
+ * is not possible to expand beyond 12 bytes without abandoning support
+ * of NFSv2.
+ *
+ * For normal filesystems, we partition up the available space as follows:
+ *	2 bytes		fid length (required)
+ *	6 bytes		object number (48 bits)
+ *	4 bytes		generation number (32 bits)
+ *
+ * We reserve only 48 bits for the object number, as this is the limit
+ * currently defined and imposed by the DMU.
+ */
+typedef struct zfid_short {
+	uint16_t	zf_len;
+	uint8_t		zf_object[6];		/* obj[i] = obj >> (8 * i) */
+	uint8_t		zf_gen[4];		/* gen[i] = gen >> (8 * i) */
+} zfid_short_t;
+
+/*
+ * Filesystems under .zfs/snapshot have a total file ID size of 22[*] bytes
+ * (including the length field).  This makes files under .zfs/snapshot
+ * accessible by NFSv3 and NFSv4, but not NFSv2.
+ *
+ * For files under .zfs/snapshot, we partition up the available space
+ * as follows:
+ *	2 bytes		fid length (required)
+ *	6 bytes		object number (48 bits)
+ *	4 bytes		generation number (32 bits)
+ *	6 bytes		objset id (48 bits)
+ *	4 bytes[**]	currently just zero (32 bits)
+ *
+ * We reserve only 48 bits for the object number and objset id, as these are
+ * the limits currently defined and imposed by the DMU.
+ *
+ * [*] 20 bytes on FreeBSD to fit into the size of struct fid.
+ * [**] 2 bytes on FreeBSD for the above reason.
+ */
+typedef struct zfid_long {
+	zfid_short_t	z_fid;
+	uint8_t		zf_setid[6];		/* obj[i] = obj >> (8 * i) */
+	uint8_t		zf_setgen[2];		/* gen[i] = gen >> (8 * i) */
+} zfid_long_t;
+
+#define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
+#define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))
+
+extern uint_t zfs_fsyncer_key;
+extern int zfs_super_owner;
+
+extern void zfs_init(void);
+extern void zfs_fini(void);
+
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
+extern int zfs_end_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
+extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
+extern int zfsvfs_create(const char *name, boolean_t readonly, zfsvfs_t **zfvp);
+extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
+extern void zfsvfs_free(zfsvfs_t *zfsvfs);
+extern int zfs_check_global_label(const char *dsname, const char *hexsl);
+extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
+extern int zfs_get_temporary_prop(struct dsl_dataset *ds, zfs_prop_t zfs_prop,
+    uint64_t *val, char *setpoint);
+extern int zfs_busy(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_FS_ZFS_VFSOPS_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_vnops_os.h b/zfs/include/os/freebsd/zfs/sys/zfs_vnops_os.h
new file mode 100644
index 0000000..bf5e03b
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_vnops_os.h

@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS_FS_ZFS_VNOPS_OS_H
+#define	_SYS_FS_ZFS_VNOPS_OS_H
+
+int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t size, struct vm_page **ppa, dmu_tx_t *tx);
+int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
+    int *rbehind, int *rahead, int last_size);
+extern int zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags);
+extern int zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap,
+    znode_t **zpp, cred_t *cr, int flags, vsecattr_t *vsecp);
+extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd,
+    cred_t *cr, int flags);
+extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
+extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp,
+    const char *tnm, cred_t *cr, int flags);
+extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
+    const char *link, znode_t **zpp, cred_t *cr, int flags);
+extern int zfs_link(znode_t *tdzp, znode_t *sp,
+    const char *name, cred_t *cr, int flags);
+extern int zfs_space(znode_t *zp, int cmd, struct flock *bfp, int flag,
+    offset_t offset, cred_t *cr);
+extern int zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl,
+    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp);
+extern int zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag,
+    cred_t *cr);
+extern int zfs_write_simple(znode_t *zp, const void *data, size_t len,
+    loff_t pos, size_t *resid);
+
+#endif

diff --git a/zfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/zfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
new file mode 100644
index 0000000..1208841
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h

@@ -0,0 +1,194 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+#ifndef	_FREEBSD_ZFS_SYS_ZNODE_IMPL_H
+#define	_FREEBSD_ZFS_SYS_ZNODE_IMPL_H
+
+#include <sys/list.h>
+#include <sys/dmu.h>
+#include <sys/sa.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/rrwlock.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_stat.h>
+#include <sys/zfs_rlock.h>
+#include <sys/zfs_acl.h>
+#include <sys/zil.h>
+#include <sys/zfs_project.h>
+#include <vm/vm_object.h>
+#include <sys/uio.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Directory entry locks control access to directory entries.
+ * They are used to protect creates, deletes, and renames.
+ * Each directory znode has a mutex and a list of locked names.
+ */
+#define	ZNODE_OS_FIELDS                 \
+	struct zfsvfs	*z_zfsvfs;      \
+	vnode_t		*z_vnode;       \
+	char		*z_cached_symlink;	\
+	uint64_t		z_uid;          \
+	uint64_t		z_gid;          \
+	uint64_t		z_gen;          \
+	uint64_t		z_atime[2];     \
+	uint64_t		z_links;
+
+#define	ZFS_LINK_MAX	UINT64_MAX
+
+/*
+ * ZFS minor numbers can refer to either a control device instance or
+ * a zvol. Depending on the value of zss_type, zss_data points to either
+ * a zvol_state_t or a zfs_onexit_t.
+ */
+enum zfs_soft_state_type {
+	ZSST_ZVOL,
+	ZSST_CTLDEV
+};
+
+typedef struct zfs_soft_state {
+	enum zfs_soft_state_type zss_type;
+	void *zss_data;
+} zfs_soft_state_t;
+
+extern minor_t zfsdev_minor_alloc(void);
+
+/*
+ * Range locking rules
+ * --------------------
+ * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
+ *    file range needs to be locked as RL_WRITER. Only then can the pages be
+ *    freed etc and zp_size reset. zp_size must be set within range lock.
+ * 2. For writes and punching holes (zfs_write & zfs_space) just the range
+ *    being written or freed needs to be locked as RL_WRITER.
+ *    Multiple writes at the end of the file must coordinate zp_size updates
+ *    to ensure data isn't lost. A compare and swap loop is currently used
+ *    to ensure the file size is at least the offset last written.
+ * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
+ *    read needs to be locked as RL_READER. A check against zp_size can then
+ *    be made for reading beyond end of file.
+ */
+
+/*
+ * Convert between znode pointers and vnode pointers
+ */
+#define	ZTOV(ZP)	((ZP)->z_vnode)
+#define	ZTOI(ZP)	((ZP)->z_vnode)
+#define	VTOZ(VP)	((struct znode *)(VP)->v_data)
+#define	VTOZ_SMR(VP)	((znode_t *)vn_load_v_data_smr(VP))
+#define	ITOZ(VP)	((struct znode *)(VP)->v_data)
+#define	zhold(zp)	vhold(ZTOV((zp)))
+#define	zrele(zp)	vrele(ZTOV((zp)))
+
+#define	ZTOZSB(zp) ((zp)->z_zfsvfs)
+#define	ITOZSB(vp) (VTOZ(vp)->z_zfsvfs)
+#define	ZTOTYPE(zp)	(ZTOV(zp)->v_type)
+#define	ZTOGID(zp) ((zp)->z_gid)
+#define	ZTOUID(zp) ((zp)->z_uid)
+#define	ZTONLNK(zp) ((zp)->z_links)
+#define	Z_ISBLK(type) ((type) == VBLK)
+#define	Z_ISCHR(type) ((type) == VCHR)
+#define	Z_ISLNK(type) ((type) == VLNK)
+#define	Z_ISDIR(type) ((type) == VDIR)
+
+#define	zn_has_cached_data(zp, start, end) \
+    vn_has_cached_data(ZTOV(zp))
+#define	zn_flush_cached_data(zp, sync)	vn_flush_cached_data(ZTOV(zp), sync)
+#define	zn_rlimit_fsize(zp, uio) \
+    vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
+
+#define	ZFS_ENTER_ERROR(zfsvfs, error) do {			\
+	ZFS_TEARDOWN_ENTER_READ((zfsvfs), FTAG);		\
+	if (__predict_false((zfsvfs)->z_unmounted)) {		\
+		ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);		\
+		return (error);					\
+	}							\
+} while (0)
+
+/* Called on entry to each ZFS vnode and vfs operation  */
+#define	ZFS_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, EIO)
+
+/* Must be called before exiting the vop */
+#define	ZFS_EXIT(zfsvfs)	ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG)
+
+#define	ZFS_VERIFY_ZP_ERROR(zp, error) do {			\
+	if (__predict_false((zp)->z_sa_hdl == NULL)) {		\
+		ZFS_EXIT((zp)->z_zfsvfs);			\
+		return (error);					\
+	}							\
+} while (0)
+
+/* Verifies the znode is valid */
+#define	ZFS_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, EIO)
+
+/*
+ * Macros for dealing with dmu_buf_hold
+ */
+#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
+#define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
+	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
+#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
+	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
+#define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
+	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
+#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
+	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
+
+/* Encode ZFS stored time values from a struct timespec */
+#define	ZFS_TIME_ENCODE(tp, stmp)		\
+{						\
+	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
+	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
+}
+
+/* Decode ZFS stored time values to a struct timespec */
+#define	ZFS_TIME_DECODE(tp, stmp)		\
+{						\
+	(tp)->tv_sec = (time_t)(stmp)[0];		\
+	(tp)->tv_nsec = (long)(stmp)[1];		\
+}
+#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
+	if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
+		zfs_tstamp_update_setup_ext(zp, ACCESSED, NULL, NULL, B_FALSE);
+
+extern void	zfs_tstamp_update_setup_ext(struct znode *,
+    uint_t, uint64_t [2], uint64_t [2], boolean_t have_tx);
+extern void zfs_znode_free(struct znode *);
+
+extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
+extern int zfsfstype;
+
+extern int zfs_znode_parent_and_name(struct znode *zp, struct znode **dzpp,
+    char *buf);
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _FREEBSD_ZFS_SYS_ZNODE_IMPL_H */

diff --git a/zfs/include/os/freebsd/zfs/sys/zpl.h b/zfs/include/os/freebsd/zfs/sys/zpl.h
new file mode 100644
index 0000000..fb2b4e0
--- /dev/null
+++ b/zfs/include/os/freebsd/zfs/sys/zpl.h

@@ -0,0 +1 @@
+/* Don't remove */

diff --git a/zfs/include/os/linux/Makefile.am b/zfs/include/os/linux/Makefile.am
new file mode 100644
index 0000000..605a1fc
--- /dev/null
+++ b/zfs/include/os/linux/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = kernel spl zfs

diff --git a/zfs/include/os/linux/kernel/Makefile.am b/zfs/include/os/linux/kernel/Makefile.am
new file mode 100644
index 0000000..08b2f5f
--- /dev/null
+++ b/zfs/include/os/linux/kernel/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = linux

diff --git a/zfs/include/os/linux/kernel/linux/Makefile.am b/zfs/include/os/linux/kernel/linux/Makefile.am
new file mode 100644
index 0000000..6ff0df5
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/Makefile.am

@@ -0,0 +1,22 @@
+KERNEL_H = \
+	dcache_compat.h \
+	xattr_compat.h \
+	vfs_compat.h \
+	blkdev_compat.h \
+	utsname_compat.h \
+	kmap_compat.h \
+	percpu_compat.h \
+	simd.h \
+	simd_x86.h \
+	simd_aarch64.h \
+	simd_powerpc.h \
+	mod_compat.h \
+	page_compat.h \
+	compiler_compat.h
+
+if CONFIG_KERNEL
+if BUILD_LINUX
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/linux
+kernel_HEADERS = $(KERNEL_H)
+endif
+endif

diff --git a/zfs/include/os/linux/kernel/linux/blkdev_compat.h b/zfs/include/os/linux/kernel/linux/blkdev_compat.h
new file mode 100644
index 0000000..912919f
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/blkdev_compat.h

@@ -0,0 +1,664 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * LLNL-CODE-403049.
+ */
+
+#ifndef _ZFS_BLKDEV_H
+#define	_ZFS_BLKDEV_H
+
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/hdreg.h>
+#include <linux/major.h>
+#include <linux/msdos_fs.h>	/* for SECTOR_* */
+
+#ifndef HAVE_BLK_QUEUE_FLAG_SET
+/*
+ * Fallback used when HAVE_BLK_QUEUE_FLAG_SET is not defined: forward to
+ * queue_flag_set() with the same arguments.
+ */
+static inline void
+blk_queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+	queue_flag_set(flag, q);
+}
+#endif
+
+#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR
+/*
+ * Fallback used when HAVE_BLK_QUEUE_FLAG_CLEAR is not defined: forward to
+ * queue_flag_clear() with the same arguments.
+ */
+static inline void
+blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+	queue_flag_clear(flag, q);
+}
+#endif
+
+/*
+ * 4.7 API,
+ * The blk_queue_write_cache() interface has replaced blk_queue_flush()
+ * interface.  However, the new interface is GPL-only thus we implement
+ * our own trivial wrapper when the GPL-only version is detected.
+ *
+ * 2.6.36 - 4.6 API,
+ * The blk_queue_flush() interface has replaced blk_queue_ordered()
+ * interface.  However, while the old interface was available to all the
+ * new one is GPL-only.   Thus if the GPL-only version is detected we
+ * implement our own trivial helper.
+ */
+static inline void
+blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
+{
+	/*
+	 * Every branch must leave the queue advertising exactly the
+	 * requested write-cache (wc) and FUA (fua) state, so the open-coded
+	 * variants also clear a capability when its argument is false.
+	 */
+#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
+	if (wc)
+		blk_queue_flag_set(QUEUE_FLAG_WC, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
+	if (fua)
+		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
+#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
+	blk_queue_write_cache(q, wc, fua);
+#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
+	/* Mirror the set/clear pairs above instead of only ever setting. */
+	if (wc)
+		q->flush_flags |= REQ_FLUSH;
+	else
+		q->flush_flags &= ~REQ_FLUSH;
+	if (fua)
+		q->flush_flags |= REQ_FUA;
+	else
+		q->flush_flags &= ~REQ_FUA;
+#elif defined(HAVE_BLK_QUEUE_FLUSH)
+	blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+/*
+ * Store ra_pages in the queue's backing_dev_info.  The body compiles to
+ * nothing when either HAVE_BLK_QUEUE_UPDATE_READAHEAD or
+ * HAVE_DISK_UPDATE_READAHEAD is defined.
+ */
+static inline void
+blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
+{
+#if !defined(HAVE_BLK_QUEUE_UPDATE_READAHEAD) && \
+	!defined(HAVE_DISK_UPDATE_READAHEAD)
+#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
+	/* backing_dev_info is reached through a pointer in this case */
+	q->backing_dev_info->ra_pages = ra_pages;
+#else
+	/* backing_dev_info is an embedded member in this case */
+	q->backing_dev_info.ra_pages = ra_pages;
+#endif
+#endif
+}
+
+#ifdef HAVE_BIO_BVEC_ITER
+#define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
+#define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
+#define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
+#define	BIO_BI_SKIP(bio)	(bio)->bi_iter.bi_bvec_done
+#define	bio_for_each_segment4(bv, bvp, b, i)	\
+	bio_for_each_segment((bv), (b), (i))
+typedef struct bvec_iter bvec_iterator_t;
+#else
+#define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
+#define	BIO_BI_SIZE(bio)	(bio)->bi_size
+#define	BIO_BI_IDX(bio)		(bio)->bi_idx
+#define	BIO_BI_SKIP(bio)	(0)
+#define	bio_for_each_segment4(bv, bvp, b, i)	\
+	bio_for_each_segment((bvp), (b), (i))
+typedef int bvec_iterator_t;
+#endif
+
+/*
+ * OR REQ_FAILFAST_MASK into *flags.  When CONFIG_BUG is defined the flags
+ * are left untouched for loop and md devices and, if BLOCK_EXT_MAJOR is
+ * defined, for devices with that major number as well.
+ */
+static inline void
+bio_set_flags_failfast(struct block_device *bdev, int *flags)
+{
+#ifdef CONFIG_BUG
+	/*
+	 * Disable FAILFAST for loopback devices because of the
+	 * following incorrect BUG_ON() in loop_make_request().
+	 * This support is also disabled for md devices because the
+	 * test suite layers md devices on top of loopback devices.
+	 * This may be removed when the loopback driver is fixed.
+	 *
+	 *   BUG_ON(!lo || (rw != READ && rw != WRITE));
+	 */
+	if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
+	    (MAJOR(bdev->bd_dev) == MD_MAJOR))
+		return;
+
+#ifdef BLOCK_EXT_MAJOR
+	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+		return;
+#endif /* BLOCK_EXT_MAJOR */
+#endif /* CONFIG_BUG */
+
+	*flags |= REQ_FAILFAST_MASK;
+}
+
+/*
+ * Maximum disk label length, it may be undefined for some kernels.
+ */
+#if !defined(DISK_NAME_LEN)
+#define	DISK_NAME_LEN	32
+#endif /* DISK_NAME_LEN */
+
+#ifdef HAVE_BIO_BI_STATUS
+/*
+ * Translate a block layer status code into the matching errno value.
+ * Status codes without a dedicated mapping are reported as EIO.
+ */
+static inline int
+bi_status_to_errno(blk_status_t status)
+{
+	int err;
+
+	switch (status)	{
+	case BLK_STS_OK:
+		err = 0;
+		break;
+	case BLK_STS_NOTSUPP:
+		err = EOPNOTSUPP;
+		break;
+	case BLK_STS_TIMEOUT:
+		err = ETIMEDOUT;
+		break;
+	case BLK_STS_NOSPC:
+		err = ENOSPC;
+		break;
+	case BLK_STS_TRANSPORT:
+		err = ENOLINK;
+		break;
+	case BLK_STS_TARGET:
+		err = EREMOTEIO;
+		break;
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+	case BLK_STS_RESV_CONFLICT:
+#else
+	case BLK_STS_NEXUS:
+#endif
+		err = EBADE;
+		break;
+	case BLK_STS_MEDIUM:
+		err = ENODATA;
+		break;
+	case BLK_STS_PROTECTION:
+		err = EILSEQ;
+		break;
+	case BLK_STS_RESOURCE:
+		err = ENOMEM;
+		break;
+	case BLK_STS_AGAIN:
+		err = EAGAIN;
+		break;
+	case BLK_STS_IOERR:
+	default:
+		/* BLK_STS_IOERR and anything unrecognised share EIO */
+		err = EIO;
+		break;
+	}
+
+	return (err);
+}
+
+/*
+ * Translate an errno value into the matching block layer status code.
+ * Values without a dedicated mapping are reported as BLK_STS_IOERR.
+ */
+static inline blk_status_t
+errno_to_bi_status(int error)
+{
+	blk_status_t sts;
+
+	switch (error) {
+	case 0:
+		sts = BLK_STS_OK;
+		break;
+	case EOPNOTSUPP:
+		sts = BLK_STS_NOTSUPP;
+		break;
+	case ETIMEDOUT:
+		sts = BLK_STS_TIMEOUT;
+		break;
+	case ENOSPC:
+		sts = BLK_STS_NOSPC;
+		break;
+	case ENOLINK:
+		sts = BLK_STS_TRANSPORT;
+		break;
+	case EREMOTEIO:
+		sts = BLK_STS_TARGET;
+		break;
+	case EBADE:
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+		sts = BLK_STS_RESV_CONFLICT;
+#else
+		sts = BLK_STS_NEXUS;
+#endif
+		break;
+	case ENODATA:
+		sts = BLK_STS_MEDIUM;
+		break;
+	case EILSEQ:
+		sts = BLK_STS_PROTECTION;
+		break;
+	case ENOMEM:
+		sts = BLK_STS_RESOURCE;
+		break;
+	case EAGAIN:
+		sts = BLK_STS_AGAIN;
+		break;
+	case EIO:
+	default:
+		/* EIO and anything unrecognised share BLK_STS_IOERR */
+		sts = BLK_STS_IOERR;
+		break;
+	}
+
+	return (sts);
+}
+#endif /* HAVE_BIO_BI_STATUS */
+
+/*
+ * 4.3 API change
+ * The bio_endio() prototype changed slightly.  These are helper
+ * macros to ensure the prototype and invocation are handled.
+ */
+#ifdef HAVE_1ARG_BIO_END_IO_T
+#ifdef HAVE_BIO_BI_STATUS
+#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno(bio->bi_status)
+#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
+#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
+/*
+ * Complete a bio with the given result.  error must be zero or negative
+ * (asserted); it is negated, converted with errno_to_bi_status(), stored in
+ * bio->bi_status, and then bio_endio() is called.
+ */
+static inline void
+bio_set_bi_status(struct bio *bio, int error)
+{
+	ASSERT3S(error, <=, 0);
+	bio->bi_status = errno_to_bi_status(-error);
+	bio_endio(bio);
+}
+#else
+#define	BIO_END_IO_ERROR(bio)		(-(bio->bi_error))
+#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
+#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
+/*
+ * Complete a bio with the given result.  error must be zero or negative
+ * (asserted); it is stored unchanged in bio->bi_error and then bio_endio()
+ * is called.
+ */
+static inline void
+bio_set_bi_error(struct bio *bio, int error)
+{
+	ASSERT3S(error, <=, 0);
+	bio->bi_error = error;
+	bio_endio(bio);
+}
+#endif /* HAVE_BIO_BI_STATUS */
+
+#else
+#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
+#define	BIO_END_IO(bio, error)		bio_endio(bio, error);
+#endif /* HAVE_1ARG_BIO_END_IO_T */
+
+/*
+ * 5.15 MACRO,
+ *   GD_DEAD
+ *
+ * 2.6.36 - 5.14 MACRO,
+ *   GENHD_FL_UP
+ *
+ * Check the disk status and return B_TRUE if alive
+ * otherwise B_FALSE
+ */
+static inline boolean_t
+zfs_check_disk_status(struct block_device *bdev)
+{
+#if defined(GENHD_FL_UP)
+	/* GENHD_FL_UP is preferred whenever the kernel headers define it */
+	return (!!(bdev->bd_disk->flags & GENHD_FL_UP));
+#elif defined(GD_DEAD)
+	return (!test_bit(GD_DEAD, &bdev->bd_disk->state));
+#else
+/*
+ * This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in
+ * the kernel - likely due to a macro change that needs to be chased down.
+ */
+#error "Unsupported kernel: no usable disk status check"
+#endif
+}
+
+/*
+ * 4.1 API,
+ * 3.10.0 CentOS 7.x API,
+ *   blkdev_reread_part()
+ *
+ * For older kernels trigger a re-reading of the partition table by calling
+ * check_disk_change() which calls flush_disk() to invalidate the device.
+ *
+ * For newer kernels (as of 5.10), bdev_check_media_change is used, in favor of
+ * check_disk_change(), with the modification that invalidation is no longer
+ * forced.
+ */
+#ifdef HAVE_CHECK_DISK_CHANGE
+#define	zfs_check_media_change(bdev)	check_disk_change(bdev)
+#ifdef HAVE_BLKDEV_REREAD_PART
+#define	vdev_bdev_reread_part(bdev)	blkdev_reread_part(bdev)
+#else
+#define	vdev_bdev_reread_part(bdev)	check_disk_change(bdev)
+#endif /* HAVE_BLKDEV_REREAD_PART */
+#else
+#ifdef HAVE_BDEV_CHECK_MEDIA_CHANGE
+/*
+ * Call bdev_check_media_change(); if it reports a change and
+ * HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK is defined, also invoke the
+ * disk's revalidate_disk operation when one is set.  Always returns 0,
+ * whether or not a change was reported.
+ */
+static inline int
+zfs_check_media_change(struct block_device *bdev)
+{
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
+	struct gendisk *gd = bdev->bd_disk;
+	const struct block_device_operations *bdo = gd->fops;
+#endif
+
+	if (!bdev_check_media_change(bdev))
+		return (0);
+
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
+	/*
+	 * Force revalidation, to mimic the old behavior of
+	 * check_disk_change()
+	 */
+	if (bdo->revalidate_disk)
+		bdo->revalidate_disk(gd);
+#endif
+
+	return (0);
+}
+#define	vdev_bdev_reread_part(bdev)	zfs_check_media_change(bdev)
+#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
+#define	vdev_bdev_reread_part(bdev)	disk_check_media_change(bdev->bd_disk)
+#define	zfs_check_media_change(bdev)	disk_check_media_change(bdev->bd_disk)
+#else
+/*
+ * This is encountered if check_disk_change() and bdev_check_media_change()
+ * are not available in the kernel - likely due to an API change that needs
+ * to be chased down.
+ */
+#error "Unsupported kernel: no usable disk change check"
+#endif /* HAVE_BDEV_CHECK_MEDIA_CHANGE */
+#endif /* HAVE_CHECK_DISK_CHANGE */
+
+/*
+ * 2.6.27 API change
+ * The function was exported for use, prior to this it existed but the
+ * symbol was not exported.
+ *
+ * 4.4.0-6.21 API change for Ubuntu
+ * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
+ *
+ * 5.11 API change
+ * Changed to take a dev_t argument which is set on success and return a
+ * non-zero error code on failure.
+ */
+/*
+ * Resolve path to a device number stored in *dev.  Returns 0 on success or
+ * the error produced by lookup_bdev() on failure; *dev is not written by
+ * the pointer-returning variants when the lookup fails.
+ */
+static inline int
+vdev_lookup_bdev(const char *path, dev_t *dev)
+{
+#if defined(HAVE_DEVT_LOOKUP_BDEV)
+	return (lookup_bdev(path, dev));
+#elif defined(HAVE_1ARG_LOOKUP_BDEV)
+	struct block_device *bdev = lookup_bdev(path);
+	if (IS_ERR(bdev))
+		return (PTR_ERR(bdev));
+
+	/* only the device number is kept; hand the block_device back */
+	*dev = bdev->bd_dev;
+	bdput(bdev);
+
+	return (0);
+#elif defined(HAVE_MODE_LOOKUP_BDEV)
+	struct block_device *bdev = lookup_bdev(path, FMODE_READ);
+	if (IS_ERR(bdev))
+		return (PTR_ERR(bdev));
+
+	/* only the device number is kept; hand the block_device back */
+	*dev = bdev->bd_dev;
+	bdput(bdev);
+
+	return (0);
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+#if defined(HAVE_BLK_MODE_T)
+#define	blk_mode_is_open_write(flag)	((flag) & BLK_OPEN_WRITE)
+#else
+#define	blk_mode_is_open_write(flag)	((flag) & FMODE_WRITE)
+#endif
+
+/*
+ * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
+ */
+#if !defined(HAVE_BIO_SET_OP_ATTRS)
+/*
+ * Fallback used when HAVE_BIO_SET_OP_ATTRS is not defined: combine rw and
+ * flags into the bio's operation/flags word.
+ */
+static inline void
+bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
+{
+#if defined(HAVE_BIO_BI_OPF)
+	/* NOTE: this overwrites bi_opf, while the bi_rw path below ORs in */
+	bio->bi_opf = rw | flags;
+#else
+	bio->bi_rw |= rw | flags;
+#endif /* HAVE_BIO_BI_OPF */
+}
+#endif
+
+/*
+ * bio_set_flush - Set the appropriate flags in a bio to guarantee
+ * data are on non-volatile media on completion.
+ *
+ * 2.6.37 - 4.8 API,
+ *   Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
+ *   replacement for WRITE_BARRIER to allow expressing richer semantics
+ *   to the block layer.  It's up to the block layer to implement the
+ *   semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
+ *
+ * 4.8 - 4.9 API,
+ *   REQ_FLUSH was renamed to REQ_PREFLUSH.  For consistency with previous
+ *   OpenZFS releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
+ *
+ * 4.10 API,
+ *   The read/write flags and their modifiers, including WRITE_FLUSH,
+ *   WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
+ *   torvalds/linux@70fd7614 and replaced by direct flag modification
+ *   of the REQ_ flags in bio->bi_opf.  Use REQ_PREFLUSH.
+ */
+static inline void
+bio_set_flush(struct bio *bio)
+{
+#if defined(HAVE_REQ_PREFLUSH)	/* >= 4.10 */
+	bio_set_op_attrs(bio, 0, REQ_PREFLUSH | REQ_OP_WRITE);
+#elif defined(WRITE_FLUSH_FUA)	/* >= 2.6.37 and <= 4.9 */
+	bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
+#else
+#error	"Allowing the build will cause bio_set_flush requests to be ignored."
+#endif
+}
+
+/*
+ * 4.8 API,
+ *   REQ_OP_FLUSH
+ *
+ * 4.8-rc0 - 4.8-rc1,
+ *   REQ_PREFLUSH
+ *
+ * 2.6.36 - 4.7 API,
+ *   REQ_FLUSH
+ *
+ * Testing the flag provided by the running kernel is correct in all cases
+ * but may have a performance impact for some kernels.  It has the advantage
+ * of minimizing kernel specific changes in the zvol code.
+ *
+ */
+/*
+ * Returns B_TRUE if the bio carries the flush op/flag tested for this
+ * kernel configuration, otherwise B_FALSE.  Mask results are normalised
+ * with !! so the return value is always one of the two boolean_t values.
+ */
+static inline boolean_t
+bio_is_flush(struct bio *bio)
+{
+#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
+	return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
+#elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
+	return (!!(bio->bi_opf & REQ_PREFLUSH));
+#elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
+	return (!!(bio->bi_rw & REQ_PREFLUSH));
+#elif defined(HAVE_REQ_FLUSH)
+	return (!!(bio->bi_rw & REQ_FLUSH));
+#else
+#error	"Unsupported kernel"
+#endif
+}
+
+/*
+ * 4.8 API,
+ *   REQ_FUA flag moved to bio->bi_opf
+ *
+ * 2.6.x - 4.7 API,
+ *   REQ_FUA
+ */
+/*
+ * Returns B_TRUE if REQ_FUA is set on the bio, otherwise B_FALSE.  The mask
+ * result is normalised with !! so the return value is always one of the two
+ * boolean_t values.
+ */
+static inline boolean_t
+bio_is_fua(struct bio *bio)
+{
+#if defined(HAVE_BIO_BI_OPF)
+	return (!!(bio->bi_opf & REQ_FUA));
+#elif defined(REQ_FUA)
+	return (!!(bio->bi_rw & REQ_FUA));
+#else
+#error	"Allowing the build will cause fua requests to be ignored."
+#endif
+}
+
+/*
+ * 4.8 API,
+ *   REQ_OP_DISCARD
+ *
+ * 2.6.36 - 4.7 API,
+ *   REQ_DISCARD
+ *
+ * In all cases the normal I/O path is used for discards.  The only
+ * difference is how the kernel tags individual I/Os as discards.
+ */
+/*
+ * Returns B_TRUE if the bio is tagged as a discard, otherwise B_FALSE.  The
+ * legacy mask test is normalised with !! so the return value is always one
+ * of the two boolean_t values.
+ */
+static inline boolean_t
+bio_is_discard(struct bio *bio)
+{
+#if defined(HAVE_REQ_OP_DISCARD)
+	return (bio_op(bio) == REQ_OP_DISCARD);
+#elif defined(HAVE_REQ_DISCARD)
+	return (!!(bio->bi_rw & REQ_DISCARD));
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+/*
+ * 4.8 API,
+ *   REQ_OP_SECURE_ERASE
+ *
+ * 2.6.36 - 4.7 API,
+ *   REQ_SECURE
+ */
+/*
+ * Returns B_TRUE if the bio is tagged as a secure erase, otherwise B_FALSE.
+ * When neither HAVE_REQ_OP_SECURE_ERASE nor REQ_SECURE is defined the
+ * answer is always B_FALSE.  The legacy mask test is normalised with !! so
+ * the return value is always one of the two boolean_t values.
+ */
+static inline boolean_t
+bio_is_secure_erase(struct bio *bio)
+{
+#if defined(HAVE_REQ_OP_SECURE_ERASE)
+	return (bio_op(bio) == REQ_OP_SECURE_ERASE);
+#elif defined(REQ_SECURE)
+	return (!!(bio->bi_rw & REQ_SECURE));
+#else
+	return (B_FALSE);
+#endif
+}
+
+/*
+ * 2.6.33 API change
+ * Discard granularity and alignment restrictions may now be set.  For
+ * older kernels which do not support this it is safe to skip it.
+ */
+static inline void
+blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
+{
+	/* written unconditionally; no configure guard is applied here */
+	q->limits.discard_granularity = dg;
+}
+
+/*
+ * 5.19 API,
+ *   bdev_max_discard_sectors()
+ *
+ * 2.6.32 API,
+ *   blk_queue_discard()
+ */
+static inline boolean_t
+bdev_discard_supported(struct block_device *bdev)
+{
+#if defined(HAVE_BDEV_MAX_DISCARD_SECTORS)
+	return (!!bdev_max_discard_sectors(bdev));
+#elif defined(HAVE_BLK_QUEUE_DISCARD)
+	return (!!blk_queue_discard(bdev_get_queue(bdev)));
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+/*
+ * 5.19 API,
+ *   bdev_max_secure_erase_sectors()
+ *
+ * 4.8 API,
+ *   blk_queue_secure_erase()
+ *
+ * 2.6.36 - 4.7 API,
+ *   blk_queue_secdiscard()
+ */
+static inline boolean_t
+bdev_secure_discard_supported(struct block_device *bdev)
+{
+#if defined(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS)
+	return (!!bdev_max_secure_erase_sectors(bdev));
+#elif defined(HAVE_BLK_QUEUE_SECURE_ERASE)
+	return (!!blk_queue_secure_erase(bdev_get_queue(bdev)));
+#elif defined(HAVE_BLK_QUEUE_SECDISCARD)
+	return (!!blk_queue_secdiscard(bdev_get_queue(bdev)));
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+/*
+ * A common holder for vdev_bdev_open() is used to relax the exclusive open
+ * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
+ * allow them to open the device multiple times.  Other kernel callers and
+ * user space processes which don't pass this value will get EBUSY.  This is
+ * currently required for the correct operation of hot spares.
+ */
+#define	VDEV_HOLDER			((void *)0x2401de7)
+
+/*
+ * Begin I/O accounting for bio through whichever accounting interface is
+ * selected at compile time.  Returns the value produced by the kernel's
+ * *_start_io_acct() helper, or jiffies sampled before calling
+ * generic_start_io_acct(); returns 0 when no interface is defined.
+ * q, disk and rw are only used by some of the variants.
+ */
+static inline unsigned long
+blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)),
+    struct gendisk *disk __attribute__((unused)),
+    int rw __attribute__((unused)), struct bio *bio)
+{
+#if defined(HAVE_BDEV_IO_ACCT_63)
+	return (bdev_start_io_acct(bio->bi_bdev, bio_op(bio),
+	    jiffies));
+#elif defined(HAVE_BDEV_IO_ACCT_OLD)
+	return (bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+	    bio_op(bio), jiffies));
+#elif defined(HAVE_DISK_IO_ACCT)
+	return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio)));
+#elif defined(HAVE_BIO_IO_ACCT)
+	return (bio_start_io_acct(bio));
+#elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
+	unsigned long start_time = jiffies;
+	generic_start_io_acct(rw, bio_sectors(bio), &disk->part0);
+	return (start_time);
+#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
+	unsigned long start_time = jiffies;
+	generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0);
+	return (start_time);
+#else
+	/* Unsupported */
+	return (0);
+#endif
+}
+
+/*
+ * Finish I/O accounting for bio through whichever accounting interface is
+ * selected at compile time, passing start_time through to it.  Does nothing
+ * when no interface is defined.  q, disk and rw are only used by some of
+ * the variants.
+ */
+static inline void
+blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)),
+    struct gendisk *disk __attribute__((unused)),
+    int rw __attribute__((unused)), struct bio *bio, unsigned long start_time)
+{
+#if defined(HAVE_BDEV_IO_ACCT_63)
+	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), bio_sectors(bio),
+	    start_time);
+#elif defined(HAVE_BDEV_IO_ACCT_OLD)
+	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+#elif defined(HAVE_DISK_IO_ACCT)
+	disk_end_io_acct(disk, bio_op(bio), start_time);
+#elif defined(HAVE_BIO_IO_ACCT)
+	bio_end_io_acct(bio, start_time);
+#elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
+	generic_end_io_acct(rw, &disk->part0, start_time);
+#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
+	generic_end_io_acct(q, rw, &disk->part0, start_time);
+#endif
+}
+
+#ifndef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+/*
+ * Allocate a request queue that uses make_request as its make-request
+ * function.  node_id is only passed on by the first two variants; the last
+ * variant ignores it and allocates with GFP_KERNEL.  Returns the queue, or
+ * whatever the underlying allocator returned on failure (the last variant
+ * checks for NULL before installing make_request).
+ */
+static inline struct request_queue *
+blk_generic_alloc_queue(make_request_fn make_request, int node_id)
+{
+#if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
+	return (blk_alloc_queue(make_request, node_id));
+#elif defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH)
+	return (blk_alloc_queue_rh(make_request, node_id));
+#else
+	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);
+	if (q != NULL)
+		blk_queue_make_request(q, make_request);
+
+	return (q);
+#endif
+}
+#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+
+#endif /* _ZFS_BLKDEV_H */

diff --git a/zfs/include/os/linux/kernel/linux/compiler_compat.h b/zfs/include/os/linux/kernel/linux/compiler_compat.h
new file mode 100644
index 0000000..2c0704d
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/compiler_compat.h

@@ -0,0 +1,43 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2018 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _ZFS_COMPILER_COMPAT_H
+#define	_ZFS_COMPILER_COMPAT_H
+
+#include <linux/compiler.h>
+
+#if !defined(fallthrough)
+#if defined(HAVE_IMPLICIT_FALLTHROUGH)
+#define	fallthrough		__attribute__((__fallthrough__))
+#else
+#define	fallthrough		((void)0)
+#endif
+#endif
+
+#if !defined(READ_ONCE)
+#define	READ_ONCE(x)		ACCESS_ONCE(x)
+#endif
+
+#endif	/* _ZFS_COMPILER_COMPAT_H */

diff --git a/zfs/include/os/linux/kernel/linux/dcache_compat.h b/zfs/include/os/linux/kernel/linux/dcache_compat.h
new file mode 100644
index 0000000..f87f165
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/dcache_compat.h

@@ -0,0 +1,100 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _ZFS_DCACHE_H
+#define	_ZFS_DCACHE_H
+
+#include <linux/dcache.h>
+
+#define	dname(dentry)	((char *)((dentry)->d_name.name))
+#define	dlen(dentry)	((int)((dentry)->d_name.len))
+
+#ifndef HAVE_D_MAKE_ROOT
+#define	d_make_root(inode)	d_alloc_root(inode)
+#endif /* HAVE_D_MAKE_ROOT */
+
+#ifdef HAVE_DENTRY_D_U_ALIASES
+#define	d_alias			d_u.d_alias
+#endif
+
+/*
+ * Starting from Linux 5.13, flush_dcache_page() becomes an inline function
+ * and under some configurations, may indirectly reference GPL-only
+ * cpu_feature_keys on powerpc. Override this function when it is detected
+ * being GPL-only.
+ */
+#if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
+#include <linux/simd_powerpc.h>
+#define	flush_dcache_page(page)	do {					\
+		if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&	\
+		    test_bit(PG_dcache_clean, &(page)->flags))		\
+			clear_bit(PG_dcache_clean, &(page)->flags);	\
+	} while (0)
+#endif
+
+/*
+ * 2.6.30 API change,
+ * The const keyword was added to the 'struct dentry_operations' in
+ * the dentry structure.  To handle this we define an appropriate
+ * dentry_operations_t typedef which can be used.
+ */
+typedef const struct dentry_operations	dentry_operations_t;
+
+/*
+ * 2.6.38 API addition,
+ * Added d_clear_d_op() helper function which clears some flags and the
+ * registered dentry->d_op table.  This is required because d_set_d_op()
+ * issues a warning when the dentry operations table is already set.
+ * For the .zfs control directory to work properly we must be able to
+ * override the default operations table and register custom .d_automount
+ * and .d_revalidate callbacks.
+ */
+static inline void
+d_clear_d_op(struct dentry *dentry)
+{
+	/* flag bits removed together with the operations table */
+	const unsigned int op_flags = DCACHE_OP_HASH | DCACHE_OP_COMPARE |
+	    DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE;
+
+	dentry->d_flags &= ~op_flags;
+	dentry->d_op = NULL;
+}
+
+/*
+ * Walk and invalidate all dentry aliases of an inode
+ * unless it's a mountpoint
+ */
+static inline void
+zpl_d_drop_aliases(struct inode *inode)
+{
+	struct dentry *alias;
+
+	/* the alias list is walked with inode->i_lock held */
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		/* skip roots, mountpoints and entries for another inode */
+		if (IS_ROOT(alias) || d_mountpoint(alias) ||
+		    (alias->d_inode != inode))
+			continue;
+
+		d_drop(alias);
+	}
+	spin_unlock(&inode->i_lock);
+}
+#endif /* _ZFS_DCACHE_H */

diff --git a/zfs/include/os/linux/kernel/linux/kmap_compat.h b/zfs/include/os/linux/kernel/linux/kmap_compat.h
new file mode 100644
index 0000000..42f463a
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/kmap_compat.h

@@ -0,0 +1,43 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ */
+
+#ifndef _ZFS_KMAP_H
+#define	_ZFS_KMAP_H
+
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+
+/* 2.6.37 API change */
+#define	zfs_kmap_atomic(page)	kmap_atomic(page)
+#define	zfs_kunmap_atomic(addr)	kunmap_atomic(addr)
+
+/* 5.0 API change - no more 'type' argument for access_ok() */
+#ifdef HAVE_ACCESS_OK_TYPE
+#define	zfs_access_ok(type, addr, size)	access_ok(type, addr, size)
+#else
+#define	zfs_access_ok(type, addr, size)	access_ok(addr, size)
+#endif
+
+#endif	/* _ZFS_KMAP_H */

diff --git a/zfs/include/os/linux/kernel/linux/mod_compat.h b/zfs/include/os/linux/kernel/linux/mod_compat.h
new file mode 100644
index 0000000..642ff98
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/mod_compat.h

@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic@gmail.com>.
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#ifndef _MOD_COMPAT_H
+#define	_MOD_COMPAT_H
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+/*
+ * Despite constifying struct kernel_param_ops, some older kernels define a
+ * `__check_old_set_param()` function in their headers that checks for a
+ * non-constified `->set()`. This has long been fixed in Linux mainline, but
+ * since we support older kernels, we workaround it by using a preprocessor
+ * definition to disable it.
+ */
+#define	__check_old_set_param(_) (0)
+
+typedef const struct kernel_param zfs_kernel_param_t;
+
+#define	ZMOD_RW 0644
+#define	ZMOD_RD 0444
+
+/* BEGIN CSTYLED */
+#define	INT int
+#define	UINT uint
+#define	ULONG ulong
+#define	LONG long
+#define	STRING charp
+/* END CSTYLED */
+
+/*
+ * Names accepted as the "scope_prefix" argument of the ZFS_MODULE_PARAM*
+ * macros below.  Those macros only use the name inside a sizeof comparison
+ * against this enum, so an identifier that is not declared fails to compile.
+ */
+enum scope_prefix_types {
+	zfs,
+	zfs_arc,
+	zfs_condense,
+	zfs_dbuf,
+	zfs_dbuf_cache,
+	zfs_deadman,
+	zfs_dedup,
+	zfs_l2arc,
+	zfs_livelist,
+	zfs_livelist_condense,
+	zfs_lua,
+	zfs_metaslab,
+	zfs_mg,
+	zfs_multihost,
+	zfs_prefetch,
+	zfs_reconstruct,
+	zfs_recv,
+	zfs_send,
+	zfs_spa,
+	zfs_trim,
+	zfs_txg,
+	zfs_vdev,
+	zfs_vdev_cache,
+	zfs_vdev_file,
+	zfs_vdev_mirror,
+	zfs_vnops,
+	zfs_zevent,
+	zfs_zio,
+	zfs_zil
+};
+
+/*
+ * Declare a module parameter / sysctl node
+ *
+ * "scope_prefix" the part of the sysctl / sysfs tree the node resides under
+ *   (currently a no-op on Linux)
+ * "name_prefix" the part of the variable name that will be excluded from the
+ *   exported names on platforms with a hierarchical namespace
+ * "name" the part of the variable that will be exposed on platforms with a
+ *    hierarchical namespace, or as name_prefix ## name on Linux
+ * "type" the variable type
+ * "perm" the permissions (read/write or read only)
+ * "desc" a brief description of the option
+ *
+ * Examples:
+ * ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_inc, UINT,
+ * 	ZMOD_RW, "Rotating media load increment for non-seeking I/O's");
+ * on FreeBSD:
+ *   vfs.zfs.vdev.mirror.rotating_inc
+ * on Linux:
+ *   zfs_vdev_mirror_rotating_inc
+ *
+ * ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW,
+ * 	"Limit one prefetch call to this size");
+ * on FreeBSD:
+ *   vfs.zfs.dmu_prefetch_max
+ * on Linux:
+ *   dmu_prefetch_max
+ */
+/* BEGIN CSTYLED */
+#define	ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc) \
+	CTASSERT_GLOBAL((sizeof (scope_prefix) == sizeof (enum scope_prefix_types))); \
+	module_param(name_prefix ## name, type, perm); \
+	MODULE_PARM_DESC(name_prefix ## name, desc)
+/* END CSTYLED */
+
+/*
+ * Declare a module parameter / sysctl node
+ *
+ * "scope_prefix" the part of the sysctl / sysfs tree the node resides under
+ *   (currently a no-op on Linux)
+ * "name_prefix" the part of the variable name that will be excluded from the
+ *   exported names on platforms with a hierarchical namespace
+ * "name" the part of the variable that will be exposed on platforms with a
+ *    hierarchical namespace, or as name_prefix ## name on Linux
+ * "setfunc" setter function
+ * "getfunc" getter function
+ * "perm" the permissions (read/write or read only)
+ * "desc" a brief description of the option
+ *
+ * Examples:
+ * ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift,
+ * 	param_get_int, ZMOD_RW, "Reserved free space in pool");
+ * on FreeBSD:
+ *   vfs.zfs.spa_slop_shift
+ * on Linux:
+ *   spa_slop_shift
+ */
+/* BEGIN CSTYLED */
+#define	ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, getfunc, perm, desc) \
+	CTASSERT_GLOBAL((sizeof (scope_prefix) == sizeof (enum scope_prefix_types))); \
+	module_param_call(name_prefix ## name, setfunc, getfunc, &name_prefix ## name, perm); \
+	MODULE_PARM_DESC(name_prefix ## name, desc)
+/* END CSTYLED */
+
+/*
+ * As above, but there is no variable with the name name_prefix ## name,
+ * so NULL is passed to module_param_call instead.
+ */
+/* BEGIN CSTYLED */
+#define	ZFS_MODULE_VIRTUAL_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, getfunc, perm, desc) \
+	CTASSERT_GLOBAL((sizeof (scope_prefix) == sizeof (enum scope_prefix_types))); \
+	module_param_call(name_prefix ## name, setfunc, getfunc, NULL, perm); \
+	MODULE_PARM_DESC(name_prefix ## name, desc)
+/* END CSTYLED */
+
+#define	ZFS_MODULE_PARAM_ARGS	const char *buf, zfs_kernel_param_t *kp
+
+#define	ZFS_MODULE_DESCRIPTION(s) MODULE_DESCRIPTION(s)
+#define	ZFS_MODULE_AUTHOR(s) MODULE_AUTHOR(s)
+#define	ZFS_MODULE_LICENSE(s) MODULE_LICENSE(s)
+#define	ZFS_MODULE_VERSION(s) MODULE_VERSION(s)
+
+#define	module_init_early(fn) module_init(fn)
+
+#endif	/* _MOD_COMPAT_H */

diff --git a/zfs/include/linux/page_compat.h b/zfs/include/os/linux/kernel/linux/page_compat.h
similarity index 100%
rename from zfs/include/linux/page_compat.h
rename to zfs/include/os/linux/kernel/linux/page_compat.h


diff --git a/zfs/include/os/linux/kernel/linux/percpu_compat.h b/zfs/include/os/linux/kernel/linux/percpu_compat.h
new file mode 100644
index 0000000..e7a4242
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/percpu_compat.h

@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#ifndef _ZFS_PERCPU_H
+#define	_ZFS_PERCPU_H
+
+#include <linux/percpu_counter.h>
+
+/*
+ * 3.18 API change: percpu_counter_init() gained a gfp mask argument which
+ * is used for the dynamic allocation of the actual counter.
+ *
+ * percpu_counter_init_common() always takes the mask; it is forwarded when
+ * HAVE_PERCPU_COUNTER_INIT_WITH_GFP is defined and dropped otherwise.
+ */
+#if !defined(HAVE_PERCPU_COUNTER_INIT_WITH_GFP)
+#define	percpu_counter_init_common(counter, n, gfp) \
+	percpu_counter_init(counter, n)
+#else
+#define	percpu_counter_init_common(counter, n, gfp) \
+	percpu_counter_init(counter, n, gfp)
+#endif
+
+#endif /* _ZFS_PERCPU_H */

diff --git a/zfs/include/os/linux/kernel/linux/simd.h b/zfs/include/os/linux/kernel/linux/simd.h
new file mode 100644
index 0000000..4cde248
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/simd.h

@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2019 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _LINUX_SIMD_H
+#define	_LINUX_SIMD_H
+
+/*
+ * Select the architecture-specific kfpu_* implementation.
+ *
+ * NOTE(review): __x86 is not defined by anything this header includes
+ * itself; presumably the includer has already pulled in the header that
+ * defines it (simd_aarch64.h includes <sys/isa_defs.h>) - confirm.
+ */
+#if defined(__x86)
+#include <linux/simd_x86.h>
+
+#elif defined(__aarch64__)
+#include <linux/simd_aarch64.h>
+
+#elif defined(__powerpc__)
+#include <linux/simd_powerpc.h>
+#else
+
+/*
+ * No implementation for this architecture: kfpu_allowed() reports 0, the
+ * begin/end markers expand to empty statements, kfpu_init() evaluates to 0
+ * and kfpu_fini() to a void expression.
+ */
+#define	kfpu_allowed()		0
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+#endif
+#endif /* _LINUX_SIMD_H */

diff --git a/zfs/include/os/linux/kernel/linux/simd_aarch64.h b/zfs/include/os/linux/kernel/linux/simd_aarch64.h
new file mode 100644
index 0000000..50937e9
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/simd_aarch64.h

@@ -0,0 +1,54 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau <romain@dolbeau.org>.
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
+ */
+
+#ifndef _LINUX_SIMD_AARCH64_H
+#define	_LINUX_SIMD_AARCH64_H
+
+#include <sys/isa_defs.h>
+
+#if defined(__aarch64__)
+
+#include <sys/types.h>
+#include <asm/neon.h>
+
+/*
+ * On aarch64 the kfpu section markers map directly onto the kernel's
+ * kernel_neon_begin()/kernel_neon_end(); kfpu_allowed() is always 1 and
+ * kfpu_init()/kfpu_fini() have nothing to set up or tear down.
+ */
+#define	kfpu_allowed()		1
+#define	kfpu_begin()		kernel_neon_begin()
+#define	kfpu_end()		kernel_neon_end()
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+#endif /* __aarch64__ */
+
+#endif /* _LINUX_SIMD_AARCH64_H */

diff --git a/zfs/include/os/linux/kernel/linux/simd_powerpc.h b/zfs/include/os/linux/kernel/linux/simd_powerpc.h
new file mode 100644
index 0000000..422b85a
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/simd_powerpc.h

@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2019 Romain Dolbeau
+ *           <romain.dolbeau@european-processor-initiative.eu>
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
+ *
+ * SIMD support:
+ *
+ * Following functions should be called to determine whether CPU feature
+ * is supported. All functions are usable in kernel and user space.
+ * If a SIMD algorithm is using more than one instruction set
+ * all relevant feature test functions should be called.
+ *
+ * Supported features:
+ *	zfs_altivec_available()
+ */
+
+#ifndef _LINUX_SIMD_POWERPC_H
+#define	_LINUX_SIMD_POWERPC_H
+
+/* only for __powerpc__ */
+#if defined(__powerpc__)
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include <sys/types.h>
+#include <linux/version.h>
+
+#define	kfpu_allowed()		1
+#define	kfpu_begin()					\
+	{						\
+		preempt_disable();			\
+		enable_kernel_altivec();		\
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+#define	kfpu_end()				\
+	{					\
+		disable_kernel_altivec();	\
+		preempt_enable();		\
+	}
+#else
+/* seems that before 4.5 no-one bothered disabling ... */
+#define	kfpu_end()		preempt_enable()
+#endif
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+/*
+ * Linux 4.7 makes cpu_has_feature() use jump labels on powerpc if
+ * CONFIG_JUMP_LABEL_FEATURE_CHECKS is enabled; in that case, however, it
+ * references the GPL-only symbol cpu_feature_keys. Therefore we override
+ * this interface when it is detected as being GPL-only.
+ */
+#if defined(CONFIG_JUMP_LABEL_FEATURE_CHECKS) && \
+    defined(HAVE_CPU_HAS_FEATURE_GPL_ONLY)
+#define	cpu_has_feature(feature)	early_cpu_has_feature(feature)
+#endif
+
+/*
+ * Check if AltiVec instruction set is available.
+ *
+ * Opens a kfpu section, reads the Machine State Register with mfmsr and
+ * returns whether mask 0x2000000 is set in it.
+ */
+static inline boolean_t
+zfs_altivec_available(void)
+{
+	boolean_t res;
+	/* suggested by macallan at netbsd dot org */
+#if defined(__powerpc64__)
+	u64 msr;
+#else
+	u32 msr;
+#endif
+	kfpu_begin();
+	__asm volatile("mfmsr %0" : "=r"(msr));
+	/*
+	 * The bit to test is numbered 38 in the 64-bit MSR
+	 * (Power ISA Version 3.0B, p944) and 6 in the 32-bit MSR
+	 * (AltiVec Technology Programming Environments Manual, p49 (2-9)).
+	 * PowerPC numbers bits starting from the most significant one, so
+	 * both numbers designate the same mask, 0x2000000, when counted
+	 * from the least significant end.
+	 */
+	res = (msr & 0x2000000) != 0;
+	kfpu_end();
+	return (res);
+}
+#endif /* defined(__powerpc__) */
+
+#endif /* _LINUX_SIMD_POWERPC_H */

diff --git a/zfs/include/os/linux/kernel/linux/simd_x86.h b/zfs/include/os/linux/kernel/linux/simd_x86.h
new file mode 100644
index 0000000..660f0d4
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/simd_x86.h

@@ -0,0 +1,771 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
+ *
+ * SIMD support:
+ *
+ * Following functions should be called to determine whether CPU feature
+ * is supported. All functions are usable in kernel and user space.
+ * If a SIMD algorithm is using more than one instruction set
+ * all relevant feature test functions should be called.
+ *
+ * Supported features:
+ *	zfs_sse_available()
+ *	zfs_sse2_available()
+ *	zfs_sse3_available()
+ *	zfs_ssse3_available()
+ *	zfs_sse4_1_available()
+ *	zfs_sse4_2_available()
+ *
+ *	zfs_avx_available()
+ *	zfs_avx2_available()
+ *
+ *	zfs_bmi1_available()
+ *	zfs_bmi2_available()
+ *
+ *	zfs_avx512f_available()
+ *	zfs_avx512cd_available()
+ *	zfs_avx512er_available()
+ *	zfs_avx512pf_available()
+ *	zfs_avx512bw_available()
+ *	zfs_avx512dq_available()
+ *	zfs_avx512vl_available()
+ *	zfs_avx512ifma_available()
+ *	zfs_avx512vbmi_available()
+ *
+ * NOTE(AVX-512VL):	If using AVX-512 instructions with 128Bit registers
+ *			also add zfs_avx512vl_available() to feature check.
+ */
+
+#ifndef _LINUX_SIMD_X86_H
+#define	_LINUX_SIMD_X86_H
+
+/* only for __x86 */
+#if defined(__x86)
+
+#include <sys/types.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Disable the WARN_ON_FPU() macro to prevent additional dependencies
+ * when providing the kfpu_* functions.  Relevant warnings are included
+ * as appropriate and are unconditionally enabled.
+ */
+#if defined(CONFIG_X86_DEBUG_FPU) && !defined(KERNEL_EXPORTS_X86_FPU)
+#undef CONFIG_X86_DEBUG_FPU
+#endif
+
+#if defined(HAVE_KERNEL_FPU_API_HEADER)
+#include <asm/fpu/api.h>
+#if defined(HAVE_KERNEL_FPU_INTERNAL_HEADER)
+#include <asm/fpu/internal.h>
+#endif
+#if defined(HAVE_KERNEL_FPU_XCR_HEADER)
+#include <asm/fpu/xcr.h>
+#endif
+#else
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#endif
+
+/*
+ * The following cases are for kernels which export either the
+ * kernel_fpu_* or __kernel_fpu_* functions.
+ */
+#if defined(KERNEL_EXPORTS_X86_FPU)
+
+#define	kfpu_allowed()		1
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
+#define	kfpu_begin()		\
+{				\
+	preempt_disable();	\
+	__kernel_fpu_begin();	\
+}
+#define	kfpu_end()		\
+{				\
+	__kernel_fpu_end();	\
+	preempt_enable();	\
+}
+
+#elif defined(HAVE_KERNEL_FPU)
+#define	kfpu_begin()		kernel_fpu_begin()
+#define	kfpu_end()		kernel_fpu_end()
+
+#else
+/*
+ * This case is unreachable.  When KERNEL_EXPORTS_X86_FPU is defined then
+ * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
+ */
+#error "Unreachable kernel configuration"
+#endif
+
+#else /* defined(KERNEL_EXPORTS_X86_FPU) */
+
+/*
+ * When the kernel_fpu_* symbols are unavailable then provide our own
+ * versions which allow the FPU to be safely used.
+ */
+#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Some sanity checks.
+ * HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL are exclusive.
+ */
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+#error "HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL defined"
+#endif
+/*
+ * For kernels >= 5.16 we have to use inline assembly with the XSAVE{,OPT,S}
+ * instructions, so we need the toolchain to support at least XSAVE.
+ */
+#if !defined(HAVE_XSAVE)
+#error "Toolchain needs to support the XSAVE assembler instruction"
+#endif
+#endif
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+extern union fpregs_state **zfs_kfpu_fpregs;
+
+/*
+ * Release the per-cpu FPU state buffers and the zfs_kfpu_fpregs table that
+ * indexes them.
+ *
+ * Also used by kfpu_init() to unwind a partially completed allocation, so
+ * entries that were never filled in (still NULL) are skipped.  The table
+ * pointer is reset afterwards, which makes a call with nothing allocated,
+ * or a repeated call, a harmless no-op instead of a double free.
+ */
+static inline void
+kfpu_fini(void)
+{
+	int cpu;
+
+	if (zfs_kfpu_fpregs == NULL)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		if (zfs_kfpu_fpregs[cpu] != NULL) {
+			free_pages((unsigned long)zfs_kfpu_fpregs[cpu],
+			    get_order(sizeof (union fpregs_state)));
+		}
+	}
+
+	kfree(zfs_kfpu_fpregs);
+	zfs_kfpu_fpregs = NULL;
+}
+
+/*
+ * Allocate the zfs_kfpu_fpregs table and one zeroed FPU state buffer for
+ * every possible CPU.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails, in which case
+ * everything allocated so far has already been released via kfpu_fini().
+ */
+static inline int
+kfpu_init(void)
+{
+	unsigned int order = get_order(sizeof (union fpregs_state));
+	int cpu;
+
+	/*
+	 * The table is indexed by CPU id (see for_each_possible_cpu() below
+	 * and smp_processor_id() in kfpu_begin()/kfpu_end()).  Possible CPU
+	 * ids are not guaranteed to be contiguous, so the table must have
+	 * nr_cpu_ids slots; num_possible_cpus() only counts the ids and can
+	 * be smaller than the largest one.
+	 */
+	zfs_kfpu_fpregs = kzalloc(nr_cpu_ids *
+	    sizeof (union fpregs_state *), GFP_KERNEL);
+	if (zfs_kfpu_fpregs == NULL)
+		return (-ENOMEM);
+
+	/*
+	 * The fxsave and xsave operations require 16-/64-byte alignment of
+	 * the target memory. Since kmalloc() provides no alignment
+	 * guarantee instead use alloc_pages_node().
+	 */
+	for_each_possible_cpu(cpu) {
+		struct page *page = alloc_pages_node(cpu_to_node(cpu),
+		    GFP_KERNEL | __GFP_ZERO, order);
+		if (page == NULL) {
+			kfpu_fini();
+			return (-ENOMEM);
+		}
+
+		zfs_kfpu_fpregs[cpu] = page_address(page);
+	}
+
+	return (0);
+}
+
+/* The FPU may be used: state is saved and restored by the code below. */
+#define	kfpu_allowed()		1
+/*
+ * Alias ex_handler_fprestore to ex_handler_default.
+ * NOTE(review): presumably needed because the kernel's XSTATE_* macros used
+ * below reference ex_handler_fprestore, which is not available to this
+ * module - confirm against the kernel headers.
+ */
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+#define	ex_handler_fprestore	ex_handler_default
+#endif
+
+/*
+ * FPU save and restore instructions.
+ *
+ * __asm is redefined so that every statement below is emitted as a
+ * volatile asm.  The save macros (fxsave, fxsaveq, fnsave) take *addr as a
+ * memory output operand; the restore macros (fxrstor, fxrstorq, frstor)
+ * take it as a memory input operand.  kfpu_fnsave() additionally issues
+ * fwait.  kfpu_fxsr_clean() runs fnclex and emms, then an fildl from the
+ * memory operand rval.
+ *
+ * None of the macros ends in a semicolon; the caller supplies it, so each
+ * use is a single statement.
+ */
+#define	__asm			__asm__ __volatile__
+#define	kfpu_fxsave(addr)	__asm("fxsave %0" : "=m" (*(addr)))
+#define	kfpu_fxsaveq(addr)	__asm("fxsaveq %0" : "=m" (*(addr)))
+#define	kfpu_fnsave(addr)	__asm("fnsave %0; fwait" : "=m" (*(addr)))
+#define	kfpu_fxrstor(addr)	__asm("fxrstor %0" : : "m" (*(addr)))
+#define	kfpu_fxrstorq(addr)	__asm("fxrstorq %0" : : "m" (*(addr)))
+#define	kfpu_frstor(addr)	__asm("frstor %0" : : "m" (*(addr)))
+#define	kfpu_fxsr_clean(rval)	__asm("fnclex; emms; fildl %P[addr]" \
+				    : : [addr] "m" (rval))
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+/*
+ * Save FPU state into addr through the kernel's XSTATE_XSAVE() macro.  The
+ * 64-bit mask is handed over as two 32-bit halves, and a non-zero error
+ * reported by the macro triggers a one-time warning.
+ */
+static inline void
+kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
+{
+	uint32_t lmask = mask;
+	uint32_t hmask = mask >> 32;
+	int err;
+
+	XSTATE_XSAVE(addr, lmask, hmask, err);
+	WARN_ON_ONCE(err);
+}
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+#define	kfpu_do_xsave(instruction, addr, mask)			\
+{								\
+	uint32_t low, hi;					\
+								\
+	low = mask;						\
+	hi = (uint64_t)(mask) >> 32;				\
+	__asm(instruction " %[dst]\n\t"				\
+	    :							\
+	    : [dst] "m" (*(addr)), "a" (low), "d" (hi)		\
+	    : "memory");					\
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+
+/*
+ * Save FPU state into addr with FXSAVE: the fxsaveq form is used unless
+ * the kernel is built with CONFIG_X86_32, which uses plain fxsave.
+ */
+static inline void
+kfpu_save_fxsr(struct fxregs_state *addr)
+{
+	if (!IS_ENABLED(CONFIG_X86_32)) {
+		kfpu_fxsaveq(addr);
+		return;
+	}
+
+	kfpu_fxsave(addr);
+}
+
+/*
+ * Save FPU state into addr with the kfpu_fnsave() macro; used by
+ * kfpu_begin() when neither XSAVE nor FXSR is available.
+ */
+static inline void
+kfpu_save_fsave(struct fregs_state *addr)
+{
+	kfpu_fnsave(addr);
+}
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+/*
+ * Open a kernel FPU section: disable preemption and local interrupts, then
+ * save the current FPU registers into this CPU's zfs_kfpu_fpregs buffer
+ * with XSAVE if the CPU has it, else FXSAVE, else FNSAVE.  Every call must
+ * be paired with kfpu_end() on the same CPU.
+ *
+ * NOTE(review): kfpu_end() re-enables interrupts unconditionally, so the
+ * pair appears to assume interrupts are enabled on entry - confirm callers.
+ */
+static inline void
+kfpu_begin(void)
+{
+	/*
+	 * Preemption and interrupts must be disabled for the critical
+	 * region where the FPU state is being modified.
+	 */
+	preempt_disable();
+	local_irq_disable();
+
+	/*
+	 * The current FPU registers need to be preserved by kfpu_begin()
+	 * and restored by kfpu_end().  They are stored in a dedicated
+	 * per-cpu variable, not in the task struct, this allows any user
+	 * FPU state to be correctly preserved and restored.
+	 */
+	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_save_xsave(&state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_save_fxsr(&state->fxsave);
+	} else {
+		kfpu_save_fsave(&state->fsave);
+	}
+}
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Open a kernel FPU section: disable preemption and local interrupts, then
+ * save the current FPU registers into this CPU's zfs_kfpu_fpregs buffer.
+ * The save instruction is the first usable one of xsaves and xsaveopt
+ * (each only when the toolchain supports it and the CPU reports it),
+ * xsave, FXSAVE, and finally FNSAVE.  Every call must be paired with
+ * kfpu_end() on the same CPU.
+ *
+ * NOTE(review): kfpu_end() re-enables interrupts unconditionally, so the
+ * pair appears to assume interrupts are enabled on entry - confirm callers.
+ */
+static inline void
+kfpu_begin(void)
+{
+	/*
+	 * Preemption and interrupts must be disabled for the critical
+	 * region where the FPU state is being modified.
+	 */
+	preempt_disable();
+	local_irq_disable();
+
+	/*
+	 * The current FPU registers need to be preserved by kfpu_begin()
+	 * and restored by kfpu_end().  They are stored in a dedicated
+	 * per-cpu variable, not in the task struct, this allows any user
+	 * FPU state to be correctly preserved and restored.
+	 */
+	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
+#if defined(HAVE_XSAVES)
+	if (static_cpu_has(X86_FEATURE_XSAVES)) {
+		kfpu_do_xsave("xsaves", &state->xsave, ~0);
+		return;
+	}
+#endif
+#if defined(HAVE_XSAVEOPT)
+	if (static_cpu_has(X86_FEATURE_XSAVEOPT)) {
+		kfpu_do_xsave("xsaveopt", &state->xsave, ~0);
+		return;
+	}
+#endif
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_do_xsave("xsave", &state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_save_fxsr(&state->fxsave);
+	} else {
+		kfpu_save_fsave(&state->fsave);
+	}
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+/*
+ * Restore FPU state from addr through the kernel's XSTATE_XRESTORE() macro,
+ * handing over the 64-bit mask as two 32-bit halves.
+ */
+static inline void
+kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
+{
+	uint32_t lmask = mask;
+	uint32_t hmask = mask >> 32;
+
+	XSTATE_XRESTORE(addr, lmask, hmask);
+}
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+#define	kfpu_do_xrstor(instruction, addr, mask)			\
+{								\
+	uint32_t low, hi;					\
+								\
+	low = mask;						\
+	hi = (uint64_t)(mask) >> 32;				\
+	__asm(instruction " %[src]"				\
+	    :							\
+	    : [src] "m" (*(addr)), "a" (low), "d" (hi)		\
+	    : "memory");					\
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+
+/*
+ * Restore FPU state from addr with FXRSTOR: the fxrstorq form is used
+ * unless the kernel is built with CONFIG_X86_32, which uses plain fxrstor.
+ */
+static inline void
+kfpu_restore_fxsr(struct fxregs_state *addr)
+{
+	/*
+	 * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
+	 * restores the _x87 FOP, FIP, and FDP registers when an exception
+	 * is pending.  Clean the _x87 state to force the restore.
+	 */
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
+		kfpu_fxsr_clean(addr);
+	}
+
+	if (!IS_ENABLED(CONFIG_X86_32))
+		kfpu_fxrstorq(addr);
+	else
+		kfpu_fxrstor(addr);
+}
+
+/*
+ * Restore FPU state from addr with the kfpu_frstor() macro; the
+ * counterpart of kfpu_save_fsave().
+ */
+static inline void
+kfpu_restore_fsave(struct fregs_state *addr)
+{
+	kfpu_frstor(addr);
+}
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+/*
+ * Close a kernel FPU section opened by kfpu_begin(): restore the FPU
+ * registers from this CPU's zfs_kfpu_fpregs buffer, using the same
+ * XSAVE / FXSR / legacy selection as kfpu_begin(), then re-enable local
+ * interrupts and preemption.
+ */
+static inline void
+kfpu_end(void)
+{
+	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
+
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_restore_xsave(&state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_restore_fxsr(&state->fxsave);
+	} else {
+		kfpu_restore_fsave(&state->fsave);
+	}
+
+	/* Undo kfpu_begin() in reverse order: interrupts, then preemption. */
+	local_irq_enable();
+	preempt_enable();
+}
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Reload the FPU registers from state with the first usable one of
+ * xrstors (only when the toolchain supports XSAVES and the CPU reports
+ * it), xrstor, FXRSTOR, and finally FRSTOR.
+ *
+ * Kept separate from kfpu_end() so that the xrstors path can simply
+ * return; a "goto out" in kfpu_end() would leave its label unused, and
+ * trigger -Wunused-label, whenever HAVE_XSAVES is not defined.
+ */
+static inline void
+kfpu_restore_state(union fpregs_state *state)
+{
+#if defined(HAVE_XSAVES)
+	if (static_cpu_has(X86_FEATURE_XSAVES)) {
+		kfpu_do_xrstor("xrstors", &state->xsave, ~0);
+		return;
+	}
+#endif
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_do_xrstor("xrstor", &state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_restore_fxsr(&state->fxsave);
+	} else {
+		kfpu_restore_fsave(&state->fsave);
+	}
+}
+
+/*
+ * Close a kernel FPU section opened by kfpu_begin(): restore the FPU
+ * registers from this CPU's zfs_kfpu_fpregs buffer, then re-enable local
+ * interrupts and preemption.
+ */
+static inline void
+kfpu_end(void)
+{
+	kfpu_restore_state(zfs_kfpu_fpregs[smp_processor_id()]);
+
+	/* Undo kfpu_begin() in reverse order: interrupts, then preemption. */
+	local_irq_enable();
+	preempt_enable();
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+
+#else
+
+/*
+ * FPU support is unavailable.
+ */
+#define	kfpu_allowed()		0
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
+
+/*
+ * Linux kernel provides an interface for CPU feature testing.
+ */
+
+/*
+ * Detect register set support: report whether every bit of state is set
+ * in XCR0.  XCR0 is only read when the boot CPU reports OSXSAVE; without
+ * it (or when the kernel headers lack X86_FEATURE_OSXSAVE) the answer is
+ * B_FALSE.
+ */
+static inline boolean_t
+__simd_state_enabled(const uint64_t state)
+{
+	boolean_t osxsave = B_FALSE;
+
+#if defined(X86_FEATURE_OSXSAVE)
+	osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
+#endif
+	/* Short-circuit keeps xgetbv() from running without OSXSAVE. */
+	return (osxsave && (xgetbv(0) & state) == state);
+}
+
+/*
+ * XCR0 masks handed to __simd_state_enabled().
+ * NOTE(review): 0x2 and 0x4 are presumably the SSE and AVX state bits and
+ * 0xE0 the three AVX-512 state bits of XCR0 - confirm against the SDM.
+ */
+#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
+#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)
+
+/* True when the OS has enabled the YMM / ZMM register state, respectively. */
+#define	__ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
+#define	__zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
+
+/*
+ * SSE family feature tests.  Each reports B_TRUE when the boot CPU
+ * advertises the corresponding feature flag and B_FALSE otherwise.
+ */
+
+/* SSE (X86_FEATURE_XMM) */
+static inline boolean_t
+zfs_sse_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_XMM) ? B_TRUE : B_FALSE);
+}
+
+/* SSE2 (X86_FEATURE_XMM2) */
+static inline boolean_t
+zfs_sse2_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_XMM2) ? B_TRUE : B_FALSE);
+}
+
+/* SSE3 (X86_FEATURE_XMM3) */
+static inline boolean_t
+zfs_sse3_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_XMM3) ? B_TRUE : B_FALSE);
+}
+
+/* SSSE3 (X86_FEATURE_SSSE3) */
+static inline boolean_t
+zfs_ssse3_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_SSSE3) ? B_TRUE : B_FALSE);
+}
+
+/* SSE4.1 (X86_FEATURE_XMM4_1) */
+static inline boolean_t
+zfs_sse4_1_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_XMM4_1) ? B_TRUE : B_FALSE);
+}
+
+/* SSE4.2 (X86_FEATURE_XMM4_2) */
+static inline boolean_t
+zfs_sse4_2_available(void)
+{
+	return (boot_cpu_has(X86_FEATURE_XMM4_2) ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Check if AVX instruction set is available: the boot CPU must report AVX
+ * and __ymm_enabled() must hold.
+ */
+static inline boolean_t
+zfs_avx_available(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_AVX))
+		return (B_FALSE);
+
+	return (__ymm_enabled());
+}
+
+/*
+ * Check if AVX2 instruction set is available: the boot CPU must report
+ * AVX2 and __ymm_enabled() must hold.
+ */
+static inline boolean_t
+zfs_avx2_available(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_AVX2))
+		return (B_FALSE);
+
+	return (__ymm_enabled());
+}
+
+/*
+ * Single-flag feature tests.  Each reports whether the boot CPU advertises
+ * the feature, and B_FALSE when the kernel headers do not define the
+ * corresponding X86_FEATURE_* constant.
+ */
+
+/* Check if BMI1 instruction set is available */
+static inline boolean_t
+zfs_bmi1_available(void)
+{
+	boolean_t avail = B_FALSE;
+
+#if defined(X86_FEATURE_BMI1)
+	avail = !!boot_cpu_has(X86_FEATURE_BMI1);
+#endif
+	return (avail);
+}
+
+/* Check if BMI2 instruction set is available */
+static inline boolean_t
+zfs_bmi2_available(void)
+{
+	boolean_t avail = B_FALSE;
+
+#if defined(X86_FEATURE_BMI2)
+	avail = !!boot_cpu_has(X86_FEATURE_BMI2);
+#endif
+	return (avail);
+}
+
+/* Check if AES instruction set is available */
+static inline boolean_t
+zfs_aes_available(void)
+{
+	boolean_t avail = B_FALSE;
+
+#if defined(X86_FEATURE_AES)
+	avail = !!boot_cpu_has(X86_FEATURE_AES);
+#endif
+	return (avail);
+}
+
+/* Check if PCLMULQDQ instruction set is available */
+static inline boolean_t
+zfs_pclmulqdq_available(void)
+{
+	boolean_t avail = B_FALSE;
+
+#if defined(X86_FEATURE_PCLMULQDQ)
+	avail = !!boot_cpu_has(X86_FEATURE_PCLMULQDQ);
+#endif
+	return (avail);
+}
+
+/* Check if MOVBE instruction is available */
+static inline boolean_t
+zfs_movbe_available(void)
+{
+	boolean_t avail = B_FALSE;
+
+#if defined(X86_FEATURE_MOVBE)
+	avail = !!boot_cpu_has(X86_FEATURE_MOVBE);
+#endif
+	return (avail);
+}
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F	Foundation
+ * AVX512CD	Conflict Detection Instructions
+ * AVX512ER	Exponential and Reciprocal Instructions
+ * AVX512PF	Prefetch Instructions
+ *
+ * AVX512BW	Byte and Word Instructions
+ * AVX512DQ	Double-word and Quadword Instructions
+ * AVX512VL	Vector Length Extensions
+ *
+ * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI	Vector Byte Manipulation Instructions
+ *
+ * Every test below requires the boot CPU to report AVX512F and, for the
+ * extensions, the extension's own flag, and __zmm_enabled() to hold.  A
+ * test returns B_FALSE when the kernel headers do not define the flag it
+ * is guarded by.
+ */
+
+/* Check if AVX512F instruction set is available */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+#if defined(X86_FEATURE_AVX512F)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512CD instruction set is available */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+#if defined(X86_FEATURE_AVX512CD)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512CD) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512ER instruction set is available */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+#if defined(X86_FEATURE_AVX512ER)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512ER) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512PF instruction set is available */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+#if defined(X86_FEATURE_AVX512PF)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512PF) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512BW instruction set is available */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+#if defined(X86_FEATURE_AVX512BW)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512BW) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512DQ instruction set is available */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+#if defined(X86_FEATURE_AVX512DQ)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512DQ) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512VL instruction set is available */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+#if defined(X86_FEATURE_AVX512VL)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512VL) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512IFMA instruction set is available */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+#if defined(X86_FEATURE_AVX512IFMA)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512IFMA) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Check if AVX512VBMI instruction set is available */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+#if defined(X86_FEATURE_AVX512VBMI)
+	return (boot_cpu_has(X86_FEATURE_AVX512F) &&
+	    boot_cpu_has(X86_FEATURE_AVX512VBMI) && __zmm_enabled());
+#else
+	return (B_FALSE);
+#endif
+}
+
+#endif /* defined(__x86) */
+
+#endif /* _LINUX_SIMD_X86_H */

diff --git a/zfs/include/linux/utsname_compat.h b/zfs/include/os/linux/kernel/linux/utsname_compat.h
similarity index 100%
rename from zfs/include/linux/utsname_compat.h
rename to zfs/include/os/linux/kernel/linux/utsname_compat.h


diff --git a/zfs/include/os/linux/kernel/linux/vfs_compat.h b/zfs/include/os/linux/kernel/linux/vfs_compat.h
new file mode 100644
index 0000000..e82bbf7
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/vfs_compat.h

@@ -0,0 +1,478 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2015 Jörg Thalheim.
+ */
+
+#ifndef _ZFS_VFS_H
+#define	_ZFS_VFS_H
+
+#include <sys/taskq.h>
+#include <sys/cred.h>
+#include <linux/backing-dev.h>
+#include <linux/compat.h>
+
+/*
+ * 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments.
+ * 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments.
+ * 4.12 - x.y, super_setup_bdi_name() new interface.
+ */
+#if defined(HAVE_SUPER_SETUP_BDI_NAME)
+extern atomic_long_t zfs_bdi_seq;
+
+/*
+ * Set up the super block's bdi through super_setup_bdi_name(), naming it
+ * "<name truncated to 28 chars>-<sequence>" where the sequence number is
+ * the post-increment value of the global zfs_bdi_seq counter.
+ */
+static inline int
+zpl_bdi_setup(struct super_block *sb, char *name)
+{
+	long seq = atomic_long_inc_return(&zfs_bdi_seq);
+
+	return (super_setup_bdi_name(sb, "%.28s-%ld", name, seq));
+}
+/*
+ * Counterpart of zpl_bdi_setup() for the super_setup_bdi_name() variant.
+ * That setup path allocates nothing itself, so there is nothing to
+ * release here.
+ */
+static inline void
+zpl_bdi_destroy(struct super_block *sb)
+{
+}
+#elif defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER)
+/*
+ * Allocate a zeroed backing_dev_info, register it under the given name
+ * and attach it to sb.  Returns 0 on success; on failure the allocation
+ * is freed, sb->s_bdi is left untouched and the error is returned.
+ */
+static inline int
+zpl_bdi_setup(struct super_block *sb, char *name)
+{
+	const size_t bdi_size = sizeof (struct backing_dev_info);
+	struct backing_dev_info *new_bdi = kmem_zalloc(bdi_size, KM_SLEEP);
+	int rc = bdi_setup_and_register(new_bdi, name);
+
+	if (rc == 0) {
+		sb->s_bdi = new_bdi;
+		return (0);
+	}
+
+	kmem_free(new_bdi, bdi_size);
+	return (rc);
+}
+/*
+ * Undo zpl_bdi_setup(): destroy and free the bdi attached to sb, then
+ * clear sb->s_bdi.
+ */
+static inline void
+zpl_bdi_destroy(struct super_block *sb)
+{
+	const size_t bdi_size = sizeof (struct backing_dev_info);
+	struct backing_dev_info *old_bdi = sb->s_bdi;
+
+	bdi_destroy(old_bdi);
+	kmem_free(old_bdi, bdi_size);
+	sb->s_bdi = NULL;
+}
+#elif defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER)
+/*
+ * Allocate a zeroed backing_dev_info, register it under the given name
+ * with BDI_CAP_MAP_COPY and attach it to sb.  Returns 0 on success; on
+ * failure the allocation is freed, sb->s_bdi is left untouched and the
+ * error from bdi_setup_and_register() is returned.
+ */
+static inline int
+zpl_bdi_setup(struct super_block *sb, char *name)
+{
+	struct backing_dev_info *bdi;
+	int error;
+
+	bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP);
+	error = bdi_setup_and_register(bdi, name, BDI_CAP_MAP_COPY);
+	if (error) {
+		/*
+		 * Free the local allocation.  sb->s_bdi has not been
+		 * pointed at it yet, so freeing sb->s_bdi here would
+		 * release an unrelated pointer and leak bdi.
+		 */
+		kmem_free(bdi, sizeof (struct backing_dev_info));
+		return (error);
+	}
+
+	/* Publish the bdi only after it registered successfully. */
+	sb->s_bdi = bdi;
+
+	return (0);
+}
+/*
+ * Undo zpl_bdi_setup(): destroy and free the bdi attached to sb, then
+ * clear sb->s_bdi.
+ */
+static inline void
+zpl_bdi_destroy(struct super_block *sb)
+{
+	const size_t bdi_size = sizeof (struct backing_dev_info);
+	struct backing_dev_info *old_bdi = sb->s_bdi;
+
+	bdi_destroy(old_bdi);
+	kmem_free(old_bdi, bdi_size);
+	sb->s_bdi = NULL;
+}
+#else
+#error "Unsupported kernel"
+#endif
+
+/*
+ * 4.14 adds SB_* flag definitions, define them to MS_* equivalents
+ * if not set.
+ */
+#ifndef	SB_RDONLY
+#define	SB_RDONLY	MS_RDONLY
+#endif
+
+#ifndef	SB_SILENT
+#define	SB_SILENT	MS_SILENT
+#endif
+
+#ifndef	SB_ACTIVE
+#define	SB_ACTIVE	MS_ACTIVE
+#endif
+
+#ifndef	SB_POSIXACL
+#define	SB_POSIXACL	MS_POSIXACL
+#endif
+
+#ifndef	SB_MANDLOCK
+#define	SB_MANDLOCK	MS_MANDLOCK
+#endif
+
+#ifndef	SB_NOATIME
+#define	SB_NOATIME	MS_NOATIME
+#endif
+
+/*
+ * 3.5 API change,
+ * The clear_inode() function replaces end_writeback() and introduces an
+ * ordering change regarding when the inode_sync_wait() occurs.  See the
+ * configure check in config/kernel-clear-inode.m4 for full details.
+ */
+#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE)
+#define	clear_inode(ip)		end_writeback(ip)
+#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */
+
+#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(HAVE_LSEEK_EXECUTE)
+/*
+ * Validate a new file offset and, if it differs from the current
+ * position, store it in filp under f_lock (also resetting f_version).
+ * Returns the offset on success, or -EINVAL when the offset exceeds
+ * maxsize or is negative while filp->f_mode lacks FMODE_UNSIGNED_OFFSET.
+ * The inode argument is not used.
+ */
+static inline loff_t
+lseek_execute(
+	struct file *filp,
+	struct inode *inode,
+	loff_t offset,
+	loff_t maxsize)
+{
+	int out_of_range;
+
+	out_of_range = (offset > maxsize) ||
+	    (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET));
+	if (out_of_range)
+		return (-EINVAL);
+
+	/* Nothing to update when the position is unchanged. */
+	if (offset == filp->f_pos)
+		return (offset);
+
+	spin_lock(&filp->f_lock);
+	filp->f_pos = offset;
+	filp->f_version = 0;
+	spin_unlock(&filp->f_lock);
+
+	return (offset);
+}
+#endif /* SEEK_HOLE && SEEK_DATA && !HAVE_LSEEK_EXECUTE */
+
+#if defined(CONFIG_FS_POSIX_ACL)
+/*
+ * These functions safely approximate the behavior of posix_acl_release()
+ * which cannot be used because it calls the GPL-only symbol kfree_rcu().
+ * The in-kernel version, which can access the RCU, frees the ACLs after
+ * the grace period expires.  Because we're unsure how long that grace
+ * period may be, this implementation conservatively delays for 60 seconds.
+ * This is several orders of magnitude larger than the expected grace period.
+ * At 60 seconds the kernel will also begin issuing RCU stall warnings.
+ */
+
+#include <linux/posix_acl.h>
+
+#if defined(HAVE_POSIX_ACL_RELEASE) && !defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
+#define	zpl_posix_acl_release(arg)		posix_acl_release(arg)
+#else
+void zpl_posix_acl_release_impl(struct posix_acl *);
+
+/*
+ * Drop one reference on acl and hand it to zpl_posix_acl_release_impl()
+ * when that was the last one.  NULL and ACL_NOT_CACHED are ignored.
+ */
+static inline void
+zpl_posix_acl_release(struct posix_acl *acl)
+{
+	int last_ref;
+
+	if ((acl == NULL) || (acl == ACL_NOT_CACHED))
+		return;
+
+	/* a_refcount is decremented with whichever API the kernel uses. */
+#ifdef HAVE_ACL_REFCOUNT
+	last_ref = refcount_dec_and_test(&acl->a_refcount);
+#else
+	last_ref = atomic_dec_and_test(&acl->a_refcount);
+#endif
+	if (last_ref)
+		zpl_posix_acl_release_impl(acl);
+}
+#endif /* HAVE_POSIX_ACL_RELEASE */
+
+#ifdef HAVE_SET_CACHED_ACL_USABLE
+#define	zpl_set_cached_acl(ip, ty, n)		set_cached_acl(ip, ty, n)
+#define	zpl_forget_cached_acl(ip, ty)		forget_cached_acl(ip, ty)
+#else
+/*
+ * Replace the cached ACL of the given type on ip with newer and pass the
+ * previously cached pointer to zpl_posix_acl_release().  The pointer swap
+ * is performed while holding ip->i_lock.
+ */
+static inline void
+zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer)
+{
+	struct posix_acl *older = NULL;
+
+	spin_lock(&ip->i_lock);
+
+	/* Only real ACLs are dup'ed; NULL and ACL_NOT_CACHED are skipped. */
+	if ((newer != ACL_NOT_CACHED) && (newer != NULL))
+		posix_acl_dup(newer);
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		older = ip->i_acl;
+		rcu_assign_pointer(ip->i_acl, newer);
+		break;
+	case ACL_TYPE_DEFAULT:
+		older = ip->i_default_acl;
+		rcu_assign_pointer(ip->i_default_acl, newer);
+		break;
+	}
+
+	spin_unlock(&ip->i_lock);
+
+	/* Called after unlocking; older is still NULL for any other type. */
+	zpl_posix_acl_release(older);
+}
+
+/*
+ * Reset the cached ACL of the given type on ip to the ACL_NOT_CACHED
+ * sentinel by delegating to zpl_set_cached_acl().
+ */
+static inline void
+zpl_forget_cached_acl(struct inode *ip, int type)
+{
+	struct posix_acl *not_cached = (struct posix_acl *)ACL_NOT_CACHED;
+
+	zpl_set_cached_acl(ip, type, not_cached);
+}
+#endif /* HAVE_SET_CACHED_ACL_USABLE */
+
+/*
+ * 3.1 API change,
+ * posix_acl_chmod() was added as the preferred interface.
+ *
+ * 3.14 API change,
+ * posix_acl_chmod() was changed to __posix_acl_chmod()
+ */
+#ifndef HAVE___POSIX_ACL_CHMOD
+#ifdef HAVE_POSIX_ACL_CHMOD
+#define	__posix_acl_chmod(acl, gfp, mode)	posix_acl_chmod(acl, gfp, mode)
+#define	__posix_acl_create(acl, gfp, mode)	posix_acl_create(acl, gfp, mode)
+#else
+#error "Unsupported kernel"
+#endif /* HAVE_POSIX_ACL_CHMOD */
+#endif /* HAVE___POSIX_ACL_CHMOD */
+
+/*
+ * 4.8 API change,
+ * posix_acl_valid() now must be passed a namespace; the namespace from
+ * the super block associated with the given inode is used for this purpose.
+ */
+#ifdef HAVE_POSIX_ACL_VALID_WITH_NS
+#define	zpl_posix_acl_valid(ip, acl)  posix_acl_valid(ip->i_sb->s_user_ns, acl)
+#else
+#define	zpl_posix_acl_valid(ip, acl)  posix_acl_valid(acl)
+#endif
+
+#endif /* CONFIG_FS_POSIX_ACL */
+
+/*
+ * 3.19 API change
+ * struct access f->f_dentry->d_inode was replaced by accessor function
+ * file_inode(f)
+ */
+#ifndef HAVE_FILE_INODE
+/* Fallback accessor: return the inode behind f's dentry. */
+static inline struct inode *file_inode(const struct file *f)
+{
+	struct dentry *de = f->f_dentry;
+
+	return (de->d_inode);
+}
+#endif /* HAVE_FILE_INODE */
+
+/*
+ * 4.1 API change
+ * struct access file->f_path.dentry was replaced by accessor function
+ * file_dentry(f)
+ */
+#ifndef HAVE_FILE_DENTRY
+/* Fallback accessor: return the dentry stored in f->f_path. */
+static inline struct dentry *file_dentry(const struct file *f)
+{
+	const struct path *fpath = &f->f_path;
+
+	return (fpath->dentry);
+}
+#endif /* HAVE_FILE_DENTRY */
+
+/*
+ * Convert ip->i_uid to a plain uid_t.  The mapping uses the super block's
+ * user namespace when HAVE_SUPER_USER_NS is defined and kcred's otherwise.
+ */
+static inline uid_t zfs_uid_read_impl(struct inode *ip)
+{
+#ifdef HAVE_SUPER_USER_NS
+	struct user_namespace *ns = ip->i_sb->s_user_ns;
+#else
+	struct user_namespace *ns = kcred->user_ns;
+#endif
+
+	return (from_kuid(ns, ip->i_uid));
+}
+
+/* Read the inode's owner uid; thin wrapper around zfs_uid_read_impl(). */
+static inline uid_t zfs_uid_read(struct inode *ip)
+{
+	uid_t uid = zfs_uid_read_impl(ip);
+
+	return (uid);
+}
+
+/*
+ * Convert ip->i_gid to a plain gid_t.  The mapping uses the super block's
+ * user namespace when HAVE_SUPER_USER_NS is defined and kcred's otherwise.
+ */
+static inline gid_t zfs_gid_read_impl(struct inode *ip)
+{
+#ifdef HAVE_SUPER_USER_NS
+	struct user_namespace *ns = ip->i_sb->s_user_ns;
+#else
+	struct user_namespace *ns = kcred->user_ns;
+#endif
+
+	return (from_kgid(ns, ip->i_gid));
+}
+
+/* Read the inode's owner gid; thin wrapper around zfs_gid_read_impl(). */
+static inline gid_t zfs_gid_read(struct inode *ip)
+{
+	gid_t gid = zfs_gid_read_impl(ip);
+
+	return (gid);
+}
+
+/*
+ * Store uid into ip->i_uid as a kuid.  The mapping uses the super block's
+ * user namespace when HAVE_SUPER_USER_NS is defined and kcred's otherwise.
+ */
+static inline void zfs_uid_write(struct inode *ip, uid_t uid)
+{
+#ifdef HAVE_SUPER_USER_NS
+	struct user_namespace *ns = ip->i_sb->s_user_ns;
+#else
+	struct user_namespace *ns = kcred->user_ns;
+#endif
+
+	ip->i_uid = make_kuid(ns, uid);
+}
+
+/*
+ * Store gid into ip->i_gid as a kgid.  The mapping uses the super block's
+ * user namespace when HAVE_SUPER_USER_NS is defined and kcred's otherwise.
+ */
+static inline void zfs_gid_write(struct inode *ip, gid_t gid)
+{
+#ifdef HAVE_SUPER_USER_NS
+	struct user_namespace *ns = ip->i_sb->s_user_ns;
+#else
+	struct user_namespace *ns = kcred->user_ns;
+#endif
+
+	ip->i_gid = make_kgid(ns, gid);
+}
+
+/*
+ * 4.9 API change
+ */
+#if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
+    defined(HAVE_SETATTR_PREPARE_USERNS) || \
+    defined(HAVE_SETATTR_PREPARE_IDMAP))
+/*
+ * Compatibility shim used when none of the setattr_prepare() variants
+ * was detected: forwards to inode_change_ok() on the dentry's inode.
+ */
+static inline int
+setattr_prepare(struct dentry *dentry, struct iattr *ia)
+{
+	struct inode *ip = dentry->d_inode;
+
+	return (inode_change_ok(ip, ia));
+}
+#endif
+
+/*
+ * 4.11 API change
+ * These macros are defined by kernel 4.11.  We define them so that the same
+ * code builds under kernels < 4.11 and >= 4.11.  The macros are set to 0 so
+ * that it will create obvious failures if they are accidentally used when built
+ * against a kernel >= 4.11.
+ */
+
+#ifndef STATX_BASIC_STATS
+#define	STATX_BASIC_STATS	0
+#endif
+
+#ifndef AT_STATX_SYNC_AS_STAT
+#define	AT_STATX_SYNC_AS_STAT	0
+#endif
+
+/*
+ * 4.11 API change
+ * 4.11 takes struct path *, < 4.11 takes vfsmount *
+ */
+
+#ifdef HAVE_VFSMOUNT_IOPS_GETATTR
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)	\
+{									\
+	struct path path = { .mnt = mnt, .dentry = dentry };		\
+	return func##_impl(&path, stat, STATX_BASIC_STATS,		\
+	    AT_STATX_SYNC_AS_STAT);					\
+}
+#elif defined(HAVE_PATH_IOPS_GETATTR)
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(const struct path *path, struct kstat *stat, u32 request_mask,	\
+    unsigned int query_flags)						\
+{									\
+	return (func##_impl(path, stat, request_mask, query_flags));	\
+}
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(struct user_namespace *user_ns, const struct path *path,	\
+    struct kstat *stat, u32 request_mask, unsigned int query_flags)	\
+{									\
+	return (func##_impl(user_ns, path, stat, request_mask, \
+	    query_flags));	\
+}
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(struct mnt_idmap *user_ns, const struct path *path,	\
+    struct kstat *stat, u32 request_mask, unsigned int query_flags)	\
+{									\
+	return (func##_impl(user_ns, path, stat, request_mask,	\
+	    query_flags));	\
+}
+#else
+#error
+#endif
+
+/*
+ * 4.9 API change
+ * Preferred interface to get the current FS time.
+ */
+#if !defined(HAVE_CURRENT_TIME)
+/*
+ * Fallback current_time(): take current_kernel_time() and truncate it to
+ * the s_time_gran of the inode's super block.
+ */
+static inline struct timespec
+current_time(struct inode *ip)
+{
+	struct timespec now = current_kernel_time();
+
+	return (timespec_trunc(now, ip->i_sb->s_time_gran));
+}
+#endif
+
+/*
+ * 4.16 API change
+ * Added iversion interface for managing inode version field.
+ */
+#ifdef HAVE_INODE_SET_IVERSION
+#include <linux/iversion.h>
+#else
+/*
+ * Fallback used when HAVE_INODE_SET_IVERSION is not defined: store val
+ * directly in the inode's i_version field.
+ */
+static inline void
+inode_set_iversion(struct inode *ip, u64 val)
+{
+	ip->i_version = val;
+}
+#endif
+
+/*
+ * Returns true when called in the context of a 32-bit system call.
+ * Without CONFIG_COMPAT this reduces to a BITS_PER_LONG == 32 check;
+ * otherwise in_compat_syscall() is used when available, falling back to
+ * is_compat_task().
+ */
+static inline int
+zpl_is_32bit_api(void)
+{
+#if !defined(CONFIG_COMPAT)
+	return (BITS_PER_LONG == 32);
+#elif defined(HAVE_IN_COMPAT_SYSCALL)
+	return (in_compat_syscall());
+#else
+	return (is_compat_task());
+#endif
+}
+
+/*
+ * 5.12 API change
+ * To support id-mapped mounts, generic_fillattr() was modified to
+ * accept a new struct user_namespace* as its first arg.
+ *
+ * 6.3 API change
+ * generic_fillattr() first arg is changed to struct mnt_idmap *
+ *
+ */
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP
+#define	zpl_generic_fillattr(idmap, ip, sp)	\
+    generic_fillattr(idmap, ip, sp)
+#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
+#define	zpl_generic_fillattr(user_ns, ip, sp)	\
+    generic_fillattr(user_ns, ip, sp)
+#else
+#define	zpl_generic_fillattr(user_ns, ip, sp)	generic_fillattr(ip, sp)
+#endif
+
+#endif /* _ZFS_VFS_H */

diff --git a/zfs/include/os/linux/kernel/linux/xattr_compat.h b/zfs/include/os/linux/kernel/linux/xattr_compat.h
new file mode 100644
index 0000000..3ffd001
--- /dev/null
+++ b/zfs/include/os/linux/kernel/linux/xattr_compat.h

@@ -0,0 +1,230 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _ZFS_XATTR_H
+#define	_ZFS_XATTR_H
+
+#include <linux/posix_acl_xattr.h>
+
+/*
+ * 2.6.35 API change,
+ * The const keyword was added to the 'struct xattr_handler' in the
+ * generic Linux super_block structure.  To handle this we define an
+ * appropriate xattr_handler_t typedef which can be used.  This was
+ * the preferred solution because it keeps the code clean and readable.
+ */
+typedef const struct xattr_handler	xattr_handler_t;
+
+/*
+ * 4.5 API change,
+ */
+#if defined(HAVE_XATTR_LIST_SIMPLE)
+#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
+static bool								\
+fn(struct dentry *dentry)						\
+{									\
+	return (!!__ ## fn(dentry->d_inode, NULL, 0, NULL, 0));		\
+}
+/*
+ * 4.4 API change,
+ */
+#elif defined(HAVE_XATTR_LIST_DENTRY)
+#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
+static size_t								\
+fn(struct dentry *dentry, char *list, size_t list_size,			\
+    const char *name, size_t name_len, int type)			\
+{									\
+	return (__ ## fn(dentry->d_inode,				\
+	    list, list_size, name, name_len));				\
+}
+/*
+ * 2.6.33 API change,
+ */
+#elif defined(HAVE_XATTR_LIST_HANDLER)
+#define	ZPL_XATTR_LIST_WRAPPER(fn)					\
+static size_t								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    char *list, size_t list_size, const char *name, size_t name_len)	\
+{									\
+	return (__ ## fn(dentry->d_inode,				\
+	    list, list_size, name, name_len));				\
+}
+#else
+#error "Unsupported kernel"
+#endif
+
+/*
+ * 4.7 API change,
+ * The xattr_handler->get() callback was changed to take both a dentry and
+ * an inode, because the dentry might not be attached to an inode yet.
+ */
+#if defined(HAVE_XATTR_GET_DENTRY_INODE)
+#define	ZPL_XATTR_GET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    struct inode *inode, const char *name, void *buffer, size_t size)	\
+{									\
+	return (__ ## fn(inode, name, buffer, size));			\
+}
+/*
+ * 4.4 API change,
+ * The xattr_handler->get() callback was changed to take a xattr_handler,
+ * and handler_flags argument was removed and should be accessed by
+ * handler->flags.
+ */
+#elif defined(HAVE_XATTR_GET_HANDLER)
+#define	ZPL_XATTR_GET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    const char *name, void *buffer, size_t size)			\
+{									\
+	return (__ ## fn(dentry->d_inode, name, buffer, size));		\
+}
+/*
+ * 2.6.33 API change,
+ * The xattr_handler->get() callback was changed to take a dentry
+ * instead of an inode, and a handler_flags argument was added.
+ */
+#elif defined(HAVE_XATTR_GET_DENTRY)
+#define	ZPL_XATTR_GET_WRAPPER(fn)					\
+static int								\
+fn(struct dentry *dentry, const char *name, void *buffer, size_t size,	\
+    int unused_handler_flags)						\
+{									\
+	return (__ ## fn(dentry->d_inode, name, buffer, size));		\
+}
+/*
+ * Android API change,
+ * The xattr_handler->get() callback was changed to take a dentry and inode
+ * and flags, because the dentry might not be attached to an inode yet.
+ */
+#elif defined(HAVE_XATTR_GET_DENTRY_INODE_FLAGS)
+#define	ZPL_XATTR_GET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    struct inode *inode, const char *name, void *buffer,		\
+    size_t size, int flags)						\
+{									\
+	return (__ ## fn(inode, name, buffer, size));			\
+}
+#else
+#error "Unsupported kernel"
+#endif
+
+/*
+ * 6.3 API change,
+ * The xattr_handler->set() callback was changed to take the
+ * struct mnt_idmap* as the first arg, to support idmapped
+ * mounts.
+ */
+#if defined(HAVE_XATTR_SET_IDMAP)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct mnt_idmap *user_ns,	\
+    struct dentry *dentry, struct inode *inode, const char *name,	\
+    const void *buffer, size_t size, int flags)	\
+{									\
+	return (__ ## fn(user_ns, inode, name, buffer, size, flags));	\
+}
+/*
+ * 5.12 API change,
+ * The xattr_handler->set() callback was changed to take the
+ * struct user_namespace* as the first arg, to support idmapped
+ * mounts.
+ */
+#elif defined(HAVE_XATTR_SET_USERNS)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
+    struct dentry *dentry, struct inode *inode, const char *name,	\
+    const void *buffer, size_t size, int flags)	\
+{									\
+	return (__ ## fn(user_ns, inode, name, buffer, size, flags));	\
+}
+/*
+ * 4.7 API change,
+ * The xattr_handler->set() callback was changed to take both a dentry and
+ * an inode, because the dentry might not be attached to an inode yet.
+ */
+#elif defined(HAVE_XATTR_SET_DENTRY_INODE)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    struct inode *inode, const char *name, const void *buffer,		\
+    size_t size, int flags)						\
+{									\
+	return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\
+}
+/*
+ * 4.4 API change,
+ * The xattr_handler->set() callback was changed to take a xattr_handler,
+ * and handler_flags argument was removed and should be accessed by
+ * handler->flags.
+ */
+#elif defined(HAVE_XATTR_SET_HANDLER)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    const char *name, const void *buffer, size_t size, int flags)	\
+{									\
+	return (__ ## fn(kcred->user_ns, dentry->d_inode, name,		\
+	    buffer, size, flags));					\
+}
+/*
+ * 2.6.33 API change,
+ * The xattr_handler->set() callback was changed to take a dentry
+ * instead of an inode, and a handler_flags argument was added.
+ */
+#elif defined(HAVE_XATTR_SET_DENTRY)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(struct dentry *dentry, const char *name, const void *buffer,		\
+    size_t size, int flags, int unused_handler_flags)			\
+{									\
+	return (__ ## fn(kcred->user_ns, dentry->d_inode, name, buffer, \
+	    size, flags));						\
+}
+#else
+#error "Unsupported kernel"
+#endif
+
+/*
+ * Linux 3.7 API change. posix_acl_{from,to}_xattr gained the user_ns
+ * parameter.  All callers are expected to pass the &init_user_ns which
+ * is available through the init credential (kcred).
+ */
+/* Decode an xattr value into a posix_acl using kcred's user namespace. */
+static inline struct posix_acl *
+zpl_acl_from_xattr(const void *value, int size)
+{
+	struct user_namespace *ns = kcred->user_ns;
+
+	return (posix_acl_from_xattr(ns, value, size));
+}
+
+/* Encode acl into the xattr buffer using kcred's user namespace. */
+static inline int
+zpl_acl_to_xattr(struct posix_acl *acl, void *value, int size)
+{
+	struct user_namespace *ns = kcred->user_ns;
+
+	return (posix_acl_to_xattr(ns, acl, value, size));
+}
+
+#endif /* _ZFS_XATTR_H */

diff --git a/zfs/include/spl/Makefile.am b/zfs/include/os/linux/spl/Makefile.am
similarity index 100%
rename from zfs/include/spl/Makefile.am
rename to zfs/include/os/linux/spl/Makefile.am


diff --git a/zfs/include/os/linux/spl/rpc/Makefile.am b/zfs/include/os/linux/spl/rpc/Makefile.am
new file mode 100644
index 0000000..13d804f
--- /dev/null
+++ b/zfs/include/os/linux/spl/rpc/Makefile.am

@@ -0,0 +1,7 @@
+# Headers from this directory that are shipped for kernel builds.
+KERNEL_H = \
+	xdr.h
+
+# Only when CONFIG_KERNEL is enabled: install KERNEL_H into kerneldir.
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/spl/rpc
+kernel_HEADERS = $(KERNEL_H)
+endif

diff --git a/zfs/include/os/linux/spl/rpc/xdr.h b/zfs/include/os/linux/spl/rpc/xdr.h
new file mode 100644
index 0000000..c62080a
--- /dev/null
+++ b/zfs/include/os/linux/spl/rpc/xdr.h

@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2008 Sun Microsystems, Inc.
+ *  Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_RPC_XDR_H
+#define	_SPL_RPC_XDR_H
+
+#include <sys/types.h>
+
+typedef int bool_t;
+
+/*
+ * XDR enums and types.
+ */
+enum xdr_op {
+	XDR_ENCODE,
+	XDR_DECODE
+};
+
+struct xdr_ops;
+
+typedef struct {
+	struct xdr_ops	*x_ops;	/* Let caller know xdrmem_create() succeeds */
+	caddr_t		x_addr;	/* Current buffer addr */
+	caddr_t		x_addr_end;	/* End of the buffer */
+	enum xdr_op	x_op;	/* Stream direction */
+} XDR;
+
+typedef bool_t (*xdrproc_t)(XDR *xdrs, void *ptr);
+
+struct xdr_ops {
+	bool_t (*xdr_control)(XDR *, int, void *);
+
+	bool_t (*xdr_char)(XDR *, char *);
+	bool_t (*xdr_u_short)(XDR *, unsigned short *);
+	bool_t (*xdr_u_int)(XDR *, unsigned *);
+	bool_t (*xdr_u_longlong_t)(XDR *, u_longlong_t *);
+
+	bool_t (*xdr_opaque)(XDR *, caddr_t, const uint_t);
+	bool_t (*xdr_string)(XDR *, char **, const uint_t);
+	bool_t (*xdr_array)(XDR *, caddr_t *, uint_t *, const uint_t,
+	    const uint_t, const xdrproc_t);
+};
+
+/*
+ * XDR control operator.
+ */
+#define	XDR_GET_BYTES_AVAIL 1
+
+struct xdr_bytesrec {
+	bool_t xc_is_last_record;
+	size_t xc_num_avail;
+};
+
+/*
+ * XDR functions.
+ */
+void xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
+    const enum xdr_op op);
+
+/* Currently not needed. If needed later, we'll add it to struct xdr_ops */
+#define	xdr_destroy(xdrs) ((void) 0)
+
+#define	xdr_control(xdrs, req, info) \
+	(xdrs)->x_ops->xdr_control((xdrs), (req), (info))
+
+/*
+ * For precaution, the following are defined as static inlines instead of macros
+ * to get some amount of type safety.
+ *
+ * Also, macros wouldn't work in the case where typecasting is done, because it
+ * must be possible to reference the functions' addresses by these names.
+ */
+/* Dispatch to the stream's xdr_char operation. */
+static inline bool_t xdr_char(XDR *xdrs, char *cp)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_char(xdrs, cp));
+}
+
+/* Dispatch to the stream's xdr_u_short operation. */
+static inline bool_t xdr_u_short(XDR *xdrs, unsigned short *usp)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_u_short(xdrs, usp));
+}
+
+/*
+ * Signed variant: reuses the xdr_u_short operation on the same storage.
+ * The build fails if short is not 2 bytes wide.
+ */
+static inline bool_t xdr_short(XDR *xdrs, short *sp)
+{
+	unsigned short *usp = (unsigned short *) sp;
+
+	BUILD_BUG_ON(sizeof (short) != 2);
+	return (xdrs->x_ops->xdr_u_short(xdrs, usp));
+}
+
+/* Dispatch to the stream's xdr_u_int operation. */
+static inline bool_t xdr_u_int(XDR *xdrs, unsigned *up)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_u_int(xdrs, up));
+}
+
+/*
+ * Signed variant: reuses the xdr_u_int operation on the same storage.
+ * The build fails if int is not 4 bytes wide.
+ */
+static inline bool_t xdr_int(XDR *xdrs, int *ip)
+{
+	unsigned *up = (unsigned *)ip;
+
+	BUILD_BUG_ON(sizeof (int) != 4);
+	return (xdrs->x_ops->xdr_u_int(xdrs, up));
+}
+
+/* Dispatch to the stream's xdr_u_longlong_t operation. */
+static inline bool_t xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_u_longlong_t(xdrs, ullp));
+}
+
+/*
+ * Signed variant: reuses the xdr_u_longlong_t operation on the same
+ * storage.  The build fails if longlong_t is not 8 bytes wide.
+ */
+static inline bool_t xdr_longlong_t(XDR *xdrs, longlong_t *llp)
+{
+	u_longlong_t *ullp = (u_longlong_t *)llp;
+
+	BUILD_BUG_ON(sizeof (longlong_t) != 8);
+	return (xdrs->x_ops->xdr_u_longlong_t(xdrs, ullp));
+}
+
+/*
+ * Fixed-length opaque data: dispatch to the stream's xdr_opaque
+ * operation with the buffer and its byte count.
+ */
+static inline bool_t xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_opaque(xdrs, cp, cnt));
+}
+
+/*
+ * Variable-length string: dispatch to the stream's xdr_string operation.
+ * The *sp buffer must have (maxsize + 1) bytes.
+ */
+static inline bool_t xdr_string(XDR *xdrs, char **sp, const uint_t maxsize)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_string(xdrs, sp, maxsize));
+}
+
+/*
+ * Variable-length arrays: dispatch to the stream's xdr_array operation,
+ * forwarding every argument unchanged.
+ */
+static inline bool_t xdr_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep,
+    const uint_t maxsize, const uint_t elsize, const xdrproc_t elproc)
+{
+	struct xdr_ops *ops = xdrs->x_ops;
+
+	return (ops->xdr_array(xdrs, arrp, sizep, maxsize, elsize, elproc));
+}
+
+#endif /* _SPL_RPC_XDR_H */

diff --git a/zfs/include/os/linux/spl/sys/Makefile.am b/zfs/include/os/linux/spl/sys/Makefile.am
new file mode 100644
index 0000000..450baff
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/Makefile.am

@@ -0,0 +1,65 @@
+# Headers from this directory that are shipped for kernel builds.
+KERNEL_H = \
+	acl.h \
+	atomic.h \
+	byteorder.h \
+	callb.h \
+	callo.h \
+	cmn_err.h \
+	condvar.h \
+	cred.h \
+	ctype.h \
+	debug.h \
+	disp.h \
+	dkio.h \
+	errno.h \
+	fcntl.h \
+	file.h \
+	inttypes.h \
+	isa_defs.h \
+	kmem_cache.h \
+	kmem.h \
+	kstat.h \
+	list.h \
+	misc.h \
+	mod_os.h \
+	mutex.h \
+	param.h \
+	processor.h \
+	proc.h \
+	procfs_list.h \
+	random.h \
+	rwlock.h \
+	shrinker.h \
+	sid.h \
+	signal.h \
+	simd.h \
+	stat.h \
+	strings.h \
+	sunddi.h \
+	sysmacros.h \
+	systeminfo.h \
+	taskq.h \
+	thread.h \
+	time.h \
+	timer.h \
+	trace.h \
+	trace_spl.h \
+	trace_taskq.h \
+	tsd.h \
+	types32.h \
+	types.h \
+	uio.h \
+	user.h \
+	vfs.h \
+	vmem.h \
+	vmsystm.h \
+	vnode.h \
+	wait.h \
+	wmsum.h \
+	zmod.h \
+	zone.h
+
+# Only when CONFIG_KERNEL is enabled: install KERNEL_H into kerneldir.
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/spl/sys
+kernel_HEADERS = $(KERNEL_H)
+endif

diff --git a/zfs/include/os/linux/spl/sys/acl.h b/zfs/include/os/linux/spl/sys/acl.h
new file mode 100644
index 0000000..5a3d226
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/acl.h

@@ -0,0 +1,118 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_ACL_H
+#define	_SPL_ACL_H
+
+#include <sys/types.h>
+
+/*
+ * Basic access control entry.  Field meanings mirror the identically
+ * named leading fields of ace_object_t.
+ */
+typedef struct ace {
+	uid_t a_who;		/* uid or gid */
+	uint32_t a_access_mask;	/* read,write,... */
+	uint16_t a_flags;	/* ACE_* flag bits */
+	uint16_t a_type;	/* allow or deny */
+} ace_t;
+
+typedef struct ace_object {
+	uid_t		a_who;		/* uid or gid */
+	uint32_t	a_access_mask;	/* read,write,... */
+	uint16_t	a_flags;	/* see below */
+	uint16_t	a_type;		/* allow or deny */
+	uint8_t		a_obj_type[16];	/* obj type */
+	uint8_t		a_inherit_obj_type[16];	/* inherit obj */
+} ace_object_t;
+
+#define	MAX_ACL_ENTRIES					1024
+
+#define	ACE_READ_DATA					0x00000001
+#define	ACE_LIST_DIRECTORY				0x00000001
+#define	ACE_WRITE_DATA					0x00000002
+#define	ACE_ADD_FILE					0x00000002
+#define	ACE_APPEND_DATA					0x00000004
+#define	ACE_ADD_SUBDIRECTORY				0x00000004
+#define	ACE_READ_NAMED_ATTRS				0x00000008
+#define	ACE_WRITE_NAMED_ATTRS				0x00000010
+#define	ACE_EXECUTE					0x00000020
+#define	ACE_DELETE_CHILD				0x00000040
+#define	ACE_READ_ATTRIBUTES				0x00000080
+#define	ACE_WRITE_ATTRIBUTES				0x00000100
+#define	ACE_DELETE					0x00010000
+#define	ACE_READ_ACL					0x00020000
+#define	ACE_WRITE_ACL					0x00040000
+#define	ACE_WRITE_OWNER					0x00080000
+#define	ACE_SYNCHRONIZE					0x00100000
+
+#define	ACE_FILE_INHERIT_ACE				0x0001
+#define	ACE_DIRECTORY_INHERIT_ACE			0x0002
+#define	ACE_NO_PROPAGATE_INHERIT_ACE			0x0004
+#define	ACE_INHERIT_ONLY_ACE				0x0008
+#define	ACE_SUCCESSFUL_ACCESS_ACE_FLAG			0x0010
+#define	ACE_FAILED_ACCESS_ACE_FLAG			0x0020
+#define	ACE_IDENTIFIER_GROUP				0x0040
+#define	ACE_INHERITED_ACE				0x0080
+#define	ACE_OWNER					0x1000
+#define	ACE_GROUP					0x2000
+#define	ACE_EVERYONE					0x4000
+
+#define	ACE_ACCESS_ALLOWED_ACE_TYPE			0x0000
+#define	ACE_ACCESS_DENIED_ACE_TYPE			0x0001
+#define	ACE_SYSTEM_AUDIT_ACE_TYPE			0x0002
+#define	ACE_SYSTEM_ALARM_ACE_TYPE			0x0003
+
+#define	ACL_AUTO_INHERIT				0x0001
+#define	ACL_PROTECTED					0x0002
+#define	ACL_DEFAULTED					0x0004
+#define	ACL_FLAGS_ALL	(ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED)
+
+#define	ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE		0x04
+#define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE		0x05
+#define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE		0x06
+#define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE		0x07
+#define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE		0x08
+#define	ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE		0x09
+#define	ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE		0x0A
+#define	ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE	0x0B
+#define	ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE	0x0C
+#define	ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE		0x0D
+#define	ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE		0x0E
+#define	ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE	0x0F
+#define	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE	0x10
+
+#define	ACE_ALL_TYPES	0x001F
+
+#define	ACE_TYPE_FLAGS	(ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP)
+
+/* BEGIN CSTYLED */
+#define	ACE_ALL_PERMS	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
+     ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
+     ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
+     ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
+/* END CSTYLED */
+
+#define	VSA_ACE						0x0010
+#define	VSA_ACECNT					0x0020
+#define	VSA_ACE_ALLTYPES				0x0040
+#define	VSA_ACE_ACLFLAGS				0x0080
+
+#endif /* _SPL_ACL_H */

diff --git a/zfs/include/os/linux/spl/sys/atomic.h b/zfs/include/os/linux/spl/sys/atomic.h
new file mode 100644
index 0000000..8f7fa5a
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/atomic.h

@@ -0,0 +1,82 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_ATOMIC_H
+#define	_SPL_ATOMIC_H
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <sys/types.h>
+
+/*
+ * Map the atomic_* functions to the Linux counterparts by casting the operand
+ * to atomic_t or atomic64_t.  This relies on those types internally really
+ * being a uint32 or uint64; if that changes an alternate approach is needed.
+ *
+ * N.B. Due to the limitations of the original API, atomicity is not strictly
+ * preserved when using the 64-bit functions on a 32-bit system.  In order
+ * to support this, all consumers would need to be updated to use the Linux
+ * provided atomic_t and atomic64_t types.
+ */
+#define	atomic_inc_32(v)	atomic_inc((atomic_t *)(v))
+#define	atomic_dec_32(v)	atomic_dec((atomic_t *)(v))
+#define	atomic_add_32(v, i)	atomic_add((i), (atomic_t *)(v))
+#define	atomic_sub_32(v, i)	atomic_sub((i), (atomic_t *)(v))
+#define	atomic_inc_32_nv(v)	atomic_inc_return((atomic_t *)(v))
+#define	atomic_dec_32_nv(v)	atomic_dec_return((atomic_t *)(v))
+#define	atomic_add_32_nv(v, i)	atomic_add_return((i), (atomic_t *)(v))
+#define	atomic_sub_32_nv(v, i)	atomic_sub_return((i), (atomic_t *)(v))
+#define	atomic_cas_32(v, x, y)	atomic_cmpxchg((atomic_t *)(v), x, y)
+#define	atomic_swap_32(v, x)	atomic_xchg((atomic_t *)(v), x)
+#define	atomic_load_32(v)	atomic_read((atomic_t *)(v))
+#define	atomic_store_32(v, x)	atomic_set((atomic_t *)(v), x)
+#define	atomic_inc_64(v)	atomic64_inc((atomic64_t *)(v))
+#define	atomic_dec_64(v)	atomic64_dec((atomic64_t *)(v))
+#define	atomic_add_64(v, i)	atomic64_add((i), (atomic64_t *)(v))
+#define	atomic_sub_64(v, i)	atomic64_sub((i), (atomic64_t *)(v))
+#define	atomic_inc_64_nv(v)	atomic64_inc_return((atomic64_t *)(v))
+#define	atomic_dec_64_nv(v)	atomic64_dec_return((atomic64_t *)(v))
+#define	atomic_add_64_nv(v, i)	atomic64_add_return((i), (atomic64_t *)(v))
+#define	atomic_sub_64_nv(v, i)	atomic64_sub_return((i), (atomic64_t *)(v))
+#define	atomic_cas_64(v, x, y)	atomic64_cmpxchg((atomic64_t *)(v), x, y)
+#define	atomic_swap_64(v, x)	atomic64_xchg((atomic64_t *)(v), x)
+#define	atomic_load_64(v)	atomic64_read((atomic64_t *)(v))
+#define	atomic_store_64(v, x)	atomic64_set((atomic64_t *)(v), x)
+
+#ifdef _LP64 /* pointers are cast to uint64_t: use the 64-bit CAS */
+static __inline__ void *
+atomic_cas_ptr(volatile void *target,  void *cmp, void *newval)
+{
+	return ((void *)atomic_cas_64((volatile uint64_t *)target,
+	    (uint64_t)cmp, (uint64_t)newval));
+}
+#else /* !_LP64: pointers are cast to uint32_t, use the 32-bit CAS */
+static __inline__ void *
+atomic_cas_ptr(volatile void *target,  void *cmp, void *newval)
+{
+	return ((void *)atomic_cas_32((volatile uint32_t *)target,
+	    (uint32_t)cmp, (uint32_t)newval));
+}
+#endif /* _LP64 */
+
+#endif  /* _SPL_ATOMIC_H */

diff --git a/zfs/include/os/linux/spl/sys/byteorder.h b/zfs/include/os/linux/spl/sys/byteorder.h
new file mode 100644
index 0000000..bb5e173
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/byteorder.h

@@ -0,0 +1,86 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_BYTEORDER_H
+#define	_SPL_BYTEORDER_H
+
+#include <asm/byteorder.h>
+
+#if defined(__BIG_ENDIAN) && !defined(_ZFS_BIG_ENDIAN)
+#define	_ZFS_BIG_ENDIAN
+#endif
+
+#if defined(__LITTLE_ENDIAN) && !defined(_ZFS_LITTLE_ENDIAN)
+#define	_ZFS_LITTLE_ENDIAN
+#endif
+
+#include <sys/isa_defs.h>
+
+#define	BSWAP_8(x)	((x) & 0xff)
+#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+#define	LE_16(x)	cpu_to_le16(x)
+#define	LE_32(x)	cpu_to_le32(x)
+#define	LE_64(x)	cpu_to_le64(x)
+#define	BE_16(x)	cpu_to_be16(x)
+#define	BE_32(x)	cpu_to_be32(x)
+#define	BE_64(x)	cpu_to_be64(x)
+
+/* BE_INn(xa): read an n-bit big-endian value from the bytes at address xa. */
+#define	BE_IN8(xa)	(*((uint8_t *)(xa)))
+
+#define	BE_IN16(xa) \
+	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
+
+#define	BE_IN32(xa) \
+	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
+
+#ifdef _ZFS_BIG_ENDIAN /* host order is network order: return n unchanged */
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return (n);
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return (n);
+}
+#else /* !_ZFS_BIG_ENDIAN: convert each 32-bit half and swap the halves */
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#endif /* _ZFS_BIG_ENDIAN */
+
+#endif /* _SPL_BYTEORDER_H */

diff --git a/zfs/include/os/linux/spl/sys/callb.h b/zfs/include/os/linux/spl/sys/callb.h
new file mode 100644
index 0000000..19ba41f
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/callb.h

@@ -0,0 +1,53 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CALLB_H
+#define	_SPL_CALLB_H
+
+#include <linux/module.h>
+#include <sys/mutex.h>
+
+#define	CALLB_CPR_ASSERT(cp)		ASSERT(MUTEX_HELD((cp)->cc_lockp));
+
+typedef struct callb_cpr {
+	kmutex_t	*cc_lockp;	/* set by CALLB_CPR_INIT() */
+} callb_cpr_t;
+
+#define	CALLB_CPR_INIT(cp, lockp, func, name)   {               \
+	(cp)->cc_lockp = lockp;                                 \
+}
+
+#define	CALLB_CPR_SAFE_BEGIN(cp) {                              \
+	CALLB_CPR_ASSERT(cp);					\
+}
+
+#define	CALLB_CPR_SAFE_END(cp, lockp) {                         \
+	CALLB_CPR_ASSERT(cp);					\
+}
+
+#define	CALLB_CPR_EXIT(cp) {                                    \
+	ASSERT(MUTEX_HELD((cp)->cc_lockp));                     \
+	mutex_exit((cp)->cc_lockp);                             \
+}
+
+#endif  /* _SPL_CALLB_H */

diff --git a/zfs/include/os/linux/spl/sys/callo.h b/zfs/include/os/linux/spl/sys/callo.h
new file mode 100644
index 0000000..e93a15f
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/callo.h

@@ -0,0 +1,51 @@
+/*
+ *  Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CALLO_H
+#define	_SPL_CALLO_H
+
+/*
+ * Callout flags:
+ *
+ * CALLOUT_FLAG_ROUNDUP
+ *      Roundup the expiration time to the next resolution boundary.
+ *      If this flag is not specified, the expiration time is rounded down.
+ * CALLOUT_FLAG_ABSOLUTE
+ *      Normally, the expiration passed to the timeout API functions is an
+ *      expiration interval. If this flag is specified, then it is
+ *      interpreted as the expiration time itself.
+ * CALLOUT_FLAG_HRESTIME
+ *      Normally, callouts are not affected by changes to system time
+ *      (hrestime). This flag is used to create a callout that is affected
+ *      by system time. If system time changes, these timers must be
+ *      handled in a special way (see callout.c). These are used by condition
+ *      variables and LWP timers that need this behavior.
+ * CALLOUT_FLAG_32BIT
+ *      Legacy interfaces timeout() and realtime_timeout() pass this flag
+ *      to timeout_generic() to indicate that a 32-bit ID should be allocated.
+ */
+#define	CALLOUT_FLAG_ROUNDUP		0x1	/* round expiration up */
+#define	CALLOUT_FLAG_ABSOLUTE		0x2	/* expiration is absolute */
+#define	CALLOUT_FLAG_HRESTIME		0x4	/* follows system time */
+#define	CALLOUT_FLAG_32BIT		0x8	/* allocate a 32-bit ID */
+
+#endif  /* _SPL_CALLO_H */

diff --git a/zfs/include/os/linux/spl/sys/cmn_err.h b/zfs/include/os/linux/spl/sys/cmn_err.h
new file mode 100644
index 0000000..f46efde
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/cmn_err.h

@@ -0,0 +1,45 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CMN_ERR_H
+#define	_SPL_CMN_ERR_H
+
+#if defined(_KERNEL) && defined(HAVE_STANDALONE_LINUX_STDARG)
+#include <linux/stdarg.h>
+#else
+#include <stdarg.h>
+#endif
+
+#define	CE_CONT		0 /* continuation */
+#define	CE_NOTE		1 /* notice */
+#define	CE_WARN		2 /* warning */
+#define	CE_PANIC	3 /* panic */
+#define	CE_IGNORE	4 /* print nothing */
+
+extern void cmn_err(int, const char *, ...);
+extern void vcmn_err(int, const char *, va_list);
+extern void vpanic(const char *, va_list);
+
+#define	fm_panic	panic	/* fm_panic() is an alias for panic() */
+
+#endif /* _SPL_CMN_ERR_H */

diff --git a/zfs/include/os/linux/spl/sys/condvar.h b/zfs/include/os/linux/spl/sys/condvar.h
new file mode 100644
index 0000000..ef40576
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/condvar.h

@@ -0,0 +1,119 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CONDVAR_H
+#define	_SPL_CONDVAR_H
+
+#include <linux/module.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/callo.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+
+/*
+ * cv_timedwait() is similar to cv_wait() except that it additionally expects
+ * a timeout value specified in ticks.  When woken by cv_signal() or
+ * cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is
+ * returned.
+ *
+ * cv_timedwait_sig() behaves the same as cv_timedwait() but blocks
+ * interruptibly and can be woken by a signal (EINTR, ERESTART).  When
+ * this occurs 0 is returned.
+ *
+ * cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait()
+ * and cv_timedwait_sig() which should be used when waiting for outstanding
+ * IO to complete.  They are responsible for updating the iowait accounting
+ * when this is supported by the platform.
+ *
+ * cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution
+ * versions of cv_timedwait() and cv_timedwait_sig().  They expect the timeout
+ * to be specified as a hrtime_t allowing for timeouts of less than a tick.
+ *
+ * N.B. The return values differ slightly from the illumos implementation
+ * which returns the time remaining, instead of 1, when woken.  They both
+ * return -1 on timeout. Consumers which need to know the time remaining
+ * are responsible for tracking it themselves.
+ */
+
+
+/*
+ * The kcondvar_t struct is protected by a mutex that the caller takes before
+ * calling any of the wait/signal functions and passes into the wait functions.
+ */
+#define	CV_MAGIC			0x346545f4
+#define	CV_DESTROY			0x346545f5
+
+typedef struct {
+	int cv_magic;
+	spl_wait_queue_head_t cv_event;
+	spl_wait_queue_head_t cv_destroy;
+	atomic_t cv_refs;
+	atomic_t cv_waiters;
+	kmutex_t *cv_mutex;
+} kcondvar_t;
+
+typedef enum { CV_DEFAULT = 0, CV_DRIVER } kcv_type_t;
+
+extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *);
+extern void __cv_destroy(kcondvar_t *);
+extern void __cv_wait(kcondvar_t *, kmutex_t *);
+extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
+extern void __cv_wait_idle(kcondvar_t *, kmutex_t *);
+extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *);
+extern int __cv_wait_sig(kcondvar_t *, kmutex_t *);
+extern int __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
+extern int __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
+extern int __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
+extern int __cv_timedwait_idle(kcondvar_t *, kmutex_t *, clock_t);
+extern int cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+    hrtime_t res, int flag);
+extern int cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+    hrtime_t res, int flag);
+extern int cv_timedwait_idle_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+    hrtime_t res, int flag);
+extern void __cv_signal(kcondvar_t *);
+extern void __cv_broadcast(kcondvar_t *c);
+
+#define	cv_init(cvp, name, type, arg)		__cv_init(cvp, name, type, arg)
+#define	cv_destroy(cvp)				__cv_destroy(cvp)
+#define	cv_wait(cvp, mp)			__cv_wait(cvp, mp)
+#define	cv_wait_io(cvp, mp)			__cv_wait_io(cvp, mp)
+#define	cv_wait_idle(cvp, mp)			__cv_wait_idle(cvp, mp)
+#define	cv_wait_io_sig(cvp, mp)			__cv_wait_io_sig(cvp, mp)
+#define	cv_wait_sig(cvp, mp)			__cv_wait_sig(cvp, mp)
+#define	cv_signal(cvp)				__cv_signal(cvp)
+#define	cv_broadcast(cvp)			__cv_broadcast(cvp)
+
+/*
+ * NB: There is no way to reliably distinguish between having been signalled
+ * and having timed out on Linux. If the client code needs to reliably
+ * distinguish between the two it should use the hires variant.
+ */
+#define	cv_timedwait(cvp, mp, t)		__cv_timedwait(cvp, mp, t)
+#define	cv_timedwait_io(cvp, mp, t)		__cv_timedwait_io(cvp, mp, t)
+#define	cv_timedwait_sig(cvp, mp, t)		__cv_timedwait_sig(cvp, mp, t)
+#define	cv_timedwait_idle(cvp, mp, t)		__cv_timedwait_idle(cvp, mp, t)
+
+
+#endif /* _SPL_CONDVAR_H */

diff --git a/zfs/include/os/linux/spl/sys/cred.h b/zfs/include/os/linux/spl/sys/cred.h
new file mode 100644
index 0000000..501bd45
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/cred.h

@@ -0,0 +1,59 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CRED_H
+#define	_SPL_CRED_H
+
+#include <linux/module.h>
+#include <linux/cred.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+typedef struct cred cred_t;
+
+#define	kcred		((cred_t *)(init_task.cred))
+#define	CRED()		((cred_t *)current_cred())
+
+/* Linux 4.9 API change, GROUP_AT was removed */
+#ifndef GROUP_AT
+#define	GROUP_AT(gi, i)	((gi)->gid[i])
+#endif
+
+#define	KUID_TO_SUID(x)		(__kuid_val(x))
+#define	KGID_TO_SGID(x)		(__kgid_val(x))
+#define	SUID_TO_KUID(x)		(KUIDT_INIT(x))
+#define	SGID_TO_KGID(x)		(KGIDT_INIT(x))
+#define	KGIDP_TO_SGIDP(x)	(&(x)->val)
+
+extern zidmap_t *zfs_get_init_idmap(void);
+
+extern void crhold(cred_t *cr);
+extern void crfree(cred_t *cr);
+extern uid_t crgetuid(const cred_t *cr);
+extern uid_t crgetruid(const cred_t *cr);
+extern gid_t crgetgid(const cred_t *cr);
+extern int crgetngroups(const cred_t *cr);
+extern gid_t *crgetgroups(const cred_t *cr);
+extern int groupmember(gid_t gid, const cred_t *cr);
+
+#endif  /* _SPL_CRED_H */

diff --git a/zfs/include/os/linux/spl/sys/ctype.h b/zfs/include/os/linux/spl/sys/ctype.h
new file mode 100644
index 0000000..3513206
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/ctype.h

@@ -0,0 +1,29 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_CTYPE_H
+#define	_SPL_CTYPE_H
+
+#include <linux/ctype.h>
+
+#endif /* _SPL_CTYPE_H */

diff --git a/zfs/include/os/linux/spl/sys/debug.h b/zfs/include/os/linux/spl/sys/debug.h
new file mode 100644
index 0000000..dc6b85e
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/debug.h

@@ -0,0 +1,168 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Available Solaris debug functions.  All of the ASSERT() macros will be
+ * compiled out when NDEBUG is defined; this is the default behavior for
+ * the SPL.  To enable assertions use the --enable-debug configure option.
+ * The VERIFY() functions are never compiled out and cannot be disabled.
+ *
+ * PANIC()	- Panic the node and print message.
+ * ASSERT()	- Assert X is true, if not panic.
+ * ASSERT3B()	- Assert boolean X OP Y is true, if not panic.
+ * ASSERT3S()	- Assert signed X OP Y is true, if not panic.
+ * ASSERT3U()	- Assert unsigned X OP Y is true, if not panic.
+ * ASSERT3P()	- Assert pointer X OP Y is true, if not panic.
+ * ASSERT0()	- Assert value is zero, if not panic.
+ * VERIFY()	- Verify X is true, if not panic.
+ * VERIFY3B()	- Verify boolean X OP Y is true, if not panic.
+ * VERIFY3S()	- Verify signed X OP Y is true, if not panic.
+ * VERIFY3U()	- Verify unsigned X OP Y is true, if not panic.
+ * VERIFY3P()	- Verify pointer X OP Y is true, if not panic.
+ * VERIFY0()	- Verify value is zero, if not panic.
+ */
+
+#ifndef _SPL_DEBUG_H
+#define	_SPL_DEBUG_H
+
+/*
+ * Common DEBUG functionality.
+ */
+#define	__printflike(a, b)	__printf(a, b)
+
+#ifndef __maybe_unused
+#define	__maybe_unused __attribute__((unused))
+#endif
+
+int spl_panic(const char *file, const char *func, int line,
+    const char *fmt, ...);
+void spl_dumpstack(void);
+
+/* BEGIN CSTYLED */
+#define	PANIC(fmt, a...)						\
+	spl_panic(__FILE__, __FUNCTION__, __LINE__, fmt, ## a)
+
+#define	VERIFY(cond)										\
+	(void) (unlikely(!(cond)) &&							\
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__,			\
+	    "%s", "VERIFY(" #cond ") failed\n"))
+
+#define	VERIFY3B(LEFT, OP, RIGHT)	do {					\
+		const boolean_t _verify3_left = (boolean_t)(LEFT);	\
+		const boolean_t _verify3_right = (boolean_t)(RIGHT);\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%d " #OP " %d)\n",						\
+		    (boolean_t) (_verify3_left),					\
+		    (boolean_t) (_verify3_right));					\
+	} while (0)
+
+#define	VERIFY3S(LEFT, OP, RIGHT)	do {					\
+		const int64_t _verify3_left = (int64_t)(LEFT);		\
+		const int64_t _verify3_right = (int64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%lld " #OP " %lld)\n",					\
+		    (long long) (_verify3_left),					\
+		    (long long) (_verify3_right));					\
+	} while (0)
+
+#define	VERIFY3U(LEFT, OP, RIGHT)	do {					\
+		const uint64_t _verify3_left = (uint64_t)(LEFT);	\
+		const uint64_t _verify3_right = (uint64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%llu " #OP " %llu)\n",					\
+		    (unsigned long long) (_verify3_left),			\
+		    (unsigned long long) (_verify3_right));			\
+	} while (0)
+
+#define	VERIFY3P(LEFT, OP, RIGHT)	do {					\
+		const uintptr_t _verify3_left = (uintptr_t)(LEFT);	\
+		const uintptr_t _verify3_right = (uintptr_t)(RIGHT);\
+		if (unlikely(!(_verify3_left OP _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
+		    "failed (%px " #OP " %px)\n",					\
+		    (void *) (_verify3_left),						\
+		    (void *) (_verify3_right));						\
+	} while (0)
+
+#define	VERIFY0(RIGHT)	do {								\
+		const int64_t _verify3_left = (int64_t)(0);			\
+		const int64_t _verify3_right = (int64_t)(RIGHT);	\
+		if (unlikely(!(_verify3_left == _verify3_right)))	\
+		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
+		    "VERIFY3(0 == " #RIGHT ") "						\
+		    "failed (0 == %lld)\n",							\
+		    (long long) (_verify3_right));					\
+	} while (0)
+
+#define	CTASSERT_GLOBAL(x)		_CTASSERT(x, __LINE__)
+#define	CTASSERT(x)			{ _CTASSERT(x, __LINE__); }
+#define	_CTASSERT(x, y)			__CTASSERT(x, y)
+#define	__CTASSERT(x, y)						\
+	typedef char __attribute__ ((unused))				\
+	__compile_time_assertion__ ## y[(x) ? 1 : -1]
+
+/*
+ * Debugging disabled (--disable-debug)
+ */
+#ifdef NDEBUG
+
+#define	ASSERT(x)		((void)0)
+#define	ASSERT3B(x,y,z)		((void)0)
+#define	ASSERT3S(x,y,z)		((void)0)
+#define	ASSERT3U(x,y,z)		((void)0)
+#define	ASSERT3P(x,y,z)		((void)0)
+#define	ASSERT0(x)		((void)0)
+#define	IMPLY(A, B)		((void)0)
+#define	EQUIV(A, B)		((void)0)
+
+/*
+ * Debugging enabled (--enable-debug)
+ */
+#else
+
+#define	ASSERT3B	VERIFY3B
+#define	ASSERT3S	VERIFY3S
+#define	ASSERT3U	VERIFY3U
+#define	ASSERT3P	VERIFY3P
+#define	ASSERT0		VERIFY0
+#define	ASSERT		VERIFY
+#define	IMPLY(A, B) \
+	((void)(likely((!(A)) || (B)) || \
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
+	    "(" #A ") implies (" #B ")")))
+#define	EQUIV(A, B) \
+	((void)(likely(!!(A) == !!(B)) || \
+	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
+	    "(" #A ") is equivalent to (" #B ")")))
+/* END CSTYLED */
+
+#endif /* NDEBUG */
+
+#endif /* _SPL_DEBUG_H */

diff --git a/zfs/include/os/linux/spl/sys/disp.h b/zfs/include/os/linux/spl/sys/disp.h
new file mode 100644
index 0000000..e106d3c
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/disp.h

@@ -0,0 +1,33 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_DISP_H
+#define	_SPL_DISP_H
+
+#include <linux/preempt.h>
+
+#define	kpreempt(unused)	schedule()	/* argument is ignored */
+#define	kpreempt_disable()	preempt_disable()
+#define	kpreempt_enable()	preempt_enable()
+
+#endif /* _SPL_DISP_H */

diff --git a/zfs/include/os/linux/spl/sys/dkio.h b/zfs/include/os/linux/spl/sys/dkio.h
new file mode 100644
index 0000000..a90b67d
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/dkio.h

@@ -0,0 +1,39 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_DKIO_H
+#define	_SPL_DKIO_H
+
+/* sizeof a dkioc_free_list_t plus 16 bytes per extent beyond the first. */
+#define	DFL_SZ(num_exts) (sizeof (dkioc_free_list_t) + ((num_exts) - 1) * 16)
+
+#define	DKIOC		(0x04 << 8)
+#define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
+
+/*
+ * ioctl to free space (e.g. SCSI UNMAP) off a disk.
+ * Pass a dkioc_free_list_t containing a list of extents to be freed.
+ */
+#define	DKIOCFREE	(DKIOC|50)
+
+#endif /* _SPL_DKIO_H */

diff --git a/zfs/include/os/linux/spl/sys/errno.h b/zfs/include/os/linux/spl/sys/errno.h
new file mode 100644
index 0000000..f6d9212
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/errno.h

@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2000 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_ERRNO_H
+#define	_SYS_ERRNO_H
+
+#include <linux/errno.h>
+
+#define	ENOTSUP		EOPNOTSUPP
+
+/*
+ * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
+ * graveyard) to indicate checksum errors and fragmentation.
+ */
+#define	ECKSUM	EBADE
+#define	EFRAGS	EBADR
+
+/* Similar for ENOTACTIVE */
+#define	ENOTACTIVE	ENOANO
+
+#endif	/* _SYS_ERRNO_H */

diff --git a/zfs/include/os/linux/spl/sys/fcntl.h b/zfs/include/os/linux/spl/sys/fcntl.h
new file mode 100644
index 0000000..a87fdca
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/fcntl.h

@@ -0,0 +1,36 @@
+/*
+ *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_FCNTL_H
+#define	_SPL_FCNTL_H
+
+#include <asm/fcntl.h>
+
+#define	F_FREESP 11
+
+#ifdef CONFIG_64BIT
+typedef struct flock flock64_t;
+#else
+typedef struct flock64 flock64_t;
+#endif /* CONFIG_64BIT */
+
+#endif /* _SPL_FCNTL_H */

diff --git a/zfs/include/os/linux/spl/sys/file.h b/zfs/include/os/linux/spl/sys/file.h
new file mode 100644
index 0000000..e0bbd6d
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/file.h

@@ -0,0 +1,51 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_FILE_H
+#define	_SPL_FILE_H
+
+#define	FIGNORECASE		0x00080000
+#define	FKIOCTL			0x80000000
+#define	ED_CASE_CONFLICT	0x10
+
+#ifdef HAVE_INODE_LOCK_SHARED
+#define	spl_inode_lock(ip)		inode_lock(ip)
+#define	spl_inode_unlock(ip)		inode_unlock(ip)
+#define	spl_inode_lock_shared(ip)	inode_lock_shared(ip)
+#define	spl_inode_unlock_shared(ip)	inode_unlock_shared(ip)
+#define	spl_inode_trylock(ip)		inode_trylock(ip)
+#define	spl_inode_trylock_shared(ip)	inode_trylock_shared(ip)
+#define	spl_inode_is_locked(ip)		inode_is_locked(ip)
+#define	spl_inode_lock_nested(ip, s)	inode_lock_nested(ip, s)
+#else
+#define	spl_inode_lock(ip)		mutex_lock(&(ip)->i_mutex)
+#define	spl_inode_unlock(ip)		mutex_unlock(&(ip)->i_mutex)
+#define	spl_inode_lock_shared(ip)	mutex_lock(&(ip)->i_mutex)
+#define	spl_inode_unlock_shared(ip)	mutex_unlock(&(ip)->i_mutex)
+#define	spl_inode_trylock(ip)		mutex_trylock(&(ip)->i_mutex)
+#define	spl_inode_trylock_shared(ip)	mutex_trylock(&(ip)->i_mutex)
+#define	spl_inode_is_locked(ip)		mutex_is_locked(&(ip)->i_mutex)
+#define	spl_inode_lock_nested(ip, s)	mutex_lock_nested(&(ip)->i_mutex, s)
+#endif
+
+#endif /* _SPL_FILE_H */

diff --git a/zfs/include/os/linux/spl/sys/inttypes.h b/zfs/include/os/linux/spl/sys/inttypes.h
new file mode 100644
index 0000000..c99973a
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/inttypes.h

@@ -0,0 +1,27 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_INTTYPES_H
+#define	_SPL_INTTYPES_H
+
+#endif /* _SPL_INTTYPES_H */

diff --git a/zfs/include/os/linux/spl/sys/isa_defs.h b/zfs/include/os/linux/spl/sys/isa_defs.h
new file mode 100644
index 0000000..2207ee2
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/isa_defs.h

@@ -0,0 +1,253 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef	_SPL_ISA_DEFS_H
+#define	_SPL_ISA_DEFS_H
+
+/* x86_64 arch specific defines */
+#if defined(__x86_64) || defined(__x86_64__)
+
+#if !defined(__x86_64)
+#define	__x86_64
+#endif
+
+#if !defined(__amd64)
+#define	__amd64
+#endif
+
+#if !defined(__x86)
+#define	__x86
+#endif
+
+#if defined(_ILP32)
+/* x32-specific defines; careful to *not* define _LP64 here */
+#else
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#endif
+
+#define	_ALIGNMENT_REQUIRED	1
+
+
+/* i386 arch specific defines */
+#elif defined(__i386) || defined(__i386__)
+
+#if !defined(__i386)
+#define	__i386
+#endif
+
+#if !defined(__x86)
+#define	__x86
+#endif
+
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+
+#define	_ALIGNMENT_REQUIRED	0
+
+/* powerpc (ppc64) arch specific defines */
+#elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
+
+#if !defined(__powerpc)
+#define	__powerpc
+#endif
+
+#if !defined(__powerpc__)
+#define	__powerpc__
+#endif
+
+#if defined(__powerpc64__)
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#else
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#endif
+
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for PPC, so default to 1
+ * out of paranoia.
+ */
+#define	_ALIGNMENT_REQUIRED	1
+
+/* arm arch specific defines */
+#elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
+
+#if !defined(__arm)
+#define	__arm
+#endif
+
+#if !defined(__arm__)
+#define	__arm__
+#endif
+
+#if defined(__aarch64__)
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#else
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#endif
+
+#if defined(__ARMEL__) || defined(__AARCH64EL__)
+#define	_ZFS_LITTLE_ENDIAN
+#else
+#define	_ZFS_BIG_ENDIAN
+#endif
+
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for ARM, so default to 1
+ * out of paranoia.
+ */
+#define	_ALIGNMENT_REQUIRED	1
+
+/* sparc arch specific defines */
+#elif defined(__sparc) || defined(__sparc__)
+
+#if !defined(__sparc)
+#define	__sparc
+#endif
+
+#if !defined(__sparc__)
+#define	__sparc__
+#endif
+
+#if defined(__arch64__)
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#else
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#endif
+
+#define	_ZFS_BIG_ENDIAN
+#define	_SUNOS_VTOC_16
+#define	_ALIGNMENT_REQUIRED	1
+
+/* s390 arch specific defines */
+#elif defined(__s390__)
+#if defined(__s390x__)
+#if !defined(_LP64)
+#define	_LP64
+#endif
+#else
+#if !defined(_ILP32)
+#define	_ILP32
+#endif
+#endif
+
+#define	_ZFS_BIG_ENDIAN
+
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for s390, so default to 1
+ * out of paranoia.
+ */
+#define	_ALIGNMENT_REQUIRED	1
+
+/* MIPS arch specific defines */
+#elif defined(__mips__)
+
+#if defined(__MIPSEB__)
+#define	_ZFS_BIG_ENDIAN
+#elif defined(__MIPSEL__)
+#define	_ZFS_LITTLE_ENDIAN
+#else
+#error MIPS no endian specified
+#endif
+
+#ifndef _LP64
+#define	_ILP32
+#endif
+
+#define	_SUNOS_VTOC_16
+
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for MIPS, so default to 1
+ * out of paranoia.
+ */
+#define	_ALIGNMENT_REQUIRED	1
+
+/*
+ * RISC-V arch specific defines
+ * only RV64G (including atomic) LP64 is supported so far
+ */
+#elif defined(__riscv) && defined(_LP64) && _LP64 && \
+	defined(__riscv_atomic) && __riscv_atomic
+
+#ifndef	__riscv__
+#define	__riscv__
+#endif
+
+#ifndef	__rv64g__
+#define	__rv64g__
+#endif
+
+#define	_ZFS_LITTLE_ENDIAN
+
+#define	_SUNOS_VTOC_16
+
+#define	_ALIGNMENT_REQUIRED	1
+
+#else
+/*
+ * Currently supported:
+ * x86_64, x32, i386, arm, powerpc, s390, sparc, mips, and RV64G
+ */
+#error "Unsupported ISA type"
+#endif
+
+#if defined(_ILP32) && defined(_LP64)
+#error "Both _ILP32 and _LP64 are defined"
+#endif
+
+#if !defined(_ILP32) && !defined(_LP64)
+#error "Neither _ILP32 or _LP64 are defined"
+#endif
+
+#include <sys/byteorder.h>
+
+/*
+ * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS will be defined by the Linux
+ * kernel for architectures which support efficient unaligned access.
+ */
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
+#endif
+
+#if defined(_ZFS_LITTLE_ENDIAN) && defined(_ZFS_BIG_ENDIAN)
+#error "Both _ZFS_LITTLE_ENDIAN and _ZFS_BIG_ENDIAN are defined"
+#endif
+
+#if !defined(_ZFS_LITTLE_ENDIAN) && !defined(_ZFS_BIG_ENDIAN)
+#error "Neither _ZFS_LITTLE_ENDIAN or _ZFS_BIG_ENDIAN are defined"
+#endif
+
+#endif	/* _SPL_ISA_DEFS_H */

diff --git a/zfs/include/os/linux/spl/sys/kmem.h b/zfs/include/os/linux/spl/sys/kmem.h
new file mode 100644
index 0000000..a93e87d
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/kmem.h

@@ -0,0 +1,210 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_KMEM_H
+#define	_SPL_KMEM_H
+
+#include <sys/debug.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+extern int kmem_debugging(void);
+extern char *kmem_vasprintf(const char *fmt, va_list ap);
+extern char *kmem_asprintf(const char *fmt, ...);
+extern char *kmem_strdup(const char *str);
+extern void kmem_strfree(char *str);
+
+/*
+ * Memory allocation interfaces
+ */
+#define	KM_SLEEP	0x0000	/* can block for memory; success guaranteed */
+#define	KM_NOSLEEP	0x0001	/* cannot block for memory; may fail */
+#define	KM_PUSHPAGE	0x0004	/* can block for memory; may use reserve */
+#define	KM_ZERO		0x1000	/* zero the allocation */
+#define	KM_VMEM		0x2000	/* caller is vmem_* wrapper */
+
+#define	KM_PUBLIC_MASK	(KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
+
+static int spl_fstrans_check(void);
+void *spl_kvmalloc(size_t size, gfp_t flags);
+
+/*
+ * Convert a KM_* flags mask to its Linux GFP_* counterpart.  The conversion
+ * function is context aware which means that KM_SLEEP allocations can be
+ * safely used in syncing contexts which have set PF_FSTRANS.
+ */
+static inline gfp_t
+kmem_flags_convert(int flags)
+{
+	gfp_t lflags = __GFP_NOWARN | __GFP_COMP;	/* always set */
+
+	if (flags & KM_NOSLEEP) {	/* caller cannot block */
+		lflags |= GFP_ATOMIC | __GFP_NORETRY;
+	} else {			/* blocking allowed */
+		lflags |= GFP_KERNEL;
+		if (spl_fstrans_check())	/* task marked fstrans */
+			lflags &= ~(__GFP_IO|__GFP_FS);
+	}
+
+	if (flags & KM_PUSHPAGE)
+		lflags |= __GFP_HIGH;
+
+	if (flags & KM_ZERO)
+		lflags |= __GFP_ZERO;
+
+	return (lflags);
+}
+
+typedef struct {
+	struct task_struct *fstrans_thread;
+	unsigned int saved_flags;
+} fstrans_cookie_t;
+
+/*
+ * Introduced in Linux 3.9, however this cannot be solely relied on before
+ * Linux 3.18 as it doesn't turn off __GFP_FS as it should.
+ */
+#ifdef PF_MEMALLOC_NOIO
+#define	__SPL_PF_MEMALLOC_NOIO (PF_MEMALLOC_NOIO)
+#else
+#define	__SPL_PF_MEMALLOC_NOIO (0)
+#endif
+
+/*
+ * PF_FSTRANS is removed from Linux 4.12
+ */
+#ifdef PF_FSTRANS
+#define	__SPL_PF_FSTRANS (PF_FSTRANS)
+#else
+#define	__SPL_PF_FSTRANS (0)
+#endif
+
+#define	SPL_FSTRANS (__SPL_PF_FSTRANS|__SPL_PF_MEMALLOC_NOIO)
+
+static inline fstrans_cookie_t
+spl_fstrans_mark(void)
+{
+	fstrans_cookie_t cookie;
+
+	BUILD_BUG_ON(SPL_FSTRANS == 0);	/* need at least one PF_ bit */
+
+	cookie.fstrans_thread = current;
+	cookie.saved_flags = current->flags & SPL_FSTRANS; /* prior state */
+	current->flags |= SPL_FSTRANS;
+
+	return (cookie);
+}
+
+static inline void
+spl_fstrans_unmark(fstrans_cookie_t cookie)
+{
+	ASSERT3P(cookie.fstrans_thread, ==, current);	/* same task */
+	ASSERT((current->flags & SPL_FSTRANS) == SPL_FSTRANS);
+
+	current->flags &= ~SPL_FSTRANS;		/* clear marker bits */
+	current->flags |= cookie.saved_flags;	/* restore pre-mark bits */
+}
+
+static inline int
+spl_fstrans_check(void)
+{
+	return (current->flags & SPL_FSTRANS);	/* nonzero if any bit set */
+}
+
+/*
+ * Specifically checks the PF_FSTRANS flag only; it cannot be relied on for
+ * detecting a prior spl_fstrans_mark().
+ */
+static inline int
+__spl_pf_fstrans_check(void)
+{
+	return (current->flags & __SPL_PF_FSTRANS);	/* 0 w/o PF_FSTRANS */
+}
+
+/*
+ * Kernel compatibility for GFP flags
+ */
+/* < 4.13 */
+#ifndef __GFP_RETRY_MAYFAIL
+#define	__GFP_RETRY_MAYFAIL	__GFP_REPEAT
+#endif
+/* < 4.4 */
+#ifndef __GFP_RECLAIM
+#define	__GFP_RECLAIM		__GFP_WAIT
+#endif
+
+#ifdef HAVE_ATOMIC64_T
+#define	kmem_alloc_used_add(size)	atomic64_add(size, &kmem_alloc_used)
+#define	kmem_alloc_used_sub(size)	atomic64_sub(size, &kmem_alloc_used)
+#define	kmem_alloc_used_read()		atomic64_read(&kmem_alloc_used)
+#define	kmem_alloc_used_set(size)	atomic64_set(&kmem_alloc_used, size)
+extern atomic64_t kmem_alloc_used;
+extern unsigned long long kmem_alloc_max;
+#else  /* HAVE_ATOMIC64_T */
+#define	kmem_alloc_used_add(size)	atomic_add(size, &kmem_alloc_used)
+#define	kmem_alloc_used_sub(size)	atomic_sub(size, &kmem_alloc_used)
+#define	kmem_alloc_used_read()		atomic_read(&kmem_alloc_used)
+#define	kmem_alloc_used_set(size)	atomic_set(&kmem_alloc_used, size)
+extern atomic_t kmem_alloc_used;
+extern unsigned long long kmem_alloc_max;
+#endif /* HAVE_ATOMIC64_T */
+
+extern unsigned int spl_kmem_alloc_warn;
+extern unsigned int spl_kmem_alloc_max;
+
+#define	kmem_alloc(sz, fl)	spl_kmem_alloc((sz), (fl), __func__, __LINE__)
+#define	kmem_zalloc(sz, fl)	spl_kmem_zalloc((sz), (fl), __func__, __LINE__)
+#define	kmem_free(ptr, sz)	spl_kmem_free((ptr), (sz))
+#define	kmem_cache_reap_active	spl_kmem_cache_reap_active
+
+extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
+extern void spl_kmem_free(const void *ptr, size_t sz);
+
+/*
+ * 5.8 API change, pgprot_t argument removed.
+ */
+#ifdef HAVE_VMALLOC_PAGE_KERNEL
+#define	spl_vmalloc(size, flags)	__vmalloc(size, flags, PAGE_KERNEL)
+#else
+#define	spl_vmalloc(size, flags)	__vmalloc(size, flags)
+#endif
+
+/*
+ * The following functions are only available for internal use.
+ */
+extern void *spl_kmem_alloc_impl(size_t size, int flags, int node);
+extern void *spl_kmem_alloc_debug(size_t size, int flags, int node);
+extern void *spl_kmem_alloc_track(size_t size, int flags,
+    const char *func, int line, int node);
+extern void spl_kmem_free_impl(const void *buf, size_t size);
+extern void spl_kmem_free_debug(const void *buf, size_t size);
+extern void spl_kmem_free_track(const void *buf, size_t size);
+
+extern int spl_kmem_init(void);
+extern void spl_kmem_fini(void);
+extern int spl_kmem_cache_reap_active(void);
+
+#endif	/* _SPL_KMEM_H */

diff --git a/zfs/include/os/linux/spl/sys/kmem_cache.h b/zfs/include/os/linux/spl/sys/kmem_cache.h
new file mode 100644
index 0000000..48006ec
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/kmem_cache.h

@@ -0,0 +1,215 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_KMEM_CACHE_H
+#define	_SPL_KMEM_CACHE_H
+
+#include <sys/taskq.h>
+
+/*
+ * Slab allocation interfaces.  The SPL slab differs from the standard
+ * Linux SLAB or SLUB primarily in that each cache may be backed by slabs
+ * allocated from the physical or virtual memory address space.  The virtual
+ * slabs allow for good behavior when allocating large objects of identical
+ * size.  This slab implementation also supports both constructors and
+ * destructors which the Linux slab does not.
+ */
+typedef enum kmc_bit {
+	KMC_BIT_NODEBUG		= 1,	/* Default behavior */
+	KMC_BIT_KVMEM		= 7,	/* Use kvmalloc linux allocator  */
+	KMC_BIT_SLAB		= 8,	/* Use Linux slab cache */
+	KMC_BIT_DEADLOCKED	= 14,	/* Deadlock detected */
+	KMC_BIT_GROWING		= 15,	/* Growing in progress */
+	KMC_BIT_REAPING		= 16,	/* Reaping in progress */
+	KMC_BIT_DESTROY		= 17,	/* Destroy in progress */
+	KMC_BIT_TOTAL		= 18,	/* Proc handler helper bit */
+	KMC_BIT_ALLOC		= 19,	/* Proc handler helper bit */
+	KMC_BIT_MAX		= 20,	/* Proc handler helper bit */
+} kmc_bit_t;
+
+/* kmem move callback return values */
+typedef enum kmem_cbrc {
+	KMEM_CBRC_YES		= 0,	/* Object moved */
+	KMEM_CBRC_NO		= 1,	/* Object not moved */
+	KMEM_CBRC_LATER		= 2,	/* Object not moved, try again later */
+	KMEM_CBRC_DONT_NEED	= 3,	/* Neither object is needed */
+	KMEM_CBRC_DONT_KNOW	= 4,	/* Object unknown */
+} kmem_cbrc_t;
+
+#define	KMC_NODEBUG		(1 << KMC_BIT_NODEBUG)
+#define	KMC_KVMEM		(1 << KMC_BIT_KVMEM)
+#define	KMC_SLAB		(1 << KMC_BIT_SLAB)
+#define	KMC_DEADLOCKED		(1 << KMC_BIT_DEADLOCKED)
+#define	KMC_GROWING		(1 << KMC_BIT_GROWING)
+#define	KMC_REAPING		(1 << KMC_BIT_REAPING)
+#define	KMC_DESTROY		(1 << KMC_BIT_DESTROY)
+#define	KMC_TOTAL		(1 << KMC_BIT_TOTAL)
+#define	KMC_ALLOC		(1 << KMC_BIT_ALLOC)
+#define	KMC_MAX			(1 << KMC_BIT_MAX)
+
+#define	KMC_REAP_CHUNK		INT_MAX
+#define	KMC_DEFAULT_SEEKS	1
+
+#define	KMC_RECLAIM_ONCE	0x1	/* Force a single shrinker pass */
+
+extern struct list_head spl_kmem_cache_list;
+extern struct rw_semaphore spl_kmem_cache_sem;
+
+#define	SKM_MAGIC			0x2e2e2e2e
+#define	SKO_MAGIC			0x20202020
+#define	SKS_MAGIC			0x22222222
+#define	SKC_MAGIC			0x2c2c2c2c
+
+#define	SPL_KMEM_CACHE_OBJ_PER_SLAB	8	/* Target objects per slab */
+#define	SPL_KMEM_CACHE_ALIGN		8	/* Default object alignment */
+#ifdef _LP64
+#define	SPL_KMEM_CACHE_MAX_SIZE		32	/* Max slab size in MB */
+#else
+#define	SPL_KMEM_CACHE_MAX_SIZE		4	/* Max slab size in MB */
+#endif
+
+#define	SPL_MAX_ORDER			(MAX_ORDER - 3)
+#define	SPL_MAX_ORDER_NR_PAGES		(1 << (SPL_MAX_ORDER - 1))
+
+#ifdef CONFIG_SLUB
+#define	SPL_MAX_KMEM_CACHE_ORDER	PAGE_ALLOC_COSTLY_ORDER
+#define	SPL_MAX_KMEM_ORDER_NR_PAGES	(1 << (SPL_MAX_KMEM_CACHE_ORDER - 1))
+#else
+#define	SPL_MAX_KMEM_ORDER_NR_PAGES	(KMALLOC_MAX_SIZE >> PAGE_SHIFT)
+#endif
+
+#define	POINTER_IS_VALID(p)		0	/* Unimplemented */
+#define	POINTER_INVALIDATE(pp)			/* Unimplemented */
+
+typedef int (*spl_kmem_ctor_t)(void *, void *, int);
+typedef void (*spl_kmem_dtor_t)(void *, void *);
+
+typedef struct spl_kmem_magazine {
+	uint32_t		skm_magic;	/* Sanity magic */
+	uint32_t		skm_avail;	/* Available objects */
+	uint32_t		skm_size;	/* Magazine size */
+	uint32_t		skm_refill;	/* Batch refill size */
+	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
+	unsigned int		skm_cpu;	/* Owned by cpu */
+	void			*skm_objs[];	/* Object pointers (flexible) */
+} spl_kmem_magazine_t;
+
+typedef struct spl_kmem_obj {
+	uint32_t		sko_magic;	/* Sanity magic */
+	void			*sko_addr;	/* Buffer address */
+	struct spl_kmem_slab	*sko_slab;	/* Owned by slab */
+	struct list_head	sko_list;	/* Free object list linkage */
+} spl_kmem_obj_t;
+
+typedef struct spl_kmem_slab {
+	uint32_t		sks_magic;	/* Sanity magic */
+	uint32_t		sks_objs;	/* Objects per slab */
+	struct spl_kmem_cache	*sks_cache;	/* Owned by cache */
+	struct list_head	sks_list;	/* Slab list linkage */
+	struct list_head	sks_free_list;	/* Free object list */
+	unsigned long		sks_age;	/* Last modify jiffie */
+	uint32_t		sks_ref;	/* Ref count used objects */
+} spl_kmem_slab_t;
+
+typedef struct spl_kmem_alloc {
+	struct spl_kmem_cache	*ska_cache;	/* Owned by cache */
+	int			ska_flags;	/* Allocation flags */
+	taskq_ent_t		ska_tqe;	/* Task queue entry */
+} spl_kmem_alloc_t;
+
+typedef struct spl_kmem_emergency {
+	struct rb_node		ske_node;	/* Emergency tree linkage */
+	unsigned long		ske_obj;	/* Buffer address */
+} spl_kmem_emergency_t;
+
+typedef struct spl_kmem_cache {
+	uint32_t		skc_magic;	/* Sanity magic */
+	uint32_t		skc_name_size;	/* Name length */
+	char			*skc_name;	/* Name string */
+	spl_kmem_magazine_t	**skc_mag;	/* Per-CPU warm cache */
+	uint32_t		skc_mag_size;	/* Magazine size */
+	uint32_t		skc_mag_refill;	/* Magazine refill count */
+	spl_kmem_ctor_t		skc_ctor;	/* Constructor */
+	spl_kmem_dtor_t		skc_dtor;	/* Destructor */
+	void			*skc_private;	/* Private data */
+	void			*skc_vmp;	/* Unused */
+	struct kmem_cache	*skc_linux_cache; /* Linux slab cache if used */
+	unsigned long		skc_flags;	/* Flags */
+	uint32_t		skc_obj_size;	/* Object size */
+	uint32_t		skc_obj_align;	/* Object alignment */
+	uint32_t		skc_slab_objs;	/* Objects per slab */
+	uint32_t		skc_slab_size;	/* Slab size */
+	atomic_t		skc_ref;	/* Ref count callers */
+	taskqid_t		skc_taskqid;	/* Slab reclaim task */
+	struct list_head	skc_list;	/* List of caches linkage */
+	struct list_head	skc_complete_list; /* Completely alloc'ed */
+	struct list_head	skc_partial_list;  /* Partially alloc'ed */
+	struct rb_root		skc_emergency_tree; /* Min sized objects */
+	spinlock_t		skc_lock;	/* Cache lock */
+	spl_wait_queue_head_t	skc_waitq;	/* Allocation waiters */
+	uint64_t		skc_slab_fail;	/* Slab alloc failures */
+	uint64_t		skc_slab_create;  /* Slab creates */
+	uint64_t		skc_slab_destroy; /* Slab destroys */
+	uint64_t		skc_slab_total;	/* Slab total current */
+	uint64_t		skc_slab_alloc;	/* Slab alloc current */
+	uint64_t		skc_slab_max;	/* Slab max historic  */
+	uint64_t		skc_obj_total;	/* Obj total current */
+	uint64_t		skc_obj_alloc;	/* Obj alloc current */
+	struct percpu_counter	skc_linux_alloc;   /* Linux-backed Obj alloc  */
+	uint64_t		skc_obj_max;	/* Obj max historic */
+	uint64_t		skc_obj_deadlock;  /* Obj emergency deadlocks */
+	uint64_t		skc_obj_emergency; /* Obj emergency current */
+	uint64_t		skc_obj_emergency_max; /* Obj emergency max */
+} spl_kmem_cache_t;
+#define	kmem_cache_t		spl_kmem_cache_t
+
+extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size,
+    size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor,
+    void *reclaim, void *priv, void *vmp, int flags);
+extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
+    kmem_cbrc_t (*)(void *, void *, size_t, void *));
+extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
+extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
+extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
+extern void spl_kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags);
+extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc);
+extern void spl_kmem_reap(void);
+extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
+extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
+
+#define	kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) \
+    spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
+#define	kmem_cache_set_move(skc, move)	spl_kmem_cache_set_move(skc, move)
+#define	kmem_cache_destroy(skc)		spl_kmem_cache_destroy(skc)
+#define	kmem_cache_alloc(skc, flags)	spl_kmem_cache_alloc(skc, flags)
+#define	kmem_cache_free(skc, obj)	spl_kmem_cache_free(skc, obj)
+#define	kmem_cache_reap_now(skc)	spl_kmem_cache_reap_now(skc)
+#define	kmem_reap()			spl_kmem_reap()
+
+/*
+ * The following functions are only available for internal use.
+ */
+extern int spl_kmem_cache_init(void);
+extern void spl_kmem_cache_fini(void);
+
+#endif	/* _SPL_KMEM_CACHE_H */

diff --git a/zfs/include/os/linux/spl/sys/kstat.h b/zfs/include/os/linux/spl/sys/kstat.h
new file mode 100644
index 0000000..928f707
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/kstat.h

@@ -0,0 +1,218 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_KSTAT_H
+#define	_SPL_KSTAT_H
+
+#include <linux/module.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+#define	KSTAT_STRLEN		255
+#define	KSTAT_RAW_MAX		(128*1024)
+
+/*
+ * For reference valid classes are:
+ * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc
+ */
+
+#define	KSTAT_TYPE_RAW		0 /* can be anything; ks_ndata >= 1 */
+#define	KSTAT_TYPE_NAMED	1 /* name/value pair; ks_ndata >= 1 */
+#define	KSTAT_TYPE_INTR		2 /* interrupt stats; ks_ndata == 1 */
+#define	KSTAT_TYPE_IO		3 /* I/O stats; ks_ndata == 1 */
+#define	KSTAT_TYPE_TIMER	4 /* event timer; ks_ndata >= 1 */
+#define	KSTAT_NUM_TYPES		5
+
+#define	KSTAT_DATA_CHAR		0
+#define	KSTAT_DATA_INT32	1
+#define	KSTAT_DATA_UINT32	2
+#define	KSTAT_DATA_INT64	3
+#define	KSTAT_DATA_UINT64	4
+#define	KSTAT_DATA_LONG		5
+#define	KSTAT_DATA_ULONG	6
+#define	KSTAT_DATA_STRING	7
+#define	KSTAT_NUM_DATAS		8
+
+#define	KSTAT_INTR_HARD		0
+#define	KSTAT_INTR_SOFT		1
+#define	KSTAT_INTR_WATCHDOG	2
+#define	KSTAT_INTR_SPURIOUS	3
+#define	KSTAT_INTR_MULTSVC	4
+#define	KSTAT_NUM_INTRS		5
+
+#define	KSTAT_FLAG_VIRTUAL	0x01
+#define	KSTAT_FLAG_VAR_SIZE	0x02
+#define	KSTAT_FLAG_WRITABLE	0x04
+#define	KSTAT_FLAG_PERSISTENT	0x08
+#define	KSTAT_FLAG_DORMANT	0x10
+#define	KSTAT_FLAG_INVALID	0x20
+#define	KSTAT_FLAG_LONGSTRINGS	0x40
+#define	KSTAT_FLAG_NO_HEADERS	0x80
+
+#define	KS_MAGIC		0x9d9d9d9d
+
+/* Dynamic updates */
+#define	KSTAT_READ		0
+#define	KSTAT_WRITE		1
+
+struct kstat_s;
+typedef struct kstat_s kstat_t;
+
+typedef int kid_t;				/* unique kstat id */
+typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */
+
+/* Per-module record: name, list linkage, owned kstat entries, proc entry. */
+typedef struct kstat_module {
+	char ksm_name[KSTAT_STRLEN+1];		/* module name */
+	struct list_head ksm_module_list;	/* module linkage */
+	struct list_head ksm_kstat_list;	/* list of kstat entries */
+	struct proc_dir_entry *ksm_proc;	/* proc entry */
+} kstat_module_t;
+
+/*
+ * Callback table embedded in struct kstat_s as ks_raw_ops ("ops table for
+ * raw type").  The three members have the same signatures as the callback
+ * arguments of __kstat_set_raw_ops().
+ * NOTE(review): presumably 'headers' and 'data' format text into buf and
+ * 'addr' maps an index to a record - confirm against the implementation.
+ */
+typedef struct kstat_raw_ops {
+	int (*headers)(char *buf, size_t size);
+	int (*data)(char *buf, size_t size, void *data);
+	void *(*addr)(kstat_t *ksp, loff_t index);
+} kstat_raw_ops_t;
+
+/*
+ * procfs bookkeeping for a single entry.  Embedded by value in
+ * struct kstat_s (ks_proc) and in struct procfs_list (pl_kstat_entry).
+ */
+typedef struct kstat_proc_entry {
+	char	kpe_name[KSTAT_STRLEN+1];	/* kstat name */
+	char	kpe_module[KSTAT_STRLEN+1];	/* provider module name */
+	kstat_module_t		*kpe_owner;	/* kstat module linkage */
+	struct list_head	kpe_list;	/* kstat linkage */
+	struct proc_dir_entry	*kpe_proc;	/* procfs entry */
+} kstat_proc_entry_t;
+
+/*
+ * One kstat: identity (magic, id, instance, class), a typed data payload,
+ * an optional update callback, locks, raw-ops state and the procfs entry.
+ */
+struct kstat_s {
+	int		ks_magic;		/* magic value */
+	kid_t		ks_kid;			/* unique kstat ID */
+	hrtime_t	ks_crtime;		/* creation time */
+	hrtime_t	ks_snaptime;		/* last access time */
+	int		ks_instance;		/* provider module instance */
+	char		ks_class[KSTAT_STRLEN+1]; /* kstat class */
+	uchar_t		ks_type;		/* kstat data type */
+	uchar_t		ks_flags;		/* kstat flags */
+	void		*ks_data;		/* kstat type-specific data */
+	uint_t		ks_ndata;		/* # of data records */
+	size_t		ks_data_size;		/* size of kstat data section */
+	kstat_update_t	*ks_update;		/* dynamic updates */
+	void		*ks_private;		/* private data */
+	kmutex_t	ks_private_lock;	/* kstat private data lock */
+	kmutex_t	*ks_lock;		/* kstat data lock */
+	kstat_raw_ops_t	ks_raw_ops;		/* ops table for raw type */
+	char		*ks_raw_buf;		/* buf used for raw ops */
+	size_t		ks_raw_bufsize;		/* size of raw ops buffer */
+	kstat_proc_entry_t	ks_proc;	/* data for procfs entry */
+};
+
+/*
+ * Name/value record.  'data_type' says which member of 'value' is in use
+ * (presumably one of the KSTAT_DATA_* constants - confirm).
+ * NOTE(review): 'name' is KSTAT_STRLEN bytes, whereas the other name
+ * buffers in this header are KSTAT_STRLEN+1 - confirm this is intended.
+ */
+typedef struct kstat_named_s {
+	char	name[KSTAT_STRLEN];	/* name of counter */
+	uchar_t	data_type;		/* data type */
+	union {
+		char c[16];	/* 128-bit int */
+		int32_t	i32;	/* 32-bit signed int */
+		uint32_t ui32;	/* 32-bit unsigned int */
+		int64_t i64;	/* 64-bit signed int */
+		uint64_t ui64;	/* 64-bit unsigned int */
+		long l;		/* native signed long */
+		ulong_t ul;	/* native unsigned long */
+		struct {
+			union {
+				char *ptr;	/* NULL-term string */
+				char __pad[8];	/* 64-bit padding */
+			} addr;
+			uint32_t len;		/* # bytes for strlen + '\0' */
+		} string;
+	} value;
+} kstat_named_t;
+
+/* Accessors for the string member: the pointer and its recorded length. */
+#define	KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr)
+#define	KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len)
+
+/*
+ * Type of the procfs operations table: struct proc_ops when
+ * HAVE_PROC_OPS_STRUCT is defined, struct file_operations otherwise.
+ */
+#ifdef HAVE_PROC_OPS_STRUCT
+typedef struct proc_ops kstat_proc_op_t;
+#else
+typedef struct file_operations kstat_proc_op_t;
+#endif
+
+/*
+ * Interrupt counters, one slot per KSTAT_NUM_INTRS (presumably indexed by
+ * the KSTAT_INTR_* constants - confirm).
+ */
+typedef struct kstat_intr {
+	uint_t intrs[KSTAT_NUM_INTRS];
+} kstat_intr_t;
+
+/* I/O statistics record (presumably the payload for KSTAT_TYPE_IO). */
+typedef struct kstat_io {
+	u_longlong_t	nread;		/* number of bytes read */
+	u_longlong_t	nwritten;	/* number of bytes written */
+	uint_t		reads;		/* number of read operations */
+	uint_t		writes;		/* number of write operations */
+	hrtime_t	wtime;		/* cumulative wait (pre-service) time */
+	hrtime_t	wlentime;	/* cumulative wait len*time product */
+	hrtime_t	wlastupdate;	/* last time wait queue changed */
+	hrtime_t	rtime;		/* cumulative run (service) time */
+	hrtime_t	rlentime;	/* cumulative run length*time product */
+	hrtime_t	rlastupdate;	/* last time run queue changed */
+	uint_t		wcnt;		/* count of elements in wait state */
+	uint_t		rcnt;		/* count of elements in run state */
+} kstat_io_t;
+
+/* Event timer record (presumably the payload for KSTAT_TYPE_TIMER). */
+typedef struct kstat_timer {
+	char		name[KSTAT_STRLEN+1]; /* event name */
+	u_longlong_t	num_events;	 /* number of events */
+	hrtime_t	elapsed_time;	 /* cumulative elapsed time */
+	hrtime_t	min_time;	 /* shortest event duration */
+	hrtime_t	max_time;	 /* longest event duration */
+	hrtime_t	start_time;	 /* previous event start time */
+	hrtime_t	stop_time;	 /* previous event stop time */
+} kstat_timer_t;
+
+int spl_kstat_init(void);
+void spl_kstat_fini(void);
+
+extern void __kstat_set_raw_ops(kstat_t *ksp,
+    int (*headers)(char *buf, size_t size),
+    int (*data)(char *buf, size_t size, void *data),
+    void* (*addr)(kstat_t *ksp, loff_t index));
+
+extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
+    const char *ks_name, const char *ks_class, uchar_t ks_type,
+    uint_t ks_ndata, uchar_t ks_flags);
+
+extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep,
+    const char *module, const char *name);
+extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep);
+extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
+    const kstat_proc_op_t *file_ops, void *data);
+
+extern void __kstat_install(kstat_t *ksp);
+extern void __kstat_delete(kstat_t *ksp);
+
+/*
+ * Public kstat_*() names.  Each macro forwards its arguments unchanged to
+ * the __kstat_*() function of the same name.
+ */
+#define	kstat_set_raw_ops(k, h, d, a) \
+    __kstat_set_raw_ops(k, h, d, a)
+#define	kstat_create(m, i, n, c, t, s, f) \
+    __kstat_create(m, i, n, c, t, s, f)
+
+#define	kstat_install(k)		__kstat_install(k)
+#define	kstat_delete(k)			__kstat_delete(k)
+
+#endif  /* _SPL_KSTAT_H */

diff --git a/zfs/include/os/linux/spl/sys/list.h b/zfs/include/os/linux/spl/sys/list.h
new file mode 100644
index 0000000..80300df
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/list.h

@@ -0,0 +1,209 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_LIST_H
+#define	_SPL_LIST_H
+
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <linux/list.h>
+
+/*
+ * NOTE: I have implemented the Solaris list API in terms of the native
+ * linux API.  This has certain advantages in terms of leveraging the linux
+ * list debugging infrastructure, but it also means that the internals of a
+ * list differ slightly from Solaris.  This is not a problem as long as
+ * all callers stick to the published API.  The two major differences are:
+ *
+ * 1) A list_node_t is mapped to a linux list_head struct which changes
+ *    the name of the list_next/list_prev pointers to next/prev respectively.
+ *
+ * 2) A list_node_t which is not attached to a list on Solaris is denoted
+ *    by having its list_next/list_prev pointers set to NULL.  Under linux
+ *    the next/prev pointers are set to LIST_POISON1 and LIST_POISON2
+ *    respectively.  At this moment this only impacts the implementation
+ *    of the list_link_init() and list_link_active() functions.
+ */
+
+typedef struct list_head list_node_t;
+
+/*
+ * List header.  list_offset is the byte offset of the embedded list_node_t
+ * inside each object (used by list_d2l()/list_object()).  list_size is
+ * stored by list_create() and not otherwise read in this header.
+ */
+typedef struct list {
+	size_t list_size;
+	size_t list_offset;
+	list_node_t list_head;
+} list_t;
+
+/*
+ * Convert between an object pointer and its embedded list node using the
+ * byte offset stored in the list.  The pointer arguments are parenthesized
+ * so that an expression argument (e.g. "p + 1") is cast as a whole rather
+ * than having the cast bind to its first operand only.
+ */
+#define	list_d2l(a, obj) ((list_node_t *)((char *)(obj) + (a)->list_offset))
+#define	list_object(a, node) ((void *)((char *)(node) - (a)->list_offset))
+
+/* Return the result of list_empty() on the list's head node. */
+static inline int
+list_is_empty(list_t *list)
+{
+	list_node_t *head = &list->list_head;
+
+	return (list_empty(head));
+}
+
+/* Mark a node as unlinked by storing the poison values in both pointers. */
+static inline void
+list_link_init(list_node_t *node)
+{
+	node->prev = LIST_POISON2;
+	node->next = LIST_POISON1;
+}
+
+/*
+ * Initialize an empty list: set up the head node and record the object
+ * size and the offset of the list node within each object.
+ */
+static inline void
+list_create(list_t *list, size_t size, size_t offset)
+{
+	INIT_LIST_HEAD(&list->list_head);
+	list->list_offset = offset;
+	list->list_size = size;
+}
+
+/* Tear down a list by calling list_del() on its head node. */
+static inline void
+list_destroy(list_t *list)
+{
+	list_node_t *head = &list->list_head;
+
+	list_del(head);
+}
+
+/* Link 'object' in directly after the list head (front of the list). */
+static inline void
+list_insert_head(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	list_add(node, &list->list_head);
+}
+
+/* Link 'object' in directly before the list head (back of the list). */
+static inline void
+list_insert_tail(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	list_add_tail(node, &list->list_head);
+}
+
+/*
+ * Insert 'nobject' immediately after 'object'.  A NULL 'object' means
+ * "insert at the head of the list".
+ */
+static inline void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+	if (object == NULL) {
+		list_insert_head(list, nobject);
+		return;
+	}
+
+	list_add(list_d2l(list, nobject), list_d2l(list, object));
+}
+
+/*
+ * Insert 'nobject' immediately before 'object'.  A NULL 'object' means
+ * "insert at the tail of the list".
+ */
+static inline void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+	if (object == NULL) {
+		list_insert_tail(list, nobject);
+		return;
+	}
+
+	list_add_tail(list_d2l(list, nobject), list_d2l(list, object));
+}
+
+/* Unlink 'object' from the list with list_del(). */
+static inline void
+list_remove(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	list_del(node);
+}
+
+/*
+ * Unlink and return the first object on the list, or NULL when the head's
+ * next pointer refers back to the head itself (empty list).
+ */
+static inline void *
+list_remove_head(list_t *list)
+{
+	list_node_t *first = list->list_head.next;
+	void *obj = NULL;
+
+	if (first != &list->list_head) {
+		list_del(first);
+		obj = list_object(list, first);
+	}
+
+	return (obj);
+}
+
+/*
+ * Unlink and return the last object on the list, or NULL when the head's
+ * prev pointer refers back to the head itself (empty list).
+ */
+static inline void *
+list_remove_tail(list_t *list)
+{
+	list_node_t *last = list->list_head.prev;
+	void *obj = NULL;
+
+	if (last != &list->list_head) {
+		list_del(last);
+		obj = list_object(list, last);
+	}
+
+	return (obj);
+}
+
+/* Return the first object on the list without unlinking it, or NULL. */
+static inline void *
+list_head(list_t *list)
+{
+	void *first = NULL;
+
+	if (!list_is_empty(list))
+		first = list_object(list, list->list_head.next);
+
+	return (first);
+}
+
+/* Return the last object on the list without unlinking it, or NULL. */
+static inline void *
+list_tail(list_t *list)
+{
+	void *last = NULL;
+
+	if (!list_is_empty(list))
+		last = list_object(list, list->list_head.prev);
+
+	return (last);
+}
+
+/*
+ * Return the object following 'object', or NULL when 'object' is the last
+ * entry (its next pointer is the list head).
+ */
+static inline void *
+list_next(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	if (node->next == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, node->next));
+}
+
+/*
+ * Return the object preceding 'object', or NULL when 'object' is the first
+ * entry (its prev pointer is the list head).
+ */
+static inline void *
+list_prev(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	if (node->prev == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, node->prev));
+}
+
+/*
+ * Return 1 when the node's next pointer is not LIST_POISON1, 0 otherwise.
+ * EQUIV() checks that next and prev agree about being poisoned.
+ */
+static inline int
+list_link_active(list_node_t *node)
+{
+	EQUIV(node->next == LIST_POISON1, node->prev == LIST_POISON2);
+
+	if (node->next == LIST_POISON1)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Hand src's head and dst's current last node to list_splice_init(), which
+ * moves src's entries to the tail of dst.  Exposed as list_move_tail().
+ */
+static inline void
+spl_list_move_tail(list_t *dst, list_t *src)
+{
+	list_node_t *src_head = &src->list_head;
+	list_node_t *dst_last = dst->list_head.prev;
+
+	list_splice_init(src_head, dst_last);
+}
+
+#define	list_move_tail(dst, src)	spl_list_move_tail(dst, src)
+
+/*
+ * Put new_node in old_node's position: copy old_node's neighbour pointers
+ * into new_node, point both neighbours at new_node, then mark old_node as
+ * unlinked with list_link_init().
+ */
+static inline void
+list_link_replace(list_node_t *old_node, list_node_t *new_node)
+{
+	new_node->next = old_node->next;
+	new_node->prev = old_node->prev;
+	/* old_node's pointers are re-read after each store; keep this order */
+	old_node->prev->next = new_node;
+	old_node->next->prev = new_node;
+	list_link_init(old_node);
+}
+
+#endif /* _SPL_LIST_H */

diff --git a/zfs/include/os/linux/spl/sys/misc.h b/zfs/include/os/linux/spl/sys/misc.h
new file mode 100644
index 0000000..299fe9c
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/misc.h

@@ -0,0 +1,29 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _OS_LINUX_SPL_MISC_H
+#define	_OS_LINUX_SPL_MISC_H
+
+#include <linux/kobject.h>
+
+/*
+ * Forward declaration so that the prototype below never introduces a
+ * prototype-scoped 'struct block_device' when no block-layer header has
+ * been included before this one.  Redundant declarations are harmless.
+ */
+struct block_device;
+
+/* NOTE(review): presumably signals a kobject event for bdev - confirm. */
+extern void spl_signal_kobj_evt(struct block_device *bdev);
+
+#endif /* _OS_LINUX_SPL_MISC_H */

diff --git a/zfs/include/os/linux/spl/sys/mod_os.h b/zfs/include/os/linux/spl/sys/mod_os.h
new file mode 100644
index 0000000..bb43313
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/mod_os.h

@@ -0,0 +1,27 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _SPL_MOD_H
+#define	_SPL_MOD_H
+#include <linux/mod_compat.h>
+
+#endif /* _SPL_MOD_H */

diff --git a/zfs/include/os/linux/spl/sys/mutex.h b/zfs/include/os/linux/spl/sys/mutex.h
new file mode 100644
index 0000000..047607f
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/mutex.h

@@ -0,0 +1,184 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_MUTEX_H
+#define	_SPL_MUTEX_H
+
+#include <sys/types.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/lockdep.h>
+#include <linux/compiler_compat.h>
+
+/*
+ * Mutex types.  The ASSERT in mutex_init() only admits MUTEX_DEFAULT and
+ * MUTEX_NOLOCKDEP; MUTEX_NOLOCKDEP is the value tested by the
+ * spl_mutex_lockdep_{off,on}_maybe() helpers.
+ */
+typedef enum {
+	MUTEX_DEFAULT	= 0,
+	MUTEX_SPIN	= 1,
+	MUTEX_ADAPTIVE	= 2,
+	MUTEX_NOLOCKDEP	= 3
+} kmutex_type_t;
+
+/*
+ * SPL mutex: a Linux struct mutex plus a spinlock taken around
+ * mutex_unlock() in mutex_exit() and an owner pointer maintained by the
+ * enter/exit macros.  m_type exists only when CONFIG_LOCKDEP is defined.
+ */
+typedef struct {
+	struct mutex		m_mutex;
+	spinlock_t		m_lock;	/* used for serializing mutex_exit */
+	kthread_t		*m_owner;
+#ifdef CONFIG_LOCKDEP
+	kmutex_type_t		m_type;
+#endif /* CONFIG_LOCKDEP */
+} kmutex_t;
+
+#define	MUTEX(mp)		(&((mp)->m_mutex))
+
+/* Record the calling task ('current') as the owner of mp. */
+static inline void
+spl_mutex_set_owner(kmutex_t *mp)
+{
+	kthread_t *self = current;
+
+	mp->m_owner = self;
+}
+
+/* Reset the recorded owner of mp to NULL (no owner). */
+static inline void
+spl_mutex_clear_owner(kmutex_t *mp)
+{
+	mp->m_owner = NULL;
+}
+
+/*
+ * Owner queries.  mutex_owner() reads m_owner through READ_ONCE();
+ * mutex_owned()/MUTEX_HELD() compare that value with 'current'.
+ */
+#define	mutex_owner(mp)		(READ_ONCE((mp)->m_owner))
+#define	mutex_owned(mp)		(mutex_owner(mp) == current)
+#define	MUTEX_HELD(mp)		mutex_owned(mp)
+#define	MUTEX_NOT_HELD(mp)	(!MUTEX_HELD(mp))
+
+#ifdef CONFIG_LOCKDEP
+/* Record the mutex type so the lockdep helpers below can test it. */
+static inline void
+spl_mutex_set_type(kmutex_t *mp, kmutex_type_t type)
+{
+	mp->m_type = type;
+}
+
+/* Call lockdep_off() when mp is non-NULL and of type MUTEX_NOLOCKDEP. */
+static inline void
+spl_mutex_lockdep_off_maybe(kmutex_t *mp)
+{
+	if (mp && mp->m_type == MUTEX_NOLOCKDEP)
+		lockdep_off();
+}
+
+/* Call lockdep_on() when mp is non-NULL and of type MUTEX_NOLOCKDEP. */
+static inline void
+spl_mutex_lockdep_on_maybe(kmutex_t *mp)
+{
+	if (mp && mp->m_type == MUTEX_NOLOCKDEP)
+		lockdep_on();
+}
+#else  /* CONFIG_LOCKDEP */
+/* No m_type field without CONFIG_LOCKDEP: the helpers expand to nothing. */
+#define	spl_mutex_set_type(mp, type)
+#define	spl_mutex_lockdep_off_maybe(mp)
+#define	spl_mutex_lockdep_on_maybe(mp)
+#endif /* CONFIG_LOCKDEP */
+
+/*
+ * The following functions must be #defines and not static inline functions.
+ * This ensures that the native Linux mutex functions (lock/unlock)
+ * will be correctly located in the user's code, which is important
+ * for the built-in kernel lock analysis tools.
+ */
+/*
+ * Initialize an SPL mutex.  Each expansion declares its own static
+ * lock_class_key and passes it to __mutex_init().  The lock name is the
+ * stringified 'name' argument when 'name' evaluates true, otherwise the
+ * stringified 'mp' expression.  The spinlock is initialized, the owner is
+ * cleared and the type recorded.  'ibc' is not used by this macro.
+ */
+#undef mutex_init
+#define	mutex_init(mp, name, type, ibc)				\
+{								\
+	static struct lock_class_key __key;			\
+	ASSERT(type == MUTEX_DEFAULT || type == MUTEX_NOLOCKDEP); \
+								\
+	__mutex_init(MUTEX(mp), (name) ? (#name) : (#mp), &__key); \
+	spin_lock_init(&(mp)->m_lock);				\
+	spl_mutex_clear_owner(mp);				\
+	spl_mutex_set_type(mp, type);				\
+}
+
+/*
+ * Destroy an SPL mutex.  The only action is a VERIFY3P() that no owner is
+ * recorded; nothing is released by this macro.
+ */
+#undef mutex_destroy
+#define	mutex_destroy(mp)					\
+{								\
+	VERIFY3P(mutex_owner(mp), ==, NULL);			\
+}
+
+/*
+ * Try to take the mutex without blocking.  Evaluates to the result of
+ * mutex_trylock(); the owner is recorded only when that result is 1.
+ * Written as a statement expression so it can yield a value.
+ */
+/* BEGIN CSTYLED */
+#define	mutex_tryenter(mp)					\
+({								\
+	int _rc_;						\
+								\
+	spl_mutex_lockdep_off_maybe(mp);			\
+	if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1)		\
+		spl_mutex_set_owner(mp);			\
+	spl_mutex_lockdep_on_maybe(mp);				\
+								\
+	_rc_;							\
+})
+/* END CSTYLED */
+
+#define	NESTED_SINGLE 1
+
+/*
+ * mutex_enter_nested(): assert that the caller is not already the recorded
+ * owner, take the lock, then record the caller as owner.  With
+ * CONFIG_DEBUG_LOCK_ALLOC the lock is taken with mutex_lock_nested() and
+ * 'subclass'; otherwise plain mutex_lock() is used and 'subclass' is
+ * ignored.  mutex_enter() is mutex_enter_nested() with subclass 0.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define	mutex_enter_nested(mp, subclass)			\
+{								\
+	ASSERT3P(mutex_owner(mp), !=, current);			\
+	spl_mutex_lockdep_off_maybe(mp);			\
+	mutex_lock_nested(MUTEX(mp), (subclass));		\
+	spl_mutex_lockdep_on_maybe(mp);				\
+	spl_mutex_set_owner(mp);				\
+}
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+#define	mutex_enter_nested(mp, subclass)			\
+{								\
+	ASSERT3P(mutex_owner(mp), !=, current);			\
+	spl_mutex_lockdep_off_maybe(mp);			\
+	mutex_lock(MUTEX(mp));					\
+	spl_mutex_lockdep_on_maybe(mp);				\
+	spl_mutex_set_owner(mp);				\
+}
+#endif /*  CONFIG_DEBUG_LOCK_ALLOC */
+
+#define	mutex_enter(mp) mutex_enter_nested((mp), 0)
+
+/*
+ * The reason for the spinlock:
+ *
+ * The Linux mutex is designed with a fast-path/slow-path design such that it
+ * does not guarantee serialization upon itself, allowing a race where latter
+ * acquirers finish mutex_unlock before former ones.
+ *
+ * The race renders it unsafe to be used for serializing the freeing of an
+ * object in which the mutex is embedded, where the latter acquirer could go
+ * on to free the object while the former one is still doing mutex_unlock and
+ * causing memory corruption.
+ *
+ * However, there are many places in ZFS where the mutex is used for
+ * serializing object freeing, and the code is shared among other OSes without
+ * this issue. Thus, we need the spinlock to force the serialization on
+ * mutex_exit().
+ *
+ * See http://lwn.net/Articles/575477/ for the information about the race.
+ */
+/*
+ * Release the mutex.  The recorded owner is cleared first, while the mutex
+ * is still held; mutex_unlock() then runs with m_lock taken.
+ */
+#define	mutex_exit(mp)						\
+{								\
+	spl_mutex_clear_owner(mp);				\
+	spin_lock(&(mp)->m_lock);				\
+	spl_mutex_lockdep_off_maybe(mp);			\
+	mutex_unlock(MUTEX(mp));				\
+	spl_mutex_lockdep_on_maybe(mp);				\
+	spin_unlock(&(mp)->m_lock);				\
+	/* NOTE: do not dereference mp after this point */	\
+}
+
+#endif /* _SPL_MUTEX_H */

diff --git a/zfs/include/os/linux/spl/sys/param.h b/zfs/include/os/linux/spl/sys/param.h
new file mode 100644
index 0000000..d8a12d5
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/param.h

@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_PARAM_H
+#define	_SPL_PARAM_H
+
+#include <asm/page.h>
+
+/*
+ * Pages to bytes and back.
+ * NOTE(review): the shift is done in the (promoted) type of the argument;
+ * a 32-bit page count is not widened before ptob() shifts it left.
+ */
+#define	ptob(pages)			((pages) << PAGE_SHIFT)
+#define	btop(bytes)			((bytes) >> PAGE_SHIFT)
+
+#define	MAXUID				UINT32_MAX
+
+#endif /* _SPL_PARAM_H */

diff --git a/zfs/include/os/linux/spl/sys/proc.h b/zfs/include/os/linux/spl/sys/proc.h
new file mode 100644
index 0000000..fe48414
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/proc.h

@@ -0,0 +1,41 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_PROC_H
+#define	_SPL_PROC_H
+
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+
+extern struct proc_dir_entry *proc_spl_kstat;
+
+int spl_proc_init(void);
+void spl_proc_fini(void);
+
+/* Return whether task t has the same thread-group leader as 'current'. */
+static inline boolean_t
+zfs_proc_is_caller(struct task_struct *t)
+{
+	struct task_struct *mine = current->group_leader;
+	struct task_struct *theirs = t->group_leader;
+
+	return (theirs == mine);
+}
+
+#endif /* _SPL_PROC_H */

diff --git a/zfs/include/os/linux/spl/sys/processor.h b/zfs/include/os/linux/spl/sys/processor.h
new file mode 100644
index 0000000..5514f07
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/processor.h

@@ -0,0 +1,31 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef	_SPL_PROCESSOR_H
+#define	_SPL_PROCESSOR_H
+
+/* getcpuid() expands to the kernel's smp_processor_id(). */
+#define	getcpuid() smp_processor_id()
+
+/* Integer type used for processor ids. */
+typedef int	processorid_t;
+
+#endif /* _SPL_PROCESSOR_H */

diff --git a/zfs/include/os/linux/spl/sys/procfs_list.h b/zfs/include/os/linux/spl/sys/procfs_list.h
new file mode 100644
index 0000000..9bb437f
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/procfs_list.h

@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#ifndef	_SPL_PROCFS_LIST_H
+#define	_SPL_PROCFS_LIST_H
+
+#include <sys/kstat.h>
+#include <sys/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/*
+ * State for one procfs-backed list.  The fields are grouped below by who
+ * accesses them: the user of the list, both sides, or the implementation.
+ * The callback members have the same signatures as the show, show_header
+ * and clear arguments of procfs_list_install().
+ */
+typedef struct procfs_list procfs_list_t;
+struct procfs_list {
+	/* Accessed only by user of a procfs_list */
+	void		*pl_private;
+
+	/*
+	 * Accessed both by user of a procfs_list and by procfs_list
+	 * implementation
+	 */
+	kmutex_t	pl_lock;
+	list_t		pl_list;
+
+	/* Accessed only by procfs_list implementation */
+	uint64_t	pl_next_id;
+	int		(*pl_show)(struct seq_file *f, void *p);
+	int		(*pl_show_header)(struct seq_file *f);
+	int		(*pl_clear)(procfs_list_t *procfs_list);
+	size_t		pl_node_offset;
+	kstat_proc_entry_t	pl_kstat_entry;
+};
+
+/*
+ * Per-entry linkage: a list node plus a 64-bit id.
+ * NOTE(review): pln_id is presumably assigned from pl_next_id - confirm.
+ */
+typedef struct procfs_list_node {
+	list_node_t	pln_link;
+	uint64_t	pln_id;
+} procfs_list_node_t;
+
+void procfs_list_install(const char *module,
+    const char *submodule,
+    const char *name,
+    mode_t mode,
+    procfs_list_t *procfs_list,
+    int (*show)(struct seq_file *f, void *p),
+    int (*show_header)(struct seq_file *f),
+    int (*clear)(procfs_list_t *procfs_list),
+    size_t procfs_list_node_off);
+void procfs_list_uninstall(procfs_list_t *procfs_list);
+void procfs_list_destroy(procfs_list_t *procfs_list);
+
+void procfs_list_add(procfs_list_t *procfs_list, void *p);
+
+#endif	/* _SPL_PROCFS_LIST_H */

diff --git a/zfs/include/os/linux/spl/sys/random.h b/zfs/include/os/linux/spl/sys/random.h
new file mode 100644
index 0000000..52e97e1
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/random.h

@@ -0,0 +1,54 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_RANDOM_H
+#define	_SPL_RANDOM_H
+
+#include <linux/module.h>
+#include <linux/random.h>
+
+/*
+ * Fill 'len' bytes at 'ptr' using the kernel's get_random_bytes().  The
+ * length is narrowed to int for the call.  Always returns 0.
+ */
+static __inline__ int
+random_get_bytes(uint8_t *ptr, size_t len)
+{
+	void *buf = (void *)ptr;
+	int nbytes = (int)len;
+
+	get_random_bytes(buf, nbytes);
+	return (0);
+}
+
+extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
+
+/*
+ * Return a pseudo-random value in [0, range).  'range' must be nonzero
+ * (asserted).  A range of 1 returns 0 without drawing any bytes; otherwise
+ * a 32-bit pseudo-random sample is reduced modulo 'range'.
+ */
+static __inline__ uint32_t
+random_in_range(uint32_t range)
+{
+	uint32_t sample;
+
+	ASSERT(range != 0);
+
+	if (range != 1) {
+		(void) random_get_pseudo_bytes((uint8_t *)&sample,
+		    sizeof (sample));
+		return (sample % range);
+	}
+
+	return (0);
+}
+
+#endif	/* _SPL_RANDOM_H */

diff --git a/zfs/include/os/linux/spl/sys/rwlock.h b/zfs/include/os/linux/spl/sys/rwlock.h
new file mode 100644
index 0000000..ba7620a
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/rwlock.h

@@ -0,0 +1,200 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_RWLOCK_H
+#define	_SPL_RWLOCK_H
+
+#include <sys/types.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+
+/*
+ * Lock types handed to rw_init().  rw_init() asserts that the type is
+ * RW_DEFAULT or RW_NOLOCKDEP.  When CONFIG_LOCKDEP is enabled, locks of
+ * type RW_NOLOCKDEP have lockdep switched off around every rwsem call
+ * made through the rw_*() macros in this header.
+ */
+typedef enum {
+	RW_DRIVER	= 2,
+	RW_DEFAULT	= 4,
+	RW_NOLOCKDEP	= 5
+} krw_type_t;
+
+/*
+ * Acquisition mode for rw_enter() and rw_tryenter(): RW_READER maps to the
+ * down_read*() family and RW_WRITER to the down_write*() family.  Any other
+ * value, including RW_NONE, reaches VERIFY(0) in those macros.
+ */
+typedef enum {
+	RW_NONE		= 0,
+	RW_WRITER	= 1,
+	RW_READER	= 2
+} krw_t;
+
+/*
+ * Reader/writer lock built on a Linux rw_semaphore.
+ */
+typedef struct {
+	/* The underlying kernel semaphore; reach it through SEM(). */
+	struct rw_semaphore rw_rwlock;
+	/* Thread holding the lock as writer; NULL when there is none. */
+	kthread_t *rw_owner;
+#ifdef CONFIG_LOCKDEP
+	/* Type given to rw_init(); consulted by the lockdep on/off helpers. */
+	krw_type_t	rw_type;
+#endif /* CONFIG_LOCKDEP */
+} krwlock_t;
+
+/* Address of the rw_semaphore embedded in a krwlock_t. */
+#define	SEM(rwp)	(&(rwp)->rw_rwlock)
+
+/*
+ * Record the calling thread as the write owner of rwp.  The rw_*() macros
+ * call this only after the write side of the semaphore has been taken.
+ */
+static inline void
+spl_rw_set_owner(krwlock_t *rwp)
+{
+	rwp->rw_owner = current;
+}
+
+/*
+ * Forget the write owner of rwp.  Used by rw_init(), and by rw_exit() and
+ * rw_downgrade() before the write side is given up.
+ */
+static inline void
+spl_rw_clear_owner(krwlock_t *rwp)
+{
+	rwp->rw_owner = NULL;
+}
+
+/*
+ * Return the recorded write owner of rwp, or NULL if none is recorded.
+ */
+static inline kthread_t *
+rw_owner(krwlock_t *rwp)
+{
+	return (rwp->rw_owner);
+}
+
+#ifdef CONFIG_LOCKDEP
+/*
+ * Remember the lock type chosen at rw_init() time.
+ */
+static inline void
+spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
+{
+	rwp->rw_type = type;
+}
+
+/*
+ * Switch lockdep off when rwp is non-NULL and of type RW_NOLOCKDEP.
+ * Must be paired with spl_rw_lockdep_on_maybe() on the same lock.
+ */
+static inline void
+spl_rw_lockdep_off_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_off();
+}
+
+/*
+ * Switch lockdep back on when rwp is non-NULL and of type RW_NOLOCKDEP.
+ */
+static inline void
+spl_rw_lockdep_on_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_on();
+}
+#else  /* CONFIG_LOCKDEP */
+/*
+ * Without CONFIG_LOCKDEP the lock has no rw_type member, so all three
+ * helpers expand to nothing.
+ */
+#define	spl_rw_set_type(rwp, type)
+#define	spl_rw_lockdep_off_maybe(rwp)
+#define	spl_rw_lockdep_on_maybe(rwp)
+#endif /* CONFIG_LOCKDEP */
+
+/*
+ * Non-zero when the underlying rwsem is currently locked, as reported by
+ * rwsem_is_locked().  The result says nothing about which thread holds it.
+ */
+static inline int
+RW_LOCK_HELD(krwlock_t *rwp)
+{
+	return (rwsem_is_locked(SEM(rwp)));
+}
+
+/*
+ * Non-zero when the calling thread is the recorded write owner of rwp.
+ */
+static inline int
+RW_WRITE_HELD(krwlock_t *rwp)
+{
+	return (rw_owner(rwp) == current);
+}
+
+/*
+ * Non-zero when the rwsem is locked and no write owner is recorded, i.e.
+ * the lock is held on the read side by some thread (not necessarily the
+ * caller).
+ */
+static inline int
+RW_READ_HELD(krwlock_t *rwp)
+{
+	if (!RW_LOCK_HELD(rwp))
+		return (0);
+
+	return (rw_owner(rwp) == NULL);
+}
+
+/*
+ * The following functions must be a #define and not static inline.
+ * This ensures that the native Linux semaphore functions (down/up)
+ * will be correctly located in the user's code, which is important
+ * for the built-in kernel lock analysis tools.
+ */
+/* BEGIN CSTYLED */
+/*
+ * Initialize rwp.  Each expansion site gets its own static lock_class_key,
+ * and the rwsem is named after the stringified rwp expression.  The owner
+ * is cleared and the type recorded (the latter only under CONFIG_LOCKDEP).
+ * type must be RW_DEFAULT or RW_NOLOCKDEP (asserted).  The name and arg
+ * parameters are accepted but not used by this implementation.
+ */
+#define	rw_init(rwp, name, type, arg)					\
+({									\
+	static struct lock_class_key __key;				\
+	ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP);		\
+									\
+	__init_rwsem(SEM(rwp), #rwp, &__key);				\
+	spl_rw_clear_owner(rwp);					\
+	spl_rw_set_type(rwp, type);					\
+})
+
+/*
+ * The Linux rwsem implementation does not require a matching destroy.
+ */
+#define	rw_destroy(rwp)		((void) 0)
+
+/*
+ * Upgrading a rwsem from a reader to a writer is not supported by the
+ * Linux kernel.  The lock must be dropped and reacquired as a writer.
+ * Consequently this macro never acquires anything: it is non-zero only
+ * when the calling thread is already the recorded write owner.
+ */
+#define	rw_tryupgrade(rwp)	RW_WRITE_HELD(rwp)
+
+/*
+ * Try to take rwp in mode rw without blocking.  Evaluates to the result
+ * of down_read_trylock() or down_write_trylock(); on a successful write
+ * acquisition the caller is recorded as owner.  Any mode other than
+ * RW_READER or RW_WRITER hits VERIFY(0).
+ */
+#define	rw_tryenter(rwp, rw)						\
+({									\
+	int _rc_ = 0;							\
+									\
+	spl_rw_lockdep_off_maybe(rwp);					\
+	switch (rw) {							\
+	case RW_READER:							\
+		_rc_ = down_read_trylock(SEM(rwp));			\
+		break;							\
+	case RW_WRITER:							\
+		if ((_rc_ = down_write_trylock(SEM(rwp))))		\
+			spl_rw_set_owner(rwp);				\
+		break;							\
+	default:							\
+		VERIFY(0);						\
+	}								\
+	spl_rw_lockdep_on_maybe(rwp);					\
+	_rc_;								\
+})
+
+/*
+ * Take rwp in mode rw via down_read() or down_write().  After a write
+ * acquisition the caller is recorded as owner.  Any mode other than
+ * RW_READER or RW_WRITER hits VERIFY(0).
+ */
+#define	rw_enter(rwp, rw)						\
+({									\
+	spl_rw_lockdep_off_maybe(rwp);					\
+	switch (rw) {							\
+	case RW_READER:							\
+		down_read(SEM(rwp));					\
+		break;							\
+	case RW_WRITER:							\
+		down_write(SEM(rwp));					\
+		spl_rw_set_owner(rwp);					\
+		break;							\
+	default:							\
+		VERIFY(0);						\
+	}								\
+	spl_rw_lockdep_on_maybe(rwp);					\
+})
+
+/*
+ * Release rwp.  If the caller is the recorded write owner, the owner is
+ * cleared first and then the write side is released; otherwise the lock
+ * is asserted to be read-held and the read side is released.
+ */
+#define	rw_exit(rwp)							\
+({									\
+	spl_rw_lockdep_off_maybe(rwp);					\
+	if (RW_WRITE_HELD(rwp)) {					\
+		spl_rw_clear_owner(rwp);				\
+		up_write(SEM(rwp));					\
+	} else {							\
+		ASSERT(RW_READ_HELD(rwp));				\
+		up_read(SEM(rwp));					\
+	}								\
+	spl_rw_lockdep_on_maybe(rwp);					\
+})
+
+/*
+ * Convert a write hold on rwp into a read hold: the recorded owner is
+ * cleared and downgrade_write() is applied to the underlying rwsem.
+ * The macro itself does not check that the caller holds the write side.
+ */
+#define	rw_downgrade(rwp)						\
+({									\
+	spl_rw_lockdep_off_maybe(rwp);					\
+	spl_rw_clear_owner(rwp);					\
+	downgrade_write(SEM(rwp));					\
+	spl_rw_lockdep_on_maybe(rwp);					\
+})
+/* END CSTYLED */
+
+#endif /* _SPL_RWLOCK_H */

diff --git a/zfs/include/os/linux/spl/sys/shrinker.h b/zfs/include/os/linux/spl/sys/shrinker.h
new file mode 100644
index 0000000..d472754
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/shrinker.h

@@ -0,0 +1,113 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SHRINKER_H
+#define	_SPL_SHRINKER_H
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+/*
+ * Due to frequent changes in the shrinker API the following
+ * compatibility wrappers should be used.  They are as follows:
+ *
+ *   SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost);
+ *
+ * SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname,
+ * which is passed to spl_register_shrinker()/spl_unregister_shrinker().
+ * The countfunc returns the number of free-able objects.
+ * The scanfunc returns the number of objects that were freed.
+ * The callbacks can return SHRINK_STOP if further calls can't make any more
+ * progress.  Note that a return value of SHRINK_EMPTY is currently not
+ * supported.
+ *
+ * Example:
+ *
+ * static unsigned long
+ * my_count(struct shrinker *shrink, struct shrink_control *sc)
+ * {
+ *	...calculate number of objects in the cache...
+ *
+ *	return (number of objects in the cache);
+ * }
+ *
+ * static unsigned long
+ * my_scan(struct shrinker *shrink, struct shrink_control *sc)
+ * {
+ *	...scan objects in the cache and reclaim them...
+ * }
+ *
+ * SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS);
+ *
+ * void my_init_func(void) {
+ *	spl_register_shrinker(&my_shrinker);
+ * }
+ */
+
+/*
+ * Registration wrappers.  When HAVE_REGISTER_SHRINKER_VARARG is defined,
+ * register_shrinker() is called with an additional name argument; every
+ * shrinker registered through this macro is given the same fixed name.
+ */
+#ifdef HAVE_REGISTER_SHRINKER_VARARG
+#define	spl_register_shrinker(x)	register_shrinker(x, "zfs-arc-shrinker")
+#else
+#define	spl_register_shrinker(x)	register_shrinker(x)
+#endif
+#define	spl_unregister_shrinker(x)	unregister_shrinker(x)
+
+/*
+ * Linux 3.0 to 3.11 Shrinker API Compatibility.
+ */
+#if defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+/*
+ * Only a single .shrink callback exists here, so one is generated from the
+ * two user functions: the wrapper calls scanfunc only when sc->nr_to_scan
+ * is non-zero (its return value is discarded) and always returns whatever
+ * countfunc reports.
+ */
+#define	SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost)	\
+static int								\
+__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\
+{									\
+	if (sc->nr_to_scan != 0) {					\
+		(void) scanfunc(shrink, sc);				\
+	}								\
+	return (countfunc(shrink, sc));					\
+}									\
+									\
+static struct shrinker varname = {					\
+	.shrink = __ ## varname ## _wrapper,				\
+	.seeks = seek_cost,						\
+}
+
+/*
+ * Defined locally for this branch only.
+ * NOTE(review): presumably kernels with the single-callback API do not
+ * provide SHRINK_STOP themselves -- confirm.
+ */
+#define	SHRINK_STOP	(-1)
+
+/*
+ * Linux 3.12 and later Shrinker API Compatibility.
+ * countfunc and scanfunc are installed directly as the .count_objects and
+ * .scan_objects callbacks.
+ */
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+#define	SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost)	\
+static struct shrinker varname = {					\
+	.count_objects = countfunc,					\
+	.scan_objects = scanfunc,					\
+	.seeks = seek_cost,						\
+}
+
+#else
+/*
+ * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
+ * Neither known callback style was detected, so fail the build.
+ */
+#error "Unknown shrinker callback"
+#endif
+
+#endif /* _SPL_SHRINKER_H */

diff --git a/zfs/include/os/linux/spl/sys/sid.h b/zfs/include/os/linux/spl/sys/sid.h
new file mode 100644
index 0000000..3cf2711
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/sid.h

@@ -0,0 +1,60 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SID_H
+#define	_SPL_SID_H
+
+/*
+ * SID domain handle.  kd_name is a separately allocated, NUL-terminated
+ * copy of the domain string; see ksid_lookupdomain()/ksiddomain_rele().
+ */
+typedef struct ksiddomain {
+	char		*kd_name;
+} ksiddomain_t;
+
+/*
+ * SID index values.  KSID_COUNT comes last, so it equals the number of
+ * index values that precede it.
+ */
+typedef enum ksid_index {
+	KSID_USER,
+	KSID_GROUP,
+	KSID_OWNER,
+	KSID_COUNT
+} ksid_index_t;
+
+typedef int ksid_t;
+
+/*
+ * Allocate a ksiddomain_t whose kd_name is a private, NUL-terminated copy
+ * of dom.  Both allocations use KM_SLEEP.  The result is released with
+ * ksiddomain_rele().
+ */
+static inline ksiddomain_t *
+ksid_lookupdomain(const char *dom)
+{
+	ksiddomain_t *kd;
+	/* strlen() yields a size_t; keep that width rather than narrowing. */
+	size_t len = strlen(dom);
+
+	kd = kmem_zalloc(sizeof (ksiddomain_t), KM_SLEEP);
+	/* The zeroed len + 1 byte buffer supplies the terminating NUL. */
+	kd->kd_name = kmem_zalloc(len + 1, KM_SLEEP);
+	memcpy(kd->kd_name, dom, len);
+
+	return (kd);
+}
+
+/*
+ * Free a domain handle obtained from ksid_lookupdomain(): first the name
+ * buffer (its size is recomputed from the string length plus the NUL),
+ * then the handle itself.
+ */
+static inline void
+ksiddomain_rele(ksiddomain_t *ksid)
+{
+	char *name = ksid->kd_name;
+
+	kmem_free(name, strlen(name) + 1);
+	kmem_free(ksid, sizeof (*ksid));
+}
+
+#endif /* _SPL_SID_H */

diff --git a/zfs/include/os/linux/spl/sys/signal.h b/zfs/include/os/linux/spl/sys/signal.h
new file mode 100644
index 0000000..6b538c8
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/signal.h

@@ -0,0 +1,38 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SIGNAL_H
+#define	_SPL_SIGNAL_H
+
+#include <linux/sched.h>
+
+#ifdef HAVE_SCHED_SIGNAL_HEADER
+#include <linux/sched/signal.h>
+#endif
+
+#define	FORREAL		0	/* Usual side-effects */
+#define	JUSTLOOKING	1	/* Don't stop the process */
+
+extern int issig(int why);
+
+#endif /* _SPL_SIGNAL_H */

diff --git a/zfs/include/os/linux/spl/sys/simd.h b/zfs/include/os/linux/spl/sys/simd.h
new file mode 100644
index 0000000..6fb84d3
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/simd.h

@@ -0,0 +1,30 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SYS_SIMD_H
+#define	_SPL_SYS_SIMD_H
+
+#include <sys/isa_defs.h>
+#include <linux/simd.h>
+
+#endif /* _SPL_SYS_SIMD_H */

diff --git a/zfs/include/os/linux/spl/sys/stat.h b/zfs/include/os/linux/spl/sys/stat.h
new file mode 100644
index 0000000..5987849
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/stat.h

@@ -0,0 +1,29 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_STAT_H
+#define	_SPL_STAT_H
+
+#include <linux/stat.h>
+
+#endif /* _SPL_STAT_H */

diff --git a/zfs/include/os/linux/spl/sys/strings.h b/zfs/include/os/linux/spl/sys/strings.h
new file mode 100644
index 0000000..48e417d
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/strings.h

@@ -0,0 +1,30 @@
+/*
+ *  Copyright (C) 2018 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _SPL_SYS_STRINGS_H
+#define	_SPL_SYS_STRINGS_H
+
+#include <linux/string.h>
+
+/*
+ * BSD-style memory helpers expressed with the kernel string routines.
+ * Note the argument order: bcopy() takes (src, dest) while the memmove()
+ * it expands to takes (dest, src).  bcmp() casts its size to size_t.
+ */
+#define	bzero(ptr, size)		memset(ptr, 0, size)
+#define	bcopy(src, dest, size)		memmove(dest, src, size)
+#define	bcmp(src, dest, size)		memcmp((src), (dest), (size_t)(size))
+
+#endif	/* _SPL_SYS_STRINGS_H */

diff --git a/zfs/include/os/linux/spl/sys/sunddi.h b/zfs/include/os/linux/spl/sys/sunddi.h
new file mode 100644
index 0000000..8524ec9
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/sunddi.h

@@ -0,0 +1,57 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SUNDDI_H
+#define	_SPL_SUNDDI_H
+
+#include <sys/cred.h>
+#include <sys/uio.h>
+#include <sys/mutex.h>
+#include <sys/u8_textprep.h>
+#include <sys/vnode.h>
+
+typedef int ddi_devid_t;
+
+#define	DDI_DEV_T_NONE				((dev_t)-1)
+#define	DDI_DEV_T_ANY				((dev_t)-2)
+#define	DI_MAJOR_T_UNKNOWN			((major_t)0)
+
+#define	DDI_PROP_DONTPASS			0x0001
+#define	DDI_PROP_CANSLEEP			0x0002
+
+#define	DDI_SUCCESS				0
+#define	DDI_FAILURE				-1
+
+/*
+ * Stubbed-out DDI calls.  ddi_prop_lookup_string() only stores NULL
+ * through its fifth argument and ignores the rest; ddi_prop_free() and
+ * ddi_root_node() expand to a no-op expression.
+ */
+#define	ddi_prop_lookup_string(x1, x2, x3, x4, x5)	(*x5 = NULL)
+#define	ddi_prop_free(x)				(void)0
+#define	ddi_root_node()					(void)0
+
+extern int ddi_strtoul(const char *, char **, int, unsigned long *);
+extern int ddi_strtol(const char *, char **, int, long *);
+extern int ddi_strtoull(const char *, char **, int, unsigned long long *);
+extern int ddi_strtoll(const char *, char **, int, long long *);
+
+extern int ddi_copyin(const void *from, void *to, size_t len, int flags);
+extern int ddi_copyout(const void *from, void *to, size_t len, int flags);
+
+#endif /* _SPL_SUNDDI_H */

diff --git a/zfs/include/os/linux/spl/sys/sysmacros.h b/zfs/include/os/linux/spl/sys/sysmacros.h
new file mode 100644
index 0000000..98d1ab1
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/sysmacros.h

@@ -0,0 +1,206 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SYSMACROS_H
+#define	_SPL_SYSMACROS_H
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/cpumask.h>
+#include <sys/debug.h>
+#include <sys/zone.h>
+#include <sys/signal.h>
+#include <asm/page.h>
+
+
+#ifndef _KERNEL
+#define	_KERNEL				__KERNEL__
+#endif
+
+#define	FALSE				0
+#define	TRUE				1
+
+#define	INT8_MAX			(127)
+#define	INT8_MIN			(-128)
+#define	UINT8_MAX			(255)
+#define	UINT8_MIN			(0)
+
+#define	INT16_MAX			(32767)
+#define	INT16_MIN			(-32768)
+#define	UINT16_MAX			(65535)
+#define	UINT16_MIN			(0)
+
+#define	INT32_MAX			INT_MAX
+#define	INT32_MIN			INT_MIN
+#define	UINT32_MAX			UINT_MAX
+#define	UINT32_MIN			UINT_MIN
+
+#define	INT64_MAX			LLONG_MAX
+#define	INT64_MIN			LLONG_MIN
+#define	UINT64_MAX			ULLONG_MAX
+#define	UINT64_MIN			ULLONG_MIN
+
+#define	NBBY				8
+
+#define	MAXMSGLEN			256
+#define	MAXNAMELEN			256
+#define	MAXPATHLEN			4096
+#define	MAXOFFSET_T			LLONG_MAX
+#define	MAXBSIZE			8192
+#define	DEV_BSIZE			512
+#define	DEV_BSHIFT			9 /* log2(DEV_BSIZE) */
+
+#define	proc_pageout			NULL
+#define	curproc				current
+#define	max_ncpus			num_possible_cpus()
+#define	boot_ncpus			num_online_cpus()
+#define	CPU_SEQID			smp_processor_id()
+#define	CPU_SEQID_UNSTABLE		raw_smp_processor_id()
+#define	is_system_labeled()		0
+
+#ifndef RLIM64_INFINITY
+#define	RLIM64_INFINITY			(~0ULL)
+#endif
+
+/*
+ * 0..MAX_PRIO-1:		Process priority
+ * 0..MAX_RT_PRIO-1:		RT priority tasks
+ * MAX_RT_PRIO..MAX_PRIO-1:	SCHED_NORMAL tasks
+ *
+ * Treat shim tasks as SCHED_NORMAL tasks
+ */
+#define	minclsyspri			(MAX_PRIO-1)
+#define	maxclsyspri			(MAX_RT_PRIO)
+#define	defclsyspri			(DEFAULT_PRIO)
+
+#ifndef NICE_TO_PRIO
+#define	NICE_TO_PRIO(nice)		(MAX_RT_PRIO + (nice) + 20)
+#endif
+#ifndef PRIO_TO_NICE
+#define	PRIO_TO_NICE(prio)		((prio) - MAX_RT_PRIO - 20)
+#endif
+
+/*
+ * Missing macros
+ */
+#ifndef PAGESIZE
+#define	PAGESIZE			PAGE_SIZE
+#endif
+
+#ifndef PAGESHIFT
+#define	PAGESHIFT			PAGE_SHIFT
+#endif
+
+/* Missing globals */
+extern char spl_gitrev[64];
+extern unsigned long spl_hostid;
+
+/* Missing misc functions */
+extern uint32_t zone_get_hostid(void *zone);
+extern void spl_setup(void);
+extern void spl_cleanup(void);
+
+/*
+ * Solaris-named bit and device helpers mapped onto kernel primitives.
+ * NOTE(review): highbit() uses __fls() while highbit64() uses fls64();
+ * these kernel helpers appear to differ in bit numbering and in their
+ * handling of a zero argument -- confirm before relying on either.
+ */
+#define	highbit(x)		__fls(x)
+#define	lowbit(x)		__ffs(x)
+
+#define	highbit64(x)		fls64(x)
+#define	makedevice(maj, min)	makedev(maj, min)
+
+/*
+ * Common macros.  Each one is defined only if nothing has provided it
+ * already.  These fallbacks are plain expression macros, so an argument
+ * may be evaluated more than once; do not pass expressions with side
+ * effects.
+ */
+#ifndef MIN
+#define	MIN(a, b)		((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define	MAX(a, b)		((a) < (b) ? (b) : (a))
+#endif
+#ifndef ABS
+#define	ABS(a)			((a) < 0 ? -(a) : (a))
+#endif
+#ifndef DIV_ROUND_UP
+#define	DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
+#endif
+#ifndef roundup
+#define	roundup(x, y)		((((x) + ((y) - 1)) / (y)) * (y))
+#endif
+#ifndef howmany
+#define	howmany(x, y)		(((x) + ((y) - 1)) / (y))
+#endif
+
+/*
+ * Compatibility macros/typedefs needed for Solaris -> Linux port
+ *
+ * The P2* macros use mask arithmetic and therefore rely on align being a
+ * power of two:
+ *
+ *   P2ALIGN	round x down to a multiple of align
+ *   P2CROSS	true if x and y lie in different align-sized blocks
+ *   P2ROUNDUP	round x up to a multiple of align
+ *   P2PHASE	offset of x within its align-sized block (x mod align)
+ *   P2NPHASE	distance from x up to the next multiple of align
+ *   ISP2	true if x is zero or has exactly one bit set
+ *   IS_P2ALIGNED true if v is a multiple of a
+ *   P2BOUNDARY	true if [off, off + len - 1] spans an align boundary
+ */
+#define	P2ALIGN(x, align)	((x) & -(align))
+#define	P2CROSS(x, y, align)	(((x) ^ (y)) > (align) - 1)
+#define	P2ROUNDUP(x, align)	((((x) - 1) | ((align) - 1)) + 1)
+#define	P2PHASE(x, align)	((x) & ((align) - 1))
+#define	P2NPHASE(x, align)	(-(x) & ((align) - 1))
+#define	ISP2(x)			(((x) & ((x) - 1)) == 0)
+#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
+#define	P2BOUNDARY(off, len, align) \
+				(((off) ^ ((off) + (len) - 1)) > (align) - 1)
+
+/*
+ * Typed version of the P2* macros.  These macros should be used to ensure
+ * that the result is correctly calculated based on the data type of (x),
+ * which is passed in as the last argument, regardless of the data
+ * type of the alignment.  For example, if (x) is of type uint64_t,
+ * and we want to round it up to a page boundary using "PAGESIZE" as
+ * the alignment, we can do either
+ *
+ * P2ROUNDUP(x, (uint64_t)PAGESIZE)
+ * or
+ * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
+ */
+#define	P2ALIGN_TYPED(x, align, type)   \
+	((type)(x) & -(type)(align))
+#define	P2PHASE_TYPED(x, align, type)   \
+	((type)(x) & ((type)(align) - 1))
+#define	P2NPHASE_TYPED(x, align, type)  \
+	(-(type)(x) & ((type)(align) - 1))
+#define	P2ROUNDUP_TYPED(x, align, type) \
+	((((type)(x) - 1) | ((type)(align) - 1)) + 1)
+#define	P2END_TYPED(x, align, type)     \
+	(-(~(type)(x) & -(type)(align)))
+#define	P2PHASEUP_TYPED(x, align, phase, type)  \
+	((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
+#define	P2CROSS_TYPED(x, y, align, type)	\
+	(((type)(x) ^ (type)(y)) > (type)(align) - 1)
+#define	P2SAMEHIGHBIT_TYPED(x, y, type) \
+	(((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
+
+/*
+ * Pass the current file, function and line together with err to
+ * __set_error(), then evaluate to err (comma operator).  err appears
+ * twice in the expansion, so it must not have side effects.
+ */
+#define	SET_ERROR(err) \
+	(__set_error(__FILE__, __func__, __LINE__, err), err)
+
+#include <linux/sort.h>
+/*
+ * qsort() is provided by the kernel's sort() from <linux/sort.h>, with
+ * NULL passed as its extra fifth argument.
+ */
+#define	qsort(base, num, size, cmp)		\
+	sort(base, num, size, cmp, NULL)
+
+/*
+ * Fallback offsetof(): byte offset of member m within type s, computed
+ * from a null pointer of type s.  Only defined when _KMEMUSER is not set
+ * and no offsetof macro exists yet.
+ */
+#if !defined(_KMEMUSER) && !defined(offsetof)
+
+/* avoid any possibility of clashing with <stddef.h> version */
+
+#define	offsetof(s, m)  ((size_t)(&(((s *)0)->m)))
+#endif
+
+#endif  /* _SPL_SYSMACROS_H */

diff --git a/zfs/include/os/linux/spl/sys/systeminfo.h b/zfs/include/os/linux/spl/sys/systeminfo.h
new file mode 100644
index 0000000..d4037a0
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/systeminfo.h

@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_SYSTEMINFO_H
+#define	_SPL_SYSTEMINFO_H
+
+#define	HW_HOSTID_LEN		11		/* minimum buffer size needed */
+						/* to hold a decimal or hex */
+						/* hostid string */
+
+/* Supplemental definitions for Linux. */
+#define	HW_HOSTID_PATH		"/etc/hostid"   /* binary configuration file */
+#define	HW_HOSTID_MASK		0xFFFFFFFF 	/* significant hostid bits */
+
+#endif /* _SPL_SYSTEMINFO_H */

diff --git a/zfs/include/os/linux/spl/sys/taskq.h b/zfs/include/os/linux/spl/sys/taskq.h
new file mode 100644
index 0000000..b50175a
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/taskq.h

@@ -0,0 +1,168 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TASKQ_H
+#define	_SPL_TASKQ_H
+
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <sys/types.h>
+#include <sys/thread.h>
+#include <sys/rwlock.h>
+#include <sys/wait.h>
+
+#define	TASKQ_NAMELEN		31
+
+#define	TASKQ_PREPOPULATE	0x00000001
+#define	TASKQ_CPR_SAFE		0x00000002
+#define	TASKQ_DYNAMIC		0x00000004
+#define	TASKQ_THREADS_CPU_PCT	0x00000008
+#define	TASKQ_DC_BATCH		0x00000010
+#define	TASKQ_ACTIVE		0x80000000
+
+/*
+ * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as
+ * KM_SLEEP/KM_NOSLEEP.  TQ_NOQUEUE/TQ_NOALLOC are set particularly
+ * large so as not to conflict with already used GFP_* defines.
+ */
+#define	TQ_SLEEP		0x00000000
+#define	TQ_NOSLEEP		0x00000001
+#define	TQ_PUSHPAGE		0x00000002
+#define	TQ_NOQUEUE		0x01000000
+#define	TQ_NOALLOC		0x02000000
+#define	TQ_NEW			0x04000000
+#define	TQ_FRONT		0x08000000
+
+/*
+ * Reserved taskqid values.
+ */
+#define	TASKQID_INVALID		((taskqid_t)0)
+#define	TASKQID_INITIAL		((taskqid_t)1)
+
+/*
+ * spin_lock(lock) and spin_lock_nested(lock,0) are equivalent,
+ * so TQ_LOCK_DYNAMIC must not evaluate to 0
+ */
+typedef enum tq_lock_role {
+	TQ_LOCK_GENERAL =	0,
+	TQ_LOCK_DYNAMIC =	1,
+} tq_lock_role_t;
+
+typedef unsigned long taskqid_t;
+typedef void (task_func_t)(void *);
+
+typedef struct taskq {
+	spinlock_t		tq_lock;	/* protects taskq_t */
+	char			*tq_name;	/* taskq name */
+	int			tq_instance;	/* instance of tq_name */
+	struct list_head	tq_thread_list;	/* list of all threads */
+	struct list_head	tq_active_list;	/* list of active threads */
+	int			tq_nactive;	/* # of active threads */
+	int			tq_nthreads;	/* # of existing threads */
+	int			tq_nspawn;	/* # of threads being spawned */
+	int			tq_maxthreads;	/* # of threads maximum */
+	/* With TASKQ_THREADS_CPU_PCT: percent of NCPUs to have as threads */
+	int			tq_cpu_pct;
+	int			tq_pri;		/* priority */
+	int			tq_minalloc;	/* min taskq_ent_t pool size */
+	int			tq_maxalloc;	/* max taskq_ent_t pool size */
+	int			tq_nalloc;	/* cur taskq_ent_t pool size */
+	uint_t			tq_flags;	/* flags */
+	taskqid_t		tq_next_id;	/* next pend/work id */
+	taskqid_t		tq_lowest_id;	/* lowest pend/work id */
+	struct list_head	tq_free_list;	/* free taskq_ent_t's */
+	struct list_head	tq_pend_list;	/* pending taskq_ent_t's */
+	struct list_head	tq_prio_list;	/* priority taskq_ent_t's */
+	struct list_head	tq_delay_list;	/* delayed taskq_ent_t's */
+	struct list_head	tq_taskqs;	/* all taskq_t's */
+	spl_wait_queue_head_t	tq_work_waitq;	/* new work waitq */
+	spl_wait_queue_head_t	tq_wait_waitq;	/* wait waitq */
+	tq_lock_role_t		tq_lock_class;	/* class when taking tq_lock */
+	/* list node for the cpu hotplug callback */
+	struct hlist_node	tq_hp_cb_node;
+	boolean_t		tq_hp_support;
+} taskq_t;
+
+/*
+ * One task entry.  tqent_func has the task_func_t signature (it takes a
+ * single void * argument) and tqent_arg is the matching argument slot;
+ * tqent_taskq points at a taskq_t.
+ * NOTE(review): the roles of the remaining members are not visible in
+ * this header -- see the taskq implementation.
+ */
+typedef struct taskq_ent {
+	spinlock_t		tqent_lock;
+	spl_wait_queue_head_t	tqent_waitq;
+	struct timer_list	tqent_timer;
+	struct list_head	tqent_list;
+	taskqid_t		tqent_id;
+	task_func_t		*tqent_func;
+	void			*tqent_arg;
+	taskq_t			*tqent_taskq;
+	uintptr_t		tqent_flags;
+	unsigned long		tqent_birth;
+} taskq_ent_t;
+
+/* Flag bits; presumably stored in tqent_flags -- TODO confirm. */
+#define	TQENT_FLAG_PREALLOC	0x1
+#define	TQENT_FLAG_CANCEL	0x2
+
+/*
+ * Bookkeeping for one taskq worker thread: two list linkages, the kernel
+ * task_struct, a taskq_t pointer, and a task id / task entry pair.
+ * NOTE(review): presumably the two list nodes link into tq_thread_list
+ * and tq_active_list of the owning taskq_t -- confirm.
+ */
+typedef struct taskq_thread {
+	struct list_head	tqt_thread_list;
+	struct list_head	tqt_active_list;
+	struct task_struct	*tqt_thread;
+	taskq_t			*tqt_tq;
+	taskqid_t		tqt_id;
+	taskq_ent_t		*tqt_task;
+	uintptr_t		tqt_flags;
+} taskq_thread_t;
+
+/* Global system-wide dynamic task queue available for all consumers */
+extern taskq_t *system_taskq;
+/* Global dynamic task queue for long delay */
+extern taskq_t *system_delay_taskq;
+
+/* List of all taskqs */
+extern struct list_head tq_list;
+extern struct rw_semaphore tq_list_sem;
+
+extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
+extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
+    uint_t, clock_t);
+extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+    taskq_ent_t *);
+extern int taskq_empty_ent(taskq_ent_t *);
+extern void taskq_init_ent(taskq_ent_t *);
+extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+extern void taskq_destroy(taskq_t *);
+extern void taskq_wait_id(taskq_t *, taskqid_t);
+extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
+extern void taskq_wait(taskq_t *);
+extern int taskq_cancel_id(taskq_t *, taskqid_t);
+extern int taskq_member(taskq_t *, kthread_t *);
+extern taskq_t *taskq_of_curthread(void);
+
+/*
+ * Both variants map onto plain taskq_create().  The proc argument (and dc
+ * for the sysdc form) is dropped; the sysdc form has no priority
+ * parameter and always passes maxclsyspri.
+ */
+#define	taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \
+    taskq_create(name, nthreads, pri, min, max, flags)
+#define	taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \
+    taskq_create(name, nthreads, maxclsyspri, min, max, flags)
+
+int spl_taskq_init(void);
+void spl_taskq_fini(void);
+
+#endif  /* _SPL_TASKQ_H */

diff --git a/zfs/include/os/linux/spl/sys/thread.h b/zfs/include/os/linux/spl/sys/thread.h
new file mode 100644
index 0000000..2207423
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/thread.h

@@ -0,0 +1,86 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_THREAD_H
+#define	_SPL_THREAD_H
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/tsd.h>
+
+/*
+ * Thread interfaces
+ */
+#define	TP_MAGIC			0x53535353
+
+#define	TS_SLEEP			TASK_INTERRUPTIBLE
+#define	TS_RUN				TASK_RUNNING
+#define	TS_ZOMB				EXIT_ZOMBIE
+#define	TS_STOPPED			TASK_STOPPED
+
+typedef void (*thread_func_t)(void *);
+
+#define	thread_create_named(name, stk, stksize, func, arg, len,	\
+    pp, state, pri)	\
+	__thread_create(stk, stksize, (thread_func_t)func,		\
+	name, arg, len, pp, state, pri)
+
+/* BEGIN CSTYLED */
+#define	thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
+	__thread_create(stk, stksize, (thread_func_t)func,		\
+	#func, arg, len, pp, state, pri)
+/* END CSTYLED */
+
+#define	thread_exit()			__thread_exit()
+#define	thread_join(t)			VERIFY(0)
+#define	curthread			current
+#define	getcomm()			current->comm
+#define	getpid()			current->pid
+
+extern kthread_t *__thread_create(caddr_t stk, size_t  stksize,
+    thread_func_t func, const char *name, void *args, size_t len, proc_t *pp,
+    int state, pri_t pri);
+extern void __thread_exit(void);
+extern struct task_struct *spl_kthread_create(int (*func)(void *),
+    void *data, const char namefmt[], ...);
+
+extern proc_t p0;
+
+#ifdef HAVE_SIGINFO
+typedef kernel_siginfo_t spl_kernel_siginfo_t;
+#else
+typedef siginfo_t spl_kernel_siginfo_t;
+#endif
+
+#ifdef HAVE_SET_SPECIAL_STATE
+#define	spl_set_special_state(x) set_special_state((x))
+#else
+#define	spl_set_special_state(x) __set_current_state((x))
+#endif
+
+
+#endif  /* _SPL_THREAD_H */

diff --git a/zfs/include/os/linux/spl/sys/time.h b/zfs/include/os/linux/spl/sys/time.h
new file mode 100644
index 0000000..fec85f8
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/time.h

@@ -0,0 +1,117 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TIME_H
+#define	_SPL_TIME_H
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <sys/types.h>
+#include <sys/timer.h>
+
+#if defined(CONFIG_64BIT)
+#define	TIME_MAX			INT64_MAX
+#define	TIME_MIN			INT64_MIN
+#else
+#define	TIME_MAX			INT32_MAX
+#define	TIME_MIN			INT32_MIN
+#endif
+
+#define	SEC				1
+#define	MILLISEC			1000
+#define	MICROSEC			1000000
+#define	NANOSEC				1000000000
+
+#define	MSEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MILLISEC))
+#define	NSEC2MSEC(n)	((n) / (NANOSEC / MILLISEC))
+
+#define	USEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MICROSEC))
+#define	NSEC2USEC(n)	((n) / (NANOSEC / MICROSEC))
+
+#define	NSEC2SEC(n)	((n) / (NANOSEC / SEC))
+#define	SEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / SEC))
+
+static const int hz = HZ;
+
+typedef longlong_t		hrtime_t;
+typedef struct timespec		timespec_t;
+
+#define	TIMESPEC_OVERFLOW(ts)		\
+	((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX)
+
+#if defined(HAVE_INODE_TIMESPEC64_TIMES)
+typedef struct timespec64	inode_timespec_t;
+#else
+typedef struct timespec		inode_timespec_t;
+#endif
+
+/* Include for Lustre compatibility */
+#define	timestruc_t	inode_timespec_t
+
+/*
+ * Store the current time in *ts.  The kernel call used depends on what
+ * configure detected: ktime_get_coarse_real_ts64() when both 64-bit inode
+ * timestamps and that helper exist, current_kernel_time64() when only
+ * 64-bit inode timestamps exist, and current_kernel_time() otherwise.
+ */
+static inline void
+gethrestime(inode_timespec_t *ts)
+{
+#if defined(HAVE_INODE_TIMESPEC64_TIMES) && \
+    defined(HAVE_KTIME_GET_COARSE_REAL_TS64)
+	ktime_get_coarse_real_ts64(ts);
+#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
+	*ts = current_kernel_time64();
+#else
+	*ts = current_kernel_time();
+#endif
+}
+
+/*
+ * Return the tv_sec field of the current time.  The timestamp is fetched
+ * with the same configure-dependent kernel call that gethrestime() uses.
+ */
+static inline uint64_t
+gethrestime_sec(void)
+{
+	inode_timespec_t ts;
+
+#if defined(HAVE_INODE_TIMESPEC64_TIMES) && \
+    defined(HAVE_KTIME_GET_COARSE_REAL_TS64)
+	ktime_get_coarse_real_ts64(&ts);
+#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
+	ts = current_kernel_time64();
+#else
+	ts = current_kernel_time();
+#endif
+	return (ts.tv_sec);
+}
+
+/*
+ * Return a nanosecond timestamp built from the kernel's raw monotonic
+ * clock: ktime_get_raw_ts64() when available, getrawmonotonic() otherwise.
+ * The seconds field is widened to hrtime_t before scaling by NSEC_PER_SEC.
+ */
+static inline hrtime_t
+gethrtime(void)
+{
+	hrtime_t whole_sec_ns;
+#if defined(HAVE_KTIME_GET_RAW_TS64)
+	struct timespec64 now;
+	ktime_get_raw_ts64(&now);
+#else
+	struct timespec now;
+	getrawmonotonic(&now);
+#endif
+	whole_sec_ns = (hrtime_t)now.tv_sec * NSEC_PER_SEC;
+	return (whole_sec_ns + now.tv_nsec);
+}
+
+#endif  /* _SPL_TIME_H */

diff --git a/zfs/include/os/linux/spl/sys/timer.h b/zfs/include/os/linux/spl/sys/timer.h
new file mode 100644
index 0000000..02c3c78
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/timer.h

@@ -0,0 +1,85 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TIMER_H
+#define	_SPL_TIMER_H
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+
+#define	lbolt				((clock_t)jiffies)
+#define	lbolt64				((int64_t)get_jiffies_64())
+
+#define	ddi_get_lbolt()			((clock_t)jiffies)
+#define	ddi_get_lbolt64()		((int64_t)get_jiffies_64())
+
+#define	ddi_time_before(a, b)		(typecheck(clock_t, a) && \
+					typecheck(clock_t, b) && \
+					((a) - (b) < 0))
+#define	ddi_time_after(a, b)		ddi_time_before(b, a)
+#define	ddi_time_before_eq(a, b)	(!ddi_time_after(a, b))
+#define	ddi_time_after_eq(a, b)		ddi_time_before_eq(b, a)
+
+#define	ddi_time_before64(a, b)		(typecheck(int64_t, a) && \
+					typecheck(int64_t, b) && \
+					((a) - (b) < 0))
+#define	ddi_time_after64(a, b)		ddi_time_before64(b, a)
+#define	ddi_time_before_eq64(a, b)	(!ddi_time_after64(a, b))
+#define	ddi_time_after_eq64(a, b)	ddi_time_before_eq64(b, a)
+
+#define	delay(ticks)			schedule_timeout_uninterruptible(ticks)
+
+#define	SEC_TO_TICK(sec)		((sec) * HZ)
+#define	MSEC_TO_TICK(ms)		msecs_to_jiffies(ms)
+#define	USEC_TO_TICK(us)		usecs_to_jiffies(us)
+/* ns is parenthesized so a compound argument such as (a + b) is divided as a whole. */
+#define	NSEC_TO_TICK(ns)		usecs_to_jiffies((ns) / NSEC_PER_USEC)
+
+#ifndef from_timer
+#define	from_timer(var, timer, timer_field) \
+	container_of(timer, typeof(*var), timer_field)
+#endif
+
+#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST
+typedef struct timer_list *spl_timer_list_t;
+#else
+typedef unsigned long spl_timer_list_t;
+#endif
+
+#ifndef HAVE_KERNEL_TIMER_SETUP
+
+/*
+ * Compatibility shim that supplies timer_setup() when
+ * HAVE_KERNEL_TIMER_SETUP is not defined.  It initialises the timer with
+ * init_timer() and installs func through setup_timer(), passing the timer
+ * itself (cast to spl_timer_list_t) as the callback argument.
+ * NOTE(review): fl is stored into timer->flags before init_timer() and
+ * setup_timer() run; confirm that neither helper resets the flags field
+ * on the kernels this path targets.
+ */
+static inline void
+timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl)
+{
+#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS
+	(timer)->flags = fl;
+#endif
+	init_timer(timer);
+	setup_timer(timer, func, (spl_timer_list_t)(timer));
+}
+
+#endif /* HAVE_KERNEL_TIMER_SETUP */
+
+#endif  /* _SPL_TIMER_H */

diff --git a/zfs/include/os/linux/spl/sys/trace.h b/zfs/include/os/linux/spl/sys/trace.h
new file mode 100644
index 0000000..b148ace
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/trace.h

@@ -0,0 +1,175 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+
+/*
+ * Calls to DTRACE_PROBE* are mapped to standard Linux kernel trace points
+ * when they are available(when HAVE_DECLARE_EVENT_CLASS is defined).  The
+ * tracepoint event class definitions are found in the general tracing
+ * header file: include/sys/trace_*.h.  See include/sys/trace_vdev.h for
+ * a good example.
+ *
+ * If tracepoints are not available, stub functions are generated which can
+ * be traced using kprobes.  In this case, the DEFINE_DTRACE_PROBE* macros
+ * are used to provide the stub functions and also the prototypes for
+ * those functions.  The mechanism to do this relies on DEFINE_DTRACE_PROBE
+ * macros defined in the general tracing headers(see trace_vdev.h) and
+ * CREATE_TRACE_POINTS being defined only in module/zfs/trace.c.  When ZFS
+ * source files include the general tracing headers, e.g.
+ * module/zfs/vdev_removal.c including trace_vdev.h, DTRACE_PROBE calls
+ * are mapped to stub functions calls and prototypes for those calls are
+ * declared via DEFINE_DTRACE_PROBE*.  Only module/zfs/trace.c defines
+ * CREATE_TRACE_POINTS.  That is followed by includes of all the general
+ * tracing headers thereby defining all stub functions in one place via
+ * the DEFINE_DTRACE_PROBE macros.
+ *
+ * When adding new DTRACE_PROBEs to zfs source, both a tracepoint event
+ * class definition and a DEFINE_DTRACE_PROBE definition are needed to
+ * avoid undefined function errors.
+ */
+
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#if !defined(_TRACE_ZFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ZFS_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * DTRACE_PROBE with 0 arguments is not currently available with
+ *  tracepoint events
+ */
+#define	DTRACE_PROBE(name) \
+	((void)0)
+
+#define	DTRACE_PROBE1(name, t1, arg1) \
+	trace_zfs_##name((arg1))
+
+#define	DTRACE_PROBE2(name, t1, arg1, t2, arg2) \
+	trace_zfs_##name((arg1), (arg2))
+
+#define	DTRACE_PROBE3(name, t1, arg1, t2, arg2, t3, arg3) \
+	trace_zfs_##name((arg1), (arg2), (arg3))
+
+#define	DTRACE_PROBE4(name, t1, arg1, t2, arg2, t3, arg3, t4, arg4) \
+	trace_zfs_##name((arg1), (arg2), (arg3), (arg4))
+
+#endif /* _TRACE_ZFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
+
+#else /* HAVE_DECLARE_EVENT_CLASS */
+
+#define	DTRACE_PROBE(name) \
+	trace_zfs_##name()
+
+#define	DTRACE_PROBE1(name, t1, arg1) \
+	trace_zfs_##name((uintptr_t)(arg1))
+
+#define	DTRACE_PROBE2(name, t1, arg1, t2, arg2) \
+	trace_zfs_##name((uintptr_t)(arg1), (uintptr_t)(arg2))
+
+#define	DTRACE_PROBE3(name, t1, arg1, t2, arg2, t3, arg3) \
+	trace_zfs_##name((uintptr_t)(arg1), (uintptr_t)(arg2), \
+	(uintptr_t)(arg3))
+
+#define	DTRACE_PROBE4(name, t1, arg1, t2, arg2, t3, arg3, t4, arg4) \
+	trace_zfs_##name((uintptr_t)(arg1), (uintptr_t)(arg2), \
+	(uintptr_t)(arg3), (uintptr_t)(arg4))
+
+#define	PROTO_DTRACE_PROBE(name)				\
+	noinline void trace_zfs_##name(void)
+#define	PROTO_DTRACE_PROBE1(name)				\
+	noinline void trace_zfs_##name(uintptr_t)
+#define	PROTO_DTRACE_PROBE2(name)				\
+	noinline void trace_zfs_##name(uintptr_t, uintptr_t)
+#define	PROTO_DTRACE_PROBE3(name)				\
+	noinline void trace_zfs_##name(uintptr_t, uintptr_t,	\
+	uintptr_t)
+#define	PROTO_DTRACE_PROBE4(name)				\
+	noinline void trace_zfs_##name(uintptr_t, uintptr_t,	\
+	uintptr_t, uintptr_t)
+
+#if defined(CREATE_TRACE_POINTS)
+
+#define	FUNC_DTRACE_PROBE(name)					\
+PROTO_DTRACE_PROBE(name);					\
+noinline void trace_zfs_##name(void) { }			\
+EXPORT_SYMBOL(trace_zfs_##name)
+
+#define	FUNC_DTRACE_PROBE1(name)				\
+PROTO_DTRACE_PROBE1(name);					\
+noinline void trace_zfs_##name(uintptr_t arg1) { }		\
+EXPORT_SYMBOL(trace_zfs_##name)
+
+#define	FUNC_DTRACE_PROBE2(name)				\
+PROTO_DTRACE_PROBE2(name);					\
+noinline void trace_zfs_##name(uintptr_t arg1,			\
+    uintptr_t arg2) { }						\
+EXPORT_SYMBOL(trace_zfs_##name)
+
+#define	FUNC_DTRACE_PROBE3(name)				\
+PROTO_DTRACE_PROBE3(name);					\
+noinline void trace_zfs_##name(uintptr_t arg1,			\
+    uintptr_t arg2, uintptr_t arg3) { }				\
+EXPORT_SYMBOL(trace_zfs_##name)
+
+#define	FUNC_DTRACE_PROBE4(name)				\
+PROTO_DTRACE_PROBE4(name);					\
+noinline void trace_zfs_##name(uintptr_t arg1,			\
+    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) { }		\
+EXPORT_SYMBOL(trace_zfs_##name)
+
+#undef	DEFINE_DTRACE_PROBE
+#define	DEFINE_DTRACE_PROBE(name)	FUNC_DTRACE_PROBE(name)
+
+#undef	DEFINE_DTRACE_PROBE1
+#define	DEFINE_DTRACE_PROBE1(name)	FUNC_DTRACE_PROBE1(name)
+
+#undef	DEFINE_DTRACE_PROBE2
+#define	DEFINE_DTRACE_PROBE2(name)	FUNC_DTRACE_PROBE2(name)
+
+#undef	DEFINE_DTRACE_PROBE3
+#define	DEFINE_DTRACE_PROBE3(name)	FUNC_DTRACE_PROBE3(name)
+
+#undef	DEFINE_DTRACE_PROBE4
+#define	DEFINE_DTRACE_PROBE4(name)	FUNC_DTRACE_PROBE4(name)
+
+#else /* CREATE_TRACE_POINTS */
+
+#define	DEFINE_DTRACE_PROBE(name)	PROTO_DTRACE_PROBE(name)
+#define	DEFINE_DTRACE_PROBE1(name)	PROTO_DTRACE_PROBE1(name)
+#define	DEFINE_DTRACE_PROBE2(name)	PROTO_DTRACE_PROBE2(name)
+#define	DEFINE_DTRACE_PROBE3(name)	PROTO_DTRACE_PROBE3(name)
+#define	DEFINE_DTRACE_PROBE4(name)	PROTO_DTRACE_PROBE4(name)
+
+#endif /* CREATE_TRACE_POINTS */
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/spl/sys/trace_spl.h b/zfs/include/os/linux/spl/sys/trace_spl.h
new file mode 100644
index 0000000..bffd91d
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/trace_spl.h

@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _OS_LINUX_SPL_TRACE_H
+#define	_OS_LINUX_SPL_TRACE_H
+
+#include <sys/taskq.h>
+
+#include <sys/trace.h>
+#include <sys/trace_taskq.h>
+
+#endif /* _OS_LINUX_SPL_TRACE_H */

diff --git a/zfs/include/os/linux/spl/sys/trace_taskq.h b/zfs/include/os/linux/spl/sys/trace_taskq.h
new file mode 100644
index 0000000..dbbb3c4
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/trace_taskq.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_taskq
+
+#if !defined(_TRACE_TASKQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_TASKQ_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for single argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ *     taskq_ent_t *, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_taskq_ent_class,
+	TP_PROTO(taskq_ent_t *taskq_ent),
+	TP_ARGS(taskq_ent),
+	TP_STRUCT__entry(
+	    __field(taskq_ent_t *,	taskq_ent)
+	),
+	TP_fast_assign(
+	    __entry->taskq_ent	= taskq_ent;
+	),
+	TP_printk("taskq_ent %p", __entry->taskq_ent)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define DEFINE_TASKQ_EVENT(name) \
+DEFINE_EVENT(zfs_taskq_ent_class, name, \
+	TP_PROTO(taskq_ent_t *taskq_ent), \
+	TP_ARGS(taskq_ent))
+/* END CSTYLED */
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__birth);
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__start);
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__finish);
+
+#endif /* _TRACE_TASKQ_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_taskq
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * When tracepoints are not available, a DEFINE_DTRACE_PROBE* macro is
+ * needed for each DTRACE_PROBE.  These will be used to generate stub
+ * tracing functions and prototypes for those functions.  See
+ * include/os/linux/spl/sys/trace.h.
+ */
+
+DEFINE_DTRACE_PROBE1(taskq_ent__birth);
+DEFINE_DTRACE_PROBE1(taskq_ent__start);
+DEFINE_DTRACE_PROBE1(taskq_ent__finish);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/spl/sys/tsd.h b/zfs/include/os/linux/spl/sys/tsd.h
new file mode 100644
index 0000000..8cdb9e4
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/tsd.h

@@ -0,0 +1,45 @@
+/*
+ *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TSD_H
+#define	_SPL_TSD_H
+
+#include <sys/types.h>
+
+#define	TSD_HASH_TABLE_BITS_DEFAULT	9
+#define	TSD_KEYS_MAX			32768
+#define	DTOR_PID			(PID_MAX_LIMIT+1)
+#define	PID_KEY				(TSD_KEYS_MAX+1)
+
+typedef void (*dtor_func_t)(void *);
+
+extern int tsd_set(uint_t, void *);
+extern void *tsd_get(uint_t);
+extern void *tsd_get_by_thread(uint_t, kthread_t *);
+extern void tsd_create(uint_t *, dtor_func_t);
+extern void tsd_destroy(uint_t *);
+extern void tsd_exit(void);
+
+int spl_tsd_init(void);
+void spl_tsd_fini(void);
+
+#endif /* _SPL_TSD_H */

diff --git a/zfs/include/os/linux/spl/sys/types.h b/zfs/include/os/linux/spl/sys/types.h
new file mode 100644
index 0000000..9f85685
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/types.h

@@ -0,0 +1,71 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TYPES_H
+#define	_SPL_TYPES_H
+
+#include <linux/types.h>
+
+typedef enum {
+	B_FALSE = 0,
+	B_TRUE = 1
+} boolean_t;
+
+typedef unsigned char		uchar_t;
+typedef unsigned short		ushort_t;
+typedef unsigned int		uint_t;
+typedef unsigned long		ulong_t;
+typedef unsigned long long	u_longlong_t;
+typedef long long		longlong_t;
+
+typedef long			intptr_t;
+typedef unsigned long long	rlim64_t;
+
+typedef struct task_struct	kthread_t;
+typedef struct task_struct	proc_t;
+
+typedef int			id_t;
+typedef short			pri_t;
+typedef short			index_t;
+typedef longlong_t		offset_t;
+typedef u_longlong_t		u_offset_t;
+typedef ulong_t			pgcnt_t;
+
+typedef int			major_t;
+typedef int			minor_t;
+
+struct user_namespace;
+#ifdef HAVE_IOPS_CREATE_IDMAP
+#include <linux/refcount.h>
+struct mnt_idmap {
+	struct user_namespace *owner;
+	refcount_t count;
+};
+typedef struct mnt_idmap	zidmap_t;
+#else
+typedef struct user_namespace	zidmap_t;
+#endif
+
+extern zidmap_t *zfs_init_idmap;
+
+#endif	/* _SPL_TYPES_H */

diff --git a/zfs/include/os/linux/spl/sys/types32.h b/zfs/include/os/linux/spl/sys/types32.h
new file mode 100644
index 0000000..cb62c75
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/types32.h

@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_TYPES32_H
+#define	_SPL_TYPES32_H
+
+#include <sys/types.h>
+
+typedef uint32_t	caddr32_t;
+typedef int32_t		daddr32_t;
+typedef int32_t		time32_t;
+typedef uint32_t	size32_t;
+
+#endif	/* _SPL_TYPES32_H */

diff --git a/zfs/include/os/linux/spl/sys/uio.h b/zfs/include/os/linux/spl/sys/uio.h
new file mode 100644
index 0000000..68fab03
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/uio.h

@@ -0,0 +1,161 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_UIO_H
+#define	_SPL_UIO_H
+
+#include <sys/debug.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/blkdev_compat.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <asm/uaccess.h>
+#include <sys/types.h>
+
+#if defined(HAVE_VFS_IOV_ITER) && defined(HAVE_FAULT_IN_IOV_ITER_READABLE)
+#define	iov_iter_fault_in_readable(a, b)	fault_in_iov_iter_readable(a, b)
+#endif
+
+typedef struct iovec iovec_t;
+
+typedef enum zfs_uio_rw {
+	UIO_READ =		0,
+	UIO_WRITE =		1,
+} zfs_uio_rw_t;
+
+typedef enum zfs_uio_seg {
+	UIO_USERSPACE =		0,
+	UIO_SYSSPACE =		1,
+	UIO_BVEC =		2,
+#if defined(HAVE_VFS_IOV_ITER)
+	UIO_ITER =		3,
+#endif
+} zfs_uio_seg_t;
+
+/*
+ * Descriptor for an I/O transfer.  The anonymous union holds the backing
+ * vector; which member is populated follows uio_segflg as set by the
+ * zfs_uio_*_init() helpers in this header: uio_iov for UIO_USERSPACE and
+ * UIO_SYSSPACE, uio_bvec for UIO_BVEC, and uio_iter for UIO_ITER.
+ */
+typedef struct zfs_uio {
+	union {
+		const struct iovec	*uio_iov;
+		const struct bio_vec	*uio_bvec;
+#if defined(HAVE_VFS_IOV_ITER)
+		struct iov_iter		*uio_iter;
+#endif
+	};
+	int		uio_iovcnt;	/* segment count given to the init helper */
+	offset_t	uio_loffset;	/* offset; moved by zfs_uio_advance() */
+	zfs_uio_seg_t	uio_segflg;	/* selects the valid union member */
+	boolean_t	uio_fault_disable;	/* B_FALSE after init */
+	uint16_t	uio_fmode;	/* 0 after init */
+	uint16_t	uio_extflg;	/* 0 after init */
+	ssize_t		uio_resid;	/* residual; cut by zfs_uio_advance() */
+	size_t		uio_skip;	/* skip value given to the init helper */
+} zfs_uio_t;
+
+#define	zfs_uio_segflg(u)		(u)->uio_segflg
+#define	zfs_uio_offset(u)		(u)->uio_loffset
+#define	zfs_uio_resid(u)		(u)->uio_resid
+#define	zfs_uio_iovcnt(u)		(u)->uio_iovcnt
+#define	zfs_uio_iovlen(u, idx)		(u)->uio_iov[(idx)].iov_len
+#define	zfs_uio_iovbase(u, idx)		(u)->uio_iov[(idx)].iov_base
+#define	zfs_uio_fault_disable(u, set)	(u)->uio_fault_disable = set
+#define	zfs_uio_rlimit_fsize(z, u)	(0)
+#define	zfs_uio_fault_move(p, n, rw, u)	zfs_uiomove((p), (n), (rw), (u))
+
+extern int zfs_uio_prefaultpages(ssize_t, zfs_uio_t *);
+
+/*
+ * Overwrite the offset recorded in uio with off.  No other field of the
+ * descriptor is touched.
+ */
+static inline void
+zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
+{
+	uio->uio_loffset = off;
+}
+
+/*
+ * Account for size bytes of progress: the offset moves forward by size
+ * and the residual count shrinks by the same amount.
+ */
+static inline void
+zfs_uio_advance(zfs_uio_t *uio, size_t size)
+{
+	uio->uio_loffset += size;
+	uio->uio_resid -= size;
+}
+
+/*
+ * Prepare a zfs_uio_t that is backed by a plain iovec array.  Only the
+ * UIO_USERSPACE and UIO_SYSSPACE segment types are accepted (asserted).
+ * The fault-disable flag, fmode and extflg fields start out cleared.
+ */
+static inline void
+zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov,
+    unsigned long nr_segs, offset_t offset, zfs_uio_seg_t seg, ssize_t resid,
+    size_t skip)
+{
+	ASSERT(seg == UIO_USERSPACE || seg == UIO_SYSSPACE);
+
+	/* Caller-supplied description of the transfer. */
+	uio->uio_segflg = seg;
+	uio->uio_iov = iov;
+	uio->uio_iovcnt = nr_segs;
+	uio->uio_loffset = offset;
+	uio->uio_resid = resid;
+	uio->uio_skip = skip;
+
+	/* Fixed defaults. */
+	uio->uio_fault_disable = B_FALSE;
+	uio->uio_fmode = 0;
+	uio->uio_extflg = 0;
+}
+
+/*
+ * Prepare a zfs_uio_t that is backed by the bio_vec array of bio.  The
+ * segment type is UIO_BVEC; vector pointer, count, offset, residual and
+ * skip are all taken from the bio through the BIO_BI_*() accessors.  The
+ * offset is the sector number shifted left by 9 (multiplied by 512).
+ */
+static inline void
+zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio)
+{
+	/* Fixed defaults for a bio-backed transfer. */
+	uio->uio_segflg = UIO_BVEC;
+	uio->uio_fault_disable = B_FALSE;
+	uio->uio_fmode = 0;
+	uio->uio_extflg = 0;
+
+	/* Everything else is derived from the bio. */
+	uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
+	uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
+	uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
+	uio->uio_resid = BIO_BI_SIZE(bio);
+	uio->uio_skip = BIO_BI_SKIP(bio);
+}
+
+#if defined(HAVE_VFS_IOV_ITER)
+/*
+ * Prepare a zfs_uio_t that wraps an existing iov_iter.  The segment type
+ * is UIO_ITER and the segment count is copied from iter->nr_segs; offset,
+ * residual and skip come from the caller.  The fault-disable flag, fmode
+ * and extflg fields start out cleared.
+ */
+static inline void
+zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
+    ssize_t resid, size_t skip)
+{
+	/* Fixed defaults for an iterator-backed transfer. */
+	uio->uio_segflg = UIO_ITER;
+	uio->uio_fault_disable = B_FALSE;
+	uio->uio_fmode = 0;
+	uio->uio_extflg = 0;
+
+	/* Iterator and caller-supplied position. */
+	uio->uio_iter = iter;
+	uio->uio_iovcnt = iter->nr_segs;
+	uio->uio_loffset = offset;
+	uio->uio_resid = resid;
+	uio->uio_skip = skip;
+}
+#endif
+
+#if defined(HAVE_ITER_IOV)
+#define	zfs_uio_iter_iov(iter)	iter_iov((iter))
+#else
+#define	zfs_uio_iter_iov(iter)	(iter)->iov
+#endif
+
+#if defined(HAVE_IOV_ITER_TYPE)
+#define	zfs_uio_iov_iter_type(iter)	iov_iter_type((iter))
+#else
+#define	zfs_uio_iov_iter_type(iter)	(iter)->type
+#endif
+
+#endif /* _SPL_UIO_H */

diff --git a/zfs/include/os/linux/spl/sys/user.h b/zfs/include/os/linux/spl/sys/user.h
new file mode 100644
index 0000000..13a2edf
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/user.h

@@ -0,0 +1,41 @@
+/*
+ *  Copyright (C) 2015 Cluster Inc.
+ *  Produced at ClusterHQ Inc (cf, DISCLAIMER).
+ *  Written by Richard Yao <richard.yao@clusterhq.com>.
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_USER_H
+#define	_SPL_USER_H
+
+/*
+ * We have uf_info_t for areleasef(). We implement areleasef() using a global
+ * linked list of all open file descriptors with the task structs referenced,
+ * so accessing the correct descriptor from areleasef() only requires knowing
+ * about the Linux task_struct. Since this is internal to our compatibility
+ * layer, we make it an opaque type.
+ *
+ * XXX: If the descriptor changes under us and we do not do a getf() between
+ * the change and using it, we would get an incorrect reference.
+ */
+
+struct uf_info;
+typedef struct uf_info uf_info_t;
+
+/* Cast x to the opaque uf_info_t handle; x is parenthesized so the cast covers the whole argument. */
+#define	P_FINFO(x) ((uf_info_t *)(x))
+
+#endif /* _SPL_USER_H */

diff --git a/zfs/include/os/linux/spl/sys/vfs.h b/zfs/include/os/linux/spl/sys/vfs.h
new file mode 100644
index 0000000..488f182
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/vfs.h

@@ -0,0 +1,50 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_ZFS_H
+#define	_SPL_ZFS_H
+
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/statfs.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/seq_file.h>
+
+/* Capacity in bytes of the data buffer inside fid_t. */
+#define	MAXFIDSZ	64
+
+/*
+ * File identifier: a length field followed by up to MAXFIDSZ bytes of
+ * data, overlaid with a long (fid_pad) in a union.
+ * NOTE(review): fid_pad presumably exists to force long alignment/size on
+ * the union -- confirm.
+ */
+typedef struct spl_fid {
+	union {
+		long fid_pad;
+		struct {
+			ushort_t len;		/* length of data in bytes */
+			char data[MAXFIDSZ];	/* data (variable len) */
+		} _fid;
+	} un;
+} fid_t;
+
+/* Shorthand for reaching the nested members through a fid_t. */
+#define	fid_len		un._fid.len
+#define	fid_data	un._fid.data
+
+#endif /* SPL_ZFS_H */

diff --git a/zfs/include/os/linux/spl/sys/vmem.h b/zfs/include/os/linux/spl/sys/vmem.h
new file mode 100644
index 0000000..e77af2a
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/vmem.h

@@ -0,0 +1,101 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_VMEM_H
+#define	_SPL_VMEM_H
+
+#include <sys/kmem.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+
+/* Placeholder type: the struct is declared with no members. */
+typedef struct vmem { } vmem_t;
+
+/*
+ * Memory allocation interfaces
+ */
+#define	VMEM_ALLOC	0x01
+#define	VMEM_FREE	0x02
+
+/*
+ * Provide VMALLOC_TOTAL as the VMALLOC_START..VMALLOC_END span when it is
+ * not already defined.
+ */
+#ifndef VMALLOC_TOTAL
+#define	VMALLOC_TOTAL	(VMALLOC_END - VMALLOC_START)
+#endif
+
+/*
+ * vmem_* is an interface to a low level arena-based memory allocator on
+ * Illumos that is used to allocate virtual address space. The kmem SLAB
+ * allocator allocates slabs from it. Then the generic allocation functions
+ * kmem_{alloc,zalloc,free}() are layered on top of SLAB allocators.
+ *
+ * On Linux, the primary means of doing allocations is via kmalloc(), which
+ * is similarly layered on top of something called the buddy allocator. The
+ * buddy allocator is not available to kernel modules, it uses physical
+ * memory addresses rather than virtual memory addresses and is prone to
+ * fragmentation.
+ *
+ * Linux sets aside a relatively small address space for in-kernel virtual
+ * memory from which allocations can be done using vmalloc().  It might seem
+ * like a good idea to use vmalloc() to implement something similar to
+ * Illumos' allocator. However, this has the following problems:
+ *
+ * 1. Page directory table allocations are hard coded to use GFP_KERNEL.
+ *    Consequently, any KM_PUSHPAGE or KM_NOSLEEP allocations done using
+ *    vmalloc() will not have proper semantics.
+ *
+ * 2. Address space exhaustion is a real issue on 32-bit platforms where
+ *    only a few 100MB are available. The kernel will handle it by spinning
+ *    when it runs out of address space.
+ *
+ * 3. All vmalloc() allocations and frees are protected by a single global
+ *    lock which serializes all allocations.
+ *
+ * 4. Accessing /proc/meminfo and /proc/vmallocinfo will iterate the entire
+ *    list. The former will sum the allocations while the latter will print
+ *    them to user space in a way that user space can keep the lock held
+ *    indefinitely.  When the total number of mapped allocations is large
+ *    (several 100,000) a large amount of time will be spent waiting on locks.
+ *
+ * 5. Linux has a wait_on_bit() locking primitive that assumes physical
+ *    memory is used, it simply does not work on virtual memory.  Certain
+ *    Linux structures (e.g. the superblock) use them and might be embedded
+ *    into a structure from Illumos.  This makes using Linux virtual memory
+ *    unsafe in certain situations.
+ *
+ * It follows that we cannot obtain identical semantics to those on Illumos.
+ * Consequently, we implement the kmem_{alloc,zalloc,free}() functions in
+ * such a way that they can be used as drop-in replacements for small vmem_*
+ * allocations (8MB in size or smaller) and map vmem_{alloc,zalloc,free}()
+ * to them.
+ */
+
+/*
+ * The vmem_alloc()/vmem_zalloc() wrappers forward to the spl_vmem_*()
+ * functions and add the calling function name and line number;
+ * vmem_free() forwards the pointer and size unchanged.
+ */
+#define	vmem_alloc(sz, fl)	spl_vmem_alloc((sz), (fl), __func__, __LINE__)
+#define	vmem_zalloc(sz, fl)	spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
+#define	vmem_free(ptr, sz)	spl_vmem_free((ptr), (sz))
+
+/* Backing functions for the macros above; defined outside this header. */
+extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
+extern void spl_vmem_free(const void *ptr, size_t sz);
+
+/* Setup/teardown entry points; defined outside this header. */
+int spl_vmem_init(void);
+void spl_vmem_fini(void);
+
+#endif	/* _SPL_VMEM_H */

diff --git a/zfs/include/os/linux/spl/sys/vmsystm.h b/zfs/include/os/linux/spl/sys/vmsystm.h
new file mode 100644
index 0000000..fcd61e8
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/vmsystm.h

@@ -0,0 +1,93 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_VMSYSTM_H
+#define	_SPL_VMSYSTM_H
+
+#include <linux/mmzone.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <sys/types.h>
+#include <asm/uaccess.h>
+
+/*
+ * totalram_pages is invoked as a function when HAVE_TOTALRAM_PAGES_FUNC is
+ * defined and referenced as a plain identifier otherwise.
+ */
+#ifdef HAVE_TOTALRAM_PAGES_FUNC
+#define	zfs_totalram_pages	totalram_pages()
+#else
+#define	zfs_totalram_pages	totalram_pages
+#endif
+
+/* Same selection for totalhigh_pages, keyed on HAVE_TOTALHIGH_PAGES. */
+#ifdef HAVE_TOTALHIGH_PAGES
+#define	zfs_totalhigh_pages	totalhigh_pages()
+#else
+#define	zfs_totalhigh_pages	totalhigh_pages
+#endif
+
+/* Memory barrier names mapped onto smp_rmb()/smp_wmb(). */
+#define	membar_consumer()		smp_rmb()
+#define	membar_producer()		smp_wmb()
+
+/* physmem expands to the total RAM page count selected above. */
+#define	physmem				zfs_totalram_pages
+
+/*
+ * xcopyin()/xcopyout() take (from, to, size) and forward to
+ * copy_from_user()/copy_to_user() with the destination passed first.
+ */
+#define	xcopyin(from, to, size)		copy_from_user(to, from, size)
+#define	xcopyout(from, to, size)	copy_to_user(to, from, size)
+
+/*
+ * Copy len bytes from 'from' to 'to' using xcopyin().
+ * Returns 0 when xcopyin() yields zero and -1 for any other result.
+ */
+static __inline__ int
+copyin(const void *from, void *to, size_t len)
+{
+	return (xcopyin(from, to, len) ? -1 : 0);
+}
+
+/*
+ * Copy len bytes from 'from' to 'to' using xcopyout().
+ * Returns 0 when xcopyout() yields zero and -1 for any other result.
+ */
+static __inline__ int
+copyout(const void *from, void *to, size_t len)
+{
+	return (xcopyout(from, to, len) ? -1 : 0);
+}
+
+/*
+ * Copy at most len - 1 bytes from 'from' into 'to'.  The destination is
+ * zero-filled first, so its final byte is always NUL.
+ *
+ * Returns -ENAMETOOLONG when len is 0 and 0 in every other case; a
+ * copyin() failure is not reflected in the return value.  When 'done' is
+ * non-NULL it receives copyin()'s return value converted to size_t.
+ *
+ * NOTE(review): 'done' therefore holds copyin()'s status (0 or -1), not a
+ * count of bytes copied -- confirm that callers expect this.
+ */
+static __inline__ int
+copyinstr(const void *from, void *to, size_t len, size_t *done)
+{
+	size_t rc;
+
+	if (len == 0)
+		return (-ENAMETOOLONG);
+
+	/* XXX: Should return ENAMETOOLONG if 'strlen(from) > len' */
+
+	/* Zero the whole buffer; only len - 1 bytes are overwritten below. */
+	memset(to, 0, len);
+	rc = copyin(from, to, len - 1);
+	if (done != NULL)
+		*done = rc;
+
+	return (0);
+}
+
+#endif /* SPL_VMSYSTM_H */

diff --git a/zfs/include/os/linux/spl/sys/vnode.h b/zfs/include/os/linux/spl/sys/vnode.h
new file mode 100644
index 0000000..64c2706
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/vnode.h

@@ -0,0 +1,112 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_VNODE_H
+#define	_SPL_VNODE_H
+
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/fcntl.h>
+#include <linux/buffer_head.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/user.h>
+
+/*
+ * Prior to linux-2.6.33 only O_DSYNC semantics were implemented and
+ * they used the O_SYNC flag.  As of linux-2.6.33 this behavior
+ * was properly split into O_SYNC and O_DSYNC respectively.
+ */
+/* Alias O_DSYNC to O_SYNC when O_DSYNC is not defined. */
+#ifndef O_DSYNC
+#define	O_DSYNC		O_SYNC
+#endif
+
+#define	F_FREESP	11 	/* Free file space */
+
+
+/* F_SEEK_DATA/F_SEEK_HOLE exist only when both SEEK_* are defined. */
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
+#define	F_SEEK_DATA	SEEK_DATA
+#define	F_SEEK_HOLE	SEEK_HOLE
+#endif
+
+/*
+ * The vnode AT_ flags are mapped to the Linux ATTR_* flags.
+ * This allows them to be used safely with an iattr structure.
+ * The AT_XVATTR flag has been added and mapped to the upper
+ * bit range to avoid conflicting with the standard Linux set.
+ */
+/* Drop any existing AT_UID/AT_GID definitions before remapping them. */
+#undef AT_UID
+#undef AT_GID
+
+/* Each AT_* name is an alias for the matching ATTR_* flag. */
+#define	AT_MODE		ATTR_MODE
+#define	AT_UID		ATTR_UID
+#define	AT_GID		ATTR_GID
+#define	AT_SIZE		ATTR_SIZE
+#define	AT_ATIME	ATTR_ATIME
+#define	AT_MTIME	ATTR_MTIME
+#define	AT_CTIME	ATTR_CTIME
+
+/* Additional flag placed in bit 31. */
+#define	ATTR_XVATTR	(1U << 31)
+#define	AT_XVATTR	ATTR_XVATTR
+
+/* Bitwise OR of the ATTR_* flags listed in the definition. */
+#define	ATTR_IATTR_MASK	(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | \
+			ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_FILE)
+
+/* NOTE(review): consumers of the flag values below are not in this header. */
+#define	CRCREAT		0x01
+#define	RMFILE		0x02
+
+#define	B_INVAL		0x01
+#define	B_TRUNC		0x02
+
+#define	LOOKUP_DIR		0x01
+#define	LOOKUP_XATTR		0x02
+#define	CREATE_XATTR_DIR	0x04
+#define	ATTR_NOACLCHECK		0x20
+
+/*
+ * Attribute record.  va_mask is the attribute bit-mask; the remaining
+ * members carry the individual attribute values described per field.
+ */
+typedef struct vattr {
+	uint32_t	va_mask;	/* attribute bit-mask */
+	ushort_t	va_mode;	/* acc mode */
+	uid_t		va_uid;		/* owner uid */
+	gid_t		va_gid;		/* owner gid */
+	long		va_fsid;	/* fs id */
+	long		va_nodeid;	/* node # */
+	uint32_t	va_nlink;	/* # links */
+	uint64_t	va_size;	/* file size */
+	inode_timespec_t va_atime;	/* last acc */
+	inode_timespec_t va_mtime;	/* last mod */
+	inode_timespec_t va_ctime;	/* last chg */
+	dev_t		va_rdev;	/* dev */
+	uint64_t	va_nblocks;	/* space used */
+	uint32_t	va_blksize;	/* block size */
+	struct dentry	*va_dentry;	/* dentry to wire */
+} vattr_t;
+#endif /* SPL_VNODE_H */

diff --git a/zfs/include/os/linux/spl/sys/wait.h b/zfs/include/os/linux/spl/sys/wait.h
new file mode 100644
index 0000000..65cd83e
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/wait.h

@@ -0,0 +1,54 @@
+/*
+ *  Copyright (C) 2007-2014 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_WAIT_H
+#define	_SPL_WAIT_H
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#ifdef HAVE_WAIT_ON_BIT_ACTION
+
+/*
+ * Action callback handed to wait_on_bit(): calls schedule() and
+ * reports 0.  The word argument is unused.
+ */
+static inline int
+spl_bit_wait(void *word)
+{
+	schedule();
+	return (0);
+}
+
+/* Four-argument wait_on_bit() form: pass spl_bit_wait as the action. */
+#define	spl_wait_on_bit(word, bit, mode)		\
+	wait_on_bit(word, bit, spl_bit_wait, mode)
+
+#else
+
+/* Three-argument wait_on_bit() form: forward the arguments as-is. */
+#define	spl_wait_on_bit(word, bit, mode)	wait_on_bit(word, bit, mode)
+
+#endif /* HAVE_WAIT_ON_BIT_ACTION */
+
+/* The queue head type is the same in both configurations. */
+typedef wait_queue_head_t	spl_wait_queue_head_t;
+
+/* The entry type name depends on HAVE_WAIT_QUEUE_ENTRY_T. */
+#ifdef HAVE_WAIT_QUEUE_ENTRY_T
+typedef wait_queue_entry_t	spl_wait_queue_entry_t;
+#else
+typedef wait_queue_t		spl_wait_queue_entry_t;
+#endif
+
+#endif /* SPL_WAIT_H */

diff --git a/zfs/include/os/linux/spl/sys/wmsum.h b/zfs/include/os/linux/spl/sys/wmsum.h
new file mode 100644
index 0000000..0871bd6
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/wmsum.h

@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * wmsum counters are a reduced version of aggsum counters, optimized for
+ * write-mostly scenarios.  They do not provide optimized read functions,
+ * but instead allow much cheaper add function.  The primary usage is
+ * infrequently read statistic counters, not requiring exact precision.
+ *
+ * The Linux implementation is directly mapped into percpu_counter KPI.
+ */
+
+#ifndef	_SYS_WMSUM_H
+#define	_SYS_WMSUM_H
+
+#include <linux/percpu_counter.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* A wmsum_t is a struct percpu_counter. */
+typedef struct percpu_counter	wmsum_t;
+
+/*
+ * Initialize ws to value with percpu_counter_init(); GFP_KERNEL is passed
+ * when HAVE_PERCPU_COUNTER_INIT_WITH_GFP is defined.  The result of
+ * percpu_counter_init() is not checked.
+ */
+static inline void
+wmsum_init(wmsum_t *ws, uint64_t value)
+{
+#ifndef HAVE_PERCPU_COUNTER_INIT_WITH_GFP
+	percpu_counter_init(ws, value);
+#else
+	percpu_counter_init(ws, value, GFP_KERNEL);
+#endif
+}
+
+/* Tear down ws by handing it to percpu_counter_destroy(). */
+static inline void
+wmsum_fini(wmsum_t *ws)
+{
+	percpu_counter_destroy(ws);
+}
+
+/* Return the result of percpu_counter_sum() on ws as a uint64_t. */
+static inline uint64_t
+wmsum_value(wmsum_t *ws)
+{
+	uint64_t total = percpu_counter_sum(ws);
+
+	return (total);
+}
+
+/*
+ * Add delta to ws, passing INT_MAX / 2 as the batch argument.  The
+ * function called depends on HAVE_PERCPU_COUNTER_ADD_BATCH.
+ */
+static inline void
+wmsum_add(wmsum_t *ws, int64_t delta)
+{
+	const int batch = INT_MAX / 2;
+
+#ifndef HAVE_PERCPU_COUNTER_ADD_BATCH
+	__percpu_counter_add(ws, delta, batch);
+#else
+	percpu_counter_add_batch(ws, delta, batch);
+#endif
+}
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_WMSUM_H */

diff --git a/zfs/include/os/linux/spl/sys/zmod.h b/zfs/include/os/linux/spl/sys/zmod.h
new file mode 100644
index 0000000..8d27b62
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/zmod.h

@@ -0,0 +1,69 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ *  z_compress_level/z_uncompress are nearly identical copies of the
+ *  compress2/uncompress functions provided by the official zlib package
+ *  available at http://zlib.net/.  The only changes made were to slightly
+ *  adapt the functions called to match the linux kernel implementation
+ *  of zlib.  The full zlib license follows:
+ *
+ *  zlib.h -- interface of the 'zlib' general purpose compression library
+ *  version 1.2.5, April 19th, 2010
+ *
+ *  Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty.  In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  2. Altered source versions must be plainly marked as such, and must not be
+ *     misrepresented as being the original software.
+ *  3. This notice may not be removed or altered from any source distribution.
+ *
+ *  Jean-loup Gailly
+ *  Mark Adler
+ */
+
+#ifndef _SPL_ZMOD_H
+#define	_SPL_ZMOD_H
+
+#include <sys/types.h>
+#include <linux/zlib.h>
+
+/*
+ * Compression entry points; per the header comment above they are adapted
+ * from zlib's compress2()/uncompress().  destLen is passed by pointer.
+ * Definitions live outside this header.
+ */
+extern int z_compress_level(void *dest, size_t *destLen, const void *source,
+    size_t sourceLen, int level);
+extern int z_uncompress(void *dest, size_t *destLen, const void *source,
+    size_t sourceLen);
+
+/* Setup/teardown entry points; defined outside this header. */
+int spl_zlib_init(void);
+void spl_zlib_fini(void);
+
+#endif /* SPL_ZMOD_H */

diff --git a/zfs/include/os/linux/spl/sys/zone.h b/zfs/include/os/linux/spl/sys/zone.h
new file mode 100644
index 0000000..00e30f6
--- /dev/null
+++ b/zfs/include/os/linux/spl/sys/zone.h

@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPL_ZONE_H
+#define	_SPL_ZONE_H
+
+#include <sys/byteorder.h>
+
+/* The only zone id defined here. */
+#define	GLOBAL_ZONEID			0
+
+/*
+ * Constant stand-ins: the arguments are ignored, visibility and
+ * global-zone checks evaluate to 1, and the zone id is GLOBAL_ZONEID.
+ */
+#define	zone_dataset_visible(x, y)	(1)
+#define	crgetzoneid(x)			(GLOBAL_ZONEID)
+#define	INGLOBALZONE(z)			(1)
+
+#endif /* SPL_ZONE_H */

diff --git a/zfs/include/os/linux/zfs/Makefile.am b/zfs/include/os/linux/zfs/Makefile.am
new file mode 100644
index 0000000..081839c
--- /dev/null
+++ b/zfs/include/os/linux/zfs/Makefile.am

@@ -0,0 +1 @@
+# Recurse into the sys/ subdirectory.
+SUBDIRS = sys

diff --git a/zfs/include/os/linux/zfs/sys/Makefile.am b/zfs/include/os/linux/zfs/sys/Makefile.am
new file mode 100644
index 0000000..a075db4
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/Makefile.am

@@ -0,0 +1,31 @@
+# Headers assigned to kernel_HEADERS below when CONFIG_KERNEL is enabled.
+KERNEL_H = \
+	policy.h \
+	sha2.h \
+	trace_acl.h \
+	trace_arc.h \
+	trace_common.h \
+	trace_zfs.h \
+	trace_dbgmsg.h \
+	trace_dbuf.h \
+	trace_dmu.h \
+	trace_dnode.h \
+	trace_multilist.h \
+	trace_rrwlock.h \
+	trace_txg.h \
+	trace_vdev.h \
+	trace_zil.h \
+	trace_zio.h \
+	trace_zrlock.h \
+	zfs_bootenv_os.h \
+	zfs_context_os.h \
+	zfs_ctldir.h \
+	zfs_dir.h \
+	zfs_vfsops_os.h \
+	zfs_vnops_os.h \
+	zfs_znode_impl.h \
+	zpl.h
+
+# kerneldir is the destination directory for the kernel_HEADERS list.
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys
+kernel_HEADERS = $(KERNEL_H)
+endif

diff --git a/zfs/include/os/linux/zfs/sys/policy.h b/zfs/include/os/linux/zfs/sys/policy.h
new file mode 100644
index 0000000..61afc37
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/policy.h

@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2016, Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_POLICY_H
+#define	_SYS_POLICY_H
+
+#ifdef _KERNEL
+
+#include <sys/cred.h>
+#include <sys/types.h>
+#include <sys/xvattr.h>
+#include <sys/zpl.h>
+
+/* Forward declaration; struct znode is used only through pointers here. */
+struct znode;
+
+/*
+ * Prototypes for the secpolicy_*() routines.  All return int except
+ * secpolicy_setid_clear(), which returns void.  The definitions are not
+ * part of this header.
+ */
+int secpolicy_nfs(const cred_t *);
+int secpolicy_sys_config(const cred_t *, boolean_t);
+int secpolicy_vnode_access2(const cred_t *, struct inode *,
+    uid_t, mode_t, mode_t);
+int secpolicy_vnode_any_access(const cred_t *, struct inode *, uid_t);
+int secpolicy_vnode_chown(const cred_t *, uid_t);
+int secpolicy_vnode_create_gid(const cred_t *);
+int secpolicy_vnode_remove(const cred_t *);
+int secpolicy_vnode_setdac(const cred_t *, uid_t);
+int secpolicy_vnode_setid_retain(struct znode *, const cred_t *, boolean_t);
+int secpolicy_vnode_setids_setgids(const cred_t *, gid_t);
+int secpolicy_zinject(const cred_t *);
+int secpolicy_zfs(const cred_t *);
+int secpolicy_zfs_proc(const cred_t *, proc_t *);
+void secpolicy_setid_clear(vattr_t *, cred_t *);
+int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *,
+    const vattr_t *, cred_t *);
+int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, mode_t);
+/* The sixth parameter is a callback taking (void *, int, cred_t *). */
+int secpolicy_vnode_setattr(cred_t *, struct inode *, struct vattr *,
+    const struct vattr *, int, int (void *, int, cred_t *), void *);
+int secpolicy_basic_link(const cred_t *);
+
+#endif /* _KERNEL */
+#endif /* _SYS_POLICY_H */

diff --git a/zfs/include/os/linux/zfs/sys/sha2.h b/zfs/include/os/linux/zfs/sys/sha2.h
new file mode 100644
index 0000000..4dd966b
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/sha2.h

@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
+
+#ifndef _SYS_SHA2_H
+#define	_SYS_SHA2_H
+
+#include <sys/types.h>		/* for uint_* */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* HMAC key length bounds. */
+#define	SHA2_HMAC_MIN_KEY_LEN	1	/* SHA2-HMAC min key length in bytes */
+#define	SHA2_HMAC_MAX_KEY_LEN	INT_MAX	/* SHA2-HMAC max key length in bytes */
+
+/* Digest sizes. */
+#define	SHA256_DIGEST_LENGTH	32	/* SHA256 digest length in bytes */
+#define	SHA384_DIGEST_LENGTH	48	/* SHA384 digest length in bytes */
+#define	SHA512_DIGEST_LENGTH	64	/* SHA512 digest length in bytes */
+
+/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
+#define	SHA512_224_DIGEST_LENGTH	28	/* SHA512/224 digest length */
+#define	SHA512_256_DIGEST_LENGTH	32	/* SHA512/256 digest length */
+
+#define	SHA256_HMAC_BLOCK_SIZE	64	/* SHA256-HMAC block size */
+#define	SHA512_HMAC_BLOCK_SIZE	128	/* SHA512-HMAC block size */
+
+/*
+ * Mechanism identifiers, numbered 0 through 10 in the same order as the
+ * sha2_mech_type enumerators declared under _SHA2_IMPL in this header.
+ */
+#define	SHA256			0
+#define	SHA256_HMAC		1
+#define	SHA256_HMAC_GEN		2
+#define	SHA384			3
+#define	SHA384_HMAC		4
+#define	SHA384_HMAC_GEN		5
+#define	SHA512			6
+#define	SHA512_HMAC		7
+#define	SHA512_HMAC_GEN		8
+#define	SHA512_224		9
+#define	SHA512_256		10
+
+/*
+ * SHA2 context.
+ * The contents of this structure are a private interface between the
+ * Init/Update/Final calls of the functions defined below.
+ * Callers must never attempt to read or write any of the fields
+ * in this structure directly.
+ */
+typedef struct 	{
+	uint32_t algotype;		/* Algorithm Type */
+
+	/* state (ABCDEFGH) */
+	union {
+		uint32_t s32[8];	/* for SHA256 */
+		uint64_t s64[8];	/* for SHA384/512 */
+	} state;
+	/* number of bits */
+	union {
+		uint32_t c32[2];	/* for SHA256, modulo 2^64 */
+		uint64_t c64[2];	/* for SHA384/512, modulo 2^128 */
+	} count;
+	/* 128-byte input buffer, viewable as 8-, 32- or 64-bit words */
+	union {
+		uint8_t		buf8[128];	/* undigested input */
+		uint32_t	buf32[32];	/* realigned input */
+		uint64_t	buf64[16];	/* realigned input */
+	} buf_un;
+} SHA2_CTX;
+
+/* The per-algorithm context names are all aliases of SHA2_CTX. */
+typedef SHA2_CTX SHA256_CTX;
+typedef SHA2_CTX SHA384_CTX;
+typedef SHA2_CTX SHA512_CTX;
+
+/*
+ * Generic Init/Update/Final interface.  SHA2Init() takes the mechanism
+ * identifier and the context; Update takes the context, an input buffer
+ * and its size; Final takes an output pointer and the context.
+ */
+extern void SHA2Init(uint64_t mech, SHA2_CTX *);
+
+extern void SHA2Update(SHA2_CTX *, const void *, size_t);
+
+extern void SHA2Final(void *, SHA2_CTX *);
+
+/* Per-algorithm variants; their Init functions take only the context. */
+extern void SHA256Init(SHA256_CTX *);
+
+extern void SHA256Update(SHA256_CTX *, const void *, size_t);
+
+extern void SHA256Final(void *, SHA256_CTX *);
+
+extern void SHA384Init(SHA384_CTX *);
+
+extern void SHA384Update(SHA384_CTX *, const void *, size_t);
+
+extern void SHA384Final(void *, SHA384_CTX *);
+
+extern void SHA512Init(SHA512_CTX *);
+
+extern void SHA512Update(SHA512_CTX *, const void *, size_t);
+
+extern void SHA512Final(void *, SHA512_CTX *);
+
+#ifdef _SHA2_IMPL
+/*
+ * The following types/functions are all private to the implementation
+ * of the SHA2 functions and must not be used by consumers of the interface
+ */
+
+/*
+ * List of supported mechanisms in this module.
+ *
+ * It is important to note that in the module, division or modulus calculations
+ * are used on the enumerated type to determine which mechanism is being used;
+ * therefore, changing the order or additional mechanisms should be done
+ * carefully
+ */
+/* Enumerators take the default values 0..10 in declaration order. */
+typedef enum sha2_mech_type {
+	SHA256_MECH_INFO_TYPE,		/* SUN_CKM_SHA256 */
+	SHA256_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC */
+	SHA256_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC_GENERAL */
+	SHA384_MECH_INFO_TYPE,		/* SUN_CKM_SHA384 */
+	SHA384_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC */
+	SHA384_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC_GENERAL */
+	SHA512_MECH_INFO_TYPE,		/* SUN_CKM_SHA512 */
+	SHA512_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC */
+	SHA512_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC_GENERAL */
+	SHA512_224_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_224 */
+	SHA512_256_MECH_INFO_TYPE	/* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#endif /* _SHA2_IMPL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_H */

diff --git a/zfs/include/os/linux/zfs/sys/trace_acl.h b/zfs/include/os/linux/zfs/sys/trace_acl.h
new file mode 100644
index 0000000..6565527
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_acl.h

@@ -0,0 +1,162 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_acl
+
+#if !defined(_TRACE_ACL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ACL_H
+
+#include <linux/tracepoint.h>
+#include <linux/vfs_compat.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     znode_t *, ...,
+ *     zfs_ace_hdr_t *, ...,
+ *     uint32_t, ...);
+ *
+ * The entry snapshots three groups of values: znode state (the first block
+ * of z_* fields), fields of the inode embedded in the znode (i_*), and the
+ * ACE header (z_type, z_flags, z_access_mask), followed by the
+ * mask_matched argument.  Note that the ACE header fields share the z_
+ * prefix with the znode fields.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_ace_class,
+	TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched),
+	TP_ARGS(zn, ace, mask_matched),
+	TP_STRUCT__entry(
+	    __field(uint64_t,		z_id)
+	    __field(uint8_t,		z_unlinked)
+	    __field(uint8_t,		z_atime_dirty)
+	    __field(uint8_t,		z_zn_prefetch)
+	    __field(uint_t,		z_blksz)
+	    __field(uint_t,		z_seq)
+	    __field(uint64_t,		z_mapcnt)
+	    __field(uint64_t,		z_size)
+	    __field(uint64_t,		z_pflags)
+	    __field(uint32_t,		z_sync_cnt)
+	    __field(uint32_t,		z_sync_writes_cnt)
+	    __field(uint32_t,		z_async_writes_cnt)
+	    __field(mode_t,		z_mode)
+	    __field(boolean_t,		z_is_sa)
+	    __field(boolean_t,		z_is_ctldir)
+
+	    __field(uint32_t,		i_uid)
+	    __field(uint32_t,		i_gid)
+	    __field(unsigned long,	i_ino)
+	    __field(unsigned int,	i_nlink)
+	    __field(loff_t,		i_size)
+	    __field(unsigned int,	i_blkbits)
+	    __field(unsigned short,	i_bytes)
+	    __field(umode_t,		i_mode)
+	    __field(__u32,		i_generation)
+
+	    __field(uint16_t,		z_type)
+	    __field(uint16_t,		z_flags)
+	    __field(uint32_t,		z_access_mask)
+
+	    __field(uint32_t,		mask_matched)
+	),
+	TP_fast_assign(
+	    __entry->z_id		= zn->z_id;
+	    __entry->z_unlinked		= zn->z_unlinked;
+	    __entry->z_atime_dirty	= zn->z_atime_dirty;
+	    __entry->z_zn_prefetch	= zn->z_zn_prefetch;
+	    __entry->z_blksz		= zn->z_blksz;
+	    __entry->z_seq		= zn->z_seq;
+	    __entry->z_mapcnt		= zn->z_mapcnt;
+	    __entry->z_size		= zn->z_size;
+	    __entry->z_pflags		= zn->z_pflags;
+	    __entry->z_sync_cnt		= zn->z_sync_cnt;
+	    __entry->z_sync_writes_cnt	= zn->z_sync_writes_cnt;
+	    __entry->z_async_writes_cnt	= zn->z_async_writes_cnt;
+	    __entry->z_mode		= zn->z_mode;
+	    __entry->z_is_sa		= zn->z_is_sa;
+	    __entry->z_is_ctldir	= zn->z_is_ctldir;
+
+	    __entry->i_uid		= KUID_TO_SUID(ZTOI(zn)->i_uid);
+	    __entry->i_gid		= KGID_TO_SGID(ZTOI(zn)->i_gid);
+	    __entry->i_ino		= zn->z_inode.i_ino;
+	    __entry->i_nlink		= zn->z_inode.i_nlink;
+	    __entry->i_size		= zn->z_inode.i_size;
+	    __entry->i_blkbits		= zn->z_inode.i_blkbits;
+	    __entry->i_bytes		= zn->z_inode.i_bytes;
+	    __entry->i_mode		= zn->z_inode.i_mode;
+	    __entry->i_generation	= zn->z_inode.i_generation;
+
+	    __entry->z_type		= ace->z_type;
+	    __entry->z_flags		= ace->z_flags;
+	    __entry->z_access_mask	= ace->z_access_mask;
+
+	    __entry->mask_matched	= mask_matched;
+	),
+	TP_printk("zn { id %llu unlinked %u atime_dirty %u "
+	    "zn_prefetch %u blksz %u seq %u "
+	    "mapcnt %llu size %llu pflags %llu "
+	    "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
+	    "mode 0x%x is_sa %d is_ctldir %d "
+	    "inode { uid %u gid %u ino %lu nlink %u size %lli "
+	    "blkbits %u bytes %u mode 0x%x generation %x } } "
+	    "ace { type %u flags %u access_mask %u } mask_matched %u",
+	    __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
+	    __entry->z_zn_prefetch, __entry->z_blksz,
+	    __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
+	    __entry->z_pflags, __entry->z_sync_cnt,
+	    __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
+	    __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
+	    __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+	    __entry->i_size, __entry->i_blkbits,
+	    __entry->i_bytes, __entry->i_mode, __entry->i_generation,
+	    __entry->z_type, __entry->z_flags, __entry->z_access_mask,
+	    __entry->mask_matched)
+);
+/* END CSTYLED */
+
+/*
+ * DEFINE_ACE_EVENT(name) declares one event of zfs_ace_class with the
+ * class's (zn, ace, mask_matched) prototype.  It is instantiated below for
+ * the "denies" and "allows" ACE probes.
+ */
+/* BEGIN CSTYLED */
+#define	DEFINE_ACE_EVENT(name) \
+DEFINE_EVENT(zfs_ace_class, name, \
+	TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched), \
+	TP_ARGS(zn, ace, mask_matched))
+/* END CSTYLED */
+DEFINE_ACE_EVENT(zfs_zfs__ace__denies);
+DEFINE_ACE_EVENT(zfs_zfs__ace__allows);
+
+#endif /* _TRACE_ACL_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_acl
+#include <trace/define_trace.h>
+
+#else
+
+DEFINE_DTRACE_PROBE3(zfs__ace__denies);
+DEFINE_DTRACE_PROBE3(zfs__ace__allows);
+DEFINE_DTRACE_PROBE(zfs__fastpath__execute__access__miss);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_arc.h b/zfs/include/os/linux/zfs/sys/trace_arc.h
new file mode 100644
index 0000000..d3410bc
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_arc.h

@@ -0,0 +1,419 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/list.h>
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_arc
+
+#if !defined(_TRACE_ARC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ARC_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+#include <sys/trace_common.h> /* For ZIO macros */
+
+/*
+ * Generic support for one argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ *     arc_buf_hdr_t *, ...);
+ *
+ * Every field that TP_printk() reports is filled in by TP_fast_assign();
+ * a field that is declared and printed but never assigned would be
+ * reported with stale trace-buffer contents.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
+	TP_PROTO(arc_buf_hdr_t *ab),
+	TP_ARGS(ab),
+	TP_STRUCT__entry(
+	    __array(uint64_t,		hdr_dva_word, 2)
+	    __field(uint64_t,		hdr_birth)
+	    __field(uint32_t,		hdr_flags)
+	    __field(uint32_t,		hdr_bufcnt)
+	    __field(arc_buf_contents_t,	hdr_type)
+	    __field(uint16_t,		hdr_psize)
+	    __field(uint16_t,		hdr_lsize)
+	    __field(uint64_t,		hdr_spa)
+	    __field(arc_state_type_t,	hdr_state_type)
+	    __field(clock_t,		hdr_access)
+	    __field(uint32_t,		hdr_mru_hits)
+	    __field(uint32_t,		hdr_mru_ghost_hits)
+	    __field(uint32_t,		hdr_mfu_hits)
+	    __field(uint32_t,		hdr_mfu_ghost_hits)
+	    __field(uint32_t,		hdr_l2_hits)
+	    __field(int64_t,		hdr_refcount)
+	),
+	TP_fast_assign(
+	    __entry->hdr_dva_word[0]	= ab->b_dva.dva_word[0];
+	    __entry->hdr_dva_word[1]	= ab->b_dva.dva_word[1];
+	    __entry->hdr_birth		= ab->b_birth;
+	    __entry->hdr_flags		= ab->b_flags;
+	    __entry->hdr_bufcnt		= ab->b_l1hdr.b_bufcnt;
+	    /* hdr_type is printed as "type %u" below, so it must be set. */
+	    __entry->hdr_type		= ab->b_type;
+	    __entry->hdr_psize		= ab->b_psize;
+	    __entry->hdr_lsize		= ab->b_lsize;
+	    __entry->hdr_spa		= ab->b_spa;
+	    __entry->hdr_state_type	= ab->b_l1hdr.b_state->arcs_state;
+	    __entry->hdr_access		= ab->b_l1hdr.b_arc_access;
+	    __entry->hdr_mru_hits	= ab->b_l1hdr.b_mru_hits;
+	    __entry->hdr_mru_ghost_hits	= ab->b_l1hdr.b_mru_ghost_hits;
+	    __entry->hdr_mfu_hits	= ab->b_l1hdr.b_mfu_hits;
+	    __entry->hdr_mfu_ghost_hits	= ab->b_l1hdr.b_mfu_ghost_hits;
+	    __entry->hdr_l2_hits	= ab->b_l2hdr.b_hits;
+	    __entry->hdr_refcount	= ab->b_l1hdr.b_refcnt.rc_count;
+	),
+	TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
+	    "flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
+	    "state_type %u access %lu mru_hits %u mru_ghost_hits %u "
+	    "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
+	    __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
+	    __entry->hdr_birth, __entry->hdr_flags,
+	    __entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
+	    __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
+	    __entry->hdr_access, __entry->hdr_mru_hits,
+	    __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
+	    __entry->hdr_mfu_ghost_hits, __entry->hdr_l2_hits,
+	    __entry->hdr_refcount)
+);
+/* END CSTYLED */
+
+/*
+ * DEFINE_ARC_BUF_HDR_EVENT(name) declares one event of
+ * zfs_arc_buf_hdr_class taking a single arc_buf_hdr_t pointer.  One event
+ * is instantiated below for each ARC/L2ARC probe that uses this class.
+ */
+/* BEGIN CSTYLED */
+#define	DEFINE_ARC_BUF_HDR_EVENT(name) \
+DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
+	TP_PROTO(arc_buf_hdr_t *ab), \
+	TP_ARGS(ab))
+/* END CSTYLED */
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     vdev_t *, ...,
+ *     zio_t *, ...);
+ *
+ * Records the vdev's id, guid and state, followed by whatever the
+ * ZIO_TP_* macros (from sys/trace_common.h) contribute to the entry,
+ * assignment and format.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
+	TP_PROTO(vdev_t *vd, zio_t *zio),
+	TP_ARGS(vd, zio),
+	TP_STRUCT__entry(
+	    __field(uint64_t,	vdev_id)
+	    __field(uint64_t,	vdev_guid)
+	    __field(uint64_t,	vdev_state)
+	    ZIO_TP_STRUCT_ENTRY
+	),
+	TP_fast_assign(
+	    __entry->vdev_id	= vd->vdev_id;
+	    __entry->vdev_guid	= vd->vdev_guid;
+	    __entry->vdev_state	= vd->vdev_state;
+	    ZIO_TP_FAST_ASSIGN
+	),
+	TP_printk("vdev { id %llu guid %llu state %llu } "
+	    ZIO_TP_PRINTK_FMT, __entry->vdev_id, __entry->vdev_guid,
+	    __entry->vdev_state, ZIO_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_L2ARC_RW_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_rw_class, name, \
+	TP_PROTO(vdev_t *vd, zio_t *zio), \
+	TP_ARGS(vd, zio))
+/* END CSTYLED */
+DEFINE_L2ARC_RW_EVENT(zfs_l2arc__read);
+DEFINE_L2ARC_RW_EVENT(zfs_l2arc__write);
+
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     zio_t *, ...,
+ *     l2arc_write_callback_t *, ...);
+ *
+ * The entry consists solely of the ZIO_TP_* macro contents.
+ * NOTE(review): 'cb' is not referenced in this class itself; presumably the
+ * ZIO_TP_* macros only read 'zio' - confirm in sys/trace_common.h.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_l2arc_iodone_class,
+	TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb),
+	TP_ARGS(zio, cb),
+	TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
+	TP_fast_assign(ZIO_TP_FAST_ASSIGN),
+	TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_L2ARC_IODONE_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_iodone_class, name, \
+	TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb), \
+	TP_ARGS(zio, cb))
+/* END CSTYLED */
+DEFINE_L2ARC_IODONE_EVENT(zfs_l2arc__iodone);
+
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ *     arc_buf_hdr_t *, ...,
+ *     const blkptr_t *,
+ *     uint64_t,
+ *     const zbookmark_phys_t *);
+ *
+ * Records the ARC header fields (hdr_*), the three DVAs and the checksum
+ * words of the block pointer (bp_*), the 'size' argument (stored as
+ * bp_lsize) and the bookmark (zb_*).
+ *
+ * NOTE(review): hdr_type is declared in the entry but is neither assigned
+ * nor printed by this class.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_arc_miss_class,
+	TP_PROTO(arc_buf_hdr_t *hdr,
+	    const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb),
+	TP_ARGS(hdr, bp, size, zb),
+	TP_STRUCT__entry(
+	    __array(uint64_t,		hdr_dva_word, 2)
+	    __field(uint64_t,		hdr_birth)
+	    __field(uint32_t,		hdr_flags)
+	    __field(uint32_t,		hdr_bufcnt)
+	    __field(arc_buf_contents_t,	hdr_type)
+	    __field(uint16_t,		hdr_psize)
+	    __field(uint16_t,		hdr_lsize)
+	    __field(uint64_t,		hdr_spa)
+	    __field(arc_state_type_t,	hdr_state_type)
+	    __field(clock_t,		hdr_access)
+	    __field(uint32_t,		hdr_mru_hits)
+	    __field(uint32_t,		hdr_mru_ghost_hits)
+	    __field(uint32_t,		hdr_mfu_hits)
+	    __field(uint32_t,		hdr_mfu_ghost_hits)
+	    __field(uint32_t,		hdr_l2_hits)
+	    __field(int64_t,		hdr_refcount)
+
+	    __array(uint64_t,		bp_dva0, 2)
+	    __array(uint64_t,		bp_dva1, 2)
+	    __array(uint64_t,		bp_dva2, 2)
+	    __array(uint64_t,		bp_cksum, 4)
+
+	    __field(uint64_t,		bp_lsize)
+
+	    __field(uint64_t,		zb_objset)
+	    __field(uint64_t,		zb_object)
+	    __field(int64_t,		zb_level)
+	    __field(uint64_t,		zb_blkid)
+	),
+	TP_fast_assign(
+	    __entry->hdr_dva_word[0]	= hdr->b_dva.dva_word[0];
+	    __entry->hdr_dva_word[1]	= hdr->b_dva.dva_word[1];
+	    __entry->hdr_birth		= hdr->b_birth;
+	    __entry->hdr_flags		= hdr->b_flags;
+	    __entry->hdr_bufcnt		= hdr->b_l1hdr.b_bufcnt;
+	    __entry->hdr_psize		= hdr->b_psize;
+	    __entry->hdr_lsize		= hdr->b_lsize;
+	    __entry->hdr_spa		= hdr->b_spa;
+	    __entry->hdr_state_type	= hdr->b_l1hdr.b_state->arcs_state;
+	    __entry->hdr_access		= hdr->b_l1hdr.b_arc_access;
+	    __entry->hdr_mru_hits	= hdr->b_l1hdr.b_mru_hits;
+	    __entry->hdr_mru_ghost_hits	= hdr->b_l1hdr.b_mru_ghost_hits;
+	    __entry->hdr_mfu_hits	= hdr->b_l1hdr.b_mfu_hits;
+	    __entry->hdr_mfu_ghost_hits	= hdr->b_l1hdr.b_mfu_ghost_hits;
+	    __entry->hdr_l2_hits	= hdr->b_l2hdr.b_hits;
+	    __entry->hdr_refcount	= hdr->b_l1hdr.b_refcnt.rc_count;
+
+	    __entry->bp_dva0[0]		= bp->blk_dva[0].dva_word[0];
+	    __entry->bp_dva0[1]		= bp->blk_dva[0].dva_word[1];
+	    __entry->bp_dva1[0]		= bp->blk_dva[1].dva_word[0];
+	    __entry->bp_dva1[1]		= bp->blk_dva[1].dva_word[1];
+	    __entry->bp_dva2[0]		= bp->blk_dva[2].dva_word[0];
+	    __entry->bp_dva2[1]		= bp->blk_dva[2].dva_word[1];
+	    __entry->bp_cksum[0]	= bp->blk_cksum.zc_word[0];
+	    __entry->bp_cksum[1]	= bp->blk_cksum.zc_word[1];
+	    __entry->bp_cksum[2]	= bp->blk_cksum.zc_word[2];
+	    __entry->bp_cksum[3]	= bp->blk_cksum.zc_word[3];
+
+	    __entry->bp_lsize		= size;
+
+	    __entry->zb_objset		= zb->zb_objset;
+	    __entry->zb_object		= zb->zb_object;
+	    __entry->zb_level		= zb->zb_level;
+	    __entry->zb_blkid		= zb->zb_blkid;
+	),
+	TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
+	    "flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
+	    "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
+	    "mfu_ghost_hits %u l2_hits %u refcount %lli } "
+	    "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
+	    "0x%llx:0x%llx cksum 0x%llx:0x%llx:0x%llx:0x%llx "
+	    "lsize %llu } zb { objset %llu object %llu level %lli "
+	    "blkid %llu }",
+	    __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
+	    __entry->hdr_birth, __entry->hdr_flags,
+	    __entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
+	    __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
+	    __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
+	    __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
+	    __entry->hdr_l2_hits, __entry->hdr_refcount,
+	    __entry->bp_dva0[0], __entry->bp_dva0[1],
+	    __entry->bp_dva1[0], __entry->bp_dva1[1],
+	    __entry->bp_dva2[0], __entry->bp_dva2[1],
+	    __entry->bp_cksum[0], __entry->bp_cksum[1],
+	    __entry->bp_cksum[2], __entry->bp_cksum[3],
+	    __entry->bp_lsize, __entry->zb_objset, __entry->zb_object,
+	    __entry->zb_level, __entry->zb_blkid)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_ARC_MISS_EVENT(name) \
+DEFINE_EVENT(zfs_arc_miss_class, name, \
+	TP_PROTO(arc_buf_hdr_t *hdr, \
+	    const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb), \
+	TP_ARGS(hdr, bp, size, zb))
+/* END CSTYLED */
+DEFINE_ARC_MISS_EVENT(zfs_arc__miss);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ *     l2arc_dev_t *, ...,
+ *     list_t *, ...,
+ *     uint64_t, ...,
+ *     boolean_t, ...);
+ *
+ * Records the id, guid and state of the device's vdev (dev->l2ad_vdev),
+ * the device's hand/start/end/first/writing fields, and the 'taddr' and
+ * 'all' arguments.  The 'buflist' argument is part of the prototype but
+ * nothing from it is recorded.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_l2arc_evict_class,
+	TP_PROTO(l2arc_dev_t *dev,
+	    list_t *buflist, uint64_t taddr, boolean_t all),
+	TP_ARGS(dev, buflist, taddr, all),
+	TP_STRUCT__entry(
+	    __field(uint64_t,		vdev_id)
+	    __field(uint64_t,		vdev_guid)
+	    __field(uint64_t,		vdev_state)
+
+	    __field(uint64_t,		l2ad_hand)
+	    __field(uint64_t,		l2ad_start)
+	    __field(uint64_t,		l2ad_end)
+	    __field(boolean_t,		l2ad_first)
+	    __field(boolean_t,		l2ad_writing)
+
+	    __field(uint64_t,		taddr)
+	    __field(boolean_t,		all)
+	),
+	TP_fast_assign(
+	    __entry->vdev_id		= dev->l2ad_vdev->vdev_id;
+	    __entry->vdev_guid		= dev->l2ad_vdev->vdev_guid;
+	    __entry->vdev_state		= dev->l2ad_vdev->vdev_state;
+
+	    __entry->l2ad_hand		= dev->l2ad_hand;
+	    __entry->l2ad_start		= dev->l2ad_start;
+	    __entry->l2ad_end		= dev->l2ad_end;
+	    __entry->l2ad_first		= dev->l2ad_first;
+	    __entry->l2ad_writing	= dev->l2ad_writing;
+
+	    __entry->taddr		= taddr;
+	    __entry->all		= all;
+	),
+	TP_printk("l2ad { vdev { id %llu guid %llu state %llu } "
+	    "hand %llu start %llu end %llu "
+	    "first %d writing %d } taddr %llu all %d",
+	    __entry->vdev_id, __entry->vdev_guid, __entry->vdev_state,
+	    __entry->l2ad_hand, __entry->l2ad_start,
+	    __entry->l2ad_end, __entry->l2ad_first, __entry->l2ad_writing,
+	    __entry->taddr, __entry->all)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_L2ARC_EVICT_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_evict_class, name, \
+	TP_PROTO(l2arc_dev_t *dev, \
+	    list_t *buflist, uint64_t taddr, boolean_t all), \
+	TP_ARGS(dev, buflist, taddr, all))
+/* END CSTYLED */
+DEFINE_L2ARC_EVICT_EVENT(zfs_l2arc__evict);
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     uint64_t, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...);
+ *
+ * The three arguments (amount, arc_evict_count, aew_count) are copied into
+ * the entry unchanged and printed as unsigned 64-bit decimals.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_arc_wait_for_eviction_class,
+	TP_PROTO(uint64_t amount, uint64_t arc_evict_count, uint64_t aew_count),
+	TP_ARGS(amount, arc_evict_count, aew_count),
+	TP_STRUCT__entry(
+	    __field(uint64_t,		amount)
+	    __field(uint64_t,		arc_evict_count)
+	    __field(uint64_t,		aew_count)
+	),
+	TP_fast_assign(
+	    __entry->amount		= amount;
+	    __entry->arc_evict_count	= arc_evict_count;
+	    __entry->aew_count		= aew_count;
+	),
+	TP_printk("amount %llu arc_evict_count %llu aew_count %llu",
+	    __entry->amount, __entry->arc_evict_count, __entry->aew_count)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(name) \
+DEFINE_EVENT(zfs_arc_wait_for_eviction_class, name, \
+	TP_PROTO(uint64_t amount, uint64_t arc_evict_count, uint64_t aew_count), \
+	TP_ARGS(amount, arc_evict_count, aew_count))
+/* END CSTYLED */
+DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction);
+
+#endif /* _TRACE_ARC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_arc
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback used when HAVE_DECLARE_EVENT_CLASS is not defined: the same
+ * probe names (without the zfs_ prefix) are declared through
+ * DEFINE_DTRACE_PROBE<N>() instead of as tracepoint events.
+ * NOTE(review): <N> appears to be the probe's argument count (it matches
+ * the TP_PROTO arity of the corresponding event classes) - confirm against
+ * the DEFINE_DTRACE_PROBE<N> definitions.
+ */
+DEFINE_DTRACE_PROBE1(arc__hit);
+DEFINE_DTRACE_PROBE1(arc__evict);
+DEFINE_DTRACE_PROBE1(arc__delete);
+DEFINE_DTRACE_PROBE1(new_state__mru);
+DEFINE_DTRACE_PROBE1(new_state__mfu);
+DEFINE_DTRACE_PROBE1(arc__async__upgrade__sync);
+DEFINE_DTRACE_PROBE1(arc__demand__hit__predictive__prefetch);
+DEFINE_DTRACE_PROBE1(l2arc__hit);
+DEFINE_DTRACE_PROBE1(l2arc__miss);
+DEFINE_DTRACE_PROBE2(l2arc__read);
+DEFINE_DTRACE_PROBE2(l2arc__write);
+DEFINE_DTRACE_PROBE2(l2arc__iodone);
+DEFINE_DTRACE_PROBE3(arc__wait__for__eviction);
+DEFINE_DTRACE_PROBE4(arc__miss);
+DEFINE_DTRACE_PROBE4(l2arc__evict);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/sys/trace_common.h b/zfs/include/os/linux/zfs/sys/trace_common.h
similarity index 100%
rename from zfs/include/sys/trace_common.h
rename to zfs/include/os/linux/zfs/sys/trace_common.h


diff --git a/zfs/include/os/linux/zfs/sys/trace_dbgmsg.h b/zfs/include/os/linux/zfs/sys/trace_dbgmsg.h
new file mode 100644
index 0000000..513918d
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_dbgmsg.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_dbgmsg
+
+#if !defined(_TRACE_DBGMSG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_DBGMSG_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * This file defines tracepoint events for use by the dbgmsg(),
+ * dprintf(), and SET_ERROR() interfaces. These are grouped here because
+ * they all provide a way to store simple messages in the debug log (as
+ * opposed to events used by the DTRACE_PROBE interfaces which typically
+ * dump structured data).
+ *
+ * This header is included inside the trace.h multiple inclusion guard,
+ * and it is guarded above against direct inclusion, so it need not
+ * be guarded separately.
+ */
+
+/*
+ * Generic support for one argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ *     const char *, ...);
+ *
+ * The message is stored in the entry as a string (__string/__assign_str)
+ * and TP_printk() emits it through a plain "%s".
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_dprintf_class,
+	TP_PROTO(const char *msg),
+	TP_ARGS(msg),
+	TP_STRUCT__entry(
+	    __string(msg, msg)
+	),
+	TP_fast_assign(
+	    __assign_str(msg, msg);
+	),
+	TP_printk("%s", __get_str(msg))
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_DPRINTF_EVENT(name) \
+DEFINE_EVENT(zfs_dprintf_class, name, \
+	TP_PROTO(const char *msg), \
+	TP_ARGS(msg))
+/* END CSTYLED */
+DEFINE_DPRINTF_EVENT(zfs_zfs__dprintf);
+
+#endif /* _TRACE_DBGMSG_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_dbgmsg
+#include <trace/define_trace.h>
+
+#else
+
+DEFINE_DTRACE_PROBE1(zfs__dprintf);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_dbuf.h b/zfs/include/os/linux/zfs/sys/trace_dbuf.h
new file mode 100644
index 0000000..bd7d791
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_dbuf.h

@@ -0,0 +1,169 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_dbuf
+
+#if !defined(_TRACE_DBUF_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_DBUF_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+#ifndef	TRACE_DBUF_MSG_MAX
+#define	TRACE_DBUF_MSG_MAX	512
+#endif
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     dmu_buf_impl_t *, ...,
+ *     zio_t *, ...);
+ */
+
+/*
+ * Trace entry layout shared by the dbuf event classes in this file: the
+ * spa name string (os_spa), the numeric dbuf fields, and a pre-formatted
+ * message (msg).  Both strings are dynamic arrays sized by
+ * TRACE_DBUF_MSG_MAX.
+ */
+#define	DBUF_TP_STRUCT_ENTRY					\
+	__dynamic_array(char,	os_spa,	TRACE_DBUF_MSG_MAX)	\
+	__field(uint64_t,	ds_object)			\
+	__field(uint64_t,	db_object)			\
+	__field(uint64_t,	db_level)			\
+	__field(uint64_t,	db_blkid)			\
+	__field(uint64_t,	db_offset)			\
+	__field(uint64_t,	db_size)			\
+	__field(uint64_t,	db_state)			\
+	__field(int64_t,	db_holds)			\
+	__dynamic_array(char,	msg,	TRACE_DBUF_MSG_MAX)
+
+/*
+ * Populate a dbuf trace entry from the tracepoint's 'db' argument.  When
+ * db is NULL the numeric fields are zeroed and placeholder strings are
+ * stored instead.  The human-readable message is formatted here, at
+ * assignment time, into the 'msg' array using DBUF_TP_PRINTK_FMT and
+ * DBUF_TP_PRINTK_ARGS.
+ *
+ * Every statement, including each __assign_str(), is explicitly terminated
+ * with ';' rather than relying on the kernel macro to supply one.
+ */
+#define	DBUF_TP_FAST_ASSIGN						\
+	if (db != NULL) {						\
+		__assign_str(os_spa,					\
+		spa_name(DB_DNODE(db)->dn_objset->os_spa));		\
+									\
+		__entry->ds_object = db->db_objset->os_dsl_dataset ?	\
+		db->db_objset->os_dsl_dataset->ds_object : 0;		\
+									\
+		__entry->db_object = db->db.db_object;			\
+		__entry->db_level  = db->db_level;			\
+		__entry->db_blkid  = db->db_blkid;			\
+		__entry->db_offset = db->db.db_offset;			\
+		__entry->db_size   = db->db.db_size;			\
+		__entry->db_state  = db->db_state;			\
+		__entry->db_holds  = zfs_refcount_count(&db->db_holds);	\
+		snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX,		\
+		    DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS);		\
+	} else {							\
+		__assign_str(os_spa, "NULL");				\
+		__entry->ds_object = 0;					\
+		__entry->db_object = 0;					\
+		__entry->db_level  = 0;					\
+		__entry->db_blkid  = 0;					\
+		__entry->db_offset = 0;					\
+		__entry->db_size   = 0;					\
+		__entry->db_state  = 0;					\
+		__entry->db_holds  = 0;					\
+		snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX,		\
+		    "dbuf { NULL }");					\
+	}
+
+/*
+ * Format string and matching argument list for the dbuf message.  They are
+ * consumed by DBUF_TP_FAST_ASSIGN, which snprintf()s them into the 'msg'
+ * field.  Note that the value printed as "objset" is __entry->ds_object.
+ */
+#define	DBUF_TP_PRINTK_FMT						\
+	"dbuf { spa \"%s\" objset %llu object %llu level %llu "		\
+	"blkid %llu offset %llu size %llu state %llu holds %lld }"
+
+#define	DBUF_TP_PRINTK_ARGS					\
+	__get_str(os_spa), __entry->ds_object,			\
+	__entry->db_object, __entry->db_level,			\
+	__entry->db_blkid, __entry->db_offset,			\
+	__entry->db_size, __entry->db_state, __entry->db_holds
+
+/*
+ * Event classes for dbuf tracepoints taking (db, zio) and (db, why).  Both
+ * are built from the DBUF_TP_STRUCT_ENTRY / DBUF_TP_FAST_ASSIGN macros and
+ * print only the pre-formatted 'msg' string; the second argument of each
+ * prototype is not referenced in the class definitions themselves.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_dbuf_class,
+	TP_PROTO(dmu_buf_impl_t *db, zio_t *zio),
+	TP_ARGS(db, zio),
+	TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
+	TP_fast_assign(DBUF_TP_FAST_ASSIGN),
+	TP_printk("%s", __get_str(msg))
+);
+
+DECLARE_EVENT_CLASS(zfs_dbuf_state_class,
+	TP_PROTO(dmu_buf_impl_t *db, const char *why),
+	TP_ARGS(db, why),
+	TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
+	TP_fast_assign(DBUF_TP_FAST_ASSIGN),
+	TP_printk("%s", __get_str(msg))
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_DBUF_EVENT(name) \
+DEFINE_EVENT(zfs_dbuf_class, name, \
+	TP_PROTO(dmu_buf_impl_t *db, zio_t *zio), \
+	TP_ARGS(db, zio))
+/* END CSTYLED */
+DEFINE_DBUF_EVENT(zfs_blocked__read);
+
+/* BEGIN CSTYLED */
+#define	DEFINE_DBUF_STATE_EVENT(name) \
+DEFINE_EVENT(zfs_dbuf_state_class, name, \
+	TP_PROTO(dmu_buf_impl_t *db, const char *why), \
+	TP_ARGS(db, why))
+/* END CSTYLED */
+DEFINE_DBUF_STATE_EVENT(zfs_dbuf__state_change);
+
+/*
+ * Event class for dbuf tracepoints taking (db, mls).  It is built from the
+ * DBUF_TP_STRUCT_ENTRY / DBUF_TP_FAST_ASSIGN macros and prints only the
+ * pre-formatted 'msg' string; 'mls' is not referenced in the class
+ * definition itself.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_dbuf_evict_one_class,
+	TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls),
+	TP_ARGS(db, mls),
+	TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
+	TP_fast_assign(DBUF_TP_FAST_ASSIGN),
+	TP_printk("%s", __get_str(msg))
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_DBUF_EVICT_ONE_EVENT(name) \
+DEFINE_EVENT(zfs_dbuf_evict_one_class, name, \
+	TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls), \
+	TP_ARGS(db, mls))
+/* END CSTYLED */
+DEFINE_DBUF_EVICT_ONE_EVENT(zfs_dbuf__evict__one);
+
+#endif /* _TRACE_DBUF_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_dbuf
+#include <trace/define_trace.h>
+
+#else
+
+DEFINE_DTRACE_PROBE2(blocked__read);
+DEFINE_DTRACE_PROBE2(dbuf__evict__one);
+DEFINE_DTRACE_PROBE2(dbuf__state_change);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_dmu.h b/zfs/include/os/linux/zfs/sys/trace_dmu.h
new file mode 100644
index 0000000..3c64a37
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_dmu.h

@@ -0,0 +1,136 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_dmu
+
+#if !defined(_TRACE_DMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_DMU_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     dmu_tx_t *, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...);
+ *
+ * Records a snapshot of the transaction's tx_* fields together with the
+ * 'dirty' and 'min_tx_time' arguments.  The entry declares min_tx_time
+ * before dirty, while the printed output lists dirty first.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
+	TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time),
+	TP_ARGS(tx, dirty, min_tx_time),
+	TP_STRUCT__entry(
+	    __field(uint64_t,			tx_txg)
+	    __field(uint64_t,			tx_lastsnap_txg)
+	    __field(uint64_t,			tx_lasttried_txg)
+	    __field(boolean_t,			tx_anyobj)
+	    __field(boolean_t,			tx_dirty_delayed)
+	    __field(hrtime_t,			tx_start)
+	    __field(boolean_t,			tx_wait_dirty)
+	    __field(int,			tx_err)
+	    __field(uint64_t,			min_tx_time)
+	    __field(uint64_t,			dirty)
+	),
+	TP_fast_assign(
+	    __entry->tx_txg			= tx->tx_txg;
+	    __entry->tx_lastsnap_txg		= tx->tx_lastsnap_txg;
+	    __entry->tx_lasttried_txg		= tx->tx_lasttried_txg;
+	    __entry->tx_anyobj			= tx->tx_anyobj;
+	    __entry->tx_dirty_delayed		= tx->tx_dirty_delayed;
+	    __entry->tx_start			= tx->tx_start;
+	    __entry->tx_wait_dirty		= tx->tx_wait_dirty;
+	    __entry->tx_err			= tx->tx_err;
+	    __entry->dirty			= dirty;
+	    __entry->min_tx_time		= min_tx_time;
+	),
+	TP_printk("tx { txg %llu lastsnap_txg %llu tx_lasttried_txg %llu "
+	    "anyobj %d dirty_delayed %d start %llu wait_dirty %d err %i "
+	    "} dirty %llu min_tx_time %llu",
+	    __entry->tx_txg, __entry->tx_lastsnap_txg,
+	    __entry->tx_lasttried_txg, __entry->tx_anyobj,
+	    __entry->tx_dirty_delayed, __entry->tx_start,
+	    __entry->tx_wait_dirty, __entry->tx_err,
+	    __entry->dirty, __entry->min_tx_time)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_DELAY_MINTIME_EVENT(name) \
+DEFINE_EVENT(zfs_delay_mintime_class, name, \
+	TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time), \
+	TP_ARGS(tx, dirty, min_tx_time))
+/* END CSTYLED */
+DEFINE_DELAY_MINTIME_EVENT(zfs_delay__mintime);
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     uint64_t, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...);
+ *
+ * The three arguments (long_free_dirty_all_txgs, chunk_len, txg) are copied
+ * into the entry unchanged and printed as unsigned 64-bit decimals.
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_free_long_range_class,
+	TP_PROTO(uint64_t long_free_dirty_all_txgs, uint64_t chunk_len,
+	    uint64_t txg),
+	TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg),
+	TP_STRUCT__entry(
+	    __field(uint64_t,			long_free_dirty_all_txgs)
+	    __field(uint64_t,			chunk_len)
+	    __field(uint64_t,			txg)
+	),
+	TP_fast_assign(
+	    __entry->long_free_dirty_all_txgs	= long_free_dirty_all_txgs;
+	    __entry->chunk_len			= chunk_len;
+	    __entry->txg			= txg;
+	),
+	TP_printk("long_free_dirty_all_txgs %llu chunk_len %llu txg %llu",
+	    __entry->long_free_dirty_all_txgs,
+	    __entry->chunk_len, __entry->txg)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define	DEFINE_FREE_LONG_RANGE_EVENT(name) \
+DEFINE_EVENT(zfs_free_long_range_class, name, \
+	TP_PROTO(uint64_t long_free_dirty_all_txgs, \
+	    uint64_t chunk_len, uint64_t txg), \
+	TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg))
+/* END CSTYLED */
+DEFINE_FREE_LONG_RANGE_EVENT(zfs_free__long__range);
+
+#endif /* _TRACE_DMU_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_dmu
+#include <trace/define_trace.h>
+
+#else
+
+DEFINE_DTRACE_PROBE3(delay__mintime);
+DEFINE_DTRACE_PROBE3(free__long__range);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_dnode.h b/zfs/include/os/linux/zfs/sys/trace_dnode.h
new file mode 100644
index 0000000..27ad6cb
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_dnode.h

@@ -0,0 +1,129 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_dnode
+
+#if !defined(_TRACE_DNODE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_DNODE_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     dnode_t *, ...,
+ *     int64_t, ...,
+ *     uint32_t, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_dnode_move_class,
+	TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs),
+	TP_ARGS(dn, refcount, dbufs),
+	/*
+	 * Record layout: scalar fields copied out of the dnode_t first,
+	 * then the refcount and dbufs probe arguments.
+	 */
+	TP_STRUCT__entry(
+	    __field(uint64_t,		dn_object)
+	    __field(dmu_object_type_t,	dn_type)
+	    __field(uint16_t,		dn_bonuslen)
+	    __field(uint8_t,		dn_bonustype)
+	    __field(uint8_t,		dn_nblkptr)
+	    __field(uint8_t,		dn_checksum)
+	    __field(uint8_t,		dn_compress)
+	    __field(uint8_t,		dn_nlevels)
+	    __field(uint8_t,		dn_indblkshift)
+	    __field(uint8_t,		dn_datablkshift)
+	    __field(uint8_t,		dn_moved)
+	    __field(uint16_t,		dn_datablkszsec)
+	    __field(uint32_t,		dn_datablksz)
+	    __field(uint64_t,		dn_maxblkid)
+	    __field(int64_t,		dn_tx_holds)
+	    __field(int64_t,		dn_holds)
+	    __field(boolean_t,		dn_have_spill)
+
+	    __field(int64_t,		refcount)
+	    __field(uint32_t,		dbufs)
+	),
+	TP_fast_assign(
+	    __entry->dn_object		= dn->dn_object;
+	    __entry->dn_type		= dn->dn_type;
+	    __entry->dn_bonuslen	= dn->dn_bonuslen;
+	    __entry->dn_bonustype	= dn->dn_bonustype;
+	    __entry->dn_nblkptr		= dn->dn_nblkptr;
+	    __entry->dn_checksum	= dn->dn_checksum;
+	    __entry->dn_compress	= dn->dn_compress;
+	    __entry->dn_nlevels		= dn->dn_nlevels;
+	    __entry->dn_indblkshift	= dn->dn_indblkshift;
+	    __entry->dn_datablkshift	= dn->dn_datablkshift;
+	    __entry->dn_moved		= dn->dn_moved;
+	    __entry->dn_datablkszsec	= dn->dn_datablkszsec;
+	    __entry->dn_datablksz	= dn->dn_datablksz;
+	    __entry->dn_maxblkid	= dn->dn_maxblkid;
+	    /* Hold counts are read from the rc_count member of each field. */
+	    __entry->dn_tx_holds	= dn->dn_tx_holds.rc_count;
+	    __entry->dn_holds		= dn->dn_holds.rc_count;
+	    __entry->dn_have_spill	= dn->dn_have_spill;
+
+	    __entry->refcount		= refcount;
+	    __entry->dbufs		= dbufs;
+	),
+	/*
+	 * The arguments below are positional: keep them in the same order
+	 * as the conversions in the format string.
+	 */
+	TP_printk("dn { object %llu type %d bonuslen %u bonustype %u "
+	    "nblkptr %u checksum %u compress %u nlevels %u indblkshift %u "
+	    "datablkshift %u moved %u datablkszsec %u datablksz %u "
+	    "maxblkid %llu tx_holds %lli holds %lli have_spill %d } "
+	    "refcount %lli dbufs %u",
+	    __entry->dn_object, __entry->dn_type, __entry->dn_bonuslen,
+	    __entry->dn_bonustype, __entry->dn_nblkptr, __entry->dn_checksum,
+	    __entry->dn_compress, __entry->dn_nlevels, __entry->dn_indblkshift,
+	    __entry->dn_datablkshift, __entry->dn_moved,
+	    __entry->dn_datablkszsec, __entry->dn_datablksz,
+	    __entry->dn_maxblkid, __entry->dn_tx_holds, __entry->dn_holds,
+	    __entry->dn_have_spill, __entry->refcount, __entry->dbufs)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_dnode_move_class with the class's
+ * (dnode_t *, int64_t, uint32_t) prototype.
+ */
+#define	DEFINE_DNODE_MOVE_EVENT(name) \
+DEFINE_EVENT(zfs_dnode_move_class, name, \
+	TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs), \
+	TP_ARGS(dn, refcount, dbufs))
+/* END CSTYLED */
+DEFINE_DNODE_MOVE_EVENT(zfs_dnode__move);
+
+#endif /* _TRACE_DNODE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_dnode
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined: declare the
+ * three-argument probe instead of a tracepoint event.
+ */
+DEFINE_DTRACE_PROBE3(dnode__move);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_multilist.h b/zfs/include/os/linux/zfs/sys/trace_multilist.h
new file mode 100644
index 0000000..fe68d52
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_multilist.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_multilist
+
+#if !defined(_TRACE_MULTILIST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_MULTILIST_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     multilist_t *, ...,
+ *     unsigned int, ...,
+ *     void *, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
+	TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj),
+	TP_ARGS(ml, sublist_idx, obj),
+	/*
+	 * Two multilist_t fields plus the sublist index are recorded; the
+	 * obj argument is part of the prototype but is not stored.
+	 */
+	TP_STRUCT__entry(
+	    __field(size_t,		ml_offset)
+	    __field(uint64_t,		ml_num_sublists)
+
+	    __field(unsigned int,	sublist_idx)
+	),
+	TP_fast_assign(
+	    __entry->ml_offset		= ml->ml_offset;
+	    __entry->ml_num_sublists	= ml->ml_num_sublists;
+
+	    __entry->sublist_idx	= sublist_idx;
+	),
+	/* ml_offset is declared size_t above, hence the %zu conversion. */
+	TP_printk("ml { offset %zu numsublists %llu sublistidx %u } ",
+	    __entry->ml_offset, __entry->ml_num_sublists, __entry->sublist_idx)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_multilist_insert_remove_class; used
+ * below for both the insert and the remove probe.
+ */
+#define	DEFINE_MULTILIST_INSERT_REMOVE_EVENT(name) \
+DEFINE_EVENT(zfs_multilist_insert_remove_class, name, \
+	TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj), \
+	TP_ARGS(ml, sublist_idx, obj))
+/* END CSTYLED */
+DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__insert);
+DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__remove);
+
+#endif /* _TRACE_MULTILIST_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_multilist
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined: declare the
+ * three-argument probes instead of tracepoint events.
+ */
+DEFINE_DTRACE_PROBE3(multilist__insert);
+DEFINE_DTRACE_PROBE3(multilist__remove);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_rrwlock.h b/zfs/include/os/linux/zfs/sys/trace_rrwlock.h
new file mode 100644
index 0000000..4c74d62
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_rrwlock.h

@@ -0,0 +1,31 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+/* No tracepoint events are declared in this header for this case. */
+
+#else
+
+/* Argument-less probes, declared only when tracepoints are unavailable. */
+DEFINE_DTRACE_PROBE(zfs__rrwfastpath__rdmiss);
+DEFINE_DTRACE_PROBE(zfs__rrwfastpath__exitmiss);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_txg.h b/zfs/include/os/linux/zfs/sys/trace_txg.h
new file mode 100644
index 0000000..23d5d35
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_txg.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_txg
+
+#if !defined(_TRACE_TXG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_TXG_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     dsl_pool_t *, ...,
+ *     uint64_t, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_txg_class,
+	TP_PROTO(dsl_pool_t *dp, uint64_t txg),
+	TP_ARGS(dp, txg),
+	/* Only txg is recorded; dp is in the prototype but is not stored. */
+	TP_STRUCT__entry(
+	    __field(uint64_t, txg)
+	),
+	TP_fast_assign(
+	    __entry->txg = txg;
+	),
+	TP_printk("txg %llu", __entry->txg)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_txg_class; all six events below
+ * share the (dsl_pool_t *, uint64_t) prototype.
+ */
+#define	DEFINE_TXG_EVENT(name) \
+DEFINE_EVENT(zfs_txg_class, name, \
+	TP_PROTO(dsl_pool_t *dp, uint64_t txg), \
+	TP_ARGS(dp, txg))
+/* END CSTYLED */
+DEFINE_TXG_EVENT(zfs_dsl_pool_sync__done);
+DEFINE_TXG_EVENT(zfs_txg__quiescing);
+DEFINE_TXG_EVENT(zfs_txg__opened);
+DEFINE_TXG_EVENT(zfs_txg__syncing);
+DEFINE_TXG_EVENT(zfs_txg__synced);
+DEFINE_TXG_EVENT(zfs_txg__quiesced);
+
+#endif /* _TRACE_TXG_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_txg
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined: one two-argument
+ * probe per event defined in the tracepoint branch above.
+ */
+DEFINE_DTRACE_PROBE2(dsl_pool_sync__done);
+DEFINE_DTRACE_PROBE2(txg__quiescing);
+DEFINE_DTRACE_PROBE2(txg__opened);
+DEFINE_DTRACE_PROBE2(txg__syncing);
+DEFINE_DTRACE_PROBE2(txg__synced);
+DEFINE_DTRACE_PROBE2(txg__quiesced);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_vdev.h b/zfs/include/os/linux/zfs/sys/trace_vdev.h
new file mode 100644
index 0000000..5071144
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_vdev.h

@@ -0,0 +1,140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+/*
+ * If tracepoints are available define dtrace_probe events for vdev
+ * related probes.  Definitions in include/os/linux/spl/sys/trace.h
+ * will map DTRACE_PROBE* calls to tracepoints.
+ */
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_vdev
+
+#if !defined(_TRACE_VDEV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_VDEV_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     spa_t *, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_removing_class_3,
+	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size),
+	TP_ARGS(spa, offset, size),
+	/*
+	 * The spa pointer itself is stored and later printed with %p; the
+	 * spa_t is not dereferenced here.
+	 */
+	TP_STRUCT__entry(
+	    __field(spa_t *,	vdev_spa)
+	    __field(uint64_t,	vdev_offset)
+	    __field(uint64_t,	vdev_size)
+	),
+	TP_fast_assign(
+	    __entry->vdev_spa	= spa;
+	    __entry->vdev_offset = offset;
+	    __entry->vdev_size	= size;
+	),
+	TP_printk("spa %p offset %llu size %llu",
+	    __entry->vdev_spa, __entry->vdev_offset,
+	    __entry->vdev_size)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_removing_class_3 with the class's
+ * (spa_t *, uint64_t, uint64_t) prototype.
+ */
+#define	DEFINE_REMOVE_FREE_EVENT(name) \
+DEFINE_EVENT(zfs_removing_class_3, name, \
+	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size), \
+	TP_ARGS(spa, offset, size))
+/* END CSTYLED */
+DEFINE_REMOVE_FREE_EVENT(zfs_remove__free__synced);
+DEFINE_REMOVE_FREE_EVENT(zfs_remove__free__unvisited);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ *     spa_t *, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...,
+ *     uint64_t, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_removing_class_4,
+	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size, uint64_t txg),
+	TP_ARGS(spa, offset, size, txg),
+	/*
+	 * Same record as zfs_removing_class_3 with a trailing txg field; the
+	 * spa pointer itself is stored and printed with %p.
+	 */
+	TP_STRUCT__entry(
+	    __field(spa_t *,	vdev_spa)
+	    __field(uint64_t,	vdev_offset)
+	    __field(uint64_t,	vdev_size)
+	    __field(uint64_t,	vdev_txg)
+	),
+	TP_fast_assign(
+	    __entry->vdev_spa	= spa;
+	    __entry->vdev_offset = offset;
+	    __entry->vdev_size	= size;
+	    __entry->vdev_txg	= txg;
+	),
+	TP_printk("spa %p offset %llu size %llu txg %llu",
+	    __entry->vdev_spa, __entry->vdev_offset,
+	    __entry->vdev_size, __entry->vdev_txg)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_removing_class_4 with the class's
+ * (spa_t *, uint64_t, uint64_t, uint64_t) prototype.
+ */
+#define	DEFINE_REMOVE_FREE_EVENT_TXG(name) \
+DEFINE_EVENT(zfs_removing_class_4, name, \
+	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size, uint64_t txg), \
+	TP_ARGS(spa, offset, size, txg))
+/* END CSTYLED */
+DEFINE_REMOVE_FREE_EVENT_TXG(zfs_remove__free__inflight);
+
+#endif /* _TRACE_VDEV_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_vdev
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * When tracepoints are not available, a DEFINE_DTRACE_PROBE* macro is
+ * needed for each DTRACE_PROBE.  These will be used to generate stub
+ * tracing functions and prototypes for those functions.  See
+ * include/os/linux/spl/sys/trace.h.
+ */
+
+DEFINE_DTRACE_PROBE3(remove__free__synced);
+DEFINE_DTRACE_PROBE3(remove__free__unvisited);
+DEFINE_DTRACE_PROBE4(remove__free__inflight);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_zfs.h b/zfs/include/os/linux/zfs/sys/trace_zfs.h
new file mode 100644
index 0000000..0e19f8d
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_zfs.h

@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _OS_LINUX_ZFS_TRACE_H
+#define	_OS_LINUX_ZFS_TRACE_H
+
+#include <sys/multilist.h>
+#include <sys/arc_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dmu_tx.h>
+#include <sys/dnode.h>
+#include <sys/zfs_znode.h>
+#include <sys/zil_impl.h>
+#include <sys/zrlock.h>
+
+#include <sys/trace.h>
+#include <sys/trace_acl.h>
+#include <sys/trace_arc.h>
+#include <sys/trace_dbgmsg.h>
+#include <sys/trace_dbuf.h>
+#include <sys/trace_dmu.h>
+#include <sys/trace_dnode.h>
+#include <sys/trace_multilist.h>
+#include <sys/trace_rrwlock.h>
+#include <sys/trace_txg.h>
+#include <sys/trace_vdev.h>
+#include <sys/trace_zil.h>
+#include <sys/trace_zio.h>
+#include <sys/trace_zrlock.h>
+
+#endif /* _OS_LINUX_ZFS_TRACE_H */

diff --git a/zfs/include/os/linux/zfs/sys/trace_zil.h b/zfs/include/os/linux/zfs/sys/trace_zil.h
new file mode 100644
index 0000000..526846e
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_zil.h

@@ -0,0 +1,229 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_zil
+
+#if !defined(_TRACE_ZIL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ZIL_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Record fields for a zilog_t snapshot.  Meant to be expanded inside
+ * TP_STRUCT__entry(); the matching assignments, format string and
+ * argument list are ZILOG_TP_FAST_ASSIGN, ZILOG_TP_PRINTK_FMT and
+ * ZILOG_TP_PRINTK_ARGS, and all four must list the fields consistently.
+ */
+#define	ZILOG_TP_STRUCT_ENTRY						    \
+		__field(uint64_t,	zl_lr_seq)			    \
+		__field(uint64_t,	zl_commit_lr_seq)		    \
+		__field(uint64_t,	zl_destroy_txg)			    \
+		__field(uint64_t,	zl_replaying_seq)		    \
+		__field(uint32_t,	zl_suspend)			    \
+		__field(uint8_t,	zl_suspending)			    \
+		__field(uint8_t,	zl_keep_first)			    \
+		__field(uint8_t,	zl_replay)			    \
+		__field(uint8_t,	zl_stop_sync)			    \
+		__field(uint8_t,	zl_logbias)			    \
+		__field(uint8_t,	zl_sync)			    \
+		__field(int,		zl_parse_error)			    \
+		__field(uint64_t,	zl_parse_blk_seq)		    \
+		__field(uint64_t,	zl_parse_lr_seq)		    \
+		__field(uint64_t,	zl_parse_blk_count)		    \
+		__field(uint64_t,	zl_parse_lr_count)		    \
+		__field(uint64_t,	zl_cur_used)			    \
+		__field(clock_t,	zl_replay_time)			    \
+		__field(uint64_t,	zl_replay_blks)
+
+/*
+ * Copy each ZILOG_TP_STRUCT_ENTRY field from the zilog_t.  Meant to be
+ * expanded inside TP_fast_assign(); it refers to a variable named
+ * "zilog", so the enclosing event's TP_PROTO must use that name.
+ */
+#define	ZILOG_TP_FAST_ASSIGN						    \
+		__entry->zl_lr_seq		= zilog->zl_lr_seq;	    \
+		__entry->zl_commit_lr_seq	= zilog->zl_commit_lr_seq;  \
+		__entry->zl_destroy_txg	= zilog->zl_destroy_txg;	    \
+		__entry->zl_replaying_seq	= zilog->zl_replaying_seq;  \
+		__entry->zl_suspend		= zilog->zl_suspend;	    \
+		__entry->zl_suspending	= zilog->zl_suspending;		    \
+		__entry->zl_keep_first	= zilog->zl_keep_first;		    \
+		__entry->zl_replay		= zilog->zl_replay;	    \
+		__entry->zl_stop_sync	= zilog->zl_stop_sync;		    \
+		__entry->zl_logbias		= zilog->zl_logbias;	    \
+		__entry->zl_sync		= zilog->zl_sync;	    \
+		__entry->zl_parse_error	= zilog->zl_parse_error;	    \
+		__entry->zl_parse_blk_seq	= zilog->zl_parse_blk_seq;  \
+		__entry->zl_parse_lr_seq	= zilog->zl_parse_lr_seq;   \
+		__entry->zl_parse_blk_count	= zilog->zl_parse_blk_count;\
+		__entry->zl_parse_lr_count	= zilog->zl_parse_lr_count; \
+		__entry->zl_cur_used	= zilog->zl_cur_used;		    \
+		__entry->zl_replay_time	= zilog->zl_replay_time;	    \
+		__entry->zl_replay_blks	= zilog->zl_replay_blks;
+
+/*
+ * printk format for the zilog_t snapshot; the conversions must stay in
+ * the same order as the values in ZILOG_TP_PRINTK_ARGS.  zl_parse_error
+ * is recorded as an int, so it is printed with %d.
+ * NOTE(review): zl_replay_time is a clock_t printed with %lu -- confirm
+ * clock_t's signedness on the target before changing that conversion.
+ */
+#define	ZILOG_TP_PRINTK_FMT						    \
+	"zl { lr_seq %llu commit_lr_seq %llu destroy_txg %llu "		    \
+	"replaying_seq %llu suspend %u suspending %u keep_first %u "	    \
+	"replay %u stop_sync %u logbias %u sync %u "			    \
+	"parse_error %d parse_blk_seq %llu parse_lr_seq %llu "		    \
+	"parse_blk_count %llu parse_lr_count %llu "			    \
+	"cur_used %llu replay_time %lu replay_blks %llu }"
+
+/*
+ * Values for ZILOG_TP_PRINTK_FMT, in the same order as its conversions.
+ * Expands to a bare comma-separated list with no trailing comma.
+ */
+#define	ZILOG_TP_PRINTK_ARGS						    \
+	    __entry->zl_lr_seq, __entry->zl_commit_lr_seq,		    \
+	    __entry->zl_destroy_txg, __entry->zl_replaying_seq,		    \
+	    __entry->zl_suspend, __entry->zl_suspending,		    \
+	    __entry->zl_keep_first, __entry->zl_replay,			    \
+	    __entry->zl_stop_sync, __entry->zl_logbias, __entry->zl_sync,   \
+	    __entry->zl_parse_error, __entry->zl_parse_blk_seq,		    \
+	    __entry->zl_parse_lr_seq, __entry->zl_parse_blk_count,	    \
+	    __entry->zl_parse_lr_count, __entry->zl_cur_used,		    \
+	    __entry->zl_replay_time, __entry->zl_replay_blks
+
+/*
+ * Record fields for an itx_t snapshot: five itx_* members followed by
+ * four lrc_* members.  Meant to be expanded inside TP_STRUCT__entry().
+ */
+#define	ITX_TP_STRUCT_ENTRY						    \
+		__field(itx_wr_state_t,	itx_wr_state)			    \
+		__field(uint8_t,	itx_sync)			    \
+		__field(zil_callback_t,	itx_callback)			    \
+		__field(void *,		itx_callback_data)		    \
+		__field(uint64_t,	itx_oid)			    \
+									    \
+		__field(uint64_t,	lrc_txtype)			    \
+		__field(uint64_t,	lrc_reclen)			    \
+		__field(uint64_t,	lrc_txg)			    \
+		__field(uint64_t,	lrc_seq)
+
+/*
+ * Copy each ITX_TP_STRUCT_ENTRY field from a variable named "itx"; the
+ * lrc_* values are read from its embedded itx_lr member.
+ */
+#define	ITX_TP_FAST_ASSIGN						    \
+		__entry->itx_wr_state		= itx->itx_wr_state;	    \
+		__entry->itx_sync		= itx->itx_sync;	    \
+		__entry->itx_callback		= itx->itx_callback;	    \
+		__entry->itx_callback_data	= itx->itx_callback_data;   \
+		__entry->itx_oid		= itx->itx_oid;		    \
+									    \
+		__entry->lrc_txtype		= itx->itx_lr.lrc_txtype;   \
+		__entry->lrc_reclen		= itx->itx_lr.lrc_reclen;   \
+		__entry->lrc_txg		= itx->itx_lr.lrc_txg;	    \
+		__entry->lrc_seq		= itx->itx_lr.lrc_seq;
+
+/*
+ * printk format for the itx_t snapshot; the callback and its data are
+ * printed as pointers (%p), and the lrc_* values form the nested group.
+ */
+#define	ITX_TP_PRINTK_FMT						    \
+	"itx { wr_state %u sync %u callback %p callback_data %p oid %llu"   \
+	" { txtype %llu reclen %llu txg %llu seq %llu } }"
+
+/* Values for ITX_TP_PRINTK_FMT, in the same order as its conversions. */
+#define	ITX_TP_PRINTK_ARGS						    \
+	    __entry->itx_wr_state, __entry->itx_sync, __entry->itx_callback,\
+	    __entry->itx_callback_data, __entry->itx_oid,		    \
+	    __entry->lrc_txtype, __entry->lrc_reclen, __entry->lrc_txg,	    \
+	    __entry->lrc_seq
+
+/*
+ * Record fields for a zil_commit_waiter_t snapshot.  Meant to be expanded
+ * inside TP_STRUCT__entry().  The last line carries no line continuation,
+ * so the macro does not depend on the blank line that follows it.
+ */
+#define	ZCW_TP_STRUCT_ENTRY						    \
+		__field(lwb_t *,	zcw_lwb)			    \
+		__field(boolean_t,	zcw_done)			    \
+		__field(int,		zcw_zio_error)
+
+/* Copy each ZCW_TP_STRUCT_ENTRY field from a variable named "zcw". */
+#define	ZCW_TP_FAST_ASSIGN						    \
+		__entry->zcw_lwb		= zcw->zcw_lwb;		    \
+		__entry->zcw_done		= zcw->zcw_done;	    \
+		__entry->zcw_zio_error		= zcw->zcw_zio_error;
+
+/*
+ * printk format for the waiter snapshot; zcw_zio_error is recorded as an
+ * int, so it is printed with %d.
+ */
+#define	ZCW_TP_PRINTK_FMT						    \
+	"zcw { lwb %p done %u error %d }"
+
+/* Values for ZCW_TP_PRINTK_FMT, in the same order as its conversions. */
+#define	ZCW_TP_PRINTK_ARGS						    \
+	    __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_zio_error
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     zilog_t *, ...,
+ *     itx_t *, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_zil_process_itx_class,
+	/* Argument names must be zilog and itx: the *_FAST_ASSIGN macros use them. */
+	TP_PROTO(zilog_t *zilog, itx_t *itx),
+	TP_ARGS(zilog, itx),
+	/* Record = zilog snapshot followed by itx snapshot. */
+	TP_STRUCT__entry(
+	    ZILOG_TP_STRUCT_ENTRY
+	    ITX_TP_STRUCT_ENTRY
+	),
+	TP_fast_assign(
+	    ZILOG_TP_FAST_ASSIGN
+	    ITX_TP_FAST_ASSIGN
+	),
+	/* Format and argument halves are concatenated in the same order. */
+	TP_printk(
+	    ZILOG_TP_PRINTK_FMT " " ITX_TP_PRINTK_FMT,
+	    ZILOG_TP_PRINTK_ARGS, ITX_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_zil_process_itx_class; used below
+ * for the commit-itx and normal-itx probes.
+ */
+#define	DEFINE_ZIL_PROCESS_ITX_EVENT(name) \
+DEFINE_EVENT(zfs_zil_process_itx_class, name, \
+	TP_PROTO(zilog_t *zilog, itx_t *itx), \
+	TP_ARGS(zilog, itx))
+DEFINE_ZIL_PROCESS_ITX_EVENT(zfs_zil__process__commit__itx);
+DEFINE_ZIL_PROCESS_ITX_EVENT(zfs_zil__process__normal__itx);
+/* END CSTYLED */
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     zilog_t *, ...,
+ *     zil_commit_waiter_t *, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_zil_commit_io_error_class,
+	/* Argument names must be zilog and zcw: the *_FAST_ASSIGN macros use them. */
+	TP_PROTO(zilog_t *zilog, zil_commit_waiter_t *zcw),
+	TP_ARGS(zilog, zcw),
+	/* Record = zilog snapshot followed by commit-waiter snapshot. */
+	TP_STRUCT__entry(
+	    ZILOG_TP_STRUCT_ENTRY
+	    ZCW_TP_STRUCT_ENTRY
+	),
+	TP_fast_assign(
+	    ZILOG_TP_FAST_ASSIGN
+	    ZCW_TP_FAST_ASSIGN
+	),
+	/* Format and argument halves are concatenated in the same order. */
+	TP_printk(
+	    ZILOG_TP_PRINTK_FMT " " ZCW_TP_PRINTK_FMT,
+	    ZILOG_TP_PRINTK_ARGS, ZCW_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_zil_commit_io_error_class with the
+ * class's (zilog_t *, zil_commit_waiter_t *) prototype.
+ */
+#define	DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(name) \
+DEFINE_EVENT(zfs_zil_commit_io_error_class, name, \
+	TP_PROTO(zilog_t *zilog, zil_commit_waiter_t *zcw), \
+	TP_ARGS(zilog, zcw))
+DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
+/* END CSTYLED */
+
+#endif /* _TRACE_ZIL_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_zil
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined: one two-argument
+ * probe per event defined in the tracepoint branch above.
+ */
+DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
+DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
+DEFINE_DTRACE_PROBE2(zil__commit__io__error);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_zio.h b/zfs/include/os/linux/zfs/sys/trace_zio.h
new file mode 100644
index 0000000..8655e24
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_zio.h

@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/list.h>
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_zio
+
+#if !defined(_TRACE_ZIO_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ZIO_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+#include <sys/trace_common.h> /* For ZIO macros */
+
+/* BEGIN CSTYLED */
+/*
+ * Two-argument event: records the zio snapshot (via the ZIO_TP_* macros)
+ * plus the caller-supplied "now" timestamp.
+ * NOTE(review): now is an hrtime_t printed with %llu -- confirm the
+ * signedness of hrtime_t matches that conversion.
+ */
+TRACE_EVENT(zfs_zio__delay__miss,
+	TP_PROTO(zio_t *zio, hrtime_t now),
+	TP_ARGS(zio, now),
+	TP_STRUCT__entry(
+	    ZIO_TP_STRUCT_ENTRY
+	    __field(hrtime_t, now)
+	),
+	TP_fast_assign(
+	    ZIO_TP_FAST_ASSIGN
+	    __entry->now = now;
+	),
+	/* "now" is printed first, ahead of the zio fields. */
+	TP_printk("now %llu " ZIO_TP_PRINTK_FMT, __entry->now,
+	    ZIO_TP_PRINTK_ARGS)
+);
+
+/*
+ * Three-argument event: records the zio snapshot (via the ZIO_TP_*
+ * macros) plus the caller-supplied "now" and "diff" values.
+ * NOTE(review): both are hrtime_t printed with %llu -- confirm the
+ * signedness of hrtime_t matches that conversion.
+ */
+TRACE_EVENT(zfs_zio__delay__hit,
+	TP_PROTO(zio_t *zio, hrtime_t now, hrtime_t diff),
+	TP_ARGS(zio, now, diff),
+	TP_STRUCT__entry(
+	    ZIO_TP_STRUCT_ENTRY
+	    __field(hrtime_t, now)
+	    __field(hrtime_t, diff)
+	),
+	TP_fast_assign(
+	    ZIO_TP_FAST_ASSIGN
+	    __entry->now = now;
+	    __entry->diff = diff;
+	),
+	/* "now" and "diff" are printed first, ahead of the zio fields. */
+	TP_printk("now %llu diff %llu " ZIO_TP_PRINTK_FMT, __entry->now,
+	    __entry->diff, ZIO_TP_PRINTK_ARGS)
+);
+
+/*
+ * One-argument event: records only the zio snapshot, with no fields
+ * beyond those supplied by the ZIO_TP_* macros.
+ */
+TRACE_EVENT(zfs_zio__delay__skip,
+	TP_PROTO(zio_t *zio),
+	TP_ARGS(zio),
+	TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
+	TP_fast_assign(ZIO_TP_FAST_ASSIGN),
+	TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+#endif /* _TRACE_ZIO_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_zio
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined.  The probe arity
+ * matches each event's TP_PROTO above: miss (2), hit (3), skip (1).
+ */
+DEFINE_DTRACE_PROBE2(zio__delay__miss);
+DEFINE_DTRACE_PROBE3(zio__delay__hit);
+DEFINE_DTRACE_PROBE1(zio__delay__skip);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/trace_zrlock.h b/zfs/include/os/linux/zfs/sys/trace_zrlock.h
new file mode 100644
index 0000000..23f9577
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/trace_zrlock.h

@@ -0,0 +1,94 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define	TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define	TRACE_SYSTEM_VAR zfs_zrlock
+
+#if !defined(_TRACE_ZRLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define	_TRACE_ZRLOCK_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ *     zrlock_t *, ...,
+ *     kthread_t *, ...,
+ *     uint32_t, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_zrlock_class,
+	TP_PROTO(zrlock_t *zrl, kthread_t *owner, uint32_t n),
+	TP_ARGS(zrl, owner, n),
+	/*
+	 * refcount and n are always recorded; owner_pid and caller exist
+	 * only in ZFS_DEBUG builds, so the record layout differs per build.
+	 */
+	TP_STRUCT__entry(
+	    __field(int32_t,		refcount)
+#ifdef	ZFS_DEBUG
+	    __field(pid_t,		owner_pid)
+	    __field(const char *,	caller)
+#endif
+	    __field(uint32_t,		n)
+	),
+	TP_fast_assign(
+	    __entry->refcount	= zrl->zr_refcount;
+#ifdef	ZFS_DEBUG
+	    /* A NULL owner is recorded as pid 0. */
+	    __entry->owner_pid	= owner ? owner->pid : 0;
+	    /* Stores the string pointer itself, or "(null)" when it is NULL. */
+	    __entry->caller = zrl->zr_caller ? zrl->zr_caller : "(null)";
+#endif
+	    __entry->n		= n;
+	),
+	/* The format string is selected to match the fields recorded above. */
+#ifdef	ZFS_DEBUG
+	TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u",
+	    __entry->refcount, __entry->owner_pid, __entry->caller,
+	    __entry->n)
+#else
+	TP_printk("zrl { refcount %d } n %u",
+	    __entry->refcount, __entry->n)
+#endif
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+/*
+ * Instantiate one named event of zfs_zrlock_class with the class's
+ * (zrlock_t *, kthread_t *, uint32_t) prototype.
+ */
+#define	DEFINE_ZRLOCK_EVENT(name) \
+DEFINE_EVENT(zfs_zrlock_class, name, \
+	TP_PROTO(zrlock_t *zrl, kthread_t *owner, uint32_t n), \
+	TP_ARGS(zrl, owner, n))
+/* END CSTYLED */
+DEFINE_ZRLOCK_EVENT(zfs_zrlock__reentry);
+
+#endif /* _TRACE_ZRLOCK_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define	TRACE_INCLUDE_PATH sys
+#define	TRACE_INCLUDE_FILE trace_zrlock
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * Fallback when HAVE_DECLARE_EVENT_CLASS is not defined: declare the
+ * three-argument probe instead of a tracepoint event.
+ */
+DEFINE_DTRACE_PROBE3(zrlock__reentry);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_bootenv_os.h b/zfs/include/os/linux/zfs/sys/zfs_bootenv_os.h
new file mode 100644
index 0000000..7b2f083
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_bootenv_os.h

@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_OS_H
+#define	_ZFS_BOOTENV_OS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	BOOTENV_OS		BE_LINUX_VENDOR
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_OS_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_context_os.h b/zfs/include/os/linux/zfs/sys/zfs_context_os.h
new file mode 100644
index 0000000..9e42605
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_context_os.h

@@ -0,0 +1,35 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef ZFS_CONTEXT_OS_H
+#define	ZFS_CONTEXT_OS_H
+
+#include <linux/dcache_compat.h>
+#include <linux/utsname_compat.h>
+#include <linux/compiler_compat.h>
+#include <linux/module.h>
+
+#if THREAD_SIZE >= 16384
+#define	HAVE_LARGE_STACKS	1	/* THREAD_SIZE is at least 16 KiB */
+#endif
+
+#endif /* ZFS_CONTEXT_OS_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_ctldir.h b/zfs/include/os/linux/zfs/sys/zfs_ctldir.h
new file mode 100644
index 0000000..beee349
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_ctldir.h

@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * LLNL-CODE-403049.
+ * Rewritten for Linux by:
+ *   Rohan Puri <rohan.puri15@gmail.com>
+ *   Brian Behlendorf <behlendorf1@llnl.gov>
+ */
+
+#ifndef	_ZFS_CTLDIR_H
+#define	_ZFS_CTLDIR_H
+
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
+
+#define	ZFS_CTLDIR_NAME		".zfs"
+#define	ZFS_SNAPDIR_NAME	"snapshot"
+#define	ZFS_SHAREDIR_NAME	"shares"
+
+#define	zfs_has_ctldir(zdp)	\
+	((zdp)->z_id == ZTOZSB(zdp)->z_root && \
+	(ZTOZSB(zdp)->z_ctldir != NULL))
+#define	zfs_show_ctldir(zdp)	\
+	(zfs_has_ctldir(zdp) && \
+	(ZTOZSB(zdp)->z_show_ctldir))
+
+extern int zfs_expire_snapshot;
+
+/* zfsctl generic functions */
+extern int zfsctl_create(zfsvfs_t *);
+extern void zfsctl_destroy(zfsvfs_t *);
+extern struct inode *zfsctl_root(znode_t *);
+extern void zfsctl_init(void);
+extern void zfsctl_fini(void);
+extern boolean_t zfsctl_is_node(struct inode *ip);
+extern boolean_t zfsctl_is_snapdir(struct inode *ip);
+extern int zfsctl_fid(struct inode *ip, fid_t *fidp);
+
+/* zfsctl '.zfs' functions */
+extern int zfsctl_root_lookup(struct inode *dip, const char *name,
+    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
+    pathname_t *realpnp);
+
+/* zfsctl '.zfs/snapshot' functions */
+extern int zfsctl_snapdir_lookup(struct inode *dip, const char *name,
+    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
+    pathname_t *realpnp);
+extern int zfsctl_snapdir_rename(struct inode *sdip, const char *sname,
+    struct inode *tdip, const char *tname, cred_t *cr, int flags);
+extern int zfsctl_snapdir_remove(struct inode *dip, const char *name,
+    cred_t *cr, int flags);
+extern int zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname,
+    vattr_t *vap, struct inode **ipp, cred_t *cr, int flags);
+extern int zfsctl_snapshot_mount(struct path *path, int flags);
+extern int zfsctl_snapshot_unmount(const char *snapname, int flags);
+extern int zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid,
+    int delay);
+extern int zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid,
+    int gen, struct inode **ipp);
+
+/* zfsctl '.zfs/shares' functions */
+extern int zfsctl_shares_lookup(struct inode *dip, char *name,
+    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
+    pathname_t *realpnp);
+
+/*
+ * These inode numbers are reserved for the .zfs control directory.
+ * It is important that they be no larger than 48 bits because only
+ * 6 bytes are reserved in the NFS file handle for the object number.
+ * However, they should be as large as possible to avoid conflicts
+ * with the objects which are assigned monotonically by the dmu.
+ */
+#define	ZFSCTL_INO_ROOT		0x0000FFFFFFFFFFFFULL
+#define	ZFSCTL_INO_SHARES	0x0000FFFFFFFFFFFEULL
+#define	ZFSCTL_INO_SNAPDIR	0x0000FFFFFFFFFFFDULL
+#define	ZFSCTL_INO_SNAPDIRS	0x0000FFFFFFFFFFFCULL
+
+#define	ZFSCTL_EXPIRE_SNAPSHOT	300
+
+#endif	/* _ZFS_CTLDIR_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_dir.h b/zfs/include/os/linux/zfs/sys/zfs_dir.h
new file mode 100644
index 0000000..0f15e43
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_dir.h

@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_FS_ZFS_DIR_H
+#define	_SYS_FS_ZFS_DIR_H
+
+#include <sys/pathname.h>
+#include <sys/dmu.h>
+#include <sys/zfs_znode.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* zfs_dirent_lock() flags */
+#define	ZNEW		0x0001		/* entry should not exist */
+#define	ZEXISTS		0x0002		/* entry should exist */
+#define	ZSHARED		0x0004		/* shared access (zfs_dirlook()) */
+#define	ZXATTR		0x0008		/* we want the xattr dir */
+#define	ZRENAMING	0x0010		/* znode is being renamed */
+#define	ZCILOOK		0x0020		/* case-insensitive lookup requested */
+#define	ZCIEXACT	0x0040		/* c-i requires c-s match (rename) */
+#define	ZHAVELOCK	0x0080		/* z_name_lock is already held */
+
+/* mknode flags */
+#define	IS_ROOT_NODE	0x01		/* create a root node */
+#define	IS_XATTR	0x02		/* create an extended attribute node */
+#define	IS_TMPFILE	0x04		/* create a tmpfile */
+
+extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
+    int, int *, pathname_t *);
+extern void zfs_dirent_unlock(zfs_dirlock_t *);
+extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
+extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
+    boolean_t *);
+extern int zfs_dirlook(znode_t *, char *, znode_t **, int, int *,
+    pathname_t *);
+extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
+    uint_t, znode_t **, zfs_acl_ids_t *);
+extern void zfs_rmnode(znode_t *);
+extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
+extern boolean_t zfs_dirempty(znode_t *);
+extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
+extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
+extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
+extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
+extern int zfs_get_xattrdir(znode_t *, znode_t **, cred_t *, int);
+extern int zfs_make_xattrdir(znode_t *, vattr_t *, znode_t **, cred_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_FS_ZFS_DIR_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/zfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
new file mode 100644
index 0000000..7b4a1aa
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_vfsops_os.h

@@ -0,0 +1,260 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ */
+
+#ifndef	_SYS_FS_ZFS_VFSOPS_H
+#define	_SYS_FS_ZFS_VFSOPS_H
+
+#include <sys/dataset_kstats.h>
+#include <sys/isa_defs.h>
+#include <sys/types32.h>
+#include <sys/list.h>
+#include <sys/vfs.h>
+#include <sys/zil.h>
+#include <sys/sa.h>
+#include <sys/rrwlock.h>
+#include <sys/dsl_dataset.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/objlist.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef struct zfsvfs zfsvfs_t;
+struct znode;
+
+/*
+ * This structure emulates the vfs_t from other platforms.  Its purpose
+ * is to facilitate the handling of mount options and minimize structural
+ * differences between the platforms.
+ */
+typedef struct vfs {
+	struct zfsvfs	*vfs_data;
+	char		*vfs_mntpoint;	/* Primary mount point */
+	uint64_t	vfs_xattr;
+	boolean_t	vfs_readonly;
+	boolean_t	vfs_do_readonly;
+	boolean_t	vfs_setuid;
+	boolean_t	vfs_do_setuid;
+	boolean_t	vfs_exec;
+	boolean_t	vfs_do_exec;
+	boolean_t	vfs_devices;
+	boolean_t	vfs_do_devices;
+	boolean_t	vfs_do_xattr;
+	boolean_t	vfs_atime;
+	boolean_t	vfs_do_atime;
+	boolean_t	vfs_relatime;
+	boolean_t	vfs_do_relatime;
+	boolean_t	vfs_nbmand;
+	boolean_t	vfs_do_nbmand;
+} vfs_t;
+
+typedef struct zfs_mnt {
+	const char	*mnt_osname;	/* Objset name */
+	char		*mnt_data;	/* Raw mount options */
+} zfs_mnt_t;
+
+struct zfsvfs {
+	vfs_t		*z_vfs;		/* generic fs struct */
+	struct super_block *z_sb;	/* generic super_block */
+	struct zfsvfs	*z_parent;	/* parent fs */
+	objset_t	*z_os;		/* objset reference */
+	uint64_t	z_flags;	/* super_block flags */
+	uint64_t	z_root;		/* id of root znode */
+	uint64_t	z_unlinkedobj;	/* id of unlinked zapobj */
+	uint64_t	z_max_blksz;	/* maximum block size for files */
+	uint64_t	z_fuid_obj;	/* fuid table object number */
+	uint64_t	z_fuid_size;	/* fuid table size */
+	avl_tree_t	z_fuid_idx;	/* fuid tree keyed by index */
+	avl_tree_t	z_fuid_domain;	/* fuid tree keyed by domain */
+	krwlock_t	z_fuid_lock;	/* fuid lock */
+	boolean_t	z_fuid_loaded;	/* fuid tables are loaded */
+	boolean_t	z_fuid_dirty;   /* need to sync fuid table ? */
+	struct zfs_fuid_info	*z_fuid_replay; /* fuid info for replay */
+	zilog_t		*z_log;		/* intent log pointer */
+	uint_t		z_acl_mode;	/* acl chmod/mode behavior */
+	uint_t		z_acl_inherit;	/* acl inheritance behavior */
+	uint_t		z_acl_type;	/* type of ACL usable on this FS */
+	zfs_case_t	z_case;		/* case-sense */
+	boolean_t	z_utf8;		/* utf8-only */
+	int		z_norm;		/* normalization flags */
+	boolean_t	z_relatime;	/* enable relatime mount option */
+	boolean_t	z_unmounted;	/* unmounted */
+	rrmlock_t	z_teardown_lock; /* see ZFS_TEARDOWN_* macros */
+	krwlock_t	z_teardown_inactive_lock;
+	list_t		z_all_znodes;	/* all znodes in the fs */
+	uint64_t	z_nr_znodes;	/* number of znodes in the fs */
+	unsigned long	z_rollback_time; /* last online rollback time */
+	unsigned long	z_snap_defer_time; /* last snapshot unmount deferral */
+	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
+	arc_prune_t	*z_arc_prune;	/* called by ARC to prune caches */
+	struct inode	*z_ctldir;	/* .zfs directory inode */
+	boolean_t	z_show_ctldir;	/* expose .zfs in the root dir */
+	boolean_t	z_issnap;	/* true if this is a snapshot */
+	boolean_t	z_vscan;	/* virus scan on/off */
+	boolean_t	z_use_fuids;	/* version allows fuids */
+	boolean_t	z_replay;	/* set during ZIL replay */
+	boolean_t	z_use_sa;	/* version allows system attributes */
+	boolean_t	z_xattr_sa;	/* allow xattrs to be stored as SA */
+	boolean_t	z_draining;	/* is true when drain is active */
+	boolean_t	z_drain_cancel; /* signal the unlinked drain to stop */
+	uint64_t	z_version;	/* ZPL version */
+	uint64_t	z_shares_dir;	/* hidden shares dir */
+	dataset_kstats_t	z_kstat;	/* fs kstats */
+	kmutex_t	z_lock;
+	uint64_t	z_userquota_obj;
+	uint64_t	z_groupquota_obj;
+	uint64_t	z_userobjquota_obj;
+	uint64_t	z_groupobjquota_obj;
+	uint64_t	z_projectquota_obj;
+	uint64_t	z_projectobjquota_obj;
+	uint64_t	z_replay_eof;	/* New end of file - replay only */
+	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
+	uint64_t	z_hold_size;	/* znode hold array size */
+	avl_tree_t	*z_hold_trees;	/* znode hold trees */
+	kmutex_t	*z_hold_locks;	/* znode hold locks */
+	taskqid_t	z_drain_task;	/* task id for the unlink drain task */
+};
+
+#define	ZFS_TEARDOWN_INIT(zfsvfs)		\
+	rrm_init(&(zfsvfs)->z_teardown_lock, B_FALSE)
+
+#define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
+	rrm_destroy(&(zfsvfs)->z_teardown_lock)
+/* NOTE(review): rw_tryenter() is handed an rrmlock_t here - confirm. */
+#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
+	rw_tryenter(&(zfsvfs)->z_teardown_lock, RW_READER)
+/* Enter the teardown lock as a reader; the caller adds the semicolon. */
+#define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
+	rrm_enter_read(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag)	\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, tag)	\
+	rrm_enter(&(zfsvfs)->z_teardown_lock, RW_WRITER, tag)
+/* Takes no tag, so FTAG is used. NOTE(review): confirm against rrm_exit(). */
+#define	ZFS_TEARDOWN_EXIT_WRITE(zfsvfs)		\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+
+#define	ZFS_TEARDOWN_EXIT(zfsvfs, tag)		\
+	rrm_exit(&(zfsvfs)->z_teardown_lock, tag)
+
+#define	ZFS_TEARDOWN_READ_HELD(zfsvfs)		\
+	RRM_READ_HELD(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_WRITE_HELD(zfsvfs)		\
+	RRM_WRITE_HELD(&(zfsvfs)->z_teardown_lock)
+
+#define	ZFS_TEARDOWN_HELD(zfsvfs)		\
+	RRM_LOCK_HELD(&(zfsvfs)->z_teardown_lock)
+
+#define	ZSB_XATTR	0x0001		/* Enable user xattrs */
+
+/*
+ * Allow a maximum number of links.  While ZFS does not internally limit
+ * this the inode->i_nlink member is defined as an unsigned int.  To be
+ * safe we use 2^31-1 as the limit.
+ */
+#define	ZFS_LINK_MAX		((1U << 31) - 1U)
+
+/*
+ * Normal filesystems (those not under .zfs/snapshot) have a total
+ * file ID size limited to 12 bytes (including the length field) due to
+ * NFSv2 protocol's limitation of 32 bytes for a filehandle.  For historical
+ * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
+ * (although the NFSv3 protocol actually permits a maximum of 64 bytes).  It
+ * is not possible to expand beyond 12 bytes without abandoning support
+ * of NFSv2.
+ *
+ * For normal filesystems, we partition up the available space as follows:
+ *	2 bytes		fid length (required)
+ *	6 bytes		object number (48 bits)
+ *	4 bytes		generation number (32 bits)
+ *
+ * We reserve only 48 bits for the object number, as this is the limit
+ * currently defined and imposed by the DMU.
+ */
+typedef struct zfid_short {
+	uint16_t	zf_len;
+	uint8_t		zf_object[6];		/* obj[i] = obj >> (8 * i) */
+	uint8_t		zf_gen[4];		/* gen[i] = gen >> (8 * i) */
+} zfid_short_t;
+
+/*
+ * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
+ * (including the length field).  This makes files under .zfs/snapshot
+ * accessible by NFSv3 and NFSv4, but not NFSv2.
+ *
+ * For files under .zfs/snapshot, we partition up the available space
+ * as follows:
+ *	2 bytes		fid length (required)
+ *	6 bytes		object number (48 bits)
+ *	4 bytes		generation number (32 bits)
+ *	6 bytes		objset id (48 bits)
+ *	4 bytes		currently just zero (32 bits)
+ *
+ * We reserve only 48 bits for the object number and objset id, as these are
+ * the limits currently defined and imposed by the DMU.
+ */
+typedef struct zfid_long {
+	zfid_short_t	z_fid;
+	uint8_t		zf_setid[6];		/* obj[i] = obj >> (8 * i) */
+	uint8_t		zf_setgen[4];		/* gen[i] = gen >> (8 * i) */
+} zfid_long_t;
+
+#define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
+#define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))
+
+extern void zfs_init(void);
+extern void zfs_fini(void);
+
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
+extern int zfs_end_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
+extern void zfs_exit_fs(zfsvfs_t *zfsvfs);
+extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
+extern int zfsvfs_create(const char *name, boolean_t readony, zfsvfs_t **zfvp);
+extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
+extern void zfsvfs_free(zfsvfs_t *zfsvfs);
+extern int zfs_check_global_label(const char *dsname, const char *hexsl);
+
+extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
+extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent);
+extern void zfs_preumount(struct super_block *sb);
+extern int zfs_umount(struct super_block *sb);
+extern int zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm);
+extern int zfs_statvfs(struct inode *ip, struct kstatfs *statp);
+extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp);
+extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan,
+    int *objects);
+extern int zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop,
+    uint64_t *val, char *setpoint);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_FS_ZFS_VFSOPS_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_vnops_os.h b/zfs/include/os/linux/zfs/sys/zfs_vnops_os.h
new file mode 100644
index 0000000..331f2e2
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_vnops_os.h

@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_SYS_FS_ZFS_VNOPS_OS_H
+#define	_SYS_FS_ZFS_VNOPS_OS_H
+
+#include <sys/vnode.h>
+#include <sys/xvattr.h>
+#include <sys/uio.h>
+#include <sys/cred.h>
+#include <sys/fcntl.h>
+#include <sys/pathname.h>
+#include <sys/zpl.h>
+#include <sys/zfs_file.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr);
+extern int zfs_close(struct inode *ip, int flag, cred_t *cr);
+extern int zfs_write_simple(znode_t *zp, const void *data, size_t len,
+    loff_t pos, size_t *resid);
+extern int zfs_lookup(znode_t *dzp, char *nm, znode_t **zpp, int flags,
+    cred_t *cr, int *direntflags, pathname_t *realpnp);
+extern int zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
+    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp);
+extern int zfs_tmpfile(struct inode *dip, vattr_t *vapzfs, int excl,
+    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
+extern int zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags);
+extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
+    znode_t **zpp, cred_t *cr, int flags, vsecattr_t *vsecp);
+extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
+    cred_t *cr, int flags);
+extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
+extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
+extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
+extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
+    char *tnm, cred_t *cr, int flags);
+extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap,
+    char *link, znode_t **zpp, cred_t *cr, int flags);
+extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr);
+extern int zfs_link(znode_t *tdzp, znode_t *szp,
+    char *name, cred_t *cr, int flags);
+extern void zfs_inactive(struct inode *ip);
+extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
+    offset_t offset, cred_t *cr);
+extern int zfs_fid(struct inode *ip, fid_t *fidp);
+extern int zfs_getpage(struct inode *ip, struct page *pp);
+extern int zfs_putpage(struct inode *ip, struct page *pp,
+    struct writeback_control *wbc, boolean_t for_sync);
+extern int zfs_dirty_inode(struct inode *ip, int flags);
+extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
+    size_t len, unsigned long vm_flags);
+extern void zfs_zrele_async(znode_t *zp);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_FS_ZFS_VNOPS_OS_H */

diff --git a/zfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/zfs/include/os/linux/zfs/sys/zfs_znode_impl.h
new file mode 100644
index 0000000..9b9ac7a
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zfs_znode_impl.h

@@ -0,0 +1,197 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+#ifndef	_SYS_ZFS_ZNODE_IMPL_H
+#define	_SYS_ZFS_ZNODE_IMPL_H
+
+#ifndef _KERNEL
+#error "no user serviceable parts within"
+#endif
+
+#include <sys/isa_defs.h>
+#include <sys/types32.h>
+#include <sys/list.h>
+#include <sys/dmu.h>
+#include <sys/sa.h>
+#include <sys/time.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/rrwlock.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_stat.h>
+#include <sys/zfs_rlock.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+#define	ZNODE_OS_FIELDS			\
+	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+	struct inode	z_inode;
+#else
+#define	ZNODE_OS_FIELDS			\
+	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+	struct inode	z_inode;                                     \
+	boolean_t	z_is_mapped;    /* we are mmap'ed */
+#endif
+
+/*
+ * Convert between znode pointers and inode pointers
+ */
+#define	ZTOI(znode)	(&((znode)->z_inode))
+#define	ITOZ(inode)	(container_of((inode), znode_t, z_inode))
+#define	ZTOZSB(znode)	((zfsvfs_t *)(ZTOI(znode)->i_sb->s_fs_info))
+#define	ITOZSB(inode)	((zfsvfs_t *)((inode)->i_sb->s_fs_info))
+
+#define	ZTOTYPE(zp)	(ZTOI(zp)->i_mode)
+#define	ZTOGID(zp) (ZTOI(zp)->i_gid)
+#define	ZTOUID(zp) (ZTOI(zp)->i_uid)
+#define	ZTONLNK(zp) (ZTOI(zp)->i_nlink)
+
+#define	Z_ISBLK(type) S_ISBLK(type)
+#define	Z_ISCHR(type) S_ISCHR(type)
+#define	Z_ISLNK(type) S_ISLNK(type)
+#define	Z_ISDEV(type)	(S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
+#define	Z_ISDIR(type)	S_ISDIR(type)
+
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+#define	zn_has_cached_data(zp, start, end) \
+	filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
+#else
+#define	zn_has_cached_data(zp, start, end) \
+	((zp)->z_is_mapped)
+#endif
+
+#define	zn_flush_cached_data(zp, sync)	write_inode_now(ZTOI(zp), sync)
+#define	zn_rlimit_fsize(zp, uio)	(0)
+
+/*
+ * zhold() wraps igrab() on Linux, and igrab() may fail when the
+ * inode is in the process of being deleted.  As zhold() must only be
+ * called when a ref already exists - so the inode cannot be
+ * mid-deletion - we VERIFY() this.
+ */
+#define	zhold(zp)	VERIFY3P(igrab(ZTOI((zp))), !=, NULL)
+#define	zrele(zp)	iput(ZTOI((zp)))
+
+/* Called on entry to each ZFS inode and vfs operation. */
+#define	ZFS_ENTER_ERROR(zfsvfs, error)				\
+do {								\
+	ZFS_TEARDOWN_ENTER_READ(zfsvfs, FTAG);			\
+	if (unlikely((zfsvfs)->z_unmounted)) {			\
+		ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);		\
+		return (error);					\
+	}							\
+} while (0)
+#define	ZFS_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, EIO)
+#define	ZPL_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, -EIO)
+
+/* Must be called before exiting the operation. */
+#define	ZFS_EXIT(zfsvfs)					\
+do {								\
+	zfs_exit_fs(zfsvfs);					\
+	ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);			\
+} while (0)
+/* ZPL counterpart of ZFS_EXIT; only drops the teardown read hold. */
+#define	ZPL_EXIT(zfsvfs)					\
+do {								\
+	ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);			\
+} while (0)
+
+/* Verifies the znode is valid. */
+#define	ZFS_VERIFY_ZP_ERROR(zp, error)				\
+do {								\
+	if (unlikely((zp)->z_sa_hdl == NULL)) {			\
+		ZFS_EXIT(ZTOZSB(zp));				\
+		return (error);					\
+	}							\
+} while (0)
+#define	ZFS_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, EIO)
+#define	ZPL_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, -EIO)
+
+/*
+ * Macros for dmu_buf_hold; ZFS_OBJ_HASH masks obj with (z_hold_size - 1).
+ */
+#define	ZFS_OBJ_MTX_SZ		64
+#define	ZFS_OBJ_MTX_MAX		(1024 * 1024)
+#define	ZFS_OBJ_HASH(zfsvfs, obj)	((obj) & ((zfsvfs)->z_hold_size - 1))
+
+extern unsigned int zfs_object_mutex_size;
+
+/*
+ * Encode ZFS stored time values from a struct timespec / struct timespec64.
+ */
+#define	ZFS_TIME_ENCODE(tp, stmp)		\
+do {						\
+	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
+	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
+} while (0)
+
+#if defined(HAVE_INODE_TIMESPEC64_TIMES)
+/*
+ * Decode ZFS stored time values to a struct timespec64
+ * 4.18 and newer kernels.
+ */
+#define	ZFS_TIME_DECODE(tp, stmp)		\
+do {						\
+	(tp)->tv_sec = (time64_t)(stmp)[0];	\
+	(tp)->tv_nsec = (long)(stmp)[1];	\
+} while (0)
+#else
+/*
+ * Decode ZFS stored time values to a struct timespec
+ * 4.17 and older kernels.
+ */
+#define	ZFS_TIME_DECODE(tp, stmp)		\
+do {						\
+	(tp)->tv_sec = (time_t)(stmp)[0];	\
+	(tp)->tv_nsec = (long)(stmp)[1];	\
+} while (0)
+#endif /* HAVE_INODE_TIMESPEC64_TIMES */
+
+#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp)
+
+struct znode;
+
+extern int	zfs_sync(struct super_block *, int, cred_t *);
+extern int	zfs_inode_alloc(struct super_block *, struct inode **ip);
+extern void	zfs_inode_destroy(struct inode *);
+extern void	zfs_mark_inode_dirty(struct inode *);
+extern boolean_t zfs_relatime_need_update(const struct inode *);
+
+#if defined(HAVE_UIO_RW)
+extern caddr_t zfs_map_page(page_t *, enum seg_rw);
+extern void zfs_unmap_page(page_t *, caddr_t);
+#endif /* HAVE_UIO_RW */
+
+extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
+extern int zfsfstype;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ZFS_ZNODE_IMPL_H */

diff --git a/zfs/include/os/linux/zfs/sys/zpl.h b/zfs/include/os/linux/zfs/sys/zpl.h
new file mode 100644
index 0000000..4e08470
--- /dev/null
+++ b/zfs/include/os/linux/zfs/sys/zpl.h

@@ -0,0 +1,210 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef	_SYS_ZPL_H
+#define	_SYS_ZPL_H
+
+#include <sys/mntent.h>
+#include <sys/vfs.h>
+#include <linux/aio.h>
+#include <linux/dcache_compat.h>
+#include <linux/exportfs.h>
+#include <linux/falloc.h>
+#include <linux/parser.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/vfs_compat.h>
+#include <linux/writeback.h>
+#include <linux/xattr_compat.h>
+
+/* zpl_inode.c */
+extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
+    umode_t mode, cred_t *cr);
+
+extern const struct inode_operations zpl_inode_operations;
+extern const struct inode_operations zpl_dir_inode_operations;
+extern const struct inode_operations zpl_symlink_inode_operations;
+extern const struct inode_operations zpl_special_inode_operations;
+
+/* zpl_file.c */
+extern const struct address_space_operations zpl_address_space_operations;
+extern const struct file_operations zpl_file_operations;
+extern const struct file_operations zpl_dir_file_operations;
+
+/* zpl_super.c */
+extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
+
+extern const struct super_operations zpl_super_operations;
+extern const struct export_operations zpl_export_operations;
+extern struct file_system_type zpl_fs_type;
+
+/* zpl_xattr.c */
+extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size);
+extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
+    const struct qstr *qstr);
+#if defined(CONFIG_FS_POSIX_ACL)
+#if defined(HAVE_SET_ACL)
+/*
+ * zpl_set_acl() is declared with whichever signature matches the detected
+ * kernel interface; exactly one of the prototypes below is visible.
+ */
+#if defined(HAVE_SET_ACL_IDMAP_DENTRY)
+extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+    struct posix_acl *acl, int type);
+#elif defined(HAVE_SET_ACL_USERNS)
+extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
+    struct posix_acl *acl, int type);
+#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
+extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type);
+#else
+extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type);
+#endif /* HAVE_SET_ACL_IDMAP_DENTRY */
+#endif /* HAVE_SET_ACL */
+/* zpl_get_acl() takes an extra "rcu" flag on the first pair of interfaces. */
+#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
+extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu);
+#elif defined(HAVE_GET_ACL)
+extern struct posix_acl *zpl_get_acl(struct inode *ip, int type);
+#endif
+extern int zpl_init_acl(struct inode *ip, struct inode *dir);
+extern int zpl_chmod_acl(struct inode *ip);
+#else
+/*
+ * CONFIG_FS_POSIX_ACL is not defined: provide inline stand-ins that ignore
+ * their arguments and always return 0, so callers need no #ifdef of their own.
+ */
+static inline int
+zpl_init_acl(struct inode *ip, struct inode *dir)
+{
+	return (0);
+}
+
+static inline int
+zpl_chmod_acl(struct inode *ip)
+{
+	return (0);
+}
+#endif /* CONFIG_FS_POSIX_ACL */
+
+extern xattr_handler_t *zpl_xattr_handlers[];
+
+/* zpl_ctldir.c */
+extern const struct file_operations zpl_fops_root;
+extern const struct inode_operations zpl_ops_root;
+
+extern const struct file_operations zpl_fops_snapdir;
+extern const struct inode_operations zpl_ops_snapdir;
+extern const struct dentry_operations zpl_dops_snapdirs;
+
+extern const struct file_operations zpl_fops_shares;
+extern const struct inode_operations zpl_ops_shares;
+
+#if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED)
+
+/*
+ * Initializer for a zpl_dir_context_t, which in this branch is the kernel's
+ * struct dir_context.  Only .actor and .pos are set; _dirent is accepted so
+ * the macro has the same arity as the variant in the #else branch, but it
+ * is not used here.
+ */
+#define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
+	.actor = _actor,				\
+	.pos = _pos,					\
+}
+
+typedef struct dir_context zpl_dir_context_t;
+
+/* The zpl_dir_emit*() names map directly onto the kernel's dir_emit*(). */
+#define	zpl_dir_emit		dir_emit
+#define	zpl_dir_emit_dot	dir_emit_dot
+#define	zpl_dir_emit_dotdot	dir_emit_dotdot
+#define	zpl_dir_emit_dots	dir_emit_dots
+
+#else
+
+/*
+ * Locally defined directory context, used when neither HAVE_VFS_ITERATE nor
+ * HAVE_VFS_ITERATE_SHARED is defined.
+ */
+typedef struct zpl_dir_context {
+	void *dirent;		/* passed as the actor's first argument */
+	const filldir_t actor;	/* callback invoked once per emitted entry */
+	loff_t pos;		/* position handed to the actor */
+} zpl_dir_context_t;
+
+/* Initializer for the structure above; all three members are set. */
+#define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
+	.dirent = _dirent,				\
+	.actor = _actor,				\
+	.pos = _pos,					\
+}
+
+/*
+ * Pass one directory entry to the context's actor callback.
+ * Returns true when the actor returns zero, false otherwise.
+ */
+static inline bool
+zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen,
+    uint64_t ino, unsigned type)
+{
+	bool nonzero;
+
+	nonzero = ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type);
+	return (!nonzero);
+}
+
+/*
+ * Emit the "." entry, using the inode number of the file's own inode.
+ * Returns true when the actor returns zero, false otherwise.
+ */
+static inline bool
+zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx)
+{
+	bool nonzero;
+
+	nonzero = ctx->actor(ctx->dirent, ".", 1, ctx->pos,
+	    file_inode(file)->i_ino, DT_DIR);
+	return (!nonzero);
+}
+
+/*
+ * Emit the ".." entry, using parent_ino() of the file's dentry.
+ * Returns true when the actor returns zero, false otherwise.
+ */
+static inline bool
+zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx)
+{
+	bool nonzero;
+
+	nonzero = ctx->actor(ctx->dirent, "..", 2, ctx->pos,
+	    parent_ino(file_dentry(file)), DT_DIR);
+	return (!nonzero);
+}
+
+/*
+ * Emit "." and ".." as needed for the current position: position 0 emits
+ * "." and advances to 1, position 1 emits ".." and advances to 2.  Returns
+ * false as soon as either emit fails (leaving pos unchanged for that step),
+ * true otherwise.
+ */
+static inline bool
+zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
+{
+	if (ctx->pos == 0) {
+		if (zpl_dir_emit_dot(file, ctx))
+			ctx->pos = 1;
+		else
+			return (false);
+	}
+	if (ctx->pos == 1) {
+		if (zpl_dir_emit_dotdot(file, ctx))
+			ctx->pos = 2;
+		else
+			return (false);
+	}
+	return (true);
+}
+#endif /* HAVE_VFS_ITERATE */
+
+/*
+ * zpl_inode_timestamp_truncate(ts, ip) forwards to whichever helper was
+ * detected: timestamp_truncate(ts, ip), or timespec64_trunc() /
+ * timespec_trunc() called with the granularity (ip)->i_sb->s_time_gran.
+ */
+#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
+#define	zpl_inode_timestamp_truncate(ts, ip)	timestamp_truncate(ts, ip)
+#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
+#define	zpl_inode_timestamp_truncate(ts, ip)	\
+	timespec64_trunc(ts, (ip)->i_sb->s_time_gran)
+#else
+#define	zpl_inode_timestamp_truncate(ts, ip)	\
+	timespec_trunc(ts, (ip)->i_sb->s_time_gran)
+#endif
+
+/*
+ * zpl_inode_owner_or_capable() always takes two arguments.  In the
+ * HAVE_INODE_OWNER_OR_CAPABLE case the first one is dropped and only ip is
+ * forwarded; in the other two cases both are forwarded unchanged.  The build
+ * fails if none of the three variants was detected.
+ */
+#if defined(HAVE_INODE_OWNER_OR_CAPABLE)
+#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ip)
+#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS)
+#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ns, ip)
+#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP)
+#define	zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip)
+#else
+#error "Unsupported kernel"
+#endif
+
+/*
+ * zpl_setattr_prepare() always takes three arguments; the first (ns) is
+ * forwarded only when a namespace/idmap-aware setattr_prepare() was detected
+ * and is dropped otherwise.
+ */
+#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
+#define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(ns, dentry, ia)
+#else
+/*
+ * Use kernel-provided version, or our own from
+ * linux/vfs_compat.h
+ */
+#define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(dentry, ia)
+#endif
+
+#endif	/* _SYS_ZPL_H */

diff --git a/zfs/include/spl/rpc/Makefile.am b/zfs/include/spl/rpc/Makefile.am
deleted file mode 100644
index 5110cc0..0000000
--- a/zfs/include/spl/rpc/Makefile.am
+++ /dev/null

@@ -1,7 +0,0 @@
-KERNEL_H = \
-	$(top_srcdir)/include/spl/rpc/xdr.h
-
-if CONFIG_KERNEL
-kerneldir = @prefix@/src/zfs-$(VERSION)/include/spl/rpc
-kernel_HEADERS = $(KERNEL_H)
-endif

diff --git a/zfs/include/spl/rpc/xdr.h b/zfs/include/spl/rpc/xdr.h
deleted file mode 100644
index 0b39b46..0000000
--- a/zfs/include/spl/rpc/xdr.h
+++ /dev/null

@@ -1,156 +0,0 @@
-/*
- *  Copyright (c) 2008 Sun Microsystems, Inc.
- *  Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_RPC_XDR_H
-#define	_SPL_RPC_XDR_H
-
-#include <sys/types.h>
-
-typedef int bool_t;
-
-/*
- * XDR enums and types.
- */
-enum xdr_op {
-	XDR_ENCODE,
-	XDR_DECODE
-};
-
-struct xdr_ops;
-
-typedef struct {
-	struct xdr_ops	*x_ops;	/* Let caller know xdrmem_create() succeeds */
-	caddr_t		x_addr;	/* Current buffer addr */
-	caddr_t		x_addr_end;	/* End of the buffer */
-	enum xdr_op	x_op;	/* Stream direction */
-} XDR;
-
-typedef bool_t (*xdrproc_t)(XDR *xdrs, void *ptr);
-
-struct xdr_ops {
-	bool_t (*xdr_control)(XDR *, int, void *);
-
-	bool_t (*xdr_char)(XDR *, char *);
-	bool_t (*xdr_u_short)(XDR *, unsigned short *);
-	bool_t (*xdr_u_int)(XDR *, unsigned *);
-	bool_t (*xdr_u_longlong_t)(XDR *, u_longlong_t *);
-
-	bool_t (*xdr_opaque)(XDR *, caddr_t, const uint_t);
-	bool_t (*xdr_string)(XDR *, char **, const uint_t);
-	bool_t (*xdr_array)(XDR *, caddr_t *, uint_t *, const uint_t,
-	    const uint_t, const xdrproc_t);
-};
-
-/*
- * XDR control operator.
- */
-#define	XDR_GET_BYTES_AVAIL 1
-
-struct xdr_bytesrec {
-	bool_t xc_is_last_record;
-	size_t xc_num_avail;
-};
-
-/*
- * XDR functions.
- */
-void xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
-    const enum xdr_op op);
-
-/* Currently not needed. If needed later, we'll add it to struct xdr_ops */
-#define	xdr_destroy(xdrs) ((void) 0)
-
-#define	xdr_control(xdrs, req, info) \
-	(xdrs)->x_ops->xdr_control((xdrs), (req), (info))
-
-/*
- * For precaution, the following are defined as static inlines instead of macros
- * to get some amount of type safety.
- *
- * Also, macros wouldn't work in the case where typecasting is done, because it
- * must be possible to reference the functions' addresses by these names.
- */
-static inline bool_t xdr_char(XDR *xdrs, char *cp)
-{
-	return (xdrs->x_ops->xdr_char(xdrs, cp));
-}
-
-static inline bool_t xdr_u_short(XDR *xdrs, unsigned short *usp)
-{
-	return (xdrs->x_ops->xdr_u_short(xdrs, usp));
-}
-
-static inline bool_t xdr_short(XDR *xdrs, short *sp)
-{
-	BUILD_BUG_ON(sizeof (short) != 2);
-	return (xdrs->x_ops->xdr_u_short(xdrs, (unsigned short *) sp));
-}
-
-static inline bool_t xdr_u_int(XDR *xdrs, unsigned *up)
-{
-	return (xdrs->x_ops->xdr_u_int(xdrs, up));
-}
-
-static inline bool_t xdr_int(XDR *xdrs, int *ip)
-{
-	BUILD_BUG_ON(sizeof (int) != 4);
-	return (xdrs->x_ops->xdr_u_int(xdrs, (unsigned *)ip));
-}
-
-static inline bool_t xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp)
-{
-	return (xdrs->x_ops->xdr_u_longlong_t(xdrs, ullp));
-}
-
-static inline bool_t xdr_longlong_t(XDR *xdrs, longlong_t *llp)
-{
-	BUILD_BUG_ON(sizeof (longlong_t) != 8);
-	return (xdrs->x_ops->xdr_u_longlong_t(xdrs, (u_longlong_t *)llp));
-}
-
-/*
- * Fixed-length opaque data.
- */
-static inline bool_t xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	return (xdrs->x_ops->xdr_opaque(xdrs, cp, cnt));
-}
-
-/*
- * Variable-length string.
- * The *sp buffer must have (maxsize + 1) bytes.
- */
-static inline bool_t xdr_string(XDR *xdrs, char **sp, const uint_t maxsize)
-{
-	return (xdrs->x_ops->xdr_string(xdrs, sp, maxsize));
-}
-
-/*
- * Variable-length arrays.
- */
-static inline bool_t xdr_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep,
-    const uint_t maxsize, const uint_t elsize, const xdrproc_t elproc)
-{
-	return xdrs->x_ops->xdr_array(xdrs, arrp, sizep, maxsize, elsize,
-	    elproc);
-}
-
-#endif /* SPL_RPC_XDR_H */

diff --git a/zfs/include/spl/sys/Makefile.am b/zfs/include/spl/sys/Makefile.am
deleted file mode 100644
index 3b5b275..0000000
--- a/zfs/include/spl/sys/Makefile.am
+++ /dev/null

@@ -1,61 +0,0 @@
-KERNEL_H = \
-	$(top_srcdir)/include/spl/sys/acl.h \
-	$(top_srcdir)/include/spl/sys/atomic.h \
-	$(top_srcdir)/include/spl/sys/byteorder.h \
-	$(top_srcdir)/include/spl/sys/callb.h \
-	$(top_srcdir)/include/spl/sys/callo.h \
-	$(top_srcdir)/include/spl/sys/cmn_err.h \
-	$(top_srcdir)/include/spl/sys/condvar.h \
-	$(top_srcdir)/include/spl/sys/console.h \
-	$(top_srcdir)/include/spl/sys/cred.h \
-	$(top_srcdir)/include/spl/sys/ctype.h \
-	$(top_srcdir)/include/spl/sys/debug.h \
-	$(top_srcdir)/include/spl/sys/disp.h \
-	$(top_srcdir)/include/spl/sys/dkio.h \
-	$(top_srcdir)/include/spl/sys/errno.h \
-	$(top_srcdir)/include/spl/sys/fcntl.h \
-	$(top_srcdir)/include/spl/sys/file.h \
-	$(top_srcdir)/include/spl/sys/inttypes.h \
-	$(top_srcdir)/include/spl/sys/isa_defs.h \
-	$(top_srcdir)/include/spl/sys/kmem_cache.h \
-	$(top_srcdir)/include/spl/sys/kmem.h \
-	$(top_srcdir)/include/spl/sys/kobj.h \
-	$(top_srcdir)/include/spl/sys/kstat.h \
-	$(top_srcdir)/include/spl/sys/list.h \
-	$(top_srcdir)/include/spl/sys/mode.h \
-	$(top_srcdir)/include/spl/sys/mutex.h \
-	$(top_srcdir)/include/spl/sys/param.h \
-	$(top_srcdir)/include/spl/sys/processor.h \
-	$(top_srcdir)/include/spl/sys/proc.h \
-	$(top_srcdir)/include/spl/sys/procfs_list.h \
-	$(top_srcdir)/include/spl/sys/random.h \
-	$(top_srcdir)/include/spl/sys/rwlock.h \
-	$(top_srcdir)/include/spl/sys/shrinker.h \
-	$(top_srcdir)/include/spl/sys/sid.h \
-	$(top_srcdir)/include/spl/sys/signal.h \
-	$(top_srcdir)/include/spl/sys/stat.h \
-	$(top_srcdir)/include/spl/sys/strings.h \
-	$(top_srcdir)/include/spl/sys/sunddi.h \
-	$(top_srcdir)/include/spl/sys/sysmacros.h \
-	$(top_srcdir)/include/spl/sys/systeminfo.h \
-	$(top_srcdir)/include/spl/sys/taskq.h \
-	$(top_srcdir)/include/spl/sys/thread.h \
-	$(top_srcdir)/include/spl/sys/time.h \
-	$(top_srcdir)/include/spl/sys/timer.h \
-	$(top_srcdir)/include/spl/sys/tsd.h \
-	$(top_srcdir)/include/spl/sys/types32.h \
-	$(top_srcdir)/include/spl/sys/types.h \
-	$(top_srcdir)/include/spl/sys/uio.h \
-	$(top_srcdir)/include/spl/sys/user.h \
-	$(top_srcdir)/include/spl/sys/vfs.h \
-	$(top_srcdir)/include/spl/sys/vmem.h \
-	$(top_srcdir)/include/spl/sys/vmsystm.h \
-	$(top_srcdir)/include/spl/sys/vnode.h \
-	$(top_srcdir)/include/spl/sys/wait.h \
-	$(top_srcdir)/include/spl/sys/zmod.h \
-	$(top_srcdir)/include/spl/sys/zone.h
-
-if CONFIG_KERNEL
-kerneldir = @prefix@/src/zfs-$(VERSION)/include/spl/sys
-kernel_HEADERS = $(KERNEL_H)
-endif

diff --git a/zfs/include/spl/sys/acl.h b/zfs/include/spl/sys/acl.h
deleted file mode 100644
index 9fc79c0..0000000
--- a/zfs/include/spl/sys/acl.h
+++ /dev/null

@@ -1,119 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_ACL_H
-#define	_SPL_ACL_H
-
-#include <sys/types.h>
-
-typedef struct ace {
-	uid_t a_who;
-	uint32_t a_access_mask;
-	uint16_t a_flags;
-	uint16_t a_type;
-} ace_t;
-
-typedef struct ace_object {
-	uid_t		a_who;		/* uid or gid */
-	uint32_t	a_access_mask;	/* read,write,... */
-	uint16_t	a_flags;	/* see below */
-	uint16_t	a_type;		/* allow or deny */
-	uint8_t		a_obj_type[16];	/* obj type */
-	uint8_t		a_inherit_obj_type[16];	/* inherit obj */
-} ace_object_t;
-
-#define	MAX_ACL_ENTRIES					1024
-
-#define	ACE_READ_DATA					0x00000001
-#define	ACE_LIST_DIRECTORY				0x00000001
-#define	ACE_WRITE_DATA					0x00000002
-#define	ACE_ADD_FILE					0x00000002
-#define	ACE_APPEND_DATA					0x00000004
-#define	ACE_ADD_SUBDIRECTORY				0x00000004
-#define	ACE_READ_NAMED_ATTRS				0x00000008
-#define	ACE_WRITE_NAMED_ATTRS				0x00000010
-#define	ACE_EXECUTE					0x00000020
-#define	ACE_DELETE_CHILD				0x00000040
-#define	ACE_READ_ATTRIBUTES				0x00000080
-#define	ACE_WRITE_ATTRIBUTES				0x00000100
-#define	ACE_DELETE					0x00010000
-#define	ACE_READ_ACL					0x00020000
-#define	ACE_WRITE_ACL					0x00040000
-#define	ACE_WRITE_OWNER					0x00080000
-#define	ACE_SYNCHRONIZE					0x00100000
-
-#define	ACE_FILE_INHERIT_ACE				0x0001
-#define	ACE_DIRECTORY_INHERIT_ACE			0x0002
-#define	ACE_NO_PROPAGATE_INHERIT_ACE			0x0004
-#define	ACE_INHERIT_ONLY_ACE				0x0008
-#define	ACE_SUCCESSFUL_ACCESS_ACE_FLAG			0x0010
-#define	ACE_FAILED_ACCESS_ACE_FLAG			0x0020
-#define	ACE_IDENTIFIER_GROUP				0x0040
-#define	ACE_INHERITED_ACE				0x0080
-#define	ACE_OWNER					0x1000
-#define	ACE_GROUP					0x2000
-#define	ACE_EVERYONE					0x4000
-
-#define	ACE_ACCESS_ALLOWED_ACE_TYPE			0x0000
-#define	ACE_ACCESS_DENIED_ACE_TYPE			0x0001
-#define	ACE_SYSTEM_AUDIT_ACE_TYPE			0x0002
-#define	ACE_SYSTEM_ALARM_ACE_TYPE			0x0003
-
-#define	ACL_AUTO_INHERIT				0x0001
-#define	ACL_PROTECTED					0x0002
-#define	ACL_DEFAULTED					0x0004
-#define	ACL_FLAGS_ALL	(ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED)
-
-#define	ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE		0x04
-#define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE		0x05
-#define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE		0x06
-#define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE		0x07
-#define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE		0x08
-#define	ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE		0x09
-#define	ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE		0x0A
-#define	ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE	0x0B
-#define	ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE	0x0C
-#define	ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE		0x0D
-#define	ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE		0x0E
-#define	ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE	0x0F
-#define	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE	0x10
-
-#define	ACE_ALL_TYPES	0x001F
-
-#define	ACE_TYPE_FLAGS	(ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP)
-
-/* BEGIN CSTYLED */
-#define	ACE_ALL_PERMS	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
-     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
-     ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
-     ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
-     ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
-/* END CSTYLED */
-
-#define	VSA_ACE						0x0010
-#define	VSA_ACECNT					0x0020
-#define	VSA_ACE_ALLTYPES				0x0040
-#define	VSA_ACE_ACLFLAGS				0x0080
-
-#endif /* _SPL_ACL_H */

diff --git a/zfs/include/spl/sys/atomic.h b/zfs/include/spl/sys/atomic.h
deleted file mode 100644
index 51b5479..0000000
--- a/zfs/include/spl/sys/atomic.h
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_ATOMIC_H
-#define	_SPL_ATOMIC_H
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <sys/types.h>
-
-/*
- * Map the atomic_* functions to the Linux counterparts.  This relies on the
- * fact that the atomic types are internally really a uint32 or uint64.  If
- * this were to change an alternate approach would be needed.
- *
- * N.B. Due to the limitations of the original API atomicity is not strictly
- * preserved when using the 64-bit functions on a 32-bit system.  In order
- * to support this all consumers would need to be updated to use the Linux
- * provided atomic_t and atomic64_t types.
- */
-#define	atomic_inc_32(v)	atomic_inc((atomic_t *)(v))
-#define	atomic_dec_32(v)	atomic_dec((atomic_t *)(v))
-#define	atomic_add_32(v, i)	atomic_add((i), (atomic_t *)(v))
-#define	atomic_sub_32(v, i)	atomic_sub((i), (atomic_t *)(v))
-#define	atomic_inc_32_nv(v)	atomic_inc_return((atomic_t *)(v))
-#define	atomic_dec_32_nv(v)	atomic_dec_return((atomic_t *)(v))
-#define	atomic_add_32_nv(v, i)	atomic_add_return((i), (atomic_t *)(v))
-#define	atomic_sub_32_nv(v, i)	atomic_sub_return((i), (atomic_t *)(v))
-#define	atomic_cas_32(v, x, y)	atomic_cmpxchg((atomic_t *)(v), x, y)
-#define	atomic_swap_32(v, x)	atomic_xchg((atomic_t *)(v), x)
-#define	atomic_inc_64(v)	atomic64_inc((atomic64_t *)(v))
-#define	atomic_dec_64(v)	atomic64_dec((atomic64_t *)(v))
-#define	atomic_add_64(v, i)	atomic64_add((i), (atomic64_t *)(v))
-#define	atomic_sub_64(v, i)	atomic64_sub((i), (atomic64_t *)(v))
-#define	atomic_inc_64_nv(v)	atomic64_inc_return((atomic64_t *)(v))
-#define	atomic_dec_64_nv(v)	atomic64_dec_return((atomic64_t *)(v))
-#define	atomic_add_64_nv(v, i)	atomic64_add_return((i), (atomic64_t *)(v))
-#define	atomic_sub_64_nv(v, i)	atomic64_sub_return((i), (atomic64_t *)(v))
-#define	atomic_cas_64(v, x, y)	atomic64_cmpxchg((atomic64_t *)(v), x, y)
-#define	atomic_swap_64(v, x)	atomic64_xchg((atomic64_t *)(v), x)
-
-#ifdef _LP64
-static __inline__ void *
-atomic_cas_ptr(volatile void *target,  void *cmp, void *newval)
-{
-	return ((void *)atomic_cas_64((volatile uint64_t *)target,
-	    (uint64_t)cmp, (uint64_t)newval));
-}
-#else /* _LP64 */
-static __inline__ void *
-atomic_cas_ptr(volatile void *target,  void *cmp, void *newval)
-{
-	return ((void *)atomic_cas_32((volatile uint32_t *)target,
-	    (uint32_t)cmp, (uint32_t)newval));
-}
-#endif /* _LP64 */
-
-#endif  /* _SPL_ATOMIC_H */

diff --git a/zfs/include/spl/sys/byteorder.h b/zfs/include/spl/sys/byteorder.h
deleted file mode 100644
index 4777079..0000000
--- a/zfs/include/spl/sys/byteorder.h
+++ /dev/null

@@ -1,78 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_BYTEORDER_H
-#define	_SPL_BYTEORDER_H
-
-#include <asm/byteorder.h>
-#include <sys/isa_defs.h>
-
-#define	BSWAP_8(x)	((x) & 0xff)
-#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
-#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
-#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
-
-#define	LE_16(x)	cpu_to_le16(x)
-#define	LE_32(x)	cpu_to_le32(x)
-#define	LE_64(x)	cpu_to_le64(x)
-#define	BE_16(x)	cpu_to_be16(x)
-#define	BE_32(x)	cpu_to_be32(x)
-#define	BE_64(x)	cpu_to_be64(x)
-
-#define	BE_IN8(xa) \
-	*((uint8_t *)(xa))
-
-#define	BE_IN16(xa) \
-	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
-
-#define	BE_IN32(xa) \
-	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
-
-#ifdef _BIG_ENDIAN
-static __inline__ uint64_t
-htonll(uint64_t n)
-{
-	return (n);
-}
-
-static __inline__ uint64_t
-ntohll(uint64_t n)
-{
-	return (n);
-}
-#else
-static __inline__ uint64_t
-htonll(uint64_t n)
-{
-	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
-}
-
-static __inline__ uint64_t
-ntohll(uint64_t n)
-{
-	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
-}
-#endif
-
-#endif /* SPL_BYTEORDER_H */

diff --git a/zfs/include/spl/sys/callb.h b/zfs/include/spl/sys/callb.h
deleted file mode 100644
index f1826bf..0000000
--- a/zfs/include/spl/sys/callb.h
+++ /dev/null

@@ -1,54 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CALLB_H
-#define	_SPL_CALLB_H
-
-#include <linux/module.h>
-#include <sys/mutex.h>
-
-#define	CALLB_CPR_ASSERT(cp)		ASSERT(MUTEX_HELD((cp)->cc_lockp));
-
-typedef struct callb_cpr {
-	kmutex_t	*cc_lockp;
-} callb_cpr_t;
-
-#define	CALLB_CPR_INIT(cp, lockp, func, name)   {               \
-	(cp)->cc_lockp = lockp;                                 \
-}
-
-#define	CALLB_CPR_SAFE_BEGIN(cp) {                              \
-	CALLB_CPR_ASSERT(cp);					\
-}
-
-#define	CALLB_CPR_SAFE_END(cp, lockp) {                         \
-	CALLB_CPR_ASSERT(cp);					\
-}
-
-#define	CALLB_CPR_EXIT(cp) {                                    \
-	ASSERT(MUTEX_HELD((cp)->cc_lockp));                     \
-	mutex_exit((cp)->cc_lockp);                             \
-}
-
-#endif  /* _SPL_CALLB_H */

diff --git a/zfs/include/spl/sys/callo.h b/zfs/include/spl/sys/callo.h
deleted file mode 100644
index c43ac92..0000000
--- a/zfs/include/spl/sys/callo.h
+++ /dev/null

@@ -1,52 +0,0 @@
-/*
- *  Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CALLO_H
-#define	_SPL_CALLO_H
-
-/*
- * Callout flags:
- *
- * CALLOUT_FLAG_ROUNDUP
- *      Roundup the expiration time to the next resolution boundary.
- *      If this flag is not specified, the expiration time is rounded down.
- * CALLOUT_FLAG_ABSOLUTE
- *      Normally, the expiration passed to the timeout API functions is an
- *      expiration interval. If this flag is specified, then it is
- *      interpreted as the expiration time itself.
- * CALLOUT_FLAG_HRESTIME
- *      Normally, callouts are not affected by changes to system time
- *      (hrestime). This flag is used to create a callout that is affected
- *      by system time. If system time changes, these timers must be
- *      handled in a special way (see callout.c). These are used by condition
- *      variables and LWP timers that need this behavior.
- * CALLOUT_FLAG_32BIT
- *      Legacy interfaces timeout() and realtime_timeout() pass this flag
- *      to timeout_generic() to indicate that a 32-bit ID should be allocated.
- */
-#define	CALLOUT_FLAG_ROUNDUP		0x1
-#define	CALLOUT_FLAG_ABSOLUTE		0x2
-#define	CALLOUT_FLAG_HRESTIME		0x4
-#define	CALLOUT_FLAG_32BIT		0x8
-
-#endif  /* _SPL_CALLB_H */

diff --git a/zfs/include/spl/sys/cmn_err.h b/zfs/include/spl/sys/cmn_err.h
deleted file mode 100644
index be57358..0000000
--- a/zfs/include/spl/sys/cmn_err.h
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CMN_ERR_H
-#define	_SPL_CMN_ERR_H
-
-#include <stdarg.h>
-
-#define	CE_CONT		0 /* continuation */
-#define	CE_NOTE		1 /* notice */
-#define	CE_WARN		2 /* warning */
-#define	CE_PANIC	3 /* panic */
-#define	CE_IGNORE	4 /* print nothing */
-
-extern void cmn_err(int, const char *, ...);
-extern void vcmn_err(int, const char *, va_list);
-extern void vpanic(const char *, va_list);
-
-#define	fm_panic	panic
-
-#endif /* SPL_CMN_ERR_H */

diff --git a/zfs/include/spl/sys/condvar.h b/zfs/include/spl/sys/condvar.h
deleted file mode 100644
index f1438c4..0000000
--- a/zfs/include/spl/sys/condvar.h
+++ /dev/null

@@ -1,83 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CONDVAR_H
-#define	_SPL_CONDVAR_H
-
-#include <linux/module.h>
-#include <sys/kmem.h>
-#include <sys/mutex.h>
-#include <sys/callo.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-
-/*
- * The kcondvar_t struct is protected by mutex taken externally before
- * calling any of the wait/signal funs, and passed into the wait funs.
- */
-#define	CV_MAGIC			0x346545f4
-#define	CV_DESTROY			0x346545f5
-
-typedef struct {
-	int cv_magic;
-	spl_wait_queue_head_t cv_event;
-	spl_wait_queue_head_t cv_destroy;
-	atomic_t cv_refs;
-	atomic_t cv_waiters;
-	kmutex_t *cv_mutex;
-} kcondvar_t;
-
-typedef enum { CV_DEFAULT = 0, CV_DRIVER } kcv_type_t;
-
-extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *);
-extern void __cv_destroy(kcondvar_t *);
-extern void __cv_wait(kcondvar_t *, kmutex_t *);
-extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
-extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *);
-extern int __cv_wait_sig(kcondvar_t *, kmutex_t *);
-extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
-extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
-extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
-extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
-    hrtime_t res, int flag);
-extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
-    hrtime_t res, int flag);
-extern void __cv_signal(kcondvar_t *);
-extern void __cv_broadcast(kcondvar_t *c);
-
-#define	cv_init(cvp, name, type, arg)		__cv_init(cvp, name, type, arg)
-#define	cv_destroy(cvp)				__cv_destroy(cvp)
-#define	cv_wait(cvp, mp)			__cv_wait(cvp, mp)
-#define	cv_wait_io(cvp, mp)			__cv_wait_io(cvp, mp)
-#define	cv_wait_io_sig(cvp, mp)			__cv_wait_io_sig(cvp, mp)
-#define	cv_wait_sig(cvp, mp)			__cv_wait_sig(cvp, mp)
-#define	cv_wait_interruptible(cvp, mp)		cv_wait_sig(cvp, mp)
-#define	cv_timedwait(cvp, mp, t)		__cv_timedwait(cvp, mp, t)
-#define	cv_timedwait_io(cvp, mp, t)		__cv_timedwait_io(cvp, mp, t)
-#define	cv_timedwait_sig(cvp, mp, t)		__cv_timedwait_sig(cvp, mp, t)
-#define	cv_timedwait_interruptible(cvp, mp, t)	cv_timedwait_sig(cvp, mp, t)
-#define	cv_signal(cvp)				__cv_signal(cvp)
-#define	cv_broadcast(cvp)			__cv_broadcast(cvp)
-
-#endif /* _SPL_CONDVAR_H */

diff --git a/zfs/include/spl/sys/console.h b/zfs/include/spl/sys/console.h
deleted file mode 100644
index 3469cb7..0000000
--- a/zfs/include/spl/sys/console.h
+++ /dev/null

@@ -1,44 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef	_SPL_CONSOLE_H
-#define	_SPL_CONSOLE_H
-
-void
-console_vprintf(const char *fmt, va_list args)
-{
-	vprintk(fmt, args);
-}
-
-void
-console_printf(const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	console_vprintf(fmt, args);
-	va_end(args);
-}
-
-#endif /* _SPL_CONSOLE_H */

diff --git a/zfs/include/spl/sys/cred.h b/zfs/include/spl/sys/cred.h
deleted file mode 100644
index fd06339..0000000
--- a/zfs/include/spl/sys/cred.h
+++ /dev/null

@@ -1,75 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CRED_H
-#define	_SPL_CRED_H
-
-#include <linux/module.h>
-#include <linux/cred.h>
-#include <sys/types.h>
-#include <sys/vfs.h>
-
-typedef struct cred cred_t;
-
-#define	kcred		((cred_t *)(init_task.cred))
-#define	CRED()		((cred_t *)current_cred())
-
-/* Linux 4.9 API change, GROUP_AT was removed */
-#ifndef GROUP_AT
-#define	GROUP_AT(gi, i)	((gi)->gid[i])
-#endif
-
-#ifdef HAVE_KUIDGID_T
-
-#define	KUID_TO_SUID(x)		(__kuid_val(x))
-#define	KGID_TO_SGID(x)		(__kgid_val(x))
-#define	SUID_TO_KUID(x)		(KUIDT_INIT(x))
-#define	SGID_TO_KGID(x)		(KGIDT_INIT(x))
-#define	KGIDP_TO_SGIDP(x)	(&(x)->val)
-
-#else /* HAVE_KUIDGID_T */
-
-#define	KUID_TO_SUID(x)		(x)
-#define	KGID_TO_SGID(x)		(x)
-#define	SUID_TO_KUID(x)		(x)
-#define	SGID_TO_KGID(x)		(x)
-#define	KGIDP_TO_SGIDP(x)	(x)
-
-#endif /* HAVE_KUIDGID_T */
-
-extern void crhold(cred_t *cr);
-extern void crfree(cred_t *cr);
-extern uid_t crgetuid(const cred_t *cr);
-extern uid_t crgetruid(const cred_t *cr);
-extern uid_t crgetsuid(const cred_t *cr);
-extern uid_t crgetfsuid(const cred_t *cr);
-extern gid_t crgetgid(const cred_t *cr);
-extern gid_t crgetrgid(const cred_t *cr);
-extern gid_t crgetsgid(const cred_t *cr);
-extern gid_t crgetfsgid(const cred_t *cr);
-extern int crgetngroups(const cred_t *cr);
-extern gid_t *crgetgroups(const cred_t *cr);
-extern int groupmember(gid_t gid, const cred_t *cr);
-
-#endif  /* _SPL_CRED_H */

diff --git a/zfs/include/spl/sys/ctype.h b/zfs/include/spl/sys/ctype.h
deleted file mode 100644
index 18beb1d..0000000
--- a/zfs/include/spl/sys/ctype.h
+++ /dev/null

@@ -1,30 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_CTYPE_H
-#define	_SPL_CTYPE_H
-
-#include <linux/ctype.h>
-
-#endif /* SPL_CTYPE_H */

diff --git a/zfs/include/spl/sys/debug.h b/zfs/include/spl/sys/debug.h
deleted file mode 100644
index ecda6bc..0000000
--- a/zfs/include/spl/sys/debug.h
+++ /dev/null

@@ -1,166 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
- * Available Solaris debug functions.  All of the ASSERT() macros will be
- * compiled out when NDEBUG is defined, this is the default behavior for
- * the SPL.  To enable assertions use the --enable-debug with configure.
- * The VERIFY() functions are never compiled out and cannot be disabled.
- *
- * PANIC()	- Panic the node and print message.
- * ASSERT()	- Assert X is true, if not panic.
- * ASSERTV()	- Wraps a variable declaration which is only used by ASSERT().
- * ASSERT3B()	- Assert boolean X OP Y is true, if not panic.
- * ASSERT3S()	- Assert signed X OP Y is true, if not panic.
- * ASSERT3U()	- Assert unsigned X OP Y is true, if not panic.
- * ASSERT3P()	- Assert pointer X OP Y is true, if not panic.
- * ASSERT0()	- Assert value is zero, if not panic.
- * VERIFY()	- Verify X is true, if not panic.
- * VERIFY3B()	- Verify boolean X OP Y is true, if not panic.
- * VERIFY3S()	- Verify signed X OP Y is true, if not panic.
- * VERIFY3U()	- Verify unsigned X OP Y is true, if not panic.
- * VERIFY3P()	- Verify pointer X OP Y is true, if not panic.
- * VERIFY0()	- Verify value is zero, if not panic.
- */
-
-#ifndef _SPL_DEBUG_H
-#define	_SPL_DEBUG_H
-
-/*
- * Common DEBUG functionality.
- */
-int spl_panic(const char *file, const char *func, int line,
-    const char *fmt, ...);
-void spl_dumpstack(void);
-
-/* BEGIN CSTYLED */
-#define	PANIC(fmt, a...)						\
-	spl_panic(__FILE__, __FUNCTION__, __LINE__, fmt, ## a)
-
-#define	VERIFY(cond)							\
-	(void) (unlikely(!(cond)) &&					\
-	    spl_panic(__FILE__, __FUNCTION__, __LINE__,			\
-	    "%s", "VERIFY(" #cond ") failed\n"))
-
-#define	VERIFY3B(LEFT, OP, RIGHT)	do {				\
-		boolean_t _verify3_left = (boolean_t)(LEFT);		\
-		boolean_t _verify3_right = (boolean_t)(RIGHT);		\
-		if (!(_verify3_left OP _verify3_right))			\
-		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
-		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
-		    "failed (%d " #OP " %d)\n",				\
-		    (boolean_t) (_verify3_left),			\
-		    (boolean_t) (_verify3_right));			\
-	} while (0)
-
-#define	VERIFY3S(LEFT, OP, RIGHT)	do {				\
-		int64_t _verify3_left = (int64_t)(LEFT);		\
-		int64_t _verify3_right = (int64_t)(RIGHT);		\
-		if (!(_verify3_left OP _verify3_right))			\
-		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
-		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
-		    "failed (%lld " #OP " %lld)\n",			\
-		    (long long) (_verify3_left),			\
-		    (long long) (_verify3_right));			\
-	} while (0)
-
-#define	VERIFY3U(LEFT, OP, RIGHT)	do {				\
-		uint64_t _verify3_left = (uint64_t)(LEFT);		\
-		uint64_t _verify3_right = (uint64_t)(RIGHT);		\
-		if (!(_verify3_left OP _verify3_right))			\
-		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
-		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
-		    "failed (%llu " #OP " %llu)\n",			\
-		    (unsigned long long) (_verify3_left),		\
-		    (unsigned long long) (_verify3_right));		\
-	} while (0)
-
-#define	VERIFY3P(LEFT, OP, RIGHT)	do {				\
-		uintptr_t _verify3_left = (uintptr_t)(LEFT);		\
-		uintptr_t _verify3_right = (uintptr_t)(RIGHT);		\
-		if (!(_verify3_left OP _verify3_right))			\
-		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
-		    "VERIFY3(" #LEFT " "  #OP " "  #RIGHT ") "		\
-		    "failed (%px " #OP " %px)\n",			\
-		    (void *) (_verify3_left),				\
-		    (void *) (_verify3_right));				\
-	} while (0)
-
-#define	VERIFY0(RIGHT)	do {				\
-		int64_t _verify3_left = (int64_t)(0);		\
-		int64_t _verify3_right = (int64_t)(RIGHT);		\
-		if (!(_verify3_left == _verify3_right))			\
-		    spl_panic(__FILE__, __FUNCTION__, __LINE__,		\
-		    "VERIFY3(0 == " #RIGHT ") "				\
-		    "failed (0 == %lld)\n",				\
-		    (long long) (_verify3_right));			\
-	} while (0)
-
-#define	CTASSERT_GLOBAL(x)		_CTASSERT(x, __LINE__)
-#define	CTASSERT(x)			{ _CTASSERT(x, __LINE__); }
-#define	_CTASSERT(x, y)			__CTASSERT(x, y)
-#define	__CTASSERT(x, y)						\
-	typedef char __attribute__ ((unused))				\
-	__compile_time_assertion__ ## y[(x) ? 1 : -1]
-
-/*
- * Debugging disabled (--disable-debug)
- */
-#ifdef NDEBUG
-
-#define	ASSERT(x)		((void)0)
-#define	ASSERTV(x)
-#define	ASSERT3B(x,y,z)		((void)0)
-#define	ASSERT3S(x,y,z)		((void)0)
-#define	ASSERT3U(x,y,z)		((void)0)
-#define	ASSERT3P(x,y,z)		((void)0)
-#define	ASSERT0(x)		((void)0)
-#define	IMPLY(A, B)		((void)0)
-#define	EQUIV(A, B)		((void)0)
-
-/*
- * Debugging enabled (--enable-debug)
- */
-#else
-
-#define	ASSERT3B	VERIFY3B
-#define	ASSERT3S	VERIFY3S
-#define	ASSERT3U	VERIFY3U
-#define	ASSERT3P	VERIFY3P
-#define	ASSERT0		VERIFY0
-#define	ASSERT		VERIFY
-#define	ASSERTV(x)		x
-#define	IMPLY(A, B) \
-	((void)(((!(A)) || (B)) || \
-	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
-	    "(" #A ") implies (" #B ")")))
-#define	EQUIV(A, B) \
-	((void)((!!(A) == !!(B)) || \
-	    spl_panic(__FILE__, __FUNCTION__, __LINE__, \
-	    "(" #A ") is equivalent to (" #B ")")))
-/* END CSTYLED */
-
-#endif /* NDEBUG */
-
-#endif /* SPL_DEBUG_H */

diff --git a/zfs/include/spl/sys/disp.h b/zfs/include/spl/sys/disp.h
deleted file mode 100644
index 413b623..0000000
--- a/zfs/include/spl/sys/disp.h
+++ /dev/null

@@ -1,34 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_DISP_H
-#define	_SPL_DISP_H
-
-#include <linux/preempt.h>
-
-#define	kpreempt(unused)	schedule()
-#define	kpreempt_disable()	preempt_disable()
-#define	kpreempt_enable()	preempt_enable()
-
-#endif /* SPL_DISP_H */

diff --git a/zfs/include/spl/sys/dkio.h b/zfs/include/spl/sys/dkio.h
deleted file mode 100644
index 49f166a..0000000
--- a/zfs/include/spl/sys/dkio.h
+++ /dev/null

@@ -1,40 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_DKIO_H
-#define	_SPL_DKIO_H
-
-#define	DFL_SZ(num_exts) \
-	(sizeof (dkioc_free_list_t) + (num_exts - 1) * 16)
-
-#define	DKIOC		(0x04 << 8)
-#define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
-
-/*
- * ioctl to free space (e.g. SCSI UNMAP) off a disk.
- * Pass a dkioc_free_list_t containing a list of extents to be freed.
- */
-#define	DKIOCFREE	(DKIOC|50)
-
-#endif /* _SPL_DKIO_H */

diff --git a/zfs/include/spl/sys/errno.h b/zfs/include/spl/sys/errno.h
deleted file mode 100644
index 6015b1a..0000000
--- a/zfs/include/spl/sys/errno.h
+++ /dev/null

@@ -1,47 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2000 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved	*/
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_ERRNO_H
-#define	_SYS_ERRNO_H
-
-#include <linux/errno.h>
-
-#define	ENOTSUP		EOPNOTSUPP
-
-#endif	/* _SYS_ERRNO_H */

diff --git a/zfs/include/spl/sys/fcntl.h b/zfs/include/spl/sys/fcntl.h
deleted file mode 100644
index 3faa5da..0000000
--- a/zfs/include/spl/sys/fcntl.h
+++ /dev/null

@@ -1,37 +0,0 @@
-/*
- *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_FCNTL_H
-#define	_SPL_FCNTL_H
-
-#include <asm/fcntl.h>
-
-#define	F_FREESP 11
-
-#ifdef CONFIG_64BIT
-typedef struct flock flock64_t;
-#else
-typedef struct flock64 flock64_t;
-#endif /* CONFIG_64BIT */
-
-#endif /* _SPL_FCNTL_H */

diff --git a/zfs/include/spl/sys/file.h b/zfs/include/spl/sys/file.h
deleted file mode 100644
index 05dbc08..0000000
--- a/zfs/include/spl/sys/file.h
+++ /dev/null

@@ -1,52 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_FILE_H
-#define	_SPL_FILE_H
-
-#define	FIGNORECASE		0x00080000
-#define	FKIOCTL			0x80000000
-#define	ED_CASE_CONFLICT	0x10
-
-#ifdef HAVE_INODE_LOCK_SHARED
-#define	spl_inode_lock(ip)		inode_lock(ip)
-#define	spl_inode_unlock(ip)		inode_unlock(ip)
-#define	spl_inode_lock_shared(ip)	inode_lock_shared(ip)
-#define	spl_inode_unlock_shared(ip)	inode_unlock_shared(ip)
-#define	spl_inode_trylock(ip)		inode_trylock(ip)
-#define	spl_inode_trylock_shared(ip)	inode_trylock_shared(ip)
-#define	spl_inode_is_locked(ip)		inode_is_locked(ip)
-#define	spl_inode_lock_nested(ip, s)	inode_lock_nested(ip, s)
-#else
-#define	spl_inode_lock(ip)		mutex_lock(&(ip)->i_mutex)
-#define	spl_inode_unlock(ip)		mutex_unlock(&(ip)->i_mutex)
-#define	spl_inode_lock_shared(ip)	mutex_lock(&(ip)->i_mutex)
-#define	spl_inode_unlock_shared(ip)	mutex_unlock(&(ip)->i_mutex)
-#define	spl_inode_trylock(ip)		mutex_trylock(&(ip)->i_mutex)
-#define	spl_inode_trylock_shared(ip)	mutex_trylock(&(ip)->i_mutex)
-#define	spl_inode_is_locked(ip)		mutex_is_locked(&(ip)->i_mutex)
-#define	spl_inode_lock_nested(ip, s)	mutex_lock_nested(&(ip)->i_mutex, s)
-#endif
-
-#endif /* SPL_FILE_H */

diff --git a/zfs/include/spl/sys/inttypes.h b/zfs/include/spl/sys/inttypes.h
deleted file mode 100644
index 92e7620..0000000
--- a/zfs/include/spl/sys/inttypes.h
+++ /dev/null

@@ -1,28 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_INTTYPES_H
-#define	_SPL_INTTYPES_H
-
-#endif /* SPL_INTTYPES_H */

diff --git a/zfs/include/spl/sys/isa_defs.h b/zfs/include/spl/sys/isa_defs.h
deleted file mode 100644
index 1eb4002..0000000
--- a/zfs/include/spl/sys/isa_defs.h
+++ /dev/null

@@ -1,237 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef	_SPL_ISA_DEFS_H
-#define	_SPL_ISA_DEFS_H
-
-/* x86_64 arch specific defines */
-#if defined(__x86_64) || defined(__x86_64__)
-
-#if !defined(__x86_64)
-#define	__x86_64
-#endif
-
-#if !defined(__amd64)
-#define	__amd64
-#endif
-
-#if !defined(__x86)
-#define	__x86
-#endif
-
-#if !defined(_LP64)
-#define	_LP64
-#endif
-
-#define	_ALIGNMENT_REQUIRED	1
-
-
-/* i386 arch specific defines */
-#elif defined(__i386) || defined(__i386__)
-
-#if !defined(__i386)
-#define	__i386
-#endif
-
-#if !defined(__x86)
-#define	__x86
-#endif
-
-#if !defined(_ILP32)
-#define	_ILP32
-#endif
-
-#define	_ALIGNMENT_REQUIRED	0
-
-/* powerpc (ppc64) arch specific defines */
-#elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
-
-#if !defined(__powerpc)
-#define	__powerpc
-#endif
-
-#if !defined(__powerpc__)
-#define	__powerpc__
-#endif
-
-#if defined(__powerpc64__)
-#if !defined(_LP64)
-#define	_LP64
-#endif
-#else
-#if !defined(_ILP32)
-#define	_ILP32
-#endif
-#endif
-
-/*
- * Illumos doesn't define _ALIGNMENT_REQUIRED for PPC, so default to 1
- * out of paranoia.
- */
-#define	_ALIGNMENT_REQUIRED	1
-
-/* arm arch specific defines */
-#elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
-
-#if !defined(__arm)
-#define	__arm
-#endif
-
-#if !defined(__arm__)
-#define	__arm__
-#endif
-
-#if defined(__aarch64__)
-#if !defined(_LP64)
-#define	_LP64
-#endif
-#else
-#if !defined(_ILP32)
-#define	_ILP32
-#endif
-#endif
-
-#if defined(__ARMEL__) || defined(__AARCH64EL__)
-#define	_LITTLE_ENDIAN
-#else
-#define	_BIG_ENDIAN
-#endif
-
-/*
- * Illumos doesn't define _ALIGNMENT_REQUIRED for ARM, so default to 1
- * out of paranoia.
- */
-#define	_ALIGNMENT_REQUIRED	1
-
-/* sparc arch specific defines */
-#elif defined(__sparc) || defined(__sparc__)
-
-#if !defined(__sparc)
-#define	__sparc
-#endif
-
-#if !defined(__sparc__)
-#define	__sparc__
-#endif
-
-#if defined(__arch64__)
-#if !defined(_LP64)
-#define	_LP64
-#endif
-#else
-#if !defined(_ILP32)
-#define	_ILP32
-#endif
-#endif
-
-#define	_BIG_ENDIAN
-#define	_SUNOS_VTOC_16
-#define	_ALIGNMENT_REQUIRED	1
-
-/* s390 arch specific defines */
-#elif defined(__s390__)
-#if defined(__s390x__)
-#if !defined(_LP64)
-#define	_LP64
-#endif
-#else
-#if !defined(_ILP32)
-#define	_ILP32
-#endif
-#endif
-
-#define	_BIG_ENDIAN
-
-/*
- * Illumos doesn't define _ALIGNMENT_REQUIRED for s390, so default to 1
- * out of paranoia.
- */
-#define	_ALIGNMENT_REQUIRED	1
-
-/* MIPS arch specific defines */
-#elif defined(__mips__)
-
-#if defined(__MIPSEB__)
-#define	_BIG_ENDIAN
-#elif defined(__MIPSEL__)
-#define	_LITTLE_ENDIAN
-#else
-#error MIPS no endian specified
-#endif
-
-#ifndef _LP64
-#define	_ILP32
-#endif
-
-#define	_SUNOS_VTOC_16
-
-/*
- * Illumos doesn't define _ALIGNMENT_REQUIRED for MIPS, so default to 1
- * out of paranoia.
- */
-#define	_ALIGNMENT_REQUIRED	1
-
-#else
-/*
- * Currently supported:
- * x86_64, i386, arm, powerpc, s390, sparc, and mips
- */
-#error "Unsupported ISA type"
-#endif
-
-#if defined(_ILP32) && defined(_LP64)
-#error "Both _ILP32 and _LP64 are defined"
-#endif
-
-#if !defined(_ILP32) && !defined(_LP64)
-#error "Neither _ILP32 or _LP64 are defined"
-#endif
-
-#include <sys/byteorder.h>
-
-/*
- * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS will be defined by the Linux
- * kernel for architectures which support efficient unaligned access.
- */
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
-#endif
-
-#if defined(__LITTLE_ENDIAN) && !defined(_LITTLE_ENDIAN)
-#define	_LITTLE_ENDIAN __LITTLE_ENDIAN
-#endif
-
-#if defined(__BIG_ENDIAN) && !defined(_BIG_ENDIAN)
-#define	_BIG_ENDIAN __BIG_ENDIAN
-#endif
-
-#if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
-#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined"
-#endif
-
-#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#error "Neither _LITTLE_ENDIAN or _BIG_ENDIAN are defined"
-#endif
-
-#endif	/* _SPL_ISA_DEFS_H */

diff --git a/zfs/include/spl/sys/kmem.h b/zfs/include/spl/sys/kmem.h
deleted file mode 100644
index ca15bfe..0000000
--- a/zfs/include/spl/sys/kmem.h
+++ /dev/null

@@ -1,196 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_KMEM_H
-#define	_SPL_KMEM_H
-
-#include <sys/debug.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-
-extern int kmem_debugging(void);
-extern char *kmem_vasprintf(const char *fmt, va_list ap);
-extern char *kmem_asprintf(const char *fmt, ...);
-extern char *strdup(const char *str);
-extern void strfree(char *str);
-
-/*
- * Memory allocation interfaces
- */
-#define	KM_SLEEP	0x0000	/* can block for memory; success guaranteed */
-#define	KM_NOSLEEP	0x0001	/* cannot block for memory; may fail */
-#define	KM_PUSHPAGE	0x0004	/* can block for memory; may use reserve */
-#define	KM_ZERO		0x1000	/* zero the allocation */
-#define	KM_VMEM		0x2000	/* caller is vmem_* wrapper */
-
-#define	KM_PUBLIC_MASK	(KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
-
-static int spl_fstrans_check(void);
-
-/*
- * Convert a KM_* flags mask to its Linux GFP_* counterpart.  The conversion
- * function is context aware which means that KM_SLEEP allocations can be
- * safely used in syncing contexts which have set PF_FSTRANS.
- */
-static inline gfp_t
-kmem_flags_convert(int flags)
-{
-	gfp_t lflags = __GFP_NOWARN | __GFP_COMP;
-
-	if (flags & KM_NOSLEEP) {
-		lflags |= GFP_ATOMIC | __GFP_NORETRY;
-	} else {
-		lflags |= GFP_KERNEL;
-		if (spl_fstrans_check())
-			lflags &= ~(__GFP_IO|__GFP_FS);
-	}
-
-	if (flags & KM_PUSHPAGE)
-		lflags |= __GFP_HIGH;
-
-	if (flags & KM_ZERO)
-		lflags |= __GFP_ZERO;
-
-	return (lflags);
-}
-
-typedef struct {
-	struct task_struct *fstrans_thread;
-	unsigned int saved_flags;
-} fstrans_cookie_t;
-
-/*
- * Introduced in Linux 3.9, however this cannot be solely relied on before
- * Linux 3.18 as it doesn't turn off __GFP_FS as it should.
- */
-#ifdef PF_MEMALLOC_NOIO
-#define	__SPL_PF_MEMALLOC_NOIO (PF_MEMALLOC_NOIO)
-#else
-#define	__SPL_PF_MEMALLOC_NOIO (0)
-#endif
-
-/*
- * PF_FSTRANS is removed from Linux 4.12
- */
-#ifdef PF_FSTRANS
-#define	__SPL_PF_FSTRANS (PF_FSTRANS)
-#else
-#define	__SPL_PF_FSTRANS (0)
-#endif
-
-#define	SPL_FSTRANS (__SPL_PF_FSTRANS|__SPL_PF_MEMALLOC_NOIO)
-
-static inline fstrans_cookie_t
-spl_fstrans_mark(void)
-{
-	fstrans_cookie_t cookie;
-
-	BUILD_BUG_ON(SPL_FSTRANS == 0);
-
-	cookie.fstrans_thread = current;
-	cookie.saved_flags = current->flags & SPL_FSTRANS;
-	current->flags |= SPL_FSTRANS;
-
-	return (cookie);
-}
-
-static inline void
-spl_fstrans_unmark(fstrans_cookie_t cookie)
-{
-	ASSERT3P(cookie.fstrans_thread, ==, current);
-	ASSERT((current->flags & SPL_FSTRANS) == SPL_FSTRANS);
-
-	current->flags &= ~SPL_FSTRANS;
-	current->flags |= cookie.saved_flags;
-}
-
-static inline int
-spl_fstrans_check(void)
-{
-	return (current->flags & SPL_FSTRANS);
-}
-
-/*
- * specifically used to check PF_FSTRANS flag, cannot be relied on for
- * checking spl_fstrans_mark().
- */
-static inline int
-__spl_pf_fstrans_check(void)
-{
-	return (current->flags & __SPL_PF_FSTRANS);
-}
-
-#ifdef HAVE_ATOMIC64_T
-#define	kmem_alloc_used_add(size)	atomic64_add(size, &kmem_alloc_used)
-#define	kmem_alloc_used_sub(size)	atomic64_sub(size, &kmem_alloc_used)
-#define	kmem_alloc_used_read()		atomic64_read(&kmem_alloc_used)
-#define	kmem_alloc_used_set(size)	atomic64_set(&kmem_alloc_used, size)
-extern atomic64_t kmem_alloc_used;
-extern unsigned long long kmem_alloc_max;
-#else  /* HAVE_ATOMIC64_T */
-#define	kmem_alloc_used_add(size)	atomic_add(size, &kmem_alloc_used)
-#define	kmem_alloc_used_sub(size)	atomic_sub(size, &kmem_alloc_used)
-#define	kmem_alloc_used_read()		atomic_read(&kmem_alloc_used)
-#define	kmem_alloc_used_set(size)	atomic_set(&kmem_alloc_used, size)
-extern atomic_t kmem_alloc_used;
-extern unsigned long long kmem_alloc_max;
-#endif /* HAVE_ATOMIC64_T */
-
-extern unsigned int spl_kmem_alloc_warn;
-extern unsigned int spl_kmem_alloc_max;
-
-#define	kmem_alloc(sz, fl)	spl_kmem_alloc((sz), (fl), __func__, __LINE__)
-#define	kmem_zalloc(sz, fl)	spl_kmem_zalloc((sz), (fl), __func__, __LINE__)
-#define	kmem_free(ptr, sz)	spl_kmem_free((ptr), (sz))
-#define	kmem_cache_reap_active	spl_kmem_cache_reap_active
-
-extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
-extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
-extern void spl_kmem_free(const void *ptr, size_t sz);
-
-/*
- * 5.8 API change, pgprot_t argument removed.
- */
-#ifdef HAVE_VMALLOC_PAGE_KERNEL
-#define	spl_vmalloc(size, flags)	__vmalloc(size, flags, PAGE_KERNEL)
-#else
-#define	spl_vmalloc(size, flags)	__vmalloc(size, flags)
-#endif
-
-/*
- * The following functions are only available for internal use.
- */
-extern void *spl_kmem_alloc_impl(size_t size, int flags, int node);
-extern void *spl_kmem_alloc_debug(size_t size, int flags, int node);
-extern void *spl_kmem_alloc_track(size_t size, int flags,
-    const char *func, int line, int node);
-extern void spl_kmem_free_impl(const void *buf, size_t size);
-extern void spl_kmem_free_debug(const void *buf, size_t size);
-extern void spl_kmem_free_track(const void *buf, size_t size);
-
-extern int spl_kmem_init(void);
-extern void spl_kmem_fini(void);
-extern int spl_kmem_cache_reap_active(void);
-
-#endif	/* _SPL_KMEM_H */

diff --git a/zfs/include/spl/sys/kmem_cache.h b/zfs/include/spl/sys/kmem_cache.h
deleted file mode 100644
index 8381b03..0000000
--- a/zfs/include/spl/sys/kmem_cache.h
+++ /dev/null

@@ -1,238 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_KMEM_CACHE_H
-#define	_SPL_KMEM_CACHE_H
-
-#include <sys/taskq.h>
-
-/*
- * Slab allocation interfaces.  The SPL slab differs from the standard
- * Linux SLAB or SLUB primarily in that each cache may be backed by slabs
- * allocated from the physical or virtual memory address space.  The virtual
- * slabs allow for good behavior when allocation large objects of identical
- * size.  This slab implementation also supports both constructors and
- * destructors which the Linux slab does not.
- */
-typedef enum kmc_bit {
-	KMC_BIT_NOTOUCH		= 0,	/* Don't update ages */
-	KMC_BIT_NODEBUG		= 1,	/* Default behavior */
-	KMC_BIT_NOMAGAZINE	= 2,	/* XXX: Unsupported */
-	KMC_BIT_NOHASH		= 3,	/* XXX: Unsupported */
-	KMC_BIT_QCACHE		= 4,	/* XXX: Unsupported */
-	KMC_BIT_KMEM		= 5,	/* Use kmem cache */
-	KMC_BIT_VMEM		= 6,	/* Use vmem cache */
-	KMC_BIT_SLAB		= 7,	/* Use Linux slab cache */
-	KMC_BIT_OFFSLAB		= 8,	/* Objects not on slab */
-	KMC_BIT_DEADLOCKED	= 14,	/* Deadlock detected */
-	KMC_BIT_GROWING		= 15,	/* Growing in progress */
-	KMC_BIT_REAPING		= 16,	/* Reaping in progress */
-	KMC_BIT_DESTROY		= 17,	/* Destroy in progress */
-	KMC_BIT_TOTAL		= 18,	/* Proc handler helper bit */
-	KMC_BIT_ALLOC		= 19,	/* Proc handler helper bit */
-	KMC_BIT_MAX		= 20,	/* Proc handler helper bit */
-} kmc_bit_t;
-
-/* kmem move callback return values */
-typedef enum kmem_cbrc {
-	KMEM_CBRC_YES		= 0,	/* Object moved */
-	KMEM_CBRC_NO		= 1,	/* Object not moved */
-	KMEM_CBRC_LATER		= 2,	/* Object not moved, try again later */
-	KMEM_CBRC_DONT_NEED	= 3,	/* Neither object is needed */
-	KMEM_CBRC_DONT_KNOW	= 4,	/* Object unknown */
-} kmem_cbrc_t;
-
-#define	KMC_NOTOUCH		(1 << KMC_BIT_NOTOUCH)
-#define	KMC_NODEBUG		(1 << KMC_BIT_NODEBUG)
-#define	KMC_NOMAGAZINE		(1 << KMC_BIT_NOMAGAZINE)
-#define	KMC_NOHASH		(1 << KMC_BIT_NOHASH)
-#define	KMC_QCACHE		(1 << KMC_BIT_QCACHE)
-#define	KMC_KMEM		(1 << KMC_BIT_KMEM)
-#define	KMC_VMEM		(1 << KMC_BIT_VMEM)
-#define	KMC_SLAB		(1 << KMC_BIT_SLAB)
-#define	KMC_OFFSLAB		(1 << KMC_BIT_OFFSLAB)
-#define	KMC_DEADLOCKED		(1 << KMC_BIT_DEADLOCKED)
-#define	KMC_GROWING		(1 << KMC_BIT_GROWING)
-#define	KMC_REAPING		(1 << KMC_BIT_REAPING)
-#define	KMC_DESTROY		(1 << KMC_BIT_DESTROY)
-#define	KMC_TOTAL		(1 << KMC_BIT_TOTAL)
-#define	KMC_ALLOC		(1 << KMC_BIT_ALLOC)
-#define	KMC_MAX			(1 << KMC_BIT_MAX)
-
-#define	KMC_REAP_CHUNK		INT_MAX
-#define	KMC_DEFAULT_SEEKS	1
-
-#define	KMC_EXPIRE_AGE		0x1	/* Due to age */
-#define	KMC_EXPIRE_MEM		0x2	/* Due to low memory */
-
-#define	KMC_RECLAIM_ONCE	0x1	/* Force a single shrinker pass */
-
-extern unsigned int spl_kmem_cache_expire;
-extern struct list_head spl_kmem_cache_list;
-extern struct rw_semaphore spl_kmem_cache_sem;
-
-#define	SKM_MAGIC			0x2e2e2e2e
-#define	SKO_MAGIC			0x20202020
-#define	SKS_MAGIC			0x22222222
-#define	SKC_MAGIC			0x2c2c2c2c
-
-#define	SPL_KMEM_CACHE_DELAY		15	/* Minimum slab release age */
-#define	SPL_KMEM_CACHE_REAP		0	/* Default reap everything */
-#define	SPL_KMEM_CACHE_OBJ_PER_SLAB	8	/* Target objects per slab */
-#define	SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN	1	/* Minimum objects per slab */
-#define	SPL_KMEM_CACHE_ALIGN		8	/* Default object alignment */
-#ifdef _LP64
-#define	SPL_KMEM_CACHE_MAX_SIZE		32	/* Max slab size in MB */
-#else
-#define	SPL_KMEM_CACHE_MAX_SIZE		4	/* Max slab size in MB */
-#endif
-
-#define	SPL_MAX_ORDER			(MAX_ORDER - 3)
-#define	SPL_MAX_ORDER_NR_PAGES		(1 << (SPL_MAX_ORDER - 1))
-
-#ifdef CONFIG_SLUB
-#define	SPL_MAX_KMEM_CACHE_ORDER	PAGE_ALLOC_COSTLY_ORDER
-#define	SPL_MAX_KMEM_ORDER_NR_PAGES	(1 << (SPL_MAX_KMEM_CACHE_ORDER - 1))
-#else
-#define	SPL_MAX_KMEM_ORDER_NR_PAGES	(KMALLOC_MAX_SIZE >> PAGE_SHIFT)
-#endif
-
-#define	POINTER_IS_VALID(p)		0	/* Unimplemented */
-#define	POINTER_INVALIDATE(pp)			/* Unimplemented */
-
-typedef int (*spl_kmem_ctor_t)(void *, void *, int);
-typedef void (*spl_kmem_dtor_t)(void *, void *);
-typedef void (*spl_kmem_reclaim_t)(void *);
-
-typedef struct spl_kmem_magazine {
-	uint32_t		skm_magic;	/* Sanity magic */
-	uint32_t		skm_avail;	/* Available objects */
-	uint32_t		skm_size;	/* Magazine size */
-	uint32_t		skm_refill;	/* Batch refill size */
-	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
-	unsigned long		skm_age;	/* Last cache access */
-	unsigned int		skm_cpu;	/* Owned by cpu */
-	void			*skm_objs[0];	/* Object pointers */
-} spl_kmem_magazine_t;
-
-typedef struct spl_kmem_obj {
-	uint32_t		sko_magic;	/* Sanity magic */
-	void			*sko_addr;	/* Buffer address */
-	struct spl_kmem_slab	*sko_slab;	/* Owned by slab */
-	struct list_head	sko_list;	/* Free object list linkage */
-} spl_kmem_obj_t;
-
-typedef struct spl_kmem_slab {
-	uint32_t		sks_magic;	/* Sanity magic */
-	uint32_t		sks_objs;	/* Objects per slab */
-	struct spl_kmem_cache	*sks_cache;	/* Owned by cache */
-	struct list_head	sks_list;	/* Slab list linkage */
-	struct list_head	sks_free_list;	/* Free object list */
-	unsigned long		sks_age;	/* Last modify jiffie */
-	uint32_t		sks_ref;	/* Ref count used objects */
-} spl_kmem_slab_t;
-
-typedef struct spl_kmem_alloc {
-	struct spl_kmem_cache	*ska_cache;	/* Owned by cache */
-	int			ska_flags;	/* Allocation flags */
-	taskq_ent_t		ska_tqe;	/* Task queue entry */
-} spl_kmem_alloc_t;
-
-typedef struct spl_kmem_emergency {
-	struct rb_node		ske_node;	/* Emergency tree linkage */
-	unsigned long		ske_obj;	/* Buffer address */
-} spl_kmem_emergency_t;
-
-typedef struct spl_kmem_cache {
-	uint32_t		skc_magic;	/* Sanity magic */
-	uint32_t		skc_name_size;	/* Name length */
-	char			*skc_name;	/* Name string */
-	spl_kmem_magazine_t	**skc_mag;	/* Per-CPU warm cache */
-	uint32_t		skc_mag_size;	/* Magazine size */
-	uint32_t		skc_mag_refill;	/* Magazine refill count */
-	spl_kmem_ctor_t		skc_ctor;	/* Constructor */
-	spl_kmem_dtor_t		skc_dtor;	/* Destructor */
-	spl_kmem_reclaim_t	skc_reclaim;	/* Reclaimator */
-	void			*skc_private;	/* Private data */
-	void			*skc_vmp;	/* Unused */
-	struct kmem_cache	*skc_linux_cache; /* Linux slab cache if used */
-	unsigned long		skc_flags;	/* Flags */
-	uint32_t		skc_obj_size;	/* Object size */
-	uint32_t		skc_obj_align;	/* Object alignment */
-	uint32_t		skc_slab_objs;	/* Objects per slab */
-	uint32_t		skc_slab_size;	/* Slab size */
-	uint32_t		skc_delay;	/* Slab reclaim interval */
-	uint32_t		skc_reap;	/* Slab reclaim count */
-	atomic_t		skc_ref;	/* Ref count callers */
-	taskqid_t		skc_taskqid;	/* Slab reclaim task */
-	struct list_head	skc_list;	/* List of caches linkage */
-	struct list_head	skc_complete_list; /* Completely alloc'ed */
-	struct list_head	skc_partial_list;  /* Partially alloc'ed */
-	struct rb_root		skc_emergency_tree; /* Min sized objects */
-	spinlock_t		skc_lock;	/* Cache lock */
-	spl_wait_queue_head_t	skc_waitq;	/* Allocation waiters */
-	uint64_t		skc_slab_fail;	/* Slab alloc failures */
-	uint64_t		skc_slab_create;  /* Slab creates */
-	uint64_t		skc_slab_destroy; /* Slab destroys */
-	uint64_t		skc_slab_total;	/* Slab total current */
-	uint64_t		skc_slab_alloc;	/* Slab alloc current */
-	uint64_t		skc_slab_max;	/* Slab max historic  */
-	uint64_t		skc_obj_total;	/* Obj total current */
-	uint64_t		skc_obj_alloc;	/* Obj alloc current */
-	uint64_t		skc_obj_max;	/* Obj max historic */
-	uint64_t		skc_obj_deadlock;  /* Obj emergency deadlocks */
-	uint64_t		skc_obj_emergency; /* Obj emergency current */
-	uint64_t		skc_obj_emergency_max; /* Obj emergency max */
-} spl_kmem_cache_t;
-#define	kmem_cache_t		spl_kmem_cache_t
-
-extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size,
-    size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor,
-    spl_kmem_reclaim_t reclaim, void *priv, void *vmp, int flags);
-extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
-    kmem_cbrc_t (*)(void *, void *, size_t, void *));
-extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
-extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
-extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
-extern void spl_kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags);
-extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count);
-extern void spl_kmem_reap(void);
-
-#define	kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) \
-    spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
-#define	kmem_cache_set_move(skc, move)	spl_kmem_cache_set_move(skc, move)
-#define	kmem_cache_destroy(skc)		spl_kmem_cache_destroy(skc)
-#define	kmem_cache_alloc(skc, flags)	spl_kmem_cache_alloc(skc, flags)
-#define	kmem_cache_free(skc, obj)	spl_kmem_cache_free(skc, obj)
-#define	kmem_cache_reap_now(skc)	\
-    spl_kmem_cache_reap_now(skc, skc->skc_reap)
-#define	kmem_reap()			spl_kmem_reap()
-
-/*
- * The following functions are only available for internal use.
- */
-extern int spl_kmem_cache_init(void);
-extern void spl_kmem_cache_fini(void);
-
-#endif	/* _SPL_KMEM_CACHE_H */

diff --git a/zfs/include/spl/sys/kobj.h b/zfs/include/spl/sys/kobj.h
deleted file mode 100644
index 558ec39..0000000
--- a/zfs/include/spl/sys/kobj.h
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_KOBJ_H
-#define	_SPL_KOBJ_H
-
-#include <sys/vnode.h>
-
-typedef struct _buf {
-	vnode_t *vp;
-} _buf_t;
-
-typedef struct _buf buf_t;
-
-extern struct _buf *kobj_open_file(const char *name);
-extern void kobj_close_file(struct _buf *file);
-extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
-    unsigned off);
-extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
-
-#endif /* SPL_KOBJ_H */

diff --git a/zfs/include/spl/sys/kstat.h b/zfs/include/spl/sys/kstat.h
deleted file mode 100644
index c93c531..0000000
--- a/zfs/include/spl/sys/kstat.h
+++ /dev/null

@@ -1,223 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_KSTAT_H
-#define	_SPL_KSTAT_H
-
-#include <linux/module.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/kmem.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-
-#define	KSTAT_STRLEN		255
-#define	KSTAT_RAW_MAX		(128*1024)
-
-/*
- * For reference valid classes are:
- * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc
- */
-
-#define	KSTAT_TYPE_RAW		0 /* can be anything; ks_ndata >= 1 */
-#define	KSTAT_TYPE_NAMED	1 /* name/value pair; ks_ndata >= 1 */
-#define	KSTAT_TYPE_INTR		2 /* interrupt stats; ks_ndata == 1 */
-#define	KSTAT_TYPE_IO		3 /* I/O stats; ks_ndata == 1 */
-#define	KSTAT_TYPE_TIMER	4 /* event timer; ks_ndata >= 1 */
-#define	KSTAT_NUM_TYPES		5
-
-#define	KSTAT_DATA_CHAR		0
-#define	KSTAT_DATA_INT32	1
-#define	KSTAT_DATA_UINT32	2
-#define	KSTAT_DATA_INT64	3
-#define	KSTAT_DATA_UINT64	4
-#define	KSTAT_DATA_LONG		5
-#define	KSTAT_DATA_ULONG	6
-#define	KSTAT_DATA_STRING	7
-#define	KSTAT_NUM_DATAS		8
-
-#define	KSTAT_INTR_HARD		0
-#define	KSTAT_INTR_SOFT		1
-#define	KSTAT_INTR_WATCHDOG	2
-#define	KSTAT_INTR_SPURIOUS	3
-#define	KSTAT_INTR_MULTSVC	4
-#define	KSTAT_NUM_INTRS		5
-
-#define	KSTAT_FLAG_VIRTUAL	0x01
-#define	KSTAT_FLAG_VAR_SIZE	0x02
-#define	KSTAT_FLAG_WRITABLE	0x04
-#define	KSTAT_FLAG_PERSISTENT	0x08
-#define	KSTAT_FLAG_DORMANT	0x10
-#define	KSTAT_FLAG_INVALID	0x20
-#define	KSTAT_FLAG_LONGSTRINGS	0x40
-#define	KSTAT_FLAG_NO_HEADERS	0x80
-
-#define	KS_MAGIC		0x9d9d9d9d
-
-/* Dynamic updates */
-#define	KSTAT_READ		0
-#define	KSTAT_WRITE		1
-
-struct kstat_s;
-typedef struct kstat_s kstat_t;
-
-typedef int kid_t;				/* unique kstat id */
-typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */
-
-typedef struct kstat_module {
-	char ksm_name[KSTAT_STRLEN+1];		/* module name */
-	struct list_head ksm_module_list;	/* module linkage */
-	struct list_head ksm_kstat_list;	/* list of kstat entries */
-	struct proc_dir_entry *ksm_proc;	/* proc entry */
-} kstat_module_t;
-
-typedef struct kstat_raw_ops {
-	int (*headers)(char *buf, size_t size);
-	int (*data)(char *buf, size_t size, void *data);
-	void *(*addr)(kstat_t *ksp, loff_t index);
-} kstat_raw_ops_t;
-
-typedef struct kstat_proc_entry {
-	char	kpe_name[KSTAT_STRLEN+1];	/* kstat name */
-	char	kpe_module[KSTAT_STRLEN+1];	/* provider module name */
-	kstat_module_t		*kpe_owner;	/* kstat module linkage */
-	struct list_head	kpe_list;	/* kstat linkage */
-	struct proc_dir_entry	*kpe_proc;	/* procfs entry */
-} kstat_proc_entry_t;
-
-struct kstat_s {
-	int		ks_magic;		/* magic value */
-	kid_t		ks_kid;			/* unique kstat ID */
-	hrtime_t	ks_crtime;		/* creation time */
-	hrtime_t	ks_snaptime;		/* last access time */
-	int		ks_instance;		/* provider module instance */
-	char		ks_class[KSTAT_STRLEN+1]; /* kstat class */
-	uchar_t		ks_type;		/* kstat data type */
-	uchar_t		ks_flags;		/* kstat flags */
-	void		*ks_data;		/* kstat type-specific data */
-	uint_t		ks_ndata;		/* # of data records */
-	size_t		ks_data_size;		/* size of kstat data section */
-	kstat_update_t	*ks_update;		/* dynamic updates */
-	void		*ks_private;		/* private data */
-	kmutex_t	ks_private_lock;	/* kstat private data lock */
-	kmutex_t	*ks_lock;		/* kstat data lock */
-	kstat_raw_ops_t	ks_raw_ops;		/* ops table for raw type */
-	char		*ks_raw_buf;		/* buf used for raw ops */
-	size_t		ks_raw_bufsize;		/* size of raw ops buffer */
-	kstat_proc_entry_t	ks_proc;	/* data for procfs entry */
-};
-
-typedef struct kstat_named_s {
-	char	name[KSTAT_STRLEN];	/* name of counter */
-	uchar_t	data_type;		/* data type */
-	union {
-		char c[16];	/* 128-bit int */
-		int32_t	i32;	/* 32-bit signed int */
-		uint32_t ui32;	/* 32-bit unsigned int */
-		int64_t i64;	/* 64-bit signed int */
-		uint64_t ui64;	/* 64-bit unsigned int */
-		long l;		/* native signed long */
-		ulong_t ul;	/* native unsigned long */
-		struct {
-			union {
-				char *ptr;	/* NULL-term string */
-				char __pad[8];	/* 64-bit padding */
-			} addr;
-			uint32_t len;		/* # bytes for strlen + '\0' */
-		} string;
-	} value;
-} kstat_named_t;
-
-#define	KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr)
-#define	KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len)
-
-#ifdef HAVE_PROC_OPS_STRUCT
-typedef struct proc_ops kstat_proc_op_t;
-#else
-typedef struct file_operations kstat_proc_op_t;
-#endif
-
-typedef struct kstat_intr {
-	uint_t intrs[KSTAT_NUM_INTRS];
-} kstat_intr_t;
-
-typedef struct kstat_io {
-	u_longlong_t	nread;		/* number of bytes read */
-	u_longlong_t	nwritten;	/* number of bytes written */
-	uint_t		reads;		/* number of read operations */
-	uint_t		writes;		/* number of write operations */
-	hrtime_t	wtime;		/* cumulative wait (pre-service) time */
-	hrtime_t	wlentime;	/* cumulative wait len*time product */
-	hrtime_t	wlastupdate;	/* last time wait queue changed */
-	hrtime_t	rtime;		/* cumulative run (service) time */
-	hrtime_t	rlentime;	/* cumulative run length*time product */
-	hrtime_t	rlastupdate;	/* last time run queue changed */
-	uint_t		wcnt;		/* count of elements in wait state */
-	uint_t		rcnt;		/* count of elements in run state */
-} kstat_io_t;
-
-typedef struct kstat_timer {
-	char		name[KSTAT_STRLEN+1]; /* event name */
-	u_longlong_t	num_events;	 /* number of events */
-	hrtime_t	elapsed_time;	 /* cumulative elapsed time */
-	hrtime_t	min_time;	 /* shortest event duration */
-	hrtime_t	max_time;	 /* longest event duration */
-	hrtime_t	start_time;	 /* previous event start time */
-	hrtime_t	stop_time;	 /* previous event stop time */
-} kstat_timer_t;
-
-int spl_kstat_init(void);
-void spl_kstat_fini(void);
-
-extern void __kstat_set_raw_ops(kstat_t *ksp,
-    int (*headers)(char *buf, size_t size),
-    int (*data)(char *buf, size_t size, void *data),
-    void* (*addr)(kstat_t *ksp, loff_t index));
-
-extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
-    const char *ks_name, const char *ks_class, uchar_t ks_type,
-    uint_t ks_ndata, uchar_t ks_flags);
-
-extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep,
-    const char *module, const char *name);
-extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep);
-extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
-    const kstat_proc_op_t *file_ops, void *data);
-
-extern void __kstat_install(kstat_t *ksp);
-extern void __kstat_delete(kstat_t *ksp);
-extern void kstat_waitq_enter(kstat_io_t *);
-extern void kstat_waitq_exit(kstat_io_t *);
-extern void kstat_runq_enter(kstat_io_t *);
-extern void kstat_runq_exit(kstat_io_t *);
-
-#define	kstat_set_raw_ops(k, h, d, a) \
-    __kstat_set_raw_ops(k, h, d, a)
-#define	kstat_create(m, i, n, c, t, s, f) \
-    __kstat_create(m, i, n, c, t, s, f)
-
-#define	kstat_install(k)		__kstat_install(k)
-#define	kstat_delete(k)			__kstat_delete(k)
-
-#endif  /* _SPL_KSTAT_H */

diff --git a/zfs/include/spl/sys/list.h b/zfs/include/spl/sys/list.h
deleted file mode 100644
index 74b784e..0000000
--- a/zfs/include/spl/sys/list.h
+++ /dev/null

@@ -1,208 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_LIST_H
-#define	_SPL_LIST_H
-
-#include <sys/types.h>
-#include <linux/list.h>
-
-/*
- * NOTE: I have implemented the Solaris list API in terms of the native
- * linux API.  This has certain advantages in terms of leveraging the linux
- * list debugging infrastructure, but it also means that the internals of a
- * list differ slightly than on Solaris.  This is not a problem as long as
- * all callers stick to the published API.  The two major differences are:
- *
- * 1) A list_node_t is mapped to a linux list_head struct which changes
- *    the name of the list_next/list_prev pointers to next/prev respectively.
- *
- * 2) A list_node_t which is not attached to a list on Solaris is denoted
- *    by having its list_next/list_prev pointers set to NULL.  Under linux
- *    the next/prev pointers are set to LIST_POISON1 and LIST_POISON2
- *    respectively.  At this moment this only impacts the implementation
- *    of the list_link_init() and list_link_active() functions.
- */
-
-typedef struct list_head list_node_t;
-
-typedef struct list {
-	size_t list_size;
-	size_t list_offset;
-	list_node_t list_head;
-} list_t;
-
-#define	list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
-#define	list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
-
-static inline int
-list_is_empty(list_t *list)
-{
-	return (list_empty(&list->list_head));
-}
-
-static inline void
-list_link_init(list_node_t *node)
-{
-	node->next = LIST_POISON1;
-	node->prev = LIST_POISON2;
-}
-
-static inline void
-list_create(list_t *list, size_t size, size_t offset)
-{
-	list->list_size = size;
-	list->list_offset = offset;
-	INIT_LIST_HEAD(&list->list_head);
-}
-
-static inline void
-list_destroy(list_t *list)
-{
-	list_del(&list->list_head);
-}
-
-static inline void
-list_insert_head(list_t *list, void *object)
-{
-	list_add(list_d2l(list, object), &list->list_head);
-}
-
-static inline void
-list_insert_tail(list_t *list, void *object)
-{
-	list_add_tail(list_d2l(list, object), &list->list_head);
-}
-
-static inline void
-list_insert_after(list_t *list, void *object, void *nobject)
-{
-	if (object == NULL)
-		list_insert_head(list, nobject);
-	else
-		list_add(list_d2l(list, nobject), list_d2l(list, object));
-}
-
-static inline void
-list_insert_before(list_t *list, void *object, void *nobject)
-{
-	if (object == NULL)
-		list_insert_tail(list, nobject);
-	else
-		list_add_tail(list_d2l(list, nobject), list_d2l(list, object));
-}
-
-static inline void
-list_remove(list_t *list, void *object)
-{
-	list_del(list_d2l(list, object));
-}
-
-static inline void *
-list_remove_head(list_t *list)
-{
-	list_node_t *head = list->list_head.next;
-	if (head == &list->list_head)
-		return (NULL);
-
-	list_del(head);
-	return (list_object(list, head));
-}
-
-static inline void *
-list_remove_tail(list_t *list)
-{
-	list_node_t *tail = list->list_head.prev;
-	if (tail == &list->list_head)
-		return (NULL);
-
-	list_del(tail);
-	return (list_object(list, tail));
-}
-
-static inline void *
-list_head(list_t *list)
-{
-	if (list_is_empty(list))
-		return (NULL);
-
-	return (list_object(list, list->list_head.next));
-}
-
-static inline void *
-list_tail(list_t *list)
-{
-	if (list_is_empty(list))
-		return (NULL);
-
-	return (list_object(list, list->list_head.prev));
-}
-
-static inline void *
-list_next(list_t *list, void *object)
-{
-	list_node_t *node = list_d2l(list, object);
-
-	if (node->next != &list->list_head)
-		return (list_object(list, node->next));
-
-	return (NULL);
-}
-
-static inline void *
-list_prev(list_t *list, void *object)
-{
-	list_node_t *node = list_d2l(list, object);
-
-	if (node->prev != &list->list_head)
-		return (list_object(list, node->prev));
-
-	return (NULL);
-}
-
-static inline int
-list_link_active(list_node_t *node)
-{
-	return (node->next != LIST_POISON1) && (node->prev != LIST_POISON2);
-}
-
-static inline void
-spl_list_move_tail(list_t *dst, list_t *src)
-{
-	list_splice_init(&src->list_head, dst->list_head.prev);
-}
-
-#define	list_move_tail(dst, src)	spl_list_move_tail(dst, src)
-
-static inline void
-list_link_replace(list_node_t *old_node, list_node_t *new_node)
-{
-	new_node->next = old_node->next;
-	new_node->prev = old_node->prev;
-	old_node->prev->next = new_node;
-	old_node->next->prev = new_node;
-	list_link_init(old_node);
-}
-
-#endif /* SPL_LIST_H */

diff --git a/zfs/include/spl/sys/mode.h b/zfs/include/spl/sys/mode.h
deleted file mode 100644
index 02802d0..0000000
--- a/zfs/include/spl/sys/mode.h
+++ /dev/null

@@ -1,32 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_MODE_H
-#define	_SPL_MODE_H
-
-#define	IFTOVT(mode)	vn_mode_to_vtype(mode)
-#define	VTTOIF(vtype)	vn_vtype_to_mode(vtype)
-#define	MAKEIMODE(T, M) (VTTOIF(T) | ((M) & ~S_IFMT))
-
-#endif /* SPL_MODE_H */

diff --git a/zfs/include/spl/sys/mutex.h b/zfs/include/spl/sys/mutex.h
deleted file mode 100644
index 93f3af8..0000000
--- a/zfs/include/spl/sys/mutex.h
+++ /dev/null

@@ -1,185 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_MUTEX_H
-#define	_SPL_MUTEX_H
-
-#include <sys/types.h>
-#include <linux/sched.h>
-#include <linux/mutex.h>
-#include <linux/lockdep.h>
-#include <linux/compiler_compat.h>
-
-typedef enum {
-	MUTEX_DEFAULT	= 0,
-	MUTEX_SPIN	= 1,
-	MUTEX_ADAPTIVE	= 2,
-	MUTEX_NOLOCKDEP	= 3
-} kmutex_type_t;
-
-typedef struct {
-	struct mutex		m_mutex;
-	spinlock_t		m_lock;	/* used for serializing mutex_exit */
-	kthread_t		*m_owner;
-#ifdef CONFIG_LOCKDEP
-	kmutex_type_t		m_type;
-#endif /* CONFIG_LOCKDEP */
-} kmutex_t;
-
-#define	MUTEX(mp)		(&((mp)->m_mutex))
-
-static inline void
-spl_mutex_set_owner(kmutex_t *mp)
-{
-	mp->m_owner = current;
-}
-
-static inline void
-spl_mutex_clear_owner(kmutex_t *mp)
-{
-	mp->m_owner = NULL;
-}
-
-#define	mutex_owner(mp)		(READ_ONCE((mp)->m_owner))
-#define	mutex_owned(mp)		(mutex_owner(mp) == current)
-#define	MUTEX_HELD(mp)		mutex_owned(mp)
-#define	MUTEX_NOT_HELD(mp)	(!MUTEX_HELD(mp))
-
-#ifdef CONFIG_LOCKDEP
-static inline void
-spl_mutex_set_type(kmutex_t *mp, kmutex_type_t type)
-{
-	mp->m_type = type;
-}
-static inline void
-spl_mutex_lockdep_off_maybe(kmutex_t *mp)			\
-{								\
-	if (mp && mp->m_type == MUTEX_NOLOCKDEP)		\
-		lockdep_off();					\
-}
-static inline void
-spl_mutex_lockdep_on_maybe(kmutex_t *mp)			\
-{								\
-	if (mp && mp->m_type == MUTEX_NOLOCKDEP)		\
-		lockdep_on();					\
-}
-#else  /* CONFIG_LOCKDEP */
-#define	spl_mutex_set_type(mp, type)
-#define	spl_mutex_lockdep_off_maybe(mp)
-#define	spl_mutex_lockdep_on_maybe(mp)
-#endif /* CONFIG_LOCKDEP */
-
-/*
- * The following functions must be a #define	and not static inline.
- * This ensures that the native linux mutex functions (lock/unlock)
- * will be correctly located in the users code which is important
- * for the built in kernel lock analysis tools
- */
-#undef mutex_init
-#define	mutex_init(mp, name, type, ibc)				\
-{								\
-	static struct lock_class_key __key;			\
-	ASSERT(type == MUTEX_DEFAULT || type == MUTEX_NOLOCKDEP); \
-								\
-	__mutex_init(MUTEX(mp), (name) ? (#name) : (#mp), &__key); \
-	spin_lock_init(&(mp)->m_lock);				\
-	spl_mutex_clear_owner(mp);				\
-	spl_mutex_set_type(mp, type);				\
-}
-
-#undef mutex_destroy
-#define	mutex_destroy(mp)					\
-{								\
-	VERIFY3P(mutex_owner(mp), ==, NULL);			\
-}
-
-/* BEGIN CSTYLED */
-#define	mutex_tryenter(mp)					\
-({								\
-	int _rc_;						\
-								\
-	spl_mutex_lockdep_off_maybe(mp);			\
-	if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1)		\
-		spl_mutex_set_owner(mp);			\
-	spl_mutex_lockdep_on_maybe(mp);				\
-								\
-	_rc_;							\
-})
-/* END CSTYLED */
-
-#define	NESTED_SINGLE 1
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define	mutex_enter_nested(mp, subclass)			\
-{								\
-	ASSERT3P(mutex_owner(mp), !=, current);			\
-	spl_mutex_lockdep_off_maybe(mp);			\
-	mutex_lock_nested(MUTEX(mp), (subclass));		\
-	spl_mutex_lockdep_on_maybe(mp);				\
-	spl_mutex_set_owner(mp);				\
-}
-#else /* CONFIG_DEBUG_LOCK_ALLOC */
-#define	mutex_enter_nested(mp, subclass)			\
-{								\
-	ASSERT3P(mutex_owner(mp), !=, current);			\
-	spl_mutex_lockdep_off_maybe(mp);			\
-	mutex_lock(MUTEX(mp));					\
-	spl_mutex_lockdep_on_maybe(mp);				\
-	spl_mutex_set_owner(mp);				\
-}
-#endif /*  CONFIG_DEBUG_LOCK_ALLOC */
-
-#define	mutex_enter(mp) mutex_enter_nested((mp), 0)
-
-/*
- * The reason for the spinlock:
- *
- * The Linux mutex is designed with a fast-path/slow-path design such that it
- * does not guarantee serialization upon itself, allowing a race where latter
- * acquirers finish mutex_unlock before former ones.
- *
- * The race renders it unsafe to be used for serializing the freeing of an
- * object in which the mutex is embedded, where the latter acquirer could go
- * on to free the object while the former one is still doing mutex_unlock and
- * causing memory corruption.
- *
- * However, there are many places in ZFS where the mutex is used for
- * serializing object freeing, and the code is shared among other OSes without
- * this issue. Thus, we need the spinlock to force the serialization on
- * mutex_exit().
- *
- * See http://lwn.net/Articles/575477/ for the information about the race.
- */
-#define	mutex_exit(mp)						\
-{								\
-	spl_mutex_clear_owner(mp);				\
-	spin_lock(&(mp)->m_lock);				\
-	spl_mutex_lockdep_off_maybe(mp);			\
-	mutex_unlock(MUTEX(mp));				\
-	spl_mutex_lockdep_on_maybe(mp);				\
-	spin_unlock(&(mp)->m_lock);				\
-	/* NOTE: do not dereference mp after this point */	\
-}
-
-#endif /* _SPL_MUTEX_H */

diff --git a/zfs/include/spl/sys/param.h b/zfs/include/spl/sys/param.h
deleted file mode 100644
index 4ef9291..0000000
--- a/zfs/include/spl/sys/param.h
+++ /dev/null

@@ -1,36 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_PARAM_H
-#define	_SPL_PARAM_H
-
-#include <asm/page.h>
-
-/* Pages to bytes and back */
-#define	ptob(pages)			((pages) << PAGE_SHIFT)
-#define	btop(bytes)			((bytes) >> PAGE_SHIFT)
-
-#define	MAXUID				UINT32_MAX
-
-#endif /* SPL_PARAM_H */

diff --git a/zfs/include/spl/sys/proc.h b/zfs/include/spl/sys/proc.h
deleted file mode 100644
index 05c44bc..0000000
--- a/zfs/include/spl/sys/proc.h
+++ /dev/null

@@ -1,39 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_PROC_H
-#define	_SPL_PROC_H
-
-#include <linux/proc_fs.h>
-
-#ifndef HAVE_PDE_DATA
-#define	PDE_DATA(x) (PDE(x)->data)
-#endif
-
-extern struct proc_dir_entry *proc_spl_kstat;
-
-int spl_proc_init(void);
-void spl_proc_fini(void);
-
-#endif /* SPL_PROC_H */

diff --git a/zfs/include/spl/sys/processor.h b/zfs/include/spl/sys/processor.h
deleted file mode 100644
index a70101f..0000000
--- a/zfs/include/spl/sys/processor.h
+++ /dev/null

@@ -1,32 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef	_SPL_PROCESSOR_H
-#define	_SPL_PROCESSOR_H
-
-#define	getcpuid() smp_processor_id()
-
-typedef int	processorid_t;
-
-#endif /* _SPL_PROCESSOR_H */

diff --git a/zfs/include/spl/sys/procfs_list.h b/zfs/include/spl/sys/procfs_list.h
deleted file mode 100644
index eb1519c..0000000
--- a/zfs/include/spl/sys/procfs_list.h
+++ /dev/null

@@ -1,72 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2018 by Delphix. All rights reserved.
- */
-
-#ifndef	_SPL_PROCFS_LIST_H
-#define	_SPL_PROCFS_LIST_H
-
-#include <sys/kstat.h>
-#include <sys/mutex.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-
-typedef struct procfs_list procfs_list_t;
-struct procfs_list {
-	/* Accessed only by user of a procfs_list */
-	void		*pl_private;
-
-	/*
-	 * Accessed both by user of a procfs_list and by procfs_list
-	 * implementation
-	 */
-	kmutex_t	pl_lock;
-	list_t		pl_list;
-
-	/* Accessed only by procfs_list implementation */
-	uint64_t	pl_next_id;
-	int		(*pl_show)(struct seq_file *f, void *p);
-	int		(*pl_show_header)(struct seq_file *f);
-	int		(*pl_clear)(procfs_list_t *procfs_list);
-	size_t		pl_node_offset;
-	kstat_proc_entry_t	pl_kstat_entry;
-};
-
-typedef struct procfs_list_node {
-	list_node_t	pln_link;
-	uint64_t	pln_id;
-} procfs_list_node_t;
-
-void procfs_list_install(const char *module,
-    const char *name,
-    mode_t mode,
-    procfs_list_t *procfs_list,
-    int (*show)(struct seq_file *f, void *p),
-    int (*show_header)(struct seq_file *f),
-    int (*clear)(procfs_list_t *procfs_list),
-    size_t procfs_list_node_off);
-void procfs_list_uninstall(procfs_list_t *procfs_list);
-void procfs_list_destroy(procfs_list_t *procfs_list);
-
-void procfs_list_add(procfs_list_t *procfs_list, void *p);
-
-#endif	/* _SPL_PROCFS_LIST_H */

diff --git a/zfs/include/spl/sys/random.h b/zfs/include/spl/sys/random.h
deleted file mode 100644
index 93e244f..0000000
--- a/zfs/include/spl/sys/random.h
+++ /dev/null

@@ -1,40 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_RANDOM_H
-#define	_SPL_RANDOM_H
-
-#include <linux/module.h>
-#include <linux/random.h>
-
-static __inline__ int
-random_get_bytes(uint8_t *ptr, size_t len)
-{
-	get_random_bytes((void *)ptr, (int)len);
-	return (0);
-}
-
-extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
-
-#endif	/* _SPL_RANDOM_H */

diff --git a/zfs/include/spl/sys/rwlock.h b/zfs/include/spl/sys/rwlock.h
deleted file mode 100644
index 60f5bfd..0000000
--- a/zfs/include/spl/sys/rwlock.h
+++ /dev/null

@@ -1,201 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_RWLOCK_H
-#define	_SPL_RWLOCK_H
-
-#include <sys/types.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-
-typedef enum {
-	RW_DRIVER	= 2,
-	RW_DEFAULT	= 4,
-	RW_NOLOCKDEP	= 5
-} krw_type_t;
-
-typedef enum {
-	RW_NONE		= 0,
-	RW_WRITER	= 1,
-	RW_READER	= 2
-} krw_t;
-
-typedef struct {
-	struct rw_semaphore rw_rwlock;
-	kthread_t *rw_owner;
-#ifdef CONFIG_LOCKDEP
-	krw_type_t	rw_type;
-#endif /* CONFIG_LOCKDEP */
-} krwlock_t;
-
-#define	SEM(rwp)	(&(rwp)->rw_rwlock)
-
-static inline void
-spl_rw_set_owner(krwlock_t *rwp)
-{
-	rwp->rw_owner = current;
-}
-
-static inline void
-spl_rw_clear_owner(krwlock_t *rwp)
-{
-	rwp->rw_owner = NULL;
-}
-
-static inline kthread_t *
-rw_owner(krwlock_t *rwp)
-{
-	return (rwp->rw_owner);
-}
-
-#ifdef CONFIG_LOCKDEP
-static inline void
-spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
-{
-	rwp->rw_type = type;
-}
-static inline void
-spl_rw_lockdep_off_maybe(krwlock_t *rwp)		\
-{							\
-	if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\
-		lockdep_off();				\
-}
-static inline void
-spl_rw_lockdep_on_maybe(krwlock_t *rwp)			\
-{							\
-	if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\
-		lockdep_on();				\
-}
-#else  /* CONFIG_LOCKDEP */
-#define	spl_rw_set_type(rwp, type)
-#define	spl_rw_lockdep_off_maybe(rwp)
-#define	spl_rw_lockdep_on_maybe(rwp)
-#endif /* CONFIG_LOCKDEP */
-
-static inline int
-RW_LOCK_HELD(krwlock_t *rwp)
-{
-	return (rwsem_is_locked(SEM(rwp)));
-}
-
-static inline int
-RW_WRITE_HELD(krwlock_t *rwp)
-{
-	return (rw_owner(rwp) == current);
-}
-
-static inline int
-RW_READ_HELD(krwlock_t *rwp)
-{
-	return (RW_LOCK_HELD(rwp) && rw_owner(rwp) == NULL);
-}
-
-/*
- * The following functions must be a #define and not static inline.
- * This ensures that the native linux semaphore functions (down/up)
- * will be correctly located in the users code which is important
- * for the built in kernel lock analysis tools
- */
-/* BEGIN CSTYLED */
-#define	rw_init(rwp, name, type, arg)					\
-({									\
-	static struct lock_class_key __key;				\
-	ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP);		\
-									\
-	__init_rwsem(SEM(rwp), #rwp, &__key);				\
-	spl_rw_clear_owner(rwp);					\
-	spl_rw_set_type(rwp, type);					\
-})
-
-/*
- * The Linux rwsem implementation does not require a matching destroy.
- */
-#define	rw_destroy(rwp)		((void) 0)
-
-/*
- * Upgrading a rwsem from a reader to a writer is not supported by the
- * Linux kernel.  The lock must be dropped and reacquired as a writer.
- */
-#define	rw_tryupgrade(rwp)	RW_WRITE_HELD(rwp)
-
-#define	rw_tryenter(rwp, rw)						\
-({									\
-	int _rc_ = 0;							\
-									\
-	spl_rw_lockdep_off_maybe(rwp);					\
-	switch (rw) {							\
-	case RW_READER:							\
-		_rc_ = down_read_trylock(SEM(rwp));			\
-		break;							\
-	case RW_WRITER:							\
-		if ((_rc_ = down_write_trylock(SEM(rwp))))		\
-			spl_rw_set_owner(rwp);				\
-		break;							\
-	default:							\
-		VERIFY(0);						\
-	}								\
-	spl_rw_lockdep_on_maybe(rwp);					\
-	_rc_;								\
-})
-
-#define	rw_enter(rwp, rw)						\
-({									\
-	spl_rw_lockdep_off_maybe(rwp);					\
-	switch (rw) {							\
-	case RW_READER:							\
-		down_read(SEM(rwp));					\
-		break;							\
-	case RW_WRITER:							\
-		down_write(SEM(rwp));					\
-		spl_rw_set_owner(rwp);					\
-		break;							\
-	default:							\
-		VERIFY(0);						\
-	}								\
-	spl_rw_lockdep_on_maybe(rwp);					\
-})
-
-#define	rw_exit(rwp)							\
-({									\
-	spl_rw_lockdep_off_maybe(rwp);					\
-	if (RW_WRITE_HELD(rwp)) {					\
-		spl_rw_clear_owner(rwp);				\
-		up_write(SEM(rwp));					\
-	} else {							\
-		ASSERT(RW_READ_HELD(rwp));				\
-		up_read(SEM(rwp));					\
-	}								\
-	spl_rw_lockdep_on_maybe(rwp);					\
-})
-
-#define	rw_downgrade(rwp)						\
-({									\
-	spl_rw_lockdep_off_maybe(rwp);					\
-	spl_rw_clear_owner(rwp);					\
-	downgrade_write(SEM(rwp));					\
-	spl_rw_lockdep_on_maybe(rwp);					\
-})
-/* END CSTYLED */
-
-#endif /* _SPL_RWLOCK_H */

diff --git a/zfs/include/spl/sys/shrinker.h b/zfs/include/spl/sys/shrinker.h
deleted file mode 100644
index 28c1fa7..0000000
--- a/zfs/include/spl/sys/shrinker.h
+++ /dev/null

@@ -1,209 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SHRINKER_H
-#define	_SPL_SHRINKER_H
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-
-#if !defined(HAVE_SHRINK_CONTROL_STRUCT)
-struct shrink_control {
-	gfp_t gfp_mask;
-	unsigned long nr_to_scan;
-};
-#endif /* HAVE_SHRINK_CONTROL_STRUCT */
-
-/*
- * Due to frequent changes in the shrinker API the following
- * compatibility wrappers should be used.  They are as follows:
- *
- * SPL_SHRINKER_DECLARE is used to declare the shrinker which is
- * passed to spl_register_shrinker()/spl_unregister_shrinker().  Use
- * shrinker_name to set the shrinker variable name, shrinker_callback
- * to set the callback function, and seek_cost to define the cost of
- * reclaiming an object.
- *
- *   SPL_SHRINKER_DECLARE(shrinker_name, shrinker_callback, seek_cost);
- *
- * SPL_SHRINKER_CALLBACK_FWD_DECLARE is used when a forward declaration
- * of the shrinker callback function is required.  Only the callback
- * function needs to be passed.
- *
- *   SPL_SHRINKER_CALLBACK_FWD_DECLARE(shrinker_callback);
- *
- * SPL_SHRINKER_CALLBACK_WRAPPER is used to declare the callback function
- * which is registered with the shrinker.  This function will call your
- * custom shrinker which must use the following prototype.  Notice the
- * leading __'s, these must be appended to the callback_function name.
- *
- *   int  __shrinker_callback(struct shrinker *, struct shrink_control *)
- *   SPL_SHRINKER_CALLBACK_WRAPPER(shrinker_callback);a
- *
- *
- * Example:
- *
- * SPL_SHRINKER_CALLBACK_FWD_DECLARE(my_shrinker_fn);
- * SPL_SHRINKER_DECLARE(my_shrinker, my_shrinker_fn, 1);
- *
- * static int
- * __my_shrinker_fn(struct shrinker *shrink, struct shrink_control *sc)
- * {
- *	if (sc->nr_to_scan) {
- *		...scan objects in the cache and reclaim them...
- *	}
- *
- *	...calculate number of objects in the cache...
- *
- *	return (number of objects in the cache);
- * }
- * SPL_SHRINKER_CALLBACK_WRAPPER(my_shrinker_fn);
- */
-
-#define	spl_register_shrinker(x)	register_shrinker(x)
-#define	spl_unregister_shrinker(x)	unregister_shrinker(x)
-
-/*
- * Linux 2.6.23 - 2.6.34 Shrinker API Compatibility.
- */
-#if defined(HAVE_2ARGS_OLD_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(s, x, y)					\
-static struct shrinker s = {						\
-	.shrink = x,							\
-	.seeks = y							\
-}
-
-#define	SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn)				\
-static int fn(int nr_to_scan, unsigned int gfp_mask)
-
-#define	SPL_SHRINKER_CALLBACK_WRAPPER(fn)				\
-static int								\
-fn(int nr_to_scan, unsigned int gfp_mask)				\
-{									\
-	struct shrink_control sc;					\
-									\
-	sc.nr_to_scan = nr_to_scan;					\
-	sc.gfp_mask = gfp_mask;						\
-									\
-	return (__ ## fn(NULL, &sc));					\
-}
-
-/*
- * Linux 2.6.35 to 2.6.39 Shrinker API Compatibility.
- */
-#elif defined(HAVE_3ARGS_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(s, x, y)					\
-static struct shrinker s = {						\
-	.shrink = x,							\
-	.seeks = y							\
-}
-
-#define	SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn)				\
-static int fn(struct shrinker *, int, unsigned int)
-
-#define	SPL_SHRINKER_CALLBACK_WRAPPER(fn)				\
-static int								\
-fn(struct shrinker *shrink, int nr_to_scan, unsigned int gfp_mask)	\
-{									\
-	struct shrink_control sc;					\
-									\
-	sc.nr_to_scan = nr_to_scan;					\
-	sc.gfp_mask = gfp_mask;						\
-									\
-	return (__ ## fn(shrink, &sc));					\
-}
-
-/*
- * Linux 3.0 to 3.11 Shrinker API Compatibility.
- */
-#elif defined(HAVE_2ARGS_NEW_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(s, x, y)					\
-static struct shrinker s = {						\
-	.shrink = x,							\
-	.seeks = y							\
-}
-
-#define	SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn)				\
-static int fn(struct shrinker *, struct shrink_control *)
-
-#define	SPL_SHRINKER_CALLBACK_WRAPPER(fn)				\
-static int								\
-fn(struct shrinker *shrink, struct shrink_control *sc)			\
-{									\
-	return (__ ## fn(shrink, sc));					\
-}
-
-/*
- * Linux 3.12 and later Shrinker API Compatibility.
- */
-#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-#define	SPL_SHRINKER_DECLARE(s, x, y)					\
-static struct shrinker s = {						\
-	.count_objects = x ## _count_objects,				\
-	.scan_objects = x ## _scan_objects,				\
-	.seeks = y							\
-}
-
-#define	SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn)				\
-static unsigned long fn ## _count_objects(struct shrinker *,		\
-    struct shrink_control *);						\
-static unsigned long fn ## _scan_objects(struct shrinker *,		\
-    struct shrink_control *)
-
-#define	SPL_SHRINKER_CALLBACK_WRAPPER(fn)				\
-static unsigned long							\
-fn ## _count_objects(struct shrinker *shrink, struct shrink_control *sc)\
-{									\
-	int __ret__;							\
-									\
-	sc->nr_to_scan = 0;						\
-	__ret__ = __ ## fn(NULL, sc);					\
-									\
-	/* Errors may not be returned and must be converted to zeros */	\
-	return ((__ret__ < 0) ? 0 : __ret__);				\
-}									\
-									\
-static unsigned long							\
-fn ## _scan_objects(struct shrinker *shrink, struct shrink_control *sc)	\
-{									\
-	int __ret__;							\
-									\
-	__ret__ = __ ## fn(NULL, sc);					\
-	return ((__ret__ < 0) ? SHRINK_STOP : __ret__);			\
-}
-#else
-/*
- * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
- */
-#error "Unknown shrinker callback"
-#endif
-
-#if defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-typedef unsigned long	spl_shrinker_t;
-#else
-typedef int		spl_shrinker_t;
-#define	SHRINK_STOP	(-1)
-#endif
-
-#endif /* SPL_SHRINKER_H */

diff --git a/zfs/include/spl/sys/sid.h b/zfs/include/spl/sys/sid.h
deleted file mode 100644
index 731b62c..0000000
--- a/zfs/include/spl/sys/sid.h
+++ /dev/null

@@ -1,61 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SID_H
-#define	_SPL_SID_H
-
-typedef struct ksiddomain {
-	char		*kd_name;
-} ksiddomain_t;
-
-typedef enum ksid_index {
-	KSID_USER,
-	KSID_GROUP,
-	KSID_OWNER,
-	KSID_COUNT
-} ksid_index_t;
-
-typedef int ksid_t;
-
-static inline ksiddomain_t *
-ksid_lookupdomain(const char *dom)
-{
-	ksiddomain_t *kd;
-	int len = strlen(dom);
-
-	kd = kmem_zalloc(sizeof (ksiddomain_t), KM_SLEEP);
-	kd->kd_name = kmem_zalloc(len + 1, KM_SLEEP);
-	memcpy(kd->kd_name, dom, len);
-
-	return (kd);
-}
-
-static inline void
-ksiddomain_rele(ksiddomain_t *ksid)
-{
-	kmem_free(ksid->kd_name, strlen(ksid->kd_name) + 1);
-	kmem_free(ksid, sizeof (ksiddomain_t));
-}
-
-#endif /* _SPL_SID_H */

diff --git a/zfs/include/spl/sys/signal.h b/zfs/include/spl/sys/signal.h
deleted file mode 100644
index 36b8b5d..0000000
--- a/zfs/include/spl/sys/signal.h
+++ /dev/null

@@ -1,55 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SIGNAL_H
-#define	_SPL_SIGNAL_H
-
-#include <linux/sched.h>
-
-#ifdef HAVE_SCHED_SIGNAL_HEADER
-#include <linux/sched/signal.h>
-#endif
-
-#define	FORREAL		0	/* Usual side-effects */
-#define	JUSTLOOKING	1	/* Don't stop the process */
-
-/*
- * The "why" argument indicates the allowable side-effects of the call:
- *
- * FORREAL:  Extract the next pending signal from p_sig into p_cursig;
- * stop the process if a stop has been requested or if a traced signal
- * is pending.
- *
- * JUSTLOOKING:  Don't stop the process, just indicate whether or not
- * a signal might be pending (FORREAL is needed to tell for sure).
- */
-static __inline__ int
-issig(int why)
-{
-	ASSERT(why == FORREAL || why == JUSTLOOKING);
-
-	return (signal_pending(current));
-}
-
-#endif /* SPL_SIGNAL_H */

diff --git a/zfs/include/spl/sys/stat.h b/zfs/include/spl/sys/stat.h
deleted file mode 100644
index 83018e8..0000000
--- a/zfs/include/spl/sys/stat.h
+++ /dev/null

@@ -1,30 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_STAT_H
-#define	_SPL_STAT_H
-
-#include <linux/stat.h>
-
-#endif /* SPL_STAT_H */

diff --git a/zfs/include/spl/sys/strings.h b/zfs/include/spl/sys/strings.h
deleted file mode 100644
index 8b810c9..0000000
--- a/zfs/include/spl/sys/strings.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- *  Copyright (C) 2018 Lawrence Livermore National Security, LLC.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef _SPL_SYS_STRINGS_H
-#define	_SPL_SYS_STRINGS_H
-
-#include <linux/string.h>
-
-#define	bzero(ptr, size)		memset(ptr, 0, size)
-#define	bcopy(src, dest, size)		memmove(dest, src, size)
-#define	bcmp(src, dest, size)		memcmp((src), (dest), (size_t)(size))
-
-#ifndef HAVE_KSTRTOUL
-#define	kstrtoul strict_strtoul
-#endif
-
-#endif	/* _SPL_SYS_STRINGS_H */

diff --git a/zfs/include/spl/sys/sunddi.h b/zfs/include/spl/sys/sunddi.h
deleted file mode 100644
index 29a6fe0..0000000
--- a/zfs/include/spl/sys/sunddi.h
+++ /dev/null

@@ -1,58 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SUNDDI_H
-#define	_SPL_SUNDDI_H
-
-#include <sys/cred.h>
-#include <sys/uio.h>
-#include <sys/mutex.h>
-#include <sys/u8_textprep.h>
-#include <sys/vnode.h>
-
-typedef int ddi_devid_t;
-
-#define	DDI_DEV_T_NONE				((dev_t)-1)
-#define	DDI_DEV_T_ANY				((dev_t)-2)
-#define	DI_MAJOR_T_UNKNOWN			((major_t)0)
-
-#define	DDI_PROP_DONTPASS			0x0001
-#define	DDI_PROP_CANSLEEP			0x0002
-
-#define	DDI_SUCCESS				0
-#define	DDI_FAILURE				-1
-
-#define	ddi_prop_lookup_string(x1, x2, x3, x4, x5)	(*x5 = NULL)
-#define	ddi_prop_free(x)				(void)0
-#define	ddi_root_node()					(void)0
-
-extern int ddi_strtoul(const char *, char **, int, unsigned long *);
-extern int ddi_strtol(const char *, char **, int, long *);
-extern int ddi_strtoull(const char *, char **, int, unsigned long long *);
-extern int ddi_strtoll(const char *, char **, int, long long *);
-
-extern int ddi_copyin(const void *from, void *to, size_t len, int flags);
-extern int ddi_copyout(const void *from, void *to, size_t len, int flags);
-
-#endif /* SPL_SUNDDI_H */

diff --git a/zfs/include/spl/sys/sysmacros.h b/zfs/include/spl/sys/sysmacros.h
deleted file mode 100644
index e11eaec..0000000
--- a/zfs/include/spl/sys/sysmacros.h
+++ /dev/null

@@ -1,227 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SYSMACROS_H
-#define	_SPL_SYSMACROS_H
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/cpumask.h>
-#include <sys/debug.h>
-#include <sys/zone.h>
-#include <sys/signal.h>
-#include <asm/page.h>
-
-#ifdef HAVE_SCHED_RT_HEADER
-#include <linux/sched/rt.h>
-#endif
-
-#ifndef _KERNEL
-#define	_KERNEL				__KERNEL__
-#endif
-
-#define	FALSE				0
-#define	TRUE				1
-
-#define	INT8_MAX			(127)
-#define	INT8_MIN			(-128)
-#define	UINT8_MAX			(255)
-#define	UINT8_MIN			(0)
-
-#define	INT16_MAX			(32767)
-#define	INT16_MIN			(-32768)
-#define	UINT16_MAX			(65535)
-#define	UINT16_MIN			(0)
-
-#define	INT32_MAX			INT_MAX
-#define	INT32_MIN			INT_MIN
-#define	UINT32_MAX			UINT_MAX
-#define	UINT32_MIN			UINT_MIN
-
-#define	INT64_MAX			LLONG_MAX
-#define	INT64_MIN			LLONG_MIN
-#define	UINT64_MAX			ULLONG_MAX
-#define	UINT64_MIN			ULLONG_MIN
-
-#define	NBBY				8
-
-#define	MAXMSGLEN			256
-#define	MAXNAMELEN			256
-#define	MAXPATHLEN			4096
-#define	MAXOFFSET_T			LLONG_MAX
-#define	MAXBSIZE			8192
-#define	DEV_BSIZE			512
-#define	DEV_BSHIFT			9 /* log2(DEV_BSIZE) */
-
-#define	proc_pageout			NULL
-#define	curproc				current
-#define	max_ncpus			num_possible_cpus()
-#define	boot_ncpus			num_online_cpus()
-#define	CPU_SEQID			smp_processor_id()
-#define	is_system_labeled()		0
-
-#ifndef RLIM64_INFINITY
-#define	RLIM64_INFINITY			(~0ULL)
-#endif
-
-/*
- * 0..MAX_PRIO-1:		Process priority
- * 0..MAX_RT_PRIO-1:		RT priority tasks
- * MAX_RT_PRIO..MAX_PRIO-1:	SCHED_NORMAL tasks
- *
- * Treat shim tasks as SCHED_NORMAL tasks
- */
-#define	minclsyspri			(MAX_PRIO-1)
-#define	maxclsyspri			(MAX_RT_PRIO)
-#define	defclsyspri			(DEFAULT_PRIO)
-
-#ifndef NICE_TO_PRIO
-#define	NICE_TO_PRIO(nice)		(MAX_RT_PRIO + (nice) + 20)
-#endif
-#ifndef PRIO_TO_NICE
-#define	PRIO_TO_NICE(prio)		((prio) - MAX_RT_PRIO - 20)
-#endif
-
-/*
- * Missing macros
- */
-#ifndef PAGESIZE
-#define	PAGESIZE			PAGE_SIZE
-#endif
-
-#ifndef PAGESHIFT
-#define	PAGESHIFT			PAGE_SHIFT
-#endif
-
-/* Dtrace probes do not exist in the linux kernel */
-#ifdef DTRACE_PROBE
-#undef  DTRACE_PROBE
-#endif  /* DTRACE_PROBE */
-#define	DTRACE_PROBE(a)					((void)0)
-
-#ifdef DTRACE_PROBE1
-#undef  DTRACE_PROBE1
-#endif  /* DTRACE_PROBE1 */
-#define	DTRACE_PROBE1(a, b, c)				((void)0)
-
-#ifdef DTRACE_PROBE2
-#undef  DTRACE_PROBE2
-#endif  /* DTRACE_PROBE2 */
-#define	DTRACE_PROBE2(a, b, c, d, e)			((void)0)
-
-#ifdef DTRACE_PROBE3
-#undef  DTRACE_PROBE3
-#endif  /* DTRACE_PROBE3 */
-#define	DTRACE_PROBE3(a, b, c, d, e, f, g)		((void)0)
-
-#ifdef DTRACE_PROBE4
-#undef  DTRACE_PROBE4
-#endif  /* DTRACE_PROBE4 */
-#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)	((void)0)
-
-/* Missing globals */
-extern char spl_gitrev[64];
-extern unsigned long spl_hostid;
-
-/* Missing misc functions */
-extern uint32_t zone_get_hostid(void *zone);
-extern void spl_setup(void);
-extern void spl_cleanup(void);
-
-#define	highbit(x)		__fls(x)
-#define	lowbit(x)		__ffs(x)
-
-#define	highbit64(x)		fls64(x)
-#define	makedevice(maj, min)	makedev(maj, min)
-
-/* common macros */
-#ifndef MIN
-#define	MIN(a, b)		((a) < (b) ? (a) : (b))
-#endif
-#ifndef MAX
-#define	MAX(a, b)		((a) < (b) ? (b) : (a))
-#endif
-#ifndef ABS
-#define	ABS(a)			((a) < 0 ? -(a) : (a))
-#endif
-#ifndef DIV_ROUND_UP
-#define	DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
-#endif
-#ifndef roundup
-#define	roundup(x, y)		((((x) + ((y) - 1)) / (y)) * (y))
-#endif
-#ifndef howmany
-#define	howmany(x, y)		(((x) + ((y) - 1)) / (y))
-#endif
-
-/*
- * Compatibility macros/typedefs needed for Solaris -> Linux port
- */
-#define	P2ALIGN(x, align)	((x) & -(align))
-#define	P2CROSS(x, y, align)	(((x) ^ (y)) > (align) - 1)
-#define	P2ROUNDUP(x, align)	((((x) - 1) | ((align) - 1)) + 1)
-#define	P2PHASE(x, align)	((x) & ((align) - 1))
-#define	P2NPHASE(x, align)	(-(x) & ((align) - 1))
-#define	ISP2(x)			(((x) & ((x) - 1)) == 0)
-#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
-#define	P2BOUNDARY(off, len, align) \
-				(((off) ^ ((off) + (len) - 1)) > (align) - 1)
-
-/*
- * Typed version of the P2* macros.  These macros should be used to ensure
- * that the result is correctly calculated based on the data type of (x),
- * which is passed in as the last argument, regardless of the data
- * type of the alignment.  For example, if (x) is of type uint64_t,
- * and we want to round it up to a page boundary using "PAGESIZE" as
- * the alignment, we can do either
- *
- * P2ROUNDUP(x, (uint64_t)PAGESIZE)
- * or
- * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
- */
-#define	P2ALIGN_TYPED(x, align, type)   \
-	((type)(x) & -(type)(align))
-#define	P2PHASE_TYPED(x, align, type)   \
-	((type)(x) & ((type)(align) - 1))
-#define	P2NPHASE_TYPED(x, align, type)  \
-	(-(type)(x) & ((type)(align) - 1))
-#define	P2ROUNDUP_TYPED(x, align, type) \
-	((((type)(x) - 1) | ((type)(align) - 1)) + 1)
-#define	P2END_TYPED(x, align, type)     \
-	(-(~(type)(x) & -(type)(align)))
-#define	P2PHASEUP_TYPED(x, align, phase, type)  \
-	((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
-#define	P2CROSS_TYPED(x, y, align, type)	\
-	(((type)(x) ^ (type)(y)) > (type)(align) - 1)
-#define	P2SAMEHIGHBIT_TYPED(x, y, type) \
-	(((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
-
-#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
-
-/* avoid any possibility of clashing with <stddef.h> version */
-
-#define	offsetof(s, m)  ((size_t)(&(((s *)0)->m)))
-#endif
-
-#endif  /* _SPL_SYSMACROS_H */

diff --git a/zfs/include/spl/sys/systeminfo.h b/zfs/include/spl/sys/systeminfo.h
deleted file mode 100644
index 2255691..0000000
--- a/zfs/include/spl/sys/systeminfo.h
+++ /dev/null

@@ -1,36 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_SYSTEMINFO_H
-#define	_SPL_SYSTEMINFO_H
-
-#define	HW_HOSTID_LEN		11		/* minimum buffer size needed */
-						/* to hold a decimal or hex */
-						/* hostid string */
-
-/* Supplemental definitions for Linux. */
-#define	HW_HOSTID_PATH		"/etc/hostid"   /* binary configuration file */
-#define	HW_HOSTID_MASK		0xFFFFFFFF 	/* significant hostid bits */
-
-#endif /* SPL_SYSTEMINFO_H */

diff --git a/zfs/include/spl/sys/taskq.h b/zfs/include/spl/sys/taskq.h
deleted file mode 100644
index 7353367..0000000
--- a/zfs/include/spl/sys/taskq.h
+++ /dev/null

@@ -1,163 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TASKQ_H
-#define	_SPL_TASKQ_H
-
-#include <linux/module.h>
-#include <linux/gfp.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/kthread.h>
-#include <sys/types.h>
-#include <sys/thread.h>
-#include <sys/rwlock.h>
-#include <sys/wait.h>
-
-#define	TASKQ_NAMELEN		31
-
-#define	TASKQ_PREPOPULATE	0x00000001
-#define	TASKQ_CPR_SAFE		0x00000002
-#define	TASKQ_DYNAMIC		0x00000004
-#define	TASKQ_THREADS_CPU_PCT	0x00000008
-#define	TASKQ_DC_BATCH		0x00000010
-#define	TASKQ_ACTIVE		0x80000000
-
-/*
- * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as
- * KM_SLEEP/KM_NOSLEEP.  TQ_NOQUEUE/TQ_NOALLOC are set particularly
- * large so as not to conflict with already used GFP_* defines.
- */
-#define	TQ_SLEEP		0x00000000
-#define	TQ_NOSLEEP		0x00000001
-#define	TQ_PUSHPAGE		0x00000002
-#define	TQ_NOQUEUE		0x01000000
-#define	TQ_NOALLOC		0x02000000
-#define	TQ_NEW			0x04000000
-#define	TQ_FRONT		0x08000000
-
-/*
- * Reserved taskqid values.
- */
-#define	TASKQID_INVALID		((taskqid_t)0)
-#define	TASKQID_INITIAL		((taskqid_t)1)
-
-/*
- * spin_lock(lock) and spin_lock_nested(lock,0) are equivalent,
- * so TQ_LOCK_DYNAMIC must not evaluate to 0
- */
-typedef enum tq_lock_role {
-	TQ_LOCK_GENERAL =	0,
-	TQ_LOCK_DYNAMIC =	1,
-} tq_lock_role_t;
-
-typedef unsigned long taskqid_t;
-typedef void (task_func_t)(void *);
-
-typedef struct taskq {
-	spinlock_t		tq_lock;	/* protects taskq_t */
-	char			*tq_name;	/* taskq name */
-	int			tq_instance;	/* instance of tq_name */
-	struct list_head	tq_thread_list;	/* list of all threads */
-	struct list_head	tq_active_list;	/* list of active threads */
-	int			tq_nactive;	/* # of active threads */
-	int			tq_nthreads;	/* # of existing threads */
-	int			tq_nspawn;	/* # of threads being spawned */
-	int			tq_maxthreads;	/* # of threads maximum */
-	int			tq_pri;		/* priority */
-	int			tq_minalloc;	/* min taskq_ent_t pool size */
-	int			tq_maxalloc;	/* max taskq_ent_t pool size */
-	int			tq_nalloc;	/* cur taskq_ent_t pool size */
-	uint_t			tq_flags;	/* flags */
-	taskqid_t		tq_next_id;	/* next pend/work id */
-	taskqid_t		tq_lowest_id;	/* lowest pend/work id */
-	struct list_head	tq_free_list;	/* free taskq_ent_t's */
-	struct list_head	tq_pend_list;	/* pending taskq_ent_t's */
-	struct list_head	tq_prio_list;	/* priority taskq_ent_t's */
-	struct list_head	tq_delay_list;	/* delayed taskq_ent_t's */
-	struct list_head	tq_taskqs;	/* all taskq_t's */
-	spl_wait_queue_head_t	tq_work_waitq;	/* new work waitq */
-	spl_wait_queue_head_t	tq_wait_waitq;	/* wait waitq */
-	tq_lock_role_t		tq_lock_class;	/* class when taking tq_lock */
-} taskq_t;
-
-typedef struct taskq_ent {
-	spinlock_t		tqent_lock;
-	spl_wait_queue_head_t	tqent_waitq;
-	struct timer_list	tqent_timer;
-	struct list_head	tqent_list;
-	taskqid_t		tqent_id;
-	task_func_t		*tqent_func;
-	void			*tqent_arg;
-	taskq_t			*tqent_taskq;
-	uintptr_t		tqent_flags;
-	unsigned long		tqent_birth;
-} taskq_ent_t;
-
-#define	TQENT_FLAG_PREALLOC	0x1
-#define	TQENT_FLAG_CANCEL	0x2
-
-typedef struct taskq_thread {
-	struct list_head	tqt_thread_list;
-	struct list_head	tqt_active_list;
-	struct task_struct	*tqt_thread;
-	taskq_t			*tqt_tq;
-	taskqid_t		tqt_id;
-	taskq_ent_t		*tqt_task;
-	uintptr_t		tqt_flags;
-} taskq_thread_t;
-
-/* Global system-wide dynamic task queue available for all consumers */
-extern taskq_t *system_taskq;
-/* Global dynamic task queue for long delay */
-extern taskq_t *system_delay_taskq;
-
-/* List of all taskqs */
-extern struct list_head tq_list;
-extern struct rw_semaphore tq_list_sem;
-
-extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
-extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
-    uint_t, clock_t);
-extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
-    taskq_ent_t *);
-extern int taskq_empty_ent(taskq_ent_t *);
-extern void taskq_init_ent(taskq_ent_t *);
-extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
-extern void taskq_destroy(taskq_t *);
-extern void taskq_wait_id(taskq_t *, taskqid_t);
-extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
-extern void taskq_wait(taskq_t *);
-extern int taskq_cancel_id(taskq_t *, taskqid_t);
-extern int taskq_member(taskq_t *, kthread_t *);
-
-#define	taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \
-    taskq_create(name, nthreads, pri, min, max, flags)
-#define	taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \
-    taskq_create(name, nthreads, maxclsyspri, min, max, flags)
-
-int spl_taskq_init(void);
-void spl_taskq_fini(void);
-
-#endif  /* _SPL_TASKQ_H */

diff --git a/zfs/include/spl/sys/thread.h b/zfs/include/spl/sys/thread.h
deleted file mode 100644
index 3762717..0000000
--- a/zfs/include/spl/sys/thread.h
+++ /dev/null

@@ -1,69 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_THREAD_H
-#define	_SPL_THREAD_H
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <sys/tsd.h>
-
-/*
- * Thread interfaces
- */
-#define	TP_MAGIC			0x53535353
-
-#define	TS_SLEEP			TASK_INTERRUPTIBLE
-#define	TS_RUN				TASK_RUNNING
-#define	TS_ZOMB				EXIT_ZOMBIE
-#define	TS_STOPPED			TASK_STOPPED
-
-typedef void (*thread_func_t)(void *);
-
-/* BEGIN CSTYLED */
-#define	thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
-	__thread_create(stk, stksize, (thread_func_t)func,		\
-	#func, arg, len, pp, state, pri)
-/* END CSTYLED */
-
-#define	thread_exit()			__thread_exit()
-#define	thread_join(t)			VERIFY(0)
-#define	curthread			current
-#define	getcomm()			current->comm
-#define	getpid()			current->pid
-
-extern kthread_t *__thread_create(caddr_t stk, size_t  stksize,
-    thread_func_t func, const char *name, void *args, size_t len, proc_t *pp,
-    int state, pri_t pri);
-extern void __thread_exit(void);
-extern struct task_struct *spl_kthread_create(int (*func)(void *),
-    void *data, const char namefmt[], ...);
-
-extern proc_t p0;
-
-#endif  /* _SPL_THREAD_H */

diff --git a/zfs/include/spl/sys/time.h b/zfs/include/spl/sys/time.h
deleted file mode 100644
index 4309c30..0000000
--- a/zfs/include/spl/sys/time.h
+++ /dev/null

@@ -1,118 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TIME_H
-#define	_SPL_TIME_H
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <sys/types.h>
-#include <sys/timer.h>
-
-#if defined(CONFIG_64BIT)
-#define	TIME_MAX			INT64_MAX
-#define	TIME_MIN			INT64_MIN
-#else
-#define	TIME_MAX			INT32_MAX
-#define	TIME_MIN			INT32_MIN
-#endif
-
-#define	SEC				1
-#define	MILLISEC			1000
-#define	MICROSEC			1000000
-#define	NANOSEC				1000000000
-
-#define	MSEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MILLISEC))
-#define	NSEC2MSEC(n)	((n) / (NANOSEC / MILLISEC))
-
-#define	USEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / MICROSEC))
-#define	NSEC2USEC(n)	((n) / (NANOSEC / MICROSEC))
-
-#define	NSEC2SEC(n)	((n) / (NANOSEC / SEC))
-#define	SEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / SEC))
-
-static const int hz = HZ;
-
-typedef longlong_t		hrtime_t;
-typedef struct timespec		timespec_t;
-
-#define	TIMESPEC_OVERFLOW(ts)		\
-	((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX)
-
-#if defined(HAVE_INODE_TIMESPEC64_TIMES)
-typedef struct timespec64	inode_timespec_t;
-#else
-typedef struct timespec		inode_timespec_t;
-#endif
-
-/* Include for Lustre compatibility */
-#define	timestruc_t	inode_timespec_t
-
-static inline void
-gethrestime(inode_timespec_t *ts)
-{
-#if defined(HAVE_INODE_TIMESPEC64_TIMES)
-
-#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64)
-	ktime_get_coarse_real_ts64(ts);
-#else
-	*ts = current_kernel_time64();
-#endif /* HAVE_KTIME_GET_COARSE_REAL_TS64 */
-
-#else
-	*ts = current_kernel_time();
-#endif
-}
-
-static inline uint64_t
-gethrestime_sec(void)
-{
-#if defined(HAVE_INODE_TIMESPEC64_TIMES)
-#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64)
-	inode_timespec_t ts;
-	ktime_get_coarse_real_ts64(&ts);
-#else
-	inode_timespec_t ts = current_kernel_time64();
-#endif  /* HAVE_KTIME_GET_COARSE_REAL_TS64 */
-
-#else
-	inode_timespec_t ts = current_kernel_time();
-#endif
-	return (ts.tv_sec);
-}
-
-static inline hrtime_t
-gethrtime(void)
-{
-#if defined(HAVE_KTIME_GET_RAW_TS64)
-	struct timespec64 ts;
-	ktime_get_raw_ts64(&ts);
-#else
-	struct timespec ts;
-	getrawmonotonic(&ts);
-#endif
-	return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec);
-}
-
-#endif  /* _SPL_TIME_H */

diff --git a/zfs/include/spl/sys/timer.h b/zfs/include/spl/sys/timer.h
deleted file mode 100644
index 31d89d3..0000000
--- a/zfs/include/spl/sys/timer.h
+++ /dev/null

@@ -1,100 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TIMER_H
-#define	_SPL_TIMER_H
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/timer.h>
-
-#define	lbolt				((clock_t)jiffies)
-#define	lbolt64				((int64_t)get_jiffies_64())
-
-#define	ddi_get_lbolt()			((clock_t)jiffies)
-#define	ddi_get_lbolt64()		((int64_t)get_jiffies_64())
-
-#define	ddi_time_before(a, b)		(typecheck(clock_t, a) && \
-					typecheck(clock_t, b) && \
-					((a) - (b) < 0))
-#define	ddi_time_after(a, b)		ddi_time_before(b, a)
-#define	ddi_time_before_eq(a, b)	(!ddi_time_after(a, b))
-#define	ddi_time_after_eq(a, b)		ddi_time_before_eq(b, a)
-
-#define	ddi_time_before64(a, b)		(typecheck(int64_t, a) && \
-					typecheck(int64_t, b) && \
-					((a) - (b) < 0))
-#define	ddi_time_after64(a, b)		ddi_time_before64(b, a)
-#define	ddi_time_before_eq64(a, b)	(!ddi_time_after64(a, b))
-#define	ddi_time_after_eq64(a, b)	ddi_time_before_eq64(b, a)
-
-#define	delay(ticks)			schedule_timeout_uninterruptible(ticks)
-
-/* usleep_range() introduced in 2.6.36 */
-#ifndef HAVE_USLEEP_RANGE
-static inline void
-usleep_range(unsigned long min, unsigned long max)
-{
-	unsigned int min_ms = min / USEC_PER_MSEC;
-
-	if (min >= MAX_UDELAY_MS)
-		msleep(min_ms);
-	else
-		udelay(min);
-}
-#endif /* HAVE_USLEEP_RANGE */
-
-#define	SEC_TO_TICK(sec)		((sec) * HZ)
-#define	MSEC_TO_TICK(ms)		msecs_to_jiffies(ms)
-#define	USEC_TO_TICK(us)		usecs_to_jiffies(us)
-#define	NSEC_TO_TICK(ns)		usecs_to_jiffies(ns / NSEC_PER_USEC)
-
-#ifndef from_timer
-#define	from_timer(var, timer, timer_field) \
-	container_of(timer, typeof(*var), timer_field)
-#endif
-
-#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST
-typedef struct timer_list *spl_timer_list_t;
-#else
-typedef unsigned long spl_timer_list_t;
-#endif
-
-#ifndef HAVE_KERNEL_TIMER_SETUP
-
-static inline void
-timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl)
-{
-#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS
-	(timer)->flags = fl;
-#endif
-	init_timer(timer);
-	setup_timer(timer, func, (spl_timer_list_t)(timer));
-}
-
-#endif /* HAVE_KERNEL_TIMER_SETUP */
-
-#endif  /* _SPL_TIMER_H */

diff --git a/zfs/include/spl/sys/tsd.h b/zfs/include/spl/sys/tsd.h
deleted file mode 100644
index 39a291b..0000000
--- a/zfs/include/spl/sys/tsd.h
+++ /dev/null

@@ -1,46 +0,0 @@
-/*
- *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TSD_H
-#define	_SPL_TSD_H
-
-#include <sys/types.h>
-
-#define	TSD_HASH_TABLE_BITS_DEFAULT	9
-#define	TSD_KEYS_MAX			32768
-#define	DTOR_PID			(PID_MAX_LIMIT+1)
-#define	PID_KEY				(TSD_KEYS_MAX+1)
-
-typedef void (*dtor_func_t)(void *);
-
-extern int tsd_set(uint_t, void *);
-extern void *tsd_get(uint_t);
-extern void *tsd_get_by_thread(uint_t, kthread_t *);
-extern void tsd_create(uint_t *, dtor_func_t);
-extern void tsd_destroy(uint_t *);
-extern void tsd_exit(void);
-
-int spl_tsd_init(void);
-void spl_tsd_fini(void);
-
-#endif /* _SPL_TSD_H */

diff --git a/zfs/include/spl/sys/types.h b/zfs/include/spl/sys/types.h
deleted file mode 100644
index 719a446..0000000
--- a/zfs/include/spl/sys/types.h
+++ /dev/null

@@ -1,58 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TYPES_H
-#define	_SPL_TYPES_H
-
-#include <linux/types.h>
-
-typedef enum {
-	B_FALSE = 0,
-	B_TRUE = 1
-} boolean_t;
-
-typedef unsigned char		uchar_t;
-typedef unsigned short		ushort_t;
-typedef unsigned int		uint_t;
-typedef unsigned long		ulong_t;
-typedef unsigned long long	u_longlong_t;
-typedef long long		longlong_t;
-
-typedef unsigned long		intptr_t;
-typedef unsigned long long	rlim64_t;
-
-typedef struct task_struct	kthread_t;
-typedef struct task_struct	proc_t;
-
-typedef int			id_t;
-typedef short			pri_t;
-typedef short			index_t;
-typedef longlong_t		offset_t;
-typedef u_longlong_t		u_offset_t;
-typedef ulong_t			pgcnt_t;
-
-typedef int			major_t;
-typedef int			minor_t;
-
-#endif	/* _SPL_TYPES_H */

diff --git a/zfs/include/spl/sys/types32.h b/zfs/include/spl/sys/types32.h
deleted file mode 100644
index c60ba8c..0000000
--- a/zfs/include/spl/sys/types32.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_TYPES32_H
-#define	_SPL_TYPES32_H
-
-#include <sys/types.h>
-
-typedef uint32_t	caddr32_t;
-typedef int32_t		daddr32_t;
-typedef int32_t		time32_t;
-typedef uint32_t	size32_t;
-
-#endif	/* _SPL_TYPES32_H */

diff --git a/zfs/include/spl/sys/uio.h b/zfs/include/spl/sys/uio.h
deleted file mode 100644
index fac2607..0000000
--- a/zfs/include/spl/sys/uio.h
+++ /dev/null

@@ -1,107 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Copyright (c) 2015 by Chunwei Chen. All rights reserved.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_UIO_H
-#define	_SPL_UIO_H
-
-#include <linux/uio.h>
-#include <linux/blkdev.h>
-#include <asm/uaccess.h>
-#include <sys/types.h>
-
-typedef struct iovec iovec_t;
-
-typedef enum uio_rw {
-	UIO_READ =		0,
-	UIO_WRITE =		1,
-} uio_rw_t;
-
-typedef enum uio_seg {
-	UIO_USERSPACE =		0,
-	UIO_SYSSPACE =		1,
-	UIO_USERISPACE =	2,
-	UIO_BVEC =		3,
-} uio_seg_t;
-
-typedef struct uio {
-	union {
-		const struct iovec	*uio_iov;
-		const struct bio_vec	*uio_bvec;
-	};
-	int		uio_iovcnt;
-	offset_t	uio_loffset;
-	uio_seg_t	uio_segflg;
-	boolean_t	uio_fault_disable;
-	uint16_t	uio_fmode;
-	uint16_t	uio_extflg;
-	offset_t	uio_limit;
-	ssize_t		uio_resid;
-	size_t		uio_skip;
-} uio_t;
-
-typedef struct aio_req {
-	uio_t		*aio_uio;
-	void		*aio_private;
-} aio_req_t;
-
-typedef enum xuio_type {
-	UIOTYPE_ASYNCIO,
-	UIOTYPE_ZEROCOPY,
-} xuio_type_t;
-
-
-#define	UIOA_IOV_MAX    16
-
-typedef struct uioa_page_s {
-	int	uioa_pfncnt;
-	void	**uioa_ppp;
-	caddr_t	uioa_base;
-	size_t	uioa_len;
-} uioa_page_t;
-
-typedef struct xuio {
-	uio_t xu_uio;
-	enum xuio_type xu_type;
-	union {
-		struct {
-			uint32_t xu_a_state;
-			ssize_t xu_a_mbytes;
-			uioa_page_t *xu_a_lcur;
-			void **xu_a_lppp;
-			void *xu_a_hwst[4];
-			uioa_page_t xu_a_locked[UIOA_IOV_MAX];
-		} xu_aio;
-
-		struct {
-			int xu_zc_rw;
-			void *xu_zc_priv;
-		} xu_zc;
-	} xu_ext;
-} xuio_t;
-
-#define	XUIO_XUZC_PRIV(xuio)	xuio->xu_ext.xu_zc.xu_zc_priv
-#define	XUIO_XUZC_RW(xuio)	xuio->xu_ext.xu_zc.xu_zc_rw
-
-#endif /* SPL_UIO_H */

diff --git a/zfs/include/spl/sys/user.h b/zfs/include/spl/sys/user.h
deleted file mode 100644
index b12cb24..0000000
--- a/zfs/include/spl/sys/user.h
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- *  Copyright (C) 2015 Cluster Inc.
- *  Produced at ClusterHQ Inc (cf, DISCLAIMER).
- *  Written by Richard Yao <richard.yao@clusterhq.com>.
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_USER_H
-#define	_SPL_USER_H
-
-/*
- * We have uf_info_t for areleasef(). We implement areleasef() using a global
- * linked list of all open file descriptors with the task structs referenced,
- * so accessing the correct descriptor from areleasef() only requires knowing
- * about the Linux task_struct. Since this is internal to our compatibility
- * layer, we make it an opaque type.
- *
- * XXX: If the descriptor changes under us and we do not do a getf() between
- * the change and using it, we would get an incorrect reference.
- */
-
-struct uf_info;
-typedef struct uf_info uf_info_t;
-
-#define	P_FINFO(x) ((uf_info_t *)x)
-
-#endif /* SPL_USER_H */

diff --git a/zfs/include/spl/sys/vfs.h b/zfs/include/spl/sys/vfs.h
deleted file mode 100644
index 0d5e1d5..0000000
--- a/zfs/include/spl/sys/vfs.h
+++ /dev/null

@@ -1,51 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_ZFS_H
-#define	_SPL_ZFS_H
-
-#include <linux/mount.h>
-#include <linux/fs.h>
-#include <linux/dcache.h>
-#include <linux/statfs.h>
-#include <linux/xattr.h>
-#include <linux/security.h>
-#include <linux/seq_file.h>
-
-#define	MAXFIDSZ	64
-
-typedef struct spl_fid {
-	union {
-		long fid_pad;
-		struct {
-			ushort_t len;		/* length of data in bytes */
-			char data[MAXFIDSZ];	/* data (variable len) */
-		} _fid;
-	} un;
-} fid_t;
-
-#define	fid_len		un._fid.len
-#define	fid_data	un._fid.data
-
-#endif /* SPL_ZFS_H */

diff --git a/zfs/include/spl/sys/vmem.h b/zfs/include/spl/sys/vmem.h
deleted file mode 100644
index a9b12ee..0000000
--- a/zfs/include/spl/sys/vmem.h
+++ /dev/null

@@ -1,109 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_VMEM_H
-#define	_SPL_VMEM_H
-
-#include <sys/kmem.h>
-#include <linux/sched.h>
-#include <linux/vmalloc.h>
-
-typedef struct vmem { } vmem_t;
-
-extern vmem_t *heap_arena;
-extern vmem_t *zio_alloc_arena;
-extern vmem_t *zio_arena;
-
-extern size_t vmem_size(vmem_t *vmp, int typemask);
-
-/*
- * Memory allocation interfaces
- */
-#define	VMEM_ALLOC	0x01
-#define	VMEM_FREE	0x02
-
-#ifndef VMALLOC_TOTAL
-#define	VMALLOC_TOTAL	(VMALLOC_END - VMALLOC_START)
-#endif
-
-/*
- * vmem_* is an interface to a low level arena-based memory allocator on
- * Illumos that is used to allocate virtual address space. The kmem SLAB
- * allocator allocates slabs from it. Then the generic allocation functions
- * kmem_{alloc,zalloc,free}() are layered on top of SLAB allocators.
- *
- * On Linux, the primary means of doing allocations is via kmalloc(), which
- * is similarly layered on top of something called the buddy allocator. The
- * buddy allocator is not available to kernel modules, it uses physical
- * memory addresses rather than virtual memory addresses and is prone to
- * fragmentation.
- *
- * Linux sets aside a relatively small address space for in-kernel virtual
- * memory from which allocations can be done using vmalloc().  It might seem
- * like a good idea to use vmalloc() to implement something similar to
- * Illumos' allocator. However, this has the following problems:
- *
- * 1. Page directory table allocations are hard coded to use GFP_KERNEL.
- *    Consequently, any KM_PUSHPAGE or KM_NOSLEEP allocations done using
- *    vmalloc() will not have proper semantics.
- *
- * 2. Address space exhaustion is a real issue on 32-bit platforms where
- *    only a few 100MB are available. The kernel will handle it by spinning
- *    when it runs out of address space.
- *
- * 3. All vmalloc() allocations and frees are protected by a single global
- *    lock which serializes all allocations.
- *
- * 4. Accessing /proc/meminfo and /proc/vmallocinfo will iterate the entire
- *    list. The former will sum the allocations while the latter will print
- *    them to user space in a way that user space can keep the lock held
- *    indefinitely.  When the total number of mapped allocations is large
- *    (several 100,000) a large amount of time will be spent waiting on locks.
- *
- * 5. Linux has a wait_on_bit() locking primitive that assumes physical
- *    memory is used, it simply does not work on virtual memory.  Certain
- *    Linux structures (e.g. the superblock) use them and might be embedded
- *    into a structure from Illumos.  This makes using Linux virtual memory
- *    unsafe in certain situations.
- *
- * It follows that we cannot obtain identical semantics to those on Illumos.
- * Consequently, we implement the kmem_{alloc,zalloc,free}() functions in
- * such a way that they can be used as drop-in replacements for small vmem_*
- * allocations (8MB in size or smaller) and map vmem_{alloc,zalloc,free}()
- * to them.
- */
-
-#define	vmem_alloc(sz, fl)	spl_vmem_alloc((sz), (fl), __func__, __LINE__)
-#define	vmem_zalloc(sz, fl)	spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
-#define	vmem_free(ptr, sz)	spl_vmem_free((ptr), (sz))
-#define	vmem_qcache_reap(ptr)	((void)0)
-
-extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
-extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
-extern void spl_vmem_free(const void *ptr, size_t sz);
-
-int spl_vmem_init(void);
-void spl_vmem_fini(void);
-
-#endif	/* _SPL_VMEM_H */

diff --git a/zfs/include/spl/sys/vmsystm.h b/zfs/include/spl/sys/vmsystm.h
deleted file mode 100644
index 8783231..0000000
--- a/zfs/include/spl/sys/vmsystm.h
+++ /dev/null

@@ -1,92 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_VMSYSTM_H
-#define	_SPL_VMSYSTM_H
-
-#include <linux/mmzone.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/highmem.h>
-#include <linux/vmalloc.h>
-#include <sys/types.h>
-#include <asm/uaccess.h>
-
-#ifdef HAVE_TOTALRAM_PAGES_FUNC
-#define	zfs_totalram_pages	totalram_pages()
-#else
-#define	zfs_totalram_pages	totalram_pages
-#endif
-
-#ifdef HAVE_TOTALHIGH_PAGES
-#define	zfs_totalhigh_pages	totalhigh_pages()
-#else
-#define	zfs_totalhigh_pages	totalhigh_pages
-#endif
-
-#define	membar_producer()		smp_wmb()
-#define	physmem				zfs_totalram_pages
-
-#define	xcopyin(from, to, size)		copy_from_user(to, from, size)
-#define	xcopyout(from, to, size)	copy_to_user(to, from, size)
-
-static __inline__ int
-copyin(const void *from, void *to, size_t len)
-{
-	/* On error copyin routine returns -1 */
-	if (xcopyin(from, to, len))
-		return (-1);
-
-	return (0);
-}
-
-static __inline__ int
-copyout(const void *from, void *to, size_t len)
-{
-	/* On error copyout routine returns -1 */
-	if (xcopyout(from, to, len))
-		return (-1);
-
-	return (0);
-}
-
-static __inline__ int
-copyinstr(const void *from, void *to, size_t len, size_t *done)
-{
-	size_t rc;
-
-	if (len == 0)
-		return (-ENAMETOOLONG);
-
-	/* XXX: Should return ENAMETOOLONG if 'strlen(from) > len' */
-
-	memset(to, 0, len);
-	rc = copyin(from, to, len - 1);
-	if (done != NULL)
-		*done = rc;
-
-	return (0);
-}
-
-#endif /* SPL_VMSYSTM_H */

diff --git a/zfs/include/spl/sys/vnode.h b/zfs/include/spl/sys/vnode.h
deleted file mode 100644
index 7bd278e..0000000
--- a/zfs/include/spl/sys/vnode.h
+++ /dev/null

@@ -1,202 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_VNODE_H
-#define	_SPL_VNODE_H
-
-#include <linux/module.h>
-#include <linux/syscalls.h>
-#include <linux/fcntl.h>
-#include <linux/buffer_head.h>
-#include <linux/dcache.h>
-#include <linux/namei.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/fs_struct.h>
-#include <linux/mount.h>
-#include <sys/kmem.h>
-#include <sys/mutex.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/uio.h>
-#include <sys/user.h>
-
-/*
- * Prior to linux-2.6.33 only O_DSYNC semantics were implemented and
- * they used the O_SYNC flag.  As of linux-2.6.33 the this behavior
- * was properly split in to O_SYNC and O_DSYNC respectively.
- */
-#ifndef O_DSYNC
-#define	O_DSYNC		O_SYNC
-#endif
-
-#define	FREAD		1
-#define	FWRITE		2
-#define	FCREAT		O_CREAT
-#define	FTRUNC		O_TRUNC
-#define	FOFFMAX		O_LARGEFILE
-#define	FSYNC		O_SYNC
-#define	FDSYNC		O_DSYNC
-#define	FEXCL		O_EXCL
-#define	FDIRECT		O_DIRECT
-#define	FAPPEND		O_APPEND
-
-#define	FNODSYNC	0x10000 /* fsync pseudo flag */
-#define	FNOFOLLOW	0x20000 /* don't follow symlinks */
-
-#define	F_FREESP	11 	/* Free file space */
-
-
-/*
- * The vnode AT_ flags are mapped to the Linux ATTR_* flags.
- * This allows them to be used safely with an iattr structure.
- * The AT_XVATTR flag has been added and mapped to the upper
- * bit range to avoid conflicting with the standard Linux set.
- */
-#undef AT_UID
-#undef AT_GID
-
-#define	AT_MODE		ATTR_MODE
-#define	AT_UID		ATTR_UID
-#define	AT_GID		ATTR_GID
-#define	AT_SIZE		ATTR_SIZE
-#define	AT_ATIME	ATTR_ATIME
-#define	AT_MTIME	ATTR_MTIME
-#define	AT_CTIME	ATTR_CTIME
-
-#define	ATTR_XVATTR	(1U << 31)
-#define	AT_XVATTR	ATTR_XVATTR
-
-#define	ATTR_IATTR_MASK	(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | \
-			ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_FILE)
-
-#define	CRCREAT		0x01
-#define	RMFILE		0x02
-
-#define	B_INVAL		0x01
-#define	B_TRUNC		0x02
-
-#define	LOOKUP_DIR		0x01
-#define	LOOKUP_XATTR		0x02
-#define	CREATE_XATTR_DIR	0x04
-#define	ATTR_NOACLCHECK		0x20
-
-typedef enum vtype {
-	VNON		= 0,
-	VREG		= 1,
-	VDIR		= 2,
-	VBLK		= 3,
-	VCHR		= 4,
-	VLNK		= 5,
-	VFIFO		= 6,
-	VDOOR		= 7,
-	VPROC		= 8,
-	VSOCK		= 9,
-	VPORT		= 10,
-	VBAD		= 11
-} vtype_t;
-
-typedef struct vattr {
-	enum vtype	va_type;	/* vnode type */
-	uint32_t	va_mask;	/* attribute bit-mask */
-	ushort_t	va_mode;	/* acc mode */
-	uid_t		va_uid;		/* owner uid */
-	gid_t		va_gid;		/* owner gid */
-	long		va_fsid;	/* fs id */
-	long		va_nodeid;	/* node # */
-	uint32_t	va_nlink;	/* # links */
-	uint64_t	va_size;	/* file size */
-	inode_timespec_t va_atime;	/* last acc */
-	inode_timespec_t va_mtime;	/* last mod */
-	inode_timespec_t va_ctime;	/* last chg */
-	dev_t		va_rdev;	/* dev */
-	uint64_t	va_nblocks;	/* space used */
-	uint32_t	va_blksize;	/* block size */
-	uint32_t	va_seq;		/* sequence */
-	struct dentry	*va_dentry;	/* dentry to wire */
-} vattr_t;
-
-typedef struct vnode {
-	struct file	*v_file;
-	kmutex_t	v_lock;		/* protects vnode fields */
-	uint_t		v_flag;		/* vnode flags (see below) */
-	uint_t		v_count;	/* reference count */
-	void		*v_data;	/* private data for fs */
-	struct vfs	*v_vfsp;	/* ptr to containing VFS */
-	struct stdata	*v_stream;	/* associated stream */
-	enum vtype	v_type;		/* vnode type */
-	dev_t		v_rdev;		/* device (VCHR, VBLK) */
-	gfp_t		v_gfp_mask;	/* original mapping gfp mask */
-} vnode_t;
-
-typedef struct vn_file {
-	int		f_fd;		/* linux fd for lookup */
-	struct task_struct *f_task;	/* linux task this fd belongs to */
-	struct file	*f_file;	/* linux file struct */
-	atomic_t	f_ref;		/* ref count */
-	kmutex_t	f_lock;		/* struct lock */
-	loff_t		f_offset;	/* offset */
-	vnode_t		*f_vnode;	/* vnode */
-	struct list_head f_list;	/* list referenced file_t's */
-} file_t;
-
-extern vnode_t *vn_alloc(int flag);
-void vn_free(vnode_t *vp);
-extern vtype_t vn_mode_to_vtype(mode_t);
-extern mode_t vn_vtype_to_mode(vtype_t);
-extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode,
-    vnode_t **vpp, int x1, void *x2);
-extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
-    vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd);
-extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len,
-    offset_t off, uio_seg_t seg, int x1, rlim64_t x2,
-    void *x3, ssize_t *residp);
-extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4);
-extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, void *ct);
-
-extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4);
-extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4);
-extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
-    offset_t offset, void *x6, void *x7);
-extern file_t *vn_getf(int fd);
-extern void vn_releasef(int fd);
-extern void vn_areleasef(int fd, uf_info_t *fip);
-
-int spl_vn_init(void);
-void spl_vn_fini(void);
-
-#define	VOP_CLOSE				vn_close
-#define	VOP_SEEK				vn_seek
-#define	VOP_GETATTR				vn_getattr
-#define	VOP_FSYNC				vn_fsync
-#define	VOP_SPACE				vn_space
-#define	VOP_PUTPAGE(vp, o, s, f, x1, x2)	((void)0)
-#define	vn_is_readonly(vp)			0
-#define	getf					vn_getf
-#define	releasef				vn_releasef
-#define	areleasef				vn_areleasef
-
-extern vnode_t *rootdir;
-
-#endif /* SPL_VNODE_H */

diff --git a/zfs/include/spl/sys/wait.h b/zfs/include/spl/sys/wait.h
deleted file mode 100644
index 5311ff8..0000000
--- a/zfs/include/spl/sys/wait.h
+++ /dev/null

@@ -1,55 +0,0 @@
-/*
- *  Copyright (C) 2007-2014 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_WAIT_H
-#define	_SPL_WAIT_H
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-
-#ifndef HAVE_WAIT_ON_BIT_ACTION
-#define	spl_wait_on_bit(word, bit, mode)	wait_on_bit(word, bit, mode)
-#else
-
-static inline int
-spl_bit_wait(void *word)
-{
-	schedule();
-	return (0);
-}
-
-#define	spl_wait_on_bit(word, bit, mode)		\
-	wait_on_bit(word, bit, spl_bit_wait, mode)
-
-#endif /* HAVE_WAIT_ON_BIT_ACTION */
-
-#ifdef HAVE_WAIT_QUEUE_ENTRY_T
-typedef wait_queue_head_t	spl_wait_queue_head_t;
-typedef wait_queue_entry_t	spl_wait_queue_entry_t;
-#else
-typedef wait_queue_head_t	spl_wait_queue_head_t;
-typedef wait_queue_t		spl_wait_queue_entry_t;
-#endif
-
-#endif /* SPL_WAIT_H */

diff --git a/zfs/include/spl/sys/zmod.h b/zfs/include/spl/sys/zmod.h
deleted file mode 100644
index 95c1a3e..0000000
--- a/zfs/include/spl/sys/zmod.h
+++ /dev/null

@@ -1,78 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *
- *  z_compress_level/z_uncompress are nearly identical copies of the
- *  compress2/uncompress functions provided by the official zlib package
- *  available at http://zlib.net/.  The only changes made we to slightly
- *  adapt the functions called to match the linux kernel implementation
- *  of zlib.  The full zlib license follows:
- *
- *  zlib.h -- interface of the 'zlib' general purpose compression library
- *  version 1.2.5, April 19th, 2010
- *
- *  Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
- *
- *  This software is provided 'as-is', without any express or implied
- *  warranty.  In no event will the authors be held liable for any damages
- *  arising from the use of this software.
- *
- *  Permission is granted to anyone to use this software for any purpose,
- *  including commercial applications, and to alter it and redistribute it
- *  freely, subject to the following restrictions:
- *
- *  1. The origin of this software must not be misrepresented; you must not
- *     claim that you wrote the original software. If you use this software
- *     in a product, an acknowledgment in the product documentation would be
- *     appreciated but is not required.
- *  2. Altered source versions must be plainly marked as such, and must not be
- *     misrepresented as being the original software.
- *  3. This notice may not be removed or altered from any source distribution.
- *
- *  Jean-loup Gailly
- *  Mark Adler
- */
-
-#ifndef _SPL_ZMOD_H
-#define	_SPL_ZMOD_H
-
-#include <sys/types.h>
-#include <linux/zlib.h>
-
-#ifdef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
-#define	spl_zlib_deflate_workspacesize(wb, ml) \
-	zlib_deflate_workspacesize(wb, ml)
-#else
-#define	spl_zlib_deflate_workspacesize(wb, ml) \
-	zlib_deflate_workspacesize()
-#endif /* HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */
-
-extern int z_compress_level(void *dest, size_t *destLen, const void *source,
-    size_t sourceLen, int level);
-extern int z_uncompress(void *dest, size_t *destLen, const void *source,
-    size_t sourceLen);
-
-int spl_zlib_init(void);
-void spl_zlib_fini(void);
-
-#endif /* SPL_ZMOD_H */

diff --git a/zfs/include/spl/sys/zone.h b/zfs/include/spl/sys/zone.h
deleted file mode 100644
index b2efd13..0000000
--- a/zfs/include/spl/sys/zone.h
+++ /dev/null

@@ -1,36 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _SPL_ZONE_H
-#define	_SPL_ZONE_H
-
-#include <sys/byteorder.h>
-
-#define	GLOBAL_ZONEID			0
-
-#define	zone_dataset_visible(x, y)	(1)
-#define	crgetzoneid(x)			(GLOBAL_ZONEID)
-#define	INGLOBALZONE(z)			(1)
-
-#endif /* SPL_ZONE_H */

diff --git a/zfs/include/sys/Makefile.am b/zfs/include/sys/Makefile.am
index 31ffdfb..385c82c 100644
--- a/zfs/include/sys/Makefile.am
+++ b/zfs/include/sys/Makefile.am

@@ -1,158 +1,152 @@
-SUBDIRS = fm fs crypto lua sysevent
+SUBDIRS = fm fs crypto lua sysevent zstd
 
 COMMON_H = \
-	$(top_srcdir)/include/sys/abd.h \
-	$(top_srcdir)/include/sys/aggsum.h \
-	$(top_srcdir)/include/sys/arc.h \
-	$(top_srcdir)/include/sys/arc_impl.h \
-	$(top_srcdir)/include/sys/avl.h \
-	$(top_srcdir)/include/sys/avl_impl.h \
-	$(top_srcdir)/include/sys/blkptr.h \
-	$(top_srcdir)/include/sys/bplist.h \
-	$(top_srcdir)/include/sys/bpobj.h \
-	$(top_srcdir)/include/sys/bptree.h \
-	$(top_srcdir)/include/sys/bqueue.h \
-	$(top_srcdir)/include/sys/cityhash.h \
-	$(top_srcdir)/include/sys/spa_checkpoint.h \
-	$(top_srcdir)/include/sys/dataset_kstats.h \
-	$(top_srcdir)/include/sys/dbuf.h \
-	$(top_srcdir)/include/sys/ddt.h \
-	$(top_srcdir)/include/sys/dmu.h \
-	$(top_srcdir)/include/sys/dmu_impl.h \
-	$(top_srcdir)/include/sys/dmu_objset.h \
-	$(top_srcdir)/include/sys/dmu_recv.h \
-	$(top_srcdir)/include/sys/dmu_send.h \
-	$(top_srcdir)/include/sys/dmu_traverse.h \
-	$(top_srcdir)/include/sys/dmu_tx.h \
-	$(top_srcdir)/include/sys/dmu_zfetch.h \
-	$(top_srcdir)/include/sys/dnode.h \
-	$(top_srcdir)/include/sys/dsl_bookmark.h \
-	$(top_srcdir)/include/sys/dsl_dataset.h \
-	$(top_srcdir)/include/sys/dsl_deadlist.h \
-	$(top_srcdir)/include/sys/dsl_deleg.h \
-	$(top_srcdir)/include/sys/dsl_destroy.h \
-	$(top_srcdir)/include/sys/dsl_dir.h \
-	$(top_srcdir)/include/sys/dsl_crypt.h \
-	$(top_srcdir)/include/sys/dsl_pool.h \
-	$(top_srcdir)/include/sys/dsl_prop.h \
-	$(top_srcdir)/include/sys/dsl_scan.h \
-	$(top_srcdir)/include/sys/dsl_synctask.h \
-	$(top_srcdir)/include/sys/dsl_userhold.h \
-	$(top_srcdir)/include/sys/edonr.h \
-	$(top_srcdir)/include/sys/efi_partition.h \
-	$(top_srcdir)/include/sys/frame.h \
-	$(top_srcdir)/include/sys/hkdf.h \
-	$(top_srcdir)/include/sys/metaslab.h \
-	$(top_srcdir)/include/sys/metaslab_impl.h \
-	$(top_srcdir)/include/sys/mmp.h \
-	$(top_srcdir)/include/sys/mntent.h \
-	$(top_srcdir)/include/sys/multilist.h \
-	$(top_srcdir)/include/sys/note.h \
-	$(top_srcdir)/include/sys/nvpair.h \
-	$(top_srcdir)/include/sys/nvpair_impl.h \
-	$(top_srcdir)/include/sys/pathname.h \
-	$(top_srcdir)/include/sys/policy.h \
-	$(top_srcdir)/include/sys/range_tree.h \
-	$(top_srcdir)/include/sys/refcount.h \
-	$(top_srcdir)/include/sys/rrwlock.h \
-	$(top_srcdir)/include/sys/sa.h \
-	$(top_srcdir)/include/sys/sa_impl.h \
-	$(top_srcdir)/include/sys/sdt.h \
-	$(top_srcdir)/include/sys/sha2.h \
-	$(top_srcdir)/include/sys/skein.h \
-	$(top_srcdir)/include/sys/spa_boot.h \
-	$(top_srcdir)/include/sys/space_map.h \
-	$(top_srcdir)/include/sys/space_reftree.h \
-	$(top_srcdir)/include/sys/spa.h \
-	$(top_srcdir)/include/sys/spa_impl.h \
-	$(top_srcdir)/include/sys/spa_checksum.h \
-	$(top_srcdir)/include/sys/sysevent.h \
-	$(top_srcdir)/include/sys/trace.h \
-	$(top_srcdir)/include/sys/trace_acl.h \
-	$(top_srcdir)/include/sys/trace_arc.h \
-	$(top_srcdir)/include/sys/trace_common.h \
-	$(top_srcdir)/include/sys/trace_dbgmsg.h \
-	$(top_srcdir)/include/sys/trace_dbuf.h \
-	$(top_srcdir)/include/sys/trace_dmu.h \
-	$(top_srcdir)/include/sys/trace_dnode.h \
-	$(top_srcdir)/include/sys/trace_multilist.h \
-	$(top_srcdir)/include/sys/trace_txg.h \
-	$(top_srcdir)/include/sys/trace_vdev.h \
-	$(top_srcdir)/include/sys/trace_zil.h \
-	$(top_srcdir)/include/sys/trace_zio.h \
-	$(top_srcdir)/include/sys/trace_zrlock.h \
-	$(top_srcdir)/include/sys/txg.h \
-	$(top_srcdir)/include/sys/txg_impl.h \
-	$(top_srcdir)/include/sys/u8_textprep_data.h \
-	$(top_srcdir)/include/sys/u8_textprep.h \
-	$(top_srcdir)/include/sys/uberblock.h \
-	$(top_srcdir)/include/sys/uberblock_impl.h \
-	$(top_srcdir)/include/sys/uio_impl.h \
-	$(top_srcdir)/include/sys/unique.h \
-	$(top_srcdir)/include/sys/uuid.h \
-	$(top_srcdir)/include/sys/vdev_disk.h \
-	$(top_srcdir)/include/sys/vdev_file.h \
-	$(top_srcdir)/include/sys/vdev.h \
-	$(top_srcdir)/include/sys/vdev_impl.h \
-	$(top_srcdir)/include/sys/vdev_indirect_births.h \
-	$(top_srcdir)/include/sys/vdev_indirect_mapping.h \
-	$(top_srcdir)/include/sys/vdev_initialize.h \
-	$(top_srcdir)/include/sys/vdev_raidz.h \
-	$(top_srcdir)/include/sys/vdev_raidz_impl.h \
-	$(top_srcdir)/include/sys/vdev_removal.h \
-	$(top_srcdir)/include/sys/vdev_trim.h \
-	$(top_srcdir)/include/sys/xvattr.h \
-	$(top_srcdir)/include/sys/zap.h \
-	$(top_srcdir)/include/sys/zap_impl.h \
-	$(top_srcdir)/include/sys/zap_leaf.h \
-	$(top_srcdir)/include/sys/zcp.h \
-	$(top_srcdir)/include/sys/zcp_global.h \
-	$(top_srcdir)/include/sys/zcp_iter.h \
-	$(top_srcdir)/include/sys/zcp_prop.h \
-	$(top_srcdir)/include/sys/zfeature.h \
-	$(top_srcdir)/include/sys/zfs_acl.h \
-	$(top_srcdir)/include/sys/zfs_context.h \
-	$(top_srcdir)/include/sys/zfs_ctldir.h \
-	$(top_srcdir)/include/sys/zfs_debug.h \
-	$(top_srcdir)/include/sys/zfs_delay.h \
-	$(top_srcdir)/include/sys/zfs_dir.h \
-	$(top_srcdir)/include/sys/zfs_fuid.h \
-	$(top_srcdir)/include/sys/zfs_project.h \
-	$(top_srcdir)/include/sys/zfs_ratelimit.h \
-	$(top_srcdir)/include/sys/zfs_rlock.h \
-	$(top_srcdir)/include/sys/zfs_sa.h \
-	$(top_srcdir)/include/sys/zfs_stat.h \
-	$(top_srcdir)/include/sys/zfs_sysfs.h \
-	$(top_srcdir)/include/sys/zfs_vfsops.h \
-	$(top_srcdir)/include/sys/zfs_vnops.h \
-	$(top_srcdir)/include/sys/zfs_znode.h \
-	$(top_srcdir)/include/sys/zil.h \
-	$(top_srcdir)/include/sys/zil_impl.h \
-	$(top_srcdir)/include/sys/zio_checksum.h \
-	$(top_srcdir)/include/sys/zio_compress.h \
-	$(top_srcdir)/include/sys/zio_crypt.h \
-	$(top_srcdir)/include/sys/zio.h \
-	$(top_srcdir)/include/sys/zio_impl.h \
-	$(top_srcdir)/include/sys/zio_priority.h \
-	$(top_srcdir)/include/sys/zrlock.h \
-	$(top_srcdir)/include/sys/zthr.h
+	abd.h \
+	abd_impl.h \
+	aggsum.h \
+	arc.h \
+	arc_impl.h \
+	avl.h \
+	avl_impl.h \
+	bitops.h \
+	blkptr.h \
+	bplist.h \
+	bpobj.h \
+	bptree.h \
+	btree.h \
+	bqueue.h \
+	dataset_kstats.h \
+	dbuf.h \
+	ddt.h \
+	dmu.h \
+	dmu_impl.h \
+	dmu_objset.h \
+	dmu_recv.h \
+	dmu_redact.h \
+	dmu_send.h \
+	dmu_traverse.h \
+	dmu_tx.h \
+	dmu_zfetch.h \
+	dnode.h \
+	dsl_bookmark.h \
+	dsl_dataset.h \
+	dsl_deadlist.h \
+	dsl_deleg.h \
+	dsl_destroy.h \
+	dsl_dir.h \
+	dsl_crypt.h \
+	dsl_pool.h \
+	dsl_prop.h \
+	dsl_scan.h \
+	dsl_synctask.h \
+	dsl_userhold.h \
+	edonr.h \
+	efi_partition.h \
+	frame.h \
+	hkdf.h \
+	metaslab.h \
+	metaslab_impl.h \
+	mmp.h \
+	mntent.h \
+	mod.h \
+	multilist.h \
+	note.h \
+	nvpair.h \
+	nvpair_impl.h \
+	objlist.h \
+	pathname.h \
+	qat.h \
+	range_tree.h \
+	rrwlock.h \
+	sa.h \
+	sa_impl.h \
+	skein.h \
+	spa_boot.h \
+	spa_checkpoint.h \
+	spa_log_spacemap.h \
+	space_map.h \
+	space_reftree.h \
+	spa.h \
+	spa_impl.h \
+	spa_checksum.h \
+	sysevent.h \
+	txg.h \
+	txg_impl.h \
+	u8_textprep_data.h \
+	u8_textprep.h \
+	uberblock.h \
+	uberblock_impl.h \
+	uio_impl.h \
+	unique.h \
+	uuid.h \
+	vdev_disk.h \
+	vdev_file.h \
+	vdev.h \
+	vdev_draid.h \
+	vdev_impl.h \
+	vdev_indirect_births.h \
+	vdev_indirect_mapping.h \
+	vdev_initialize.h \
+	vdev_raidz.h \
+	vdev_raidz_impl.h \
+	vdev_rebuild.h \
+	vdev_removal.h \
+	vdev_trim.h \
+	xvattr.h \
+	zap.h \
+	zap_impl.h \
+	zap_leaf.h \
+	zcp.h \
+	zcp_global.h \
+	zcp_iter.h \
+	zcp_prop.h \
+	zcp_set.h \
+	zfeature.h \
+	zfs_acl.h \
+	zfs_bootenv.h \
+	zfs_context.h \
+	zfs_debug.h \
+	zfs_delay.h \
+	zfs_file.h \
+	zfs_fuid.h \
+	zfs_project.h \
+	zfs_quota.h \
+	zfs_racct.h \
+	zfs_ratelimit.h \
+	zfs_refcount.h \
+	zfs_rlock.h \
+	zfs_sa.h \
+	zfs_stat.h \
+	zfs_sysfs.h \
+	zfs_vfsops.h \
+	zfs_vnops.h \
+	zfs_znode.h \
+	zil.h \
+	zil_impl.h \
+	zio_checksum.h \
+	zio_compress.h \
+	zio_crypt.h \
+	zio.h \
+	zio_impl.h \
+	zio_priority.h \
+	zrlock.h \
+	zthr.h
 
 KERNEL_H = \
-	$(top_srcdir)/include/sys/zfs_ioctl.h \
-	$(top_srcdir)/include/sys/zfs_onexit.h \
-	${top_srcdir}/include/sys/zpl.h \
-	$(top_srcdir)/include/sys/zvol.h
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	zfs_ioctl.h \
+	zfs_ioctl_impl.h \
+	zfs_onexit.h \
+	zvol.h \
+	zvol_impl.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys
 kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
 endif
+endif

diff --git a/zfs/include/sys/abd.h b/zfs/include/sys/abd.h
index b781be4..5c6bd0c 100644
--- a/zfs/include/sys/abd.h
+++ b/zfs/include/sys/abd.h

@@ -28,75 +28,78 @@
 
 #include <sys/isa_defs.h>
 #include <sys/debug.h>
-#include <sys/refcount.h>
-#ifdef _KERNEL
-#include <linux/mm.h>
-#include <linux/bio.h>
+#include <sys/zfs_refcount.h>
 #include <sys/uio.h>
-#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 typedef enum abd_flags {
-	ABD_FLAG_LINEAR	= 1 << 0,	/* is buffer linear (or scattered)? */
-	ABD_FLAG_OWNER	= 1 << 1,	/* does it own its data buffers? */
-	ABD_FLAG_META	= 1 << 2,	/* does this represent FS metadata? */
-	ABD_FLAG_MULTI_ZONE  = 1 << 3,	/* pages split over memory zones */
-	ABD_FLAG_MULTI_CHUNK = 1 << 4,	/* pages split over multiple chunks */
-	ABD_FLAG_LINEAR_PAGE = 1 << 5,	/* linear but allocd from page */
+	ABD_FLAG_LINEAR		= 1 << 0, /* is buffer linear (or scattered)? */
+	ABD_FLAG_OWNER		= 1 << 1, /* does it own its data buffers? */
+	ABD_FLAG_META		= 1 << 2, /* does this represent FS metadata? */
+	ABD_FLAG_MULTI_ZONE  	= 1 << 3, /* pages split over memory zones */
+	ABD_FLAG_MULTI_CHUNK 	= 1 << 4, /* pages split over multiple chunks */
+	ABD_FLAG_LINEAR_PAGE 	= 1 << 5, /* linear but allocd from page */
+	ABD_FLAG_GANG		= 1 << 6, /* multiple ABDs chained together */
+	ABD_FLAG_GANG_FREE	= 1 << 7, /* gang ABD is responsible for mem */
+	ABD_FLAG_ZEROS		= 1 << 8, /* ABD for zero-filled buffer */
+	ABD_FLAG_ALLOCD		= 1 << 9, /* we allocated the abd_t */
 } abd_flags_t;
 
 typedef struct abd {
 	abd_flags_t	abd_flags;
 	uint_t		abd_size;	/* excludes scattered abd_offset */
+	list_node_t	abd_gang_link;
+#ifdef ZFS_DEBUG
 	struct abd	*abd_parent;
 	zfs_refcount_t	abd_children;
+#endif
+	kmutex_t	abd_mtx;
 	union {
 		struct abd_scatter {
 			uint_t		abd_offset;
+#if defined(__FreeBSD__) && defined(_KERNEL)
+			void    *abd_chunks[1]; /* actually variable-length */
+#else
 			uint_t		abd_nents;
 			struct scatterlist *abd_sgl;
+#endif
 		} abd_scatter;
 		struct abd_linear {
 			void		*abd_buf;
 			struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
 		} abd_linear;
+		struct abd_gang {
+			list_t abd_gang_chain;
+		} abd_gang;
 	} abd_u;
 } abd_t;
 
-typedef int abd_iter_func_t(void *buf, size_t len, void *private);
-typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);
+typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
+typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
 
 extern int zfs_abd_scatter_enabled;
 
-static inline boolean_t
-abd_is_linear(abd_t *abd)
-{
-	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
-}
-
-static inline boolean_t
-abd_is_linear_page(abd_t *abd)
-{
-	return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
-	    B_TRUE : B_FALSE);
-}
-
 /*
  * Allocations and deallocations
  */
 
 abd_t *abd_alloc(size_t, boolean_t);
 abd_t *abd_alloc_linear(size_t, boolean_t);
+abd_t *abd_alloc_gang(void);
 abd_t *abd_alloc_for_io(size_t, boolean_t);
 abd_t *abd_alloc_sametype(abd_t *, size_t);
+boolean_t abd_size_alloc_linear(size_t);
+void abd_gang_add(abd_t *, abd_t *, boolean_t);
 void abd_free(abd_t *);
 abd_t *abd_get_offset(abd_t *, size_t);
 abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
+abd_t *abd_get_offset_struct(abd_t *, abd_t *, size_t, size_t);
+abd_t *abd_get_zeros(size_t);
 abd_t *abd_get_from_buf(void *, size_t);
-void abd_put(abd_t *);
+void abd_cache_reap_now(void);
 
 /*
  * Conversion to and from a normal buffer
@@ -123,12 +126,7 @@
 int abd_cmp(abd_t *, abd_t *);
 int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
 void abd_zero_off(abd_t *, size_t, size_t);
-
-#if defined(_KERNEL)
-unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
-		size_t);
-unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
-#endif
+void abd_verify(abd_t *);
 
 void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
 	ssize_t csize, ssize_t dsize, const unsigned parity,
@@ -174,12 +172,48 @@
 }
 
 /*
+ * ABD type check functions
+ */
+static inline boolean_t
+abd_is_linear(abd_t *abd)
+{
+	return ((abd->abd_flags & ABD_FLAG_LINEAR) ? B_TRUE : B_FALSE);
+}
+
+static inline boolean_t
+abd_is_linear_page(abd_t *abd)
+{
+	return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) ? B_TRUE : B_FALSE);
+}
+
+static inline boolean_t
+abd_is_gang(abd_t *abd)
+{
+	return ((abd->abd_flags & ABD_FLAG_GANG) ? B_TRUE : B_FALSE);
+}
+
+static inline uint_t
+abd_get_size(abd_t *abd)
+{
+	return (abd->abd_size);
+}
+
+/*
  * Module lifecycle
+ * Defined in each specific OS's abd_os.c
  */
 
 void abd_init(void);
 void abd_fini(void);
 
+/*
+ * Linux ABD bio functions
+ */
+#if defined(__linux__) && defined(_KERNEL)
+unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
+unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/zfs/include/sys/abd_impl.h b/zfs/include/sys/abd_impl.h
new file mode 100644
index 0000000..e96f1ed
--- /dev/null
+++ b/zfs/include/sys/abd_impl.h

@@ -0,0 +1,111 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ */
+
+#ifndef _ABD_IMPL_H
+#define	_ABD_IMPL_H
+
+#include <sys/abd.h>
+#include <sys/wmsum.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum abd_stats_op {
+	ABDSTAT_INCR, /* Increase abdstat values */
+	ABDSTAT_DECR  /* Decrease abdstat values */
+} abd_stats_op_t;
+
+struct scatterlist; /* forward declaration */
+
+struct abd_iter {
+	/* public interface */
+	void		*iter_mapaddr;	/* addr corresponding to iter_pos */
+	size_t		iter_mapsize;	/* length of data valid at mapaddr */
+
+	/* private */
+	abd_t		*iter_abd;	/* ABD being iterated through */
+	size_t		iter_pos;
+	size_t		iter_offset;	/* offset in current sg/abd_buf, */
+					/* abd_offset included */
+	struct scatterlist *iter_sg;	/* current sg */
+};
+
+extern abd_t *abd_zero_scatter;
+
+abd_t *abd_gang_get_offset(abd_t *, size_t *);
+abd_t *abd_alloc_struct(size_t);
+void abd_free_struct(abd_t *);
+
+/*
+ * OS specific functions
+ */
+
+abd_t *abd_alloc_struct_impl(size_t);
+abd_t *abd_get_offset_scatter(abd_t *, abd_t *, size_t, size_t);
+void abd_free_struct_impl(abd_t *);
+void abd_alloc_chunks(abd_t *, size_t);
+void abd_free_chunks(abd_t *);
+void abd_update_scatter_stats(abd_t *, abd_stats_op_t);
+void abd_update_linear_stats(abd_t *, abd_stats_op_t);
+void abd_verify_scatter(abd_t *);
+void abd_free_linear_page(abd_t *);
+/* OS specific abd_iter functions */
+void abd_iter_init(struct abd_iter  *, abd_t *);
+boolean_t abd_iter_at_end(struct abd_iter *);
+void abd_iter_advance(struct abd_iter *, size_t);
+void abd_iter_map(struct abd_iter *);
+void abd_iter_unmap(struct abd_iter *);
+
+/*
+ * Helper macros
+ */
+#define	ABDSTAT_INCR(stat, val) \
+	wmsum_add(&abd_sums.stat, (val))
+#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
+#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)
+
+#define	ABD_SCATTER(abd)	(abd->abd_u.abd_scatter)
+#define	ABD_LINEAR_BUF(abd)	(abd->abd_u.abd_linear.abd_buf)
+#define	ABD_GANG(abd)		(abd->abd_u.abd_gang)
+
+#if defined(_KERNEL)
+#if defined(__FreeBSD__)
+#define	abd_enter_critical(flags)	critical_enter()
+#define	abd_exit_critical(flags)	critical_exit()
+#else
+#define	abd_enter_critical(flags)	local_irq_save(flags)
+#define	abd_exit_critical(flags)	local_irq_restore(flags)
+#endif
+#else /* !_KERNEL */
+#define	abd_enter_critical(flags)	((void)0)
+#define	abd_exit_critical(flags)	((void)0)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _ABD_IMPL_H */

diff --git a/zfs/include/sys/aggsum.h b/zfs/include/sys/aggsum.h
index caa08d7..6580005 100644
--- a/zfs/include/sys/aggsum.h
+++ b/zfs/include/sys/aggsum.h

@@ -39,15 +39,16 @@
 typedef struct aggsum {
 	kmutex_t as_lock;
 	int64_t as_lower_bound;
-	int64_t as_upper_bound;
-	uint64_t as_numbuckets;
-	aggsum_bucket_t *as_buckets;
+	uint64_t as_upper_bound;
+	aggsum_bucket_t *as_buckets ____cacheline_aligned;
+	uint_t as_numbuckets;
+	uint_t as_bucketshift;
 } aggsum_t;
 
 void aggsum_init(aggsum_t *, uint64_t);
 void aggsum_fini(aggsum_t *);
 int64_t aggsum_lower_bound(aggsum_t *);
-int64_t aggsum_upper_bound(aggsum_t *);
+uint64_t aggsum_upper_bound(aggsum_t *);
 int aggsum_compare(aggsum_t *, uint64_t);
 uint64_t aggsum_value(aggsum_t *);
 void aggsum_add(aggsum_t *, int64_t);

diff --git a/zfs/include/sys/arc.h b/zfs/include/sys/arc.h
index d7bb44b..5d81768 100644
--- a/zfs/include/sys/arc.h
+++ b/zfs/include/sys/arc.h

@@ -22,6 +22,8 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2019, Klara Inc.
  */
 
 #ifndef	_SYS_ARC_H
@@ -36,13 +38,20 @@
 #include <sys/zio.h>
 #include <sys/dmu.h>
 #include <sys/spa.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 
 /*
  * Used by arc_flush() to inform arc_evict_state() that it should evict
  * all available buffers from the arc state being passed in.
  */
-#define	ARC_EVICT_ALL	-1ULL
+#define	ARC_EVICT_ALL	UINT64_MAX
+
+/*
+ * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum
+ * block size and a larger block is written.  To leave some safety margin, we
+ * limit the minimum for zfs_arc_max to the maximum transaction size.
+ */
+#define	MIN_ARC_MAX	DMU_MAX_ACCESS
 
 #define	HDR_SET_LSIZE(hdr, x) do { \
 	ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \
@@ -70,12 +79,13 @@
  * parameter will be NULL.
  */
 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
-    const blkptr_t *bp, arc_buf_t *buf, void *private);
-typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
-typedef void arc_prune_func_t(int64_t bytes, void *private);
+    const blkptr_t *bp, arc_buf_t *buf, void *priv);
+typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
+typedef void arc_prune_func_t(int64_t bytes, void *priv);
 
 /* Shared module parameters */
 extern int zfs_arc_average_blocksize;
+extern int l2arc_exclude_special;
 
 /* generic arc_done_func_t's which you can use */
 arc_read_done_func_t arc_bcopy_func;
@@ -147,6 +157,17 @@
 	ARC_FLAG_SHARED_DATA		= 1 << 21,
 
 	/*
+	 * Fail this arc_read() (with ENOENT) if the data is not already present
+	 * in cache.
+	 */
+	ARC_FLAG_CACHED_ONLY		= 1 << 22,
+
+	/*
+	 * Don't instantiate an arc_buf_t for arc_read_done.
+	 */
+	ARC_FLAG_NO_BUF			= 1 << 23,
+
+	/*
 	 * The arc buffer's compression mode is stored in the top 7 bits of the
 	 * flags field, so these dummy flags are included so that MDB can
 	 * interpret the enum properly.
@@ -197,6 +218,7 @@
 	ARC_SPACE_DBUF,
 	ARC_SPACE_DNODE,
 	ARC_SPACE_BONUS,
+	ARC_SPACE_ABD_CHUNK_WASTE,
 	ARC_SPACE_NUMTYPES
 } arc_space_type_t;
 
@@ -245,18 +267,20 @@
 arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
     int32_t size);
 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
-    uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+    uint64_t psize, uint64_t lsize, enum zio_compress compression_type,
+    uint8_t complevel);
 arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
     boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
     const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type);
+    enum zio_compress compression_type, uint8_t complevel);
+uint8_t arc_get_complevel(arc_buf_t *buf);
 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type);
+    enum zio_compress compression_type, uint8_t complevel);
 arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
     dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type);
+    enum zio_compress compression_type, uint8_t complevel);
 void arc_return_buf(arc_buf_t *buf, void *tag);
 void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
 void arc_buf_destroy(arc_buf_t *buf, void *tag);
@@ -274,16 +298,16 @@
 #endif
 
 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
-    arc_read_done_func_t *done, void *private, zio_priority_t priority,
+    arc_read_done_func_t *done, void *priv, zio_priority_t priority,
     int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
     blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
     arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
     arc_write_done_func_t *physdone, arc_write_done_func_t *done,
-    void *private, zio_priority_t priority, int zio_flags,
+    void *priv, zio_priority_t priority, int zio_flags,
     const zbookmark_phys_t *zb);
 
-arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private);
+arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
 void arc_remove_prune_callback(arc_prune_t *p);
 void arc_freed(spa_t *spa, const blkptr_t *bp);
 
@@ -291,7 +315,10 @@
 void arc_tempreserve_clear(uint64_t reserve);
 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
 
+uint64_t arc_all_memory(void);
+uint64_t arc_default_max(uint64_t min, uint64_t allmem);
 uint64_t arc_target_bytes(void);
+void arc_set_limits(uint64_t);
 void arc_init(void);
 void arc_fini(void);
 
@@ -302,10 +329,14 @@
 void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
 void l2arc_remove_vdev(vdev_t *vd);
 boolean_t l2arc_vdev_present(vdev_t *vd);
+void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
+boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top,
+    uint64_t check);
 void l2arc_init(void);
 void l2arc_fini(void);
 void l2arc_start(void);
 void l2arc_stop(void);
+void l2arc_spa_rebuild_start(spa_t *spa);
 
 #ifndef _KERNEL
 extern boolean_t arc_watch;

diff --git a/zfs/include/sys/arc_impl.h b/zfs/include/sys/arc_impl.h
index c8f551d..db6238f 100644
--- a/zfs/include/sys/arc_impl.h
+++ b/zfs/include/sys/arc_impl.h

@@ -20,16 +20,21 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2013, Delphix. All rights reserved.
+ * Copyright (c) 2013, Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2020, George Amanakis. All rights reserved.
  */
 
 #ifndef _SYS_ARC_IMPL_H
 #define	_SYS_ARC_IMPL_H
 
 #include <sys/arc.h>
+#include <sys/multilist.h>
 #include <sys/zio_crypt.h>
+#include <sys/zthr.h>
+#include <sys/aggsum.h>
+#include <sys/wmsum.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -71,20 +76,20 @@
 	/*
 	 * list of evictable buffers
 	 */
-	multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
+	multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+	/*
+	 * supports the "dbufs" kstat
+	 */
+	arc_state_type_t arcs_state;
 	/*
 	 * total amount of evictable data in this state
 	 */
-	zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
+	zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
 	/*
 	 * total amount of data in this state; this includes: evictable,
 	 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
 	 */
 	zfs_refcount_t arcs_size;
-	/*
-	 * supports the "dbufs" kstat
-	 */
-	arc_state_type_t arcs_state;
 } arc_state_t;
 
 typedef struct arc_callback arc_callback_t;
@@ -96,6 +101,7 @@
 	boolean_t		acb_encrypted;
 	boolean_t		acb_compressed;
 	boolean_t		acb_noauth;
+	boolean_t		acb_nobuf;
 	zbookmark_phys_t	acb_zb;
 	zio_t			*acb_zio_dummy;
 	zio_t			*acb_zio_head;
@@ -148,24 +154,22 @@
 	kmutex_t		b_freeze_lock;
 	zio_cksum_t		*b_freeze_cksum;
 
-	arc_buf_t		*b_buf;
-	uint32_t		b_bufcnt;
-	/* for waiting on writes to complete */
+	/* for waiting on reads to complete */
 	kcondvar_t		b_cv;
 	uint8_t			b_byteswap;
 
-
 	/* protected by arc state mutex */
 	arc_state_t		*b_state;
 	multilist_node_t	b_arc_node;
 
-	/* updated atomically */
+	/* protected by hash lock */
 	clock_t			b_arc_access;
 	uint32_t		b_mru_hits;
 	uint32_t		b_mru_ghost_hits;
 	uint32_t		b_mfu_hits;
 	uint32_t		b_mfu_ghost_hits;
-	uint32_t		b_l2_hits;
+	uint32_t		b_bufcnt;
+	arc_buf_t		*b_buf;
 
 	/* self protecting */
 	zfs_refcount_t		b_refcnt;
@@ -174,6 +178,241 @@
 	abd_t			*b_pabd;
 } l1arc_buf_hdr_t;
 
+typedef enum l2arc_dev_hdr_flags_t {
+	L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0)	/* mirror of l2ad_first */
+} l2arc_dev_hdr_flags_t;
+
+/*
+ * Pointer used in persistent L2ARC (for pointing to log blocks).
+ */
+typedef struct l2arc_log_blkptr {
+	/*
+	 * Offset of log block within the device, in bytes
+	 */
+	uint64_t	lbp_daddr;
+	/*
+	 * Aligned payload size (in bytes) of the log block
+	 */
+	uint64_t	lbp_payload_asize;
+	/*
+	 * Offset in bytes of the first buffer in the payload
+	 */
+	uint64_t	lbp_payload_start;
+	/*
+	 * lbp_prop has the following format:
+	 *	* logical size (in bytes)
+	 *	* aligned (after compression) size (in bytes)
+	 *	* compression algorithm (we always LZ4-compress l2arc logs)
+	 *	* checksum algorithm (used for lbp_cksum)
+	 */
+	uint64_t	lbp_prop;
+	zio_cksum_t	lbp_cksum;	/* checksum of log */
+} l2arc_log_blkptr_t;
+
+/*
+ * The persistent L2ARC device header.
+ * Byte order of magic determines whether 64-bit bswap of fields is necessary.
+ */
+typedef struct l2arc_dev_hdr_phys {
+	uint64_t	dh_magic;	/* L2ARC_DEV_HDR_MAGIC */
+	uint64_t	dh_version;	/* Persistent L2ARC version */
+
+	/*
+	 * Global L2ARC device state and metadata.
+	 */
+	uint64_t	dh_spa_guid;
+	uint64_t	dh_vdev_guid;
+	uint64_t	dh_log_entries;		/* mirror of l2ad_log_entries */
+	uint64_t	dh_evict;		/* evicted offset in bytes */
+	uint64_t	dh_flags;		/* l2arc_dev_hdr_flags_t */
+	/*
+	 * Used in zdb.c for determining if a log block is valid, in the same
+	 * way that l2arc_rebuild() does.
+	 */
+	uint64_t	dh_start;		/* mirror of l2ad_start */
+	uint64_t	dh_end;			/* mirror of l2ad_end */
+	/*
+	 * Start of log block chain. [0] -> newest log, [1] -> one older (used
+	 * for initiating prefetch).
+	 */
+	l2arc_log_blkptr_t	dh_start_lbps[2];
+	/*
+	 * Aligned size of all log blocks as accounted by vdev_space_update().
+	 */
+	uint64_t	dh_lb_asize;		/* mirror of l2ad_lb_asize */
+	uint64_t	dh_lb_count;		/* mirror of l2ad_lb_count */
+	/*
+	 * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
+	 * display when the cache device was fully trimmed for the last
+	 * time.
+	 */
+	uint64_t		dh_trim_action_time;
+	uint64_t		dh_trim_state;
+	const uint64_t		dh_pad[30];	/* pad to 512 bytes */
+	zio_eck_t		dh_tail;
+} l2arc_dev_hdr_phys_t;
+CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
+
+/*
+ * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
+ */
+typedef struct l2arc_log_ent_phys {
+	dva_t			le_dva;		/* dva of buffer */
+	uint64_t		le_birth;	/* birth txg of buffer */
+	/*
+	 * le_prop has the following format:
+	 *	* logical size (in bytes)
+	 *	* physical (compressed) size (in bytes)
+	 *	* compression algorithm
+	 *	* object type (used to restore arc_buf_contents_t)
+	 *	* protected status (used for encryption)
+	 *	* prefetch status (used in l2arc_read_done())
+	 */
+	uint64_t		le_prop;
+	uint64_t		le_daddr;	/* buf location on l2dev */
+	uint64_t		le_complevel;
+	/*
+	 * We pad the size of each entry to a power of 2 so that the size of
+	 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
+	 * because of the L2BLK_SET_*SIZE macros.
+	 */
+	const uint64_t		le_pad[2];	/* pad to 64 bytes	 */
+} l2arc_log_ent_phys_t;
+
+#define	L2ARC_LOG_BLK_MAX_ENTRIES	(1022)
+
+/*
+ * A log block of up to 1022 ARC buffer log entries, chained into the
+ * persistent L2ARC metadata linked list. Byte order of magic determines
+ * whether 64-bit bswap of fields is necessary.
+ */
+typedef struct l2arc_log_blk_phys {
+	uint64_t		lb_magic;	/* L2ARC_LOG_BLK_MAGIC */
+	/*
+	 * There are 2 chains (headed by dh_start_lbps[2]), and this field
+	 * points back to the previous block in this chain. We alternate
+	 * which chain we append to, so they are time-wise and offset-wise
+	 * interleaved, but that is an optimization rather than for
+	 * correctness.
+	 */
+	l2arc_log_blkptr_t	lb_prev_lbp;	/* pointer to prev log block */
+	/*
+	 * Pad header section to 128 bytes
+	 */
+	uint64_t		lb_pad[7];
+	/* Payload */
+	l2arc_log_ent_phys_t	lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES];
+} l2arc_log_blk_phys_t;				/* 64K total */
+
+/*
+ * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
+ * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros.
+ */
+CTASSERT_GLOBAL(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t),
+    1ULL << SPA_MINBLOCKSHIFT));
+CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE);
+CTASSERT_GLOBAL(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE);
+
+/*
+ * These structures hold in-flight abd buffers for log blocks as they're being
+ * written to the L2ARC device.
+ */
+typedef struct l2arc_lb_abd_buf {
+	abd_t		*abd;
+	list_node_t	node;
+} l2arc_lb_abd_buf_t;
+
+/*
+ * These structures hold pointers to log blocks present on the L2ARC device.
+ */
+typedef struct l2arc_lb_ptr_buf {
+	l2arc_log_blkptr_t	*lb_ptr;
+	list_node_t		node;
+} l2arc_lb_ptr_buf_t;
+
+/* Macros for getting and setting fields in le_prop and lbp_prop */
+#define	L2BLK_GET_LSIZE(field)	\
+	BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
+#define	L2BLK_SET_LSIZE(field, x)	\
+	BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
+#define	L2BLK_GET_PSIZE(field)	\
+	BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
+#define	L2BLK_SET_PSIZE(field, x)	\
+	BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
+#define	L2BLK_GET_COMPRESS(field)	\
+	BF64_GET((field), 32, SPA_COMPRESSBITS)
+#define	L2BLK_SET_COMPRESS(field, x)	\
+	BF64_SET((field), 32, SPA_COMPRESSBITS, x)
+#define	L2BLK_GET_PREFETCH(field)	BF64_GET((field), 39, 1)
+#define	L2BLK_SET_PREFETCH(field, x)	BF64_SET((field), 39, 1, x)
+#define	L2BLK_GET_CHECKSUM(field)	BF64_GET((field), 40, 8)
+#define	L2BLK_SET_CHECKSUM(field, x)	BF64_SET((field), 40, 8, x)
+#define	L2BLK_GET_TYPE(field)		BF64_GET((field), 48, 8)
+#define	L2BLK_SET_TYPE(field, x)	BF64_SET((field), 48, 8, x)
+#define	L2BLK_GET_PROTECTED(field)	BF64_GET((field), 56, 1)
+#define	L2BLK_SET_PROTECTED(field, x)	BF64_SET((field), 56, 1, x)
+#define	L2BLK_GET_STATE(field)		BF64_GET((field), 57, 4)
+#define	L2BLK_SET_STATE(field, x)	BF64_SET((field), 57, 4, x)
+
+#define	PTR_SWAP(x, y)		\
+	do {			\
+		void *tmp = (x);\
+		x = y;		\
+		y = tmp;	\
+		_NOTE(CONSTCOND)\
+	} while (0)
+
+#define	L2ARC_DEV_HDR_MAGIC	0x5a46534341434845LLU	/* ASCII: "ZFSCACHE" */
+#define	L2ARC_LOG_BLK_MAGIC	0x4c4f47424c4b4844LLU	/* ASCII: "LOGBLKHD" */
+
+/*
+ * L2ARC Internals
+ */
+typedef struct l2arc_dev {
+	vdev_t			*l2ad_vdev;	/* vdev */
+	spa_t			*l2ad_spa;	/* spa */
+	uint64_t		l2ad_hand;	/* next write location */
+	uint64_t		l2ad_start;	/* first addr on device */
+	uint64_t		l2ad_end;	/* last addr on device */
+	boolean_t		l2ad_first;	/* first sweep through */
+	boolean_t		l2ad_writing;	/* currently writing */
+	kmutex_t		l2ad_mtx;	/* lock for buffer list */
+	list_t			l2ad_buflist;	/* buffer list */
+	list_node_t		l2ad_node;	/* device list node */
+	zfs_refcount_t		l2ad_alloc;	/* allocated bytes */
+	/*
+	 * Persistence-related stuff
+	 */
+	l2arc_dev_hdr_phys_t	*l2ad_dev_hdr;	/* persistent device header */
+	uint64_t		l2ad_dev_hdr_asize; /* aligned hdr size */
+	l2arc_log_blk_phys_t	l2ad_log_blk;	/* currently open log block */
+	int			l2ad_log_ent_idx; /* index into cur log blk */
+	/* Number of bytes in current log block's payload */
+	uint64_t		l2ad_log_blk_payload_asize;
+	/*
+	 * Offset (in bytes) of the first buffer in current log block's
+	 * payload.
+	 */
+	uint64_t		l2ad_log_blk_payload_start;
+	/* Flag indicating whether a rebuild is scheduled or is going on */
+	boolean_t		l2ad_rebuild;
+	boolean_t		l2ad_rebuild_cancel;
+	boolean_t		l2ad_rebuild_began;
+	uint64_t		l2ad_log_entries;   /* entries per log blk  */
+	uint64_t		l2ad_evict;	 /* evicted offset in bytes */
+	/* List of pointers to log blocks present in the L2ARC device */
+	list_t			l2ad_lbptr_list;
+	/*
+	 * Aligned size of all log blocks as accounted by vdev_space_update().
+	 */
+	zfs_refcount_t		l2ad_lb_asize;
+	/*
+	 * Number of log blocks present on the device.
+	 */
+	zfs_refcount_t		l2ad_lb_count;
+	boolean_t		l2ad_trim_all; /* TRIM whole device */
+} l2arc_dev_t;
+
 /*
  * Encrypted blocks will need to be stored encrypted on the L2ARC
  * disk as they appear in the main pool. In order for this to work we
@@ -204,32 +443,20 @@
 	uint8_t			b_mac[ZIO_DATA_MAC_LEN];
 } arc_buf_hdr_crypt_t;
 
-typedef struct l2arc_dev {
-	vdev_t			*l2ad_vdev;	/* vdev */
-	spa_t			*l2ad_spa;	/* spa */
-	uint64_t		l2ad_hand;	/* next write location */
-	uint64_t		l2ad_start;	/* first addr on device */
-	uint64_t		l2ad_end;	/* last addr on device */
-	boolean_t		l2ad_first;	/* first sweep through */
-	boolean_t		l2ad_writing;	/* currently writing */
-	kmutex_t		l2ad_mtx;	/* lock for buffer list */
-	list_t			l2ad_buflist;	/* buffer list */
-	list_node_t		l2ad_node;	/* device list node */
-	zfs_refcount_t		l2ad_alloc;	/* allocated bytes */
-} l2arc_dev_t;
-
 typedef struct l2arc_buf_hdr {
 	/* protected by arc_buf_hdr mutex */
 	l2arc_dev_t		*b_dev;		/* L2ARC device */
 	uint64_t		b_daddr;	/* disk address, offset byte */
 	uint32_t		b_hits;
-
+	arc_state_type_t	b_arcs_state;
 	list_node_t		b_l2node;
 } l2arc_buf_hdr_t;
 
 typedef struct l2arc_write_callback {
 	l2arc_dev_t	*l2wcb_dev;		/* device info */
 	arc_buf_hdr_t	*l2wcb_head;		/* head of write buflist */
+	/* in-flight list of log blocks */
+	list_t		l2wcb_abd_list;
 } l2arc_write_callback_t;
 
 struct arc_buf_hdr {
@@ -238,6 +465,9 @@
 	uint64_t		b_birth;
 
 	arc_buf_contents_t	b_type;
+	uint8_t			b_complevel;
+	uint8_t			b_reserved1; /* used for 4 byte alignment */
+	uint16_t		b_reserved2; /* used for 4 byte alignment */
 	arc_buf_hdr_t		*b_hash_next;
 	arc_flags_t		b_flags;
 
@@ -278,6 +508,513 @@
 	 */
 	arc_buf_hdr_crypt_t b_crypt_hdr;
 };
+
+typedef struct arc_stats {
+	kstat_named_t arcstat_hits;
+	kstat_named_t arcstat_misses;
+	kstat_named_t arcstat_demand_data_hits;
+	kstat_named_t arcstat_demand_data_misses;
+	kstat_named_t arcstat_demand_metadata_hits;
+	kstat_named_t arcstat_demand_metadata_misses;
+	kstat_named_t arcstat_prefetch_data_hits;
+	kstat_named_t arcstat_prefetch_data_misses;
+	kstat_named_t arcstat_prefetch_metadata_hits;
+	kstat_named_t arcstat_prefetch_metadata_misses;
+	kstat_named_t arcstat_mru_hits;
+	kstat_named_t arcstat_mru_ghost_hits;
+	kstat_named_t arcstat_mfu_hits;
+	kstat_named_t arcstat_mfu_ghost_hits;
+	kstat_named_t arcstat_deleted;
+	/*
+	 * Number of buffers that could not be evicted because the hash lock
+	 * was held by another thread.  The lock may not necessarily be held
+	 * by something using the same buffer, since hash locks are shared
+	 * by multiple buffers.
+	 */
+	kstat_named_t arcstat_mutex_miss;
+	/*
+	 * Number of buffers skipped when updating the access state due to the
+	 * header having already been released after acquiring the hash lock.
+	 */
+	kstat_named_t arcstat_access_skip;
+	/*
+	 * Number of buffers skipped because they have I/O in progress, are
+	 * indirect prefetch buffers that have not lived long enough, or are
+	 * not from the spa we're trying to evict from.
+	 */
+	kstat_named_t arcstat_evict_skip;
+	/*
+	 * Number of times arc_evict_state() was unable to evict enough
+	 * buffers to reach its target amount.
+	 */
+	kstat_named_t arcstat_evict_not_enough;
+	kstat_named_t arcstat_evict_l2_cached;
+	kstat_named_t arcstat_evict_l2_eligible;
+	kstat_named_t arcstat_evict_l2_eligible_mfu;
+	kstat_named_t arcstat_evict_l2_eligible_mru;
+	kstat_named_t arcstat_evict_l2_ineligible;
+	kstat_named_t arcstat_evict_l2_skip;
+	kstat_named_t arcstat_hash_elements;
+	kstat_named_t arcstat_hash_elements_max;
+	kstat_named_t arcstat_hash_collisions;
+	kstat_named_t arcstat_hash_chains;
+	kstat_named_t arcstat_hash_chain_max;
+	kstat_named_t arcstat_p;
+	kstat_named_t arcstat_c;
+	kstat_named_t arcstat_c_min;
+	kstat_named_t arcstat_c_max;
+	kstat_named_t arcstat_size;
+	/*
+	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
+	 * Note that the compressed bytes may match the uncompressed bytes
+	 * if the block is either not compressed or compressed arc is disabled.
+	 */
+	kstat_named_t arcstat_compressed_size;
+	/*
+	 * Uncompressed size of the data stored in b_pabd. If compressed
+	 * arc is disabled then this value will be identical to the stat
+	 * above.
+	 */
+	kstat_named_t arcstat_uncompressed_size;
+	/*
+	 * Number of bytes stored in all the arc_buf_t's. This is classified
+	 * as "overhead" since this data is typically short-lived and will
+	 * be evicted from the arc when it becomes unreferenced unless the
+	 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
+	 * values have been set (see comment in dbuf.c for more information).
+	 */
+	kstat_named_t arcstat_overhead_size;
+	/*
+	 * Number of bytes consumed by internal ARC structures necessary
+	 * for tracking purposes; these structures are not actually
+	 * backed by ARC buffers. This includes arc_buf_hdr_t structures
+	 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
+	 * caches), and arc_buf_t structures (allocated via arc_buf_t
+	 * cache).
+	 */
+	kstat_named_t arcstat_hdr_size;
+	/*
+	 * Number of bytes consumed by ARC buffers of type equal to
+	 * ARC_BUFC_DATA. This is generally consumed by buffers backing
+	 * on disk user data (e.g. plain file contents).
+	 */
+	kstat_named_t arcstat_data_size;
+	/*
+	 * Number of bytes consumed by ARC buffers of type equal to
+	 * ARC_BUFC_METADATA. This is generally consumed by buffers
+	 * backing on disk data that is used for internal ZFS
+	 * structures (e.g. ZAP, dnode, indirect blocks, etc).
+	 */
+	kstat_named_t arcstat_metadata_size;
+	/*
+	 * Number of bytes consumed by dmu_buf_impl_t objects.
+	 */
+	kstat_named_t arcstat_dbuf_size;
+	/*
+	 * Number of bytes consumed by dnode_t objects.
+	 */
+	kstat_named_t arcstat_dnode_size;
+	/*
+	 * Number of bytes consumed by bonus buffers.
+	 */
+	kstat_named_t arcstat_bonus_size;
+#if defined(COMPAT_FREEBSD11)
+	/*
+	 * Sum of the previous three counters, provided for compatibility.
+	 */
+	kstat_named_t arcstat_other_size;
+#endif
+
+	/*
+	 * Total number of bytes consumed by ARC buffers residing in the
+	 * arc_anon state. This includes *all* buffers in the arc_anon
+	 * state; e.g. data, metadata, evictable, and unevictable buffers
+	 * are all included in this value.
+	 */
+	kstat_named_t arcstat_anon_size;
+	/*
+	 * Number of bytes consumed by ARC buffers that meet the
+	 * following criteria: backing buffers of type ARC_BUFC_DATA,
+	 * residing in the arc_anon state, and are eligible for eviction
+	 * (e.g. have no outstanding holds on the buffer).
+	 */
+	kstat_named_t arcstat_anon_evictable_data;
+	/*
+	 * Number of bytes consumed by ARC buffers that meet the
+	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
+	 * residing in the arc_anon state, and are eligible for eviction
+	 * (e.g. have no outstanding holds on the buffer).
+	 */
+	kstat_named_t arcstat_anon_evictable_metadata;
+	/*
+	 * Total number of bytes consumed by ARC buffers residing in the
+	 * arc_mru state. This includes *all* buffers in the arc_mru
+	 * state; e.g. data, metadata, evictable, and unevictable buffers
+	 * are all included in this value.
+	 */
+	kstat_named_t arcstat_mru_size;
+	/*
+	 * Number of bytes consumed by ARC buffers that meet the
+	 * following criteria: backing buffers of type ARC_BUFC_DATA,
+	 * residing in the arc_mru state, and are eligible for eviction
+	 * (e.g. have no outstanding holds on the buffer).
+	 */
+	kstat_named_t arcstat_mru_evictable_data;
+	/*
+	 * Number of bytes consumed by ARC buffers that meet the
+	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
+	 * residing in the arc_mru state, and are eligible for eviction
+	 * (e.g. have no outstanding holds on the buffer).
+	 */
+	kstat_named_t arcstat_mru_evictable_metadata;
+	/*
+	 * Total number of bytes that *would have been* consumed by ARC
+	 * buffers in the arc_mru_ghost state. The key thing to note
+	 * here, is the fact that this size doesn't actually indicate
+	 * RAM consumption. The ghost lists only consist of headers and
+	 * don't actually have ARC buffers linked off of these headers.
+	 * Thus, *if* the headers had associated ARC buffers, these
+	 * buffers *would have* consumed this number of bytes.
+	 */
+	kstat_named_t arcstat_mru_ghost_size;
+	/*
+	 * Number of bytes that *would have been* consumed by ARC
+	 * buffers that are eligible for eviction, of type
+	 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
+	 */
+	kstat_named_t arcstat_mru_ghost_evictable_data;
+	/*
+	 * Number of bytes that *would have been* consumed by ARC
+	 * buffers that are eligible for eviction, of type
+	 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
+	 */
+	kstat_named_t arcstat_mru_ghost_evictable_metadata;
+	/*
+	 * Total number of bytes consumed by ARC buffers residing in the
+	 * arc_mfu state. This includes *all* buffers in the arc_mfu
+	 * state; e.g. data, metadata, evictable, and unevictable buffers
+	 * are all included in this value.
+	 */
+	kstat_named_t arcstat_mfu_size;
+	/*
+	 * Number of bytes consumed by ARC buffers that are eligible for
+	 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
+	 * state.
+	 */
+	kstat_named_t arcstat_mfu_evictable_data;
+	/*
+	 * Number of bytes consumed by ARC buffers that are eligible for
+	 * eviction, of type ARC_BUFC_METADATA, and reside in the
+	 * arc_mfu state.
+	 */
+	kstat_named_t arcstat_mfu_evictable_metadata;
+	/*
+	 * Total number of bytes that *would have been* consumed by ARC
+	 * buffers in the arc_mfu_ghost state. See the comment above
+	 * arcstat_mru_ghost_size for more details.
+	 */
+	kstat_named_t arcstat_mfu_ghost_size;
+	/*
+	 * Number of bytes that *would have been* consumed by ARC
+	 * buffers that are eligible for eviction, of type
+	 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
+	 */
+	kstat_named_t arcstat_mfu_ghost_evictable_data;
+	/*
+	 * Number of bytes that *would have been* consumed by ARC
+	 * buffers that are eligible for eviction, of type
+	 * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
+	 */
+	kstat_named_t arcstat_mfu_ghost_evictable_metadata;
+	kstat_named_t arcstat_l2_hits;
+	kstat_named_t arcstat_l2_misses;
+	/*
+	 * Allocated size (in bytes) of L2ARC cached buffers by ARC state.
+	 */
+	kstat_named_t arcstat_l2_prefetch_asize;
+	kstat_named_t arcstat_l2_mru_asize;
+	kstat_named_t arcstat_l2_mfu_asize;
+	/*
+	 * Allocated size (in bytes) of L2ARC cached buffers by buffer content
+	 * type.
+	 */
+	kstat_named_t arcstat_l2_bufc_data_asize;
+	kstat_named_t arcstat_l2_bufc_metadata_asize;
+	kstat_named_t arcstat_l2_feeds;
+	kstat_named_t arcstat_l2_rw_clash;
+	kstat_named_t arcstat_l2_read_bytes;
+	kstat_named_t arcstat_l2_write_bytes;
+	kstat_named_t arcstat_l2_writes_sent;
+	kstat_named_t arcstat_l2_writes_done;
+	kstat_named_t arcstat_l2_writes_error;
+	kstat_named_t arcstat_l2_writes_lock_retry;
+	kstat_named_t arcstat_l2_evict_lock_retry;
+	kstat_named_t arcstat_l2_evict_reading;
+	kstat_named_t arcstat_l2_evict_l1cached;
+	kstat_named_t arcstat_l2_free_on_write;
+	kstat_named_t arcstat_l2_abort_lowmem;
+	kstat_named_t arcstat_l2_cksum_bad;
+	kstat_named_t arcstat_l2_io_error;
+	kstat_named_t arcstat_l2_lsize;
+	kstat_named_t arcstat_l2_psize;
+	kstat_named_t arcstat_l2_hdr_size;
+	/*
+	 * Number of L2ARC log blocks written. These are used for restoring the
+	 * L2ARC. Updated during writing of L2ARC log blocks.
+	 */
+	kstat_named_t arcstat_l2_log_blk_writes;
+	/*
+	 * Moving average of the aligned size of the L2ARC log blocks, in
+	 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
+	 * log blocks.
+	 */
+	kstat_named_t arcstat_l2_log_blk_avg_asize;
+	/* Aligned size of L2ARC log blocks on L2ARC devices. */
+	kstat_named_t arcstat_l2_log_blk_asize;
+	/* Number of L2ARC log blocks present on L2ARC devices. */
+	kstat_named_t arcstat_l2_log_blk_count;
+	/*
+	 * Moving average of the ratio of the aligned size of L2ARC restored
+	 * data to the aligned size of their L2ARC metadata (both in bytes).
+	 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
+	 */
+	kstat_named_t arcstat_l2_data_to_meta_ratio;
+	/*
+	 * Number of times the L2ARC rebuild was successful for an L2ARC device.
+	 */
+	kstat_named_t arcstat_l2_rebuild_success;
+	/*
+	 * Number of times the L2ARC rebuild failed because the device header
+	 * was in an unsupported format or corrupted.
+	 */
+	kstat_named_t arcstat_l2_rebuild_abort_unsupported;
+	/*
+	 * Number of times the L2ARC rebuild failed because of IO errors
+	 * while reading a log block.
+	 */
+	kstat_named_t arcstat_l2_rebuild_abort_io_errors;
+	/*
+	 * Number of times the L2ARC rebuild failed because of IO errors when
+	 * reading the device header.
+	 */
+	kstat_named_t arcstat_l2_rebuild_abort_dh_errors;
+	/*
+	 * Number of L2ARC log blocks which failed to be restored due to
+	 * checksum errors.
+	 */
+	kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors;
+	/*
+	 * Number of times the L2ARC rebuild was aborted due to low system
+	 * memory.
+	 */
+	kstat_named_t arcstat_l2_rebuild_abort_lowmem;
+	/* Logical size of L2ARC restored data, in bytes. */
+	kstat_named_t arcstat_l2_rebuild_size;
+	/* Aligned size of L2ARC restored data, in bytes. */
+	kstat_named_t arcstat_l2_rebuild_asize;
+	/*
+	 * Number of L2ARC log entries (buffers) that were successfully
+	 * restored in ARC.
+	 */
+	kstat_named_t arcstat_l2_rebuild_bufs;
+	/*
+	 * Number of L2ARC log entries (buffers) already cached in ARC. These
+	 * were not restored again.
+	 */
+	kstat_named_t arcstat_l2_rebuild_bufs_precached;
+	/*
+	 * Number of L2ARC log blocks that were restored successfully. Each
+	 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
+	 */
+	kstat_named_t arcstat_l2_rebuild_log_blks;
+	kstat_named_t arcstat_memory_throttle_count;
+	kstat_named_t arcstat_memory_direct_count;
+	kstat_named_t arcstat_memory_indirect_count;
+	kstat_named_t arcstat_memory_all_bytes;
+	kstat_named_t arcstat_memory_free_bytes;
+	kstat_named_t arcstat_memory_available_bytes;
+	kstat_named_t arcstat_no_grow;
+	kstat_named_t arcstat_tempreserve;
+	kstat_named_t arcstat_loaned_bytes;
+	kstat_named_t arcstat_prune;
+	kstat_named_t arcstat_meta_used;
+	kstat_named_t arcstat_meta_limit;
+	kstat_named_t arcstat_dnode_limit;
+	kstat_named_t arcstat_meta_max;
+	kstat_named_t arcstat_meta_min;
+	kstat_named_t arcstat_async_upgrade_sync;
+	kstat_named_t arcstat_demand_hit_predictive_prefetch;
+	kstat_named_t arcstat_demand_hit_prescient_prefetch;
+	kstat_named_t arcstat_need_free;
+	kstat_named_t arcstat_sys_free;
+	kstat_named_t arcstat_raw_size;
+	kstat_named_t arcstat_cached_only_in_progress;
+	kstat_named_t arcstat_abd_chunk_waste_size;
+} arc_stats_t;
+
+typedef struct arc_sums {
+	wmsum_t arcstat_hits;
+	wmsum_t arcstat_misses;
+	wmsum_t arcstat_demand_data_hits;
+	wmsum_t arcstat_demand_data_misses;
+	wmsum_t arcstat_demand_metadata_hits;
+	wmsum_t arcstat_demand_metadata_misses;
+	wmsum_t arcstat_prefetch_data_hits;
+	wmsum_t arcstat_prefetch_data_misses;
+	wmsum_t arcstat_prefetch_metadata_hits;
+	wmsum_t arcstat_prefetch_metadata_misses;
+	wmsum_t arcstat_mru_hits;
+	wmsum_t arcstat_mru_ghost_hits;
+	wmsum_t arcstat_mfu_hits;
+	wmsum_t arcstat_mfu_ghost_hits;
+	wmsum_t arcstat_deleted;
+	wmsum_t arcstat_mutex_miss;
+	wmsum_t arcstat_access_skip;
+	wmsum_t arcstat_evict_skip;
+	wmsum_t arcstat_evict_not_enough;
+	wmsum_t arcstat_evict_l2_cached;
+	wmsum_t arcstat_evict_l2_eligible;
+	wmsum_t arcstat_evict_l2_eligible_mfu;
+	wmsum_t arcstat_evict_l2_eligible_mru;
+	wmsum_t arcstat_evict_l2_ineligible;
+	wmsum_t arcstat_evict_l2_skip;
+	wmsum_t arcstat_hash_collisions;
+	wmsum_t arcstat_hash_chains;
+	aggsum_t arcstat_size;
+	wmsum_t arcstat_compressed_size;
+	wmsum_t arcstat_uncompressed_size;
+	wmsum_t arcstat_overhead_size;
+	wmsum_t arcstat_hdr_size;
+	wmsum_t arcstat_data_size;
+	wmsum_t arcstat_metadata_size;
+	wmsum_t arcstat_dbuf_size;
+	aggsum_t arcstat_dnode_size;
+	wmsum_t arcstat_bonus_size;
+	wmsum_t arcstat_l2_hits;
+	wmsum_t arcstat_l2_misses;
+	wmsum_t arcstat_l2_prefetch_asize;
+	wmsum_t arcstat_l2_mru_asize;
+	wmsum_t arcstat_l2_mfu_asize;
+	wmsum_t arcstat_l2_bufc_data_asize;
+	wmsum_t arcstat_l2_bufc_metadata_asize;
+	wmsum_t arcstat_l2_feeds;
+	wmsum_t arcstat_l2_rw_clash;
+	wmsum_t arcstat_l2_read_bytes;
+	wmsum_t arcstat_l2_write_bytes;
+	wmsum_t arcstat_l2_writes_sent;
+	wmsum_t arcstat_l2_writes_done;
+	wmsum_t arcstat_l2_writes_error;
+	wmsum_t arcstat_l2_writes_lock_retry;
+	wmsum_t arcstat_l2_evict_lock_retry;
+	wmsum_t arcstat_l2_evict_reading;
+	wmsum_t arcstat_l2_evict_l1cached;
+	wmsum_t arcstat_l2_free_on_write;
+	wmsum_t arcstat_l2_abort_lowmem;
+	wmsum_t arcstat_l2_cksum_bad;
+	wmsum_t arcstat_l2_io_error;
+	wmsum_t arcstat_l2_lsize;
+	wmsum_t arcstat_l2_psize;
+	aggsum_t arcstat_l2_hdr_size;
+	wmsum_t arcstat_l2_log_blk_writes;
+	wmsum_t arcstat_l2_log_blk_asize;
+	wmsum_t arcstat_l2_log_blk_count;
+	wmsum_t arcstat_l2_rebuild_success;
+	wmsum_t arcstat_l2_rebuild_abort_unsupported;
+	wmsum_t arcstat_l2_rebuild_abort_io_errors;
+	wmsum_t arcstat_l2_rebuild_abort_dh_errors;
+	wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors;
+	wmsum_t arcstat_l2_rebuild_abort_lowmem;
+	wmsum_t arcstat_l2_rebuild_size;
+	wmsum_t arcstat_l2_rebuild_asize;
+	wmsum_t arcstat_l2_rebuild_bufs;
+	wmsum_t arcstat_l2_rebuild_bufs_precached;
+	wmsum_t arcstat_l2_rebuild_log_blks;
+	wmsum_t arcstat_memory_throttle_count;
+	wmsum_t arcstat_memory_direct_count;
+	wmsum_t arcstat_memory_indirect_count;
+	wmsum_t arcstat_prune;
+	aggsum_t arcstat_meta_used;
+	wmsum_t arcstat_async_upgrade_sync;
+	wmsum_t arcstat_demand_hit_predictive_prefetch;
+	wmsum_t arcstat_demand_hit_prescient_prefetch;
+	wmsum_t arcstat_raw_size;
+	wmsum_t arcstat_cached_only_in_progress;
+	wmsum_t arcstat_abd_chunk_waste_size;
+} arc_sums_t;
+
+typedef struct arc_evict_waiter {
+	list_node_t aew_node;
+	kcondvar_t aew_cv;
+	uint64_t aew_count;
+} arc_evict_waiter_t;
+
+#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
+
+#define	ARCSTAT_INCR(stat, val) \
+	wmsum_add(&arc_sums.stat, (val))
+
+#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
+#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)
+
+#define	arc_no_grow	ARCSTAT(arcstat_no_grow) /* do not grow cache size */
+#define	arc_p		ARCSTAT(arcstat_p)	/* target size of MRU */
+#define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
+#define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
+#define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
+#define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
+
+#define	arc_anon	(&ARC_anon)
+#define	arc_mru		(&ARC_mru)
+#define	arc_mru_ghost	(&ARC_mru_ghost)
+#define	arc_mfu		(&ARC_mfu)
+#define	arc_mfu_ghost	(&ARC_mfu_ghost)
+#define	arc_l2c_only	(&ARC_l2c_only)
+
+extern taskq_t *arc_prune_taskq;
+extern arc_stats_t arc_stats;
+extern arc_sums_t arc_sums;
+extern hrtime_t arc_growtime;
+extern boolean_t arc_warm;
+extern int arc_grow_retry;
+extern int arc_no_grow_shift;
+extern int arc_shrink_shift;
+extern kmutex_t arc_prune_mtx;
+extern list_t arc_prune_list;
+extern arc_state_t	ARC_mfu;
+extern arc_state_t	ARC_mru;
+extern uint_t zfs_arc_pc_percent;
+extern int arc_lotsfree_percent;
+extern unsigned long zfs_arc_min;
+extern unsigned long zfs_arc_max;
+
+extern void arc_reduce_target_size(int64_t to_free);
+extern boolean_t arc_reclaim_needed(void);
+extern void arc_kmem_reap_soon(void);
+extern void arc_wait_for_eviction(uint64_t, boolean_t);
+
+extern void arc_lowmem_init(void);
+extern void arc_lowmem_fini(void);
+extern void arc_prune_async(int64_t);
+extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
+extern uint64_t arc_free_memory(void);
+extern int64_t arc_available_memory(void);
+extern void arc_tuning_update(boolean_t);
+extern void arc_register_hotplug(void);
+extern void arc_unregister_hotplug(void);
+
+extern int param_set_arc_long(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
+
+/* used in zdb.c */
+boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
+    const l2arc_log_blkptr_t *lbp);
+
+/* used in vdev_trim.c */
+void l2arc_dev_hdr_update(l2arc_dev_t *dev);
+l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/zfs/include/sys/avl.h b/zfs/include/sys/avl.h
index 962e8b1..ed3c6f8 100644
--- a/zfs/include/sys/avl.h
+++ b/zfs/include/sys/avl.h

@@ -108,9 +108,9 @@
 /*
  * AVL comparator helpers
  */
-#define	AVL_ISIGN(a)	(((a) > 0) - ((a) < 0))
-#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))
-#define	AVL_PCMP(a, b)	\
+#define	TREE_ISIGN(a)	(((a) > 0) - ((a) < 0))
+#define	TREE_CMP(a, b)	(((a) > (b)) - ((a) < (b)))
+#define	TREE_PCMP(a, b)	\
 	(((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b)))
 
 /*
@@ -260,6 +260,17 @@
 extern void avl_remove(avl_tree_t *tree, void *node);
 
 /*
+ * Reinsert a node only if its order has changed relative to its nearest
+ * neighbors. To optimize performance avl_update_lt() checks only the previous
+ * node and avl_update_gt() checks only the next node. Use avl_update_lt() and
+ * avl_update_gt() only if you know the direction in which the order of the
+ * node may change.
+ */
+extern boolean_t avl_update(avl_tree_t *, void *);
+extern boolean_t avl_update_lt(avl_tree_t *, void *);
+extern boolean_t avl_update_gt(avl_tree_t *, void *);
+
+/*
  * Swaps the contents of the two trees.
  */
 extern void avl_swap(avl_tree_t *tree1, avl_tree_t *tree2);

diff --git a/zfs/include/sys/bitops.h b/zfs/include/sys/bitops.h
new file mode 100644
index 0000000..56d5207
--- /dev/null
+++ b/zfs/include/sys/bitops.h

@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
+ */
+
+#ifndef _SYS_BITOPS_H
+#define	_SYS_BITOPS_H
+
+#include <sys/zfs_context.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * General-purpose 32-bit and 64-bit bitfield encodings.
+ *
+ * A field is the run of len bits of x whose least significant bit is bit
+ * number low.  The DECODE macros shift that field down to bit 0 and the
+ * ENCODE macros shift a right-aligned value up into field position; both
+ * mask with P2PHASE() (defined elsewhere; presumably x & (align - 1)) so
+ * that only len bits survive.
+ *
+ * NOTE: the masks are built as 1 << (len), so len must be smaller than the
+ * word width (32 or 64) for the shift to be well defined in C.
+ */
+#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
+#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
+#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
+#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))
+
+#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
+#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)
+
+/*
+ * Overwrite the len-bit field at bit low of x with val.  x is XOR-ed with
+ * the encoded difference between its current field and val, which flips
+ * exactly the bits that differ and leaves the rest of x untouched.
+ *
+ * x must be a modifiable lvalue.  x, low, len and val are each expanded more
+ * than once, so arguments with side effects must be avoided.  Every argument
+ * is parenthesized where it is used in an expression so that callers may pass
+ * compound expressions (e.g. "a & b") without them regrouping.
+ */
+#define	BF32_SET(x, low, len, val) do { \
+	ASSERT3U((val), <, 1U << (len)); \
+	ASSERT3U((low) + (len), <=, 32); \
+	(x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len); \
+_NOTE(CONSTCOND) } while (0)
+
+#define	BF64_SET(x, low, len, val) do { \
+	ASSERT3U((val), <, 1ULL << (len)); \
+	ASSERT3U((low) + (len), <=, 64); \
+	(x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len); \
+_NOTE(CONSTCOND) } while (0)
+
+/*
+ * Scaled-and-biased accessors: the stored field holds (value >> shift) - bias,
+ * so the GET_SB macros add the bias back and shift left to recover the value.
+ */
+#define	BF32_GET_SB(x, low, len, shift, bias)	\
+	((BF32_GET(x, low, len) + (bias)) << (shift))
+#define	BF64_GET_SB(x, low, len, shift, bias)	\
+	((BF64_GET(x, low, len) + (bias)) << (shift))
+
+/*
+ * The SET_SB macros store (val >> shift) - bias in the field, after
+ * asserting that IS_P2ALIGNED(val, 1 << shift) holds and that val >> shift
+ * is not below bias.
+ *
+ * We use ASSERT3U instead of ASSERT in these macros to prevent a lint error in
+ * the case where val is a constant.  We can't fix ASSERT because it's used as
+ * an expression in several places in the kernel; as a result, changing it to
+ * the do{} while() syntax to allow us to _NOTE the CONSTCOND is not an option.
+ */
+#define	BF32_SET_SB(x, low, len, shift, bias, val) do { \
+	ASSERT3U(IS_P2ALIGNED((val), 1U << (shift)), !=, B_FALSE); \
+	ASSERT3S((val) >> (shift), >=, (bias)); \
+	BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \
+_NOTE(CONSTCOND) } while (0)
+#define	BF64_SET_SB(x, low, len, shift, bias, val) do { \
+	ASSERT3U(IS_P2ALIGNED((val), 1ULL << (shift)), !=, B_FALSE); \
+	ASSERT3S((val) >> (shift), >=, (bias)); \
+	BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \
+_NOTE(CONSTCOND) } while (0)
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_BITOPS_H */

diff --git a/zfs/include/sys/bplist.h b/zfs/include/sys/bplist.h
index 471be90..f8deaf8 100644
--- a/zfs/include/sys/bplist.h
+++ b/zfs/include/sys/bplist.h

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_BPLIST_H
@@ -49,6 +50,7 @@
 void bplist_append(bplist_t *bpl, const blkptr_t *bp);
 void bplist_iterate(bplist_t *bpl, bplist_itor_t *func,
     void *arg, dmu_tx_t *tx);
+void bplist_clear(bplist_t *bpl);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/bpobj.h b/zfs/include/sys/bpobj.h
index d425e23..2bca0a8 100644
--- a/zfs/include/sys/bpobj.h
+++ b/zfs/include/sys/bpobj.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2015, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_BPOBJ_H
@@ -31,6 +31,7 @@
 #include <sys/txg.h>
 #include <sys/zio.h>
 #include <sys/zfs_context.h>
+#include <sys/bplist.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -48,10 +49,12 @@
 	uint64_t	bpo_uncomp;
 	uint64_t	bpo_subobjs;
 	uint64_t	bpo_num_subobjs;
+	uint64_t	bpo_num_freed;
 } bpobj_phys_t;
 
 #define	BPOBJ_SIZE_V0	(2 * sizeof (uint64_t))
 #define	BPOBJ_SIZE_V1	(4 * sizeof (uint64_t))
+#define	BPOBJ_SIZE_V2	(6 * sizeof (uint64_t))
 
 typedef struct bpobj {
 	kmutex_t	bpo_lock;
@@ -60,12 +63,14 @@
 	int		bpo_epb;
 	uint8_t		bpo_havecomp;
 	uint8_t		bpo_havesubobj;
+	uint8_t		bpo_havefreed;
 	bpobj_phys_t	*bpo_phys;
 	dmu_buf_t	*bpo_dbuf;
 	dmu_buf_t	*bpo_cached_dbuf;
 } bpobj_t;
 
-typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx);
 
 uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
 uint64_t bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx);
@@ -77,10 +82,14 @@
 boolean_t bpobj_is_open(const bpobj_t *bpo);
 
 int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
-int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *);
+int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, uint64_t *);
+int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
+    void *arg, int64_t start);
 
 void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
-void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);
+void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj);
+void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx);
 
 int bpobj_space(bpobj_t *bpo,
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
@@ -88,6 +97,9 @@
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 boolean_t bpobj_is_empty(bpobj_t *bpo);
 
+int bplist_append_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx);
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/bqueue.h b/zfs/include/sys/bqueue.h
index 63722df..b962196 100644
--- a/zfs/include/sys/bqueue.h
+++ b/zfs/include/sys/bqueue.h

@@ -13,7 +13,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef	_BQUEUE_H
@@ -30,20 +30,22 @@
 	kmutex_t bq_lock;
 	kcondvar_t bq_add_cv;
 	kcondvar_t bq_pop_cv;
-	uint64_t bq_size;
-	uint64_t bq_maxsize;
+	size_t bq_size;
+	size_t bq_maxsize;
+	uint_t bq_fill_fraction;
 	size_t bq_node_offset;
 } bqueue_t;
 
 typedef struct bqueue_node {
 	list_node_t bqn_node;
-	uint64_t bqn_size;
+	size_t bqn_size;
 } bqueue_node_t;
 
 
-int bqueue_init(bqueue_t *, uint64_t, size_t);
+int bqueue_init(bqueue_t *, uint_t, size_t, size_t);
 void bqueue_destroy(bqueue_t *);
-void bqueue_enqueue(bqueue_t *, void *, uint64_t);
+void bqueue_enqueue(bqueue_t *, void *, size_t);
+void bqueue_enqueue_flush(bqueue_t *, void *, size_t);
 void *bqueue_dequeue(bqueue_t *);
 boolean_t bqueue_empty(bqueue_t *);
 

diff --git a/zfs/include/sys/btree.h b/zfs/include/sys/btree.h
new file mode 100644
index 0000000..883abb5
--- /dev/null
+++ b/zfs/include/sys/btree.h

@@ -0,0 +1,252 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+
+#ifndef	_BTREE_H
+#define	_BTREE_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include	<sys/zfs_context.h>
+
+/*
+ * This file defines the interface for a B-Tree implementation for ZFS. The
+ * tree can be used to store arbitrary sortable data types with low overhead
+ * and good operation performance. In addition the tree intelligently
+ * optimizes bulk in-order insertions to improve memory use and performance.
+ *
+ * Note that for all B-Tree functions, the values returned are pointers to the
+ * internal copies of the data in the tree. The internal data can only be
+ * safely mutated if the changes cannot change the ordering of the element
+ * with respect to any other elements in the tree.
+ *
+ * The major drawback of the B-Tree is that any returned elements or indexes
+ * are only valid until a side-effectful operation occurs, since these can
+ * result in reallocation or relocation of data. Side-effectful operations are
+ * defined as insertion, removal, and zfs_btree_destroy_nodes.
+ *
+ * The B-Tree has two types of nodes: core nodes, and leaf nodes. Core
+ * nodes have an array of children pointing to other nodes, and an array of
+ * elements that act as separators between the elements of the subtrees rooted
+ * at its children. Leaf nodes only contain data elements, and form the bottom
+ * layer of the tree. Unlike B+ Trees, in this B-Tree implementation the
+ * elements in the core nodes are not copies of or references to leaf node
+ * elements.  Each element occurs only once in the tree, no matter what kind
+ * of node it is in.
+ *
+ * The tree's height is the same throughout, unlike many other forms of search
+ * tree. Each node (except for the root) must be between half minus one and
+ * completely full of elements (and children) at all times. Any operation that
+ * would put the node outside of that range results in a rebalancing operation
+ * (taking, merging, or splitting).
+ *
+ * This tree was implemented using descriptions from Wikipedia's articles on
+ * B-Trees and B+ Trees.
+ */
+
+/*
+ * Decreasing these values results in smaller memmove operations, but more of
+ * them, and increased memory overhead. Increasing these values results in
+ * higher variance in operation time, and reduces memory overhead.
+ */
+#define	BTREE_CORE_ELEMS	126
+#define	BTREE_LEAF_SIZE		4096
+
+extern kmem_cache_t *zfs_btree_leaf_cache;
+
+typedef struct zfs_btree_hdr {
+	struct zfs_btree_core	*bth_parent;
+	/*
+	 * Set to -1 to indicate core nodes. Other values represent first
+	 * valid element offset for leaf nodes.
+	 */
+	uint32_t		bth_first;
+	/*
+	 * For both leaf and core nodes, represents the number of elements in
+	 * the node. For core nodes, they will have bth_count + 1 children.
+	 */
+	uint32_t		bth_count;
+} zfs_btree_hdr_t;
+
+typedef struct zfs_btree_core {
+	zfs_btree_hdr_t	btc_hdr;
+	zfs_btree_hdr_t	*btc_children[BTREE_CORE_ELEMS + 1];
+	uint8_t		btc_elems[];
+} zfs_btree_core_t;
+
+typedef struct zfs_btree_leaf {
+	zfs_btree_hdr_t	btl_hdr;
+	uint8_t		btl_elems[];
+} zfs_btree_leaf_t;
+
+typedef struct zfs_btree_index {
+	zfs_btree_hdr_t	*bti_node;
+	uint32_t	bti_offset;
+	/*
+	 * True if the location is before the listed offset, false if it's at
+	 * the listed offset.
+	 */
+	boolean_t	bti_before;
+} zfs_btree_index_t;
+
+typedef struct btree {
+	int (*bt_compar) (const void *, const void *);
+	size_t			bt_elem_size;
+	size_t			bt_leaf_size;
+	uint32_t		bt_leaf_cap;
+	int32_t			bt_height;
+	uint64_t		bt_num_elems;
+	uint64_t		bt_num_nodes;
+	zfs_btree_hdr_t		*bt_root;
+	zfs_btree_leaf_t	*bt_bulk; // non-null if bulk loading
+} zfs_btree_t;
+
+/*
+ * Allocate and deallocate caches for btree nodes.
+ */
+void zfs_btree_init(void);
+void zfs_btree_fini(void);
+
+/*
+ * Initialize a B-Tree. Arguments are:
+ *
+ * tree   - the tree to be initialized
+ * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
+ *          -1 for <, 0 for ==, and +1 for >
+ * size   - the value of sizeof(struct my_type)
+ * lsize  - custom leaf size (zfs_btree_create_custom() only)
+ */
+void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
+    size_t);
+void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
+    size_t, size_t);
+
+/*
+ * Find a node with a matching value in the tree. Returns the matching node
+ * found. If not found, it returns NULL and then if "where" is not NULL it sets
+ * "where" for use with zfs_btree_add_idx() or zfs_btree_nearest().
+ *
+ * node   - node that has the value being looked for
+ * where  - position for use with zfs_btree_nearest() or zfs_btree_add_idx(),
+ *          may be NULL
+ */
+void *zfs_btree_find(zfs_btree_t *, const void *, zfs_btree_index_t *);
+
+/*
+ * Insert a node into the tree.
+ *
+ * node   - the node to insert
+ * where  - position as returned from zfs_btree_find()
+ */
+void zfs_btree_add_idx(zfs_btree_t *, const void *, const zfs_btree_index_t *);
+
+/*
+ * Return the first or last valued node in the tree. Will return NULL if the
+ * tree is empty. The index can be NULL if the location of the first or last
+ * element isn't required.
+ */
+void *zfs_btree_first(zfs_btree_t *, zfs_btree_index_t *);
+void *zfs_btree_last(zfs_btree_t *, zfs_btree_index_t *);
+
+/*
+ * Return the next or previous valued node in the tree. The second index can
+ * safely be NULL, if the location of the next or previous value isn't
+ * required.
+ */
+void *zfs_btree_next(zfs_btree_t *, const zfs_btree_index_t *,
+    zfs_btree_index_t *);
+void *zfs_btree_prev(zfs_btree_t *, const zfs_btree_index_t *,
+    zfs_btree_index_t *);
+
+/*
+ * Get a value from a tree and an index.
+ */
+void *zfs_btree_get(zfs_btree_t *, zfs_btree_index_t *);
+
+/*
+ * Add a single value to the tree. The value must not compare equal to any
+ * other node already in the tree. Note that the value will be copied out, not
+ * inserted directly. It is safe to free or destroy the value once this
+ * function returns.
+ */
+void zfs_btree_add(zfs_btree_t *, const void *);
+
+/*
+ * Remove a single value from the tree.  The value must be in the tree. The
+ * pointer passed in may be a pointer into a tree-controlled buffer, but it
+ * need not be.
+ */
+void zfs_btree_remove(zfs_btree_t *, const void *);
+
+/*
+ * Remove the value at the given location from the tree.
+ */
+void zfs_btree_remove_idx(zfs_btree_t *, zfs_btree_index_t *);
+
+/*
+ * Return the number of nodes in the tree
+ */
+ulong_t zfs_btree_numnodes(zfs_btree_t *);
+
+/*
+ * Used to destroy any remaining nodes in a tree. The cookie argument should
+ * be initialized to NULL before the first call. Returns a node that has been
+ * removed from the tree and may be free()'d. Returns NULL when the tree is
+ * empty.
+ *
+ * Once you call zfs_btree_destroy_nodes(), you can only continue calling it
+ * and finally zfs_btree_destroy(). No other B-Tree routines will be valid.
+ *
+ * cookie - an index used to save state between calls to
+ * zfs_btree_destroy_nodes()
+ *
+ * EXAMPLE:
+ *	zfs_btree_t *tree;
+ *	struct my_data *node;
+ *	zfs_btree_index_t *cookie;
+ *
+ *	cookie = NULL;
+ *	while ((node = zfs_btree_destroy_nodes(tree, &cookie)) != NULL)
+ *		data_destroy(node);
+ *	zfs_btree_destroy(tree);
+ */
+void *zfs_btree_destroy_nodes(zfs_btree_t *, zfs_btree_index_t **);
+
+/*
+ * Destroys all nodes in the tree quickly. This doesn't give the caller an
+ * opportunity to iterate over each node and do its own cleanup; for that, use
+ * zfs_btree_destroy_nodes().
+ */
+void zfs_btree_clear(zfs_btree_t *);
+
+/*
+ * Final destroy of a B-Tree. Arguments are:
+ *
+ * tree   - the empty tree to destroy
+ */
+void zfs_btree_destroy(zfs_btree_t *tree);
+
+/* Runs a variety of self-checks on the btree to verify integrity. */
+void zfs_btree_verify(zfs_btree_t *tree);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _BTREE_H */

diff --git a/zfs/include/sys/crypto/Makefile.am b/zfs/include/sys/crypto/Makefile.am
index 7f8156b..eb31f6a 100644
--- a/zfs/include/sys/crypto/Makefile.am
+++ b/zfs/include/sys/crypto/Makefile.am

@@ -1,20 +1,16 @@
 COMMON_H = \
-	$(top_srcdir)/include/sys/crypto/api.h \
-	$(top_srcdir)/include/sys/crypto/common.h \
-	$(top_srcdir)/include/sys/crypto/icp.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	api.h \
+	common.h \
+	icp.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/crypto
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/crypto
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/crypto/api.h b/zfs/include/sys/crypto/api.h
index 7c3c465..8aecfea 100644
--- a/zfs/include/sys/crypto/api.h
+++ b/zfs/include/sys/crypto/api.h

@@ -58,7 +58,7 @@
  */
 
 #define	CRYPTO_MECH_INVALID	((uint64_t)-1)
-extern crypto_mech_type_t crypto_mech2id(crypto_mech_name_t name);
+extern crypto_mech_type_t crypto_mech2id(char *name);
 
 /*
  * Create and destroy context templates.

diff --git a/zfs/include/sys/crypto/common.h b/zfs/include/sys/crypto/common.h
index a4f9d98..9a23922 100644
--- a/zfs/include/sys/crypto/common.h
+++ b/zfs/include/sys/crypto/common.h

@@ -244,7 +244,7 @@
 		iovec_t cdu_raw;		/* Pointer and length	    */
 
 		/* uio scatter-gather format */
-		uio_t	*cdu_uio;
+		zfs_uio_t	*cdu_uio;
 
 	} cdu;	/* Crypto Data Union */
 } crypto_data_t;

diff --git a/zfs/include/sys/crypto/icp.h b/zfs/include/sys/crypto/icp.h
index 4609e3a..f8fd285 100644
--- a/zfs/include/sys/crypto/icp.h
+++ b/zfs/include/sys/crypto/icp.h

@@ -32,9 +32,6 @@
 int edonr_mod_init(void);
 int edonr_mod_fini(void);
 
-int sha1_mod_init(void);
-int sha1_mod_fini(void);
-
 int sha2_mod_init(void);
 int sha2_mod_fini(void);
 

diff --git a/zfs/include/sys/dataset_kstats.h b/zfs/include/sys/dataset_kstats.h
index 667d1b8..b165b98 100644
--- a/zfs/include/sys/dataset_kstats.h
+++ b/zfs/include/sys/dataset_kstats.h

@@ -27,18 +27,18 @@
 #ifndef _SYS_DATASET_KSTATS_H
 #define	_SYS_DATASET_KSTATS_H
 
-#include <sys/aggsum.h>
+#include <sys/wmsum.h>
 #include <sys/dmu.h>
 #include <sys/kstat.h>
 
-typedef struct dataset_aggsum_stats_t {
-	aggsum_t das_writes;
-	aggsum_t das_nwritten;
-	aggsum_t das_reads;
-	aggsum_t das_nread;
-	aggsum_t das_nunlinks;
-	aggsum_t das_nunlinked;
-} dataset_aggsum_stats_t;
+typedef struct dataset_sum_stats_t {
+	wmsum_t dss_writes;
+	wmsum_t dss_nwritten;
+	wmsum_t dss_reads;
+	wmsum_t dss_nread;
+	wmsum_t dss_nunlinks;
+	wmsum_t dss_nunlinked;
+} dataset_sum_stats_t;
 
 typedef struct dataset_kstat_values {
 	kstat_named_t dkv_ds_name;
@@ -59,7 +59,7 @@
 } dataset_kstat_values_t;
 
 typedef struct dataset_kstats {
-	dataset_aggsum_stats_t dk_aggsums;
+	dataset_sum_stats_t dk_sums;
 	kstat_t *dk_kstats;
 } dataset_kstats_t;
 

diff --git a/zfs/include/sys/dbuf.h b/zfs/include/sys/dbuf.h
index eea9e26..b757b26 100644
--- a/zfs/include/sys/dbuf.h
+++ b/zfs/include/sys/dbuf.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
@@ -34,7 +34,7 @@
 #include <sys/zio.h>
 #include <sys/arc.h>
 #include <sys/zfs_context.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/zrlock.h>
 #include <sys/multilist.h>
 
@@ -108,6 +108,12 @@
 	DR_OVERRIDDEN
 } override_states_t;
 
+typedef enum db_lock_type {
+	DLT_NONE,
+	DLT_PARENT,
+	DLT_OBJSET
+} db_lock_type_t;
+
 typedef struct dbuf_dirty_record {
 	/* link on our parents dirty list */
 	list_node_t dr_dirty_node;
@@ -121,8 +127,18 @@
 	/* pointer back to our dbuf */
 	struct dmu_buf_impl *dr_dbuf;
 
-	/* pointer to next dirty record */
-	struct dbuf_dirty_record *dr_next;
+	/* list link for dbuf dirty records */
+	list_node_t dr_dbuf_node;
+
+	/*
+	 * The dnode we are part of.  Note that the dnode can not be moved or
+	 * evicted due to the hold that's added by dnode_setdirty() or
+	 * dmu_objset_sync_dnodes(), and released by dnode_rele_task() or
+	 * userquota_updates_task().  This hold is necessary for
+	 * dirty_lightweight_leaf-type dirty records, which don't have a hold
+	 * on a dbuf.
+	 */
+	dnode_t *dr_dnode;
 
 	/* pointer to parent dirty record */
 	struct dbuf_dirty_record *dr_parent;
@@ -165,6 +181,17 @@
 			uint8_t	dr_iv[ZIO_DATA_IV_LEN];
 			uint8_t	dr_mac[ZIO_DATA_MAC_LEN];
 		} dl;
+		struct dirty_lightweight_leaf {
+			/*
+			 * This dirty record refers to a leaf (level=0)
+			 * block, whose dbuf has not been instantiated for
+			 * performance reasons.
+			 */
+			uint64_t dr_blkid;
+			abd_t *dr_abd;
+			zio_prop_t dr_props;
+			enum zio_flag dr_flags;
+		} dll;
 	} dt;
 } dbuf_dirty_record_t;
 
@@ -200,6 +227,13 @@
 	 */
 	struct dmu_buf_impl *db_hash_next;
 
+	/*
+	 * Our link on the owner dnode's dn_dbufs list.
+	 * Protected by its dn_dbufs_mtx.  Should be on the same cache line
+	 * as db_level and db_blkid for the best avl_add() performance.
+	 */
+	avl_node_t db_link;
+
 	/* our block number */
 	uint64_t db_blkid;
 
@@ -217,6 +251,22 @@
 	 */
 	uint8_t db_level;
 
+	/*
+	 * Protects db_buf's contents if they contain an indirect block or data
+	 * block of the meta-dnode. We use this lock to protect the structure of
+	 * the block tree. This means that when modifying this dbuf's data, we
+	 * grab its rwlock. When modifying its parent's data (including the
+	 * blkptr to this dbuf), we grab the parent's rwlock. The lock ordering
+	 * for this lock is:
+	 * 1) dn_struct_rwlock
+	 * 2) db_rwlock
+	 * We don't currently grab multiple dbufs' db_rwlocks at once.
+	 */
+	krwlock_t db_rwlock;
+
+	/* buffer holding our data */
+	arc_buf_t *db_buf;
+
 	/* db_mtx protects the members below */
 	kmutex_t db_mtx;
 
@@ -232,20 +282,11 @@
 	 */
 	zfs_refcount_t db_holds;
 
-	/* buffer holding our data */
-	arc_buf_t *db_buf;
-
 	kcondvar_t db_changed;
 	dbuf_dirty_record_t *db_data_pending;
 
-	/* pointer to most recent dirty record for this buffer */
-	dbuf_dirty_record_t *db_last_dirty;
-
-	/*
-	 * Our link on the owner dnodes's dn_dbufs list.
-	 * Protected by its dn_dbufs_mtx.
-	 */
-	avl_node_t db_link;
+	/* List of dirty records for the buffer sorted newest to oldest. */
+	list_t db_dirty_records;
 
 	/* Link in dbuf_cache or dbuf_metadata_cache */
 	multilist_node_t db_cache_link;
@@ -281,14 +322,16 @@
 } dmu_buf_impl_t;
 
 /* Note: the dbuf hash table is exposed only for the mdb module */
-#define	DBUF_MUTEXES 8192
+#define	DBUF_MUTEXES 2048
 #define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
 typedef struct dbuf_hash_table {
 	uint64_t hash_table_mask;
 	dmu_buf_impl_t **hash_table;
-	kmutex_t hash_mutexes[DBUF_MUTEXES];
+	kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
 } dbuf_hash_table_t;
 
+typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
+
 uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
     const uint64_t offset);
 
@@ -304,7 +347,10 @@
     boolean_t fail_sparse, boolean_t fail_uncached,
     void *tag, dmu_buf_impl_t **dbp);
 
-void dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
+int dbuf_prefetch_impl(struct dnode *dn, int64_t level, uint64_t blkid,
+    zio_priority_t prio, arc_flags_t aflags, dbuf_prefetch_fn cb,
+    void *arg);
+int dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
     zio_priority_t prio, arc_flags_t aflags);
 
 void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
@@ -324,18 +370,24 @@
 void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
 void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
 dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
+dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
+    dmu_tx_t *tx);
 arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
 void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
     bp_embedded_type_t etype, enum zio_compress comp,
     int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
 
+int dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
+    const struct zio_prop *zp, enum zio_flag flags, dmu_tx_t *tx);
+
+void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx);
 void dbuf_destroy(dmu_buf_impl_t *db);
 
 void dbuf_unoverride(dbuf_dirty_record_t *dr);
 void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
 void dbuf_release_bp(dmu_buf_impl_t *db);
-
-boolean_t dbuf_can_remap(const dmu_buf_impl_t *buf);
+db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag);
+void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag);
 
 void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
     struct dmu_tx *);
@@ -345,6 +397,9 @@
 void dbuf_stats_init(dbuf_hash_table_t *hash);
 void dbuf_stats_destroy(void);
 
+int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
+    blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift);
+
 #define	DB_DNODE(_db)		((_db)->db_dnode_handle->dnh_dnode)
 #define	DB_DNODE_LOCK(_db)	((_db)->db_dnode_handle->dnh_zrlock)
 #define	DB_DNODE_ENTER(_db)	(zrl_add(&DB_DNODE_LOCK(_db)))
@@ -356,6 +411,29 @@
 
 boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
 
+static inline dbuf_dirty_record_t *
+dbuf_find_dirty_lte(dmu_buf_impl_t *db, uint64_t txg)
+{
+	dbuf_dirty_record_t *dr;
+
+	for (dr = list_head(&db->db_dirty_records);
+	    dr != NULL && dr->dr_txg > txg;
+	    dr = list_next(&db->db_dirty_records, dr))
+		continue;
+	return (dr);
+}
+
+static inline dbuf_dirty_record_t *
+dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
+{
+	dbuf_dirty_record_t *dr;
+
+	dr = dbuf_find_dirty_lte(db, txg);
+	if (dr && dr->dr_txg == txg)
+		return (dr);
+	return (NULL);
+}
+
 #define	DBUF_GET_BUFC_TYPE(_db)	\
 	(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
 
@@ -364,16 +442,7 @@
 	(dbuf_is_metadata(_db) &&					\
 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
 
-#define	DBUF_IS_L2CACHEABLE(_db)					\
-	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
-	(dbuf_is_metadata(_db) &&					\
-	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
-
-#define	DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level)				\
-	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
-	(((_level) > 0 ||						\
-	DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) &&	\
-	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
+boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);
 
 #ifdef ZFS_DEBUG
 
@@ -387,7 +456,7 @@
 	char __db_buf[32]; \
 	uint64_t __db_obj = (dbuf)->db.db_object; \
 	if (__db_obj == DMU_META_DNODE_OBJECT) \
-		(void) strcpy(__db_buf, "mdn"); \
+		(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf));	\
 	else \
 		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
 		    (u_longlong_t)__db_obj); \

diff --git a/zfs/include/sys/ddt.h b/zfs/include/sys/ddt.h
index 68b8c45..25be6f5 100644
--- a/zfs/include/sys/ddt.h
+++ b/zfs/include/sys/ddt.h

@@ -103,6 +103,10 @@
 	uint64_t	ddp_phys_birth;
 } ddt_phys_t;
 
+/*
+ * Note, we no longer generate new DDT_PHYS_DITTO-type blocks.  However,
+ * we maintain the ability to free existing dedup-ditto blocks.
+ */
 enum ddt_phys_type {
 	DDT_PHYS_DITTO = 0,
 	DDT_PHYS_SINGLE = 1,
@@ -178,15 +182,15 @@
 #define	DDT_NAMELEN	107
 
 extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class, char *name);
+    enum ddt_class clazz, char *name);
 extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
+    enum ddt_class clazz, uint64_t *walk, ddt_entry_t *dde);
 extern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class, uint64_t *count);
+    enum ddt_class clazz, uint64_t *count);
 extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class, dmu_object_info_t *);
+    enum ddt_class clazz, dmu_object_info_t *);
 extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class);
+    enum ddt_class clazz);
 
 extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
     uint64_t txg);
@@ -216,10 +220,6 @@
 extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
 
-extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
-    ddt_phys_t *ddp_willref);
-extern int ddt_ditto_copies_present(ddt_entry_t *dde);
-
 extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
 extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
 
@@ -246,7 +246,7 @@
 extern void ddt_sync(spa_t *spa, uint64_t txg);
 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
 extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
-    enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
+    enum ddt_class clazz, ddt_entry_t *dde, dmu_tx_t *tx);
 
 extern const ddt_ops_t ddt_zap_ops;
 

diff --git a/zfs/include/sys/dmu.h b/zfs/include/sys/dmu.h
index 56f20f3..12bd887 100644
--- a/zfs/include/sys/dmu.h
+++ b/zfs/include/sys/dmu.h

@@ -20,13 +20,14 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -49,6 +50,7 @@
 #include <sys/zio_compress.h>
 #include <sys/zio_priority.h>
 #include <sys/uio.h>
+#include <sys/zfs_file.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -135,21 +137,24 @@
 #endif
 
 #define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
-	((ot) & DMU_OT_METADATA) : \
+	(((ot) & DMU_OT_METADATA) != 0) : \
 	DMU_OT_IS_METADATA_IMPL(ot))
 
 #define	DMU_OT_IS_DDT(ot) \
 	((ot) == DMU_OT_DDT_ZAP)
 
-#define	DMU_OT_IS_ZIL(ot) \
-	((ot) == DMU_OT_INTENT_LOG)
+#define	DMU_OT_IS_CRITICAL(ot) \
+	(DMU_OT_IS_METADATA(ot) && \
+	(ot) != DMU_OT_DNODE && \
+	(ot) != DMU_OT_DIRECTORY_CONTENTS && \
+	(ot) != DMU_OT_SA)
 
 /* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */
 #define	DMU_OT_IS_FILE(ot) \
 	((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
 
 #define	DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
-	((ot) & DMU_OT_ENCRYPTED) : \
+	(((ot) & DMU_OT_ENCRYPTED) != 0) : \
 	DMU_OT_IS_ENCRYPTED_IMPL(ot))
 
 /*
@@ -336,13 +341,11 @@
 int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
     struct nvlist *errlist);
 int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
-int dmu_objset_snapshot_tmp(const char *, const char *, int);
-int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
+int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
     int flags);
 void dmu_objset_byteswap(void *buf, size_t size);
 int dsl_dataset_rename_snapshot(const char *fsname,
     const char *oldsnapname, const char *newsnapname, boolean_t recursive);
-int dmu_objset_remap_indirects(const char *fsname);
 
 typedef struct dmu_buf {
 	uint64_t db_object;		/* object that this buffer is part of */
@@ -383,6 +386,8 @@
 #define	DMU_POOL_OBSOLETE_BPOBJ		"com.delphix:obsolete_bpobj"
 #define	DMU_POOL_CONDENSING_INDIRECT	"com.delphix:condensing_indirect"
 #define	DMU_POOL_ZPOOL_CHECKPOINT	"com.delphix:zpool_checkpoint"
+#define	DMU_POOL_LOG_SPACEMAP_ZAP	"com.delphix:log_spacemap_zap"
+#define	DMU_POOL_DELETED_CLONES		"com.delphix:deleted_clones"
 
 /*
  * Allocate an object from this objset.  The range of object numbers
@@ -498,12 +503,11 @@
 void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
     dmu_tx_t *tx);
 
-
-int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg);
-
 void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
     void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
     int compressed_size, int byteorder, dmu_tx_t *tx);
+void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+    dmu_tx_t *tx);
 
 /*
  * Decide how to write a block: checksum, compression, number of copies, etc.
@@ -564,7 +568,9 @@
     void *tag, dmu_buf_t **, int flags);
 int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
     void *tag, dmu_buf_t **dbp, int flags);
-
+int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
+    uint64_t length, boolean_t read, void *tag, int *numbufsp,
+    dmu_buf_t ***dbpp, uint32_t flags);
 /*
  * Add a reference to a dmu buffer that has already been held via
  * dmu_buf_hold() in the current context.
@@ -666,7 +672,8 @@
 /*ARGSUSED*/
 static inline void
 dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
-    dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
+    dmu_buf_evict_func_t *evict_func_async,
+    dmu_buf_t **clear_on_evict_dbufp __maybe_unused)
 {
 	ASSERT(dbu->dbu_evict_func_sync == NULL);
 	ASSERT(dbu->dbu_evict_func_async == NULL);
@@ -771,11 +778,13 @@
 void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
 void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
     int len);
+void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+    int len);
 void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
     uint64_t len);
 void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
     uint64_t len);
-void dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object);
 void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
 void dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add,
     const char *name);
@@ -845,15 +854,14 @@
 void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	dmu_tx_t *tx);
 #ifdef _KERNEL
-#include <linux/blkdev_compat.h>
-int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
-int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
-int dmu_read_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size);
-int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
+int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size);
+int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size);
+int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size);
+int dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
 	dmu_tx_t *tx);
-int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
+int dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
 	dmu_tx_t *tx);
-int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
+int dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
 	dmu_tx_t *tx);
 #endif
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
@@ -863,20 +871,6 @@
 int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
     struct arc_buf *buf, dmu_tx_t *tx);
 #define	dmu_assign_arcbuf	dmu_assign_arcbuf_by_dbuf
-void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
-    dmu_buf_t *handle, dmu_tx_t *tx);
-#ifdef HAVE_UIO_ZEROCOPY
-int dmu_xuio_init(struct xuio *uio, int niov);
-void dmu_xuio_fini(struct xuio *uio);
-int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
-    size_t n);
-int dmu_xuio_cnt(struct xuio *uio);
-struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
-void dmu_xuio_clear(struct xuio *uio, int i);
-#endif /* HAVE_UIO_ZEROCOPY */
-void xuio_stat_wbuf_copied(void);
-void xuio_stat_wbuf_nocopy(void);
-
 extern int zfs_prefetch_disable;
 extern int zfs_max_recordsize;
 
@@ -937,7 +931,7 @@
 void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
 /*
  * Like dmu_object_info_from_db, but faster still when you only care about
- * the size.  This is specifically optimized for zfs_getattr().
+ * the size.
  */
 void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
     u_longlong_t *nblk512);
@@ -951,6 +945,7 @@
 	dmu_objset_type_t dds_type;
 	uint8_t dds_is_snapshot;
 	uint8_t dds_inconsistent;
+	uint8_t dds_redacted;
 	char dds_origin[ZFS_MAX_DATASET_NAME_LEN];
 } dmu_objset_stats_t;
 
@@ -1004,18 +999,26 @@
 extern uint64_t dmu_objset_dnodesize(objset_t *os);
 extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
 extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
+extern int dmu_objset_blksize(objset_t *os);
 extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
 extern int dmu_snapshot_lookup(objset_t *os, const char *name, uint64_t *val);
-extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
+extern int dmu_snapshot_realname(objset_t *os, const char *name, char *real,
     int maxlen, boolean_t *conflict);
 extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp);
 
-typedef int objset_used_cb_t(dmu_object_type_t bonustype,
-    void *bonus, uint64_t *userp, uint64_t *groupp, uint64_t *projectp);
+typedef struct zfs_file_info {
+	uint64_t zfi_user;
+	uint64_t zfi_group;
+	uint64_t zfi_project;
+	uint64_t zfi_generation;
+} zfs_file_info_t;
+
+typedef int file_info_cb_t(dmu_object_type_t bonustype, const void *data,
+    struct zfs_file_info *zoi);
 extern void dmu_objset_register_type(dmu_objset_type_t ost,
-    objset_used_cb_t *cb);
+    file_info_cb_t *cb);
 extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
 extern void *dmu_objset_get_user(objset_t *os);
 
@@ -1068,12 +1071,14 @@
     dmu_traverse_cb_t cb, void *arg);
 
 int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
-    struct vnode *vp, offset_t *offp);
+    zfs_file_t *fp, offset_t *offp);
 
 /* CRC64 table */
 #define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
 extern uint64_t zfs_crc64_table[256];
 
+extern int dmu_prefetch_max;
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/dmu_impl.h b/zfs/include/sys/dmu_impl.h
index 9dec661..def4aad 100644
--- a/zfs/include/sys/dmu_impl.h
+++ b/zfs/include/sys/dmu_impl.h

@@ -24,7 +24,7 @@
  */
 /*
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_DMU_IMPL_H
@@ -164,6 +164,7 @@
  * 	dn_dirty_txg
  * 	dd_assigned_tx
  * 	dn_notxholds
+ *	dn_nodnholds
  * 	dn_dirtyctx
  * 	dn_dirtyctx_firstset
  * 	(dn_phys copy fields?)
@@ -236,47 +237,13 @@
 struct objset;
 struct dmu_pool;
 
-typedef struct dmu_xuio {
-	int next;
-	int cnt;
-	struct arc_buf **bufs;
-	iovec_t *iovp;
-} dmu_xuio_t;
-
-/*
- * The list of data whose inclusion in a send stream can be pending from
- * one call to backup_cb to another.  Multiple calls to dump_free() and
- * dump_freeobjects() can be aggregated into a single DRR_FREE or
- * DRR_FREEOBJECTS replay record.
- */
-typedef enum {
-	PENDING_NONE,
-	PENDING_FREE,
-	PENDING_FREEOBJECTS
-} dmu_pendop_t;
-
-typedef struct dmu_sendarg {
-	list_node_t dsa_link;
-	dmu_replay_record_t *dsa_drr;
-	vnode_t *dsa_vp;
-	int dsa_outfd;
-	proc_t *dsa_proc;
-	offset_t *dsa_off;
-	objset_t *dsa_os;
-	zio_cksum_t dsa_zc;
-	uint64_t dsa_toguid;
-	uint64_t dsa_fromtxg;
-	int dsa_err;
-	dmu_pendop_t dsa_pending_op;
-	uint64_t dsa_featureflags;
-	uint64_t dsa_last_data_object;
-	uint64_t dsa_last_data_offset;
-	uint64_t dsa_resume_object;
-	uint64_t dsa_resume_offset;
-	boolean_t dsa_sent_begin;
-	boolean_t dsa_sent_end;
-	boolean_t block_diff;
-} dmu_sendarg_t;
+/*
+ * Status record for a send.
+ * NOTE(review): presumably one exists per in-progress send and is linked
+ * through dss_link onto a list of active sends -- confirm against the
+ * users of this struct.
+ */
+typedef struct dmu_sendstatus {
+	list_node_t dss_link;
+	int dss_outfd;
+	proc_t *dss_proc;
+	offset_t *dss_off;
+	uint64_t dss_blocks; /* blocks visited during the sending process */
+} dmu_sendstatus_t;
 
 void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
 void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);

diff --git a/zfs/include/sys/dmu_objset.h b/zfs/include/sys/dmu_objset.h
index c0650bc..fffcbcf 100644
--- a/zfs/include/sys/dmu_objset.h
+++ b/zfs/include/sys/dmu_objset.h

@@ -72,6 +72,10 @@
  */
 #define	OBJSET_CRYPT_PORTABLE_FLAGS_MASK	(0)
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
+#endif
 typedef struct objset_phys {
 	dnode_phys_t os_meta_dnode;
 	zil_header_t os_zil_header;
@@ -88,6 +92,9 @@
 	char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 -
 	    sizeof (dnode_phys_t)];
 } objset_phys_t;
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
 
 typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
 
@@ -118,6 +125,7 @@
 	uint64_t os_dnodesize; /* default dnode size for new objects */
 	enum zio_checksum os_checksum;
 	enum zio_compress os_compress;
+	uint8_t os_complevel;
 	uint8_t os_copies;
 	enum zio_checksum os_dedup_checksum;
 	boolean_t os_dedup_verify;
@@ -126,7 +134,7 @@
 	zfs_cache_type_t os_secondary_cache;
 	zfs_sync_type_t os_sync;
 	zfs_redundant_metadata_type_t os_redundant_metadata;
-	int os_recordsize;
+	uint64_t os_recordsize;
 	/*
 	 * The next four values are used as a cache of whatever's on disk, and
 	 * are initialized the first time these properties are queried. Before
@@ -152,7 +160,7 @@
 	/* no lock needed: */
 	struct dmu_tx *os_synctx; /* XXX sketchy */
 	zil_header_t os_zil_header;
-	multilist_t *os_synced_dnodes;
+	multilist_t os_synced_dnodes;
 	uint64_t os_flags;
 	uint64_t os_freed_dnodes;
 	boolean_t os_rescan_dnodes;
@@ -171,7 +179,7 @@
 
 	/* Protected by os_lock */
 	kmutex_t os_lock;
-	multilist_t *os_dirty_dnodes[TXG_SIZE];
+	multilist_t os_dirty_dnodes[TXG_SIZE];
 	list_t os_dnodes;
 	list_t os_downgraded_dbufs;
 
@@ -199,10 +207,6 @@
 #define	DMU_GROUPUSED_DNODE(os)	((os)->os_groupused_dnode.dnh_dnode)
 #define	DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
 
-#define	DMU_OS_IS_L2CACHEABLE(os)				\
-	((os)->os_secondary_cache == ZFS_CACHE_ALL ||		\
-	(os)->os_secondary_cache == ZFS_CACHE_METADATA)
-
 /* called from zpl */
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
 int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
@@ -241,10 +245,10 @@
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
     objset_t **osp);
 void dmu_objset_evict(objset_t *os);
-void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
+void dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx);
 void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
 boolean_t dmu_objset_userused_enabled(objset_t *os);
-int dmu_objset_userspace_upgrade(objset_t *os);
+void dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);
 boolean_t dmu_objset_userobjused_enabled(objset_t *os);
 boolean_t dmu_objset_userobjspace_upgradable(objset_t *os);
@@ -254,6 +258,8 @@
 boolean_t dmu_objset_projectquota_present(objset_t *os);
 boolean_t dmu_objset_projectquota_upgradable(objset_t *os);
 void dmu_objset_id_quota_upgrade(objset_t *os);
+int dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype,
+    const void *data, zfs_file_info_t *zfi);
 
 int dmu_fsname(const char *snapname, char *buf);
 

diff --git a/zfs/include/sys/dmu_recv.h b/zfs/include/sys/dmu_recv.h
index ffa8924..7188b2a 100644
--- a/zfs/include/sys/dmu_recv.h
+++ b/zfs/include/sys/dmu_recv.h

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  */
@@ -33,6 +33,7 @@
 #include <sys/dsl_bookmark.h>
 #include <sys/dsl_dataset.h>
 #include <sys/spa.h>
+#include <sys/objlist.h>
 
 extern const char *recv_clone_name;
 
@@ -44,28 +45,43 @@
 	const char *drc_tosnap;
 	boolean_t drc_newfs;
 	boolean_t drc_byteswap;
+	uint64_t drc_featureflags;
 	boolean_t drc_force;
 	boolean_t drc_resumable;
+	boolean_t drc_should_save;
 	boolean_t drc_raw;
 	boolean_t drc_clone;
 	boolean_t drc_spill;
-	struct avl_tree *drc_guid_to_ds_map;
 	nvlist_t *drc_keynvl;
-	zio_cksum_t drc_cksum;
 	uint64_t drc_fromsnapobj;
-	uint64_t drc_newsnapobj;
 	uint64_t drc_ivset_guid;
 	void *drc_owner;
 	cred_t *drc_cred;
+	proc_t *drc_proc;
+	nvlist_t *drc_begin_nvl;
+
+	objset_t *drc_os;
+	zfs_file_t *drc_fp; /* The file to read the stream from */
+	uint64_t drc_voff; /* The current offset in the stream */
+	uint64_t drc_bytes_read;
+	/*
+	 * A record that has had its payload read in, but hasn't yet been handed
+	 * off to the worker thread.
+	 */
+	struct receive_record_arg *drc_rrd;
+	/* A record that has had its header read in, but not its payload. */
+	struct receive_record_arg *drc_next_rrd;
+	zio_cksum_t drc_cksum;
+	zio_cksum_t drc_prev_cksum;
+	/* Sorted list of objects not to issue prefetches for. */
+	objlist_t *drc_ignore_objlist;
 } dmu_recv_cookie_t;
 
-int dmu_recv_begin(char *tofs, char *tosnap,
-    struct dmu_replay_record *drr_begin, boolean_t force, boolean_t resumable,
-    nvlist_t *localprops, nvlist_t *hidden_args, char *origin,
-    dmu_recv_cookie_t *drc);
-int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
-    int cleanup_fd, uint64_t *action_handlep);
-int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
-boolean_t dmu_objset_is_receiving(objset_t *os);
+int dmu_recv_begin(char *, char *, dmu_replay_record_t *,
+    boolean_t, boolean_t, nvlist_t *, nvlist_t *, char *,
+    dmu_recv_cookie_t *, zfs_file_t *, offset_t *);
+int dmu_recv_stream(dmu_recv_cookie_t *, offset_t *);
+int dmu_recv_end(dmu_recv_cookie_t *, void *);
+boolean_t dmu_objset_is_receiving(objset_t *);
 
 #endif /* _DMU_RECV_H */

diff --git a/zfs/include/sys/dmu_redact.h b/zfs/include/sys/dmu_redact.h
new file mode 100644
index 0000000..85f4b05
--- /dev/null
+++ b/zfs/include/sys/dmu_redact.h

@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+#ifndef _DMU_REDACT_H_
+#define	_DMU_REDACT_H_
+
+#include <sys/spa.h>
+#include <sys/dsl_bookmark.h>
+
+#define	REDACT_BLOCK_MAX_COUNT (1ULL << 48)
+
+/*
+ * Decode the block size held in the top 16 bits (48-63) of
+ * rbp_size_count.  The field is decoded with SPA_MINBLOCKSHIFT, matching
+ * the "size in sectors" encoding described on redact_block_phys_t.
+ * NOTE(review): the returned value is presumably in bytes -- confirm
+ * against the definition of BF64_GET_SB.
+ */
+static inline uint64_t
+redact_block_get_size(redact_block_phys_t *rbp)
+{
+	return (BF64_GET_SB((rbp)->rbp_size_count, 48, 16, SPA_MINBLOCKSHIFT,
+	    0));
+}
+
+/*
+ * Encode `size` into the top 16 bits (48-63) of rbp_size_count; the
+ * counterpart of redact_block_get_size().
+ * NOTE(review): the low 48 bits (the block count) are presumably left
+ * untouched -- confirm against the definition of BF64_SET_SB.
+ */
+static inline void
+redact_block_set_size(redact_block_phys_t *rbp, uint64_t size)
+{
+	/* cppcheck-suppress syntaxError */
+	BF64_SET_SB((rbp)->rbp_size_count, 48, 16, SPA_MINBLOCKSHIFT, 0, size);
+}
+
+/*
+ * Decode the number of consecutive blocks held in the low 48 bits of
+ * rbp_size_count.
+ * NOTE(review): the final argument (1) is presumably a bias, i.e. the
+ * stored value is count - 1, which is what would allow counts up to
+ * REDACT_BLOCK_MAX_COUNT (1 << 48) -- confirm against BF64_GET_SB.
+ */
+static inline uint64_t
+redact_block_get_count(redact_block_phys_t *rbp)
+{
+	return (BF64_GET_SB((rbp)->rbp_size_count, 0, 48, 0, 1));
+}
+
+/*
+ * Encode `count` into the low 48 bits of rbp_size_count; the counterpart
+ * of redact_block_get_count().
+ * NOTE(review): with the bias argument of 1, `count` presumably must be in
+ * the range [1, REDACT_BLOCK_MAX_COUNT] -- confirm against BF64_SET_SB.
+ */
+static inline void
+redact_block_set_count(redact_block_phys_t *rbp, uint64_t count)
+{
+	/* cppcheck-suppress syntaxError */
+	BF64_SET_SB((rbp)->rbp_size_count, 0, 48, 0, 1, count);
+}
+
+int dmu_redact_snap(const char *, nvlist_t *, const char *);
+#endif /* _DMU_REDACT_H_ */

diff --git a/zfs/include/sys/dmu_send.h b/zfs/include/sys/dmu_send.h
index ff7fb37..2f98e24 100644
--- a/zfs/include/sys/dmu_send.h
+++ b/zfs/include/sys/dmu_send.h

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  */
@@ -31,24 +31,42 @@
 
 #include <sys/inttypes.h>
 #include <sys/dsl_crypt.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/spa.h>
+#include <sys/objlist.h>
+#include <sys/dmu_redact.h>
+
+#define	BEGINNV_REDACT_SNAPS		"redact_snaps"
+#define	BEGINNV_REDACT_FROM_SNAPS	"redact_from_snaps"
+#define	BEGINNV_RESUME_OBJECT		"resume_object"
+#define	BEGINNV_RESUME_OFFSET		"resume_offset"
 
 struct vnode;
 struct dsl_dataset;
 struct drr_begin;
 struct avl_tree;
 struct dmu_replay_record;
-
-int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
-    boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
-    uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
-int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
-    boolean_t stream_compressed, uint64_t *sizep);
-int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
-    boolean_t stream_compressed, uint64_t *sizep);
+struct dmu_send_outparams;
+/*
+ * Send by snapshot name.
+ * NOTE(review): fromsnap and redactbook look optional (a full, unredacted
+ * send would have neither) -- confirm the NULL conventions and the roles
+ * of outfd/off versus dsop against the definition.
+ */
+int
+dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+    boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
+    boolean_t savedok, boolean_t blockdiff, uint64_t resumeobj,
+    uint64_t resumeoff, const char *redactbook, int outfd, offset_t *off,
+    struct dmu_send_outparams *dsop);
+int dmu_send_estimate_fast(struct dsl_dataset *ds, struct dsl_dataset *fromds,
+    zfs_bookmark_phys_t *frombook, boolean_t stream_compressed,
+    boolean_t saved, uint64_t *sizep);
 int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
     boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    boolean_t rawok, int outfd, struct vnode *vp, offset_t *off,
-    boolean_t block_diff);
+    boolean_t rawok, boolean_t savedok, int outfd, offset_t *off,
+    struct dmu_send_outparams *dso);
+
+/*
+ * Output callback for a send: it is handed an objset, a buffer with its
+ * length, and an opaque argument.
+ * NOTE(review): presumably returns 0 on success and an errno otherwise --
+ * confirm against the implementations.
+ */
+typedef int (*dmu_send_outfunc_t)(objset_t *os, void *buf, int len, void *arg);
+/*
+ * Output parameters passed to dmu_send() and dmu_send_obj().
+ * NOTE(review): dso_arg is presumably what gets passed as `arg` to
+ * dso_outfunc, and dso_dryrun presumably suppresses real output --
+ * confirm against dmu_send.c.
+ */
+typedef struct dmu_send_outparams {
+	dmu_send_outfunc_t	dso_outfunc;
+	void			*dso_arg;
+	boolean_t		dso_dryrun;
+	/*
+	 * NOTE(review): unlike its siblings this member has no dso_ prefix
+	 * and is an int, whereas dmu_send() takes a boolean_t blockdiff --
+	 * confirm that the difference is intentional.
+	 */
+	int		block_diff;
+} dmu_send_outparams_t;
 
 #endif /* _DMU_SEND_H */

diff --git a/zfs/include/sys/dmu_traverse.h b/zfs/include/sys/dmu_traverse.h
index 8ceef5c..d76bfe3 100644
--- a/zfs/include/sys/dmu_traverse.h
+++ b/zfs/include/sys/dmu_traverse.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DMU_TRAVERSE_H
@@ -71,6 +71,20 @@
 int traverse_pool(spa_t *spa,
     uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
 
+/*
+ * Return 2^(level * (indblkshift - SPA_BLKPTRSHIFT)), the span of a block
+ * pointer at the given indirection level expressed in blocks.
+ *
+ * Note that this calculation cannot overflow with the current maximum indirect
+ * block size (128k).  If that maximum is increased to 1M, however, this
+ * calculation can overflow, and handling would need to be added to ensure
+ * continued correctness.
+ */
+static inline uint64_t
+bp_span_in_blocks(uint8_t indblkshift, uint64_t level)
+{
+	/*
+	 * Keep the full 64-bit product.  Narrowing it to unsigned int before
+	 * the assertion would let an out-of-range value wrap into [0, 64)
+	 * and slip past the check.
+	 */
+	uint64_t shift = level * (indblkshift - SPA_BLKPTRSHIFT);
+	ASSERT3U(shift, <, 64);
+	return (1ULL << shift);
+}
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/dmu_tx.h b/zfs/include/sys/dmu_tx.h
index 36d205e..e8886fd 100644
--- a/zfs/include/sys/dmu_tx.h
+++ b/zfs/include/sys/dmu_tx.h

@@ -32,7 +32,7 @@
 #include <sys/inttypes.h>
 #include <sys/dmu.h>
 #include <sys/txg.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -90,6 +90,7 @@
 	THT_ZAP,
 	THT_SPACE,
 	THT_SPILL,
+	THT_APPEND,
 	THT_NUMTYPES
 };
 
@@ -125,6 +126,7 @@
 	kstat_named_t dmu_tx_dirty_delay;
 	kstat_named_t dmu_tx_dirty_over_max;
 	kstat_named_t dmu_tx_dirty_frees_delay;
+	kstat_named_t dmu_tx_wrlog_delay;
 	kstat_named_t dmu_tx_quota;
 } dmu_tx_stats_t;
 

diff --git a/zfs/include/sys/dmu_zfetch.h b/zfs/include/sys/dmu_zfetch.h
index 8125d07..cd1b79e 100644
--- a/zfs/include/sys/dmu_zfetch.h
+++ b/zfs/include/sys/dmu_zfetch.h

@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  */
 
 #ifndef	_DMU_ZFETCH_H
@@ -40,33 +40,45 @@
 
 struct dnode;				/* so we can reference dnode */
 
-typedef struct zstream {
-	uint64_t	zs_blkid;	/* expect next access at this blkid */
-	uint64_t	zs_pf_blkid;	/* next block to prefetch */
-
-	/*
-	 * We will next prefetch the L1 indirect block of this level-0
-	 * block id.
-	 */
-	uint64_t	zs_ipf_blkid;
-
-	kmutex_t	zs_lock;	/* protects stream */
-	hrtime_t	zs_atime;	/* time last prefetch issued */
-	list_node_t	zs_node;	/* link for zf_stream */
-} zstream_t;
-
 typedef struct zfetch {
-	krwlock_t	zf_rwlock;	/* protects zfetch structure */
+	kmutex_t	zf_lock;	/* protects zfetch structure */
 	list_t		zf_stream;	/* list of zstream_t's */
 	struct dnode	*zf_dnode;	/* dnode that owns this zfetch */
+	int		zf_numstreams;	/* number of zstream_t's */
 } zfetch_t;
 
+/*
+ * One prefetch stream belonging to a zfetch_t.  It is linked onto the
+ * owner's zf_stream list through zs_node, and zs_fetch points back at the
+ * owner.
+ */
+typedef struct zstream {
+	uint64_t	zs_blkid;	/* expect next access at this blkid */
+	unsigned int	zs_pf_dist;	/* data prefetch distance in bytes */
+	unsigned int	zs_ipf_dist;	/* L1 prefetch distance in bytes */
+	uint64_t	zs_pf_start;	/* first data block to prefetch */
+	uint64_t	zs_pf_end;	/* data block to prefetch up to */
+	uint64_t	zs_ipf_start;	/* first data block to prefetch L1 */
+	uint64_t	zs_ipf_end;	/* data block to prefetch L1 up to */
+
+	list_node_t	zs_node;	/* link for zf_stream */
+	hrtime_t	zs_atime;	/* time last prefetch issued */
+	zfetch_t	*zs_fetch;	/* parent fetch */
+	boolean_t	zs_missed;	/* stream saw cache misses */
+	boolean_t	zs_more;	/* need more distant prefetch */
+	zfs_refcount_t	zs_callers;	/* number of pending callers */
+	/*
+	 * Number of stream references: dnode, callers and pending blocks.
+	 * The stream memory is freed when the number returns to zero.
+	 */
+	zfs_refcount_t	zs_refs;
+} zstream_t;
+
 void		zfetch_init(void);
 void		zfetch_fini(void);
 
 void		dmu_zfetch_init(zfetch_t *, struct dnode *);
 void		dmu_zfetch_fini(zfetch_t *);
-void		dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t);
+zstream_t	*dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
+    boolean_t);
+void		dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
+void		dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
+    boolean_t);
 
 
 #ifdef	__cplusplus

diff --git a/zfs/include/sys/dnode.h b/zfs/include/sys/dnode.h
index e97e403..39bbdae 100644
--- a/zfs/include/sys/dnode.h
+++ b/zfs/include/sys/dnode.h

@@ -32,10 +32,11 @@
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/zio.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/zrlock.h>
 #include <sys/multilist.h>
+#include <sys/wmsum.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -119,7 +120,11 @@
 #define	DN_MAX_LEVELS	(DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
 	DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
 
-#define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus + \
+/*
+ * Use the flexible array instead of the fixed length one dn_bonus
+ * to address memcpy/memmove fortify error
+ */
+#define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus_flexible + \
 	(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
 #define	DN_MAX_BONUS_LEN(dnp) \
 	((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
@@ -171,7 +176,7 @@
  * example, reading 32 dnodes from a 16k dnode block and all of the spill
  * blocks could issue 33 separate reads. Now suppose those dnodes have size
  * 1024 and therefore don't need spill blocks. Then the worst case number
- * of blocks read is reduced to from 33 to two--one per dnode block.
+ * of blocks read is reduced from 33 to two--one per dnode block.
  *
  * ZFS-on-Linux systems that make heavy use of extended attributes benefit
  * from this feature. In particular, ZFS-on-Linux supports the xattr=sa
@@ -232,8 +237,8 @@
 	 * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This
 	 * allows us to protect any fields that might be added here in the
 	 * future. In either case, developers will want to check
-	 * zio_crypt_init_uios_dnode() to ensure the new field is being
-	 * protected properly.
+	 * zio_crypt_init_uios_dnode() and zio_crypt_do_dnode_hmac_updates()
+	 * to ensure the new field is being protected and updated properly.
 	 */
 	uint64_t dn_pad3[4];
 
@@ -265,6 +270,10 @@
 			    sizeof (blkptr_t)];
 			blkptr_t dn_spill;
 		};
+		struct {
+			blkptr_t __dn_ignore4;
+			uint8_t dn_bonus_flexible[];
+		};
 	};
 } dnode_phys_t;
 
@@ -332,8 +341,9 @@
 	uint64_t dn_assigned_txg;
 	uint64_t dn_dirty_txg;			/* txg dnode was last dirtied */
 	kcondvar_t dn_notxholds;
+	kcondvar_t dn_nodnholds;
 	enum dnode_dirtycontext dn_dirtyctx;
-	uint8_t *dn_dirtyctx_firstset;		/* dbg: contents meaningless */
+	void *dn_dirtyctx_firstset;		/* dbg: contents meaningless */
 
 	/* protected by own devices */
 	zfs_refcount_t dn_tx_holds;
@@ -373,6 +383,13 @@
 };
 
 /*
+ * Since AVL already has embedded element counter, use dn_dbufs_count
+ * only for dbufs not counted there (bonus buffers) and just add them.
+ */
+#define	DN_DBUFS_COUNT(dn)	((dn)->dn_dbufs_count + \
+    avl_numnodes(&(dn)->dn_dbufs))
+
+/*
  * We use this (otherwise unused) bit to indicate if the value of
  * dn_next_maxblkid[txgoff] is valid to use in dnode_sync().
  */
@@ -417,7 +434,9 @@
 void dnode_rele(dnode_t *dn, void *ref);
 void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
 int dnode_try_claim(objset_t *os, uint64_t object, int slots);
+boolean_t dnode_is_dirty(dnode_t *dn);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
+void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
     dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
@@ -442,7 +461,6 @@
 void dnode_evict_dbufs(dnode_t *dn);
 void dnode_evict_bonus(dnode_t *dn);
 void dnode_free_interior_slots(dnode_t *dn);
-boolean_t dnode_needs_remap(const dnode_t *dn);
 
 #define	DNODE_IS_DIRTY(_dn)						\
 	((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
@@ -578,10 +596,42 @@
 	kstat_named_t dnode_move_active;
 } dnode_stats_t;
 
+/*
+ * wmsum counters updated by DNODE_STAT_INCR() and DNODE_STAT_BUMP(); the
+ * member name is used directly as the `stat` argument of those macros.
+ * NOTE(review): presumably kept one-to-one with the kstat_named_t members
+ * of dnode_stats_t so the kstats can be refreshed from these sums --
+ * confirm in dnode.c.
+ */
+typedef struct dnode_sums {
+	wmsum_t dnode_hold_dbuf_hold;
+	wmsum_t dnode_hold_dbuf_read;
+	wmsum_t dnode_hold_alloc_hits;
+	wmsum_t dnode_hold_alloc_misses;
+	wmsum_t dnode_hold_alloc_interior;
+	wmsum_t dnode_hold_alloc_lock_retry;
+	wmsum_t dnode_hold_alloc_lock_misses;
+	wmsum_t dnode_hold_alloc_type_none;
+	wmsum_t dnode_hold_free_hits;
+	wmsum_t dnode_hold_free_misses;
+	wmsum_t dnode_hold_free_lock_misses;
+	wmsum_t dnode_hold_free_lock_retry;
+	wmsum_t dnode_hold_free_refcount;
+	wmsum_t dnode_hold_free_overflow;
+	wmsum_t dnode_free_interior_lock_retry;
+	wmsum_t dnode_allocate;
+	wmsum_t dnode_reallocate;
+	wmsum_t dnode_buf_evict;
+	wmsum_t dnode_alloc_next_chunk;
+	wmsum_t dnode_alloc_race;
+	wmsum_t dnode_alloc_next_block;
+	wmsum_t dnode_move_invalid;
+	wmsum_t dnode_move_recheck1;
+	wmsum_t dnode_move_recheck2;
+	wmsum_t dnode_move_special;
+	wmsum_t dnode_move_handle;
+	wmsum_t dnode_move_rwlock;
+	wmsum_t dnode_move_active;
+} dnode_sums_t;
+
 extern dnode_stats_t dnode_stats;
+extern dnode_sums_t dnode_sums;
 
 #define	DNODE_STAT_INCR(stat, val) \
-    atomic_add_64(&dnode_stats.stat.value.ui64, (val));
+    wmsum_add(&dnode_sums.stat, (val))
 #define	DNODE_STAT_BUMP(stat) \
     DNODE_STAT_INCR(stat, 1);
 
@@ -592,7 +642,7 @@
 	char __db_buf[32]; \
 	uint64_t __db_obj = (dn)->dn_object; \
 	if (__db_obj == DMU_META_DNODE_OBJECT) \
-		(void) strcpy(__db_buf, "mdn"); \
+		(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf));	\
 	else \
 		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
 		    (u_longlong_t)__db_obj);\
@@ -607,7 +657,7 @@
 #else
 
 #define	dprintf_dnode(db, fmt, ...)
-#define	DNODE_VERIFY(dn)
+#define	DNODE_VERIFY(dn)		((void) sizeof ((uintptr_t)(dn)))
 #define	FREE_VERIFY(db, start, end, tx)
 
 #endif

diff --git a/zfs/include/sys/dsl_bookmark.h b/zfs/include/sys/dsl_bookmark.h
index ea7d70c..70f4813 100644
--- a/zfs/include/sys/dsl_bookmark.h
+++ b/zfs/include/sys/dsl_bookmark.h

@@ -13,22 +13,21 @@
  * CDDL HEADER END
  */
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_BOOKMARK_H
 #define	_SYS_DSL_BOOKMARK_H
 
 #include <sys/zfs_context.h>
+#include <sys/zfs_refcount.h>
 #include <sys/dsl_dataset.h>
+#include <sys/dsl_pool.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
-struct dsl_pool;
-struct dsl_dataset;
-
 /*
  * On disk zap object.
  */
@@ -57,12 +56,99 @@
 #define	BOOKMARK_PHYS_SIZE_V1	(3 * sizeof (uint64_t))
 #define	BOOKMARK_PHYS_SIZE_V2	(12 * sizeof (uint64_t))
 
+typedef enum zbm_flags {
+	ZBM_FLAG_HAS_FBN = (1 << 0),
+	ZBM_FLAG_SNAPSHOT_EXISTS = (1 << 1),
+} zbm_flags_t;
+
+/*
+ * Fixed header followed by a trailing flexible array (rlp_snaps).
+ * NOTE(review): presumably rlp_num_snaps is the number of valid entries in
+ * rlp_snaps[] and the entries are snapshot guids -- confirm against
+ * dsl_bookmark.c.
+ */
+typedef struct redaction_list_phys {
+	uint64_t rlp_last_object;
+	uint64_t rlp_last_blkid;
+	uint64_t rlp_num_entries;
+	uint64_t rlp_num_snaps;
+	uint64_t rlp_snaps[]; /* variable length */
+} redaction_list_phys_t;
+
+typedef struct redaction_list {
+	dmu_buf_user_t		rl_dbu;
+	redaction_list_phys_t	*rl_phys;
+	dmu_buf_t		*rl_dbuf;
+	uint64_t		rl_object;
+	zfs_refcount_t		rl_longholds;
+	objset_t		*rl_mos;
+} redaction_list_t;
+
+/* node in ds_bookmarks */
+typedef struct dsl_bookmark_node {
+	char *dbn_name; /* free with strfree() */
+	kmutex_t dbn_lock; /* protects dirty/phys in block_killed */
+	boolean_t dbn_dirty; /* in currently syncing txg */
+	zfs_bookmark_phys_t dbn_phys;
+	avl_node_t dbn_node;
+} dsl_bookmark_node_t;
+
+/*
+ * Entry describing a run of blocks in a redaction list.
+ * NOTE(review): presumably the run starts at block rbp_blkid of object
+ * rbp_object -- confirm against the redaction list users.
+ */
+typedef struct redact_block_phys {
+	uint64_t	rbp_object;
+	uint64_t	rbp_blkid;
+	/*
+	 * The top 16 bits of this field represent the block size in sectors of
+	 * the blocks in question; the bottom 48 bits are used to store the
+	 * number of consecutive blocks that are in the redaction list.  They
+	 * should be accessed using the redact_block_{get,set}_{size,count}()
+	 * inline functions in <sys/dmu_redact.h>.
+	 */
+	uint64_t	rbp_size_count;
+	uint64_t	rbp_padding;
+} redact_block_phys_t;
+
+typedef int (*rl_traverse_callback_t)(redact_block_phys_t *, void *);
+
+
+typedef struct dsl_bookmark_create_arg {
+	nvlist_t *dbca_bmarks;
+	nvlist_t *dbca_errors;
+} dsl_bookmark_create_arg_t;
+
+/*
+ * Argument bundle for creating a redacted bookmark.  The member types
+ * match the parameters of dsl_bookmark_create_redacted(): two names, a
+ * snapshot count with its array, a tag, and a redaction_list_t
+ * double pointer.
+ * NOTE(review): presumably this is the `arg` handed to the sync task's
+ * check/sync functions, and *dbcra_rl is an output -- confirm against
+ * dsl_bookmark.c.
+ */
+typedef struct dsl_bookmark_create_redacted_arg {
+	const char	*dbcra_bmark;
+	const char	*dbcra_snap;
+	redaction_list_t **dbcra_rl;
+	uint64_t	dbcra_numsnaps;
+	uint64_t	*dbcra_snaps;
+	void		*dbcra_tag;
+} dsl_bookmark_create_redacted_arg_t;
+
 int dsl_bookmark_create(nvlist_t *, nvlist_t *);
+int dsl_bookmark_create_nvl_validate(nvlist_t *);
+int dsl_bookmark_create_check(void *arg, dmu_tx_t *tx);
+void dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx);
+int dsl_bookmark_create_redacted(const char *, const char *, uint64_t,
+    uint64_t *, void *, redaction_list_t **);
 int dsl_get_bookmarks(const char *, nvlist_t *, nvlist_t *);
 int dsl_get_bookmarks_impl(dsl_dataset_t *, nvlist_t *, nvlist_t *);
+int dsl_get_bookmark_props(const char *, const char *, nvlist_t *);
 int dsl_bookmark_destroy(nvlist_t *, nvlist_t *);
 int dsl_bookmark_lookup(struct dsl_pool *, const char *,
     struct dsl_dataset *, zfs_bookmark_phys_t *);
+int dsl_bookmark_lookup_impl(dsl_dataset_t *, const char *,
+    zfs_bookmark_phys_t *);
+int dsl_redaction_list_hold_obj(struct dsl_pool *, uint64_t, void *,
+    redaction_list_t **);
+void dsl_redaction_list_rele(redaction_list_t *, void *);
+void dsl_redaction_list_long_hold(struct dsl_pool *, redaction_list_t *,
+    void *);
+void dsl_redaction_list_long_rele(redaction_list_t *, void *);
+boolean_t dsl_redaction_list_long_held(redaction_list_t *);
+int dsl_bookmark_init_ds(dsl_dataset_t *);
+void dsl_bookmark_fini_ds(dsl_dataset_t *);
+boolean_t dsl_bookmark_ds_destroyed(dsl_dataset_t *, dmu_tx_t *);
+void dsl_bookmark_snapshotted(dsl_dataset_t *, dmu_tx_t *);
+void dsl_bookmark_block_killed(dsl_dataset_t *, const blkptr_t *, dmu_tx_t *);
+void dsl_bookmark_sync_done(dsl_dataset_t *, dmu_tx_t *);
+void dsl_bookmark_node_add(dsl_dataset_t *, dsl_bookmark_node_t *, dmu_tx_t *);
+uint64_t dsl_bookmark_latest_txg(dsl_dataset_t *);
+int dsl_redaction_list_traverse(redaction_list_t *, zbookmark_phys_t *,
+    rl_traverse_callback_t, void *);
+void dsl_bookmark_next_changed(dsl_dataset_t *, dsl_dataset_t *, dmu_tx_t *);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/dsl_crypt.h b/zfs/include/sys/dsl_crypt.h
index 0f73ea6..835720c 100644
--- a/zfs/include/sys/dsl_crypt.h
+++ b/zfs/include/sys/dsl_crypt.h

@@ -189,7 +189,7 @@
 int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
     dsl_crypto_key_t **dck_out);
 
-int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds,
+int dsl_crypto_populate_key_nvlist(struct objset *os,
     uint64_t from_ivset_guid, nvlist_t **nvl_out);
 int dsl_crypto_recv_raw_key_check(struct dsl_dataset *ds,
     nvlist_t *nvl, dmu_tx_t *tx);

diff --git a/zfs/include/sys/dsl_dataset.h b/zfs/include/sys/dsl_dataset.h
index c464c70..ed934f9 100644
--- a/zfs/include/sys/dsl_dataset.h
+++ b/zfs/include/sys/dsl_dataset.h

@@ -36,7 +36,7 @@
 #include <sys/dsl_synctask.h>
 #include <sys/zfs_context.h>
 #include <sys/dsl_deadlist.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/rrwlock.h>
 #include <sys/dsl_crypt.h>
 #include <zfeature_common.h>
@@ -45,11 +45,13 @@
 extern "C" {
 #endif
 
+extern int zfs_allow_redacted_dataset_mount;
 struct dsl_dataset;
 struct dsl_dir;
 struct dsl_pool;
 struct dsl_crypto_params;
 struct dsl_key_mapping;
+struct zfs_bookmark_phys;
 
 #define	DS_FLAG_INCONSISTENT	(1ULL<<0)
 #define	DS_IS_INCONSISTENT(ds)	\
@@ -115,6 +117,13 @@
 #define	DS_FIELD_REMAP_DEADLIST	"com.delphix:remap_deadlist"
 
 /*
+ * We were receiving an incremental from a redaction bookmark, and these are the
+ * guids of its snapshots.
+ */
+#define	DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS \
+	"com.delphix:resume_redact_book_snaps"
+
+/*
  * This field is set to the ivset guid for encrypted snapshots. This is used
  * for validating raw receives.
  */
@@ -176,7 +185,8 @@
 
 	/* only used in syncing context, only valid for non-snapshots: */
 	struct dsl_dataset *ds_prev;
-	uint64_t ds_bookmarks;  /* DMU_OTN_ZAP_METADATA */
+	uint64_t ds_bookmarks_obj;  /* DMU_OTN_ZAP_METADATA */
+	avl_tree_t ds_bookmarks; /* dsl_bookmark_node_t */
 
 	/* has internal locking: */
 	dsl_deadlist_t ds_deadlist;
@@ -263,7 +273,7 @@
 static inline dsl_dataset_phys_t *
 dsl_dataset_phys(dsl_dataset_t *ds)
 {
-	return (ds->ds_dbuf->db_data);
+	return ((dsl_dataset_phys_t *)ds->ds_dbuf->db_data);
 }
 
 typedef struct dsl_dataset_promote_arg {
@@ -274,6 +284,7 @@
 	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
 	nvlist_t *err_ds;
 	cred_t *cr;
+	proc_t *proc;
 } dsl_dataset_promote_arg_t;
 
 typedef struct dsl_dataset_rollback_arg {
@@ -288,6 +299,7 @@
 	nvlist_t *ddsa_props;
 	nvlist_t *ddsa_errors;
 	cred_t *ddsa_cr;
+	proc_t *ddsa_proc;
 } dsl_dataset_snapshot_arg_t;
 
 /*
@@ -304,6 +316,7 @@
 
 /* flags for holding the dataset */
 typedef enum ds_hold_flags {
+	DS_HOLD_FLAG_NONE	= 0 << 0,
 	DS_HOLD_FLAG_DECRYPT	= 1 << 0 /* needs access to encrypted data */
 } ds_hold_flags_t;
 
@@ -314,23 +327,27 @@
 boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
     void *tag);
 int dsl_dataset_create_key_mapping(dsl_dataset_t *ds);
-int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
-    dsl_dataset_t **);
 int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
     ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
 void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds);
-void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
+    void *tag, dsl_dataset_t **);
 void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
     void *tag);
+void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
 int dsl_dataset_own(struct dsl_pool *dp, const char *name,
     ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+int dsl_dataset_own_force(struct dsl_pool *dp, const char *name,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
     ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+int dsl_dataset_own_obj_force(struct dsl_pool *dp, uint64_t dsobj,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
 void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override);
 int dsl_dataset_namelen(dsl_dataset_t *ds);
 boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *,
     struct dsl_crypto_params *, dmu_tx_t *);
@@ -387,9 +404,11 @@
 uint64_t dsl_get_referenced(dsl_dataset_t *ds);
 uint64_t dsl_get_numclones(dsl_dataset_t *ds);
 uint64_t dsl_get_inconsistent(dsl_dataset_t *ds);
+uint64_t dsl_get_redacted(dsl_dataset_t *ds);
 uint64_t dsl_get_available(dsl_dataset_t *ds);
 int dsl_get_written(dsl_dataset_t *ds, uint64_t *written);
 int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap);
+void dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval);
 int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
     char *source);
 
@@ -401,8 +420,10 @@
     uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp);
 uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
-int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
+int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *newds,
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
+int dsl_dataset_space_written_bookmark(struct zfs_bookmark_phys *bmp,
+    dsl_dataset_t *newds, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 
@@ -415,6 +436,8 @@
     uint64_t quota);
 int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
     uint64_t reservation);
+int dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
+    uint64_t compression);
 
 boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
     uint64_t earlier_txg);
@@ -427,7 +450,7 @@
 void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
     dsl_dataset_t *origin_head, dmu_tx_t *tx);
 int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
-    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr);
+    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc);
 void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
     dmu_tx_t *tx);
 
@@ -463,6 +486,9 @@
 boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds,
     spa_feature_t f, uint64_t *outlength, uint64_t **outp);
 
+void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
+    uint64_t num_redact_snaps, dmu_tx_t *tx);
+
 #ifdef ZFS_DEBUG
 #define	dprintf_ds(ds, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \

diff --git a/zfs/include/sys/dsl_deadlist.h b/zfs/include/sys/dsl_deadlist.h
index 08f3823..64358bb 100644
--- a/zfs/include/sys/dsl_deadlist.h
+++ b/zfs/include/sys/dsl_deadlist.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DEADLIST_H
@@ -28,12 +28,14 @@
 
 #include <sys/bpobj.h>
 #include <sys/zfs_context.h>
+#include <sys/zthr.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 struct dmu_buf;
+struct dsl_pool;
 struct dsl_dataset;
 
 typedef struct dsl_deadlist_phys {
@@ -46,8 +48,10 @@
 typedef struct dsl_deadlist {
 	objset_t *dl_os;
 	uint64_t dl_object;
-	avl_tree_t dl_tree;
+	avl_tree_t dl_tree; /* contains dsl_deadlist_entry_t */
+	avl_tree_t dl_cache; /* contains dsl_deadlist_cache_entry_t */
 	boolean_t dl_havetree;
+	boolean_t dl_havecache;
 	struct dmu_buf *dl_dbuf;
 	dsl_deadlist_phys_t *dl_phys;
 	kmutex_t dl_lock;
@@ -57,19 +61,49 @@
 	boolean_t dl_oldfmt;
 } dsl_deadlist_t;
 
+typedef struct dsl_deadlist_cache_entry {
+	avl_node_t dlce_node;
+	uint64_t dlce_mintxg;
+	uint64_t dlce_bpobj;
+	uint64_t dlce_bytes;
+	uint64_t dlce_comp;
+	uint64_t dlce_uncomp;
+} dsl_deadlist_cache_entry_t;
+
 typedef struct dsl_deadlist_entry {
 	avl_node_t dle_node;
 	uint64_t dle_mintxg;
 	bpobj_t dle_bpobj;
 } dsl_deadlist_entry_t;
 
+typedef struct livelist_condense_entry {
+	struct dsl_dataset *ds;
+	dsl_deadlist_entry_t *first;
+	dsl_deadlist_entry_t *next;
+	boolean_t syncing;
+	boolean_t cancelled;
+} livelist_condense_entry_t;
+
+extern unsigned long zfs_livelist_max_entries;
+extern int zfs_livelist_min_percent_shared;
+
+typedef int deadlist_iter_t(void *args, dsl_deadlist_entry_t *dle);
+
 void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
 void dsl_deadlist_close(dsl_deadlist_t *dl);
+void dsl_deadlist_iterate(dsl_deadlist_t *dl, deadlist_iter_t func, void *arg);
 uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx);
 void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx);
-void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx);
+void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp,
+    boolean_t free, dmu_tx_t *tx);
+int dsl_deadlist_insert_alloc_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+int dsl_deadlist_insert_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
 void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
 void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
+void dsl_deadlist_remove_entry(dsl_deadlist_t *dl, uint64_t mintxg,
+dmu_tx_t *tx);
+dsl_deadlist_entry_t *dsl_deadlist_first(dsl_deadlist_t *dl);
+dsl_deadlist_entry_t *dsl_deadlist_last(dsl_deadlist_t *dl);
 uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
     uint64_t mrs_obj, dmu_tx_t *tx);
 void dsl_deadlist_space(dsl_deadlist_t *dl,
@@ -81,6 +115,11 @@
 void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
     dmu_tx_t *tx);
 boolean_t dsl_deadlist_is_open(dsl_deadlist_t *dl);
+int dsl_process_sub_livelist(bpobj_t *bpobj, struct bplist *to_free,
+    zthr_t *t, uint64_t *size);
+void dsl_deadlist_clear_entry(dsl_deadlist_entry_t *dle, dsl_deadlist_t *dl,
+    dmu_tx_t *tx);
+void dsl_deadlist_discard_tree(dsl_deadlist_t *dl);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/dsl_deleg.h b/zfs/include/sys/dsl_deleg.h
index bb28014..7f46233 100644
--- a/zfs/include/sys/dsl_deleg.h
+++ b/zfs/include/sys/dsl_deleg.h

@@ -61,7 +61,6 @@
 #define	ZFS_DELEG_PERM_RELEASE		"release"
 #define	ZFS_DELEG_PERM_DIFF		"diff"
 #define	ZFS_DELEG_PERM_BOOKMARK		"bookmark"
-#define	ZFS_DELEG_PERM_REMAP		"remap"
 #define	ZFS_DELEG_PERM_LOAD_KEY		"load-key"
 #define	ZFS_DELEG_PERM_CHANGE_KEY	"change-key"
 #define	ZFS_DELEG_PERM_PROJECTUSED	"projectused"

diff --git a/zfs/include/sys/dsl_destroy.h b/zfs/include/sys/dsl_destroy.h
index ae3ca0c..208d75b 100644
--- a/zfs/include/sys/dsl_destroy.h
+++ b/zfs/include/sys/dsl_destroy.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
@@ -33,6 +33,7 @@
 
 struct nvlist;
 struct dsl_dataset;
+struct dsl_pool;
 struct dmu_tx;
 
 int dsl_destroy_snapshots_nvl(struct nvlist *, boolean_t,
@@ -45,6 +46,7 @@
 int dsl_destroy_snapshot_check_impl(struct dsl_dataset *, boolean_t);
 void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *,
     boolean_t, struct dmu_tx *);
+void dsl_dir_remove_clones_key(dsl_dir_t *, uint64_t, dmu_tx_t *);
 
 typedef struct dsl_destroy_snapshot_arg {
 	const char *ddsa_name;

diff --git a/zfs/include/sys/dsl_dir.h b/zfs/include/sys/dsl_dir.h
index 067bcfb..d635b31 100644
--- a/zfs/include/sys/dsl_dir.h
+++ b/zfs/include/sys/dsl_dir.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
@@ -29,18 +29,20 @@
 #define	_SYS_DSL_DIR_H
 
 #include <sys/dmu.h>
+#include <sys/dsl_deadlist.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_synctask.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/zfs_context.h>
 #include <sys/dsl_crypt.h>
+#include <sys/bplist.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 struct dsl_dataset;
-
+struct zthr;
 /*
  * DD_FIELD_* are strings that are used in the "extensified" dsl_dir zap object.
  * They should be of the format <reverse-dns>:<field>.
@@ -49,7 +51,7 @@
 #define	DD_FIELD_FILESYSTEM_COUNT	"com.joyent:filesystem_count"
 #define	DD_FIELD_SNAPSHOT_COUNT		"com.joyent:snapshot_count"
 #define	DD_FIELD_CRYPTO_KEY_OBJ		"com.datto:crypto_key_obj"
-#define	DD_FIELD_LAST_REMAP_TXG		"com.delphix:last_remap_txg"
+#define	DD_FIELD_LIVELIST		"com.delphix:livelist"
 
 typedef enum dd_used {
 	DD_USED_HEAD,
@@ -115,6 +117,15 @@
 	/* amount of space we expect to write; == amount of dirty data */
 	int64_t dd_space_towrite[TXG_SIZE];
 
+	dsl_deadlist_t dd_livelist;
+	bplist_t dd_pending_frees;
+	bplist_t dd_pending_allocs;
+
+	kmutex_t dd_activity_lock;
+	kcondvar_t dd_activity_cv;
+	boolean_t dd_activity_cancelled;
+	uint64_t dd_activity_waiters;
+
 	/* protected by dd_lock; keep at end of struct for better locality */
 	char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
 };
@@ -154,7 +165,6 @@
 uint64_t dsl_dir_space_available(dsl_dir_t *dd,
     dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
 void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
-int dsl_dir_get_remaptxg(dsl_dir_t *dd, uint64_t *count);
 void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
 int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
     uint64_t asize, boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx);
@@ -164,18 +174,20 @@
     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
 void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
+void dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
+    int64_t compressed, int64_t uncompressed, int64_t tonew,
+    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
 int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
     uint64_t quota);
 int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
     uint64_t reservation);
 int dsl_dir_activate_fs_ss_limit(const char *);
 int dsl_fs_ss_limit_check(dsl_dir_t *, uint64_t, zfs_prop_t, dsl_dir_t *,
-    cred_t *);
+    cred_t *, proc_t *);
 void dsl_fs_ss_count_adjust(dsl_dir_t *, int64_t, const char *, dmu_tx_t *);
-int dsl_dir_update_last_remap_txg(dsl_dir_t *, uint64_t);
 int dsl_dir_rename(const char *oldname, const char *newname);
 int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
-    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *);
+    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *, proc_t *);
 boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
 void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
     uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
@@ -185,6 +197,12 @@
     dmu_tx_t *tx);
 void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx);
 boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
+void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
+void dsl_dir_livelist_close(dsl_dir_t *dd);
+void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
+int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+    boolean_t *waited);
+void dsl_dir_cancel_waiters(dsl_dir_t *dd);
 
 /* internal reserved dir name */
 #define	MOS_DIR_NAME "$MOS"

diff --git a/zfs/include/sys/dsl_pool.h b/zfs/include/sys/dsl_pool.h
index 63ba350..e93bd05 100644
--- a/zfs/include/sys/dsl_pool.h
+++ b/zfs/include/sys/dsl_pool.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -40,6 +40,7 @@
 #include <sys/rrwlock.h>
 #include <sys/dsl_synctask.h>
 #include <sys/mmp.h>
+#include <sys/aggsum.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -54,9 +55,11 @@
 struct dmu_tx;
 struct dsl_scan;
 struct dsl_crypto_params;
+struct dsl_deadlist;
 
 extern unsigned long zfs_dirty_data_max;
 extern unsigned long zfs_dirty_data_max_max;
+extern unsigned long zfs_wrlog_data_max;
 extern int zfs_dirty_data_sync_percent;
 extern int zfs_dirty_data_max_percent;
 extern int zfs_dirty_data_max_max_percent;
@@ -81,7 +84,6 @@
 
 typedef struct zfs_all_blkstats {
 	zfs_blkstat_t	zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
-	kmutex_t	zab_lock;
 } zfs_all_blkstats_t;
 
 
@@ -95,7 +97,7 @@
 	struct dsl_dir *dp_leak_dir;
 	struct dsl_dataset *dp_origin_snap;
 	uint64_t dp_root_dir_obj;
-	struct taskq *dp_iput_taskq;
+	struct taskq *dp_zrele_taskq;
 	struct taskq *dp_unlinked_drain_taskq;
 
 	/* No lock needed - sync context only */
@@ -118,6 +120,9 @@
 	uint64_t dp_mos_compressed_delta;
 	uint64_t dp_mos_uncompressed_delta;
 
+	aggsum_t dp_wrlog_pertxg[TXG_SIZE];
+	aggsum_t dp_wrlog_total;
+
 	/*
 	 * Time of most recently scheduled (furthest in the future)
 	 * wakeup for delayed transactions.
@@ -157,6 +162,9 @@
 uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
 uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
     zfs_space_check_t slop_policy);
+uint64_t dsl_pool_deferred_space(dsl_pool_t *dp);
+void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
+boolean_t dsl_pool_need_wrlog_delay(dsl_pool_t *dp);
 void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
 void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
 void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
@@ -176,7 +184,7 @@
 boolean_t dsl_pool_config_held(dsl_pool_t *dp);
 boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
 
-taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
+taskq_t *dsl_pool_zrele_taskq(dsl_pool_t *dp);
 taskq_t *dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp);
 
 int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,

diff --git a/zfs/include/sys/dsl_prop.h b/zfs/include/sys/dsl_prop.h
index 62ef0ba..fba8f90 100644
--- a/zfs/include/sys/dsl_prop.h
+++ b/zfs/include/sys/dsl_prop.h

@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef	_SYS_DSL_PROP_H
@@ -61,6 +62,12 @@
 	zprop_source_t pa_source;
 } dsl_props_arg_t;
 
+typedef struct dsl_props_set_arg {
+	const char *dpsa_dsname;
+	zprop_source_t dpsa_source;
+	nvlist_t *dpsa_props;
+} dsl_props_set_arg_t;
+
 void dsl_prop_init(dsl_dir_t *dd);
 void dsl_prop_fini(dsl_dir_t *dd);
 int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
@@ -85,6 +92,8 @@
     int intsz, int numints, void *buf, char *setpoint,
     boolean_t snapshot);
 
+int dsl_props_set_check(void *arg, dmu_tx_t *tx);
+void dsl_props_set_sync(void *arg, dmu_tx_t *tx);
 void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source,
     nvlist_t *props, dmu_tx_t *tx);
 void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname,

diff --git a/zfs/include/sys/dsl_scan.h b/zfs/include/sys/dsl_scan.h
index 032f7f3..d716510 100644
--- a/zfs/include/sys/dsl_scan.h
+++ b/zfs/include/sys/dsl_scan.h

@@ -42,6 +42,8 @@
 struct dsl_pool;
 struct dmu_tx;
 
+extern int zfs_scan_suspend_progress;
+
 /*
  * All members of this structure must be uint64_t, for byteswap
  * purposes.
@@ -138,6 +140,7 @@
 
 	/* per txg statistics */
 	uint64_t scn_visited_this_txg;	/* total bps visited this txg */
+	uint64_t scn_dedup_frees_this_txg;	/* dedup bps freed this txg */
 	uint64_t scn_holes_this_txg;
 	uint64_t scn_lt_min_this_txg;
 	uint64_t scn_gt_max_this_txg;
@@ -152,7 +155,7 @@
 	dsl_scan_phys_t scn_phys;	/* on disk representation of scan */
 	dsl_scan_phys_t scn_phys_cached;
 	avl_tree_t scn_queue;		/* queue of datasets to scan */
-	uint64_t scn_bytes_pending;	/* outstanding data to issue */
+	uint64_t scn_queues_pending;	/* outstanding data to issue */
 } dsl_scan_t;
 
 typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
@@ -160,6 +163,8 @@
 void scan_init(void);
 void scan_fini(void);
 int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
+int dsl_scan_setup_check(void *, dmu_tx_t *);
+void dsl_scan_setup_sync(void *, dmu_tx_t *);
 void dsl_scan_fini(struct dsl_pool *dp);
 void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
 int dsl_scan_cancel(struct dsl_pool *);

diff --git a/zfs/include/sys/dsl_synctask.h b/zfs/include/sys/dsl_synctask.h
index 957963f..5a5b306 100644
--- a/zfs/include/sys/dsl_synctask.h
+++ b/zfs/include/sys/dsl_synctask.h

@@ -41,10 +41,11 @@
 
 typedef enum zfs_space_check {
 	/*
-	 * Normal space check: if there is less than 3.2% free space,
-	 * the operation will fail.  Operations which are logically
-	 * creating things should use this (e.g. "zfs create", "zfs snapshot").
-	 * User writes (via the ZPL / ZVOL) also fail at this point.
+	 * Normal space check: if there is less than 3.2% free space (bounded
+	 * by spa_max_slop), the operation will fail.  Operations which are
+	 * logically creating things should use this (e.g. "zfs create", "zfs
+	 * snapshot").  User writes (via the ZPL / ZVOL) also fail at this
+	 * point.
 	 */
 	ZFS_SPACE_CHECK_NORMAL,
 
@@ -112,11 +113,11 @@
 int dsl_sync_task(const char *, dsl_checkfunc_t *,
     dsl_syncfunc_t *, void *, int, zfs_space_check_t);
 void dsl_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *,
-    void *, int, zfs_space_check_t, dmu_tx_t *);
+    void *, dmu_tx_t *);
 int dsl_early_sync_task(const char *, dsl_checkfunc_t *,
     dsl_syncfunc_t *, void *, int, zfs_space_check_t);
 void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *,
-    void *, int, zfs_space_check_t, dmu_tx_t *);
+    void *, dmu_tx_t *);
 int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *,
     dsl_sigfunc_t *, void *, int, zfs_space_check_t);
 

diff --git a/zfs/include/sys/fm/Makefile.am b/zfs/include/sys/fm/Makefile.am
index 8bca5d8..7c6c3d4 100644
--- a/zfs/include/sys/fm/Makefile.am
+++ b/zfs/include/sys/fm/Makefile.am

@@ -1,21 +1,17 @@
 SUBDIRS = fs
 
 COMMON_H = \
-	$(top_srcdir)/include/sys/fm/protocol.h \
-	$(top_srcdir)/include/sys/fm/util.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	protocol.h \
+	util.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/fm
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/fm
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/fm/fs/Makefile.am b/zfs/include/sys/fm/fs/Makefile.am
index fdc9eb5..a662753 100644
--- a/zfs/include/sys/fm/fs/Makefile.am
+++ b/zfs/include/sys/fm/fs/Makefile.am

@@ -1,18 +1,14 @@
 COMMON_H = \
-	$(top_srcdir)/include/sys/fm/fs/zfs.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	zfs.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/fm/fs
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/fm/fs
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/fm/fs/zfs.h b/zfs/include/sys/fm/fs/zfs.h
index 9bfb123..cd080c8 100644
--- a/zfs/include/sys/fm/fs/zfs.h
+++ b/zfs/include/sys/fm/fs/zfs.h

@@ -23,6 +23,10 @@
  * Use is subject to license terms.
  */
 
+/*
+ *  Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
 #ifndef	_SYS_FM_FS_ZFS_H
 #define	_SYS_FM_FS_ZFS_H
 
@@ -88,6 +92,7 @@
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE		"zio_size"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS	"zio_flags"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE	"zio_stage"
+#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY	"zio_priority"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE	"zio_pipeline"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY	"zio_delay"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP	"zio_timestamp"
@@ -105,6 +110,10 @@
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS	"bad_cleared_bits"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
+#define	FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME	"snapshot_name"
+#define	FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME	"device_name"
+#define	FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME	"raw_name"
+#define	FM_EREPORT_PAYLOAD_ZFS_VOLUME	"volume"
 
 #define	FM_EREPORT_FAILMODE_WAIT		"wait"
 #define	FM_EREPORT_FAILMODE_CONTINUE		"continue"
@@ -114,6 +123,11 @@
 #define	FM_RESOURCE_AUTOREPLACE			"autoreplace"
 #define	FM_RESOURCE_STATECHANGE			"statechange"
 
+#define	FM_RESOURCE_ZFS_SNAPSHOT_MOUNT		"snapshot_mount"
+#define	FM_RESOURCE_ZFS_SNAPSHOT_UNMOUNT		"snapshot_unmount"
+#define	FM_RESOURCE_ZVOL_CREATE_SYMLINK		"zvol_create"
+#define	FM_RESOURCE_ZVOL_REMOVE_SYMLINK		"zvol_remove"
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/fm/util.h b/zfs/include/sys/fm/util.h
index ff54b05..5fb6d1d 100644
--- a/zfs/include/sys/fm/util.h
+++ b/zfs/include/sys/fm/util.h

@@ -31,6 +31,7 @@
 #endif
 
 #include <sys/nvpair.h>
+#include <sys/zfs_file.h>
 
 /*
  * Shared user/kernel definitions for class length, error channel name,
@@ -92,18 +93,20 @@
 
 extern void fm_init(void);
 extern void fm_fini(void);
-extern void fm_nvprint(nvlist_t *);
 extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
 extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
 extern void zfs_zevent_drain_all(int *);
-extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
-extern void zfs_zevent_fd_rele(int);
+extern zfs_file_t *zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
+extern void zfs_zevent_fd_rele(zfs_file_t *);
 extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *);
 extern int zfs_zevent_wait(zfs_zevent_t *);
 extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);
 extern void zfs_zevent_init(zfs_zevent_t **);
 extern void zfs_zevent_destroy(zfs_zevent_t *);
 
+extern void zfs_zevent_track_duplicate(void);
+extern void zfs_ereport_init(void);
+extern void zfs_ereport_fini(void);
 #else
 
 static inline void fm_init(void) { }

diff --git a/zfs/include/sys/fs/Makefile.am b/zfs/include/sys/fs/Makefile.am
index 0859b9f..6a93053 100644
--- a/zfs/include/sys/fs/Makefile.am
+++ b/zfs/include/sys/fs/Makefile.am

@@ -1,18 +1,14 @@
 COMMON_H = \
-	$(top_srcdir)/include/sys/fs/zfs.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	zfs.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/fs
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/fs
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/fs/zfs.h b/zfs/include/sys/fs/zfs.h
index 6b78072..84f5aee 100644
--- a/zfs/include/sys/fs/zfs.h
+++ b/zfs/include/sys/fs/zfs.h

@@ -21,16 +21,17 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019 Datto Inc.
+ * Portions Copyright 2010 Robert Milkowski
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
-/* Portions Copyright 2010 Robert Milkowski */
-
 #ifndef	_SYS_FS_ZFS_H
 #define	_SYS_FS_ZFS_H
 
@@ -115,7 +116,7 @@
 	ZFS_PROP_READONLY,
 	ZFS_PROP_ZONED,
 	ZFS_PROP_SNAPDIR,
-	ZFS_PROP_PRIVATE,		/* not exposed to user, temporary */
+	ZFS_PROP_ACLMODE,
 	ZFS_PROP_ACLINHERIT,
 	ZFS_PROP_CREATETXG,
 	ZFS_PROP_NAME,			/* not exposed to the user */
@@ -181,9 +182,11 @@
 	ZFS_PROP_ENCRYPTION_ROOT,
 	ZFS_PROP_KEY_GUID,
 	ZFS_PROP_KEYSTATUS,
-	ZFS_PROP_REMAPTXG,		/* not exposed to the user */
+	ZFS_PROP_REMAPTXG,		/* obsolete - no longer used */
 	ZFS_PROP_SPECIAL_SMALL_BLOCKS,
 	ZFS_PROP_IVSET_GUID,		/* not exposed to the user */
+	ZFS_PROP_REDACTED,
+	ZFS_PROP_REDACT_SNAPS,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
@@ -208,8 +211,7 @@
 /*
  * Pool properties are identified by these constants and must be added to the
  * end of this list to ensure that external consumers are not affected
- * by the change. If you make any changes to this list, be sure to update
- * the property table in module/zcommon/zpool_prop.c.
+ * by the change.  Properties must be registered in zpool_prop_init().
  */
 typedef enum {
 	ZPOOL_PROP_INVAL = -1,
@@ -245,10 +247,11 @@
 	ZPOOL_PROP_CHECKPOINT,
 	ZPOOL_PROP_LOAD_GUID,
 	ZPOOL_PROP_AUTOTRIM,
+	ZPOOL_PROP_COMPATIBILITY,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;
 
-/* Small enough to not hog a whole line of printout in zpool(1M). */
+/* Small enough to not hog a whole line of printout in zpool(8). */
 #define	ZPROP_MAX_COMMENT	32
 
 #define	ZPROP_VALUE		"value"
@@ -421,7 +424,9 @@
 
 typedef enum {
 	ZFS_REDUNDANT_METADATA_ALL,
-	ZFS_REDUNDANT_METADATA_MOST
+	ZFS_REDUNDANT_METADATA_MOST,
+	ZFS_REDUNDANT_METADATA_SOME,
+	ZFS_REDUNDANT_METADATA_NONE
 } zfs_redundant_metadata_type_t;
 
 typedef enum {
@@ -572,6 +577,11 @@
 #define	ZPL_VERSION_USERSPACE		ZPL_VERSION_4
 #define	ZPL_VERSION_SA			ZPL_VERSION_5
 
+/* Persistent L2ARC version */
+#define	L2ARC_PERSISTENT_VERSION_1	1ULL
+#define	L2ARC_PERSISTENT_VERSION	L2ARC_PERSISTENT_VERSION_1
+#define	L2ARC_PERSISTENT_VERSION_STRING	"1"
+
 /* Rewind policy information */
 #define	ZPOOL_NO_REWIND		1  /* No policy - default behavior */
 #define	ZPOOL_NEVER_REWIND	2  /* Do not search for best txg or rewind */
@@ -590,8 +600,8 @@
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
- * configuration.  New on-disk names should be prefixed with "<reverse-DNS>:"
- * (e.g. "org.open-zfs:") to avoid conflicting names being developed
+ * configuration.  New on-disk names should be prefixed with "<reversed-DNS>:"
+ * (e.g. "org.openzfs:") to avoid conflicting names being developed
  * independently.
  */
 #define	ZPOOL_CONFIG_VERSION		"version"
@@ -611,6 +621,7 @@
 #define	ZPOOL_CONFIG_PREV_INDIRECT_VDEV	"com.delphix:prev_indirect_vdev"
 #define	ZPOOL_CONFIG_PATH		"path"
 #define	ZPOOL_CONFIG_DEVID		"devid"
+#define	ZPOOL_CONFIG_SPARE_ID		"spareid"
 #define	ZPOOL_CONFIG_METASLAB_ARRAY	"metaslab_array"
 #define	ZPOOL_CONFIG_METASLAB_SHIFT	"metaslab_shift"
 #define	ZPOOL_CONFIG_ASHIFT		"ashift"
@@ -698,6 +709,7 @@
 #define	ZPOOL_CONFIG_SPLIT_LIST		"guid_list"
 #define	ZPOOL_CONFIG_REMOVING		"removing"
 #define	ZPOOL_CONFIG_RESILVER_TXG	"resilver_txg"
+#define	ZPOOL_CONFIG_REBUILD_TXG	"rebuild_txg"
 #define	ZPOOL_CONFIG_COMMENT		"comment"
 #define	ZPOOL_CONFIG_SUSPENDED		"suspended"	/* not stored on disk */
 #define	ZPOOL_CONFIG_SUSPENDED_REASON	"suspended_reason"	/* not stored */
@@ -724,6 +736,8 @@
 #define	ZPOOL_CONFIG_MMP_HOSTID		"mmp_hostid"	/* not stored on disk */
 #define	ZPOOL_CONFIG_ALLOCATION_BIAS	"alloc_bias"	/* not stored on disk */
 #define	ZPOOL_CONFIG_EXPANSION_TIME	"expansion_time"	/* not stored */
+#define	ZPOOL_CONFIG_REBUILD_STATS	"org.openzfs:rebuild_stats"
+#define	ZPOOL_CONFIG_COMPATIBILITY	"compatibility"
 
 /*
  * The persistent vdev state is stored as separate values rather than a single
@@ -746,13 +760,21 @@
 
 /* Rewind data discovered */
 #define	ZPOOL_CONFIG_LOAD_TIME		"rewind_txg_ts"
+#define	ZPOOL_CONFIG_LOAD_META_ERRORS	"verify_meta_errors"
 #define	ZPOOL_CONFIG_LOAD_DATA_ERRORS	"verify_data_errors"
 #define	ZPOOL_CONFIG_REWIND_TIME	"seconds_of_rewind"
 
+/* dRAID configuration */
+#define	ZPOOL_CONFIG_DRAID_NDATA	"draid_ndata"
+#define	ZPOOL_CONFIG_DRAID_NSPARES	"draid_nspares"
+#define	ZPOOL_CONFIG_DRAID_NGROUPS	"draid_ngroups"
+
 #define	VDEV_TYPE_ROOT			"root"
 #define	VDEV_TYPE_MIRROR		"mirror"
 #define	VDEV_TYPE_REPLACING		"replacing"
 #define	VDEV_TYPE_RAIDZ			"raidz"
+#define	VDEV_TYPE_DRAID			"draid"
+#define	VDEV_TYPE_DRAID_SPARE		"dspare"
 #define	VDEV_TYPE_DISK			"disk"
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
@@ -762,6 +784,12 @@
 #define	VDEV_TYPE_L2CACHE		"l2cache"
 #define	VDEV_TYPE_INDIRECT		"indirect"
 
+#define	VDEV_RAIDZ_MAXPARITY		3
+
+#define	VDEV_DRAID_MAXPARITY		3
+#define	VDEV_DRAID_MIN_CHILDREN		2
+#define	VDEV_DRAID_MAX_CHILDREN		UINT8_MAX
+
 /* VDEV_TOP_ZAP_* are used in top-level vdev ZAP objects. */
 #define	VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM \
 	"com.delphix:indirect_obsolete_sm"
@@ -769,6 +797,11 @@
 	"com.delphix:obsolete_counts_are_precise"
 #define	VDEV_TOP_ZAP_POOL_CHECKPOINT_SM \
 	"com.delphix:pool_checkpoint_sm"
+#define	VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS \
+	"com.delphix:ms_unflushed_phys_txgs"
+
+#define	VDEV_TOP_ZAP_VDEV_REBUILD_PHYS \
+	"org.openzfs:vdev_rebuild"
 
 #define	VDEV_TOP_ZAP_ALLOCATION_BIAS \
 	"org.zfsonlinux:allocation_bias"
@@ -816,7 +849,20 @@
  * The location of the pool configuration repository, shared between kernel and
  * userland.
  */
+#define	ZPOOL_CACHE_BOOT	"/boot/zfs/zpool.cache"
 #define	ZPOOL_CACHE		"/etc/zfs/zpool.cache"
+/*
+ * Settings for zpool compatibility features files
+ */
+#define	ZPOOL_SYSCONF_COMPAT_D	SYSCONFDIR "/zfs/compatibility.d"
+#define	ZPOOL_DATA_COMPAT_D	PKGDATADIR "/compatibility.d"
+#define	ZPOOL_COMPAT_MAXSIZE	16384
+
+/*
+ * Hard-wired compatibility settings
+ */
+#define	ZPOOL_COMPAT_LEGACY	"legacy"
+#define	ZPOOL_COMPAT_OFF	"off"
 
 /*
  * vdev states are ordered from least to most healthy.
@@ -860,6 +906,7 @@
 	VDEV_AUX_EXTERNAL_PERSIST,	/* persistent forced fault	*/
 	VDEV_AUX_ACTIVE,	/* vdev active on a different host	*/
 	VDEV_AUX_CHILDREN_OFFLINE, /* all children are offline		*/
+	VDEV_AUX_ASHIFT_TOO_BIG, /* vdev's min block size is too large   */
 } vdev_aux_t;
 
 /*
@@ -983,11 +1030,26 @@
 	DSS_NUM_STATES
 } dsl_scan_state_t;
 
+typedef struct vdev_rebuild_stat {
+	uint64_t vrs_state;		/* vdev_rebuild_state_t */
+	uint64_t vrs_start_time;	/* time_t */
+	uint64_t vrs_end_time;		/* time_t */
+	uint64_t vrs_scan_time_ms;	/* total run time (millisecs) */
+	uint64_t vrs_bytes_scanned;	/* allocated bytes scanned */
+	uint64_t vrs_bytes_issued;	/* read bytes issued */
+	uint64_t vrs_bytes_rebuilt;	/* rebuilt bytes */
+	uint64_t vrs_bytes_est;		/* total bytes to scan */
+	uint64_t vrs_errors;		/* scanning errors */
+	uint64_t vrs_pass_time_ms;	/* pass run time (millisecs) */
+	uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
+	uint64_t vrs_pass_bytes_issued;	/* bytes issued since start/resume */
+} vdev_rebuild_stat_t;
+
 /*
- * Errata described by http://zfsonlinux.org/msg/ZFS-8000-ER.  The ordering
- * of this enum must be maintained to ensure the errata identifiers map to
- * the correct documentation.  New errata may only be appended to the list
- * and must contain corresponding documentation at the above link.
+ * Errata described by https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-ER.
+ * The ordering of this enum must be maintained to ensure the errata identifiers
+ * map to the correct documentation.  New errata may only be appended to the
+ * list and must contain corresponding documentation at the above link.
  */
 typedef enum zpool_errata {
 	ZPOOL_ERRATA_NONE,
@@ -1039,8 +1101,19 @@
 	uint64_t	vs_trim_bytes_est;	/* total bytes to trim */
 	uint64_t	vs_trim_state;		/* vdev_trim_state_t */
 	uint64_t	vs_trim_action_time;	/* time_t */
+	uint64_t	vs_rebuild_processed;	/* bytes rebuilt */
+	uint64_t	vs_configured_ashift;   /* TLV vdev_ashift */
+	uint64_t	vs_logical_ashift;	/* vdev_logical_ashift  */
+	uint64_t	vs_physical_ashift;	/* vdev_physical_ashift */
+	uint64_t	vs_pspace;		/* physical capacity */
 } vdev_stat_t;
 
+/* BEGIN CSTYLED */
+#define	VDEV_STAT_VALID(field, uint64_t_field_count) \
+    ((uint64_t_field_count * sizeof (uint64_t)) >=	 \
+     (offsetof(vdev_stat_t, field) + sizeof (((vdev_stat_t *)NULL)->field)))
+/* END CSTYLED */
+
 /*
  * Extended stats
  *
@@ -1100,6 +1173,7 @@
 	POOL_INITIALIZE_START,
 	POOL_INITIALIZE_CANCEL,
 	POOL_INITIALIZE_SUSPEND,
+	POOL_INITIALIZE_UNINIT,
 	POOL_INITIALIZE_FUNCS
 } pool_initialize_func_t;
 
@@ -1141,12 +1215,11 @@
 #define	ZVOL_DRIVER	"zvol"
 #define	ZFS_DRIVER	"zfs"
 #define	ZFS_DEV		"/dev/zfs"
-#define	ZFS_SHARETAB	"/etc/dfs/sharetab"
 
 #define	ZFS_SUPER_MAGIC	0x2fc12fc1
 
 /* general zvol path */
-#define	ZVOL_DIR	"/dev"
+#define	ZVOL_DIR		"/dev/zvol/"
 
 #define	ZVOL_MAJOR		230
 #define	ZVOL_MINOR_BITS		4
@@ -1173,6 +1246,13 @@
 	VDEV_TRIM_COMPLETE,
 } vdev_trim_state_t;
 
+typedef enum {
+	VDEV_REBUILD_NONE,
+	VDEV_REBUILD_ACTIVE,
+	VDEV_REBUILD_CANCELED,
+	VDEV_REBUILD_COMPLETE,
+} vdev_rebuild_state_t;
+
 /*
  * nvlist name constants. Facilitate restricting snapshot iteration range for
  * the "list next snapshot" ioctl
@@ -1187,9 +1267,13 @@
  */
 typedef enum zfs_ioc {
 	/*
-	 * illumos - 81/128 numbers reserved.
+	 * Core features - 85/128 numbers reserved.
 	 */
+#ifdef __FreeBSD__
+	ZFS_IOC_FIRST =	0,
+#else
 	ZFS_IOC_FIRST =	('Z' << 8),
+#endif
 	ZFS_IOC = ZFS_IOC_FIRST,
 	ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST,	/* 0x5a00 */
 	ZFS_IOC_POOL_DESTROY,			/* 0x5a01 */
@@ -1272,20 +1356,23 @@
 	ZFS_IOC_POOL_DISCARD_CHECKPOINT,	/* 0x5a4e */
 	ZFS_IOC_POOL_INITIALIZE,		/* 0x5a4f */
 	ZFS_IOC_POOL_TRIM,			/* 0x5a50 */
+	ZFS_IOC_REDACT,				/* 0x5a51 */
+	ZFS_IOC_GET_BOOKMARK_PROPS,		/* 0x5a52 */
+	ZFS_IOC_WAIT,				/* 0x5a53 */
+	ZFS_IOC_WAIT_FS,			/* 0x5a54 */
 
 	/*
-	 * Linux - 3/64 numbers reserved.
+	 * Per-platform (Optional) - 8/128 numbers reserved.
 	 */
-	ZFS_IOC_LINUX = ('Z' << 8) + 0x80,
-	ZFS_IOC_EVENTS_NEXT,			/* 0x5a81 */
-	ZFS_IOC_EVENTS_CLEAR,			/* 0x5a82 */
-	ZFS_IOC_EVENTS_SEEK,			/* 0x5a83 */
-
-	/*
-	 * FreeBSD - 1/64 numbers reserved.
-	 */
-	ZFS_IOC_FREEBSD = ('Z' << 8) + 0xC0,
-
+	ZFS_IOC_PLATFORM = ZFS_IOC_FIRST + 0x80,
+	ZFS_IOC_EVENTS_NEXT,			/* 0x81 (Linux) */
+	ZFS_IOC_EVENTS_CLEAR,			/* 0x82 (Linux) */
+	ZFS_IOC_EVENTS_SEEK,			/* 0x83 (Linux) */
+	ZFS_IOC_NEXTBOOT,			/* 0x84 (FreeBSD) */
+	ZFS_IOC_JAIL,				/* 0x85 (FreeBSD) */
+	ZFS_IOC_UNJAIL,				/* 0x86 (FreeBSD) */
+	ZFS_IOC_SET_BOOTENV,			/* 0x87 */
+	ZFS_IOC_GET_BOOTENV,			/* 0x88 */
 	ZFS_IOC_LAST
 } zfs_ioc_t;
 
@@ -1303,6 +1390,8 @@
  * not described precisely by generic errno codes.
  *
  * These numbers should not change over time. New entries should be appended.
+ *
+ * (Keep in sync with contrib/pyzfs/libzfs_core/_constants.py)
  */
 typedef enum {
 	ZFS_ERR_CHECKPOINT_EXISTS = 1024,
@@ -1318,7 +1407,14 @@
 	ZFS_ERR_FROM_IVSET_GUID_MISSING,
 	ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
 	ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
+	ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE,
 	ZFS_ERR_EXPORT_IN_PROGRESS,
+	ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR,
+	ZFS_ERR_STREAM_TRUNCATED,
+	ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH,
+	ZFS_ERR_RESILVER_IN_PROGRESS,
+	ZFS_ERR_REBUILD_IN_PROGRESS,
+	ZFS_ERR_BADPROP,
 } zfs_errno_t;
 
 /*
@@ -1334,6 +1430,23 @@
 	SPA_LOAD_CREATE		/* creation in progress */
 } spa_load_state_t;
 
+typedef enum {
+	ZPOOL_WAIT_CKPT_DISCARD,
+	ZPOOL_WAIT_FREE,
+	ZPOOL_WAIT_INITIALIZE,
+	ZPOOL_WAIT_REPLACE,
+	ZPOOL_WAIT_REMOVE,
+	ZPOOL_WAIT_RESILVER,
+	ZPOOL_WAIT_SCRUB,
+	ZPOOL_WAIT_TRIM,
+	ZPOOL_WAIT_NUM_ACTIVITIES
+} zpool_wait_activity_t;
+
+typedef enum {
+	ZFS_WAIT_DELETEQ,
+	ZFS_WAIT_NUM_ACTIVITIES
+} zfs_wait_activity_t;
+
 /*
  * Bookmark name values.
  */
@@ -1360,9 +1473,11 @@
 #define	ZPOOL_HIST_IOCTL	"ioctl"
 #define	ZPOOL_HIST_INPUT_NVL	"in_nvl"
 #define	ZPOOL_HIST_OUTPUT_NVL	"out_nvl"
+#define	ZPOOL_HIST_OUTPUT_SIZE	"out_size"
 #define	ZPOOL_HIST_DSNAME	"dsname"
 #define	ZPOOL_HIST_DSID		"dsid"
 #define	ZPOOL_HIST_ERRNO	"errno"
+#define	ZPOOL_HIST_ELAPSED_NS	"elapsed_ns"
 
 /*
  * Special nvlist name that will not have its args recorded in the pool's
@@ -1385,12 +1500,26 @@
 #define	ZPOOL_TRIM_SECURE		"trim_secure"
 
 /*
+ * The following are names used when invoking ZFS_IOC_WAIT.
+ */
+#define	ZPOOL_WAIT_ACTIVITY		"wait_activity"
+#define	ZPOOL_WAIT_TAG			"wait_tag"
+#define	ZPOOL_WAIT_WAITED		"wait_waited"
+
+/*
+ * The following are names used when invoking ZFS_IOC_WAIT_FS.
+ */
+#define	ZFS_WAIT_ACTIVITY		"wait_activity"
+#define	ZFS_WAIT_WAITED			"wait_waited"
+
+/*
  * Flags for ZFS_IOC_VDEV_SET_STATE
  */
 #define	ZFS_ONLINE_CHECKREMOVE	0x1
 #define	ZFS_ONLINE_UNSPARE	0x2
 #define	ZFS_ONLINE_FORCEFAULT	0x4
 #define	ZFS_ONLINE_EXPAND	0x8
+#define	ZFS_ONLINE_SPARE	0x10
 #define	ZFS_OFFLINE_TEMPORARY	0x1
 
 /*
@@ -1430,7 +1559,12 @@
  * given payloads:
  *
  *	ESC_ZFS_RESILVER_START
- *	ESC_ZFS_RESILVER_END
+ *	ESC_ZFS_RESILVER_FINISH
+ *
+ *		ZFS_EV_POOL_NAME	DATA_TYPE_STRING
+ *		ZFS_EV_POOL_GUID	DATA_TYPE_UINT64
+ *		ZFS_EV_RESILVER_TYPE	DATA_TYPE_STRING
+ *
  *	ESC_ZFS_POOL_DESTROY
  *	ESC_ZFS_POOL_REGUID
  *
@@ -1484,6 +1618,48 @@
 #define	ZFS_EV_HIST_IOCTL	"history_ioctl"
 #define	ZFS_EV_HIST_DSNAME	"history_dsname"
 #define	ZFS_EV_HIST_DSID	"history_dsid"
+#define	ZFS_EV_RESILVER_TYPE	"resilver_type"
+
+
+/*
+ * We currently support block sizes from 512 bytes to 16MB.
+ * The benefits of larger blocks, and thus larger IO, need to be weighed
+ * against the cost of COWing a giant block to modify one byte, and the
+ * large latency of reading or writing a large block.
+ *
+ * The recordsize property can not be set larger than zfs_max_recordsize
+ * (default 16MB on 64-bit and 1MB on 32-bit). See the comment near
+ * zfs_max_recordsize in dsl_dataset.c for details.
+ *
+ * Note that although the LSIZE field of the blkptr_t can store sizes up
+ * to 32MB, the dnode's dn_datablkszsec can only store sizes up to
+ * 32MB - 512 bytes.  Therefore, we limit SPA_MAXBLOCKSIZE to 16MB.
+ */
+#define	SPA_MINBLOCKSHIFT	9
+#define	SPA_OLD_MAXBLOCKSHIFT	17
+#define	SPA_MAXBLOCKSHIFT	24
+#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
+#define	SPA_OLD_MAXBLOCKSIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)
+#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)
+
+
+/* supported encryption algorithms */
+enum zio_encrypt {
+	ZIO_CRYPT_INHERIT = 0,
+	ZIO_CRYPT_ON,
+	ZIO_CRYPT_OFF,
+	ZIO_CRYPT_AES_128_CCM,
+	ZIO_CRYPT_AES_192_CCM,
+	ZIO_CRYPT_AES_256_CCM,
+	ZIO_CRYPT_AES_128_GCM,
+	ZIO_CRYPT_AES_192_GCM,
+	ZIO_CRYPT_AES_256_GCM,
+	ZIO_CRYPT_FUNCTIONS
+};
+
+#define	ZIO_CRYPT_ON_VALUE	ZIO_CRYPT_AES_256_GCM
+#define	ZIO_CRYPT_DEFAULT	ZIO_CRYPT_OFF
+
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/lua/Makefile.am b/zfs/include/sys/lua/Makefile.am
index 5f224dc..8b4dafa 100644
--- a/zfs/include/sys/lua/Makefile.am
+++ b/zfs/include/sys/lua/Makefile.am

@@ -1,21 +1,17 @@
 COMMON_H = \
-	$(top_srcdir)/include/sys/lua/lua.h \
-	$(top_srcdir)/include/sys/lua/luaconf.h \
-	$(top_srcdir)/include/sys/lua/lualib.h \
-	$(top_srcdir)/include/sys/lua/lauxlib.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	lua.h \
+	luaconf.h \
+	lualib.h \
+	lauxlib.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/lua
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/lua
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/lua/luaconf.h b/zfs/include/sys/lua/luaconf.h
index fa78613..83202d7 100644
--- a/zfs/include/sys/lua/luaconf.h
+++ b/zfs/include/sys/lua/luaconf.h

@@ -15,6 +15,7 @@
 extern ssize_t lcompat_sprintf(char *, size_t size, const char *, ...);
 extern int64_t lcompat_strtoll(const char *, char **);
 extern int64_t lcompat_pow(int64_t, int64_t);
+extern int lcompat_hashnum(int64_t);
 
 /*
 ** ==================================================================
@@ -367,11 +368,7 @@
 @@ LUAL_BUFFERSIZE is the buffer size used by the lauxlib buffer system.
 ** CHANGE it if it uses too much C-stack space.
 */
-#ifdef __linux__
 #define LUAL_BUFFERSIZE		512
-#else
-#define LUAL_BUFFERSIZE		1024
-#endif
 
 
 /*

diff --git a/zfs/include/sys/metaslab.h b/zfs/include/sys/metaslab.h
index 3309025..2b4f724 100644
--- a/zfs/include/sys/metaslab.h
+++ b/zfs/include/sys/metaslab.h

@@ -49,16 +49,26 @@
     metaslab_t **);
 void metaslab_fini(metaslab_t *);
 
+void metaslab_set_unflushed_dirty(metaslab_t *, boolean_t);
+void metaslab_set_unflushed_txg(metaslab_t *, uint64_t, dmu_tx_t *);
+void metaslab_set_estimated_condensed_size(metaslab_t *, uint64_t, dmu_tx_t *);
+boolean_t metaslab_unflushed_dirty(metaslab_t *);
+uint64_t metaslab_unflushed_txg(metaslab_t *);
+uint64_t metaslab_estimated_condensed_size(metaslab_t *);
+int metaslab_sort_by_flushed(const void *, const void *);
+void metaslab_unflushed_bump(metaslab_t *, dmu_tx_t *, boolean_t);
+uint64_t metaslab_unflushed_changes_memused(metaslab_t *);
+
 int metaslab_load(metaslab_t *);
-void metaslab_potentially_unload(metaslab_t *, uint64_t);
 void metaslab_unload(metaslab_t *);
+boolean_t metaslab_flush(metaslab_t *, dmu_tx_t *);
 
 uint64_t metaslab_allocated_space(metaslab_t *);
 
 void metaslab_sync(metaslab_t *, uint64_t);
 void metaslab_sync_done(metaslab_t *, uint64_t);
 void metaslab_sync_reassess(metaslab_group_t *);
-uint64_t metaslab_block_maxsize(metaslab_t *);
+uint64_t metaslab_largest_allocatable(metaslab_t *);
 
 /*
  * metaslab alloc flags
@@ -71,6 +81,7 @@
 #define	METASLAB_DONT_THROTTLE		0x10
 #define	METASLAB_MUST_RESERVE		0x20
 #define	METASLAB_FASTWRITE		0x40
+#define	METASLAB_ZIL			0x80
 
 int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
     blkptr_t *, int, uint64_t, blkptr_t *, int, zio_alloc_list_t *, zio_t *,
@@ -88,8 +99,8 @@
 void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
 void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
 
-void metaslab_alloc_trace_init(void);
-void metaslab_alloc_trace_fini(void);
+void metaslab_stat_init(void);
+void metaslab_stat_fini(void);
 void metaslab_trace_init(zio_alloc_list_t *);
 void metaslab_trace_fini(zio_alloc_list_t *);
 
@@ -102,12 +113,15 @@
 boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int,
     zio_t *, int);
 void metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, zio_t *);
-
+void metaslab_class_evict_old(metaslab_class_t *, uint64_t);
 uint64_t metaslab_class_get_alloc(metaslab_class_t *);
 uint64_t metaslab_class_get_space(metaslab_class_t *);
 uint64_t metaslab_class_get_dspace(metaslab_class_t *);
 uint64_t metaslab_class_get_deferred(metaslab_class_t *);
 
+void metaslab_space_update(vdev_t *, metaslab_class_t *,
+    int64_t, int64_t, int64_t);
+
 metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *, int);
 void metaslab_group_destroy(metaslab_group_t *);
 void metaslab_group_activate(metaslab_group_t *);
@@ -122,7 +136,13 @@
 void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *, int);
 void metaslab_recalculate_weight_and_sort(metaslab_t *);
 void metaslab_disable(metaslab_t *);
-void metaslab_enable(metaslab_t *, boolean_t);
+void metaslab_enable(metaslab_t *, boolean_t, boolean_t);
+void metaslab_set_selected_txg(metaslab_t *, uint64_t);
+
+extern int metaslab_debug_load;
+
+range_seg_type_t metaslab_calculate_range_tree_type(vdev_t *vdev,
+    metaslab_t *msp, uint64_t *start, uint64_t *shift);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/metaslab_impl.h b/zfs/include/sys/metaslab_impl.h
index ca1104c..904249c 100644
--- a/zfs/include/sys/metaslab_impl.h
+++ b/zfs/include/sys/metaslab_impl.h

@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_IMPL_H
@@ -36,6 +36,7 @@
 #include <sys/vdev.h>
 #include <sys/txg.h>
 #include <sys/avl.h>
+#include <sys/multilist.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -136,6 +137,29 @@
 #define	WEIGHT_SET_COUNT(weight, x)		BF64_SET((weight), 0, 54, x)
 
 /*
+ * Per-allocator data structure.
+ */
+typedef struct metaslab_class_allocator {
+	metaslab_group_t	*mca_rotor;
+	uint64_t		mca_aliquot;
+
+	/*
+	 * The allocation throttle works on a reservation system. Whenever
+	 * an asynchronous zio wants to perform an allocation it must
+	 * first reserve the number of blocks that it wants to allocate.
+	 * If there aren't sufficient slots available for the pending zio
+	 * then that I/O is throttled until more slots free up. The current
+	 * number of reserved allocations is maintained by the mca_alloc_slots
+	 * refcount. The mca_alloc_max_slots value determines the maximum
+	 * number of allocations that the system allows. Gang blocks are
+	 * allowed to reserve slots even if we've reached the maximum
+	 * number of allocations allowed.
+	 */
+	uint64_t		mca_alloc_max_slots;
+	zfs_refcount_t		mca_alloc_slots;
+} ____cacheline_aligned metaslab_class_allocator_t;
+
+/*
  * A metaslab class encompasses a category of allocatable top-level vdevs.
  * Each top-level vdev is associated with a metaslab group which defines
  * the allocatable region for that vdev. Examples of these categories include
@@ -144,7 +168,7 @@
  * When a block allocation is requested from the SPA it is associated with a
  * metaslab_class_t, and only top-level vdevs (i.e. metaslab groups) belonging
  * to the class can be used to satisfy that request. Allocations are done
- * by traversing the metaslab groups that are linked off of the mc_rotor field.
+ * by traversing the metaslab groups that are linked off of the mca_rotor field.
  * This rotor points to the next metaslab group where allocations will be
  * attempted. Allocating a block is a 3 step process -- select the metaslab
  * group, select the metaslab, and then allocate the block. The metaslab
@@ -155,9 +179,7 @@
 struct metaslab_class {
 	kmutex_t		mc_lock;
 	spa_t			*mc_spa;
-	metaslab_group_t	*mc_rotor;
 	metaslab_ops_t		*mc_ops;
-	uint64_t		mc_aliquot;
 
 	/*
 	 * Track the number of metaslab groups that have been initialized
@@ -172,21 +194,6 @@
 	 */
 	boolean_t		mc_alloc_throttle_enabled;
 
-	/*
-	 * The allocation throttle works on a reservation system. Whenever
-	 * an asynchronous zio wants to perform an allocation it must
-	 * first reserve the number of blocks that it wants to allocate.
-	 * If there aren't sufficient slots available for the pending zio
-	 * then that I/O is throttled until more slots free up. The current
-	 * number of reserved allocations is maintained by the mc_alloc_slots
-	 * refcount. The mc_alloc_max_slots value determines the maximum
-	 * number of allocations that the system allows. Gang blocks are
-	 * allowed to reserve slots even if we've reached the maximum
-	 * number of allocations allowed.
-	 */
-	uint64_t		*mc_alloc_max_slots;
-	zfs_refcount_t		*mc_alloc_slots;
-
 	uint64_t		mc_alloc_groups; /* # of allocatable groups */
 
 	uint64_t		mc_alloc;	/* total allocated space */
@@ -194,9 +201,27 @@
 	uint64_t		mc_space;	/* total space (alloc + free) */
 	uint64_t		mc_dspace;	/* total deflated space */
 	uint64_t		mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
+
+	/*
+	 * List of all loaded metaslabs in the class, sorted in order of most
+	 * recent use.
+	 */
+	multilist_t		mc_metaslab_txg_list;
+
+	metaslab_class_allocator_t	mc_allocator[];
 };
 
 /*
+ * Per-allocator data structure.
+ */
+typedef struct metaslab_group_allocator {
+	uint64_t	mga_cur_max_alloc_queue_depth;
+	zfs_refcount_t	mga_alloc_queue_depth;
+	metaslab_t	*mga_primary;
+	metaslab_t	*mga_secondary;
+} metaslab_group_allocator_t;
+
+/*
  * Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs)
  * of a top-level vdev. They are linked together to form a circular linked
  * list and can belong to only one metaslab class. Metaslab groups may become
@@ -207,8 +232,6 @@
  */
 struct metaslab_group {
 	kmutex_t		mg_lock;
-	metaslab_t		**mg_primaries;
-	metaslab_t		**mg_secondaries;
 	avl_tree_t		mg_metaslab_tree;
 	uint64_t		mg_aliquot;
 	boolean_t		mg_allocatable;		/* can we allocate? */
@@ -243,7 +266,7 @@
 	 *
 	 * Each allocator in each metaslab group has a current queue depth
 	 * (mg_alloc_queue_depth[allocator]) and a current max queue depth
-	 * (mg_cur_max_alloc_queue_depth[allocator]), and each metaslab group
+	 * (mga_cur_max_alloc_queue_depth[allocator]), and each metaslab group
 	 * has an absolute max queue depth (mg_max_alloc_queue_depth).  We
 	 * add IOs to an allocator until the mg_alloc_queue_depth for that
 	 * allocator hits the cur_max. Every time an IO completes for a given
@@ -256,9 +279,7 @@
 	 * groups are unable to handle their share of allocations.
 	 */
 	uint64_t		mg_max_alloc_queue_depth;
-	uint64_t		*mg_cur_max_alloc_queue_depth;
-	zfs_refcount_t		*mg_alloc_queue_depth;
-	int			mg_allocators;
+
 	/*
 	 * A metalab group that can no longer allocate the minimum block
 	 * size will set mg_no_free_space. Once a metaslab group is out
@@ -276,6 +297,9 @@
 	boolean_t		mg_disabled_updating;
 	kmutex_t		mg_ms_disabled_lock;
 	kcondvar_t		mg_ms_disabled_cv;
+
+	int			mg_allocators;
+	metaslab_group_allocator_t	mg_allocator[];
 };
 
 /*
@@ -357,7 +381,7 @@
 	 * write to metaslab data on-disk (i.e flushing entries to
 	 * the metaslab's space map). It helps coordinate readers of
 	 * the metaslab's space map [see spa_vdev_remove_thread()]
-	 * with writers [see metaslab_sync()].
+	 * with writers [see metaslab_sync() or metaslab_flush()].
 	 *
 	 * Note that metaslab_load(), even though a reader, uses
 	 * a completely different mechanism to deal with the reading
@@ -378,6 +402,7 @@
 	range_tree_t	*ms_allocating[TXG_SIZE];
 	range_tree_t	*ms_allocatable;
 	uint64_t	ms_allocated_this_txg;
+	uint64_t	ms_allocating_total;
 
 	/*
 	 * The following range trees are accessed only from syncing context.
@@ -401,7 +426,6 @@
 
 	boolean_t	ms_condensing;	/* condensing? */
 	boolean_t	ms_condense_wanted;
-	uint64_t	ms_condense_checked_txg;
 
 	/*
 	 * The number of consumers which have disabled the metaslab.
@@ -414,6 +438,8 @@
 	 */
 	boolean_t	ms_loaded;
 	boolean_t	ms_loading;
+	kcondvar_t	ms_flush_cv;
+	boolean_t	ms_flushing;
 
 	/*
 	 * The following histograms count entries that are in the
@@ -474,6 +500,13 @@
 	 * stay cached.
 	 */
 	uint64_t	ms_selected_txg;
+	/*
+	 * ms_load/unload_time can be used for performance monitoring
+	 * (e.g. by dtrace or mdb).
+	 */
+	hrtime_t	ms_load_time;	/* time last loaded */
+	hrtime_t	ms_unload_time;	/* time last unloaded */
+	hrtime_t	ms_selected_time; /* time last allocated from */
 
 	uint64_t	ms_alloc_txg;	/* last successful alloc (debug only) */
 	uint64_t	ms_max_size;	/* maximum allocatable size	*/
@@ -493,12 +526,34 @@
 	 * only difference is that the ms_allocatable_by_size is ordered by
 	 * segment sizes.
 	 */
-	avl_tree_t	ms_allocatable_by_size;
+	zfs_btree_t		ms_allocatable_by_size;
+	zfs_btree_t		ms_unflushed_frees_by_size;
 	uint64_t	ms_lbas[MAX_LBAS];
 
 	metaslab_group_t *ms_group;	/* metaslab group		*/
 	avl_node_t	ms_group_node;	/* node in metaslab group tree	*/
 	txg_node_t	ms_txg_node;	/* per-txg dirty metaslab links	*/
+	avl_node_t	ms_spa_txg_node; /* node in spa_metaslabs_by_txg */
+	/*
+	 * Node in metaslab class's selected txg list
+	 */
+	multilist_node_t	ms_class_txg_node;
+
+	/*
+	 * Allocs and frees that are committed to the vdev log spacemap but
+	 * not yet to this metaslab's spacemap.
+	 */
+	range_tree_t	*ms_unflushed_allocs;
+	range_tree_t	*ms_unflushed_frees;
+
+	/*
+	 * We have flushed entries up to but not including this TXG. In
+	 * other words, all changes from this TXG and onward should not
+	 * be in this metaslab's space map and must be read from the
+	 * log space maps.
+	 */
+	uint64_t	ms_unflushed_txg;
+	boolean_t	ms_unflushed_dirty;
 
 	/* updated every time we are done syncing the metaslab's space map */
 	uint64_t	ms_synced_length;
@@ -506,6 +561,11 @@
 	boolean_t	ms_new;
 };
 
+typedef struct metaslab_unflushed_phys {
+	/* on-disk counterpart of ms_unflushed_txg */
+	uint64_t	msp_unflushed_txg;
+} metaslab_unflushed_phys_t;
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/mmp.h b/zfs/include/sys/mmp.h
index 527e332..ce9c449 100644
--- a/zfs/include/sys/mmp.h
+++ b/zfs/include/sys/mmp.h

@@ -63,6 +63,7 @@
 extern void mmp_signal_all_threads(void);
 
 /* Global tuning */
+extern int param_set_multihost_interval(ZFS_MODULE_PARAM_ARGS);
 extern ulong_t zfs_multihost_interval;
 extern uint_t zfs_multihost_fail_intervals;
 extern uint_t zfs_multihost_import_intervals;

diff --git a/zfs/include/sys/mntent.h b/zfs/include/sys/mntent.h
index fac751b..8d578f6 100644
--- a/zfs/include/sys/mntent.h
+++ b/zfs/include/sys/mntent.h

@@ -29,6 +29,8 @@
 #ifndef _SYS_MNTENT_H
 #define	_SYS_MNTENT_H
 
+#define	MNTMAXSTR	128
+
 #define	MNTTYPE_ZFS	"zfs"		/* ZFS file system */
 
 #define	MOUNT_SUCCESS	0x00		/* Success */
@@ -71,8 +73,15 @@
 #define	MNTOPT_STRICTATIME "strictatime" /* strict access time updates */
 #define	MNTOPT_NOSTRICTATIME "nostrictatime" /* No strict access time updates */
 #define	MNTOPT_LAZYTIME "lazytime"	/* Defer access time writing */
+#ifdef __linux__
 #define	MNTOPT_SETUID	"suid"		/* Both setuid and devices allowed */
 #define	MNTOPT_NOSETUID	"nosuid"	/* Neither setuid nor devices allowed */
+#elif defined(__FreeBSD__)
+#define	MNTOPT_SETUID	"setuid"	/* Set uid allowed */
+#define	MNTOPT_NOSETUID	"nosetuid"	/* Set uid not allowed */
+#else
+#error "unknown OS"
+#endif
 #define	MNTOPT_OWNER	"owner"		/* allow owner mount */
 #define	MNTOPT_NOOWNER	"noowner"	/* do not allow owner mount */
 #define	MNTOPT_REMOUNT	"remount"	/* change mount options */

diff --git a/zfs/include/sys/mod.h b/zfs/include/sys/mod.h
new file mode 100644
index 0000000..a5a73ed
--- /dev/null
+++ b/zfs/include/sys/mod.h

@@ -0,0 +1,40 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _SYS_MOD_H
+#define	_SYS_MOD_H
+
+#ifdef _KERNEL
+#include <sys/mod_os.h>
+#else
+/*
+ * Exported symbols
+ */
+#define	EXPORT_SYMBOL(x)
+
+#define	ZFS_MODULE_DESCRIPTION(s)
+#define	ZFS_MODULE_AUTHOR(s)
+#define	ZFS_MODULE_LICENSE(s)
+#define	ZFS_MODULE_VERSION(s)
+#endif
+
+#endif /* _SYS_MOD_H */

diff --git a/zfs/include/sys/multilist.h b/zfs/include/sys/multilist.h
index 0c7b407..26f37c3 100644
--- a/zfs/include/sys/multilist.h
+++ b/zfs/include/sys/multilist.h

@@ -71,8 +71,9 @@
 	multilist_sublist_index_func_t	*ml_index_func;
 };
 
+void multilist_create(multilist_t *, size_t, size_t,
+    multilist_sublist_index_func_t *);
 void multilist_destroy(multilist_t *);
-multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
 
 void multilist_insert(multilist_t *, void *);
 void multilist_remove(multilist_t *, void *);

diff --git a/zfs/include/sys/nvpair.h b/zfs/include/sys/nvpair.h
index e856793..b0be8bd 100644
--- a/zfs/include/sys/nvpair.h
+++ b/zfs/include/sys/nvpair.h

@@ -62,7 +62,7 @@
 	DATA_TYPE_UINT8,
 	DATA_TYPE_BOOLEAN_ARRAY,
 	DATA_TYPE_INT8_ARRAY,
-#if !defined(_KERNEL)
+#if !defined(_KERNEL) && !defined(_STANDALONE)
 	DATA_TYPE_UINT8_ARRAY,
 	DATA_TYPE_DOUBLE
 #else
@@ -191,7 +191,7 @@
 int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t);
 int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
 int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t);
-#if !defined(_KERNEL)
+#if !defined(_KERNEL) && !defined(_STANDALONE)
 int nvlist_add_double(nvlist_t *, const char *, double);
 #endif
 
@@ -228,7 +228,7 @@
     nvlist_t ***, uint_t *);
 int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *);
 int nvlist_lookup_pairs(nvlist_t *, int, ...);
-#if !defined(_KERNEL)
+#if !defined(_KERNEL) && !defined(_STANDALONE)
 int nvlist_lookup_double(nvlist_t *, const char *, double *);
 #endif
 
@@ -269,7 +269,7 @@
 int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
 int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
 int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
-#if !defined(_KERNEL)
+#if !defined(_KERNEL) && !defined(_STANDALONE)
 int nvpair_value_double(nvpair_t *, double *);
 #endif
 

diff --git a/zfs/include/sys/objlist.h b/zfs/include/sys/objlist.h
new file mode 100644
index 0000000..a124a61
--- /dev/null
+++ b/zfs/include/sys/objlist.h

@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#ifndef	_OBJLIST_H
+#define	_OBJLIST_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include	<sys/zfs_context.h>
+
+typedef struct objlist_node {
+	list_node_t	on_node;
+	uint64_t	on_object;
+} objlist_node_t;
+
+typedef struct objlist {
+	list_t		ol_list; /* List of objlist_node_t. */
+	/*
+	 * Last object looked up. Used to assert that objects are being looked
+	 * up in ascending order.
+	 */
+	uint64_t	ol_last_lookup;
+} objlist_t;
+
+objlist_t *objlist_create(void);
+void objlist_destroy(objlist_t *);
+boolean_t objlist_exists(objlist_t *, uint64_t);
+void objlist_insert(objlist_t *, uint64_t);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _OBJLIST_H */

diff --git a/zfs/include/sys/pathname.h b/zfs/include/sys/pathname.h
index d79cc5c..52f2131 100644
--- a/zfs/include/sys/pathname.h
+++ b/zfs/include/sys/pathname.h

@@ -54,10 +54,6 @@
  */
 typedef struct pathname {
 	char	*pn_buf;		/* underlying storage */
-#if 0 /* unused in ZoL */
-	char	*pn_path;		/* remaining pathname */
-	size_t	pn_pathlen;		/* remaining length */
-#endif
 	size_t	pn_bufsize;		/* total size of pn_buf */
 } pathname_t;
 

diff --git a/zfs/include/sys/policy.h b/zfs/include/sys/policy.h
deleted file mode 100644
index 23d7d4d..0000000
--- a/zfs/include/sys/policy.h
+++ /dev/null

@@ -1,60 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015, Joyent, Inc. All rights reserved.
- * Copyright (c) 2016, Lawrence Livermore National Security, LLC.
- */
-
-#ifndef _SYS_POLICY_H
-#define	_SYS_POLICY_H
-
-#ifdef _KERNEL
-
-#include <sys/cred.h>
-#include <sys/types.h>
-#include <sys/xvattr.h>
-#include <sys/zpl.h>
-
-int secpolicy_nfs(const cred_t *);
-int secpolicy_sys_config(const cred_t *, boolean_t);
-int secpolicy_vnode_access2(const cred_t *, struct inode *,
-    uid_t, mode_t, mode_t);
-int secpolicy_vnode_any_access(const cred_t *, struct inode *, uid_t);
-int secpolicy_vnode_chown(const cred_t *, uid_t);
-int secpolicy_vnode_create_gid(const cred_t *);
-int secpolicy_vnode_remove(const cred_t *);
-int secpolicy_vnode_setdac(const cred_t *, uid_t);
-int secpolicy_vnode_setid_retain(const cred_t *, boolean_t);
-int secpolicy_vnode_setids_setgids(const cred_t *, gid_t);
-int secpolicy_zinject(const cred_t *);
-int secpolicy_zfs(const cred_t *);
-void secpolicy_setid_clear(vattr_t *, cred_t *);
-int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *,
-    const vattr_t *, cred_t *);
-int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t);
-int secpolicy_vnode_setattr(cred_t *, struct inode *, struct vattr *,
-    const struct vattr *, int, int (void *, int, cred_t *), void *);
-int secpolicy_basic_link(const cred_t *);
-
-#endif /* _KERNEL */
-#endif /* _SYS_POLICY_H */

diff --git a/zfs/module/zfs/qat.h b/zfs/include/sys/qat.h
similarity index 100%
rename from zfs/module/zfs/qat.h
rename to zfs/include/sys/qat.h


diff --git a/zfs/include/sys/range_tree.h b/zfs/include/sys/range_tree.h
index ae1a0c3..daa39e2 100644
--- a/zfs/include/sys/range_tree.h
+++ b/zfs/include/sys/range_tree.h

@@ -24,13 +24,13 @@
  */
 
 /*
- * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_RANGE_TREE_H
 #define	_SYS_RANGE_TREE_H
 
-#include <sys/avl.h>
+#include <sys/btree.h>
 #include <sys/dmu.h>
 
 #ifdef	__cplusplus
@@ -41,20 +41,31 @@
 
 typedef struct range_tree_ops range_tree_ops_t;
 
+typedef enum range_seg_type {
+	RANGE_SEG32,
+	RANGE_SEG64,
+	RANGE_SEG_GAP,
+	RANGE_SEG_NUM_TYPES,
+} range_seg_type_t;
+
 /*
  * Note: the range_tree may not be accessed concurrently; consumers
  * must provide external locking if required.
  */
 typedef struct range_tree {
-	avl_tree_t	rt_root;	/* offset-ordered segment AVL tree */
+	zfs_btree_t	rt_root;	/* offset-ordered segment b-tree */
 	uint64_t	rt_space;	/* sum of all segments in the map */
-	uint64_t	rt_gap;		/* allowable inter-segment gap */
-	range_tree_ops_t *rt_ops;
-
-	/* rt_avl_compare should only be set if rt_arg is an AVL tree */
+	range_seg_type_t rt_type;	/* type of range_seg_t in use */
+	/*
+	 * All data that is stored in the range tree must have a start higher
+	 * than or equal to rt_start, and all sizes and offsets must be
+	 * multiples of 1 << rt_shift.
+	 */
+	uint8_t		rt_shift;
+	uint64_t	rt_start;
+	const range_tree_ops_t *rt_ops;
 	void		*rt_arg;
-	int (*rt_avl_compare)(const void *, const void *);
-
+	uint64_t	rt_gap;		/* allowable inter-segment gap */
 
 	/*
 	 * The rt_histogram maintains a histogram of ranges. Each bucket,
@@ -64,37 +75,221 @@
 	uint64_t	rt_histogram[RANGE_TREE_HISTOGRAM_SIZE];
 } range_tree_t;
 
-typedef struct range_seg {
-	avl_node_t	rs_node;	/* AVL node */
-	avl_node_t	rs_pp_node;	/* AVL picker-private node */
+typedef struct range_seg32 {
+	uint32_t	rs_start;	/* starting offset of this segment */
+	uint32_t	rs_end;		/* ending offset (non-inclusive) */
+} range_seg32_t;
+
+/*
+ * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
+ * require 64-bit integers for ranges.
+ */
+typedef struct range_seg64 {
+	uint64_t	rs_start;	/* starting offset of this segment */
+	uint64_t	rs_end;		/* ending offset (non-inclusive) */
+} range_seg64_t;
+
+typedef struct range_seg_gap {
 	uint64_t	rs_start;	/* starting offset of this segment */
 	uint64_t	rs_end;		/* ending offset (non-inclusive) */
 	uint64_t	rs_fill;	/* actual fill if gap mode is on */
-} range_seg_t;
+} range_seg_gap_t;
+
+/*
+ * This type needs to be the largest of the range segs, since it will be stack
+ * allocated and then cast to the actual type to do tree operations.
+ */
+typedef range_seg_gap_t range_seg_max_t;
+
+/*
+ * This is just for clarity of code purposes, so we can make it clear that a
+ * pointer is to a range seg of some type; when we need to do the actual math,
+ * we'll figure out the real type.
+ */
+typedef void range_seg_t;
 
 struct range_tree_ops {
 	void    (*rtop_create)(range_tree_t *rt, void *arg);
 	void    (*rtop_destroy)(range_tree_t *rt, void *arg);
-	void	(*rtop_add)(range_tree_t *rt, range_seg_t *rs, void *arg);
-	void    (*rtop_remove)(range_tree_t *rt, range_seg_t *rs, void *arg);
+	void	(*rtop_add)(range_tree_t *rt, void *rs, void *arg);
+	void    (*rtop_remove)(range_tree_t *rt, void *rs, void *arg);
 	void	(*rtop_vacate)(range_tree_t *rt, void *arg);
 };
 
+/*
+ * Return the start of segment rs exactly as it is stored, i.e. without
+ * applying rt->rt_shift or rt->rt_start.  rt->rt_type selects which concrete
+ * segment layout rs points to.
+ */
+static inline uint64_t
+rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		return (((const range_seg32_t *)rs)->rs_start);
+	case RANGE_SEG64:
+		return (((const range_seg64_t *)rs)->rs_start);
+	case RANGE_SEG_GAP:
+		return (((const range_seg_gap_t *)rs)->rs_start);
+	default:
+		/* Unknown layout: fail hard rather than misread rs. */
+		VERIFY(0);
+		return (0);
+	}
+}
+
+/*
+ * Return the (non-inclusive) end of segment rs exactly as it is stored, i.e.
+ * without applying rt->rt_shift or rt->rt_start.  rt->rt_type selects which
+ * concrete segment layout rs points to.
+ */
+static inline uint64_t
+rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		return (((const range_seg32_t *)rs)->rs_end);
+	case RANGE_SEG64:
+		return (((const range_seg64_t *)rs)->rs_end);
+	case RANGE_SEG_GAP:
+		return (((const range_seg_gap_t *)rs)->rs_end);
+	default:
+		/* Unknown layout: fail hard rather than misread rs. */
+		VERIFY(0);
+		return (0);
+	}
+}
+
+/*
+ * Return the fill of segment rs in raw (unshifted) units.  Only gap segments
+ * store a fill; the 32- and 64-bit layouts have no rs_fill field, so their
+ * fill is derived as rs_end - rs_start.
+ */
+static inline uint64_t
+rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32: {
+		const range_seg32_t *r32 = (const range_seg32_t *)rs;
+		return (r32->rs_end - r32->rs_start);
+	}
+	case RANGE_SEG64: {
+		const range_seg64_t *r64 = (const range_seg64_t *)rs;
+		return (r64->rs_end - r64->rs_start);
+	}
+	case RANGE_SEG_GAP:
+		return (((const range_seg_gap_t *)rs)->rs_fill);
+	default:
+		/* Unknown layout: fail hard rather than misread rs. */
+		VERIFY(0);
+		return (0);
+	}
+}
+
+/*
+ * Translate the stored start of rs into an absolute offset: scale the raw
+ * value up by rt->rt_shift, then rebase it onto rt->rt_start.
+ */
+static inline uint64_t
+rs_get_start(const range_seg_t *rs, const range_tree_t *rt)
+{
+	uint64_t raw = rs_get_start_raw(rs, rt);
+
+	return ((raw << rt->rt_shift) + rt->rt_start);
+}
+
+/*
+ * Translate the stored end of rs into an absolute offset: scale the raw
+ * value up by rt->rt_shift, then rebase it onto rt->rt_start.
+ */
+static inline uint64_t
+rs_get_end(const range_seg_t *rs, const range_tree_t *rt)
+{
+	uint64_t raw = rs_get_end_raw(rs, rt);
+
+	return ((raw << rt->rt_shift) + rt->rt_start);
+}
+
+/*
+ * Return the fill of rs scaled up by rt->rt_shift.  Unlike start and end,
+ * the fill is a size, so rt->rt_start is not added.
+ */
+static inline uint64_t
+rs_get_fill(const range_seg_t *rs, const range_tree_t *rt)
+{
+	uint64_t raw = rs_get_fill_raw(rs, rt);
+
+	return (raw << rt->rt_shift);
+}
+
+/*
+ * Store a raw (already rebased and shifted) start value into segment rs,
+ * using the layout selected by rt->rt_type.  For RANGE_SEG32 the value is
+ * asserted to fit in 32 bits before it is narrowed.
+ */
+static inline void
+rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		ASSERT3U(start, <=, UINT32_MAX);
+		((range_seg32_t *)rs)->rs_start = (uint32_t)start;
+		break;
+	case RANGE_SEG64:
+		((range_seg64_t *)rs)->rs_start = start;
+		break;
+	case RANGE_SEG_GAP:
+		((range_seg_gap_t *)rs)->rs_start = start;
+		break;
+	default:
+		/* Unknown layout: fail hard rather than scribble on rs. */
+		VERIFY(0);
+	}
+}
+
+/*
+ * Store a raw (already rebased and shifted) end value into segment rs,
+ * using the layout selected by rt->rt_type.  For RANGE_SEG32 the value is
+ * asserted to fit in 32 bits before it is narrowed.
+ */
+static inline void
+rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		ASSERT3U(end, <=, UINT32_MAX);
+		((range_seg32_t *)rs)->rs_end = (uint32_t)end;
+		break;
+	case RANGE_SEG64:
+		((range_seg64_t *)rs)->rs_end = end;
+		break;
+	case RANGE_SEG_GAP:
+		((range_seg_gap_t *)rs)->rs_end = end;
+		break;
+	default:
+		/* Unknown layout: fail hard rather than scribble on rs. */
+		VERIFY(0);
+	}
+}
+
+/*
+ * Store a raw fill value into segment rs.  Only gap segments have an rs_fill
+ * field; for the 32- and 64-bit layouts nothing is written and the call
+ * merely asserts that fill equals rs_end - rs_start.
+ */
+static inline void
+rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
+{
+	/* RANGE_SEG_NUM_TYPES is a count, not a valid segment type. */
+	ASSERT3U(rt->rt_type, <, RANGE_SEG_NUM_TYPES);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		/* fall through */
+	case RANGE_SEG64:
+		/* No fill field here: the fill is implied by the bounds. */
+		ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs,
+		    rt));
+		break;
+	case RANGE_SEG_GAP:
+		((range_seg_gap_t *)rs)->rs_fill = fill;
+		break;
+	default:
+		/* Unknown layout: fail hard rather than scribble on rs. */
+		VERIFY(0);
+	}
+}
+
+/*
+ * Record an absolute start offset in segment rs.  The offset must not lie
+ * below rt->rt_start and must be aligned to 1 << rt->rt_shift (both
+ * asserted); it is rebased and shifted down before being stored raw.
+ */
+static inline void
+rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start)
+{
+	uint64_t rel;
+
+	ASSERT3U(start, >=, rt->rt_start);
+	ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift));
+	rel = start - rt->rt_start;
+	rs_set_start_raw(rs, rt, rel >> rt->rt_shift);
+}
+
+/*
+ * Record an absolute end offset in segment rs.  The offset must not lie
+ * below rt->rt_start and must be aligned to 1 << rt->rt_shift (both
+ * asserted); it is rebased and shifted down before being stored raw.
+ */
+static inline void
+rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end)
+{
+	uint64_t rel;
+
+	ASSERT3U(end, >=, rt->rt_start);
+	ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift));
+	rel = end - rt->rt_start;
+	rs_set_end_raw(rs, rt, rel >> rt->rt_shift);
+}
+
+/*
+ * Record a fill for segment rs.  The fill must be aligned to
+ * 1 << rt->rt_shift (asserted); it is shifted down before being handed to
+ * rs_set_fill_raw().  As a size it is not rebased against rt->rt_start.
+ */
+static inline void
+rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
+{
+	uint64_t raw_fill = fill >> rt->rt_shift;
+
+	ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
+	rs_set_fill_raw(rs, rt, raw_fill);
+}
+
 typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
 
-void range_tree_init(void);
-void range_tree_fini(void);
-range_tree_t *range_tree_create_impl(range_tree_ops_t *ops, void *arg,
-    int (*avl_compare) (const void *, const void *), uint64_t gap);
-range_tree_t *range_tree_create(range_tree_ops_t *ops, void *arg);
+range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
+    range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+    uint64_t gap);
+range_tree_t *range_tree_create(const range_tree_ops_t *ops,
+    range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
 void range_tree_destroy(range_tree_t *rt);
 boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
+range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
+boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
+    uint64_t *ostart, uint64_t *osize);
 void range_tree_verify_not_present(range_tree_t *rt,
     uint64_t start, uint64_t size);
-range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
 void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
     uint64_t newstart, uint64_t newsize);
 uint64_t range_tree_space(range_tree_t *rt);
+uint64_t range_tree_numsegs(range_tree_t *rt);
 boolean_t range_tree_is_empty(range_tree_t *rt);
 void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
 void range_tree_stat_verify(range_tree_t *rt);
@@ -112,12 +307,10 @@
 void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
 range_seg_t *range_tree_first(range_tree_t *rt);
 
-void rt_avl_create(range_tree_t *rt, void *arg);
-void rt_avl_destroy(range_tree_t *rt, void *arg);
-void rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_avl_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_avl_vacate(range_tree_t *rt, void *arg);
-extern struct range_tree_ops rt_avl_ops;
+void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
+    range_tree_t *removefrom, range_tree_t *addto);
+void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
+    range_tree_t *addto);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/refcount.h b/zfs/include/sys/refcount.h
deleted file mode 100644
index c8f5862..0000000
--- a/zfs/include/sys/refcount.h
+++ /dev/null

@@ -1,126 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
- */
-
-#ifndef	_SYS_REFCOUNT_H
-#define	_SYS_REFCOUNT_H
-
-#include <sys/inttypes.h>
-#include <sys/list.h>
-#include <sys/zfs_context.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * If the reference is held only by the calling function and not any
- * particular object, use FTAG (which is a string) for the holder_tag.
- * Otherwise, use the object that holds the reference.
- */
-#define	FTAG ((char *)(uintptr_t)__func__)
-
-#ifdef	ZFS_DEBUG
-typedef struct reference {
-	list_node_t ref_link;
-	const void *ref_holder;
-	uint64_t ref_number;
-	uint8_t *ref_removed;
-} reference_t;
-
-typedef struct refcount {
-	kmutex_t rc_mtx;
-	boolean_t rc_tracked;
-	list_t rc_list;
-	list_t rc_removed;
-	uint64_t rc_count;
-	uint64_t rc_removed_count;
-} zfs_refcount_t;
-
-/*
- * Note: zfs_refcount_t must be initialized with
- * refcount_create[_untracked]()
- */
-
-void zfs_refcount_create(zfs_refcount_t *);
-void zfs_refcount_create_untracked(zfs_refcount_t *);
-void zfs_refcount_create_tracked(zfs_refcount_t *);
-void zfs_refcount_destroy(zfs_refcount_t *);
-void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t);
-int zfs_refcount_is_zero(zfs_refcount_t *);
-int64_t zfs_refcount_count(zfs_refcount_t *);
-int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
-int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
-int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
-int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
-void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
-void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *,
-    const void *);
-void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
-    const void *, const void *);
-boolean_t zfs_refcount_held(zfs_refcount_t *, const void *);
-boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *);
-
-void zfs_refcount_init(void);
-void zfs_refcount_fini(void);
-
-#else	/* ZFS_DEBUG */
-
-typedef struct refcount {
-	uint64_t rc_count;
-} zfs_refcount_t;
-
-#define	zfs_refcount_create(rc) ((rc)->rc_count = 0)
-#define	zfs_refcount_create_untracked(rc) ((rc)->rc_count = 0)
-#define	zfs_refcount_create_tracked(rc) ((rc)->rc_count = 0)
-#define	zfs_refcount_destroy(rc) ((rc)->rc_count = 0)
-#define	zfs_refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
-#define	zfs_refcount_is_zero(rc) ((rc)->rc_count == 0)
-#define	zfs_refcount_count(rc) ((rc)->rc_count)
-#define	zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
-#define	zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
-#define	zfs_refcount_add_many(rc, number, holder) \
-	atomic_add_64_nv(&(rc)->rc_count, number)
-#define	zfs_refcount_remove_many(rc, number, holder) \
-	atomic_add_64_nv(&(rc)->rc_count, -number)
-#define	zfs_refcount_transfer(dst, src) { \
-	uint64_t __tmp = (src)->rc_count; \
-	atomic_add_64(&(src)->rc_count, -__tmp); \
-	atomic_add_64(&(dst)->rc_count, __tmp); \
-}
-#define	zfs_refcount_transfer_ownership(rc, ch, nh)		((void)0)
-#define	zfs_refcount_transfer_ownership_many(rc, nr, ch, nh)	((void)0)
-#define	zfs_refcount_held(rc, holder)			((rc)->rc_count > 0)
-#define	zfs_refcount_not_held(rc, holder)		(B_TRUE)
-
-#define	zfs_refcount_init()
-#define	zfs_refcount_fini()
-
-#endif	/* ZFS_DEBUG */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SYS_REFCOUNT_H */

diff --git a/zfs/include/sys/rrwlock.h b/zfs/include/sys/rrwlock.h
index e1c1756..8d296ef 100644
--- a/zfs/include/sys/rrwlock.h
+++ b/zfs/include/sys/rrwlock.h

@@ -37,7 +37,9 @@
 
 #include <sys/inttypes.h>
 #include <sys/zfs_context.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
+
+extern uint_t rrw_tsd_key;
 
 /*
  * A reader-writer lock implementation that allows re-entrant reads, but

diff --git a/zfs/include/sys/sa.h b/zfs/include/sys/sa.h
index 432e0bc..98eb8f9 100644
--- a/zfs/include/sys/sa.h
+++ b/zfs/include/sys/sa.h

@@ -158,7 +158,7 @@
 void sa_handle_unlock(sa_handle_t *);
 
 #ifdef _KERNEL
-int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *);
+int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, zfs_uio_t *);
 int sa_add_projid(sa_handle_t *, dmu_tx_t *, uint64_t);
 #endif
 

diff --git a/zfs/include/sys/sa_impl.h b/zfs/include/sys/sa_impl.h
index 7eddd87..fa10aff 100644
--- a/zfs/include/sys/sa_impl.h
+++ b/zfs/include/sys/sa_impl.h

@@ -28,7 +28,7 @@
 #define	_SYS_SA_IMPL_H
 
 #include <sys/dmu.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/list.h>
 
 /*

diff --git a/zfs/include/sys/sdt.h b/zfs/include/sys/sdt.h
deleted file mode 100644
index 9704072..0000000
--- a/zfs/include/sys/sdt.h
+++ /dev/null

@@ -1,58 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SDT_H
-#define	_SYS_SDT_H
-
-#ifndef _KERNEL
-
-#define	ZFS_PROBE(a)			((void) 0)
-#define	ZFS_PROBE1(a, c)		((void) 0)
-#define	ZFS_PROBE2(a, c, e)		((void) 0)
-#define	ZFS_PROBE3(a, c, e, g)		((void) 0)
-#define	ZFS_PROBE4(a, c, e, g, i)	((void) 0)
-
-#endif /* _KERNEL */
-
-/*
- * The set-error SDT probe is extra static, in that we declare its fake
- * function literally, rather than with the DTRACE_PROBE1() macro.  This is
- * necessary so that SET_ERROR() can evaluate to a value, which wouldn't
- * be possible if it required multiple statements (to declare the function
- * and then call it).
- *
- * SET_ERROR() uses the comma operator so that it can be used without much
- * additional code.  For example, "return (EINVAL);" becomes
- * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
- * twice, so it should not have side effects (e.g. something like:
- * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
- */
-extern void __set_error(const char *file, const char *func, int line, int err);
-#undef SET_ERROR
-#define	SET_ERROR(err) \
-	(__set_error(__FILE__, __func__, __LINE__, err), err)
-
-#endif /* _SYS_SDT_H */

diff --git a/zfs/include/sys/sha2.h b/zfs/include/sys/sha2.h
deleted file mode 100644
index 9039835..0000000
--- a/zfs/include/sys/sha2.h
+++ /dev/null

@@ -1,155 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
-
-#ifndef _SYS_SHA2_H
-#define	_SYS_SHA2_H
-
-#ifdef  _KERNEL
-#include <sys/types.h>		/* for uint_* */
-#else
-#include <stdint.h>
-#endif
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#define	SHA2_HMAC_MIN_KEY_LEN	1	/* SHA2-HMAC min key length in bytes */
-#define	SHA2_HMAC_MAX_KEY_LEN	INT_MAX	/* SHA2-HMAC max key length in bytes */
-
-#define	SHA256_DIGEST_LENGTH	32	/* SHA256 digest length in bytes */
-#define	SHA384_DIGEST_LENGTH	48	/* SHA384 digest length in bytes */
-#define	SHA512_DIGEST_LENGTH	64	/* SHA512 digest length in bytes */
-
-/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
-#define	SHA512_224_DIGEST_LENGTH	28	/* SHA512/224 digest length */
-#define	SHA512_256_DIGEST_LENGTH	32	/* SHA512/256 digest length */
-
-#define	SHA256_HMAC_BLOCK_SIZE	64	/* SHA256-HMAC block size */
-#define	SHA512_HMAC_BLOCK_SIZE	128	/* SHA512-HMAC block size */
-
-#define	SHA256			0
-#define	SHA256_HMAC		1
-#define	SHA256_HMAC_GEN		2
-#define	SHA384			3
-#define	SHA384_HMAC		4
-#define	SHA384_HMAC_GEN		5
-#define	SHA512			6
-#define	SHA512_HMAC		7
-#define	SHA512_HMAC_GEN		8
-#define	SHA512_224		9
-#define	SHA512_256		10
-
-/*
- * SHA2 context.
- * The contents of this structure are a private interface between the
- * Init/Update/Final calls of the functions defined below.
- * Callers must never attempt to read or write any of the fields
- * in this structure directly.
- */
-typedef struct 	{
-	uint32_t algotype;		/* Algorithm Type */
-
-	/* state (ABCDEFGH) */
-	union {
-		uint32_t s32[8];	/* for SHA256 */
-		uint64_t s64[8];	/* for SHA384/512 */
-	} state;
-	/* number of bits */
-	union {
-		uint32_t c32[2];	/* for SHA256 , modulo 2^64 */
-		uint64_t c64[2];	/* for SHA384/512, modulo 2^128 */
-	} count;
-	union {
-		uint8_t		buf8[128];	/* undigested input */
-		uint32_t	buf32[32];	/* realigned input */
-		uint64_t	buf64[16];	/* realigned input */
-	} buf_un;
-} SHA2_CTX;
-
-typedef SHA2_CTX SHA256_CTX;
-typedef SHA2_CTX SHA384_CTX;
-typedef SHA2_CTX SHA512_CTX;
-
-extern void SHA2Init(uint64_t mech, SHA2_CTX *);
-
-extern void SHA2Update(SHA2_CTX *, const void *, size_t);
-
-extern void SHA2Final(void *, SHA2_CTX *);
-
-extern void SHA256Init(SHA256_CTX *);
-
-extern void SHA256Update(SHA256_CTX *, const void *, size_t);
-
-extern void SHA256Final(void *, SHA256_CTX *);
-
-extern void SHA384Init(SHA384_CTX *);
-
-extern void SHA384Update(SHA384_CTX *, const void *, size_t);
-
-extern void SHA384Final(void *, SHA384_CTX *);
-
-extern void SHA512Init(SHA512_CTX *);
-
-extern void SHA512Update(SHA512_CTX *, const void *, size_t);
-
-extern void SHA512Final(void *, SHA512_CTX *);
-
-#ifdef _SHA2_IMPL
-/*
- * The following types/functions are all private to the implementation
- * of the SHA2 functions and must not be used by consumers of the interface
- */
-
-/*
- * List of support mechanisms in this module.
- *
- * It is important to note that in the module, division or modulus calculations
- * are used on the enumerated type to determine which mechanism is being used;
- * therefore, changing the order or additional mechanisms should be done
- * carefully
- */
-typedef enum sha2_mech_type {
-	SHA256_MECH_INFO_TYPE,		/* SUN_CKM_SHA256 */
-	SHA256_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC */
-	SHA256_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC_GENERAL */
-	SHA384_MECH_INFO_TYPE,		/* SUN_CKM_SHA384 */
-	SHA384_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC */
-	SHA384_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC_GENERAL */
-	SHA512_MECH_INFO_TYPE,		/* SUN_CKM_SHA512 */
-	SHA512_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC */
-	SHA512_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC_GENERAL */
-	SHA512_224_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_224 */
-	SHA512_256_MECH_INFO_TYPE	/* SUN_CKM_SHA512_256 */
-} sha2_mech_type_t;
-
-#endif /* _SHA2_IMPL */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_H */

diff --git a/zfs/include/sys/spa.h b/zfs/include/sys/spa.h
index 4c1dcdc..42f7fec 100644
--- a/zfs/include/sys/spa.h
+++ b/zfs/include/sys/spa.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -28,6 +28,8 @@
  * Copyright 2017 Joyent, Inc.
  * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2019, Klara Inc.
  */
 
 #ifndef _SYS_SPA_H
@@ -42,6 +44,8 @@
 #include <sys/fs/zfs.h>
 #include <sys/spa_checksum.h>
 #include <sys/dmu.h>
+#include <sys/space_map.h>
+#include <sys/bitops.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -62,71 +66,13 @@
 typedef struct ddt_entry ddt_entry_t;
 typedef struct zbookmark_phys zbookmark_phys_t;
 
+struct bpobj;
+struct bplist;
 struct dsl_pool;
 struct dsl_dataset;
 struct dsl_crypto_params;
 
 /*
- * General-purpose 32-bit and 64-bit bitfield encodings.
- */
-#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
-#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
-#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
-#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))
-
-#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
-#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)
-
-#define	BF32_SET(x, low, len, val) do { \
-	ASSERT3U(val, <, 1U << (len)); \
-	ASSERT3U(low + len, <=, 32); \
-	(x) ^= BF32_ENCODE((x >> low) ^ (val), low, len); \
-_NOTE(CONSTCOND) } while (0)
-
-#define	BF64_SET(x, low, len, val) do { \
-	ASSERT3U(val, <, 1ULL << (len)); \
-	ASSERT3U(low + len, <=, 64); \
-	((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)); \
-_NOTE(CONSTCOND) } while (0)
-
-#define	BF32_GET_SB(x, low, len, shift, bias)	\
-	((BF32_GET(x, low, len) + (bias)) << (shift))
-#define	BF64_GET_SB(x, low, len, shift, bias)	\
-	((BF64_GET(x, low, len) + (bias)) << (shift))
-
-#define	BF32_SET_SB(x, low, len, shift, bias, val) do { \
-	ASSERT(IS_P2ALIGNED(val, 1U << shift)); \
-	ASSERT3S((val) >> (shift), >=, bias); \
-	BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \
-_NOTE(CONSTCOND) } while (0)
-#define	BF64_SET_SB(x, low, len, shift, bias, val) do { \
-	ASSERT(IS_P2ALIGNED(val, 1ULL << shift)); \
-	ASSERT3S((val) >> (shift), >=, bias); \
-	BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \
-_NOTE(CONSTCOND) } while (0)
-
-/*
- * We currently support block sizes from 512 bytes to 16MB.
- * The benefits of larger blocks, and thus larger IO, need to be weighed
- * against the cost of COWing a giant block to modify one byte, and the
- * large latency of reading or writing a large block.
- *
- * Note that although blocks up to 16MB are supported, the recordsize
- * property can not be set larger than zfs_max_recordsize (default 1MB).
- * See the comment near zfs_max_recordsize in dsl_dataset.c for details.
- *
- * Note that although the LSIZE field of the blkptr_t can store sizes up
- * to 32MB, the dnode's dn_datablkszsec can only store sizes up to
- * 32MB - 512 bytes.  Therefore, we limit SPA_MAXBLOCKSIZE to 16MB.
- */
-#define	SPA_MINBLOCKSHIFT	9
-#define	SPA_OLD_MAXBLOCKSHIFT	17
-#define	SPA_MAXBLOCKSHIFT	24
-#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
-#define	SPA_OLD_MAXBLOCKSIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)
-#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)
-
-/*
  * Alignment Shift (ashift) is an immutable, internal top-level vdev property
  * which can only be set at vdev creation time. Physical writes are always done
  * according to it, which makes 2^ashift the smallest possible IO on a vdev.
@@ -155,6 +101,7 @@
 
 #define	SPA_COMPRESSBITS	7
 #define	SPA_VDEVBITS		24
+#define	SPA_COMPRESSMASK	((1U << SPA_COMPRESSBITS) - 1)
 
 /*
  * All SPA data is represented by 128-bit data virtual addresses (DVAs).
@@ -402,6 +349,7 @@
 typedef enum bp_embedded_type {
 	BP_EMBEDDED_TYPE_DATA,
 	BP_EMBEDDED_TYPE_RESERVED, /* Reserved for Delphix byteswap feature. */
+	BP_EMBEDDED_TYPE_REDACTED,
 	NUM_BP_EMBEDDED_TYPES
 } bp_embedded_type_t;
 
@@ -436,6 +384,12 @@
 /*
  * Macros to get and set fields in a bp or DVA.
  */
+
+/*
+ * Note, for gang blocks, DVA_GET_ASIZE() is the total space allocated for
+ * this gang DVA including its children BP's.  The space allocated at this
+ * DVA's vdev/offset is vdev_gang_header_asize(vdev).
+ */
 #define	DVA_GET_ASIZE(dva)	\
 	BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
 #define	DVA_SET_ASIZE(dva, x)	\
@@ -524,6 +478,9 @@
 #define	BP_GET_BYTEORDER(bp)		BF64_GET((bp)->blk_prop, 63, 1)
 #define	BP_SET_BYTEORDER(bp, x)		BF64_SET((bp)->blk_prop, 63, 1, x)
 
+#define	BP_GET_FREE(bp)			BF64_GET((bp)->blk_fill, 0, 1)
+#define	BP_SET_FREE(bp, x)		BF64_SET((bp)->blk_fill, 0, 1, x)
+
 #define	BP_PHYSICAL_BIRTH(bp)		\
 	(BP_IS_EMBEDDED(bp) ? 0 : \
 	(bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
@@ -602,6 +559,14 @@
 #define	BP_IS_HOLE(bp) \
 	(!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp)))
 
+/*
+ * Mark bp as redacted: flag it as an embedded block pointer and set its
+ * embedded type to BP_EMBEDDED_TYPE_REDACTED.
+ *
+ * NOTE(review): this expands to a bare brace block rather than
+ * do { } while (0), so "BP_SET_REDACTED(bp);" placed directly before an
+ * "else" would not parse -- keep the surrounding if/else braced.
+ */
+#define	BP_SET_REDACTED(bp) \
+{							\
+	BP_SET_EMBEDDED(bp, B_TRUE);			\
+	BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_REDACTED);	\
+}
+/* True when bp is embedded and its embedded type is the redacted type. */
+#define	BP_IS_REDACTED(bp) \
+	(BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_REDACTED)
+
 /* BP_IS_RAIDZ(bp) assumes no block compression */
 #define	BP_IS_RAIDZ(bp)		(DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
 				BP_GET_PSIZE(bp))
@@ -623,7 +588,7 @@
 	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
 }
 
-#ifdef _BIG_ENDIAN
+#ifdef _ZFS_BIG_ENDIAN
 #define	ZFS_HOST_BYTEORDER	(0ULL)
 #else
 #define	ZFS_HOST_BYTEORDER	(1ULL)
@@ -638,6 +603,7 @@
  * 'func' is either snprintf() or mdb_snprintf().
  * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
  */
+
 #define	SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
 {									\
 	static const char *copyname[] =					\
@@ -678,6 +644,13 @@
 		    (u_longlong_t)BPE_GET_LSIZE(bp),			\
 		    (u_longlong_t)BPE_GET_PSIZE(bp),			\
 		    (u_longlong_t)bp->blk_birth);			\
+	} else if (BP_IS_REDACTED(bp)) {				\
+		len += func(buf + len, size - len,			\
+		    "REDACTED [L%llu %s] size=%llxL birth=%lluL",	\
+		    (u_longlong_t)BP_GET_LEVEL(bp),			\
+		    type,						\
+		    (u_longlong_t)BP_GET_LSIZE(bp),			\
+		    (u_longlong_t)bp->blk_birth);			\
 	} else {							\
 		for (int d = 0; d < BP_GET_NDVAS(bp); d++) {		\
 			const dva_t *dva = &bp->blk_dva[d];		\
@@ -738,14 +711,26 @@
 	SPA_IMPORT_ASSEMBLE
 } spa_import_type_t;
 
+/*
+ * Pool access mode.
+ *
+ * NOTE(review): READ and WRITE occupy separate bits (1 and 2), so they can
+ * presumably be OR-ed together -- confirm against the users of spa_mode_t.
+ */
+typedef enum spa_mode {
+	SPA_MODE_UNINIT = 0,	/* zero value: no mode selected yet */
+	SPA_MODE_READ = 1,
+	SPA_MODE_WRITE = 2,
+} spa_mode_t;
+
 /*
  * Send TRIM commands in-line during normal pool operation while deleting.
  *	OFF: no
  *	ON: yes
+ * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
  */
 typedef enum {
 	SPA_AUTOTRIM_OFF = 0,	/* default */
-	SPA_AUTOTRIM_ON
+	SPA_AUTOTRIM_ON,
+#ifdef IN_FREEBSD_BASE
+	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
+#else
+	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
+#endif
 } spa_autotrim_t;
 
 /*
@@ -754,6 +739,7 @@
 typedef enum trim_type {
 	TRIM_TYPE_MANUAL = 0,
 	TRIM_TYPE_AUTO = 1,
+	TRIM_TYPE_SIMPLE = 2
 } trim_type_t;
 
 /* state manipulation functions */
@@ -767,12 +753,12 @@
 extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
     uint64_t flags);
 extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
-extern int spa_destroy(char *pool);
+extern int spa_destroy(const char *pool);
 extern int spa_checkpoint(const char *pool);
 extern int spa_checkpoint_discard(const char *pool);
-extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
+extern int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force,
     boolean_t hardforce);
-extern int spa_reset(char *pool);
+extern int spa_reset(const char *pool);
 extern void spa_async_request(spa_t *spa, int flag);
 extern void spa_async_unrequest(spa_t *spa, int flag);
 extern void spa_async_suspend(spa_t *spa);
@@ -782,6 +768,8 @@
 extern void spa_inject_delref(spa_t *spa);
 extern void spa_scan_stat_init(spa_t *spa);
 extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
+extern int bpobj_enqueue_alloc_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
 
 #define	SPA_ASYNC_CONFIG_UPDATE			0x01
 #define	SPA_ASYNC_REMOVE			0x02
@@ -794,17 +782,15 @@
 #define	SPA_ASYNC_INITIALIZE_RESTART		0x100
 #define	SPA_ASYNC_TRIM_RESTART			0x200
 #define	SPA_ASYNC_AUTOTRIM_RESTART		0x400
-
-/*
- * Controls the behavior of spa_vdev_remove().
- */
-#define	SPA_REMOVE_UNSPARE	0x01
-#define	SPA_REMOVE_DONE		0x02
+#define	SPA_ASYNC_L2CACHE_REBUILD		0x800
+#define	SPA_ASYNC_L2CACHE_TRIM			0x1000
+#define	SPA_ASYNC_REBUILD_DONE			0x2000
+#define	SPA_ASYNC_DETACH_SPARE			0x4000
 
 /* device manipulation */
 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
 extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
-    int replacing);
+    int replacing, int rebuild);
 extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
     int replace_done);
 extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
@@ -852,13 +838,16 @@
 #define	SPA_CONFIG_UPDATE_POOL	0
 #define	SPA_CONFIG_UPDATE_VDEVS	1
 
-extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t);
+extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
 extern void spa_config_load(void);
 extern nvlist_t *spa_all_configs(uint64_t *);
 extern void spa_config_set(spa_t *spa, nvlist_t *config);
 extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
     int getstats);
 extern void spa_config_update(spa_t *spa, int what);
+extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
+    vdev_t *parent, uint_t id, int atype);
+
 
 /*
  * Miscellaneous SPA routines in spa_misc.c
@@ -894,7 +883,7 @@
 	uint64_t		count;
 	uint64_t		size;
 	kstat_t			*kstat;
-	void			*private;
+	void			*priv;
 	list_t			list;
 } spa_history_kstat_t;
 
@@ -907,7 +896,6 @@
 	spa_history_list_t	read_history;
 	spa_history_list_t	txg_history;
 	spa_history_kstat_t	tx_assign_histogram;
-	spa_history_kstat_t	io_history;
 	spa_history_list_t	mmp_history;
 	spa_history_kstat_t	state;		/* pool state */
 	spa_history_kstat_t	iostats;
@@ -943,6 +931,12 @@
 	kstat_named_t	autotrim_bytes_skipped;
 	kstat_named_t	autotrim_extents_failed;
 	kstat_named_t	autotrim_bytes_failed;
+	kstat_named_t	simple_trim_extents_written;
+	kstat_named_t	simple_trim_bytes_written;
+	kstat_named_t	simple_trim_extents_skipped;
+	kstat_named_t	simple_trim_bytes_skipped;
+	kstat_named_t	simple_trim_extents_failed;
+	kstat_named_t	simple_trim_bytes_failed;
 } spa_iostats_t;
 
 extern void spa_stats_init(spa_t *spa);
@@ -978,11 +972,14 @@
 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
 extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
+extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
+    krw_t rw);
 extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
 extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
 
 /* Pool vdev add/remove lock */
 extern uint64_t spa_vdev_enter(spa_t *spa);
+extern uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid);
 extern uint64_t spa_vdev_config_enter(spa_t *spa);
 extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
     int error, char *tag);
@@ -1037,6 +1034,7 @@
 extern boolean_t spa_deflate(spa_t *spa);
 extern metaslab_class_t *spa_normal_class(spa_t *spa);
 extern metaslab_class_t *spa_log_class(spa_t *spa);
+extern metaslab_class_t *spa_embedded_log_class(spa_t *spa);
 extern metaslab_class_t *spa_special_class(spa_t *spa);
 extern metaslab_class_t *spa_dedup_class(spa_t *spa);
 extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size,
@@ -1054,6 +1052,7 @@
 extern uint64_t spa_bootfs(spa_t *spa);
 extern uint64_t spa_delegation(spa_t *spa);
 extern objset_t *spa_meta_objset(spa_t *spa);
+extern space_map_t *spa_syncing_log_sm(spa_t *spa);
 extern uint64_t spa_deadman_synctime(spa_t *spa);
 extern uint64_t spa_deadman_ziotime(spa_t *spa);
 extern uint64_t spa_dirty_data(spa_t *spa);
@@ -1069,7 +1068,6 @@
 extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
 extern char *spa_strdup(const char *);
 extern void spa_strfree(char *);
-extern uint64_t spa_get_random(uint64_t range);
 extern uint64_t spa_generate_guid(spa_t *spa);
 extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
 extern void spa_freeze(spa_t *spa);
@@ -1103,11 +1101,13 @@
 extern uint64_t spa_missing_tvds_allowed(spa_t *spa);
 extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing);
 extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa);
+extern uint64_t spa_total_metaslabs(spa_t *spa);
 extern boolean_t spa_multihost(spa_t *spa);
 extern uint32_t spa_get_hostid(spa_t *spa);
 extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
+extern boolean_t spa_livelist_delete_check(spa_t *spa);
 
-extern int spa_mode(spa_t *spa);
+extern spa_mode_t spa_mode(spa_t *spa);
 extern uint64_t zfs_strtonum(const char *str, char **nptr);
 
 extern char *spa_his_ievent_table[];
@@ -1120,22 +1120,23 @@
 extern void spa_history_log_version(spa_t *spa, const char *operation,
     dmu_tx_t *tx);
 extern void spa_history_log_internal(spa_t *spa, const char *operation,
-    dmu_tx_t *tx, const char *fmt, ...);
+    dmu_tx_t *tx, const char *fmt, ...) __printflike(4, 5);
 extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
-    dmu_tx_t *tx, const char *fmt, ...);
+    dmu_tx_t *tx, const char *fmt, ...)  __printflike(4, 5);
 extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
-    dmu_tx_t *tx, const char *fmt, ...);
+    dmu_tx_t *tx, const char *fmt, ...) __printflike(4, 5);
 
 extern const char *spa_state_to_name(spa_t *spa);
 
 /* error handling */
 struct zbookmark_phys;
 extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
-extern int zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
-    const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
-    uint64_t length);
-extern boolean_t zfs_ereport_is_valid(const char *class, spa_t *spa, vdev_t *vd,
+extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
+    const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
+extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
     zio_t *zio);
+extern void zfs_ereport_taskq_fini(void);
+extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
 extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
     const char *name, nvlist_t *aux);
 extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
@@ -1157,7 +1158,7 @@
 extern void vdev_mirror_stat_fini(void);
 
 /* Initialization and termination */
-extern void spa_init(int flags);
+extern void spa_init(spa_mode_t mode);
 extern void spa_fini(void);
 extern void spa_boot_init(void);
 
@@ -1170,6 +1171,22 @@
 /* asynchronous event notification */
 extern void spa_event_notify(spa_t *spa, vdev_t *vdev, nvlist_t *hist_nvl,
     const char *name);
+extern void zfs_ereport_zvol_post(const char *subclass, const char *name,
+    const char *device_name, const char *raw_name);
+
+/* waiting for pool activities to complete */
+extern int spa_wait(const char *pool, zpool_wait_activity_t activity,
+    boolean_t *waited);
+extern int spa_wait_tag(const char *name, zpool_wait_activity_t activity,
+    uint64_t tag, boolean_t *waited);
+extern void spa_notify_waiters(spa_t *spa);
+extern void spa_wake_waiters(spa_t *spa);
+
+/* module param call functions */
+int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS);
+int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);
+int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS);
+int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
 
 #ifdef ZFS_DEBUG
 #define	dprintf_bp(bp, fmt, ...) do {				\
@@ -1184,7 +1201,7 @@
 #define	dprintf_bp(bp, fmt, ...)
 #endif
 
-extern int spa_mode_global;			/* mode, e.g. FREAD | FWRITE */
+extern spa_mode_t spa_mode_global;
 extern int zfs_deadman_enabled;
 extern unsigned long zfs_deadman_synctime_ms;
 extern unsigned long zfs_deadman_ziotime_ms;

diff --git a/zfs/include/sys/spa_impl.h b/zfs/include/sys/spa_impl.h
index 659c697..9714bbc 100644
--- a/zfs/include/sys/spa_impl.h
+++ b/zfs/include/sys/spa_impl.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -34,7 +34,9 @@
 
 #include <sys/spa.h>
 #include <sys/spa_checkpoint.h>
+#include <sys/spa_log_spacemap.h>
 #include <sys/vdev.h>
+#include <sys/vdev_rebuild.h>
 #include <sys/vdev_removal.h>
 #include <sys/metaslab.h>
 #include <sys/dmu.h>
@@ -42,18 +44,24 @@
 #include <sys/uberblock_impl.h>
 #include <sys/zfs_context.h>
 #include <sys/avl.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/bplist.h>
 #include <sys/bpobj.h>
 #include <sys/dsl_crypt.h>
 #include <sys/zfeature.h>
 #include <sys/zthr.h>
+#include <sys/dsl_deadlist.h>
 #include <zfeature_common.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
+typedef struct spa_alloc {
+	kmutex_t	spaa_lock;	/* this allocator's lock */
+	avl_tree_t	spaa_tree;	/* this allocator's tree */
+} ____cacheline_aligned spa_alloc_t;
+
 typedef struct spa_error_entry {
 	zbookmark_phys_t	se_bookmark;
 	char			*se_name;
@@ -138,9 +146,9 @@
 	kmutex_t	scl_lock;
 	kthread_t	*scl_writer;
 	int		scl_write_wanted;
+	int		scl_count;
 	kcondvar_t	scl_cv;
-	zfs_refcount_t	scl_count;
-} spa_config_lock_t;
+} ____cacheline_aligned spa_config_lock_t;
 
 typedef struct spa_config_dirent {
 	list_node_t	scd_link;
@@ -214,6 +222,7 @@
 	spa_load_state_t spa_load_state;	/* current load operation */
 	boolean_t	spa_indirect_vdevs_loaded; /* mappings loaded? */
 	boolean_t	spa_trust_config;	/* do we trust vdev tree? */
+	boolean_t	spa_is_splitting;	/* in the middle of a split? */
 	spa_config_source_t spa_config_source;	/* where config comes from? */
 	uint64_t	spa_import_flags;	/* import specific flags */
 	spa_taskqs_t	spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
@@ -222,6 +231,7 @@
 	boolean_t	spa_is_exporting;	/* true while exporting pool */
 	metaslab_class_t *spa_normal_class;	/* normal data class */
 	metaslab_class_t *spa_log_class;	/* intent log data class */
+	metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
 	metaslab_class_t *spa_special_class;	/* special allocation class */
 	metaslab_class_t *spa_dedup_class;	/* dedup allocation class */
 	uint64_t	spa_first_txg;		/* first txg after spa_open() */
@@ -236,21 +246,20 @@
 	kcondvar_t	spa_evicting_os_cv;	/* Objset Eviction Completion */
 	txg_list_t	spa_vdev_txg_list;	/* per-txg dirty vdev list */
 	vdev_t		*spa_root_vdev;		/* top-level vdev container */
-	int		spa_min_ashift;		/* of vdevs in normal class */
-	int		spa_max_ashift;		/* of vdevs in normal class */
+	uint64_t	spa_min_ashift;		/* of vdevs in normal class */
+	uint64_t	spa_max_ashift;		/* of vdevs in normal class */
+	uint64_t	spa_min_alloc;		/* of vdevs in normal class */
 	uint64_t	spa_config_guid;	/* config pool guid */
 	uint64_t	spa_load_guid;		/* spa_load initialized guid */
 	uint64_t	spa_last_synced_guid;	/* last synced guid */
 	list_t		spa_config_dirty_list;	/* vdevs with dirty config */
 	list_t		spa_state_dirty_list;	/* vdevs with dirty state */
 	/*
-	 * spa_alloc_locks and spa_alloc_trees are arrays, whose lengths are
-	 * stored in spa_alloc_count. There is one tree and one lock for each
-	 * allocator, to help improve allocation performance in write-heavy
-	 * workloads.
+	 * spa_allocs is an array, whose length is stored in spa_alloc_count.
+	 * There is one tree and one lock for each allocator, to help improve
+	 * allocation performance in write-heavy workloads.
 	 */
-	kmutex_t	*spa_alloc_locks;
-	avl_tree_t	*spa_alloc_trees;
+	spa_alloc_t	*spa_allocs;
 	int		spa_alloc_count;
 
 	spa_aux_vdev_t	spa_spares;		/* hot spares */
@@ -310,6 +319,19 @@
 	spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
 	zthr_t		*spa_checkpoint_discard_zthr;
 
+	space_map_t	*spa_syncing_log_sm;	/* current log space map */
+	avl_tree_t	spa_sm_logs_by_txg;
+	kmutex_t	spa_flushed_ms_lock;	/* for metaslabs_by_flushed */
+	avl_tree_t	spa_metaslabs_by_flushed;
+	spa_unflushed_stats_t	spa_unflushed_stats;
+	list_t		spa_log_summary;
+	uint64_t	spa_log_flushall_txg;
+
+	zthr_t		*spa_livelist_delete_zthr; /* deleting livelists */
+	zthr_t		*spa_livelist_condense_zthr; /* condensing livelists */
+	uint64_t	spa_livelists_to_delete; /* set of livelists to free */
+	livelist_condense_entry_t	spa_to_condense; /* next to condense */
+
 	char		*spa_root;		/* alternate root directory */
 	uint64_t	spa_ena;		/* spa-wide ereport ENA */
 	int		spa_last_open_failed;	/* error if last open failed */
@@ -347,13 +369,13 @@
 	uint8_t		spa_claiming;		/* pool is doing zil_claim() */
 	boolean_t	spa_is_root;		/* pool is root */
 	int		spa_minref;		/* num refs when first opened */
-	int		spa_mode;		/* FREAD | FWRITE */
+	spa_mode_t	spa_mode;		/* SPA_MODE_{READ|WRITE} */
+	boolean_t	spa_read_spacemaps;	/* spacemaps available if ro */
 	spa_log_state_t spa_log_state;		/* log state */
 	uint64_t	spa_autoexpand;		/* lun expansion on/off */
 	ddt_t		*spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
 	uint64_t	spa_ddt_stat_object;	/* DDT statistics */
 	uint64_t	spa_dedup_dspace;	/* Cache get_dedup_dspace() */
-	uint64_t	spa_dedup_ditto;	/* dedup ditto threshold */
 	uint64_t	spa_dedup_checksum;	/* default dedup checksum */
 	uint64_t	spa_dspace;		/* dspace in normal class */
 	kmutex_t	spa_vdev_top_lock;	/* dueling offline/remove */
@@ -361,7 +383,7 @@
 	kcondvar_t	spa_proc_cv;		/* spa_proc_state transitions */
 	spa_proc_state_t spa_proc_state;	/* see definition */
 	proc_t		*spa_proc;		/* "zpool-poolname" process */
-	uint64_t	spa_did;		/* if procp != p0, did of t1 */
+	uintptr_t	spa_did;		/* if procp != p0, did of t1 */
 	boolean_t	spa_autoreplace;	/* autoreplace set in open */
 	int		spa_vdev_locks;		/* locks grabbed */
 	uint64_t	spa_creation_version;	/* version at pool creation */
@@ -399,6 +421,15 @@
 	uint64_t	spa_leaf_list_gen;	/* track leaf_list changes */
 	uint32_t	spa_hostid;		/* cached system hostid */
 
+	/* synchronization for threads in spa_wait */
+	kmutex_t	spa_activities_lock;
+	kcondvar_t	spa_activities_cv;
+	kcondvar_t	spa_waiters_cv;
+	int		spa_waiters;		/* number of waiting threads */
+	boolean_t	spa_waiters_cancel;	/* waiters should return */
+
+	char		*spa_compatibility;	/* compatibility file(s) */
+
 	/*
 	 * spa_refcount & spa_config_lock must be the last elements
 	 * because zfs_refcount_t changes size based on compilation options.
@@ -412,7 +443,8 @@
 };
 
 extern char *spa_config_path;
-
+extern char *zfs_deadman_failmode;
+extern int spa_slop_shift;
 extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
     task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
 extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
@@ -422,7 +454,10 @@
 extern sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl,
     const char *name);
 extern void spa_event_post(sysevent_t *ev);
-
+extern int param_set_deadman_failmode_common(const char *val);
+extern void spa_set_deadman_synctime(hrtime_t ns);
+extern void spa_set_deadman_ziotime(hrtime_t ns);
+extern const char *spa_history_zone(void);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/spa_log_spacemap.h b/zfs/include/sys/spa_log_spacemap.h
new file mode 100644
index 0000000..72229df
--- /dev/null
+++ b/zfs/include/sys/spa_log_spacemap.h

@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_SPA_LOG_SPACEMAP_H
+#define	_SYS_SPA_LOG_SPACEMAP_H
+
+#include <sys/avl.h>
+
+typedef struct log_summary_entry {
+	uint64_t lse_start;	/* start TXG */
+	uint64_t lse_end;	/* last TXG */
+	uint64_t lse_txgcount;	/* # of TXGs */
+	uint64_t lse_mscount;	/* # of metaslabs needed to be flushed */
+	uint64_t lse_msdcount;	/* # of dirty metaslabs needed to be flushed */
+	uint64_t lse_blkcount;	/* blocks held by this entry  */
+	list_node_t lse_node;
+} log_summary_entry_t;
+
+typedef struct spa_unflushed_stats  {
+	/* used for memory heuristic */
+	uint64_t sus_memused;	/* current memory used for unflushed trees */
+
+	/* used for block heuristic */
+	uint64_t sus_blocklimit;	/* max # of log blocks allowed */
+	uint64_t sus_nblocks;	/* # of blocks in log space maps currently */
+} spa_unflushed_stats_t;
+
+typedef struct spa_log_sm {
+	uint64_t sls_sm_obj;	/* space map object ID */
+	uint64_t sls_txg;	/* txg logged on the space map */
+	uint64_t sls_nblocks;	/* number of blocks in this log */
+	uint64_t sls_mscount;	/* # of metaslabs flushed in the log's txg */
+	avl_node_t sls_node;	/* node in spa_sm_logs_by_txg */
+	space_map_t *sls_sm;	/* space map pointer, if open */
+} spa_log_sm_t;
+
+int spa_ld_log_spacemaps(spa_t *);
+
+void spa_generate_syncing_log_sm(spa_t *, dmu_tx_t *);
+void spa_flush_metaslabs(spa_t *, dmu_tx_t *);
+void spa_sync_close_syncing_log_sm(spa_t *);
+
+void spa_cleanup_old_sm_logs(spa_t *, dmu_tx_t *);
+
+uint64_t spa_log_sm_blocklimit(spa_t *);
+void spa_log_sm_set_blocklimit(spa_t *);
+uint64_t spa_log_sm_nblocks(spa_t *);
+uint64_t spa_log_sm_memused(spa_t *);
+
+void spa_log_sm_decrement_mscount(spa_t *, uint64_t);
+void spa_log_sm_increment_current_mscount(spa_t *);
+
+void spa_log_summary_add_flushed_metaslab(spa_t *, boolean_t);
+void spa_log_summary_dirty_flushed_metaslab(spa_t *, uint64_t);
+void spa_log_summary_decrement_mscount(spa_t *, uint64_t, boolean_t);
+void spa_log_summary_decrement_blkcount(spa_t *, uint64_t);
+
+boolean_t spa_flush_all_logs_requested(spa_t *);
+
+extern int zfs_keep_log_spacemaps_at_export;
+
+#endif /* _SYS_SPA_LOG_SPACEMAP_H */

diff --git a/zfs/include/sys/space_map.h b/zfs/include/sys/space_map.h
index 7731a35..cb81e71 100644
--- a/zfs/include/sys/space_map.h
+++ b/zfs/include/sys/space_map.h

@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_SPACE_MAP_H
@@ -72,6 +72,11 @@
 	 * bucket, smp_histogram[i], contains the number of free regions
 	 * whose size is:
 	 * 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1)
+	 *
+	 * Note that, if the log space map feature is enabled, histograms of
+	 * space maps that belong to metaslabs will take into account any
+	 * unflushed changes for their metaslabs, even though the actual
+	 * space map doesn't have entries for these changes.
 	 */
 	uint64_t	smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
 } space_map_phys_t;
@@ -143,6 +148,15 @@
 	uint32_t sme_vdev;	/* max is 2^24-1; SM_NO_VDEVID if not present */
 	uint64_t sme_offset;	/* max is 2^63-1; units of sm_shift */
 	uint64_t sme_run;	/* max is 2^36; units of sm_shift */
+
+	/*
+	 * The following fields are not part of the actual space map entry
+	 * on-disk and they are populated with the values from the debug
+	 * entry most recently visited starting from the beginning to the
+	 * end of the space map.
+	 */
+	uint64_t sme_txg;
+	uint64_t sme_sync_pass;
 } space_map_entry_t;
 
 #define	SM_NO_VDEVID	(1 << SPA_VDEVBITS)
@@ -209,6 +223,8 @@
 uint64_t space_map_object(space_map_t *sm);
 int64_t space_map_allocated(space_map_t *sm);
 uint64_t space_map_length(space_map_t *sm);
+uint64_t space_map_entries(space_map_t *sm, range_tree_t *rt);
+uint64_t space_map_nblocks(space_map_t *sm);
 
 void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
     uint64_t vdev_id, dmu_tx_t *tx);

diff --git a/zfs/include/sys/space_reftree.h b/zfs/include/sys/space_reftree.h
index 249b15b..ca9d41d 100644
--- a/zfs/include/sys/space_reftree.h
+++ b/zfs/include/sys/space_reftree.h

@@ -31,7 +31,7 @@
 #define	_SYS_SPACE_REFTREE_H
 
 #include <sys/range_tree.h>
-
+#include <sys/avl.h>
 #ifdef	__cplusplus
 extern "C" {
 #endif

diff --git a/zfs/include/sys/sysevent/Makefile.am b/zfs/include/sys/sysevent/Makefile.am
index e9af268..64e5376 100644
--- a/zfs/include/sys/sysevent/Makefile.am
+++ b/zfs/include/sys/sysevent/Makefile.am

@@ -1,19 +1,15 @@
 COMMON_H = \
-	$(top_srcdir)/include/sys/sysevent/eventdefs.h \
-	$(top_srcdir)/include/sys/sysevent/dev.h
-
-KERNEL_H =
-
-USER_H =
-
-EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+	eventdefs.h \
+	dev.h
 
 if CONFIG_USER
 libzfsdir = $(includedir)/libzfs/sys/sysevent
-libzfs_HEADERS = $(COMMON_H) $(USER_H)
+libzfs_HEADERS = $(COMMON_H)
 endif
 
 if CONFIG_KERNEL
+if BUILD_LINUX
 kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/sysevent
-kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+kernel_HEADERS = $(COMMON_H)
+endif
 endif

diff --git a/zfs/include/sys/sysevent/dev.h b/zfs/include/sys/sysevent/dev.h
index 1117538..2418bba 100644
--- a/zfs/include/sys/sysevent/dev.h
+++ b/zfs/include/sys/sysevent/dev.h

@@ -244,6 +244,9 @@
 #define	DEV_PATH		"path"
 #define	DEV_IS_PART		"is_slice"
 #define	DEV_SIZE		"dev_size"
+
+/* Size of the whole parent block device (if dev is a partition) */
+#define	DEV_PARENT_SIZE		"dev_parent_size"
 #endif /* __linux__ */
 
 #define	EV_V1			1

diff --git a/zfs/include/sys/trace.h b/zfs/include/sys/trace.h
deleted file mode 100644
index f32ba52..0000000
--- a/zfs/include/sys/trace.h
+++ /dev/null

@@ -1,68 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#if !defined(_TRACE_ZFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ZFS_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * The sys/trace_dbgmsg.h header defines tracepoint events for
- * dprintf(), dbgmsg(), and SET_ERROR().
- */
-#define	_SYS_TRACE_DBGMSG_INDIRECT
-#include <sys/trace_dbgmsg.h>
-#undef _SYS_TRACE_DBGMSG_INDIRECT
-
-/*
- * Redefine the DTRACE_PROBE* functions to use Linux tracepoints
- */
-#undef DTRACE_PROBE1
-#define	DTRACE_PROBE1(name, t1, arg1) \
-	trace_zfs_##name((arg1))
-
-#undef DTRACE_PROBE2
-#define	DTRACE_PROBE2(name, t1, arg1, t2, arg2) \
-	trace_zfs_##name((arg1), (arg2))
-
-#undef DTRACE_PROBE3
-#define	DTRACE_PROBE3(name, t1, arg1, t2, arg2, t3, arg3) \
-	trace_zfs_##name((arg1), (arg2), (arg3))
-
-#undef DTRACE_PROBE4
-#define	DTRACE_PROBE4(name, t1, arg1, t2, arg2, t3, arg3, t4, arg4) \
-	trace_zfs_##name((arg1), (arg2), (arg3), (arg4))
-
-#endif /* _TRACE_ZFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_acl.h b/zfs/include/sys/trace_acl.h
deleted file mode 100644
index 610bbe2..0000000
--- a/zfs/include/sys/trace_acl.h
+++ /dev/null

@@ -1,156 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_acl
-
-#if !defined(_TRACE_ACL_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ACL_H
-
-#include <linux/tracepoint.h>
-#include <linux/vfs_compat.h>
-#include <sys/types.h>
-
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     znode_t *, ...,
- *     zfs_ace_hdr_t *, ...,
- *     uint32_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_ace_class,
-	TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched),
-	TP_ARGS(zn, ace, mask_matched),
-	TP_STRUCT__entry(
-	    __field(uint64_t,		z_id)
-	    __field(uint8_t,		z_unlinked)
-	    __field(uint8_t,		z_atime_dirty)
-	    __field(uint8_t,		z_zn_prefetch)
-	    __field(uint8_t,		z_moved)
-	    __field(uint_t,		z_blksz)
-	    __field(uint_t,		z_seq)
-	    __field(uint64_t,		z_mapcnt)
-	    __field(uint64_t,		z_size)
-	    __field(uint64_t,		z_pflags)
-	    __field(uint32_t,		z_sync_cnt)
-	    __field(mode_t,		z_mode)
-	    __field(boolean_t,		z_is_sa)
-	    __field(boolean_t,		z_is_mapped)
-	    __field(boolean_t,		z_is_ctldir)
-	    __field(boolean_t,		z_is_stale)
-
-	    __field(uint32_t,		i_uid)
-	    __field(uint32_t,		i_gid)
-	    __field(unsigned long,	i_ino)
-	    __field(unsigned int,	i_nlink)
-	    __field(loff_t,		i_size)
-	    __field(unsigned int,	i_blkbits)
-	    __field(unsigned short,	i_bytes)
-	    __field(umode_t,		i_mode)
-	    __field(__u32,		i_generation)
-
-	    __field(uint16_t,		z_type)
-	    __field(uint16_t,		z_flags)
-	    __field(uint32_t,		z_access_mask)
-
-	    __field(uint32_t,		mask_matched)
-	),
-	TP_fast_assign(
-	    __entry->z_id		= zn->z_id;
-	    __entry->z_unlinked		= zn->z_unlinked;
-	    __entry->z_atime_dirty	= zn->z_atime_dirty;
-	    __entry->z_zn_prefetch	= zn->z_zn_prefetch;
-	    __entry->z_moved		= zn->z_moved;
-	    __entry->z_blksz		= zn->z_blksz;
-	    __entry->z_seq		= zn->z_seq;
-	    __entry->z_mapcnt		= zn->z_mapcnt;
-	    __entry->z_size		= zn->z_size;
-	    __entry->z_pflags		= zn->z_pflags;
-	    __entry->z_sync_cnt		= zn->z_sync_cnt;
-	    __entry->z_mode		= zn->z_mode;
-	    __entry->z_is_sa		= zn->z_is_sa;
-	    __entry->z_is_mapped	= zn->z_is_mapped;
-	    __entry->z_is_ctldir	= zn->z_is_ctldir;
-	    __entry->z_is_stale		= zn->z_is_stale;
-
-	    __entry->i_uid		= KUID_TO_SUID(ZTOI(zn)->i_uid);
-	    __entry->i_gid		= KGID_TO_SGID(ZTOI(zn)->i_gid);
-	    __entry->i_ino		= zn->z_inode.i_ino;
-	    __entry->i_nlink		= zn->z_inode.i_nlink;
-	    __entry->i_size		= zn->z_inode.i_size;
-	    __entry->i_blkbits		= zn->z_inode.i_blkbits;
-	    __entry->i_bytes		= zn->z_inode.i_bytes;
-	    __entry->i_mode		= zn->z_inode.i_mode;
-	    __entry->i_generation	= zn->z_inode.i_generation;
-
-	    __entry->z_type		= ace->z_type;
-	    __entry->z_flags		= ace->z_flags;
-	    __entry->z_access_mask	= ace->z_access_mask;
-
-	    __entry->mask_matched	= mask_matched;
-	),
-	TP_printk("zn { id %llu unlinked %u atime_dirty %u "
-	    "zn_prefetch %u moved %u blksz %u seq %u "
-	    "mapcnt %llu size %llu pflags %llu "
-	    "sync_cnt %u mode 0x%x is_sa %d "
-	    "is_mapped %d is_ctldir %d is_stale %d inode { "
-	    "uid %u gid %u ino %lu nlink %u size %lli "
-	    "blkbits %u bytes %u mode 0x%x generation %x } } "
-	    "ace { type %u flags %u access_mask %u } mask_matched %u",
-	    __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
-	    __entry->z_zn_prefetch, __entry->z_moved, __entry->z_blksz,
-	    __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
-	    __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
-	    __entry->z_is_sa, __entry->z_is_mapped,
-	    __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid,
-	    __entry->i_gid, __entry->i_ino, __entry->i_nlink,
-	    __entry->i_size, __entry->i_blkbits,
-	    __entry->i_bytes, __entry->i_mode, __entry->i_generation,
-	    __entry->z_type, __entry->z_flags, __entry->z_access_mask,
-	    __entry->mask_matched)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_ACE_EVENT(name) \
-DEFINE_EVENT(zfs_ace_class, name, \
-	TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched), \
-	TP_ARGS(zn, ace, mask_matched))
-/* END CSTYLED */
-DEFINE_ACE_EVENT(zfs_zfs__ace__denies);
-DEFINE_ACE_EVENT(zfs_zfs__ace__allows);
-
-#endif /* _TRACE_ACL_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_acl
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_arc.h b/zfs/include/sys/trace_arc.h
deleted file mode 100644
index c40b58e..0000000
--- a/zfs/include/sys/trace_arc.h
+++ /dev/null

@@ -1,364 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#include <sys/list.h>
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_arc
-
-#if !defined(_TRACE_ARC_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ARC_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-#include <sys/trace_common.h> /* For ZIO macros */
-
-/*
- * Generic support for one argument tracepoints of the form:
- *
- * DTRACE_PROBE1(...,
- *     arc_buf_hdr_t *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
-	TP_PROTO(arc_buf_hdr_t *ab),
-	TP_ARGS(ab),
-	TP_STRUCT__entry(
-	    __array(uint64_t,		hdr_dva_word, 2)
-	    __field(uint64_t,		hdr_birth)
-	    __field(uint32_t,		hdr_flags)
-	    __field(uint32_t,		hdr_bufcnt)
-	    __field(arc_buf_contents_t,	hdr_type)
-	    __field(uint16_t,		hdr_psize)
-	    __field(uint16_t,		hdr_lsize)
-	    __field(uint64_t,		hdr_spa)
-	    __field(arc_state_type_t,	hdr_state_type)
-	    __field(clock_t,		hdr_access)
-	    __field(uint32_t,		hdr_mru_hits)
-	    __field(uint32_t,		hdr_mru_ghost_hits)
-	    __field(uint32_t,		hdr_mfu_hits)
-	    __field(uint32_t,		hdr_mfu_ghost_hits)
-	    __field(uint32_t,		hdr_l2_hits)
-	    __field(int64_t,		hdr_refcount)
-	),
-	TP_fast_assign(
-	    __entry->hdr_dva_word[0]	= ab->b_dva.dva_word[0];
-	    __entry->hdr_dva_word[1]	= ab->b_dva.dva_word[1];
-	    __entry->hdr_birth		= ab->b_birth;
-	    __entry->hdr_flags		= ab->b_flags;
-	    __entry->hdr_bufcnt	= ab->b_l1hdr.b_bufcnt;
-	    __entry->hdr_psize		= ab->b_psize;
-	    __entry->hdr_lsize		= ab->b_lsize;
-	    __entry->hdr_spa		= ab->b_spa;
-	    __entry->hdr_state_type	= ab->b_l1hdr.b_state->arcs_state;
-	    __entry->hdr_access		= ab->b_l1hdr.b_arc_access;
-	    __entry->hdr_mru_hits	= ab->b_l1hdr.b_mru_hits;
-	    __entry->hdr_mru_ghost_hits	= ab->b_l1hdr.b_mru_ghost_hits;
-	    __entry->hdr_mfu_hits	= ab->b_l1hdr.b_mfu_hits;
-	    __entry->hdr_mfu_ghost_hits	= ab->b_l1hdr.b_mfu_ghost_hits;
-	    __entry->hdr_l2_hits	= ab->b_l1hdr.b_l2_hits;
-	    __entry->hdr_refcount	= ab->b_l1hdr.b_refcnt.rc_count;
-	),
-	TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
-	    "flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
-	    "state_type %u access %lu mru_hits %u mru_ghost_hits %u "
-	    "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
-	    __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
-	    __entry->hdr_birth, __entry->hdr_flags,
-	    __entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
-	    __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
-	    __entry->hdr_access, __entry->hdr_mru_hits,
-	    __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
-	    __entry->hdr_mfu_ghost_hits, __entry->hdr_l2_hits,
-	    __entry->hdr_refcount)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_ARC_BUF_HDR_EVENT(name) \
-DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
-	TP_PROTO(arc_buf_hdr_t *ab), \
-	TP_ARGS(ab))
-/* END CSTYLED */
-DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
-DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     vdev_t *, ...,
- *     zio_t *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
-	TP_PROTO(vdev_t *vd, zio_t *zio),
-	TP_ARGS(vd, zio),
-	TP_STRUCT__entry(
-	    __field(uint64_t,	vdev_id)
-	    __field(uint64_t,	vdev_guid)
-	    __field(uint64_t,	vdev_state)
-	    ZIO_TP_STRUCT_ENTRY
-	),
-	TP_fast_assign(
-	    __entry->vdev_id	= vd->vdev_id;
-	    __entry->vdev_guid	= vd->vdev_guid;
-	    __entry->vdev_state	= vd->vdev_state;
-	    ZIO_TP_FAST_ASSIGN
-	),
-	TP_printk("vdev { id %llu guid %llu state %llu } "
-	    ZIO_TP_PRINTK_FMT, __entry->vdev_id, __entry->vdev_guid,
-	    __entry->vdev_state, ZIO_TP_PRINTK_ARGS)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_L2ARC_RW_EVENT(name) \
-DEFINE_EVENT(zfs_l2arc_rw_class, name, \
-	TP_PROTO(vdev_t *vd, zio_t *zio), \
-	TP_ARGS(vd, zio))
-/* END CSTYLED */
-DEFINE_L2ARC_RW_EVENT(zfs_l2arc__read);
-DEFINE_L2ARC_RW_EVENT(zfs_l2arc__write);
-
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     zio_t *, ...,
- *     l2arc_write_callback_t *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_l2arc_iodone_class,
-	TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb),
-	TP_ARGS(zio, cb),
-	TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
-	TP_fast_assign(ZIO_TP_FAST_ASSIGN),
-	TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_L2ARC_IODONE_EVENT(name) \
-DEFINE_EVENT(zfs_l2arc_iodone_class, name, \
-	TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb), \
-	TP_ARGS(zio, cb))
-/* END CSTYLED */
-DEFINE_L2ARC_IODONE_EVENT(zfs_l2arc__iodone);
-
-
-/*
- * Generic support for four argument tracepoints of the form:
- *
- * DTRACE_PROBE4(...,
- *     arc_buf_hdr_t *, ...,
- *     const blkptr_t *,
- *     uint64_t,
- *     const zbookmark_phys_t *);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_arc_miss_class,
-	TP_PROTO(arc_buf_hdr_t *hdr,
-	    const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb),
-	TP_ARGS(hdr, bp, size, zb),
-	TP_STRUCT__entry(
-	    __array(uint64_t,		hdr_dva_word, 2)
-	    __field(uint64_t,		hdr_birth)
-	    __field(uint32_t,		hdr_flags)
-	    __field(uint32_t,		hdr_bufcnt)
-	    __field(arc_buf_contents_t,	hdr_type)
-	    __field(uint16_t,		hdr_psize)
-	    __field(uint16_t,		hdr_lsize)
-	    __field(uint64_t,		hdr_spa)
-	    __field(arc_state_type_t,	hdr_state_type)
-	    __field(clock_t,		hdr_access)
-	    __field(uint32_t,		hdr_mru_hits)
-	    __field(uint32_t,		hdr_mru_ghost_hits)
-	    __field(uint32_t,		hdr_mfu_hits)
-	    __field(uint32_t,		hdr_mfu_ghost_hits)
-	    __field(uint32_t,		hdr_l2_hits)
-	    __field(int64_t,		hdr_refcount)
-
-	    __array(uint64_t,		bp_dva0, 2)
-	    __array(uint64_t,		bp_dva1, 2)
-	    __array(uint64_t,		bp_dva2, 2)
-	    __array(uint64_t,		bp_cksum, 4)
-
-	    __field(uint64_t,		bp_lsize)
-
-	    __field(uint64_t,		zb_objset)
-	    __field(uint64_t,		zb_object)
-	    __field(int64_t,		zb_level)
-	    __field(uint64_t,		zb_blkid)
-	),
-	TP_fast_assign(
-	    __entry->hdr_dva_word[0]	= hdr->b_dva.dva_word[0];
-	    __entry->hdr_dva_word[1]	= hdr->b_dva.dva_word[1];
-	    __entry->hdr_birth		= hdr->b_birth;
-	    __entry->hdr_flags		= hdr->b_flags;
-	    __entry->hdr_bufcnt		= hdr->b_l1hdr.b_bufcnt;
-	    __entry->hdr_psize		= hdr->b_psize;
-	    __entry->hdr_lsize		= hdr->b_lsize;
-	    __entry->hdr_spa		= hdr->b_spa;
-	    __entry->hdr_state_type	= hdr->b_l1hdr.b_state->arcs_state;
-	    __entry->hdr_access		= hdr->b_l1hdr.b_arc_access;
-	    __entry->hdr_mru_hits	= hdr->b_l1hdr.b_mru_hits;
-	    __entry->hdr_mru_ghost_hits	= hdr->b_l1hdr.b_mru_ghost_hits;
-	    __entry->hdr_mfu_hits	= hdr->b_l1hdr.b_mfu_hits;
-	    __entry->hdr_mfu_ghost_hits	= hdr->b_l1hdr.b_mfu_ghost_hits;
-	    __entry->hdr_l2_hits	= hdr->b_l1hdr.b_l2_hits;
-	    __entry->hdr_refcount	= hdr->b_l1hdr.b_refcnt.rc_count;
-
-	    __entry->bp_dva0[0]		= bp->blk_dva[0].dva_word[0];
-	    __entry->bp_dva0[1]		= bp->blk_dva[0].dva_word[1];
-	    __entry->bp_dva1[0]		= bp->blk_dva[1].dva_word[0];
-	    __entry->bp_dva1[1]		= bp->blk_dva[1].dva_word[1];
-	    __entry->bp_dva2[0]		= bp->blk_dva[2].dva_word[0];
-	    __entry->bp_dva2[1]		= bp->blk_dva[2].dva_word[1];
-	    __entry->bp_cksum[0]	= bp->blk_cksum.zc_word[0];
-	    __entry->bp_cksum[1]	= bp->blk_cksum.zc_word[1];
-	    __entry->bp_cksum[2]	= bp->blk_cksum.zc_word[2];
-	    __entry->bp_cksum[3]	= bp->blk_cksum.zc_word[3];
-
-	    __entry->bp_lsize		= size;
-
-	    __entry->zb_objset		= zb->zb_objset;
-	    __entry->zb_object		= zb->zb_object;
-	    __entry->zb_level		= zb->zb_level;
-	    __entry->zb_blkid		= zb->zb_blkid;
-	),
-	TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
-	    "flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
-	    "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
-	    "mfu_ghost_hits %u l2_hits %u refcount %lli } "
-	    "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
-	    "0x%llx:0x%llx cksum 0x%llx:0x%llx:0x%llx:0x%llx "
-	    "lsize %llu } zb { objset %llu object %llu level %lli "
-	    "blkid %llu }",
-	    __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
-	    __entry->hdr_birth, __entry->hdr_flags,
-	    __entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
-	    __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
-	    __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
-	    __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
-	    __entry->hdr_l2_hits, __entry->hdr_refcount,
-	    __entry->bp_dva0[0], __entry->bp_dva0[1],
-	    __entry->bp_dva1[0], __entry->bp_dva1[1],
-	    __entry->bp_dva2[0], __entry->bp_dva2[1],
-	    __entry->bp_cksum[0], __entry->bp_cksum[1],
-	    __entry->bp_cksum[2], __entry->bp_cksum[3],
-	    __entry->bp_lsize, __entry->zb_objset, __entry->zb_object,
-	    __entry->zb_level, __entry->zb_blkid)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_ARC_MISS_EVENT(name) \
-DEFINE_EVENT(zfs_arc_miss_class, name, \
-	TP_PROTO(arc_buf_hdr_t *hdr, \
-	    const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb), \
-	TP_ARGS(hdr, bp, size, zb))
-/* END CSTYLED */
-DEFINE_ARC_MISS_EVENT(zfs_arc__miss);
-
-/*
- * Generic support for four argument tracepoints of the form:
- *
- * DTRACE_PROBE4(...,
- *     l2arc_dev_t *, ...,
- *     list_t *, ...,
- *     uint64_t, ...,
- *     boolean_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_l2arc_evict_class,
-	TP_PROTO(l2arc_dev_t *dev,
-	    list_t *buflist, uint64_t taddr, boolean_t all),
-	TP_ARGS(dev, buflist, taddr, all),
-	TP_STRUCT__entry(
-	    __field(uint64_t,		vdev_id)
-	    __field(uint64_t,		vdev_guid)
-	    __field(uint64_t,		vdev_state)
-
-	    __field(uint64_t,		l2ad_hand)
-	    __field(uint64_t,		l2ad_start)
-	    __field(uint64_t,		l2ad_end)
-	    __field(boolean_t,		l2ad_first)
-	    __field(boolean_t,		l2ad_writing)
-
-	    __field(uint64_t,		taddr)
-	    __field(boolean_t,		all)
-	),
-	TP_fast_assign(
-	    __entry->vdev_id		= dev->l2ad_vdev->vdev_id;
-	    __entry->vdev_guid		= dev->l2ad_vdev->vdev_guid;
-	    __entry->vdev_state		= dev->l2ad_vdev->vdev_state;
-
-	    __entry->l2ad_hand		= dev->l2ad_hand;
-	    __entry->l2ad_start		= dev->l2ad_start;
-	    __entry->l2ad_end		= dev->l2ad_end;
-	    __entry->l2ad_first		= dev->l2ad_first;
-	    __entry->l2ad_writing	= dev->l2ad_writing;
-
-	    __entry->taddr		= taddr;
-	    __entry->all		= all;
-	),
-	TP_printk("l2ad { vdev { id %llu guid %llu state %llu } "
-	    "hand %llu start %llu end %llu "
-	    "first %d writing %d } taddr %llu all %d",
-	    __entry->vdev_id, __entry->vdev_guid, __entry->vdev_state,
-	    __entry->l2ad_hand, __entry->l2ad_start,
-	    __entry->l2ad_end, __entry->l2ad_first, __entry->l2ad_writing,
-	    __entry->taddr, __entry->all)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_L2ARC_EVICT_EVENT(name) \
-DEFINE_EVENT(zfs_l2arc_evict_class, name, \
-	TP_PROTO(l2arc_dev_t *dev, \
-	    list_t *buflist, uint64_t taddr, boolean_t all), \
-	TP_ARGS(dev, buflist, taddr, all))
-/* END CSTYLED */
-DEFINE_L2ARC_EVICT_EVENT(zfs_l2arc__evict);
-
-#endif /* _TRACE_ARC_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_arc
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_dbgmsg.h b/zfs/include/sys/trace_dbgmsg.h
deleted file mode 100644
index a4aab1e..0000000
--- a/zfs/include/sys/trace_dbgmsg.h
+++ /dev/null

@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/* Do not include this file directly. Please use <sys/trace.h> instead. */
-#ifndef _SYS_TRACE_DBGMSG_INDIRECT
-#error "trace_dbgmsg.h included directly"
-#endif
-
-/*
- * This file defines tracepoint events for use by the dbgmsg(),
- * dprintf(), and SET_ERROR() interfaces. These are grouped here because
- * they all provide a way to store simple messages in the debug log (as
- * opposed to events used by the DTRACE_PROBE interfaces which typically
- * dump structured data).
- *
- * This header is included inside the trace.h multiple inclusion guard,
- * and it is guarded above against direct inclusion, so it and need not
- * be guarded separately.
- */
-
-/*
- * Generic support for one argument tracepoints of the form:
- *
- * DTRACE_PROBE1(...,
- *     const char *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_dprintf_class,
-	TP_PROTO(const char *msg),
-	TP_ARGS(msg),
-	TP_STRUCT__entry(
-	    __string(msg, msg)
-	),
-	TP_fast_assign(
-	    __assign_str(msg, msg);
-	),
-	TP_printk("%s", __get_str(msg))
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_DPRINTF_EVENT(name) \
-DEFINE_EVENT(zfs_dprintf_class, name, \
-	TP_PROTO(const char *msg), \
-	TP_ARGS(msg))
-/* END CSTYLED */
-DEFINE_DPRINTF_EVENT(zfs_zfs__dprintf);

diff --git a/zfs/include/sys/trace_dbuf.h b/zfs/include/sys/trace_dbuf.h
deleted file mode 100644
index e97b611..0000000
--- a/zfs/include/sys/trace_dbuf.h
+++ /dev/null

@@ -1,145 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_dbuf
-
-#if !defined(_TRACE_DBUF_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_DBUF_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-#ifndef	TRACE_DBUF_MSG_MAX
-#define	TRACE_DBUF_MSG_MAX	512
-#endif
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     dmu_buf_impl_t *, ...,
- *     zio_t *, ...);
- */
-
-#define	DBUF_TP_STRUCT_ENTRY					\
-	__dynamic_array(char,	os_spa,	TRACE_DBUF_MSG_MAX)	\
-	__field(uint64_t,	ds_object)			\
-	__field(uint64_t,	db_object)			\
-	__field(uint64_t,	db_level)			\
-	__field(uint64_t,	db_blkid)			\
-	__field(uint64_t,	db_offset)			\
-	__field(uint64_t,	db_size)			\
-	__field(uint64_t,	db_state)			\
-	__field(int64_t,	db_holds)			\
-	__dynamic_array(char,	msg,	TRACE_DBUF_MSG_MAX)
-
-#define	DBUF_TP_FAST_ASSIGN						\
-	if (db != NULL) {						\
-		__assign_str(os_spa,					\
-		spa_name(DB_DNODE(db)->dn_objset->os_spa));		\
-									\
-		__entry->ds_object = db->db_objset->os_dsl_dataset ?	\
-		db->db_objset->os_dsl_dataset->ds_object : 0;		\
-									\
-		__entry->db_object = db->db.db_object;			\
-		__entry->db_level  = db->db_level;			\
-		__entry->db_blkid  = db->db_blkid;			\
-		__entry->db_offset = db->db.db_offset;			\
-		__entry->db_size   = db->db.db_size;			\
-		__entry->db_state  = db->db_state;			\
-		__entry->db_holds  = zfs_refcount_count(&db->db_holds);	\
-		snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX,		\
-		    DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS);		\
-	} else {							\
-		__assign_str(os_spa, "NULL")				\
-		__entry->ds_object = 0;					\
-		__entry->db_object = 0;					\
-		__entry->db_level  = 0;					\
-		__entry->db_blkid  = 0;					\
-		__entry->db_offset = 0;					\
-		__entry->db_size   = 0;					\
-		__entry->db_state  = 0;					\
-		__entry->db_holds  = 0;					\
-		snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX,		\
-		    "dbuf { NULL }");					\
-	}
-
-#define	DBUF_TP_PRINTK_FMT						\
-	"dbuf { spa \"%s\" objset %llu object %llu level %llu "		\
-	"blkid %llu offset %llu size %llu state %llu holds %lld }"
-
-#define	DBUF_TP_PRINTK_ARGS					\
-	__get_str(os_spa), __entry->ds_object,			\
-	__entry->db_object, __entry->db_level,			\
-	__entry->db_blkid, __entry->db_offset,			\
-	__entry->db_size, __entry->db_state, __entry->db_holds
-
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_dbuf_class,
-	TP_PROTO(dmu_buf_impl_t *db, zio_t *zio),
-	TP_ARGS(db, zio),
-	TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
-	TP_fast_assign(DBUF_TP_FAST_ASSIGN),
-	TP_printk("%s", __get_str(msg))
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_DBUF_EVENT(name) \
-DEFINE_EVENT(zfs_dbuf_class, name, \
-	TP_PROTO(dmu_buf_impl_t *db, zio_t *zio), \
-	TP_ARGS(db, zio))
-/* END CSTYLED */
-DEFINE_DBUF_EVENT(zfs_blocked__read);
-
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_dbuf_evict_one_class,
-	TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls),
-	TP_ARGS(db, mls),
-	TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
-	TP_fast_assign(DBUF_TP_FAST_ASSIGN),
-	TP_printk("%s", __get_str(msg))
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_DBUF_EVICT_ONE_EVENT(name) \
-DEFINE_EVENT(zfs_dbuf_evict_one_class, name, \
-	TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls), \
-	TP_ARGS(db, mls))
-/* END CSTYLED */
-DEFINE_DBUF_EVICT_ONE_EVENT(zfs_dbuf__evict__one);
-
-#endif /* _TRACE_DBUF_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_dbuf
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_dmu.h b/zfs/include/sys/trace_dmu.h
deleted file mode 100644
index 24e57f5..0000000
--- a/zfs/include/sys/trace_dmu.h
+++ /dev/null

@@ -1,129 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_dmu
-
-#if !defined(_TRACE_DMU_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_DMU_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     dmu_tx_t *, ...,
- *     uint64_t, ...,
- *     uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
-	TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time),
-	TP_ARGS(tx, dirty, min_tx_time),
-	TP_STRUCT__entry(
-	    __field(uint64_t,			tx_txg)
-	    __field(uint64_t,			tx_lastsnap_txg)
-	    __field(uint64_t,			tx_lasttried_txg)
-	    __field(boolean_t,			tx_anyobj)
-	    __field(boolean_t,			tx_dirty_delayed)
-	    __field(hrtime_t,			tx_start)
-	    __field(boolean_t,			tx_wait_dirty)
-	    __field(int,			tx_err)
-	    __field(uint64_t,			min_tx_time)
-	    __field(uint64_t,			dirty)
-	),
-	TP_fast_assign(
-	    __entry->tx_txg			= tx->tx_txg;
-	    __entry->tx_lastsnap_txg		= tx->tx_lastsnap_txg;
-	    __entry->tx_lasttried_txg		= tx->tx_lasttried_txg;
-	    __entry->tx_anyobj			= tx->tx_anyobj;
-	    __entry->tx_dirty_delayed		= tx->tx_dirty_delayed;
-	    __entry->tx_start			= tx->tx_start;
-	    __entry->tx_wait_dirty		= tx->tx_wait_dirty;
-	    __entry->tx_err			= tx->tx_err;
-	    __entry->dirty			= dirty;
-	    __entry->min_tx_time		= min_tx_time;
-	),
-	TP_printk("tx { txg %llu lastsnap_txg %llu tx_lasttried_txg %llu "
-	    "anyobj %d dirty_delayed %d start %llu wait_dirty %d err %i "
-	    "} dirty %llu min_tx_time %llu",
-	    __entry->tx_txg, __entry->tx_lastsnap_txg,
-	    __entry->tx_lasttried_txg, __entry->tx_anyobj,
-	    __entry->tx_dirty_delayed, __entry->tx_start,
-	    __entry->tx_wait_dirty, __entry->tx_err,
-	    __entry->dirty, __entry->min_tx_time)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_DELAY_MINTIME_EVENT(name) \
-DEFINE_EVENT(zfs_delay_mintime_class, name, \
-	TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time), \
-	TP_ARGS(tx, dirty, min_tx_time))
-/* END CSTYLED */
-DEFINE_DELAY_MINTIME_EVENT(zfs_delay__mintime);
-
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_free_long_range_class,
-	TP_PROTO(uint64_t long_free_dirty_all_txgs, uint64_t chunk_len, \
-	    uint64_t txg),
-	TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg),
-	TP_STRUCT__entry(
-	    __field(uint64_t,			long_free_dirty_all_txgs)
-	    __field(uint64_t,			chunk_len)
-	    __field(uint64_t,			txg)
-	),
-	TP_fast_assign(
-	    __entry->long_free_dirty_all_txgs	= long_free_dirty_all_txgs;
-	    __entry->chunk_len					= chunk_len;
-	    __entry->txg						= txg;
-	),
-	TP_printk("long_free_dirty_all_txgs %llu chunk_len %llu txg %llu",
-	   __entry->long_free_dirty_all_txgs,
-	   __entry->chunk_len, __entry->txg)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_FREE_LONG_RANGE_EVENT(name) \
-DEFINE_EVENT(zfs_free_long_range_class, name, \
-	TP_PROTO(uint64_t long_free_dirty_all_txgs, \
-	    uint64_t chunk_len, uint64_t txg), \
-	TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg))
-/* END CSTYLED */
-DEFINE_FREE_LONG_RANGE_EVENT(zfs_free__long__range);
-
-#endif /* _TRACE_DMU_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_dmu
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_dnode.h b/zfs/include/sys/trace_dnode.h
deleted file mode 100644
index 7196a49..0000000
--- a/zfs/include/sys/trace_dnode.h
+++ /dev/null

@@ -1,123 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_dnode
-
-#if !defined(_TRACE_DNODE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_DNODE_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     dnode_t *, ...,
- *     int64_t, ...,
- *     uint32_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_dnode_move_class,
-	TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs),
-	TP_ARGS(dn, refcount, dbufs),
-	TP_STRUCT__entry(
-	    __field(uint64_t,		dn_object)
-	    __field(dmu_object_type_t,	dn_type)
-	    __field(uint16_t,		dn_bonuslen)
-	    __field(uint8_t,		dn_bonustype)
-	    __field(uint8_t,		dn_nblkptr)
-	    __field(uint8_t,		dn_checksum)
-	    __field(uint8_t,		dn_compress)
-	    __field(uint8_t,		dn_nlevels)
-	    __field(uint8_t,		dn_indblkshift)
-	    __field(uint8_t,		dn_datablkshift)
-	    __field(uint8_t,		dn_moved)
-	    __field(uint16_t,		dn_datablkszsec)
-	    __field(uint32_t,		dn_datablksz)
-	    __field(uint64_t,		dn_maxblkid)
-	    __field(int64_t,		dn_tx_holds)
-	    __field(int64_t,		dn_holds)
-	    __field(boolean_t,		dn_have_spill)
-
-	    __field(int64_t,		refcount)
-	    __field(uint32_t,		dbufs)
-	),
-	TP_fast_assign(
-	    __entry->dn_object		= dn->dn_object;
-	    __entry->dn_type		= dn->dn_type;
-	    __entry->dn_bonuslen	= dn->dn_bonuslen;
-	    __entry->dn_bonustype	= dn->dn_bonustype;
-	    __entry->dn_nblkptr		= dn->dn_nblkptr;
-	    __entry->dn_checksum	= dn->dn_checksum;
-	    __entry->dn_compress	= dn->dn_compress;
-	    __entry->dn_nlevels		= dn->dn_nlevels;
-	    __entry->dn_indblkshift	= dn->dn_indblkshift;
-	    __entry->dn_datablkshift	= dn->dn_datablkshift;
-	    __entry->dn_moved		= dn->dn_moved;
-	    __entry->dn_datablkszsec	= dn->dn_datablkszsec;
-	    __entry->dn_datablksz	= dn->dn_datablksz;
-	    __entry->dn_maxblkid	= dn->dn_maxblkid;
-	    __entry->dn_tx_holds	= dn->dn_tx_holds.rc_count;
-	    __entry->dn_holds		= dn->dn_holds.rc_count;
-	    __entry->dn_have_spill	= dn->dn_have_spill;
-
-	    __entry->refcount		= refcount;
-	    __entry->dbufs		= dbufs;
-	),
-	TP_printk("dn { object %llu type %d bonuslen %u bonustype %u "
-	    "nblkptr %u checksum %u compress %u nlevels %u indblkshift %u "
-	    "datablkshift %u moved %u datablkszsec %u datablksz %u "
-	    "maxblkid %llu tx_holds %lli holds %lli have_spill %d } "
-	    "refcount %lli dbufs %u",
-	    __entry->dn_object, __entry->dn_type, __entry->dn_bonuslen,
-	    __entry->dn_bonustype, __entry->dn_nblkptr, __entry->dn_checksum,
-	    __entry->dn_compress, __entry->dn_nlevels, __entry->dn_indblkshift,
-	    __entry->dn_datablkshift, __entry->dn_moved,
-	    __entry->dn_datablkszsec, __entry->dn_datablksz,
-	    __entry->dn_maxblkid, __entry->dn_tx_holds, __entry->dn_holds,
-	    __entry->dn_have_spill, __entry->refcount, __entry->dbufs)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_DNODE_MOVE_EVENT(name) \
-DEFINE_EVENT(zfs_dnode_move_class, name, \
-	TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs), \
-	TP_ARGS(dn, refcount, dbufs))
-/* END CSTYLED */
-DEFINE_DNODE_MOVE_EVENT(zfs_dnode__move);
-
-#endif /* _TRACE_DNODE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_dnode
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_multilist.h b/zfs/include/sys/trace_multilist.h
deleted file mode 100644
index ed0b38a..0000000
--- a/zfs/include/sys/trace_multilist.h
+++ /dev/null

@@ -1,82 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_multilist
-
-#if !defined(_TRACE_MULTILIST_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_MULTILIST_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     multilist_t *, ...,
- *     unsigned int, ...,
- *     void *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
-	TP_PROTO(multilist_t *ml, unsigned sublist_idx, void *obj),
-	TP_ARGS(ml, sublist_idx, obj),
-	TP_STRUCT__entry(
-	    __field(size_t,		ml_offset)
-	    __field(uint64_t,		ml_num_sublists)
-
-	    __field(unsigned int,	sublist_idx)
-	),
-	TP_fast_assign(
-	    __entry->ml_offset		= ml->ml_offset;
-	    __entry->ml_num_sublists	= ml->ml_num_sublists;
-
-	    __entry->sublist_idx	= sublist_idx;
-	),
-	TP_printk("ml { offset %ld numsublists %llu sublistidx %u } ",
-	    __entry->ml_offset, __entry->ml_num_sublists, __entry->sublist_idx)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_MULTILIST_INSERT_REMOVE_EVENT(name) \
-DEFINE_EVENT(zfs_multilist_insert_remove_class, name, \
-	TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj), \
-	TP_ARGS(ml, sublist_idx, obj))
-/* END CSTYLED */
-DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__insert);
-DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__remove);
-
-#endif /* _TRACE_MULTILIST_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_multilist
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_txg.h b/zfs/include/sys/trace_txg.h
deleted file mode 100644
index f85c3f9..0000000
--- a/zfs/include/sys/trace_txg.h
+++ /dev/null

@@ -1,78 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_txg
-
-#if !defined(_TRACE_TXG_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_TXG_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     dsl_pool_t *, ...,
- *     uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_txg_class,
-	TP_PROTO(dsl_pool_t *dp, uint64_t txg),
-	TP_ARGS(dp, txg),
-	TP_STRUCT__entry(
-	    __field(uint64_t, txg)
-	),
-	TP_fast_assign(
-	    __entry->txg = txg;
-	),
-	TP_printk("txg %llu", __entry->txg)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_TXG_EVENT(name) \
-DEFINE_EVENT(zfs_txg_class, name, \
-	TP_PROTO(dsl_pool_t *dp, uint64_t txg), \
-	TP_ARGS(dp, txg))
-/* END CSTYLED */
-DEFINE_TXG_EVENT(zfs_dsl_pool_sync__done);
-DEFINE_TXG_EVENT(zfs_txg__quiescing);
-DEFINE_TXG_EVENT(zfs_txg__opened);
-DEFINE_TXG_EVENT(zfs_txg__syncing);
-DEFINE_TXG_EVENT(zfs_txg__synced);
-DEFINE_TXG_EVENT(zfs_txg__quiesced);
-
-#endif /* _TRACE_TXG_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_txg
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_vdev.h b/zfs/include/sys/trace_vdev.h
deleted file mode 100644
index d7af44c..0000000
--- a/zfs/include/sys/trace_vdev.h
+++ /dev/null

@@ -1,119 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_vdev
-
-#if !defined(_TRACE_VDEV_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_VDEV_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     spa_t *, ...,
- *     uint64_t, ...,
- *     uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_removing_class_3,
-	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size),
-	TP_ARGS(spa, offset, size),
-	TP_STRUCT__entry(
-	    __field(spa_t *,	vdev_spa)
-	    __field(uint64_t,	vdev_offset)
-	    __field(uint64_t,	vdev_size)
-	),
-	TP_fast_assign(
-	    __entry->vdev_spa	= spa;
-	    __entry->vdev_offset = offset;
-	    __entry->vdev_size	= size;
-	),
-	TP_printk("spa %p offset %llu size %llu",
-	    __entry->vdev_spa, __entry->vdev_offset,
-	    __entry->vdev_size)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define DEFINE_REMOVE_FREE_EVENT(name) \
-DEFINE_EVENT(zfs_removing_class_3, name, \
-	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size), \
-	TP_ARGS(spa, offset, size))
-/* END CSTYLED */
-DEFINE_REMOVE_FREE_EVENT(zfs_remove__free__synced);
-DEFINE_REMOVE_FREE_EVENT(zfs_remove__free__unvisited);
-
-/*
- * Generic support for four argument tracepoints of the form:
- *
- * DTRACE_PROBE4(...,
- *     spa_t *, ...,
- *     uint64_t, ...,
- *     uint64_t, ...,
- *     uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_removing_class_4,
-	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size, uint64_t txg),
-	TP_ARGS(spa, offset, size, txg),
-	TP_STRUCT__entry(
-	    __field(spa_t *,	vdev_spa)
-	    __field(uint64_t,	vdev_offset)
-	    __field(uint64_t,	vdev_size)
-	    __field(uint64_t,	vdev_txg)
-	),
-	TP_fast_assign(
-	    __entry->vdev_spa	= spa;
-	    __entry->vdev_offset = offset;
-	    __entry->vdev_size	= size;
-	    __entry->vdev_txg	= txg;
-	),
-	TP_printk("spa %p offset %llu size %llu txg %llu",
-	    __entry->vdev_spa, __entry->vdev_offset,
-	    __entry->vdev_size, __entry->vdev_txg)
-);
-
-/* BEGIN CSTYLED */
-#define DEFINE_REMOVE_FREE_EVENT_TXG(name) \
-DEFINE_EVENT(zfs_removing_class_4, name, \
-	TP_PROTO(spa_t *spa, uint64_t offset, uint64_t size,uint64_t txg), \
-	TP_ARGS(spa, offset, size, txg))
-/* END CSTYLED */
-DEFINE_REMOVE_FREE_EVENT_TXG(zfs_remove__free__inflight);
-
-#endif /* _TRACE_VDEV_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_vdev
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_zil.h b/zfs/include/sys/trace_zil.h
deleted file mode 100644
index ff16c86..0000000
--- a/zfs/include/sys/trace_zil.h
+++ /dev/null

@@ -1,221 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_zil
-
-#if !defined(_TRACE_ZIL_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ZIL_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-#define	ZILOG_TP_STRUCT_ENTRY						    \
-		__field(uint64_t,	zl_lr_seq)			    \
-		__field(uint64_t,	zl_commit_lr_seq)		    \
-		__field(uint64_t,	zl_destroy_txg)			    \
-		__field(uint64_t,	zl_replaying_seq)		    \
-		__field(uint32_t,	zl_suspend)			    \
-		__field(uint8_t,	zl_suspending)			    \
-		__field(uint8_t,	zl_keep_first)			    \
-		__field(uint8_t,	zl_replay)			    \
-		__field(uint8_t,	zl_stop_sync)			    \
-		__field(uint8_t,	zl_logbias)			    \
-		__field(uint8_t,	zl_sync)			    \
-		__field(int,		zl_parse_error)			    \
-		__field(uint64_t,	zl_parse_blk_seq)		    \
-		__field(uint64_t,	zl_parse_lr_seq)		    \
-		__field(uint64_t,	zl_parse_blk_count)		    \
-		__field(uint64_t,	zl_parse_lr_count)		    \
-		__field(uint64_t,	zl_cur_used)			    \
-		__field(clock_t,	zl_replay_time)			    \
-		__field(uint64_t,	zl_replay_blks)
-
-#define	ZILOG_TP_FAST_ASSIGN						    \
-		__entry->zl_lr_seq		= zilog->zl_lr_seq;	    \
-		__entry->zl_commit_lr_seq	= zilog->zl_commit_lr_seq;  \
-		__entry->zl_destroy_txg	= zilog->zl_destroy_txg;	    \
-		__entry->zl_replaying_seq	= zilog->zl_replaying_seq;  \
-		__entry->zl_suspend		= zilog->zl_suspend;	    \
-		__entry->zl_suspending	= zilog->zl_suspending;		    \
-		__entry->zl_keep_first	= zilog->zl_keep_first;		    \
-		__entry->zl_replay		= zilog->zl_replay;	    \
-		__entry->zl_stop_sync	= zilog->zl_stop_sync;		    \
-		__entry->zl_logbias		= zilog->zl_logbias;	    \
-		__entry->zl_sync		= zilog->zl_sync;	    \
-		__entry->zl_parse_error	= zilog->zl_parse_error;	    \
-		__entry->zl_parse_blk_seq	= zilog->zl_parse_blk_seq;  \
-		__entry->zl_parse_lr_seq	= zilog->zl_parse_lr_seq;   \
-		__entry->zl_parse_blk_count	= zilog->zl_parse_blk_count;\
-		__entry->zl_parse_lr_count	= zilog->zl_parse_lr_count; \
-		__entry->zl_cur_used	= zilog->zl_cur_used;		    \
-		__entry->zl_replay_time	= zilog->zl_replay_time;	    \
-		__entry->zl_replay_blks	= zilog->zl_replay_blks;
-
-#define	ZILOG_TP_PRINTK_FMT						    \
-	"zl { lr_seq %llu commit_lr_seq %llu destroy_txg %llu "		    \
-	"replaying_seq %llu suspend %u suspending %u keep_first %u "	    \
-	"replay %u stop_sync %u logbias %u sync %u "			    \
-	"parse_error %u parse_blk_seq %llu parse_lr_seq %llu "		    \
-	"parse_blk_count %llu parse_lr_count %llu "			    \
-	"cur_used %llu replay_time %lu replay_blks %llu }"
-
-#define	ZILOG_TP_PRINTK_ARGS						    \
-	    __entry->zl_lr_seq, __entry->zl_commit_lr_seq,		    \
-	    __entry->zl_destroy_txg, __entry->zl_replaying_seq,		    \
-	    __entry->zl_suspend, __entry->zl_suspending,		    \
-	    __entry->zl_keep_first, __entry->zl_replay,			    \
-	    __entry->zl_stop_sync, __entry->zl_logbias, __entry->zl_sync,   \
-	    __entry->zl_parse_error, __entry->zl_parse_blk_seq,		    \
-	    __entry->zl_parse_lr_seq, __entry->zl_parse_blk_count,	    \
-	    __entry->zl_parse_lr_count, __entry->zl_cur_used,		    \
-	    __entry->zl_replay_time, __entry->zl_replay_blks
-
-#define	ITX_TP_STRUCT_ENTRY						    \
-		__field(itx_wr_state_t,	itx_wr_state)			    \
-		__field(uint8_t,	itx_sync)			    \
-		__field(zil_callback_t,	itx_callback)			    \
-		__field(void *,		itx_callback_data)		    \
-		__field(uint64_t,	itx_oid)			    \
-									    \
-		__field(uint64_t,	lrc_txtype)			    \
-		__field(uint64_t,	lrc_reclen)			    \
-		__field(uint64_t,	lrc_txg)			    \
-		__field(uint64_t,	lrc_seq)
-
-#define	ITX_TP_FAST_ASSIGN						    \
-		__entry->itx_wr_state		= itx->itx_wr_state;	    \
-		__entry->itx_sync		= itx->itx_sync;	    \
-		__entry->itx_callback		= itx->itx_callback;	    \
-		__entry->itx_callback_data	= itx->itx_callback_data;   \
-		__entry->itx_oid		= itx->itx_oid;		    \
-									    \
-		__entry->lrc_txtype		= itx->itx_lr.lrc_txtype;   \
-		__entry->lrc_reclen		= itx->itx_lr.lrc_reclen;   \
-		__entry->lrc_txg		= itx->itx_lr.lrc_txg;	    \
-		__entry->lrc_seq		= itx->itx_lr.lrc_seq;
-
-#define	ITX_TP_PRINTK_FMT						    \
-	"itx { wr_state %u sync %u callback %p callback_data %p oid %llu"   \
-	" { txtype %llu reclen %llu txg %llu seq %llu } }"
-
-#define	ITX_TP_PRINTK_ARGS						    \
-	    __entry->itx_wr_state, __entry->itx_sync, __entry->itx_callback,\
-	    __entry->itx_callback_data, __entry->itx_oid,		    \
-	    __entry->lrc_txtype, __entry->lrc_reclen, __entry->lrc_txg,	    \
-	    __entry->lrc_seq
-
-#define	ZCW_TP_STRUCT_ENTRY						    \
-		__field(lwb_t *,	zcw_lwb)			    \
-		__field(boolean_t,	zcw_done)			    \
-		__field(int,		zcw_zio_error)			    \
-
-#define	ZCW_TP_FAST_ASSIGN						    \
-		__entry->zcw_lwb		= zcw->zcw_lwb;		    \
-		__entry->zcw_done		= zcw->zcw_done;	    \
-		__entry->zcw_zio_error		= zcw->zcw_zio_error;
-
-#define	ZCW_TP_PRINTK_FMT						    \
-	"zcw { lwb %p done %u error %u }"
-
-#define	ZCW_TP_PRINTK_ARGS						    \
-	    __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_zio_error
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     zilog_t *, ...,
- *     itx_t *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_zil_process_itx_class,
-	TP_PROTO(zilog_t *zilog, itx_t *itx),
-	TP_ARGS(zilog, itx),
-	TP_STRUCT__entry(
-	    ZILOG_TP_STRUCT_ENTRY
-	    ITX_TP_STRUCT_ENTRY
-	),
-	TP_fast_assign(
-	    ZILOG_TP_FAST_ASSIGN
-	    ITX_TP_FAST_ASSIGN
-	),
-	TP_printk(
-	    ZILOG_TP_PRINTK_FMT " " ITX_TP_PRINTK_FMT,
-	    ZILOG_TP_PRINTK_ARGS, ITX_TP_PRINTK_ARGS)
-);
-/* END CSTYLED */
-
-/* BEGIN CSTYLED */
-#define	DEFINE_ZIL_PROCESS_ITX_EVENT(name) \
-DEFINE_EVENT(zfs_zil_process_itx_class, name, \
-	TP_PROTO(zilog_t *zilog, itx_t *itx), \
-	TP_ARGS(zilog, itx))
-DEFINE_ZIL_PROCESS_ITX_EVENT(zfs_zil__process__commit__itx);
-DEFINE_ZIL_PROCESS_ITX_EVENT(zfs_zil__process__normal__itx);
-/* END CSTYLED */
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     zilog_t *, ...,
- *     zil_commit_waiter_t *, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_zil_commit_io_error_class,
-	TP_PROTO(zilog_t *zilog, zil_commit_waiter_t *zcw),
-	TP_ARGS(zilog, zcw),
-	TP_STRUCT__entry(
-	    ZILOG_TP_STRUCT_ENTRY
-	    ZCW_TP_STRUCT_ENTRY
-	),
-	TP_fast_assign(
-	    ZILOG_TP_FAST_ASSIGN
-	    ZCW_TP_FAST_ASSIGN
-	),
-	TP_printk(
-	    ZILOG_TP_PRINTK_FMT " " ZCW_TP_PRINTK_FMT,
-	    ZILOG_TP_PRINTK_ARGS, ZCW_TP_PRINTK_ARGS)
-);
-
-/* BEGIN CSTYLED */
-#define	DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(name) \
-DEFINE_EVENT(zfs_zil_commit_io_error_class, name, \
-	TP_PROTO(zilog_t *zilog, zil_commit_waiter_t *zcw), \
-	TP_ARGS(zilog, zcw))
-DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
-/* END CSTYLED */
-
-#endif /* _TRACE_ZIL_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_zil
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_zio.h b/zfs/include/sys/trace_zio.h
deleted file mode 100644
index af589b9..0000000
--- a/zfs/include/sys/trace_zio.h
+++ /dev/null

@@ -1,89 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#include <sys/list.h>
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_zio
-
-#if !defined(_TRACE_ZIO_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ZIO_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-#include <sys/trace_common.h> /* For ZIO macros */
-
-/* BEGIN CSTYLED */
-TRACE_EVENT(zfs_zio__delay__miss,
-	TP_PROTO(zio_t *zio, hrtime_t now),
-	TP_ARGS(zio, now),
-	TP_STRUCT__entry(
-	    ZIO_TP_STRUCT_ENTRY
-	    __field(hrtime_t, now)
-	),
-	TP_fast_assign(
-	    ZIO_TP_FAST_ASSIGN
-	    __entry->now = now;
-	),
-	TP_printk("now %llu " ZIO_TP_PRINTK_FMT, __entry->now,
-	    ZIO_TP_PRINTK_ARGS)
-);
-
-TRACE_EVENT(zfs_zio__delay__hit,
-	TP_PROTO(zio_t *zio, hrtime_t now, hrtime_t diff),
-	TP_ARGS(zio, now, diff),
-	TP_STRUCT__entry(
-	    ZIO_TP_STRUCT_ENTRY
-	    __field(hrtime_t, now)
-	    __field(hrtime_t, diff)
-	),
-	TP_fast_assign(
-	    ZIO_TP_FAST_ASSIGN
-	    __entry->now = now;
-	    __entry->diff = diff;
-	),
-	TP_printk("now %llu diff %llu " ZIO_TP_PRINTK_FMT, __entry->now,
-	    __entry->diff, ZIO_TP_PRINTK_ARGS)
-);
-
-TRACE_EVENT(zfs_zio__delay__skip,
-	TP_PROTO(zio_t *zio),
-	TP_ARGS(zio),
-	TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
-	TP_fast_assign(ZIO_TP_FAST_ASSIGN),
-	TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
-);
-/* END CSTYLED */
-
-#endif /* _TRACE_ZIO_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_zio
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/trace_zrlock.h b/zfs/include/sys/trace_zrlock.h
deleted file mode 100644
index fa330f2..0000000
--- a/zfs/include/sys/trace_zrlock.h
+++ /dev/null

@@ -1,88 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
-
-#undef TRACE_SYSTEM
-#define	TRACE_SYSTEM zfs
-
-#undef TRACE_SYSTEM_VAR
-#define	TRACE_SYSTEM_VAR zfs_zrlock
-
-#if !defined(_TRACE_ZRLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
-#define	_TRACE_ZRLOCK_H
-
-#include <linux/tracepoint.h>
-#include <sys/types.h>
-
-/*
- * Generic support for two argument tracepoints of the form:
- *
- * DTRACE_PROBE2(...,
- *     zrlock_t *, ...,
- *     uint32_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_zrlock_class,
-	TP_PROTO(zrlock_t *zrl, kthread_t *owner, uint32_t n),
-	TP_ARGS(zrl, owner, n),
-	TP_STRUCT__entry(
-	    __field(int32_t,		refcount)
-#ifdef	ZFS_DEBUG
-	    __field(pid_t,		owner_pid)
-	    __field(const char *,	caller)
-#endif
-	    __field(uint32_t,		n)
-	),
-	TP_fast_assign(
-	    __entry->refcount	= zrl->zr_refcount;
-#ifdef	ZFS_DEBUG
-	    __entry->owner_pid	= owner ? owner->pid : 0;
-	    __entry->caller = zrl->zr_caller ? zrl->zr_caller : "(null)";
-#endif
-	    __entry->n		= n;
-	),
-#ifdef	ZFS_DEBUG
-	TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u",
-	    __entry->refcount, __entry->owner_pid, __entry->caller,
-	    __entry->n)
-#else
-	TP_printk("zrl { refcount %d } n %u",
-	    __entry->refcount, __entry->n)
-#endif
-);
-/* END_CSTYLED */
-
-#define	DEFINE_ZRLOCK_EVENT(name) \
-DEFINE_EVENT(zfs_zrlock_class, name, \
-	TP_PROTO(zrlock_t *zrl, kthread_t *owner, uint32_t n), \
-	TP_ARGS(zrl, owner, n))
-DEFINE_ZRLOCK_EVENT(zfs_zrlock__reentry);
-
-#endif /* _TRACE_ZRLOCK_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define	TRACE_INCLUDE_PATH sys
-#define	TRACE_INCLUDE_FILE trace_zrlock
-#include <trace/define_trace.h>
-
-#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */

diff --git a/zfs/include/sys/txg.h b/zfs/include/sys/txg.h
index 260a3b4..f38f000 100644
--- a/zfs/include/sys/txg.h
+++ b/zfs/include/sys/txg.h

@@ -41,6 +41,7 @@
 #define	TXG_MASK		(TXG_SIZE - 1)	/* mask for size	*/
 #define	TXG_INITIAL		TXG_SIZE	/* initial txg 		*/
 #define	TXG_IDX			(txg & TXG_MASK)
+#define	TXG_UNKNOWN		0
 
 /* Number of txgs worth of frees we defer adding to in-core spacemaps */
 #define	TXG_DEFER_SIZE		2
@@ -77,7 +78,7 @@
 
 extern void txg_delay(struct dsl_pool *dp, uint64_t txg, hrtime_t delta,
     hrtime_t resolution);
-extern void txg_kick(struct dsl_pool *dp);
+extern void txg_kick(struct dsl_pool *dp, uint64_t txg);
 
 /*
  * Wait until the given transaction group has finished syncing.

diff --git a/zfs/include/sys/u8_textprep.h b/zfs/include/sys/u8_textprep.h
index f8b5bed..09ab13a 100644
--- a/zfs/include/sys/u8_textprep.h
+++ b/zfs/include/sys/u8_textprep.h

@@ -101,7 +101,7 @@
 #define	U8_ILLEGAL_CHAR			(-1)
 #define	U8_OUT_OF_RANGE_CHAR		(-2)
 
-extern int u8_validate(char *, size_t, char **, int, int *);
+extern int u8_validate(const char *, size_t, char **, int, int *);
 extern int u8_strcmp(const char *, const char *, size_t, int, size_t, int *);
 extern size_t u8_textprep_str(char *, size_t *, char *, size_t *, int, size_t,
 	int *);

diff --git a/zfs/include/sys/uio_impl.h b/zfs/include/sys/uio_impl.h
index cfef0b9..cde3ef4 100644
--- a/zfs/include/sys/uio_impl.h
+++ b/zfs/include/sys/uio_impl.h

@@ -41,9 +41,28 @@
 
 #include <sys/uio.h>
 
-extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
-extern int uio_prefaultpages(ssize_t, uio_t *);
-extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
-extern void uioskip(uio_t *, size_t);
+extern int zfs_uiomove(void *, size_t, zfs_uio_rw_t, zfs_uio_t *);
+extern int zfs_uiocopy(void *, size_t, zfs_uio_rw_t, zfs_uio_t *, size_t *);
+extern void zfs_uioskip(zfs_uio_t *, size_t);
+
+static inline void
+zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
+{
+	*base = zfs_uio_iovbase(uio, idx);
+	*len = zfs_uio_iovlen(uio, idx);
+}
+
+static inline offset_t
+zfs_uio_index_at_offset(zfs_uio_t *uio, offset_t off, uint_t *vec_idx)
+{
+	*vec_idx = 0;
+	while (*vec_idx < zfs_uio_iovcnt(uio) &&
+	    off >= zfs_uio_iovlen(uio, *vec_idx)) {
+		off -= zfs_uio_iovlen(uio, *vec_idx);
+		(*vec_idx)++;
+	}
+
+	return (off);
+}
 
 #endif	/* _SYS_UIO_IMPL_H */

diff --git a/zfs/include/sys/vdev.h b/zfs/include/sys/vdev.h
index 339a488..de08bbf 100644
--- a/zfs/include/sys/vdev.h
+++ b/zfs/include/sys/vdev.h

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, Datto Inc. All rights reserved.
  */
@@ -33,6 +33,7 @@
 #include <sys/zio.h>
 #include <sys/dmu.h>
 #include <sys/space_map.h>
+#include <sys/metaslab.h>
 #include <sys/fs/zfs.h>
 
 #ifdef	__cplusplus
@@ -49,10 +50,13 @@
 
 extern int zfs_nocacheflush;
 
+typedef boolean_t vdev_open_children_func_t(vdev_t *vd);
+
 extern void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...);
 extern void vdev_dbgmsg_print_tree(vdev_t *, int);
 extern int vdev_open(vdev_t *);
 extern void vdev_open_children(vdev_t *);
+extern void vdev_open_children_subset(vdev_t *, vdev_open_children_func_t *);
 extern int vdev_validate(vdev_t *);
 extern int vdev_copy_path_strict(vdev_t *, vdev_t *);
 extern void vdev_copy_path_relaxed(vdev_t *, vdev_t *);
@@ -71,9 +75,12 @@
 extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
     uint64_t txg, uint64_t size);
 extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d);
-extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, uint64_t off, size_t size);
+extern boolean_t vdev_default_need_resilver(vdev_t *vd, const dva_t *dva,
+    size_t psize, uint64_t phys_birth);
+extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, const dva_t *dva,
+    size_t psize, uint64_t phys_birth);
 extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
-    int scrub_done);
+    boolean_t scrub_done, boolean_t rebuild_done);
 extern boolean_t vdev_dtl_required(vdev_t *vd);
 extern boolean_t vdev_resilver_needed(vdev_t *vd,
     uint64_t *minp, uint64_t *maxp);
@@ -86,6 +93,7 @@
     uint64_t size);
 extern void spa_vdev_indirect_mark_obsolete(spa_t *spa, uint64_t vdev,
     uint64_t offset, uint64_t size, dmu_tx_t *tx);
+extern boolean_t vdev_replace_in_progress(vdev_t *vdev);
 
 extern void vdev_hold(vdev_t *);
 extern void vdev_rele(vdev_t *);
@@ -96,10 +104,19 @@
 extern void vdev_expand(vdev_t *vd, uint64_t txg);
 extern void vdev_split(vdev_t *vd);
 extern void vdev_deadman(vdev_t *vd, char *tag);
-extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
-    range_seg_t *physical_rs);
+
+typedef void vdev_xlate_func_t(void *arg, range_seg64_t *physical_rs);
+
+extern boolean_t vdev_xlate_is_empty(range_seg64_t *rs);
+extern void vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs);
+extern void vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
+    vdev_xlate_func_t *func, void *arg);
 
 extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
+
+extern metaslab_group_t *vdev_get_mg(vdev_t *vd, metaslab_class_t *mc);
+
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
 extern void vdev_clear_stats(vdev_t *vd);
 extern void vdev_stat_update(zio_t *zio, uint64_t psize);
@@ -116,11 +133,21 @@
 
 extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
 
+/*
+ * Return the amount of space allocated for a gang block header.
+ */
+static inline uint64_t
+vdev_gang_header_asize(vdev_t *vd)
+{
+	return (vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE));
+}
+
 extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
 extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
 extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
     vdev_state_t *);
 extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
+extern int vdev_remove_wanted(spa_t *spa, uint64_t guid);
 extern void vdev_clear(spa_t *spa, vdev_t *vd);
 
 extern boolean_t vdev_is_dead(vdev_t *vd);
@@ -163,6 +190,8 @@
 	VDEV_CONFIG_MISSING = 1 << 4
 } vdev_config_flag_t;
 
+extern void vdev_post_kobj_evt(vdev_t *vd);
+extern void vdev_clear_kobj_evt(vdev_t *vd);
 extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
 extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
     boolean_t getstats, vdev_config_flag_t flags);
@@ -177,7 +206,9 @@
 extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
 extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv);
 extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
-    offset, uint64_t size, zio_done_func_t *done, void *private, int flags);
+    offset, uint64_t size, zio_done_func_t *done, void *priv, int flags);
+extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
+extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
 
 typedef enum {
 	VDEV_LABEL_CREATE,	/* create/add a new device */

diff --git a/zfs/include/sys/vdev_disk.h b/zfs/include/sys/vdev_disk.h
index 908f5f3..a7e19fb 100644
--- a/zfs/include/sys/vdev_disk.h
+++ b/zfs/include/sys/vdev_disk.h

@@ -42,13 +42,5 @@
 
 #ifdef _KERNEL
 #include <sys/vdev.h>
-
-typedef struct vdev_disk {
-	ddi_devid_t		vd_devid;
-	char			*vd_minor;
-	struct block_device	*vd_bdev;
-	krwlock_t		vd_lock;
-} vdev_disk_t;
-
 #endif /* _KERNEL */
 #endif /* _SYS_VDEV_DISK_H */

diff --git a/zfs/include/sys/vdev_draid.h b/zfs/include/sys/vdev_draid.h
new file mode 100644
index 0000000..dd334ac
--- /dev/null
+++ b/zfs/include/sys/vdev_draid.h

@@ -0,0 +1,111 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_VDEV_DRAID_H
+#define	_SYS_VDEV_DRAID_H
+
+#include <sys/types.h>
+#include <sys/abd.h>
+#include <sys/nvpair.h>
+#include <sys/zio.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_raidz_impl.h>
+#include <sys/vdev.h>
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Constants required to generate and use dRAID permutations.
+ */
+#define	VDEV_DRAID_SEED			0xd7a1d5eed
+#define	VDEV_DRAID_MAX_MAPS		254
+#define	VDEV_DRAID_ROWSHIFT		SPA_MAXBLOCKSHIFT
+#define	VDEV_DRAID_ROWHEIGHT		(1ULL << VDEV_DRAID_ROWSHIFT)
+#define	VDEV_DRAID_REFLOW_RESERVE	(2 * VDEV_DRAID_ROWHEIGHT)
+
+/*
+ * dRAID permutation map.
+ */
+typedef struct draid_map {
+	uint64_t dm_children;	/* # of permutation columns */
+	uint64_t dm_nperms;	/* # of permutation rows */
+	uint64_t dm_seed;	/* dRAID map seed */
+	uint64_t dm_checksum;	/* Checksum of generated map */
+	uint8_t *dm_perms;	/* base permutation array */
+} draid_map_t;
+
+/*
+ * dRAID configuration.
+ */
+typedef struct vdev_draid_config {
+	/*
+	 * Values read from the dRAID nvlist configuration.
+	 */
+	uint64_t vdc_ndata;		/* # of data devices in group */
+	uint64_t vdc_nparity;		/* # of parity devices in group */
+	uint64_t vdc_nspares;		/* # of distributed spares */
+	uint64_t vdc_children;		/* # of children */
+	uint64_t vdc_ngroups;		/* # groups per slice */
+
+	/*
+	 * Immutable derived constants.
+	 */
+	uint8_t *vdc_perms;		/* permutation array */
+	uint64_t vdc_nperms;		/* # of permutations */
+	uint64_t vdc_groupwidth;	/* = data + parity */
+	uint64_t vdc_ndisks;		/* = children - spares */
+	uint64_t vdc_groupsz;		/* = groupwidth * DRAID_ROWSIZE */
+	uint64_t vdc_devslicesz;	/* = (groupsz * groups) / ndisks */
+} vdev_draid_config_t;
+
+/*
+ * Functions for handling dRAID permutation maps.
+ */
+extern uint64_t vdev_draid_rand(uint64_t *);
+extern int vdev_draid_lookup_map(uint64_t, const draid_map_t **);
+extern int vdev_draid_generate_perms(const draid_map_t *, uint8_t **);
+
+/*
+ * General dRAID support functions.
+ */
+extern boolean_t vdev_draid_readable(vdev_t *, uint64_t);
+extern boolean_t vdev_draid_missing(vdev_t *, uint64_t, uint64_t, uint64_t);
+extern uint64_t vdev_draid_asize_to_psize(vdev_t *, uint64_t);
+extern void vdev_draid_map_alloc_empty(zio_t *, struct raidz_row *);
+extern int vdev_draid_map_verify_empty(zio_t *, struct raidz_row *);
+extern nvlist_t *vdev_draid_read_config_spare(vdev_t *);
+
+/* Functions for dRAID distributed spares. */
+extern vdev_t *vdev_draid_spare_get_child(vdev_t *, uint64_t);
+extern vdev_t *vdev_draid_spare_get_parent(vdev_t *);
+extern int vdev_draid_spare_create(nvlist_t *, vdev_t *, uint64_t *, uint64_t);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_DRAID_H */

diff --git a/zfs/include/sys/vdev_file.h b/zfs/include/sys/vdev_file.h
index 9a398c5..1514a44 100644
--- a/zfs/include/sys/vdev_file.h
+++ b/zfs/include/sys/vdev_file.h

@@ -34,7 +34,7 @@
 #endif
 
 typedef struct vdev_file {
-	vnode_t		*vf_vnode;
+	zfs_file_t	*vf_file;
 } vdev_file_t;
 
 extern void vdev_file_init(void);

diff --git a/zfs/include/sys/vdev_impl.h b/zfs/include/sys/vdev_impl.h
index 090ba3f..9d4a806 100644
--- a/zfs/include/sys/vdev_impl.h
+++ b/zfs/include/sys/vdev_impl.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
  */
 
@@ -38,6 +38,7 @@
 #include <sys/uberblock_impl.h>
 #include <sys/vdev_indirect_mapping.h>
 #include <sys/vdev_indirect_births.h>
+#include <sys/vdev_rebuild.h>
 #include <sys/vdev_removal.h>
 #include <sys/zfs_ratelimit.h>
 
@@ -67,14 +68,20 @@
 /*
  * Virtual device operations
  */
+typedef int	vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd);
+typedef void	vdev_kobj_post_evt_func_t(vdev_t *vd);
+typedef void	vdev_fini_func_t(vdev_t *vd);
 typedef int	vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
-    uint64_t *ashift);
+    uint64_t *ashift, uint64_t *pshift);
 typedef void	vdev_close_func_t(vdev_t *vd);
 typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
+typedef uint64_t vdev_min_asize_func_t(vdev_t *vd);
+typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd);
 typedef void	vdev_io_start_func_t(zio_t *zio);
 typedef void	vdev_io_done_func_t(zio_t *zio);
 typedef void	vdev_state_change_func_t(vdev_t *vd, int, int);
-typedef boolean_t vdev_need_resilver_func_t(vdev_t *vd, uint64_t, size_t);
+typedef boolean_t vdev_need_resilver_func_t(vdev_t *vd, const dva_t *dva,
+    size_t psize, uint64_t phys_birth);
 typedef void	vdev_hold_func_t(vdev_t *vd);
 typedef void	vdev_rele_func_t(vdev_t *vd);
 
@@ -86,13 +93,24 @@
  * Given a target vdev, translates the logical range "in" to the physical
  * range "res"
  */
-typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg_t *in,
-    range_seg_t *res);
+typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg64_t *logical,
+    range_seg64_t *physical, range_seg64_t *remain);
+typedef uint64_t vdev_rebuild_asize_func_t(vdev_t *vd, uint64_t start,
+    uint64_t size, uint64_t max_segment);
+typedef void vdev_metaslab_init_func_t(vdev_t *vd, uint64_t *startp,
+    uint64_t *sizep);
+typedef void vdev_config_generate_func_t(vdev_t *vd, nvlist_t *nv);
+typedef uint64_t vdev_nparity_func_t(vdev_t *vd);
+typedef uint64_t vdev_ndisks_func_t(vdev_t *vd);
 
 typedef const struct vdev_ops {
+	vdev_init_func_t		*vdev_op_init;
+	vdev_fini_func_t		*vdev_op_fini;
 	vdev_open_func_t		*vdev_op_open;
 	vdev_close_func_t		*vdev_op_close;
 	vdev_asize_func_t		*vdev_op_asize;
+	vdev_min_asize_func_t		*vdev_op_min_asize;
+	vdev_min_alloc_func_t		*vdev_op_min_alloc;
 	vdev_io_start_func_t		*vdev_op_io_start;
 	vdev_io_done_func_t		*vdev_op_io_done;
 	vdev_state_change_func_t	*vdev_op_state_change;
@@ -100,11 +118,13 @@
 	vdev_hold_func_t		*vdev_op_hold;
 	vdev_rele_func_t		*vdev_op_rele;
 	vdev_remap_func_t		*vdev_op_remap;
-	/*
-	 * For translating ranges from non-leaf vdevs (e.g. raidz) to leaves.
-	 * Used when initializing vdevs. Isn't used by leaf ops.
-	 */
 	vdev_xlation_func_t		*vdev_op_xlate;
+	vdev_rebuild_asize_func_t	*vdev_op_rebuild_asize;
+	vdev_metaslab_init_func_t	*vdev_op_metaslab_init;
+	vdev_config_generate_func_t	*vdev_op_config_generate;
+	vdev_nparity_func_t		*vdev_op_nparity;
+	vdev_ndisks_func_t		*vdev_op_ndisks;
+	vdev_kobj_post_evt_func_t	*vdev_op_kobj_evt_post;
 	char				vdev_op_type[16];
 	boolean_t			vdev_op_leaf;
 } vdev_ops_t;
@@ -147,6 +167,9 @@
 	avl_tree_t	vq_write_offset_tree;
 	avl_tree_t	vq_trim_offset_tree;
 	uint64_t	vq_last_offset;
+	zio_priority_t	vq_last_prio;	/* Last sent I/O priority. */
+	uint32_t	vq_ia_active;	/* Active interactive I/Os. */
+	uint32_t	vq_nia_credit;	/* Non-interactive I/Os credit. */
 	hrtime_t	vq_io_complete_ts; /* time last i/o completed */
 	hrtime_t	vq_io_delta_ts;
 	zio_t		vq_io_search; /* used as local for stack reduction */
@@ -215,13 +238,30 @@
 	uint64_t	vdev_min_asize;	/* min acceptable asize		*/
 	uint64_t	vdev_max_asize;	/* max acceptable asize		*/
 	uint64_t	vdev_ashift;	/* block alignment shift	*/
+
+	/*
+	 * Logical block alignment shift
+	 *
+	 * The smallest sized/aligned I/O supported by the device.
+	 */
+	uint64_t	vdev_logical_ashift;
+	/*
+	 * Physical block alignment shift
+	 *
+	 * The device supports logical I/Os with vdev_logical_ashift
+	 * size/alignment, but optimum performance will be achieved by
+	 * aligning/sizing requests to vdev_physical_ashift.  Smaller
+	 * requests may be inflated or incur device level read-modify-write
+	 * operations.
+	 *
+	 * May be 0 to indicate no preference (i.e. use vdev_logical_ashift).
+	 */
+	uint64_t	vdev_physical_ashift;
 	uint64_t	vdev_state;	/* see VDEV_STATE_* #defines	*/
 	uint64_t	vdev_prevstate;	/* used when reopening a vdev	*/
 	vdev_ops_t	*vdev_ops;	/* vdev operations		*/
 	spa_t		*vdev_spa;	/* spa for this vdev		*/
 	void		*vdev_tsd;	/* type-specific data		*/
-	vnode_t		*vdev_name_vp;	/* vnode for pathname		*/
-	vnode_t		*vdev_devid_vp;	/* vnode for devid		*/
 	vdev_t		*vdev_top;	/* top-level vdev		*/
 	vdev_t		*vdev_parent;	/* parent vdev			*/
 	vdev_t		**vdev_child;	/* array of children		*/
@@ -231,8 +271,11 @@
 	boolean_t	vdev_expanding;	/* expand the vdev?		*/
 	boolean_t	vdev_reopening;	/* reopen in progress?		*/
 	boolean_t	vdev_nonrot;	/* true if solid state		*/
+	int		vdev_load_error; /* error on last load		*/
 	int		vdev_open_error; /* error on last open		*/
+	int		vdev_validate_error; /* error on last validate	*/
 	kthread_t	*vdev_open_thread; /* thread opening children	*/
+	kthread_t	*vdev_validate_thread; /* thread validating children */
 	uint64_t	vdev_crtxg;	/* txg when top-level was added */
 
 	/*
@@ -242,6 +285,7 @@
 	uint64_t	vdev_ms_shift;	/* metaslab size shift		*/
 	uint64_t	vdev_ms_count;	/* number of metaslabs		*/
 	metaslab_group_t *vdev_mg;	/* metaslab group		*/
+	metaslab_group_t *vdev_log_mg;	/* embedded slog metaslab group	*/
 	metaslab_t	**vdev_ms;	/* metaslab array		*/
 	uint64_t	vdev_pending_fastwrite; /* allocated fastwrites */
 	txg_list_t	vdev_ms_list;	/* per-txg dirty metaslab lists	*/
@@ -297,13 +341,23 @@
 	uint64_t	vdev_trim_secure;	/* requested secure TRIM */
 	uint64_t	vdev_trim_action_time;	/* start and end time */
 
-	/* for limiting outstanding I/Os (initialize and TRIM) */
+	/* Rebuild related */
+	boolean_t	vdev_rebuilding;
+	boolean_t	vdev_rebuild_exit_wanted;
+	boolean_t	vdev_rebuild_cancel_wanted;
+	boolean_t	vdev_rebuild_reset_wanted;
+	kmutex_t	vdev_rebuild_lock;
+	kcondvar_t	vdev_rebuild_cv;
+	kthread_t	*vdev_rebuild_thread;
+	vdev_rebuild_t	vdev_rebuild_config;
+
+	/* For limiting outstanding I/Os (initialize, TRIM) */
 	kmutex_t	vdev_initialize_io_lock;
 	kcondvar_t	vdev_initialize_io_cv;
 	uint64_t	vdev_initialize_inflight;
 	kmutex_t	vdev_trim_io_lock;
 	kcondvar_t	vdev_trim_io_cv;
-	uint64_t	vdev_trim_inflight[2];
+	uint64_t	vdev_trim_inflight[3];
 
 	/*
 	 * Values stored in the config for an indirect or removing vdev.
@@ -360,7 +414,7 @@
 	uint64_t	vdev_degraded;	/* persistent degraded state	*/
 	uint64_t	vdev_removed;	/* persistent removed state	*/
 	uint64_t	vdev_resilver_txg; /* persistent resilvering state */
-	uint64_t	vdev_nparity;	/* number of parity devices for raidz */
+	uint64_t	vdev_rebuild_txg; /* persistent rebuilding state */
 	char		*vdev_path;	/* vdev path (if any)		*/
 	char		*vdev_devid;	/* vdev devid (if any)		*/
 	char		*vdev_physpath;	/* vdev device path (if any)	*/
@@ -383,6 +437,7 @@
 	boolean_t	vdev_isl2cache;	/* was a l2cache device		*/
 	boolean_t	vdev_copy_uberblocks;  /* post expand copy uberblocks */
 	boolean_t	vdev_resilver_deferred;  /* resilver deferred */
+	boolean_t	vdev_kobj_flag; /* kobj event record */
 	vdev_queue_t	vdev_queue;	/* I/O deadline schedule queue	*/
 	vdev_cache_t	vdev_cache;	/* physical block cache		*/
 	spa_aux_vdev_t	*vdev_aux;	/* for l2cache and spares vdevs	*/
@@ -406,17 +461,16 @@
 	kmutex_t	vdev_probe_lock; /* protects vdev_probe_zio	*/
 
 	/*
-	 * We rate limit ZIO delay and ZIO checksum events, since they
+	 * We rate limit ZIO delay, deadman, and checksum events, since they
 	 * can flood ZED with tons of events when a drive is acting up.
 	 */
 	zfs_ratelimit_t vdev_delay_rl;
+	zfs_ratelimit_t vdev_deadman_rl;
 	zfs_ratelimit_t vdev_checksum_rl;
 };
 
-#define	VDEV_RAIDZ_MAXPARITY	3
-
 #define	VDEV_PAD_SIZE		(8 << 10)
-/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
+/* 2 padding areas (vl_pad1 and vl_be) to skip */
 #define	VDEV_SKIP_SIZE		VDEV_PAD_SIZE * 2
 #define	VDEV_PHYS_SIZE		(112 << 10)
 #define	VDEV_UBERBLOCK_RING	(128 << 10)
@@ -443,12 +497,41 @@
 	zio_eck_t	vp_zbt;
 } vdev_phys_t;
 
+typedef enum vbe_vers {
+	/*
+	 * The bootenv file is stored as ASCII text in the envblock.
+	 * It is used by the GRUB bootloader used on Linux to store the
+	 * contents of the grubenv file. The file is stored as raw ASCII,
+	 * and is protected by an embedded checksum. By default, GRUB will
+	 * check if the boot filesystem supports storing the environment data
+	 * in a special location, and if so, will invoke filesystem specific
+	 * logic to retrieve it. This can be overridden by a variable, should
+	 * the user so desire.
+	 */
+	VB_RAW = 0,
+
+	/*
+	 * The bootenv file is converted to an nvlist and then packed into the
+	 * envblock.
+	 */
+	VB_NVLIST = 1
+} vbe_vers_t;
+
+typedef struct vdev_boot_envblock {
+	uint64_t	vbe_version;	/* presumably a vbe_vers_t; confirm */
+	char		vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) -
+			sizeof (zio_eck_t)];	/* rest of VDEV_PAD_SIZE */
+	zio_eck_t	vbe_zbt;
+} vdev_boot_envblock_t;	/* sizeof == VDEV_PAD_SIZE, see CTASSERT below */
+
+CTASSERT_GLOBAL(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE);
+
 typedef struct vdev_label {
 	char		vl_pad1[VDEV_PAD_SIZE];			/*  8K */
-	char		vl_pad2[VDEV_PAD_SIZE];			/*  8K */
+	vdev_boot_envblock_t	vl_be;				/*  8K */
 	vdev_phys_t	vl_vdev_phys;				/* 112K	*/
 	char		vl_uberblock[VDEV_UBERBLOCK_RING];	/* 128K	*/
-} vdev_label_t;							/* 256K total */
+} vdev_label_t;						/* 256K total */
 
 /*
  * vdev_dirty() flags
@@ -471,6 +554,9 @@
 #define	VDEV_LABEL_END_SIZE	(2 * sizeof (vdev_label_t))
 #define	VDEV_LABELS		4
 #define	VDEV_BEST_LABEL		VDEV_LABELS
+#define	VDEV_OFFSET_IS_LABEL(vd, off)                           \
+	(((off) < VDEV_LABEL_START_SIZE) ||                     \
+	((off) >= ((vd)->vdev_psize - VDEV_LABEL_END_SIZE)))
 
 #define	VDEV_ALLOC_LOAD		0
 #define	VDEV_ALLOC_ADD		1
@@ -516,6 +602,8 @@
 extern vdev_ops_t vdev_mirror_ops;
 extern vdev_ops_t vdev_replacing_ops;
 extern vdev_ops_t vdev_raidz_ops;
+extern vdev_ops_t vdev_draid_ops;
+extern vdev_ops_t vdev_draid_spare_ops;
 extern vdev_ops_t vdev_disk_ops;
 extern vdev_ops_t vdev_file_ops;
 extern vdev_ops_t vdev_missing_ops;
@@ -526,16 +614,20 @@
 /*
  * Common size functions
  */
-extern void vdev_default_xlate(vdev_t *vd, const range_seg_t *in,
-    range_seg_t *out);
+extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs);
 extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
+extern uint64_t vdev_default_min_asize(vdev_t *vd);
 extern uint64_t vdev_get_min_asize(vdev_t *vd);
 extern void vdev_set_min_asize(vdev_t *vd);
+extern uint64_t vdev_get_min_alloc(vdev_t *vd);
+extern uint64_t vdev_get_nparity(vdev_t *vd);
+extern uint64_t vdev_get_ndisks(vdev_t *vd);
 
 /*
  * Global variables
  */
-extern int vdev_standard_sm_blksz;
+extern int zfs_vdev_standard_sm_blksz;
 /* zdb uses this tunable, so it must be declared here to make lint happy. */
 extern int zfs_vdev_cache_size;
 
@@ -552,6 +644,16 @@
  * Other miscellaneous functions
  */
 int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
+void vdev_metaslab_group_create(vdev_t *vd);
+uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
+
+/*
+ * Vdev ashift optimization tunables
+ */
+extern uint64_t zfs_vdev_min_auto_ashift;
+extern uint64_t zfs_vdev_max_auto_ashift;
+int param_set_min_auto_ashift(ZFS_MODULE_PARAM_ARGS);
+int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/vdev_initialize.h b/zfs/include/sys/vdev_initialize.h
index 81d39eb..942fc71 100644
--- a/zfs/include/sys/vdev_initialize.h
+++ b/zfs/include/sys/vdev_initialize.h

@@ -33,6 +33,7 @@
 #endif
 
 extern void vdev_initialize(vdev_t *vd);
+extern void vdev_uninitialize(vdev_t *vd);
 extern void vdev_initialize_stop(vdev_t *vd,
     vdev_initializing_state_t tgt_state, list_t *vd_list);
 extern void vdev_initialize_stop_all(vdev_t *vd,

diff --git a/zfs/include/sys/vdev_raidz.h b/zfs/include/sys/vdev_raidz.h
index 0ce2b5e..c7cf0af 100644
--- a/zfs/include/sys/vdev_raidz.h
+++ b/zfs/include/sys/vdev_raidz.h

@@ -32,6 +32,8 @@
 #endif
 
 struct zio;
+struct raidz_col;
+struct raidz_row;
 struct raidz_map;
 #if !defined(_KERNEL)
 struct kernel_param {};
@@ -43,8 +45,14 @@
 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
     uint64_t);
 void vdev_raidz_map_free(struct raidz_map *);
+void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
 void vdev_raidz_generate_parity(struct raidz_map *);
-int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+void vdev_raidz_child_done(zio_t *);
+void vdev_raidz_io_done(zio_t *);
+void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
+
+extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
 
 /*
  * vdev_raidz_math interface
@@ -52,11 +60,16 @@
 void vdev_raidz_math_init(void);
 void vdev_raidz_math_fini(void);
 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
-int vdev_raidz_math_generate(struct raidz_map *);
-int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *,
-    const int);
+int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *);
+int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
+    const int *, const int *, const int);
 int vdev_raidz_impl_set(const char *);
 
+typedef struct vdev_raidz {
+	int vd_logical_width;
+	int vd_nparity;
+} vdev_raidz_t;
+
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/vdev_raidz_impl.h b/zfs/include/sys/vdev_raidz_impl.h
index 2e38962..908723d 100644
--- a/zfs/include/sys/vdev_raidz_impl.h
+++ b/zfs/include/sys/vdev_raidz_impl.h

@@ -29,6 +29,7 @@
 #include <sys/debug.h>
 #include <sys/kstat.h>
 #include <sys/abd.h>
+#include <sys/vdev_impl.h>
 
 #ifdef  __cplusplus
 extern "C" {
@@ -90,7 +91,7 @@
 typedef void		(*init_impl_f)(void);
 typedef void		(*fini_impl_f)(void);
 
-#define	RAIDZ_IMPL_NAME_MAX	(16)
+#define	RAIDZ_IMPL_NAME_MAX	(20)
 
 typedef struct raidz_impl_ops {
 	init_impl_f init;
@@ -105,34 +106,48 @@
 	uint64_t rc_devidx;		/* child device index for I/O */
 	uint64_t rc_offset;		/* device offset */
 	uint64_t rc_size;		/* I/O size */
+	abd_t rc_abdstruct;		/* rc_abd probably points here */
 	abd_t *rc_abd;			/* I/O data */
-	void *rc_gdata;			/* used to store the "good" version */
+	abd_t *rc_orig_data;		/* pre-reconstruction */
 	int rc_error;			/* I/O error for this device */
 	uint8_t rc_tried;		/* Did we attempt this I/O column? */
 	uint8_t rc_skipped;		/* Did we skip this I/O column? */
+	uint8_t rc_need_orig_restore;	/* need to restore from orig_data? */
+	uint8_t rc_force_repair;	/* Write good data to this column */
+	uint8_t rc_allow_repair;	/* Allow repair I/O to this column */
 } raidz_col_t;
 
+typedef struct raidz_row {
+	uint64_t rr_cols;		/* Regular column count */
+	uint64_t rr_scols;		/* Count including skipped columns */
+	uint64_t rr_bigcols;		/* Remainder data column count */
+	uint64_t rr_missingdata;	/* Count of missing data devices */
+	uint64_t rr_missingparity;	/* Count of missing parity devices */
+	uint64_t rr_firstdatacol;	/* First data column/parity count */
+	abd_t *rr_abd_empty;		/* dRAID empty sector buffer */
+	int rr_nempty;			/* empty sectors included in parity */
+#ifdef ZFS_DEBUG
+	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
+	uint64_t rr_size;		/* Physical size for *_io_verify() */
+#endif
+	raidz_col_t rr_col[0];		/* Flexible array of I/O columns */
+} raidz_row_t;
+
 typedef struct raidz_map {
-	uint64_t rm_cols;		/* Regular column count */
-	uint64_t rm_scols;		/* Count including skipped columns */
-	uint64_t rm_bigcols;		/* Number of oversized columns */
-	uint64_t rm_asize;		/* Actual total I/O size */
-	uint64_t rm_missingdata;	/* Count of missing data devices */
-	uint64_t rm_missingparity;	/* Count of missing parity devices */
-	uint64_t rm_firstdatacol;	/* First data column/parity count */
-	uint64_t rm_nskip;		/* Skipped sectors for padding */
-	uint64_t rm_skipstart;		/* Column index of padding start */
-	abd_t *rm_abd_copy;		/* rm_asize-buffer of copied data */
-	uintptr_t rm_reports;		/* # of referencing checksum reports */
-	uint8_t	rm_freed;		/* map no longer has referencing ZIO */
-	uint8_t	rm_ecksuminjected;	/* checksum error was injected */
+	boolean_t rm_ecksuminjected;	/* checksum error was injected */
+	int rm_nrows;			/* Regular row count */
+	int rm_nskip;			/* RAIDZ sectors skipped for padding */
+	int rm_skipstart;		/* Column index of padding start */
 	const raidz_impl_ops_t *rm_ops;	/* RAIDZ math operations */
-	raidz_col_t rm_col[1];		/* Flexible array of I/O columns */
+	raidz_row_t *rm_row[0];		/* flexible array of rows */
 } raidz_map_t;
 
+
 #define	RAIDZ_ORIGINAL_IMPL	(INT_MAX)
 
 extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
+extern boolean_t raidz_will_scalar_work(void);
+
 #if defined(__x86_64) && defined(HAVE_SSE2)	/* only x86_64 for now */
 extern const raidz_impl_ops_t vdev_raidz_sse2_impl;
 #endif
@@ -152,20 +167,24 @@
 extern const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl;
 extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl;
 #endif
+#if defined(__powerpc__)
+extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl;
+#endif
 
 /*
  * Commonly used raidz_map helpers
  *
  * raidz_parity		Returns parity of the RAIDZ block
  * raidz_ncols		Returns number of columns the block spans
+ *			Note, all rows have the same number of columns.
  * raidz_nbigcols	Returns number of big columns
  * raidz_col_p		Returns pointer to a column
  * raidz_col_size	Returns size of a column
  * raidz_big_size	Returns size of big columns
  * raidz_short_size	Returns size of short columns
  */
-#define	raidz_parity(rm)	((rm)->rm_firstdatacol)
-#define	raidz_ncols(rm)		((rm)->rm_cols)
+#define	raidz_parity(rm)	((rm)->rm_row[0]->rr_firstdatacol)
+#define	raidz_ncols(rm)		((rm)->rm_row[0]->rr_cols)
 #define	raidz_nbigcols(rm)	((rm)->rm_bigcols)
 #define	raidz_col_p(rm, c)	((rm)->rm_col + (c))
 #define	raidz_col_size(rm, c)	((rm)->rm_col[c].rc_size)
@@ -180,10 +199,10 @@
  */
 #define	_RAIDZ_GEN_WRAP(code, impl)					\
 static void								\
-impl ## _gen_ ## code(void *rmp)					\
+impl ## _gen_ ## code(void *rrp)					\
 {									\
-	raidz_map_t *rm = (raidz_map_t *)rmp;				\
-	raidz_generate_## code ## _impl(rm);				\
+	raidz_row_t *rr = (raidz_row_t *)rrp;				\
+	raidz_generate_## code ## _impl(rr);				\
 }
 
 /*
@@ -194,10 +213,10 @@
  */
 #define	_RAIDZ_REC_WRAP(code, impl)					\
 static int								\
-impl ## _rec_ ## code(void *rmp, const int *tgtidx)			\
+impl ## _rec_ ## code(void *rrp, const int *tgtidx)			\
 {									\
-	raidz_map_t *rm = (raidz_map_t *)rmp;				\
-	return (raidz_reconstruct_## code ## _impl(rm, tgtidx));	\
+	raidz_row_t *rr = (raidz_row_t *)rrp;				\
+	return (raidz_reconstruct_## code ## _impl(rr, tgtidx));	\
 }
 
 /*

diff --git a/zfs/include/sys/vdev_rebuild.h b/zfs/include/sys/vdev_rebuild.h
new file mode 100644
index 0000000..b59fbe1
--- /dev/null
+++ b/zfs/include/sys/vdev_rebuild.h

@@ -0,0 +1,101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018, Intel Corporation.
+ * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef	_SYS_VDEV_REBUILD_H
+#define	_SYS_VDEV_REBUILD_H
+
+#include <sys/spa.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Number of entries in the physical vdev_rebuild_phys structure.  This
+ * state is stored per top-level as VDEV_ZAP_TOP_VDEV_REBUILD_PHYS.
+ */
+#define	REBUILD_PHYS_ENTRIES	12
+
+/*
+ * On-disk rebuild configuration and state: REBUILD_PHYS_ENTRIES (12) uint64_t
+ * fields.  New fields must be appended to the end of the structure.
+ */
+typedef struct vdev_rebuild_phys {
+	uint64_t	vrp_rebuild_state;	/* vdev_rebuild_state_t */
+	uint64_t	vrp_last_offset;	/* last rebuilt offset */
+	uint64_t	vrp_min_txg;		/* minimum missing txg */
+	uint64_t	vrp_max_txg;		/* maximum missing txg */
+	uint64_t	vrp_start_time;		/* start time */
+	uint64_t	vrp_end_time;		/* end time */
+	uint64_t	vrp_scan_time_ms;	/* total run time in ms */
+	uint64_t	vrp_bytes_scanned;	/* alloc bytes scanned */
+	uint64_t	vrp_bytes_issued;	/* read bytes rebuilt */
+	uint64_t	vrp_bytes_rebuilt;	/* rebuilt bytes */
+	uint64_t	vrp_bytes_est;		/* total bytes to scan */
+	uint64_t	vrp_errors;		/* errors during rebuild */
+} vdev_rebuild_phys_t;
+
+/*
+ * The vdev_rebuild_t describes the current state and how a top-level vdev
+ * should be rebuilt.  The core elements are the top-vdev, the metaslab being
+ * rebuilt, range tree containing the allocated extents and the on-disk state.
+ */
+typedef struct vdev_rebuild {
+	vdev_t		*vr_top_vdev;		/* top-level vdev to rebuild */
+	metaslab_t	*vr_scan_msp;		/* scanning disabled metaslab */
+	range_tree_t	*vr_scan_tree;		/* scan ranges (in metaslab) */
+	kmutex_t	vr_io_lock;		/* inflight IO lock */
+	kcondvar_t	vr_io_cv;		/* inflight IO cv */
+
+	/* In-core state and progress */
+	uint64_t	vr_scan_offset[TXG_SIZE];
+	uint64_t	vr_prev_scan_time_ms;	/* any previous scan time */
+	uint64_t	vr_bytes_inflight_max;	/* maximum bytes inflight */
+	uint64_t	vr_bytes_inflight;	/* current bytes inflight */
+
+	/* Per-rebuild pass statistics for calculating bandwidth */
+	uint64_t	vr_pass_start_time;
+	uint64_t	vr_pass_bytes_scanned;
+	uint64_t	vr_pass_bytes_issued;
+
+	/* On-disk state updated by vdev_rebuild_zap_update_sync() */
+	vdev_rebuild_phys_t vr_rebuild_phys;
+} vdev_rebuild_t;
+
+boolean_t vdev_rebuild_active(vdev_t *);
+
+int vdev_rebuild_load(vdev_t *);
+void vdev_rebuild(vdev_t *);
+void vdev_rebuild_stop_wait(vdev_t *);
+void vdev_rebuild_stop_all(spa_t *);
+void vdev_rebuild_restart(spa_t *);
+void vdev_rebuild_clear_sync(void *, dmu_tx_t *);
+int vdev_rebuild_get_stats(vdev_t *, vdev_rebuild_stat_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_REBUILD_H */

diff --git a/zfs/include/sys/vdev_trim.h b/zfs/include/sys/vdev_trim.h
index 1e54017..16f4be2 100644
--- a/zfs/include/sys/vdev_trim.h
+++ b/zfs/include/sys/vdev_trim.h

@@ -44,6 +44,8 @@
 extern void vdev_autotrim_stop_all(spa_t *spa);
 extern void vdev_autotrim_stop_wait(vdev_t *vd);
 extern void vdev_autotrim_restart(spa_t *spa);
+extern int vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size);
+extern void vdev_trim_l2arc(spa_t *spa);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/zap.h b/zfs/include/sys/zap.h
index b19b464..fd7a3a1 100644
--- a/zfs/include/sys/zap.h
+++ b/zfs/include/sys/zap.h

@@ -110,7 +110,12 @@
 	 * already randomly distributed.
 	 */
 	ZAP_FLAG_PRE_HASHED_KEY = 1 << 2,
+#if defined(__linux__) && defined(_KERNEL)
+} zfs_zap_flags_t;
+#define	zap_flags_t	zfs_zap_flags_t
+#else
 } zap_flags_t;
+#endif
 
 /*
  * Create a new zapobj with no attributes and return its object number.

diff --git a/zfs/include/sys/zap_impl.h b/zfs/include/sys/zap_impl.h
index 250dde3..3c83448 100644
--- a/zfs/include/sys/zap_impl.h
+++ b/zfs/include/sys/zap_impl.h

@@ -66,10 +66,9 @@
 } mzap_phys_t;
 
 typedef struct mzap_ent {
-	avl_node_t mze_node;
-	int mze_chunkid;
-	uint64_t mze_hash;
-	uint32_t mze_cd; /* copy from mze_phys->mze_cd */
+	uint32_t mze_hash;
+	uint16_t mze_cd; /* copy from mze_phys->mze_cd */
+	uint16_t mze_chunkid;
 } mzap_ent_t;
 
 #define	MZE_PHYS(zap, mze) \
@@ -164,7 +163,7 @@
 			int16_t zap_num_entries;
 			int16_t zap_num_chunks;
 			int16_t zap_alloc_next;
-			avl_tree_t zap_avl;
+			zfs_btree_t zap_tree;
 		} zap_micro;
 	} zap_u;
 } zap_t;
@@ -202,7 +201,7 @@
     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
 void zap_unlockdir(zap_t *zap, void *tag);
 void zap_evict_sync(void *dbu);
-zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
+zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
 void zap_name_free(zap_name_t *zn);
 int zap_hashbits(zap_t *zap);
 uint32_t zap_maxcd(zap_t *zap);

diff --git a/zfs/include/sys/zcp.h b/zfs/include/sys/zcp.h
index 5cc520d..d7b1dfa 100644
--- a/zfs/include/sys/zcp.h
+++ b/zfs/include/sys/zcp.h

@@ -75,6 +75,7 @@
 	 * rather than the 'current' thread's.
 	 */
 	cred_t		*zri_cred;
+	proc_t		*zri_proc;
 
 	/*
 	 * The tx in which this channel program is running.
@@ -132,6 +133,14 @@
 	nvlist_t	*zri_outnvl;
 
 	/*
+	 * The keys of this nvlist are datasets which may be zvols and may need
+	 * to have device minor nodes created.  This information is passed from
+	 * syncing context (where the zvol is created) to open context (where we
+	 * create the minor nodes).
+	 */
+	nvlist_t	*zri_new_zvols;
+
+	/*
 	 * The errno number returned to caller of zcp_eval().
 	 */
 	int		zri_result;

diff --git a/zfs/include/sys/zcp_set.h b/zfs/include/sys/zcp_set.h
new file mode 100644
index 0000000..b7428d6
--- /dev/null
+++ b/zfs/include/sys/zcp_set.h

@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _SYS_ZCP_SET_H
+#define	_SYS_ZCP_SET_H
+
+#include <sys/dmu_tx.h>
+#include <sys/dsl_pool.h>
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+typedef struct zcp_set_prop_arg {
+	lua_State	*state;
+	const char	*dsname;
+	const char	*prop;
+	const char	*val;
+} zcp_set_prop_arg_t;
+
+int zcp_set_prop_check(void *arg, dmu_tx_t *tx);
+void zcp_set_prop_sync(void *arg, dmu_tx_t *tx);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZCP_SET_H */

diff --git a/zfs/include/sys/zfs_acl.h b/zfs/include/sys/zfs_acl.h
index 747f4e5..010686a 100644
--- a/zfs/include/sys/zfs_acl.h
+++ b/zfs/include/sys/zfs_acl.h

@@ -220,7 +220,7 @@
 extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
 extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
 extern int zfs_acl_access(struct znode *, int, cred_t *);
-void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
+int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
 int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
 int zfs_zaccess_rename(struct znode *, struct znode *,
     struct znode *, struct znode *, cred_t *cr);

diff --git a/zfs/include/sys/zfs_bootenv.h b/zfs/include/sys/zfs_bootenv.h
new file mode 100644
index 0000000..7af0a57
--- /dev/null
+++ b/zfs/include/sys/zfs_bootenv.h

@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#ifndef _ZFS_BOOTENV_H
+#define	_ZFS_BOOTENV_H
+
+/*
+ * Define macros for label bootenv nvlist pair keys.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	BOOTENV_VERSION		"version"
+
+#define	BE_ILLUMOS_VENDOR	"illumos"
+#define	BE_FREEBSD_VENDOR	"freebsd"
+#define	BE_GRUB_VENDOR		"grub"
+#define	BE_LINUX_VENDOR		"linux"
+
+#include <sys/zfs_bootenv_os.h>
+
+#define	GRUB_ENVMAP		BE_GRUB_VENDOR ":" "envmap"
+
+#define	FREEBSD_BOOTONCE	BE_FREEBSD_VENDOR ":" "bootonce"
+#define	FREEBSD_BOOTONCE_USED	BE_FREEBSD_VENDOR ":" "bootonce-used"
+#define	FREEBSD_NVSTORE		BE_FREEBSD_VENDOR ":" "nvstore"
+#define	ILLUMOS_BOOTONCE	BE_ILLUMOS_VENDOR ":" "bootonce"
+#define	ILLUMOS_BOOTONCE_USED	BE_ILLUMOS_VENDOR ":" "bootonce-used"
+#define	ILLUMOS_NVSTORE		BE_ILLUMOS_VENDOR ":" "nvstore"
+
+#define	OS_BOOTONCE		BOOTENV_OS ":" "bootonce"
+#define	OS_BOOTONCE_USED	BOOTENV_OS ":" "bootonce-used"
+#define	OS_NVSTORE		BOOTENV_OS ":" "nvstore"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_BOOTENV_H */

diff --git a/zfs/include/sys/zfs_context.h b/zfs/include/sys/zfs_context.h
index def9de7..235a73d 100644
--- a/zfs/include/sys/zfs_context.h
+++ b/zfs/include/sys/zfs_context.h

@@ -21,15 +21,26 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_CONTEXT_H
 #define	_SYS_ZFS_CONTEXT_H
 
-#ifdef __KERNEL__
+#ifdef __cplusplus
+extern "C" {
+#endif
 
+/*
+ * This code compiles in three different contexts. When __KERNEL__ is defined,
+ * the code uses "unix-like" kernel interfaces. When _STANDALONE is defined, the
+ * code is running in a reduced capacity environment of the boot loader which is
+ * generally a subset of both POSIX and kernel interfaces (with a few unique
+ * interfaces too). When neither is defined, it's in a userland POSIX or
+ * similar environment.
+ */
+#if defined(__KERNEL__) || defined(_STANDALONE)
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/atomic.h>
@@ -40,19 +51,17 @@
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/vmem.h>
+#include <sys/misc.h>
 #include <sys/taskq.h>
 #include <sys/param.h>
-#include <sys/kobj.h>
 #include <sys/disp.h>
 #include <sys/debug.h>
 #include <sys/random.h>
 #include <sys/strings.h>
 #include <sys/byteorder.h>
 #include <sys/list.h>
-#include <sys/uio_impl.h>
 #include <sys/time.h>
 #include <sys/zone.h>
-#include <sys/sdt.h>
 #include <sys/kstat.h>
 #include <sys/zfs_debug.h>
 #include <sys/sysevent.h>
@@ -63,10 +72,10 @@
 #include <sys/disp.h>
 #include <sys/trace.h>
 #include <sys/procfs_list.h>
-#include <linux/dcache_compat.h>
-#include <linux/utsname_compat.h>
-
-#else /* _KERNEL */
+#include <sys/mod.h>
+#include <sys/uio_impl.h>
+#include <sys/zfs_context_os.h>
+#else /* _KERNEL || _STANDALONE */
 
 #define	_SYS_MUTEX_H
 #define	_SYS_RWLOCK_H
@@ -88,7 +97,6 @@
 #include <pthread.h>
 #include <setjmp.h>
 #include <assert.h>
-#include <alloca.h>
 #include <umem.h>
 #include <limits.h>
 #include <atomic.h>
@@ -101,13 +109,12 @@
 #include <sys/types.h>
 #include <sys/cred.h>
 #include <sys/sysmacros.h>
-#include <sys/bitmap.h>
 #include <sys/resource.h>
 #include <sys/byteorder.h>
 #include <sys/list.h>
+#include <sys/mod.h>
 #include <sys/uio.h>
 #include <sys/zfs_debug.h>
-#include <sys/sdt.h>
 #include <sys/kstat.h>
 #include <sys/u8_textprep.h>
 #include <sys/sysevent.h>
@@ -115,6 +122,9 @@
 #include <sys/sunddi.h>
 #include <sys/debug.h>
 #include <sys/utsname.h>
+#include <sys/trace_zfs.h>
+
+#include <sys/zfs_context_os.h>
 
 /*
  * Stack
@@ -122,6 +132,7 @@
 
 #define	noinline	__attribute__((noinline))
 #define	likely(x)	__builtin_expect((x), 1)
+#define	unlikely(x)	__builtin_expect((x), 0)
 
 /*
  * Debugging
@@ -171,33 +182,39 @@
 #ifdef DTRACE_PROBE
 #undef	DTRACE_PROBE
 #endif	/* DTRACE_PROBE */
-#define	DTRACE_PROBE(a) \
-	ZFS_PROBE0(#a)
+#define	DTRACE_PROBE(a)
 
 #ifdef DTRACE_PROBE1
 #undef	DTRACE_PROBE1
 #endif	/* DTRACE_PROBE1 */
-#define	DTRACE_PROBE1(a, b, c) \
-	ZFS_PROBE1(#a, (unsigned long)c)
+#define	DTRACE_PROBE1(a, b, c)
 
 #ifdef DTRACE_PROBE2
 #undef	DTRACE_PROBE2
 #endif	/* DTRACE_PROBE2 */
-#define	DTRACE_PROBE2(a, b, c, d, e) \
-	ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e)
+#define	DTRACE_PROBE2(a, b, c, d, e)
 
 #ifdef DTRACE_PROBE3
 #undef	DTRACE_PROBE3
 #endif	/* DTRACE_PROBE3 */
-#define	DTRACE_PROBE3(a, b, c, d, e, f, g) \
-	ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g)
+#define	DTRACE_PROBE3(a, b, c, d, e, f, g)
 
 #ifdef DTRACE_PROBE4
 #undef	DTRACE_PROBE4
 #endif	/* DTRACE_PROBE4 */
-#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \
-	ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \
-	(unsigned long)i)
+#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)
+
+/*
+ * Tunables.  ZFS_MODULE_PARAM() and ZFS_MODULE_PARAM_CALL() expand to nothing.
+ */
+typedef struct zfs_kernel_param {
+	const char *name;	/* unused stub */
+} zfs_kernel_param_t;
+
+#define	ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc)
+#define	ZFS_MODULE_PARAM_ARGS void
+#define	ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, \
+	getfunc, perm, desc)
 
 /*
  * Threads.
@@ -211,6 +228,9 @@
 #define	kpreempt(x)	yield()
 #define	getcomm()	"unknown"
 
+#define	thread_create_named(name, stk, stksize, func, arg, len, \
+    pp, state, pri)	/* 'name' is accepted but not used */ \
+	thread_create(stk, stksize, func, arg, len, pp, state, pri)
 #define	thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
 	zk_thread_create(func, arg, stksize, state)
 #define	thread_exit()	pthread_exit(NULL)
@@ -308,49 +328,41 @@
 extern void cv_destroy(kcondvar_t *cv);
 extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
 extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp);
-extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
-extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+extern int cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
+extern int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
     hrtime_t res, int flag);
 extern void cv_signal(kcondvar_t *cv);
 extern void cv_broadcast(kcondvar_t *cv);
 
 #define	cv_timedwait_io(cv, mp, at)		cv_timedwait(cv, mp, at)
+#define	cv_timedwait_idle(cv, mp, at)		cv_timedwait(cv, mp, at)
 #define	cv_timedwait_sig(cv, mp, at)		cv_timedwait(cv, mp, at)
 #define	cv_wait_io(cv, mp)			cv_wait(cv, mp)
+#define	cv_wait_idle(cv, mp)			cv_wait(cv, mp)
 #define	cv_wait_io_sig(cv, mp)			cv_wait_sig(cv, mp)
 #define	cv_timedwait_sig_hires(cv, mp, t, r, f) \
 	cv_timedwait_hires(cv, mp, t, r, f)
+#define	cv_timedwait_idle_hires(cv, mp, t, r, f) \
+	cv_timedwait_hires(cv, mp, t, r, f)
 
 /*
  * Thread-specific data
  */
 #define	tsd_get(k) pthread_getspecific(k)
 #define	tsd_set(k, v) pthread_setspecific(k, v)
-#define	tsd_create(kp, d) pthread_key_create(kp, d)
+#define	tsd_create(kp, d) pthread_key_create((pthread_key_t *)(kp), d)
 #define	tsd_destroy(kp) /* nothing */
-
-/*
- * Thread-specific data
- */
-#define	tsd_get(k) pthread_getspecific(k)
-#define	tsd_set(k, v) pthread_setspecific(k, v)
-#define	tsd_create(kp, d) pthread_key_create(kp, d)
-#define	tsd_destroy(kp) /* nothing */
+#ifdef __FreeBSD__
+typedef off_t loff_t;
+#endif
 
 /*
  * kstat creation, installation and deletion
  */
 extern kstat_t *kstat_create(const char *, int,
     const char *, const char *, uchar_t, ulong_t, uchar_t);
-extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
 extern void kstat_install(kstat_t *);
 extern void kstat_delete(kstat_t *);
-extern void kstat_waitq_enter(kstat_io_t *);
-extern void kstat_waitq_exit(kstat_io_t *);
-extern void kstat_runq_enter(kstat_io_t *);
-extern void kstat_runq_exit(kstat_io_t *);
-extern void kstat_waitq_to_runq(kstat_io_t *);
-extern void kstat_runq_back_to_waitq(kstat_io_t *);
 extern void kstat_set_raw_ops(kstat_t *ksp,
     int (*headers)(char *buf, size_t size),
     int (*data)(char *buf, size_t size, void *data),
@@ -360,9 +372,6 @@
  * procfs list manipulation
  */
 
-struct seq_file { };
-void seq_printf(struct seq_file *m, const char *fmt, ...);
-
 typedef struct procfs_list {
 	void		*pl_private;
 	kmutex_t	pl_lock;
@@ -371,12 +380,17 @@
 	size_t		pl_node_offset;
 } procfs_list_t;
 
+#ifndef __cplusplus
+struct seq_file { };
+void seq_printf(struct seq_file *m, const char *fmt, ...);
+
 typedef struct procfs_list_node {
 	list_node_t	pln_link;
 	uint64_t	pln_id;
 } procfs_list_node_t;
 
 void procfs_list_install(const char *module,
+    const char *submodule,
     const char *name,
     mode_t mode,
     procfs_list_t *procfs_list,
@@ -387,6 +401,7 @@
 void procfs_list_uninstall(procfs_list_t *procfs_list);
 void procfs_list_destroy(procfs_list_t *procfs_list);
 void procfs_list_add(procfs_list_t *procfs_list, void *p);
+#endif	/* !__cplusplus */
 
 /*
  * Kernel memory
@@ -396,8 +411,7 @@
 #define	KM_NOSLEEP		UMEM_DEFAULT
 #define	KM_NORMALPRI		0	/* not needed with UMEM_DEFAULT */
 #define	KMC_NODEBUG		UMC_NODEBUG
-#define	KMC_KMEM		0x0
-#define	KMC_VMEM		0x0
+#define	KMC_KVMEM		0x0
 #define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
 #define	kmem_zalloc(_s, _f)	umem_zalloc(_s, _f)
 #define	kmem_free(_b, _s)	umem_free(_b, _s)
@@ -412,12 +426,9 @@
 #define	kmem_debugging()	0
 #define	kmem_cache_reap_now(_c)	umem_cache_reap_now(_c);
 #define	kmem_cache_set_move(_c, _cb)	/* nothing */
-#define	vmem_qcache_reap(_v)		/* nothing */
 #define	POINTER_INVALIDATE(_pp)		/* nothing */
 #define	POINTER_IS_VALID(_p)	0
 
-extern vmem_t *zio_arena;
-
 typedef umem_cache_t kmem_cache_t;
 
 typedef enum kmem_cbrc {
@@ -499,6 +510,7 @@
 extern void	taskq_wait_id(taskq_t *, taskqid_t);
 extern void	taskq_wait_outstanding(taskq_t *, taskqid_t);
 extern int	taskq_member(taskq_t *, kthread_t *);
+extern taskq_t	*taskq_of_curthread(void);
 extern int	taskq_cancel_id(taskq_t *, taskqid_t);
 extern void	system_taskq_init(void);
 extern void	system_taskq_fini(void);
@@ -506,16 +518,6 @@
 #define	XVA_MAPSIZE	3
 #define	XVA_MAGIC	0x78766174
 
-/*
- * vnodes
- */
-typedef struct vnode {
-	uint64_t	v_size;
-	int		v_fd;
-	char		*v_path;
-	int		v_dump_fd;
-} vnode_t;
-
 extern char *vn_dumpdir;
 #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
 
@@ -564,7 +566,6 @@
 	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
 } vsecattr_t;
 
-#define	AT_TYPE		0x00001
 #define	AT_MODE		0x00002
 #define	AT_UID		0x00004
 #define	AT_GID		0x00008
@@ -584,42 +585,7 @@
 #define	CRCREAT		0
 
 #define	F_FREESP	11
-
-extern int fop_getattr(vnode_t *vp, vattr_t *vap);
-
-#define	VOP_CLOSE(vp, f, c, o, cr, ct)	vn_close(vp)
-#define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct)	0
-#define	VOP_GETATTR(vp, vap, fl, cr, ct)  fop_getattr((vp), (vap));
-
-#define	VOP_FSYNC(vp, f, cr, ct)	fsync((vp)->v_fd)
-
-#if defined(HAVE_FILE_FALLOCATE) && \
-	defined(FALLOC_FL_PUNCH_HOLE) && \
-	defined(FALLOC_FL_KEEP_SIZE)
-#define	VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) \
-	fallocate((vp)->v_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \
-	    (flck)->l_start, (flck)->l_len)
-#else
-#define	VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) (0)
-#endif
-
-#define	VN_RELE(vp)	vn_close(vp)
-
-extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
-    int x2, int x3);
-extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
-    int x2, int x3, vnode_t *vp, int fd);
-extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
-    offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
-extern void vn_close(vnode_t *vp);
-
-#define	vn_remove(path, x1, x2)		remove(path)
-#define	vn_rename(from, to, seg)	rename((from), (to))
-#define	vn_is_readonly(vp)		B_FALSE
-
-extern vnode_t *rootdir;
-
-#include <sys/file.h>		/* for FREAD, FWRITE, etc */
+#define	FIGNORECASE	0x80000 /* request case-insensitive lookups */
 
 /*
  * Random stuff
@@ -641,9 +607,9 @@
 extern void delay(clock_t ticks);
 
 #define	SEC_TO_TICK(sec)	((sec) * hz)
-#define	MSEC_TO_TICK(msec)	((msec) / (MILLISEC / hz))
-#define	USEC_TO_TICK(usec)	((usec) / (MICROSEC / hz))
-#define	NSEC_TO_TICK(usec)	((usec) / (NANOSEC / hz))
+#define	MSEC_TO_TICK(msec)	(howmany((hrtime_t)(msec) * hz, MILLISEC))
+#define	USEC_TO_TICK(usec)	(howmany((hrtime_t)(usec) * hz, MICROSEC))
+#define	NSEC_TO_TICK(nsec)	(howmany((hrtime_t)(nsec) * hz, NANOSEC))
 
 #define	max_ncpus	64
 #define	boot_ncpus	(sysconf(_SC_NPROCESSORS_ONLN))
@@ -656,6 +622,7 @@
 #define	defclsyspri	0
 
 #define	CPU_SEQID	((uintptr_t)pthread_self() & (max_ncpus - 1))
+#define	CPU_SEQID_UNSTABLE	CPU_SEQID
 
 #define	kcred		NULL
 #define	CRED()		NULL
@@ -666,22 +633,37 @@
 #define	NN_NUMBUF_SZ	(6)
 
 extern uint64_t physmem;
-extern char *random_path;
-extern char *urandom_path;
+extern const char *random_path;
+extern const char *urandom_path;
 
 extern int highbit64(uint64_t i);
 extern int lowbit64(uint64_t i);
 extern int random_get_bytes(uint8_t *ptr, size_t len);
 extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
 
-extern void kernel_init(int);
+static __inline__ uint32_t
+random_in_range(uint32_t range)
+{
+	uint32_t sample;	/* filled by random_get_pseudo_bytes() */
+
+	ASSERT(range != 0);
+	if (range != 1) {
+		(void) random_get_pseudo_bytes((uint8_t *)&sample,
+		    sizeof (sample));
+		return (sample % range);
+	}
+	/* range == 1: the only possible result is 0, so skip the draw. */
+	return (0);
+}
+
+extern void kernel_init(int mode);
 extern void kernel_fini(void);
 extern void random_init(void);
 extern void random_fini(void);
 
 struct spa;
 extern void show_pool_stats(struct spa *);
-extern int set_global_var(char *arg);
+extern int set_global_var(char const *arg);
 
 typedef struct callb_cpr {
 	kmutex_t	*cc_lockp;
@@ -710,7 +692,8 @@
 
 extern char *kmem_vasprintf(const char *fmt, va_list adx);
 extern char *kmem_asprintf(const char *fmt, ...);
-#define	strfree(str) kmem_free((str), strlen(str) + 1)
+#define	kmem_strfree(str) kmem_free((str), strlen(str) + 1)
+#define	kmem_strdup(s)  strdup(s)
 
 /*
  * Hostname information
@@ -750,16 +733,12 @@
 #define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE	0x07
 #define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE	0x08
 
-extern struct _buf *kobj_open_file(char *name);
-extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
-    unsigned off);
-extern void kobj_close_file(struct _buf *file);
-extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
 extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
 extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
     cred_t *cr);
 extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
 extern int secpolicy_zfs(const cred_t *cr);
+extern int secpolicy_zfs_proc(const cred_t *cr, proc_t *proc);
 extern zoneid_t getzoneid(void);
 
 /* SID stuff */
@@ -792,7 +771,17 @@
 extern int __spl_pf_fstrans_check(void);
 extern int kmem_cache_reap_active(void);
 
-#define	____cacheline_aligned
 
-#endif /* _KERNEL */
+/*
+ * Kernel modules
+ */
+#define	__init
+#define	__exit
+
+#endif  /* __KERNEL__ || _STANDALONE */
+
+#ifdef __cplusplus
+}	/* no ';': it would be a stray empty declaration */
+#endif
+
 #endif	/* _SYS_ZFS_CONTEXT_H */

diff --git a/zfs/include/sys/zfs_ctldir.h b/zfs/include/sys/zfs_ctldir.h
deleted file mode 100644
index 51933bc..0000000
--- a/zfs/include/sys/zfs_ctldir.h
+++ /dev/null

@@ -1,103 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * LLNL-CODE-403049.
- * Rewritten for Linux by:
- *   Rohan Puri <rohan.puri15@gmail.com>
- *   Brian Behlendorf <behlendorf1@llnl.gov>
- */
-
-#ifndef	_ZFS_CTLDIR_H
-#define	_ZFS_CTLDIR_H
-
-#include <sys/vnode.h>
-#include <sys/pathname.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_znode.h>
-
-#define	ZFS_CTLDIR_NAME		".zfs"
-#define	ZFS_SNAPDIR_NAME	"snapshot"
-#define	ZFS_SHAREDIR_NAME	"shares"
-
-#define	zfs_has_ctldir(zdp)	\
-	((zdp)->z_id == ZTOZSB(zdp)->z_root && \
-	(ZTOZSB(zdp)->z_ctldir != NULL))
-#define	zfs_show_ctldir(zdp)	\
-	(zfs_has_ctldir(zdp) && \
-	(ZTOZSB(zdp)->z_show_ctldir))
-
-extern int zfs_expire_snapshot;
-
-/* zfsctl generic functions */
-extern int zfsctl_create(zfsvfs_t *);
-extern void zfsctl_destroy(zfsvfs_t *);
-extern struct inode *zfsctl_root(znode_t *);
-extern void zfsctl_init(void);
-extern void zfsctl_fini(void);
-extern boolean_t zfsctl_is_node(struct inode *ip);
-extern boolean_t zfsctl_is_snapdir(struct inode *ip);
-extern int zfsctl_fid(struct inode *ip, fid_t *fidp);
-
-/* zfsctl '.zfs' functions */
-extern int zfsctl_root_lookup(struct inode *dip, char *name,
-    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
-    pathname_t *realpnp);
-
-/* zfsctl '.zfs/snapshot' functions */
-extern int zfsctl_snapdir_lookup(struct inode *dip, char *name,
-    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
-    pathname_t *realpnp);
-extern int zfsctl_snapdir_rename(struct inode *sdip, char *sname,
-    struct inode *tdip, char *tname, cred_t *cr, int flags);
-extern int zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr,
-    int flags);
-extern int zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
-    struct inode **ipp, cred_t *cr, int flags);
-extern int zfsctl_snapshot_mount(struct path *path, int flags);
-extern int zfsctl_snapshot_unmount(char *snapname, int flags);
-extern int zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid,
-    int delay);
-extern int zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid,
-    int gen, struct inode **ipp);
-
-/* zfsctl '.zfs/shares' functions */
-extern int zfsctl_shares_lookup(struct inode *dip, char *name,
-    struct inode **ipp, int flags, cred_t *cr, int *direntflags,
-    pathname_t *realpnp);
-
-/*
- * These inodes numbers are reserved for the .zfs control directory.
- * It is important that they be no larger that 48-bits because only
- * 6 bytes are reserved in the NFS file handle for the object number.
- * However, they should be as large as possible to avoid conflicts
- * with the objects which are assigned monotonically by the dmu.
- */
-#define	ZFSCTL_INO_ROOT		0x0000FFFFFFFFFFFFULL
-#define	ZFSCTL_INO_SHARES	0x0000FFFFFFFFFFFEULL
-#define	ZFSCTL_INO_SNAPDIR	0x0000FFFFFFFFFFFDULL
-#define	ZFSCTL_INO_SNAPDIRS	0x0000FFFFFFFFFFFCULL
-
-#define	ZFSCTL_EXPIRE_SNAPSHOT	300
-
-#endif	/* _ZFS_CTLDIR_H */

diff --git a/zfs/include/sys/zfs_debug.h b/zfs/include/sys/zfs_debug.h
index 7968a01..7b10351 100644
--- a/zfs/include/sys/zfs_debug.h
+++ b/zfs/include/sys/zfs_debug.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_DEBUG_H
@@ -55,10 +55,13 @@
 #define	ZFS_DEBUG_SET_ERROR		(1 << 9)
 #define	ZFS_DEBUG_INDIRECT_REMAP	(1 << 10)
 #define	ZFS_DEBUG_TRIM			(1 << 11)
+#define	ZFS_DEBUG_LOG_SPACEMAP		(1 << 12)
+#define	ZFS_DEBUG_METASLAB_ALLOC	(1 << 13)
 
+extern void __set_error(const char *file, const char *func, int line, int err);
 extern void __zfs_dbgmsg(char *buf);
 extern void __dprintf(boolean_t dprint, const char *file, const char *func,
-    int line, const char *fmt, ...);
+    int line, const char *fmt, ...)  __attribute__((format(printf, 5, 6)));
 
 /*
  * Some general principles for using zfs_dbgmsg():

diff --git a/zfs/include/sys/zfs_dir.h b/zfs/include/sys/zfs_dir.h
deleted file mode 100644
index bcd4ec2..0000000
--- a/zfs/include/sys/zfs_dir.h
+++ /dev/null

@@ -1,76 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef	_SYS_FS_ZFS_DIR_H
-#define	_SYS_FS_ZFS_DIR_H
-
-#include <sys/pathname.h>
-#include <sys/dmu.h>
-#include <sys/zfs_znode.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/* zfs_dirent_lock() flags */
-#define	ZNEW		0x0001		/* entry should not exist */
-#define	ZEXISTS		0x0002		/* entry should exist */
-#define	ZSHARED		0x0004		/* shared access (zfs_dirlook()) */
-#define	ZXATTR		0x0008		/* we want the xattr dir */
-#define	ZRENAMING	0x0010		/* znode is being renamed */
-#define	ZCILOOK		0x0020		/* case-insensitive lookup requested */
-#define	ZCIEXACT	0x0040		/* c-i requires c-s match (rename) */
-#define	ZHAVELOCK	0x0080		/* z_name_lock is already held */
-
-/* mknode flags */
-#define	IS_ROOT_NODE	0x01		/* create a root node */
-#define	IS_XATTR	0x02		/* create an extended attribute node */
-#define	IS_TMPFILE	0x04		/* create a tmpfile */
-
-extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
-    int, int *, pathname_t *);
-extern void zfs_dirent_unlock(zfs_dirlock_t *);
-extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
-extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
-    boolean_t *);
-extern int zfs_dirlook(znode_t *, char *, struct inode **, int, int *,
-    pathname_t *);
-extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
-    uint_t, znode_t **, zfs_acl_ids_t *);
-extern void zfs_rmnode(znode_t *);
-extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
-extern boolean_t zfs_dirempty(znode_t *);
-extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
-extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
-extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
-extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
-extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
-extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_FS_ZFS_DIR_H */

diff --git a/zfs/include/sys/zfs_file.h b/zfs/include/sys/zfs_file.h
new file mode 100644
index 0000000..02cd1a6
--- /dev/null
+++ b/zfs/include/sys/zfs_file.h

@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef	_SYS_ZFS_FILE_H
+#define	_SYS_ZFS_FILE_H
+
+#include <sys/zfs_context.h>
+
+#ifndef _KERNEL	/* non-kernel builds get a local two-int struct */
+typedef struct zfs_file {
+	int f_fd;	/* presumably the open fd; confirm */
+	int f_dump_fd;	/* presumably a dump-file fd; confirm */
+} zfs_file_t;
+#elif defined(__linux__) || defined(__FreeBSD__)
+typedef struct file zfs_file_t;	/* kernel: alias of struct file */
+#else	/* kernel build on any other OS: fail the compile */
+#error "unknown OS"
+#endif	/* !_KERNEL */
+
+typedef struct zfs_file_attr {
+	uint64_t	zfa_size;	/* file size */
+	mode_t		zfa_mode;	/* file type */
+} zfs_file_attr_t;
+
+int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fp);
+void zfs_file_close(zfs_file_t *fp);
+
+int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid);
+int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off,
+    ssize_t *resid);
+int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
+int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
+    ssize_t *resid);
+
+int zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence);
+int zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr);
+int zfs_file_fsync(zfs_file_t *fp, int flags);
+int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len);
+loff_t zfs_file_off(zfs_file_t *fp);
+int zfs_file_unlink(const char *);
+
+zfs_file_t *zfs_file_get(int fd);
+void zfs_file_put(zfs_file_t *fp);
+void *zfs_file_private(zfs_file_t *fp);
+
+#endif /* _SYS_ZFS_FILE_H */

diff --git a/zfs/include/sys/zfs_fuid.h b/zfs/include/sys/zfs_fuid.h
index 5c56f7f..b5b37db 100644
--- a/zfs/include/sys/zfs_fuid.h
+++ b/zfs/include/sys/zfs_fuid.h

@@ -116,6 +116,8 @@
     char **retdomain, boolean_t addok);
 extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx);
 extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+extern int zfs_id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
+    char *buf, size_t len, boolean_t addok);
 #endif
 
 char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);

diff --git a/zfs/include/sys/zfs_ioctl.h b/zfs/include/sys/zfs_ioctl.h
index 6d098ee..ddce276 100644
--- a/zfs/include/sys/zfs_ioctl.h
+++ b/zfs/include/sys/zfs_ioctl.h

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright 2016 RackTop Systems.
  * Copyright (c) 2017, Intel Corporation.
  */
@@ -67,7 +67,8 @@
  * Property values for acltype
  */
 #define	ZFS_ACLTYPE_OFF			0
-#define	ZFS_ACLTYPE_POSIXACL		1
+#define	ZFS_ACLTYPE_POSIX		1
+#define	ZFS_ACLTYPE_NFSV4		2
 
 /*
  * Field manipulation macros for the drr_versioninfo field of the
@@ -101,22 +102,39 @@
 /* flag #18 is reserved for a Delphix feature */
 #define	DMU_BACKUP_FEATURE_LARGE_BLOCKS		(1 << 19)
 #define	DMU_BACKUP_FEATURE_RESUMING		(1 << 20)
-/* flag #21 is reserved for the redacted send/receive feature */
+#define	DMU_BACKUP_FEATURE_REDACTED		(1 << 21)
 #define	DMU_BACKUP_FEATURE_COMPRESSED		(1 << 22)
 #define	DMU_BACKUP_FEATURE_LARGE_DNODE		(1 << 23)
 #define	DMU_BACKUP_FEATURE_RAW			(1 << 24)
-/* flag #25 is reserved for the ZSTD compression feature */
+#define	DMU_BACKUP_FEATURE_ZSTD			(1 << 25)
 #define	DMU_BACKUP_FEATURE_HOLDS		(1 << 26)
+/*
+ * The SWITCH_TO_LARGE_BLOCKS feature indicates that we can receive
+ * incremental LARGE_BLOCKS streams (those with WRITE records of >128KB) even
+ * if the previous send did not use LARGE_BLOCKS, and thus its large blocks
+ * were split into multiple 128KB WRITE records.  (See
+ * flush_write_batch_impl() and receive_object()).  Older software that does
+ * not support this flag may encounter a bug when switching to large blocks,
+ * which causes files to incorrectly be zeroed.
+ *
+ * This flag is currently not set on any send streams.  In the future, we
+ * intend for incremental send streams of snapshots that have large blocks to
+ * use LARGE_BLOCKS by default, and these streams will also have the
+ * SWITCH_TO_LARGE_BLOCKS feature set. This ensures that streams from the
+ * default use of "zfs send" won't encounter the bug mentioned above.
+ */
+#define	DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
 
 /*
  * Mask of all supported backup features
  */
-#define	DMU_BACKUP_FEATURE_MASK	(DMU_BACKUP_FEATURE_DEDUP | \
-    DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
+#define	DMU_BACKUP_FEATURE_MASK	(DMU_BACKUP_FEATURE_SA_SPILL | \
     DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
     DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
     DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
-    DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS)
+    DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
+    DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
+    DMU_BACKUP_FEATURE_ZSTD)
 
 /* Are all features in the given flag word currently supported? */
 #define	DMU_STREAM_SUPPORTED(x)	(!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -208,25 +226,29 @@
 /*
  * zfs ioctl command structure
  */
+
+/* Header is used in C++ so can't forward declare untagged struct */
+struct drr_begin {
+	uint64_t drr_magic;
+	uint64_t drr_versioninfo; /* was drr_version */
+	uint64_t drr_creation_time;
+	dmu_objset_type_t drr_type;
+	uint32_t drr_flags;
+	uint64_t drr_toguid;
+	uint64_t drr_fromguid;
+	char drr_toname[MAXNAMELEN];
+};
+
 typedef struct dmu_replay_record {
 	enum {
 		DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
 		DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
-		DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
+		DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, DRR_REDACT,
 		DRR_NUMTYPES
 	} drr_type;
 	uint32_t drr_payloadlen;
 	union {
-		struct drr_begin {
-			uint64_t drr_magic;
-			uint64_t drr_versioninfo; /* was drr_version */
-			uint64_t drr_creation_time;
-			dmu_objset_type_t drr_type;
-			uint32_t drr_flags;
-			uint64_t drr_toguid;
-			uint64_t drr_fromguid;
-			char drr_toname[MAXNAMELEN];
-		} drr_begin;
+		struct drr_begin drr_begin;
 		struct drr_end {
 			zio_cksum_t drr_checksum;
 			uint64_t drr_toguid;
@@ -337,9 +359,15 @@
 			uint8_t drr_flags;
 			uint8_t drr_pad[3];
 		} drr_object_range;
+		struct drr_redact {
+			uint64_t drr_object;
+			uint64_t drr_offset;
+			uint64_t drr_length;
+			uint64_t drr_toguid;
+		} drr_redact;
 
 		/*
-		 * Nore: drr_checksum is overlaid with all record types
+		 * Note: drr_checksum is overlaid with all record types
 		 * except DRR_BEGIN.  Therefore its (non-pad) members
 		 * must not overlap with members from the other structs.
 		 * We accomplish this by putting its members at the very
@@ -487,6 +515,7 @@
 	uint64_t	zc_fromobj;
 	uint64_t	zc_createtxg;
 	zfs_stat_t	zc_stat;
+	uint64_t	zc_zoneid;
 } zfs_cmd_t;
 
 #define BLOCK_DIFF_MAGIC    ((uint8_t)0xa9)
@@ -499,7 +528,6 @@
 } zfs_useracct_t;
 
 #define	ZFSDEV_MAX_MINOR	(1 << 16)
-#define	ZFS_MIN_MINOR	(ZFSDEV_MAX_MINOR + 1)
 
 #define	ZPOOL_EXPORT_AFTER_SPLIT 0x1
 
@@ -535,16 +563,18 @@
  */
 typedef struct zfsdev_state {
 	struct zfsdev_state	*zs_next;	/* next zfsdev_state_t link */
-	struct file		*zs_file;	/* associated file struct */
 	minor_t			zs_minor;	/* made up minor number */
 	void			*zs_onexit;	/* onexit data */
 	void			*zs_zevent;	/* zevent data */
 } zfsdev_state_t;
 
 extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
-extern int zfsdev_getminor(struct file *filp, minor_t *minorp);
+extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
 extern minor_t zfsdev_minor_alloc(void);
 
+extern uint_t zfs_fsyncer_key;
+extern uint_t zfs_allow_log_key;
+
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus

diff --git a/zfs/include/sys/zfs_ioctl_impl.h b/zfs/include/sys/zfs_ioctl_impl.h
new file mode 100644
index 0000000..5774e9e
--- /dev/null
+++ b/zfs/include/sys/zfs_ioctl_impl.h

@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+
+#ifndef _ZFS_IOCTL_IMPL_H_
+#define	_ZFS_IOCTL_IMPL_H_
+
+extern kmutex_t zfsdev_state_lock;
+extern zfsdev_state_t *zfsdev_state_list;
+extern unsigned long zfs_max_nvlist_src_size;
+
+typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
+typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
+typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
+
+typedef enum {
+	POOL_CHECK_NONE		= 1 << 0,
+	POOL_CHECK_SUSPENDED	= 1 << 1,
+	POOL_CHECK_READONLY	= 1 << 2,
+} zfs_ioc_poolcheck_t;
+
+typedef enum {
+	NO_NAME,
+	POOL_NAME,
+	DATASET_NAME,
+	ENTITY_NAME
+} zfs_ioc_namecheck_t;
+
+/*
+ * IOC Keys are used to document and validate user->kernel interface inputs.
+ * See zfs_keys_recv_new for an example declaration. Any key name that is not
+ * listed will be rejected as input.
+ *
+ * The keyname 'optional' is always allowed, and must be an nvlist if present.
+ * Arguments which older kernels can safely ignore can be placed under the
+ * "optional" key.
+ *
+ * When adding new keys to an existing ioc for new functionality, consider:
+ *	- adding an entry into zfs_sysfs.c zfs_features[] list
+ *	- updating the libzfs_input_check.c test utility
+ *
+ * Note: in the ZK_WILDCARDLIST case, the name serves as documentation
+ * for the expected name (bookmark, snapshot, property, etc) but there
+ * is no validation in the preflight zfs_check_input_nvpairs() check.
+ */
+typedef enum {
+	ZK_OPTIONAL = 1 << 0,		/* pair is optional */
+	ZK_WILDCARDLIST = 1 << 1,	/* one or more unspecified key names */
+} ioc_key_flag_t;
+
+typedef struct zfs_ioc_key {
+	const char	*zkey_name;
+	data_type_t	zkey_type;
+	ioc_key_flag_t	zkey_flags;
+} zfs_ioc_key_t;
+
+int zfs_secpolicy_config(zfs_cmd_t *, nvlist_t *, cred_t *);
+
+void zfs_ioctl_register_dataset_nolog(zfs_ioc_t, zfs_ioc_legacy_func_t *,
+    zfs_secpolicy_func_t *, zfs_ioc_poolcheck_t);
+
+void zfs_ioctl_register(const char *, zfs_ioc_t, zfs_ioc_func_t *,
+    zfs_secpolicy_func_t *, zfs_ioc_namecheck_t, zfs_ioc_poolcheck_t,
+    boolean_t, boolean_t, const zfs_ioc_key_t *, size_t);
+
+uint64_t zfs_max_nvlist_src_size_os(void);
+void zfs_ioctl_update_mount_cache(const char *dsname);
+void zfs_ioctl_init_os(void);
+
+boolean_t zfs_vfs_held(zfsvfs_t *);
+int zfs_vfs_ref(zfsvfs_t **);
+void zfs_vfs_rele(zfsvfs_t *);
+
+long zfsdev_ioctl_common(uint_t, zfs_cmd_t *, int);
+int zfsdev_attach(void);
+void zfsdev_detach(void);
+int zfs_kmod_init(void);
+void zfs_kmod_fini(void);
+
+#endif	/* _ZFS_IOCTL_IMPL_H_ */

diff --git a/zfs/include/sys/zfs_onexit.h b/zfs/include/sys/zfs_onexit.h
index 4982bd4..fd3030e 100644
--- a/zfs/include/sys/zfs_onexit.h
+++ b/zfs/include/sys/zfs_onexit.h

@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ZFS_ONEXIT_H
@@ -50,14 +51,10 @@
 
 #endif
 
-extern int zfs_onexit_fd_hold(int fd, minor_t *minorp);
-extern void zfs_onexit_fd_rele(int fd);
+extern zfs_file_t *zfs_onexit_fd_hold(int fd, minor_t *minorp);
+extern void zfs_onexit_fd_rele(zfs_file_t *);
 extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle);
-extern int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle,
-    boolean_t fire);
-extern int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle,
-    void **data);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/zfs_project.h b/zfs/include/sys/zfs_project.h
index 52d5204..81a2389 100644
--- a/zfs/include/sys/zfs_project.h
+++ b/zfs/include/sys/zfs_project.h

@@ -32,7 +32,7 @@
 #endif
 #endif
 
-#include <linux/fs.h>
+#include <sys/vfs.h>
 
 #ifdef FS_PROJINHERIT_FL
 #define	ZFS_PROJINHERIT_FL	FS_PROJINHERIT_FL

diff --git a/zfs/include/sys/zfs_quota.h b/zfs/include/sys/zfs_quota.h
new file mode 100644
index 0000000..b215b8d
--- /dev/null
+++ b/zfs/include/sys/zfs_quota.h

@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _SYS_ZFS_QUOTA_H
+#define	_SYS_ZFS_QUOTA_H
+
+#include <sys/dmu.h>
+#include <sys/fs/zfs.h>
+
+struct zfsvfs;
+struct zfs_file_info;
+
+extern int zpl_get_file_info(dmu_object_type_t,
+    const void *, struct zfs_file_info *);
+
+extern int zfs_userspace_one(struct zfsvfs *, zfs_userquota_prop_t,
+    const char *, uint64_t, uint64_t *);
+extern int zfs_userspace_many(struct zfsvfs *, zfs_userquota_prop_t,
+    uint64_t *, void *, uint64_t *);
+extern int zfs_set_userquota(struct zfsvfs *, zfs_userquota_prop_t,
+    const char *, uint64_t, uint64_t);
+
+extern boolean_t zfs_id_overobjquota(struct zfsvfs *, uint64_t, uint64_t);
+extern boolean_t zfs_id_overblockquota(struct zfsvfs *, uint64_t, uint64_t);
+extern boolean_t zfs_id_overquota(struct zfsvfs *, uint64_t, uint64_t);
+
+#endif

diff --git a/zfs/include/sys/zfs_racct.h b/zfs/include/sys/zfs_racct.h
new file mode 100644
index 0000000..cfcdd33
--- /dev/null
+++ b/zfs/include/sys/zfs_racct.h

@@ -0,0 +1,37 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Portions Copyright 2021 iXsystems, Inc.
+ */
+
+#ifndef _SYS_ZFS_RACCT_H
+#define	_SYS_ZFS_RACCT_H
+
+#include <sys/zfs_context.h>
+
+/*
+ * Platform-dependent resource accounting hooks
+ */
+void zfs_racct_read(uint64_t size, uint64_t iops);
+void zfs_racct_write(uint64_t size, uint64_t iops);
+
+#endif /* _SYS_ZFS_RACCT_H */

diff --git a/zfs/include/sys/zfs_refcount.h b/zfs/include/sys/zfs_refcount.h
new file mode 100644
index 0000000..1e64494
--- /dev/null
+++ b/zfs/include/sys/zfs_refcount.h

@@ -0,0 +1,126 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ */
+
+#ifndef	_SYS_ZFS_REFCOUNT_H
+#define	_SYS_ZFS_REFCOUNT_H
+
+#include <sys/inttypes.h>
+#include <sys/list.h>
+#include <sys/zfs_context.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * If the reference is held only by the calling function and not any
+ * particular object, use FTAG (which is a string) for the holder_tag.
+ * Otherwise, use the object that holds the reference.
+ */
+#define	FTAG ((char *)(uintptr_t)__func__)
+
+#ifdef	ZFS_DEBUG
+typedef struct reference {
+	list_node_t ref_link;
+	const void *ref_holder;
+	uint64_t ref_number;
+	uint8_t *ref_removed;
+} reference_t;
+
+typedef struct refcount {
+	kmutex_t rc_mtx;
+	boolean_t rc_tracked;
+	list_t rc_list;
+	list_t rc_removed;
+	uint64_t rc_count;
+	uint64_t rc_removed_count;
+} zfs_refcount_t;
+
+/*
+ * Note: zfs_refcount_t must be initialized with
+ * zfs_refcount_create[_untracked]()
+ */
+
+void zfs_refcount_create(zfs_refcount_t *);
+void zfs_refcount_create_untracked(zfs_refcount_t *);
+void zfs_refcount_create_tracked(zfs_refcount_t *);
+void zfs_refcount_destroy(zfs_refcount_t *);
+void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t);
+int zfs_refcount_is_zero(zfs_refcount_t *);
+int64_t zfs_refcount_count(zfs_refcount_t *);
+int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
+int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
+int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
+int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
+void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
+void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *,
+    const void *);
+void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
+    const void *, const void *);
+boolean_t zfs_refcount_held(zfs_refcount_t *, const void *);
+boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *);
+
+void zfs_refcount_init(void);
+void zfs_refcount_fini(void);
+
+#else	/* ZFS_DEBUG */
+
+typedef struct refcount {
+	uint64_t rc_count;
+} zfs_refcount_t;
+
+#define	zfs_refcount_create(rc) ((rc)->rc_count = 0)
+#define	zfs_refcount_create_untracked(rc) ((rc)->rc_count = 0)
+#define	zfs_refcount_create_tracked(rc) ((rc)->rc_count = 0)
+#define	zfs_refcount_destroy(rc) ((rc)->rc_count = 0)
+#define	zfs_refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
+#define	zfs_refcount_is_zero(rc) (zfs_refcount_count(rc) == 0)
+#define	zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
+#define	zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
+#define	zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
+#define	zfs_refcount_add_many(rc, number, holder) \
+	atomic_add_64_nv(&(rc)->rc_count, (number))
+#define	zfs_refcount_remove_many(rc, number, holder) \
+	atomic_add_64_nv(&(rc)->rc_count, -(number))
+#define	zfs_refcount_transfer(dst, src) { \
+	uint64_t __tmp = zfs_refcount_count(src); \
+	atomic_add_64(&(src)->rc_count, -__tmp); \
+	atomic_add_64(&(dst)->rc_count, __tmp); \
+}
+#define	zfs_refcount_transfer_ownership(rc, ch, nh)		((void)0)
+#define	zfs_refcount_transfer_ownership_many(rc, nr, ch, nh)	((void)0)
+#define	zfs_refcount_held(rc, holder)		(zfs_refcount_count(rc) > 0)
+#define	zfs_refcount_not_held(rc, holder)		(B_TRUE)
+
+#define	zfs_refcount_init()
+#define	zfs_refcount_fini()
+
+#endif	/* ZFS_DEBUG */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFS_REFCOUNT_H */

diff --git a/zfs/include/sys/zfs_rlock.h b/zfs/include/sys/zfs_rlock.h
index 0ac1561..2302abb 100644
--- a/zfs/include/sys/zfs_rlock.h
+++ b/zfs/include/sys/zfs_rlock.h

@@ -71,6 +71,8 @@
 
 zfs_locked_range_t *zfs_rangelock_enter(zfs_rangelock_t *,
     uint64_t, uint64_t, zfs_rangelock_type_t);
+zfs_locked_range_t *zfs_rangelock_tryenter(zfs_rangelock_t *,
+    uint64_t, uint64_t, zfs_rangelock_type_t);
 void zfs_rangelock_exit(zfs_locked_range_t *);
 void zfs_rangelock_reduce(zfs_locked_range_t *, uint64_t, uint64_t);
 

diff --git a/zfs/include/sys/zfs_sa.h b/zfs/include/sys/zfs_sa.h
index 4e6d286..1ca7ced 100644
--- a/zfs/include/sys/zfs_sa.h
+++ b/zfs/include/sys/zfs_sa.h

@@ -134,7 +134,7 @@
 #define	DXATTR_MAX_ENTRY_SIZE	(32768)
 #define	DXATTR_MAX_SA_SIZE	(SPA_OLD_MAXBLOCKSIZE >> 1)
 
-int zfs_sa_readlink(struct znode *, uio_t *);
+int zfs_sa_readlink(struct znode *, zfs_uio_t *);
 void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
 void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
 void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);

diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h
index 457d027..a438c86 100644
--- a/zfs/include/sys/zfs_vfsops.h
+++ b/zfs/include/sys/zfs_vfsops.h

@@ -18,215 +18,18 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Portions Copyright 2020 iXsystems, Inc.
  */
 
-#ifndef	_SYS_FS_ZFS_VFSOPS_H
-#define	_SYS_FS_ZFS_VFSOPS_H
+#ifndef _SYS_ZFS_VFSOPS_H
+#define	_SYS_ZFS_VFSOPS_H
 
-#include <sys/dataset_kstats.h>
-#include <sys/isa_defs.h>
-#include <sys/types32.h>
-#include <sys/list.h>
-#include <sys/vfs.h>
-#include <sys/zil.h>
-#include <sys/sa.h>
-#include <sys/rrwlock.h>
-#include <sys/dsl_dataset.h>
-#include <sys/zfs_ioctl.h>
-
-#ifdef	__cplusplus
-extern "C" {
+#ifdef _KERNEL
+#include <sys/zfs_vfsops_os.h>
 #endif
 
-typedef struct zfsvfs zfsvfs_t;
-struct znode;
+extern void zfsvfs_update_fromname(const char *, const char *);
 
-/*
- * This structure emulates the vfs_t from other platforms.  It's purpose
- * is to facilitate the handling of mount options and minimize structural
- * differences between the platforms.
- */
-typedef struct vfs {
-	struct zfsvfs	*vfs_data;
-	char		*vfs_mntpoint;	/* Primary mount point */
-	uint64_t	vfs_xattr;
-	boolean_t	vfs_readonly;
-	boolean_t	vfs_do_readonly;
-	boolean_t	vfs_setuid;
-	boolean_t	vfs_do_setuid;
-	boolean_t	vfs_exec;
-	boolean_t	vfs_do_exec;
-	boolean_t	vfs_devices;
-	boolean_t	vfs_do_devices;
-	boolean_t	vfs_do_xattr;
-	boolean_t	vfs_atime;
-	boolean_t	vfs_do_atime;
-	boolean_t	vfs_relatime;
-	boolean_t	vfs_do_relatime;
-	boolean_t	vfs_nbmand;
-	boolean_t	vfs_do_nbmand;
-} vfs_t;
-
-typedef struct zfs_mnt {
-	const char	*mnt_osname;	/* Objset name */
-	char		*mnt_data;	/* Raw mount options */
-} zfs_mnt_t;
-
-struct zfsvfs {
-	vfs_t		*z_vfs;		/* generic fs struct */
-	struct super_block *z_sb;	/* generic super_block */
-	struct zfsvfs	*z_parent;	/* parent fs */
-	objset_t	*z_os;		/* objset reference */
-	uint64_t	z_flags;	/* super_block flags */
-	uint64_t	z_root;		/* id of root znode */
-	uint64_t	z_unlinkedobj;	/* id of unlinked zapobj */
-	uint64_t	z_max_blksz;	/* maximum block size for files */
-	uint64_t	z_fuid_obj;	/* fuid table object number */
-	uint64_t	z_fuid_size;	/* fuid table size */
-	avl_tree_t	z_fuid_idx;	/* fuid tree keyed by index */
-	avl_tree_t	z_fuid_domain;	/* fuid tree keyed by domain */
-	krwlock_t	z_fuid_lock;	/* fuid lock */
-	boolean_t	z_fuid_loaded;	/* fuid tables are loaded */
-	boolean_t	z_fuid_dirty;   /* need to sync fuid table ? */
-	struct zfs_fuid_info	*z_fuid_replay; /* fuid info for replay */
-	zilog_t		*z_log;		/* intent log pointer */
-	uint_t		z_acl_inherit;	/* acl inheritance behavior */
-	uint_t		z_acl_type;	/* type of ACL usable on this FS */
-	zfs_case_t	z_case;		/* case-sense */
-	boolean_t	z_utf8;		/* utf8-only */
-	int		z_norm;		/* normalization flags */
-	boolean_t	z_relatime;	/* enable relatime mount option */
-	boolean_t	z_unmounted;	/* unmounted */
-	rrmlock_t	z_teardown_lock;
-	krwlock_t	z_teardown_inactive_lock;
-	list_t		z_all_znodes;	/* all znodes in the fs */
-	uint64_t	z_nr_znodes;	/* number of znodes in the fs */
-	unsigned long	z_rollback_time; /* last online rollback time */
-	unsigned long	z_snap_defer_time; /* last snapshot unmount deferral */
-	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
-	arc_prune_t	*z_arc_prune;	/* called by ARC to prune caches */
-	struct inode	*z_ctldir;	/* .zfs directory inode */
-	boolean_t	z_show_ctldir;	/* expose .zfs in the root dir */
-	boolean_t	z_issnap;	/* true if this is a snapshot */
-	boolean_t	z_vscan;	/* virus scan on/off */
-	boolean_t	z_use_fuids;	/* version allows fuids */
-	boolean_t	z_replay;	/* set during ZIL replay */
-	boolean_t	z_use_sa;	/* version allow system attributes */
-	boolean_t	z_xattr_sa;	/* allow xattrs to be stores as SA */
-	boolean_t	z_draining;	/* is true when drain is active */
-	boolean_t	z_drain_cancel; /* signal the unlinked drain to stop */
-	uint64_t	z_version;	/* ZPL version */
-	uint64_t	z_shares_dir;	/* hidden shares dir */
-	dataset_kstats_t	z_kstat;	/* fs kstats */
-	kmutex_t	z_lock;
-	uint64_t	z_userquota_obj;
-	uint64_t	z_groupquota_obj;
-	uint64_t	z_userobjquota_obj;
-	uint64_t	z_groupobjquota_obj;
-	uint64_t	z_projectquota_obj;
-	uint64_t	z_projectobjquota_obj;
-	uint64_t	z_replay_eof;	/* New end of file - replay only */
-	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
-	uint64_t	z_hold_size;	/* znode hold array size */
-	avl_tree_t	*z_hold_trees;	/* znode hold trees */
-	kmutex_t	*z_hold_locks;	/* znode hold locks */
-	taskqid_t	z_drain_task;	/* task id for the unlink drain task */
-};
-
-#define	ZSB_XATTR	0x0001		/* Enable user xattrs */
-
-/*
- * Allow a maximum number of links.  While ZFS does not internally limit
- * this the inode->i_nlink member is defined as an unsigned int.  To be
- * safe we use 2^31-1 as the limit.
- */
-#define	ZFS_LINK_MAX		((1U << 31) - 1U)
-
-/*
- * Normal filesystems (those not under .zfs/snapshot) have a total
- * file ID size limited to 12 bytes (including the length field) due to
- * NFSv2 protocol's limitation of 32 bytes for a filehandle.  For historical
- * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
- * (although the NFSv3 protocol actually permits a maximum of 64 bytes).  It
- * is not possible to expand beyond 12 bytes without abandoning support
- * of NFSv2.
- *
- * For normal filesystems, we partition up the available space as follows:
- *	2 bytes		fid length (required)
- *	6 bytes		object number (48 bits)
- *	4 bytes		generation number (32 bits)
- *
- * We reserve only 48 bits for the object number, as this is the limit
- * currently defined and imposed by the DMU.
- */
-typedef struct zfid_short {
-	uint16_t	zf_len;
-	uint8_t		zf_object[6];		/* obj[i] = obj >> (8 * i) */
-	uint8_t		zf_gen[4];		/* gen[i] = gen >> (8 * i) */
-} zfid_short_t;
-
-/*
- * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
- * (including the length field).  This makes files under .zfs/snapshot
- * accessible by NFSv3 and NFSv4, but not NFSv2.
- *
- * For files under .zfs/snapshot, we partition up the available space
- * as follows:
- *	2 bytes		fid length (required)
- *	6 bytes		object number (48 bits)
- *	4 bytes		generation number (32 bits)
- *	6 bytes		objset id (48 bits)
- *	4 bytes		currently just zero (32 bits)
- *
- * We reserve only 48 bits for the object number and objset id, as these are
- * the limits currently defined and imposed by the DMU.
- */
-typedef struct zfid_long {
-	zfid_short_t	z_fid;
-	uint8_t		zf_setid[6];		/* obj[i] = obj >> (8 * i) */
-	uint8_t		zf_setgen[4];		/* gen[i] = gen >> (8 * i) */
-} zfid_long_t;
-
-#define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
-#define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))
-
-extern uint_t zfs_fsyncer_key;
-
-extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
-extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
-extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    const char *domain, uint64_t rid, uint64_t *valuep);
-extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
-extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    const char *domain, uint64_t rid, uint64_t quota);
-extern boolean_t zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
-    uint64_t id);
-extern boolean_t zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
-    uint64_t id);
-extern boolean_t zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
-    uint64_t id);
-extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
-extern int zfsvfs_create(const char *name, boolean_t readony, zfsvfs_t **zfvp);
-extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
-extern void zfsvfs_free(zfsvfs_t *zfsvfs);
-extern int zfs_check_global_label(const char *dsname, const char *hexsl);
-
-extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
-extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent);
-extern void zfs_preumount(struct super_block *sb);
-extern int zfs_umount(struct super_block *sb);
-extern int zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm);
-extern int zfs_statvfs(struct dentry *dentry, struct kstatfs *statp);
-extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp);
-extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan,
-    int *objects);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_FS_ZFS_VFSOPS_H */
+#endif /* _SYS_ZFS_VFSOPS_H */

diff --git a/zfs/include/sys/zfs_vnops.h b/zfs/include/sys/zfs_vnops.h
index 767cba1..18259f0 100644
--- a/zfs/include/sys/zfs_vnops.h
+++ b/zfs/include/sys/zfs_vnops.h

@@ -24,66 +24,32 @@
 
 #ifndef	_SYS_FS_ZFS_VNOPS_H
 #define	_SYS_FS_ZFS_VNOPS_H
+#include <sys/zfs_vnops_os.h>
 
-#include <sys/vnode.h>
-#include <sys/xvattr.h>
-#include <sys/uio.h>
-#include <sys/cred.h>
-#include <sys/fcntl.h>
-#include <sys/pathname.h>
-#include <sys/zpl.h>
+extern int zfs_fsync(znode_t *, int, cred_t *);
+extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
+extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
+extern int zfs_holey(znode_t *, ulong_t, loff_t *);
+extern int zfs_access(znode_t *, int, int, cred_t *);
 
-#ifdef	__cplusplus
-extern "C" {
+extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *);
+extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *);
+
+extern int mappedread(znode_t *, int, zfs_uio_t *);
+extern int mappedread_sf(znode_t *, int, zfs_uio_t *);
+extern void update_pages(znode_t *, int64_t, int, objset_t *);
+
+/*
+ * Platform code that asynchronously drops zp's inode / vnode_t.
+ *
+ * Asynchronous dropping ensures that the caller will never drop the
+ * last reference on an inode / vnode_t in the current context.
+ * Doing so while holding open a tx could result in a deadlock if
+ * the platform calls into filesystem again in the implementation
+ * of inode / vnode_t dropping (e.g. call from iput_final()).
+ */
+extern void zfs_zrele_async(znode_t *zp);
+
+extern zil_get_data_t zfs_get_data;
+
 #endif
-
-extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr);
-extern int zfs_close(struct inode *ip, int flag, cred_t *cr);
-extern int zfs_holey(struct inode *ip, int cmd, loff_t *off);
-extern int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
-extern int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
-extern int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr);
-extern int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp,
-    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp);
-extern int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
-extern int zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
-extern int zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags);
-extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
-    struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp);
-extern int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd,
-    cred_t *cr, int flags);
-extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
-extern int zfs_fsync(struct inode *ip, int syncflag, cred_t *cr);
-extern int zfs_getattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr);
-extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp);
-extern int zfs_setattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr);
-extern int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip,
-    char *tnm, cred_t *cr, int flags);
-extern int zfs_symlink(struct inode *dip, char *name, vattr_t *vap,
-    char *link, struct inode **ipp, cred_t *cr, int flags);
-extern int zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr);
-extern int zfs_link(struct inode *tdip, struct inode *sip,
-    char *name, cred_t *cr, int flags);
-extern void zfs_inactive(struct inode *ip);
-extern int zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
-    offset_t offset, cred_t *cr);
-extern int zfs_fid(struct inode *ip, fid_t *fidp);
-extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
-    cred_t *cr);
-extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
-    cred_t *cr);
-extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
-extern int zfs_putpage(struct inode *ip, struct page *pp,
-    struct writeback_control *wbc);
-extern int zfs_dirty_inode(struct inode *ip, int flags);
-extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
-    size_t len, unsigned long vm_flags);
-extern void zfs_iput_async(struct inode *ip);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_FS_ZFS_VNOPS_H */

diff --git a/zfs/include/sys/zfs_znode.h b/zfs/include/sys/zfs_znode.h
index 146cf4d..48dab67 100644
--- a/zfs/include/sys/zfs_znode.h
+++ b/zfs/include/sys/zfs_znode.h

@@ -27,18 +27,6 @@
 #ifndef	_SYS_FS_ZFS_ZNODE_H
 #define	_SYS_FS_ZFS_ZNODE_H
 
-#ifdef _KERNEL
-#include <sys/isa_defs.h>
-#include <sys/types32.h>
-#include <sys/list.h>
-#include <sys/dmu.h>
-#include <sys/sa.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/rrwlock.h>
-#include <sys/zfs_sa.h>
-#include <sys/zfs_stat.h>
-#include <sys/zfs_rlock.h>
-#endif
 #include <sys/zfs_acl.h>
 #include <sys/zil.h>
 #include <sys/zfs_project.h>
@@ -169,12 +157,16 @@
 #define	ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
 
+extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
+
+#ifdef _KERNEL
+#include <sys/zfs_znode_impl.h>
+
 /*
  * Directory entry locks control access to directory entries.
  * They are used to protect creates, deletes, and renames.
  * Each directory znode has a mutex and a list of locked names.
  */
-#ifdef _KERNEL
 typedef struct zfs_dirlock {
 	char		*dl_name;	/* directory entry being locked */
 	uint32_t	dl_sharecnt;	/* 0 if exclusive, > 0 if shared */
@@ -195,12 +187,9 @@
 	boolean_t	z_unlinked;	/* file has been unlinked */
 	boolean_t	z_atime_dirty;	/* atime needs to be synced */
 	boolean_t	z_zn_prefetch;	/* Prefetch znodes? */
-	boolean_t	z_moved;	/* Has this znode been moved? */
 	boolean_t	z_is_sa;	/* are we native sa? */
-	boolean_t	z_is_mapped;	/* are we mmap'ed */
 	boolean_t	z_is_ctldir;	/* are we .zfs entry */
-	boolean_t	z_is_stale;	/* are we stale due to rollback? */
-	boolean_t   z_suspended;    /* extra ref from a suspend? */
+	boolean_t	z_suspended;	/* extra ref from a suspend? */
 	uint_t		z_blksz;	/* block size in bytes */
 	uint_t		z_seq;		/* modification sequence number */
 	uint64_t	z_mapcnt;	/* number of pages mapped to file */
@@ -208,6 +197,8 @@
 	uint64_t	z_size;		/* file size (cached) */
 	uint64_t	z_pflags;	/* pflags (cached) */
 	uint32_t	z_sync_cnt;	/* synchronous open count */
+	uint32_t	z_sync_writes_cnt; /* synchronous write count */
+	uint32_t	z_async_writes_cnt; /* asynchronous write count */
 	mode_t		z_mode;		/* mode (cached) */
 	kmutex_t	z_acl_lock;	/* acl data lock */
 	zfs_acl_t	*z_acl_cached;	/* cached acl */
@@ -217,14 +208,19 @@
 	uint64_t	z_projid;	/* project ID */
 	list_node_t	z_link_node;	/* all znodes in fs link */
 	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
-	struct inode	z_inode;	/* generic vfs inode */
+
+	/*
+	 * Platform specific field, defined by each platform and only
+	 * accessible from platform specific code.
+	 */
+	ZNODE_OS_FIELDS;
 } znode_t;
 
 typedef struct znode_hold {
 	uint64_t	zh_obj;		/* object id */
-	kmutex_t	zh_lock;	/* lock serializing object access */
 	avl_node_t	zh_node;	/* avl tree linkage */
-	zfs_refcount_t	zh_refcount;	/* active consumer reference count */
+	kmutex_t	zh_lock;	/* lock serializing object access */
+	int		zh_refcount;	/* active consumer reference count */
 } znode_hold_t;
 
 static inline uint64_t
@@ -235,102 +231,6 @@
 }
 
 /*
- * Range locking rules
- * --------------------
- * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
- *    file range needs to be locked as RL_WRITER. Only then can the pages be
- *    freed etc and zp_size reset. zp_size must be set within range lock.
- * 2. For writes and punching holes (zfs_write & zfs_space) just the range
- *    being written or freed needs to be locked as RL_WRITER.
- *    Multiple writes at the end of the file must coordinate zp_size updates
- *    to ensure data isn't lost. A compare and swap loop is currently used
- *    to ensure the file size is at least the offset last written.
- * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
- *    read needs to be locked as RL_READER. A check against zp_size can then
- *    be made for reading beyond end of file.
- */
-
-/*
- * Convert between znode pointers and inode pointers
- */
-#define	ZTOI(znode)	(&((znode)->z_inode))
-#define	ITOZ(inode)	(container_of((inode), znode_t, z_inode))
-#define	ZTOZSB(znode)	((zfsvfs_t *)(ZTOI(znode)->i_sb->s_fs_info))
-#define	ITOZSB(inode)	((zfsvfs_t *)((inode)->i_sb->s_fs_info))
-
-#define	S_ISDEV(mode)	(S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode))
-
-/* Called on entry to each ZFS inode and vfs operation. */
-#define	ZFS_ENTER_ERROR(zfsvfs, error)				\
-do {								\
-	rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG);	\
-	if ((zfsvfs)->z_unmounted) {				\
-		ZFS_EXIT(zfsvfs);				\
-		return (error);					\
-	}							\
-} while (0)
-#define	ZFS_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, EIO)
-#define	ZPL_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, -EIO)
-
-/* Must be called before exiting the operation. */
-#define	ZFS_EXIT(zfsvfs)					\
-do {								\
-	rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG);		\
-} while (0)
-#define	ZPL_EXIT(zfsvfs)	ZFS_EXIT(zfsvfs)
-
-/* Verifies the znode is valid. */
-#define	ZFS_VERIFY_ZP_ERROR(zp, error)				\
-do {								\
-	if ((zp)->z_sa_hdl == NULL) {				\
-		ZFS_EXIT(ZTOZSB(zp));				\
-		return (error);					\
-	}							\
-} while (0)
-#define	ZFS_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, EIO)
-#define	ZPL_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, -EIO)
-
-/*
- * Macros for dealing with dmu_buf_hold
- */
-#define	ZFS_OBJ_MTX_SZ		64
-#define	ZFS_OBJ_MTX_MAX		(1024 * 1024)
-#define	ZFS_OBJ_HASH(zfsvfs, obj)	((obj) & ((zfsvfs->z_hold_size) - 1))
-
-extern unsigned int zfs_object_mutex_size;
-
-/*
- * Encode ZFS stored time values from a struct timespec / struct timespec64.
- */
-#define	ZFS_TIME_ENCODE(tp, stmp)		\
-do {						\
-	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
-	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
-} while (0)
-
-#if defined(HAVE_INODE_TIMESPEC64_TIMES)
-/*
- * Decode ZFS stored time values to a struct timespec64
- * 4.18 and newer kernels.
- */
-#define	ZFS_TIME_DECODE(tp, stmp)		\
-do {						\
-	(tp)->tv_sec = (time64_t)(stmp)[0];	\
-	(tp)->tv_nsec = (long)(stmp)[1];	\
-} while (0)
-#else
-/*
- * Decode ZFS stored time values to a struct timespec
- * 4.17 and older kernels.
- */
-#define	ZFS_TIME_DECODE(tp, stmp)		\
-do {						\
-	(tp)->tv_sec = (time_t)(stmp)[0];	\
-	(tp)->tv_nsec = (long)(stmp)[1];	\
-} while (0)
-#endif /* HAVE_INODE_TIMESPEC64_TIMES */
-
-/*
  * Timestamp defines
  */
 #define	ACCESSED		(ATTR_ATIME)
@@ -354,32 +254,27 @@
 extern void	zfs_znode_delete(znode_t *, dmu_tx_t *);
 extern void	zfs_remove_op_tables(void);
 extern int	zfs_create_op_tables(void);
-extern int	zfs_sync(struct super_block *, int, cred_t *);
 extern dev_t	zfs_cmpldev(uint64_t);
 extern int	zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
 extern int	zfs_get_stats(objset_t *os, nvlist_t *nv);
 extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
 extern void	zfs_znode_dmu_fini(znode_t *);
-extern int	zfs_inode_alloc(struct super_block *, struct inode **ip);
-extern void	zfs_inode_destroy(struct inode *);
-extern void	zfs_inode_update(znode_t *);
-extern void	zfs_mark_inode_dirty(struct inode *);
-extern boolean_t zfs_relatime_need_update(const struct inode *);
 
 extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
-    vattr_t *vap);
+    znode_t *dzp, znode_t *zp, const char *name, vsecattr_t *,
+    zfs_fuid_info_t *, vattr_t *vap);
 extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
     vattr_t *vap);
 extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked);
+    znode_t *dzp, const char *name, uint64_t foid, boolean_t unlinked);
 #define	ZFS_NO_OBJECT	0	/* no object id */
 extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name);
+    znode_t *dzp, znode_t *zp, const char *name);
 extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name, char *link);
+    znode_t *dzp, znode_t *zp, const char *name, const char *link);
 extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
+    znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
+    znode_t *szp);
 extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, offset_t off, ssize_t len, int ioflag,
     zil_callback_t callback, void *callback_data);
@@ -392,19 +287,9 @@
 extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
 extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 
-#if defined(HAVE_UIO_RW)
-extern caddr_t zfs_map_page(page_t *, enum seg_rw);
-extern void zfs_unmap_page(page_t *, caddr_t);
-#endif /* HAVE_UIO_RW */
+extern void zfs_znode_update_vfs(struct znode *);
 
-extern zil_get_data_t zfs_get_data;
-extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
-extern int zfsfstype;
-
-#endif /* _KERNEL */
-
-extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
-
+#endif
 #ifdef	__cplusplus
 }
 #endif

diff --git a/zfs/include/sys/zil.h b/zfs/include/sys/zil.h
index 6b038a9..a43823b 100644
--- a/zfs/include/sys/zil.h
+++ b/zfs/include/sys/zil.h

@@ -222,6 +222,15 @@
 } lr_ooo_t;
 
 /*
+ * Additional lr_attr_t fields.
+ */
+typedef struct {
+	uint64_t	lr_attr_attrs;		/* all of the attributes */
+	uint64_t	lr_attr_crtime[2];	/* create time */
+	uint8_t		lr_attr_scanstamp[32];
+} lr_attr_end_t;
+
+/*
  * Handle option extended vattr attributes.
  *
  * Whenever new attributes are added the version number
@@ -231,7 +240,7 @@
 typedef struct {
 	uint32_t	lr_attr_masksize; /* number of elements in array */
 	uint32_t	lr_attr_bitmap; /* First entry of array */
-	/* remainder of array and any additional fields */
+	/* remainder of array and additional lr_attr_end_t fields */
 } lr_attr_t;
 
 /*
@@ -373,7 +382,7 @@
  *	- the write occupies only one block
  * WR_COPIED:
  *    If we know we'll immediately be committing the
- *    transaction (FSYNC or FDSYNC), then we allocate a larger
+ *    transaction (O_SYNC or O_DSYNC), then we allocate a larger
  *    log record here for the data and copy the data in.
  * WR_NEED_COPY:
  *    Otherwise we don't allocate a buffer, and *if* we need to
@@ -399,6 +408,7 @@
 	void		*itx_callback_data; /* User data for the callback */
 	size_t		itx_size;	/* allocated itx structure size */
 	uint64_t	itx_oid;	/* object id */
+	uint64_t	itx_gen;	/* gen number for zfs_get_data */
 	lr_t		itx_lr;		/* common part of log record */
 	/* followed by type-specific part of lr_xx_t and its immediate data */
 } itx_t;
@@ -462,12 +472,12 @@
 #define	ZIL_STAT_BUMP(stat) \
     ZIL_STAT_INCR(stat, 1);
 
-typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
+typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
     uint64_t txg);
-typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
+typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
     uint64_t txg);
 typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap);
-typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf,
+typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf,
     struct lwb *lwb, zio_t *zio);
 
 extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
@@ -493,8 +503,10 @@
 extern void	zil_itx_destroy(itx_t *itx);
 extern void	zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
 
+extern void	zil_async_to_sync(zilog_t *zilog, uint64_t oid);
 extern void	zil_commit(zilog_t *zilog, uint64_t oid);
 extern void	zil_commit_impl(zilog_t *zilog, uint64_t oid);
+extern void	zil_remove_async(zilog_t *zilog, uint64_t oid);
 
 extern int	zil_reset(const char *osname, void *txarg);
 extern int	zil_claim(struct dsl_pool *dp,

diff --git a/zfs/include/sys/zio.h b/zfs/include/sys/zio.h
index 0046230..39de517 100644
--- a/zfs/include/sys/zio.h
+++ b/zfs/include/sys/zio.h

@@ -22,10 +22,13 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019-2020, Michael Niewöhner
  */
 
 #ifndef _ZIO_H
@@ -85,7 +88,9 @@
 	ZIO_CHECKSUM_NOPARITY,
 	ZIO_CHECKSUM_SHA512,
 	ZIO_CHECKSUM_SKEIN,
+#if !defined(__FreeBSD__)
 	ZIO_CHECKSUM_EDONR,
+#endif
 	ZIO_CHECKSUM_FUNCTIONS
 };
 
@@ -102,24 +107,6 @@
 #define	ZIO_CHECKSUM_VERIFY	(1 << 8)
 
 #define	ZIO_DEDUPCHECKSUM	ZIO_CHECKSUM_SHA256
-#define	ZIO_DEDUPDITTO_MIN	100
-
-/* supported encryption algorithms */
-enum zio_encrypt {
-	ZIO_CRYPT_INHERIT = 0,
-	ZIO_CRYPT_ON,
-	ZIO_CRYPT_OFF,
-	ZIO_CRYPT_AES_128_CCM,
-	ZIO_CRYPT_AES_192_CCM,
-	ZIO_CRYPT_AES_256_CCM,
-	ZIO_CRYPT_AES_128_GCM,
-	ZIO_CRYPT_AES_192_GCM,
-	ZIO_CRYPT_AES_256_GCM,
-	ZIO_CRYPT_FUNCTIONS
-};
-
-#define	ZIO_CRYPT_ON_VALUE	ZIO_CRYPT_AES_256_GCM
-#define	ZIO_CRYPT_DEFAULT	ZIO_CRYPT_OFF
 
 /* macros defining encryption lengths */
 #define	ZIO_OBJSET_MAC_LEN		32
@@ -155,9 +142,18 @@
 	(compress) == ZIO_COMPRESS_GZIP_8 ||		\
 	(compress) == ZIO_COMPRESS_GZIP_9 ||		\
 	(compress) == ZIO_COMPRESS_ZLE ||		\
+	(compress) == ZIO_COMPRESS_ZSTD ||		\
 	(compress) == ZIO_COMPRESS_ON ||		\
 	(compress) == ZIO_COMPRESS_OFF)
 
+/* Split and join the algorithm and level fields of a raw compress value. */
+#define	ZIO_COMPRESS_ALGO(x)	((x) & SPA_COMPRESSMASK)
+#define	ZIO_COMPRESS_LEVEL(x)	(((x) & ~SPA_COMPRESSMASK) >> SPA_COMPRESSBITS)
+#define	ZIO_COMPRESS_RAW(type, level)	((type) | ((level) << SPA_COMPRESSBITS))
+
+#define	ZIO_COMPLEVEL_ZSTD(level)	\
+	ZIO_COMPRESS_RAW(ZIO_COMPRESS_ZSTD, level)
+
 #define	ZIO_FAILURE_MODE_WAIT		0
 #define	ZIO_FAILURE_MODE_CONTINUE	1
 #define	ZIO_FAILURE_MODE_PANIC		2
@@ -266,16 +262,6 @@
 	ZIO_WAIT_TYPES
 };
 
-/*
- * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
- * graveyard) to indicate checksum errors and fragmentation.
- */
-#define	ECKSUM	EBADE
-#define	EFRAGS	EBADR
-
-/* Similar for ENOACTIVE */
-#define	ENOTACTIVE	ENOANO
-
 typedef void zio_done_func_t(zio_t *zio);
 
 extern int zio_exclude_metadata;
@@ -338,6 +324,7 @@
 typedef struct zio_prop {
 	enum zio_checksum	zp_checksum;
 	enum zio_compress	zp_compress;
+	uint8_t			zp_complevel;
 	dmu_object_type_t	zp_type;
 	uint8_t			zp_level;
 	uint8_t			zp_copies;
@@ -368,6 +355,7 @@
 	nvlist_t		*zcr_detector;
 	void			*zcr_cbdata;
 	size_t			zcr_cbinfo;	/* passed to zcr_free() */
+	uint64_t		zcr_sector;
 	uint64_t		zcr_align;
 	uint64_t		zcr_length;
 	zio_cksum_finish_f	*zcr_finish;
@@ -377,14 +365,8 @@
 	struct zio_bad_cksum	*zcr_ckinfo;	/* information from failure */
 };
 
-typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr,
-    void *arg);
-
-zio_vsd_cksum_report_f	zio_vsd_default_cksum_report;
-
 typedef struct zio_vsd_ops {
 	zio_done_func_t		*vsd_free;
-	zio_vsd_cksum_report_f	*vsd_cksum_report;
 } zio_vsd_ops_t;
 
 typedef struct zio_gang_node {
@@ -512,6 +494,7 @@
 	zio_gang_node_t	*io_gang_tree;
 	void		*io_executor;
 	void		*io_waiter;
+	void		*io_bio;
 	kmutex_t	io_lock;
 	kcondvar_t	io_cv;
 	int		io_allocator;
@@ -524,27 +507,33 @@
 	taskq_ent_t	io_tqent;
 };
 
+enum blk_verify_flag {
+	BLK_VERIFY_ONLY,
+	BLK_VERIFY_LOG,
+	BLK_VERIFY_HALT
+};
+
 extern int zio_bookmark_compare(const void *, const void *);
 
 extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
-    zio_done_func_t *done, void *private, enum zio_flag flags);
+    zio_done_func_t *done, void *priv, enum zio_flag flags);
 
 extern zio_t *zio_root(spa_t *spa,
-    zio_done_func_t *done, void *private, enum zio_flag flags);
+    zio_done_func_t *done, void *priv, enum zio_flag flags);
 
 extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
-    struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private,
+    struct abd *data, uint64_t lsize, zio_done_func_t *done, void *priv,
     zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
 
 extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
     zio_done_func_t *ready, zio_done_func_t *children_ready,
     zio_done_func_t *physdone, zio_done_func_t *done,
-    void *private, zio_priority_t priority, enum zio_flag flags,
+    void *priv, zio_priority_t priority, enum zio_flag flags,
     const zbookmark_phys_t *zb);
 
 extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
-    struct abd *data, uint64_t size, zio_done_func_t *done, void *private,
+    struct abd *data, uint64_t size, zio_done_func_t *done, void *priv,
     zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);
 
 extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
@@ -554,23 +543,23 @@
 
 extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
     const blkptr_t *bp,
-    zio_done_func_t *done, void *private, enum zio_flag flags);
+    zio_done_func_t *done, void *priv, enum zio_flag flags);
 
 extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
-    zio_done_func_t *done, void *private, enum zio_flag flags);
+    zio_done_func_t *done, void *priv, enum zio_flag flags);
 
 extern zio_t *zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
-    zio_done_func_t *done, void *private, zio_priority_t priority,
+    zio_done_func_t *done, void *priv, zio_priority_t priority,
     enum zio_flag flags, enum trim_flag trim_flags);
 
 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, struct abd *data, int checksum,
-    zio_done_func_t *done, void *private, zio_priority_t priority,
+    zio_done_func_t *done, void *priv, zio_priority_t priority,
     enum zio_flag flags, boolean_t labels);
 
 extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, struct abd *data, int checksum,
-    zio_done_func_t *done, void *private, zio_priority_t priority,
+    zio_done_func_t *done, void *priv, zio_priority_t priority,
     enum zio_flag flags, boolean_t labels);
 
 extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
@@ -583,8 +572,8 @@
 
 extern int zio_wait(zio_t *zio);
 extern void zio_nowait(zio_t *zio);
-extern void zio_execute(zio_t *zio);
-extern void zio_interrupt(zio_t *zio);
+extern void zio_execute(void *zio);
+extern void zio_interrupt(void *zio);
 extern void zio_delay_init(zio_t *zio);
 extern void zio_delay_interrupt(zio_t *zio);
 extern void zio_deadman(zio_t *zio, char *tag);
@@ -608,11 +597,11 @@
 extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
     uint64_t offset, struct abd *data, uint64_t size, int type,
     zio_priority_t priority, enum zio_flag flags,
-    zio_done_func_t *done, void *private);
+    zio_done_func_t *done, void *priv);
 
 extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
     struct abd *data, uint64_t size, zio_type_t type, zio_priority_t priority,
-    enum zio_flag flags, zio_done_func_t *done, void *private);
+    enum zio_flag flags, zio_done_func_t *done, void *priv);
 
 extern void zio_vdev_io_bypass(zio_t *zio);
 extern void zio_vdev_io_reissue(zio_t *zio);
@@ -629,11 +618,16 @@
     enum zio_checksum child, enum zio_checksum parent);
 extern enum zio_compress zio_compress_select(spa_t *spa,
     enum zio_compress child, enum zio_compress parent);
+extern uint8_t zio_complevel_select(spa_t *spa, enum zio_compress compress,
+    uint8_t child, uint8_t parent);
 
 extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
 extern int zio_resume(spa_t *spa);
 extern void zio_resume_wait(spa_t *spa);
 
+extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
+    boolean_t config_held, enum blk_verify_flag blk_verify);
+
 /*
  * Initial setup and teardown.
  */
@@ -664,9 +658,9 @@
 /*
  * Checksum ereport functions
  */
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
     const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
-    uint64_t length, void *arg, struct zio_bad_cksum *info);
+    uint64_t length, struct zio_bad_cksum *info);
 extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
     const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
 
@@ -678,12 +672,18 @@
     uint64_t length, const abd_t *good_data, const abd_t *bad_data,
     struct zio_bad_cksum *info);
 
+void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr);
+extern void zfs_ereport_snapshot_post(const char *subclass, spa_t *spa,
+    const char *name);
+
 /* Called from spa_sync(), but primarily an injection handler */
 extern void spa_handle_ignored_writes(spa_t *spa);
 
 /* zbookmark_phys functions */
 boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
     const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
+boolean_t zbookmark_subtree_tbd(const struct dnode_phys *dnp,
+    const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
 int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
     uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
 

diff --git a/zfs/include/sys/zio_compress.h b/zfs/include/sys/zio_compress.h
index 208117e..4a22ad2 100644
--- a/zfs/include/sys/zio_compress.h
+++ b/zfs/include/sys/zio_compress.h

@@ -21,6 +21,8 @@
 
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2019, Klara Inc.
  * Use is subject to license terms.
  * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
  */
@@ -51,15 +53,86 @@
 	ZIO_COMPRESS_GZIP_9,
 	ZIO_COMPRESS_ZLE,
 	ZIO_COMPRESS_LZ4,
+	ZIO_COMPRESS_ZSTD,
 	ZIO_COMPRESS_FUNCTIONS
 };
 
+/* Compression algorithms that have levels (zstd and gzip-1 .. gzip-9) */
+#define	ZIO_COMPRESS_HASLEVEL(compress)	((compress) == ZIO_COMPRESS_ZSTD || \
+					((compress) >= ZIO_COMPRESS_GZIP_1 && \
+					(compress) <= ZIO_COMPRESS_GZIP_9))
+
+#define	ZIO_COMPLEVEL_INHERIT	0
+#define	ZIO_COMPLEVEL_DEFAULT	255
+
+enum zio_zstd_levels {
+	ZIO_ZSTD_LEVEL_INHERIT = 0,
+	ZIO_ZSTD_LEVEL_1,
+#define	ZIO_ZSTD_LEVEL_MIN	ZIO_ZSTD_LEVEL_1
+	ZIO_ZSTD_LEVEL_2,
+	ZIO_ZSTD_LEVEL_3,
+#define	ZIO_ZSTD_LEVEL_DEFAULT	ZIO_ZSTD_LEVEL_3
+	ZIO_ZSTD_LEVEL_4,
+	ZIO_ZSTD_LEVEL_5,
+	ZIO_ZSTD_LEVEL_6,
+	ZIO_ZSTD_LEVEL_7,
+	ZIO_ZSTD_LEVEL_8,
+	ZIO_ZSTD_LEVEL_9,
+	ZIO_ZSTD_LEVEL_10,
+	ZIO_ZSTD_LEVEL_11,
+	ZIO_ZSTD_LEVEL_12,
+	ZIO_ZSTD_LEVEL_13,
+	ZIO_ZSTD_LEVEL_14,
+	ZIO_ZSTD_LEVEL_15,
+	ZIO_ZSTD_LEVEL_16,
+	ZIO_ZSTD_LEVEL_17,
+	ZIO_ZSTD_LEVEL_18,
+	ZIO_ZSTD_LEVEL_19,
+#define	ZIO_ZSTD_LEVEL_MAX	ZIO_ZSTD_LEVEL_19
+	ZIO_ZSTD_LEVEL_RESERVE = 101, /* Leave room for new positive levels */
+	ZIO_ZSTD_LEVEL_FAST, /* Fast levels are negative */
+	ZIO_ZSTD_LEVEL_FAST_1,
+#define	ZIO_ZSTD_LEVEL_FAST_DEFAULT	ZIO_ZSTD_LEVEL_FAST_1
+	ZIO_ZSTD_LEVEL_FAST_2,
+	ZIO_ZSTD_LEVEL_FAST_3,
+	ZIO_ZSTD_LEVEL_FAST_4,
+	ZIO_ZSTD_LEVEL_FAST_5,
+	ZIO_ZSTD_LEVEL_FAST_6,
+	ZIO_ZSTD_LEVEL_FAST_7,
+	ZIO_ZSTD_LEVEL_FAST_8,
+	ZIO_ZSTD_LEVEL_FAST_9,
+	ZIO_ZSTD_LEVEL_FAST_10,
+	ZIO_ZSTD_LEVEL_FAST_20,
+	ZIO_ZSTD_LEVEL_FAST_30,
+	ZIO_ZSTD_LEVEL_FAST_40,
+	ZIO_ZSTD_LEVEL_FAST_50,
+	ZIO_ZSTD_LEVEL_FAST_60,
+	ZIO_ZSTD_LEVEL_FAST_70,
+	ZIO_ZSTD_LEVEL_FAST_80,
+	ZIO_ZSTD_LEVEL_FAST_90,
+	ZIO_ZSTD_LEVEL_FAST_100,
+	ZIO_ZSTD_LEVEL_FAST_500,
+	ZIO_ZSTD_LEVEL_FAST_1000,
+#define	ZIO_ZSTD_LEVEL_FAST_MAX	ZIO_ZSTD_LEVEL_FAST_1000
+	ZIO_ZSTD_LEVEL_AUTO = 251, /* Reserved for future use */
+	ZIO_ZSTD_LEVEL_LEVELS
+};
+
+/* Forward Declaration to avoid visibility problems */
+struct zio_prop;
+
 /* Common signature for all zio compress functions. */
 typedef size_t zio_compress_func_t(void *src, void *dst,
     size_t s_len, size_t d_len, int);
 /* Common signature for all zio decompress functions. */
 typedef int zio_decompress_func_t(void *src, void *dst,
     size_t s_len, size_t d_len, int);
+/* Common signature for all zio decompress and get level functions. */
+typedef int zio_decompresslevel_func_t(void *src, void *dst,
+    size_t s_len, size_t d_len, uint8_t *level);
+/* Common signature for all zio get-compression-level functions. */
+typedef int zio_getlevel_func_t(void *src, size_t s_len, uint8_t *level);
+
 
 /*
  * Common signature for all zio decompress functions using an ABD as input.
@@ -76,6 +149,7 @@
 	int				ci_level;
 	zio_compress_func_t		*ci_compress;
 	zio_decompress_func_t		*ci_decompress;
+	zio_decompresslevel_func_t	*ci_decompress_level;
 } zio_compress_info_t;
 
 extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
@@ -110,11 +184,12 @@
  * Compress and decompress data if necessary.
  */
 extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst,
-    size_t s_len);
+    size_t s_len, uint8_t level);
 extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
-    size_t s_len, size_t d_len);
+    size_t s_len, size_t d_len, uint8_t *level);
 extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
-    size_t s_len, size_t d_len);
+    size_t s_len, size_t d_len, uint8_t *level);
+extern int zio_compress_to_feature(enum zio_compress comp);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/include/sys/zio_crypt.h b/zfs/include/sys/zio_crypt.h
index a029127..d7a63d6 100644
--- a/zfs/include/sys/zio_crypt.h
+++ b/zfs/include/sys/zio_crypt.h

@@ -21,8 +21,12 @@
 #define	_SYS_ZIO_CRYPT_H
 
 #include <sys/dmu.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
+#if defined(__FreeBSD__) && defined(_KERNEL)
+#include <sys/freebsd_crypto.h>
+#else
 #include <sys/crypto/api.h>
+#endif /* __FreeBSD__ */
 #include <sys/nvpair.h>
 #include <sys/avl.h>
 #include <sys/zio.h>
@@ -47,8 +51,15 @@
 /* table of supported crypto algorithms, modes and keylengths. */
 typedef struct zio_crypt_info {
 	/* mechanism name, needed by ICP */
+#if defined(__FreeBSD__) && defined(_KERNEL)
+	/*
+	 * I've deliberately used a different name here, to catch
+	 * ICP-using code.
+	 */
+	const char	*ci_algname;
+#else
 	crypto_mech_name_t ci_mechname;
-
+#endif
 	/* cipher mode type (GCM, CCM) */
 	zio_crypt_type_t ci_crypt_type;
 
@@ -90,8 +101,13 @@
 	/* illumos crypto api current encryption key */
 	crypto_key_t zk_current_key;
 
+#if defined(__FreeBSD__) && defined(_KERNEL)
+	/* Session for current encryption key.  Must always be set */
+	freebsd_crypt_session_t	zk_session;
+#else
 	/* template of current encryption key for illumos crypto api */
 	crypto_ctx_template_t zk_current_tmpl;
+#endif
 
 	/* illumos crypto api current hmac key */
 	crypto_key_t zk_hmac_key;

diff --git a/zfs/include/sys/zio_impl.h b/zfs/include/sys/zio_impl.h
index 8ca1246..4c99857 100644
--- a/zfs/include/sys/zio_impl.h
+++ b/zfs/include/sys/zio_impl.h

@@ -73,9 +73,9 @@
  * the supported transformations:
  *
  * Compression:
- * ZFS supports three different flavors of compression -- gzip, lzjb, and
- * zle. Compression occurs as part of the write pipeline and is performed
- * in the ZIO_STAGE_WRITE_BP_INIT stage.
+ * ZFS supports five different flavors of compression -- gzip, lzjb, lz4, zle,
+ * and zstd. Compression occurs as part of the write pipeline and is
+ * performed in the ZIO_STAGE_WRITE_BP_INIT stage.
  *
  * Dedup:
  * Dedup reads are handled by the ZIO_STAGE_DDT_READ_START and

diff --git a/zfs/include/sys/zio_priority.h b/zfs/include/sys/zio_priority.h
index 0b42290..2d8e7fc 100644
--- a/zfs/include/sys/zio_priority.h
+++ b/zfs/include/sys/zio_priority.h

@@ -31,6 +31,7 @@
 	ZIO_PRIORITY_REMOVAL,		/* reads/writes for vdev removal */
 	ZIO_PRIORITY_INITIALIZING,	/* initializing I/O */
 	ZIO_PRIORITY_TRIM,		/* trim I/O (discard) */
+	ZIO_PRIORITY_REBUILD,		/* reads/writes for vdev rebuild */
 	ZIO_PRIORITY_NUM_QUEUEABLE,
 	ZIO_PRIORITY_NOW,		/* non-queued i/os (e.g. free) */
 } zio_priority_t;

diff --git a/zfs/include/sys/zpl.h b/zfs/include/sys/zpl.h
deleted file mode 100644
index f88ccd5..0000000
--- a/zfs/include/sys/zpl.h
+++ /dev/null

@@ -1,201 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- */
-
-#ifndef	_SYS_ZPL_H
-#define	_SYS_ZPL_H
-
-#include <sys/mntent.h>
-#include <sys/vfs.h>
-#include <linux/aio.h>
-#include <linux/dcache_compat.h>
-#include <linux/exportfs.h>
-#include <linux/falloc.h>
-#include <linux/parser.h>
-#include <linux/task_io_accounting_ops.h>
-#include <linux/vfs_compat.h>
-#include <linux/writeback.h>
-#include <linux/xattr_compat.h>
-
-/* zpl_inode.c */
-extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
-    zpl_umode_t mode, cred_t *cr);
-
-extern const struct inode_operations zpl_inode_operations;
-extern const struct inode_operations zpl_dir_inode_operations;
-extern const struct inode_operations zpl_symlink_inode_operations;
-extern const struct inode_operations zpl_special_inode_operations;
-extern dentry_operations_t zpl_dentry_operations;
-
-/* zpl_file.c */
-extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
-    size_t len, loff_t *ppos, uio_seg_t segment, int flags,
-    cred_t *cr);
-extern ssize_t zpl_write_common(struct inode *ip, const char *buf,
-    size_t len, loff_t *ppos, uio_seg_t segment, int flags,
-    cred_t *cr);
-#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
-extern long zpl_fallocate_common(struct inode *ip, int mode,
-    loff_t offset, loff_t len);
-#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
-
-extern const struct address_space_operations zpl_address_space_operations;
-extern const struct file_operations zpl_file_operations;
-extern const struct file_operations zpl_dir_file_operations;
-
-/* zpl_super.c */
-extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
-
-extern const struct super_operations zpl_super_operations;
-extern const struct export_operations zpl_export_operations;
-extern struct file_system_type zpl_fs_type;
-
-/* zpl_xattr.c */
-extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size);
-extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
-    const struct qstr *qstr);
-#if defined(CONFIG_FS_POSIX_ACL)
-extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type);
-extern struct posix_acl *zpl_get_acl(struct inode *ip, int type);
-#if !defined(HAVE_GET_ACL)
-#if defined(HAVE_CHECK_ACL_WITH_FLAGS)
-extern int zpl_check_acl(struct inode *inode, int mask, unsigned int flags);
-#elif defined(HAVE_CHECK_ACL)
-extern int zpl_check_acl(struct inode *inode, int mask);
-#elif defined(HAVE_PERMISSION_WITH_NAMEIDATA)
-extern int zpl_permission(struct inode *ip, int mask, struct nameidata *nd);
-#elif defined(HAVE_PERMISSION)
-extern int zpl_permission(struct inode *ip, int mask);
-#endif /*  HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* HAVE_GET_ACL */
-
-extern int zpl_init_acl(struct inode *ip, struct inode *dir);
-extern int zpl_chmod_acl(struct inode *ip);
-#else
-static inline int
-zpl_init_acl(struct inode *ip, struct inode *dir)
-{
-	return (0);
-}
-
-static inline int
-zpl_chmod_acl(struct inode *ip)
-{
-	return (0);
-}
-#endif /* CONFIG_FS_POSIX_ACL */
-
-extern xattr_handler_t *zpl_xattr_handlers[];
-
-/* zpl_ctldir.c */
-extern const struct file_operations zpl_fops_root;
-extern const struct inode_operations zpl_ops_root;
-
-extern const struct file_operations zpl_fops_snapdir;
-extern const struct inode_operations zpl_ops_snapdir;
-#ifdef HAVE_AUTOMOUNT
-extern const struct dentry_operations zpl_dops_snapdirs;
-#else
-extern const struct inode_operations zpl_ops_snapdirs;
-#endif /* HAVE_AUTOMOUNT */
-
-extern const struct file_operations zpl_fops_shares;
-extern const struct inode_operations zpl_ops_shares;
-
-#if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED)
-
-#define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
-	.actor = _actor,				\
-	.pos = _pos,					\
-}
-
-typedef struct dir_context zpl_dir_context_t;
-
-#define	zpl_dir_emit		dir_emit
-#define	zpl_dir_emit_dot	dir_emit_dot
-#define	zpl_dir_emit_dotdot	dir_emit_dotdot
-#define	zpl_dir_emit_dots	dir_emit_dots
-
-#else
-
-typedef struct zpl_dir_context {
-	void *dirent;
-	const filldir_t actor;
-	loff_t pos;
-} zpl_dir_context_t;
-
-#define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
-	.dirent = _dirent,				\
-	.actor = _actor,				\
-	.pos = _pos,					\
-}
-
-static inline bool
-zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen,
-    uint64_t ino, unsigned type)
-{
-	return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type));
-}
-
-static inline bool
-zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx)
-{
-	return (ctx->actor(ctx->dirent, ".", 1, ctx->pos,
-	    file_inode(file)->i_ino, DT_DIR) == 0);
-}
-
-static inline bool
-zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx)
-{
-	return (ctx->actor(ctx->dirent, "..", 2, ctx->pos,
-	    parent_ino(file_dentry(file)), DT_DIR) == 0);
-}
-
-static inline bool
-zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
-{
-	if (ctx->pos == 0) {
-		if (!zpl_dir_emit_dot(file, ctx))
-			return (false);
-		ctx->pos = 1;
-	}
-	if (ctx->pos == 1) {
-		if (!zpl_dir_emit_dotdot(file, ctx))
-			return (false);
-		ctx->pos = 2;
-	}
-	return (true);
-}
-#endif /* HAVE_VFS_ITERATE */
-
-#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
-#define	zpl_inode_timestamp_truncate(ts, ip)	timestamp_truncate(ts, ip)
-#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
-#define	zpl_inode_timestamp_truncate(ts, ip)	\
-	timespec64_trunc(ts, (ip)->i_sb->s_time_gran)
-#else
-#define	zpl_inode_timestamp_truncate(ts, ip)	\
-	timespec_trunc(ts, (ip)->i_sb->s_time_gran)
-#endif
-
-#endif	/* _SYS_ZPL_H */

diff --git a/zfs/include/sys/zrlock.h b/zfs/include/sys/zrlock.h
index b6eba1a..c8c656d 100644
--- a/zfs/include/sys/zrlock.h
+++ b/zfs/include/sys/zrlock.h

@@ -34,9 +34,8 @@
 
 typedef struct zrlock {
 	kmutex_t zr_mtx;
-	volatile int32_t zr_refcount;
 	kcondvar_t zr_cv;
-	uint16_t zr_pad;
+	volatile int32_t zr_refcount;
 #ifdef	ZFS_DEBUG
 	kthread_t *zr_owner;
 	const char *zr_caller;

diff --git a/zfs/include/sys/zstd/Makefile.am b/zfs/include/sys/zstd/Makefile.am
new file mode 100644
index 0000000..16666fe
--- /dev/null
+++ b/zfs/include/sys/zstd/Makefile.am

@@ -0,0 +1,18 @@
+COMMON_H = \
+	$(top_srcdir)/include/sys/zstd/zstd.h
+
+KERNEL_H =
+
+USER_H =
+
+EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+
+if CONFIG_USER
+libzfsdir = $(includedir)/libzfs/sys/zstd
+libzfs_HEADERS = $(COMMON_H) $(USER_H)
+endif
+
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/zstd
+kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+endif

diff --git a/zfs/include/sys/zstd/zstd.h b/zfs/include/sys/zstd/zstd.h
new file mode 100644
index 0000000..ca32a74
--- /dev/null
+++ b/zfs/include/sys/zstd/zstd.h

@@ -0,0 +1,229 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2018, Klara Inc.
+ * Copyright (c) 2016-2018, Allan Jude
+ * Copyright (c) 2018-2020, Sebastian Gottschall
+ * Copyright (c) 2019-2020, Michael Niewöhner
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ *     under sponsorship from the FreeBSD Foundation.
+ */
+
+#ifndef	_ZFS_ZSTD_H
+#define	_ZFS_ZSTD_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * ZSTD block header
+ * NOTE: all fields in this header are in big endian order.
+ */
+typedef struct zfs_zstd_header {
+	/* Compressed size of data */
+	uint32_t c_len;
+
+	/*
+	 * Version and compression level
+	 * We used to use a union to reference compression level
+	 * and version easily, but as it turns out, relying on the
+	 * ordering of bitfields is not remotely portable.
+	 * So now we have get/set functions in zfs_zstd.c for
+	 * manipulating this in just the right way forever.
+	 */
+	uint32_t raw_version_level;
+	char data[];
+} zfs_zstdhdr_t;
+
+/*
+ * Simple struct to pass the data from raw_version_level around.
+ */
+typedef struct zfs_zstd_meta {
+	uint8_t level;
+	uint32_t version;
+} zfs_zstdmeta_t;
+
+/*
+ * kstat helper macros
+ */
+#define	ZSTDSTAT(stat)		(zstd_stats.stat.value.ui64)
+#define	ZSTDSTAT_ADD(stat, val) \
+	atomic_add_64(&zstd_stats.stat.value.ui64, (val))
+#define	ZSTDSTAT_SUB(stat, val) \
+	atomic_sub_64(&zstd_stats.stat.value.ui64, (val))
+#define	ZSTDSTAT_BUMP(stat)	ZSTDSTAT_ADD(stat, 1)
+
+/* (de)init for user space / kernel emulation */
+int zstd_init(void);
+void zstd_fini(void);
+
+size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, int level);
+int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
+int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, uint8_t *level);
+int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, int n);
+void zfs_zstd_cache_reap_now(void);
+
+/*
+ * So, the reason we have all these complicated set/get functions is that
+ * originally, in the zstd "header" we wrote out to disk, we used a 32-bit
+ * bitfield to store the "level" (8 bits) and "version" (24 bits).
+ *
+ * Unfortunately, bitfields make few promises about how they're arranged in
+ * memory...
+ *
+ * By way of example, if we were using version 1.4.5 and level 3, it'd be
+ * level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00,
+ * Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow
+ * which data winds up where.
+ *
+ * As a consequence, on little-endian platforms we wound up with a layout
+ * like this in memory:
+ *
+ *      0       8      16      24      32
+ *      +-------+-------+-------+-------+
+ *      | Vlow  | Vmid  | Vhigh | level |
+ *      +-------+-------+-------+-------+
+ *        =A5     =28     =00     =03
+ *
+ * ...and then, after being run through BE_32(), serializing this out to
+ * disk:
+ *
+ *      0       8      16      24      32
+ *      +-------+-------+-------+-------+
+ *      | level | Vhigh | Vmid  | Vlow  |
+ *      +-------+-------+-------+-------+
+ *        =03     =00     =28     =A5
+ *
+ * while on big-endian systems, since BE_32() is a noop there, both in
+ * memory and on disk, we wind up with:
+ *
+ *      0       8      16      24      32
+ *      +-------+-------+-------+-------+
+ *      | Vhigh | Vmid  | Vlow  | level |
+ *      +-------+-------+-------+-------+
+ *        =00     =28     =A5     =03
+ *
+ * (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the
+ * other two bytes of the "version" data.)
+ *
+ * So now we use the BF32_SET macros to get consistent behavior (the
+ * ondisk LE encoding, since x86 currently rules the world) across
+ * platforms, but the "get" behavior requires that we check each of the
+ * bytes in the aforementioned former-bitfield for 0x00, and from there,
+ * we can know which possible layout we're dealing with. (Only the two
+ * that have been observed in the wild are illustrated above, but handlers
+ * for all 4 positions of 0x00 are implemented.)
+ */
+
+static inline void
+zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res)
+{
+	uint32_t raw = blob->raw_version_level;
+	uint8_t findme = 0xff;
+	int shift;
+	for (shift = 0; shift < 4; shift++) {
+		findme = BF32_GET(raw, 8*shift, 8);
+		if (findme == 0)
+			break;
+	}
+	switch (shift) {
+	case 0:
+		res->level = BF32_GET(raw, 24, 8);
+		res->version = BSWAP_32(raw);
+		res->version = BF32_GET(res->version, 8, 24);
+		break;
+	case 1:
+		res->level = BF32_GET(raw, 0, 8);
+		res->version = BSWAP_32(raw);
+		res->version = BF32_GET(res->version, 0, 24);
+		break;
+	case 2:
+		res->level = BF32_GET(raw, 24, 8);
+		res->version = BF32_GET(raw, 0, 24);
+		break;
+	case 3:
+		res->level = BF32_GET(raw, 0, 8);
+		res->version = BF32_GET(raw, 8, 24);
+		break;
+	default:
+		res->level = 0;
+		res->version = 0;
+		break;
+	}
+}
+
+/* Return the compression level decoded from the zstd block header. */
+static inline uint8_t
+zfs_get_hdrlevel(const zfs_zstdhdr_t *blob)
+{
+	zfs_zstdmeta_t meta;
+
+	zfs_get_hdrmeta(blob, &meta);
+	return (meta.level);
+}
+
+/* Return the version decoded from the zstd block header. */
+static inline uint32_t
+zfs_get_hdrversion(const zfs_zstdhdr_t *blob)
+{
+	zfs_zstdmeta_t meta;
+
+	zfs_get_hdrmeta(blob, &meta);
+
+	return (meta.version);
+}
+
+static inline void
+zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version)
+{
+	/* cppcheck-suppress syntaxError */
+	BF32_SET(blob->raw_version_level, 0, 24, version);
+}
+
+static inline void
+zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level)
+{
+	/* cppcheck-suppress syntaxError */
+	BF32_SET(blob->raw_version_level, 24, 8, level);
+}
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _ZFS_ZSTD_H */

diff --git a/zfs/include/sys/zthr.h b/zfs/include/sys/zthr.h
index 33c218e..4881b45 100644
--- a/zfs/include/sys/zthr.h
+++ b/zfs/include/sys/zthr.h

@@ -24,16 +24,21 @@
 typedef void (zthr_func_t)(void *, zthr_t *);
 typedef boolean_t (zthr_checkfunc_t)(void *, zthr_t *);
 
-extern zthr_t *zthr_create(zthr_checkfunc_t checkfunc,
-    zthr_func_t *func, void *arg);
-extern zthr_t *zthr_create_timer(zthr_checkfunc_t *checkfunc,
-    zthr_func_t *func, void *arg, hrtime_t nano_wait);
+extern zthr_t *zthr_create(const char *zthr_name,
+    zthr_checkfunc_t checkfunc, zthr_func_t *func, void *arg,
+	pri_t pri);
+extern zthr_t *zthr_create_timer(const char *zthr_name,
+    zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg,
+	hrtime_t nano_wait, pri_t pri);
 extern void zthr_destroy(zthr_t *t);
 
 extern void zthr_wakeup(zthr_t *t);
 extern void zthr_cancel(zthr_t *t);
 extern void zthr_resume(zthr_t *t);
+extern void zthr_wait_cycle_done(zthr_t *t);
 
 extern boolean_t zthr_iscancelled(zthr_t *t);
+extern boolean_t zthr_iscurthread(zthr_t *t);
+extern boolean_t zthr_has_waiters(zthr_t *t);
 
 #endif /* _SYS_ZTHR_H */

diff --git a/zfs/include/sys/zvol.h b/zfs/include/sys/zvol.h
index e8b0847..8efb7f5 100644
--- a/zfs/include/sys/zvol.h
+++ b/zfs/include/sys/zvol.h

@@ -35,28 +35,31 @@
 #define	SPEC_MAXOFFSET_T	((1LL << ((NBBY * sizeof (daddr32_t)) + \
 				DEV_BSHIFT - 1)) - 1)
 
-extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
-extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
-extern void zvol_rename_minors(spa_t *spa, const char *oldname,
-    const char *newname, boolean_t async);
+extern void zvol_create_minor(const char *);
+extern void zvol_create_minors_recursive(const char *);
+extern void zvol_remove_minors(spa_t *, const char *, boolean_t);
+extern void zvol_rename_minors(spa_t *, const char *, const char *, boolean_t);
 
 #ifdef _KERNEL
-typedef struct zvol_state zvol_state_t;
+struct zvol_state;
+typedef struct zvol_state zvol_state_handle_t;
 
-extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
-extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize);
-extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
+extern int zvol_check_volsize(uint64_t, uint64_t);
+extern int zvol_check_volblocksize(const char *, uint64_t);
+extern int zvol_get_stats(objset_t *, nvlist_t *);
 extern boolean_t zvol_is_zvol(const char *);
-extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
+extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *);
 extern int zvol_set_volsize(const char *, uint64_t);
 extern int zvol_set_volblocksize(const char *, uint64_t);
 extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
 extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
-extern zvol_state_t *zvol_suspend(const char *);
-extern int zvol_resume(zvol_state_t *);
-extern void *zvol_tag(zvol_state_t *);
+extern zvol_state_handle_t *zvol_suspend(const char *);
+extern int zvol_resume(zvol_state_handle_t *);
+extern void *zvol_tag(zvol_state_handle_t *);
 
 extern int zvol_init(void);
 extern void zvol_fini(void);
+extern int zvol_busy(void);
+
 #endif /* _KERNEL */
 #endif /* _SYS_ZVOL_H */

diff --git a/zfs/include/sys/zvol_impl.h b/zfs/include/sys/zvol_impl.h
new file mode 100644
index 0000000..89fe598
--- /dev/null
+++ b/zfs/include/sys/zvol_impl.h

@@ -0,0 +1,110 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef	_SYS_ZVOL_IMPL_H
+#define	_SYS_ZVOL_IMPL_H
+
+#include <sys/zfs_context.h>
+
+#define	ZVOL_RDONLY	0x1
+/*
+ * Whether the zvol has been written to (as opposed to ZVOL_RDONLY, which
+ * specifies whether or not the zvol _can_ be written to)
+ */
+#define	ZVOL_WRITTEN_TO	0x2
+
+#define	ZVOL_DUMPIFIED	0x4
+
+#define	ZVOL_EXCL	0x8
+
+/*
+ * The in-core state of each volume.
+ *
+ * zv_flags holds the ZVOL_* flag values defined above (ZVOL_RDONLY,
+ * ZVOL_WRITTEN_TO, ZVOL_DUMPIFIED, ZVOL_EXCL).
+ */
+typedef struct zvol_state {
+	char			zv_name[MAXNAMELEN];	/* name */
+	uint64_t		zv_volsize;		/* advertised space */
+	uint64_t		zv_volblocksize;	/* volume block size */
+	objset_t		*zv_objset;	/* objset handle */
+	uint32_t		zv_flags;	/* ZVOL_* flags */
+	uint32_t		zv_open_count;	/* open count */
+	uint32_t		zv_changed;	/* disk changed */
+	uint32_t		zv_volmode;	/* volmode */
+	zilog_t			*zv_zilog;	/* ZIL handle */
+	zfs_rangelock_t		zv_rangelock;	/* for range locking */
+	dnode_t			*zv_dn;		/* dnode hold */
+	dataset_kstats_t	zv_kstat;	/* zvol kstats */
+	list_node_t		zv_next;	/* next zvol_state_t linkage */
+	uint64_t		zv_hash;	/* name hash */
+	struct hlist_node	zv_hlink;	/* hash link */
+	kmutex_t		zv_state_lock;	/* protects zvol_state_t */
+	atomic_t		zv_suspend_ref;	/* refcount for suspend */
+	krwlock_t		zv_suspend_lock;	/* suspend lock */
+	struct zvol_state_os	*zv_zso;	/* private platform state */
+} zvol_state_t;
+
+
+extern list_t zvol_state_list;
+extern krwlock_t zvol_state_lock;
+#define	ZVOL_HT_SIZE	1024
+extern struct hlist_head *zvol_htable;
+#define	ZVOL_HT_HEAD(hash)	(&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
+extern zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE];
+
+extern unsigned int zvol_volmode;
+extern unsigned int zvol_inhibit_dev;
+
+/*
+ * platform independent functions exported to platform code
+ */
+zvol_state_t *zvol_find_by_name_hash(const char *name,
+    uint64_t hash, int mode);
+int zvol_first_open(zvol_state_t *zv, boolean_t readonly);
+uint64_t zvol_name_hash(const char *name);
+void zvol_remove_minors_impl(const char *name);
+void zvol_last_close(zvol_state_t *zv);
+void zvol_insert(zvol_state_t *zv);
+void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
+    uint64_t len, boolean_t sync);
+void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
+    uint64_t size, int sync);
+int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio);
+int zvol_init_impl(void);
+void zvol_fini_impl(void);
+void zvol_wait_close(zvol_state_t *zv);
+
+/*
+ * platform dependent functions exported to platform independent code
+ *
+ * A table of these callbacks is passed in through zvol_register_ops().
+ */
+typedef struct zvol_platform_ops {
+	void (*zv_free)(zvol_state_t *);
+	void (*zv_rename_minor)(zvol_state_t *, const char *);
+	int (*zv_create_minor)(const char *);
+	int (*zv_update_volsize)(zvol_state_t *, uint64_t);
+	boolean_t (*zv_is_zvol)(const char *);
+	void (*zv_clear_private)(zvol_state_t *);
+	void (*zv_set_disk_ro)(zvol_state_t *, int flags);
+	void (*zv_set_capacity)(zvol_state_t *, uint64_t capacity);
+} zvol_platform_ops_t;
+
+void zvol_register_ops(const zvol_platform_ops_t *ops);
+
+#endif

diff --git a/zfs/include/zfeature_common.h b/zfs/include/zfeature_common.h
index 4e75fe7..76dd7ed 100644
--- a/zfs/include/zfeature_common.h
+++ b/zfs/include/zfeature_common.h

@@ -67,6 +67,14 @@
 	SPA_FEATURE_ALLOCATION_CLASSES,
 	SPA_FEATURE_RESILVER_DEFER,
 	SPA_FEATURE_BOOKMARK_V2,
+	SPA_FEATURE_REDACTION_BOOKMARKS,
+	SPA_FEATURE_REDACTED_DATASETS,
+	SPA_FEATURE_BOOKMARK_WRITTEN,
+	SPA_FEATURE_LOG_SPACEMAP,
+	SPA_FEATURE_LIVELIST,
+	SPA_FEATURE_DEVICE_REBUILD,
+	SPA_FEATURE_ZSTD_COMPRESS,
+	SPA_FEATURE_DRAID,
 	SPA_FEATURES
 } spa_feature_t;
 

diff --git a/zfs/include/zfs_comutil.h b/zfs/include/zfs_comutil.h
index 7cdc6d6..17b07d9 100644
--- a/zfs/include/zfs_comutil.h
+++ b/zfs/include/zfs_comutil.h

@@ -34,7 +34,7 @@
 #endif
 
 extern boolean_t zfs_allocatable_devs(nvlist_t *);
-extern boolean_t zfs_special_devs(nvlist_t *);
+extern boolean_t zfs_special_devs(nvlist_t *, char *);
 extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *);
 
 extern int zfs_zpl_version_map(int spa_version);

diff --git a/zfs/include/zfs_deleg.h b/zfs/include/zfs_deleg.h
index 32d6698..5738b2a 100644
--- a/zfs/include/zfs_deleg.h
+++ b/zfs/include/zfs_deleg.h

@@ -77,7 +77,6 @@
 	ZFS_DELEG_NOTE_PROJECTQUOTA,
 	ZFS_DELEG_NOTE_PROJECTOBJUSED,
 	ZFS_DELEG_NOTE_PROJECTOBJQUOTA,
-	ZFS_DELEG_NOTE_REMAP,
 	ZFS_DELEG_NOTE_NONE
 } zfs_deleg_note_t;
 

diff --git a/zfs/include/zfs_fletcher.h b/zfs/include/zfs_fletcher.h
index 5c7a61c..9e8b2cf 100644
--- a/zfs/include/zfs_fletcher.h
+++ b/zfs/include/zfs_fletcher.h

@@ -143,6 +143,10 @@
 extern const fletcher_4_ops_t fletcher_4_avx512f_ops;
 #endif
 
+#if defined(__x86_64) && defined(HAVE_AVX512BW)
+extern const fletcher_4_ops_t fletcher_4_avx512bw_ops;
+#endif
+
 #if defined(__aarch64__)
 extern const fletcher_4_ops_t fletcher_4_aarch64_neon_ops;
 #endif

diff --git a/zfs/include/zfs_namecheck.h b/zfs/include/zfs_namecheck.h
index 56d3d36..197c40b 100644
--- a/zfs/include/zfs_namecheck.h
+++ b/zfs/include/zfs_namecheck.h

@@ -46,6 +46,7 @@
 	NAME_ERR_SELF_REF,		/* reserved self path name ('.') */
 	NAME_ERR_PARENT_REF,		/* reserved parent path name ('..') */
 	NAME_ERR_NO_AT,			/* permission set is missing '@' */
+	NAME_ERR_NO_POUND, 		/* permission set is missing '#' */
 } namecheck_err_t;
 
 #define	ZFS_PERMSET_MAXLEN	64
@@ -56,6 +57,8 @@
 int pool_namecheck(const char *, namecheck_err_t *, char *);
 int entity_namecheck(const char *, namecheck_err_t *, char *);
 int dataset_namecheck(const char *, namecheck_err_t *, char *);
+int snapshot_namecheck(const char *, namecheck_err_t *, char *);
+int bookmark_namecheck(const char *, namecheck_err_t *, char *);
 int dataset_nestcheck(const char *);
 int mountpoint_namecheck(const char *, namecheck_err_t *);
 int zfs_component_namecheck(const char *, namecheck_err_t *, char *);

diff --git a/zfs/lib/Makefile.am b/zfs/lib/Makefile.am
index 8dff773..db7a3fa 100644
--- a/zfs/lib/Makefile.am
+++ b/zfs/lib/Makefile.am

@@ -1,7 +1,43 @@
 # NB: GNU Automake Manual, Chapter 8.3.5: Libtool Convenience Libraries
-# These six libraries are intermediary build components.
-SUBDIRS = libavl libefi libicp libshare libspl libtpool libzutil libunicode
+# These nine libraries are intermediary build components.
+SUBDIRS = libavl libicp libshare libspl libtpool libzstd
+CPPCHECKDIRS  = libavl libicp libnvpair libshare libspl libtpool libunicode
+CPPCHECKDIRS += libuutil libzfs libzfs_core libzfsbootenv libzpool libzutil
 
-# These four libraries, which are installed as the final build product,
-# incorporate the six convenience libraries given above.
-SUBDIRS += libuutil libnvpair libzpool libzfs_core libzfs
+if BUILD_LINUX
+SUBDIRS += libefi
+CPPCHECKDIRS += libefi
+endif
+
+# libnvpair is installed as part of the final build product
+# libzutil depends on it, so it must be compiled before libzutil
+SUBDIRS += libnvpair
+
+# libzutil depends on libefi if present
+SUBDIRS += libzutil libunicode
+
+# These five libraries, which are installed as the final build product,
+# incorporate the eight convenience libraries given above.
+DISTLIBS = libuutil libzfs_core libzfs libzpool libzfsbootenv
+SUBDIRS += $(DISTLIBS)
+DISTLIBS += libnvpair
+
+# An ABI is stored for each of these libraries.  Note that libzpool.so
+# is only linked against by ztest and zdb and no stable ABI is provided.
+ABILIBS = libnvpair libuutil libzfs_core libzfs libzfsbootenv
+
+# checkabi, storeabi and cppcheck are commands, not files.  Declare them
+# phony so that a file of the same name in this directory can never make
+# them look up to date.
+PHONY = checkabi storeabi cppcheck
+.PHONY: $(PHONY)
+
+# Run "make checkabi" in every library that has a stored ABI.
+checkabi: $(ABILIBS)
+	set -e ; for dir in $(ABILIBS) ; do \
+		$(MAKE) -C $$dir checkabi ; \
+	done
+
+# Run "make storeabi" in every library that has a stored ABI.
+storeabi: $(ABILIBS)
+	set -e ; for dir in $(ABILIBS) ; do \
+		$(MAKE) -C $$dir storeabi ; \
+	done
+
+# Run "make cppcheck" in every directory listed in CPPCHECKDIRS.
+cppcheck: $(CPPCHECKDIRS)
+	set -e ; for dir in $(CPPCHECKDIRS) ; do \
+		$(MAKE) -C $$dir cppcheck ; \
+	done

diff --git a/zfs/lib/libavl/Makefile.am b/zfs/lib/libavl/Makefile.am
index 82b30bd..de8ba34 100644
--- a/zfs/lib/libavl/Makefile.am
+++ b/zfs/lib/libavl/Makefile.am

@@ -5,19 +5,15 @@
 # Includes kernel code, generate warnings for large stack frames
 AM_CFLAGS += $(FRAME_LARGER_THAN)
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libavl.la
 
-USER_C =
-
 KERNEL_C = \
 	avl.c
 
 nodist_libavl_la_SOURCES = \
-	$(USER_C) \
 	$(KERNEL_C)
 
-EXTRA_DIST = $(USER_C)
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libefi/Makefile.am b/zfs/lib/libefi/Makefile.am
index 9f69e46..5f77ac4 100644
--- a/zfs/lib/libefi/Makefile.am
+++ b/zfs/lib/libefi/Makefile.am

@@ -1,16 +1,17 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+AM_CFLAGS += $(LIBUUID_CFLAGS) $(ZLIB_CFLAGS)
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libefi.la
 
 USER_C = \
 	rdwr_efi.c
 
-nodist_libefi_la_SOURCES = $(USER_C)
+libefi_la_SOURCES = $(USER_C)
 
-libefi_la_LIBADD = $(LIBUUID)
+libefi_la_LIBADD = $(LIBUUID_LIBS) $(ZLIB_LIBS)
 
-EXTRA_DIST = $(USER_C)
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libefi/rdwr_efi.c b/zfs/lib/libefi/rdwr_efi.c
index d880011..ca7a760 100644
--- a/zfs/lib/libefi/rdwr_efi.c
+++ b/zfs/lib/libefi/rdwr_efi.c

@@ -44,6 +44,7 @@
 #include <sys/byteorder.h>
 #include <sys/vdev_disk.h>
 #include <linux/fs.h>
+#include <linux/blkpg.h>
 
 static struct uuid_to_ptag {
 	struct uuid	uuid;
@@ -156,7 +157,7 @@
 
 #if defined(i386) || defined(__amd64) || defined(__arm) || \
     defined(__powerpc) || defined(__sparc) || defined(__s390__) || \
-    defined(__mips__)
+    defined(__mips__) || defined(__rv64g__)
 	{	V_BOOT,		V_UNMNT	},		/* i - 8 */
 	{	V_ALTSCTR,	0	},		/* j - 9 */
 
@@ -209,19 +210,40 @@
 	return (0);
 }
 
+/*
+ * Resolve the device name behind an open file descriptor.
+ *
+ * Returns the string produced by realpath(3), or NULL if the scratch
+ * buffer cannot be allocated or realpath(3) fails.  The caller owns the
+ * returned string and must free() it.
+ */
+static char *
+efi_get_devname(int fd)
+{
+	char *resolved = NULL;
+	char *proc_link = calloc(1, PATH_MAX);
+
+	if (proc_link != NULL) {
+		/*
+		 * The libefi API hands us only the open fd, not the path it
+		 * was opened with, so let realpath(3) resolve the
+		 * /proc/self/fd/<fd> entry to the block device name.
+		 */
+		(void) sprintf(proc_link, "/proc/self/fd/%d", fd);
+		resolved = realpath(proc_link, NULL);
+		free(proc_link);
+	}
+
+	return (resolved);
+}
+
+
 static int
 efi_get_info(int fd, struct dk_cinfo *dki_info)
 {
-	char *path;
 	char *dev_path;
 	int rval = 0;
 
 	memset(dki_info, 0, sizeof (*dki_info));
 
-	path = calloc(1, PATH_MAX);
-	if (path == NULL)
-		goto error;
-
 	/*
 	 * The simplest way to get the partition number under linux is
 	 * to parse it out of the /dev/<disk><partition> block device name.
@@ -229,16 +251,10 @@
 	 * populates /dev/ so it may be trusted.  The tricky bit here is
 	 * that the naming convention is based on the block device type.
 	 * So we need to take this in to account when parsing out the
-	 * partition information.  Another issue is that the libefi API
-	 * API only provides the open fd and not the file path.  To handle
-	 * this realpath(3) is used to resolve the block device name from
-	 * /proc/self/fd/<fd>.  Aside from the partition number we collect
+	 * partition information.  Aside from the partition number we collect
 	 * some additional device info.
 	 */
-	(void) sprintf(path, "/proc/self/fd/%d", fd);
-	dev_path = realpath(path, NULL);
-	free(path);
-
+	dev_path = efi_get_devname(fd);
 	if (dev_path == NULL)
 		goto error;
 
@@ -452,6 +468,7 @@
 		    (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1);
 		nparts = vptr->efi_nparts;
 		if ((tmp = realloc(vptr, length)) == NULL) {
+			/* cppcheck-suppress doubleFree */
 			free(vptr);
 			*vtoc = NULL;
 			return (VT_ERROR);
@@ -1108,20 +1125,49 @@
 	return (0);
 }
 
+/*
+ * Issue a BLKPG ioctl on fd for partition number pno.
+ *
+ * command is stored as the op of the request; start and size are stored as
+ * the start and length of the partition description.  Returns VT_EINVAL if
+ * the device name behind fd cannot be resolved, otherwise whatever ioctl(2)
+ * returns.
+ */
+static int
+call_blkpg_ioctl(int fd, int command, diskaddr_t start,
+    diskaddr_t size, uint_t pno)
+{
+	struct blkpg_partition part;
+	struct blkpg_ioctl_arg req;
+	char *devname;
+
+	devname = efi_get_devname(fd);
+	if (devname == NULL) {
+		(void) fprintf(stderr, "failed to retrieve device name\n");
+		return (VT_EINVAL);
+	}
+
+	/* Describe the partition the operation applies to */
+	memset(&part, 0, sizeof (part));
+	part.pno = pno;
+	part.start = start;
+	part.length = size;
+	snprintf(part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", devname, pno);
+	part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
+	free(devname);
+
+	/* Wrap the description in the ioctl request */
+	req.data = &part;
+	req.datalen = sizeof (part);
+	req.flags = 0;
+	req.op = command;
+
+	return (ioctl(fd, BLKPG, &req));
+}
+
+
 /*
  * add all the unallocated space to the current label
  */
 int
 efi_use_whole_disk(int fd)
 {
-	struct dk_gpt		*efi_label = NULL;
-	int			rval;
-	int			i;
-	uint_t			resv_index = 0, data_index = 0;
-	diskaddr_t		resv_start = 0, data_start = 0;
-	diskaddr_t		data_size, limit, difference;
-	boolean_t		sync_needed = B_FALSE;
-	uint_t			nblocks;
+	struct dk_gpt *efi_label = NULL;
+	int rval;
+	int i;
+	uint_t resv_index = 0, data_index = 0;
+	diskaddr_t resv_start = 0, data_start = 0;
+	diskaddr_t data_size, limit, difference;
+	boolean_t sync_needed = B_FALSE;
+	uint_t nblocks;
 
 	rval = efi_alloc_and_read(fd, &efi_label);
 	if (rval < 0) {
@@ -1255,19 +1301,73 @@
 	efi_label->efi_parts[resv_index].p_start += difference;
 	efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
 
-	rval = efi_write(fd, efi_label);
-	if (rval < 0) {
-		if (efi_debug) {
-			(void) fprintf(stderr,
-			    "efi_use_whole_disk:fail to write label, rval=%d\n",
-			    rval);
-		}
-		efi_free(efi_label);
-		return (rval);
+	/*
+	 * Rescanning the partition table in the kernel can result
+	 * in the device links to be removed (see comment in vdev_disk_open).
+	 * If BLKPG_RESIZE_PARTITION is available, then we can resize
+	 * the partition table online and avoid having to remove the device
+	 * links used by the pool. This provides a very deterministic
+	 * approach to resizing devices and does not require any
+	 * loops waiting for devices to reappear.
+	 */
+#ifdef BLKPG_RESIZE_PARTITION
+	/*
+	 * Delete the reserved partition since we're about to expand
+	 * the data partition and it would overlap with the reserved
+	 * partition.
+	 * NOTE: The starting index for the ioctl is 1 while for the
+	 * EFI partitions it's 0. For that reason we have to add one
+	 * whenever we make an ioctl call.
+	 */
+	rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
+	if (rval != 0)
+		goto out;
+
+	/*
+	 * Expand the data partition
+	 */
+	rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
+	    efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
+	    efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
+	    data_index + 1);
+	if (rval != 0) {
+		(void) fprintf(stderr, "Unable to resize data "
+		    "partition:  %d\n", rval);
+		/*
+		 * Since we failed to resize, we need to reset the start
+		 * of the reserve partition and re-create it.
+		 */
+		efi_label->efi_parts[resv_index].p_start -= difference;
 	}
 
+	/*
+	 * Re-add the reserved partition. If we've expanded the data partition
+	 * then we'll move the reserve partition to the end of the data
+	 * partition. Otherwise, we'll recreate the partition in its original
+	 * location. Note that we do this as best-effort and ignore any
+	 * errors that may arise here. This will ensure that we finish writing
+	 * the EFI label.
+	 */
+	(void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
+	    efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
+	    efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
+	    resv_index + 1);
+#endif
+
+	/*
+	 * We're now ready to write the EFI label.
+	 */
+	if (rval == 0) {
+		rval = efi_write(fd, efi_label);
+		if (rval < 0 && efi_debug) {
+			(void) fprintf(stderr, "efi_use_whole_disk:fail "
+			    "to write label, rval=%d\n", rval);
+		}
+	}
+
+out:
 	efi_free(efi_label);
-	return (0);
+	return (rval);
 }
 
 /*

diff --git a/zfs/lib/libicp/Makefile.am b/zfs/lib/libicp/Makefile.am
index b92a707..9a2510d 100644
--- a/zfs/lib/libicp/Makefile.am
+++ b/zfs/lib/libicp/Makefile.am

@@ -6,15 +6,12 @@
 
 # Includes kernel code, generate warnings for large stack frames
 AM_CFLAGS += $(FRAME_LARGER_THAN)
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/module/icp/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libicp.la
 
-if TARGET_ASM_X86_64
+if TARGET_CPU_X86_64
 ASM_SOURCES_C = asm-x86_64/aes/aeskey.c
 ASM_SOURCES_AS = \
 	asm-x86_64/aes/aes_amd64.S \
@@ -22,25 +19,13 @@
 	asm-x86_64/modes/gcm_pclmulqdq.S \
 	asm-x86_64/modes/aesni-gcm-x86_64.S \
 	asm-x86_64/modes/ghash-x86_64.S \
-	asm-x86_64/sha1/sha1-x86_64.S \
 	asm-x86_64/sha2/sha256_impl.S \
 	asm-x86_64/sha2/sha512_impl.S
-endif
-
-if TARGET_ASM_I386
+else
 ASM_SOURCES_C =
 ASM_SOURCES_AS =
 endif
 
-if TARGET_ASM_GENERIC
-ASM_SOURCES_C =
-ASM_SOURCES_AS =
-endif
-
-USER_C =
-
-USER_ASM =
-
 KERNEL_C = \
 	spi/kcf_spi.c \
 	api/kcf_ctxops.c \
@@ -62,7 +47,6 @@
 	algs/modes/ctr.c \
 	algs/modes/ccm.c \
 	algs/modes/ecb.c \
-	algs/sha1/sha1.c \
 	algs/sha2/sha2.c \
 	algs/skein/skein.c \
 	algs/skein/skein_block.c \
@@ -70,7 +54,6 @@
 	illumos-crypto.c \
 	io/aes.c \
 	io/edonr_mod.c \
-	io/sha1_mod.c \
 	io/sha2_mod.c \
 	io/skein_mod.c \
 	os/modhash.c \
@@ -85,9 +68,7 @@
 KERNEL_ASM = $(ASM_SOURCES_AS)
 
 nodist_libicp_la_SOURCES = \
-	$(USER_C) \
-	$(USER_ASM) \
 	$(KERNEL_C) \
 	$(KERNEL_ASM)
 
-libicp_la_LIBADD = -lrt
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libnvpair/Makefile.am b/zfs/lib/libnvpair/Makefile.am
index 6d59d7b..f9f1eb5 100644
--- a/zfs/lib/libnvpair/Makefile.am
+++ b/zfs/lib/libnvpair/Makefile.am

@@ -8,12 +8,13 @@
 # and required CFLAGS for libtirpc
 AM_CFLAGS += $(FRAME_LARGER_THAN) $(LIBTIRPC_CFLAGS)
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 lib_LTLIBRARIES = libnvpair.la
 
+include $(top_srcdir)/config/Abigail.am
+
 USER_C = \
 	libnvpair.c \
 	libnvpair_json.c \
@@ -24,11 +25,26 @@
 	nvpair.c \
 	fnvpair.c
 
+dist_libnvpair_la_SOURCES = \
+	$(USER_C)
+
 nodist_libnvpair_la_SOURCES = \
-	$(USER_C) \
 	$(KERNEL_C)
 
-libnvpair_la_LIBADD = $(LIBTIRPC_LIBS)
-libnvpair_la_LDFLAGS = -version-info 1:1:0
+libnvpair_la_LIBADD = \
+	$(abs_top_builddir)/lib/libspl/libspl_assert.la
 
-EXTRA_DIST = $(USER_C)
+libnvpair_la_LIBADD += $(LIBTIRPC_LIBS) $(LTLIBINTL)
+
+libnvpair_la_LDFLAGS =
+
+if !ASAN_ENABLED
+libnvpair_la_LDFLAGS += -Wl,-z,defs
+endif
+
+libnvpair_la_LDFLAGS += -version-info 3:0:0
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Library ABI
+EXTRA_DIST = libnvpair.abi libnvpair.suppr

diff --git a/zfs/lib/libnvpair/libnvpair.abi b/zfs/lib/libnvpair/libnvpair.abi
new file mode 100644
index 0000000..4f961c8
--- /dev/null
+++ b/zfs/lib/libnvpair/libnvpair.abi

@@ -0,0 +1,3137 @@
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libnvpair.so.3'>
+  <elf-needed>
+    <dependency name='libtirpc.so.3'/>
+    <dependency name='libc.so.6'/>
+  </elf-needed>
+  <elf-function-symbols>
+    <elf-symbol name='dump_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_boolean' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_nvlist_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_string_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_add_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_dup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_boolean' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_lookup_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_merge' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_num_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_pack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_pack_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_remove_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_size' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvlist_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fnvpair_value_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libspl_assertf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_reset' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_boolean' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_double' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_hrtime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_nvlist_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_string_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_add_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_dup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_empty' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_exists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_boolean' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_double' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_hrtime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_nv_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_nvlist_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_nvpair_embedded_index' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_string_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_lookup_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_merge' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_next_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_nvflag' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_pack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prev_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_print_json' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_dofmt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_doindent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_getdest' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_setdest' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_setfmt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctl_setindent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_boolean' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_double' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_hrtime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_nvlist_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_string_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_prtctlop_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_remove_all' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_remove_nvpair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_size' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_xalloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_xdup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_xpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_xunpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_type_is_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_boolean_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_boolean_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_byte' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_byte_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_double' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_hrtime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_int8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_match' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_match_regex' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_nvlist_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_string_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint16_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint32_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint64_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_value_uint8_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-function-symbols>
+  <elf-variable-symbols>
+    <elf-symbol name='aok' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_nosleep' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_nosleep_def' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_sleep' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_alloc_sleep_def' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_fixed_ops' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nv_fixed_ops_def' size='40' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvlist_hashtable_init_size' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nvpair_max_recursion' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-variable-symbols>
+  <abi-instr address-size='64' path='../../module/nvpair/fnvpair.c' language='LANG_C99'>
+    <function-decl name='fnvlist_alloc' mangled-name='fnvlist_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_alloc'>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_free' mangled-name='fnvlist_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_free'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_size' mangled-name='fnvlist_size' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_size'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='fnvlist_pack' mangled-name='fnvlist_pack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_pack'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='78c01427' name='sizep'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvlist_pack_free' mangled-name='fnvlist_pack_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_pack_free'>
+      <parameter type-id='26a90f95' name='pack'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_unpack' mangled-name='fnvlist_unpack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_unpack'>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_dup' mangled-name='fnvlist_dup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_dup'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_merge' mangled-name='fnvlist_merge' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_merge'>
+      <parameter type-id='5ce45b60' name='dst'/>
+      <parameter type-id='5ce45b60' name='src'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_num_pairs' mangled-name='fnvlist_num_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_num_pairs'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean' mangled-name='fnvlist_add_boolean' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_boolean'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean_value' mangled-name='fnvlist_add_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_boolean_value'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='c19b74c3' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_byte' mangled-name='fnvlist_add_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_byte'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='d8bf0010' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int8' mangled-name='fnvlist_add_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ee31ee44' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint8' mangled-name='fnvlist_add_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='b96825af' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int16' mangled-name='fnvlist_add_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='23bd8cb5' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint16' mangled-name='fnvlist_add_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='149c6638' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int32' mangled-name='fnvlist_add_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='3ff5601b' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint32' mangled-name='fnvlist_add_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8f92235e' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int64' mangled-name='fnvlist_add_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9da381c4' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint64' mangled-name='fnvlist_add_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9c313c2d' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_string' mangled-name='fnvlist_add_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_string'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='80f4b756' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvlist' mangled-name='fnvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_nvlist'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5ce45b60' name='val'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvpair' mangled-name='fnvlist_add_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='3fa542f0' name='pair'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean_array' mangled-name='fnvlist_add_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_boolean_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='37e3bd22' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_byte_array' mangled-name='fnvlist_add_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_byte_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='45b65157' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int8_array' mangled-name='fnvlist_add_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='256d5229' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint8_array' mangled-name='fnvlist_add_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ae3e8ca6' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int16_array' mangled-name='fnvlist_add_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='f76f73d0' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint16_array' mangled-name='fnvlist_add_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8a121f49' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int32_array' mangled-name='fnvlist_add_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4aafb922' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint32_array' mangled-name='fnvlist_add_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='90421557' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int64_array' mangled-name='fnvlist_add_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_int64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='cb785ebf' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint64_array' mangled-name='fnvlist_add_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_uint64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5d6479ae' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_string_array' mangled-name='fnvlist_add_string_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_string_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='f319fae0' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvlist_array' mangled-name='fnvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_add_nvlist_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='857bb57e' name='val'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_remove' mangled-name='fnvlist_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_remove'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_remove_nvpair' mangled-name='fnvlist_remove_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_remove_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='3fa542f0' name='pair'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_nvpair' mangled-name='fnvlist_lookup_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='3fa542f0'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_boolean' mangled-name='fnvlist_lookup_boolean' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_boolean'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_boolean_value' mangled-name='fnvlist_lookup_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_boolean_value'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_byte' mangled-name='fnvlist_lookup_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_byte'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='d8bf0010'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int8' mangled-name='fnvlist_lookup_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='ee31ee44'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int16' mangled-name='fnvlist_lookup_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='23bd8cb5'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int32' mangled-name='fnvlist_lookup_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='3ff5601b'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int64' mangled-name='fnvlist_lookup_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='9da381c4'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint8' mangled-name='fnvlist_lookup_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint16' mangled-name='fnvlist_lookup_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint32' mangled-name='fnvlist_lookup_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint64' mangled-name='fnvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_string' mangled-name='fnvlist_lookup_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_string'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_nvlist' mangled-name='fnvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_nvlist'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_boolean_array' mangled-name='fnvlist_lookup_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_boolean_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='37e3bd22'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_byte_array' mangled-name='fnvlist_lookup_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_byte_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='45b65157'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int8_array' mangled-name='fnvlist_lookup_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='256d5229'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint8_array' mangled-name='fnvlist_lookup_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='ae3e8ca6'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int16_array' mangled-name='fnvlist_lookup_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='f76f73d0'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint16_array' mangled-name='fnvlist_lookup_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='8a121f49'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int32_array' mangled-name='fnvlist_lookup_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='4aafb922'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint32_array' mangled-name='fnvlist_lookup_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='90421557'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_int64_array' mangled-name='fnvlist_lookup_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_int64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='cb785ebf'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint64_array' mangled-name='fnvlist_lookup_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvlist_lookup_uint64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='5d6479ae'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/nvpair/nvpair.c' language='LANG_C99'>
+    <typedef-decl name='__u_short' type-id='8efea9e5' id='46c660f8'/>
+    <typedef-decl name='__u_int' type-id='f0981eeb' id='8ae6822f'/>
+    <typedef-decl name='__quad_t' type-id='bd54fe1a' id='2632227a'/>
+    <typedef-decl name='__u_quad_t' type-id='7359adad' id='5f3d50a6'/>
+    <typedef-decl name='u_short' type-id='46c660f8' id='32580e96'/>
+    <typedef-decl name='u_int' type-id='8ae6822f' id='48f7c3f5'/>
+    <typedef-decl name='quad_t' type-id='2632227a' id='f5ef0660'/>
+    <typedef-decl name='u_quad_t' type-id='5f3d50a6' id='bd226ac0'/>
+    <typedef-decl name='bool_t' type-id='3ff5601b' id='310a70df'/>
+    <enum-decl name='xdr_op' id='6badf1b8'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='XDR_ENCODE' value='0'/>
+      <enumerator name='XDR_DECODE' value='1'/>
+      <enumerator name='XDR_FREE' value='2'/>
+    </enum-decl>
+    <class-decl name='__rpc_xdr' size-in-bits='384' is-struct='yes' visibility='default' id='755707df'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='x_op' type-id='6badf1b8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='x_ops' type-id='3457e9c5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='x_public' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='x_private' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='x_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='x_handy' type-id='48f7c3f5' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='xdr_ops' size-in-bits='576' is-struct='yes' visibility='default' id='ea1474f2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='x_getlong' type-id='a4e6dd3f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='x_putlong' type-id='d447b08c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='x_getbytes' type-id='b0b6ccaa' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='x_putbytes' type-id='581c24b7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='x_getpostn' type-id='1c77bdb0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='x_setpostn' type-id='13fdb43e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='x_inline' type-id='788e1f25' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='x_destroy' type-id='d781e3b2' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='x_control' type-id='733fc725' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='XDR' type-id='755707df' id='bc407f0e'/>
+    <typedef-decl name='xdrproc_t' type-id='94d188f0' id='c28db3e9'/>
+    <pointer-type-def type-id='bc407f0e' size-in-bits='64' id='17fd1621'/>
+    <pointer-type-def type-id='755707df' size-in-bits='64' id='812c6697'/>
+    <qualified-type-def type-id='26a90f95' const='yes' id='57de658a'/>
+    <pointer-type-def type-id='57de658a' size-in-bits='64' id='f319fae0'/>
+    <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
+    <qualified-type-def type-id='bd54fe1a' const='yes' id='9cb2385f'/>
+    <pointer-type-def type-id='9cb2385f' size-in-bits='64' id='218ee02f'/>
+    <qualified-type-def type-id='ea1474f2' const='yes' id='485ede6d'/>
+    <pointer-type-def type-id='485ede6d' size-in-bits='64' id='3457e9c5'/>
+    <pointer-type-def type-id='95e97e5e' size-in-bits='64' id='7292109c'/>
+    <pointer-type-def type-id='441e0c31' size-in-bits='64' id='788e1f25'/>
+    <pointer-type-def type-id='bd54fe1a' size-in-bits='64' id='3ccc2590'/>
+    <pointer-type-def type-id='3fa542f0' size-in-bits='64' id='0b283d2e'/>
+    <pointer-type-def type-id='f5ef0660' size-in-bits='64' id='de23782d'/>
+    <pointer-type-def type-id='a2185560' size-in-bits='64' id='764e437e'/>
+    <pointer-type-def type-id='b59d7dce' size-in-bits='64' id='78c01427'/>
+    <pointer-type-def type-id='e80b47fe' size-in-bits='64' id='94d188f0'/>
+    <pointer-type-def type-id='1c7a4858' size-in-bits='64' id='b0b6ccaa'/>
+    <pointer-type-def type-id='f6358b93' size-in-bits='64' id='581c24b7'/>
+    <pointer-type-def type-id='45354e42' size-in-bits='64' id='d447b08c'/>
+    <pointer-type-def type-id='0760d6d1' size-in-bits='64' id='733fc725'/>
+    <pointer-type-def type-id='be0f9e0b' size-in-bits='64' id='a4e6dd3f'/>
+    <pointer-type-def type-id='16eb5704' size-in-bits='64' id='13fdb43e'/>
+    <pointer-type-def type-id='46870456' size-in-bits='64' id='1c77bdb0'/>
+    <pointer-type-def type-id='48f7c3f5' size-in-bits='64' id='b4c2e924'/>
+    <pointer-type-def type-id='bd226ac0' size-in-bits='64' id='fce59795'/>
+    <pointer-type-def type-id='32580e96' size-in-bits='64' id='ede2c2f5'/>
+    <pointer-type-def type-id='c87cb1d0' size-in-bits='64' id='d781e3b2'/>
+    <var-decl name='nvpair_max_recursion' type-id='95e97e5e' mangled-name='nvpair_max_recursion' visibility='default' elf-symbol-id='nvpair_max_recursion'/>
+    <var-decl name='nvlist_hashtable_init_size' type-id='9c313c2d' mangled-name='nvlist_hashtable_init_size' visibility='default' elf-symbol-id='nvlist_hashtable_init_size'/>
+    <function-decl name='nv_alloc_init' mangled-name='nv_alloc_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nv_alloc_init'>
+      <parameter type-id='11871392' name='nva'/>
+      <parameter type-id='ee1d4944' name='nvo'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nv_alloc_reset' mangled-name='nv_alloc_reset' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nv_alloc_reset'>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nv_alloc_fini' mangled-name='nv_alloc_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nv_alloc_fini'>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nv_alloc' mangled-name='nvlist_lookup_nv_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_nv_alloc'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='11871392'/>
+    </function-decl>
+    <function-decl name='nvlist_nvflag' mangled-name='nvlist_nvflag' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_nvflag'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='3502e3ff'/>
+    </function-decl>
+    <function-decl name='nvlist_alloc' mangled-name='nvlist_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_alloc'>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='3502e3ff' name='nvflag'/>
+      <parameter type-id='95e97e5e' name='kmflag'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_xalloc' mangled-name='nvlist_xalloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_xalloc'>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='3502e3ff' name='nvflag'/>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_free' mangled-name='nvlist_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_free'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_dup' mangled-name='nvlist_dup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_dup'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='95e97e5e' name='kmflag'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_xdup' mangled-name='nvlist_xdup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_xdup'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove_all' mangled-name='nvlist_remove_all' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_remove_all'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove' mangled-name='nvlist_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_remove'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8d0687d2' name='type'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove_nvpair' mangled-name='nvlist_remove_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_remove_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='3fa542f0' name='nvp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean' mangled-name='nvlist_add_boolean' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_boolean'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean_value' mangled-name='nvlist_add_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_boolean_value'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='c19b74c3' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_byte' mangled-name='nvlist_add_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_byte'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='d8bf0010' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int8' mangled-name='nvlist_add_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ee31ee44' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint8' mangled-name='nvlist_add_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='b96825af' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int16' mangled-name='nvlist_add_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='23bd8cb5' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint16' mangled-name='nvlist_add_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='149c6638' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int32' mangled-name='nvlist_add_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='3ff5601b' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint32' mangled-name='nvlist_add_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8f92235e' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int64' mangled-name='nvlist_add_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9da381c4' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64' mangled-name='nvlist_add_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9c313c2d' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_double' mangled-name='nvlist_add_double' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_double'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='a0eb0f08' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string' mangled-name='nvlist_add_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_string'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='80f4b756' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean_array' mangled-name='nvlist_add_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_boolean_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='37e3bd22' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_byte_array' mangled-name='nvlist_add_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_byte_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='45b65157' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int8_array' mangled-name='nvlist_add_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='256d5229' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint8_array' mangled-name='nvlist_add_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ae3e8ca6' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int16_array' mangled-name='nvlist_add_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='f76f73d0' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint16_array' mangled-name='nvlist_add_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8a121f49' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int32_array' mangled-name='nvlist_add_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4aafb922' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint32_array' mangled-name='nvlist_add_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='90421557' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int64_array' mangled-name='nvlist_add_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_int64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='cb785ebf' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64_array' mangled-name='nvlist_add_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_uint64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5d6479ae' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string_array' mangled-name='nvlist_add_string_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_string_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='f319fae0' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_hrtime' mangled-name='nvlist_add_hrtime' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_hrtime'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='cebdd548' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist' mangled-name='nvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_nvlist'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5ce45b60' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist_array' mangled-name='nvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_nvlist_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='857bb57e' name='a'/>
+      <parameter type-id='3502e3ff' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_prev_nvpair' mangled-name='nvlist_prev_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prev_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='3fa542f0' name='nvp'/>
+      <return type-id='3fa542f0'/>
+    </function-decl>
+    <function-decl name='nvlist_empty' mangled-name='nvlist_empty' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_empty'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_boolean' mangled-name='nvlist_lookup_boolean' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_boolean'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_boolean_value' mangled-name='nvlist_lookup_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_boolean_value'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='37e3bd22' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_byte' mangled-name='nvlist_lookup_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_byte'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='45b65157' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int8' mangled-name='nvlist_lookup_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='256d5229' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint8' mangled-name='nvlist_lookup_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint8'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ae3e8ca6' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int16' mangled-name='nvlist_lookup_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='f76f73d0' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint16' mangled-name='nvlist_lookup_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint16'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='8a121f49' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int32' mangled-name='nvlist_lookup_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4aafb922' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint32' mangled-name='nvlist_lookup_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint32'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='90421557' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int64' mangled-name='nvlist_lookup_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='cb785ebf' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64' mangled-name='nvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint64'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5d6479ae' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_double' mangled-name='nvlist_lookup_double' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_double'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='7408d286' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_string' mangled-name='nvlist_lookup_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_string'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9b23c9ad' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist' mangled-name='nvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_nvlist'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='857bb57e' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_boolean_array' mangled-name='nvlist_lookup_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_boolean_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='03829398' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_byte_array' mangled-name='nvlist_lookup_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_byte_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='3b0247c7' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int8_array' mangled-name='nvlist_lookup_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ee181ab9' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint8_array' mangled-name='nvlist_lookup_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint8_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='d8774064' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int16_array' mangled-name='nvlist_lookup_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='7e73928e' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint16_array' mangled-name='nvlist_lookup_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint16_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='bd8768d9' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int32_array' mangled-name='nvlist_lookup_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9aa04798' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint32_array' mangled-name='nvlist_lookup_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint32_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='9507d3c7' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int64_array' mangled-name='nvlist_lookup_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_int64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='e37ce48f' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64_array' mangled-name='nvlist_lookup_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_uint64_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='892b4acc' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_string_array' mangled-name='nvlist_lookup_string_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_string_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='c0563f85' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist_array' mangled-name='nvlist_lookup_nvlist_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_nvlist_array'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='75be733c' name='a'/>
+      <parameter type-id='4dd26a40' name='n'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_hrtime' mangled-name='nvlist_lookup_hrtime' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_hrtime'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='e379e62d' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_pairs' mangled-name='nvlist_lookup_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_pairs'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='95e97e5e' name='flag'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvpair' mangled-name='nvlist_lookup_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='0b283d2e' name='ret'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvpair_embedded_index' mangled-name='nvlist_lookup_nvpair_embedded_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_lookup_nvpair_embedded_index'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='0b283d2e' name='ret'/>
+      <parameter type-id='7292109c' name='ip'/>
+      <parameter type-id='9b23c9ad' name='ep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_exists' mangled-name='nvlist_exists' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_exists'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvpair' mangled-name='nvlist_add_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_add_nvpair'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='3fa542f0' name='nvp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_merge' mangled-name='nvlist_merge' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_merge'>
+      <parameter type-id='5ce45b60' name='dst'/>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='95e97e5e' name='flag'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_size' mangled-name='nvlist_size' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_size'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='78c01427' name='size'/>
+      <parameter type-id='95e97e5e' name='encoding'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_pack' mangled-name='nvlist_pack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_pack'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='9b23c9ad' name='bufp'/>
+      <parameter type-id='78c01427' name='buflen'/>
+      <parameter type-id='95e97e5e' name='encoding'/>
+      <parameter type-id='95e97e5e' name='kmflag'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_xpack' mangled-name='nvlist_xpack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_xpack'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='9b23c9ad' name='bufp'/>
+      <parameter type-id='78c01427' name='buflen'/>
+      <parameter type-id='95e97e5e' name='encoding'/>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_unpack' mangled-name='nvlist_unpack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_unpack'>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='95e97e5e' name='kmflag'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_xunpack' mangled-name='nvlist_xunpack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_xunpack'>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <parameter type-id='11871392' name='nva'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtol' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='bd54fe1a'/>
+    </function-decl>
+    <function-decl name='strncmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='xdr_int' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='7292109c'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_u_int' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='b4c2e924'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_short' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='764e437e'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_u_short' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='ede2c2f5'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='b4c2e924'/>
+      <parameter type-id='48f7c3f5'/>
+      <parameter type-id='48f7c3f5'/>
+      <parameter type-id='c28db3e9'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_opaque' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_char' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_double' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='7408d286'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_longlong_t' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='de23782d'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdr_u_longlong_t' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='fce59795'/>
+      <return type-id='310a70df'/>
+    </function-decl>
+    <function-decl name='xdrmem_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='17fd1621'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='48f7c3f5'/>
+      <parameter type-id='6badf1b8'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='441e0c31'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='4aafb922'/>
+    </function-type>
+    <function-type size-in-bits='64' id='e80b47fe'>
+      <parameter type-id='17fd1621'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='1c7a4858'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='f6358b93'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='45354e42'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='218ee02f'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='0760d6d1'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='be0f9e0b'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='3ccc2590'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='16eb5704'>
+      <parameter type-id='812c6697'/>
+      <parameter type-id='48f7c3f5'/>
+      <return type-id='310a70df'/>
+    </function-type>
+    <function-type size-in-bits='64' id='46870456'>
+      <parameter type-id='812c6697'/>
+      <return type-id='48f7c3f5'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c87cb1d0'>
+      <parameter type-id='812c6697'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/nvpair/nvpair_alloc_fixed.c' language='LANG_C99'>
+    <var-decl name='nv_fixed_ops' type-id='ee1d4944' mangled-name='nv_fixed_ops' visibility='default' elf-symbol-id='nv_fixed_ops'/>
+    <var-decl name='nv_fixed_ops_def' type-id='aca16c06' mangled-name='nv_fixed_ops_def' visibility='default' elf-symbol-id='nv_fixed_ops_def'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='libnvpair.c' language='LANG_C99'>
+    <type-decl name='char' size-in-bits='8' id='a84c031d'/>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8' id='89feb1ec'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
+      <subrange length='20' type-id='7359adad' id='fdca39cf'/>
+    </array-type-def>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='re_dfa_t' is-struct='yes' visibility='default' is-declaration-only='yes' id='b48d2441'/>
+    <type-decl name='double' size-in-bits='64' id='a0eb0f08'/>
+    <type-decl name='int' size-in-bits='32' id='95e97e5e'/>
+    <type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
+    <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+    <type-decl name='short int' size-in-bits='16' id='a2185560'/>
+    <type-decl name='signed char' size-in-bits='8' id='28577a57'/>
+    <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
+    <type-decl name='unsigned char' size-in-bits='8' id='002ac4a6'/>
+    <type-decl name='unsigned int' size-in-bits='32' id='f0981eeb'/>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+    <type-decl name='variadic parameter type' id='2c1145c5'/>
+    <type-decl name='void' id='48b5725f'/>
+    <typedef-decl name='nvlist_prtctl_t' type-id='196db161' id='b0c1ff8d'/>
+    <enum-decl name='nvlist_indent_mode' id='628aafab'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='NVLIST_INDENT_ABS' value='0'/>
+      <enumerator name='NVLIST_INDENT_TABBED' value='1'/>
+    </enum-decl>
+    <enum-decl name='nvlist_prtctl_fmt' id='c8dcc53a'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='NVLIST_FMT_MEMBER_NAME' value='0'/>
+      <enumerator name='NVLIST_FMT_MEMBER_POSTAMBLE' value='1'/>
+      <enumerator name='NVLIST_FMT_BTWN_ARRAY' value='2'/>
+    </enum-decl>
+    <enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
+      <enumerator name='DATA_TYPE_UNKNOWN' value='0'/>
+      <enumerator name='DATA_TYPE_BOOLEAN' value='1'/>
+      <enumerator name='DATA_TYPE_BYTE' value='2'/>
+      <enumerator name='DATA_TYPE_INT16' value='3'/>
+      <enumerator name='DATA_TYPE_UINT16' value='4'/>
+      <enumerator name='DATA_TYPE_INT32' value='5'/>
+      <enumerator name='DATA_TYPE_UINT32' value='6'/>
+      <enumerator name='DATA_TYPE_INT64' value='7'/>
+      <enumerator name='DATA_TYPE_UINT64' value='8'/>
+      <enumerator name='DATA_TYPE_STRING' value='9'/>
+      <enumerator name='DATA_TYPE_BYTE_ARRAY' value='10'/>
+      <enumerator name='DATA_TYPE_INT16_ARRAY' value='11'/>
+      <enumerator name='DATA_TYPE_UINT16_ARRAY' value='12'/>
+      <enumerator name='DATA_TYPE_INT32_ARRAY' value='13'/>
+      <enumerator name='DATA_TYPE_UINT32_ARRAY' value='14'/>
+      <enumerator name='DATA_TYPE_INT64_ARRAY' value='15'/>
+      <enumerator name='DATA_TYPE_UINT64_ARRAY' value='16'/>
+      <enumerator name='DATA_TYPE_STRING_ARRAY' value='17'/>
+      <enumerator name='DATA_TYPE_HRTIME' value='18'/>
+      <enumerator name='DATA_TYPE_NVLIST' value='19'/>
+      <enumerator name='DATA_TYPE_NVLIST_ARRAY' value='20'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_VALUE' value='21'/>
+      <enumerator name='DATA_TYPE_INT8' value='22'/>
+      <enumerator name='DATA_TYPE_UINT8' value='23'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_ARRAY' value='24'/>
+      <enumerator name='DATA_TYPE_INT8_ARRAY' value='25'/>
+      <enumerator name='DATA_TYPE_UINT8_ARRAY' value='26'/>
+      <enumerator name='DATA_TYPE_DOUBLE' value='27'/>
+    </enum-decl>
+    <typedef-decl name='data_type_t' type-id='aeeae136' id='8d0687d2'/>
+    <class-decl name='nvpair' size-in-bits='128' is-struct='yes' visibility='default' id='1c34e459'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvp_size' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvp_name_sz' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='48'>
+        <var-decl name='nvp_reserve' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvp_value_elem' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='nvp_type' type-id='8d0687d2' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvpair_t' type-id='1c34e459' id='57928edf'/>
+    <class-decl name='nvlist' size-in-bits='192' is-struct='yes' visibility='default' id='ac266fd9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvl_version' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvl_nvflag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvl_priv' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nvl_flag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='nvl_pad' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvlist_t' type-id='ac266fd9' id='8e8d4be3'/>
+    <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='B_FALSE' value='0'/>
+      <enumerator name='B_TRUE' value='1'/>
+    </enum-decl>
+    <typedef-decl name='boolean_t' type-id='f58c8277' id='c19b74c3'/>
+    <typedef-decl name='uchar_t' type-id='002ac4a6' id='d8bf0010'/>
+    <typedef-decl name='uint_t' type-id='f0981eeb' id='3502e3ff'/>
+    <typedef-decl name='hrtime_t' type-id='1eb56b1e' id='cebdd548'/>
+    <class-decl name='nvlist_printops' size-in-bits='3456' is-struct='yes' visibility='default' id='ebc6735b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='print_boolean' type-id='e7f43f72' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='print_boolean_value' type-id='e7f43f73' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='print_byte' type-id='e7f43f74' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='print_int8' type-id='e7f43f75' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='print_uint8' type-id='e7f43f76' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='print_int16' type-id='e7f43f77' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='print_uint16' type-id='e7f43f78' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='print_int32' type-id='e7f43f79' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='print_uint32' type-id='e7f43f7a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='print_int64' type-id='e7f43f7b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='print_uint64' type-id='e7f43f7c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='print_double' type-id='e7f43f7d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='print_string' type-id='e7f43f7e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1664'>
+        <var-decl name='print_hrtime' type-id='e7f43f7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1792'>
+        <var-decl name='print_nvlist' type-id='e7f43f80' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1920'>
+        <var-decl name='print_boolean_array' type-id='e7f43f81' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2048'>
+        <var-decl name='print_byte_array' type-id='e7f43f82' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2176'>
+        <var-decl name='print_int8_array' type-id='e7f43f83' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2304'>
+        <var-decl name='print_uint8_array' type-id='e7f43f84' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2432'>
+        <var-decl name='print_int16_array' type-id='e7f43f85' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2560'>
+        <var-decl name='print_uint16_array' type-id='e7f43f86' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2688'>
+        <var-decl name='print_int32_array' type-id='e7f43f87' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2816'>
+        <var-decl name='print_uint32_array' type-id='e7f43f88' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2944'>
+        <var-decl name='print_int64_array' type-id='e7f43f89' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='3072'>
+        <var-decl name='print_uint64_array' type-id='e7f43f8a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='3200'>
+        <var-decl name='print_string_array' type-id='e7f43f8b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='3328'>
+        <var-decl name='print_nvlist_array' type-id='e7f43f8c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='6d994334' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__1' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f73'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='6a2f50c1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__2' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f74'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='8a1fb33a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__3' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f75'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='506696a8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__4' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f76'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='39b623f9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__5' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f77'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='ea6be4eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__6' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f78'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='f10f1e84' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__7' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f79'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='1708018d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__8' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='90174072' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__9' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='d2af7f32' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__10' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='0b22f759' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__11' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7d'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='3be4d568' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__12' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='c0d0f877' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__13' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f7f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='e1c54c3c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__14' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f80'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='19ea27ae' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__15' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f81'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='7ef0e988' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__16' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f82'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='7391ed39' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__17' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f83'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='42257af5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__18' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f84'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='330cc0d0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__19' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f85'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='506ab59a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__20' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f86'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='ed6a3a3d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__21' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f87'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='750cc41c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__22' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f88'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='292cdbcf' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__23' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f89'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='aaea91b5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__24' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f8a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='7e85a9b6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__25' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f8b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='de20bf07' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__26' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f8c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='op' type-id='2835af80' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='nvlist_prtctl' size-in-bits='576' is-struct='yes' visibility='default' id='d2e8bad9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvprt_fp' type-id='822cd80b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvprt_indent_mode' type-id='628aafab' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='nvprt_indent' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nvprt_indentinc' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='nvprt_nmfmt' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='nvprt_eomfmt' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='nvprt_btwnarrfmt' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='nvprt_btwnarrfmt_nl' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='nvprt_dfltops' type-id='7be54adb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='nvprt_custops' type-id='7be54adb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='int8_t' type-id='2171a512' id='ee31ee44'/>
+    <typedef-decl name='int16_t' type-id='03896e23' id='23bd8cb5'/>
+    <typedef-decl name='int32_t' type-id='33f57a65' id='3ff5601b'/>
+    <typedef-decl name='int64_t' type-id='0c9942d2' id='9da381c4'/>
+    <typedef-decl name='uint8_t' type-id='c51d6389' id='b96825af'/>
+    <typedef-decl name='uint16_t' type-id='253c2d2a' id='149c6638'/>
+    <typedef-decl name='uint32_t' type-id='62f1140c' id='8f92235e'/>
+    <typedef-decl name='uint64_t' type-id='8910171f' id='9c313c2d'/>
+    <typedef-decl name='__int8_t' type-id='28577a57' id='2171a512'/>
+    <typedef-decl name='__uint8_t' type-id='002ac4a6' id='c51d6389'/>
+    <typedef-decl name='__int16_t' type-id='a2185560' id='03896e23'/>
+    <typedef-decl name='__uint16_t' type-id='8efea9e5' id='253c2d2a'/>
+    <typedef-decl name='__int32_t' type-id='95e97e5e' id='33f57a65'/>
+    <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
+    <typedef-decl name='__int64_t' type-id='bd54fe1a' id='0c9942d2'/>
+    <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
+    <typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
+    <typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
+    <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
+    <typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
+    <class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_IO_read_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_IO_read_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='_IO_read_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='_IO_write_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='_IO_write_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='_IO_write_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='_IO_buf_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='_IO_buf_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='_IO_save_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='_IO_backup_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='_IO_save_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='_markers' type-id='e4c6fa61' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='_chain' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='_fileno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='_flags2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='_old_offset' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='_cur_column' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1040'>
+        <var-decl name='_vtable_offset' type-id='28577a57' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1048'>
+        <var-decl name='_shortbuf' type-id='89feb1ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='_lock' type-id='cecf4ea7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1344'>
+        <var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1472'>
+        <var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='_mode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1568'>
+        <var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__re_long_size_t' type-id='7359adad' id='ba516949'/>
+    <typedef-decl name='reg_syntax_t' type-id='7359adad' id='1b72c3b3'/>
+    <class-decl name='re_pattern_buffer' size-in-bits='512' is-struct='yes' visibility='default' id='19fc9a8c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='buffer' type-id='33976309' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='allocated' type-id='ba516949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='used' type-id='ba516949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='syntax' type-id='1b72c3b3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='fastmap' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='translate' type-id='cf536864' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='re_nsub' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='can_be_null' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='449'>
+        <var-decl name='regs_allocated' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='451'>
+        <var-decl name='fastmap_accurate' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='452'>
+        <var-decl name='no_sub' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='453'>
+        <var-decl name='not_bol' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='454'>
+        <var-decl name='not_eol' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='455'>
+        <var-decl name='newline_anchor' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='regex_t' type-id='19fc9a8c' id='aca3bac8'/>
+    <typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
+    <class-decl name='regmatch_t' size-in-bits='64' is-struct='yes' naming-typedef-id='1b941664' visibility='default' id='4f932615'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='rm_so' type-id='54a2a2a8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='rm_eo' type-id='54a2a2a8' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='regmatch_t' type-id='4f932615' id='1b941664'/>
+    <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
+    <pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
+    <qualified-type-def type-id='822cd80b' restrict='yes' id='e75a27e9'/>
+    <pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
+    <pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
+    <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
+    <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
+    <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
+    <pointer-type-def type-id='c19b74c3' size-in-bits='64' id='37e3bd22'/>
+    <pointer-type-def type-id='37e3bd22' size-in-bits='64' id='03829398'/>
+    <pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
+    <pointer-type-def type-id='26a90f95' size-in-bits='64' id='9b23c9ad'/>
+    <pointer-type-def type-id='9b23c9ad' size-in-bits='64' id='c0563f85'/>
+    <qualified-type-def type-id='a84c031d' const='yes' id='9b45d938'/>
+    <pointer-type-def type-id='9b45d938' size-in-bits='64' id='80f4b756'/>
+    <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
+    <qualified-type-def type-id='aca3bac8' const='yes' id='2498fd78'/>
+    <pointer-type-def type-id='2498fd78' size-in-bits='64' id='eed6c816'/>
+    <qualified-type-def type-id='eed6c816' restrict='yes' id='a431a9da'/>
+    <pointer-type-def type-id='a0eb0f08' size-in-bits='64' id='7408d286'/>
+    <pointer-type-def type-id='cebdd548' size-in-bits='64' id='e379e62d'/>
+    <pointer-type-def type-id='9f88f76e' size-in-bits='64' id='7ef0e988'/>
+    <pointer-type-def type-id='c5bb1a2b' size-in-bits='64' id='c0d0f877'/>
+    <pointer-type-def type-id='573fea1b' size-in-bits='64' id='de20bf07'/>
+    <pointer-type-def type-id='70284cc6' size-in-bits='64' id='3be4d568'/>
+    <pointer-type-def type-id='700c3bca' size-in-bits='64' id='6d994334'/>
+    <pointer-type-def type-id='18ac1860' size-in-bits='64' id='506ab59a'/>
+    <pointer-type-def type-id='328fee42' size-in-bits='64' id='750cc41c'/>
+    <pointer-type-def type-id='7ba5cd31' size-in-bits='64' id='aaea91b5'/>
+    <pointer-type-def type-id='a86d8029' size-in-bits='64' id='42257af5'/>
+    <pointer-type-def type-id='0b4eb914' size-in-bits='64' id='19ea27ae'/>
+    <pointer-type-def type-id='c6c8144e' size-in-bits='64' id='2835af80'/>
+    <pointer-type-def type-id='20f7b475' size-in-bits='64' id='6a2f50c1'/>
+    <pointer-type-def type-id='102ee17a' size-in-bits='64' id='e1c54c3c'/>
+    <pointer-type-def type-id='49b69c77' size-in-bits='64' id='ea6be4eb'/>
+    <pointer-type-def type-id='cb5d50f1' size-in-bits='64' id='1708018d'/>
+    <pointer-type-def type-id='880d56b8' size-in-bits='64' id='d2af7f32'/>
+    <pointer-type-def type-id='a739bfc6' size-in-bits='64' id='506696a8'/>
+    <pointer-type-def type-id='234f35e8' size-in-bits='64' id='8a1fb33a'/>
+    <pointer-type-def type-id='41f7168a' size-in-bits='64' id='f10f1e84'/>
+    <pointer-type-def type-id='e8d6e508' size-in-bits='64' id='90174072'/>
+    <pointer-type-def type-id='f3daafe5' size-in-bits='64' id='0b22f759'/>
+    <pointer-type-def type-id='17ab04ad' size-in-bits='64' id='39b623f9'/>
+    <pointer-type-def type-id='256cdd75' size-in-bits='64' id='7391ed39'/>
+    <pointer-type-def type-id='cc10a041' size-in-bits='64' id='ed6a3a3d'/>
+    <pointer-type-def type-id='9fd269d3' size-in-bits='64' id='292cdbcf'/>
+    <pointer-type-def type-id='3bd73b0c' size-in-bits='64' id='7e85a9b6'/>
+    <pointer-type-def type-id='0d445e26' size-in-bits='64' id='330cc0d0'/>
+    <pointer-type-def type-id='e4b89f30' size-in-bits='64' id='ed8aa8ba'/>
+    <pointer-type-def type-id='be7f4941' size-in-bits='64' id='2809de35'/>
+    <pointer-type-def type-id='fe5ae69d' size-in-bits='64' id='90d5edb9'/>
+    <pointer-type-def type-id='2783af3c' size-in-bits='64' id='e44553b6'/>
+    <pointer-type-def type-id='33c6e3d8' size-in-bits='64' id='1263777a'/>
+    <pointer-type-def type-id='dadb9eca' size-in-bits='64' id='cbda43ac'/>
+    <pointer-type-def type-id='55b9e070' size-in-bits='64' id='b3fae562'/>
+    <pointer-type-def type-id='8e63c78b' size-in-bits='64' id='8b41e457'/>
+    <pointer-type-def type-id='c542ed33' size-in-bits='64' id='f9668a57'/>
+    <pointer-type-def type-id='5dea179a' size-in-bits='64' id='001d8764'/>
+    <pointer-type-def type-id='b6f659a0' size-in-bits='64' id='44f188f2'/>
+    <pointer-type-def type-id='2765bd17' size-in-bits='64' id='976f721b'/>
+    <pointer-type-def type-id='9e073b5c' size-in-bits='64' id='ee62ad8e'/>
+    <pointer-type-def type-id='2c785071' size-in-bits='64' id='957d9f35'/>
+    <pointer-type-def type-id='aad19bf7' size-in-bits='64' id='4db8acf3'/>
+    <pointer-type-def type-id='0660e71a' size-in-bits='64' id='0ca7b13c'/>
+    <pointer-type-def type-id='250287b8' size-in-bits='64' id='a91bad5a'/>
+    <pointer-type-def type-id='e7344862' size-in-bits='64' id='519bf35c'/>
+    <pointer-type-def type-id='32b6d968' size-in-bits='64' id='92988dea'/>
+    <pointer-type-def type-id='5c975642' size-in-bits='64' id='7f8ee7e4'/>
+    <pointer-type-def type-id='0155b993' size-in-bits='64' id='2c8c4457'/>
+    <pointer-type-def type-id='6e8b02cb' size-in-bits='64' id='eb944897'/>
+    <pointer-type-def type-id='d434b7d7' size-in-bits='64' id='108e6453'/>
+    <pointer-type-def type-id='c645e10f' size-in-bits='64' id='5cbe16ab'/>
+    <pointer-type-def type-id='de41f295' size-in-bits='64' id='d94cdfa1'/>
+    <pointer-type-def type-id='b2fbf64a' size-in-bits='64' id='470a7fd4'/>
+    <pointer-type-def type-id='cc22d314' size-in-bits='64' id='eddda806'/>
+    <pointer-type-def type-id='23bd8cb5' size-in-bits='64' id='f76f73d0'/>
+    <pointer-type-def type-id='f76f73d0' size-in-bits='64' id='7e73928e'/>
+    <pointer-type-def type-id='3ff5601b' size-in-bits='64' id='4aafb922'/>
+    <pointer-type-def type-id='4aafb922' size-in-bits='64' id='9aa04798'/>
+    <pointer-type-def type-id='9da381c4' size-in-bits='64' id='cb785ebf'/>
+    <pointer-type-def type-id='cb785ebf' size-in-bits='64' id='e37ce48f'/>
+    <pointer-type-def type-id='ee31ee44' size-in-bits='64' id='256d5229'/>
+    <pointer-type-def type-id='256d5229' size-in-bits='64' id='ee181ab9'/>
+    <pointer-type-def type-id='ebc6735b' size-in-bits='64' id='7be54adb'/>
+    <pointer-type-def type-id='d2e8bad9' size-in-bits='64' id='196db161'/>
+    <pointer-type-def type-id='8e8d4be3' size-in-bits='64' id='5ce45b60'/>
+    <pointer-type-def type-id='5ce45b60' size-in-bits='64' id='857bb57e'/>
+    <pointer-type-def type-id='857bb57e' size-in-bits='64' id='75be733c'/>
+    <pointer-type-def type-id='57928edf' size-in-bits='64' id='3fa542f0'/>
+    <pointer-type-def type-id='b48d2441' size-in-bits='64' id='33976309'/>
+    <pointer-type-def type-id='aca3bac8' size-in-bits='64' id='d33f11cb'/>
+    <pointer-type-def type-id='1b941664' size-in-bits='64' id='7e2979d5'/>
+    <qualified-type-def type-id='7e2979d5' restrict='yes' id='fc212857'/>
+    <pointer-type-def type-id='d8bf0010' size-in-bits='64' id='45b65157'/>
+    <pointer-type-def type-id='45b65157' size-in-bits='64' id='3b0247c7'/>
+    <pointer-type-def type-id='149c6638' size-in-bits='64' id='8a121f49'/>
+    <pointer-type-def type-id='8a121f49' size-in-bits='64' id='bd8768d9'/>
+    <pointer-type-def type-id='8f92235e' size-in-bits='64' id='90421557'/>
+    <pointer-type-def type-id='90421557' size-in-bits='64' id='9507d3c7'/>
+    <pointer-type-def type-id='9c313c2d' size-in-bits='64' id='5d6479ae'/>
+    <pointer-type-def type-id='5d6479ae' size-in-bits='64' id='892b4acc'/>
+    <pointer-type-def type-id='b96825af' size-in-bits='64' id='ae3e8ca6'/>
+    <pointer-type-def type-id='ae3e8ca6' size-in-bits='64' id='d8774064'/>
+    <pointer-type-def type-id='3502e3ff' size-in-bits='64' id='4dd26a40'/>
+    <pointer-type-def type-id='002ac4a6' size-in-bits='64' id='cf536864'/>
+    <pointer-type-def type-id='48b5725f' size-in-bits='64' id='eaa32e2f'/>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='re_dfa_t' is-struct='yes' visibility='default' is-declaration-only='yes' id='b48d2441'/>
+    <function-decl name='nvlist_next_nvpair' mangled-name='nvlist_next_nvpair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_next_nvpair'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='3fa542f0'/>
+    </function-decl>
+    <function-decl name='nvpair_name' mangled-name='nvpair_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_name'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='nvpair_type' mangled-name='nvpair_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_type'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='8d0687d2'/>
+    </function-decl>
+    <function-decl name='nvpair_type_is_array' mangled-name='nvpair_type_is_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_type_is_array'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_boolean_value' mangled-name='nvpair_value_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_boolean_value'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_byte' mangled-name='nvpair_value_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_byte'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='45b65157'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int8' mangled-name='nvpair_value_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int8'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='256d5229'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint8' mangled-name='nvpair_value_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint8'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='ae3e8ca6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int16' mangled-name='nvpair_value_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int16'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='f76f73d0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint16' mangled-name='nvpair_value_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint16'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='8a121f49'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int32' mangled-name='nvpair_value_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int32'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='4aafb922'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint32' mangled-name='nvpair_value_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint32'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='90421557'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int64' mangled-name='nvpair_value_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='cb785ebf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint64' mangled-name='nvpair_value_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_string' mangled-name='nvpair_value_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_string'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_nvlist' mangled-name='nvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_nvlist'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_boolean_array' mangled-name='nvpair_value_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_boolean_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='03829398'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_byte_array' mangled-name='nvpair_value_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_byte_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='3b0247c7'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int8_array' mangled-name='nvpair_value_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int8_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='ee181ab9'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint8_array' mangled-name='nvpair_value_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint8_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='d8774064'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int16_array' mangled-name='nvpair_value_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int16_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='7e73928e'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint16_array' mangled-name='nvpair_value_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint16_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='bd8768d9'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int32_array' mangled-name='nvpair_value_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int32_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='9aa04798'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint32_array' mangled-name='nvpair_value_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint32_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='9507d3c7'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int64_array' mangled-name='nvpair_value_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_int64_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='e37ce48f'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint64_array' mangled-name='nvpair_value_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_uint64_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='892b4acc'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_string_array' mangled-name='nvpair_value_string_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_string_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='c0563f85'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_nvlist_array' mangled-name='nvpair_value_nvlist_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_nvlist_array'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='75be733c'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_hrtime' mangled-name='nvpair_value_hrtime' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_hrtime'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='e379e62d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_double' mangled-name='nvpair_value_double' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_double'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='7408d286'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_setdest' mangled-name='nvlist_prtctl_setdest' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_setdest'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='822cd80b' name='fp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_getdest' mangled-name='nvlist_prtctl_getdest' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_getdest'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <return type-id='822cd80b'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_setindent' mangled-name='nvlist_prtctl_setindent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_setindent'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='628aafab' name='mode'/>
+      <parameter type-id='95e97e5e' name='start'/>
+      <parameter type-id='95e97e5e' name='inc'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_doindent' mangled-name='nvlist_prtctl_doindent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_doindent'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='95e97e5e' name='onemore'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_setfmt' mangled-name='nvlist_prtctl_setfmt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_setfmt'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='c8dcc53a' name='which'/>
+      <parameter type-id='80f4b756' name='fmt'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_dofmt' mangled-name='nvlist_prtctl_dofmt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_dofmt'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='c8dcc53a' name='which'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_boolean' mangled-name='nvlist_prtctlop_boolean' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_boolean'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='1263777a' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_boolean_value' mangled-name='nvlist_prtctlop_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_boolean_value'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='976f721b' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_byte' mangled-name='nvlist_prtctlop_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_byte'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='519bf35c' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int8' mangled-name='nvlist_prtctlop_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int8'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='a91bad5a' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint8' mangled-name='nvlist_prtctlop_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint8'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='eb944897' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int16' mangled-name='nvlist_prtctlop_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int16'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='957d9f35' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint16' mangled-name='nvlist_prtctlop_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint16'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='92988dea' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int32' mangled-name='nvlist_prtctlop_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int32'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='4db8acf3' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint32' mangled-name='nvlist_prtctlop_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint32'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='7f8ee7e4' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int64' mangled-name='nvlist_prtctlop_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int64'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='0ca7b13c' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint64' mangled-name='nvlist_prtctlop_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint64'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='2c8c4457' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_double' mangled-name='nvlist_prtctlop_double' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_double'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='e44553b6' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_string' mangled-name='nvlist_prtctlop_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_string'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='2809de35' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_hrtime' mangled-name='nvlist_prtctlop_hrtime' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_hrtime'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='ee62ad8e' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_nvlist' mangled-name='nvlist_prtctlop_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_nvlist'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='001d8764' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_boolean_array' mangled-name='nvlist_prtctlop_boolean_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_boolean_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='ed8aa8ba' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_byte_array' mangled-name='nvlist_prtctlop_byte_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_byte_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='108e6453' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int8_array' mangled-name='nvlist_prtctlop_int8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int8_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='f9668a57' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint8_array' mangled-name='nvlist_prtctlop_uint8_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint8_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='eddda806' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int16_array' mangled-name='nvlist_prtctlop_int16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int16_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='cbda43ac' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint16_array' mangled-name='nvlist_prtctlop_uint16_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint16_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='5cbe16ab' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int32_array' mangled-name='nvlist_prtctlop_int32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int32_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='b3fae562' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint32_array' mangled-name='nvlist_prtctlop_uint32_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint32_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='d94cdfa1' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_int64_array' mangled-name='nvlist_prtctlop_int64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_int64_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='8b41e457' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_uint64_array' mangled-name='nvlist_prtctlop_uint64_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_uint64_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='470a7fd4' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_string_array' mangled-name='nvlist_prtctlop_string_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_string_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='90d5edb9' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctlop_nvlist_array' mangled-name='nvlist_prtctlop_nvlist_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctlop_nvlist_array'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <parameter type-id='44f188f2' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_alloc' mangled-name='nvlist_prtctl_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_alloc'>
+      <return type-id='b0c1ff8d'/>
+    </function-decl>
+    <function-decl name='nvlist_prtctl_free' mangled-name='nvlist_prtctl_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prtctl_free'>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_print' mangled-name='nvlist_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_print'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_prt' mangled-name='nvlist_prt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_prt'>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <parameter type-id='b0c1ff8d' name='pctl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='dump_nvlist' mangled-name='dump_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='dump_nvlist'>
+      <parameter type-id='5ce45b60' name='list'/>
+      <parameter type-id='95e97e5e' name='indent'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvpair_value_match_regex' mangled-name='nvpair_value_match_regex' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_match_regex'>
+      <parameter type-id='3fa542f0' name='nvp'/>
+      <parameter type-id='95e97e5e' name='ai'/>
+      <parameter type-id='26a90f95' name='value'/>
+      <parameter type-id='d33f11cb' name='value_regex'/>
+      <parameter type-id='9b23c9ad' name='ep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_match' mangled-name='nvpair_value_match' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvpair_value_match'>
+      <parameter type-id='3fa542f0' name='nvp'/>
+      <parameter type-id='95e97e5e' name='ai'/>
+      <parameter type-id='26a90f95' name='value'/>
+      <parameter type-id='9b23c9ad' name='ep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='dcgettext' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='regexec' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a431a9da'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='fc212857'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='printf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='malloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='calloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strcspn' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='strspn' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='9f88f76e'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='37e3bd22'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c5bb1a2b'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='573fea1b'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='70284cc6'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='a0eb0f08'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='700c3bca'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='18ac1860'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f76f73d0'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='328fee42'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='4aafb922'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='7ba5cd31'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cb785ebf'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='a86d8029'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='256d5229'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='0b4eb914'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c6c8144e'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='20f7b475'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='102ee17a'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cebdd548'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='49b69c77'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='23bd8cb5'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='cb5d50f1'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3ff5601b'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='880d56b8'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9da381c4'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='a739bfc6'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ee31ee44'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='234f35e8'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='d8bf0010'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='41f7168a'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='149c6638'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='e8d6e508'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='f3daafe5'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='17ab04ad'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b96825af'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='256cdd75'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='45b65157'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='cc10a041'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8a121f49'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='9fd269d3'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='90421557'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='3bd73b0c'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='0d445e26'>
+      <parameter type-id='196db161'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='e4b89f30'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='37e3bd22'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='be7f4941'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='fe5ae69d'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='2783af3c'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='a0eb0f08'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='33c6e3d8'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='dadb9eca'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f76f73d0'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='55b9e070'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='4aafb922'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='8e63c78b'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cb785ebf'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c542ed33'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='256d5229'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='5dea179a'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='b6f659a0'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='2765bd17'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='9e073b5c'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cebdd548'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='2c785071'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='23bd8cb5'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='aad19bf7'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3ff5601b'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='0660e71a'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9da381c4'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='250287b8'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ee31ee44'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='e7344862'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='d8bf0010'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='32b6d968'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='149c6638'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='5c975642'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='0155b993'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='6e8b02cb'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b96825af'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='d434b7d7'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='45b65157'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c645e10f'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8a121f49'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='de41f295'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='90421557'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='b2fbf64a'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='cc22d314'>
+      <parameter type-id='b0c1ff8d'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libnvpair_json.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32' id='8e0573fd'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <class-decl name='__mbstate_t' size-in-bits='64' is-struct='yes' naming-typedef-id='55e5b2b5' visibility='default' id='87447a2e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__count' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='__value' type-id='ac5ab595' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='32' is-anonymous='yes' visibility='default' id='ac5ab595'>
+      <data-member access='public'>
+        <var-decl name='__wch' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__wchb' type-id='8e0573fd' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='__mbstate_t' type-id='87447a2e' id='55e5b2b5'/>
+    <typedef-decl name='mbstate_t' type-id='55e5b2b5' id='3d7d8cbf'/>
+    <typedef-decl name='wchar_t' type-id='95e97e5e' id='928221d2'/>
+    <pointer-type-def type-id='3d7d8cbf' size-in-bits='64' id='a68021ce'/>
+    <qualified-type-def type-id='a68021ce' restrict='yes' id='03aaab72'/>
+    <pointer-type-def type-id='928221d2' size-in-bits='64' id='323d93c1'/>
+    <qualified-type-def type-id='323d93c1' restrict='yes' id='f1358bc3'/>
+    <function-decl name='fnvpair_value_boolean_value' mangled-name='fnvpair_value_boolean_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_boolean_value'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_byte' mangled-name='fnvpair_value_byte' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_byte'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='d8bf0010'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int8' mangled-name='fnvpair_value_int8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_int8'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='ee31ee44'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int16' mangled-name='fnvpair_value_int16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_int16'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='23bd8cb5'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int32' mangled-name='fnvpair_value_int32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_int32'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='3ff5601b'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int64' mangled-name='fnvpair_value_int64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_int64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='9da381c4'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_uint8' mangled-name='fnvpair_value_uint8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_uint8'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_uint16' mangled-name='fnvpair_value_uint16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_uint16'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_uint32' mangled-name='fnvpair_value_uint32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_uint32'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_uint64' mangled-name='fnvpair_value_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_uint64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_string' mangled-name='fnvpair_value_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_string'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_nvlist' mangled-name='fnvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fnvpair_value_nvlist'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='libspl_assertf' mangled-name='libspl_assertf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libspl_assertf'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_print_json' mangled-name='nvlist_print_json' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='nvlist_print_json'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__ctype_get_mb_cur_max' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='mbrtowc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f1358bc3'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='03aaab72'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='nvpair_alloc_system.c' language='LANG_C99'>
+    <class-decl name='__va_list_tag' size-in-bits='192' is-struct='yes' visibility='default' id='d5027220'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='gp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='fp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='overflow_arg_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='reg_save_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nv_alloc_ops_t' type-id='8f6cc4f4' id='03e8ffd6'/>
+    <class-decl name='nv_alloc' size-in-bits='128' is-struct='yes' visibility='default' id='98213087'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nva_ops' type-id='ee1d4944' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nva_arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nv_alloc_t' type-id='98213087' id='cca08635'/>
+    <class-decl name='nv_alloc_ops' size-in-bits='320' is-struct='yes' visibility='default' id='8f6cc4f4'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nv_ao_init' type-id='76da8447' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nv_ao_fini' type-id='fe356f6f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nv_ao_alloc' type-id='9ff7f508' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='nv_ao_free' type-id='520da3f4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='nv_ao_reset' type-id='fe356f6f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='d5027220' size-in-bits='64' id='b7f2d5e6'/>
+    <qualified-type-def type-id='03e8ffd6' const='yes' id='aca16c06'/>
+    <pointer-type-def type-id='aca16c06' size-in-bits='64' id='ee1d4944'/>
+    <pointer-type-def type-id='e9ff7293' size-in-bits='64' id='76da8447'/>
+    <pointer-type-def type-id='cca08635' size-in-bits='64' id='11871392'/>
+    <pointer-type-def type-id='51a21b4b' size-in-bits='64' id='fe356f6f'/>
+    <pointer-type-def type-id='1169c032' size-in-bits='64' id='520da3f4'/>
+    <pointer-type-def type-id='9fff962e' size-in-bits='64' id='9ff7f508'/>
+    <var-decl name='nv_alloc_nosleep' type-id='11871392' mangled-name='nv_alloc_nosleep' visibility='default' elf-symbol-id='nv_alloc_nosleep'/>
+    <var-decl name='nv_alloc_sleep_def' type-id='cca08635' mangled-name='nv_alloc_sleep_def' visibility='default' elf-symbol-id='nv_alloc_sleep_def'/>
+    <var-decl name='nv_alloc_nosleep_def' type-id='cca08635' mangled-name='nv_alloc_nosleep_def' visibility='default' elf-symbol-id='nv_alloc_nosleep_def'/>
+    <var-decl name='nv_alloc_sleep' type-id='11871392' mangled-name='nv_alloc_sleep' visibility='default' elf-symbol-id='nv_alloc_sleep'/>
+    <function-type size-in-bits='64' id='e9ff7293'>
+      <parameter type-id='11871392'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='51a21b4b'>
+      <parameter type-id='11871392'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='1169c032'>
+      <parameter type-id='11871392'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='9fff962e'>
+      <parameter type-id='11871392'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='assert.c' language='LANG_C99'>
+    <var-decl name='aok' type-id='95e97e5e' mangled-name='aok' visibility='default' elf-symbol-id='aok'/>
+    <function-decl name='vfprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='abort' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+</abi-corpus>

diff --git a/zfs/lib/libnvpair/libnvpair.suppr b/zfs/lib/libnvpair/libnvpair.suppr
new file mode 100644
index 0000000..f4db8a4
--- /dev/null
+++ b/zfs/lib/libnvpair/libnvpair.suppr

@@ -0,0 +1,2 @@
+[suppress_type]
+	name = FILE*

diff --git a/zfs/lib/libnvpair/libnvpair_json.c b/zfs/lib/libnvpair/libnvpair_json.c
index 37a3923..15b6f4a 100644
--- a/zfs/lib/libnvpair/libnvpair_json.c
+++ b/zfs/lib/libnvpair/libnvpair_json.c

@@ -54,6 +54,13 @@
 
 	FPRINTF(fp, "\"");
 	while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) {
+		if (sz == (size_t)-1 || sz == (size_t)-2) {
+			/*
+			 * We last read an invalid multibyte character sequence,
+			 * so return an error.
+			 */
+			return (-1);
+		}
 		switch (c) {
 		case '"':
 			FPRINTF(fp, "\\\"");
@@ -97,14 +104,6 @@
 		input += sz;
 	}
 
-	if (sz == (size_t)-1 || sz == (size_t)-2) {
-		/*
-		 * We last read an invalid multibyte character sequence,
-		 * so return an error.
-		 */
-		return (-1);
-	}
-
 	FPRINTF(fp, "\"");
 	return (0);
 }

diff --git a/zfs/lib/libshare/Makefile.am b/zfs/lib/libshare/Makefile.am
index 462e333..0fce333 100644
--- a/zfs/lib/libshare/Makefile.am
+++ b/zfs/lib/libshare/Makefile.am

@@ -1,19 +1,30 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+DEFAULT_INCLUDES += -I$(srcdir)
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libshare.la
 
 USER_C = \
 	libshare_impl.h \
 	libshare.c \
-	nfs.c \
 	nfs.h \
-	smb.c \
 	smb.h
 
-nodist_libshare_la_SOURCES = $(USER_C)
+if BUILD_LINUX
+USER_C += \
+	os/linux/nfs.c \
+	os/linux/smb.c
+endif
 
-EXTRA_DIST = $(USER_C)
+if BUILD_FREEBSD
+USER_C += \
+	os/freebsd/nfs.c \
+	os/freebsd/smb.c
+endif
+
+libshare_la_SOURCES = $(USER_C)
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libshare/libshare.c b/zfs/lib/libshare/libshare.c
index 0965911..d32a282 100644
--- a/zfs/lib/libshare/libshare.c
+++ b/zfs/lib/libshare/libshare.c

@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
+ * Copyright (c) 2018, 2020 by Delphix. All rights reserved.
  */
 
 #include <stdio.h>
@@ -29,30 +30,20 @@
 #include <errno.h>
 #include <strings.h>
 #include <libintl.h>
+#include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <libzfs.h>
 #include <libshare.h>
+#include "libzfs_impl.h"
 #include "libshare_impl.h"
 #include "nfs.h"
 #include "smb.h"
 
-static sa_share_impl_t find_share(sa_handle_impl_t handle,
-    const char *sharepath);
-static sa_share_impl_t alloc_share(const char *sharepath);
+static sa_share_impl_t alloc_share(const char *zfsname, const char *path);
 static void free_share(sa_share_impl_t share);
 
-static void parse_sharetab(sa_handle_impl_t impl_handle);
-static int process_share(sa_handle_impl_t impl_handle,
-    sa_share_impl_t impl_share, char *pathname, char *resource,
-    char *fstype, char *options, char *description,
-    char *dataset, boolean_t from_sharetab);
-static void update_sharetab(sa_handle_impl_t impl_handle);
-
-static int update_zfs_share(sa_share_impl_t impl_handle, const char *proto);
-static int update_zfs_shares(sa_handle_impl_t impl_handle, const char *proto);
-
 static int fstypes_count;
 static sa_fstype_t *fstypes;
 
@@ -78,28 +69,6 @@
 	return (fstype);
 }
 
-sa_handle_t
-sa_init(int init_service)
-{
-	sa_handle_impl_t impl_handle;
-
-	impl_handle = calloc(1, sizeof (struct sa_handle_impl));
-
-	if (impl_handle == NULL)
-		return (NULL);
-
-	impl_handle->zfs_libhandle = libzfs_init();
-
-	if (impl_handle->zfs_libhandle != NULL) {
-		libzfs_print_on_error(impl_handle->zfs_libhandle, B_TRUE);
-	}
-
-	parse_sharetab(impl_handle);
-	update_zfs_shares(impl_handle, NULL);
-
-	return ((sa_handle_t)impl_handle);
-}
-
 __attribute__((constructor)) static void
 libshare_init(void)
 {
@@ -107,448 +76,101 @@
 	libshare_smb_init();
 }
 
-static void
-parse_sharetab(sa_handle_impl_t impl_handle)
+int
+sa_enable_share(const char *zfsname, const char *mountpoint,
+    const char *shareopts, char *protocol)
 {
-	FILE *fp;
-	char line[512];
-	char *eol, *pathname, *resource, *fstype, *options, *description;
-
-	fp = fopen(ZFS_SHARETAB, "r");
-
-	if (fp == NULL)
-		return;
-
-	while (fgets(line, sizeof (line), fp) != NULL) {
-		eol = line + strlen(line) - 1;
-
-		while (eol >= line) {
-			if (*eol != '\r' && *eol != '\n')
-				break;
-
-			*eol = '\0';
-			eol--;
-		}
-
-		pathname = line;
-
-		if ((resource = strchr(pathname, '\t')) == NULL)
-			continue;
-
-		*resource = '\0';
-		resource++;
-
-		if ((fstype = strchr(resource, '\t')) == NULL)
-			continue;
-
-		*fstype = '\0';
-		fstype++;
-
-		if ((options = strchr(fstype, '\t')) == NULL)
-			continue;
-
-		*options = '\0';
-		options++;
-
-		if ((description = strchr(fstype, '\t')) != NULL) {
-			*description = '\0';
-			description++;
-		}
-
-		if (strcmp(resource, "-") == 0)
-			resource = NULL;
-
-		(void) process_share(impl_handle, NULL, pathname, resource,
-		    fstype, options, description, NULL, B_TRUE);
-	}
-
-	fclose(fp);
-}
-
-static void
-update_sharetab(sa_handle_impl_t impl_handle)
-{
-	sa_share_impl_t impl_share;
-	int temp_fd;
-	FILE *temp_fp;
-	char tempfile[] = ZFS_SHARETAB".XXXXXX";
-	sa_fstype_t *fstype;
-	const char *resource;
-
-	if (mkdir("/etc/dfs", 0755) < 0 && errno != EEXIST) {
-		return;
-	}
-
-	temp_fd = mkstemp(tempfile);
-
-	if (temp_fd < 0)
-		return;
-
-	temp_fp = fdopen(temp_fd, "w");
-
-	if (temp_fp == NULL)
-		return;
-
-	impl_share = impl_handle->shares;
-	while (impl_share != NULL) {
-		fstype = fstypes;
-		while (fstype != NULL) {
-			if (FSINFO(impl_share, fstype)->active &&
-			    FSINFO(impl_share, fstype)->shareopts != NULL) {
-				resource = FSINFO(impl_share, fstype)->resource;
-
-				if (resource == NULL)
-					resource = "-";
-
-				fprintf(temp_fp, "%s\t%s\t%s\t%s\n",
-				    impl_share->sharepath, resource,
-				    fstype->name,
-				    FSINFO(impl_share, fstype)->shareopts);
-			}
-
-			fstype = fstype->next;
-		}
-
-		impl_share = impl_share->next;
-	}
-
-	fflush(temp_fp);
-	fsync(temp_fd);
-	fclose(temp_fp);
-
-	(void) rename(tempfile, ZFS_SHARETAB);
-}
-
-typedef struct update_cookie_s {
-	sa_handle_impl_t handle;
-	const char *proto;
-} update_cookie_t;
-
-static int
-update_zfs_shares_cb(zfs_handle_t *zhp, void *pcookie)
-{
-	update_cookie_t *udata = (update_cookie_t *)pcookie;
-	char mountpoint[ZFS_MAXPROPLEN];
-	char shareopts[ZFS_MAXPROPLEN];
-	char *dataset;
-	zfs_type_t type = zfs_get_type(zhp);
-
-	if (type == ZFS_TYPE_FILESYSTEM &&
-	    zfs_iter_filesystems(zhp, update_zfs_shares_cb, pcookie) != 0) {
-		zfs_close(zhp);
-		return (1);
-	}
-
-	if (type != ZFS_TYPE_FILESYSTEM) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
-	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	dataset = (char *)zfs_get_name(zhp);
-
-	if (dataset == NULL) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	if (!zfs_is_mounted(zhp, NULL)) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	if ((udata->proto == NULL || strcmp(udata->proto, "nfs") == 0) &&
-	    zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
-	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0 &&
-	    strcmp(shareopts, "off") != 0) {
-		(void) process_share(udata->handle, NULL, mountpoint, NULL,
-		    "nfs", shareopts, NULL, dataset, B_FALSE);
-	}
-
-	if ((udata->proto == NULL || strcmp(udata->proto, "smb") == 0) &&
-	    zfs_prop_get(zhp, ZFS_PROP_SHARESMB, shareopts,
-	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0 &&
-	    strcmp(shareopts, "off") != 0) {
-		(void) process_share(udata->handle, NULL, mountpoint, NULL,
-		    "smb", shareopts, NULL, dataset, B_FALSE);
-	}
-
-	zfs_close(zhp);
-
-	return (0);
-}
-
-static int
-update_zfs_share(sa_share_impl_t impl_share, const char *proto)
-{
-	sa_handle_impl_t impl_handle = impl_share->handle;
-	zfs_handle_t *zhp;
-	update_cookie_t udata;
-
-	if (impl_handle->zfs_libhandle == NULL)
-			return (SA_SYSTEM_ERR);
-
-	assert(impl_share->dataset != NULL);
-
-	zhp = zfs_open(impl_share->handle->zfs_libhandle, impl_share->dataset,
-	    ZFS_TYPE_FILESYSTEM);
-
-	if (zhp == NULL)
-		return (SA_SYSTEM_ERR);
-
-	udata.handle = impl_handle;
-	udata.proto = proto;
-	(void) update_zfs_shares_cb(zhp, &udata);
-
-	return (SA_OK);
-}
-
-static int
-update_zfs_shares(sa_handle_impl_t impl_handle, const char *proto)
-{
-	update_cookie_t udata;
-
-	if (impl_handle->zfs_libhandle == NULL)
-		return (SA_SYSTEM_ERR);
-
-	udata.handle = impl_handle;
-	udata.proto = proto;
-	(void) zfs_iter_root(impl_handle->zfs_libhandle, update_zfs_shares_cb,
-	    &udata);
-
-	return (SA_OK);
-}
-
-static int
-process_share(sa_handle_impl_t impl_handle, sa_share_impl_t impl_share,
-    char *pathname, char *resource, char *proto,
-    char *options, char *description, char *dataset,
-    boolean_t from_sharetab)
-{
-	struct stat statbuf;
-	int rc;
-	char *resource_dup = NULL, *dataset_dup = NULL;
-	boolean_t new_share;
+	int rc, ret = SA_OK;
+	boolean_t found_protocol = B_FALSE;
 	sa_fstype_t *fstype;
 
-	new_share = B_FALSE;
-
+	sa_share_impl_t impl_share = alloc_share(zfsname, mountpoint);
 	if (impl_share == NULL)
-		impl_share = find_share(impl_handle, pathname);
-
-	if (impl_share == NULL) {
-		if (lstat(pathname, &statbuf) != 0 ||
-		    !S_ISDIR(statbuf.st_mode))
-			return (SA_BAD_PATH);
-
-		impl_share = alloc_share(pathname);
-
-		if (impl_share == NULL) {
-			rc = SA_NO_MEMORY;
-			goto err;
-		}
-
-		new_share = B_TRUE;
-	}
-
-	if (dataset != NULL) {
-		dataset_dup = strdup(dataset);
-
-		if (dataset_dup == NULL) {
-			rc = SA_NO_MEMORY;
-			goto err;
-		}
-	}
-
-	free(impl_share->dataset);
-	impl_share->dataset = dataset_dup;
-
-	rc = SA_INVALID_PROTOCOL;
+		return (SA_NO_MEMORY);
 
 	fstype = fstypes;
 	while (fstype != NULL) {
-		if (strcmp(fstype->name, proto) == 0) {
-			if (resource != NULL) {
-				resource_dup = strdup(resource);
-
-				if (resource_dup == NULL) {
-					rc = SA_NO_MEMORY;
-					goto err;
-				}
-			}
-
-			free(FSINFO(impl_share, fstype)->resource);
-			FSINFO(impl_share, fstype)->resource = resource_dup;
+		if (strcmp(fstype->name, protocol) == 0) {
 
 			rc = fstype->ops->update_shareopts(impl_share,
-			    resource, options);
+			    shareopts);
+			if (rc != SA_OK)
+				break;
 
-			if (rc == SA_OK && from_sharetab)
-				FSINFO(impl_share, fstype)->active = B_TRUE;
+			rc = fstype->ops->enable_share(impl_share);
+			if (rc != SA_OK)
+				ret = rc;
 
-			break;
+			found_protocol = B_TRUE;
 		}
 
 		fstype = fstype->next;
 	}
+	free_share(impl_share);
 
-	if (rc != SA_OK)
-		goto err;
+	return (found_protocol ? ret : SA_INVALID_PROTOCOL);
+}
 
-	if (new_share) {
-		impl_share->handle = impl_handle;
+int
+sa_disable_share(const char *mountpoint, char *protocol)
+{
+	int rc, ret = SA_OK;
+	boolean_t found_protocol = B_FALSE;
+	sa_fstype_t *fstype;
 
-		impl_share->next = impl_handle->shares;
-		impl_handle->shares = impl_share;
+	sa_share_impl_t impl_share = alloc_share(NULL, mountpoint);
+	if (impl_share == NULL)
+		return (SA_NO_MEMORY);
 
+	fstype = fstypes;
+	while (fstype != NULL) {
+		if (strcmp(fstype->name, protocol) == 0) {
+
+			rc = fstype->ops->disable_share(impl_share);
+			if (rc != SA_OK)
+				ret = rc;
+
+			found_protocol = B_TRUE;
+		}
+
+		fstype = fstype->next;
 	}
+	free_share(impl_share);
 
-err:
-	if (rc != SA_OK) {
-		if (new_share)
-			free_share(impl_share);
+	return (found_protocol ? ret : SA_INVALID_PROTOCOL);
+}
+
+boolean_t
+sa_is_shared(const char *mountpoint, char *protocol)
+{
+	sa_fstype_t *fstype;
+	boolean_t ret = B_FALSE;
+
+	/* guid value is not used */
+	sa_share_impl_t impl_share = alloc_share(NULL, mountpoint);
+	if (impl_share == NULL)
+		return (B_FALSE);
+
+	fstype = fstypes;
+	while (fstype != NULL) {
+		if (strcmp(fstype->name, protocol) == 0) {
+			ret = fstype->ops->is_shared(impl_share);
+		}
+		fstype = fstype->next;
 	}
-
-	return (rc);
+	free_share(impl_share);
+	return (ret);
 }
 
 void
-sa_fini(sa_handle_t handle)
+sa_commit_shares(const char *protocol)
 {
-	sa_handle_impl_t impl_handle = (sa_handle_impl_t)handle;
-	sa_share_impl_t impl_share, next;
-	sa_share_impl_t *pcurr;
-
-	if (impl_handle == NULL)
-		return;
-
-	/*
-	 * clean up shares which don't have a non-NULL dataset property,
-	 * which means they're in sharetab but we couldn't find their
-	 * ZFS dataset.
-	 */
-	pcurr = &(impl_handle->shares);
-	impl_share = *pcurr;
-	while (impl_share != NULL) {
-		next = impl_share->next;
-
-		if (impl_share->dataset == NULL) {
-			/* remove item from the linked list */
-			*pcurr = next;
-
-			sa_disable_share(impl_share, NULL);
-
-			free_share(impl_share);
-		} else {
-			pcurr = &(impl_share->next);
-		}
-
-		impl_share = next;
-	}
-
-	update_sharetab(impl_handle);
-
-	if (impl_handle->zfs_libhandle != NULL)
-		libzfs_fini(impl_handle->zfs_libhandle);
-
-	impl_share = impl_handle->shares;
-	while (impl_share != NULL) {
-		next = impl_share->next;
-		free_share(impl_share);
-		impl_share = next;
-	}
-
-	free(impl_handle);
-}
-
-static sa_share_impl_t
-find_share(sa_handle_impl_t impl_handle, const char *sharepath)
-{
-	sa_share_impl_t impl_share;
-
-	impl_share = impl_handle->shares;
-	while (impl_share != NULL) {
-		if (strcmp(impl_share->sharepath, sharepath) == 0) {
-			break;
-		}
-
-		impl_share = impl_share->next;
-	}
-
-	return (impl_share);
-}
-
-sa_share_t
-sa_find_share(sa_handle_t handle, char *sharepath)
-{
-	return ((sa_share_t)find_share((sa_handle_impl_t)handle, sharepath));
-}
-
-int
-sa_enable_share(sa_share_t share, char *protocol)
-{
-	sa_share_impl_t impl_share = (sa_share_impl_t)share;
-	int rc, ret = SA_OK;
-	boolean_t found_protocol = B_FALSE;
-	sa_fstype_t *fstype;
-
-	fstype = fstypes;
+	sa_fstype_t *fstype = fstypes;
 	while (fstype != NULL) {
-		if (protocol == NULL || strcmp(fstype->name, protocol) == 0) {
-			update_zfs_share(impl_share, fstype->name);
-
-			rc = fstype->ops->enable_share(impl_share);
-
-			if (rc != SA_OK)
-				ret = rc;
-			else
-				FSINFO(impl_share, fstype)->active = B_TRUE;
-
-			found_protocol = B_TRUE;
-		}
-
+		if (strcmp(fstype->name, protocol) == 0)
+			fstype->ops->commit_shares();
 		fstype = fstype->next;
 	}
-
-	update_sharetab(impl_share->handle);
-
-	return (found_protocol ? ret : SA_INVALID_PROTOCOL);
-}
-
-int
-sa_disable_share(sa_share_t share, char *protocol)
-{
-	sa_share_impl_t impl_share = (sa_share_impl_t)share;
-	int rc, ret = SA_OK;
-	boolean_t found_protocol = B_FALSE;
-	sa_fstype_t *fstype;
-
-	fstype = fstypes;
-	while (fstype != NULL) {
-		if (protocol == NULL || strcmp(fstype->name, protocol) == 0) {
-			rc = fstype->ops->disable_share(impl_share);
-
-			if (rc == SA_OK) {
-				fstype->ops->clear_shareopts(impl_share);
-
-				FSINFO(impl_share, fstype)->active = B_FALSE;
-			} else
-				ret = rc;
-
-			found_protocol = B_TRUE;
-		}
-
-		fstype = fstype->next;
-	}
-
-	update_sharetab(impl_share->handle);
-
-	return (found_protocol ? ret : SA_INVALID_PROTOCOL);
 }
 
 /*
@@ -674,7 +296,7 @@
 }
 
 int
-sa_parse_legacy_options(sa_group_t group, char *options, char *proto)
+sa_validate_shareopts(char *options, char *proto)
 {
 	sa_fstype_t *fstype;
 
@@ -691,25 +313,8 @@
 	return (SA_INVALID_PROTOCOL);
 }
 
-boolean_t
-sa_needs_refresh(sa_handle_t handle)
-{
-	return (B_TRUE);
-}
-
-libzfs_handle_t *
-sa_get_zfs_handle(sa_handle_t handle)
-{
-	sa_handle_impl_t impl_handle = (sa_handle_impl_t)handle;
-
-	if (impl_handle == NULL)
-		return (NULL);
-
-	return (impl_handle->zfs_libhandle);
-}
-
 static sa_share_impl_t
-alloc_share(const char *sharepath)
+alloc_share(const char *zfsname, const char *mountpoint)
 {
 	sa_share_impl_t impl_share;
 
@@ -718,17 +323,24 @@
 	if (impl_share == NULL)
 		return (NULL);
 
-	impl_share->sharepath = strdup(sharepath);
-
-	if (impl_share->sharepath == NULL) {
+	if (mountpoint != NULL &&
+	    ((impl_share->sa_mountpoint = strdup(mountpoint)) == NULL)) {
 		free(impl_share);
 		return (NULL);
 	}
 
-	impl_share->fsinfo = calloc(fstypes_count, sizeof (sa_share_fsinfo_t));
+	if (zfsname != NULL &&
+	    ((impl_share->sa_zfsname = strdup(zfsname)) == NULL)) {
+		free(impl_share->sa_mountpoint);
+		free(impl_share);
+		return (NULL);
+	}
 
-	if (impl_share->fsinfo == NULL) {
-		free(impl_share->sharepath);
+	impl_share->sa_fsinfo = calloc(fstypes_count,
+	    sizeof (sa_share_fsinfo_t));
+	if (impl_share->sa_fsinfo == NULL) {
+		free(impl_share->sa_mountpoint);
+		free(impl_share->sa_zfsname);
 		free(impl_share);
 		return (NULL);
 	}
@@ -744,34 +356,11 @@
 	fstype = fstypes;
 	while (fstype != NULL) {
 		fstype->ops->clear_shareopts(impl_share);
-
-		free(FSINFO(impl_share, fstype)->resource);
-
 		fstype = fstype->next;
 	}
 
-	free(impl_share->sharepath);
-	free(impl_share->dataset);
-	free(impl_share->fsinfo);
+	free(impl_share->sa_mountpoint);
+	free(impl_share->sa_zfsname);
+	free(impl_share->sa_fsinfo);
 	free(impl_share);
 }
-
-int
-sa_zfs_process_share(sa_handle_t handle, sa_group_t group, sa_share_t share,
-    char *mountpoint, char *proto, zprop_source_t source, char *shareopts,
-    char *sourcestr, char *dataset)
-{
-	sa_handle_impl_t impl_handle = (sa_handle_impl_t)handle;
-	sa_share_impl_t impl_share = (sa_share_impl_t)share;
-
-	return (process_share(impl_handle, impl_share, mountpoint, NULL,
-	    proto, shareopts, NULL, dataset, B_FALSE));
-}
-
-void
-sa_update_sharetab_ts(sa_handle_t handle)
-{
-	sa_handle_impl_t impl_handle = (sa_handle_impl_t)handle;
-
-	update_sharetab(impl_handle);
-}

diff --git a/zfs/lib/libshare/libshare_impl.h b/zfs/lib/libshare/libshare_impl.h
index 18d619b..cc5ef40 100644
--- a/zfs/lib/libshare/libshare_impl.h
+++ b/zfs/lib/libshare/libshare_impl.h

@@ -22,36 +22,32 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
+ * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
  */
 
-struct sa_handle_impl;
-
 typedef struct sa_share_fsinfo {
-	boolean_t active;
-	char *resource;
 	char *shareopts;
 } sa_share_fsinfo_t;
 
 typedef struct sa_share_impl {
-	struct sa_share_impl *next;
+	char *sa_mountpoint;
+	char *sa_zfsname;
 
-	struct sa_handle_impl *handle;
-
-	char *sharepath;
-	char *dataset;
-
-	sa_share_fsinfo_t *fsinfo; /* per-fstype information */
+	sa_share_fsinfo_t *sa_fsinfo; /* per-fstype information */
 } *sa_share_impl_t;
 
-#define	FSINFO(impl_share, fstype) (&(impl_share->fsinfo[fstype->fsinfo_index]))
+#define	FSINFO(impl_share, fstype) \
+	(&(impl_share->sa_fsinfo[fstype->fsinfo_index]))
 
 typedef struct sa_share_ops {
 	int (*enable_share)(sa_share_impl_t share);
 	int (*disable_share)(sa_share_impl_t share);
+	boolean_t (*is_shared)(sa_share_impl_t share);
 	int (*validate_shareopts)(const char *shareopts);
 	int (*update_shareopts)(sa_share_impl_t impl_share,
-	    const char *resource, const char *shareopts);
+	    const char *shareopts);
 	void (*clear_shareopts)(sa_share_impl_t impl_share);
+	int (*commit_shares)(void);
 } sa_share_ops_t;
 
 typedef struct sa_fstype {
@@ -62,9 +58,4 @@
 	int fsinfo_index;
 } sa_fstype_t;
 
-typedef struct sa_handle_impl {
-	libzfs_handle_t *zfs_libhandle;
-	sa_share_impl_t shares;
-} *sa_handle_impl_t;
-
 sa_fstype_t *register_fstype(const char *name, const sa_share_ops_t *ops);

diff --git a/zfs/lib/libshare/nfs.c b/zfs/lib/libshare/nfs.c
deleted file mode 100644
index 7cc5ae4..0000000
--- a/zfs/lib/libshare/nfs.c
+++ /dev/null

@@ -1,747 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 Gunnar Beutner
- * Copyright (c) 2012 Cyril Plisko. All rights reserved.
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <strings.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <libzfs.h>
-#include <libshare.h>
-#include "libshare_impl.h"
-
-static boolean_t nfs_available(void);
-
-static sa_fstype_t *nfs_fstype;
-
-/*
- * nfs_exportfs_temp_fd refers to a temporary copy of the output
- * from exportfs -v.
- */
-static int nfs_exportfs_temp_fd = -1;
-
-typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
-    void *cookie);
-
-typedef int (*nfs_host_callback_t)(const char *sharepath, const char *host,
-    const char *security, const char *access, void *cookie);
-
-/*
- * Invokes the specified callback function for each Solaris share option
- * listed in the specified string.
- */
-static int
-foreach_nfs_shareopt(const char *shareopts,
-    nfs_shareopt_callback_t callback, void *cookie)
-{
-	char *shareopts_dup, *opt, *cur, *value;
-	int was_nul, rc;
-
-	if (shareopts == NULL)
-		return (SA_OK);
-
-	shareopts_dup = strdup(shareopts);
-
-	if (shareopts_dup == NULL)
-		return (SA_NO_MEMORY);
-
-	opt = shareopts_dup;
-	was_nul = 0;
-
-	while (1) {
-		cur = opt;
-
-		while (*cur != ',' && *cur != '\0')
-			cur++;
-
-		if (*cur == '\0')
-			was_nul = 1;
-
-		*cur = '\0';
-
-		if (cur > opt) {
-			value = strchr(opt, '=');
-
-			if (value != NULL) {
-				*value = '\0';
-				value++;
-			}
-
-			rc = callback(opt, value, cookie);
-
-			if (rc != SA_OK) {
-				free(shareopts_dup);
-				return (rc);
-			}
-		}
-
-		opt = cur + 1;
-
-		if (was_nul)
-			break;
-	}
-
-	free(shareopts_dup);
-
-	return (0);
-}
-
-typedef struct nfs_host_cookie_s {
-	nfs_host_callback_t callback;
-	const char *sharepath;
-	void *cookie;
-	const char *security;
-} nfs_host_cookie_t;
-
-/*
- * Helper function for foreach_nfs_host. This function checks whether the
- * current share option is a host specification and invokes a callback
- * function with information about the host.
- */
-static int
-foreach_nfs_host_cb(const char *opt, const char *value, void *pcookie)
-{
-	int rc;
-	const char *access;
-	char *host_dup, *host, *next;
-	nfs_host_cookie_t *udata = (nfs_host_cookie_t *)pcookie;
-
-#ifdef DEBUG
-	fprintf(stderr, "foreach_nfs_host_cb: key=%s, value=%s\n", opt, value);
-#endif
-
-	if (strcmp(opt, "sec") == 0)
-		udata->security = value;
-
-	if (strcmp(opt, "rw") == 0 || strcmp(opt, "ro") == 0) {
-		if (value == NULL)
-			value = "*";
-
-		access = opt;
-
-		host_dup = strdup(value);
-
-		if (host_dup == NULL)
-			return (SA_NO_MEMORY);
-
-		host = host_dup;
-
-		do {
-			next = strchr(host, ':');
-			if (next != NULL) {
-				*next = '\0';
-				next++;
-			}
-
-			rc = udata->callback(udata->sharepath, host,
-			    udata->security, access, udata->cookie);
-
-			if (rc != SA_OK) {
-				free(host_dup);
-
-				return (rc);
-			}
-
-			host = next;
-		} while (host != NULL);
-
-		free(host_dup);
-	}
-
-	return (SA_OK);
-}
-
-/*
- * Invokes a callback function for all NFS hosts that are set for a share.
- */
-static int
-foreach_nfs_host(sa_share_impl_t impl_share, nfs_host_callback_t callback,
-    void *cookie)
-{
-	nfs_host_cookie_t udata;
-	char *shareopts;
-
-	udata.callback = callback;
-	udata.sharepath = impl_share->sharepath;
-	udata.cookie = cookie;
-	udata.security = "sys";
-
-	shareopts = FSINFO(impl_share, nfs_fstype)->shareopts;
-
-	return foreach_nfs_shareopt(shareopts, foreach_nfs_host_cb,
-	    &udata);
-}
-
-/*
- * Converts a Solaris NFS host specification to its Linux equivalent.
- */
-static int
-get_linux_hostspec(const char *solaris_hostspec, char **plinux_hostspec)
-{
-	/*
-	 * For now we just support CIDR masks (e.g. @192.168.0.0/16) and host
-	 * wildcards (e.g. *.example.org).
-	 */
-	if (solaris_hostspec[0] == '@') {
-		/*
-		 * Solaris host specifier, e.g. @192.168.0.0/16; we just need
-		 * to skip the @ in this case
-		 */
-		*plinux_hostspec = strdup(solaris_hostspec + 1);
-	} else {
-		*plinux_hostspec = strdup(solaris_hostspec);
-	}
-
-	if (*plinux_hostspec == NULL) {
-		return (SA_NO_MEMORY);
-	}
-
-	return (SA_OK);
-}
-
-/*
- * Used internally by nfs_enable_share to enable sharing for a single host.
- */
-static int
-nfs_enable_share_one(const char *sharepath, const char *host,
-    const char *security, const char *access, void *pcookie)
-{
-	int rc;
-	char *linuxhost, *hostpath, *opts;
-	const char *linux_opts = (const char *)pcookie;
-	char *argv[6];
-
-	/* exportfs -i -o sec=XX,rX,<opts> <host>:<sharepath> */
-
-	rc = get_linux_hostspec(host, &linuxhost);
-
-	if (rc < 0)
-		exit(1);
-
-	hostpath = malloc(strlen(linuxhost) + 1 + strlen(sharepath) + 1);
-
-	if (hostpath == NULL) {
-		free(linuxhost);
-
-		exit(1);
-	}
-
-	sprintf(hostpath, "%s:%s", linuxhost, sharepath);
-
-	free(linuxhost);
-
-	if (linux_opts == NULL)
-		linux_opts = "";
-
-	opts = malloc(4 + strlen(security) + 4 + strlen(linux_opts) + 1);
-
-	if (opts == NULL)
-		exit(1);
-
-	sprintf(opts, "sec=%s,%s,%s", security, access, linux_opts);
-
-#ifdef DEBUG
-	fprintf(stderr, "sharing %s with opts %s\n", hostpath, opts);
-#endif
-
-	argv[0] = "/usr/sbin/exportfs";
-	argv[1] = "-i";
-	argv[2] = "-o";
-	argv[3] = opts;
-	argv[4] = hostpath;
-	argv[5] = NULL;
-
-	rc = libzfs_run_process(argv[0], argv, 0);
-
-	free(hostpath);
-	free(opts);
-
-	if (rc < 0)
-		return (SA_SYSTEM_ERR);
-	else
-		return (SA_OK);
-}
-
-/*
- * Adds a Linux share option to an array of NFS options.
- */
-static int
-add_linux_shareopt(char **plinux_opts, const char *key, const char *value)
-{
-	size_t len = 0;
-	char *new_linux_opts;
-
-	if (*plinux_opts != NULL)
-		len = strlen(*plinux_opts);
-
-	new_linux_opts = realloc(*plinux_opts, len + 1 + strlen(key) +
-	    (value ? 1 + strlen(value) : 0) + 1);
-
-	if (new_linux_opts == NULL)
-		return (SA_NO_MEMORY);
-
-	new_linux_opts[len] = '\0';
-
-	if (len > 0)
-		strcat(new_linux_opts, ",");
-
-	strcat(new_linux_opts, key);
-
-	if (value != NULL) {
-		strcat(new_linux_opts, "=");
-		strcat(new_linux_opts, value);
-	}
-
-	*plinux_opts = new_linux_opts;
-
-	return (SA_OK);
-}
-
-/*
- * Validates and converts a single Solaris share option to its Linux
- * equivalent.
- */
-static int
-get_linux_shareopts_cb(const char *key, const char *value, void *cookie)
-{
-	char **plinux_opts = (char **)cookie;
-
-	/* host-specific options, these are taken care of elsewhere */
-	if (strcmp(key, "ro") == 0 || strcmp(key, "rw") == 0 ||
-	    strcmp(key, "sec") == 0)
-		return (SA_OK);
-
-	if (strcmp(key, "anon") == 0)
-		key = "anonuid";
-
-	if (strcmp(key, "root_mapping") == 0) {
-		(void) add_linux_shareopt(plinux_opts, "root_squash", NULL);
-		key = "anonuid";
-	}
-
-	if (strcmp(key, "nosub") == 0)
-		key = "subtree_check";
-
-	if (strcmp(key, "insecure") != 0 && strcmp(key, "secure") != 0 &&
-	    strcmp(key, "async") != 0 && strcmp(key, "sync") != 0 &&
-	    strcmp(key, "no_wdelay") != 0 && strcmp(key, "wdelay") != 0 &&
-	    strcmp(key, "nohide") != 0 && strcmp(key, "hide") != 0 &&
-	    strcmp(key, "crossmnt") != 0 &&
-	    strcmp(key, "no_subtree_check") != 0 &&
-	    strcmp(key, "subtree_check") != 0 &&
-	    strcmp(key, "insecure_locks") != 0 &&
-	    strcmp(key, "secure_locks") != 0 &&
-	    strcmp(key, "no_auth_nlm") != 0 && strcmp(key, "auth_nlm") != 0 &&
-	    strcmp(key, "no_acl") != 0 && strcmp(key, "mountpoint") != 0 &&
-	    strcmp(key, "mp") != 0 && strcmp(key, "fsuid") != 0 &&
-	    strcmp(key, "refer") != 0 && strcmp(key, "replicas") != 0 &&
-	    strcmp(key, "root_squash") != 0 &&
-	    strcmp(key, "no_root_squash") != 0 &&
-	    strcmp(key, "all_squash") != 0 &&
-	    strcmp(key, "no_all_squash") != 0 && strcmp(key, "fsid") != 0 &&
-	    strcmp(key, "anonuid") != 0 && strcmp(key, "anongid") != 0) {
-		return (SA_SYNTAX_ERR);
-	}
-
-	(void) add_linux_shareopt(plinux_opts, key, value);
-
-	return (SA_OK);
-}
-
-/*
- * Takes a string containing Solaris share options (e.g. "sync,no_acl") and
- * converts them to a NULL-terminated array of Linux NFS options.
- */
-static int
-get_linux_shareopts(const char *shareopts, char **plinux_opts)
-{
-	int rc;
-
-	assert(plinux_opts != NULL);
-
-	*plinux_opts = NULL;
-
-	/* no_subtree_check - Default as of nfs-utils v1.1.0 */
-	(void) add_linux_shareopt(plinux_opts, "no_subtree_check", NULL);
-
-	/* mountpoint - Restrict exports to ZFS mountpoints */
-	(void) add_linux_shareopt(plinux_opts, "mountpoint", NULL);
-
-	rc = foreach_nfs_shareopt(shareopts, get_linux_shareopts_cb,
-	    plinux_opts);
-
-	if (rc != SA_OK) {
-		free(*plinux_opts);
-		*plinux_opts = NULL;
-	}
-
-	return (rc);
-}
-
-/*
- * Enables NFS sharing for the specified share.
- */
-static int
-nfs_enable_share(sa_share_impl_t impl_share)
-{
-	char *shareopts, *linux_opts;
-	int rc;
-
-	if (!nfs_available()) {
-		return (SA_SYSTEM_ERR);
-	}
-
-	shareopts = FSINFO(impl_share, nfs_fstype)->shareopts;
-
-	if (shareopts == NULL)
-		return (SA_OK);
-
-	rc = get_linux_shareopts(shareopts, &linux_opts);
-
-	if (rc != SA_OK)
-		return (rc);
-
-	rc = foreach_nfs_host(impl_share, nfs_enable_share_one, linux_opts);
-
-	free(linux_opts);
-
-	return (rc);
-}
-
-/*
- * Used internally by nfs_disable_share to disable sharing for a single host.
- */
-static int
-nfs_disable_share_one(const char *sharepath, const char *host,
-    const char *security, const char *access, void *cookie)
-{
-	int rc;
-	char *linuxhost, *hostpath;
-	char *argv[4];
-
-	rc = get_linux_hostspec(host, &linuxhost);
-
-	if (rc < 0)
-		exit(1);
-
-	hostpath = malloc(strlen(linuxhost) + 1 + strlen(sharepath) + 1);
-
-	if (hostpath == NULL) {
-		free(linuxhost);
-		exit(1);
-	}
-
-	sprintf(hostpath, "%s:%s", linuxhost, sharepath);
-
-	free(linuxhost);
-
-#ifdef DEBUG
-	fprintf(stderr, "unsharing %s\n", hostpath);
-#endif
-
-	argv[0] = "/usr/sbin/exportfs";
-	argv[1] = "-u";
-	argv[2] = hostpath;
-	argv[3] = NULL;
-
-	rc = libzfs_run_process(argv[0], argv, 0);
-
-	free(hostpath);
-
-	if (rc < 0)
-		return (SA_SYSTEM_ERR);
-	else
-		return (SA_OK);
-}
-
-/*
- * Disables NFS sharing for the specified share.
- */
-static int
-nfs_disable_share(sa_share_impl_t impl_share)
-{
-	if (!nfs_available()) {
-		/*
-		 * The share can't possibly be active, so nothing
-		 * needs to be done to disable it.
-		 */
-		return (SA_OK);
-	}
-
-	return (foreach_nfs_host(impl_share, nfs_disable_share_one, NULL));
-}
-
-/*
- * Checks whether the specified NFS share options are syntactically correct.
- */
-static int
-nfs_validate_shareopts(const char *shareopts)
-{
-	char *linux_opts;
-	int rc;
-
-	rc = get_linux_shareopts(shareopts, &linux_opts);
-
-	if (rc != SA_OK)
-		return (rc);
-
-	free(linux_opts);
-
-	return (SA_OK);
-}
-
-/*
- * Checks whether a share is currently active.
- */
-static boolean_t
-nfs_is_share_active(sa_share_impl_t impl_share)
-{
-	int fd;
-	char line[512];
-	char *tab, *cur;
-	FILE *nfs_exportfs_temp_fp;
-
-	if (!nfs_available())
-		return (B_FALSE);
-
-	if ((fd = dup(nfs_exportfs_temp_fd)) == -1)
-		return (B_FALSE);
-
-	nfs_exportfs_temp_fp = fdopen(fd, "r");
-
-	if (nfs_exportfs_temp_fp == NULL)
-		return (B_FALSE);
-
-	if (fseek(nfs_exportfs_temp_fp, 0, SEEK_SET) < 0) {
-		fclose(nfs_exportfs_temp_fp);
-		return (B_FALSE);
-	}
-
-	while (fgets(line, sizeof (line), nfs_exportfs_temp_fp) != NULL) {
-		/*
-		 * exportfs uses separate lines for the share path
-		 * and the export options when the share path is longer
-		 * than a certain amount of characters; this ignores
-		 * the option lines
-		 */
-		if (line[0] == '\t')
-			continue;
-
-		tab = strchr(line, '\t');
-
-		if (tab != NULL) {
-			*tab = '\0';
-			cur = tab - 1;
-		} else {
-			/*
-			 * there's no tab character, which means the
-			 * NFS options are on a separate line; we just
-			 * need to remove the new-line character
-			 * at the end of the line
-			 */
-			cur = line + strlen(line) - 1;
-		}
-
-		/* remove trailing spaces and new-line characters */
-		while (cur >= line && (*cur == ' ' || *cur == '\n'))
-			*cur-- = '\0';
-
-		if (strcmp(line, impl_share->sharepath) == 0) {
-			fclose(nfs_exportfs_temp_fp);
-			return (B_TRUE);
-		}
-	}
-
-	fclose(nfs_exportfs_temp_fp);
-
-	return (B_FALSE);
-}
-
-/*
- * Called to update a share's options. A share's options might be out of
- * date if the share was loaded from disk (i.e. /etc/dfs/sharetab) and the
- * "sharenfs" dataset property has changed in the meantime. This function
- * also takes care of re-enabling the share if necessary.
- */
-static int
-nfs_update_shareopts(sa_share_impl_t impl_share, const char *resource,
-    const char *shareopts)
-{
-	char *shareopts_dup;
-	boolean_t needs_reshare = B_FALSE;
-	char *old_shareopts;
-
-	FSINFO(impl_share, nfs_fstype)->active =
-	    nfs_is_share_active(impl_share);
-
-	old_shareopts = FSINFO(impl_share, nfs_fstype)->shareopts;
-
-	if (strcmp(shareopts, "on") == 0)
-		shareopts = "rw,crossmnt";
-
-	if (FSINFO(impl_share, nfs_fstype)->active && old_shareopts != NULL &&
-	    strcmp(old_shareopts, shareopts) != 0) {
-		needs_reshare = B_TRUE;
-		nfs_disable_share(impl_share);
-	}
-
-	shareopts_dup = strdup(shareopts);
-
-	if (shareopts_dup == NULL)
-		return (SA_NO_MEMORY);
-
-	if (old_shareopts != NULL)
-		free(old_shareopts);
-
-	FSINFO(impl_share, nfs_fstype)->shareopts = shareopts_dup;
-
-	if (needs_reshare)
-		nfs_enable_share(impl_share);
-
-	return (SA_OK);
-}
-
-/*
- * Clears a share's NFS options. Used by libshare to
- * clean up shares that are about to be free()'d.
- */
-static void
-nfs_clear_shareopts(sa_share_impl_t impl_share)
-{
-	free(FSINFO(impl_share, nfs_fstype)->shareopts);
-	FSINFO(impl_share, nfs_fstype)->shareopts = NULL;
-}
-
-static const sa_share_ops_t nfs_shareops = {
-	.enable_share = nfs_enable_share,
-	.disable_share = nfs_disable_share,
-
-	.validate_shareopts = nfs_validate_shareopts,
-	.update_shareopts = nfs_update_shareopts,
-	.clear_shareopts = nfs_clear_shareopts,
-};
-
-/*
- * nfs_check_exportfs() checks that the exportfs command runs
- * and also maintains a temporary copy of the output from
- * exportfs -v.
- * To update this temporary copy simply call this function again.
- *
- * TODO : Use /var/lib/nfs/etab instead of our private copy.
- *        But must implement locking to prevent concurrent access.
- *
- * TODO : The temporary file descriptor is never closed since
- *        there is no libshare_nfs_fini() function.
- */
-static int
-nfs_check_exportfs(void)
-{
-	pid_t pid;
-	int rc, status;
-	static char nfs_exportfs_tempfile[] = "/tmp/exportfs.XXXXXX";
-
-	/*
-	 * Close any existing temporary copies of output from exportfs.
-	 * We have already called unlink() so file will be deleted.
-	 */
-	if (nfs_exportfs_temp_fd >= 0)
-		close(nfs_exportfs_temp_fd);
-
-	nfs_exportfs_temp_fd = mkstemp(nfs_exportfs_tempfile);
-
-	if (nfs_exportfs_temp_fd < 0)
-		return (SA_SYSTEM_ERR);
-
-	unlink(nfs_exportfs_tempfile);
-
-	(void) fcntl(nfs_exportfs_temp_fd, F_SETFD, FD_CLOEXEC);
-
-	pid = fork();
-
-	if (pid < 0) {
-		(void) close(nfs_exportfs_temp_fd);
-		nfs_exportfs_temp_fd = -1;
-		return (SA_SYSTEM_ERR);
-	}
-
-	if (pid > 0) {
-		while ((rc = waitpid(pid, &status, 0)) <= 0 &&
-		    errno == EINTR) { }
-
-		if (rc <= 0) {
-			(void) close(nfs_exportfs_temp_fd);
-			nfs_exportfs_temp_fd = -1;
-			return (SA_SYSTEM_ERR);
-		}
-
-		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-			(void) close(nfs_exportfs_temp_fd);
-			nfs_exportfs_temp_fd = -1;
-			return (SA_CONFIG_ERR);
-		}
-
-		return (SA_OK);
-	}
-
-	/* child */
-
-	/* exportfs -v */
-
-	if (dup2(nfs_exportfs_temp_fd, STDOUT_FILENO) < 0)
-		exit(1);
-
-	rc = execlp("/usr/sbin/exportfs", "exportfs", "-v", NULL);
-
-	if (rc < 0) {
-		exit(1);
-	}
-
-	exit(0);
-}
-
-/*
- * Provides a convenient wrapper for determining nfs availability
- */
-static boolean_t
-nfs_available(void)
-{
-	if (nfs_exportfs_temp_fd == -1)
-		(void) nfs_check_exportfs();
-
-	return ((nfs_exportfs_temp_fd != -1) ? B_TRUE : B_FALSE);
-}
-
-/*
- * Initializes the NFS functionality of libshare.
- */
-void
-libshare_nfs_init(void)
-{
-	nfs_fstype = register_fstype("nfs", &nfs_shareops);
-}

diff --git a/zfs/lib/libshare/os/freebsd/nfs.c b/zfs/lib/libshare/os/freebsd/nfs.c
new file mode 100644
index 0000000..97092bd
--- /dev/null
+++ b/zfs/lib/libshare/os/freebsd/nfs.c

@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/vfs.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libutil.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+
+#include "libzfs_impl.h"
+#include "libshare_impl.h"
+#include "nfs.h"
+
+#define	_PATH_MOUNTDPID	"/var/run/mountd.pid"
+#define	FILE_HEADER	"# !!! DO NOT EDIT THIS FILE MANUALLY !!!\n\n"
+#define	OPTSSIZE	1024
+#define	MAXLINESIZE	(PATH_MAX + OPTSSIZE)
+#define	ZFS_EXPORTS_FILE	"/etc/zfs/exports"
+#define	ZFS_EXPORTS_LOCK	ZFS_EXPORTS_FILE".lock"
+
+static sa_fstype_t *nfs_fstype;
+
+static int nfs_lock_fd = -1;
+
+/*
+ * The nfs_exports_[lock|unlock] is used to guard against concurrent
+ * updates to the exports file. Each protocol is responsible for
+ * providing the necessary locking to ensure consistency.
+ */
+/*
+ * Opens (creating it if needed) and exclusively flock()s the exports lock
+ * file. Returns 0 on success or the errno of the failing call; on failure
+ * nfs_lock_fd is reset to -1 so that no closed descriptor is left behind.
+ */
+static int
+nfs_exports_lock(void)
+{
+	int err = 0;
+
+	nfs_lock_fd = open(ZFS_EXPORTS_LOCK,
+	    O_RDWR | O_CREAT | O_CLOEXEC, 0600);
+	if (nfs_lock_fd == -1 || flock(nfs_lock_fd, LOCK_EX) != 0) {
+		err = errno;
+		fprintf(stderr, "failed to lock %s: %s\n",
+		    ZFS_EXPORTS_LOCK, strerror(err));
+		if (nfs_lock_fd != -1)
+			(void) close(nfs_lock_fd);
+		nfs_lock_fd = -1;
+	}
+	return (err);
+}
+
+static void
+nfs_exports_unlock(void)
+{
+	verify(nfs_lock_fd > 0);
+
+	if (flock(nfs_lock_fd, LOCK_UN) != 0) {
+		fprintf(stderr, "failed to unlock %s: %s\n",
+		    ZFS_EXPORTS_LOCK, strerror(errno));
+	}
+	close(nfs_lock_fd);
+	nfs_lock_fd = -1;
+}
+
+/*
+ * Read one line from a file. Skip comments, empty lines and a line with a
+ * mountpoint specified in the 'skip' argument.
+ *
+ * NOTE: This function returns a static buffer and thus is not thread-safe.
+ */
+static char *
+zgetline(FILE *fd, const char *skip)
+{
+	static char line[MAXLINESIZE];
+	size_t len, skiplen = 0;
+	char *s, last;
+
+	if (skip != NULL)
+		skiplen = strlen(skip);
+	for (;;) {
+		s = fgets(line, sizeof (line), fd);
+		if (s == NULL)
+			return (NULL);
+		/* Skip empty lines and comments. */
+		if (line[0] == '\n' || line[0] == '#')
+			continue;
+		len = strlen(line);
+		if (line[len - 1] == '\n')
+			line[len - 1] = '\0';
+		last = line[skiplen];
+		/* Skip the given mountpoint. */
+		if (skip != NULL && strncmp(skip, line, skiplen) == 0 &&
+		    (last == '\t' || last == ' ' || last == '\0')) {
+			continue;
+		}
+		break;
+	}
+	return (line);
+}
+
+/*
+ * This function translate options to a format acceptable by exports(5), eg.
+ *
+ *	-ro -network=192.168.0.0 -mask=255.255.255.0 -maproot=0 \
+ *	zfs.freebsd.org 69.147.83.54
+ *
+ * Accepted input formats:
+ *
+ *	ro,network=192.168.0.0,mask=255.255.255.0,maproot=0,zfs.freebsd.org
+ *	ro network=192.168.0.0 mask=255.255.255.0 maproot=0 zfs.freebsd.org
+ *	-ro,-network=192.168.0.0,-mask=255.255.255.0,-maproot=0,zfs.freebsd.org
+ *	-ro -network=192.168.0.0 -mask=255.255.255.0 -maproot=0 \
+ *	zfs.freebsd.org
+ *
+ * Recognized keywords:
+ *
+ *	ro, maproot, mapall, mask, network, sec, alldirs, public, webnfs,
+ *	index, quiet
+ *
+ * NOTE: This function returns a static buffer and thus is not thread-safe.
+ */
+static char *
+translate_opts(const char *shareopts)
+{
+	static const char *known_opts[] = { "ro", "maproot", "mapall", "mask",
+	    "network", "sec", "alldirs", "public", "webnfs", "index", "quiet",
+	    NULL };
+	static char newopts[OPTSSIZE];
+	char oldopts[OPTSSIZE];
+	char *o, *s = NULL;
+	unsigned int i;
+	size_t len;
+
+	strlcpy(oldopts, shareopts, sizeof (oldopts));
+	newopts[0] = '\0';
+	s = oldopts;
+	while ((o = strsep(&s, "-, ")) != NULL) {
+		if (o[0] == '\0')
+			continue;
+		for (i = 0; known_opts[i] != NULL; i++) {
+			len = strlen(known_opts[i]);
+			if (strncmp(known_opts[i], o, len) == 0 &&
+			    (o[len] == '\0' || o[len] == '=')) {
+				strlcat(newopts, "-", sizeof (newopts));
+				break;
+			}
+		}
+		strlcat(newopts, o, sizeof (newopts));
+		strlcat(newopts, " ", sizeof (newopts));
+	}
+	return (newopts);
+}
+
+static char *
+nfs_init_tmpfile(void)
+{
+	char *tmpfile = NULL;
+
+	if (asprintf(&tmpfile, "%s%s", ZFS_EXPORTS_FILE, ".XXXXXXXX") == -1) {
+		fprintf(stderr, "Unable to allocate buffer for temporary "
+		    "file name\n");
+		return (NULL);
+	}
+
+	int fd = mkstemp(tmpfile);
+	if (fd == -1) {
+		fprintf(stderr, "Unable to create temporary file: %s",
+		    strerror(errno));
+		free(tmpfile);
+		return (NULL);
+	}
+	close(fd);
+	return (tmpfile);
+}
+
+static int
+nfs_fini_tmpfile(char *tmpfile)
+{
+	if (rename(tmpfile, ZFS_EXPORTS_FILE) == -1) {
+		fprintf(stderr, "Unable to rename %s: %s\n", tmpfile,
+		    strerror(errno));
+		unlink(tmpfile);
+		free(tmpfile);
+		return (SA_SYSTEM_ERR);
+	}
+	free(tmpfile);
+	return (SA_OK);
+}
+
+/*
+ * Copies all entries from the exports file to "filename", omitting any
+ * entries for the specified mountpoint. Returns SA_OK or SA_SYSTEM_ERR.
+ */
+static int
+nfs_copy_entries(char *filename, const char *mountpoint)
+{
+	int error = SA_OK;
+	char *line;
+
+	FILE *oldfp = fopen(ZFS_EXPORTS_FILE, "re");
+	FILE *newfp = fopen(filename, "w+e");
+	if (newfp == NULL) {
+		fprintf(stderr, "failed to open %s file: %s\n", filename,
+		    strerror(errno));
+		/* oldfp is NULL when the exports file does not exist yet. */
+		if (oldfp != NULL)
+			(void) fclose(oldfp);
+		return (SA_SYSTEM_ERR);
+	}
+	fputs(FILE_HEADER, newfp);
+
+	/*
+	 * The ZFS_EXPORTS_FILE may not exist yet. If that's the
+	 * case then just write out the new file.
+	 */
+	if (oldfp != NULL) {
+		while ((line = zgetline(oldfp, mountpoint)) != NULL)
+			fprintf(newfp, "%s\n", line);
+		if (ferror(oldfp) != 0)
+			error = SA_SYSTEM_ERR;
+		if (fclose(oldfp) != 0) {
+			fprintf(stderr, "Unable to close file %s: %s\n",
+			    ZFS_EXPORTS_FILE, strerror(errno));
+			error = SA_SYSTEM_ERR;
+		}
+	}
+
+	if (ferror(newfp) != 0)
+		error = SA_SYSTEM_ERR;
+
+	if (fclose(newfp) != 0) {
+		fprintf(stderr, "Unable to close file %s: %s\n",
+		    filename, strerror(errno));
+		error = SA_SYSTEM_ERR;
+	}
+	return (error);
+}
+
+static int
+nfs_enable_share(sa_share_impl_t impl_share)
+{
+	char *filename = NULL;
+	int error;
+
+	if ((filename = nfs_init_tmpfile()) == NULL)
+		return (SA_SYSTEM_ERR);
+
+	error = nfs_exports_lock();
+	if (error != 0) {
+		unlink(filename);
+		free(filename);
+		return (error);
+	}
+
+	error = nfs_copy_entries(filename, impl_share->sa_mountpoint);
+	if (error != SA_OK) {
+		unlink(filename);
+		free(filename);
+		nfs_exports_unlock();
+		return (error);
+	}
+
+	FILE *fp = fopen(filename, "a+e");
+	if (fp == NULL) {
+		fprintf(stderr, "failed to open %s file: %s", filename,
+		    strerror(errno));
+		unlink(filename);
+		free(filename);
+		nfs_exports_unlock();
+		return (SA_SYSTEM_ERR);
+	}
+	char *shareopts = FSINFO(impl_share, nfs_fstype)->shareopts;
+	if (strcmp(shareopts, "on") == 0)
+		shareopts = "";
+
+	if (fprintf(fp, "%s\t%s\n", impl_share->sa_mountpoint,
+	    translate_opts(shareopts)) < 0) {
+		fprintf(stderr, "failed to write to %s\n", filename);
+		fclose(fp);
+		unlink(filename);
+		free(filename);
+		nfs_exports_unlock();
+		return (SA_SYSTEM_ERR);
+	}
+
+	if (fclose(fp) != 0) {
+		fprintf(stderr, "Unable to close file %s: %s\n",
+		    filename, strerror(errno));
+		unlink(filename);
+		free(filename);
+		nfs_exports_unlock();
+		return (SA_SYSTEM_ERR);
+	}
+	error = nfs_fini_tmpfile(filename);
+	nfs_exports_unlock();
+	return (error);
+}
+
+static int
+nfs_disable_share(sa_share_impl_t impl_share)
+{
+	int error;
+	char *filename = NULL;
+
+	if ((filename = nfs_init_tmpfile()) == NULL)
+		return (SA_SYSTEM_ERR);
+
+	error = nfs_exports_lock();
+	if (error != 0) {
+		unlink(filename);
+		free(filename);
+		return (error);
+	}
+
+	error = nfs_copy_entries(filename, impl_share->sa_mountpoint);
+	if (error != SA_OK) {
+		unlink(filename);
+		free(filename);
+		nfs_exports_unlock();
+		return (error);
+	}
+
+	error = nfs_fini_tmpfile(filename);
+	nfs_exports_unlock();
+	return (error);
+}
+
+/*
+ * Returns B_TRUE if the exports file has an entry for the share's mountpoint.
+ */
+static boolean_t
+nfs_is_shared(sa_share_impl_t impl_share)
+{
+	char last, line[MAXLINESIZE];
+	size_t len;
+	char *mntpoint = impl_share->sa_mountpoint;
+	size_t mntlen = strlen(mntpoint);
+
+	FILE *fp = fopen(ZFS_EXPORTS_FILE, "re");
+	if (fp == NULL)
+		return (B_FALSE);
+
+	while (fgets(line, sizeof (line), fp) != NULL) {
+		/* Skip empty lines and comments. */
+		if (line[0] == '\n' || line[0] == '#')
+			continue;
+		len = strlen(line);
+		if (line[len - 1] == '\n')
+			line[len - 1] = '\0';
+		last = line[mntlen];
+		/* Match the mountpoint followed by a separator or EOL. */
+		if (strncmp(mntpoint, line, mntlen) == 0 &&
+		    (last == '\t' || last == ' ' || last == '\0')) {
+			fclose(fp);
+			return (B_TRUE);
+		}
+	}
+	fclose(fp);
+	return (B_FALSE);
+}
+
+static int
+nfs_validate_shareopts(const char *shareopts)
+{
+	return (SA_OK);
+}
+
+static int
+nfs_update_shareopts(sa_share_impl_t impl_share, const char *shareopts)
+{
+	FSINFO(impl_share, nfs_fstype)->shareopts = (char *)shareopts;
+	return (SA_OK);
+}
+
+static void
+nfs_clear_shareopts(sa_share_impl_t impl_share)
+{
+	FSINFO(impl_share, nfs_fstype)->shareopts = NULL;
+}
+
+/*
+ * Commit the shares by restarting mountd.
+ */
+static int
+nfs_commit_shares(void)
+{
+	struct pidfh *pfh;
+	pid_t mountdpid;
+
+start:
+	pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &mountdpid);
+	if (pfh != NULL) {
+		/* mountd(8) is not running. */
+		pidfile_remove(pfh);
+		return (SA_OK);
+	}
+	if (errno != EEXIST) {
+		/* Cannot open pidfile for some reason. */
+		return (SA_SYSTEM_ERR);
+	}
+	if (mountdpid == -1) {
+		/* mountd(8) exists, but didn't write the PID yet */
+		usleep(500);
+		goto start;
+	}
+	/* We have mountd(8) PID in mountdpid variable. */
+	kill(mountdpid, SIGHUP);
+	return (SA_OK);
+}
+
+static const sa_share_ops_t nfs_shareops = {
+	.enable_share = nfs_enable_share,
+	.disable_share = nfs_disable_share,
+	.is_shared = nfs_is_shared,
+
+	.validate_shareopts = nfs_validate_shareopts,
+	.update_shareopts = nfs_update_shareopts,
+	.clear_shareopts = nfs_clear_shareopts,
+	.commit_shares = nfs_commit_shares,
+};
+
+/*
+ * Initializes the NFS functionality of libshare.
+ */
+void
+libshare_nfs_init(void)
+{
+	nfs_fstype = register_fstype("nfs", &nfs_shareops);
+}

diff --git a/zfs/lib/libshare/os/freebsd/smb.c b/zfs/lib/libshare/os/freebsd/smb.c
new file mode 100644
index 0000000..5b606ab
--- /dev/null
+++ b/zfs/lib/libshare/os/freebsd/smb.c

@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <libzfs.h>
+#include <libshare.h>
+#include "libshare_impl.h"
+#include "smb.h"
+
+static sa_fstype_t *smb_fstype;
+
+/*
+ * Enables SMB sharing for the specified share.
+ */
+static int
+smb_enable_share(sa_share_impl_t impl_share)
+{
+	fprintf(stderr, "No SMB support in FreeBSD yet.\n");
+	return (SA_NOT_SUPPORTED);
+}
+/*
+ * Disables SMB sharing for the specified share.
+ */
+static int
+smb_disable_share(sa_share_impl_t impl_share)
+{
+	fprintf(stderr, "No SMB support in FreeBSD yet.\n");
+	return (SA_NOT_SUPPORTED);
+}
+
+/*
+ * Checks whether the specified SMB share options are syntactically correct.
+ */
+static int
+smb_validate_shareopts(const char *shareopts)
+{
+	fprintf(stderr, "No SMB support in FreeBSD yet.\n");
+	return (SA_NOT_SUPPORTED);
+}
+
+/*
+ * Checks whether a share is currently active.
+ */
+static boolean_t
+smb_is_share_active(sa_share_impl_t impl_share)
+{
+	return (B_FALSE);
+}
+
+/*
+ * Called to update a share's options. A share's options might be out of
+ * date if the share was loaded from disk and the "sharesmb" dataset
+ * property has changed in the meantime. This function also takes care
+ * of re-enabling the share if necessary.
+ */
+static int
+smb_update_shareopts(sa_share_impl_t impl_share, const char *shareopts)
+{
+	return (SA_OK);
+}
+
+static int
+smb_update_shares(void)
+{
+	/* Not implemented */
+	return (0);
+}
+/*
+ * Clears a share's SMB options. Used by libshare to
+ * clean up shares that are about to be free()'d.
+ */
+static void
+smb_clear_shareopts(sa_share_impl_t impl_share)
+{
+	FSINFO(impl_share, smb_fstype)->shareopts = NULL;
+}
+
+static const sa_share_ops_t smb_shareops = {
+	.enable_share = smb_enable_share,
+	.disable_share = smb_disable_share,
+	.is_shared = smb_is_share_active,
+
+	.validate_shareopts = smb_validate_shareopts,
+	.update_shareopts = smb_update_shareopts,
+	.clear_shareopts = smb_clear_shareopts,
+	.commit_shares = smb_update_shares,
+};
+
+/*
+ * Initializes the SMB functionality of libshare.
+ */
+void
+libshare_smb_init(void)
+{
+	smb_fstype = register_fstype("smb", &smb_shareops);
+}

diff --git a/zfs/lib/libshare/os/linux/nfs.c b/zfs/lib/libshare/os/linux/nfs.c
new file mode 100644
index 0000000..a7bcbd1
--- /dev/null
+++ b/zfs/lib/libshare/os/linux/nfs.c

@@ -0,0 +1,729 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Gunnar Beutner
+ * Copyright (c) 2012 Cyril Plisko. All rights reserved.
+ * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+ */
+
+#include <dirent.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <libzfs.h>
+#include <libshare.h>
+#include "libshare_impl.h"
+#include "nfs.h"
+
+#define	FILE_HEADER		"# !!! DO NOT EDIT THIS FILE MANUALLY !!!\n\n"
+#define	ZFS_EXPORTS_DIR		"/etc/exports.d"
+#define	ZFS_EXPORTS_FILE	ZFS_EXPORTS_DIR"/zfs.exports"
+#define	ZFS_EXPORTS_LOCK	ZFS_EXPORTS_FILE".lock"
+
+static sa_fstype_t *nfs_fstype;
+
+typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
+    void *cookie);
+
+/*
+ * Per-host callback type used by foreach_nfs_host(). The first argument
+ * is the file being written and the second is the path being shared,
+ * which is the order used by the call in foreach_nfs_host_cb() and by
+ * nfs_add_entry().
+ */
+typedef int (*nfs_host_callback_t)(const char *filename, const char *sharepath,
+    const char *host, const char *security, const char *access, void *cookie);
+
+static int nfs_lock_fd = -1;
+
+/*
+ * nfs_exports_lock() and nfs_exports_unlock() guard against concurrent
+ * updates to the exports file. Each protocol is responsible for
+ * providing the necessary locking to ensure consistency.
+ *
+ * nfs_exports_lock() opens ZFS_EXPORTS_LOCK (creating it with mode 0600
+ * if needed), takes an exclusive flock() on it and keeps the descriptor
+ * in nfs_lock_fd.
+ *
+ * Returns 0 on success, otherwise the errno value of the failing open()
+ * or flock() call. On failure nfs_lock_fd is left at -1.
+ */
+static int
+nfs_exports_lock(void)
+{
+	int err;
+
+	nfs_lock_fd = open(ZFS_EXPORTS_LOCK,
+	    O_RDWR | O_CREAT | O_CLOEXEC, 0600);
+	if (nfs_lock_fd == -1) {
+		err = errno;
+		fprintf(stderr, "failed to lock %s: %s\n",
+		    ZFS_EXPORTS_LOCK, strerror(err));
+		return (err);
+	}
+	if (flock(nfs_lock_fd, LOCK_EX) != 0) {
+		err = errno;
+		fprintf(stderr, "failed to lock %s: %s\n",
+		    ZFS_EXPORTS_LOCK, strerror(err));
+		(void) close(nfs_lock_fd);
+		/* Do not leave the number of a closed descriptor behind. */
+		nfs_lock_fd = -1;
+		return (err);
+	}
+	return (0);
+}
+
+/*
+ * Releases the lock taken by nfs_exports_lock() and closes the lock file.
+ * A failing flock(LOCK_UN) is only reported on stderr; the descriptor is
+ * closed and nfs_lock_fd is reset to -1 either way.
+ */
+static void
+nfs_exports_unlock(void)
+{
+	/* -1 is the "not locked" marker; 0 is a legitimate descriptor. */
+	verify(nfs_lock_fd != -1);
+
+	if (flock(nfs_lock_fd, LOCK_UN) != 0) {
+		fprintf(stderr, "failed to unlock %s: %s\n",
+		    ZFS_EXPORTS_LOCK, strerror(errno));
+	}
+	(void) close(nfs_lock_fd);
+	nfs_lock_fd = -1;
+}
+
+/*
+ * Walks a comma-separated Solaris share option string and hands every
+ * non-empty option to "callback" as a (key, value) pair; value is NULL
+ * when the option has no "=value" part. The literal string "on" is
+ * treated as "rw,crossmnt", and a NULL string yields SA_OK without any
+ * callback invocation.
+ *
+ * The walk stops at the first callback result other than SA_OK, which is
+ * then returned. SA_NO_MEMORY is returned when the working copy of the
+ * string cannot be allocated.
+ */
+static int
+foreach_nfs_shareopt(const char *shareopts,
+    nfs_shareopt_callback_t callback, void *cookie)
+{
+	char *copy, *opt, *end, *value;
+	int last, error = SA_OK;
+
+	if (shareopts == NULL)
+		return (SA_OK);
+
+	if (strcmp(shareopts, "on") == 0)
+		shareopts = "rw,crossmnt";
+
+	/* Tokenizing is destructive, so work on a private copy. */
+	copy = strdup(shareopts);
+	if (copy == NULL)
+		return (SA_NO_MEMORY);
+
+	for (opt = copy, last = 0; !last; opt = end + 1) {
+		end = opt + strcspn(opt, ",");
+		last = (*end == '\0');
+		*end = '\0';
+
+		/* Skip empty options, e.g. those produced by ",,". */
+		if (end == opt)
+			continue;
+
+		value = strchr(opt, '=');
+		if (value != NULL)
+			*value++ = '\0';
+
+		error = callback(opt, value, cookie);
+		if (error != SA_OK)
+			break;
+	}
+
+	free(copy);
+
+	return (error);
+}
+
+/*
+ * State handed through foreach_nfs_shareopt() to foreach_nfs_host_cb().
+ */
+typedef struct nfs_host_cookie_s {
+	nfs_host_callback_t callback;	/* invoked once per host */
+	const char *sharepath;		/* set from impl_share->sa_mountpoint */
+	void *cookie;			/* passed through to callback */
+	const char *filename;		/* file name passed to callback */
+	const char *security;		/* value of the latest "sec" option */
+} nfs_host_cookie_t;
+
+/*
+ * Share option callback behind foreach_nfs_host().
+ *
+ * A "sec" option replaces the security flavor remembered in the cookie.
+ * An "rw" or "ro" option is a host specification: its value is a
+ * colon-separated host list ("*" when there is no value), and the
+ * cookie's callback is invoked once per host with the option name as the
+ * access mode. Any other option is ignored.
+ *
+ * Returns SA_OK, SA_NO_MEMORY, or the first callback result other than
+ * SA_OK.
+ */
+static int
+foreach_nfs_host_cb(const char *opt, const char *value, void *pcookie)
+{
+	nfs_host_cookie_t *udata = (nfs_host_cookie_t *)pcookie;
+	char *hosts, *host, *next;
+	int error = SA_OK;
+
+#ifdef DEBUG
+	fprintf(stderr, "foreach_nfs_host_cb: key=%s, value=%s\n", opt, value);
+#endif
+
+	if (strcmp(opt, "sec") == 0)
+		udata->security = value;
+
+	/* Only "rw" and "ro" carry a host list. */
+	if (strcmp(opt, "rw") != 0 && strcmp(opt, "ro") != 0)
+		return (SA_OK);
+
+	hosts = strdup(value != NULL ? value : "*");
+	if (hosts == NULL)
+		return (SA_NO_MEMORY);
+
+	for (host = hosts; host != NULL; host = next) {
+		/* Cut the list at the next ':' and remember the remainder. */
+		next = strchr(host, ':');
+		if (next != NULL)
+			*next++ = '\0';
+
+		error = udata->callback(udata->filename,
+		    udata->sharepath, host, udata->security,
+		    opt, udata->cookie);
+		if (error != SA_OK)
+			break;
+	}
+
+	free(hosts);
+
+	return (error);
+}
+
+/*
+ * Invokes "callback" for every NFS host named in the share's NFS share
+ * options. The callback receives "filename", the share's mountpoint, the
+ * host, the security flavor in effect ("sys" until a "sec" option is
+ * seen), the access mode and "cookie".
+ */
+static int
+foreach_nfs_host(sa_share_impl_t impl_share, char *filename,
+    nfs_host_callback_t callback, void *cookie)
+{
+	nfs_host_cookie_t udata = {
+		.callback = callback,
+		.sharepath = impl_share->sa_mountpoint,
+		.cookie = cookie,
+		.filename = filename,
+		.security = "sys",
+	};
+
+	return (foreach_nfs_shareopt(
+	    FSINFO(impl_share, nfs_fstype)->shareopts,
+	    foreach_nfs_host_cb, &udata));
+}
+
+/*
+ * Converts a Solaris NFS host specification to its Linux equivalent.
+ *
+ * Only CIDR masks (e.g. @192.168.0.0/16) and host wildcards (e.g.
+ * *.example.org) are handled: a leading '@' is dropped, anything else is
+ * copied unchanged. On SA_OK, *plinux_hostspec is a newly allocated
+ * string that the caller must free(); SA_NO_MEMORY is returned when the
+ * copy cannot be allocated.
+ */
+static int
+get_linux_hostspec(const char *solaris_hostspec, char **plinux_hostspec)
+{
+	/* Skip the Solaris-only '@' prefix of a network specification. */
+	const char *spec = (solaris_hostspec[0] == '@') ?
+	    solaris_hostspec + 1 : solaris_hostspec;
+
+	*plinux_hostspec = strdup(spec);
+
+	return (*plinux_hostspec != NULL ? SA_OK : SA_NO_MEMORY);
+}
+
+/*
+ * Appends "key" or "key=value" to the comma-separated option string
+ * *plinux_opts, which may be NULL for an empty list. The string is grown
+ * with realloc(), so *plinux_opts may change.
+ *
+ * Returns SA_OK, or SA_NO_MEMORY when the string cannot be grown, in
+ * which case *plinux_opts is left untouched.
+ */
+static int
+add_linux_shareopt(char **plinux_opts, const char *key, const char *value)
+{
+	size_t oldlen = (*plinux_opts != NULL) ? strlen(*plinux_opts) : 0;
+	size_t keylen = strlen(key);
+	char *opts, *tail;
+
+	/* old text + ',' + key + optional "=value" + terminating NUL */
+	opts = realloc(*plinux_opts, oldlen + 1 + keylen +
+	    (value != NULL ? 1 + strlen(value) : 0) + 1);
+	if (opts == NULL)
+		return (SA_NO_MEMORY);
+
+	tail = opts + oldlen;
+	if (oldlen > 0)
+		*tail++ = ',';
+
+	(void) strcpy(tail, key);
+	tail += keylen;
+
+	if (value != NULL) {
+		*tail++ = '=';
+		(void) strcpy(tail, value);
+	}
+
+	*plinux_opts = opts;
+
+	return (SA_OK);
+}
+
+/*
+ * Validates and converts a single Solaris share option to its Linux
+ * equivalent, appending the result to the option string that "cookie"
+ * (a char **) points to.
+ *
+ * "ro", "rw" and "sec" are host-specific and skipped here. "anon" is
+ * renamed to "anonuid", "root_mapping" adds "root_squash" and is renamed
+ * to "anonuid", and "nosub" is renamed to "subtree_check".
+ *
+ * Returns SA_OK, SA_SYNTAX_ERR for an option that is not in the accepted
+ * list, or the error reported by add_linux_shareopt().
+ */
+static int
+get_linux_shareopts_cb(const char *key, const char *value, void *cookie)
+{
+	static const char *const valid_opts[] = {
+		"insecure", "secure", "async", "sync", "no_wdelay", "wdelay",
+		"nohide", "hide", "crossmnt", "no_subtree_check",
+		"subtree_check", "insecure_locks", "secure_locks",
+		"no_auth_nlm", "auth_nlm", "no_acl", "mountpoint", "mp",
+		"fsuid", "refer", "replicas", "root_squash",
+		"no_root_squash", "all_squash", "no_all_squash", "fsid",
+		"anonuid", "anongid",
+	};
+	char **plinux_opts = (char **)cookie;
+	boolean_t known = B_FALSE;
+	size_t i;
+	int error;
+
+	/* host-specific options, these are taken care of elsewhere */
+	if (strcmp(key, "ro") == 0 || strcmp(key, "rw") == 0 ||
+	    strcmp(key, "sec") == 0)
+		return (SA_OK);
+
+	if (strcmp(key, "anon") == 0)
+		key = "anonuid";
+
+	if (strcmp(key, "root_mapping") == 0) {
+		/* Report an allocation failure instead of dropping it. */
+		error = add_linux_shareopt(plinux_opts, "root_squash", NULL);
+		if (error != SA_OK)
+			return (error);
+		key = "anonuid";
+	}
+
+	if (strcmp(key, "nosub") == 0)
+		key = "subtree_check";
+
+	for (i = 0; i < sizeof (valid_opts) / sizeof (valid_opts[0]); i++) {
+		if (strcmp(key, valid_opts[i]) == 0) {
+			known = B_TRUE;
+			break;
+		}
+	}
+	if (!known)
+		return (SA_SYNTAX_ERR);
+
+	return (add_linux_shareopt(plinux_opts, key, value));
+}
+
+/*
+ * Takes a string containing Solaris share options (e.g. "sync,no_acl") and
+ * converts them to a comma-separated string of Linux NFS options that
+ * always starts with "no_subtree_check,mountpoint".
+ *
+ * On SA_OK, *plinux_opts is a newly allocated string that the caller must
+ * free(). On any error (a failed allocation or an option rejected by
+ * get_linux_shareopts_cb()), *plinux_opts is NULL and the error is
+ * returned.
+ */
+static int
+get_linux_shareopts(const char *shareopts, char **plinux_opts)
+{
+	int error;
+
+	assert(plinux_opts != NULL);
+
+	*plinux_opts = NULL;
+
+	/* no_subtree_check - Default as of nfs-utils v1.1.0 */
+	error = add_linux_shareopt(plinux_opts, "no_subtree_check", NULL);
+
+	/* mountpoint - Restrict exports to ZFS mountpoints */
+	if (error == SA_OK)
+		error = add_linux_shareopt(plinux_opts, "mountpoint", NULL);
+
+	if (error == SA_OK) {
+		error = foreach_nfs_shareopt(shareopts,
+		    get_linux_shareopts_cb, plinux_opts);
+	}
+
+	if (error != SA_OK) {
+		free(*plinux_opts);
+		*plinux_opts = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Creates an empty temporary file next to ZFS_EXPORTS_FILE (its name is
+ * ZFS_EXPORTS_FILE followed by a unique suffix chosen by mkstemp()),
+ * creating ZFS_EXPORTS_DIR first when stat() cannot find it.
+ *
+ * Returns the allocated file name, which the caller must free(), or NULL
+ * after printing a message to stderr.
+ */
+static char *
+nfs_init_tmpfile(void)
+{
+	char *tmpfile = NULL;
+	struct stat sb;
+
+	/*
+	 * Another process may create the directory between stat() and
+	 * mkdir(); EEXIST is therefore not an error.
+	 */
+	if (stat(ZFS_EXPORTS_DIR, &sb) < 0 &&
+	    mkdir(ZFS_EXPORTS_DIR, 0755) < 0 && errno != EEXIST) {
+		fprintf(stderr, "failed to create %s: %s\n",
+		    ZFS_EXPORTS_DIR, strerror(errno));
+		return (NULL);
+	}
+
+	if (asprintf(&tmpfile, "%s%s", ZFS_EXPORTS_FILE, ".XXXXXXXX") == -1) {
+		fprintf(stderr, "Unable to allocate temporary file\n");
+		return (NULL);
+	}
+
+	int fd = mkstemp(tmpfile);
+	if (fd == -1) {
+		fprintf(stderr, "Unable to create temporary file: %s\n",
+		    strerror(errno));
+		free(tmpfile);
+		return (NULL);
+	}
+	/* Only the name is needed; users of the file reopen it by name. */
+	(void) close(fd);
+	return (tmpfile);
+}
+
+/*
+ * Installs "tmpfile" as ZFS_EXPORTS_FILE by renaming it. If the rename
+ * fails, a message is printed, the temporary file is removed and
+ * SA_SYSTEM_ERR is returned. "tmpfile" is free()'d in every case.
+ */
+static int
+nfs_fini_tmpfile(char *tmpfile)
+{
+	int error = SA_OK;
+
+	if (rename(tmpfile, ZFS_EXPORTS_FILE) == -1) {
+		fprintf(stderr, "Unable to rename %s: %s\n", tmpfile,
+		    strerror(errno));
+		unlink(tmpfile);
+		error = SA_SYSTEM_ERR;
+	}
+
+	free(tmpfile);
+	return (error);
+}
+
+/*
+ * This function populates an entry into /etc/exports.d/zfs.exports.
+ * This file is consumed by the linux nfs server so that zfs shares are
+ * automatically exported upon boot or whenever the nfs server restarts.
+ *
+ * One line of the form "sharepath host(sec=security,access,opts)" is
+ * appended to "filename". "host" is converted with get_linux_hostspec()
+ * first, and "pcookie" is the Linux option string (NULL is written as an
+ * empty string).
+ *
+ * Returns SA_OK, the error from get_linux_hostspec(), or SA_SYSTEM_ERR
+ * when the file cannot be opened, written or closed.
+ */
+static int
+nfs_add_entry(const char *filename, const char *sharepath,
+    const char *host, const char *security, const char *access_opts,
+    void *pcookie)
+{
+	int error;
+	char *linuxhost;
+	const char *linux_opts = (const char *)pcookie;
+
+	error = get_linux_hostspec(host, &linuxhost);
+	if (error != SA_OK)
+		return (error);
+
+	if (linux_opts == NULL)
+		linux_opts = "";
+
+	FILE *fp = fopen(filename, "a+e");
+	if (fp == NULL) {
+		fprintf(stderr, "failed to open %s file: %s\n", filename,
+		    strerror(errno));
+		free(linuxhost);
+		return (SA_SYSTEM_ERR);
+	}
+
+	if (fprintf(fp, "%s %s(sec=%s,%s,%s)\n", sharepath, linuxhost,
+	    security, access_opts, linux_opts) < 0) {
+		fprintf(stderr, "failed to write to %s\n", filename);
+		free(linuxhost);
+		fclose(fp);
+		return (SA_SYSTEM_ERR);
+	}
+
+	free(linuxhost);
+	/* Buffered data is written out by fclose(), so check its result. */
+	if (fclose(fp) != 0) {
+		fprintf(stderr, "Unable to close file %s: %s\n",
+		    filename, strerror(errno));
+		return (SA_SYSTEM_ERR);
+	}
+	return (SA_OK);
+}
+
+/*
+ * This function copies all entries from the exports file to "filename",
+ * omitting any entries for the specified mountpoint.
+ *
+ * "filename" is truncated and started with FILE_HEADER. Blank lines and
+ * comment lines of the old file are not copied. An entry belongs to
+ * "mountpoint" when the text before its first space equals it exactly.
+ * A missing exports file is not an error: only the header is written.
+ *
+ * Returns SA_OK, or SA_SYSTEM_ERR when a file cannot be opened, read,
+ * written or closed.
+ */
+static int
+nfs_copy_entries(char *filename, const char *mountpoint)
+{
+	char *buf = NULL;
+	size_t buflen = 0;
+	size_t mountpoint_len = strlen(mountpoint);
+	int error = SA_OK;
+
+	FILE *oldfp = fopen(ZFS_EXPORTS_FILE, "re");
+	FILE *newfp = fopen(filename, "w+e");
+	if (newfp == NULL) {
+		fprintf(stderr, "failed to open %s file: %s\n", filename,
+		    strerror(errno));
+		/* The exports file may not exist; never fclose(NULL). */
+		if (oldfp != NULL)
+			(void) fclose(oldfp);
+		return (SA_SYSTEM_ERR);
+	}
+	fputs(FILE_HEADER, newfp);
+
+	/*
+	 * The ZFS_EXPORTS_FILE may not exist yet. If that's the
+	 * case then just write out the new file.
+	 */
+	if (oldfp != NULL) {
+		while (getline(&buf, &buflen, oldfp) != -1) {
+			char *space;
+
+			if (buf[0] == '\n' || buf[0] == '#')
+				continue;
+
+			/* Drop the entries that belong to "mountpoint". */
+			space = strchr(buf, ' ');
+			if (space != NULL &&
+			    (size_t)(space - buf) == mountpoint_len &&
+			    strncmp(mountpoint, buf, mountpoint_len) == 0)
+				continue;
+
+			fputs(buf, newfp);
+		}
+
+		/* ferror() is only a flag, not an SA_* error code. */
+		if (ferror(oldfp) != 0)
+			error = SA_SYSTEM_ERR;
+
+		if (fclose(oldfp) != 0) {
+			fprintf(stderr, "Unable to close file %s: %s\n",
+			    ZFS_EXPORTS_FILE, strerror(errno));
+			error = error != 0 ? error : SA_SYSTEM_ERR;
+		}
+	}
+
+	if (error == 0 && ferror(newfp) != 0)
+		error = SA_SYSTEM_ERR;
+
+	free(buf);
+	if (fclose(newfp) != 0) {
+		fprintf(stderr, "Unable to close file %s: %s\n",
+		    filename, strerror(errno));
+		error = error != 0 ? error : SA_SYSTEM_ERR;
+	}
+	return (error);
+}
+
+/*
+ * Enables NFS sharing for the specified share.
+ *
+ * With the exports lock held, a temporary file is filled with every
+ * existing entry that does not belong to this share's mountpoint, one
+ * new entry per host in the share's NFS options is appended, and the
+ * result is renamed over the exports file. On any failure the temporary
+ * file is removed and the error is returned.
+ */
+static int
+nfs_enable_share(sa_share_impl_t impl_share)
+{
+	char *linux_opts = NULL;
+	char *tmpname;
+	int error;
+
+	tmpname = nfs_init_tmpfile();
+	if (tmpname == NULL)
+		return (SA_SYSTEM_ERR);
+
+	error = nfs_exports_lock();
+	if (error != 0) {
+		/* The lock is not held, so there is nothing to unlock. */
+		unlink(tmpname);
+		free(tmpname);
+		return (error);
+	}
+
+	error = nfs_copy_entries(tmpname, impl_share->sa_mountpoint);
+
+	if (error == SA_OK) {
+		error = get_linux_shareopts(
+		    FSINFO(impl_share, nfs_fstype)->shareopts, &linux_opts);
+	}
+
+	if (error == SA_OK) {
+		error = foreach_nfs_host(impl_share, tmpname, nfs_add_entry,
+		    linux_opts);
+		free(linux_opts);
+	}
+
+	if (error == SA_OK) {
+		/* nfs_fini_tmpfile() frees tmpname. */
+		error = nfs_fini_tmpfile(tmpname);
+	} else {
+		unlink(tmpname);
+		free(tmpname);
+	}
+
+	nfs_exports_unlock();
+	return (error);
+}
+
+/*
+ * Disables NFS sharing for the specified share.
+ *
+ * With the exports lock held, the exports file is rewritten through a
+ * temporary file without the entries that belong to this share's
+ * mountpoint. On any failure the temporary file is removed and the error
+ * is returned.
+ */
+static int
+nfs_disable_share(sa_share_impl_t impl_share)
+{
+	char *tmpname;
+	int error;
+
+	tmpname = nfs_init_tmpfile();
+	if (tmpname == NULL)
+		return (SA_SYSTEM_ERR);
+
+	error = nfs_exports_lock();
+	if (error != 0) {
+		/* The lock is not held, so there is nothing to unlock. */
+		unlink(tmpname);
+		free(tmpname);
+		return (error);
+	}
+
+	error = nfs_copy_entries(tmpname, impl_share->sa_mountpoint);
+	if (error == SA_OK) {
+		/* nfs_fini_tmpfile() frees tmpname. */
+		error = nfs_fini_tmpfile(tmpname);
+	} else {
+		unlink(tmpname);
+		free(tmpname);
+	}
+
+	nfs_exports_unlock();
+	return (error);
+}
+
+/*
+ * Reports whether ZFS_EXPORTS_FILE contains an entry for the share's
+ * mountpoint, i.e. a line whose text before the first space equals the
+ * mountpoint exactly. Returns B_FALSE when the file cannot be opened.
+ */
+static boolean_t
+nfs_is_shared(sa_share_impl_t impl_share)
+{
+	const char *mountpoint = impl_share->sa_mountpoint;
+	boolean_t found = B_FALSE;
+	char *line = NULL;
+	size_t linesize = 0;
+	FILE *fp;
+
+	fp = fopen(ZFS_EXPORTS_FILE, "re");
+	if (fp == NULL)
+		return (B_FALSE);
+
+	while (getline(&line, &linesize, fp) != -1) {
+		char *space = strchr(line, ' ');
+		int mountpoint_len;
+
+		if (space == NULL)
+			continue;
+
+		mountpoint_len = strlen(mountpoint);
+		if (space - line == mountpoint_len &&
+		    strncmp(mountpoint, line, mountpoint_len) == 0) {
+			found = B_TRUE;
+			break;
+		}
+	}
+
+	fclose(fp);
+	free(line);
+	return (found);
+}
+
+/*
+ * Checks whether the specified NFS share options are syntactically
+ * correct by running them through get_linux_shareopts() and discarding
+ * the converted string. Returns the result of that conversion.
+ */
+static int
+nfs_validate_shareopts(const char *shareopts)
+{
+	char *converted;
+	int error = get_linux_shareopts(shareopts, &converted);
+
+	if (error == SA_OK)
+		free(converted);
+
+	return (error);
+}
+
+/*
+ * Records "shareopts" as the share's NFS options and returns SA_OK.
+ * The pointer itself is stored; the string is not copied.
+ */
+static int
+nfs_update_shareopts(sa_share_impl_t impl_share, const char *shareopts)
+{
+	FSINFO(impl_share, nfs_fstype)->shareopts = (char *)shareopts;
+	return (SA_OK);
+}
+
+/*
+ * Clears a share's NFS options. Used by libshare to
+ * clean up shares that are about to be free()'d.
+ *
+ * Only the stored pointer is reset to NULL; the string it pointed to is
+ * not freed here.
+ */
+static void
+nfs_clear_shareopts(sa_share_impl_t impl_share)
+{
+	FSINFO(impl_share, nfs_fstype)->shareopts = NULL;
+}
+
+/*
+ * Runs "/usr/sbin/exportfs -ra" through libzfs_run_process() and returns
+ * that function's result.
+ */
+static int
+nfs_commit_shares(void)
+{
+	char *argv[] = {
+	    "/usr/sbin/exportfs",
+	    "-ra",
+	    NULL
+	};
+
+	return (libzfs_run_process(argv[0], argv, 0));
+}
+
+/*
+ * Operations table passed to register_fstype() for the "nfs" protocol.
+ */
+static const sa_share_ops_t nfs_shareops = {
+	.enable_share = nfs_enable_share,
+	.disable_share = nfs_disable_share,
+	.is_shared = nfs_is_shared,
+
+	.validate_shareopts = nfs_validate_shareopts,
+	.update_shareopts = nfs_update_shareopts,
+	.clear_shareopts = nfs_clear_shareopts,
+	.commit_shares = nfs_commit_shares,
+};
+
+/*
+ * Initializes the NFS functionality of libshare.
+ *
+ * Registers nfs_shareops under the protocol name "nfs" and keeps the
+ * value returned by register_fstype() in nfs_fstype, which the FSINFO()
+ * lookups in this file use.
+ */
+void
+libshare_nfs_init(void)
+{
+	nfs_fstype = register_fstype("nfs", &nfs_shareops);
+}

diff --git a/zfs/lib/libshare/os/linux/smb.c b/zfs/lib/libshare/os/linux/smb.c
new file mode 100644
index 0000000..45811ff
--- /dev/null
+++ b/zfs/lib/libshare/os/linux/smb.c

@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011,2012 Turbo Fredriksson <turbo@bayour.com>, based on nfs.c
+ *                         by Gunnar Beutner
+ * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+ *
+ * This is an addition to the zfs device driver to add, modify and remove SMB
+ * shares using the 'net share' command that comes with Samba.
+ *
+ * TESTING
+ * Make sure that samba listens to 'localhost' (127.0.0.1) and that the options
+ * 'usershare max shares' and 'usershare owner only' have been reviewed/set
+ * accordingly (see zfs(8) for information).
+ *
+ * Once Samba has been configured, test that this
+ * works with the following three commands (in this case, my ZFS
+ * filesystem is called 'share/Test1'):
+ *
+ *	(root)# net -U root -S 127.0.0.1 usershare add Test1 /share/Test1 \
+ *		"Comment: /share/Test1" "Everyone:F"
+ *	(root)# net usershare list | grep -i test
+ *	(root)# net -U root -S 127.0.0.1 usershare delete Test1
+ *
+ * The first command will create a user share that gives everyone full access.
+ * To limit the access below that, use normal UNIX commands (chmod, chown etc).
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <libzfs.h>
+#include <libshare.h>
+#include "libshare_impl.h"
+#include "smb.h"
+
+static boolean_t smb_available(void);
+
+static sa_fstype_t *smb_fstype;
+
+smb_share_t *smb_shares;
+static int smb_disable_share(sa_share_impl_t impl_share);
+static boolean_t smb_is_share_active(sa_share_impl_t impl_share);
+
+/*
+ * Retrieve the list of SMB shares.
+ *
+ * Every regular file in SHARE_DIR whose name does not start with '.' is
+ * read as a share definition made of "key=value" lines; lines starting
+ * with '#' and lines without '=' are ignored. Once "path", "comment" and
+ * "guest_ok" have all been seen, a smb_share_t named after the file is
+ * put at the head of a new list. When the directory has been walked, the
+ * global smb_shares is pointed at that new list.
+ *
+ * Returns SA_SYSTEM_ERR if SHARE_DIR cannot be opened. An error on a
+ * single entry (stat(), fopen() or an allocation) is remembered in the
+ * return value, but the walk continues with the next entry.
+ *
+ * NOTE(review): the list smb_shares pointed to before the call is not
+ * released here.
+ */
+static int
+smb_retrieve_shares(void)
+{
+	int rc = SA_OK;
+	char file_path[PATH_MAX], line[512], *token, *key, *value;
+	char *dup_value = NULL, *path = NULL, *comment = NULL, *name = NULL;
+	char *guest_ok = NULL;
+	size_t len;
+	DIR *shares_dir;
+	FILE *share_file_fp = NULL;
+	struct dirent *directory;
+	struct stat eStat;
+	smb_share_t *shares, *new_shares = NULL;
+
+	/* opendir(), stat() */
+	shares_dir = opendir(SHARE_DIR);
+	if (shares_dir == NULL)
+		return (SA_SYSTEM_ERR);
+
+	/* Go through the directory, looking for shares */
+	while ((directory = readdir(shares_dir))) {
+		if (directory->d_name[0] == '.')
+			continue;
+
+		snprintf(file_path, sizeof (file_path),
+		    "%s/%s", SHARE_DIR, directory->d_name);
+
+		if (stat(file_path, &eStat) == -1) {
+			rc = SA_SYSTEM_ERR;
+			goto out;
+		}
+
+		if (!S_ISREG(eStat.st_mode))
+			continue;
+
+		if ((share_file_fp = fopen(file_path, "re")) == NULL) {
+			rc = SA_SYSTEM_ERR;
+			goto out;
+		}
+
+		name = strdup(directory->d_name);
+		if (name == NULL) {
+			rc = SA_NO_MEMORY;
+			goto out;
+		}
+
+		while (fgets(line, sizeof (line), share_file_fp)) {
+			if (line[0] == '#')
+				continue;
+
+			/*
+			 * Trim trailing new-line character(s). The length
+			 * is checked so that a line made only of such
+			 * characters does not lead to a read before the
+			 * start of the buffer.
+			 */
+			len = strlen(line);
+			while (len > 0 && (line[len - 1] == '\r' ||
+			    line[len - 1] == '\n'))
+				line[--len] = '\0';
+
+			/* Split the line in two, separated by '=' */
+			token = strchr(line, '=');
+			if (token == NULL)
+				continue;
+
+			key = line;
+			value = token + 1;
+			*token = '\0';
+
+			dup_value = strdup(value);
+			if (dup_value == NULL) {
+				rc = SA_NO_MEMORY;
+				goto out;
+			}
+
+			/* A repeated key replaces the earlier value. */
+			if (strcmp(key, "path") == 0) {
+				free(path);
+				path = dup_value;
+			} else if (strcmp(key, "comment") == 0) {
+				free(comment);
+				comment = dup_value;
+			} else if (strcmp(key, "guest_ok") == 0) {
+				free(guest_ok);
+				guest_ok = dup_value;
+			} else
+				free(dup_value);
+
+			dup_value = NULL;
+
+			if (path == NULL || comment == NULL || guest_ok == NULL)
+				continue; /* Incomplete share definition */
+			else {
+				shares = (smb_share_t *)
+				    malloc(sizeof (smb_share_t));
+				if (shares == NULL) {
+					rc = SA_NO_MEMORY;
+					goto out;
+				}
+
+				(void) strlcpy(shares->name, name,
+				    sizeof (shares->name));
+
+				(void) strlcpy(shares->path, path,
+				    sizeof (shares->path));
+
+				(void) strlcpy(shares->comment, comment,
+				    sizeof (shares->comment));
+
+				shares->guest_ok = atoi(guest_ok);
+
+				shares->next = new_shares;
+				new_shares = shares;
+
+				free(path);
+				free(comment);
+				free(guest_ok);
+
+				path = NULL;
+				comment = NULL;
+				guest_ok = NULL;
+			}
+		}
+
+		/*
+		 * Per-entry cleanup. The label is inside the directory
+		 * loop, so a "goto out" abandons only the current entry.
+		 */
+out:
+		if (share_file_fp != NULL) {
+			fclose(share_file_fp);
+			share_file_fp = NULL;
+		}
+
+		free(name);
+		free(path);
+		free(comment);
+		free(guest_ok);
+
+		name = NULL;
+		path = NULL;
+		comment = NULL;
+		guest_ok = NULL;
+	}
+	closedir(shares_dir);
+
+	smb_shares = new_shares;
+
+	return (rc);
+}
+
+/*
+ * Used internally by smb_enable_share to create one user share.
+ *
+ * The share name is "sharename" cut to SMB_NAME_MAX with every '/', '-',
+ * ':' and ' ' replaced by '_'. The share is created by running
+ *
+ *	net -S NET_CMD_ARG_HOST usershare add <name> <sharepath> \
+ *	    "Comment: <sharepath>" "Everyone:F"
+ *
+ * Returns SA_SYSTEM_ERR when libzfs_run_process() returns a negative
+ * value. Otherwise the share list is reloaded and SA_OK is returned.
+ */
+static int
+smb_enable_share_one(const char *sharename, const char *sharepath)
+{
+	char name[SMB_NAME_MAX], comment[SMB_COMMENT_MAX];
+	char *c;
+
+	/* Support ZFS share name regexp '[[:alnum:]_-.: ]' */
+	strlcpy(name, sharename, sizeof (name));
+	name[sizeof (name) - 1] = '\0';
+
+	for (c = name; *c != '\0'; c++) {
+		if (*c == '/' || *c == '-' || *c == ':' || *c == ' ')
+			*c = '_';
+	}
+
+	snprintf(comment, sizeof (comment), "Comment: %s", sharepath);
+
+	char *argv[10] = {
+		NET_CMD_PATH,
+		(char *)"-S",
+		NET_CMD_ARG_HOST,
+		(char *)"usershare",
+		(char *)"add",
+		(char *)name,
+		(char *)sharepath,
+		(char *)comment,
+		"Everyone:F",
+		NULL
+	};
+
+	if (libzfs_run_process(argv[0], argv, 0) < 0)
+		return (SA_SYSTEM_ERR);
+
+	/* Reload the share file */
+	(void) smb_retrieve_shares();
+
+	return (SA_OK);
+}
+
+/*
+ * Enables SMB sharing for the specified share.
+ *
+ * Fails with SA_SYSTEM_ERR when SMB is not available or when the share
+ * has no SMB options. A share that is already active is disabled first.
+ * The option value "off" then ends the call with SA_OK; any other value
+ * creates the share, named after the dataset, for the mountpoint.
+ */
+static int
+smb_enable_share(sa_share_impl_t impl_share)
+{
+	const char *opts;
+
+	if (!smb_available())
+		return (SA_SYSTEM_ERR);
+
+	if (smb_is_share_active(impl_share))
+		smb_disable_share(impl_share);
+
+	opts = FSINFO(impl_share, smb_fstype)->shareopts;
+
+	/* on/off */
+	if (opts == NULL)
+		return (SA_SYSTEM_ERR);
+
+	if (strcmp(opts, "off") != 0) {
+		/* Magic: Enable (i.e., 'create new') share */
+		return (smb_enable_share_one(impl_share->sa_zfsname,
+		    impl_share->sa_mountpoint));
+	}
+
+	return (SA_OK);
+}
+
+/*
+ * Used internally by smb_disable_share to delete one user share by
+ * running "net -S NET_CMD_ARG_HOST usershare delete <sharename>".
+ *
+ * Returns SA_SYSTEM_ERR when libzfs_run_process() returns a negative
+ * value and SA_OK otherwise.
+ */
+static int
+smb_disable_share_one(const char *sharename)
+{
+	int rc;
+	char *argv[7];
+
+	/* CMD: net -S NET_CMD_ARG_HOST usershare delete Test1 */
+	argv[0] = NET_CMD_PATH;
+	argv[1] = (char *)"-S";
+	argv[2] = NET_CMD_ARG_HOST;
+	argv[3] = (char *)"usershare";
+	argv[4] = (char *)"delete";
+	/*
+	 * Pass the caller's string directly, as smb_enable_share_one()
+	 * does with its arguments; a strdup() copy here was never freed
+	 * and never checked for NULL.
+	 */
+	argv[5] = (char *)sharename;
+	argv[6] = NULL;
+
+	rc = libzfs_run_process(argv[0], argv, 0);
+	if (rc < 0)
+		return (SA_SYSTEM_ERR);
+	else
+		return (SA_OK);
+}
+
+/*
+ * Disables SMB sharing for the specified share.
+ *
+ * The list in smb_shares is searched for the first share whose path
+ * equals the mountpoint, and that share is deleted by name. SA_OK is
+ * returned when SMB is not available or when no such share is listed.
+ */
+static int
+smb_disable_share(sa_share_impl_t impl_share)
+{
+	smb_share_t *cur;
+
+	/*
+	 * Without SMB support the share can't possibly be active, so
+	 * nothing needs to be done to disable it.
+	 */
+	if (!smb_available())
+		return (SA_OK);
+
+	for (cur = smb_shares; cur != NULL; cur = cur->next) {
+		if (strcmp(impl_share->sa_mountpoint, cur->path) == 0)
+			return (smb_disable_share_one(cur->name));
+	}
+
+	return (SA_OK);
+}
+
+/*
+ * Checks whether the specified SMB share options are syntactically
+ * correct. Only the exact values "on" and "off" are accepted; everything
+ * else is SA_SYNTAX_ERR.
+ */
+static int
+smb_validate_shareopts(const char *shareopts)
+{
+	/* TODO: Accept 'name' and sec/acl (?) */
+	if (strcmp(shareopts, "off") != 0 && strcmp(shareopts, "on") != 0)
+		return (SA_SYNTAX_ERR);
+
+	return (SA_OK);
+}
+
+/*
+ * Checks whether a share is currently active.
+ *
+ * The share list is reloaded first; the share counts as active when an
+ * entry of the reloaded list has a path equal to the share's mountpoint.
+ * Returns B_FALSE when SMB is not available.
+ */
+static boolean_t
+smb_is_share_active(sa_share_impl_t impl_share)
+{
+	smb_share_t *iter;
+
+	if (!smb_available())
+		return (B_FALSE);
+
+	/* Retrieve the list of (possible) active shares */
+	smb_retrieve_shares();
+
+	/*
+	 * smb_retrieve_shares() replaces smb_shares, so the list head has
+	 * to be read after the reload and not before it.
+	 */
+	for (iter = smb_shares; iter != NULL; iter = iter->next) {
+		if (strcmp(impl_share->sa_mountpoint, iter->path) == 0)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Called to update a share's options. A share's options might be out of
+ * date if the share was loaded from disk and the "sharesmb" dataset
+ * property has changed in the meantime.
+ *
+ * Only the pointer is recorded: the string is not copied and the share
+ * is not re-enabled here. Returns SA_SYSTEM_ERR for a NULL share and
+ * SA_OK otherwise.
+ */
+static int
+smb_update_shareopts(sa_share_impl_t impl_share, const char *shareopts)
+{
+	if (!impl_share)
+		return (SA_SYSTEM_ERR);
+
+	FSINFO(impl_share, smb_fstype)->shareopts = (char *)shareopts;
+	return (SA_OK);
+}
+
+/*
+ * Installed as the commit_shares operation of smb_shareops.
+ * Does nothing and always returns 0.
+ */
+static int
+smb_update_shares(void)
+{
+	/* Not implemented */
+	return (0);
+}
+
+/*
+ * Clears a share's SMB options. Used by libshare to
+ * clean up shares that are about to be free()'d.
+ *
+ * Only the stored pointer is reset to NULL; the string it pointed to is
+ * not freed here.
+ */
+static void
+smb_clear_shareopts(sa_share_impl_t impl_share)
+{
+	FSINFO(impl_share, smb_fstype)->shareopts = NULL;
+}
+
+/*
+ * Operations table passed to register_fstype() for the "smb" protocol.
+ */
+static const sa_share_ops_t smb_shareops = {
+	.enable_share = smb_enable_share,
+	.disable_share = smb_disable_share,
+	.is_shared = smb_is_share_active,
+
+	.validate_shareopts = smb_validate_shareopts,
+	.update_shareopts = smb_update_shareopts,
+	.clear_shareopts = smb_clear_shareopts,
+	.commit_shares = smb_update_shares,
+};
+
+/*
+ * Provides a convenient wrapper for determining SMB availability.
+ *
+ * SMB counts as available when SHARE_DIR is a directory (checked with
+ * lstat(), so a symbolic link does not qualify) and NET_CMD_PATH exists.
+ */
+static boolean_t
+smb_available(void)
+{
+	struct stat sb;
+	boolean_t have_dir;
+
+	have_dir = (lstat(SHARE_DIR, &sb) == 0 && S_ISDIR(sb.st_mode)) ?
+	    B_TRUE : B_FALSE;
+	if (!have_dir)
+		return (B_FALSE);
+
+	return (access(NET_CMD_PATH, F_OK) == 0 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Initializes the SMB functionality of libshare.
+ *
+ * Registers smb_shareops under the protocol name "smb" and keeps the
+ * value returned by register_fstype() in smb_fstype, which the FSINFO()
+ * lookups in this file use.
+ */
+void
+libshare_smb_init(void)
+{
+	smb_fstype = register_fstype("smb", &smb_shareops);
+}

diff --git a/zfs/lib/libshare/smb.c b/zfs/lib/libshare/smb.c
deleted file mode 100644
index f567f7c..0000000
--- a/zfs/lib/libshare/smb.c
+++ /dev/null

@@ -1,467 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011,2012 Turbo Fredriksson <turbo@bayour.com>, based on nfs.c
- *                         by Gunnar Beutner
- *
- * This is an addition to the zfs device driver to add, modify and remove SMB
- * shares using the 'net share' command that comes with Samba.
- *
- * TESTING
- * Make sure that samba listens to 'localhost' (127.0.0.1) and that the options
- * 'usershare max shares' and 'usershare owner only' have been reviewed/set
- * accordingly (see zfs(8) for information).
- *
- * Once configuration in samba have been done, test that this
- * works with the following three commands (in this case, my ZFS
- * filesystem is called 'share/Test1'):
- *
- *	(root)# net -U root -S 127.0.0.1 usershare add Test1 /share/Test1 \
- *		"Comment: /share/Test1" "Everyone:F"
- *	(root)# net usershare list | grep -i test
- *	(root)# net -U root -S 127.0.0.1 usershare delete Test1
- *
- * The first command will create a user share that gives everyone full access.
- * To limit the access below that, use normal UNIX commands (chmod, chown etc).
- */
-
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <strings.h>
-#include <fcntl.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <libzfs.h>
-#include <libshare.h>
-#include "libshare_impl.h"
-#include "smb.h"
-
-static boolean_t smb_available(void);
-
-static sa_fstype_t *smb_fstype;
-
-smb_share_t *smb_shares;
-
-/*
- * Retrieve the list of SMB shares.
- */
-static int
-smb_retrieve_shares(void)
-{
-	int rc = SA_OK;
-	char file_path[PATH_MAX], line[512], *token, *key, *value;
-	char *dup_value = NULL, *path = NULL, *comment = NULL, *name = NULL;
-	char *guest_ok = NULL;
-	DIR *shares_dir;
-	FILE *share_file_fp = NULL;
-	struct dirent *directory;
-	struct stat eStat;
-	smb_share_t *shares, *new_shares = NULL;
-
-	/* opendir(), stat() */
-	shares_dir = opendir(SHARE_DIR);
-	if (shares_dir == NULL)
-		return (SA_SYSTEM_ERR);
-
-	/* Go through the directory, looking for shares */
-	while ((directory = readdir(shares_dir))) {
-		if (directory->d_name[0] == '.')
-			continue;
-
-		snprintf(file_path, sizeof (file_path),
-		    "%s/%s", SHARE_DIR, directory->d_name);
-
-		if (stat(file_path, &eStat) == -1) {
-			rc = SA_SYSTEM_ERR;
-			goto out;
-		}
-
-		if (!S_ISREG(eStat.st_mode))
-			continue;
-
-		if ((share_file_fp = fopen(file_path, "r")) == NULL) {
-			rc = SA_SYSTEM_ERR;
-			goto out;
-		}
-
-		name = strdup(directory->d_name);
-		if (name == NULL) {
-			rc = SA_NO_MEMORY;
-			goto out;
-		}
-
-		while (fgets(line, sizeof (line), share_file_fp)) {
-			if (line[0] == '#')
-				continue;
-
-			/* Trim trailing new-line character(s). */
-			while (line[strlen(line) - 1] == '\r' ||
-			    line[strlen(line) - 1] == '\n')
-				line[strlen(line) - 1] = '\0';
-
-			/* Split the line in two, separated by '=' */
-			token = strchr(line, '=');
-			if (token == NULL)
-				continue;
-
-			key = line;
-			value = token + 1;
-			*token = '\0';
-
-			dup_value = strdup(value);
-			if (dup_value == NULL) {
-				rc = SA_NO_MEMORY;
-				goto out;
-			}
-
-			if (strcmp(key, "path") == 0) {
-				free(path);
-				path = dup_value;
-			} else if (strcmp(key, "comment") == 0) {
-				free(comment);
-				comment = dup_value;
-			} else if (strcmp(key, "guest_ok") == 0) {
-				free(guest_ok);
-				guest_ok = dup_value;
-			} else
-				free(dup_value);
-
-			dup_value = NULL;
-
-			if (path == NULL || comment == NULL || guest_ok == NULL)
-				continue; /* Incomplete share definition */
-			else {
-				shares = (smb_share_t *)
-				    malloc(sizeof (smb_share_t));
-				if (shares == NULL) {
-					rc = SA_NO_MEMORY;
-					goto out;
-				}
-
-				(void) strlcpy(shares->name, name,
-				    sizeof (shares->name));
-
-				(void) strlcpy(shares->path, path,
-				    sizeof (shares->path));
-
-				(void) strlcpy(shares->comment, comment,
-				    sizeof (shares->comment));
-
-				shares->guest_ok = atoi(guest_ok);
-
-				shares->next = new_shares;
-				new_shares = shares;
-
-				free(path);
-				free(comment);
-				free(guest_ok);
-
-				path = NULL;
-				comment = NULL;
-				guest_ok = NULL;
-			}
-		}
-
-out:
-		if (share_file_fp != NULL) {
-			fclose(share_file_fp);
-			share_file_fp = NULL;
-		}
-
-		free(name);
-		free(path);
-		free(comment);
-		free(guest_ok);
-
-		name = NULL;
-		path = NULL;
-		comment = NULL;
-		guest_ok = NULL;
-	}
-	closedir(shares_dir);
-
-	smb_shares = new_shares;
-
-	return (rc);
-}
-
-/*
- * Used internally by smb_enable_share to enable sharing for a single host.
- */
-static int
-smb_enable_share_one(const char *sharename, const char *sharepath)
-{
-	char *argv[10], *pos;
-	char name[SMB_NAME_MAX], comment[SMB_COMMENT_MAX];
-	int rc;
-
-	/* Support ZFS share name regexp '[[:alnum:]_-.: ]' */
-	strlcpy(name, sharename, sizeof (name));
-	name [sizeof (name)-1] = '\0';
-
-	pos = name;
-	while (*pos != '\0') {
-		switch (*pos) {
-		case '/':
-		case '-':
-		case ':':
-		case ' ':
-			*pos = '_';
-		}
-
-		++pos;
-	}
-
-	/*
-	 * CMD: net -S NET_CMD_ARG_HOST usershare add Test1 /share/Test1 \
-	 *      "Comment" "Everyone:F"
-	 */
-	snprintf(comment, sizeof (comment), "Comment: %s", sharepath);
-
-	argv[0] = NET_CMD_PATH;
-	argv[1] = (char *)"-S";
-	argv[2] = NET_CMD_ARG_HOST;
-	argv[3] = (char *)"usershare";
-	argv[4] = (char *)"add";
-	argv[5] = (char *)name;
-	argv[6] = (char *)sharepath;
-	argv[7] = (char *)comment;
-	argv[8] = "Everyone:F";
-	argv[9] = NULL;
-
-	rc = libzfs_run_process(argv[0], argv, 0);
-	if (rc < 0)
-		return (SA_SYSTEM_ERR);
-
-	/* Reload the share file */
-	(void) smb_retrieve_shares();
-
-	return (SA_OK);
-}
-
-/*
- * Enables SMB sharing for the specified share.
- */
-static int
-smb_enable_share(sa_share_impl_t impl_share)
-{
-	char *shareopts;
-
-	if (!smb_available())
-		return (SA_SYSTEM_ERR);
-
-	shareopts = FSINFO(impl_share, smb_fstype)->shareopts;
-	if (shareopts == NULL) /* on/off */
-		return (SA_SYSTEM_ERR);
-
-	if (strcmp(shareopts, "off") == 0)
-		return (SA_OK);
-
-	/* Magic: Enable (i.e., 'create new') share */
-	return (smb_enable_share_one(impl_share->dataset,
-	    impl_share->sharepath));
-}
-
-/*
- * Used internally by smb_disable_share to disable sharing for a single host.
- */
-static int
-smb_disable_share_one(const char *sharename)
-{
-	int rc;
-	char *argv[7];
-
-	/* CMD: net -S NET_CMD_ARG_HOST usershare delete Test1 */
-	argv[0] = NET_CMD_PATH;
-	argv[1] = (char *)"-S";
-	argv[2] = NET_CMD_ARG_HOST;
-	argv[3] = (char *)"usershare";
-	argv[4] = (char *)"delete";
-	argv[5] = strdup(sharename);
-	argv[6] = NULL;
-
-	rc = libzfs_run_process(argv[0], argv, 0);
-	if (rc < 0)
-		return (SA_SYSTEM_ERR);
-	else
-		return (SA_OK);
-}
-
-/*
- * Disables SMB sharing for the specified share.
- */
-static int
-smb_disable_share(sa_share_impl_t impl_share)
-{
-	smb_share_t *shares = smb_shares;
-
-	if (!smb_available()) {
-		/*
-		 * The share can't possibly be active, so nothing
-		 * needs to be done to disable it.
-		 */
-		return (SA_OK);
-	}
-
-	while (shares != NULL) {
-		if (strcmp(impl_share->sharepath, shares->path) == 0)
-			return (smb_disable_share_one(shares->name));
-
-		shares = shares->next;
-	}
-
-	return (SA_OK);
-}
-
-/*
- * Checks whether the specified SMB share options are syntactically correct.
- */
-static int
-smb_validate_shareopts(const char *shareopts)
-{
-	/* TODO: Accept 'name' and sec/acl (?) */
-	if ((strcmp(shareopts, "off") == 0) || (strcmp(shareopts, "on") == 0))
-		return (SA_OK);
-
-	return (SA_SYNTAX_ERR);
-}
-
-/*
- * Checks whether a share is currently active.
- */
-static boolean_t
-smb_is_share_active(sa_share_impl_t impl_share)
-{
-	smb_share_t *iter = smb_shares;
-
-	if (!smb_available())
-		return (B_FALSE);
-
-	/* Retrieve the list of (possible) active shares */
-	smb_retrieve_shares();
-
-	while (iter != NULL) {
-		if (strcmp(impl_share->sharepath, iter->path) == 0)
-			return (B_TRUE);
-
-		iter = iter->next;
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Called to update a share's options. A share's options might be out of
- * date if the share was loaded from disk and the "sharesmb" dataset
- * property has changed in the meantime. This function also takes care
- * of re-enabling the share if necessary.
- */
-static int
-smb_update_shareopts(sa_share_impl_t impl_share, const char *resource,
-    const char *shareopts)
-{
-	char *shareopts_dup;
-	boolean_t needs_reshare = B_FALSE;
-	char *old_shareopts;
-
-	if (!impl_share)
-		return (SA_SYSTEM_ERR);
-
-	FSINFO(impl_share, smb_fstype)->active =
-	    smb_is_share_active(impl_share);
-
-	old_shareopts = FSINFO(impl_share, smb_fstype)->shareopts;
-
-	if (FSINFO(impl_share, smb_fstype)->active && old_shareopts != NULL &&
-	    strcmp(old_shareopts, shareopts) != 0) {
-		needs_reshare = B_TRUE;
-		smb_disable_share(impl_share);
-	}
-
-	shareopts_dup = strdup(shareopts);
-
-	if (shareopts_dup == NULL)
-		return (SA_NO_MEMORY);
-
-	if (old_shareopts != NULL)
-		free(old_shareopts);
-
-	FSINFO(impl_share, smb_fstype)->shareopts = shareopts_dup;
-
-	if (needs_reshare)
-		smb_enable_share(impl_share);
-
-	return (SA_OK);
-}
-
-/*
- * Clears a share's SMB options. Used by libshare to
- * clean up shares that are about to be free()'d.
- */
-static void
-smb_clear_shareopts(sa_share_impl_t impl_share)
-{
-	free(FSINFO(impl_share, smb_fstype)->shareopts);
-	FSINFO(impl_share, smb_fstype)->shareopts = NULL;
-}
-
-static const sa_share_ops_t smb_shareops = {
-	.enable_share = smb_enable_share,
-	.disable_share = smb_disable_share,
-
-	.validate_shareopts = smb_validate_shareopts,
-	.update_shareopts = smb_update_shareopts,
-	.clear_shareopts = smb_clear_shareopts,
-};
-
-/*
- * Provides a convenient wrapper for determining SMB availability
- */
-static boolean_t
-smb_available(void)
-{
-	struct stat statbuf;
-
-	if (lstat(SHARE_DIR, &statbuf) != 0 ||
-	    !S_ISDIR(statbuf.st_mode))
-		return (B_FALSE);
-
-	if (access(NET_CMD_PATH, F_OK) != 0)
-		return (B_FALSE);
-
-	return (B_TRUE);
-}
-
-/*
- * Initializes the SMB functionality of libshare.
- */
-void
-libshare_smb_init(void)
-{
-	smb_fstype = register_fstype("smb", &smb_shareops);
-}

diff --git a/zfs/lib/libspl/Makefile.am b/zfs/lib/libspl/Makefile.am
index cd74676..b59919b 100644
--- a/zfs/lib/libspl/Makefile.am
+++ b/zfs/lib/libspl/Makefile.am

@@ -1,42 +1,48 @@
 include $(top_srcdir)/config/Rules.am
 
-VPATH = \
-	$(top_srcdir)/lib/libspl \
-	$(top_srcdir)/lib/libspl/$(TARGET_ASM_DIR)
+SUBDIRS = include
 
-SUBDIRS = include $(TARGET_ASM_DIR)
-DIST_SUBDIRS = include asm-generic asm-i386 asm-x86_64
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/lib/libspl/include
+noinst_LTLIBRARIES = libspl_assert.la libspl.la
 
-AM_CFLAGS += $(LIBTIRPC_CFLAGS)
-
-AM_CCASFLAGS = \
-	$(CFLAGS)
-
-noinst_LTLIBRARIES = libspl.la
+libspl_assert_la_SOURCES = \
+	assert.c
 
 USER_C = \
-	getexecname.c \
-	gethostid.c \
-	getmntany.c \
+	atomic.c \
 	list.c \
 	mkdirp.c \
 	page.c \
 	strlcat.c \
 	strlcpy.c \
 	timestamp.c \
-	zone.c \
 	include/sys/list.h \
 	include/sys/list_impl.h
 
-USER_ASM = atomic.S
+if BUILD_LINUX
+USER_C += \
+	os/linux/getexecname.c \
+	os/linux/gethostid.c \
+	os/linux/getmntany.c \
+	os/linux/zone.c
+endif
 
-nodist_libspl_la_SOURCES = \
-	$(USER_C) \
-	$(USER_ASM)
+if BUILD_FREEBSD
+USER_C += \
+	os/freebsd/getexecname.c \
+	os/freebsd/gethostid.c \
+	os/freebsd/getmntany.c \
+	os/freebsd/mnttab.c \
+	os/freebsd/zone.c
+endif
 
-libspl_la_LIBADD = -lrt $(LIBTIRPC_LIBS)
+libspl_la_SOURCES = $(USER_C)
 
-EXTRA_DIST = $(USER_C)
+libspl_la_LIBADD = \
+	libspl_assert.la
+
+libspl_la_LIBADD += $(LIBATOMIC_LIBS) $(LIBCLOCK_GETTIME)
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libspl/asm-generic/Makefile.am b/zfs/lib/libspl/asm-generic/Makefile.am
deleted file mode 100644
index 17fe501..0000000
--- a/zfs/lib/libspl/asm-generic/Makefile.am
+++ /dev/null

@@ -1,18 +0,0 @@
-include $(top_srcdir)/config/Rules.am
-
-DEFAULT_INCLUDES += \
-        -I$(top_srcdir)/lib/libspl/include
-
-atomic_SOURCE = atomic.c
-atomic_ASM = atomic.S
-
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	  $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -fPIC
-EXTRA_DIST = $(atomic_SOURCE)
-
-# Generates assembly to simplify inclusion in ../Makefile.am
-all-am:
-	$(COMPILE) -c -S $(atomic_SOURCE) -o $(atomic_ASM)
-
-clean-generic:
-	$(RM) $(atomic_ASM)

diff --git a/zfs/lib/libspl/asm-generic/atomic.c b/zfs/lib/libspl/asm-generic/atomic.c
deleted file mode 100644
index 03f8ddc..0000000
--- a/zfs/lib/libspl/asm-generic/atomic.c
+++ /dev/null

@@ -1,500 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2009 by Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <atomic.h>
-#include <assert.h>
-#include <pthread.h>
-
-/*
- * All operations are implemented by serializing them through a global
- * pthread mutex.  This provides a correct generic implementation.
- * However all supported architectures are encouraged to provide a
- * native implementation is assembly for performance reasons.
- */
-pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * These are the void returning variants
- */
-/* BEGIN CSTYLED */
-#define	ATOMIC_INC(name, type) \
-	void atomic_inc_##name(volatile type *target)			\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		(*target)++;						\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_INC(long, unsigned long)
-ATOMIC_INC(8, uint8_t)
-ATOMIC_INC(uchar, uchar_t)
-ATOMIC_INC(16, uint16_t)
-ATOMIC_INC(ushort, ushort_t)
-ATOMIC_INC(32, uint32_t)
-ATOMIC_INC(uint, uint_t)
-ATOMIC_INC(ulong, ulong_t)
-ATOMIC_INC(64, uint64_t)
-
-
-#define	ATOMIC_DEC(name, type) \
-	void atomic_dec_##name(volatile type *target)			\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		(*target)--;						\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_DEC(long, unsigned long)
-ATOMIC_DEC(8, uint8_t)
-ATOMIC_DEC(uchar, uchar_t)
-ATOMIC_DEC(16, uint16_t)
-ATOMIC_DEC(ushort, ushort_t)
-ATOMIC_DEC(32, uint32_t)
-ATOMIC_DEC(uint, uint_t)
-ATOMIC_DEC(ulong, ulong_t)
-ATOMIC_DEC(64, uint64_t)
-
-
-#define	ATOMIC_ADD(name, type1, type2) \
-	void atomic_add_##name(volatile type1 *target, type2 bits)	\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		*target += bits;					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_ADD(8, uint8_t, int8_t)
-ATOMIC_ADD(char, uchar_t, signed char)
-ATOMIC_ADD(16, uint16_t, int16_t)
-ATOMIC_ADD(short, ushort_t, short)
-ATOMIC_ADD(32, uint32_t, int32_t)
-ATOMIC_ADD(int, uint_t, int)
-ATOMIC_ADD(long, ulong_t, long)
-ATOMIC_ADD(64, uint64_t, int64_t)
-
-void
-atomic_add_ptr(volatile void *target, ssize_t bits)
-{
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	*(caddr_t *)target += bits;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-}
-
-
-#define	ATOMIC_SUB(name, type1, type2) \
-	void atomic_sub_##name(volatile type1 *target, type2 bits)	\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		*target -= bits;					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_SUB(8, uint8_t, int8_t)
-ATOMIC_SUB(char, uchar_t, signed char)
-ATOMIC_SUB(16, uint16_t, int16_t)
-ATOMIC_SUB(short, ushort_t, short)
-ATOMIC_SUB(32, uint32_t, int32_t)
-ATOMIC_SUB(int, uint_t, int)
-ATOMIC_SUB(long, ulong_t, long)
-ATOMIC_SUB(64, uint64_t, int64_t)
-
-void
-atomic_sub_ptr(volatile void *target, ssize_t bits)
-{
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	*(caddr_t *)target -= bits;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-}
-
-
-#define	ATOMIC_OR(name, type) \
-	void atomic_or_##name(volatile type *target, type bits)		\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		*target |= bits;					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_OR(8, uint8_t)
-ATOMIC_OR(uchar, uchar_t)
-ATOMIC_OR(16, uint16_t)
-ATOMIC_OR(ushort, ushort_t)
-ATOMIC_OR(32, uint32_t)
-ATOMIC_OR(uint, uint_t)
-ATOMIC_OR(ulong, ulong_t)
-ATOMIC_OR(64, uint64_t)
-
-
-#define	ATOMIC_AND(name, type) \
-	void atomic_and_##name(volatile type *target, type bits)	\
-	{								\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		*target &= bits;					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-	}
-
-ATOMIC_AND(8, uint8_t)
-ATOMIC_AND(uchar, uchar_t)
-ATOMIC_AND(16, uint16_t)
-ATOMIC_AND(ushort, ushort_t)
-ATOMIC_AND(32, uint32_t)
-ATOMIC_AND(uint, uint_t)
-ATOMIC_AND(ulong, ulong_t)
-ATOMIC_AND(64, uint64_t)
-
-
-/*
- * New value returning variants
- */
-
-#define	ATOMIC_INC_NV(name, type) \
-	type atomic_inc_##name##_nv(volatile type *target)		\
-	{								\
-		type rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (++(*target));					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_INC_NV(long, unsigned long)
-ATOMIC_INC_NV(8, uint8_t)
-ATOMIC_INC_NV(uchar, uchar_t)
-ATOMIC_INC_NV(16, uint16_t)
-ATOMIC_INC_NV(ushort, ushort_t)
-ATOMIC_INC_NV(32, uint32_t)
-ATOMIC_INC_NV(uint, uint_t)
-ATOMIC_INC_NV(ulong, ulong_t)
-ATOMIC_INC_NV(64, uint64_t)
-
-
-#define	ATOMIC_DEC_NV(name, type) \
-	type atomic_dec_##name##_nv(volatile type *target)		\
-	{								\
-		type rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (--(*target));					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_DEC_NV(long, unsigned long)
-ATOMIC_DEC_NV(8, uint8_t)
-ATOMIC_DEC_NV(uchar, uchar_t)
-ATOMIC_DEC_NV(16, uint16_t)
-ATOMIC_DEC_NV(ushort, ushort_t)
-ATOMIC_DEC_NV(32, uint32_t)
-ATOMIC_DEC_NV(uint, uint_t)
-ATOMIC_DEC_NV(ulong, ulong_t)
-ATOMIC_DEC_NV(64, uint64_t)
-
-
-#define	ATOMIC_ADD_NV(name, type1, type2) \
-	type1 atomic_add_##name##_nv(volatile type1 *target, type2 bits)\
-	{								\
-		type1 rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (*target += bits);					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_ADD_NV(8, uint8_t, int8_t)
-ATOMIC_ADD_NV(char, uchar_t, signed char)
-ATOMIC_ADD_NV(16, uint16_t, int16_t)
-ATOMIC_ADD_NV(short, ushort_t, short)
-ATOMIC_ADD_NV(32, uint32_t, int32_t)
-ATOMIC_ADD_NV(int, uint_t, int)
-ATOMIC_ADD_NV(long, ulong_t, long)
-ATOMIC_ADD_NV(64, uint64_t, int64_t)
-
-void *
-atomic_add_ptr_nv(volatile void *target, ssize_t bits)
-{
-	void *ptr;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	ptr = (*(caddr_t *)target += bits);
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (ptr);
-}
-
-
-#define	ATOMIC_SUB_NV(name, type1, type2) \
-	type1 atomic_sub_##name##_nv(volatile type1 *target, type2 bits)\
-	{								\
-		type1 rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (*target -= bits);					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_SUB_NV(8, uint8_t, int8_t)
-ATOMIC_SUB_NV(char, uchar_t, signed char)
-ATOMIC_SUB_NV(16, uint16_t, int16_t)
-ATOMIC_SUB_NV(short, ushort_t, short)
-ATOMIC_SUB_NV(32, uint32_t, int32_t)
-ATOMIC_SUB_NV(int, uint_t, int)
-ATOMIC_SUB_NV(long, ulong_t, long)
-ATOMIC_SUB_NV(64, uint64_t, int64_t)
-
-void *
-atomic_sub_ptr_nv(volatile void *target, ssize_t bits)
-{
-	void *ptr;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	ptr = (*(caddr_t *)target -= bits);
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (ptr);
-}
-
-
-#define	ATOMIC_OR_NV(name, type) \
-	type atomic_or_##name##_nv(volatile type *target, type bits)	\
-	{								\
-		type rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (*target |= bits);					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_OR_NV(long, unsigned long)
-ATOMIC_OR_NV(8, uint8_t)
-ATOMIC_OR_NV(uchar, uchar_t)
-ATOMIC_OR_NV(16, uint16_t)
-ATOMIC_OR_NV(ushort, ushort_t)
-ATOMIC_OR_NV(32, uint32_t)
-ATOMIC_OR_NV(uint, uint_t)
-ATOMIC_OR_NV(ulong, ulong_t)
-ATOMIC_OR_NV(64, uint64_t)
-
-
-#define	ATOMIC_AND_NV(name, type) \
-	type atomic_and_##name##_nv(volatile type *target, type bits)	\
-	{								\
-		type rc;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		rc = (*target &= bits);					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (rc);						\
-	}
-
-ATOMIC_AND_NV(long, unsigned long)
-ATOMIC_AND_NV(8, uint8_t)
-ATOMIC_AND_NV(uchar, uchar_t)
-ATOMIC_AND_NV(16, uint16_t)
-ATOMIC_AND_NV(ushort, ushort_t)
-ATOMIC_AND_NV(32, uint32_t)
-ATOMIC_AND_NV(uint, uint_t)
-ATOMIC_AND_NV(ulong, ulong_t)
-ATOMIC_AND_NV(64, uint64_t)
-
-
-/*
- *  If *arg1 == arg2, set *arg1 = arg3; return old value
- */
-
-#define	ATOMIC_CAS(name, type) \
-	type atomic_cas_##name(volatile type *target, type arg1, type arg2) \
-	{								\
-		type old;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		old = *target;						\
-		if (old == arg1)					\
-			*target = arg2;					\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (old);						\
-	}
-
-ATOMIC_CAS(8, uint8_t)
-ATOMIC_CAS(uchar, uchar_t)
-ATOMIC_CAS(16, uint16_t)
-ATOMIC_CAS(ushort, ushort_t)
-ATOMIC_CAS(32, uint32_t)
-ATOMIC_CAS(uint, uint_t)
-ATOMIC_CAS(ulong, ulong_t)
-ATOMIC_CAS(64, uint64_t)
-
-void *
-atomic_cas_ptr(volatile void *target, void *arg1, void *arg2)
-{
-	void *old;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	old = *(void **)target;
-	if (old == arg1)
-		*(void **)target = arg2;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (old);
-}
-
-
-/*
- * Swap target and return old value
- */
-
-#define	ATOMIC_SWAP(name, type) \
-	type atomic_swap_##name(volatile type *target, type bits)	\
-	{								\
-		type old;						\
-		VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);	\
-		old = *target;						\
-		*target = bits;						\
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);	\
-		return (old);						\
-	}
-
-ATOMIC_SWAP(8, uint8_t)
-ATOMIC_SWAP(uchar, uchar_t)
-ATOMIC_SWAP(16, uint16_t)
-ATOMIC_SWAP(ushort, ushort_t)
-ATOMIC_SWAP(32, uint32_t)
-ATOMIC_SWAP(uint, uint_t)
-ATOMIC_SWAP(ulong, ulong_t)
-ATOMIC_SWAP(64, uint64_t)
-/* END CSTYLED */
-
-void *
-atomic_swap_ptr(volatile void *target, void *bits)
-{
-	void *old;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	old = *(void **)target;
-	*(void **)target = bits;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (old);
-}
-
-
-int
-atomic_set_long_excl(volatile ulong_t *target, uint_t value)
-{
-	ulong_t bit;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	bit = (1UL << value);
-	if ((*target & bit) != 0) {
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-		return (-1);
-	}
-	*target |= bit;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (0);
-}
-
-int
-atomic_clear_long_excl(volatile ulong_t *target, uint_t value)
-{
-	ulong_t bit;
-
-	VERIFY3S(pthread_mutex_lock(&atomic_lock), ==, 0);
-	bit = (1UL << value);
-	if ((*target & bit) != 0) {
-		VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-		return (-1);
-	}
-	*target &= ~bit;
-	VERIFY3S(pthread_mutex_unlock(&atomic_lock), ==, 0);
-
-	return (0);
-}
-
-void
-membar_enter(void)
-{
-	/* XXX - Implement me */
-}
-
-void
-membar_exit(void)
-{
-	/* XXX - Implement me */
-}
-
-void
-membar_producer(void)
-{
-	/* XXX - Implement me */
-}
-
-void
-membar_consumer(void)
-{
-	/* XXX - Implement me */
-}
-
-/* Legacy kernel interfaces; they will go away (eventually). */
-
-uint8_t
-cas8(uint8_t *target, uint8_t arg1, uint8_t arg2)
-{
-	return (atomic_cas_8(target, arg1, arg2));
-}
-
-uint32_t
-cas32(uint32_t *target, uint32_t arg1, uint32_t arg2)
-{
-	return (atomic_cas_32(target, arg1, arg2));
-}
-
-uint64_t
-cas64(uint64_t *target, uint64_t arg1, uint64_t arg2)
-{
-	return (atomic_cas_64(target, arg1, arg2));
-}
-
-ulong_t
-caslong(ulong_t *target, ulong_t arg1, ulong_t arg2)
-{
-	return (atomic_cas_ulong(target, arg1, arg2));
-}
-
-void *
-casptr(void *target, void *arg1, void *arg2)
-{
-	return (atomic_cas_ptr(target, arg1, arg2));
-}
-
-void
-atomic_and_long(ulong_t *target, ulong_t bits)
-{
-	return (atomic_and_ulong(target, bits));
-}
-
-void
-atomic_or_long(ulong_t *target, ulong_t bits)
-{
-	return (atomic_or_ulong(target, bits));
-}

diff --git a/zfs/lib/libspl/asm-i386/Makefile.am b/zfs/lib/libspl/asm-i386/Makefile.am
deleted file mode 100644
index e112610..0000000
--- a/zfs/lib/libspl/asm-i386/Makefile.am
+++ /dev/null

@@ -1 +0,0 @@
-noinst_HEADERS = atomic.S

diff --git a/zfs/lib/libspl/asm-i386/atomic.S b/zfs/lib/libspl/asm-i386/atomic.S
deleted file mode 100644
index 3086d55..0000000
--- a/zfs/lib/libspl/asm-i386/atomic.S
+++ /dev/null

@@ -1,836 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-	.ident	"%Z%%M%	%I%	%E% SMI"
-
-	.file	"%M%"
-
-#define _ASM
-#include <ia32/sys/asm_linkage.h>
-
-	ENTRY(atomic_inc_8)
-	ALTENTRY(atomic_inc_uchar)
-	movl	4(%esp), %eax
-	lock
-	incb	(%eax)
-	ret
-	SET_SIZE(atomic_inc_uchar)
-	SET_SIZE(atomic_inc_8)
-
-	ENTRY(atomic_inc_16)
-	ALTENTRY(atomic_inc_ushort)
-	movl	4(%esp), %eax
-	lock
-	incw	(%eax)
-	ret
-	SET_SIZE(atomic_inc_ushort)
-	SET_SIZE(atomic_inc_16)
-
-	ENTRY(atomic_inc_32)
-	ALTENTRY(atomic_inc_uint)
-	ALTENTRY(atomic_inc_ulong)
-	movl	4(%esp), %eax
-	lock
-	incl	(%eax)
-	ret
-	SET_SIZE(atomic_inc_ulong)
-	SET_SIZE(atomic_inc_uint)
-	SET_SIZE(atomic_inc_32)
-
-	ENTRY(atomic_inc_8_nv)
-	ALTENTRY(atomic_inc_uchar_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	leal	1(%eax), %ecx
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_inc_uchar_nv)
-	SET_SIZE(atomic_inc_8_nv)
-
-	ENTRY(atomic_inc_16_nv)
-	ALTENTRY(atomic_inc_ushort_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	leal	1(%eax), %ecx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_inc_ushort_nv)
-	SET_SIZE(atomic_inc_16_nv)
-
-	ENTRY(atomic_inc_32_nv)
-	ALTENTRY(atomic_inc_uint_nv)
-	ALTENTRY(atomic_inc_ulong_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	leal	1(%eax), %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_inc_ulong_nv)
-	SET_SIZE(atomic_inc_uint_nv)
-	SET_SIZE(atomic_inc_32_nv)
-
-	/*
-	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
-	 * separated, you need to also edit the libc i386 platform
-	 * specific mapfile and remove the NODYNSORT attribute
-	 * from atomic_inc_64_nv.
-	 */
-	ENTRY(atomic_inc_64)
-	ALTENTRY(atomic_inc_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	xorl	%ebx, %ebx
-	xorl	%ecx, %ecx
-	incl	%ebx
-	addl	%eax, %ebx
-	adcl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_inc_64_nv)
-	SET_SIZE(atomic_inc_64)
-
-	ENTRY(atomic_dec_8)
-	ALTENTRY(atomic_dec_uchar)
-	movl	4(%esp), %eax
-	lock
-	decb	(%eax)
-	ret
-	SET_SIZE(atomic_dec_uchar)
-	SET_SIZE(atomic_dec_8)
-
-	ENTRY(atomic_dec_16)
-	ALTENTRY(atomic_dec_ushort)
-	movl	4(%esp), %eax
-	lock
-	decw	(%eax)
-	ret
-	SET_SIZE(atomic_dec_ushort)
-	SET_SIZE(atomic_dec_16)
-
-	ENTRY(atomic_dec_32)
-	ALTENTRY(atomic_dec_uint)
-	ALTENTRY(atomic_dec_ulong)
-	movl	4(%esp), %eax
-	lock
-	decl	(%eax)
-	ret
-	SET_SIZE(atomic_dec_ulong)
-	SET_SIZE(atomic_dec_uint)
-	SET_SIZE(atomic_dec_32)
-
-	ENTRY(atomic_dec_8_nv)
-	ALTENTRY(atomic_dec_uchar_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	leal	-1(%eax), %ecx
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_dec_uchar_nv)
-	SET_SIZE(atomic_dec_8_nv)
-
-	ENTRY(atomic_dec_16_nv)
-	ALTENTRY(atomic_dec_ushort_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	leal	-1(%eax), %ecx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_dec_ushort_nv)
-	SET_SIZE(atomic_dec_16_nv)
-
-	ENTRY(atomic_dec_32_nv)
-	ALTENTRY(atomic_dec_uint_nv)
-	ALTENTRY(atomic_dec_ulong_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	leal	-1(%eax), %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_dec_ulong_nv)
-	SET_SIZE(atomic_dec_uint_nv)
-	SET_SIZE(atomic_dec_32_nv)
-
-	/*
-	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
-	 * separated, it is important to edit the libc i386 platform
-	 * specific mapfile and remove the NODYNSORT attribute
-	 * from atomic_dec_64_nv.
-	 */
-	ENTRY(atomic_dec_64)
-	ALTENTRY(atomic_dec_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	xorl	%ebx, %ebx
-	xorl	%ecx, %ecx
-	not	%ecx
-	not	%ebx
-	addl	%eax, %ebx
-	adcl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_dec_64_nv)
-	SET_SIZE(atomic_dec_64)
-
-	ENTRY(atomic_add_8)
-	ALTENTRY(atomic_add_char)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	addb	%cl, (%eax)
-	ret
-	SET_SIZE(atomic_add_char)
-	SET_SIZE(atomic_add_8)
-
-	ENTRY(atomic_add_16)
-	ALTENTRY(atomic_add_short)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	addw	%cx, (%eax)
-	ret
-	SET_SIZE(atomic_add_short)
-	SET_SIZE(atomic_add_16)
-
-	ENTRY(atomic_add_32)
-	ALTENTRY(atomic_add_int)
-	ALTENTRY(atomic_add_ptr)
-	ALTENTRY(atomic_add_long)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	addl	%ecx, (%eax)
-	ret
-	SET_SIZE(atomic_add_long)
-	SET_SIZE(atomic_add_ptr)
-	SET_SIZE(atomic_add_int)
-	SET_SIZE(atomic_add_32)
-
-	ENTRY(atomic_sub_8)
-	ALTENTRY(atomic_sub_char)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	subb	%cl, (%eax)
-	ret
-	SET_SIZE(atomic_sub_char)
-	SET_SIZE(atomic_sub_8)
-
-	ENTRY(atomic_sub_16)
-	ALTENTRY(atomic_sub_short)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	subw	%cx, (%eax)
-	ret
-	SET_SIZE(atomic_sub_short)
-	SET_SIZE(atomic_sub_16)
-
-	ENTRY(atomic_sub_32)
-	ALTENTRY(atomic_sub_int)
-	ALTENTRY(atomic_sub_ptr)
-	ALTENTRY(atomic_sub_long)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	subl	%ecx, (%eax)
-	ret
-	SET_SIZE(atomic_sub_long)
-	SET_SIZE(atomic_sub_ptr)
-	SET_SIZE(atomic_sub_int)
-	SET_SIZE(atomic_sub_32)
-
-	ENTRY(atomic_or_8)
-	ALTENTRY(atomic_or_uchar)
-	movl	4(%esp), %eax
-	movb	8(%esp), %cl
-	lock
-	orb	%cl, (%eax)
-	ret
-	SET_SIZE(atomic_or_uchar)
-	SET_SIZE(atomic_or_8)
-
-	ENTRY(atomic_or_16)
-	ALTENTRY(atomic_or_ushort)
-	movl	4(%esp), %eax
-	movw	8(%esp), %cx
-	lock
-	orw	%cx, (%eax)
-	ret
-	SET_SIZE(atomic_or_ushort)
-	SET_SIZE(atomic_or_16)
-
-	ENTRY(atomic_or_32)
-	ALTENTRY(atomic_or_uint)
-	ALTENTRY(atomic_or_ulong)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	orl	%ecx, (%eax)
-	ret
-	SET_SIZE(atomic_or_ulong)
-	SET_SIZE(atomic_or_uint)
-	SET_SIZE(atomic_or_32)
-
-	ENTRY(atomic_and_8)
-	ALTENTRY(atomic_and_uchar)
-	movl	4(%esp), %eax
-	movb	8(%esp), %cl
-	lock
-	andb	%cl, (%eax)
-	ret
-	SET_SIZE(atomic_and_uchar)
-	SET_SIZE(atomic_and_8)
-
-	ENTRY(atomic_and_16)
-	ALTENTRY(atomic_and_ushort)
-	movl	4(%esp), %eax
-	movw	8(%esp), %cx
-	lock
-	andw	%cx, (%eax)
-	ret
-	SET_SIZE(atomic_and_ushort)
-	SET_SIZE(atomic_and_16)
-
-	ENTRY(atomic_and_32)
-	ALTENTRY(atomic_and_uint)
-	ALTENTRY(atomic_and_ulong)
-	movl	4(%esp), %eax
-	movl	8(%esp), %ecx
-	lock
-	andl	%ecx, (%eax)
-	ret
-	SET_SIZE(atomic_and_ulong)
-	SET_SIZE(atomic_and_uint)
-	SET_SIZE(atomic_and_32)
-
-	ENTRY(atomic_add_8_nv)
-	ALTENTRY(atomic_add_char_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	movl	8(%esp), %ecx
-	addb	%al, %cl
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_add_char_nv)
-	SET_SIZE(atomic_add_8_nv)
-
-	ENTRY(atomic_add_16_nv)
-	ALTENTRY(atomic_add_short_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	movl	8(%esp), %ecx
-	addw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_add_short_nv)
-	SET_SIZE(atomic_add_16_nv)
-
-	ENTRY(atomic_add_32_nv)
-	ALTENTRY(atomic_add_int_nv)
-	ALTENTRY(atomic_add_ptr_nv)
-	ALTENTRY(atomic_add_long_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	movl	8(%esp), %ecx
-	addl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_add_long_nv)
-	SET_SIZE(atomic_add_ptr_nv)
-	SET_SIZE(atomic_add_int_nv)
-	SET_SIZE(atomic_add_32_nv)
-
-	ENTRY(atomic_sub_8_nv)
-	ALTENTRY(atomic_sub_char_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	movl	8(%esp), %ecx
-	subb	%al, %cl
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_sub_char_nv)
-	SET_SIZE(atomic_sub_8_nv)
-
-	ENTRY(atomic_sub_16_nv)
-	ALTENTRY(atomic_sub_short_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	movl	8(%esp), %ecx
-	subw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_sub_short_nv)
-	SET_SIZE(atomic_sub_16_nv)
-
-	ENTRY(atomic_sub_32_nv)
-	ALTENTRY(atomic_sub_int_nv)
-	ALTENTRY(atomic_sub_ptr_nv)
-	ALTENTRY(atomic_sub_long_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	movl	8(%esp), %ecx
-	subl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_sub_long_nv)
-	SET_SIZE(atomic_sub_ptr_nv)
-	SET_SIZE(atomic_sub_int_nv)
-	SET_SIZE(atomic_sub_32_nv)
-
-	/*
-	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
-	 * separated, it is important to edit the libc i386 platform
-	 * specific mapfile and remove the NODYNSORT attribute
-	 * from atomic_add_64_nv.
-	 */
-	ENTRY(atomic_add_64)
-	ALTENTRY(atomic_add_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	movl	16(%esp), %ebx
-	movl	20(%esp), %ecx
-	addl	%eax, %ebx
-	adcl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_add_64_nv)
-	SET_SIZE(atomic_add_64)
-
-	ENTRY(atomic_sub_64)
-	ALTENTRY(atomic_sub_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	movl	16(%esp), %ebx
-	movl	20(%esp), %ecx
-	subl	%eax, %ebx
-	sbbl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_sub_64_nv)
-	SET_SIZE(atomic_sub_64)
-
-	ENTRY(atomic_or_8_nv)
-	ALTENTRY(atomic_or_uchar_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	movl	8(%esp), %ecx
-	orb	%al, %cl
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_or_uchar_nv)
-	SET_SIZE(atomic_or_8_nv)
-
-	ENTRY(atomic_or_16_nv)
-	ALTENTRY(atomic_or_ushort_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	movl	8(%esp), %ecx
-	orw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_or_ushort_nv)
-	SET_SIZE(atomic_or_16_nv)
-
-	ENTRY(atomic_or_32_nv)
-	ALTENTRY(atomic_or_uint_nv)
-	ALTENTRY(atomic_or_ulong_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	movl	8(%esp), %ecx
-	orl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_or_ulong_nv)
-	SET_SIZE(atomic_or_uint_nv)
-	SET_SIZE(atomic_or_32_nv)
-
-	/*
-	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
-	 * separated, it is important to edit the libc i386 platform
-	 * specific mapfile and remove the NODYNSORT attribute
-	 * from atomic_or_64_nv.
-	 */
-	ENTRY(atomic_or_64)
-	ALTENTRY(atomic_or_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	movl	16(%esp), %ebx
-	movl	20(%esp), %ecx
-	orl	%eax, %ebx
-	orl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_or_64_nv)
-	SET_SIZE(atomic_or_64)
-
-	ENTRY(atomic_and_8_nv)
-	ALTENTRY(atomic_and_uchar_nv)
-	movl	4(%esp), %edx
-	movb	(%edx), %al
-1:
-	movl	8(%esp), %ecx
-	andb	%al, %cl
-	lock
-	cmpxchgb %cl, (%edx)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_and_uchar_nv)
-	SET_SIZE(atomic_and_8_nv)
-
-	ENTRY(atomic_and_16_nv)
-	ALTENTRY(atomic_and_ushort_nv)
-	movl	4(%esp), %edx
-	movw	(%edx), %ax
-1:
-	movl	8(%esp), %ecx
-	andw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%edx)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_and_ushort_nv)
-	SET_SIZE(atomic_and_16_nv)
-
-	ENTRY(atomic_and_32_nv)
-	ALTENTRY(atomic_and_uint_nv)
-	ALTENTRY(atomic_and_ulong_nv)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-1:
-	movl	8(%esp), %ecx
-	andl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_and_ulong_nv)
-	SET_SIZE(atomic_and_uint_nv)
-	SET_SIZE(atomic_and_32_nv)
-
-	/*
-	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
-	 * separated, it is important to edit the libc i386 platform
-	 * specific mapfile and remove the NODYNSORT attribute
-	 * from atomic_and_64_nv.
-	 */
-	ENTRY(atomic_and_64)
-	ALTENTRY(atomic_and_64_nv)
-	pushl	%edi
-	pushl	%ebx
-	movl	12(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-1:
-	movl	16(%esp), %ebx
-	movl	20(%esp), %ecx
-	andl	%eax, %ebx
-	andl	%edx, %ecx
-	lock
-	cmpxchg8b (%edi)
-	jne	1b
-	movl	%ebx, %eax
-	movl	%ecx, %edx
-	popl	%ebx
-	popl	%edi
-	ret
-	SET_SIZE(atomic_and_64_nv)
-	SET_SIZE(atomic_and_64)
-
-	ENTRY(atomic_cas_8)
-	ALTENTRY(atomic_cas_uchar)
-	movl	4(%esp), %edx
-	movzbl	8(%esp), %eax
-	movb	12(%esp), %cl
-	lock
-	cmpxchgb %cl, (%edx)
-	ret
-	SET_SIZE(atomic_cas_uchar)
-	SET_SIZE(atomic_cas_8)
-
-	ENTRY(atomic_cas_16)
-	ALTENTRY(atomic_cas_ushort)
-	movl	4(%esp), %edx
-	movzwl	8(%esp), %eax
-	movw	12(%esp), %cx
-	lock
-	cmpxchgw %cx, (%edx)
-	ret
-	SET_SIZE(atomic_cas_ushort)
-	SET_SIZE(atomic_cas_16)
-
-	ENTRY(atomic_cas_32)
-	ALTENTRY(atomic_cas_uint)
-	ALTENTRY(atomic_cas_ulong)
-	ALTENTRY(atomic_cas_ptr)
-	movl	4(%esp), %edx
-	movl	8(%esp), %eax
-	movl	12(%esp), %ecx
-	lock
-	cmpxchgl %ecx, (%edx)
-	ret
-	SET_SIZE(atomic_cas_ptr)
-	SET_SIZE(atomic_cas_ulong)
-	SET_SIZE(atomic_cas_uint)
-	SET_SIZE(atomic_cas_32)
-
-	ENTRY(atomic_cas_64)
-	pushl	%ebx
-	pushl	%esi
-	movl	12(%esp), %esi
-	movl	16(%esp), %eax
-	movl	20(%esp), %edx
-	movl	24(%esp), %ebx
-	movl	28(%esp), %ecx
-	lock
-	cmpxchg8b (%esi)
-	popl	%esi
-	popl	%ebx
-	ret
-	SET_SIZE(atomic_cas_64)
-
-	ENTRY(atomic_swap_8)
-	ALTENTRY(atomic_swap_uchar)
-	movl	4(%esp), %edx
-	movzbl	8(%esp), %eax
-	lock
-	xchgb	%al, (%edx)
-	ret
-	SET_SIZE(atomic_swap_uchar)
-	SET_SIZE(atomic_swap_8)
-
-	ENTRY(atomic_swap_16)
-	ALTENTRY(atomic_swap_ushort)
-	movl	4(%esp), %edx
-	movzwl	8(%esp), %eax
-	lock
-	xchgw	%ax, (%edx)
-	ret
-	SET_SIZE(atomic_swap_ushort)
-	SET_SIZE(atomic_swap_16)
-
-	ENTRY(atomic_swap_32)
-	ALTENTRY(atomic_swap_uint)
-	ALTENTRY(atomic_swap_ptr)
-	ALTENTRY(atomic_swap_ulong)
-	movl	4(%esp), %edx
-	movl	8(%esp), %eax
-	lock
-	xchgl	%eax, (%edx)
-	ret
-	SET_SIZE(atomic_swap_ulong)
-	SET_SIZE(atomic_swap_ptr)
-	SET_SIZE(atomic_swap_uint)
-	SET_SIZE(atomic_swap_32)
-
-	ENTRY(atomic_swap_64)
-	pushl	%esi
-	pushl	%ebx
-	movl	12(%esp), %esi
-	movl	16(%esp), %ebx
-	movl	20(%esp), %ecx
-	movl	(%esi), %eax
-	movl	4(%esi), %edx
-1:
-	lock
-	cmpxchg8b (%esi)
-	jne	1b
-	popl	%ebx
-	popl	%esi
-	ret
-	SET_SIZE(atomic_swap_64)
-
-	ENTRY(atomic_set_long_excl)
-	movl	4(%esp), %edx
-	movl	8(%esp), %ecx
-	xorl	%eax, %eax
-	lock
-	btsl	%ecx, (%edx)
-	jnc	1f
-	decl	%eax
-1:
-	ret
-	SET_SIZE(atomic_set_long_excl)
-
-	ENTRY(atomic_clear_long_excl)
-	movl	4(%esp), %edx
-	movl	8(%esp), %ecx
-	xorl	%eax, %eax
-	lock
-	btrl	%ecx, (%edx)
-	jc	1f
-	decl	%eax
-1:
-	ret
-	SET_SIZE(atomic_clear_long_excl)
-
-	/*
-	 * NOTE: membar_enter, membar_exit, membar_producer, and 
-	 * membar_consumer are all identical routines. We define them
-	 * separately, instead of using ALTENTRY definitions to alias them
-	 * together, so that DTrace and debuggers will see a unique address
-	 * for them, allowing more accurate tracing.
-	*/
-
-
-	ENTRY(membar_enter)
-	lock
-	xorl	$0, (%esp)
-	ret
-	SET_SIZE(membar_enter)
-
-	ENTRY(membar_exit)
-	lock
-	xorl	$0, (%esp)
-	ret
-	SET_SIZE(membar_exit)
-
-	ENTRY(membar_producer)
-	lock
-	xorl	$0, (%esp)
-	ret
-	SET_SIZE(membar_producer)
-
-	ENTRY(membar_consumer)
-	lock
-	xorl	$0, (%esp)
-	ret
-	SET_SIZE(membar_consumer)
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif

diff --git a/zfs/lib/libspl/asm-x86_64/Makefile.am b/zfs/lib/libspl/asm-x86_64/Makefile.am
deleted file mode 100644
index e112610..0000000
--- a/zfs/lib/libspl/asm-x86_64/Makefile.am
+++ /dev/null

@@ -1 +0,0 @@
-noinst_HEADERS = atomic.S

diff --git a/zfs/lib/libspl/asm-x86_64/atomic.S b/zfs/lib/libspl/asm-x86_64/atomic.S
deleted file mode 100644
index 49c9b2a..0000000
--- a/zfs/lib/libspl/asm-x86_64/atomic.S
+++ /dev/null

@@ -1,687 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-	.ident	"%Z%%M%	%I%	%E% SMI"
-
-	.file	"%M%"
-
-#define _ASM
-#include <ia32/sys/asm_linkage.h>
-
-	ENTRY(atomic_inc_8)
-	ALTENTRY(atomic_inc_uchar)
-	lock
-	incb	(%rdi)
-	ret
-	SET_SIZE(atomic_inc_uchar)
-	SET_SIZE(atomic_inc_8)
-
-	ENTRY(atomic_inc_16)
-	ALTENTRY(atomic_inc_ushort)
-	lock
-	incw	(%rdi)
-	ret
-	SET_SIZE(atomic_inc_ushort)
-	SET_SIZE(atomic_inc_16)
-
-	ENTRY(atomic_inc_32)
-	ALTENTRY(atomic_inc_uint)
-	lock
-	incl	(%rdi)
-	ret
-	SET_SIZE(atomic_inc_uint)
-	SET_SIZE(atomic_inc_32)
-
-	ENTRY(atomic_inc_64)
-	ALTENTRY(atomic_inc_ulong)
-	lock
-	incq	(%rdi)
-	ret
-	SET_SIZE(atomic_inc_ulong)
-	SET_SIZE(atomic_inc_64)
-
-	ENTRY(atomic_inc_8_nv)
-	ALTENTRY(atomic_inc_uchar_nv)
-	movb	(%rdi), %al
-1:
-	leaq	1(%rax), %rcx
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_inc_uchar_nv)
-	SET_SIZE(atomic_inc_8_nv)
-
-	ENTRY(atomic_inc_16_nv)
-	ALTENTRY(atomic_inc_ushort_nv)
-	movw	(%rdi), %ax
-1:
-	leaq	1(%rax), %rcx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_inc_ushort_nv)
-	SET_SIZE(atomic_inc_16_nv)
-
-	ENTRY(atomic_inc_32_nv)
-	ALTENTRY(atomic_inc_uint_nv)
-	movl	(%rdi), %eax
-1:
-	leaq	1(%rax), %rcx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_inc_uint_nv)
-	SET_SIZE(atomic_inc_32_nv)
-
-	ENTRY(atomic_inc_64_nv)
-	ALTENTRY(atomic_inc_ulong_nv)
-	movq	(%rdi), %rax
-1:
-	leaq	1(%rax), %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_inc_ulong_nv)
-	SET_SIZE(atomic_inc_64_nv)
-
-	ENTRY(atomic_dec_8)
-	ALTENTRY(atomic_dec_uchar)
-	lock
-	decb	(%rdi)
-	ret
-	SET_SIZE(atomic_dec_uchar)
-	SET_SIZE(atomic_dec_8)
-
-	ENTRY(atomic_dec_16)
-	ALTENTRY(atomic_dec_ushort)
-	lock
-	decw	(%rdi)
-	ret
-	SET_SIZE(atomic_dec_ushort)
-	SET_SIZE(atomic_dec_16)
-
-	ENTRY(atomic_dec_32)
-	ALTENTRY(atomic_dec_uint)
-	lock
-	decl	(%rdi)
-	ret
-	SET_SIZE(atomic_dec_uint)
-	SET_SIZE(atomic_dec_32)
-
-	ENTRY(atomic_dec_64)
-	ALTENTRY(atomic_dec_ulong)
-	lock
-	decq	(%rdi)
-	ret
-	SET_SIZE(atomic_dec_ulong)
-	SET_SIZE(atomic_dec_64)
-
-	ENTRY(atomic_dec_8_nv)
-	ALTENTRY(atomic_dec_uchar_nv)
-	movb	(%rdi), %al
-1:
-	leaq	-1(%rax), %rcx
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_dec_uchar_nv)
-	SET_SIZE(atomic_dec_8_nv)
-
-	ENTRY(atomic_dec_16_nv)
-	ALTENTRY(atomic_dec_ushort_nv)
-	movw	(%rdi), %ax
-1:
-	leaq	-1(%rax), %rcx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_dec_ushort_nv)
-	SET_SIZE(atomic_dec_16_nv)
-
-	ENTRY(atomic_dec_32_nv)
-	ALTENTRY(atomic_dec_uint_nv)
-	movl	(%rdi), %eax
-1:
-	leaq	-1(%rax), %rcx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_dec_uint_nv)
-	SET_SIZE(atomic_dec_32_nv)
-
-	ENTRY(atomic_dec_64_nv)
-	ALTENTRY(atomic_dec_ulong_nv)
-	movq	(%rdi), %rax
-1:
-	leaq	-1(%rax), %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_dec_ulong_nv)
-	SET_SIZE(atomic_dec_64_nv)
-
-	ENTRY(atomic_add_8)
-	ALTENTRY(atomic_add_char)
-	lock
-	addb	%sil, (%rdi)
-	ret
-	SET_SIZE(atomic_add_char)
-	SET_SIZE(atomic_add_8)
-
-	ENTRY(atomic_add_16)
-	ALTENTRY(atomic_add_short)
-	lock
-	addw	%si, (%rdi)
-	ret
-	SET_SIZE(atomic_add_short)
-	SET_SIZE(atomic_add_16)
-
-	ENTRY(atomic_add_32)
-	ALTENTRY(atomic_add_int)
-	lock
-	addl	%esi, (%rdi)
-	ret
-	SET_SIZE(atomic_add_int)
-	SET_SIZE(atomic_add_32)
-
-	ENTRY(atomic_add_64)
-	ALTENTRY(atomic_add_ptr)
-	ALTENTRY(atomic_add_long)
-	lock
-	addq	%rsi, (%rdi)
-	ret
-	SET_SIZE(atomic_add_long)
-	SET_SIZE(atomic_add_ptr)
-	SET_SIZE(atomic_add_64)
-
-	ENTRY(atomic_sub_8)
-	ALTENTRY(atomic_sub_char)
-	lock
-	subb	%sil, (%rdi)
-	ret
-	SET_SIZE(atomic_sub_char)
-	SET_SIZE(atomic_sub_8)
-
-	ENTRY(atomic_sub_16)
-	ALTENTRY(atomic_sub_short)
-	lock
-	subw	%si, (%rdi)
-	ret
-	SET_SIZE(atomic_sub_short)
-	SET_SIZE(atomic_sub_16)
-
-	ENTRY(atomic_sub_32)
-	ALTENTRY(atomic_sub_int)
-	lock
-	subl	%esi, (%rdi)
-	ret
-	SET_SIZE(atomic_sub_int)
-	SET_SIZE(atomic_sub_32)
-
-	ENTRY(atomic_sub_64)
-	ALTENTRY(atomic_sub_ptr)
-	ALTENTRY(atomic_sub_long)
-	lock
-	subq	%rsi, (%rdi)
-	ret
-	SET_SIZE(atomic_sub_long)
-	SET_SIZE(atomic_sub_ptr)
-	SET_SIZE(atomic_sub_64)
-
-	ENTRY(atomic_or_8)
-	ALTENTRY(atomic_or_uchar)
-	lock
-	orb	%sil, (%rdi)
-	ret
-	SET_SIZE(atomic_or_uchar)
-	SET_SIZE(atomic_or_8)
-
-	ENTRY(atomic_or_16)
-	ALTENTRY(atomic_or_ushort)
-	lock
-	orw	%si, (%rdi)
-	ret
-	SET_SIZE(atomic_or_ushort)
-	SET_SIZE(atomic_or_16)
-
-	ENTRY(atomic_or_32)
-	ALTENTRY(atomic_or_uint)
-	lock
-	orl	%esi, (%rdi)
-	ret
-	SET_SIZE(atomic_or_uint)
-	SET_SIZE(atomic_or_32)
-
-	ENTRY(atomic_or_64)
-	ALTENTRY(atomic_or_ulong)
-	lock
-	orq	%rsi, (%rdi)
-	ret
-	SET_SIZE(atomic_or_ulong)
-	SET_SIZE(atomic_or_64)
-
-	ENTRY(atomic_and_8)
-	ALTENTRY(atomic_and_uchar)
-	lock
-	andb	%sil, (%rdi)
-	ret
-	SET_SIZE(atomic_and_uchar)
-	SET_SIZE(atomic_and_8)
-
-	ENTRY(atomic_and_16)
-	ALTENTRY(atomic_and_ushort)
-	lock
-	andw	%si, (%rdi)
-	ret
-	SET_SIZE(atomic_and_ushort)
-	SET_SIZE(atomic_and_16)
-
-	ENTRY(atomic_and_32)
-	ALTENTRY(atomic_and_uint)
-	lock
-	andl	%esi, (%rdi)
-	ret
-	SET_SIZE(atomic_and_uint)
-	SET_SIZE(atomic_and_32)
-
-	ENTRY(atomic_and_64)
-	ALTENTRY(atomic_and_ulong)
-	lock
-	andq	%rsi, (%rdi)
-	ret
-	SET_SIZE(atomic_and_ulong)
-	SET_SIZE(atomic_and_64)
-
-	ENTRY(atomic_add_8_nv)
-	ALTENTRY(atomic_add_char_nv)
-	movb	(%rdi), %al
-1:
-	movb	%sil, %cl
-	addb	%al, %cl
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_add_char_nv)
-	SET_SIZE(atomic_add_8_nv)
-
-	ENTRY(atomic_add_16_nv)
-	ALTENTRY(atomic_add_short_nv)
-	movw	(%rdi), %ax
-1:
-	movw	%si, %cx
-	addw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_add_short_nv)
-	SET_SIZE(atomic_add_16_nv)
-
-	ENTRY(atomic_add_32_nv)
-	ALTENTRY(atomic_add_int_nv)
-	movl	(%rdi), %eax
-1:
-	movl	%esi, %ecx
-	addl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_add_int_nv)
-	SET_SIZE(atomic_add_32_nv)
-
-	ENTRY(atomic_add_64_nv)
-	ALTENTRY(atomic_add_ptr_nv)
-	ALTENTRY(atomic_add_long_nv)
-	movq	(%rdi), %rax
-1:
-	movq	%rsi, %rcx
-	addq	%rax, %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_add_long_nv)
-	SET_SIZE(atomic_add_ptr_nv)
-	SET_SIZE(atomic_add_64_nv)
-
-	ENTRY(atomic_sub_8_nv)
-	ALTENTRY(atomic_sub_char_nv)
-	movb	(%rdi), %al
-1:
-	movb	%sil, %cl
-	subb	%al, %cl
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_sub_char_nv)
-	SET_SIZE(atomic_sub_8_nv)
-
-	ENTRY(atomic_sub_16_nv)
-	ALTENTRY(atomic_sub_short_nv)
-	movw	(%rdi), %ax
-1:
-	movw	%si, %cx
-	subw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_sub_short_nv)
-	SET_SIZE(atomic_sub_16_nv)
-
-	ENTRY(atomic_sub_32_nv)
-	ALTENTRY(atomic_sub_int_nv)
-	movl	(%rdi), %eax
-1:
-	movl	%esi, %ecx
-	subl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_sub_int_nv)
-	SET_SIZE(atomic_sub_32_nv)
-
-	ENTRY(atomic_sub_64_nv)
-	ALTENTRY(atomic_sub_ptr_nv)
-	ALTENTRY(atomic_sub_long_nv)
-	movq	(%rdi), %rax
-1:
-	movq	%rsi, %rcx
-	subq	%rax, %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_sub_long_nv)
-	SET_SIZE(atomic_sub_ptr_nv)
-	SET_SIZE(atomic_sub_64_nv)
-
-	ENTRY(atomic_and_8_nv)
-	ALTENTRY(atomic_and_uchar_nv)
-	movb	(%rdi), %al
-1:
-	movb	%sil, %cl
-	andb	%al, %cl
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_and_uchar_nv)
-	SET_SIZE(atomic_and_8_nv)
-
-	ENTRY(atomic_and_16_nv)
-	ALTENTRY(atomic_and_ushort_nv)
-	movw	(%rdi), %ax
-1:
-	movw	%si, %cx
-	andw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_and_ushort_nv)
-	SET_SIZE(atomic_and_16_nv)
-
-	ENTRY(atomic_and_32_nv)
-	ALTENTRY(atomic_and_uint_nv)
-	movl	(%rdi), %eax
-1:
-	movl	%esi, %ecx
-	andl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_and_uint_nv)
-	SET_SIZE(atomic_and_32_nv)
-
-	ENTRY(atomic_and_64_nv)
-	ALTENTRY(atomic_and_ulong_nv)
-	movq	(%rdi), %rax
-1:
-	movq	%rsi, %rcx
-	andq	%rax, %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_and_ulong_nv)
-	SET_SIZE(atomic_and_64_nv)
-
-	ENTRY(atomic_or_8_nv)
-	ALTENTRY(atomic_or_uchar_nv)
-	movb	(%rdi), %al
-1:
-	movb	%sil, %cl
-	orb	%al, %cl
-	lock
-	cmpxchgb %cl, (%rdi)
-	jne	1b
-	movzbl	%cl, %eax
-	ret
-	SET_SIZE(atomic_and_uchar_nv)
-	SET_SIZE(atomic_and_8_nv)
-
-	ENTRY(atomic_or_16_nv)
-	ALTENTRY(atomic_or_ushort_nv)
-	movw	(%rdi), %ax
-1:
-	movw	%si, %cx
-	orw	%ax, %cx
-	lock
-	cmpxchgw %cx, (%rdi)
-	jne	1b
-	movzwl	%cx, %eax
-	ret
-	SET_SIZE(atomic_or_ushort_nv)
-	SET_SIZE(atomic_or_16_nv)
-
-	ENTRY(atomic_or_32_nv)
-	ALTENTRY(atomic_or_uint_nv)
-	movl	(%rdi), %eax
-1:
-	movl	%esi, %ecx
-	orl	%eax, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
-	jne	1b
-	movl	%ecx, %eax
-	ret
-	SET_SIZE(atomic_or_uint_nv)
-	SET_SIZE(atomic_or_32_nv)
-
-	ENTRY(atomic_or_64_nv)
-	ALTENTRY(atomic_or_ulong_nv)
-	movq	(%rdi), %rax
-1:
-	movq	%rsi, %rcx
-	orq	%rax, %rcx
-	lock
-	cmpxchgq %rcx, (%rdi)
-	jne	1b
-	movq	%rcx, %rax
-	ret
-	SET_SIZE(atomic_or_ulong_nv)
-	SET_SIZE(atomic_or_64_nv)
-
-	ENTRY(atomic_cas_8)
-	ALTENTRY(atomic_cas_uchar)
-	movzbl	%sil, %eax
-	lock
-	cmpxchgb %dl, (%rdi)
-	ret
-	SET_SIZE(atomic_cas_uchar)
-	SET_SIZE(atomic_cas_8)
-
-	ENTRY(atomic_cas_16)
-	ALTENTRY(atomic_cas_ushort)
-	movzwl	%si, %eax
-	lock
-	cmpxchgw %dx, (%rdi)
-	ret
-	SET_SIZE(atomic_cas_ushort)
-	SET_SIZE(atomic_cas_16)
-
-	ENTRY(atomic_cas_32)
-	ALTENTRY(atomic_cas_uint)
-	movl	%esi, %eax
-	lock
-	cmpxchgl %edx, (%rdi)
-	ret
-	SET_SIZE(atomic_cas_uint)
-	SET_SIZE(atomic_cas_32)
-
-	ENTRY(atomic_cas_64)
-	ALTENTRY(atomic_cas_ulong)
-	ALTENTRY(atomic_cas_ptr)
-	movq	%rsi, %rax
-	lock
-	cmpxchgq %rdx, (%rdi)
-	ret
-	SET_SIZE(atomic_cas_ptr)
-	SET_SIZE(atomic_cas_ulong)
-	SET_SIZE(atomic_cas_64)
-
-	ENTRY(atomic_swap_8)
-	ALTENTRY(atomic_swap_uchar)
-	movzbl	%sil, %eax
-	lock
-	xchgb %al, (%rdi)
-	ret
-	SET_SIZE(atomic_swap_uchar)
-	SET_SIZE(atomic_swap_8)
-
-	ENTRY(atomic_swap_16)
-	ALTENTRY(atomic_swap_ushort)
-	movzwl	%si, %eax
-	lock
-	xchgw %ax, (%rdi)
-	ret
-	SET_SIZE(atomic_swap_ushort)
-	SET_SIZE(atomic_swap_16)
-
-	ENTRY(atomic_swap_32)
-	ALTENTRY(atomic_swap_uint)
-	movl	%esi, %eax
-	lock
-	xchgl %eax, (%rdi)
-	ret
-	SET_SIZE(atomic_swap_uint)
-	SET_SIZE(atomic_swap_32)
-
-	ENTRY(atomic_swap_64)
-	ALTENTRY(atomic_swap_ulong)
-	ALTENTRY(atomic_swap_ptr)
-	movq	%rsi, %rax
-	lock
-	xchgq %rax, (%rdi)
-	ret
-	SET_SIZE(atomic_swap_ptr)
-	SET_SIZE(atomic_swap_ulong)
-	SET_SIZE(atomic_swap_64)
-
-	ENTRY(atomic_set_long_excl)
-	xorl	%eax, %eax
-	lock
-	btsq	%rsi, (%rdi)
-	jnc	1f
-	decl	%eax
-1:
-	ret
-	SET_SIZE(atomic_set_long_excl)
-
-	ENTRY(atomic_clear_long_excl)
-	xorl	%eax, %eax
-	lock
-	btrq	%rsi, (%rdi)
-	jc	1f
-	decl	%eax
-1:
-	ret
-	SET_SIZE(atomic_clear_long_excl)
-
-	/*
-	 * NOTE: membar_enter, and membar_exit are identical routines. 
-	 * We define them separately, instead of using an ALTENTRY
-	 * definitions to alias them together, so that DTrace and
-	 * debuggers will see a unique address for them, allowing 
-	 * more accurate tracing.
-	*/
-
-	ENTRY(membar_enter)
-	mfence
-	ret
-	SET_SIZE(membar_enter)
-
-	ENTRY(membar_exit)
-	mfence
-	ret
-	SET_SIZE(membar_exit)
-
-	ENTRY(membar_producer)
-	sfence
-	ret
-	SET_SIZE(membar_producer)
-
-	ENTRY(membar_consumer)
-	lfence
-	ret
-	SET_SIZE(membar_consumer)
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif

diff --git a/zfs/lib/libspl/assert.c b/zfs/lib/libspl/assert.c
new file mode 100644
index 0000000..94290ae
--- /dev/null
+++ b/zfs/lib/libspl/assert.c

@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+
+int aok = 0;
+
+/*
+ * printf version of libspl_assert.
+ *
+ * Writes the message built from format and the variadic arguments to
+ * stderr, then a newline, then "ASSERT at <file>:<line>:<func>()".
+ * When the global aok is nonzero the function returns to its caller;
+ * otherwise it ends by calling abort().
+ */
+void
+libspl_assertf(const char *file, const char *func, int line,
+    const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	fprintf(stderr, "\n");
+	/* Location line; note that no trailing newline is emitted here. */
+	fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func);
+	va_end(args);
+	/* aok != 0 turns a failed assertion into report-and-continue. */
+	if (aok) {
+		return;
+	}
+	abort();
+}

diff --git a/zfs/lib/libspl/atomic.c b/zfs/lib/libspl/atomic.c
new file mode 100644
index 0000000..4717d81
--- /dev/null
+++ b/zfs/lib/libspl/atomic.c

@@ -0,0 +1,368 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009 by Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <atomic.h>
+
+/*
+ * These are the void returning variants
+ */
+/* BEGIN CSTYLED */
+/*
+ * ATOMIC_INC(name, type) defines atomic_inc_<name>(): add 1 to *target with
+ * __atomic_add_fetch() under __ATOMIC_SEQ_CST, discarding the result.
+ */
+#define	ATOMIC_INC(name, type) \
+	void atomic_inc_##name(volatile type *target)			\
+	{								\
+		(void) __atomic_add_fetch(target, 1, __ATOMIC_SEQ_CST);	\
+	}
+
+ATOMIC_INC(8, uint8_t)
+ATOMIC_INC(uchar, uchar_t)
+ATOMIC_INC(16, uint16_t)
+ATOMIC_INC(ushort, ushort_t)
+ATOMIC_INC(32, uint32_t)
+ATOMIC_INC(uint, uint_t)
+ATOMIC_INC(ulong, ulong_t)
+ATOMIC_INC(64, uint64_t)
+
+
+/*
+ * ATOMIC_DEC(name, type) defines atomic_dec_<name>(): subtract 1 from
+ * *target with __atomic_sub_fetch() under __ATOMIC_SEQ_CST, discarding the
+ * result.
+ */
+#define	ATOMIC_DEC(name, type) \
+	void atomic_dec_##name(volatile type *target)			\
+	{								\
+		(void) __atomic_sub_fetch(target, 1, __ATOMIC_SEQ_CST);	\
+	}
+
+ATOMIC_DEC(8, uint8_t)
+ATOMIC_DEC(uchar, uchar_t)
+ATOMIC_DEC(16, uint16_t)
+ATOMIC_DEC(ushort, ushort_t)
+ATOMIC_DEC(32, uint32_t)
+ATOMIC_DEC(uint, uint_t)
+ATOMIC_DEC(ulong, ulong_t)
+ATOMIC_DEC(64, uint64_t)
+
+
+/*
+ * ATOMIC_ADD(name, type1, type2) defines atomic_add_<name>(): add bits
+ * (of type2) to *target (of type1) with __atomic_add_fetch() under
+ * __ATOMIC_SEQ_CST, discarding the result. Every instantiation below pairs
+ * an unsigned target type with a signed delta type.
+ */
+#define	ATOMIC_ADD(name, type1, type2) \
+	void atomic_add_##name(volatile type1 *target, type2 bits)	\
+	{								\
+		(void) __atomic_add_fetch(target, bits, __ATOMIC_SEQ_CST); \
+	}
+
+ATOMIC_ADD(8, uint8_t, int8_t)
+ATOMIC_ADD(char, uchar_t, signed char)
+ATOMIC_ADD(16, uint16_t, int16_t)
+ATOMIC_ADD(short, ushort_t, short)
+ATOMIC_ADD(32, uint32_t, int32_t)
+ATOMIC_ADD(int, uint_t, int)
+ATOMIC_ADD(long, ulong_t, long)
+ATOMIC_ADD(64, uint64_t, int64_t)
+
+/*
+ * Pointer variant of atomic_add: target is cast to void ** and bits is
+ * added to the stored pointer with __atomic_add_fetch() under
+ * __ATOMIC_SEQ_CST. The result is discarded.
+ */
+void
+atomic_add_ptr(volatile void *target, ssize_t bits)
+{
+	(void) __atomic_add_fetch((void **)target, bits, __ATOMIC_SEQ_CST);
+}
+
+
+/*
+ * ATOMIC_SUB(name, type1, type2) defines atomic_sub_<name>(): subtract bits
+ * (of type2) from *target (of type1) with __atomic_sub_fetch() under
+ * __ATOMIC_SEQ_CST, discarding the result.
+ */
+#define	ATOMIC_SUB(name, type1, type2) \
+	void atomic_sub_##name(volatile type1 *target, type2 bits)	\
+	{								\
+		(void) __atomic_sub_fetch(target, bits, __ATOMIC_SEQ_CST); \
+	}
+
+ATOMIC_SUB(8, uint8_t, int8_t)
+ATOMIC_SUB(char, uchar_t, signed char)
+ATOMIC_SUB(16, uint16_t, int16_t)
+ATOMIC_SUB(short, ushort_t, short)
+ATOMIC_SUB(32, uint32_t, int32_t)
+ATOMIC_SUB(int, uint_t, int)
+ATOMIC_SUB(long, ulong_t, long)
+ATOMIC_SUB(64, uint64_t, int64_t)
+
+/*
+ * Pointer variant of atomic_sub: target is cast to void ** and bits is
+ * subtracted from the stored pointer with __atomic_sub_fetch() under
+ * __ATOMIC_SEQ_CST. The result is discarded.
+ */
+void
+atomic_sub_ptr(volatile void *target, ssize_t bits)
+{
+	(void) __atomic_sub_fetch((void **)target, bits, __ATOMIC_SEQ_CST);
+}
+
+
+/*
+ * ATOMIC_OR(name, type) defines atomic_or_<name>(): bitwise-OR bits into
+ * *target with __atomic_or_fetch() under __ATOMIC_SEQ_CST, discarding the
+ * result.
+ */
+#define	ATOMIC_OR(name, type) \
+	void atomic_or_##name(volatile type *target, type bits)		\
+	{								\
+		(void) __atomic_or_fetch(target, bits, __ATOMIC_SEQ_CST); \
+	}
+
+ATOMIC_OR(8, uint8_t)
+ATOMIC_OR(uchar, uchar_t)
+ATOMIC_OR(16, uint16_t)
+ATOMIC_OR(ushort, ushort_t)
+ATOMIC_OR(32, uint32_t)
+ATOMIC_OR(uint, uint_t)
+ATOMIC_OR(ulong, ulong_t)
+ATOMIC_OR(64, uint64_t)
+
+
+/*
+ * ATOMIC_AND(name, type) defines atomic_and_<name>(): bitwise-AND bits into
+ * *target with __atomic_and_fetch() under __ATOMIC_SEQ_CST, discarding the
+ * result.
+ */
+#define	ATOMIC_AND(name, type) \
+	void atomic_and_##name(volatile type *target, type bits)	\
+	{								\
+		(void) __atomic_and_fetch(target, bits, __ATOMIC_SEQ_CST); \
+	}
+
+ATOMIC_AND(8, uint8_t)
+ATOMIC_AND(uchar, uchar_t)
+ATOMIC_AND(16, uint16_t)
+ATOMIC_AND(ushort, ushort_t)
+ATOMIC_AND(32, uint32_t)
+ATOMIC_AND(uint, uint_t)
+ATOMIC_AND(ulong, ulong_t)
+ATOMIC_AND(64, uint64_t)
+
+
+/*
+ * New value returning variants
+ */
+
+/*
+ * ATOMIC_INC_NV(name, type) defines atomic_inc_<name>_nv(): add 1 to
+ * *target under __ATOMIC_SEQ_CST and return what __atomic_add_fetch()
+ * returns, i.e. the value after the increment.
+ */
+#define	ATOMIC_INC_NV(name, type) \
+	type atomic_inc_##name##_nv(volatile type *target)		\
+	{								\
+		return (__atomic_add_fetch(target, 1, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_INC_NV(8, uint8_t)
+ATOMIC_INC_NV(uchar, uchar_t)
+ATOMIC_INC_NV(16, uint16_t)
+ATOMIC_INC_NV(ushort, ushort_t)
+ATOMIC_INC_NV(32, uint32_t)
+ATOMIC_INC_NV(uint, uint_t)
+ATOMIC_INC_NV(ulong, ulong_t)
+ATOMIC_INC_NV(64, uint64_t)
+
+
+/*
+ * ATOMIC_DEC_NV(name, type) defines atomic_dec_<name>_nv(): subtract 1 from
+ * *target under __ATOMIC_SEQ_CST and return what __atomic_sub_fetch()
+ * returns, i.e. the value after the decrement.
+ */
+#define	ATOMIC_DEC_NV(name, type) \
+	type atomic_dec_##name##_nv(volatile type *target)		\
+	{								\
+		return (__atomic_sub_fetch(target, 1, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_DEC_NV(8, uint8_t)
+ATOMIC_DEC_NV(uchar, uchar_t)
+ATOMIC_DEC_NV(16, uint16_t)
+ATOMIC_DEC_NV(ushort, ushort_t)
+ATOMIC_DEC_NV(32, uint32_t)
+ATOMIC_DEC_NV(uint, uint_t)
+ATOMIC_DEC_NV(ulong, ulong_t)
+ATOMIC_DEC_NV(64, uint64_t)
+
+
+/*
+ * ATOMIC_ADD_NV(name, type1, type2) defines atomic_add_<name>_nv(): add
+ * bits (of type2) to *target (of type1) under __ATOMIC_SEQ_CST and return
+ * what __atomic_add_fetch() returns, i.e. the value after the addition.
+ */
+#define	ATOMIC_ADD_NV(name, type1, type2) \
+	type1 atomic_add_##name##_nv(volatile type1 *target, type2 bits) \
+	{								\
+		return (__atomic_add_fetch(target, bits, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_ADD_NV(8, uint8_t, int8_t)
+ATOMIC_ADD_NV(char, uchar_t, signed char)
+ATOMIC_ADD_NV(16, uint16_t, int16_t)
+ATOMIC_ADD_NV(short, ushort_t, short)
+ATOMIC_ADD_NV(32, uint32_t, int32_t)
+ATOMIC_ADD_NV(int, uint_t, int)
+ATOMIC_ADD_NV(long, ulong_t, long)
+ATOMIC_ADD_NV(64, uint64_t, int64_t)
+
+/*
+ * Pointer variant of atomic_add_*_nv: target is cast to void **, bits is
+ * added to the stored pointer under __ATOMIC_SEQ_CST, and the value
+ * returned by __atomic_add_fetch() (the updated pointer) is returned.
+ */
+void *
+atomic_add_ptr_nv(volatile void *target, ssize_t bits)
+{
+	return (__atomic_add_fetch((void **)target, bits, __ATOMIC_SEQ_CST));
+}
+
+
+/*
+ * ATOMIC_SUB_NV(name, type1, type2) defines atomic_sub_<name>_nv():
+ * subtract bits (of type2) from *target (of type1) under __ATOMIC_SEQ_CST
+ * and return what __atomic_sub_fetch() returns, i.e. the value after the
+ * subtraction.
+ */
+#define	ATOMIC_SUB_NV(name, type1, type2) \
+	type1 atomic_sub_##name##_nv(volatile type1 *target, type2 bits) \
+	{								\
+		return (__atomic_sub_fetch(target, bits, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_SUB_NV(8, uint8_t, int8_t)
+ATOMIC_SUB_NV(char, uchar_t, signed char)
+ATOMIC_SUB_NV(16, uint16_t, int16_t)
+ATOMIC_SUB_NV(short, ushort_t, short)
+ATOMIC_SUB_NV(32, uint32_t, int32_t)
+ATOMIC_SUB_NV(int, uint_t, int)
+ATOMIC_SUB_NV(long, ulong_t, long)
+ATOMIC_SUB_NV(64, uint64_t, int64_t)
+
+/*
+ * Pointer variant of atomic_sub_*_nv: target is cast to void **, bits is
+ * subtracted from the stored pointer under __ATOMIC_SEQ_CST, and the value
+ * returned by __atomic_sub_fetch() (the updated pointer) is returned.
+ */
+void *
+atomic_sub_ptr_nv(volatile void *target, ssize_t bits)
+{
+	return (__atomic_sub_fetch((void **)target, bits, __ATOMIC_SEQ_CST));
+}
+
+
+/*
+ * ATOMIC_OR_NV(name, type) defines atomic_or_<name>_nv(): bitwise-OR bits
+ * into *target under __ATOMIC_SEQ_CST and return what __atomic_or_fetch()
+ * returns, i.e. the value after the OR.
+ */
+#define	ATOMIC_OR_NV(name, type) \
+	type atomic_or_##name##_nv(volatile type *target, type bits)	\
+	{								\
+		return (__atomic_or_fetch(target, bits, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_OR_NV(8, uint8_t)
+ATOMIC_OR_NV(uchar, uchar_t)
+ATOMIC_OR_NV(16, uint16_t)
+ATOMIC_OR_NV(ushort, ushort_t)
+ATOMIC_OR_NV(32, uint32_t)
+ATOMIC_OR_NV(uint, uint_t)
+ATOMIC_OR_NV(ulong, ulong_t)
+ATOMIC_OR_NV(64, uint64_t)
+
+
+/*
+ * ATOMIC_AND_NV(name, type) defines atomic_and_<name>_nv(): bitwise-AND
+ * bits into *target under __ATOMIC_SEQ_CST and return what
+ * __atomic_and_fetch() returns, i.e. the value after the AND.
+ */
+#define	ATOMIC_AND_NV(name, type) \
+	type atomic_and_##name##_nv(volatile type *target, type bits)	\
+	{								\
+		return (__atomic_and_fetch(target, bits, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_AND_NV(8, uint8_t)
+ATOMIC_AND_NV(uchar, uchar_t)
+ATOMIC_AND_NV(16, uint16_t)
+ATOMIC_AND_NV(ushort, ushort_t)
+ATOMIC_AND_NV(32, uint32_t)
+ATOMIC_AND_NV(uint, uint_t)
+ATOMIC_AND_NV(ulong, ulong_t)
+ATOMIC_AND_NV(64, uint64_t)
+
+
+/*
+ * If *tgt == exp, set *tgt = des; return old value
+ *
+ * This may not look right on the first pass (or the sixteenth), but,
+ * from https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html:
+ * > If they are not equal, the operation is a read
+ * > and the current contents of *ptr are written into *expected.
+ * And, in the converse case, exp is already *target by definition.
+ */
+
+/*
+ * ATOMIC_CAS(name, type) defines atomic_cas_<name>(target, exp, des): a
+ * strong compare-exchange (weak argument B_FALSE) using __ATOMIC_SEQ_CST
+ * for both the success and failure orderings. The local exp is returned
+ * after the call; on a failed comparison the builtin has overwritten it
+ * with the current contents of *target.
+ */
+#define	ATOMIC_CAS(name, type) \
+	type atomic_cas_##name(volatile type *target, type exp, type des) \
+	{								\
+		__atomic_compare_exchange_n(target, &exp, des, B_FALSE,	\
+		    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);		\
+		return (exp);						\
+	}
+
+ATOMIC_CAS(8, uint8_t)
+ATOMIC_CAS(uchar, uchar_t)
+ATOMIC_CAS(16, uint16_t)
+ATOMIC_CAS(ushort, ushort_t)
+ATOMIC_CAS(32, uint32_t)
+ATOMIC_CAS(uint, uint_t)
+ATOMIC_CAS(ulong, ulong_t)
+ATOMIC_CAS(64, uint64_t)
+
+/*
+ * Pointer variant of atomic_cas: target is cast to void ** and a strong
+ * compare-exchange (weak argument B_FALSE) is attempted with
+ * __ATOMIC_SEQ_CST for both the success and failure orderings. Returns
+ * exp as left by the builtin.
+ */
+void *
+atomic_cas_ptr(volatile void *target, void *exp, void *des)
+{
+
+	__atomic_compare_exchange_n((void **)target, &exp, des, B_FALSE,
+	    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+	return (exp);
+}
+
+
+/*
+ * Swap target and return old value
+ */
+
+/*
+ * ATOMIC_SWAP(name, type) defines atomic_swap_<name>(): store bits into
+ * *target with __atomic_exchange_n() under __ATOMIC_SEQ_CST and return the
+ * value the builtin hands back (the previous contents).
+ */
+#define	ATOMIC_SWAP(name, type) \
+	type atomic_swap_##name(volatile type *target, type bits)	\
+	{								\
+		return (__atomic_exchange_n(target, bits, __ATOMIC_SEQ_CST)); \
+	}
+
+ATOMIC_SWAP(8, uint8_t)
+ATOMIC_SWAP(uchar, uchar_t)
+ATOMIC_SWAP(16, uint16_t)
+ATOMIC_SWAP(ushort, ushort_t)
+ATOMIC_SWAP(32, uint32_t)
+ATOMIC_SWAP(uint, uint_t)
+ATOMIC_SWAP(ulong, ulong_t)
+ATOMIC_SWAP(64, uint64_t)
+/* END CSTYLED */
+
+/*
+ * Pointer variant of atomic_swap: target is cast to void **, bits is
+ * stored with __atomic_exchange_n() under __ATOMIC_SEQ_CST, and the value
+ * the builtin hands back (the previous pointer) is returned.
+ */
+void *
+atomic_swap_ptr(volatile void *target, void *bits)
+{
+	return (__atomic_exchange_n((void **)target, bits, __ATOMIC_SEQ_CST));
+}
+
+#ifndef _LP64
+/*
+ * Return *target using a relaxed (__ATOMIC_RELAXED) atomic load.
+ * Only built when _LP64 is not defined.
+ */
+uint64_t
+atomic_load_64(volatile uint64_t *target)
+{
+	return (__atomic_load_n(target, __ATOMIC_RELAXED));
+}
+
+/*
+ * Store bits into *target using a relaxed (__ATOMIC_RELAXED) atomic store.
+ * Only built when _LP64 is not defined.
+ */
+void
+atomic_store_64(volatile uint64_t *target, uint64_t bits)
+{
+	/*
+	 * __atomic_store_n() yields no value, so call it as a plain
+	 * statement: ISO C does not permit "return <expression>" in a
+	 * function whose return type is void.
+	 */
+	__atomic_store_n(target, bits, __ATOMIC_RELAXED);
+}
+#endif
+
+/*
+ * Atomically set bit number value in *target (sequentially consistent).
+ * Returns -1 when that bit was already set beforehand, 0 otherwise.
+ */
+int
+atomic_set_long_excl(volatile ulong_t *target, uint_t value)
+{
+	const ulong_t mask = 1UL << value;
+
+	/* fetch_or hands back the word as it was before the bit was set. */
+	if (__atomic_fetch_or(target, mask, __ATOMIC_SEQ_CST) & mask)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Atomically clear bit number value in *target (sequentially consistent).
+ * Returns 0 when that bit was set beforehand, -1 when it was already clear.
+ */
+int
+atomic_clear_long_excl(volatile ulong_t *target, uint_t value)
+{
+	const ulong_t mask = 1UL << value;
+
+	/* fetch_and hands back the word as it was before the bit was cleared. */
+	if (__atomic_fetch_and(target, ~mask, __ATOMIC_SEQ_CST) & mask)
+		return (0);
+	return (-1);
+}
+
+/* Issue a sequentially-consistent (__ATOMIC_SEQ_CST) thread fence. */
+void
+membar_enter(void)
+{
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+/* Issue a sequentially-consistent (__ATOMIC_SEQ_CST) thread fence. */
+void
+membar_exit(void)
+{
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+/* Issue a release (__ATOMIC_RELEASE) thread fence. */
+void
+membar_producer(void)
+{
+	__atomic_thread_fence(__ATOMIC_RELEASE);
+}
+
+/* Issue an acquire (__ATOMIC_ACQUIRE) thread fence. */
+void
+membar_consumer(void)
+{
+	__atomic_thread_fence(__ATOMIC_ACQUIRE);
+}

diff --git a/zfs/lib/libspl/getexecname.c b/zfs/lib/libspl/getexecname.c
deleted file mode 100644
index c21a110..0000000
--- a/zfs/lib/libspl/getexecname.c
+++ /dev/null

@@ -1,58 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <limits.h>
-
-const char *
-getexecname(void)
-{
-	static char execname[PATH_MAX + 1] = "";
-	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
-	char *ptr = NULL;
-	ssize_t rc;
-
-	(void) pthread_mutex_lock(&mtx);
-
-	if (strlen(execname) == 0) {
-		rc = readlink("/proc/self/exe",
-		    execname, sizeof (execname) - 1);
-		if (rc == -1) {
-			execname[0] = '\0';
-		} else {
-			execname[rc] = '\0';
-			ptr = execname;
-		}
-	} else {
-		ptr = execname;
-	}
-
-	(void) pthread_mutex_unlock(&mtx);
-	return (ptr);
-}

diff --git a/zfs/lib/libspl/gethostid.c b/zfs/lib/libspl/gethostid.c
deleted file mode 100644
index 1eb93f4..0000000
--- a/zfs/lib/libspl/gethostid.c
+++ /dev/null

@@ -1,86 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2017, Lawrence Livermore National Security, LLC.
- */
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/systeminfo.h>
-
-static unsigned long
-get_spl_hostid(void)
-{
-	FILE *f;
-	unsigned long hostid;
-	char *env;
-
-	/*
-	 * Allow the hostid to be subverted for testing.
-	 */
-	env = getenv("ZFS_HOSTID");
-	if (env) {
-		hostid = strtoull(env, NULL, 0);
-		return (hostid & HOSTID_MASK);
-	}
-
-	f = fopen("/sys/module/spl/parameters/spl_hostid", "r");
-	if (!f)
-		return (0);
-
-	if (fscanf(f, "%lu", &hostid) != 1)
-		hostid = 0;
-
-	fclose(f);
-
-	return (hostid & HOSTID_MASK);
-}
-
-unsigned long
-get_system_hostid(void)
-{
-	unsigned long system_hostid = get_spl_hostid();
-	/*
-	 * We do not use the library call gethostid() because
-	 * it generates a hostid value that the kernel is
-	 * unaware of, if the spl_hostid module parameter has not
-	 * been set and there is no system hostid file (e.g.
-	 * /etc/hostid).  The kernel and userspace must agree.
-	 * See comments above hostid_read() in the SPL.
-	 */
-	if (system_hostid == 0) {
-		int fd, rc;
-		unsigned long hostid;
-		int hostid_size = 4;  /* 4 bytes regardless of arch */
-
-		fd = open("/etc/hostid", O_RDONLY);
-		if (fd >= 0) {
-			rc = read(fd, &hostid, hostid_size);
-			if (rc > 0)
-				system_hostid = (hostid & HOSTID_MASK);
-			close(fd);
-		}
-	}
-	return (system_hostid);
-}

diff --git a/zfs/lib/libspl/getmntany.c b/zfs/lib/libspl/getmntany.c
deleted file mode 100644
index 43e523e..0000000
--- a/zfs/lib/libspl/getmntany.c
+++ /dev/null

@@ -1,102 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Copyright 2006 Ricardo Correia.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*	Copyright (c) 1988 AT&T	*/
-/*	  All Rights Reserved	*/
-
-#include <stdio.h>
-#include <string.h>
-#include <mntent.h>
-#include <sys/mnttab.h>
-
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#define	BUFSIZE	(MNT_LINE_MAX + 2)
-
-__thread char buf[BUFSIZE];
-
-#define	DIFF(xx)	( \
-	    (mrefp->xx != NULL) && \
-	    (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0))
-
-int
-getmntany(FILE *fp, struct mnttab *mgetp, struct mnttab *mrefp)
-{
-	int ret;
-
-	while (
-	    ((ret = _sol_getmntent(fp, mgetp)) == 0) && (
-	    DIFF(mnt_special) || DIFF(mnt_mountp) ||
-	    DIFF(mnt_fstype) || DIFF(mnt_mntopts))) { }
-
-	return (ret);
-}
-
-int
-_sol_getmntent(FILE *fp, struct mnttab *mgetp)
-{
-	struct mntent mntbuf;
-	struct mntent *ret;
-
-	ret = getmntent_r(fp, &mntbuf, buf, BUFSIZE);
-
-	if (ret != NULL) {
-		mgetp->mnt_special = mntbuf.mnt_fsname;
-		mgetp->mnt_mountp = mntbuf.mnt_dir;
-		mgetp->mnt_fstype = mntbuf.mnt_type;
-		mgetp->mnt_mntopts = mntbuf.mnt_opts;
-		return (0);
-	}
-
-	if (feof(fp))
-		return (-1);
-
-	return (MNT_TOOLONG);
-}
-
-int
-getextmntent(FILE *fp, struct extmnttab *mp, int len)
-{
-	int ret;
-	struct stat64 st;
-
-	ret = _sol_getmntent(fp, (struct mnttab *)mp);
-	if (ret == 0) {
-		if (stat64(mp->mnt_mountp, &st) != 0) {
-			mp->mnt_major = 0;
-			mp->mnt_minor = 0;
-			return (ret);
-		}
-		mp->mnt_major = major(st.st_dev);
-		mp->mnt_minor = minor(st.st_dev);
-	}
-
-	return (ret);
-}

diff --git a/zfs/lib/libspl/include/Makefile.am b/zfs/lib/libspl/include/Makefile.am
index 842a8fb..9ca08b2 100644
--- a/zfs/lib/libspl/include/Makefile.am
+++ b/zfs/lib/libspl/include/Makefile.am

@@ -1,23 +1,22 @@
-SUBDIRS = ia32 rpc sys util
+SUBDIRS = ia32 rpc sys util os
 
 libspldir = $(includedir)/libspl
 libspl_HEADERS = \
-	$(top_srcdir)/lib/libspl/include/assert.h \
-	$(top_srcdir)/lib/libspl/include/atomic.h \
-	$(top_srcdir)/lib/libspl/include/devid.h \
-	$(top_srcdir)/lib/libspl/include/libdevinfo.h \
-	$(top_srcdir)/lib/libspl/include/libgen.h \
-	$(top_srcdir)/lib/libspl/include/libshare.h \
-	$(top_srcdir)/lib/libspl/include/limits.h \
-	$(top_srcdir)/lib/libspl/include/locale.h \
-	$(top_srcdir)/lib/libspl/include/statcommon.h \
-	$(top_srcdir)/lib/libspl/include/stdio.h \
-	$(top_srcdir)/lib/libspl/include/stdlib.h \
-	$(top_srcdir)/lib/libspl/include/string.h \
-	$(top_srcdir)/lib/libspl/include/stropts.h \
-	$(top_srcdir)/lib/libspl/include/thread.h \
-	$(top_srcdir)/lib/libspl/include/tzfile.h \
-	$(top_srcdir)/lib/libspl/include/ucred.h \
-	$(top_srcdir)/lib/libspl/include/umem.h \
-	$(top_srcdir)/lib/libspl/include/unistd.h \
-	$(top_srcdir)/lib/libspl/include/zone.h
+	assert.h \
+	atomic.h \
+	libdevinfo.h \
+	libgen.h \
+	libshare.h \
+	limits.h \
+	locale.h \
+	statcommon.h \
+	stdio.h \
+	stdlib.h \
+	string.h \
+	stropts.h \
+	thread.h \
+	tzfile.h \
+	ucred.h \
+	umem.h \
+	unistd.h \
+	zone.h

diff --git a/zfs/lib/libspl/include/assert.h b/zfs/lib/libspl/include/assert.h
index 820519c..0503ce4 100644
--- a/zfs/lib/libspl/include/assert.h
+++ b/zfs/lib/libspl/include/assert.h

@@ -33,36 +33,18 @@
 #include <stdlib.h>
 #include <stdarg.h>
 
-#ifndef _KERNEL
+/* Set to non-zero to avoid abort()ing on an assertion failure */
 extern int aok;
-#endif
+
+/* printf version of libspl_assert */
+extern void libspl_assertf(const char *file, const char *func, int line,
+    const char *format, ...);
 
 static inline int
 libspl_assert(const char *buf, const char *file, const char *func, int line)
 {
-	fprintf(stderr, "%s\n", buf);
-	fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func);
-	if (aok) {
-		return (0);
-	}
-	abort();
-}
-
-/* printf version of libspl_assert */
-static inline void
-libspl_assertf(const char *file, const char *func, int line, char *format, ...)
-{
-	va_list args;
-
-	va_start(args, format);
-	vfprintf(stderr, format, args);
-	fprintf(stderr, "\n");
-	fprintf(stderr, "ASSERT at %s:%d:%s()", file, line, func);
-	va_end(args);
-	if (aok) {
-		return;
-	}
-	abort();
+	libspl_assertf(file, func, line, "%s", buf);
+	return (0);
 }
 
 #ifdef verify
@@ -145,7 +127,6 @@
 #define	ASSERT0(x)		((void)0)
 #define	ASSERT(x)		((void)0)
 #define	assert(x)		((void)0)
-#define	ASSERTV(x)
 #define	IMPLY(A, B)		((void)0)
 #define	EQUIV(A, B)		((void)0)
 #else
@@ -156,7 +137,6 @@
 #define	ASSERT0		VERIFY0
 #define	ASSERT		VERIFY
 #define	assert		VERIFY
-#define	ASSERTV(x)		x
 #define	IMPLY(A, B) \
 	((void)(((!(A)) || (B)) || \
 	    libspl_assert("(" #A ") implies (" #B ")", \

diff --git a/zfs/lib/libspl/include/atomic.h b/zfs/lib/libspl/include/atomic.h
index f8c257f..8dd1d65 100644
--- a/zfs/lib/libspl/include/atomic.h
+++ b/zfs/lib/libspl/include/atomic.h

@@ -246,6 +246,49 @@
 #endif
 
 /*
+ * Atomically read variable.
+ */
+#define	atomic_load_char(p)	(*(volatile uchar_t *)(p))
+#define	atomic_load_short(p)	(*(volatile ushort_t *)(p))
+#define	atomic_load_int(p)	(*(volatile uint_t *)(p))
+#define	atomic_load_long(p)	(*(volatile ulong_t *)(p))
+#define	atomic_load_ptr(p)	(*(volatile __typeof(*p) *)(p))
+#define	atomic_load_8(p)	(*(volatile uint8_t *)(p))
+#define	atomic_load_16(p)	(*(volatile uint16_t *)(p))
+#define	atomic_load_32(p)	(*(volatile uint32_t *)(p))
+#ifdef _LP64
+#define	atomic_load_64(p)	(*(volatile uint64_t *)(p))
+#elif defined(_INT64_TYPE)
+extern uint64_t atomic_load_64(volatile uint64_t *);
+#endif
+
+/*
+ * Atomically write variable.
+ */
+#define	atomic_store_char(p, v)		\
+	(*(volatile uchar_t *)(p) = (uchar_t)(v))
+#define	atomic_store_short(p, v)	\
+	(*(volatile ushort_t *)(p) = (ushort_t)(v))
+#define	atomic_store_int(p, v)		\
+	(*(volatile uint_t *)(p) = (uint_t)(v))
+#define	atomic_store_long(p, v)		\
+	(*(volatile ulong_t *)(p) = (ulong_t)(v))
+#define	atomic_store_ptr(p, v)		\
+	(*(volatile __typeof(*p) *)(p) = (v))
+#define	atomic_store_8(p, v)		\
+	(*(volatile uint8_t *)(p) = (uint8_t)(v))
+#define	atomic_store_16(p, v)		\
+	(*(volatile uint16_t *)(p) = (uint16_t)(v))
+#define	atomic_store_32(p, v)		\
+	(*(volatile uint32_t *)(p) = (uint32_t)(v))
+#ifdef _LP64
+#define	atomic_store_64(p, v)		\
+	(*(volatile uint64_t *)(p) = (uint64_t)(v))
+#elif defined(_INT64_TYPE)
+extern void atomic_store_64(volatile uint64_t *, uint64_t);
+#endif
+
+/*
  * Perform an exclusive atomic bit set/clear on a target.
  * Returns 0 if bit was successfully set/cleared, or -1
  * if the bit was already set/cleared.

diff --git a/zfs/lib/libspl/include/devid.h b/zfs/lib/libspl/include/devid.h
deleted file mode 100644
index 8e48328..0000000
--- a/zfs/lib/libspl/include/devid.h
+++ /dev/null

@@ -1,32 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBSPL_DEVID_H
-#define	_LIBSPL_DEVID_H
-
-#include <sys/types.h>
-
-#endif

diff --git a/zfs/lib/libspl/include/ia32/sys/Makefile.am b/zfs/lib/libspl/include/ia32/sys/Makefile.am
index c8136ee..6832884 100644
--- a/zfs/lib/libspl/include/ia32/sys/Makefile.am
+++ b/zfs/lib/libspl/include/ia32/sys/Makefile.am

@@ -1,3 +1,3 @@
 libspldir = $(includedir)/libspl/ia32/sys
 libspl_HEADERS = \
-        $(top_srcdir)/lib/libspl/include/ia32/sys/asm_linkage.h
+        asm_linkage.h

diff --git a/zfs/lib/libspl/include/libshare.h b/zfs/lib/libspl/include/libshare.h
index 4016ff0..ea53f8c 100644
--- a/zfs/lib/libspl/include/libshare.h
+++ b/zfs/lib/libspl/include/libshare.h

@@ -22,14 +22,11 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
  */
 #ifndef _LIBSPL_LIBSHARE_H
 #define	_LIBSPL_LIBSHARE_H
 
-typedef void *sa_handle_t;	/* opaque handle to access core functions */
-typedef void *sa_group_t;
-typedef void *sa_share_t;
-
 /* API Initialization */
 #define	SA_INIT_SHARE_API	0x0001	/* init share specific interface */
 #define	SA_INIT_CONTROL_API	0x0002	/* init control specific interface */
@@ -74,23 +71,16 @@
 #define	SA_SHARE_EXISTS		33	/* path or file is already shared */
 
 /* initialization */
-extern sa_handle_t sa_init(int);
-extern void sa_fini(sa_handle_t);
 extern char *sa_errorstr(int);
 
 /* share control */
-extern sa_share_t sa_find_share(sa_handle_t, char *);
-extern int sa_enable_share(sa_group_t, char *);
-extern int sa_disable_share(sa_share_t, char *);
+extern int sa_enable_share(const char *, const char *, const char *,
+    char *);
+extern int sa_disable_share(const char *, char *);
+extern boolean_t sa_is_shared(const char *, char *);
+extern void sa_commit_shares(const char *);
 
 /* protocol specific interfaces */
-extern int sa_parse_legacy_options(sa_group_t, char *, char *);
-
-/* ZFS functions */
-extern boolean_t sa_needs_refresh(sa_handle_t handle);
-libzfs_handle_t *sa_get_zfs_handle(sa_handle_t handle);
-extern int sa_zfs_process_share(sa_handle_t handle, sa_group_t group,
-    sa_share_t share, char *mountpoint, char *proto, zprop_source_t source,
-    char *shareopts, char *sourcestr, char *dataset);
+extern int sa_validate_shareopts(char *, char *);
 
 #endif /* _LIBSPL_LIBSHARE_H */

diff --git a/zfs/lib/libspl/include/limits.h b/zfs/lib/libspl/include/limits.h
index 1a42cfe..5d996eb 100644
--- a/zfs/lib/libspl/include/limits.h
+++ b/zfs/lib/libspl/include/limits.h

@@ -25,16 +25,21 @@
  */
 
 #include_next <limits.h>
+#include <float.h>
 
 #ifndef _LIBSPL_LIMITS_H
 #define	_LIBSPL_LIMITS_H
 
+#ifndef DBL_DIG
 #define	DBL_DIG		15
 #define	DBL_MAX		1.7976931348623157081452E+308
 #define	DBL_MIN		2.2250738585072013830903E-308
+#endif
 
+#ifndef FLT_DIG
 #define	FLT_DIG		6
 #define	FLT_MAX		3.4028234663852885981170E+38F
 #define	FLT_MIN		1.1754943508222875079688E-38F
+#endif
 
 #endif /* _LIBSPL_LIMITS_H */

diff --git a/zfs/lib/libspl/include/os/Makefile.am b/zfs/lib/libspl/include/os/Makefile.am
new file mode 100644
index 0000000..7b362e0
--- /dev/null
+++ b/zfs/lib/libspl/include/os/Makefile.am

@@ -0,0 +1,7 @@
+# Select the platform-specific header subdirectory to recurse into.
+# BUILD_FREEBSD and BUILD_LINUX are Automake conditionals defined outside
+# this file.
+if BUILD_FREEBSD
+SUBDIRS = freebsd
+endif
+
+if BUILD_LINUX
+SUBDIRS = linux
+endif

diff --git a/zfs/lib/libspl/include/os/freebsd/Makefile.am b/zfs/lib/libspl/include/os/freebsd/Makefile.am
new file mode 100644
index 0000000..f06325e
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/Makefile.am

@@ -0,0 +1,5 @@
+# Recurse into sys/ and install the fcntl.h from this directory into
+# $(includedir)/libspl.
+SUBDIRS = sys
+
+libspldir = $(includedir)/libspl
+libspl_HEADERS = \
+	fcntl.h

diff --git a/zfs/lib/libspl/include/os/freebsd/fcntl.h b/zfs/lib/libspl/include/os/freebsd/fcntl.h
new file mode 100644
index 0000000..26d571a
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/fcntl.h

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LIBSPL_FCNTL_H_
+#define	_LIBSPL_FCNTL_H_
+
+#include_next <fcntl.h>
+
+#include <sys/fcntl.h>
+
+#endif	/* _LIBSPL_FCNTL_H_ */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/Makefile.am b/zfs/lib/libspl/include/os/freebsd/sys/Makefile.am
new file mode 100644
index 0000000..7a85460
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/Makefile.am

@@ -0,0 +1,12 @@
+# Headers from this directory that are installed into
+# $(includedir)/libspl/sys.
+libspldir = $(includedir)/libspl/sys
+libspl_HEADERS = \
+	byteorder.h \
+	fcntl.h \
+	file.h \
+	mnttab.h \
+	mount.h \
+	param.h \
+	stat.h \
+	sysmacros.h \
+	vfs.h \
+	zfs_context_os.h

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/byteorder.h b/zfs/lib/libspl/include/os/freebsd/sys/byteorder.h
new file mode 100644
index 0000000..cd692d3
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/byteorder.h

@@ -0,0 +1,192 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_BYTEORDER_H
+#define	_SYS_BYTEORDER_H
+
+#include <sys/endian.h>
+#include <netinet/in.h>
+#include <sys/isa_defs.h>
+#include <sys/int_types.h>
+
+#if defined(__GNUC__) && defined(_ASM_INLINES) && \
+	(defined(__i386) || defined(__amd64))
+#include <asm/byteorder.h>
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * macros for conversion between host and (internet) network byte order
+ */
+#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
+
+/*
+ * Macros to reverse byte order
+ */
+#define	BSWAP_8(x)	((x) & 0xff)
+#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+#define	BMASK_8(x)	((x) & 0xff)
+#define	BMASK_16(x)	((x) & 0xffff)
+#define	BMASK_32(x)	((x) & 0xffffffff)
+#define	BMASK_64(x)	(x)
+
+/*
+ * Macros to convert from a specific byte order to/from native byte order
+ */
+#ifdef _ZFS_BIG_ENDIAN
+#define	BE_8(x)		BMASK_8(x)
+#define	BE_16(x)	BMASK_16(x)
+#define	BE_32(x)	BMASK_32(x)
+#define	BE_64(x)	BMASK_64(x)
+#define	LE_8(x)		BSWAP_8(x)
+#define	LE_16(x)	BSWAP_16(x)
+#define	LE_32(x)	BSWAP_32(x)
+#define	LE_64(x)	BSWAP_64(x)
+#else
+#define	LE_8(x)		BMASK_8(x)
+#define	LE_16(x)	BMASK_16(x)
+#define	LE_32(x)	BMASK_32(x)
+#define	LE_64(x)	BMASK_64(x)
+#define	BE_8(x)		BSWAP_8(x)
+#define	BE_16(x)	BSWAP_16(x)
+#define	BE_32(x)	BSWAP_32(x)
+#define	BE_64(x)	BSWAP_64(x)
+#endif
+
+/*
+ * 64-bit host <-> network byte order conversion.  When _ZFS_BIG_ENDIAN is
+ * defined the value is returned unchanged.
+ */
+#ifdef _ZFS_BIG_ENDIAN
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return (n);
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return (n);
+}
+#else
+/*
+ * Otherwise each 32-bit half is converted with htonl()/ntohl() and the two
+ * halves trade places: the converted low half becomes the high half of the
+ * result and vice versa.
+ */
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#endif
+
+/*
+ * Macros to read unaligned values from a specific byte order to
+ * native byte order
+ */
+
+#define	BE_IN8(xa) \
+	*((uint8_t *)(xa))
+
+#define	BE_IN16(xa) \
+	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
+
+#define	BE_IN32(xa) \
+	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
+
+#define	BE_IN64(xa) \
+	(((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4))
+
+#define	LE_IN8(xa) \
+	*((uint8_t *)(xa))
+
+#define	LE_IN16(xa) \
+	(((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa))
+
+#define	LE_IN32(xa) \
+	(((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa))
+
+#define	LE_IN64(xa) \
+	(((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa))
+
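+/*
+ * Macros to write unaligned values from native byte order to a specific byte
+ * order.  Each macro expands to one or more complete statements (trailing
+ * semicolons included) and may evaluate its arguments more than once, so
+ * wrap a use in braces when it forms the body of an if/else or loop and
+ * avoid arguments with side effects.
+ */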
+
+#define	BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define	BE_OUT16(xa, yv) \
+	BE_OUT8((uint8_t *)(xa) + 1, yv); \
+	BE_OUT8((uint8_t *)(xa), (yv) >> 8);
+
+#define	BE_OUT32(xa, yv) \
+	BE_OUT16((uint8_t *)(xa) + 2, yv); \
+	BE_OUT16((uint8_t *)(xa), (yv) >> 16);
+
+#define	BE_OUT64(xa, yv) \
+	BE_OUT32((uint8_t *)(xa) + 4, yv); \
+	BE_OUT32((uint8_t *)(xa), (yv) >> 32);
+
+#define	LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define	LE_OUT16(xa, yv) \
+	LE_OUT8((uint8_t *)(xa), yv); \
+	LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8);
+
+#define	LE_OUT32(xa, yv) \
+	LE_OUT16((uint8_t *)(xa), yv); \
+	LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16);
+
+#define	LE_OUT64(xa, yv) \
+	LE_OUT32((uint8_t *)(xa), yv); \
+	LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32);
+
+#endif	/* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_BYTEORDER_H */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/fcntl.h b/zfs/lib/libspl/include/os/freebsd/sys/fcntl.h
new file mode 100644
index 0000000..c8a37a1
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/fcntl.h

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LIBSPL_SYS_FCNTL_H_
+#define	_LIBSPL_SYS_FCNTL_H_
+
+#include_next <sys/fcntl.h>
+
+#define	O_LARGEFILE	0
+#define	O_RSYNC		0
+
+#ifndef O_DSYNC
+#define	O_DSYNC		0
+#endif
+
+#endif	/* _LIBSPL_SYS_FCNTL_H_ */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/file.h b/zfs/lib/libspl/include/os/freebsd/sys/file.h
new file mode 100644
index 0000000..27fd288
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/file.h

@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_FILE_H
+#define	_LIBSPL_SYS_FILE_H
+
+#include_next <sys/file.h>
+
+#define	FCREAT	O_CREAT
+#define	FTRUNC	O_TRUNC
+#define	FSYNC	O_SYNC
+#define	FDSYNC	O_DSYNC
+#define	FEXCL	O_EXCL
+
+#define	FNODSYNC	0x10000	/* fsync pseudo flag */
+#define	FNOFOLLOW	0x20000	/* don't follow symlinks */
+#define	FIGNORECASE	0x80000	/* request case-insensitive lookups */
+
+#endif

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/mnttab.h b/zfs/lib/libspl/include/os/freebsd/sys/mnttab.h
new file mode 100644
index 0000000..c08349b
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/mnttab.h

@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/*  All Rights Reserved  */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2006 Ricardo Correia */
+
+#ifndef _SYS_MNTTAB_H
+#define	_SYS_MNTTAB_H
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#ifdef MNTTAB
+#undef MNTTAB
+#endif /* MNTTAB */
+
+#include <paths.h>
+#include <sys/mount.h>
+#define	MNTTAB		_PATH_DEVZERO
+#define	MS_NOMNTTAB		0x0
+#define	MS_RDONLY		0x1
+#define	umount2(p, f)	unmount(p, f)
+#define	MNT_LINE_MAX	4108
+
+#define	MNT_TOOLONG	1	/* entry exceeds MNT_LINE_MAX */
+#define	MNT_TOOMANY	2	/* too many fields in line */
+#define	MNT_TOOFEW	3	/* too few fields in line */
+
+struct mnttab {
+	char *mnt_special;
+	char *mnt_mountp;
+	char *mnt_fstype;
+	char *mnt_mntopts;
+};
+
+/*
+ * NOTE: fields in extmnttab should match struct mnttab till new fields
+ * are encountered, this allows hasmntopt to work properly when its arg is
+ * a pointer to an extmnttab struct cast to a mnttab struct pointer.
+ */
+
+struct extmnttab {
+	char *mnt_special;
+	char *mnt_mountp;
+	char *mnt_fstype;
+	char *mnt_mntopts;
+	uint_t mnt_major;
+	uint_t mnt_minor;
+};
+
+struct stat64;
+struct statfs;
+
+extern int getmntany(FILE *fp, struct mnttab *mp, struct mnttab *mpref);
+extern int _sol_getmntent(FILE *fp, struct mnttab *mp);
+extern int getextmntent(const char *path, struct extmnttab *entry,
+    struct stat64 *statbuf);
+extern void statfs2mnttab(struct statfs *sfs, struct mnttab *mp);
+char *hasmntopt(struct mnttab *mnt, char *opt);
+int getmntent(FILE *fp, struct mnttab *mp);
+
+#endif

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/mount.h b/zfs/lib/libspl/include/os/freebsd/sys/mount.h
new file mode 100644
index 0000000..e995185
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/mount.h

@@ -0,0 +1,104 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef _LIBSPL_SYS_MOUNT_H
+#define	_LIBSPL_SYS_MOUNT_H
+
+#undef _SYS_MOUNT_H_
+#include_next <sys/mount.h>
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#if !defined(BLKGETSIZE64)
+#define	BLKGETSIZE64		DIOCGMEDIASIZE
+#endif
+
+/*
+ * Some old glibc headers don't correctly define MS_DIRSYNC and
+ * instead use the enum name S_WRITE.  When using these older
+ * headers define MS_DIRSYNC to be S_WRITE.
+ */
+#if !defined(MS_DIRSYNC)
+#define	MS_DIRSYNC		S_WRITE
+#endif
+
+/*
+ * Some old glibc headers don't correctly define MS_POSIXACL and
+ * instead leave it undefined.  When using these older headers define
+ * MS_POSIXACL to the reserved value of (1<<16).
+ */
+#if !defined(MS_POSIXACL)
+#define	MS_POSIXACL		(1<<16)
+#endif
+
+#define	MS_NOSUID	MNT_NOSUID
+#define	MS_NOEXEC	MNT_NOEXEC
+#define	MS_NODEV	0
+#define	S_WRITE		0
+#define	MS_BIND		0
+#define	MS_REMOUNT	0
+#define	MS_SYNCHRONOUS	MNT_SYNCHRONOUS
+
+#define	MS_USERS	(MS_NOEXEC|MS_NOSUID|MS_NODEV)
+#define	MS_OWNER	(MS_NOSUID|MS_NODEV)
+#define	MS_GROUP	(MS_NOSUID|MS_NODEV)
+#define	MS_COMMENT	0
+
+/*
+ * Older glibc <sys/mount.h> headers did not define all the available
+ * umount2(2) flags.  Both MNT_FORCE and MNT_DETACH are supported in the
+ * kernel back to 2.4.11 so we define them correctly if they are missing.
+ */
+#ifdef MNT_FORCE
+#define	MS_FORCE	MNT_FORCE
+#else
+#define	MS_FORCE	0x00000001
+#endif /* MNT_FORCE */
+
+#ifdef MNT_DETACH
+#define	MS_DETACH	MNT_DETACH
+#else
+#define	MS_DETACH	0x00000002
+#endif /* MNT_DETACH */
+
+/*
+ * Overlay mount is default in Linux, but for solaris/zfs
+ * compatibility, MS_OVERLAY is defined to explicitly have the user
+ * provide a flag (-O) to mount over a non empty directory.
+ */
+#define	MS_OVERLAY	0x00000004
+
+/*
+ * MS_CRYPT indicates that encryption keys should be loaded if they are not
+ * already available. This is not defined in glibc, but it is never seen by
+ * the kernel so it will not cause any problems.
+ */
+#define	MS_CRYPT	0x00000008
+
+#endif /* _LIBSPL_SYS_MOUNT_H */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/param.h b/zfs/lib/libspl/include/os/freebsd/sys/param.h
new file mode 100644
index 0000000..cb5260e
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/param.h

@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_PARAM_H
+#define	_LIBSPL_SYS_PARAM_H
+
+#include_next <sys/param.h>
+#include <unistd.h>
+
+/*
+ * File system parameters and macros.
+ *
+ * The file system is made out of blocks of at most MAXBSIZE units,
+ * with smaller units (fragments) only in the last direct block.
+ * MAXBSIZE primarily determines the size of buffers in the buffer
+ * pool. It may be made larger without any effect on existing
+ * file systems; however making it smaller may make some file
+ * systems unmountable.
+ *
+ * Note that the blocked devices are assumed to have DEV_BSIZE
+ * "sectors" and that fragments must be some multiple of this size.
+ */
+#define	MAXNAMELEN	256
+
+#define	UID_NOACCESS	60002		/* user ID no access */
+
+#define	MAXUID		UINT32_MAX	/* max user id */
+#define	MAXPROJID	MAXUID		/* max project id */
+
+#ifdef	PAGESIZE
+#undef	PAGESIZE
+#endif /* PAGESIZE */
+
+extern size_t spl_pagesize(void);
+#define	PAGESIZE	(spl_pagesize())
+
+extern int execvpe(const char *name, char * const argv[], char * const envp[]);
+
+#endif

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/stat.h b/zfs/lib/libspl/include/os/freebsd/sys/stat.h
new file mode 100644
index 0000000..38c684d
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/stat.h

@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _LIBSPL_SYS_STAT_H
+#define	_LIBSPL_SYS_STAT_H
+
+#include_next <sys/stat.h>
+
+/* Note: this file can be used on linux/macOS when bootstrapping tools. */
+
+#if defined(__FreeBSD__)
+#include <sys/mount.h> /* for BLKGETSIZE64 */
+
+#define	stat64	stat
+
+#define	MAXOFFSET_T	OFF_MAX
+
+#ifndef _KERNEL
+#include <sys/disk.h>
+
+static __inline int
+fstat64(int fd, struct stat *sb)
+{
+	int ret;
+
+	ret = fstat(fd, sb);
+	if (ret == 0) {
+		if (S_ISCHR(sb->st_mode))	/* st_size is set by the ioctl */
+			(void) ioctl(fd, DIOCGMEDIASIZE, &sb->st_size);
+	}
+	return (ret);
+}
+#endif /* !_KERNEL */
+
+/*
+ * Emulate Solaris' behavior of returning the block device size in fstat64().
+ */
+static inline int
+fstat64_blk(int fd, struct stat64 *st)
+{
+	if (fstat64(fd, st) == -1)
+		return (-1);
+
+	/* For a block device, fetch the size with the BLKGETSIZE64 ioctl */
+	if (S_ISBLK(st->st_mode)) {
+		if (ioctl(fd, BLKGETSIZE64, &st->st_size) != 0)
+			return (-1);
+	}
+
+	return (0);
+}
+#endif /* defined(__FreeBSD__) */
+
+/*
+ * Only Intel-based Macs have a separate stat64; Arm-based Macs are like
+ * FreeBSD and have a full 64-bit stat from the start.
+ */
+#if defined(__APPLE__) && !(defined(__i386__) || defined(__x86_64__))
+#define	stat64	stat
+#define	fstat64	fstat
+#endif
+
+#endif /* _LIBSPL_SYS_STAT_H */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/sysmacros.h b/zfs/lib/libspl/include/os/freebsd/sys/sysmacros.h
new file mode 100644
index 0000000..d9639d2
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/sysmacros.h

@@ -0,0 +1 @@
+/* keep me */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/vfs.h b/zfs/lib/libspl/include/os/freebsd/sys/vfs.h
new file mode 100644
index 0000000..55eb3c2
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/vfs.h

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef ZFS_SYS_VFS_H_
+#define	ZFS_SYS_VFS_H_
+
+#include_next <sys/statvfs.h>
+
+int fsshare(const char *, const char *, const char *);
+int fsunshare(const char *, const char *);
+
+#endif /* !ZFS_SYS_VFS_H_ */

diff --git a/zfs/lib/libspl/include/os/freebsd/sys/zfs_context_os.h b/zfs/lib/libspl/include/os/freebsd/sys/zfs_context_os.h
new file mode 100644
index 0000000..b9bf487
--- /dev/null
+++ b/zfs/lib/libspl/include/os/freebsd/sys/zfs_context_os.h

@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef ZFS_CONTEXT_OS_H_
+#define	ZFS_CONTEXT_OS_H_
+
+#define	HAVE_LARGE_STACKS	1
+#define	ZFS_EXPORTS_PATH	"/etc/zfs/exports"
+
+#endif

diff --git a/zfs/lib/libspl/include/os/linux/Makefile.am b/zfs/lib/libspl/include/os/linux/Makefile.am
new file mode 100644
index 0000000..081839c
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/Makefile.am

@@ -0,0 +1 @@
+SUBDIRS = sys

diff --git a/zfs/lib/libspl/include/os/linux/sys/Makefile.am b/zfs/lib/libspl/include/os/linux/sys/Makefile.am
new file mode 100644
index 0000000..1ec07a7
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/sys/Makefile.am

@@ -0,0 +1,10 @@
+libspldir = $(includedir)/libspl/sys
+libspl_HEADERS = \
+	byteorder.h \
+	errno.h \
+	mnttab.h \
+	mount.h \
+	param.h \
+	stat.h \
+	sysmacros.h \
+	zfs_context_os.h

diff --git a/zfs/lib/libspl/include/os/linux/sys/byteorder.h b/zfs/lib/libspl/include/os/linux/sys/byteorder.h
new file mode 100644
index 0000000..d5ee3e2
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/sys/byteorder.h

@@ -0,0 +1,223 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_BYTEORDER_H
+#define	_SYS_BYTEORDER_H
+
+#if defined(__GNUC__) && defined(_ASM_INLINES) && \
+	(defined(__i386) || defined(__amd64))
+#include <asm/byteorder.h>
+#endif
+
+#include <sys/isa_defs.h>
+#include <sys/int_types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * macros for conversion between host and (internet) network byte order
+ */
+
+#if defined(_ZFS_BIG_ENDIAN) && !defined(ntohl) && !defined(__lint)
+/* big-endian */
+#define	ntohl(x)	(x)
+#define	ntohs(x)	(x)
+#define	htonl(x)	(x)
+#define	htons(x)	(x)
+
+#elif !defined(ntohl) /* little-endian */
+
+#ifndef	_IN_PORT_T
+#define	_IN_PORT_T
+typedef uint16_t in_port_t;
+#endif
+
+#ifndef	_IN_ADDR_T
+#define	_IN_ADDR_T
+typedef uint32_t in_addr_t;
+#endif
+
+#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5)
+extern	uint32_t htonl(uint32_t);
+extern	uint16_t htons(uint16_t);
+extern 	uint32_t ntohl(uint32_t);
+extern	uint16_t ntohs(uint16_t);
+#else
+extern	in_addr_t htonl(in_addr_t);
+extern	in_port_t htons(in_port_t);
+extern 	in_addr_t ntohl(in_addr_t);
+extern	in_port_t ntohs(in_port_t);
+#endif	/* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */
+#endif
+
+#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
+
+/*
+ * Macros to reverse byte order
+ */
+#define	BSWAP_8(x)	((x) & 0xff)
+#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
+#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
+#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
+
+#define	BMASK_8(x)	((x) & 0xff)
+#define	BMASK_16(x)	((x) & 0xffff)
+#define	BMASK_32(x)	((x) & 0xffffffff)
+#define	BMASK_64(x)	(x)
+
+/*
+ * Macros to convert from a specific byte order to/from native byte order
+ */
+#ifdef _ZFS_BIG_ENDIAN
+#define	BE_8(x)		BMASK_8(x)
+#define	BE_16(x)	BMASK_16(x)
+#define	BE_32(x)	BMASK_32(x)
+#define	BE_64(x)	BMASK_64(x)
+#define	LE_8(x)		BSWAP_8(x)
+#define	LE_16(x)	BSWAP_16(x)
+#define	LE_32(x)	BSWAP_32(x)
+#define	LE_64(x)	BSWAP_64(x)
+#else /* !_ZFS_BIG_ENDIAN */
+#define	LE_8(x)		BMASK_8(x)
+#define	LE_16(x)	BMASK_16(x)
+#define	LE_32(x)	BMASK_32(x)
+#define	LE_64(x)	BMASK_64(x)
+#define	BE_8(x)		BSWAP_8(x)
+#define	BE_16(x)	BSWAP_16(x)
+#define	BE_32(x)	BSWAP_32(x)
+#define	BE_64(x)	BSWAP_64(x)
+#endif /* _ZFS_BIG_ENDIAN */
+
+#ifdef _ZFS_BIG_ENDIAN
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return (n);	/* big-endian host: no byte swap needed */
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return (n);	/* big-endian host: no byte swap needed */
+}
+#else /* !_ZFS_BIG_ENDIAN */
+static __inline__ uint64_t
+htonll(uint64_t n)
+{
+	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n)
+{
+	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#endif /* _ZFS_BIG_ENDIAN */
+
+/*
+ * Macros to read unaligned values from a specific byte order to
+ * native byte order
+ */
+
+#define	BE_IN8(xa) \
+	*((uint8_t *)(xa))
+
+#define	BE_IN16(xa) \
+	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
+
+#define	BE_IN32(xa) \
+	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
+
+#define	BE_IN64(xa) \
+	(((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4))
+
+#define	LE_IN8(xa) \
+	*((uint8_t *)(xa))
+
+#define	LE_IN16(xa) \
+	(((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa))
+
+#define	LE_IN32(xa) \
+	(((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa))
+
+#define	LE_IN64(xa) \
+	(((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa))
+
+/*
+ * Macros to write unaligned values from native byte order to a specific byte
+ * order.
+ */
+
+#define	BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define	BE_OUT16(xa, yv) \
+	BE_OUT8((uint8_t *)(xa) + 1, yv); \
+	BE_OUT8((uint8_t *)(xa), (yv) >> 8);
+
+#define	BE_OUT32(xa, yv) \
+	BE_OUT16((uint8_t *)(xa) + 2, yv); \
+	BE_OUT16((uint8_t *)(xa), (yv) >> 16);
+
+#define	BE_OUT64(xa, yv) \
+	BE_OUT32((uint8_t *)(xa) + 4, yv); \
+	BE_OUT32((uint8_t *)(xa), (yv) >> 32);
+
+#define	LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
+
+#define	LE_OUT16(xa, yv) \
+	LE_OUT8((uint8_t *)(xa), yv); \
+	LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8);
+
+#define	LE_OUT32(xa, yv) \
+	LE_OUT16((uint8_t *)(xa), yv); \
+	LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16);
+
+#define	LE_OUT64(xa, yv) \
+	LE_OUT32((uint8_t *)(xa), yv); \
+	LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32);
+
+#endif	/* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_BYTEORDER_H */

diff --git a/zfs/lib/libspl/include/os/linux/sys/errno.h b/zfs/lib/libspl/include/os/linux/sys/errno.h
new file mode 100644
index 0000000..30d20ab
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/sys/errno.h

@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2017 Zettabyte Software, LLC.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Compiling against musl correctly points out that including sys/errno.h is
+ * disallowed by the Single UNIX Specification when building in userspace, so
+ * we implement a dummy header to redirect the include to the proper header.
+ */
+#ifndef _LIBSPL_SYS_ERRNO_H
+#define	_LIBSPL_SYS_ERRNO_H
+
+#include <errno.h>
+/*
+ * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
+ * graveyard) to indicate checksum errors and fragmentation.
+ */
+#define	ECKSUM	EBADE
+#define	EFRAGS	EBADR
+
+/* Similarly, ENOTACTIVE borrows the otherwise unused ENOANO */
+#define	ENOTACTIVE	ENOANO
+
+#endif /* _LIBSPL_SYS_ERRNO_H */

diff --git a/zfs/lib/libspl/include/os/linux/sys/mnttab.h b/zfs/lib/libspl/include/os/linux/sys/mnttab.h
new file mode 100644
index 0000000..1957293
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/sys/mnttab.h

@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/*  All Rights Reserved  */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2006 Ricardo Correia */
+
+#ifndef _SYS_MNTTAB_H
+#define	_SYS_MNTTAB_H
+
+#include <stdio.h>
+#include <mntent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#ifdef MNTTAB
+#undef MNTTAB
+#endif /* MNTTAB */
+
+#define	MNTTAB		"/proc/self/mounts"
+#define	MNT_LINE_MAX	4108
+
+#define	MNT_TOOLONG	1	/* entry exceeds MNT_LINE_MAX */
+#define	MNT_TOOMANY	2	/* too many fields in line */
+#define	MNT_TOOFEW	3	/* too few fields in line */
+
+struct mnttab {
+	char *mnt_special;
+	char *mnt_mountp;
+	char *mnt_fstype;
+	char *mnt_mntopts;
+};
+
+/*
+ * NOTE: fields in extmnttab should match struct mnttab till new fields
+ * are encountered, this allows hasmntopt to work properly when its arg is
+ * a pointer to an extmnttab struct cast to a mnttab struct pointer.
+ */
+
+struct extmnttab {
+	char *mnt_special;
+	char *mnt_mountp;
+	char *mnt_fstype;
+	char *mnt_mntopts;
+	uint_t mnt_major;
+	uint_t mnt_minor;
+};
+
+struct statfs;
+
+extern int getmntany(FILE *fp, struct mnttab *mp, struct mnttab *mpref);
+extern int _sol_getmntent(FILE *fp, struct mnttab *mp);
+extern int getextmntent(const char *path, struct extmnttab *mp,
+    struct stat64 *statbuf);
+static inline char *_sol_hasmntopt(struct mnttab *mnt, char *opt)
+{
+	struct mntent mnt_new;	/* only mnt_opts is initialized below */
+
+	mnt_new.mnt_opts = mnt->mnt_mntopts;
+
+	return (hasmntopt(&mnt_new, opt));
+}
+
+#define	hasmntopt	_sol_hasmntopt
+#define	getmntent	_sol_getmntent
+
+#endif

diff --git a/zfs/lib/libspl/include/sys/mount.h b/zfs/lib/libspl/include/os/linux/sys/mount.h
similarity index 100%
rename from zfs/lib/libspl/include/sys/mount.h
rename to zfs/lib/libspl/include/os/linux/sys/mount.h


diff --git a/zfs/lib/libspl/include/sys/param.h b/zfs/lib/libspl/include/os/linux/sys/param.h
similarity index 100%
rename from zfs/lib/libspl/include/sys/param.h
rename to zfs/lib/libspl/include/os/linux/sys/param.h


diff --git a/zfs/lib/libspl/include/sys/stat.h b/zfs/lib/libspl/include/os/linux/sys/stat.h
similarity index 100%
rename from zfs/lib/libspl/include/sys/stat.h
rename to zfs/lib/libspl/include/os/linux/sys/stat.h


diff --git a/zfs/lib/libspl/include/sys/sysmacros.h b/zfs/lib/libspl/include/os/linux/sys/sysmacros.h
similarity index 100%
rename from zfs/lib/libspl/include/sys/sysmacros.h
rename to zfs/lib/libspl/include/os/linux/sys/sysmacros.h


diff --git a/zfs/lib/libspl/include/os/linux/sys/zfs_context_os.h b/zfs/lib/libspl/include/os/linux/sys/zfs_context_os.h
new file mode 100644
index 0000000..81ced52
--- /dev/null
+++ b/zfs/lib/libspl/include/os/linux/sys/zfs_context_os.h

@@ -0,0 +1,28 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef ZFS_CONTEXT_OS_H
+#define	ZFS_CONTEXT_OS_H
+
+#define	HAVE_LARGE_STACKS	1
+
+#endif

diff --git a/zfs/lib/libspl/include/rpc/Makefile.am b/zfs/lib/libspl/include/rpc/Makefile.am
index 78ee5a2..7fe1d7f 100644
--- a/zfs/lib/libspl/include/rpc/Makefile.am
+++ b/zfs/lib/libspl/include/rpc/Makefile.am

@@ -1,3 +1,3 @@
 libspldir = $(includedir)/libspl/rpc
 libspl_HEADERS = \
-	$(top_srcdir)/lib/libspl/include/rpc/xdr.h
+	xdr.h

diff --git a/zfs/lib/libspl/include/rpc/xdr.h b/zfs/lib/libspl/include/rpc/xdr.h
index 27e4395..51d71f6 100644
--- a/zfs/lib/libspl/include/rpc/xdr.h
+++ b/zfs/lib/libspl/include/rpc/xdr.h

@@ -40,10 +40,13 @@
 
 #define	XDR_GET_BYTES_AVAIL 1
 
-typedef struct xdr_bytesrec {
+#ifndef HAVE_XDR_BYTESREC
+struct xdr_bytesrec {
 	bool_t xc_is_last_record;
 	size_t xc_num_avail;
-} xdr_bytesrec_t;
+};
+#endif
+typedef struct xdr_bytesrec  xdr_bytesrec_t;
 
 /*
  * This functionality is not required and is disabled in user space.

diff --git a/zfs/lib/libspl/include/sys/Makefile.am b/zfs/lib/libspl/include/sys/Makefile.am
index e7af317..6816a01 100644
--- a/zfs/lib/libspl/include/sys/Makefile.am
+++ b/zfs/lib/libspl/include/sys/Makefile.am

@@ -2,52 +2,47 @@
 
 libspldir = $(includedir)/libspl/sys
 libspl_HEADERS = \
-	$(top_srcdir)/lib/libspl/include/sys/acl.h \
-	$(top_srcdir)/lib/libspl/include/sys/acl_impl.h \
-	$(top_srcdir)/lib/libspl/include/sys/bitmap.h \
-	$(top_srcdir)/lib/libspl/include/sys/byteorder.h \
-	$(top_srcdir)/lib/libspl/include/sys/callb.h \
-	$(top_srcdir)/lib/libspl/include/sys/cmn_err.h \
-	$(top_srcdir)/lib/libspl/include/sys/cred.h \
-	$(top_srcdir)/lib/libspl/include/sys/debug.h \
-	$(top_srcdir)/lib/libspl/include/sys/dkio.h \
-	$(top_srcdir)/lib/libspl/include/sys/dklabel.h \
-	$(top_srcdir)/lib/libspl/include/sys/errno.h \
-	$(top_srcdir)/lib/libspl/include/sys/feature_tests.h \
-	$(top_srcdir)/lib/libspl/include/sys/file.h \
-	$(top_srcdir)/lib/libspl/include/sys/int_limits.h \
-	$(top_srcdir)/lib/libspl/include/sys/int_types.h \
-	$(top_srcdir)/lib/libspl/include/sys/inttypes.h \
-	$(top_srcdir)/lib/libspl/include/sys/isa_defs.h \
-	$(top_srcdir)/lib/libspl/include/sys/kmem.h \
-	$(top_srcdir)/lib/libspl/include/sys/kstat.h \
-	$(top_srcdir)/lib/libspl/include/sys/list.h \
-	$(top_srcdir)/lib/libspl/include/sys/list_impl.h \
-	$(top_srcdir)/lib/libspl/include/sys/mhd.h \
-	$(top_srcdir)/lib/libspl/include/sys/mkdev.h \
-	$(top_srcdir)/lib/libspl/include/sys/mnttab.h \
-	$(top_srcdir)/lib/libspl/include/sys/mount.h \
-	$(top_srcdir)/lib/libspl/include/sys/param.h \
-	$(top_srcdir)/lib/libspl/include/sys/policy.h \
-	$(top_srcdir)/lib/libspl/include/sys/poll.h \
-	$(top_srcdir)/lib/libspl/include/sys/priv.h \
-	$(top_srcdir)/lib/libspl/include/sys/processor.h \
-	$(top_srcdir)/lib/libspl/include/sys/signal.h \
-	$(top_srcdir)/lib/libspl/include/sys/stack.h \
-	$(top_srcdir)/lib/libspl/include/sys/stat.h \
-	$(top_srcdir)/lib/libspl/include/sys/stdtypes.h \
-	$(top_srcdir)/lib/libspl/include/sys/strings.h \
-	$(top_srcdir)/lib/libspl/include/sys/stropts.h \
-	$(top_srcdir)/lib/libspl/include/sys/sunddi.h \
-	$(top_srcdir)/lib/libspl/include/sys/sysmacros.h \
-	$(top_srcdir)/lib/libspl/include/sys/systeminfo.h \
-	$(top_srcdir)/lib/libspl/include/sys/time.h \
-	$(top_srcdir)/lib/libspl/include/sys/types32.h \
-	$(top_srcdir)/lib/libspl/include/sys/types.h \
-	$(top_srcdir)/lib/libspl/include/sys/tzfile.h \
-	$(top_srcdir)/lib/libspl/include/sys/uio.h \
-	$(top_srcdir)/lib/libspl/include/sys/va_list.h \
-	$(top_srcdir)/lib/libspl/include/sys/varargs.h \
-	$(top_srcdir)/lib/libspl/include/sys/vnode.h \
-	$(top_srcdir)/lib/libspl/include/sys/vtoc.h \
-	$(top_srcdir)/lib/libspl/include/sys/zone.h
+	acl.h \
+	acl_impl.h \
+	callb.h \
+	cmn_err.h \
+	cred.h \
+	debug.h \
+	dkio.h \
+	dklabel.h \
+	feature_tests.h \
+	int_limits.h \
+	int_types.h \
+	inttypes.h \
+	isa_defs.h \
+	kmem.h \
+	kstat.h \
+	list.h \
+	list_impl.h \
+	mhd.h \
+	mkdev.h \
+	policy.h \
+	poll.h \
+	priv.h \
+	processor.h \
+	sha2.h \
+	simd.h \
+	stack.h \
+	stdtypes.h \
+	strings.h \
+	stropts.h \
+	sunddi.h \
+	systeminfo.h \
+	time.h \
+	trace_spl.h \
+	trace_zfs.h \
+	types32.h \
+	types.h \
+	tzfile.h \
+	uio.h \
+	va_list.h \
+	varargs.h \
+	vnode.h \
+	vtoc.h \
+	wmsum.h \
+	zone.h

diff --git a/zfs/lib/libspl/include/sys/acl.h b/zfs/lib/libspl/include/sys/acl.h
index e6df864..3116842 100644
--- a/zfs/lib/libspl/include/sys/acl.h
+++ b/zfs/lib/libspl/include/sys/acl.h

@@ -19,8 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef _SYS_ACL_H
@@ -75,23 +79,24 @@
 /*
  * The following are defined for ace_t.
  */
-#define	ACE_READ_DATA		0x00000001
-#define	ACE_LIST_DIRECTORY	0x00000001
-#define	ACE_WRITE_DATA		0x00000002
-#define	ACE_ADD_FILE		0x00000002
-#define	ACE_APPEND_DATA		0x00000004
-#define	ACE_ADD_SUBDIRECTORY	0x00000004
-#define	ACE_READ_NAMED_ATTRS	0x00000008
-#define	ACE_WRITE_NAMED_ATTRS	0x00000010
-#define	ACE_EXECUTE		0x00000020
-#define	ACE_DELETE_CHILD	0x00000040
-#define	ACE_READ_ATTRIBUTES	0x00000080
-#define	ACE_WRITE_ATTRIBUTES	0x00000100
-#define	ACE_DELETE		0x00010000
-#define	ACE_READ_ACL		0x00020000
-#define	ACE_WRITE_ACL		0x00040000
-#define	ACE_WRITE_OWNER		0x00080000
-#define	ACE_SYNCHRONIZE		0x00100000
+#define	ACE_READ_DATA		0x00000001	/* file: read data */
+#define	ACE_LIST_DIRECTORY	0x00000001	/* dir: list files */
+#define	ACE_WRITE_DATA		0x00000002	/* file: write data */
+#define	ACE_ADD_FILE		0x00000002	/* dir: create file */
+#define	ACE_APPEND_DATA		0x00000004	/* file: append data */
+#define	ACE_ADD_SUBDIRECTORY	0x00000004	/* dir: create subdir */
+#define	ACE_READ_NAMED_ATTRS	0x00000008	/* FILE_READ_EA */
+#define	ACE_WRITE_NAMED_ATTRS	0x00000010	/* FILE_WRITE_EA */
+#define	ACE_EXECUTE		0x00000020	/* file: execute */
+#define	ACE_TRAVERSE		0x00000020	/* dir: lookup name */
+#define	ACE_DELETE_CHILD	0x00000040	/* dir: unlink child */
+#define	ACE_READ_ATTRIBUTES	0x00000080	/* (all) stat, etc. */
+#define	ACE_WRITE_ATTRIBUTES	0x00000100	/* (all) utimes, etc. */
+#define	ACE_DELETE		0x00010000	/* (all) unlink self */
+#define	ACE_READ_ACL		0x00020000	/* (all) getsecattr */
+#define	ACE_WRITE_ACL		0x00040000	/* (all) setsecattr */
+#define	ACE_WRITE_OWNER		0x00080000	/* (all) chown */
+#define	ACE_SYNCHRONIZE		0x00100000	/* (all) */
 
 #define	ACE_FILE_INHERIT_ACE		0x0001
 #define	ACE_DIRECTORY_INHERIT_ACE	0x0002
@@ -116,8 +121,6 @@
 #define	ACL_FLAGS_ALL			(ACL_AUTO_INHERIT|ACL_PROTECTED| \
     ACL_DEFAULTED)
 
-#ifdef _KERNEL
-
 /*
  * These are only applicable in a CIFS context.
  */
@@ -137,6 +140,8 @@
 
 #define	ACE_ALL_TYPES	0x001F
 
+#if defined(_KERNEL)
+
 typedef struct ace_object {
 	uid_t		a_who;		/* uid or gid */
 	uint32_t	a_access_mask;	/* read,write,... */
@@ -154,6 +159,21 @@
     ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
     ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
 
+#define	ACE_ALL_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA| \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS|ACE_WRITE_ACL| \
+    ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD)
+
+#define	ACE_READ_PERMS	(ACE_READ_DATA|ACE_READ_ACL|ACE_READ_ATTRIBUTES| \
+    ACE_READ_NAMED_ATTRS)
+
+#define	ACE_WRITE_PERMS	(ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES| \
+    ACE_WRITE_NAMED_ATTRS)
+
+#define	ACE_MODIFY_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
+    ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
+    ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_SYNCHRONIZE)
+
 /*
  * The following flags are supported by both NFSv4 ACLs and ace_t.
  */
@@ -217,6 +237,7 @@
 #define	ACL_APPEND_ID	0x1 	/* append uid/gid to user/group entries */
 #define	ACL_COMPACT_FMT	0x2 	/* build ACL in ls -V format */
 #define	ACL_NORESOLVE	0x4	/* don't do name service lookups */
+#define	ACL_SID_FMT	0x8	/* use usersid/groupsid when appropriate */
 
 /*
  * Legacy aclcheck errors for aclent_t ACLs
@@ -272,13 +293,8 @@
 
 #endif	/* !defined(_KERNEL) */
 
-#if defined(__STDC__)
 extern int acl(const char *path, int cmd, int cnt, void *buf);
 extern int facl(int fd, int cmd, int cnt, void *buf);
-#else	/* !__STDC__ */
-extern int acl();
-extern int facl();
-#endif	/* defined(__STDC__) */
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/lib/libspl/include/sys/bitmap.h b/zfs/lib/libspl/include/sys/bitmap.h
deleted file mode 100644
index 95122ab..0000000
--- a/zfs/lib/libspl/include/sys/bitmap.h
+++ /dev/null

@@ -1,30 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBSPL_SYS_BITMAP_H
-#define	_LIBSPL_SYS_BITMAP_H
-
-#endif

diff --git a/zfs/lib/libspl/include/sys/byteorder.h b/zfs/lib/libspl/include/sys/byteorder.h
deleted file mode 100644
index 72d40b1..0000000
--- a/zfs/lib/libspl/include/sys/byteorder.h
+++ /dev/null

@@ -1,225 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved  	*/
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_BYTEORDER_H
-#define	_SYS_BYTEORDER_H
-
-
-
-#include <sys/isa_defs.h>
-#include <sys/int_types.h>
-
-#if defined(__GNUC__) && defined(_ASM_INLINES) && \
-	(defined(__i386) || defined(__amd64))
-#include <asm/byteorder.h>
-#endif
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * macros for conversion between host and (internet) network byte order
- */
-
-#if defined(_BIG_ENDIAN) && !defined(ntohl) && !defined(__lint)
-/* big-endian */
-#define	ntohl(x)	(x)
-#define	ntohs(x)	(x)
-#define	htonl(x)	(x)
-#define	htons(x)	(x)
-
-#elif !defined(ntohl) /* little-endian */
-
-#ifndef	_IN_PORT_T
-#define	_IN_PORT_T
-typedef uint16_t in_port_t;
-#endif
-
-#ifndef	_IN_ADDR_T
-#define	_IN_ADDR_T
-typedef uint32_t in_addr_t;
-#endif
-
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5)
-extern	uint32_t htonl(uint32_t);
-extern	uint16_t htons(uint16_t);
-extern 	uint32_t ntohl(uint32_t);
-extern	uint16_t ntohs(uint16_t);
-#else
-extern	in_addr_t htonl(in_addr_t);
-extern	in_port_t htons(in_port_t);
-extern 	in_addr_t ntohl(in_addr_t);
-extern	in_port_t ntohs(in_port_t);
-#endif	/* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */
-#endif
-
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
-
-/*
- * Macros to reverse byte order
- */
-#define	BSWAP_8(x)	((x) & 0xff)
-#define	BSWAP_16(x)	((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
-#define	BSWAP_32(x)	((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
-#define	BSWAP_64(x)	((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
-
-#define	BMASK_8(x)	((x) & 0xff)
-#define	BMASK_16(x)	((x) & 0xffff)
-#define	BMASK_32(x)	((x) & 0xffffffff)
-#define	BMASK_64(x)	(x)
-
-/*
- * Macros to convert from a specific byte order to/from native byte order
- */
-#ifdef _BIG_ENDIAN
-#define	BE_8(x)		BMASK_8(x)
-#define	BE_16(x)	BMASK_16(x)
-#define	BE_32(x)	BMASK_32(x)
-#define	BE_64(x)	BMASK_64(x)
-#define	LE_8(x)		BSWAP_8(x)
-#define	LE_16(x)	BSWAP_16(x)
-#define	LE_32(x)	BSWAP_32(x)
-#define	LE_64(x)	BSWAP_64(x)
-#else
-#define	LE_8(x)		BMASK_8(x)
-#define	LE_16(x)	BMASK_16(x)
-#define	LE_32(x)	BMASK_32(x)
-#define	LE_64(x)	BMASK_64(x)
-#define	BE_8(x)		BSWAP_8(x)
-#define	BE_16(x)	BSWAP_16(x)
-#define	BE_32(x)	BSWAP_32(x)
-#define	BE_64(x)	BSWAP_64(x)
-#endif
-
-#ifdef _BIG_ENDIAN
-static __inline__ uint64_t
-htonll(uint64_t n)
-{
-	return (n);
-}
-
-static __inline__ uint64_t
-ntohll(uint64_t n)
-{
-	return (n);
-}
-#else
-static __inline__ uint64_t
-htonll(uint64_t n)
-{
-	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
-}
-
-static __inline__ uint64_t
-ntohll(uint64_t n)
-{
-	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
-}
-#endif
-
-/*
- * Macros to read unaligned values from a specific byte order to
- * native byte order
- */
-
-#define	BE_IN8(xa) \
-	*((uint8_t *)(xa))
-
-#define	BE_IN16(xa) \
-	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
-
-#define	BE_IN32(xa) \
-	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
-
-#define	BE_IN64(xa) \
-	(((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4))
-
-#define	LE_IN8(xa) \
-	*((uint8_t *)(xa))
-
-#define	LE_IN16(xa) \
-	(((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa))
-
-#define	LE_IN32(xa) \
-	(((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa))
-
-#define	LE_IN64(xa) \
-	(((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa))
-
-/*
- * Macros to write unaligned values from native byte order to a specific byte
- * order.
- */
-
-#define	BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
-
-#define	BE_OUT16(xa, yv) \
-	BE_OUT8((uint8_t *)(xa) + 1, yv); \
-	BE_OUT8((uint8_t *)(xa), (yv) >> 8);
-
-#define	BE_OUT32(xa, yv) \
-	BE_OUT16((uint8_t *)(xa) + 2, yv); \
-	BE_OUT16((uint8_t *)(xa), (yv) >> 16);
-
-#define	BE_OUT64(xa, yv) \
-	BE_OUT32((uint8_t *)(xa) + 4, yv); \
-	BE_OUT32((uint8_t *)(xa), (yv) >> 32);
-
-#define	LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
-
-#define	LE_OUT16(xa, yv) \
-	LE_OUT8((uint8_t *)(xa), yv); \
-	LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8);
-
-#define	LE_OUT32(xa, yv) \
-	LE_OUT16((uint8_t *)(xa), yv); \
-	LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16);
-
-#define	LE_OUT64(xa, yv) \
-	LE_OUT32((uint8_t *)(xa), yv); \
-	LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32);
-
-#endif	/* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SYS_BYTEORDER_H */

diff --git a/zfs/lib/libspl/include/sys/debug.h b/zfs/lib/libspl/include/sys/debug.h
index fde4a01..af18da9 100644
--- a/zfs/lib/libspl/include/sys/debug.h
+++ b/zfs/lib/libspl/include/sys/debug.h

@@ -29,4 +29,12 @@
 
 #include <assert.h>
 
+#ifndef	__printflike
+#define	__printflike(x, y) __attribute__((__format__(__printf__, x, y)))
+#endif
+
+#ifndef __maybe_unused
+#define	__maybe_unused __attribute__((unused))
+#endif
+
 #endif

diff --git a/zfs/lib/libspl/include/sys/dkio.h b/zfs/lib/libspl/include/sys/dkio.h
index 2e6b9a1..f3c641f 100644
--- a/zfs/lib/libspl/include/sys/dkio.h
+++ b/zfs/lib/libspl/include/sys/dkio.h

@@ -59,7 +59,6 @@
 	uint_t	dki_vec;		/* interrupt vector */
 	char	dki_dname[DK_DEVLEN];	/* drive name (no unit #) */
 	uint_t	dki_unit;		/* unit number */
-	uint_t	dki_slave;		/* slave number */
 	ushort_t dki_partition;		/* partition number */
 	ushort_t dki_maxtransfer;	/* max. transfer size in DEV_BSIZE */
 };

diff --git a/zfs/lib/libspl/include/sys/dklabel.h b/zfs/lib/libspl/include/sys/dklabel.h
index 95faf2b..8c2ca06 100644
--- a/zfs/lib/libspl/include/sys/dklabel.h
+++ b/zfs/lib/libspl/include/sys/dklabel.h

@@ -31,7 +31,6 @@
 
 #include <sys/isa_defs.h>
 #include <sys/types32.h>
-#include <sys/isa_defs.h>
 
 #ifdef	__cplusplus
 extern "C" {

diff --git a/zfs/lib/libspl/include/sys/dktp/Makefile.am b/zfs/lib/libspl/include/sys/dktp/Makefile.am
index 9887675..4ad3695 100644
--- a/zfs/lib/libspl/include/sys/dktp/Makefile.am
+++ b/zfs/lib/libspl/include/sys/dktp/Makefile.am

@@ -1,4 +1,4 @@
 libspldir = $(includedir)/libspl/sys/dktp
 libspl_HEADERS = \
-	$(top_srcdir)/lib/libspl/include/sys/dktp/fdisk.h
+	fdisk.h
 

diff --git a/zfs/lib/libspl/include/sys/errno.h b/zfs/lib/libspl/include/sys/errno.h
deleted file mode 100644
index e8bfbe3..0000000
--- a/zfs/lib/libspl/include/sys/errno.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2017 Zettabyte Software, LLC.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Compiling against musl correctly points out that including sys/errno.h is
- * disallowed by the Single UNIX Specification when building in userspace, so
- * we implement a dummy header to redirect the include to the proper header.
- */
-#ifndef _LIBSPL_SYS_ERRNO_H
-#define	_LIBSPL_SYS_ERRNO_H
-#include <errno.h>
-#endif /* _LIBSPL_SYS_ERRNO_H */

diff --git a/zfs/lib/libspl/include/sys/feature_tests.h b/zfs/lib/libspl/include/sys/feature_tests.h
index 1a68b75..c9564b2 100644
--- a/zfs/lib/libspl/include/sys/feature_tests.h
+++ b/zfs/lib/libspl/include/sys/feature_tests.h

@@ -27,6 +27,15 @@
 #ifndef _SYS_FEATURE_TESTS_H
 #define	_SYS_FEATURE_TESTS_H
 
-#define	__NORETURN	__attribute__((__noreturn__))
+#define	____cacheline_aligned
+#define	__NORETURN		__attribute__((__noreturn__))
+
+#if !defined(fallthrough) && !defined(_LIBCPP_VERSION)
+#if defined(HAVE_IMPLICIT_FALLTHROUGH)
+#define	fallthrough		__attribute__((__fallthrough__))
+#else
+#define	fallthrough		((void)0)
+#endif
+#endif
 
 #endif

diff --git a/zfs/lib/libspl/include/sys/file.h b/zfs/lib/libspl/include/sys/file.h
deleted file mode 100644
index e0752ac..0000000
--- a/zfs/lib/libspl/include/sys/file.h
+++ /dev/null

@@ -1,49 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBSPL_SYS_FILE_H
-#define	_LIBSPL_SYS_FILE_H
-
-#include_next <sys/file.h>
-
-#include <sys/user.h>
-
-#define	FREAD	1
-#define	FWRITE	2
-// #define	FAPPEND  8
-
-#define	FCREAT	O_CREAT
-#define	FTRUNC	O_TRUNC
-#define	FOFFMAX	O_LARGEFILE
-#define	FSYNC	O_SYNC
-#define	FDSYNC	O_DSYNC
-#define	FEXCL	O_EXCL
-
-#define	FNODSYNC	0x10000	/* fsync pseudo flag */
-#define	FNOFOLLOW	0x20000	/* don't follow symlinks */
-#define	FIGNORECASE	0x80000	/* request case-insensitive lookups */
-
-#endif

diff --git a/zfs/lib/libspl/include/sys/isa_defs.h b/zfs/lib/libspl/include/sys/isa_defs.h
index 7a90e07..8c0932f 100644
--- a/zfs/lib/libspl/include/sys/isa_defs.h
+++ b/zfs/lib/libspl/include/sys/isa_defs.h

@@ -46,12 +46,16 @@
 #define	__x86
 #endif
 
+#if defined(_ILP32)
+/* x32-specific defines; careful to *not* define _LP64 here */
+#else
 #if !defined(_LP64)
 #define	_LP64
 #endif
+#endif
 
-#if !defined(_LITTLE_ENDIAN)
-#define	_LITTLE_ENDIAN
+#if !defined(_ZFS_LITTLE_ENDIAN)
+#define	_ZFS_LITTLE_ENDIAN
 #endif
 
 #define	_SUNOS_VTOC_16
@@ -72,8 +76,8 @@
 #define	_ILP32
 #endif
 
-#if !defined(_LITTLE_ENDIAN)
-#define	_LITTLE_ENDIAN
+#if !defined(_ZFS_LITTLE_ENDIAN)
+#define	_ZFS_LITTLE_ENDIAN
 #endif
 
 #define	_SUNOS_VTOC_16
@@ -103,6 +107,24 @@
 #define	_SUNOS_VTOC_16
 #define	HAVE_EFFICIENT_UNALIGNED_ACCESS
 
+#if defined(__BYTE_ORDER)
+#if defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
+#elif defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+#define	_ZFS_LITTLE_ENDIAN
+#endif
+#elif defined(_BYTE_ORDER)
+#if defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
+#elif defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
+#define	_ZFS_LITTLE_ENDIAN
+#endif
+#elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
+#define	_ZFS_BIG_ENDIAN
+#elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
+#define	_ZFS_LITTLE_ENDIAN
+#endif
+
 /* arm arch specific defines */
 #elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
 
@@ -125,9 +147,9 @@
 #endif
 
 #if defined(__ARMEL__) || defined(__AARCH64EL__)
-#define	_LITTLE_ENDIAN
+#define	_ZFS_LITTLE_ENDIAN
 #else
-#define	_BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
 #endif
 
 #define	_SUNOS_VTOC_16
@@ -147,7 +169,7 @@
 #define	__sparc__
 #endif
 
-#define	_BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
 #define	_SUNOS_VTOC_16
 
 #if defined(__arch64__)
@@ -172,30 +194,49 @@
 #endif
 #endif
 
-#define	_BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
 #define	_SUNOS_VTOC_16
 
 /* MIPS arch specific defines */
 #elif defined(__mips__)
 
 #if defined(__MIPSEB__)
-#define	_BIG_ENDIAN
+#define	_ZFS_BIG_ENDIAN
 #elif defined(__MIPSEL__)
-#define	_LITTLE_ENDIAN
+#define	_ZFS_LITTLE_ENDIAN
 #else
 #error MIPS no endian specified
 #endif
 
-#ifndef _LP64
+#if !defined(_LP64) && !defined(_ILP32)
 #define	_ILP32
 #endif
 
 #define	_SUNOS_VTOC_16
 
+/*
+ * RISC-V arch specific defines
+ * only RV64G (including atomic) LP64 is supported so far
+ */
+#elif defined(__riscv) && defined(_LP64) && _LP64 && \
+	defined(__riscv_atomic) && __riscv_atomic
+
+#ifndef	__riscv__
+#define	__riscv__
+#endif
+
+#ifndef	__rv64g__
+#define	__rv64g__
+#endif
+
+#define	_ZFS_LITTLE_ENDIAN
+
+#define	_SUNOS_VTOC_16
+
 #else
 /*
  * Currently supported:
- * x86_64, i386, arm, powerpc, s390, sparc, and mips
+ * x86_64, x32, i386, arm, powerpc, s390, sparc, mips, and RV64G
  */
 #error "Unsupported ISA type"
 #endif
@@ -208,12 +249,12 @@
 #error "Neither _ILP32 or _LP64 are defined"
 #endif
 
-#if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
-#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined"
+#if defined(_ZFS_LITTLE_ENDIAN) && defined(_ZFS_BIG_ENDIAN)
+#error "Both _ZFS_LITTLE_ENDIAN and _ZFS_BIG_ENDIAN are defined"
 #endif
 
-#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#error "Neither _LITTLE_ENDIAN nor _BIG_ENDIAN are defined"
+#if !defined(_ZFS_LITTLE_ENDIAN) && !defined(_ZFS_BIG_ENDIAN)
+#error "Neither _ZFS_LITTLE_ENDIAN nor _ZFS_BIG_ENDIAN are defined"
 #endif
 
 #ifdef  __cplusplus

diff --git a/zfs/lib/libspl/include/sys/kstat.h b/zfs/lib/libspl/include/sys/kstat.h
index 69fb6d4..f73fb92 100644
--- a/zfs/lib/libspl/include/sys/kstat.h
+++ b/zfs/lib/libspl/include/sys/kstat.h

@@ -796,12 +796,6 @@
 extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
 extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
 extern void kstat_timer_init(kstat_timer_t *, const char *);
-extern void kstat_waitq_enter(kstat_io_t *);
-extern void kstat_waitq_exit(kstat_io_t *);
-extern void kstat_runq_enter(kstat_io_t *);
-extern void kstat_runq_exit(kstat_io_t *);
-extern void kstat_waitq_to_runq(kstat_io_t *);
-extern void kstat_runq_back_to_waitq(kstat_io_t *);
 extern void kstat_timer_start(kstat_timer_t *);
 extern void kstat_timer_stop(kstat_timer_t *);
 

diff --git a/zfs/lib/libspl/include/sys/list_impl.h b/zfs/lib/libspl/include/sys/list_impl.h
index a6614f9..b5655b9 100644
--- a/zfs/lib/libspl/include/sys/list_impl.h
+++ b/zfs/lib/libspl/include/sys/list_impl.h

@@ -34,8 +34,8 @@
 #endif
 
 struct list_node {
-	struct list_node *list_next;
-	struct list_node *list_prev;
+	struct list_node *next;
+	struct list_node *prev;
 };
 
 struct list {

diff --git a/zfs/lib/libspl/include/sys/mnttab.h b/zfs/lib/libspl/include/sys/mnttab.h
deleted file mode 100644
index dd9eb9b..0000000
--- a/zfs/lib/libspl/include/sys/mnttab.h
+++ /dev/null

@@ -1,86 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/*  All Rights Reserved  */
-/*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2006 Ricardo Correia */
-
-#ifndef _SYS_MNTTAB_H
-#define	_SYS_MNTTAB_H
-
-#include <stdio.h>
-#include <mntent.h>
-#include <sys/types.h>
-
-#ifdef MNTTAB
-#undef MNTTAB
-#endif /* MNTTAB */
-
-#define	MNTTAB		"/proc/self/mounts"
-#define	MNT_LINE_MAX	4108
-
-#define	MNT_TOOLONG	1	/* entry exceeds MNT_LINE_MAX */
-#define	MNT_TOOMANY	2	/* too many fields in line */
-#define	MNT_TOOFEW	3	/* too few fields in line */
-
-struct mnttab {
-	char *mnt_special;
-	char *mnt_mountp;
-	char *mnt_fstype;
-	char *mnt_mntopts;
-};
-
-/*
- * NOTE: fields in extmnttab should match struct mnttab till new fields
- * are encountered, this allows hasmntopt to work properly when its arg is
- * a pointer to an extmnttab struct cast to a mnttab struct pointer.
- */
-
-struct extmnttab {
-	char *mnt_special;
-	char *mnt_mountp;
-	char *mnt_fstype;
-	char *mnt_mntopts;
-	uint_t mnt_major;
-	uint_t mnt_minor;
-};
-
-extern int getmntany(FILE *fp, struct mnttab *mp, struct mnttab *mpref);
-extern int _sol_getmntent(FILE *fp, struct mnttab *mp);
-extern int getextmntent(FILE *fp, struct extmnttab *mp, int len);
-
-static inline char *_sol_hasmntopt(struct mnttab *mnt, char *opt)
-{
-	struct mntent mnt_new;
-
-	mnt_new.mnt_opts = mnt->mnt_mntopts;
-
-	return (hasmntopt(&mnt_new, opt));
-}
-
-#define	hasmntopt	_sol_hasmntopt
-#define	getmntent	_sol_getmntent
-
-#endif

diff --git a/zfs/lib/libspl/include/sys/sha2.h b/zfs/lib/libspl/include/sys/sha2.h
new file mode 100644
index 0000000..e2f66d2
--- /dev/null
+++ b/zfs/lib/libspl/include/sys/sha2.h

@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
+
+#ifndef _SYS_SHA2_H
+#define	_SYS_SHA2_H
+
+#include <stdint.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	SHA2_HMAC_MIN_KEY_LEN	1	/* SHA2-HMAC min key length in bytes */
+#define	SHA2_HMAC_MAX_KEY_LEN	INT_MAX	/* SHA2-HMAC max key length in bytes */
+
+#define	SHA256_DIGEST_LENGTH	32	/* SHA256 digest length in bytes */
+#define	SHA384_DIGEST_LENGTH	48	/* SHA384 digest length in bytes */
+#define	SHA512_DIGEST_LENGTH	64	/* SHA512 digest length in bytes */
+
+/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
+#define	SHA512_224_DIGEST_LENGTH	28	/* SHA512/224 digest length */
+#define	SHA512_256_DIGEST_LENGTH	32	/* SHA512/256 digest length */
+
+#define	SHA256_HMAC_BLOCK_SIZE	64	/* SHA256-HMAC block size */
+#define	SHA512_HMAC_BLOCK_SIZE	128	/* SHA512-HMAC block size */
+
+#define	SHA256			0
+#define	SHA256_HMAC		1
+#define	SHA256_HMAC_GEN		2
+#define	SHA384			3
+#define	SHA384_HMAC		4
+#define	SHA384_HMAC_GEN		5
+#define	SHA512			6
+#define	SHA512_HMAC		7
+#define	SHA512_HMAC_GEN		8
+#define	SHA512_224		9
+#define	SHA512_256		10
+
+/*
+ * SHA2 context.
+ * The contents of this structure are a private interface between the
+ * Init/Update/Final calls of the functions defined below.
+ * Callers must never attempt to read or write any of the fields
+ * in this structure directly.
+ */
+typedef struct 	{
+	uint32_t algotype;		/* Algorithm Type */
+
+	/* state (ABCDEFGH) */
+	union {
+		uint32_t s32[8];	/* for SHA256 */
+		uint64_t s64[8];	/* for SHA384/512 */
+	} state;
+	/* number of bits */
+	union {
+		uint32_t c32[2];	/* for SHA256 , modulo 2^64 */
+		uint64_t c64[2];	/* for SHA384/512, modulo 2^128 */
+	} count;
+	union {
+		uint8_t		buf8[128];	/* undigested input */
+		uint32_t	buf32[32];	/* realigned input */
+		uint64_t	buf64[16];	/* realigned input */
+	} buf_un;
+} SHA2_CTX;
+
+typedef SHA2_CTX SHA256_CTX;
+typedef SHA2_CTX SHA384_CTX;
+typedef SHA2_CTX SHA512_CTX;
+
+extern void SHA256Init(SHA256_CTX *);
+
+extern void SHA256Update(SHA256_CTX *, const void *, size_t);
+
+extern void SHA256Final(void *, SHA256_CTX *);
+
+extern void SHA384Init(SHA384_CTX *);
+
+extern void SHA384Update(SHA384_CTX *, const void *, size_t);
+
+extern void SHA384Final(void *, SHA384_CTX *);
+
+extern void SHA512Init(SHA512_CTX *);
+
+extern void SHA512Update(SHA512_CTX *, const void *, size_t);
+
+extern void SHA512Final(void *, SHA512_CTX *);
+
+extern void SHA2Init(uint64_t mech, SHA2_CTX *);
+
+extern void SHA2Update(SHA2_CTX *, const void *, size_t);
+
+extern void SHA2Final(void *, SHA2_CTX *);
+
+#ifdef _SHA2_IMPL
+/*
+ * The following types/functions are all private to the implementation
+ * of the SHA2 functions and must not be used by consumers of the interface
+ */
+
+/*
+ * List of supported mechanisms in this module.
+ *
+ * It is important to note that in the module, division or modulus calculations
+ * are used on the enumerated type to determine which mechanism is being used;
+ * therefore, changing the order or adding mechanisms should be done
+ * carefully.
+ */
+typedef enum sha2_mech_type {
+	SHA256_MECH_INFO_TYPE,		/* SUN_CKM_SHA256 */
+	SHA256_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC */
+	SHA256_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA256_HMAC_GENERAL */
+	SHA384_MECH_INFO_TYPE,		/* SUN_CKM_SHA384 */
+	SHA384_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC */
+	SHA384_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA384_HMAC_GENERAL */
+	SHA512_MECH_INFO_TYPE,		/* SUN_CKM_SHA512 */
+	SHA512_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC */
+	SHA512_HMAC_GEN_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_HMAC_GENERAL */
+	SHA512_224_MECH_INFO_TYPE,	/* SUN_CKM_SHA512_224 */
+	SHA512_256_MECH_INFO_TYPE	/* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#endif /* _SHA2_IMPL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_H */

diff --git a/zfs/lib/libspl/include/sys/signal.h b/zfs/lib/libspl/include/sys/signal.h
deleted file mode 100644
index df9221a..0000000
--- a/zfs/lib/libspl/include/sys/signal.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2017 Zettabyte Software, LLC.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Compiling against musl correctly points out that including sys/signal.h is
- * disallowed by the Single UNIX Specification when building in userspace, so
- * we implement a dummy header to redirect the include to the proper header.
- */
-#ifndef _LIBSPL_SYS_SIGNAL_H
-#define	_LIBSPL_SYS_SIGNAL_H
-#include <signal.h>
-#endif /* _LIBSPL_SYS_SIGNAL_H */

diff --git a/zfs/lib/libspl/include/sys/simd.h b/zfs/lib/libspl/include/sys/simd.h
new file mode 100644
index 0000000..dceedb6
--- /dev/null
+++ b/zfs/lib/libspl/include/sys/simd.h

@@ -0,0 +1,502 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LIBSPL_SYS_SIMD_H
+#define	_LIBSPL_SYS_SIMD_H
+
+#include <sys/isa_defs.h>
+#include <sys/types.h>
+
+#if defined(__x86)
+#include <cpuid.h>
+
+#define	kfpu_allowed()		1
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		0
+#define	kfpu_fini()		((void) 0)
+
+/*
+ * CPUID feature tests for user-space.
+ *
+ * x86 registers used implicitly by CPUID
+ */
+typedef enum cpuid_regs {
+	EAX = 0,
+	EBX,
+	ECX,
+	EDX,
+	CPUID_REG_CNT = 4
+} cpuid_regs_t;
+
+/*
+ * List of instruction sets identified by CPUID
+ */
+typedef enum cpuid_inst_sets {
+	SSE = 0,
+	SSE2,
+	SSE3,
+	SSSE3,
+	SSE4_1,
+	SSE4_2,
+	OSXSAVE,
+	AVX,
+	AVX2,
+	BMI1,
+	BMI2,
+	AVX512F,
+	AVX512CD,
+	AVX512DQ,
+	AVX512BW,
+	AVX512IFMA,
+	AVX512VBMI,
+	AVX512PF,
+	AVX512ER,
+	AVX512VL,
+	AES,
+	PCLMULQDQ,
+	MOVBE
+} cpuid_inst_sets_t;
+
+/*
+ * Instruction set descriptor.
+ */
+typedef struct cpuid_feature_desc {
+	uint32_t leaf;		/* CPUID leaf */
+	uint32_t subleaf;	/* CPUID sub-leaf */
+	uint32_t flag;		/* bit mask of the feature */
+	cpuid_regs_t reg;	/* which CPUID return register to test */
+} cpuid_feature_desc_t;
+
+#define	_AVX512F_BIT		(1U << 16)
+#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
+#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
+#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
+#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
+#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf  */
+#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
+#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
+#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
+#define	_AES_BIT		(1U << 25)
+#define	_PCLMULQDQ_BIT		(1U << 1)
+#define	_MOVBE_BIT		(1U << 22)
+
+/*
+ * Descriptions of supported instruction sets
+ *
+ * Each entry is {leaf, subleaf, flag, reg}: the CPUID leaf and sub-leaf to
+ * query, the bit mask to test and the result register holding it.  A feature
+ * is reported as present only when every bit of the mask is set, which is
+ * why the AVX-512 extensions living in leaf 7 EBX also include _AVX512F_BIT.
+ */
+static const cpuid_feature_desc_t cpuid_features[] = {
+	[SSE]		= {1U, 0U,	1U << 25,	EDX	},
+	[SSE2]		= {1U, 0U,	1U << 26,	EDX	},
+	[SSE3]		= {1U, 0U,	1U << 0,	ECX	},
+	[SSSE3]		= {1U, 0U,	1U << 9,	ECX	},
+	[SSE4_1]	= {1U, 0U,	1U << 19,	ECX	},
+	[SSE4_2]	= {1U, 0U,	1U << 20,	ECX	},
+	[OSXSAVE]	= {1U, 0U,	1U << 27,	ECX	},
+	[AVX]		= {1U, 0U,	1U << 28,	ECX	},
+	[AVX2]		= {7U, 0U,	1U << 5,	EBX	},
+	[BMI1]		= {7U, 0U,	1U << 3,	EBX	},
+	[BMI2]		= {7U, 0U,	1U << 8,	EBX	},
+	[AVX512F]	= {7U, 0U, _AVX512F_BIT,	EBX	},
+	[AVX512CD]	= {7U, 0U, _AVX512CD_BIT,	EBX	},
+	[AVX512DQ]	= {7U, 0U, _AVX512DQ_BIT,	EBX	},
+	[AVX512BW]	= {7U, 0U, _AVX512BW_BIT,	EBX	},
+	[AVX512IFMA]	= {7U, 0U, _AVX512IFMA_BIT,	EBX	},
+	[AVX512VBMI]	= {7U, 0U, _AVX512VBMI_BIT,	ECX	},
+	[AVX512PF]	= {7U, 0U, _AVX512PF_BIT,	EBX	},
+	[AVX512ER]	= {7U, 0U, _AVX512ER_BIT,	EBX	},
+	/* VL must test its own bit (plus Foundation), not the ER bit. */
+	[AVX512VL]	= {7U, 0U, _AVX512F_BIT | _AVX512VL_BIT, EBX },
+	[AES]		= {1U, 0U, _AES_BIT,		ECX	},
+	[PCLMULQDQ]	= {1U, 0U, _PCLMULQDQ_BIT,	ECX	},
+	[MOVBE]		= {1U, 0U, _MOVBE_BIT,		ECX	},
+};
+
+/*
+ * Read the extended control register selected by 'index' with the XGETBV
+ * instruction and return its value as EDX:EAX combined into 64 bits.
+ * Index 0 (XCR0) is what __simd_state_enabled() inspects to learn which
+ * register states the OS has enabled (e.g. for AVX and AVX2).
+ * Only call this function if CPUID indicates that the feature is
+ * supported by the CPU, otherwise it might be an illegal instruction.
+ */
+static inline uint64_t
+xgetbv(uint32_t index)
+{
+	uint32_t eax, edx;
+	/* xgetbv - instruction byte code */
+	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
+	    : "=a" (eax), "=d" (edx)
+	    : "c" (index));
+
+	/* EDX carries the upper 32 bits of the result, EAX the lower 32. */
+	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
+}
+
+/*
+ * Check if CPU supports a feature
+ *
+ * Returns B_TRUE only when every bit of desc->flag is set in the register
+ * desc->reg of the CPUID leaf/sub-leaf named by the descriptor, and B_FALSE
+ * when the bits are missing or the CPU does not implement that leaf.
+ */
+static inline boolean_t
+__cpuid_check_feature(const cpuid_feature_desc_t *desc)
+{
+	uint32_t regs[CPUID_REG_CNT];
+
+	/* The requested leaf lies beyond the highest one the CPU offers. */
+	if (__get_cpuid_max(0, NULL) < desc->leaf)
+		return (B_FALSE);
+
+	/*
+	 * __cpuid_count is needed to properly check for AVX2.  It is a
+	 * macro, so the four result registers are named directly instead
+	 * of being passed as pointers.
+	 */
+	__cpuid_count(desc->leaf, desc->subleaf,
+	    regs[EAX], regs[EBX], regs[ECX], regs[EDX]);
+
+	return ((regs[desc->reg] & desc->flag) == desc->flag);
+}
+
+/*
+ * Generate a static inline predicate named __cpuid_has_<name>() that looks
+ * up entry 'id' of cpuid_features[] and returns the result of
+ * __cpuid_check_feature() for it.
+ */
+#define	CPUID_FEATURE_CHECK(name, id)				\
+static inline boolean_t						\
+__cpuid_has_ ## name(void)					\
+{								\
+	return (__cpuid_check_feature(&cpuid_features[id]));	\
+}
+
+/*
+ * Define functions for user-space CPUID features testing
+ */
+CPUID_FEATURE_CHECK(sse, SSE);
+CPUID_FEATURE_CHECK(sse2, SSE2);
+CPUID_FEATURE_CHECK(sse3, SSE3);
+CPUID_FEATURE_CHECK(ssse3, SSSE3);
+CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
+CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
+CPUID_FEATURE_CHECK(avx, AVX);
+CPUID_FEATURE_CHECK(avx2, AVX2);
+CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
+CPUID_FEATURE_CHECK(bmi1, BMI1);
+CPUID_FEATURE_CHECK(bmi2, BMI2);
+CPUID_FEATURE_CHECK(avx512f, AVX512F);
+CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
+CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
+CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
+CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
+CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
+CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
+CPUID_FEATURE_CHECK(avx512er, AVX512ER);
+CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
+CPUID_FEATURE_CHECK(aes, AES);
+CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
+CPUID_FEATURE_CHECK(movbe, MOVBE);
+
+/*
+ * Detect register set support
+ *
+ * Returns B_TRUE when all bits of 'state' are set in XCR0, B_FALSE when any
+ * of them is clear or when CPUID does not report OSXSAVE.
+ */
+static inline boolean_t
+__simd_state_enabled(const uint64_t state)
+{
+	/* Without OSXSAVE the xgetbv instruction must not be executed. */
+	if (!__cpuid_has_osxsave())
+		return (B_FALSE);
+
+	return ((xgetbv(0) & state) == state);
+}
+
+#define	_XSTATE_SSE_AVX		(0x2 | 0x4)
+#define	_XSTATE_AVX512		(0xE0 | _XSTATE_SSE_AVX)
+
+#define	__ymm_enabled()		__simd_state_enabled(_XSTATE_SSE_AVX)
+#define	__zmm_enabled()		__simd_state_enabled(_XSTATE_AVX512)
+
+/*
+ * Check if SSE instruction set is available
+ */
+static inline boolean_t
+zfs_sse_available(void)
+{
+	return (__cpuid_has_sse());
+}
+
+/*
+ * Check if SSE2 instruction set is available
+ */
+static inline boolean_t
+zfs_sse2_available(void)
+{
+	return (__cpuid_has_sse2());
+}
+
+/*
+ * Check if SSE3 instruction set is available
+ */
+static inline boolean_t
+zfs_sse3_available(void)
+{
+	return (__cpuid_has_sse3());
+}
+
+/*
+ * Check if SSSE3 instruction set is available
+ */
+static inline boolean_t
+zfs_ssse3_available(void)
+{
+	return (__cpuid_has_ssse3());
+}
+
+/*
+ * Check if SSE4.1 instruction set is available
+ */
+static inline boolean_t
+zfs_sse4_1_available(void)
+{
+	return (__cpuid_has_sse4_1());
+}
+
+/*
+ * Check if SSE4.2 instruction set is available
+ */
+static inline boolean_t
+zfs_sse4_2_available(void)
+{
+	return (__cpuid_has_sse4_2());
+}
+
+/*
+ * Check if AVX instruction set is available
+ */
+static inline boolean_t
+zfs_avx_available(void)
+{
+	return (__cpuid_has_avx() && __ymm_enabled());
+}
+
+/*
+ * Check if AVX2 instruction set is available
+ */
+static inline boolean_t
+zfs_avx2_available(void)
+{
+	return (__cpuid_has_avx2() && __ymm_enabled());
+}
+
+/*
+ * Check if BMI1 instruction set is available
+ */
+static inline boolean_t
+zfs_bmi1_available(void)
+{
+	return (__cpuid_has_bmi1());
+}
+
+/*
+ * Check if BMI2 instruction set is available
+ */
+static inline boolean_t
+zfs_bmi2_available(void)
+{
+	return (__cpuid_has_bmi2());
+}
+
+/*
+ * Check if AES instruction set is available
+ */
+static inline boolean_t
+zfs_aes_available(void)
+{
+	return (__cpuid_has_aes());
+}
+
+/*
+ * Check if PCLMULQDQ instruction set is available
+ */
+static inline boolean_t
+zfs_pclmulqdq_available(void)
+{
+	return (__cpuid_has_pclmulqdq());
+}
+
+/*
+ * Check if MOVBE instruction is available
+ */
+static inline boolean_t
+zfs_movbe_available(void)
+{
+	return (__cpuid_has_movbe());
+}
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F	Foundation
+ * AVX512CD	Conflict Detection Instructions
+ * AVX512ER	Exponential and Reciprocal Instructions
+ * AVX512PF	Prefetch Instructions
+ *
+ * AVX512BW	Byte and Word Instructions
+ * AVX512DQ	Double-word and Quadword Instructions
+ * AVX512VL	Vector Length Extensions
+ *
+ * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI	Vector Byte Manipulation Instructions
+ */
+
+/*
+ * Check if AVX512F instruction set is available
+ */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+	return (__cpuid_has_avx512f() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512CD instruction set is available
+ */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+	return (__cpuid_has_avx512cd() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512ER instruction set is available
+ */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+	return (__cpuid_has_avx512er() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512PF instruction set is available
+ */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+	return (__cpuid_has_avx512pf() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512BW instruction set is available
+ */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+	return (__cpuid_has_avx512bw() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512DQ instruction set is available
+ */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+	return (__cpuid_has_avx512dq() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512VL instruction set is available
+ */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+	return (__cpuid_has_avx512vl() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512IFMA instruction set is available
+ */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+	return (__cpuid_has_avx512ifma() && __zmm_enabled());
+}
+
+/*
+ * Check if AVX512VBMI instruction set is available
+ */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+	return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
+	    __zmm_enabled());
+}
+
+#elif defined(__aarch64__)
+
+#define	kfpu_allowed()		1
+#define	kfpu_initialize(tsk)	do {} while (0)
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+
+#elif defined(__powerpc__)
+
+#define	kfpu_allowed()		1
+#define	kfpu_initialize(tsk)	do {} while (0)
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+
+/*
+ * Check if AltiVec instruction set is available
+ * No easy way beyond 'altivec works' :-(
+ */
+#include <signal.h>
+#include <setjmp.h>
+
+#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
+/* Jump target used to recover from a SIGILL raised by the probe below. */
+static sigjmp_buf env;
+
+/* SIGILL handler: unwind back to the sigsetjmp() in the probe. */
+static void sigillhandler(int x)
+{
+	(void) x;
+	siglongjmp(env, 1);
+}
+#endif
+
+/*
+ * Probe for AltiVec by executing one vector instruction with a temporary
+ * SIGILL handler installed.  Returns B_TRUE if the instruction ran, B_FALSE
+ * if it trapped or if the probe is compiled out (no __ALTIVEC__, or
+ * FreeBSD).  The previous SIGILL disposition is restored on both paths.
+ */
+static inline boolean_t
+zfs_altivec_available(void)
+{
+	boolean_t has_altivec = B_FALSE;
+#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
+	sighandler_t savesig;
+	savesig = signal(SIGILL, sigillhandler);
+	/*
+	 * Save the signal mask (second argument non-zero) so that jumping
+	 * out of the handler restores it.  With plain setjmp()/longjmp()
+	 * SIGILL may be left blocked on systems where the handler runs
+	 * with the signal masked.
+	 */
+	if (sigsetjmp(env, 1)) {
+		signal(SIGILL, savesig);
+		has_altivec = B_FALSE;
+	} else {
+		__asm__ __volatile__("vor 0,0,0\n" : : : "v0");
+		signal(SIGILL, savesig);
+		has_altivec = B_TRUE;
+	}
+#endif
+	return (has_altivec);
+}
+#else
+
+#define	kfpu_allowed()		0
+#define	kfpu_initialize(tsk)	do {} while (0)
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+
+#endif
+
+#endif /* _LIBSPL_SYS_SIMD_H */

diff --git a/zfs/lib/libspl/include/sys/trace_spl.h b/zfs/lib/libspl/include/sys/trace_spl.h
new file mode 100644
index 0000000..b80d288
--- /dev/null
+++ b/zfs/lib/libspl/include/sys/trace_spl.h

@@ -0,0 +1,24 @@
+/* Here to keep the libspl build happy */
+
+#ifndef _LIBSPL_SPL_TRACE_H
+#define	_LIBSPL_SPL_TRACE_H
+
+/*
+ * The set-error SDT probe is extra static, in that we declare its fake
+ * function literally, rather than with the DTRACE_PROBE1() macro.  This is
+ * necessary so that SET_ERROR() can evaluate to a value, which wouldn't
+ * be possible if it required multiple statements (to declare the function
+ * and then call it).
+ *
+ * SET_ERROR() uses the comma operator so that it can be used without much
+ * additional code.  For example, "return (EINVAL);" becomes
+ * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
+ * twice, so it should not have side effects (e.g. something like:
+ * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
+ */
+#undef SET_ERROR
+#define	SET_ERROR(err) \
+	(__set_error(__FILE__, __func__, __LINE__, err), err)
+
+
+#endif

diff --git a/zfs/lib/libspl/include/sys/trace_zfs.h b/zfs/lib/libspl/include/sys/trace_zfs.h
new file mode 100644
index 0000000..87ed5ad
--- /dev/null
+++ b/zfs/lib/libspl/include/sys/trace_zfs.h

@@ -0,0 +1,24 @@
+/* Here to keep the libspl build happy */
+
+#ifndef _LIBSPL_ZFS_TRACE_H
+#define	_LIBSPL_ZFS_TRACE_H
+
+/*
+ * The set-error SDT probe is extra static, in that we declare its fake
+ * function literally, rather than with the DTRACE_PROBE1() macro.  This is
+ * necessary so that SET_ERROR() can evaluate to a value, which wouldn't
+ * be possible if it required multiple statements (to declare the function
+ * and then call it).
+ *
+ * SET_ERROR() uses the comma operator so that it can be used without much
+ * additional code.  For example, "return (EINVAL);" becomes
+ * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
+ * twice, so it should not have side effects (e.g. something like:
+ * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
+ */
+#undef SET_ERROR
+#define	SET_ERROR(err) \
+	(__set_error(__FILE__, __func__, __LINE__, err), err)
+
+
+#endif

diff --git a/zfs/lib/libspl/include/sys/uio.h b/zfs/lib/libspl/include/sys/uio.h
index 91ee3b3..81ade54 100644
--- a/zfs/lib/libspl/include/sys/uio.h
+++ b/zfs/lib/libspl/include/sys/uio.h

@@ -40,69 +40,73 @@
 #ifndef	_LIBSPL_SYS_UIO_H
 #define	_LIBSPL_SYS_UIO_H
 
+#include <sys/types.h>
 #include_next <sys/uio.h>
 
+#ifdef __APPLE__
+#include <sys/_types/_iovec_t.h>
+#endif
+
+#include <stdint.h>
 typedef struct iovec iovec_t;
 
-typedef enum uio_rw {
+#if defined(__linux__) || defined(__APPLE__)
+typedef enum zfs_uio_rw {
 	UIO_READ =	0,
 	UIO_WRITE =	1,
-} uio_rw_t;
+} zfs_uio_rw_t;
 
-typedef enum uio_seg {
+typedef enum zfs_uio_seg {
 	UIO_USERSPACE =	0,
 	UIO_SYSSPACE =	1,
-	UIO_USERISPACE = 2,
-} uio_seg_t;
+} zfs_uio_seg_t;
 
-typedef struct uio {
+#elif defined(__FreeBSD__)
+typedef enum uio_seg  zfs_uio_seg_t;
+#endif
+
+typedef struct zfs_uio {
 	struct iovec	*uio_iov;	/* pointer to array of iovecs */
 	int		uio_iovcnt;	/* number of iovecs */
 	offset_t	uio_loffset;	/* file offset */
-	uio_seg_t	uio_segflg;	/* address space (kernel or user) */
+	zfs_uio_seg_t	uio_segflg;	/* address space (kernel or user) */
 	uint16_t	uio_fmode;	/* file mode flags */
 	uint16_t	uio_extflg;	/* extended flags */
-	offset_t	uio_limit;	/* u-limit (maximum byte offset) */
 	ssize_t		uio_resid;	/* residual count */
-} uio_t;
+} zfs_uio_t;
 
-typedef enum xuio_type {
-	UIOTYPE_ASYNCIO,
-	UIOTYPE_ZEROCOPY,
-} xuio_type_t;
+#define	zfs_uio_segflg(uio)		(uio)->uio_segflg
+#define	zfs_uio_offset(uio)		(uio)->uio_loffset
+#define	zfs_uio_resid(uio)		(uio)->uio_resid
+#define	zfs_uio_iovcnt(uio)		(uio)->uio_iovcnt
+#define	zfs_uio_iovlen(uio, idx)	(uio)->uio_iov[(idx)].iov_len
+#define	zfs_uio_iovbase(uio, idx)	(uio)->uio_iov[(idx)].iov_base
 
-#define	UIOA_IOV_MAX	16
+/*
+ * Return, through 'base' and 'len', the base address and length of the
+ * iovec at position 'idx' of the uio.  No bounds check is done on 'idx'.
+ */
+static inline void
+zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
+{
+	*base = zfs_uio_iovbase(uio, idx);
+	*len = zfs_uio_iovlen(uio, idx);
+}
 
-typedef struct uioa_page_s {		/* locked uio_iov state */
-	int	uioa_pfncnt;		/* count of pfn_t(s) in *uioa_ppp */
-	void	**uioa_ppp;		/* page_t or pfn_t array */
-	caddr_t	uioa_base;		/* address base */
-	size_t	uioa_len;		/* span length */
-} uioa_page_t;
+/*
+ * Account for 'size' bytes having been transferred: shrink the residual
+ * count and move the file offset forward.  The iovec array itself is not
+ * modified.
+ */
+static inline void
+zfs_uio_advance(zfs_uio_t *uio, size_t size)
+{
+	uio->uio_resid -= size;
+	uio->uio_loffset += size;
+}
 
-typedef struct xuio {
-	uio_t xu_uio;				/* embedded UIO structure */
+static inline offset_t
+zfs_uio_index_at_offset(zfs_uio_t *uio, offset_t off, uint_t *vec_idx)
+{
+	*vec_idx = 0;
+	while (*vec_idx < (uint_t)zfs_uio_iovcnt(uio) &&
+	    off >= (offset_t)zfs_uio_iovlen(uio, *vec_idx)) {
+		off -= zfs_uio_iovlen(uio, *vec_idx);
+		(*vec_idx)++;
+	}
 
-	/* Extended uio fields */
-	enum xuio_type xu_type;			/* uio type */
-	union {
-		struct {
-			uint32_t xu_a_state;	/* state of async i/o */
-			ssize_t xu_a_mbytes;	/* bytes moved */
-			uioa_page_t *xu_a_lcur;	/* uioa_locked[] pointer */
-			void **xu_a_lppp;	/* lcur->uioa_pppp[] pointer */
-			void *xu_a_hwst[4];	/* opaque hardware state */
-			uioa_page_t xu_a_locked[UIOA_IOV_MAX];
-		} xu_aio;
-
-		struct {
-			int xu_zc_rw;		/* read or write buffer */
-			void *xu_zc_priv;	/* fs specific */
-		} xu_zc;
-	} xu_ext;
-} xuio_t;
-
-#define	XUIO_XUZC_PRIV(xuio)	xuio->xu_ext.xu_zc.xu_zc_priv
-#define	XUIO_XUZC_RW(xuio)	xuio->xu_ext.xu_zc.xu_zc_rw
+	return (off);
+}
 
 #endif	/* _SYS_UIO_H */

diff --git a/zfs/lib/libspl/include/sys/wmsum.h b/zfs/lib/libspl/include/sys/wmsum.h
new file mode 100644
index 0000000..0679af7
--- /dev/null
+++ b/zfs/lib/libspl/include/sys/wmsum.h

@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * wmsum counters are a reduced version of aggsum counters, optimized for
+ * write-mostly scenarios.  They do not provide optimized read functions,
+ * but instead allow much cheaper add function.  The primary usage is
+ * infrequently read statistic counters, not requiring exact precision.
+ *
+ * In user space, for lack of a better implementation, wmsum is mapped to aggsum.
+ */
+
+#ifndef	_SYS_WMSUM_H
+#define	_SYS_WMSUM_H
+
+#include <sys/aggsum.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	wmsum_t	aggsum_t
+
+static inline void
+wmsum_init(wmsum_t *ws, uint64_t value)
+{
+
+	aggsum_init(ws, value);
+}
+
+static inline void
+wmsum_fini(wmsum_t *ws)
+{
+
+	aggsum_fini(ws);
+}
+
+static inline uint64_t
+wmsum_value(wmsum_t *ws)
+{
+
+	return (aggsum_value(ws));
+}
+
+static inline void
+wmsum_add(wmsum_t *ws, int64_t delta)
+{
+
+	aggsum_add(ws, delta);
+}
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_WMSUM_H */

diff --git a/zfs/lib/libspl/include/umem.h b/zfs/lib/libspl/include/umem.h
index 59dc931..65f1259 100644
--- a/zfs/lib/libspl/include/umem.h
+++ b/zfs/lib/libspl/include/umem.h

@@ -36,6 +36,7 @@
  *
  * https://labs.omniti.com/trac/portableumem
  */
+#include <sys/debug.h>
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -56,10 +57,7 @@
 /*
  * Flags for umem_cache_create()
  */
-#define	UMC_NOTOUCH		0x00010000
 #define	UMC_NODEBUG		0x00020000
-#define	UMC_NOMAGAZINE		0x00040000
-#define	UMC_NOHASH		0x00080000
 
 #define	UMEM_CACHE_NAMELEN	31
 
@@ -80,6 +78,11 @@
 	int			cache_cflags;
 } umem_cache_t;
 
+/* Prototypes for functions to provide defaults for umem envvars */
+const char *_umem_debug_init(void);
+const char *_umem_options_init(void);
+const char *_umem_logging_init(void);
+
 static inline void *
 umem_alloc(size_t size, int flags)
 {
@@ -126,13 +129,13 @@
 }
 
 static inline void
-umem_free(void *ptr, size_t size)
+umem_free(void *ptr, size_t size __maybe_unused)
 {
 	free(ptr);
 }
 
 static inline void
-umem_nofail_callback(umem_nofail_callback_t *cb)
+umem_nofail_callback(umem_nofail_callback_t *cb __maybe_unused)
 {}
 
 static inline umem_cache_t *
@@ -145,7 +148,7 @@
 {
 	umem_cache_t *cp;
 
-	cp = umem_alloc(sizeof (umem_cache_t), UMEM_DEFAULT);
+	cp = (umem_cache_t *)umem_alloc(sizeof (umem_cache_t), UMEM_DEFAULT);
 	if (cp) {
 		strlcpy(cp->cache_name, name, UMEM_CACHE_NAMELEN);
 		cp->cache_bufsize = bufsize;
@@ -194,7 +197,7 @@
 }
 
 static inline void
-umem_cache_reap_now(umem_cache_t *cp)
+umem_cache_reap_now(umem_cache_t *cp __maybe_unused)
 {
 }
 

diff --git a/zfs/lib/libspl/include/util/Makefile.am b/zfs/lib/libspl/include/util/Makefile.am
index 060e143..ab553bc 100644
--- a/zfs/lib/libspl/include/util/Makefile.am
+++ b/zfs/lib/libspl/include/util/Makefile.am

@@ -1,3 +1,3 @@
 libspldir = $(includedir)/libspl
 libspl_HEADERS = \
-	$(top_srcdir)/lib/libspl/include/util/sscanf.h
+	sscanf.h

diff --git a/zfs/lib/libspl/include/zone.h b/zfs/lib/libspl/include/zone.h
index b4a6deb..b0ac2d9 100644
--- a/zfs/lib/libspl/include/zone.h
+++ b/zfs/lib/libspl/include/zone.h

@@ -26,25 +26,16 @@
 #ifndef _LIBSPL_ZONE_H
 #define	_LIBSPL_ZONE_H
 
-
-
 #include <sys/types.h>
 #include <sys/zone.h>
-#include <sys/priv.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #define	GLOBAL_ZONEID		0
-#define	GLOBAL_ZONEID_NAME	"global"
 
-/*
- * Functions for mapping between id and name for active zones.
- */
 extern zoneid_t		getzoneid(void);
-extern zoneid_t		getzoneidbyname(const char *);
-extern ssize_t		getzonenamebyid(zoneid_t, char *, size_t);
 
 #ifdef	__cplusplus
 }

diff --git a/zfs/lib/libspl/list.c b/zfs/lib/libspl/list.c
index b29dc8a..0f2f373 100644
--- a/zfs/lib/libspl/list.c
+++ b/zfs/lib/libspl/list.c

@@ -35,28 +35,28 @@
 
 #define	list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
 #define	list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
-#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+#define	list_empty(a) ((a)->list_head.next == &(a)->list_head)
 
 #define	list_insert_after_node(list, node, object) {	\
 	list_node_t *lnew = list_d2l(list, object);	\
-	lnew->list_prev = (node);			\
-	lnew->list_next = (node)->list_next;		\
-	(node)->list_next->list_prev = lnew;		\
-	(node)->list_next = lnew;			\
+	lnew->prev = (node);			\
+	lnew->next = (node)->next;		\
+	(node)->next->prev = lnew;		\
+	(node)->next = lnew;			\
 }
 
 #define	list_insert_before_node(list, node, object) {	\
 	list_node_t *lnew = list_d2l(list, object);	\
-	lnew->list_next = (node);			\
-	lnew->list_prev = (node)->list_prev;		\
-	(node)->list_prev->list_next = lnew;		\
-	(node)->list_prev = lnew;			\
+	lnew->next = (node);			\
+	lnew->prev = (node)->prev;		\
+	(node)->prev->next = lnew;		\
+	(node)->prev = lnew;			\
 }
 
 #define	list_remove_node(node)					\
-	(node)->list_prev->list_next = (node)->list_next;	\
-	(node)->list_next->list_prev = (node)->list_prev;	\
-	(node)->list_next = (node)->list_prev = NULL
+	(node)->prev->next = (node)->next;	\
+	(node)->next->prev = (node)->prev;	\
+	(node)->next = (node)->prev = NULL
 
 void
 list_create(list_t *list, size_t size, size_t offset)
@@ -67,8 +67,7 @@
 
 	list->list_size = size;
 	list->list_offset = offset;
-	list->list_head.list_next = list->list_head.list_prev =
-	    &list->list_head;
+	list->list_head.next = list->list_head.prev = &list->list_head;
 }
 
 void
@@ -77,10 +76,10 @@
 	list_node_t *node = &list->list_head;
 
 	ASSERT(list);
-	ASSERT(list->list_head.list_next == node);
-	ASSERT(list->list_head.list_prev == node);
+	ASSERT(list->list_head.next == node);
+	ASSERT(list->list_head.prev == node);
 
-	node->list_next = node->list_prev = NULL;
+	node->next = node->prev = NULL;
 }
 
 void
@@ -124,14 +123,14 @@
 {
 	list_node_t *lold = list_d2l(list, object);
 	ASSERT(!list_empty(list));
-	ASSERT(lold->list_next != NULL);
+	ASSERT(lold->next != NULL);
 	list_remove_node(lold);
 }
 
 void *
 list_remove_head(list_t *list)
 {
-	list_node_t *head = list->list_head.list_next;
+	list_node_t *head = list->list_head.next;
 	if (head == &list->list_head)
 		return (NULL);
 	list_remove_node(head);
@@ -141,7 +140,7 @@
 void *
 list_remove_tail(list_t *list)
 {
-	list_node_t *tail = list->list_head.list_prev;
+	list_node_t *tail = list->list_head.prev;
 	if (tail == &list->list_head)
 		return (NULL);
 	list_remove_node(tail);
@@ -153,7 +152,7 @@
 {
 	if (list_empty(list))
 		return (NULL);
-	return (list_object(list, list->list_head.list_next));
+	return (list_object(list, list->list_head.next));
 }
 
 void *
@@ -161,7 +160,7 @@
 {
 	if (list_empty(list))
 		return (NULL);
-	return (list_object(list, list->list_head.list_prev));
+	return (list_object(list, list->list_head.prev));
 }
 
 void *
@@ -169,8 +168,8 @@
 {
 	list_node_t *node = list_d2l(list, object);
 
-	if (node->list_next != &list->list_head)
-		return (list_object(list, node->list_next));
+	if (node->next != &list->list_head)
+		return (list_object(list, node->next));
 
 	return (NULL);
 }
@@ -180,8 +179,8 @@
 {
 	list_node_t *node = list_d2l(list, object);
 
-	if (node->list_prev != &list->list_head)
-		return (list_object(list, node->list_prev));
+	if (node->prev != &list->list_head)
+		return (list_object(list, node->prev));
 
 	return (NULL);
 }
@@ -201,13 +200,13 @@
 	if (list_empty(src))
 		return;
 
-	dstnode->list_prev->list_next = srcnode->list_next;
-	srcnode->list_next->list_prev = dstnode->list_prev;
-	dstnode->list_prev = srcnode->list_prev;
-	srcnode->list_prev->list_next = dstnode;
+	dstnode->prev->next = srcnode->next;
+	srcnode->next->prev = dstnode->prev;
+	dstnode->prev = srcnode->prev;
+	srcnode->prev->next = dstnode;
 
 	/* empty src list */
-	srcnode->list_next = srcnode->list_prev = srcnode;
+	srcnode->next = srcnode->prev = srcnode;
 }
 
 void
@@ -216,24 +215,25 @@
 	ASSERT(list_link_active(lold));
 	ASSERT(!list_link_active(lnew));
 
-	lnew->list_next = lold->list_next;
-	lnew->list_prev = lold->list_prev;
-	lold->list_prev->list_next = lnew;
-	lold->list_next->list_prev = lnew;
-	lold->list_next = lold->list_prev = NULL;
+	lnew->next = lold->next;
+	lnew->prev = lold->prev;
+	lold->prev->next = lnew;
+	lold->next->prev = lnew;
+	lold->next = lold->prev = NULL;
 }
 
 void
 list_link_init(list_node_t *ln)
 {
-	ln->list_next = NULL;
-	ln->list_prev = NULL;
+	ln->next = NULL;
+	ln->prev = NULL;
 }
 
 int
 list_link_active(list_node_t *ln)
 {
-	return (ln->list_next != NULL);
+	EQUIV(ln->next == NULL, ln->prev == NULL);
+	return (ln->next != NULL);
 }
 
 int

diff --git a/zfs/lib/libspl/os/freebsd/getexecname.c b/zfs/lib/libspl/os/freebsd/getexecname.c
new file mode 100644
index 0000000..2b057cc
--- /dev/null
+++ b/zfs/lib/libspl/os/freebsd/getexecname.c

@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/*
+ * Return the path of the running executable as reported by the
+ * kern.proc.pathname sysctl, or NULL if the lookup fails.  The result is
+ * cached in a static buffer guarded by a mutex; a failed lookup leaves the
+ * cache empty so the next call tries again.
+ */
+const char *
+getexecname(void)
+{
+	static char execname[PATH_MAX + 1] = "";
+	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
+	char *result = execname;
+
+	(void) pthread_mutex_lock(&mtx);
+
+	/* An empty buffer means the path has not been resolved (yet). */
+	if (execname[0] == '\0') {
+		int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+		size_t len = PATH_MAX;
+
+		if (sysctl(mib, nitems(mib), execname, &len, NULL, 0) != 0) {
+			/* Lookup failed: keep the cache empty, report NULL. */
+			execname[0] = '\0';
+			result = NULL;
+		} else {
+			execname[len] = '\0';
+		}
+	}
+
+	(void) pthread_mutex_unlock(&mtx);
+	return (result);
+}

diff --git a/zfs/lib/libspl/os/freebsd/gethostid.c b/zfs/lib/libspl/os/freebsd/gethostid.c
new file mode 100644
index 0000000..7bd567f
--- /dev/null
+++ b/zfs/lib/libspl/os/freebsd/gethostid.c

@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+
+unsigned long
+get_system_hostid(void)
+{
+	return (gethostid());
+}

diff --git a/zfs/lib/libspl/os/freebsd/getmntany.c b/zfs/lib/libspl/os/freebsd/getmntany.c
new file mode 100644
index 0000000..0ef2405
--- /dev/null
+++ b/zfs/lib/libspl/os/freebsd/getmntany.c

@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Ricardo Correia.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1988 AT&T	*/
+/*	  All Rights Reserved	*/
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/errno.h>
+#include <sys/mnttab.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Fill 'statbuf' with the stat64() result for 'path' and 'entry' with the
+ * mount information of the file system containing it (statfs() converted
+ * by statfs2mnttab()).
+ *
+ * Returns 0 on success.  Returns -1, after printing a message to stderr,
+ * if the path is MAXPATHLEN or longer or if stat64()/statfs() fails.
+ */
+int
+getextmntent(const char *path, struct extmnttab *entry, struct stat64 *statbuf)
+{
+	struct statfs sfs;
+
+	if (strlen(path) >= MAXPATHLEN) {
+		(void) fprintf(stderr, "invalid object; pathname too long\n");
+		return (-1);
+	}
+
+	if (stat64(path, statbuf) != 0) {
+		(void) fprintf(stderr, "cannot open '%s': %s\n",
+		    path, strerror(errno));
+		return (-1);
+	}
+
+	if (statfs(path, &sfs) != 0) {
+		(void) fprintf(stderr, "%s: %s\n", path,
+		    strerror(errno));
+		return (-1);
+	}
+	/* The extmnttab pointer is handed to the converter as a mnttab. */
+	statfs2mnttab(&sfs, (struct mnttab *)entry);
+	return (0);
+}

diff --git a/zfs/lib/libspl/os/freebsd/mnttab.c b/zfs/lib/libspl/os/freebsd/mnttab.c
new file mode 100644
index 0000000..a240ca7
--- /dev/null
+++ b/zfs/lib/libspl/os/freebsd/mnttab.c

@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file implements Solaris compatible getmntany() and hasmntopt()
+ * functions.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Split the next comma-separated option off the string at *p.
+ *
+ * Leading whitespace is skipped, the option is NUL-terminated in place
+ * (the separating ',' is overwritten), and *p is advanced past it.
+ * Returns a pointer to the option; an empty string means no option was
+ * found at this position.
+ */
+static char *
+mntopt(char **p)
+{
+	char *cp = *p;
+	char *retstr;
+
+	/*
+	 * The cast is required: passing a negative plain char (e.g. a
+	 * byte >= 0x80 where char is signed) to isspace() is undefined.
+	 */
+	while (*cp && isspace((unsigned char)*cp))
+		cp++;
+
+	retstr = cp;
+	while (*cp && *cp != ',')
+		cp++;
+
+	if (*cp) {
+		*cp = '\0';
+		cp++;
+	}
+
+	*p = cp;
+	return (retstr);
+}
+
+/*
+ * Search the comma-separated option list of 'mnt' for 'opt'.
+ *
+ * An option matches when it begins with 'opt' (prefix comparison over
+ * strlen(opt) bytes).  Returns a pointer to the start of the matching
+ * option inside mnt->mnt_mntopts, or NULL if there is no option string
+ * or no match.  The scan stops at the first empty option.
+ */
+char *
+hasmntopt(struct mnttab *mnt, char *opt)
+{
+	char tmpopts[MNT_LINE_MAX];
+	char *f, *opts = tmpopts;
+
+	if (mnt->mnt_mntopts == NULL)
+		return (NULL);
+	/*
+	 * mntopt() modifies its input, so work on a private copy.  The
+	 * copy is bounded: an option string longer than the scratch
+	 * buffer is truncated instead of overrunning the stack.
+	 */
+	(void) strlcpy(tmpopts, mnt->mnt_mntopts, sizeof (tmpopts));
+	f = mntopt(&opts);
+	for (; *f; f = mntopt(&opts)) {
+		if (strncmp(opt, f, strlen(opt)) == 0) {
+			/* Map the offset in the copy back to the original. */
+			return (f - tmpopts + mnt->mnt_mntopts);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * Append 'opt' to the option list held in 'mntopts' (a buffer of 'size'
+ * bytes), inserting a ',' separator when the list is not empty.
+ */
+static void
+optadd(char *mntopts, size_t size, const char *opt)
+{
+	const char *sep = (mntopts[0] == '\0') ? "" : ",";
+
+	strlcat(mntopts, sep, size);
+	strlcat(mntopts, opt, size);
+}
+
+static __thread char gfstypename[MFSNAMELEN];
+static __thread char gmntfromname[MNAMELEN];
+static __thread char gmntonname[MNAMELEN];
+static __thread char gmntopts[MNTMAXSTR];
+
+/*
+ * Translate a statfs record into a Solaris-style mnttab entry.
+ *
+ * The strings referenced by 'mp' live in per-thread static buffers, so
+ * they are overwritten by the next call made from the same thread.
+ */
+void
+statfs2mnttab(struct statfs *sfs, struct mnttab *mp)
+{
+	long flags = sfs->f_flags;
+
+	strlcpy(gfstypename, sfs->f_fstypename, sizeof (gfstypename));
+	strlcpy(gmntfromname, sfs->f_mntfromname, sizeof (gmntfromname));
+	strlcpy(gmntonname, sfs->f_mntonname, sizeof (gmntonname));
+
+	/* Rebuild the option string from the mount flags. */
+	gmntopts[0] = '\0';
+#define	OPTADD(opt)	optadd(gmntopts, sizeof (gmntopts), (opt))
+	OPTADD((flags & MNT_RDONLY) ? MNTOPT_RO : MNTOPT_RW);
+	OPTADD((flags & MNT_NOSUID) ? MNTOPT_NOSETUID : MNTOPT_SETUID);
+	if (flags & MNT_UPDATE)
+		OPTADD(MNTOPT_REMOUNT);
+	OPTADD((flags & MNT_NOATIME) ? MNTOPT_NOATIME : MNTOPT_ATIME);
+	OPTADD(MNTOPT_NOXATTR);
+	OPTADD((flags & MNT_NOEXEC) ? MNTOPT_NOEXEC : MNTOPT_EXEC);
+#undef	OPTADD
+
+	mp->mnt_fstype = gfstypename;
+	mp->mnt_special = gmntfromname;
+	mp->mnt_mountp = gmntonname;
+	mp->mnt_mntopts = gmntopts;
+}
+
+static pthread_rwlock_t gsfs_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct statfs *gsfs = NULL;
+static int allfs = 0;
+
+/*
+ * (Re)build the cached array of mounted filesystems (gsfs / allfs).
+ *
+ * The whole refresh runs under the write side of gsfs_lock.  Returns 0 on
+ * success; on failure the cache is emptied and the errno value observed at
+ * the failure point is returned.
+ */
+static int
+statfs_init(void)
+{
+	struct statfs *sfs;
+	int error;
+
+	(void) pthread_rwlock_wrlock(&gsfs_lock);
+
+	/* Drop any previous snapshot before taking a new one. */
+	if (gsfs != NULL) {
+		free(gsfs);
+		gsfs = NULL;
+	}
+	/* First call with a NULL buffer only asks for the entry count. */
+	allfs = getfsstat(NULL, 0, MNT_NOWAIT);
+	if (allfs == -1)
+		goto fail;
+	/*
+	 * Allocate room for twice the reported count -- presumably headroom
+	 * for filesystems mounted between the two getfsstat() calls.
+	 */
+	gsfs = malloc(sizeof (gsfs[0]) * allfs * 2);
+	if (gsfs == NULL)
+		goto fail;
+	allfs = getfsstat(gsfs, (long)(sizeof (gsfs[0]) * allfs * 2),
+	    MNT_NOWAIT);
+	if (allfs == -1)
+		goto fail;
+	/*
+	 * Shrink to the number of entries actually returned; if the shrink
+	 * fails the larger buffer is simply kept.
+	 * NOTE(review): if allfs is 0 this is realloc(ptr, 0), whose result
+	 * is implementation-defined -- confirm this cannot happen.
+	 */
+	sfs = realloc(gsfs, allfs * sizeof (gsfs[0]));
+	if (sfs != NULL)
+		gsfs = sfs;
+	(void) pthread_rwlock_unlock(&gsfs_lock);
+	return (0);
+fail:
+	/* Capture errno first: free() and unlock may overwrite it. */
+	error = errno;
+	if (gsfs != NULL)
+		free(gsfs);
+	gsfs = NULL;
+	allfs = 0;
+	(void) pthread_rwlock_unlock(&gsfs_lock);
+	return (error);
+}
+
+/*
+ * Find the first mounted filesystem matching every non-NULL field of
+ * 'mrefp' (mnt_special, mnt_mountp, mnt_fstype) and describe it in 'mgetp'.
+ *
+ * The mount list is refreshed on every call; the FILE argument is unused.
+ * Returns 0 on a match, -1 when nothing matches, or the error value
+ * reported by statfs_init() when the refresh fails.
+ */
+int
+getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp)
+{
+	int idx, rc;
+
+	rc = statfs_init();
+	if (rc != 0)
+		return (rc);
+
+	/* Assume "no match" until a cached entry passes every filter. */
+	rc = -1;
+	(void) pthread_rwlock_rdlock(&gsfs_lock);
+	for (idx = 0; idx < allfs; idx++) {
+		struct statfs *cur = &gsfs[idx];
+
+		if ((mrefp->mnt_special == NULL ||
+		    strcmp(mrefp->mnt_special, cur->f_mntfromname) == 0) &&
+		    (mrefp->mnt_mountp == NULL ||
+		    strcmp(mrefp->mnt_mountp, cur->f_mntonname) == 0) &&
+		    (mrefp->mnt_fstype == NULL ||
+		    strcmp(mrefp->mnt_fstype, cur->f_fstypename) == 0)) {
+			statfs2mnttab(cur, mgetp);
+			rc = 0;
+			break;
+		}
+	}
+	(void) pthread_rwlock_unlock(&gsfs_lock);
+
+	return (rc);
+}
+
+/*
+ * Return the next cached mount entry in 'mp'.
+ *
+ * The current file offset of 'fp' is used as the index into the cached
+ * mount array, and is advanced by one after each entry is returned.
+ * Returns 0 on success, -1 when the index is past the last entry, or an
+ * errno value if lseek() or the cache refresh fails.
+ */
+int
+getmntent(FILE *fp, struct mnttab *mp)
+{
+	int error, nfs;
+
+	/* The file offset doubles as the index of the next entry. */
+	nfs = (int)lseek(fileno(fp), 0, SEEK_CUR);
+	if (nfs == -1)
+		return (errno);
+	/* If nfs is 0, we want to refresh our cache. */
+	if (nfs == 0 || gsfs == NULL) {
+		error = statfs_init();
+		if (error != 0)
+			return (error);
+	}
+	(void) pthread_rwlock_rdlock(&gsfs_lock);
+	/* Past the last cached entry: end of the mount table. */
+	if (nfs >= allfs) {
+		(void) pthread_rwlock_unlock(&gsfs_lock);
+		return (-1);
+	}
+	statfs2mnttab(&gsfs[nfs], mp);
+	(void) pthread_rwlock_unlock(&gsfs_lock);
+	/* Step the offset by one so the next call returns the next entry. */
+	if (lseek(fileno(fp), 1, SEEK_CUR) == -1)
+		return (errno);
+	return (0);
+}

diff --git a/zfs/lib/libspl/os/freebsd/zone.c b/zfs/lib/libspl/os/freebsd/zone.c
new file mode 100644
index 0000000..c07cb05
--- /dev/null
+++ b/zfs/lib/libspl/os/freebsd/zone.c

@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <zone.h>
+
+/*
+ * Return the value of the "security.jail.jailed" sysctl as the zone id.
+ *
+ * The process is aborted if the sysctl cannot be read, including in
+ * builds where assert() is compiled out.
+ */
+zoneid_t
+getzoneid(void)
+{
+	size_t size;
+	int jailid;
+
+	/* Information that we are in jail or not is enough for our needs. */
+	size = sizeof (jailid);
+	if (sysctlbyname("security.jail.jailed", &jailid, &size, NULL,
+	    0) == -1) {
+		assert(!"No security.jail.jailed sysctl!");
+		/*
+		 * assert() may expand to nothing (NDEBUG); never fall
+		 * through and return the uninitialized jailid.
+		 */
+		abort();
+	}
+	return ((zoneid_t)jailid);
+}

diff --git a/zfs/lib/libspl/os/linux/getexecname.c b/zfs/lib/libspl/os/linux/getexecname.c
new file mode 100644
index 0000000..6352a1a
--- /dev/null
+++ b/zfs/lib/libspl/os/linux/getexecname.c

@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Return the path of the running executable, resolved once from
+ * /proc/self/exe and cached in a static buffer guarded by a mutex.
+ * Returns NULL if the link cannot be read.
+ */
+const char *
+getexecname(void)
+{
+	static char execname[PATH_MAX + 1] = "";
+	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
+	const char *result = execname;
+
+	(void) pthread_mutex_lock(&mtx);
+
+	/* An empty buffer means the name has not been resolved yet. */
+	if (execname[0] == '\0') {
+		ssize_t len = readlink("/proc/self/exe",
+		    execname, sizeof (execname) - 1);
+
+		if (len < 0) {
+			execname[0] = '\0';
+			result = NULL;
+		} else {
+			/* readlink() does not NUL-terminate its output. */
+			execname[len] = '\0';
+		}
+	}
+
+	(void) pthread_mutex_unlock(&mtx);
+	return (result);
+}

diff --git a/zfs/lib/libspl/os/linux/gethostid.c b/zfs/lib/libspl/os/linux/gethostid.c
new file mode 100644
index 0000000..c04b7fd
--- /dev/null
+++ b/zfs/lib/libspl/os/linux/gethostid.c

@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+ */
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+
+/*
+ * Return the hostid from the ZFS_HOSTID environment variable when it is
+ * set, otherwise the hexadecimal value read from
+ * /proc/sys/kernel/spl/hostid.  Returns 0 if that file cannot be opened
+ * or parsed.
+ */
+static unsigned long
+get_spl_hostid(void)
+{
+	/*
+	 * Allow the hostid to be subverted for testing.
+	 */
+	const char *override = getenv("ZFS_HOSTID");
+	unsigned long id = 0;
+	FILE *fp;
+
+	if (override != NULL)
+		return (strtoull(override, NULL, 0));
+
+	fp = fopen("/proc/sys/kernel/spl/hostid", "re");
+	if (fp == NULL)
+		return (0);
+
+	if (fscanf(fp, "%lx", &id) != 1)
+		id = 0;
+
+	fclose(fp);
+
+	return (id);
+}
+
+/*
+ * Return the system hostid, masked with HOSTID_MASK.
+ *
+ * The value from get_spl_hostid() is preferred.  When that is 0, the
+ * first four bytes of /etc/hostid are used, and 0 is returned if the
+ * file is missing or shorter than four bytes.
+ */
+unsigned long
+get_system_hostid(void)
+{
+	unsigned long hostid = get_spl_hostid();
+
+	/*
+	 * We do not use gethostid(3) because it can return a bogus ID,
+	 * depending on the libc and /etc/hostid presence,
+	 * and the kernel and userspace must agree.
+	 * See comments above hostid_read() in the SPL.
+	 */
+	if (hostid == 0) {
+		int fd = open("/etc/hostid", O_RDONLY | O_CLOEXEC);
+		if (fd >= 0) {
+			/*
+			 * Read into a 32-bit object: reading 4 bytes
+			 * straight into an unsigned long fills the wrong
+			 * half on big-endian LP64 systems.  A short read
+			 * is treated as "no hostid".
+			 */
+			uint32_t file_hostid;
+
+			if (read(fd, &file_hostid, sizeof (file_hostid)) ==
+			    (ssize_t)sizeof (file_hostid))
+				hostid = file_hostid;
+			(void) close(fd);
+		}
+	}
+
+	return (hostid & HOSTID_MASK);
+}

diff --git a/zfs/lib/libspl/os/linux/getmntany.c b/zfs/lib/libspl/os/linux/getmntany.c
new file mode 100644
index 0000000..a27db60
--- /dev/null
+++ b/zfs/lib/libspl/os/linux/getmntany.c

@@ -0,0 +1,166 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Ricardo Correia.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1988 AT&T	*/
+/*	  All Rights Reserved	*/
+
+#include <stdio.h>
+#include <string.h>
+#include <mntent.h>
+#include <sys/errno.h>
+#include <sys/mnttab.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#define	BUFSIZE	(MNT_LINE_MAX + 2)
+
+__thread char buf[BUFSIZE];
+
+#define	DIFF(xx)	( \
+	    (mrefp->xx != NULL) && \
+	    (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0))
+
+/*
+ * Read entries from 'fp' into 'mgetp' until one agrees with every
+ * non-NULL field of 'mrefp' (see DIFF), or until _sol_getmntent() stops
+ * returning 0.  Returns the last _sol_getmntent() result, so 0 means a
+ * matching entry is in 'mgetp'.
+ */
+int
+getmntany(FILE *fp, struct mnttab *mgetp, struct mnttab *mrefp)
+{
+	int rc;
+
+	for (;;) {
+		rc = _sol_getmntent(fp, mgetp);
+		if (rc != 0)
+			break;
+
+		/* Stop at the first entry with no differing field. */
+		if (!(DIFF(mnt_special) || DIFF(mnt_mountp) ||
+		    DIFF(mnt_fstype) || DIFF(mnt_mntopts)))
+			break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Read the next mount entry from 'fp' with getmntent_r(), using the
+ * thread-local 'buf' as scratch space, and copy its string pointers
+ * into 'mgetp'.
+ *
+ * Returns 0 on success, -1 at end of file, and MNT_TOOLONG when
+ * getmntent_r() fails before end of file.
+ */
+int
+_sol_getmntent(FILE *fp, struct mnttab *mgetp)
+{
+	struct mntent parsed;
+
+	if (getmntent_r(fp, &parsed, buf, BUFSIZE) == NULL)
+		return (feof(fp) ? -1 : MNT_TOOLONG);
+
+	mgetp->mnt_special = parsed.mnt_fsname;
+	mgetp->mnt_mountp = parsed.mnt_dir;
+	mgetp->mnt_fstype = parsed.mnt_type;
+	mgetp->mnt_mntopts = parsed.mnt_opts;
+
+	return (0);
+}
+
+/*
+ * Read the next mount entry into 'mp' and record the major/minor device
+ * numbers of its mount point.  Both numbers are set to 0 when the mount
+ * point cannot be stat'ed.  'len' is not used.
+ *
+ * Returns the result of _sol_getmntent().
+ */
+static int
+getextmntent_impl(FILE *fp, struct extmnttab *mp, int len)
+{
+	struct stat64 sb;
+	int rc = _sol_getmntent(fp, (struct mnttab *)mp);
+
+	if (rc != 0)
+		return (rc);
+
+	if (stat64(mp->mnt_mountp, &sb) == 0) {
+		mp->mnt_major = major(sb.st_dev);
+		mp->mnt_minor = minor(sb.st_dev);
+	} else {
+		mp->mnt_major = 0;
+		mp->mnt_minor = 0;
+	}
+
+	return (rc);
+}
+
+/*
+ * Find the mount table entry for the filesystem that contains 'path'.
+ *
+ * 'statbuf' receives the stat64() result for 'path'; 'entry' receives the
+ * first MNTTAB entry whose mount point has the same device number.
+ *
+ * Returns 0 on success and -1 on failure.  A diagnostic is printed to
+ * stderr for every failure except the final mount point stat64().
+ */
+int
+getextmntent(const char *path, struct extmnttab *entry, struct stat64 *statbuf)
+{
+	struct stat64 st;
+	FILE *fp;
+	int match;
+
+	if (strlen(path) >= MAXPATHLEN) {
+		(void) fprintf(stderr, "invalid object; pathname too long\n");
+		return (-1);
+	}
+
+	/*
+	 * Search for the path in /proc/self/mounts. Rather than looking for the
+	 * specific path, which can be fooled by non-standard paths (i.e. ".."
+	 * or "//"), we stat() the path and search for the corresponding
+	 * (major,minor) device pair.
+	 */
+	if (stat64(path, statbuf) != 0) {
+		(void) fprintf(stderr, "cannot open '%s': %s\n",
+		    path, strerror(errno));
+		return (-1);
+	}
+
+
+	/* Both preprocessor branches open the same "if" block below. */
+#ifdef HAVE_SETMNTENT
+	if ((fp = setmntent(MNTTAB, "re")) == NULL) {
+#else
+	if ((fp = fopen(MNTTAB, "re")) == NULL) {
+#endif
+		(void) fprintf(stderr, "cannot open %s\n", MNTTAB);
+		return (-1);
+	}
+
+	/*
+	 * Search for the given (major,minor) pair in the mount table.
+	 */
+
+	match = 0;
+	while (getextmntent_impl(fp, entry, sizeof (*entry)) == 0) {
+		if (makedev(entry->mnt_major, entry->mnt_minor) ==
+		    statbuf->st_dev) {
+			match = 1;
+			break;
+		}
+	}
+	(void) fclose(fp);
+
+	if (!match) {
+		(void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
+		    path);
+		return (-1);
+	}
+
+	/* Re-check the matched mount point; fail if it cannot be stat'ed. */
+	if (stat64(entry->mnt_mountp, &st) != 0) {
+		entry->mnt_major = 0;
+		entry->mnt_minor = 0;
+		return (-1);
+	}
+
+	return (0);
+}

diff --git a/zfs/lib/libspl/os/linux/zone.c b/zfs/lib/libspl/os/linux/zone.c
new file mode 100644
index 0000000..393a16a
--- /dev/null
+++ b/zfs/lib/libspl/os/linux/zone.c

@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Ricardo Correia.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <zone.h>
+
+/*
+ * Return the zone id of the calling process.  This implementation
+ * always reports GLOBAL_ZONEID.
+ */
+zoneid_t
+getzoneid(void)
+{
+	return (GLOBAL_ZONEID);
+}

diff --git a/zfs/lib/libspl/timestamp.c b/zfs/lib/libspl/timestamp.c
index eab15f3..22ecb39 100644
--- a/zfs/lib/libspl/timestamp.c
+++ b/zfs/lib/libspl/timestamp.c

@@ -51,7 +51,7 @@
 		fmt = nl_langinfo(_DATE_FMT);
 
 	if (timestamp_fmt == UDATE) {
-		(void) printf("%ld\n", t);
+		(void) printf("%lld\n", (longlong_t)t);
 	} else if (timestamp_fmt == DDATE) {
 		char dstr[64];
 		int len;

diff --git a/zfs/lib/libspl/zone.c b/zfs/lib/libspl/zone.c
deleted file mode 100644
index 4a0e600..0000000
--- a/zfs/lib/libspl/zone.c
+++ /dev/null

@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Ricardo Correia.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <zone.h>
-#include <string.h>
-#include <errno.h>
-
-int aok = 0;
-
-zoneid_t
-getzoneid()
-{
-	return (GLOBAL_ZONEID);
-}
-
-zoneid_t
-getzoneidbyname(const char *name)
-{
-	if (name == NULL)
-		return (GLOBAL_ZONEID);
-
-	if (strcmp(name, GLOBAL_ZONEID_NAME) == 0)
-		return (GLOBAL_ZONEID);
-
-	return (EINVAL);
-}
-
-ssize_t
-getzonenamebyid(zoneid_t id, char *buf, size_t buflen)
-{
-	if (id != GLOBAL_ZONEID)
-		return (EINVAL);
-
-	ssize_t ret = strlen(GLOBAL_ZONEID_NAME) + 1;
-
-	if (buf == NULL || buflen == 0)
-		return (ret);
-
-	strncpy(buf, GLOBAL_ZONEID_NAME, buflen);
-	buf[buflen - 1] = '\0';
-
-	return (ret);
-}

diff --git a/zfs/lib/libtpool/Makefile.am b/zfs/lib/libtpool/Makefile.am
index 586eec2..ce9d03a 100644
--- a/zfs/lib/libtpool/Makefile.am
+++ b/zfs/lib/libtpool/Makefile.am

@@ -1,8 +1,10 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61118
+AM_CFLAGS += $(NO_CLOBBERED)
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libtpool.la
 
@@ -10,11 +12,6 @@
 	thread_pool.c \
 	thread_pool_impl.h
 
-nodist_libtpool_la_SOURCES = $(USER_C)
+libtpool_la_SOURCES = $(USER_C)
 
-libtpool_la_LIBADD = \
-	$(top_builddir)/lib/libspl/libspl.la
-
-libtpool_la_LDFLAGS = -pthread
-
-EXTRA_DIST = $(USER_C)
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libtpool/thread_pool.c b/zfs/lib/libtpool/thread_pool.c
index 267fa83..892beef 100644
--- a/zfs/lib/libtpool/thread_pool.c
+++ b/zfs/lib/libtpool/thread_pool.c

@@ -597,56 +597,3 @@
 	pthread_mutex_unlock(&tpool->tp_mutex);
 	return (0);
 }
-
-void
-postfork1_child_tpool(void)
-{
-	pthread_t my_tid = pthread_self();
-	tpool_t *tpool;
-	tpool_job_t *job;
-
-	/*
-	 * All of the thread pool workers are gone, except possibly
-	 * for the current thread, if it is a thread pool worker thread.
-	 * Retain the thread pools, but make them all empty.  Whatever
-	 * jobs were queued or running belong to the parent process.
-	 */
-top:
-	if ((tpool = thread_pools) == NULL)
-		return;
-
-	do {
-		tpool_active_t *activep;
-
-		(void) pthread_mutex_init(&tpool->tp_mutex, NULL);
-		(void) pthread_cond_init(&tpool->tp_busycv, NULL);
-		(void) pthread_cond_init(&tpool->tp_workcv, NULL);
-		(void) pthread_cond_init(&tpool->tp_waitcv, NULL);
-		for (job = tpool->tp_head; job; job = tpool->tp_head) {
-			tpool->tp_head = job->tpj_next;
-			free(job);
-		}
-		tpool->tp_tail = NULL;
-		tpool->tp_njobs = 0;
-		for (activep = tpool->tp_active; activep;
-		    activep = activep->tpa_next) {
-			if (activep->tpa_tid == my_tid) {
-				activep->tpa_next = NULL;
-				break;
-			}
-		}
-		tpool->tp_idle = 0;
-		tpool->tp_current = 0;
-		if ((tpool->tp_active = activep) != NULL)
-			tpool->tp_current = 1;
-		tpool->tp_flags &= ~TP_WAIT;
-		if (tpool->tp_flags & (TP_DESTROY | TP_ABANDON)) {
-			tpool->tp_flags &= ~TP_DESTROY;
-			tpool->tp_flags |= TP_ABANDON;
-			if (tpool->tp_current == 0) {
-				delete_pool(tpool);
-				goto top;	/* start over */
-			}
-		}
-	} while ((tpool = tpool->tp_forw) != thread_pools);
-}

diff --git a/zfs/lib/libunicode/Makefile.am b/zfs/lib/libunicode/Makefile.am
index 0a4734c..5b12b3e 100644
--- a/zfs/lib/libunicode/Makefile.am
+++ b/zfs/lib/libunicode/Makefile.am

@@ -5,20 +5,16 @@
 # Includes kernel code, generate warnings for large stack frames
 AM_CFLAGS += $(FRAME_LARGER_THAN)
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 noinst_LTLIBRARIES = libunicode.la
 
-USER_C =
-
 KERNEL_C = \
 	u8_textprep.c \
 	uconv.c
 
 nodist_libunicode_la_SOURCES = \
-	$(USER_C) \
 	$(KERNEL_C)
 
-EXTRA_DIST = $(USER_C)
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libuutil/Makefile.am b/zfs/lib/libuutil/Makefile.am
index 09eef79..05b7ed0 100644
--- a/zfs/lib/libuutil/Makefile.am
+++ b/zfs/lib/libuutil/Makefile.am

@@ -1,11 +1,12 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 lib_LTLIBRARIES = libuutil.la
 
+include $(top_srcdir)/config/Abigail.am
+
 USER_C = \
 	uu_alloc.c \
 	uu_avl.c \
@@ -17,12 +18,23 @@
 	uu_pname.c \
 	uu_string.c
 
-nodist_libuutil_la_SOURCES = $(USER_C)
+libuutil_la_SOURCES = $(USER_C)
 
 libuutil_la_LIBADD = \
-	$(top_builddir)/lib/libavl/libavl.la \
-	$(top_builddir)/lib/libspl/libspl.la
+	$(abs_top_builddir)/lib/libavl/libavl.la \
+	$(abs_top_builddir)/lib/libspl/libspl.la
 
-libuutil_la_LDFLAGS = -pthread -version-info 1:1:0
+libuutil_la_LIBADD += $(LTLIBINTL)
 
-EXTRA_DIST = $(USER_C)
+libuutil_la_LDFLAGS = -pthread
+
+if !ASAN_ENABLED
+libuutil_la_LDFLAGS += -Wl,-z,defs
+endif
+
+libuutil_la_LDFLAGS += -version-info 3:0:0
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Library ABI
+EXTRA_DIST = libuutil.abi libuutil.suppr

diff --git a/zfs/lib/libuutil/libuutil.abi b/zfs/lib/libuutil/libuutil.abi
new file mode 100644
index 0000000..21418ec
--- /dev/null
+++ b/zfs/lib/libuutil/libuutil.abi

@@ -0,0 +1,2276 @@
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libuutil.so.3'>
+  <elf-needed>
+    <dependency name='libc.so.6'/>
+    <dependency name='ld-linux-x86-64.so.2'/>
+  </elf-needed>
+  <elf-function-symbols>
+    <elf-symbol name='_sol_getmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_char' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_char_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_int_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_long' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_long_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_ptr_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_short' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_short_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_clear_long_excl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_set_long_excl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_char' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_char_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_int_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_long' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_long_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_ptr_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_short' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_short_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_destroy_nodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_find' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_first' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_insert' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_insert_here' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_is_empty' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_last' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_nearest' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_numnodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_swap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update_gt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update_lt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_walk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getextmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getmntany' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getzoneid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libspl_assertf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_after' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_before' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_is_empty' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_active' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_replace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_move_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_consumer' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_enter' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_exit' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_producer' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='mkdirp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='print_timestamp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='spl_pagesize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='strlcat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='strlcpy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_alt_exit' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_find' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_first' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_insert' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_last' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_lockup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_nearest_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_nearest_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_node_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_node_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_numnodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_pool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_pool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_teardown' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_walk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_walk_end' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_walk_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_avl_walk_start' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_check_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_die' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_dprintf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_dprintf_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_dprintf_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_dprintf_getname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_dump' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_fatal' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_ok' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_usage' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_getpname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_find' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_first' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_insert' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_insert_after' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_insert_before' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_last' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_lockup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_nearest_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_nearest_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_node_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_node_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_numnodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_pool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_pool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_teardown' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_walk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_walk_end' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_walk_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_list_walk_start' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_memdup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_msprintf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_open_tmp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_panic' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_set_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_setpname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_strbw' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_strcaseeq' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_strdup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_streq' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_strerror' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_strndup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_vdie' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_vwarn' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_vxdie' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_warn' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_xdie' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_zalloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-function-symbols>
+  <elf-variable-symbols>
+    <elf-symbol name='aok' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='buf' size='4110' type='tls-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='pagesize' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_fatal_value' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_ok_value' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='uu_exit_usage_value' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-variable-symbols>
+  <abi-instr address-size='64' path='../../module/avl/avl.c' language='LANG_C99'>
+    <function-decl name='libspl_assertf' mangled-name='libspl_assertf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libspl_assertf'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_insert_here' mangled-name='avl_insert_here' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_insert_here'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='eaa32e2f' name='new_data'/>
+      <parameter type-id='eaa32e2f' name='here'/>
+      <parameter type-id='95e97e5e' name='direction'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_add' mangled-name='avl_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_add'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='eaa32e2f' name='new_node'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_update_lt' mangled-name='avl_update_lt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update_lt'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_update_gt' mangled-name='avl_update_gt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update_gt'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_update' mangled-name='avl_update' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_swap' mangled-name='avl_swap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_swap'>
+      <parameter type-id='a3681dea' name='tree1'/>
+      <parameter type-id='a3681dea' name='tree2'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_is_empty' mangled-name='avl_is_empty' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_is_empty'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='assert.c' language='LANG_C99'>
+    <var-decl name='aok' type-id='95e97e5e' mangled-name='aok' visibility='default' elf-symbol-id='aok'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='atomic.c' language='LANG_C99'>
+    <typedef-decl name='int16_t' type-id='03896e23' id='23bd8cb5'/>
+    <typedef-decl name='int32_t' type-id='33f57a65' id='3ff5601b'/>
+    <typedef-decl name='uint16_t' type-id='253c2d2a' id='149c6638'/>
+    <typedef-decl name='__int16_t' type-id='a2185560' id='03896e23'/>
+    <typedef-decl name='__uint16_t' type-id='8efea9e5' id='253c2d2a'/>
+    <typedef-decl name='__int32_t' type-id='95e97e5e' id='33f57a65'/>
+    <typedef-decl name='__ssize_t' type-id='bd54fe1a' id='41060289'/>
+    <typedef-decl name='ssize_t' type-id='41060289' id='79a0948f'/>
+    <qualified-type-def type-id='149c6638' volatile='yes' id='5120c5f7'/>
+    <pointer-type-def type-id='5120c5f7' size-in-bits='64' id='93977ae7'/>
+    <qualified-type-def type-id='8f92235e' volatile='yes' id='430e0681'/>
+    <pointer-type-def type-id='430e0681' size-in-bits='64' id='3a147f31'/>
+    <qualified-type-def type-id='b96825af' volatile='yes' id='84ff7d66'/>
+    <pointer-type-def type-id='84ff7d66' size-in-bits='64' id='aa323ea4'/>
+    <qualified-type-def type-id='ee1f298e' volatile='yes' id='6f7e09cb'/>
+    <pointer-type-def type-id='6f7e09cb' size-in-bits='64' id='64698d33'/>
+    <qualified-type-def type-id='48b5725f' volatile='yes' id='b0b3cbf9'/>
+    <pointer-type-def type-id='b0b3cbf9' size-in-bits='64' id='fe09dd29'/>
+    <function-decl name='atomic_inc_8' mangled-name='atomic_inc_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_16' mangled-name='atomic_inc_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_32' mangled-name='atomic_inc_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_ulong' mangled-name='atomic_inc_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_8' mangled-name='atomic_dec_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_16' mangled-name='atomic_dec_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_32' mangled-name='atomic_dec_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_ulong' mangled-name='atomic_dec_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_8' mangled-name='atomic_add_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_16' mangled-name='atomic_add_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_32' mangled-name='atomic_add_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_ptr' mangled-name='atomic_add_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_8' mangled-name='atomic_sub_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_16' mangled-name='atomic_sub_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_32' mangled-name='atomic_sub_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_ptr' mangled-name='atomic_sub_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_8' mangled-name='atomic_or_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_16' mangled-name='atomic_or_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_32' mangled-name='atomic_or_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_ulong' mangled-name='atomic_or_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_8' mangled-name='atomic_and_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_16' mangled-name='atomic_and_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_32' mangled-name='atomic_and_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_ulong' mangled-name='atomic_and_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_8_nv' mangled-name='atomic_inc_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_inc_16_nv' mangled-name='atomic_inc_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_inc_32_nv' mangled-name='atomic_inc_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_inc_ulong_nv' mangled-name='atomic_inc_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_dec_8_nv' mangled-name='atomic_dec_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_dec_16_nv' mangled-name='atomic_dec_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_dec_32_nv' mangled-name='atomic_dec_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_dec_ulong_nv' mangled-name='atomic_dec_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_add_8_nv' mangled-name='atomic_add_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_add_16_nv' mangled-name='atomic_add_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_add_32_nv' mangled-name='atomic_add_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_add_long_nv' mangled-name='atomic_add_long_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_long_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='bd54fe1a' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_add_ptr_nv' mangled-name='atomic_add_ptr_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_ptr_nv'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_8_nv' mangled-name='atomic_sub_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_sub_16_nv' mangled-name='atomic_sub_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_sub_32_nv' mangled-name='atomic_sub_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_sub_long_nv' mangled-name='atomic_sub_long_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_long_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='bd54fe1a' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_sub_ptr_nv' mangled-name='atomic_sub_ptr_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_ptr_nv'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_or_8_nv' mangled-name='atomic_or_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_or_16_nv' mangled-name='atomic_or_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_or_32_nv' mangled-name='atomic_or_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_or_ulong_nv' mangled-name='atomic_or_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_and_8_nv' mangled-name='atomic_and_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_and_16_nv' mangled-name='atomic_and_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_and_32_nv' mangled-name='atomic_and_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_and_ulong_nv' mangled-name='atomic_and_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_8' mangled-name='atomic_cas_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='exp'/>
+      <parameter type-id='b96825af' name='des'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_cas_16' mangled-name='atomic_cas_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='exp'/>
+      <parameter type-id='149c6638' name='des'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_cas_32' mangled-name='atomic_cas_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='exp'/>
+      <parameter type-id='8f92235e' name='des'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_ulong' mangled-name='atomic_cas_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='exp'/>
+      <parameter type-id='ee1f298e' name='des'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_ptr' mangled-name='atomic_cas_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='eaa32e2f' name='exp'/>
+      <parameter type-id='eaa32e2f' name='des'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_swap_8' mangled-name='atomic_swap_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_swap_16' mangled-name='atomic_swap_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_swap_32' mangled-name='atomic_swap_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_swap_ulong' mangled-name='atomic_swap_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_swap_ptr' mangled-name='atomic_swap_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='eaa32e2f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_set_long_excl' mangled-name='atomic_set_long_excl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_set_long_excl'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='3502e3ff' name='value'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='atomic_clear_long_excl' mangled-name='atomic_clear_long_excl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_clear_long_excl'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='3502e3ff' name='value'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='membar_enter' mangled-name='membar_enter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_enter'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='membar_producer' mangled-name='membar_producer' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_producer'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='membar_consumer' mangled-name='membar_consumer' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_consumer'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='list.c' language='LANG_C99'>
+    <typedef-decl name='list_node_t' type-id='b0b5e45e' id='b21843b2'/>
+    <typedef-decl name='list_t' type-id='e824dae9' id='0899125f'/>
+    <class-decl name='list_node' size-in-bits='128' is-struct='yes' visibility='default' id='b0b5e45e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='next' type-id='b03eadb4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='prev' type-id='b03eadb4' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='list' size-in-bits='256' is-struct='yes' visibility='default' id='e824dae9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='list_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='list_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='list_head' type-id='b0b5e45e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='b0b5e45e' size-in-bits='64' id='b03eadb4'/>
+    <pointer-type-def type-id='b21843b2' size-in-bits='64' id='ccc38265'/>
+    <pointer-type-def type-id='0899125f' size-in-bits='64' id='352ec160'/>
+    <function-decl name='list_create' mangled-name='list_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_create'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <parameter type-id='b59d7dce' name='offset'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_destroy' mangled-name='list_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_destroy'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_after' mangled-name='list_insert_after' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_after'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <parameter type-id='eaa32e2f' name='nobject'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_before' mangled-name='list_insert_before' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_before'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <parameter type-id='eaa32e2f' name='nobject'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_head' mangled-name='list_insert_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_tail' mangled-name='list_insert_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_remove' mangled-name='list_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_remove_head' mangled-name='list_remove_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_remove_tail' mangled-name='list_remove_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_head' mangled-name='list_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_tail' mangled-name='list_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_next' mangled-name='list_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_next'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_prev' mangled-name='list_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_prev'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_move_tail' mangled-name='list_move_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_move_tail'>
+      <parameter type-id='352ec160' name='dst'/>
+      <parameter type-id='352ec160' name='src'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_replace' mangled-name='list_link_replace' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_replace'>
+      <parameter type-id='ccc38265' name='lold'/>
+      <parameter type-id='ccc38265' name='lnew'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_init' mangled-name='list_link_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_init'>
+      <parameter type-id='ccc38265' name='ln'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_active' mangled-name='list_link_active' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_active'>
+      <parameter type-id='ccc38265' name='ln'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='list_is_empty' mangled-name='list_is_empty' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_is_empty'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='mkdirp.c' language='LANG_C99'>
+    <typedef-decl name='__mode_t' type-id='f0981eeb' id='e1c52942'/>
+    <typedef-decl name='mode_t' type-id='e1c52942' id='d50d396c'/>
+    <typedef-decl name='wchar_t' type-id='95e97e5e' id='928221d2'/>
+    <qualified-type-def type-id='26a90f95' restrict='yes' id='266fe297'/>
+    <qualified-type-def type-id='928221d2' const='yes' id='effb3702'/>
+    <pointer-type-def type-id='effb3702' size-in-bits='64' id='f077d3f8'/>
+    <qualified-type-def type-id='f077d3f8' restrict='yes' id='598aab80'/>
+    <pointer-type-def type-id='928221d2' size-in-bits='64' id='323d93c1'/>
+    <qualified-type-def type-id='323d93c1' restrict='yes' id='f1358bc3'/>
+    <function-decl name='mkdirp' mangled-name='mkdirp' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='mkdirp'>
+      <parameter type-id='80f4b756' name='d'/>
+      <parameter type-id='d50d396c' name='mode'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='calloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='mbstowcs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f1358bc3'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='wcstombs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='598aab80'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='mkdir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='e1c52942'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/getexecname.c' language='LANG_C99'>
+    <function-decl name='readlink' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/gethostid.c' language='LANG_C99'>
+    <type-decl name='long long unsigned int' size-in-bits='64' id='3a47d82b'/>
+    <pointer-type-def type-id='26a90f95' size-in-bits='64' id='9b23c9ad'/>
+    <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
+    <function-decl name='fclose' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtoull' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='3a47d82b'/>
+    </function-decl>
+    <function-decl name='getenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='close' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='read' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='get_system_hostid' mangled-name='get_system_hostid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='get_system_hostid'>
+      <return type-id='7359adad'/>
+    </function-decl>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/getmntany.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='03085adc' size-in-bits='192' id='083f8d58'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32880' id='ad756b7f'>
+      <subrange length='4110' type-id='7359adad' id='8aa676f7'/>
+    </array-type-def>
+    <class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='extmnttab' size-in-bits='320' is-struct='yes' visibility='default' id='0c544dc0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_major' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_minor' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='stat64' size-in-bits='1152' is-struct='yes' visibility='default' id='0bbec9cd'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='st_dev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='st_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='st_nlink' type-id='80f0b9df' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='st_mode' type-id='e1c52942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='st_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='st_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='st_rdev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='st_size' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='st_blksize' type-id='d3f10a7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='st_blocks' type-id='4e711bf1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='st_atim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='st_mtim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='st_ctim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__glibc_reserved' type-id='083f8d58' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__dev_t' type-id='7359adad' id='35ed8932'/>
+    <typedef-decl name='__uid_t' type-id='f0981eeb' id='cc5fcceb'/>
+    <typedef-decl name='__gid_t' type-id='f0981eeb' id='d94ec6d9'/>
+    <typedef-decl name='__ino64_t' type-id='7359adad' id='71288a47'/>
+    <typedef-decl name='__nlink_t' type-id='7359adad' id='80f0b9df'/>
+    <typedef-decl name='__blksize_t' type-id='bd54fe1a' id='d3f10a7f'/>
+    <typedef-decl name='__blkcnt64_t' type-id='bd54fe1a' id='4e711bf1'/>
+    <class-decl name='mntent' size-in-bits='320' is-struct='yes' visibility='default' id='56fe4a37'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_fsname' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_dir' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_type' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_opts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_freq' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_passno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='0c544dc0' size-in-bits='64' id='394fc496'/>
+    <pointer-type-def type-id='56fe4a37' size-in-bits='64' id='b6b61d2f'/>
+    <qualified-type-def type-id='b6b61d2f' restrict='yes' id='3cad23cd'/>
+    <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/>
+    <pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/>
+    <qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/>
+    <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='3cad23cd'/>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='b6b61d2f'/>
+    </function-decl>
+    <function-decl name='feof' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='stat64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f1cadedf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <var-decl name='buf' type-id='ad756b7f' mangled-name='buf' visibility='default' elf-symbol-id='buf'/>
+    <function-decl name='getmntany' mangled-name='getmntany' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getmntany'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='9d424d31' name='mgetp'/>
+      <parameter type-id='9d424d31' name='mrefp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='_sol_getmntent' mangled-name='_sol_getmntent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='_sol_getmntent'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='9d424d31' name='mgetp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getextmntent' mangled-name='getextmntent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getextmntent'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='394fc496' name='entry'/>
+      <parameter type-id='62f7a03d' name='statbuf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
+    <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+    <function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
+      <return type-id='4da03624'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='page.c' language='LANG_C99'>
+    <var-decl name='pagesize' type-id='b59d7dce' mangled-name='pagesize' visibility='default' elf-symbol-id='pagesize'/>
+    <function-decl name='spl_pagesize' mangled-name='spl_pagesize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='spl_pagesize'>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='sysconf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='bd54fe1a'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='strlcat.c' language='LANG_C99'>
+    <function-decl name='strlcat' mangled-name='strlcat' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='strlcat'>
+      <parameter type-id='26a90f95' name='dst'/>
+      <parameter type-id='80f4b756' name='src'/>
+      <parameter type-id='b59d7dce' name='dstsize'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='timestamp.c' language='LANG_C99'>
+    <class-decl name='tm' size-in-bits='448' is-struct='yes' visibility='default' id='dddf6ca2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tm_sec' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='tm_min' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tm_hour' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='tm_mday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tm_mon' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='tm_year' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='tm_wday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='tm_yday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='tm_isdst' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='tm_gmtoff' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='tm_zone' type-id='80f4b756' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='time_t' type-id='65eda9c0' id='c9d12d66'/>
+    <typedef-decl name='nl_item' type-id='95e97e5e' id='03b79a94'/>
+    <qualified-type-def type-id='c9d12d66' const='yes' id='588b3216'/>
+    <pointer-type-def type-id='588b3216' size-in-bits='64' id='9f201474'/>
+    <qualified-type-def type-id='dddf6ca2' const='yes' id='e824a34f'/>
+    <pointer-type-def type-id='e824a34f' size-in-bits='64' id='d6ad37ff'/>
+    <qualified-type-def type-id='d6ad37ff' restrict='yes' id='f8c6051d'/>
+    <pointer-type-def type-id='c9d12d66' size-in-bits='64' id='b2eb2c3f'/>
+    <pointer-type-def type-id='dddf6ca2' size-in-bits='64' id='d915a820'/>
+    <function-decl name='print_timestamp' mangled-name='print_timestamp' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='print_timestamp'>
+      <parameter type-id='3502e3ff' name='timestamp_fmt'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nl_langinfo' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='03b79a94'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='printf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='time' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b2eb2c3f'/>
+      <return type-id='c9d12d66'/>
+    </function-decl>
+    <function-decl name='strftime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f8c6051d'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='localtime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9f201474'/>
+      <return type-id='d915a820'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_alloc.c' language='LANG_C99'>
+    <type-decl name='char' size-in-bits='8' id='a84c031d'/>
+    <class-decl name='__va_list_tag' size-in-bits='192' is-struct='yes' visibility='default' id='d5027220'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='gp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='fp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='overflow_arg_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='reg_save_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <type-decl name='int' size-in-bits='32' id='95e97e5e'/>
+    <type-decl name='unsigned int' size-in-bits='32' id='f0981eeb'/>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='variadic parameter type' id='2c1145c5'/>
+    <type-decl name='void' id='48b5725f'/>
+    <typedef-decl name='uint_t' type-id='f0981eeb' id='3502e3ff'/>
+    <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
+    <pointer-type-def type-id='d5027220' size-in-bits='64' id='b7f2d5e6'/>
+    <pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
+    <qualified-type-def type-id='a84c031d' const='yes' id='9b45d938'/>
+    <pointer-type-def type-id='9b45d938' size-in-bits='64' id='80f4b756'/>
+    <pointer-type-def type-id='48b5725f' size-in-bits='64' id='eaa32e2f'/>
+    <function-decl name='uu_set_error' mangled-name='uu_set_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_set_error'>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_zalloc' mangled-name='uu_zalloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_zalloc'>
+      <parameter type-id='b59d7dce' name='n'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_free' mangled-name='uu_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_free'>
+      <parameter type-id='eaa32e2f' name='p'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_strdup' mangled-name='uu_strdup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_strdup'>
+      <parameter type-id='80f4b756' name='str'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='uu_strndup' mangled-name='uu_strndup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_strndup'>
+      <parameter type-id='80f4b756' name='s'/>
+      <parameter type-id='b59d7dce' name='n'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='uu_memdup' mangled-name='uu_memdup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_memdup'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='b59d7dce' name='sz'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_msprintf' mangled-name='uu_msprintf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_msprintf'>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='vsnprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='malloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='memcpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='memset' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='strlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='strnlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_avl.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='bf311473' size-in-bits='128' id='f0f65199'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='320' id='36c46961'>
+      <subrange length='40' type-id='7359adad' id='8f80b239'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32' id='8e0573fd'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='512' id='59daf3ef'>
+      <subrange length='64' type-id='7359adad' id='b10be967'/>
+    </array-type-def>
+    <type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
+    <type-decl name='short int' size-in-bits='16' id='a2185560'/>
+    <type-decl name='signed char' size-in-bits='8' id='28577a57'/>
+    <array-type-def dimensions='1' type-id='e475ab95' size-in-bits='192' id='0ce65a8b'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <type-decl name='unsigned char' size-in-bits='8' id='002ac4a6'/>
+    <typedef-decl name='uu_compare_fn_t' type-id='add6e811' id='40f93560'/>
+    <typedef-decl name='uu_walk_fn_t' type-id='96ee24a5' id='9d1aa0dc'/>
+    <typedef-decl name='uu_avl_pool_t' type-id='12a530a8' id='7f84e390'/>
+    <typedef-decl name='uu_avl_t' type-id='4af029d1' id='bb7f0973'/>
+    <class-decl name='uu_avl_node' size-in-bits='192' is-struct='yes' visibility='default' id='f65f4326'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uan_opaque' type-id='0ce65a8b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uu_avl_node_t' type-id='f65f4326' id='73a65116'/>
+    <typedef-decl name='uu_avl_walk_t' type-id='e70a39e3' id='edd8457b'/>
+    <typedef-decl name='uu_avl_index_t' type-id='e475ab95' id='5d7f5fc8'/>
+    <class-decl name='uu_avl_walk' size-in-bits='320' is-struct='yes' visibility='default' id='e70a39e3'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uaw_next' type-id='5842d146' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='uaw_prev' type-id='5842d146' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='uaw_avl' type-id='a5c21a38' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='uaw_next_result' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='uaw_dir' type-id='ee31ee44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='264'>
+        <var-decl name='uaw_robust' type-id='b96825af' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uu_avl' size-in-bits='960' is-struct='yes' visibility='default' id='4af029d1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ua_next_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='ua_prev_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='ua_pool' type-id='de82c773' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='ua_parent_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='ua_debug' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='264'>
+        <var-decl name='ua_index' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='ua_tree' type-id='b351119f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='ua_null_walk' type-id='edd8457b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uu_avl_pool' size-in-bits='2176' is-struct='yes' visibility='default' id='12a530a8'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uap_next' type-id='de82c773' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='uap_prev' type-id='de82c773' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='uap_name' type-id='59daf3ef' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='uap_nodeoffset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='uap_objsize' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='uap_cmp' type-id='d502b39f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='uap_debug' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='840'>
+        <var-decl name='uap_last_index' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='uap_lock' type-id='7a6844eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='uap_null_avl' type-id='bb7f0973' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='avl_tree_t' type-id='b351119f' id='f20fbd51'/>
+    <typedef-decl name='avl_index_t' type-id='e475ab95' id='fba6cb51'/>
+    <class-decl name='avl_node' size-in-bits='192' is-struct='yes' visibility='default' id='428b67b3'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_child' type-id='f0f65199' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_pcb' type-id='e475ab95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='avl_tree' size-in-bits='320' is-struct='yes' visibility='default' id='b351119f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_root' type-id='bf311473' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='avl_compar' type-id='585e1de9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='avl_numnodes' type-id='ee1f298e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='avl_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ulong_t' type-id='7359adad' id='ee1f298e'/>
+    <union-decl name='pthread_mutexattr_t' size-in-bits='32' naming-typedef-id='8afd6070' visibility='default' id='7300eb00'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='8e0573fd' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutexattr_t' type-id='7300eb00' id='8afd6070'/>
+    <union-decl name='pthread_mutex_t' size-in-bits='320' naming-typedef-id='7a6844eb' visibility='default' id='70681f9b'>
+      <data-member access='public'>
+        <var-decl name='__data' type-id='4c734837' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='36c46961' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutex_t' type-id='70681f9b' id='7a6844eb'/>
+    <typedef-decl name='int8_t' type-id='2171a512' id='ee31ee44'/>
+    <typedef-decl name='uint8_t' type-id='c51d6389' id='b96825af'/>
+    <typedef-decl name='uint32_t' type-id='62f1140c' id='8f92235e'/>
+    <class-decl name='__pthread_mutex_s' size-in-bits='320' is-struct='yes' visibility='default' id='4c734837'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__lock' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='__count' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__owner' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='__nusers' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='__kind' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='__spins' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='176'>
+        <var-decl name='__elision' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='__list' type-id='518fb49c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__pthread_internal_list' size-in-bits='128' is-struct='yes' visibility='default' id='0e01899c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__prev' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__next' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__pthread_list_t' type-id='0e01899c' id='518fb49c'/>
+    <typedef-decl name='__int8_t' type-id='28577a57' id='2171a512'/>
+    <typedef-decl name='__uint8_t' type-id='002ac4a6' id='c51d6389'/>
+    <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
+    <typedef-decl name='uintptr_t' type-id='7359adad' id='e475ab95'/>
+    <pointer-type-def type-id='0e01899c' size-in-bits='64' id='4d98cd5a'/>
+    <pointer-type-def type-id='fba6cb51' size-in-bits='64' id='32adbf30'/>
+    <pointer-type-def type-id='428b67b3' size-in-bits='64' id='bf311473'/>
+    <pointer-type-def type-id='b351119f' size-in-bits='64' id='716943c7'/>
+    <pointer-type-def type-id='f20fbd51' size-in-bits='64' id='a3681dea'/>
+    <qualified-type-def type-id='8afd6070' const='yes' id='1d853360'/>
+    <pointer-type-def type-id='1d853360' size-in-bits='64' id='c2afbd7e'/>
+    <pointer-type-def type-id='96ee24a5' size-in-bits='64' id='585e1de9'/>
+    <pointer-type-def type-id='7a6844eb' size-in-bits='64' id='18c91f9e'/>
+    <pointer-type-def type-id='5d7f5fc8' size-in-bits='64' id='813a2225'/>
+    <pointer-type-def type-id='73a65116' size-in-bits='64' id='2dc35b9d'/>
+    <pointer-type-def type-id='7f84e390' size-in-bits='64' id='de82c773'/>
+    <pointer-type-def type-id='bb7f0973' size-in-bits='64' id='a5c21a38'/>
+    <pointer-type-def type-id='edd8457b' size-in-bits='64' id='5842d146'/>
+    <pointer-type-def type-id='40f93560' size-in-bits='64' id='d502b39f'/>
+    <pointer-type-def type-id='9d1aa0dc' size-in-bits='64' id='30a42b6d'/>
+    <pointer-type-def type-id='eaa32e2f' size-in-bits='64' id='63e171df'/>
+    <function-decl name='uu_check_name' mangled-name='uu_check_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_check_name'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uu_panic' mangled-name='uu_panic' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_panic'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_create' mangled-name='avl_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_create'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='585e1de9'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_find' mangled-name='avl_find' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_find'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='32adbf30'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_insert' mangled-name='avl_insert' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_insert'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='fba6cb51'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_first' mangled-name='avl_first' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_first'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_last' mangled-name='avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_last'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_nearest' mangled-name='avl_nearest' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_nearest'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='fba6cb51'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_remove' mangled-name='avl_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_remove'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_numnodes' mangled-name='avl_numnodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_numnodes'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='avl_destroy_nodes' mangled-name='avl_destroy_nodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_destroy_nodes'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='63e171df'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_destroy' mangled-name='avl_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_destroy'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_walk' mangled-name='avl_walk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_walk'>
+      <parameter type-id='716943c7'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='strlcpy' mangled-name='strlcpy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='strlcpy'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='uu_avl_pool_create' mangled-name='uu_avl_pool_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_pool_create'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='b59d7dce' name='objsize'/>
+      <parameter type-id='b59d7dce' name='nodeoffset'/>
+      <parameter type-id='d502b39f' name='compare_func'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='de82c773'/>
+    </function-decl>
+    <function-decl name='uu_avl_pool_destroy' mangled-name='uu_avl_pool_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_pool_destroy'>
+      <parameter type-id='de82c773' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_node_init' mangled-name='uu_avl_node_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_node_init'>
+      <parameter type-id='eaa32e2f' name='base'/>
+      <parameter type-id='2dc35b9d' name='np'/>
+      <parameter type-id='de82c773' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_node_fini' mangled-name='uu_avl_node_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_node_fini'>
+      <parameter type-id='eaa32e2f' name='base'/>
+      <parameter type-id='2dc35b9d' name='np'/>
+      <parameter type-id='de82c773' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_create' mangled-name='uu_avl_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_create'>
+      <parameter type-id='de82c773' name='pp'/>
+      <parameter type-id='eaa32e2f' name='parent'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='a5c21a38'/>
+    </function-decl>
+    <function-decl name='uu_avl_destroy' mangled-name='uu_avl_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_destroy'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_numnodes' mangled-name='uu_avl_numnodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_numnodes'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='uu_avl_first' mangled-name='uu_avl_first' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_first'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_last' mangled-name='uu_avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_last'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_next' mangled-name='uu_avl_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_next'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='eaa32e2f' name='node'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_prev' mangled-name='uu_avl_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_prev'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='eaa32e2f' name='node'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_start' mangled-name='uu_avl_walk_start' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_walk_start'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='5842d146'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_next' mangled-name='uu_avl_walk_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_walk_next'>
+      <parameter type-id='5842d146' name='wp'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_end' mangled-name='uu_avl_walk_end' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_walk_end'>
+      <parameter type-id='5842d146' name='wp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk' mangled-name='uu_avl_walk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_walk'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='30a42b6d' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uu_avl_remove' mangled-name='uu_avl_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_remove'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_teardown' mangled-name='uu_avl_teardown' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_teardown'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='63e171df' name='cookie'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_find' mangled-name='uu_avl_find' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_find'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <parameter type-id='813a2225' name='out'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_insert' mangled-name='uu_avl_insert' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_insert'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <parameter type-id='5d7f5fc8' name='idx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_nearest_next' mangled-name='uu_avl_nearest_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_nearest_next'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='5d7f5fc8' name='idx'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_nearest_prev' mangled-name='uu_avl_nearest_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_nearest_prev'>
+      <parameter type-id='a5c21a38' name='ap'/>
+      <parameter type-id='5d7f5fc8' name='idx'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_lockup' mangled-name='uu_avl_lockup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_lockup'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_release' mangled-name='uu_avl_release' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_avl_release'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <parameter type-id='c2afbd7e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_lock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_unlock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='96ee24a5'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='add6e811'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_dprintf.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8' id='89feb1ec'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
+      <subrange length='20' type-id='7359adad' id='fdca39cf'/>
+    </array-type-def>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+    <typedef-decl name='uu_dprintf_t' type-id='0538fe4f' id='2367d595'/>
+    <enum-decl name='uu_dprintf_severity_t' naming-typedef-id='ceb5296f' id='c43e6e7b'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='UU_DPRINTF_SILENT' value='0'/>
+      <enumerator name='UU_DPRINTF_FATAL' value='1'/>
+      <enumerator name='UU_DPRINTF_WARNING' value='2'/>
+      <enumerator name='UU_DPRINTF_NOTICE' value='3'/>
+      <enumerator name='UU_DPRINTF_INFO' value='4'/>
+      <enumerator name='UU_DPRINTF_DEBUG' value='5'/>
+    </enum-decl>
+    <typedef-decl name='uu_dprintf_severity_t' type-id='c43e6e7b' id='ceb5296f'/>
+    <class-decl name='uu_dprintf' size-in-bits='128' is-struct='yes' visibility='default' id='0538fe4f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uud_name' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='uud_severity' type-id='ceb5296f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='uud_flags' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
+    <typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
+    <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
+    <typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
+    <class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_IO_read_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_IO_read_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='_IO_read_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='_IO_write_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='_IO_write_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='_IO_write_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='_IO_buf_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='_IO_buf_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='_IO_save_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='_IO_backup_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='_IO_save_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='_markers' type-id='e4c6fa61' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='_chain' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='_fileno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='_flags2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='_old_offset' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='_cur_column' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1040'>
+        <var-decl name='_vtable_offset' type-id='28577a57' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1048'>
+        <var-decl name='_shortbuf' type-id='89feb1ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='_lock' type-id='cecf4ea7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1344'>
+        <var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1472'>
+        <var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='_mode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1568'>
+        <var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
+    <qualified-type-def type-id='822cd80b' restrict='yes' id='e75a27e9'/>
+    <pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
+    <pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
+    <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
+    <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
+    <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
+    <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
+    <pointer-type-def type-id='2367d595' size-in-bits='64' id='ed73b5ca'/>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <function-decl name='uu_dprintf_create' mangled-name='uu_dprintf_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_dprintf_create'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='ceb5296f' name='severity'/>
+      <parameter type-id='3502e3ff' name='flags'/>
+      <return type-id='ed73b5ca'/>
+    </function-decl>
+    <function-decl name='uu_dprintf' mangled-name='uu_dprintf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_dprintf'>
+      <parameter type-id='ed73b5ca' name='D'/>
+      <parameter type-id='ceb5296f' name='severity'/>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_dprintf_destroy' mangled-name='uu_dprintf_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_dprintf_destroy'>
+      <parameter type-id='ed73b5ca' name='D'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_dprintf_getname' mangled-name='uu_dprintf_getname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_dprintf_getname'>
+      <parameter type-id='ed73b5ca' name='D'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='dcgettext' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='vfprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strdup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_ident.c' language='LANG_C99'>
+    <function-decl name='strchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_list.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='e475ab95' size-in-bits='128' id='d0e9cdae'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <typedef-decl name='uu_list_pool_t' type-id='55168cab' id='38a2549d'/>
+    <typedef-decl name='uu_list_t' type-id='1d04bdf0' id='82e88484'/>
+    <class-decl name='uu_list_node' size-in-bits='128' is-struct='yes' visibility='default' id='f8f3cec5'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uln_opaque' type-id='d0e9cdae' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uu_list_node_t' type-id='f8f3cec5' id='c4dc472f'/>
+    <typedef-decl name='uu_list_walk_t' type-id='b80e3208' id='9fed32d2'/>
+    <typedef-decl name='uu_list_index_t' type-id='e475ab95' id='f0dd35ff'/>
+    <class-decl name='uu_list_node_impl' size-in-bits='128' is-struct='yes' visibility='default' id='700a795c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uln_next' type-id='5af1298a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='uln_prev' type-id='5af1298a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uu_list_node_impl_t' type-id='700a795c' id='8e5864b0'/>
+    <class-decl name='uu_list_walk' size-in-bits='320' is-struct='yes' visibility='default' id='b80e3208'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ulw_next' type-id='4d848103' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='ulw_prev' type-id='4d848103' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='ulw_list' type-id='0c0b229b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='ulw_dir' type-id='ee31ee44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='200'>
+        <var-decl name='ulw_robust' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='ulw_next_result' type-id='a085247f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uu_list' size-in-bits='896' is-struct='yes' visibility='default' id='1d04bdf0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ul_next_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='ul_prev_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='ul_pool' type-id='0941e04e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='ul_parent_enc' type-id='e475ab95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='ul_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='ul_numnodes' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='ul_debug' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='392'>
+        <var-decl name='ul_sorted' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='400'>
+        <var-decl name='ul_index' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='ul_null_node' type-id='8e5864b0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='ul_null_walk' type-id='9fed32d2' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uu_list_pool' size-in-bits='2112' is-struct='yes' visibility='default' id='55168cab'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ulp_next' type-id='0941e04e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='ulp_prev' type-id='0941e04e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='ulp_name' type-id='59daf3ef' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='ulp_nodeoffset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='ulp_objsize' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='ulp_cmp' type-id='d502b39f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='ulp_debug' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='840'>
+        <var-decl name='ulp_last_index' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='ulp_lock' type-id='7a6844eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='ulp_null_list' type-id='82e88484' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='f0dd35ff' size-in-bits='64' id='ecbc0046'/>
+    <pointer-type-def type-id='700a795c' size-in-bits='64' id='5af1298a'/>
+    <pointer-type-def type-id='8e5864b0' size-in-bits='64' id='a085247f'/>
+    <pointer-type-def type-id='c4dc472f' size-in-bits='64' id='dbe143f4'/>
+    <pointer-type-def type-id='38a2549d' size-in-bits='64' id='0941e04e'/>
+    <pointer-type-def type-id='82e88484' size-in-bits='64' id='0c0b229b'/>
+    <pointer-type-def type-id='9fed32d2' size-in-bits='64' id='4d848103'/>
+    <function-decl name='uu_list_pool_create' mangled-name='uu_list_pool_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_pool_create'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='b59d7dce' name='objsize'/>
+      <parameter type-id='b59d7dce' name='nodeoffset'/>
+      <parameter type-id='d502b39f' name='compare_func'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='0941e04e'/>
+    </function-decl>
+    <function-decl name='uu_list_pool_destroy' mangled-name='uu_list_pool_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_pool_destroy'>
+      <parameter type-id='0941e04e' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_node_init' mangled-name='uu_list_node_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_node_init'>
+      <parameter type-id='eaa32e2f' name='base'/>
+      <parameter type-id='dbe143f4' name='np_arg'/>
+      <parameter type-id='0941e04e' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_node_fini' mangled-name='uu_list_node_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_node_fini'>
+      <parameter type-id='eaa32e2f' name='base'/>
+      <parameter type-id='dbe143f4' name='np_arg'/>
+      <parameter type-id='0941e04e' name='pp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_create' mangled-name='uu_list_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_create'>
+      <parameter type-id='0941e04e' name='pp'/>
+      <parameter type-id='eaa32e2f' name='parent'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='0c0b229b'/>
+    </function-decl>
+    <function-decl name='uu_list_destroy' mangled-name='uu_list_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_destroy'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_insert' mangled-name='uu_list_insert' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_insert'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <parameter type-id='f0dd35ff' name='idx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_find' mangled-name='uu_list_find' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_find'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <parameter type-id='ecbc0046' name='out'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_nearest_next' mangled-name='uu_list_nearest_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_nearest_next'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='f0dd35ff' name='idx'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_nearest_prev' mangled-name='uu_list_nearest_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_nearest_prev'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='f0dd35ff' name='idx'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_walk_start' mangled-name='uu_list_walk_start' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_walk_start'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='4d848103'/>
+    </function-decl>
+    <function-decl name='uu_list_walk_next' mangled-name='uu_list_walk_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_walk_next'>
+      <parameter type-id='4d848103' name='wp'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_walk_end' mangled-name='uu_list_walk_end' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_walk_end'>
+      <parameter type-id='4d848103' name='wp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_walk' mangled-name='uu_list_walk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_walk'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='30a42b6d' name='func'/>
+      <parameter type-id='eaa32e2f' name='private'/>
+      <parameter type-id='8f92235e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uu_list_remove' mangled-name='uu_list_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_remove'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_teardown' mangled-name='uu_list_teardown' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_teardown'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='63e171df' name='cookie'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_insert_before' mangled-name='uu_list_insert_before' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_insert_before'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='target'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uu_list_insert_after' mangled-name='uu_list_insert_after' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_insert_after'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='target'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uu_list_numnodes' mangled-name='uu_list_numnodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_numnodes'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='uu_list_first' mangled-name='uu_list_first' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_first'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_last' mangled-name='uu_list_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_last'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_next' mangled-name='uu_list_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_next'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_prev' mangled-name='uu_list_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_prev'>
+      <parameter type-id='0c0b229b' name='lp'/>
+      <parameter type-id='eaa32e2f' name='elem'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_list_lockup' mangled-name='uu_list_lockup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_lockup'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_list_release' mangled-name='uu_list_release' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_list_release'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_misc.c' language='LANG_C99'>
+    <typedef-decl name='pthread_t' type-id='7359adad' id='4051f5e7'/>
+    <typedef-decl name='pthread_key_t' type-id='f0981eeb' id='2de5383b'/>
+    <qualified-type-def type-id='8efea9e5' const='yes' id='3beb2af4'/>
+    <pointer-type-def type-id='3beb2af4' size-in-bits='64' id='31347b7a'/>
+    <pointer-type-def type-id='31347b7a' size-in-bits='64' id='c59e1ef0'/>
+    <pointer-type-def type-id='95e97e5e' size-in-bits='64' id='7292109c'/>
+    <pointer-type-def type-id='2de5383b' size-in-bits='64' id='ce04b822'/>
+    <pointer-type-def type-id='ee076206' size-in-bits='64' id='953b12f8'/>
+    <pointer-type-def type-id='c5c76c9c' size-in-bits='64' id='b7f9d8e6'/>
+    <function-decl name='uu_error' mangled-name='uu_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_error'>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='uu_strerror' mangled-name='uu_strerror' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_strerror'>
+      <parameter type-id='8f92235e' name='code'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='uu_dump' mangled-name='uu_dump' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_dump'>
+      <parameter type-id='822cd80b' name='out'/>
+      <parameter type-id='80f4b756' name='prefix'/>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='b59d7dce' name='len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='__ctype_b_loc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='c59e1ef0'/>
+    </function-decl>
+    <function-decl name='__errno_location' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='pthread_self' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='4051f5e7'/>
+    </function-decl>
+    <function-decl name='pthread_key_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='ce04b822'/>
+      <parameter type-id='b7f9d8e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_getspecific' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2de5383b'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='pthread_setspecific' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2de5383b'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_atfork' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='953b12f8'/>
+      <parameter type-id='953b12f8'/>
+      <parameter type-id='953b12f8'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='abort' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='pause' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='ee076206'>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c5c76c9c'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_open.c' language='LANG_C99'>
+    <typedef-decl name='__time_t' type-id='bd54fe1a' id='65eda9c0'/>
+    <typedef-decl name='__clockid_t' type-id='95e97e5e' id='08f9a87a'/>
+    <typedef-decl name='__syscall_slong_t' type-id='bd54fe1a' id='03085adc'/>
+    <typedef-decl name='clockid_t' type-id='08f9a87a' id='a1c3b834'/>
+    <class-decl name='timespec' size-in-bits='128' is-struct='yes' visibility='default' id='a9c79a1f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tv_sec' type-id='65eda9c0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tv_nsec' type-id='03085adc' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='a9c79a1f' size-in-bits='64' id='3d83ba87'/>
+    <function-decl name='uu_open_tmp' mangled-name='uu_open_tmp' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_open_tmp'>
+      <parameter type-id='80f4b756' name='dir'/>
+      <parameter type-id='3502e3ff' name='uflags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='snprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='clock_gettime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a1c3b834'/>
+      <parameter type-id='3d83ba87'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='unlink' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_pname.c' language='LANG_C99'>
+    <function-decl name='getexecname' mangled-name='getexecname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getexecname'>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <var-decl name='uu_exit_ok_value' type-id='95e97e5e' mangled-name='uu_exit_ok_value' visibility='default' elf-symbol-id='uu_exit_ok_value'/>
+    <var-decl name='uu_exit_fatal_value' type-id='95e97e5e' mangled-name='uu_exit_fatal_value' visibility='default' elf-symbol-id='uu_exit_fatal_value'/>
+    <var-decl name='uu_exit_usage_value' type-id='95e97e5e' mangled-name='uu_exit_usage_value' visibility='default' elf-symbol-id='uu_exit_usage_value'/>
+    <function-decl name='uu_exit_ok' mangled-name='uu_exit_ok' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_ok'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='uu_exit_fatal' mangled-name='uu_exit_fatal' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_fatal'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='uu_exit_usage' mangled-name='uu_exit_usage' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_usage'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='uu_alt_exit' mangled-name='uu_alt_exit' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_alt_exit'>
+      <parameter type-id='95e97e5e' name='profile'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_vwarn' mangled-name='uu_vwarn' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vwarn'>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter type-id='b7f2d5e6' name='alist'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_warn' mangled-name='uu_warn' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_warn'>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_vdie' mangled-name='uu_vdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vdie'>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter type-id='b7f2d5e6' name='alist'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_die' mangled-name='uu_die' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_die'>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_vxdie' mangled-name='uu_vxdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vxdie'>
+      <parameter type-id='95e97e5e' name='status'/>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter type-id='b7f2d5e6' name='alist'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_xdie' mangled-name='uu_xdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_xdie'>
+      <parameter type-id='95e97e5e' name='status'/>
+      <parameter type-id='80f4b756' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_setpname' mangled-name='uu_setpname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_setpname'>
+      <parameter type-id='26a90f95' name='arg0'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='uu_getpname' mangled-name='uu_getpname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_getpname'>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='exit' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strrchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strerror' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='uu_string.c' language='LANG_C99'>
+    <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='B_FALSE' value='0'/>
+      <enumerator name='B_TRUE' value='1'/>
+    </enum-decl>
+    <typedef-decl name='boolean_t' type-id='f58c8277' id='c19b74c3'/>
+    <function-decl name='uu_streq' mangled-name='uu_streq' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_streq'>
+      <parameter type-id='80f4b756' name='a'/>
+      <parameter type-id='80f4b756' name='b'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='uu_strcaseeq' mangled-name='uu_strcaseeq' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_strcaseeq'>
+      <parameter type-id='80f4b756' name='a'/>
+      <parameter type-id='80f4b756' name='b'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='uu_strbw' mangled-name='uu_strbw' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_strbw'>
+      <parameter type-id='80f4b756' name='a'/>
+      <parameter type-id='80f4b756' name='b'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='strcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strncmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strcasecmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+</abi-corpus>

diff --git a/zfs/lib/libuutil/libuutil.suppr b/zfs/lib/libuutil/libuutil.suppr
new file mode 100644
index 0000000..f4db8a4
--- /dev/null
+++ b/zfs/lib/libuutil/libuutil.suppr

@@ -0,0 +1,2 @@
+[suppress_type]
+	name = FILE*

diff --git a/zfs/lib/libuutil/uu_avl.c b/zfs/lib/libuutil/uu_avl.c
index 0400088..53def0e 100644
--- a/zfs/lib/libuutil/uu_avl.c
+++ b/zfs/lib/libuutil/uu_avl.c

@@ -128,6 +128,7 @@
 	pp->uap_next->uap_prev = pp->uap_prev;
 	pp->uap_prev->uap_next = pp->uap_next;
 	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+	(void) pthread_mutex_destroy(&pp->uap_lock);
 	pp->uap_prev = NULL;
 	pp->uap_next = NULL;
 	uu_free(pp);

diff --git a/zfs/lib/libuutil/uu_open.c b/zfs/lib/libuutil/uu_open.c
index cf5c545..7311775 100644
--- a/zfs/lib/libuutil/uu_open.c
+++ b/zfs/lib/libuutil/uu_open.c

@@ -36,12 +36,6 @@
 #include <stdio.h>
 #include <unistd.h>
 
-#ifdef _LP64
-#define	TMPPATHFMT	"%s/uu%ld"
-#else /* _LP64 */
-#define	TMPPATHFMT	"%s/uu%lld"
-#endif /* _LP64 */
-
 /*ARGSUSED*/
 int
 uu_open_tmp(const char *dir, uint_t uflags)
@@ -55,7 +49,7 @@
 	for (;;) {
 		(void) snprintf(fname, PATH_MAX, "%s/uu%lld", dir, gethrtime());
 
-		f = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
+		f = open(fname, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0600);
 
 		if (f >= 0 || errno != EEXIST)
 			break;

diff --git a/zfs/lib/libuutil/uu_string.c b/zfs/lib/libuutil/uu_string.c
index 66afba0..67024c3 100644
--- a/zfs/lib/libuutil/uu_string.c
+++ b/zfs/lib/libuutil/uu_string.c

@@ -29,8 +29,6 @@
 
 #include <string.h>
 #include <sys/types.h>
-#include <stdio.h>
-#include <malloc.h>
 #include <ctype.h>
 #include "libuutil.h"
 

diff --git a/zfs/lib/libzfs/.gitignore b/zfs/lib/libzfs/.gitignore
index d719bc1..9336a5c 100644
--- a/zfs/lib/libzfs/.gitignore
+++ b/zfs/lib/libzfs/.gitignore

@@ -1,2 +1 @@
 /libzfs.pc
-/libzfs_core.pc

diff --git a/zfs/lib/libzfs/Makefile.am b/zfs/lib/libzfs/Makefile.am
index c131843..77e12b9 100644
--- a/zfs/lib/libzfs/Makefile.am
+++ b/zfs/lib/libzfs/Makefile.am

@@ -6,18 +6,17 @@
 	$(top_srcdir)/lib/libzfs
 
 # Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
+AM_CFLAGS += $(LIBCRYPTO_CFLAGS) $(ZLIB_CFLAGS)
 
-libzfs_pcdir = $(libdir)/pkgconfig
-libzfs_pc_DATA = libzfs.pc libzfs_core.pc
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/module/icp/include \
-	-I$(top_srcdir)/lib/libspl/include
+pkgconfig_DATA = libzfs.pc
 
 lib_LTLIBRARIES = libzfs.la
 
+include $(top_srcdir)/config/Abigail.am
+
 USER_C = \
 	libzfs_changelist.c \
 	libzfs_config.c \
@@ -32,8 +31,25 @@
 	libzfs_status.c \
 	libzfs_util.c
 
+
+if BUILD_FREEBSD
+USER_C += \
+	os/freebsd/libzfs_compat.c \
+	os/freebsd/libzfs_ioctl_compat.c \
+	os/freebsd/libzfs_zmount.c
+endif
+
+if BUILD_LINUX
+USER_C += \
+	os/linux/libzfs_mount_os.c \
+	os/linux/libzfs_pool_os.c \
+	os/linux/libzfs_sendrecv_os.c \
+	os/linux/libzfs_util_os.c
+endif
+
 KERNEL_C = \
 	algs/sha2/sha2.c \
+	cityhash.c \
 	zfeature_common.c \
 	zfs_comutil.c \
 	zfs_deleg.c \
@@ -46,25 +62,39 @@
 	zfs_fletcher_superscalar4.c \
 	zfs_namecheck.c \
 	zfs_prop.c \
-	zfs_uio.c \
 	zpool_prop.c \
 	zprop_common.c
 
+dist_libzfs_la_SOURCES = \
+	$(USER_C)
+
 nodist_libzfs_la_SOURCES = \
-	$(USER_C) \
 	$(KERNEL_C)
 
 libzfs_la_LIBADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libshare/libshare.la \
-	$(top_builddir)/lib/libuutil/libuutil.la \
-	$(top_builddir)/lib/libzfs_core/libzfs_core.la \
-	$(top_builddir)/lib/libzutil/libzutil.la
+	$(abs_top_builddir)/lib/libshare/libshare.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libuutil/libuutil.la
 
-libzfs_la_LIBADD += -lm $(LIBSSL)
-libzfs_la_LDFLAGS = -version-info 2:0:0
+libzfs_la_LIBADD += -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
 
-EXTRA_DIST = $(libzfs_pc_DATA) $(USER_C)
+libzfs_la_LDFLAGS = -pthread
+
+if !ASAN_ENABLED
+libzfs_la_LDFLAGS += -Wl,-z,defs
+endif
+
+if BUILD_FREEBSD
+libzfs_la_LIBADD += -lutil -lgeom
+endif
+
+libzfs_la_LDFLAGS += -version-info 5:0:1
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Library ABI
+EXTRA_DIST = libzfs.abi libzfs.suppr
 
 # Licensing data
 EXTRA_DIST += THIRDPARTYLICENSE.openssl THIRDPARTYLICENSE.openssl.descrip

diff --git a/zfs/lib/libzfs/libzfs.abi b/zfs/lib/libzfs/libzfs.abi
new file mode 100644
index 0000000..58c2d76
--- /dev/null
+++ b/zfs/lib/libzfs/libzfs.abi

@@ -0,0 +1,7606 @@
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.4'>
+  <elf-needed>
+    <dependency name='libzfs_core.so.3'/>
+    <dependency name='libuuid.so.1'/>
+    <dependency name='libblkid.so.1'/>
+    <dependency name='libudev.so.1'/>
+    <dependency name='libnvpair.so.3'/>
+    <dependency name='libtirpc.so.3'/>
+    <dependency name='libuutil.so.3'/>
+    <dependency name='libm.so.6'/>
+    <dependency name='libcrypto.so.1.1'/>
+    <dependency name='libz.so.1'/>
+    <dependency name='libc.so.6'/>
+  </elf-needed>
+  <elf-function-symbols>
+    <elf-symbol name='SHA256Init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='SHA2Final' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='SHA2Init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='SHA2Update' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='SHA384Init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='SHA512Init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='bookmark_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_gather' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_haszonedchild' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_postfix' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_prefix' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='changelist_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='cityhash4' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='color_end' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='color_start' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='create_parents' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='dataset_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='dataset_nestcheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='do_mount' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='do_unmount' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='entity_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='find_shares_object' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_2_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_2_incremental_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_2_incremental_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_2_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_impl_set' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_incremental_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_incremental_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_native_varsize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getprop_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='is_mounted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='is_shared' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='isa_child_of' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libshare_nfs_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libshare_smb_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_add_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_envvar_is_set' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_errno' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_error_action' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_error_description' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_error_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_free_str_array' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_load_module' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_cache' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_find' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_mnttab_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_print_on_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_run_process' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_run_process_get_stdout' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_run_process_get_stdout_nopath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_set_pipe_max' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='make_bookmark_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='make_dataset_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='make_dataset_handle_zc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='make_dataset_simple_handle_zc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='mountpoint_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='namespace_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='no_memory' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='permset_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='pool_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='printf_color' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='register_fstype' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='remove_mountpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_commit_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_disable_share' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_enable_share' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_errorstr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_is_shared' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_validate_shareopts' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='snapshot_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='unshare_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='use_color' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_alloc_dst_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_expand_dst_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_free_nvlists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_read_dst_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_write_conf_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zcmd_write_src_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_depends_on' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_is_supported' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_is_valid_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_lookup_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_lookup_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_adjust_mount_options' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_allocatable_devs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_asprintf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_bookmark_exists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_clone' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_close' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_commit_all_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_commit_nfs_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_commit_proto' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_commit_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_commit_smb_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_component_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_create_ancestors' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_attempt_load_keys' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_clone_check' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_get_encryption_root' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_load_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_rewrap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_crypto_unload_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_dataset_exists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_dataset_name_hidden' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_deleg_canonicalize_perm' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_deleg_verify_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_deleg_whokey' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_destroy_snaps' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_destroy_snaps_nvl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_error_aux' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_error_fmt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_expand_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_foreach_mountpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_all_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_clones_nvl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_fsacl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_holds' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_pool_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_pool_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_recvd_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_user_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_handle_dup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_hold' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_hold_nvl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_ioctl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_mountable' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_mounted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_shared' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_shared_nfs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_shared_proto' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_is_shared_smb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_bookmarks' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_children' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_dependents' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_filesystems' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_mounted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_root' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_snapshots' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_snapshots_sorted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_iter_snapspec' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_mod_supported' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_mount' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_mount_at' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_mount_delegation_check' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_name_to_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_name_valid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_nicestrtonum' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_parent_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_parse_mount_options' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_parse_options' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_path_to_zhandle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_promote' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_default_numeric' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_default_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_delegatable' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_encryption_key_param' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_numeric' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_recvd' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_table' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_userquota' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_userquota_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_written' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_get_written_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_index_to_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_inherit' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_inheritable' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_is_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_random_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_readonly' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_set' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_set_list' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_setonce' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_string_to_index' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_user' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_userquota' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_valid_for_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_valid_keylocation' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_values' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_visible' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prop_written' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_prune_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_realloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_receive' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_refresh_properties' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_rollback' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_save_arguments' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send_progress' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send_resume' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send_resume_token_to_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_send_saved' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_set_fsacl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_setprop_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_share' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_share_nfs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_share_proto' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_share_smb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_shareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_show_diffs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_smb_acl_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_smb_acl_purge' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_smb_acl_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_smb_acl_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_snapshot' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_snapshot_nvl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_spa_version' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_spa_version_map' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_special_devs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_standard_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_standard_error_fmt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_strdup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_type_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unmount' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshare_nfs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshare_proto' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshare_smb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshareall_bypath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshareall_bytype' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshareall_nfs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_unshareall_smb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_validate_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_version_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_version_userland' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_zpl_version_map' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_clear_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_close' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_disable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' alias='zpool_unmount_datasets' is-defined='yes'/>
+    <elf-symbol name='zpool_discard_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_enable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' alias='zpool_mount_datasets' is-defined='yes'/>
+    <elf-symbol name='zpool_events_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_events_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_events_seek' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_expand_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_explain_recover' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_export' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_export_force' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_feature_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_find_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_find_vdev_by_physpath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_free_handles' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_config' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_errlog' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_features' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_handle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_history' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_load_policy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_physpath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_prop_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_state' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_state_str' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_get_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_import_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_import_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_in_use' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_initialize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_initialize_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_is_draid_spare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_iter' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_load_compat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_log_history' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_mount_datasets' type='func-type' binding='weak-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_name_to_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_name_valid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_obj_to_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_obj_to_path_ds' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_open_silent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_default_numeric' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_default_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_feature' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_get_feature' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_get_table' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_get_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_index_to_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_random_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_readonly' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_setonce' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_string_to_index' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_unsupported' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_prop_values' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_props_refresh' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_refresh_stats' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_reguid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_relabel_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_reopen_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_scan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_set_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_set_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_skip_pool' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_standard_error' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_standard_error_fmt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_sync_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_trim' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_unmount_datasets' type='func-type' binding='weak-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_upgrade' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_attach' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_degrade' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_detach' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_fault' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_indirect_size' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_offline' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_online' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_path_to_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_expand_list' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_free_list' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_get_list' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_index_to_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_iter' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_iter_common' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_name_to_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_parse_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_print_one_property' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_random_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_register_hidden' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_register_impl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_register_index' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_register_number' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_register_string' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_string_to_index' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_valid_for_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_values' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zprop_width' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zvol_volsize_to_reservation' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-function-symbols>
+  <elf-variable-symbols>
+    <elf-symbol name='fletcher_4_abd_ops' size='24' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_avx2_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_avx512bw_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_avx512f_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_sse2_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_ssse3_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_superscalar4_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='nfs_only' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='proto_table' size='48' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='share_all_proto' size='12' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='smb_only' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='smb_shares' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='spa_feature_table' size='1904' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_max_dataset_nesting' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_userquota_prop_prefixes' size='96' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-variable-symbols>
+  <abi-instr address-size='64' path='libshare.c' language='LANG_C99'>
+    <class-decl name='sa_share_fsinfo' size-in-bits='64' is-struct='yes' visibility='default' id='412a8a55'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='shareopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='sa_share_fsinfo_t' type-id='412a8a55' id='24463d51'/>
+    <class-decl name='sa_share_impl' size-in-bits='192' is-struct='yes' visibility='default' id='72b09bf8'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='sa_mountpoint' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='sa_zfsname' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='sa_fsinfo' type-id='17934354' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='sa_share_impl_t' type-id='2722c1de' id='a48b47d0'/>
+    <class-decl name='sa_share_ops' size-in-bits='448' is-struct='yes' visibility='default' id='9990a42a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='enable_share' type-id='fa1f29ce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='disable_share' type-id='fa1f29ce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='is_shared' type-id='f337456d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='validate_shareopts' type-id='70487b28' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='update_shareopts' type-id='8c9ca98d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='clear_shareopts' type-id='20e6b301' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='commit_shares' type-id='1db260e5' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='sa_share_ops_t' type-id='9990a42a' id='cfdd2674'/>
+    <class-decl name='sa_fstype' size-in-bits='256' is-struct='yes' visibility='default' id='b329094d'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='next' type-id='3a81ee0d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='name' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='ops' type-id='4f0de78a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='fsinfo_index' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='sa_fstype_t' type-id='b329094d' id='639af739'/>
+    <qualified-type-def type-id='cfdd2674' const='yes' id='3903d8a4'/>
+    <pointer-type-def type-id='3903d8a4' size-in-bits='64' id='4f0de78a'/>
+    <pointer-type-def type-id='276427e1' size-in-bits='64' id='1db260e5'/>
+    <pointer-type-def type-id='5113b296' size-in-bits='64' id='70487b28'/>
+    <pointer-type-def type-id='c13578bc' size-in-bits='64' id='fa1f29ce'/>
+    <pointer-type-def type-id='4d896449' size-in-bits='64' id='8c9ca98d'/>
+    <pointer-type-def type-id='b329094d' size-in-bits='64' id='3a81ee0d'/>
+    <pointer-type-def type-id='639af739' size-in-bits='64' id='0dd0309c'/>
+    <pointer-type-def type-id='24463d51' size-in-bits='64' id='17934354'/>
+    <pointer-type-def type-id='72b09bf8' size-in-bits='64' id='2722c1de'/>
+    <pointer-type-def type-id='86373eb1' size-in-bits='64' id='f337456d'/>
+    <pointer-type-def type-id='6b19040d' size-in-bits='64' id='20e6b301'/>
+    <function-decl name='register_fstype' mangled-name='register_fstype' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='register_fstype'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='4f0de78a' name='ops'/>
+      <return type-id='0dd0309c'/>
+    </function-decl>
+    <function-decl name='libshare_nfs_init' mangled-name='libshare_nfs_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libshare_nfs_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libshare_smb_init' mangled-name='libshare_smb_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libshare_smb_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='276427e1'>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='5113b296'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='c13578bc'>
+      <parameter type-id='a48b47d0'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='4d896449'>
+      <parameter type-id='a48b47d0'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='86373eb1'>
+      <parameter type-id='a48b47d0'/>
+      <return type-id='c19b74c3'/>
+    </function-type>
+    <function-type size-in-bits='64' id='6b19040d'>
+      <parameter type-id='a48b47d0'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/nfs.c' language='LANG_C99'>
+    <function-decl name='rename' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fputs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='e75a27e9'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='flock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='mkdir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='e1c52942'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/smb.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='2040' id='11641789'>
+      <subrange length='255' type-id='7359adad' id='36e7f891'/>
+    </array-type-def>
+    <class-decl name='smb_share_s' size-in-bits='36992' is-struct='yes' visibility='default' id='a75bc907'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='name' type-id='11641789' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2040'>
+        <var-decl name='path' type-id='d16c6df4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='34808'>
+        <var-decl name='comment' type-id='11641789' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='36864'>
+        <var-decl name='guest_ok' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='36928'>
+        <var-decl name='next' type-id='05ed1c5f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='smb_share_t' type-id='a75bc907' id='2d05afd9'/>
+    <class-decl name='dirent' size-in-bits='2240' is-struct='yes' visibility='default' id='611586a1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='d_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='d_off' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='d_reclen' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='144'>
+        <var-decl name='d_type' type-id='002ac4a6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='152'>
+        <var-decl name='d_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/>
+    <pointer-type-def type-id='a75bc907' size-in-bits='64' id='05ed1c5f'/>
+    <pointer-type-def type-id='2d05afd9' size-in-bits='64' id='a3e5c654'/>
+    <var-decl name='smb_shares' type-id='a3e5c654' mangled-name='smb_shares' visibility='default' elf-symbol-id='smb_shares'/>
+    <function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='f09217ba'/>
+    </function-decl>
+    <function-decl name='fgets' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='e75a27e9'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/icp/algs/sha2/sha2.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='8f92235e' size-in-bits='64' id='337c1cdd'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='8f92235e' size-in-bits='1024' id='388e96b8'>
+      <subrange length='32' type-id='7359adad' id='ae5bde82'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='8f92235e' size-in-bits='256' id='2f8b211b'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='1024' id='b316cf0d'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='512' id='c5d13f42'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='1024' id='c768f32d'>
+      <subrange length='128' type-id='7359adad' id='1eb1687a'/>
+    </array-type-def>
+    <class-decl name='SHA2_CTX' size-in-bits='1728' is-struct='yes' naming-typedef-id='2aec903e' visibility='default' id='51cc0913'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='algotype' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='state' type-id='ac5ab595' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='count' type-id='ac5ab596' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='buf_un' type-id='ac5ab597' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='512' is-anonymous='yes' visibility='default' id='ac5ab595'>
+      <data-member access='public'>
+        <var-decl name='s32' type-id='2f8b211b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='s64' type-id='c5d13f42' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <union-decl name='__anonymous_union__1' size-in-bits='128' is-anonymous='yes' visibility='default' id='ac5ab596'>
+      <data-member access='public'>
+        <var-decl name='c32' type-id='337c1cdd' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='c64' type-id='c1c22e6c' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <union-decl name='__anonymous_union__2' size-in-bits='1024' is-anonymous='yes' visibility='default' id='ac5ab597'>
+      <data-member access='public'>
+        <var-decl name='buf8' type-id='c768f32d' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='buf32' type-id='388e96b8' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='buf64' type-id='b316cf0d' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='SHA2_CTX' type-id='51cc0913' id='2aec903e'/>
+    <typedef-decl name='SHA256_CTX' type-id='2aec903e' id='1ef7fe01'/>
+    <typedef-decl name='SHA384_CTX' type-id='2aec903e' id='139bfea5'/>
+    <typedef-decl name='SHA512_CTX' type-id='2aec903e' id='33c643d0'/>
+    <pointer-type-def type-id='1ef7fe01' size-in-bits='64' id='aacf5386'/>
+    <pointer-type-def type-id='2aec903e' size-in-bits='64' id='5d626b03'/>
+    <pointer-type-def type-id='139bfea5' size-in-bits='64' id='074c43fa'/>
+    <pointer-type-def type-id='33c643d0' size-in-bits='64' id='ceba8189'/>
+    <function-decl name='htonl' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8f92235e'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='SHA2Init' mangled-name='SHA2Init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA2Init'>
+      <parameter type-id='9c313c2d' name='mech'/>
+      <parameter type-id='5d626b03' name='ctx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='SHA256Init' mangled-name='SHA256Init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA256Init'>
+      <parameter type-id='aacf5386' name='ctx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='SHA384Init' mangled-name='SHA384Init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA384Init'>
+      <parameter type-id='074c43fa' name='ctx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='SHA512Init' mangled-name='SHA512Init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA512Init'>
+      <parameter type-id='ceba8189' name='ctx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='SHA2Update' mangled-name='SHA2Update' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA2Update'>
+      <parameter type-id='5d626b03' name='ctx'/>
+      <parameter type-id='eaa32e2f' name='inptr'/>
+      <parameter type-id='b59d7dce' name='input_len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='SHA2Final' mangled-name='SHA2Final' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='SHA2Final'>
+      <parameter type-id='eaa32e2f' name='digest'/>
+      <parameter type-id='5d626b03' name='ctx'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/cityhash.c' language='LANG_C99'>
+    <function-decl name='cityhash4' mangled-name='cityhash4' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='cityhash4'>
+      <parameter type-id='9c313c2d' name='w1'/>
+      <parameter type-id='9c313c2d' name='w2'/>
+      <parameter type-id='9c313c2d' name='w3'/>
+      <parameter type-id='9c313c2d' name='w4'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfeature_common.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15232' id='d96379d0'>
+      <subrange length='34' type-id='7359adad' id='6a6a7e00'/>
+    </array-type-def>
+    <enum-decl name='zfeature_flags' id='6db816a4'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFEATURE_FLAG_READONLY_COMPAT' value='1'/>
+      <enumerator name='ZFEATURE_FLAG_MOS' value='2'/>
+      <enumerator name='ZFEATURE_FLAG_ACTIVATE_ON_ENABLE' value='4'/>
+      <enumerator name='ZFEATURE_FLAG_PER_DATASET' value='8'/>
+    </enum-decl>
+    <typedef-decl name='zfeature_flags_t' type-id='6db816a4' id='fc329033'/>
+    <enum-decl name='zfeature_type' id='c4fa2355'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFEATURE_TYPE_BOOLEAN' value='0'/>
+      <enumerator name='ZFEATURE_TYPE_UINT64_ARRAY' value='1'/>
+      <enumerator name='ZFEATURE_NUM_TYPES' value='2'/>
+    </enum-decl>
+    <typedef-decl name='zfeature_type_t' type-id='c4fa2355' id='732d2bb2'/>
+    <class-decl name='zfeature_info' size-in-bits='448' is-struct='yes' visibility='default' id='1178d146'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='fi_feature' type-id='d6618c78' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='fi_uname' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='fi_guid' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='fi_desc' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='fi_flags' type-id='fc329033' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='fi_zfs_mod_supported' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='fi_type' type-id='732d2bb2' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='fi_depends' type-id='1acff326' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfeature_info_t' type-id='1178d146' id='83f29ca2'/>
+    <qualified-type-def type-id='d6618c78' const='yes' id='81a65028'/>
+    <pointer-type-def type-id='81a65028' size-in-bits='64' id='1acff326'/>
+    <var-decl name='spa_feature_table' type-id='d96379d0' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
+    <var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
+    <function-decl name='zfeature_is_valid_guid' mangled-name='zfeature_is_valid_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_is_valid_guid'>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfeature_depends_on' mangled-name='zfeature_depends_on' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_depends_on'>
+      <parameter type-id='d6618c78' name='fid'/>
+      <parameter type-id='d6618c78' name='check'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_mod_supported' mangled-name='zfs_mod_supported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_mod_supported'>
+      <parameter type-id='80f4b756' name='scope'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_comutil.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='80f4b756' size-in-bits='2624' id='ef31fedf'>
+      <subrange length='41' type-id='7359adad' id='cb834f44'/>
+    </array-type-def>
+    <pointer-type-def type-id='8f92235e' size-in-bits='64' id='90421557'/>
+    <function-decl name='nvpair_value_uint32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='90421557'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <var-decl name='zfs_history_event_names' type-id='ef31fedf' mangled-name='zfs_history_event_names' visibility='default' elf-symbol-id='zfs_history_event_names'/>
+    <function-decl name='zfs_allocatable_devs' mangled-name='zfs_allocatable_devs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_allocatable_devs'>
+      <parameter type-id='5ce45b60' name='nv'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_special_devs' mangled-name='zfs_special_devs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_special_devs'>
+      <parameter type-id='5ce45b60' name='nv'/>
+      <parameter type-id='26a90f95' name='type'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_zpl_version_map' mangled-name='zfs_zpl_version_map' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_zpl_version_map'>
+      <parameter type-id='95e97e5e' name='spa_version'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_spa_version_map' mangled-name='zfs_spa_version_map' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_spa_version_map'>
+      <parameter type-id='95e97e5e' name='zpl_version'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_dataset_name_hidden' mangled-name='zfs_dataset_name_hidden' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dataset_name_hidden'>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_deleg.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='f3f851ad' size-in-bits='4096' id='3dd2cc5f'>
+      <subrange length='32' type-id='7359adad' id='ae5bde82'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='f3f851ad' size-in-bits='infinite' id='bc4e5d90'>
+      <subrange length='infinite' id='031f2035'/>
+    </array-type-def>
+    <enum-decl name='zfs_deleg_who_type_t' naming-typedef-id='36d4bd5a' id='b5fa5816'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_DELEG_WHO_UNKNOWN' value='0'/>
+      <enumerator name='ZFS_DELEG_USER' value='117'/>
+      <enumerator name='ZFS_DELEG_USER_SETS' value='85'/>
+      <enumerator name='ZFS_DELEG_GROUP' value='103'/>
+      <enumerator name='ZFS_DELEG_GROUP_SETS' value='71'/>
+      <enumerator name='ZFS_DELEG_EVERYONE' value='101'/>
+      <enumerator name='ZFS_DELEG_EVERYONE_SETS' value='69'/>
+      <enumerator name='ZFS_DELEG_CREATE' value='99'/>
+      <enumerator name='ZFS_DELEG_CREATE_SETS' value='67'/>
+      <enumerator name='ZFS_DELEG_NAMED_SET' value='115'/>
+      <enumerator name='ZFS_DELEG_NAMED_SET_SETS' value='83'/>
+    </enum-decl>
+    <typedef-decl name='zfs_deleg_who_type_t' type-id='b5fa5816' id='36d4bd5a'/>
+    <enum-decl name='zfs_deleg_note_t' naming-typedef-id='4613c173' id='729d4547'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_DELEG_NOTE_CREATE' value='0'/>
+      <enumerator name='ZFS_DELEG_NOTE_DESTROY' value='1'/>
+      <enumerator name='ZFS_DELEG_NOTE_SNAPSHOT' value='2'/>
+      <enumerator name='ZFS_DELEG_NOTE_ROLLBACK' value='3'/>
+      <enumerator name='ZFS_DELEG_NOTE_CLONE' value='4'/>
+      <enumerator name='ZFS_DELEG_NOTE_PROMOTE' value='5'/>
+      <enumerator name='ZFS_DELEG_NOTE_RENAME' value='6'/>
+      <enumerator name='ZFS_DELEG_NOTE_SEND' value='7'/>
+      <enumerator name='ZFS_DELEG_NOTE_RECEIVE' value='8'/>
+      <enumerator name='ZFS_DELEG_NOTE_ALLOW' value='9'/>
+      <enumerator name='ZFS_DELEG_NOTE_USERPROP' value='10'/>
+      <enumerator name='ZFS_DELEG_NOTE_MOUNT' value='11'/>
+      <enumerator name='ZFS_DELEG_NOTE_SHARE' value='12'/>
+      <enumerator name='ZFS_DELEG_NOTE_USERQUOTA' value='13'/>
+      <enumerator name='ZFS_DELEG_NOTE_GROUPQUOTA' value='14'/>
+      <enumerator name='ZFS_DELEG_NOTE_USERUSED' value='15'/>
+      <enumerator name='ZFS_DELEG_NOTE_GROUPUSED' value='16'/>
+      <enumerator name='ZFS_DELEG_NOTE_USEROBJQUOTA' value='17'/>
+      <enumerator name='ZFS_DELEG_NOTE_GROUPOBJQUOTA' value='18'/>
+      <enumerator name='ZFS_DELEG_NOTE_USEROBJUSED' value='19'/>
+      <enumerator name='ZFS_DELEG_NOTE_GROUPOBJUSED' value='20'/>
+      <enumerator name='ZFS_DELEG_NOTE_HOLD' value='21'/>
+      <enumerator name='ZFS_DELEG_NOTE_RELEASE' value='22'/>
+      <enumerator name='ZFS_DELEG_NOTE_DIFF' value='23'/>
+      <enumerator name='ZFS_DELEG_NOTE_BOOKMARK' value='24'/>
+      <enumerator name='ZFS_DELEG_NOTE_LOAD_KEY' value='25'/>
+      <enumerator name='ZFS_DELEG_NOTE_CHANGE_KEY' value='26'/>
+      <enumerator name='ZFS_DELEG_NOTE_PROJECTUSED' value='27'/>
+      <enumerator name='ZFS_DELEG_NOTE_PROJECTQUOTA' value='28'/>
+      <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJUSED' value='29'/>
+      <enumerator name='ZFS_DELEG_NOTE_PROJECTOBJQUOTA' value='30'/>
+      <enumerator name='ZFS_DELEG_NOTE_NONE' value='31'/>
+    </enum-decl>
+    <typedef-decl name='zfs_deleg_note_t' type-id='729d4547' id='4613c173'/>
+    <class-decl name='zfs_deleg_perm_tab' size-in-bits='128' is-struct='yes' visibility='default' id='5aa05c1f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='z_perm' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='z_note' type-id='4613c173' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_deleg_perm_tab_t' type-id='5aa05c1f' id='f3f851ad'/>
+    <var-decl name='zfs_deleg_perm_tab' type-id='bc4e5d90' mangled-name='zfs_deleg_perm_tab' visibility='default' elf-symbol-id='zfs_deleg_perm_tab'/>
+    <function-decl name='permset_namecheck' mangled-name='permset_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='permset_namecheck'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='053457bd'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_delegatable' mangled-name='zfs_prop_delegatable' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_delegatable'>
+      <parameter type-id='58603c44'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_deleg_canonicalize_perm' mangled-name='zfs_deleg_canonicalize_perm' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_deleg_canonicalize_perm'>
+      <parameter type-id='80f4b756' name='perm'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_deleg_verify_nvlist' mangled-name='zfs_deleg_verify_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_deleg_verify_nvlist'>
+      <parameter type-id='5ce45b60' name='nvp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_deleg_whokey' mangled-name='zfs_deleg_whokey' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_deleg_whokey'>
+      <parameter type-id='26a90f95' name='attr'/>
+      <parameter type-id='36d4bd5a' name='type'/>
+      <parameter type-id='a84c031d' name='inheritchr'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='90dbb6d6' size-in-bits='2048' id='16582e69'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='8240361c' size-in-bits='1024' id='481f90b1'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='7c1ab40c' size-in-bits='512' id='cbd91ec1'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='6d059eaa' size-in-bits='1024' id='729b6ebb'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <enum-decl name='zio_byteorder_t' naming-typedef-id='595a65ec' id='fc861be0'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZIO_CHECKSUM_NATIVE' value='0'/>
+      <enumerator name='ZIO_CHECKSUM_BYTESWAP' value='1'/>
+    </enum-decl>
+    <typedef-decl name='zio_byteorder_t' type-id='fc861be0' id='595a65ec'/>
+    <class-decl name='zio_abd_checksum_data' size-in-bits='256' is-struct='yes' visibility='default' id='4bf4b004'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='acd_byteorder' type-id='595a65ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='acd_ctx' type-id='0f7df99e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='acd_zcp' type-id='c24fc2ee' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='acd_private' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zio_abd_checksum_data_t' type-id='4bf4b004' id='74e39470'/>
+    <typedef-decl name='zio_abd_checksum_init_t' type-id='a5444274' id='029a8ebe'/>
+    <typedef-decl name='zio_abd_checksum_fini_t' type-id='a5444274' id='d6fd5c6c'/>
+    <typedef-decl name='zio_abd_checksum_iter_t' type-id='f4a1892e' id='cefa0f4a'/>
+    <class-decl name='zio_abd_checksum_func' size-in-bits='192' is-struct='yes' visibility='default' id='aa14691a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='acf_init' type-id='0bcca125' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='acf_fini' type-id='bfe36153' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='acf_iter' type-id='1e276399' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zio_abd_checksum_func_t' type-id='3f8e8d11' id='c2eb138a'/>
+    <class-decl name='zfs_fletcher_superscalar' size-in-bits='256' is-struct='yes' visibility='default' id='28efb250'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='v' type-id='85c64d26' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_fletcher_superscalar_t' type-id='28efb250' id='6d059eaa'/>
+    <class-decl name='zfs_fletcher_sse' size-in-bits='128' is-struct='yes' visibility='default' id='acd4019a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='v' type-id='c1c22e6c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_fletcher_sse_t' type-id='acd4019a' id='7c1ab40c'/>
+    <class-decl name='zfs_fletcher_avx' size-in-bits='256' is-struct='yes' visibility='default' id='8c208dfa'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='v' type-id='85c64d26' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_fletcher_avx_t' type-id='8c208dfa' id='8240361c'/>
+    <class-decl name='zfs_fletcher_avx512' size-in-bits='512' is-struct='yes' visibility='default' id='c6d0c382'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='v' type-id='c5d13f42' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_fletcher_avx512_t' type-id='c6d0c382' id='90dbb6d6'/>
+    <union-decl name='fletcher_4_ctx' size-in-bits='2048' visibility='default' id='1f951ade'>
+      <data-member access='public'>
+        <var-decl name='scalar' type-id='39730d0b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='superscalar' type-id='729b6ebb' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='sse' type-id='cbd91ec1' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='avx' type-id='481f90b1' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='avx512' type-id='16582e69' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='fletcher_4_ctx_t' type-id='1f951ade' id='4b675395'/>
+    <qualified-type-def type-id='aa14691a' const='yes' id='3f8e8d11'/>
+    <pointer-type-def type-id='4b675395' size-in-bits='64' id='0f7df99e'/>
+    <qualified-type-def type-id='8f92235e' volatile='yes' id='430e0681'/>
+    <pointer-type-def type-id='430e0681' size-in-bits='64' id='3a147f31'/>
+    <pointer-type-def type-id='74e39470' size-in-bits='64' id='eefe7427'/>
+    <pointer-type-def type-id='d6fd5c6c' size-in-bits='64' id='bfe36153'/>
+    <pointer-type-def type-id='029a8ebe' size-in-bits='64' id='0bcca125'/>
+    <pointer-type-def type-id='cefa0f4a' size-in-bits='64' id='1e276399'/>
+    <var-decl name='fletcher_4_abd_ops' type-id='c2eb138a' mangled-name='fletcher_4_abd_ops' visibility='default' elf-symbol-id='fletcher_4_abd_ops'/>
+    <function-decl name='atomic_swap_32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3a147f31'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='membar_producer' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_init' mangled-name='fletcher_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_init'>
+      <parameter type-id='c24fc2ee' name='zcp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_2_incremental_native' mangled-name='fletcher_2_incremental_native' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_2_incremental_native'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fletcher_2_native' mangled-name='fletcher_2_native' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_2_native'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='9c313c2d' name='size'/>
+      <parameter type-id='eaa32e2f' name='ctx_template'/>
+      <parameter type-id='c24fc2ee' name='zcp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_2_incremental_byteswap' mangled-name='fletcher_2_incremental_byteswap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_2_incremental_byteswap'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fletcher_2_byteswap' mangled-name='fletcher_2_byteswap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_2_byteswap'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='9c313c2d' name='size'/>
+      <parameter type-id='eaa32e2f' name='ctx_template'/>
+      <parameter type-id='c24fc2ee' name='zcp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_4_impl_set' mangled-name='fletcher_4_impl_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_impl_set'>
+      <parameter type-id='80f4b756' name='val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fletcher_4_native' mangled-name='fletcher_4_native' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_native'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='9c313c2d' name='size'/>
+      <parameter type-id='eaa32e2f' name='ctx_template'/>
+      <parameter type-id='c24fc2ee' name='zcp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_4_byteswap' mangled-name='fletcher_4_byteswap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_byteswap'>
+      <parameter type-id='eaa32e2f' name='buf'/>
+      <parameter type-id='9c313c2d' name='size'/>
+      <parameter type-id='eaa32e2f' name='ctx_template'/>
+      <parameter type-id='c24fc2ee' name='zcp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='f4a1892e'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='a5444274'>
+      <parameter type-id='eefe7427'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher_avx512.c' language='LANG_C99'>
+    <typedef-decl name='fletcher_4_init_f' type-id='173aa527' id='b9ae1656'/>
+    <typedef-decl name='fletcher_4_fini_f' type-id='0ad5b8a8' id='c4c1f4fc'/>
+    <typedef-decl name='fletcher_4_compute_f' type-id='38147eff' id='ad1dc4cb'/>
+    <class-decl name='fletcher_4_func' size-in-bits='512' is-struct='yes' visibility='default' id='57f479a0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='init_native' type-id='b9ae1656' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='fini_native' type-id='c4c1f4fc' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='compute_native' type-id='ad1dc4cb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='init_byteswap' type-id='b9ae1656' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='fini_byteswap' type-id='c4c1f4fc' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='compute_byteswap' type-id='ad1dc4cb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='valid' type-id='297d38bc' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='name' type-id='80f4b756' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='fletcher_4_ops_t' type-id='57f479a0' id='eba91718'/>
+    <qualified-type-def type-id='eba91718' const='yes' id='9eeabdc8'/>
+    <pointer-type-def type-id='e9e61702' size-in-bits='64' id='297d38bc'/>
+    <pointer-type-def type-id='fe40251b' size-in-bits='64' id='173aa527'/>
+    <pointer-type-def type-id='17fb1f83' size-in-bits='64' id='38147eff'/>
+    <pointer-type-def type-id='fb39e25e' size-in-bits='64' id='0ad5b8a8'/>
+    <var-decl name='fletcher_4_avx512f_ops' type-id='9eeabdc8' mangled-name='fletcher_4_avx512f_ops' visibility='default' elf-symbol-id='fletcher_4_avx512f_ops'/>
+    <var-decl name='fletcher_4_avx512bw_ops' type-id='9eeabdc8' mangled-name='fletcher_4_avx512bw_ops' visibility='default' elf-symbol-id='fletcher_4_avx512bw_ops'/>
+    <function-type size-in-bits='64' id='e9e61702'>
+      <return type-id='c19b74c3'/>
+    </function-type>
+    <function-type size-in-bits='64' id='fe40251b'>
+      <parameter type-id='0f7df99e'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='17fb1f83'>
+      <parameter type-id='0f7df99e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='fb39e25e'>
+      <parameter type-id='0f7df99e'/>
+      <parameter type-id='c24fc2ee'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher_intel.c' language='LANG_C99'>
+    <var-decl name='fletcher_4_avx2_ops' type-id='9eeabdc8' mangled-name='fletcher_4_avx2_ops' visibility='default' elf-symbol-id='fletcher_4_avx2_ops'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher_sse.c' language='LANG_C99'>
+    <var-decl name='fletcher_4_sse2_ops' type-id='9eeabdc8' mangled-name='fletcher_4_sse2_ops' visibility='default' elf-symbol-id='fletcher_4_sse2_ops'/>
+    <var-decl name='fletcher_4_ssse3_ops' type-id='9eeabdc8' mangled-name='fletcher_4_ssse3_ops' visibility='default' elf-symbol-id='fletcher_4_ssse3_ops'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher_superscalar.c' language='LANG_C99'>
+    <var-decl name='fletcher_4_superscalar_ops' type-id='9eeabdc8' mangled-name='fletcher_4_superscalar_ops' visibility='default' elf-symbol-id='fletcher_4_superscalar_ops'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_fletcher_superscalar4.c' language='LANG_C99'>
+    <var-decl name='fletcher_4_superscalar4_ops' type-id='9eeabdc8' mangled-name='fletcher_4_superscalar4_ops' visibility='default' elf-symbol-id='fletcher_4_superscalar4_ops'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_namecheck.c' language='LANG_C99'>
+    <var-decl name='zfs_max_dataset_nesting' type-id='95e97e5e' mangled-name='zfs_max_dataset_nesting' visibility='default' elf-symbol-id='zfs_max_dataset_nesting'/>
+    <function-decl name='get_dataset_depth' mangled-name='get_dataset_depth' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='get_dataset_depth'>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_component_namecheck' mangled-name='zfs_component_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_component_namecheck'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='053457bd' name='why'/>
+      <parameter type-id='26a90f95' name='what'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='dataset_namecheck' mangled-name='dataset_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='dataset_namecheck'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='053457bd' name='why'/>
+      <parameter type-id='26a90f95' name='what'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='bookmark_namecheck' mangled-name='bookmark_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='bookmark_namecheck'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='053457bd' name='why'/>
+      <parameter type-id='26a90f95' name='what'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='snapshot_namecheck' mangled-name='snapshot_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='snapshot_namecheck'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='053457bd' name='why'/>
+      <parameter type-id='26a90f95' name='what'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zfs_prop.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='80f4b756' size-in-bits='768' id='35e4b367'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <var-decl name='zfs_userquota_prop_prefixes' type-id='35e4b367' mangled-name='zfs_userquota_prop_prefixes' visibility='default' elf-symbol-id='zfs_userquota_prop_prefixes'/>
+    <function-decl name='zprop_register_impl' mangled-name='zprop_register_impl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_register_impl'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='31429eff'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='999701cc'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='c8bc397b'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_register_string' mangled-name='zprop_register_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_register_string'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='999701cc'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_register_number' mangled-name='zprop_register_number' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_register_number'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='999701cc'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_register_index' mangled-name='zprop_register_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_register_index'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='999701cc'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c8bc397b'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_register_hidden' mangled-name='zprop_register_hidden' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_register_hidden'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='31429eff'/>
+      <parameter type-id='999701cc'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_index_to_string' mangled-name='zprop_index_to_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_index_to_string'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='7d3cd834'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_random_value' mangled-name='zprop_random_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_random_value'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_prop_string_to_index' mangled-name='zfs_prop_string_to_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_string_to_index'>
+      <parameter type-id='58603c44' name='prop'/>
+      <parameter type-id='80f4b756' name='string'/>
+      <parameter type-id='5d6479ae' name='index'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_random_value' mangled-name='zfs_prop_random_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_random_value'>
+      <parameter type-id='58603c44' name='prop'/>
+      <parameter type-id='9c313c2d' name='seed'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_prop_visible' mangled-name='zfs_prop_visible' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_visible'>
+      <parameter type-id='58603c44' name='prop'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_values' mangled-name='zfs_prop_values' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_values'>
+      <parameter type-id='58603c44' name='prop'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_prop_is_string' mangled-name='zfs_prop_is_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_is_string'>
+      <parameter type-id='58603c44' name='prop'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_column_name' mangled-name='zfs_prop_column_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_column_name'>
+      <parameter type-id='58603c44' name='prop'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_prop_align_right' mangled-name='zfs_prop_align_right' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_align_right'>
+      <parameter type-id='58603c44' name='prop'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zpool_prop.c' language='LANG_C99'>
+    <function-decl name='zpool_prop_string_to_index' mangled-name='zpool_prop_string_to_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_string_to_index'>
+      <parameter type-id='5d0c23fb' name='prop'/>
+      <parameter type-id='80f4b756' name='string'/>
+      <parameter type-id='5d6479ae' name='index'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_prop_random_value' mangled-name='zpool_prop_random_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_random_value'>
+      <parameter type-id='5d0c23fb' name='prop'/>
+      <parameter type-id='9c313c2d' name='seed'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zpool_prop_values' mangled-name='zpool_prop_values' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_values'>
+      <parameter type-id='5d0c23fb' name='prop'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_prop_column_name' mangled-name='zpool_prop_column_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_column_name'>
+      <parameter type-id='5d0c23fb' name='prop'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_prop_align_right' mangled-name='zpool_prop_align_right' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_align_right'>
+      <parameter type-id='5d0c23fb' name='prop'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='../../module/zcommon/zprop_common.c' language='LANG_C99'>
+    <function-decl name='__ctype_tolower_loc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='24f95ba5'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_changelist.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='bf311473' size-in-bits='128' id='f0f65199'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <type-decl name='char' size-in-bits='8' id='a84c031d'/>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8192' id='b54ce520'>
+      <subrange length='1024' type-id='7359adad' id='c60446f8'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8' id='89feb1ec'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
+      <subrange length='20' type-id='7359adad' id='fdca39cf'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='2048' id='d1617432'>
+      <subrange length='256' type-id='7359adad' id='36e5b9fa'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='320' id='36c46961'>
+      <subrange length='40' type-id='7359adad' id='8f80b239'/>
+    </array-type-def>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='re_dfa_t' is-struct='yes' visibility='default' is-declaration-only='yes' id='b48d2441'/>
+    <class-decl name='uu_avl' is-struct='yes' visibility='default' is-declaration-only='yes' id='4af029d1'/>
+    <class-decl name='uu_avl_pool' is-struct='yes' visibility='default' is-declaration-only='yes' id='12a530a8'/>
+    <class-decl name='uu_avl_walk' is-struct='yes' visibility='default' is-declaration-only='yes' id='e70a39e3'/>
+    <type-decl name='int' size-in-bits='32' id='95e97e5e'/>
+    <type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
+    <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+    <type-decl name='short int' size-in-bits='16' id='a2185560'/>
+    <type-decl name='signed char' size-in-bits='8' id='28577a57'/>
+    <array-type-def dimensions='1' type-id='e475ab95' size-in-bits='192' id='0ce65a8b'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
+    <type-decl name='unsigned char' size-in-bits='8' id='002ac4a6'/>
+    <type-decl name='unsigned int' size-in-bits='32' id='f0981eeb'/>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+    <type-decl name='void' id='48b5725f'/>
+    <typedef-decl name='uu_compare_fn_t' type-id='add6e811' id='40f93560'/>
+    <typedef-decl name='uu_avl_pool_t' type-id='12a530a8' id='7f84e390'/>
+    <typedef-decl name='uu_avl_t' type-id='4af029d1' id='bb7f0973'/>
+    <class-decl name='uu_avl_node' size-in-bits='192' is-struct='yes' visibility='default' id='f65f4326'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='uan_opaque' type-id='0ce65a8b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uu_avl_node_t' type-id='f65f4326' id='73a65116'/>
+    <typedef-decl name='uu_avl_walk_t' type-id='e70a39e3' id='edd8457b'/>
+    <typedef-decl name='uu_avl_index_t' type-id='e475ab95' id='5d7f5fc8'/>
+    <typedef-decl name='zfs_handle_t' type-id='f6ee4445' id='775509eb'/>
+    <typedef-decl name='zpool_handle_t' type-id='67002a8a' id='b1efc708'/>
+    <typedef-decl name='libzfs_handle_t' type-id='c8a9d9d8' id='95942d0c'/>
+    <typedef-decl name='zfs_iter_f' type-id='5571cde4' id='d8e49ab9'/>
+    <class-decl name='libzfs_handle' size-in-bits='20352' is-struct='yes' visibility='default' id='c8a9d9d8'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='libzfs_error' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='libzfs_fd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='libzfs_mnttab' type-id='822cd80b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='libzfs_pool_handles' type-id='4c81de99' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='libzfs_ns_avlpool' type-id='de82c773' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='libzfs_ns_avl' type-id='a5c21a38' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='libzfs_ns_gen' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='libzfs_desc_active' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='libzfs_action' type-id='b54ce520' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8608'>
+        <var-decl name='libzfs_desc' type-id='b54ce520' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='16800'>
+        <var-decl name='libzfs_printerr' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='16832'>
+        <var-decl name='libzfs_storeerr' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='16864'>
+        <var-decl name='libzfs_mnttab_enable' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='16896'>
+        <var-decl name='libzfs_mnttab_cache_lock' type-id='7a6844eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='17216'>
+        <var-decl name='libzfs_mnttab_cache' type-id='f20fbd51' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='17536'>
+        <var-decl name='libzfs_pool_iter' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='17568'>
+        <var-decl name='libzfs_chassis_id' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='19616'>
+        <var-decl name='libzfs_prop_debug' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='19648'>
+        <var-decl name='libzfs_urire' type-id='aca3bac8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='20160'>
+        <var-decl name='libzfs_max_nvlist' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='20224'>
+        <var-decl name='libfetch' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='20288'>
+        <var-decl name='libfetch_load_error' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='zfs_handle' size-in-bits='4928' is-struct='yes' visibility='default' id='f6ee4445'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zfs_hdl' type-id='b0382bb3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zpool_hdl' type-id='4c81de99' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zfs_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2176'>
+        <var-decl name='zfs_type' type-id='2e45de5d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2208'>
+        <var-decl name='zfs_head_type' type-id='2e45de5d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2240'>
+        <var-decl name='zfs_dmustats' type-id='b2c14f17' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4544'>
+        <var-decl name='zfs_props' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4608'>
+        <var-decl name='zfs_user_props' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4672'>
+        <var-decl name='zfs_recvd_props' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4736'>
+        <var-decl name='zfs_mntcheck' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4800'>
+        <var-decl name='zfs_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='4864'>
+        <var-decl name='zfs_props_table' type-id='ae3e8ca6' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='zpool_handle' size-in-bits='2560' is-struct='yes' visibility='default' id='67002a8a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zpool_hdl' type-id='b0382bb3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zpool_next' type-id='4c81de99' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zpool_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2176'>
+        <var-decl name='zpool_state' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2240'>
+        <var-decl name='zpool_config_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2304'>
+        <var-decl name='zpool_config' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2368'>
+        <var-decl name='zpool_old_config' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2432'>
+        <var-decl name='zpool_props' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2496'>
+        <var-decl name='zpool_start_block' type-id='804dc465' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <enum-decl name='zfs_share_proto_t' naming-typedef-id='a7913f77' id='d34e3aab'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='PROTO_NFS' value='0'/>
+      <enumerator name='PROTO_SMB' value='1'/>
+      <enumerator name='PROTO_END' value='2'/>
+    </enum-decl>
+    <typedef-decl name='zfs_share_proto_t' type-id='d34e3aab' id='a7913f77'/>
+    <typedef-decl name='prop_changelist_t' type-id='d86edc51' id='eae6431d'/>
+    <typedef-decl name='avl_tree_t' type-id='b351119f' id='f20fbd51'/>
+    <class-decl name='avl_node' size-in-bits='192' is-struct='yes' visibility='default' id='428b67b3'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_child' type-id='f0f65199' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_pcb' type-id='e475ab95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='avl_tree' size-in-bits='320' is-struct='yes' visibility='default' id='b351119f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_root' type-id='bf311473' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='avl_compar' type-id='585e1de9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='avl_numnodes' type-id='ee1f298e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='avl_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='dmu_objset_stats' size-in-bits='2304' is-struct='yes' visibility='default' id='098f0221'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='dds_num_clones' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='dds_creation_txg' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='dds_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='dds_type' type-id='230f1e16' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='dds_is_snapshot' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='232'>
+        <var-decl name='dds_inconsistent' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='240'>
+        <var-decl name='dds_redacted' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='248'>
+        <var-decl name='dds_origin' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='dmu_objset_stats_t' type-id='098f0221' id='b2c14f17'/>
+    <enum-decl name='zfs_type_t' naming-typedef-id='2e45de5d' id='5d8f7321'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_TYPE_FILESYSTEM' value='1'/>
+      <enumerator name='ZFS_TYPE_SNAPSHOT' value='2'/>
+      <enumerator name='ZFS_TYPE_VOLUME' value='4'/>
+      <enumerator name='ZFS_TYPE_POOL' value='8'/>
+      <enumerator name='ZFS_TYPE_BOOKMARK' value='16'/>
+    </enum-decl>
+    <typedef-decl name='zfs_type_t' type-id='5d8f7321' id='2e45de5d'/>
+    <enum-decl name='dmu_objset_type' id='6b1b19f9'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DMU_OST_NONE' value='0'/>
+      <enumerator name='DMU_OST_META' value='1'/>
+      <enumerator name='DMU_OST_ZFS' value='2'/>
+      <enumerator name='DMU_OST_ZVOL' value='3'/>
+      <enumerator name='DMU_OST_OTHER' value='4'/>
+      <enumerator name='DMU_OST_ANY' value='5'/>
+      <enumerator name='DMU_OST_NUMTYPES' value='6'/>
+    </enum-decl>
+    <typedef-decl name='dmu_objset_type_t' type-id='6b1b19f9' id='230f1e16'/>
+    <enum-decl name='zfs_prop_t' naming-typedef-id='58603c44' id='4b000d60'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPROP_CONT' value='-2'/>
+      <enumerator name='ZPROP_INVAL' value='-1'/>
+      <enumerator name='ZFS_PROP_TYPE' value='0'/>
+      <enumerator name='ZFS_PROP_CREATION' value='1'/>
+      <enumerator name='ZFS_PROP_USED' value='2'/>
+      <enumerator name='ZFS_PROP_AVAILABLE' value='3'/>
+      <enumerator name='ZFS_PROP_REFERENCED' value='4'/>
+      <enumerator name='ZFS_PROP_COMPRESSRATIO' value='5'/>
+      <enumerator name='ZFS_PROP_MOUNTED' value='6'/>
+      <enumerator name='ZFS_PROP_ORIGIN' value='7'/>
+      <enumerator name='ZFS_PROP_QUOTA' value='8'/>
+      <enumerator name='ZFS_PROP_RESERVATION' value='9'/>
+      <enumerator name='ZFS_PROP_VOLSIZE' value='10'/>
+      <enumerator name='ZFS_PROP_VOLBLOCKSIZE' value='11'/>
+      <enumerator name='ZFS_PROP_RECORDSIZE' value='12'/>
+      <enumerator name='ZFS_PROP_MOUNTPOINT' value='13'/>
+      <enumerator name='ZFS_PROP_SHARENFS' value='14'/>
+      <enumerator name='ZFS_PROP_CHECKSUM' value='15'/>
+      <enumerator name='ZFS_PROP_COMPRESSION' value='16'/>
+      <enumerator name='ZFS_PROP_ATIME' value='17'/>
+      <enumerator name='ZFS_PROP_DEVICES' value='18'/>
+      <enumerator name='ZFS_PROP_EXEC' value='19'/>
+      <enumerator name='ZFS_PROP_SETUID' value='20'/>
+      <enumerator name='ZFS_PROP_READONLY' value='21'/>
+      <enumerator name='ZFS_PROP_ZONED' value='22'/>
+      <enumerator name='ZFS_PROP_SNAPDIR' value='23'/>
+      <enumerator name='ZFS_PROP_ACLMODE' value='24'/>
+      <enumerator name='ZFS_PROP_ACLINHERIT' value='25'/>
+      <enumerator name='ZFS_PROP_CREATETXG' value='26'/>
+      <enumerator name='ZFS_PROP_NAME' value='27'/>
+      <enumerator name='ZFS_PROP_CANMOUNT' value='28'/>
+      <enumerator name='ZFS_PROP_ISCSIOPTIONS' value='29'/>
+      <enumerator name='ZFS_PROP_XATTR' value='30'/>
+      <enumerator name='ZFS_PROP_NUMCLONES' value='31'/>
+      <enumerator name='ZFS_PROP_COPIES' value='32'/>
+      <enumerator name='ZFS_PROP_VERSION' value='33'/>
+      <enumerator name='ZFS_PROP_UTF8ONLY' value='34'/>
+      <enumerator name='ZFS_PROP_NORMALIZE' value='35'/>
+      <enumerator name='ZFS_PROP_CASE' value='36'/>
+      <enumerator name='ZFS_PROP_VSCAN' value='37'/>
+      <enumerator name='ZFS_PROP_NBMAND' value='38'/>
+      <enumerator name='ZFS_PROP_SHARESMB' value='39'/>
+      <enumerator name='ZFS_PROP_REFQUOTA' value='40'/>
+      <enumerator name='ZFS_PROP_REFRESERVATION' value='41'/>
+      <enumerator name='ZFS_PROP_GUID' value='42'/>
+      <enumerator name='ZFS_PROP_PRIMARYCACHE' value='43'/>
+      <enumerator name='ZFS_PROP_SECONDARYCACHE' value='44'/>
+      <enumerator name='ZFS_PROP_USEDSNAP' value='45'/>
+      <enumerator name='ZFS_PROP_USEDDS' value='46'/>
+      <enumerator name='ZFS_PROP_USEDCHILD' value='47'/>
+      <enumerator name='ZFS_PROP_USEDREFRESERV' value='48'/>
+      <enumerator name='ZFS_PROP_USERACCOUNTING' value='49'/>
+      <enumerator name='ZFS_PROP_STMF_SHAREINFO' value='50'/>
+      <enumerator name='ZFS_PROP_DEFER_DESTROY' value='51'/>
+      <enumerator name='ZFS_PROP_USERREFS' value='52'/>
+      <enumerator name='ZFS_PROP_LOGBIAS' value='53'/>
+      <enumerator name='ZFS_PROP_UNIQUE' value='54'/>
+      <enumerator name='ZFS_PROP_OBJSETID' value='55'/>
+      <enumerator name='ZFS_PROP_DEDUP' value='56'/>
+      <enumerator name='ZFS_PROP_MLSLABEL' value='57'/>
+      <enumerator name='ZFS_PROP_SYNC' value='58'/>
+      <enumerator name='ZFS_PROP_DNODESIZE' value='59'/>
+      <enumerator name='ZFS_PROP_REFRATIO' value='60'/>
+      <enumerator name='ZFS_PROP_WRITTEN' value='61'/>
+      <enumerator name='ZFS_PROP_CLONES' value='62'/>
+      <enumerator name='ZFS_PROP_LOGICALUSED' value='63'/>
+      <enumerator name='ZFS_PROP_LOGICALREFERENCED' value='64'/>
+      <enumerator name='ZFS_PROP_INCONSISTENT' value='65'/>
+      <enumerator name='ZFS_PROP_VOLMODE' value='66'/>
+      <enumerator name='ZFS_PROP_FILESYSTEM_LIMIT' value='67'/>
+      <enumerator name='ZFS_PROP_SNAPSHOT_LIMIT' value='68'/>
+      <enumerator name='ZFS_PROP_FILESYSTEM_COUNT' value='69'/>
+      <enumerator name='ZFS_PROP_SNAPSHOT_COUNT' value='70'/>
+      <enumerator name='ZFS_PROP_SNAPDEV' value='71'/>
+      <enumerator name='ZFS_PROP_ACLTYPE' value='72'/>
+      <enumerator name='ZFS_PROP_SELINUX_CONTEXT' value='73'/>
+      <enumerator name='ZFS_PROP_SELINUX_FSCONTEXT' value='74'/>
+      <enumerator name='ZFS_PROP_SELINUX_DEFCONTEXT' value='75'/>
+      <enumerator name='ZFS_PROP_SELINUX_ROOTCONTEXT' value='76'/>
+      <enumerator name='ZFS_PROP_RELATIME' value='77'/>
+      <enumerator name='ZFS_PROP_REDUNDANT_METADATA' value='78'/>
+      <enumerator name='ZFS_PROP_OVERLAY' value='79'/>
+      <enumerator name='ZFS_PROP_PREV_SNAP' value='80'/>
+      <enumerator name='ZFS_PROP_RECEIVE_RESUME_TOKEN' value='81'/>
+      <enumerator name='ZFS_PROP_ENCRYPTION' value='82'/>
+      <enumerator name='ZFS_PROP_KEYLOCATION' value='83'/>
+      <enumerator name='ZFS_PROP_KEYFORMAT' value='84'/>
+      <enumerator name='ZFS_PROP_PBKDF2_SALT' value='85'/>
+      <enumerator name='ZFS_PROP_PBKDF2_ITERS' value='86'/>
+      <enumerator name='ZFS_PROP_ENCRYPTION_ROOT' value='87'/>
+      <enumerator name='ZFS_PROP_KEY_GUID' value='88'/>
+      <enumerator name='ZFS_PROP_KEYSTATUS' value='89'/>
+      <enumerator name='ZFS_PROP_REMAPTXG' value='90'/>
+      <enumerator name='ZFS_PROP_SPECIAL_SMALL_BLOCKS' value='91'/>
+      <enumerator name='ZFS_PROP_IVSET_GUID' value='92'/>
+      <enumerator name='ZFS_PROP_REDACTED' value='93'/>
+      <enumerator name='ZFS_PROP_REDACT_SNAPS' value='94'/>
+      <enumerator name='ZFS_NUM_PROPS' value='95'/>
+    </enum-decl>
+    <typedef-decl name='zfs_prop_t' type-id='4b000d60' id='58603c44'/>
+    <enum-decl name='zprop_source_t' naming-typedef-id='a2256d42' id='5903f80e'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPROP_SRC_NONE' value='1'/>
+      <enumerator name='ZPROP_SRC_DEFAULT' value='2'/>
+      <enumerator name='ZPROP_SRC_TEMPORARY' value='4'/>
+      <enumerator name='ZPROP_SRC_LOCAL' value='8'/>
+      <enumerator name='ZPROP_SRC_INHERITED' value='16'/>
+      <enumerator name='ZPROP_SRC_RECEIVED' value='32'/>
+    </enum-decl>
+    <typedef-decl name='zprop_source_t' type-id='5903f80e' id='a2256d42'/>
+    <class-decl name='nvlist' size-in-bits='192' is-struct='yes' visibility='default' id='ac266fd9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvl_version' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvl_nvflag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvl_priv' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nvl_flag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='nvl_pad' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvlist_t' type-id='ac266fd9' id='8e8d4be3'/>
+    <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='B_FALSE' value='0'/>
+      <enumerator name='B_TRUE' value='1'/>
+    </enum-decl>
+    <typedef-decl name='boolean_t' type-id='f58c8277' id='c19b74c3'/>
+    <typedef-decl name='ulong_t' type-id='7359adad' id='ee1f298e'/>
+    <typedef-decl name='longlong_t' type-id='1eb56b1e' id='9b3ff54f'/>
+    <typedef-decl name='diskaddr_t' type-id='9b3ff54f' id='804dc465'/>
+    <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+    <class-decl name='prop_changelist' size-in-bits='448' is-struct='yes' visibility='default' id='d86edc51'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='cl_prop' type-id='58603c44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='cl_realprop' type-id='58603c44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='cl_shareprop' type-id='58603c44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='cl_pool' type-id='de82c773' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='cl_tree' type-id='a5c21a38' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='cl_waslegacy' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='cl_allchildren' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='cl_alldependents' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='cl_mflags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='cl_gflags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='cl_haszonedchild' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='pthread_mutex_t' size-in-bits='320' naming-typedef-id='7a6844eb' visibility='default' id='70681f9b'>
+      <data-member access='public'>
+        <var-decl name='__data' type-id='4c734837' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='36c46961' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutex_t' type-id='70681f9b' id='7a6844eb'/>
+    <typedef-decl name='int32_t' type-id='33f57a65' id='3ff5601b'/>
+    <typedef-decl name='uint8_t' type-id='c51d6389' id='b96825af'/>
+    <typedef-decl name='uint32_t' type-id='62f1140c' id='8f92235e'/>
+    <typedef-decl name='uint64_t' type-id='8910171f' id='9c313c2d'/>
+    <class-decl name='__pthread_mutex_s' size-in-bits='320' is-struct='yes' visibility='default' id='4c734837'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__lock' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='__count' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__owner' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='__nusers' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='__kind' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='__spins' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='176'>
+        <var-decl name='__elision' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='__list' type-id='518fb49c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__pthread_internal_list' size-in-bits='128' is-struct='yes' visibility='default' id='0e01899c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__prev' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__next' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__pthread_list_t' type-id='0e01899c' id='518fb49c'/>
+    <typedef-decl name='__uint8_t' type-id='002ac4a6' id='c51d6389'/>
+    <typedef-decl name='__int32_t' type-id='95e97e5e' id='33f57a65'/>
+    <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
+    <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
+    <typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
+    <typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
+    <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
+    <typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
+    <class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_IO_read_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_IO_read_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='_IO_read_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='_IO_write_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='_IO_write_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='_IO_write_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='_IO_buf_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='_IO_buf_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='_IO_save_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='_IO_backup_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='_IO_save_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='_markers' type-id='e4c6fa61' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='_chain' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='_fileno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='_flags2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='_old_offset' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='_cur_column' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1040'>
+        <var-decl name='_vtable_offset' type-id='28577a57' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1048'>
+        <var-decl name='_shortbuf' type-id='89feb1ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='_lock' type-id='cecf4ea7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1344'>
+        <var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1472'>
+        <var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='_mode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1568'>
+        <var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__re_long_size_t' type-id='7359adad' id='ba516949'/>
+    <typedef-decl name='reg_syntax_t' type-id='7359adad' id='1b72c3b3'/>
+    <class-decl name='re_pattern_buffer' size-in-bits='512' is-struct='yes' visibility='default' id='19fc9a8c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='buffer' type-id='33976309' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='allocated' type-id='ba516949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='used' type-id='ba516949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='syntax' type-id='1b72c3b3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='fastmap' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='translate' type-id='cf536864' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='re_nsub' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='can_be_null' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='449'>
+        <var-decl name='regs_allocated' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='451'>
+        <var-decl name='fastmap_accurate' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='452'>
+        <var-decl name='no_sub' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='453'>
+        <var-decl name='not_bol' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='454'>
+        <var-decl name='not_eol' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='455'>
+        <var-decl name='newline_anchor' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='regex_t' type-id='19fc9a8c' id='aca3bac8'/>
+    <typedef-decl name='uintptr_t' type-id='7359adad' id='e475ab95'/>
+    <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
+    <pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
+    <pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
+    <pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
+    <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
+    <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
+    <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
+    <pointer-type-def type-id='0e01899c' size-in-bits='64' id='4d98cd5a'/>
+    <pointer-type-def type-id='428b67b3' size-in-bits='64' id='bf311473'/>
+    <pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
+    <pointer-type-def type-id='26a90f95' size-in-bits='64' id='9b23c9ad'/>
+    <qualified-type-def type-id='a84c031d' const='yes' id='9b45d938'/>
+    <pointer-type-def type-id='9b45d938' size-in-bits='64' id='80f4b756'/>
+    <qualified-type-def type-id='775509eb' const='yes' id='5eadf2db'/>
+    <pointer-type-def type-id='5eadf2db' size-in-bits='64' id='fcd57163'/>
+    <pointer-type-def type-id='96ee24a5' size-in-bits='64' id='585e1de9'/>
+    <pointer-type-def type-id='cb9628fa' size-in-bits='64' id='5571cde4'/>
+    <pointer-type-def type-id='95942d0c' size-in-bits='64' id='b0382bb3'/>
+    <pointer-type-def type-id='8e8d4be3' size-in-bits='64' id='5ce45b60'/>
+    <pointer-type-def type-id='eae6431d' size-in-bits='64' id='0d41d328'/>
+    <pointer-type-def type-id='b48d2441' size-in-bits='64' id='33976309'/>
+    <pointer-type-def type-id='b96825af' size-in-bits='64' id='ae3e8ca6'/>
+    <pointer-type-def type-id='002ac4a6' size-in-bits='64' id='cf536864'/>
+    <pointer-type-def type-id='5d7f5fc8' size-in-bits='64' id='813a2225'/>
+    <pointer-type-def type-id='73a65116' size-in-bits='64' id='2dc35b9d'/>
+    <pointer-type-def type-id='7f84e390' size-in-bits='64' id='de82c773'/>
+    <pointer-type-def type-id='bb7f0973' size-in-bits='64' id='a5c21a38'/>
+    <pointer-type-def type-id='edd8457b' size-in-bits='64' id='5842d146'/>
+    <pointer-type-def type-id='40f93560' size-in-bits='64' id='d502b39f'/>
+    <pointer-type-def type-id='48b5725f' size-in-bits='64' id='eaa32e2f'/>
+    <pointer-type-def type-id='775509eb' size-in-bits='64' id='9200a744'/>
+    <pointer-type-def type-id='a7913f77' size-in-bits='64' id='bf9c30ee'/>
+    <pointer-type-def type-id='b1efc708' size-in-bits='64' id='4c81de99'/>
+    <pointer-type-def type-id='a2256d42' size-in-bits='64' id='debc6aa3'/>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='re_dfa_t' is-struct='yes' visibility='default' is-declaration-only='yes' id='b48d2441'/>
+    <class-decl name='uu_avl' is-struct='yes' visibility='default' is-declaration-only='yes' id='4af029d1'/>
+    <class-decl name='uu_avl_pool' is-struct='yes' visibility='default' is-declaration-only='yes' id='12a530a8'/>
+    <class-decl name='uu_avl_walk' is-struct='yes' visibility='default' is-declaration-only='yes' id='e70a39e3'/>
+    <function-decl name='uu_avl_pool_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='d502b39f'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='de82c773'/>
+    </function-decl>
+    <function-decl name='uu_avl_pool_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='de82c773'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_node_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='2dc35b9d'/>
+      <parameter type-id='de82c773'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='de82c773'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='a5c21a38'/>
+    </function-decl>
+    <function-decl name='uu_avl_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_last' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_start' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='5842d146'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_next' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5842d146'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_walk_end' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5842d146'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_find' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='813a2225'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_insert' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5d7f5fc8'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uu_avl_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_get_handle' mangled-name='zfs_get_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_handle'>
+      <parameter type-id='9200a744'/>
+      <return type-id='b0382bb3'/>
+    </function-decl>
+    <function-decl name='zfs_open' mangled-name='zfs_open' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_open'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='zfs_close' mangled-name='zfs_close' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_close'>
+      <parameter type-id='9200a744'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_get_name' mangled-name='zfs_get_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_name'>
+      <parameter type-id='fcd57163'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get' mangled-name='zfs_prop_get' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='58603c44'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='debc6aa3'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_int' mangled-name='zfs_prop_get_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_int'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='58603c44'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_iter_children' mangled-name='zfs_iter_children' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_children'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_dependents' mangled-name='zfs_iter_dependents' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_dependents'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_mounted' mangled-name='zfs_iter_mounted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_mounted'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_refresh_properties' mangled-name='zfs_refresh_properties' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_refresh_properties'>
+      <parameter type-id='9200a744'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_is_mounted' mangled-name='zfs_is_mounted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_mounted'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_mount' mangled-name='zfs_mount' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_mount'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unmount' mangled-name='zfs_unmount' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unmount'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_is_shared' mangled-name='zfs_is_shared' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_shared'>
+      <parameter type-id='9200a744'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_share_nfs' mangled-name='zfs_share_nfs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_share_nfs'>
+      <parameter type-id='9200a744'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_share_smb' mangled-name='zfs_share_smb' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_share_smb'>
+      <parameter type-id='9200a744'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshare_nfs' mangled-name='zfs_unshare_nfs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshare_nfs'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshare_smb' mangled-name='zfs_unshare_smb' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshare_smb'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_commit_nfs_shares' mangled-name='zfs_commit_nfs_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_commit_nfs_shares'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_commit_smb_shares' mangled-name='zfs_commit_smb_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_commit_smb_shares'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_error' mangled-name='zfs_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_error'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_alloc' mangled-name='zfs_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_alloc'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='remove_mountpoint' mangled-name='remove_mountpoint' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='remove_mountpoint'>
+      <parameter type-id='9200a744'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_unshare_proto' mangled-name='zfs_unshare_proto' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshare_proto'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='bf9c30ee'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_commit_proto' mangled-name='zfs_commit_proto' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_commit_proto'>
+      <parameter type-id='bf9c30ee'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='getzoneid' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='4da03624'/>
+    </function-decl>
+    <function-decl name='strlcat' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='strlcpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='changelist_prefix' mangled-name='changelist_prefix' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_prefix'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='changelist_postfix' mangled-name='changelist_postfix' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_postfix'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='isa_child_of' mangled-name='isa_child_of' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='isa_child_of'>
+      <parameter type-id='80f4b756' name='dataset'/>
+      <parameter type-id='80f4b756' name='parent'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='changelist_rename' mangled-name='changelist_rename' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_rename'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <parameter type-id='80f4b756' name='src'/>
+      <parameter type-id='80f4b756' name='dst'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='changelist_unshare' mangled-name='changelist_unshare' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_unshare'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <parameter type-id='bf9c30ee' name='proto'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='changelist_haszonedchild' mangled-name='changelist_haszonedchild' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_haszonedchild'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='changelist_remove' mangled-name='changelist_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_remove'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='changelist_free' mangled-name='changelist_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_free'>
+      <parameter type-id='0d41d328' name='clp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='changelist_gather' mangled-name='changelist_gather' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='changelist_gather'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='58603c44' name='prop'/>
+      <parameter type-id='95e97e5e' name='gather_flags'/>
+      <parameter type-id='95e97e5e' name='mnt_flags'/>
+      <return type-id='0d41d328'/>
+    </function-decl>
+    <function-decl name='free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strncmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='96ee24a5'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='add6e811'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='cb9628fa'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_config.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32768' id='d16c6df4'>
+      <subrange length='4096' type-id='7359adad' id='bc1b5ddc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='65536' id='163f6aa5'>
+      <subrange length='8192' type-id='7359adad' id='c88f397d'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='128' id='c1c22e6c'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='24' id='d3490169'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <type-decl name='variadic parameter type' id='2c1145c5'/>
+    <typedef-decl name='zpool_iter_f' type-id='3aebb66f' id='fa476e62'/>
+    <enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
+      <enumerator name='DATA_TYPE_UNKNOWN' value='0'/>
+      <enumerator name='DATA_TYPE_BOOLEAN' value='1'/>
+      <enumerator name='DATA_TYPE_BYTE' value='2'/>
+      <enumerator name='DATA_TYPE_INT16' value='3'/>
+      <enumerator name='DATA_TYPE_UINT16' value='4'/>
+      <enumerator name='DATA_TYPE_INT32' value='5'/>
+      <enumerator name='DATA_TYPE_UINT32' value='6'/>
+      <enumerator name='DATA_TYPE_INT64' value='7'/>
+      <enumerator name='DATA_TYPE_UINT64' value='8'/>
+      <enumerator name='DATA_TYPE_STRING' value='9'/>
+      <enumerator name='DATA_TYPE_BYTE_ARRAY' value='10'/>
+      <enumerator name='DATA_TYPE_INT16_ARRAY' value='11'/>
+      <enumerator name='DATA_TYPE_UINT16_ARRAY' value='12'/>
+      <enumerator name='DATA_TYPE_INT32_ARRAY' value='13'/>
+      <enumerator name='DATA_TYPE_UINT32_ARRAY' value='14'/>
+      <enumerator name='DATA_TYPE_INT64_ARRAY' value='15'/>
+      <enumerator name='DATA_TYPE_UINT64_ARRAY' value='16'/>
+      <enumerator name='DATA_TYPE_STRING_ARRAY' value='17'/>
+      <enumerator name='DATA_TYPE_HRTIME' value='18'/>
+      <enumerator name='DATA_TYPE_NVLIST' value='19'/>
+      <enumerator name='DATA_TYPE_NVLIST_ARRAY' value='20'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_VALUE' value='21'/>
+      <enumerator name='DATA_TYPE_INT8' value='22'/>
+      <enumerator name='DATA_TYPE_UINT8' value='23'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_ARRAY' value='24'/>
+      <enumerator name='DATA_TYPE_INT8_ARRAY' value='25'/>
+      <enumerator name='DATA_TYPE_UINT8_ARRAY' value='26'/>
+      <enumerator name='DATA_TYPE_DOUBLE' value='27'/>
+    </enum-decl>
+    <typedef-decl name='data_type_t' type-id='aeeae136' id='8d0687d2'/>
+    <class-decl name='nvpair' size-in-bits='128' is-struct='yes' visibility='default' id='1c34e459'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvp_size' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvp_name_sz' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='48'>
+        <var-decl name='nvp_reserve' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvp_value_elem' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='nvp_type' type-id='8d0687d2' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvpair_t' type-id='1c34e459' id='57928edf'/>
+    <class-decl name='drr_begin' size-in-bits='2432' is-struct='yes' visibility='default' id='09fcdc01'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_magic' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_versioninfo' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_creation_time' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_type' type-id='230f1e16' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='drr_flags' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_fromguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_toname' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='zinject_record' size-in-bits='2816' is-struct='yes' visibility='default' id='3216f820'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zi_objset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zi_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zi_start' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='zi_end' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='zi_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='zi_level' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='zi_error' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='zi_type' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='zi_freq' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='zi_failfast' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='zi_func' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2560'>
+        <var-decl name='zi_iotype' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2592'>
+        <var-decl name='zi_duration' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2624'>
+        <var-decl name='zi_timer' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2688'>
+        <var-decl name='zi_nlanes' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2752'>
+        <var-decl name='zi_cmd' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2784'>
+        <var-decl name='zi_dvas' type-id='8f92235e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zinject_record_t' type-id='3216f820' id='a4301ca6'/>
+    <class-decl name='zfs_share' size-in-bits='256' is-struct='yes' visibility='default' id='feb6f2da'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='z_exportdata' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='z_sharedata' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='z_sharetype' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='z_sharemax' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_share_t' type-id='feb6f2da' id='ee5cec36'/>
+    <class-decl name='zfs_cmd' size-in-bits='109952' is-struct='yes' visibility='default' id='3522cd69'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zc_name' type-id='d16c6df4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32768'>
+        <var-decl name='zc_nvlist_src' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32832'>
+        <var-decl name='zc_nvlist_src_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32896'>
+        <var-decl name='zc_nvlist_dst' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32960'>
+        <var-decl name='zc_nvlist_dst_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33024'>
+        <var-decl name='zc_nvlist_dst_filled' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33056'>
+        <var-decl name='zc_pad2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33088'>
+        <var-decl name='zc_history' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33152'>
+        <var-decl name='zc_value' type-id='163f6aa5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='98688'>
+        <var-decl name='zc_string' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100736'>
+        <var-decl name='zc_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100800'>
+        <var-decl name='zc_nvlist_conf' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100864'>
+        <var-decl name='zc_nvlist_conf_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100928'>
+        <var-decl name='zc_cookie' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100992'>
+        <var-decl name='zc_objset_type' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101056'>
+        <var-decl name='zc_perm_action' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101120'>
+        <var-decl name='zc_history_len' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101184'>
+        <var-decl name='zc_history_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101248'>
+        <var-decl name='zc_obj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101312'>
+        <var-decl name='zc_iflags' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101376'>
+        <var-decl name='zc_share' type-id='ee5cec36' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101632'>
+        <var-decl name='zc_objset_stats' type-id='b2c14f17' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='103936'>
+        <var-decl name='zc_begin_record' type-id='09fcdc01' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='106368'>
+        <var-decl name='zc_inject_record' type-id='a4301ca6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109184'>
+        <var-decl name='zc_defer_destroy' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109216'>
+        <var-decl name='zc_flags' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109248'>
+        <var-decl name='zc_action_handle' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109312'>
+        <var-decl name='zc_cleanup_fd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109344'>
+        <var-decl name='zc_simple' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109352'>
+        <var-decl name='zc_pad' type-id='d3490169' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109376'>
+        <var-decl name='zc_sendobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109440'>
+        <var-decl name='zc_fromobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109504'>
+        <var-decl name='zc_createtxg' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109568'>
+        <var-decl name='zc_stat' type-id='0371a9c7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109888'>
+        <var-decl name='zc_zoneid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_cmd_t' type-id='3522cd69' id='a5559cdd'/>
+    <class-decl name='zfs_stat' size-in-bits='320' is-struct='yes' visibility='default' id='6417f0b9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zs_gen' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zs_mode' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zs_links' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='zs_ctime' type-id='c1c22e6c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_stat_t' type-id='6417f0b9' id='0371a9c7'/>
+    <typedef-decl name='int16_t' type-id='03896e23' id='23bd8cb5'/>
+    <typedef-decl name='__int16_t' type-id='a2185560' id='03896e23'/>
+    <pointer-type-def type-id='c19b74c3' size-in-bits='64' id='37e3bd22'/>
+    <pointer-type-def type-id='2bce87e3' size-in-bits='64' id='3aebb66f'/>
+    <pointer-type-def type-id='95e97e5e' size-in-bits='64' id='7292109c'/>
+    <pointer-type-def type-id='5ce45b60' size-in-bits='64' id='857bb57e'/>
+    <pointer-type-def type-id='57928edf' size-in-bits='64' id='3fa542f0'/>
+    <pointer-type-def type-id='eaa32e2f' size-in-bits='64' id='63e171df'/>
+    <pointer-type-def type-id='3522cd69' size-in-bits='64' id='b65f7fd1'/>
+    <pointer-type-def type-id='a5559cdd' size-in-bits='64' id='e4ec4540'/>
+    <pointer-type-def type-id='4c81de99' size-in-bits='64' id='237193c9'/>
+    <function-decl name='uu_avl_first' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_next' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='uu_avl_teardown' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a5c21a38'/>
+      <parameter type-id='63e171df'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='zfs_ioctl' mangled-name='zfs_ioctl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_ioctl'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='b65f7fd1'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_strdup' mangled-name='zfs_strdup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_strdup'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='no_memory' mangled-name='no_memory' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='no_memory'>
+      <parameter type-id='b0382bb3'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_standard_error' mangled-name='zfs_standard_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_standard_error'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zcmd_alloc_dst_nvlist' mangled-name='zcmd_alloc_dst_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_alloc_dst_nvlist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4ec4540'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zcmd_expand_dst_nvlist' mangled-name='zcmd_expand_dst_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_expand_dst_nvlist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4ec4540'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zcmd_read_dst_nvlist' mangled-name='zcmd_read_dst_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_read_dst_nvlist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4ec4540'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zcmd_free_nvlists' mangled-name='zcmd_free_nvlists' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_free_nvlists'>
+      <parameter type-id='e4ec4540'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='make_dataset_handle' mangled-name='make_dataset_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='make_dataset_handle'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='zpool_open_silent' mangled-name='zpool_open_silent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_open_silent'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='237193c9'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_dup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_exists' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvlist_next_nvpair' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='3fa542f0'/>
+    </function-decl>
+    <function-decl name='nvpair_name' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='nvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libspl_assertf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='namespace_clear' mangled-name='namespace_clear' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='namespace_clear'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_get_config' mangled-name='zpool_get_config' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_config'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='857bb57e' name='oldconfig'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zpool_get_features' mangled-name='zpool_get_features' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_features'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zpool_refresh_stats' mangled-name='zpool_refresh_stats' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_refresh_stats'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='37e3bd22' name='missing'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_skip_pool' mangled-name='zpool_skip_pool' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_skip_pool'>
+      <parameter type-id='80f4b756' name='poolname'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_iter' mangled-name='zpool_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_iter'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='fa476e62' name='func'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_root' mangled-name='zfs_iter_root' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_root'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='d8e49ab9' name='func'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__errno_location' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='dcgettext' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='getenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strcpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='2bce87e3'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_crypto.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='fb7c6451' size-in-bits='256' id='64177143'>
+      <subrange length='32' type-id='7359adad' id='ae5bde82'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='896' id='47394ee0'>
+      <subrange length='28' type-id='7359adad' id='3db583d7'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='7359adad' size-in-bits='1024' id='d2baa450'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <enum-decl name='zpool_prop_t' naming-typedef-id='5d0c23fb' id='af1ba157'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_PROP_INVAL' value='-1'/>
+      <enumerator name='ZPOOL_PROP_NAME' value='0'/>
+      <enumerator name='ZPOOL_PROP_SIZE' value='1'/>
+      <enumerator name='ZPOOL_PROP_CAPACITY' value='2'/>
+      <enumerator name='ZPOOL_PROP_ALTROOT' value='3'/>
+      <enumerator name='ZPOOL_PROP_HEALTH' value='4'/>
+      <enumerator name='ZPOOL_PROP_GUID' value='5'/>
+      <enumerator name='ZPOOL_PROP_VERSION' value='6'/>
+      <enumerator name='ZPOOL_PROP_BOOTFS' value='7'/>
+      <enumerator name='ZPOOL_PROP_DELEGATION' value='8'/>
+      <enumerator name='ZPOOL_PROP_AUTOREPLACE' value='9'/>
+      <enumerator name='ZPOOL_PROP_CACHEFILE' value='10'/>
+      <enumerator name='ZPOOL_PROP_FAILUREMODE' value='11'/>
+      <enumerator name='ZPOOL_PROP_LISTSNAPS' value='12'/>
+      <enumerator name='ZPOOL_PROP_AUTOEXPAND' value='13'/>
+      <enumerator name='ZPOOL_PROP_DEDUPDITTO' value='14'/>
+      <enumerator name='ZPOOL_PROP_DEDUPRATIO' value='15'/>
+      <enumerator name='ZPOOL_PROP_FREE' value='16'/>
+      <enumerator name='ZPOOL_PROP_ALLOCATED' value='17'/>
+      <enumerator name='ZPOOL_PROP_READONLY' value='18'/>
+      <enumerator name='ZPOOL_PROP_ASHIFT' value='19'/>
+      <enumerator name='ZPOOL_PROP_COMMENT' value='20'/>
+      <enumerator name='ZPOOL_PROP_EXPANDSZ' value='21'/>
+      <enumerator name='ZPOOL_PROP_FREEING' value='22'/>
+      <enumerator name='ZPOOL_PROP_FRAGMENTATION' value='23'/>
+      <enumerator name='ZPOOL_PROP_LEAKED' value='24'/>
+      <enumerator name='ZPOOL_PROP_MAXBLOCKSIZE' value='25'/>
+      <enumerator name='ZPOOL_PROP_TNAME' value='26'/>
+      <enumerator name='ZPOOL_PROP_MAXDNODESIZE' value='27'/>
+      <enumerator name='ZPOOL_PROP_MULTIHOST' value='28'/>
+      <enumerator name='ZPOOL_PROP_CHECKPOINT' value='29'/>
+      <enumerator name='ZPOOL_PROP_LOAD_GUID' value='30'/>
+      <enumerator name='ZPOOL_PROP_AUTOTRIM' value='31'/>
+      <enumerator name='ZPOOL_PROP_COMPATIBILITY' value='32'/>
+      <enumerator name='ZPOOL_NUM_PROPS' value='33'/>
+    </enum-decl>
+    <typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
+    <typedef-decl name='uint_t' type-id='f0981eeb' id='3502e3ff'/>
+    <class-decl name='sigaction' size-in-bits='1216' is-struct='yes' visibility='default' id='fe391c48'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__sigaction_handler' type-id='ac5ab598' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='sa_mask' type-id='b9c97942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='sa_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='sa_restorer' type-id='953b12f8' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='64' is-anonymous='yes' visibility='default' id='ac5ab598'>
+      <data-member access='public'>
+        <var-decl name='sa_handler' type-id='8cdd9566' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='sa_sigaction' type-id='6e756877' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='termios' size-in-bits='480' is-struct='yes' visibility='default' id='ad55d2bc'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='c_iflag' type-id='241ce6f8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='c_oflag' type-id='241ce6f8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='c_cflag' type-id='241ce6f8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='c_lflag' type-id='241ce6f8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='c_line' type-id='fb7c6451' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='136'>
+        <var-decl name='c_cc' type-id='64177143' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='c_ispeed' type-id='6a8e8a14' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='c_ospeed' type-id='6a8e8a14' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='cc_t' type-id='002ac4a6' id='fb7c6451'/>
+    <typedef-decl name='speed_t' type-id='f0981eeb' id='6a8e8a14'/>
+    <typedef-decl name='tcflag_t' type-id='f0981eeb' id='241ce6f8'/>
+    <typedef-decl name='__uid_t' type-id='f0981eeb' id='cc5fcceb'/>
+    <typedef-decl name='__pid_t' type-id='95e97e5e' id='3629bad8'/>
+    <typedef-decl name='__clock_t' type-id='bd54fe1a' id='4d66c6d7'/>
+    <typedef-decl name='__ssize_t' type-id='bd54fe1a' id='41060289'/>
+    <class-decl name='__sigset_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='b9c97942' visibility='default' id='2616147f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__val' type-id='d2baa450' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__sigset_t' type-id='2616147f' id='b9c97942'/>
+    <union-decl name='sigval' size-in-bits='64' visibility='default' id='a094b870'>
+      <data-member access='public'>
+        <var-decl name='sival_int' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='sival_ptr' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='__sigval_t' type-id='a094b870' id='eabacd01'/>
+    <class-decl name='siginfo_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='cb681f62' visibility='default' id='d8149419'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_signo' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='si_errno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_code' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_sifields' type-id='ac5ab599' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__1' size-in-bits='896' is-anonymous='yes' visibility='default' id='ac5ab599'>
+      <data-member access='public'>
+        <var-decl name='_pad' type-id='47394ee0' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_kill' type-id='e7f43f72' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_timer' type-id='e7f43f73' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_rt' type-id='e7f43f74' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_sigchld' type-id='e7f43f75' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_sigfault' type-id='e7f43f76' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_sigpoll' type-id='e7f43f77' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_sigsys' type-id='e7f43f78' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='__anonymous_struct__1' size-in-bits='64' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_pid' type-id='3629bad8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='si_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__2' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f73'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_tid' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='si_overrun' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_sigval' type-id='eabacd01' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__3' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f74'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_pid' type-id='3629bad8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='si_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_sigval' type-id='eabacd01' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__4' size-in-bits='256' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f75'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_pid' type-id='3629bad8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='si_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_status' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='si_utime' type-id='4d66c6d7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='si_stime' type-id='4d66c6d7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__5' size-in-bits='256' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f76'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_addr' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_addr_lsb' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_bounds' type-id='ac5ab59a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__2' size-in-bits='128' is-anonymous='yes' visibility='default' id='ac5ab59a'>
+      <data-member access='public'>
+        <var-decl name='_addr_bnd' type-id='e7f43f79' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_pkey' type-id='62f1140c' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='__anonymous_struct__6' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f79'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_lower' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_upper' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__7' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f77'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='si_band' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='si_fd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__anonymous_struct__8' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f78'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_call_addr' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_syscall' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='_arch' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='siginfo_t' type-id='d8149419' id='cb681f62'/>
+    <typedef-decl name='sigset_t' type-id='b9c97942' id='daf33c64'/>
+    <typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
+    <class-decl name='regmatch_t' size-in-bits='64' is-struct='yes' naming-typedef-id='1b941664' visibility='default' id='4f932615'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='rm_so' type-id='54a2a2a8' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='rm_eo' type-id='54a2a2a8' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='regmatch_t' type-id='4f932615' id='1b941664'/>
+    <typedef-decl name='__sighandler_t' type-id='03347643' id='8cdd9566'/>
+    <typedef-decl name='ssize_t' type-id='41060289' id='79a0948f'/>
+    <qualified-type-def type-id='822cd80b' restrict='yes' id='e75a27e9'/>
+    <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
+    <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
+    <qualified-type-def type-id='aca3bac8' const='yes' id='2498fd78'/>
+    <pointer-type-def type-id='2498fd78' size-in-bits='64' id='eed6c816'/>
+    <qualified-type-def type-id='eed6c816' restrict='yes' id='a431a9da'/>
+    <qualified-type-def type-id='fe391c48' const='yes' id='14a93b33'/>
+    <pointer-type-def type-id='14a93b33' size-in-bits='64' id='9f68085b'/>
+    <qualified-type-def type-id='9f68085b' restrict='yes' id='e2a5e6f9'/>
+    <qualified-type-def type-id='ad55d2bc' const='yes' id='a46bf13f'/>
+    <pointer-type-def type-id='a46bf13f' size-in-bits='64' id='eaec840f'/>
+    <qualified-type-def type-id='002ac4a6' const='yes' id='ea86de29'/>
+    <pointer-type-def type-id='ea86de29' size-in-bits='64' id='354f7eb9'/>
+    <qualified-type-def type-id='8efea9e5' const='yes' id='3beb2af4'/>
+    <pointer-type-def type-id='3beb2af4' size-in-bits='64' id='31347b7a'/>
+    <pointer-type-def type-id='31347b7a' size-in-bits='64' id='c59e1ef0'/>
+    <pointer-type-def type-id='1b941664' size-in-bits='64' id='7e2979d5'/>
+    <qualified-type-def type-id='7e2979d5' restrict='yes' id='fc212857'/>
+    <pointer-type-def type-id='fe391c48' size-in-bits='64' id='568dd84e'/>
+    <qualified-type-def type-id='568dd84e' restrict='yes' id='3d8ee6f2'/>
+    <pointer-type-def type-id='cb681f62' size-in-bits='64' id='185869c1'/>
+    <pointer-type-def type-id='daf33c64' size-in-bits='64' id='9e80f729'/>
+    <pointer-type-def type-id='b59d7dce' size-in-bits='64' id='78c01427'/>
+    <qualified-type-def type-id='78c01427' restrict='yes' id='d19b2c25'/>
+    <pointer-type-def type-id='ad55d2bc' size-in-bits='64' id='665a4eda'/>
+    <pointer-type-def type-id='9c313c2d' size-in-bits='64' id='5d6479ae'/>
+    <pointer-type-def type-id='ae3e8ca6' size-in-bits='64' id='d8774064'/>
+    <pointer-type-def type-id='3502e3ff' size-in-bits='64' id='4dd26a40'/>
+    <pointer-type-def type-id='ee076206' size-in-bits='64' id='953b12f8'/>
+    <pointer-type-def type-id='f712e2b7' size-in-bits='64' id='03347643'/>
+    <pointer-type-def type-id='ef70d893' size-in-bits='64' id='6e756877'/>
+    <qualified-type-def type-id='eaa32e2f' restrict='yes' id='1b7446cd'/>
+    <function-decl name='zpool_get_prop_int' mangled-name='zpool_get_prop_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_prop_int'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='5d0c23fb'/>
+      <parameter type-id='debc6aa3'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_handle_dup' mangled-name='zfs_handle_dup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_handle_dup'>
+      <parameter type-id='9200a744'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='zfs_valid_proplist' mangled-name='zfs_valid_proplist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valid_proplist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='2e45de5d'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_prop_to_name' mangled-name='zfs_prop_to_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_to_name'>
+      <parameter type-id='58603c44'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_iter_filesystems' mangled-name='zfs_iter_filesystems' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_filesystems'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_parent_name' mangled-name='zfs_parent_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_parent_name'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_load_key' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_unload_key' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_change_key' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_error_aux' mangled-name='zfs_error_aux' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_error_aux'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_name_to_prop' mangled-name='zfs_name_to_prop' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_name_to_prop'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='58603c44'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fnvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_get_encryption_root' mangled-name='zfs_crypto_get_encryption_root' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_get_encryption_root'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='37e3bd22' name='is_encroot'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_create' mangled-name='zfs_crypto_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_create'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='parent_name'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='5ce45b60' name='pool_props'/>
+      <parameter type-id='c19b74c3' name='stdin_available'/>
+      <parameter type-id='d8774064' name='wkeydata_out'/>
+      <parameter type-id='4dd26a40' name='wkeylen_out'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_clone_check' mangled-name='zfs_crypto_clone_check' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_clone_check'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='9200a744' name='origin_zhp'/>
+      <parameter type-id='26a90f95' name='parent_name'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_attempt_load_keys' mangled-name='zfs_crypto_attempt_load_keys' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_attempt_load_keys'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='fsname'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_load_key' mangled-name='zfs_crypto_load_key' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_load_key'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='c19b74c3' name='noop'/>
+      <parameter type-id='26a90f95' name='alt_keylocation'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_unload_key' mangled-name='zfs_crypto_unload_key' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_unload_key'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_crypto_rewrap' mangled-name='zfs_crypto_rewrap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_crypto_rewrap'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='5ce45b60' name='raw_props'/>
+      <parameter type-id='c19b74c3' name='inheritkey'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__ctype_b_loc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='c59e1ef0'/>
+    </function-decl>
+    <function-decl name='dlopen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='dlsym' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='1b7446cd'/>
+      <parameter type-id='9d26089a'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='dlerror' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='PKCS5_PBKDF2_HMAC_SHA1' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='354f7eb9'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='cf536864'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='regexec' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a431a9da'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='fc212857'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='kill' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3629bad8'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sigemptyset' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9e80f729'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sigaction' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='e2a5e6f9'/>
+      <parameter type-id='3d8ee6f2'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fclose' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fflush' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fdopen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='822cd80b'/>
+    </function-decl>
+    <function-decl name='printf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='snprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='asprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fputc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__getdelim' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='d19b2c25'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='e75a27e9'/>
+      <return type-id='41060289'/>
+    </function-decl>
+    <function-decl name='fread' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='1b7446cd'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='e75a27e9'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='rewind' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='ferror' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fileno' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='malloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='calloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='memcpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='strdup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strerror' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='tcgetattr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='665a4eda'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='tcsetattr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaec840f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='close' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='read' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='getpid' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='3629bad8'/>
+    </function-decl>
+    <function-decl name='isatty' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='unlink' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='ee076206'>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='f712e2b7'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+    <function-type size-in-bits='64' id='ef70d893'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='185869c1'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_dataset.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32' id='8e0573fd'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <class-decl name='zprop_list' size-in-bits='448' is-struct='yes' visibility='default' id='bd9b4291'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pl_prop' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pl_user_prop' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='pl_next' type-id='9f1a1109' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='pl_all' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='pl_width' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='pl_recvd_width' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='pl_fixed' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zprop_list_t' type-id='bd9b4291' id='bdb8ac4f'/>
+    <class-decl name='renameflags' size-in-bits='32' is-struct='yes' visibility='default' id='7aee5792'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='recursive' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1'>
+        <var-decl name='nounmount' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2'>
+        <var-decl name='forceunmount' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='renameflags_t' type-id='7aee5792' id='067170c2'/>
+    <typedef-decl name='zfs_userspace_cb_t' type-id='ca64ff60' id='16c5f410'/>
+    <enum-decl name='lzc_dataset_type' id='bc9887f1'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='LZC_DATSET_TYPE_ZFS' value='2'/>
+      <enumerator name='LZC_DATSET_TYPE_ZVOL' value='3'/>
+    </enum-decl>
+    <typedef-decl name='avl_index_t' type-id='e475ab95' id='fba6cb51'/>
+    <enum-decl name='zfs_userquota_prop_t' naming-typedef-id='279fde6a' id='5258d2f6'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_PROP_USERUSED' value='0'/>
+      <enumerator name='ZFS_PROP_USERQUOTA' value='1'/>
+      <enumerator name='ZFS_PROP_GROUPUSED' value='2'/>
+      <enumerator name='ZFS_PROP_GROUPQUOTA' value='3'/>
+      <enumerator name='ZFS_PROP_USEROBJUSED' value='4'/>
+      <enumerator name='ZFS_PROP_USEROBJQUOTA' value='5'/>
+      <enumerator name='ZFS_PROP_GROUPOBJUSED' value='6'/>
+      <enumerator name='ZFS_PROP_GROUPOBJQUOTA' value='7'/>
+      <enumerator name='ZFS_PROP_PROJECTUSED' value='8'/>
+      <enumerator name='ZFS_PROP_PROJECTQUOTA' value='9'/>
+      <enumerator name='ZFS_PROP_PROJECTOBJUSED' value='10'/>
+      <enumerator name='ZFS_PROP_PROJECTOBJQUOTA' value='11'/>
+      <enumerator name='ZFS_NUM_USERQUOTA_PROPS' value='12'/>
+    </enum-decl>
+    <typedef-decl name='zfs_userquota_prop_t' type-id='5258d2f6' id='279fde6a'/>
+    <enum-decl name='zfs_wait_activity_t' naming-typedef-id='3024501a' id='527d5dc6'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_WAIT_DELETEQ' value='0'/>
+      <enumerator name='ZFS_WAIT_NUM_ACTIVITIES' value='1'/>
+    </enum-decl>
+    <typedef-decl name='zfs_wait_activity_t' type-id='527d5dc6' id='3024501a'/>
+    <enum-decl name='namecheck_err_t' naming-typedef-id='8e0af06e' id='f43bbcda'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='NAME_ERR_LEADING_SLASH' value='0'/>
+      <enumerator name='NAME_ERR_EMPTY_COMPONENT' value='1'/>
+      <enumerator name='NAME_ERR_TRAILING_SLASH' value='2'/>
+      <enumerator name='NAME_ERR_INVALCHAR' value='3'/>
+      <enumerator name='NAME_ERR_MULTIPLE_DELIMITERS' value='4'/>
+      <enumerator name='NAME_ERR_NOLETTER' value='5'/>
+      <enumerator name='NAME_ERR_RESERVED' value='6'/>
+      <enumerator name='NAME_ERR_DISKLIKE' value='7'/>
+      <enumerator name='NAME_ERR_TOOLONG' value='8'/>
+      <enumerator name='NAME_ERR_SELF_REF' value='9'/>
+      <enumerator name='NAME_ERR_PARENT_REF' value='10'/>
+      <enumerator name='NAME_ERR_NO_AT' value='11'/>
+      <enumerator name='NAME_ERR_NO_POUND' value='12'/>
+    </enum-decl>
+    <typedef-decl name='namecheck_err_t' type-id='f43bbcda' id='8e0af06e'/>
+    <enum-decl name='zprop_type_t' naming-typedef-id='31429eff' id='87676253'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='PROP_TYPE_NUMBER' value='0'/>
+      <enumerator name='PROP_TYPE_STRING' value='1'/>
+      <enumerator name='PROP_TYPE_INDEX' value='2'/>
+    </enum-decl>
+    <typedef-decl name='zprop_type_t' type-id='87676253' id='31429eff'/>
+    <class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='pthread_mutexattr_t' size-in-bits='32' naming-typedef-id='8afd6070' visibility='default' id='7300eb00'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='8e0573fd' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutexattr_t' type-id='7300eb00' id='8afd6070'/>
+    <typedef-decl name='int64_t' type-id='0c9942d2' id='9da381c4'/>
+    <typedef-decl name='__int64_t' type-id='bd54fe1a' id='0c9942d2'/>
+    <typedef-decl name='__gid_t' type-id='f0981eeb' id='d94ec6d9'/>
+    <typedef-decl name='__time_t' type-id='bd54fe1a' id='65eda9c0'/>
+    <class-decl name='tm' size-in-bits='448' is-struct='yes' visibility='default' id='dddf6ca2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tm_sec' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='tm_min' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tm_hour' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='tm_mday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tm_mon' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='tm_year' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='tm_wday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='tm_yday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='tm_isdst' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='tm_gmtoff' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='tm_zone' type-id='80f4b756' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='time_t' type-id='65eda9c0' id='c9d12d66'/>
+    <class-decl name='group' size-in-bits='256' is-struct='yes' visibility='default' id='01a1b934'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='gr_name' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='gr_passwd' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='gr_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='gr_mem' type-id='9b23c9ad' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='mntent' size-in-bits='320' is-struct='yes' visibility='default' id='56fe4a37'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_fsname' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_dir' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_type' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_opts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_freq' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_passno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='passwd' size-in-bits='384' is-struct='yes' visibility='default' id='a63d15a3'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pw_name' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pw_passwd' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='pw_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='pw_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='pw_gecos' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='pw_dir' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='pw_shell' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uid_t' type-id='cc5fcceb' id='354978ed'/>
+    <pointer-type-def type-id='fba6cb51' size-in-bits='64' id='32adbf30'/>
+    <pointer-type-def type-id='f20fbd51' size-in-bits='64' id='a3681dea'/>
+    <qualified-type-def type-id='26a90f95' restrict='yes' id='266fe297'/>
+    <pointer-type-def type-id='80f4b756' size-in-bits='64' id='7d3cd834'/>
+    <qualified-type-def type-id='56fe4a37' const='yes' id='a75125ce'/>
+    <pointer-type-def type-id='a75125ce' size-in-bits='64' id='48bea5ec'/>
+    <qualified-type-def type-id='8afd6070' const='yes' id='1d853360'/>
+    <pointer-type-def type-id='1d853360' size-in-bits='64' id='c2afbd7e'/>
+    <qualified-type-def type-id='c9d12d66' const='yes' id='588b3216'/>
+    <pointer-type-def type-id='588b3216' size-in-bits='64' id='9f201474'/>
+    <qualified-type-def type-id='9f201474' restrict='yes' id='d6e2847c'/>
+    <qualified-type-def type-id='dddf6ca2' const='yes' id='e824a34f'/>
+    <pointer-type-def type-id='e824a34f' size-in-bits='64' id='d6ad37ff'/>
+    <qualified-type-def type-id='d6ad37ff' restrict='yes' id='f8c6051d'/>
+    <pointer-type-def type-id='01a1b934' size-in-bits='64' id='566b3f52'/>
+    <pointer-type-def type-id='7e291ce6' size-in-bits='64' id='ca64ff60'/>
+    <pointer-type-def type-id='9da381c4' size-in-bits='64' id='cb785ebf'/>
+    <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/>
+    <pointer-type-def type-id='8e0af06e' size-in-bits='64' id='053457bd'/>
+    <pointer-type-def type-id='857bb57e' size-in-bits='64' id='75be733c'/>
+    <pointer-type-def type-id='a63d15a3' size-in-bits='64' id='a195f4a3'/>
+    <pointer-type-def type-id='7a6844eb' size-in-bits='64' id='18c91f9e'/>
+    <pointer-type-def type-id='dddf6ca2' size-in-bits='64' id='d915a820'/>
+    <qualified-type-def type-id='d915a820' restrict='yes' id='f099ad08'/>
+    <pointer-type-def type-id='5d6479ae' size-in-bits='64' id='892b4acc'/>
+    <pointer-type-def type-id='bd9b4291' size-in-bits='64' id='9f1a1109'/>
+    <pointer-type-def type-id='bdb8ac4f' size-in-bits='64' id='3a9b2288'/>
+    <pointer-type-def type-id='3a9b2288' size-in-bits='64' id='e4378506'/>
+    <function-decl name='zpool_open' mangled-name='zpool_open' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_open'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='4c81de99'/>
+    </function-decl>
+    <function-decl name='zpool_open_canfail' mangled-name='zpool_open_canfail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_open_canfail'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='4c81de99'/>
+    </function-decl>
+    <function-decl name='zpool_close' mangled-name='zpool_close' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_close'>
+      <parameter type-id='4c81de99'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_get_name' mangled-name='zpool_get_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_name'>
+      <parameter type-id='4c81de99'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_get_prop' mangled-name='zpool_get_prop' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_prop'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='5d0c23fb'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='debc6aa3'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_default_string' mangled-name='zfs_prop_default_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_default_string'>
+      <parameter type-id='58603c44'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_prop_default_numeric' mangled-name='zfs_prop_default_numeric' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_default_numeric'>
+      <parameter type-id='58603c44'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zpool_prop_get_feature' mangled-name='zpool_prop_get_feature' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_get_feature'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_snapshots' mangled-name='zfs_iter_snapshots' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_snapshots'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_bookmarks' mangled-name='zfs_iter_bookmarks' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_bookmarks'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='d8e49ab9'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_share' mangled-name='zfs_share' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_share'>
+      <parameter type-id='9200a744'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_commit_all_shares' mangled-name='zfs_commit_all_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_commit_all_shares'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_nicestrtonum' mangled-name='zfs_nicestrtonum' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_nicestrtonum'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_snapshot' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='bc9887f1'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_clone' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_promote' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy_snaps' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_bookmarks' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy_bookmarks' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_hold' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_release' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_holds' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_exists' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='lzc_rollback_to' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_channel_program_nosync' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait_fs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3024501a'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_error_fmt' mangled-name='zfs_error_fmt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_error_fmt'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_standard_error_fmt' mangled-name='zfs_standard_error_fmt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_standard_error_fmt'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_setprop_error' mangled-name='zfs_setprop_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setprop_error'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='58603c44'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_parse_value' mangled-name='zprop_parse_value' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_parse_value'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='2e45de5d'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_expand_list' mangled-name='zprop_expand_list' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_expand_list'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4378506'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zcmd_write_src_nvlist' mangled-name='zcmd_write_src_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_write_src_nvlist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4ec4540'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_name_valid' mangled-name='zpool_name_valid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_name_valid'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_parse_options' mangled-name='zfs_parse_options' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_parse_options'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='a7913f77'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_nicebytes' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_nicenum' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='585e1de9'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_find' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='32adbf30'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_add' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_numnodes' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='avl_destroy_nodes' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='63e171df'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_prop_readonly' mangled-name='zfs_prop_readonly' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_readonly'>
+      <parameter type-id='58603c44'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_inheritable' mangled-name='zfs_prop_inheritable' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_inheritable'>
+      <parameter type-id='58603c44'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_setonce' mangled-name='zfs_prop_setonce' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_setonce'>
+      <parameter type-id='58603c44'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_encryption_key_param' mangled-name='zfs_prop_encryption_key_param' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_encryption_key_param'>
+      <parameter type-id='58603c44'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_valid_keylocation' mangled-name='zfs_prop_valid_keylocation' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_valid_keylocation'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_user' mangled-name='zfs_prop_user' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_user'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_userquota' mangled-name='zfs_prop_userquota' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_userquota'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_written' mangled-name='zfs_prop_written' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_written'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_prop_index_to_string' mangled-name='zfs_prop_index_to_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_index_to_string'>
+      <parameter type-id='58603c44'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='7d3cd834'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_valid_for_type' mangled-name='zfs_prop_valid_for_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_valid_for_type'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='2e45de5d'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_size' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='78c01427'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_pack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='78c01427'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_unpack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8d0687d2'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove_all' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_int64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cb785ebf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='892b4acc'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='75be733c'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_empty' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvpair_type' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='8d0687d2'/>
+    </function-decl>
+    <function-decl name='nvpair_value_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fnvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='3ff5601b'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='entity_namecheck' mangled-name='entity_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='entity_namecheck'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='053457bd'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='dataset_nestcheck' mangled-name='dataset_nestcheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='dataset_nestcheck'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='mountpoint_namecheck' mangled-name='mountpoint_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='mountpoint_namecheck'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='053457bd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_type' mangled-name='zfs_prop_get_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_type'>
+      <parameter type-id='58603c44'/>
+      <return type-id='31429eff'/>
+    </function-decl>
+    <function-decl name='getmntany' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <parameter type-id='9d424d31'/>
+      <parameter type-id='9d424d31'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='_sol_getmntent' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <parameter type-id='9d424d31'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_type_to_name' mangled-name='zfs_type_to_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_type_to_name'>
+      <parameter type-id='2e45de5d' name='type'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_validate_name' mangled-name='zfs_validate_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_validate_name'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='95e97e5e' name='type'/>
+      <parameter type-id='c19b74c3' name='modifying'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_name_valid' mangled-name='zfs_name_valid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_name_valid'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='2e45de5d' name='type'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_free_handles' mangled-name='zpool_free_handles' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_free_handles'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='make_dataset_handle_zc' mangled-name='make_dataset_handle_zc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='make_dataset_handle_zc'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='e4ec4540' name='zc'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='make_dataset_simple_handle_zc' mangled-name='make_dataset_simple_handle_zc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='make_dataset_simple_handle_zc'>
+      <parameter type-id='9200a744' name='pzhp'/>
+      <parameter type-id='e4ec4540' name='zc'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='zfs_bookmark_exists' mangled-name='zfs_bookmark_exists' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_bookmark_exists'>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='make_bookmark_handle' mangled-name='make_bookmark_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='make_bookmark_handle'>
+      <parameter type-id='9200a744' name='parent'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='5ce45b60' name='bmark_props'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_init' mangled-name='libzfs_mnttab_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_init'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_fini' mangled-name='libzfs_mnttab_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_fini'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_cache' mangled-name='libzfs_mnttab_cache' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_cache'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='c19b74c3' name='enable'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_find' mangled-name='libzfs_mnttab_find' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_find'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='9d424d31' name='entry'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_add' mangled-name='libzfs_mnttab_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_add'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='special'/>
+      <parameter type-id='80f4b756' name='mountp'/>
+      <parameter type-id='80f4b756' name='mntopts'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_mnttab_remove' mangled-name='libzfs_mnttab_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_mnttab_remove'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_spa_version' mangled-name='zfs_spa_version' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_spa_version'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='7292109c' name='spa_version'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_set' mangled-name='zfs_prop_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_set'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='80f4b756' name='propval'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_set_list' mangled-name='zfs_prop_set_list' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_set_list'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_inherit' mangled-name='zfs_prop_inherit' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_inherit'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='c19b74c3' name='received'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getprop_uint64' mangled-name='getprop_uint64' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getprop_uint64'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='58603c44' name='prop'/>
+      <parameter type-id='9b23c9ad' name='source'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_recvd' mangled-name='zfs_prop_get_recvd' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_recvd'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='26a90f95' name='propbuf'/>
+      <parameter type-id='b59d7dce' name='proplen'/>
+      <parameter type-id='c19b74c3' name='literal'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_get_clones_nvl' mangled-name='zfs_get_clones_nvl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_clones_nvl'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_numeric' mangled-name='zfs_prop_get_numeric' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_numeric'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='58603c44' name='prop'/>
+      <parameter type-id='5d6479ae' name='value'/>
+      <parameter type-id='debc6aa3' name='src'/>
+      <parameter type-id='26a90f95' name='statbuf'/>
+      <parameter type-id='b59d7dce' name='statlen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_userquota_int' mangled-name='zfs_prop_get_userquota_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_userquota_int'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='5d6479ae' name='propvalue'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_userquota' mangled-name='zfs_prop_get_userquota' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_userquota'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='26a90f95' name='propbuf'/>
+      <parameter type-id='95e97e5e' name='proplen'/>
+      <parameter type-id='c19b74c3' name='literal'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_written_int' mangled-name='zfs_prop_get_written_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_written_int'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='5d6479ae' name='propvalue'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_written' mangled-name='zfs_prop_get_written' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_written'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='26a90f95' name='propbuf'/>
+      <parameter type-id='95e97e5e' name='proplen'/>
+      <parameter type-id='c19b74c3' name='literal'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_get_pool_name' mangled-name='zfs_get_pool_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_pool_name'>
+      <parameter type-id='fcd57163' name='zhp'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zfs_get_type' mangled-name='zfs_get_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_type'>
+      <parameter type-id='fcd57163' name='zhp'/>
+      <return type-id='2e45de5d'/>
+    </function-decl>
+    <function-decl name='zfs_dataset_exists' mangled-name='zfs_dataset_exists' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dataset_exists'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='2e45de5d' name='types'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='create_parents' mangled-name='create_parents' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='create_parents'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='target'/>
+      <parameter type-id='95e97e5e' name='prefixlen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_create_ancestors' mangled-name='zfs_create_ancestors' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_create_ancestors'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_create' mangled-name='zfs_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_create'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='2e45de5d' name='type'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_destroy' mangled-name='zfs_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_destroy'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='c19b74c3' name='defer'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_destroy_snaps' mangled-name='zfs_destroy_snaps' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_destroy_snaps'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='26a90f95' name='snapname'/>
+      <parameter type-id='c19b74c3' name='defer'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_destroy_snaps_nvl' mangled-name='zfs_destroy_snaps_nvl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_destroy_snaps_nvl'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='5ce45b60' name='snaps'/>
+      <parameter type-id='c19b74c3' name='defer'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_clone' mangled-name='zfs_clone' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_clone'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='target'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_promote' mangled-name='zfs_promote' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_promote'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_snapshot_nvl' mangled-name='zfs_snapshot_nvl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_snapshot_nvl'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='5ce45b60' name='snaps'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_snapshot' mangled-name='zfs_snapshot' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_snapshot'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='c19b74c3' name='recursive'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_rollback' mangled-name='zfs_rollback' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_rollback'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='9200a744' name='snap'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_rename' mangled-name='zfs_rename' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_rename'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='target'/>
+      <parameter type-id='067170c2' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_get_all_props' mangled-name='zfs_get_all_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_all_props'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_get_recvd_props' mangled-name='zfs_get_recvd_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_recvd_props'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_get_user_props' mangled-name='zfs_get_user_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_user_props'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_expand_proplist' mangled-name='zfs_expand_proplist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_expand_proplist'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='e4378506' name='plp'/>
+      <parameter type-id='c19b74c3' name='received'/>
+      <parameter type-id='c19b74c3' name='literal'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_prune_proplist' mangled-name='zfs_prune_proplist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prune_proplist'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='ae3e8ca6' name='props'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_smb_acl_add' mangled-name='zfs_smb_acl_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_smb_acl_add'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='dataset'/>
+      <parameter type-id='26a90f95' name='path'/>
+      <parameter type-id='26a90f95' name='resource'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_smb_acl_remove' mangled-name='zfs_smb_acl_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_smb_acl_remove'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='dataset'/>
+      <parameter type-id='26a90f95' name='path'/>
+      <parameter type-id='26a90f95' name='resource'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_smb_acl_purge' mangled-name='zfs_smb_acl_purge' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_smb_acl_purge'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='dataset'/>
+      <parameter type-id='26a90f95' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_smb_acl_rename' mangled-name='zfs_smb_acl_rename' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_smb_acl_rename'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='dataset'/>
+      <parameter type-id='26a90f95' name='path'/>
+      <parameter type-id='26a90f95' name='oldname'/>
+      <parameter type-id='26a90f95' name='newname'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_userspace' mangled-name='zfs_userspace' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userspace'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='279fde6a' name='type'/>
+      <parameter type-id='16c5f410' name='func'/>
+      <parameter type-id='eaa32e2f' name='arg'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_hold' mangled-name='zfs_hold' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_hold'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='tag'/>
+      <parameter type-id='c19b74c3' name='recursive'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_hold_nvl' mangled-name='zfs_hold_nvl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_hold_nvl'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <parameter type-id='5ce45b60' name='holds'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_release' mangled-name='zfs_release' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_release'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='tag'/>
+      <parameter type-id='c19b74c3' name='recursive'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_get_fsacl' mangled-name='zfs_get_fsacl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_fsacl'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='857bb57e' name='nvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_set_fsacl' mangled-name='zfs_set_fsacl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_set_fsacl'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='c19b74c3' name='un'/>
+      <parameter type-id='5ce45b60' name='nvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_get_holds' mangled-name='zfs_get_holds' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_holds'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='857bb57e' name='nvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zvol_volsize_to_reservation' mangled-name='zvol_volsize_to_reservation' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zvol_volsize_to_reservation'>
+      <parameter type-id='4c81de99' name='zph'/>
+      <parameter type-id='9c313c2d' name='volsize'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zfs_wait_status' mangled-name='zfs_wait_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_wait_status'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='3024501a' name='activity'/>
+      <parameter type-id='37e3bd22' name='missing'/>
+      <parameter type-id='37e3bd22' name='waited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getgrnam' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='566b3f52'/>
+    </function-decl>
+    <function-decl name='hasmntopt' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='48bea5ec'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <parameter type-id='c2afbd7e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_lock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_unlock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getpwnam' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='a195f4a3'/>
+    </function-decl>
+    <function-decl name='fprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtol' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='bd54fe1a'/>
+    </function-decl>
+    <function-decl name='strtoul' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='7359adad'/>
+    </function-decl>
+    <function-decl name='abort' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strncpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strrchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strcspn' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='strstr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strsep' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='9d26089a'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='ioctl' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='7359adad'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strftime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f8c6051d'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='localtime_r' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='d6e2847c'/>
+      <parameter type-id='f099ad08'/>
+      <return type-id='d915a820'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='7e291ce6'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='354978ed'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_diff.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='448' id='6093ff7c'>
+      <subrange length='56' type-id='7359adad' id='f8137894'/>
+    </array-type-def>
+    <class-decl name='differ_info' size-in-bits='9024' is-struct='yes' visibility='default' id='d41965ee'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zhp' type-id='9200a744' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='fromsnap' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='frommnt' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='tosnap' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='tomnt' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='ds' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='dsmnt' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='tmpsnap' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='errbuf' type-id='b54ce520' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8704'>
+        <var-decl name='isclone' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8736'>
+        <var-decl name='scripted' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8768'>
+        <var-decl name='classify' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8800'>
+        <var-decl name='timestamped' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8832'>
+        <var-decl name='shares' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8896'>
+        <var-decl name='zerr' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8928'>
+        <var-decl name='cleanupfd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8960'>
+        <var-decl name='outputfd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8992'>
+        <var-decl name='datafd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='differ_info_t' type-id='d41965ee' id='e8525f0e'/>
+    <typedef-decl name='pthread_t' type-id='7359adad' id='4051f5e7'/>
+    <union-decl name='pthread_attr_t' size-in-bits='448' visibility='default' id='b63afacd'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='6093ff7c' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_attr_t' type-id='b63afacd' id='7d8569fd'/>
+    <qualified-type-def type-id='7d8569fd' const='yes' id='e06dee2d'/>
+    <pointer-type-def type-id='e06dee2d' size-in-bits='64' id='540db505'/>
+    <qualified-type-def type-id='540db505' restrict='yes' id='e1815e87'/>
+    <pointer-type-def type-id='e8525f0e' size-in-bits='64' id='ee78f675'/>
+    <pointer-type-def type-id='4051f5e7' size-in-bits='64' id='e01b5462'/>
+    <qualified-type-def type-id='e01b5462' restrict='yes' id='cc338b26'/>
+    <pointer-type-def type-id='cd5d79f4' size-in-bits='64' id='5ad9edb6'/>
+    <function-decl name='is_mounted' mangled-name='is_mounted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_mounted'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_asprintf' mangled-name='zfs_asprintf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_asprintf'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='find_shares_object' mangled-name='find_shares_object' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='find_shares_object'>
+      <parameter type-id='ee78f675'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_show_diffs' mangled-name='zfs_show_diffs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_show_diffs'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='95e97e5e' name='outfd'/>
+      <parameter type-id='80f4b756' name='fromsnap'/>
+      <parameter type-id='80f4b756' name='tosnap'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='cc338b26'/>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='5ad9edb6'/>
+      <parameter type-id='1b7446cd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_join' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4051f5e7'/>
+      <parameter type-id='63e171df'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cancel' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4051f5e7'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pipe2' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7292109c'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='cd5d79f4'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='eaa32e2f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_import.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='03085adc' size-in-bits='192' id='083f8d58'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <typedef-decl name='refresh_config_func_t' type-id='29f040d2' id='b7c58eaa'/>
+    <typedef-decl name='pool_active_func_t' type-id='baa42fef' id='de5d1d8f'/>
+    <class-decl name='pool_config_ops' size-in-bits='128' is-struct='yes' visibility='default' id='8b092c69'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pco_refresh_config' type-id='e7c00489' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pco_pool_active' type-id='9eadf5e0' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='pool_config_ops_t' type-id='1a21babe' id='b1e62775'/>
+    <enum-decl name='pool_state' id='4871ac24'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_STATE_ACTIVE' value='0'/>
+      <enumerator name='POOL_STATE_EXPORTED' value='1'/>
+      <enumerator name='POOL_STATE_DESTROYED' value='2'/>
+      <enumerator name='POOL_STATE_SPARE' value='3'/>
+      <enumerator name='POOL_STATE_L2CACHE' value='4'/>
+      <enumerator name='POOL_STATE_UNINITIALIZED' value='5'/>
+      <enumerator name='POOL_STATE_UNAVAIL' value='6'/>
+      <enumerator name='POOL_STATE_POTENTIALLY_ACTIVE' value='7'/>
+    </enum-decl>
+    <typedef-decl name='pool_state_t' type-id='4871ac24' id='084a08a3'/>
+    <class-decl name='stat64' size-in-bits='1152' is-struct='yes' visibility='default' id='0bbec9cd'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='st_dev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='st_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='st_nlink' type-id='80f0b9df' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='st_mode' type-id='e1c52942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='st_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='st_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='st_rdev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='st_size' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='st_blksize' type-id='d3f10a7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='st_blocks' type-id='4e711bf1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='st_atim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='st_mtim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='st_ctim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__glibc_reserved' type-id='083f8d58' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__dev_t' type-id='7359adad' id='35ed8932'/>
+    <typedef-decl name='__ino64_t' type-id='7359adad' id='71288a47'/>
+    <typedef-decl name='__mode_t' type-id='f0981eeb' id='e1c52942'/>
+    <typedef-decl name='__nlink_t' type-id='7359adad' id='80f0b9df'/>
+    <typedef-decl name='__blksize_t' type-id='bd54fe1a' id='d3f10a7f'/>
+    <typedef-decl name='__blkcnt64_t' type-id='bd54fe1a' id='4e711bf1'/>
+    <typedef-decl name='__syscall_slong_t' type-id='bd54fe1a' id='03085adc'/>
+    <class-decl name='timespec' size-in-bits='128' is-struct='yes' visibility='default' id='a9c79a1f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tv_sec' type-id='65eda9c0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tv_nsec' type-id='03085adc' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <qualified-type-def type-id='8b092c69' const='yes' id='1a21babe'/>
+    <pointer-type-def type-id='de5d1d8f' size-in-bits='64' id='9eadf5e0'/>
+    <pointer-type-def type-id='084a08a3' size-in-bits='64' id='b9ea57b8'/>
+    <pointer-type-def type-id='b7c58eaa' size-in-bits='64' id='e7c00489'/>
+    <pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/>
+    <function-decl name='zcmd_write_conf_nvlist' mangled-name='zcmd_write_conf_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zcmd_write_conf_nvlist'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='e4ec4540'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <var-decl name='libzfs_config_ops' type-id='b1e62775' mangled-name='libzfs_config_ops' visibility='default' elf-symbol-id='libzfs_config_ops'/>
+    <function-decl name='zpool_read_label' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='7292109c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_clear_label' mangled-name='zpool_clear_label' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_clear_label'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_in_use' mangled-name='zpool_in_use' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_in_use'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='b9ea57b8' name='state'/>
+      <parameter type-id='9b23c9ad' name='namestr'/>
+      <parameter type-id='37e3bd22' name='inuse'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='memset' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='fstat64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='62f7a03d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pread64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='724e4de6'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='pwrite64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='724e4de6'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='baa42fef'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='29f040d2'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='5ce45b60'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_iter.c' language='LANG_C99'>
+    <pointer-type-def type-id='b351119f' size-in-bits='64' id='716943c7'/>
+    <function-decl name='avl_first' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_walk' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='716943c7'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_iter_snapshots_sorted' mangled-name='zfs_iter_snapshots_sorted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_snapshots_sorted'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='d8e49ab9' name='callback'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <parameter type-id='9c313c2d' name='min_txg'/>
+      <parameter type-id='9c313c2d' name='max_txg'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_iter_snapspec' mangled-name='zfs_iter_snapspec' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_iter_snapspec'>
+      <parameter type-id='9200a744' name='fs_zhp'/>
+      <parameter type-id='80f4b756' name='spec_orig'/>
+      <parameter type-id='d8e49ab9' name='func'/>
+      <parameter type-id='eaa32e2f' name='arg'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_mount.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='6028cbfe' size-in-bits='256' id='b39b9aa7'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <class-decl name='__dirstream' is-struct='yes' visibility='default' is-declaration-only='yes' id='20cd73f2'/>
+    <class-decl name='tpool' is-struct='yes' visibility='default' is-declaration-only='yes' id='88d1b7f9'/>
+    <array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='64' id='e4266c7e'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='f1bd64e2' size-in-bits='384' id='b2c36c9f'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a7913f77' size-in-bits='64' alignment-in-bits='32' id='79c9b3ac'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a7913f77' size-in-bits='96' alignment-in-bits='32' id='7dc77b61'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <class-decl name='get_all_cb' size-in-bits='192' is-struct='yes' visibility='default' id='803dac95'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='cb_handles' type-id='4507922a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='cb_alloc' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='cb_used' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='get_all_cb_t' type-id='803dac95' id='9b293607'/>
+    <enum-decl name='zfs_share_type_t' naming-typedef-id='7eb57c2d' id='5bc85791'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='SHARED_NOT_SHARED' value='0'/>
+      <enumerator name='SHARED_NFS' value='2'/>
+      <enumerator name='SHARED_SMB' value='4'/>
+    </enum-decl>
+    <typedef-decl name='zfs_share_type_t' type-id='5bc85791' id='7eb57c2d'/>
+    <class-decl name='proto_table_t' size-in-bits='192' is-struct='yes' naming-typedef-id='f1bd64e2' visibility='default' id='f4c8e1ed'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='p_prop' type-id='58603c44' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='p_name' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='p_share_err' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='p_unshare_err' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='proto_table_t' type-id='f4c8e1ed' id='f1bd64e2'/>
+    <typedef-decl name='tpool_t' type-id='88d1b7f9' id='b1bbf10d'/>
+    <class-decl name='dirent64' size-in-bits='2240' is-struct='yes' visibility='default' id='5725d813'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='d_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='d_off' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='d_reclen' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='144'>
+        <var-decl name='d_type' type-id='002ac4a6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='152'>
+        <var-decl name='d_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='statfs64' size-in-bits='960' is-struct='yes' visibility='default' id='a2a6be1a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='f_type' type-id='6028cbfe' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='f_bsize' type-id='6028cbfe' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='f_blocks' type-id='95fe1a02' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='f_bfree' type-id='95fe1a02' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='f_bavail' type-id='95fe1a02' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='f_files' type-id='0c3a4dde' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='f_ffree' type-id='0c3a4dde' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='f_fsid' type-id='0f35d263' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='f_namelen' type-id='6028cbfe' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='f_frsize' type-id='6028cbfe' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='f_flags' type-id='6028cbfe' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='f_spare' type-id='b39b9aa7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='stat' size-in-bits='1152' is-struct='yes' visibility='default' id='aafc373f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='st_dev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='st_ino' type-id='e43e523d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='st_nlink' type-id='80f0b9df' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='st_mode' type-id='e1c52942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='st_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='st_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='st_rdev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='st_size' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='st_blksize' type-id='d3f10a7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='st_blocks' type-id='dbc43803' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='st_atim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='st_mtim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='st_ctim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__glibc_reserved' type-id='083f8d58' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__ino_t' type-id='7359adad' id='e43e523d'/>
+    <class-decl name='__fsid_t' size-in-bits='64' is-struct='yes' naming-typedef-id='0f35d263' visibility='default' id='ea35c84a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__val' type-id='e4266c7e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__fsid_t' type-id='ea35c84a' id='0f35d263'/>
+    <typedef-decl name='__blkcnt_t' type-id='bd54fe1a' id='dbc43803'/>
+    <typedef-decl name='__fsblkcnt64_t' type-id='7359adad' id='95fe1a02'/>
+    <typedef-decl name='__fsfilcnt64_t' type-id='7359adad' id='0c3a4dde'/>
+    <typedef-decl name='__fsword_t' type-id='bd54fe1a' id='6028cbfe'/>
+    <typedef-decl name='DIR' type-id='20cd73f2' id='54a5d683'/>
+    <typedef-decl name='mode_t' type-id='e1c52942' id='d50d396c'/>
+    <typedef-decl name='__compar_fn_t' type-id='585e1de9' id='aba7edd8'/>
+    <pointer-type-def type-id='54a5d683' size-in-bits='64' id='f09217ba'/>
+    <pointer-type-def type-id='5725d813' size-in-bits='64' id='07b96073'/>
+    <pointer-type-def type-id='9b293607' size-in-bits='64' id='77bf1784'/>
+    <pointer-type-def type-id='7d8569fd' size-in-bits='64' id='7347a39e'/>
+    <pointer-type-def type-id='aafc373f' size-in-bits='64' id='4330df87'/>
+    <qualified-type-def type-id='4330df87' restrict='yes' id='73665405'/>
+    <pointer-type-def type-id='a2a6be1a' size-in-bits='64' id='7fd094c8'/>
+    <pointer-type-def type-id='b1bbf10d' size-in-bits='64' id='9cf59a50'/>
+    <pointer-type-def type-id='c5c76c9c' size-in-bits='64' id='b7f9d8e6'/>
+    <pointer-type-def type-id='9200a744' size-in-bits='64' id='4507922a'/>
+    <class-decl name='__dirstream' is-struct='yes' visibility='default' is-declaration-only='yes' id='20cd73f2'/>
+    <class-decl name='tpool' is-struct='yes' visibility='default' is-declaration-only='yes' id='88d1b7f9'/>
+    <function-decl name='zfs_realloc' mangled-name='zfs_realloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_realloc'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <var-decl name='proto_table' type-id='b2c36c9f' mangled-name='proto_table' visibility='default' elf-symbol-id='proto_table'/>
+    <function-decl name='do_mount' mangled-name='do_mount' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='do_mount'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='do_unmount' mangled-name='do_unmount' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='do_unmount'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='tpool_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='7347a39e'/>
+      <return type-id='9cf59a50'/>
+    </function-decl>
+    <function-decl name='tpool_dispatch' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9cf59a50'/>
+      <parameter type-id='b7f9d8e6'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='tpool_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9cf59a50'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='tpool_wait' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9cf59a50'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='mkdirp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='d50d396c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sa_errorstr' mangled-name='sa_errorstr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_errorstr'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='sa_enable_share' mangled-name='sa_enable_share' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_enable_share'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sa_disable_share' mangled-name='sa_disable_share' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_disable_share'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sa_is_shared' mangled-name='sa_is_shared' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_is_shared'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='sa_commit_shares' mangled-name='sa_commit_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_commit_shares'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='sa_validate_shareopts' mangled-name='sa_validate_shareopts' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_validate_shareopts'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <var-decl name='nfs_only' type-id='79c9b3ac' mangled-name='nfs_only' visibility='default' elf-symbol-id='nfs_only'/>
+    <var-decl name='smb_only' type-id='79c9b3ac' mangled-name='smb_only' visibility='default' elf-symbol-id='smb_only'/>
+    <var-decl name='share_all_proto' type-id='7dc77b61' mangled-name='share_all_proto' visibility='default' elf-symbol-id='share_all_proto'/>
+    <function-decl name='zfs_is_mountable' mangled-name='zfs_is_mountable' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_mountable'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <parameter type-id='debc6aa3' name='source'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_mount_at' mangled-name='zfs_mount_at' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_mount_at'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='options'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <parameter type-id='80f4b756' name='mountpoint'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unmountall' mangled-name='zfs_unmountall' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unmountall'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='unshare_one' mangled-name='unshare_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='unshare_one'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='80f4b756' name='mountpoint'/>
+      <parameter type-id='a7913f77' name='proto'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='is_shared' mangled-name='is_shared' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_shared'>
+      <parameter type-id='80f4b756' name='mountpoint'/>
+      <parameter type-id='a7913f77' name='proto'/>
+      <return type-id='7eb57c2d'/>
+    </function-decl>
+    <function-decl name='zfs_share_proto' mangled-name='zfs_share_proto' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_share_proto'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='bf9c30ee' name='proto'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshare' mangled-name='zfs_unshare' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshare'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_is_shared_proto' mangled-name='zfs_is_shared_proto' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_shared_proto'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='9b23c9ad' name='where'/>
+      <parameter type-id='a7913f77' name='proto'/>
+      <return type-id='7eb57c2d'/>
+    </function-decl>
+    <function-decl name='zfs_is_shared_nfs' mangled-name='zfs_is_shared_nfs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_shared_nfs'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='9b23c9ad' name='where'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_is_shared_smb' mangled-name='zfs_is_shared_smb' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_is_shared_smb'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='9b23c9ad' name='where'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_commit_shares' mangled-name='zfs_commit_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_commit_shares'>
+      <parameter type-id='80f4b756' name='proto'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_unshareall_nfs' mangled-name='zfs_unshareall_nfs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshareall_nfs'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshareall_smb' mangled-name='zfs_unshareall_smb' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshareall_smb'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshareall' mangled-name='zfs_unshareall' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshareall'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshareall_bypath' mangled-name='zfs_unshareall_bypath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshareall_bypath'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='mountpoint'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_unshareall_bytype' mangled-name='zfs_unshareall_bytype' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshareall_bytype'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='mountpoint'/>
+      <parameter type-id='80f4b756' name='proto'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_add_handle' mangled-name='libzfs_add_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_add_handle'>
+      <parameter type-id='77bf1784' name='cbp'/>
+      <parameter type-id='9200a744' name='zhp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_foreach_mountpoint' mangled-name='zfs_foreach_mountpoint' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_foreach_mountpoint'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='4507922a' name='handles'/>
+      <parameter type-id='b59d7dce' name='num_handles'/>
+      <parameter type-id='d8e49ab9' name='func'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <parameter type-id='c19b74c3' name='parallel'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_enable_datasets' mangled-name='zpool_enable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_enable_datasets'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='mntopts'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_disable_datasets' mangled-name='zpool_disable_datasets' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disable_datasets'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fdopendir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='f09217ba'/>
+    </function-decl>
+    <function-decl name='closedir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f09217ba'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='readdir64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f09217ba'/>
+      <return type-id='07b96073'/>
+    </function-decl>
+    <function-decl name='qsort' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='aba7edd8'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='statfs64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='7fd094c8'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='rmdir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='c5c76c9c'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_pool.c' language='LANG_C99'>
+    <type-decl name='long long unsigned int' size-in-bits='64' id='3a47d82b'/>
+    <class-decl name='splitflags' size-in-bits='64' is-struct='yes' visibility='default' id='dc01bf52'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='dryrun' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1'>
+        <var-decl name='import' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='name_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='splitflags_t' type-id='dc01bf52' id='325c1e34'/>
+    <class-decl name='trimflags' size-in-bits='192' is-struct='yes' visibility='default' id='8ef58008'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='fullpool' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='secure' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='wait' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='rate' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='trimflags_t' type-id='8ef58008' id='a093cbb8'/>
+    <enum-decl name='zpool_status_t' naming-typedef-id='d3dd6294' id='5e770b40'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_STATUS_CORRUPT_CACHE' value='0'/>
+      <enumerator name='ZPOOL_STATUS_MISSING_DEV_R' value='1'/>
+      <enumerator name='ZPOOL_STATUS_MISSING_DEV_NR' value='2'/>
+      <enumerator name='ZPOOL_STATUS_CORRUPT_LABEL_R' value='3'/>
+      <enumerator name='ZPOOL_STATUS_CORRUPT_LABEL_NR' value='4'/>
+      <enumerator name='ZPOOL_STATUS_BAD_GUID_SUM' value='5'/>
+      <enumerator name='ZPOOL_STATUS_CORRUPT_POOL' value='6'/>
+      <enumerator name='ZPOOL_STATUS_CORRUPT_DATA' value='7'/>
+      <enumerator name='ZPOOL_STATUS_FAILING_DEV' value='8'/>
+      <enumerator name='ZPOOL_STATUS_VERSION_NEWER' value='9'/>
+      <enumerator name='ZPOOL_STATUS_HOSTID_MISMATCH' value='10'/>
+      <enumerator name='ZPOOL_STATUS_HOSTID_ACTIVE' value='11'/>
+      <enumerator name='ZPOOL_STATUS_HOSTID_REQUIRED' value='12'/>
+      <enumerator name='ZPOOL_STATUS_IO_FAILURE_WAIT' value='13'/>
+      <enumerator name='ZPOOL_STATUS_IO_FAILURE_CONTINUE' value='14'/>
+      <enumerator name='ZPOOL_STATUS_IO_FAILURE_MMP' value='15'/>
+      <enumerator name='ZPOOL_STATUS_BAD_LOG' value='16'/>
+      <enumerator name='ZPOOL_STATUS_ERRATA' value='17'/>
+      <enumerator name='ZPOOL_STATUS_UNSUP_FEAT_READ' value='18'/>
+      <enumerator name='ZPOOL_STATUS_UNSUP_FEAT_WRITE' value='19'/>
+      <enumerator name='ZPOOL_STATUS_FAULTED_DEV_R' value='20'/>
+      <enumerator name='ZPOOL_STATUS_FAULTED_DEV_NR' value='21'/>
+      <enumerator name='ZPOOL_STATUS_VERSION_OLDER' value='22'/>
+      <enumerator name='ZPOOL_STATUS_FEAT_DISABLED' value='23'/>
+      <enumerator name='ZPOOL_STATUS_RESILVERING' value='24'/>
+      <enumerator name='ZPOOL_STATUS_OFFLINE_DEV' value='25'/>
+      <enumerator name='ZPOOL_STATUS_REMOVED_DEV' value='26'/>
+      <enumerator name='ZPOOL_STATUS_REBUILDING' value='27'/>
+      <enumerator name='ZPOOL_STATUS_REBUILD_SCRUB' value='28'/>
+      <enumerator name='ZPOOL_STATUS_NON_NATIVE_ASHIFT' value='29'/>
+      <enumerator name='ZPOOL_STATUS_COMPATIBILITY_ERR' value='30'/>
+      <enumerator name='ZPOOL_STATUS_INCOMPATIBLE_FEAT' value='31'/>
+      <enumerator name='ZPOOL_STATUS_OK' value='32'/>
+    </enum-decl>
+    <typedef-decl name='zpool_status_t' type-id='5e770b40' id='d3dd6294'/>
+    <enum-decl name='zpool_compat_status_t' naming-typedef-id='901b78d1' id='20676925'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_COMPATIBILITY_OK' value='0'/>
+      <enumerator name='ZPOOL_COMPATIBILITY_WARNTOKEN' value='1'/>
+      <enumerator name='ZPOOL_COMPATIBILITY_BADTOKEN' value='2'/>
+      <enumerator name='ZPOOL_COMPATIBILITY_BADFILE' value='3'/>
+      <enumerator name='ZPOOL_COMPATIBILITY_NOFILES' value='4'/>
+    </enum-decl>
+    <typedef-decl name='zpool_compat_status_t' type-id='20676925' id='901b78d1'/>
+    <class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zlp_rewind' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zlp_maxmeta' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zlp_maxdata' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='zlp_txg' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zpool_load_policy_t' type-id='2f65b36f' id='d11b7617'/>
+    <enum-decl name='vdev_state' id='21566197'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='VDEV_STATE_UNKNOWN' value='0'/>
+      <enumerator name='VDEV_STATE_CLOSED' value='1'/>
+      <enumerator name='VDEV_STATE_OFFLINE' value='2'/>
+      <enumerator name='VDEV_STATE_REMOVED' value='3'/>
+      <enumerator name='VDEV_STATE_CANT_OPEN' value='4'/>
+      <enumerator name='VDEV_STATE_FAULTED' value='5'/>
+      <enumerator name='VDEV_STATE_DEGRADED' value='6'/>
+      <enumerator name='VDEV_STATE_HEALTHY' value='7'/>
+    </enum-decl>
+    <typedef-decl name='vdev_state_t' type-id='21566197' id='35acf840'/>
+    <enum-decl name='vdev_aux' id='7f5bcca4'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='VDEV_AUX_NONE' value='0'/>
+      <enumerator name='VDEV_AUX_OPEN_FAILED' value='1'/>
+      <enumerator name='VDEV_AUX_CORRUPT_DATA' value='2'/>
+      <enumerator name='VDEV_AUX_NO_REPLICAS' value='3'/>
+      <enumerator name='VDEV_AUX_BAD_GUID_SUM' value='4'/>
+      <enumerator name='VDEV_AUX_TOO_SMALL' value='5'/>
+      <enumerator name='VDEV_AUX_BAD_LABEL' value='6'/>
+      <enumerator name='VDEV_AUX_VERSION_NEWER' value='7'/>
+      <enumerator name='VDEV_AUX_VERSION_OLDER' value='8'/>
+      <enumerator name='VDEV_AUX_UNSUP_FEAT' value='9'/>
+      <enumerator name='VDEV_AUX_SPARED' value='10'/>
+      <enumerator name='VDEV_AUX_ERR_EXCEEDED' value='11'/>
+      <enumerator name='VDEV_AUX_IO_FAILURE' value='12'/>
+      <enumerator name='VDEV_AUX_BAD_LOG' value='13'/>
+      <enumerator name='VDEV_AUX_EXTERNAL' value='14'/>
+      <enumerator name='VDEV_AUX_SPLIT_POOL' value='15'/>
+      <enumerator name='VDEV_AUX_BAD_ASHIFT' value='16'/>
+      <enumerator name='VDEV_AUX_EXTERNAL_PERSIST' value='17'/>
+      <enumerator name='VDEV_AUX_ACTIVE' value='18'/>
+      <enumerator name='VDEV_AUX_CHILDREN_OFFLINE' value='19'/>
+      <enumerator name='VDEV_AUX_ASHIFT_TOO_BIG' value='20'/>
+    </enum-decl>
+    <typedef-decl name='vdev_aux_t' type-id='7f5bcca4' id='9d774e0b'/>
+    <enum-decl name='pool_scan_func' id='1b092565'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_SCAN_NONE' value='0'/>
+      <enumerator name='POOL_SCAN_SCRUB' value='1'/>
+      <enumerator name='POOL_SCAN_RESILVER' value='2'/>
+      <enumerator name='POOL_SCAN_FUNCS' value='3'/>
+    </enum-decl>
+    <typedef-decl name='pool_scan_func_t' type-id='1b092565' id='7313fbe2'/>
+    <enum-decl name='pool_scrub_cmd' id='a1474cbd'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_SCRUB_NORMAL' value='0'/>
+      <enumerator name='POOL_SCRUB_PAUSE' value='1'/>
+      <enumerator name='POOL_SCRUB_FLAGS_END' value='2'/>
+    </enum-decl>
+    <typedef-decl name='pool_scrub_cmd_t' type-id='a1474cbd' id='b51cf3c2'/>
+    <enum-decl name='zpool_errata' id='d9abbf54'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_ERRATA_NONE' value='0'/>
+      <enumerator name='ZPOOL_ERRATA_ZOL_2094_SCRUB' value='1'/>
+      <enumerator name='ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY' value='2'/>
+      <enumerator name='ZPOOL_ERRATA_ZOL_6845_ENCRYPTION' value='3'/>
+      <enumerator name='ZPOOL_ERRATA_ZOL_8308_ENCRYPTION' value='4'/>
+    </enum-decl>
+    <typedef-decl name='zpool_errata_t' type-id='d9abbf54' id='688c495b'/>
+    <enum-decl name='pool_initialize_func' id='5c246ad4'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_INITIALIZE_START' value='0'/>
+      <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
+      <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
+      <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
+      <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
+    </enum-decl>
+    <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
+    <enum-decl name='pool_trim_func' id='54ed608a'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_TRIM_START' value='0'/>
+      <enumerator name='POOL_TRIM_CANCEL' value='1'/>
+      <enumerator name='POOL_TRIM_SUSPEND' value='2'/>
+      <enumerator name='POOL_TRIM_FUNCS' value='3'/>
+    </enum-decl>
+    <typedef-decl name='pool_trim_func_t' type-id='54ed608a' id='b1146b8d'/>
+    <enum-decl name='zpool_wait_activity_t' naming-typedef-id='73446457' id='849338e3'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_WAIT_CKPT_DISCARD' value='0'/>
+      <enumerator name='ZPOOL_WAIT_FREE' value='1'/>
+      <enumerator name='ZPOOL_WAIT_INITIALIZE' value='2'/>
+      <enumerator name='ZPOOL_WAIT_REPLACE' value='3'/>
+      <enumerator name='ZPOOL_WAIT_REMOVE' value='4'/>
+      <enumerator name='ZPOOL_WAIT_RESILVER' value='5'/>
+      <enumerator name='ZPOOL_WAIT_SCRUB' value='6'/>
+      <enumerator name='ZPOOL_WAIT_TRIM' value='7'/>
+      <enumerator name='ZPOOL_WAIT_NUM_ACTIVITIES' value='8'/>
+    </enum-decl>
+    <typedef-decl name='zpool_wait_activity_t' type-id='849338e3' id='73446457'/>
+    <enum-decl name='spa_feature' id='33ecb627'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='SPA_FEATURE_NONE' value='-1'/>
+      <enumerator name='SPA_FEATURE_ASYNC_DESTROY' value='0'/>
+      <enumerator name='SPA_FEATURE_EMPTY_BPOBJ' value='1'/>
+      <enumerator name='SPA_FEATURE_LZ4_COMPRESS' value='2'/>
+      <enumerator name='SPA_FEATURE_MULTI_VDEV_CRASH_DUMP' value='3'/>
+      <enumerator name='SPA_FEATURE_SPACEMAP_HISTOGRAM' value='4'/>
+      <enumerator name='SPA_FEATURE_ENABLED_TXG' value='5'/>
+      <enumerator name='SPA_FEATURE_HOLE_BIRTH' value='6'/>
+      <enumerator name='SPA_FEATURE_EXTENSIBLE_DATASET' value='7'/>
+      <enumerator name='SPA_FEATURE_EMBEDDED_DATA' value='8'/>
+      <enumerator name='SPA_FEATURE_BOOKMARKS' value='9'/>
+      <enumerator name='SPA_FEATURE_FS_SS_LIMIT' value='10'/>
+      <enumerator name='SPA_FEATURE_LARGE_BLOCKS' value='11'/>
+      <enumerator name='SPA_FEATURE_LARGE_DNODE' value='12'/>
+      <enumerator name='SPA_FEATURE_SHA512' value='13'/>
+      <enumerator name='SPA_FEATURE_SKEIN' value='14'/>
+      <enumerator name='SPA_FEATURE_EDONR' value='15'/>
+      <enumerator name='SPA_FEATURE_USEROBJ_ACCOUNTING' value='16'/>
+      <enumerator name='SPA_FEATURE_ENCRYPTION' value='17'/>
+      <enumerator name='SPA_FEATURE_PROJECT_QUOTA' value='18'/>
+      <enumerator name='SPA_FEATURE_DEVICE_REMOVAL' value='19'/>
+      <enumerator name='SPA_FEATURE_OBSOLETE_COUNTS' value='20'/>
+      <enumerator name='SPA_FEATURE_POOL_CHECKPOINT' value='21'/>
+      <enumerator name='SPA_FEATURE_SPACEMAP_V2' value='22'/>
+      <enumerator name='SPA_FEATURE_ALLOCATION_CLASSES' value='23'/>
+      <enumerator name='SPA_FEATURE_RESILVER_DEFER' value='24'/>
+      <enumerator name='SPA_FEATURE_BOOKMARK_V2' value='25'/>
+      <enumerator name='SPA_FEATURE_REDACTION_BOOKMARKS' value='26'/>
+      <enumerator name='SPA_FEATURE_REDACTED_DATASETS' value='27'/>
+      <enumerator name='SPA_FEATURE_BOOKMARK_WRITTEN' value='28'/>
+      <enumerator name='SPA_FEATURE_LOG_SPACEMAP' value='29'/>
+      <enumerator name='SPA_FEATURE_LIVELIST' value='30'/>
+      <enumerator name='SPA_FEATURE_DEVICE_REBUILD' value='31'/>
+      <enumerator name='SPA_FEATURE_ZSTD_COMPRESS' value='32'/>
+      <enumerator name='SPA_FEATURE_DRAID' value='33'/>
+      <enumerator name='SPA_FEATURES' value='34'/>
+    </enum-decl>
+    <typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
+    <qualified-type-def type-id='8e8d4be3' const='yes' id='693c3853'/>
+    <pointer-type-def type-id='693c3853' size-in-bits='64' id='22cce67b'/>
+    <pointer-type-def type-id='d6618c78' size-in-bits='64' id='a8425263'/>
+    <qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/>
+    <pointer-type-def type-id='a093cbb8' size-in-bits='64' id='b13f38c3'/>
+    <pointer-type-def type-id='35acf840' size-in-bits='64' id='17f3480d'/>
+    <pointer-type-def type-id='688c495b' size-in-bits='64' id='cec6f2e4'/>
+    <pointer-type-def type-id='d11b7617' size-in-bits='64' id='23432aaa'/>
+    <function-decl name='zpool_get_handle' mangled-name='zpool_get_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_handle'>
+      <parameter type-id='4c81de99'/>
+      <return type-id='b0382bb3'/>
+    </function-decl>
+    <function-decl name='zpool_prop_to_name' mangled-name='zpool_prop_to_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_to_name'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_get_status' mangled-name='zpool_get_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_status'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='9b23c9ad'/>
+      <parameter type-id='cec6f2e4'/>
+      <return type-id='d3dd6294'/>
+    </function-decl>
+    <function-decl name='zpool_prop_default_string' mangled-name='zpool_prop_default_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_default_string'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_prop_default_numeric' mangled-name='zpool_prop_default_numeric' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_default_numeric'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='lzc_initialize' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='7063e1ab'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_trim' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b1146b8d'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_sync' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_reopen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_pool_checkpoint' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_pool_checkpoint_discard' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='73446457'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait_tag' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='73446457'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_set_bootenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='22cce67b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_bootenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_standard_error' mangled-name='zpool_standard_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_standard_error'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_standard_error_fmt' mangled-name='zpool_standard_error_fmt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_standard_error_fmt'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_relabel_disk' mangled-name='zpool_relabel_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_relabel_disk'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_resolve_shortname' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_strip_partition' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zfs_strip_path' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zfs_strcmp_pathname' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_history_unpack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='75be733c'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_name_to_prop' mangled-name='zpool_name_to_prop' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_name_to_prop'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='5d0c23fb'/>
+    </function-decl>
+    <function-decl name='zpool_prop_readonly' mangled-name='zpool_prop_readonly' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_readonly'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_prop_setonce' mangled-name='zpool_prop_setonce' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_setonce'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_prop_feature' mangled-name='zpool_prop_feature' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_feature'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_prop_index_to_string' mangled-name='zpool_prop_index_to_string' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_index_to_string'>
+      <parameter type-id='5d0c23fb'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='7d3cd834'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint8_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9da381c4'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_int64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='9da381c4'/>
+    </function-decl>
+    <function-decl name='zfeature_is_supported' mangled-name='zfeature_is_supported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_is_supported'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfeature_lookup_guid' mangled-name='zfeature_lookup_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_lookup_guid'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='a8425263'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfeature_lookup_name' mangled-name='zfeature_lookup_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_lookup_name'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='a8425263'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_get_load_policy' mangled-name='zpool_get_load_policy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_load_policy'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='23432aaa'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='pool_namecheck' mangled-name='pool_namecheck' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='pool_namecheck'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='053457bd'/>
+      <parameter type-id='26a90f95'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_prop_get_type' mangled-name='zpool_prop_get_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_get_type'>
+      <parameter type-id='5d0c23fb'/>
+      <return type-id='31429eff'/>
+    </function-decl>
+    <function-decl name='get_system_hostid' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='7359adad'/>
+    </function-decl>
+    <function-decl name='zpool_props_refresh' mangled-name='zpool_props_refresh' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_props_refresh'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_state_to_name' mangled-name='zpool_state_to_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_state_to_name'>
+      <parameter type-id='35acf840' name='state'/>
+      <parameter type-id='9d774e0b' name='aux'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_pool_state_to_name' mangled-name='zpool_pool_state_to_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_pool_state_to_name'>
+      <parameter type-id='084a08a3' name='state'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_get_state_str' mangled-name='zpool_get_state_str' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_state_str'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_set_prop' mangled-name='zpool_set_prop' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_set_prop'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='80f4b756' name='propval'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_expand_proplist' mangled-name='zpool_expand_proplist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_expand_proplist'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='e4378506' name='plp'/>
+      <parameter type-id='c19b74c3' name='literal'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_get_state' mangled-name='zpool_get_state' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_state'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_is_draid_spare' mangled-name='zpool_is_draid_spare' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_is_draid_spare'>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_create' mangled-name='zpool_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_create'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='5ce45b60' name='nvroot'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='5ce45b60' name='fsprops'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_destroy' mangled-name='zpool_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_destroy'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='log_str'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_checkpoint' mangled-name='zpool_checkpoint' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_checkpoint'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_discard_checkpoint' mangled-name='zpool_discard_checkpoint' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_discard_checkpoint'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_add' mangled-name='zpool_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_add'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='5ce45b60' name='nvroot'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_export' mangled-name='zpool_export' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_export'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='80f4b756' name='log_str'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_export_force' mangled-name='zpool_export_force' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_export_force'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='log_str'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_explain_recover' mangled-name='zpool_explain_recover' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_explain_recover'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='95e97e5e' name='reason'/>
+      <parameter type-id='5ce45b60' name='config'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_import' mangled-name='zpool_import' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_import'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='5ce45b60' name='config'/>
+      <parameter type-id='80f4b756' name='newname'/>
+      <parameter type-id='26a90f95' name='altroot'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_print_unsup_feat' mangled-name='zpool_print_unsup_feat' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_print_unsup_feat'>
+      <parameter type-id='5ce45b60' name='config'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_import_props' mangled-name='zpool_import_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_import_props'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='5ce45b60' name='config'/>
+      <parameter type-id='80f4b756' name='newname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_initialize' mangled-name='zpool_initialize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_initialize'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='7063e1ab' name='cmd_type'/>
+      <parameter type-id='5ce45b60' name='vds'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_initialize_wait' mangled-name='zpool_initialize_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_initialize_wait'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='7063e1ab' name='cmd_type'/>
+      <parameter type-id='5ce45b60' name='vds'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_trim' mangled-name='zpool_trim' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_trim'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='b1146b8d' name='cmd_type'/>
+      <parameter type-id='5ce45b60' name='vds'/>
+      <parameter type-id='b13f38c3' name='trim_flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_scan' mangled-name='zpool_scan' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_scan'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='7313fbe2' name='func'/>
+      <parameter type-id='b51cf3c2' name='cmd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_find_vdev_by_physpath' mangled-name='zpool_find_vdev_by_physpath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_vdev_by_physpath'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='ppath'/>
+      <parameter type-id='37e3bd22' name='avail_spare'/>
+      <parameter type-id='37e3bd22' name='l2cache'/>
+      <parameter type-id='37e3bd22' name='log'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zpool_find_vdev' mangled-name='zpool_find_vdev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_vdev'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='37e3bd22' name='avail_spare'/>
+      <parameter type-id='37e3bd22' name='l2cache'/>
+      <parameter type-id='37e3bd22' name='log'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zpool_get_physpath' mangled-name='zpool_get_physpath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_physpath'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='26a90f95' name='physpath'/>
+      <parameter type-id='b59d7dce' name='phypath_size'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_path_to_guid' mangled-name='zpool_vdev_path_to_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_path_to_guid'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_online' mangled-name='zpool_vdev_online' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_online'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <parameter type-id='17f3480d' name='newstate'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_offline' mangled-name='zpool_vdev_offline' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_offline'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='c19b74c3' name='istmp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_remove_wanted' mangled-name='zpool_vdev_remove_wanted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove_wanted'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_fault' mangled-name='zpool_vdev_fault' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_fault'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='guid'/>
+      <parameter type-id='9d774e0b' name='aux'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_degrade' mangled-name='zpool_vdev_degrade' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_degrade'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='guid'/>
+      <parameter type-id='9d774e0b' name='aux'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_attach' mangled-name='zpool_vdev_attach' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_attach'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='old_disk'/>
+      <parameter type-id='80f4b756' name='new_disk'/>
+      <parameter type-id='5ce45b60' name='nvroot'/>
+      <parameter type-id='95e97e5e' name='replacing'/>
+      <parameter type-id='c19b74c3' name='rebuild'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_detach' mangled-name='zpool_vdev_detach' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_detach'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_split' mangled-name='zpool_vdev_split' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_split'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='26a90f95' name='newname'/>
+      <parameter type-id='857bb57e' name='newroot'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='325c1e34' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_remove' mangled-name='zpool_vdev_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_remove_cancel' mangled-name='zpool_vdev_remove_cancel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove_cancel'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_indirect_size' mangled-name='zpool_vdev_indirect_size' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_indirect_size'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='5d6479ae' name='sizep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_clear' mangled-name='zpool_clear' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_clear'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='5ce45b60' name='rewindnvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_clear' mangled-name='zpool_vdev_clear' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_clear'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='guid'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_reguid' mangled-name='zpool_reguid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_reguid'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_reopen_one' mangled-name='zpool_reopen_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_reopen_one'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_sync_one' mangled-name='zpool_sync_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_sync_one'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_name' mangled-name='zpool_vdev_name' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_name'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='5ce45b60' name='nv'/>
+      <parameter type-id='95e97e5e' name='name_flags'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zpool_get_errlog' mangled-name='zpool_get_errlog' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_errlog'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='857bb57e' name='nverrlistp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_upgrade' mangled-name='zpool_upgrade' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_upgrade'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='new_version'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_save_arguments' mangled-name='zfs_save_arguments' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_save_arguments'>
+      <parameter type-id='95e97e5e' name='argc'/>
+      <parameter type-id='9b23c9ad' name='argv'/>
+      <parameter type-id='26a90f95' name='string'/>
+      <parameter type-id='95e97e5e' name='len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_log_history' mangled-name='zpool_log_history' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_log_history'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='message'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_get_history' mangled-name='zpool_get_history' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_history'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='857bb57e' name='nvhisp'/>
+      <parameter type-id='5d6479ae' name='off'/>
+      <parameter type-id='37e3bd22' name='eof'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_events_next' mangled-name='zpool_events_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_events_next'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='857bb57e' name='nvp'/>
+      <parameter type-id='7292109c' name='dropped'/>
+      <parameter type-id='f0981eeb' name='flags'/>
+      <parameter type-id='95e97e5e' name='zevent_fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_events_clear' mangled-name='zpool_events_clear' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_events_clear'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='7292109c' name='count'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_events_seek' mangled-name='zpool_events_seek' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_events_seek'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='9c313c2d' name='eid'/>
+      <parameter type-id='95e97e5e' name='zevent_fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_obj_to_path' mangled-name='zpool_obj_to_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_obj_to_path'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='dsobj'/>
+      <parameter type-id='9c313c2d' name='obj'/>
+      <parameter type-id='26a90f95' name='pathname'/>
+      <parameter type-id='b59d7dce' name='len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_obj_to_path_ds' mangled-name='zpool_obj_to_path_ds' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_obj_to_path_ds'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='dsobj'/>
+      <parameter type-id='9c313c2d' name='obj'/>
+      <parameter type-id='26a90f95' name='pathname'/>
+      <parameter type-id='b59d7dce' name='len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_wait' mangled-name='zpool_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_wait'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='73446457' name='activity'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_wait_status' mangled-name='zpool_wait_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_wait_status'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='73446457' name='activity'/>
+      <parameter type-id='37e3bd22' name='missing'/>
+      <parameter type-id='37e3bd22' name='waited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_set_bootenv' mangled-name='zpool_set_bootenv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_set_bootenv'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='22cce67b' name='envmap'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_get_bootenv' mangled-name='zpool_get_bootenv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_bootenv'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='857bb57e' name='nvlp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_load_compat' mangled-name='zpool_load_compat' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_load_compat'>
+      <parameter type-id='80f4b756' name='compat'/>
+      <parameter type-id='37e3bd22' name='features'/>
+      <parameter type-id='26a90f95' name='report'/>
+      <parameter type-id='b59d7dce' name='rlen'/>
+      <return type-id='901b78d1'/>
+    </function-decl>
+    <function-decl name='__xpg_basename' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strtoull' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='3a47d82b'/>
+    </function-decl>
+    <function-decl name='realpath' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='266fe297'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='memcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtok_r' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strncasecmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='munmap' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='stat64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f1cadedf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_sendrecv.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='2176' id='8c2bcad1'>
+      <subrange length='34' type-id='7359adad' id='6a6a7e00'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='256' id='85c64d26'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='96' id='fa8ef949'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='128' id='fa9986a5'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='40' id='0f4ddd0b'>
+      <subrange length='5' type-id='7359adad' id='53010e10'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='48' id='0f562bd0'>
+      <subrange length='6' type-id='7359adad' id='52fa524b'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='64' id='13339fda'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <class-decl name='sendflags' size-in-bits='576' is-struct='yes' visibility='default' id='f6aa15be'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='verbosity' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='replicate' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='skipmissing' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='doall' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='fromorigin' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='pad' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='props' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='dryrun' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='parsable' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='progress' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='progressastitle' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='largeblock' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='embed_data' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='compress' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='raw' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='backup' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='holds' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='544'>
+        <var-decl name='saved' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='sendflags_t' type-id='f6aa15be' id='945467e6'/>
+    <typedef-decl name='snapfilter_cb_t' type-id='d2a5e211' id='3d3ffb69'/>
+    <class-decl name='recvflags' size-in-bits='416' is-struct='yes' visibility='default' id='34a384dc'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='verbose' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='isprefix' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='istail' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='dryrun' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='force' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='canmountoff' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='resumable' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='byteswap' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='nomount' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='holds' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='skipholds' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='domount' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='forceunmount' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='recvflags_t' type-id='34a384dc' id='9e59d1d4'/>
+    <enum-decl name='lzc_send_flags' id='bfbd3c8e'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='LZC_SEND_FLAG_EMBED_DATA' value='1'/>
+      <enumerator name='LZC_SEND_FLAG_LARGE_BLOCK' value='2'/>
+      <enumerator name='LZC_SEND_FLAG_COMPRESS' value='4'/>
+      <enumerator name='LZC_SEND_FLAG_RAW' value='8'/>
+      <enumerator name='LZC_SEND_FLAG_SAVED' value='16'/>
+    </enum-decl>
+    <class-decl name='ddt_key' size-in-bits='320' is-struct='yes' visibility='default' id='e0a4a1cb'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ddk_cksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='ddk_prop' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ddt_key_t' type-id='e0a4a1cb' id='67f6d2cf'/>
+    <enum-decl name='dmu_object_type' id='04b3b0b9'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DMU_OT_NONE' value='0'/>
+      <enumerator name='DMU_OT_OBJECT_DIRECTORY' value='1'/>
+      <enumerator name='DMU_OT_OBJECT_ARRAY' value='2'/>
+      <enumerator name='DMU_OT_PACKED_NVLIST' value='3'/>
+      <enumerator name='DMU_OT_PACKED_NVLIST_SIZE' value='4'/>
+      <enumerator name='DMU_OT_BPOBJ' value='5'/>
+      <enumerator name='DMU_OT_BPOBJ_HDR' value='6'/>
+      <enumerator name='DMU_OT_SPACE_MAP_HEADER' value='7'/>
+      <enumerator name='DMU_OT_SPACE_MAP' value='8'/>
+      <enumerator name='DMU_OT_INTENT_LOG' value='9'/>
+      <enumerator name='DMU_OT_DNODE' value='10'/>
+      <enumerator name='DMU_OT_OBJSET' value='11'/>
+      <enumerator name='DMU_OT_DSL_DIR' value='12'/>
+      <enumerator name='DMU_OT_DSL_DIR_CHILD_MAP' value='13'/>
+      <enumerator name='DMU_OT_DSL_DS_SNAP_MAP' value='14'/>
+      <enumerator name='DMU_OT_DSL_PROPS' value='15'/>
+      <enumerator name='DMU_OT_DSL_DATASET' value='16'/>
+      <enumerator name='DMU_OT_ZNODE' value='17'/>
+      <enumerator name='DMU_OT_OLDACL' value='18'/>
+      <enumerator name='DMU_OT_PLAIN_FILE_CONTENTS' value='19'/>
+      <enumerator name='DMU_OT_DIRECTORY_CONTENTS' value='20'/>
+      <enumerator name='DMU_OT_MASTER_NODE' value='21'/>
+      <enumerator name='DMU_OT_UNLINKED_SET' value='22'/>
+      <enumerator name='DMU_OT_ZVOL' value='23'/>
+      <enumerator name='DMU_OT_ZVOL_PROP' value='24'/>
+      <enumerator name='DMU_OT_PLAIN_OTHER' value='25'/>
+      <enumerator name='DMU_OT_UINT64_OTHER' value='26'/>
+      <enumerator name='DMU_OT_ZAP_OTHER' value='27'/>
+      <enumerator name='DMU_OT_ERROR_LOG' value='28'/>
+      <enumerator name='DMU_OT_SPA_HISTORY' value='29'/>
+      <enumerator name='DMU_OT_SPA_HISTORY_OFFSETS' value='30'/>
+      <enumerator name='DMU_OT_POOL_PROPS' value='31'/>
+      <enumerator name='DMU_OT_DSL_PERMS' value='32'/>
+      <enumerator name='DMU_OT_ACL' value='33'/>
+      <enumerator name='DMU_OT_SYSACL' value='34'/>
+      <enumerator name='DMU_OT_FUID' value='35'/>
+      <enumerator name='DMU_OT_FUID_SIZE' value='36'/>
+      <enumerator name='DMU_OT_NEXT_CLONES' value='37'/>
+      <enumerator name='DMU_OT_SCAN_QUEUE' value='38'/>
+      <enumerator name='DMU_OT_USERGROUP_USED' value='39'/>
+      <enumerator name='DMU_OT_USERGROUP_QUOTA' value='40'/>
+      <enumerator name='DMU_OT_USERREFS' value='41'/>
+      <enumerator name='DMU_OT_DDT_ZAP' value='42'/>
+      <enumerator name='DMU_OT_DDT_STATS' value='43'/>
+      <enumerator name='DMU_OT_SA' value='44'/>
+      <enumerator name='DMU_OT_SA_MASTER_NODE' value='45'/>
+      <enumerator name='DMU_OT_SA_ATTR_REGISTRATION' value='46'/>
+      <enumerator name='DMU_OT_SA_ATTR_LAYOUTS' value='47'/>
+      <enumerator name='DMU_OT_SCAN_XLATE' value='48'/>
+      <enumerator name='DMU_OT_DEDUP' value='49'/>
+      <enumerator name='DMU_OT_DEADLIST' value='50'/>
+      <enumerator name='DMU_OT_DEADLIST_HDR' value='51'/>
+      <enumerator name='DMU_OT_DSL_CLONES' value='52'/>
+      <enumerator name='DMU_OT_BPOBJ_SUBOBJ' value='53'/>
+      <enumerator name='DMU_OT_NUMTYPES' value='54'/>
+      <enumerator name='DMU_OTN_UINT8_DATA' value='128'/>
+      <enumerator name='DMU_OTN_UINT8_METADATA' value='192'/>
+      <enumerator name='DMU_OTN_UINT16_DATA' value='129'/>
+      <enumerator name='DMU_OTN_UINT16_METADATA' value='193'/>
+      <enumerator name='DMU_OTN_UINT32_DATA' value='130'/>
+      <enumerator name='DMU_OTN_UINT32_METADATA' value='194'/>
+      <enumerator name='DMU_OTN_UINT64_DATA' value='131'/>
+      <enumerator name='DMU_OTN_UINT64_METADATA' value='195'/>
+      <enumerator name='DMU_OTN_ZAP_DATA' value='132'/>
+      <enumerator name='DMU_OTN_ZAP_METADATA' value='196'/>
+      <enumerator name='DMU_OTN_UINT8_ENC_DATA' value='160'/>
+      <enumerator name='DMU_OTN_UINT8_ENC_METADATA' value='224'/>
+      <enumerator name='DMU_OTN_UINT16_ENC_DATA' value='161'/>
+      <enumerator name='DMU_OTN_UINT16_ENC_METADATA' value='225'/>
+      <enumerator name='DMU_OTN_UINT32_ENC_DATA' value='162'/>
+      <enumerator name='DMU_OTN_UINT32_ENC_METADATA' value='226'/>
+      <enumerator name='DMU_OTN_UINT64_ENC_DATA' value='163'/>
+      <enumerator name='DMU_OTN_UINT64_ENC_METADATA' value='227'/>
+      <enumerator name='DMU_OTN_ZAP_ENC_DATA' value='164'/>
+      <enumerator name='DMU_OTN_ZAP_ENC_METADATA' value='228'/>
+    </enum-decl>
+    <typedef-decl name='dmu_object_type_t' type-id='04b3b0b9' id='5c9d8906'/>
+    <class-decl name='zio_cksum' size-in-bits='256' is-struct='yes' visibility='default' id='1d53e28b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zc_word' type-id='85c64d26' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zio_cksum_t' type-id='1d53e28b' id='39730d0b'/>
+    <class-decl name='dmu_replay_record' size-in-bits='2496' is-struct='yes' visibility='default' id='781a52d7'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_type' type-id='08f5ca17' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='drr_payloadlen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_u' type-id='ac5ab59b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <enum-decl name='__anonymous_enum__' is-anonymous='yes' id='08f5ca17'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DRR_BEGIN' value='0'/>
+      <enumerator name='DRR_OBJECT' value='1'/>
+      <enumerator name='DRR_FREEOBJECTS' value='2'/>
+      <enumerator name='DRR_WRITE' value='3'/>
+      <enumerator name='DRR_FREE' value='4'/>
+      <enumerator name='DRR_END' value='5'/>
+      <enumerator name='DRR_WRITE_BYREF' value='6'/>
+      <enumerator name='DRR_SPILL' value='7'/>
+      <enumerator name='DRR_WRITE_EMBEDDED' value='8'/>
+      <enumerator name='DRR_OBJECT_RANGE' value='9'/>
+      <enumerator name='DRR_REDACT' value='10'/>
+      <enumerator name='DRR_NUMTYPES' value='11'/>
+    </enum-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='2432' is-anonymous='yes' visibility='default' id='ac5ab59b'>
+      <data-member access='public'>
+        <var-decl name='drr_begin' type-id='09fcdc01' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_end' type-id='6ee25631' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_object' type-id='f9ad530b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_freeobjects' type-id='a27d958e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write' type-id='4cc69e4b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_free' type-id='c836cfd2' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write_byref' type-id='e511cdce' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_spill' type-id='1e69a80a' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write_embedded' type-id='98b1345e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_object_range' type-id='aba1f9e1' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_redact' type-id='50389039' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_checksum' type-id='a5fe3647' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='drr_end' size-in-bits='320' is-struct='yes' visibility='default' id='6ee25631'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_checksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_object' size-in-bits='448' is-struct='yes' visibility='default' id='f9ad530b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='drr_bonustype' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_blksz' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='drr_bonuslen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='200'>
+        <var-decl name='drr_compress' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='208'>
+        <var-decl name='drr_dn_slots' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='216'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='drr_raw_bonuslen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_indblkshift' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='328'>
+        <var-decl name='drr_nlevels' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='336'>
+        <var-decl name='drr_nblkptr' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='344'>
+        <var-decl name='drr_pad' type-id='0f4ddd0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_maxblkid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_freeobjects' size-in-bits='192' is-struct='yes' visibility='default' id='a27d958e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_firstobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_numobjs' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write' size-in-bits='1088' is-struct='yes' visibility='default' id='4cc69e4b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='drr_pad' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_logical_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='328'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='336'>
+        <var-decl name='drr_compressiontype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='344'>
+        <var-decl name='drr_pad2' type-id='0f4ddd0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_key' type-id='67f6d2cf' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='drr_compressed_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_free' size-in-bits='256' is-struct='yes' visibility='default' id='c836cfd2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write_byref' size-in-bits='832' is-struct='yes' visibility='default' id='e511cdce'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_refguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_refobject' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_refoffset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='456'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='464'>
+        <var-decl name='drr_pad2' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='drr_key' type-id='67f6d2cf' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_spill' size-in-bits='640' is-struct='yes' visibility='default' id='1e69a80a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='200'>
+        <var-decl name='drr_compressiontype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='208'>
+        <var-decl name='drr_pad' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_compressed_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='608'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write_embedded' size-in-bits='384' is-struct='yes' visibility='default' id='98b1345e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_compression' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='264'>
+        <var-decl name='drr_etype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='272'>
+        <var-decl name='drr_pad' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_lsize' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='drr_psize' type-id='8f92235e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_object_range' size-in-bits='512' is-struct='yes' visibility='default' id='aba1f9e1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_firstobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_numslots' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='488'>
+        <var-decl name='drr_pad' type-id='d3490169' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_redact' size-in-bits='256' is-struct='yes' visibility='default' id='50389039'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_checksum' size-in-bits='2432' is-struct='yes' visibility='default' id='a5fe3647'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_pad' type-id='8c2bcad1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2176'>
+        <var-decl name='drr_checksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='Byte' type-id='002ac4a6' id='efb9ba06'/>
+    <typedef-decl name='uLong' type-id='7359adad' id='5bbcce85'/>
+    <typedef-decl name='Bytef' type-id='efb9ba06' id='c1606520'/>
+    <typedef-decl name='uLongf' type-id='5bbcce85' id='4d39af59'/>
+    <pointer-type-def type-id='c1606520' size-in-bits='64' id='4c667223'/>
+    <qualified-type-def type-id='c1606520' const='yes' id='a6124a50'/>
+    <pointer-type-def type-id='a6124a50' size-in-bits='64' id='e8cb3e0e'/>
+    <qualified-type-def type-id='781a52d7' const='yes' id='413ab2b8'/>
+    <pointer-type-def type-id='413ab2b8' size-in-bits='64' id='41671bd6'/>
+    <pointer-type-def type-id='3ff5601b' size-in-bits='64' id='4aafb922'/>
+    <pointer-type-def type-id='9e59d1d4' size-in-bits='64' id='4ea84b4f'/>
+    <pointer-type-def type-id='945467e6' size-in-bits='64' id='8def7735'/>
+    <pointer-type-def type-id='3d3ffb69' size-in-bits='64' id='72a26210'/>
+    <pointer-type-def type-id='c9d12d66' size-in-bits='64' id='b2eb2c3f'/>
+    <pointer-type-def type-id='4d39af59' size-in-bits='64' id='60db3356'/>
+    <pointer-type-def type-id='39730d0b' size-in-bits='64' id='c24fc2ee'/>
+    <function-decl name='nvlist_print' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_get_pool_handle' mangled-name='zfs_get_pool_handle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_pool_handle'>
+      <parameter type-id='fcd57163'/>
+      <return type-id='4c81de99'/>
+    </function-decl>
+    <function-decl name='lzc_send_redacted' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='bfbd3c8e'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_resume_redacted' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='bfbd3c8e'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive_with_cmdprops' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='41671bd6'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_space' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='bfbd3c8e'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_space_resume_redacted' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='bfbd3c8e'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_rename' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_set_pipe_max' mangled-name='libzfs_set_pipe_max' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_set_pipe_max'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_setproctitle' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_insert' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='fba6cb51'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_boolean' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvpair_value_int32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='4aafb922'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fnvlist_size' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='fnvlist_merge' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvpair' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_boolean_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='5d6479ae'/>
+    </function-decl>
+    <function-decl name='fletcher_4_native_varsize' mangled-name='fletcher_4_native_varsize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_native_varsize'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='c24fc2ee'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_4_incremental_native' mangled-name='fletcher_4_incremental_native' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_incremental_native'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fletcher_4_incremental_byteswap' mangled-name='fletcher_4_incremental_byteswap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_incremental_byteswap'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_send_progress' mangled-name='zfs_send_progress' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_progress'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='5d6479ae' name='bytes_written'/>
+      <parameter type-id='5d6479ae' name='blocks_visited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_send_resume_token_to_nvlist' mangled-name='zfs_send_resume_token_to_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_resume_token_to_nvlist'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='token'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zfs_send_resume' mangled-name='zfs_send_resume' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_resume'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='8def7735' name='flags'/>
+      <parameter type-id='95e97e5e' name='outfd'/>
+      <parameter type-id='80f4b756' name='resume_token'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_send_saved' mangled-name='zfs_send_saved' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_saved'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='8def7735' name='flags'/>
+      <parameter type-id='95e97e5e' name='outfd'/>
+      <parameter type-id='80f4b756' name='resume_token'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_send' mangled-name='zfs_send' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='fromsnap'/>
+      <parameter type-id='80f4b756' name='tosnap'/>
+      <parameter type-id='8def7735' name='flags'/>
+      <parameter type-id='95e97e5e' name='outfd'/>
+      <parameter type-id='72a26210' name='filter_func'/>
+      <parameter type-id='eaa32e2f' name='cb_arg'/>
+      <parameter type-id='857bb57e' name='debugnvp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_send_one' mangled-name='zfs_send_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_one'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='8def7735' name='flags'/>
+      <parameter type-id='80f4b756' name='redactbook'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_receive' mangled-name='zfs_receive' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_receive'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='tosnap'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='4ea84b4f' name='flags'/>
+      <parameter type-id='95e97e5e' name='infd'/>
+      <parameter type-id='a3681dea' name='stream_avl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='perror' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strcat' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strndup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='time' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b2eb2c3f'/>
+      <return type-id='c9d12d66'/>
+    </function-decl>
+    <function-decl name='localtime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9f201474'/>
+      <return type-id='d915a820'/>
+    </function-decl>
+    <function-decl name='write' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='sleep' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f0981eeb'/>
+      <return type-id='f0981eeb'/>
+    </function-decl>
+    <function-decl name='uncompress' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4c667223'/>
+      <parameter type-id='60db3356'/>
+      <parameter type-id='e8cb3e0e'/>
+      <parameter type-id='5bbcce85'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='d2a5e211'>
+      <parameter type-id='9200a744'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='c19b74c3'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_status.c' language='LANG_C99'>
+    <function-decl name='zpool_import_status' mangled-name='zpool_import_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_import_status'>
+      <parameter type-id='5ce45b60' name='config'/>
+      <parameter type-id='9b23c9ad' name='msgid'/>
+      <parameter type-id='cec6f2e4' name='errata'/>
+      <return type-id='d3dd6294'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_util.c' language='LANG_C99'>
+    <class-decl name='__va_list_tag' size-in-bits='192' is-struct='yes' visibility='default' id='d5027220'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='gp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='fp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='overflow_arg_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='reg_save_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <type-decl name='double' size-in-bits='64' id='a0eb0f08'/>
+    <array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='192' id='e41bdf22'>
+      <subrange length='6' type-id='7359adad' id='52fa524b'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='19cefcee' size-in-bits='160' alignment-in-bits='32' id='3fcf57d2'>
+      <subrange length='5' type-id='7359adad' id='53010e10'/>
+    </array-type-def>
+    <enum-decl name='zfs_get_column_t' naming-typedef-id='19cefcee' id='223bdcaa'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='GET_COL_NONE' value='0'/>
+      <enumerator name='GET_COL_NAME' value='1'/>
+      <enumerator name='GET_COL_PROPERTY' value='2'/>
+      <enumerator name='GET_COL_VALUE' value='3'/>
+      <enumerator name='GET_COL_RECVD' value='4'/>
+      <enumerator name='GET_COL_SOURCE' value='5'/>
+    </enum-decl>
+    <typedef-decl name='zfs_get_column_t' type-id='223bdcaa' id='19cefcee'/>
+    <class-decl name='zprop_get_cbdata' size-in-bits='640' is-struct='yes' visibility='default' id='f3d3c319'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='cb_sources' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='cb_columns' type-id='3fcf57d2' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='cb_colwidths' type-id='e41bdf22' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='cb_scripted' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='cb_literal' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='cb_first' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='cb_proplist' type-id='3a9b2288' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='cb_type' type-id='2e45de5d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zprop_get_cbdata_t' type-id='f3d3c319' id='f3d87113'/>
+    <typedef-decl name='zprop_func' type-id='2e711a2a' id='1ec3747a'/>
+    <enum-decl name='zprop_attr_t' naming-typedef-id='999701cc' id='77d05200'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='PROP_DEFAULT' value='0'/>
+      <enumerator name='PROP_READONLY' value='1'/>
+      <enumerator name='PROP_INHERIT' value='2'/>
+      <enumerator name='PROP_ONETIME' value='3'/>
+      <enumerator name='PROP_ONETIME_DEFAULT' value='4'/>
+    </enum-decl>
+    <typedef-decl name='zprop_attr_t' type-id='77d05200' id='999701cc'/>
+    <class-decl name='zfs_index' size-in-bits='128' is-struct='yes' visibility='default' id='87957af9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pi_name' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pi_value' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zprop_index_t' type-id='87957af9' id='64636ce3'/>
+    <class-decl name='zprop_desc_t' size-in-bits='704' is-struct='yes' naming-typedef-id='ffa52b96' visibility='default' id='bbff5e4b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pd_name' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pd_propnum' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='pd_proptype' type-id='31429eff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='pd_strdefault' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='pd_numdefault' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='pd_attr' type-id='999701cc' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='pd_types' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='pd_values' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='pd_colname' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='pd_rightalign' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='pd_visible' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='pd_zfs_mod_supported' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='pd_table' type-id='c8bc397b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='pd_table_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zprop_desc_t' type-id='bbff5e4b' id='ffa52b96'/>
+    <class-decl name='extmnttab' size-in-bits='320' is-struct='yes' visibility='default' id='0c544dc0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_major' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_minor' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='d5027220' size-in-bits='64' id='b7f2d5e6'/>
+    <qualified-type-def type-id='26a90f95' const='yes' id='57de658a'/>
+    <pointer-type-def type-id='57de658a' size-in-bits='64' id='f319fae0'/>
+    <pointer-type-def type-id='9b23c9ad' size-in-bits='64' id='c0563f85'/>
+    <qualified-type-def type-id='33f57a65' const='yes' id='21fd6035'/>
+    <pointer-type-def type-id='21fd6035' size-in-bits='64' id='a0de50cd'/>
+    <pointer-type-def type-id='a0de50cd' size-in-bits='64' id='24f95ba5'/>
+    <qualified-type-def type-id='64636ce3' const='yes' id='072f7953'/>
+    <pointer-type-def type-id='072f7953' size-in-bits='64' id='c8bc397b'/>
+    <pointer-type-def type-id='0c544dc0' size-in-bits='64' id='394fc496'/>
+    <pointer-type-def type-id='c70fa2e8' size-in-bits='64' id='2e711a2a'/>
+    <pointer-type-def type-id='aca3bac8' size-in-bits='64' id='d33f11cb'/>
+    <qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/>
+    <pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
+    <pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/>
+    <function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_core_init' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_core_fini' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_load_module' mangled-name='libzfs_load_module' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_load_module'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_prop_unsupported' mangled-name='zpool_prop_unsupported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_unsupported'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zpool_feature_init' mangled-name='zpool_feature_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_feature_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_4_init' mangled-name='fletcher_4_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fletcher_4_fini' mangled-name='fletcher_4_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_fini'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_prop_init' mangled-name='zfs_prop_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_prop_get_table' mangled-name='zfs_prop_get_table' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_get_table'>
+      <return type-id='76c8174b'/>
+    </function-decl>
+    <function-decl name='zpool_prop_init' mangled-name='zpool_prop_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_init'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_prop_get_table' mangled-name='zpool_prop_get_table' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_get_table'>
+      <return type-id='76c8174b'/>
+    </function-decl>
+    <function-decl name='zprop_iter_common' mangled-name='zprop_iter_common' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_iter_common'>
+      <parameter type-id='1ec3747a'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='c19b74c3'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_name_to_prop' mangled-name='zprop_name_to_prop' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_name_to_prop'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_string_to_index' mangled-name='zprop_string_to_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_string_to_index'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_values' mangled-name='zprop_values' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_values'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zprop_width' mangled-name='zprop_width' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_width'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='37e3bd22'/>
+      <parameter type-id='2e45de5d'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='zprop_valid_for_type' mangled-name='zprop_valid_for_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_valid_for_type'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='2e45de5d'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='getextmntent' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='394fc496'/>
+      <parameter type-id='62f7a03d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_errno' mangled-name='libzfs_errno' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_errno'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_error_action' mangled-name='libzfs_error_action' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_error_action'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='libzfs_error_description' mangled-name='libzfs_error_description' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_error_description'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='libzfs_print_on_error' mangled-name='libzfs_print_on_error' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_print_on_error'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='c19b74c3' name='printerr'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_run_process' mangled-name='libzfs_run_process' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_run_process'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='9b23c9ad' name='argv'/>
+      <parameter type-id='95e97e5e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_run_process_get_stdout' mangled-name='libzfs_run_process_get_stdout' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_run_process_get_stdout'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='9b23c9ad' name='argv'/>
+      <parameter type-id='9b23c9ad' name='env'/>
+      <parameter type-id='c0563f85' name='lines'/>
+      <parameter type-id='7292109c' name='lines_cnt'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_run_process_get_stdout_nopath' mangled-name='libzfs_run_process_get_stdout_nopath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_run_process_get_stdout_nopath'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='9b23c9ad' name='argv'/>
+      <parameter type-id='9b23c9ad' name='env'/>
+      <parameter type-id='c0563f85' name='lines'/>
+      <parameter type-id='7292109c' name='lines_cnt'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_free_str_array' mangled-name='libzfs_free_str_array' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_free_str_array'>
+      <parameter type-id='9b23c9ad' name='strs'/>
+      <parameter type-id='95e97e5e' name='count'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_envvar_is_set' mangled-name='libzfs_envvar_is_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_envvar_is_set'>
+      <parameter type-id='26a90f95' name='envvar'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_init' mangled-name='libzfs_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_init'>
+      <return type-id='b0382bb3'/>
+    </function-decl>
+    <function-decl name='libzfs_fini' mangled-name='libzfs_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_fini'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_path_to_zhandle' mangled-name='zfs_path_to_zhandle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_path_to_zhandle'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='2e45de5d' name='argtype'/>
+      <return type-id='9200a744'/>
+    </function-decl>
+    <function-decl name='zprop_print_one_property' mangled-name='zprop_print_one_property' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_print_one_property'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='0d2a0670' name='cbp'/>
+      <parameter type-id='80f4b756' name='propname'/>
+      <parameter type-id='80f4b756' name='value'/>
+      <parameter type-id='a2256d42' name='sourcetype'/>
+      <parameter type-id='80f4b756' name='source'/>
+      <parameter type-id='80f4b756' name='recvd_value'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_get_list' mangled-name='zprop_get_list' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_get_list'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='26a90f95' name='props'/>
+      <parameter type-id='e4378506' name='listp'/>
+      <parameter type-id='2e45de5d' name='type'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zprop_free_list' mangled-name='zprop_free_list' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_free_list'>
+      <parameter type-id='3a9b2288' name='pl'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zprop_iter' mangled-name='zprop_iter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zprop_iter'>
+      <parameter type-id='1ec3747a' name='func'/>
+      <parameter type-id='eaa32e2f' name='cb'/>
+      <parameter type-id='c19b74c3' name='show_all'/>
+      <parameter type-id='c19b74c3' name='ordered'/>
+      <parameter type-id='2e45de5d' name='type'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_version_userland' mangled-name='zfs_version_userland' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_userland'>
+      <parameter type-id='26a90f95' name='version'/>
+      <parameter type-id='95e97e5e' name='len'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_version_print' mangled-name='zfs_version_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_print'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='color_start' mangled-name='color_start' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='color_start'>
+      <parameter type-id='26a90f95' name='color'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='color_end' mangled-name='color_end' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='color_end'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='printf_color' mangled-name='printf_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='printf_color'>
+      <parameter type-id='26a90f95' name='color'/>
+      <parameter type-id='26a90f95' name='format'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pow' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a0eb0f08'/>
+      <parameter type-id='a0eb0f08'/>
+      <return type-id='a0eb0f08'/>
+    </function-decl>
+    <function-decl name='__ctype_toupper_loc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='24f95ba5'/>
+    </function-decl>
+    <function-decl name='dlclose' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='regcomp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5c53ba29'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='regfree' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='d33f11cb'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='vfprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='vsnprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='vasprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtod' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <return type-id='a0eb0f08'/>
+    </function-decl>
+    <function-decl name='realloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='exit' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strnlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='waitpid' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3629bad8'/>
+      <parameter type-id='7292109c'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='3629bad8'/>
+    </function-decl>
+    <function-decl name='dup2' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='execve' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f319fae0'/>
+      <parameter type-id='f319fae0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='execv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f319fae0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='execvp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f319fae0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='execvpe' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f319fae0'/>
+      <parameter type-id='f319fae0'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='_exit' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='vfork' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='3629bad8'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='c70fa2e8'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/libzfs_mount_os.c' language='LANG_C99'>
+    <pointer-type-def type-id='7359adad' size-in-bits='64' id='1d2c2b85'/>
+    <function-decl name='mount' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='7359adad'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='umount2' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='geteuid' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='cc5fcceb'/>
+    </function-decl>
+    <function-decl name='zfs_parse_mount_options' mangled-name='zfs_parse_mount_options' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_parse_mount_options'>
+      <parameter type-id='26a90f95' name='mntopts'/>
+      <parameter type-id='1d2c2b85' name='mntflags'/>
+      <parameter type-id='1d2c2b85' name='zfsflags'/>
+      <parameter type-id='95e97e5e' name='sloppy'/>
+      <parameter type-id='26a90f95' name='badopt'/>
+      <parameter type-id='26a90f95' name='mtabopt'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_adjust_mount_options' mangled-name='zfs_adjust_mount_options' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_adjust_mount_options'>
+      <parameter type-id='9200a744' name='zhp'/>
+      <parameter type-id='80f4b756' name='mntpoint'/>
+      <parameter type-id='26a90f95' name='mntopts'/>
+      <parameter type-id='26a90f95' name='mtabopt'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_mount_delegation_check' mangled-name='zfs_mount_delegation_check' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_mount_delegation_check'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/libzfs_pool_os.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='288' id='16e6f2c6'>
+      <subrange length='36' type-id='7359adad' id='ae666bde'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a65ae39c' size-in-bits='960' id='fa198beb'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='3502e3ff' size-in-bits='384' id='dba89ba3'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='3502e3ff' size-in-bits='256' id='01d84ed4'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <class-decl name='dk_part' size-in-bits='960' is-struct='yes' visibility='default' id='a65ae39c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='p_start' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='p_size' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='p_guid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='p_tag' type-id='d908a348' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='272'>
+        <var-decl name='p_flag' type-id='d908a348' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='p_name' type-id='16e6f2c6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='p_uguid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='p_resv' type-id='01d84ed4' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='dk_gpt' size-in-bits='1920' is-struct='yes' visibility='default' id='dd4a2e5a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='efi_version' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='efi_nparts' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='efi_part_size' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='efi_lbasize' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='efi_last_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='efi_first_u_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='efi_last_u_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='efi_disk_uguid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='efi_flags' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='efi_reserved1' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='efi_altern_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='efi_reserved' type-id='dba89ba3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='efi_parts' type-id='fa198beb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uuid' size-in-bits='128' is-struct='yes' visibility='default' id='214f32ea'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='time_low' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='time_mid' type-id='149c6638' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='48'>
+        <var-decl name='time_hi_and_version' type-id='149c6638' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='clock_seq_hi_and_reserved' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='72'>
+        <var-decl name='clock_seq_low' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='80'>
+        <var-decl name='node_addr' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ushort_t' type-id='8efea9e5' id='d908a348'/>
+    <typedef-decl name='uint16_t' type-id='253c2d2a' id='149c6638'/>
+    <typedef-decl name='__uint16_t' type-id='8efea9e5' id='253c2d2a'/>
+    <pointer-type-def type-id='dd4a2e5a' size-in-bits='64' id='0d8119a8'/>
+    <pointer-type-def type-id='0d8119a8' size-in-bits='64' id='c43b27a6'/>
+    <function-decl name='zpool_label_disk_wait' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_alloc_and_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='8f92235e'/>
+      <parameter type-id='c43b27a6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_alloc_and_read' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='c43b27a6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_write' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='0d8119a8'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_rescan' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='0d8119a8'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='efi_use_whole_disk' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='rand' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fsync' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/libzfs_util_os.c' language='LANG_C99'>
+    <typedef-decl name='__useconds_t' type-id='f0981eeb' id='4e80d4b1'/>
+    <typedef-decl name='__clockid_t' type-id='95e97e5e' id='08f9a87a'/>
+    <typedef-decl name='clockid_t' type-id='08f9a87a' id='a1c3b834'/>
+    <pointer-type-def type-id='a9c79a1f' size-in-bits='64' id='3d83ba87'/>
+    <function-decl name='sched_yield' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='clock_gettime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a1c3b834'/>
+      <parameter type-id='3d83ba87'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='usleep' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4e80d4b1'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_error_init' mangled-name='libzfs_error_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_error_init'>
+      <parameter type-id='95e97e5e' name='error'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+  </abi-instr>
+</abi-corpus>

diff --git a/zfs/lib/libzfs/libzfs.pc.in b/zfs/lib/libzfs/libzfs.pc.in
index 1122401..afe5635 100644
--- a/zfs/lib/libzfs/libzfs.pc.in
+++ b/zfs/lib/libzfs/libzfs.pc.in

@@ -6,7 +6,9 @@
 Name: libzfs
 Description: LibZFS library
 Version: @VERSION@
-URL: http://zfsonlinux.org
+URL: https://github.com/openzfs/zfs
 Requires: libzfs_core
+Requires.private: @LIBCRYPTO_PC@ @ZLIB_PC@
 Cflags: -I${includedir}/libzfs -I${includedir}/libspl
 Libs: -L${libdir} -lzfs -lnvpair
+Libs.private: -luutil -lm -pthread

diff --git a/zfs/lib/libzfs/libzfs.suppr b/zfs/lib/libzfs/libzfs.suppr
new file mode 100644
index 0000000..d55b5b7
--- /dev/null
+++ b/zfs/lib/libzfs/libzfs.suppr

@@ -0,0 +1,13 @@
+[suppress_type]
+	name = FILE*
+
+[suppress_type]
+	type_kind = typedef
+	name = SHA256_CTX
+
+[suppress_type]
+	type_kind = typedef
+	name = SHA2_CTX
+
+[suppress_variable]
+	name = zfs_deleg_perm_tab

diff --git a/zfs/lib/libzfs/libzfs_changelist.c b/zfs/lib/libzfs/libzfs_changelist.c
index 72f6410..1592b75 100644
--- a/zfs/lib/libzfs/libzfs_changelist.c
+++ b/zfs/lib/libzfs/libzfs_changelist.c

@@ -24,7 +24,7 @@
  * Use is subject to license terms.
  *
  * Portions Copyright 2007 Ramprakash Jelari
- * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2020 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright (c) 2018 Datto Inc.
  */
@@ -98,6 +98,7 @@
 	prop_changenode_t *cn;
 	uu_avl_walk_t *walk;
 	int ret = 0;
+	boolean_t commit_smb_shares = B_FALSE;
 
 	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
 	    clp->cl_prop != ZFS_PROP_SHARESMB)
@@ -127,6 +128,8 @@
 			 */
 			switch (clp->cl_prop) {
 			case ZFS_PROP_MOUNTPOINT:
+				if (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT)
+					break;
 				if (zfs_unmount(cn->cn_handle, NULL,
 				    clp->cl_mflags) != 0) {
 					ret = -1;
@@ -135,6 +138,7 @@
 				break;
 			case ZFS_PROP_SHARESMB:
 				(void) zfs_unshare_smb(cn->cn_handle, NULL);
+				commit_smb_shares = B_TRUE;
 				break;
 
 			default:
@@ -143,6 +147,8 @@
 		}
 	}
 
+	if (commit_smb_shares)
+		zfs_commit_smb_shares();
 	uu_avl_walk_end(walk);
 
 	if (ret == -1)
@@ -167,7 +173,8 @@
 	uu_avl_walk_t *walk;
 	char shareopts[ZFS_MAXPROPLEN];
 	int errors = 0;
-	libzfs_handle_t *hdl;
+	boolean_t commit_smb_shares = B_FALSE;
+	boolean_t commit_nfs_shares = B_FALSE;
 
 	/*
 	 * If we're changing the mountpoint, attempt to destroy the underlying
@@ -179,22 +186,11 @@
 	if ((cn = uu_avl_last(clp->cl_tree)) == NULL)
 		return (0);
 
-	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
+	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT &&
+	    !(clp->cl_gflags & CL_GATHER_DONT_UNMOUNT))
 		remove_mountpoint(cn->cn_handle);
 
 	/*
-	 * It is possible that the changelist_prefix() used libshare
-	 * to unshare some entries. Since libshare caches data, an
-	 * attempt to reshare during postfix can fail unless libshare
-	 * is uninitialized here so that it will reinitialize later.
-	 */
-	if (cn->cn_handle != NULL) {
-		hdl = cn->cn_handle->zfs_hdl;
-		assert(hdl != NULL);
-		zfs_uninit_libshare(hdl);
-	}
-
-	/*
 	 * We walk the datasets in reverse, because we want to mount any parent
 	 * datasets before mounting the children.  We walk all datasets even if
 	 * there are errors.
@@ -242,7 +238,8 @@
 		needs_key = (zfs_prop_get_int(cn->cn_handle,
 		    ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE);
 
-		mounted = zfs_is_mounted(cn->cn_handle, NULL);
+		mounted = (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT) ||
+		    zfs_is_mounted(cn->cn_handle, NULL);
 
 		if (!mounted && !needs_key && (cn->cn_mounted ||
 		    ((sharenfs || sharesmb || clp->cl_waslegacy) &&
@@ -260,16 +257,25 @@
 		 * if the filesystem is currently shared, so that we can
 		 * adopt any new options.
 		 */
-		if (sharenfs && mounted)
+		if (sharenfs && mounted) {
 			errors += zfs_share_nfs(cn->cn_handle);
-		else if (cn->cn_shared || clp->cl_waslegacy)
+			commit_nfs_shares = B_TRUE;
+		} else if (cn->cn_shared || clp->cl_waslegacy) {
 			errors += zfs_unshare_nfs(cn->cn_handle, NULL);
-		if (sharesmb && mounted)
+			commit_nfs_shares = B_TRUE;
+		}
+		if (sharesmb && mounted) {
 			errors += zfs_share_smb(cn->cn_handle);
-		else if (cn->cn_shared || clp->cl_waslegacy)
+			commit_smb_shares = B_TRUE;
+		} else if (cn->cn_shared || clp->cl_waslegacy) {
 			errors += zfs_unshare_smb(cn->cn_handle, NULL);
+			commit_smb_shares = B_TRUE;
+		}
 	}
-
+	if (commit_nfs_shares)
+		zfs_commit_nfs_shares();
+	if (commit_smb_shares)
+		zfs_commit_smb_shares();
 	uu_avl_walk_end(walk);
 
 	return (errors ? -1 : 0);
@@ -357,6 +363,7 @@
 			ret = -1;
 	}
 
+	zfs_commit_proto(proto);
 	uu_avl_walk_end(walk);
 
 	return (ret);

diff --git a/zfs/lib/libzfs/libzfs_config.c b/zfs/lib/libzfs/libzfs_config.c
index 67379d0..a3ecc4a 100644
--- a/zfs/lib/libzfs/libzfs_config.c
+++ b/zfs/lib/libzfs/libzfs_config.c

@@ -133,7 +133,7 @@
 
 	for (;;) {
 		zc.zc_cookie = hdl->libzfs_ns_gen;
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+		if (zfs_ioctl(hdl, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
 			switch (errno) {
 			case EEXIST:
 				/*
@@ -279,7 +279,7 @@
 		return (-1);
 
 	for (;;) {
-		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
+		if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_STATS,
 		    &zc) == 0) {
 			/*
 			 * The real error is returned in the zc_cookie field.

diff --git a/zfs/lib/libzfs/libzfs_core.pc.in b/zfs/lib/libzfs/libzfs_core.pc.in
deleted file mode 100644
index 2b6a86b..0000000
--- a/zfs/lib/libzfs/libzfs_core.pc.in
+++ /dev/null

@@ -1,11 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: libzfs_core
-Description: LibZFS core library
-Version: @VERSION@
-URL: http://zfsonlinux.org
-Cflags: -I${includedir}/libzfs -I${includedir}/libspl
-Libs: -L${libdir} -lzfs_core

diff --git a/zfs/lib/libzfs/libzfs_crypto.c b/zfs/lib/libzfs/libzfs_crypto.c
index b7b567e..f77becd 100644
--- a/zfs/lib/libzfs/libzfs_crypto.c
+++ b/zfs/lib/libzfs/libzfs_crypto.c

@@ -15,6 +15,7 @@
 
 /*
  * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
  */
 
 #include <sys/zfs_context.h>
@@ -25,6 +26,16 @@
 #include <signal.h>
 #include <errno.h>
 #include <openssl/evp.h>
+#if LIBFETCH_DYNAMIC
+#include <dlfcn.h>
+#endif
+#if LIBFETCH_IS_FETCH
+#include <sys/param.h>
+#include <stdio.h>
+#include <fetch.h>
+#elif LIBFETCH_IS_LIBCURL
+#include <curl/curl.h>
+#endif
 #include <libzfs.h>
 #include "libzfs_impl.h"
 #include "zfeature_common.h"
@@ -50,25 +61,31 @@
  * technically ok if the salt is known to the attacker).
  */
 
-typedef enum key_locator {
-	KEY_LOCATOR_NONE,
-	KEY_LOCATOR_PROMPT,
-	KEY_LOCATOR_URI
-} key_locator_t;
-
 #define	MIN_PASSPHRASE_LEN 8
 #define	MAX_PASSPHRASE_LEN 512
 #define	MAX_KEY_PROMPT_ATTEMPTS 3
 
 static int caught_interrupt;
 
+static int get_key_material_file(libzfs_handle_t *, const char *, const char *,
+    zfs_keyformat_t, boolean_t, uint8_t **, size_t *);
+static int get_key_material_https(libzfs_handle_t *, const char *, const char *,
+    zfs_keyformat_t, boolean_t, uint8_t **, size_t *);
+
+static zfs_uri_handler_t uri_handlers[] = {
+	{ "file", get_key_material_file },
+	{ "https", get_key_material_https },
+	{ "http", get_key_material_https },
+	{ NULL, NULL }
+};
+
 static int
 pkcs11_get_urandom(uint8_t *buf, size_t bytes)
 {
 	int rand;
 	ssize_t bytes_read = 0;
 
-	rand = open("/dev/urandom", O_RDONLY);
+	rand = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
 
 	if (rand < 0)
 		return (rand);
@@ -85,15 +102,49 @@
 	return (bytes_read);
 }
 
-static zfs_keylocation_t
-zfs_prop_parse_keylocation(const char *str)
+static int
+zfs_prop_parse_keylocation(libzfs_handle_t *restrict hdl, const char *str,
+    zfs_keylocation_t *restrict locp, char **restrict schemep)
 {
-	if (strcmp("prompt", str) == 0)
-		return (ZFS_KEYLOCATION_PROMPT);
-	else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0)
-		return (ZFS_KEYLOCATION_URI);
+	*locp = ZFS_KEYLOCATION_NONE;
+	*schemep = NULL;
 
-	return (ZFS_KEYLOCATION_NONE);
+	if (strcmp("prompt", str) == 0) {
+		*locp = ZFS_KEYLOCATION_PROMPT;
+		return (0);
+	}
+
+	regmatch_t pmatch[2];
+
+	if (regexec(&hdl->libzfs_urire, str, ARRAY_SIZE(pmatch),
+	    pmatch, 0) == 0) {
+		size_t scheme_len;
+
+		if (pmatch[1].rm_so == -1) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Invalid URI"));
+			return (EINVAL);
+		}
+
+		scheme_len = pmatch[1].rm_eo - pmatch[1].rm_so;
+
+		*schemep = calloc(1, scheme_len + 1);
+		if (*schemep == NULL) {
+			int ret = errno;
+
+			errno = 0;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Invalid URI"));
+			return (ret);
+		}
+
+		(void) memcpy(*schemep, str + pmatch[1].rm_so, scheme_len);
+		*locp = ZFS_KEYLOCATION_URI;
+		return (0);
+	}
+
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Invalid keylocation"));
+	return (EINVAL);
 }
 
 static int
@@ -146,62 +197,235 @@
 	}
 }
 
+/* do basic validation of the key material */
 static int
-get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat,
-    boolean_t again, boolean_t newkey, uint8_t **buf, size_t *len_out)
+validate_key(libzfs_handle_t *hdl, zfs_keyformat_t keyformat,
+    const char *key, size_t keylen)
 {
-	int ret = 0, bytes;
+	switch (keyformat) {
+	case ZFS_KEYFORMAT_RAW:
+		/* verify the key length is correct */
+		if (keylen < WRAPPING_KEY_LEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Raw key too short (expected %u)."),
+			    WRAPPING_KEY_LEN);
+			return (EINVAL);
+		}
+
+		if (keylen > WRAPPING_KEY_LEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Raw key too long (expected %u)."),
+			    WRAPPING_KEY_LEN);
+			return (EINVAL);
+		}
+		break;
+	case ZFS_KEYFORMAT_HEX:
+		/* verify the key length is correct */
+		if (keylen < WRAPPING_KEY_LEN * 2) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Hex key too short (expected %u)."),
+			    WRAPPING_KEY_LEN * 2);
+			return (EINVAL);
+		}
+
+		if (keylen > WRAPPING_KEY_LEN * 2) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Hex key too long (expected %u)."),
+			    WRAPPING_KEY_LEN * 2);
+			return (EINVAL);
+		}
+
+		/* check for invalid hex digits */
+		for (size_t i = 0; i < WRAPPING_KEY_LEN * 2; i++) {
+			if (!isxdigit(key[i])) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "Invalid hex character detected."));
+				return (EINVAL);
+			}
+		}
+		break;
+	case ZFS_KEYFORMAT_PASSPHRASE:
+		/* verify the length is within bounds */
+		if (keylen > MAX_PASSPHRASE_LEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Passphrase too long (max %u)."),
+			    MAX_PASSPHRASE_LEN);
+			return (EINVAL);
+		}
+
+		if (keylen < MIN_PASSPHRASE_LEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Passphrase too short (min %u)."),
+			    MIN_PASSPHRASE_LEN);
+			return (EINVAL);
+		}
+		break;
+	default:
+		/* can't happen, checked above */
+		break;
+	}
+
+	return (0);
+}
+
+static int
+libzfs_getpassphrase(zfs_keyformat_t keyformat, boolean_t is_reenter,
+    boolean_t new_key, const char *fsname,
+    char **restrict res, size_t *restrict reslen)
+{
+	FILE *f = stdin;
 	size_t buflen = 0;
+	ssize_t bytes;
+	int ret = 0;
 	struct termios old_term, new_term;
 	struct sigaction act, osigint, osigtstp;
 
-	*len_out = 0;
+	*res = NULL;
+	*reslen = 0;
 
-	if (isatty(fileno(fd))) {
-		/*
-		 * handle SIGINT and ignore SIGSTP. This is necessary to
-		 * restore the state of the terminal.
-		 */
-		caught_interrupt = 0;
-		act.sa_flags = 0;
-		(void) sigemptyset(&act.sa_mask);
-		act.sa_handler = catch_signal;
+	/*
+	 * handle SIGINT and ignore SIGSTP. This is necessary to
+	 * restore the state of the terminal.
+	 */
+	caught_interrupt = 0;
+	act.sa_flags = 0;
+	(void) sigemptyset(&act.sa_mask);
+	act.sa_handler = catch_signal;
 
-		(void) sigaction(SIGINT, &act, &osigint);
-		act.sa_handler = SIG_IGN;
-		(void) sigaction(SIGTSTP, &act, &osigtstp);
+	(void) sigaction(SIGINT, &act, &osigint);
+	act.sa_handler = SIG_IGN;
+	(void) sigaction(SIGTSTP, &act, &osigtstp);
 
-		/* prompt for the key */
-		if (fsname != NULL) {
-			(void) printf("%s %s%s for '%s': ",
-			    (again) ? "Re-enter" : "Enter",
-			    (newkey) ? "new " : "",
-			    get_format_prompt_string(keyformat), fsname);
-		} else {
-			(void) printf("%s %s%s: ",
-			    (again) ? "Re-enter" : "Enter",
-			    (newkey) ? "new " : "",
-			    get_format_prompt_string(keyformat));
+	(void) printf("%s %s%s",
+	    is_reenter ? "Re-enter" : "Enter",
+	    new_key ? "new " : "",
+	    get_format_prompt_string(keyformat));
+	if (fsname != NULL)
+		(void) printf(" for '%s'", fsname);
+	(void) fputc(':', stdout);
+	(void) fflush(stdout);
 
-		}
-		(void) fflush(stdout);
+	/* disable the terminal echo for key input */
+	(void) tcgetattr(fileno(f), &old_term);
 
-		/* disable the terminal echo for key input */
-		(void) tcgetattr(fileno(fd), &old_term);
+	new_term = old_term;
+	new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL);
 
-		new_term = old_term;
-		new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL);
-
-		ret = tcsetattr(fileno(fd), TCSAFLUSH, &new_term);
-		if (ret != 0) {
-			ret = errno;
-			errno = 0;
-			goto out;
-		}
+	ret = tcsetattr(fileno(f), TCSAFLUSH, &new_term);
+	if (ret != 0) {
+		ret = errno;
+		errno = 0;
+		goto out;
 	}
 
+	bytes = getline(res, &buflen, f);
+	if (bytes < 0) {
+		ret = errno;
+		errno = 0;
+		goto out;
+	}
+
+	/* trim the ending newline if it exists */
+	if (bytes > 0 && (*res)[bytes - 1] == '\n') {
+		(*res)[bytes - 1] = '\0';
+		bytes--;
+	}
+
+	*reslen = bytes;
+
+out:
+	/* reset the terminal */
+	(void) tcsetattr(fileno(f), TCSAFLUSH, &old_term);
+	(void) sigaction(SIGINT, &osigint, NULL);
+	(void) sigaction(SIGTSTP, &osigtstp, NULL);
+
+	/* if we caught a signal, re-throw it now */
+	if (caught_interrupt != 0)
+		(void) kill(getpid(), caught_interrupt);
+
+	/* print the newline that was not echo'd */
+	(void) printf("\n");
+
+	return (ret);
+}
+
+static int
+get_key_interactive(libzfs_handle_t *restrict hdl, const char *fsname,
+    zfs_keyformat_t keyformat, boolean_t confirm_key, boolean_t newkey,
+    uint8_t **restrict outbuf, size_t *restrict len_out)
+{
+	char *buf = NULL, *buf2 = NULL;
+	size_t buflen = 0, buf2len = 0;
+	int ret = 0;
+
+	ASSERT(isatty(fileno(stdin)));
+
+	/* raw keys cannot be entered on the terminal */
+	if (keyformat == ZFS_KEYFORMAT_RAW) {
+		ret = EINVAL;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Cannot enter raw keys on the terminal"));
+		goto out;
+	}
+
+	/* prompt for the key */
+	if ((ret = libzfs_getpassphrase(keyformat, B_FALSE, newkey, fsname,
+	    &buf, &buflen)) != 0) {
+		free(buf);
+		buf = NULL;
+		buflen = 0;
+		goto out;
+	}
+
+	if (!confirm_key)
+		goto out;
+
+	if ((ret = validate_key(hdl, keyformat, buf, buflen)) != 0) {
+		free(buf);
+		return (ret);
+	}
+
+	ret = libzfs_getpassphrase(keyformat, B_TRUE, newkey, fsname, &buf2,
+	    &buf2len);
+	if (ret != 0) {
+		free(buf);
+		free(buf2);
+		buf = buf2 = NULL;
+		buflen = buf2len = 0;
+		goto out;
+	}
+
+	if (buflen != buf2len || strcmp(buf, buf2) != 0) {
+		free(buf);
+		buf = NULL;
+		buflen = 0;
+
+		ret = EINVAL;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Provided keys do not match."));
+	}
+
+	free(buf2);
+
+out:
+	*outbuf = (uint8_t *)buf;
+	*len_out = buflen;
+	return (ret);
+}
+
+static int
+get_key_material_raw(FILE *fd, zfs_keyformat_t keyformat,
+    uint8_t **buf, size_t *len_out)
+{
+	int ret = 0;
+	size_t buflen = 0;
+
+	*len_out = 0;
+
 	/* read the key material */
 	if (keyformat != ZFS_KEYFORMAT_RAW) {
+		ssize_t bytes;
+
 		bytes = getline((char **)buf, &buflen, fd);
 		if (bytes < 0) {
 			ret = errno;
@@ -210,25 +434,29 @@
 		}
 
 		/* trim the ending newline if it exists */
-		if ((*buf)[bytes - 1] == '\n') {
+		if (bytes > 0 && (*buf)[bytes - 1] == '\n') {
 			(*buf)[bytes - 1] = '\0';
 			bytes--;
 		}
+
+		*len_out = bytes;
 	} else {
+		size_t n;
+
 		/*
 		 * Raw keys may have newline characters in them and so can't
 		 * use getline(). Here we attempt to read 33 bytes so that we
 		 * can properly check the key length (the file should only have
 		 * 32 bytes).
 		 */
-		*buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (char));
+		*buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (uint8_t));
 		if (*buf == NULL) {
 			ret = ENOMEM;
 			goto out;
 		}
 
-		bytes = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd);
-		if (bytes < 0) {
+		n = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd);
+		if (n == 0 || ferror(fd)) {
 			/* size errors are handled by the calling function */
 			free(*buf);
 			*buf = NULL;
@@ -236,28 +464,208 @@
 			errno = 0;
 			goto out;
 		}
+
+		*len_out = n;
 	}
-
-	*len_out = bytes;
-
 out:
-	if (isatty(fileno(fd))) {
-		/* reset the terminal */
-		(void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term);
-		(void) sigaction(SIGINT, &osigint, NULL);
-		(void) sigaction(SIGTSTP, &osigtstp, NULL);
+	return (ret);
+}
 
-		/* if we caught a signal, re-throw it now */
-		if (caught_interrupt != 0) {
-			(void) kill(getpid(), caught_interrupt);
-		}
+/*
+ * Key-location handler that reads key material from a local file.
+ *
+ * The first 7 characters of 'uri' are skipped and the remainder is opened as
+ * the path (presumably the "file://" prefix -- confirm against the
+ * keylocation parser).  The opened stream is handed to
+ * get_key_material_raw(), which fills *buf and *len_out according to
+ * 'keyformat'.  'fsname' and 'newkey' are not used by this handler.
+ *
+ * Returns 0 on success, EINVAL if 'uri' is shorter than 7 characters, the
+ * errno from fopen() if the file cannot be opened (an auxiliary error message
+ * is recorded on 'hdl'), or whatever get_key_material_raw() returns.
+ */
+static int
+get_key_material_file(libzfs_handle_t *hdl, const char *uri,
+    const char *fsname, zfs_keyformat_t keyformat, boolean_t newkey,
+    uint8_t **restrict buf, size_t *restrict len_out)
+{
+	FILE *f = NULL;
+	int ret = 0;
 
-		/* print the newline that was not echo'd */
-		printf("\n");
+	/* uri + 7 below must stay inside the string */
+	if (strlen(uri) < 7)
+		return (EINVAL);
+
+	/* "e" asks fopen() to set close-on-exec where the libc supports it */
+	if ((f = fopen(uri + 7, "re")) == NULL) {
+		ret = errno;
+		errno = 0;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to open key material file: %s"), strerror(ret));
+		return (ret);
 	}
 
+	ret = get_key_material_raw(f, keyformat, buf, len_out);
+
+	(void) fclose(f);
+
 	return (ret);
+}
 
+/*
+ * Key-location handler for http:// and https:// URIs.
+ *
+ * The key is downloaded with whichever back-end was selected at build time
+ * and then parsed from the resulting stream by get_key_material_raw():
+ *
+ *  - LIBFETCH_DYNAMIC: the back-end library is dlopen()ed on first use and
+ *    cached in hdl->libfetch.  A failed load is remembered as (void *)-1
+ *    together with the dlerror() text, so later calls fail with ENOSYS.
+ *  - LIBFETCH_IS_FETCH: the stream comes straight from fetchGetURL().
+ *  - LIBFETCH_IS_LIBCURL: the body is written to an unnamed temporary file
+ *    under $TMPDIR (or /tmp), with redirects followed and a 30 second
+ *    timeout.  The SSL_CA_CERT_FILE, SSL_CA_CERT_PATH, SSL_CLIENT_CERT_FILE
+ *    and SSL_CLIENT_KEY_FILE environment variables are honoured.
+ *  - otherwise: no back-end is available and ENOSYS is returned.
+ *
+ * 'fsname' and 'newkey' are not used by this handler.
+ *
+ * Returns 0 on success or an errno-style value: EINVAL for a URI that is too
+ * short, ENOSYS when no usable back-end exists, ENOTSUP if curl_easy_init()
+ * fails, ENETDOWN on a transfer failure, ENOENT for a non-2xx HTTP status,
+ * ENOMEM or the relevant errno for temporary-file problems, or the result of
+ * get_key_material_raw().  An auxiliary message is recorded on 'hdl' for
+ * most failures.
+ */
+static int
+get_key_material_https(libzfs_handle_t *hdl, const char *uri,
+    const char *fsname, zfs_keyformat_t keyformat, boolean_t newkey,
+    uint8_t **restrict buf, size_t *restrict len_out)
+{
+	int ret = 0;
+	FILE *key = NULL;
+	boolean_t is_http = strncmp(uri, "http:", strlen("http:")) == 0;
+
+	/* must be at least as long as "http://" or "https://" */
+	if (strlen(uri) < (is_http ? 7 : 8)) {
+		ret = EINVAL;
+		goto end;
+	}
+
+#if LIBFETCH_DYNAMIC
+#define	LOAD_FUNCTION(func) \
+	__typeof__(func) *func = dlsym(hdl->libfetch, #func);
+
+	if (hdl->libfetch == NULL)
+		hdl->libfetch = dlopen(LIBFETCH_SONAME, RTLD_LAZY);
+
+	/* remember a failed load so that it is not retried on every call */
+	if (hdl->libfetch == NULL) {
+		hdl->libfetch = (void *)-1;
+		char *err = dlerror();
+		if (err)
+			hdl->libfetch_load_error = strdup(err);
+	}
+
+	if (hdl->libfetch == (void *)-1) {
+		ret = ENOSYS;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Couldn't load %s: %s"),
+		    LIBFETCH_SONAME, hdl->libfetch_load_error ?: "(?)");
+		goto end;
+	}
+
+	boolean_t ok;
+#if LIBFETCH_IS_FETCH
+	LOAD_FUNCTION(fetchGetURL);
+	char *fetchLastErrString = dlsym(hdl->libfetch, "fetchLastErrString");
+
+	ok = fetchGetURL && fetchLastErrString;
+#elif LIBFETCH_IS_LIBCURL
+	LOAD_FUNCTION(curl_easy_init);
+	LOAD_FUNCTION(curl_easy_setopt);
+	LOAD_FUNCTION(curl_easy_perform);
+	LOAD_FUNCTION(curl_easy_cleanup);
+	LOAD_FUNCTION(curl_easy_strerror);
+	LOAD_FUNCTION(curl_easy_getinfo);
+
+	ok = curl_easy_init && curl_easy_setopt && curl_easy_perform &&
+	    curl_easy_cleanup && curl_easy_strerror && curl_easy_getinfo;
+#endif
+	if (!ok) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "keylocation=%s back-end %s missing symbols."),
+		    is_http ? "http://" : "https://", LIBFETCH_SONAME);
+		ret = ENOSYS;
+		goto end;
+	}
+#endif
+
+#if LIBFETCH_IS_FETCH
+	key = fetchGetURL(uri, "");
+	if (key == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Couldn't GET %s: %s"),
+		    uri, fetchLastErrString);
+		ret = ENETDOWN;
+	}
+#elif LIBFETCH_IS_LIBCURL
+	CURL *curl = curl_easy_init();
+	if (curl == NULL) {
+		ret = ENOTSUP;
+		goto end;
+	}
+
+	/*
+	 * From here on every early exit must release 'curl'; the 'end' label
+	 * only knows about 'key'.
+	 */
+	int kfd = -1;
+#ifdef O_TMPFILE
+	kfd = open(getenv("TMPDIR") ?: "/tmp",
+	    O_RDWR | O_TMPFILE | O_EXCL | O_CLOEXEC, 0600);
+	if (kfd != -1)
+		goto kfdok;
+#endif
+
+	/* no O_TMPFILE (or it failed): create a named file and unlink it */
+	char *path;
+	if (asprintf(&path,
+	    "%s/libzfs-XXXXXXXX.https", getenv("TMPDIR") ?: "/tmp") == -1) {
+		ret = ENOMEM;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s"),
+		    strerror(ret));
+		curl_easy_cleanup(curl);
+		goto end;
+	}
+
+	kfd = mkostemps(path, strlen(".https"), O_CLOEXEC);
+	if (kfd == -1) {
+		ret = errno;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Couldn't create temporary file %s: %s"),
+		    path, strerror(ret));
+		free(path);
+		curl_easy_cleanup(curl);
+		goto end;
+	}
+	(void) unlink(path);
+	free(path);
+
+kfdok:
+	if ((key = fdopen(kfd, "r+")) == NULL) {
+		ret = errno;
+		(void) close(kfd);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Couldn't reopen temporary file: %s"), strerror(ret));
+		curl_easy_cleanup(curl);
+		goto end;
+	}
+
+	char errbuf[CURL_ERROR_SIZE] = "";
+	char *cainfo = getenv("SSL_CA_CERT_FILE"); /* matches fetch(3) */
+	char *capath = getenv("SSL_CA_CERT_PATH"); /* matches fetch(3) */
+	char *clcert = getenv("SSL_CLIENT_CERT_FILE"); /* matches fetch(3) */
+	char *clkey  = getenv("SSL_CLIENT_KEY_FILE"); /* matches fetch(3) */
+	(void) curl_easy_setopt(curl, CURLOPT_URL, uri);
+	(void) curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+	(void) curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, 30000L);
+	(void) curl_easy_setopt(curl, CURLOPT_WRITEDATA, key);
+	(void) curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errbuf);
+	if (cainfo != NULL)
+		(void) curl_easy_setopt(curl, CURLOPT_CAINFO, cainfo);
+	if (capath != NULL)
+		(void) curl_easy_setopt(curl, CURLOPT_CAPATH, capath);
+	if (clcert != NULL)
+		(void) curl_easy_setopt(curl, CURLOPT_SSLCERT, clcert);
+	if (clkey != NULL)
+		(void) curl_easy_setopt(curl, CURLOPT_SSLKEY, clkey);
+
+	CURLcode res = curl_easy_perform(curl);
+
+	if (res != CURLE_OK) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to connect to %s: %s"),
+		    uri, strlen(errbuf) ? errbuf : curl_easy_strerror(res));
+		ret = ENETDOWN;
+	} else {
+		long resp = 200;
+		(void) curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp);
+
+		if (resp < 200 || resp >= 300) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Couldn't GET %s: %ld"),
+			    uri, resp);
+			ret = ENOENT;
+		} else
+			/* the key is read back from the start of the file */
+			rewind(key);
+	}
+
+	curl_easy_cleanup(curl);
+#else
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "No keylocation=%s back-end."), is_http ? "http://" : "https://");
+	ret = ENOSYS;
+#endif
+
+end:
+	if (ret == 0)
+		ret = get_key_material_raw(key, keyformat, buf, len_out);
+
+	if (key != NULL)
+		fclose(key);
+
+	return (ret);
 }
 
 /*
@@ -271,41 +679,58 @@
     zfs_keyformat_t keyformat, char *keylocation, const char *fsname,
     uint8_t **km_out, size_t *kmlen_out, boolean_t *can_retry_out)
 {
-	int ret, i;
+	int ret;
 	zfs_keylocation_t keyloc = ZFS_KEYLOCATION_NONE;
-	FILE *fd = NULL;
-	uint8_t *km = NULL, *km2 = NULL;
-	size_t kmlen, kmlen2;
+	uint8_t *km = NULL;
+	size_t kmlen = 0;
+	char *uri_scheme = NULL;
+	zfs_uri_handler_t *handler = NULL;
 	boolean_t can_retry = B_FALSE;
 
 	/* verify and parse the keylocation */
-	keyloc = zfs_prop_parse_keylocation(keylocation);
+	ret = zfs_prop_parse_keylocation(hdl, keylocation, &keyloc,
+	    &uri_scheme);
+	if (ret != 0)
+		goto error;
 
 	/* open the appropriate file descriptor */
 	switch (keyloc) {
 	case ZFS_KEYLOCATION_PROMPT:
-		fd = stdin;
-		if (isatty(fileno(fd))) {
-			can_retry = B_TRUE;
-
-			/* raw keys cannot be entered on the terminal */
-			if (keyformat == ZFS_KEYFORMAT_RAW) {
-				ret = EINVAL;
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "Cannot enter raw keys on the terminal"));
-				goto error;
-			}
+		if (isatty(fileno(stdin))) {
+			can_retry = keyformat != ZFS_KEYFORMAT_RAW;
+			ret = get_key_interactive(hdl, fsname, keyformat,
+			    do_verify, newkey, &km, &kmlen);
+		} else {
+			/* fetch the key material into the buffer */
+			ret = get_key_material_raw(stdin, keyformat, &km,
+			    &kmlen);
 		}
+
+		if (ret != 0)
+			goto error;
+
 		break;
 	case ZFS_KEYLOCATION_URI:
-		fd = fopen(&keylocation[7], "r");
-		if (!fd) {
-			ret = errno;
-			errno = 0;
+		ret = ENOTSUP;
+
+		for (handler = uri_handlers; handler->zuh_scheme != NULL;
+		    handler++) {
+			if (strcmp(handler->zuh_scheme, uri_scheme) != 0)
+				continue;
+
+			if ((ret = handler->zuh_handler(hdl, keylocation,
+			    fsname, keyformat, newkey, &km, &kmlen)) != 0)
+				goto error;
+
+			break;
+		}
+
+		if (ret == ENOTSUP) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Failed to open key material file"));
+			    "URI scheme is not supported"));
 			goto error;
 		}
+
 		break;
 	default:
 		ret = EINVAL;
@@ -314,126 +739,27 @@
 		goto error;
 	}
 
-	/* fetch the key material into the buffer */
-	ret = get_key_material_raw(fd, fsname, keyformat, B_FALSE, newkey,
-	    &km, &kmlen);
-	if (ret != 0)
+	if ((ret = validate_key(hdl, keyformat, (const char *)km, kmlen)) != 0)
 		goto error;
 
-	/* do basic validation of the key material */
-	switch (keyformat) {
-	case ZFS_KEYFORMAT_RAW:
-		/* verify the key length is correct */
-		if (kmlen < WRAPPING_KEY_LEN) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Raw key too short (expected %u)."),
-			    WRAPPING_KEY_LEN);
-			goto error;
-		}
-
-		if (kmlen > WRAPPING_KEY_LEN) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Raw key too long (expected %u)."),
-			    WRAPPING_KEY_LEN);
-			goto error;
-		}
-		break;
-	case ZFS_KEYFORMAT_HEX:
-		/* verify the key length is correct */
-		if (kmlen < WRAPPING_KEY_LEN * 2) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Hex key too short (expected %u)."),
-			    WRAPPING_KEY_LEN * 2);
-			goto error;
-		}
-
-		if (kmlen > WRAPPING_KEY_LEN * 2) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Hex key too long (expected %u)."),
-			    WRAPPING_KEY_LEN * 2);
-			goto error;
-		}
-
-		/* check for invalid hex digits */
-		for (i = 0; i < WRAPPING_KEY_LEN * 2; i++) {
-			if (!isxdigit((char)km[i])) {
-				ret = EINVAL;
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "Invalid hex character detected."));
-				goto error;
-			}
-		}
-		break;
-	case ZFS_KEYFORMAT_PASSPHRASE:
-		/* verify the length is within bounds */
-		if (kmlen > MAX_PASSPHRASE_LEN) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Passphrase too long (max %u)."),
-			    MAX_PASSPHRASE_LEN);
-			goto error;
-		}
-
-		if (kmlen < MIN_PASSPHRASE_LEN) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Passphrase too short (min %u)."),
-			    MIN_PASSPHRASE_LEN);
-			goto error;
-		}
-		break;
-	default:
-		/* can't happen, checked above */
-		break;
-	}
-
-	if (do_verify && isatty(fileno(fd))) {
-		ret = get_key_material_raw(fd, fsname, keyformat, B_TRUE,
-		    newkey, &km2, &kmlen2);
-		if (ret != 0)
-			goto error;
-
-		if (kmlen2 != kmlen ||
-		    (memcmp((char *)km, (char *)km2, kmlen) != 0)) {
-			ret = EINVAL;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Provided keys do not match."));
-			goto error;
-		}
-	}
-
-	if (fd != stdin)
-		fclose(fd);
-
-	if (km2 != NULL)
-		free(km2);
-
 	*km_out = km;
 	*kmlen_out = kmlen;
 	if (can_retry_out != NULL)
 		*can_retry_out = can_retry;
 
+	free(uri_scheme);
 	return (0);
 
 error:
-	if (km != NULL)
-		free(km);
-
-	if (km2 != NULL)
-		free(km2);
-
-	if (fd != NULL && fd != stdin)
-		fclose(fd);
+	free(km);
 
 	*km_out = NULL;
 	*kmlen_out = 0;
+
 	if (can_retry_out != NULL)
 		*can_retry_out = can_retry;
 
+	free(uri_scheme);
 	return (ret);
 }
 
@@ -813,7 +1139,7 @@
 		}
 
 		ret = populate_create_encryption_params_nvlists(hdl, NULL,
-		    B_FALSE, keyformat, keylocation, props, &wkeydata,
+		    B_TRUE, keyformat, keylocation, props, &wkeydata,
 		    &wkeylen);
 		if (ret != 0)
 			goto out;

diff --git a/zfs/lib/libzfs/libzfs_dataset.c b/zfs/lib/libzfs/libzfs_dataset.c
index 4a07c8d..09352af 100644
--- a/zfs/lib/libzfs/libzfs_dataset.c
+++ b/zfs/lib/libzfs/libzfs_dataset.c

@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2019 Joyent, Inc.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2012 DEY Storage Systems, Inc.  All rights reserved.
  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
  * Copyright (c) 2013 Martin Matuska. All rights reserved.
@@ -32,6 +32,7 @@
  * Copyright 2017-2018 RackTop Systems.
  * Copyright (c) 2019 Datto Inc.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
+ * Copyright (c) 2021 Matt Fiddaman
  */
 
 #include <ctype.h>
@@ -48,7 +49,6 @@
 #include <sys/mount.h>
 #include <pwd.h>
 #include <grp.h>
-#include <stddef.h>
 #include <ucred.h>
 #ifdef HAVE_IDMAP
 #include <idmap.h>
@@ -66,7 +66,6 @@
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "libzfs_impl.h"
-#include "libzfs.h"
 #include "zfs_deleg.h"
 
 static int userquota_propname_decode(const char *propname, boolean_t zoned,
@@ -332,7 +331,7 @@
 
 	(void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
 
-	while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) {
+	while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, zc) != 0) {
 		if (errno == ENOMEM) {
 			if (zcmd_expand_dst_nvlist(hdl, zc) != 0) {
 				return (-1);
@@ -360,7 +359,7 @@
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
-	while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) {
+	while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) {
 		if (errno == ENOMEM) {
 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
 				return (-1);
@@ -450,14 +449,19 @@
 	 * We've managed to open the dataset and gather statistics.  Determine
 	 * the high-level type.
 	 */
-	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
 		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
-	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+	} else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) {
 		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
-	else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER)
+	} else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) {
+		errno = EINVAL;
 		return (-1);
-	else
+	} else if (zhp->zfs_dmustats.dds_inconsistent) {
+		errno = EBUSY;
+		return (-1);
+	} else {
 		abort();
+	}
 
 	if (zhp->zfs_dmustats.dds_is_snapshot)
 		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
@@ -595,7 +599,6 @@
 	int err;
 	boolean_t rv;
 
-
 	(void) strlcpy(fsname, path, sizeof (fsname));
 	pound = strchr(fsname, '#');
 	if (pound == NULL)
@@ -795,7 +798,7 @@
 
 	rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special);
 
-	return (AVL_ISIGN(rv));
+	return (TREE_ISIGN(rv));
 }
 
 void
@@ -807,13 +810,13 @@
 	    sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
 }
 
-int
+static int
 libzfs_mnttab_update(libzfs_handle_t *hdl)
 {
 	struct mnttab entry;
 
 	/* Reopen MNTTAB to prevent reading stale data from open file */
-	if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+	if (freopen(MNTTAB, "re", hdl->libzfs_mnttab) == NULL)
 		return (ENOENT);
 
 	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
@@ -884,7 +887,7 @@
 			libzfs_mnttab_fini(hdl);
 
 		/* Reopen MNTTAB to prevent reading stale data from open file */
-		if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+		if (freopen(MNTTAB, "re", hdl->libzfs_mnttab) == NULL)
 			return (ENOENT);
 
 		srch.mnt_special = (char *)fsname;
@@ -932,10 +935,15 @@
 		 * Another thread may have already added this entry
 		 * via libzfs_mnttab_update. If so we should skip it.
 		 */
-		if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL)
+		if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL) {
+			free(mtn->mtn_mt.mnt_special);
+			free(mtn->mtn_mt.mnt_mountp);
+			free(mtn->mtn_mt.mnt_fstype);
+			free(mtn->mtn_mt.mnt_mntopts);
 			free(mtn);
-		else
+		} else {
 			avl_add(&hdl->libzfs_mnttab_cache, mtn);
+		}
 	}
 	pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
 }
@@ -1009,6 +1017,7 @@
 	nvlist_t *ret;
 	int chosen_normal = -1;
 	int chosen_utf = -1;
+	int set_maxbs = 0;
 
 	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
 		(void) no_memory(hdl);
@@ -1226,12 +1235,17 @@
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
+			/* save the ZFS_PROP_RECORDSIZE during create op */
+			if (zpool_hdl == NULL && prop == ZFS_PROP_RECORDSIZE) {
+				set_maxbs = intval;
+			}
 			break;
 		}
 
 		case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
 		{
-			int maxbs = SPA_OLD_MAXBLOCKSIZE;
+			int maxbs =
+			    set_maxbs == 0 ? SPA_OLD_MAXBLOCKSIZE : set_maxbs;
 			char buf[64];
 
 			if (zpool_hdl != NULL) {
@@ -1258,9 +1272,9 @@
 			    intval > maxbs || !ISP2(intval))) {
 				zfs_nicebytes(maxbs, buf, sizeof (buf));
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "invalid '%s=%d' property: must be zero or "
-				    "a power of 2 from 512B to %s"), propname,
-				    intval, buf);
+				    "invalid '%s=%llu' property: must be zero "
+				    "or a power of 2 from 512B to %s"),
+				    propname, (unsigned long long)intval, buf);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
@@ -1360,10 +1374,9 @@
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
+			fallthrough;
 		}
 
-			/*FALLTHRU*/
-
 		case ZFS_PROP_SHARESMB:
 		case ZFS_PROP_SHARENFS:
 			/*
@@ -1428,49 +1441,14 @@
 				else
 					proto = PROTO_NFS;
 
-				/*
-				 * Must be an valid sharing protocol
-				 * option string so init the libshare
-				 * in order to enable the parser and
-				 * then parse the options. We use the
-				 * control API since we don't care about
-				 * the current configuration and don't
-				 * want the overhead of loading it
-				 * until we actually do something.
-				 */
-
-				if (zfs_init_libshare(hdl,
-				    SA_INIT_CONTROL_API) != SA_OK) {
-					/*
-					 * An error occurred so we can't do
-					 * anything
-					 */
-					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-					    "'%s' cannot be set: problem "
-					    "in share initialization"),
-					    propname);
-					(void) zfs_error(hdl, EZFS_BADPROP,
-					    errbuf);
-					goto error;
-				}
-
 				if (zfs_parse_options(strval, proto) != SA_OK) {
-					/*
-					 * There was an error in parsing so
-					 * deal with it by issuing an error
-					 * message and leaving after
-					 * uninitializing the libshare
-					 * interface.
-					 */
 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 					    "'%s' cannot be set to invalid "
 					    "options"), propname);
 					(void) zfs_error(hdl, EZFS_BADPROP,
 					    errbuf);
-					zfs_uninit_libshare(hdl);
 					goto error;
 				}
-				zfs_uninit_libshare(hdl);
 			}
 
 			break;
@@ -1616,7 +1594,7 @@
 	return (NULL);
 }
 
-int
+static int
 zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl)
 {
 	uint64_t old_volsize;
@@ -1718,114 +1696,6 @@
 	return (1);
 }
 
-void
-zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
-    char *errbuf)
-{
-	switch (err) {
-
-	case ENOSPC:
-		/*
-		 * For quotas and reservations, ENOSPC indicates
-		 * something different; setting a quota or reservation
-		 * doesn't use any disk space.
-		 */
-		switch (prop) {
-		case ZFS_PROP_QUOTA:
-		case ZFS_PROP_REFQUOTA:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "size is less than current used or "
-			    "reserved space"));
-			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-			break;
-
-		case ZFS_PROP_RESERVATION:
-		case ZFS_PROP_REFRESERVATION:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "size is greater than available space"));
-			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-			break;
-
-		default:
-			(void) zfs_standard_error(hdl, err, errbuf);
-			break;
-		}
-		break;
-
-	case EBUSY:
-		(void) zfs_standard_error(hdl, EBUSY, errbuf);
-		break;
-
-	case EROFS:
-		(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
-		break;
-
-	case E2BIG:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "property value too long"));
-		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-		break;
-
-	case ENOTSUP:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "pool and or dataset must be upgraded to set this "
-		    "property or value"));
-		(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
-		break;
-
-	case ERANGE:
-		if (prop == ZFS_PROP_COMPRESSION ||
-		    prop == ZFS_PROP_DNODESIZE ||
-		    prop == ZFS_PROP_RECORDSIZE) {
-			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "property setting is not allowed on "
-			    "bootable datasets"));
-			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
-		} else if (prop == ZFS_PROP_CHECKSUM ||
-		    prop == ZFS_PROP_DEDUP) {
-			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "property setting is not allowed on "
-			    "root pools"));
-			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
-		} else {
-			(void) zfs_standard_error(hdl, err, errbuf);
-		}
-		break;
-
-	case EINVAL:
-		if (prop == ZPROP_INVAL) {
-			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-		} else {
-			(void) zfs_standard_error(hdl, err, errbuf);
-		}
-		break;
-
-	case EACCES:
-		if (prop == ZFS_PROP_KEYLOCATION) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "keylocation may only be set on encryption roots"));
-			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-		} else {
-			(void) zfs_standard_error(hdl, err, errbuf);
-		}
-		break;
-
-	case EOVERFLOW:
-		/*
-		 * This platform can't address a volume this big.
-		 */
-#ifdef _ILP32
-		if (prop == ZFS_PROP_VOLSIZE) {
-			(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
-			break;
-		}
-#endif
-		/* FALLTHROUGH */
-	default:
-		(void) zfs_standard_error(hdl, err, errbuf);
-	}
-}
-
 static boolean_t
 zfs_is_namespace_prop(zfs_prop_t prop)
 {
@@ -1892,7 +1762,8 @@
 	nvlist_t *nvl;
 	int nvl_len = 0;
 	int added_resv = 0;
-	zfs_prop_t prop = 0;
+	zfs_prop_t prop;
+	boolean_t nsprop = B_FALSE;
 	nvpair_t *elem;
 
 	(void) snprintf(errbuf, sizeof (errbuf),
@@ -1939,6 +1810,7 @@
 	    elem = nvlist_next_nvpair(nvl, elem)) {
 
 		prop = zfs_name_to_prop(nvpair_name(elem));
+		nsprop |= zfs_is_namespace_prop(prop);
 
 		assert(cl_idx < nvl_len);
 		/*
@@ -2039,8 +1911,7 @@
 			 * if one of the options handled by the generic
 			 * Linux namespace layer has been modified.
 			 */
-			if (zfs_is_namespace_prop(prop) &&
-			    zfs_is_mounted(zhp, NULL))
+			if (nsprop && zfs_is_mounted(zhp, NULL))
 				ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0);
 		}
 	}
@@ -2093,6 +1964,7 @@
 		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0)
 			return (zfs_standard_error(hdl, errno, errbuf));
 
+		(void) get_stats(zhp);
 		return (0);
 	}
 
@@ -2345,7 +2217,9 @@
 	case ZFS_PROP_EXEC:
 	case ZFS_PROP_READONLY:
 	case ZFS_PROP_SETUID:
+#ifndef __FreeBSD__
 	case ZFS_PROP_XATTR:
+#endif
 	case ZFS_PROP_NBMAND:
 		*val = getprop_uint64(zhp, prop, source);
 
@@ -2417,6 +2291,10 @@
 		*val = zhp->zfs_dmustats.dds_inconsistent;
 		break;
 
+	case ZFS_PROP_REDACTED:
+		*val = zhp->zfs_dmustats.dds_redacted;
+		break;
+
 	default:
 		switch (zfs_prop_get_type(prop)) {
 		case PROP_TYPE_NUMBER:
@@ -2520,7 +2398,7 @@
 	nvpair_t *pair;
 
 	value = zfs_get_clones_nvl(zhp);
-	if (value == NULL)
+	if (value == NULL || nvlist_empty(value))
 		return (-1);
 
 	propbuf[0] = '\0';
@@ -2541,7 +2419,7 @@
 	char buf[ZFS_MAX_DATASET_NAME_LEN];
 };
 
-int
+static int
 get_clones_cb(zfs_handle_t *zhp, void *arg)
 {
 	struct get_clones_arg *gca = arg;
@@ -2629,6 +2507,37 @@
 	return (value);
 }
 
+/*
+ * Format the ZFS_PROP_REDACT_SNAPS property of 'zhp' into 'propbuf'.
+ *
+ * The property is looked up in zhp->zfs_props as an nvlist whose ZPROP_VALUE
+ * entry is a uint64 array.  An empty array is reported as the string "none";
+ * otherwise the values are written in decimal, separated by commas.  Output
+ * is built with strlcat() bounded by 'proplen'.
+ *
+ * Returns 0 on success, or -1 if the property or its value array cannot be
+ * found.
+ */
+static int
+get_rsnaps_string(zfs_handle_t *zhp, char *propbuf, size_t proplen)
+{
+	nvlist_t *value;
+	uint64_t *snaps;
+	uint_t nsnaps;
+
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &value) != 0)
+		return (-1);
+	if (nvlist_lookup_uint64_array(value, ZPROP_VALUE, &snaps,
+	    &nsnaps) != 0)
+		return (-1);
+	if (nsnaps == 0) {
+		/* There are no redaction snapshots; pass a special value back */
+		(void) snprintf(propbuf, proplen, "none");
+		return (0);
+	}
+	propbuf[0] = '\0';
+	/* index has the same unsigned type as the element count */
+	for (uint_t i = 0; i < nsnaps; i++) {
+		char buf[128];
+		if (propbuf[0] != '\0')
+			(void) strlcat(propbuf, ",", proplen);
+		(void) snprintf(buf, sizeof (buf), "%llu",
+		    (u_longlong_t)snaps[i]);
+		(void) strlcat(propbuf, buf, proplen);
+	}
+
+	return (0);
+}
+
 /*
  * Accepts a property and value and checks that the value
  * matches the one found by the channel program. If they are
@@ -2823,6 +2732,11 @@
 		zcp_check(zhp, prop, 0, str);
 		break;
 
+	case ZFS_PROP_REDACT_SNAPS:
+		if (get_rsnaps_string(zhp, propbuf, proplen) != 0)
+			return (-1);
+		break;
+
 	case ZFS_PROP_CLONES:
 		if (get_clones_string(zhp, propbuf, proplen) != 0)
 			return (-1);
@@ -2990,11 +2904,12 @@
 	case ZFS_PROP_GUID:
 	case ZFS_PROP_CREATETXG:
 	case ZFS_PROP_OBJSETID:
+	case ZFS_PROP_PBKDF2_ITERS:
 		/*
 		 * These properties are stored as numbers, but they are
-		 * identifiers.
+		 * identifiers or counters.
 		 * We don't want them to be pretty printed, because pretty
-		 * printing mangles the ID into a truncated and useless value.
+		 * printing truncates their values making them useless.
 		 */
 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
 			return (-1);
@@ -3083,7 +2998,7 @@
 	return (val);
 }
 
-int
+static int
 zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val)
 {
 	char buf[64];
@@ -3291,7 +3206,7 @@
 	if (err)
 		return (err);
 
-	err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc);
+	err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_USERSPACE_ONE, &zc);
 	if (err)
 		return (err);
 
@@ -3342,6 +3257,9 @@
 	return (0);
 }
 
+/*
+ * propname must start with "written@" or "written#".
+ */
 int
 zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
     uint64_t *propvalue)
@@ -3352,8 +3270,10 @@
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
-	snapname = strchr(propname, '@') + 1;
-	if (strchr(snapname, '@')) {
+	assert(zfs_prop_written(propname));
+	snapname = propname + strlen("written@");
+	if (strchr(snapname, '@') != NULL || strchr(snapname, '#') != NULL) {
+		/* full snapshot or bookmark name specified */
 		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 	} else {
 		/* snapname is the short name, append it to zhp's fsname */
@@ -3364,11 +3284,10 @@
 		cp = strchr(zc.zc_value, '@');
 		if (cp != NULL)
 			*cp = '\0';
-		(void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value));
-		(void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value));
+		(void) strlcat(zc.zc_value, snapname - 1, sizeof (zc.zc_value));
 	}
 
-	err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc);
+	err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SPACE_WRITTEN, &zc);
 	if (err)
 		return (err);
 
@@ -3505,7 +3424,7 @@
 		slash = parent + strlen(parent);
 	(void) strncpy(zc.zc_name, parent, slash - parent);
 	zc.zc_name[slash - parent] = '\0';
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+	if (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
 	    errno == ENOENT) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "no such pool '%s'"), zc.zc_name);
@@ -3647,6 +3566,7 @@
 
 		zfs_close(h);
 	}
+	zfs_commit_all_shares();
 
 	return (0);
 
@@ -3856,8 +3776,8 @@
 			if (type == ZFS_TYPE_VOLUME)
 				return (zfs_error(hdl, EZFS_VOLTOOBIG,
 				    errbuf));
+			fallthrough;
 #endif
-			/* FALLTHROUGH */
 		default:
 			return (zfs_standard_error(hdl, errno, errbuf));
 		}
@@ -4176,36 +4096,6 @@
 	return (rv);
 }
 
-int
-zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs)
-{
-	int err;
-	char errbuf[1024];
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot remap dataset '%s'"), fs);
-
-	err = lzc_remap(fs);
-
-	if (err != 0) {
-		switch (err) {
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "pool must be upgraded"));
-			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
-			break;
-		case EINVAL:
-			(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
-			break;
-		default:
-			(void) zfs_standard_error(hdl, err, errbuf);
-			break;
-		}
-	}
-
-	return (err);
-}
-
 /*
  * Creates snapshots.  The keys in the snaps nvlist are the snapshots to be
  * created.
@@ -4492,14 +4382,14 @@
  * Renames the given dataset.
  */
 int
-zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive,
-    boolean_t force_unmount)
+zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags)
 {
 	int ret = 0;
 	zfs_cmd_t zc = {"\0"};
 	char *delim;
 	prop_changelist_t *cl = NULL;
 	char parent[ZFS_MAX_DATASET_NAME_LEN];
+	char property[ZFS_MAXPROPLEN];
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	char errbuf[1024];
 
@@ -4551,7 +4441,7 @@
 		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 	} else {
-		if (recursive) {
+		if (flags.recursive) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "recursive rename must be a snapshot"));
 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
@@ -4592,8 +4482,19 @@
 		return (zfs_error(hdl, EZFS_ZONED, errbuf));
 	}
 
-	if (recursive) {
-		zfs_handle_t *zhrp;
+	/*
+	 * Avoid unmounting file systems with mountpoint property set to
+	 * 'legacy' or 'none' even if -u option is not given.
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
+	    !flags.recursive && !flags.nounmount &&
+	    zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, property,
+	    sizeof (property), NULL, NULL, 0, B_FALSE) == 0 &&
+	    (strcmp(property, "legacy") == 0 ||
+	    strcmp(property, "none") == 0)) {
+		flags.nounmount = B_TRUE;
+	}
+	if (flags.recursive) {
 		char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
 		if (parentname == NULL) {
 			ret = -1;
@@ -4601,7 +4502,8 @@
 		}
 		delim = strchr(parentname, '@');
 		*delim = '\0';
-		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET);
+		zfs_handle_t *zhrp = zfs_open(zhp->zfs_hdl, parentname,
+		    ZFS_TYPE_DATASET);
 		free(parentname);
 		if (zhrp == NULL) {
 			ret = -1;
@@ -4610,8 +4512,9 @@
 		zfs_close(zhrp);
 	} else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) {
 		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME,
+		    flags.nounmount ? CL_GATHER_DONT_UNMOUNT :
 		    CL_GATHER_ITER_MOUNTED,
-		    force_unmount ? MS_FORCE : 0)) == NULL)
+		    flags.forceunmount ? MS_FORCE : 0)) == NULL)
 			return (-1);
 
 		if (changelist_haszonedchild(cl)) {
@@ -4635,7 +4538,8 @@
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
 
-	zc.zc_cookie = recursive;
+	zc.zc_cookie = !!flags.recursive;
+	zc.zc_cookie |= (!!flags.nounmount) << 1;
 
 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) {
 		/*
@@ -4645,7 +4549,7 @@
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot rename '%s'"), zc.zc_name);
 
-		if (recursive && errno == EEXIST) {
+		if (flags.recursive && errno == EEXIST) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "a child dataset already has a snapshot "
 			    "with the new name"));
@@ -4942,8 +4846,6 @@
 
 		zc.zc_nvlist_dst_size = sizeof (buf);
 		if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
-			char errbuf[1024];
-
 			if ((errno == ENOTSUP &&
 			    (type == ZFS_PROP_USEROBJUSED ||
 			    type == ZFS_PROP_GROUPOBJUSED ||
@@ -4955,10 +4857,9 @@
 			    type == ZFS_PROP_PROJECTQUOTA)))
 				break;
 
-			(void) snprintf(errbuf, sizeof (errbuf),
+			return (zfs_standard_error_fmt(hdl, errno,
 			    dgettext(TEXT_DOMAIN,
-			    "cannot get used/quota for %s"), zc.zc_name);
-			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+			    "cannot get used/quota for %s"), zc.zc_name));
 		}
 		if (zc.zc_nvlist_dst_size == 0)
 			break;
@@ -5187,7 +5088,7 @@
 			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
 			break;
 		default:
-			(void) zfs_standard_error_fmt(hdl, errno, errbuf);
+			(void) zfs_standard_error(hdl, errno, errbuf);
 		}
 	}
 
@@ -5206,7 +5107,7 @@
 			(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
 			break;
 		default:
-			(void) zfs_standard_error_fmt(hdl,
+			(void) zfs_standard_error(hdl,
 			    fnvpair_value_int32(elem), errbuf);
 		}
 	}
@@ -5241,7 +5142,7 @@
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
+	if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) {
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"),
 		    zc.zc_name);
@@ -5263,17 +5164,16 @@
 			err = zfs_error(hdl, EZFS_NOENT, errbuf);
 			break;
 		default:
-			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			err = zfs_standard_error(hdl, errno, errbuf);
 			break;
 		}
 	} else {
 		/* success */
 		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
 		if (rc) {
-			(void) snprintf(errbuf, sizeof (errbuf), dgettext(
+			err = zfs_standard_error_fmt(hdl, rc, dgettext(
 			    TEXT_DOMAIN, "cannot get permissions on '%s'"),
 			    zc.zc_name);
-			err = zfs_standard_error_fmt(hdl, rc, errbuf);
 		}
 	}
 
@@ -5326,7 +5226,7 @@
 			err = zfs_error(hdl, EZFS_NOENT, errbuf);
 			break;
 		default:
-			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			err = zfs_standard_error(hdl, errno, errbuf);
 			break;
 		}
 	}
@@ -5363,7 +5263,7 @@
 			err = zfs_error(hdl, EZFS_NOENT, errbuf);
 			break;
 		default:
-			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			err = zfs_standard_error(hdl, errno, errbuf);
 			break;
 		}
 	}
@@ -5443,6 +5343,16 @@
  * 160k.  Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in
  * the 128k block example above.
  *
+ * The situation is slightly different for dRAID since the minimum allocation
+ * size is the full group width.  The same 8K block above would be written as
+ * follows in a dRAID group:
+ *
+ * +-------+-------+-------+-------+-------+
+ * | disk1 | disk2 | disk3 | disk4 | disk5 |
+ * +-------+-------+-------+-------+-------+
+ * |  P0   |  D0   |  D1   |  S0   |  S1   |
+ * +-------+-------+-------+-------+-------+
+ *
  * Compression may lead to a variety of block sizes being written for the same
  * volume or file.  There is no clear way to reserve just the amount of space
  * that will be required, so the worst case (no compression) is assumed.
@@ -5473,6 +5383,23 @@
 }
 
 /*
+ * Derived from function of same name in module/zfs/vdev_draid.c.  Returns the
+ * amount of space (in bytes) that will be allocated for the specified block
+ * size.
+ */
+static uint64_t
+vdev_draid_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift,
+    uint64_t blksize)
+{
+	ASSERT3U(ndisks, >, nparity);
+	/* Data capacity of one row: the non-parity disks times 2^ashift. */
+	uint64_t row_bytes = (ndisks - nparity) << ashift;
+	/* Rows needed for blksize, rounded up; every row spans all disks. */
+	uint64_t nrows = ((blksize - 1) / row_bytes) + 1;
+	return ((nrows * ndisks) << ashift);
+}
+
+/*
  * Determine how much space will be allocated if it lands on the most space-
  * inefficient top-level vdev.  Returns the size in bytes required to store one
  * copy of the volume data.  See theory comment above.
@@ -5481,7 +5408,7 @@
 volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize)
 {
 	nvlist_t *config, *tree, **vdevs;
-	uint_t nvdevs, v;
+	uint_t nvdevs;
 	uint64_t ret = 0;
 
 	config = zpool_get_config(zhp, NULL);
@@ -5491,33 +5418,61 @@
 		return (nblocks * blksize);
 	}
 
-	for (v = 0; v < nvdevs; v++) {
+	for (int v = 0; v < nvdevs; v++) {
 		char *type;
 		uint64_t nparity, ashift, asize, tsize;
-		nvlist_t **disks;
-		uint_t ndisks;
 		uint64_t volsize;
 
 		if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE,
-		    &type) != 0 || strcmp(type, VDEV_TYPE_RAIDZ) != 0 ||
-		    nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY,
-		    &nparity) != 0 ||
-		    nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT,
-		    &ashift) != 0 ||
-		    nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN,
-		    &disks, &ndisks) != 0) {
+		    &type) != 0)
 			continue;
+
+		if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 &&
+		    strcmp(type, VDEV_TYPE_DRAID) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(vdevs[v],
+		    ZPOOL_CONFIG_NPARITY, &nparity) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(vdevs[v],
+		    ZPOOL_CONFIG_ASHIFT, &ashift) != 0)
+			continue;
+
+		if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+			nvlist_t **disks;
+			uint_t ndisks;
+
+			if (nvlist_lookup_nvlist_array(vdevs[v],
+			    ZPOOL_CONFIG_CHILDREN, &disks, &ndisks) != 0)
+				continue;
+
+			/* allocation size for the "typical" 128k block */
+			tsize = vdev_raidz_asize(ndisks, nparity, ashift,
+			    SPA_OLD_MAXBLOCKSIZE);
+
+			/* allocation size for the blksize block */
+			asize = vdev_raidz_asize(ndisks, nparity, ashift,
+			    blksize);
+		} else {
+			uint64_t ndata;
+
+			if (nvlist_lookup_uint64(vdevs[v],
+			    ZPOOL_CONFIG_DRAID_NDATA, &ndata) != 0)
+				continue;
+
+			/* allocation size for the "typical" 128k block */
+			tsize = vdev_draid_asize(ndata + nparity, nparity,
+			    ashift, SPA_OLD_MAXBLOCKSIZE);
+
+			/* allocation size for the blksize block */
+			asize = vdev_draid_asize(ndata + nparity, nparity,
+			    ashift, blksize);
 		}
 
-		/* allocation size for the "typical" 128k block */
-		tsize = vdev_raidz_asize(ndisks, nparity, ashift,
-		    SPA_OLD_MAXBLOCKSIZE);
-		/* allocation size for the blksize block */
-		asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize);
-
 		/*
-		 * Scale this size down as a ratio of 128k / tsize.  See theory
-		 * statement above.
+		 * Scale this size down as a ratio of 128k / tsize.
+		 * See theory statement above.
 		 */
 		volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize;
 		if (volsize > ret) {
@@ -5584,3 +5539,31 @@
 	volsize += numdb;
 	return (volsize);
 }
+
+/*
+ * Wait for the given activity and return the status of the wait (whether or not
+ * any waiting was done) in the 'waited' parameter. Non-existent fses are
+ * reported via the 'missing' parameter, rather than by printing an error
+ * message. This is convenient when this function is called in a loop over a
+ * long period of time (as it is, for example, by zfs's wait cmd). In that
+ * scenario, a fs being exported or destroyed should be considered a normal
+ * event, so we don't want to print an error when we find that the fs doesn't
+ * exist.
+ */
+int
+zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity,
+    boolean_t *missing, boolean_t *waited)
+{
+	int rc = lzc_wait_fs(zhp->zfs_name, activity, waited);
+	/* A vanished fs is reported through 'missing', not as an error. */
+	if (rc == ENOENT) {
+		*missing = B_TRUE;
+		return (0);
+	}
+	*missing = B_FALSE;
+	if (rc != 0)
+		(void) zfs_standard_error_fmt(zhp->zfs_hdl, rc,
+		    dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"),
+		    zhp->zfs_name);
+	return (rc);
+}

diff --git a/zfs/lib/libzfs/libzfs_diff.c b/zfs/lib/libzfs/libzfs_diff.c
index 1b5c44b..cf62594 100644
--- a/zfs/lib/libzfs/libzfs_diff.c
+++ b/zfs/lib/libzfs/libzfs_diff.c

@@ -45,37 +45,21 @@
 #include <pthread.h>
 #include <sys/zfs_ioctl.h>
 #include <libzfs.h>
+#include <libzutil.h>
 #include "libzfs_impl.h"
 
 #define	ZDIFF_SNAPDIR		"/.zfs/snapshot/"
-#define	ZDIFF_SHARESDIR		"/.zfs/shares/"
 #define	ZDIFF_PREFIX		"zfs-diff-%d"
 
 #define	ZDIFF_ADDED	'+'
-#define	ZDIFF_MODIFIED	'M'
+#define	ZDIFF_MODIFIED	"M"
 #define	ZDIFF_REMOVED	'-'
-#define	ZDIFF_RENAMED	'R'
+#define	ZDIFF_RENAMED	"R"
 
-typedef struct differ_info {
-	zfs_handle_t *zhp;
-	char *fromsnap;
-	char *frommnt;
-	char *tosnap;
-	char *tomnt;
-	char *ds;
-	char *dsmnt;
-	char *tmpsnap;
-	char errbuf[1024];
-	boolean_t isclone;
-	boolean_t scripted;
-	boolean_t classify;
-	boolean_t timestamped;
-	uint64_t shares;
-	int zerr;
-	int cleanupfd;
-	int outputfd;
-	int datafd;
-} differ_info_t;
+#define	ZDIFF_ADDED_COLOR    ANSI_GREEN
+#define	ZDIFF_MODIFIED_COLOR ANSI_YELLOW
+#define	ZDIFF_REMOVED_COLOR  ANSI_RED
+#define	ZDIFF_RENAMED_COLOR  ANSI_BOLD_BLUE
 
 /*
  * Given a {dsname, object id}, get the object path
@@ -91,7 +75,7 @@
 	zc.zc_obj = obj;
 
 	errno = 0;
-	error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc);
+	error = zfs_ioctl(di->zhp->zfs_hdl, ZFS_IOC_OBJ_TO_STATS, &zc);
 	di->zerr = errno;
 
 	/* we can get stats even if we failed to get a path */
@@ -143,119 +127,136 @@
 
 	while ((c = *string++) != '\0') {
 		if (c > ' ' && c != '\\' && c < '\177') {
-			(void) fprintf(fp, "%c", c);
+			(void) fputc(c, fp);
 		} else {
-			(void) fprintf(fp, "\\%04o", (uint8_t)c);
+			(void) fprintf(fp, "\\%04hho", (uint8_t)c);
 		}
 	}
 }
 
-static void
-print_what(FILE *fp, mode_t what)
+/*
+ * Maps a change type character (as passed to print_file()) to the color
+ * macro used to highlight it.  Returns NULL for an unrecognized type.
+ */
+static const char *
+type_to_color(char type)
 {
-	char symbol;
+	if (type == ZDIFF_ADDED)
+		return (ZDIFF_ADDED_COLOR);
+	if (type == ZDIFF_REMOVED)
+		return (ZDIFF_REMOVED_COLOR);
+	if (type == *ZDIFF_MODIFIED)
+		return (ZDIFF_MODIFIED_COLOR);
+	if (type == *ZDIFF_RENAMED)
+		return (ZDIFF_RENAMED_COLOR);
+	return (NULL);
+}
 
+
+static char
+get_what(mode_t what)
+{
 	switch (what & S_IFMT) {
 	case S_IFBLK:
-		symbol = 'B';
-		break;
+		return ('B');
 	case S_IFCHR:
-		symbol = 'C';
-		break;
+		return ('C');
 	case S_IFDIR:
-		symbol = '/';
-		break;
+		return ('/');
 #ifdef S_IFDOOR
 	case S_IFDOOR:
-		symbol = '>';
-		break;
+		return ('>');
 #endif
 	case S_IFIFO:
-		symbol = '|';
-		break;
+		return ('|');
 	case S_IFLNK:
-		symbol = '@';
-		break;
+		return ('@');
 #ifdef S_IFPORT
 	case S_IFPORT:
-		symbol = 'P';
-		break;
+		return ('P');
 #endif
 	case S_IFSOCK:
-		symbol = '=';
-		break;
+		return ('=');
 	case S_IFREG:
-		symbol = 'F';
-		break;
+		return ('F');
 	default:
-		symbol = '?';
-		break;
+		return ('?');
 	}
-	(void) fprintf(fp, "%c", symbol);
 }
 
 static void
 print_cmn(FILE *fp, differ_info_t *di, const char *file)
 {
-	stream_bytes(fp, di->dsmnt);
-	stream_bytes(fp, file);
+	if (!di->no_mangle) {
+		stream_bytes(fp, di->dsmnt);
+		stream_bytes(fp, file);
+	} else {
+		(void) fputs(di->dsmnt, fp);
+		(void) fputs(file, fp);
+	}
 }
 
 static void
 print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new,
     zfs_stat_t *isb)
 {
+	if (isatty(fileno(fp)))
+		color_start(ZDIFF_RENAMED_COLOR);
 	if (di->timestamped)
 		(void) fprintf(fp, "%10lld.%09lld\t",
 		    (longlong_t)isb->zs_ctime[0],
 		    (longlong_t)isb->zs_ctime[1]);
-	(void) fprintf(fp, "%c\t", ZDIFF_RENAMED);
-	if (di->classify) {
-		print_what(fp, isb->zs_mode);
-		(void) fprintf(fp, "\t");
-	}
+	(void) fputs(ZDIFF_RENAMED "\t", fp);
+	if (di->classify)
+		(void) fprintf(fp, "%c\t", get_what(isb->zs_mode));
 	print_cmn(fp, di, old);
-	if (di->scripted)
-		(void) fprintf(fp, "\t");
-	else
-		(void) fprintf(fp, " -> ");
+	(void) fputs(di->scripted ? "\t" : " -> ", fp);
 	print_cmn(fp, di, new);
-	(void) fprintf(fp, "\n");
+	(void) fputc('\n', fp);
+
+	if (isatty(fileno(fp)))
+		color_end();
 }
 
 static void
 print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file,
     zfs_stat_t *isb)
 {
+	if (isatty(fileno(fp)))
+		color_start(ZDIFF_MODIFIED_COLOR);
+
 	if (di->timestamped)
 		(void) fprintf(fp, "%10lld.%09lld\t",
 		    (longlong_t)isb->zs_ctime[0],
 		    (longlong_t)isb->zs_ctime[1]);
-	(void) fprintf(fp, "%c\t", ZDIFF_MODIFIED);
-	if (di->classify) {
-		print_what(fp, isb->zs_mode);
-		(void) fprintf(fp, "\t");
-	}
+	(void) fputs(ZDIFF_MODIFIED "\t", fp);
+	if (di->classify)
+		(void) fprintf(fp, "%c\t", get_what(isb->zs_mode));
 	print_cmn(fp, di, file);
-	(void) fprintf(fp, "\t(%+d)", delta);
-	(void) fprintf(fp, "\n");
+	(void) fprintf(fp, "\t(%+d)\n", delta);
+	if (isatty(fileno(fp)))
+		color_end();
 }
 
 static void
 print_file(FILE *fp, differ_info_t *di, char type, const char *file,
     zfs_stat_t *isb)
 {
+	if (isatty(fileno(fp)))
+		color_start(type_to_color(type));
+
 	if (di->timestamped)
 		(void) fprintf(fp, "%10lld.%09lld\t",
 		    (longlong_t)isb->zs_ctime[0],
 		    (longlong_t)isb->zs_ctime[1]);
 	(void) fprintf(fp, "%c\t", type);
-	if (di->classify) {
-		print_what(fp, isb->zs_mode);
-		(void) fprintf(fp, "\t");
-	}
+	if (di->classify)
+		(void) fprintf(fp, "%c\t", get_what(isb->zs_mode));
 	print_cmn(fp, di, file);
-	(void) fprintf(fp, "\n");
+	(void) fputc('\n', fp);
+
+	if (isatty(fileno(fp)))
+		color_end();
 }
 
 static int
@@ -264,6 +265,7 @@
 	struct zfs_stat fsb, tsb;
 	mode_t fmode, tmode;
 	char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN];
+	boolean_t already_logged = B_FALSE;
 	int fobjerr, tobjerr;
 	int change;
 
@@ -275,22 +277,36 @@
 	 * we get ENOENT, then the object just didn't exist in that
 	 * snapshot.  If we get ENOTSUP, then we tried to get
 	 * info on a non-ZPL object, which we don't care about anyway.
+	 * For any other error we print a warning which includes the
+	 * errno and continue.
 	 */
+
 	fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname,
 	    MAXPATHLEN, &fsb);
-	if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP)
-		return (-1);
+	if (fobjerr && di->zerr != ENOTSUP && di->zerr != ENOENT) {
+		zfs_error_aux(di->zhp->zfs_hdl, "%s", strerror(di->zerr));
+		zfs_error(di->zhp->zfs_hdl, di->zerr, di->errbuf);
+		/*
+		 * Let's not print an error for the same object more than
+		 * once if it happens in both snapshots
+		 */
+		already_logged = B_TRUE;
+	}
 
 	tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname,
 	    MAXPATHLEN, &tsb);
-	if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP)
-		return (-1);
 
+	if (tobjerr && di->zerr != ENOTSUP && di->zerr != ENOENT) {
+		if (!already_logged) {
+			zfs_error_aux(di->zhp->zfs_hdl,
+			    "%s", strerror(di->zerr));
+			zfs_error(di->zhp->zfs_hdl, di->zerr, di->errbuf);
+		}
+	}
 	/*
 	 * Unallocated object sharing the same meta dnode block
 	 */
 	if (fobjerr && tobjerr) {
-		ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP);
 		di->zerr = 0;
 		return (0);
 	}
@@ -333,7 +349,7 @@
 			print_link_change(fp, di, change,
 			    change > 0 ? fobjname : tobjname, &tsb);
 		} else if (strcmp(fobjname, tobjname) == 0) {
-			print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb);
+			print_file(fp, di, *ZDIFF_MODIFIED, fobjname, &tsb);
 		} else {
 			print_rename(fp, di, fobjname, tobjname, &tsb);
 		}
@@ -365,12 +381,11 @@
 {
 	struct zfs_stat sb;
 
-	if (get_stats_for_obj(di, di->fromsnap, object, namebuf,
-	    maxlen, &sb) != 0) {
-		return (-1);
-	}
+	(void) get_stats_for_obj(di, di->fromsnap, object, namebuf,
+	    maxlen, &sb);
+
 	/* Don't print if in the delete queue on from side */
-	if (di->zerr == ESTALE) {
+	if (di->zerr == ESTALE || di->zerr == ENOENT) {
 		di->zerr = 0;
 		return (0);
 	}
@@ -394,7 +409,7 @@
 	while (zc.zc_obj < dr->ddr_last) {
 		int err;
 
-		err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc);
+		err = zfs_ioctl(lhdl, ZFS_IOC_NEXT_OBJ, &zc);
 		if (err == 0) {
 			if (zc.zc_obj == di->shares) {
 				zc.zc_obj++;
@@ -405,8 +420,6 @@
 			}
 			err = describe_free(fp, di, zc.zc_obj, fobjname,
 			    MAXPATHLEN);
-			if (err)
-				break;
 		} else if (errno == ESRCH) {
 			break;
 		} else {
@@ -488,25 +501,6 @@
 }
 
 static int
-find_shares_object(differ_info_t *di)
-{
-	char fullpath[MAXPATHLEN];
-	struct stat64 sb = { 0 };
-
-	(void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN);
-	(void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN);
-
-	if (stat64(fullpath, &sb) != 0) {
-		(void) snprintf(di->errbuf, sizeof (di->errbuf),
-		    dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath);
-		return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf));
-	}
-
-	di->shares = (uint64_t)sb.st_ino;
-	return (0);
-}
-
-static int
 make_temp_snapshot(differ_info_t *di)
 {
 	libzfs_handle_t *hdl = di->zhp->zfs_hdl;
@@ -517,7 +511,7 @@
 	(void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name));
 	zc.zc_cleanup_fd = di->cleanupfd;
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) {
+	if (zfs_ioctl(hdl, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) {
 		int err = errno;
 		if (err == EPERM) {
 			(void) snprintf(di->errbuf, sizeof (di->errbuf),
@@ -737,7 +731,7 @@
 {
 	di->zhp = zhp;
 
-	di->cleanupfd = open(ZFS_DEV, O_RDWR);
+	di->cleanupfd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
 	VERIFY(di->cleanupfd >= 0);
 
 	if (get_snapshot_names(di, fromsnap, tosnap) != 0)
@@ -771,8 +765,8 @@
 		return (-1);
 	}
 
-	if (pipe(pipefd)) {
-		zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+	if (pipe2(pipefd, O_CLOEXEC)) {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
 		teardown_differ_info(&di);
 		return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf));
 	}
@@ -780,12 +774,13 @@
 	di.scripted = (flags & ZFS_DIFF_PARSEABLE);
 	di.classify = (flags & ZFS_DIFF_CLASSIFY);
 	di.timestamped = (flags & ZFS_DIFF_TIMESTAMP);
+	di.no_mangle = (flags & ZFS_DIFF_NO_MANGLE);
 
 	di.outputfd = outfd;
 	di.datafd = pipefd[0];
 
 	if (pthread_create(&tid, NULL, differ, &di)) {
-		zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
 		(void) close(pipefd[0]);
 		(void) close(pipefd[1]);
 		teardown_differ_info(&di);
@@ -798,7 +793,7 @@
 	(void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1);
 	zc.zc_cookie = pipefd[1];
 
-	iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc);
+	iocerr = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DIFF, &zc);
 	if (iocerr != 0) {
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "Unable to obtain diffs"));
@@ -811,14 +806,14 @@
 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
 			    "\n   Not an earlier snapshot from the same fs"));
 		} else if (errno != EPIPE || di.zerr == 0) {
-			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
 		}
 		(void) close(pipefd[1]);
 		(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
 		teardown_differ_info(&di);
 		if (di.zerr != 0 && di.zerr != EPIPE) {
-			zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr));
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr));
 			return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf));
 		} else {
 			return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf));
@@ -829,7 +824,7 @@
 	(void) pthread_join(tid, NULL);
 
 	if (di.zerr != 0) {
-		zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr));
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr));
 		return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf));
 	}
 	teardown_differ_info(&di);

diff --git a/zfs/lib/libzfs/libzfs_import.c b/zfs/lib/libzfs/libzfs_import.c
index 3d7a0bf..ddaa5de 100644
--- a/zfs/lib/libzfs/libzfs_import.c
+++ b/zfs/lib/libzfs/libzfs_import.c

@@ -26,7 +26,6 @@
  * Copyright (c) 2016, Intel Corporation.
  */
 
-#include <devid.h>
 #include <errno.h>
 #include <libintl.h>
 #include <libgen.h>
@@ -39,6 +38,7 @@
 #include <libzfs.h>
 #include <libzfs_impl.h>
 #include <libzutil.h>
+#include <sys/arc_impl.h>
 
 /*
  * Returns true if the named pool matches the given GUID.
@@ -77,14 +77,14 @@
 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
 		return (NULL);
 
-	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4);
+	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 32);
 
 	if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
 		zcmd_free_nvlists(&zc);
 		return (NULL);
 	}
 
-	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
+	while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
 	    &zc)) != 0 && errno == ENOMEM) {
 		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
 			zcmd_free_nvlists(&zc);
@@ -112,7 +112,6 @@
 	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
 }
 
-
 static int
 pool_active_libzfs(void *handle, const char *name, uint64_t guid,
     boolean_t *isactive)
@@ -147,8 +146,10 @@
 	struct stat64 statbuf;
 	int l;
 	vdev_label_t *label;
+	l2arc_dev_hdr_phys_t *l2dhdr = NULL;
 	uint64_t size;
 	int labels_cleared = 0;
+	boolean_t clear_l2arc_header = B_FALSE, header_cleared = B_FALSE;
 
 	if (fstat64_blk(fd, &statbuf) == -1)
 		return (0);
@@ -158,8 +159,13 @@
 	if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
 		return (-1);
 
+	if ((l2dhdr = calloc(1, sizeof (l2arc_dev_hdr_phys_t))) == NULL) {
+		free(label);
+		return (-1);
+	}
+
 	for (l = 0; l < VDEV_LABELS; l++) {
-		uint64_t state, guid;
+		uint64_t state, guid, l2cache;
 		nvlist_t *config;
 
 		if (pread64(fd, label, sizeof (vdev_label_t),
@@ -186,6 +192,15 @@
 			continue;
 		}
 
+		/* If the device is a cache device clear the header. */
+		if (!clear_l2arc_header) {
+			if (nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
+			    l2cache == POOL_STATE_L2CACHE) {
+				clear_l2arc_header = B_TRUE;
+			}
+		}
+
 		nvlist_free(config);
 
 		/*
@@ -203,11 +218,21 @@
 		}
 	}
 
+	/* Clear the L2ARC header. */
+	if (clear_l2arc_header &&
+	    pwrite64(fd, l2dhdr, sizeof (l2arc_dev_hdr_phys_t),
+	    VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t))
+			header_cleared = B_TRUE;
+
 	free(label);
+	free(l2dhdr);
 
 	if (labels_cleared == 0)
 		return (-1);
 
+	if (clear_l2arc_header && !header_cleared)
+		return (-1);
+
 	return (0);
 }
 

diff --git a/zfs/lib/libzfs/libzfs_iter.c b/zfs/lib/libzfs/libzfs_iter.c
index 5e9a1ec..7806e21 100644
--- a/zfs/lib/libzfs/libzfs_iter.c
+++ b/zfs/lib/libzfs/libzfs_iter.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2019 Datto Inc.
  */
@@ -38,7 +38,7 @@
 
 #include "libzfs_impl.h"
 
-int
+static int
 zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 {
 	nvlist_t *nvl = zfs_get_clones_nvl(zhp);
@@ -69,7 +69,7 @@
 	orig_cookie = zc->zc_cookie;
 top:
 	(void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
-	rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc);
+	rc = zfs_ioctl(zhp->zfs_hdl, arg, zc);
 
 	if (rc == -1) {
 		switch (errno) {
@@ -212,10 +212,12 @@
 
 	/* Setup the requested properties nvlist. */
 	props = fnvlist_alloc();
-	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID));
-	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG));
-	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION));
-	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_IVSET_GUID));
+	for (zfs_prop_t p = 0; p < ZFS_NUM_PROPS; p++) {
+		if (zfs_prop_valid_for_type(p, ZFS_TYPE_BOOKMARK, B_FALSE)) {
+			fnvlist_add_boolean(props, zfs_prop_to_name(p));
+		}
+	}
+	fnvlist_add_boolean(props, "redact_complete");
 
 	if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0)
 		goto out;
@@ -300,7 +302,7 @@
 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
 
-	return (AVL_CMP(lcreate, rcreate));
+	return (TREE_CMP(lcreate, rcreate));
 }
 
 int
@@ -563,7 +565,7 @@
 	FILE *mnttab;
 	int err = 0;
 
-	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
+	if ((mnttab = fopen(MNTTAB, "re")) == NULL)
 		return (ENOENT);
 
 	while (err == 0 && getmntent(mnttab, &entry) == 0) {

diff --git a/zfs/lib/libzfs/libzfs_mount.c b/zfs/lib/libzfs/libzfs_mount.c
index d62801c..82a67ee 100644
--- a/zfs/lib/libzfs/libzfs_mount.c
+++ b/zfs/lib/libzfs/libzfs_mount.c

@@ -22,7 +22,7 @@
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2021 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2018 Datto Inc.
@@ -37,6 +37,7 @@
  *
  *	zfs_is_mounted()
  *	zfs_mount()
+ *	zfs_mount_at()
  *	zfs_unmount()
  *	zfs_unmountall()
  *
@@ -94,7 +95,6 @@
 static int mount_tp_nthr = 512;	/* tpool threads for multi-threaded mounting */
 
 static void zfs_mount_task(void *);
-static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
     zfs_share_proto_t);
 
@@ -102,13 +102,6 @@
  * The share protocols table must be in the same order as the zfs_share_proto_t
  * enum in libzfs_impl.h
  */
-typedef struct {
-	zfs_prop_t p_prop;
-	char *p_name;
-	int p_share_err;
-	int p_unshare_err;
-} proto_table_t;
-
 proto_table_t proto_table[PROTO_END] = {
 	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
 	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
@@ -129,60 +122,7 @@
 	PROTO_END
 };
 
-/*
- * Search the sharetab for the given mountpoint and protocol, returning
- * a zfs_share_type_t value.
- */
-static zfs_share_type_t
-is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
-{
-	char buf[MAXPATHLEN], *tab;
-	char *ptr;
 
-	if (hdl->libzfs_sharetab == NULL)
-		return (SHARED_NOT_SHARED);
-
-	/* Reopen ZFS_SHARETAB to prevent reading stale data from open file */
-	if (freopen(ZFS_SHARETAB, "r", hdl->libzfs_sharetab) == NULL)
-		return (SHARED_NOT_SHARED);
-
-	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
-
-	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
-
-		/* the mountpoint is the first entry on each line */
-		if ((tab = strchr(buf, '\t')) == NULL)
-			continue;
-
-		*tab = '\0';
-		if (strcmp(buf, mountpoint) == 0) {
-			/*
-			 * the protocol field is the third field
-			 * skip over second field
-			 */
-			ptr = ++tab;
-			if ((tab = strchr(ptr, '\t')) == NULL)
-				continue;
-			ptr = ++tab;
-			if ((tab = strchr(ptr, '\t')) == NULL)
-				continue;
-			*tab = '\0';
-			if (strcmp(ptr,
-			    proto_table[proto].p_name) == 0) {
-				switch (proto) {
-				case PROTO_NFS:
-					return (SHARED_NFS);
-				case PROTO_SMB:
-					return (SHARED_SMB);
-				default:
-					return (0);
-				}
-			}
-		}
-	}
-
-	return (SHARED_NOT_SHARED);
-}
 
 static boolean_t
 dir_is_empty_stat(const char *dirname)
@@ -301,12 +241,29 @@
 }
 
 /*
+ * Checks any higher order concerns about whether the given dataset is
+ * mountable; returns true if it is, false otherwise.  It specifically assumes
+ * that the caller has verified the sanity of mounting the dataset at
+ * mountpoint to the extent the caller wants.
+ */
+static boolean_t
+zfs_is_mountable_internal(zfs_handle_t *zhp, const char *mountpoint)
+{
+	/* Only the zoned check applies here; 'mountpoint' is not read. */
+	boolean_t zoned = (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) != 0);
+
+	if (zoned && getzoneid() == GLOBAL_ZONEID)
+		return (B_FALSE);
+	return (B_TRUE);
+}
+
+/*
  * Returns true if the given dataset is mountable, false otherwise.  Returns the
  * mountpoint in 'buf'.
  */
-static boolean_t
+boolean_t
 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
-    zprop_source_t *source)
+    zprop_source_t *source, int flags)
 {
 	char sourceloc[MAXNAMELEN];
 	zprop_source_t sourcetype;
@@ -325,8 +282,10 @@
 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
 		return (B_FALSE);
 
-	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
-	    getzoneid() == GLOBAL_ZONEID)
+	if (!zfs_is_mountable_internal(zhp, buf))
+		return (B_FALSE);
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
 		return (B_FALSE);
 
 	if (source)
@@ -353,68 +312,6 @@
  */
 
 static int
-do_mount(const char *src, const char *mntpt, char *opts)
-{
-	char *argv[9] = {
-	    "/bin/mount",
-	    "--no-canonicalize",
-	    "-t", MNTTYPE_ZFS,
-	    "-o", opts,
-	    (char *)src,
-	    (char *)mntpt,
-	    (char *)NULL };
-	int rc;
-
-	/* Return only the most critical mount error */
-	rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE);
-	if (rc) {
-		if (rc & MOUNT_FILEIO)
-			return (EIO);
-		if (rc & MOUNT_USER)
-			return (EINTR);
-		if (rc & MOUNT_SOFTWARE)
-			return (EPIPE);
-		if (rc & MOUNT_BUSY)
-			return (EBUSY);
-		if (rc & MOUNT_SYSERR)
-			return (EAGAIN);
-		if (rc & MOUNT_USAGE)
-			return (EINVAL);
-
-		return (ENXIO); /* Generic error */
-	}
-
-	return (0);
-}
-
-static int
-do_unmount(const char *mntpt, int flags)
-{
-	char force_opt[] = "-f";
-	char lazy_opt[] = "-l";
-	char *argv[7] = {
-	    "/bin/umount",
-	    "-t", MNTTYPE_ZFS,
-	    NULL, NULL, NULL, NULL };
-	int rc, count = 3;
-
-	if (flags & MS_FORCE) {
-		argv[count] = force_opt;
-		count++;
-	}
-
-	if (flags & MS_DETACH) {
-		argv[count] = lazy_opt;
-		count++;
-	}
-
-	argv[count] = (char *)mntpt;
-	rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE);
-
-	return (rc ? EINVAL : 0);
-}
-
-static int
 zfs_add_option(zfs_handle_t *zhp, char *options, int len,
     zfs_prop_t prop, char *on, char *off)
 {
@@ -466,16 +363,31 @@
 	return (error);
 }
 
+int
+zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
+{
+	char mntpt[ZFS_MAXPROPLEN];
+
+	/* Not mountable is not an error: nothing to do, report success. */
+	if (zfs_is_mountable(zhp, mntpt, sizeof (mntpt), NULL, flags))
+		return (zfs_mount_at(zhp, options, flags, mntpt));
+
+	return (0);
+}
+
 /*
  * Mount the given filesystem.
  */
 int
-zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
+zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,
+    const char *mountpoint)
 {
 	struct stat buf;
-	char mountpoint[ZFS_MAXPROPLEN];
 	char mntopts[MNT_LINE_MAX];
 	char overlay[ZFS_MAXPROPLEN];
+	char prop_encroot[MAXNAMELEN];
+	boolean_t is_encroot;
+	zfs_handle_t *encroot_hp = zhp;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	uint64_t keystatus;
 	int remount = 0, rc;
@@ -489,15 +401,16 @@
 	if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)
 		remount = 1;
 
+	/* Potentially duplicates some checks if invoked by zfs_mount(). */
+	if (!zfs_is_mountable_internal(zhp, mountpoint))
+		return (0);
+
 	/*
 	 * If the pool is imported read-only then all mounts must be read-only
 	 */
 	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
 		(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));
 
-	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
-		return (0);
-
 	/*
 	 * Append default mount options which apply to the mount point.
 	 * This is done because under Linux (unlike Solaris) multiple mount
@@ -533,7 +446,27 @@
 		 */
 		if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
 			if (flags & MS_CRYPT) {
-				rc = zfs_crypto_load_key(zhp, B_FALSE, NULL);
+				rc = zfs_crypto_get_encryption_root(zhp,
+				    &is_encroot, prop_encroot);
+				if (rc) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "Failed to get encryption root for "
+					    "'%s'."), zfs_get_name(zhp));
+					return (rc);
+				}
+
+				if (!is_encroot) {
+					encroot_hp = zfs_open(hdl, prop_encroot,
+					    ZFS_TYPE_DATASET);
+					if (encroot_hp == NULL)
+						return (hdl->libzfs_error);
+				}
+
+				rc = zfs_crypto_load_key(encroot_hp,
+				    B_FALSE, NULL);
+
+				if (!is_encroot)
+					zfs_close(encroot_hp);
 				if (rc)
 					return (rc);
 			} else {
@@ -556,7 +489,8 @@
 	if (lstat(mountpoint, &buf) != 0) {
 		if (mkdirp(mountpoint, 0755) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "failed to create mountpoint"));
+			    "failed to create mountpoint: %s"),
+			    strerror(errno));
 			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 			    mountpoint));
@@ -564,8 +498,8 @@
 	}
 
 	/*
-	 * Overlay mounts are disabled by default but may be enabled
-	 * via the 'overlay' property or the 'zfs mount -O' option.
+	 * Overlay mounts are enabled by default but may be disabled
+	 * via the 'overlay' property. The -O flag remains for compatibility.
 	 */
 	if (!(flags & MS_OVERLAY)) {
 		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
@@ -579,7 +513,7 @@
 	/*
 	 * Determine if the mountpoint is empty.  If so, refuse to perform the
 	 * mount.  We don't perform this check if 'remount' is
-	 * specified or if overlay option(-O) is given
+	 * specified or if overlay option (-O) is given
 	 */
 	if ((flags & MS_OVERLAY) == 0 && !remount &&
 	    !dir_is_empty(mountpoint)) {
@@ -590,7 +524,7 @@
 	}
 
 	/* perform the mount */
-	rc = do_mount(zfs_get_name(zhp), mountpoint, mntopts);
+	rc = do_mount(zhp, mountpoint, mntopts, flags);
 	if (rc) {
 		/*
 		 * Generic errors are nasty, but there are just way too many
@@ -604,19 +538,17 @@
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "Insufficient privileges"));
 		} else if (rc == ENOTSUP) {
-			char buf[256];
 			int spa_version;
 
 			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
-			(void) snprintf(buf, sizeof (buf),
-			    dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Can't mount a version %llu "
 			    "file system on a version %d pool. Pool must be"
 			    " upgraded to mount this file system."),
 			    (u_longlong_t)zfs_prop_get_int(zhp,
 			    ZFS_PROP_VERSION), spa_version);
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
 		} else {
-			zfs_error_aux(hdl, strerror(rc));
+			zfs_error_aux(hdl, "%s", strerror(rc));
 		}
 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
@@ -642,7 +574,28 @@
 
 	error = do_unmount(mountpoint, flags);
 	if (error != 0) {
-		return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+		int libzfs_err;
+
+		switch (error) {
+		case EBUSY:
+			libzfs_err = EZFS_BUSY;
+			break;
+		case EIO:
+			libzfs_err = EZFS_IO;
+			break;
+		case ENOENT:
+			libzfs_err = EZFS_NOENT;
+			break;
+		case ENOMEM:
+			libzfs_err = EZFS_NOMEM;
+			break;
+		case EPERM:
+			libzfs_err = EZFS_PERM;
+			break;
+		default:
+			libzfs_err = EZFS_UMOUNTFAILED;
+		}
+		return (zfs_error_fmt(hdl, libzfs_err,
 		    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
 		    mountpoint));
 	}
@@ -659,6 +612,7 @@
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	struct mnttab entry;
 	char *mntpt = NULL;
+	boolean_t encroot, unmounted = B_FALSE;
 
 	/* check to see if we need to unmount the filesystem */
 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
@@ -681,14 +635,41 @@
 			free(mntpt);
 			return (-1);
 		}
+		zfs_commit_all_shares();
 
 		if (unmount_one(hdl, mntpt, flags) != 0) {
 			free(mntpt);
 			(void) zfs_shareall(zhp);
+			zfs_commit_all_shares();
 			return (-1);
 		}
+
 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
 		free(mntpt);
+		unmounted = B_TRUE;
+	}
+
+	/*
+	 * If the MS_CRYPT flag is provided we must ensure we attempt to
+	 * unload the dataset's key regardless of whether we did any work
+	 * to unmount it. We only do this for encryption roots.
+	 */
+	if ((flags & MS_CRYPT) != 0 &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
+		zfs_refresh_properties(zhp);
+
+		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&
+		    unmounted) {
+			(void) zfs_mount(zhp, NULL, 0);
+			return (-1);
+		}
+
+		if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
+		    ZFS_KEYSTATUS_AVAILABLE &&
+		    zfs_crypto_unload_key(zhp) != 0) {
+			(void) zfs_mount(zhp, NULL, 0);
+			return (-1);
+		}
 	}
 
 	return (0);
@@ -706,7 +687,7 @@
 	int ret;
 
 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
-	    CL_GATHER_ITER_MOUNTED, 0);
+	    CL_GATHER_ITER_MOUNTED, flags);
 	if (clp == NULL)
 		return (-1);
 
@@ -732,6 +713,94 @@
 	return (rc ? B_TRUE : B_FALSE);
 }
 
+/*
+ * Unshare a filesystem by mountpoint.
+ */
+int
+unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
+    zfs_share_proto_t proto)
+{
+	int err;
+
+	err = sa_disable_share(mountpoint, proto_table[proto].p_name);
+	if (err != SA_OK) {
+		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+		    name, sa_errorstr(err)));
+	}
+	return (0);
+}
+
+/*
+ * Query libshare for the given mountpoint and protocol, returning
+ * a zfs_share_type_t value.
+ */
+zfs_share_type_t
+is_shared(const char *mountpoint, zfs_share_proto_t proto)
+{
+	if (sa_is_shared(mountpoint, proto_table[proto].p_name)) {
+		switch (proto) {
+		case PROTO_NFS:
+			return (SHARED_NFS);
+		case PROTO_SMB:
+			return (SHARED_SMB);
+		default:
+			return (SHARED_NOT_SHARED);
+		}
+	}
+	return (SHARED_NOT_SHARED);
+}
+
+/*
+ * Share the given filesystem according to the options in the specified
+ * protocol specific properties (sharenfs, sharesmb).  We rely
+ * on "libshare" to do the dirty work for us.
+ */
+int
+zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	char shareopts[ZFS_MAXPROPLEN];
+	char sourcestr[ZFS_MAXPROPLEN];
+	zfs_share_proto_t *curr_proto;
+	zprop_source_t sourcetype;
+	int err = 0;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
+		return (0);
+
+	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
+		/*
+		 * Return success if there are no share options.
+		 */
+		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
+		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
+		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
+		    strcmp(shareopts, "off") == 0)
+			continue;
+
+		/*
+		 * If the 'zoned' property is set, then zfs_is_mountable()
+		 * will have already bailed out if we are in the global zone.
+		 * But local zones cannot be NFS servers, so we ignore it for
+		 * local zones as well.
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
+			continue;
+
+		err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,
+		    proto_table[*curr_proto].p_name);
+		if (err != SA_OK) {
+			return (zfs_error_fmt(zhp->zfs_hdl,
+			    proto_table[*curr_proto].p_share_err,
+			    dgettext(TEXT_DOMAIN, "cannot share '%s: %s'"),
+			    zfs_get_name(zhp), sa_errorstr(err)));
+		}
+
+	}
+	return (0);
+}
+
 int
 zfs_share(zfs_handle_t *zhp)
 {
@@ -758,7 +827,7 @@
 	if (!zfs_is_mounted(zhp, &mountpoint))
 		return (SHARED_NOT_SHARED);
 
-	if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
+	if ((rc = is_shared(mountpoint, proto))
 	    != SHARED_NOT_SHARED) {
 		if (where != NULL)
 			*where = mountpoint;
@@ -786,59 +855,6 @@
 }
 
 /*
- * zfs_init_libshare(zhandle, service)
- *
- * Initialize the libshare API if it hasn't already been initialized.
- * In all cases it returns 0 if it succeeded and an error if not. The
- * service value is which part(s) of the API to initialize and is a
- * direct map to the libshare sa_init(service) interface.
- */
-int
-zfs_init_libshare(libzfs_handle_t *zhandle, int service)
-{
-	int ret = SA_OK;
-
-	if (ret == SA_OK && zhandle->libzfs_shareflags & ZFSSHARE_MISS) {
-		/*
-		 * We had a cache miss. Most likely it is a new ZFS
-		 * dataset that was just created. We want to make sure
-		 * so check timestamps to see if a different process
-		 * has updated any of the configuration. If there was
-		 * some non-ZFS change, we need to re-initialize the
-		 * internal cache.
-		 */
-		zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS;
-		if (sa_needs_refresh(zhandle->libzfs_sharehdl)) {
-			zfs_uninit_libshare(zhandle);
-			zhandle->libzfs_sharehdl = sa_init(service);
-		}
-	}
-
-	if (ret == SA_OK && zhandle && zhandle->libzfs_sharehdl == NULL)
-		zhandle->libzfs_sharehdl = sa_init(service);
-
-	if (ret == SA_OK && zhandle->libzfs_sharehdl == NULL)
-		ret = SA_NO_MEMORY;
-
-	return (ret);
-}
-
-/*
- * zfs_uninit_libshare(zhandle)
- *
- * Uninitialize the libshare API if it hasn't already been
- * uninitialized. It is OK to call multiple times.
- */
-void
-zfs_uninit_libshare(libzfs_handle_t *zhandle)
-{
-	if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
-		sa_fini(zhandle->libzfs_sharehdl);
-		zhandle->libzfs_sharehdl = NULL;
-	}
-}
-
-/*
  * zfs_parse_options(options, proto)
  *
  * Call the legacy parse interface to get the protocol specific
@@ -847,105 +863,46 @@
 int
 zfs_parse_options(char *options, zfs_share_proto_t proto)
 {
-	return (sa_parse_legacy_options(NULL, options,
-	    proto_table[proto].p_name));
+	return (sa_validate_shareopts(options, proto_table[proto].p_name));
 }
 
-/*
- * Share the given filesystem according to the options in the specified
- * protocol specific properties (sharenfs, sharesmb).  We rely
- * on "libshare" to do the dirty work for us.
- */
-static int
-zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+void
+zfs_commit_proto(zfs_share_proto_t *proto)
 {
-	char mountpoint[ZFS_MAXPROPLEN];
-	char shareopts[ZFS_MAXPROPLEN];
-	char sourcestr[ZFS_MAXPROPLEN];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	sa_share_t share;
 	zfs_share_proto_t *curr_proto;
-	zprop_source_t sourcetype;
-	int ret;
-
-	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
-		return (0);
-
 	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
-		/*
-		 * Return success if there are no share options.
-		 */
-		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
-		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
-		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
-		    strcmp(shareopts, "off") == 0)
-			continue;
-
-		ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API);
-		if (ret != SA_OK) {
-			(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
-			    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
-			    zfs_get_name(zhp), sa_errorstr(ret));
-			return (-1);
-		}
-
-		/*
-		 * If the 'zoned' property is set, then zfs_is_mountable()
-		 * will have already bailed out if we are in the global zone.
-		 * But local zones cannot be NFS servers, so we ignore it for
-		 * local zones as well.
-		 */
-		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
-			continue;
-
-		share = sa_find_share(hdl->libzfs_sharehdl, mountpoint);
-		if (share == NULL) {
-			/*
-			 * This may be a new file system that was just
-			 * created so isn't in the internal cache
-			 * (second time through). Rather than
-			 * reloading the entire configuration, we can
-			 * assume ZFS has done the checking and it is
-			 * safe to add this to the internal
-			 * configuration.
-			 */
-			if (sa_zfs_process_share(hdl->libzfs_sharehdl,
-			    NULL, NULL, mountpoint,
-			    proto_table[*curr_proto].p_name, sourcetype,
-			    shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
-				(void) zfs_error_fmt(hdl,
-				    proto_table[*curr_proto].p_share_err,
-				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
-				    zfs_get_name(zhp));
-				return (-1);
-			}
-			hdl->libzfs_shareflags |= ZFSSHARE_MISS;
-			share = sa_find_share(hdl->libzfs_sharehdl,
-			    mountpoint);
-		}
-		if (share != NULL) {
-			int err;
-			err = sa_enable_share(share,
-			    proto_table[*curr_proto].p_name);
-			if (err != SA_OK) {
-				(void) zfs_error_fmt(hdl,
-				    proto_table[*curr_proto].p_share_err,
-				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
-				    zfs_get_name(zhp));
-				return (-1);
-			}
-		} else {
-			(void) zfs_error_fmt(hdl,
-			    proto_table[*curr_proto].p_share_err,
-			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
-			    zfs_get_name(zhp));
-			return (-1);
-		}
-
+		sa_commit_shares(proto_table[*curr_proto].p_name);
 	}
-	return (0);
 }
 
+void
+zfs_commit_nfs_shares(void)
+{
+	zfs_commit_proto(nfs_only);
+}
+
+void
+zfs_commit_smb_shares(void)
+{
+	zfs_commit_proto(smb_only);
+}
+
+void
+zfs_commit_all_shares(void)
+{
+	zfs_commit_proto(share_all_proto);
+}
+
+void
+zfs_commit_shares(const char *proto)
+{
+	if (proto == NULL)
+		zfs_commit_proto(share_all_proto);
+	else if (strcmp(proto, "nfs") == 0)
+		zfs_commit_proto(nfs_only);
+	else if (strcmp(proto, "smb") == 0)
+		zfs_commit_proto(smb_only);
+}
 
 int
 zfs_share_nfs(zfs_handle_t *zhp)
@@ -966,50 +923,6 @@
 }
 
 /*
- * Unshare a filesystem by mountpoint.
- */
-static int
-unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
-    zfs_share_proto_t proto)
-{
-	sa_share_t share;
-	int err;
-	char *mntpt;
-	/*
-	 * Mountpoint could get trashed if libshare calls getmntany
-	 * which it does during API initialization, so strdup the
-	 * value.
-	 */
-	mntpt = zfs_strdup(hdl, mountpoint);
-
-	/* make sure libshare initialized */
-	if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
-		free(mntpt);	/* don't need the copy anymore */
-		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
-		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
-		    name, sa_errorstr(err)));
-	}
-
-	share = sa_find_share(hdl->libzfs_sharehdl, mntpt);
-	free(mntpt);	/* don't need the copy anymore */
-
-	if (share != NULL) {
-		err = sa_disable_share(share, proto_table[proto].p_name);
-		if (err != SA_OK) {
-			return (zfs_error_fmt(hdl,
-			    proto_table[proto].p_unshare_err,
-			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
-			    name, sa_errorstr(err)));
-		}
-	} else {
-		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
-		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
-		    name));
-	}
-	return (0);
-}
-
-/*
  * Unshare the given filesystem.
  */
 int
@@ -1034,12 +947,13 @@
 		for (curr_proto = proto; *curr_proto != PROTO_END;
 		    curr_proto++) {
 
-			if (is_shared(hdl, mntpt, *curr_proto) &&
-			    unshare_one(hdl, zhp->zfs_name,
-			    mntpt, *curr_proto) != 0) {
-				if (mntpt != NULL)
-					free(mntpt);
-				return (-1);
+			if (is_shared(mntpt, *curr_proto)) {
+				if (unshare_one(hdl, zhp->zfs_name,
+				    mntpt, *curr_proto) != 0) {
+					if (mntpt != NULL)
+						free(mntpt);
+					return (-1);
+				}
 			}
 		}
 	}
@@ -1064,7 +978,7 @@
 /*
  * Same as zfs_unmountall(), but for NFS and SMB unshares.
  */
-int
+static int
 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
 {
 	prop_changelist_t *clp;
@@ -1136,7 +1050,7 @@
 	zprop_source_t source;
 
 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
-	    &source))
+	    &source, 0))
 		return;
 
 	if (source == ZPROP_SRC_DEFAULT ||
@@ -1434,7 +1348,7 @@
 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
 
 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
-		return;
+		goto out;
 
 	/*
 	 * We dispatch tasks to mount filesystems with mountpoints underneath
@@ -1455,6 +1369,8 @@
 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
 		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
 	}
+
+out:
 	free(mp);
 }
 
@@ -1579,6 +1495,8 @@
 	    zfs_share_one, &ms, B_FALSE);
 	if (ms.ms_mntstatus != 0)
 		ret = ms.ms_mntstatus;
+	else
+		zfs_commit_all_shares();
 
 out:
 	for (int i = 0; i < cb.cb_used; i++)
@@ -1622,7 +1540,7 @@
 	namelen = strlen(zhp->zpool_name);
 
 	/* Reopen MNTTAB to prevent reading stale data from open file */
-	if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+	if (freopen(MNTTAB, "re", hdl->libzfs_mnttab) == NULL)
 		return (ENOENT);
 
 	used = alloc = 0;
@@ -1704,12 +1622,13 @@
 		zfs_share_proto_t *curr_proto;
 		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
 		    curr_proto++) {
-			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
+			if (is_shared(mountpoints[i], *curr_proto) &&
 			    unshare_one(hdl, mountpoints[i],
 			    mountpoints[i], *curr_proto) != 0)
 				goto out;
 		}
 	}
+	zfs_commit_all_shares();
 
 	/*
 	 * Now unmount everything, removing the underlying directories as

diff --git a/zfs/lib/libzfs/libzfs_pool.c b/zfs/lib/libzfs/libzfs_pool.c
index d45b87c..fc6c6e8 100644
--- a/zfs/lib/libzfs/libzfs_pool.c
+++ b/zfs/lib/libzfs/libzfs_pool.c

@@ -22,16 +22,16 @@
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright (c) 2018 Datto Inc.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 #include <errno.h>
-#include <devid.h>
 #include <libintl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -42,11 +42,13 @@
 #include <sys/stat.h>
 #include <sys/efi_partition.h>
 #include <sys/systeminfo.h>
-#include <sys/vtoc.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/zfs_sysfs.h>
 #include <sys/vdev_disk.h>
+#include <sys/types.h>
 #include <dlfcn.h>
 #include <libzutil.h>
+#include <fcntl.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
@@ -54,12 +56,11 @@
 #include "zfs_comutil.h"
 #include "zfeature_common.h"
 
-static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
 static boolean_t zpool_vdev_is_interior(const char *name);
 
 typedef struct prop_flags {
-	int create:1;	/* Validate property on creation */
-	int import:1;	/* Validate property on import */
+	unsigned int create:1;	/* Validate property on creation */
+	unsigned int import:1;	/* Validate property on import */
 } prop_flags_t;
 
 /*
@@ -79,7 +80,7 @@
 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
 		return (-1);
 
-	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
+	while (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
 		if (errno == ENOMEM) {
 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
 				zcmd_free_nvlists(&zc);
@@ -101,7 +102,7 @@
 	return (0);
 }
 
-static int
+int
 zpool_props_refresh(zpool_handle_t *zhp)
 {
 	nvlist_t *old_props;
@@ -305,6 +306,7 @@
 		case ZPOOL_PROP_ALTROOT:
 		case ZPOOL_PROP_CACHEFILE:
 		case ZPOOL_PROP_COMMENT:
+		case ZPOOL_PROP_COMPATIBILITY:
 			if (zhp->zpool_props != NULL ||
 			    zpool_get_all_props(zhp) == 0) {
 				(void) strlcpy(buf,
@@ -312,7 +314,7 @@
 				    len);
 				break;
 			}
-			/* FALLTHROUGH */
+			fallthrough;
 		default:
 			(void) strlcpy(buf, "-", len);
 			break;
@@ -403,7 +405,7 @@
 				(void) snprintf(buf, len, "-");
 				break;
 			}
-			/* FALLTHROUGH */
+			fallthrough;
 		default:
 			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
 		}
@@ -432,7 +434,7 @@
  * Assuming bootfs is a valid dataset name.
  */
 static boolean_t
-bootfs_name_valid(const char *pool, char *bootfs)
+bootfs_name_valid(const char *pool, const char *bootfs)
 {
 	int len = strlen(pool);
 	if (bootfs[0] == '\0')
@@ -448,17 +450,6 @@
 	return (B_FALSE);
 }
 
-boolean_t
-zpool_is_bootable(zpool_handle_t *zhp)
-{
-	char bootfs[ZFS_MAX_DATASET_NAME_LEN];
-
-	return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
-	    sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
-	    sizeof (bootfs)) != 0);
-}
-
-
 /*
  * Given an nvlist of zpool properties to be set, validate that they are
  * correct, and parse any numeric properties (index, boolean, etc) if they are
@@ -476,6 +467,7 @@
 	char *slash, *check;
 	struct stat64 statbuf;
 	zpool_handle_t *zhp;
+	char report[1024];
 
 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
 		(void) no_memory(hdl);
@@ -495,7 +487,8 @@
 			if (err != 0) {
 				ASSERT3U(err, ==, ENOENT);
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "invalid feature '%s'"), fname);
+				    "feature '%s' unsupported by kernel"),
+				    fname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
@@ -570,8 +563,8 @@
 			if (intval < version ||
 			    !SPA_VERSION_IS_SUPPORTED(intval)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "property '%s' number %d is invalid."),
-				    propname, intval);
+				    "property '%s' number %llu is invalid."),
+				    propname, (unsigned long long)intval);
 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
 				goto error;
 			}
@@ -581,10 +574,11 @@
 			if (intval != 0 &&
 			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "property '%s' number %d is invalid, only "
-				    "values between %" PRId32 " and "
-				    "%" PRId32 " are allowed."),
-				    propname, intval, ASHIFT_MIN, ASHIFT_MAX);
+				    "property '%s' number %llu is invalid, "
+				    "only values between %" PRId32 " and %"
+				    PRId32 " are allowed."),
+				    propname, (unsigned long long)intval,
+				    ASHIFT_MIN, ASHIFT_MAX);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
@@ -684,6 +678,20 @@
 			*slash = '/';
 			break;
 
+		case ZPOOL_PROP_COMPATIBILITY:
+			switch (zpool_load_compat(strval, NULL, report, 1024)) {
+			case ZPOOL_COMPATIBILITY_OK:
+			case ZPOOL_COMPATIBILITY_WARNTOKEN:
+				break;
+			case ZPOOL_COMPATIBILITY_BADFILE:
+			case ZPOOL_COMPATIBILITY_BADTOKEN:
+			case ZPOOL_COMPATIBILITY_NOFILES:
+				zfs_error_aux(hdl, "%s", report);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
 		case ZPOOL_PROP_COMMENT:
 			for (check = strval; *check != '\0'; check++) {
 				if (!isprint(*check)) {
@@ -722,15 +730,8 @@
 			}
 			break;
 		case ZPOOL_PROP_DEDUPDITTO:
-			if (intval < ZIO_DEDUPDITTO_MIN && intval != 0) {
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "property '%s' value %d is invalid; only "
-				    "values of 0 or >= %" PRId32 " are allowed "
-				    "for this property."),
-				    propname, intval, ZIO_DEDUPDITTO_MIN);
-				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-				goto error;
-			}
+			printf("Note: property '%s' no longer has "
+			    "any effect\n", propname);
 			break;
 
 		default:
@@ -804,7 +805,8 @@
 }
 
 int
-zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
+zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
+    boolean_t literal)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zprop_list_t *entry;
@@ -883,13 +885,12 @@
 	}
 
 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
-
-		if (entry->pl_fixed)
+		if (entry->pl_fixed && !literal)
 			continue;
 
 		if (entry->pl_prop != ZPROP_INVAL &&
 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
-		    NULL, B_FALSE) == 0) {
+		    NULL, literal) == 0) {
 			if (strlen(buf) > entry->pl_width)
 				entry->pl_width = strlen(buf);
 		}
@@ -981,6 +982,7 @@
 	if (ret == 0 && !isopen &&
 	    (strncmp(pool, "mirror", 6) == 0 ||
 	    strncmp(pool, "raidz", 5) == 0 ||
+	    strncmp(pool, "draid", 5) == 0 ||
 	    strncmp(pool, "spare", 5) == 0 ||
 	    strcmp(pool, "log") == 0)) {
 		if (hdl != NULL)
@@ -1208,6 +1210,61 @@
 }
 
 /*
+ * Check if vdev list contains a dRAID vdev
+ */
+static boolean_t
+zpool_has_draid_vdev(nvlist_t *nvroot)
+{
+	nvlist_t **child;
+	uint_t children;
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (uint_t c = 0; c < children; c++) {
+			char *type;
+
+			if (nvlist_lookup_string(child[c],
+			    ZPOOL_CONFIG_TYPE, &type) == 0 &&
+			    strcmp(type, VDEV_TYPE_DRAID) == 0) {
+				return (B_TRUE);
+			}
+		}
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Output a dRAID top-level vdev name in to the provided buffer.
+ */
+static char *
+zpool_draid_name(char *name, int len, uint64_t data, uint64_t parity,
+    uint64_t spares, uint64_t children)
+{
+	snprintf(name, len, "%s%llu:%llud:%lluc:%llus",
+	    VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
+	    (u_longlong_t)children, (u_longlong_t)spares);
+
+	return (name);
+}
+
+/*
+ * Return B_TRUE if the provided name is a dRAID spare name.
+ */
+boolean_t
+zpool_is_draid_spare(const char *name)
+{
+	uint64_t spare_id, parity, vdev_id;
+
+	if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
+	    (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
+	    (u_longlong_t *)&spare_id) == 3) {
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Create the named pool, using the provided vdev list.  It is assumed
  * that the consumer has already validated the contents of the nvlist, so we
  * don't have to worry about error semantics.
@@ -1361,10 +1418,16 @@
 			    "one or more devices is out of space"));
 			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
-		case ENOTBLK:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "cache device must be a disk or disk slice"));
-			return (zfs_error(hdl, EZFS_BADDEV, msg));
+		case EINVAL:
+			if (zpool_has_draid_vdev(nvroot) &&
+			    zfeature_lookup_name("draid", NULL) != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "dRAID vdevs are unsupported by the "
+				    "kernel"));
+				return (zfs_error(hdl, EZFS_BADDEV, msg));
+			} else {
+				return (zpool_standard_error(hdl, errno, msg));
+			}
 
 		default:
 			return (zpool_standard_error(hdl, errno, msg));
@@ -1521,9 +1584,19 @@
 			break;
 
 		case EINVAL:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "invalid config; a pool with removing/removed "
-			    "vdevs does not support adding raidz vdevs"));
+
+			if (zpool_has_draid_vdev(nvroot) &&
+			    zfeature_lookup_name("draid", NULL) != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "dRAID vdevs are unsupported by the "
+				    "kernel"));
+			} else {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid config; a pool with removing/"
+				    "removed vdevs does not support adding "
+				    "raidz or dRAID vdevs"));
+			}
+
 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
 			break;
 
@@ -1553,12 +1626,6 @@
 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
 			break;
 
-		case ENOTBLK:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "cache device must be a disk or disk slice"));
-			(void) zfs_error(hdl, EZFS_BADDEV, msg);
-			break;
-
 		default:
 			(void) zpool_standard_error(hdl, errno, msg);
 		}
@@ -1582,10 +1649,6 @@
     const char *log_str)
 {
 	zfs_cmd_t zc = {"\0"};
-	char msg[1024];
-
-	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
-	    "cannot export '%s'"), zhp->zpool_name);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_cookie = force;
@@ -1600,11 +1663,13 @@
 			    "'%s' has an active shared spare which could be"
 			    " used by other pools once '%s' is exported."),
 			    zhp->zpool_name, zhp->zpool_name);
-			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
-			    msg));
+			return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
+			    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
+			    zhp->zpool_name));
 		default:
 			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
-			    msg));
+			    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
+			    zhp->zpool_name));
 		}
 	}
 
@@ -2014,7 +2079,7 @@
 					    "the zgenhostid(8) command.\n"));
 				}
 
-				(void) zfs_error_aux(hdl, aux);
+				(void) zfs_error_aux(hdl, "%s", aux);
 			}
 			(void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
 			break;
@@ -2159,14 +2224,13 @@
 }
 
 /*
- * Begin, suspend, or cancel the initialization (initializing of all free
- * blocks) for the given vdevs in the given pool.
+ * Begin, suspend, cancel, or uninit (clear) the initialization (initializing
+ * of all free blocks) for the given vdevs in the given pool.
  */
-int
-zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
-    nvlist_t *vds)
+static int
+zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+    nvlist_t *vds, boolean_t wait)
 {
-	char msg[1024];
 	int err;
 
 	nvlist_t *vdev_guids = fnvlist_alloc();
@@ -2178,26 +2242,51 @@
 	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
 	    guids_to_paths, &vd_errlist);
 
-	if (err == 0) {
-		err = lzc_initialize(zhp->zpool_name, cmd_type,
-		    vdev_guids, &errlist);
-		if (err == 0) {
-			fnvlist_free(vdev_guids);
-			fnvlist_free(guids_to_paths);
-			return (0);
-		}
-
-		if (errlist != NULL) {
-			vd_errlist = fnvlist_lookup_nvlist(errlist,
-			    ZPOOL_INITIALIZE_VDEVS);
-		}
-
-		(void) snprintf(msg, sizeof (msg),
-		    dgettext(TEXT_DOMAIN, "operation failed"));
-	} else {
+	if (err != 0) {
 		verify(vd_errlist != NULL);
+		goto list_errors;
 	}
 
+	err = lzc_initialize(zhp->zpool_name, cmd_type,
+	    vdev_guids, &errlist);
+
+	if (err != 0) {
+		if (errlist != NULL && nvlist_lookup_nvlist(errlist,
+		    ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) {
+			goto list_errors;
+		}
+
+		if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) {
+			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+			    "uninitialize is not supported by kernel"));
+		}
+
+		(void) zpool_standard_error(zhp->zpool_hdl, err,
+		    dgettext(TEXT_DOMAIN, "operation failed"));
+		goto out;
+	}
+
+	if (wait) {
+		for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL;
+		    elem = nvlist_next_nvpair(vdev_guids, elem)) {
+
+			uint64_t guid = fnvpair_value_uint64(elem);
+
+			err = lzc_wait_tag(zhp->zpool_name,
+			    ZPOOL_WAIT_INITIALIZE, guid, NULL);
+			if (err != 0) {
+				(void) zpool_standard_error_fmt(zhp->zpool_hdl,
+				    err, dgettext(TEXT_DOMAIN, "error "
+				    "waiting for '%s' to initialize"),
+				    nvpair_name(elem));
+
+				goto out;
+			}
+		}
+	}
+	goto out;
+
+list_errors:
 	for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(vd_errlist, elem)) {
 		int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
@@ -2211,15 +2300,28 @@
 		    "cannot initialize '%s'", path);
 	}
 
+out:
 	fnvlist_free(vdev_guids);
 	fnvlist_free(guids_to_paths);
 
-	if (vd_errlist != NULL) {
+	if (vd_errlist != NULL)
 		fnvlist_free(vd_errlist);
-		return (-1);
-	}
 
-	return (zpool_standard_error(zhp->zpool_hdl, err, msg));
+	return (err == 0 ? 0 : -1);
+}
+
+int
+zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+    nvlist_t *vds)
+{
+	return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE));
+}
+
+int
+zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+    nvlist_t *vds)
+{
+	return (zpool_initialize_impl(zhp, cmd_type, vds, B_TRUE));
 }
 
 static int
@@ -2241,47 +2343,50 @@
 	return (err);
 }
 
-/*
- * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for
- * the given vdevs in the given pool.
- */
-int
-zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
-    trimflags_t *trim_flags)
+static int
+zpool_trim_wait(zpool_handle_t *zhp, nvlist_t *vdev_guids)
 {
-	char msg[1024];
 	int err;
-
-	nvlist_t *vdev_guids = fnvlist_alloc();
-	nvlist_t *guids_to_paths = fnvlist_alloc();
-	nvlist_t *vd_errlist = NULL;
-	nvlist_t *errlist;
 	nvpair_t *elem;
 
-	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
-	    guids_to_paths, &vd_errlist);
-	if (err == 0) {
-		err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate,
-		    trim_flags->secure, vdev_guids, &errlist);
-		if (err == 0) {
-			fnvlist_free(vdev_guids);
-			fnvlist_free(guids_to_paths);
-			return (0);
-		}
+	for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL;
+	    elem = nvlist_next_nvpair(vdev_guids, elem)) {
 
-		if (errlist != NULL) {
-			vd_errlist = fnvlist_lookup_nvlist(errlist,
-			    ZPOOL_TRIM_VDEVS);
-		}
+		uint64_t guid = fnvpair_value_uint64(elem);
 
-		(void) snprintf(msg, sizeof (msg),
-		    dgettext(TEXT_DOMAIN, "operation failed"));
-	} else {
-		verify(vd_errlist != NULL);
+		err = lzc_wait_tag(zhp->zpool_name,
+		    ZPOOL_WAIT_TRIM, guid, NULL);
+		if (err != 0) {
+			(void) zpool_standard_error_fmt(zhp->zpool_hdl,
+			    err, dgettext(TEXT_DOMAIN, "error "
+			    "waiting to trim '%s'"), nvpair_name(elem));
+
+			return (err);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Check errlist and report any errors, omitting ones which should be
+ * suppressed. Returns B_TRUE if any errors were reported.
+ */
+static boolean_t
+check_trim_errs(zpool_handle_t *zhp, trimflags_t *trim_flags,
+    nvlist_t *guids_to_paths, nvlist_t *vds, nvlist_t *errlist)
+{
+	nvpair_t *elem;
+	boolean_t reported_errs = B_FALSE;
+	int num_vds = 0;
+	int num_suppressed_errs = 0;
+
+	for (elem = nvlist_next_nvpair(vds, NULL);
+	    elem != NULL; elem = nvlist_next_nvpair(vds, elem)) {
+		num_vds++;
 	}
 
-	for (elem = nvlist_next_nvpair(vd_errlist, NULL);
-	    elem != NULL; elem = nvlist_next_nvpair(vd_errlist, elem)) {
+	for (elem = nvlist_next_nvpair(errlist, NULL);
+	    elem != NULL; elem = nvlist_next_nvpair(errlist, elem)) {
 		int64_t vd_error = xlate_trim_err(fnvpair_value_int64(elem));
 		char *path;
 
@@ -2293,9 +2398,11 @@
 		if (vd_error == EZFS_TRIM_NOTSUP &&
 		    trim_flags->fullpool &&
 		    !trim_flags->secure) {
+			num_suppressed_errs++;
 			continue;
 		}
 
+		reported_errs = B_TRUE;
 		if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
 		    &path) != 0)
 			path = nvpair_name(elem);
@@ -2304,15 +2411,72 @@
 		    "cannot trim '%s'", path);
 	}
 
-	fnvlist_free(vdev_guids);
-	fnvlist_free(guids_to_paths);
-
-	if (vd_errlist != NULL) {
-		fnvlist_free(vd_errlist);
-		return (-1);
+	if (num_suppressed_errs == num_vds) {
+		(void) zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "no devices in pool support trim operations"));
+		(void) (zfs_error(zhp->zpool_hdl, EZFS_TRIM_NOTSUP,
+		    dgettext(TEXT_DOMAIN, "cannot trim")));
+		reported_errs = B_TRUE;
 	}
 
-	return (zpool_standard_error(zhp->zpool_hdl, err, msg));
+	return (reported_errs);
+}
+
+/*
+ * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for
+ * the given vdevs in the given pool.
+ */
+int
+zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
+    trimflags_t *trim_flags)
+{
+	int err;
+	int retval = 0;
+
+	nvlist_t *vdev_guids = fnvlist_alloc();
+	nvlist_t *guids_to_paths = fnvlist_alloc();
+	nvlist_t *errlist = NULL;
+
+	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
+	    guids_to_paths, &errlist);
+	if (err != 0) {
+		check_trim_errs(zhp, trim_flags, guids_to_paths, vds, errlist);
+		retval = -1;
+		goto out;
+	}
+
+	err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate,
+	    trim_flags->secure, vdev_guids, &errlist);
+	if (err != 0) {
+		nvlist_t *vd_errlist;
+		if (errlist != NULL && nvlist_lookup_nvlist(errlist,
+		    ZPOOL_TRIM_VDEVS, &vd_errlist) == 0) {
+			if (check_trim_errs(zhp, trim_flags, guids_to_paths,
+			    vds, vd_errlist)) {
+				retval = -1;
+				goto out;
+			}
+		} else {
+			char msg[1024];
+
+			(void) snprintf(msg, sizeof (msg),
+			    dgettext(TEXT_DOMAIN, "operation failed"));
+			zpool_standard_error(zhp->zpool_hdl, err, msg);
+			retval = -1;
+			goto out;
+		}
+	}
+
+
+	if (trim_flags->wait)
+		retval = zpool_trim_wait(zhp, vdev_guids);
+
+out:
+	if (errlist != NULL)
+		fnvlist_free(errlist);
+	fnvlist_free(vdev_guids);
+	fnvlist_free(guids_to_paths);
+	return (retval);
 }
 
 /*
@@ -2373,7 +2537,8 @@
 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 		(void) nvlist_lookup_uint64_array(nvroot,
 		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
-		if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
+		if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
+		    ps->pss_state == DSS_SCANNING) {
 			if (cmd == POOL_SCRUB_PAUSE)
 				return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
 			else
@@ -2488,6 +2653,36 @@
 			errno = 0;
 			vdev_id = strtoull(idx, &end, 10);
 
+			/*
+			 * If we are looking for a raidz and a parity is
+			 * specified, make sure it matches.
+			 */
+			int rzlen = strlen(VDEV_TYPE_RAIDZ);
+			assert(rzlen == strlen(VDEV_TYPE_DRAID));
+			int typlen = strlen(type);
+			if ((strncmp(type, VDEV_TYPE_RAIDZ, rzlen) == 0 ||
+			    strncmp(type, VDEV_TYPE_DRAID, rzlen) == 0) &&
+			    typlen != rzlen) {
+				uint64_t vdev_parity;
+				int parity = *(type + rzlen) - '0';
+
+				if (parity <= 0 || parity > 3 ||
+				    (typlen - rzlen) != 1) {
+					/*
+					 * Nonsense parity specified, can
+					 * never match
+					 */
+					free(type);
+					return (NULL);
+				}
+				verify(nvlist_lookup_uint64(nv,
+				    ZPOOL_CONFIG_NPARITY, &vdev_parity) == 0);
+				if ((int)vdev_parity != parity) {
+					free(type);
+					break;
+				}
+			}
+
 			free(type);
 			if (errno != 0)
 				return (NULL);
@@ -2605,6 +2800,11 @@
 	    VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
 	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
 		return (B_TRUE);
+
+	if (strncmp(name, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0 &&
+	    !zpool_is_draid_spare(name))
+		return (B_TRUE);
+
 	return (B_FALSE);
 }
 
@@ -2789,45 +2989,6 @@
 }
 
 /*
- * If the device has being dynamically expanded then we need to relabel
- * the disk to use the new unallocated space.
- */
-static int
-zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
-{
-	int fd, error;
-
-	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "relabel '%s': unable to open device: %d"), path, errno);
-		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
-	}
-
-	/*
-	 * It's possible that we might encounter an error if the device
-	 * does not have any unallocated space left. If so, we simply
-	 * ignore that error and continue on.
-	 *
-	 * Also, we don't call efi_rescan() - that would just return EBUSY.
-	 * The module will do it for us in vdev_disk_open().
-	 */
-	error = efi_use_whole_disk(fd);
-
-	/* Flush the buffers to disk and invalidate the page cache. */
-	(void) fsync(fd);
-	(void) ioctl(fd, BLKFLSBUF);
-
-	(void) close(fd);
-	if (error && error != VT_ENOSPC) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "relabel '%s': unable to read disk capacity"), path);
-		return (zfs_error(hdl, EZFS_NOCAP, msg));
-	}
-
-	return (0);
-}
-
-/*
  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
  *
  * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
@@ -2895,7 +3056,7 @@
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	if (avail_spare)
+	if (!(flags & ZFS_ONLINE_SPARE) && avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	if ((flags & ZFS_ONLINE_EXPAND ||
@@ -3003,6 +3164,40 @@
 }
 
 /*
+ * Remove the specified vdev asynchronously from the configuration, so
+ * that it may come ONLINE if reinserted. This is called from zed on
+ * Udev remove event.
+ * Note: We also have a similar function zpool_vdev_remove() that
+ * removes the vdev from the pool.
+ */
+int
+zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = {"\0"};
+	char errbuf[1024];
+	nvlist_t *tgt;
+	boolean_t avail_spare, l2cache;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
+	    NULL)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
+
+	zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+
+	zc.zc_cookie = VDEV_STATE_REMOVED;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, errbuf));
+}
+
+/*
  * Mark the given vdev faulted.
  */
 int
@@ -3020,7 +3215,7 @@
 	zc.zc_cookie = VDEV_STATE_FAULTED;
 	zc.zc_obj = aux;
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
 
 	switch (errno) {
@@ -3055,7 +3250,7 @@
 	zc.zc_cookie = VDEV_STATE_DEGRADED;
 	zc.zc_obj = aux;
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
 
 	return (zpool_standard_error(hdl, errno, msg));
@@ -3077,7 +3272,8 @@
 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
 		    &type) == 0);
 
-		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+		if ((strcmp(type, VDEV_TYPE_SPARE) == 0 ||
+		    strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0) &&
 		    children == 2 && child[which] == tgt)
 			return (B_TRUE);
 
@@ -3094,8 +3290,8 @@
  * If 'replacing' is specified, the new disk will replace the old one.
  */
 int
-zpool_vdev_attach(zpool_handle_t *zhp,
-    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
+zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
+    const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
 {
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
@@ -3108,7 +3304,6 @@
 	uint_t children;
 	nvlist_t *config_root;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	boolean_t rootpool = zpool_is_bootable(zhp);
 
 	if (replacing)
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@@ -3130,6 +3325,14 @@
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 	zc.zc_cookie = replacing;
+	zc.zc_simple = rebuild;
+
+	if (rebuild &&
+	    zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "the loaded zfs module doesn't support device rebuilds"));
+		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+	}
 
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0 || children != 1) {
@@ -3168,18 +3371,8 @@
 
 	zcmd_free_nvlists(&zc);
 
-	if (ret == 0) {
-		if (rootpool) {
-			/*
-			 * XXX need a better way to prevent user from
-			 * booting up a half-baked vdev.
-			 */
-			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
-			    "sure to wait until resilver is done "
-			    "before rebooting.\n"));
-		}
+	if (ret == 0)
 		return (0);
-	}
 
 	switch (errno) {
 	case ENOTSUP:
@@ -3190,16 +3383,25 @@
 			uint64_t version = zpool_get_prop_int(zhp,
 			    ZPOOL_PROP_VERSION, NULL);
 
-			if (islog)
+			if (islog) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "cannot replace a log with a spare"));
-			else if (version >= SPA_VERSION_MULTI_REPLACE)
+			} else if (rebuild) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "only mirror and dRAID vdevs support "
+				    "sequential reconstruction"));
+			} else if (zpool_is_draid_spare(new_disk)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "dRAID spares can only replace child "
+				    "devices in their parent's dRAID vdev"));
+			} else if (version >= SPA_VERSION_MULTI_REPLACE) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "already in replacing/spare config; wait "
 				    "for completion or use 'zpool detach'"));
-			else
+			} else {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "cannot replace a replacing device"));
+			}
 		} else {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "can only attach to mirrors and top-level "
@@ -3362,7 +3564,7 @@
     nvlist_t *props, splitflags_t flags)
 {
 	zfs_cmd_t zc = {"\0"};
-	char msg[1024];
+	char msg[1024], *bias;
 	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
 	nvlist_t **varray = NULL, *zc_props = NULL;
 	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
@@ -3420,6 +3622,7 @@
 
 	for (c = 0; c < children; c++) {
 		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
+		boolean_t is_special = B_FALSE, is_dedup = B_FALSE;
 		char *type;
 		nvlist_t **mchild, *vdev;
 		uint_t mchildren;
@@ -3453,13 +3656,26 @@
 		lastlog = 0;
 		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
 		    == 0);
-		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+
+		if (strcmp(type, VDEV_TYPE_INDIRECT) == 0) {
+			vdev = child[c];
+			if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
+				goto out;
+			continue;
+		} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "Source pool must be composed only of mirrors\n"));
 			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
 			goto out;
 		}
 
+		if (nvlist_lookup_string(child[c],
+		    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0) {
+			if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0)
+				is_special = B_TRUE;
+			else if (strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0)
+				is_dedup = B_TRUE;
+		}
 		verify(nvlist_lookup_nvlist_array(child[c],
 		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
 
@@ -3477,6 +3693,20 @@
 
 		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
 			goto out;
+
+		if (flags.dryrun != 0) {
+			if (is_dedup == B_TRUE) {
+				if (nvlist_add_string(varray[vcount - 1],
+				    ZPOOL_CONFIG_ALLOCATION_BIAS,
+				    VDEV_ALLOC_BIAS_DEDUP) != 0)
+					goto out;
+			} else if (is_special == B_TRUE) {
+				if (nvlist_add_string(varray[vcount - 1],
+				    ZPOOL_CONFIG_ALLOCATION_BIAS,
+				    VDEV_ALLOC_BIAS_SPECIAL) != 0)
+					goto out;
+			}
+		}
 	}
 
 	/* did we find every disk the user specified? */
@@ -3586,6 +3816,12 @@
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 
+	if (zpool_is_draid_spare(path)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dRAID spares cannot be removed"));
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+	}
+
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
 	    &islog)) == NULL)
@@ -3598,13 +3834,6 @@
 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
 	}
 
-	if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "root pool can not have removed devices, "
-		    "because GRUB does not understand them"));
-		return (zfs_error(hdl, EINVAL, msg));
-	}
-
 	zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
@@ -3790,7 +4019,7 @@
 	zc.zc_guid = guid;
 	zc.zc_cookie = ZPOOL_NO_REWIND;
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
+	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
 		return (0);
 
 	return (zpool_standard_error(hdl, errno, msg));
@@ -3857,86 +4086,6 @@
 	return (0);
 }
 
-#if defined(__sun__) || defined(__sun)
-/*
- * Convert from a devid string to a path.
- */
-static char *
-devid_to_path(char *devid_str)
-{
-	ddi_devid_t devid;
-	char *minor;
-	char *path;
-	devid_nmlist_t *list = NULL;
-	int ret;
-
-	if (devid_str_decode(devid_str, &devid, &minor) != 0)
-		return (NULL);
-
-	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
-
-	devid_str_free(minor);
-	devid_free(devid);
-
-	if (ret != 0)
-		return (NULL);
-
-	/*
-	 * In a case the strdup() fails, we will just return NULL below.
-	 */
-	path = strdup(list[0].devname);
-
-	devid_free_nmlist(list);
-
-	return (path);
-}
-
-/*
- * Convert from a path to a devid string.
- */
-static char *
-path_to_devid(const char *path)
-{
-	int fd;
-	ddi_devid_t devid;
-	char *minor, *ret;
-
-	if ((fd = open(path, O_RDONLY)) < 0)
-		return (NULL);
-
-	minor = NULL;
-	ret = NULL;
-	if (devid_get(fd, &devid) == 0) {
-		if (devid_get_minor_name(fd, &minor) == 0)
-			ret = devid_str_encode(devid, minor);
-		if (minor != NULL)
-			devid_str_free(minor);
-		devid_free(devid);
-	}
-	(void) close(fd);
-
-	return (ret);
-}
-
-/*
- * Issue the necessary ioctl() to update the stored path value for the vdev.  We
- * ignore any failure here, since a common case is for an unprivileged user to
- * type 'zpool status', and we'll display the correct information anyway.
- */
-static void
-set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
-{
-	zfs_cmd_t zc = {"\0"};
-
-	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
-	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
-	    &zc.zc_guid) == 0);
-
-	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
-}
-#endif /* sun */
-
 #define	PATH_BUF_LEN	64
 
 /*
@@ -3992,54 +4141,6 @@
 		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
 		path = buf;
 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-#if defined(__sun__) || defined(__sun)
-		/*
-		 * Live VDEV path updates to a kernel VDEV during a
-		 * zpool_vdev_name lookup are not supported on Linux.
-		 */
-		char *devid;
-		vdev_stat_t *vs;
-		uint_t vsc;
-
-		/*
-		 * If the device is dead (faulted, offline, etc) then don't
-		 * bother opening it.  Otherwise we may be forcing the user to
-		 * open a misbehaving device, which can have undesirable
-		 * effects.
-		 */
-		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
-		    (uint64_t **)&vs, &vsc) != 0 ||
-		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
-		    zhp != NULL &&
-		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
-			/*
-			 * Determine if the current path is correct.
-			 */
-			char *newdevid = path_to_devid(path);
-
-			if (newdevid == NULL ||
-			    strcmp(devid, newdevid) != 0) {
-				char *newpath;
-
-				if ((newpath = devid_to_path(devid)) != NULL) {
-					/*
-					 * Update the path appropriately.
-					 */
-					set_path(zhp, nv, newpath);
-					if (nvlist_add_string(nv,
-					    ZPOOL_CONFIG_PATH, newpath) == 0)
-						verify(nvlist_lookup_string(nv,
-						    ZPOOL_CONFIG_PATH,
-						    &path) == 0);
-					free(newpath);
-				}
-			}
-
-			if (newdevid)
-				devid_str_free(newdevid);
-		}
-#endif /* sun */
-
 		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
 			char *rp = realpath(path, NULL);
 			if (rp) {
@@ -4054,14 +4155,14 @@
 		 */
 		if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
 		    !(name_flags & VDEV_NAME_PATH)) {
-			path = strrchr(path, '/');
-			path++;
+			path = zfs_strip_path(path);
 		}
 
 		/*
-		 * Remove the partition from the path it this is a whole disk.
+		 * Remove the partition from the path if this is a whole disk.
 		 */
-		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
+		if (strcmp(type, VDEV_TYPE_DRAID_SPARE) != 0 &&
+		    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
 		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
 			return (zfs_strip_partition(path));
 		}
@@ -4080,6 +4181,27 @@
 		}
 
 		/*
+		 * If it's a dRAID device, we add parity, groups, and spares.
+		 */
+		if (strcmp(path, VDEV_TYPE_DRAID) == 0) {
+			uint64_t ndata, nparity, nspares;
+			nvlist_t **child;
+			uint_t children;
+
+			verify(nvlist_lookup_nvlist_array(nv,
+			    ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
+			verify(nvlist_lookup_uint64(nv,
+			    ZPOOL_CONFIG_NPARITY, &nparity) == 0);
+			verify(nvlist_lookup_uint64(nv,
+			    ZPOOL_CONFIG_DRAID_NDATA, &ndata) == 0);
+			verify(nvlist_lookup_uint64(nv,
+			    ZPOOL_CONFIG_DRAID_NSPARES, &nspares) == 0);
+
+			path = zpool_draid_name(buf, sizeof (buf), ndata,
+			    nparity, nspares, children);
+		}
+
+		/*
 		 * We identify each top-level vdev by using a <type-id>
 		 * naming convention.
 		 */
@@ -4129,7 +4251,7 @@
 	zc.zc_nvlist_dst_size = count;
 	(void) strcpy(zc.zc_name, zhp->zpool_name);
 	for (;;) {
-		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+		if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG,
 		    &zc) != 0) {
 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
 			if (errno == ENOMEM) {
@@ -4244,7 +4366,7 @@
 	fnvlist_add_string(args, "message", message);
 	err = zcmd_write_src_nvlist(hdl, &zc, args);
 	if (err == 0)
-		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
+		err = zfs_ioctl(hdl, ZFS_IOC_LOG_HISTORY, &zc);
 	nvlist_free(args);
 	zcmd_free_nvlists(&zc);
 	return (err);
@@ -4271,7 +4393,7 @@
 	zc.zc_history_len = *len;
 	zc.zc_history_offset = *off;
 
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
 		switch (errno) {
 		case EPERM:
 			return (zfs_error_fmt(hdl, EZFS_PERM,
@@ -4436,13 +4558,10 @@
 zpool_events_clear(libzfs_handle_t *hdl, int *count)
 {
 	zfs_cmd_t zc = {"\0"};
-	char msg[1024];
-
-	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
-	    "cannot clear events"));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
-		return (zpool_standard_error_fmt(hdl, errno, msg));
+		return (zpool_standard_error(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot clear events")));
 
 	if (count != NULL)
 		*count = (int)zc.zc_cookie; /* # of events cleared */
@@ -4486,9 +4605,9 @@
 	return (error);
 }
 
-void
-zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
-    char *pathname, size_t len)
+static void
+zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len, boolean_t always_unmounted)
 {
 	zfs_cmd_t zc = {"\0"};
 	boolean_t mounted = B_FALSE;
@@ -4505,7 +4624,7 @@
 	/* get the dataset's name */
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_obj = dsobj;
-	if (ioctl(zhp->zpool_hdl->libzfs_fd,
+	if (zfs_ioctl(zhp->zpool_hdl,
 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
 		/* just write out a path of two object numbers */
 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
@@ -4515,12 +4634,13 @@
 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
 
 	/* find out if the dataset is mounted */
-	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
+	mounted = !always_unmounted && is_mounted(zhp->zpool_hdl, dsname,
+	    &mntpnt);
 
 	/* get the corrupted object's path */
 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
 	zc.zc_obj = obj;
-	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_OBJ_TO_PATH,
 	    &zc) == 0) {
 		if (mounted) {
 			(void) snprintf(pathname, len, "%s%s", mntpnt,
@@ -4536,256 +4656,334 @@
 	free(mntpnt);
 }
 
-/*
- * Read the EFI label from the config, if a label does not exist then
- * pass back the error to the caller. If the caller has passed a non-NULL
- * diskaddr argument then we set it to the starting address of the EFI
- * partition.
- */
-static int
-read_efi_label(nvlist_t *config, diskaddr_t *sb)
+void
+zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len)
 {
-	char *path;
-	int fd;
-	char diskname[MAXPATHLEN];
-	int err = -1;
-
-	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
-		return (err);
-
-	(void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
-	    strrchr(path, '/'));
-	if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
-		struct dk_gpt *vtoc;
-
-		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
-			if (sb != NULL)
-				*sb = vtoc->efi_parts[0].p_start;
-			efi_free(vtoc);
-		}
-		(void) close(fd);
-	}
-	return (err);
+	zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_FALSE);
 }
 
-/*
- * determine where a partition starts on a disk in the current
- * configuration
- */
-static diskaddr_t
-find_start_block(nvlist_t *config)
+void
+zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len)
 {
-	nvlist_t **child;
-	uint_t c, children;
-	diskaddr_t sb = MAXOFFSET_T;
-	uint64_t wholedisk;
-
-	if (nvlist_lookup_nvlist_array(config,
-	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
-		if (nvlist_lookup_uint64(config,
-		    ZPOOL_CONFIG_WHOLE_DISK,
-		    &wholedisk) != 0 || !wholedisk) {
-			return (MAXOFFSET_T);
-		}
-		if (read_efi_label(config, &sb) < 0)
-			sb = MAXOFFSET_T;
-		return (sb);
-	}
-
-	for (c = 0; c < children; c++) {
-		sb = find_start_block(child[c]);
-		if (sb != MAXOFFSET_T) {
-			return (sb);
-		}
-	}
-	return (MAXOFFSET_T);
+	zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_TRUE);
 }
-
-static int
-zpool_label_disk_check(char *path)
-{
-	struct dk_gpt *vtoc;
-	int fd, err;
-
-	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
-		return (errno);
-
-	if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
-		(void) close(fd);
-		return (err);
-	}
-
-	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
-		efi_free(vtoc);
-		(void) close(fd);
-		return (EIDRM);
-	}
-
-	efi_free(vtoc);
-	(void) close(fd);
-	return (0);
-}
-
 /*
- * Generate a unique partition name for the ZFS member.  Partitions must
- * have unique names to ensure udev will be able to create symlinks under
- * /dev/disk/by-partlabel/ for all pool members.  The partition names are
- * of the form <pool>-<unique-id>.
- */
-static void
-zpool_label_name(char *label_name, int label_size)
-{
-	uint64_t id = 0;
-	int fd;
-
-	fd = open("/dev/urandom", O_RDONLY);
-	if (fd >= 0) {
-		if (read(fd, &id, sizeof (id)) != sizeof (id))
-			id = 0;
-
-		close(fd);
-	}
-
-	if (id == 0)
-		id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
-
-	snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
-}
-
-/*
- * Label an individual disk.  The name provided is the short name,
- * stripped of any leading /dev path.
+ * Wait while the specified activity is in progress in the pool.
  */
 int
-zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
+zpool_wait(zpool_handle_t *zhp, zpool_wait_activity_t activity)
 {
-	char path[MAXPATHLEN];
-	struct dk_gpt *vtoc;
-	int rval, fd;
-	size_t resv = EFI_MIN_RESV_SIZE;
-	uint64_t slice_size;
-	diskaddr_t start_block;
-	char errbuf[1024];
+	boolean_t missing;
 
-	/* prepare an error message just in case */
-	(void) snprintf(errbuf, sizeof (errbuf),
-	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
+	int error = zpool_wait_status(zhp, activity, &missing, NULL);
 
-	if (zhp) {
-		nvlist_t *nvroot;
-
-		verify(nvlist_lookup_nvlist(zhp->zpool_config,
-		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-
-		if (zhp->zpool_start_block == 0)
-			start_block = find_start_block(nvroot);
-		else
-			start_block = zhp->zpool_start_block;
-		zhp->zpool_start_block = start_block;
+	if (missing) {
+		(void) zpool_standard_error_fmt(zhp->zpool_hdl, ENOENT,
+		    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
+		    zhp->zpool_name);
+		return (ENOENT);
 	} else {
-		/* new pool */
-		start_block = NEW_START_BLOCK;
+		return (error);
+	}
+}
+
+/*
+ * Wait for the given activity and return the status of the wait (whether or not
+ * any waiting was done) in the 'waited' parameter. Non-existent pools are
+ * reported via the 'missing' parameter, rather than by printing an error
+ * message. This is convenient when this function is called in a loop over a
+ * long period of time (as it is, for example, by zpool's wait cmd). In that
+ * scenario, a pool being exported or destroyed should be considered a normal
+ * event, so we don't want to print an error when we find that the pool doesn't
+ * exist.
+ */
+int
+zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
+    boolean_t *missing, boolean_t *waited)
+{
+	int error = lzc_wait(zhp->zpool_name, activity, waited);
+	*missing = (error == ENOENT);
+	if (*missing)
+		return (0);
+
+	if (error != 0) {
+		(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
+		    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
+		    zhp->zpool_name);
 	}
 
-	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+	return (error);
+}
 
-	if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
-		/*
-		 * This shouldn't happen.  We've long since verified that this
-		 * is a valid device.
-		 */
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "label '%s': unable to open device: %d"), path, errno);
-		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+int
+zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
+{
+	int error = lzc_set_bootenv(zhp->zpool_name, envmap);
+	if (error != 0) {
+		(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
+		    dgettext(TEXT_DOMAIN,
+		    "error setting bootenv in pool '%s'"), zhp->zpool_name);
 	}
 
-	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
-		/*
-		 * The only way this can fail is if we run out of memory, or we
-		 * were unable to read the disk's capacity
-		 */
-		if (errno == ENOMEM)
-			(void) no_memory(hdl);
+	return (error);
+}
 
-		(void) close(fd);
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "label '%s': unable to read disk capacity"), path);
+int
+zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)
+{
+	nvlist_t *nvl;
+	int error;
 
-		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	nvl = NULL;
+	error = lzc_get_bootenv(zhp->zpool_name, &nvl);
+	if (error != 0) {
+		(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
+		    dgettext(TEXT_DOMAIN,
+		    "error getting bootenv in pool '%s'"), zhp->zpool_name);
+	} else {
+		*nvlp = nvl;
 	}
 
-	slice_size = vtoc->efi_last_u_lba + 1;
-	slice_size -= EFI_MIN_RESV_SIZE;
-	if (start_block == MAXOFFSET_T)
-		start_block = NEW_START_BLOCK;
-	slice_size -= start_block;
-	slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
+	return (error);
+}
 
-	vtoc->efi_parts[0].p_start = start_block;
-	vtoc->efi_parts[0].p_size = slice_size;
+/*
+ * Attempt to read and parse feature file(s) (from "compatibility" property).
+ * Files contain zpool feature names, comma or whitespace-separated.
+ * Comments (# character to next newline) are discarded.
+ *
+ * Arguments:
+ *  compatibility : string containing feature filenames
+ *  features : either NULL or pointer to array of boolean
+ *  report : either NULL or pointer to string buffer
+ *  rlen : length of "report" buffer
+ *
+ * compatibility is NULL (unset), "", "off", "legacy", or list of
+ * comma-separated filenames. filenames should either be absolute,
+ * or relative to:
+ *   1) ZPOOL_SYSCONF_COMPAT_D (eg: /etc/zfs/compatibility.d) or
+ *   2) ZPOOL_DATA_COMPAT_D (eg: /usr/share/zfs/compatibility.d).
+ * (Unset), "" or "off" => enable all features
+ * "legacy" => disable all features
+ *
+ * Any feature names read from files which match unames in spa_feature_table
+ * will have the corresponding boolean set in the features array (if non-NULL).
+ * If more than one feature set specified, only features present in *all* of
+ * them will be set.
+ *
+ * "report" if not NULL will be populated with a suitable status message.
+ *
+ * Return values:
+ *   ZPOOL_COMPATIBILITY_OK : files read and parsed ok
+ *   ZPOOL_COMPATIBILITY_BADFILE : file too big or not a text file
+ *   ZPOOL_COMPATIBILITY_BADTOKEN : SYSCONF file contains invalid feature name
+ *   ZPOOL_COMPATIBILITY_WARNTOKEN : DATA file contains invalid feature name
+ *   ZPOOL_COMPATIBILITY_NOFILES : no feature files found
+ */
+zpool_compat_status_t
+zpool_load_compat(const char *compat, boolean_t *features, char *report,
+    size_t rlen)
+{
+	int sdirfd, ddirfd, featfd;
+	struct stat fs;
+	char *fc;
+	char *ps, *ls, *ws;
+	char *file, *line, *word;
+
+	char l_compat[ZFS_MAXPROPLEN];
+
+	boolean_t ret_nofiles = B_TRUE;
+	boolean_t ret_badfile = B_FALSE;
+	boolean_t ret_badtoken = B_FALSE;
+	boolean_t ret_warntoken = B_FALSE;
+
+	/* special cases (unset), "" and "off" => enable all features */
+	if (compat == NULL || compat[0] == '\0' ||
+	    strcmp(compat, ZPOOL_COMPAT_OFF) == 0) {
+		if (features != NULL)
+			for (uint_t i = 0; i < SPA_FEATURES; i++)
+				features[i] = B_TRUE;
+		if (report != NULL)
+			strlcpy(report, gettext("all features enabled"), rlen);
+		return (ZPOOL_COMPATIBILITY_OK);
+	}
+
+	/* Final special case "legacy" => disable all features */
+	if (strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0) {
+		if (features != NULL)
+			for (uint_t i = 0; i < SPA_FEATURES; i++)
+				features[i] = B_FALSE;
+		if (report != NULL)
+			strlcpy(report, gettext("all features disabled"), rlen);
+		return (ZPOOL_COMPATIBILITY_OK);
+	}
 
 	/*
-	 * Why we use V_USR: V_BACKUP confuses users, and is considered
-	 * disposable by some EFI utilities (since EFI doesn't have a backup
-	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
-	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
-	 * etc. were all pretty specific.  V_USR is as close to reality as we
-	 * can get, in the absence of V_OTHER.
+	 * Start with all true; will be ANDed with results from each file
 	 */
-	vtoc->efi_parts[0].p_tag = V_USR;
-	zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
+	if (features != NULL)
+		for (uint_t i = 0; i < SPA_FEATURES; i++)
+			features[i] = B_TRUE;
 
-	vtoc->efi_parts[8].p_start = slice_size + start_block;
-	vtoc->efi_parts[8].p_size = resv;
-	vtoc->efi_parts[8].p_tag = V_RESERVED;
-
-	rval = efi_write(fd, vtoc);
-
-	/* Flush the buffers to disk and invalidate the page cache. */
-	(void) fsync(fd);
-	(void) ioctl(fd, BLKFLSBUF);
-
-	if (rval == 0)
-		rval = efi_rescan(fd);
+	char err_badfile[ZFS_MAXPROPLEN] = "";
+	char err_badtoken[ZFS_MAXPROPLEN] = "";
 
 	/*
-	 * Some block drivers (like pcata) may not support EFI GPT labels.
-	 * Print out a helpful error message directing the user to manually
-	 * label the disk and give a specific slice.
+	 * We ignore errors from the directory open()
+	 * as they're only needed if the filename is relative
+	 * which will be checked during the openat().
 	 */
-	if (rval != 0) {
-		(void) close(fd);
-		efi_free(vtoc);
 
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
-		    "parted(8) and then provide a specific slice: %d"), rval);
-		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+/* O_PATH safer than O_RDONLY if system allows it */
+#if defined(O_PATH)
+#define	ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_PATH)
+#else
+#define	ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_RDONLY)
+#endif
+
+	sdirfd = open(ZPOOL_SYSCONF_COMPAT_D, ZC_DIR_FLAGS);
+	ddirfd = open(ZPOOL_DATA_COMPAT_D, ZC_DIR_FLAGS);
+
+	(void) strlcpy(l_compat, compat, ZFS_MAXPROPLEN);
+
+	for (file = strtok_r(l_compat, ",", &ps);
+	    file != NULL;
+	    file = strtok_r(NULL, ",", &ps)) {
+
+		boolean_t l_features[SPA_FEATURES];
+
+		enum { Z_SYSCONF, Z_DATA } source;
+
+		/* try sysconfdir first, then datadir */
+		source = Z_SYSCONF;
+		if ((featfd = openat(sdirfd, file, O_RDONLY | O_CLOEXEC)) < 0) {
+			featfd = openat(ddirfd, file, O_RDONLY | O_CLOEXEC);
+			source = Z_DATA;
+		}
+
+		/* File readable and correct size? */
+		if (featfd < 0 ||
+		    fstat(featfd, &fs) < 0 ||
+		    fs.st_size < 1 ||
+		    fs.st_size > ZPOOL_COMPAT_MAXSIZE) {
+			(void) close(featfd);
+			strlcat(err_badfile, file, ZFS_MAXPROPLEN);
+			strlcat(err_badfile, " ", ZFS_MAXPROPLEN);
+			ret_badfile = B_TRUE;
+			continue;
+		}
+
+/* Prefault the file if system allows */
+#if defined(MAP_POPULATE)
+#define	ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_POPULATE)
+#elif defined(MAP_PREFAULT_READ)
+#define	ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_PREFAULT_READ)
+#else
+#define	ZC_MMAP_FLAGS (MAP_PRIVATE)
+#endif
+
+		/* private mmap() so we can strtok safely */
+		fc = (char *)mmap(NULL, fs.st_size, PROT_READ | PROT_WRITE,
+		    ZC_MMAP_FLAGS, featfd, 0);
+		(void) close(featfd);
+
+		/* map ok, and last character == newline? */
+		if (fc == MAP_FAILED || fc[fs.st_size - 1] != '\n') {
+			(void) munmap((void *) fc, fs.st_size);
+			strlcat(err_badfile, file, ZFS_MAXPROPLEN);
+			strlcat(err_badfile, " ", ZFS_MAXPROPLEN);
+			ret_badfile = B_TRUE;
+			continue;
+		}
+
+		ret_nofiles = B_FALSE;
+
+		for (uint_t i = 0; i < SPA_FEATURES; i++)
+			l_features[i] = B_FALSE;
+
+		/* replace final newline with NUL to ensure string ends */
+		fc[fs.st_size - 1] = '\0';
+
+		for (line = strtok_r(fc, "\n", &ls);
+		    line != NULL;
+		    line = strtok_r(NULL, "\n", &ls)) {
+			/* discard comments */
+			char *r = strchr(line, '#');
+			if (r != NULL)
+				*r = '\0';
+
+			for (word = strtok_r(line, ", \t", &ws);
+			    word != NULL;
+			    word = strtok_r(NULL, ", \t", &ws)) {
+				/* Find matching feature name */
+				uint_t f;
+				for (f = 0; f < SPA_FEATURES; f++) {
+					zfeature_info_t *fi =
+					    &spa_feature_table[f];
+					if (strcmp(word, fi->fi_uname) == 0) {
+						l_features[f] = B_TRUE;
+						break;
+					}
+				}
+				if (f < SPA_FEATURES)
+					continue;
+
+				/* found an unrecognized word */
+				/* lightly sanitize it */
+				if (strlen(word) > 32)
+					word[32] = '\0';
+				for (char *c = word; *c != '\0'; c++)
+					if (!isprint(*c))
+						*c = '?';
+
+				strlcat(err_badtoken, word, ZFS_MAXPROPLEN);
+				strlcat(err_badtoken, " ", ZFS_MAXPROPLEN);
+				if (source == Z_SYSCONF)
+					ret_badtoken = B_TRUE;
+				else
+					ret_warntoken = B_TRUE;
+			}
+		}
+		(void) munmap((void *) fc, fs.st_size);
+
+		if (features != NULL)
+			for (uint_t i = 0; i < SPA_FEATURES; i++)
+				features[i] &= l_features[i];
 	}
+	(void) close(sdirfd);
+	(void) close(ddirfd);
 
-	(void) close(fd);
-	efi_free(vtoc);
-
-	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
-	(void) zfs_append_partition(path, MAXPATHLEN);
-
-	/* Wait to udev to signal use the device has settled. */
-	rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
-	if (rval) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
-		    "detect device partitions on '%s': %d"), path, rval);
-		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	/* Return the most serious error */
+	if (ret_badfile) {
+		if (report != NULL)
+			snprintf(report, rlen, gettext("could not read/"
+			    "parse feature file(s): %s"), err_badfile);
+		return (ZPOOL_COMPATIBILITY_BADFILE);
 	}
-
-	/* We can't be to paranoid.  Read the label back and verify it. */
-	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
-	rval = zpool_label_disk_check(path);
-	if (rval) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
-		    "EFI label on '%s' is damaged.  Ensure\nthis device "
-		    "is not in use, and is functioning properly: %d"),
-		    path, rval);
-		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	if (ret_nofiles) {
+		if (report != NULL)
+			strlcpy(report,
+			    gettext("no valid compatibility files specified"),
+			    rlen);
+		return (ZPOOL_COMPATIBILITY_NOFILES);
 	}
-
-	return (0);
+	if (ret_badtoken) {
+		if (report != NULL)
+			snprintf(report, rlen, gettext("invalid feature "
+			    "name(s) in local compatibility files: %s"),
+			    err_badtoken);
+		return (ZPOOL_COMPATIBILITY_BADTOKEN);
+	}
+	if (ret_warntoken) {
+		if (report != NULL)
+			snprintf(report, rlen, gettext("unrecognized feature "
+			    "name(s) in distribution compatibility files: %s"),
+			    err_badtoken);
+		return (ZPOOL_COMPATIBILITY_WARNTOKEN);
+	}
+	if (report != NULL)
+		strlcpy(report, gettext("compatibility set ok"), rlen);
+	return (ZPOOL_COMPATIBILITY_OK);
 }

diff --git a/zfs/lib/libzfs/libzfs_sendrecv.c b/zfs/lib/libzfs/libzfs_sendrecv.c
index faab408..4bdb25e 100644
--- a/zfs/lib/libzfs/libzfs_sendrecv.c
+++ b/zfs/lib/libzfs/libzfs_sendrecv.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
  * All rights reserved
@@ -48,7 +48,6 @@
 #include <sys/avl.h>
 #include <sys/debug.h>
 #include <sys/stat.h>
-#include <stddef.h>
 #include <pthread.h>
 #include <umem.h>
 #include <time.h>
@@ -61,6 +60,7 @@
 #include "zfs_prop.h"
 #include "zfs_fletcher.h"
 #include "libzfs_impl.h"
+#include <cityhash.h>
 #include <zlib.h>
 #include <sys/zio_checksum.h>
 #include <sys/dsl_crypt.h>
@@ -68,135 +68,26 @@
 #include <sys/socket.h>
 #include <sys/sha2.h>
 
-/* in libzfs_dataset.c */
-extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
-
 static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
-    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
-    uint64_t *, const char *, nvlist_t *);
+    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **,
+    const char *, nvlist_t *);
+static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
+    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
+    uint64_t num_redact_snaps, char *name);
 static int guid_to_name(libzfs_handle_t *, const char *,
     uint64_t, boolean_t, char *);
 
-static const zio_cksum_t zero_cksum = { { 0 } };
-
-typedef struct dedup_arg {
-	int	inputfd;
-	int	outputfd;
-	libzfs_handle_t  *dedup_hdl;
-} dedup_arg_t;
-
 typedef struct progress_arg {
 	zfs_handle_t *pa_zhp;
 	int pa_fd;
 	boolean_t pa_parsable;
+	boolean_t pa_estimate;	/* header column reads "BYTES", not " SENT" */
+	int pa_verbosity;	/* >= 2 also reports the blocks count */
+	boolean_t pa_astitle;	/* publish progress via zfs_setproctitle() */
+	boolean_t pa_progress;	/* enables header and plain stderr lines */
+	uint64_t pa_size;	/* total used for the title's percentage */
 } progress_arg_t;
 
-typedef struct dataref {
-	uint64_t ref_guid;
-	uint64_t ref_object;
-	uint64_t ref_offset;
-} dataref_t;
-
-typedef struct dedup_entry {
-	struct dedup_entry	*dde_next;
-	zio_cksum_t dde_chksum;
-	uint64_t dde_prop;
-	dataref_t dde_ref;
-} dedup_entry_t;
-
-#define	MAX_DDT_PHYSMEM_PERCENT		20
-#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
-
-typedef struct dedup_table {
-	dedup_entry_t	**dedup_hash_array;
-	umem_cache_t	*ddecache;
-	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
-	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
-	uint64_t	ddt_count;
-	int		numhashbits;
-	boolean_t	ddt_full;
-} dedup_table_t;
-
-static int
-high_order_bit(uint64_t n)
-{
-	int count;
-
-	for (count = 0; n != 0; count++)
-		n >>= 1;
-	return (count);
-}
-
-static size_t
-ssread(void *buf, size_t len, FILE *stream)
-{
-	size_t outlen;
-
-	if ((outlen = fread(buf, len, 1, stream)) == 0)
-		return (0);
-
-	return (outlen);
-}
-
-static void
-ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
-    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
-{
-	dedup_entry_t	*dde;
-
-	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
-		if (ddt->ddt_full == B_FALSE) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Dedup table full.  Deduplication will continue "
-			    "with existing table entries"));
-			ddt->ddt_full = B_TRUE;
-		}
-		return;
-	}
-
-	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
-	    != NULL) {
-		assert(*ddepp == NULL);
-		dde->dde_next = NULL;
-		dde->dde_chksum = *cs;
-		dde->dde_prop = prop;
-		dde->dde_ref = *dr;
-		*ddepp = dde;
-		ddt->cur_ddt_size += sizeof (dedup_entry_t);
-		ddt->ddt_count++;
-	}
-}
-
-/*
- * Using the specified dedup table, do a lookup for an entry with
- * the checksum cs.  If found, return the block's reference info
- * in *dr. Otherwise, insert a new entry in the dedup table, using
- * the reference information specified by *dr.
- *
- * return value:  true - entry was found
- *		  false - entry was not found
- */
-static boolean_t
-ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
-    uint64_t prop, dataref_t *dr)
-{
-	uint32_t hashcode;
-	dedup_entry_t **ddepp;
-
-	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
-
-	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
-	    ddepp = &((*ddepp)->dde_next)) {
-		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
-		    (*ddepp)->dde_prop == prop) {
-			*dr = (*ddepp)->dde_ref;
-			return (B_TRUE);
-		}
-	}
-	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
-	return (B_FALSE);
-}
-
 static int
 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
     zio_cksum_t *zc, int outfd)
@@ -223,274 +114,6 @@
 }
 
 /*
- * This function is started in a separate thread when the dedup option
- * has been requested.  The main send thread determines the list of
- * snapshots to be included in the send stream and makes the ioctl calls
- * for each one.  But instead of having the ioctl send the output to the
- * the output fd specified by the caller of zfs_send()), the
- * ioctl is told to direct the output to a pipe, which is read by the
- * alternate thread running THIS function.  This function does the
- * dedup'ing by:
- *  1. building a dedup table (the DDT)
- *  2. doing checksums on each data block and inserting a record in the DDT
- *  3. looking for matching checksums, and
- *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
- *      a duplicate block is found.
- * The output of this function then goes to the output fd requested
- * by the caller of zfs_send().
- */
-static void *
-cksummer(void *arg)
-{
-	dedup_arg_t *dda = arg;
-	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
-	dmu_replay_record_t thedrr = { 0 };
-	dmu_replay_record_t *drr = &thedrr;
-	FILE *ofp;
-	int outfd;
-	dedup_table_t ddt;
-	zio_cksum_t stream_cksum;
-	uint64_t numbuckets;
-
-#ifdef _ILP32
-	ddt.max_ddt_size = SMALLEST_POSSIBLE_MAX_DDT_MB << 20;
-#else
-	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
-	ddt.max_ddt_size =
-	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
-	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
-#endif
-
-	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
-
-	/*
-	 * numbuckets must be a power of 2.  Increase number to
-	 * a power of 2 if necessary.
-	 */
-	if (!ISP2(numbuckets))
-		numbuckets = 1ULL << high_order_bit(numbuckets);
-
-	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
-	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
-	    NULL, NULL, NULL, NULL, NULL, 0);
-	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
-	ddt.numhashbits = high_order_bit(numbuckets) - 1;
-	ddt.ddt_full = B_FALSE;
-
-	outfd = dda->outputfd;
-	ofp = fdopen(dda->inputfd, "r");
-	while (ssread(drr, sizeof (*drr), ofp) != 0) {
-
-		/*
-		 * kernel filled in checksum, we are going to write same
-		 * record, but need to regenerate checksum.
-		 */
-		if (drr->drr_type != DRR_BEGIN) {
-			bzero(&drr->drr_u.drr_checksum.drr_checksum,
-			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
-		}
-
-		switch (drr->drr_type) {
-		case DRR_BEGIN:
-		{
-			struct drr_begin *drrb = &drr->drr_u.drr_begin;
-			int fflags;
-			int sz = 0;
-			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
-
-			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
-
-			/* set the DEDUP feature flag for this stream */
-			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
-			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
-			    DMU_BACKUP_FEATURE_DEDUPPROPS);
-			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
-
-			if (drr->drr_payloadlen != 0) {
-				sz = drr->drr_payloadlen;
-
-				if (sz > SPA_MAXBLOCKSIZE) {
-					buf = zfs_realloc(dda->dedup_hdl, buf,
-					    SPA_MAXBLOCKSIZE, sz);
-				}
-				(void) ssread(buf, sz, ofp);
-				if (ferror(stdin))
-					perror("fread");
-			}
-			if (dump_record(drr, buf, sz, &stream_cksum,
-			    outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_END:
-		{
-			struct drr_end *drre = &drr->drr_u.drr_end;
-			/* use the recalculated checksum */
-			drre->drr_checksum = stream_cksum;
-			if (dump_record(drr, NULL, 0, &stream_cksum,
-			    outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_OBJECT:
-		{
-			struct drr_object *drro = &drr->drr_u.drr_object;
-			if (drro->drr_bonuslen > 0) {
-				(void) ssread(buf,
-				    DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
-			}
-			if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
-			    &stream_cksum, outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_SPILL:
-		{
-			struct drr_spill *drrs = &drr->drr_u.drr_spill;
-			(void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
-			if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
-			    &stream_cksum, outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_FREEOBJECTS:
-		{
-			if (dump_record(drr, NULL, 0, &stream_cksum,
-			    outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_WRITE:
-		{
-			struct drr_write *drrw = &drr->drr_u.drr_write;
-			dataref_t	dataref;
-			uint64_t	payload_size;
-
-			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
-			(void) ssread(buf, payload_size, ofp);
-
-			/*
-			 * Use the existing checksum if it's dedup-capable,
-			 * else calculate a SHA256 checksum for it.
-			 */
-
-			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
-			    zero_cksum) ||
-			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
-				SHA2_CTX ctx;
-				zio_cksum_t tmpsha256;
-
-				SHA2Init(SHA256, &ctx);
-				SHA2Update(&ctx, buf, payload_size);
-				SHA2Final(&tmpsha256, &ctx);
-
-				drrw->drr_key.ddk_cksum.zc_word[0] =
-				    BE_64(tmpsha256.zc_word[0]);
-				drrw->drr_key.ddk_cksum.zc_word[1] =
-				    BE_64(tmpsha256.zc_word[1]);
-				drrw->drr_key.ddk_cksum.zc_word[2] =
-				    BE_64(tmpsha256.zc_word[2]);
-				drrw->drr_key.ddk_cksum.zc_word[3] =
-				    BE_64(tmpsha256.zc_word[3]);
-				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
-				drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
-			}
-
-			dataref.ref_guid = drrw->drr_toguid;
-			dataref.ref_object = drrw->drr_object;
-			dataref.ref_offset = drrw->drr_offset;
-
-			if (ddt_update(dda->dedup_hdl, &ddt,
-			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
-			    &dataref)) {
-				dmu_replay_record_t wbr_drr = {0};
-				struct drr_write_byref *wbr_drrr =
-				    &wbr_drr.drr_u.drr_write_byref;
-
-				/* block already present in stream */
-				wbr_drr.drr_type = DRR_WRITE_BYREF;
-
-				wbr_drrr->drr_object = drrw->drr_object;
-				wbr_drrr->drr_offset = drrw->drr_offset;
-				wbr_drrr->drr_length = drrw->drr_logical_size;
-				wbr_drrr->drr_toguid = drrw->drr_toguid;
-				wbr_drrr->drr_refguid = dataref.ref_guid;
-				wbr_drrr->drr_refobject =
-				    dataref.ref_object;
-				wbr_drrr->drr_refoffset =
-				    dataref.ref_offset;
-
-				wbr_drrr->drr_checksumtype =
-				    drrw->drr_checksumtype;
-				wbr_drrr->drr_flags = drrw->drr_flags;
-				wbr_drrr->drr_key.ddk_cksum =
-				    drrw->drr_key.ddk_cksum;
-				wbr_drrr->drr_key.ddk_prop =
-				    drrw->drr_key.ddk_prop;
-
-				if (dump_record(&wbr_drr, NULL, 0,
-				    &stream_cksum, outfd) != 0)
-					goto out;
-			} else {
-				/* block not previously seen */
-				if (dump_record(drr, buf, payload_size,
-				    &stream_cksum, outfd) != 0)
-					goto out;
-			}
-			break;
-		}
-
-		case DRR_WRITE_EMBEDDED:
-		{
-			struct drr_write_embedded *drrwe =
-			    &drr->drr_u.drr_write_embedded;
-			(void) ssread(buf,
-			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
-			if (dump_record(drr, buf,
-			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
-			    &stream_cksum, outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_FREE:
-		{
-			if (dump_record(drr, NULL, 0, &stream_cksum,
-			    outfd) != 0)
-				goto out;
-			break;
-		}
-
-		case DRR_OBJECT_RANGE:
-		{
-			if (dump_record(drr, NULL, 0, &stream_cksum,
-			    outfd) != 0)
-				goto out;
-			break;
-		}
-
-		default:
-			(void) fprintf(stderr, "INVALID record type 0x%x\n",
-			    drr->drr_type);
-			/* should never happen, so assert */
-			assert(B_FALSE);
-		}
-	}
-out:
-	umem_cache_destroy(ddt.ddecache);
-	free(ddt.dedup_hash_array);
-	free(buf);
-	(void) fclose(ofp);
-
-	return (NULL);
-}
-
-/*
  * Routines for dealing with the AVL tree of fs-nvlists
  */
 typedef struct fsavl_node {
@@ -506,7 +129,7 @@
 	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
 	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
 
-	return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
+	return (TREE_CMP(fn1->fn_guid, fn2->fn_guid));
 }
 
 /*
@@ -565,15 +188,15 @@
 		nvlist_t *nvfs, *snaps;
 		nvpair_t *snapelem = NULL;
 
-		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
-		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+		nvfs = fnvpair_value_nvlist(fselem);
+		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
 
 		while ((snapelem =
 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
 			fsavl_node_t *fn;
 			uint64_t guid;
 
-			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
+			guid = fnvpair_value_uint64(snapelem);
 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
 				fsavl_destroy(fsavl);
 				return (NULL);
@@ -586,8 +209,9 @@
 			 * Note: if there are multiple snaps with the
 			 * same GUID, we ignore all but one.
 			 */
-			if (avl_find(fsavl, fn, NULL) == NULL)
-				avl_add(fsavl, fn);
+			avl_index_t where = 0;
+			if (avl_find(fsavl, fn, &where) == NULL)
+				avl_insert(fsavl, fn, where);
 			else
 				free(fn);
 		}
@@ -627,6 +251,7 @@
 	boolean_t raw;
 	boolean_t doall;
 	boolean_t replicate;
+	boolean_t skipmissing;
 	boolean_t verbose;
 	boolean_t backup;
 	boolean_t seenfrom;
@@ -690,7 +315,7 @@
 		return (0);
 	}
 
-	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
+	fnvlist_add_uint64(sd->parent_snaps, snapname, guid);
 	/*
 	 * NB: if there is no fromsnap here (it's a newly created fs in
 	 * an incremental replication), we will substitute the tosnap.
@@ -700,6 +325,15 @@
 	}
 
 	if (!sd->recursive) {
+
+		/*
+		 * To allow a doall stream to work properly
+		 * with a NULL fromsnap
+		 */
+		if (sd->doall && sd->fromsnap == NULL && !sd->seenfrom) {
+			sd->seenfrom = B_TRUE;
+		}
+
 		if (!sd->seenfrom && isfromsnap) {
 			sd->seenfrom = B_TRUE;
 			zfs_close(zhp);
@@ -715,18 +349,16 @@
 			sd->seento = B_TRUE;
 	}
 
-	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
+	nv = fnvlist_alloc();
 	send_iterate_prop(zhp, sd->backup, nv);
-	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
-	nvlist_free(nv);
+	fnvlist_add_nvlist(sd->snapprops, snapname, nv);
+	fnvlist_free(nv);
 	if (sd->holds) {
-		nvlist_t *holds = fnvlist_alloc();
-		int err = lzc_get_holds(zhp->zfs_name, &holds);
-		if (err == 0) {
-			VERIFY(0 == nvlist_add_nvlist(sd->snapholds,
-			    snapname, holds));
+		nvlist_t *holds;
+		if (lzc_get_holds(zhp->zfs_name, &holds) == 0) {
+			fnvlist_add_nvlist(sd->snapholds, snapname, holds);
+			fnvlist_free(holds);
 		}
-		fnvlist_free(holds);
 	}
 
 	zfs_close(zhp);
@@ -799,14 +431,12 @@
 		if (zfs_prop_user(propname) ||
 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
 			char *value;
-			verify(nvlist_lookup_string(propnv,
-			    ZPROP_VALUE, &value) == 0);
-			VERIFY(0 == nvlist_add_string(nv, propname, value));
+			value = fnvlist_lookup_string(propnv, ZPROP_VALUE);
+			fnvlist_add_string(nv, propname, value);
 		} else {
 			uint64_t value;
-			verify(nvlist_lookup_uint64(propnv,
-			    ZPROP_VALUE, &value) == 0);
-			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
+			value = fnvlist_lookup_uint64(propnv, ZPROP_VALUE);
+			fnvlist_add_uint64(nv, propname, value);
 		}
 	}
 }
@@ -871,7 +501,8 @@
 	 * - skip sending the current dataset if it was created later than
 	 *   the parent tosnap
 	 * - return error if the current dataset was created earlier than
-	 *   the parent tosnap
+	 *   the parent tosnap, unless --skip-missing specified. Then
+	 *   just print a warning
 	 */
 	if (sd->tosnap != NULL && tosnap_txg == 0) {
 		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
@@ -880,13 +511,18 @@
 				    "skipping dataset %s: snapshot %s does "
 				    "not exist\n"), zhp->zfs_name, sd->tosnap);
 			}
+		} else if (sd->skipmissing) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "WARNING: skipping dataset %s and its children:"
+			    " snapshot %s does not exist\n"),
+			    zhp->zfs_name, sd->tosnap);
 		} else {
 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
 			    "cannot send %s@%s%s: snapshot %s@%s does not "
 			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
 			    dgettext(TEXT_DOMAIN, " recursively") : "",
 			    zhp->zfs_name, sd->tosnap);
-			rv = -1;
+			rv = EZFS_NOENT;
 		}
 		goto out;
 	}
@@ -951,8 +587,7 @@
 	sd->parent_snaps = fnvlist_alloc();
 	sd->snapprops = fnvlist_alloc();
 	if (sd->holds)
-		VERIFY(0 == nvlist_alloc(&sd->snapholds, NV_UNIQUE_NAME, 0));
-
+		sd->snapholds = fnvlist_alloc();
 
 	/*
 	 * If this is a "doall" send, a replicate send or we're just trying
@@ -989,6 +624,18 @@
 	fnvlist_free(sd->snapprops);
 	fnvlist_free(sd->snapholds);
 
+	/* Do not allow the size of the properties list to exceed the limit */
+	if ((fnvlist_size(nvfs) + fnvlist_size(sd->fss)) >
+	    zhp->zfs_hdl->libzfs_max_nvlist) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "warning: cannot send %s@%s: the size of the list of "
+		    "snapshots and properties is too large to be received "
+		    "successfully.\n"
+		    "Select a smaller number of snapshots to send.\n"),
+		    zhp->zfs_name, sd->tosnap);
+		rv = EZFS_NOSPC;
+		goto out;
+	}
 	/* add this fs to nvlist */
 	(void) snprintf(guidstring, sizeof (guidstring),
 	    "0x%llx", (longlong_t)guid);
@@ -1012,8 +659,9 @@
 static int
 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
     const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
-    boolean_t replicate, boolean_t verbose, boolean_t backup, boolean_t holds,
-    boolean_t props, nvlist_t **nvlp, avl_tree_t **avlp)
+    boolean_t replicate, boolean_t skipmissing, boolean_t verbose,
+    boolean_t backup, boolean_t holds, boolean_t props, nvlist_t **nvlp,
+    avl_tree_t **avlp)
 {
 	zfs_handle_t *zhp;
 	send_data_t sd = { 0 };
@@ -1023,7 +671,7 @@
 	if (zhp == NULL)
 		return (EZFS_BADTYPE);
 
-	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
+	sd.fss = fnvlist_alloc();
 	sd.fsname = fsname;
 	sd.fromsnap = fromsnap;
 	sd.tosnap = tosnap;
@@ -1031,13 +679,14 @@
 	sd.raw = raw;
 	sd.doall = doall;
 	sd.replicate = replicate;
+	sd.skipmissing = skipmissing;
 	sd.verbose = verbose;
 	sd.backup = backup;
 	sd.holds = holds;
 	sd.props = props;
 
 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
-		nvlist_free(sd.fss);
+		fnvlist_free(sd.fss);
 		if (avlp != NULL)
 			*avlp = NULL;
 		*nvlp = NULL;
@@ -1045,7 +694,7 @@
 	}
 
 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
-		nvlist_free(sd.fss);
+		fnvlist_free(sd.fss);
 		*nvlp = NULL;
 		return (EZFS_NOMEM);
 	}
@@ -1064,8 +713,9 @@
 	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
 	uint64_t prevsnap_obj;
 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
-	boolean_t verbose, dryrun, parsable, progress, embed_data, block_diff, std_out;
+	boolean_t dryrun, parsable, progress, embed_data, block_diff, std_out;
 	boolean_t large_block, compress, raw, holds;
+	boolean_t progressastitle;
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
@@ -1076,6 +726,7 @@
 	nvlist_t *debugnv;
 	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
 	int cleanup_fd;
+	int verbosity;
 	uint64_t size;
 } send_dump_data_t;
 
@@ -1121,7 +772,7 @@
 		case EFAULT:
 		case EROFS:
 		case EINVAL:
-			zfs_error_aux(hdl, strerror(error));
+			zfs_error_aux(hdl, "%s", strerror(error));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 
 		default:
@@ -1156,10 +807,9 @@
 	zc.zc_flags = flags;
 	zc.zc_block_diff = block_diff ? BLOCK_DIFF_MAGIC : 0;
 
-	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
+	thisdbg = fnvlist_alloc();
 	if (fromsnap && fromsnap[0] != '\0') {
-		VERIFY(0 == nvlist_add_string(thisdbg,
-		    "fromsnap", fromsnap));
+		fnvlist_add_string(thisdbg, "fromsnap", fromsnap);
 	}
 
 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
@@ -1167,12 +817,11 @@
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "warning: cannot send '%s'"), zhp->zfs_name);
 
-		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
+		fnvlist_add_uint64(thisdbg, "error", errno);
 		if (debugnv) {
-			VERIFY(0 == nvlist_add_nvlist(debugnv,
-			    zhp->zfs_name, thisdbg));
+			fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
 		}
-		nvlist_free(thisdbg);
+		fnvlist_free(thisdbg);
 
 		switch (errno) {
 		case EXDEV:
@@ -1205,7 +854,8 @@
 		case ERANGE:
 		case EFAULT:
 		case EROFS:
-			zfs_error_aux(hdl, strerror(errno));
+		case EINVAL:
+			zfs_error_aux(hdl, "%s", strerror(errno));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 
 		default:
@@ -1214,8 +864,8 @@
 	}
 
 	if (debugnv)
-		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
-	nvlist_free(thisdbg);
+		fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
+	fnvlist_free(thisdbg);
 
 	return (0);
 }
@@ -1235,43 +885,89 @@
 	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 }
 
+int	/* 0 on success, otherwise errno from the failed ioctl */
+zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
+    uint64_t *blocks_visited)	/* either out-pointer may be NULL */
+{
+	zfs_cmd_t zc = {"\0"};	/* ZFS_IOC_SEND_PROGRESS request/reply */
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_cookie = fd;	/* in: fd passed to the ioctl */
+	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
+		return (errno);
+	if (bytes_written != NULL)
+		*bytes_written = zc.zc_cookie;	/* out: reuses zc_cookie */
+	if (blocks_visited != NULL)
+		*blocks_visited = zc.zc_objset_type;
+	return (0);
+}
+
 static void *
 send_progress_thread(void *arg)
 {
 	progress_arg_t *pa = arg;
-	zfs_cmd_t zc = {"\0"};
 	zfs_handle_t *zhp = pa->pa_zhp;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	unsigned long long bytes;
+	uint64_t bytes;
+	uint64_t blocks;
+	uint64_t total = pa->pa_size / 100;
 	char buf[16];
 	time_t t;
 	struct tm *tm;
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
-	if (!pa->pa_parsable)
-		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT %s\n",
-		    zhp->zfs_name);
+	boolean_t firstloop = B_TRUE;
 
 	/*
 	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
 	 */
 	for (;;) {
+		int err;
 		(void) sleep(1);
+		if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
+		    &blocks)) != 0) {
+			if (err == EINTR || err == ENOENT)
+				return ((void *)0);
+			return ((void *)(uintptr_t)err);
+		}
 
-		zc.zc_cookie = pa->pa_fd;
-		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
-			return ((void *)-1);
+		if (firstloop && !pa->pa_parsable && pa->pa_progress) {
+			(void) fprintf(stderr,
+			    "TIME       %s   %sSNAPSHOT %s\n",
+			    pa->pa_estimate ? "BYTES" : " SENT",
+			    pa->pa_verbosity >= 2 ? "   BLOCKS    " : "",
+			    zhp->zfs_name);
+			firstloop = B_FALSE;
+		}
 
 		(void) time(&t);
 		tm = localtime(&t);
-		bytes = zc.zc_cookie;
 
-		if (pa->pa_parsable) {
+		if (pa->pa_astitle) {
+			char buf_bytes[16];
+			char buf_size[16];
+			int pct;
+			zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes));
+			zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size));
+			pct = (total > 0) ? bytes / total : 100;
+			zfs_setproctitle("sending %s (%d%%: %s/%s)",
+			    zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size);
+		}
+
+		if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
+			(void) fprintf(stderr,
+			    "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
+			    tm->tm_hour, tm->tm_min, tm->tm_sec,
+			    (u_longlong_t)bytes, (u_longlong_t)blocks,
+			    zhp->zfs_name);
+		} else if (pa->pa_verbosity >= 2) {
+			zfs_nicenum(bytes, buf, sizeof (buf));
+			(void) fprintf(stderr,
+			    "%02d:%02d:%02d   %5s    %8llu    %s\n",
+			    tm->tm_hour, tm->tm_min, tm->tm_sec,
+			    buf, (u_longlong_t)blocks, zhp->zfs_name);
+		} else if (pa->pa_parsable) {
 			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
 			    tm->tm_hour, tm->tm_min, tm->tm_sec,
-			    bytes, zhp->zfs_name);
-		} else {
+			    (u_longlong_t)bytes, zhp->zfs_name);
+		} else if (pa->pa_progress) {
 			zfs_nicebytes(bytes, buf, sizeof (buf));
 			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
 			    tm->tm_hour, tm->tm_min, tm->tm_sec,
@@ -1368,6 +1064,8 @@
 		flags |= LZC_SEND_FLAG_COMPRESS;
 	if (sdd->raw)
 		flags |= LZC_SEND_FLAG_RAW;
+	if (sdd->block_diff)
+		flags |= LZC_SEND_FLAG_BLOCKDIFF;
 
 	if (!sdd->doall && !isfromsnap && !istosnap) {
 		if (sdd->replicate) {
@@ -1380,10 +1078,8 @@
 			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
 			    zhp->zfs_dmustats.dds_guid, &snapname);
 
-			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
-			    "snapprops", &snapprops));
-			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
-			    thissnap, &snapprops));
+			snapprops = fnvlist_lookup_nvlist(nvfs, "snapprops");
+			snapprops = fnvlist_lookup_nvlist(snapprops, thissnap);
 			exclude = !nvlist_exists(snapprops, "is_clone_origin");
 		} else {
 			exclude = B_TRUE;
@@ -1412,7 +1108,7 @@
 	fromorigin = sdd->prevsnap[0] == '\0' &&
 	    (sdd->fromorigin || sdd->replicate);
 
-	if (sdd->verbose) {
+	if (sdd->verbosity != 0) {
 		uint64_t size = 0;
 		char fromds[ZFS_MAX_DATASET_NAME_LEN];
 
@@ -1437,10 +1133,15 @@
 		 * If progress reporting is requested, spawn a new thread to
 		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
 		 */
-		if (sdd->progress) {
+		if (sdd->progress || sdd->progressastitle) {
 			pa.pa_zhp = zhp;
 			pa.pa_fd = sdd->outfd;
 			pa.pa_parsable = sdd->parsable;
+			pa.pa_estimate = B_FALSE;
+			pa.pa_verbosity = sdd->verbosity;
+			pa.pa_size = sdd->size;
+			pa.pa_astitle = sdd->progressastitle;
+			pa.pa_progress = sdd->progress;
 
 			if ((err = pthread_create(&tid, NULL,
 			    send_progress_thread, &pa)) != 0) {
@@ -1452,9 +1153,19 @@
 		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
 		    fromorigin, sdd->outfd, flags, sdd->debugnv, sdd->block_diff);
 
-		if (sdd->progress) {
+		if (sdd->progress || sdd->progressastitle) {
+			void *status = NULL;
 			(void) pthread_cancel(tid);
-			(void) pthread_join(tid, NULL);
+			(void) pthread_join(tid, &status);
+			int error = (int)(uintptr_t)status;
+			if (error != 0 && status != PTHREAD_CANCELED) {
+				char errbuf[1024];
+				(void) snprintf(errbuf, sizeof (errbuf),
+				    dgettext(TEXT_DOMAIN,
+				    "progress thread exited nonzero"));
+				return (zfs_standard_error(zhp->zfs_hdl, error,
+				    errbuf));
+			}
 		}
 	}
 
@@ -1475,7 +1186,7 @@
 
 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
 	    zhp->zfs_name, sdd->tosnap);
-	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
 		    "WARNING: could not send %s@%s: does not exist\n"),
 		    zhp->zfs_name, sdd->tosnap);
@@ -1493,7 +1204,7 @@
 		 */
 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
 		    zhp->zfs_name, sdd->fromsnap);
-		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		if (zfs_ioctl(zhp->zfs_hdl,
 		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
 			missingfrom = B_TRUE;
 		}
@@ -1590,7 +1301,7 @@
 		nvlist_t *nvfs;
 		uint64_t origin_guid = 0;
 
-		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
+		nvfs = fnvpair_value_nvlist(fspair);
 		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
 		if (origin_guid != 0) {
 			char *snapname;
@@ -1598,12 +1309,12 @@
 			    origin_guid, &snapname);
 			if (origin_nv != NULL) {
 				nvlist_t *snapprops;
-				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
-				    "snapprops", &snapprops));
-				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
-				    snapname, &snapprops));
-				VERIFY(0 == nvlist_add_boolean(
-				    snapprops, "is_clone_origin"));
+				snapprops = fnvlist_lookup_nvlist(origin_nv,
+				    "snapprops");
+				snapprops = fnvlist_lookup_nvlist(snapprops,
+				    snapname);
+				fnvlist_add_boolean(snapprops,
+				    "is_clone_origin");
 			}
 		}
 	}
@@ -1618,11 +1329,11 @@
 		uint64_t origin_guid = 0;
 		uint64_t parent_guid = 0;
 
-		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
+		fslist = fnvpair_value_nvlist(fspair);
 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
 			continue;
 
-		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
+		fsname = fnvlist_lookup_string(fslist, "name");
 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
 		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
 		    &parent_guid);
@@ -1654,7 +1365,7 @@
 		if (zhp == NULL)
 			return (-1);
 		err = dump_filesystem(zhp, sdd);
-		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
+		fnvlist_add_boolean(fslist, "sent");
 		progress = B_TRUE;
 		zfs_close(zhp);
 		if (err)
@@ -1670,7 +1381,7 @@
 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
 		nvlist_t *fslist;
 
-		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
+		fslist = fnvpair_value_nvlist(fspair);
 		(void) nvlist_remove_all(fslist, "sent");
 	}
 
@@ -1753,10 +1464,195 @@
 	}
 	return (nv);
 }
+/*
+ * Translate the boolean send options in "flags" into the corresponding
+ * LZC_SEND_FLAG_* bit mask; options that are not set contribute no bits.
+ */
+static enum lzc_send_flags
+lzc_flags_from_sendflags(const sendflags_t *flags)
+{
+	enum lzc_send_flags bits = 0;
+
+	bits |= flags->largeblock ? LZC_SEND_FLAG_LARGE_BLOCK : 0;
+	bits |= flags->embed_data ? LZC_SEND_FLAG_EMBED_DATA : 0;
+	bits |= flags->compress ? LZC_SEND_FLAG_COMPRESS : 0;
+	bits |= flags->raw ? LZC_SEND_FLAG_RAW : 0;
+	bits |= flags->saved ? LZC_SEND_FLAG_SAVED : 0;
+	bits |= flags->block_diff ? LZC_SEND_FLAG_BLOCKDIFF : 0;
+
+	return (bits);
+}
 
-int
-zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
-    const char *resume_token)
+/*
+ * Estimate the size of the send stream for zhp and store it in *sizep.  If
+ * progress or parsable output was requested, the estimate is also printed.
+ */
+static int
+estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
+    uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
+    const char *redactbook, char *errbuf, uint64_t *sizep)
+{
+	uint64_t size = 0;	/* defined even if the estimate call fails */
+	FILE *fout = flags->dryrun ? stdout : stderr;
+	progress_arg_t pa = { 0 };
+	int err = 0;
+	pthread_t ptid;
+
+	if (flags->progress || flags->progressastitle) {
+		pa.pa_zhp = zhp;
+		pa.pa_fd = fd;
+		pa.pa_parsable = flags->parsable;
+		pa.pa_estimate = B_TRUE;
+		pa.pa_verbosity = flags->verbosity;
+		/* pthread_create() returns its error code; errno is not set. */
+		err = pthread_create(&ptid, NULL, send_progress_thread, &pa);
+		if (err != 0) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_THREADCREATEFAILED, errbuf));
+		}
+	}
+
+	err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
+	    lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
+	    redactbook, fd, &size);
+	*sizep = size;
+
+	if (flags->progress || flags->progressastitle) {
+		void *status = NULL;
+		(void) pthread_cancel(ptid);
+		(void) pthread_join(ptid, &status);
+		int error = (int)(uintptr_t)status;
+		if (error != 0 && status != PTHREAD_CANCELED) {
+			char errbuf[1024];
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN, "progress thread exited "
+			    "nonzero"));
+			return (zfs_standard_error(zhp->zfs_hdl, error,
+			    errbuf));
+		}
+	}
+
+	if (!flags->progress && !flags->parsable)
+		return (err);
+
+	if (err != 0) {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+		return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, errbuf));
+	}
+	send_print_verbose(fout, zhp->zfs_name, from, size, flags->parsable);
+	if (flags->parsable) {
+		(void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
+	} else {
+		char buf[16];
+		zfs_nicenum(size, buf, sizeof (buf));
+		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
+		    "total estimated size is %s\n"), buf);
+	}
+	return (0);
+}
+
+/* Return B_TRUE if guid is among the num_snaps entries of snaps. */
+static boolean_t
+redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
+{
+	for (uint64_t i = 0; i < num_snaps; i++)
+		if (snaps[i] == guid)
+			return (B_TRUE);
+	return (B_FALSE);
+}
+
+/* B_TRUE iff the lists have equal length and snaps2 holds each snaps1 guid. */
+static boolean_t
+redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
+    const uint64_t *snaps2, uint64_t num_snaps2)
+{
+	if (num_snaps1 != num_snaps2)
+		return (B_FALSE);
+	for (uint64_t i = 0; i < num_snaps1; i++)
+		if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
+			return (B_FALSE);
+	return (B_TRUE);
+}
+
+/*
+ * Find the bookmark of "path" whose redaction snapshot list matches
+ * redact_snap_guids (the send being resumed).  Reports EZFS_BADPROP if no
+ * bookmark matches or if the matching bookmark is not marked complete.
+ * On success, returns 0 and stores a copy of the matching entry's name in
+ * *bookname; the caller must release it with free().
+ */
+static int
+find_redact_book(libzfs_handle_t *hdl, const char *path,
+    const uint64_t *redact_snap_guids, int num_redact_snaps,
+    char **bookname)
+{
+	char errbuf[1024];
+	int error = 0;
+	nvlist_t *props = fnvlist_alloc();
+	nvlist_t *bmarks;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot resume send"));
+	/* Request only the two bookmark properties examined below. */
+	fnvlist_add_boolean(props, "redact_complete");
+	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
+	error = lzc_get_bookmarks(path, props, &bmarks);
+	fnvlist_free(props);
+	if (error != 0) {
+		if (error == ESRCH) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "nonexistent redaction bookmark provided"));
+		} else if (error == ENOENT) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "dataset to be sent no longer exists"));
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "unknown error: %s"), strerror(error));
+		}
+		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+	}
+	nvpair_t *pair;
+	for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
+	    pair = nvlist_next_nvpair(bmarks, pair)) {
+		/* Stop at the first bookmark whose snapshot list matches. */
+		nvlist_t *bmark = fnvpair_value_nvlist(pair);
+		nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
+		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
+		uint_t len = 0;
+		uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
+		    ZPROP_VALUE, &len);
+		if (redact_snaps_equal(redact_snap_guids,
+		    num_redact_snaps, bmarksnaps, len)) {
+			break;
+		}
+	}
+	if (pair == NULL)  {
+		fnvlist_free(bmarks);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no appropriate redaction bookmark exists"));
+		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+	}
+	char *name = nvpair_name(pair);
+	nvlist_t *bmark = fnvpair_value_nvlist(pair);
+	nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
+	boolean_t complete = fnvlist_lookup_boolean_value(vallist,
+	    ZPROP_VALUE);
+	if (!complete) {
+		fnvlist_free(bmarks);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "incomplete redaction bookmark provided"));
+		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+	}
+	*bookname = strndup(name, ZFS_MAX_DATASET_NAME_LEN);
+	ASSERT3P(*bookname, !=, NULL);
+	fnvlist_free(bmarks);
+	return (0);
+}
+
+static int
+zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
+    nvlist_t *resume_nvl)
 {
 	char errbuf[1024];
 	char *toname;
@@ -1766,21 +1662,16 @@
 	int error = 0;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 	enum lzc_send_flags lzc_flags = 0;
-	FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr;
+	FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
+	uint64_t *redact_snap_guids = NULL;
+	int num_redact_snaps = 0;
+	char *redact_book = NULL;
+	uint64_t size = 0;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot resume send"));
 
-	nvlist_t *resume_nvl =
-	    zfs_send_resume_token_to_nvlist(hdl, resume_token);
-	if (resume_nvl == NULL) {
-		/*
-		 * zfs_error_aux has already been set by
-		 * zfs_send_resume_token_to_nvlist
-		 */
-		return (zfs_error(hdl, EZFS_FAULT, errbuf));
-	}
-	if (flags->verbose) {
+	if (flags->verbosity != 0) {
 		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
 		    "resume token contents:\n"));
 		nvlist_print(fout, resume_nvl);
@@ -1806,19 +1697,29 @@
 		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
 	if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
 		lzc_flags |= LZC_SEND_FLAG_RAW;
+	if (flags->saved || nvlist_exists(resume_nvl, "savedok"))
+		lzc_flags |= LZC_SEND_FLAG_SAVED;
+	if (flags->block_diff || nvlist_exists(resume_nvl, "blockdiff"))
+		lzc_flags |= LZC_SEND_FLAG_BLOCKDIFF;
 
-	if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
-		if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "'%s' is no longer the same snapshot used in "
-			    "the initial send"), toname);
-		} else {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "'%s' used in the initial send no longer exists"),
-			    toname);
+	if (flags->saved) {
+		(void) strcpy(name, toname);
+	} else {
+		error = guid_to_name(hdl, toname, toguid, B_FALSE, name);
+		if (error != 0) {
+			if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is no longer the same snapshot "
+				    "used in the initial send"), toname);
+			} else {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' used in the initial send no "
+				    "longer exists"), toname);
+			}
+			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
 		}
-		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
 	}
+
 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
 	if (zhp == NULL) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -1826,8 +1727,14 @@
 		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
 	}
 
+	if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
+	    &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
+		num_redact_snaps = -1;
+	}
+
 	if (fromguid != 0) {
-		if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
+		if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
+		    redact_snap_guids, num_redact_snaps, name) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "incremental source %#llx no longer exists"),
 			    (longlong_t)fromguid);
@@ -1836,14 +1743,45 @@
 		fromname = name;
 	}
 
-	if (flags->verbose) {
-		uint64_t size = 0;
-		error = lzc_send_space(zhp->zfs_name, fromname,
-		    lzc_flags, &size);
-		if (error == 0)
-			size = MAX(0, (int64_t)(size - bytes));
-		send_print_verbose(fout, zhp->zfs_name, fromname,
-		    size, flags->parsable);
+	redact_snap_guids = NULL;
+
+	if (nvlist_lookup_uint64_array(resume_nvl,
+	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
+	    (uint_t *)&num_redact_snaps) == 0) {
+		char path[ZFS_MAX_DATASET_NAME_LEN];
+
+		(void) strlcpy(path, toname, sizeof (path));
+		char *at = strchr(path, '@');
+		ASSERT3P(at, !=, NULL);
+		*at = '\0';
+		/* On failure, close zhp before returning so it is not leaked. */
+		if ((error = find_redact_book(hdl, path, redact_snap_guids,
+		    num_redact_snaps, &redact_book)) != 0) {
+			zfs_close(zhp);
+			return (error);
+		}
+	}
+
+	if (flags->verbosity != 0 || flags->progressastitle) {
+		/*
+		 * Some of these may have come from the resume token, set them
+		 * here for size estimate purposes.
+		 */
+		sendflags_t tmpflags = *flags;
+		if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
+			tmpflags.largeblock = B_TRUE;
+		if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
+			tmpflags.compress = B_TRUE;
+		if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
+			tmpflags.embed_data = B_TRUE;
+		if (lzc_flags & LZC_SEND_FLAG_RAW)
+			tmpflags.raw = B_TRUE;
+		if (lzc_flags & LZC_SEND_FLAG_SAVED)
+			tmpflags.saved = B_TRUE;
+		if (lzc_flags & LZC_SEND_FLAG_BLOCKDIFF)
+			tmpflags.block_diff = B_TRUE;
+		error = estimate_size(zhp, fromname, outfd, &tmpflags,
+		    resumeobj, resumeoff, bytes, redact_book, errbuf, &size);
 	}
 
 	if (!flags->dryrun) {
@@ -1853,25 +1791,44 @@
 		 * If progress reporting is requested, spawn a new thread to
 		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
 		 */
-		if (flags->progress) {
+		if (flags->progress || flags->progressastitle) {
 			pa.pa_zhp = zhp;
 			pa.pa_fd = outfd;
 			pa.pa_parsable = flags->parsable;
+			pa.pa_estimate = B_FALSE;
+			pa.pa_verbosity = flags->verbosity;
+			pa.pa_size = size;
+			pa.pa_astitle = flags->progressastitle;
+			pa.pa_progress = flags->progress;
 
 			error = pthread_create(&tid, NULL,
 			    send_progress_thread, &pa);
 			if (error != 0) {
+				if (redact_book != NULL)
+					free(redact_book);
 				zfs_close(zhp);
 				return (error);
 			}
 		}
 
-		error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
-		    lzc_flags, resumeobj, resumeoff);
+		error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
+		    lzc_flags, resumeobj, resumeoff, redact_book);
+		if (redact_book != NULL)
+			free(redact_book);
 
-		if (flags->progress) {
+		if (flags->progress || flags->progressastitle) {
+			void *status = NULL;
 			(void) pthread_cancel(tid);
-			(void) pthread_join(tid, NULL);
+			(void) pthread_join(tid, &status);
+			int error = (int)(uintptr_t)status;
+			if (error != 0 && status != PTHREAD_CANCELED) {
+				char errbuf[1024];
+				(void) snprintf(errbuf, sizeof (errbuf),
+				    dgettext(TEXT_DOMAIN,
+				    "progress thread exited nonzero"));
+				zfs_close(zhp);
+				return (zfs_standard_error(hdl, error, errbuf));
+			}
 		}
 
 		char errbuf[1024];
@@ -1887,6 +1844,12 @@
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "source key must be loaded"));
 			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+		case ESRCH:
+			if (lzc_exists(zhp->zfs_name)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "incremental source could not be found"));
+			}
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
 
 		case EXDEV:
 		case ENOENT:
@@ -1901,22 +1864,286 @@
 		case ERANGE:
 		case EFAULT:
 		case EROFS:
-			zfs_error_aux(hdl, strerror(errno));
+			zfs_error_aux(hdl, "%s", strerror(errno));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 
 		default:
 			return (zfs_standard_error(hdl, errno, errbuf));
 		}
+	} else {
+		if (redact_book != NULL)
+			free(redact_book);
 	}
 
-
 	zfs_close(zhp);
 
 	return (error);
 }
 
+int
+zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
+    const char *resume_token)
+{
+	nvlist_t *token_nvl;
+	int rc;
+
+	/* Decode the token; the shared implementation works on the nvlist. */
+	token_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token);
+	if (token_nvl == NULL) {
+		char errbuf[1024];
+
+		/*
+		 * The decoder has already recorded the detailed reason via
+		 * zfs_error_aux(); only the generic prefix is added here.
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot resume send"));
+		return (zfs_error(hdl, EZFS_FAULT, errbuf));
+	}
+
+	rc = zfs_send_resume_impl(hdl, flags, outfd, token_nvl);
+	fnvlist_free(token_nvl);
+	return (rc);
+}
+
+int
+zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
+    const char *resume_token)
+{
+	int ret;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *saved_nvl = NULL, *resume_nvl = NULL;
+	uint64_t saved_guid = 0, resume_guid = 0;
+	uint64_t obj = 0, off = 0, bytes = 0;
+	char token_buf[ZFS_MAXPROPLEN];
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "saved send failed"));
+	/* Fetch the dataset's receive resume token; it seeds saved_nvl. */
+	ret = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
+	    token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE);
+	if (ret != 0)
+		goto out;
+
+	saved_nvl = zfs_send_resume_token_to_nvlist(hdl, token_buf);
+	if (saved_nvl == NULL) {
+		/*
+		 * zfs_error_aux has already been set by
+		 * zfs_send_resume_token_to_nvlist()
+		 */
+		ret = zfs_error(hdl, EZFS_FAULT, errbuf);
+		goto out;
+	}
+
+	/*
+	 * If a resume token is provided we use the object and offset
+	 * from that instead of the default, which starts from the
+	 * beginning.
+	 */
+	if (resume_token != NULL) {
+		resume_nvl = zfs_send_resume_token_to_nvlist(hdl,
+		    resume_token);
+		if (resume_nvl == NULL) {
+			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
+			goto out;
+		}
+
+		if (nvlist_lookup_uint64(resume_nvl, "object", &obj) != 0 ||
+		    nvlist_lookup_uint64(resume_nvl, "offset", &off) != 0 ||
+		    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
+		    nvlist_lookup_uint64(resume_nvl, "toguid",
+		    &resume_guid) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "provided resume token is corrupt"));
+			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
+			goto out;
+		}
+
+		if (nvlist_lookup_uint64(saved_nvl, "toguid",
+		    &saved_guid)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "dataset's resume token is corrupt"));
+			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
+			goto out;
+		}
+
+		if (resume_guid != saved_guid) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "provided resume token does not match dataset"));
+			ret = zfs_error(hdl, EZFS_BADBACKUP, errbuf);
+			goto out;
+		}
+	}
+	/* Overwrite object/offset/bytes/toname in the dataset's token. */
+	(void) nvlist_remove_all(saved_nvl, "object");
+	fnvlist_add_uint64(saved_nvl, "object", obj);
+
+	(void) nvlist_remove_all(saved_nvl, "offset");
+	fnvlist_add_uint64(saved_nvl, "offset", off);
+
+	(void) nvlist_remove_all(saved_nvl, "bytes");
+	fnvlist_add_uint64(saved_nvl, "bytes", bytes);
+
+	(void) nvlist_remove_all(saved_nvl, "toname");
+	fnvlist_add_string(saved_nvl, "toname", zhp->zfs_name);
+
+	ret = zfs_send_resume_impl(hdl, flags, outfd, saved_nvl);
+
+out:
+	fnvlist_free(saved_nvl);
+	fnvlist_free(resume_nvl);
+	return (ret);
+}
+
 /*
- * Generate a send stream for the dataset identified by the argument zhp.
+ * This function informs the target system that the recursive send is complete.
+ * The record is also expected in the case of a send -p.
+ */
+static int
+send_conclusion_record(int fd, zio_cksum_t *zc)
+{
+	/* Start from an all-zero record; the checksum is set only if given. */
+	dmu_replay_record_t end_rec = { 0 };
+
+	end_rec.drr_type = DRR_END;
+	if (zc != NULL)
+		end_rec.drr_u.drr_end.drr_checksum = *zc;
+	ssize_t written = write(fd, &end_rec, sizeof (end_rec));
+	return (written == -1 ? errno : 0);
+}
+
+/*
+ * This function is responsible for sending the records that contain the
+ * necessary information for the target system's libzfs to be able to set the
+ * properties of the filesystem being received, or to be able to prepare for
+ * a recursive receive.
+ *
+ * The "zhp" argument is the handle of the snapshot we are sending
+ * (the "tosnap").  The "from" argument is the short snapshot name (the part
+ * after the @) of the incremental source.
+ */
+static int
+send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
+    boolean_t gather_props, boolean_t recursive, boolean_t verbose,
+    boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t skipmissing,
+    boolean_t backup, boolean_t holds, boolean_t props, boolean_t doall,
+    nvlist_t **fssp, avl_tree_t **fsavlp)
+{
+	int err = 0;
+	char *packbuf = NULL;
+	size_t buflen = 0;
+	zio_cksum_t zc = { {0} };
+	int featureflags = 0;
+	/* name of filesystem/volume that contains snapshot we are sending */
+	char tofs[ZFS_MAX_DATASET_NAME_LEN];
+	/* short name of snap we are sending */
+	char *tosnap = "";
+
+	char errbuf[1024];
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "warning: cannot send '%s'"), zhp->zfs_name);
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
+	    ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
+		featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+	}
+
+	if (holds)
+		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
+
+	(void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
+	char *at = strchr(tofs, '@');
+	if (at != NULL) {
+		*at = '\0';
+		tosnap = at + 1;
+	}
+
+	if (gather_props) {
+		nvlist_t *hdrnv = fnvlist_alloc();
+		nvlist_t *fss = NULL;
+
+		if (from != NULL)
+			fnvlist_add_string(hdrnv, "fromsnap", from);
+		fnvlist_add_string(hdrnv, "tosnap", tosnap);
+		if (!recursive)
+			fnvlist_add_boolean(hdrnv, "not_recursive");
+		if (raw)
+			fnvlist_add_boolean(hdrnv, "raw");
+
+		if ((err = gather_nvlist(zhp->zfs_hdl, tofs,
+		    from, tosnap, recursive, raw, doall, replicate, skipmissing,
+		    verbose, backup, holds, props, &fss, fsavlp)) != 0) {
+			fnvlist_free(hdrnv);
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
+			    errbuf));
+		}
+		/* Do not let the properties list exceed the size limit. */
+		if ((fnvlist_size(fss) + fnvlist_size(hdrnv)) >
+		    zhp->zfs_hdl->libzfs_max_nvlist) {
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN, "warning: cannot send '%s': "
+			    "the size of the list of snapshots and properties "
+			    "is too large to be received successfully.\n"
+			    "Select a smaller number of snapshots to send.\n"),
+			    zhp->zfs_name);
+			fnvlist_free(fss);
+			fnvlist_free(hdrnv);
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOSPC,
+			    errbuf));
+		}
+		fnvlist_add_nvlist(hdrnv, "fss", fss);
+		VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
+		    0));
+		if (fssp != NULL)
+			*fssp = fss;
+		else
+			fnvlist_free(fss);
+		fnvlist_free(hdrnv);
+	}
+
+	if (!dryrun) {
+		dmu_replay_record_t drr = { 0 };
+		/* write first begin record */
+		drr.drr_type = DRR_BEGIN;
+		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
+		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
+		    drr_versioninfo, DMU_COMPOUNDSTREAM);
+		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
+		    drr_versioninfo, featureflags);
+		if (snprintf(drr.drr_u.drr_begin.drr_toname,
+		    sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
+		    tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
+			free(packbuf);
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
+			    errbuf));
+		}
+		drr.drr_payloadlen = buflen;
+		err = dump_record(&drr, packbuf, buflen, &zc, fd);
+		free(packbuf);
+		if (err != 0) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
+			    errbuf));
+		}
+		err = send_conclusion_record(fd, &zc);
+		if (err != 0) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
+			    errbuf));
+		}
+	} else {
+		/* Nothing is written on a dry run; drop the packed header. */
+		free(packbuf);
+	}
+	return (0);
+}
+
+/*
+ * Generate a send stream.  The "zhp" argument is the filesystem/volume
+ * that contains the snapshot to send.  The "fromsnap" argument is the
+ * short name (the part after the '@') of the snapshot that is the
+ * incremental source to send from (if non-NULL).  The "tosnap" argument
+ * is the short name of the snapshot to send.
  *
  * The content of the send stream is the snapshot identified by
  * 'tosnap'.  Incremental streams are requested in two ways:
@@ -1943,10 +2170,6 @@
 	avl_tree_t *fsavl = NULL;
 	static uint64_t holdseq;
 	int spa_version;
-	pthread_t tid = 0;
-	int pipefd[2];
-	dedup_arg_t dda = { 0 };
-	int featureflags = 0;
 	FILE *fout;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -1958,140 +2181,62 @@
 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
 	}
 
-	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
-		uint64_t version;
-		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
-		if (version >= ZPL_VERSION_SA) {
-			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+	if (fromsnap) {
+		char full_fromsnap_name[ZFS_MAX_DATASET_NAME_LEN];
+		if (snprintf(full_fromsnap_name, sizeof (full_fromsnap_name),
+		    "%s@%s", zhp->zfs_name, fromsnap) >=
+		    sizeof (full_fromsnap_name)) {
+			err = EINVAL;
+			goto stderr_out;
 		}
-	}
-
-	if (flags->holds)
-		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
-
-	/*
-	 * Start the dedup thread if this is a dedup stream. We do not bother
-	 * doing this if this a raw send of an encrypted dataset with dedup off
-	 * because normal encrypted blocks won't dedup.
-	 */
-	if (flags->dedup && !flags->dryrun && !(flags->raw &&
-	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
-	    zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
-		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
-		    DMU_BACKUP_FEATURE_DEDUPPROPS);
-		if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
-			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
-			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
-			    errbuf));
+		zfs_handle_t *fromsnapn = zfs_open(zhp->zfs_hdl,
+		    full_fromsnap_name, ZFS_TYPE_SNAPSHOT);
+		if (fromsnapn == NULL) {
+			err = -1;
+			goto err_out;
 		}
-		dda.outputfd = outfd;
-		dda.inputfd = pipefd[1];
-		dda.dedup_hdl = zhp->zfs_hdl;
-		if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
-			(void) close(pipefd[0]);
-			(void) close(pipefd[1]);
-			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
-			return (zfs_error(zhp->zfs_hdl,
-			    EZFS_THREADCREATEFAILED, errbuf));
-		}
+		zfs_close(fromsnapn);
 	}
 
 	if (flags->replicate || flags->doall || flags->props ||
 	    flags->holds || flags->backup) {
-		dmu_replay_record_t drr = { 0 };
-		char *packbuf = NULL;
-		size_t buflen = 0;
-		zio_cksum_t zc;
-
-		ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
-
-		if (flags->replicate || flags->props || flags->backup ||
-		    flags->holds) {
-			nvlist_t *hdrnv;
-
-			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
-			if (fromsnap) {
-				VERIFY(0 == nvlist_add_string(hdrnv,
-				    "fromsnap", fromsnap));
-			}
-			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
-			if (!flags->replicate) {
-				VERIFY(0 == nvlist_add_boolean(hdrnv,
-				    "not_recursive"));
-			}
-			if (flags->raw) {
-				VERIFY(0 == nvlist_add_boolean(hdrnv, "raw"));
-			}
-
-			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
-			    fromsnap, tosnap, flags->replicate, flags->raw,
-			    flags->doall, flags->replicate, flags->verbose,
-			    flags->backup, flags->holds, flags->props, &fss,
-			    &fsavl);
-			if (err)
-				goto err_out;
-			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
-			err = nvlist_pack(hdrnv, &packbuf, &buflen,
-			    NV_ENCODE_XDR, 0);
-			if (debugnvp)
-				*debugnvp = hdrnv;
-			else
-				nvlist_free(hdrnv);
-			if (err)
-				goto stderr_out;
+		char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
+		if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
+		    "%s@%s", zhp->zfs_name, tosnap) >=
+		    sizeof (full_tosnap_name)) {
+			err = EINVAL;
+			goto stderr_out;
 		}
-
-		if (!flags->dryrun) {
-			/* write first begin record */
-			drr.drr_type = DRR_BEGIN;
-			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
-			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
-			    drr_versioninfo, DMU_COMPOUNDSTREAM);
-			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
-			    drr_versioninfo, featureflags);
-			if (snprintf(drr.drr_u.drr_begin.drr_toname,
-			    sizeof (drr.drr_u.drr_begin.drr_toname),
-			    "%s@%s", zhp->zfs_name, tosnap) >=
-			    sizeof (drr.drr_u.drr_begin.drr_toname)) {
-				err = EINVAL;
-				goto stderr_out;
-			}
-			drr.drr_payloadlen = buflen;
-
-			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
-			free(packbuf);
-			if (err != 0)
-				goto stderr_out;
-
-			/* write end record */
-			bzero(&drr, sizeof (drr));
-			drr.drr_type = DRR_END;
-			drr.drr_u.drr_end.drr_checksum = zc;
-			err = write(outfd, &drr, sizeof (drr));
-			if (err == -1) {
-				err = errno;
-				goto stderr_out;
-			}
-
-			err = 0;
+		zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
+		    full_tosnap_name, ZFS_TYPE_SNAPSHOT);
+		if (tosnap == NULL) {
+			err = -1;
+			goto err_out;
 		}
+		err = send_prelim_records(tosnap, fromsnap, outfd,
+		    flags->replicate || flags->props || flags->holds,
+		    flags->replicate, flags->verbosity > 0, flags->dryrun,
+		    flags->raw, flags->replicate, flags->skipmissing,
+		    flags->backup, flags->holds, flags->props, flags->doall,
+		    &fss, &fsavl);
+		zfs_close(tosnap);
+		if (err != 0)
+			goto err_out;
 	}
 
 	/* dump each stream */
 	sdd.fromsnap = fromsnap;
 	sdd.tosnap = tosnap;
-	if (tid != 0)
-		sdd.outfd = pipefd[0];
-	else
-		sdd.outfd = outfd;
+	sdd.outfd = outfd;
 	sdd.replicate = flags->replicate;
 	sdd.doall = flags->doall;
 	sdd.fromorigin = flags->fromorigin;
 	sdd.fss = fss;
 	sdd.fsavl = fsavl;
-	sdd.verbose = flags->verbose;
+	sdd.verbosity = flags->verbosity;
 	sdd.parsable = flags->parsable;
 	sdd.progress = flags->progress;
+	sdd.progressastitle = flags->progressastitle;
 	sdd.dryrun = flags->dryrun;
 	sdd.large_block = flags->largeblock;
 	sdd.embed_data = flags->embed_data;
@@ -2103,7 +2248,7 @@
 	sdd.filter_cb_arg = cb_arg;
 	if (debugnvp)
 		sdd.debugnv = *debugnvp;
-	if (sdd.verbose && sdd.dryrun)
+	if (sdd.verbosity != 0 && sdd.dryrun)
 		sdd.std_out = B_TRUE;
 	fout = sdd.std_out ? stdout : stderr;
 
@@ -2121,7 +2266,7 @@
 		++holdseq;
 		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
 		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
-		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
+		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
 		if (sdd.cleanup_fd < 0) {
 			err = errno;
 			goto stderr_out;
@@ -2132,7 +2277,7 @@
 		sdd.snapholds = NULL;
 	}
 
-	if (flags->verbose || sdd.snapholds != NULL) {
+	if (flags->verbosity != 0 || sdd.snapholds != NULL) {
 		/*
 		 * Do a verbose no-op dry run to get all the verbose output
 		 * or to gather snapshot hold's before generating any data,
@@ -2144,7 +2289,7 @@
 		if (err != 0)
 			goto stderr_out;
 
-		if (flags->verbose) {
+		if (flags->verbosity != 0) {
 			if (flags->parsable) {
 				(void) fprintf(fout, "size\t%llu\n",
 				    (longlong_t)sdd.size);
@@ -2176,24 +2321,17 @@
 		}
 
 		sdd.dryrun = B_FALSE;
-		sdd.verbose = B_FALSE;
+		sdd.verbosity = 0;
 	}
 
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
-	nvlist_free(fss);
+	fnvlist_free(fss);
 
 	/* Ensure no snaps found is treated as an error. */
 	if (err == 0 && !sdd.seento)
 		err = ENOENT;
 
-	if (tid != 0) {
-		if (err != 0)
-			(void) pthread_cancel(tid);
-		(void) close(pipefd[0]);
-		(void) pthread_join(tid, NULL);
-	}
-
 	if (sdd.cleanup_fd != -1) {
 		VERIFY(0 == close(sdd.cleanup_fd));
 		sdd.cleanup_fd = -1;
@@ -2206,12 +2344,9 @@
 		 * there was some error, because it might not be totally
 		 * failed.
 		 */
-		dmu_replay_record_t drr = { 0 };
-		drr.drr_type = DRR_END;
-		if (write(outfd, &drr, sizeof (drr)) == -1) {
-			return (zfs_standard_error(zhp->zfs_hdl,
-			    errno, errbuf));
-		}
+		err = send_conclusion_record(outfd, NULL);
+		if (err != 0)
+			return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
 	}
 
 	return (err || sdd.err);
@@ -2220,56 +2355,239 @@
 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
 err_out:
 	fsavl_destroy(fsavl);
-	nvlist_free(fss);
+	fnvlist_free(fss);
 	fnvlist_free(sdd.snapholds);
 
 	if (sdd.cleanup_fd != -1)
 		VERIFY(0 == close(sdd.cleanup_fd));
-	if (tid != 0) {
-		(void) pthread_cancel(tid);
-		(void) close(pipefd[0]);
-		(void) pthread_join(tid, NULL);
-	}
 	return (err);
 }
 
-int
-zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags)
+static zfs_handle_t *
+name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
 {
-	int err = 0;
+	char dirname[ZFS_MAX_DATASET_NAME_LEN];
+	(void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
+	char *c = strchr(dirname, '@');
+	if (c != NULL)
+		*c = '\0';
+	return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
+}
+
+/*
+ * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
+ * an earlier snapshot in the same filesystem, or a snapshot before later's
+ * origin, or its origin's origin, etc.  B_FALSE if a dataset can't be opened.
+ */
+static boolean_t
+snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
+{
+	boolean_t ret = B_FALSE;
+	zfs_handle_t *origin = NULL;
+	char clonename[ZFS_MAX_DATASET_NAME_LEN];
+	uint64_t later_txg =
+	    (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
+	    later->zfs_type == ZFS_TYPE_VOLUME ?
+	    UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
+	uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
+
+	if (earlier_txg >= later_txg)
+		return (B_FALSE);
+
+	zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
+	    earlier->zfs_name);
+	zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
+	    later->zfs_name);
+
+	/* zfs_open() can fail; never dereference a NULL handle below. */
+	if (earlier_dir == NULL || later_dir == NULL)
+		goto out;
+
+	if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
+		ret = B_TRUE;
+		goto out;
+	}
+
+	if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
+	    ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0)
+		goto out;
+	origin = zfs_open(earlier->zfs_hdl, clonename, ZFS_TYPE_DATASET);
+	if (origin == NULL)
+		goto out;
+
+	/*
+	 * If "earlier" is exactly the origin, then
+	 * snapshot_is_before(earlier, origin) will return false (because
+	 * they're the same).
+	 */
+	if (zfs_prop_get_int(origin, ZFS_PROP_CREATETXG) == earlier_txg &&
+	    strcmp(origin->zfs_name, earlier->zfs_name) == 0)
+		ret = B_TRUE;
+	else
+		ret = snapshot_is_before(earlier, origin);
+out:
+	if (origin != NULL)
+		zfs_close(origin);
+	if (earlier_dir != NULL)
+		zfs_close(earlier_dir);
+	if (later_dir != NULL)
+		zfs_close(later_dir);
+	return (ret);
+}
+
+/*
+ * The "zhp" argument is the handle of the dataset to send (typically a
+ * snapshot).  The "from" argument is the full name of the snapshot or
+ * bookmark that is the incremental source.
+ */
+int
+zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
+    const char *redactbook)
+{
+	int err;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	enum lzc_send_flags lzc_flags = 0;
-	FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr;
+	char *name = zhp->zfs_name;
+	pthread_t ptid;
+	progress_arg_t pa = { 0 };
+	uint64_t size = 0;
+
 	char errbuf[1024];
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "warning: cannot send '%s'"), name);
 
-	if (flags.largeblock)
-		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
-	if (flags.embed_data)
-		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
-	if (flags.compress)
-		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
-	if (flags.raw)
-		lzc_flags |= LZC_SEND_FLAG_RAW;
+	if (from != NULL && strchr(from, '@')) {
+		zfs_handle_t *from_zhp = zfs_open(hdl, from,
+		    ZFS_TYPE_DATASET);
+		if (from_zhp == NULL)
+			return (-1);
+		if (!snapshot_is_before(from_zhp, zhp)) {
+			zfs_close(from_zhp);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not an earlier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+		}
+		zfs_close(from_zhp);
+	}
 
-	if (flags.verbose) {
-		uint64_t size = 0;
-		err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size);
-		if (err == 0) {
-			send_print_verbose(fout, zhp->zfs_name, from, size,
-			    flags.parsable);
-		} else {
-			(void) fprintf(stderr, "Cannot estimate send size: "
-			    "%s\n", strerror(errno));
+	if (redactbook != NULL) {
+		char bookname[ZFS_MAX_DATASET_NAME_LEN];
+		nvlist_t *redact_snaps;
+		zfs_handle_t *book_zhp;
+		char *at, *pound;
+		int dsnamelen;
+
+		pound = strchr(redactbook, '#');
+		if (pound != NULL)
+			redactbook = pound + 1;
+		at = strchr(name, '@');
+		if (at == NULL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot do a redacted send to a filesystem"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		}
+		dsnamelen = at - name;
+		if (snprintf(bookname, sizeof (bookname), "%.*s#%s",
+		    dsnamelen, name, redactbook)
+		    >= sizeof (bookname)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid bookmark name"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+		book_zhp = zfs_open(hdl, bookname, ZFS_TYPE_BOOKMARK);
+		if (book_zhp == NULL)
+			return (-1);
+		if (nvlist_lookup_nvlist(book_zhp->zfs_props,
+		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
+		    &redact_snaps) != 0 || redact_snaps == NULL) {
+			zfs_close(book_zhp);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not a redaction bookmark"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		}
+		zfs_close(book_zhp);
+	}
+
+	// MLW - Uncomment to test flag continuity. 
+	// fprintf(stderr, "flags->block_diff) %d", flags->block_diff);
+
+	/*
+	 * Send fs properties
+	 */
+	if (flags->props || flags->holds || flags->backup ) {
+		/*
+		 * Note: the header generated by send_prelim_records()
+		 * assumes that the incremental source is in the same
+		 * filesystem/volume as the target (which is a requirement
+		 * when doing "zfs send -R").  But that isn't always the
+		 * case here (e.g. send from snap in origin, or send from
+		 * bookmark).  We pass from=NULL, which will omit this
+		 * information from the prelim records; it isn't used
+		 * when receiving this type of stream.
+		 */
+		err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
+		    flags->verbosity > 0, flags->dryrun, flags->raw,
+		    flags->replicate, B_FALSE, flags->backup, flags->holds,
+		    flags->props, flags->doall, NULL, NULL);
+		if (err != 0)
+			return (err);
+	}
+
+	/*
+	 * Perform size estimate if verbose was specified.
+	 */
+	if (flags->verbosity != 0 || flags->progressastitle) {
+		err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
+		    errbuf, &size);
+		if (err != 0)
+			return (err);
+	}
+
+	if (flags->dryrun)
+		return (0);
+
+	/*
+	 * If progress reporting is requested, spawn a new thread to poll
+	 * ZFS_IOC_SEND_PROGRESS at a regular interval.
+	 */
+	if (flags->progress || flags->progressastitle) {
+		pa.pa_zhp = zhp;
+		pa.pa_fd = fd;
+		pa.pa_parsable = flags->parsable;
+		pa.pa_estimate = B_FALSE;
+		pa.pa_verbosity = flags->verbosity;
+		pa.pa_size = size;
+		pa.pa_astitle = flags->progressastitle;
+		pa.pa_progress = flags->progress;
+
+		err = pthread_create(&ptid, NULL, send_progress_thread, &pa);
+		/* pthread_create() returns its error; errno is not set. */
+		if (err != 0) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_THREADCREATEFAILED, errbuf));
 		}
 	}
 
-	if (flags.dryrun)
-		return (err);
+	err = lzc_send_redacted(name, from, fd,
+	    lzc_flags_from_sendflags(flags), redactbook);
 
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "warning: cannot send '%s'"), zhp->zfs_name);
+	if (flags->progress || flags->progressastitle) {
+		void *status = NULL;
+		(void) pthread_cancel(ptid);
+		(void) pthread_join(ptid, &status);
+		int error = (int)(uintptr_t)status;
+		if (error != 0 && status != PTHREAD_CANCELED)
+			return (zfs_standard_error_fmt(hdl, error,
+			    dgettext(TEXT_DOMAIN,
+			    "progress thread exited nonzero")));
+	}
 
-	err = lzc_send(zhp->zfs_name, from, fd, lzc_flags);
+	if (err == 0 && (flags->props || flags->holds || flags->backup || flags->block_diff)) {
+		/* Write the final end record. */
+		err = send_conclusion_record(fd, NULL);
+		if (err != 0)
+			return (zfs_standard_error(hdl, err, errbuf));
+	}
 	if (err != 0) {
 		switch (errno) {
 		case EXDEV:
@@ -2279,7 +2597,7 @@
 
 		case ENOENT:
 		case ESRCH:
-			if (lzc_exists(zhp->zfs_name)) {
+			if (lzc_exists(name)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "incremental source (%s) does not exist"),
 				    from);
@@ -2298,7 +2616,9 @@
 			return (zfs_error(hdl, EZFS_BUSY, errbuf));
 
 		case EDQUOT:
+		case EFAULT:
 		case EFBIG:
+		case EINVAL:
 		case EIO:
 		case ENOLINK:
 		case ENOSPC:
@@ -2306,9 +2626,8 @@
 		case ENXIO:
 		case EPIPE:
 		case ERANGE:
-		case EFAULT:
 		case EROFS:
-			zfs_error_aux(hdl, strerror(errno));
+			zfs_error_aux(hdl, "%s", strerror(errno));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 
 		default:
@@ -2330,8 +2649,6 @@
 	int rv;
 	int len = ilen;
 
-	assert(ilen <= SPA_MAXBLOCKSIZE);
-
 	do {
 		rv = read(fd, cp, len);
 		cp += rv;
@@ -2365,6 +2682,12 @@
 	if (buf == NULL)
 		return (ENOMEM);
 
+	if (len > hdl->libzfs_max_nvlist) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
+		free(buf);
+		return (ENOMEM);
+	}
+
 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
 	if (err != 0) {
 		free(buf);
@@ -2623,8 +2946,38 @@
 	boolean_t bookmark_ok;
 	char *name;
 	char *skip;
+	uint64_t *redact_snap_guids;
+	uint64_t num_redact_snaps;
 } guid_to_name_data_t;
 
+/* B_TRUE if zhp is a bookmark whose redact snaps are all listed in gtnd. */
+static boolean_t
+redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
+{
+	uint64_t *bmark_snaps;
+	uint_t bmark_num_snaps, i, j;
+	nvlist_t *nvl;
+	if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
+		return (B_FALSE);
+	/* A bookmark lacking the property is not a redaction bookmark. */
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &nvl) != 0 ||
+	    nvlist_lookup_uint64_array(nvl, ZPROP_VALUE, &bmark_snaps,
+	    &bmark_num_snaps) != 0 ||
+	    bmark_num_snaps != gtnd->num_redact_snaps)
+		return (B_FALSE);
+	/* Each of the bookmark's guids must occur in the caller's list. */
+	for (i = 0; i < bmark_num_snaps; i++) {
+		for (j = 0; j < bmark_num_snaps; j++) {
+			if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
+				break;
+		}
+		if (j == bmark_num_snaps)
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
 static int
 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
 {
@@ -2639,7 +2992,8 @@
 		return (0);
 	}
 
-	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
+	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
+	    (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
 		(void) strcpy(gtnd->name, zhp->zfs_name);
 		zfs_close(zhp);
 		return (EEXIST);
@@ -2658,10 +3012,19 @@
  * progressively larger portions of the hierarchy.  This allows one to send a
  * tree of datasets individually and guarantee that we will find the source
  * guid within that hierarchy, even if there are multiple matches elsewhere.
+ *
+ * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
+ * the specified number of redaction snapshots.  If num_redact_snaps isn't 0 or
+ * -1, then redact_snap_guids will be an array of the guids of the snapshots the
+ * redaction bookmark was created with.  If num_redact_snaps is -1, then we will
+ * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
+ * given guid.  Note that a redaction bookmark can be returned if
+ * num_redact_snaps == -1.
  */
 static int
-guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
-    boolean_t bookmark_ok, char *name)
+guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
+    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
+    uint64_t num_redact_snaps, char *name)
 {
 	char pname[ZFS_MAX_DATASET_NAME_LEN];
 	guid_to_name_data_t gtnd;
@@ -2670,6 +3033,8 @@
 	gtnd.bookmark_ok = bookmark_ok;
 	gtnd.name = name;
 	gtnd.skip = NULL;
+	gtnd.redact_snap_guids = redact_snap_guids;
+	gtnd.num_redact_snaps = num_redact_snaps;
 
 	/*
 	 * Search progressively larger portions of the hierarchy, starting
@@ -2708,6 +3073,14 @@
 	return (ENOENT);
 }
 
+static int
+guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
+    boolean_t bookmark_ok, char *name)
+{
+	return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
+	    -1, name));
+}
+
 /*
  * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
  * guid1 is after guid2.
@@ -2729,14 +3102,14 @@
 		return (1);
 
 	nvfs = fsavl_find(avl, guid1, &snapname);
-	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	fsname = fnvlist_lookup_string(nvfs, "name");
 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
 	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
 	if (guid1hdl == NULL)
 		return (-1);
 
 	nvfs = fsavl_find(avl, guid2, &snapname);
-	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	fsname = fnvlist_lookup_string(nvfs, "name");
 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
 	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
 	if (guid2hdl == NULL) {
@@ -2767,21 +3140,14 @@
  * sent datasets to their final locations in the dataset hierarchy.
  */
 static int
-recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname,
+recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs,
     nvlist_t *stream_nv, avl_tree_t *stream_avl)
 {
 	int err;
 	nvpair_t *fselem = NULL;
 	nvlist_t *stream_fss;
-	char *cp;
-	char top_zfs[ZFS_MAX_DATASET_NAME_LEN];
 
-	(void) strcpy(top_zfs, destname);
-	cp = strrchr(top_zfs, '@');
-	if (cp != NULL)
-		*cp = '\0';
-
-	VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));
+	stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
 
 	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
 		zfs_handle_t *zhp = NULL;
@@ -2795,9 +3161,9 @@
 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
 
 		keylocation[0] = '\0';
-		VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
-		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
-		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
+		stream_nvfs = fnvpair_value_nvlist(fselem);
+		snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps");
+		props = fnvlist_lookup_nvlist(stream_nvfs, "props");
 		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
 
 		/* find a snapshot from the stream that exists locally */
@@ -2805,8 +3171,8 @@
 		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
 			uint64_t guid;
 
-			VERIFY(0 == nvpair_value_uint64(snapel, &guid));
-			err = guid_to_name(hdl, destname, guid, B_FALSE,
+			guid = fnvpair_value_uint64(snapel);
+			err = guid_to_name(hdl, top_zfs, guid, B_FALSE,
 			    fsname);
 			if (err == 0)
 				break;
@@ -2850,9 +3216,8 @@
 				}
 			}
 
-			VERIFY(0 == nvlist_lookup_string(props,
-			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
-			    &stream_keylocation));
+			stream_keylocation = fnvlist_lookup_string(props,
+			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
 
 			/*
 			 * Refresh the properties in case the call to
@@ -2918,7 +3283,7 @@
 	boolean_t needagain, progress, recursive;
 	char *s1, *s2;
 
-	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
+	fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap");
 
 	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
 	    ENOENT);
@@ -2929,10 +3294,10 @@
 again:
 	needagain = progress = B_FALSE;
 
-	VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
+	deleted = fnvlist_alloc();
 
 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
-	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE,
+	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE,
 	    B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
 		return (error);
 
@@ -2952,11 +3317,11 @@
 
 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
 
-		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
-		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
-		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
-		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
-		    &parent_fromsnap_guid));
+		nvfs = fnvpair_value_nvlist(fselem);
+		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
+		fsname = fnvlist_lookup_string(nvfs, "name");
+		parent_fromsnap_guid = fnvlist_lookup_uint64(nvfs,
+		    "parentfromsnap");
 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
 
 		/*
@@ -2967,7 +3332,7 @@
 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
 			uint64_t thisguid;
 
-			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			thisguid = fnvpair_value_uint64(snapelem);
 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
 
 			if (stream_nvfs != NULL)
@@ -2987,8 +3352,8 @@
 
 				origin_nvfs = fsavl_find(local_avl, originguid,
 				    NULL);
-				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
-				    "name", &origin_fsname));
+				origin_fsname = fnvlist_lookup_string(
+				    origin_nvfs, "name");
 				error = recv_promote(hdl, fsname, origin_fsname,
 				    flags);
 				if (error == 0)
@@ -2999,7 +3364,7 @@
 				break;
 			case -1:
 				fsavl_destroy(local_avl);
-				nvlist_free(local_nv);
+				fnvlist_free(local_nv);
 				return (-1);
 			}
 			/*
@@ -3019,7 +3384,7 @@
 
 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
 
-			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			thisguid = fnvpair_value_uint64(snapelem);
 			found = fsavl_find(stream_avl, thisguid,
 			    &stream_snapname);
 
@@ -3113,10 +3478,9 @@
 			continue;
 		}
 
-		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
-		    "name", &stream_fsname));
-		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
-		    "parentfromsnap", &stream_parent_fromsnap_guid));
+		stream_fsname = fnvlist_lookup_string(stream_nvfs, "name");
+		stream_parent_fromsnap_guid = fnvlist_lookup_uint64(
+		    stream_nvfs, "parentfromsnap");
 
 		s1 = strrchr(fsname, '/');
 		s2 = strrchr(stream_fsname, '/');
@@ -3165,8 +3529,7 @@
 			if (parent != NULL) {
 				char *pname;
 
-				VERIFY(0 == nvlist_lookup_string(parent, "name",
-				    &pname));
+				pname = fnvlist_lookup_string(parent, "name");
 				(void) snprintf(tryname, sizeof (tryname),
 				    "%s%s", pname, strrchr(stream_fsname, '/'));
 			} else {
@@ -3183,8 +3546,7 @@
 			    strlen(tofs)+1, newname, flags);
 
 			if (renamed != NULL && newname[0] != '\0') {
-				VERIFY(0 == nvlist_add_boolean(renamed,
-				    newname));
+				fnvlist_add_boolean(renamed, newname);
 			}
 
 			if (error)
@@ -3196,8 +3558,8 @@
 
 doagain:
 	fsavl_destroy(local_avl);
-	nvlist_free(local_nv);
-	nvlist_free(deleted);
+	fnvlist_free(local_nv);
+	fnvlist_free(deleted);
 
 	if (needagain && progress) {
 		/* do another pass to fix up temporary names */
@@ -3212,8 +3574,7 @@
 static int
 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
     recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
-    char **top_zfs, int cleanup_fd, uint64_t *action_handlep,
-    nvlist_t *cmdprops)
+    char **top_zfs, nvlist_t *cmdprops)
 {
 	nvlist_t *stream_nv = NULL;
 	avl_tree_t *stream_avl = NULL;
@@ -3293,8 +3654,7 @@
 	if (drr->drr_payloadlen != 0) {
 		nvlist_t *stream_fss;
 
-		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
-		    &stream_fss));
+		stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "couldn't allocate avl tree"));
@@ -3330,8 +3690,7 @@
 			}
 
 			if (!flags->dryrun && !flags->nomount) {
-				VERIFY(0 == nvlist_alloc(&renamed,
-				    NV_UNIQUE_NAME, 0));
+				renamed = fnvlist_alloc();
 			}
 
 			softerr = recv_incremental_replication(hdl, tofs, flags,
@@ -3347,7 +3706,8 @@
 				    ZFS_TYPE_FILESYSTEM);
 				if (zhp != NULL) {
 					clp = changelist_gather(zhp,
-					    ZFS_PROP_MOUNTPOINT, 0, 0);
+					    ZFS_PROP_MOUNTPOINT, 0,
+					    flags->forceunmount ? MS_FORCE : 0);
 					zfs_close(zhp);
 					if (clp != NULL) {
 						softerr |=
@@ -3357,7 +3717,7 @@
 				}
 			}
 
-			nvlist_free(renamed);
+			fnvlist_free(renamed);
 		}
 	}
 
@@ -3389,8 +3749,7 @@
 		 * recv_skip() and return 0).
 		 */
 		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
-		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
-		    action_handlep, sendsnap, cmdprops);
+		    sendfs, stream_nv, stream_avl, top_zfs, sendsnap, cmdprops);
 		if (error == ENODATA) {
 			error = 0;
 			break;
@@ -3407,14 +3766,14 @@
 		    stream_nv, stream_avl, NULL);
 	}
 
-	if (raw && softerr == 0) {
-		softerr = recv_fix_encryption_hierarchy(hdl, destname,
+	if (raw && softerr == 0 && *top_zfs != NULL) {
+		softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs,
 		    stream_nv, stream_avl);
 	}
 
 out:
 	fsavl_destroy(stream_avl);
-	nvlist_free(stream_nv);
+	fnvlist_free(stream_nv);
 	if (softerr)
 		error = -2;
 	if (anyerr)
@@ -3493,6 +3852,7 @@
 			}
 			payload_size =
 			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
+			assert(payload_size <= SPA_MAXBLOCKSIZE);
 			(void) recv_read(hdl, fd, buf,
 			    payload_size, B_FALSE, NULL);
 			break;
@@ -3540,12 +3900,12 @@
 
 static void
 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
-    boolean_t resumable)
+    boolean_t resumable, boolean_t checksum)
 {
 	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
 
-	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-	    "checksum mismatch or incomplete stream"));
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, (checksum ?
+	    "checksum mismatch" : "incomplete stream")));
 
 	if (!resumable)
 		return;
@@ -3642,24 +4002,6 @@
 		if (prop == ZFS_PROP_ORIGIN)
 			continue;
 
-		/*
-		 * we're trying to override or exclude a property that does not
-		 * make sense for this type of dataset, but we don't want to
-		 * fail if the receive is recursive: this comes in handy when
-		 * the send stream contains, for instance, a child ZVOL and
-		 * we're trying to receive it with "-o atime=on"
-		 */
-		if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
-		    !zfs_prop_user(name)) {
-			if (recursive)
-				continue;
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "property '%s' does not apply to datasets of this "
-			    "type"), name);
-			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
-			goto error;
-		}
-
 		/* raw streams can't override encryption properties */
 		if ((zfs_prop_encryption_key_param(prop) ||
 		    prop == ZFS_PROP_ENCRYPTION) && raw) {
@@ -3670,6 +4012,15 @@
 			goto error;
 		}
 
+		/*
+		 * For plain replicated send, we can ignore encryption
+		 * properties in every stream other than the first
+		 */
+		if ((zfs_prop_encryption_key_param(prop) || prop ==
+		    ZFS_PROP_ENCRYPTION) && !newfs && recursive && !raw) {
+			continue;
+		}
+
 		/* incremental streams can only exclude encryption properties */
 		if ((zfs_prop_encryption_key_param(prop) ||
 		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
@@ -3688,6 +4039,16 @@
 			 * a property: this is done by forcing an explicit
 			 * inherit on the destination so the effective value is
 			 * not the one we received from the send stream.
+			 */
+			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
+			    !zfs_prop_user(name)) {
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "Warning: %s: property '%s' does not "
+				    "apply to datasets of this type\n"),
+				    fsname, name);
+				continue;
+			}
+			/*
 			 * We do this only if the property is not already
 			 * locally-set, in which case its value will take
 			 * priority over the received anyway.
@@ -3707,14 +4068,32 @@
 			 * properties: if we're asked to exclude this kind of
 			 * values we remove them from "recvprops" input nvlist.
 			 */
-			if (!zfs_prop_inheritable(prop) &&
-			    !zfs_prop_user(name) && /* can be inherited too */
+			if (!zfs_prop_user(name) && /* can be inherited too */
+			    !zfs_prop_inheritable(prop) &&
 			    nvlist_exists(recvprops, name))
 				fnvlist_remove(recvprops, name);
 			else
 				fnvlist_add_nvpair(*oxprops, nvp);
 			break;
 		case DATA_TYPE_STRING: /* -o property=value */
+			/*
+			 * we're trying to override a property that does not
+			 * make sense for this type of dataset, but we don't
+			 * want to fail if the receive is recursive: this comes
+			 * in handy when the send stream contains, for
+			 * instance, a child ZVOL and we're trying to receive
+			 * it with "-o atime=on"
+			 */
+			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
+			    !zfs_prop_user(name)) {
+				if (recursive)
+					continue;
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' does not apply to datasets "
+				    "of this type"), name);
+				ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
 			fnvlist_add_nvpair(oprops, nvp);
 			break;
 		default:
@@ -3741,7 +4120,8 @@
 		if (cp != NULL)
 			*cp = '\0';
 
-		if (!raw && zfs_crypto_create(hdl, namebuf, voprops, NULL,
+		if (!raw && !(!newfs && recursive) &&
+		    zfs_crypto_create(hdl, namebuf, voprops, NULL,
 		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
 			fnvlist_free(voprops);
 			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
@@ -3776,8 +4156,8 @@
 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
     const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
     dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
-    avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
-    uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
+    avl_tree_t *stream_avl, char **top_zfs,
+    const char *finalsnap, nvlist_t *cmdprops)
 {
 	time_t begin_time;
 	int ioctl_err, ioctl_errno, err;
@@ -3786,7 +4166,7 @@
 	char errbuf[1024];
 	const char *chopprefix;
 	boolean_t newfs = B_FALSE;
-	boolean_t stream_wantsnewfs;
+	boolean_t stream_wantsnewfs, stream_resumingnewfs;
 	boolean_t newprops = B_FALSE;
 	uint64_t read_bytes = 0;
 	uint64_t errflags = 0;
@@ -3809,6 +4189,7 @@
 	boolean_t toplevel = B_FALSE;
 	boolean_t zoned = B_FALSE;
 	boolean_t hastoken = B_FALSE;
+	boolean_t redacted;
 	uint8_t *wkeydata = NULL;
 	uint_t wkeylen = 0;
 
@@ -3835,7 +4216,7 @@
 		    &parent_snapguid);
 		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
 		if (err) {
-			VERIFY(0 == nvlist_alloc(&rcvprops, NV_UNIQUE_NAME, 0));
+			rcvprops = fnvlist_alloc();
 			newprops = B_TRUE;
 		}
 
@@ -3856,22 +4237,22 @@
 		}
 
 		if (flags->canmountoff) {
-			VERIFY(0 == nvlist_add_uint64(rcvprops,
-			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
+			fnvlist_add_uint64(rcvprops,
+			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0);
 		} else if (newprops) {	/* nothing in rcvprops, eliminate it */
-			nvlist_free(rcvprops);
+			fnvlist_free(rcvprops);
 			rcvprops = NULL;
 			newprops = B_FALSE;
 		}
 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
-			VERIFY(0 == nvlist_lookup_nvlist(lookup,
-			    snapname, &snapprops_nvlist));
+			snapprops_nvlist = fnvlist_lookup_nvlist(lookup,
+			    snapname);
 		}
 		if (holds) {
 			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
 			    &lookup)) {
-				VERIFY(0 == nvlist_lookup_nvlist(lookup,
-				    snapname, &snapholds_nvlist));
+				snapholds_nvlist = fnvlist_lookup_nvlist(
+				    lookup, snapname);
 			}
 		}
 	}
@@ -3986,24 +4367,16 @@
 			(void) printf("found clone origin %s\n", origin);
 	}
 
-	if (!hdl->libzfs_dedup_warning_printed &&
-	    (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
 	    DMU_BACKUP_FEATURE_DEDUP)) {
 		(void) fprintf(stderr,
-		    gettext("WARNING: This is a deduplicated send stream.  "
-		    "The ability to send and\n"
-		    "receive deduplicated send streams is deprecated.  "
-		    "In the future, the\n"
-		    "ability to receive a deduplicated send stream with "
-		    "\"zfs receive\" will be\n"
-		    "removed. However, in the future, a utility will be "
-		    "provided to convert a\n"
-		    "deduplicated send stream to a regular "
-		    "(non-deduplicated) stream. This\n"
-		    "future utility will require that the send stream be "
-		    "located in a\n"
-		    "seek-able file, rather than provided by a pipe.\n\n"));
-		hdl->libzfs_dedup_warning_printed = B_TRUE;
+		    gettext("ERROR: \"zfs receive\" no longer supports "
+		    "deduplicated send streams.  Use\n"
+		    "the \"zstream redup\" command to convert this stream "
+		    "to a regular,\n"
+		    "non-deduplicated stream.\n"));
+		err = zfs_error(hdl, EZFS_NOTSUP, errbuf);
+		goto out;
 	}
 
 	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
@@ -4014,6 +4387,8 @@
 	    DMU_BACKUP_FEATURE_EMBED_DATA;
 	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
 	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
+	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
+	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
 
 	if (stream_wantsnewfs) {
 		/*
@@ -4076,6 +4451,9 @@
 	(void) strcpy(name, destsnap);
 	*strchr(name, '@') = '\0';
 
+	redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+	    DMU_BACKUP_FEATURE_REDACTED;
+
 	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
 		zfs_cmd_t zc = {"\0"};
 		zfs_handle_t *zhp;
@@ -4100,7 +4478,7 @@
 				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
 				goto out;
 			}
-			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+			if (zfs_ioctl(hdl, ZFS_IOC_SNAPSHOT_LIST_NEXT,
 			    &zc) == 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "destination has snapshots (eg. %s)\n"
@@ -4118,7 +4496,7 @@
 				goto out;
 			}
 			if (is_volume &&
-			    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT,
+			    zfs_ioctl(hdl, ZFS_IOC_DATASET_LIST_NEXT,
 			    &zc) == 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "destination has children (eg. %s)\n"
@@ -4178,9 +4556,10 @@
 		}
 
 		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
-		    stream_wantsnewfs) {
+		    (stream_wantsnewfs || stream_resumingnewfs)) {
 			/* We can't do online recv in this case */
-			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
+			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+			    flags->forceunmount ? MS_FORCE : 0);
 			if (clp == NULL) {
 				zfs_close(zhp);
 				err = -1;
@@ -4278,28 +4657,17 @@
 		(void) fflush(stdout);
 	}
 
-	if (flags->dryrun) {
-		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
-
-		/*
-		 * We have read the DRR_BEGIN record, but we have
-		 * not yet read the payload. For non-dryrun sends
-		 * this will be done by the kernel, so we must
-		 * emulate that here, before attempting to read
-		 * more records.
-		 */
-		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
-		    flags->byteswap, NULL);
-		free(buf);
-		if (err != 0)
-			goto out;
-
-		err = recv_skip(hdl, infd, flags->byteswap);
-		goto out;
+	/*
+	 * If this is the top-level dataset, record it so we can use it
+	 * for recursive operations later.
+	 */
+	if (top_zfs != NULL &&
+	    (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) {
+		toplevel = B_TRUE;
+		if (*top_zfs == NULL)
+			*top_zfs = zfs_strdup(hdl, name);
 	}
 
-	if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0))
-		toplevel = B_TRUE;
 	if (drrb->drr_type == DMU_OST_ZVOL) {
 		type = ZFS_TYPE_VOLUME;
 	} else if (drrb->drr_type == DMU_OST_ZFS) {
@@ -4333,10 +4701,30 @@
 		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
 	}
 
+	if (flags->dryrun) {
+		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
+
+		/*
+		 * We have read the DRR_BEGIN record, but we have
+		 * not yet read the payload. For non-dryrun sends
+		 * this will be done by the kernel, so we must
+		 * emulate that here, before attempting to read
+		 * more records.
+		 */
+		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
+		    flags->byteswap, NULL);
+		free(buf);
+		if (err != 0)
+			goto out;
+
+		err = recv_skip(hdl, infd, flags->byteswap);
+		goto out;
+	}
+
 	err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
 	    oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
-	    raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags,
-	    action_handlep, &prop_errors);
+	    raw, infd, drr_noswap, -1, &read_bytes, &errflags,
+	    NULL, &prop_errors);
 	ioctl_errno = ioctl_err;
 	prop_errflags = errflags;
 
@@ -4398,12 +4786,11 @@
 		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
 		    pair != NULL;
 		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
-			VERIFY(0 == nvlist_add_string(holds, destsnap,
-			    nvpair_name(pair)));
+			fnvlist_add_string(holds, destsnap, nvpair_name(pair));
 		}
 		(void) lzc_hold(holds, cleanup_fd, &errors);
-		nvlist_free(snapholds_nvlist);
-		nvlist_free(holds);
+		fnvlist_free(snapholds_nvlist);
+		fnvlist_free(holds);
 	}
 
 	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
@@ -4423,12 +4810,12 @@
 		 */
 		*cp = '\0';
 		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
-		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_TRUE,
-		    &local_nv, &local_avl) == 0) {
+		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE,
+		    B_TRUE, &local_nv, &local_avl) == 0) {
 			*cp = '@';
 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
 			fsavl_destroy(local_avl);
-			nvlist_free(local_nv);
+			fnvlist_free(local_nv);
 
 			if (fs != NULL) {
 				if (flags->verbose) {
@@ -4499,7 +4886,15 @@
 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
 		case ECKSUM:
-			recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
+		case ZFS_ERR_STREAM_TRUNCATED:
+			recv_ecksum_set_aux(hdl, destsnap, flags->resumable,
+			    ioctl_err == ECKSUM);
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "incremental send stream requires -L "
+			    "(--large-block), to match previous receive."));
 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
 		case ENOTSUP:
@@ -4515,7 +4910,8 @@
 		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "IV set guid missing. See errata %u at "
-			    "http://zfsonlinux.org/msg/ZFS-8000-ER."),
+			    "https://openzfs.github.io/openzfs-docs/msg/"
+			    "ZFS-8000-ER."),
 			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
@@ -4542,7 +4938,7 @@
 				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
 				break;
 			}
-			/* fallthru */
+			fallthrough;
 		default:
 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
 		}
@@ -4553,35 +4949,15 @@
 	 * children of the target filesystem if we did a replication
 	 * receive (indicated by stream_avl being non-NULL).
 	 */
-	cp = strchr(destsnap, '@');
-	if (cp && (ioctl_err == 0 || !newfs)) {
-		zfs_handle_t *h;
-
-		*cp = '\0';
-		h = zfs_open(hdl, destsnap,
-		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		if (h != NULL) {
-			if (h->zfs_type == ZFS_TYPE_VOLUME) {
-				*cp = '@';
-			} else if (newfs || stream_avl) {
-				/*
-				 * Track the first/top of hierarchy fs,
-				 * for mounting and sharing later.
-				 */
-				if (top_zfs && *top_zfs == NULL)
-					*top_zfs = zfs_strdup(hdl, destsnap);
-			}
-			zfs_close(h);
-		}
-		*cp = '@';
-	}
-
 	if (clp) {
 		if (!flags->nomount)
 			err |= changelist_postfix(clp);
 		changelist_free(clp);
 	}
 
+	if ((newfs || stream_avl) && type == ZFS_TYPE_FILESYSTEM && !redacted)
+		flags->domount = B_TRUE;
+
 	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
 		    "failed to clear unreceived properties on %s"), name);
@@ -4608,25 +4984,25 @@
 		zfs_nicebytes(bytes, buf1, sizeof (buf1));
 		zfs_nicebytes(bytes/delta, buf2, sizeof (buf1));
 
-		(void) printf("received %s stream in %lu seconds (%s/sec)\n",
-		    buf1, delta, buf2);
+		(void) printf("received %s stream in %lld seconds (%s/sec)\n",
+		    buf1, (longlong_t)delta, buf2);
 	}
 
 	err = 0;
 out:
 	if (prop_errors != NULL)
-		nvlist_free(prop_errors);
+		fnvlist_free(prop_errors);
 
 	if (tmp_keylocation[0] != '\0') {
-		VERIFY(0 == nvlist_add_string(rcvprops,
-		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
+		fnvlist_add_string(rcvprops,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation);
 	}
 
 	if (newprops)
-		nvlist_free(rcvprops);
+		fnvlist_free(rcvprops);
 
-	nvlist_free(oxprops);
-	nvlist_free(origprops);
+	fnvlist_free(oxprops);
+	fnvlist_free(origprops);
 
 	return (err);
 }
@@ -4685,8 +5061,8 @@
 static int
 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
     const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
-    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
-    uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
+    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs,
+    const char *finalsnap, nvlist_t *cmdprops)
 {
 	int err;
 	dmu_replay_record_t drr, drr_noswap;
@@ -4763,15 +5139,13 @@
 	if (!DMU_STREAM_SUPPORTED(featureflags) ||
 	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "stream has unsupported feature, feature flags = %lx"),
-		    featureflags);
+		    "stream has unsupported feature, feature flags = %llx"),
+		    (unsigned long long)featureflags);
 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
 	/* Holds feature is set once in the compound stream header. */
-	boolean_t holds = (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
-	    DMU_BACKUP_FEATURE_HOLDS);
-	if (holds)
+	if (featureflags & DMU_BACKUP_FEATURE_HOLDS)
 		flags->holds = B_TRUE;
 
 	if (strchr(drrb->drr_toname, '@') == NULL) {
@@ -4798,12 +5172,12 @@
 		}
 		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
 		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
-		    cleanup_fd, action_handlep, finalsnap, cmdprops));
+		    finalsnap, cmdprops));
 	} else {
 		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 		    DMU_COMPOUNDSTREAM);
 		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
-		    &zcksum, top_zfs, cleanup_fd, action_handlep, cmdprops));
+		    &zcksum, top_zfs, cmdprops));
 	}
 }
 
@@ -4820,8 +5194,6 @@
 {
 	char *top_zfs = NULL;
 	int err;
-	int cleanup_fd;
-	uint64_t action_handle = 0;
 	struct stat sb;
 	char *originsnap = NULL;
 
@@ -4834,37 +5206,12 @@
 		return (-2);
 	}
 
-#ifdef __linux__
-#ifndef F_SETPIPE_SZ
-#define	F_SETPIPE_SZ (F_SETLEASE + 7)
-#endif /* F_SETPIPE_SZ */
-
-#ifndef F_GETPIPE_SZ
-#define	F_GETPIPE_SZ (F_GETLEASE + 7)
-#endif /* F_GETPIPE_SZ */
-
 	/*
 	 * It is not uncommon for gigabytes to be processed in zfs receive.
-	 * Speculatively increase the buffer size via Linux-specific fcntl()
-	 * call.
+	 * Speculatively increase the buffer size if supported by the platform.
 	 */
-	if (S_ISFIFO(sb.st_mode)) {
-		FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "r");
-
-		if (procf != NULL) {
-			unsigned long max_psize;
-			long cur_psize;
-			if (fscanf(procf, "%lu", &max_psize) > 0) {
-				cur_psize = fcntl(infd, F_GETPIPE_SZ);
-				if (cur_psize > 0 &&
-				    max_psize > (unsigned long) cur_psize)
-					(void) fcntl(infd, F_SETPIPE_SZ,
-					    max_psize);
-			}
-			fclose(procf);
-		}
-	}
-#endif /* __linux__ */
+	if (S_ISFIFO(sb.st_mode))
+		libzfs_set_pipe_max(infd);
 
 	if (props) {
 		err = nvlist_lookup_string(props, "origin", &originsnap);
@@ -4872,32 +5219,42 @@
 			return (err);
 	}
 
-	cleanup_fd = open(ZFS_DEV, O_RDWR);
-	VERIFY(cleanup_fd >= 0);
-
 	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
-	    stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL, props);
+	    stream_avl, &top_zfs, NULL, props);
 
-	VERIFY(0 == close(cleanup_fd));
-
-	if (err == 0 && !flags->nomount && top_zfs) {
+	if (err == 0 && !flags->nomount && flags->domount && top_zfs) {
 		zfs_handle_t *zhp = NULL;
 		prop_changelist_t *clp = NULL;
 
-		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
-		if (zhp != NULL) {
-			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
-			    CL_GATHER_MOUNT_ALWAYS, 0);
-			zfs_close(zhp);
-			if (clp != NULL) {
-				/* mount and share received datasets */
-				err = changelist_postfix(clp);
-				changelist_free(clp);
-			}
-		}
-		if (zhp == NULL || clp == NULL || err)
+		zhp = zfs_open(hdl, top_zfs,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL) {
 			err = -1;
+			goto out;
+		} else {
+			if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+				zfs_close(zhp);
+				goto out;
+			}
+
+			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
+			    CL_GATHER_MOUNT_ALWAYS,
+			    flags->forceunmount ? MS_FORCE : 0);
+			zfs_close(zhp);
+			if (clp == NULL) {
+				err = -1;
+				goto out;
+			}
+
+			/* mount and share received datasets */
+			err = changelist_postfix(clp);
+			changelist_free(clp);
+			if (err != 0)
+				err = -1;
+		}
 	}
+
+out:
 	if (top_zfs)
 		free(top_zfs);
 

diff --git a/zfs/lib/libzfs/libzfs_status.c b/zfs/lib/libzfs/libzfs_status.c
index ebf497d..33d6e1b 100644
--- a/zfs/lib/libzfs/libzfs_status.c
+++ b/zfs/lib/libzfs/libzfs_status.c

@@ -23,6 +23,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 /*
@@ -43,6 +44,7 @@
 
 #include <libzfs.h>
 #include <libzutil.h>
+#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/systeminfo.h>
@@ -84,6 +86,10 @@
 	 *	ZPOOL_STATUS_RESILVERING
 	 *	ZPOOL_STATUS_OFFLINE_DEV
 	 *	ZPOOL_STATUS_REMOVED_DEV
+	 *	ZPOOL_STATUS_REBUILDING
+	 *	ZPOOL_STATUS_REBUILD_SCRUB
+	 *	ZPOOL_STATUS_COMPATIBILITY_ERR
+	 *	ZPOOL_STATUS_INCOMPATIBLE_FEAT
 	 *	ZPOOL_STATUS_OK
 	 */
 };
@@ -92,57 +98,69 @@
 
 /* ARGSUSED */
 static int
-vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_missing(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_CANT_OPEN &&
-	    aux == VDEV_AUX_OPEN_FAILED);
+	return (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_OPEN_FAILED);
 }
 
 /* ARGSUSED */
 static int
-vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_faulted(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_FAULTED);
+	return (vs->vs_state == VDEV_STATE_FAULTED);
 }
 
 /* ARGSUSED */
 static int
-vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_errors(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_DEGRADED || errs != 0);
+	return (vs->vs_state == VDEV_STATE_DEGRADED ||
+	    vs->vs_read_errors != 0 || vs->vs_write_errors != 0 ||
+	    vs->vs_checksum_errors != 0);
 }
 
 /* ARGSUSED */
 static int
-vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_broken(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_CANT_OPEN);
+	return (vs->vs_state == VDEV_STATE_CANT_OPEN);
 }
 
 /* ARGSUSED */
 static int
-vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_offlined(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_OFFLINE);
+	return (vs->vs_state == VDEV_STATE_OFFLINE);
 }
 
 /* ARGSUSED */
 static int
-vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
+vdev_removed(vdev_stat_t *vs, uint_t vsc)
 {
-	return (state == VDEV_STATE_REMOVED);
+	return (vs->vs_state == VDEV_STATE_REMOVED);
+}
+
+static int
+vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc)
+{
+	if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL)
+		return (0);
+
+	return (VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
+	    vs->vs_configured_ashift < vs->vs_physical_ashift);
 }
 
 /*
  * Detect if any leaf devices that have seen errors or could not be opened.
  */
 static boolean_t
-find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
+find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t),
+    boolean_t ignore_replacing)
 {
 	nvlist_t **child;
 	vdev_stat_t *vs;
-	uint_t c, children;
-	char *type;
+	uint_t c, vsc, children;
 
 	/*
 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
@@ -150,23 +168,25 @@
 	 * out again.  We'll pick up the fact that a resilver is happening
 	 * later.
 	 */
-	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
-	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
-		return (B_FALSE);
+	if (ignore_replacing == B_TRUE) {
+		char *type;
+
+		verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE,
+		    &type) == 0);
+		if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
+			return (B_FALSE);
+	}
 
 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
 	    &children) == 0) {
 		for (c = 0; c < children; c++)
-			if (find_vdev_problem(child[c], func))
+			if (find_vdev_problem(child[c], func, ignore_replacing))
 				return (B_TRUE);
 	} else {
 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
-		    (uint64_t **)&vs, &c) == 0);
+		    (uint64_t **)&vs, &vsc) == 0);
 
-		if (func(vs->vs_state, vs->vs_aux,
-		    vs->vs_read_errors +
-		    vs->vs_write_errors +
-		    vs->vs_checksum_errors))
+		if (func(vs, vsc) != 0)
 			return (B_TRUE);
 	}
 
@@ -176,7 +196,7 @@
 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child,
 	    &children) == 0) {
 		for (c = 0; c < children; c++)
-			if (find_vdev_problem(child[c], func))
+			if (find_vdev_problem(child[c], func, ignore_replacing))
 				return (B_TRUE);
 	}
 
@@ -195,13 +215,14 @@
  *	- Check for any data errors
  *	- Check for any faulted or missing devices in a replicated config
  *	- Look for any devices showing errors
- *	- Check for any resilvering devices
+ *	- Check for any resilvering or rebuilding devices
  *
  * There can obviously be multiple errors within a single pool, so this routine
  * only picks the most damaging of all the current errors to report.
  */
 static zpool_status_t
-check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
+check_status(nvlist_t *config, boolean_t isimport,
+    zpool_errata_t *erratap, const char *compat)
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
@@ -234,6 +255,49 @@
 		return (ZPOOL_STATUS_RESILVERING);
 
 	/*
+	 * Currently rebuilding a vdev, check top-level vdevs.
+	 */
+	vdev_rebuild_stat_t *vrs = NULL;
+	nvlist_t **child;
+	uint_t c, i, children;
+	uint64_t rebuild_end_time = 0;
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if ((nvlist_lookup_uint64_array(child[c],
+			    ZPOOL_CONFIG_REBUILD_STATS,
+			    (uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
+				uint64_t state = vrs->vrs_state;
+
+				if (state == VDEV_REBUILD_ACTIVE) {
+					return (ZPOOL_STATUS_REBUILDING);
+				} else if (state == VDEV_REBUILD_COMPLETE &&
+				    vrs->vrs_end_time > rebuild_end_time) {
+					rebuild_end_time = vrs->vrs_end_time;
+				}
+			}
+		}
+
+		/*
+		 * If we can determine when the last scrub was run, and it
+		 * was before the last rebuild completed, then recommend
+		 * that the pool be scrubbed to verify all checksums.  When
+		 * ps is NULL we can infer the pool has never been scrubbed.
+		 */
+		if (rebuild_end_time > 0) {
+			if (ps != NULL) {
+				if ((ps->pss_state == DSS_FINISHED &&
+				    ps->pss_func == POOL_SCAN_SCRUB &&
+				    rebuild_end_time > ps->pss_end_time) ||
+				    ps->pss_state == DSS_NONE)
+					return (ZPOOL_STATUS_REBUILD_SCRUB);
+			} else {
+				return (ZPOOL_STATUS_REBUILD_SCRUB);
+			}
+		}
+	}
+
+	/*
 	 * The multihost property is set and the pool may be active.
 	 */
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
@@ -317,15 +381,15 @@
 	 * Bad devices in non-replicated config.
 	 */
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
-	    find_vdev_problem(nvroot, vdev_faulted))
+	    find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
 		return (ZPOOL_STATUS_FAULTED_DEV_NR);
 
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
-	    find_vdev_problem(nvroot, vdev_missing))
+	    find_vdev_problem(nvroot, vdev_missing, B_TRUE))
 		return (ZPOOL_STATUS_MISSING_DEV_NR);
 
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
-	    find_vdev_problem(nvroot, vdev_broken))
+	    find_vdev_problem(nvroot, vdev_broken, B_TRUE))
 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
 
 	/*
@@ -347,32 +411,38 @@
 	/*
 	 * Missing devices in a replicated config.
 	 */
-	if (find_vdev_problem(nvroot, vdev_faulted))
+	if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
 		return (ZPOOL_STATUS_FAULTED_DEV_R);
-	if (find_vdev_problem(nvroot, vdev_missing))
+	if (find_vdev_problem(nvroot, vdev_missing, B_TRUE))
 		return (ZPOOL_STATUS_MISSING_DEV_R);
-	if (find_vdev_problem(nvroot, vdev_broken))
+	if (find_vdev_problem(nvroot, vdev_broken, B_TRUE))
 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
 
 	/*
 	 * Devices with errors
 	 */
-	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
+	if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE))
 		return (ZPOOL_STATUS_FAILING_DEV);
 
 	/*
 	 * Offlined devices
 	 */
-	if (find_vdev_problem(nvroot, vdev_offlined))
+	if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE))
 		return (ZPOOL_STATUS_OFFLINE_DEV);
 
 	/*
 	 * Removed device
 	 */
-	if (find_vdev_problem(nvroot, vdev_removed))
+	if (find_vdev_problem(nvroot, vdev_removed, B_TRUE))
 		return (ZPOOL_STATUS_REMOVED_DEV);
 
 	/*
+	 * Suboptimal, but usable, ashift configuration.
+	 */
+	if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE))
+		return (ZPOOL_STATUS_NON_NATIVE_ASHIFT);
+
+	/*
 	 * Informational errata available.
 	 */
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
@@ -384,11 +454,17 @@
 	/*
 	 * Outdated, but usable, version
 	 */
-	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
-		return (ZPOOL_STATUS_VERSION_OLDER);
+	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) {
+		/* "legacy" compatibility disables old version reporting */
+		if (compat != NULL && strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0)
+			return (ZPOOL_STATUS_OK);
+		else
+			return (ZPOOL_STATUS_VERSION_OLDER);
+	}
 
 	/*
-	 * Usable pool with disabled features
+	 * Usable pool with disabled or superfluous features
+	 * (superfluous = beyond what's requested by 'compatibility')
 	 */
 	if (version >= SPA_VERSION_FEATURES) {
 		int i;
@@ -405,10 +481,24 @@
 			    ZPOOL_CONFIG_FEATURE_STATS);
 		}
 
+		/* check against all features, or limited set? */
+		boolean_t c_features[SPA_FEATURES];
+
+		switch (zpool_load_compat(compat, c_features, NULL, 0)) {
+		case ZPOOL_COMPATIBILITY_OK:
+		case ZPOOL_COMPATIBILITY_WARNTOKEN:
+			break;
+		default:
+			return (ZPOOL_STATUS_COMPATIBILITY_ERR);
+		}
 		for (i = 0; i < SPA_FEATURES; i++) {
 			zfeature_info_t *fi = &spa_feature_table[i];
-			if (!nvlist_exists(feat, fi->fi_guid))
+			if (!fi->fi_zfs_mod_supported)
+				continue;
+			if (c_features[i] && !nvlist_exists(feat, fi->fi_guid))
 				return (ZPOOL_STATUS_FEAT_DISABLED);
+			if (!c_features[i] && nvlist_exists(feat, fi->fi_guid))
+				return (ZPOOL_STATUS_INCOMPATIBLE_FEAT);
 		}
 	}
 
@@ -418,7 +508,18 @@
 zpool_status_t
 zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
 {
-	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);
+	/*
+	 * Pass in the pool's 'compatibility' property (the desired
+	 * feature set), as it affects the check for disabled features.
+	 */
+	char compatibility[ZFS_MAXPROPLEN];
+	if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compatibility,
+	    ZFS_MAXPROPLEN, NULL, B_FALSE) != 0)
+		compatibility[0] = '\0';
+
+	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata,
+	    compatibility);
+
 	if (msgid != NULL) {
 		if (ret >= NMSGID)
 			*msgid = NULL;
@@ -431,7 +532,7 @@
 zpool_status_t
 zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata)
 {
-	zpool_status_t ret = check_status(config, B_TRUE, errata);
+	zpool_status_t ret = check_status(config, B_TRUE, errata, NULL);
 
 	if (ret >= NMSGID)
 		*msgid = NULL;

diff --git a/zfs/lib/libzfs/libzfs_util.c b/zfs/lib/libzfs/libzfs_util.c
index 4a96766..7c4d310 100644
--- a/zfs/lib/libzfs/libzfs_util.c
+++ b/zfs/lib/libzfs/libzfs_util.c

@@ -21,10 +21,14 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright 2020 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2020 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Allan Jude
+ * under sponsorship from the FreeBSD Foundation.
  */
 
 /*
@@ -40,6 +44,9 @@
 #include <strings.h>
 #include <unistd.h>
 #include <math.h>
+#if LIBFETCH_DYNAMIC
+#include <dlfcn.h>
+#endif
 #include <sys/stat.h>
 #include <sys/mnttab.h>
 #include <sys/mntent.h>
@@ -54,7 +61,13 @@
 #include "zfeature_common.h"
 #include <zfs_fletcher.h>
 #include <libzutil.h>
-#include <sys/zfs_sysfs.h>
+
+/*
+ * We only care about the scheme in order to match the scheme
+ * with the handler. Each handler should validate the full URI
+ * as necessary.
+ */
+#define	URI_REGEX	"^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):"
 
 int
 libzfs_errno(libzfs_handle_t *hdl)
@@ -63,31 +76,6 @@
 }
 
 const char *
-libzfs_error_init(int error)
-{
-	switch (error) {
-	case ENXIO:
-		return (dgettext(TEXT_DOMAIN, "The ZFS modules are not "
-		    "loaded.\nTry running '/sbin/modprobe zfs' as root "
-		    "to load them."));
-	case ENOENT:
-		return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts "
-		    "are required.\nTry running 'udevadm trigger' and 'mount "
-		    "-t proc proc /proc' as root."));
-	case ENOEXEC:
-		return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be "
-		    "auto-loaded.\nTry running '/sbin/modprobe zfs' as "
-		    "root to manually load them."));
-	case EACCES:
-		return (dgettext(TEXT_DOMAIN, "Permission denied the "
-		    "ZFS utilities must be run as root."));
-	default:
-		return (dgettext(TEXT_DOMAIN, "Failed to initialize the "
-		    "libzfs library."));
-	}
-}
-
-const char *
 libzfs_error_action(libzfs_handle_t *hdl)
 {
 	return (hdl->libzfs_action);
@@ -163,15 +151,15 @@
 	case EZFS_MOUNTFAILED:
 		return (dgettext(TEXT_DOMAIN, "mount failed"));
 	case EZFS_UMOUNTFAILED:
-		return (dgettext(TEXT_DOMAIN, "umount failed"));
+		return (dgettext(TEXT_DOMAIN, "unmount failed"));
 	case EZFS_UNSHARENFSFAILED:
-		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+		return (dgettext(TEXT_DOMAIN, "NFS share removal failed"));
 	case EZFS_SHARENFSFAILED:
-		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+		return (dgettext(TEXT_DOMAIN, "NFS share creation failed"));
 	case EZFS_UNSHARESMBFAILED:
-		return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
+		return (dgettext(TEXT_DOMAIN, "SMB share removal failed"));
 	case EZFS_SHARESMBFAILED:
-		return (dgettext(TEXT_DOMAIN, "smb add share failed"));
+		return (dgettext(TEXT_DOMAIN, "SMB share creation failed"));
 	case EZFS_PERM:
 		return (dgettext(TEXT_DOMAIN, "permission denied"));
 	case EZFS_NOSPC:
@@ -182,6 +170,8 @@
 		return (dgettext(TEXT_DOMAIN, "I/O error"));
 	case EZFS_INTR:
 		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_CKSUM:
+		return (dgettext(TEXT_DOMAIN, "insufficient replicas"));
 	case EZFS_ISSPARE:
 		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
 		    "spare"));
@@ -305,6 +295,9 @@
 		    "resilver_defer feature"));
 	case EZFS_EXPORT_IN_PROGRESS:
 		return (dgettext(TEXT_DOMAIN, "pool export in progress"));
+	case EZFS_REBUILDING:
+		return (dgettext(TEXT_DOMAIN, "currently sequentially "
+		    "resilvering"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
@@ -343,7 +336,8 @@
 	if (hdl->libzfs_printerr) {
 		if (error == EZFS_UNKNOWN) {
 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
-			    "error: %s\n"), libzfs_error_description(hdl));
+			    "error: %s: %s\n"), hdl->libzfs_action,
+			    libzfs_error_description(hdl));
 			abort();
 		}
 
@@ -400,6 +394,10 @@
 	case EINTR:
 		zfs_verror(hdl, EZFS_INTR, fmt, ap);
 		return (-1);
+
+	case ECKSUM:
+		zfs_verror(hdl, EZFS_CKSUM, fmt, ap);
+		return (-1);
 	}
 
 	return (0);
@@ -470,6 +468,7 @@
 	case EREMOTEIO:
 		zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
 		break;
+	case ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE:
 	case ZFS_ERR_IOC_CMD_UNAVAIL:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
 		    "module does not support this operation. A reboot may "
@@ -489,8 +488,11 @@
 	case ZFS_ERR_WRONG_PARENT:
 		zfs_verror(hdl, EZFS_WRONG_PARENT, fmt, ap);
 		break;
+	case ZFS_ERR_BADPROP:
+		zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
+		break;
 	default:
-		zfs_error_aux(hdl, strerror(error));
+		zfs_error_aux(hdl, "%s", strerror(error));
 		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
 		break;
 	}
@@ -499,6 +501,118 @@
 	return (-1);
 }
 
+void
+zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
+    char *errbuf)
+{
+	switch (err) {
+
+	case ENOSPC:
+		/*
+		 * For quotas and reservations, ENOSPC indicates
+		 * something different; setting a quota or reservation
+		 * doesn't use any disk space.
+		 */
+		switch (prop) {
+		case ZFS_PROP_QUOTA:
+		case ZFS_PROP_REFQUOTA:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "size is less than current used or "
+			    "reserved space"));
+			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+			break;
+
+		case ZFS_PROP_RESERVATION:
+		case ZFS_PROP_REFRESERVATION:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "size is greater than available space"));
+			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+			break;
+
+		default:
+			(void) zfs_standard_error(hdl, err, errbuf);
+			break;
+		}
+		break;
+
+	case EBUSY:
+		(void) zfs_standard_error(hdl, EBUSY, errbuf);
+		break;
+
+	case EROFS:
+		(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+		break;
+
+	case E2BIG:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "property value too long"));
+		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		break;
+
+	case ENOTSUP:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool and or dataset must be upgraded to set this "
+		    "property or value"));
+		(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+		break;
+
+	case ERANGE:
+		if (prop == ZFS_PROP_COMPRESSION ||
+		    prop == ZFS_PROP_DNODESIZE ||
+		    prop == ZFS_PROP_RECORDSIZE) {
+			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "property setting is not allowed on "
+			    "bootable datasets"));
+			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+		} else if (prop == ZFS_PROP_CHECKSUM ||
+		    prop == ZFS_PROP_DEDUP) {
+			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "property setting is not allowed on "
+			    "root pools"));
+			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, err, errbuf);
+		}
+		break;
+
+	case EINVAL:
+		if (prop == ZPROP_INVAL) {
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, err, errbuf);
+		}
+		break;
+
+	case ZFS_ERR_BADPROP:
+		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		break;
+
+	case EACCES:
+		if (prop == ZFS_PROP_KEYLOCATION) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "keylocation may only be set on encryption roots"));
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, err, errbuf);
+		}
+		break;
+
+	case EOVERFLOW:
+		/*
+		 * This platform can't address a volume this big.
+		 */
+#ifdef _ILP32
+		if (prop == ZFS_PROP_VOLSIZE) {
+			(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+			break;
+		}
+#endif
+		fallthrough;
+	default:
+		(void) zfs_standard_error(hdl, err, errbuf);
+	}
+}
+
 int
 zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
 {
@@ -566,7 +680,7 @@
 	case ENOSPC:
 	case EDQUOT:
 		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
-		return (-1);
+		break;
 
 	case EAGAIN:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -603,6 +717,15 @@
 	case ZFS_ERR_EXPORT_IN_PROGRESS:
 		zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
 		break;
+	case ZFS_ERR_RESILVER_IN_PROGRESS:
+		zfs_verror(hdl, EZFS_RESILVERING, fmt, ap);
+		break;
+	case ZFS_ERR_REBUILD_IN_PROGRESS:
+		zfs_verror(hdl, EZFS_REBUILDING, fmt, ap);
+		break;
+	case ZFS_ERR_BADPROP:
+		zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
+		break;
 	case ZFS_ERR_IOC_CMD_UNAVAIL:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
 		    "module does not support this operation. A reboot may "
@@ -620,7 +743,7 @@
 		zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap);
 		break;
 	default:
-		zfs_error_aux(hdl, strerror(error));
+		zfs_error_aux(hdl, "%s", strerror(error));
 		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
 	}
 
@@ -668,8 +791,10 @@
 
 	va_end(ap);
 
-	if (err < 0)
+	if (err < 0) {
 		(void) no_memory(hdl);
+		ret = NULL;
+	}
 
 	return (ret);
 }
@@ -711,19 +836,6 @@
 	hdl->libzfs_printerr = printerr;
 }
 
-static int
-libzfs_module_loaded(const char *module)
-{
-	const char path_prefix[] = "/sys/module/";
-	char path[256];
-
-	memcpy(path, path_prefix, sizeof (path_prefix) - 1);
-	strcpy(path + sizeof (path_prefix) - 1, module);
-
-	return (access(path, F_OK) == 0);
-}
-
-
 /*
  * Read lines from an open file descriptor and store them in an array of
  * strings until EOF.  lines[] will be allocated and populated with all the
@@ -783,13 +895,13 @@
 	 * Setup a pipe between our child and parent process if we're
 	 * reading stdout.
 	 */
-	if ((lines != NULL) && pipe(link) == -1)
-		return (-ESTRPIPE);
+	if ((lines != NULL) && pipe2(link, O_CLOEXEC) == -1)
+		return (-EPIPE);
 
 	pid = vfork();
 	if (pid == 0) {
 		/* Child process */
-		devnull_fd = open("/dev/null", O_WRONLY);
+		devnull_fd = open("/dev/null", O_WRONLY | O_CLOEXEC);
 
 		if (devnull_fd < 0)
 			_exit(-1);
@@ -799,15 +911,11 @@
 		else if (lines != NULL) {
 			/* Save the output to lines[] */
 			dup2(link[1], STDOUT_FILENO);
-			close(link[0]);
-			close(link[1]);
 		}
 
 		if (!(flags & STDERR_VERBOSE))
 			(void) dup2(devnull_fd, STDERR_FILENO);
 
-		close(devnull_fd);
-
 		if (flags & NO_DEFAULT_PATH) {
 			if (env == NULL)
 				execv(path, argv);
@@ -902,85 +1010,14 @@
 	return (0);
 }
 
-/*
- * Verify the required ZFS_DEV device is available and optionally attempt
- * to load the ZFS modules.  Under normal circumstances the modules
- * should already have been loaded by some external mechanism.
- *
- * Environment variables:
- * - ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules.
- * - ZFS_MODULE_TIMEOUT="<seconds>"     - Seconds to wait for ZFS_DEV
- */
-static int
-libzfs_load_module(const char *module)
-{
-	char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0};
-	char *load_str, *timeout_str;
-	long timeout = 10; /* seconds */
-	long busy_timeout = 10; /* milliseconds */
-	int load = 0, fd;
-	hrtime_t start;
-
-	/* Optionally request module loading */
-	if (!libzfs_module_loaded(module)) {
-		load_str = getenv("ZFS_MODULE_LOADING");
-		if (load_str) {
-			if (!strncasecmp(load_str, "YES", strlen("YES")) ||
-			    !strncasecmp(load_str, "ON", strlen("ON")))
-				load = 1;
-			else
-				load = 0;
-		}
-
-		if (load) {
-			if (libzfs_run_process("/sbin/modprobe", argv, 0))
-				return (ENOEXEC);
-		}
-
-		if (!libzfs_module_loaded(module))
-			return (ENXIO);
-	}
-
-	/*
-	 * Device creation by udev is asynchronous and waiting may be
-	 * required.  Busy wait for 10ms and then fall back to polling every
-	 * 10ms for the allowed timeout (default 10s, max 10m).  This is
-	 * done to optimize for the common case where the device is
-	 * immediately available and to avoid penalizing the possible
-	 * case where udev is slow or unable to create the device.
-	 */
-	timeout_str = getenv("ZFS_MODULE_TIMEOUT");
-	if (timeout_str) {
-		timeout = strtol(timeout_str, NULL, 0);
-		timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */
-	}
-
-	start = gethrtime();
-	do {
-		fd = open(ZFS_DEV, O_RDWR);
-		if (fd >= 0) {
-			(void) close(fd);
-			return (0);
-		} else if (errno != ENOENT) {
-			return (errno);
-		} else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) {
-			sched_yield();
-		} else {
-			usleep(10 * MILLISEC);
-		}
-	} while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC));
-
-	return (ENOENT);
-}
-
 libzfs_handle_t *
 libzfs_init(void)
 {
 	libzfs_handle_t *hdl;
 	int error;
+	char *env;
 
-	error = libzfs_load_module(ZFS_DRIVER);
-	if (error) {
+	if ((error = libzfs_load_module()) != 0) {
 		errno = error;
 		return (NULL);
 	}
@@ -989,28 +1026,29 @@
 		return (NULL);
 	}
 
-	if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+	if (regcomp(&hdl->libzfs_urire, URI_REGEX, 0) != 0) {
+		free(hdl);
+		return (NULL);
+	}
+
+	if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR|O_EXCL|O_CLOEXEC)) < 0) {
 		free(hdl);
 		return (NULL);
 	}
 
 #ifdef HAVE_SETMNTENT
-	if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "r")) == NULL) {
+	if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "re")) == NULL) {
 #else
-	if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+	if ((hdl->libzfs_mnttab = fopen(MNTTAB, "re")) == NULL) {
 #endif
 		(void) close(hdl->libzfs_fd);
 		free(hdl);
 		return (NULL);
 	}
 
-	hdl->libzfs_sharetab = fopen(ZFS_SHARETAB, "r");
-
 	if (libzfs_core_init() != 0) {
 		(void) close(hdl->libzfs_fd);
 		(void) fclose(hdl->libzfs_mnttab);
-		if (hdl->libzfs_sharetab)
-			(void) fclose(hdl->libzfs_sharetab);
 		free(hdl);
 		return (NULL);
 	}
@@ -1024,6 +1062,18 @@
 	if (getenv("ZFS_PROP_DEBUG") != NULL) {
 		hdl->libzfs_prop_debug = B_TRUE;
 	}
+	if ((env = getenv("ZFS_SENDRECV_MAX_NVLIST")) != NULL) {
+		if ((error = zfs_nicestrtonum(hdl, env,
+		    &hdl->libzfs_max_nvlist))) {
+			errno = error;
+			(void) close(hdl->libzfs_fd);
+			(void) fclose(hdl->libzfs_mnttab);
+			free(hdl);
+			return (NULL);
+		}
+	} else {
+		hdl->libzfs_max_nvlist = (SPA_MAXBLOCKSIZE * 4);
+	}
 
 	/*
 	 * For testing, remove some settable properties and features
@@ -1054,14 +1104,17 @@
 #else
 		(void) fclose(hdl->libzfs_mnttab);
 #endif
-	if (hdl->libzfs_sharetab)
-		(void) fclose(hdl->libzfs_sharetab);
-	zfs_uninit_libshare(hdl);
 	zpool_free_handles(hdl);
 	namespace_clear(hdl);
 	libzfs_mnttab_fini(hdl);
 	libzfs_core_fini();
+	regfree(&hdl->libzfs_urire);
 	fletcher_4_fini();
+#if LIBFETCH_DYNAMIC
+	if (hdl->libfetch != (void *)-1 && hdl->libfetch != NULL)
+		(void) dlclose(hdl->libfetch);
+	free(hdl->libfetch_load_error);
+#endif
 	free(hdl);
 }
 
@@ -1090,11 +1143,10 @@
  * fs/vol/snap/bkmark name.
  */
 zfs_handle_t *
-zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
+zfs_path_to_zhandle(libzfs_handle_t *hdl, const char *path, zfs_type_t argtype)
 {
 	struct stat64 statbuf;
 	struct extmnttab entry;
-	int ret;
 
 	if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) {
 		/*
@@ -1103,24 +1155,12 @@
 		return (zfs_open(hdl, path, argtype));
 	}
 
-	if (stat64(path, &statbuf) != 0) {
-		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
-		return (NULL);
-	}
-
 	/* Reopen MNTTAB to prevent reading stale data from open file */
-	if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
+	if (freopen(MNTTAB, "re", hdl->libzfs_mnttab) == NULL)
 		return (NULL);
 
-	while ((ret = getextmntent(hdl->libzfs_mnttab, &entry, 0)) == 0) {
-		if (makedevice(entry.mnt_major, entry.mnt_minor) ==
-		    statbuf.st_dev) {
-			break;
-		}
-	}
-	if (ret != 0) {
+	if (getextmntent(path, &entry, &statbuf) != 0)
 		return (NULL);
-	}
 
 	if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
 		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
@@ -1227,12 +1267,6 @@
 	return (0);
 }
 
-int
-zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
-{
-	return (ioctl(hdl->libzfs_fd, request, zc));
-}
-
 /*
  * ================================================================
  * API shared by zfs and zpool property management
@@ -1538,7 +1572,12 @@
 
 		fval *= pow(2, shift);
 
-		if (fval > UINT64_MAX) {
+		/*
+		 * UINT64_MAX is not exactly representable as a double.
+		 * The closest representation is UINT64_MAX + 1, so we
+		 * use a >= comparison instead of > for the bounds check.
+		 */
+		if (fval >= (double)UINT64_MAX) {
 			if (hdl)
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "numeric value is too large"));
@@ -1883,7 +1922,7 @@
 	zfs_type_t type;
 } expand_data_t;
 
-int
+static int
 zprop_expand_list_cb(int prop, void *cb)
 {
 	zprop_list_t *entry;
@@ -1960,36 +1999,6 @@
 }
 
 /*
- * Fill given version buffer with zfs kernel version read from ZFS_SYSFS_DIR
- * Returns 0 on success, and -1 on error (with errno set)
- */
-int
-zfs_version_kernel(char *version, int len)
-{
-	int _errno;
-	int fd;
-	int rlen;
-
-	if ((fd = open(ZFS_SYSFS_DIR "/version", O_RDONLY)) == -1)
-		return (-1);
-
-	if ((rlen = read(fd, version, len)) == -1) {
-		version[0] = '\0';
-		_errno = errno;
-		(void) close(fd);
-		errno = _errno;
-		return (-1);
-	}
-
-	version[rlen-1] = '\0';  /* discard '\n' */
-
-	if (close(fd) == -1)
-		return (-1);
-
-	return (0);
-}
-
-/*
  * Prints both zfs userland and kernel versions
  * Returns 0 on success, and -1 on error (with errno set)
  */
@@ -1999,16 +2008,116 @@
 	char zver_userland[128];
 	char zver_kernel[128];
 
+	zfs_version_userland(zver_userland, sizeof (zver_userland));
+
+	(void) printf("%s\n", zver_userland);
+
 	if (zfs_version_kernel(zver_kernel, sizeof (zver_kernel)) == -1) {
 		fprintf(stderr, "zfs_version_kernel() failed: %s\n",
 		    strerror(errno));
 		return (-1);
 	}
 
-	zfs_version_userland(zver_userland, sizeof (zver_userland));
-
-	(void) printf("%s\n", zver_userland);
 	(void) printf("zfs-kmod-%s\n", zver_kernel);
 
 	return (0);
 }
+
+/*
+ * Report whether ANSI color sequences should be written to stdout.
+ *
+ * Returns 1 only when all of these hold: libzfs_envvar_is_set() accepts
+ * ZFS_COLOR, it does not accept NO_COLOR (https://no-color.org/), stdout is
+ * a terminal, and TERM is defined as something other than "dumb" or
+ * "unknown".  Returns 0 in every other case.
+ */
+int
+use_color(void)
+{
+	static int cached = -1;	/* -1 means "not evaluated yet" */
+	char *term;
+
+	/*
+	 * The environment is assumed not to change while the command is
+	 * running, so the decision is taken on the first call only and
+	 * every later call just returns the remembered answer.
+	 */
+	if (cached == -1) {
+		term = getenv("TERM");
+		cached = (libzfs_envvar_is_set("ZFS_COLOR") &&
+		    !libzfs_envvar_is_set("NO_COLOR") &&
+		    isatty(STDOUT_FILENO) && term != NULL &&
+		    strcmp("dumb", term) != 0 &&
+		    strcmp("unknown", term) != 0) ? 1 : 0;
+	}
+
+	return (cached);
+}
+
+/*
+ * color_start() and color_end() bracket a run of output that should be
+ * colorized, for example:
+ *
+ *	color_start(ANSI_RED);
+ *	printf("hello");
+ *	printf("world");
+ *	color_end();
+ *
+ * color_start() writes the given escape sequence to stdout and flushes it.
+ * It does nothing when color is NULL or use_color() returns 0.
+ */
+void
+color_start(const char *color)
+{
+	if (color == NULL || !use_color())
+		return;
+
+	fputs(color, stdout);
+	fflush(stdout);
+}
+
+/*
+ * Write ANSI_RESET to stdout and flush it.  No-op when use_color()
+ * returns 0.
+ */
+void
+color_end(void)
+{
+	if (!use_color())
+		return;
+
+	fputs(ANSI_RESET, stdout);
+	fflush(stdout);
+}
+
+/*
+ * printf() bracketed by color_start()/color_end().  A NULL color skips both
+ * calls, which makes this a plain printf().  Returns whatever vprintf()
+ * returns.
+ */
+int
+printf_color(const char *color, char *format, ...)
+{
+	const int colorize = (color != NULL);
+	va_list args;
+	int written;
+
+	if (colorize)
+		color_start(color);
+
+	va_start(args, format);
+	written = vprintf(format, args);
+	va_end(args);
+
+	if (colorize)
+		color_end();
+
+	return (written);
+}

diff --git a/zfs/lib/libzfs/os/freebsd/libzfs_compat.c b/zfs/lib/libzfs/os/freebsd/libzfs_compat.c
new file mode 100644
index 0000000..0e8a3b1
--- /dev/null
+++ b/zfs/lib/libzfs/os/freebsd/libzfs_compat.c

@@ -0,0 +1,332 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+#include <os/freebsd/zfs/sys/zfs_ioctl_compat.h>
+#include <libzfs_impl.h>
+#include <libzfs.h>
+#include <libzutil.h>
+#include <sys/sysctl.h>
+#include <libintl.h>
+#include <sys/linker.h>
+#include <sys/module.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+
+#ifdef IN_BASE
+#define	ZFS_KMOD	"zfs"
+#else
+#define	ZFS_KMOD	"openzfs"
+#endif
+
+/*
+ * Intentionally a no-op in this FreeBSD file; infd is not used.
+ */
+void
+libzfs_set_pipe_max(int infd)
+{
+	/* FreeBSD automatically resizes */
+}
+
+/*
+ * Try to execve() `name` with the given argv/envp, searching the
+ * colon-separated directory list `path` for it.
+ *
+ * A name containing '/' is executed as-is with no search.  Otherwise each
+ * component of `path` is tried in order (an empty component means ".").
+ * If execve() fails with ENOEXEC the file is handed to _PATH_BSHELL
+ * instead.
+ *
+ * This function only returns on failure: the result is always -1 with
+ * errno set.  When the search is exhausted errno is EACCES if an existing
+ * candidate failed with EACCES, otherwise ENOENT.
+ */
+static int
+execvPe(const char *name, const char *path, char * const *argv,
+    char * const *envp)
+{
+	const char **memp;
+	size_t cnt, lp, ln;
+	int eacces, save_errno;
+	char *cur, buf[MAXPATHLEN];
+	const char *p, *bp;
+	struct stat sb;
+
+	eacces = 0;
+
+	/* If it's an absolute or relative path name, it's easy. */
+	if (strchr(name, '/')) {
+		bp = name;
+		/*
+		 * Jump straight into the loop body below.  cur is NULL, so
+		 * the next strsep() yields NULL and the loop ends after this
+		 * single attempt.
+		 */
+		cur = NULL;
+		goto retry;
+	}
+	bp = buf;
+
+	/* If it's an empty path name, fail in the usual POSIX way. */
+	if (*name == '\0') {
+		errno = ENOENT;
+		return (-1);
+	}
+
+	/* strsep() modifies its input, so work on a private copy of path. */
+	cur = alloca(strlen(path) + 1);
+	if (cur == NULL) {
+		errno = ENOMEM;
+		return (-1);
+	}
+	strcpy(cur, path);
+	while ((p = strsep(&cur, ":")) != NULL) {
+		/*
+		 * It's a SHELL path -- double, leading and trailing colons
+		 * mean the current directory.
+		 */
+		if (*p == '\0') {
+			p = ".";
+			lp = 1;
+		} else
+			lp = strlen(p);
+		ln = strlen(name);
+
+		/*
+		 * If the path is too long complain.  This is a possible
+		 * security issue; given a way to make the path too long
+		 * the user may execute the wrong program.
+		 */
+		if (lp + ln + 2 > sizeof (buf)) {
+			(void) write(STDERR_FILENO, "execvP: ", 8);
+			(void) write(STDERR_FILENO, p, lp);
+			(void) write(STDERR_FILENO, ": path too long\n",
+			    16);
+			continue;
+		}
+		/* Assemble "<dir>/<name>" in buf. */
+		bcopy(p, buf, lp);
+		buf[lp] = '/';
+		bcopy(name, buf + lp + 1, ln);
+		buf[lp + ln + 1] = '\0';
+
+retry:		(void) execve(bp, argv, envp);
+		/* Reaching this point means execve() failed. */
+		switch (errno) {
+		case E2BIG:
+			goto done;
+		case ELOOP:
+		case ENAMETOOLONG:
+		case ENOENT:
+			break;
+		case ENOEXEC:
+			/* Count the arguments, then rebuild argv for the shell. */
+			for (cnt = 0; argv[cnt]; ++cnt)
+				;
+			memp = alloca((cnt + 2) * sizeof (char *));
+			if (memp == NULL) {
+				/* errno = ENOMEM; XXX override ENOEXEC? */
+				goto done;
+			}
+			memp[0] = "sh";
+			memp[1] = bp;
+			/*
+			 * Copies argv[1..cnt]: the remaining arguments plus
+			 * the terminating NULL.
+			 * NOTE(review): with cnt == 0 this reads argv[1],
+			 * one slot past the terminator -- confirm callers
+			 * always pass argv[0].
+			 */
+			bcopy(argv + 1, memp + 2, cnt * sizeof (char *));
+			execve(_PATH_BSHELL, __DECONST(char **, memp), envp);
+			goto done;
+		case ENOMEM:
+			goto done;
+		case ENOTDIR:
+			break;
+		case ETXTBSY:
+			/*
+			 * We used to retry here, but sh(1) doesn't.
+			 */
+			goto done;
+		default:
+			/*
+			 * EACCES may be for an inaccessible directory or
+			 * a non-executable file.  Call stat() to decide
+			 * which.  This also handles ambiguities for EFAULT
+			 * and EIO, and undocumented errors like ESTALE.
+			 * We hope that the race for a stat() is unimportant.
+			 */
+			save_errno = errno;
+			if (stat(bp, &sb) != 0)
+				break;
+			if (save_errno == EACCES) {
+				/* Remember it, but keep searching. */
+				eacces = 1;
+				continue;
+			}
+			errno = save_errno;
+			goto done;
+		}
+	}
+	if (eacces)
+		errno = EACCES;
+	else
+		errno = ENOENT;
+done:
+	return (-1);
+}
+
+/*
+ * Execute `name` with an explicit environment, looking it up along $PATH,
+ * or along _PATH_DEFPATH when PATH is not set.  The lookup and the exec
+ * itself are delegated to execvPe(), whose result is returned.
+ */
+int
+execvpe(const char *name, char * const argv[], char * const envp[])
+{
+	const char *search = getenv("PATH");
+
+	if (search == NULL)
+		search = _PATH_DEFPATH;
+
+	return (execvPe(name, search, argv, envp));
+}
+
+#define	ERRBUFLEN 256
+
+static __thread char errbuf[ERRBUFLEN];
+
+/*
+ * Build the message shown when library initialization fails.
+ *
+ * error - errno-style code; its strerror() text ends the message.
+ *
+ * When modfind("zfs") fails, the text is prefixed with a translated
+ * "Failed to load <ZFS_KMOD> module: " notice.  Returns a pointer to the
+ * per-thread errbuf, which is overwritten by the next call on the same
+ * thread.
+ */
+const char *
+libzfs_error_init(int error)
+{
+	char *msg = errbuf;
+	size_t msglen = ERRBUFLEN;
+	int len;
+
+	if (modfind("zfs") < 0) {
+		len = snprintf(msg, msglen, dgettext(TEXT_DOMAIN,
+		    "Failed to load %s module: "), ZFS_KMOD);
+		/*
+		 * snprintf() returns the length it wanted to write, which
+		 * can exceed the buffer (e.g. a long translation), or a
+		 * negative value on error.  Clamp it so msg stays inside
+		 * errbuf and msglen cannot wrap around.
+		 */
+		if (len < 0)
+			len = 0;
+		else if ((size_t)len >= msglen)
+			len = (int)(msglen - 1);
+		msg += len;
+		msglen -= len;
+	}
+
+	(void) snprintf(msg, msglen, "%s", strerror(error));
+
+	return (errbuf);
+}
+
+/*
+ * Issue a ZFS ioctl on the descriptor stored in the library handle.
+ * Thin wrapper: forwards request and zc to zfs_ioctl_fd() together with
+ * hdl->libzfs_fd and returns its result unchanged.
+ */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
+{
+	return (zfs_ioctl_fd(hdl->libzfs_fd, request, zc));
+}
+
+/*
+ * Make sure the ZFS kernel module is present, loading it if necessary.
+ * Under normal circumstances the module should already have been loaded
+ * by some external mechanism.
+ *
+ * Returns 0 when modfind("zfs") already succeeds, or when
+ * kldload(ZFS_KMOD) succeeds or fails with EEXIST; otherwise returns the
+ * errno left by kldload().
+ */
+int
+libzfs_load_module(void)
+{
+	/*
+	 * XXX: kldfind(ZFS_KMOD) would be nice here, but we retain
+	 * modfind("zfs") so out-of-base openzfs userland works with the
+	 * in-base module.
+	 */
+	if (modfind("zfs") >= 0)
+		return (0);
+
+	/* Not present in kernel, try loading it. */
+	if (kldload(ZFS_KMOD) >= 0 || errno == EEXIST)
+		return (0);
+
+	return (errno);
+}
+
+/*
+ * Stub in this FreeBSD file: ignores hdl, path and msg and always
+ * returns 0.
+ */
+int
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
+{
+	return (0);
+}
+
+/*
+ * Stub in this FreeBSD file: ignores hdl, zhp and name and always
+ * returns 0.
+ */
+int
+zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
+{
+	return (0);
+}
+
+/*
+ * Stub in this FreeBSD file: ignores di and always returns 0.
+ */
+int
+find_shares_object(differ_info_t *di)
+{
+	return (0);
+}
+
+/*
+ * Attach/detach the given filesystem to/from the given jail.
+ *
+ * zhp    - dataset handle; volumes, snapshots and bookmarks are rejected
+ *          with EZFS_BADTYPE.
+ * jailid - jail identifier, handed to the kernel in zc_zoneid.
+ * attach - non-zero issues ZFS_IOC_JAIL, zero issues ZFS_IOC_UNJAIL.
+ *
+ * Returns 0 on success.  For a rejected type the result of zfs_error() is
+ * returned; if the ioctl fails the error is reported through
+ * zfs_standard_error() and the ioctl's non-zero result is returned.
+ */
+int
+zfs_jail(zfs_handle_t *zhp, int jailid, int attach)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = {"\0"};
+	char errbuf[1024];	/* shadows the file-scope errbuf */
+	unsigned long cmd;
+	int ret;
+
+	/* Prepare the message prefix used by every error path below. */
+	if (attach) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name);
+	} else {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot unjail '%s'"), zhp->zfs_name);
+	}
+
+	switch (zhp->zfs_type) {
+	case ZFS_TYPE_VOLUME:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "volumes can not be jailed"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_SNAPSHOT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be jailed"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_BOOKMARK:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "bookmarks can not be jailed"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_POOL:
+	case ZFS_TYPE_FILESYSTEM:
+		/* OK */
+		;
+	}
+	/*
+	 * NOTE(review): ZFS_TYPE_POOL passes the switch above but trips this
+	 * assert -- confirm whether a pool handle can ever reach this point.
+	 */
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_zoneid = jailid;
+
+	cmd = attach ? ZFS_IOC_JAIL : ZFS_IOC_UNJAIL;
+	if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
+		zfs_standard_error(hdl, errno, errbuf);
+
+	return (ret);
+}
+
+/*
+ * Set loader options for next boot.
+ *
+ * Packs pool_guid, dev_guid and command into an nvlist and submits it with
+ * ZFS_IOC_NEXTBOOT.  Returns 0 on success; otherwise the non-zero result
+ * of zcmd_write_src_nvlist() or of zfs_ioctl(), whichever failed.
+ */
+int
+zpool_nextboot(libzfs_handle_t *hdl, uint64_t pool_guid, uint64_t dev_guid,
+    const char *command)
+{
+	zfs_cmd_t zc = {"\0"};
+	nvlist_t *request = fnvlist_alloc();
+	int rc;
+
+	fnvlist_add_uint64(request, ZPOOL_CONFIG_POOL_GUID, pool_guid);
+	fnvlist_add_uint64(request, ZPOOL_CONFIG_GUID, dev_guid);
+	fnvlist_add_string(request, "command", command);
+
+	/* Only issue the ioctl once the request has been serialized. */
+	if ((rc = zcmd_write_src_nvlist(hdl, &zc, request)) == 0)
+		rc = zfs_ioctl(hdl, ZFS_IOC_NEXTBOOT, &zc);
+
+	zcmd_free_nvlists(&zc);
+	nvlist_free(request);
+	return (rc);
+}
+
+/*
+ * Fill given version buffer with zfs kernel version, read from the
+ * "vfs.zfs.version.module" sysctl.  `len` is the capacity of `version`.
+ * Returns 0 on success, and -1 on error (with errno set)
+ */
+int
+zfs_version_kernel(char *version, int len)
+{
+	size_t buflen = len;
+	int rc;
+
+	rc = sysctlbyname("vfs.zfs.version.module", version, &buflen,
+	    NULL, 0);
+
+	return (rc);
+}

diff --git a/zfs/lib/libzfs/os/freebsd/libzfs_ioctl_compat.c b/zfs/lib/libzfs/os/freebsd/libzfs_ioctl_compat.c
new file mode 100644
index 0000000..18b93fe
--- /dev/null
+++ b/zfs/lib/libzfs/os/freebsd/libzfs_ioctl_compat.c

@@ -0,0 +1,432 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013 Xin Li <delphij@FreeBSD.org>. All rights reserved.
+ * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ * Portions Copyright 2005, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cred.h>
+#include <sys/dmu.h>
+#include <sys/zio.h>
+#include <sys/nvpair.h>
+#include <sys/dsl_deleg.h>
+#include <sys/zfs_ioctl.h>
+#include "zfs_namecheck.h"
+#include <os/freebsd/zfs/sys/zfs_ioctl_compat.h>
+
+/*
+ * FreeBSD zfs_cmd compatibility with older binaries
+ * appropriately remap/extend the zfs_cmd_t structure
+ *
+ * NOTE(review): the body is currently empty, so zc, addr and cflag are all
+ * ignored and no remapping takes place.
+ */
+void
+zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
+{
+
+}
+#if 0
+/*
+ * Unpack the packed nvlist of `size` bytes located at address `nvl` and
+ * hand it back through *nvp.  In _KERNEL builds the bytes are first copied
+ * in with ddi_copyin() using iflag; otherwise `nvl` is used directly as a
+ * pointer.  Returns 0 on success, EINVAL when size is 0, or the error from
+ * ddi_copyin()/nvlist_unpack().
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ */
+static int
+zfs_ioctl_compat_get_nvlist(uint64_t nvl, size_t size, int iflag,
+    nvlist_t **nvp)
+{
+	char *packed;
+	int error;
+	nvlist_t *list = NULL;
+
+	/*
+	 * Read in and unpack the user-supplied nvlist.
+	 */
+	if (size == 0)
+		return (EINVAL);
+
+#ifdef _KERNEL
+	packed = kmem_alloc(size, KM_SLEEP);
+	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
+	    iflag)) != 0) {
+		kmem_free(packed, size);
+		return (error);
+	}
+#else
+	packed = (void *)(uintptr_t)nvl;
+#endif
+
+	error = nvlist_unpack(packed, size, &list, 0);
+
+#ifdef _KERNEL
+	kmem_free(packed, size);
+#endif
+
+	if (error != 0)
+		return (error);
+
+	*nvp = list;
+	return (0);
+}
+
+/*
+ * Pack `nvl` (NV_ENCODE_NATIVE) into the destination buffer addressed by
+ * zc->zc_nvlist_dst and store the packed size in zc->zc_nvlist_dst_size.
+ * In _KERNEL builds the data is packed into a temporary buffer and moved
+ * out with ddi_copyout(); otherwise it is packed straight into the
+ * destination.  Returns 0, or EFAULT if ddi_copyout() fails.
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ */
+static int
+zfs_ioctl_compat_put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	char *packed = NULL;
+	int error = 0;
+	size_t size;
+
+	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
+
+#ifdef _KERNEL
+	packed = kmem_alloc(size, KM_SLEEP);
+	VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
+	    KM_SLEEP) == 0);
+
+	if (ddi_copyout(packed,
+	    (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0)
+		error = EFAULT;
+	kmem_free(packed, size);
+#else
+	packed = (void *)(uintptr_t)zc->zc_nvlist_dst;
+	VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
+	    0) == 0);
+#endif
+
+	zc->zc_nvlist_dst_size = size;
+	return (error);
+}
+
+/*
+ * Rename the "stats" uint64 array of `nvl` to ZPOOL_CONFIG_VDEV_STATS,
+ * after first recursing into every ZPOOL_CONFIG_CHILDREN entry and into
+ * ZPOOL_CONFIG_VDEV_TREE when those are present.
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ */
+static void
+zfs_ioctl_compat_fix_stats_nvlist(nvlist_t *nvl)
+{
+	nvlist_t **child;
+	nvlist_t *nvroot = NULL;
+	vdev_stat_t *vs;
+	uint_t c, children, nelem;
+
+	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			zfs_ioctl_compat_fix_stats_nvlist(child[c]);
+		}
+	}
+
+	if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0)
+		zfs_ioctl_compat_fix_stats_nvlist(nvroot);
+	/* Add under the new key first, then drop the old one. */
+	if ((nvlist_lookup_uint64_array(nvl, "stats",
+	    (uint64_t **)&vs, &nelem) == 0)) {
+		nvlist_add_uint64_array(nvl,
+		    ZPOOL_CONFIG_VDEV_STATS,
+		    (uint64_t *)vs, nelem);
+		nvlist_remove(nvl, "stats",
+		    DATA_TYPE_UINT64_ARRAY);
+	}
+}
+
+
+/*
+ * Unpack the nvlist held in zc's destination buffer, run
+ * zfs_ioctl_compat_fix_stats_nvlist() on it (for nc == 5, on every nested
+ * nvlist value instead of the top level), and pack the result back into
+ * the same buffer.  Returns 0, or the error from the unpack/pack helpers.
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ */
+static int
+zfs_ioctl_compat_fix_stats(zfs_cmd_t *zc, const int nc)
+{
+	nvlist_t *nv, *nvp = NULL;
+	nvpair_t *elem;
+	int error;
+
+	if ((error = zfs_ioctl_compat_get_nvlist(zc->zc_nvlist_dst,
+	    zc->zc_nvlist_dst_size, zc->zc_iflags, &nv)) != 0)
+		return (error);
+
+	if (nc == 5) { /* ZFS_IOC_POOL_STATS */
+		elem = NULL;
+		while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
+			if (nvpair_value_nvlist(elem, &nvp) == 0)
+				zfs_ioctl_compat_fix_stats_nvlist(nvp);
+		}
+		elem = NULL;
+	} else
+		zfs_ioctl_compat_fix_stats_nvlist(nv);
+
+	error = zfs_ioctl_compat_put_nvlist(zc, nv);
+
+	nvlist_free(nv);
+
+	return (error);
+}
+
+/*
+ * Rewrite the nvlist held in zc's destination buffer so that the nested
+ * nvlist "used" is stored as "allocated" and "available" as "free", then
+ * pack it back into the same buffer.  Returns 0, or the error from the
+ * unpack/pack helpers.
+ *
+ * This function sits inside an "#if 0" region and is not compiled.
+ */
+static int
+zfs_ioctl_compat_pool_get_props(zfs_cmd_t *zc)
+{
+	nvlist_t *nv, *nva = NULL;
+	int error;
+
+	if ((error = zfs_ioctl_compat_get_nvlist(zc->zc_nvlist_dst,
+	    zc->zc_nvlist_dst_size, zc->zc_iflags, &nv)) != 0)
+		return (error);
+
+	if (nvlist_lookup_nvlist(nv, "used", &nva) == 0) {
+		nvlist_add_nvlist(nv, "allocated", nva);
+		nvlist_remove(nv, "used", DATA_TYPE_NVLIST);
+	}
+
+	if (nvlist_lookup_nvlist(nv, "available", &nva) == 0) {
+		nvlist_add_nvlist(nv, "free", nva);
+		nvlist_remove(nv, "available", DATA_TYPE_NVLIST);
+	}
+
+	error = zfs_ioctl_compat_put_nvlist(zc, nv);
+
+	nvlist_free(nv);
+
+	return (error);
+}
+#endif
+
+#ifdef _KERNEL
+/*
+ * Adjust an incoming ioctl before it is dispatched.
+ *
+ * A ZFS_IOC_CREATE whose zc_value is non-empty is rewritten to
+ * ZFS_IOC_CLONE.  For ZFS_CMD_COMPAT_V15 callers, vector 7 (the v15
+ * ZFS_IOC_POOL_SCRUB) gets zc_cookie set to POOL_SCAN_SCRUB.
+ * Always returns 0.
+ */
+int
+zfs_ioctl_compat_pre(zfs_cmd_t *zc, int *vec, const int cflag)
+{
+	const int clone_request =
+	    (*vec == ZFS_IOC_CREATE && zc->zc_value[0] != '\0');
+
+	/* are we creating a clone? */
+	if (clone_request)
+		*vec = ZFS_IOC_CLONE;
+
+	/* 7 is ZFS_IOC_POOL_SCRUB (v15) */
+	if (cflag == ZFS_CMD_COMPAT_V15 && *vec == 7)
+		zc->zc_cookie = POOL_SCAN_SCRUB;
+
+	return (0);
+}
+
+/*
+ * Post-process the output of an ioctl issued by a ZFS_CMD_COMPAT_V15
+ * caller: ZFS_IOC_POOL_CONFIGS/POOL_STATS/POOL_TRYIMPORT results go
+ * through zfs_ioctl_compat_fix_stats(), and vector 41 goes through
+ * zfs_ioctl_compat_pool_get_props().  Nothing happens for any other
+ * cflag or vector; helper return values are ignored.
+ *
+ * NOTE(review): both helpers are defined inside the "#if 0" region earlier
+ * in this file, so a _KERNEL build of this function would be left without
+ * their definitions -- confirm.
+ */
+void
+zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag)
+{
+	if (cflag == ZFS_CMD_COMPAT_V15) {
+		switch (vec) {
+		case ZFS_IOC_POOL_CONFIGS:
+		case ZFS_IOC_POOL_STATS:
+		case ZFS_IOC_POOL_TRYIMPORT:
+			zfs_ioctl_compat_fix_stats(zc, vec);
+			break;
+		case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */
+			zfs_ioctl_compat_pool_get_props(zc);
+			break;
+		}
+	}
+}
+
+/*
+ * Convert the input of an ioctl issued by an older binary into the nvlist
+ * form built below for each vector.
+ *
+ * zc    - the caller's zfs_cmd_t.  zc_name, zc_value, zc_string,
+ *         zc_cookie, zc_objset_type and zc_cleanup_fd supply the legacy
+ *         arguments.  For the snapshot, destroy-snaps, hold and release
+ *         vectors zc_name is cut at its first '/' or '@'.
+ * innvl - nvlist supplied by the caller, may be NULL.  It is freed
+ *         whenever a replacement nvlist is returned.
+ * vec   - ioctl vector being dispatched.
+ * cflag - compatibility level; the levels listed in the first test below
+ *         need no translation and get innvl back untouched.
+ *
+ * Returns the nvlist to use as input: either innvl itself or a newly
+ * allocated replacement.
+ */
+nvlist_t *
+zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t *innvl, const int vec,
+    const int cflag)
+{
+	nvlist_t *nvl, *tmpnvl, *hnvl;
+	nvpair_t *elem;
+	char *poolname, *snapname;
+	int err;
+
+	if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC ||
+	    cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP ||
+	    cflag == ZFS_CMD_COMPAT_RESUME || cflag == ZFS_CMD_COMPAT_INLANES)
+		goto out;
+
+	switch (vec) {
+	case ZFS_IOC_CREATE:
+		nvl = fnvlist_alloc();
+		fnvlist_add_int32(nvl, "type", zc->zc_objset_type);
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "props", innvl);
+			nvlist_free(innvl);
+		}
+		return (nvl);
+	break;
+	case ZFS_IOC_CLONE:
+		nvl = fnvlist_alloc();
+		fnvlist_add_string(nvl, "origin", zc->zc_value);
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "props", innvl);
+			nvlist_free(innvl);
+		}
+		return (nvl);
+	break;
+	case ZFS_IOC_SNAPSHOT:
+		if (innvl == NULL)
+			goto out;
+		nvl = fnvlist_alloc();
+		fnvlist_add_nvlist(nvl, "props", innvl);
+		tmpnvl = fnvlist_alloc();
+		snapname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
+		fnvlist_add_boolean(tmpnvl, snapname);
+		/* The allocation covers the string plus its terminator. */
+		kmem_free(snapname, strlen(snapname) + 1);
+		/* check if we are doing a recursive snapshot */
+		if (zc->zc_cookie)
+			dmu_get_recursive_snaps_nvl(zc->zc_name, zc->zc_value,
+			    tmpnvl);
+		fnvlist_add_nvlist(nvl, "snaps", tmpnvl);
+		fnvlist_free(tmpnvl);
+		nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_SPACE_SNAPS:
+		nvl = fnvlist_alloc();
+		fnvlist_add_string(nvl, "firstsnap", zc->zc_value);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		return (nvl);
+	break;
+	case ZFS_IOC_DESTROY_SNAPS:
+		if (innvl == NULL && cflag == ZFS_CMD_COMPAT_DEADMAN)
+			goto out;
+		nvl = fnvlist_alloc();
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "snaps", innvl);
+		} else {
+			/*
+			 * We are probably called by even older binaries,
+			 * allocate and populate nvlist with recursive
+			 * snapshots
+			 */
+			if (zfs_component_namecheck(zc->zc_value, NULL,
+			    NULL) == 0) {
+				tmpnvl = fnvlist_alloc();
+				if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+				    zc->zc_value, tmpnvl) == 0)
+					fnvlist_add_nvlist(nvl, "snaps",
+					    tmpnvl);
+				nvlist_free(tmpnvl);
+			}
+		}
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_HOLD:
+		nvl = fnvlist_alloc();
+		tmpnvl = fnvlist_alloc();
+		if (zc->zc_cleanup_fd != -1)
+			fnvlist_add_int32(nvl, "cleanup_fd",
+			    (int32_t)zc->zc_cleanup_fd);
+		if (zc->zc_cookie) {
+			/* recursive: one hold entry per snapshot found */
+			hnvl = fnvlist_alloc();
+			if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+			    zc->zc_value, hnvl) == 0) {
+				elem = NULL;
+				while ((elem = nvlist_next_nvpair(hnvl,
+				    elem)) != NULL) {
+					nvlist_add_string(tmpnvl,
+					    nvpair_name(elem), zc->zc_string);
+				}
+			}
+			nvlist_free(hnvl);
+		} else {
+			snapname = kmem_asprintf("%s@%s", zc->zc_name,
+			    zc->zc_value);
+			nvlist_add_string(tmpnvl, snapname, zc->zc_string);
+			/* The allocation covers the string plus its NUL. */
+			kmem_free(snapname, strlen(snapname) + 1);
+		}
+		fnvlist_add_nvlist(nvl, "holds", tmpnvl);
+		nvlist_free(tmpnvl);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_RELEASE:
+		nvl = fnvlist_alloc();
+		tmpnvl = fnvlist_alloc();
+		if (zc->zc_cookie) {
+			/* recursive: one release entry per snapshot found */
+			hnvl = fnvlist_alloc();
+			if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+			    zc->zc_value, hnvl) == 0) {
+				elem = NULL;
+				while ((elem = nvlist_next_nvpair(hnvl,
+				    elem)) != NULL) {
+					fnvlist_add_boolean(tmpnvl,
+					    zc->zc_string);
+					fnvlist_add_nvlist(nvl,
+					    nvpair_name(elem), tmpnvl);
+				}
+			}
+			nvlist_free(hnvl);
+		} else {
+			snapname = kmem_asprintf("%s@%s", zc->zc_name,
+			    zc->zc_value);
+			fnvlist_add_boolean(tmpnvl, zc->zc_string);
+			fnvlist_add_nvlist(nvl, snapname, tmpnvl);
+			/* The allocation covers the string plus its NUL. */
+			kmem_free(snapname, strlen(snapname) + 1);
+		}
+		nvlist_free(tmpnvl);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	}
+out:
+	return (innvl);
+}
+
+/*
+ * Convert the output nvlist of an ioctl into the form returned to an older
+ * binary.
+ *
+ * For the compatibility levels listed in the first test, outnvl is
+ * returned untouched.  Otherwise, for ZFS_IOC_SPACE_SNAPS the "used",
+ * "compressed" and "uncompressed" values are copied into zc_cookie,
+ * zc_objset_type and zc_perm_action; for that vector and for
+ * CREATE/CLONE/HOLD/RELEASE, outnvl is freed and a new empty nvlist is
+ * returned.  Any other vector gets outnvl back unchanged.
+ */
+nvlist_t *
+zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t *outnvl, const int vec,
+    const int cflag)
+{
+	const int passthrough =
+	    (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC ||
+	    cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP ||
+	    cflag == ZFS_CMD_COMPAT_RESUME || cflag == ZFS_CMD_COMPAT_INLANES);
+
+	if (passthrough)
+		return (outnvl);
+
+	switch (vec) {
+	case ZFS_IOC_SPACE_SNAPS:
+		/* hand the results back through the zfs_cmd_t fields */
+		(void) nvlist_lookup_uint64(outnvl, "used", &zc->zc_cookie);
+		(void) nvlist_lookup_uint64(outnvl, "compressed",
+		    &zc->zc_objset_type);
+		(void) nvlist_lookup_uint64(outnvl, "uncompressed",
+		    &zc->zc_perm_action);
+		/* FALLTHROUGH */
+	case ZFS_IOC_CREATE:
+	case ZFS_IOC_CLONE:
+	case ZFS_IOC_HOLD:
+	case ZFS_IOC_RELEASE:
+		nvlist_free(outnvl);
+		/* return empty outnvl */
+		return (fnvlist_alloc());
+	default:
+		return (outnvl);
+	}
+}
+#endif /* KERNEL */

diff --git a/zfs/lib/libzfs/os/freebsd/libzfs_zmount.c b/zfs/lib/libzfs/os/freebsd/libzfs_zmount.c
new file mode 100644
index 0000000..699d330
--- /dev/null
+++ b/zfs/lib/libzfs/os/freebsd/libzfs_zmount.c

@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file implements Solaris compatible zmount() function.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/uio.h>
+#include <sys/mntent.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mnttab.h>
+#include <sys/errno.h>
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * Append one name/value pair to an iovec array for nmount().
+ *
+ * iov    - in/out: address of the array, grown with realloc().
+ * iovlen - in/out: number of entries in use.  A negative value marks an
+ *          earlier failure and turns this call into a no-op.
+ * name   - option name; a private copy (strdup) is stored, so every
+ *          even-indexed iov_base is heap memory owned by the array.
+ * val    - option value, stored by reference (may be NULL).
+ * len    - size of val in bytes, or (size_t)-1 to use strlen(val) + 1
+ *          (0 when val is NULL).
+ *
+ * On allocation failure everything built so far is released, *iov is set
+ * to NULL and *iovlen to -1.
+ */
+static void
+build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
+    size_t len)
+{
+	struct iovec *grown;
+	char *key;
+	int i, j;
+
+	if (*iovlen < 0)
+		return;
+	i = *iovlen;
+
+	/* Keep the old pointer until realloc() is known to have succeeded. */
+	grown = realloc(*iov, sizeof (**iov) * (i + 2));
+	if (grown == NULL)
+		goto fail;
+	*iov = grown;
+
+	key = strdup(name);
+	if (key == NULL)
+		goto fail;
+
+	(*iov)[i].iov_base = key;
+	(*iov)[i].iov_len = strlen(name) + 1;
+	i++;
+	(*iov)[i].iov_base = val;
+	if (len == (size_t)-1) {
+		if (val != NULL)
+			len = strlen(val) + 1;
+		else
+			len = 0;
+	}
+	(*iov)[i].iov_len = (int)len;
+	*iovlen = ++i;
+	return;
+
+fail:
+	/* Names sit in the even slots below i; values belong to the caller. */
+	for (j = 0; j < i; j += 2)
+		free((*iov)[j].iov_base);
+	free(*iov);
+	*iov = NULL;
+	*iovlen = -1;
+}
+
+/*
+ * Mount `spec` on `dir` through nmount().
+ *
+ * spec    - value for the "from" option.
+ * dir     - value for the "fspath" option.
+ * mflag   - mount flags; only MS_RDONLY is examined here (adds "ro").
+ * fstype  - must be MNTTYPE_ZFS.
+ * dataptr - must be NULL.
+ * datalen - must be 0.
+ * optptr  - option string, split on ',' and '/'; each piece becomes an
+ *           option name with no value.  If it contains MNTOPT_REMOUNT an
+ *           "update" option is added as well.
+ * optlen  - must be positive; otherwise unused.
+ *
+ * Returns 0 on success, the errno left by nmount() on failure, or ENOMEM
+ * if the option list could not be allocated.
+ */
+static int
+do_mount_(const char *spec, const char *dir, int mflag, char *fstype,
+    char *dataptr, int datalen, char *optptr, int optlen)
+{
+	struct iovec *iov;
+	char *optstr, *p, *tofree;
+	int iovlen, rv, err, i;
+
+	assert(spec != NULL);
+	assert(dir != NULL);
+	assert(fstype != NULL);
+	assert(strcmp(fstype, MNTTYPE_ZFS) == 0);
+	assert(dataptr == NULL);
+	assert(datalen == 0);
+	assert(optptr != NULL);
+	assert(optlen > 0);
+
+	/* strsep() below modifies the string, so work on a copy. */
+	tofree = optstr = strdup(optptr);
+	if (optstr == NULL)
+		return (ENOMEM);
+
+	iov = NULL;
+	iovlen = 0;
+	if (strstr(optstr, MNTOPT_REMOUNT) != NULL)
+		build_iovec(&iov, &iovlen, "update", NULL, 0);
+	if (mflag & MS_RDONLY)
+		build_iovec(&iov, &iovlen, "ro", NULL, 0);
+	build_iovec(&iov, &iovlen, "fstype", fstype, (size_t)-1);
+	build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir),
+	    (size_t)-1);
+	build_iovec(&iov, &iovlen, "from", __DECONST(char *, spec), (size_t)-1);
+	while ((p = strsep(&optstr, ",/")) != NULL)
+		build_iovec(&iov, &iovlen, p, NULL, (size_t)-1);
+
+	if (iovlen < 0) {
+		/*
+		 * build_iovec() reports an allocation failure by setting
+		 * iovlen to -1 and *iov to NULL; there is nothing to mount
+		 * with and nothing left to free.
+		 */
+		rv = -1;
+		err = ENOMEM;
+	} else {
+		rv = nmount(iov, iovlen, 0);
+		/* Capture errno before the free() calls can disturb it. */
+		err = errno;
+		/* Option names (even slots) were strdup'ed by build_iovec(). */
+		for (i = 0; i < iovlen; i += 2)
+			free(iov[i].iov_base);
+		free(iov);
+	}
+	free(tofree);
+	if (rv < 0)
+		return (err);
+	return (rv);
+}
+
+/*
+ * Mount the dataset behind zhp on mntpt.
+ *
+ * zhp   - dataset handle; its name is used as the mount source.
+ * mntpt - mount point path.
+ * opts  - comma-separated option string.
+ * flags - mount flags forwarded to do_mount_().
+ *
+ * Returns the result of do_mount_(): 0 on success or an errno value.
+ */
+int
+do_mount(zfs_handle_t *zhp, const char *mntpt, char *opts, int flags)
+{
+	/*
+	 * Pass the real size of the option string rather than the size of
+	 * a pointer.  do_mount_() asserts opts != NULL itself, so a NULL
+	 * is forwarded with a length of 0 instead of being dereferenced.
+	 */
+	int optlen = (opts != NULL) ? (int)strlen(opts) + 1 : 0;
+
+	return (do_mount_(zfs_get_name(zhp), mntpt, flags, MNTTYPE_ZFS, NULL, 0,
+	    opts, optlen));
+}
+
+/*
+ * Unmount mntpt with the given flags.  Returns 0 on success, or the errno
+ * set by unmount() on failure.
+ */
+int
+do_unmount(const char *mntpt, int flags)
+{
+	return (unmount(mntpt, flags) < 0 ? errno : 0);
+}
+
+/*
+ * Stub in this FreeBSD file: performs no check and always returns 0.
+ */
+int
+zfs_mount_delegation_check(void)
+{
+	return (0);
+}

diff --git a/zfs/lib/libzfs/os/linux/libzfs_mount_os.c b/zfs/lib/libzfs/os/linux/libzfs_mount_os.c
new file mode 100644
index 0000000..b800e06
--- /dev/null
+++ b/zfs/lib/libzfs/os/linux/libzfs_mount_os.c

@@ -0,0 +1,413 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2021 by Delphix. All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
+ * Copyright 2017 RackTop Systems.
+ * Copyright (c) 2018 Datto Inc.
+ * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
+ */
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/mntent.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/dsl_crypt.h>
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include <thread_pool.h>
+
+#define	ZS_COMMENT	0x00000000	/* comment */
+#define	ZS_ZFSUTIL	0x00000001	/* caller is zfs(8) */
+
+/*
+ * One row of the mount option table: an option name together with the bits
+ * that parse_option() ORs into the caller's flag words when the name matches.
+ */
+typedef struct option_map {
+	const char *name;	/* option name; NULL terminates the table */
+	unsigned long mntmask;	/* bits ORed into *mntflags on a match */
+	unsigned long zfsmask;	/* bits ORed into *zfsflags on a match */
+} option_map_t;
+
+/*
+ * Table of recognized mount options, scanned in order by parse_option()
+ * and terminated by the entry whose name is NULL.  Entries guarded by
+ * #ifdef are only compiled in when the corresponding MS_* flag is defined.
+ */
+static const option_map_t option_map[] = {
+	/* Canonicalized filesystem independent options from mount(8) */
+	{ MNTOPT_NOAUTO,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_DEFAULTS,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_NODEVICES,	MS_NODEV,	ZS_COMMENT	},
+	{ MNTOPT_DEVICES,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_DIRSYNC,	MS_DIRSYNC,	ZS_COMMENT	},
+	{ MNTOPT_NOEXEC,	MS_NOEXEC,	ZS_COMMENT	},
+	{ MNTOPT_EXEC,		MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_GROUP,		MS_GROUP,	ZS_COMMENT	},
+	{ MNTOPT_NETDEV,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_NOFAIL,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_NOSUID,	MS_NOSUID,	ZS_COMMENT	},
+	{ MNTOPT_SUID,		MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_OWNER,		MS_OWNER,	ZS_COMMENT	},
+	{ MNTOPT_REMOUNT,	MS_REMOUNT,	ZS_COMMENT	},
+	{ MNTOPT_RO,		MS_RDONLY,	ZS_COMMENT	},
+	{ MNTOPT_RW,		MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_SYNC,		MS_SYNCHRONOUS,	ZS_COMMENT	},
+	{ MNTOPT_USER,		MS_USERS,	ZS_COMMENT	},
+	{ MNTOPT_USERS,		MS_USERS,	ZS_COMMENT	},
+	/* acl flags passed with util-linux-2.24 mount command */
+	{ MNTOPT_ACL,		MS_POSIXACL,	ZS_COMMENT	},
+	{ MNTOPT_NOACL,		MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_POSIXACL,	MS_POSIXACL,	ZS_COMMENT	},
+#ifdef MS_NOATIME
+	{ MNTOPT_NOATIME,	MS_NOATIME,	ZS_COMMENT	},
+	{ MNTOPT_ATIME,		MS_COMMENT,	ZS_COMMENT	},
+#endif
+#ifdef MS_NODIRATIME
+	{ MNTOPT_NODIRATIME,	MS_NODIRATIME,	ZS_COMMENT	},
+	{ MNTOPT_DIRATIME,	MS_COMMENT,	ZS_COMMENT	},
+#endif
+#ifdef MS_RELATIME
+	{ MNTOPT_RELATIME,	MS_RELATIME,	ZS_COMMENT	},
+	{ MNTOPT_NORELATIME,	MS_COMMENT,	ZS_COMMENT	},
+#endif
+#ifdef MS_STRICTATIME
+	{ MNTOPT_STRICTATIME,	MS_STRICTATIME,	ZS_COMMENT	},
+	{ MNTOPT_NOSTRICTATIME,	MS_COMMENT,	ZS_COMMENT	},
+#endif
+#ifdef MS_LAZYTIME
+	{ MNTOPT_LAZYTIME,	MS_LAZYTIME,	ZS_COMMENT	},
+#endif
+	{ MNTOPT_CONTEXT,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_FSCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_DEFCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_ROOTCONTEXT,	MS_COMMENT,	ZS_COMMENT	},
+#ifdef MS_I_VERSION
+	{ MNTOPT_IVERSION,	MS_I_VERSION,	ZS_COMMENT	},
+#endif
+#ifdef MS_MANDLOCK
+	{ MNTOPT_NBMAND,	MS_MANDLOCK,	ZS_COMMENT	},
+	{ MNTOPT_NONBMAND,	MS_COMMENT,	ZS_COMMENT	},
+#endif
+	/* Valid options not found in mount(8) */
+	{ MNTOPT_BIND,		MS_BIND,	ZS_COMMENT	},
+#ifdef MS_REC
+	{ MNTOPT_RBIND,		MS_BIND|MS_REC,	ZS_COMMENT	},
+#endif
+	{ MNTOPT_COMMENT,	MS_COMMENT,	ZS_COMMENT	},
+#ifdef MS_NOSUB
+	{ MNTOPT_NOSUB,		MS_NOSUB,	ZS_COMMENT	},
+#endif
+#ifdef MS_SILENT
+	{ MNTOPT_QUIET,		MS_SILENT,	ZS_COMMENT	},
+#endif
+	/* Custom zfs options */
+	{ MNTOPT_XATTR,		MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_NOXATTR,	MS_COMMENT,	ZS_COMMENT	},
+	{ MNTOPT_ZFSUTIL,	MS_COMMENT,	ZS_ZFSUTIL	},
+	{ NULL,			0,		0		} };
+
+/*
+ * Split one mount option of the form "name" or "name=value" at its first
+ * '=' and look the name up in option_map.  On a match the entry's masks
+ * are ORed into *mntflags and *zfsflags.
+ *
+ * Only strlen(name) characters are compared, so a name that is a prefix of
+ * a table entry (the empty string included) matches the first such entry.
+ *
+ * Returns 0 on a match, and also when nothing matched but sloppy is
+ * non-zero; ENOENT when nothing matched and sloppy is zero; ENOMEM when
+ * the option could not be duplicated.
+ */
+static int
+parse_option(char *mntopt, unsigned long *mntflags,
+    unsigned long *zfsflags, int sloppy)
+{
+	char *name, *eq, *value = NULL;
+	size_t namelen;
+	int i;
+
+	name = strdup(mntopt);
+	if (name == NULL)
+		return (ENOMEM);
+
+	/* Cut the private copy at the first '=' so only the name remains. */
+	eq = strchr(name, '=');
+	if (eq != NULL) {
+		*eq = '\0';
+		value = eq + 1;
+		VERIFY3P(value, !=, NULL);
+	}
+
+	namelen = strlen(name);
+	for (i = 0; option_map[i].name != NULL; i++) {
+		if (strncmp(name, option_map[i].name, namelen) != 0)
+			continue;
+
+		*mntflags |= option_map[i].mntmask;
+		*zfsflags |= option_map[i].zfsmask;
+		/* If required further process on the value may be done here */
+		free(name);
+		return (0);
+	}
+
+	free(name);
+	return (sloppy ? 0 : ENOENT);
+}
+
+/*
+ * Translate the mount option string in to MS_* mount flags for the
+ * kernel vfs.  When sloppy is non-zero unknown options will be ignored
+ * otherwise they are considered fatal and are copied in to badopt.
+ *
+ * *mntflags is reset to 0 before parsing; *zfsflags is only ORed in to and
+ * must be initialized by the caller.  When mtabopt is non-NULL, options are
+ * appended to it (comma separated, bounded by MNT_LINE_MAX) as long as
+ * neither MS_REMOUNT nor ZS_ZFSUTIL has been seen so far.  badopt is
+ * treated as a MNT_LINE_MAX byte buffer.
+ *
+ * Returns 0 on success, ENOMEM if mntopts cannot be duplicated, or the
+ * error reported by parse_option() for the first rejected option.
+ */
+int
+zfs_parse_mount_options(char *mntopts, unsigned long *mntflags,
+    unsigned long *zfsflags, int sloppy, char *badopt, char *mtabopt)
+{
+	int error = 0, quote = 0, flag = 0, count = 0;
+	char *ptr, *opt, *opts;
+
+	/* Options are NUL-terminated in place, so work on a private copy. */
+	opts = strdup(mntopts);
+	if (opts == NULL)
+		return (ENOMEM);
+
+	*mntflags = 0;
+	opt = NULL;
+
+	/*
+	 * Scan through all mount options which must be comma delimited.
+	 * We must be careful to notice regions which are double quoted
+	 * and skip commas in these regions.  Each option is then checked
+	 * to determine if it is a known option.
+	 */
+	for (ptr = opts; ptr && !flag; ptr++) {
+		if (opt == NULL)
+			opt = ptr;
+
+		if (*ptr == '"')
+			quote = !quote;
+
+		/*
+		 * Skip everything inside a quoted region, but never step
+		 * over the terminating NUL: with an unbalanced quote the
+		 * scan would otherwise run past the end of the buffer.
+		 */
+		if (quote && *ptr != '\0')
+			continue;
+
+		if (*ptr == '\0')
+			flag = 1;
+
+		if ((*ptr == ',') || (*ptr == '\0')) {
+			*ptr = '\0';
+
+			error = parse_option(opt, mntflags, zfsflags, sloppy);
+			if (error) {
+				/* Bounded copy; strcpy() could overrun. */
+				(void) strlcpy(badopt, opt, MNT_LINE_MAX);
+				goto out;
+			}
+
+			if (!(*mntflags & MS_REMOUNT) &&
+			    !(*zfsflags & ZS_ZFSUTIL) &&
+			    mtabopt != NULL) {
+				if (count > 0)
+					strlcat(mtabopt, ",", MNT_LINE_MAX);
+
+				strlcat(mtabopt, opt, MNT_LINE_MAX);
+				count++;
+			}
+
+			opt = NULL;
+		}
+	}
+
+out:
+	free(opts);
+	return (error);
+}
+
+/*
+ * Format ",name=val" (or ",name=\"val\"" when quote is set) and append the
+ * result to each of mntopts and mtabopt that is non-NULL.  Both
+ * destinations are treated as MNT_LINE_MAX byte buffers.
+ */
+static void
+append_mntopt(const char *name, const char *val, char *mntopts,
+    char *mtabopt, boolean_t quote)
+{
+	char entry[MNT_LINE_MAX];
+
+	if (quote)
+		snprintf(entry, MNT_LINE_MAX, ",%s=\"%s\"", name, val);
+	else
+		snprintf(entry, MNT_LINE_MAX, ",%s=%s", name, val);
+
+	if (mntopts != NULL)
+		strlcat(mntopts, entry, MNT_LINE_MAX);
+
+	if (mtabopt != NULL)
+		strlcat(mtabopt, entry, MNT_LINE_MAX);
+}
+
+/*
+ * Read property zpt of zhp and, unless the lookup fails or the value is
+ * "none", append it as the quoted mount option "name" to mntopts and
+ * mtabopt via append_mntopt().
+ */
+static void
+zfs_selinux_setcontext(zfs_handle_t *zhp, zfs_prop_t zpt, const char *name,
+    char *mntopts, char *mtabopt)
+{
+	char context[ZFS_MAXPROPLEN];
+	int rc;
+
+	rc = zfs_prop_get(zhp, zpt, context, sizeof (context),
+	    NULL, NULL, 0, B_FALSE);
+	if (rc != 0)
+		return;
+
+	if (strcmp(context, "none") == 0)
+		return;
+
+	append_mntopt(name, context, mntopts, mtabopt, B_TRUE);
+}
+
+/*
+ * Append the SELinux context options and the mount point hint for zhp to
+ * the option strings.
+ *
+ * If ZFS_PROP_SELINUX_CONTEXT can be read and is not "none", it is added
+ * as the 'context' option; that option overrides the others, so they are
+ * not added.  If it is "none", the fscontext, defcontext and rootcontext
+ * properties are each added when they are set.  If the property cannot be
+ * read, no context option is added.
+ *
+ * The mntpoint option is always appended to mntopts only.
+ */
+void
+zfs_adjust_mount_options(zfs_handle_t *zhp, const char *mntpoint,
+    char *mntopts, char *mtabopt)
+{
+	char prop[ZFS_MAXPROPLEN];
+	int have_context;
+
+	have_context = (zfs_prop_get(zhp, ZFS_PROP_SELINUX_CONTEXT, prop,
+	    sizeof (prop), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (have_context && strcmp(prop, "none") != 0) {
+		append_mntopt(MNTOPT_CONTEXT, prop,
+		    mntopts, mtabopt, B_TRUE);
+	} else if (have_context) {
+		zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_FSCONTEXT,
+		    MNTOPT_FSCONTEXT, mntopts, mtabopt);
+		zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_DEFCONTEXT,
+		    MNTOPT_DEFCONTEXT, mntopts, mtabopt);
+		zfs_selinux_setcontext(zhp, ZFS_PROP_SELINUX_ROOTCONTEXT,
+		    MNTOPT_ROOTCONTEXT, mntopts, mtabopt);
+	}
+
+	/* A hint used to determine an auto-mounted snapshot mount point */
+	append_mntopt(MNTOPT_MNTPOINT, mntpoint, mntopts, NULL, B_FALSE);
+}
+
+/*
+ * Mount the dataset referenced by zhp on mntpt.
+ *
+ * By default the filesystem is mounted by preparing the mount options
+ * (i.e. parsing some flags from the "opts" parameter into mount flags) and
+ * then directly calling the system call mount(2).  We don't need the mount
+ * utility or to update /etc/mtab, because this is a symlink on all modern
+ * systems.  On this path the return value is 0, EINVAL when the options
+ * cannot be parsed, or the errno set by mount(2).
+ * NOTE(review): zfs_parse_mount_options() resets *mntflags to 0 before
+ * parsing, so bits passed in "flags" do not appear to reach mount(2) on
+ * this path -- confirm whether that is intended.
+ *
+ * If the environment variable ZFS_MOUNT_HELPER is set, we fall back to the
+ * previous behavior: the filesystem is mounted by invoking the system
+ * mount utility rather than by the system call mount(2).  This ensures
+ * that the /etc/mtab file is correctly locked for the update.  Performing
+ * our own locking and /etc/mtab update requires making an unsafe
+ * assumption about how the mount utility performs its locking.
+ * Unfortunately, this also means in the case of a mount failure we do not
+ * have the exact errno.  We must make do with the return value from the
+ * mount process, which is mapped to a single errno below.
+ */
+int
+do_mount(zfs_handle_t *zhp, const char *mntpt, char *opts, int flags)
+{
+	const char *src = zfs_get_name(zhp);
+
+	if (libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
+		char *argv[9] = {
+		    "/bin/mount",
+		    "--no-canonicalize",
+		    "-t", MNTTYPE_ZFS,
+		    "-o", opts,
+		    (char *)src,
+		    (char *)mntpt,
+		    (char *)NULL };
+		int status;
+
+		status = libzfs_run_process(argv[0], argv,
+		    STDOUT_VERBOSE|STDERR_VERBOSE);
+		if (status == 0)
+			return (0);
+
+		/* Return only the most critical mount error */
+		if (status & MOUNT_FILEIO)
+			return (EIO);
+		if (status & MOUNT_USER)
+			return (EINTR);
+		if (status & MOUNT_SOFTWARE)
+			return (EPIPE);
+		if (status & MOUNT_BUSY)
+			return (EBUSY);
+		if (status & MOUNT_SYSERR)
+			return (EAGAIN);
+		if (status & MOUNT_USAGE)
+			return (EINVAL);
+		return (ENXIO); /* Generic error */
+	}
+
+	char badopt[MNT_LINE_MAX] = {0};
+	unsigned long mntflags = flags, zfsflags = 0;
+	char myopts[MNT_LINE_MAX] = {0};
+
+	if (zfs_parse_mount_options(opts, &mntflags,
+	    &zfsflags, 0, badopt, NULL) != 0)
+		return (EINVAL);
+
+	strlcat(myopts, opts, MNT_LINE_MAX);
+	zfs_adjust_mount_options(zhp, mntpt, myopts, NULL);
+	if (mount(src, mntpt, MNTTYPE_ZFS, mntflags, myopts) != 0)
+		return (errno);
+
+	return (0);
+}
+
+/*
+ * Unmount the filesystem mounted at mntpt.
+ *
+ * Unless the environment variable ZFS_MOUNT_HELPER is set, umount2(2) is
+ * called directly and its errno is returned on failure.  Otherwise
+ * /bin/umount is run with "-f" when MS_FORCE is set and "-l" when
+ * MS_DETACH is set; any failure of the helper is reported as EINVAL.
+ * Returns 0 on success.
+ */
+int
+do_unmount(const char *mntpt, int flags)
+{
+	char force_opt[] = "-f";
+	char lazy_opt[] = "-l";
+	char *argv[7] = {
+	    "/bin/umount",
+	    "-t", MNTTYPE_ZFS,
+	    NULL, NULL, NULL, NULL };
+	int next = 3;	/* first free slot after the fixed arguments */
+
+	if (!libzfs_envvar_is_set("ZFS_MOUNT_HELPER"))
+		return (umount2(mntpt, flags) < 0 ? errno : 0);
+
+	if (flags & MS_FORCE)
+		argv[next++] = force_opt;
+
+	if (flags & MS_DETACH)
+		argv[next++] = lazy_opt;
+
+	argv[next] = (char *)mntpt;
+
+	if (libzfs_run_process(argv[0], argv,
+	    STDOUT_VERBOSE|STDERR_VERBOSE) != 0)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Returns 0 when the effective user id is 0, otherwise EACCES.
+ */
+int
+zfs_mount_delegation_check(void)
+{
+	if (geteuid() == 0)
+		return (0);
+
+	return (EACCES);
+}

diff --git a/zfs/lib/libzfs/os/linux/libzfs_pool_os.c b/zfs/lib/libzfs/os/linux/libzfs_pool_os.c
new file mode 100644
index 0000000..747b565
--- /dev/null
+++ b/zfs/lib/libzfs/os/linux/libzfs_pool_os.c

@@ -0,0 +1,342 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
+ * Copyright (c) 2018 Datto Inc.
+ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
+ */
+
+#include <errno.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <zone.h>
+#include <sys/stat.h>
+#include <sys/efi_partition.h>
+#include <sys/systeminfo.h>
+#include <sys/vtoc.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/vdev_disk.h>
+#include <dlfcn.h>
+#include <libzutil.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+#include "zfs_comutil.h"
+#include "zfeature_common.h"
+
+/*
+ * If the device has been dynamically expanded then we need to relabel
+ * the disk to use the new unallocated space.
+ *
+ * Opens path, calls efi_use_whole_disk() on it, then flushes and closes
+ * the device.  Returns 0 on success (VT_ENOSPC from efi_use_whole_disk()
+ * also counts as success); otherwise records an auxiliary message on hdl
+ * and returns the result of zfs_error() with msg.
+ */
+int
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
+{
+	int fd, rc;
+
+	fd = open(path, O_RDWR|O_DIRECT|O_CLOEXEC);
+	if (fd < 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to open device: %d"), path, errno);
+		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
+	}
+
+	rc = efi_use_whole_disk(fd);
+
+	/* Flush the buffers to disk and invalidate the page cache. */
+	(void) fsync(fd);
+	(void) ioctl(fd, BLKFLSBUF);
+
+	(void) close(fd);
+
+	/*
+	 * It's possible that we might encounter an error if the device
+	 * does not have any unallocated space left. If so, we simply
+	 * ignore that error and continue on.
+	 */
+	if (rc == 0 || rc == VT_ENOSPC)
+		return (0);
+
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+	    "relabel '%s': unable to read disk capacity"), path);
+	return (zfs_error(hdl, EZFS_NOCAP, msg));
+}
+
+/*
+ * Read the EFI label of the device named by ZPOOL_CONFIG_PATH in config;
+ * if a label does not exist then pass back the error to the caller.  If
+ * the caller has passed a non-NULL diskaddr argument then we set it to the
+ * starting address of the first EFI partition.
+ *
+ * The device is opened under DISK_ROOT using the last component of the
+ * configured path.  Returns the (non-negative) result of
+ * efi_alloc_and_read() on success and a negative value when the path
+ * cannot be looked up, the device cannot be opened or the label cannot be
+ * read.
+ */
+static int
+read_efi_label(nvlist_t *config, diskaddr_t *sb)
+{
+	char *path;
+	const char *base;
+	int fd;
+	char diskname[MAXPATHLEN];
+	int err = -1;
+
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
+		return (err);
+
+	/*
+	 * strrchr() returns NULL for a path without any '/'; passing that
+	 * to "%s" is undefined, so treat such a path as a bare device name.
+	 */
+	base = strrchr(path, '/');
+	if (base != NULL) {
+		/* base still starts with the '/' separator */
+		(void) snprintf(diskname, sizeof (diskname), "%s%s",
+		    DISK_ROOT, base);
+	} else {
+		(void) snprintf(diskname, sizeof (diskname), "%s/%s",
+		    DISK_ROOT, path);
+	}
+
+	if ((fd = open(diskname, O_RDONLY|O_DIRECT|O_CLOEXEC)) >= 0) {
+		struct dk_gpt *vtoc;
+
+		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
+			if (sb != NULL)
+				*sb = vtoc->efi_parts[0].p_start;
+			efi_free(vtoc);
+		}
+		(void) close(fd);
+	}
+	return (err);
+}
+
+/*
+ * Determine where a partition starts on a disk in the current
+ * configuration.
+ *
+ * A config with a ZPOOL_CONFIG_CHILDREN array is searched depth first and
+ * the first start block found is returned.  A config without children
+ * yields the start block read from its EFI label, provided
+ * ZPOOL_CONFIG_WHOLE_DISK is set and non-zero.  MAXOFFSET_T is returned
+ * when no start block could be determined.
+ */
+static diskaddr_t
+find_start_block(nvlist_t *config)
+{
+	nvlist_t **child;
+	uint_t nchildren, i;
+	uint64_t wholedisk;
+	diskaddr_t start;
+
+	if (nvlist_lookup_nvlist_array(config,
+	    ZPOOL_CONFIG_CHILDREN, &child, &nchildren) == 0) {
+		for (i = 0; i < nchildren; i++) {
+			start = find_start_block(child[i]);
+			if (start != MAXOFFSET_T)
+				return (start);
+		}
+		return (MAXOFFSET_T);
+	}
+
+	/* No children: only whole-disk entries carry a label we read. */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_WHOLE_DISK,
+	    &wholedisk) != 0 || !wholedisk)
+		return (MAXOFFSET_T);
+
+	start = MAXOFFSET_T;
+	if (read_efi_label(config, &start) < 0)
+		start = MAXOFFSET_T;
+
+	return (start);
+}
+
+/*
+ * Read the EFI label back from path and check it.  Returns 0 when the
+ * label can be read and EFI_GPT_PRIMARY_CORRUPT is not set, EIDRM when
+ * that flag is set, the errno from open(2) when the device cannot be
+ * opened, or the non-zero result of efi_alloc_and_read().
+ */
+static int
+zpool_label_disk_check(char *path)
+{
+	struct dk_gpt *vtoc;
+	int fd, rc;
+
+	fd = open(path, O_RDONLY|O_DIRECT|O_CLOEXEC);
+	if (fd < 0)
+		return (errno);
+
+	rc = efi_alloc_and_read(fd, &vtoc);
+	if (rc == 0) {
+		if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT)
+			rc = EIDRM;
+		efi_free(vtoc);
+	}
+
+	(void) close(fd);
+	return (rc);
+}
+
+/*
+ * Generate a unique partition name for the ZFS member.  Partitions must
+ * have unique names to ensure udev will be able to create symlinks under
+ * /dev/disk/by-partlabel/ for all pool members.  The partition names are
+ * of the form zfs-<unique-id>, where the id is printed as 16 hexadecimal
+ * digits.
+ *
+ * The id is read from /dev/urandom; if that fails or yields 0, it is
+ * composed from two rand() values instead.  At most label_size bytes are
+ * written to label_name.
+ */
+static void
+zpool_label_name(char *label_name, int label_size)
+{
+	uint64_t id = 0;
+	int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
+
+	if (fd >= 0) {
+		ssize_t got = read(fd, &id, sizeof (id));
+
+		/* A short or failed read leaves id unusable. */
+		if (got != sizeof (id))
+			id = 0;
+
+		close(fd);
+	}
+
+	if (id == 0) {
+		uint64_t hi = (uint64_t)rand();
+		uint64_t lo = (uint64_t)rand();
+
+		id = (hi << 32) | lo;
+	}
+
+	snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
+}
+
+/*
+ * Label an individual disk.  The name provided is the short name,
+ * stripped of any leading /dev path.
+ *
+ * When zhp is non-NULL the start block is taken from (and cached in)
+ * zhp->zpool_start_block; otherwise NEW_START_BLOCK is used.  The label is
+ * written, the device is rescanned, and the label is then read back and
+ * verified.
+ *
+ * Returns 0 on success.  On failure an auxiliary message is recorded on
+ * hdl and the result of zfs_error() is returned.
+ */
+int
+zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
+{
+	char path[MAXPATHLEN];
+	struct dk_gpt *vtoc;
+	int rval, fd;
+	size_t resv = EFI_MIN_RESV_SIZE;
+	uint64_t slice_size;
+	diskaddr_t start_block;
+	char errbuf[1024];
+
+	/* prepare an error message just in case */
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
+
+	if (zhp) {
+		nvlist_t *nvroot;
+
+		verify(nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+
+		/* Look the start block up once and cache it in the handle. */
+		if (zhp->zpool_start_block == 0)
+			start_block = find_start_block(nvroot);
+		else
+			start_block = zhp->zpool_start_block;
+		zhp->zpool_start_block = start_block;
+	} else {
+		/* new pool */
+		start_block = NEW_START_BLOCK;
+	}
+
+	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+
+	if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL|O_CLOEXEC)) < 0) {
+		/*
+		 * This shouldn't happen.  We've long since verified that this
+		 * is a valid device.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "label '%s': unable to open device: %d"), path, errno);
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+	}
+
+	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
+		/*
+		 * The only way this can fail is if we run out of memory, or we
+		 * were unable to read the disk's capacity
+		 */
+		if (errno == ENOMEM)
+			(void) no_memory(hdl);
+
+		(void) close(fd);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "label '%s': unable to read disk capacity"), path);
+
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+
+	/*
+	 * Size of partition 0: everything up to the last usable LBA, minus
+	 * the reserved area and the space before start_block, rounded down
+	 * to PARTITION_END_ALIGNMENT.
+	 */
+	slice_size = vtoc->efi_last_u_lba + 1;
+	slice_size -= EFI_MIN_RESV_SIZE;
+	if (start_block == MAXOFFSET_T)
+		start_block = NEW_START_BLOCK;
+	slice_size -= start_block;
+	slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
+
+	vtoc->efi_parts[0].p_start = start_block;
+	vtoc->efi_parts[0].p_size = slice_size;
+
+	/*
+	 * Why we use V_USR: V_BACKUP confuses users, and is considered
+	 * disposable by some EFI utilities (since EFI doesn't have a backup
+	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
+	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
+	 * etc. were all pretty specific.  V_USR is as close to reality as we
+	 * can get, in the absence of V_OTHER.
+	 */
+	vtoc->efi_parts[0].p_tag = V_USR;
+	zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
+
+	/* Partition 8 is the reserved area placed right after partition 0. */
+	vtoc->efi_parts[8].p_start = slice_size + start_block;
+	vtoc->efi_parts[8].p_size = resv;
+	vtoc->efi_parts[8].p_tag = V_RESERVED;
+
+	rval = efi_write(fd, vtoc);
+
+	/* Flush the buffers to disk and invalidate the page cache. */
+	(void) fsync(fd);
+	(void) ioctl(fd, BLKFLSBUF);
+
+	if (rval == 0)
+		rval = efi_rescan(fd);
+
+	/*
+	 * Some block drivers (like pcata) may not support EFI GPT labels.
+	 * Print out a helpful error message directing the user to manually
+	 * label the disk and give a specific slice.
+	 */
+	if (rval != 0) {
+		(void) close(fd);
+		efi_free(vtoc);
+
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
+		    "parted(8) and then provide a specific slice: %d"), rval);
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
+
+	(void) close(fd);
+	efi_free(vtoc);
+
+	/* Build the path of the partition that was just created. */
+	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+	(void) zfs_append_partition(path, MAXPATHLEN);
+
+	/* Wait for udev to signal us the device has settled. */
+	rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
+	if (rval) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
+		    "detect device partitions on '%s': %d"), path, rval);
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
+
+	/* We can't be too paranoid.  Read the label back and verify it. */
+	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+	rval = zpool_label_disk_check(path);
+	if (rval) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
+		    "EFI label on '%s' is damaged.  Ensure\nthis device "
+		    "is not in use, and is functioning properly: %d"),
+		    path, rval);
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
+	return (0);
+}

diff --git a/zfs/lib/libzfs/os/linux/libzfs_sendrecv_os.c b/zfs/lib/libzfs/os/linux/libzfs_sendrecv_os.c
new file mode 100644
index 0000000..8fc02ff
--- /dev/null
+++ b/zfs/lib/libzfs/os/linux/libzfs_sendrecv_os.c

@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+#ifndef F_SETPIPE_SZ
+#define	F_SETPIPE_SZ (F_SETLEASE + 7)
+#endif /* F_SETPIPE_SZ */
+
+#ifndef F_GETPIPE_SZ
+#define	F_GETPIPE_SZ (F_GETLEASE + 7)
+#endif /* F_GETPIPE_SZ */
+
+/*
+ * Try to grow the pipe behind infd to the size listed in
+ * /proc/sys/fs/pipe-max-size.  Nothing is changed when that file cannot
+ * be read, when the current pipe size cannot be queried, or when the pipe
+ * is already at least that large.  Failures are silently ignored.
+ */
+void
+libzfs_set_pipe_max(int infd)
+{
+#if __linux__
+	/*
+	 * Sadly, Linux has an unfixed deadlock if you do SETPIPE_SZ on a pipe
+	 * with data in it.
+	 * cf. #13232, https://bugzilla.kernel.org/show_bug.cgi?id=212295
+	 *
+	 * And since the problem is in waking up the writer, there's nothing
+	 * we can do about it from here.
+	 *
+	 * So if people want to, they can set this, but they
+	 * may regret it...
+	 */
+	if (getenv("ZFS_SET_PIPE_MAX") == NULL)
+		return;
+#endif
+
+	FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
+
+	if (procf == NULL)
+		return;
+
+	unsigned long max_psize;
+
+	if (fscanf(procf, "%lu", &max_psize) > 0) {
+		long cur_psize = fcntl(infd, F_GETPIPE_SZ);
+
+		/* Only ever grow the pipe, never shrink it. */
+		if (cur_psize > 0 && (unsigned long) cur_psize < max_psize)
+			fcntl(infd, F_SETPIPE_SZ, max_psize);
+	}
+
+	fclose(procf);
+}

diff --git a/zfs/lib/libzfs/os/linux/libzfs_util_os.c b/zfs/lib/libzfs/os/linux/libzfs_util_os.c
new file mode 100644
index 0000000..e2482c5
--- /dev/null
+++ b/zfs/lib/libzfs/os/linux/libzfs_util_os.c

@@ -0,0 +1,215 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <libzfs.h>
+#include <libzfs_core.h>
+
+#include "libzfs_impl.h"
+#include "zfs_prop.h"
+#include <libzutil.h>
+#include <sys/zfs_sysfs.h>
+
+#define	ZDIFF_SHARESDIR		"/.zfs/shares/"
+
+/*
+ * Issue ioctl(2) "request" with argument zc on the descriptor stored in
+ * hdl->libzfs_fd and return the result of ioctl() unchanged.
+ */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
+{
+	return (ioctl(hdl->libzfs_fd, request, zc));
+}
+
+/*
+ * Map an initialization error code to a translated, human readable
+ * message.  ENXIO, ENOENT, ENOEXEC and EACCES have dedicated messages;
+ * every other value yields a generic one.
+ */
+const char *
+libzfs_error_init(int error)
+{
+	if (error == ENXIO) {
+		return (dgettext(TEXT_DOMAIN, "The ZFS modules are not "
+		    "loaded.\nTry running '/sbin/modprobe zfs' as root "
+		    "to load them."));
+	}
+
+	if (error == ENOENT) {
+		return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts "
+		    "are required.\nTry running 'udevadm trigger' and 'mount "
+		    "-t proc proc /proc' as root."));
+	}
+
+	if (error == ENOEXEC) {
+		return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be "
+		    "auto-loaded.\nTry running '/sbin/modprobe zfs' as "
+		    "root to manually load them."));
+	}
+
+	if (error == EACCES) {
+		return (dgettext(TEXT_DOMAIN, "Permission denied the "
+		    "ZFS utilities must be run as root."));
+	}
+
+	return (dgettext(TEXT_DOMAIN, "Failed to initialize the "
+	    "libzfs library."));
+}
+
+/*
+ * Report whether the kernel module "module" is loaded by checking for the
+ * existence of /sys/module/<module>.  Returns non-zero when the directory
+ * exists and 0 otherwise, including when the resulting path would not fit
+ * in the local buffer.
+ */
+static int
+libzfs_module_loaded(const char *module)
+{
+	char path[256];
+	int len;
+
+	/*
+	 * Build the path with a bounded formatter; an unchecked strcpy()
+	 * would overflow path for an over-long module name.
+	 */
+	len = snprintf(path, sizeof (path), "/sys/module/%s", module);
+	if (len < 0 || (size_t)len >= sizeof (path))
+		return (0);
+
+	return (access(path, F_OK) == 0);
+}
+
+/*
+ * Verify the required ZFS_DEV device is available and optionally attempt
+ * to load the given module.  Under normal circumstances the modules
+ * should already have been loaded by some external mechanism.
+ *
+ * Environment variables:
+ * - ZFS_MODULE_LOADING - Attempt to load the module when the value starts
+ *                        with "YES" or "ON" (compared case-insensitively).
+ * - ZFS_MODULE_TIMEOUT="<seconds>" - Seconds to wait for ZFS_DEV,
+ *                        clamped to the range 0..600 (default 10).
+ *
+ * Returns 0 once ZFS_DEV can be opened, ENOEXEC when modprobe fails, ENXIO
+ * when the module is not loaded, ENOENT when ZFS_DEV did not appear within
+ * the timeout, or the errno of a failed open(2) other than ENOENT.
+ */
+static int
+libzfs_load_module_impl(const char *module)
+{
+	char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0};
+	long timeout = 10; /* seconds */
+	long busy_timeout = 10; /* milliseconds */
+	hrtime_t start;
+
+	/* Optionally request module loading */
+	if (!libzfs_module_loaded(module)) {
+		const char *load_str = getenv("ZFS_MODULE_LOADING");
+		int load = (load_str != NULL &&
+		    (strncasecmp(load_str, "YES", strlen("YES")) == 0 ||
+		    strncasecmp(load_str, "ON", strlen("ON")) == 0));
+
+		if (load && libzfs_run_process("/sbin/modprobe", argv, 0))
+			return (ENOEXEC);
+
+		if (!libzfs_module_loaded(module))
+			return (ENXIO);
+	}
+
+	/*
+	 * Device creation by udev is asynchronous and waiting may be
+	 * required.  Busy wait for 10ms and then fall back to polling every
+	 * 10ms for the allowed timeout (default 10s, max 10m).  This is
+	 * done to optimize for the common case where the device is
+	 * immediately available and to avoid penalizing the possible
+	 * case where udev is slow or unable to create the device.
+	 */
+	const char *timeout_str = getenv("ZFS_MODULE_TIMEOUT");
+
+	if (timeout_str != NULL) {
+		timeout = strtol(timeout_str, NULL, 0);
+		timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */
+	}
+
+	start = gethrtime();
+	do {
+		int fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
+
+		if (fd >= 0) {
+			(void) close(fd);
+			return (0);
+		}
+
+		/* Anything but "not there yet" is a hard failure. */
+		if (errno != ENOENT)
+			return (errno);
+
+		if (NSEC2MSEC(gethrtime() - start) < busy_timeout)
+			sched_yield();
+		else
+			usleep(10 * MILLISEC);
+	} while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC));
+
+	return (ENOENT);
+}
+
+/*
+ * Run libzfs_load_module_impl() for the module named by ZFS_DRIVER and
+ * return its result unchanged.
+ */
+int
+libzfs_load_module(void)
+{
+	return (libzfs_load_module_impl(ZFS_DRIVER));
+}
+
+/*
+ * Look up the inode number of the ZDIFF_SHARESDIR directory below the
+ * mount point di->dsmnt and store it in di->shares.  Returns 0 on
+ * success; if the directory cannot be stat'ed, a message is written to
+ * di->errbuf and the result of zfs_error() is returned.
+ */
+int
+find_shares_object(differ_info_t *di)
+{
+	char fullpath[MAXPATHLEN];
+	struct stat64 sb = { 0 };
+
+	(void) snprintf(fullpath, sizeof (fullpath), "%s%s", di->dsmnt,
+	    ZDIFF_SHARESDIR);
+
+	if (stat64(fullpath, &sb) == 0) {
+		di->shares = (uint64_t)sb.st_ino;
+		return (0);
+	}
+
+	(void) snprintf(di->errbuf, sizeof (di->errbuf),
+	    dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath);
+	return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf));
+}
+
+/*
+ * Fill given version buffer with zfs kernel version read from ZFS_SYSFS_DIR
+ * Returns 0 on success, and -1 on error (with errno set)
+ *
+ * At most len bytes are read.  The result is always NUL terminated: a
+ * trailing newline is stripped, and if the data fills the whole buffer the
+ * last byte is sacrificed for the terminator.  An empty file yields an
+ * empty string.  A NULL buffer or non-positive len fails with EINVAL.
+ */
+int
+zfs_version_kernel(char *version, int len)
+{
+	int _errno;
+	int fd;
+	int rlen;
+
+	if (version == NULL || len <= 0) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	if ((fd = open(ZFS_SYSFS_DIR "/version", O_RDONLY | O_CLOEXEC)) == -1)
+		return (-1);
+
+	if ((rlen = read(fd, version, len)) == -1) {
+		version[0] = '\0';
+		/* Preserve the read error across close(). */
+		_errno = errno;
+		(void) close(fd);
+		errno = _errno;
+		return (-1);
+	}
+
+	/*
+	 * read() may return 0, in which case indexing rlen - 1 would write
+	 * before the start of the buffer.  Only strip the last byte when it
+	 * really is the newline.
+	 */
+	if (rlen > 0 && version[rlen - 1] == '\n')
+		version[rlen - 1] = '\0';  /* discard '\n' */
+	else if (rlen < len)
+		version[rlen] = '\0';
+	else
+		version[len - 1] = '\0';
+
+	if (close(fd) == -1)
+		return (-1);
+
+	return (0);
+}

diff --git a/zfs/lib/libzfs_core/.gitignore b/zfs/lib/libzfs_core/.gitignore
new file mode 100644
index 0000000..c428d63
--- /dev/null
+++ b/zfs/lib/libzfs_core/.gitignore

@@ -0,0 +1 @@
+/libzfs_core.pc

diff --git a/zfs/lib/libzfs_core/Makefile.am b/zfs/lib/libzfs_core/Makefile.am
index 421b8b4..33a889a 100644
--- a/zfs/lib/libzfs_core/Makefile.am
+++ b/zfs/lib/libzfs_core/Makefile.am

@@ -1,20 +1,38 @@
 include $(top_srcdir)/config/Rules.am
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+pkgconfig_DATA = libzfs_core.pc
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 lib_LTLIBRARIES = libzfs_core.la
 
+include $(top_srcdir)/config/Abigail.am
+
 USER_C = \
 	libzfs_core.c
 
-nodist_libzfs_core_la_SOURCES = $(USER_C)
+libzfs_core_la_SOURCES = $(USER_C)
 
 libzfs_core_la_LIBADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libuutil/libuutil.la
+	$(abs_top_builddir)/lib/libzutil/libzutil.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
 
-libzfs_core_la_LDFLAGS = -version-info 1:0:0
+libzfs_core_la_LIBADD += $(LTLIBINTL)
 
-EXTRA_DIST = $(USER_C)
+libzfs_core_la_LDFLAGS = -pthread
+
+if !ASAN_ENABLED
+libzfs_core_la_LDFLAGS += -Wl,-z,defs
+endif
+
+if BUILD_FREEBSD
+libzfs_core_la_LIBADD += -lutil -lgeom
+endif
+
+libzfs_core_la_LDFLAGS += -version-info 3:0:0
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Library ABI
+EXTRA_DIST = libzfs_core.abi libzfs_core.suppr

diff --git a/zfs/lib/libzfs_core/libzfs_core.abi b/zfs/lib/libzfs_core/libzfs_core.abi
new file mode 100644
index 0000000..7ede3e0
--- /dev/null
+++ b/zfs/lib/libzfs_core/libzfs_core.abi

@@ -0,0 +1,4512 @@
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs_core.so.3'>
+  <elf-needed>
+    <dependency name='libuuid.so.1'/>
+    <dependency name='libz.so.1'/>
+    <dependency name='libm.so.6'/>
+    <dependency name='libblkid.so.1'/>
+    <dependency name='libudev.so.1'/>
+    <dependency name='libnvpair.so.3'/>
+    <dependency name='libtirpc.so.3'/>
+    <dependency name='libc.so.6'/>
+    <dependency name='ld-linux-x86-64.so.2'/>
+  </elf-needed>
+  <elf-function-symbols>
+    <elf-symbol name='_sol_getmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_char' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_char_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_int_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_long' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_long_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_ptr_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_short' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_add_short_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_and_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_cas_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_clear_long_excl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_dec_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_inc_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uchar_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_uint_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ulong_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_or_ushort_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_set_long_excl' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_16_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_32_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_64_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_8_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_char' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_char_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_int_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_long' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_long_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_ptr_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_short' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_sub_short_nv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_16' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_32' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_8' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ptr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_uchar' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_uint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ulong' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='atomic_swap_ushort' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_add' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_destroy_nodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_find' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_first' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_insert' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_insert_here' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_is_empty' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_last' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_nearest' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_numnodes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_swap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update_gt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_update_lt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='avl_walk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_alloc_and_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_alloc_and_read' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_auto_sense' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_err_check' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_rescan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_type' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_use_whole_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_write' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='for_each_vdev_cb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='for_each_vdev_in_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getextmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getmntany' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='getzoneid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='is_mpath_whole_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='label_paths' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libspl_assertf' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_core_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='libzfs_core_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_after' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_before' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_insert_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_is_empty' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_active' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_link_replace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_move_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_next' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_prev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove_head' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_remove_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='list_tail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_bookmark' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_change_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_channel_program' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_channel_program_nosync' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_clone' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_destroy_bookmarks' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_destroy_snaps' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_exists' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_get_bookmark_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_get_bookmarks' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_get_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_get_holds' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_hold' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_initialize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_load_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_pool_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_pool_checkpoint_discard' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_promote' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive_resumable' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive_with_cmdprops' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_receive_with_header' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_redact' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_release' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_rename' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_reopen' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_rollback' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_rollback_to' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send_redacted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send_resume' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send_resume_redacted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send_space' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_send_space_resume_redacted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_set_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_snaprange_space' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_snapshot' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_sync' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_trim' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_unload_key' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_wait_fs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzc_wait_tag' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_consumer' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_enter' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_exit' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='membar_producer' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='mkdirp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='print_timestamp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='slice_cache_compare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='spl_pagesize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='strlcat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='strlcpy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_abandon' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_dispatch' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_member' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_resume' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_suspend' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_suspended' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='tpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='update_vdev_config_dev_strs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='update_vdevs_config_dev_sysfs_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_append_partition' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_dev_flush' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_dev_is_dm' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_dev_is_whole_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_device_get_devid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_device_get_physical' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_enclosure_sysfs_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_get_underlying_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_ioctl_fd' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_isnumber' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_nicebytes' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_nicenum' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_nicenum_format' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_niceraw' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_nicetime' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_resolve_shortname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_setproctitle' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_setproctitle_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_strcmp_pathname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_strip_partition' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_strip_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_default_search_paths' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_dump_ddt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_find_config' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_find_import_blkid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_history_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_label_disk_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_open_func' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_read_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_search_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zutil_alloc' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zutil_strdup' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-function-symbols>
+  <elf-variable-symbols>
+    <elf-symbol name='aok' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='buf' size='4110' type='tls-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='default_vtoc_map' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='efi_debug' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='pagesize' size='8' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-variable-symbols>
+  <abi-instr address-size='64' path='../../module/avl/avl.c' language='LANG_C99'>
+    <function-decl name='avl_last' mangled-name='avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_last'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_nearest' mangled-name='avl_nearest' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_nearest'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='fba6cb51' name='where'/>
+      <parameter type-id='95e97e5e' name='direction'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_insert_here' mangled-name='avl_insert_here' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_insert_here'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='eaa32e2f' name='new_data'/>
+      <parameter type-id='eaa32e2f' name='here'/>
+      <parameter type-id='95e97e5e' name='direction'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_add' mangled-name='avl_add' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_add'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='eaa32e2f' name='new_node'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_remove' mangled-name='avl_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_remove'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_update_lt' mangled-name='avl_update_lt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update_lt'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_update_gt' mangled-name='avl_update_gt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update_gt'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_update' mangled-name='avl_update' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_update'>
+      <parameter type-id='a3681dea' name='t'/>
+      <parameter type-id='eaa32e2f' name='obj'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='avl_swap' mangled-name='avl_swap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_swap'>
+      <parameter type-id='a3681dea' name='tree1'/>
+      <parameter type-id='a3681dea' name='tree2'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_numnodes' mangled-name='avl_numnodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_numnodes'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='avl_is_empty' mangled-name='avl_is_empty' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_is_empty'>
+      <parameter type-id='a3681dea' name='tree'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='rdwr_efi.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='72d5edd1' size-in-bits='512' id='c1dc88bc'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <class-decl name='dk_map2' size-in-bits='32' is-struct='yes' visibility='default' id='72d5edd1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='p_tag' type-id='149c6638' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='16'>
+        <var-decl name='p_flag' type-id='149c6638' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='Byte' type-id='002ac4a6' id='efb9ba06'/>
+    <typedef-decl name='uInt' type-id='f0981eeb' id='09110a74'/>
+    <typedef-decl name='uLong' type-id='7359adad' id='5bbcce85'/>
+    <typedef-decl name='Bytef' type-id='efb9ba06' id='c1606520'/>
+    <qualified-type-def type-id='c1606520' const='yes' id='a6124a50'/>
+    <pointer-type-def type-id='a6124a50' size-in-bits='64' id='e8cb3e0e'/>
+    <qualified-type-def type-id='002ac4a6' const='yes' id='ea86de29'/>
+    <pointer-type-def type-id='ea86de29' size-in-bits='64' id='354f7eb9'/>
+    <pointer-type-def type-id='002ac4a6' size-in-bits='64' id='cf536864'/>
+    <var-decl name='default_vtoc_map' type-id='c1dc88bc' mangled-name='default_vtoc_map' visibility='default' elf-symbol-id='default_vtoc_map'/>
+    <var-decl name='efi_debug' type-id='95e97e5e' mangled-name='efi_debug' visibility='default' elf-symbol-id='efi_debug'/>
+    <function-decl name='efi_alloc_and_read' mangled-name='efi_alloc_and_read' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_alloc_and_read'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='c43b27a6' name='vtoc'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_rescan' mangled-name='efi_rescan' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_rescan'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_use_whole_disk' mangled-name='efi_use_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_use_whole_disk'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_write' mangled-name='efi_write' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_write'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='0d8119a8' name='vtoc'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_type' mangled-name='efi_type' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_type'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_err_check' mangled-name='efi_err_check' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_err_check'>
+      <parameter type-id='0d8119a8' name='vtoc'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='efi_auto_sense' mangled-name='efi_auto_sense' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_auto_sense'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='c43b27a6' name='vtoc'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='sprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='write' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='fsync' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='uuid_generate' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='cf536864'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='uuid_is_null' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='354f7eb9'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='crc32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5bbcce85'/>
+      <parameter type-id='e8cb3e0e'/>
+      <parameter type-id='09110a74'/>
+      <return type-id='5bbcce85'/>
+    </function-decl>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='assert.c' language='LANG_C99'>
+    <var-decl name='aok' type-id='95e97e5e' mangled-name='aok' visibility='default' elf-symbol-id='aok'/>
+    <function-decl name='vfprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='abort' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='atomic.c' language='LANG_C99'>
+    <typedef-decl name='int8_t' type-id='2171a512' id='ee31ee44'/>
+    <typedef-decl name='__int8_t' type-id='28577a57' id='2171a512'/>
+    <qualified-type-def type-id='149c6638' volatile='yes' id='5120c5f7'/>
+    <pointer-type-def type-id='5120c5f7' size-in-bits='64' id='93977ae7'/>
+    <qualified-type-def type-id='8f92235e' volatile='yes' id='430e0681'/>
+    <pointer-type-def type-id='430e0681' size-in-bits='64' id='3a147f31'/>
+    <qualified-type-def type-id='b96825af' volatile='yes' id='84ff7d66'/>
+    <pointer-type-def type-id='84ff7d66' size-in-bits='64' id='aa323ea4'/>
+    <qualified-type-def type-id='ee1f298e' volatile='yes' id='6f7e09cb'/>
+    <pointer-type-def type-id='6f7e09cb' size-in-bits='64' id='64698d33'/>
+    <function-decl name='atomic_inc_8' mangled-name='atomic_inc_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_16' mangled-name='atomic_inc_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_32' mangled-name='atomic_inc_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_ulong' mangled-name='atomic_inc_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_8' mangled-name='atomic_dec_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_16' mangled-name='atomic_dec_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_32' mangled-name='atomic_dec_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_dec_ulong' mangled-name='atomic_dec_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_8' mangled-name='atomic_add_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_16' mangled-name='atomic_add_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_32' mangled-name='atomic_add_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_add_ptr' mangled-name='atomic_add_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_8' mangled-name='atomic_sub_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_16' mangled-name='atomic_sub_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_32' mangled-name='atomic_sub_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_ptr' mangled-name='atomic_sub_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_8' mangled-name='atomic_or_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_16' mangled-name='atomic_or_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_32' mangled-name='atomic_or_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_or_ulong' mangled-name='atomic_or_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_8' mangled-name='atomic_and_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_16' mangled-name='atomic_and_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_32' mangled-name='atomic_and_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_and_ulong' mangled-name='atomic_and_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='atomic_inc_8_nv' mangled-name='atomic_inc_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_inc_16_nv' mangled-name='atomic_inc_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_inc_32_nv' mangled-name='atomic_inc_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_inc_ulong_nv' mangled-name='atomic_inc_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_inc_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_dec_8_nv' mangled-name='atomic_dec_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_dec_16_nv' mangled-name='atomic_dec_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_dec_32_nv' mangled-name='atomic_dec_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_dec_ulong_nv' mangled-name='atomic_dec_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_dec_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_add_8_nv' mangled-name='atomic_add_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_add_16_nv' mangled-name='atomic_add_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_add_32_nv' mangled-name='atomic_add_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_add_long_nv' mangled-name='atomic_add_long_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_long_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='bd54fe1a' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_add_ptr_nv' mangled-name='atomic_add_ptr_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_add_ptr_nv'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_sub_8_nv' mangled-name='atomic_sub_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='ee31ee44' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_sub_16_nv' mangled-name='atomic_sub_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='23bd8cb5' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_sub_32_nv' mangled-name='atomic_sub_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='3ff5601b' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_sub_long_nv' mangled-name='atomic_sub_long_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_long_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='bd54fe1a' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_sub_ptr_nv' mangled-name='atomic_sub_ptr_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_sub_ptr_nv'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='79a0948f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_or_8_nv' mangled-name='atomic_or_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_or_16_nv' mangled-name='atomic_or_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_or_32_nv' mangled-name='atomic_or_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_or_ulong_nv' mangled-name='atomic_or_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_or_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_and_8_nv' mangled-name='atomic_and_8_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_8_nv'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_and_16_nv' mangled-name='atomic_and_16_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_16_nv'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_and_32_nv' mangled-name='atomic_and_32_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_32_nv'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_and_ulong_nv' mangled-name='atomic_and_ulong_nv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_and_ulong_nv'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_8' mangled-name='atomic_cas_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='exp'/>
+      <parameter type-id='b96825af' name='des'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_cas_16' mangled-name='atomic_cas_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='exp'/>
+      <parameter type-id='149c6638' name='des'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_cas_32' mangled-name='atomic_cas_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='exp'/>
+      <parameter type-id='8f92235e' name='des'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_ulong' mangled-name='atomic_cas_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='exp'/>
+      <parameter type-id='ee1f298e' name='des'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_cas_ptr' mangled-name='atomic_cas_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_cas_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='eaa32e2f' name='exp'/>
+      <parameter type-id='eaa32e2f' name='des'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_swap_8' mangled-name='atomic_swap_8' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_8'>
+      <parameter type-id='aa323ea4' name='target'/>
+      <parameter type-id='b96825af' name='bits'/>
+      <return type-id='b96825af'/>
+    </function-decl>
+    <function-decl name='atomic_swap_16' mangled-name='atomic_swap_16' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_16'>
+      <parameter type-id='93977ae7' name='target'/>
+      <parameter type-id='149c6638' name='bits'/>
+      <return type-id='149c6638'/>
+    </function-decl>
+    <function-decl name='atomic_swap_32' mangled-name='atomic_swap_32' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_32'>
+      <parameter type-id='3a147f31' name='target'/>
+      <parameter type-id='8f92235e' name='bits'/>
+      <return type-id='8f92235e'/>
+    </function-decl>
+    <function-decl name='atomic_swap_ulong' mangled-name='atomic_swap_ulong' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_ulong'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='ee1f298e' name='bits'/>
+      <return type-id='ee1f298e'/>
+    </function-decl>
+    <function-decl name='atomic_swap_ptr' mangled-name='atomic_swap_ptr' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_swap_ptr'>
+      <parameter type-id='fe09dd29' name='target'/>
+      <parameter type-id='eaa32e2f' name='bits'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='atomic_set_long_excl' mangled-name='atomic_set_long_excl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_set_long_excl'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='3502e3ff' name='value'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='atomic_clear_long_excl' mangled-name='atomic_clear_long_excl' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='atomic_clear_long_excl'>
+      <parameter type-id='64698d33' name='target'/>
+      <parameter type-id='3502e3ff' name='value'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='membar_enter' mangled-name='membar_enter' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_enter'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='membar_producer' mangled-name='membar_producer' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_producer'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='membar_consumer' mangled-name='membar_consumer' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='membar_consumer'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='list.c' language='LANG_C99'>
+    <typedef-decl name='list_node_t' type-id='b0b5e45e' id='b21843b2'/>
+    <typedef-decl name='list_t' type-id='e824dae9' id='0899125f'/>
+    <class-decl name='list_node' size-in-bits='128' is-struct='yes' visibility='default' id='b0b5e45e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='next' type-id='b03eadb4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='prev' type-id='b03eadb4' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='list' size-in-bits='256' is-struct='yes' visibility='default' id='e824dae9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='list_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='list_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='list_head' type-id='b0b5e45e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='b0b5e45e' size-in-bits='64' id='b03eadb4'/>
+    <pointer-type-def type-id='b21843b2' size-in-bits='64' id='ccc38265'/>
+    <pointer-type-def type-id='0899125f' size-in-bits='64' id='352ec160'/>
+    <function-decl name='list_create' mangled-name='list_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_create'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <parameter type-id='b59d7dce' name='offset'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_destroy' mangled-name='list_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_destroy'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_after' mangled-name='list_insert_after' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_after'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <parameter type-id='eaa32e2f' name='nobject'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_before' mangled-name='list_insert_before' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_before'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <parameter type-id='eaa32e2f' name='nobject'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_head' mangled-name='list_insert_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_insert_tail' mangled-name='list_insert_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_insert_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_remove' mangled-name='list_remove' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_remove_head' mangled-name='list_remove_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_remove_tail' mangled-name='list_remove_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_remove_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_head' mangled-name='list_head' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_head'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_tail' mangled-name='list_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_tail'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_next' mangled-name='list_next' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_next'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_prev' mangled-name='list_prev' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_prev'>
+      <parameter type-id='352ec160' name='list'/>
+      <parameter type-id='eaa32e2f' name='object'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='list_move_tail' mangled-name='list_move_tail' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_move_tail'>
+      <parameter type-id='352ec160' name='dst'/>
+      <parameter type-id='352ec160' name='src'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_replace' mangled-name='list_link_replace' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_replace'>
+      <parameter type-id='ccc38265' name='lold'/>
+      <parameter type-id='ccc38265' name='lnew'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_init' mangled-name='list_link_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_init'>
+      <parameter type-id='ccc38265' name='ln'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='list_link_active' mangled-name='list_link_active' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_link_active'>
+      <parameter type-id='ccc38265' name='ln'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='list_is_empty' mangled-name='list_is_empty' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='list_is_empty'>
+      <parameter type-id='352ec160' name='list'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='mkdirp.c' language='LANG_C99'>
+    <typedef-decl name='mode_t' type-id='e1c52942' id='d50d396c'/>
+    <typedef-decl name='wchar_t' type-id='95e97e5e' id='928221d2'/>
+    <qualified-type-def type-id='928221d2' const='yes' id='effb3702'/>
+    <pointer-type-def type-id='effb3702' size-in-bits='64' id='f077d3f8'/>
+    <qualified-type-def type-id='f077d3f8' restrict='yes' id='598aab80'/>
+    <pointer-type-def type-id='928221d2' size-in-bits='64' id='323d93c1'/>
+    <qualified-type-def type-id='323d93c1' restrict='yes' id='f1358bc3'/>
+    <function-decl name='mkdirp' mangled-name='mkdirp' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='mkdirp'>
+      <parameter type-id='80f4b756' name='d'/>
+      <parameter type-id='d50d396c' name='mode'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='mbstowcs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f1358bc3'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='wcstombs' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='598aab80'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='mkdir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='e1c52942'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/getexecname.c' language='LANG_C99'>
+    <function-decl name='getexecname' mangled-name='getexecname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getexecname'>
+      <return type-id='80f4b756'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/gethostid.c' language='LANG_C99'>
+    <function-decl name='get_system_hostid' mangled-name='get_system_hostid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='get_system_hostid'>
+      <return type-id='7359adad'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/getmntany.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32880' id='ad756b7f'>
+      <subrange length='4110' type-id='7359adad' id='8aa676f7'/>
+    </array-type-def>
+    <class-decl name='mnttab' size-in-bits='256' is-struct='yes' visibility='default' id='1b055409'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='extmnttab' size-in-bits='320' is-struct='yes' visibility='default' id='0c544dc0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_special' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_mountp' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_fstype' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_mntopts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_major' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_minor' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='mntent' size-in-bits='320' is-struct='yes' visibility='default' id='56fe4a37'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='mnt_fsname' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='mnt_dir' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='mnt_type' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='mnt_opts' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='mnt_freq' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='mnt_passno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <pointer-type-def type-id='0c544dc0' size-in-bits='64' id='394fc496'/>
+    <pointer-type-def type-id='56fe4a37' size-in-bits='64' id='b6b61d2f'/>
+    <qualified-type-def type-id='b6b61d2f' restrict='yes' id='3cad23cd'/>
+    <pointer-type-def type-id='1b055409' size-in-bits='64' id='9d424d31'/>
+    <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='3cad23cd'/>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='b6b61d2f'/>
+    </function-decl>
+    <function-decl name='feof' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <var-decl name='buf' type-id='ad756b7f' mangled-name='buf' visibility='default' elf-symbol-id='buf'/>
+    <function-decl name='getmntany' mangled-name='getmntany' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getmntany'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='9d424d31' name='mgetp'/>
+      <parameter type-id='9d424d31' name='mrefp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='_sol_getmntent' mangled-name='_sol_getmntent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='_sol_getmntent'>
+      <parameter type-id='822cd80b' name='fp'/>
+      <parameter type-id='9d424d31' name='mgetp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getextmntent' mangled-name='getextmntent' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getextmntent'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='394fc496' name='entry'/>
+      <parameter type-id='62f7a03d' name='statbuf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
+    <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+    <function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
+      <return type-id='4da03624'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='page.c' language='LANG_C99'>
+    <var-decl name='pagesize' type-id='b59d7dce' mangled-name='pagesize' visibility='default' elf-symbol-id='pagesize'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='strlcat.c' language='LANG_C99'>
+    <function-decl name='memcpy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='timestamp.c' language='LANG_C99'>
+    <class-decl name='tm' size-in-bits='448' is-struct='yes' visibility='default' id='dddf6ca2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tm_sec' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='tm_min' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tm_hour' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='tm_mday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tm_mon' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='tm_year' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='tm_wday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='tm_yday' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='tm_isdst' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='tm_gmtoff' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='tm_zone' type-id='80f4b756' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='time_t' type-id='65eda9c0' id='c9d12d66'/>
+    <typedef-decl name='nl_item' type-id='95e97e5e' id='03b79a94'/>
+    <qualified-type-def type-id='c9d12d66' const='yes' id='588b3216'/>
+    <pointer-type-def type-id='588b3216' size-in-bits='64' id='9f201474'/>
+    <qualified-type-def type-id='dddf6ca2' const='yes' id='e824a34f'/>
+    <pointer-type-def type-id='e824a34f' size-in-bits='64' id='d6ad37ff'/>
+    <qualified-type-def type-id='d6ad37ff' restrict='yes' id='f8c6051d'/>
+    <pointer-type-def type-id='c9d12d66' size-in-bits='64' id='b2eb2c3f'/>
+    <pointer-type-def type-id='dddf6ca2' size-in-bits='64' id='d915a820'/>
+    <function-decl name='print_timestamp' mangled-name='print_timestamp' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='print_timestamp'>
+      <parameter type-id='3502e3ff' name='timestamp_fmt'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nl_langinfo' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='03b79a94'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='time' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b2eb2c3f'/>
+      <return type-id='c9d12d66'/>
+    </function-decl>
+    <function-decl name='strftime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f8c6051d'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='localtime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9f201474'/>
+      <return type-id='d915a820'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='thread_pool.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='8901473c' size-in-bits='576' id='f5da478b'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='49ef3ffd' size-in-bits='1024' id='a14403f5'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='384' id='36d7f119'>
+      <subrange length='48' type-id='7359adad' id='8f6d2a81'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='bd54fe1a' size-in-bits='512' id='5d4efd44'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='f0981eeb' size-in-bits='64' id='0d532ec1'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='7359adad' size-in-bits='1024' id='d2baa450'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='eaa32e2f' size-in-bits='256' id='209ef23f'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <typedef-decl name='tpool_job_t' type-id='3b8579e5' id='66a0afc9'/>
+    <class-decl name='tpool_job' size-in-bits='192' is-struct='yes' visibility='default' id='3b8579e5'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tpj_next' type-id='f32b30e4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tpj_func' type-id='b7f9d8e6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tpj_arg' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='tpool_active_t' type-id='c8d086f4' id='6fcda10e'/>
+    <class-decl name='tpool_active' size-in-bits='128' is-struct='yes' visibility='default' id='c8d086f4'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tpa_next' type-id='ad33e5e7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tpa_tid' type-id='4051f5e7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__cpu_mask' type-id='7359adad' id='49ef3ffd'/>
+    <class-decl name='cpu_set_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='8037c762' visibility='default' id='1f20d231'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__bits' type-id='a14403f5' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='cpu_set_t' type-id='1f20d231' id='8037c762'/>
+    <typedef-decl name='pthread_t' type-id='7359adad' id='4051f5e7'/>
+    <union-decl name='pthread_condattr_t' size-in-bits='32' naming-typedef-id='836265dd' visibility='default' id='33dd3aad'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='8e0573fd' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_condattr_t' type-id='33dd3aad' id='836265dd'/>
+    <union-decl name='pthread_cond_t' size-in-bits='384' naming-typedef-id='62fab762' visibility='default' id='cbb12c12'>
+      <data-member access='public'>
+        <var-decl name='__data' type-id='c987b47c' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='36d7f119' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='1eb56b1e' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_cond_t' type-id='cbb12c12' id='62fab762'/>
+    <typedef-decl name='__jmp_buf' type-id='5d4efd44' id='379a1ab7'/>
+    <class-decl name='__pthread_cond_s' size-in-bits='384' is-struct='yes' visibility='default' id='c987b47c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='' type-id='ac5ab595' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='' type-id='ac5ab596' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='__g_refs' type-id='0d532ec1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='__g_size' type-id='0d532ec1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='__g1_orig_size' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__wrefs' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='__g_signals' type-id='0d532ec1' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__1' size-in-bits='64' is-anonymous='yes' visibility='default' id='ac5ab595'>
+      <data-member access='public'>
+        <var-decl name='__wseq' type-id='3a47d82b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__wseq32' type-id='e7f43f72' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='__anonymous_struct__' size-in-bits='64' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__low' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='__high' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__2' size-in-bits='64' is-anonymous='yes' visibility='default' id='ac5ab596'>
+      <data-member access='public'>
+        <var-decl name='__g1_start' type-id='3a47d82b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__g1_start32' type-id='e7f43f72' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='__sigset_t' size-in-bits='1024' is-struct='yes' naming-typedef-id='b9c97942' visibility='default' id='2616147f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__val' type-id='d2baa450' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__sigset_t' type-id='2616147f' id='b9c97942'/>
+    <class-decl name='sched_param' size-in-bits='32' is-struct='yes' visibility='default' id='0897719a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='sched_priority' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__cancel_jmp_buf_tag' size-in-bits='576' is-struct='yes' visibility='default' id='8901473c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__cancel_jmp_buf' type-id='379a1ab7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='__mask_was_saved' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__pthread_unwind_buf_t' size-in-bits='832' is-struct='yes' naming-typedef-id='4423cf7f' visibility='default' id='a0abc656'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__cancel_jmp_buf' type-id='f5da478b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='__pad' type-id='209ef23f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__pthread_unwind_buf_t' type-id='a0abc656' id='4423cf7f'/>
+    <pointer-type-def type-id='8901473c' size-in-bits='64' id='eb91b7ea'/>
+    <pointer-type-def type-id='4423cf7f' size-in-bits='64' id='ba7c727c'/>
+    <pointer-type-def type-id='b9c97942' size-in-bits='64' id='bbf06c47'/>
+    <qualified-type-def type-id='bbf06c47' restrict='yes' id='65e6ec45'/>
+    <qualified-type-def type-id='b9c97942' const='yes' id='191f6b72'/>
+    <pointer-type-def type-id='191f6b72' size-in-bits='64' id='e475fb88'/>
+    <qualified-type-def type-id='e475fb88' restrict='yes' id='5a8729d0'/>
+    <qualified-type-def type-id='8037c762' const='yes' id='f50ea9b2'/>
+    <pointer-type-def type-id='f50ea9b2' size-in-bits='64' id='5e14fa48'/>
+    <qualified-type-def type-id='7d8569fd' const='yes' id='e06dee2d'/>
+    <pointer-type-def type-id='e06dee2d' size-in-bits='64' id='540db505'/>
+    <qualified-type-def type-id='540db505' restrict='yes' id='e1815e87'/>
+    <qualified-type-def type-id='836265dd' const='yes' id='7d24c58d'/>
+    <pointer-type-def type-id='7d24c58d' size-in-bits='64' id='a7e325e5'/>
+    <qualified-type-def type-id='a7e325e5' restrict='yes' id='4c428e67'/>
+    <qualified-type-def type-id='0897719a' const='yes' id='c4a7b189'/>
+    <pointer-type-def type-id='c4a7b189' size-in-bits='64' id='36fca399'/>
+    <qualified-type-def type-id='36fca399' restrict='yes' id='37e4897b'/>
+    <qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/>
+    <pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/>
+    <qualified-type-def type-id='e05e8614' restrict='yes' id='0be2e71c'/>
+    <pointer-type-def type-id='8037c762' size-in-bits='64' id='d74a6869'/>
+    <qualified-type-def type-id='7292109c' restrict='yes' id='6942f6a4'/>
+    <qualified-type-def type-id='7347a39e' restrict='yes' id='578ba182'/>
+    <pointer-type-def type-id='62fab762' size-in-bits='64' id='db285b03'/>
+    <qualified-type-def type-id='db285b03' restrict='yes' id='2a468b41'/>
+    <qualified-type-def type-id='18c91f9e' restrict='yes' id='6e745582'/>
+    <pointer-type-def type-id='4051f5e7' size-in-bits='64' id='e01b5462'/>
+    <qualified-type-def type-id='e01b5462' restrict='yes' id='cc338b26'/>
+    <pointer-type-def type-id='0897719a' size-in-bits='64' id='23cbcb08'/>
+    <qualified-type-def type-id='23cbcb08' restrict='yes' id='b09b2050'/>
+    <qualified-type-def type-id='78c01427' restrict='yes' id='d19b2c25'/>
+    <pointer-type-def type-id='6fcda10e' size-in-bits='64' id='ad33e5e7'/>
+    <pointer-type-def type-id='66a0afc9' size-in-bits='64' id='f32b30e4'/>
+    <pointer-type-def type-id='cd5d79f4' size-in-bits='64' id='5ad9edb6'/>
+    <qualified-type-def type-id='eaa32e2f' restrict='yes' id='1b7446cd'/>
+    <qualified-type-def type-id='63e171df' restrict='yes' id='9e7a3a7d'/>
+    <function-decl name='tpool_abandon' mangled-name='tpool_abandon' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_abandon'>
+      <parameter type-id='9cf59a50' name='tpool'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='tpool_suspend' mangled-name='tpool_suspend' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_suspend'>
+      <parameter type-id='9cf59a50' name='tpool'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='tpool_suspended' mangled-name='tpool_suspended' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_suspended'>
+      <parameter type-id='9cf59a50' name='tpool'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='tpool_resume' mangled-name='tpool_resume' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_resume'>
+      <parameter type-id='9cf59a50' name='tpool'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='tpool_member' mangled-name='tpool_member' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_member'>
+      <parameter type-id='9cf59a50' name='tpool'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__sysconf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='bd54fe1a'/>
+    </function-decl>
+    <function-decl name='pthread_sigmask' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='5a8729d0'/>
+      <parameter type-id='65e6ec45'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_create' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='cc338b26'/>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='5ad9edb6'/>
+      <parameter type-id='1b7446cd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_self' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='4051f5e7'/>
+    </function-decl>
+    <function-decl name='pthread_attr_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getdetachstate' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='540db505'/>
+      <parameter type-id='7292109c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setdetachstate' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getguardsize' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='540db505'/>
+      <parameter type-id='78c01427'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setguardsize' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getschedparam' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='b09b2050'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setschedparam' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='578ba182'/>
+      <parameter type-id='37e4897b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getschedpolicy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='6942f6a4'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setschedpolicy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getinheritsched' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='6942f6a4'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setinheritsched' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getscope' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='6942f6a4'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setscope' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getstack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e1815e87'/>
+      <parameter type-id='9e7a3a7d'/>
+      <parameter type-id='d19b2c25'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setstack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_setaffinity_np' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='7347a39e'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='5e14fa48'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_attr_getaffinity_np' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='540db505'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='d74a6869'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_setcancelstate' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='7292109c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_setcanceltype' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='7292109c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cancel' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4051f5e7'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__pthread_register_cancel' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='ba7c727c'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='__pthread_unregister_cancel' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='ba7c727c'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='__pthread_unwind_next' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='ba7c727c'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='pthread_cond_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2a468b41'/>
+      <parameter type-id='4c428e67'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cond_signal' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='db285b03'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cond_broadcast' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='db285b03'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cond_wait' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2a468b41'/>
+      <parameter type-id='6e745582'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_cond_timedwait' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2a468b41'/>
+      <parameter type-id='6e745582'/>
+      <parameter type-id='0be2e71c'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='cd5d79f4'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='eaa32e2f'/>
+    </function-type>
+    <pointer-type-def type-id='b1bbf10d' size-in-bits='64' id='9cf59a50'/>
+    <typedef-decl name='tpool_t' type-id='88d1b7f9' id='b1bbf10d'/>
+    <class-decl name='tpool' size-in-bits='2496' is-struct='yes' visibility='default' id='88d1b7f9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tp_forw' type-id='9cf59a50' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tp_back' type-id='9cf59a50' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tp_mutex' type-id='7a6844eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='tp_busycv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='tp_workcv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='tp_waitcv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1600'>
+        <var-decl name='tp_active' type-id='ad33e5e7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1664'>
+        <var-decl name='tp_head' type-id='f32b30e4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1728'>
+        <var-decl name='tp_tail' type-id='f32b30e4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1792'>
+        <var-decl name='tp_attr' type-id='7d8569fd' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2240'>
+        <var-decl name='tp_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2272'>
+        <var-decl name='tp_linger' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2304'>
+        <var-decl name='tp_njobs' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2336'>
+        <var-decl name='tp_minimum' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2368'>
+        <var-decl name='tp_maximum' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2400'>
+        <var-decl name='tp_current' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2432'>
+        <var-decl name='tp_idle' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <function-type size-in-bits='64' id='c5c76c9c'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='libzfs_core.c' language='LANG_C99'>
+    <type-decl name='char' size-in-bits='8' id='a84c031d'/>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='2048' id='d1617432'>
+      <subrange length='256' type-id='7359adad' id='36e5b9fa'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32768' id='d16c6df4'>
+      <subrange length='4096' type-id='7359adad' id='bc1b5ddc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='320' id='36c46961'>
+      <subrange length='40' type-id='7359adad' id='8f80b239'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='65536' id='163f6aa5'>
+      <subrange length='8192' type-id='7359adad' id='c88f397d'/>
+    </array-type-def>
+    <type-decl name='int' size-in-bits='32' id='95e97e5e'/>
+    <type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
+    <type-decl name='short int' size-in-bits='16' id='a2185560'/>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='128' id='c1c22e6c'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='2176' id='8c2bcad1'>
+      <subrange length='34' type-id='7359adad' id='6a6a7e00'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='9c313c2d' size-in-bits='256' id='85c64d26'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='96' id='fa8ef949'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='128' id='fa9986a5'>
+      <subrange length='16' type-id='7359adad' id='848d0938'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='24' id='d3490169'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='40' id='0f4ddd0b'>
+      <subrange length='5' type-id='7359adad' id='53010e10'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='48' id='0f562bd0'>
+      <subrange length='6' type-id='7359adad' id='52fa524b'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='b96825af' size-in-bits='64' id='13339fda'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
+    <type-decl name='unsigned char' size-in-bits='8' id='002ac4a6'/>
+    <type-decl name='unsigned int' size-in-bits='32' id='f0981eeb'/>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='variadic parameter type' id='2c1145c5'/>
+    <type-decl name='void' id='48b5725f'/>
+    <enum-decl name='lzc_dataset_type' id='bc9887f1'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='LZC_DATSET_TYPE_ZFS' value='2'/>
+      <enumerator name='LZC_DATSET_TYPE_ZVOL' value='3'/>
+    </enum-decl>
+    <enum-decl name='lzc_send_flags' id='bfbd3c8e'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='LZC_SEND_FLAG_EMBED_DATA' value='1'/>
+      <enumerator name='LZC_SEND_FLAG_LARGE_BLOCK' value='2'/>
+      <enumerator name='LZC_SEND_FLAG_COMPRESS' value='4'/>
+      <enumerator name='LZC_SEND_FLAG_RAW' value='8'/>
+      <enumerator name='LZC_SEND_FLAG_SAVED' value='16'/>
+    </enum-decl>
+    <class-decl name='ddt_key' size-in-bits='320' is-struct='yes' visibility='default' id='e0a4a1cb'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ddk_cksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='ddk_prop' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ddt_key_t' type-id='e0a4a1cb' id='67f6d2cf'/>
+    <enum-decl name='dmu_object_type' id='04b3b0b9'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DMU_OT_NONE' value='0'/>
+      <enumerator name='DMU_OT_OBJECT_DIRECTORY' value='1'/>
+      <enumerator name='DMU_OT_OBJECT_ARRAY' value='2'/>
+      <enumerator name='DMU_OT_PACKED_NVLIST' value='3'/>
+      <enumerator name='DMU_OT_PACKED_NVLIST_SIZE' value='4'/>
+      <enumerator name='DMU_OT_BPOBJ' value='5'/>
+      <enumerator name='DMU_OT_BPOBJ_HDR' value='6'/>
+      <enumerator name='DMU_OT_SPACE_MAP_HEADER' value='7'/>
+      <enumerator name='DMU_OT_SPACE_MAP' value='8'/>
+      <enumerator name='DMU_OT_INTENT_LOG' value='9'/>
+      <enumerator name='DMU_OT_DNODE' value='10'/>
+      <enumerator name='DMU_OT_OBJSET' value='11'/>
+      <enumerator name='DMU_OT_DSL_DIR' value='12'/>
+      <enumerator name='DMU_OT_DSL_DIR_CHILD_MAP' value='13'/>
+      <enumerator name='DMU_OT_DSL_DS_SNAP_MAP' value='14'/>
+      <enumerator name='DMU_OT_DSL_PROPS' value='15'/>
+      <enumerator name='DMU_OT_DSL_DATASET' value='16'/>
+      <enumerator name='DMU_OT_ZNODE' value='17'/>
+      <enumerator name='DMU_OT_OLDACL' value='18'/>
+      <enumerator name='DMU_OT_PLAIN_FILE_CONTENTS' value='19'/>
+      <enumerator name='DMU_OT_DIRECTORY_CONTENTS' value='20'/>
+      <enumerator name='DMU_OT_MASTER_NODE' value='21'/>
+      <enumerator name='DMU_OT_UNLINKED_SET' value='22'/>
+      <enumerator name='DMU_OT_ZVOL' value='23'/>
+      <enumerator name='DMU_OT_ZVOL_PROP' value='24'/>
+      <enumerator name='DMU_OT_PLAIN_OTHER' value='25'/>
+      <enumerator name='DMU_OT_UINT64_OTHER' value='26'/>
+      <enumerator name='DMU_OT_ZAP_OTHER' value='27'/>
+      <enumerator name='DMU_OT_ERROR_LOG' value='28'/>
+      <enumerator name='DMU_OT_SPA_HISTORY' value='29'/>
+      <enumerator name='DMU_OT_SPA_HISTORY_OFFSETS' value='30'/>
+      <enumerator name='DMU_OT_POOL_PROPS' value='31'/>
+      <enumerator name='DMU_OT_DSL_PERMS' value='32'/>
+      <enumerator name='DMU_OT_ACL' value='33'/>
+      <enumerator name='DMU_OT_SYSACL' value='34'/>
+      <enumerator name='DMU_OT_FUID' value='35'/>
+      <enumerator name='DMU_OT_FUID_SIZE' value='36'/>
+      <enumerator name='DMU_OT_NEXT_CLONES' value='37'/>
+      <enumerator name='DMU_OT_SCAN_QUEUE' value='38'/>
+      <enumerator name='DMU_OT_USERGROUP_USED' value='39'/>
+      <enumerator name='DMU_OT_USERGROUP_QUOTA' value='40'/>
+      <enumerator name='DMU_OT_USERREFS' value='41'/>
+      <enumerator name='DMU_OT_DDT_ZAP' value='42'/>
+      <enumerator name='DMU_OT_DDT_STATS' value='43'/>
+      <enumerator name='DMU_OT_SA' value='44'/>
+      <enumerator name='DMU_OT_SA_MASTER_NODE' value='45'/>
+      <enumerator name='DMU_OT_SA_ATTR_REGISTRATION' value='46'/>
+      <enumerator name='DMU_OT_SA_ATTR_LAYOUTS' value='47'/>
+      <enumerator name='DMU_OT_SCAN_XLATE' value='48'/>
+      <enumerator name='DMU_OT_DEDUP' value='49'/>
+      <enumerator name='DMU_OT_DEADLIST' value='50'/>
+      <enumerator name='DMU_OT_DEADLIST_HDR' value='51'/>
+      <enumerator name='DMU_OT_DSL_CLONES' value='52'/>
+      <enumerator name='DMU_OT_BPOBJ_SUBOBJ' value='53'/>
+      <enumerator name='DMU_OT_NUMTYPES' value='54'/>
+      <enumerator name='DMU_OTN_UINT8_DATA' value='128'/>
+      <enumerator name='DMU_OTN_UINT8_METADATA' value='192'/>
+      <enumerator name='DMU_OTN_UINT16_DATA' value='129'/>
+      <enumerator name='DMU_OTN_UINT16_METADATA' value='193'/>
+      <enumerator name='DMU_OTN_UINT32_DATA' value='130'/>
+      <enumerator name='DMU_OTN_UINT32_METADATA' value='194'/>
+      <enumerator name='DMU_OTN_UINT64_DATA' value='131'/>
+      <enumerator name='DMU_OTN_UINT64_METADATA' value='195'/>
+      <enumerator name='DMU_OTN_ZAP_DATA' value='132'/>
+      <enumerator name='DMU_OTN_ZAP_METADATA' value='196'/>
+      <enumerator name='DMU_OTN_UINT8_ENC_DATA' value='160'/>
+      <enumerator name='DMU_OTN_UINT8_ENC_METADATA' value='224'/>
+      <enumerator name='DMU_OTN_UINT16_ENC_DATA' value='161'/>
+      <enumerator name='DMU_OTN_UINT16_ENC_METADATA' value='225'/>
+      <enumerator name='DMU_OTN_UINT32_ENC_DATA' value='162'/>
+      <enumerator name='DMU_OTN_UINT32_ENC_METADATA' value='226'/>
+      <enumerator name='DMU_OTN_UINT64_ENC_DATA' value='163'/>
+      <enumerator name='DMU_OTN_UINT64_ENC_METADATA' value='227'/>
+      <enumerator name='DMU_OTN_ZAP_ENC_DATA' value='164'/>
+      <enumerator name='DMU_OTN_ZAP_ENC_METADATA' value='228'/>
+    </enum-decl>
+    <typedef-decl name='dmu_object_type_t' type-id='04b3b0b9' id='5c9d8906'/>
+    <class-decl name='dmu_objset_stats' size-in-bits='2304' is-struct='yes' visibility='default' id='098f0221'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='dds_num_clones' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='dds_creation_txg' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='dds_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='dds_type' type-id='230f1e16' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='dds_is_snapshot' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='232'>
+        <var-decl name='dds_inconsistent' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='240'>
+        <var-decl name='dds_redacted' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='248'>
+        <var-decl name='dds_origin' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='dmu_objset_stats_t' type-id='098f0221' id='b2c14f17'/>
+    <enum-decl name='dmu_objset_type' id='6b1b19f9'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DMU_OST_NONE' value='0'/>
+      <enumerator name='DMU_OST_META' value='1'/>
+      <enumerator name='DMU_OST_ZFS' value='2'/>
+      <enumerator name='DMU_OST_ZVOL' value='3'/>
+      <enumerator name='DMU_OST_OTHER' value='4'/>
+      <enumerator name='DMU_OST_ANY' value='5'/>
+      <enumerator name='DMU_OST_NUMTYPES' value='6'/>
+    </enum-decl>
+    <typedef-decl name='dmu_objset_type_t' type-id='6b1b19f9' id='230f1e16'/>
+    <enum-decl name='pool_initialize_func' id='5c246ad4'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_INITIALIZE_START' value='0'/>
+      <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
+      <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
+      <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
+      <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
+    </enum-decl>
+    <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
+    <enum-decl name='pool_trim_func' id='54ed608a'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='POOL_TRIM_START' value='0'/>
+      <enumerator name='POOL_TRIM_CANCEL' value='1'/>
+      <enumerator name='POOL_TRIM_SUSPEND' value='2'/>
+      <enumerator name='POOL_TRIM_FUNCS' value='3'/>
+    </enum-decl>
+    <typedef-decl name='pool_trim_func_t' type-id='54ed608a' id='b1146b8d'/>
+    <enum-decl name='zpool_wait_activity_t' naming-typedef-id='73446457' id='849338e3'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZPOOL_WAIT_CKPT_DISCARD' value='0'/>
+      <enumerator name='ZPOOL_WAIT_FREE' value='1'/>
+      <enumerator name='ZPOOL_WAIT_INITIALIZE' value='2'/>
+      <enumerator name='ZPOOL_WAIT_REPLACE' value='3'/>
+      <enumerator name='ZPOOL_WAIT_REMOVE' value='4'/>
+      <enumerator name='ZPOOL_WAIT_RESILVER' value='5'/>
+      <enumerator name='ZPOOL_WAIT_SCRUB' value='6'/>
+      <enumerator name='ZPOOL_WAIT_TRIM' value='7'/>
+      <enumerator name='ZPOOL_WAIT_NUM_ACTIVITIES' value='8'/>
+    </enum-decl>
+    <typedef-decl name='zpool_wait_activity_t' type-id='849338e3' id='73446457'/>
+    <enum-decl name='zfs_wait_activity_t' naming-typedef-id='3024501a' id='527d5dc6'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_WAIT_DELETEQ' value='0'/>
+      <enumerator name='ZFS_WAIT_NUM_ACTIVITIES' value='1'/>
+    </enum-decl>
+    <typedef-decl name='zfs_wait_activity_t' type-id='527d5dc6' id='3024501a'/>
+    <enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
+      <enumerator name='DATA_TYPE_UNKNOWN' value='0'/>
+      <enumerator name='DATA_TYPE_BOOLEAN' value='1'/>
+      <enumerator name='DATA_TYPE_BYTE' value='2'/>
+      <enumerator name='DATA_TYPE_INT16' value='3'/>
+      <enumerator name='DATA_TYPE_UINT16' value='4'/>
+      <enumerator name='DATA_TYPE_INT32' value='5'/>
+      <enumerator name='DATA_TYPE_UINT32' value='6'/>
+      <enumerator name='DATA_TYPE_INT64' value='7'/>
+      <enumerator name='DATA_TYPE_UINT64' value='8'/>
+      <enumerator name='DATA_TYPE_STRING' value='9'/>
+      <enumerator name='DATA_TYPE_BYTE_ARRAY' value='10'/>
+      <enumerator name='DATA_TYPE_INT16_ARRAY' value='11'/>
+      <enumerator name='DATA_TYPE_UINT16_ARRAY' value='12'/>
+      <enumerator name='DATA_TYPE_INT32_ARRAY' value='13'/>
+      <enumerator name='DATA_TYPE_UINT32_ARRAY' value='14'/>
+      <enumerator name='DATA_TYPE_INT64_ARRAY' value='15'/>
+      <enumerator name='DATA_TYPE_UINT64_ARRAY' value='16'/>
+      <enumerator name='DATA_TYPE_STRING_ARRAY' value='17'/>
+      <enumerator name='DATA_TYPE_HRTIME' value='18'/>
+      <enumerator name='DATA_TYPE_NVLIST' value='19'/>
+      <enumerator name='DATA_TYPE_NVLIST_ARRAY' value='20'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_VALUE' value='21'/>
+      <enumerator name='DATA_TYPE_INT8' value='22'/>
+      <enumerator name='DATA_TYPE_UINT8' value='23'/>
+      <enumerator name='DATA_TYPE_BOOLEAN_ARRAY' value='24'/>
+      <enumerator name='DATA_TYPE_INT8_ARRAY' value='25'/>
+      <enumerator name='DATA_TYPE_UINT8_ARRAY' value='26'/>
+      <enumerator name='DATA_TYPE_DOUBLE' value='27'/>
+    </enum-decl>
+    <typedef-decl name='data_type_t' type-id='aeeae136' id='8d0687d2'/>
+    <class-decl name='nvpair' size-in-bits='128' is-struct='yes' visibility='default' id='1c34e459'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvp_size' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvp_name_sz' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='48'>
+        <var-decl name='nvp_reserve' type-id='23bd8cb5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvp_value_elem' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='nvp_type' type-id='8d0687d2' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvpair_t' type-id='1c34e459' id='57928edf'/>
+    <class-decl name='nvlist' size-in-bits='192' is-struct='yes' visibility='default' id='ac266fd9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvl_version' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvl_nvflag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvl_priv' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nvl_flag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='nvl_pad' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvlist_t' type-id='ac266fd9' id='8e8d4be3'/>
+    <class-decl name='zio_cksum' size-in-bits='256' is-struct='yes' visibility='default' id='1d53e28b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zc_word' type-id='85c64d26' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zio_cksum_t' type-id='1d53e28b' id='39730d0b'/>
+    <class-decl name='drr_begin' size-in-bits='2432' is-struct='yes' visibility='default' id='09fcdc01'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_magic' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_versioninfo' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_creation_time' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_type' type-id='230f1e16' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='drr_flags' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_fromguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_toname' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='dmu_replay_record' size-in-bits='2496' is-struct='yes' visibility='default' id='781a52d7'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_type' type-id='08f5ca17' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='drr_payloadlen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_u' type-id='ac5ab597' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <enum-decl name='__anonymous_enum__' is-anonymous='yes' id='08f5ca17'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='DRR_BEGIN' value='0'/>
+      <enumerator name='DRR_OBJECT' value='1'/>
+      <enumerator name='DRR_FREEOBJECTS' value='2'/>
+      <enumerator name='DRR_WRITE' value='3'/>
+      <enumerator name='DRR_FREE' value='4'/>
+      <enumerator name='DRR_END' value='5'/>
+      <enumerator name='DRR_WRITE_BYREF' value='6'/>
+      <enumerator name='DRR_SPILL' value='7'/>
+      <enumerator name='DRR_WRITE_EMBEDDED' value='8'/>
+      <enumerator name='DRR_OBJECT_RANGE' value='9'/>
+      <enumerator name='DRR_REDACT' value='10'/>
+      <enumerator name='DRR_NUMTYPES' value='11'/>
+    </enum-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='2432' is-anonymous='yes' visibility='default' id='ac5ab597'>
+      <data-member access='public'>
+        <var-decl name='drr_begin' type-id='09fcdc01' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_end' type-id='6ee25631' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_object' type-id='f9ad530b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_freeobjects' type-id='a27d958e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write' type-id='4cc69e4b' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_free' type-id='c836cfd2' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write_byref' type-id='e511cdce' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_spill' type-id='1e69a80a' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_write_embedded' type-id='98b1345e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_object_range' type-id='aba1f9e1' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_redact' type-id='50389039' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='drr_checksum' type-id='a5fe3647' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='drr_end' size-in-bits='320' is-struct='yes' visibility='default' id='6ee25631'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_checksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_object' size-in-bits='448' is-struct='yes' visibility='default' id='f9ad530b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='drr_bonustype' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_blksz' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='drr_bonuslen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='200'>
+        <var-decl name='drr_compress' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='208'>
+        <var-decl name='drr_dn_slots' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='216'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='drr_raw_bonuslen' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_indblkshift' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='328'>
+        <var-decl name='drr_nlevels' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='336'>
+        <var-decl name='drr_nblkptr' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='344'>
+        <var-decl name='drr_pad' type-id='0f4ddd0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_maxblkid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_freeobjects' size-in-bits='192' is-struct='yes' visibility='default' id='a27d958e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_firstobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_numobjs' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write' size-in-bits='1088' is-struct='yes' visibility='default' id='4cc69e4b'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='drr_pad' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_logical_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='328'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='336'>
+        <var-decl name='drr_compressiontype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='344'>
+        <var-decl name='drr_pad2' type-id='0f4ddd0b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_key' type-id='67f6d2cf' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='drr_compressed_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_free' size-in-bits='256' is-struct='yes' visibility='default' id='c836cfd2'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write_byref' size-in-bits='832' is-struct='yes' visibility='default' id='e511cdce'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_refguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_refobject' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_refoffset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='drr_checksumtype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='456'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='464'>
+        <var-decl name='drr_pad2' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='drr_key' type-id='67f6d2cf' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_spill' size-in-bits='640' is-struct='yes' visibility='default' id='1e69a80a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='200'>
+        <var-decl name='drr_compressiontype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='208'>
+        <var-decl name='drr_pad' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_compressed_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='608'>
+        <var-decl name='drr_type' type-id='5c9d8906' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_write_embedded' size-in-bits='384' is-struct='yes' visibility='default' id='98b1345e'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_compression' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='264'>
+        <var-decl name='drr_etype' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='272'>
+        <var-decl name='drr_pad' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='drr_lsize' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='drr_psize' type-id='8f92235e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_object_range' size-in-bits='512' is-struct='yes' visibility='default' id='aba1f9e1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_firstobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_numslots' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_salt' type-id='13339fda' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='drr_iv' type-id='fa8ef949' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='drr_mac' type-id='fa9986a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='drr_flags' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='488'>
+        <var-decl name='drr_pad' type-id='d3490169' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_redact' size-in-bits='256' is-struct='yes' visibility='default' id='50389039'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='drr_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='drr_length' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='drr_toguid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='drr_checksum' size-in-bits='2432' is-struct='yes' visibility='default' id='a5fe3647'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='drr_pad' type-id='8c2bcad1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2176'>
+        <var-decl name='drr_checksum' type-id='39730d0b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='dmu_replay_record_t' type-id='781a52d7' id='8b8fc893'/>
+    <class-decl name='zinject_record' size-in-bits='2816' is-struct='yes' visibility='default' id='3216f820'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zi_objset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zi_object' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zi_start' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='zi_end' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='zi_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='zi_level' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='zi_error' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='zi_type' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='zi_freq' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='zi_failfast' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='zi_func' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2560'>
+        <var-decl name='zi_iotype' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2592'>
+        <var-decl name='zi_duration' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2624'>
+        <var-decl name='zi_timer' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2688'>
+        <var-decl name='zi_nlanes' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2752'>
+        <var-decl name='zi_cmd' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2784'>
+        <var-decl name='zi_dvas' type-id='8f92235e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zinject_record_t' type-id='3216f820' id='a4301ca6'/>
+    <class-decl name='zfs_share' size-in-bits='256' is-struct='yes' visibility='default' id='feb6f2da'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='z_exportdata' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='z_sharedata' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='z_sharetype' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='z_sharemax' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_share_t' type-id='feb6f2da' id='ee5cec36'/>
+    <class-decl name='zfs_cmd' size-in-bits='109952' is-struct='yes' visibility='default' id='3522cd69'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zc_name' type-id='d16c6df4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32768'>
+        <var-decl name='zc_nvlist_src' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32832'>
+        <var-decl name='zc_nvlist_src_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32896'>
+        <var-decl name='zc_nvlist_dst' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32960'>
+        <var-decl name='zc_nvlist_dst_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33024'>
+        <var-decl name='zc_nvlist_dst_filled' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33056'>
+        <var-decl name='zc_pad2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33088'>
+        <var-decl name='zc_history' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='33152'>
+        <var-decl name='zc_value' type-id='163f6aa5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='98688'>
+        <var-decl name='zc_string' type-id='d1617432' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100736'>
+        <var-decl name='zc_guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100800'>
+        <var-decl name='zc_nvlist_conf' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100864'>
+        <var-decl name='zc_nvlist_conf_size' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100928'>
+        <var-decl name='zc_cookie' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='100992'>
+        <var-decl name='zc_objset_type' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101056'>
+        <var-decl name='zc_perm_action' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101120'>
+        <var-decl name='zc_history_len' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101184'>
+        <var-decl name='zc_history_offset' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101248'>
+        <var-decl name='zc_obj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101312'>
+        <var-decl name='zc_iflags' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101376'>
+        <var-decl name='zc_share' type-id='ee5cec36' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='101632'>
+        <var-decl name='zc_objset_stats' type-id='b2c14f17' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='103936'>
+        <var-decl name='zc_begin_record' type-id='09fcdc01' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='106368'>
+        <var-decl name='zc_inject_record' type-id='a4301ca6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109184'>
+        <var-decl name='zc_defer_destroy' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109216'>
+        <var-decl name='zc_flags' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109248'>
+        <var-decl name='zc_action_handle' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109312'>
+        <var-decl name='zc_cleanup_fd' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109344'>
+        <var-decl name='zc_simple' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109352'>
+        <var-decl name='zc_pad' type-id='d3490169' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109376'>
+        <var-decl name='zc_sendobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109440'>
+        <var-decl name='zc_fromobj' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109504'>
+        <var-decl name='zc_createtxg' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109568'>
+        <var-decl name='zc_stat' type-id='0371a9c7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='109888'>
+        <var-decl name='zc_zoneid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='zfs_stat' size-in-bits='320' is-struct='yes' visibility='default' id='6417f0b9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='zs_gen' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='zs_mode' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='zs_links' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='zs_ctime' type-id='c1c22e6c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='zfs_stat_t' type-id='6417f0b9' id='0371a9c7'/>
+    <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='B_FALSE' value='0'/>
+      <enumerator name='B_TRUE' value='1'/>
+    </enum-decl>
+    <typedef-decl name='boolean_t' type-id='f58c8277' id='c19b74c3'/>
+    <typedef-decl name='uchar_t' type-id='002ac4a6' id='d8bf0010'/>
+    <typedef-decl name='uint_t' type-id='f0981eeb' id='3502e3ff'/>
+    <union-decl name='pthread_mutex_t' size-in-bits='320' naming-typedef-id='7a6844eb' visibility='default' id='70681f9b'>
+      <data-member access='public'>
+        <var-decl name='__data' type-id='4c734837' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='36c46961' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutex_t' type-id='70681f9b' id='7a6844eb'/>
+    <typedef-decl name='int16_t' type-id='03896e23' id='23bd8cb5'/>
+    <typedef-decl name='int32_t' type-id='33f57a65' id='3ff5601b'/>
+    <typedef-decl name='uint8_t' type-id='c51d6389' id='b96825af'/>
+    <typedef-decl name='uint32_t' type-id='62f1140c' id='8f92235e'/>
+    <typedef-decl name='uint64_t' type-id='8910171f' id='9c313c2d'/>
+    <class-decl name='__pthread_mutex_s' size-in-bits='320' is-struct='yes' visibility='default' id='4c734837'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__lock' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='__count' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__owner' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='__nusers' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='__kind' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='__spins' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='176'>
+        <var-decl name='__elision' type-id='a2185560' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='__list' type-id='518fb49c' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='__pthread_internal_list' size-in-bits='128' is-struct='yes' visibility='default' id='0e01899c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='__prev' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='__next' type-id='4d98cd5a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__pthread_list_t' type-id='0e01899c' id='518fb49c'/>
+    <typedef-decl name='__uint8_t' type-id='002ac4a6' id='c51d6389'/>
+    <typedef-decl name='__int16_t' type-id='a2185560' id='03896e23'/>
+    <typedef-decl name='__int32_t' type-id='95e97e5e' id='33f57a65'/>
+    <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
+    <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
+    <typedef-decl name='__ssize_t' type-id='bd54fe1a' id='41060289'/>
+    <typedef-decl name='ssize_t' type-id='41060289' id='79a0948f'/>
+    <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
+    <pointer-type-def type-id='0e01899c' size-in-bits='64' id='4d98cd5a'/>
+    <pointer-type-def type-id='c19b74c3' size-in-bits='64' id='37e3bd22'/>
+    <pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
+    <qualified-type-def type-id='a84c031d' const='yes' id='9b45d938'/>
+    <pointer-type-def type-id='9b45d938' size-in-bits='64' id='80f4b756'/>
+    <qualified-type-def type-id='8b8fc893' const='yes' id='9623bc03'/>
+    <pointer-type-def type-id='9623bc03' size-in-bits='64' id='8341348b'/>
+    <qualified-type-def type-id='8e8d4be3' const='yes' id='693c3853'/>
+    <pointer-type-def type-id='693c3853' size-in-bits='64' id='22cce67b'/>
+    <pointer-type-def type-id='95e97e5e' size-in-bits='64' id='7292109c'/>
+    <pointer-type-def type-id='8e8d4be3' size-in-bits='64' id='5ce45b60'/>
+    <pointer-type-def type-id='5ce45b60' size-in-bits='64' id='857bb57e'/>
+    <pointer-type-def type-id='57928edf' size-in-bits='64' id='3fa542f0'/>
+    <pointer-type-def type-id='7a6844eb' size-in-bits='64' id='18c91f9e'/>
+    <pointer-type-def type-id='b59d7dce' size-in-bits='64' id='78c01427'/>
+    <pointer-type-def type-id='d8bf0010' size-in-bits='64' id='45b65157'/>
+    <pointer-type-def type-id='9c313c2d' size-in-bits='64' id='5d6479ae'/>
+    <pointer-type-def type-id='b96825af' size-in-bits='64' id='ae3e8ca6'/>
+    <pointer-type-def type-id='48b5725f' size-in-bits='64' id='eaa32e2f'/>
+    <pointer-type-def type-id='3522cd69' size-in-bits='64' id='b65f7fd1'/>
+    <function-decl name='zfs_ioctl_fd' mangled-name='zfs_ioctl_fd' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_ioctl_fd'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='7359adad'/>
+      <parameter type-id='b65f7fd1'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_unpack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_next_nvpair' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='3fa542f0'/>
+    </function-decl>
+    <function-decl name='nvpair_name' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_pack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='78c01427'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='fnvlist_pack_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_unpack' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_dup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_boolean_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_int32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3ff5601b'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_byte_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='45b65157'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint8_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_boolean_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='9c313c2d'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='libspl_assertf' mangled-name='libspl_assertf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libspl_assertf'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strlcpy' mangled-name='strlcpy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='strlcpy'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='libzfs_core_init' mangled-name='libzfs_core_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_core_init'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='libzfs_core_fini' mangled-name='libzfs_core_fini' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_core_fini'>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='lzc_create' mangled-name='lzc_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_create'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='bc9887f1' name='type'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='ae3e8ca6' name='wkeydata'/>
+      <parameter type-id='3502e3ff' name='wkeylen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_clone' mangled-name='lzc_clone' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_clone'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_promote' mangled-name='lzc_promote' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_promote'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='26a90f95' name='snapnamebuf'/>
+      <parameter type-id='95e97e5e' name='snapnamelen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_rename' mangled-name='lzc_rename' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_rename'>
+      <parameter type-id='80f4b756' name='source'/>
+      <parameter type-id='80f4b756' name='target'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy' mangled-name='lzc_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_destroy'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_snapshot' mangled-name='lzc_snapshot' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_snapshot'>
+      <parameter type-id='5ce45b60' name='snaps'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy_snaps' mangled-name='lzc_destroy_snaps' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_destroy_snaps'>
+      <parameter type-id='5ce45b60' name='snaps'/>
+      <parameter type-id='c19b74c3' name='defer'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_snaprange_space' mangled-name='lzc_snaprange_space' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_snaprange_space'>
+      <parameter type-id='80f4b756' name='firstsnap'/>
+      <parameter type-id='80f4b756' name='lastsnap'/>
+      <parameter type-id='5d6479ae' name='usedp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_exists' mangled-name='lzc_exists' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_exists'>
+      <parameter type-id='80f4b756' name='dataset'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='lzc_sync' mangled-name='lzc_sync' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_sync'>
+      <parameter type-id='80f4b756' name='pool_name'/>
+      <parameter type-id='5ce45b60' name='innvl'/>
+      <parameter type-id='857bb57e' name='outnvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_hold' mangled-name='lzc_hold' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_hold'>
+      <parameter type-id='5ce45b60' name='holds'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_release' mangled-name='lzc_release' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_release'>
+      <parameter type-id='5ce45b60' name='holds'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_holds' mangled-name='lzc_get_holds' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_get_holds'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='857bb57e' name='holdsp'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send' mangled-name='lzc_send' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_redacted' mangled-name='lzc_send_redacted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send_redacted'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <parameter type-id='80f4b756' name='redactbook'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_resume' mangled-name='lzc_send_resume' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send_resume'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <parameter type-id='9c313c2d' name='resumeobj'/>
+      <parameter type-id='9c313c2d' name='resumeoff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_resume_redacted' mangled-name='lzc_send_resume_redacted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send_resume_redacted'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <parameter type-id='9c313c2d' name='resumeobj'/>
+      <parameter type-id='9c313c2d' name='resumeoff'/>
+      <parameter type-id='80f4b756' name='redactbook'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_space_resume_redacted' mangled-name='lzc_send_space_resume_redacted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send_space_resume_redacted'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <parameter type-id='9c313c2d' name='resumeobj'/>
+      <parameter type-id='9c313c2d' name='resumeoff'/>
+      <parameter type-id='9c313c2d' name='resume_bytes'/>
+      <parameter type-id='80f4b756' name='redactbook'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='5d6479ae' name='spacep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_send_space' mangled-name='lzc_send_space' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_send_space'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='80f4b756' name='from'/>
+      <parameter type-id='bfbd3c8e' name='flags'/>
+      <parameter type-id='5d6479ae' name='spacep'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive' mangled-name='lzc_receive' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive_resumable' mangled-name='lzc_receive_resumable' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_resumable'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive_with_header' mangled-name='lzc_receive_with_header' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_with_header'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='resumable'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='8341348b' name='begin_record'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive_one' mangled-name='lzc_receive_one' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_one'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='resumable'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='input_fd'/>
+      <parameter type-id='8341348b' name='begin_record'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <parameter type-id='5d6479ae' name='read_bytes'/>
+      <parameter type-id='5d6479ae' name='errflags'/>
+      <parameter type-id='5d6479ae' name='action_handle'/>
+      <parameter type-id='857bb57e' name='errors'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_receive_with_cmdprops' mangled-name='lzc_receive_with_cmdprops' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_receive_with_cmdprops'>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='5ce45b60' name='cmdprops'/>
+      <parameter type-id='ae3e8ca6' name='wkeydata'/>
+      <parameter type-id='3502e3ff' name='wkeylen'/>
+      <parameter type-id='80f4b756' name='origin'/>
+      <parameter type-id='c19b74c3' name='force'/>
+      <parameter type-id='c19b74c3' name='resumable'/>
+      <parameter type-id='c19b74c3' name='raw'/>
+      <parameter type-id='95e97e5e' name='input_fd'/>
+      <parameter type-id='8341348b' name='begin_record'/>
+      <parameter type-id='95e97e5e' name='cleanup_fd'/>
+      <parameter type-id='5d6479ae' name='read_bytes'/>
+      <parameter type-id='5d6479ae' name='errflags'/>
+      <parameter type-id='5d6479ae' name='action_handle'/>
+      <parameter type-id='857bb57e' name='errors'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_rollback' mangled-name='lzc_rollback' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_rollback'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='26a90f95' name='snapnamebuf'/>
+      <parameter type-id='95e97e5e' name='snapnamelen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_rollback_to' mangled-name='lzc_rollback_to' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_rollback_to'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='80f4b756' name='snapname'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_bookmark' mangled-name='lzc_bookmark' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_bookmark'>
+      <parameter type-id='5ce45b60' name='bookmarks'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_bookmarks' mangled-name='lzc_get_bookmarks' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_get_bookmarks'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='857bb57e' name='bmarks'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_bookmark_props' mangled-name='lzc_get_bookmark_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_get_bookmark_props'>
+      <parameter type-id='80f4b756' name='bookmark'/>
+      <parameter type-id='857bb57e' name='props'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_destroy_bookmarks' mangled-name='lzc_destroy_bookmarks' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_destroy_bookmarks'>
+      <parameter type-id='5ce45b60' name='bmarks'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_channel_program' mangled-name='lzc_channel_program' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_channel_program'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='80f4b756' name='program'/>
+      <parameter type-id='9c313c2d' name='instrlimit'/>
+      <parameter type-id='9c313c2d' name='memlimit'/>
+      <parameter type-id='5ce45b60' name='argnvl'/>
+      <parameter type-id='857bb57e' name='outnvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_pool_checkpoint' mangled-name='lzc_pool_checkpoint' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_pool_checkpoint'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_pool_checkpoint_discard' mangled-name='lzc_pool_checkpoint_discard' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_pool_checkpoint_discard'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_channel_program_nosync' mangled-name='lzc_channel_program_nosync' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_channel_program_nosync'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='80f4b756' name='program'/>
+      <parameter type-id='9c313c2d' name='timeout'/>
+      <parameter type-id='9c313c2d' name='memlimit'/>
+      <parameter type-id='5ce45b60' name='argnvl'/>
+      <parameter type-id='857bb57e' name='outnvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_load_key' mangled-name='lzc_load_key' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_load_key'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='c19b74c3' name='noop'/>
+      <parameter type-id='ae3e8ca6' name='wkeydata'/>
+      <parameter type-id='3502e3ff' name='wkeylen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_unload_key' mangled-name='lzc_unload_key' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_unload_key'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_change_key' mangled-name='lzc_change_key' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_change_key'>
+      <parameter type-id='80f4b756' name='fsname'/>
+      <parameter type-id='9c313c2d' name='crypt_cmd'/>
+      <parameter type-id='5ce45b60' name='props'/>
+      <parameter type-id='ae3e8ca6' name='wkeydata'/>
+      <parameter type-id='3502e3ff' name='wkeylen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_reopen' mangled-name='lzc_reopen' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_reopen'>
+      <parameter type-id='80f4b756' name='pool_name'/>
+      <parameter type-id='c19b74c3' name='scrub_restart'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_initialize' mangled-name='lzc_initialize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_initialize'>
+      <parameter type-id='80f4b756' name='poolname'/>
+      <parameter type-id='7063e1ab' name='cmd_type'/>
+      <parameter type-id='5ce45b60' name='vdevs'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_trim' mangled-name='lzc_trim' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_trim'>
+      <parameter type-id='80f4b756' name='poolname'/>
+      <parameter type-id='b1146b8d' name='cmd_type'/>
+      <parameter type-id='9c313c2d' name='rate'/>
+      <parameter type-id='c19b74c3' name='secure'/>
+      <parameter type-id='5ce45b60' name='vdevs'/>
+      <parameter type-id='857bb57e' name='errlist'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_redact' mangled-name='lzc_redact' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_redact'>
+      <parameter type-id='80f4b756' name='snapshot'/>
+      <parameter type-id='80f4b756' name='bookname'/>
+      <parameter type-id='5ce45b60' name='snapnv'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait' mangled-name='lzc_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_wait'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='73446457' name='activity'/>
+      <parameter type-id='37e3bd22' name='waited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait_tag' mangled-name='lzc_wait_tag' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_wait_tag'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='73446457' name='activity'/>
+      <parameter type-id='9c313c2d' name='tag'/>
+      <parameter type-id='37e3bd22' name='waited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_wait_fs' mangled-name='lzc_wait_fs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_wait_fs'>
+      <parameter type-id='80f4b756' name='fs'/>
+      <parameter type-id='3024501a' name='activity'/>
+      <parameter type-id='37e3bd22' name='waited'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_set_bootenv' mangled-name='lzc_set_bootenv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_set_bootenv'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='22cce67b' name='env'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzc_get_bootenv' mangled-name='lzc_get_bootenv' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_get_bootenv'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='857bb57e' name='outnvl'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='__errno_location' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='7292109c'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_lock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_unlock' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='malloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strrchr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strcspn' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='close' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='read' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/zutil_device_path_os.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='288' id='16e6f2c6'>
+      <subrange length='36' type-id='7359adad' id='ae666bde'/>
+    </array-type-def>
+    <class-decl name='udev' is-struct='yes' visibility='default' is-declaration-only='yes' id='e4a7fb7f'/>
+    <class-decl name='udev_device' is-struct='yes' visibility='default' is-declaration-only='yes' id='640b33ca'/>
+    <array-type-def dimensions='1' type-id='a65ae39c' size-in-bits='960' id='fa198beb'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+    <array-type-def dimensions='1' type-id='3502e3ff' size-in-bits='384' id='dba89ba3'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='3502e3ff' size-in-bits='256' id='01d84ed4'>
+      <subrange length='8' type-id='7359adad' id='56e0c0b1'/>
+    </array-type-def>
+    <class-decl name='dk_part' size-in-bits='960' is-struct='yes' visibility='default' id='a65ae39c'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='p_start' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='p_size' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='p_guid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='p_tag' type-id='d908a348' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='272'>
+        <var-decl name='p_flag' type-id='d908a348' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='p_name' type-id='16e6f2c6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='p_uguid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='p_resv' type-id='01d84ed4' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='dk_gpt' size-in-bits='1920' is-struct='yes' visibility='default' id='dd4a2e5a'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='efi_version' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='efi_nparts' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='efi_part_size' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='efi_lbasize' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='efi_last_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='efi_first_u_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='efi_last_u_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='efi_disk_uguid' type-id='214f32ea' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='efi_flags' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='480'>
+        <var-decl name='efi_reserved1' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='efi_altern_lba' type-id='804dc465' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='efi_reserved' type-id='dba89ba3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='efi_parts' type-id='fa198beb' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='uuid' size-in-bits='128' is-struct='yes' visibility='default' id='214f32ea'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='time_low' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='time_mid' type-id='149c6638' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='48'>
+        <var-decl name='time_hi_and_version' type-id='149c6638' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='clock_seq_hi_and_reserved' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='72'>
+        <var-decl name='clock_seq_low' type-id='b96825af' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='80'>
+        <var-decl name='node_addr' type-id='0f562bd0' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ushort_t' type-id='8efea9e5' id='d908a348'/>
+    <typedef-decl name='longlong_t' type-id='1eb56b1e' id='9b3ff54f'/>
+    <typedef-decl name='diskaddr_t' type-id='9b3ff54f' id='804dc465'/>
+    <class-decl name='dirent' size-in-bits='2240' is-struct='yes' visibility='default' id='611586a1'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='d_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='d_off' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='d_reclen' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='144'>
+        <var-decl name='d_type' type-id='002ac4a6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='152'>
+        <var-decl name='d_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='uint16_t' type-id='253c2d2a' id='149c6638'/>
+    <typedef-decl name='__uint16_t' type-id='8efea9e5' id='253c2d2a'/>
+    <pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/>
+    <pointer-type-def type-id='dd4a2e5a' size-in-bits='64' id='0d8119a8'/>
+    <pointer-type-def type-id='0d8119a8' size-in-bits='64' id='c43b27a6'/>
+    <pointer-type-def type-id='e4a7fb7f' size-in-bits='64' id='025eefe7'/>
+    <pointer-type-def type-id='640b33ca' size-in-bits='64' id='b32bae08'/>
+    <class-decl name='udev' is-struct='yes' visibility='default' is-declaration-only='yes' id='e4a7fb7f'/>
+    <class-decl name='udev_device' is-struct='yes' visibility='default' is-declaration-only='yes' id='640b33ca'/>
+    <function-decl name='efi_alloc_and_init' mangled-name='efi_alloc_and_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_alloc_and_init'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='8f92235e'/>
+      <parameter type-id='c43b27a6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='efi_free' mangled-name='efi_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='efi_free'>
+      <parameter type-id='0d8119a8'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='udev_new' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='025eefe7'/>
+    </function-decl>
+    <function-decl name='udev_device_unref' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b32bae08'/>
+      <return type-id='b32bae08'/>
+    </function-decl>
+    <function-decl name='udev_device_new_from_subsystem_sysname' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='025eefe7'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b32bae08'/>
+    </function-decl>
+    <function-decl name='udev_device_get_property_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b32bae08'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='fclose' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fgets' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='e75a27e9'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strstr' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='readlink' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='zfs_strip_partition' mangled-name='zfs_strip_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_strip_partition'>
+      <parameter type-id='26a90f95' name='path'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zfs_strip_path' mangled-name='zfs_strip_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_strip_path'>
+      <parameter type-id='26a90f95' name='path'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zfs_get_enclosure_sysfs_path' mangled-name='zfs_get_enclosure_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_enclosure_sysfs_path'>
+      <parameter type-id='80f4b756' name='dev_name'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zfs_dev_is_dm' mangled-name='zfs_dev_is_dm' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dev_is_dm'>
+      <parameter type-id='80f4b756' name='dev_name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_dev_is_whole_disk' mangled-name='zfs_dev_is_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dev_is_whole_disk'>
+      <parameter type-id='80f4b756' name='dev_name'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'>
+      <parameter type-id='80f4b756' name='dev_name'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='is_mpath_whole_disk' mangled-name='is_mpath_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_mpath_whole_disk'>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/zutil_import_os.c' language='LANG_C99'>
+    <class-decl name='blkid_struct_cache' is-struct='yes' visibility='default' is-declaration-only='yes' id='09286066'/>
+    <class-decl name='blkid_struct_dev' is-struct='yes' visibility='default' is-declaration-only='yes' id='86223623'/>
+    <class-decl name='blkid_struct_dev_iterate' is-struct='yes' visibility='default' is-declaration-only='yes' id='d88420d6'/>
+    <class-decl name='udev_list_entry' is-struct='yes' visibility='default' is-declaration-only='yes' id='e7dbdca3'/>
+    <typedef-decl name='__useconds_t' type-id='f0981eeb' id='4e80d4b1'/>
+    <typedef-decl name='__clockid_t' type-id='95e97e5e' id='08f9a87a'/>
+    <typedef-decl name='clockid_t' type-id='08f9a87a' id='a1c3b834'/>
+    <typedef-decl name='blkid_dev' type-id='8433f053' id='f47b023a'/>
+    <typedef-decl name='blkid_cache' type-id='940e3afc' id='0882dfdf'/>
+    <typedef-decl name='blkid_dev_iterate' type-id='b8fa2efc' id='f4760fa7'/>
+    <pointer-type-def type-id='0882dfdf' size-in-bits='64' id='2e3e7caa'/>
+    <pointer-type-def type-id='f47b023a' size-in-bits='64' id='d87f9b75'/>
+    <pointer-type-def type-id='09286066' size-in-bits='64' id='940e3afc'/>
+    <pointer-type-def type-id='86223623' size-in-bits='64' id='8433f053'/>
+    <pointer-type-def type-id='d88420d6' size-in-bits='64' id='b8fa2efc'/>
+    <qualified-type-def type-id='62f7a03d' restrict='yes' id='f1cadedf'/>
+    <pointer-type-def type-id='a9c79a1f' size-in-bits='64' id='3d83ba87'/>
+    <pointer-type-def type-id='e7dbdca3' size-in-bits='64' id='deabd0d3'/>
+    <class-decl name='blkid_struct_cache' is-struct='yes' visibility='default' is-declaration-only='yes' id='09286066'/>
+    <class-decl name='blkid_struct_dev' is-struct='yes' visibility='default' is-declaration-only='yes' id='86223623'/>
+    <class-decl name='blkid_struct_dev_iterate' is-struct='yes' visibility='default' is-declaration-only='yes' id='d88420d6'/>
+    <class-decl name='udev_list_entry' is-struct='yes' visibility='default' is-declaration-only='yes' id='e7dbdca3'/>
+    <function-decl name='nvlist_remove_all' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='blkid_put_cache' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='0882dfdf'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='blkid_get_cache' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='2e3e7caa'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='blkid_dev_devname' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f47b023a'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='blkid_dev_iterate_begin' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='0882dfdf'/>
+      <return type-id='f4760fa7'/>
+    </function-decl>
+    <function-decl name='blkid_dev_set_search' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f4760fa7'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='blkid_dev_next' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f4760fa7'/>
+      <parameter type-id='d87f9b75'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='blkid_dev_iterate_end' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f4760fa7'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='blkid_probe_all_new' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='0882dfdf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='udev_unref' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='025eefe7'/>
+      <return type-id='025eefe7'/>
+    </function-decl>
+    <function-decl name='udev_list_entry_get_next' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='deabd0d3'/>
+      <return type-id='deabd0d3'/>
+    </function-decl>
+    <function-decl name='udev_list_entry_get_name' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='deabd0d3'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='udev_device_get_parent_with_subsystem_devtype' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b32bae08'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b32bae08'/>
+    </function-decl>
+    <function-decl name='udev_device_get_devlinks_list_entry' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b32bae08'/>
+      <return type-id='deabd0d3'/>
+    </function-decl>
+    <function-decl name='sched_yield' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtoul' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='7359adad'/>
+    </function-decl>
+    <function-decl name='strncasecmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='stat64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='f1cadedf'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='clock_gettime' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a1c3b834'/>
+      <parameter type-id='3d83ba87'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='usleep' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4e80d4b1'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_dev_flush' mangled-name='zfs_dev_flush' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dev_flush'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_device_get_devid' mangled-name='zfs_device_get_devid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_device_get_devid'>
+      <parameter type-id='b32bae08' name='dev'/>
+      <parameter type-id='26a90f95' name='bufptr'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_device_get_physical' mangled-name='zfs_device_get_physical' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_device_get_physical'>
+      <parameter type-id='b32bae08' name='dev'/>
+      <parameter type-id='26a90f95' name='bufptr'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_label_disk_wait' mangled-name='zpool_label_disk_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk_wait'>
+      <parameter type-id='80f4b756' name='path'/>
+      <parameter type-id='95e97e5e' name='timeout_ms'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='os/linux/zutil_setproctitle.c' language='LANG_C99'>
+    <function-decl name='warnx' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='setenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_setproctitle_init' mangled-name='zfs_setproctitle_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle_init'>
+      <parameter type-id='95e97e5e' name='argc'/>
+      <parameter type-id='9b23c9ad' name='argv'/>
+      <parameter type-id='9b23c9ad' name='envp'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_setproctitle' mangled-name='zfs_setproctitle' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle'>
+      <parameter type-id='80f4b756' name='fmt'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='zutil_device_path.c' language='LANG_C99'>
+    <qualified-type-def type-id='26a90f95' restrict='yes' id='266fe297'/>
+    <qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
+    <pointer-type-def type-id='b99c00c9' size-in-bits='64' id='13956559'/>
+    <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
+    <function-decl name='zpool_default_search_paths' mangled-name='zpool_default_search_paths' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_default_search_paths'>
+      <parameter type-id='78c01427'/>
+      <return type-id='13956559'/>
+    </function-decl>
+    <function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strlcat' mangled-name='strlcat' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='strlcat'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='zfs_resolve_shortname' mangled-name='zfs_resolve_shortname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_resolve_shortname'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='26a90f95' name='path'/>
+      <parameter type-id='b59d7dce' name='len'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zfs_strcmp_pathname' mangled-name='zfs_strcmp_pathname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_strcmp_pathname'>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='80f4b756' name='cmp'/>
+      <parameter type-id='95e97e5e' name='wholedisk'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='snprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='getenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='memset' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='strcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strdup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strtok' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='266fe297'/>
+      <parameter type-id='9d26089a'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='zutil_import.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='03085adc' size-in-bits='192' id='083f8d58'>
+      <subrange length='3' type-id='7359adad' id='56f209d2'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='bf311473' size-in-bits='128' id='f0f65199'>
+      <subrange length='2' type-id='7359adad' id='52efc4ef'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8192' id='b54ce520'>
+      <subrange length='1024' type-id='7359adad' id='c60446f8'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8' id='89feb1ec'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
+      <subrange length='20' type-id='7359adad' id='fdca39cf'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='256' id='16dc656a'>
+      <subrange length='32' type-id='7359adad' id='ae5bde82'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='32' id='8e0573fd'>
+      <subrange length='4' type-id='7359adad' id='16fe7105'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='448' id='6093ff7c'>
+      <subrange length='56' type-id='7359adad' id='f8137894'/>
+    </array-type-def>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='__dirstream' is-struct='yes' visibility='default' is-declaration-only='yes' id='20cd73f2'/>
+    <class-decl name='__va_list_tag' size-in-bits='192' is-struct='yes' visibility='default' id='d5027220'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='gp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='fp_offset' type-id='f0981eeb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='overflow_arg_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='reg_save_area' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='tpool' size-in-bits='2496' is-struct='yes' visibility='default' id='88d1b7f9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tp_forw' type-id='9cf59a50' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tp_back' type-id='9cf59a50' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='tp_mutex' type-id='7a6844eb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='tp_busycv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='tp_workcv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='tp_waitcv' type-id='62fab762' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1600'>
+        <var-decl name='tp_active' type-id='ad33e5e7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1664'>
+        <var-decl name='tp_head' type-id='f32b30e4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1728'>
+        <var-decl name='tp_tail' type-id='f32b30e4' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1792'>
+        <var-decl name='tp_attr' type-id='7d8569fd' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2240'>
+        <var-decl name='tp_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2272'>
+        <var-decl name='tp_linger' type-id='3502e3ff' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2304'>
+        <var-decl name='tp_njobs' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2336'>
+        <var-decl name='tp_minimum' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2368'>
+        <var-decl name='tp_maximum' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2400'>
+        <var-decl name='tp_current' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='2432'>
+        <var-decl name='tp_idle' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <array-type-def dimensions='1' type-id='95e97e5e' size-in-bits='384' id='73b82f0f'>
+      <subrange length='12' type-id='7359adad' id='84827bdc'/>
+    </array-type-def>
+    <type-decl name='long long unsigned int' size-in-bits='64' id='3a47d82b'/>
+    <type-decl name='signed char' size-in-bits='8' id='28577a57'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+    <typedef-decl name='refresh_config_func_t' type-id='29f040d2' id='b7c58eaa'/>
+    <typedef-decl name='pool_active_func_t' type-id='baa42fef' id='de5d1d8f'/>
+    <class-decl name='pool_config_ops' size-in-bits='128' is-struct='yes' visibility='default' id='8b092c69'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='pco_refresh_config' type-id='e7c00489' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='pco_pool_active' type-id='9eadf5e0' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='pool_config_ops_t' type-id='1a21babe' id='b1e62775'/>
+    <class-decl name='importargs' size-in-bits='448' is-struct='yes' visibility='default' id='7ac83801'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='path' type-id='9b23c9ad' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='paths' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='poolname' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='guid' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='cachefile' type-id='80f4b756' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='can_be_active' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='352'>
+        <var-decl name='scan' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='policy' type-id='5ce45b60' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='importargs_t' type-id='7ac83801' id='7a842a6b'/>
+    <typedef-decl name='pool_vdev_iter_f' type-id='6c16a6c8' id='dff793e0'/>
+    <typedef-decl name='avl_tree_t' type-id='b351119f' id='f20fbd51'/>
+    <typedef-decl name='avl_index_t' type-id='e475ab95' id='fba6cb51'/>
+    <class-decl name='avl_node' size-in-bits='192' is-struct='yes' visibility='default' id='428b67b3'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_child' type-id='f0f65199' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_pcb' type-id='e475ab95' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='avl_tree' size-in-bits='320' is-struct='yes' visibility='default' id='b351119f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='avl_root' type-id='bf311473' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='avl_compar' type-id='585e1de9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='avl_offset' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='avl_numnodes' type-id='ee1f298e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='avl_size' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='tpool_t' type-id='88d1b7f9' id='b1bbf10d'/>
+    <typedef-decl name='ulong_t' type-id='7359adad' id='ee1f298e'/>
+    <class-decl name='libpc_handle' size-in-bits='8448' is-struct='yes' visibility='default' id='7c8737f0'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='lpc_printerr' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='lpc_open_access_error' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='lpc_desc_active' type-id='c19b74c3' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='lpc_desc' type-id='b54ce520' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8320'>
+        <var-decl name='lpc_ops' type-id='f095e320' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='8384'>
+        <var-decl name='lpc_lib_handle' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='libpc_handle_t' type-id='7c8737f0' id='8a70a786'/>
+    <class-decl name='aiocb' size-in-bits='1344' is-struct='yes' visibility='default' id='e4957c49'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='aio_fildes' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='aio_lio_opcode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='aio_reqprio' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='aio_buf' type-id='fe09dd29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='aio_nbytes' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='aio_sigevent' type-id='519bc206' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='__next_prio' type-id='924bbc81' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='__abs_prio' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='864'>
+        <var-decl name='__policy' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='__error_code' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__return_value' type-id='41060289' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='aio_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='__glibc_reserved' type-id='16dc656a' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='dirent64' size-in-bits='2240' is-struct='yes' visibility='default' id='5725d813'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='d_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='d_off' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='d_reclen' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='144'>
+        <var-decl name='d_type' type-id='002ac4a6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='152'>
+        <var-decl name='d_name' type-id='d1617432' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='pthread_mutexattr_t' size-in-bits='32' naming-typedef-id='8afd6070' visibility='default' id='7300eb00'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='8e0573fd' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_mutexattr_t' type-id='7300eb00' id='8afd6070'/>
+    <union-decl name='pthread_attr_t' size-in-bits='448' visibility='default' id='b63afacd'>
+      <data-member access='public'>
+        <var-decl name='__size' type-id='6093ff7c' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='__align' type-id='bd54fe1a' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='pthread_attr_t' type-id='b63afacd' id='7d8569fd'/>
+    <class-decl name='stat' size-in-bits='1152' is-struct='yes' visibility='default' id='aafc373f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='st_dev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='st_ino' type-id='e43e523d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='st_nlink' type-id='80f0b9df' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='st_mode' type-id='e1c52942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='st_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='st_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='st_rdev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='st_size' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='st_blksize' type-id='d3f10a7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='st_blocks' type-id='dbc43803' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='st_atim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='st_mtim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='st_ctim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__glibc_reserved' type-id='083f8d58' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='stat64' size-in-bits='1152' is-struct='yes' visibility='default' id='0bbec9cd'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='st_dev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='st_ino' type-id='71288a47' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='st_nlink' type-id='80f0b9df' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='st_mode' type-id='e1c52942' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='224'>
+        <var-decl name='st_uid' type-id='cc5fcceb' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='st_gid' type-id='d94ec6d9' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='288'>
+        <var-decl name='__pad0' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='st_rdev' type-id='35ed8932' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='st_size' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='st_blksize' type-id='d3f10a7f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='st_blocks' type-id='4e711bf1' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='st_atim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='st_mtim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='st_ctim' type-id='a9c79a1f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='__glibc_reserved' type-id='083f8d58' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='__dev_t' type-id='7359adad' id='35ed8932'/>
+    <typedef-decl name='__uid_t' type-id='f0981eeb' id='cc5fcceb'/>
+    <typedef-decl name='__gid_t' type-id='f0981eeb' id='d94ec6d9'/>
+    <typedef-decl name='__ino_t' type-id='7359adad' id='e43e523d'/>
+    <typedef-decl name='__ino64_t' type-id='7359adad' id='71288a47'/>
+    <typedef-decl name='__mode_t' type-id='f0981eeb' id='e1c52942'/>
+    <typedef-decl name='__nlink_t' type-id='7359adad' id='80f0b9df'/>
+    <typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
+    <typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
+    <typedef-decl name='__pid_t' type-id='95e97e5e' id='3629bad8'/>
+    <typedef-decl name='__time_t' type-id='bd54fe1a' id='65eda9c0'/>
+    <typedef-decl name='__blksize_t' type-id='bd54fe1a' id='d3f10a7f'/>
+    <typedef-decl name='__blkcnt_t' type-id='bd54fe1a' id='dbc43803'/>
+    <typedef-decl name='__blkcnt64_t' type-id='bd54fe1a' id='4e711bf1'/>
+    <typedef-decl name='__syscall_slong_t' type-id='bd54fe1a' id='03085adc'/>
+    <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
+    <union-decl name='sigval' size-in-bits='64' visibility='default' id='a094b870'>
+      <data-member access='public'>
+        <var-decl name='sival_int' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='sival_ptr' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <typedef-decl name='__sigval_t' type-id='a094b870' id='eabacd01'/>
+    <class-decl name='sigevent' size-in-bits='512' is-struct='yes' visibility='default' id='519bc206'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='sigev_value' type-id='eabacd01' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='sigev_signo' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='96'>
+        <var-decl name='sigev_notify' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_sigev_un' type-id='ac5ab598' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <union-decl name='__anonymous_union__' size-in-bits='384' is-anonymous='yes' visibility='default' id='ac5ab598'>
+      <data-member access='public'>
+        <var-decl name='_pad' type-id='73b82f0f' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_tid' type-id='3629bad8' visibility='default'/>
+      </data-member>
+      <data-member access='public'>
+        <var-decl name='_sigev_thread' type-id='e7f43f73' visibility='default'/>
+      </data-member>
+    </union-decl>
+    <class-decl name='__anonymous_struct__' size-in-bits='128' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f73'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_function' type-id='5f147c28' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_attribute' type-id='7347a39e' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
+    <class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_IO_read_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_IO_read_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='_IO_read_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='_IO_write_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='_IO_write_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='_IO_write_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='_IO_buf_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='_IO_buf_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='_IO_save_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='_IO_backup_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='_IO_save_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='_markers' type-id='e4c6fa61' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='_chain' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='_fileno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='_flags2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='_old_offset' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='_cur_column' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1040'>
+        <var-decl name='_vtable_offset' type-id='28577a57' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1048'>
+        <var-decl name='_shortbuf' type-id='89feb1ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='_lock' type-id='cecf4ea7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1344'>
+        <var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1472'>
+        <var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='_mode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1568'>
+        <var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <class-decl name='timespec' size-in-bits='128' is-struct='yes' visibility='default' id='a9c79a1f'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='tv_sec' type-id='65eda9c0' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='tv_nsec' type-id='03085adc' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='DIR' type-id='20cd73f2' id='54a5d683'/>
+    <typedef-decl name='uintptr_t' type-id='7359adad' id='e475ab95'/>
+    <pointer-type-def type-id='54a5d683' size-in-bits='64' id='f09217ba'/>
+    <pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
+    <qualified-type-def type-id='822cd80b' restrict='yes' id='e75a27e9'/>
+    <pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
+    <pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
+    <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
+    <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
+    <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
+    <pointer-type-def type-id='d5027220' size-in-bits='64' id='b7f2d5e6'/>
+    <pointer-type-def type-id='e4957c49' size-in-bits='64' id='924bbc81'/>
+    <qualified-type-def type-id='924bbc81' const='yes' id='5499dcde'/>
+    <pointer-type-def type-id='5499dcde' size-in-bits='64' id='2236d41c'/>
+    <qualified-type-def type-id='2236d41c' restrict='yes' id='31488924'/>
+    <pointer-type-def type-id='fba6cb51' size-in-bits='64' id='32adbf30'/>
+    <pointer-type-def type-id='428b67b3' size-in-bits='64' id='bf311473'/>
+    <pointer-type-def type-id='b351119f' size-in-bits='64' id='716943c7'/>
+    <pointer-type-def type-id='f20fbd51' size-in-bits='64' id='a3681dea'/>
+    <pointer-type-def type-id='a3681dea' size-in-bits='64' id='fce6d540'/>
+    <pointer-type-def type-id='26a90f95' size-in-bits='64' id='9b23c9ad'/>
+    <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
+    <qualified-type-def type-id='e4957c49' const='yes' id='fced9da2'/>
+    <pointer-type-def type-id='fced9da2' size-in-bits='64' id='b20efd18'/>
+    <qualified-type-def type-id='8b092c69' const='yes' id='1a21babe'/>
+    <qualified-type-def type-id='8afd6070' const='yes' id='1d853360'/>
+    <pointer-type-def type-id='1d853360' size-in-bits='64' id='c2afbd7e'/>
+    <pointer-type-def type-id='5725d813' size-in-bits='64' id='07b96073'/>
+    <pointer-type-def type-id='7a842a6b' size-in-bits='64' id='07ee4a58'/>
+    <pointer-type-def type-id='2ec2411e' size-in-bits='64' id='6c16a6c8'/>
+    <pointer-type-def type-id='96ee24a5' size-in-bits='64' id='585e1de9'/>
+    <pointer-type-def type-id='8a70a786' size-in-bits='64' id='5507783b'/>
+    <pointer-type-def type-id='857bb57e' size-in-bits='64' id='75be733c'/>
+    <pointer-type-def type-id='de5d1d8f' size-in-bits='64' id='9eadf5e0'/>
+    <pointer-type-def type-id='b1e62775' size-in-bits='64' id='f095e320'/>
+    <pointer-type-def type-id='7d8569fd' size-in-bits='64' id='7347a39e'/>
+    <pointer-type-def type-id='b7c58eaa' size-in-bits='64' id='e7c00489'/>
+    <pointer-type-def type-id='519bc206' size-in-bits='64' id='ef2f159c'/>
+    <qualified-type-def type-id='ef2f159c' restrict='yes' id='de0eb5a4'/>
+    <pointer-type-def type-id='aafc373f' size-in-bits='64' id='4330df87'/>
+    <qualified-type-def type-id='4330df87' restrict='yes' id='73665405'/>
+    <pointer-type-def type-id='0bbec9cd' size-in-bits='64' id='62f7a03d'/>
+    <pointer-type-def type-id='b1bbf10d' size-in-bits='64' id='9cf59a50'/>
+    <pointer-type-def type-id='5d6479ae' size-in-bits='64' id='892b4acc'/>
+    <pointer-type-def type-id='3502e3ff' size-in-bits='64' id='4dd26a40'/>
+    <pointer-type-def type-id='f1abb096' size-in-bits='64' id='5f147c28'/>
+    <pointer-type-def type-id='c5c76c9c' size-in-bits='64' id='b7f9d8e6'/>
+    <pointer-type-def type-id='eaa32e2f' size-in-bits='64' id='63e171df'/>
+    <qualified-type-def type-id='48b5725f' volatile='yes' id='b0b3cbf9'/>
+    <pointer-type-def type-id='b0b3cbf9' size-in-bits='64' id='fe09dd29'/>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='__dirstream' is-struct='yes' visibility='default' is-declaration-only='yes' id='20cd73f2'/>
+    <function-decl name='update_vdev_config_dev_strs' mangled-name='update_vdev_config_dev_strs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_strs'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='update_vdevs_config_dev_sysfs_path' mangled-name='update_vdevs_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdevs_config_dev_sysfs_path'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_create' mangled-name='avl_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_create'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='585e1de9'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_find' mangled-name='avl_find' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_find'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='32adbf30'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_insert' mangled-name='avl_insert' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_insert'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='fba6cb51'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_first' mangled-name='avl_first' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_first'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_destroy_nodes' mangled-name='avl_destroy_nodes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_destroy_nodes'>
+      <parameter type-id='a3681dea'/>
+      <parameter type-id='63e171df'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='avl_destroy' mangled-name='avl_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_destroy'>
+      <parameter type-id='a3681dea'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='avl_walk' mangled-name='avl_walk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_walk'>
+      <parameter type-id='716943c7'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='nvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_dup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8d0687d2'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='892b4acc'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='75be733c'/>
+      <parameter type-id='4dd26a40'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_empty' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='nvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fnvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvpair_value_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='3fa542f0'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='tpool_create' mangled-name='tpool_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_create'>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='7347a39e'/>
+      <return type-id='9cf59a50'/>
+    </function-decl>
+    <function-decl name='tpool_dispatch' mangled-name='tpool_dispatch' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_dispatch'>
+      <parameter type-id='9cf59a50'/>
+      <parameter type-id='b7f9d8e6'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='tpool_destroy' mangled-name='tpool_destroy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_destroy'>
+      <parameter type-id='9cf59a50'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='tpool_wait' mangled-name='tpool_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='tpool_wait'>
+      <parameter type-id='9cf59a50'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='spl_pagesize' mangled-name='spl_pagesize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='spl_pagesize'>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+    <function-decl name='zutil_alloc' mangled-name='zutil_alloc' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zutil_alloc'>
+      <parameter type-id='5507783b' name='hdl'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='zutil_strdup' mangled-name='zutil_strdup' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zutil_strdup'>
+      <parameter type-id='5507783b' name='hdl'/>
+      <parameter type-id='80f4b756' name='str'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='zpool_read_label' mangled-name='zpool_read_label' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_read_label'>
+      <parameter type-id='95e97e5e' name='fd'/>
+      <parameter type-id='857bb57e' name='config'/>
+      <parameter type-id='7292109c' name='num_labels'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='slice_cache_compare' mangled-name='slice_cache_compare' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='slice_cache_compare'>
+      <parameter type-id='eaa32e2f' name='arg1'/>
+      <parameter type-id='eaa32e2f' name='arg2'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='label_paths' mangled-name='label_paths' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='label_paths'>
+      <parameter type-id='5507783b' name='hdl'/>
+      <parameter type-id='5ce45b60' name='label'/>
+      <parameter type-id='9b23c9ad' name='path'/>
+      <parameter type-id='9b23c9ad' name='devid'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_search_import' mangled-name='zpool_search_import' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_search_import'>
+      <parameter type-id='eaa32e2f' name='hdl'/>
+      <parameter type-id='07ee4a58' name='import'/>
+      <parameter type-id='f095e320' name='pco'/>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='zpool_find_config' mangled-name='zpool_find_config' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_config'>
+      <parameter type-id='eaa32e2f' name='hdl'/>
+      <parameter type-id='80f4b756' name='target'/>
+      <parameter type-id='857bb57e' name='configp'/>
+      <parameter type-id='07ee4a58' name='args'/>
+      <parameter type-id='f095e320' name='pco'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='for_each_vdev_cb' mangled-name='for_each_vdev_cb' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='for_each_vdev_cb'>
+      <parameter type-id='eaa32e2f' name='zhp'/>
+      <parameter type-id='5ce45b60' name='nv'/>
+      <parameter type-id='dff793e0' name='func'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='for_each_vdev_in_nvlist' mangled-name='for_each_vdev_in_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='for_each_vdev_in_nvlist'>
+      <parameter type-id='5ce45b60' name='nvroot'/>
+      <parameter type-id='dff793e0' name='func'/>
+      <parameter type-id='eaa32e2f' name='data'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_find_import_blkid' mangled-name='zpool_find_import_blkid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_import_blkid'>
+      <parameter type-id='5507783b'/>
+      <parameter type-id='18c91f9e'/>
+      <parameter type-id='fce6d540'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_open_func' mangled-name='zpool_open_func' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_open_func'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='f09217ba'/>
+    </function-decl>
+    <function-decl name='closedir' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f09217ba'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='readdir64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='f09217ba'/>
+      <return type-id='07b96073'/>
+    </function-decl>
+    <function-decl name='dirname' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='__xpg_basename' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='dcgettext' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_init' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <parameter type-id='c2afbd7e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pthread_mutex_destroy' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='18c91f9e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='vsnprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='26a90f95'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b7f2d5e6'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='asprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strtoull' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='3a47d82b'/>
+    </function-decl>
+    <function-decl name='calloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+    <function-decl name='posix_memalign' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='63e171df'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='exit' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='realpath' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='9d26089a'/>
+      <parameter type-id='266fe297'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strncmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strpbrk' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strerror' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='ioctl' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='7359adad'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fstat64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='62f7a03d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='pread64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <parameter type-id='724e4de6'/>
+      <return type-id='79a0948f'/>
+    </function-decl>
+    <function-decl name='sysconf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='bd54fe1a'/>
+    </function-decl>
+    <function-decl name='geteuid' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='cc5fcceb'/>
+    </function-decl>
+    <function-type size-in-bits='64' id='baa42fef'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <parameter type-id='37e3bd22'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='2ec2411e'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='96ee24a5'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='95e97e5e'/>
+    </function-type>
+    <function-type size-in-bits='64' id='29f040d2'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='5ce45b60'/>
+    </function-type>
+    <function-type size-in-bits='64' id='f1abb096'>
+      <parameter type-id='eabacd01'/>
+      <return type-id='48b5725f'/>
+    </function-type>
+  </abi-instr>
+  <abi-instr address-size='64' path='zutil_nicenum.c' language='LANG_C99'>
+    <type-decl name='double' size-in-bits='64' id='a0eb0f08'/>
+    <type-decl name='long double' size-in-bits='128' id='e095c704'/>
+    <enum-decl name='zfs_nicenum_format' id='29cf1969'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='ZFS_NICENUM_1024' value='0'/>
+      <enumerator name='ZFS_NICENUM_BYTES' value='1'/>
+      <enumerator name='ZFS_NICENUM_TIME' value='2'/>
+      <enumerator name='ZFS_NICENUM_RAW' value='3'/>
+      <enumerator name='ZFS_NICENUM_RAWTIME' value='4'/>
+    </enum-decl>
+    <qualified-type-def type-id='8efea9e5' const='yes' id='3beb2af4'/>
+    <pointer-type-def type-id='3beb2af4' size-in-bits='64' id='31347b7a'/>
+    <pointer-type-def type-id='31347b7a' size-in-bits='64' id='c59e1ef0'/>
+    <function-decl name='zfs_isnumber' mangled-name='zfs_isnumber' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_isnumber'>
+      <parameter type-id='80f4b756' name='str'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='zfs_nicenum_format' mangled-name='zfs_nicenum_format' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_nicenum_format'>
+      <parameter type-id='9c313c2d' name='num'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <parameter type-id='29cf1969' name='format'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_nicenum' mangled-name='zfs_nicenum' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_nicenum'>
+      <parameter type-id='9c313c2d' name='num'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_nicetime' mangled-name='zfs_nicetime' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_nicetime'>
+      <parameter type-id='9c313c2d' name='num'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_niceraw' mangled-name='zfs_niceraw' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_niceraw'>
+      <parameter type-id='9c313c2d' name='num'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zfs_nicebytes' mangled-name='zfs_nicebytes' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_nicebytes'>
+      <parameter type-id='9c313c2d' name='num'/>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='b59d7dce' name='buflen'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='powl' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e095c704'/>
+      <parameter type-id='e095c704'/>
+      <return type-id='e095c704'/>
+    </function-decl>
+    <function-decl name='floor' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='a0eb0f08'/>
+      <return type-id='a0eb0f08'/>
+    </function-decl>
+    <function-decl name='__ctype_b_loc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='c59e1ef0'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='zutil_pool.c' language='LANG_C99'>
+    <array-type-def dimensions='1' type-id='853fd5dc' size-in-bits='32768' id='b505fc2f'>
+      <subrange length='64' type-id='7359adad' id='b10be967'/>
+    </array-type-def>
+    <class-decl name='ddt_stat' size-in-bits='512' is-struct='yes' visibility='default' id='65242dfe'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='dds_blocks' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='dds_lsize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='dds_psize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='dds_dsize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='dds_ref_blocks' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='dds_ref_lsize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='dds_ref_psize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='dds_ref_dsize' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ddt_stat_t' type-id='65242dfe' id='853fd5dc'/>
+    <class-decl name='ddt_histogram' size-in-bits='32768' is-struct='yes' visibility='default' id='bc2b3086'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='ddh_stat' type-id='b505fc2f' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='ddt_histogram_t' type-id='bc2b3086' id='2d7fe832'/>
+    <qualified-type-def type-id='2d7fe832' const='yes' id='ec92d602'/>
+    <pointer-type-def type-id='ec92d602' size-in-bits='64' id='932720f8'/>
+    <qualified-type-def type-id='853fd5dc' const='yes' id='764c298c'/>
+    <pointer-type-def type-id='764c298c' size-in-bits='64' id='dfe59052'/>
+    <function-decl name='zpool_dump_ddt' mangled-name='zpool_dump_ddt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_dump_ddt'>
+      <parameter type-id='dfe59052' name='dds_total'/>
+      <parameter type-id='932720f8' name='ddh'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_history_unpack' mangled-name='zpool_history_unpack' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_history_unpack'>
+      <parameter type-id='26a90f95' name='buf'/>
+      <parameter type-id='9c313c2d' name='bytes_read'/>
+      <parameter type-id='5d6479ae' name='leftover'/>
+      <parameter type-id='75be733c' name='records'/>
+      <parameter type-id='4dd26a40' name='numrecords'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='printf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='realloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='eaa32e2f'/>
+    </function-decl>
+  </abi-instr>
+</abi-corpus>

diff --git a/zfs/lib/libzfs_core/libzfs_core.c b/zfs/lib/libzfs_core/libzfs_core.c
index eb332bc..e00c829 100644
--- a/zfs/lib/libzfs_core/libzfs_core.c
+++ b/zfs/lib/libzfs_core/libzfs_core.c

@@ -20,11 +20,12 @@
  */
 
 /*
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2017 Datto Inc.
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
  */
 
 /*
@@ -84,6 +85,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <pthread.h>
+#include <libzutil.h>
 #include <sys/nvpair.h>
 #include <sys/param.h>
 #include <sys/types.h>
@@ -95,7 +97,7 @@
 static int g_refcount;
 
 #ifdef ZFS_DEBUG
-static zfs_ioc_t fail_ioc_cmd;
+static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
 static zfs_errno_t fail_ioc_err;
 
 static void
@@ -116,7 +118,7 @@
 	 * cannot checkpoint 'tank': the loaded zfs module does not support
 	 * this operation. A reboot may be required to enable this operation.
 	 */
-	if (fail_ioc_cmd == 0) {
+	if (fail_ioc_cmd == ZFS_IOC_LAST) {
 		char *ioc_test = getenv("ZFS_IOC_TEST");
 		unsigned int ioc_num = 0, ioc_err = 0;
 
@@ -135,7 +137,7 @@
 {
 	(void) pthread_mutex_lock(&g_lock);
 	if (g_refcount == 0) {
-		g_fd = open(ZFS_DEV, O_RDWR);
+		g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
 		if (g_fd < 0) {
 			(void) pthread_mutex_unlock(&g_lock);
 			return (errno);
@@ -208,7 +210,7 @@
 		}
 	}
 
-	while (ioctl(g_fd, ioc, &zc) != 0) {
+	while (zfs_ioctl_fd(g_fd, ioc, &zc) != 0) {
 		/*
 		 * If ioctl exited with ENOMEM, we retry the ioctl after
 		 * increasing the size of the destination nvlist.
@@ -231,7 +233,7 @@
 			break;
 		}
 	}
-	if (zc.zc_nvlist_dst_filled) {
+	if (zc.zc_nvlist_dst_filled && resultp != NULL) {
 		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
 		    zc.zc_nvlist_dst_size);
 	}
@@ -291,13 +293,13 @@
 	 * The promote ioctl is still legacy, so we need to construct our
 	 * own zfs_cmd_t rather than using lzc_ioctl().
 	 */
-	zfs_cmd_t zc = { "\0" };
+	zfs_cmd_t zc = {"\0"};
 
 	ASSERT3S(g_refcount, >, 0);
 	VERIFY3S(g_fd, !=, -1);
 
 	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
-	if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
+	if (zfs_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
 		int error = errno;
 		if (error == EEXIST && snapnamebuf != NULL)
 			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
@@ -307,25 +309,16 @@
 }
 
 int
-lzc_remap(const char *fsname)
-{
-	int error;
-	nvlist_t *args = fnvlist_alloc();
-	error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
-	nvlist_free(args);
-	return (error);
-}
-
-int
 lzc_rename(const char *source, const char *target)
 {
-	zfs_cmd_t zc = { "\0" };
+	zfs_cmd_t zc = {"\0"};
 	int error;
+
 	ASSERT3S(g_refcount, >, 0);
 	VERIFY3S(g_fd, !=, -1);
 	(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
-	error = ioctl(g_fd, ZFS_IOC_RENAME, &zc);
+	error = zfs_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
 	if (error != 0)
 		error = errno;
 	return (error);
@@ -475,7 +468,7 @@
 	VERIFY3S(g_fd, !=, -1);
 
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
+	return (zfs_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
 }
 
 /*
@@ -632,13 +625,43 @@
 lzc_send(const char *snapname, const char *from, int fd,
     enum lzc_send_flags flags)
 {
-	return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
+	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
+	    NULL));
+}
+
+int
+lzc_send_redacted(const char *snapname, const char *from, int fd,
+    enum lzc_send_flags flags, const char *redactbook)
+{
+	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
+	    redactbook));
 }
 
 int
 lzc_send_resume(const char *snapname, const char *from, int fd,
     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
 {
+	return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
+	    resumeoff, NULL));
+}
+
+/*
+ * snapname: The name of the "tosnap", or the snapshot whose contents we are
+ * sending.
+ * from: The name of the "fromsnap", or the incremental source.
+ * fd: File descriptor to write the stream to.
+ * flags: flags that determine features to be used by the stream.
+ * resumeobj: Object to resume from, for resuming send
+ * resumeoff: Offset to resume from, for resuming send.
+ * redactnv: nvlist of string -> boolean(ignored) containing the names of all
+ * the snapshots that we should redact with respect to.
+ * redactbook: Name of the redaction bookmark to create.
+ */
+int
+lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
+    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
+    const char *redactbook)
+{
 	nvlist_t *args;
 	int err;
 
@@ -654,10 +677,17 @@
 		fnvlist_add_boolean(args, "compressok");
 	if (flags & LZC_SEND_FLAG_RAW)
 		fnvlist_add_boolean(args, "rawok");
+	if (flags & LZC_SEND_FLAG_SAVED)
+		fnvlist_add_boolean(args, "savedok");
+	if (flags & LZC_SEND_FLAG_BLOCKDIFF)
+		fnvlist_add_boolean(args, "blockdiff");
 	if (resumeobj != 0 || resumeoff != 0) {
 		fnvlist_add_uint64(args, "resume_object", resumeobj);
 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
 	}
+	if (redactbook != NULL)
+		fnvlist_add_string(args, "redactbook", redactbook);
+
 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
 	nvlist_free(args);
 	return (err);
@@ -676,11 +706,13 @@
  * are traversed, looking for blocks with a birth time since the creation TXG of
  * the snapshot this bookmark was created from.  This will result in
  * significantly more I/O and be less efficient than a send space estimation on
- * an equivalent snapshot.
+ * an equivalent snapshot. This process is also used if redact_snaps is
+ * non-null.
  */
 int
-lzc_send_space(const char *snapname, const char *from,
-    enum lzc_send_flags flags, uint64_t *spacep)
+lzc_send_space_resume_redacted(const char *snapname, const char *from,
+    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
+    uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
 {
 	nvlist_t *args;
 	nvlist_t *result;
@@ -697,6 +729,16 @@
 		fnvlist_add_boolean(args, "compressok");
 	if (flags & LZC_SEND_FLAG_RAW)
 		fnvlist_add_boolean(args, "rawok");
+	if (resumeobj != 0 || resumeoff != 0) {
+		fnvlist_add_uint64(args, "resume_object", resumeobj);
+		fnvlist_add_uint64(args, "resume_offset", resumeoff);
+		fnvlist_add_uint64(args, "bytes", resume_bytes);
+	}
+	if (redactbook != NULL)
+		fnvlist_add_string(args, "redactbook", redactbook);
+	if (fd != -1)
+		fnvlist_add_int32(args, "fd", fd);
+
 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 	nvlist_free(args);
 	if (err == 0)
@@ -705,6 +747,14 @@
 	return (err);
 }
 
+int
+lzc_send_space(const char *snapname, const char *from,
+    enum lzc_send_flags flags, uint64_t *spacep)
+{
+	return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
+	    NULL, -1, spacep));
+}
+
 static int
 recv_read(int fd, void *buf, int ilen)
 {
@@ -736,14 +786,14 @@
 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
     uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
     boolean_t resumable, boolean_t raw, int input_fd,
-    const dmu_replay_record_t *begin_record, int cleanup_fd,
-    uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
-    nvlist_t **errors)
+    const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
+    uint64_t *errflags, nvlist_t **errors)
 {
 	dmu_replay_record_t drr;
 	char fsname[MAXPATHLEN];
 	char *atp;
 	int error;
+	boolean_t payload = B_FALSE;
 
 	ASSERT3S(g_refcount, >, 0);
 	VERIFY3S(g_fd, !=, -1);
@@ -774,13 +824,13 @@
 			return (error);
 	} else {
 		drr = *begin_record;
+		payload = (begin_record->drr_payloadlen != 0);
 	}
 
 	/*
-	 * Raw receives, resumable receives, and receives that include a
-	 * wrapping key all use the new interface.
+	 * All receives with a payload should use the new interface.
 	 */
-	if (resumable || raw || wkeydata != NULL) {
+	if (resumable || raw || wkeydata != NULL || payload) {
 		nvlist_t *outnvl = NULL;
 		nvlist_t *innvl = fnvlist_alloc();
 
@@ -820,12 +870,6 @@
 		if (resumable)
 			fnvlist_add_boolean(innvl, "resumable");
 
-		if (cleanup_fd >= 0)
-			fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
-
-		if (action_handle != NULL)
-			fnvlist_add_uint64(innvl, "action_handle",
-			    *action_handle);
 
 		error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
 
@@ -837,10 +881,6 @@
 			error = nvlist_lookup_uint64(outnvl, "error_flags",
 			    errflags);
 
-		if (error == 0 && action_handle != NULL)
-			error = nvlist_lookup_uint64(outnvl, "action_handle",
-			    action_handle);
-
 		if (error == 0 && errors != NULL) {
 			nvlist_t *nvl;
 			error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
@@ -852,7 +892,8 @@
 		fnvlist_free(outnvl);
 	} else {
 		zfs_cmd_t zc = {"\0"};
-		char *packed = NULL;
+		char *rp_packed = NULL;
+		char *lp_packed = NULL;
 		size_t size;
 
 		ASSERT3S(g_refcount, >, 0);
@@ -861,14 +902,14 @@
 		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 
 		if (recvdprops != NULL) {
-			packed = fnvlist_pack(recvdprops, &size);
-			zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
+			rp_packed = fnvlist_pack(recvdprops, &size);
+			zc.zc_nvlist_src = (uint64_t)(uintptr_t)rp_packed;
 			zc.zc_nvlist_src_size = size;
 		}
 
 		if (localprops != NULL) {
-			packed = fnvlist_pack(localprops, &size);
-			zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
+			lp_packed = fnvlist_pack(localprops, &size);
+			zc.zc_nvlist_conf = (uint64_t)(uintptr_t)lp_packed;
 			zc.zc_nvlist_conf_size = size;
 		}
 
@@ -883,17 +924,11 @@
 		zc.zc_cleanup_fd = -1;
 		zc.zc_action_handle = 0;
 
-		if (cleanup_fd >= 0)
-			zc.zc_cleanup_fd = cleanup_fd;
-
-		if (action_handle != NULL)
-			zc.zc_action_handle = *action_handle;
-
 		zc.zc_nvlist_dst_size = 128 * 1024;
 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
 		    malloc(zc.zc_nvlist_dst_size);
 
-		error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
+		error = zfs_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
 		if (error != 0) {
 			error = errno;
 		} else {
@@ -903,17 +938,16 @@
 			if (errflags != NULL)
 				*errflags = zc.zc_obj;
 
-			if (action_handle != NULL)
-				*action_handle = zc.zc_action_handle;
-
 			if (errors != NULL)
 				VERIFY0(nvlist_unpack(
 				    (void *)(uintptr_t)zc.zc_nvlist_dst,
 				    zc.zc_nvlist_dst_size, errors, KM_SLEEP));
 		}
 
-		if (packed != NULL)
-			fnvlist_pack_free(packed, size);
+		if (rp_packed != NULL)
+			fnvlist_pack_free(rp_packed, size);
+		if (lp_packed != NULL)
+			fnvlist_pack_free(lp_packed, size);
 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
 	}
 
@@ -938,7 +972,7 @@
     boolean_t force, boolean_t raw, int fd)
 {
 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-	    B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
+	    B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
 }
 
 /*
@@ -952,7 +986,7 @@
     boolean_t force, boolean_t raw, int fd)
 {
 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-	    B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
+	    B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
 }
 
 /*
@@ -975,7 +1009,7 @@
 		return (EINVAL);
 
 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-	    resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
+	    resumable, raw, fd, begin_record, NULL, NULL, NULL));
 }
 
 /*
@@ -991,9 +1025,7 @@
  * The 'errflags' value will contain zprop_errflags_t flags which are
  * used to describe any failures.
  *
- * The 'action_handle' is used to pass the handle for this guid/ds mapping.
- * It should be set to zero on first call and will contain an updated handle
- * on success, it should be passed in subsequent calls.
+ * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
  *
  * The 'errors' nvlist contains an entry for each unapplied received
  * property.  Callers are responsible for freeing this nvlist.
@@ -1005,8 +1037,8 @@
     nvlist_t **errors)
 {
 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
-	    resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
-	    errflags, action_handle, errors));
+	    resumable, raw, input_fd, begin_record,
+	    read_bytes, errflags, errors));
 }
 
 /*
@@ -1025,8 +1057,8 @@
     nvlist_t **errors)
 {
 	return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
-	    force, resumable, raw, input_fd, begin_record, cleanup_fd,
-	    read_bytes, errflags, action_handle, errors));
+	    force, resumable, raw, input_fd, begin_record,
+	    read_bytes, errflags, errors));
 }
 
 /*
@@ -1080,11 +1112,13 @@
 }
 
 /*
- * Creates bookmarks.
+ * Creates new bookmarks from existing snapshots or bookmarks.
  *
- * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
- * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
- * snapshots must be in the same pool.
+ * The bookmarks nvlist maps from the full name of the new bookmark to
+ * the full name of the source snapshot or bookmark.
+ * All the bookmarks and snapshots must be in the same pool.
+ * The names of the new bookmarks must be unique.
+ * => see function dsl_bookmark_create_nvl_validate
  *
  * The returned results nvlist will have an entry for each bookmark that failed.
  * The value will be the (int32) error code.
@@ -1099,7 +1133,7 @@
 	int error;
 	char pool[ZFS_MAX_DATASET_NAME_LEN];
 
-	/* determine the pool name */
+	/* determine pool name from first bookmark */
 	elem = nvlist_next_nvpair(bookmarks, NULL);
 	if (elem == NULL)
 		return (0);
@@ -1118,19 +1152,33 @@
  * parameter is an nvlist of property names (with no values) that will be
  * returned for each bookmark.
  *
- * The following are valid properties on bookmarks, all of which are numbers
- * (represented as uint64 in the nvlist)
+ * The following are valid properties on bookmarks, most of which are numbers
+ * (represented as uint64 in the nvlist), except redact_snaps, which is a
+ * uint64 array, and redact_complete, which is a boolean
  *
  * "guid" - globally unique identifier of the snapshot it refers to
  * "createtxg" - txg when the snapshot it refers to was created
  * "creation" - timestamp when the snapshot it refers to was created
  * "ivsetguid" - IVset guid for identifying encrypted snapshots
+ * "redact_snaps" - list of guids of the redaction snapshots for the specified
+ *     bookmark.  If the bookmark is not a redaction bookmark, the nvlist will
+ *     not contain an entry for this value.  If it is redacted with respect to
+ *     no snapshots, it will contain "value" -> NULL uint64 array.
+ * "redact_complete" - boolean value; true if the redaction bookmark is
+ *     complete, false otherwise.
  *
  * The format of the returned nvlist as follows:
  * <short name of bookmark> -> {
  *     <name of property> -> {
  *         "value" -> uint64
  *     }
+ *     ...
+ *     "redact_snaps" -> {
+ *         "value" -> uint64 array
+ *     }
+ *     "redact_complete" -> {
+ *         "value" -> boolean value
+ *     }
  *  }
  */
 int
@@ -1140,6 +1188,34 @@
 }
 
 /*
+ * Get bookmark properties.
+ *
+ * Given a bookmark's full name, retrieve all properties for the bookmark.
+ *
+ * The format of the returned property list is as follows:
+ * {
+ *     <name of property> -> {
+ *         "value" -> uint64
+ *     }
+ *     ...
+ *     "redact_snaps" -> {
+ *         "value" -> uint64 array
+ *     }
+ * }
+ */
+int
+lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
+{
+	int error;
+
+	nvlist_t *innvl = fnvlist_alloc();
+	error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
+	fnvlist_free(innvl);
+
+	return (error);
+}
+
+/*
  * Destroys bookmarks.
  *
  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
@@ -1479,3 +1554,92 @@
 
 	return (error);
 }
+
+/*
+ * Create a redaction bookmark named bookname by redacting snapshot with respect
+ * to all the snapshots in snapnv.
+ */
+int
+lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
+{
+	nvlist_t *args = fnvlist_alloc();
+	fnvlist_add_string(args, "bookname", bookname);
+	fnvlist_add_nvlist(args, "snapnv", snapnv);
+	int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
+	fnvlist_free(args);
+	return (error);
+}
+
+static int
+wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
+    uint64_t tag, boolean_t *waited)
+{
+	nvlist_t *args = fnvlist_alloc();
+	nvlist_t *result = NULL;
+
+	fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
+	if (use_tag)
+		fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
+
+	int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
+
+	if (error == 0 && waited != NULL)
+		*waited = fnvlist_lookup_boolean_value(result,
+		    ZPOOL_WAIT_WAITED);
+
+	fnvlist_free(args);
+	fnvlist_free(result);
+
+	return (error);
+}
+
+int
+lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
+{
+	return (wait_common(pool, activity, B_FALSE, 0, waited));
+}
+
+int
+lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
+    boolean_t *waited)
+{
+	return (wait_common(pool, activity, B_TRUE, tag, waited));
+}
+
+int
+lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
+{
+	nvlist_t *args = fnvlist_alloc();
+	nvlist_t *result = NULL;
+
+	fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
+
+	int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
+
+	if (error == 0 && waited != NULL)
+		*waited = fnvlist_lookup_boolean_value(result,
+		    ZFS_WAIT_WAITED);
+
+	fnvlist_free(args);
+	fnvlist_free(result);
+
+	return (error);
+}
+
+/*
+ * Set the bootenv contents for the given pool.
+ */
+int
+lzc_set_bootenv(const char *pool, const nvlist_t *env)
+{
+	return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
+}
+
+/*
+ * Get the contents of the bootenv of the given pool.
+ */
+int
+lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
+{
+	return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
+}

diff --git a/zfs/lib/libzfs_core/libzfs_core.pc.in b/zfs/lib/libzfs_core/libzfs_core.pc.in
new file mode 100644
index 0000000..bc9582e
--- /dev/null
+++ b/zfs/lib/libzfs_core/libzfs_core.pc.in

@@ -0,0 +1,13 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libzfs_core
+Description: LibZFS core library
+Version: @VERSION@
+URL: https://github.com/openzfs/zfs
+Requires.private: @LIBBLKID_PC@ @LIBUUID_PC@ @LIBTIRPC_PC@ @ZLIB_PC@
+Cflags: -I${includedir}/libzfs -I${includedir}/libspl
+Libs: -L${libdir} -lzfs_core -lnvpair
+Libs.private: @LIBCLOCK_GETTIME@ @LIBUDEV_LIBS@ -lm -pthread

diff --git a/zfs/lib/libzfs_core/libzfs_core.suppr b/zfs/lib/libzfs_core/libzfs_core.suppr
new file mode 100644
index 0000000..109d331
--- /dev/null
+++ b/zfs/lib/libzfs_core/libzfs_core.suppr

@@ -0,0 +1,5 @@
+[suppress_type]
+	name = FILE*
+
+[suppress_type]
+	name = pthread_cond_t

diff --git a/zfs/lib/libzfsbootenv/.gitignore b/zfs/lib/libzfsbootenv/.gitignore
new file mode 100644
index 0000000..3fea5c6
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/.gitignore

@@ -0,0 +1 @@
+/libzfsbootenv.pc

diff --git a/zfs/lib/libzfsbootenv/Makefile.am b/zfs/lib/libzfsbootenv/Makefile.am
new file mode 100644
index 0000000..8a6bb76
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/Makefile.am

@@ -0,0 +1,42 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgconfig_DATA = libzfsbootenv.pc
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
+lib_LTLIBRARIES = libzfsbootenv.la
+
+include $(top_srcdir)/config/Abigail.am
+
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
+USER_C = \
+	lzbe_device.c \
+	lzbe_pair.c \
+	lzbe_util.c
+
+dist_libzfsbootenv_la_SOURCES = \
+	$(USER_C)
+
+libzfsbootenv_la_LIBADD = \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+libzfsbootenv_la_LDFLAGS =
+
+if !ASAN_ENABLED
+libzfsbootenv_la_LDFLAGS += -Wl,-z,defs
+endif
+
+libzfsbootenv_la_LDFLAGS += -version-info 1:0:0
+
+include $(top_srcdir)/config/CppCheck.am
+
+# Library ABI
+EXTRA_DIST = libzfsbootenv.abi libzfsbootenv.suppr

diff --git a/zfs/lib/libzfsbootenv/libzfsbootenv.abi b/zfs/lib/libzfsbootenv/libzfsbootenv.abi
new file mode 100644
index 0000000..0ddd41d
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/libzfsbootenv.abi

@@ -0,0 +1,577 @@
+<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfsbootenv.so.1'>
+  <elf-needed>
+    <dependency name='libzfs.so.4'/>
+    <dependency name='libzfs_core.so.3'/>
+    <dependency name='libuuid.so.1'/>
+    <dependency name='libblkid.so.1'/>
+    <dependency name='libudev.so.1'/>
+    <dependency name='libuutil.so.3'/>
+    <dependency name='libm.so.6'/>
+    <dependency name='libcrypto.so.1.1'/>
+    <dependency name='libz.so.1'/>
+    <dependency name='libnvpair.so.3'/>
+    <dependency name='libtirpc.so.3'/>
+    <dependency name='libc.so.6'/>
+  </elf-needed>
+  <elf-function-symbols>
+    <elf-symbol name='lzbe_add_pair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_bootenv_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_get_boot_device' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_nvlist_free' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_nvlist_get' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_nvlist_set' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_remove_pair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='lzbe_set_boot_device' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+  </elf-function-symbols>
+  <abi-instr address-size='64' path='lzbe_device.c' language='LANG_C99'>
+    <type-decl name='char' size-in-bits='8' id='a84c031d'/>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='8' id='89feb1ec'>
+      <subrange length='1' type-id='7359adad' id='52f813b4'/>
+    </array-type-def>
+    <array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
+      <subrange length='20' type-id='7359adad' id='fdca39cf'/>
+    </array-type-def>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='libzfs_handle' is-struct='yes' visibility='default' is-declaration-only='yes' id='c8a9d9d8'/>
+    <class-decl name='zpool_handle' is-struct='yes' visibility='default' is-declaration-only='yes' id='67002a8a'/>
+    <type-decl name='int' size-in-bits='32' id='95e97e5e'/>
+    <type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
+    <type-decl name='signed char' size-in-bits='8' id='28577a57'/>
+    <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
+    <type-decl name='unsigned int' size-in-bits='32' id='f0981eeb'/>
+    <type-decl name='unsigned long int' size-in-bits='64' id='7359adad'/>
+    <type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
+    <type-decl name='variadic parameter type' id='2c1145c5'/>
+    <type-decl name='void' id='48b5725f'/>
+    <typedef-decl name='zpool_handle_t' type-id='67002a8a' id='b1efc708'/>
+    <typedef-decl name='libzfs_handle_t' type-id='c8a9d9d8' id='95942d0c'/>
+    <enum-decl name='lzbe_flags' id='2b77720b'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='lzbe_add' value='0'/>
+      <enumerator name='lzbe_replace' value='1'/>
+    </enum-decl>
+    <typedef-decl name='lzbe_flags_t' type-id='2b77720b' id='a1936f04'/>
+    <class-decl name='nvlist' size-in-bits='192' is-struct='yes' visibility='default' id='ac266fd9'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='nvl_version' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='32'>
+        <var-decl name='nvl_nvflag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='nvl_priv' type-id='9c313c2d' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='nvl_flag' type-id='8f92235e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='160'>
+        <var-decl name='nvl_pad' type-id='3ff5601b' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='nvlist_t' type-id='ac266fd9' id='8e8d4be3'/>
+    <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
+      <underlying-type type-id='9cac1fee'/>
+      <enumerator name='B_FALSE' value='0'/>
+      <enumerator name='B_TRUE' value='1'/>
+    </enum-decl>
+    <typedef-decl name='boolean_t' type-id='f58c8277' id='c19b74c3'/>
+    <typedef-decl name='int32_t' type-id='33f57a65' id='3ff5601b'/>
+    <typedef-decl name='uint32_t' type-id='62f1140c' id='8f92235e'/>
+    <typedef-decl name='uint64_t' type-id='8910171f' id='9c313c2d'/>
+    <typedef-decl name='__int32_t' type-id='95e97e5e' id='33f57a65'/>
+    <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
+    <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
+    <typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
+    <typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
+    <typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
+    <typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
+    <class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
+      <data-member access='public' layout-offset-in-bits='0'>
+        <var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='64'>
+        <var-decl name='_IO_read_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='128'>
+        <var-decl name='_IO_read_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='192'>
+        <var-decl name='_IO_read_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='256'>
+        <var-decl name='_IO_write_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='_IO_write_ptr' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='384'>
+        <var-decl name='_IO_write_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='448'>
+        <var-decl name='_IO_buf_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='512'>
+        <var-decl name='_IO_buf_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='576'>
+        <var-decl name='_IO_save_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='640'>
+        <var-decl name='_IO_backup_base' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='704'>
+        <var-decl name='_IO_save_end' type-id='26a90f95' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='768'>
+        <var-decl name='_markers' type-id='e4c6fa61' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='832'>
+        <var-decl name='_chain' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='896'>
+        <var-decl name='_fileno' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='928'>
+        <var-decl name='_flags2' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='960'>
+        <var-decl name='_old_offset' type-id='79989e9c' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1024'>
+        <var-decl name='_cur_column' type-id='8efea9e5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1040'>
+        <var-decl name='_vtable_offset' type-id='28577a57' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1048'>
+        <var-decl name='_shortbuf' type-id='89feb1ec' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1088'>
+        <var-decl name='_lock' type-id='cecf4ea7' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1152'>
+        <var-decl name='_offset' type-id='724e4de6' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1216'>
+        <var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1280'>
+        <var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1344'>
+        <var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1408'>
+        <var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1472'>
+        <var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1536'>
+        <var-decl name='_mode' type-id='95e97e5e' visibility='default'/>
+      </data-member>
+      <data-member access='public' layout-offset-in-bits='1568'>
+        <var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
+      </data-member>
+    </class-decl>
+    <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
+    <pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
+    <qualified-type-def type-id='822cd80b' restrict='yes' id='e75a27e9'/>
+    <pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
+    <pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
+    <pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
+    <pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
+    <pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
+    <pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
+    <pointer-type-def type-id='26a90f95' size-in-bits='64' id='9b23c9ad'/>
+    <qualified-type-def type-id='9b23c9ad' restrict='yes' id='8c85230f'/>
+    <qualified-type-def type-id='a84c031d' const='yes' id='9b45d938'/>
+    <pointer-type-def type-id='9b45d938' size-in-bits='64' id='80f4b756'/>
+    <qualified-type-def type-id='80f4b756' restrict='yes' id='9d26089a'/>
+    <qualified-type-def type-id='8e8d4be3' const='yes' id='693c3853'/>
+    <pointer-type-def type-id='693c3853' size-in-bits='64' id='22cce67b'/>
+    <pointer-type-def type-id='95942d0c' size-in-bits='64' id='b0382bb3'/>
+    <pointer-type-def type-id='8e8d4be3' size-in-bits='64' id='5ce45b60'/>
+    <pointer-type-def type-id='5ce45b60' size-in-bits='64' id='857bb57e'/>
+    <pointer-type-def type-id='9c313c2d' size-in-bits='64' id='5d6479ae'/>
+    <pointer-type-def type-id='48b5725f' size-in-bits='64' id='eaa32e2f'/>
+    <pointer-type-def type-id='b1efc708' size-in-bits='64' id='4c81de99'/>
+    <class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
+    <class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
+    <class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
+    <class-decl name='libzfs_handle' is-struct='yes' visibility='default' is-declaration-only='yes' id='c8a9d9d8'/>
+    <class-decl name='zpool_handle' is-struct='yes' visibility='default' is-declaration-only='yes' id='67002a8a'/>
+    <function-decl name='libzfs_init' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='b0382bb3'/>
+    </function-decl>
+    <function-decl name='libzfs_fini' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b0382bb3'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='libzfs_error_description' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b0382bb3'/>
+      <return type-id='80f4b756'/>
+    </function-decl>
+    <function-decl name='zpool_open' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='4c81de99'/>
+    </function-decl>
+    <function-decl name='zpool_close' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4c81de99'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_set_bootenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='22cce67b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_get_bootenv' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9b23c9ad'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_exists' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='c19b74c3'/>
+    </function-decl>
+    <function-decl name='fnvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='5ce45b60'/>
+    </function-decl>
+    <function-decl name='fnvlist_free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='fnvlist_remove' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='lzbe_set_boot_device' mangled-name='lzbe_set_boot_device' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_set_boot_device'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='a1936f04' name='flag'/>
+      <parameter type-id='80f4b756' name='device'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzbe_get_boot_device' mangled-name='lzbe_get_boot_device' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_get_boot_device'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='9b23c9ad' name='device'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='fprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e75a27e9'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='asprintf' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='8c85230f'/>
+      <parameter type-id='9d26089a'/>
+      <parameter is-variadic='yes'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='free' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='eaa32e2f'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='strncmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b59d7dce'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strdup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
+    <function-decl name='strlen' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='b59d7dce'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='lzbe_pair.c' language='LANG_C99'>
+    <type-decl name='short int' size-in-bits='16' id='a2185560'/>
+    <type-decl name='unsigned char' size-in-bits='8' id='002ac4a6'/>
+    <typedef-decl name='uchar_t' type-id='002ac4a6' id='d8bf0010'/>
+    <typedef-decl name='uint_t' type-id='f0981eeb' id='3502e3ff'/>
+    <typedef-decl name='int8_t' type-id='2171a512' id='ee31ee44'/>
+    <typedef-decl name='int16_t' type-id='03896e23' id='23bd8cb5'/>
+    <typedef-decl name='int64_t' type-id='0c9942d2' id='9da381c4'/>
+    <typedef-decl name='uint8_t' type-id='c51d6389' id='b96825af'/>
+    <typedef-decl name='uint16_t' type-id='253c2d2a' id='149c6638'/>
+    <typedef-decl name='__int8_t' type-id='28577a57' id='2171a512'/>
+    <typedef-decl name='__uint8_t' type-id='002ac4a6' id='c51d6389'/>
+    <typedef-decl name='__int16_t' type-id='a2185560' id='03896e23'/>
+    <typedef-decl name='__uint16_t' type-id='8efea9e5' id='253c2d2a'/>
+    <typedef-decl name='__int64_t' type-id='bd54fe1a' id='0c9942d2'/>
+    <pointer-type-def type-id='c19b74c3' size-in-bits='64' id='37e3bd22'/>
+    <qualified-type-def type-id='26a90f95' const='yes' id='57de658a'/>
+    <pointer-type-def type-id='57de658a' size-in-bits='64' id='f319fae0'/>
+    <pointer-type-def type-id='23bd8cb5' size-in-bits='64' id='f76f73d0'/>
+    <pointer-type-def type-id='3ff5601b' size-in-bits='64' id='4aafb922'/>
+    <pointer-type-def type-id='9da381c4' size-in-bits='64' id='cb785ebf'/>
+    <pointer-type-def type-id='ee31ee44' size-in-bits='64' id='256d5229'/>
+    <pointer-type-def type-id='d8bf0010' size-in-bits='64' id='45b65157'/>
+    <pointer-type-def type-id='149c6638' size-in-bits='64' id='8a121f49'/>
+    <pointer-type-def type-id='8f92235e' size-in-bits='64' id='90421557'/>
+    <pointer-type-def type-id='b96825af' size-in-bits='64' id='ae3e8ca6'/>
+    <pointer-type-def type-id='eaa32e2f' size-in-bits='64' id='63e171df'/>
+    <function-decl name='nvlist_alloc' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_dup' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean_value' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='c19b74c3'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_byte' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='d8bf0010'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int8' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ee31ee44'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint8' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='b96825af'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int16' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='23bd8cb5'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint16' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='149c6638'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='3ff5601b'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint32' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8f92235e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9da381c4'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='9c313c2d'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_boolean_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='37e3bd22'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_byte_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='45b65157'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int8_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='256d5229'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint8_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='ae3e8ca6'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int16_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f76f73d0'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint16_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='8a121f49'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int32_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='4aafb922'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint32_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='90421557'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_int64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='cb785ebf'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_uint64_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='5d6479ae'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_string_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='f319fae0'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_add_nvlist_array' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <parameter type-id='3502e3ff'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_remove_all' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='nvlist_lookup_nvlist' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='5ce45b60'/>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='857bb57e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzbe_nvlist_get' mangled-name='lzbe_nvlist_get' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_nvlist_get'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='80f4b756' name='key'/>
+      <parameter type-id='63e171df' name='ptr'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzbe_nvlist_set' mangled-name='lzbe_nvlist_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_nvlist_set'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='80f4b756' name='key'/>
+      <parameter type-id='eaa32e2f' name='ptr'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzbe_nvlist_free' mangled-name='lzbe_nvlist_free' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_nvlist_free'>
+      <parameter type-id='eaa32e2f' name='ptr'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='lzbe_add_pair' mangled-name='lzbe_add_pair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_add_pair'>
+      <parameter type-id='eaa32e2f' name='ptr'/>
+      <parameter type-id='80f4b756' name='key'/>
+      <parameter type-id='80f4b756' name='type'/>
+      <parameter type-id='eaa32e2f' name='value'/>
+      <parameter type-id='b59d7dce' name='size'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='lzbe_remove_pair' mangled-name='lzbe_remove_pair' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_remove_pair'>
+      <parameter type-id='eaa32e2f' name='ptr'/>
+      <parameter type-id='80f4b756' name='key'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='strcmp' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+  <abi-instr address-size='64' path='lzbe_util.c' language='LANG_C99'>
+    <function-decl name='nvlist_print' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='822cd80b'/>
+      <parameter type-id='5ce45b60'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='lzbe_bootenv_print' mangled-name='lzbe_bootenv_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_bootenv_print'>
+      <parameter type-id='80f4b756' name='pool'/>
+      <parameter type-id='80f4b756' name='nvlist'/>
+      <parameter type-id='822cd80b' name='of'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+  </abi-instr>
+</abi-corpus>

diff --git a/zfs/lib/libzfsbootenv/libzfsbootenv.pc.in b/zfs/lib/libzfsbootenv/libzfsbootenv.pc.in
new file mode 100644
index 0000000..986286d
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/libzfsbootenv.pc.in

@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libzfsbootenv
+Description: LibZFSBootENV library
+Version: @VERSION@
+URL: https://github.com/openzfs/zfs
+Requires: libzfs
+Cflags: -I${includedir}
+Libs: -L${libdir} -lzfsbootenv

diff --git a/zfs/lib/libzfsbootenv/libzfsbootenv.suppr b/zfs/lib/libzfsbootenv/libzfsbootenv.suppr
new file mode 100644
index 0000000..f4db8a4
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/libzfsbootenv.suppr

@@ -0,0 +1,2 @@
+[suppress_type]
+	name = FILE*

diff --git a/zfs/lib/libzfsbootenv/lzbe_device.c b/zfs/lib/libzfsbootenv/lzbe_device.c
new file mode 100644
index 0000000..2d9c7b7
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/lzbe_device.c

@@ -0,0 +1,163 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+#include <sys/zfs_bootenv.h>
+#include <sys/vdev_impl.h>
+
+/*
+ * Store device name to zpool label bootenv area.
+ * This call will set the bootenv version to VB_NVLIST; if the bootenv
+ * currently holds another version, the old data will be replaced.
+ *
+ * pool:   name of the pool to update; must be a non-empty string.
+ * flag:   lzbe_add updates an existing VB_NVLIST bootenv (a new list is
+ *         created when none can be read or its version differs),
+ *         lzbe_replace always starts from a new, empty list.
+ * device: used verbatim when it starts with "zfs:", otherwise stored as
+ *         "zfs:<device>:". NULL or "" removes OS_BOOTONCE instead.
+ *
+ * Returns 0 on success, -1 for an invalid pool or flag or when the pool
+ * cannot be opened, ENOMEM when the descriptor cannot be built (nothing
+ * is written in that case), otherwise the zpool_set_bootenv() error.
+ */
+int
+lzbe_set_boot_device(const char *pool, lzbe_flags_t flag, const char *device)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	char *descriptor;
+	uint64_t version;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0')
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	switch (flag) {
+	case lzbe_add:
+		rv = zpool_get_bootenv(zphdl, &nv);
+		if (rv == 0) {
+			/*
+			 * We got the nvlist, check for version.
+			 * if version is missing or is not VB_NVLIST,
+			 * create new list.
+			 */
+			rv = nvlist_lookup_uint64(nv, BOOTENV_VERSION,
+			    &version);
+			if (rv == 0 && version == VB_NVLIST)
+				break;
+
+			/* Drop this nvlist */
+			fnvlist_free(nv);
+		}
+		fallthrough;
+	case lzbe_replace:
+		nv = fnvlist_alloc();
+		break;
+	default:
+		/* Unknown flag: release the handles opened above. */
+		zpool_close(zphdl);
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	/*
+	 * rv may still hold -1 or a lookup error from the switch above;
+	 * from here on it only tracks failures of building the new list.
+	 */
+	rv = 0;
+
+	/* version is mandatory */
+	fnvlist_add_uint64(nv, BOOTENV_VERSION, VB_NVLIST);
+
+	/*
+	 * If device name is empty, remove boot device configuration.
+	 */
+	if ((device == NULL || *device == '\0')) {
+		if (nvlist_exists(nv, OS_BOOTONCE))
+			fnvlist_remove(nv, OS_BOOTONCE);
+	} else {
+		/*
+		 * Use device name directly if it does start with
+		 * prefix "zfs:". Otherwise, add prefix and suffix.
+		 */
+		if (strncmp(device, "zfs:", 4) == 0) {
+			fnvlist_add_string(nv, OS_BOOTONCE, device);
+		} else {
+			if (asprintf(&descriptor, "zfs:%s:", device) > 0) {
+				fnvlist_add_string(nv, OS_BOOTONCE, descriptor);
+				free(descriptor);
+			} else {
+				rv = ENOMEM;
+			}
+		}
+	}
+
+	/*
+	 * Only write the label when the list was built completely, so an
+	 * allocation failure neither stores a bootenv without the requested
+	 * device nor gets masked by the zpool_set_bootenv() result.
+	 */
+	if (rv == 0) {
+		rv = zpool_set_bootenv(zphdl, nv);
+		if (rv != 0) {
+			fprintf(stderr, "%s\n",
+			    libzfs_error_description(hdl));
+		}
+	}
+
+	fnvlist_free(nv);
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Return boot device name from bootenv, if set.
+ *
+ * pool:   name of the pool to read; must be a non-empty string.
+ * device: on success receives a strdup()ed copy of the OS_BOOTONCE value
+ *         with the leading "zfs:" and one trailing ':' (if present)
+ *         removed; the caller releases it with free(). It is left
+ *         untouched on failure.
+ *
+ * Returns 0 on success, -1 for invalid arguments or when the pool cannot
+ * be opened, EINVAL when the stored value does not start with "zfs:",
+ * ENOMEM when the copy cannot be allocated, otherwise the error from
+ * zpool_get_bootenv() or nvlist_lookup_string().
+ */
+int
+lzbe_get_boot_device(const char *pool, char **device)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	char *val;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0' || device == NULL)
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	rv = zpool_get_bootenv(zphdl, &nv);
+	if (rv == 0) {
+		rv = nvlist_lookup_string(nv, OS_BOOTONCE, &val);
+		if (rv == 0) {
+			/*
+			 * zfs device descriptor is in form of "zfs:dataset:",
+			 * we only do need dataset name.
+			 */
+			if (strncmp(val, "zfs:", 4) == 0) {
+				val += 4;
+				val = strdup(val);
+				if (val != NULL) {
+					size_t len = strlen(val);
+
+					/*
+					 * A bare "zfs:" leaves an empty
+					 * string; do not index before it.
+					 */
+					if (len > 0 && val[len - 1] == ':')
+						val[len - 1] = '\0';
+					*device = val;
+				} else {
+					rv = ENOMEM;
+				}
+			} else {
+				rv = EINVAL;
+			}
+		}
+		nvlist_free(nv);
+	}
+
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}

diff --git a/zfs/lib/libzfsbootenv/lzbe_pair.c b/zfs/lib/libzfsbootenv/lzbe_pair.c
new file mode 100644
index 0000000..831355b
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/lzbe_pair.c

@@ -0,0 +1,347 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+#include <sys/zfs_bootenv.h>
+#include <sys/vdev_impl.h>
+
+/*
+ * Get or create nvlist. If key is not NULL, return a copy of the nvlist
+ * stored under key in bootenv, or a newly allocated empty list when the
+ * key cannot be looked up. If key is NULL, return the whole bootenv.
+ *
+ * pool: name of the pool to read; must be a non-empty string.
+ * key:  name of the nested nvlist, or NULL for the complete bootenv.
+ * ptr:  receives the resulting nvlist whenever the bootenv itself could
+ *       be read; it is set to NULL if copying or allocating the nested
+ *       list fails. Release the list with lzbe_nvlist_free().
+ *
+ * Returns 0 on success, -1 for invalid arguments or when the pool cannot
+ * be opened, otherwise the error from zpool_get_bootenv(), nvlist_dup()
+ * or nvlist_alloc().
+ */
+int
+lzbe_nvlist_get(const char *pool, const char *key, void **ptr)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *nv;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0' || ptr == NULL)
+		return (rv);
+
+	if ((hdl = libzfs_init()) == NULL) {
+		return (rv);
+	}
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	rv = zpool_get_bootenv(zphdl, &nv);
+	if (rv == 0) {
+		nvlist_t *nvl, *dup;
+
+		if (key != NULL) {
+			rv = nvlist_lookup_nvlist(nv, key, &nvl);
+			if (rv == 0) {
+				/* nvl points into nv, so copy before freeing */
+				rv = nvlist_dup(nvl, &dup, 0);
+				nvlist_free(nv);
+				if (rv == 0)
+					nv = dup;
+				else
+					nv = NULL;
+			} else {
+				nvlist_free(nv);
+				/*
+				 * nv is stale now; clear it so a failing
+				 * nvlist_alloc() cannot leave the freed
+				 * pointer behind for the caller.
+				 */
+				nv = NULL;
+				rv = nvlist_alloc(&nv, NV_UNIQUE_NAME, 0);
+			}
+		}
+		*ptr = nv;
+	}
+
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Write nvlist ptr to the bootenv of pool.
+ * Without a key, ptr is stored as the complete bootenv. With a key, the
+ * current bootenv is read, replaced by a fresh VB_NVLIST list when its
+ * version is missing or different, and ptr is added to it under key
+ * before the result is written back.
+ *
+ * Returns -1 for an invalid pool name or when the pool cannot be opened,
+ * otherwise the status of the last bootenv/nvlist operation performed.
+ */
+int
+lzbe_nvlist_set(const char *pool, const char *key, void *ptr)
+{
+	libzfs_handle_t *hdl;
+	zpool_handle_t *zphdl;
+	nvlist_t *bootenv;
+	uint64_t version;
+	int rv = -1;
+
+	if (pool == NULL || *pool == '\0')
+		return (rv);
+
+	hdl = libzfs_init();
+	if (hdl == NULL)
+		return (rv);
+
+	zphdl = zpool_open(hdl, pool);
+	if (zphdl == NULL) {
+		libzfs_fini(hdl);
+		return (rv);
+	}
+
+	if (key == NULL) {
+		/* No key: ptr itself is the whole bootenv. */
+		rv = zpool_set_bootenv(zphdl, ptr);
+		goto out;
+	}
+
+	rv = zpool_get_bootenv(zphdl, &bootenv);
+	if (rv != 0)
+		goto out;
+
+	/*
+	 * Keep the list we just read only if it carries the VB_NVLIST
+	 * version; anything else is discarded and started over.
+	 */
+	if (nvlist_lookup_uint64(bootenv, BOOTENV_VERSION, &version) != 0 ||
+	    version != VB_NVLIST) {
+		fnvlist_free(bootenv);
+		bootenv = fnvlist_alloc();
+		fnvlist_add_uint64(bootenv, BOOTENV_VERSION, VB_NVLIST);
+	}
+
+	rv = nvlist_add_nvlist(bootenv, key, ptr);
+	if (rv == 0)
+		rv = zpool_set_bootenv(zphdl, bootenv);
+	nvlist_free(bootenv);
+
+out:
+	zpool_close(zphdl);
+	libzfs_fini(hdl);
+	return (rv);
+}
+
+/*
+ * Release an nvlist that was handed out by lzbe_nvlist_get().
+ */
+void
+lzbe_nvlist_free(void *ptr)
+{
+	nvlist_t *nvl = ptr;
+
+	nvlist_free(nvl);
+}
+
+/*
+ * Data type names accepted by lzbe_add_pair(). nvpair_type_from_name()
+ * returns the index of the matching entry and lzbe_add_pair() uses that
+ * index as a data_type_t value, so the order of this table is significant.
+ * NOTE(review): presumably the order mirrors the data_type_t enum
+ * declaration -- confirm against the nvpair header before editing.
+ */
+static const char *typenames[] = {
+	"DATA_TYPE_UNKNOWN",
+	"DATA_TYPE_BOOLEAN",
+	"DATA_TYPE_BYTE",
+	"DATA_TYPE_INT16",
+	"DATA_TYPE_UINT16",
+	"DATA_TYPE_INT32",
+	"DATA_TYPE_UINT32",
+	"DATA_TYPE_INT64",
+	"DATA_TYPE_UINT64",
+	"DATA_TYPE_STRING",
+	"DATA_TYPE_BYTE_ARRAY",
+	"DATA_TYPE_INT16_ARRAY",
+	"DATA_TYPE_UINT16_ARRAY",
+	"DATA_TYPE_INT32_ARRAY",
+	"DATA_TYPE_UINT32_ARRAY",
+	"DATA_TYPE_INT64_ARRAY",
+	"DATA_TYPE_UINT64_ARRAY",
+	"DATA_TYPE_STRING_ARRAY",
+	"DATA_TYPE_HRTIME",
+	"DATA_TYPE_NVLIST",
+	"DATA_TYPE_NVLIST_ARRAY",
+	"DATA_TYPE_BOOLEAN_VALUE",
+	"DATA_TYPE_INT8",
+	"DATA_TYPE_UINT8",
+	"DATA_TYPE_BOOLEAN_ARRAY",
+	"DATA_TYPE_INT8_ARRAY",
+	"DATA_TYPE_UINT8_ARRAY"
+};
+
+/*
+ * Map a type name to its position in typenames[]. A name that is not in
+ * the table yields 0, which is also the slot of "DATA_TYPE_UNKNOWN".
+ */
+static int
+nvpair_type_from_name(const char *name)
+{
+	unsigned idx = 0;
+
+	while (idx < ARRAY_SIZE(typenames)) {
+		if (strcmp(typenames[idx], name) == 0)
+			return (idx);
+		idx++;
+	}
+	return (0);
+}
+
+/*
+ * Add pair defined by key, type and value into nvlist.
+ *
+ * ptr:   the nvlist to modify (used as nvlist_t *).
+ * key:   name of the pair.
+ * type:  one of the names listed in typenames[]; NULL selects
+ *        "DATA_TYPE_STRING".
+ * value: pointer to the data. For scalar types it is dereferenced as
+ *        that type; for string, array and nvlist types the pointer
+ *        itself is handed to the matching nvlist_add_*() call.
+ * size:  for scalar types it must equal the sizeof of that type. For
+ *        array types it is passed on as the last argument of the
+ *        nvlist_add_*_array() call (presumably the element count --
+ *        TODO confirm). It is not examined for string and nvlist.
+ *
+ * Returns EINVAL when the type name is not recognized or size does not
+ * match a scalar type, ENOTSUP for a recognized type that has no case
+ * below (e.g. "DATA_TYPE_BOOLEAN", "DATA_TYPE_HRTIME"), otherwise the
+ * result of the nvlist_add_*() call.
+ * NOTE(review): a NULL ptr, key or value returns 0 without adding
+ * anything; confirm callers rely on that before turning it into an error.
+ */
+int
+lzbe_add_pair(void *ptr, const char *key, const char *type, void *value,
+    size_t size)
+{
+	nvlist_t *nv = ptr;
+	data_type_t dt;
+	int rv = 0;
+
+	if (ptr == NULL || key == NULL || value == NULL)
+		return (rv);	/* rv is still 0 here: silent no-op */
+
+	if (type == NULL)
+		type = "DATA_TYPE_STRING";
+	dt = nvpair_type_from_name(type);
+	if (dt == DATA_TYPE_UNKNOWN)
+		return (EINVAL);
+
+	switch (dt) {
+	case DATA_TYPE_BYTE:
+		if (size != sizeof (uint8_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_byte(nv, key, *(uint8_t *)value);
+		break;
+
+	case DATA_TYPE_INT16:
+		if (size != sizeof (int16_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_int16(nv, key, *(int16_t *)value);
+		break;
+
+	case DATA_TYPE_UINT16:
+		if (size != sizeof (uint16_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_uint16(nv, key, *(uint16_t *)value);
+		break;
+
+	case DATA_TYPE_INT32:
+		if (size != sizeof (int32_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_int32(nv, key, *(int32_t *)value);
+		break;
+
+	case DATA_TYPE_UINT32:
+		if (size != sizeof (uint32_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_uint32(nv, key, *(uint32_t *)value);
+		break;
+
+	case DATA_TYPE_INT64:
+		if (size != sizeof (int64_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_int64(nv, key, *(int64_t *)value);
+		break;
+
+	case DATA_TYPE_UINT64:
+		if (size != sizeof (uint64_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_uint64(nv, key, *(uint64_t *)value);
+		break;
+
+	case DATA_TYPE_STRING:
+		rv = nvlist_add_string(nv, key, value);
+		break;
+
+	case DATA_TYPE_BYTE_ARRAY:
+		rv = nvlist_add_byte_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_INT16_ARRAY:
+		rv = nvlist_add_int16_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_UINT16_ARRAY:
+		rv = nvlist_add_uint16_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_INT32_ARRAY:
+		rv = nvlist_add_int32_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_UINT32_ARRAY:
+		rv = nvlist_add_uint32_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_INT64_ARRAY:
+		rv = nvlist_add_int64_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_UINT64_ARRAY:
+		rv = nvlist_add_uint64_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_STRING_ARRAY:
+		rv = nvlist_add_string_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_NVLIST:
+		rv = nvlist_add_nvlist(nv, key, (nvlist_t *)value);
+		break;
+
+	case DATA_TYPE_NVLIST_ARRAY:
+		rv = nvlist_add_nvlist_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+		if (size != sizeof (boolean_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_boolean_value(nv, key, *(boolean_t *)value);
+		break;
+
+	case DATA_TYPE_INT8:
+		if (size != sizeof (int8_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_int8(nv, key, *(int8_t *)value);
+		break;
+
+	case DATA_TYPE_UINT8:
+		if (size != sizeof (uint8_t)) {
+			rv = EINVAL;
+			break;
+		}
+		rv = nvlist_add_uint8(nv, key, *(uint8_t *)value);
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+		rv = nvlist_add_boolean_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_INT8_ARRAY:
+		rv = nvlist_add_int8_array(nv, key, value, size);
+		break;
+
+	case DATA_TYPE_UINT8_ARRAY:
+		rv = nvlist_add_uint8_array(nv, key, value, size);
+		break;
+
+	default:
+		return (ENOTSUP);
+	}
+
+	return (rv);
+}
+
+/*
+ * Remove key from the nvlist ptr by way of nvlist_remove_all() and
+ * return that call's status.
+ */
+int
+lzbe_remove_pair(void *ptr, const char *key)
+{
+	nvlist_t *nvl = ptr;
+	int rv;
+
+	rv = nvlist_remove_all(nvl, key);
+	return (rv);
+}

diff --git a/zfs/lib/libzfsbootenv/lzbe_util.c b/zfs/lib/libzfsbootenv/lzbe_util.c
new file mode 100644
index 0000000..35e9854
--- /dev/null
+++ b/zfs/lib/libzfsbootenv/lzbe_util.c

@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2020 Toomas Soome <tsoome@me.com>
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <libzfs.h>
+#include <libzfsbootenv.h>
+
+/*
+ * Print bootenv information of pool to the stream of.
+ * The list is fetched with lzbe_nvlist_get(pool, nvlist, ...), so nvlist
+ * names a nested list and NULL selects the whole bootenv.
+ *
+ * Returns -1 when pool is NULL or empty or of is NULL, otherwise the
+ * lzbe_nvlist_get() status (0 once the list has been printed).
+ */
+int
+lzbe_bootenv_print(const char *pool, const char *nvlist, FILE *of)
+{
+	nvlist_t *nv;
+	int rv;
+
+	if (pool == NULL || *pool == '\0' || of == NULL)
+		return (-1);
+
+	rv = lzbe_nvlist_get(pool, nvlist, (void **)&nv);
+	if (rv != 0)
+		return (rv);
+
+	nvlist_print(of, nv);
+	nvlist_free(nv);
+	return (0);
+}

diff --git a/zfs/lib/libzpool/Makefile.am b/zfs/lib/libzpool/Makefile.am
index 91f4750..4ce3b4c 100644
--- a/zfs/lib/libzpool/Makefile.am
+++ b/zfs/lib/libzpool/Makefile.am

@@ -4,19 +4,28 @@
 	$(top_srcdir)/module/zfs \
 	$(top_srcdir)/module/zcommon \
 	$(top_srcdir)/module/lua \
+	$(top_srcdir)/module/os/linux/zfs \
 	$(top_srcdir)/lib/libzpool
 
-# Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
+# Unconditionally enable debugging for libzpool
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 
 # Includes kernel code generate warnings for large stack frames
 AM_CFLAGS += $(FRAME_LARGER_THAN)
 
+AM_CFLAGS += $(ZLIB_CFLAGS)
+
 AM_CFLAGS += -DLIB_ZPOOL_BUILD
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
 
 lib_LTLIBRARIES = libzpool.la
 
@@ -38,16 +47,18 @@
 	zfs_fletcher_superscalar4.c \
 	zfs_namecheck.c \
 	zfs_prop.c \
-	zfs_uio.c \
 	zpool_prop.c \
 	zprop_common.c \
 	abd.c \
+	abd_os.c \
 	aggsum.c \
 	arc.c \
+	arc_os.c \
 	blkptr.c \
 	bplist.c \
 	bpobj.c \
 	bptree.c \
+	btree.c \
 	bqueue.c \
 	cityhash.c \
 	dbuf.c \
@@ -59,6 +70,7 @@
 	dmu_object.c \
 	dmu_objset.c \
 	dmu_recv.c \
+	dmu_redact.c \
 	dmu_send.c \
 	dmu_traverse.c \
 	dmu_tx.c \
@@ -86,6 +98,7 @@
 	metaslab.c \
 	mmp.c \
 	multilist.c \
+	objlist.c \
 	pathname.c \
 	range_tree.c \
 	refcount.c \
@@ -99,6 +112,7 @@
 	spa_config.c \
 	spa_errlog.c \
 	spa_history.c \
+	spa_log_spacemap.c \
 	spa_misc.c \
 	spa_stats.c \
 	space_map.c \
@@ -109,6 +123,8 @@
 	unique.c \
 	vdev.c \
 	vdev_cache.c \
+	vdev_draid.c \
+	vdev_draid_rand.c \
 	vdev_file.c \
 	vdev_indirect_births.c \
 	vdev_indirect.c \
@@ -128,6 +144,8 @@
 	vdev_raidz_math_scalar.c \
 	vdev_raidz_math_sse2.c \
 	vdev_raidz_math_ssse3.c \
+	vdev_raidz_math_powerpc_altivec.c \
+	vdev_rebuild.c \
 	vdev_removal.c \
 	vdev_root.c \
 	vdev_trim.c \
@@ -138,12 +156,14 @@
 	zcp_get.c \
 	zcp_global.c \
 	zcp_iter.c \
+	zcp_set.c \
 	zcp_synctask.c \
 	zfeature.c \
 	zfs_byteswap.c \
 	zfs_debug.c \
 	zfs_fm.c \
 	zfs_fuid.c \
+	zfs_racct.c \
 	zfs_sa.c \
 	zfs_znode.c \
 	zfs_ratelimit.c \
@@ -184,18 +204,37 @@
 	lvm.c \
 	lzio.c
 
+dist_libzpool_la_SOURCES = \
+	$(USER_C)
+
 nodist_libzpool_la_SOURCES = \
-	$(USER_C) \
 	$(KERNEL_C) \
 	$(LUA_C)
 
 libzpool_la_LIBADD = \
-	$(top_builddir)/lib/libicp/libicp.la \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libunicode/libunicode.la \
-	$(top_builddir)/lib/libzutil/libzutil.la
+	$(abs_top_builddir)/lib/libicp/libicp.la \
+	$(abs_top_builddir)/lib/libunicode/libunicode.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libzstd/libzstd.la
 
-libzpool_la_LIBADD += $(ZLIB) -ldl
-libzpool_la_LDFLAGS = -pthread -version-info 2:0:0
+libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -ldl -lm
 
-EXTRA_DIST = $(USER_C)
+libzpool_la_LDFLAGS = -pthread
+
+if !ASAN_ENABLED
+libzpool_la_LDFLAGS += -Wl,-z,defs
+endif
+
+if BUILD_FREEBSD
+libzpool_la_LIBADD += -lgeom
+endif
+
+libzpool_la_LDFLAGS += -version-info 5:0:0
+
+if TARGET_CPU_POWERPC
+vdev_raidz_math_powerpc_altivec.$(OBJEXT): CFLAGS += -maltivec
+vdev_raidz_math_powerpc_altivec.l$(OBJEXT): CFLAGS += -maltivec
+endif
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libzpool/kernel.c b/zfs/lib/libzpool/kernel.c
index 5d80f9e..5f47402 100644
--- a/zfs/lib/libzpool/kernel.c
+++ b/zfs/lib/libzpool/kernel.c

@@ -20,38 +20,40 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  */
 
 #include <assert.h>
 #include <fcntl.h>
+#include <libgen.h>
 #include <poll.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <zlib.h>
-#include <libgen.h>
-#include <sys/signal.h>
+#include <sys/crypto/icp.h>
+#include <sys/processor.h>
+#include <sys/rrwlock.h>
 #include <sys/spa.h>
 #include <sys/stat.h>
-#include <sys/processor.h>
-#include <sys/zfs_context.h>
-#include <sys/rrwlock.h>
-#include <sys/utsname.h>
-#include <sys/time.h>
 #include <sys/systeminfo.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zstd/zstd.h>
+#include <sys/zvol.h>
 #include <zfs_fletcher.h>
-#include <sys/crypto/icp.h>
+#include <zlib.h>
 
 /*
  * Emulation of kernel services in userland.
  */
 
 uint64_t physmem;
-vnode_t *rootdir = (vnode_t *)0xabcd1234;
 char hw_serial[HW_HOSTID_LEN];
 struct utsname hw_utsname;
-vmem_t *zio_arena = NULL;
 
 /* If set, all blocks read will be copied to the specified directory. */
 char *vn_dumpdir = NULL;
@@ -144,36 +146,6 @@
 kstat_delete(kstat_t *ksp)
 {}
 
-/*ARGSUSED*/
-void
-kstat_waitq_enter(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_waitq_exit(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_enter(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_exit(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_waitq_to_runq(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_back_to_waitq(kstat_io_t *kiop)
-{}
-
 void
 kstat_set_raw_ops(kstat_t *ksp,
     int (*headers)(char *buf, size_t size),
@@ -345,7 +317,7 @@
 	return (1);
 }
 
-clock_t
+int
 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
 {
 	int error;
@@ -379,7 +351,7 @@
 }
 
 /*ARGSUSED*/
-clock_t
+int
 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
     int flag)
 {
@@ -442,6 +414,7 @@
 
 void
 procfs_list_install(const char *module,
+    const char *submodule,
     const char *name,
     mode_t mode,
     procfs_list_t *procfs_list,
@@ -487,231 +460,6 @@
  * vnode operations
  * =========================================================================
  */
-/*
- * Note: for the xxxat() versions of these functions, we assume that the
- * starting vp is always rootdir (which is true for spa_directory.c, the only
- * ZFS consumer of these interfaces).  We assert this is true, and then emulate
- * them by adding '/' in front of the path.
- */
-
-/*ARGSUSED*/
-int
-vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
-{
-	int fd = -1;
-	int dump_fd = -1;
-	vnode_t *vp;
-	int old_umask = 0;
-	char *realpath;
-	struct stat64 st;
-	int err;
-
-	realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
-
-	/*
-	 * If we're accessing a real disk from userland, we need to use
-	 * the character interface to avoid caching.  This is particularly
-	 * important if we're trying to look at a real in-kernel storage
-	 * pool from userland, e.g. via zdb, because otherwise we won't
-	 * see the changes occurring under the segmap cache.
-	 * On the other hand, the stupid character device returns zero
-	 * for its size.  So -- gag -- we open the block device to get
-	 * its size, and remember it for subsequent VOP_GETATTR().
-	 */
-#if defined(__sun__) || defined(__sun)
-	if (strncmp(path, "/dev/", 5) == 0) {
-#else
-	if (0) {
-#endif
-		char *dsk;
-		fd = open64(path, O_RDONLY);
-		if (fd == -1) {
-			err = errno;
-			free(realpath);
-			return (err);
-		}
-		if (fstat64(fd, &st) == -1) {
-			err = errno;
-			close(fd);
-			free(realpath);
-			return (err);
-		}
-		close(fd);
-		(void) sprintf(realpath, "%s", path);
-		dsk = strstr(path, "/dsk/");
-		if (dsk != NULL)
-			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
-			    dsk + 1);
-	} else {
-		(void) sprintf(realpath, "%s", path);
-		if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
-			err = errno;
-			free(realpath);
-			return (err);
-		}
-	}
-
-	if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
-#ifdef __linux__
-		flags |= O_DIRECT;
-#endif
-	}
-
-	if (flags & FCREAT)
-		old_umask = umask(0);
-
-	/*
-	 * The construct 'flags - FREAD' conveniently maps combinations of
-	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
-	 */
-	fd = open64(realpath, flags - FREAD, mode);
-	if (fd == -1) {
-		err = errno;
-		free(realpath);
-		return (err);
-	}
-
-	if (flags & FCREAT)
-		(void) umask(old_umask);
-
-	if (vn_dumpdir != NULL) {
-		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
-		(void) snprintf(dumppath, MAXPATHLEN,
-		    "%s/%s", vn_dumpdir, basename(realpath));
-		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
-		umem_free(dumppath, MAXPATHLEN);
-		if (dump_fd == -1) {
-			err = errno;
-			free(realpath);
-			close(fd);
-			return (err);
-		}
-	} else {
-		dump_fd = -1;
-	}
-
-	free(realpath);
-
-	if (fstat64_blk(fd, &st) == -1) {
-		err = errno;
-		close(fd);
-		if (dump_fd != -1)
-			close(dump_fd);
-		return (err);
-	}
-
-	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
-
-	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
-
-	vp->v_fd = fd;
-	vp->v_size = st.st_size;
-	vp->v_path = spa_strdup(path);
-	vp->v_dump_fd = dump_fd;
-
-	return (0);
-}
-
-/*ARGSUSED*/
-int
-vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
-    int x3, vnode_t *startvp, int fd)
-{
-	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
-	int ret;
-
-	ASSERT(startvp == rootdir);
-	(void) sprintf(realpath, "/%s", path);
-
-	/* fd ignored for now, need if want to simulate nbmand support */
-	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
-
-	umem_free(realpath, strlen(path) + 2);
-
-	return (ret);
-}
-
-/*ARGSUSED*/
-int
-vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
-    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
-{
-	ssize_t rc, done = 0, split;
-
-	if (uio == UIO_READ) {
-		rc = pread64(vp->v_fd, addr, len, offset);
-		if (vp->v_dump_fd != -1 && rc != -1) {
-			int status;
-			status = pwrite64(vp->v_dump_fd, addr, rc, offset);
-			ASSERT(status != -1);
-		}
-	} else {
-		/*
-		 * To simulate partial disk writes, we split writes into two
-		 * system calls so that the process can be killed in between.
-		 */
-		int sectors = len >> SPA_MINBLOCKSHIFT;
-		split = (sectors > 0 ? rand() % sectors : 0) <<
-		    SPA_MINBLOCKSHIFT;
-		rc = pwrite64(vp->v_fd, addr, split, offset);
-		if (rc != -1) {
-			done = rc;
-			rc = pwrite64(vp->v_fd, (char *)addr + split,
-			    len - split, offset + split);
-		}
-	}
-
-#ifdef __linux__
-	if (rc == -1 && errno == EINVAL) {
-		/*
-		 * Under Linux, this most likely means an alignment issue
-		 * (memory or disk) due to O_DIRECT, so we abort() in order to
-		 * catch the offender.
-		 */
-		abort();
-	}
-#endif
-	if (rc == -1)
-		return (errno);
-
-	done += rc;
-
-	if (residp)
-		*residp = len - done;
-	else if (done != len)
-		return (EIO);
-	return (0);
-}
-
-void
-vn_close(vnode_t *vp)
-{
-	close(vp->v_fd);
-	if (vp->v_dump_fd != -1)
-		close(vp->v_dump_fd);
-	spa_strfree(vp->v_path);
-	umem_free(vp, sizeof (vnode_t));
-}
-
-/*
- * At a minimum we need to update the size since vdev_reopen()
- * will no longer call vn_openat().
- */
-int
-fop_getattr(vnode_t *vp, vattr_t *vap)
-{
-	struct stat64 st;
-	int err;
-
-	if (fstat64_blk(vp->v_fd, &st) == -1) {
-		err = errno;
-		close(vp->v_fd);
-		return (err);
-	}
-
-	vap->va_size = st.st_size;
-	return (0);
-}
 
 /*
  * =========================================================================
@@ -907,60 +655,6 @@
 
 /*
  * =========================================================================
- * kobj interfaces
- * =========================================================================
- */
-struct _buf *
-kobj_open_file(char *name)
-{
-	struct _buf *file;
-	vnode_t *vp;
-
-	/* set vp as the _fd field of the file */
-	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
-	    -1) != 0)
-		return ((void *)-1UL);
-
-	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
-	file->_fd = (intptr_t)vp;
-	return (file);
-}
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
-	ssize_t resid = 0;
-
-	if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
-	    UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
-		return (-1);
-
-	return (size - resid);
-}
-
-void
-kobj_close_file(struct _buf *file)
-{
-	vn_close((vnode_t *)file->_fd);
-	umem_free(file, sizeof (struct _buf));
-}
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
-	struct stat64 st;
-	vnode_t *vp = (vnode_t *)file->_fd;
-
-	if (fstat64(vp->v_fd, &st) == -1) {
-		vn_close(vp);
-		return (errno);
-	}
-	*size = st.st_size;
-	return (0);
-}
-
-/*
- * =========================================================================
  * misc routines
  * =========================================================================
  */
@@ -999,15 +693,15 @@
 	return (__builtin_ffsll(i));
 }
 
-char *random_path = "/dev/random";
-char *urandom_path = "/dev/urandom";
+const char *random_path = "/dev/random";
+const char *urandom_path = "/dev/urandom";
 static int random_fd = -1, urandom_fd = -1;
 
 void
 random_init(void)
 {
-	VERIFY((random_fd = open(random_path, O_RDONLY)) != -1);
-	VERIFY((urandom_fd = open(urandom_path, O_RDONLY)) != -1);
+	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
+	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
 }
 
 void
@@ -1102,11 +796,11 @@
 
 	physmem = sysconf(_SC_PHYS_PAGES);
 
-	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
+	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
 	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
 
 	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
-	    (mode & FWRITE) ? get_system_hostid() : 0);
+	    (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0);
 
 	random_init();
 
@@ -1115,7 +809,9 @@
 	system_taskq_init();
 	icp_init();
 
-	spa_init(mode);
+	zstd_init();
+
+	spa_init((spa_mode_t)mode);
 
 	fletcher_4_init();
 
@@ -1128,6 +824,8 @@
 	fletcher_4_fini();
 	spa_fini();
 
+	zstd_fini();
+
 	icp_fini();
 	system_taskq_fini();
 
@@ -1188,6 +886,12 @@
 	return (0);
 }
 
+int
+secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
+{
+	return (0);
+}
+
 ksiddomain_t *
 ksid_lookupdomain(const char *dom)
 {
@@ -1232,16 +936,16 @@
 }
 
 /* ARGSUSED */
-int
+zfs_file_t *
 zfs_onexit_fd_hold(int fd, minor_t *minorp)
 {
 	*minorp = 0;
-	return (0);
+	return (NULL);
 }
 
 /* ARGSUSED */
 void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
 {
 }
 
@@ -1253,20 +957,6 @@
 	return (0);
 }
 
-/* ARGSUSED */
-int
-zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
-{
-	return (0);
-}
-
-/* ARGSUSED */
-int
-zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
-{
-	return (0);
-}
-
 fstrans_cookie_t
 spl_fstrans_mark(void)
 {
@@ -1293,12 +983,12 @@
 void *zvol_tag = "zvol_tag";
 
 void
-zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
+zvol_create_minor(const char *name)
 {
 }
 
 void
-zvol_remove_minor(spa_t *spa, const char *name, boolean_t async)
+zvol_create_minors_recursive(const char *name)
 {
 }
 
@@ -1312,3 +1002,384 @@
     boolean_t async)
 {
 }
+
+/*
+ * Open file
+ *
+ * path - fully qualified path to file
+ * flags - open(2) flags (O_RDONLY / O_WRONLY / O_RDWR, O_CREAT, O_EXCL)
+ * mode - permission bits used when O_CREAT creates the file
+ * fpp - pointer to return file pointer
+ *
+ * Returns 0 on success, underlying errno on failure.
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	int fd = -1;
+	int dump_fd = -1;
+	int err;
+	int old_umask = 0;
+	zfs_file_t *fp;
+	struct stat64 st;
+
+	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
+		return (errno);
+
+	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
+		flags |= O_DIRECT;
+
+	if (flags & O_CREAT)
+		old_umask = umask(0);
+
+	fd = open64(path, flags, mode);
+	err = errno;
+	/* Always restore the umask, even when open64() failed. */
+	if (flags & O_CREAT)
+		(void) umask(old_umask);
+	if (fd == -1)
+		return (err);
+
+	if (vn_dumpdir != NULL) {
+		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
+		char *inpath = basename((char *)(uintptr_t)path);
+
+		(void) snprintf(dumppath, MAXPATHLEN,
+		    "%s/%s", vn_dumpdir, inpath);
+		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
+		umem_free(dumppath, MAXPATHLEN);
+		if (dump_fd == -1) {
+			err = errno;
+			close(fd);
+			return (err);
+		}
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
+	fp->f_fd = fd;
+	fp->f_dump_fd = dump_fd;
+	*fpp = fp;
+
+	return (0);
+}
+
+void
+zfs_file_close(zfs_file_t *fp)
+{
+	close(fp->f_fd);
+	if (fp->f_dump_fd != -1)
+		close(fp->f_dump_fd);
+
+	umem_free(fp, sizeof (zfs_file_t));
+}
+
+/*
+ * Stateful write - use os internal file pointer to determine where to
+ * write and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * resid -  pointer to count of unwritten bytes  (if short write)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = write(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless write - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * pos - file offset to write to (only valid for seekable types)
+ * resid -  pointer to count of unwritten bytes
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf,
+    size_t count, loff_t pos, ssize_t *resid)
+{
+	ssize_t rc, split, done;
+	int sectors;
+
+	/*
+	 * To simulate partial disk writes, we split writes into two
+	 * system calls so that the process can be killed in between.
+	 * This is used by ztest to simulate realistic failure modes.
+	 */
+	sectors = count >> SPA_MINBLOCKSHIFT;
+	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
+	rc = pwrite64(fp->f_fd, buf, split, pos);
+	if (rc != -1) {
+		done = rc;
+		rc = pwrite64(fp->f_fd, (char *)buf + split,
+		    count - split, pos + split);
+	}
+#ifdef __linux__
+	if (rc == -1 && errno == EINVAL) {
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order
+		 * to catch the offender.
+		 */
+		abort();
+	}
+#endif
+
+	if (rc < 0)
+		return (errno);
+
+	done += rc;
+
+	if (resid) {
+		*resid = count - done;
+	} else if (done != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateful read - use os internal file pointer to determine where to
+ * read and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * resid -  pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t rc;	/* read() returns ssize_t; an int could truncate it */
+
+	rc = read(fp->f_fd, buf, count);
+	if (rc < 0)
+		return (errno);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless read - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * off - file offset to read from (only valid for seekable types)
+ * resid -  pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = pread64(fp->f_fd, buf, count, off);
+	if (rc < 0) {
+#ifdef __linux__
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order to
+		 * catch the offender.
+		 */
+		if (errno == EINVAL)
+			abort();
+#endif
+		return (errno);
+	}
+
+	if (fp->f_dump_fd != -1) {
+		int status;
+
+		status = pwrite64(fp->f_dump_fd, buf, rc, off);
+		ASSERT(status != -1);
+	}
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * lseek - set / get file pointer
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * offp - value to seek to, returns current value plus passed offset
+ * whence - see man pages for standard lseek whence values
+ *
+ * Returns 0 on success errno on failure (ESPIPE for non seekable types)
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	loff_t rc;
+
+	rc = lseek(fp->f_fd, *offp, whence);
+	if (rc < 0)
+		return (errno);
+
+	*offp = rc;
+
+	return (0);
+}
+
+/*
+ * Get file attributes
+ *
+ * fp - file pointer
+ * zfattr - pointer to file attr structure
+ *
+ * Currently only used for fetching size and file mode
+ *
+ * Returns 0 on success or error code of underlying getattr call on failure.
+ */
+int
+zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
+{
+	struct stat64 st;
+
+	if (fstat64_blk(fp->f_fd, &st) == -1)
+		return (errno);
+
+	zfattr->zfa_size = st.st_size;
+	zfattr->zfa_mode = st.st_mode;
+
+	return (0);
+}
+
+/*
+ * Sync file to disk
+ *
+ * fp - file pointer
+ * flags - O_SYNC and/or O_DSYNC (unused here; fsync() is always called)
+ *
+ * Returns 0 on success or error code of underlying sync call on failure.
+ */
+int
+zfs_file_fsync(zfs_file_t *fp, int flags)
+{
+	int rc;
+
+	rc = fsync(fp->f_fd);
+	if (rc < 0)
+		return (errno);
+
+	return (0);
+}
+
+/*
+ * fallocate - allocate or free space on disk
+ *
+ * fp - file pointer
+ * mode (non-standard options for hole punching etc)
+ * offset - offset to start allocating or freeing from
+ * len - length to free / allocate
+ *
+ * OPTIONAL.  Returns 0 on success, errno on failure (EOPNOTSUPP off Linux).
+ */
+int
+zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
+{
+#ifdef __linux__
+	return (fallocate(fp->f_fd, mode, offset, len) == 0 ? 0 : errno);
+#else
+	return (EOPNOTSUPP);
+#endif
+}
+
+/*
+ * Request current file pointer offset
+ *
+ * fp - pointer to file
+ *
+ * Returns current file offset, or -1 if lseek() fails (errno is set).
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	return (lseek(fp->f_fd, 0, SEEK_CUR));
+}
+
+/*
+ * unlink file
+ *
+ * path - fully qualified file path
+ *
+ * Returns 0 on success.
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_unlink(const char *path)
+{
+	return (remove(path));
+}
+
+/*
+ * Get reference to file pointer
+ *
+ * fd - input file descriptor
+ *
+ * Returns pointer to file struct or NULL.
+ * Unsupported in user space.
+ */
+zfs_file_t *
+zfs_file_get(int fd)
+{
+	abort();
+
+	return (NULL);
+}
+/*
+ * Drop reference to file pointer
+ *
+ * fp - pointer to file struct
+ *
+ * Unsupported in user space.
+ */
+void
+zfs_file_put(zfs_file_t *fp)
+{
+	abort();
+}
+
+void
+zfsvfs_update_fromname(const char *oldname, const char *newname)
+{
+}

diff --git a/zfs/lib/libzpool/taskq.c b/zfs/lib/libzpool/taskq.c
index ae67906..456080f 100644
--- a/zfs/lib/libzpool/taskq.c
+++ b/zfs/lib/libzpool/taskq.c

@@ -34,6 +34,8 @@
 taskq_t *system_taskq;
 taskq_t *system_delay_taskq;
 
+static pthread_key_t taskq_tsd;
+
 #define	TASKQ_ACTIVE	0x00010000
 
 static taskq_ent_t *
@@ -213,6 +215,8 @@
 	taskq_ent_t *t;
 	boolean_t prealloc;
 
+	VERIFY0(pthread_setspecific(taskq_tsd, tq));
+
 	mutex_enter(&tq->tq_lock);
 	while (tq->tq_flags & TASKQ_ACTIVE) {
 		if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
@@ -343,6 +347,12 @@
 	return (0);
 }
 
+taskq_t *
+taskq_of_curthread(void)
+{
+	return (pthread_getspecific(taskq_tsd));
+}
+
 int
 taskq_cancel_id(taskq_t *tq, taskqid_t id)
 {
@@ -352,6 +362,7 @@
 void
 system_taskq_init(void)
 {
+	VERIFY0(pthread_key_create(&taskq_tsd, NULL));
 	system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
 	    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
 	system_delay_taskq = taskq_create("delay_taskq", 4, maxclsyspri, 4,
@@ -365,4 +376,5 @@
 	system_taskq = NULL; /* defensive */
 	taskq_destroy(system_delay_taskq);
 	system_delay_taskq = NULL;
+	VERIFY0(pthread_key_delete(taskq_tsd));
 }

diff --git a/zfs/lib/libzpool/util.c b/zfs/lib/libzpool/util.c
index 67bc209..f4e3314 100644
--- a/zfs/lib/libzpool/util.c
+++ b/zfs/lib/libzpool/util.c

@@ -33,7 +33,7 @@
 #include <stdlib.h>
 #include <sys/spa.h>
 #include <sys/fs/zfs.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/zfs_ioctl.h>
 #include <dlfcn.h>
 #include <libzutil.h>
@@ -148,18 +148,55 @@
 	nvlist_free(config);
 }
 
+/* Returns 0 or an errno; on success *k_out must be freed by the caller. */
+static int
+set_global_var_parse_kv(const char *arg, char **k_out, u_longlong_t *v_out)
+{
+	int err = 0;
+	VERIFY(arg);
+	char *d = strdup(arg);
+	if (d == NULL)
+		return (ENOMEM);
+	/* Expect exactly two '='-separated tokens: name and value. */
+	char *save = NULL;
+	char *k = strtok_r(d, "=", &save);
+	char *v_str = strtok_r(NULL, "=", &save);
+	char *follow = strtok_r(NULL, "=", &save);
+	if (k == NULL || v_str == NULL || follow != NULL) {
+		err = EINVAL;
+		goto out;
+	}
+
+	u_longlong_t val = strtoull(v_str, NULL, 0);
+	if (val > UINT32_MAX) {
+		fprintf(stderr, "Value for global variable '%s' must "
+		    "be a 32-bit unsigned integer, got '%s'\n", k, v_str);
+		err = EOVERFLOW;
+		goto out;
+	}
+
+	if ((*k_out = strdup(k)) == NULL)
+		err = ENOMEM;
+	else
+		*v_out = val;
+out:
+	free(d);
+	return (err);
+}
+
 /*
  * Sets given global variable in libzpool to given unsigned 32-bit value.
  * arg: "<variable>=<value>"
  */
 int
-set_global_var(char *arg)
+set_global_var(char const *arg)
 {
 	void *zpoolhdl;
-	char *varname = arg, *varval;
+	char *varname;
 	u_longlong_t val;
+	int ret;
 
-#ifndef _LITTLE_ENDIAN
+#ifndef _ZFS_LITTLE_ENDIAN
 	/*
 	 * On big endian systems changing a 64-bit variable would set the high
 	 * 32 bits instead of the low 32 bits, which could cause unexpected
@@ -167,19 +204,12 @@
 	 */
 	fprintf(stderr, "Setting global variables is only supported on "
 	    "little-endian systems\n");
-	return (ENOTSUP);
+	ret = ENOTSUP;
+	goto out_ret;
 #endif
-	if (arg != NULL && (varval = strchr(arg, '=')) != NULL) {
-		*varval = '\0';
-		varval++;
-		val = strtoull(varval, NULL, 0);
-		if (val > UINT32_MAX) {
-			fprintf(stderr, "Value for global variable '%s' must "
-			    "be a 32-bit unsigned integer\n", varname);
-			return (EOVERFLOW);
-		}
-	} else {
-		return (EINVAL);
+
+	if ((ret = set_global_var_parse_kv(arg, &varname, &val)) != 0) {
+		goto out_ret;
 	}
 
 	zpoolhdl = dlopen("libzpool.so", RTLD_LAZY);
@@ -189,18 +219,26 @@
 		if (var == NULL) {
 			fprintf(stderr, "Global variable '%s' does not exist "
 			    "in libzpool.so\n", varname);
-			return (EINVAL);
+			ret = EINVAL;
+			goto out_dlclose;
 		}
 		*var = (uint32_t)val;
 
-		dlclose(zpoolhdl);
 	} else {
 		fprintf(stderr, "Failed to open libzpool.so to set global "
 		    "variable\n");
-		return (EIO);
+		ret = EIO;
+		goto out_free;
 	}
 
-	return (0);
+	ret = 0;
+
+out_dlclose:
+	dlclose(zpoolhdl);
+out_free:
+	free(varname);
+out_ret:
+	return (ret);
 }
 
 static nvlist_t *
@@ -223,7 +261,7 @@
 	 * Use ZFS_IOC_POOL_SYNC to confirm if a pool is active
 	 */
 
-	fd = open(ZFS_DEV, O_RDWR);
+	fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
 	if (fd < 0)
 		return (-1);
 
@@ -237,7 +275,7 @@
 	zcp->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
 	zcp->zc_nvlist_src_size = size;
 
-	ret = ioctl(fd, ZFS_IOC_POOL_SYNC, zcp);
+	ret = zfs_ioctl_fd(fd, ZFS_IOC_POOL_SYNC, zcp);
 
 	fnvlist_pack_free(packed, size);
 	free((void *)(uintptr_t)zcp->zc_nvlist_dst);

diff --git a/zfs/lib/libzstd/Makefile.am b/zfs/lib/libzstd/Makefile.am
new file mode 100644
index 0000000..e3bc5c4
--- /dev/null
+++ b/zfs/lib/libzstd/Makefile.am

@@ -0,0 +1,25 @@
+include $(top_srcdir)/config/Rules.am
+
+VPATH = $(top_srcdir)/module/zstd
+
+# -fno-tree-vectorize is set for gcc in zstd/common/compiler.h
+# Set it for other compilers, too.
+AM_CFLAGS += -fno-tree-vectorize
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
+noinst_LTLIBRARIES = libzstd.la
+
+KERNEL_C = \
+	lib/zstd.c \
+	zfs_zstd.c
+
+nodist_libzstd_la_SOURCES = $(KERNEL_C)
+
+lib/zstd.$(OBJEXT):  CFLAGS += -fno-tree-vectorize -include $(top_srcdir)/module/zstd/include/zstd_compat_wrapper.h -Wp,-w
+lib/zstd.l$(OBJEXT): CFLAGS += -fno-tree-vectorize -include $(top_srcdir)/module/zstd/include/zstd_compat_wrapper.h -Wp,-w
+
+zfs_zstd.$(OBJEXT):  CFLAGS += -include $(top_srcdir)/module/zstd/include/zstd_compat_wrapper.h
+zfs_zstd.l$(OBJEXT): CFLAGS += -include $(top_srcdir)/module/zstd/include/zstd_compat_wrapper.h
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libzutil/Makefile.am b/zfs/lib/libzutil/Makefile.am
index 720b843..0ddc241 100644
--- a/zfs/lib/libzutil/Makefile.am
+++ b/zfs/lib/libzutil/Makefile.am

@@ -1,27 +1,56 @@
 include $(top_srcdir)/config/Rules.am
 
-# Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
+AM_CFLAGS += $(LIBBLKID_CFLAGS) $(LIBUDEV_CFLAGS)
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
+DEFAULT_INCLUDES += -I$(srcdir)
 
 noinst_LTLIBRARIES = libzutil.la
 
 USER_C = \
 	zutil_device_path.c \
 	zutil_import.c \
+	zutil_import.h \
 	zutil_nicenum.c \
 	zutil_pool.c
 
-nodist_libzutil_la_SOURCES = $(USER_C)
+if BUILD_LINUX
+USER_C += \
+	os/linux/zutil_setproctitle.c \
+	os/linux/zutil_device_path_os.c \
+	os/linux/zutil_import_os.c \
+	os/linux/zutil_compat.c
+endif
+
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+
+USER_C += \
+	os/freebsd/zutil_device_path_os.c \
+	os/freebsd/zutil_import_os.c \
+	os/freebsd/zutil_compat.c
+
+VPATH += $(top_srcdir)/module/os/freebsd/zfs
+
+nodist_libzutil_la_SOURCES = zfs_ioctl_compat.c
+endif
+
+libzutil_la_SOURCES = $(USER_C)
 
 libzutil_la_LIBADD = \
-	$(top_builddir)/lib/libavl/libavl.la \
-        $(top_builddir)/lib/libefi/libefi.la \
-	$(top_builddir)/lib/libtpool/libtpool.la
+	$(abs_top_builddir)/lib/libavl/libavl.la \
+	$(abs_top_builddir)/lib/libtpool/libtpool.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libspl/libspl.la
 
-libzutil_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV)
+if BUILD_LINUX
+libzutil_la_LIBADD += \
+	$(abs_top_builddir)/lib/libefi/libefi.la \
+	-lrt
+endif
 
-EXTRA_DIST = $(USER_C)
+libzutil_la_LIBADD += -lm $(LIBBLKID_LIBS) $(LIBUDEV_LIBS)
+
+include $(top_srcdir)/config/CppCheck.am

diff --git a/zfs/lib/libzutil/os/freebsd/zutil_compat.c b/zfs/lib/libzutil/os/freebsd/zutil_compat.c
new file mode 100644
index 0000000..baaf4b5
--- /dev/null
+++ b/zfs/lib/libzutil/os/freebsd/zutil_compat.c

@@ -0,0 +1,124 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/zfs_ioctl.h>
+#include <os/freebsd/zfs/sys/zfs_ioctl_compat.h>
+#include <libzutil.h>
+
+#include <err.h>
+
+int zfs_ioctl_version = ZFS_IOCVER_UNDEF;
+
+/*
+ * Get zfs_ioctl_version
+ */
+static int
+get_zfs_ioctl_version(void)
+{
+	size_t ver_size;
+	int ver = ZFS_IOCVER_NONE;
+
+	ver_size = sizeof (ver);
+	sysctlbyname("vfs.zfs.version.ioctl", &ver, &ver_size, NULL, 0);
+
+	return (ver);
+}
+
+static int
+zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag)
+{
+	int newrequest, ret;
+	void *zc_c = NULL;
+	unsigned long ncmd;
+	zfs_iocparm_t zp;
+
+	switch (cflag) {
+	case ZFS_CMD_COMPAT_NONE:
+		ncmd = _IOWR('Z', request, zfs_iocparm_t);
+		zp.zfs_cmd = (uint64_t)(uintptr_t)zc;
+		zp.zfs_cmd_size = sizeof (zfs_cmd_t);
+		zp.zfs_ioctl_version = ZFS_IOCVER_OZFS;
+		break;
+	case ZFS_CMD_COMPAT_LEGACY:
+		newrequest = zfs_ioctl_ozfs_to_legacy(request);
+		ncmd = _IOWR('Z', newrequest, zfs_iocparm_t);
+		zc_c = malloc(sizeof (zfs_cmd_legacy_t));
+		zfs_cmd_ozfs_to_legacy(zc, zc_c);
+		zp.zfs_cmd = (uint64_t)(uintptr_t)zc_c;
+		zp.zfs_cmd_size = sizeof (zfs_cmd_legacy_t);
+		zp.zfs_ioctl_version = ZFS_IOCVER_LEGACY;
+		break;
+	default:
+		abort();
+		return (EINVAL);
+	}
+
+	ret = ioctl(fd, ncmd, &zp);
+	if (ret) {
+		if (zc_c)
+			free(zc_c);
+		return (ret);
+	}
+	if (zc_c) {
+		zfs_cmd_legacy_to_ozfs(zc_c, zc);
+		free(zc_c);
+	}
+	return (ret);
+}
+
+/*
+ * This is FreeBSD version of ioctl, because Solaris' ioctl() updates
+ * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
+ * error is returned zc_nvlist_dst_size won't be updated.
+ */
+int
+zfs_ioctl_fd(int fd, unsigned long request, zfs_cmd_t *zc)
+{
+	size_t oldsize;
+	int ret, cflag = ZFS_CMD_COMPAT_NONE;
+
+	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
+		zfs_ioctl_version = get_zfs_ioctl_version();
+
+	switch (zfs_ioctl_version) {
+		case ZFS_IOCVER_LEGACY:
+			cflag = ZFS_CMD_COMPAT_LEGACY;
+			break;
+		case ZFS_IOCVER_OZFS:
+			cflag = ZFS_CMD_COMPAT_NONE;
+			break;
+		default:
+			errx(1, "unrecognized zfs ioctl version %d",
+			    zfs_ioctl_version);
+	}
+
+	oldsize = zc->zc_nvlist_dst_size;
+	ret = zcmd_ioctl_compat(fd, request, zc, cflag);
+
+	if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
+		ret = -1;
+		errno = ENOMEM;
+	}
+
+	return (ret);
+}

diff --git a/zfs/lib/libzutil/os/freebsd/zutil_device_path_os.c b/zfs/lib/libzutil/os/freebsd/zutil_device_path_os.c
new file mode 100644
index 0000000..71c9360
--- /dev/null
+++ b/zfs/lib/libzutil/os/freebsd/zutil_device_path_os.c

@@ -0,0 +1,132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/stat.h>
+
+#include <libgeom.h>
+
+#include <libzutil.h>
+
+/*
+ * We don't strip/append partitions on FreeBSD.
+ */
+
+/*
+ * Note: The caller must free the returned string.
+ */
+char *
+zfs_strip_partition(char *dev)
+{
+	return (strdup(dev));
+}
+
+int
+zfs_append_partition(char *path, size_t max_len)
+{
+	return (strnlen(path, max_len));
+}
+
+/*
+ * Strip the path from a device name.
+ * On FreeBSD we only want to remove "/dev/" from the beginning of
+ * paths if present.
+ */
+char *
+zfs_strip_path(char *path)
+{
+	if (strncmp(path, _PATH_DEV, sizeof (_PATH_DEV) - 1) == 0)
+		return (path + sizeof (_PATH_DEV) - 1);
+	else
+		return (path);
+}
+
+char *
+zfs_get_underlying_path(const char *dev_name)
+{
+
+	if (dev_name == NULL)
+		return (NULL);
+
+	return (realpath(dev_name, NULL));
+}
+
+boolean_t
+zfs_dev_is_whole_disk(const char *dev_name)
+{
+	int fd;
+
+	fd = g_open(dev_name, 0);
+	if (fd >= 0) {
+		g_close(fd);
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Wait up to timeout_ms for the device node at path to appear.  The device
+ * is considered ready once stat64() on the path has succeeded and then still
+ * succeeds at least settle_ms (50ms) later; the path is re-checked after
+ * each short sleep.  At this point the device can be accessed reliably.
+ *
+ * Returns 0 when ready, ENODEV on timeout, or errno from a failed stat64().
+ */
+int
+zpool_label_disk_wait(const char *path, int timeout_ms)
+{
+	int settle_ms = 50;
+	long sleep_ms = 10;
+	hrtime_t start, settle;
+	struct stat64 statbuf;
+
+	start = gethrtime();
+	settle = 0;
+
+	do {
+		errno = 0;
+		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
+			if (settle == 0)
+				settle = gethrtime();
+			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
+				return (0);
+		} else if (errno != ENOENT) {
+			return (errno);
+		}
+
+		usleep(sleep_ms * MILLISEC);
+	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+	return (ENODEV);
+}
+
+/* ARGSUSED */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+	return (B_FALSE);
+}

diff --git a/zfs/lib/libzutil/os/freebsd/zutil_import_os.c b/zfs/lib/libzutil/os/freebsd/zutil_import_os.c
new file mode 100644
index 0000000..7c48e06
--- /dev/null
+++ b/zfs/lib/libzutil/os/freebsd/zutil_import_os.c

@@ -0,0 +1,254 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+/*
+ * Pool import support functions.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device.  If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ *	pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded.  Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed.  Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <sys/types.h>
+#include <sys/disk.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+
+#include <aio.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <libgen.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <sys/efi_partition.h>
+#include <thread_pool.h>
+#include <libgeom.h>
+
+#include <sys/vdev_impl.h>
+
+#include <libzutil.h>
+
+#include "zutil_import.h"
+
+/*
+ * Update a leaf vdev's persistent device strings
+ *
+ * - only applies for a dedicated leaf vdev (aka whole disk)
+ * - updated during pool create|add|attach|import
+ * - used for device matching during auto-{online,expand,replace}
+ * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
+ * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
+ *
+ * On FreeBSD we currently just strip devid and phys_path to avoid confusion:
+ * every ZPOOL_CONFIG_DEVID and ZPOOL_CONFIG_PHYS_PATH pair is removed from
+ * 'nv'.  The removal results are deliberately ignored.
+ */
+void
+update_vdev_config_dev_strs(nvlist_t *nv)
+{
+	(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+	(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+}
+
+/*
+ * Device names, relative to EXCLUDED_DIR, that are never even looked at.
+ * EXCLUDED_DIR_LEN is the length of the directory prefix without its
+ * terminating NUL.
+ */
+#define	EXCLUDED_DIR		"/dev/"
+#define	EXCLUDED_DIR_LEN	(sizeof (EXCLUDED_DIR) - 1)
+
+static const char * const excluded_devs[] = {
+	"nfslock",
+	"sequencer",
+	"zfs",
+};
+
+/*
+ * Probe the device named by the rdsk_node_t passed in 'arg' for ZFS labels.
+ *
+ * Excluded /dev entries, anything that cannot be opened or stat'ed, objects
+ * that are not regular files or character/block devices, and objects smaller
+ * than SPA_MINDEVSIZE are silently skipped.  When at least one label is read
+ * the resulting config and the label count are stored in rn->rn_config and
+ * rn->rn_num_labels; otherwise the node is left untouched.
+ */
+void
+zpool_open_func(void *arg)
+{
+	rdsk_node_t *rn = arg;
+	struct stat64 statbuf;
+	nvlist_t *config;
+	size_t i;
+	int num_labels;
+	int fd;
+	off_t mediasize = 0;
+
+	/*
+	 * Do not even look at excluded devices.  The comparison only covers
+	 * the length of the excluded name, so any /dev entry whose name
+	 * begins with an excluded name is skipped as well.
+	 */
+	if (strncmp(rn->rn_name, EXCLUDED_DIR, EXCLUDED_DIR_LEN) == 0) {
+		char *name = rn->rn_name + EXCLUDED_DIR_LEN;
+		for (i = 0; i < nitems(excluded_devs); ++i) {
+			const char *excluded_name = excluded_devs[i];
+			size_t len = strlen(excluded_name);
+			if (strncmp(name, excluded_name, len) == 0) {
+				return;
+			}
+		}
+	}
+
+	/*
+	 * O_NONBLOCK so we don't hang trying to open things like serial ports.
+	 */
+	if ((fd = open(rn->rn_name, O_RDONLY|O_NONBLOCK|O_CLOEXEC)) < 0)
+		return;
+
+	/*
+	 * Ignore failed stats.
+	 */
+	if (fstat64(fd, &statbuf) != 0)
+		goto out;
+	/*
+	 * We only want regular files, character devs and block devs.
+	 */
+	if (S_ISREG(statbuf.st_mode)) {
+		/* Check if this file is too small to hold a zpool. */
+		if (statbuf.st_size < SPA_MINDEVSIZE) {
+			goto out;
+		}
+	} else if (S_ISCHR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode)) {
+		/* Check if this device is too small to hold a zpool. */
+		if (ioctl(fd, DIOCGMEDIASIZE, &mediasize) != 0 ||
+		    mediasize < SPA_MINDEVSIZE) {
+			goto out;
+		}
+	} else {
+		goto out;
+	}
+
+	if (zpool_read_label(fd, &config, &num_labels) != 0)
+		goto out;
+	/* A successful read that found no labels still hands back a config. */
+	if (num_labels == 0) {
+		nvlist_free(config);
+		goto out;
+	}
+
+	rn->rn_config = config;
+	rn->rn_num_labels = num_labels;
+
+	/* TODO: Reuse labelpaths logic from Linux? */
+out:
+	/* Every path that got past open() funnels through here. */
+	(void) close(fd);
+}
+
+/* Built-in list of directories handed out by zpool_default_search_paths(). */
+static const char *
+zpool_default_import_path[] = {
+	"/dev"
+};
+
+/*
+ * Return the built-in search path list and store its entry count in *count.
+ */
+const char * const *
+zpool_default_search_paths(size_t *count)
+{
+	const size_t npaths = nitems(zpool_default_import_path);
+
+	*count = npaths;
+	return (zpool_default_import_path);
+}
+
+/*
+ * Build the slice cache from the GEOM tree.
+ *
+ * A new AVL tree is allocated into *slice_cache and one rdsk_node_t named
+ * "/dev/<provider name>" is inserted for every provider of every geom of
+ * every class returned by geom_gettree().  Providers of class "ZFS::ZVOL"
+ * are skipped when the vfs.zfs.vol.recursive sysctl reads as 0.
+ *
+ * Returns the geom_gettree() error if the tree cannot be fetched (in which
+ * case *slice_cache is not touched), otherwise 0.
+ */
+int
+zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+    avl_tree_t **slice_cache)
+{
+	const char *oid = "vfs.zfs.vol.recursive";
+	char *end, path[MAXPATHLEN];
+	rdsk_node_t *slice;
+	struct gmesh mesh;
+	struct gclass *mp;
+	struct ggeom *gp;
+	struct gprovider *pp;
+	avl_index_t where;
+	int error, value;
+	size_t pathleft, size = sizeof (value);
+	boolean_t skip_zvols = B_FALSE;
+
+	/* 'end' marks where each provider name gets appended after "/dev/". */
+	end = stpcpy(path, "/dev/");
+	pathleft = &path[sizeof (path)] - end;
+
+	error = geom_gettree(&mesh);
+	if (error != 0)
+		return (error);
+
+	/* A failed sysctl lookup leaves zvols included. */
+	if (sysctlbyname(oid, &value, &size, NULL, 0) == 0 && value == 0)
+		skip_zvols = B_TRUE;
+
+	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
+	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
+	    offsetof(rdsk_node_t, rn_node));
+
+	LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
+		if (skip_zvols && strcmp(mp->lg_name, "ZFS::ZVOL") == 0)
+			continue;
+		LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
+			LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+				/* Overwrites the previous name in 'path'. */
+				strlcpy(end, pp->lg_name, pathleft);
+				slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+				slice->rn_name = zutil_strdup(hdl, path);
+				slice->rn_vdev_guid = 0;
+				slice->rn_lock = lock;
+				slice->rn_avl = *slice_cache;
+				slice->rn_hdl = hdl;
+				slice->rn_labelpaths = B_FALSE;
+				slice->rn_order = IMPORT_ORDER_DEFAULT;
+
+				/* Drop the node if an equal one is cached. */
+				pthread_mutex_lock(lock);
+				if (avl_find(*slice_cache, slice, &where)) {
+					free(slice->rn_name);
+					free(slice);
+				} else {
+					avl_insert(*slice_cache, slice, where);
+				}
+				pthread_mutex_unlock(lock);
+			}
+		}
+	}
+
+	geom_deletetree(&mesh);
+
+	return (0);
+}
+
+/*
+ * Device flush hook.  This implementation does nothing with 'fd' and
+ * always reports success (0).
+ */
+int
+zfs_dev_flush(int fd __unused)
+{
+	return (0);
+}
+
+/*
+ * Intentionally empty in this implementation: 'config' is left unmodified.
+ */
+void
+update_vdevs_config_dev_sysfs_path(nvlist_t *config)
+{
+}

diff --git a/zfs/lib/libzutil/os/linux/zutil_compat.c b/zfs/lib/libzutil/os/linux/zutil_compat.c
new file mode 100644
index 0000000..173ae9c
--- /dev/null
+++ b/zfs/lib/libzutil/os/linux/zutil_compat.c

@@ -0,0 +1,30 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/zfs_ioctl.h>
+#include <libzutil.h>
+
+/*
+ * Issue the ZFS ioctl 'request' on 'fd' with 'zc' as its argument.
+ * The ioctl() return value is passed straight back to the caller.
+ */
+int
+zfs_ioctl_fd(int fd, unsigned long request, zfs_cmd_t *zc)
+{
+	return (ioctl(fd, request, zc));
+}

diff --git a/zfs/lib/libzutil/os/linux/zutil_device_path_os.c b/zfs/lib/libzutil/os/linux/zutil_device_path_os.c
new file mode 100644
index 0000000..e443899
--- /dev/null
+++ b/zfs/lib/libzutil/os/linux/zutil_device_path_os.c

@@ -0,0 +1,683 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/efi_partition.h>
+
+#ifdef HAVE_LIBUDEV
+#include <libudev.h>
+#endif
+
+#include <libzutil.h>
+
+/*
+ * Append partition suffix to an otherwise fully qualified device path.
+ * This is used to generate the full path as it is stored in
+ * ZPOOL_CONFIG_PATH for whole disk devices.
+ *
+ * Paths under UDISK_ROOT or ZVOL_ROOT get "-part1"; any other path gets
+ * "p1" when it ends in a digit and "1" otherwise.  'max_len' is the size
+ * of the buffer behind 'path'.
+ *
+ * On success the new length of 'path' is returned; if the suffix would
+ * not fit, 'path' is left unchanged and -1 is returned.
+ */
+int
+zfs_append_partition(char *path, size_t max_len)
+{
+	size_t len = strlen(path);
+
+	if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
+	    (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
+		if (len + 6 >= max_len)
+			return (-1);
+
+		(void) strcat(path, "-part1");
+		len += 6;
+	} else {
+		if (len + 2 >= max_len)
+			return (-1);
+
+		/*
+		 * Check len first so an empty path never reads path[-1], and
+		 * cast to unsigned char as the <ctype.h> contract requires.
+		 */
+		if (len > 0 && isdigit((unsigned char)path[len - 1])) {
+			(void) strcat(path, "p1");
+			len += 2;
+		} else {
+			(void) strcat(path, "1");
+			len += 1;
+		}
+	}
+
+	return ((int)len);
+}
+
+/*
+ * Remove partition suffix from a vdev path.  Partition suffixes may take three
+ * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
+ * case only occurs when the suffix is preceded by a digit, e.g. "md0p0".  The
+ * third case only occurs when preceded by a string matching the regular
+ * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
+ *
+ * The input is never modified; the work is done on a strdup() copy.  If no
+ * suffix is recognized the copy is returned unchanged.  Returns NULL only
+ * when the copy cannot be allocated.
+ *
+ * caller must free the returned string
+ */
+char *
+zfs_strip_partition(char *path)
+{
+	char *tmp = strdup(path);
+	char *part = NULL, *d = NULL;
+	if (!tmp)
+		return (NULL);
+
+	/*
+	 * In every branch 'part' is where the string would be cut and 'd'
+	 * is where the run of partition digits is expected to start.
+	 */
+	if ((part = strstr(tmp, "-part")) && part != tmp) {
+		d = part + 5;
+	} else if ((part = strrchr(tmp, 'p')) &&
+	    part > tmp + 1 && isdigit(*(part-1))) {
+		d = part + 1;
+	} else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
+	    tmp[1] == 'd') {
+		/* Skip the drive letters; the cut point follows them. */
+		for (d = &tmp[2]; isalpha(*d); part = ++d) { }
+	} else if (strncmp("xvd", tmp, 3) == 0) {
+		for (d = &tmp[3]; isalpha(*d); part = ++d) { }
+	}
+	/* Only cut when digits, and nothing but digits, run to the end. */
+	if (part && d && *d != '\0') {
+		for (; isdigit(*d); d++) { }
+		if (*d == '\0')
+			*part = '\0';
+	}
+
+	return (tmp);
+}
+
+/*
+ * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
+ *
+ * path:	/dev/sda1
+ * returns:	/dev/sda
+ *
+ * Only the final path component is passed to zfs_strip_partition(); a path
+ * without any '/' is treated as a bare device name.  Returns NULL if an
+ * allocation fails.
+ *
+ * Returned string must be freed.
+ */
+static char *
+zfs_strip_partition_path(char *path)
+{
+	char *newpath = strdup(path);
+	char *sd_offset;
+	char *new_sd;
+
+	if (!newpath)
+		return (NULL);
+
+	/*
+	 * Point to "sda1" part of "/dev/sda1".  Without a '/' the whole
+	 * string is the device name; never compute NULL + 1.
+	 */
+	sd_offset = strrchr(newpath, '/');
+	sd_offset = (sd_offset != NULL) ? sd_offset + 1 : newpath;
+
+	/* Get our new name "sda" */
+	new_sd = zfs_strip_partition(sd_offset);
+	if (!new_sd) {
+		free(newpath);
+		return (NULL);
+	}
+
+	/* Paste the "sda" where "sda1" was; it can only be shorter or equal */
+	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
+
+	/* Free temporary "sda" */
+	free(new_sd);
+
+	return (newpath);
+}
+
+/*
+ * Strip the unwanted portion of a device path.
+ *
+ * Returns a pointer, inside 'path', to the component following the last
+ * '/'.  A path that contains no '/' is returned unchanged instead of
+ * computing a pointer from NULL.
+ */
+char *
+zfs_strip_path(char *path)
+{
+	char *slash = strrchr(path, '/');
+
+	return (slash != NULL ? slash + 1 : path);
+}
+
+/*
+ * Read the first line of a sysfs file into an allocated buffer and remove
+ * the trailing newline, if any.
+ *
+ * This is useful for reading sysfs files that return a single string.  Return
+ * an allocated string pointer on success, NULL otherwise (the file cannot be
+ * opened, nothing can be read, or the copy cannot be allocated).  Returned
+ * buffer must be freed by the user.
+ */
+static char *
+zfs_read_sysfs_file(char *filepath)
+{
+	char buf[4096];	/* all sysfs files report 4k size */
+	char *str = NULL;
+
+	FILE *fp = fopen(filepath, "r");
+	if (fp == NULL) {
+		return (NULL);
+	}
+	if (fgets(buf, sizeof (buf), fp) == buf) {
+		/* success */
+
+		/*
+		 * Remove the last newline (if any).  fgets() can hand back an
+		 * empty string when the data starts with a NUL byte, so check
+		 * the length before indexing buf[len - 1].
+		 */
+		size_t len = strlen(buf);
+		if (len > 0 && buf[len - 1] == '\n') {
+			buf[len - 1] = '\0';
+		}
+		str = strdup(buf);
+	}
+
+	fclose(fp);
+
+	return (str);
+}
+
+/*
+ * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
+ * the drive (in /sys/bus/pci/slots).
+ *
+ * For example:
+ *     dev:            "nvme0n1"
+ *     returns:        "/sys/bus/pci/slots/0"
+ *
+ * 'dev' must be an NVMe device; any other name yields NULL.  A leading
+ * directory (like "/dev/") is ignored.
+ *
+ * Returned string must be freed.  Returns NULL on error or no sysfs path.
+ */
+static char *
+zfs_get_pci_slots_sys_path(const char *dev_name)
+{
+	DIR *dp = NULL;
+	struct dirent *ep;
+	char *address1 = NULL;
+	char *address2 = NULL;
+	char *path = NULL;
+	char buf[MAXPATHLEN];
+	char *tmp;
+
+	/* If they preface 'dev' with a path (like "/dev") then strip it off */
+	tmp = strrchr(dev_name, '/');
+	if (tmp != NULL)
+		dev_name = tmp + 1;    /* +1 since we want the chr after '/' */
+
+	if (strncmp("nvme", dev_name, 4) != 0)
+		return (NULL);
+
+	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
+	    dev_name);
+
+	address1 = zfs_read_sysfs_file(buf);
+	if (!address1)
+		return (NULL);
+
+	/*
+	 * /sys/block/nvme0n1/device/address format will
+	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
+	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
+	 */
+	tmp = strrchr(address1, '.');
+	if (tmp != NULL)
+		*tmp = '\0';
+
+	dp = opendir("/sys/bus/pci/slots/");
+	if (dp == NULL) {
+		free(address1);
+		return (NULL);
+	}
+
+	/*
+	 * Look through all the /sys/bus/pci/slots/ subdirs
+	 */
+	while ((ep = readdir(dp))) {
+		/*
+		 * We only care about directory names that are a single number.
+		 * Sometimes there's other directories like
+		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
+		 */
+		if (!zfs_isnumber(ep->d_name))
+			continue;
+
+		(void) snprintf(buf, sizeof (buf),
+		    "/sys/bus/pci/slots/%s/address", ep->d_name);
+
+		address2 = zfs_read_sysfs_file(buf);
+		if (!address2)
+			continue;
+
+		if (strcmp(address1, address2) == 0) {
+			/* Addresses match, we're all done */
+			free(address2);
+			if (asprintf(&path, "/sys/bus/pci/slots/%s",
+			    ep->d_name) == -1) {
+				/*
+				 * If asprintf() fails, 'path' is undefined.
+				 * Reset it so we can never return garbage.
+				 */
+				path = NULL;
+				continue;
+			}
+			break;
+		}
+		free(address2);
+	}
+
+	closedir(dp);
+	free(address1);
+
+	return (path);
+}
+
+/*
+ * Given a dev name like "sda", return the full enclosure sysfs path to
+ * the disk.  You can also pass in the name with "/dev" prepended
+ * to it (like /dev/sda).  This works for both JBODs and NVMe PCI devices.
+ *
+ * For example, disk "sda" in enclosure slot 1:
+ *     dev_name:       "sda"
+ *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
+ *
+ * Or:
+ *
+ *      dev_name:   "nvme0n1"
+ *      returns:    "/sys/bus/pci/slots/0"
+ *
+ * 'dev' must be a non-devicemapper device.
+ *
+ * Returned string must be freed.  Returns NULL on error, including a NULL
+ * dev_name.
+ */
+char *
+zfs_get_enclosure_sysfs_path(const char *dev_name)
+{
+	DIR *dp = NULL;
+	struct dirent *ep;
+	char buf[MAXPATHLEN];
+	char *tmp1 = NULL;
+	char *tmp2 = NULL;
+	char *tmp3 = NULL;
+	char *path = NULL;
+	ssize_t size;
+	int tmpsize;
+
+	if (dev_name == NULL)
+		return (NULL);
+
+	/* If they preface 'dev' with a path (like "/dev") then strip it off */
+	tmp1 = strrchr(dev_name, '/');
+	if (tmp1 != NULL)
+		dev_name = tmp1 + 1;    /* +1 since we want the chr after '/' */
+
+	/* From here on tmp1 owns an allocated string (or is NULL). */
+	tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
+	if (tmpsize == -1 || tmp1 == NULL) {
+		tmp1 = NULL;
+		goto end;
+	}
+
+	dp = opendir(tmp1);
+	if (dp == NULL)
+		goto end;
+
+	/*
+	 * Look through all sysfs entries in /sys/block/<dev>/device for
+	 * the enclosure symlink.  Every path below the name filter leaves
+	 * the loop, so tmp2 and path are allocated at most once.
+	 */
+	while ((ep = readdir(dp))) {
+		/* Ignore everything that's not our enclosure_device link */
+		if (strstr(ep->d_name, "enclosure_device") == NULL)
+			continue;
+
+		if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1) {
+			tmp2 = NULL;
+			break;
+		}
+
+		size = readlink(tmp2, buf, sizeof (buf));
+
+		/* Did readlink fail or crop the link name? */
+		if (size < 0 || (size_t)size >= sizeof (buf))
+			break;
+
+		/*
+		 * We got a valid link.  readlink() doesn't terminate strings
+		 * so we have to do it.
+		 */
+		buf[size] = '\0';
+
+		/*
+		 * Our link will look like:
+		 *
+		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
+		 *
+		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
+		 */
+		tmp3 = strstr(buf, "enclosure");
+		if (tmp3 == NULL)
+			break;
+
+		if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
+			/* If asprintf() fails, 'path' is undefined */
+			path = NULL;
+		}
+
+		/*
+		 * Success or not, stop here.  Scanning on after a hit would
+		 * overwrite (and leak) tmp2 and path on a second match.
+		 */
+		break;
+	}
+
+end:
+	free(tmp2);
+	free(tmp1);
+
+	if (dp != NULL)
+		closedir(dp);
+
+	if (!path) {
+		/*
+		 * This particular disk isn't in a JBOD.  It could be an NVMe
+		 * drive. If so, look up the NVMe device's path in
+		 * /sys/bus/pci/slots/. Within that directory is a 'attention'
+		 * file which controls the NVMe fault LED.
+		 */
+		path = zfs_get_pci_slots_sys_path(dev_name);
+	}
+
+	return (path);
+}
+
+/*
+ * Allocate and return the underlying device name for a device mapper device.
+ *
+ * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
+ * DM device (like /dev/disk/by-vdev/A0) are also allowed.
+ *
+ * If the DM device has multiple underlying devices (like with multipath
+ * DM devices), then favor underlying devices that have a symlink back to
+ * their enclosure device in sysfs.  This will be useful for the
+ * zedlet scripts that toggle the fault LED.
+ *
+ * Returns an underlying device name, or NULL on error or no match.  If dm_name
+ * is not a DM device then return NULL.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+static char *
+dm_get_underlying_path(const char *dm_name)
+{
+	DIR *dp = NULL;
+	struct dirent *ep;
+	char *realp;
+	char *tmp = NULL;
+	char *path = NULL;
+	char *dev_str;
+	int size;
+	char *first_path = NULL;
+	char *enclosure_path;
+
+	if (dm_name == NULL)
+		return (NULL);
+
+	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
+	realp = realpath(dm_name, NULL);
+	if (realp == NULL)
+		return (NULL);
+
+	/*
+	 * If they preface 'dev' with a path (like "/dev") then strip it off.
+	 * We just want the 'dm-N' part.
+	 */
+	tmp = strrchr(realp, '/');
+	if (tmp != NULL)
+		dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
+	else
+		dev_str = realp;	/* no '/': never hand NULL to "%s" */
+
+	/* From here on tmp owns an allocated string (or is NULL). */
+	if ((size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str)) == -1) {
+		tmp = NULL;
+		goto end;
+	}
+
+	dp = opendir(tmp);
+	if (dp == NULL)
+		goto end;
+
+	/*
+	 * A device-mapper device can have multiple paths to it (multipath).
+	 * Favor paths that have a symlink back to their enclosure device.
+	 * We have to do this since some enclosures may only provide a symlink
+	 * back for one underlying path to a disk and not the other.
+	 *
+	 * If no paths have links back to their enclosure, then just return the
+	 * first path.
+	 */
+	while ((ep = readdir(dp))) {
+		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
+			if (!first_path)
+				first_path = strdup(ep->d_name);
+
+			enclosure_path =
+			    zfs_get_enclosure_sysfs_path(ep->d_name);
+
+			if (!enclosure_path)
+				continue;
+
+			if ((size = asprintf(
+			    &path, "/dev/%s", ep->d_name)) == -1)
+				path = NULL;
+			free(enclosure_path);
+			break;
+		}
+	}
+
+end:
+	if (dp != NULL)
+		closedir(dp);
+	free(tmp);
+	free(realp);
+
+	if (!path && first_path) {
+		/*
+		 * None of the underlying paths had a link back to their
+		 * enclosure devices.  Throw up our hands and return the first
+		 * underlying path.
+		 */
+		if ((size = asprintf(&path, "/dev/%s", first_path)) == -1)
+			path = NULL;
+	}
+
+	free(first_path);
+	return (path);
+}
+
+/*
+ * Tell whether 'dev_name' is a device mapper or multipath device.
+ * The answer is B_TRUE exactly when dm_get_underlying_path() can find an
+ * underlying device for it, and B_FALSE otherwise.
+ */
+boolean_t
+zfs_dev_is_dm(const char *dev_name)
+{
+	char *underlying = dm_get_underlying_path(dev_name);
+	boolean_t is_dm = (underlying != NULL) ? B_TRUE : B_FALSE;
+
+	/* Only the existence of the path matters; free(NULL) is a no-op. */
+	free(underlying);
+	return (is_dm);
+}
+
+/*
+ * A "whole disk" is an entire physical disk (something we can label,
+ * toggle the write cache on, etc.) rather than the full capacity of a
+ * pseudo-device such as lofi or did.  The check acts as if the disk were
+ * about to be labeled: the device is opened and an EFI label is allocated
+ * and initialized for it, which should be a pretty good test of whether
+ * it's a viable device or not.  Returns B_TRUE if that works and B_FALSE
+ * if the open or the label setup fails.
+ */
+boolean_t
+zfs_dev_is_whole_disk(const char *dev_name)
+{
+	struct dk_gpt *label = NULL;
+	boolean_t whole = B_FALSE;
+	int fd = open(dev_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
+
+	if (fd < 0)
+		return (B_FALSE);
+
+	if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) == 0) {
+		efi_free(label);
+		whole = B_TRUE;
+	}
+
+	(void) close(fd);
+
+	return (whole);
+}
+
+/*
+ * Lookup the underlying device for a device name
+ *
+ * Often you'll have a symlink to a device, a partition device,
+ * or a multipath device, and want to look up the underlying device.
+ * This function returns the underlying device name.  If the device
+ * name is already the underlying device, then just return the same
+ * name.  If the device is a DM device with multiple underlying devices
+ * then return the first one.
+ *
+ * For example:
+ *
+ * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
+ * dev_name:	/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
+ * returns:	/dev/sda
+ *
+ * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
+ * dev_name:	/dev/mapper/mpatha
+ * returns:	/dev/sda (first device)
+ *
+ * 3. /dev/sda (already the underlying device)
+ * dev_name:	/dev/sda
+ * returns:	/dev/sda
+ *
+ * 4. /dev/dm-3 (mapped to /dev/sda)
+ * dev_name:	/dev/dm-3
+ * returns:	/dev/sda
+ *
+ * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
+ * dev_name:	/dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
+ * returns:	/dev/sdb
+ *
+ * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
+ * dev_name:	/dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
+ * returns:	/dev/sda
+ *
+ * Returns underlying device name, or NULL on error or no match.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+char *
+zfs_get_underlying_path(const char *dev_name)
+{
+	char *resolved, *name;
+
+	if (dev_name == NULL)
+		return (NULL);
+
+	/* Try the DM lookup first; otherwise just un-symlinkize the name */
+	resolved = dm_get_underlying_path(dev_name);
+	if (resolved == NULL)
+		resolved = realpath(dev_name, NULL);
+	if (resolved == NULL)
+		return (NULL);
+
+	/* Drop any partition suffix from the last path component */
+	name = zfs_strip_partition_path(resolved);
+	free(resolved);
+
+	return (name);
+}
+
+
+#ifdef HAVE_LIBUDEV
+
+/*
+ * Decide from udev properties whether 'dev' is a multipath whole disk.
+ * All of the following must hold:
+ *	DEVNAME exists and starts with "/dev/dm-"
+ *	ID_PART_TABLE_TYPE does not exist or is not "gpt"
+ *	DM_UUID exists and starts with "mpath-"
+ *	ID_FS_LABEL does not exist (disk isn't labeled)
+ */
+static boolean_t
+is_mpath_udev_sane(struct udev_device *dev)
+{
+	const char *devname, *type, *uuid, *label;
+
+	devname = udev_device_get_property_value(dev, "DEVNAME");
+	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
+	uuid = udev_device_get_property_value(dev, "DM_UUID");
+	label = udev_device_get_property_value(dev, "ID_FS_LABEL");
+
+	if (devname == NULL || strncmp(devname, "/dev/dm-", 8) != 0)
+		return (B_FALSE);
+
+	if (type != NULL && strcmp(type, "gpt") == 0)
+		return (B_FALSE);
+
+	if (uuid == NULL || strncmp(uuid, "mpath-", 6) != 0)
+		return (B_FALSE);
+
+	return (label == NULL ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Check if a disk is a multipath "blank" disk:
+ *
+ * 1. The disk has udev values that suggest it's a multipath disk
+ * 2. The disk is not currently labeled with a filesystem of any type
+ * 3. There are no partitions on the disk
+ *
+ * 'path' is resolved with realpath() and must end in a "dm-*" node; the
+ * udev properties of that block device are then checked by
+ * is_mpath_udev_sane().  Returns B_FALSE on any lookup failure.
+ */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+	struct udev *udev;
+	struct udev_device *dev = NULL;
+	char nodepath[MAXPATHLEN];
+	char *sysname;
+	boolean_t is_sane;
+
+	if (realpath(path, nodepath) == NULL)
+		return (B_FALSE);
+	sysname = strrchr(nodepath, '/') + 1;
+	if (strncmp(sysname, "dm-", 3) != 0)
+		return (B_FALSE);
+	if ((udev = udev_new()) == NULL)
+		return (B_FALSE);
+	if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
+	    sysname)) == NULL) {
+		/* No device to release, but the context must not leak. */
+		udev_unref(udev);
+		return (B_FALSE);
+	}
+
+	/* Sanity check some udev values */
+	is_sane = is_mpath_udev_sane(dev);
+
+	/* Release the device before the context it was created from. */
+	udev_device_unref(dev);
+	udev_unref(udev);
+
+	return (is_sane);
+}
+
+#else /* HAVE_LIBUDEV */
+
+/*
+ * Variant compiled when HAVE_LIBUDEV is not defined: 'path' is ignored and
+ * the answer is always B_FALSE.
+ */
+/* ARGSUSED */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+	return (B_FALSE);
+}
+
+#endif /* HAVE_LIBUDEV */

diff --git a/zfs/lib/libzutil/os/linux/zutil_import_os.c b/zfs/lib/libzutil/os/linux/zutil_import_os.c
new file mode 100644
index 0000000..6c406d3
--- /dev/null
+++ b/zfs/lib/libzutil/os/linux/zutil_import_os.c

@@ -0,0 +1,911 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+/*
+ * Pool import support functions.
+ *
+ * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
+ * these commands are expected to run in the global zone, we can assume
+ * that the devices are all readable when called.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device.  If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ *	pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded.  Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed.  Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <libgen.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/dktp/fdisk.h>
+#include <sys/vdev_impl.h>
+#include <sys/fs/zfs.h>
+
+#include <thread_pool.h>
+#include <libzutil.h>
+#include <libnvpair.h>
+#include <libzfs.h>
+
+#include "zutil_import.h"
+
+#ifdef HAVE_LIBUDEV
+#include <libudev.h>
+#include <sched.h>
+#endif
+#include <blkid/blkid.h>
+
+#define	DEFAULT_IMPORT_PATH_SIZE	9
+#define	DEV_BYID_PATH	"/dev/disk/by-id/"
+
+/*
+ * Return B_TRUE when 'dev' is exactly "watchdog" or "watchdog" followed
+ * by a digit (and then anything), B_FALSE otherwise.
+ */
+static boolean_t
+is_watchdog_dev(char *dev)
+{
+	static const char prefix[] = "watchdog";
+	const size_t plen = sizeof (prefix) - 1;
+
+	if (strncmp(dev, prefix, plen) != 0)
+		return (B_FALSE);
+
+	/* Plain 'watchdog', or 'watchdog<digit><whatever>' */
+	if (dev[plen] == '\0' || isdigit(dev[plen]))
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Issue the BLKFLSBUF ioctl on 'fd' and pass the ioctl() result back to
+ * the caller.
+ */
+int
+zfs_dev_flush(int fd)
+{
+	return (ioctl(fd, BLKFLSBUF));
+}
+
+/*
+ * Probe the device named by the rdsk_node_t passed in 'arg' for ZFS labels.
+ *
+ * Devices named "hpet" or "watchdog*", anything that is not a regular file
+ * or block device, files smaller than SPA_MINDEVSIZE, and devices whose
+ * label guid does not match a non-zero rn->rn_vdev_guid are skipped.  On
+ * success the config and label count are stored in rn->rn_config and
+ * rn->rn_num_labels.
+ *
+ * When rn->rn_labelpaths is set, the path and devid found by label_paths()
+ * are added to the same AVL tree as new nodes (with rn_labelpaths cleared)
+ * and probed by calling this function again on each of them.
+ */
+void
+zpool_open_func(void *arg)
+{
+	rdsk_node_t *rn = arg;
+	libpc_handle_t *hdl = rn->rn_hdl;
+	struct stat64 statbuf;
+	nvlist_t *config;
+	char *bname, *dupname;
+	uint64_t vdev_guid = 0;
+	int error;
+	int num_labels = 0;
+	int fd;
+
+	/*
+	 * Skip devices with well known prefixes; there can be side effects
+	 * when opening devices which need to be avoided.
+	 *
+	 * hpet     - High Precision Event Timer
+	 * watchdog - Watchdog must be closed in a special way.
+	 */
+	dupname = zutil_strdup(hdl, rn->rn_name);
+	bname = basename(dupname);
+	error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
+	free(dupname);
+	if (error)
+		return;
+
+	/*
+	 * Ignore failed stats.  We only want regular files and block devices.
+	 */
+	if (stat64(rn->rn_name, &statbuf) != 0 ||
+	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
+		return;
+
+	/*
+	 * Preferentially open using O_DIRECT to bypass the block device
+	 * cache which may be stale for multipath devices.  An EINVAL errno
+	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
+	 */
+	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
+	if ((fd < 0) && (errno == EINVAL))
+		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
+	/* Record permission failures on the handle for the caller. */
+	if ((fd < 0) && (errno == EACCES))
+		hdl->lpc_open_access_error = B_TRUE;
+	if (fd < 0)
+		return;
+
+	/*
+	 * This file is too small to hold a zpool
+	 */
+	if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
+		(void) close(fd);
+		return;
+	}
+
+	error = zpool_read_label(fd, &config, &num_labels);
+	if (error != 0) {
+		(void) close(fd);
+		return;
+	}
+
+	/* A successful read that found no labels still hands back a config. */
+	if (num_labels == 0) {
+		(void) close(fd);
+		nvlist_free(config);
+		return;
+	}
+
+	/*
+	 * Check that the vdev is for the expected guid.  Additional entries
+	 * are speculatively added based on the paths stored in the labels.
+	 * Entries with valid paths but incorrect guids must be removed.
+	 */
+	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
+	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
+		(void) close(fd);
+		nvlist_free(config);
+		return;
+	}
+
+	(void) close(fd);
+
+	rn->rn_config = config;
+	rn->rn_num_labels = num_labels;
+
+	/*
+	 * Add additional entries for paths described by this label.
+	 */
+	if (rn->rn_labelpaths) {
+		char *path = NULL;
+		char *devid = NULL;
+		char *env = NULL;
+		rdsk_node_t *slice;
+		avl_index_t where;
+		int timeout;
+		int error;	/* NOTE(review): shadows the outer 'error' */
+
+		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
+			return;
+
+		/* An unset, unparsable or negative value uses the default. */
+		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
+		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
+		    timeout < 0) {
+			timeout = DISK_LABEL_WAIT;
+		}
+
+		/*
+		 * Allow devlinks to stabilize so all paths are available.
+		 */
+		zpool_label_disk_wait(rn->rn_name, timeout);
+
+		if (path != NULL) {
+			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+			slice->rn_name = zutil_strdup(hdl, path);
+			slice->rn_vdev_guid = vdev_guid;
+			slice->rn_avl = rn->rn_avl;
+			slice->rn_hdl = hdl;
+			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
+			slice->rn_labelpaths = B_FALSE;
+			/* The lock is dropped before recursing below. */
+			pthread_mutex_lock(rn->rn_lock);
+			if (avl_find(rn->rn_avl, slice, &where)) {
+			pthread_mutex_unlock(rn->rn_lock);
+				free(slice->rn_name);
+				free(slice);
+			} else {
+				avl_insert(rn->rn_avl, slice, where);
+				pthread_mutex_unlock(rn->rn_lock);
+				zpool_open_func(slice);
+			}
+		}
+
+		if (devid != NULL) {
+			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+			error = asprintf(&slice->rn_name, "%s%s",
+			    DEV_BYID_PATH, devid);
+			if (error == -1) {
+				free(slice);
+				return;
+			}
+
+			slice->rn_vdev_guid = vdev_guid;
+			slice->rn_avl = rn->rn_avl;
+			slice->rn_hdl = hdl;
+			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
+			slice->rn_labelpaths = B_FALSE;
+			/* Same insert-or-discard dance as for 'path' above. */
+			pthread_mutex_lock(rn->rn_lock);
+			if (avl_find(rn->rn_avl, slice, &where)) {
+				pthread_mutex_unlock(rn->rn_lock);
+				free(slice->rn_name);
+				free(slice);
+			} else {
+				avl_insert(rn->rn_avl, slice, where);
+				pthread_mutex_unlock(rn->rn_lock);
+				zpool_open_func(slice);
+			}
+		}
+	}
+}
+
+/*
+ * Built-in list of directories searched for pool member devices when
+ * ZPOOL_IMPORT_PATH is not set.  zfs_path_order() reports an entry's index
+ * in this table as the device's order, so the position of each directory
+ * is significant; see the per-entry notes for the intended preference.
+ */
+static char *
+zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
+	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
+	"/dev/mapper",		/* Use multipath devices before components */
+	"/dev/disk/by-partlabel", /* Single unique entry set by user */
+	"/dev/disk/by-partuuid", /* Generated partition uuid */
+	"/dev/disk/by-label",	/* Custom persistent labels */
+	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
+	"/dev/disk/by-id",	/* May be multiple entries and persistent */
+	"/dev/disk/by-path",	/* Encodes physical location and persistent */
+	"/dev"			/* UNSAFE device names will change */
+};
+
+/*
+ * Hand out the built-in search directory table as a read-only array and
+ * store its number of entries in 'count'.
+ */
+const char * const *
+zpool_default_search_paths(size_t *count)
+{
+	const char * const *paths =
+	    (const char * const *)zpool_default_import_path;
+
+	*count = DEFAULT_IMPORT_PATH_SIZE;
+
+	return (paths);
+}
+
+/*
+ * Given a full path to a device determine if that device appears in the
+ * import search path.  If it does, store the index of the first matching
+ * search directory in 'order' and return 0; otherwise return ENOENT and
+ * leave 'order' untouched.
+ *
+ * The search path is the colon separated ZPOOL_IMPORT_PATH environment
+ * variable when it is set (empty components are skipped and do not consume
+ * an index), or zpool_default_import_path[] otherwise.  A directory matches
+ * when it is a string prefix of 'name'.
+ *
+ * The environment value is scanned in place with strspn()/strcspn() rather
+ * than being duplicated and split with strtok(): that removes the unchecked
+ * strdup() (a failed allocation used to be handed straight to strtok()) and
+ * avoids clobbering strtok()'s hidden state from inside a library routine.
+ */
+static int
+zfs_path_order(char *name, int *order)
+{
+	const char *env = getenv("ZPOOL_IMPORT_PATH");
+	size_t dirlen;
+	int i;
+
+	if (env != NULL) {
+		for (i = 0; ; i++) {
+			/* Skip separators; empty components never count. */
+			env += strspn(env, ":");
+			dirlen = strcspn(env, ":");
+			if (dirlen == 0)
+				break;	/* reached the end of the list */
+
+			if (strncmp(name, env, dirlen) == 0) {
+				*order = i;
+				return (0);
+			}
+			env += dirlen;
+		}
+	} else {
+		for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
+			dirlen = strlen(zpool_default_import_path[i]);
+			if (strncmp(name, zpool_default_import_path[i],
+			    dirlen) == 0) {
+				*order = i;
+				return (0);
+			}
+		}
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Use libblkid to quickly enumerate all known zfs devices.
+ *
+ * On success 0 is returned and *slice_cache points to a newly allocated AVL
+ * tree holding one rdsk_node_t per device that blkid reports with
+ * TYPE=zfs_member.  If blkid fails before the tree is created, the blkid
+ * error (or EINVAL when no iterator could be started) is returned and
+ * *slice_cache is left NULL.
+ *
+ * 'lock' is stored in every node and is also held around each tree
+ * lookup/insert performed here.
+ */
+int
+zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+    avl_tree_t **slice_cache)
+{
+	rdsk_node_t *slice;
+	blkid_cache cache;
+	blkid_dev_iterate iter;
+	blkid_dev dev;
+	avl_index_t where;
+	int error;
+
+	/* Callers see NULL unless the tree is actually built below. */
+	*slice_cache = NULL;
+
+	error = blkid_get_cache(&cache, NULL);
+	if (error != 0)
+		return (error);
+
+	error = blkid_probe_all_new(cache);
+	if (error != 0) {
+		blkid_put_cache(cache);
+		return (error);
+	}
+
+	iter = blkid_dev_iterate_begin(cache);
+	if (iter == NULL) {
+		blkid_put_cache(cache);
+		return (EINVAL);
+	}
+
+	/* Restrict the iteration to devices tagged as ZFS pool members. */
+	error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
+	if (error != 0) {
+		blkid_dev_iterate_end(iter);
+		blkid_put_cache(cache);
+		return (error);
+	}
+
+	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
+	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
+	    offsetof(rdsk_node_t, rn_node));
+
+	while (blkid_dev_next(iter, &dev) == 0) {
+		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
+		slice->rn_vdev_guid = 0;	/* no expected guid yet */
+		slice->rn_lock = lock;
+		slice->rn_avl = *slice_cache;
+		slice->rn_hdl = hdl;
+		slice->rn_labelpaths = B_TRUE;
+
+		/*
+		 * Devices found under a known search directory are ordered
+		 * by that directory's index (shifted by the scan offset);
+		 * everything else gets the default order.
+		 */
+		error = zfs_path_order(slice->rn_name, &slice->rn_order);
+		if (error == 0)
+			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
+		else
+			slice->rn_order = IMPORT_ORDER_DEFAULT;
+
+		pthread_mutex_lock(lock);
+		if (avl_find(*slice_cache, slice, &where)) {
+			/* An equal node is already cached; drop this one. */
+			free(slice->rn_name);
+			free(slice);
+		} else {
+			avl_insert(*slice_cache, slice, where);
+		}
+		pthread_mutex_unlock(lock);
+	}
+
+	blkid_dev_iterate_end(iter);
+	blkid_put_cache(cache);
+
+	return (0);
+}
+
+/*
+ * Linux persistent device strings for vdev labels
+ *
+ * based on libudev for consistency with libudev disk add/remove events
+ */
+
+/* Output buffers filled in by encode_device_strings(). */
+typedef struct vdev_dev_strs {
+	/* persistent device id, written by zfs_device_get_devid() */
+	char	vds_devid[128];
+	/* physical location, or "" when zfs_device_get_physical() fails */
+	char	vds_devphys[128];
+} vdev_dev_strs_t;
+
+#ifdef HAVE_LIBUDEV
+
+/*
+ * Obtain the persistent device id string (describes what)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ *
+ * The id is copied into 'bufptr' (at most 'buflen' bytes) and 0 is
+ * returned; ENODATA is returned when no suitable identifier is found.
+ * Sources are tried in this order:
+ *   1. ID_BUS is set: the "<by-id dir><bus>-..." devlink, with the by-id
+ *      directory prefix stripped.
+ *   2. DM_UUID is set: "dm-uuid-<DM_UUID>".
+ *   3. a devlink under ZVOL_ROOT: the full devlink.
+ *   4. the device has an "nvme" subsystem parent: as 1. with bus "nvme".
+ */
+int
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+	struct udev_list_entry *link;
+	const char *bus;
+	char prefix[MAXPATHLEN];
+	size_t prefixlen;
+
+	/* The bus based by-id path is preferred */
+	bus = udev_device_get_property_value(dev, "ID_BUS");
+
+	if (bus == NULL) {
+		const char *dm_uuid;
+
+		/*
+		 * Multipath nodes: persistent uuid based identifier, e.g.
+		 * /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
+		 */
+		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
+		if (dm_uuid != NULL) {
+			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
+			return (0);
+		}
+
+		/* Volumes: persistent /dev/zvol/dataset identifier */
+		for (link = udev_device_get_devlinks_list_entry(dev);
+		    link != NULL; link = udev_list_entry_get_next(link)) {
+			const char *name = udev_list_entry_get_name(link);
+
+			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) != 0)
+				continue;
+
+			(void) strlcpy(bufptr, name, buflen);
+			return (0);
+		}
+
+		/*
+		 * NVME 'by-id' symlinks are similar to the bus case, so fall
+		 * through to the bus symlink search below.
+		 */
+		if (udev_device_get_parent_with_subsystem_devtype(dev,
+		    "nvme", NULL) == NULL)
+			return (ENODATA);
+
+		bus = "nvme";
+	}
+
+	/* Locate the bus specific by-id link. */
+	(void) snprintf(prefix, sizeof (prefix), "%s%s-", DEV_BYID_PATH, bus);
+	prefixlen = strlen(prefix);
+
+	for (link = udev_device_get_devlinks_list_entry(dev);
+	    link != NULL; link = udev_list_entry_get_next(link)) {
+		const char *name = udev_list_entry_get_name(link);
+
+		if (strncmp(name, prefix, prefixlen) != 0)
+			continue;
+
+		/* Report the link name without its directory prefix. */
+		(void) strlcpy(bufptr, name + strlen(DEV_BYID_PATH), buflen);
+		return (0);
+	}
+
+	return (ENODATA);
+}
+
+/*
+ * Obtain the persistent physical location string (describes where)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ *
+ * The string is copied into 'bufptr' (at most 'buflen' bytes) and 0 is
+ * returned; ENODATA is returned when none of the sources below applies.
+ */
+int
+zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+	/*
+	 * Properties tried in order.  Normal disks use ID_PATH for their
+	 * physical path.  Device mapper devices are virtual and don't have
+	 * one; for them ID_VDEV is used instead, which is set up via the
+	 * /etc/vdev_id.conf file and provides a persistent path to a virtual
+	 * device.  Without vdev_id.conf, multipath autoreplace with device
+	 * mapper cannot be used.
+	 */
+	static const char *const props[] = { "ID_PATH", "ID_VDEV" };
+	struct udev_list_entry *link;
+	const char *value;
+	size_t p;
+
+	for (p = 0; p < sizeof (props) / sizeof (props[0]); p++) {
+		value = udev_device_get_property_value(dev, props[p]);
+		if (value == NULL || value[0] == '\0')
+			continue;
+
+		(void) strlcpy(bufptr, value, buflen);
+		return (0);
+	}
+
+	/* For ZFS volumes use the persistent /dev/zvol/dataset identifier */
+	for (link = udev_device_get_devlinks_list_entry(dev);
+	    link != NULL; link = udev_list_entry_get_next(link)) {
+		value = udev_list_entry_get_name(link);
+		if (strncmp(value, ZVOL_ROOT, strlen(ZVOL_ROOT)) != 0)
+			continue;
+
+		(void) strlcpy(bufptr, value, buflen);
+		return (0);
+	}
+
+	/* For all other devices fall back to using the by-uuid name. */
+	for (link = udev_device_get_devlinks_list_entry(dev);
+	    link != NULL; link = udev_list_entry_get_next(link)) {
+		value = udev_list_entry_get_name(link);
+		if (strncmp(value, "/dev/disk/by-uuid", 17) != 0)
+			continue;
+
+		(void) strlcpy(bufptr, value, buflen);
+		return (0);
+	}
+
+	return (ENODATA);
+}
+
+/*
+ * A disk is treated as a multipath whole disk when all of these hold:
+ *	DEVNAME key value starts with "/dev/dm-"
+ *	ID_PART_TABLE_TYPE key does not exist or is not "gpt"
+ *	DM_UUID key exists
+ *
+ * Note that DM_NAME is not examined here.
+ */
+static boolean_t
+udev_mpath_whole_disk(struct udev_device *dev)
+{
+	const char *node, *ptable, *dm_uuid;
+
+	node = udev_device_get_property_value(dev, "DEVNAME");
+	ptable = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
+	dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
+
+	/* Must be a device mapper node. */
+	if (node == NULL || strncmp(node, "/dev/dm-", 8) != 0)
+		return (B_FALSE);
+
+	/* A gpt partition table rules the device out. */
+	if (ptable != NULL && strcmp(ptable, "gpt") == 0)
+		return (B_FALSE);
+
+	if (dm_uuid == NULL)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Report whether udev has finished setting up 'dev' (non-zero when ready).
+ * When udev_device_get_is_initialized() is not available, the presence of
+ * the DEVLINKS property is used as the readiness indicator instead.
+ */
+static int
+udev_device_is_ready(struct udev_device *dev)
+{
+	int ready;
+
+#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
+	ready = udev_device_get_is_initialized(dev);
+#else
+	/* wait for DEVLINKS property to be initialized */
+	ready = (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
+#endif
+
+	return (ready);
+}
+
+#else
+
+/*
+ * Stub used when HAVE_LIBUDEV is not defined: no device id can be derived,
+ * so ENODATA is always returned and 'bufptr' is left untouched.
+ */
+/* ARGSUSED */
+int
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+	return (ENODATA);
+}
+
+/*
+ * Stub used when HAVE_LIBUDEV is not defined: no physical location can be
+ * derived, so ENODATA is always returned and 'bufptr' is left untouched.
+ */
+/* ARGSUSED */
+int
+zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+	return (ENODATA);
+}
+
+#endif /* HAVE_LIBUDEV */
+
+/*
+ * Wait up to timeout_ms for udev to set up the device node.  The device is
+ * considered ready when libudev determines it has been initialized, all of
+ * the device links have been verified to exist, and it has been allowed to
+ * settle.  At this point the device can be accessed reliably.
+ * Depending on the complexity of the udev rules this process could take
+ * several seconds.
+ *
+ * With libudev: returns ENXIO if no udev context can be created, otherwise
+ * the result of the most recent readiness check (0 when the device was
+ * initialized and every devlink could be stat'ed, ENODEV if not, or if no
+ * check was ever made).
+ * Without libudev: only 'path' itself is polled; returns 0 once it has
+ * existed for the settle period, the stat64() errno for failures other
+ * than ENOENT, or ENODEV on timeout.
+ */
+int
+zpool_label_disk_wait(const char *path, int timeout_ms)
+{
+#ifdef HAVE_LIBUDEV
+	struct udev *udev;
+	struct udev_device *dev = NULL;
+	char nodepath[MAXPATHLEN];
+	char *sysname = NULL;
+	int ret = ENODEV;
+	int settle_ms = 50;	/* links must stay valid this long */
+	long sleep_ms = 10;	/* polling interval */
+	hrtime_t start, settle;
+
+	if ((udev = udev_new()) == NULL)
+		return (ENXIO);
+
+	start = gethrtime();
+	settle = 0;	/* 0 means "not currently in the settle period" */
+
+	do {
+		/* Resolve the path once; retry until the node shows up. */
+		if (sysname == NULL) {
+			if (realpath(path, nodepath) != NULL) {
+				sysname = strrchr(nodepath, '/') + 1;
+			} else {
+				(void) usleep(sleep_ms * MILLISEC);
+				/* jumps to the timeout test below */
+				continue;
+			}
+		}
+
+		dev = udev_device_new_from_subsystem_sysname(udev,
+		    "block", sysname);
+		if ((dev != NULL) && udev_device_is_ready(dev)) {
+			struct udev_list_entry *links, *link = NULL;
+
+			ret = 0;
+			links = udev_device_get_devlinks_list_entry(dev);
+
+			/* Every devlink must exist for the device to count. */
+			udev_list_entry_foreach(link, links) {
+				struct stat64 statbuf;
+				const char *name;
+
+				name = udev_list_entry_get_name(link);
+				errno = 0;
+				if (stat64(name, &statbuf) == 0 && errno == 0)
+					continue;
+
+				/* A link is missing: restart the settle. */
+				settle = 0;
+				ret = ENODEV;
+				break;
+			}
+
+			if (ret == 0) {
+				if (settle == 0) {
+					settle = gethrtime();
+				} else if (NSEC2MSEC(gethrtime() - settle) >=
+				    settle_ms) {
+					udev_device_unref(dev);
+					break;
+				}
+			}
+		}
+
+		/*
+		 * NOTE(review): dev may be NULL here; this relies on
+		 * udev_device_unref() tolerating NULL - confirm.
+		 */
+		udev_device_unref(dev);
+		(void) usleep(sleep_ms * MILLISEC);
+
+	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+	udev_unref(udev);
+
+	return (ret);
+#else
+	int settle_ms = 50;	/* path must stay present this long */
+	long sleep_ms = 10;	/* polling interval */
+	hrtime_t start, settle;
+	struct stat64 statbuf;
+
+	start = gethrtime();
+	settle = 0;
+
+	do {
+		errno = 0;
+		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
+			if (settle == 0)
+				settle = gethrtime();
+			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
+				return (0);
+		} else if (errno != ENOENT) {
+			/* Anything but "not there yet" is a hard failure. */
+			return (errno);
+		}
+
+		usleep(sleep_ms * MILLISEC);
+	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+	return (ENODEV);
+#endif /* HAVE_LIBUDEV */
+}
+
+/*
+ * Encode the persistent devices strings
+ * used for the vdev disk label
+ *
+ * On success (0) ds->vds_devid holds the device id and ds->vds_devphys the
+ * physical location, or an empty string when no location is available.
+ *
+ * Returns ENXIO if no udev context can be created; ENODEV if 'path' cannot
+ * be resolved, no udev block device exists for it, udev does not finish
+ * initializing it in time, or it is neither flagged 'wholedisk' nor a
+ * multipath whole disk; otherwise the result of zfs_device_get_devid().
+ * Built without libudev this always returns ENOENT.
+ */
+static int
+encode_device_strings(const char *path, vdev_dev_strs_t *ds,
+    boolean_t wholedisk)
+{
+#ifdef HAVE_LIBUDEV
+	struct udev *udev;
+	struct udev_device *dev = NULL;
+	char nodepath[MAXPATHLEN];
+	char *sysname;
+	int ret = ENODEV;
+	hrtime_t start;
+
+	if ((udev = udev_new()) == NULL)
+		return (ENXIO);
+
+	/* resolve path to a runtime device node instance */
+	if (realpath(path, nodepath) == NULL)
+		goto no_dev;
+
+	sysname = strrchr(nodepath, '/') + 1;
+
+	/*
+	 * Wait up to 3 seconds for udev to set up the device node context
+	 */
+	start = gethrtime();
+	do {
+		dev = udev_device_new_from_subsystem_sysname(udev, "block",
+		    sysname);
+		if (dev == NULL)
+			goto no_dev;
+		if (udev_device_is_ready(dev))
+			break;  /* udev ready */
+
+		/* Not ready: drop this snapshot and look it up again. */
+		udev_device_unref(dev);
+		dev = NULL;
+
+		if (NSEC2MSEC(gethrtime() - start) < 10)
+			(void) sched_yield();	/* yield/busy wait up to 10ms */
+		else
+			(void) usleep(10 * MILLISEC);
+
+	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
+
+	/* Timed out without ever seeing an initialized device. */
+	if (dev == NULL)
+		goto no_dev;
+
+	/*
+	 * Only whole disks require extra device strings.  A device reference
+	 * is held at this point, so leave through no_dev_ref; jumping to
+	 * no_dev here would leak it.
+	 */
+	if (!wholedisk && !udev_mpath_whole_disk(dev))
+		goto no_dev_ref;
+
+	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
+	if (ret != 0)
+		goto no_dev_ref;
+
+	/* physical location string (optional) */
+	if (zfs_device_get_physical(dev, ds->vds_devphys,
+	    sizeof (ds->vds_devphys)) != 0) {
+		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
+	}
+
+no_dev_ref:
+	udev_device_unref(dev);
+no_dev:
+	udev_unref(udev);
+
+	return (ret);
+#else
+	return (ENOENT);
+#endif
+}
+
+/*
+ * Rescan the enclosure sysfs path used for turning on enclosure LEDs and
+ * record it in the nvlist when one is found; otherwise drop any existing
+ * entry.  The stored value looks like:
+ *    vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
+ */
+static void
+update_vdev_config_dev_sysfs_path(nvlist_t *nv, char *path)
+{
+	/* Enclosure lookup works on the underlying device of 'path'. */
+	char *underlying = zfs_get_underlying_path(path);
+	char *enc_path = zfs_get_enclosure_sysfs_path(underlying);
+
+	if (enc_path == NULL) {
+		/* Not in an enclosure (or lookup failed): no entry. */
+		nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+	} else {
+		nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+		    enc_path);
+	}
+
+	free(enc_path);
+	free(underlying);
+}
+
+/*
+ * Per-vdev callback handed to for_each_vdev_in_nvlist(); it will get called
+ * for each leaf vdev.  Refreshes the enclosure sysfs path of 'nv' and
+ * returns 0, or returns 1 without touching 'nv' when the vdev has no path.
+ * 'hdl_data' and 'data' are not used.
+ */
+static int
+sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
+{
+	char *vdev_path = NULL;
+
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vdev_path) == 0) {
+		/* Rescan our enclosure sysfs path for this vdev */
+		update_vdev_config_dev_sysfs_path(nv, vdev_path);
+		return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Given an nvlist for our pool (with vdev tree), iterate over all the
+ * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
+ *
+ * 'config' must contain a ZPOOL_CONFIG_VDEV_TREE nvlist; its absence is
+ * treated as a fatal error via verify().
+ */
+void
+update_vdevs_config_dev_sysfs_path(nvlist_t *config)
+{
+	nvlist_t *nvroot = NULL;
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	/* The iterator's return value is not checked. */
+	for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
+}
+
+/*
+ * Update a leaf vdev's persistent device strings
+ *
+ * - only applies for a dedicated leaf vdev (aka whole disk)
+ * - updated during pool create|add|attach|import
+ * - used for device matching during auto-{online,expand,replace}
+ * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
+ * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
+ *
+ * single device node example:
+ * 	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
+ * 	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
+ *
+ * multipath device node example:
+ * 	devid:		'dm-uuid-mpath-35000c5006304de3f'
+ *
+ * We also store the enclosure sysfs path for turning on enclosure LEDs
+ * (if applicable):
+ *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
+ */
+void
+update_vdev_config_dev_strs(nvlist_t *nv)
+{
+	vdev_dev_strs_t strs;
+	char *optout, *vdev_type, *vdev_path;
+	uint64_t whole = 0;
+	boolean_t opted_out = B_FALSE;
+
+	/*
+	 * For the benefit of legacy ZFS implementations, allow
+	 * for opting out of devid strings in the vdev label.
+	 *
+	 * example use:
+	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
+	 *
+	 * explanation:
+	 * Older OpenZFS implementations had issues when attempting to
+	 * display pool config VDEV names if a "devid" NVP value is
+	 * present in the pool's config.
+	 *
+	 * For example, a pool that originated on illumos platform would
+	 * have a devid value in the config and "zpool status" would fail
+	 * when listing the config.
+	 *
+	 * A pool can be stripped of any "devid" values on import or
+	 * prevented from adding them on zpool create|add by setting
+	 * ZFS_VDEV_DEVID_OPT_OUT (a positive number, or a value starting
+	 * with "YES" or "ON", case insensitively).
+	 */
+	optout = getenv("ZFS_VDEV_DEVID_OPT_OUT");
+	if (optout != NULL) {
+		if (strtoul(optout, NULL, 0) > 0)
+			opted_out = B_TRUE;
+		else if (strncasecmp(optout, "YES", 3) == 0)
+			opted_out = B_TRUE;
+		else if (strncasecmp(optout, "ON", 2) == 0)
+			opted_out = B_TRUE;
+	}
+	if (opted_out) {
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+		return;
+	}
+
+	/* Only disk vdevs that carry a path are of interest. */
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &vdev_type) != 0)
+		return;
+	if (strcmp(vdev_type, VDEV_TYPE_DISK) != 0)
+		return;
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vdev_path) != 0)
+		return;
+
+	/* A missing whole_disk entry leaves the default of 0. */
+	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &whole);
+
+	if (encode_device_strings(vdev_path, &strs, (boolean_t)whole) != 0) {
+		/* Clear out any stale entries. */
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+		return;
+	}
+
+	/* Update device string values in the config nvlist. */
+	(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, strs.vds_devid);
+	if (strs.vds_devphys[0] != '\0') {
+		(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
+		    strs.vds_devphys);
+	}
+	update_vdev_config_dev_sysfs_path(nv, vdev_path);
+}

diff --git a/zfs/lib/libzutil/os/linux/zutil_setproctitle.c b/zfs/lib/libzutil/os/linux/zutil_setproctitle.c
new file mode 100644
index 0000000..4a6d12c
--- /dev/null
+++ b/zfs/lib/libzutil/os/linux/zutil_setproctitle.c

@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2013 Guillem Jover <guillem@hadrons.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/param.h>
+#include <libzutil.h>
+
+/*
+ * State shared by zfs_setproctitle_init() and zfs_setproctitle().  'base'
+ * stays NULL until initialization has completed successfully.
+ */
+static struct {
+	/* Original value. */
+	const char *arg0;
+
+	/* Title space available. */
+	char *base, *end;
+
+	/* Pointer to original nul character within base. */
+	char *nul;
+
+	/* Set once the "not initialized" warning has been printed. */
+	boolean_t warned;
+	/* Set once the whole title area has been cleared a first time. */
+	boolean_t reset;
+	/* Most recent error code recorded by the functions above. */
+	int error;
+} SPT;
+
+#define	LIBBSD_IS_PATHNAME_SEPARATOR(c) ((c) == '/')
+#define	SPT_MAXTITLE 255
+
+extern const char *__progname;
+
+/* Return the current program name as stored in __progname. */
+static const char *
+getprogname(void)
+{
+	return (__progname);
+}
+
+/*
+ * Point __progname at the last path component of 'progname', i.e. at the
+ * character following its final path separator, or at 'progname' itself
+ * when it contains no separator.  The string is not copied.
+ */
+static void
+setprogname(const char *progname)
+{
+	const char *tail = progname + strlen(progname);
+
+	/* Walk backwards until just past a separator or at the start. */
+	while (tail > progname && !LIBBSD_IS_PATHNAME_SEPARATOR(tail[-1]))
+		tail--;
+
+	__progname = tail;
+}
+
+
+/* Return the smaller of two sizes. */
+static inline size_t
+spt_min(size_t a, size_t b)
+{
+	if (a < b)
+		return (a);
+
+	return (b);
+}
+
+/*
+ * Replace the process environment with a freshly allocated, empty one.
+ * Returns 0 on success or errno when the allocation fails, in which case
+ * 'environ' is left unchanged.
+ *
+ * For discussion on the portability of the various methods, see
+ * https://lists.freebsd.org/pipermail/freebsd-stable/2008-June/043136.html
+ */
+static int
+spt_clearenv(void)
+{
+	char **fresh = malloc(sizeof (char *));
+
+	if (fresh == NULL)
+		return (errno);
+
+	/* A single NULL entry is an empty environment. */
+	*fresh = NULL;
+	environ = fresh;
+
+	return (0);
+}
+
+/*
+ * Rebuild the environment through setenv() if 'environ' still points at
+ * the original 'envp' array ('envc' entries).  Does nothing and returns 0
+ * when 'environ' has already been replaced.  On failure the error code is
+ * returned and 'environ' is pointed back at 'envp'.
+ */
+static int
+spt_copyenv(int envc, char *envp[])
+{
+	char **saved;
+	int nbytes, idx, error;
+
+	if (environ != envp)
+		return (0);
+
+	/*
+	 * Keep a shallow copy of the old array of pointers: clearenv() or
+	 * setenv() may be implemented to free the internal environ array,
+	 * and the old contents are still needed to build the new copy.
+	 */
+	nbytes = (envc + 1) * sizeof (char *);
+	saved = malloc(nbytes);
+	if (saved == NULL)
+		return (errno);
+	memcpy(saved, envp, nbytes);
+
+	error = spt_clearenv();
+
+	/* Re-add each NAME=VALUE entry; stop at the first failure. */
+	for (idx = 0; error == 0 && saved[idx] != NULL; idx++) {
+		char *sep = strchr(saved[idx], '=');
+
+		if (sep == NULL)
+			continue;
+
+		/* Split the entry in place for setenv(), then restore it. */
+		*sep = '\0';
+		if (setenv(saved[idx], sep + 1, 1) < 0)
+			error = errno;
+		*sep = '=';
+	}
+
+	if (error != 0)
+		environ = envp;
+
+	/*
+	 * Dispose of the shallow copy, now that we've finished transferring
+	 * (or given up on) the old environment.
+	 */
+	free(saved);
+
+	return (error);
+}
+
+/*
+ * Replace every non-NULL argv[] entry after argv[0] with a heap copy.
+ * The walk continues past 'argc' for as long as entries are non-NULL.
+ * Returns 0 on success or errno if a copy cannot be allocated; entries
+ * already replaced stay replaced.
+ */
+static int
+spt_copyargs(int argc, char *argv[])
+{
+	int i;
+
+	for (i = 1; i < argc || argv[i] != NULL; i++) {
+		char *copy;
+
+		if (argv[i] == NULL)
+			continue;
+
+		copy = strdup(argv[i]);
+		if (copy == NULL)
+			return (errno);
+
+		argv[i] = copy;
+	}
+
+	return (0);
+}
+
+/*
+ * Prepare for later zfs_setproctitle() calls.  Must be given main()'s
+ * argc, argv and envp.
+ *
+ * Measures the contiguous region that starts at argv[0] and runs through
+ * the following argv/envp strings, then moves the program name, the
+ * environment and the arguments to private copies so that region can be
+ * overwritten.  On any failure the error is stored in SPT.error and
+ * SPT.base stays NULL.
+ */
+void
+zfs_setproctitle_init(int argc, char *argv[], char *envp[])
+{
+	char *base, *end, *nul, *tmp;
+	int i, envc, error;
+
+	/* Try to make sure we got called with main() arguments. */
+	if (argc < 0)
+		return;
+
+	base = argv[0];
+	if (base == NULL)
+		return;
+
+	nul = base + strlen(base);
+	end = nul + 1;
+
+	/*
+	 * Extend 'end' over each argument that begins exactly where the
+	 * region currently ends; strings located elsewhere are ignored.
+	 */
+	for (i = 0; i < argc || (i >= argc && argv[i]); i++) {
+		if (argv[i] == NULL || argv[i] != end)
+			continue;
+
+		end = argv[i] + strlen(argv[i]) + 1;
+	}
+
+	/* Same for the environment strings; also counts the entries. */
+	for (i = 0; envp[i]; i++) {
+		if (envp[i] != end)
+			continue;
+
+		end = envp[i] + strlen(envp[i]) + 1;
+	}
+	envc = i;
+
+	/* Keep the original argv[0] so the title can be restored later. */
+	SPT.arg0 = strdup(argv[0]);
+	if (SPT.arg0 == NULL) {
+		SPT.error = errno;
+		return;
+	}
+
+	/* Re-point the program name at a private copy. */
+	tmp = strdup(getprogname());
+	if (tmp == NULL) {
+		SPT.error = errno;
+		return;
+	}
+	setprogname(tmp);
+
+	error = spt_copyenv(envc, envp);
+	if (error) {
+		SPT.error = error;
+		return;
+	}
+
+	error = spt_copyargs(argc, argv);
+	if (error) {
+		SPT.error = error;
+		return;
+	}
+
+	/* Publish the region only after everything has been copied. */
+	SPT.nul  = nul;
+	SPT.base = base;
+	SPT.end  = end;
+}
+
+/*
+ * Set the process title by overwriting the region recorded by
+ * zfs_setproctitle_init().
+ *
+ * 'fmt' is a printf-style format.  By default the title is prefixed with
+ * "<progname>: "; a leading '-' in 'fmt' suppresses that prefix.  A NULL
+ * 'fmt' restores the original argv[0].  The title is truncated to
+ * SPT_MAXTITLE characters and to the space available.
+ *
+ * If zfs_setproctitle_init() has not completed successfully, a warning is
+ * printed once and the call does nothing.  An empty or failed format
+ * stores errno in SPT.error and leaves the title unchanged.
+ */
+void
+zfs_setproctitle(const char *fmt, ...)
+{
+	/* Use buffer in case argv[0] is passed. */
+	char buf[SPT_MAXTITLE + 1];
+	va_list ap;
+	char *nul;
+	int len;
+
+	if (SPT.base == NULL) {
+		if (!SPT.warned) {
+			/*
+			 * The two literals are concatenated, so the space
+			 * after "please" is required.
+			 */
+			warnx("setproctitle not initialized, please "
+			    "call zfs_setproctitle_init()");
+			SPT.warned = B_TRUE;
+		}
+		return;
+	}
+
+	if (fmt) {
+		if (fmt[0] == '-') {
+			/* Skip program name prefix. */
+			fmt++;
+			len = 0;
+		} else {
+			/* Print program name heading for grep. */
+			snprintf(buf, sizeof (buf), "%s: ", getprogname());
+			len = strlen(buf);
+		}
+
+		va_start(ap, fmt);
+		len += vsnprintf(buf + len, sizeof (buf) - len, fmt, ap);
+		va_end(ap);
+	} else {
+		len = snprintf(buf, sizeof (buf), "%s", SPT.arg0);
+	}
+
+	if (len <= 0) {
+		SPT.error = errno;
+		return;
+	}
+
+	/*
+	 * The first call wipes the whole region; later calls only need to
+	 * clear as much as a previous title could have occupied.
+	 */
+	if (!SPT.reset) {
+		memset(SPT.base, 0, SPT.end - SPT.base);
+		SPT.reset = B_TRUE;
+	} else {
+		memset(SPT.base, 0, spt_min(sizeof (buf), SPT.end - SPT.base));
+	}
+
+	/* Clamp to the buffer and region sizes, leaving room for a NUL. */
+	len = spt_min(len, spt_min(sizeof (buf), SPT.end - SPT.base) - 1);
+	memcpy(SPT.base, buf, len);
+	nul = SPT.base + len;
+
+	/*
+	 * NOTE(review): the byte at the original terminator position is
+	 * deliberately made non-NUL whenever the new title does not extend
+	 * past it; presumably this affects how the title is read back by
+	 * tools such as ps - confirm against the libbsd original.
+	 */
+	if (nul < SPT.nul) {
+		*SPT.nul = '.';
+	} else if (nul == SPT.nul && nul + 1 < SPT.end) {
+		*SPT.nul = ' ';
+		*++nul = '\0';
+	}
+}

diff --git a/zfs/lib/libzutil/zutil_device_path.c b/zfs/lib/libzutil/zutil_device_path.c
index 1dc0d4d..27ca80e 100644
--- a/zfs/lib/libzutil/zutil_device_path.c
+++ b/zfs/lib/libzutil/zutil_device_path.c

@@ -23,54 +23,13 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
-#include <ctype.h>
 #include <errno.h>
-#include <dirent.h>
-#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <sys/efi_partition.h>
 
 #include <libzutil.h>
-#ifdef HAVE_LIBUDEV
-#include <libudev.h>
-#endif
-
-/*
- * Append partition suffix to an otherwise fully qualified device path.
- * This is used to generate the name the full path as its stored in
- * ZPOOL_CONFIG_PATH for whole disk devices.  On success the new length
- * of 'path' will be returned on error a negative value is returned.
- */
-int
-zfs_append_partition(char *path, size_t max_len)
-{
-	int len = strlen(path);
-
-	if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
-	    (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
-		if (len + 6 >= max_len)
-			return (-1);
-
-		(void) strcat(path, "-part1");
-		len += 6;
-	} else {
-		if (len + 2 >= max_len)
-			return (-1);
-
-		if (isdigit(path[len-1])) {
-			(void) strcat(path, "p1");
-			len += 2;
-		} else {
-			(void) strcat(path, "1");
-			len += 1;
-		}
-	}
-
-	return (len);
-}
 
 /*
  * Given a shorthand device name check if a file by that name exists in any
@@ -213,413 +172,3 @@
 
 	return (0);
 }
-
-/*
- * Allocate and return the underlying device name for a device mapper device.
- * If a device mapper device maps to multiple devices, return the first device.
- *
- * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
- * DM device (like /dev/disk/by-vdev/A0) are also allowed.
- *
- * Returns device name, or NULL on error or no match.  If dm_name is not a DM
- * device then return NULL.
- *
- * NOTE: The returned name string must be *freed*.
- */
-static char *
-dm_get_underlying_path(const char *dm_name)
-{
-	DIR *dp = NULL;
-	struct dirent *ep;
-	char *realp;
-	char *tmp = NULL;
-	char *path = NULL;
-	char *dev_str;
-	int size;
-
-	if (dm_name == NULL)
-		return (NULL);
-
-	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
-	realp = realpath(dm_name, NULL);
-	if (realp == NULL)
-		return (NULL);
-
-	/*
-	 * If they preface 'dev' with a path (like "/dev") then strip it off.
-	 * We just want the 'dm-N' part.
-	 */
-	tmp = strrchr(realp, '/');
-	if (tmp != NULL)
-		dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
-	else
-		dev_str = tmp;
-
-	size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
-	if (size == -1 || !tmp)
-		goto end;
-
-	dp = opendir(tmp);
-	if (dp == NULL)
-		goto end;
-
-	/* Return first sd* entry in /sys/block/dm-N/slaves/ */
-	while ((ep = readdir(dp))) {
-		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
-			size = asprintf(&path, "/dev/%s", ep->d_name);
-			break;
-		}
-	}
-
-end:
-	if (dp != NULL)
-		closedir(dp);
-	free(tmp);
-	free(realp);
-	return (path);
-}
-
-/*
- * Return 1 if device is a device mapper or multipath device.
- * Return 0 if not.
- */
-int
-zfs_dev_is_dm(const char *dev_name)
-{
-
-	char *tmp;
-	tmp = dm_get_underlying_path(dev_name);
-	if (tmp == NULL)
-		return (0);
-
-	free(tmp);
-	return (1);
-}
-
-/*
- * By "whole disk" we mean an entire physical disk (something we can
- * label, toggle the write cache on, etc.) as opposed to the full
- * capacity of a pseudo-device such as lofi or did.  We act as if we
- * are labeling the disk, which should be a pretty good test of whether
- * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
- * it isn't.
- */
-int
-zfs_dev_is_whole_disk(const char *dev_name)
-{
-	struct dk_gpt *label;
-	int fd;
-
-	if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
-		return (0);
-
-	if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
-		(void) close(fd);
-		return (0);
-	}
-
-	efi_free(label);
-	(void) close(fd);
-
-	return (1);
-}
-
-/*
- * Lookup the underlying device for a device name
- *
- * Often you'll have a symlink to a device, a partition device,
- * or a multipath device, and want to look up the underlying device.
- * This function returns the underlying device name.  If the device
- * name is already the underlying device, then just return the same
- * name.  If the device is a DM device with multiple underlying devices
- * then return the first one.
- *
- * For example:
- *
- * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
- * dev_name:	/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
- * returns:	/dev/sda
- *
- * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
- * dev_name:	/dev/mapper/mpatha
- * returns:	/dev/sda (first device)
- *
- * 3. /dev/sda (already the underlying device)
- * dev_name:	/dev/sda
- * returns:	/dev/sda
- *
- * 4. /dev/dm-3 (mapped to /dev/sda)
- * dev_name:	/dev/dm-3
- * returns:	/dev/sda
- *
- * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
- * dev_name:	/dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
- * returns:	/dev/sdb
- *
- * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
- * dev_name:	/dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
- * returns:	/dev/sda
- *
- * Returns underlying device name, or NULL on error or no match.
- *
- * NOTE: The returned name string must be *freed*.
- */
-char *
-zfs_get_underlying_path(const char *dev_name)
-{
-	char *name = NULL;
-	char *tmp;
-
-	if (dev_name == NULL)
-		return (NULL);
-
-	tmp = dm_get_underlying_path(dev_name);
-
-	/* dev_name not a DM device, so just un-symlinkize it */
-	if (tmp == NULL)
-		tmp = realpath(dev_name, NULL);
-
-	if (tmp != NULL) {
-		name = zfs_strip_partition_path(tmp);
-		free(tmp);
-	}
-
-	return (name);
-}
-
-/*
- * Given a dev name like "sda", return the full enclosure sysfs path to
- * the disk.  You can also pass in the name with "/dev" prepended
- * to it (like /dev/sda).
- *
- * For example, disk "sda" in enclosure slot 1:
- *     dev:            "sda"
- *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
- *
- * 'dev' must be a non-devicemapper device.
- *
- * Returned string must be freed.
- */
-char *
-zfs_get_enclosure_sysfs_path(const char *dev_name)
-{
-	DIR *dp = NULL;
-	struct dirent *ep;
-	char buf[MAXPATHLEN];
-	char *tmp1 = NULL;
-	char *tmp2 = NULL;
-	char *tmp3 = NULL;
-	char *path = NULL;
-	size_t size;
-	int tmpsize;
-
-	if (dev_name == NULL)
-		return (NULL);
-
-	/* If they preface 'dev' with a path (like "/dev") then strip it off */
-	tmp1 = strrchr(dev_name, '/');
-	if (tmp1 != NULL)
-		dev_name = tmp1 + 1;    /* +1 since we want the chr after '/' */
-
-	tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
-	if (tmpsize == -1 || tmp1 == NULL) {
-		tmp1 = NULL;
-		goto end;
-	}
-
-	dp = opendir(tmp1);
-	if (dp == NULL) {
-		tmp1 = NULL;	/* To make free() at the end a NOP */
-		goto end;
-	}
-
-	/*
-	 * Look though all sysfs entries in /sys/block/<dev>/device for
-	 * the enclosure symlink.
-	 */
-	while ((ep = readdir(dp))) {
-		/* Ignore everything that's not our enclosure_device link */
-		if (strstr(ep->d_name, "enclosure_device") == NULL)
-			continue;
-
-		if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
-		    tmp2 == NULL)
-			break;
-
-		size = readlink(tmp2, buf, sizeof (buf));
-
-		/* Did readlink fail or crop the link name? */
-		if (size == -1 || size >= sizeof (buf)) {
-			free(tmp2);
-			tmp2 = NULL;	/* To make free() at the end a NOP */
-			break;
-		}
-
-		/*
-		 * We got a valid link.  readlink() doesn't terminate strings
-		 * so we have to do it.
-		 */
-		buf[size] = '\0';
-
-		/*
-		 * Our link will look like:
-		 *
-		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
-		 *
-		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
-		 */
-		tmp3 = strstr(buf, "enclosure");
-		if (tmp3 == NULL)
-			break;
-
-		if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
-			/* If asprintf() fails, 'path' is undefined */
-			path = NULL;
-			break;
-		}
-
-		if (path == NULL)
-			break;
-	}
-
-end:
-	free(tmp2);
-	free(tmp1);
-
-	if (dp != NULL)
-		closedir(dp);
-
-	return (path);
-}
-
-/*
- * Remove partition suffix from a vdev path.  Partition suffixes may take three
- * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
- * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
- * third case only occurs when preceded by a string matching the regular
- * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
- *
- * caller must free the returned string
- */
-char *
-zfs_strip_partition(char *path)
-{
-	char *tmp = strdup(path);
-	char *part = NULL, *d = NULL;
-	if (!tmp)
-		return (NULL);
-
-	if ((part = strstr(tmp, "-part")) && part != tmp) {
-		d = part + 5;
-	} else if ((part = strrchr(tmp, 'p')) &&
-	    part > tmp + 1 && isdigit(*(part-1))) {
-		d = part + 1;
-	} else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
-	    tmp[1] == 'd') {
-		for (d = &tmp[2]; isalpha(*d); part = ++d) { }
-	} else if (strncmp("xvd", tmp, 3) == 0) {
-		for (d = &tmp[3]; isalpha(*d); part = ++d) { }
-	}
-	if (part && d && *d != '\0') {
-		for (; isdigit(*d); d++) { }
-		if (*d == '\0')
-			*part = '\0';
-	}
-
-	return (tmp);
-}
-
-/*
- * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
- *
- * path:	/dev/sda1
- * returns:	/dev/sda
- *
- * Returned string must be freed.
- */
-char *
-zfs_strip_partition_path(char *path)
-{
-	char *newpath = strdup(path);
-	char *sd_offset;
-	char *new_sd;
-
-	if (!newpath)
-		return (NULL);
-
-	/* Point to "sda1" part of "/dev/sda1" */
-	sd_offset = strrchr(newpath, '/') + 1;
-
-	/* Get our new name "sda" */
-	new_sd = zfs_strip_partition(sd_offset);
-	if (!new_sd) {
-		free(newpath);
-		return (NULL);
-	}
-
-	/* Paste the "sda" where "sda1" was */
-	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
-
-	/* Free temporary "sda" */
-	free(new_sd);
-
-	return (newpath);
-}
-
-#ifdef HAVE_LIBUDEV
-/*
- * A disk is considered a multipath whole disk when:
- *	DEVNAME key value has "dm-"
- *	DM_NAME key value has "mpath" prefix
- *	DM_UUID key exists
- *	ID_PART_TABLE_TYPE key does not exist or is not gpt
- */
-static boolean_t
-udev_mpath_whole_disk(struct udev_device *dev)
-{
-	const char *devname, *type, *uuid;
-
-	devname = udev_device_get_property_value(dev, "DEVNAME");
-	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
-	uuid = udev_device_get_property_value(dev, "DM_UUID");
-
-	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
-	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
-	    (uuid != NULL)) {
-		return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Check if a disk is effectively a multipath whole disk
- */
-boolean_t
-is_mpath_whole_disk(const char *path)
-{
-	struct udev *udev;
-	struct udev_device *dev = NULL;
-	char nodepath[MAXPATHLEN];
-	char *sysname;
-	boolean_t wholedisk = B_FALSE;
-
-	if (realpath(path, nodepath) == NULL)
-		return (B_FALSE);
-	sysname = strrchr(nodepath, '/') + 1;
-	if (strncmp(sysname, "dm-", 3) != 0)
-		return (B_FALSE);
-	if ((udev = udev_new()) == NULL)
-		return (B_FALSE);
-	if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
-	    sysname)) == NULL) {
-		udev_device_unref(dev);
-		return (B_FALSE);
-	}
-
-	wholedisk = udev_mpath_whole_disk(dev);
-
-	udev_device_unref(dev);
-	return (wholedisk);
-}
-#endif

diff --git a/zfs/lib/libzutil/zutil_import.c b/zfs/lib/libzutil/zutil_import.c
index e84680a..98f1389 100644
--- a/zfs/lib/libzutil/zutil_import.c
+++ b/zfs/lib/libzutil/zutil_import.c

@@ -24,6 +24,7 @@
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright 2015 RackTop Systems.
  * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 /*
@@ -46,16 +47,12 @@
  * using our derived config, and record the results.
  */
 
+#include <aio.h>
 #include <ctype.h>
-#include <devid.h>
 #include <dirent.h>
 #include <errno.h>
 #include <libintl.h>
 #include <libgen.h>
-#ifdef HAVE_LIBUDEV
-#include <libudev.h>
-#include <sched.h>
-#endif
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -65,36 +62,16 @@
 #include <sys/dktp/fdisk.h>
 #include <sys/vdev_impl.h>
 #include <sys/fs/zfs.h>
-#include <sys/vdev_impl.h>
 
-#include <blkid/blkid.h>
 #include <thread_pool.h>
 #include <libzutil.h>
 #include <libnvpair.h>
 
-#define	IMPORT_ORDER_PREFERRED_1	1
-#define	IMPORT_ORDER_PREFERRED_2	2
-#define	IMPORT_ORDER_SCAN_OFFSET	10
-#define	IMPORT_ORDER_DEFAULT		100
-#define	DEFAULT_IMPORT_PATH_SIZE	9
-
-#define	EZFS_BADCACHE	"invalid or missing cache file"
-#define	EZFS_BADPATH	"must be an absolute path"
-#define	EZFS_NOMEM	"out of memory"
-#define	EZFS_EACESS	"some devices require root privileges"
-
-typedef struct libpc_handle {
-	boolean_t lpc_printerr;
-	boolean_t lpc_open_access_error;
-	boolean_t lpc_desc_active;
-	char lpc_desc[1024];
-	const pool_config_ops_t *lpc_ops;
-	void *lpc_lib_handle;
-} libpc_handle_t;
+#include "zutil_import.h"
 
 /*PRINTFLIKE2*/
 static void
-zfs_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
+zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
 {
 	va_list ap;
 
@@ -107,7 +84,8 @@
 }
 
 static void
-zfs_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap)
+zutil_verror(libpc_handle_t *hdl, const char *error, const char *fmt,
+    va_list ap)
 {
 	char action[1024];
 
@@ -128,13 +106,13 @@
 
 /*PRINTFLIKE3*/
 static int
-zfs_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
+zutil_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 
-	zfs_verror(hdl, error, fmt, ap);
+	zutil_verror(hdl, error, fmt, ap);
 
 	va_end(ap);
 
@@ -142,36 +120,36 @@
 }
 
 static int
-zfs_error(libpc_handle_t *hdl, const char *error, const char *msg)
+zutil_error(libpc_handle_t *hdl, const char *error, const char *msg)
 {
-	return (zfs_error_fmt(hdl, error, "%s", msg));
+	return (zutil_error_fmt(hdl, error, "%s", msg));
 }
 
 static int
-no_memory(libpc_handle_t *hdl)
+zutil_no_memory(libpc_handle_t *hdl)
 {
-	zfs_error(hdl, EZFS_NOMEM, "internal error");
+	zutil_error(hdl, EZFS_NOMEM, "internal error");
 	exit(1);
 }
 
-static void *
-zfs_alloc(libpc_handle_t *hdl, size_t size)
+void *
+zutil_alloc(libpc_handle_t *hdl, size_t size)
 {
 	void *data;
 
 	if ((data = calloc(1, size)) == NULL)
-		(void) no_memory(hdl);
+		(void) zutil_no_memory(hdl);
 
 	return (data);
 }
 
-static char *
-zfs_strdup(libpc_handle_t *hdl, const char *str)
+char *
+zutil_strdup(libpc_handle_t *hdl, const char *str)
 {
 	char *ret;
 
 	if ((ret = strdup(str)) == NULL)
-		(void) no_memory(hdl);
+		(void) zutil_no_memory(hdl);
 
 	return (ret);
 }
@@ -210,472 +188,6 @@
 	name_entry_t		*names;
 } pool_list_t;
 
-#define	ZVOL_ROOT	"/dev/zvol"
-#define	DEV_BYID_PATH	"/dev/disk/by-id/"
-
-/*
- * Linux persistent device strings for vdev labels
- *
- * based on libudev for consistency with libudev disk add/remove events
- */
-
-typedef struct vdev_dev_strs {
-	char	vds_devid[128];
-	char	vds_devphys[128];
-} vdev_dev_strs_t;
-
-#ifdef HAVE_LIBUDEV
-/*
- * Obtain the persistent device id string (describes what)
- *
- * used by ZED vdev matching for auto-{online,expand,replace}
- */
-int
-zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
-{
-	struct udev_list_entry *entry;
-	const char *bus;
-	char devbyid[MAXPATHLEN];
-
-	/* The bus based by-id path is preferred */
-	bus = udev_device_get_property_value(dev, "ID_BUS");
-
-	if (bus == NULL) {
-		const char *dm_uuid;
-
-		/*
-		 * For multipath nodes use the persistent uuid based identifier
-		 *
-		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
-		 */
-		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
-		if (dm_uuid != NULL) {
-			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
-			return (0);
-		}
-
-		/*
-		 * For volumes use the persistent /dev/zvol/dataset identifier
-		 */
-		entry = udev_device_get_devlinks_list_entry(dev);
-		while (entry != NULL) {
-			const char *name;
-
-			name = udev_list_entry_get_name(entry);
-			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
-				(void) strlcpy(bufptr, name, buflen);
-				return (0);
-			}
-			entry = udev_list_entry_get_next(entry);
-		}
-
-		/*
-		 * NVME 'by-id' symlinks are similar to bus case
-		 */
-		struct udev_device *parent;
-
-		parent = udev_device_get_parent_with_subsystem_devtype(dev,
-		    "nvme", NULL);
-		if (parent != NULL)
-			bus = "nvme";	/* continue with bus symlink search */
-		else
-			return (ENODATA);
-	}
-
-	/*
-	 * locate the bus specific by-id link
-	 */
-	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
-	entry = udev_device_get_devlinks_list_entry(dev);
-	while (entry != NULL) {
-		const char *name;
-
-		name = udev_list_entry_get_name(entry);
-		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
-			name += strlen(DEV_BYID_PATH);
-			(void) strlcpy(bufptr, name, buflen);
-			return (0);
-		}
-		entry = udev_list_entry_get_next(entry);
-	}
-
-	return (ENODATA);
-}
-
-/*
- * Obtain the persistent physical location string (describes where)
- *
- * used by ZED vdev matching for auto-{online,expand,replace}
- */
-int
-zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
-{
-	const char *physpath = NULL;
-	struct udev_list_entry *entry;
-
-	/*
-	 * Normal disks use ID_PATH for their physical path.
-	 */
-	physpath = udev_device_get_property_value(dev, "ID_PATH");
-	if (physpath != NULL && strlen(physpath) > 0) {
-		(void) strlcpy(bufptr, physpath, buflen);
-		return (0);
-	}
-
-	/*
-	 * Device mapper devices are virtual and don't have a physical
-	 * path. For them we use ID_VDEV instead, which is setup via the
-	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
-	 * to a virtual device.  If you don't have vdev_id.conf setup,
-	 * you cannot use multipath autoreplace with device mapper.
-	 */
-	physpath = udev_device_get_property_value(dev, "ID_VDEV");
-	if (physpath != NULL && strlen(physpath) > 0) {
-		(void) strlcpy(bufptr, physpath, buflen);
-		return (0);
-	}
-
-	/*
-	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
-	 */
-	entry = udev_device_get_devlinks_list_entry(dev);
-	while (entry != NULL) {
-		physpath = udev_list_entry_get_name(entry);
-		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
-			(void) strlcpy(bufptr, physpath, buflen);
-			return (0);
-		}
-		entry = udev_list_entry_get_next(entry);
-	}
-
-	/*
-	 * For all other devices fallback to using the by-uuid name.
-	 */
-	entry = udev_device_get_devlinks_list_entry(dev);
-	while (entry != NULL) {
-		physpath = udev_list_entry_get_name(entry);
-		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
-			(void) strlcpy(bufptr, physpath, buflen);
-			return (0);
-		}
-		entry = udev_list_entry_get_next(entry);
-	}
-
-	return (ENODATA);
-}
-
-/*
- * A disk is considered a multipath whole disk when:
- *	DEVNAME key value has "dm-"
- *	DM_NAME key value has "mpath" prefix
- *	DM_UUID key exists
- *	ID_PART_TABLE_TYPE key does not exist or is not gpt
- */
-static boolean_t
-udev_mpath_whole_disk(struct udev_device *dev)
-{
-	const char *devname, *type, *uuid;
-
-	devname = udev_device_get_property_value(dev, "DEVNAME");
-	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
-	uuid = udev_device_get_property_value(dev, "DM_UUID");
-
-	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
-	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
-	    (uuid != NULL)) {
-		return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-static int
-udev_device_is_ready(struct udev_device *dev)
-{
-#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
-	return (udev_device_get_is_initialized(dev));
-#else
-	/* wait for DEVLINKS property to be initialized */
-	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
-#endif
-}
-#endif /* HAVE_LIBUDEV */
-
-/*
- * Wait up to timeout_ms for udev to set up the device node.  The device is
- * considered ready when libudev determines it has been initialized, all of
- * the device links have been verified to exist, and it has been allowed to
- * settle.  At this point the device the device can be accessed reliably.
- * Depending on the complexity of the udev rules this process could take
- * several seconds.
- */
-int
-zpool_label_disk_wait(const char *path, int timeout_ms)
-{
-#ifdef HAVE_LIBUDEV
-	struct udev *udev;
-	struct udev_device *dev = NULL;
-	char nodepath[MAXPATHLEN];
-	char *sysname = NULL;
-	int ret = ENODEV;
-	int settle_ms = 50;
-	long sleep_ms = 10;
-	hrtime_t start, settle;
-
-	if ((udev = udev_new()) == NULL)
-		return (ENXIO);
-
-	start = gethrtime();
-	settle = 0;
-
-	do {
-		if (sysname == NULL) {
-			if (realpath(path, nodepath) != NULL) {
-				sysname = strrchr(nodepath, '/') + 1;
-			} else {
-				(void) usleep(sleep_ms * MILLISEC);
-				continue;
-			}
-		}
-
-		dev = udev_device_new_from_subsystem_sysname(udev,
-		    "block", sysname);
-		if ((dev != NULL) && udev_device_is_ready(dev)) {
-			struct udev_list_entry *links, *link = NULL;
-
-			ret = 0;
-			links = udev_device_get_devlinks_list_entry(dev);
-
-			udev_list_entry_foreach(link, links) {
-				struct stat64 statbuf;
-				const char *name;
-
-				name = udev_list_entry_get_name(link);
-				errno = 0;
-				if (stat64(name, &statbuf) == 0 && errno == 0)
-					continue;
-
-				settle = 0;
-				ret = ENODEV;
-				break;
-			}
-
-			if (ret == 0) {
-				if (settle == 0) {
-					settle = gethrtime();
-				} else if (NSEC2MSEC(gethrtime() - settle) >=
-				    settle_ms) {
-					udev_device_unref(dev);
-					break;
-				}
-			}
-		}
-
-		udev_device_unref(dev);
-		(void) usleep(sleep_ms * MILLISEC);
-
-	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
-
-	udev_unref(udev);
-
-	return (ret);
-#else
-	int settle_ms = 50;
-	long sleep_ms = 10;
-	hrtime_t start, settle;
-	struct stat64 statbuf;
-
-	start = gethrtime();
-	settle = 0;
-
-	do {
-		errno = 0;
-		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
-			if (settle == 0)
-				settle = gethrtime();
-			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
-				return (0);
-		} else if (errno != ENOENT) {
-			return (errno);
-		}
-
-		usleep(sleep_ms * MILLISEC);
-	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
-
-	return (ENODEV);
-#endif /* HAVE_LIBUDEV */
-}
-
-/*
- * Encode the persistent devices strings
- * used for the vdev disk label
- */
-static int
-encode_device_strings(const char *path, vdev_dev_strs_t *ds,
-    boolean_t wholedisk)
-{
-#ifdef HAVE_LIBUDEV
-	struct udev *udev;
-	struct udev_device *dev = NULL;
-	char nodepath[MAXPATHLEN];
-	char *sysname;
-	int ret = ENODEV;
-	hrtime_t start;
-
-	if ((udev = udev_new()) == NULL)
-		return (ENXIO);
-
-	/* resolve path to a runtime device node instance */
-	if (realpath(path, nodepath) == NULL)
-		goto no_dev;
-
-	sysname = strrchr(nodepath, '/') + 1;
-
-	/*
-	 * Wait up to 3 seconds for udev to set up the device node context
-	 */
-	start = gethrtime();
-	do {
-		dev = udev_device_new_from_subsystem_sysname(udev, "block",
-		    sysname);
-		if (dev == NULL)
-			goto no_dev;
-		if (udev_device_is_ready(dev))
-			break;  /* udev ready */
-
-		udev_device_unref(dev);
-		dev = NULL;
-
-		if (NSEC2MSEC(gethrtime() - start) < 10)
-			(void) sched_yield();	/* yield/busy wait up to 10ms */
-		else
-			(void) usleep(10 * MILLISEC);
-
-	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
-
-	if (dev == NULL)
-		goto no_dev;
-
-	/*
-	 * Only whole disks require extra device strings
-	 */
-	if (!wholedisk && !udev_mpath_whole_disk(dev))
-		goto no_dev;
-
-	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
-	if (ret != 0)
-		goto no_dev_ref;
-
-	/* physical location string (optional) */
-	if (zfs_device_get_physical(dev, ds->vds_devphys,
-	    sizeof (ds->vds_devphys)) != 0) {
-		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
-	}
-
-no_dev_ref:
-	udev_device_unref(dev);
-no_dev:
-	udev_unref(udev);
-
-	return (ret);
-#else
-	return (ENOENT);
-#endif
-}
-
-/*
- * Update a leaf vdev's persistent device strings (Linux only)
- *
- * - only applies for a dedicated leaf vdev (aka whole disk)
- * - updated during pool create|add|attach|import
- * - used for matching device matching during auto-{online,expand,replace}
- * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
- * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
- *
- * single device node example:
- * 	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
- * 	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
- *
- * multipath device node example:
- * 	devid:		'dm-uuid-mpath-35000c5006304de3f'
- *
- * We also store the enclosure sysfs path for turning on enclosure LEDs
- * (if applicable):
- *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
- */
-void
-update_vdev_config_dev_strs(nvlist_t *nv)
-{
-	vdev_dev_strs_t vds;
-	char *env, *type, *path;
-	uint64_t wholedisk = 0;
-	char *upath, *spath;
-
-	/*
-	 * For the benefit of legacy ZFS implementations, allow
-	 * for opting out of devid strings in the vdev label.
-	 *
-	 * example use:
-	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
-	 *
-	 * explanation:
-	 * Older ZFS on Linux implementations had issues when attempting to
-	 * display pool config VDEV names if a "devid" NVP value is present
-	 * in the pool's config.
-	 *
-	 * For example, a pool that originated on illumos platform would
-	 * have a devid value in the config and "zpool status" would fail
-	 * when listing the config.
-	 *
-	 * A pool can be stripped of any "devid" values on import or
-	 * prevented from adding them on zpool create|add by setting
-	 * ZFS_VDEV_DEVID_OPT_OUT.
-	 */
-	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
-	if (env && (strtoul(env, NULL, 0) > 0 ||
-	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
-		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
-		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
-		return;
-	}
-
-	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
-	    strcmp(type, VDEV_TYPE_DISK) != 0) {
-		return;
-	}
-	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
-		return;
-	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
-
-	/*
-	 * Update device string values in config nvlist
-	 */
-	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
-		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
-		if (vds.vds_devphys[0] != '\0') {
-			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
-			    vds.vds_devphys);
-		}
-
-		/* Add enclosure sysfs path (if disk is in an enclosure) */
-		upath = zfs_get_underlying_path(path);
-		spath = zfs_get_enclosure_sysfs_path(upath);
-		if (spath)
-			nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
-			    spath);
-		else
-			nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
-
-		free(upath);
-		free(spath);
-	} else {
-		/* clear out any stale entries */
-		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
-		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
-		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
-	}
-}
-
 /*
  * Go through and fix up any path and/or devid information for the given vdev
  * configuration.
@@ -753,7 +265,6 @@
 	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
 		return (-1);
 
-	/* Linux only - update ZPOOL_CONFIG_DEVID and ZPOOL_CONFIG_PHYS_PATH */
 	update_vdev_config_dev_strs(nv);
 
 	return (0);
@@ -781,10 +292,10 @@
 	    &state) == 0 &&
 	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
-		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
 			return (-1);
 
-		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+		if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
 			free(ne);
 			return (-1);
 		}
@@ -826,7 +337,7 @@
 	}
 
 	if (pe == NULL) {
-		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+		if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
 			return (-1);
 		}
 		pe->pe_guid = pool_guid;
@@ -844,7 +355,7 @@
 	}
 
 	if (ve == NULL) {
-		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+		if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
 			return (-1);
 		}
 		ve->ve_guid = top_guid;
@@ -863,7 +374,7 @@
 	}
 
 	if (ce == NULL) {
-		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+		if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) {
 			return (-1);
 		}
 		ce->ce_txg = txg;
@@ -878,10 +389,10 @@
 	 * mappings so that we can fix up the configuration as necessary before
 	 * doing the import.
 	 */
-	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+	if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
 		return (-1);
 
-	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+	if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
 		free(ne);
 		return (-1);
 	}
@@ -896,7 +407,7 @@
 }
 
 static int
-pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
+zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
     boolean_t *isactive)
 {
 	ASSERT(hdl->lpc_ops->pco_pool_active != NULL);
@@ -908,7 +419,7 @@
 }
 
 static nvlist_t *
-refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
+zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
 {
 	ASSERT(hdl->lpc_ops->pco_refresh_config != NULL);
 
@@ -956,11 +467,9 @@
 	uint64_t guid;
 	uint_t children = 0;
 	nvlist_t **child = NULL;
-	uint_t holes;
 	uint64_t *hole_array, max_id;
 	uint_t c;
 	boolean_t isactive;
-	uint64_t hostid;
 	nvlist_t *nvl;
 	boolean_t valid_top_config = B_FALSE;
 
@@ -968,7 +477,8 @@
 		goto nomem;
 
 	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
-		uint64_t id, max_txg = 0;
+		uint64_t id, max_txg = 0, hostid = 0;
+		uint_t holes = 0;
 
 		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
 			goto nomem;
@@ -1041,12 +551,14 @@
 				 *	pool guid
 				 *	name
 				 *	comment (if available)
+				 *	compatibility features (if available)
 				 *	pool state
 				 *	hostid (if available)
 				 *	hostname (if available)
 				 */
 				uint64_t state, version;
 				char *comment = NULL;
+				char *compatibility = NULL;
 
 				version = fnvlist_lookup_uint64(tmp,
 				    ZPOOL_CONFIG_VERSION);
@@ -1066,6 +578,13 @@
 					fnvlist_add_string(config,
 					    ZPOOL_CONFIG_COMMENT, comment);
 
+				if (nvlist_lookup_string(tmp,
+				    ZPOOL_CONFIG_COMPATIBILITY,
+				    &compatibility) == 0)
+					fnvlist_add_string(config,
+					    ZPOOL_CONFIG_COMPATIBILITY,
+					    compatibility);
+
 				state = fnvlist_lookup_uint64(tmp,
 				    ZPOOL_CONFIG_POOL_STATE);
 				fnvlist_add_uint64(config,
@@ -1096,7 +615,7 @@
 			if (id >= children) {
 				nvlist_t **newchild;
 
-				newchild = zfs_alloc(hdl, (id + 1) *
+				newchild = zutil_alloc(hdl, (id + 1) *
 				    sizeof (nvlist_t *));
 				if (newchild == NULL)
 					goto nomem;
@@ -1128,7 +647,7 @@
 			} else if (max_id > children) {
 				nvlist_t **newchild;
 
-				newchild = zfs_alloc(hdl, (max_id) *
+				newchild = zutil_alloc(hdl, (max_id) *
 				    sizeof (nvlist_t *));
 				if (newchild == NULL)
 					goto nomem;
@@ -1266,7 +785,7 @@
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &guid) == 0);
 
-		if (pool_active(hdl, name, guid, &isactive) != 0)
+		if (zutil_pool_active(hdl, name, guid, &isactive) != 0)
 			goto error;
 
 		if (isactive) {
@@ -1281,7 +800,7 @@
 				goto nomem;
 		}
 
-		if ((nvl = refresh_config(hdl, config)) == NULL) {
+		if ((nvl = zutil_refresh_config(hdl, config)) == NULL) {
 			nvlist_free(config);
 			config = NULL;
 			continue;
@@ -1346,7 +865,7 @@
 	return (ret);
 
 nomem:
-	(void) no_memory(hdl);
+	(void) zutil_no_memory(hdl);
 error:
 	nvlist_free(config);
 	nvlist_free(ret);
@@ -1369,16 +888,16 @@
 }
 
 /*
- * Given a file descriptor, read the label information and return an nvlist
- * describing the configuration, if there is one.  The number of valid
- * labels found will be returned in num_labels when non-NULL.
+ * The same description applies as to zpool_read_label below,
+ * except here we do it without aio, presumably because an aio call
+ * errored out in a way we think not using it could circumvent.
  */
-int
-zpool_read_label(int fd, nvlist_t **config, int *num_labels)
+static int
+zpool_read_label_slow(int fd, nvlist_t **config, int *num_labels)
 {
 	struct stat64 statbuf;
 	int l, count = 0;
-	vdev_label_t *label;
+	vdev_phys_t *label;
 	nvlist_t *expected_config = NULL;
 	uint64_t expected_guid = 0, size;
 	int error;
@@ -1395,13 +914,14 @@
 
 	for (l = 0; l < VDEV_LABELS; l++) {
 		uint64_t state, guid, txg;
+		off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;
 
-		if (pread64(fd, label, sizeof (vdev_label_t),
-		    label_offset(size, l)) != sizeof (vdev_label_t))
+		if (pread64(fd, label, sizeof (vdev_phys_t),
+		    offset) != sizeof (vdev_phys_t))
 			continue;
 
-		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
-		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
+		if (nvlist_unpack(label->vp_nvlist,
+		    sizeof (label->vp_nvlist), config, 0) != 0)
 			continue;
 
 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
@@ -1444,18 +964,137 @@
 	return (0);
 }
 
-typedef struct rdsk_node {
-	char *rn_name;			/* Full path to device */
-	int rn_order;			/* Preferred order (low to high) */
-	int rn_num_labels;		/* Number of valid labels */
-	uint64_t rn_vdev_guid;		/* Expected vdev guid when set */
-	libpc_handle_t *rn_hdl;
-	nvlist_t *rn_config;		/* Label config */
-	avl_tree_t *rn_avl;
-	avl_node_t rn_node;
-	pthread_mutex_t *rn_lock;
-	boolean_t rn_labelpaths;
-} rdsk_node_t;
+/*
+ * Given a file descriptor, read the label information and return an nvlist
+ * describing the configuration, if there is one.  The number of valid
+ * labels found will be returned in num_labels when non-NULL.
+ */
+int
+zpool_read_label(int fd, nvlist_t **config, int *num_labels)
+{
+	struct stat64 statbuf;
+	struct aiocb aiocbs[VDEV_LABELS];
+	struct aiocb *aiocbps[VDEV_LABELS];
+	vdev_phys_t *labels;
+	nvlist_t *expected_config = NULL;
+	uint64_t expected_guid = 0, size;
+	int error, l, count = 0;
+
+	*config = NULL;
+
+	if (fstat64_blk(fd, &statbuf) == -1)
+		return (0);
+	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+
+	error = posix_memalign((void **)&labels, PAGESIZE,
+	    VDEV_LABELS * sizeof (*labels));
+	if (error)
+		return (-1);
+
+	memset(aiocbs, 0, sizeof (aiocbs));
+	for (l = 0; l < VDEV_LABELS; l++) {
+		off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;
+
+		aiocbs[l].aio_fildes = fd;
+		aiocbs[l].aio_offset = offset;
+		aiocbs[l].aio_buf = &labels[l];
+		aiocbs[l].aio_nbytes = sizeof (vdev_phys_t);
+		aiocbs[l].aio_lio_opcode = LIO_READ;
+		aiocbps[l] = &aiocbs[l];
+	}
+
+	if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) {
+		int saved_errno = errno;
+		boolean_t do_slow = B_FALSE;
+		error = -1;
+
+		if (errno == EAGAIN || errno == EINTR || errno == EIO) {
+			/*
+			 * A portion of the requests may have been submitted.
+			 * Clean them up.
+			 */
+			for (l = 0; l < VDEV_LABELS; l++) {
+				errno = 0;
+				switch (aio_error(&aiocbs[l])) {
+				case EINVAL:
+					break;
+				case EINPROGRESS:
+					// This shouldn't be possible to
+					// encounter, die if we do.
+					ASSERT(B_FALSE);
+					fallthrough;
+				case EOPNOTSUPP:
+				case ENOSYS:
+					do_slow = B_TRUE;
+					fallthrough;
+				case 0:
+				default:
+					(void) aio_return(&aiocbs[l]);
+				}
+			}
+		}
+		if (do_slow) {
+			/*
+			 * At least some IO involved access unsafe-for-AIO
+			 * files. Let's try again, without AIO this time.
+			 */
+			error = zpool_read_label_slow(fd, config, num_labels);
+			saved_errno = errno;
+		}
+		free(labels);
+		errno = saved_errno;
+		return (error);
+	}
+
+	for (l = 0; l < VDEV_LABELS; l++) {
+		uint64_t state, guid, txg;
+
+		if (aio_return(&aiocbs[l]) != sizeof (vdev_phys_t))
+			continue;
+
+		if (nvlist_unpack(labels[l].vp_nvlist,
+		    sizeof (labels[l].vp_nvlist), config, 0) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
+		    &guid) != 0 || guid == 0) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state > POOL_STATE_L2CACHE) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
+		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		if (expected_guid) {
+			if (expected_guid == guid)
+				count++;
+
+			nvlist_free(*config);
+		} else {
+			expected_config = *config;
+			expected_guid = guid;
+			count++;
+		}
+	}
+
+	if (num_labels != NULL)
+		*num_labels = count;
+
+	free(labels);
+	*config = expected_config;
+
+	return (0);
+}
 
 /*
  * Sorted by full path and then vdev guid to allow for multiple entries with
@@ -1466,7 +1105,7 @@
  * include overwritten pool labels, devices which are visible from multiple
  * hosts and multipath devices.
  */
-static int
+int
 slice_cache_compare(const void *arg1, const void *arg2)
 {
 	const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
@@ -1475,25 +1114,11 @@
 	uint64_t guid2 = ((rdsk_node_t *)arg2)->rn_vdev_guid;
 	int rv;
 
-	rv = AVL_ISIGN(strcmp(nm1, nm2));
+	rv = TREE_ISIGN(strcmp(nm1, nm2));
 	if (rv)
 		return (rv);
 
-	return (AVL_CMP(guid1, guid2));
-}
-
-static boolean_t
-is_watchdog_dev(char *dev)
-{
-	/* For 'watchdog' dev */
-	if (strcmp(dev, "watchdog") == 0)
-		return (B_TRUE);
-
-	/* For 'watchdog<digit><whatever> */
-	if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
-		return (B_TRUE);
-
-	return (B_FALSE);
+	return (TREE_CMP(guid1, guid2));
 }
 
 static int
@@ -1540,7 +1165,7 @@
  * and store these strings as config_path and devid_path respectively.
  * The returned pointers are only valid as long as label remains valid.
  */
-static int
+int
 label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid)
 {
 	nvlist_t *nvroot;
@@ -1560,173 +1185,13 @@
 }
 
 static void
-zpool_open_func(void *arg)
-{
-	rdsk_node_t *rn = arg;
-	libpc_handle_t *hdl = rn->rn_hdl;
-	struct stat64 statbuf;
-	nvlist_t *config;
-	char *bname, *dupname;
-	uint64_t vdev_guid = 0;
-	int error;
-	int num_labels = 0;
-	int fd;
-
-	/*
-	 * Skip devices with well known prefixes there can be side effects
-	 * when opening devices which need to be avoided.
-	 *
-	 * hpet     - High Precision Event Timer
-	 * watchdog - Watchdog must be closed in a special way.
-	 */
-	dupname = zfs_strdup(hdl, rn->rn_name);
-	bname = basename(dupname);
-	error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
-	free(dupname);
-	if (error)
-		return;
-
-	/*
-	 * Ignore failed stats.  We only want regular files and block devices.
-	 */
-	if (stat64(rn->rn_name, &statbuf) != 0 ||
-	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
-		return;
-
-	/*
-	 * Preferentially open using O_DIRECT to bypass the block device
-	 * cache which may be stale for multipath devices.  An EINVAL errno
-	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
-	 */
-	fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
-	if ((fd < 0) && (errno == EINVAL))
-		fd = open(rn->rn_name, O_RDONLY);
-
-	if ((fd < 0) && (errno == EACCES))
-		hdl->lpc_open_access_error = B_TRUE;
-
-	if (fd < 0)
-		return;
-
-	/*
-	 * This file is too small to hold a zpool
-	 */
-	if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
-		(void) close(fd);
-		return;
-	}
-
-	error = zpool_read_label(fd, &config, &num_labels);
-	if (error != 0) {
-		(void) close(fd);
-		return;
-	}
-
-	if (num_labels == 0) {
-		(void) close(fd);
-		nvlist_free(config);
-		return;
-	}
-
-	/*
-	 * Check that the vdev is for the expected guid.  Additional entries
-	 * are speculatively added based on the paths stored in the labels.
-	 * Entries with valid paths but incorrect guids must be removed.
-	 */
-	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
-	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
-		(void) close(fd);
-		nvlist_free(config);
-		return;
-	}
-
-	(void) close(fd);
-
-	rn->rn_config = config;
-	rn->rn_num_labels = num_labels;
-
-	/*
-	 * Add additional entries for paths described by this label.
-	 */
-	if (rn->rn_labelpaths) {
-		char *path = NULL;
-		char *devid = NULL;
-		char *env = NULL;
-		rdsk_node_t *slice;
-		avl_index_t where;
-		int timeout;
-		int error;
-
-		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
-			return;
-
-		env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
-		if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
-		    timeout < 0) {
-			timeout = DISK_LABEL_WAIT;
-		}
-
-		/*
-		 * Allow devlinks to stabilize so all paths are available.
-		 */
-		zpool_label_disk_wait(rn->rn_name, timeout);
-
-		if (path != NULL) {
-			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
-			slice->rn_name = zfs_strdup(hdl, path);
-			slice->rn_vdev_guid = vdev_guid;
-			slice->rn_avl = rn->rn_avl;
-			slice->rn_hdl = hdl;
-			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
-			slice->rn_labelpaths = B_FALSE;
-			pthread_mutex_lock(rn->rn_lock);
-			if (avl_find(rn->rn_avl, slice, &where)) {
-			pthread_mutex_unlock(rn->rn_lock);
-				free(slice->rn_name);
-				free(slice);
-			} else {
-				avl_insert(rn->rn_avl, slice, where);
-				pthread_mutex_unlock(rn->rn_lock);
-				zpool_open_func(slice);
-			}
-		}
-
-		if (devid != NULL) {
-			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
-			error = asprintf(&slice->rn_name, "%s%s",
-			    DEV_BYID_PATH, devid);
-			if (error == -1) {
-				free(slice);
-				return;
-			}
-
-			slice->rn_vdev_guid = vdev_guid;
-			slice->rn_avl = rn->rn_avl;
-			slice->rn_hdl = hdl;
-			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
-			slice->rn_labelpaths = B_FALSE;
-			pthread_mutex_lock(rn->rn_lock);
-			if (avl_find(rn->rn_avl, slice, &where)) {
-				pthread_mutex_unlock(rn->rn_lock);
-				free(slice->rn_name);
-				free(slice);
-			} else {
-				avl_insert(rn->rn_avl, slice, where);
-				pthread_mutex_unlock(rn->rn_lock);
-				zpool_open_func(slice);
-			}
-		}
-	}
-}
-
-static void
 zpool_find_import_scan_add_slice(libpc_handle_t *hdl, pthread_mutex_t *lock,
     avl_tree_t *cache, const char *path, const char *name, int order)
 {
 	avl_index_t where;
 	rdsk_node_t *slice;
 
-	slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+	slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
 	if (asprintf(&slice->rn_name, "%s/%s", path, name) == -1) {
 		free(slice);
 		return;
@@ -1762,8 +1227,8 @@
 		if (error == ENOENT)
 			return (0);
 
-		zfs_error_aux(hdl, strerror(error));
-		(void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+		zutil_error_aux(hdl, strerror(error));
+		(void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
 		    TEXT_DOMAIN, "cannot resolve path '%s'"), dir);
 		return (error);
 	}
@@ -1771,8 +1236,8 @@
 	dirp = opendir(path);
 	if (dirp == NULL) {
 		error = errno;
-		zfs_error_aux(hdl, strerror(error));
-		(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+		zutil_error_aux(hdl, strerror(error));
+		(void) zutil_error_fmt(hdl, EZFS_BADPATH,
 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
 		return (error);
 	}
@@ -1807,8 +1272,8 @@
 	 * whole path because if it's a symlink, we want the
 	 * path of the symlink not where it points to.
 	 */
-	d = zfs_strdup(hdl, dir);
-	b = zfs_strdup(hdl, dir);
+	d = zutil_strdup(hdl, dir);
+	b = zutil_strdup(hdl, dir);
 	dpath = dirname(d);
 	name = basename(b);
 
@@ -1819,8 +1284,8 @@
 			goto out;
 		}
 
-		zfs_error_aux(hdl, strerror(error));
-		(void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+		zutil_error_aux(hdl, strerror(error));
+		(void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
 		    TEXT_DOMAIN, "cannot resolve path '%s'"), dir);
 		goto out;
 	}
@@ -1838,7 +1303,7 @@
  */
 static int
 zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock,
-    avl_tree_t **slice_cache, char **dir, int dirs)
+    avl_tree_t **slice_cache, const char * const *dir, size_t dirs)
 {
 	avl_tree_t *cache;
 	rdsk_node_t *slice;
@@ -1846,7 +1311,7 @@
 	int i, error;
 
 	*slice_cache = NULL;
-	cache = zfs_alloc(hdl, sizeof (avl_tree_t));
+	cache = zutil_alloc(hdl, sizeof (avl_tree_t));
 	avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t),
 	    offsetof(rdsk_node_t, rn_node));
 
@@ -1858,15 +1323,15 @@
 			if (error == ENOENT)
 				continue;
 
-			zfs_error_aux(hdl, strerror(error));
-			(void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+			zutil_error_aux(hdl, strerror(error));
+			(void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
 			    TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]);
 			goto error;
 		}
 
 		/*
 		 * If dir[i] is a directory, we walk through it and add all
-		 * the entry to the cache. If it's not a directory, we just
+		 * the entries to the cache. If it's not a directory, we just
 		 * add it to the cache.
 		 */
 		if (S_ISDIR(sbuf.st_mode)) {
@@ -1894,139 +1359,6 @@
 	return (error);
 }
 
-static char *
-zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
-	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
-	"/dev/mapper",		/* Use multipath devices before components */
-	"/dev/disk/by-partlabel", /* Single unique entry set by user */
-	"/dev/disk/by-partuuid", /* Generated partition uuid */
-	"/dev/disk/by-label",	/* Custom persistent labels */
-	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
-	"/dev/disk/by-id",	/* May be multiple entries and persistent */
-	"/dev/disk/by-path",	/* Encodes physical location and persistent */
-	"/dev"			/* UNSAFE device names will change */
-};
-
-const char * const *
-zpool_default_search_paths(size_t *count)
-{
-	*count = DEFAULT_IMPORT_PATH_SIZE;
-	return ((const char * const *)zpool_default_import_path);
-}
-
-/*
- * Given a full path to a device determine if that device appears in the
- * import search path.  If it does return the first match and store the
- * index in the passed 'order' variable, otherwise return an error.
- */
-static int
-zfs_path_order(char *name, int *order)
-{
-	int i = 0, error = ENOENT;
-	char *dir, *env, *envdup;
-
-	env = getenv("ZPOOL_IMPORT_PATH");
-	if (env) {
-		envdup = strdup(env);
-		dir = strtok(envdup, ":");
-		while (dir) {
-			if (strncmp(name, dir, strlen(dir)) == 0) {
-				*order = i;
-				error = 0;
-				break;
-			}
-			dir = strtok(NULL, ":");
-			i++;
-		}
-		free(envdup);
-	} else {
-		for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
-			if (strncmp(name, zpool_default_import_path[i],
-			    strlen(zpool_default_import_path[i])) == 0) {
-				*order = i;
-				error = 0;
-				break;
-			}
-		}
-	}
-
-	return (error);
-}
-
-/*
- * Use libblkid to quickly enumerate all known zfs devices.
- */
-static int
-zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
-    avl_tree_t **slice_cache)
-{
-	rdsk_node_t *slice;
-	blkid_cache cache;
-	blkid_dev_iterate iter;
-	blkid_dev dev;
-	avl_index_t where;
-	int error;
-
-	*slice_cache = NULL;
-
-	error = blkid_get_cache(&cache, NULL);
-	if (error != 0)
-		return (error);
-
-	error = blkid_probe_all_new(cache);
-	if (error != 0) {
-		blkid_put_cache(cache);
-		return (error);
-	}
-
-	iter = blkid_dev_iterate_begin(cache);
-	if (iter == NULL) {
-		blkid_put_cache(cache);
-		return (EINVAL);
-	}
-
-	error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
-	if (error != 0) {
-		blkid_dev_iterate_end(iter);
-		blkid_put_cache(cache);
-		return (error);
-	}
-
-	*slice_cache = zfs_alloc(hdl, sizeof (avl_tree_t));
-	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
-	    offsetof(rdsk_node_t, rn_node));
-
-	while (blkid_dev_next(iter, &dev) == 0) {
-		slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
-		slice->rn_name = zfs_strdup(hdl, blkid_dev_devname(dev));
-		slice->rn_vdev_guid = 0;
-		slice->rn_lock = lock;
-		slice->rn_avl = *slice_cache;
-		slice->rn_hdl = hdl;
-		slice->rn_labelpaths = B_TRUE;
-
-		error = zfs_path_order(slice->rn_name, &slice->rn_order);
-		if (error == 0)
-			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
-		else
-			slice->rn_order = IMPORT_ORDER_DEFAULT;
-
-		pthread_mutex_lock(lock);
-		if (avl_find(*slice_cache, slice, &where)) {
-			free(slice->rn_name);
-			free(slice);
-		} else {
-			avl_insert(*slice_cache, slice, where);
-		}
-		pthread_mutex_unlock(lock);
-	}
-
-	blkid_dev_iterate_end(iter);
-	blkid_put_cache(cache);
-
-	return (0);
-}
-
 /*
  * Given a list of directories to search, find all pools stored on disk.  This
  * includes partial pools which are not available to import.  If no args are
@@ -2035,7 +1367,8 @@
  * to import a specific pool.
  */
 static nvlist_t *
-zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg)
+zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg,
+    pthread_mutex_t *lock, avl_tree_t *cache)
 {
 	nvlist_t *ret = NULL;
 	pool_list_t pools = { 0 };
@@ -2043,36 +1376,11 @@
 	vdev_entry_t *ve, *venext;
 	config_entry_t *ce, *cenext;
 	name_entry_t *ne, *nenext;
-	pthread_mutex_t lock;
-	avl_tree_t *cache;
 	rdsk_node_t *slice;
 	void *cookie;
 	tpool_t *t;
 
 	verify(iarg->poolname == NULL || iarg->guid == 0);
-	pthread_mutex_init(&lock, NULL);
-
-	/*
-	 * Locate pool member vdevs using libblkid or by directory scanning.
-	 * On success a newly allocated AVL tree which is populated with an
-	 * entry for each discovered vdev will be returned as the cache.
-	 * It's the callers responsibility to consume and destroy this tree.
-	 */
-	if (iarg->scan || iarg->paths != 0) {
-		int dirs = iarg->paths;
-		char **dir = iarg->path;
-
-		if (dirs == 0) {
-			dir = zpool_default_import_path;
-			dirs = DEFAULT_IMPORT_PATH_SIZE;
-		}
-
-		if (zpool_find_import_scan(hdl, &lock, &cache, dir,  dirs) != 0)
-			return (NULL);
-	} else {
-		if (zpool_find_import_blkid(hdl, &lock, &cache) != 0)
-			return (NULL);
-	}
 
 	/*
 	 * Create a thread pool to parallelize the process of reading and
@@ -2136,7 +1444,8 @@
 				 * would prevent a zdb -e of active pools with
 				 * no cachefile.
 				 */
-				fd = open(slice->rn_name, O_RDONLY | O_EXCL);
+				fd = open(slice->rn_name,
+				    O_RDONLY | O_EXCL | O_CLOEXEC);
 				if (fd >= 0 || iarg->can_be_active) {
 					if (fd >= 0)
 						close(fd);
@@ -2152,7 +1461,6 @@
 	}
 	avl_destroy(cache);
 	free(cache);
-	pthread_mutex_destroy(&lock);
 
 	ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy);
 
@@ -2180,13 +1488,42 @@
 }
 
 /*
+ * Given a config, discover the paths for the devices which
+ * exist in the config.
+ */
+static int
+discover_cached_paths(libpc_handle_t *hdl, nvlist_t *nv,
+    avl_tree_t *cache, pthread_mutex_t *lock)
+{
+	char *path = NULL;
+	uint_t children;
+	nvlist_t **child;
+
+	/*
+	 * Walk the children first.  Their results are ignored; only this
+	 * vdev's own path determines the value returned below.
+	 */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (uint_t c = 0; c < children; c++) {
+			(void) discover_cached_paths(hdl, child[c], cache,
+			    lock);
+		}
+	}
+
+	/*
+	 * Once we have the path, we need to add the directory to
+	 * our directory cache.  dirname() is allowed to modify its
+	 * argument and 'path' points at the string stored inside 'nv',
+	 * so work on a private copy to leave the config untouched.
+	 */
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
+		char *copy = zutil_strdup(hdl, path);
+		int error = zpool_find_import_scan_dir(hdl, lock, cache,
+		    dirname(copy), 0);
+
+		free(copy);
+		return (error);
+	}
+	return (0);
+}
+
+/*
  * Given a cache file, return the contents as a list of importable pools.
  * poolname or guid (but not both) are provided by the caller when trying
  * to import a specific pool.
  */
 static nvlist_t *
-zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
-    const char *poolname, uint64_t guid)
+zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg)
 {
 	char *buf;
 	int fd;
@@ -2198,24 +1535,24 @@
 	uint64_t this_guid;
 	boolean_t active;
 
-	verify(poolname == NULL || guid == 0);
+	verify(iarg->poolname == NULL || iarg->guid == 0);
 
-	if ((fd = open(cachefile, O_RDONLY)) < 0) {
-		zfs_error_aux(hdl, "%s", strerror(errno));
-		(void) zfs_error(hdl, EZFS_BADCACHE,
+	if ((fd = open(iarg->cachefile, O_RDONLY | O_CLOEXEC)) < 0) {
+		zutil_error_aux(hdl, "%s", strerror(errno));
+		(void) zutil_error(hdl, EZFS_BADCACHE,
 		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
 		return (NULL);
 	}
 
 	if (fstat64(fd, &statbuf) != 0) {
-		zfs_error_aux(hdl, "%s", strerror(errno));
+		zutil_error_aux(hdl, "%s", strerror(errno));
 		(void) close(fd);
-		(void) zfs_error(hdl, EZFS_BADCACHE,
+		(void) zutil_error(hdl, EZFS_BADCACHE,
 		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
 		return (NULL);
 	}
 
-	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
+	if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) {
 		(void) close(fd);
 		return (NULL);
 	}
@@ -2223,7 +1560,7 @@
 	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
 		(void) close(fd);
 		free(buf);
-		(void) zfs_error(hdl, EZFS_BADCACHE,
+		(void) zutil_error(hdl, EZFS_BADCACHE,
 		    dgettext(TEXT_DOMAIN,
 		    "failed to read cache file contents"));
 		return (NULL);
@@ -2233,7 +1570,7 @@
 
 	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
 		free(buf);
-		(void) zfs_error(hdl, EZFS_BADCACHE,
+		(void) zutil_error(hdl, EZFS_BADCACHE,
 		    dgettext(TEXT_DOMAIN,
 		    "invalid or corrupt cache file contents"));
 		return (NULL);
@@ -2246,7 +1583,7 @@
 	 * state.
 	 */
 	if (nvlist_alloc(&pools, 0, 0) != 0) {
-		(void) no_memory(hdl);
+		(void) zutil_no_memory(hdl);
 		nvlist_free(raw);
 		return (NULL);
 	}
@@ -2256,14 +1593,14 @@
 		src = fnvpair_value_nvlist(elem);
 
 		name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
-		if (poolname != NULL && strcmp(poolname, name) != 0)
+		if (iarg->poolname != NULL && strcmp(iarg->poolname, name) != 0)
 			continue;
 
 		this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
-		if (guid != 0 && guid != this_guid)
+		if (iarg->guid != 0 && iarg->guid != this_guid)
 			continue;
 
-		if (pool_active(hdl, name, this_guid, &active) != 0) {
+		if (zutil_pool_active(hdl, name, this_guid, &active) != 0) {
 			nvlist_free(raw);
 			nvlist_free(pools);
 			return (NULL);
@@ -2272,22 +1609,86 @@
 		if (active)
 			continue;
 
+		if (iarg->scan) {
+			uint64_t saved_guid = iarg->guid;
+			const char *saved_poolname = iarg->poolname;
+			pthread_mutex_t lock;
+
+			/*
+			 * Create the device cache that will hold the
+			 * devices we will scan based on the cachefile.
+			 * This will get destroyed and freed by
+			 * zpool_find_import_impl.
+			 */
+			avl_tree_t *cache = zutil_alloc(hdl,
+			    sizeof (avl_tree_t));
+			avl_create(cache, slice_cache_compare,
+			    sizeof (rdsk_node_t),
+			    offsetof(rdsk_node_t, rn_node));
+			nvlist_t *nvroot = fnvlist_lookup_nvlist(src,
+			    ZPOOL_CONFIG_VDEV_TREE);
+
+			/*
+			 * We only want to find the pool with this_guid.
+			 * The caller's values are put back as soon as the
+			 * search below returns.
+			 */
+			iarg->guid = this_guid;
+			iarg->poolname = NULL;
+
+			/*
+			 * We need to build up a cache of devices that exists
+			 * in the paths pointed to by the cachefile. This allows
+			 * us to preserve the device namespace that was
+			 * originally specified by the user but also lets us
+			 * scan devices in those directories in case they had
+			 * been renamed.
+			 */
+			pthread_mutex_init(&lock, NULL);
+			discover_cached_paths(hdl, nvroot, cache, &lock);
+			nvlist_t *nv = zpool_find_import_impl(hdl, iarg,
+			    &lock, cache);
+			pthread_mutex_destroy(&lock);
+
+			/*
+			 * Restore the caller's search criteria before any
+			 * path that leaves this block: the poolname/guid
+			 * filters at the top of the loop read them again on
+			 * the next iteration, even when no pool was found.
+			 */
+			iarg->guid = saved_guid;
+			iarg->poolname = saved_poolname;
+
+			/*
+			 * zpool_find_import_impl will return back
+			 * a list of pools that it found based on the
+			 * device cache. There should only be one pool
+			 * since we're looking for a specific guid.
+			 * We will use that pool to build up the final
+			 * pool nvlist which is returned back to the
+			 * caller.
+			 */
+			nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
+			if (pair != NULL) {
+				fnvlist_add_nvlist(pools, nvpair_name(pair),
+				    fnvpair_value_nvlist(pair));
+				VERIFY3P(nvlist_next_nvpair(nv, pair), ==,
+				    NULL);
+			}
+
+			/*
+			 * Adding to 'pools' stores a copy (the non-scan
+			 * path below frees 'dst' after adding it the same
+			 * way), so the intermediate list is released here.
+			 */
+			nvlist_free(nv);
+			continue;
+		}
+
 		if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE,
-		    cachefile) != 0) {
-			(void) no_memory(hdl);
+		    iarg->cachefile) != 0) {
+			(void) zutil_no_memory(hdl);
 			nvlist_free(raw);
 			nvlist_free(pools);
 			return (NULL);
 		}
 
-		if ((dst = refresh_config(hdl, src)) == NULL) {
+		update_vdevs_config_dev_sysfs_path(src);
+
+		if ((dst = zutil_refresh_config(hdl, src)) == NULL) {
 			nvlist_free(raw);
 			nvlist_free(pools);
 			return (NULL);
 		}
 
 		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
-			(void) no_memory(hdl);
+			(void) zutil_no_memory(hdl);
 			nvlist_free(dst);
 			nvlist_free(raw);
 			nvlist_free(pools);
@@ -2295,11 +1696,51 @@
 		}
 		nvlist_free(dst);
 	}
-
 	nvlist_free(raw);
 	return (pools);
 }
 
+/*
+ * Build the device cache (by directory scan or via blkid) and hand it to
+ * zpool_find_import_impl() to produce the list of pools.  Returns NULL when
+ * the cache could not be built.
+ */
+static nvlist_t *
+zpool_find_import(libpc_handle_t *hdl, importargs_t *iarg)
+{
+	pthread_mutex_t lock;
+	avl_tree_t *cache;
+	nvlist_t *pools = NULL;
+	int error;
+
+	verify(iarg->poolname == NULL || iarg->guid == 0);
+	pthread_mutex_init(&lock, NULL);
+
+	/*
+	 * Locate pool member vdevs by blkid or by directory scanning.
+	 * On success a newly allocated AVL tree which is populated with an
+	 * entry for each discovered vdev will be returned in the cache.
+	 * It's the caller's responsibility to consume and destroy this tree.
+	 */
+	if (iarg->scan || iarg->paths != 0) {
+		size_t dirs = iarg->paths;
+		const char * const *dir = (const char * const *)iarg->path;
+
+		/* No explicit paths given: fall back to the default set. */
+		if (dirs == 0)
+			dir = zpool_default_search_paths(&dirs);
+
+		error = zpool_find_import_scan(hdl, &lock, &cache, dir, dirs);
+	} else {
+		error = zpool_find_import_blkid(hdl, &lock, &cache);
+	}
+
+	/* Only a successfully built cache is passed on. */
+	if (error == 0)
+		pools = zpool_find_import_impl(hdl, iarg, &lock, cache);
+
+	pthread_mutex_destroy(&lock);
+	return (pools);
+}
+
+
 nvlist_t *
 zpool_search_import(void *hdl, importargs_t *import,
     const pool_config_ops_t *pco)
@@ -2314,14 +1755,13 @@
 	verify(import->poolname == NULL || import->guid == 0);
 
 	if (import->cachefile != NULL)
-		pools = zpool_find_import_cached(&handle, import->cachefile,
-		    import->poolname, import->guid);
+		pools = zpool_find_import_cached(&handle, import);
 	else
-		pools = zpool_find_import_impl(&handle, import);
+		pools = zpool_find_import(&handle, import);
 
 	if ((pools == NULL || nvlist_empty(pools)) &&
 	    handle.lpc_open_access_error && geteuid() != 0) {
-		(void) zfs_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN,
+		(void) zutil_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN,
 		    "no pools found"));
 	}
 
@@ -2351,17 +1791,14 @@
 	nvlist_t *pools;
 	nvlist_t *match = NULL;
 	nvlist_t *config = NULL;
-	char *name = NULL, *sepp = NULL;
-	char sep = '\0';
+	char *sepp = NULL;
 	int count = 0;
 	char *targetdup = strdup(target);
 
 	*configp = NULL;
 
-	if ((sepp = strpbrk(targetdup, "/@")) != NULL) {
-		sep = *sepp;
+	if ((sepp = strpbrk(targetdup, "/@")) != NULL)
 		*sepp = '\0';
-	}
 
 	pools = zpool_search_import(hdl, args, pco);
 
@@ -2375,11 +1812,11 @@
 					/* multiple matches found */
 					continue;
 				} else {
-					match = config;
-					name = nvpair_name(elem);
+					match = fnvlist_dup(config);
 				}
 			}
 		}
+		fnvlist_free(pools);
 	}
 
 	if (count == 0) {
@@ -2389,6 +1826,7 @@
 
 	if (count > 1) {
 		free(targetdup);
+		fnvlist_free(match);
 		return (EINVAL);
 	}
 
@@ -2397,3 +1835,69 @@
 
 	return (0);
 }
+
+/*
+ * Internal function for iterating over the vdevs.
+ *
+ * For each vdev, func() will be called and will be passed 'zhp' (which is
+ * typically the zpool_handle_t cast as a void pointer), the vdev's nvlist, and
+ * a user-defined data pointer.
+ *
+ * The return values from all the func() calls will be OR'd together and
+ * returned.
+ */
+int
+for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
+    void *data)
+{
+	/* Child arrays of 'nv' are visited in this order, before 'nv'. */
+	const char *list[] = {
+	    ZPOOL_CONFIG_SPARES,
+	    ZPOOL_CONFIG_L2CACHE,
+	    ZPOOL_CONFIG_CHILDREN
+	};
+	nvlist_t **child;
+	uint_t c, children;
+	char *type;
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(list); i++) {
+		if (nvlist_lookup_nvlist_array(nv, list[i], &child,
+		    &children) != 0)
+			continue;
+
+		for (c = 0; c < children; c++) {
+			uint64_t ishole = 0;
+
+			/*
+			 * The lookup result is ignored; ishole starts at 0
+			 * for children that do not carry the key.
+			 */
+			(void) nvlist_lookup_uint64(child[c],
+			    ZPOOL_CONFIG_IS_HOLE, &ishole);
+
+			/* Hole vdevs are neither visited nor descended into. */
+			if (!ishole)
+				ret |= for_each_vdev_cb(zhp, child[c],
+				    func, data);
+		}
+	}
+
+	/* Nodes without a type, and root vdevs, are not passed to func(). */
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0 &&
+	    strcmp(type, VDEV_TYPE_ROOT) != 0)
+		ret |= func(zhp, nv, data);
+
+	return (ret);
+}
+
+/*
+ * Given a ZPOOL_CONFIG_VDEV_TREE nvlist, iterate over all the vdevs, calling
+ * func() for each one.  func() is passed the vdev's nvlist and an optional
+ * user-defined 'data' pointer.
+ */
+int
+for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data)
+{
+	/* NULL is passed as 'zhp', so func() receives a NULL handle. */
+	return (for_each_vdev_cb(NULL, nvroot, func, data));
+}

diff --git a/zfs/lib/libzutil/zutil_import.h b/zfs/lib/libzutil/zutil_import.h
new file mode 100644
index 0000000..0108eb4
--- /dev/null
+++ b/zfs/lib/libzutil/zutil_import.h

@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+#ifndef _LIBZUTIL_ZUTIL_IMPORT_H_
+#define	_LIBZUTIL_ZUTIL_IMPORT_H_
+
+#define	EZFS_BADCACHE	"invalid or missing cache file"
+#define	EZFS_BADPATH	"must be an absolute path"
+#define	EZFS_NOMEM	"out of memory"
+#define	EZFS_EACESS	"some devices require root privileges"
+
+#define	IMPORT_ORDER_PREFERRED_1	1
+#define	IMPORT_ORDER_PREFERRED_2	2
+#define	IMPORT_ORDER_SCAN_OFFSET	10
+#define	IMPORT_ORDER_DEFAULT		100
+
+/*
+ * Handle taken by the import helpers declared in this header
+ * (label_paths(), zpool_find_import_blkid(), zutil_alloc(), zutil_strdup()).
+ */
+typedef struct libpc_handle {
+	/* NOTE(review): presumably controls whether errors are printed */
+	boolean_t lpc_printerr;
+	/*
+	 * Read by zpool_search_import(): when set, no pools were found and
+	 * the caller is not root, EZFS_EACESS is reported.
+	 */
+	boolean_t lpc_open_access_error;
+	/*
+	 * NOTE(review): looks like an auxiliary error description buffer
+	 * and its "in use" flag -- confirm against zutil_error_aux().
+	 */
+	boolean_t lpc_desc_active;
+	char lpc_desc[1024];
+	/*
+	 * NOTE(review): presumably the 'pco' and 'hdl' arguments given to
+	 * zpool_search_import() -- confirm.
+	 */
+	const pool_config_ops_t *lpc_ops;
+	void *lpc_lib_handle;
+} libpc_handle_t;
+
+
+int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path,
+    char **devid);
+int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+    avl_tree_t **slice_cache);
+
+void * zutil_alloc(libpc_handle_t *hdl, size_t size);
+char *zutil_strdup(libpc_handle_t *hdl, const char *str);
+
+typedef struct rdsk_node {
+	char *rn_name;			/* Full path to device */
+	int rn_order;			/* Preferred order (low to high) */
+	int rn_num_labels;		/* Number of valid labels */
+	uint64_t rn_vdev_guid;		/* Expected vdev guid when set */
+	libpc_handle_t *rn_hdl;
+	nvlist_t *rn_config;		/* Label config */
+	avl_tree_t *rn_avl;
+	avl_node_t rn_node;
+	pthread_mutex_t *rn_lock;
+	boolean_t rn_labelpaths;
+} rdsk_node_t;
+
+int slice_cache_compare(const void *, const void *);
+
+void zpool_open_func(void *);
+
+#endif /* _LIBZUTIL_ZUTIL_IMPORT_H_ */

diff --git a/zfs/lib/libzutil/zutil_nicenum.c b/zfs/lib/libzutil/zutil_nicenum.c
index 9a81011..4dcac1f 100644
--- a/zfs/lib/libzutil/zutil_nicenum.c
+++ b/zfs/lib/libzutil/zutil_nicenum.c

@@ -23,9 +23,36 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
+#include <ctype.h>
 #include <math.h>
 #include <stdio.h>
 #include <libzutil.h>
+#include <string.h>
+
+/*
+ * Return B_TRUE if "str" is a number string, B_FALSE otherwise.
+ * Works for integer and floating point numbers.
+ *
+ * A string is accepted when it is non-empty, consists only of decimal
+ * digits and '.' characters, and does not end in '.'.  Signs and
+ * exponents are not accepted.
+ */
+boolean_t
+zfs_isnumber(const char *str)
+{
+	const char *p;
+
+	if (!*str)
+		return (B_FALSE);
+
+	/* isdigit() requires a value representable as unsigned char. */
+	for (p = str; *p; p++)
+		if (!(isdigit((unsigned char)*p) || (*p == '.')))
+			return (B_FALSE);
+
+	/*
+	 * Numbers should not end with a period ("." ".." or "5." are
+	 * not valid).  The loop stopped on the terminating NUL of a
+	 * non-empty string, so p[-1] is its last character.
+	 */
+	if (p[-1] == '.')
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
 
 /*
  * Convert a number to an appropriately human-readable output.

diff --git a/zfs/man/Makefile.am b/zfs/man/Makefile.am
index 841cb9c..64650c2 100644
--- a/zfs/man/Makefile.am
+++ b/zfs/man/Makefile.am

@@ -1 +1,118 @@
-SUBDIRS = man1 man5 man8
+include $(top_srcdir)/config/Substfiles.am
+
+EXTRA_DIST += \
+	man1/cstyle.1
+
+dist_man_MANS = \
+	man1/zhack.1 \
+	man1/ztest.1 \
+	man1/raidz_test.1 \
+	man1/zvol_wait.1 \
+	man1/arcstat.1 \
+	\
+	man5/vdev_id.conf.5 \
+	\
+	man4/spl.4 \
+	man4/zfs.4 \
+	\
+	man7/dracut.zfs.7 \
+	man7/zpool-features.7 \
+	man7/zfsconcepts.7 \
+	man7/zfsprops.7 \
+	man7/zpoolconcepts.7 \
+	man7/zpoolprops.7 \
+	\
+	man8/fsck.zfs.8 \
+	man8/mount.zfs.8 \
+	man8/vdev_id.8 \
+	man8/zdb.8 \
+	man8/zfs.8 \
+	man8/zfs-allow.8 \
+	man8/zfs-bookmark.8 \
+	man8/zfs-change-key.8 \
+	man8/zfs-clone.8 \
+	man8/zfs-create.8 \
+	man8/zfs-destroy.8 \
+	man8/zfs-diff.8 \
+	man8/zfs-get.8 \
+	man8/zfs-groupspace.8 \
+	man8/zfs-hold.8 \
+	man8/zfs-inherit.8 \
+	man8/zfs-jail.8 \
+	man8/zfs-list.8 \
+	man8/zfs-load-key.8 \
+	man8/zfs-mount.8 \
+	man8/zfs-program.8 \
+	man8/zfs-project.8 \
+	man8/zfs-projectspace.8 \
+	man8/zfs-promote.8 \
+	man8/zfs-receive.8 \
+	man8/zfs-recv.8 \
+	man8/zfs-redact.8 \
+	man8/zfs-release.8 \
+	man8/zfs-rename.8 \
+	man8/zfs-rollback.8 \
+	man8/zfs-send.8 \
+	man8/zfs-set.8 \
+	man8/zfs-share.8 \
+	man8/zfs-snapshot.8 \
+	man8/zfs-unallow.8 \
+	man8/zfs-unjail.8 \
+	man8/zfs-unload-key.8 \
+	man8/zfs-unmount.8 \
+	man8/zfs-upgrade.8 \
+	man8/zfs-userspace.8 \
+	man8/zfs-wait.8 \
+	man8/zfs_ids_to_path.8 \
+	man8/zgenhostid.8 \
+	man8/zinject.8 \
+	man8/zpool.8 \
+	man8/zpool-add.8 \
+	man8/zpool-attach.8 \
+	man8/zpool-checkpoint.8 \
+	man8/zpool-clear.8 \
+	man8/zpool-create.8 \
+	man8/zpool-destroy.8 \
+	man8/zpool-detach.8 \
+	man8/zpool-events.8 \
+	man8/zpool-export.8 \
+	man8/zpool-get.8 \
+	man8/zpool-history.8 \
+	man8/zpool-import.8 \
+	man8/zpool-initialize.8 \
+	man8/zpool-iostat.8 \
+	man8/zpool-labelclear.8 \
+	man8/zpool-list.8 \
+	man8/zpool-offline.8 \
+	man8/zpool-online.8 \
+	man8/zpool-reguid.8 \
+	man8/zpool-remove.8 \
+	man8/zpool-reopen.8 \
+	man8/zpool-replace.8 \
+	man8/zpool-resilver.8 \
+	man8/zpool-scrub.8 \
+	man8/zpool-set.8 \
+	man8/zpool-split.8 \
+	man8/zpool-status.8 \
+	man8/zpool-sync.8 \
+	man8/zpool-trim.8 \
+	man8/zpool-upgrade.8 \
+	man8/zpool-wait.8 \
+	man8/zstream.8 \
+	man8/zstreamdump.8 \
+	man8/zpool_influxdb.8
+
+# These pages are installed but not shipped in the dist tarball; they are
+# registered in SUBSTFILES below.
+# NOTE(review): presumably generated from templates by the rules in
+# config/Substfiles.am -- confirm.
+nodist_man_MANS = \
+	man8/zed.8 \
+	man8/zfs-mount-generator.8
+
+SUBSTFILES += $(nodist_man_MANS)
+
+
+if BUILD_LINUX
+# The manual pager in most Linux distros defaults to "BSD" when .Os is blank,
+# but leaving it blank makes things a lot easier on
+# FreeBSD when OpenZFS is vendored in the base system.
+# The install directory is quoted so that a DESTDIR or mandir containing
+# spaces does not split the cd argument.
+install-data-hook:
+	cd "$(DESTDIR)$(mandir)" && $(SED) ${ac_inplace} -e 's/^\.Os$$/.Os OpenZFS/' $(dist_man_MANS) $(nodist_man_MANS)
+endif

diff --git a/zfs/man/man1/Makefile.am b/zfs/man/man1/Makefile.am
deleted file mode 100644
index 2af917f..0000000
--- a/zfs/man/man1/Makefile.am
+++ /dev/null

@@ -1,5 +0,0 @@
-dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1
-EXTRA_DIST = cstyle.1
-
-install-data-local:
-	$(INSTALL) -d -m 0755 "$(DESTDIR)$(mandir)/man1"

diff --git a/zfs/man/man1/arcstat.1 b/zfs/man/man1/arcstat.1
new file mode 100644
index 0000000..a69cd89
--- /dev/null
+++ b/zfs/man/man1/arcstat.1

@@ -0,0 +1,184 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2014 Adam Stevko.  All rights reserved.
+.\" Copyright (c) 2015 by Delphix. All rights reserved.
+.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
+.\"
+.Dd May 26, 2021
+.Dt ARCSTAT 1
+.Os
+.
+.Sh NAME
+.Nm arcstat
+.Nd report ZFS ARC and L2ARC statistics
+.Sh SYNOPSIS
+.Nm
+.Op Fl havxp
+.Op Fl f Ar field Ns Op , Ns Ar field Ns …
+.Op Fl o Ar file
+.Op Fl s Ar string
+.Op Ar interval
+.Op Ar count
+.
+.Sh DESCRIPTION
+.Nm
+prints various ZFS ARC and L2ARC statistics in vmstat-like fashion:
+.Bl -tag -compact -offset Ds -width "l2asize"
+.It Sy c
+ARC target size
+.It Sy dh%
+Demand data hit percentage
+.It Sy dm%
+Demand data miss percentage
+.It Sy mfu
+MFU list hits per second
+.It Sy mh%
+Metadata hit percentage
+.It Sy mm%
+Metadata miss percentage
+.It Sy mru
+MRU list hits per second
+.It Sy ph%
+Prefetch hits percentage
+.It Sy pm%
+Prefetch miss percentage
+.It Sy dhit
+Demand data hits per second
+.It Sy dmis
+Demand data misses per second
+.It Sy hit%
+ARC hit percentage
+.It Sy hits
+ARC reads per second
+.It Sy mfug
+MFU ghost list hits per second
+.It Sy mhit
+Metadata hits per second
+.It Sy miss
+ARC misses per second
+.It Sy mmis
+Metadata misses per second
+.It Sy mrug
+MRU ghost list hits per second
+.It Sy phit
+Prefetch hits per second
+.It Sy pmis
+Prefetch misses per second
+.It Sy read
+Total ARC accesses per second
+.It Sy time
+Current time
+.It Sy size
+ARC size
+.It Sy arcsz
+Alias for
+.Sy size
+.It Sy dread
+Demand data accesses per second
+.It Sy eskip
+evict_skip per second
+.It Sy miss%
+ARC miss percentage
+.It Sy mread
+Metadata accesses per second
+.It Sy pread
+Prefetch accesses per second
+.It Sy l2hit%
+L2ARC access hit percentage
+.It Sy l2hits
+L2ARC hits per second
+.It Sy l2miss
+L2ARC misses per second
+.It Sy l2read
+Total L2ARC accesses per second
+.It Sy l2pref
+L2ARC prefetch allocated size per second
+.It Sy l2pref%
+L2ARC prefetch allocated size percentage
+.It Sy l2mfu
+L2ARC MFU allocated size per second
+.It Sy l2mfu%
+L2ARC MFU allocated size percentage
+.It Sy l2mru
+L2ARC MRU allocated size per second
+.It Sy l2mru%
+L2ARC MRU allocated size percentage
+.It Sy l2data
+L2ARC data (buf content) allocated size per second
+.It Sy l2data%
+L2ARC data (buf content) allocated size percentage
+.It Sy l2meta
+L2ARC metadata (buf content) allocated size per second
+.It Sy l2meta%
+L2ARC metadata (buf content) allocated size percentage
+.It Sy l2size
+Size of the L2ARC
+.It Sy mtxmis
+mutex_miss per second
+.It Sy l2bytes
+Bytes read per second from the L2ARC
+.It Sy l2miss%
+L2ARC access miss percentage
+.It Sy l2asize
+Actual (compressed) size of the L2ARC
+.It Sy grow
+ARC grow disabled
+.It Sy need
+ARC reclaim needed
+.It Sy free
+The ARC's idea of how much free memory there is, which includes evictable memory in the page cache.
+Since the ARC tries to keep
+.Sy avail
+above zero,
+.Sy avail
+is usually more instructive to observe than
+.Sy free .
+.It Sy avail
+The ARC's idea of how much free memory is available to it, which is a bit less than
+.Sy free .
+May temporarily be negative, in which case the ARC will reduce the target size
+.Sy c .
+.El
+.
+.Sh OPTIONS
+.Bl -tag -width "-v"
+.It Fl a
+Print all possible stats.
+.It Fl f
+Display only specific fields.
+See
+.Sx DESCRIPTION
+for supported statistics.
+.It Fl h
+Display help message.
+.It Fl o
+Report statistics to a file instead of the standard output.
+.It Fl p
+Disable auto-scaling of numerical fields (for raw, machine-parsable values).
+.It Fl s
+Display data with a specified separator (default: 2 spaces).
+.It Fl x
+Print extended stats
+.Pq same as Fl f Sy time , Ns Sy mfu , Ns Sy mru , Ns Sy mfug , Ns Sy mrug , Ns Sy eskip , Ns Sy mtxmis , Ns Sy dread , Ns Sy pread , Ns Sy read .
+.It Fl v
+Show field headers and definitions.
+.El
+.
+.Sh OPERANDS
+The following operands are supported:
+.Bl -tag -compact -offset Ds -width "interval"
+.It Ar interval
+Specify the sampling interval in seconds.
+.It Ar count
+Display only
+.Ar count
+reports.
+.El

diff --git a/zfs/man/man1/cstyle.1 b/zfs/man/man1/cstyle.1
index f77d534..f5f9ec7 100644
--- a/zfs/man/man1/cstyle.1
+++ b/zfs/man/man1/cstyle.1

@@ -20,148 +20,141 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.TH cstyle 1 "28 March 2005"
-.SH NAME
-.I cstyle
-\- check for some common stylistic errors in C source files
-.SH SYNOPSIS
-\fBcstyle [-chpvCP] [-o constructs] [file...]\fP
-.LP
-.SH DESCRIPTION
-.IX "OS-Net build tools" "cstyle" "" "\fBcstyle\fP"
-.LP
-.I cstyle
-inspects C source files (*.c and *.h) for common stylistic errors.  It
-attempts to check for the cstyle documented in
-\fIhttp://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf\fP.
+.Dd May 26, 2021
+.Dt CSTYLE 1
+.Os
+.
+.Sh NAME
+.Nm cstyle
+.Nd check for some common stylistic errors in C source files
+.Sh SYNOPSIS
+.Nm
+.Op Fl chpvCP
+.Op Fl o Ar construct Ns Op , Ns Ar construct Ns …
+.Oo Ar file Oc Ns …
+.Sh DESCRIPTION
+.Nm
+inspects C source files (*.c and *.h) for common stylistic errors.
+It attempts to check for the cstyle documented in
+.Lk http://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf .
 Note that there is much in that document that
-.I cannot
-be checked for; just because your code is \fBcstyle(1)\fP clean does not
-mean that you've followed Sun's C style.  \fICaveat emptor\fP.
-.LP
-.SH OPTIONS
-.LP
+.Em cannot
+be checked for; just because your code is
+.Nm Ns -clean
+does not mean that you've followed Sun's C style.
+.Em Caveat emptor .
+.
+.Sh OPTIONS
 The following options are supported:
-.TP 4
-.B \-c
-Check continuation line indentation inside of functions.  Sun's C style
+.Bl -tag -width "-c"
+.It Fl c
+Check continuation line indentation inside of functions.
+Sun's C style
 states that all statements must be indented to an appropriate tab stop,
-and any continuation lines after them must be indented \fIexactly\fP four
-spaces from the start line.  This option enables a series of checks
-designed to find continuation line problems within functions only.  The
-checks have some limitations;  see CONTINUATION CHECKING, below.
-.LP
-.TP 4
-.B \-h
-Performs heuristic checks that are sometimes wrong.  Not generally used.
-.LP
-.TP 4
-.B \-p
-Performs some of the more picky checks.  Includes ANSI #else and #endif
-rules, and tries to detect spaces after casts.  Used as part of the
-putback checks.
-.LP
-.TP 4
-.B \-v
-Verbose output;  includes the text of the line of error, and, for
-\fB-c\fP, the first statement in the current continuation block.
-.LP
-.TP 4
-.B \-C
+and any continuation lines after them must be indented
+.Em exactly
+four spaces from the start line.
+This option enables a series of checks designed to find
+continuation line problems within functions only.
+The checks have some limitations; see
+.Sy CONTINUATION CHECKING ,
+below.
+.It Fl h
+Performs heuristic checks that are sometimes wrong.
+Not generally used.
+.It Fl p
+Performs some of the more picky checks.
+Includes ANSI
+.Sy #else
+and
+.Sy #endif
+rules, and tries to detect spaces after casts.
+Used as part of the putback checks.
+.It Fl v
+Verbose output; includes the text of the line of error, and, for
+.Fl c ,
+the first statement in the current continuation block.
+.It Fl C
 Ignore errors in header comments (i.e. block comments starting in the
-first column).  Not generally used.
-.LP
-.TP 4
-.B \-P
-Check for use of non-POSIX types.  Historically, types like "u_int" and
-"u_long" were used, but they are now deprecated in favor of the POSIX
-types uint_t, ulong_t, etc.  This detects any use of the deprecated
-types.  Used as part of the putback checks.
-.LP
-.TP 4
-.B \-o \fIconstructs\fP
-Allow a comma-separated list of additional constructs.  Available
-constructs include:
-.LP
-.TP 10
-.B doxygen
-Allow doxygen-style block comments (\fB/**\fP and \fB/*!\fP)
-.LP
-.TP 10
-.B splint
-Allow splint-style lint comments (\fB/*@...@*/\fP)
-.LP
-.SH NOTES
-.LP
-The cstyle rule for the OS/Net consolidation is that all new files must
-be \fB-pP\fP clean.  For existing files, the following invocations are
-run against both the old and new files:
-.LP
-.TP 4
-\fBcstyle file\fB
-.LP
-.TP 4
-\fBcstyle -p file\fB
-.LP
-.TP 4
-\fBcstyle -pP file\fB
-.LP
-If the old file gave no errors for one of the invocations, the new file
-must also give no errors.  This way, files can only become more clean.
-.LP
-.SH CONTINUATION CHECKING
-.LP
+first column).
+Not generally used.
+.It Fl P
+Check for use of non-POSIX types.
+Historically, types like
+.Sy u_int
+and
+.Sy u_long
+were used, but they are now deprecated in favor of the POSIX
+types
+.Sy uint_t ,
+.Sy ulong_t ,
+etc.
+This detects any use of the deprecated types.
+Used as part of the putback checks.
+.It Fl o Ar construct Ns Op , Ns Ar construct Ns …
+Available constructs include:
+.Bl -tag -compact -width "doxygen"
+.It Sy doxygen
+Allow doxygen-style block comments
+.Pq Sy /** No and Sy /*!\& .
+.It Sy splint
+Allow splint-style lint comments
+.Pq Sy /*@ Ns ... Ns Sy @*/ .
+.El
+.El
+.
+.Sh CONTINUATION CHECKING
 The continuation checker is a reasonably simple state machine that knows
 something about how C is laid out, and can match parenthesis, etc. over
-multiple lines.  It does have some limitations:
-.LP
-.TP 4
-.B 1.
+multiple lines.
+It does have some limitations:
+.Bl -enum
+.It
 Preprocessor macros which cause unmatched parenthesis will confuse the
-checker for that line.  To fix this, you'll need to make sure that each
-branch of the #if statement has balanced parenthesis.
-.LP
-.TP 4
-.B 2.
-Some \fBcpp\fP macros do not require ;s after them.  Any such macros
-*must* be ALL_CAPS; any lower case letters will cause bad output.
-.LP
-The bad output will generally be corrected after the next \fB;\fP,
-\fB{\fP, or \fB}\fP.
-.LP
-Some continuation error messages deserve some additional explanation
-.LP
-.TP 4
-.B
-multiple statements continued over multiple lines
-A multi-line statement which is not broken at statement
-boundaries.  For example:
-.RS 4
-.HP 4
+checker for that line.
+To fix this, you'll need to make sure that each branch of the
+.Sy #if
+statement has balanced parentheses.
+.It
+Some
+.Xr cpp 1
+macros do not require
+.Sy ;\& Ns s after them.
+Any such macros
+.Em must
+be ALL_CAPS; any lower case letters will cause bad output.
+.Pp
+The bad output will generally be corrected after the next
+.Sy ;\& , { , No or Sy } .
+.El
+Some continuation error messages deserve some additional explanation:
+.Bl -tag -width Ds
+.It Sy multiple statements continued over multiple lines
+A multi-line statement which is not broken at statement boundaries.
+For example:
+.Bd -literal -compact -offset Ds
 if (this_is_a_long_variable == another_variable) a =
-.br
-b + c;
-.LP
-Will trigger this error.  Instead, do:
-.HP 8
+    b + c;
+.Ed
+.Pp
+Will trigger this error.
+Instead, do:
+.Bd -literal -compact -offset Ds
 if (this_is_a_long_variable == another_variable)
-.br
-a = b + c;
-.RE
-.LP
-.TP 4
-.B
-empty if/for/while body not on its own line
+    a = b + c;
+.Ed
+.It Sy empty if/for/while body not on its own line
 For visibility, empty bodies for if, for, and while statements should be
-on their own line.  For example:
-.RS 4
-.HP 4
+on their own line.
+For example:
+.Bd -literal -compact -offset Ds
 while (do_something(&x) == 0);
-.LP
-Will trigger this error.  Instead, do:
-.HP 8
+.Ed
+.Pp
+Will trigger this error.
+Instead, do:
+.Bd -literal -compact -offset Ds
 while (do_something(&x) == 0)
-.br
-;
-.RE
-
+    ;
+.Ed
+.El

diff --git a/zfs/man/man1/raidz_test.1 b/zfs/man/man1/raidz_test.1
index 423177a..4283a4b 100644
--- a/zfs/man/man1/raidz_test.1
+++ b/zfs/man/man1/raidz_test.1

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,79 +18,84 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright (c) 2016 Gvozden Nešković. All rights reserved.
 .\"
-.TH raidz_test 1 "2016" "ZFS on Linux" "User Commands"
-
-.SH NAME
-\fBraidz_test\fR \- raidz implementation verification and benchmarking tool
-.SH SYNOPSIS
-.LP
-.BI "raidz_test <options>"
-.SH DESCRIPTION
-.LP
-This manual page documents briefly the \fBraidz_test\fR command.
-.LP
-Purpose of this tool is to run all supported raidz implementation and verify
-results of all methods. Tool also contains a parameter sweep option where all
-parameters affecting RAIDZ block are verified (like ashift size, data offset,
-data size, etc...).
-The tool also supports a benchmarking mode using -B option.
-.SH OPTION
-.HP
-.BI "\-h" ""
-.IP
+.Dd May 26, 2021
+.Dt RAIDZ_TEST 1
+.Os
+.
+.Sh NAME
+.Nm raidz_test
+.Nd raidz implementation verification and benchmarking tool
+.Sh SYNOPSIS
+.Nm
+.Op Fl StBevTD
+.Op Fl a Ar ashift
+.Op Fl o Ar zio_off_shift
+.Op Fl d Ar raidz_data_disks
+.Op Fl s Ar zio_size_shift
+.Op Fl r Ar reflow_offset
+.
+.Sh DESCRIPTION
+The purpose of this tool is to run all supported raidz implementations and verify
+the results of all methods.
+It also contains a parameter sweep option where all
+parameters affecting a RAIDZ block are verified (like ashift size, data offset,
+data size, etc.).
+The tool also supports a benchmarking mode using the
+.Fl B
+option.
+.
+.Sh OPTIONS
+.Bl -tag -width "-B(enchmark)"
+.It Fl h
 Print a help summary.
-.HP
-.BI "\-a" " ashift (default: 9)"
-.IP
+.It Fl a Ar ashift Pq default: Sy 9
 Ashift value.
-.HP
-.BI "\-o" " zio_off_shift" " (default: 0)"
-.IP
-Zio offset for raidz block. Offset value is 1 << (zio_off_shift)
-.HP
-.BI "\-d" " raidz_data_disks" " (default: 8)"
-.IP
-Number of raidz data disks to use. Additional disks for parity will be used
-during testing.
-.HP
-.BI "\-s" " zio_size_shift" " (default: 19)"
-.IP
-Size of data for raidz block. Size is 1 << (zio_size_shift).
-.HP
-.BI "\-S(weep)"
-.IP
-Sweep parameter space while verifying the raidz implementations. This option
-will exhaust all most of valid values for -a -o -d -s options. Runtime using
-this option will be long.
-.HP
-.BI "\-t(imeout)"
-.IP
-Wall time for sweep test in seconds. The actual runtime could be longer.
-.HP
-.BI "\-B(enchmark)"
-.IP
-This options starts the benchmark mode. All implementations are benchmarked
-using increasing per disk data size. Results are given as throughput per disk,
-measured in MiB/s.
-.HP
-.BI "\-v(erbose)"
-.IP
+.It Fl o Ar zio_off_shift Pq default: Sy 0
+ZIO offset for each raidz block.
+The offset's value is
+.Em 2^zio_off_shift .
+.It Fl d Ar raidz_data_disks Pq default: Sy 8
+Number of raidz data disks to use.
+Additional disks will be used for parity.
+.It Fl s Ar zio_size_shift Pq default: Sy 19
+Size of data for raidz block.
+The real size is
+.Em 2^zio_size_shift .
+.It Fl r Ar reflow_offset Pq default: Sy uint max
+Set raidz expansion offset.
+The expanded raidz map allocation function will
+produce different map configurations depending on this value.
+.It Fl S Ns Pq weep
+Sweep parameter space while verifying the raidz implementations.
+This option
+will exhaust almost all valid values for the
+.Fl aods
+options.
+Runtime using this option will be long.
+.It Fl t Ns Pq imeout
+Wall time for sweep test in seconds.
+The actual runtime could be longer.
+.It Fl B Ns Pq enchmark
+All implementations are benchmarked using increasing per disk data size.
+Results are given as throughput per disk, measured in MiB/s.
+.It Fl e Ns Pq xpansion
+Use expanded raidz map allocation function.
+.It Fl v Ns Pq erbose
 Increase verbosity.
-.HP
-.BI "\-T(est the test)"
-.IP
-Debugging option. When this option is specified tool is supposed to fail
-all tests. This is to check if tests would properly verify bit-exactness.
-.HP
-.BI "\-D(ebug)"
-.IP
-Debugging option. Specify to attach gdb when SIGSEGV or SIGABRT are received.
-.HP
-
-.SH "SEE ALSO"
-.BR "ztest (1)"
-.SH "AUTHORS"
-vdev_raidz, created for ZFS on Linux by Gvozden Nešković <neskovic@gmail.com>
+.It Fl T Ns Pq est the test
+Debugging option: fail all tests.
+This is to check if tests would properly verify bit-exactness.
+.It Fl D Ns Pq ebug
+Debugging option: attach
+.Xr gdb 1
+when
+.Sy SIGSEGV
+or
+.Sy SIGABRT
+are received.
+.El
+.
+.Sh "SEE ALSO"
+.Xr ztest 1

diff --git a/zfs/man/man1/zhack.1 b/zfs/man/man1/zhack.1
index 11d300b..83046ee 100644
--- a/zfs/man/man1/zhack.1
+++ b/zfs/man/man1/zhack.1

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,63 +18,108 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
 .\"
-.TH zhack 1 "2013 MAR 16" "ZFS on Linux" "User Commands"
-
-.SH NAME
-zhack \- libzpool debugging tool
-.SH DESCRIPTION
+.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
+.\"
+.Dd May 26, 2021
+.Dt ZHACK 1
+.Os
+.
+.Sh NAME
+.Nm zhack
+.Nd libzpool debugging tool
+.Sh DESCRIPTION
 This utility pokes configuration changes directly into a ZFS pool,
 which is dangerous and can cause data corruption.
-.SH SYNOPSIS
-.LP
-.BI "zhack [\-c " "cachefile" "] [\-d " "dir" "] <" "subcommand" "> [" "arguments" "]"
-.SH OPTIONS
-.HP
-.BI "\-c" " cachefile"
-.IP
-Read the \fIpool\fR configuration from the \fIcachefile\fR, which is
-/etc/zfs/zpool.cache by default.
-.HP
-.BI "\-d" " dir"
-.IP
-Search for \fIpool\fR members in the \fIdir\fR path. Can be specified
-more than once.
-.SH SUBCOMMANDS
-.LP
-.BI "feature stat " "pool"
-.IP
+.Sh SYNOPSIS
+.Bl -tag -width Ds
+.It Xo
+.Nm zhack
+.Cm feature stat
+.Ar pool
+.Xc
 List feature flags.
-.LP
-.BI "feature enable [\-d " "description" "] [\-r] " "pool guid"
-.IP
-Add a new feature to \fIpool\fR that is uniquely identified by
-\fIguid\fR, which is specified in the same form as a zfs(8) user
-property.
-.IP
-The \fIdescription\fR is a short human readable explanation of the new
+.
+.It Xo
+.Nm zhack
+.Cm feature enable
+.Op Fl d Ar description
+.Op Fl r
+.Ar pool
+.Ar guid
+.Xc
+Add a new feature to
+.Ar pool
+that is uniquely identified by
+.Ar guid ,
+which is specified in the same form as a
+.Xr zfs 8
+user property.
+.Pp
+The
+.Ar description
+is a short human readable explanation of the new feature.
+.Pp
+The
+.Fl r
+flag indicates that
+.Ar pool
+can be safely opened in read-only mode by a system that does not understand the
+.Ar guid
 feature.
-.IP
-The \fB\-r\fR switch indicates that \fIpool\fR can be safely opened
-in read-only mode by a system that does not have the \fIguid\fR
-feature.
-.LP
-.BI "feature ref [\-d|\-m] " "pool guid"
-.IP
-Increment the reference count of the \fIguid\fR feature in \fIpool\fR.
-.IP
-The \fB\-d\fR switch decrements the reference count of the \fIguid\fR
-feature in \fIpool\fR.
-.IP
-The \fB\-m\fR switch indicates that the \fIguid\fR feature is now
-required to read the pool MOS.
-.SH EXAMPLES
-.LP
-.nf
-# zhack feature stat tank
-
+.
+.It Xo
+.Nm zhack
+.Cm feature ref
+.Op Fl d Ns | Ns Fl m
+.Ar pool
+.Ar guid
+.Xc
+Increment the reference count of the
+.Ar guid
+feature in
+.Ar pool .
+.Pp
+The
+.Fl d
+flag decrements the reference count of the
+.Ar guid
+feature in
+.Ar pool
+instead.
+.Pp
+The
+.Fl m
+flag indicates that the
+.Ar guid
+feature is now required to read the pool MOS.
+.El
+.
+.Sh GLOBAL OPTIONS
+The following can be passed to all
+.Nm
+invocations before any subcommand:
+.Bl -tag -width "-d dir"
+.It Fl c Ar cachefile
+Read
+.Ar pool
+configuration from the
+.Ar cachefile ,
+which is
+.Pa /etc/zfs/zpool.cache
+by default.
+.It Fl d Ar dir
+Search for
+.Ar pool
+members in
+.Ar dir .
+Can be specified more than once.
+.El
+.
+.Sh EXAMPLES
+.Bd -literal
+.No # Nm zhack Cm feature stat Ar tank
 for_read_obj:
 	org.illumos:lz4_compress = 0
 for_write_obj:
@@ -85,14 +129,12 @@
 	com.delphix:async_destroy = Destroy filesystems asynchronously.
 	com.delphix:empty_bpobj = Snapshots use less space.
 	org.illumos:lz4_compress = LZ4 compression algorithm support.
-.LP
-# zhack feature enable -d 'Predict future disk failures.' \\
-    tank com.example:clairvoyance
-.LP
-# zhack feature ref tank com.example:clairvoyance
-.SH AUTHORS
-This man page was written by Darik Horn <dajhorn@vanadac.com>.
-.SH SEE ALSO
-.BR zfs (8),
-.BR zpool-features (5),
-.BR ztest (1)
+
+.No # Nm zhack Cm feature enable Fl d No 'Predict future disk failures.' Ar tank com.example:clairvoyance
+.No # Nm zhack Cm feature ref Ar tank com.example:clairvoyance
+.Ed
+.
+.Sh SEE ALSO
+.Xr ztest 1 ,
+.Xr zpool-features 7 ,
+.Xr zfs 8

diff --git a/zfs/man/man1/ztest.1 b/zfs/man/man1/ztest.1
index 84e56c8..fd1374a 100644
--- a/zfs/man/man1/ztest.1
+++ b/zfs/man/man1/ztest.1

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,161 +18,216 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright (c) 2009 Oracle and/or its affiliates. All rights reserved.
 .\" Copyright (c) 2009 Michael Gebetsroither <michael.geb@gmx.at>. All rights
 .\" reserved.
+.\" Copyright (c) 2017, Intel Corporation.
 .\"
-.TH ztest 1 "2009 NOV 01" "ZFS on Linux" "User Commands"
-
-.SH NAME
-\fBztest\fR \- was written by the ZFS Developers as a ZFS unit test.
-.SH SYNOPSIS
-.LP
-.BI "ztest <options>"
-.SH DESCRIPTION
-.LP
-This manual page documents briefly the \fBztest\fR command.
-.LP
-\fBztest\fR was written by the ZFS Developers as a ZFS unit test. The
-tool was developed in tandem with the ZFS functionality and was
-executed nightly as one of the many regression test against the daily
-build. As features were added to ZFS, unit tests were also added to
-\fBztest\fR.  In addition, a separate test development team wrote and
+.Dd May 26, 2021
+.Dt ZTEST 1
+.Os
+.
+.Sh NAME
+.Nm ztest
+.Nd ZFS unit test written by the ZFS Developers
+.Sh SYNOPSIS
+.Nm
+.Op Fl VEG
+.Op Fl v Ar vdevs
+.Op Fl s Ar size_of_each_vdev
+.Op Fl a Ar alignment_shift
+.Op Fl m Ar mirror_copies
+.Op Fl r Ar raidz_disks/draid_disks
+.Op Fl R Ar raid_parity
+.Op Fl K Ar raid_kind
+.Op Fl D Ar draid_data
+.Op Fl S Ar draid_spares
+.Op Fl C Ar vdev_class_state
+.Op Fl d Ar datasets
+.Op Fl t Ar threads
+.Op Fl g Ar gang_block_threshold
+.Op Fl i Ar initialize_pool_i_times
+.Op Fl k Ar kill_percentage
+.Op Fl p Ar pool_name
+.Op Fl T Ar time
+.Op Fl z Ar zil_failure_rate
+.
+.Sh DESCRIPTION
+.Nm
+was written by the ZFS Developers as a ZFS unit test.
+The tool was developed in tandem with the ZFS functionality and was
+executed nightly as one of the many regression tests against the daily build.
+As features were added to ZFS, unit tests were also added to
+.Nm .
+In addition, a separate test development team wrote and
 executed more functional and stress tests.
-.LP
-By default \fBztest\fR runs for ten minutes and uses block files
-(stored in /tmp) to create pools rather than using physical disks.
-Block files afford \fBztest\fR its flexibility to play around with
+.
+.Pp
+By default
+.Nm
+runs for five minutes and uses block files
+(stored in
+.Pa /tmp )
+to create pools rather than using physical disks.
+Block files afford
+.Nm
+its flexibility to play around with
 zpool components without requiring large hardware configurations.
-However, storing the block files in /tmp may not work for you if you
+However, storing the block files in
+.Pa /tmp
+may not work for you if you
 have a small tmp directory.
-.LP
-By default is non-verbose. This is why entering the command above will
-result in \fBztest\fR quietly executing for 5 minutes. The -V option
-can be used to increase the verbosity of the tool. Adding multiple -V
-option is allowed and the more you add the more chatty \fBztest\fR
+.
+.Pp
+By default, ztest is non-verbose.
+This is why entering the command above will result in
+.Nm
+quietly executing for 5 minutes.
+The
+.Fl V
+option can be used to increase the verbosity of the tool.
+Adding multiple
+.Fl V
+options is allowed and the more you add the more chatty
+.Nm
 becomes.
-.LP
-After the \fBztest\fR run completes, you should notice many ztest.*
-files lying around. Once the run completes you can safely remove these
-files. Note that you shouldn't remove these files during a run. You
-can re-use these files in your next \fBztest\fR run by using the -E
+.
+.Pp
+After the
+.Nm
+run completes, you should notice many
+.Pa ztest.*
+files lying around.
+Once the run completes you can safely remove these files.
+Note that you shouldn't remove these files during a run.
+You can re-use these files in your next
+.Nm
+run by using the
+.Fl E
 option.
-.SH OPTIONS
-.HP
-.BI "\-?" ""
-.IP
+.
+.Sh OPTIONS
+.Bl -tag -width "-v v"
+.It Fl h , \&? , -help
 Print a help summary.
-.HP
-.BI "\-v" " vdevs" " (default: 5)
-.IP
+.It Fl v , -vdevs Ns = (default: Sy 5 )
 Number of vdevs.
-.HP
-.BI "\-s" " size_of_each_vdev" " (default: 64M)"
-.IP
+.It Fl s , -vdev-size Ns = (default: Sy 64M )
 Size of each vdev.
-.HP
-.BI "\-a" " alignment_shift" " (default: 9) (use 0 for random)"
-.IP
-Used alignment in test.
-.HP
-.BI "\-m" " mirror_copies" " (default: 2)"
-.IP
+.It Fl a , -alignment-shift Ns = (default: Sy 9 ) No (use Sy 0 No for random)
+Alignment shift used in test.
+.It Fl m , -mirror-copies Ns = (default: Sy 2 )
 Number of mirror copies.
-.HP
-.BI "\-r" " raidz_disks" " (default: 4)"
-.IP
-Number of raidz disks.
-.HP
-.BI "\-R" " raidz_parity" " (default: 1)"
-.IP
-Raidz parity.
-.HP
-.BI "\-d" " datasets" " (default: 7)"
-.IP
+.It Fl r , -raid-disks Ns = (default: Sy 4 No for raidz/ Ns Sy 16 No for draid)
+Number of raidz/draid disks.
+.It Fl R , -raid-parity Ns = (default: Sy 1 )
+Raid parity (raidz & draid).
+.It Fl K , -raid-kind Ns = Ns Sy raidz Ns | Ns Sy draid Ns | Ns Sy random No (default: Sy random )
+The kind of RAID config to use.
+With
+.Sy random
+the kind alternates between raidz and draid.
+.It Fl D , -draid-data Ns = (default: Sy 4 )
+Number of data disks in a dRAID redundancy group.
+.It Fl S , -draid-spares Ns = (default: Sy 1 )
+Number of dRAID distributed spare disks.
+.It Fl d , -datasets Ns = (default: Sy 7 )
 Number of datasets.
-.HP
-.BI "\-t" " threads" " (default: 23)"
-.IP
+.It Fl t , -threads Ns = (default: Sy 23 )
 Number of threads.
-.HP
-.BI "\-g" " gang_block_threshold" " (default: 32K)"
-.IP
+.It Fl g , -gang-block-threshold Ns = (default: Sy 32K )
 Gang block threshold.
-.HP
-.BI "\-i" " initialize_pool_i_times" " (default: 1)"
-.IP
-Number of pool initialisations.
-.HP
-.BI "\-k" " kill_percentage" " (default: 70%)"
-.IP
+.It Fl i , -init-count Ns = (default: Sy 1 )
+Number of pool initializations.
+.It Fl k , -kill-percentage Ns = (default: Sy 70% )
 Kill percentage.
-.HP
-.BI "\-p" " pool_name" " (default: ztest)"
-.IP
+.It Fl p , -pool-name Ns = (default: Sy ztest )
 Pool name.
-.HP
-.BI "\-V(erbose)"
-.IP
-Verbose (use multiple times for ever more blather).
-.HP
-.BI "\-E(xisting)"
-.IP
+.It Fl f , -vdev-file-directory Ns = (default: Pa /tmp )
+File directory for vdev files.
+.It Fl M , -multi-host
+Multi-host; simulate pool imported on remote host.
+.It Fl E , -use-existing-pool
 Use existing pool (use existing pool instead of creating new one).
-.HP
-.BI "\-T" " time" " (default: 300 sec)"
-.IP
+.It Fl T , -run-time Ns = (default: Sy 300 Ns s)
 Total test run time.
-.HP
-.BI "\-z" " zil_failure_rate" " (default: fail every 2^5 allocs)
-.IP
-Injected failure rate.
-.HP
-.BI "\-G"
-.IP
-Dump zfs_dbgmsg buffer before exiting.
-.SH "EXAMPLES"
-.LP
-To override /tmp as your location for block files, you can use the -f
+.It Fl P , -pass-time Ns = (default: Sy 60 Ns s)
+Time per pass.
+.It Fl F , -freeze-loops Ns = (default: Sy 50 )
+Max loops in
+.Fn spa_freeze .
+.It Fl B , -alt-ztest Ns =
+Alternate ztest path.
+.It Fl C , -vdev-class-state Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy random No (default: Sy random )
+The vdev allocation class state.
+.It Fl o , -option Ns = Ns Ar variable Ns = Ns Ar value
+Set global
+.Ar variable
+to an unsigned 32-bit integer
+.Ar value
+(little-endian only).
+.It Fl G , -dump-debug
+Dump zfs_dbgmsg buffer before exiting due to an error.
+.It Fl V , -verbose
+Verbose (use multiple times for ever more verbosity).
+.El
+.
+.Sh EXAMPLES
+To override
+.Pa /tmp
+as your location for block files, you can use the
+.Fl f
 option:
-.IP
-ztest -f /
-.LP
-To get an idea of what ztest is actually testing try this:
-.IP
-ztest -f / -VVV
-.LP
-Maybe you'd like to run ztest for longer? To do so simply use the -T
+.Dl # ztest -f /
+.Pp
+To get an idea of what
+.Nm
+is actually testing try this:
+.Dl # ztest -f / -VVV
+.Pp
+Maybe you'd like to run
+.Nm ztest
+for longer? To do so simply use the
+.Fl T
 option and specify the runlength in seconds like so:
-.IP
-ztest -f / -V -T 120
-
-.SH "ENVIRONMENT VARIABLES"
-.TP
-.B "ZFS_HOSTID=id"
-Use \fBid\fR instead of the SPL hostid to identify this host.  Intended for use
-with ztest, but this environment variable will affect any utility which uses
-libzpool, including \fBzpool(8)\fR.  Since the kernel is unaware of this setting
+.Dl # ztest -f / -V -T 120
+.
+.Sh ENVIRONMENT VARIABLES
+.Bl -tag -width "ZF"
+.It Ev ZFS_HOSTID Ns = Ns Em id
+Use
+.Em id
+instead of the SPL hostid to identify this host.
+Intended for use with
+.Nm , No but this environment variable will affect any utility which uses
+libzpool, including
+.Xr zpool 8 .
+Since the kernel is unaware of this setting,
 results with utilities other than ztest are undefined.
-.TP
-.B "ZFS_STACK_SIZE=stacksize"
-Limit the default stack size to \fBstacksize\fR bytes for the purpose of
-detecting and debugging kernel stack overflows.  This value defaults to
-\fB32K\fR which is double the default \fB16K\fR Linux kernel stack size.
-
+.It Ev ZFS_STACK_SIZE Ns = Ns Em stacksize
+Limit the default stack size to
+.Em stacksize
+bytes for the purpose of
+detecting and debugging kernel stack overflows.
+This value defaults to
+.Em 32K
+which is double the default
+.Em 16K
+Linux kernel stack size.
+.Pp
 In practice, setting the stack size slightly higher is needed because
 differences in stack usage between kernel and user space can lead to spurious
-stack overflows (especially when debugging is enabled).  The specified value
+stack overflows (especially when debugging is enabled).
+The specified value
 will be rounded up to a floor of PTHREAD_STACK_MIN which is the minimum stack
 required for a NULL procedure in user space.
-
-By default the stack size is limited to 256K.
-.SH "SEE ALSO"
-.BR "spl-module-parameters (5)" ","
-.BR "zpool (1)" ","
-.BR "zfs (1)" ","
-.BR "zdb (1)" ","
-.SH "AUTHOR"
-This manual page was transferred to asciidoc by Michael Gebetsroither
-<gebi@grml.org> from http://opensolaris.org/os/community/zfs/ztest/
+.Pp
+By default the stack size is limited to
+.Em 256K .
+.El
+.
+.Sh SEE ALSO
+.Xr spl 4 ,
+.Xr zdb 8 ,
+.Xr zfs 8 ,
+.Xr zpool 8

diff --git a/zfs/man/man1/zvol_wait.1 b/zfs/man/man1/zvol_wait.1
index 0366da5..0fb47ce 100644
--- a/zfs/man/man1/zvol_wait.1
+++ b/zfs/man/man1/zvol_wait.1

@@ -1,21 +1,32 @@
-.Dd July 5, 2019
-.Dt ZVOL_WAIT 1 SMM
-.Os Linux
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.Dd May 27, 2021
+.Dt ZVOL_WAIT 1
+.Os
+.
 .Sh NAME
 .Nm zvol_wait
-.Nd Wait for ZFS volume links in
-.Em /dev
-to be created.
+.Nd wait for ZFS volume links to appear in /dev
 .Sh SYNOPSIS
 .Nm
+.
 .Sh DESCRIPTION
-When a ZFS pool is imported, ZFS will register each ZFS volume
-(zvol) as a disk device with the system. As the disks are registered,
-.Xr \fBudev 7\fR
-will asynchronously create symlinks under
-.Em /dev/zvol
-using the zvol's name.
+When a ZFS pool is imported, the volumes within it will appear as block devices.
+As they're registered,
+.Xr udev 7
+asynchronously creates symlinks under
+.Pa /dev/zvol
+using the volumes' names.
 .Nm
-will wait for all those symlinks to be created before returning.
+will wait for all those symlinks to be created before exiting.
+.
 .Sh SEE ALSO
-.Xr \fBudev 7\fR
+.Xr udev 7

diff --git a/zfs/man/man4/spl.4 b/zfs/man/man4/spl.4
new file mode 100644
index 0000000..11cde14
--- /dev/null
+++ b/zfs/man/man4/spl.4

@@ -0,0 +1,195 @@
+.\"
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License").  You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
+.\"
+.Dd August 24, 2020
+.Dt SPL 4
+.Os
+.
+.Sh NAME
+.Nm spl
+.Nd parameters of the SPL kernel module
+.
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Sy spl_kmem_cache_kmem_threads Ns = Ns Sy 4 Pq uint
+The number of threads created for the spl_kmem_cache task queue.
+This task queue is responsible for allocating new slabs
+for use by the kmem caches.
+For the majority of systems and workloads only a small number of threads are
+required.
+.
+.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
+When this is set it prevents Linux from being able to rapidly reclaim all the
+memory held by the kmem caches.
+This may be useful in circumstances where it's preferable that Linux
+reclaim memory from some other subsystem first.
+Setting this will increase the likelihood of out of memory events on a memory
+constrained system.
+.
+.It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
+The preferred number of objects per slab in the cache.
+In general, a larger value will increase the cache's memory footprint
+while decreasing the time required to perform an allocation.
+Conversely, a smaller value will minimize the footprint
+and improve cache reclaim time but individual allocations may take longer.
+.
+.It Sy spl_kmem_cache_max_size Ns = Ns Sy 32 Po 64-bit Pc or Sy 4 Po 32-bit Pc Pq uint
+The maximum size of a kmem cache slab in MiB.
+This effectively limits the maximum cache object size to
+.Sy spl_kmem_cache_max_size Ns / Ns Sy spl_kmem_cache_obj_per_slab .
+.Pp
+Caches may not be created with
+objects sized larger than this limit.
+.
+.It Sy spl_kmem_cache_slab_limit Ns = Ns Sy 16384 Pq uint
+For small objects the Linux slab allocator should be used to make the most
+efficient use of the memory.
+However, large objects are not supported by
+the Linux slab and therefore the SPL implementation is preferred.
+This value is used to determine the cutoff between a small and large object.
+.Pp
+Objects of size
+.Sy spl_kmem_cache_slab_limit
+or smaller will be allocated using the Linux slab allocator,
+large objects use the SPL allocator.
+A cutoff of 16K was determined to be optimal for architectures using 4K pages.
+.
+.It Sy spl_kmem_alloc_warn Ns = Ns Sy 32768 Pq uint
+As a general rule
+.Fn kmem_alloc
+allocations should be small,
+preferably just a few pages, since they must be physically contiguous.
+Therefore, a rate limited warning will be printed to the console for any
+.Fn kmem_alloc
+which exceeds a reasonable threshold.
+.Pp
+The default warning threshold is set to eight pages but capped at 32K to
+accommodate systems using large pages.
+This value was selected to be small enough to ensure
+the largest allocations are quickly noticed and fixed.
+But large enough to avoid logging any warnings when an allocation size is
+larger than optimal but not a serious concern.
+Since this value is tunable, developers are encouraged to set it lower
+when testing so any new largish allocations are quickly caught.
+These warnings may be disabled by setting the threshold to zero.
+.
+.It Sy spl_kmem_alloc_max Ns = Ns Sy KMALLOC_MAX_SIZE Ns / Ns Sy 4 Pq uint
+Large
+.Fn kmem_alloc
+allocations will fail if they exceed
+.Sy KMALLOC_MAX_SIZE .
+Allocations which are marginally smaller than this limit may succeed but
+should still be avoided due to the expense of locating a contiguous range
+of free pages.
+Therefore, a maximum kmem size with reasonable safety margin of 4x is set.
+.Fn kmem_alloc
+allocations larger than this maximum will quickly fail.
+.Fn vmem_alloc
+allocations less than or equal to this value will use
+.Fn kmalloc ,
+but shift to
+.Fn vmalloc
+when exceeding this value.
+.
+.It Sy spl_kmem_cache_magazine_size Ns = Ns Sy 0 Pq uint
+Cache magazines are an optimization designed to minimize the cost of
+allocating memory.
+They do this by keeping a per-cpu cache of recently
+freed objects, which can then be reallocated without taking a lock.
+This can improve performance on highly contended caches.
+However, because objects in magazines will prevent otherwise empty slabs
+from being immediately released this may not be ideal for low memory machines.
+.Pp
+For this reason,
+.Sy spl_kmem_cache_magazine_size
+can be used to set a maximum magazine size.
+When this value is set to 0 the magazine size will
+be automatically determined based on the object size.
+Otherwise magazines will be limited to 2-256 objects per magazine (i.e. per cpu).
+Magazines may never be entirely disabled in this implementation.
+.
+.It Sy spl_hostid Ns = Ns Sy 0 Pq ulong
+The system hostid, when set this can be used to uniquely identify a system.
+By default this value is set to zero which indicates the hostid is disabled.
+It can be explicitly enabled by placing a unique non-zero value in
+.Pa /etc/hostid .
+.
+.It Sy spl_hostid_path Ns = Ns Pa /etc/hostid Pq charp
+The expected path to locate the system hostid when specified.
+This value may be overridden for non-standard configurations.
+.
+.It Sy spl_panic_halt Ns = Ns Sy 0 Pq uint
+Cause a kernel panic on assertion failures.
+When not enabled, the thread is halted to facilitate further debugging.
+.Pp
+Set to a non-zero value to enable.
+.
+.It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint
+Kick stuck taskq to spawn threads.
+When writing a non-zero value to it, it will scan all the taskqs.
+If any of them have a pending task more than 5 seconds old,
+it will kick it to spawn more threads.
+This can be used if you find a rare
+deadlock occurs because one or more taskqs didn't spawn a thread when it should.
+.
+.It Sy spl_taskq_thread_bind Ns = Ns Sy 0 Pq int
+Bind taskq threads to specific CPUs.
+When enabled all taskq threads will be distributed evenly
+across the available CPUs.
+By default, this behavior is disabled to allow the Linux scheduler
+the maximum flexibility to determine where a thread should run.
+.
+.It Sy spl_taskq_thread_dynamic Ns = Ns Sy 1 Pq int
+Allow dynamic taskqs.
+When enabled taskqs which set the
+.Sy TASKQ_DYNAMIC
+flag will by default create only a single thread.
+New threads will be created on demand up to a maximum allowed number
+to facilitate the completion of outstanding tasks.
+Threads which are no longer needed will be promptly destroyed.
+By default this behavior is enabled but it can be disabled to
+aid performance analysis or troubleshooting.
+.
+.It Sy spl_taskq_thread_priority Ns = Ns Sy 1 Pq int
+Allow newly created taskq threads to set a non-default scheduler priority.
+When enabled, the priority specified when a taskq is created will be applied
+to all threads created by that taskq.
+When disabled all threads will use the default Linux kernel thread priority.
+By default, this behavior is enabled.
+.
+.It Sy spl_taskq_thread_sequential Ns = Ns Sy 4 Pq int
+The number of items a taskq worker thread must handle without interruption
+before requesting a new worker thread be spawned.
+This is used to control
+how quickly taskqs ramp up the number of threads processing the queue.
+Because Linux thread creation and destruction are relatively inexpensive a
+small default value has been selected.
+This means that normally threads will be created aggressively which is desirable.
+Increasing this value will
+result in a slower thread creation rate which may be preferable for some
+configurations.
+.
+.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint
+The maximum number of tasks per pending list in each taskq shown in
+.Pa /proc/spl/taskq{,-all} .
+Write
+.Sy 0
+to turn off the limit.
+The proc file will walk the lists with lock held,
+reading it could cause a lock-up if the list grows too large
+without limiting the output.
+"(truncated)" will be shown if the list is larger than the limit.
+.El

diff --git a/zfs/man/man4/zfs.4 b/zfs/man/man4/zfs.4
new file mode 100644
index 0000000..0c60a9c
--- /dev/null
+++ b/zfs/man/man4/zfs.4

@@ -0,0 +1,2468 @@
+.\"
+.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
+.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
+.\" Copyright (c) 2019 Datto Inc.
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License").  You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.Dd January 10, 2023
+.Dt ZFS 4
+.Os
+.
+.Sh NAME
+.Nm zfs
+.Nd tuning of the ZFS kernel module
+.
+.Sh DESCRIPTION
+The ZFS module supports these parameters:
+.Bl -tag -width Ds
+.It Sy dbuf_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong
+Maximum size in bytes of the dbuf cache.
+The target size is determined by the MIN versus
+.No 1/2^ Ns Sy dbuf_cache_shift Pq 1/32nd
+of the target ARC size.
+The behavior of the dbuf cache and its associated settings
+can be observed via the
+.Pa /proc/spl/kstat/zfs/dbufstats
+kstat.
+.
+.It Sy dbuf_metadata_cache_max_bytes Ns = Ns Sy ULONG_MAX Ns B Pq ulong
+Maximum size in bytes of the metadata dbuf cache.
+The target size is determined by the MIN versus
+.No 1/2^ Ns Sy dbuf_metadata_cache_shift Pq 1/64th
+of the target ARC size.
+The behavior of the metadata dbuf cache and its associated settings
+can be observed via the
+.Pa /proc/spl/kstat/zfs/dbufstats
+kstat.
+.
+.It Sy dbuf_cache_hiwater_pct Ns = Ns Sy 10 Ns % Pq uint
+The percentage over
+.Sy dbuf_cache_max_bytes
+when dbufs must be evicted directly.
+.
+.It Sy dbuf_cache_lowater_pct Ns = Ns Sy 10 Ns % Pq uint
+The percentage below
+.Sy dbuf_cache_max_bytes
+when the evict thread stops evicting dbufs.
+.
+.It Sy dbuf_cache_shift Ns = Ns Sy 5 Pq int
+Set the size of the dbuf cache
+.Pq Sy dbuf_cache_max_bytes
+to a log2 fraction of the target ARC size.
+.
+.It Sy dbuf_metadata_cache_shift Ns = Ns Sy 6 Pq int
+Set the size of the dbuf metadata cache
+.Pq Sy dbuf_metadata_cache_max_bytes
+to a log2 fraction of the target ARC size.
+.
+.It Sy dmu_object_alloc_chunk_shift Ns = Ns Sy 7 Po 128 Pc Pq int
+dnode slots allocated in a single operation as a power of 2.
+The default value minimizes lock contention for the bulk operation performed.
+.
+.It Sy dmu_prefetch_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq int
+Limit the amount we can prefetch with one call to this amount in bytes.
+This helps to limit the amount of memory that can be used by prefetching.
+.
+.It Sy ignore_hole_birth Pq int
+Alias for
+.Sy send_holes_without_birth_time .
+.
+.It Sy l2arc_feed_again Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Turbo L2ARC warm-up.
+When the L2ARC is cold the fill interval will be set as fast as possible.
+.
+.It Sy l2arc_feed_min_ms Ns = Ns Sy 200 Pq ulong
+Min feed interval in milliseconds.
+Requires
+.Sy l2arc_feed_again Ns = Ns Ar 1
+and only applicable in related situations.
+.
+.It Sy l2arc_feed_secs Ns = Ns Sy 1 Pq ulong
+Seconds between L2ARC writing.
+.
+.It Sy l2arc_headroom Ns = Ns Sy 2 Pq ulong
+How far through the ARC lists to search for L2ARC cacheable content,
+expressed as a multiplier of
+.Sy l2arc_write_max .
+ARC persistence across reboots can be achieved with persistent L2ARC
+by setting this parameter to
+.Sy 0 ,
+allowing the full length of ARC lists to be searched for cacheable content.
+.
+.It Sy l2arc_headroom_boost Ns = Ns Sy 200 Ns % Pq ulong
+Scales
+.Sy l2arc_headroom
+by this percentage when L2ARC contents are being successfully compressed
+before writing.
+A value of
+.Sy 100
+disables this feature.
+.
+.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Controls whether buffers present on special vdevs are eligible for caching
+into L2ARC.
+If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
+.
+.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq  int
+Controls whether only MFU metadata and data are cached from ARC into L2ARC.
+This may be desired to avoid wasting space on L2ARC when reading/writing large
+amounts of data that are not expected to be accessed more than once.
+.Pp
+The default is off,
+meaning both MRU and MFU data and metadata are cached.
+When turning off this feature, some MRU buffers will still be present
+in ARC and eventually cached on L2ARC.
+.No If Sy l2arc_noprefetch Ns = Ns Sy 0 ,
+some prefetched buffers will be cached to L2ARC, and those might later
+transition to MRU, in which case the
+.Sy l2arc_mru_asize No arcstat will not be Sy 0 .
+.Pp
+Regardless of
+.Sy l2arc_noprefetch ,
+some MFU buffers might be evicted from ARC,
+accessed later on as prefetches and transition to MRU as prefetches.
+If accessed again they are counted as MRU and the
+.Sy l2arc_mru_asize No arcstat will not be Sy 0 .
+.Pp
+The ARC status of L2ARC buffers when they were first cached in
+L2ARC can be seen in the
+.Sy l2arc_mru_asize , Sy l2arc_mfu_asize , No and Sy l2arc_prefetch_asize
+arcstats when importing the pool or onlining a cache
+device if persistent L2ARC is enabled.
+.Pp
+The
+.Sy evict_l2_eligible_mru
+arcstat does not take into account if this option is enabled as the information
+provided by the
+.Sy evict_l2_eligible_m[rf]u
+arcstats can be used to decide if toggling this option is appropriate
+for the current workload.
+.
+.It Sy l2arc_meta_percent Ns = Ns Sy 33 Ns % Pq int
+Percent of ARC size allowed for L2ARC-only headers.
+Since L2ARC buffers are not evicted on memory pressure,
+too many headers on a system with an irrationally large L2ARC
+can render it slow or unusable.
+This parameter limits L2ARC writes and rebuilds to achieve the target.
+.
+.It Sy l2arc_trim_ahead Ns = Ns Sy 0 Ns % Pq ulong
+Trims ahead of the current write size
+.Pq Sy l2arc_write_max
+on L2ARC devices by this percentage of write size if we have filled the device.
+If set to
+.Sy 100
+we TRIM twice the space required to accommodate upcoming writes.
+A minimum of
+.Sy 64MB
+will be trimmed.
+It also enables TRIM of the whole L2ARC device upon creation
+or addition to an existing pool or if the header of the device is
+invalid upon importing a pool or onlining a cache device.
+A value of
+.Sy 0
+disables TRIM on L2ARC altogether and is the default as it can put significant
+stress on the underlying storage devices.
+This will vary depending on how well the specific device handles these commands.
+.
+.It Sy l2arc_noprefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Do not write buffers to L2ARC if they were prefetched but not used by
+applications.
+In case there are prefetched buffers in L2ARC and this option
+is later set, we do not read the prefetched buffers from L2ARC.
+Unsetting this option is useful for caching sequential reads from the
+disks to L2ARC and serving those reads from L2ARC later on.
+This may be beneficial in case the L2ARC device is significantly faster
+in sequential reads than the disks of the pool.
+.Pp
+Use
+.Sy 1
+to disable and
+.Sy 0
+to enable caching/reading prefetches to/from L2ARC.
+.
+.It Sy l2arc_norw Ns = Ns Sy 0 Ns | Ns 1 Pq int
+No reads during writes.
+.
+.It Sy l2arc_write_boost Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong
+Cold L2ARC devices will have
+.Sy l2arc_write_max
+increased by this amount while they remain cold.
+.
+.It Sy l2arc_write_max Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq ulong
+Max write bytes per interval.
+.
+.It Sy l2arc_rebuild_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Rebuild the L2ARC when importing a pool (persistent L2ARC).
+This can be disabled if there are problems importing a pool
+or attaching an L2ARC device (e.g. the L2ARC device is slow
+in reading stored log metadata, or the metadata
+has become somehow fragmented/unusable).
+.
+.It Sy l2arc_rebuild_blocks_min_l2size Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong
+Minimum size of an L2ARC device required in order to write log blocks in it.
+The log blocks are used upon importing the pool to rebuild the persistent L2ARC.
+.Pp
+For L2ARC devices less than 1GB, the amount of data
+.Fn l2arc_evict
+evicts is significant compared to the amount of restored L2ARC data.
+In this case, do not write log blocks in L2ARC in order not to waste space.
+.
+.It Sy metaslab_aliquot Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+Metaslab granularity, in bytes.
+This is roughly similar to what would be referred to as the "stripe size"
+in traditional RAID arrays.
+In normal operation, ZFS will try to write this amount of data to each disk
+before moving on to the next top-level vdev.
+.
+.It Sy metaslab_bias_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable metaslab group biasing based on their vdevs' over- or under-utilization
+relative to the pool.
+.
+.It Sy metaslab_force_ganging Ns = Ns Sy 16777217 Ns B Po 16MB + 1B Pc Pq ulong
+Make some blocks above a certain size be gang blocks.
+This option is used by the test suite to facilitate testing.
+.
+.It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int
+Default dnode block size as a power of 2.
+.
+.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int
+Default dnode indirect block size as a power of 2.
+.
+.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+When attempting to log an output nvlist of an ioctl in the on-disk history,
+the output will not be stored if it is larger than this size (in bytes).
+This must be less than
+.Sy DMU_MAX_ACCESS Pq 64MB .
+This applies primarily to
+.Fn zfs_ioc_channel_program Pq cf. Xr zfs-program 8 .
+.
+.It Sy zfs_keep_log_spacemaps_at_export Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Prevent log spacemaps from being destroyed during pool exports and destroys.
+.
+.It Sy zfs_metaslab_segment_weight_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable/disable segment-based metaslab selection.
+.
+.It Sy zfs_metaslab_switch_threshold Ns = Ns Sy 2 Pq int
+When using segment-based metaslab selection, continue allocating
+from the active metaslab until this option's
+worth of buckets have been exhausted.
+.
+.It Sy metaslab_debug_load Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Load all metaslabs during pool import.
+.
+.It Sy metaslab_debug_unload Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Prevent metaslabs from being unloaded.
+.
+.It Sy metaslab_fragmentation_factor_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable use of the fragmentation metric in computing metaslab weights.
+.
+.It Sy metaslab_df_max_search Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+Maximum distance to search forward from the last offset.
+Without this limit, fragmented pools can see
+.Em >100`000
+iterations and
+.Fn metaslab_block_picker
+becomes the performance limiting factor on high-performance storage.
+.Pp
+With the default setting of
+.Sy 16MB ,
+we typically see less than
+.Em 500
+iterations, even with very fragmented
+.Sy ashift Ns = Ns Sy 9
+pools.
+The maximum number of iterations possible is
+.Sy metaslab_df_max_search / 2^(ashift+1) .
+With the default setting of
+.Sy 16MB
+this is
+.Em 16*1024 Pq with Sy ashift Ns = Ns Sy 9
+or
+.Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 .
+.
+.It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If not searching forward (due to
+.Sy metaslab_df_max_search , metaslab_df_free_pct ,
+.No or Sy metaslab_df_alloc_threshold ) ,
+this tunable controls which segment is used.
+If set, we will use the largest free segment.
+If unset, we will use a segment of at least the requested size.
+.
+.It Sy zfs_metaslab_max_size_cache_sec Ns = Ns Sy 3600 Ns s Po 1h Pc Pq ulong
+When we unload a metaslab, we cache the size of the largest free chunk.
+We use that cached size to determine whether or not to load a metaslab
+for a given allocation.
+As more frees accumulate in that metaslab while it's unloaded,
+the cached max size becomes less and less accurate.
+After a number of seconds controlled by this tunable,
+we stop considering the cached max size and start
+considering only the histogram instead.
+.
+.It Sy zfs_metaslab_mem_limit Ns = Ns Sy 25 Ns % Pq int
+When we are loading a new metaslab, we check the amount of memory being used
+to store metaslab range trees.
+If it is over a threshold, we attempt to unload the least recently used metaslab
+to prevent the system from clogging all of its memory with range trees.
+This tunable sets the percentage of total system memory that is the threshold.
+.
+.It Sy zfs_metaslab_try_hard_before_gang Ns = Ns Sy 0 Ns | Ns 1 Pq int
+.Bl -item -compact
+.It
+If unset, we will first try normal allocation.
+.It
+If that fails then we will do a gang allocation.
+.It
+If that fails then we will do a "try hard" gang allocation.
+.It
+If that fails then we will have a multi-layer gang block.
+.El
+.Pp
+.Bl -item -compact
+.It
+If set, we will first try normal allocation.
+.It
+If that fails then we will do a "try hard" allocation.
+.It
+If that fails we will do a gang allocation.
+.It
+If that fails we will do a "try hard" gang allocation.
+.It
+If that fails then we will have a multi-layer gang block.
+.El
+.
+.It Sy zfs_metaslab_find_max_tries Ns = Ns Sy 100 Pq int
+When not trying hard, we only consider this number of the best metaslabs.
+This improves performance, especially when there are many metaslabs per vdev
+and the allocation can't actually be satisfied
+(so we would otherwise iterate all metaslabs).
+.
+.It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq int
+When a vdev is added, target this number of metaslabs per top-level vdev.
+.
+.It Sy zfs_vdev_default_ms_shift Ns = Ns Sy 29 Po 512MB Pc Pq int
+Default limit for metaslab size.
+.
+.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy 14 Pq ulong
+Maximum ashift used when optimizing for logical -> physical sector size on new
+top-level vdevs.
+May be increased up to
+.Sy ASHIFT_MAX Po 16 Pc ,
+but this may negatively impact pool space efficiency.
+.
+.It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq ulong
+Minimum ashift used when creating new top-level vdevs.
+.
+.It Sy zfs_vdev_min_ms_count Ns = Ns Sy 16 Pq int
+Minimum number of metaslabs to create in a top-level vdev.
+.
+.It Sy vdev_validate_skip Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Skip label validation steps during pool import.
+Changing is not recommended unless you know what you're doing
+and are recovering a damaged label.
+.
+.It Sy zfs_vdev_ms_count_limit Ns = Ns Sy 131072 Po 128k Pc Pq int
+Practical upper limit of total metaslabs per top-level vdev.
+.
+.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable metaslab group preloading.
+.
+.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Give more weight to metaslabs with lower LBAs,
+assuming they have greater bandwidth,
+as is typically the case on a modern constant angular velocity disk drive.
+.
+.It Sy metaslab_unload_delay Ns = Ns Sy 32 Pq int
+After a metaslab is used, we keep it loaded for this many TXGs, to attempt to
+reduce unnecessary reloading.
+Note that both this many TXGs and
+.Sy metaslab_unload_delay_ms
+milliseconds must pass before unloading will occur.
+.
+.It Sy metaslab_unload_delay_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq int
+After a metaslab is used, we keep it loaded for this many milliseconds,
+to attempt to reduce unnecessary reloading.
+Note that both this many milliseconds and
+.Sy metaslab_unload_delay
+TXGs must pass before unloading will occur.
+.
+.It Sy reference_history Ns = Ns Sy 3 Pq int
+Maximum reference holders being tracked when reference_tracking_enable is active.
+.
+.It Sy reference_tracking_enable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Track reference holders to
+.Sy refcount_t
+objects (debug builds only).
+.
+.It Sy send_holes_without_birth_time Ns = Ns Sy 1 Ns | Ns 0 Pq int
+When set, the
+.Sy hole_birth
+optimization will not be used, and all holes will always be sent during a
+.Nm zfs Cm send .
+This is useful if you suspect your datasets are affected by a bug in
+.Sy hole_birth .
+.
+.It Sy spa_config_path Ns = Ns Pa /etc/zfs/zpool.cache Pq charp
+SPA config file.
+.
+.It Sy spa_asize_inflation Ns = Ns Sy 24 Pq int
+Multiplication factor used to estimate actual disk consumption from the
+size of data being written.
+The default value is a worst case estimate,
+but lower values may be valid for a given pool depending on its configuration.
+Pool administrators who understand the factors involved
+may wish to specify a more realistic inflation factor,
+particularly if they operate close to quota or capacity limits.
+.
+.It Sy spa_load_print_vdev_tree Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Whether to print the vdev tree in the debugging message buffer during pool import.
+.
+.It Sy spa_load_verify_data Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Whether to traverse data blocks during an "extreme rewind"
+.Pq Fl X
+import.
+.Pp
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification.
+If this parameter is unset, the traversal skips non-metadata blocks.
+It can be toggled once the
+import has started to stop or start the traversal of non-metadata blocks.
+.
+.It Sy spa_load_verify_metadata  Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Whether to traverse blocks during an "extreme rewind"
+.Pq Fl X
+pool import.
+.Pp
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification.
+If this parameter is unset, the traversal is not performed.
+It can be toggled once the import has started to stop or start the traversal.
+.
+.It Sy spa_load_verify_shift Ns = Ns Sy 4 Po 1/16th Pc Pq int
+Sets the maximum number of bytes to consume during pool import to the log2
+fraction of the target ARC size.
+.
+.It Sy spa_slop_shift Ns = Ns Sy 5 Po 1/32nd Pc Pq int
+Normally, we don't allow the last
+.Sy 3.2% Pq Sy 1/2^spa_slop_shift
+of space in the pool to be consumed.
+This ensures that we don't run the pool completely out of space,
+due to unaccounted changes (e.g. to the MOS).
+It also limits the worst-case time to allocate space.
+If we have less than this amount of free space,
+most ZPL operations (e.g. write, create) will return
+.Sy ENOSPC .
+.
+.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
+During top-level vdev removal, chunks of data are copied from the vdev
+which may include free space in order to trade bandwidth for IOPS.
+This parameter determines the maximum span of free space, in bytes,
+which will be included as "unnecessary" data in a chunk of copied data.
+.Pp
+The default value here was chosen to align with
+.Sy zfs_vdev_read_gap_limit ,
+which is a similar concept when doing
+regular reads (but there's no reason it has to be the same).
+.
+.It Sy vdev_file_logical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
+Logical ashift for file-based devices.
+.
+.It Sy vdev_file_physical_ashift Ns = Ns Sy 9 Po 512B Pc Pq ulong
+Physical ashift for file-based devices.
+.
+.It Sy zap_iterate_prefetch Ns = Ns Sy 1 Ns | Ns 0 Pq int
+If set, when we start iterating over a ZAP object,
+prefetch the entire object (all leaf blocks).
+However, this is limited by
+.Sy dmu_prefetch_max .
+.
+.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+If prefetching is enabled, disable prefetching for reads larger than this size.
+.
+.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
+Min bytes to prefetch per stream.
+Prefetch distance starts from the demand access size and quickly grows to
+this value, doubling on each hit.
+After that it may grow further by 1/8 per hit, but only if some prefetch
+since last time hasn't completed in time to satisfy demand request, i.e.
+prefetch depth didn't cover the read latency or the pool got saturated.
+.
+.It Sy zfetch_max_distance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
+Max bytes to prefetch per stream.
+.
+.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64MB Pc Pq uint
+Max bytes to prefetch indirects for per stream.
+.
+.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
+Max number of streams per zfetch (prefetch streams per file).
+.
+.It Sy zfetch_min_sec_reap Ns = Ns Sy 1 Pq uint
+Min time before inactive prefetch stream can be reclaimed.
+.
+.It Sy zfetch_max_sec_reap Ns = Ns Sy 2 Pq uint
+Max time before inactive prefetch stream can be deleted.
+.
+.It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enables the ARC to use scatter/gather lists.
+When disabled, all allocations are forced to be linear in kernel memory.
+Disabling can improve performance in some code paths
+at the expense of fragmented kernel memory.
+.
+.It Sy zfs_abd_scatter_max_order Ns = Ns Sy MAX_ORDER-1 Pq uint
+Maximum number of consecutive memory pages allocated in a single block for
+scatter/gather lists.
+.Pp
+The value of
+.Sy MAX_ORDER
+depends on kernel configuration.
+.
+.It Sy zfs_abd_scatter_min_size Ns = Ns Sy 1536 Ns B Po 1.5kB Pc Pq uint
+This is the minimum allocation size that will use scatter (page-based) ABDs.
+Smaller allocations will use linear ABDs.
+.
+.It Sy zfs_arc_dnode_limit Ns = Ns Sy 0 Ns B Pq ulong
+When the number of bytes consumed by dnodes in the ARC exceeds this number of
+bytes, try to unpin some of it in response to demand for non-metadata.
+This value acts as a ceiling to the amount of dnode metadata, and defaults to
+.Sy 0 ,
+which indicates that a percentage based on
+.Sy zfs_arc_dnode_limit_percent
+of the ARC meta buffers may be used for dnodes.
+.Pp
+Also see
+.Sy zfs_arc_meta_prune
+which serves a similar purpose but is used
+when the amount of metadata in the ARC exceeds
+.Sy zfs_arc_meta_limit
+rather than in response to overall demand for non-metadata.
+.
+.It Sy zfs_arc_dnode_limit_percent Ns = Ns Sy 10 Ns % Pq ulong
+Percentage that can be consumed by dnodes of ARC meta buffers.
+.Pp
+See also
+.Sy zfs_arc_dnode_limit ,
+which serves a similar purpose but has a higher priority if nonzero.
+.
+.It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq ulong
+Percentage of ARC dnodes to try to scan in response to demand for non-metadata
+when the number of bytes consumed by dnodes exceeds
+.Sy zfs_arc_dnode_limit .
+.
+.It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8kB Pc Pq int
+The ARC's buffer hash table is sized based on the assumption of an average
+block size of this value.
+This works out to roughly 1MB of hash table per 1GB of physical memory
+with 8-byte pointers.
+For configurations with a known larger average block size,
+this value can be increased to reduce the memory footprint.
+.
+.It Sy zfs_arc_eviction_pct Ns = Ns Sy 200 Ns % Pq int
+When
+.Fn arc_is_overflowing ,
+.Fn arc_get_data_impl
+waits for this percent of the requested amount of data to be evicted.
+For example, by default, for every
+.Em 2kB
+that's evicted,
+.Em 1kB
+of it may be "reused" by a new allocation.
+Since this is above
+.Sy 100 Ns % ,
+it ensures that progress is made towards getting
+.Sy arc_size No under Sy arc_c .
+Since this is finite, it ensures that allocations can still happen,
+even during the potentially long time that
+.Sy arc_size No is more than Sy arc_c .
+.
+.It Sy zfs_arc_evict_batch_limit Ns = Ns Sy 10 Pq int
+Number of ARC headers to evict per sub-list before proceeding to another sub-list.
+This batch-style operation prevents entire sub-lists from being evicted at once
+but comes at a cost of additional unlocking and locking.
+.
+.It Sy zfs_arc_grow_retry Ns = Ns Sy 0 Ns s Pq int
+If set to a nonzero value, it will replace the
+.Sy arc_grow_retry
+value with this value.
+The
+.Sy arc_grow_retry
+.No value Pq default Sy 5 Ns s
+is the number of seconds the ARC will wait before
+trying to resume growth after a memory pressure event.
+.
+.It Sy zfs_arc_lotsfree_percent Ns = Ns Sy 10 Ns % Pq int
+Throttle I/O when free system memory drops below this percentage of total
+system memory.
+Setting this value to
+.Sy 0
+will disable the throttle.
+.
+.It Sy zfs_arc_max Ns = Ns Sy 0 Ns B Pq ulong
+Max size of ARC in bytes.
+If
+.Sy 0 ,
+then the max size of ARC is determined by the amount of system memory installed.
+Under Linux, half of system memory will be used as the limit.
+Under
+.Fx ,
+the larger of
+.Sy all_system_memory - 1GB No and Sy 5/8 * all_system_memory
+will be used as the limit.
+This value must be at least
+.Sy 67108864 Ns B Pq 64MB .
+.Pp
+This value can be changed dynamically, with some caveats.
+It cannot be set back to
+.Sy 0
+while running, and reducing it below the current ARC size will not cause
+the ARC to shrink without memory pressure to induce shrinking.
+.
+.It Sy zfs_arc_meta_adjust_restarts Ns = Ns Sy 4096 Pq ulong
+The number of restart passes to make while scanning the ARC attempting
+to free buffers in order to stay below the
+.Sy zfs_arc_meta_limit .
+This value should not need to be tuned but is available to facilitate
+performance analysis.
+.
+.It Sy zfs_arc_meta_limit Ns = Ns Sy 0 Ns B Pq ulong
+The maximum allowed size in bytes that metadata buffers are allowed to
+consume in the ARC.
+When this limit is reached, metadata buffers will be reclaimed,
+even if the overall
+.Sy arc_c_max
+has not been reached.
+It defaults to
+.Sy 0 ,
+which indicates that a percentage based on
+.Sy zfs_arc_meta_limit_percent
+of the ARC may be used for metadata.
+.Pp
+This value may be changed dynamically, except that it must be set to an explicit value
+.Pq cannot be set back to Sy 0 .
+.
+.It Sy zfs_arc_meta_limit_percent Ns = Ns Sy 75 Ns % Pq ulong
+Percentage of ARC buffers that can be used for metadata.
+.Pp
+See also
+.Sy zfs_arc_meta_limit ,
+which serves a similar purpose but has a higher priority if nonzero.
+.
+.It Sy zfs_arc_meta_min Ns = Ns Sy 0 Ns B Pq ulong
+The minimum allowed size in bytes that metadata buffers may consume in
+the ARC.
+.
+.It Sy zfs_arc_meta_prune Ns = Ns Sy 10000 Pq int
+The number of dentries and inodes to be scanned looking for entries
+which can be dropped.
+This may be required when the ARC reaches the
+.Sy zfs_arc_meta_limit
+because dentries and inodes can pin buffers in the ARC.
+Increasing this value will cause the dentry and inode caches
+to be pruned more aggressively.
+Setting this value to
+.Sy 0
+will disable pruning the inode and dentry caches.
+.
+.It Sy zfs_arc_meta_strategy Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Define the strategy for ARC metadata buffer eviction (meta reclaim strategy):
+.Bl -tag -compact -offset 4n -width "0 (META_ONLY)"
+.It Sy 0 Pq META_ONLY
+evict only the ARC metadata buffers
+.It Sy 1 Pq BALANCED
+additional data buffers may be evicted if required
+to evict the required number of metadata buffers.
+.El
+.
+.It Sy zfs_arc_min Ns = Ns Sy 0 Ns B Pq ulong
+Min size of ARC in bytes.
+.No If set to Sy 0 , arc_c_min
+will default to consuming the larger of
+.Sy 32MB No or Sy all_system_memory/32 .
+.
+.It Sy zfs_arc_min_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 1s Pc Pq int
+Minimum time prefetched blocks are locked in the ARC.
+.
+.It Sy zfs_arc_min_prescient_prefetch_ms Ns = Ns Sy 0 Ns ms Ns Po Ns ≡ Ns 6s Pc Pq int
+Minimum time "prescient prefetched" blocks are locked in the ARC.
+These blocks are meant to be prefetched fairly aggressively ahead of
+the code that may use them.
+.
+.It Sy zfs_arc_prune_task_threads Ns = Ns Sy 1 Pq int
+Number of arc_prune threads.
+.Fx
+does not need more than one.
+Linux may theoretically use one per mount point up to number of CPUs,
+but that was not proven to be useful.
+.
+.It Sy zfs_max_missing_tvds Ns = Ns Sy 0 Pq int
+Number of missing top-level vdevs which will be allowed during
+pool import (only in read-only mode).
+.
+.It Sy zfs_max_nvlist_src_size Ns = Ns Sy 0 Pq ulong
+Maximum size in bytes allowed to be passed as
+.Sy zc_nvlist_src_size
+for ioctls on
+.Pa /dev/zfs .
+This prevents a user from causing the kernel to allocate
+an excessive amount of memory.
+When the limit is exceeded, the ioctl fails with
+.Sy EINVAL
+and a description of the error is sent to the
+.Pa zfs-dbgmsg
+log.
+This parameter should not need to be touched under normal circumstances.
+If
+.Sy 0 ,
+equivalent to a quarter of the user-wired memory limit under
+.Fx
+and to
+.Sy 134217728 Ns B Pq 128MB
+under Linux.
+.
+.It Sy zfs_multilist_num_sublists Ns = Ns Sy 0 Pq int
+To allow more fine-grained locking, each ARC state contains a series
+of lists for both data and metadata objects.
+Locking is performed at the level of these "sub-lists".
+This parameter controls the number of sub-lists per ARC state,
+and also applies to other uses of the multilist data structure.
+.Pp
+If
+.Sy 0 ,
+equivalent to the greater of the number of online CPUs and
+.Sy 4 .
+.
+.It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
+The ARC size is considered to be overflowing if it exceeds the current
+ARC target size
+.Pq Sy arc_c
+by thresholds determined by this parameter.
+Exceeding by
+.Sy ( arc_c >> zfs_arc_overflow_shift ) * 0.5
+starts ARC reclamation process.
+If that appears insufficient, exceeding by
+.Sy ( arc_c >> zfs_arc_overflow_shift ) * 1.5
+blocks new buffer allocation until the reclaim thread catches up.
+Started reclamation process continues till ARC size returns below the
+target size.
+.Pp
+The default value of
+.Sy 8
+causes the ARC to start reclamation if it exceeds the target size by
+.Em 0.2%
+of the target size, and block allocations by
+.Em 0.6% .
+.
+.It Sy zfs_arc_p_min_shift Ns = Ns Sy 0 Pq int
+If nonzero, this will update
+.Sy arc_p_min_shift Pq default Sy 4
+with the new value.
+.Sy arc_p_min_shift No is used as a shift of Sy arc_c
+when calculating the minimum
+.Sy arc_p No size.
+.
+.It Sy zfs_arc_p_dampener_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Disable
+.Sy arc_p
+adapt dampener, which reduces the maximum single adjustment to
+.Sy arc_p .
+.
+.It Sy zfs_arc_shrink_shift Ns = Ns Sy 0 Pq int
+If nonzero, this will update
+.Sy arc_shrink_shift Pq default Sy 7
+with the new value.
+.
+.It Sy zfs_arc_pc_percent Ns = Ns Sy 0 Ns % Po off Pc Pq uint
+Percent of pagecache to reclaim ARC to.
+.Pp
+This tunable allows the ZFS ARC to play more nicely
+with the kernel's LRU pagecache.
+It can guarantee that the ARC size won't collapse under scanning
+pressure on the pagecache, yet still allows the ARC to be reclaimed down to
+.Sy zfs_arc_min
+if necessary.
+This value is specified as percent of pagecache size (as measured by
+.Sy NR_FILE_PAGES ) ,
+where that percent may exceed
+.Sy 100 .
+This
+only operates during memory pressure/reclaim.
+.
+.It Sy zfs_arc_shrinker_limit Ns = Ns Sy 10000 Pq int
+This is a limit on how many pages the ARC shrinker makes available for
+eviction in response to one page allocation attempt.
+Note that in practice, the kernel's shrinker can ask us to evict
+up to about four times this for one allocation attempt.
+.Pp
+The default limit of
+.Sy 10000 Pq in practice, Em 160MB No per allocation attempt with 4kB pages
+limits the amount of time spent attempting to reclaim ARC memory to
+less than 100ms per allocation attempt,
+even with a small average compressed block size of ~8kB.
+.Pp
+The parameter can be set to 0 (zero) to disable the limit,
+and only applies on Linux.
+.
+.It Sy zfs_arc_sys_free Ns = Ns Sy 0 Ns B Pq ulong
+The target number of bytes the ARC should leave as free memory on the system.
+If zero, equivalent to the bigger of
+.Sy 512kB No and Sy all_system_memory/64 .
+.
+.It Sy zfs_autoimport_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Disable pool import at module load by ignoring the cache file
+.Pq Sy spa_config_path .
+.
+.It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint
+Rate limit checksum events to this many per second.
+Note that this should not be set below the ZED thresholds
+(currently 10 checksums over 10 seconds)
+or else the daemon may not trigger any action.
+.
+.It Sy zfs_commit_timeout_pct Ns = Ns Sy 5 Ns % Pq int
+This controls the amount of time that a ZIL block (lwb) will remain "open"
+when it isn't "full", and it has a thread waiting for it to be committed to
+stable storage.
+The timeout is scaled based on a percentage of the last lwb
+latency to avoid significantly impacting the latency of each individual
+transaction record (itx).
+.
+.It Sy zfs_condense_indirect_commit_entry_delay_ms Ns = Ns Sy 0 Ns ms Pq int
+Vdev indirection layer (used for device removal) sleeps for this many
+milliseconds during mapping generation.
+Intended for use with the test suite to throttle vdev removal speed.
+.
+.It Sy zfs_condense_indirect_obsolete_pct Ns = Ns Sy 25 Ns % Pq int
+Minimum percent of obsolete bytes in vdev mapping required to attempt to condense
+.Pq see Sy zfs_condense_indirect_vdevs_enable .
+Intended for use with the test suite
+to facilitate triggering condensing as needed.
+.
+.It Sy zfs_condense_indirect_vdevs_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable condensing indirect vdev mappings.
+When set, attempt to condense indirect vdev mappings
+if the mapping uses more than
+.Sy zfs_condense_min_mapping_bytes
+bytes of memory and if the obsolete space map object uses more than
+.Sy zfs_condense_max_obsolete_bytes
+bytes on-disk.
+The condensing process is an attempt to save memory by removing obsolete mappings.
+.
+.It Sy zfs_condense_max_obsolete_bytes Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong
+Only attempt to condense indirect vdev mappings if the on-disk size
+of the obsolete space map object is greater than this number of bytes
+.Pq see Sy zfs_condense_indirect_vdevs_enable .
+.
+.It Sy zfs_condense_min_mapping_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq ulong
+Minimum size vdev mapping to attempt to condense
+.Pq see Sy zfs_condense_indirect_vdevs_enable .
+.
+.It Sy zfs_dbgmsg_enable Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Internally ZFS keeps a small log to facilitate debugging.
+The log is enabled by default, and can be disabled by unsetting this option.
+The contents of the log can be accessed by reading
+.Pa /proc/spl/kstat/zfs/dbgmsg .
+Writing
+.Sy 0
+to the file clears the log.
+.Pp
+This setting does not influence debug prints due to
+.Sy zfs_flags .
+.
+.It Sy zfs_dbgmsg_maxsize Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
+Maximum size of the internal ZFS debug log.
+.
+.It Sy zfs_dbuf_state_index Ns = Ns Sy 0 Pq int
+Historically used for controlling what reporting was available under
+.Pa /proc/spl/kstat/zfs .
+No effect.
+.
+.It Sy zfs_deadman_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+When a pool sync operation takes longer than
+.Sy zfs_deadman_synctime_ms ,
+or when an individual I/O operation takes longer than
+.Sy zfs_deadman_ziotime_ms ,
+then the operation is considered to be "hung".
+If
+.Sy zfs_deadman_enabled
+is set, then the deadman behavior is invoked as described by
+.Sy zfs_deadman_failmode .
+By default, the deadman is enabled and set to
+.Sy wait
+which results in "hung" I/Os only being logged.
+The deadman is automatically disabled when a pool gets suspended.
+.
+.It Sy zfs_deadman_failmode Ns = Ns Sy wait Pq charp
+Controls the failure behavior when the deadman detects a "hung" I/O operation.
+Valid values are:
+.Bl -tag -compact -offset 4n -width "continue"
+.It Sy wait
+Wait for a "hung" operation to complete.
+For each "hung" operation a "deadman" event will be posted
+describing that operation.
+.It Sy continue
+Attempt to recover from a "hung" operation by re-dispatching it
+to the I/O pipeline if possible.
+.It Sy panic
+Panic the system.
+This can be used to facilitate automatic fail-over
+to a properly configured fail-over partner.
+.El
+.
+.It Sy zfs_deadman_checktime_ms Ns = Ns Sy 60000 Ns ms Po 1min Pc Pq int
+Check time in milliseconds.
+This defines the frequency at which we check for hung I/O requests
+and potentially invoke the
+.Sy zfs_deadman_failmode
+behavior.
+.
+.It Sy zfs_deadman_synctime_ms Ns = Ns Sy 600000 Ns ms Po 10min Pc Pq ulong
+Interval in milliseconds after which the deadman is triggered and also
+the interval after which a pool sync operation is considered to be "hung".
+Once this limit is exceeded the deadman will be invoked every
+.Sy zfs_deadman_checktime_ms
+milliseconds until the pool sync completes.
+.
+.It Sy zfs_deadman_ziotime_ms Ns = Ns Sy 300000 Ns ms Po 5min Pc Pq ulong
+Interval in milliseconds after which the deadman is triggered and an
+individual I/O operation is considered to be "hung".
+As long as the operation remains "hung",
+the deadman will be invoked every
+.Sy zfs_deadman_checktime_ms
+milliseconds until the operation completes.
+.
+.It Sy zfs_dedup_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Enable prefetching dedup-ed blocks which are going to be freed.
+.
+.It Sy zfs_delay_min_dirty_percent Ns = Ns Sy 60 Ns % Pq int
+Start to delay each transaction once there is this amount of dirty data,
+expressed as a percentage of
+.Sy zfs_dirty_data_max .
+This value should be at least
+.Sy zfs_vdev_async_write_active_max_dirty_percent .
+.No See Sx ZFS TRANSACTION DELAY .
+.
+.It Sy zfs_delay_scale Ns = Ns Sy 500000 Pq int
+This controls how quickly the transaction delay approaches infinity.
+Larger values cause longer delays for a given amount of dirty data.
+.Pp
+For the smoothest delay, this value should be about 1 billion divided
+by the maximum number of operations per second.
+This will smoothly handle between ten times and a tenth of this number.
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+.Sy zfs_delay_scale * zfs_dirty_data_max Em must be smaller than Sy 2^64 .
+.
+.It Sy zfs_disable_ivset_guid_check Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disables requirement for IVset GUIDs to be present and match when doing a raw
+receive of encrypted datasets.
+Intended for users whose pools were created with
+OpenZFS pre-release versions and now have compatibility issues.
+.
+.It Sy zfs_key_max_salt_uses Ns = Ns Sy 400000000 Po 4*10^8 Pc Pq ulong
+Maximum number of uses of a single salt value before generating a new one for
+encrypted datasets.
+The default value is also the maximum.
+.
+.It Sy zfs_object_mutex_size Ns = Ns Sy 64 Pq uint
+Size of the znode hashtable used for holds.
+.Pp
+Due to the need to hold locks on objects that may not exist yet, kernel mutexes
+are not created per-object and instead a hashtable is used where collisions
+will result in objects waiting when there is not actually contention on the
+same object.
+.
+.It Sy zfs_slow_io_events_per_second Ns = Ns Sy 20 Ns /s Pq int
+Rate limit delay and deadman zevents (which report slow I/Os) to this many per
+second.
+.
+.It Sy zfs_unflushed_max_mem_amt Ns = Ns Sy 1073741824 Ns B Po 1GB Pc Pq ulong
+Upper-bound limit for unflushed metadata changes to be held by the
+log spacemap in memory, in bytes.
+.
+.It Sy zfs_unflushed_max_mem_ppm Ns = Ns Sy 1000 Ns ppm Po 0.1% Pc Pq ulong
+Part of overall system memory that ZFS allows to be used
+for unflushed metadata changes by the log spacemap, in millionths.
+.
+.It Sy zfs_unflushed_log_block_max Ns = Ns Sy 131072 Po 128k Pc Pq ulong
+Describes the maximum number of log spacemap blocks allowed for each pool.
+The default value means that the space in all the log spacemaps
+can add up to no more than
+.Sy 131072
+blocks (which means
+.Em 16GB
+of logical space before compression and ditto blocks,
+assuming that blocksize is
+.Em 128kB ) .
+.Pp
+This tunable is important because it involves a trade-off between import
+time after an unclean export and the frequency of flushing metaslabs.
+The higher this number is, the more log blocks we allow when the pool is
+active which means that we flush metaslabs less often and thus decrease
+the number of I/Os for spacemap updates per TXG.
+At the same time though, that means that in the event of an unclean export,
+there will be more log spacemap blocks for us to read, inducing overhead
+in the import time of the pool.
+The lower the number, the more flushing is done, destroying log
+blocks quicker as they become obsolete faster, which leaves fewer blocks
+to be read during import time after a crash.
+.Pp
+Each log spacemap block existing during pool import leads to approximately
+one extra logical I/O issued.
+This is the reason why this tunable is exposed in terms of blocks rather
+than space used.
+.
+.It Sy zfs_unflushed_log_block_min Ns = Ns Sy 1000 Pq ulong
+If the number of metaslabs is small and our incoming rate is high,
+we could get into a situation that we are flushing all our metaslabs every TXG.
+Thus we always allow at least this many log blocks.
+.
+.It Sy zfs_unflushed_log_block_pct Ns = Ns Sy 400 Ns % Pq ulong
+Tunable used to determine the number of blocks that can be used for
+the spacemap log, expressed as a percentage of the total number of
+unflushed metaslabs in the pool.
+.
+.It Sy zfs_unflushed_log_txg_max Ns = Ns Sy 1000 Pq ulong
+Tunable limiting maximum time in TXGs any metaslab may remain unflushed.
+It effectively limits maximum number of unflushed per-TXG spacemap logs
+that need to be read after unclean pool export.
+.
+.It Sy zfs_unlink_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When enabled, files will not be asynchronously removed from the list of pending
+unlinks and the space they consume will be leaked.
+Once this option has been disabled and the dataset is remounted,
+the pending unlinks will be processed and the freed space returned to the pool.
+This option is used by the test suite.
+.
+.It Sy zfs_delete_blocks Ns = Ns Sy 20480 Pq ulong
+This is used to define a large file for the purposes of deletion.
+Files containing more than
+.Sy zfs_delete_blocks
+will be deleted asynchronously, while smaller files are deleted synchronously.
+Decreasing this value will reduce the time spent in an
+.Xr unlink 2
+system call, at the expense of a longer delay before the freed space is available.
+.
+.It Sy zfs_dirty_data_max Ns = Pq int
+Determines the dirty space limit in bytes.
+Once this limit is exceeded, new writes are halted until space frees up.
+This parameter takes precedence over
+.Sy zfs_dirty_data_max_percent .
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Defaults to
+.Sy physical_ram/10 ,
+capped at
+.Sy zfs_dirty_data_max_max .
+.
+.It Sy zfs_dirty_data_max_max Ns = Pq int
+Maximum allowable value of
+.Sy zfs_dirty_data_max ,
+expressed in bytes.
+This limit is only enforced at module load time, and will be ignored if
+.Sy zfs_dirty_data_max
+is later changed.
+This parameter takes precedence over
+.Sy zfs_dirty_data_max_max_percent .
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Defaults to
+.Sy physical_ram/4 .
+.
+.It Sy zfs_dirty_data_max_max_percent Ns = Ns Sy 25 Ns % Pq int
+Maximum allowable value of
+.Sy zfs_dirty_data_max ,
+expressed as a percentage of physical RAM.
+This limit is only enforced at module load time, and will be ignored if
+.Sy zfs_dirty_data_max
+is later changed.
+The parameter
+.Sy zfs_dirty_data_max_max
+takes precedence over this one.
+.No See Sx ZFS TRANSACTION DELAY .
+.
+.It Sy zfs_dirty_data_max_percent Ns = Ns Sy 10 Ns % Pq int
+Determines the dirty space limit, expressed as a percentage of all memory.
+Once this limit is exceeded, new writes are halted until space frees up.
+The parameter
+.Sy zfs_dirty_data_max
+takes precedence over this one.
+.No See Sx ZFS TRANSACTION DELAY .
+.Pp
+Subject to
+.Sy zfs_dirty_data_max_max .
+.
+.It Sy zfs_dirty_data_sync_percent Ns = Ns Sy 20 Ns % Pq int
+Start syncing out a transaction group if there's at least this much dirty data
+.Pq as a percentage of Sy zfs_dirty_data_max .
+This should be less than
+.Sy zfs_vdev_async_write_active_min_dirty_percent .
+.
+.It Sy zfs_wrlog_data_max Ns = Pq int
+The upper limit of write-transaction zil log data size in bytes.
+Write operations are throttled when approaching the limit until log data is
+cleared out after transaction group sync.
+Because of some overhead, it should be set to at least 2 times the size of
+.Sy zfs_dirty_data_max
+.No to prevent harming normal write throughput.
+It also should be smaller than the size of the slog device if slog is present.
+.Pp
+Defaults to
+.Sy zfs_dirty_data_max*2 .
+.
+.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
+Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
+preallocated for a file in order to guarantee that later writes will not
+run out of space.
+Instead,
+.Xr fallocate 2
+space preallocation only checks that sufficient space is currently available
+in the pool or the user's project quota allocation,
+and then creates a sparse file of the requested size.
+The requested space is multiplied by
+.Sy zfs_fallocate_reserve_percent
+to allow additional space for indirect blocks and other internal metadata.
+Setting this to
+.Sy 0
+disables support for
+.Xr fallocate 2
+and causes it to return
+.Sy EOPNOTSUPP .
+.
+.It Sy zfs_fletcher_4_impl Ns = Ns Sy fastest Pq string
+Select a fletcher 4 implementation.
+.Pp
+Supported selectors are:
+.Sy fastest , scalar , sse2 , ssse3 , avx2 , avx512f , avx512bw ,
+.No and Sy aarch64_neon .
+All except
+.Sy fastest No and Sy scalar
+require instruction set extensions to be available,
+and will only appear if ZFS detects that they are present at runtime.
+If multiple implementations of fletcher 4 are available, the
+.Sy fastest
+will be chosen using a micro benchmark.
+Selecting
+.Sy scalar
+results in the original CPU-based calculation being used.
+Selecting any option other than
+.Sy fastest No or Sy scalar
+results in vector instructions
+from the respective CPU instruction set being used.
+.
+.It Sy zfs_free_bpobj_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable/disable the processing of the free_bpobj object.
+.
+.It Sy zfs_async_block_max_blocks Ns = Ns Sy ULONG_MAX Po unlimited Pc Pq ulong
+Maximum number of blocks freed in a single TXG.
+.
+.It Sy zfs_max_async_dedup_frees Ns = Ns Sy 100000 Po 10^5 Pc Pq ulong
+Maximum number of dedup blocks freed in a single TXG.
+.
+.It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Pq ulong
+If nonzero, override record size calculation for
+.Nm zfs Cm send
+estimates.
+.
+.It Sy zfs_vdev_async_read_max_active Ns = Ns Sy 3 Pq int
+Maximum asynchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_read_min_active Ns = Ns Sy 1 Pq int
+Minimum asynchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_active_max_dirty_percent Ns = Ns Sy 60 Ns % Pq int
+When the pool has more than this much dirty data, use
+.Sy zfs_vdev_async_write_max_active
+to limit active async writes.
+If the dirty data is between the minimum and maximum,
+the active I/O limit is linearly interpolated.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_active_min_dirty_percent Ns = Ns Sy 30 Ns % Pq int
+When the pool has less than this much dirty data, use
+.Sy zfs_vdev_async_write_min_active
+to limit active async writes.
+If the dirty data is between the minimum and maximum,
+the active I/O limit is linearly
+interpolated.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_max_active Ns = Ns Sy 30 Pq int
+Maximum asynchronous write I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_async_write_min_active Ns = Ns Sy 2 Pq int
+Minimum asynchronous write I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.Pp
+Lower values are associated with better latency on rotational media but poorer
+resilver performance.
+The default value of
+.Sy 2
+was chosen as a compromise.
+A value of
+.Sy 3
+has been shown to improve resilver performance further at a cost of
+further increasing latency.
+.
+.It Sy zfs_vdev_initializing_max_active Ns = Ns Sy 1 Pq int
+Maximum initializing I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_initializing_min_active Ns = Ns Sy 1 Pq int
+Minimum initializing I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_max_active Ns = Ns Sy 1000 Pq int
+The maximum number of I/O operations active to each device.
+Ideally, this will be at least the sum of each queue's
+.Sy max_active .
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_open_timeout_ms Ns = Ns Sy 1000 Pq uint
+Timeout value to wait before determining a device is missing
+during import.
+This is helpful for transient missing paths due
+to links being briefly removed and recreated in response to
+udev events.
+.
+.It Sy zfs_vdev_rebuild_max_active Ns = Ns Sy 3 Pq int
+Maximum sequential resilver I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_rebuild_min_active Ns = Ns Sy 1 Pq int
+Minimum sequential resilver I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_removal_max_active Ns = Ns Sy 2 Pq int
+Maximum removal I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_removal_min_active Ns = Ns Sy 1 Pq int
+Minimum removal I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_scrub_max_active Ns = Ns Sy 2 Pq int
+Maximum scrub I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_scrub_min_active Ns = Ns Sy 1 Pq int
+Minimum scrub I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_sync_read_max_active Ns = Ns Sy 10 Pq int
+Maximum synchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_sync_read_min_active Ns = Ns Sy 10 Pq int
+Minimum synchronous read I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_sync_write_max_active Ns = Ns Sy 10 Pq int
+Maximum synchronous write I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_sync_write_min_active Ns = Ns Sy 10 Pq int
+Minimum synchronous write I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_trim_max_active Ns = Ns Sy 2 Pq int
+Maximum trim/discard I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_trim_min_active Ns = Ns Sy 1 Pq int
+Minimum trim/discard I/O operations active to each device.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_nia_delay Ns = Ns Sy 5 Pq int
+For non-interactive I/O (scrub, resilver, removal, initialize and rebuild),
+the number of concurrently-active I/O operations is limited to
+.Sy zfs_*_min_active ,
+unless the vdev is "idle".
+When there are no interactive I/O operations active (synchronous or otherwise),
+and
+.Sy zfs_vdev_nia_delay
+operations have completed since the last interactive operation,
+then the vdev is considered to be "idle",
+and the number of concurrently-active non-interactive operations is increased to
+.Sy zfs_*_max_active .
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_nia_credit Ns = Ns Sy 5 Pq int
+Some HDDs tend to prioritize sequential I/O so strongly, that concurrent
+random I/O latency reaches several seconds.
+On some HDDs this happens even if sequential I/O operations
+are submitted one at a time, and so setting
+.Sy zfs_*_max_active Ns = Ns Sy 1
+does not help.
+To prevent non-interactive I/O, like scrub,
+from monopolizing the device, no more than
+.Sy zfs_vdev_nia_credit No operations can be sent
+while there are outstanding incomplete interactive operations.
+This enforced wait ensures the HDD services the interactive I/O
+within a reasonable amount of time.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_queue_depth_pct Ns = Ns Sy 1000 Ns % Pq int
+Maximum number of queued allocations per top-level vdev expressed as
+a percentage of
+.Sy zfs_vdev_async_write_max_active ,
+which allows the system to detect devices that are more capable
+of handling allocations and to allocate more blocks to those devices.
+This allows for dynamic allocation distribution when devices are imbalanced,
+as fuller devices will tend to be slower than empty devices.
+.Pp
+Also see
+.Sy zio_dva_throttle_enabled .
+.
+.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
+Time before expiring
+.Pa .zfs/snapshot .
+.
+.It Sy zfs_admin_snapshot Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow the creation, removal, or renaming of entries in the
+.Sy .zfs/snapshot
+directory to cause the creation, destruction, or renaming of snapshots.
+When enabled, this functionality works both locally and over NFS exports
+which have the
+.Em no_root_squash
+option set.
+.
+.It Sy zfs_flags Ns = Ns Sy 0 Pq int
+Set additional debugging flags.
+The following flags may be bitwise-ored together:
+.TS
+box;
+lbz r l l .
+	Value	Symbolic Name	Description
+_
+	1	ZFS_DEBUG_DPRINTF	Enable dprintf entries in the debug log.
+*	2	ZFS_DEBUG_DBUF_VERIFY	Enable extra dbuf verifications.
+*	4	ZFS_DEBUG_DNODE_VERIFY	Enable extra dnode verifications.
+	8	ZFS_DEBUG_SNAPNAMES	Enable snapshot name verification.
+	16	ZFS_DEBUG_MODIFY	Check for illegally modified ARC buffers.
+	64	ZFS_DEBUG_ZIO_FREE	Enable verification of block frees.
+	128	ZFS_DEBUG_HISTOGRAM_VERIFY	Enable extra spacemap histogram verifications.
+	256	ZFS_DEBUG_METASLAB_VERIFY	Verify space accounting on disk matches in-memory \fBrange_trees\fP.
+	512	ZFS_DEBUG_SET_ERROR	Enable \fBSET_ERROR\fP and dprintf entries in the debug log.
+	1024	ZFS_DEBUG_INDIRECT_REMAP	Verify split blocks created by device removal.
+	2048	ZFS_DEBUG_TRIM	Verify TRIM ranges are always within the allocatable range tree.
+	4096	ZFS_DEBUG_LOG_SPACEMAP	Verify that the log summary is consistent with the spacemap log
+			       and enable \fBzfs_dbgmsgs\fP for metaslab loading and flushing.
+.TE
+.Sy \& * No Requires debug build.
+.
+.It Sy zfs_btree_verify_intensity Ns = Ns Sy 0 Pq uint
+Enables btree verification.
+The following settings are cumulative:
+.TS
+box;
+lbz r l l .
+	Value	Description
+
+	1	Verify height.
+	2	Verify pointers from children to parent.
+	3	Verify element counts.
+	4	Verify element order. (expensive)
+*	5	Verify unused memory is poisoned. (expensive)
+.TE
+.Sy \& * No Requires debug build.
+.
+.It Sy zfs_free_leak_on_eio Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If destroy encounters an
+.Sy EIO
+while reading metadata (e.g. indirect blocks),
+space referenced by the missing metadata can not be freed.
+Normally this causes the background destroy to become "stalled",
+as it is unable to make forward progress.
+While in this stalled state, all remaining space to free
+from the error-encountering filesystem is "temporarily leaked".
+Set this flag to cause it to ignore the
+.Sy EIO ,
+permanently leak the space from indirect blocks that can not be read,
+and continue to free everything else that it can.
+.Pp
+The default "stalling" behavior is useful if the storage partially
+fails (i.e. some but not all I/O operations fail), and then later recovers.
+In this case, we will be able to continue pool operations while it is
+partially failed, and when it recovers, we can continue to free the
+space, with no leaks.
+Note, however, that this case is actually fairly rare.
+.Pp
+Typically pools either
+.Bl -enum -compact -offset 4n -width "1."
+.It
+fail completely (but perhaps temporarily,
+e.g. due to a top-level vdev going offline), or
+.It
+have localized, permanent errors (e.g. disk returns the wrong data
+due to bit flip or firmware bug).
+.El
+In the former case, this setting does not matter because the
+pool will be suspended and the sync thread will not be able to make
+forward progress regardless.
+In the latter, because the error is permanent, the best we can do
+is leak the minimum amount of space,
+which is what setting this flag will do.
+It is therefore reasonable for this flag to normally be set,
+but we chose the more conservative approach of not setting it,
+so that there is no possibility of
+leaking space in the "partial temporary" failure case.
+.
+.It Sy zfs_free_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int
+During a
+.Nm zfs Cm destroy
+operation using the
+.Sy async_destroy
+feature,
+a minimum of this much time will be spent working on freeing blocks per TXG.
+.
+.It Sy zfs_obsolete_min_time_ms Ns = Ns Sy 500 Ns ms Pq int
+Similar to
+.Sy zfs_free_min_time_ms ,
+but for cleanup of old indirection records for removed vdevs.
+.
+.It Sy zfs_immediate_write_sz Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq long
+Largest data block to write to the ZIL.
+Larger blocks will be treated as if the dataset being written to had the
+.Sy logbias Ns = Ns Sy throughput
+property set.
+.
+.It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq ulong
+Pattern written to vdev free space by
+.Xr zpool-initialize 8 .
+.
+.It Sy zfs_initialize_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+Size of writes used by
+.Xr zpool-initialize 8 .
+This option is used by the test suite.
+.
+.It Sy zfs_livelist_max_entries Ns = Ns Sy 500000 Po 5*10^5 Pc Pq ulong
+The threshold size (in block pointers) at which we create a new sub-livelist.
+Larger sublists are more costly from a memory perspective but the fewer
+sublists there are, the lower the cost of insertion.
+.
+.It Sy zfs_livelist_min_percent_shared Ns = Ns Sy 75 Ns % Pq int
+If the amount of shared space between a snapshot and its clone drops below
+this threshold, the clone turns off the livelist and reverts to the old
+deletion method.
+This is in place because livelists no longer give us a benefit
+once a clone has been overwritten enough.
+.
+.It Sy zfs_livelist_condense_new_alloc Ns = Ns Sy 0 Pq int
+Incremented each time an extra ALLOC blkptr is added to a livelist entry while
+it is being condensed.
+This option is used by the test suite to track race conditions.
+.
+.It Sy zfs_livelist_condense_sync_cancel Ns = Ns Sy 0 Pq int
+Incremented each time livelist condensing is canceled while in
+.Fn spa_livelist_condense_sync .
+This option is used by the test suite to track race conditions.
+.
+.It Sy zfs_livelist_condense_sync_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int
+When set, the livelist condense process pauses indefinitely before
+executing the synctask -
+.Fn spa_livelist_condense_sync .
+This option is used by the test suite to trigger race conditions.
+.
+.It Sy zfs_livelist_condense_zthr_cancel Ns = Ns Sy 0 Pq int
+Incremented each time livelist condensing is canceled while in
+.Fn spa_livelist_condense_cb .
+This option is used by the test suite to track race conditions.
+.
+.It Sy zfs_livelist_condense_zthr_pause Ns = Ns Sy 0 Ns | Ns 1 Pq int
+When set, the livelist condense process pauses indefinitely before
+executing the open context condensing work in
+.Fn spa_livelist_condense_cb .
+This option is used by the test suite to trigger race conditions.
+.
+.It Sy zfs_lua_max_instrlimit Ns = Ns Sy 100000000 Po 10^8 Pc Pq ulong
+The maximum execution time limit that can be set for a ZFS channel program,
+specified as a number of Lua instructions.
+.
+.It Sy zfs_lua_max_memlimit Ns = Ns Sy 104857600 Po 100MB Pc Pq ulong
+The maximum memory limit that can be set for a ZFS channel program, specified
+in bytes.
+.
+.It Sy zfs_max_dataset_nesting Ns = Ns Sy 50 Pq int
+The maximum depth of nested datasets.
+This value can be tuned temporarily to
+fix existing datasets that exceed the predefined limit.
+.
+.It Sy zfs_max_log_walking Ns = Ns Sy 5 Pq ulong
+The number of past TXGs that the flushing algorithm of the log spacemap
+feature uses to estimate incoming log blocks.
+.
+.It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong
+Maximum number of rows allowed in the summary of the spacemap log.
+.
+.It Sy zfs_max_recordsize Ns = Ns Sy 1048576 Po 1MB Pc Pq int
+We currently support block sizes from
+.Em 512B No to Em 16MB .
+The benefits of larger blocks, and thus larger I/O,
+need to be weighed against the cost of COWing a giant block to modify one byte.
+Additionally, very large blocks can have an impact on I/O latency,
+and also potentially on the memory allocator.
+Therefore, we do not allow the recordsize to be set larger than this tunable.
+Larger blocks can be created by changing it,
+and pools with larger blocks can always be imported and used,
+regardless of this setting.
+.
+.It Sy zfs_allow_redacted_dataset_mount Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow datasets received with redacted send/receive to be mounted.
+Normally disabled because these datasets may be missing key data.
+.
+.It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq ulong
+Minimum number of metaslabs to flush per dirty TXG.
+.
+.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq int
+Allow metaslabs to keep their active state as long as their fragmentation
+percentage is no more than this value.
+An active metaslab that exceeds this threshold
+will no longer keep its active status allowing better metaslabs to be selected.
+.
+.It Sy zfs_mg_fragmentation_threshold Ns = Ns Sy 95 Ns % Pq int
+Metaslab groups are considered eligible for allocations if their
+fragmentation metric (measured as a percentage) is less than or equal to
+this value.
+If a metaslab group exceeds this threshold then it will be
+skipped unless all metaslab groups within the metaslab class have also
+crossed this threshold.
+.
+.It Sy zfs_mg_noalloc_threshold Ns = Ns Sy 0 Ns % Pq int
+Defines a threshold at which metaslab groups should be eligible for allocations.
+The value is expressed as a percentage of free space
+beyond which a metaslab group is always eligible for allocations.
+If a metaslab group's free space is less than or equal to the
+threshold, the allocator will avoid allocating to that group
+unless all groups in the pool have reached the threshold.
+Once all groups have reached the threshold, all groups are allowed to accept
+allocations.
+The default value of
+.Sy 0
+disables the feature and causes all metaslab groups to be eligible for allocations.
+.Pp
+This parameter allows one to deal with pools having heavily imbalanced
+vdevs such as would be the case when a new vdev has been added.
+Setting the threshold to a non-zero percentage will stop allocations
+from being made to vdevs that aren't filled to the specified percentage
+and allow lesser filled vdevs to acquire more allocations than they
+otherwise would under the old
+.Sy zfs_mg_alloc_failures
+facility.
+.
+.It Sy zfs_ddt_data_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int
+If enabled, ZFS will place DDT data into the special allocation class.
+.
+.It Sy zfs_user_indirect_is_special Ns = Ns Sy 1 Ns | Ns 0 Pq int
+If enabled, ZFS will place user data indirect blocks
+into the special allocation class.
+.
+.It Sy zfs_multihost_history Ns = Ns Sy 0 Pq int
+Historical statistics for this many latest multihost updates will be available in
+.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /multihost .
+.
+.It Sy zfs_multihost_interval Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq ulong
+Used to control the frequency of multihost writes which are performed when the
+.Sy multihost
+pool property is on.
+This is one of the factors used to determine the
+length of the activity check during import.
+.Pp
+The multihost write period is
+.Sy zfs_multihost_interval / leaf-vdevs .
+On average a multihost write will be issued for each leaf vdev
+every
+.Sy zfs_multihost_interval
+milliseconds.
+In practice, the observed period can vary with the I/O load
+and this observed value is the delay which is stored in the uberblock.
+.
+.It Sy zfs_multihost_import_intervals Ns = Ns Sy 20 Pq uint
+Used to control the duration of the activity test on import.
+Smaller values of
+.Sy zfs_multihost_import_intervals
+will reduce the import time but increase
+the risk of failing to detect an active pool.
+The total activity check time is never allowed to drop below one second.
+.Pp
+On import the activity check waits a minimum amount of time determined by
+.Sy zfs_multihost_interval * zfs_multihost_import_intervals ,
+or the same product computed on the host which last had the pool imported,
+whichever is greater.
+The activity check time may be further extended if the value of MMP
+delay found in the best uberblock indicates actual multihost updates happened
+at longer intervals than
+.Sy zfs_multihost_interval .
+A minimum of
+.Em 100ms
+is enforced.
+.Pp
+.Sy 0 No is equivalent to Sy 1 .
+.
+.It Sy zfs_multihost_fail_intervals Ns = Ns Sy 10 Pq uint
+Controls the behavior of the pool when multihost write failures or delays are
+detected.
+.Pp
+When
+.Sy 0 ,
+multihost write failures or delays are ignored.
+The failures will still be reported to the ZED which depending on
+its configuration may take action such as suspending the pool or offlining a
+device.
+.Pp
+Otherwise, the pool will be suspended if
+.Sy zfs_multihost_fail_intervals * zfs_multihost_interval
+milliseconds pass without a successful MMP write.
+This guarantees the activity test will see MMP writes if the pool is imported.
+.Sy 1 No is equivalent to Sy 2 ;
+this is necessary to prevent the pool from being suspended
+due to normal, small I/O latency variations.
+.
+.It Sy zfs_no_scrub_io Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to disable scrub I/O.
+This results in scrubs not actually scrubbing data and
+simply doing a metadata crawl of the pool instead.
+.
+.It Sy zfs_no_scrub_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to disable block prefetching for scrubs.
+.
+.It Sy zfs_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable cache flush operations on disks when writing.
+Setting this will cause pool corruption on power loss
+if a volatile out-of-order write cache is enabled.
+.
+.It Sy zfs_nopwrite_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Allow no-operation writes.
+The occurrence of nopwrites will further depend on other pool properties
+.Pq i.a. the checksumming and compression algorithms .
+.
+.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable forcing TXG sync to find holes.
+When enabled forces ZFS to sync data when
+.Sy SEEK_HOLE No or Sy SEEK_DATA
+flags are used allowing holes in a file to be accurately reported.
+When disabled holes will not be reported in recently dirtied files.
+.
+.It Sy zfs_pd_bytes_max Ns = Ns Sy 52428800 Ns B Po 50MB Pc Pq int
+The number of bytes which should be prefetched during a pool traversal, like
+.Nm zfs Cm send
+or other data crawling operations.
+.
+.It Sy zfs_traverse_indirect_prefetch_limit Ns = Ns Sy 32 Pq int
+The number of blocks pointed to by an indirect (non-L0) block which should be
+prefetched during a pool traversal, like
+.Nm zfs Cm send
+or other data crawling operations.
+.
+.It Sy zfs_per_txg_dirty_frees_percent Ns = Ns Sy 30 Ns % Pq ulong
+Control percentage of dirtied indirect blocks from frees allowed into one TXG.
+After this threshold is crossed, additional frees will wait until the next TXG.
+.Sy 0 No disables this throttle.
+.
+.It Sy zfs_prefetch_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable predictive prefetch.
+Note that it leaves "prescient" prefetch (e.g.\&
+.Nm zfs Cm send )
+intact.
+Unlike predictive prefetch, prescient prefetch never issues I/O
+that ends up not being needed, so it can't hurt performance.
+.
+.It Sy zfs_qat_checksum_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for SHA256 checksums.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_qat_compress_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for gzip compression.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_qat_encrypt_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable QAT hardware acceleration for AES-GCM encryption.
+May be unset after the ZFS modules have been loaded to initialize the QAT
+hardware as long as support is compiled in and the QAT driver is present.
+.
+.It Sy zfs_vnops_read_chunk_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq long
+Bytes to read per chunk.
+.
+.It Sy zfs_read_history Ns = Ns Sy 0 Pq int
+Historical statistics for this many latest reads will be available in
+.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /reads .
+.
+.It Sy zfs_read_history_hits Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Include cache hits in read history.
+.
+.It Sy zfs_rebuild_max_segment Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
+Maximum read segment size to issue when sequentially resilvering a
+top-level vdev.
+.
+.It Sy zfs_rebuild_scrub_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Automatically start a pool scrub when the last active sequential resilver
+completes in order to verify the checksums of all blocks which have been
+resilvered.
+This is enabled by default and strongly recommended.
+.
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong
+Maximum amount of I/O that can be concurrently issued for a sequential
+resilver per leaf device, given in bytes.
+.
+.It Sy zfs_reconstruct_indirect_combinations_max Ns = Ns Sy 4096 Pq int
+If an indirect split block contains more than this many possible unique
+combinations when being reconstructed, consider it too computationally
+expensive to check them all.
+Instead, try at most this many randomly selected
+combinations each time the block is accessed.
+This allows all segment copies to participate fairly
+in the reconstruction when all combinations
+cannot be checked and prevents repeated use of one bad copy.
+.
+.It Sy zfs_recover Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to attempt to recover from fatal errors.
+This should only be used as a last resort,
+as it typically results in leaked space, or worse.
+.
+.It Sy zfs_removal_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Ignore hard IO errors during device removal.
+When set, if a device encounters a hard IO error during the removal process
+the removal will not be cancelled.
+This can result in a normally recoverable block becoming permanently damaged
+and is hence not recommended.
+This should only be used as a last resort when the
+pool cannot be returned to a healthy state prior to removing the device.
+.
+.It Sy zfs_removal_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
+This is used by the test suite so that it can ensure that certain actions
+happen while in the middle of a removal.
+.
+.It Sy zfs_remove_max_segment Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The largest contiguous segment that we will attempt to allocate when removing
+a device.
+If there is a performance problem with attempting to allocate large blocks,
+consider decreasing this.
+The default value is also the maximum.
+.
+.It Sy zfs_resilver_disable_defer Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Ignore the
+.Sy resilver_defer
+feature, causing an operation that would start a resilver to
+immediately restart the one in progress.
+.
+.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3s Pc Pq int
+Resilvers are processed by the sync thread.
+While resilvering, it will spend at least this much time
+working on a resilver between TXG flushes.
+.
+.It Sy zfs_scan_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If set, remove the DTL (dirty time list) upon completion of a pool scan (scrub),
+even if there were unrepairable errors.
+Intended to be used during pool repair or recovery to
+stop resilvering when the pool is next imported.
+.
+.It Sy zfs_scrub_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq int
+Scrubs are processed by the sync thread.
+While scrubbing, it will spend at least this much time
+working on a scrub between TXG flushes.
+.
+.It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2h Pc Pq int
+To preserve progress across reboots, the sequential scan algorithm periodically
+needs to stop metadata scanning and issue all the verification I/O to disk.
+The frequency of this flushing is determined by this tunable.
+.
+.It Sy zfs_scan_fill_weight Ns = Ns Sy 3 Pq int
+This tunable affects how scrub and resilver I/O segments are ordered.
+A higher number indicates that we care more about how filled in a segment is,
+while a lower number indicates we care more about the size of the extent without
+considering the gaps within a segment.
+This value is only tunable upon module insertion.
+Changing the value afterwards will have no effect on scrub or resilver performance.
+.
+.It Sy zfs_scan_issue_strategy Ns = Ns Sy 0 Pq int
+Determines the order that data will be verified while scrubbing or resilvering:
+.Bl -tag -compact -offset 4n -width "a"
+.It Sy 1
+Data will be verified as sequentially as possible, given the
+amount of memory reserved for scrubbing
+.Pq see Sy zfs_scan_mem_lim_fact .
+This may improve scrub performance if the pool's data is very fragmented.
+.It Sy 2
+The largest mostly-contiguous chunk of found data will be verified first.
+By deferring scrubbing of small segments, we may later find adjacent data
+to coalesce and increase the segment size.
+.It Sy 0
+.No Use strategy Sy 1 No during normal verification
+.No and strategy Sy 2 No while taking a checkpoint.
+.El
+.
+.It Sy zfs_scan_legacy Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If unset, indicates that scrubs and resilvers will gather metadata in
+memory before issuing sequential I/O.
+Otherwise indicates that the legacy algorithm will be used,
+where I/O is initiated as soon as it is discovered.
+Unsetting will not affect scrubs or resilvers that are already in progress.
+.
+.It Sy zfs_scan_max_ext_gap Ns = Ns Sy 2097152 Ns B Po 2MB Pc Pq int
+Sets the largest gap in bytes between scrub/resilver I/O operations
+that will still be considered sequential for sorting purposes.
+Changing this value will not
+affect scrubs or resilvers that are already in progress.
+.
+.It Sy zfs_scan_mem_lim_fact Ns = Ns Sy 20 Ns ^-1 Pq int
+Maximum fraction of RAM used for I/O sorting by sequential scan algorithm.
+This tunable determines the hard limit for I/O sorting memory usage.
+When the hard limit is reached we stop scanning metadata and start issuing
+data verification I/O.
+This is done until we get below the soft limit.
+.
+.It Sy zfs_scan_mem_lim_soft_fact Ns = Ns Sy 20 Ns ^-1 Pq int
+The fraction of the hard limit used to determine the soft limit for I/O sorting
+by the sequential scan algorithm.
+When we cross this limit from below no action is taken.
+When we cross this limit from above it is because we are issuing verification I/O.
+In this case (unless the metadata scan is done) we stop issuing verification I/O
+and start scanning metadata again until we get to the hard limit.
+.
+.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When reporting resilver throughput and estimated completion time use the
+performance observed over roughly the last
+.Sy zfs_scan_report_txgs
+TXGs.
+When set to zero performance is calculated over the time between checkpoints.
+.
+.It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Enforce tight memory limits on pool scans when a sequential scan is in progress.
+When disabled, the memory limit may be exceeded by fast disks.
+.
+.It Sy zfs_scan_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Freezes a scrub/resilver in progress without actually pausing it.
+Intended for testing/debugging.
+.
+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
+Maximum amount of data that can be concurrently issued at once for scrubs and
+resilvers per leaf device, given in bytes.
+.
+.It Sy zfs_send_corrupt_data Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow sending of corrupt data (ignore read/checksum errors when sending).
+.
+.It Sy zfs_send_unmodified_spill_blocks Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Include unmodified spill blocks in the send stream.
+Under certain circumstances, previous versions of ZFS could incorrectly
+remove the spill block from an existing object.
+Including unmodified copies of the spill blocks creates a backwards-compatible
+stream which will recreate a spill block if it was incorrectly removed.
+.
+.It Sy zfs_send_no_prefetch_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm send
+internal queues.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_send_no_prefetch_queue_length Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum number of bytes allowed in
+.Nm zfs Cm send Ns 's
+internal queues.
+.
+.It Sy zfs_send_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm send
+prefetch queue.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_send_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The maximum number of bytes allowed that will be prefetched by
+.Nm zfs Cm send .
+This value must be at least twice the maximum block size in use.
+.
+.It Sy zfs_recv_queue_ff Ns = Ns Sy 20 Ns ^-1 Pq int
+The fill fraction of the
+.Nm zfs Cm receive
+queue.
+The fill fraction controls the timing with which internal threads are woken up.
+.
+.It Sy zfs_recv_queue_length Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+The maximum number of bytes allowed in the
+.Nm zfs Cm receive
+queue.
+This value must be at least twice the maximum block size in use.
+.
+.It Sy zfs_recv_write_batch_size Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum amount of data, in bytes, that
+.Nm zfs Cm receive
+will write in one DMU transaction.
+This is the uncompressed size, even when receiving a compressed send stream.
+This setting will not reduce the write size below a single block.
+Capped at a maximum of
+.Sy 32MB .
+.
+.It Sy zfs_override_estimate_recordsize Ns = Ns Sy 0 Ns | Ns 1 Pq ulong
+Setting this variable overrides the default logic for estimating block
+sizes when doing a
+.Nm zfs Cm send .
+The default heuristic is that the average block size
+will be the current recordsize.
+Override this value if most data in your dataset is not of that size
+and you require accurate zfs send size estimates.
+.
+.It Sy zfs_sync_pass_deferred_free Ns = Ns Sy 2 Pq int
+Flushing of data to disk is done in passes.
+Defer frees starting in this pass.
+.
+.It Sy zfs_spa_discard_memory_limit Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int
+Maximum memory used for prefetching a checkpoint's space map on each
+vdev while discarding the checkpoint.
+.
+.It Sy zfs_special_class_metadata_reserve_pct Ns = Ns Sy 25 Ns % Pq int
+Only allow small data blocks to be allocated on the special and dedup vdev
+types when the available free space percentage on these vdevs exceeds this value.
+This ensures reserved space is available for pool metadata as the
+special vdevs approach capacity.
+.
+.It Sy zfs_sync_pass_dont_compress Ns = Ns Sy 8 Pq int
+Starting in this sync pass, disable compression (including of metadata).
+With the default setting, in practice, we don't have this many sync passes,
+so this has no effect.
+.Pp
+The original intent was that disabling compression would help the sync passes
+to converge.
+However, in practice, disabling compression increases
+the average number of sync passes; because when we turn compression off,
+many blocks' size will change, and thus we have to re-allocate
+(not overwrite) them.
+It also increases the number of
+.Em 128kB
+allocations (e.g. for indirect blocks and spacemaps)
+because these will not be compressed.
+The
+.Em 128kB
+allocations are especially detrimental to performance
+on highly fragmented systems, which may have very few free segments of this size,
+and may need to load new metaslabs to satisfy these allocations.
+.
+.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq int
+Rewrite new block pointers starting in this pass.
+.
+.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int
+This controls the number of threads used by
+.Sy dp_sync_taskq .
+The default value of
+.Sy 75%
+will create a maximum of one thread per CPU.
+.
+.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128MB Pc Pq uint
+Maximum size of TRIM command.
+Larger ranges will be split into chunks no larger than this value before issuing.
+.
+.It Sy zfs_trim_extent_bytes_min Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq uint
+Minimum size of TRIM commands.
+TRIM ranges smaller than this will be skipped,
+unless they're part of a larger range which was chunked.
+This is done because it's common for these small TRIMs
+to negatively impact overall performance.
+.
+.It Sy zfs_trim_metaslab_skip Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Skip uninitialized metaslabs during the TRIM process.
+This option is useful for pools constructed from large thinly-provisioned devices
+where TRIM operations are slow.
+As a pool ages, an increasing fraction of the pool's metaslabs
+will be initialized, progressively degrading the usefulness of this option.
+This setting is stored when starting a manual TRIM and will
+persist for the duration of the requested TRIM.
+.
+.It Sy zfs_trim_queue_limit Ns = Ns Sy 10 Pq uint
+Maximum number of queued TRIMs outstanding per leaf vdev.
+The number of concurrent TRIM commands issued to the device is controlled by
+.Sy zfs_vdev_trim_min_active No and Sy zfs_vdev_trim_max_active .
+.
+.It Sy zfs_trim_txg_batch Ns = Ns Sy 32 Pq uint
+The number of transaction groups' worth of frees which should be aggregated
+before TRIM operations are issued to the device.
+This setting represents a trade-off between issuing larger,
+more efficient TRIM operations and the delay
+before the recently trimmed space is available for use by the device.
+.Pp
+Increasing this value will allow frees to be aggregated for a longer time.
+This will result in larger TRIM operations and potentially increased memory usage.
+Decreasing this value will have the opposite effect.
+The default of
+.Sy 32
+was determined to be a reasonable compromise.
+.
+.It Sy zfs_txg_history Ns = Ns Sy 0 Pq int
+Historical statistics for this many latest TXGs will be available in
+.Pa /proc/spl/kstat/zfs/ Ns Ao Ar pool Ac Ns Pa /TXGs .
+.
+.It Sy zfs_txg_timeout Ns = Ns Sy 5 Ns s Pq int
+Flush dirty data to disk at least every this many seconds (maximum TXG duration).
+.
+.It Sy zfs_vdev_aggregate_trim Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Allow TRIM I/Os to be aggregated.
+This is normally not helpful because the extents to be trimmed
+will have already been aggregated by the metaslab.
+This option is provided for debugging and performance analysis.
+.
+.It Sy zfs_vdev_aggregation_limit Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+Max vdev I/O aggregation size.
+.
+.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
+Max vdev I/O aggregation size for non-rotating media.
+.
+.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64kB Pc Pq int
+Shift size to inflate reads to.
+.
+.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16kB Pc Pq int
+Inflate reads smaller than this value to meet the
+.Sy zfs_vdev_cache_bshift
+size
+.Pq default Sy 64kB .
+.
+.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq int
+Total size of the per-disk cache in bytes.
+.Pp
+Currently this feature is disabled, as it has been found to not be helpful
+for performance and in some cases harmful.
+.
+.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member when an I/O operation
+immediately follows its predecessor on rotational vdevs
+for the purpose of making decisions based on load.
+.
+.It Sy zfs_vdev_mirror_rotating_seek_inc Ns = Ns Sy 5 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member when an I/O operation
+lacks locality as defined by
+.Sy zfs_vdev_mirror_rotating_seek_offset .
+Operations within this that are not immediately following the previous operation
+are incremented by half.
+.
+.It Sy zfs_vdev_mirror_rotating_seek_offset Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq int
+The maximum distance for the last queued I/O operation in which
+the balancing algorithm considers an operation to have locality.
+.No See Sx ZFS I/O SCHEDULER .
+.
+.It Sy zfs_vdev_mirror_non_rotating_inc Ns = Ns Sy 0 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member on non-rotational vdevs
+when I/O operations do not immediately follow one another.
+.
+.It Sy zfs_vdev_mirror_non_rotating_seek_inc Ns = Ns Sy 1 Pq int
+A number by which the balancing algorithm increments the load calculation for
+the purpose of selecting the least busy mirror member when an I/O operation lacks
+locality as defined by the
+.Sy zfs_vdev_mirror_rotating_seek_offset .
+Operations within this that are not immediately following the previous operation
+are incremented by half.
+.
+.It Sy zfs_vdev_read_gap_limit Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
+Aggregate read I/O operations if the on-disk gap between them is within this
+threshold.
+.
+.It Sy zfs_vdev_write_gap_limit Ns = Ns Sy 4096 Ns B Po 4kB Pc Pq int
+Aggregate write I/O operations if the on-disk gap between them is within this
+threshold.
+.
+.It Sy zfs_vdev_raidz_impl Ns = Ns Sy fastest Pq string
+Select the raidz parity implementation to use.
+.Pp
+Variants that don't depend on CPU-specific features
+may be selected on module load, as they are supported on all systems.
+The remaining options may only be set after the module is loaded,
+as they are available only if the implementations are compiled in
+and supported on the running system.
+.Pp
+Once the module is loaded,
+.Pa /sys/module/zfs/parameters/zfs_vdev_raidz_impl
+will show the available options,
+with the currently selected one enclosed in square brackets.
+.Pp
+.TS
+lb l l .
+fastest	selected by built-in benchmark
+original	original implementation
+scalar	scalar implementation
+sse2	SSE2 instruction set	64-bit x86
+ssse3	SSSE3 instruction set	64-bit x86
+avx2	AVX2 instruction set	64-bit x86
+avx512f	AVX512F instruction set	64-bit x86
+avx512bw	AVX512F & AVX512BW instruction sets	64-bit x86
+aarch64_neon	NEON	Aarch64/64-bit ARMv8
+aarch64_neonx2	NEON with more unrolling	Aarch64/64-bit ARMv8
+powerpc_altivec	Altivec	PowerPC
+.TE
+.
+.It Sy zfs_vdev_scheduler Pq charp
+.Sy DEPRECATED .
+Prints warning to kernel log for compatibility.
+.
+.It Sy zfs_zevent_len_max Ns = Ns Sy 512 Pq int
+Max event queue length.
+Events in the queue can be viewed with
+.Xr zpool-events 8 .
+.
+.It Sy zfs_zevent_retain_max Ns = Ns Sy 2000 Pq int
+Maximum recent zevent records to retain for duplicate checking.
+Setting this to
+.Sy 0
+disables duplicate detection.
+.
+.It Sy zfs_zevent_retain_expire_secs Ns = Ns Sy 900 Ns s Po 15min Pc Pq int
+Lifespan for a recent ereport that was retained for duplicate checking.
+.
+.It Sy zfs_zil_clean_taskq_maxalloc Ns = Ns Sy 1048576 Pq int
+The maximum number of taskq entries that are allowed to be cached.
+When this limit is exceeded transaction records (itxs)
+will be cleaned synchronously.
+.
+.It Sy zfs_zil_clean_taskq_minalloc Ns = Ns Sy 1024 Pq int
+The number of taskq entries that are pre-populated when the taskq is first
+created and are immediately available for use.
+.
+.It Sy zfs_zil_clean_taskq_nthr_pct Ns = Ns Sy 100 Ns % Pq int
+This controls the number of threads used by
+.Sy dp_zil_clean_taskq .
+The default value of
+.Sy 100%
+will create a maximum of one thread per CPU.
+.
+.It Sy zil_maxblocksize Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq int
+This sets the maximum block size used by the ZIL.
+On very fragmented pools, lowering this
+.Pq typically to Sy 36kB
+can improve performance.
+.
+.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
+This sets the minimum delay, in nanoseconds, that the ZIL cares to apply to
+a block commit while waiting for more records.
+If ZIL writes are too fast, the kernel may not be able to sleep for so short
+an interval, increasing log latency above that allowed by
+.Sy zfs_commit_timeout_pct .
+.
+.It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable the cache flush commands that are normally sent to disk by
+the ZIL after an LWB write has completed.
+Setting this will cause ZIL corruption on power loss
+if a volatile out-of-order write cache is enabled.
+.
+.It Sy zil_replay_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Disable intent logging replay.
+Replay can be disabled for recovery from a corrupted ZIL.
+.
+.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768kB Pc Pq ulong
+Limit SLOG write size per commit executed with synchronous priority.
+Any writes above that will be executed with lower (asynchronous) priority
+to limit potential SLOG device abuse by single active ZIL writer.
+.
+.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
+Usually, one metaslab from each normal-class vdev is dedicated for use by
+the ZIL to log synchronous writes.
+However, if there are fewer than
+.Sy zfs_embedded_slog_min_ms
+metaslabs in the vdev, this functionality is disabled.
+This ensures that we don't set aside an unreasonable amount of space for the ZIL.
+.
+.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
+If non-zero, the zio deadman will produce debugging messages
+.Pq see Sy zfs_dbgmsg_enable
+for all zios, rather than only for leaf zios possessing a vdev.
+This is meant to be used by developers to gain
+diagnostic information for hang conditions which don't involve a mutex
+or other locking primitive: typically conditions in which a thread in
+the zio pipeline is looping indefinitely.
+.
+.It Sy zio_slow_io_ms Ns = Ns Sy 30000 Ns ms Po 30s Pc Pq int
+When an I/O operation takes more than this much time to complete,
+it's marked as slow.
+Each slow operation causes a delay zevent.
+Slow I/O counters can be seen with
+.Nm zpool Cm status Fl s .
+.
+.It Sy zio_dva_throttle_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Throttle block allocations in the I/O pipeline.
+This allows for dynamic allocation distribution when devices are imbalanced.
+When enabled, the maximum number of pending allocations per top-level vdev
+is limited by
+.Sy zfs_vdev_queue_depth_pct .
+.
+.It Sy zio_requeue_io_start_cut_in_line Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Prioritize requeued I/O.
+.
+.It Sy zio_taskq_batch_pct Ns = Ns Sy 80 Ns % Pq uint
+Percentage of online CPUs which will run a worker thread for I/O.
+These workers are responsible for I/O work such as compression and
+checksum calculations.
+A fractional number of CPUs will be rounded down.
+.Pp
+The default value of
+.Sy 80%
+was chosen to avoid using all CPUs which can result in
+latency issues and inconsistent application performance,
+especially when slower compression and/or checksumming is enabled.
+.
+.It Sy zio_taskq_batch_tpq Ns = Ns Sy 0 Pq uint
+Number of worker threads per taskq.
+Lower values improve I/O ordering and CPU utilization,
+while higher values reduce lock contention.
+.Pp
+If
+.Sy 0 ,
+generate a system-dependent value close to 6 threads per taskq.
+.
+.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Do not create zvol device nodes.
+This may slightly improve startup time on
+systems with a very large number of zvols.
+.
+.It Sy zvol_major Ns = Ns Sy 230 Pq uint
+Major number for zvol block devices.
+.
+.It Sy zvol_max_discard_blocks Ns = Ns Sy 16384 Pq ulong
+Discard (TRIM) operations done on zvols will be done in batches of this
+many blocks, where block size is determined by the
+.Sy volblocksize
+property of a zvol.
+.
+.It Sy zvol_prefetch_bytes Ns = Ns Sy 131072 Ns B Po 128kB Pc Pq uint
+When adding a zvol to the system, prefetch this many bytes
+from the start and end of the volume.
+Prefetching these regions of the volume is desirable,
+because they are likely to be accessed immediately by
+.Xr blkid 8
+or the kernel partitioner.
+.
+.It Sy zvol_request_sync Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When processing I/O requests for a zvol, submit them synchronously.
+This effectively limits the queue depth to
+.Em 1
+for each I/O submitter.
+When unset, requests are handled asynchronously by a thread pool.
+The number of requests which can be handled concurrently is controlled by
+.Sy zvol_threads .
+.
+.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
+Max number of threads which can handle zvol I/O requests concurrently.
+.
+.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
+Defines the behaviour of zvol block devices when
+.Sy volmode Ns = Ns Sy default :
+.Bl -tag -compact -offset 4n -width "a"
+.It Sy 1
+.No equivalent to Sy full
+.It Sy 2
+.No equivalent to Sy dev
+.It Sy 3
+.No equivalent to Sy none
+.El
+.El
+.
+.Sh ZFS I/O SCHEDULER
+ZFS issues I/O operations to leaf vdevs to satisfy and complete I/O operations.
+The scheduler determines when and in what order those operations are issued.
+The scheduler divides operations into five I/O classes,
+prioritized in the following order: sync read, sync write, async read,
+async write, and scrub/resilver.
+Each queue defines the minimum and maximum number of concurrent operations
+that may be issued to the device.
+In addition, the device has an aggregate maximum,
+.Sy zfs_vdev_max_active .
+Note that the sum of the per-queue minima must not exceed the aggregate maximum.
+If the sum of the per-queue maxima exceeds the aggregate maximum,
+then the number of active operations may reach
+.Sy zfs_vdev_max_active ,
+in which case no further operations will be issued,
+regardless of whether all per-queue minima have been met.
+.Pp
+For many physical devices, throughput increases with the number of
+concurrent operations, but latency typically suffers.
+Furthermore, physical devices typically have a limit
+at which more concurrent operations have no
+effect on throughput or can actually cause it to decrease.
+.Pp
+The scheduler selects the next operation to issue by first looking for an
+I/O class whose minimum has not been satisfied.
+Once all are satisfied and the aggregate maximum has not been hit,
+the scheduler looks for classes whose maximum has not been satisfied.
+Iteration through the I/O classes is done in the order specified above.
+No further operations are issued
+if the aggregate maximum number of concurrent operations has been hit,
+or if there are no operations queued for an I/O class that has not hit its maximum.
+Every time an I/O operation is queued or an operation completes,
+the scheduler looks for new operations to issue.
+.Pp
+In general, smaller
+.Sy max_active Ns s
+will lead to lower latency of synchronous operations.
+Larger
+.Sy max_active Ns s
+may lead to higher overall throughput, depending on underlying storage.
+.Pp
+The ratio of the queues'
+.Sy max_active Ns s
+determines the balance of performance between reads, writes, and scrubs.
+For example, increasing
+.Sy zfs_vdev_scrub_max_active
+will cause the scrub or resilver to complete more quickly,
+but reads and writes to have higher latency and lower throughput.
+.Pp
+All I/O classes have a fixed maximum number of outstanding operations,
+except for the async write class.
+Asynchronous writes represent the data that is committed to stable storage
+during the syncing stage for transaction groups.
+Transaction groups enter the syncing state periodically,
+so the number of queued async writes will quickly burst up
+and then bleed down to zero.
+Rather than servicing them as quickly as possible,
+the I/O scheduler changes the maximum number of active async write operations
+according to the amount of dirty data in the pool.
+Since both throughput and latency typically increase with the number of
+concurrent operations issued to physical devices, reducing the
+burstiness in the number of concurrent operations also stabilizes the
+response time of operations from other – and in particular synchronous – queues.
+In broad strokes, the I/O scheduler will issue more concurrent operations
+from the async write queue as there's more dirty data in the pool.
+.
+.Ss Async Writes
+The number of concurrent operations issued for the async write I/O class
+follows a piece-wise linear function defined by a few adjustable points:
+.Bd -literal
+       |              o---------| <-- \fBzfs_vdev_async_write_max_active\fP
+  ^    |             /^         |
+  |    |            / |         |
+active |           /  |         |
+ I/O   |          /   |         |
+count  |         /    |         |
+       |        /     |         |
+       |-------o      |         | <-- \fBzfs_vdev_async_write_min_active\fP
+      0|_______^______|_________|
+       0%      |      |       100% of \fBzfs_dirty_data_max\fP
+               |      |
+               |      `-- \fBzfs_vdev_async_write_active_max_dirty_percent\fP
+               `--------- \fBzfs_vdev_async_write_active_min_dirty_percent\fP
+.Ed
+.Pp
+Until the amount of dirty data exceeds a minimum percentage of the dirty
+data allowed in the pool, the I/O scheduler will limit the number of
+concurrent operations to the minimum.
+As that threshold is crossed, the number of concurrent operations issued
+increases linearly to the maximum at the specified maximum percentage
+of the dirty data allowed in the pool.
+.Pp
+Ideally, the amount of dirty data on a busy pool will stay in the sloped
+part of the function between
+.Sy zfs_vdev_async_write_active_min_dirty_percent
+and
+.Sy zfs_vdev_async_write_active_max_dirty_percent .
+If it exceeds the maximum percentage,
+this indicates that the rate of incoming data is
+greater than the rate that the backend storage can handle.
+In this case, we must further throttle incoming writes,
+as described in the next section.
+.
+.Sh ZFS TRANSACTION DELAY
+We delay transactions when we've determined that the backend storage
+isn't able to accommodate the rate of incoming writes.
+.Pp
+If there is already a transaction waiting, we delay relative to when
+that transaction will finish waiting.
+This way the calculated delay time
+is independent of the number of threads concurrently executing transactions.
+.Pp
+If we are the only waiter, wait relative to when the transaction started,
+rather than the current time.
+This credits the transaction for "time already served",
+e.g. reading indirect blocks.
+.Pp
+The minimum time for a transaction to take is calculated as
+.Dl min_time = min( Ns Sy zfs_delay_scale No * (dirty - min) / (max - dirty), 100ms)
+.Pp
+The delay has two degrees of freedom that can be adjusted via tunables.
+The percentage of dirty data at which we start to delay is defined by
+.Sy zfs_delay_min_dirty_percent .
+This should typically be at or above
+.Sy zfs_vdev_async_write_active_max_dirty_percent ,
+so that we only start to delay after writing at full speed
+has failed to keep up with the incoming write rate.
+The scale of the curve is defined by
+.Sy zfs_delay_scale .
+Roughly speaking, this variable determines the amount of delay at the midpoint of the curve.
+.Bd -literal
+delay
+ 10ms +-------------------------------------------------------------*+
+      |                                                             *|
+  9ms +                                                             *+
+      |                                                             *|
+  8ms +                                                             *+
+      |                                                            * |
+  7ms +                                                            * +
+      |                                                            * |
+  6ms +                                                            * +
+      |                                                            * |
+  5ms +                                                           *  +
+      |                                                           *  |
+  4ms +                                                           *  +
+      |                                                           *  |
+  3ms +                                                          *   +
+      |                                                          *   |
+  2ms +                                              (midpoint) *    +
+      |                                                  |    **     |
+  1ms +                                                  v ***       +
+      |             \fBzfs_delay_scale\fP ---------->     ********         |
+    0 +-------------------------------------*********----------------+
+      0%                    <- \fBzfs_dirty_data_max\fP ->               100%
+.Ed
+.Pp
+Note that since the delay is added to the outstanding time remaining on the
+most recent transaction, it's effectively the inverse of IOPS.
+Here, the midpoint of
+.Em 500us
+translates to
+.Em 2000 IOPS .
+The shape of the curve
+was chosen such that small changes in the amount of accumulated dirty data
+in the first three quarters of the curve yield relatively small differences
+in the amount of delay.
+.Pp
+The effects can be easier to understand when the amount of delay is
+represented on a logarithmic scale:
+.Bd -literal
+delay
+100ms +-------------------------------------------------------------++
+      +                                                              +
+      |                                                              |
+      +                                                             *+
+ 10ms +                                                             *+
+      +                                                           ** +
+      |                                              (midpoint)  **  |
+      +                                                  |     **    +
+  1ms +                                                  v ****      +
+      +             \fBzfs_delay_scale\fP ---------->        *****         +
+      |                                             ****             |
+      +                                          ****                +
+100us +                                        **                    +
+      +                                       *                      +
+      |                                      *                       |
+      +                                     *                        +
+ 10us +                                     *                        +
+      +                                                              +
+      |                                                              |
+      +                                                              +
+      +--------------------------------------------------------------+
+      0%                    <- \fBzfs_dirty_data_max\fP ->               100%
+.Ed
+.Pp
+Note here that only as the amount of dirty data approaches its limit does
+the delay start to increase rapidly.
+The goal of a properly tuned system should be to keep the amount of dirty data
+out of that range by first ensuring that the appropriate limits are set
+for the I/O scheduler to reach optimal throughput on the back-end storage,
+and then by changing the value of
+.Sy zfs_delay_scale
+to increase the steepness of the curve.

diff --git a/zfs/man/man5/Makefile.am b/zfs/man/man5/Makefile.am
deleted file mode 100644
index 1c0683e..0000000
--- a/zfs/man/man5/Makefile.am
+++ /dev/null

@@ -1,9 +0,0 @@
-dist_man_MANS = \
-	vdev_id.conf.5 \
-	zpool-features.5 \
-	spl-module-parameters.5 \
-	zfs-module-parameters.5 \
-	zfs-events.5
-
-install-data-local:
-	$(INSTALL) -d -m 0755 "$(DESTDIR)$(mandir)/man5"

diff --git a/zfs/man/man5/spl-module-parameters.5 b/zfs/man/man5/spl-module-parameters.5
deleted file mode 100644
index 30d9fc7..0000000
--- a/zfs/man/man5/spl-module-parameters.5
+++ /dev/null

@@ -1,357 +0,0 @@
-'\" te
-.\"
-.\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
-.\"
-.TH SPL-MODULE-PARAMETERS 5 "Oct 28, 2017"
-.SH NAME
-spl\-module\-parameters \- SPL module parameters
-.SH DESCRIPTION
-.sp
-.LP
-Description of the different parameters to the SPL module.
-
-.SS "Module parameters"
-.sp
-.LP
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_expire\fR (uint)
-.ad
-.RS 12n
-Cache expiration is part of default Illumos cache behavior.  The idea is
-that objects in magazines which have not been recently accessed should be
-returned to the slabs periodically.  This is known as cache aging and
-when enabled objects will be typically returned after 15 seconds.
-.sp
-On the other hand Linux slabs are designed to never move objects back to
-the slabs unless there is memory pressure.  This is possible because under
-Linux the cache will be notified when memory is low and objects can be
-released.
-.sp
-By default only the Linux method is enabled.  It has been shown to improve
-responsiveness on low memory systems and not negatively impact the performance
-of systems with more memory.  This policy may be changed by setting the
-\fBspl_kmem_cache_expire\fR bit mask as follows, both policies may be enabled
-concurrently.
-.sp
-0x01 - Aging (Illumos), 0x02 - Low memory (Linux)
-.sp
-Default value: \fB0x02\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_kmem_threads\fR (uint)
-.ad
-.RS 12n
-The number of threads created for the spl_kmem_cache task queue.  This task
-queue is responsible for allocating new slabs for use by the kmem caches.
-For the majority of systems and workloads only a small number of threads are
-required.
-.sp
-Default value: \fB4\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_reclaim\fR (uint)
-.ad
-.RS 12n
-When this is set it prevents Linux from being able to rapidly reclaim all the
-memory held by the kmem caches.  This may be useful in circumstances where
-it's preferable that Linux reclaim memory from some other subsystem first.
-Setting this will increase the likelihood of out of memory events on a memory
-constrained system.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_obj_per_slab\fR (uint)
-.ad
-.RS 12n
-The preferred number of objects per slab in the cache.   In general, a larger
-value will increase the caches memory footprint while decreasing the time
-required to perform an allocation.  Conversely, a smaller value will minimize
-the footprint and improve cache reclaim time but individual allocations may
-take longer.
-.sp
-Default value: \fB8\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_obj_per_slab_min\fR (uint)
-.ad
-.RS 12n
-The minimum number of objects allowed per slab.  Normally slabs will contain
-\fBspl_kmem_cache_obj_per_slab\fR objects but for caches that contain very
-large objects it's desirable to only have a few, or even just one, object per
-slab.
-.sp
-Default value: \fB1\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_max_size\fR (uint)
-.ad
-.RS 12n
-The maximum size of a kmem cache slab in MiB.  This effectively limits
-the maximum cache object size to \fBspl_kmem_cache_max_size\fR /
-\fBspl_kmem_cache_obj_per_slab\fR.  Caches may not be created with
-object sized larger than this limit.
-.sp
-Default value: \fB32 (64-bit) or 4 (32-bit)\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_slab_limit\fR (uint)
-.ad
-.RS 12n
-For small objects the Linux slab allocator should be used to make the most
-efficient use of the memory.  However, large objects are not supported by
-the Linux slab and therefore the SPL implementation is preferred.  This
-value is used to determine the cutoff between a small and large object.
-.sp
-Objects of \fBspl_kmem_cache_slab_limit\fR or smaller will be allocated
-using the Linux slab allocator, large objects use the SPL allocator.  A
-cutoff of 16K was determined to be optimal for architectures using 4K pages.
-.sp
-Default value: \fB16,384\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_kmem_limit\fR (uint)
-.ad
-.RS 12n
-Depending on the size of a cache object it may be backed by kmalloc()'d
-or vmalloc()'d memory.  This is because the size of the required allocation
-greatly impacts the best way to allocate the memory.
-.sp
-When objects are small and only a small number of memory pages need to be
-allocated, ideally just one, then kmalloc() is very efficient.  However,
-when allocating multiple pages with kmalloc() it gets increasingly expensive
-because the pages must be physically contiguous.
-.sp
-For this reason we shift to vmalloc() for slabs of large objects, which
-removes the need for contiguous pages.  We cannot use vmalloc() in
-all cases because there is significant locking overhead involved.  This
-function takes a single global lock over the entire virtual address range
-which serializes all allocations.  Using slightly different allocation
-functions for small and large objects allows us to handle a wide range of
-object sizes.
-.sp
-The \fBspl_kmem_cache_kmem_limit\fR value is used to determine this cutoff
-size.  One quarter the PAGE_SIZE is used as the default value because
-\fBspl_kmem_cache_obj_per_slab\fR defaults to 16.  This means that at
-most we will need to allocate four contiguous pages.
-.sp
-Default value: \fBPAGE_SIZE/4\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_alloc_warn\fR (uint)
-.ad
-.RS 12n
-As a general rule kmem_alloc() allocations should be small, preferably
-just a few pages since they must be physically contiguous.  Therefore, a
-rate limited warning will be printed to the console for any kmem_alloc()
-which exceeds a reasonable threshold.
-.sp
-The default warning threshold is set to eight pages but capped at 32K to
-accommodate systems using large pages.  This value was selected to be small
-enough to ensure the largest allocations are quickly noticed and fixed.
-But large enough to avoid logging any warnings when an allocation size is
-larger than optimal but not a serious concern.  Since this value is tunable,
-developers are encouraged to set it lower when testing so any new largish
-allocations are quickly caught.  These warnings may be disabled by setting
-the threshold to zero.
-.sp
-Default value: \fB32,768\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_alloc_max\fR (uint)
-.ad
-.RS 12n
-Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
-Allocations which are marginally smaller than this limit may succeed but
-should still be avoided due to the expense of locating a contiguous range
-of free pages.  Therefore, a maximum kmem size with reasonable safety
-margin of 4x is set.  Kmem_alloc() allocations larger than this maximum
-will quickly fail.  Vmem_alloc() allocations less than or equal to this
-value will use kmalloc(), but shift to vmalloc() when exceeding this value.
-.sp
-Default value: \fBKMALLOC_MAX_SIZE/4\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_kmem_cache_magazine_size\fR (uint)
-.ad
-.RS 12n
-Cache magazines are an optimization designed to minimize the cost of
-allocating memory.  They do this by keeping a per-cpu cache of recently
-freed objects, which can then be reallocated without taking a lock. This
-can improve performance on highly contended caches.  However, because
-objects in magazines will prevent otherwise empty slabs from being
-immediately released this may not be ideal for low memory machines.
-.sp
-For this reason \fBspl_kmem_cache_magazine_size\fR can be used to set a
-maximum magazine size.  When this value is set to 0 the magazine size will
-be automatically determined based on the object size.  Otherwise magazines
-will be limited to 2-256 objects per magazine (i.e. per cpu).  Magazines
-may never be entirely disabled in this implementation.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_hostid\fR (ulong)
-.ad
-.RS 12n
-The system hostid, when set this can be used to uniquely identify a system.
-By default this value is set to zero which indicates the hostid is disabled.
-It can be explicitly enabled by placing a unique non-zero value in
-\fB/etc/hostid/\fR.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_hostid_path\fR (charp)
-.ad
-.RS 12n
-The expected path to locate the system hostid when specified.  This value
-may be overridden for non-standard configurations.
-.sp
-Default value: \fB/etc/hostid\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_panic_halt\fR (uint)
-.ad
-.RS 12n
-Cause a kernel panic on assertion failures. When not enabled, the thread is 
-halted to facilitate further debugging.
-.sp
-Set to a non-zero value to enable.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_taskq_kick\fR (uint)
-.ad
-.RS 12n
-Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will
-scan all the taskqs. If any of them have a pending task more than 5 seconds old,
-it will kick it to spawn more threads. This can be used if you find a rare
-deadlock occurs because one or more taskqs didn't spawn a thread when it should.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_taskq_thread_bind\fR (int)
-.ad
-.RS 12n
-Bind taskq threads to specific CPUs.  When enabled all taskq threads will
-be distributed evenly  over the available CPUs.  By default, this behavior
-is disabled to allow the Linux scheduler the maximum flexibility to determine
-where a thread should run.
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_taskq_thread_dynamic\fR (int)
-.ad
-.RS 12n
-Allow dynamic taskqs.  When enabled taskqs which set the TASKQ_DYNAMIC flag
-will by default create only a single thread.  New threads will be created on
-demand up to a maximum allowed number to facilitate the completion of
-outstanding tasks.  Threads which are no longer needed will be promptly
-destroyed.  By default this behavior is enabled but it can be disabled to
-aid performance analysis or troubleshooting.
-.sp
-Default value: \fB1\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_taskq_thread_priority\fR (int)
-.ad
-.RS 12n
-Allow newly created taskq threads to set a non-default scheduler priority.
-When enabled the priority specified when a taskq is created will be applied
-to all threads created by that taskq.  When disabled all threads will use
-the default Linux kernel thread priority.  By default, this behavior is
-enabled.
-.sp
-Default value: \fB1\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_taskq_thread_sequential\fR (int)
-.ad
-.RS 12n
-The number of items a taskq worker thread must handle without interruption
-before requesting a new worker thread be spawned.  This is used to control
-how quickly taskqs ramp up the number of threads processing the queue.
-Because Linux thread creation and destruction are relatively inexpensive a
-small default value has been selected.  This means that normally threads will
-be created aggressively which is desirable.  Increasing this value will
-result in a slower thread creation rate which may be preferable for some
-configurations.
-.sp
-Default value: \fB4\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBspl_max_show_tasks\fR (uint)
-.ad
-.RS 12n
-The maximum number of tasks per pending list in each taskq shown in
-/proc/spl/{taskq,taskq-all}. Write 0 to turn off the limit. The proc file will
-walk the lists with lock held, reading it could cause a lock up if the list
-grows too large without limiting the output. "(truncated)" will be shown if the
-list is larger than the limit.
-.sp
-Default value: \fB512\fR
-.RE

diff --git a/zfs/man/man5/vdev_id.conf.5 b/zfs/man/man5/vdev_id.conf.5
index 89c5ee9..a2d38ad 100644
--- a/zfs/man/man5/vdev_id.conf.5
+++ b/zfs/man/man5/vdev_id.conf.5

@@ -1,222 +1,249 @@
-.TH vdev_id.conf 5
-.SH NAME
-vdev_id.conf \- Configuration file for vdev_id
-.SH DESCRIPTION
-.I vdev_id.conf
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.Dd May 26, 2021
+.Dt VDEV_ID.CONF 5
+.Os
+.
+.Sh NAME
+.Nm vdev_id.conf
+.Nd configuration file for vdev_id(8)
+.Sh DESCRIPTION
+.Nm
 is the configuration file for
-.BR vdev_id (8).
+.Xr vdev_id 8 .
 It controls the default behavior of
-.BR vdev_id (8)
+.Xr vdev_id 8
 while it is mapping a disk device name to an alias.
-.PP
+.Pp
 The
-.I vdev_id.conf
+.Nm
 file uses a simple format consisting of a keyword followed by one or
-more values on a single line.  Any line not beginning with a recognized
-keyword is ignored.  Comments may optionally begin with a hash
-character.
-
+more values on a single line.
+Any line not beginning with a recognized keyword is ignored.
+Comments may optionally begin with a hash character.
+.Pp
 The following keywords and values are used.
-.TP
-\fIalias\fR <name> <devlink>
-Maps a device link in the /dev directory hierarchy to a new device
-name.  The udev rule defining the device link must have run prior to
-.BR vdev_id (8).
+.Bl -tag -width "-h"
+.It Sy alias Ar name Ar devlink
+Maps a device link in the
+.Pa /dev
+directory hierarchy to a new device name.
+The udev rule defining the device link must have run prior to
+.Xr vdev_id 8 .
 A defined alias takes precedence over a topology-derived name, but the
-two naming methods can otherwise coexist.  For example, one might name
-drives in a JBOD with the sas_direct topology while naming an internal
-L2ARC device with an alias.
-
-\fIname\fR - the name of the link to the device that will by created in
-/dev/disk/by-vdev.
-
-\fIdevlink\fR - the name of the device link that has already been
-defined by udev.  This may be an absolute path or the base filename.
-
-.TP
-\fIchannel\fR [pci_slot] <port> <name>
+two naming methods can otherwise coexist.
+For example, one might name drives in a JBOD with the
+.Sy sas_direct
+topology while naming an internal L2ARC device with an alias.
+.Pp
+.Ar name
+is the name of the link to the device that will be created under
+.Pa /dev/disk/by-vdev .
+.Pp
+.Ar devlink
+is the name of the device link that has already been
+defined by udev.
+This may be an absolute path or the base filename.
+.
+.It Sy channel [ Ns Ar pci_slot ] Ar port Ar name
 Maps a physical path to a channel name (typically representing a single
 disk enclosure).
-
-.TP
-\fIenclosure_symlinks\fR <yes|no>
-Additionally create /dev/by-enclosure symlinks to the disk enclosure
-sg devices using the naming scheme from vdev_id.conf.
-\fIenclosure_symlinks\fR is only allowed for sas_direct mode.
-.TP
-\fIenclosure_symlinks_prefix\fR <prefix>
-Specify the prefix for the enclosure symlinks in the form of:
-
-/dev/by-enclosure/<prefix>-<channel><num>
-
-Defaults to "enc" if not specified.
-.TP
-\fIpci_slot\fR - specifies the PCI SLOT of the HBA
-hosting the disk enclosure being mapped, as found in the output of
-.BR lspci (8).
-This argument is not used in sas_switch mode.
-
-\fIport\fR - specifies the numeric identifier of the HBA or SAS switch port
-connected to the disk enclosure being mapped.
-
-\fIname\fR - specifies the name of the channel.
-
-.TP
-\fIslot\fR <old> <new> [channel]
+.
+.It Sy enclosure_symlinks Sy yes Ns | Ns Sy no
+Additionally create
+.Pa /dev/by-enclosure
+symlinks to the disk enclosure
+.Em sg
+devices using the naming scheme from
+.Pa vdev_id.conf .
+.Sy enclosure_symlinks
+is only allowed for
+.Sy sas_direct
+mode.
+.
+.It Sy enclosure_symlinks_prefix Ar prefix
+Specify the prefix for the enclosure symlinks in the form
+.Pa /dev/by-enclosure/ Ns Ao Ar prefix Ac Ns - Ns Ao Ar channel Ac Ns Aq Ar num
+.Pp
+Defaults to
+.Dq Em enc .
+.
+.It Sy slot Ar old Ar new Op Ar channel
 Maps a disk slot number as reported by the operating system to an
-alternative slot number.  If the \fIchannel\fR parameter is specified
+alternative slot number.
+If the
+.Ar channel
+parameter is specified
 then the mapping is only applied to slots in the named channel,
-otherwise the mapping is applied to all channels. The first-specified
-\fIslot\fR rule that can match a slot takes precedence.  Therefore a
-channel-specific mapping for a given slot should generally appear before
-a generic mapping for the same slot.  In this way a custom mapping may
-be applied to a particular channel and a default mapping applied to the
-others.
-
-.TP
-\fImultipath\fR <yes|no>
+otherwise the mapping is applied to all channels.
+The first-specified
+.Sy slot
+rule that can match a slot takes precedence.
+Therefore a channel-specific mapping for a given slot should generally appear
+before a generic mapping for the same slot.
+In this way a custom mapping may be applied to a particular channel
+and a default mapping applied to the others.
+.
+.It Sy multipath Sy yes Ns | Ns Sy no
 Specifies whether
-.BR vdev_id (8)
-will handle only dm-multipath devices.  If set to "yes" then
-.BR vdev_id (8)
+.Xr vdev_id 8
+will handle only dm-multipath devices.
+If set to
+.Sy yes
+then
+.Xr vdev_id 8
 will examine the first running component disk of a dm-multipath
-device as listed by the
-.BR multipath (8)
-command to determine the physical path.
-.TP
-\fItopology\fR <sas_direct|sas_switch>
+device as provided by the driver command to determine the physical path.
+.
+.It Sy topology Sy sas_direct Ns | Ns Sy sas_switch Ns | Ns Sy scsi
 Identifies a physical topology that governs how physical paths are
-mapped to channels.
-
-\fIsas_direct\fR - in this mode a channel is uniquely identified by
-a PCI slot and a HBA port number
-
-\fIsas_switch\fR - in this mode a channel is uniquely identified by
-a SAS switch port number
-
-.TP
-\fIphys_per_port\fR <num>
+mapped to channels:
+.Bl -tag -compact -width "sas_direct and scsi"
+.It Sy sas_direct No and Sy scsi
+channels are uniquely identified by a PCI slot and HBA port number
+.It Sy sas_switch
+channels are uniquely identified by a SAS switch port number
+.El
+.
+.It Sy phys_per_port Ar num
 Specifies the number of PHY devices associated with a SAS HBA port or SAS
 switch port.
-.BR vdev_id (8)
+.Xr vdev_id 8
 internally uses this value to determine which HBA or switch port a
-device is connected to.  The default is 4.
-
-.TP
-\fIslot\fR <bay|phy|port|id|lun|ses>
+device is connected to.
+The default is
+.Sy 4 .
+.
+.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy ses
 Specifies from which element of a SAS identifier the slot number is
-taken.  The default is bay.
-
-\fIbay\fR - read the slot number from the bay identifier.
-
-\fIphy\fR - read the slot number from the phy identifier.
-
-\fIport\fR - use the SAS port as the slot number.
-
-\fIid\fR - use the scsi id as the slot number.
-
-\fIlun\fR - use the scsi lun as the slot number.
-
-\fIses\fR - use the SCSI Enclosure Services (SES) enclosure device slot number,
+taken.
+The default is
+.Sy bay :
+.Bl -tag -compact -width "port"
+.It Sy bay
+read the slot number from the bay identifier.
+.It Sy phy
+read the slot number from the phy identifier.
+.It Sy port
+use the SAS port as the slot number.
+.It Sy id
+use the scsi id as the slot number.
+.It Sy lun
+use the scsi lun as the slot number.
+.It Sy ses
+use the SCSI Enclosure Services (SES) enclosure device slot number,
 as reported by
-.BR sg_ses (8).
-This is intended for use only on systems where \fIbay\fR is unsupported,
-noting that \fIport\fR and \fIid\fR may be unstable across disk replacement.
-.SH EXAMPLES
-A non-multipath configuration with direct-attached SAS enclosures and an
-arbitrary slot re-mapping.
-.P
-.nf
-	multipath     no
-	topology      sas_direct
-	phys_per_port 4
-	slot          bay
-
-	#       PCI_SLOT HBA PORT  CHANNEL NAME
-	channel 85:00.0  1         A
-	channel 85:00.0  0         B
-	channel 86:00.0  1         C
-	channel 86:00.0  0         D
-
-	# Custom mapping for Channel A
-
-	#    Linux      Mapped
-	#    Slot       Slot      Channel
-	slot 1          7         A
-	slot 2          10        A
-	slot 3          3         A
-	slot 4          6         A
-
-	# Default mapping for B, C, and D
-
-	slot 1          4
-	slot 2          2
-	slot 3          1
-	slot 4          3
-.fi
-.P
-A SAS-switch topology.  Note that the
-.I channel
-keyword takes only two arguments in this example.
-.P
-.nf
-	topology      sas_switch
-
-	#       SWITCH PORT  CHANNEL NAME
-	channel 1            A
-	channel 2            B
-	channel 3            C
-	channel 4            D
-.fi
-.P
-A multipath configuration.  Note that channel names have multiple
-definitions - one per physical path.
-.P
-.nf
-	multipath yes
-
-	#       PCI_SLOT HBA PORT  CHANNEL NAME
-	channel 85:00.0  1         A
-	channel 85:00.0  0         B
-	channel 86:00.0  1         A
-	channel 86:00.0  0         B
-.fi
-.P
-A configuration with enclosure_symlinks enabled.
-.P
-.nf
-	multipath yes
-	enclosure_symlinks yes
-
-	#          PCI_ID      HBA PORT     CHANNEL NAME
-	channel    05:00.0     1            U
-	channel    05:00.0     0            L
-	channel    06:00.0     1            U
-	channel    06:00.0     0            L
-.fi
-In addition to the disks symlinks, this configuration will create:
-.P
-.nf
-	/dev/by-enclosure/enc-L0
-	/dev/by-enclosure/enc-L1
-	/dev/by-enclosure/enc-U0
-	/dev/by-enclosure/enc-U1
-.fi
-.P
-A configuration using device link aliases.
-.P
-.nf
-	#     by-vdev
-	#     name     fully qualified or base name of device link
-	alias d1       /dev/disk/by-id/wwn-0x5000c5002de3b9ca
-	alias d2       wwn-0x5000c5002def789e
-.fi
-.P
-
-.SH FILES
-.TP
-.I /etc/zfs/vdev_id.conf
+.Xr sg_ses 8 .
+Intended for use only on systems where
+.Sy bay
+is unsupported,
+noting that
+.Sy port
+and
+.Sy id
+may be unstable across disk replacement.
+.El
+.El
+.
+.Sh FILES
+.Bl -tag -width "-v v"
+.It Pa /etc/zfs/vdev_id.conf
 The configuration file for
-.BR vdev_id (8).
-.SH SEE ALSO
-.BR vdev_id (8)
+.Xr vdev_id 8 .
+.El
+.
+.Sh EXAMPLES
+A non-multipath configuration with direct-attached SAS enclosures and an
+arbitrary slot re-mapping:
+.Bd -literal -compact -offset Ds
+multipath     no
+topology      sas_direct
+phys_per_port 4
+slot          bay
+
+#       PCI_SLOT HBA PORT  CHANNEL NAME
+channel 85:00.0  1         A
+channel 85:00.0  0         B
+channel 86:00.0  1         C
+channel 86:00.0  0         D
+
+# Custom mapping for Channel A
+
+#    Linux      Mapped
+#    Slot       Slot      Channel
+slot 1          7         A
+slot 2          10        A
+slot 3          3         A
+slot 4          6         A
+
+# Default mapping for B, C, and D
+
+slot 1          4
+slot 2          2
+slot 3          1
+slot 4          3
+.Ed
+.Pp
+A SAS-switch topology.
+Note that the
+.Sy channel
+keyword takes only two arguments in this example:
+.Bd -literal -compact -offset Ds
+topology      sas_switch
+
+#       SWITCH PORT  CHANNEL NAME
+channel 1            A
+channel 2            B
+channel 3            C
+channel 4            D
+.Ed
+.Pp
+A multipath configuration.
+Note that channel names have multiple definitions - one per physical path:
+.Bd -literal -compact -offset Ds
+multipath yes
+
+#       PCI_SLOT HBA PORT  CHANNEL NAME
+channel 85:00.0  1         A
+channel 85:00.0  0         B
+channel 86:00.0  1         A
+channel 86:00.0  0         B
+.Ed
+.Pp
+A configuration with enclosure_symlinks enabled:
+.Bd -literal -compact -offset Ds
+multipath yes
+enclosure_symlinks yes
+
+#          PCI_ID      HBA PORT     CHANNEL NAME
+channel    05:00.0     1            U
+channel    05:00.0     0            L
+channel    06:00.0     1            U
+channel    06:00.0     0            L
+.Ed
+In addition to the disk symlinks, this configuration will create:
+.Bd -literal -compact -offset Ds
+/dev/by-enclosure/enc-L0
+/dev/by-enclosure/enc-L1
+/dev/by-enclosure/enc-U0
+/dev/by-enclosure/enc-U1
+.Ed
+.Pp
+A configuration using device link aliases:
+.Bd -literal -compact -offset Ds
+#     by-vdev
+#     name     fully qualified or base name of device link
+alias d1       /dev/disk/by-id/wwn-0x5000c5002de3b9ca
+alias d2       wwn-0x5000c5002def789e
+.Ed
+.
+.Sh SEE ALSO
+.Xr vdev_id 8

diff --git a/zfs/man/man5/zfs-events.5 b/zfs/man/man5/zfs-events.5
deleted file mode 100644
index 4a28be7..0000000
--- a/zfs/man/man5/zfs-events.5
+++ /dev/null

@@ -1,965 +0,0 @@
-'\" te
-.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
-.\" Portions Copyright 2018 by Richard Elling
-.\" The contents of this file are subject to the terms of the Common Development
-.\" and Distribution License (the "License").  You may not use this file except
-.\" in compliance with the License. You can obtain a copy of the license at
-.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
-.\"
-.\" See the License for the specific language governing permissions and
-.\" limitations under the License. When distributing Covered Code, include this
-.\" CDDL HEADER in each file and include the License file at
-.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
-.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
-.\" own identifying information:
-.\" Portions Copyright [yyyy] [name of copyright owner]
-.TH ZFS-EVENTS 5 "Oct 24, 2018"
-.SH NAME
-zfs\-events \- Events created by the ZFS filesystem.
-.SH DESCRIPTION
-.sp
-.LP
-Description of the different events generated by the ZFS stack.
-.sp
-Most of these don't have any description. The events generated by ZFS
-have never been publicly documented.  What is here is intended as a
-starting point to provide documentation for all possible events.
-.sp
-To view all events created since the loading of the ZFS infrastructure
-(i.e, "the module"), run
-.P
-.nf
-\fBzpool events\fR
-.fi
-.P
-to get a short list, and
-.P
-.nf
-\fBzpool events -v\fR
-.fi
-.P
-to get a full detail of the events and what information
-is available about it.
-.sp
-This man page lists the different subclasses that are issued
-in the case of an event. The full event name would be
-\fIereport.fs.zfs.SUBCLASS\fR, but we only list the last
-part here.
-
-.SS "EVENTS (SUBCLASS)"
-.sp
-.LP
-
-.sp
-.ne 2
-.na
-\fBchecksum\fR
-.ad
-.RS 12n
-Issued when a checksum error has been detected.
-.RE
-
-.sp
-.ne 2
-.na
-\fBio\fR
-.ad
-.RS 12n
-Issued when there is an I/O error in a vdev in the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdata\fR
-.ad
-.RS 12n
-Issued when there have been data errors in the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdeadman\fR
-.ad
-.RS 12n
-Issued when an I/O is determined to be "hung", this can be caused by lost
-completion events due to flaky hardware or drivers.  See the
-\fBzfs_deadman_failmode\fR module option description for additional
-information regarding "hung" I/O detection and configuration.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdelay\fR
-.ad
-.RS 12n
-Issued when a completed I/O exceeds the maximum allowed time specified
-by the \fBzio_slow_io_ms\fR module option.  This can be an indicator of
-problems with the underlying storage device.  The number of delay events is
-ratelimited by the \fBzfs_slow_io_events_per_second\fR module parameter.
-.RE
-
-.sp
-.ne 2
-.na
-\fBconfig.sync\fR
-.ad
-.RS 12n
-Issued every time a vdev change have been done to the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool\fR
-.ad
-.RS 12n
-Issued when a pool cannot be imported.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool.destroy\fR
-.ad
-.RS 12n
-Issued when a pool is destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool.export\fR
-.ad
-.RS 12n
-Issued when a pool is exported.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool.import\fR
-.ad
-.RS 12n
-Issued when a pool is imported.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool.reguid\fR
-.ad
-.RS 12n
-Issued when a REGUID (new unique identifier for the pool have been regenerated) have been detected.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.unknown\fR
-.ad
-.RS 12n
-Issued when the vdev is unknown. Such as trying to clear device
-errors on a vdev that have failed/been kicked from the system/pool
-and is no longer available.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.open_failed\fR
-.ad
-.RS 12n
-Issued when a vdev could not be opened (because it didn't exist for example).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.corrupt_data\fR
-.ad
-.RS 12n
-Issued when corrupt data have been detected on a vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.no_replicas\fR
-.ad
-.RS 12n
-Issued when there are no more replicas to sustain the pool.
-This would lead to the pool being \fIDEGRADED\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.bad_guid_sum\fR
-.ad
-.RS 12n
-Issued when a missing device in the pool have been detected.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.too_small\fR
-.ad
-.RS 12n
-Issued when the system (kernel) have removed a device, and ZFS
-notices that the device isn't there any more. This is usually
-followed by a \fBprobe_failure\fR event.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.bad_label\fR
-.ad
-.RS 12n
-Issued when the label is OK but invalid.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.bad_ashift\fR
-.ad
-.RS 12n
-Issued when the ashift alignment requirement has increased.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.remove\fR
-.ad
-.RS 12n
-Issued when a vdev is detached from a mirror (or a spare detached from a
-vdev where it have been used to replace a failed drive - only works if
-the original drive have been readded).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.clear\fR
-.ad
-.RS 12n
-Issued when clearing device errors in a pool. Such as running \fBzpool clear\fR
-on a device in the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.check\fR
-.ad
-.RS 12n
-Issued when a check to see if a given vdev could be opened is started.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.spare\fR
-.ad
-.RS 12n
-Issued when a spare have kicked in to replace a failed device.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev.autoexpand\fR
-.ad
-.RS 12n
-Issued when a vdev can be automatically expanded.
-.RE
-
-.sp
-.ne 2
-.na
-\fBio_failure\fR
-.ad
-.RS 12n
-Issued when there is an I/O failure in a vdev in the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBprobe_failure\fR
-.ad
-.RS 12n
-Issued when a probe fails on a vdev. This would occur if a vdev
-have been kicked from the system outside of ZFS (such as the kernel
-have removed the device).
-.RE
-
-.sp
-.ne 2
-.na
-\fBlog_replay\fR
-.ad
-.RS 12n
-Issued when the intent log cannot be replayed.  The can occur in the case
-of a missing or damaged log device.
-.RE
-
-.sp
-.ne 2
-.na
-\fBresilver.start\fR
-.ad
-.RS 12n
-Issued when a resilver is started.
-.RE
-
-.sp
-.ne 2
-.na
-\fBresilver.finish\fR
-.ad
-.RS 12n
-Issued when the running resilver have finished.
-.RE
-
-.sp
-.ne 2
-.na
-\fBscrub.start\fR
-.ad
-.RS 12n
-Issued when a scrub is started on a pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBscrub.finish\fR
-.ad
-.RS 12n
-Issued when a pool has finished scrubbing.
-.RE
-
-.sp
-.ne 2
-.na
-\fBscrub.abort\fR
-.ad
-.RS 12n
-Issued when a scrub is aborted on a pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBscrub.resume\fR
-.ad
-.RS 12n
-Issued when a scrub is resumed on a pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBscrub.paused\fR
-.ad
-.RS 12n
-Issued when a scrub is paused on a pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbootfs.vdev.attach\fR
-.ad
-.RS 12n
-.RE
-
-.SS "PAYLOADS"
-.sp
-.LP
-This is the payload (data, information) that accompanies an
-event.
-.sp
-For
-.BR zed (8),
-these are set to uppercase and prefixed with \fBZEVENT_\fR.
-
-.sp
-.ne 2
-.na
-\fBpool\fR
-.ad
-.RS 12n
-Pool name.
-.RE
-
-.sp
-.ne 2
-.na
-\fBpool_failmode\fR
-.ad
-.RS 12n
-Failmode - \fBwait\fR, \fBcontinue\fR or \fBpanic\fR.
-See
-.BR zpool (8)
-(\fIfailmode\fR property) for more information.
-.RE
-
-.sp
-.ne 2
-.na
-\fBpool_guid\fR
-.ad
-.RS 12n
-The GUID of the pool.
-.RE
-
-.sp
-.ne 2
-.na
-\fBpool_context\fR
-.ad
-.RS 12n
-The load state for the pool (0=none, 1=open, 2=import, 3=tryimport, 4=recover
-5=error).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_guid\fR
-.ad
-.RS 12n
-The GUID of the vdev in question (the vdev failing or operated upon with
-\fBzpool clear\fR etc).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_type\fR
-.ad
-.RS 12n
-Type of vdev - \fBdisk\fR, \fBfile\fR, \fBmirror\fR etc. See
-.BR zpool (8)
-under \fBVirtual Devices\fR for more information on possible values.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_path\fR
-.ad
-.RS 12n
-Full path of the vdev, including any \fI-partX\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_devid\fR
-.ad
-.RS 12n
-ID of vdev (if any).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_fru\fR
-.ad
-.RS 12n
-Physical FRU location.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_state\fR
-.ad
-.RS 12n
-State of vdev (0=uninitialized, 1=closed, 2=offline, 3=removed, 4=failed to open, 5=faulted, 6=degraded, 7=healthy).
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_ashift\fR
-.ad
-.RS 12n
-The ashift value of the vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_complete_ts\fR
-.ad
-.RS 12n
-The time the last I/O completed for the specified vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_delta_ts\fR
-.ad
-.RS 12n
-The time since the last I/O completed for the specified vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_spare_paths\fR
-.ad
-.RS 12n
-List of spares, including full path and any \fI-partX\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_spare_guids\fR
-.ad
-.RS 12n
-GUID(s) of spares.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_read_errors\fR
-.ad
-.RS 12n
-How many read errors that have been detected on the vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_write_errors\fR
-.ad
-.RS 12n
-How many write errors that have been detected on the vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_cksum_errors\fR
-.ad
-.RS 12n
-How many checksum errors that have been detected on the vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBparent_guid\fR
-.ad
-.RS 12n
-GUID of the vdev parent.
-.RE
-
-.sp
-.ne 2
-.na
-\fBparent_type\fR
-.ad
-.RS 12n
-Type of parent. See \fBvdev_type\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBparent_path\fR
-.ad
-.RS 12n
-Path of the vdev parent (if any).
-.RE
-
-.sp
-.ne 2
-.na
-\fBparent_devid\fR
-.ad
-.RS 12n
-ID of the vdev parent (if any).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_objset\fR
-.ad
-.RS 12n
-The object set number for a given I/O.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_object\fR
-.ad
-.RS 12n
-The object number for a given I/O.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_level\fR
-.ad
-.RS 12n
-The indirect level for the block. Level 0 is the lowest level and includes
-data blocks. Values > 0 indicate metadata blocks at the appropriate level.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_blkid\fR
-.ad
-.RS 12n
-The block ID for a given I/O.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_err\fR
-.ad
-.RS 12n
-The errno for a failure when handling a given I/O. The errno is compatible
-with \fBerrno\fR(3) with the value for EBADE (0x34) used to indicate ZFS
-checksum error.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_offset\fR
-.ad
-.RS 12n
-The offset in bytes of where to write the I/O for the specified vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_size\fR
-.ad
-.RS 12n
-The size in bytes of the I/O.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_flags\fR
-.ad
-.RS 12n
-The current flags describing how the I/O should be handled.  See the
-\fBI/O FLAGS\fR section for the full list of I/O flags.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_stage\fR
-.ad
-.RS 12n
-The current stage of the I/O in the pipeline.  See the \fBI/O STAGES\fR
-section for a full list of all the I/O stages.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_pipeline\fR
-.ad
-.RS 12n
-The valid pipeline stages for the I/O.  See the \fBI/O STAGES\fR section for a
-full list of all the I/O stages.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_delay\fR
-.ad
-.RS 12n
-The time elapsed (in nanoseconds) waiting for the block layer to complete the
-I/O.  Unlike \fBzio_delta\fR this does not include any vdev queuing time and is
-therefore solely a measure of the block layer performance.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_timestamp\fR
-.ad
-.RS 12n
-The time when a given I/O was submitted.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_delta\fR
-.ad
-.RS 12n
-The time required to service a given I/O.
-.RE
-
-.sp
-.ne 2
-.na
-\fBprev_state\fR
-.ad
-.RS 12n
-The previous state of the vdev.
-.RE
-
-.sp
-.ne 2
-.na
-\fBcksum_expected\fR
-.ad
-.RS 12n
-The expected checksum value for the block.
-.RE
-
-.sp
-.ne 2
-.na
-\fBcksum_actual\fR
-.ad
-.RS 12n
-The actual checksum value for an errant block.
-.RE
-
-.sp
-.ne 2
-.na
-\fBcksum_algorithm\fR
-.ad
-.RS 12n
-Checksum algorithm used. See \fBzfs\fR(8) for more information on checksum
-algorithms available.
-.RE
-
-.sp
-.ne 2
-.na
-\fBcksum_byteswap\fR
-.ad
-.RS 12n
-Whether or not the data is byteswapped.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_ranges\fR
-.ad
-.RS 12n
-[start, end) pairs of corruption offsets. Offsets are always aligned on a
-64-bit boundary, and can include some gaps of non-corruption.
-(See \fBbad_ranges_min_gap\fR)
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_ranges_min_gap\fR
-.ad
-.RS 12n
-In order to bound the size of the \fBbad_ranges\fR array, gaps of non-corruption
-less than or equal to \fBbad_ranges_min_gap\fR bytes have been merged with
-adjacent corruption. Always at least 8 bytes, since corruption is detected
-on a 64-bit word basis.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_range_sets\fR
-.ad
-.RS 12n
-This array has one element per range in \fBbad_ranges\fR. Each element contains
-the count of bits in that range which were clear in the good data and set
-in the bad data.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_range_clears\fR
-.ad
-.RS 12n
-This array has one element per range in \fBbad_ranges\fR. Each element contains
-the count of bits for that range which were set in the good data and clear in
-the bad data.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_set_bits\fR
-.ad
-.RS 12n
-If this field exists, it is an array of: (bad data & ~(good data)); that is,
-the bits set in the bad data which are cleared in the good data. Each element
-corresponds a byte whose offset is in a range in \fBbad_ranges\fR, and the
-array is ordered by offset. Thus, the first element is the first byte in the
-first \fBbad_ranges\fR range, and the last element is the last byte in the last
-\fBbad_ranges\fR range.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_cleared_bits\fR
-.ad
-.RS 12n
-Like \fBbad_set_bits\fR, but contains: (good data & ~(bad data)); that is,
-the bits set in the good data which are cleared in the bad data.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_set_histogram\fR
-.ad
-.RS 12n
-If this field exists, it is an array of counters. Each entry counts bits set
-in a particular bit of a big-endian uint64 type. The first entry counts bits
-set in the high-order bit of the first byte, the 9th byte, etc, and the last
-entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
-This information is useful for observing a stuck bit in a parallel data path,
-such as IDE or parallel SCSI.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbad_cleared_histogram\fR
-.ad
-.RS 12n
-If this field exists, it is an array of counters. Each entry counts bit clears
-in a particular bit of a big-endian uint64 type. The first entry counts bits
-clears of the high-order bit of the first byte, the 9th byte, etc, and the
-last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
-etc. This information is useful for observing a stuck bit in a parallel data
-path, such as IDE or parallel SCSI.
-.RE
-
-.SS "I/O STAGES"
-.sp
-.LP
-The ZFS I/O pipeline is comprised of various stages which are defined
-below.  The individual stages are used to construct these basic I/O
-operations: Read, Write, Free, Claim, and Ioctl.  These stages may be
-set on an event to describe the life cycle of a given I/O.
-
-.TS
-tab(:);
-l l l .
-Stage:Bit Mask:Operations
-_:_:_
-ZIO_STAGE_OPEN:0x00000001:RWFCI
-
-ZIO_STAGE_READ_BP_INIT:0x00000002:R----
-ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W---
-ZIO_STAGE_FREE_BP_INIT:0x00000008:--F--
-ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF--
-ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W---
-
-ZIO_STAGE_ENCRYPT:0x00000040:-W---
-ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W---
-
-ZIO_STAGE_NOP_WRITE:0x00000100:-W---
-
-ZIO_STAGE_DDT_READ_START:0x00000200:R----
-ZIO_STAGE_DDT_READ_DONE:0x00000400:R----
-ZIO_STAGE_DDT_WRITE:0x00000800:-W---
-ZIO_STAGE_DDT_FREE:0x00001000:--F--
-
-ZIO_STAGE_GANG_ASSEMBLE:0x00002000:RWFC-
-ZIO_STAGE_GANG_ISSUE:0x00004000:RWFC-
-
-ZIO_STAGE_DVA_THROTTLE:0x00008000:-W---
-ZIO_STAGE_DVA_ALLOCATE:0x00010000:-W---
-ZIO_STAGE_DVA_FREE:0x00020000:--F--
-ZIO_STAGE_DVA_CLAIM:0x00040000:---C-
-
-ZIO_STAGE_READY:0x00080000:RWFCI
-
-ZIO_STAGE_VDEV_IO_START:0x00100000:RW--I
-ZIO_STAGE_VDEV_IO_DONE:0x00200000:RW--I
-ZIO_STAGE_VDEV_IO_ASSESS:0x00400000:RW--I
-
-ZIO_STAGE_CHECKSUM_VERIFY:0x00800000:R----
-
-ZIO_STAGE_DONE:0x01000000:RWFCI
-.TE
-
-.SS "I/O FLAGS"
-.sp
-.LP
-Every I/O in the pipeline contains a set of flags which describe its
-function and are used to govern its behavior.  These flags will be set
-in an event as an \fBzio_flags\fR payload entry.
-
-.TS
-tab(:);
-l l .
-Flag:Bit Mask
-_:_
-ZIO_FLAG_DONT_AGGREGATE:0x00000001
-ZIO_FLAG_IO_REPAIR:0x00000002
-ZIO_FLAG_SELF_HEAL:0x00000004
-ZIO_FLAG_RESILVER:0x00000008
-ZIO_FLAG_SCRUB:0x00000010
-ZIO_FLAG_SCAN_THREAD:0x00000020
-ZIO_FLAG_PHYSICAL:0x00000040
-
-ZIO_FLAG_CANFAIL:0x00000080
-ZIO_FLAG_SPECULATIVE:0x00000100
-ZIO_FLAG_CONFIG_WRITER:0x00000200
-ZIO_FLAG_DONT_RETRY:0x00000400
-ZIO_FLAG_DONT_CACHE:0x00000800
-ZIO_FLAG_NODATA:0x00001000
-ZIO_FLAG_INDUCE_DAMAGE:0x00002000
-
-ZIO_FLAG_IO_ALLOCATING:0x00004000
-ZIO_FLAG_IO_RETRY:0x00008000
-ZIO_FLAG_PROBE:0x00010000
-ZIO_FLAG_TRYHARD:0x00020000
-ZIO_FLAG_OPTIONAL:0x00040000
-
-ZIO_FLAG_DONT_QUEUE:0x00080000
-ZIO_FLAG_DONT_PROPAGATE:0x00100000
-ZIO_FLAG_IO_BYPASS:0x00200000
-ZIO_FLAG_IO_REWRITE:0x00400000
-ZIO_FLAG_RAW_COMPRESS:0x00800000
-ZIO_FLAG_RAW_ENCRYPT:0x01000000
-
-ZIO_FLAG_GANG_CHILD:0x02000000
-ZIO_FLAG_DDT_CHILD:0x04000000
-ZIO_FLAG_GODFATHER:0x08000000
-ZIO_FLAG_NOPWRITE:0x10000000
-ZIO_FLAG_REEXECUTED:0x20000000
-ZIO_FLAG_DELEGATED:0x40000000
-ZIO_FLAG_FASTWRITE:0x80000000
-.TE

diff --git a/zfs/man/man5/zfs-module-parameters.5 b/zfs/man/man5/zfs-module-parameters.5
deleted file mode 100644
index 8d30e94..0000000
--- a/zfs/man/man5/zfs-module-parameters.5
+++ /dev/null

@@ -1,3320 +0,0 @@
-'\" te
-.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
-.\" Copyright (c) 2019 by Delphix. All rights reserved.
-.\" Copyright (c) 2019 Datto Inc.
-.\" The contents of this file are subject to the terms of the Common Development
-.\" and Distribution License (the "License").  You may not use this file except
-.\" in compliance with the License. You can obtain a copy of the license at
-.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
-.\"
-.\" See the License for the specific language governing permissions and
-.\" limitations under the License. When distributing Covered Code, include this
-.\" CDDL HEADER in each file and include the License file at
-.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
-.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
-.\" own identifying information:
-.\" Portions Copyright [yyyy] [name of copyright owner]
-.TH ZFS-MODULE-PARAMETERS 5 "Feb 15, 2019"
-.SH NAME
-zfs\-module\-parameters \- ZFS module parameters
-.SH DESCRIPTION
-.sp
-.LP
-Description of the different parameters to the ZFS module.
-
-.SS "Module parameters"
-.sp
-.LP
-
-.sp
-.ne 2
-.na
-\fBdbuf_cache_max_bytes\fR (ulong)
-.ad
-.RS 12n
-Maximum size in bytes of the dbuf cache.  When \fB0\fR this value will default
-to \fB1/2^dbuf_cache_shift\fR (1/32) of the target ARC size, otherwise the
-provided value in bytes will be used.  The behavior of the dbuf cache and its
-associated settings can be observed via the \fB/proc/spl/kstat/zfs/dbufstats\fR
-kstat.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdbuf_metadata_cache_max_bytes\fR (ulong)
-.ad
-.RS 12n
-Maximum size in bytes of the metadata dbuf cache.  When \fB0\fR this value will
-default to \fB1/2^dbuf_metadata_cache_shift\fR (1/64) of the target ARC size, otherwise
-the provided value in bytes will be used.  The behavior of the metadata dbuf
-cache and its associated settings can be observed via the
-\fB/proc/spl/kstat/zfs/dbufstats\fR kstat.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdbuf_cache_hiwater_pct\fR (uint)
-.ad
-.RS 12n
-The percentage over \fBdbuf_cache_max_bytes\fR when dbufs must be evicted
-directly.
-.sp
-Default value: \fB10\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdbuf_cache_lowater_pct\fR (uint)
-.ad
-.RS 12n
-The percentage below \fBdbuf_cache_max_bytes\fR when the evict thread stops
-evicting dbufs.
-.sp
-Default value: \fB10\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdbuf_cache_shift\fR (int)
-.ad
-.RS 12n
-Set the size of the dbuf cache, \fBdbuf_cache_max_bytes\fR, to a log2 fraction
-of the target arc size.
-.sp
-Default value: \fB5\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdbuf_metadata_cache_shift\fR (int)
-.ad
-.RS 12n
-Set the size of the dbuf metadata cache, \fBdbuf_metadata_cache_max_bytes\fR,
-to a log2 fraction of the target arc size.
-.sp
-Default value: \fB6\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdmu_prefetch_max\fR (int)
-.ad
-.RS 12n
-Limit the amount we can prefetch with one call to this amount (in bytes).
-This helps to limit the amount of memory that can be used by prefetching.
-.sp
-Default value: \fB134,217,728\fR (128MB).
-.RE
-
-.sp
-.ne 2
-.na
-\fBignore_hole_birth\fR (int)
-.ad
-.RS 12n
-This is an alias for \fBsend_holes_without_birth_time\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_feed_again\fR (int)
-.ad
-.RS 12n
-Turbo L2ARC warm-up. When the L2ARC is cold the fill interval will be set as
-fast as possible.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR to disable.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_feed_min_ms\fR (ulong)
-.ad
-.RS 12n
-Min feed interval in milliseconds. Requires \fBl2arc_feed_again=1\fR and only
-applicable in related situations.
-.sp
-Default value: \fB200\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_feed_secs\fR (ulong)
-.ad
-.RS 12n
-Seconds between L2ARC writing
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_headroom\fR (ulong)
-.ad
-.RS 12n
-How far through the ARC lists to search for L2ARC cacheable content, expressed
-as a multiplier of \fBl2arc_write_max\fR
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_headroom_boost\fR (ulong)
-.ad
-.RS 12n
-Scales \fBl2arc_headroom\fR by this percentage when L2ARC contents are being
-successfully compressed before writing. A value of 100 disables this feature.
-.sp
-Default value: \fB200\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_noprefetch\fR (int)
-.ad
-.RS 12n
-Do not write buffers to L2ARC if they were prefetched but not used by
-applications
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR to disable.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_norw\fR (int)
-.ad
-.RS 12n
-No reads during writes
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_write_boost\fR (ulong)
-.ad
-.RS 12n
-Cold L2ARC devices will have \fBl2arc_write_max\fR increased by this amount
-while they remain cold.
-.sp
-Default value: \fB8,388,608\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBl2arc_write_max\fR (ulong)
-.ad
-.RS 12n
-Max write bytes per interval
-.sp
-Default value: \fB8,388,608\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_aliquot\fR (ulong)
-.ad
-.RS 12n
-Metaslab granularity, in bytes. This is roughly similar to what would be
-referred to as the "stripe size" in traditional RAID arrays. In normal
-operation, ZFS will try to write this amount of data to a top-level vdev
-before moving on to the next one.
-.sp
-Default value: \fB524,288\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_bias_enabled\fR (int)
-.ad
-.RS 12n
-Enable metaslab group biasing based on its vdev's over- or under-utilization
-relative to the pool.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_force_ganging\fR (ulong)
-.ad
-.RS 12n
-Make some blocks above a certain size be gang blocks.  This option is used
-by the test suite to facilitate testing.
-.sp
-Default value: \fB16,777,217\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_metaslab_segment_weight_enabled\fR (int)
-.ad
-.RS 12n
-Enable/disable segment-based metaslab selection.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_metaslab_switch_threshold\fR (int)
-.ad
-.RS 12n
-When using segment-based metaslab selection, continue allocating
-from the active metaslab until \fBzfs_metaslab_switch_threshold\fR
-worth of buckets have been exhausted.
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_debug_load\fR (int)
-.ad
-.RS 12n
-Load all metaslabs during pool import.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_debug_unload\fR (int)
-.ad
-.RS 12n
-Prevent metaslabs from being unloaded.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_fragmentation_factor_enabled\fR (int)
-.ad
-.RS 12n
-Enable use of the fragmentation metric in computing metaslab weights.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_df_max_search\fR (int)
-.ad
-.RS 12n
-Maximum distance to search forward from the last offset. Without this limit,
-fragmented pools can see >100,000 iterations and metaslab_block_picker()
-becomes the performance limiting factor on high-performance storage.
-
-With the default setting of 16MB, we typically see less than 500 iterations,
-even with very fragmented, ashift=9 pools. The maximum number of iterations
-possible is: \fBmetaslab_df_max_search / (2 * (1<<ashift))\fR.
-With the default setting of 16MB this is 16*1024 (with ashift=9) or 2048
-(with ashift=12).
-.sp
-Default value: \fB16,777,216\fR (16MB)
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_df_use_largest_segment\fR (int)
-.ad
-.RS 12n
-If we are not searching forward (due to metaslab_df_max_search,
-metaslab_df_free_pct, or metaslab_df_alloc_threshold), this tunable controls
-what segment is used.  If it is set, we will use the largest free segment. 
-If it is not set, we will use a segment of exactly the requested size (or
-larger).
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_default_ms_count\fR (int)
-.ad
-.RS 12n
-When a vdev is added target this number of metaslabs per top-level vdev.
-.sp
-Default value: \fB200\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_min_ms_count\fR (int)
-.ad
-.RS 12n
-Minimum number of metaslabs to create in a top-level vdev.
-.sp
-Default value: \fB16\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_ms_count_limit\fR (int)
-.ad
-.RS 12n
-Practical upper limit of total metaslabs per top-level vdev.
-.sp
-Default value: \fB131,072\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_preload_enabled\fR (int)
-.ad
-.RS 12n
-Enable metaslab group preloading.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmetaslab_lba_weighting_enabled\fR (int)
-.ad
-.RS 12n
-Give more weight to metaslabs with lower LBAs, assuming they have
-greater bandwidth as is typically the case on a modern constant
-angular velocity disk drive.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBsend_holes_without_birth_time\fR (int)
-.ad
-.RS 12n
-When set, the hole_birth optimization will not be used, and all holes will
-always be sent on zfs send.  This is useful if you suspect your datasets are
-affected by a bug in hole_birth.
-.sp
-Use \fB1\fR for on (default) and \fB0\fR for off.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_config_path\fR (charp)
-.ad
-.RS 12n
-SPA config file
-.sp
-Default value: \fB/etc/zfs/zpool.cache\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_asize_inflation\fR (int)
-.ad
-.RS 12n
-Multiplication factor used to estimate actual disk consumption from the
-size of data being written. The default value is a worst case estimate,
-but lower values may be valid for a given pool depending on its
-configuration.  Pool administrators who understand the factors involved
-may wish to specify a more realistic inflation factor, particularly if
-they operate close to quota or capacity limits.
-.sp
-Default value: \fB24\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_load_print_vdev_tree\fR (int)
-.ad
-.RS 12n
-Whether to print the vdev tree in the debugging message buffer during pool import.
-Use 0 to disable and 1 to enable.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_load_verify_data\fR (int)
-.ad
-.RS 12n
-Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
-import.  Use 0 to disable and 1 to enable.
-
-An extreme rewind import normally performs a full traversal of all
-blocks in the pool for verification.  If this parameter is set to 0,
-the traversal skips non-metadata blocks.  It can be toggled once the
-import has started to stop or start the traversal of non-metadata blocks.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_load_verify_metadata\fR (int)
-.ad
-.RS 12n
-Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
-pool import.  Use 0 to disable and 1 to enable.
-
-An extreme rewind import normally performs a full traversal of all
-blocks in the pool for verification.  If this parameter is set to 0,
-the traversal is not performed.  It can be toggled once the import has
-started to stop or start the traversal.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_load_verify_shift\fR (int)
-.ad
-.RS 12n
-Sets the maximum number of bytes to consume during pool import to the log2
-fraction of the target arc size.
-.sp
-Default value: \fB4\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspa_slop_shift\fR (int)
-.ad
-.RS 12n
-Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space
-in the pool to be consumed.  This ensures that we don't run the pool
-completely out of space, due to unaccounted changes (e.g. to the MOS).
-It also limits the worst-case time to allocate space.  If we have
-less than this amount of free space, most ZPL operations (e.g. write,
-create) will return ENOSPC.
-.sp
-Default value: \fB5\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBvdev_removal_max_span\fR (int)
-.ad
-.RS 12n
-During top-level vdev removal, chunks of data are copied from the vdev
-which may include free space in order to trade bandwidth for IOPS.
-This parameter determines the maximum span of free space (in bytes)
-which will be included as "unnecessary" data in a chunk of copied data.
-
-The default value here was chosen to align with
-\fBzfs_vdev_read_gap_limit\fR, which is a similar concept when doing
-regular reads (but there's no reason it has to be the same).
-.sp
-Default value: \fB32,768\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzap_iterate_prefetch\fR (int)
-.ad
-.RS 12n
-If this is set, when we start iterating over a ZAP object, zfs will prefetch
-the entire object (all leaf blocks).  However, this is limited by
-\fBdmu_prefetch_max\fR.
-.sp
-Use \fB1\fR for on (default) and \fB0\fR for off.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfetch_array_rd_sz\fR (ulong)
-.ad
-.RS 12n
-If prefetching is enabled, disable prefetching for reads larger than this size.
-.sp
-Default value: \fB1,048,576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfetch_max_distance\fR (uint)
-.ad
-.RS 12n
-Max bytes to prefetch per stream (default 8MB).
-.sp
-Default value: \fB8,388,608\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfetch_max_streams\fR (uint)
-.ad
-.RS 12n
-Max number of streams per zfetch (prefetch streams per file).
-.sp
-Default value: \fB8\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfetch_min_sec_reap\fR (uint)
-.ad
-.RS 12n
-Min time before an active prefetch stream can be reclaimed
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_abd_scatter_min_size\fR (uint)
-.ad
-.RS 12n
-This is the minimum allocation size that will use scatter (page-based)
-ABD's.  Smaller allocations will use linear ABD's.
-.sp
-Default value: \fB1536\fR (512B and 1KB allocations will be linear).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_dnode_limit\fR (ulong)
-.ad
-.RS 12n
-When the number of bytes consumed by dnodes in the ARC exceeds this number of
-bytes, try to unpin some of it in response to demand for non-metadata. This
-value acts as a ceiling to the amount of dnode metadata, and defaults to 0 which
-indicates that a percent which is based on \fBzfs_arc_dnode_limit_percent\fR of
-the ARC meta buffers that may be used for dnodes.
-
-See also \fBzfs_arc_meta_prune\fR which serves a similar purpose but is used
-when the amount of metadata in the ARC exceeds \fBzfs_arc_meta_limit\fR rather
-than in response to overall demand for non-metadata.
-
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_dnode_limit_percent\fR (ulong)
-.ad
-.RS 12n
-Percentage that can be consumed by dnodes of ARC meta buffers.
-.sp
-See also \fBzfs_arc_dnode_limit\fR which serves a similar purpose but has a
-higher priority if set to nonzero value.
-.sp
-Default value: \fB10\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_dnode_reduce_percent\fR (ulong)
-.ad
-.RS 12n
-Percentage of ARC dnodes to try to scan in response to demand for non-metadata
-when the number of bytes consumed by dnodes exceeds \fBzfs_arc_dnode_limit\fR.
-
-.sp
-Default value: \fB10\fR% of the number of dnodes in the ARC.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_average_blocksize\fR (int)
-.ad
-.RS 12n
-The ARC's buffer hash table is sized based on the assumption of an average
-block size of \fBzfs_arc_average_blocksize\fR (default 8K).  This works out
-to roughly 1MB of hash table per 1GB of physical memory with 8-byte pointers.
-For configurations with a known larger average block size this value can be
-increased to reduce the memory footprint.
-
-.sp
-Default value: \fB8192\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_evict_batch_limit\fR (int)
-.ad
-.RS 12n
-Number of ARC headers to evict per sub-list before proceeding to another sub-list.
-This batch-style operation prevents entire sub-lists from being evicted at once
-but comes at a cost of additional unlocking and locking.
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_grow_retry\fR (int)
-.ad
-.RS 12n
-If set to a non zero value, it will replace the arc_grow_retry value with this value.
-The arc_grow_retry value (default 5) is the number of seconds the ARC will wait before
-trying to resume growth after a memory pressure event.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_lotsfree_percent\fR (int)
-.ad
-.RS 12n
-Throttle I/O when free system memory drops below this percentage of total
-system memory.  Setting this value to 0 will disable the throttle.
-.sp
-Default value: \fB10\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_max\fR (ulong)
-.ad
-.RS 12n
-Max arc size of ARC in bytes. If set to 0 then it will consume 1/2 of system
-RAM. This value must be at least 67108864 (64 megabytes).
-.sp
-This value can be changed dynamically with some caveats. It cannot be set back
-to 0 while running and reducing it below the current ARC size will not cause
-the ARC to shrink without memory pressure to induce shrinking.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_adjust_restarts\fR (ulong)
-.ad
-.RS 12n
-The number of restart passes to make while scanning the ARC attempting
-to free buffers in order to stay below the \fBzfs_arc_meta_limit\fR.
-This value should not need to be tuned but is available to facilitate
-performance analysis.
-.sp
-Default value: \fB4096\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_limit\fR (ulong)
-.ad
-.RS 12n
-The maximum allowed size in bytes that meta data buffers are allowed to
-consume in the ARC.  When this limit is reached meta data buffers will
-be reclaimed even if the overall arc_c_max has not been reached.  This
-value defaults to 0 which indicates that a percent which is based on
-\fBzfs_arc_meta_limit_percent\fR of the ARC may be used for meta data.
-.sp
-This value may be changed dynamically except that it cannot be set back to 0
-for a specific percent of the ARC; it must be set to an explicit value.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_limit_percent\fR (ulong)
-.ad
-.RS 12n
-Percentage of ARC buffers that can be used for meta data.
-
-See also \fBzfs_arc_meta_limit\fR which serves a similar purpose but has a
-higher priority if set to nonzero value.
-
-.sp
-Default value: \fB75\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_min\fR (ulong)
-.ad
-.RS 12n
-The minimum allowed size in bytes that meta data buffers may consume in
-the ARC.  This value defaults to 0 which disables a floor on the amount
-of the ARC devoted to meta data.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_prune\fR (int)
-.ad
-.RS 12n
-The number of dentries and inodes to be scanned looking for entries
-which can be dropped.  This may be required when the ARC reaches the
-\fBzfs_arc_meta_limit\fR because dentries and inodes can pin buffers
-in the ARC.  Increasing this value will cause the dentry and inode caches
-to be pruned more aggressively.  Setting this value to 0 will disable
-pruning the inode and dentry caches.
-.sp
-Default value: \fB10,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_meta_strategy\fR (int)
-.ad
-.RS 12n
-Define the strategy for ARC meta data buffer eviction (meta reclaim strategy).
-A value of 0 (META_ONLY) will evict only the ARC meta data buffers.
-A value of 1 (BALANCED) indicates that additional data buffers may be evicted if
-that is required in order to evict the required number of meta data buffers.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_min\fR (ulong)
-.ad
-.RS 12n
-Min arc size of ARC in bytes. If set to 0 then arc_c_min will default to
-consuming the larger of 32M or 1/32 of total system memory.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_min_prefetch_ms\fR (int)
-.ad
-.RS 12n
-Minimum time prefetched blocks are locked in the ARC, specified in ms.
-A value of \fB0\fR will default to 1000 ms.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_min_prescient_prefetch_ms\fR (int)
-.ad
-.RS 12n
-Minimum time "prescient prefetched" blocks are locked in the ARC, specified
-in ms. These blocks are meant to be prefetched fairly aggressively ahead of
-the code that may use them. A value of \fB0\fR will default to 6000 ms.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_max_missing_tvds\fR (int)
-.ad
-.RS 12n
-Number of missing top-level vdevs which will be allowed during
-pool import (only in read-only mode).
-.sp
-Default value: \fB0\fR
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_multilist_num_sublists\fR (int)
-.ad
-.RS 12n
-To allow more fine-grained locking, each ARC state contains a series
-of lists for both data and meta data objects.  Locking is performed at
-the level of these "sub-lists".  This parameter controls the number of
-sub-lists per ARC state, and also applies to other uses of the
-multilist data structure.
-.sp
-Default value: \fB4\fR or the number of online CPUs, whichever is greater
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_overflow_shift\fR (int)
-.ad
-.RS 12n
-The ARC size is considered to be overflowing if it exceeds the current
-ARC target size (arc_c) by a threshold determined by this parameter.
-The threshold is calculated as a fraction of arc_c using the formula
-"arc_c >> \fBzfs_arc_overflow_shift\fR".
-
-The default value of 8 causes the ARC to be considered to be overflowing
-if it exceeds the target size by 1/256th (0.3%) of the target size.
-
-When the ARC is overflowing, new buffer allocations are stalled until
-the reclaim thread catches up and the overflow condition no longer exists.
-.sp
-Default value: \fB8\fR.
-.RE
-
-.sp
-.ne 2
-.na
-
-\fBzfs_arc_p_min_shift\fR (int)
-.ad
-.RS 12n
-If set to a non zero value, this will update arc_p_min_shift (default 4)
-with the new value.
-arc_p_min_shift is used to shift arc_c for calculating both the min and
-max arc_p.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_p_dampener_disable\fR (int)
-.ad
-.RS 12n
-Disable arc_p adapt dampener
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR to disable.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_shrink_shift\fR (int)
-.ad
-.RS 12n
-If set to a non zero value, this will update arc_shrink_shift (default 7)
-with the new value.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_pc_percent\fR (uint)
-.ad
-.RS 12n
-Percent of pagecache to reclaim arc to
-
-This tunable allows ZFS arc to play more nicely with the kernel's LRU
-pagecache. It can guarantee that the arc size won't collapse under scanning
-pressure on the pagecache, yet still allows arc to be reclaimed down to
-zfs_arc_min if necessary. This value is specified as percent of pagecache
-size (as measured by NR_FILE_PAGES) where that percent may exceed 100. This
-only operates during memory pressure/reclaim.
-.sp
-Default value: \fB0\fR% (disabled).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_arc_sys_free\fR (ulong)
-.ad
-.RS 12n
-The target number of bytes the ARC should leave as free memory on the system.
-Defaults to the larger of 1/64 of physical memory or 512K.  Setting this
-option to a non-zero value will override the default.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_autoimport_disable\fR (int)
-.ad
-.RS 12n
-Disable pool import at module load by ignoring the cache file (typically \fB/etc/zfs/zpool.cache\fR).
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_checksums_per_second\fR (int)
-.ad
-.RS 12n
-Rate limit checksum events to this many per second.  Note that this should
-not be set below the zed thresholds (currently 10 checksums over 10 sec)
-or else zed may not trigger any action.
-.sp
-Default value: 20
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_commit_timeout_pct\fR (int)
-.ad
-.RS 12n
-This controls the amount of time that a ZIL block (lwb) will remain "open"
-when it isn't "full", and it has a thread waiting for it to be committed to
-stable storage.  The timeout is scaled based on a percentage of the last lwb
-latency to avoid significantly impacting the latency of each individual
-transaction record (itx).
-.sp
-Default value: \fB5\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_condense_indirect_vdevs_enable\fR (int)
-.ad
-.RS 12n
-Enable condensing indirect vdev mappings.  When set to a non-zero value,
-attempt to condense indirect vdev mappings if the mapping uses more than
-\fBzfs_condense_min_mapping_bytes\fR bytes of memory and if the obsolete
-space map object uses more than \fBzfs_condense_max_obsolete_bytes\fR
-bytes on-disk.  The condensing process is an attempt to save memory by
-removing obsolete mappings.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_condense_max_obsolete_bytes\fR (ulong)
-.ad
-.RS 12n
-Only attempt to condense indirect vdev mappings if the on-disk size
-of the obsolete space map object is greater than this number of bytes
-(see \fBzfs_condense_indirect_vdevs_enable\fR).
-.sp
-Default value: \fB1,073,741,824\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_condense_min_mapping_bytes\fR (ulong)
-.ad
-.RS 12n
-Minimum size vdev mapping to attempt to condense (see
-\fBzfs_condense_indirect_vdevs_enable\fR).
-.sp
-Default value: \fB131,072\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dbgmsg_enable\fR (int)
-.ad
-.RS 12n
-Internally ZFS keeps a small log to facilitate debugging.  By default the log
-is disabled, to enable it set this option to 1.  The contents of the log can
-be accessed by reading the /proc/spl/kstat/zfs/dbgmsg file.  Writing 0 to
-this proc file clears the log.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dbgmsg_maxsize\fR (int)
-.ad
-.RS 12n
-The maximum size in bytes of the internal ZFS debug log.
-.sp
-Default value: \fB4M\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dbuf_state_index\fR (int)
-.ad
-.RS 12n
-This feature is currently unused. It is normally used for controlling what
-reporting is available under /proc/spl/kstat/zfs.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_deadman_enabled\fR (int)
-.ad
-.RS 12n
-When a pool sync operation takes longer than \fBzfs_deadman_synctime_ms\fR
-milliseconds, or when an individual I/O takes longer than
-\fBzfs_deadman_ziotime_ms\fR milliseconds, then the operation is considered to
-be "hung".  If \fBzfs_deadman_enabled\fR is set then the deadman behavior is
-invoked as described by the \fBzfs_deadman_failmode\fR module option.
-By default the deadman is enabled and configured to \fBwait\fR which results
-in "hung" I/Os only being logged.  The deadman is automatically disabled
-when a pool gets suspended.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_deadman_failmode\fR (charp)
-.ad
-.RS 12n
-Controls the failure behavior when the deadman detects a "hung" I/O.  Valid
-values are \fBwait\fR, \fBcontinue\fR, and \fBpanic\fR.
-.sp
-\fBwait\fR - Wait for a "hung" I/O to complete.  For each "hung" I/O a
-"deadman" event will be posted describing that I/O.
-.sp
-\fBcontinue\fR - Attempt to recover from a "hung" I/O by re-dispatching it
-to the I/O pipeline if possible.
-.sp
-\fBpanic\fR - Panic the system.  This can be used to facilitate an automatic
-fail-over to a properly configured fail-over partner.
-.sp
-Default value: \fBwait\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_deadman_checktime_ms\fR (int)
-.ad
-.RS 12n
-Check time in milliseconds. This defines the frequency at which we check
-for hung I/O and potentially invoke the \fBzfs_deadman_failmode\fR behavior.
-.sp
-Default value: \fB60,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_deadman_synctime_ms\fR (ulong)
-.ad
-.RS 12n
-Interval in milliseconds after which the deadman is triggered and also
-the interval after which a pool sync operation is considered to be "hung".
-Once this limit is exceeded the deadman will be invoked every
-\fBzfs_deadman_checktime_ms\fR milliseconds until the pool sync completes.
-.sp
-Default value: \fB600,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_deadman_ziotime_ms\fR (ulong)
-.ad
-.RS 12n
-Interval in milliseconds after which the deadman is triggered and an
-individual I/O operation is considered to be "hung".  As long as the I/O
-remains "hung" the deadman will be invoked every \fBzfs_deadman_checktime_ms\fR
-milliseconds until the I/O completes.
-.sp
-Default value: \fB300,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dedup_prefetch\fR (int)
-.ad
-.RS 12n
-Enable prefetching dedup-ed blks
-.sp
-Use \fB1\fR for yes and \fB0\fR to disable (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_delay_min_dirty_percent\fR (int)
-.ad
-.RS 12n
-Start to delay each transaction once there is this amount of dirty data,
-expressed as a percentage of \fBzfs_dirty_data_max\fR.
-This value should be >= zfs_vdev_async_write_active_max_dirty_percent.
-See the section "ZFS TRANSACTION DELAY".
-.sp
-Default value: \fB60\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_delay_scale\fR (int)
-.ad
-.RS 12n
-This controls how quickly the transaction delay approaches infinity.
-Larger values cause longer delays for a given amount of dirty data.
-.sp
-For the smoothest delay, this value should be about 1 billion divided
-by the maximum number of operations per second.  This will smoothly
-handle between 10x and 1/10th this number.
-.sp
-See the section "ZFS TRANSACTION DELAY".
-.sp
-Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
-.sp
-Default value: \fB500,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_slow_io_events_per_second\fR (int)
-.ad
-.RS 12n
-Rate limit delay zevents (which report slow I/Os) to this many per second.
-.sp
-Default value: 20
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_unlink_suspend_progress\fR (uint)
-.ad
-.RS 12n
-When enabled, files will not be asynchronously removed from the list of pending
-unlinks and the space they consume will be leaked. Once this option has been
-disabled and the dataset is remounted, the pending unlinks will be processed
-and the freed space returned to the pool.
-This option is used by the test suite to facilitate testing.
-.sp
-Uses \fB0\fR (default) to allow progress and \fB1\fR to pause progress.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_delete_blocks\fR (ulong)
-.ad
-.RS 12n
-This is used to define a large file for the purposes of delete.  Files
-containing more than \fBzfs_delete_blocks\fR will be deleted asynchronously
-while smaller files are deleted synchronously.  Decreasing this value will
-reduce the time spent in an unlink(2) system call at the expense of a longer
-delay before the freed space is available.
-.sp
-Default value: \fB20,480\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dirty_data_max\fR (int)
-.ad
-.RS 12n
-Determines the dirty space limit in bytes.  Once this limit is exceeded, new
-writes are halted until space frees up. This parameter takes precedence
-over \fBzfs_dirty_data_max_percent\fR.
-See the section "ZFS TRANSACTION DELAY".
-.sp
-Default value: \fB10\fR% of physical RAM, capped at \fBzfs_dirty_data_max_max\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dirty_data_max_max\fR (int)
-.ad
-.RS 12n
-Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed in bytes.
-This limit is only enforced at module load time, and will be ignored if
-\fBzfs_dirty_data_max\fR is later changed.  This parameter takes
-precedence over \fBzfs_dirty_data_max_max_percent\fR. See the section
-"ZFS TRANSACTION DELAY".
-.sp
-Default value: \fB25\fR% of physical RAM.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dirty_data_max_max_percent\fR (int)
-.ad
-.RS 12n
-Maximum allowable value of \fBzfs_dirty_data_max\fR, expressed as a
-percentage of physical RAM.  This limit is only enforced at module load
-time, and will be ignored if \fBzfs_dirty_data_max\fR is later changed.
-The parameter \fBzfs_dirty_data_max_max\fR takes precedence over this
-one. See the section "ZFS TRANSACTION DELAY".
-.sp
-Default value: \fB25\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dirty_data_max_percent\fR (int)
-.ad
-.RS 12n
-Determines the dirty space limit, expressed as a percentage of all
-memory.  Once this limit is exceeded, new writes are halted until space frees
-up.  The parameter \fBzfs_dirty_data_max\fR takes precedence over this
-one.  See the section "ZFS TRANSACTION DELAY".
-.sp
-Default value: \fB10\fR%, subject to \fBzfs_dirty_data_max_max\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dirty_data_sync_percent\fR (int)
-.ad
-.RS 12n
-Start syncing out a transaction group if there's at least this much dirty data
-as a percentage of \fBzfs_dirty_data_max\fR.  This should be less than
-\fBzfs_vdev_async_write_active_min_dirty_percent\fR.
-.sp
-Default value: \fB20\fR% of \fBzfs_dirty_data_max\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_fletcher_4_impl\fR (string)
-.ad
-.RS 12n
-Select a fletcher 4 implementation.
-.sp
-Supported selectors are: \fBfastest\fR, \fBscalar\fR, \fBsse2\fR, \fBssse3\fR,
-\fBavx2\fR, \fBavx512f\fR, and \fBaarch64_neon\fR.
-All of the selectors except \fBfastest\fR and \fBscalar\fR require instruction
-set extensions to be available and will only appear if ZFS detects that they are
-present at runtime. If multiple implementations of fletcher 4 are available,
-the \fBfastest\fR will be chosen using a micro benchmark. Selecting \fBscalar\fR
-results in the original, CPU based calculation, being used. Selecting any option
-other than \fBfastest\fR and \fBscalar\fR results in vector instructions from
-the respective CPU instruction set being used.
-.sp
-Default value: \fBfastest\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_free_bpobj_enabled\fR (int)
-.ad
-.RS 12n
-Enable/disable the processing of the free_bpobj object.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_async_block_max_blocks\fR (ulong)
-.ad
-.RS 12n
-Maximum number of blocks freed in a single txg.
-.sp
-Default value: \fB100,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_override_estimate_recordsize\fR (ulong)
-.ad
-.RS 12n
-Record size calculation override for zfs send estimates.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_read_max_active\fR (int)
-.ad
-.RS 12n
-Maximum asynchronous read I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB3\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_read_min_active\fR (int)
-.ad
-.RS 12n
-Minimum asynchronous read I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_write_active_max_dirty_percent\fR (int)
-.ad
-.RS 12n
-When the pool has more than
-\fBzfs_vdev_async_write_active_max_dirty_percent\fR dirty data, use
-\fBzfs_vdev_async_write_max_active\fR to limit active async writes.  If
-the dirty data is between min and max, the active I/O limit is linearly
-interpolated. See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB60\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_write_active_min_dirty_percent\fR (int)
-.ad
-.RS 12n
-When the pool has less than
-\fBzfs_vdev_async_write_active_min_dirty_percent\fR dirty data, use
-\fBzfs_vdev_async_write_min_active\fR to limit active async writes.  If
-the dirty data is between min and max, the active I/O limit is linearly
-interpolated. See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB30\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_write_max_active\fR (int)
-.ad
-.RS 12n
-Maximum asynchronous write I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_async_write_min_active\fR (int)
-.ad
-.RS 12n
-Minimum asynchronous write I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Lower values are associated with better latency on rotational media but poorer
-resilver performance. The default value of 2 was chosen as a compromise. A
-value of 3 has been shown to improve resilver performance further at a cost of
-further increasing latency.
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_initializing_max_active\fR (int)
-.ad
-.RS 12n
-Maximum initializing I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_initializing_min_active\fR (int)
-.ad
-.RS 12n
-Minimum initializing I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_max_active\fR (int)
-.ad
-.RS 12n
-The maximum number of I/Os active to each device.  Ideally, this will be >=
-the sum of each queue's max_active.  It must be at least the sum of each
-queue's min_active.  See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_removal_max_active\fR (int)
-.ad
-.RS 12n
-Maximum removal I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_removal_min_active\fR (int)
-.ad
-.RS 12n
-Minimum removal I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_scrub_max_active\fR (int)
-.ad
-.RS 12n
-Maximum scrub I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_scrub_min_active\fR (int)
-.ad
-.RS 12n
-Minimum scrub I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_sync_read_max_active\fR (int)
-.ad
-.RS 12n
-Maximum synchronous read I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_sync_read_min_active\fR (int)
-.ad
-.RS 12n
-Minimum synchronous read I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_sync_write_max_active\fR (int)
-.ad
-.RS 12n
-Maximum synchronous write I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_sync_write_min_active\fR (int)
-.ad
-.RS 12n
-Minimum synchronous write I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_trim_max_active\fR (int)
-.ad
-.RS 12n
-Maximum trim/discard I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_trim_min_active\fR (int)
-.ad
-.RS 12n
-Minimum trim/discard I/Os active to each device.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_queue_depth_pct\fR (int)
-.ad
-.RS 12n
-Maximum number of queued allocations per top-level vdev expressed as
-a percentage of \fBzfs_vdev_async_write_max_active\fR which allows the
-system to detect devices that are more capable of handling allocations
-and to allocate more blocks to those devices.  It allows for dynamic
-allocation distribution when devices are imbalanced as fuller devices
-will tend to be slower than empty devices.
-
-See also \fBzio_dva_throttle_enabled\fR.
-.sp
-Default value: \fB1000\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_expire_snapshot\fR (int)
-.ad
-.RS 12n
-Seconds to expire .zfs/snapshot
-.sp
-Default value: \fB300\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_admin_snapshot\fR (int)
-.ad
-.RS 12n
-Allow the creation, removal, or renaming of entries in the .zfs/snapshot
-directory to cause the creation, destruction, or renaming of snapshots.
-When enabled this functionality works both locally and over NFS exports
-which have the 'no_root_squash' option set. This functionality is disabled
-by default.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_flags\fR (int)
-.ad
-.RS 12n
-Set additional debugging flags. The following flags may be bitwise-or'd
-together.
-.sp
-.TS
-box;
-rB lB
-lB lB
-r l.
-Value	Symbolic Name
-	Description
-_
-1	ZFS_DEBUG_DPRINTF
-	Enable dprintf entries in the debug log.
-_
-2	ZFS_DEBUG_DBUF_VERIFY *
-	Enable extra dbuf verifications.
-_
-4	ZFS_DEBUG_DNODE_VERIFY *
-	Enable extra dnode verifications.
-_
-8	ZFS_DEBUG_SNAPNAMES
-	Enable snapshot name verification.
-_
-16	ZFS_DEBUG_MODIFY
-	Check for illegally modified ARC buffers.
-_
-64	ZFS_DEBUG_ZIO_FREE
-	Enable verification of block frees.
-_
-128	ZFS_DEBUG_HISTOGRAM_VERIFY
-	Enable extra spacemap histogram verifications.
-_
-256	ZFS_DEBUG_METASLAB_VERIFY
-	Verify space accounting on disk matches in-core range_trees.
-_
-512	ZFS_DEBUG_SET_ERROR
-	Enable SET_ERROR and dprintf entries in the debug log.
-_
-1024	ZFS_DEBUG_INDIRECT_REMAP
-	Verify split blocks created by device removal.
-_
-2048	ZFS_DEBUG_TRIM
-	Verify TRIM ranges are always within the allocatable range tree.
-.TE
-.sp
-* Requires debug build.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_free_leak_on_eio\fR (int)
-.ad
-.RS 12n
-If destroy encounters an EIO while reading metadata (e.g. indirect
-blocks), space referenced by the missing metadata can not be freed.
-Normally this causes the background destroy to become "stalled", as
-it is unable to make forward progress.  While in this stalled state,
-all remaining space to free from the error-encountering filesystem is
-"temporarily leaked".  Set this flag to cause it to ignore the EIO,
-permanently leak the space from indirect blocks that can not be read,
-and continue to free everything else that it can.
-
-The default, "stalling" behavior is useful if the storage partially
-fails (i.e. some but not all i/os fail), and then later recovers.  In
-this case, we will be able to continue pool operations while it is
-partially failed, and when it recovers, we can continue to free the
-space, with no leaks.  However, note that this case is actually
-fairly rare.
-
-Typically pools either (a) fail completely (but perhaps temporarily,
-e.g. a top-level vdev going offline), or (b) have localized,
-permanent errors (e.g. disk returns the wrong data due to bit flip or
-firmware bug).  In case (a), this setting does not matter because the
-pool will be suspended and the sync thread will not be able to make
-forward progress regardless.  In case (b), because the error is
-permanent, the best we can do is leak the minimum amount of space,
-which is what setting this flag will do.  Therefore, it is reasonable
-for this flag to normally be set, but we chose the more conservative
-approach of not setting it, so that there is no possibility of
-leaking space in the "partial temporary" failure case.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_free_min_time_ms\fR (int)
-.ad
-.RS 12n
-During a \fBzfs destroy\fR operation using \fBfeature@async_destroy\fR a minimum
-of this much time will be spent working on freeing blocks per txg.
-.sp
-Default value: \fB1,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_immediate_write_sz\fR (long)
-.ad
-.RS 12n
-Largest data block to write to zil. Larger blocks will be treated as if the
-dataset being written to had the property setting \fBlogbias=throughput\fR.
-.sp
-Default value: \fB32,768\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_initialize_value\fR (ulong)
-.ad
-.RS 12n
-Pattern written to vdev free space by \fBzpool initialize\fR.
-.sp
-Default value: \fB16,045,690,984,833,335,022\fR (0xdeadbeefdeadbeee).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_lua_max_instrlimit\fR (ulong)
-.ad
-.RS 12n
-The maximum execution time limit that can be set for a ZFS channel program,
-specified as a number of Lua instructions.
-.sp
-Default value: \fB100,000,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_lua_max_memlimit\fR (ulong)
-.ad
-.RS 12n
-The maximum memory limit that can be set for a ZFS channel program, specified
-in bytes.
-.sp
-Default value: \fB104,857,600\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_max_dataset_nesting\fR (int)
-.ad
-.RS 12n
-The maximum depth of nested datasets.  This value can be tuned temporarily to
-fix existing datasets that exceed the predefined limit.
-.sp
-Default value: \fB50\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_max_recordsize\fR (int)
-.ad
-.RS 12n
-We currently support block sizes from 512 bytes to 16MB.  The benefits of
-larger blocks, and thus larger I/O, need to be weighed against the cost of
-COWing a giant block to modify one byte.  Additionally, very large blocks
-can have an impact on i/o latency, and also potentially on the memory
-allocator.  Therefore, we do not allow the recordsize to be set larger than
-zfs_max_recordsize (default 1MB).  Larger blocks can be created by changing
-this tunable, and pools with larger blocks can always be imported and used,
-regardless of this setting.
-.sp
-Default value: \fB1,048,576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_metaslab_fragmentation_threshold\fR (int)
-.ad
-.RS 12n
-Allow metaslabs to keep their active state as long as their fragmentation
-percentage is less than or equal to this value. An active metaslab that
-exceeds this threshold will no longer keep its active status allowing
-better metaslabs to be selected.
-.sp
-Default value: \fB70\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_mg_fragmentation_threshold\fR (int)
-.ad
-.RS 12n
-Metaslab groups are considered eligible for allocations if their
-fragmentation metric (measured as a percentage) is less than or equal to
-this value. If a metaslab group exceeds this threshold then it will be
-skipped unless all metaslab groups within the metaslab class have also
-crossed this threshold.
-.sp
-Default value: \fB95\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_mg_noalloc_threshold\fR (int)
-.ad
-.RS 12n
-Defines a threshold at which metaslab groups should be eligible for
-allocations.  The value is expressed as a percentage of free space
-beyond which a metaslab group is always eligible for allocations.
-If a metaslab group's free space is less than or equal to the
-threshold, the allocator will avoid allocating to that group
-unless all groups in the pool have reached the threshold.  Once all
-groups have reached the threshold, all groups are allowed to accept
-allocations.  The default value of 0 disables the feature and causes
-all metaslab groups to be eligible for allocations.
-
-This parameter allows one to deal with pools having heavily imbalanced
-vdevs such as would be the case when a new vdev has been added.
-Setting the threshold to a non-zero percentage will stop allocations
-from being made to vdevs that aren't filled to the specified percentage
-and allow lesser filled vdevs to acquire more allocations than they
-otherwise would under the old \fBzfs_mg_alloc_failures\fR facility.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_ddt_data_is_special\fR (int)
-.ad
-.RS 12n
-If enabled, ZFS will place DDT data into the special allocation class.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_user_indirect_is_special\fR (int)
-.ad
-.RS 12n
-If enabled, ZFS will place user data (both file and zvol) indirect blocks
-into the special allocation class.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_multihost_history\fR (int)
-.ad
-.RS 12n
-Historical statistics for the last N multihost updates will be available in
-\fB/proc/spl/kstat/zfs/<pool>/multihost\fR
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_multihost_interval\fR (ulong)
-.ad
-.RS 12n
-Used to control the frequency of multihost writes which are performed when the
-\fBmultihost\fR pool property is on.  This is one factor used to determine the
-length of the activity check during import.
-.sp
-The multihost write period is \fBzfs_multihost_interval / leaf-vdevs\fR
-milliseconds.  On average a multihost write will be issued for each leaf vdev
-every \fBzfs_multihost_interval\fR milliseconds.  In practice, the observed
-period can vary with the I/O load and this observed value is the delay which is
-stored in the uberblock.
-.sp
-Default value: \fB1000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_multihost_import_intervals\fR (uint)
-.ad
-.RS 12n
-Used to control the duration of the activity test on import.  Smaller values of
-\fBzfs_multihost_import_intervals\fR will reduce the import time but increase
-the risk of failing to detect an active pool.  The total activity check time is
-never allowed to drop below one second.
-.sp
-On import the activity check waits a minimum amount of time determined by
-\fBzfs_multihost_interval * zfs_multihost_import_intervals\fR, or the same
-product computed on the host which last had the pool imported (whichever is
-greater).  The activity check time may be further extended if the value of mmp
-delay found in the best uberblock indicates actual multihost updates happened
-at longer intervals than \fBzfs_multihost_interval\fR.  A minimum value of
-\fB100ms\fR is enforced.
-.sp
-A value of 0 is ignored and treated as if it was set to 1.
-.sp
-Default value: \fB20\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_multihost_fail_intervals\fR (uint)
-.ad
-.RS 12n
-Controls the behavior of the pool when multihost write failures or delays are
-detected.
-.sp
-When \fBzfs_multihost_fail_intervals = 0\fR, multihost write failures or delays
-are ignored.  The failures will still be reported to the ZED which depending on
-its configuration may take action such as suspending the pool or offlining a
-device.
-
-.sp
-When \fBzfs_multihost_fail_intervals > 0\fR, the pool will be suspended if
-\fBzfs_multihost_fail_intervals * zfs_multihost_interval\fR milliseconds pass
-without a successful mmp write.  This guarantees the activity test will see
-mmp writes if the pool is imported.  A value of 1 is ignored and treated as
-if it was set to 2.  This is necessary to prevent the pool from being suspended
-due to normal, small I/O latency variations.
-
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_no_scrub_io\fR (int)
-.ad
-.RS 12n
-Set for no scrub I/O. This results in scrubs not actually scrubbing data and
-simply doing a metadata crawl of the pool instead.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_no_scrub_prefetch\fR (int)
-.ad
-.RS 12n
-Set to disable block prefetching for scrubs.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_nocacheflush\fR (int)
-.ad
-.RS 12n
-Disable cache flush operations on disks when writing.  Setting this will
-cause pool corruption on power loss if a volatile out-of-order write cache
-is enabled.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_nopwrite_enabled\fR (int)
-.ad
-.RS 12n
-Enable NOP writes
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR to disable.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_dmu_offset_next_sync\fR (int)
-.ad
-.RS 12n
-Enable forcing txg sync to find holes. When enabled forces ZFS to act
-like prior versions when SEEK_HOLE or SEEK_DATA flags are used, which
-when a dnode is dirty causes txg's to be synced so that this data can be
-found.
-.sp
-Use \fB1\fR for yes and \fB0\fR to disable (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_pd_bytes_max\fR (int)
-.ad
-.RS 12n
-The number of bytes which should be prefetched during a pool traversal
-(eg: \fBzfs send\fR or other data crawling operations)
-.sp
-Default value: \fB52,428,800\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_per_txg_dirty_frees_percent\fR (ulong)
-.ad
-.RS 12n
-Tunable to control percentage of dirtied indirect blocks from frees allowed
-into one TXG. After this threshold is crossed, additional frees will wait until
-the next TXG.
-A value of zero will disable this throttle.
-.sp
-Default value: \fB5\fR, set to \fB0\fR to disable.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_prefetch_disable\fR (int)
-.ad
-.RS 12n
-This tunable disables predictive prefetch.  Note that it leaves "prescient"
-prefetch (e.g. prefetch for zfs send) intact.  Unlike predictive prefetch,
-prescient prefetch never issues i/os that end up not being needed, so it
-can't hurt performance.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_qat_checksum_disable\fR (int)
-.ad
-.RS 12n
-This tunable disables qat hardware acceleration for sha256 checksums. It
-may be set after the zfs modules have been loaded to initialize the qat
-hardware as long as support is compiled in and the qat driver is present.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_qat_compress_disable\fR (int)
-.ad
-.RS 12n
-This tunable disables qat hardware acceleration for gzip compression. It
-may be set after the zfs modules have been loaded to initialize the qat
-hardware as long as support is compiled in and the qat driver is present.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_qat_encrypt_disable\fR (int)
-.ad
-.RS 12n
-This tunable disables qat hardware acceleration for AES-GCM encryption. It
-may be set after the zfs modules have been loaded to initialize the qat
-hardware as long as support is compiled in and the qat driver is present.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_read_chunk_size\fR (long)
-.ad
-.RS 12n
-Bytes to read per chunk
-.sp
-Default value: \fB1,048,576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_read_history\fR (int)
-.ad
-.RS 12n
-Historical statistics for the last N reads will be available in
-\fB/proc/spl/kstat/zfs/<pool>/reads\fR
-.sp
-Default value: \fB0\fR (no data is kept).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_read_history_hits\fR (int)
-.ad
-.RS 12n
-Include cache hits in read history
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_reconstruct_indirect_combinations_max\fR (int)
-.ad
-.RS 12n
-If an indirect split block contains more than this many possible unique
-combinations when being reconstructed, consider it too computationally
-expensive to check them all. Instead, try at most
-\fBzfs_reconstruct_indirect_combinations_max\fR randomly-selected
-combinations each time the block is accessed.  This allows all segment
-copies to participate fairly in the reconstruction when all combinations
-cannot be checked and prevents repeated use of one bad copy.
-.sp
-Default value: \fB4096\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_recover\fR (int)
-.ad
-.RS 12n
-Set to attempt to recover from fatal errors. This should only be used as a
-last resort, as it typically results in leaked space, or worse.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_removal_ignore_errors\fR (int)
-.ad
-.RS 12n
-.sp
-Ignore hard IO errors during device removal.  When set, if a device encounters
-a hard IO error during the removal process the removal will not be cancelled.
-This can result in a normally recoverable block becoming permanently damaged
-and is not recommended.  This should only be used as a last resort when the
-pool cannot be returned to a healthy state prior to removing the device.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_removal_suspend_progress\fR (int)
-.ad
-.RS 12n
-.sp
-This is used by the test suite so that it can ensure that certain actions
-happen while in the middle of a removal.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_remove_max_segment\fR (int)
-.ad
-.RS 12n
-.sp
-The largest contiguous segment that we will attempt to allocate when removing
-a device.  This can be no larger than 16MB.  If there is a performance
-problem with attempting to allocate large blocks, consider decreasing this.
-.sp
-Default value: \fB16,777,216\fR (16MB).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_resilver_min_time_ms\fR (int)
-.ad
-.RS 12n
-Resilvers are processed by the sync thread. While resilvering it will spend
-at least this much time working on a resilver between txg flushes.
-.sp
-Default value: \fB3,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_ignore_errors\fR (int)
-.ad
-.RS 12n
-If set to a nonzero value, remove the DTL (dirty time list) upon
-completion of a pool scan (scrub) even if there were unrepairable
-errors.  It is intended to be used during pool repair or recovery to
-stop resilvering when the pool is next imported.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scrub_min_time_ms\fR (int)
-.ad
-.RS 12n
-Scrubs are processed by the sync thread. While scrubbing it will spend
-at least this much time working on a scrub between txg flushes.
-.sp
-Default value: \fB1,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_checkpoint_intval\fR (int)
-.ad
-.RS 12n
-To preserve progress across reboots the sequential scan algorithm periodically
-needs to stop metadata scanning and issue all the verification I/Os to disk.
-The frequency of this flushing is determined by the
-\fBzfs_scan_checkpoint_intval\fR tunable.
-.sp
-Default value: \fB7200\fR seconds (every 2 hours).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_fill_weight\fR (int)
-.ad
-.RS 12n
-This tunable affects how scrub and resilver I/O segments are ordered. A higher
-number indicates that we care more about how filled in a segment is, while a
-lower number indicates we care more about the size of the extent without
-considering the gaps within a segment. This value is only tunable upon module
-insertion. Changing the value afterwards will have no effect on scrub or
-resilver performance.
-.sp
-Default value: \fB3\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_issue_strategy\fR (int)
-.ad
-.RS 12n
-Determines the order that data will be verified while scrubbing or resilvering.
-If set to \fB1\fR, data will be verified as sequentially as possible, given the
-amount of memory reserved for scrubbing (see \fBzfs_scan_mem_lim_fact\fR). This
-may improve scrub performance if the pool's data is very fragmented. If set to
-\fB2\fR, the largest mostly-contiguous chunk of found data will be verified
-first. By deferring scrubbing of small segments, we may later find adjacent data
-to coalesce and increase the segment size. If set to \fB0\fR, zfs will use
-strategy \fB1\fR during normal verification and strategy \fB2\fR while taking a
-checkpoint.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_legacy\fR (int)
-.ad
-.RS 12n
-A value of 0 indicates that scrubs and resilvers will gather metadata in
-memory before issuing sequential I/O. A value of 1 indicates that the legacy
-algorithm will be used where I/O is initiated as soon as it is discovered.
-Changing this value to 0 will not affect scrubs or resilvers that are already
-in progress.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_max_ext_gap\fR (int)
-.ad
-.RS 12n
-Indicates the largest gap in bytes between scrub / resilver I/Os that will still
-be considered sequential for sorting purposes. Changing this value will not
-affect scrubs or resilvers that are already in progress.
-.sp
-Default value: \fB2097152 (2 MB)\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_mem_lim_fact\fR (int)
-.ad
-.RS 12n
-Maximum fraction of RAM used for I/O sorting by sequential scan algorithm.
-This tunable determines the hard limit for I/O sorting memory usage.
-When the hard limit is reached we stop scanning metadata and start issuing
-data verification I/O. This is done until we get below the soft limit.
-.sp
-Default value: \fB20\fR which is 5% of RAM (1/20).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_mem_lim_soft_fact\fR (int)
-.ad
-.RS 12n
-The fraction of the hard limit used to determine the soft limit for I/O sorting
-by the sequential scan algorithm. When we cross this limit from below no action
-is taken. When we cross this limit from above it is because we are issuing
-verification I/O. In this case (unless the metadata scan is done) we stop
-issuing verification I/O and start scanning metadata again until we get to the
-hard limit.
-.sp
-Default value: \fB20\fR which is 5% of the hard limit (1/20).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_scan_vdev_limit\fR (int)
-.ad
-.RS 12n
-Maximum amount of data that can be concurrently issued at once for scrubs and
-resilvers per leaf device, given in bytes.
-.sp
-Default value: \fB41943040\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_send_corrupt_data\fR (int)
-.ad
-.RS 12n
-Allow sending of corrupt data (ignore read/checksum errors when sending data)
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_send_unmodified_spill_blocks\fR (int)
-.ad
-.RS 12n
-Include unmodified spill blocks in the send stream. Under certain circumstances
-previous versions of ZFS could incorrectly remove the spill block from an
-existing object.  Including unmodified copies of the spill blocks creates a
-backwards compatible stream which will recreate a spill block if it was
-incorrectly removed.
-.sp
-Use \fB1\fR for yes (default) and \fB0\fR for no.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_send_queue_length\fR (int)
-.ad
-.RS 12n
-The maximum number of bytes allowed in the \fBzfs send\fR queue. This value
-must be at least twice the maximum block size in use.
-.sp
-Default value: \fB16,777,216\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_recv_queue_length\fR (int)
-.ad
-.RS 12n
-The maximum number of bytes allowed in the \fBzfs receive\fR queue. This value
-must be at least twice the maximum block size in use.
-.sp
-Default value: \fB16,777,216\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_sync_pass_deferred_free\fR (int)
-.ad
-.RS 12n
-Flushing of data to disk is done in passes. Defer frees starting in this pass
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_spa_discard_memory_limit\fR (int)
-.ad
-.RS 12n
-Maximum memory used for prefetching a checkpoint's space map on each
-vdev while discarding the checkpoint.
-.sp
-Default value: \fB16,777,216\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_special_class_metadata_reserve_pct\fR (int)
-.ad
-.RS 12n
-Only allow small data blocks to be allocated on the special and dedup vdev
-types when the available free space percentage on these vdevs exceeds this
-value. This ensures reserved space is available for pool meta data as the
-special vdevs approach capacity.
-.sp
-Default value: \fB25\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_sync_pass_dont_compress\fR (int)
-.ad
-.RS 12n
-Starting in this sync pass, we disable compression (including of metadata). 
-With the default setting, in practice, we don't have this many sync passes,
-so this has no effect.
-.sp
-The original intent was that disabling compression would help the sync passes
-to converge. However, in practice disabling compression increases the average
-number of sync passes, because when we turn compression off, a lot of blocks'
-sizes will change and thus we have to re-allocate (not overwrite) them. It
-also increases the number of 128KB allocations (e.g. for indirect blocks and
-spacemaps) because these will not be compressed. The 128K allocations are
-especially detrimental to performance on highly fragmented systems, which may
-have very few free segments of this size, and may need to load new metaslabs
-to satisfy 128K allocations.
-.sp
-Default value: \fB8\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_sync_pass_rewrite\fR (int)
-.ad
-.RS 12n
-Rewrite new block pointers starting in this pass
-.sp
-Default value: \fB2\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_sync_taskq_batch_pct\fR (int)
-.ad
-.RS 12n
-This controls the number of threads used by the dp_sync_taskq.  The default
-value of 75% will create a maximum of one thread per cpu.
-.sp
-Default value: \fB75\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_trim_extent_bytes_max\fR (unsigned int)
-.ad
-.RS 12n
-Maximum size of TRIM command.  Ranges larger than this will be split into
-chunks no larger than \fBzfs_trim_extent_bytes_max\fR bytes before being
-issued to the device.
-.sp
-Default value: \fB134,217,728\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_trim_extent_bytes_min\fR (unsigned int)
-.ad
-.RS 12n
-Minimum size of TRIM commands.  TRIM ranges smaller than this will be skipped
-unless they're part of a larger range which was broken into chunks.  This is
-done because it's common for these small TRIMs to negatively impact overall
-performance.  This value can be set to 0 to TRIM all unallocated space.
-.sp
-Default value: \fB32,768\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_trim_metaslab_skip\fR (unsigned int)
-.ad
-.RS 12n
-Skip uninitialized metaslabs during the TRIM process.  This option is useful
-for pools constructed from large thinly-provisioned devices where TRIM
-operations are slow.  As a pool ages an increasing fraction of the pool's
-metaslabs will be initialized progressively degrading the usefulness of
-this option.  This setting is stored when starting a manual TRIM and will
-persist for the duration of the requested TRIM.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_trim_queue_limit\fR (unsigned int)
-.ad
-.RS 12n
-Maximum number of queued TRIMs outstanding per leaf vdev.  The number of
-concurrent TRIM commands issued to the device is controlled by the
-\fBzfs_vdev_trim_min_active\fR and \fBzfs_vdev_trim_max_active\fR module
-options.
-.sp
-Default value: \fB10\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_trim_txg_batch\fR (unsigned int)
-.ad
-.RS 12n
-The number of transaction groups worth of frees which should be aggregated
-before TRIM operations are issued to the device.  This setting represents a
-trade-off between issuing larger, more efficient TRIM operations and the
-delay before the recently trimmed space is available for use by the device.
-.sp
-Increasing this value will allow frees to be aggregated for a longer time.
-This will result in larger TRIM operations and potentially increased memory
-usage.  Decreasing this value will have the opposite effect.  The default
-value of 32 was determined to be a reasonable compromise.
-.sp
-Default value: \fB32\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_txg_history\fR (int)
-.ad
-.RS 12n
-Historical statistics for the last N txgs will be available in
-\fB/proc/spl/kstat/zfs/<pool>/txgs\fR
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_txg_timeout\fR (int)
-.ad
-.RS 12n
-Flush dirty data to disk at least every N seconds (maximum txg duration)
-.sp
-Default value: \fB5\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_aggregate_trim\fR (int)
-.ad
-.RS 12n
-Allow TRIM I/Os to be aggregated.  This is normally not helpful because
-the extents to be trimmed will have already been aggregated by the
-metaslab.  This option is provided for debugging and performance analysis.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_aggregation_limit\fR (int)
-.ad
-.RS 12n
-Max vdev I/O aggregation size
-.sp
-Default value: \fB1,048,576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_aggregation_limit_non_rotating\fR (int)
-.ad
-.RS 12n
-Max vdev I/O aggregation size for non-rotating media
-.sp
-Default value: \fB131,072\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_cache_bshift\fR (int)
-.ad
-.RS 12n
-Shift size to inflate reads to
-.sp
-Default value: \fB16\fR (effectively 65536).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_cache_max\fR (int)
-.ad
-.RS 12n
-Inflate reads smaller than this value to meet the \fBzfs_vdev_cache_bshift\fR
-size (default 64k).
-.sp
-Default value: \fB16384\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_cache_size\fR (int)
-.ad
-.RS 12n
-Total size of the per-disk cache in bytes.
-.sp
-Currently this feature is disabled as it has been found to not be helpful
-for performance and in some cases harmful.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_mirror_rotating_inc\fR (int)
-.ad
-.RS 12n
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O immediately
-follows its predecessor on rotational vdevs for the purpose of making decisions
-based on load.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_mirror_rotating_seek_inc\fR (int)
-.ad
-.RS 12n
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O lacks
-locality as defined by the zfs_vdev_mirror_rotating_seek_offset.  I/Os within
-this that are not immediately following the previous I/O are incremented by
-half.
-.sp
-Default value: \fB5\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_mirror_rotating_seek_offset\fR (int)
-.ad
-.RS 12n
-The maximum distance for the last queued I/O in which the balancing algorithm
-considers an I/O to have locality.
-See the section "ZFS I/O SCHEDULER".
-.sp
-Default value: \fB1048576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_mirror_non_rotating_inc\fR (int)
-.ad
-.RS 12n
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member on non-rotational vdevs
-when I/Os do not immediately follow one another.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_mirror_non_rotating_seek_inc\fR (int)
-.ad
-.RS 12n
-A number by which the balancing algorithm increments the load calculation for
-the purpose of selecting the least busy mirror member when an I/O lacks
-locality as defined by the zfs_vdev_mirror_rotating_seek_offset. I/Os within
-this that are not immediately following the previous I/O are incremented by
-half.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_read_gap_limit\fR (int)
-.ad
-.RS 12n
-Aggregate read I/O operations if the gap on-disk between them is within this
-threshold.
-.sp
-Default value: \fB32,768\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_write_gap_limit\fR (int)
-.ad
-.RS 12n
-Aggregate write I/O over gap
-.sp
-Default value: \fB4,096\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_vdev_raidz_impl\fR (string)
-.ad
-.RS 12n
-Parameter for selecting raidz parity implementation to use.
-
-Options marked (always) below may be selected on module load as they are
-supported on all systems.
-The remaining options may only be set after the module is loaded, as they
-are available only if the implementations are compiled in and supported
-on the running system.
-
-Once the module is loaded, the content of
-/sys/module/zfs/parameters/zfs_vdev_raidz_impl will show available options
-with the currently selected one enclosed in [].
-Possible options are:
-  fastest  - (always) implementation selected using built-in benchmark
-  original - (always) original raidz implementation
-  scalar   - (always) scalar raidz implementation
-  sse2     - implementation using SSE2 instruction set (64bit x86 only)
-  ssse3    - implementation using SSSE3 instruction set (64bit x86 only)
-  avx2     - implementation using AVX2 instruction set (64bit x86 only)
-  avx512f  - implementation using AVX512F instruction set (64bit x86 only)
-  avx512bw - implementation using AVX512F & AVX512BW instruction sets (64bit x86 only)
-  aarch64_neon - implementation using NEON (Aarch64/64 bit ARMv8 only)
-  aarch64_neonx2 - implementation using NEON with more unrolling (Aarch64/64 bit ARMv8 only)
-.sp
-Default value: \fBfastest\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zevent_cols\fR (int)
-.ad
-.RS 12n
-When zevents are logged to the console use this as the word wrap width.
-.sp
-Default value: \fB80\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zevent_console\fR (int)
-.ad
-.RS 12n
-Log events to the console
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zevent_len_max\fR (int)
-.ad
-.RS 12n
-Max event queue length. A value of 0 will result in a calculated value which
-increases with the number of CPUs in the system (minimum 64 events). Events
-in the queue can be viewed with the \fBzpool events\fR command.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zil_clean_taskq_maxalloc\fR (int)
-.ad
-.RS 12n
-The maximum number of taskq entries that are allowed to be cached.  When this
-limit is exceeded transaction records (itxs) will be cleaned synchronously.
-.sp
-Default value: \fB1048576\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zil_clean_taskq_minalloc\fR (int)
-.ad
-.RS 12n
-The number of taskq entries that are pre-populated when the taskq is first
-created and are immediately available for use.
-.sp
-Default value: \fB1024\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzfs_zil_clean_taskq_nthr_pct\fR (int)
-.ad
-.RS 12n
-This controls the number of threads used by the dp_zil_clean_taskq.  The default
-value of 100% will create a maximum of one thread per cpu.
-.sp
-Default value: \fB100\fR%.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzil_maxblocksize\fR (int)
-.ad
-.RS 12n
-This sets the maximum block size used by the ZIL.  On very fragmented pools,
-lowering this (typically to 36KB) can improve performance.
-.sp
-Default value: \fB131072\fR (128KB).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzil_nocacheflush\fR (int)
-.ad
-.RS 12n
-Disable the cache flush commands that are normally sent to the disk(s) by
-the ZIL after an LWB write has completed. Setting this will cause ZIL
-corruption on power loss if a volatile out-of-order write cache is enabled.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzil_replay_disable\fR (int)
-.ad
-.RS 12n
-Disable intent logging replay. Can be disabled for recovery from corrupted
-ZIL.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzil_slog_bulk\fR (ulong)
-.ad
-.RS 12n
-Limit SLOG write size per commit executed with synchronous priority.
-Any writes above that will be executed with lower (asynchronous) priority
-to limit potential SLOG device abuse by single active ZIL writer.
-.sp
-Default value: \fB786,432\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_deadman_log_all\fR (int)
-.ad
-.RS 12n
-If non-zero, the zio deadman will produce debugging messages (see
-\fBzfs_dbgmsg_enable\fR) for all zios, rather than only for leaf
-zios possessing a vdev. This is meant to be used by developers to gain
-diagnostic information for hang conditions which don't involve a mutex
-or other locking primitive; typically conditions in which a thread in
-the zio pipeline is looping indefinitely.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_decompress_fail_fraction\fR (int)
-.ad
-.RS 12n
-If non-zero, this value represents the denominator of the probability that zfs
-should induce a decompression failure. For instance, for a 5% decompression
-failure rate, this value should be set to 20.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_slow_io_ms\fR (int)
-.ad
-.RS 12n
-When an I/O operation takes more than \fBzio_slow_io_ms\fR milliseconds to
-complete it is marked as a slow I/O.  Each slow I/O causes a delay zevent.  Slow
-I/O counters can be seen with "zpool status -s".
-
-.sp
-Default value: \fB30,000\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_dva_throttle_enabled\fR (int)
-.ad
-.RS 12n
-Throttle block allocations in the I/O pipeline. This allows for
-dynamic allocation distribution when devices are imbalanced.
-When enabled, the maximum number of pending allocations per top-level vdev
-is limited by \fBzfs_vdev_queue_depth_pct\fR.
-.sp
-Default value: \fB1\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_requeue_io_start_cut_in_line\fR (int)
-.ad
-.RS 12n
-Prioritize requeued I/O
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzio_taskq_batch_pct\fR (uint)
-.ad
-.RS 12n
-Percentage of online CPUs (or CPU cores, etc) which will run a worker thread
-for I/O. These workers are responsible for I/O work such as compression and
-checksum calculations. Fractional number of CPUs will be rounded down.
-.sp
-The default value of 75 was chosen to avoid using all CPUs which can result in
-latency issues and inconsistent application performance, especially when high
-compression is enabled.
-.sp
-Default value: \fB75\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_inhibit_dev\fR (uint)
-.ad
-.RS 12n
-Do not create zvol device nodes. This may slightly improve startup time on
-systems with a very large number of zvols.
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_major\fR (uint)
-.ad
-.RS 12n
-Major number for zvol block devices
-.sp
-Default value: \fB230\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_max_discard_blocks\fR (ulong)
-.ad
-.RS 12n
-Discard (aka TRIM) operations done on zvols will be done in batches of this
-many blocks, where block size is determined by the \fBvolblocksize\fR property
-of a zvol.
-.sp
-Default value: \fB16,384\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_prefetch_bytes\fR (uint)
-.ad
-.RS 12n
-When adding a zvol to the system prefetch \fBzvol_prefetch_bytes\fR
-from the start and end of the volume.  Prefetching these regions
-of the volume is desirable because they are likely to be accessed
-immediately by \fBblkid(8)\fR or by the kernel scanning for a partition
-table.
-.sp
-Default value: \fB131,072\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_request_sync\fR (uint)
-.ad
-.RS 12n
-When processing I/O requests for a zvol submit them synchronously.  This
-effectively limits the queue depth to 1 for each I/O submitter.  When set
-to 0 requests are handled asynchronously by a thread pool.  The number of
-requests which can be handled concurrently is controlled by \fBzvol_threads\fR.
-.sp
-Default value: \fB0\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_threads\fR (uint)
-.ad
-.RS 12n
-Max number of threads which can handle zvol I/O requests concurrently.
-.sp
-Default value: \fB32\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzvol_volmode\fR (uint)
-.ad
-.RS 12n
-Defines zvol block devices behaviour when \fBvolmode\fR is set to \fBdefault\fR.
-Valid values are \fB1\fR (full), \fB2\fR (dev) and \fB3\fR (none).
-.sp
-Default value: \fB1\fR.
-.RE
-
-.SH ZFS I/O SCHEDULER
-ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os.
-The I/O scheduler determines when and in what order those operations are
-issued.  The I/O scheduler divides operations into five I/O classes
-prioritized in the following order: sync read, sync write, async read,
-async write, and scrub/resilver.  Each queue defines the minimum and
-maximum number of concurrent operations that may be issued to the
-device.  In addition, the device has an aggregate maximum,
-\fBzfs_vdev_max_active\fR. Note that the sum of the per-queue minimums
-must not exceed the aggregate maximum.  If the sum of the per-queue
-maximums exceeds the aggregate maximum, then the number of active I/Os
-may reach \fBzfs_vdev_max_active\fR, in which case no further I/Os will
-be issued regardless of whether all per-queue minimums have been met.
-.sp
-For many physical devices, throughput increases with the number of
-concurrent operations, but latency typically suffers. Further, physical
-devices typically have a limit at which more concurrent operations have no
-effect on throughput or can actually cause it to decrease.
-.sp
-The scheduler selects the next operation to issue by first looking for an
-I/O class whose minimum has not been satisfied. Once all are satisfied and
-the aggregate maximum has not been hit, the scheduler looks for classes
-whose maximum has not been satisfied. Iteration through the I/O classes is
-done in the order specified above. No further operations are issued if the
-aggregate maximum number of concurrent operations has been hit or if there
-are no operations queued for an I/O class that has not hit its maximum.
-Every time an I/O is queued or an operation completes, the I/O scheduler
-looks for new operations to issue.
-.sp
-In general, smaller max_active's will lead to lower latency of synchronous
-operations.  Larger max_active's may lead to higher overall throughput,
-depending on underlying storage.
-.sp
-The ratio of the queues' max_actives determines the balance of performance
-between reads, writes, and scrubs.  E.g., increasing
-\fBzfs_vdev_scrub_max_active\fR will cause the scrub or resilver to complete
-more quickly, but reads and writes to have higher latency and lower throughput.
-.sp
-All I/O classes have a fixed maximum number of outstanding operations
-except for the async write class. Asynchronous writes represent the data
-that is committed to stable storage during the syncing stage for
-transaction groups. Transaction groups enter the syncing state
-periodically so the number of queued async writes will quickly burst up
-and then bleed down to zero. Rather than servicing them as quickly as
-possible, the I/O scheduler changes the maximum number of active async
-write I/Os according to the amount of dirty data in the pool.  Since
-both throughput and latency typically increase with the number of
-concurrent operations issued to physical devices, reducing the
-burstiness in the number of concurrent operations also stabilizes the
-response time of operations from other -- and in particular synchronous
--- queues. In broad strokes, the I/O scheduler will issue more
-concurrent operations from the async write queue as there's more dirty
-data in the pool.
-.sp
-Async Writes
-.sp
-The number of concurrent operations issued for the async write I/O class
-follows a piece-wise linear function defined by a few adjustable points.
-.nf
-
-       |              o---------| <-- zfs_vdev_async_write_max_active
-  ^    |             /^         |
-  |    |            / |         |
-active |           /  |         |
- I/O   |          /   |         |
-count  |         /    |         |
-       |        /     |         |
-       |-------o      |         | <-- zfs_vdev_async_write_min_active
-      0|_______^______|_________|
-       0%      |      |       100% of zfs_dirty_data_max
-               |      |
-               |      `-- zfs_vdev_async_write_active_max_dirty_percent
-               `--------- zfs_vdev_async_write_active_min_dirty_percent
-
-.fi
-Until the amount of dirty data exceeds a minimum percentage of the dirty
-data allowed in the pool, the I/O scheduler will limit the number of
-concurrent operations to the minimum. As that threshold is crossed, the
-number of concurrent operations issued increases linearly to the maximum at
-the specified maximum percentage of the dirty data allowed in the pool.
-.sp
-Ideally, the amount of dirty data on a busy pool will stay in the sloped
-part of the function between \fBzfs_vdev_async_write_active_min_dirty_percent\fR
-and \fBzfs_vdev_async_write_active_max_dirty_percent\fR. If it exceeds the
-maximum percentage, this indicates that the rate of incoming data is
-greater than the rate that the backend storage can handle. In this case, we
-must further throttle incoming writes, as described in the next section.
-
-.SH ZFS TRANSACTION DELAY
-We delay transactions when we've determined that the backend storage
-isn't able to accommodate the rate of incoming writes.
-.sp
-If there is already a transaction waiting, we delay relative to when
-that transaction will finish waiting.  This way the calculated delay time
-is independent of the number of threads concurrently executing
-transactions.
-.sp
-If we are the only waiter, wait relative to when the transaction
-started, rather than the current time.  This credits the transaction for
-"time already served", e.g. reading indirect blocks.
-.sp
-The minimum time for a transaction to take is calculated as:
-.nf
-    min_time = zfs_delay_scale * (dirty - min) / (max - dirty)
-    min_time is then capped at 100 milliseconds.
-.fi
-.sp
-The delay has two degrees of freedom that can be adjusted via tunables.  The
-percentage of dirty data at which we start to delay is defined by
-\fBzfs_delay_min_dirty_percent\fR. This should typically be at or above
-\fBzfs_vdev_async_write_active_max_dirty_percent\fR so that we only start to
-delay after writing at full speed has failed to keep up with the incoming write
-rate. The scale of the curve is defined by \fBzfs_delay_scale\fR. Roughly speaking,
-this variable determines the amount of delay at the midpoint of the curve.
-.sp
-.nf
-delay
- 10ms +-------------------------------------------------------------*+
-      |                                                             *|
-  9ms +                                                             *+
-      |                                                             *|
-  8ms +                                                             *+
-      |                                                            * |
-  7ms +                                                            * +
-      |                                                            * |
-  6ms +                                                            * +
-      |                                                            * |
-  5ms +                                                           *  +
-      |                                                           *  |
-  4ms +                                                           *  +
-      |                                                           *  |
-  3ms +                                                          *   +
-      |                                                          *   |
-  2ms +                                              (midpoint) *    +
-      |                                                  |    **     |
-  1ms +                                                  v ***       +
-      |             zfs_delay_scale ---------->     ********         |
-    0 +-------------------------------------*********----------------+
-      0%                    <- zfs_dirty_data_max ->               100%
-.fi
-.sp
-Note that since the delay is added to the outstanding time remaining on the
-most recent transaction, the delay is effectively the inverse of IOPS.
-Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve
-was chosen such that small changes in the amount of accumulated dirty data
-in the first 3/4 of the curve yield relatively small differences in the
-amount of delay.
-.sp
-The effects can be easier to understand when the amount of delay is
-represented on a log scale:
-.sp
-.nf
-delay
-100ms +-------------------------------------------------------------++
-      +                                                              +
-      |                                                              |
-      +                                                             *+
- 10ms +                                                             *+
-      +                                                           ** +
-      |                                              (midpoint)  **  |
-      +                                                  |     **    +
-  1ms +                                                  v ****      +
-      +             zfs_delay_scale ---------->        *****         +
-      |                                             ****             |
-      +                                          ****                +
-100us +                                        **                    +
-      +                                       *                      +
-      |                                      *                       |
-      +                                     *                        +
- 10us +                                     *                        +
-      +                                                              +
-      |                                                              |
-      +                                                              +
-      +--------------------------------------------------------------+
-      0%                    <- zfs_dirty_data_max ->               100%
-.fi
-.sp
-Note here that only as the amount of dirty data approaches its limit does
-the delay start to increase rapidly. The goal of a properly tuned system
-should be to keep the amount of dirty data out of that range by first
-ensuring that the appropriate limits are set for the I/O scheduler to reach
-optimal throughput on the backend storage, and then by changing the value
-of \fBzfs_delay_scale\fR to increase the steepness of the curve.

diff --git a/zfs/man/man5/zpool-features.5 b/zfs/man/man5/zpool-features.5
deleted file mode 100644
index 2534d3d..0000000
--- a/zfs/man/man5/zpool-features.5
+++ /dev/null

@@ -1,827 +0,0 @@
-'\" te
-.\" Copyright (c) 2013, 2017 by Delphix. All rights reserved.
-.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
-.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
-.\" The contents of this file are subject to the terms of the Common Development
-.\" and Distribution License (the "License").  You may not use this file except
-.\" in compliance with the License. You can obtain a copy of the license at
-.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
-.\"
-.\" See the License for the specific language governing permissions and
-.\" limitations under the License. When distributing Covered Code, include this
-.\" CDDL HEADER in each file and include the License file at
-.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
-.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
-.\" own identifying information:
-.\" Portions Copyright [yyyy] [name of copyright owner]
-.TH ZPOOL-FEATURES 5 "Jun 8, 2018"
-.SH NAME
-zpool\-features \- ZFS pool feature descriptions
-.SH DESCRIPTION
-.sp
-.LP
-ZFS pool on\-disk format versions are specified via "features" which replace
-the old on\-disk format numbers (the last supported on\-disk format number is
-28). To enable a feature on a pool use the \fBupgrade\fR subcommand of the
-zpool(8) command, or set the \fBfeature@\fR\fIfeature_name\fR property
-to \fBenabled\fR.
-.sp
-.LP
-The pool format does not affect file system version compatibility or the ability
-to send file systems between pools.
-.sp
-.LP
-Since most features can be enabled independently of each other the on\-disk
-format of the pool is specified by the set of all features marked as
-\fBactive\fR on the pool. If the pool was created by another software version
-this set may include unsupported features.
-.SS "Identifying features"
-.sp
-.LP
-Every feature has a GUID of the form \fIcom.example:feature_name\fR. The
-reverse DNS name ensures that the feature's GUID is unique across all ZFS
-implementations. When unsupported features are encountered on a pool they will
-be identified by their GUIDs. Refer to the documentation for the ZFS
-implementation that created the pool for information about those features.
-.sp
-.LP
-Each supported feature also has a short name. By convention a feature's short
-name is the portion of its GUID which follows the ':' (e.g.
-\fIcom.example:feature_name\fR would have the short name \fIfeature_name\fR),
-however a feature's short name may differ across ZFS implementations if
-following the convention would result in name conflicts.
-.SS "Feature states"
-.sp
-.LP
-Features can be in one of three states:
-.sp
-.ne 2
-.na
-\fBactive\fR
-.ad
-.RS 12n
-This feature's on\-disk format changes are in effect on the pool. Support for
-this feature is required to import the pool in read\-write mode. If this
-feature is not read-only compatible, support is also required to import the pool
-in read\-only mode (see "Read\-only compatibility").
-.RE
-
-.sp
-.ne 2
-.na
-\fBenabled\fR
-.ad
-.RS 12n
-An administrator has marked this feature as enabled on the pool, but the
-feature's on\-disk format changes have not been made yet. The pool can still be
-imported by software that does not support this feature, but changes may be made
-to the on\-disk format at any time which will move the feature to the
-\fBactive\fR state. Some features may support returning to the \fBenabled\fR
-state after becoming \fBactive\fR. See feature\-specific documentation for
-details.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdisabled\fR
-.ad
-.RS 12n
-This feature's on\-disk format changes have not been made and will not be made
-unless an administrator moves the feature to the \fBenabled\fR state. Features
-cannot be disabled once they have been enabled.
-.RE
-
-.sp
-.LP
-The state of supported features is exposed through pool properties of the form
-\fIfeature@short_name\fR.
-.SS "Read\-only compatibility"
-.sp
-.LP
-Some features may make on\-disk format changes that do not interfere with other
-software's ability to read from the pool. These features are referred to as
-"read\-only compatible". If all unsupported features on a pool are read\-only
-compatible, the pool can be imported in read\-only mode by setting the
-\fBreadonly\fR property during import (see zpool(8) for details on
-importing pools).
-.SS "Unsupported features"
-.sp
-.LP
-For each unsupported feature enabled on an imported pool a pool property
-named \fIunsupported@feature_name\fR will indicate why the import was allowed
-despite the unsupported feature. Possible values for this property are:
-
-.sp
-.ne 2
-.na
-\fBinactive\fR
-.ad
-.RS 12n
-The feature is in the \fBenabled\fR state and therefore the pool's on\-disk
-format is still compatible with software that does not support this feature.
-.RE
-
-.sp
-.ne 2
-.na
-\fBreadonly\fR
-.ad
-.RS 12n
-The feature is read\-only compatible and the pool has been imported in
-read\-only mode.
-.RE
-
-.SS "Feature dependencies"
-.sp
-.LP
-Some features depend on other features being enabled in order to function
-properly. Enabling a feature will automatically enable any features it
-depends on.
-.SH FEATURES
-.sp
-.LP
-The following features are supported on this system:
-
-.sp
-.ne 2
-.na
-\fBallocation_classes\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.zfsonlinux:allocation_classes
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This feature enables support for separate allocation classes.
-
-This feature becomes \fBactive\fR when a dedicated allocation class vdev
-(dedup or special) is created with the \fBzpool create\fR or \fBzpool add\fR
-subcommands. With device removal, it can be returned to the \fBenabled\fR
-state if all the dedicated allocation class vdevs are removed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBasync_destroy\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:async_destroy
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-Destroying a file system requires traversing all of its data in order to
-return its used space to the pool. Without \fBasync_destroy\fR the file system
-is not fully removed until all space has been reclaimed. If the destroy
-operation is interrupted by a reboot or power outage the next attempt to open
-the pool will need to complete the destroy operation synchronously.
-
-When \fBasync_destroy\fR is enabled the file system's data will be reclaimed
-by a background process, allowing the destroy operation to complete without
-traversing the entire file system. The background process is able to resume
-interrupted destroys after the pool has been opened, eliminating the need
-to finish interrupted destroys as part of the open operation. The amount
-of space remaining to be reclaimed by the background process is available
-through the \fBfreeing\fR property.
-
-This feature is only \fBactive\fR while \fBfreeing\fR is non\-zero.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbookmarks\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:bookmarks
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature enables use of the \fBzfs bookmark\fR subcommand.
-
-This feature is \fBactive\fR while any bookmarks exist in the pool.
-All bookmarks in the pool can be listed by running
-\fBzfs list -t bookmark -r \fIpoolname\fR\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBbookmark_v2\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.datto:bookmark_v2
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	bookmarks, extensible_dataset
-.TE
-
-This feature enables the creation and management of larger bookmarks which are
-needed for other features in ZFS.
-
-This feature becomes \fBactive\fR when a v2 bookmark is created and will be
-returned to the \fBenabled\fR state when all v2 bookmarks are destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBdevice_removal\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:device_removal
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	none
-.TE
-
-This feature enables the \fBzpool remove\fR subcommand to remove top-level
-vdevs, evacuating them to reduce the total size of the pool.
-
-This feature becomes \fBactive\fR when the \fBzpool remove\fR subcommand is used
-on a top-level vdev, and will never return to being \fBenabled\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBedonr\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.illumos:edonr
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature enables the use of the Edon-R hash algorithm for checksum,
-including for nopwrite (if compression is also enabled, an overwrite of
-a block whose checksum matches the data being written will be ignored).
-In an abundance of caution, Edon-R requires verification when used with
-dedup: \fBzfs set dedup=edonr,verify\fR.  See \fBzfs\fR(8).
-
-Edon-R is a very high-performance hash algorithm that was part
-of the NIST SHA-3 competition. It provides extremely high hash
-performance (over 350% faster than SHA-256), but was not selected
-because of its unsuitability as a general purpose secure hash algorithm.
-This implementation utilizes the new salted checksumming functionality
-in ZFS, which means that the checksum is pre-seeded with a secret
-256-bit random key (stored on the pool) before being fed the data block
-to be checksummed. Thus the produced checksums are unique to a given
-pool.
-
-When the \fBedonr\fR feature is set to \fBenabled\fR, the administrator
-can turn on the \fBedonr\fR checksum on any dataset using the
-\fBzfs set checksum=edonr\fR. See zfs(8). This feature becomes
-\fBactive\fR once a \fBchecksum\fR property has been set to \fBedonr\fR,
-and will return to being \fBenabled\fR once all filesystems that have
-ever had their checksum set to \fBedonr\fR are destroyed.
-
-The \fBedonr\fR feature is not supported by GRUB and must not be used on
-the pool if GRUB needs to access the pool (e.g. for /boot).
-.RE
-
-.sp
-.ne 2
-.na
-\fBembedded_data\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:embedded_data
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	none
-.TE
-
-This feature improves the performance and compression ratio of
-highly-compressible blocks.  Blocks whose contents can compress to 112 bytes
-or smaller can take advantage of this feature.
-
-When this feature is enabled, the contents of highly-compressible blocks are
-stored in the block "pointer" itself (a misnomer in this case, as it contains
-the compressed data, rather than a pointer to its location on disk).  Thus
-the space of the block (one sector, typically 512 bytes or 4KB) is saved,
-and no additional i/o is needed to read and write the data block.
-
-This feature becomes \fBactive\fR as soon as it is enabled and will
-never return to being \fBenabled\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBempty_bpobj\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:empty_bpobj
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This feature increases the performance of creating and using a large
-number of snapshots of a single filesystem or volume, and also reduces
-the disk space required.
-
-When there are many snapshots, each snapshot uses many Block Pointer
-Objects (bpobj's) to track blocks associated with that snapshot.
-However, in common use cases, most of these bpobj's are empty.  This
-feature allows us to create each bpobj on-demand, thus eliminating the
-empty bpobjs.
-
-This feature is \fBactive\fR while there are any filesystems, volumes,
-or snapshots which were created after enabling this feature.
-.RE
-
-.sp
-.ne 2
-.na
-\fBenabled_txg\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:enabled_txg
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-Once this feature is enabled ZFS records the transaction group number
-in which new features are enabled. This has no user-visible impact,
-but other features may depend on this feature.
-
-This feature becomes \fBactive\fR as soon as it is enabled and will
-never return to being \fBenabled\fB.
-.RE
-
-.sp
-.ne 2
-.na
-\fBencryption\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.datto:encryption
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	bookmark_v2, extensible_dataset
-.TE
-
-This feature enables the creation and management of natively encrypted datasets.
-
-This feature becomes \fBactive\fR when an encrypted dataset is created and will
-be returned to the \fBenabled\fR state when all datasets that use this feature
-are destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBextensible_dataset\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:extensible_dataset
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	none
-.TE
-
-This feature allows more flexible use of internal ZFS data structures,
-and exists for other features to depend on.
-
-This feature will be \fBactive\fR when the first dependent feature uses it,
-and will be returned to the \fBenabled\fR state when all datasets that use
-this feature are destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBfilesystem_limits\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.joyent:filesystem_limits
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature enables filesystem and snapshot limits. These limits can be used
-to control how many filesystems and/or snapshots can be created at the point in
-the tree on which the limits are set.
-
-This feature is \fBactive\fR once either of the limit properties has been
-set on a dataset. Once activated the feature is never deactivated.
-.RE
-
-.sp
-.ne 2
-.na
-\fBhole_birth\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:hole_birth
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	enabled_txg
-.TE
-
-This feature has/had bugs, the result of which is that, if you do a
-\fBzfs send -i\fR (or \fB-R\fR, since it uses \fB-i\fR) from an affected
-dataset, the receiver will not see any checksum or other errors, but the
-resulting destination snapshot will not match the source.  Its use by
-\fBzfs send -i\fR has been disabled by default.  See the
-\fBsend_holes_without_birth_time\fR module parameter in
-zfs-module-parameters(5).
-
-This feature improves performance of incremental sends (\fBzfs send -i\fR)
-and receives for objects with many holes. The most common case of
-hole-filled objects is zvols.
-
-An incremental send stream from snapshot \fBA\fR to snapshot \fBB\fR
-contains information about every block that changed between \fBA\fR and
-\fBB\fR. Blocks which did not change between those snapshots can be
-identified and omitted from the stream using a piece of metadata called
-the 'block birth time', but birth times are not recorded for holes (blocks
-filled only with zeroes). Since holes created after \fBA\fR cannot be
-distinguished from holes created before \fBA\fR, information about every
-hole in the entire filesystem or zvol is included in the send stream.
-
-For workloads where holes are rare this is not a problem. However, when
-incrementally replicating filesystems or zvols with many holes (for
-example a zvol formatted with another filesystem) a lot of time will
-be spent sending and receiving unnecessary information about holes that
-already exist on the receiving side.
-
-Once the \fBhole_birth\fR feature has been enabled the block birth times
-of all new holes will be recorded. Incremental sends between snapshots
-created after this feature is enabled will use this new metadata to avoid
-sending information about holes that already exist on the receiving side.
-
-This feature becomes \fBactive\fR as soon as it is enabled and will
-never return to being \fBenabled\fB.
-.RE
-
-.sp
-.ne 2
-.na
-\fBlarge_blocks\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.open-zfs:large_blocks
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	extensible_dataset
-.TE
-
-The \fBlarge_block\fR feature allows the record size on a dataset to be
-set larger than 128KB.
-
-This feature becomes \fBactive\fR once a dataset contains a file with
-a block size larger than 128KB, and will return to being \fBenabled\fR once all
-filesystems that have ever had their recordsize larger than 128KB are destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBlarge_dnode\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.zfsonlinux:large_dnode
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	extensible_dataset
-.TE
-
-The \fBlarge_dnode\fR feature allows the size of dnodes in a dataset to be
-set larger than 512B.
-
-This feature becomes \fBactive\fR once a dataset contains an object with
-a dnode larger than 512B, which occurs as a result of setting the
-\fBdnodesize\fR dataset property to a value other than \fBlegacy\fR. The
-feature will return to being \fBenabled\fR once all filesystems that
-have ever contained a dnode larger than 512B are destroyed. Large dnodes
-allow more data to be stored in the bonus buffer, thus potentially
-improving performance by avoiding the use of spill blocks.
-.RE
-
-.sp
-.ne 2
-.na
-\fBlz4_compress\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.illumos:lz4_compress
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	none
-.TE
-
-\fBlz4\fR is a high-performance real-time compression algorithm that
-features significantly faster compression and decompression as well as a
-higher compression ratio than the older \fBlzjb\fR compression.
-Typically, \fBlz4\fR compression is approximately 50% faster on
-compressible data and 200% faster on incompressible data than
-\fBlzjb\fR. It is also approximately 80% faster on decompression, while
-giving approximately 10% better compression ratio.
-
-When the \fBlz4_compress\fR feature is set to \fBenabled\fR, the
-administrator can turn on \fBlz4\fR compression on any dataset on the
-pool using the zfs(8) command. Please note that doing so will
-immediately activate the \fBlz4_compress\fR feature on the underlying
-pool using the zfs(8) command. Also, all newly written metadata
-will be compressed with \fBlz4\fR algorithm. Since this feature is not
-read-only compatible, this operation will render the pool unimportable
-on systems without support for the \fBlz4_compress\fR feature.
-
-Booting off of \fBlz4\fR-compressed root pools is supported.
-
-This feature becomes \fBactive\fR as soon as it is enabled and will
-never return to being \fBenabled\fB.
-.RE
-
-.sp
-.ne 2
-.na
-\fBmulti_vdev_crash_dump\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.joyent:multi_vdev_crash_dump
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	none
-.TE
-
-This feature allows a dump device to be configured with a pool comprised
-of multiple vdevs.  Those vdevs may be arranged in any mirrored or raidz
-configuration.
-
-When the \fBmulti_vdev_crash_dump\fR feature is set to \fBenabled\fR,
-the administrator can use the \fBdumpadm\fR(1M) command to configure a
-dump device on a pool comprised of multiple vdevs.
-
-Under Linux this feature is registered for compatibility but not used.
-New pools created under Linux will have the feature \fBenabled\fR but
-will never transition to \fB\fBactive\fR.  This functionality is not
-required in order to support crash dumps under Linux.  Existing pools
-where this feature is \fB\fBactive\fR can be imported.
-.RE
-
-.sp
-.ne 2
-.na
-\fBobsolete_counts\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:obsolete_counts
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	device_removal
-.TE
-
-This feature is an enhancement of device_removal, which will over time
-reduce the memory used to track removed devices.  When indirect blocks
-are freed or remapped, we note that their part of the indirect mapping
-is "obsolete", i.e. no longer needed.
-
-This feature becomes \fBactive\fR when the \fBzpool remove\fR subcommand is
-used on a top-level vdev, and will never return to being \fBenabled\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBproject_quota\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.zfsonlinux:project_quota
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature allows administrators to account the spaces and objects usage
-information against the project identifier (ID).
-
-The project ID is new object-based attribute. When upgrading an existing
-filesystem, object without project ID attribute will be assigned a zero
-project ID. After this feature is enabled, newly created object will inherit
-its parent directory's project ID if the parent inherit flag is set (via
-\fBchattr +/-P\fR or \fBzfs project [-s|-C]\fR). Otherwise, the new object's
-project ID will be set as zero. An object's project ID can be changed at
-anytime by the owner (or privileged user) via \fBchattr -p $prjid\fR or
-\fBzfs project -p $prjid\fR.
-
-This feature will become \fBactive\fR as soon as it is enabled and will never
-return to being \fBdisabled\fR. Each filesystem will be upgraded automatically
-when remounted or when new file is created under that filesystem. The upgrade
-can also be triggered on filesystems via `zfs set version=current <pool/fs>`.
-The upgrade process runs in the background and may take a while to complete
-for the filesystems containing a large number of files.
-.RE
-
-.sp
-.ne 2
-.na
-\fBresilver_defer\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.datto:resilver_defer
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This feature allows zfs to postpone new resilvers if an existing one is already
-in progress. Without this feature, any new resilvers will cause the currently
-running one to be immediately restarted from the beginning.
-
-This feature becomes \fBactive\fR once a resilver has been deferred, and
-returns to being \fBenabled\fR when the deferred resilver begins.
-.RE
-
-.sp
-.ne 2
-.na
-\fBsha512\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.illumos:sha512
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature enables the use of the SHA-512/256 truncated hash algorithm
-(FIPS 180-4) for checksum and dedup. The native 64-bit arithmetic of
-SHA-512 provides an approximate 50% performance boost over SHA-256 on
-64-bit hardware and is thus a good minimum-change replacement candidate
-for systems where hash performance is important, but these systems
-cannot for whatever reason utilize the faster \fBskein\fR and
-\fBedonr\fR algorithms.
-
-When the \fBsha512\fR feature is set to \fBenabled\fR, the administrator
-can turn on the \fBsha512\fR checksum on any dataset using
-\fBzfs set checksum=sha512\fR. See zfs(8). This feature becomes
-\fBactive\fR once a \fBchecksum\fR property has been set to \fBsha512\fR,
-and will return to being \fBenabled\fR once all filesystems that have
-ever had their checksum set to \fBsha512\fR are destroyed.
-
-The \fBsha512\fR feature is not supported by GRUB and must not be used on
-the pool if GRUB needs to access the pool (e.g. for /boot).
-.RE
-
-.sp
-.ne 2
-.na
-\fBskein\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.illumos:skein
-READ\-ONLY COMPATIBLE	no
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature enables the use of the Skein hash algorithm for checksum
-and dedup. Skein is a high-performance secure hash algorithm that was a
-finalist in the NIST SHA-3 competition. It provides a very high security
-margin and high performance on 64-bit hardware (80% faster than
-SHA-256). This implementation also utilizes the new salted checksumming
-functionality in ZFS, which means that the checksum is pre-seeded with a
-secret 256-bit random key (stored on the pool) before being fed the data
-block to be checksummed. Thus the produced checksums are unique to a
-given pool, preventing hash collision attacks on systems with dedup.
-
-When the \fBskein\fR feature is set to \fBenabled\fR, the administrator
-can turn on the \fBskein\fR checksum on any dataset using
-\fBzfs set checksum=skein\fR. See zfs(8). This feature becomes
-\fBactive\fR once a \fBchecksum\fR property has been set to \fBskein\fR,
-and will return to being \fBenabled\fR once all filesystems that have
-ever had their checksum set to \fBskein\fR are destroyed.
-
-The \fBskein\fR feature is not supported by GRUB and must not be used on
-the pool if GRUB needs to access the pool (e.g. for /boot).
-.RE
-
-.sp
-.ne 2
-.na
-\fBspacemap_histogram\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:spacemap_histogram
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This features allows ZFS to maintain more information about how free space
-is organized within the pool. If this feature is \fBenabled\fR, ZFS will
-set this feature to \fBactive\fR when a new space map object is created or
-an existing space map is upgraded to the new format. Once the feature is
-\fBactive\fR, it will remain in that state until the pool is destroyed.
-.RE
-
-.sp
-.ne 2
-.na
-\fBspacemap_v2\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:spacemap_v2
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This feature enables the use of the new space map encoding which
-consists of two words (instead of one) whenever it is advantageous.
-The new encoding allows space maps to represent large regions of
-space more efficiently on-disk while also increasing their maximum
-addressable offset.
-
-This feature becomes \fBactive\fR once it is \fBenabled\fR, and never
-returns back to being \fBenabled\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fBuserobj_accounting\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	org.zfsonlinux:userobj_accounting
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	extensible_dataset
-.TE
-
-This feature allows administrators to account the object usage information
-by user and group.
-
-This feature becomes \fBactive\fR as soon as it is enabled and will never
-return to being \fBenabled\fR. Each filesystem will be upgraded automatically
-when remounted, or when new files are created under that filesystem.
-The upgrade can also be started manually on filesystems by running
-`zfs set version=current <pool/fs>`. The upgrade process runs in the background
-and may take a while to complete for filesystems containing a large number of
-files.
-.RE
-
-.sp
-.ne 2
-.na
-\fBzpool_checkpoint\fR
-.ad
-.RS 4n
-.TS
-l l .
-GUID	com.delphix:zpool_checkpoint
-READ\-ONLY COMPATIBLE	yes
-DEPENDENCIES	none
-.TE
-
-This feature enables the \fBzpool checkpoint\fR subcommand that can
-checkpoint the state of the pool at the time it was issued and later
-rewind back to it or discard it.
-
-This feature becomes \fBactive\fR when the \fBzpool checkpoint\fR subcommand
-is used to checkpoint the pool.
-The feature will only return back to being \fBenabled\fR when the pool
-is rewound or the checkpoint has been discarded.
-.RE
-
-.SH "SEE ALSO"
-zpool(8)

diff --git a/zfs/man/man7/dracut.zfs.7 b/zfs/man/man7/dracut.zfs.7
new file mode 100644
index 0000000..d9234bd
--- /dev/null
+++ b/zfs/man/man7/dracut.zfs.7

@@ -0,0 +1,278 @@
+.\" SPDX-License-Identifier: 0BSD
+.\"
+.Dd March 28, 2023
+.Dt DRACUT.ZFS 7
+.Os
+.
+.Sh NAME
+.Nm dracut.zfs
+.Nd overview of ZFS dracut hooks
+.
+.Sh SYNOPSIS
+.Bd -literal -compact
+                      parse-zfs.sh \(-> dracut-cmdline.service
+                          |                     \(da
+                          |                     …
+                          |                     \(da
+                          \e\(em\(em\(em\(em\(em\(em\(em\(em\(-> dracut-initqueue.service
+                                                |                      zfs-import-opts.sh
+   zfs-load-module.service                      \(da                          |       |
+     |                  |                sysinit.target                    \(da       |
+     \(da                  |                       |        zfs-import-scan.service   \(da
+zfs-import-scan.service \(da                       \(da           | zfs-import-cache.service
+     |   zfs-import-cache.service         basic.target      |     |
+     \e__________________|                       |           \(da     \(da
+                        \(da                       |     zfs-load-key.sh
+     zfs-env-bootfs.service                     |         |
+                        \(da                       \(da         \(da
+                 zfs-import.target \(-> dracut-pre-mount.service
+                        |          \(ua            |
+                        | dracut-zfs-generator  |
+                        | _____________________/|
+                        |/                      \(da
+                        |                   sysroot.mount \(<-\(em\(em\(em dracut-zfs-generator
+                        |                       |
+                        |                       \(da
+                        |             initrd-root-fs.target \(<-\(em zfs-nonroot-necessities.service
+                        |                       |                                 |
+                        |                       \(da                                 |
+                        \(da             dracut-mount.service                        |
+       zfs-snapshot-bootfs.service              |                                 |
+                        |                       \(da                                 |
+                        \(da                       …                                 |
+       zfs-rollback-bootfs.service              |                                 |
+                        |                       \(da                                 |
+                        |          /sysroot/{usr,etc,lib,&c.} \(<-\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em\(em/
+                        |                       |
+                        |                       \(da
+                        |                initrd-fs.target
+                        \e______________________ |
+                                               \e|
+                                                \(da
+        export-zfs.sh                      initrd.target
+              |                                 |
+              \(da                                 \(da
+   dracut-shutdown.service                      …
+                                                |
+                                                \(da
+                 zfs-needshutdown.sh \(-> initrd-cleanup.service
+.Ed
+.Pp
+Compare
+.Xr dracut.bootup 7
+for the full flowchart.
+.
+.Sh DESCRIPTION
+Under dracut, booting with
+.No ZFS-on- Ns Pa /
+is facilitated by a number of hooks in the
+.Nm 90zfs
+module.
+.Pp
+Booting into a ZFS dataset requires
+.Sy mountpoint Ns = Ns Pa /
+to be set on the dataset containing the root filesystem (henceforth "the boot dataset") and at the very least either the
+.Sy bootfs
+property to be set to that dataset, or the
+.Sy root=
+kernel cmdline (or dracut drop-in) argument to specify it.
+.Pp
+All children of the boot dataset with
+.Sy canmount Ns = Ns Sy on
+with
+.Sy mountpoint Ns s
+matching
+.Pa /etc , /bin , /lib , /lib?? , /libx32 , No and Pa /usr
+globs are deemed essential and will be mounted as well.
+.Pp
+.Xr zfs-mount-generator 8
+is recommended for proper functioning of the system afterward (correct mount properties, remounting, &c.).
+.
+.Sh CMDLINE
+.Ss Standard
+.Bl -tag -compact -width ".Sy root=zfs:AUTO , root=zfs: , root=zfs , Op Sy root="
+.It Sy root=zfs:\& Ns Ar dataset , Sy root=ZFS= Ns Ar dataset
+Use
+.Ar dataset
+as the boot dataset.
+All pluses
+.Pq Sq +
+are replaced with spaces
+.Pq Sq \  .
+.
+.It Sy root=zfs:AUTO , root=zfs:\& , root=zfs , Op Sy root=
+After import, search for the first pool with the
+.Sy bootfs
+property set, use its value as-if specified as the
+.Ar dataset
+above.
+.
+.It Sy rootfstype=zfs root= Ns Ar dataset
+Equivalent to
+.Sy root=zfs:\& Ns Ar dataset .
+.
+.It Sy rootfstype=zfs Op Sy root=
+Equivalent to
+.Sy root=zfs:AUTO .
+.
+.It Sy rootflags= Ns Ar flags
+Mount the boot dataset with
+.Fl o Ar flags ;
+cf.\&
+.Sx Temporary Mount Point Properties
+in
+.Xr zfsprops 7 .
+These properties will not last, since all filesystems will be re-mounted from the real root.
+.
+.It Sy debug
+If specified,
+.Nm dracut-zfs-generator
+logs to the journal.
+.El
+.Pp
+Be careful about setting neither
+.Sy rootfstype=zfs
+nor
+.Sy root=zfs:\& Ns Ar dataset
+\(em other automatic boot selection methods, like
+.Nm systemd-gpt-auto-generator
+and
+.Nm systemd-fstab-generator
+might take precedence.
+.
+.Ss ZFS-specific
+.Bl -tag -compact -width ".Sy bootfs.snapshot Ns Op Sy = Ns Ar snapshot-name"
+.It Sy bootfs.snapshot Ns Op Sy = Ns Ar snapshot-name
+Execute
+.Nm zfs Cm snapshot Ar boot-dataset Ns Sy @ Ns Ar snapshot-name
+before pivoting to the real root.
+.Ar snapshot-name
+defaults to the current kernel release.
+.
+.It Sy bootfs.rollback Ns Op Sy = Ns Ar snapshot-name
+Execute
+.Nm zfs Cm rollback Fl Rf Ar boot-dataset Ns Sy @ Ns Ar snapshot-name
+before pivoting to the real root.
+.Ar snapshot-name
+defaults to the current kernel release.
+.
+.It Sy spl_hostid= Ns Ar host-id
+Use
+.Xr zgenhostid 8
+to set the host ID to
+.Ar host-id ;
+otherwise,
+.Pa /etc/hostid
+inherited from the real root is used.
+.
+.It Sy zfs_force , zfs.force , zfsforce
+Appends
+.Fl f
+to all
+.Nm zpool Cm import
+invocations; primarily useful in conjunction with
+.Sy spl_hostid= ,
+or if no host ID was inherited.
+.El
+.
+.Sh FILES
+.Bl -tag -width 0
+.It Pa parse-zfs.sh Pq Sy cmdline
+Processes
+.Sy spl_hostid= .
+If
+.Sy root=
+matches a known pattern, above, provides
+.Pa /dev/root
+and delays the initqueue until
+.Xr zfs 4
+is loaded.
+.
+.It Pa zfs-import-opts.sh Pq Nm systemd No environment generator
+Turns
+.Sy zfs_force , zfs.force , No or Sy zfsforce
+into
+.Ev ZPOOL_IMPORT_OPTS Ns = Ns Fl f
+for
+.Pa zfs-import-scan.service
+or
+.Pa zfs-import-cache.service .
+.
+.It Pa zfs-load-key.sh Pq Sy pre-mount
+Loads encryption keys for the boot dataset and its essential descendants.
+.Bl -tag -compact -offset 4n -width ".Sy keylocation Ns = Ns Sy https:// Ns Ar URL , Sy keylocation Ns = Ns Sy http:// Ns Ar URL"
+.It Sy keylocation Ns = Ns Sy prompt
+Is prompted for via
+.Nm systemd-ask-password
+thrice.
+.
+.It Sy keylocation Ns = Ns Sy https:// Ns Ar URL , Sy keylocation Ns = Ns Sy http:// Ns Ar URL
+.Pa network-online.target
+is started before loading.
+.
+.It Sy keylocation Ns = Ns Sy file:// Ns Ar path
+If
+.Ar path
+doesn't exist,
+.Nm udevadm No is Cm settle Ns d .
+If it still doesn't, it's awaited for up to
+.Sy 10 Ns s .
+.El
+.
+.It Pa zfs-env-bootfs.service Pq Nm systemd No service
+After pool import, sets
+.Ev BOOTFS Ns =
+in the systemd environment to the first non-null
+.Sy bootfs
+value in iteration order.
+.
+.It Pa dracut-zfs-generator Pq Nm systemd No generator
+Generates
+.Pa sysroot.mount Pq using Sy rootflags= , No if any .
+If an explicit boot dataset was specified, also generates essential mountpoints
+.Pq Pa sysroot-etc.mount , sysroot-bin.mount , No &c.\& ,
+otherwise generates
+.Pa zfs-nonroot-necessities.service
+which mounts them explicitly after
+.Pa /sysroot
+using
+.Ev BOOTFS Ns = .
+.
+.It Pa zfs-snapshot-bootfs.service , zfs-rollback-bootfs.service Pq Nm systemd No services
+Consume
+.Sy bootfs.snapshot
+and
+.Sy bootfs.rollback
+as described in
+.Sx CMDLINE  .
+Use
+.Ev BOOTFS Ns =
+if no explicit boot dataset was specified.
+.
+.It Pa zfs-needshutdown.sh Pq Sy cleanup
+If any pools were imported, signals that shutdown hooks are required.
+.
+.It Pa export-zfs.sh Pq Sy shutdown
+Forcibly exports all pools.
+.
+.It Pa /etc/hostid , /etc/zfs/zpool.cache , /etc/zfs/vdev_id.conf Pq regular files
+Included verbatim, hostonly.
+.
+.It Pa mount-zfs.sh Pq Sy mount
+Does nothing on
+.Nm systemd
+systems
+.Pq if Pa dracut-zfs-generator No succeeded .
+Otherwise, loads encryption key for the boot dataset from the console or via plymouth.
+It may not work at all!
+.El
+.
+.Sh SEE ALSO
+.Xr dracut.bootup 7 ,
+.Xr zfsprops 7 ,
+.Xr zpoolprops 7 ,
+.Xr dracut-shutdown.service 8 ,
+.Xr systemd-fstab-generator 8 ,
+.Xr systemd-gpt-auto-generator 8 ,
+.Xr zfs-mount-generator 8 ,
+.Xr zgenhostid 8

diff --git a/zfs/man/man7/zfsconcepts.7 b/zfs/man/man7/zfsconcepts.7
new file mode 100644
index 0000000..f958035
--- /dev/null
+++ b/zfs/man/man7/zfsconcepts.7

@@ -0,0 +1,206 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFSCONCEPTS 7
+.Os
+.
+.Sh NAME
+.Nm zfsconcepts
+.Nd overview of ZFS concepts
+.
+.Sh DESCRIPTION
+.Ss ZFS File System Hierarchy
+A ZFS storage pool is a logical collection of devices that provide space for
+datasets.
+A storage pool is also the root of the ZFS file system hierarchy.
+.Pp
+The root of the pool can be accessed as a file system, such as mounting and
+unmounting, taking snapshots, and setting properties.
+The physical storage characteristics, however, are managed by the
+.Xr zpool 8
+command.
+.Pp
+See
+.Xr zpool 8
+for more information on creating and administering pools.
+.Ss Snapshots
+A snapshot is a read-only copy of a file system or volume.
+Snapshots can be created extremely quickly, and initially consume no additional
+space within the pool.
+As data within the active dataset changes, the snapshot consumes more space,
+as it continues to reference old data that is no longer shared with the active dataset.
+.Pp
+Snapshots can have arbitrary names.
+Snapshots of volumes can be cloned or rolled back; their visibility is determined
+by the
+.Sy snapdev
+property of the parent volume.
+.Pp
+File system snapshots can be accessed under the
+.Pa .zfs/snapshot
+directory in the root of the file system.
+Snapshots are automatically mounted on demand and may be unmounted at regular
+intervals.
+The visibility of the
+.Pa .zfs
+directory can be controlled by the
+.Sy snapdir
+property.
+.Ss Bookmarks
+A bookmark is like a snapshot, a read-only copy of a file system or volume.
+Bookmarks can be created extremely quickly, compared to snapshots, and they
+consume no additional space within the pool.
+Bookmarks can also have arbitrary names, much like snapshots.
+.Pp
+Unlike snapshots, bookmarks cannot be accessed through the filesystem in any way.
+From a storage standpoint a bookmark just provides a way to reference
+when a snapshot was created as a distinct object.
+Bookmarks are initially tied to a snapshot, not the filesystem or volume,
+and they will survive if the snapshot itself is destroyed.
+Since they are very lightweight, there's little incentive to destroy them.
+.Ss Clones
+A clone is a writable volume or file system whose initial contents are the same
+as another dataset.
+As with snapshots, creating a clone is nearly instantaneous, and initially
+consumes no additional space.
+.Pp
+Clones can only be created from a snapshot.
+When a snapshot is cloned, it creates an implicit dependency between the parent
+and child.
+Even though the clone is created somewhere else in the dataset hierarchy, the
+original snapshot cannot be destroyed as long as a clone exists.
+The
+.Sy origin
+property exposes this dependency, and the
+.Cm destroy
+command lists any such dependencies, if they exist.
+.Pp
+The clone parent-child dependency relationship can be reversed by using the
+.Cm promote
+subcommand.
+This causes the
+.Qq origin
+file system to become a clone of the specified file system, which makes it
+possible to destroy the file system that the clone was created from.
+.Ss "Mount Points"
+Creating a ZFS file system is a simple operation, so the number of file systems
+per system is likely to be numerous.
+To cope with this, ZFS automatically manages mounting and unmounting file
+systems without the need to edit the
+.Pa /etc/fstab
+file.
+All automatically managed file systems are mounted by ZFS at boot time.
+.Pp
+By default, file systems are mounted under
+.Pa /path ,
+where
+.Ar path
+is the name of the file system in the ZFS namespace.
+Directories are created and destroyed as needed.
+.Pp
+A file system can also have a mount point set in the
+.Sy mountpoint
+property.
+This directory is created as needed, and ZFS automatically mounts the file
+system when the
+.Nm zfs Cm mount Fl a
+command is invoked
+.Po without editing
+.Pa /etc/fstab
+.Pc .
+The
+.Sy mountpoint
+property can be inherited, so if
+.Em pool/home
+has a mount point of
+.Pa /export/stuff ,
+then
+.Em pool/home/user
+automatically inherits a mount point of
+.Pa /export/stuff/user .
+.Pp
+A file system
+.Sy mountpoint
+property of
+.Sy none
+prevents the file system from being mounted.
+.Pp
+If needed, ZFS file systems can also be managed with traditional tools
+.Po
+.Nm mount ,
+.Nm umount ,
+.Pa /etc/fstab
+.Pc .
+If a file system's mount point is set to
+.Sy legacy ,
+ZFS makes no attempt to manage the file system, and the administrator is
+responsible for mounting and unmounting the file system.
+Because pools must
+be imported before a legacy mount can succeed, administrators should ensure
+that legacy mounts are only attempted after the zpool import process
+finishes at boot time.
+For example, on machines using systemd, the mount option
+.Pp
+.Nm x-systemd.requires=zfs-import.target
+.Pp
+will ensure that the zfs-import completes before systemd attempts mounting
+the filesystem.
+See
+.Xr systemd.mount 5
+for details.
+.Ss Deduplication
+Deduplication is the process for removing redundant data at the block level,
+reducing the total amount of data stored.
+If a file system has the
+.Sy dedup
+property enabled, duplicate data blocks are removed synchronously.
+The result
+is that only unique data is stored and common components are shared among files.
+.Pp
+Deduplicating data is a very resource-intensive operation.
+It is generally recommended that you have at least 1.25 GiB of RAM
+per 1 TiB of storage when you enable deduplication.
+Calculating the exact requirement depends heavily
+on the type of data stored in the pool.
+.Pp
+Enabling deduplication on an improperly-designed system can result in
+performance issues (slow IO and administrative operations).
+It can potentially lead to problems importing a pool due to memory exhaustion.
+Deduplication can consume significant processing power (CPU) and memory as well
+as generate additional disk IO.
+.Pp
+Before creating a pool with deduplication enabled, ensure that you have planned
+your hardware requirements appropriately and implemented appropriate recovery
+practices, such as regular backups.
+Consider using the
+.Sy compression
+property as a less resource-intensive alternative.

diff --git a/zfs/man/man7/zfsprops.7 b/zfs/man/man7/zfsprops.7
new file mode 100644
index 0000000..1e2ca40
--- /dev/null
+++ b/zfs/man/man7/zfsprops.7

@@ -0,0 +1,2080 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
+.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
+.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
+.\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
+.\"
+.Dd July 21, 2022
+.Dt ZFSPROPS 7
+.Os
+.
+.Sh NAME
+.Nm zfsprops
+.Nd native and user-defined properties of ZFS datasets
+.
+.Sh DESCRIPTION
+Properties are divided into two types, native properties and user-defined
+.Po or
+.Qq user
+.Pc
+properties.
+Native properties either export internal statistics or control ZFS behavior.
+In addition, native properties are either editable or read-only.
+User properties have no effect on ZFS behavior, but you can use them to annotate
+datasets in a way that is meaningful in your environment.
+For more information about user properties, see the
+.Sx User Properties
+section, below.
+.
+.Ss Native Properties
+Every dataset has a set of properties that export statistics about the dataset
+as well as control various behaviors.
+Properties are inherited from the parent unless overridden by the child.
+Some properties apply only to certain types of datasets
+.Pq file systems, volumes, or snapshots .
+.Pp
+The values of numeric properties can be specified using human-readable suffixes
+.Po for example,
+.Sy k ,
+.Sy KB ,
+.Sy M ,
+.Sy Gb ,
+and so forth, up to
+.Sy Z
+for zettabyte
+.Pc .
+The following are all valid
+.Pq and equal
+specifications:
+.Li 1536M, 1.5g, 1.50GB .
+.Pp
+The values of non-numeric properties are case sensitive and must be lowercase,
+except for
+.Sy mountpoint ,
+.Sy sharenfs ,
+and
+.Sy sharesmb .
+.Pp
+The following native properties consist of read-only statistics about the
+dataset.
+These properties can be neither set, nor inherited.
+Native properties apply to all dataset types unless otherwise noted.
+.Bl -tag -width "usedbyrefreservation"
+.It Sy available
+The amount of space available to the dataset and all its children, assuming that
+there is no other activity in the pool.
+Because space is shared within a pool, availability can be limited by any number
+of factors, including physical pool size, quotas, reservations, or other
+datasets within the pool.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy avail .
+.It Sy compressratio
+For non-snapshots, the compression ratio achieved for the
+.Sy used
+space of this dataset, expressed as a multiplier.
+The
+.Sy used
+property includes descendant datasets, and, for clones, does not include the
+space shared with the origin snapshot.
+For snapshots, the
+.Sy compressratio
+is the same as the
+.Sy refcompressratio
+property.
+Compression can be turned on by running:
+.Nm zfs Cm set Sy compression Ns = Ns Sy on Ar dataset .
+The default value is
+.Sy off .
+.It Sy createtxg
+The transaction group (txg) in which the dataset was created.
+Bookmarks have the same
+.Sy createtxg
+as the snapshot they are initially tied to.
+This property is suitable for ordering a list of snapshots,
+e.g. for incremental send and receive.
+.It Sy creation
+The time this dataset was created.
+.It Sy clones
+For snapshots, this property is a comma-separated list of filesystems or volumes
+which are clones of this snapshot.
+The clones'
+.Sy origin
+property is this snapshot.
+If the
+.Sy clones
+property is not empty, then this snapshot can not be destroyed
+.Po even with the
+.Fl r
+or
+.Fl f
+options
+.Pc .
+The roles of origin and clone can be swapped by promoting the clone with the
+.Nm zfs Cm promote
+command.
+.It Sy defer_destroy
+This property is
+.Sy on
+if the snapshot has been marked for deferred destroy by using the
+.Nm zfs Cm destroy Fl d
+command.
+Otherwise, the property is
+.Sy off .
+.It Sy encryptionroot
+For encrypted datasets, indicates where the dataset is currently inheriting its
+encryption key from.
+Loading or unloading a key for the
+.Sy encryptionroot
+will implicitly load / unload the key for any inheriting datasets (see
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm unload-key
+for details).
+Clones will always share an
+encryption key with their origin.
+See the
+.Sx Encryption
+section of
+.Xr zfs-load-key 8
+for details.
+.It Sy filesystem_count
+The total number of filesystems and volumes that exist under this location in
+the dataset tree.
+This value is only available when a
+.Sy filesystem_limit
+has been set somewhere in the tree under which the dataset resides.
+.It Sy keystatus
+Indicates if an encryption key is currently loaded into ZFS.
+The possible values are
+.Sy none ,
+.Sy available ,
+and
+.Sy unavailable .
+See
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm unload-key .
+.It Sy guid
+The 64-bit GUID of this dataset or bookmark, which does not change over its
+entire lifetime.
+When a snapshot is sent to another pool, the received snapshot has the same GUID.
+Thus, the
+.Sy guid
+is suitable to identify a snapshot across pools.
+.It Sy logicalreferenced
+The amount of space that is
+.Qq logically
+accessible by this dataset.
+See the
+.Sy referenced
+property.
+The logical space ignores the effect of the
+.Sy compression
+and
+.Sy copies
+properties, giving a quantity closer to the amount of data that applications
+see.
+However, it does include space consumed by metadata.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy lrefer .
+.It Sy logicalused
+The amount of space that is
+.Qq logically
+consumed by this dataset and all its descendants.
+See the
+.Sy used
+property.
+The logical space ignores the effect of the
+.Sy compression
+and
+.Sy copies
+properties, giving a quantity closer to the amount of data that applications
+see.
+However, it does include space consumed by metadata.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy lused .
+.It Sy mounted
+For file systems, indicates whether the file system is currently mounted.
+This property can be either
+.Sy yes
+or
+.Sy no .
+.It Sy objsetid
+A unique identifier for this dataset within the pool.
+Unlike the dataset's
+.Sy guid , No the Sy objsetid
+of a dataset is not transferred to other pools when the snapshot is copied
+with a send/receive operation.
+The
+.Sy objsetid
+can be reused (for a new dataset) after the dataset is deleted.
+.It Sy origin
+For cloned file systems or volumes, the snapshot from which the clone was
+created.
+See also the
+.Sy clones
+property.
+.It Sy receive_resume_token
+For filesystems or volumes which have saved partially-completed state from
+.Nm zfs Cm receive Fl s ,
+this opaque token can be provided to
+.Nm zfs Cm send Fl t
+to resume and complete the
+.Nm zfs Cm receive .
+.It Sy redact_snaps
+For bookmarks, this is the list of snapshot guids the bookmark contains a redaction
+list for.
+For snapshots, this is the list of snapshot guids the snapshot is redacted with
+respect to.
+.It Sy referenced
+The amount of data that is accessible by this dataset, which may or may not be
+shared with other datasets in the pool.
+When a snapshot or clone is created, it initially references the same amount of
+space as the file system or snapshot it was created from, since its contents are
+identical.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy refer .
+.It Sy refcompressratio
+The compression ratio achieved for the
+.Sy referenced
+space of this dataset, expressed as a multiplier.
+See also the
+.Sy compressratio
+property.
+.It Sy snapshot_count
+The total number of snapshots that exist under this location in the dataset
+tree.
+This value is only available when a
+.Sy snapshot_limit
+has been set somewhere in the tree under which the dataset resides.
+.It Sy type
+The type of dataset:
+.Sy filesystem ,
+.Sy volume ,
+.Sy snapshot ,
+or
+.Sy bookmark .
+.It Sy used
+The amount of space consumed by this dataset and all its descendants.
+This is the value that is checked against this dataset's quota and reservation.
+The space used does not include this dataset's reservation, but does take into
+account the reservations of any descendant datasets.
+The amount of space that a dataset consumes from its parent, as well as the
+amount of space that is freed if this dataset is recursively destroyed, is the
+greater of its space used and its reservation.
+.Pp
+The used space of a snapshot
+.Po see the
+.Sx Snapshots
+section of
+.Xr zfsconcepts 7
+.Pc
+is space that is referenced exclusively by this snapshot.
+If this snapshot is destroyed, the amount of
+.Sy used
+space will be freed.
+Space that is shared by multiple snapshots isn't accounted for in this metric.
+When a snapshot is destroyed, space that was previously shared with this
+snapshot can become unique to snapshots adjacent to it, thus changing the used
+space of those snapshots.
+The used space of the latest snapshot can also be affected by changes in the
+file system.
+Note that the
+.Sy used
+space of a snapshot is a subset of the
+.Sy written
+space of the snapshot.
+.Pp
+The amount of space used, available, or referenced does not take into account
+pending changes.
+Pending changes are generally accounted for within a few seconds.
+Committing a change to a disk using
+.Xr fsync 2
+or
+.Sy O_SYNC
+does not necessarily guarantee that the space usage information is updated
+immediately.
+.It Sy usedby*
+The
+.Sy usedby*
+properties decompose the
+.Sy used
+properties into the various reasons that space is used.
+Specifically,
+.Sy used No =
+.Sy usedbychildren No +
+.Sy usedbydataset No +
+.Sy usedbyrefreservation No +
+.Sy usedbysnapshots .
+These properties are only available for datasets created on
+.Nm zpool
+.Qo version 13 Qc
+pools.
+.It Sy usedbychildren
+The amount of space used by children of this dataset, which would be freed if
+all the dataset's children were destroyed.
+.It Sy usedbydataset
+The amount of space used by this dataset itself, which would be freed if the
+dataset were destroyed
+.Po after first removing any
+.Sy refreservation
+and destroying any necessary snapshots or descendants
+.Pc .
+.It Sy usedbyrefreservation
+The amount of space used by a
+.Sy refreservation
+set on this dataset, which would be freed if the
+.Sy refreservation
+was removed.
+.It Sy usedbysnapshots
+The amount of space consumed by snapshots of this dataset.
+In particular, it is the amount of space that would be freed if all of this
+dataset's snapshots were destroyed.
+Note that this is not simply the sum of the snapshots'
+.Sy used
+properties because space can be shared by multiple snapshots.
+.It Sy userused Ns @ Ns Ar user
+The amount of space consumed by the specified user in this dataset.
+Space is charged to the owner of each file, as displayed by
+.Nm ls Fl l .
+The amount of space charged is displayed by
+.Nm du No and Nm ls Fl s .
+See the
+.Nm zfs Cm userspace
+command for more information.
+.Pp
+Unprivileged users can access only their own space usage.
+The root user, or a user who has been granted the
+.Sy userused
+privilege with
+.Nm zfs Cm allow ,
+can access everyone's usage.
+.Pp
+The
+.Sy userused Ns @ Ns Ar ...
+properties are not displayed by
+.Nm zfs Cm get Sy all .
+The user's name must be appended after the
+.Sy @
+symbol, using one of the following forms:
+.Bl -bullet -compact -offset 4n
+.It
+POSIX name
+.Pq Qq joe
+.It
+POSIX numeric ID
+.Pq Qq 789
+.It
+SID name
+.Pq Qq joe.smith@mydomain
+.It
+SID numeric ID
+.Pq Qq S-1-123-456-789
+.El
+.Pp
+Files created on Linux always have POSIX owners.
+.It Sy userobjused Ns @ Ns Ar user
+The
+.Sy userobjused
+property is similar to
+.Sy userused
+but instead it counts the number of objects consumed by a user.
+Because this property counts all objects allocated on behalf of the user,
+it may differ from the results of system tools such as
+.Nm df Fl i .
+.Pp
+When the property
+.Sy xattr Ns = Ns Sy on
+is set on a file system additional objects will be created per-file to store
+extended attributes.
+These additional objects are reflected in the
+.Sy userobjused
+value and are counted against the user's
+.Sy userobjquota .
+When a file system is configured to use
+.Sy xattr Ns = Ns Sy sa
+no additional internal objects are normally required.
+.It Sy userrefs
+This property is set to the number of user holds on this snapshot.
+User holds are set by using the
+.Nm zfs Cm hold
+command.
+.It Sy groupused Ns @ Ns Ar group
+The amount of space consumed by the specified group in this dataset.
+Space is charged to the group of each file, as displayed by
+.Nm ls Fl l .
+See the
+.Sy userused Ns @ Ns Ar user
+property for more information.
+.Pp
+Unprivileged users can only access their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupused
+privilege with
+.Nm zfs Cm allow ,
+can access all groups' usage.
+.It Sy groupobjused Ns @ Ns Ar group
+The number of objects consumed by the specified group in this dataset.
+Multiple objects may be charged to the group for each file when extended
+attributes are in use.
+See the
+.Sy userobjused Ns @ Ns Ar user
+property for more information.
+.Pp
+Unprivileged users can only access their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupobjused
+privilege with
+.Nm zfs Cm allow ,
+can access all groups' usage.
+.It Sy projectused Ns @ Ns Ar project
+The amount of space consumed by the specified project in this dataset.
+The project is identified via the project identifier (ID), which is an
+object-based numeric attribute.
+An object can inherit the project ID from its parent object (if the
+parent has the flag of inherit project ID that can be set and changed via
+.Nm chattr Fl /+P
+or
+.Nm zfs project Fl s )
+when being created.
+The privileged user can set and change an object's project
+ID via
+.Nm chattr Fl p
+or
+.Nm zfs project Fl s
+anytime.
+Space is charged to the project of each file, as displayed by
+.Nm lsattr Fl p
+or
+.Nm zfs project .
+See the
+.Sy userused Ns @ Ns Ar user
+property for more information.
+.Pp
+The root user, or a user who has been granted the
+.Sy projectused
+privilege with
+.Nm zfs allow ,
+can access all projects' usage.
+.It Sy projectobjused Ns @ Ns Ar project
+The
+.Sy projectobjused
+is similar to
+.Sy projectused
+but instead it counts the number of objects consumed by a project.
+When the property
+.Sy xattr Ns = Ns Sy on
+is set on a file system, ZFS will create additional objects per-file to store
+extended attributes.
+These additional objects are reflected in the
+.Sy projectobjused
+value and are counted against the project's
+.Sy projectobjquota .
+When a filesystem is configured to use
+.Sy xattr Ns = Ns Sy sa
+no additional internal objects are required.
+See the
+.Sy userobjused Ns @ Ns Ar user
+property for more information.
+.Pp
+The root user, or a user who has been granted the
+.Sy projectobjused
+privilege with
+.Nm zfs allow ,
+can access all projects' objects usage.
+.It Sy volblocksize
+For volumes, specifies the block size of the volume.
+The
+.Sy blocksize
+cannot be changed once the volume has been written, so it should be set at
+volume creation time.
+The default
+.Sy blocksize
+for volumes is 8 Kbytes.
+Any power of 2 from 512 bytes to 128 Kbytes is valid.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy volblock .
+.It Sy written
+The amount of space
+.Sy referenced
+by this dataset, that was written since the previous snapshot
+.Pq i.e. that is not referenced by the previous snapshot .
+.It Sy written Ns @ Ns Ar snapshot
+The amount of
+.Sy referenced
+space written to this dataset since the specified snapshot.
+This is the space that is referenced by this dataset but was not referenced by
+the specified snapshot.
+.Pp
+The
+.Ar snapshot
+may be specified as a short snapshot name
+.Pq just the part after the Sy @ ,
+in which case it will be interpreted as a snapshot in the same filesystem as
+this dataset.
+The
+.Ar snapshot
+may be a full snapshot name
+.Pq Ar filesystem Ns @ Ns Ar snapshot ,
+which for clones may be a snapshot in the origin's filesystem
+.Pq or the origin of the origin's filesystem, etc.
+.El
+.Pp
+The following native properties can be used to change the behavior of a ZFS
+dataset.
+.Bl -tag -width ""
+.It Xo
+.Sy aclinherit Ns = Ns Sy discard Ns | Ns Sy noallow Ns | Ns
+.Sy restricted Ns | Ns Sy passthrough Ns | Ns Sy passthrough-x
+.Xc
+Controls how ACEs are inherited when files and directories are created.
+.Bl -tag -compact -offset 4n -width "passthrough-x"
+.It Sy discard
+does not inherit any ACEs.
+.It Sy noallow
+only inherits inheritable ACEs that specify
+.Qq deny
+permissions.
+.It Sy restricted
+default, removes the
+.Sy write_acl
+and
+.Sy write_owner
+permissions when the ACE is inherited.
+.It Sy passthrough
+inherits all inheritable ACEs without any modifications.
+.It Sy passthrough-x
+same meaning as
+.Sy passthrough ,
+except that the
+.Sy owner@ , group@ , No and Sy everyone@
+ACEs inherit the execute permission only if the file creation mode also requests
+the execute bit.
+.El
+.Pp
+When the property value is set to
+.Sy passthrough ,
+files are created with a mode determined by the inheritable ACEs.
+If no inheritable ACEs exist that affect the mode, then the mode is set in
+accordance with the requested mode from the application.
+.Pp
+The
+.Sy aclinherit
+property does not apply to POSIX ACLs.
+.It Xo
+.Sy aclmode Ns = Ns Sy discard Ns | Ns Sy groupmask Ns | Ns
+.Sy passthrough Ns | Ns Sy restricted Ns
+.Xc
+Controls how an ACL is modified during chmod(2) and how inherited ACEs
+are modified by the file creation mode:
+.Bl -tag -compact -offset 4n -width "passthrough"
+.It Sy discard
+default, deletes all
+.Sy ACEs
+except for those representing
+the mode of the file or directory requested by
+.Xr chmod 2 .
+.It Sy groupmask
+reduces permissions granted in all
+.Sy ALLOW
+entries found in the
+.Sy ACL
+such that they are no greater than the group permissions specified by
+.Xr chmod 2 .
+.It Sy passthrough
+indicates that no changes are made to the ACL other than creating or updating
+the necessary ACL entries to represent the new mode of the file or directory.
+.It Sy restricted
+will cause the
+.Xr chmod 2
+operation to return an error when used on any file or directory which has
+a non-trivial ACL whose entries can not be represented by a mode.
+.Xr chmod 2
+is required to change the set user ID, set group ID, or sticky bits on a file
+or directory, as they do not have equivalent ACL entries.
+In order to use
+.Xr chmod 2
+on a file or directory with a non-trivial ACL when
+.Sy aclmode
+is set to
+.Sy restricted ,
+you must first remove all ACL entries which do not represent the current mode.
+.El
+.It Sy acltype Ns = Ns Sy off Ns | Ns Sy nfsv4 Ns | Ns Sy posix
+Controls whether ACLs are enabled and if so what type of ACL to use.
+When this property is set to a type of ACL not supported by the current
+platform, the behavior is the same as if it were set to
+.Sy off .
+.Bl -tag -compact -offset 4n -width "posixacl"
+.It Sy off
+default on Linux, when a file system has the
+.Sy acltype
+property set to off then ACLs are disabled.
+.It Sy noacl
+an alias for
+.Sy off
+.It Sy nfsv4
+default on
+.Fx ,
+indicates that NFSv4-style ZFS ACLs should be used.
+These ACLs can be managed with
+.Xr getfacl 1
+and
+.Xr setfacl 1 .
+The
+.Sy nfsv4
+ZFS ACL type is not yet supported on Linux.
+.It Sy posix
+indicates POSIX ACLs should be used.
+POSIX ACLs are specific to Linux and are not functional on other platforms.
+POSIX ACLs are stored as an extended
+attribute and therefore will not overwrite any existing NFSv4 ACLs which
+may be set.
+.It Sy posixacl
+an alias for
+.Sy posix
+.El
+.Pp
+To obtain the best performance when setting
+.Sy posix ,
+users are strongly encouraged to set the
+.Sy xattr Ns = Ns Sy sa
+property.
+This will result in the POSIX ACL being stored more efficiently on disk.
+But as a consequence, all new extended attributes will only be
+accessible from OpenZFS implementations which support the
+.Sy xattr Ns = Ns Sy sa
+property.
+See the
+.Sy xattr
+property for more details.
+.It Sy atime Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the access time for files is updated when they are read.
+Turning this property off avoids producing write traffic when reading files and
+can result in significant performance gains, though it might confuse mailers
+and other similar utilities.
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy atime
+and
+.Sy noatime
+mount options.
+The default value is
+.Sy on .
+See also
+.Sy relatime
+below.
+.It Sy canmount Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy noauto
+If this property is set to
+.Sy off ,
+the file system cannot be mounted, and is ignored by
+.Nm zfs Cm mount Fl a .
+Setting this property to
+.Sy off
+is similar to setting the
+.Sy mountpoint
+property to
+.Sy none ,
+except that the dataset still has a normal
+.Sy mountpoint
+property, which can be inherited.
+Setting this property to
+.Sy off
+allows datasets to be used solely as a mechanism to inherit properties.
+One example of setting
+.Sy canmount Ns = Ns Sy off
+is to have two datasets with the same
+.Sy mountpoint ,
+so that the children of both datasets appear in the same directory, but might
+have different inherited characteristics.
+.Pp
+When set to
+.Sy noauto ,
+a dataset can only be mounted and unmounted explicitly.
+The dataset is not mounted automatically when the dataset is created or
+imported, nor is it mounted by the
+.Nm zfs Cm mount Fl a
+command or unmounted by the
+.Nm zfs Cm unmount Fl a
+command.
+.Pp
+This property is not inherited.
+.It Xo
+.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns
+.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns
+.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr
+.Xc
+Controls the checksum used to verify data integrity.
+The default value is
+.Sy on ,
+which automatically selects an appropriate algorithm
+.Po currently,
+.Sy fletcher4 ,
+but this may change in future releases
+.Pc .
+The value
+.Sy off
+disables integrity checking on user data.
+The value
+.Sy noparity
+not only disables integrity but also disables maintaining parity for user data.
+This setting is used internally by a dump device residing on a RAID-Z pool and
+should not be used by any other dataset.
+Disabling checksums is
+.Em NOT
+a recommended practice.
+.Pp
+The
+.Sy sha512 ,
+.Sy skein ,
+and
+.Sy edonr
+checksum algorithms require enabling the appropriate features on the pool.
+.Fx
+does not support the
+.Sy edonr
+algorithm.
+.Pp
+Please see
+.Xr zpool-features 7
+for more information on these algorithms.
+.Pp
+Changing this property affects only newly-written data.
+.It Xo
+.Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns
+.Sy gzip- Ns Ar N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle Ns | Ns Sy zstd Ns | Ns
+.Sy zstd- Ns Ar N Ns | Ns Sy zstd-fast Ns | Ns Sy zstd-fast- Ns Ar N
+.Xc
+Controls the compression algorithm used for this dataset.
+.Pp
+Setting compression to
+.Sy on
+indicates that the current default compression algorithm should be used.
+The default balances compression and decompression speed with compression
+ratio, and is expected to work well on a wide variety of workloads.
+Unlike all other settings for this property,
+.Sy on
+does not select a fixed compression type.
+As new compression algorithms are added to ZFS and enabled on a pool, the
+default compression algorithm may change.
+The current default compression algorithm is either
+.Sy lzjb
+or, if the
+.Sy lz4_compress
+feature is enabled,
+.Sy lz4 .
+.Pp
+The
+.Sy lz4
+compression algorithm is a high-performance replacement for the
+.Sy lzjb
+algorithm.
+It features significantly faster compression and decompression, as well as a
+moderately higher compression ratio than
+.Sy lzjb ,
+but can only be used on pools with the
+.Sy lz4_compress
+feature set to
+.Sy enabled .
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy lz4_compress
+feature.
+.Pp
+The
+.Sy lzjb
+compression algorithm is optimized for performance while providing decent data
+compression.
+.Pp
+The
+.Sy gzip
+compression algorithm uses the same compression as the
+.Xr gzip 1
+command.
+You can specify the
+.Sy gzip
+level by using the value
+.Sy gzip- Ns Ar N ,
+where
+.Ar N
+is an integer from 1
+.Pq fastest
+to 9
+.Pq best compression ratio .
+Currently,
+.Sy gzip
+is equivalent to
+.Sy gzip-6
+.Po which is also the default for
+.Xr gzip 1
+.Pc .
+.Pp
+The
+.Sy zstd
+compression algorithm provides both high compression ratios and good performance.
+You can specify the
+.Sy zstd
+level by using the value
+.Sy zstd- Ns Ar N ,
+where
+.Ar N
+is an integer from 1
+.Pq fastest
+to 19
+.Pq best compression ratio .
+.Sy zstd
+is equivalent to
+.Sy zstd-3 .
+.Pp
+Faster speeds at the cost of the compression ratio can be requested by
+setting a negative
+.Sy zstd
+level.
+This is done using
+.Sy zstd-fast- Ns Ar N ,
+where
+.Ar N
+is an integer in [1-9,10,20,30,...,100,500,1000] which maps to a negative
+.Sy zstd
+level.
+The lower the level the faster the compression -
+.Ar 1000 No provides the fastest compression and lowest compression ratio.
+.Sy zstd-fast
+is equivalent to
+.Sy zstd-fast-1 .
+.Pp
+The
+.Sy zle
+compression algorithm compresses runs of zeros.
+.Pp
+This property can also be referred to by its shortened column name
+.Sy compress .
+Changing this property affects only newly-written data.
+.Pp
+When any setting except
+.Sy off
+is selected, compression will explicitly check for blocks consisting of only
+zeroes (the NUL byte).
+When a zero-filled block is detected, it is stored as
+a hole and not compressed using the indicated compression algorithm.
+.Pp
+Any block being compressed must be no larger than 7/8 of its original size
+after compression, otherwise the compression will not be considered worthwhile
+and the block is saved uncompressed.
+Note that when the logical block is less than
+8 times the disk sector size this effectively reduces the necessary compression
+ratio; for example, 8kB blocks on disks with 4kB disk sectors must compress to 1/2
+or less of their original size.
+.It Xo
+.Sy context Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for all files in the file system under
+a mount point for that file system.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy fscontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for the file system being
+mounted.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy defcontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux default context for unlabeled files.
+See
+.Xr selinux 8
+for more information.
+.It Xo
+.Sy rootcontext Ns = Ns Sy none Ns | Ns
+.Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level
+.Xc
+This flag sets the SELinux context for the root inode of the file system.
+See
+.Xr selinux 8
+for more information.
+.It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3
+Controls the number of copies of data stored for this dataset.
+These copies are in addition to any redundancy provided by the pool, for
+example, mirroring or RAID-Z.
+The copies are stored on different disks, if possible.
+The space used by multiple copies is charged to the associated file and dataset,
+changing the
+.Sy used
+property and counting against quotas and reservations.
+.Pp
+Changing this property only affects newly-written data.
+Therefore, set this property at file system creation time by using the
+.Fl o Sy copies Ns = Ns Ar N
+option.
+.Pp
+Remember that ZFS will not import a pool with a missing top-level vdev.
+Do
+.Em NOT
+create, for example, a two-disk striped pool and set
+.Sy copies Ns = Ns Ar 2
+on some datasets thinking you have set up redundancy for them.
+When a disk fails you will not be able to import the pool
+and will have lost all of your data.
+.Pp
+Encrypted datasets may not have
+.Sy copies Ns = Ns Ar 3
+since the implementation stores some encryption metadata where the third copy
+would normally be.
+.It Sy devices Ns = Ns Sy on Ns | Ns Sy off
+Controls whether device nodes can be opened on this file system.
+The default value is
+.Sy on .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy dev
+and
+.Sy nodev
+mount options.
+.It Xo
+.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns
+.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns
+.Sy edonr , Ns Sy verify
+.Xc
+Configures deduplication for a dataset.
+The default value is
+.Sy off .
+The default deduplication checksum is
+.Sy sha256
+(this may change in the future).
+When
+.Sy dedup
+is enabled, the checksum defined here overrides the
+.Sy checksum
+property.
+Setting the value to
+.Sy verify
+has the same effect as the setting
+.Sy sha256 , Ns Sy verify .
+.Pp
+If set to
+.Sy verify ,
+ZFS will do a byte-for-byte comparison in case of two blocks having the same
+signature to make sure the block contents are identical.
+Specifying
+.Sy verify
+is mandatory for the
+.Sy edonr
+algorithm.
+.Pp
+Unless necessary, deduplication should
+.Em not
+be enabled on a system.
+See the
+.Sx Deduplication
+section of
+.Xr zfsconcepts 7 .
+.It Xo
+.Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns
+.Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k
+.Xc
+Specifies a compatibility mode or literal value for the size of dnodes in the
+file system.
+The default value is
+.Sy legacy .
+Setting this property to a value other than
+.Sy legacy No requires the Sy large_dnode No pool feature to be enabled.
+.Pp
+Consider setting
+.Sy dnodesize
+to
+.Sy auto
+if the dataset uses the
+.Sy xattr Ns = Ns Sy sa
+property setting and the workload makes heavy use of extended attributes.
+This
+may be applicable to SELinux-enabled systems, Lustre servers, and Samba
+servers, for example.
+Literal values are supported for cases where the optimal
+size is known in advance and for performance testing.
+.Pp
+Leave
+.Sy dnodesize
+set to
+.Sy legacy
+if you need to receive a send stream of this dataset on a pool that doesn't
+enable the
+.Sy large_dnode
+feature, or if you need to import this pool on a system that doesn't support the
+.Sy large_dnode No feature.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy dnsize .
+.It Xo
+.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns
+.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns
+.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm
+.Xc
+Controls the encryption cipher suite (block cipher, key length, and mode) used
+for this dataset.
+Requires the
+.Sy encryption
+feature to be enabled on the pool.
+Requires a
+.Sy keyformat
+to be set at dataset creation time.
+.Pp
+Selecting
+.Sy encryption Ns = Ns Sy on
+when creating a dataset indicates that the default encryption suite will be
+selected, which is currently
+.Sy aes-256-gcm .
+In order to provide consistent data protection, encryption must be specified at
+dataset creation time and it cannot be changed afterwards.
+.Pp
+For more details and caveats about encryption see the
+.Sx Encryption
+section of
+.Xr zfs-load-key 8 .
+.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase
+Controls what format the user's encryption key will be provided as.
+This property is only set when the dataset is encrypted.
+.Pp
+Raw keys and hex keys must be 32 bytes long (regardless of the chosen
+encryption suite) and must be randomly generated.
+A raw key can be generated with the following command:
+.Dl # Nm dd Sy if=/dev/urandom bs=32 count=1 Sy of= Ns Pa /path/to/output/key
+.Pp
+Passphrases must be between 8 and 512 bytes long and will be processed through
+PBKDF2 before being used (see the
+.Sy pbkdf2iters
+property).
+Even though the encryption suite cannot be changed after dataset creation,
+the keyformat can be with
+.Nm zfs Cm change-key .
+.It Xo
+.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Em </absolute/file/path> Ns | Ns Sy https:// Ns Em <address> Ns | Ns Sy http:// Ns Em <address>
+.Xc
+Controls where the user's encryption key will be loaded from by default for
+commands such as
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm mount Fl l .
+This property is only set for encrypted datasets which are encryption roots.
+If unspecified, the default is
+.Sy prompt .
+.Pp
+Even though the encryption suite cannot be changed after dataset creation, the
+keylocation can be with either
+.Nm zfs Cm set
+or
+.Nm zfs Cm change-key .
+If
+.Sy prompt
+is selected ZFS will ask for the key at the command prompt when it is required
+to access the encrypted data (see
+.Nm zfs Cm load-key
+for details).
+This setting will also allow the key to be passed in via the standard input stream,
+but users should be careful not to place keys which should be kept secret on
+the command line.
+If a file URI is selected, the key will be loaded from the
+specified absolute file path.
+If an HTTPS or HTTP URL is selected, it will be fetched using
+.Xr fetch 3 ,
+libcurl, or nothing, depending on compile-time configuration and run-time
+availability.
+The
+.Ev SSL_CA_CERT_FILE
+environment variable can be set to specify the location
+of the concatenated certificate store.
+The
+.Ev SSL_CA_CERT_PATH
+environment variable can be set to override the location
+of the directory containing the certificate authority bundle.
+The
+.Ev SSL_CLIENT_CERT_FILE
+and
+.Ev SSL_CLIENT_KEY_FILE
+environment variables can be set to configure the path
+to the client certificate and its key.
+.It Sy pbkdf2iters Ns = Ns Ar iterations
+Controls the number of PBKDF2 iterations that a
+.Sy passphrase
+encryption key should be run through when processing it into an encryption key.
+This property is only defined when encryption is enabled and a keyformat of
+.Sy passphrase
+is selected.
+The goal of PBKDF2 is to significantly increase the
+computational difficulty needed to brute force a user's passphrase.
+This is accomplished by forcing the attacker to run each passphrase through a
+computationally expensive hashing function many times before they arrive at the
+resulting key.
+A user who actually knows the passphrase will only have to pay this cost once.
+As CPUs become better at processing, this number should be
+raised to ensure that a brute force attack is still not possible.
+The current default is
+.Sy 350000
+and the minimum is
+.Sy 100000 .
+This property may be changed with
+.Nm zfs Cm change-key .
+.It Sy exec Ns = Ns Sy on Ns | Ns Sy off
+Controls whether processes can be executed from within this file system.
+The default value is
+.Sy on .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy exec
+and
+.Sy noexec
+mount options.
+.It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none
+Limits the number of filesystems and volumes that can exist under this point in
+the dataset tree.
+The limit is not enforced if the user is allowed to change the limit.
+Setting a
+.Sy filesystem_limit
+on a descendent
+of a filesystem
+that already has a
+.Sy filesystem_limit
+does not override the ancestor's
+.Sy filesystem_limit ,
+but rather imposes an additional limit.
+This feature must be enabled to be used
+.Po see
+.Xr zpool-features 7
+.Pc .
+.It Sy special_small_blocks Ns = Ns Ar size
+This value represents the threshold block size for including small file
+blocks into the special allocation class.
+Blocks smaller than or equal to this
+value will be assigned to the special allocation class while greater blocks
+will be assigned to the regular class.
+Valid values are zero or a power of two from 512B up to 1M.
+The default size is 0 which means no small file blocks
+will be allocated in the special class.
+.Pp
+Before setting this property, a special class vdev must be added to the
+pool.
+See
+.Xr zpoolconcepts 7
+for more details on the special allocation class.
+.It Sy mountpoint Ns = Ns Pa path Ns | Ns Sy none Ns | Ns Sy legacy
+Controls the mount point used for this file system.
+See the
+.Sx Mount Points
+section of
+.Xr zfsconcepts 7
+for more information on how this property is used.
+.Pp
+When the
+.Sy mountpoint
+property is changed for a file system, the file system and any children that
+inherit the mount point are unmounted.
+If the new value is
+.Sy legacy ,
+then they remain unmounted.
+Otherwise, they are automatically remounted in the new location if the property
+was previously
+.Sy legacy
+or
+.Sy none ,
+or if they were mounted before the property was changed.
+In addition, any shared file systems are unshared and shared in the new
+location.
+.It Sy nbmand Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the file system should be mounted with
+.Sy nbmand
+.Pq Non-blocking mandatory locks .
+This is used for SMB clients.
+Changes to this property only take effect when the file system is unmounted and
+remounted.
+Support for these locks is scarce and not described by POSIX.
+.It Sy overlay Ns = Ns Sy on Ns | Ns Sy off
+Allow mounting on a busy directory or a directory which already contains
+files or directories.
+This is the default mount behavior for Linux and
+.Fx
+file systems.
+On these platforms the property is
+.Sy on
+by default.
+Set to
+.Sy off
+to disable overlay mounts for consistency with OpenZFS on other platforms.
+.It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
+Controls what is cached in the primary cache
+.Pq ARC .
+If this property is set to
+.Sy all ,
+then both user data and metadata are cached.
+If this property is set to
+.Sy none ,
+then neither user data nor metadata is cached.
+If this property is set to
+.Sy metadata ,
+then only metadata is cached.
+The default value is
+.Sy all .
+.It Sy quota Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space a dataset and its descendents can consume.
+This property enforces a hard limit on the amount of space used.
+This includes all space consumed by descendents, including file systems and
+snapshots.
+Setting a quota on a descendent of a dataset that already has a quota does not
+override the ancestor's quota, but rather imposes an additional limit.
+.Pp
+Quotas cannot be set on volumes, as the
+.Sy volsize
+property acts as an implicit quota.
+.It Sy snapshot_limit Ns = Ns Ar count Ns | Ns Sy none
+Limits the number of snapshots that can be created on a dataset and its
+descendents.
+Setting a
+.Sy snapshot_limit
+on a descendent of a dataset that already has a
+.Sy snapshot_limit
+does not override the ancestor's
+.Sy snapshot_limit ,
+but rather imposes an additional limit.
+The limit is not enforced if the user is allowed to change the limit.
+For example, this means that recursive snapshots taken from the global zone are
+counted against each delegated dataset within a zone.
+This feature must be enabled to be used
+.Po see
+.Xr zpool-features 7
+.Pc .
+.It Sy userquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified user.
+User space consumption is identified by the
+.Sy userspace@ Ns Ar user
+property.
+.Pp
+Enforcement of user quotas may be delayed by several seconds.
+This delay means that a user might exceed their quota before the system notices
+that they are over quota and begins to refuse additional writes with the
+.Er EDQUOT
+error message.
+See the
+.Nm zfs Cm userspace
+command for more information.
+.Pp
+Unprivileged users can only access their own space usage.
+The root user, or a user who has been granted the
+.Sy userquota
+privilege with
+.Nm zfs Cm allow ,
+can get and set everyone's quota.
+.Pp
+This property is not available on volumes, on file systems before version 4, or
+on pools before version 15.
+The
+.Sy userquota@ Ns Ar ...
+properties are not displayed by
+.Nm zfs Cm get Sy all .
+The user's name must be appended after the
+.Sy @
+symbol, using one of the following forms:
+.Bl -bullet -compact -offset 4n
+.It
+POSIX name
+.Pq Qq joe
+.It
+POSIX numeric ID
+.Pq Qq 789
+.It
+SID name
+.Pq Qq joe.smith@mydomain
+.It
+SID numeric ID
+.Pq Qq S-1-123-456-789
+.El
+.Pp
+Files created on Linux always have POSIX owners.
+.It Sy userobjquota@ Ns Ar user Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy userobjquota
+is similar to
+.Sy userquota
+but it limits the number of objects a user can create.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy groupquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified group.
+Group space consumption is identified by the
+.Sy groupused@ Ns Ar group
+property.
+.Pp
+Unprivileged users can access only their own groups' space usage.
+The root user, or a user who has been granted the
+.Sy groupquota
+privilege with
+.Nm zfs Cm allow ,
+can get and set all groups' quotas.
+.It Sy groupobjquota@ Ns Ar group Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy groupobjquota
+is similar to
+.Sy groupquota
+but it limits the number of objects a group can consume.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy projectquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space consumed by the specified project.
+Project space consumption is identified by the
+.Sy projectused@ Ns Ar project
+property.
+Please refer to
+.Sy projectused
+for more information about how a project is identified and set/changed.
+.Pp
+The root user, or a user who has been granted the
+.Sy projectquota
+privilege with
+.Nm zfs Cm allow ,
+can access all projects' quota.
+.It Sy projectobjquota@ Ns Ar project Ns = Ns Ar size Ns | Ns Sy none
+The
+.Sy projectobjquota
+is similar to
+.Sy projectquota
+but it limits the number of objects a project can consume.
+Please refer to
+.Sy userobjused
+for more information about how objects are counted.
+.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
+Controls whether this dataset can be modified.
+The default value is
+.Sy off .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy ro
+and
+.Sy rw
+mount options.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy rdonly .
+.It Sy recordsize Ns = Ns Ar size
+Specifies a suggested block size for files in the file system.
+This property is designed solely for use with database workloads that access
+files in fixed-size records.
+ZFS automatically tunes block sizes according to internal algorithms optimized
+for typical access patterns.
+.Pp
+For databases that create very large files but access them in small random
+chunks, these algorithms may be suboptimal.
+Specifying a
+.Sy recordsize
+greater than or equal to the record size of the database can result in
+significant performance gains.
+Use of this property for general purpose file systems is strongly discouraged,
+and may adversely affect performance.
+.Pp
+The size specified must be a power of two greater than or equal to
+.Ar 512B
+and less than or equal to
+.Ar 128kB .
+If the
+.Sy large_blocks
+feature is enabled on the pool, the size may be up to
+.Ar 1MB .
+See
+.Xr zpool-features 7
+for details on ZFS feature flags.
+.Pp
+Changing the file system's
+.Sy recordsize
+affects only files created afterward; existing files are unaffected.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy recsize .
+.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Ns | Ns Sy some Ns | Ns Sy none
+Controls what types of metadata are stored redundantly.
+ZFS stores an extra copy of metadata, so that if a single block is corrupted,
+the amount of user data lost is limited.
+This extra copy is in addition to any redundancy provided at the pool level
+.Pq e.g. by mirroring or RAID-Z ,
+and is in addition to an extra copy specified by the
+.Sy copies
+property
+.Pq up to a total of 3 copies .
+For example if the pool is mirrored,
+.Sy copies Ns = Ns 2 ,
+and
+.Sy redundant_metadata Ns = Ns Sy most ,
+then ZFS stores 6 copies of most metadata, and 4 copies of data and some
+metadata.
+.Pp
+When set to
+.Sy all ,
+ZFS stores an extra copy of all metadata.
+If a single on-disk block is corrupt, at worst a single block of user data
+.Po which is
+.Sy recordsize
+bytes long
+.Pc
+can be lost.
+.Pp
+When set to
+.Sy most ,
+ZFS stores an extra copy of most types of metadata.
+This can improve performance of random writes, because less metadata must be
+written.
+In practice, at worst about 1000 blocks
+.Po of
+.Sy recordsize
+bytes each
+.Pc
+of user data can be lost if a single on-disk block is corrupt.
+The exact behavior of which metadata blocks are stored redundantly may change in
+future releases.
+.Pp
+When set to
+.Sy some ,
+ZFS stores an extra copy of only critical metadata.
+This can improve file create performance since less metadata needs to be written.
+If a single on-disk block is corrupt, at worst a single user file can be lost.
+.Pp
+When set to
+.Sy none ,
+ZFS does not store any copies of metadata redundantly.
+If a single on-disk block is corrupt, an entire dataset can be lost.
+.Pp
+The default value is
+.Sy all .
+.It Sy refquota Ns = Ns Ar size Ns | Ns Sy none
+Limits the amount of space a dataset can consume.
+This property enforces a hard limit on the amount of space used.
+This hard limit does not include space used by descendents, including file
+systems and snapshots.
+.It Sy refreservation Ns = Ns Ar size Ns | Ns Sy none Ns | Ns Sy auto
+The minimum amount of space guaranteed to a dataset, not including its
+descendents.
+When the amount of space used is below this value, the dataset is treated as if
+it were taking up the amount of space specified by
+.Sy refreservation .
+The
+.Sy refreservation
+reservation is accounted for in the parent datasets' space used, and counts
+against the parent datasets' quotas and reservations.
+.Pp
+If
+.Sy refreservation
+is set, a snapshot is only allowed if there is enough free pool space outside of
+this reservation to accommodate the current number of
+.Qq referenced
+bytes in the dataset.
+.Pp
+If
+.Sy refreservation
+is set to
+.Sy auto ,
+a volume is thick provisioned
+.Po or
+.Qq not sparse
+.Pc .
+.Sy refreservation Ns = Ns Sy auto
+is only supported on volumes.
+See
+.Sy volsize
+in the
+.Sx Native Properties
+section for more information about sparse volumes.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy refreserv .
+.It Sy relatime Ns = Ns Sy on Ns | Ns Sy off
+Controls the manner in which the access time is updated when
+.Sy atime Ns = Ns Sy on
+is set.
+Turning this property on causes the access time to be updated relative
+to the modify or change time.
+Access time is only updated if the previous
+access time was earlier than the current modify or change time or if the
+existing access time hasn't been updated within the past 24 hours.
+The default value is
+.Sy off .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy relatime
+and
+.Sy norelatime
+mount options.
+.It Sy reservation Ns = Ns Ar size Ns | Ns Sy none
+The minimum amount of space guaranteed to a dataset and its descendants.
+When the amount of space used is below this value, the dataset is treated as if
+it were taking up the amount of space specified by its reservation.
+Reservations are accounted for in the parent datasets' space used, and count
+against the parent datasets' quotas and reservations.
+.Pp
+This property can also be referred to by its shortened column name,
+.Sy reserv .
+.It Sy secondarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
+Controls what is cached in the secondary cache
+.Pq L2ARC .
+If this property is set to
+.Sy all ,
+then both user data and metadata are cached.
+If this property is set to
+.Sy none ,
+then neither user data nor metadata is cached.
+If this property is set to
+.Sy metadata ,
+then only metadata is cached.
+The default value is
+.Sy all .
+.It Sy setuid Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the setuid bit is respected for the file system.
+The default value is
+.Sy on .
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy suid
+and
+.Sy nosuid
+mount options.
+.It Sy sharesmb Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts
+Controls whether the file system is shared by using
+.Sy Samba USERSHARES
+and what options are to be used.
+Otherwise, the file system is automatically shared and unshared with the
+.Nm zfs Cm share
+and
+.Nm zfs Cm unshare
+commands.
+If the property is set to on, the
+.Xr net 8
+command is invoked to create a
+.Sy USERSHARE .
+.Pp
+Because SMB shares require a resource name, a unique resource name is
+constructed from the dataset name.
+The constructed name is a copy of the
+dataset name except that the characters in the dataset name, which would be
+invalid in the resource name, are replaced with underscore (_) characters.
+Linux does not currently support additional options which might be available
+on Solaris.
+.Pp
+If the
+.Sy sharesmb
+property is set to
+.Sy off ,
+the file systems are unshared.
+.Pp
+The share is created with the ACL (Access Control List) "Everyone:F" ("F"
+stands for "full permissions", i.e. read and write permissions) and no guest
+access (which means Samba must be able to authenticate a real user, system
+passwd/shadow, LDAP or smbpasswd based) by default.
+This means that any additional access control
+(disallowing access for specific users, etc.) must be done on the underlying file system.
+.It Sy sharenfs Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Ar opts
+Controls whether the file system is shared via NFS, and what options are to be
+used.
+A file system with a
+.Sy sharenfs
+property of
+.Sy off
+is managed with the
+.Xr exportfs 8
+command and entries in the
+.Pa /etc/exports
+file.
+Otherwise, the file system is automatically shared and unshared with the
+.Nm zfs Cm share
+and
+.Nm zfs Cm unshare
+commands.
+If the property is set to
+.Sy on ,
+the dataset is shared using the default options:
+.Dl sec=sys,rw,crossmnt,no_subtree_check
+.Pp
+Please note that the options are comma-separated, unlike those found in
+.Xr exports 5 .
+This is done to negate the need for quoting, as well as to make parsing
+with scripts easier.
+.Pp
+See
+.Xr exports 5
+for the meaning of the default options.
+Otherwise, the
+.Xr exportfs 8
+command is invoked with options equivalent to the contents of this property.
+.Pp
+When the
+.Sy sharenfs
+property is changed for a dataset, the dataset and any children inheriting the
+property are re-shared with the new options, only if the property was previously
+.Sy off ,
+or if they were shared before the property was changed.
+If the new property is
+.Sy off ,
+the file systems are unshared.
+.It Sy logbias Ns = Ns Sy latency Ns | Ns Sy throughput
+Provide a hint to ZFS about handling of synchronous requests in this dataset.
+If
+.Sy logbias
+is set to
+.Sy latency
+.Pq the default ,
+ZFS will use pool log devices
+.Pq if configured
+to handle the requests at low latency.
+If
+.Sy logbias
+is set to
+.Sy throughput ,
+ZFS will not use configured pool log devices.
+ZFS will instead optimize synchronous operations for global pool throughput and
+efficient use of resources.
+.It Sy snapdev Ns = Ns Sy hidden Ns | Ns Sy visible
+Controls whether the volume snapshot devices under
+.Pa /dev/zvol/ Ns Aq Ar pool
+are hidden or visible.
+The default value is
+.Sy hidden .
+.It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible
+Controls whether the
+.Pa .zfs
+directory is hidden or visible in the root of the file system as discussed in
+the
+.Sx Snapshots
+section of
+.Xr zfsconcepts 7 .
+The default value is
+.Sy hidden .
+.It Sy sync Ns = Ns Sy standard Ns | Ns Sy always Ns | Ns Sy disabled
+Controls the behavior of synchronous requests
+.Pq e.g. fsync, O_DSYNC .
+.Sy standard
+is the POSIX-specified behavior of ensuring all synchronous requests
+are written to stable storage and all devices are flushed to ensure
+data is not cached by device controllers
+.Pq this is the default .
+.Sy always
+causes every file system transaction to be written and flushed before its
+system call returns.
+This has a large performance penalty.
+.Sy disabled
+disables synchronous requests.
+File system transactions are only committed to stable storage periodically.
+This option will give the highest performance.
+However, it is very dangerous as ZFS would be ignoring the synchronous
+transaction demands of applications such as databases or NFS.
+Administrators should only use this option when the risks are understood.
+.It Sy version Ns = Ns Ar N Ns | Ns Sy current
+The on-disk version of this file system, which is independent of the pool
+version.
+This property can only be set to later supported versions.
+See the
+.Nm zfs Cm upgrade
+command.
+.It Sy volsize Ns = Ns Ar size
+For volumes, specifies the logical size of the volume.
+By default, creating a volume establishes a reservation of equal size.
+For storage pools with a version number of 9 or higher, a
+.Sy refreservation
+is set instead.
+Any changes to
+.Sy volsize
+are reflected in an equivalent change to the reservation
+.Pq or Sy refreservation .
+The
+.Sy volsize
+can only be set to a multiple of
+.Sy volblocksize ,
+and cannot be zero.
+.Pp
+The reservation is kept equal to the volume's logical size to prevent unexpected
+behavior for consumers.
+Without the reservation, the volume could run out of space, resulting in
+undefined behavior or data corruption, depending on how the volume is used.
+These effects can also occur when the volume size is changed while it is in use
+.Pq particularly when shrinking the size .
+Extreme care should be used when adjusting the volume size.
+.Pp
+Though not recommended, a
+.Qq sparse volume
+.Po also known as
+.Qq thin provisioned
+.Pc
+can be created by specifying the
+.Fl s
+option to the
+.Nm zfs Cm create Fl V
+command, or by changing the value of the
+.Sy refreservation
+property
+.Po or
+.Sy reservation
+property on pool version 8 or earlier
+.Pc
+after the volume has been created.
+A
+.Qq sparse volume
+is a volume where the value of
+.Sy refreservation
+is less than the size of the volume plus the space required to store its
+metadata.
+Consequently, writes to a sparse volume can fail with
+.Er ENOSPC
+when the pool is low on space.
+For a sparse volume, changes to
+.Sy volsize
+are not reflected in the
+.Sy refreservation .
+A volume that is not sparse is said to be
+.Qq thick provisioned .
+A sparse volume can become thick provisioned by setting
+.Sy refreservation
+to
+.Sy auto .
+.It Sy volmode Ns = Ns Sy default Ns | Ns Sy full Ns | Ns Sy geom Ns | Ns Sy dev Ns | Ns Sy none
+This property specifies how volumes should be exposed to the OS.
+Setting it to
+.Sy full
+exposes volumes as fully fledged block devices, providing maximal
+functionality.
+The value
+.Sy geom
+is just an alias for
+.Sy full
+and is kept for compatibility.
+Setting it to
+.Sy dev
+hides its partitions.
+Volumes with property set to
+.Sy none
+are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc.,
+which can be suitable for backup purposes.
+Value
+.Sy default
+means that volume exposure is controlled by the system-wide tunable
+.Sy zvol_volmode ,
+where
+.Sy full ,
+.Sy dev
+and
+.Sy none
+are encoded as 1, 2 and 3 respectively.
+The default value is
+.Sy full .
+.It Sy vscan Ns = Ns Sy on Ns | Ns Sy off
+Controls whether regular files should be scanned for viruses when a file is
+opened and closed.
+In addition to enabling this property, the virus scan service must also be
+enabled for virus scanning to occur.
+The default value is
+.Sy off .
+This property is not used on Linux.
+.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa
+Controls whether extended attributes are enabled for this file system.
+Two styles of extended attributes are supported: either directory based
+or system attribute based.
+.Pp
+The default value of
+.Sy on
+enables directory based extended attributes.
+This style of extended attribute imposes no practical limit
+on either the size or number of attributes which can be set on a file.
+Under Linux, however, the
+.Xr getxattr 2
+and
+.Xr setxattr 2
+system calls limit the maximum size to 64K.
+This is the most compatible
+style of extended attribute and is supported by all ZFS implementations.
+.Pp
+System attribute based xattrs can be enabled by setting the value to
+.Sy sa .
+The key advantage of this type of xattr is improved performance.
+Storing extended attributes as system attributes
+significantly decreases the amount of disk IO required.
+Up to 64K of data may be stored per-file in the space reserved for system attributes.
+If there is not enough space available for an extended attribute
+then it will be automatically written as a directory based xattr.
+System attribute based extended attributes are not accessible
+on platforms which do not support the
+.Sy xattr Ns = Ns Sy sa
+feature.
+.Pp
+The use of system attribute based xattrs is strongly encouraged for users of
+SELinux or POSIX ACLs.
+Both of these features heavily rely on extended
+attributes and benefit significantly from the reduced access time.
+.Pp
+The values
+.Sy on
+and
+.Sy off
+are equivalent to the
+.Sy xattr
+and
+.Sy noxattr
+mount options.
+.It Sy jailed Ns = Ns Sy off Ns | Ns Sy on
+Controls whether the dataset is managed from a jail.
+See
+.Xr zfs-jail 8
+for more information.
+Jails are a
+.Fx
+feature and are not relevant on other platforms.
+The default value is
+.Sy off .
+.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
+Controls whether the dataset is managed from a non-global zone.
+Zones are a Solaris feature and are not relevant on other platforms.
+The default value is
+.Sy off .
+.El
+.Pp
+The following three properties cannot be changed after the file system is
+created, and therefore, should be set when the file system is created.
+If the properties are not set with the
+.Nm zfs Cm create
+or
+.Nm zpool Cm create
+commands, these properties are inherited from the parent dataset.
+If the parent dataset lacks these properties due to having been created prior to
+these features being supported, the new file system will have the default values
+for these properties.
+.Bl -tag -width ""
+.It Xo
+.Sy casesensitivity Ns = Ns Sy sensitive Ns | Ns
+.Sy insensitive Ns | Ns Sy mixed
+.Xc
+Indicates whether the file name matching algorithm used by the file system
+should be case-sensitive, case-insensitive, or allow a combination of both
+styles of matching.
+The default value for the
+.Sy casesensitivity
+property is
+.Sy sensitive .
+Traditionally,
+.Ux
+and POSIX file systems have case-sensitive file names.
+.Pp
+The
+.Sy mixed
+value for the
+.Sy casesensitivity
+property indicates that the file system can support requests for both
+case-sensitive and case-insensitive matching behavior.
+Currently, case-insensitive matching behavior on a file system that supports
+mixed behavior is limited to the SMB server product.
+For more information about the
+.Sy mixed
+value behavior, see the "ZFS Administration Guide".
+.It Xo
+.Sy normalization Ns = Ns Sy none Ns | Ns Sy formC Ns | Ns
+.Sy formD Ns | Ns Sy formKC Ns | Ns Sy formKD
+.Xc
+Indicates whether the file system should perform a
+.Sy unicode
+normalization of file names whenever two file names are compared, and which
+normalization algorithm should be used.
+File names are always stored unmodified; names are normalized as part of any
+comparison process.
+If this property is set to a legal value other than
+.Sy none ,
+and the
+.Sy utf8only
+property was left unspecified, the
+.Sy utf8only
+property is automatically set to
+.Sy on .
+The default value of the
+.Sy normalization
+property is
+.Sy none .
+This property cannot be changed after the file system is created.
+.It Sy utf8only Ns = Ns Sy on Ns | Ns Sy off
+Indicates whether the file system should reject file names that include
+characters that are not present in the
+.Sy UTF-8
+character code set.
+If this property is explicitly set to
+.Sy off ,
+the normalization property must either not be explicitly set or be set to
+.Sy none .
+The default value for the
+.Sy utf8only
+property is
+.Sy off .
+This property cannot be changed after the file system is created.
+.El
+.Pp
+The
+.Sy casesensitivity ,
+.Sy normalization ,
+and
+.Sy utf8only
+properties are also new permissions that can be assigned to non-privileged users
+by using the ZFS delegated administration feature.
+.
+.Ss Temporary Mount Point Properties
+When a file system is mounted, either through
+.Xr mount 8
+for legacy mounts or the
+.Nm zfs Cm mount
+command for normal file systems, its mount options are set according to its
+properties.
+The correlation between properties and mount options is as follows:
+.Bl -tag -compact -offset Ds -width "rootcontext="
+.It Sy atime
+atime/noatime
+.It Sy canmount
+auto/noauto
+.It Sy devices
+dev/nodev
+.It Sy exec
+exec/noexec
+.It Sy readonly
+ro/rw
+.It Sy relatime
+relatime/norelatime
+.It Sy setuid
+suid/nosuid
+.It Sy xattr
+xattr/noxattr
+.It Sy nbmand
+mand/nomand
+.It Sy context Ns =
+context=
+.It Sy fscontext Ns =
+fscontext=
+.It Sy defcontext Ns =
+defcontext=
+.It Sy rootcontext Ns =
+rootcontext=
+.El
+.Pp
+In addition, these options can be set on a per-mount basis using the
+.Fl o
+option, without affecting the property that is stored on disk.
+The values specified on the command line override the values stored in the
+dataset.
+The
+.Sy nosuid
+option is an alias for
+.Sy nodevices , Ns Sy nosetuid .
+These properties are reported as
+.Qq temporary
+by the
+.Nm zfs Cm get
+command.
+If the properties are changed while the dataset is mounted, the new setting
+overrides any temporary settings.
+.
+.Ss User Properties
+In addition to the standard native properties, ZFS supports arbitrary user
+properties.
+User properties have no effect on ZFS behavior, but applications or
+administrators can use them to annotate datasets
+.Pq file systems, volumes, and snapshots .
+.Pp
+User property names must contain a colon
+.Pq Qq Sy \&:
+character to distinguish them from native properties.
+They may contain lowercase letters, numbers, and the following punctuation
+characters: colon
+.Pq Qq Sy \&: ,
+dash
+.Pq Qq Sy - ,
+period
+.Pq Qq Sy \&. ,
+and underscore
+.Pq Qq Sy _ .
+The expected convention is that the property name is divided into two portions
+such as
+.Ar module : Ns Ar property ,
+but this namespace is not enforced by ZFS.
+User property names can be at most 256 characters, and cannot begin with a dash
+.Pq Qq Sy - .
+.Pp
+When making programmatic use of user properties, it is strongly suggested to use
+a reversed DNS domain name for the
+.Ar module
+component of property names to reduce the chance that two
+independently-developed packages use the same property name for different
+purposes.
+.Pp
+The values of user properties are arbitrary strings, are always inherited, and
+are never validated.
+All of the commands that operate on properties
+.Po Nm zfs Cm list ,
+.Nm zfs Cm get ,
+.Nm zfs Cm set ,
+and so forth
+.Pc
+can be used to manipulate both native properties and user properties.
+Use the
+.Nm zfs Cm inherit
+command to clear a user property.
+If the property is not defined in any parent dataset, it is removed entirely.
+Property values are limited to 8192 bytes.

diff --git a/zfs/man/man7/zpool-features.7 b/zfs/man/man7/zpool-features.7
new file mode 100644
index 0000000..83ca911
--- /dev/null
+++ b/zfs/man/man7/zpool-features.7

@@ -0,0 +1,842 @@
+.\"
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License").  You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.\" Copyright (c) 2019, Klara Inc.
+.\" Copyright (c) 2019, Allan Jude
+.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+.\"
+.Dd May 31, 2021
+.Dt ZPOOL-FEATURES 7
+.Os
+.
+.Sh NAME
+.Nm zpool-features
+.Nd description of ZFS pool features
+.
+.Sh DESCRIPTION
+ZFS pool on-disk format versions are specified via "features" which replace
+the old on-disk format numbers (the last supported on-disk format number is 28).
+To enable a feature on a pool use the
+.Nm zpool Cm upgrade
+command, or set the
+.Sy feature Ns @ Ns Ar feature-name
+property to
+.Sy enabled .
+Please also see the
+.Sx Compatibility feature sets
+section for information on how sets of features may be enabled together.
+.Pp
+The pool format does not affect file system version compatibility or the ability
+to send file systems between pools.
+.Pp
+Since most features can be enabled independently of each other, the on-disk
+format of the pool is specified by the set of all features marked as
+.Sy active
+on the pool.
+If the pool was created by another software version
+this set may include unsupported features.
+.
+.Ss Identifying features
+Every feature has a GUID of the form
+.Ar com.example : Ns Ar feature-name .
+The reversed DNS name ensures that the feature's GUID is unique across all ZFS
+implementations.
+When unsupported features are encountered on a pool they will
+be identified by their GUIDs.
+Refer to the documentation for the ZFS
+implementation that created the pool for information about those features.
+.Pp
+Each supported feature also has a short name.
+By convention a feature's short name is the portion of its GUID which follows the
+.Sq \&:
+(i.e.
+.Ar com.example : Ns Ar feature-name
+would have the short name
+.Ar feature-name ) ,
+however a feature's short name may differ across ZFS implementations if
+following the convention would result in name conflicts.
+.
+.Ss Feature states
+Features can be in one of three states:
+.Bl -tag -width "disabled"
+.It Sy active
+This feature's on-disk format changes are in effect on the pool.
+Support for this feature is required to import the pool in read-write mode.
+If this feature is not read-only compatible,
+support is also required to import the pool in read-only mode
+.Pq see Sx Read-only compatibility .
+.It Sy enabled
+An administrator has marked this feature as enabled on the pool, but the
+feature's on-disk format changes have not been made yet.
+The pool can still be imported by software that does not support this feature,
+but changes may be made to the on-disk format at any time
+which will move the feature to the
+.Sy active
+state.
+Some features may support returning to the
+.Sy enabled
+state after becoming
+.Sy active .
+See feature-specific documentation for details.
+.It Sy disabled
+This feature's on-disk format changes have not been made and will not be made
+unless an administrator moves the feature to the
+.Sy enabled
+state.
+Features cannot be disabled once they have been enabled.
+.El
+.Pp
+The state of supported features is exposed through pool properties of the form
+.Sy feature Ns @ Ns Ar short-name .
+.
+.Ss Read-only compatibility
+Some features may make on-disk format changes that do not interfere with other
+software's ability to read from the pool.
+These features are referred to as
+.Dq read-only compatible .
+If all unsupported features on a pool are read-only compatible,
+the pool can be imported in read-only mode by setting the
+.Sy readonly
+property during import (see
+.Xr zpool-import 8
+for details on importing pools).
+.
+.Ss Unsupported features
+For each unsupported feature enabled on an imported pool, a pool property
+named
+.Sy unsupported Ns @ Ns Ar feature-name
+will indicate why the import was allowed despite the unsupported feature.
+Possible values for this property are:
+.Bl -tag -width "readonly"
+.It Sy inactive
+The feature is in the
+.Sy enabled
+state and therefore the pool's on-disk
+format is still compatible with software that does not support this feature.
+.It Sy readonly
+The feature is read-only compatible and the pool has been imported in
+read-only mode.
+.El
+.
+.Ss Feature dependencies
+Some features depend on other features being enabled in order to function.
+Enabling a feature will automatically enable any features it depends on.
+.
+.Ss Compatibility feature sets
+It is sometimes necessary for a pool to maintain compatibility with a
+specific on-disk format, by enabling and disabling particular features.
+The
+.Sy compatibility
+property facilitates this by allowing feature sets to be read from text files.
+When set to
+.Sy off
+(the default), compatibility feature sets are disabled
+(i.e. all features are enabled); when set to
+.Sy legacy ,
+no features are enabled.
+When set to a comma-separated list of filenames
+(each filename may either be an absolute path, or relative to
+.Pa /etc/zfs/compatibility.d
+or
+.Pa /usr/share/zfs/compatibility.d ) ,
+the lists of requested features are read from those files,
+separated by whitespace and/or commas.
+Only features present in all files are enabled.
+.Pp
+Simple sanity checks are applied to the files:
+they must be between 1B and 16kB in size, and must end with a newline character.
+.Pp
+The requested features are applied when a pool is created using
+.Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar …
+and control which features are enabled when using
+.Nm zpool Cm upgrade .
+.Nm zpool Cm status
+will not show a warning about disabled features which are not part
+of the requested feature set.
+.Pp
+The special value
+.Sy legacy
+prevents any features from being enabled, either via
+.Nm zpool Cm upgrade
+or
+.Nm zpool Cm set Sy feature Ns @ Ns Ar feature-name Ns = Ns Sy enabled .
+This setting also prevents pools from being upgraded to newer on-disk versions.
+This is a safety measure to prevent new features from being
+accidentally enabled, breaking compatibility.
+.Pp
+By convention, compatibility files in
+.Pa /usr/share/zfs/compatibility.d
+are provided by the distribution, and include feature sets
+supported by important versions of popular distributions, and feature
+sets commonly supported at the start of each year.
+Compatibility files in
+.Pa /etc/zfs/compatibility.d ,
+if present, will take precedence over files with the same name in
+.Pa /usr/share/zfs/compatibility.d .
+.Pp
+If an unrecognized feature is found in these files, an error message will
+be shown.
+If the unrecognized feature is in a file in
+.Pa /etc/zfs/compatibility.d ,
+this is treated as an error and processing will stop.
+If the unrecognized feature is under
+.Pa /usr/share/zfs/compatibility.d ,
+this is treated as a warning and processing will continue.
+This difference is to allow distributions to include features
+which might not be recognized by the currently-installed binaries.
+.Pp
+Compatibility files may include comments:
+any text from
+.Sq #
+to the end of the line is ignored.
+.Pp
+.Sy Example :
+.Bd -literal -compact -offset 4n
+.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
+# Features which are supported by GRUB2
+async_destroy
+bookmarks
+embedded_data
+empty_bpobj
+enabled_txg
+extensible_dataset
+filesystem_limits
+hole_birth
+large_blocks
+lz4_compress
+spacemap_histogram
+
+.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
+.Ed
+.Pp
+See
+.Xr zpool-create 8
+and
+.Xr zpool-upgrade 8
+for more information on how these commands are affected by feature sets.
+.
+.de feature
+.It Sy \\$2
+.Bl -tag -compact -width "READ-ONLY COMPATIBLE"
+.It GUID
+.Sy \\$1:\\$2
+.if !"\\$4"" \{\
+.It DEPENDENCIES
+\fB\\$4\fP\c
+.if !"\\$5"" , \fB\\$5\fP\c
+.if !"\\$6"" , \fB\\$6\fP\c
+.if !"\\$7"" , \fB\\$7\fP\c
+.if !"\\$8"" , \fB\\$8\fP\c
+.if !"\\$9"" , \fB\\$9\fP\c
+.\}
+.It READ-ONLY COMPATIBLE
+\\$3
+.El
+.Pp
+..
+.
+.ds instant-never \
+.No This feature becomes Sy active No as soon as it is enabled \
+and will never return to being Sy enabled .
+.
+.ds remount-upgrade \
+.No Each filesystem will be upgraded automatically when remounted, \
+or when a new file is created under that filesystem. \
+The upgrade can also be triggered on filesystems via \
+Nm zfs Cm set Sy version Ns = Ns Sy current Ar fs . \
+No The upgrade process runs in the background and may take a while to complete \
+for filesystems containing large amounts of files.
+.
+.de checksum-spiel
+When the
+.Sy \\$1
+feature is set to
+.Sy enabled ,
+the administrator can turn on the
+.Sy \\$1
+checksum on any dataset using
+.Nm zfs Cm set Sy checksum Ns = Ns Sy \\$1 Ar dset
+.Po see Xr zfs-set 8 Pc .
+This feature becomes
+.Sy active
+once a
+.Sy checksum
+property has been set to
+.Sy \\$1 ,
+and will return to being
+.Sy enabled
+once all filesystems that have ever had their checksum set to
+.Sy \\$1
+are destroyed.
+..
+.
+.Sh FEATURES
+The following features are supported on this system:
+.Bl -tag -width Ds
+.feature org.zfsonlinux allocation_classes yes
+This feature enables support for separate allocation classes.
+.Pp
+This feature becomes
+.Sy active
+when a dedicated allocation class vdev (dedup or special) is created with the
+.Nm zpool Cm create No or Nm zpool Cm add No commands .
+With device removal, it can be returned to the
+.Sy enabled
+state if all the dedicated allocation class vdevs are removed.
+.
+.feature com.delphix async_destroy yes
+Destroying a file system requires traversing all of its data in order to
+return its used space to the pool.
+Without
+.Sy async_destroy ,
+the file system is not fully removed until all space has been reclaimed.
+If the destroy operation is interrupted by a reboot or power outage,
+the next attempt to open the pool will need to complete the destroy
+operation synchronously.
+.Pp
+When
+.Sy async_destroy
+is enabled, the file system's data will be reclaimed by a background process,
+allowing the destroy operation to complete
+without traversing the entire file system.
+The background process is able to resume
+interrupted destroys after the pool has been opened, eliminating the need
+to finish interrupted destroys as part of the open operation.
+The amount of space remaining to be reclaimed by the background process
+is available through the
+.Sy freeing
+property.
+.Pp
+This feature is only
+.Sy active
+while
+.Sy freeing
+is non-zero.
+.
+.feature com.delphix bookmarks yes extensible_dataset
+This feature enables use of the
+.Nm zfs Cm bookmark
+command.
+.Pp
+This feature is
+.Sy active
+while any bookmarks exist in the pool.
+All bookmarks in the pool can be listed by running
+.Nm zfs Cm list Fl t Sy bookmark Fl r Ar poolname .
+.
+.feature com.datto bookmark_v2 no bookmarks extensible_dataset
+This feature enables the creation and management of larger bookmarks which are
+needed for other features in ZFS.
+.Pp
+This feature becomes
+.Sy active
+when a v2 bookmark is created and will be returned to the
+.Sy enabled
+state when all v2 bookmarks are destroyed.
+.
+.feature com.delphix bookmark_written no bookmarks extensible_dataset bookmark_v2
+This feature enables additional bookmark accounting fields, enabling the
+.Sy written Ns # Ns Ar bookmark
+property (space written since a bookmark) and estimates of
+send stream sizes for incrementals from bookmarks.
+.Pp
+This feature becomes
+.Sy active
+when a bookmark is created and will be
+returned to the
+.Sy enabled
+state when all bookmarks with these fields are destroyed.
+.
+.feature org.openzfs device_rebuild yes
+This feature enables the ability for the
+.Nm zpool Cm attach
+and
+.Nm zpool Cm replace
+commands to perform sequential reconstruction
+(instead of healing reconstruction) when resilvering.
+.Pp
+Sequential reconstruction resilvers a device in LBA order without immediately
+verifying the checksums.
+Once complete, a scrub is started, which then verifies the checksums.
+This approach allows full redundancy to be restored to the pool
+in the minimum amount of time.
+This two-phase approach will take longer than a healing resilver
+when the time to verify the checksums is included.
+However, unless there is additional pool damage,
+no checksum errors should be reported by the scrub.
+This feature is incompatible with raidz configurations.
+.Pp
+This feature becomes
+.Sy active
+while a sequential resilver is in progress, and returns to
+.Sy enabled
+when the resilver completes.
+.
+.feature com.delphix device_removal no
+This feature enables the
+.Nm zpool Cm remove
+command to remove top-level vdevs,
+evacuating them to reduce the total size of the pool.
+.Pp
+This feature becomes
+.Sy active
+when the
+.Nm zpool Cm remove
+command is used
+on a top-level vdev, and will never return to being
+.Sy enabled .
+.
+.feature org.openzfs draid no
+This feature enables use of the
+.Sy draid
+vdev type.
+dRAID is a variant of raidz which provides integrated distributed
+hot spares that allow faster resilvering while retaining the benefits of raidz.
+Data, parity, and spare space are organized in redundancy groups
+and distributed evenly over all of the devices.
+.Pp
+This feature becomes
+.Sy active
+when creating a pool which uses the
+.Sy draid
+vdev type, or when adding a new
+.Sy draid
+vdev to an existing pool.
+.
+.feature org.illumos edonr no extensible_dataset
+This feature enables the use of the Edon-R hash algorithm for checksum,
+including for nopwrite (if compression is also enabled, an overwrite of
+a block whose checksum matches the data being written will be ignored).
+In an abundance of caution, Edon-R requires verification when used with
+dedup:
+.Nm zfs Cm set Sy dedup Ns = Ns Sy edonr , Ns Sy verify
+.Po see Xr zfs-set 8 Pc .
+.Pp
+Edon-R is a very high-performance hash algorithm that was part
+of the NIST SHA-3 competition.
+It provides extremely high hash performance (over 350% faster than SHA-256),
+but was not selected because of its unsuitability
+as a general purpose secure hash algorithm.
+This implementation utilizes the new salted checksumming functionality
+in ZFS, which means that the checksum is pre-seeded with a secret
+256-bit random key (stored on the pool) before being fed the data block
+to be checksummed.
+Thus the produced checksums are unique to a given pool,
+preventing hash collision attacks on systems with dedup.
+.Pp
+.checksum-spiel edonr
+.Pp
+.Fx does not support the Sy edonr No feature.
+.
+.feature com.delphix embedded_data no
+This feature improves the performance and compression ratio of
+highly-compressible blocks.
+Blocks whose contents can compress to 112 bytes
+or smaller can take advantage of this feature.
+.Pp
+When this feature is enabled, the contents of highly-compressible blocks are
+stored in the block "pointer" itself (a misnomer in this case, as it contains
+the compressed data, rather than a pointer to its location on disk).
+Thus the space of the block (one sector, typically 512B or 4kB) is saved,
+and no additional I/O is needed to read and write the data block.
+.Pp
+\*[instant-never]
+.
+.feature com.delphix empty_bpobj yes
+This feature increases the performance of creating and using a large
+number of snapshots of a single filesystem or volume, and also reduces
+the disk space required.
+.Pp
+When there are many snapshots, each snapshot uses many Block Pointer
+Objects (bpobjs) to track blocks associated with that snapshot.
+However, in common use cases, most of these bpobjs are empty.
+This feature allows us to create each bpobj on-demand,
+thus eliminating the empty bpobjs.
+.Pp
+This feature is
+.Sy active
+while there are any filesystems, volumes,
+or snapshots which were created after enabling this feature.
+.
+.feature com.delphix enabled_txg yes
+Once this feature is enabled, ZFS records the transaction group number
+in which new features are enabled.
+This has no user-visible impact, but other features may depend on this feature.
+.Pp
+This feature becomes
+.Sy active
+as soon as it is enabled and will
+never return to being
+.Sy enabled .
+.
+.feature com.datto encryption no bookmark_v2 extensible_dataset
+This feature enables the creation and management of natively encrypted datasets.
+.Pp
+This feature becomes
+.Sy active
+when an encrypted dataset is created and will be returned to the
+.Sy enabled
+state when all datasets that use this feature are destroyed.
+.
+.feature com.delphix extensible_dataset no
+This feature allows more flexible use of internal ZFS data structures,
+and exists for other features to depend on.
+.Pp
+This feature will be
+.Sy active
+when the first dependent feature uses it, and will be returned to the
+.Sy enabled
+state when all datasets that use this feature are destroyed.
+.
+.feature com.joyent filesystem_limits yes extensible_dataset
+This feature enables filesystem and snapshot limits.
+These limits can be used to control how many filesystems and/or snapshots
+can be created at the point in the tree on which the limits are set.
+.Pp
+This feature is
+.Sy active
+once either of the limit properties has been set on a dataset.
+Once activated the feature is never deactivated.
+.
+.feature com.delphix hole_birth no enabled_txg
+This feature has/had bugs, the result of which is that, if you do a
+.Nm zfs Cm send Fl i
+.Pq or Fl R , No since it uses Fl i
+from an affected dataset, the receiving party will not see any checksum
+or other errors, but the resulting destination snapshot
+will not match the source.
+Its use by
+.Nm zfs Cm send Fl i
+has been disabled by default
+.Pq see Sy send_holes_without_birth_time No in Xr zfs 4 .
+.Pp
+This feature improves performance of incremental sends
+.Pq Nm zfs Cm send Fl i
+and receives for objects with many holes.
+The most common case of hole-filled objects is zvols.
+.Pp
+An incremental send stream from snapshot
+.Sy A No to snapshot Sy B
+contains information about every block that changed between
+.Sy A No and Sy B .
+Blocks which did not change between those snapshots can be
+identified and omitted from the stream using a piece of metadata called
+the "block birth time", but birth times are not recorded for holes
+(blocks filled only with zeroes).
+Since holes created after
+.Sy A No cannot be distinguished from holes created before Sy A ,
+information about every hole in the entire filesystem or zvol
+is included in the send stream.
+.Pp
+For workloads where holes are rare this is not a problem.
+However, when incrementally replicating filesystems or zvols with many holes
+(for example a zvol formatted with another filesystem) a lot of time will
+be spent sending and receiving unnecessary information about holes that
+already exist on the receiving side.
+.Pp
+Once the
+.Sy hole_birth
+feature has been enabled the block birth times
+of all new holes will be recorded.
+Incremental sends between snapshots created after this feature is enabled
+will use this new metadata to avoid sending information about holes that
+already exist on the receiving side.
+.Pp
+\*[instant-never]
+.
+.feature org.open-zfs large_blocks no extensible_dataset
+This feature allows the record size on a dataset to be set larger than 128kB.
+.Pp
+This feature becomes
+.Sy active
+once a dataset contains a file with a block size larger than 128kB,
+and will return to being
+.Sy enabled
+once all filesystems that have ever had their recordsize larger than 128kB
+are destroyed.
+.
+.feature org.zfsonlinux large_dnode no extensible_dataset
+This feature allows the size of dnodes in a dataset to be set larger than 512B.
+.Pp
+This feature becomes
+.Sy active
+once a dataset contains an object with a dnode larger than 512B,
+which occurs as a result of setting the
+.Sy dnodesize
+dataset property to a value other than
+.Sy legacy .
+The feature will return to being
+.Sy enabled
+once all filesystems that have ever contained a dnode larger than 512B
+are destroyed.
+Large dnodes allow more data to be stored in the bonus buffer,
+thus potentially improving performance by avoiding the use of spill blocks.
+.
+.feature com.delphix livelist yes
+This feature allows clones to be deleted faster than the traditional method
+when a large number of random/sparse writes have been made to the clone.
+All blocks allocated and freed after a clone is created are tracked by the
+clone's livelist which is referenced during the deletion of the clone.
+The feature is activated when a clone is created and remains
+.Sy active
+until all clones have been destroyed.
+.
+.feature com.delphix log_spacemap yes com.delphix:spacemap_v2
+This feature improves performance for heavily-fragmented pools,
+especially when workloads are heavy in random-writes.
+It does so by logging all the metaslab changes on a single spacemap every TXG
+instead of scattering multiple writes to all the metaslab spacemaps.
+.Pp
+\*[instant-never]
+.
+.feature org.illumos lz4_compress no
+.Sy lz4
+is a high-performance real-time compression algorithm that
+features significantly faster compression and decompression as well as a
+higher compression ratio than the older
+.Sy lzjb
+compression.
+Typically,
+.Sy lz4
+compression is approximately 50% faster on compressible data and 200% faster
+on incompressible data than
+.Sy lzjb .
+It is also approximately 80% faster on decompression,
+while giving approximately a 10% better compression ratio.
+.Pp
+When the
+.Sy lz4_compress
+feature is set to
+.Sy enabled ,
+the administrator can turn on
+.Sy lz4
+compression on any dataset on the pool using the
+.Xr zfs-set 8
+command.
+All newly written metadata will be compressed with the
+.Sy lz4
+algorithm.
+.Pp
+\*[instant-never]
+.
+.feature com.joyent multi_vdev_crash_dump no
+This feature allows a dump device to be configured with a pool comprised
+of multiple vdevs.
+Those vdevs may be arranged in any mirrored or raidz configuration.
+.Pp
+When the
+.Sy multi_vdev_crash_dump
+feature is set to
+.Sy enabled ,
+the administrator can use
+.Xr dumpadm 1M
+to configure a dump device on a pool comprised of multiple vdevs.
+.Pp
+Under
+.Fx
+and Linux this feature is unused, but registered for compatibility.
+New pools created on these systems will have the feature
+.Sy enabled
+but will never transition to
+.Sy active ,
+as this functionality is not required for crash dump support.
+Existing pools where this feature is
+.Sy active
+can be imported.
+.
+.feature com.delphix obsolete_counts yes device_removal
+This feature is an enhancement of
+.Sy device_removal ,
+which will over time reduce the memory used to track removed devices.
+When indirect blocks are freed or remapped,
+we note that their part of the indirect mapping is "obsolete" – no longer needed.
+.Pp
+This feature becomes
+.Sy active
+when the
+.Nm zpool Cm remove
+command is used on a top-level vdev, and will never return to being
+.Sy enabled .
+.
+.feature org.zfsonlinux project_quota yes extensible_dataset
+This feature allows administrators to account space and object usage
+information against the project identifier (ID).
+.Pp
+The project ID is an object-based attribute.
+When upgrading an existing filesystem,
+objects without a project ID will be assigned a zero project ID.
+When this feature is enabled, newly created objects inherit
+their parent directories' project ID if the parent's inherit flag is set
+.Pq via Nm chattr Sy [+-]P No or Nm zfs Cm project Fl s Ns | Ns Fl C .
+Otherwise, the new object's project ID will be zero.
+An object's project ID can be changed at any time by the owner
+(or privileged user) via
+.Nm chattr Fl p Ar prjid
+or
+.Nm zfs Cm project Fl p Ar prjid .
+.Pp
+This feature will become
+.Sy active
+as soon as it is enabled and will never return to being
+.Sy disabled .
+\*[remount-upgrade]
+.
+.feature com.delphix redaction_bookmarks no bookmarks extensible_dataset
+This feature enables the use of redacted
+.Nm zfs Cm send Ns s ,
+which create redaction bookmarks storing the list of blocks
+redacted by the send that created them.
+For more information about redacted sends, see
+.Xr zfs-send 8 .
+.
+.feature com.delphix redacted_datasets no extensible_dataset
+This feature enables the receiving of redacted
+.Nm zfs Cm send
+streams, which create redacted datasets when received.
+These datasets are missing some of their blocks,
+and so cannot be safely mounted, and their contents cannot be safely read.
+For more information about redacted receives, see
+.Xr zfs-send 8 .
+.
+.feature com.datto resilver_defer yes
+This feature allows ZFS to postpone new resilvers if an existing one is already
+in progress.
+Without this feature, any new resilvers will cause the currently
+running one to be immediately restarted from the beginning.
+.Pp
+This feature becomes
+.Sy active
+once a resilver has been deferred, and returns to being
+.Sy enabled
+when the deferred resilver begins.
+.
+.feature org.illumos sha512 no extensible_dataset
+This feature enables the use of the SHA-512/256 truncated hash algorithm
+(FIPS 180-4) for checksum and dedup.
+The native 64-bit arithmetic of SHA-512 provides an approximate 50%
+performance boost over SHA-256 on 64-bit hardware
+and is thus a good minimum-change replacement candidate
+for systems where hash performance is important,
+but these systems cannot for whatever reason utilize the faster
+.Sy skein No and Sy edonr
+algorithms.
+.Pp
+.checksum-spiel sha512
+.
+.feature org.illumos skein no extensible_dataset
+This feature enables the use of the Skein hash algorithm for checksum and dedup.
+Skein is a high-performance secure hash algorithm that was a
+finalist in the NIST SHA-3 competition.
+It provides a very high security margin and high performance on 64-bit hardware
+(80% faster than SHA-256).
+This implementation also utilizes the new salted checksumming
+functionality in ZFS, which means that the checksum is pre-seeded with a
+secret 256-bit random key (stored on the pool) before being fed the data
+block to be checksummed.
+Thus the produced checksums are unique to a given pool,
+preventing hash collision attacks on systems with dedup.
+.Pp
+.checksum-spiel skein
+.
+.feature com.delphix spacemap_histogram yes
+This feature allows ZFS to maintain more information about how free space
+is organized within the pool.
+If this feature is
+.Sy enabled ,
+it will be activated when a new space map object is created, or
+an existing space map is upgraded to the new format,
+and never returns back to being
+.Sy enabled .
+.
+.feature com.delphix spacemap_v2 yes
+This feature enables the use of the new space map encoding which
+consists of two words (instead of one) whenever it is advantageous.
+The new encoding allows space maps to represent large regions of
+space more efficiently on-disk while also increasing their maximum
+addressable offset.
+.Pp
+This feature becomes
+.Sy active
+once it is
+.Sy enabled ,
+and never returns back to being
+.Sy enabled .
+.
+.feature org.zfsonlinux userobj_accounting yes extensible_dataset
+This feature allows administrators to account the object usage information
+by user and group.
+.Pp
+\*[instant-never]
+\*[remount-upgrade]
+.
+.feature com.delphix zpool_checkpoint yes
+This feature enables the
+.Nm zpool Cm checkpoint
+command that can checkpoint the state of the pool
+at the time it was issued and later rewind back to it or discard it.
+.Pp
+This feature becomes
+.Sy active
+when the
+.Nm zpool Cm checkpoint
+command is used to checkpoint the pool.
+The feature will only return back to being
+.Sy enabled
+when the pool is rewound or the checkpoint has been discarded.
+.
+.feature org.freebsd zstd_compress no extensible_dataset
+.Sy zstd
+is a high-performance compression algorithm that features a
+combination of high compression ratios and high speed.
+Compared to
+.Sy gzip ,
+.Sy zstd
+offers slightly better compression at much higher speeds.
+Compared to
+.Sy lz4 ,
+.Sy zstd
+offers much better compression while being only modestly slower.
+Typically,
+.Sy zstd
+compression speed ranges from 250 to 500 MB/s per thread
+and decompression speed is over 1 GB/s per thread.
+.Pp
+When the
+.Sy zstd_compress
+feature is set to
+.Sy enabled ,
+the administrator can turn on
+.Sy zstd
+compression of any dataset using
+.Nm zfs Cm set Sy compress Ns = Ns Sy zstd Ar dset
+.Po see Xr zfs-set 8 Pc .
+This feature becomes
+.Sy active
+once a
+.Sy compress
+property has been set to
+.Sy zstd ,
+and will return to being
+.Sy enabled
+once all filesystems that have ever had their
+.Sy compress
+property set to
+.Sy zstd
+are destroyed.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool 8

diff --git a/zfs/man/man7/zpoolconcepts.7 b/zfs/man/man7/zpoolconcepts.7
new file mode 100644
index 0000000..58132ba
--- /dev/null
+++ b/zfs/man/man7/zpoolconcepts.7

@@ -0,0 +1,512 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd June 2, 2021
+.Dt ZPOOLCONCEPTS 7
+.Os
+.
+.Sh NAME
+.Nm zpoolconcepts
+.Nd overview of ZFS storage pools
+.
+.Sh DESCRIPTION
+.Ss Virtual Devices (vdevs)
+A "virtual device" describes a single device or a collection of devices
+organized according to certain performance and fault characteristics.
+The following virtual devices are supported:
+.Bl -tag -width "special"
+.It Sy disk
+A block device, typically located under
+.Pa /dev .
+ZFS can use individual slices or partitions, though the recommended mode of
+operation is to use whole disks.
+A disk can be specified by a full path, or it can be a shorthand name
+.Po the relative portion of the path under
+.Pa /dev
+.Pc .
+A whole disk can be specified by omitting the slice or partition designation.
+For example,
+.Pa sda
+is equivalent to
+.Pa /dev/sda .
+When given a whole disk, ZFS automatically labels the disk, if necessary.
+.It Sy file
+A regular file.
+The use of files as a backing store is strongly discouraged.
+It is designed primarily for experimental purposes, as the fault tolerance of a
+file is only as good as the file system on which it resides.
+A file must be specified by a full path.
+.It Sy mirror
+A mirror of two or more devices.
+Data is replicated in an identical fashion across all components of a mirror.
+A mirror with
+.Em N No disks of size Em X No can hold Em X No bytes and can withstand Em N-1
+devices failing without losing data.
+.It Sy raidz , raidz1 , raidz2 , raidz3
+A variation on RAID-5 that allows for better distribution of parity and
+eliminates the RAID-5
+.Qq write hole
+.Pq in which data and parity become inconsistent after a power loss .
+Data and parity is striped across all disks within a raidz group.
+.Pp
+A raidz group can have single, double, or triple parity, meaning that the
+raidz group can sustain one, two, or three failures, respectively, without
+losing any data.
+The
+.Sy raidz1
+vdev type specifies a single-parity raidz group; the
+.Sy raidz2
+vdev type specifies a double-parity raidz group; and the
+.Sy raidz3
+vdev type specifies a triple-parity raidz group.
+The
+.Sy raidz
+vdev type is an alias for
+.Sy raidz1 .
+.Pp
+A raidz group with
+.Em N No disks of size Em X No with Em P No parity disks can hold approximately
+.Em (N-P)*X No bytes and can withstand Em P No devices failing without losing data.
+The minimum number of devices in a raidz group is one more than the number of
+parity disks.
+The recommended number is between 3 and 9 to help increase performance.
+.It Sy draid , draid1 , draid2 , draid3
+A variant of raidz that provides integrated distributed hot spares which
+allows for faster resilvering while retaining the benefits of raidz.
+A dRAID vdev is constructed from multiple internal raidz groups, each with
+.Em D No data devices and Em P No parity devices.
+These groups are distributed over all of the children in order to fully
+utilize the available disk performance.
+.Pp
+Unlike raidz, dRAID uses a fixed stripe width (padding as necessary with
+zeros) to allow fully sequential resilvering.
+This fixed stripe width significantly affects both usable capacity and IOPS.
+For example, with the default
+.Em D=8 No and Em 4kB No disk sectors the minimum allocation size is Em 32kB .
+If using compression, this relatively large allocation size can reduce the
+effective compression ratio.
+When using ZFS volumes and dRAID, the default of the
+.Sy volblocksize
+property is increased to account for the allocation size.
+If a dRAID pool will hold a significant amount of small blocks, it is
+recommended to also add a mirrored
+.Sy special
+vdev to store those blocks.
+.Pp
+In regards to I/O, performance is similar to raidz since for any read all
+.Em D No data disks must be accessed.
+Delivered random IOPS can be reasonably approximated as
+.Sy floor((N-S)/(D+P))*single_drive_IOPS .
+.Pp
+Like raidz, a dRAID can have single-, double-, or triple-parity.
+The
+.Sy draid1 ,
+.Sy draid2 ,
+and
+.Sy draid3
+types can be used to specify the parity level.
+The
+.Sy draid
+vdev type is an alias for
+.Sy draid1 .
+.Pp
+A dRAID with
+.Em N No disks of size Em X , D No data disks per redundancy group, Em P
+.No parity level, and Em S No distributed hot spares can hold approximately
+.Em (N-S)*(D/(D+P))*X No bytes and can withstand Em P
+devices failing without losing data.
+.It Sy draid Ns Oo Ar parity Oc Ns Oo Sy \&: Ns Ar data Ns Sy d Oc Ns Oo Sy \&: Ns Ar children Ns Sy c Oc Ns Oo Sy \&: Ns Ar spares Ns Sy s Oc
+A non-default dRAID configuration can be specified by appending one or more
+of the following optional arguments to the
+.Sy draid
+keyword:
+.Bl -tag -compact -width "children"
+.It Ar parity
+The parity level (1-3).
+.It Ar data
+The number of data devices per redundancy group.
+In general, a smaller value of
+.Em D No will increase IOPS, improve the compression ratio,
+and speed up resilvering at the expense of total usable capacity.
+Defaults to
+.Em 8 , No unless Em N-P-S No is less than Em 8 .
+.It Ar children
+The expected number of children.
+Useful as a cross-check when listing a large number of devices.
+An error is returned when the provided number of children differs.
+.It Ar spares
+The number of distributed hot spares.
+Defaults to zero.
+.El
+.It Sy spare
+A pseudo-vdev which keeps track of available hot spares for a pool.
+For more information, see the
+.Sx Hot Spares
+section.
+.It Sy log
+A separate intent log device.
+If more than one log device is specified, then writes are load-balanced between
+devices.
+Log devices can be mirrored.
+However, raidz vdev types are not supported for the intent log.
+For more information, see the
+.Sx Intent Log
+section.
+.It Sy dedup
+A device dedicated solely for deduplication tables.
+The redundancy of this device should match the redundancy of the other normal
+devices in the pool.
+If more than one dedup device is specified, then
+allocations are load-balanced between those devices.
+.It Sy special
+A device dedicated solely for allocating various kinds of internal metadata,
+and optionally small file blocks.
+The redundancy of this device should match the redundancy of the other normal
+devices in the pool.
+If more than one special device is specified, then
+allocations are load-balanced between those devices.
+.Pp
+For more information on special allocations, see the
+.Sx Special Allocation Class
+section.
+.It Sy cache
+A device used to cache storage pool data.
+A cache device cannot be configured as a mirror or raidz group.
+For more information, see the
+.Sx Cache Devices
+section.
+.El
+.Pp
+Virtual devices cannot be nested, so a mirror or raidz virtual device can only
+contain files or disks.
+Mirrors of mirrors
+.Pq or other combinations
+are not allowed.
+.Pp
+A pool can have any number of virtual devices at the top of the configuration
+.Po known as
+.Qq root vdevs
+.Pc .
+Data is dynamically distributed across all top-level devices to balance data
+among devices.
+As new virtual devices are added, ZFS automatically places data on the newly
+available devices.
+.Pp
+Virtual devices are specified one at a time on the command line,
+separated by whitespace.
+Keywords like
+.Sy mirror No and Sy raidz
+are used to distinguish where a group ends and another begins.
+For example, the following creates a pool with two root vdevs,
+each a mirror of two disks:
+.Dl # Nm zpool Cm create Ar mypool Sy mirror Ar sda sdb Sy mirror Ar sdc sdd
+.
+.Ss Device Failure and Recovery
+ZFS supports a rich set of mechanisms for handling device failure and data
+corruption.
+All metadata and data is checksummed, and ZFS automatically repairs bad data
+from a good copy when corruption is detected.
+.Pp
+In order to take advantage of these features, a pool must make use of some form
+of redundancy, using either mirrored or raidz groups.
+While ZFS supports running in a non-redundant configuration, where each root
+vdev is simply a disk or file, this is strongly discouraged.
+A single case of bit corruption can render some or all of your data unavailable.
+.Pp
+A pool's health status is described by one of three states:
+.Sy online , degraded , No or Sy faulted .
+An online pool has all devices operating normally.
+A degraded pool is one in which one or more devices have failed, but the data is
+still available due to a redundant configuration.
+A faulted pool has corrupted metadata, or one or more faulted devices, and
+insufficient replicas to continue functioning.
+.Pp
+The health of the top-level vdev, such as a mirror or raidz device,
+is potentially impacted by the state of its associated vdevs,
+or component devices.
+A top-level vdev or component device is in one of the following states:
+.Bl -tag -width "DEGRADED"
+.It Sy DEGRADED
+One or more top-level vdevs is in the degraded state because one or more
+component devices are offline.
+Sufficient replicas exist to continue functioning.
+.Pp
+One or more component devices is in the degraded or faulted state, but
+sufficient replicas exist to continue functioning.
+The underlying conditions are as follows:
+.Bl -bullet -compact
+.It
+The number of checksum errors exceeds acceptable levels and the device is
+degraded as an indication that something may be wrong.
+ZFS continues to use the device as necessary.
+.It
+The number of I/O errors exceeds acceptable levels.
+The device could not be marked as faulted because there are insufficient
+replicas to continue functioning.
+.El
+.It Sy FAULTED
+One or more top-level vdevs is in the faulted state because one or more
+component devices are offline.
+Insufficient replicas exist to continue functioning.
+.Pp
+One or more component devices is in the faulted state, and insufficient
+replicas exist to continue functioning.
+The underlying conditions are as follows:
+.Bl -bullet -compact
+.It
+The device could be opened, but the contents did not match expected values.
+.It
+The number of I/O errors exceeds acceptable levels and the device is faulted to
+prevent further use of the device.
+.El
+.It Sy OFFLINE
+The device was explicitly taken offline by the
+.Nm zpool Cm offline
+command.
+.It Sy ONLINE
+The device is online and functioning.
+.It Sy REMOVED
+The device was physically removed while the system was running.
+Device removal detection is hardware-dependent and may not be supported on all
+platforms.
+.It Sy UNAVAIL
+The device could not be opened.
+If a pool is imported when a device was unavailable, then the device will be
+identified by a unique identifier instead of its path since the path was never
+correct in the first place.
+.El
+.Pp
+Checksum errors represent events where a disk returned data that was expected
+to be correct, but was not.
+In other words, these are instances of silent data corruption.
+The checksum errors are reported in
+.Nm zpool Cm status
+and
+.Nm zpool Cm events .
+When a block is stored redundantly, a damaged block may be reconstructed
+(e.g. from raidz parity or a mirrored copy).
+In this case, ZFS reports the checksum error against the disks that contained
+damaged data.
+If a block is unable to be reconstructed (e.g. due to 3 disks being damaged
+in a raidz2 group), it is not possible to determine which disks were silently
+corrupted.
+In this case, checksum errors are reported for all disks on which the block
+is stored.
+.Pp
+If a device is removed and later re-attached to the system,
+ZFS attempts to online the device automatically.
+Device attachment detection is hardware-dependent
+and might not be supported on all platforms.
+.
+.Ss Hot Spares
+ZFS allows devices to be associated with pools as
+.Qq hot spares .
+These devices are not actively used in the pool, but when an active device
+fails, it is automatically replaced by a hot spare.
+To create a pool with hot spares, specify a
+.Sy spare
+vdev with any number of devices.
+For example,
+.Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy spare Ar sdc sdd
+.Pp
+Spares can be shared across multiple pools, and can be added with the
+.Nm zpool Cm add
+command and removed with the
+.Nm zpool Cm remove
+command.
+Once a spare replacement is initiated, a new
+.Sy spare
+vdev is created within the configuration that will remain there until the
+original device is replaced.
+At this point, the hot spare becomes available again if another device fails.
+.Pp
+If a pool has a shared spare that is currently being used, the pool can not be
+exported since other pools may use this shared spare, which may lead to
+potential data corruption.
+.Pp
+Shared spares add some risk.
+If the pools are imported on different hosts,
+and both pools suffer a device failure at the same time,
+both could attempt to use the spare at the same time.
+This may not be detected, resulting in data corruption.
+.Pp
+An in-progress spare replacement can be cancelled by detaching the hot spare.
+If the original faulted device is detached, then the hot spare assumes its
+place in the configuration, and is removed from the spare list of all active
+pools.
+.Pp
+The
+.Sy draid
+vdev type provides distributed hot spares.
+These hot spares are named after the dRAID vdev they're a part of
+.Po Sy draid1 Ns - Ns Ar 2 Ns - Ns Ar 3 No specifies spare Ar 3 No of vdev Ar 2 ,
+.No which is a single parity dRAID Pc
+and may only be used by that dRAID vdev.
+Otherwise, they behave the same as normal hot spares.
+.Pp
+Spares cannot replace log devices.
+.
+.Ss Intent Log
+The ZFS Intent Log (ZIL) satisfies POSIX requirements for synchronous
+transactions.
+For instance, databases often require their transactions to be on stable storage
+devices when returning from a system call.
+NFS and other applications can also use
+.Xr fsync 2
+to ensure data stability.
+By default, the intent log is allocated from blocks within the main pool.
+However, it might be possible to get better performance using separate intent
+log devices such as NVRAM or a dedicated disk.
+For example:
+.Dl # Nm zpool Cm create Ar pool sda sdb Sy log Ar sdc
+.Pp
+Multiple log devices can also be specified, and they can be mirrored.
+See the
+.Sx EXAMPLES
+section for an example of mirroring multiple log devices.
+.Pp
+Log devices can be added, replaced, attached, detached and removed.
+In addition, log devices are imported and exported as part of the pool
+that contains them.
+Mirrored devices can be removed by specifying the top-level mirror vdev.
+.
+.Ss Cache Devices
+Devices can be added to a storage pool as
+.Qq cache devices .
+These devices provide an additional layer of caching between main memory and
+disk.
+For read-heavy workloads, where the working set size is much larger than what
+can be cached in main memory, using cache devices allows much more of this
+working set to be served from low latency media.
+Using cache devices provides the greatest performance improvement for random
+read-workloads of mostly static content.
+.Pp
+To create a pool with cache devices, specify a
+.Sy cache
+vdev with any number of devices.
+For example:
+.Dl # Nm zpool Cm create Ar pool sda sdb Sy cache Ar sdc sdd
+.Pp
+Cache devices cannot be mirrored or part of a raidz configuration.
+If a read error is encountered on a cache device, that read I/O is reissued to
+the original storage pool device, which might be part of a mirrored or raidz
+configuration.
+.Pp
+The content of the cache devices is persistent across reboots and restored
+asynchronously when importing the pool in L2ARC (persistent L2ARC).
+This can be disabled by setting
+.Sy l2arc_rebuild_enabled Ns = Ns Sy 0 .
+For cache devices smaller than
+.Em 1GB ,
+we do not write the metadata structures
+required for rebuilding the L2ARC in order not to waste space.
+This can be changed with
+.Sy l2arc_rebuild_blocks_min_l2size .
+The cache device header
+.Pq Em 512B
+is updated even if no metadata structures are written.
+Setting
+.Sy l2arc_headroom Ns = Ns Sy 0
+will result in scanning the full-length ARC lists for cacheable content to be
+written in L2ARC (persistent ARC).
+If a cache device is added with
+.Nm zpool Cm add
+its label and header will be overwritten and its contents are not going to be
+restored in L2ARC, even if the device was previously part of the pool.
+If a cache device is onlined with
+.Nm zpool Cm online
+its contents will be restored in L2ARC.
+This is useful in case of memory pressure
+where the contents of the cache device are not fully restored in L2ARC.
+The user can off- and online the cache device when there is less memory pressure
+in order to fully restore its contents to L2ARC.
+.
+.Ss Pool checkpoint
+Before starting critical procedures that include destructive actions
+.Pq like Nm zfs Cm destroy ,
+an administrator can checkpoint the pool's state and in the case of a
+mistake or failure, rewind the entire pool back to the checkpoint.
+Otherwise, the checkpoint can be discarded when the procedure has completed
+successfully.
+.Pp
+A pool checkpoint can be thought of as a pool-wide snapshot and should be used
+with care as it contains every part of the pool's state, from properties to vdev
+configuration.
+Thus, certain operations are not allowed while a pool has a checkpoint.
+Specifically, vdev removal/attach/detach, mirror splitting, and
+changing the pool's GUID.
+Adding a new vdev is supported, but in the case of a rewind it will have to be
+added again.
+Finally, users of this feature should keep in mind that scrubs in a pool that
+has a checkpoint do not repair checkpointed data.
+.Pp
+To create a checkpoint for a pool:
+.Dl # Nm zpool Cm checkpoint Ar pool
+.Pp
+To later rewind to its checkpointed state, you need to first export it and
+then rewind it during import:
+.Dl # Nm zpool Cm export Ar pool
+.Dl # Nm zpool Cm import Fl -rewind-to-checkpoint Ar pool
+.Pp
+To discard the checkpoint from a pool:
+.Dl # Nm zpool Cm checkpoint Fl d Ar pool
+.Pp
+Dataset reservations (controlled by the
+.Sy reservation No and Sy refreservation
+properties) may be unenforceable while a checkpoint exists, because the
+checkpoint is allowed to consume the dataset's reservation.
+Finally, data that is part of the checkpoint but has been freed in the
+current state of the pool won't be scanned during a scrub.
+.
+.Ss Special Allocation Class
+Allocations in the special class are dedicated to specific block types.
+By default this includes all metadata, the indirect blocks of user data, and
+any deduplication tables.
+The class can also be provisioned to accept small file blocks.
+.Pp
+A pool must always have at least one normal
+.Pq non- Ns Sy dedup Ns /- Ns Sy special
+vdev before
+other devices can be assigned to the special class.
+If the
+.Sy special
+class becomes full, then allocations intended for it
+will spill back into the normal class.
+.Pp
+Deduplication tables can be excluded from the special class by unsetting the
+.Sy zfs_ddt_data_is_special
+ZFS module parameter.
+.Pp
+Inclusion of small file blocks in the special class is opt-in.
+Each dataset can control the size of small file blocks allowed
+in the special class by setting the
+.Sy special_small_blocks
+property to nonzero.
+See
+.Xr zfsprops 7
+for more info on this property.

diff --git a/zfs/man/man7/zpoolprops.7 b/zfs/man/man7/zpoolprops.7
new file mode 100644
index 0000000..5bd1c2b
--- /dev/null
+++ b/zfs/man/man7/zpoolprops.7

@@ -0,0 +1,417 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+.\"
+.Dd May 27, 2021
+.Dt ZPOOLPROPS 7
+.Os
+.
+.Sh NAME
+.Nm zpoolprops
+.Nd properties of ZFS storage pools
+.
+.Sh DESCRIPTION
+Each pool has several properties associated with it.
+Some properties are read-only statistics while others are configurable and
+change the behavior of the pool.
+.Pp
+The following are read-only properties:
+.Bl -tag -width "unsupported@guid"
+.It Sy allocated
+Amount of storage used within the pool.
+See
+.Sy fragmentation
+and
+.Sy free
+for more information.
+.It Sy capacity
+Percentage of pool space used.
+This property can also be referred to by its shortened column name,
+.Sy cap .
+.It Sy expandsize
+Amount of uninitialized space within the pool or device that can be used to
+increase the total capacity of the pool.
+On whole-disk vdevs, this is the space beyond the end of the GPT –
+typically occurring when a LUN is dynamically expanded
+or a disk replaced with a larger one.
+On partition vdevs, this is the space appended to the partition after it was
+added to the pool – most likely by resizing it in-place.
+The space can be claimed for the pool by bringing it online with
+.Sy autoexpand=on
+or using
+.Nm zpool Cm online Fl e .
+.It Sy fragmentation
+The amount of fragmentation in the pool.
+As the amount of space
+.Sy allocated
+increases, it becomes more difficult to locate
+.Sy free
+space.
+This may result in lower write performance compared to pools with more
+unfragmented free space.
+.It Sy free
+The amount of free space available in the pool.
+By contrast, the
+.Xr zfs 8
+.Sy available
+property describes how much new data can be written to ZFS filesystems/volumes.
+The zpool
+.Sy free
+property is not generally useful for this purpose, and can be substantially more than the zfs
+.Sy available
+space.
+This discrepancy is due to several factors, including raidz parity;
+zfs reservation, quota, refreservation, and refquota properties; and space set aside by
+.Sy spa_slop_shift
+(see
+.Xr zfs 4
+for more information).
+.It Sy freeing
+After a file system or snapshot is destroyed, the space it was using is
+returned to the pool asynchronously.
+.Sy freeing
+is the amount of space remaining to be reclaimed.
+Over time
+.Sy freeing
+will decrease while
+.Sy free
+increases.
+.It Sy leaked
+Space not released while
+.Sy freeing
+due to corruption, now permanently leaked into the pool.
+.It Sy health
+The current health of the pool.
+Health can be one of
+.Sy ONLINE , DEGRADED , FAULTED , OFFLINE , REMOVED , UNAVAIL .
+.It Sy guid
+A unique identifier for the pool.
+.It Sy load_guid
+A unique identifier for the pool.
+Unlike the
+.Sy guid
+property, this identifier is generated every time we load the pool (i.e. does
+not persist across imports/exports) and never changes while the pool is loaded
+(even if a
+.Sy reguid
+operation takes place).
+.It Sy size
+Total size of the storage pool.
+.It Sy unsupported@ Ns Em guid
+Information about unsupported features that are enabled on the pool.
+See
+.Xr zpool-features 7
+for details.
+.El
+.Pp
+The space usage properties report actual physical space available to the
+storage pool.
+The physical space can be different from the total amount of space that any
+contained datasets can actually use.
+The amount of space used in a raidz configuration depends on the characteristics
+of the data being written.
+In addition, ZFS reserves some space for internal accounting that the
+.Xr zfs 8
+command takes into account, but the
+.Nm
+command does not.
+For non-full pools of a reasonable size, these effects should be invisible.
+For small pools, or pools that are close to being completely full, these
+discrepancies may become more noticeable.
+.Pp
+The following property can be set at creation time and import time:
+.Bl -tag -width Ds
+.It Sy altroot
+Alternate root directory.
+If set, this directory is prepended to any mount points within the pool.
+This can be used when examining an unknown pool where the mount points cannot be
+trusted, or in an alternate boot environment, where the typical paths are not
+valid.
+.Sy altroot
+is not a persistent property.
+It is valid only while the system is up.
+Setting
+.Sy altroot
+defaults to using
+.Sy cachefile Ns = Ns Sy none ,
+though this may be overridden using an explicit setting.
+.El
+.Pp
+The following property can be set only at import time:
+.Bl -tag -width Ds
+.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
+If set to
+.Sy on ,
+the pool will be imported in read-only mode.
+This property can also be referred to by its shortened column name,
+.Sy rdonly .
+.El
+.Pp
+The following properties can be set at creation time and import time, and later
+changed with the
+.Nm zpool Cm set
+command:
+.Bl -tag -width Ds
+.It Sy ashift Ns = Ns Ar ashift
+Pool sector size exponent, to the power of
+.Sy 2
+(internally referred to as
+.Sy ashift ) .
+Values from 9 to 16, inclusive, are valid; also, the
+value 0 (the default) means to auto-detect using the kernel's block
+layer and a ZFS internal exception list.
+I/O operations will be aligned to the specified size boundaries.
+Additionally, the minimum (disk)
+write size will be set to the specified size, so this represents a
+space vs. performance trade-off.
+For optimal performance, the pool sector size should be greater than
+or equal to the sector size of the underlying disks.
+The typical case for setting this property is when
+performance is important and the underlying disks use 4KiB sectors but
+report 512B sectors to the OS (for compatibility reasons); in that
+case, set
+.Sy ashift Ns = Ns Sy 12
+(which is
+.Sy 1<<12 No = Sy 4096 ) .
+When set, this property is
+used as the default hint value in subsequent vdev operations (add,
+attach and replace).
+Changing this value will not modify any existing
+vdev, not even on disk replacement; however it can be used, for
+instance, to replace a dying 512B sectors disk with a newer 4KiB
+sectors device: this will probably result in bad performance but at the
+same time could prevent loss of data.
+.It Sy autoexpand Ns = Ns Sy on Ns | Ns Sy off
+Controls automatic pool expansion when the underlying LUN is grown.
+If set to
+.Sy on ,
+the pool will be resized according to the size of the expanded device.
+If the device is part of a mirror or raidz then all devices within that
+mirror/raidz group must be expanded before the new space is made available to
+the pool.
+The default behavior is
+.Sy off .
+This property can also be referred to by its shortened column name,
+.Sy expand .
+.It Sy autoreplace Ns = Ns Sy on Ns | Ns Sy off
+Controls automatic device replacement.
+If set to
+.Sy off ,
+device replacement must be initiated by the administrator by using the
+.Nm zpool Cm replace
+command.
+If set to
+.Sy on ,
+any new device, found in the same physical location as a device that previously
+belonged to the pool, is automatically formatted and replaced.
+The default behavior is
+.Sy off .
+This property can also be referred to by its shortened column name,
+.Sy replace .
+Autoreplace can also be used with virtual disks (like device
+mapper) provided that you use the /dev/disk/by-vdev paths setup by
+vdev_id.conf.
+See the
+.Xr vdev_id 8
+manual page for more details.
+Autoreplace and autoonline require the ZFS Event Daemon be configured and
+running.
+See the
+.Xr zed 8
+manual page for more details.
+.It Sy autotrim Ns = Ns Sy on Ns | Ns Sy off
+When set to
+.Sy on
+space which has been recently freed, and is no longer allocated by the pool,
+will be periodically trimmed.
+This allows block device vdevs which support
+BLKDISCARD, such as SSDs, or file vdevs on which the underlying file system
+supports hole-punching, to reclaim unused blocks.
+The default value for this property is
+.Sy off .
+.Pp
+Automatic TRIM does not immediately reclaim blocks after a free.
+Instead, it will optimistically delay allowing smaller ranges to be aggregated
+into a few larger ones.
+These can then be issued more efficiently to the storage.
+TRIM on L2ARC devices is enabled by setting
+.Sy l2arc_trim_ahead > 0 .
+.Pp
+Be aware that automatic trimming of recently freed data blocks can put
+significant stress on the underlying storage devices.
+This will vary depending on how well the specific device handles these commands.
+For lower-end devices it is often possible to achieve most of the benefits
+of automatic trimming by running an on-demand (manual) TRIM periodically
+using the
+.Nm zpool Cm trim
+command.
+.It Sy bootfs Ns = Ns Sy (unset) Ns | Ns Ar pool Ns Op / Ns Ar dataset
+Identifies the default bootable dataset for the root pool.
+This property is expected to be set mainly by the installation and upgrade programs.
+Not all Linux distribution boot processes use the bootfs property.
+.It Sy cachefile Ns = Ns Ar path Ns | Ns Sy none
+Controls the location of where the pool configuration is cached.
+Discovering all pools on system startup requires a cached copy of the
+configuration data that is stored on the root file system.
+All pools in this cache are automatically imported when the system boots.
+Some environments, such as install and clustering, need to cache this
+information in a different location so that pools are not automatically
+imported.
+Setting this property caches the pool configuration in a different location that
+can later be imported with
+.Nm zpool Cm import Fl c .
+Setting it to the value
+.Sy none
+creates a temporary pool that is never cached, and the
+.Qq
+.Pq empty string
+uses the default location.
+.Pp
+Multiple pools can share the same cache file.
+Because the kernel destroys and recreates this file when pools are added and
+removed, care should be taken when attempting to access this file.
+When the last pool using a
+.Sy cachefile
+is exported or destroyed, the file will be empty.
+.It Sy comment Ns = Ns Ar text
+A text string consisting of printable ASCII characters that will be stored
+such that it is available even if the pool becomes faulted.
+An administrator can provide additional information about a pool using this
+property.
+.It Sy compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns …
+Specifies that the pool maintain compatibility with specific feature sets.
+When set to
+.Sy off
+(or unset) compatibility is disabled (all features may be enabled); when set to
+.Sy legacy
+no features may be enabled.
+When set to a comma-separated list of filenames
+(each filename may either be an absolute path, or relative to
+.Pa /etc/zfs/compatibility.d
+or
+.Pa /usr/share/zfs/compatibility.d )
+the lists of requested features are read from those files, separated by
+whitespace and/or commas.
+Only features present in all files may be enabled.
+.Pp
+See
+.Xr zpool-features 7 ,
+.Xr zpool-create 8
+and
+.Xr zpool-upgrade 8
+for more information on the operation of compatibility feature sets.
+.It Sy dedupditto Ns = Ns Ar number
+This property is deprecated and no longer has any effect.
+.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
+Controls whether a non-privileged user is granted access based on the dataset
+permissions defined on the dataset.
+See
+.Xr zfs 8
+for more information on ZFS delegated administration.
+.It Sy failmode Ns = Ns Sy wait Ns | Ns Sy continue Ns | Ns Sy panic
+Controls the system behavior in the event of catastrophic pool failure.
+This condition is typically a result of a loss of connectivity to the underlying
+storage device(s) or a failure of all devices within the pool.
+The behavior of such an event is determined as follows:
+.Bl -tag -width "continue"
+.It Sy wait
+Blocks all I/O access until the device connectivity is recovered and the errors
+are cleared with
+.Nm zpool Cm clear .
+This is the default behavior.
+.It Sy continue
+Returns
+.Er EIO
+to any new write I/O requests but allows reads to any of the remaining healthy
+devices.
+Any write requests that have yet to be committed to disk would be blocked.
+.It Sy panic
+Prints out a message to the console and generates a system crash dump.
+.El
+.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled
+The value of this property is the current state of
+.Ar feature_name .
+The only valid value when setting this property is
+.Sy enabled
+which moves
+.Ar feature_name
+to the enabled state.
+See
+.Xr zpool-features 7
+for details on feature states.
+.It Sy listsnapshots Ns = Ns Sy on Ns | Ns Sy off
+Controls whether information about snapshots associated with this pool is
+output when
+.Nm zfs Cm list
+is run without the
+.Fl t
+option.
+The default value is
+.Sy off .
+This property can also be referred to by its shortened name,
+.Sy listsnaps .
+.It Sy multihost Ns = Ns Sy on Ns | Ns Sy off
+Controls whether a pool activity check should be performed during
+.Nm zpool Cm import .
+When a pool is determined to be active it cannot be imported, even with the
+.Fl f
+option.
+This property is intended to be used in failover configurations
+where multiple hosts have access to a pool on shared storage.
+.Pp
+Multihost provides protection on import only.
+It does not protect against an
+individual device being used in multiple pools, regardless of the type of vdev.
+See the discussion under
+.Nm zpool Cm create .
+.Pp
+When this property is on, periodic writes to storage occur to show the pool is
+in use.
+See
+.Sy zfs_multihost_interval
+in the
+.Xr zfs 4
+manual page.
+In order to enable this property each host must set a unique hostid.
+See
+.Xr genhostid 1 ,
+.Xr zgenhostid 8 ,
+.No and Xr spl 4
+for additional details.
+The default value is
+.Sy off .
+.It Sy version Ns = Ns Ar version
+The current on-disk version of the pool.
+This can be increased, but never decreased.
+The preferred method of updating pools is with the
+.Nm zpool Cm upgrade
+command, though this property can be used when a specific version is needed for
+backwards compatibility.
+Once feature flags are enabled on a pool this property will no longer have a
+value.
+.El

diff --git a/zfs/man/man8/Makefile.am b/zfs/man/man8/Makefile.am
deleted file mode 100644
index d5df665..0000000
--- a/zfs/man/man8/Makefile.am
+++ /dev/null

@@ -1,32 +0,0 @@
-dist_man_MANS = \
-	fsck.zfs.8 \
-	mount.zfs.8 \
-	vdev_id.8 \
-	zdb.8 \
-	zfs.8 \
-	zfs-program.8 \
-	zgenhostid.8 \
-	zinject.8 \
-	zpool.8 \
-	zstreamdump.8
-
-nodist_man_MANS = \
-	zed.8 \
-	zfs-mount-generator.8
-
-EXTRA_DIST = \
-	zed.8.in \
-	zfs-mount-generator.8.in
-
-$(nodist_man_MANS): %: %.in
-	-$(SED) -e 's,@zfsexecdir\@,$(zfsexecdir),g' \
-		-e 's,@systemdgeneratordir\@,$(systemdgeneratordir),g' \
-		-e 's,@runstatedir\@,$(runstatedir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
-
-install-data-local:
-	$(INSTALL) -d -m 0755 "$(DESTDIR)$(mandir)/man8"
-
-CLEANFILES = \
-	$(nodist_man_MANS)

diff --git a/zfs/man/man8/fsck.zfs.8 b/zfs/man/man8/fsck.zfs.8
index baa8c33..0ce7576 100644
--- a/zfs/man/man8/fsck.zfs.8
+++ b/zfs/man/man8/fsck.zfs.8

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,49 +18,60 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
 .\"
-.TH fsck.zfs 8 "2013 MAR 16" "ZFS on Linux" "System Administration Commands"
-
-.SH NAME
-fsck.zfs \- Dummy ZFS filesystem checker.
-
-.SH SYNOPSIS
-.LP
-.BI "fsck.zfs [" "options" "] <" "dataset" ">"
-
-.SH DESCRIPTION
-.LP
-\fBfsck.zfs\fR is a shell stub that does nothing and always returns
-true. It is installed by ZoL because some Linux distributions expect
-a fsck helper for all filesystems.
-
-.SH OPTIONS
-.HP
-All \fIoptions\fR and the \fIdataset\fR are ignored.
-
-.SH "NOTES"
-.LP
-ZFS datasets are checked by running \fBzpool scrub\fR on the
-containing pool. An individual ZFS dataset is never checked
-independently of its pool, which is unlike a regular filesystem.
-
-.SH "BUGS"
-.LP
-On some systems, if the \fIdataset\fR is in a degraded pool, then it
-might be appropriate for \fBfsck.zfs\fR to return exit code 4 to
-indicate an uncorrected filesystem error.
-.LP
-Similarly, if the \fIdataset\fR is in a faulted pool and has a legacy
-/etc/fstab record, then \fBfsck.zfs\fR should return exit code 8 to
-indicate a fatal operational error.
-
-.SH "AUTHORS"
-.LP
-Darik Horn <dajhorn@vanadac.com>.
-
-.SH "SEE ALSO"
-.BR fsck (8),
-.BR fstab (5),
-.BR zpool (8)
+.Dd May 26, 2021
+.Dt FSCK.ZFS 8
+.Os
+.
+.Sh NAME
+.Nm fsck.zfs
+.Nd dummy ZFS filesystem checker
+.Sh SYNOPSIS
+.Nm
+.Op Ar options
+.Ar dataset Ns No …
+.
+.Sh DESCRIPTION
+.Nm
+is a thin shell wrapper that at most checks the status of a dataset's container pool.
+It is installed by OpenZFS because some Linux
+distributions expect a fsck helper for all filesystems.
+.Pp
+If more than one
+.Ar dataset
+is specified, each is checked in turn and the results binary-ored.
+.
+.Sh OPTIONS
+Ignored.
+.
+.Sh NOTES
+ZFS datasets are checked by running
+.Nm zpool Cm scrub
+on the containing pool.
+An individual ZFS dataset is never checked independently of its pool,
+which is unlike a regular filesystem.
+.Pp
+However, the
+.Xr fsck 8
+interface still allows it to communicate some errors: if the
+.Ar dataset
+is in a degraded pool, then
+.Nm
+will return exit code
+.Sy 4
+to indicate an uncorrected filesystem error.
+.Pp
+Similarly, if the
+.Ar dataset
+is in a faulted pool and has a legacy
+.Pa /etc/fstab
+record, then
+.Nm
+will return exit code
+.Sy 8
+to indicate a fatal operational error.
+.Sh SEE ALSO
+.Xr fstab 5 ,
+.Xr fsck 8 ,
+.Xr zpool-scrub 8

diff --git a/zfs/man/man8/mount.zfs.8 b/zfs/man/man8/mount.zfs.8
index 4b71367..2101f70 100644
--- a/zfs/man/man8/mount.zfs.8
+++ b/zfs/man/man8/mount.zfs.8

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,126 +18,75 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
 .\"
-.TH mount.zfs 8 "2013 FEB 28" "ZFS on Linux" "System Administration Commands"
-
-.SH NAME
-mount.zfs \- mount a ZFS filesystem
-.SH SYNOPSIS
-.LP
-.BI "mount.zfs [\-sfnvh] [\-o " options "]" " dataset mountpoint
-
-.SH DESCRIPTION
-.BR mount.zfs
-is part of the zfsutils package for Linux. It is a helper program that
-is usually invoked by the
-.BR mount (8)
+.Dd May 24, 2021
+.Dt MOUNT.ZFS 8
+.Os
+.
+.Sh NAME
+.Nm mount.zfs
+.Nd mount ZFS filesystem
+.Sh SYNOPSIS
+.Nm
+.Op Fl sfnvh
+.Op Fl o Ar options
+.Ar dataset
+.Ar mountpoint
+.
+.Sh DESCRIPTION
+The
+.Nm
+helper is used by
+.Xr mount 8
+to mount filesystem snapshots and
+.Sy mountpoint= Ns Ar legacy
+ZFS filesystems, as well as by
+.Xr zfs 8
+when the
+.Sy ZFS_MOUNT_HELPER
+environment variable is not set.
+Users should invoke either
+.Xr mount 8
 or
-.BR zfs (8)
-commands to mount a ZFS dataset.
-
-All
-.I options
-are handled according to the FILESYSTEM INDEPENDENT MOUNT OPTIONS
-section in the
-.BR mount (8)
-manual, except for those described below.
-
-The
-.I dataset
-parameter is a ZFS filesystem name, as output by the
-.B "zfs list -H -o name
-command. This parameter never has a leading slash character and is
-not a device name.
-
-The
-.I mountpoint
-parameter is the path name of a directory.
-
-
-.SH OPTIONS
-.TP
-.BI "\-s"
-Ignore bad or sloppy mount options.
-.TP
-.BI "\-f"
-Do a fake mount; do not perform the mount operation.
-.TP
-.BI "\-n"
-Do not update the /etc/mtab file.
-.TP
-.BI "\-v"
-Increase verbosity.
-.TP
-.BI "\-h"
+.Xr zfs 8
+in most cases.
+.Pp
+.Ar options
+are handled according to the
+.Em Temporary Mount Point Properties
+section in
+.Xr zfsprops 7 ,
+except for those described below.
+.Pp
+If
+.Pa /etc/mtab
+is a regular file and
+.Fl n
+was not specified, it will be updated via libmount.
+.
+.Sh OPTIONS
+.Bl -tag -width "-o xa"
+.It Fl s
+Ignore unknown (sloppy) mount options.
+.It Fl f
+Do everything except actually executing the system call.
+.It Fl n
+Never update
+.Pa /etc/mtab .
+.It Fl v
+Print resolved mount options and parser state.
+.It Fl h
 Print the usage message.
-.TP
-.BI "\-o context"
-This flag sets the SELinux context for all files in the filesystem
-under that mountpoint.
-.TP
-.BI "\-o fscontext"
-This flag sets the SELinux context for the filesystem being mounted.
-.TP
-.BI "\-o defcontext"
-This flag sets the SELinux context for unlabeled files.
-.TP
-.BI "\-o rootcontext"
-This flag sets the SELinux context for the root inode of the filesystem.
-.TP
-.BI "\-o legacy"
-This private flag indicates that the
-.I dataset
-has an entry in the /etc/fstab file.
-.TP
-.BI "\-o noxattr"
-This private flag disables extended attributes.
-.TP
-.BI "\-o xattr
-This private flag enables directory-based extended attributes and, if
-appropriate, adds a ZFS context to the selinux system policy.
-.TP
-.BI "\-o saxattr
-This private flag enables system attributed-based extended attributes and, if
-appropriate, adds a ZFS context to the selinux system policy.
-.TP
-.BI "\-o dirxattr
-Equivalent to
-.BR xattr .
-.TP
-.BI "\-o zfsutil"
+.It Fl o Ar zfsutil
 This private flag indicates that
-.BR mount (8)
+.Xr mount 8
 is being called by the
-.BR zfs (8)
+.Xr zfs 8
 command.
-
-.SH NOTES
-ZFS conventionally requires that the
-.I mountpoint
-be an empty directory, but the Linux implementation inconsistently
-enforces the requirement.
-
-The
-.BR mount.zfs
-helper does not mount the contents of zvols.
-
-.SH FILES
-.TP 18n
-.I /etc/fstab
-The static filesystem table.
-.TP
-.I /etc/mtab
-The mounted filesystem table.
-.SH "AUTHORS"
-The primary author of
-.BR mount.zfs
-is Brian Behlendorf <behlendorf1@llnl.gov>.
-
-This man page was written by Darik Horn <dajhorn@vanadac.com>.
-.SH "SEE ALSO"
-.BR fstab (5),
-.BR mount (8),
-.BR zfs (8)
+.El
+.
+.Sh SEE ALSO
+.Xr fstab 5 ,
+.Xr mount 8 ,
+.Xr zfs-mount 8

diff --git a/zfs/man/man8/vdev_id.8 b/zfs/man/man8/vdev_id.8
index 70956c6..2b327b3 100644
--- a/zfs/man/man8/vdev_id.8
+++ b/zfs/man/man8/vdev_id.8

@@ -1,77 +1,93 @@
-.TH vdev_id 8
-.SH NAME
-vdev_id \- generate user-friendly names for JBOD disks
-.SH SYNOPSIS
-.LP
-.nf
-\fBvdev_id\fR <-d dev> [-c config_file] [-g sas_direct|sas_switch]
-                 [-m] [-p phys_per_port]
-\fBvdev_id\fR -h
-.fi
-.SH DESCRIPTION
-The \fBvdev_id\fR command is a udev helper which parses the file
-.BR /etc/zfs/vdev_id.conf (5)
-to map a physical path in a storage topology to a channel name.  The
-channel name is combined with a disk enclosure slot number to create an
-alias that reflects the physical location of the drive.  This is
-particularly helpful when it comes to tasks like replacing failed
-drives.  Slot numbers may also be re-mapped in case the default
-numbering is unsatisfactory.  The drive aliases will be created as
-symbolic links in /dev/disk/by-vdev.
-
-The currently supported topologies are sas_direct and sas_switch.  A
-multipath mode is supported in which dm-mpath devices are handled by
-examining the first-listed running component disk as reported by the
-.BR multipath (8)
-command.  In multipath mode the configuration file should contain a
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source.  A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.Dd May 26, 2021
+.Dt VDEV_ID 8
+.Os
+.
+.Sh NAME
+.Nm vdev_id
+.Nd generate user-friendly names for JBOD disks
+.Sh SYNOPSIS
+.Nm
+.Fl d Ar dev
+.Fl c Ar config_file
+.Fl g Sy sas_direct Ns | Ns Sy sas_switch Ns | Ns Sy scsi
+.Fl m
+.Fl p Ar phys_per_port
+.
+.Sh DESCRIPTION
+.Nm
+is a udev helper which parses
+.Xr vdev_id.conf 5
+to map a physical path in a storage topology to a channel name.
+The channel name is combined with a disk enclosure slot number to create
+an alias that reflects the physical location of the drive.
+This is particularly helpful when it comes to tasks like replacing failed drives.
+Slot numbers may also be remapped in case the default numbering is unsatisfactory.
+The drive aliases will be created as symbolic links in
+.Pa /dev/disk/by-vdev .
+.Pp
+The currently supported topologies are
+.Sy sas_direct ,
+.Sy sas_switch ,
+and
+.Sy scsi .
+A multipath mode is supported in which dm-mpath devices are handled by
+examining the first running component disk as reported by the driver.
+In multipath mode the configuration file should contain a
 channel definition with the same name for each path to a given
 enclosure.
-
-.BR vdev_id
+.Pp
+.Nm
 also supports creating aliases based on existing udev links in the /dev
-hierarchy using the \fIalias\fR configuration file keyword.  See the
-.BR vdev_id.conf (5)
-man page for details.
-
-.SH OPTIONS
-.TP
-\fB\-c\fR <config_file>
-Specifies the path to an alternate configuration file.  The default is
-/etc/zfs/vdev_id.conf.
-.TP
-\fB\-d\fR <device>
-This is the only mandatory argument.  Specifies the name of a device
-in /dev, i.e. "sda".
-.TP
-\fB\-g\fR <sas_direct|sas_switch>
+hierarchy using the
+.Sy alias
+configuration file keyword.
+See
+.Xr vdev_id.conf 5
+for details.
+.
+.Sh OPTIONS
+.Bl -tag -width "-m"
+.It Fl d Ar device
+The device node to classify, like
+.Pa /dev/sda .
+.It Fl c Ar config_file
+Specifies the path to an alternate configuration file.
+The default is
+.Pa /etc/zfs/vdev_id.conf .
+.It Fl g Sy sas_direct Ns | Ns Sy sas_switch Ns | Ns Sy scsi
 Identifies a physical topology that governs how physical paths are
-mapped to channels.
-
-\fIsas_direct\fR - in this mode a channel is uniquely identified by
-a PCI slot and a HBA port number
-
-\fIsas_switch\fR - in this mode a channel is uniquely identified by
-a SAS switch port number
-.TP
-\fB\-m\fR
-Specifies that
-.BR vdev_id (8)
-will handle only dm-multipath devices.  If set to "yes" then
-.BR vdev_id (8)
-will examine the first running component disk of a dm-multipath
-device as listed by the
-.BR multipath (8)
-command to determine the physical path.
-.TP
-\fB\-p\fR <phys_per_port>
+mapped to channels:
+.Bl -tag -compact -width "sas_direct and scsi"
+.It Sy sas_direct No and Sy scsi
+channels are uniquely identified by a PCI slot and HBA port number
+.It Sy sas_switch
+channels are uniquely identified by a SAS switch port number
+.El
+.It Fl m
+Only handle dm-multipath devices.
+If specified, examine the first running component disk of a dm-multipath
+device as provided by the driver to determine the physical path.
+.It Fl p Ar phys_per_port
 Specifies the number of PHY devices associated with a SAS HBA port or SAS
 switch port.
-.BR vdev_id (8)
+.Nm
 internally uses this value to determine which HBA or switch port a
-device is connected to.  The default is 4.
-.TP
-\fB\-h\fR
+device is connected to.
+The default is
+.Sy 4 .
+.It Fl h
 Print a usage summary.
-.SH SEE ALSO
-.LP
-\fBvdev_id.conf\fR(5)
+.El
+.
+.Sh SEE ALSO
+.Xr vdev_id.conf 5

diff --git a/zfs/man/man8/zdb.8 b/zfs/man/man8/zdb.8
index e907d03..6a7ea2c 100644
--- a/zfs/man/man8/zdb.8
+++ b/zfs/man/man8/zdb.8

@@ -8,34 +8,36 @@
 .\" source.  A copy of the CDDL is also available via the Internet at
 .\" http://www.illumos.org/license/CDDL.
 .\"
-.\"
 .\" Copyright 2012, Richard Lowe.
-.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2019 by Delphix. All rights reserved.
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 .\" Copyright (c) 2017 Intel Corporation.
 .\"
-.Dd April 14, 2019
-.Dt ZDB 8 SMM
-.Os Linux
+.Dd October 7, 2020
+.Dt ZDB 8
+.Os
+.
 .Sh NAME
 .Nm zdb
-.Nd display zpool debugging and consistency information
+.Nd display ZFS storage pool debugging and consistency information
 .Sh SYNOPSIS
 .Nm
-.Op Fl AbcdDFGhikLMPsvXY
-.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
+.Op Fl AbcdDFGhikLMNPsvXYy
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
 .Op Fl I Ar inflight I/Os
-.Oo Fl o Ar var Ns = Ns Ar value Oc Ns ...
+.Oo Fl o Ar var Ns = Ns Ar value Oc Ns …
 .Op Fl t Ar txg
 .Op Fl U Ar cache
 .Op Fl x Ar dumpdir
-.Op Ar poolname Op Ar object ...
+.Op Ar poolname Ns Op / Ns Ar dataset | objset ID
+.Op Ar object Ns | Ns Ar range Ns …
 .Nm
 .Op Fl AdiPv
-.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
 .Op Fl U Ar cache
-.Ar dataset Op Ar object ...
+.Ar poolname Ns Op / Ns Ar dataset | objset ID
+.Op Ar object Ns | Ns Ar range Ns …
 .Nm
 .Fl C
 .Op Fl A
@@ -43,7 +45,7 @@
 .Nm
 .Fl E
 .Op Fl A
-.Ar word0 Ns \&: Ns Ar word1 Ns :...: Ns Ar word15
+.Ar word0 : Ns Ar word1 Ns :…: Ns Ar word15
 .Nm
 .Fl l
 .Op Fl Aqu
@@ -51,25 +53,29 @@
 .Nm
 .Fl m
 .Op Fl AFLPXY
-.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
 .Op Fl t Ar txg
 .Op Fl U Ar cache
-.Ar poolname Op Ar vdev Op Ar metaslab ...
+.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns …
 .Nm
 .Fl O
 .Ar dataset path
 .Nm
+.Fl r
+.Ar dataset path destination
+.Nm
 .Fl R
 .Op Fl A
-.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
 .Op Fl U Ar cache
-.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
+.Ar poolname vdev : Ns Ar offset : Ns Oo Ar lsize Ns / Oc Ns Ar psize Ns Op : Ns Ar flags
 .Nm
 .Fl S
 .Op Fl AP
-.Op Fl e Oo Fl V Oc Op Fl p Ar path ...
+.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
 .Op Fl U Ar cache
 .Ar poolname
+.
 .Sh DESCRIPTION
 The
 .Nm
@@ -95,11 +101,11 @@
 .Qq Sy @
 characters, it is interpreted as a pool name.
 The root dataset can be specified as
-.Ar pool Ns /
-.Pq pool name followed by a slash .
+.Qq Ar pool Ns / .
 .Pp
 When operating on an imported and active pool it is possible, though unlikely,
 that zdb may interpret inconsistent pool data and behave erratically.
+.
 .Sh OPTIONS
 Display options:
 .Bl -tag -width Ds
@@ -131,11 +137,59 @@
 Display information about datasets.
 Specified once, displays basic dataset information: ID, create transaction,
 size, and object count.
+See
+.Fl N
+for determining if
+.Op Ar poolname Ns Op / Ns Ar dataset | objset ID
+is to use the specified
+.Op Ar dataset | objset ID
+as a string (dataset name) or a number (objset ID) when
+datasets have numeric names.
 .Pp
 If specified multiple times provides greater and greater verbosity.
 .Pp
-If object IDs are specified, display information about those specific objects
-only.
+If object IDs or object ID ranges are specified, display information about
+those specific objects or ranges only.
+.Pp
+An object ID range is specified in terms of a colon-separated tuple of
+the form
+.Ao start Ac : Ns Ao end Ac Ns Op : Ns Ao flags Ac .
+The fields
+.Ar start
+and
+.Ar end
+are integer object identifiers that denote the lower and upper bounds
+of the range.
+An
+.Ar end
+value of -1 specifies a range with no upper bound.
+The
+.Ar flags
+field optionally specifies a set of flags, described below, that control
+which object types are dumped.
+By default, all object types are dumped.
+A minus sign
+.Pq -
+negates the effect of the flag that follows it and has no effect unless
+preceded by the
+.Ar A
+flag.
+For example, the range 0:-1:A-d will dump all object types except for directories.
+.Pp
+.Bl -tag -compact -width Ds
+.It Sy A
+Dump all objects (this is the default)
+.It Sy d
+Dump ZFS directory objects
+.It Sy f
+Dump ZFS plain file objects
+.It Sy m
+Dump SPA space map objects
+.It Sy z
+Dump ZAP objects
+.It Sy -
+Negate the effect of next flag
+.El
 .It Fl D
 Display deduplication statistics, including the deduplication ratio
 .Pq Sy dedup ,
@@ -157,7 +211,7 @@
 Dump the contents of the deduplication tables describing duplicate blocks.
 .It Fl DDDDD
 Also dump the contents of the deduplication tables describing unique blocks.
-.It Fl E Ar word0 Ns \&: Ns Ar word1 Ns :...: Ns Ar word15
+.It Fl E Ar word0 : Ns Ar word1 Ns :…: Ns Ar word15
 Decode and display block from an embedded block pointer specified by the
 .Ar word
 arguments.
@@ -174,30 +228,43 @@
 Examine the checkpointed state of the pool.
 Note, the on disk format of the pool is not reverted to the checkpointed state.
 .It Fl l Ar device
-Read the vdev labels from the specified device.
+Read the vdev labels and L2ARC header from the specified device.
 .Nm Fl l
 will return 0 if valid label was found, 1 if error occurred, and 2 if no valid
-labels were found. Each unique configuration is displayed only once.
+labels were found.
+The presence of L2ARC header is indicated by a specific
+sequence (L2ARC_DEV_HDR_MAGIC).
+If there is an accounting error in the size or the number of L2ARC log blocks
+.Nm Fl l
+will return 1.
+Each unique configuration is displayed only once.
 .It Fl ll Ar device
 In addition display label space usage stats.
+If a valid L2ARC header was found
+also display the properties of log blocks used for restoring L2ARC contents
+(persistent L2ARC).
 .It Fl lll Ar device
 Display every configuration, unique or not.
+If a valid L2ARC header was found
+also display the properties of log entries in log blocks used for restoring
+L2ARC contents (persistent L2ARC).
 .Pp
 If the
 .Fl q
-option is also specified, don't print the labels.
+option is also specified, don't print the labels or the L2ARC header.
 .Pp
 If the
 .Fl u
-option is also specified, also display the uberblocks on this device.  Specify
-multiple times to increase verbosity.
+option is also specified, also display the uberblocks on this device.
+Specify multiple times to increase verbosity.
 .It Fl L
 Disable leak detection and the loading of space maps.
 By default,
 .Nm
 verifies that all non-free blocks are referenced, which can be very expensive.
 .It Fl m
-Display the offset, spacemap, and free space of each metaslab.
+Display the offset, spacemap, free space of each metaslab, all the log
+spacemaps and their obsolete entry statistics.
 .It Fl mm
 Also display information about the on-disk free space histogram associated with
 each metaslab.
@@ -213,6 +280,14 @@
 percentage of free space in each space map.
 .It Fl MMM
 Display every spacemap record.
+.It Fl N
+Same as
+.Fl d
+but force zdb to interpret the
+.Op Ar dataset | objset ID
+in
+.Op Ar poolname Ns Op / Ns Ar dataset | objset ID
+as a numeric objset ID.
 .It Fl O Ar dataset path
 Look up the specified
 .Ar path
@@ -226,8 +301,21 @@
 This option can be combined with
 .Fl v
 for increasing verbosity.
+.It Fl r Ar dataset path destination
+Copy the specified
+.Ar path
+inside of the
+.Ar dataset
+to the specified destination.
+Specified
+.Ar path
+must be relative to the root of
+.Ar dataset .
+This option can be combined with
+.Fl v
+for increasing verbosity.
 .It Xo
-.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar [<lsize>/]<psize> Ns Op : Ns Ar flags
+.Fl R Ar poolname vdev : Ns Ar offset : Ns Oo Ar lsize Ns / Oc Ns Ar psize Ns Op : Ns Ar flags
 .Xc
 Read and display a block from the specified device.
 By default the block is displayed as a hex dump, but see the description of the
@@ -247,11 +335,12 @@
 .Pp
 .Bl -tag -compact -width "b offset"
 .It Sy b Ar offset
-Print block pointer
+Print block pointer at hex offset
 .It Sy c
 Calculate and display checksums
 .It Sy d
-Decompress the block. Set environment variable
+Decompress the block.
+Set environment variable
 .Nm ZDB_NO_ZLE
 to skip zle when guessing.
 .It Sy e
@@ -288,7 +377,7 @@
 demoted to warnings.
 .It Fl AAA
 Do not abort if asserts fail and also enable panic recovery.
-.It Fl e Op Fl p Ar path ...
+.It Fl e Oo Fl p Ar path Oc Ns …
 Operate on an exported pool, not present in
 .Pa /etc/zfs/zpool.cache .
 The
@@ -318,14 +407,16 @@
 This option affects the performance of the
 .Fl c
 option.
-.It Fl o Ar var Ns = Ns Ar value ...
+.It Fl o Ar var Ns = Ns Ar value …
 Set the given global libzpool variable to the provided value.
 The value must be an unsigned 32-bit integer.
 Currently only little-endian systems are supported to avoid accidentally setting
 the high 32 bits of 64-bit variables.
 .It Fl P
-Print numbers in an unscaled form more amenable to parsing, eg. 1000000 rather
-than 1M.
+Print numbers in an unscaled form more amenable to parsing, e.g.\&
+.Sy 1000000
+rather than
+.Sy 1M .
 .It Fl t Ar transaction
 Specify the highest transaction to use when searching for uberblocks.
 See also the
@@ -355,6 +446,12 @@
 Attempt all possible combinations when reconstructing indirect split blocks.
 This flag disables the individual I/O deadman timer in order to allow as
 much time as required for the attempted reconstruction.
+.It Fl y
+Perform validation for livelists that are being deleted.
+Scans through the livelist and metaslabs, checking for duplicate entries
+and compares the two, checking for potential double frees.
+If it encounters issues, warnings will be printed, but the command will not
+necessarily fail.
 .El
 .Pp
 Specifying a display option more than once enables verbosity for only that
@@ -362,51 +459,51 @@
 .Pp
 If no options are specified, all information about the named pool will be
 displayed at default verbosity.
+.
 .Sh EXAMPLES
 .Bl -tag -width Ds
 .It Xo
-.Sy Example 1
+.Sy Example 1 :
 Display the configuration of imported pool
-.Pa rpool
+.Ar rpool
 .Xc
 .Bd -literal
-# zdb -C rpool
-
+.No # Nm zdb Fl C Ar rpool
 MOS Configuration:
         version: 28
         name: 'rpool'
- ...
+ …
 .Ed
 .It Xo
-.Sy Example 2
+.Sy Example 2 :
 Display basic dataset information about
-.Pa rpool
+.Ar rpool
 .Xc
 .Bd -literal
-# zdb -d rpool
+.No # Nm zdb Fl d Ar rpool
 Dataset mos [META], ID 0, cr_txg 4, 26.9M, 1051 objects
 Dataset rpool/swap [ZVOL], ID 59, cr_txg 356, 486M, 2 objects
- ...
+ …
 .Ed
 .It Xo
-.Sy Example 3
+.Sy Example 3 :
 Display basic information about object 0 in
-.Pa rpool/export/home
+.Ar rpool/export/home
 .Xc
 .Bd -literal
-# zdb -d rpool/export/home 0
+.No # Nm zdb Fl d Ar rpool/export/home 0
 Dataset rpool/export/home [ZPL], ID 137, cr_txg 1546, 32K, 8 objects
 
     Object  lvl   iblk   dblk  dsize  lsize   %full  type
          0    7    16K    16K  15.0K    16K   25.00  DMU dnode
 .Ed
 .It Xo
-.Sy Example 4
+.Sy Example 4 :
 Display the predicted effect of enabling deduplication on
-.Pa rpool
+.Ar rpool
 .Xc
 .Bd -literal
-# zdb -S rpool
+.No # Nm zdb Fl S Ar rpool
 Simulated DDT histogram:
 
 bucket              allocated                       referenced
@@ -415,10 +512,11 @@
 ------   ------   -----   -----   -----   ------   -----   -----   -----
      1     694K   27.1G   15.0G   15.0G     694K   27.1G   15.0G   15.0G
      2    35.0K   1.33G    699M    699M    74.7K   2.79G   1.45G   1.45G
- ...
+ …
 dedup = 1.11, compress = 1.80, copies = 1.00, dedup * compress / copies = 2.00
 .Ed
 .El
+.
 .Sh SEE ALSO
 .Xr zfs 8 ,
 .Xr zpool 8

diff --git a/zfs/man/man8/zed.8.in b/zfs/man/man8/zed.8.in
index 097a8f4..d329760 100644
--- a/zfs/man/man8/zed.8.in
+++ b/zfs/man/man8/zed.8.in

@@ -1,9 +1,8 @@
 .\"
-.\" This file is part of the ZFS Event Daemon (ZED)
-.\" for ZFS on Linux (ZoL) <http://zfsonlinux.org/>.
+.\" This file is part of the ZFS Event Daemon (ZED).
 .\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
 .\" Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
-.\" Refer to the ZoL git commit log for authoritative copyright attribution.
+.\" Refer to the OpenZFS git commit log for authoritative copyright attribution.
 .\"
 .\" The contents of this file are subject to the terms of the
 .\" Common Development and Distribution License Version 1.0 (CDDL-1.0).
@@ -11,250 +10,246 @@
 .\" "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 .\" You may not use this file except in compliance with the license.
 .\"
-.TH ZED 8 "Octember 1, 2013" "ZFS on Linux" "System Administration Commands"
-
-.SH NAME
-ZED \- ZFS Event Daemon
-
-.SH SYNOPSIS
-.HP
-.B zed
-.\" [\fB\-c\fR \fIconfigfile\fR]
-[\fB\-d\fR \fIzedletdir\fR]
-[\fB\-f\fR]
-[\fB\-F\fR]
-[\fB\-h\fR]
-[\fB\-L\fR]
-[\fB\-M\fR]
-[\fB\-p\fR \fIpidfile\fR]
-[\fB\-P\fR \fIpath\fR]
-[\fB\-s\fR \fIstatefile\fR]
-[\fB\-v\fR]
-[\fB\-V\fR]
-[\fB\-Z\fR]
-
-.SH DESCRIPTION
-.PP
-\fBZED\fR (ZFS Event Daemon) monitors events generated by the ZFS kernel
-module.  When a zevent (ZFS Event) is posted, \fBZED\fR will run any ZEDLETs
-(ZFS Event Daemon Linkage for Executable Tasks) that have been enabled for the
-corresponding zevent class.
-
-.SH OPTIONS
-.TP
-.BI \-h
+.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
+.\"
+.Dd May 26, 2021
+.Dt ZED 8
+.Os
+.
+.Sh NAME
+.Nm ZED
+.Nd ZFS Event Daemon
+.Sh SYNOPSIS
+.Nm
+.Op Fl fFhILMvVZ
+.Op Fl d Ar zedletdir
+.Op Fl p Ar pidfile
+.Op Fl P Ar path
+.Op Fl s Ar statefile
+.Op Fl j Ar jobs
+.
+.Sh DESCRIPTION
+The
+.Nm
+(ZFS Event Daemon) monitors events generated by the ZFS kernel
+module.
+When a zevent (ZFS Event) is posted, the
+.Nm
+will run any ZEDLETs (ZFS Event Daemon Linkage for Executable Tasks)
+that have been enabled for the corresponding zevent class.
+.
+.Sh OPTIONS
+.Bl -tag -width "-h"
+.It Fl h
 Display a summary of the command-line options.
-.TP
-.BI \-L
+.It Fl L
 Display license information.
-.TP
-.BI \-V
+.It Fl V
 Display version information.
-.TP
-.BI \-v
+.It Fl v
 Be verbose.
-.TP
-.BI \-f
+.It Fl f
 Force the daemon to run if at all possible, disabling security checks and
-throwing caution to the wind.  Not recommended for use in production.
-.TP
-.BI \-F
-Run the daemon in the foreground.
-.TP
-.BI \-M
+throwing caution to the wind.
+Not recommended for use in production.
+.It Fl F
+Don't daemonise: remain attached to the controlling terminal,
+log to the standard I/O streams.
+.It Fl M
 Lock all current and future pages in the virtual memory address space.
 This may help the daemon remain responsive when the system is under heavy
 memory pressure.
-.TP
-.BI \-Z
+.It Fl I
+Request that the daemon idle rather than exit when the kernel modules are not loaded.
+Processing of events will start, or resume, when the kernel modules are (re)loaded.
+Under Linux the kernel modules cannot be unloaded while the daemon is running.
+.It Fl Z
 Zero the daemon's state, thereby allowing zevents still within the kernel
 to be reprocessed.
-.\" .TP
-.\" .BI \-c\  configfile
-.\" Read the configuration from the specified file.
-.TP
-.BI \-d\  zedletdir
+.It Fl d Ar zedletdir
 Read the enabled ZEDLETs from the specified directory.
-.TP
-.BI \-p\  pidfile
+.It Fl p Ar pidfile
 Write the daemon's process ID to the specified file.
-.TP
-.BI \-P\  path
-Custom $PATH for zedlets to use.  Normally zedlets run in a locked-down
-environment, with hardcoded paths to the ZFS commands ($ZFS, $ZPOOL, $ZED, ...),
-and a hardcoded $PATH.  This is done for security reasons.  However, the
-ZFS test suite uses a custom PATH for its ZFS commands, and passes it to zed
-with -P.  In short, -P is only to be used by the ZFS test suite; never use
+.It Fl P Ar path
+Custom
+.Ev $PATH
+for zedlets to use.
+Normally zedlets run in a locked-down environment, with hardcoded paths to the ZFS commands
+.Pq Ev $ZFS , $ZPOOL , $ZED , ... ,
+and a hard-coded
+.Ev $PATH .
+This is done for security reasons.
+However, the ZFS test suite uses a custom PATH for its ZFS commands, and passes it to
+.Nm
+with
+.Fl P .
+In short,
+.Fl P
+is only to be used by the ZFS test suite; never use
 it in production!
-.TP
-.BI \-s\  statefile
+.It Fl s Ar statefile
 Write the daemon's state to the specified file.
-.SH ZEVENTS
-.PP
-A zevent is comprised of a list of nvpairs (name/value pairs).  Each zevent
-contains an EID (Event IDentifier) that uniquely identifies it throughout
+.It Fl j Ar jobs
+Allow at most
+.Ar jobs
+ZEDLETs to run concurrently,
+delaying execution of new ones until they finish.
+Defaults to
+.Sy 16 .
+.El
+.Sh ZEVENTS
+A zevent is comprised of a list of nvpairs (name/value pairs).
+Each zevent contains an EID (Event IDentifier) that uniquely identifies it throughout
 the lifetime of the loaded ZFS kernel module; this EID is a monotonically
 increasing integer that resets to 1 each time the kernel module is loaded.
 Each zevent also contains a class string that identifies the type of event.
 For brevity, a subclass string is defined that omits the leading components
-of the class string.  Additional nvpairs exist to provide event details.
-.PP
+of the class string.
+Additional nvpairs exist to provide event details.
+.Pp
 The kernel maintains a list of recent zevents that can be viewed (along with
-their associated lists of nvpairs) using the "\fBzpool events \-v\fR" command.
-
-.SH CONFIGURATION
-.PP
+their associated lists of nvpairs) using the
+.Nm zpool Cm events Fl v
+command.
+.
+.Sh CONFIGURATION
 ZEDLETs to be invoked in response to zevents are located in the
-\fIenabled-zedlets\fR directory.  These can be symlinked or copied from the
-\fIinstalled-zedlets\fR directory; symlinks allow for automatic updates
+.Em enabled-zedlets
+directory
+.Pq Ar zedletdir .
+These can be symlinked or copied from the
+.Em installed-zedlets
+directory; symlinks allow for automatic updates
 from the installed ZEDLETs, whereas copies preserve local modifications.
-As a security measure, ZEDLETs must be owned by root.  They must have
-execute permissions for the user, but they must not have write permissions
-for group or other.  Dotfiles are ignored.
-.PP
+As a security measure, since ownership change is a privileged operation,
+ZEDLETs must be owned by root.
+They must have execute permissions for the user,
+but they must not have write permissions for group or other.
+Dotfiles are ignored.
+.Pp
 ZEDLETs are named after the zevent class for which they should be invoked.
 In particular, a ZEDLET will be invoked for a given zevent if either its
 class or subclass string is a prefix of its filename (and is followed by
-a non-alphabetic character).  As a special case, the prefix "all" matches
-all zevents.  Multiple ZEDLETs may be invoked for a given zevent.
-
-.SH ZEDLETS
-.PP
+a non-alphabetic character).
+As a special case, the prefix
+.Sy all
+matches all zevents.
+Multiple ZEDLETs may be invoked for a given zevent.
+.
+.Sh ZEDLETS
 ZEDLETs are executables invoked by the ZED in response to a given zevent.
 They should be written under the presumption they can be invoked concurrently,
 and they should use appropriate locking to access any shared resources.
 Common variables used by ZEDLETs can be stored in the default rc file which
-is sourced by scripts; these variables should be prefixed with "ZED_".
-.PP
+is sourced by scripts; these variables should be prefixed with
+.Sy ZED_ .
+.Pp
 The zevent nvpairs are passed to ZEDLETs as environment variables.
 Each nvpair name is converted to an environment variable in the following
-manner: 1) it is prefixed with "ZEVENT_", 2) it is converted to uppercase,
-and 3) each non-alphanumeric character is converted to an underscore.
+manner:
+.Bl -enum -compact
+.It
+it is prefixed with
+.Sy ZEVENT_ ,
+.It
+it is converted to uppercase, and
+.It
+each non-alphanumeric character is converted to an underscore.
+.El
+.Pp
 Some additional environment variables have been defined to present certain
-nvpair values in a more convenient form.  An incomplete list of zevent
-environment variables is as follows:
-.TP
-.B
-ZEVENT_EID
+nvpair values in a more convenient form.
+An incomplete list of zevent environment variables is as follows:
+.Bl -tag -compact -width "ZEVENT_TIME_STRING"
+.It Sy ZEVENT_EID
 The Event IDentifier.
-.TP
-.B
-ZEVENT_CLASS
+.It Sy ZEVENT_CLASS
 The zevent class string.
-.TP
-.B
-ZEVENT_SUBCLASS
+.It Sy ZEVENT_SUBCLASS
 The zevent subclass string.
-.TP
-.B
-ZEVENT_TIME
+.It Sy ZEVENT_TIME
 The time at which the zevent was posted as
-"\fIseconds\fR\ \fInanoseconds\fR" since the Epoch.
-.TP
-.B
-ZEVENT_TIME_SECS
-The \fIseconds\fR component of ZEVENT_TIME.
-.TP
-.B
-ZEVENT_TIME_NSECS
-The \fInanoseconds\fR component of ZEVENT_TIME.
-.TP
-.B
-ZEVENT_TIME_STRING
-An almost-RFC3339-compliant string for ZEVENT_TIME.
-.PP
+.Dq Em seconds nanoseconds
+since the Epoch.
+.It Sy ZEVENT_TIME_SECS
+The
+.Em seconds
+component of
+.Sy ZEVENT_TIME .
+.It Sy ZEVENT_TIME_NSECS
+The
+.Em nanoseconds
+component of
+.Sy ZEVENT_TIME .
+.It Sy ZEVENT_TIME_STRING
+An almost-RFC3339-compliant string for
+.Sy ZEVENT_TIME .
+.El
+.Pp
 Additionally, the following ZED & ZFS variables are defined:
-.TP
-.B
-ZED_PID
+.Bl -tag -compact -width "ZEVENT_TIME_STRING"
+.It Sy ZED_PID
 The daemon's process ID.
-.TP
-.B
-ZED_ZEDLET_DIR
-The daemon's current \fIenabled-zedlets\fR directory.
-.TP
-.B
-ZFS_ALIAS
-The ZFS alias (\fIname-version-release\fR) string used to build the daemon.
-.TP
-.B
-ZFS_VERSION
-The ZFS version used to build the daemon.
-.TP
-.B
-ZFS_RELEASE
-The ZFS release used to build the daemon.
-.PP
-ZEDLETs may need to call other ZFS commands.  The installation paths of
-the following executables are defined: \fBZDB\fR, \fBZED\fR, \fBZFS\fR,
-\fBZINJECT\fR, and \fBZPOOL\fR.  These variables can be overridden in the
-rc file if needed.
-
-.SH FILES
-.\" .TP
-.\" @sysconfdir@/zfs/zed.conf
-.\" The default configuration file for the daemon.
-.TP
-.I @sysconfdir@/zfs/zed.d
+.It Sy ZED_ZEDLET_DIR
+The daemon's current
+.Em enabled-zedlets
+directory.
+.It Sy ZFS_ALIAS
+The alias
+.Pq Dq Em name Ns - Ns Em version Ns - Ns Em release
+string of the ZFS distribution the daemon is part of.
+.It Sy ZFS_VERSION
+The ZFS version the daemon is part of.
+.It Sy ZFS_RELEASE
+The ZFS release the daemon is part of.
+.El
+.Pp
+ZEDLETs may need to call other ZFS commands.
+The installation paths of the following executables are defined as environment variables:
+.Sy ZDB ,
+.Sy ZED ,
+.Sy ZFS ,
+.Sy ZINJECT ,
+and
+.Sy ZPOOL .
+These variables may be overridden in the rc file.
+.
+.Sh FILES
+.Bl -tag -width "-c"
+.It Pa @sysconfdir@/zfs/zed.d
 The default directory for enabled ZEDLETs.
-.TP
-.I @sysconfdir@/zfs/zed.d/zed.rc
+.It Pa @sysconfdir@/zfs/zed.d/zed.rc
 The default rc file for common variables used by ZEDLETs.
-.TP
-.I @zfsexecdir@/zed.d
+.It Pa @zfsexecdir@/zed.d
 The default directory for installed ZEDLETs.
-.TP
-.I @runstatedir@/zed.pid
+.It Pa @runstatedir@/zed.pid
 The default file containing the daemon's process ID.
-.TP
-.I @runstatedir@/zed.state
+.It Pa @runstatedir@/zed.state
 The default file containing the daemon's state.
-
-.SH SIGNALS
-.TP
-.B HUP
+.El
+.
+.Sh SIGNALS
+.Bl -tag -width "-c"
+.It Sy SIGHUP
 Reconfigure the daemon and rescan the directory for enabled ZEDLETs.
-.TP
-.B TERM
+.It Sy SIGTERM , SIGINT
 Terminate the daemon.
-
-.SH NOTES
-.PP
-\fBZED\fR requires root privileges.
-.\" Do not taunt zed.
-
-.SH BUGS
-.PP
-Events are processed synchronously by a single thread.  This can delay the
-processing of simultaneous zevents.
-.PP
-There is no maximum timeout for ZEDLET execution.  Consequently, a misbehaving
-ZEDLET can delay the processing of subsequent zevents.
-.PP
-The ownership and permissions of the \fIenabled-zedlets\fR directory (along
-with all parent directories) are not checked.  If any of these directories
-are improperly owned or permissioned, an unprivileged user could insert a
-ZEDLET to be executed as root.  The requirement that ZEDLETs be owned by
-root mitigates this to some extent.
-.PP
+.El
+.
+.Sh SEE ALSO
+.Xr zfs 8 ,
+.Xr zpool 8 ,
+.Xr zpool-events 8
+.
+.Sh NOTES
+The
+.Nm
+requires root privileges.
+.Pp
+Do not taunt the
+.Nm .
+.
+.Sh BUGS
 ZEDLETs are unable to return state/status information to the kernel.
-.PP
-Some zevent nvpair types are not handled.  These are denoted by zevent
-environment variables having a "_NOT_IMPLEMENTED_" value.
-.PP
+.Pp
 Internationalization support via gettext has not been added.
-.PP
-The configuration file is not yet implemented.
-.PP
-The diagnosis engine is not yet implemented.
-
-.SH LICENSE
-.PP
-\fBZED\fR (ZFS Event Daemon) is distributed under the terms of the
-Common Development and Distribution License Version 1.0 (CDDL\-1.0).
-.PP
-Developed at Lawrence Livermore National Laboratory (LLNL\-CODE\-403049).
-
-.SH SEE ALSO
-.BR zfs (8),
-.BR zpool (8)

diff --git a/zfs/man/man8/zfs-allow.8 b/zfs/man/man8/zfs-allow.8
new file mode 100644
index 0000000..bbd62ed
--- /dev/null
+++ b/zfs/man/man8/zfs-allow.8

@@ -0,0 +1,386 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-ALLOW 8
+.Os
+.
+.Sh NAME
+.Nm zfs-allow
+.Nd delegate ZFS administration permissions to unprivileged users
+.Sh SYNOPSIS
+.Nm zfs
+.Cm allow
+.Op Fl dglu
+.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns …
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm allow
+.Op Fl dl
+.Fl e Ns | Ns Sy everyone
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm allow
+.Fl c
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm allow
+.Fl s No @ Ns Ar setname
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm unallow
+.Op Fl dglru
+.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns …
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm unallow
+.Op Fl dlr
+.Fl e Ns | Ns Sy everyone
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm unallow
+.Op Fl r
+.Fl c
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm unallow
+.Op Fl r
+.Fl s No @ Ns Ar setname
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm allow
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Displays permissions that have been delegated on the specified filesystem or
+volume.
+See the other forms of
+.Nm zfs Cm allow
+for more information.
+.Pp
+Delegations are supported under Linux with the exception of
+.Sy mount ,
+.Sy unmount ,
+.Sy mountpoint ,
+.Sy canmount ,
+.Sy rename ,
+and
+.Sy share .
+These permissions cannot be delegated because the Linux
+.Xr mount 8
+command restricts modifications of the global namespace to the root user.
+.It Xo
+.Nm zfs
+.Cm allow
+.Op Fl dglu
+.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns …
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+.It Xo
+.Nm zfs
+.Cm allow
+.Op Fl dl
+.Fl e Ns | Ns Sy everyone
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Delegates ZFS administration permission for the file systems to non-privileged
+users.
+.Bl -tag -width "-d"
+.It Fl d
+Allow only for the descendent file systems.
+.It Fl e Ns | Ns Sy everyone
+Specifies that the permissions be delegated to everyone.
+.It Fl g Ar group Ns Oo , Ns Ar group Oc Ns …
+Explicitly specify that permissions are delegated to the group.
+.It Fl l
+Allow
+.Qq locally
+only for the specified file system.
+.It Fl u Ar user Ns Oo , Ns Ar user Oc Ns …
+Explicitly specify that permissions are delegated to the user.
+.It Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns …
+Specifies to whom the permissions are delegated.
+Multiple entities can be specified as a comma-separated list.
+If neither of the
+.Fl gu
+options are specified, then the argument is interpreted preferentially as the
+keyword
+.Sy everyone ,
+then as a user name, and lastly as a group name.
+To specify a user or group named
+.Qq everyone ,
+use the
+.Fl g
+or
+.Fl u
+options.
+To specify a group with the same name as a user, use the
+.Fl g
+option.
+.It Xo
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Xc
+The permissions to delegate.
+Multiple permissions may be specified as a comma-separated list.
+Permission names are the same as ZFS subcommand and property names.
+See the property list below.
+Permission set names, which begin with
+.Sy @ ,
+may be specified.
+See the
+.Fl s
+form below for details.
+.El
+.Pp
+If neither of the
+.Fl dl
+options are specified, or both are, then the permissions are allowed for the
+file system or volume, and all of its descendents.
+.Pp
+Permissions are generally the ability to use a ZFS subcommand or change a ZFS
+property.
+The following permissions are available:
+.TS
+l l l .
+NAME	TYPE	NOTES
+_	_	_
+allow	subcommand	Must also have the permission that is being allowed
+bookmark	subcommand
+clone	subcommand	Must also have the \fBcreate\fR ability and \fBmount\fR ability in the origin file system
+create	subcommand	Must also have the \fBmount\fR ability. Must also have the \fBrefreservation\fR ability to create a non-sparse volume.
+destroy	subcommand	Must also have the \fBmount\fR ability
+diff	subcommand	Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to \fBzfs diff\fR.
+hold	subcommand	Allows adding a user hold to a snapshot
+load-key	subcommand	Allows loading and unloading of encryption key (see \fBzfs load-key\fR and \fBzfs unload-key\fR).
+change-key	subcommand	Allows changing an encryption key via \fBzfs change-key\fR.
+mount	subcommand	Allows mounting/umounting ZFS datasets
+promote	subcommand	Must also have the \fBmount\fR and \fBpromote\fR ability in the origin file system
+receive	subcommand	Must also have the \fBmount\fR and \fBcreate\fR ability
+release	subcommand	Allows releasing a user hold which might destroy the snapshot
+rename	subcommand	Must also have the \fBmount\fR and \fBcreate\fR ability in the new parent
+rollback	subcommand	Must also have the \fBmount\fR ability
+send	subcommand
+share	subcommand	Allows sharing file systems over NFS or SMB protocols
+snapshot	subcommand	Must also have the \fBmount\fR ability
+
+groupquota	other	Allows accessing any \fBgroupquota@\fI...\fR property
+groupobjquota	other	Allows accessing any \fBgroupobjquota@\fI...\fR property
+groupused	other	Allows reading any \fBgroupused@\fI...\fR property
+groupobjused	other	Allows reading any \fBgroupobjused@\fI...\fR property
+userprop	other	Allows changing any user property
+userquota	other	Allows accessing any \fBuserquota@\fI...\fR property
+userobjquota	other	Allows accessing any \fBuserobjquota@\fI...\fR property
+userused	other	Allows reading any \fBuserused@\fI...\fR property
+userobjused	other	Allows reading any \fBuserobjused@\fI...\fR property
+projectobjquota	other	Allows accessing any \fBprojectobjquota@\fI...\fR property
+projectquota	other	Allows accessing any \fBprojectquota@\fI...\fR property
+projectobjused	other	Allows reading any \fBprojectobjused@\fI...\fR property
+projectused	other	Allows reading any \fBprojectused@\fI...\fR property
+
+aclinherit	property
+aclmode	property
+acltype	property
+atime	property
+canmount	property
+casesensitivity	property
+checksum	property
+compression	property
+context	property
+copies	property
+dedup	property
+defcontext	property
+devices	property
+dnodesize	property
+encryption	property
+exec	property
+filesystem_limit	property
+fscontext	property
+keyformat	property
+keylocation	property
+logbias	property
+mlslabel	property
+mountpoint	property
+nbmand	property
+normalization	property
+overlay	property
+pbkdf2iters	property
+primarycache	property
+quota	property
+readonly	property
+recordsize	property
+redundant_metadata	property
+refquota	property
+refreservation	property
+relatime	property
+reservation	property
+rootcontext	property
+secondarycache	property
+setuid	property
+sharenfs	property
+sharesmb	property
+snapdev	property
+snapdir	property
+snapshot_limit	property
+special_small_blocks	property
+sync	property
+utf8only	property
+version	property
+volblocksize	property
+volmode	property
+volsize	property
+vscan	property
+xattr	property
+zoned	property
+.TE
+.It Xo
+.Nm zfs
+.Cm allow
+.Fl c
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Sets
+.Qq create time
+permissions.
+These permissions are granted
+.Pq locally
+to the creator of any newly-created descendent file system.
+.It Xo
+.Nm zfs
+.Cm allow
+.Fl s No @ Ns Ar setname
+.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns …
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Defines or adds permissions to a permission set.
+The set can be used by other
+.Nm zfs Cm allow
+commands for the specified file system and its descendents.
+Sets are evaluated dynamically, so changes to a set are immediately reflected.
+Permission sets follow the same naming restrictions as ZFS file systems, but the
+name must begin with
+.Sy @ ,
+and can be no more than 64 characters long.
+.It Xo
+.Nm zfs
+.Cm unallow
+.Op Fl dglru
+.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns …
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+.It Xo
+.Nm zfs
+.Cm unallow
+.Op Fl dlr
+.Fl e Ns | Ns Sy everyone
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+.It Xo
+.Nm zfs
+.Cm unallow
+.Op Fl r
+.Fl c
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Removes permissions that were granted with the
+.Nm zfs Cm allow
+command.
+No permissions are explicitly denied, so other permissions granted are still in
+effect.
+For example, a permission that was granted by an ancestor remains in effect.
+If no permissions are specified, then all permissions for the specified
+.Ar user ,
+.Ar group ,
+or
+.Sy everyone
+are removed.
+Specifying
+.Sy everyone
+.Po or using the
+.Fl e
+option
+.Pc
+only removes the permissions that were granted to everyone, not all permissions
+for every user and group.
+See the
+.Nm zfs Cm allow
+command for a description of the
+.Fl ldugec
+options.
+.Bl -tag -width "-r"
+.It Fl r
+Recursively remove the permissions from this file system and all descendents.
+.El
+.It Xo
+.Nm zfs
+.Cm unallow
+.Op Fl r
+.Fl s No @ Ns Ar setname
+.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
+.Ar setname Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Removes permissions from a permission set.
+If no permissions are specified, then all permissions are removed, thus removing
+the set entirely.
+.El

diff --git a/zfs/man/man8/zfs-bookmark.8 b/zfs/man/man8/zfs-bookmark.8
new file mode 100644
index 0000000..094a7b3
--- /dev/null
+++ b/zfs/man/man8/zfs-bookmark.8

@@ -0,0 +1,67 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-BOOKMARK 8
+.Os
+.
+.Sh NAME
+.Nm zfs-bookmark
+.Nd create bookmark of ZFS snapshot
+.Sh SYNOPSIS
+.Nm zfs
+.Cm bookmark
+.Ar snapshot Ns | Ns Ar bookmark
+.Ar newbookmark
+.
+.Sh DESCRIPTION
+Creates a new bookmark of the given snapshot or bookmark.
+Bookmarks mark the point in time when the snapshot was created, and can be used
+as the incremental source for a
+.Nm zfs Cm send .
+.Pp
+When creating a bookmark from an existing redaction bookmark, the resulting
+bookmark is
+.Em not
+a redaction bookmark.
+.Pp
+This feature must be enabled to be used.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy bookmarks
+feature.
+.
+.Sh SEE ALSO
+.Xr zfs-destroy 8 ,
+.Xr zfs-send 8 ,
+.Xr zfs-snapshot 8

diff --git a/zfs/man/man8/zfs-change-key.8 b/zfs/man/man8/zfs-change-key.8
new file mode 120000
index 0000000..d027a41
--- /dev/null
+++ b/zfs/man/man8/zfs-change-key.8

@@ -0,0 +1 @@
+zfs-load-key.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-clone.8 b/zfs/man/man8/zfs-clone.8
new file mode 100644
index 0000000..0640244
--- /dev/null
+++ b/zfs/man/man8/zfs-clone.8

@@ -0,0 +1,70 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-CLONE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-clone
+.Nd clone snapshot of ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm clone
+.Op Fl p
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Ar snapshot Ar filesystem Ns | Ns Ar volume
+.
+.Sh DESCRIPTION
+See the
+.Sx Clones
+section of
+.Xr zfsconcepts 7
+for details.
+The target dataset can be located anywhere in the ZFS hierarchy,
+and is created as the same type as the original.
+.Bl -tag -width Ds
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property; see
+.Nm zfs Cm create
+for details.
+.It Fl p
+Creates all the non-existing parent datasets.
+Datasets created in this manner are automatically mounted according to the
+.Sy mountpoint
+property inherited from their parent.
+If the target filesystem or volume already exists, the operation completes
+successfully.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-promote 8 ,
+.Xr zfs-snapshot 8

diff --git a/zfs/man/man8/zfs-create.8 b/zfs/man/man8/zfs-create.8
new file mode 100644
index 0000000..55397fa
--- /dev/null
+++ b/zfs/man/man8/zfs-create.8

@@ -0,0 +1,249 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd December 1, 2020
+.Dt ZFS-CREATE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-create
+.Nd create ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm create
+.Op Fl Pnpuv
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Ar filesystem
+.Nm zfs
+.Cm create
+.Op Fl ps
+.Op Fl b Ar blocksize
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Fl V Ar size Ar volume
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm create
+.Op Fl Pnpuv
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Ar filesystem
+.Xc
+Creates a new ZFS file system.
+The file system is automatically mounted according to the
+.Sy mountpoint
+property inherited from the parent, unless the
+.Fl u
+option is used.
+.Bl -tag -width "-o"
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property as if the command
+.Nm zfs Cm set Ar property Ns = Ns Ar value
+was invoked at the same time the dataset was created.
+Any editable ZFS property can also be set at creation time.
+Multiple
+.Fl o
+options can be specified.
+An error results if the same property is specified in multiple
+.Fl o
+options.
+.It Fl p
+Creates all the non-existing parent datasets.
+Datasets created in this manner are automatically mounted according to the
+.Sy mountpoint
+property inherited from their parent.
+Any property specified on the command line using the
+.Fl o
+option is ignored.
+If the target filesystem already exists, the operation completes successfully.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+creation.
+No datasets will be created.
+This is useful in conjunction with the
+.Fl v
+or
+.Fl P
+flags to validate properties that are passed via
+.Fl o
+options and those implied by other options.
+The actual dataset creation can still fail due to insufficient privileges or
+available capacity.
+.It Fl P
+Print machine-parsable verbose information about the created dataset.
+Each line of output contains a key and one or two values, all separated by tabs.
+The
+.Sy create_ancestors
+and
+.Sy create
+keys have
+.Em filesystem
+as their only value.
+The
+.Sy create_ancestors
+key only appears if the
+.Fl p
+option is used.
+The
+.Sy property
+key has two values, a property name and that property's value.
+The
+.Sy property
+key may appear zero or more times, once for each property that will be set local
+to
+.Em filesystem
+due to the use of the
+.Fl o
+option.
+.It Fl u
+Do not mount the newly created file system.
+.It Fl v
+Print verbose information about the created dataset.
+.El
+.It Xo
+.Nm zfs
+.Cm create
+.Op Fl ps
+.Op Fl b Ar blocksize
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Fl V Ar size Ar volume
+.Xc
+Creates a volume of the given size.
+The volume is exported as a block device in
+.Pa /dev/zvol/path ,
+where
+.Em path
+is the name of the volume in the ZFS namespace.
+The size represents the logical size as exported by the device.
+By default, a reservation of equal size is created.
+.Pp
+.Ar size
+is automatically rounded up to the nearest multiple of the
+.Sy blocksize .
+.Bl -tag -width "-b"
+.It Fl b Ar blocksize
+Equivalent to
+.Fl o Sy volblocksize Ns = Ns Ar blocksize .
+If this option is specified in conjunction with
+.Fl o Sy volblocksize ,
+the resulting behavior is undefined.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property as if the
+.Nm zfs Cm set Ar property Ns = Ns Ar value
+command was invoked at the same time the dataset was created.
+Any editable ZFS property can also be set at creation time.
+Multiple
+.Fl o
+options can be specified.
+An error results if the same property is specified in multiple
+.Fl o
+options.
+.It Fl p
+Creates all the non-existing parent datasets.
+Datasets created in this manner are automatically mounted according to the
+.Sy mountpoint
+property inherited from their parent.
+Any property specified on the command line using the
+.Fl o
+option is ignored.
+If the target filesystem already exists, the operation completes successfully.
+.It Fl s
+Creates a sparse volume with no reservation.
+See
+.Sy volsize
+in the
+.Em Native Properties
+section of
+.Xr zfsprops 7
+for more information about sparse volumes.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+creation.
+No datasets will be created.
+This is useful in conjunction with the
+.Fl v
+or
+.Fl P
+flags to validate properties that are passed via
+.Fl o
+options and those implied by other options.
+The actual dataset creation can still fail due to insufficient privileges or
+available capacity.
+.It Fl P
+Print machine-parsable verbose information about the created dataset.
+Each line of output contains a key and one or two values, all separated by tabs.
+The
+.Sy create_ancestors
+and
+.Sy create
+keys have
+.Em volume
+as their only value.
+The
+.Sy create_ancestors
+key only appears if the
+.Fl p
+option is used.
+The
+.Sy property
+key has two values, a property name and that property's value.
+The
+.Sy property
+key may appear zero or more times, once for each property that will be set local
+to
+.Em volume
+due to the use of the
+.Fl b
+or
+.Fl o
+options, as well as
+.Sy refreservation
+if the volume is not sparse.
+.It Fl v
+Print verbose information about the created dataset.
+.El
+.El
+.Ss ZFS Volumes as Swap
+ZFS volumes may be used as swap devices.
+After creating the volume with the
+.Nm zfs Cm create Fl V
+command, enable the swap area using the
+.Xr swapon 8
+command.
+Swapping to files on ZFS filesystems is not supported.
+.
+.Sh SEE ALSO
+.Xr zfs-destroy 8 ,
+.Xr zfs-list 8 ,
+.Xr zpool-create 8

diff --git a/zfs/man/man8/zfs-destroy.8 b/zfs/man/man8/zfs-destroy.8
new file mode 100644
index 0000000..51d9b7a
--- /dev/null
+++ b/zfs/man/man8/zfs-destroy.8

@@ -0,0 +1,178 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFS-DESTROY 8
+.Os
+.
+.Sh NAME
+.Nm zfs-destroy
+.Nd destroy ZFS dataset, snapshots, or bookmark
+.Sh SYNOPSIS
+.Nm zfs
+.Cm destroy
+.Op Fl Rfnprv
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm destroy
+.Op Fl Rdnprv
+.Ar filesystem Ns | Ns Ar volume Ns @ Ns Ar snap Ns
+.Oo % Ns Ar snap Ns Oo , Ns Ar snap Ns Oo % Ns Ar snap Oc Oc Oc Ns …
+.Nm zfs
+.Cm destroy
+.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm destroy
+.Op Fl Rfnprv
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Destroys the given dataset.
+By default, the command unshares any file systems that are currently shared,
+unmounts any file systems that are currently mounted, and refuses to destroy a
+dataset that has active dependents
+.Pq children or clones .
+.Bl -tag -width "-R"
+.It Fl R
+Recursively destroy all dependents, including cloned file systems outside the
+target hierarchy.
+.It Fl f
+Forcibly unmount file systems.
+This option has no effect on non-file systems or unmounted file systems.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+deletion.
+No data will be deleted.
+This is useful in conjunction with the
+.Fl v
+or
+.Fl p
+flags to determine what data would be deleted.
+.It Fl p
+Print machine-parsable verbose information about the deleted data.
+.It Fl r
+Recursively destroy all children.
+.It Fl v
+Print verbose information about the deleted data.
+.El
+.Pp
+Extreme care should be taken when applying either the
+.Fl r
+or the
+.Fl R
+options, as they can destroy large portions of a pool and cause unexpected
+behavior for mounted file systems in use.
+.It Xo
+.Nm zfs
+.Cm destroy
+.Op Fl Rdnprv
+.Ar filesystem Ns | Ns Ar volume Ns @ Ns Ar snap Ns
+.Oo % Ns Ar snap Ns Oo , Ns Ar snap Ns Oo % Ns Ar snap Oc Oc Oc Ns …
+.Xc
+The given snapshots are destroyed immediately if and only if the
+.Nm zfs Cm destroy
+command without the
+.Fl d
+option would have destroyed them.
+Such immediate destruction would occur, for example, if the snapshot had no
+clones and the user-initiated reference count were zero.
+.Pp
+If a snapshot does not qualify for immediate destruction, it is marked for
+deferred deletion.
+In this state, it exists as a usable, visible snapshot until both of the
+preconditions listed above are met, at which point it is destroyed.
+.Pp
+An inclusive range of snapshots may be specified by separating the first and
+last snapshots with a percent sign.
+The first and/or last snapshots may be left blank, in which case the
+filesystem's oldest or newest snapshot will be implied.
+.Pp
+Multiple snapshots
+.Pq or ranges of snapshots
+of the same filesystem or volume may be specified in a comma-separated list of
+snapshots.
+Only the snapshot's short name
+.Po the part after the
+.Sy @
+.Pc
+should be specified when using a range or comma-separated list to identify
+multiple snapshots.
+.Bl -tag -width "-R"
+.It Fl R
+Recursively destroy all clones of these snapshots, including the clones,
+snapshots, and children.
+If this flag is specified, the
+.Fl d
+flag will have no effect.
+.It Fl d
+Destroy immediately.
+If a snapshot cannot be destroyed now, mark it for deferred destruction.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+deletion.
+No data will be deleted.
+This is useful in conjunction with the
+.Fl p
+or
+.Fl v
+flags to determine what data would be deleted.
+.It Fl p
+Print machine-parsable verbose information about the deleted data.
+.It Fl r
+Destroy
+.Pq or mark for deferred deletion
+all snapshots with this name in descendent file systems.
+.It Fl v
+Print verbose information about the deleted data.
+.Pp
+Extreme care should be taken when applying either the
+.Fl r
+or the
+.Fl R
+options, as they can destroy large portions of a pool and cause unexpected
+behavior for mounted file systems in use.
+.El
+.It Xo
+.Nm zfs
+.Cm destroy
+.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark
+.Xc
+The given bookmark is destroyed.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-create 8 ,
+.Xr zfs-hold 8

diff --git a/zfs/man/man8/zfs-diff.8 b/zfs/man/man8/zfs-diff.8
new file mode 100644
index 0000000..a347f32
--- /dev/null
+++ b/zfs/man/man8/zfs-diff.8

@@ -0,0 +1,102 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 29, 2021
+.Dt ZFS-DIFF 8
+.Os
+.
+.Sh NAME
+.Nm zfs-diff
+.Nd show difference between ZFS snapshots
+.Sh SYNOPSIS
+.Nm zfs
+.Cm diff
+.Op Fl FHth
+.Ar snapshot Ar snapshot Ns | Ns Ar filesystem
+.
+.Sh DESCRIPTION
+Display the difference between a snapshot of a given filesystem and another
+snapshot of that filesystem from a later time or the current contents of the
+filesystem.
+The first column is a character indicating the type of change, the other columns
+indicate pathname, new pathname
+.Pq in case of rename ,
+change in link count, and optionally file type and/or change time.
+The types of change are:
+.Bl -tag -compact -offset Ds -width "M"
+.It Sy -
+The path has been removed
+.It Sy +
+The path has been created
+.It Sy M
+The path has been modified
+.It Sy R
+The path has been renamed
+.El
+.Bl -tag -width "-F"
+.It Fl F
+Display an indication of the type of file, in a manner similar to the
+.Fl F
+option of
+.Xr ls 1 .
+.Bl -tag -compact -offset 2n -width "B"
+.It Sy B
+Block device
+.It Sy C
+Character device
+.It Sy /
+Directory
+.It Sy >
+Door
+.It Sy |\&
+Named pipe
+.It Sy @
+Symbolic link
+.It Sy P
+Event port
+.It Sy =
+Socket
+.It Sy F
+Regular file
+.El
+.It Fl H
+Give more parsable tab-separated output, without header lines and without
+arrows.
+.It Fl t
+Display the path's inode change time as the first column of output.
+.It Fl h
+Do not
+.Sy \e0 Ns Ar ooo Ns -escape
+non-ASCII paths.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-snapshot 8

diff --git a/zfs/man/man8/zfs-get.8 b/zfs/man/man8/zfs-get.8
new file mode 120000
index 0000000..c70b41a
--- /dev/null
+++ b/zfs/man/man8/zfs-get.8

@@ -0,0 +1 @@
+zfs-set.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-groupspace.8 b/zfs/man/man8/zfs-groupspace.8
new file mode 120000
index 0000000..8bc2f1d
--- /dev/null
+++ b/zfs/man/man8/zfs-groupspace.8

@@ -0,0 +1 @@
+zfs-userspace.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-hold.8 b/zfs/man/man8/zfs-hold.8
new file mode 100644
index 0000000..5e46520
--- /dev/null
+++ b/zfs/man/man8/zfs-hold.8

@@ -0,0 +1,112 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFS-HOLD 8
+.Os
+.
+.Sh NAME
+.Nm zfs-hold
+.Nd hold ZFS snapshots to prevent their removal
+.Sh SYNOPSIS
+.Nm zfs
+.Cm hold
+.Op Fl r
+.Ar tag Ar snapshot Ns …
+.Nm zfs
+.Cm holds
+.Op Fl rH
+.Ar snapshot Ns …
+.Nm zfs
+.Cm release
+.Op Fl r
+.Ar tag Ar snapshot Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm hold
+.Op Fl r
+.Ar tag Ar snapshot Ns …
+.Xc
+Adds a single reference, named with the
+.Ar tag
+argument, to the specified snapshots.
+Each snapshot has its own tag namespace, and tags must be unique within that
+space.
+.Pp
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the
+.Nm zfs Cm destroy
+command return
+.Sy EBUSY .
+.Bl -tag -width "-r"
+.It Fl r
+Specifies that a hold with the given tag is applied recursively to the snapshots
+of all descendent file systems.
+.El
+.It Xo
+.Nm zfs
+.Cm holds
+.Op Fl rH
+.Ar snapshot Ns …
+.Xc
+Lists all existing user references for the given snapshot or snapshots.
+.Bl -tag -width "-r"
+.It Fl r
+Lists the holds that are set on the named descendent snapshots, in addition to
+listing the holds on the named snapshot.
+.It Fl H
+Do not print headers, use tab-delimited output.
+.El
+.It Xo
+.Nm zfs
+.Cm release
+.Op Fl r
+.Ar tag Ar snapshot Ns …
+.Xc
+Removes a single reference, named with the
+.Ar tag
+argument, from the specified snapshot or snapshots.
+The tag must already exist for each snapshot.
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the
+.Nm zfs Cm destroy
+command return
+.Sy EBUSY .
+.Bl -tag -width "-r"
+.It Fl r
+Recursively releases a hold with the given tag on the snapshots of all
+descendent file systems.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-destroy 8

diff --git a/zfs/man/man8/zfs-inherit.8 b/zfs/man/man8/zfs-inherit.8
new file mode 120000
index 0000000..c70b41a
--- /dev/null
+++ b/zfs/man/man8/zfs-inherit.8

@@ -0,0 +1 @@
+zfs-set.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-jail.8 b/zfs/man/man8/zfs-jail.8
new file mode 100644
index 0000000..4f9faae
--- /dev/null
+++ b/zfs/man/man8/zfs-jail.8

@@ -0,0 +1,123 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
+.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
+.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-JAIL 8
+.Os
+.
+.Sh NAME
+.Nm zfs-jail
+.Nd attach or detach ZFS filesystem from FreeBSD jail
+.Sh SYNOPSIS
+.Nm zfs Cm jail
+.Ar jailid Ns | Ns Ar jailname
+.Ar filesystem
+.Nm zfs Cm unjail
+.Ar jailid Ns | Ns Ar jailname
+.Ar filesystem
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm jail
+.Ar jailid Ns | Ns Ar jailname
+.Ar filesystem
+.Xc
+Attach the specified
+.Ar filesystem
+to the jail identified by JID
+.Ar jailid
+or name
+.Ar jailname .
+From now on this file system tree can be managed from within a jail if the
+.Sy jailed
+property has been set.
+To use this functionality, the jail needs the
+.Sy allow.mount
+and
+.Sy allow.mount.zfs
+parameters set to
+.Sy 1
+and the
+.Sy enforce_statfs
+parameter set to a value lower than
+.Sy 2 .
+.Pp
+You cannot attach a jailed dataset's children to another jail.
+You also cannot attach the root file system
+of the jail or any dataset which needs to be mounted before the zfs rc script
+is run inside the jail, as it would be attached unmounted until it is
+mounted from the rc script inside the jail.
+.Pp
+To allow management of the dataset from within a jail, the
+.Sy jailed
+property has to be set and the jail needs access to the
+.Pa /dev/zfs
+device.
+The
+.Sy quota
+property cannot be changed from within a jail.
+.Pp
+After a dataset is attached to a jail and the
+.Sy jailed
+property is set, a jailed file system cannot be mounted outside the jail,
+since the jail administrator might have set the mount point to an unacceptable value.
+.Pp
+See
+.Xr jail 8
+for more information on managing jails.
+Jails are a
+.Fx
+feature and are not relevant on other platforms.
+.It Xo
+.Nm zfs
+.Cm unjail
+.Ar jailid Ns | Ns Ar jailname
+.Ar filesystem
+.Xc
+Detaches the specified
+.Ar filesystem
+from the jail identified by JID
+.Ar jailid
+or name
+.Ar jailname .
+.El
+.Sh SEE ALSO
+.Xr zfsprops 7 ,
+.Xr jail 8

diff --git a/zfs/man/man8/zfs-list.8 b/zfs/man/man8/zfs-list.8
new file mode 100644
index 0000000..5200483
--- /dev/null
+++ b/zfs/man/man8/zfs-list.8

@@ -0,0 +1,162 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-LIST 8
+.Os
+.
+.Sh NAME
+.Nm zfs-list
+.Nd list properties of ZFS datasets
+.Sh SYNOPSIS
+.Nm zfs
+.Cm list
+.Op Fl r Ns | Ns Fl d Ar depth
+.Op Fl Hp
+.Oo Fl o Ar property Ns Oo , Ns Ar property Oc Ns … Oc
+.Oo Fl s Ar property Oc Ns …
+.Oo Fl S Ar property Oc Ns …
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns …
+.
+.Sh DESCRIPTION
+If specified, you can list property information by the absolute pathname or the
+relative pathname.
+By default, all file systems and volumes are displayed.
+Snapshots are displayed if the
+.Sy listsnapshots
+pool property is
+.Sy on
+.Po the default is
+.Sy off
+.Pc ,
+or if the
+.Fl t Sy snapshot
+or
+.Fl t Sy all
+options are specified.
+The following fields are displayed:
+.Sy name , Sy used , Sy available , Sy referenced , Sy mountpoint .
+.Bl -tag -width "-H"
+.It Fl H
+Used for scripting mode.
+Do not print headers and separate fields by a single tab instead of arbitrary
+white space.
+.It Fl S Ar property
+Same as the
+.Fl s
+option, but sorts by property in descending order.
+.It Fl d Ar depth
+Recursively display any children of the dataset, limiting the recursion to
+.Ar depth .
+A
+.Ar depth
+of
+.Sy 1
+will display only the dataset and its direct children.
+.It Fl o Ar property
+A comma-separated list of properties to display.
+The property must be:
+.Bl -bullet -compact
+.It
+One of the properties described in the
+.Sx Native Properties
+section of
+.Xr zfsprops 7
+.It
+A user property
+.It
+The value
+.Sy name
+to display the dataset name
+.It
+The value
+.Sy space
+to display space usage properties on file systems and volumes.
+This is a shortcut for specifying
+.Fl o Ns \ \& Ns Sy name , Ns Sy avail , Ns Sy used , Ns Sy usedsnap , Ns
+.Sy usedds , Ns Sy usedrefreserv , Ns Sy usedchild
+.Fl t Sy filesystem , Ns Sy volume .
+.El
+.It Fl p
+Display numbers in parsable
+.Pq exact
+values.
+.It Fl r
+Recursively display any children of the dataset on the command line.
+.It Fl s Ar property
+A property for sorting the output by column in ascending order based on the
+value of the property.
+The property must be one of the properties described in the
+.Sx Properties
+section of
+.Xr zfsprops 7
+or the value
+.Sy name
+to sort by the dataset name.
+Multiple properties can be specified at one time using multiple
+.Fl s
+property options.
+Multiple
+.Fl s
+options are evaluated from left to right in decreasing order of importance.
+The following is a list of sorting criteria:
+.Bl -bullet -compact
+.It
+Numeric types sort in numeric order.
+.It
+String types sort in alphabetical order.
+.It
+Types inappropriate for a row sort that row to the literal bottom, regardless of
+the specified ordering.
+.El
+.Pp
+If no sorting options are specified the existing behavior of
+.Nm zfs Cm list
+is preserved.
+.It Fl t Ar type
+A comma-separated list of types to display, where
+.Ar type
+is one of
+.Sy filesystem ,
+.Sy snapshot ,
+.Sy volume ,
+.Sy bookmark ,
+or
+.Sy all .
+For example, specifying
+.Fl t Sy snapshot
+displays only snapshots.
+.El
+.
+.Sh SEE ALSO
+.Xr zfsprops 7 ,
+.Xr zfs-get 8

diff --git a/zfs/man/man8/zfs-load-key.8 b/zfs/man/man8/zfs-load-key.8
new file mode 100644
index 0000000..b12a79e
--- /dev/null
+++ b/zfs/man/man8/zfs-load-key.8

@@ -0,0 +1,301 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd January 13, 2020
+.Dt ZFS-LOAD-KEY 8
+.Os
+.
+.Sh NAME
+.Nm zfs-load-key
+.Nd load, unload, or change encryption key of ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm load-key
+.Op Fl nr
+.Op Fl L Ar keylocation
+.Fl a Ns | Ns Ar filesystem
+.Nm zfs
+.Cm unload-key
+.Op Fl r
+.Fl a Ns | Ns Ar filesystem
+.Nm zfs
+.Cm change-key
+.Op Fl l
+.Op Fl o Ar keylocation Ns = Ns Ar value
+.Op Fl o Ar keyformat Ns = Ns Ar value
+.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Ar filesystem
+.Nm zfs
+.Cm change-key
+.Fl i
+.Op Fl l
+.Ar filesystem
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm load-key
+.Op Fl nr
+.Op Fl L Ar keylocation
+.Fl a Ns | Ns Ar filesystem
+.Xc
+Load the key for
+.Ar filesystem ,
+allowing it and all children that inherit the
+.Sy keylocation
+property to be accessed.
+The key will be expected in the format specified by the
+.Sy keyformat
+and location specified by the
+.Sy keylocation
+property.
+Note that if the
+.Sy keylocation
+is set to
+.Sy prompt
+the terminal will interactively wait for the key to be entered.
+Loading a key will not automatically mount the dataset.
+If that functionality is desired,
+.Nm zfs Cm mount Fl l
+will ask for the key and mount the dataset
+.Po
+see
+.Xr zfs-mount 8
+.Pc .
+Once the key is loaded the
+.Sy keystatus
+property will become
+.Sy available .
+.Bl -tag -width "-r"
+.It Fl r
+Recursively loads the keys for the specified filesystem and all descendent
+encryption roots.
+.It Fl a
+Loads the keys for all encryption roots in all imported pools.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+.Cm load-key .
+This will cause
+.Nm zfs
+to simply check that the provided key is correct.
+This command may be run even if the key is already loaded.
+.It Fl L Ar keylocation
+Use
+.Ar keylocation
+instead of the
+.Sy keylocation
+property.
+This will not change the value of the property on the dataset.
+Note that if used with either
+.Fl r
+or
+.Fl a ,
+.Ar keylocation
+may only be given as
+.Sy prompt .
+.El
+.It Xo
+.Nm zfs
+.Cm unload-key
+.Op Fl r
+.Fl a Ns | Ns Ar filesystem
+.Xc
+Unloads a key from ZFS, removing the ability to access the dataset and all of
+its children that inherit the
+.Sy keylocation
+property.
+This requires that the dataset is not currently open or mounted.
+Once the key is unloaded the
+.Sy keystatus
+property will become
+.Sy unavailable .
+.Bl -tag -width "-r"
+.It Fl r
+Recursively unloads the keys for the specified filesystem and all descendent
+encryption roots.
+.It Fl a
+Unloads the keys for all encryption roots in all imported pools.
+.El
+.It Xo
+.Nm zfs
+.Cm change-key
+.Op Fl l
+.Op Fl o Ar keylocation Ns = Ns Ar value
+.Op Fl o Ar keyformat Ns = Ns Ar value
+.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Ar filesystem
+.Xc
+.It Xo
+.Nm zfs
+.Cm change-key
+.Fl i
+.Op Fl l
+.Ar filesystem
+.Xc
+Changes the user's key (e.g. a passphrase) used to access a dataset.
+This command requires that the existing key for the dataset is already loaded.
+This command may also be used to change the
+.Sy keylocation ,
+.Sy keyformat ,
+and
+.Sy pbkdf2iters
+properties as needed.
+If the dataset was not previously an encryption root it will become one.
+Alternatively, the
+.Fl i
+flag may be provided to cause an encryption root to inherit the parent's key
+instead.
+.Pp
+If the user's key is compromised,
+.Nm zfs Cm change-key
+does not necessarily protect existing or newly-written data from attack.
+Newly-written data will continue to be encrypted with the same master key as
+the existing data.
+The master key is compromised if an attacker obtains a
+user key and the corresponding wrapped master key.
+Currently,
+.Nm zfs Cm change-key
+does not overwrite the previous wrapped master key on disk, so it is
+accessible via forensic analysis for an indeterminate length of time.
+.Pp
+In the event of a master key compromise, ideally the drives should be securely
+erased to remove all the old data (which is readable using the compromised
+master key), a new pool created, and the data copied back.
+This can be approximated in place by creating new datasets, copying the data
+.Pq e.g. using Nm zfs Cm send | Nm zfs Cm recv ,
+and then clearing the free space with
+.Nm zpool Cm trim Fl -secure
+if supported by your hardware, otherwise
+.Nm zpool Cm initialize .
+.Bl -tag -width "-r"
+.It Fl l
+Ensures the key is loaded before attempting to change the key.
+This is effectively equivalent to running
+.Nm zfs Cm load-key Ar filesystem ; Nm zfs Cm change-key Ar filesystem
+.It Fl o Ar property Ns = Ns Ar value
+Allows the user to set encryption key properties
+.Pq Sy keyformat , keylocation , No and Sy pbkdf2iters
+while changing the key.
+This is the only way to alter
+.Sy keyformat
+and
+.Sy pbkdf2iters
+after the dataset has been created.
+.It Fl i
+Indicates that zfs should make
+.Ar filesystem
+inherit the key of its parent.
+Note that this command can only be run on an encryption root
+that has an encrypted parent.
+.El
+.El
+.Ss Encryption
+Enabling the
+.Sy encryption
+feature allows for the creation of encrypted filesystems and volumes.
+ZFS will encrypt file and volume data, file attributes, ACLs, permission bits,
+directory listings, FUID mappings, and
+.Sy userused Ns / Ns Sy groupused
+data.
+ZFS will not encrypt metadata related to the pool structure, including
+dataset and snapshot names, dataset hierarchy, properties, file size, file
+holes, and deduplication tables (though the deduplicated data itself is
+encrypted).
+.Pp
+Key rotation is managed by ZFS.
+Changing the user's key (e.g. a passphrase)
+does not require re-encrypting the entire dataset.
+Datasets can be scrubbed,
+resilvered, renamed, and deleted without the encryption keys being loaded (see the
+.Cm load-key
+subcommand for more info on key loading).
+.Pp
+Creating an encrypted dataset requires specifying the
+.Sy encryption No and Sy keyformat
+properties at creation time, along with an optional
+.Sy keylocation No and Sy pbkdf2iters .
+After entering an encryption key, the
+created dataset will become an encryption root.
+Any descendant datasets will
+inherit their encryption key from the encryption root by default, meaning that
+loading, unloading, or changing the key for the encryption root will implicitly
+do the same for all inheriting datasets.
+If this inheritance is not desired, simply supply a
+.Sy keyformat
+when creating the child dataset or use
+.Nm zfs Cm change-key
+to break an existing relationship, creating a new encryption root on the child.
+Note that the child's
+.Sy keyformat
+may match that of the parent while still creating a new encryption root, and
+that changing the
+.Sy encryption
+property alone does not create a new encryption root; this would simply use a
+different cipher suite with the same key as its encryption root.
+The one exception is that clones will always use their origin's encryption key.
+As a result of this exception, some encryption-related properties
+.Pq namely Sy keystatus , keyformat ,  keylocation , No and Sy pbkdf2iters
+do not inherit like other ZFS properties and instead use the value determined
+by their encryption root.
+Encryption root inheritance can be tracked via the read-only
+.Sy encryptionroot
+property.
+.Pp
+Encryption changes the behavior of a few ZFS
+operations.
+Encryption is applied after compression so compression ratios are preserved.
+Normally checksums in ZFS are 256 bits long, but for encrypted data
+the checksum is 128 bits of the user-chosen checksum and 128 bits of MAC from
+the encryption suite, which provides additional protection against maliciously
+altered data.
+Deduplication is still possible with encryption enabled but for security,
+datasets will only deduplicate against themselves, their snapshots,
+and their clones.
+.Pp
+There are a few limitations on encrypted datasets.
+Encrypted data cannot be embedded via the
+.Sy embedded_data
+feature.
+Encrypted datasets may not have
+.Sy copies Ns = Ns Em 3
+since the implementation stores some encryption metadata where the third copy
+would normally be.
+Since compression is applied before encryption, datasets may
+be vulnerable to a CRIME-like attack if applications accessing the data allow for it.
+Deduplication with encryption will leak information about which blocks
+are equivalent in a dataset and will incur an extra CPU cost for each block written.
+.
+.Sh SEE ALSO
+.Xr zfsprops 7 ,
+.Xr zfs-create 8 ,
+.Xr zfs-set 8

diff --git a/zfs/man/man8/zfs-mount-generator.8.in b/zfs/man/man8/zfs-mount-generator.8.in
index 41a2999..ae89370 100644
--- a/zfs/man/man8/zfs-mount-generator.8.in
+++ b/zfs/man/man8/zfs-mount-generator.8.in

@@ -21,228 +21,161 @@
 .\" LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 .\" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 .\" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-.TH "ZFS\-MOUNT\-GENERATOR" "8" "2020-01-19" "ZFS" "zfs-mount-generator" "\""
-
-.SH "NAME"
-zfs\-mount\-generator \- generates systemd mount units for ZFS
-.SH SYNOPSIS
-.B @systemdgeneratordir@/zfs\-mount\-generator
-.sp
-.SH DESCRIPTION
-zfs\-mount\-generator implements the \fBGenerators Specification\fP
-of
-.BR systemd (1),
-and is called during early boot to generate
-.BR systemd.mount (5)
-units for automatically mounted datasets. Mount ordering and dependencies
-are created for all tracked pools (see below).
-
-.SS ENCRYPTION KEYS
-If the dataset is an encryption root, a service that loads the associated key (either from file or through a
-.BR systemd\-ask\-password (1)
-prompt) will be created. This service
-. BR RequiresMountsFor
-the path of the key (if file-based) and also copies the mount unit's
-.BR After ,
-.BR Before
-and
-.BR Requires .
-All mount units of encrypted datasets add the key\-load service for their encryption root to their
-.BR Wants
-and
-.BR After .
-The service will not be
-.BR Want ed
-or
-.BR Require d
-by
-.BR local-fs.target
-directly, and so will only be started manually or as a dependency of a started mount unit.
-
-.SS UNIT ORDERING AND DEPENDENCIES
-mount unit's
-.BR Before
-\->
-key\-load service (if any)
-\->
-mount unit
-\->
-mount unit's
-.BR After
-
-It is worth nothing that when a mount unit is activated, it activates all available mount units for parent paths to its mountpoint, i.e. activating the mount unit for /tmp/foo/1/2/3 automatically activates all available mount units for /tmp, /tmp/foo, /tmp/foo/1, and /tmp/foo/1/2. This is true for any combination of mount units from any sources, not just ZFS.
-
-.SS CACHE FILE
-Because ZFS pools may not be available very early in the boot process,
-information on ZFS mountpoints must be stored separately. The output of the command
-.PP
-.RS 4
-zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation,org.openzfs.systemd:requires,org.openzfs.systemd:requires-mounts-for,org.openzfs.systemd:before,org.openzfs.systemd:after,org.openzfs.systemd:wanted-by,org.openzfs.systemd:required-by,org.openzfs.systemd:nofail,org.openzfs.systemd:ignore
-
-.RE
-.PP
-for datasets that should be mounted by systemd, should be kept
-separate from the pool, at
-.PP
-.RS 4
-.RI @sysconfdir@/zfs/zfs-list.cache/ POOLNAME
+.\"
+.Dd May 31, 2021
+.Dt ZFS-MOUNT-GENERATOR 8
+.Os
 .
-.RE
-.PP
-The cache file, if writeable, will be kept synchronized with the pool
-state by the ZEDLET
-.PP
-.RS 4
-history_event-zfs-list-cacher.sh .
-.RE
-.PP
-.sp
-.SS PROPERTIES
-The behavior of the generator script can be influenced by the following dataset properties:
-.sp
-.TP 4
-.BR canmount = on | off | noauto
-If a dataset has
-.BR mountpoint
-set and
-.BR canmount
-is not
-.BR off ,
-a mount unit will be generated.
-Additionally, if
-.BR canmount
-is
-.BR on ,
-.BR local-fs.target
-will gain a dependency on the mount unit.
-
-This behavior is equal to the
-.BR auto
+.Sh NAME
+.Nm zfs-mount-generator
+.Nd generate systemd mount units for ZFS filesystems
+.Sh SYNOPSIS
+.Pa @systemdgeneratordir@/zfs-mount-generator
+.
+.Sh DESCRIPTION
+.Nm
+is a
+.Xr systemd.generator 7
+that generates native
+.Xr systemd.mount 5
+units for configured ZFS datasets.
+.
+.Ss Properties
+.Bl -tag -compact -width "org.openzfs.systemd:required-by=unit[ unit]…"
+.It Sy mountpoint Ns =
+.No Skipped if Sy legacy No or Sy none .
+.
+.It Sy canmount Ns =
+.No Skipped if Sy off .
+.No Skipped if only Sy noauto
+datasets exist for a given mountpoint and there's more than one.
+.No Datasets with Sy on No take precedence over ones with Sy noauto No for the same mountpoint.
+.No Sets logical Em noauto No flag if Sy noauto .
+Encryption roots always generate
+.Sy zfs-load-key@ Ns Ar root Ns Sy .service ,
+even if
+.Sy off .
+.
+.It Sy atime Ns = , Sy relatime Ns = , Sy devices Ns = , Sy exec Ns = , Sy readonly Ns = , Sy setuid Ns = , Sy nbmand Ns =
+Used to generate mount options equivalent to
+.Nm zfs Cm mount .
+.
+.It Sy encroot Ns = , Sy keylocation Ns =
+If the dataset is an encryption root, its mount unit will bind to
+.Sy zfs-load-key@ Ns Ar root Ns Sy .service ,
+with additional dependencies as follows:
+.Bl -tag -compact -offset Ds -width "keylocation=https://URL (et al.)"
+.It Sy keylocation Ns = Ns Sy prompt
+None, uses
+.Xr systemd-ask-password 1
+.It Sy keylocation Ns = Ns Sy https:// Ns Ar URL Pq et al.\&
+.Sy Wants Ns = , Sy After Ns = : Pa network-online.target
+.It Sy keylocation Ns = Ns Sy file:// Ns < Ns Ar path Ns >
+.Sy RequiresMountsFor Ns = Ns Ar path
+.El
+.
+The service also uses the same
+.Sy Wants Ns = ,
+.Sy After Ns = ,
+.Sy Requires Ns = , No and
+.Sy RequiresMountsFor Ns =
+as the mount unit.
+.
+.It Sy org.openzfs.systemd:requires Ns = Ns Pa unit Ns Oo " " Ns Pa unit Oc Ns …
+.No Sets Sy Requires Ns = for the mount- and key-loading unit.
+.
+.It Sy org.openzfs.systemd:requires-mounts-for Ns = Ns Pa path Ns Oo " " Ns Pa path Oc Ns …
+.No Sets Sy RequiresMountsFor Ns = for the mount- and key-loading unit.
+.
+.It Sy org.openzfs.systemd:before Ns = Ns Pa unit Ns Oo " " Ns Pa unit Oc Ns …
+.No Sets Sy Before Ns = for the mount unit.
+.
+.It Sy org.openzfs.systemd:after Ns = Ns Pa unit Ns Oo " " Ns Pa unit Oc Ns …
+.No Sets Sy After Ns = for the mount unit.
+.
+.It Sy org.openzfs.systemd:wanted-by Ns = Ns Pa unit Ns Oo " " Ns Pa unit Oc Ns …
+.No Sets logical Em noauto No flag (see below).
+.No If not Sy none , No sets Sy WantedBy Ns = for the mount unit.
+.It Sy org.openzfs.systemd:required-by Ns = Ns Pa unit Ns Oo " " Ns Pa unit Oc Ns …
+.No Sets logical Em noauto No flag (see below).
+.No If not Sy none , No sets Sy RequiredBy Ns = for the mount unit.
+.
+.It Sy org.openzfs.systemd:nofail Ns = Ns (unset) Ns | Ns Sy on Ns | Ns Sy off
+Waxes or wanes strength of default reverse dependencies of the mount unit, see below.
+.
+.It Sy org.openzfs.systemd:ignore Ns = Ns Sy on Ns | Ns Sy off
+.No Skipped if Sy on .
+.No Defaults to Sy off .
+.El
+.
+.Ss Unit Ordering And Dependencies
+Unless the pool the dataset resides on
+is imported at generation time, both units gain
+.Sy Wants Ns = Ns Pa zfs-import.target
 and
-.BR noauto
-legacy mount options, see
-.BR systemd.mount (5).
-
-Encryption roots always generate a key-load service, even for
-.BR canmount=off .
-.TP 4
-.BR org.openzfs.systemd:requires\-mounts\-for = \fIpath\fR...
-Space\-separated list of mountpoints to require to be mounted for this mount unit
-.TP 4
-.BR org.openzfs.systemd:before = \fIunit\fR...
-The mount unit and associated key\-load service will be ordered before this space\-separated list of units.
-.TP 4
-.BR org.openzfs.systemd:after = \fIunit\fR...
-The mount unit and associated key\-load service will be ordered after this space\-separated list of units.
-.TP 4
-.BR org.openzfs.systemd:wanted\-by = \fIunit\fR...
-Space-separated list of units that will gain a
-.BR Wants
-dependency on this mount unit.
-Setting this property implies
-.BR noauto .
-.TP 4
-.BR org.openzfs.systemd:required\-by = \fIunit\fR...
-Space-separated list of units that will gain a
-.BR Requires
-dependency on this mount unit.
-Setting this property implies
-.BR noauto .
-.TP 4
-.BR org.openzfs.systemd:nofail = unset | on | off
-Toggles between a
-.BR Wants
-and
-.BR Requires
-type of dependency between the mount unit and
-.BR local-fs.target ,
-if
-.BR noauto
-isn't set or implied.
-
-.BR on :
-Mount will be
-.BR WantedBy
-local-fs.target
-
-.BR off :
-Mount will be
-.BR Before
-and
-.BR RequiredBy
-local-fs.target
-
-.BR unset :
-Mount will be
-.BR Before
-and
-.BR WantedBy
-local-fs.target
-.TP 4
-.BR org.openzfs.systemd:ignore = on | off
-If set to
-.BR on ,
-do not generate a mount unit for this dataset.
-
-.RE
-See also
-.BR systemd.mount (5)
-
-.PP
-.SH EXAMPLE
+.Sy After Ns = Ns Pa zfs-import.target .
+.Pp
+Additionally, unless the logical
+.Em noauto
+flag is set, the mount unit gains a reverse-dependency for
+.Pa local-fs.target
+of strength
+.Bl -tag -compact -offset Ds -width "(unset)"
+.It (unset)
+.Sy WantedBy Ns = No + Sy Before Ns =
+.It Sy on
+.Sy WantedBy Ns =
+.It Sy off
+.Sy RequiredBy Ns = No + Sy Before Ns =
+.El
+.
+.Ss Cache File
+Because ZFS pools may not be available very early in the boot process,
+information on ZFS mountpoints must be stored separately.
+The output of
+.Dl Nm zfs Cm list Fl Ho Ar name , Ns Aq every property above in order
+for datasets that should be mounted by systemd should be kept at
+.Pa @sysconfdir@/zfs/zfs-list.cache/ Ns Ar poolname ,
+and, if writeable, will be kept synchronized for the entire pool by the
+.Pa history_event-zfs-list-cacher.sh
+ZEDLET, if enabled
+.Pq see Xr zed 8 .
+.
+.Sh ENVIRONMENT
+If the
+.Sy ZFS_DEBUG
+environment variable is nonzero
+.Pq or unset and Pa /proc/cmdline No contains Qq Sy debug ,
+print summary accounting information at the end.
+.
+.Sh EXAMPLES
 To begin, enable tracking for the pool:
-.PP
-.RS 4
-touch
-.RI @sysconfdir@/zfs/zfs-list.cache/ POOLNAME
-.RE
-.PP
-Then, enable the tracking ZEDLET:
-.PP
-.RS 4
-ln -s "@zfsexecdir@/zed.d/history_event-zfs-list-cacher.sh" "@sysconfdir@/zfs/zed.d"
-
-systemctl enable zfs-zed.service
-
-systemctl restart zfs-zed.service
-.RE
-.PP
-Force the running of the ZEDLET by setting a monitored property, e.g.
-.BR canmount ,
-for at least one dataset in the pool:
-.PP
-.RS 4
-zfs set canmount=on
-.I DATASET
-.RE
-.PP
-This forces an update to the stale cache file.
-
-To test the generator output, run
-.PP
-.RS 4
-@systemdgeneratordir@/zfs-mount-generator /tmp/zfs-mount-generator . .
-.RE
-.PP
-This will generate units and dependencies in
-.I /tmp/zfs-mount-generator
-for you to inspect them. The second and third argument are ignored.
-
-If you're satisfied with the generated units, instruct systemd to re-run all generators:
-.PP
-.RS 4
-systemctl daemon-reload
-.RE
-.PP
-
-.sp
-.SH SEE ALSO
-.BR zfs (5)
-.BR zfs-events (5)
-.BR zed (8)
-.BR zpool (5)
-.BR systemd (1)
-.BR systemd.target (5)
-.BR systemd.special (7)
-.BR systemd.mount (7)
+.Dl # Nm touch Pa @sysconfdir@/zfs/zfs-list.cache/ Ns Ar poolname
+Then enable the tracking ZEDLET:
+.Dl # Nm ln Fl s Pa @zfsexecdir@/zed.d/history_event-zfs-list-cacher.sh @sysconfdir@/zfs/zed.d
+.Dl # Nm systemctl Cm enable Pa zfs-zed.service
+.Dl # Nm systemctl Cm restart Pa zfs-zed.service
+.Pp
+If no history event is in the queue,
+inject one to ensure the ZEDLET runs to refresh the cache file
+by setting a monitored property somewhere on the pool:
+.Dl # Nm zfs Cm set Sy relatime Ns = Ns Sy off Ar poolname/dset
+.Dl # Nm zfs Cm inherit Sy relatime Ar poolname/dset
+.Pp
+To test the generator output:
+.Dl $ Nm mkdir Pa /tmp/zfs-mount-generator
+.Dl $ Nm @systemdgeneratordir@/zfs-mount-generator Pa /tmp/zfs-mount-generator
+.
+If the generated units are satisfactory, instruct
+.Nm systemd
+to re-run all generators:
+.Dl # Nm systemctl daemon-reload
+.
+.Sh SEE ALSO
+.Xr systemd.mount 5 ,
+.Xr systemd.target 5 ,
+.Xr zfs 5 ,
+.Xr systemd.generator 7 ,
+.Xr systemd.special 7 ,
+.Xr zed 8 ,
+.Xr zpool-events 8

diff --git a/zfs/man/man8/zfs-mount.8 b/zfs/man/man8/zfs-mount.8
new file mode 100644
index 0000000..42ce6b5
--- /dev/null
+++ b/zfs/man/man8/zfs-mount.8

@@ -0,0 +1,130 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd February 16, 2019
+.Dt ZFS-MOUNT 8
+.Os
+.
+.Sh NAME
+.Nm zfs-mount
+.Nd manage mount state of ZFS filesystems
+.Sh SYNOPSIS
+.Nm zfs
+.Cm mount
+.Nm zfs
+.Cm mount
+.Op Fl Oflv
+.Op Fl o Ar options
+.Fl a Ns | Ns Ar filesystem
+.Nm zfs
+.Cm unmount
+.Op Fl fu
+.Fl a Ns | Ns Ar filesystem Ns | Ns Ar mountpoint
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm mount
+.Xc
+Displays all ZFS file systems currently mounted.
+.It Xo
+.Nm zfs
+.Cm mount
+.Op Fl Oflv
+.Op Fl o Ar options
+.Fl a Ns | Ns Ar filesystem
+.Xc
+Mount a ZFS filesystem on a path described by its
+.Sy mountpoint
+property, if the path exists and is empty.
+If
+.Sy mountpoint
+is set to
+.Em legacy ,
+the filesystem should instead be mounted using
+.Xr mount 8 .
+.Bl -tag -width "-O"
+.It Fl O
+Perform an overlay mount.
+Allows mounting in a non-empty
+.Sy mountpoint .
+See
+.Xr mount 8
+for more information.
+.It Fl a
+Mount all available ZFS file systems.
+Invoked automatically as part of the boot process if configured.
+.It Ar filesystem
+Mount the specified filesystem.
+.It Fl o Ar options
+An optional, comma-separated list of mount options to use temporarily for the
+duration of the mount.
+See the
+.Em Temporary Mount Point Properties
+section of
+.Xr zfsprops 7
+for details.
+.It Fl l
+Load keys for encrypted filesystems as they are being mounted.
+This is equivalent to executing
+.Nm zfs Cm load-key
+on each encryption root before mounting it.
+Note that if a filesystem has
+.Sy keylocation Ns = Ns Sy prompt ,
+this will cause the terminal to interactively block after asking for the key.
+.It Fl v
+Report mount progress.
+.It Fl f
+Attempt to force mounting of all filesystems, even those that couldn't normally be mounted (e.g. redacted datasets).
+.El
+.It Xo
+.Nm zfs
+.Cm unmount
+.Op Fl fu
+.Fl a Ns | Ns Ar filesystem Ns | Ns Ar mountpoint
+.Xc
+Unmounts currently mounted ZFS file systems.
+.Bl -tag -width "-a"
+.It Fl a
+Unmount all available ZFS file systems.
+Invoked automatically as part of the shutdown process.
+.It Fl f
+Forcefully unmount the file system, even if it is currently in use.
+This option is not supported on Linux.
+.It Fl u
+Unload keys for any encryption roots unmounted by this command.
+.It Ar filesystem Ns | Ns Ar mountpoint
+Unmount the specified filesystem.
+The command can also be given a path to a ZFS file system mount point on the
+system.
+.El
+.El

diff --git a/zfs/man/man8/zfs-program.8 b/zfs/man/man8/zfs-program.8
index 532fda1..4a9718c 100644
--- a/zfs/man/man8/zfs-program.8
+++ b/zfs/man/man8/zfs-program.8

@@ -1,3 +1,4 @@
+.\"
 .\" This file and its contents are supplied under the terms of the
 .\" Common Development and Distribution License ("CDDL"), version 1.0.
 .\" You may only use this file in accordance with the terms of version
@@ -7,23 +8,27 @@
 .\" source.  A copy of the CDDL is also available via the Internet at
 .\" http://www.illumos.org/license/CDDL.
 .\"
-.\"
 .\" Copyright (c) 2016, 2019 by Delphix. All Rights Reserved.
+.\" Copyright (c) 2019, 2020 by Christian Schwarz. All Rights Reserved.
+.\" Copyright 2020 Joyent, Inc.
 .\"
-.Dd February 26, 2019
+.Dd May 27, 2021
 .Dt ZFS-PROGRAM 8
 .Os
+.
 .Sh NAME
-.Nm zfs program
-.Nd executes ZFS channel programs
+.Nm zfs-program
+.Nd execute ZFS channel programs
 .Sh SYNOPSIS
-.Cm "zfs program"
+.Nm zfs
+.Cm program
 .Op Fl jn
 .Op Fl t Ar instruction-limit
 .Op Fl m Ar memory-limit
 .Ar pool
 .Ar script
-.\".Op Ar optional arguments to channel program
+.Op Ar script arguments
+.
 .Sh DESCRIPTION
 The ZFS channel program interface allows ZFS administrative operations to be
 run programmatically as a Lua script.
@@ -34,22 +39,22 @@
 .Pp
 A modified version of the Lua 5.2 interpreter is used to run channel program
 scripts.
-The Lua 5.2 manual can be found at:
-.Bd -centered -offset indent
+The Lua 5.2 manual can be found at
 .Lk http://www.lua.org/manual/5.2/
-.Ed
 .Pp
 The channel program given by
 .Ar script
 will be run on
 .Ar pool ,
 and any attempts to access or modify other pools will cause an error.
+.
 .Sh OPTIONS
 .Bl -tag -width "-t"
 .It Fl j
-Display channel program output in JSON format. When this flag is specified and
-standard output is empty - channel program encountered an error. The details of
-such an error will be printed to standard error in plain text.
+Display channel program output in JSON format.
+When this flag is specified and standard output is empty,
+the channel program encountered an error.
+The details of such an error will be printed to standard error in plain text.
 .It Fl n
 Executes a read-only channel program, which runs faster.
 The program cannot change on-disk state by calling functions from the
@@ -75,15 +80,17 @@
 described in the
 .Sx LUA INTERFACE
 section below.
+.
 .Sh LUA INTERFACE
 A channel program can be invoked either from the command line, or via a library
 call to
 .Fn lzc_channel_program .
+.
 .Ss Arguments
 Arguments passed to the channel program are converted to a Lua table.
 If invoked from the command line, extra arguments to the Lua script will be
 accessible as an array stored in the argument table with the key 'argv':
-.Bd -literal -offset indent
+.Bd -literal -compact -offset indent
 args = ...
 argv = args["argv"]
 -- argv == {1="arg1", 2="arg2", ...}
@@ -92,7 +99,7 @@
 If invoked from the libZFS interface, an arbitrary argument list can be
 passed to the channel program, which is accessible via the same
 "..." syntax in Lua:
-.Bd -literal -offset indent
+.Bd -literal -compact -offset indent
 args = ...
 -- args == {"foo"="bar", "baz"={...}, ...}
 .Ed
@@ -105,37 +112,35 @@
 in a C array passed to a channel program will be stored in
 .Va arr[1]
 when accessed from Lua.
+.
 .Ss Return Values
 Lua return statements take the form:
-.Bd -literal -offset indent
-return ret0, ret1, ret2, ...
-.Ed
+.Dl return ret0, ret1, ret2, ...
 .Pp
 Return statements returning multiple values are permitted internally in a
 channel program script, but attempting to return more than one value from the
 top level of the channel program is not permitted and will throw an error.
 However, tables containing multiple values can still be returned.
 If invoked from the command line, a return statement:
-.Bd -literal -offset indent
+.Bd -literal -compact -offset indent
 a = {foo="bar", baz=2}
 return a
 .Ed
 .Pp
 Will be output formatted as:
-.Bd -literal -offset indent
+.Bd -literal -compact -offset indent
 Channel program fully executed with return value:
     return:
         baz: 2
         foo: 'bar'
 .Ed
+.
 .Ss Fatal Errors
 If the channel program encounters a fatal error while running, a non-zero exit
 status will be returned.
 If more information about the error is available, a singleton list will be
 returned detailing the error:
-.Bd -literal -offset indent
-error: "error string, including Lua stack trace"
-.Ed
+.Dl error: \&"error string, including Lua stack trace"
 .Pp
 If a fatal error is returned, the channel program may have not executed at all,
 may have partially executed, or may have fully executed but failed to pass a
@@ -153,12 +158,13 @@
 If a channel program attempts to return too large a value, the program will
 fully execute but exit with a nonzero status code and no return value.
 .Pp
-.Em Note:
+.Em Note :
 ZFS API functions do not generate Fatal Errors when correctly invoked, they
 return an error code and the channel program continues executing.
 See the
 .Sx ZFS API
 section below for function-specific details on error return codes.
+.
 .Ss Lua to C Value Conversion
 When invoking a channel program via the libZFS interface, it is necessary to
 translate arguments and return values from Lua values to their C equivalents,
@@ -168,37 +174,37 @@
 A Lua table which is returned from the channel program will be recursively
 converted to an nvlist, with table values converted to their natural
 equivalents:
-.Bd -literal -offset indent
-string -> string
-number -> int64
-boolean -> boolean_value
-nil -> boolean (no value)
-table -> nvlist
-.Ed
+.TS
+cw3 l c l .
+	string	->	string
+	number	->	int64
+	boolean	->	boolean_value
+	nil	->	boolean (no value)
+	table	->	nvlist
+.TE
 .Pp
 Likewise, table keys are replaced by string equivalents as follows:
-.Bd -literal -offset indent
-string -> no change
-number -> signed decimal string ("%lld")
-boolean -> "true" | "false"
-.Ed
+.TS
+cw3 l c l .
+	string	->	no change
+	number	->	signed decimal string ("%lld")
+	boolean	->	"true" | "false"
+.TE
 .Pp
 Any collision of table key strings (for example, the string "true" and a
 true boolean value) will cause a fatal error.
 .Pp
 Lua numbers are represented internally as signed 64-bit integers.
+.
 .Sh LUA STANDARD LIBRARY
 The following Lua built-in base library functions are available:
-.Bd -literal -offset indent
-assert                  rawlen
-collectgarbage          rawget
-error                   rawset
-getmetatable            select
-ipairs                  setmetatable
-next                    tonumber
-pairs                   tostring
-rawequal                type
-.Ed
+.TS
+cw3 l l l l .
+	assert	rawlen	collectgarbage	rawget
+	error	rawset	getmetatable	select
+	ipairs	setmetatable	next	tonumber
+	pairs	tostring	rawequal	type
+.TE
 .Pp
 All functions in the
 .Em coroutine ,
@@ -211,15 +217,13 @@
 .Pp
 The following functions base library functions have been disabled and are
 not available for use in channel programs:
-.Bd -literal -offset indent
-dofile
-loadfile
-load
-pcall
-print
-xpcall
-.Ed
+.TS
+cw3 l l l l l l .
+	dofile	loadfile	load	pcall	print	xpcall
+.TE
+.
 .Sh ZFS API
+.
 .Ss Function Arguments
 Each API function takes a fixed set of required positional arguments and
 optional keyword arguments.
@@ -228,22 +232,17 @@
 argument.
 When using parentheses to specify the arguments to a Lua function, only
 positional arguments can be used:
-.Bd -literal -offset indent
-zfs.sync.destroy("rpool@snap")
-.Ed
+.Dl Sy zfs.sync.destroy Ns Pq \&"rpool@snap"
 .Pp
 To use keyword arguments, functions must be called with a single argument that
 is a Lua table containing entries mapping integers to positional arguments and
 strings to keyword arguments:
-.Bd -literal -offset indent
-zfs.sync.destroy({1="rpool@snap", defer=true})
-.Ed
+.Dl Sy zfs.sync.destroy Ns Pq {1="rpool@snap", defer=true}
 .Pp
 The Lua language allows curly braces to be used in place of parenthesis as
 syntactic sugar for this calling convention:
-.Bd -literal -offset indent
-zfs.sync.snapshot{"rpool@snap", defer=true}
-.Ed
+.Dl Sy zfs.sync.snapshot Ns {"rpool@snap", defer=true}
+.
 .Ss Function Return Values
 If an API function succeeds, it returns 0.
 If it fails, it returns an error code and the channel program continues
@@ -258,13 +257,11 @@
 Different keys will exist in the error details table depending on the function
 and error case.
 Any such function may be called expecting a single return value:
-.Bd -literal -offset indent
-errno = zfs.sync.promote(dataset)
-.Ed
+.Dl errno = Sy zfs.sync.promote Ns Pq dataset
 .Pp
 Or, the error details can be retrieved:
-.Bd -literal -offset indent
-errno, details = zfs.sync.promote(dataset)
+.Bd -literal -compact -offset indent
+.No errno, details = Sy zfs.sync.promote Ns Pq dataset
 if (errno == EEXIST) then
     assert(details ~= Nil)
     list_of_conflicting_snapshots = details
@@ -273,48 +270,46 @@
 .Pp
 The following global aliases for API function error return codes are defined
 for use in channel programs:
-.Bd -literal -offset indent
-EPERM     ECHILD      ENODEV      ENOSPC
-ENOENT    EAGAIN      ENOTDIR     ESPIPE
-ESRCH     ENOMEM      EISDIR      EROFS
-EINTR     EACCES      EINVAL      EMLINK
-EIO       EFAULT      ENFILE      EPIPE
-ENXIO     ENOTBLK     EMFILE      EDOM
-E2BIG     EBUSY       ENOTTY      ERANGE
-ENOEXEC   EEXIST      ETXTBSY     EDQUOT
-EBADF     EXDEV       EFBIG
-.Ed
+.TS
+cw3 l l l l l l l .
+	EPERM	ECHILD	ENODEV	ENOSPC	ENOENT	EAGAIN	ENOTDIR
+	ESPIPE	ESRCH	ENOMEM	EISDIR	EROFS	EINTR	EACCES
+	EINVAL	EMLINK	EIO	EFAULT	ENFILE	EPIPE	ENXIO
+	ENOTBLK	EMFILE	EDOM	E2BIG	EBUSY	ENOTTY	ERANGE
+	ENOEXEC	EEXIST	ETXTBSY	EDQUOT	EBADF	EXDEV	EFBIG
+.TE
+.
 .Ss API Functions
-For detailed descriptions of the exact behavior of any zfs administrative
+For detailed descriptions of the exact behavior of any ZFS administrative
 operations, see the main
-.Xr zfs 1
+.Xr zfs 8
 manual page.
 .Bl -tag -width "xx"
-.It Em zfs.debug(msg)
+.It Fn zfs.debug msg
 Record a debug message in the zfs_dbgmsg log.
 A log of these messages can be printed via mdb's "::zfs_dbgmsg" command, or
-can be monitored live by running:
-.Bd -literal -offset indent
-  dtrace -n 'zfs-dbgmsg{trace(stringof(arg0))}'
-.Ed
+can be monitored live by running
+.Dl dtrace -n 'zfs-dbgmsg{trace(stringof(arg0))}'
 .Pp
-msg (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "property (string)"
+.It Ar msg Pq string
 Debug message to be printed.
-.Ed
-.It Em zfs.exists(dataset)
+.El
+.It Fn zfs.exists dataset
 Returns true if the given dataset exists, or false if it doesn't.
 A fatal error will be thrown if the dataset is not in the target pool.
 That is, in a channel program running on rpool,
-zfs.exists("rpool/nonexistent_fs") returns false, but
-zfs.exists("somepool/fs_that_may_exist") will error.
+.Sy zfs.exists Ns Pq \&"rpool/nonexistent_fs"
+returns false, but
+.Sy zfs.exists Ns Pq \&"somepool/fs_that_may_exist"
+will error.
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "property (string)"
+.It Ar dataset Pq string
 Dataset to check for existence.
 Must be in the target pool.
-.Ed
-.It Em zfs.get_prop(dataset, property)
+.El
+.It Fn zfs.get_prop dataset property
 Returns two values.
 First, a string, number or table containing the property value for the given
 dataset.
@@ -323,22 +318,25 @@
 Throws a Lua error if the dataset is invalid or the property doesn't exist.
 Note that Lua only supports int64 number types whereas ZFS number properties
 are uint64.
-This means very large values (like guid) may wrap around and appear negative.
+This means very large values (like GUIDs) may wrap around and appear negative.
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "property (string)"
+.It Ar dataset Pq string
 Filesystem or snapshot path to retrieve properties from.
-.Ed
-.Pp
-property (string)
-.Bd -ragged -compact -offset "xxxx"
+.It Ar property Pq string
 Name of property to retrieve.
-All filesystem, snapshot and volume properties are supported except
-for 'mounted' and 'iscsioptions.'
-Also supports the 'written@snap' and 'written#bookmark' properties and
-the '<user|group><quota|used>@id' properties, though the id must be in numeric
-form.
-.Ed
+All filesystem, snapshot and volume properties are supported except for
+.Sy mounted
+and
+.Sy iscsioptions .
+Also supports the
+.Sy written@ Ns Ar snap
+and
+.Sy written# Ns Ar bookmark
+properties and the
+.Ao Sy user Ns | Ns Sy group Ac Ns Ao Sy quota Ns | Ns Sy used Ac Ns Sy @ Ns Ar id
+properties, though the id must be in numeric form.
+.El
 .El
 .Bl -tag -width "xx"
 .It Sy zfs.sync submodule
@@ -347,45 +345,73 @@
 .Pp
 The available sync submodule functions are as follows:
 .Bl -tag -width "xx"
-.It Em zfs.sync.destroy(dataset, [defer=true|false])
+.It Sy zfs.sync.destroy Ns Pq Ar dataset , Op Ar defer Ns = Ns Sy true Ns | Ns Sy false
 Destroy the given dataset.
 Returns 0 on successful destroy, or a nonzero error code if the dataset could
 not be destroyed (for example, if the dataset has any active children or
 clones).
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
 Filesystem or snapshot to be destroyed.
-.Ed
-.Pp
-[optional] defer (boolean)
-.Bd -ragged -compact -offset "xxxx"
+.It Op Ar defer Pq boolean
 Valid only for destroying snapshots.
 If set to true, and the snapshot has holds or clones, allows the snapshot to be
 marked for deferred deletion rather than failing.
-.Ed
-.It Em zfs.sync.promote(dataset)
+.El
+.It Fn zfs.sync.inherit dataset property
+Clears the specified property in the given dataset, causing it to be inherited
+from an ancestor, or restored to the default if no ancestor property is set.
+The
+.Nm zfs Cm inherit Fl S
+option has not been implemented.
+Returns 0 on success, or a nonzero error code if the property could not be
+cleared.
+.Pp
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
+Filesystem or snapshot containing the property to clear.
+.It Ar property Pq string
+The property to clear.
+Allowed properties are the same as those for the
+.Nm zfs Cm inherit
+command.
+.El
+.It Fn zfs.sync.promote dataset
 Promote the given clone to a filesystem.
 Returns 0 on successful promotion, or a nonzero error code otherwise.
 If EEXIST is returned, the second return value will be an array of the clone's
 snapshots whose names collide with snapshots of the parent filesystem.
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
 Clone to be promoted.
-.Ed
-.It Em zfs.sync.rollback(filesystem)
+.El
+.It Fn zfs.sync.rollback filesystem
 Rollback to the previous snapshot for a dataset.
 Returns 0 on successful rollback, or a nonzero error code otherwise.
 Rollbacks can be performed on filesystems or zvols, but not on snapshots
 or mounted datasets.
 EBUSY is returned in the case where the filesystem is mounted.
 .Pp
-filesystem (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar filesystem Pq string
 Filesystem to rollback.
-.Ed
-.It Em zfs.sync.snapshot(dataset)
+.El
+.It Fn zfs.sync.set_prop dataset property value
+Sets the given property on a dataset.
+Currently only user properties are supported.
+Returns 0 if the property was set, or a nonzero error code otherwise.
+.Pp
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
+The dataset where the property will be set.
+.It Ar property Pq string
+The property to set.
+.It Ar value Pq string
+The value of the property to be set.
+.El
+.It Fn zfs.sync.snapshot dataset
 Create a snapshot of a filesystem.
 Returns 0 if the snapshot was successfully created,
 and a nonzero error code otherwise.
@@ -393,89 +419,142 @@
 Note: Taking a snapshot will fail on any pool older than legacy version 27.
 To enable taking snapshots from ZCP scripts, the pool must be upgraded.
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
 Name of snapshot to create.
-.Ed
+.El
+.It Fn zfs.sync.bookmark source newbookmark
+Create a bookmark of an existing source snapshot or bookmark.
+Returns 0 if the new bookmark was successfully created,
+and a nonzero error code otherwise.
+.Pp
+Note: Bookmarking requires the corresponding pool feature to be enabled.
+.Pp
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar source Pq string
+Full name of the existing snapshot or bookmark.
+.It Ar newbookmark Pq string
+Full name of the new bookmark.
+.El
 .El
 .It Sy zfs.check submodule
-For each function in the zfs.sync submodule, there is a corresponding zfs.check
+For each function in the
+.Sy zfs.sync
+submodule, there is a corresponding
+.Sy zfs.check
 function which performs a "dry run" of the same operation.
-Each takes the same arguments as its zfs.sync counterpart and returns 0 if the
-operation would succeed, or a non-zero error code if it would fail, along with
-any other error details.
+Each takes the same arguments as its
+.Sy zfs.sync
+counterpart and returns 0 if the operation would succeed,
+or a non-zero error code if it would fail, along with any other error details.
 That is, each has the same behavior as the corresponding sync function except
 for actually executing the requested change.
 For example,
-.Em zfs.check.destroy("fs")
+.Fn zfs.check.destroy \&"fs"
 returns 0 if
-.Em zfs.sync.destroy("fs")
+.Fn zfs.sync.destroy \&"fs"
 would successfully destroy the dataset.
 .Pp
-The available zfs.check functions are:
-.Bl -tag -width "xx"
-.It Em zfs.check.destroy(dataset, [defer=true|false])
-.It Em zfs.check.promote(dataset)
-.It Em zfs.check.rollback(filesystem)
-.It Em zfs.check.snapshot(dataset)
+The available
+.Sy zfs.check
+functions are:
+.Bl -tag -compact -width "xx"
+.It Sy zfs.check.destroy Ns Pq Ar dataset , Op Ar defer Ns = Ns Sy true Ns | Ns Sy false
+.It Fn zfs.check.promote dataset
+.It Fn zfs.check.rollback filesystem
+.It Fn zfs.check.set_prop dataset property value
+.It Fn zfs.check.snapshot dataset
 .El
 .It Sy zfs.list submodule
 The zfs.list submodule provides functions for iterating over datasets and
 properties.
 Rather than returning tables, these functions act as Lua iterators, and are
 generally used as follows:
-.Bd -literal -offset indent
-for child in zfs.list.children("rpool") do
+.Bd -literal -compact -offset indent
+.No for child in Fn zfs.list.children \&"rpool" No do
     ...
 end
 .Ed
 .Pp
-The available zfs.list functions are:
+The available
+.Sy zfs.list
+functions are:
 .Bl -tag -width "xx"
-.It Em zfs.list.clones(snapshot)
+.It Fn zfs.list.clones snapshot
 Iterate through all clones of the given snapshot.
 .Pp
-snapshot (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar snapshot Pq string
 Must be a valid snapshot path in the current pool.
-.Ed
-.It Em zfs.list.snapshots(dataset)
+.El
+.It Fn zfs.list.snapshots dataset
 Iterate through all snapshots of the given dataset.
-Each snapshot is returned as a string containing the full dataset name, e.g.
-"pool/fs@snap".
+Each snapshot is returned as a string containing the full dataset name,
+e.g. "pool/fs@snap".
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
 Must be a valid filesystem or volume.
-.Ed
-.It Em zfs.list.children(dataset)
+.El
+.It Fn zfs.list.children dataset
 Iterate through all direct children of the given dataset.
-Each child is returned as a string containing the full dataset name, e.g.
-"pool/fs/child".
+Each child is returned as a string containing the full dataset name,
+e.g. "pool/fs/child".
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
 Must be a valid filesystem or volume.
-.Ed
-.It Em zfs.list.properties(dataset)
-Iterate through all user properties for the given dataset.
+.El
+.It Fn zfs.list.bookmarks dataset
+Iterate through all bookmarks of the given dataset.
+Each bookmark is returned as a string containing the full dataset name,
+e.g. "pool/fs#bookmark".
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
+Must be a valid filesystem or volume.
+.El
+.It Fn zfs.list.holds snapshot
+Iterate through all user holds on the given snapshot.
+Each hold is returned
+as a pair of the hold's tag and the timestamp (in seconds since the epoch) at
+which it was created.
+.Pp
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar snapshot Pq string
+Must be a valid snapshot.
+.El
+.It Fn zfs.list.properties dataset
+An alias for zfs.list.user_properties (see relevant entry).
+.Pp
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
 Must be a valid filesystem, snapshot, or volume.
-.Ed
-.It Em zfs.list.system_properties(dataset)
+.El
+.It Fn zfs.list.user_properties dataset
+Iterate through all user properties for the given dataset.
+For each step of the iteration, output the property name, its value,
+and its source.
+Throws a Lua error if the dataset is invalid.
+.Pp
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
+Must be a valid filesystem, snapshot, or volume.
+.El
+.It Fn zfs.list.system_properties dataset
 Returns an array of strings, the names of the valid system (non-user defined)
 properties for the given dataset.
 Throws a Lua error if the dataset is invalid.
 .Pp
-dataset (string)
-.Bd -ragged -compact -offset "xxxx"
+.Bl -tag -compact -width "snapshot (string)"
+.It Ar dataset Pq string
 Must be a valid filesystem, snapshot or volume.
-.Ed
 .El
 .El
+.El
+.
 .Sh EXAMPLES
+.
 .Ss Example 1
 The following channel program recursively destroys a filesystem and all its
 snapshots and children in a naive manner.
@@ -492,6 +571,7 @@
 end
 destroy_recursive("pool/somefs")
 .Ed
+.
 .Ss Example 2
 A more verbose and robust version of the same channel program, which
 properly detects and reports errors, and also takes the dataset to destroy
@@ -530,6 +610,7 @@
 results["failed"] = failed
 return results
 .Ed
+.
 .Ss Example 3
 The following function performs a forced promote operation by attempting to
 promote the given clone and destroying any conflicting snapshots.

diff --git a/zfs/man/man8/zfs-project.8 b/zfs/man/man8/zfs-project.8
new file mode 100644
index 0000000..f264a11
--- /dev/null
+++ b/zfs/man/man8/zfs-project.8

@@ -0,0 +1,141 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-PROJECT 8
+.Os
+.
+.Sh NAME
+.Nm zfs-project
+.Nd manage projects in ZFS filesystem
+.Sh SYNOPSIS
+.Nm zfs
+.Cm project
+.Oo Fl d Ns | Ns Fl r Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.Nm zfs
+.Cm project
+.Fl C
+.Oo Fl kr Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.Nm zfs
+.Cm project
+.Fl c
+.Oo Fl 0 Ns Oc
+.Oo Fl d Ns | Ns Fl r Ns Oc
+.Op Fl p Ar id
+.Ar file Ns | Ns Ar directory Ns …
+.Nm zfs
+.Cm project
+.Op Fl p Ar id
+.Oo Fl rs Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm project
+.Oo Fl d Ns | Ns Fl r Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.Xc
+List project identifier (ID) and inherit flag of files and directories.
+.Bl -tag -width "-d"
+.It Fl d
+Show the directory project ID and inherit flag, not its children.
+.It Fl r
+List subdirectories recursively.
+.El
+.It Xo
+.Nm zfs
+.Cm project
+.Fl C
+.Oo Fl kr Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.Xc
+Clear project inherit flag and/or ID on the files and directories.
+.Bl -tag -width "-k"
+.It Fl k
+Keep the project ID unchanged.
+If not specified, the project ID will be reset to zero.
+.It Fl r
+Clear subdirectories' flags recursively.
+.El
+.It Xo
+.Nm zfs
+.Cm project
+.Fl c
+.Oo Fl 0 Ns Oc
+.Oo Fl d Ns | Ns Fl r Ns Oc
+.Op Fl p Ar id
+.Ar file Ns | Ns Ar directory Ns …
+.Xc
+Check project ID and inherit flag on the files and directories:
+report entries without the project inherit flag, or with project IDs different from the
+target directory's project ID or the one specified with
+.Fl p .
+.Bl -tag -width "-p id"
+.It Fl 0
+Delimit filenames with a NUL byte instead of newline.
+.It Fl d
+Check the directory project ID and inherit flag, not its children.
+.It Fl p Ar id
+Compare to
+.Ar id
+instead of the target files and directories' project IDs.
+.It Fl r
+Check subdirectories recursively.
+.El
+.It Xo
+.Nm zfs
+.Cm project
+.Fl p Ar id
+.Oo Fl rs Ns Oc
+.Ar file Ns | Ns Ar directory Ns …
+.Xc
+Set project ID and/or inherit flag on the files and directories.
+.Bl -tag -width "-p id"
+.It Fl p Ar id
+Set the project ID to the given value.
+.It Fl r
+Set on subdirectories recursively.
+.It Fl s
+Set project inherit flag on the given files and directories.
+This is usually used for setting up tree quotas with
+.Fl r .
+In that case, the directory's project ID
+will be set for all its descendants, unless specified explicitly with
+.Fl p .
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-projectspace 8

diff --git a/zfs/man/man8/zfs-projectspace.8 b/zfs/man/man8/zfs-projectspace.8
new file mode 120000
index 0000000..8bc2f1d
--- /dev/null
+++ b/zfs/man/man8/zfs-projectspace.8

@@ -0,0 +1 @@
+zfs-userspace.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-promote.8 b/zfs/man/man8/zfs-promote.8
new file mode 100644
index 0000000..ba8cd5f
--- /dev/null
+++ b/zfs/man/man8/zfs-promote.8

@@ -0,0 +1,64 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFS-PROMOTE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-promote
+.Nd promote clone dataset to no longer depend on origin snapshot
+.Sh SYNOPSIS
+.Nm zfs
+.Cm promote
+.Ar clone
+.
+.Sh DESCRIPTION
+The
+.Nm zfs Cm promote
+command makes it possible to destroy the dataset that the clone was created from.
+The clone parent-child dependency relationship is reversed, so that the origin
+dataset becomes a clone of the specified dataset.
+.Pp
+The snapshot that was cloned, and any snapshots previous to this snapshot, are
+now owned by the promoted clone.
+The space they use moves from the origin dataset to the promoted clone, so
+enough space must be available to accommodate these snapshots.
+No new space is consumed by this operation, but the space accounting is
+adjusted.
+The promoted clone must not have any conflicting snapshot names of its own.
+The
+.Nm zfs Cm rename
+subcommand can be used to rename any conflicting snapshots.
+.
+.Sh SEE ALSO
+.Xr zfs-clone 8 ,
+.Xr zfs-rename 8

diff --git a/zfs/man/man8/zfs-receive.8 b/zfs/man/man8/zfs-receive.8
new file mode 100644
index 0000000..b81bfc5
--- /dev/null
+++ b/zfs/man/man8/zfs-receive.8

@@ -0,0 +1,400 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd February 16, 2020
+.Dt ZFS-RECEIVE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-receive
+.Nd create snapshot from backup stream
+.Sh SYNOPSIS
+.Nm zfs
+.Cm receive
+.Op Fl FhMnsuv
+.Op Fl o Sy origin Ns = Ns Ar snapshot
+.Op Fl o Ar property Ns = Ns Ar value
+.Op Fl x Ar property
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Nm zfs
+.Cm receive
+.Op Fl FhMnsuv
+.Op Fl d Ns | Ns Fl e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
+.Op Fl o Ar property Ns = Ns Ar value
+.Op Fl x Ar property
+.Ar filesystem
+.Nm zfs
+.Cm receive
+.Fl A
+.Ar filesystem Ns | Ns Ar volume
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm receive
+.Op Fl FhMnsuv
+.Op Fl o Sy origin Ns = Ns Ar snapshot
+.Op Fl o Ar property Ns = Ns Ar value
+.Op Fl x Ar property
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Xc
+.It Xo
+.Nm zfs
+.Cm receive
+.Op Fl FhMnsuv
+.Op Fl d Ns | Ns Fl e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
+.Op Fl o Ar property Ns = Ns Ar value
+.Op Fl x Ar property
+.Ar filesystem
+.Xc
+Creates a snapshot whose contents are as specified in the stream provided on
+standard input.
+If a full stream is received, then a new file system is created as well.
+Streams are created using the
+.Nm zfs Cm send
+subcommand, which by default creates a full stream.
+.Nm zfs Cm recv
+can be used as an alias for
+.Nm zfs Cm receive .
+.Pp
+If an incremental stream is received, then the destination file system must
+already exist, and its most recent snapshot must match the incremental stream's
+source.
+For
+.Sy zvols ,
+the destination device link is destroyed and recreated, which means the
+.Sy zvol
+cannot be accessed during the
+.Cm receive
+operation.
+.Pp
+When a snapshot replication package stream that is generated by using the
+.Nm zfs Cm send Fl R
+command is received, any snapshots that do not exist on the sending location are
+destroyed by using the
+.Nm zfs Cm destroy Fl d
+command.
+.Pp
+The ability to send and receive deduplicated send streams has been removed.
+However, a deduplicated send stream created with older software can be converted
+to a regular (non-deduplicated) stream by using the
+.Nm zstream Cm redup
+command.
+.Pp
+If
+.Fl o Em property Ns = Ns Ar value
+or
+.Fl x Em property
+is specified, it applies to the effective value of the property throughout
+the entire subtree of replicated datasets.
+Effective property values will be set
+.Pq Fl o
+or inherited
+.Pq Fl x
+on the topmost dataset in the replicated subtree.
+In descendant datasets, if the
+property is set by the send stream, it will be overridden by forcing the
+property to be inherited from the topmost file system.
+Received properties are retained in spite of being overridden
+and may be restored with
+.Nm zfs Cm inherit Fl S .
+Specifying
+.Fl o Sy origin Ns = Ns Em snapshot
+is a special case because, even if
+.Sy origin
+is a read-only property and cannot be set, it's allowed to receive the send
+stream as a clone of the given snapshot.
+.Pp
+Raw encrypted send streams (created with
+.Nm zfs Cm send Fl w )
+may only be received as is, and cannot be re-encrypted, decrypted, or
+recompressed by the receive process.
+Unencrypted streams can be received as
+encrypted datasets, either through inheritance or by specifying encryption
+parameters with the
+.Fl o
+options.
+Note that the
+.Sy keylocation
+property cannot be overridden to
+.Sy prompt
+during a receive.
+This is because the receive process itself is already using
+the standard input for the send stream.
+Instead, the property can be overridden after the receive completes.
+.Pp
+The added security provided by raw sends adds some restrictions to the send
+and receive process.
+ZFS will not allow a mix of raw receives and non-raw receives.
+Specifically, any raw incremental receives that are attempted after
+a non-raw receive will fail.
+Non-raw receives do not have this restriction and,
+therefore, are always possible.
+Because of this, it is best practice to always
+use either raw sends for their security benefits or non-raw sends for their
+flexibility when working with encrypted datasets, but not a combination.
+.Pp
+The reason for this restriction stems from the inherent restrictions of the
+AEAD ciphers that ZFS uses to encrypt data.
+When using ZFS native encryption,
+each block of data is encrypted against a randomly generated number known as
+the "initialization vector" (IV), which is stored in the filesystem metadata.
+This number is required by the encryption algorithms whenever the data is to
+be decrypted.
+Together, all of the IVs provided for all of the blocks in a
+given snapshot are collectively called an "IV set".
+When ZFS performs a raw send, the IV set is transferred from the source
+to the destination in the send stream.
+When ZFS performs a non-raw send, the data is decrypted by the source
+system and re-encrypted by the destination system, creating a snapshot with
+effectively the same data, but a different IV set.
+In order for decryption to work after a raw send, ZFS must ensure that
+the IV sets used on the source and destination sides match.
+When an incremental raw receive is performed on
+top of an existing snapshot, ZFS will check to confirm that the "from"
+snapshot on both the source and destination were using the same IV set,
+ensuring the new IV set is consistent.
+.Pp
+The name of the snapshot
+.Pq and file system, if a full stream is received
+that this subcommand creates depends on the argument type and the use of the
+.Fl d
+or
+.Fl e
+options.
+.Pp
+If the argument is a snapshot name, the specified
+.Ar snapshot
+is created.
+If the argument is a file system or volume name, a snapshot with the same name
+as the sent snapshot is created within the specified
+.Ar filesystem
+or
+.Ar volume .
+If neither the
+.Fl d
+nor the
+.Fl e
+option is specified, the provided target snapshot name is used exactly as
+provided.
+.Pp
+The
+.Fl d
+and
+.Fl e
+options cause the file system name of the target snapshot to be determined by
+appending a portion of the sent snapshot's name to the specified target
+.Ar filesystem .
+If the
+.Fl d
+option is specified, all but the first element of the sent snapshot's file
+system path
+.Pq usually the pool name
+is used and any required intermediate file systems within the specified one are
+created.
+If the
+.Fl e
+option is specified, then only the last element of the sent snapshot's file
+system name
+.Pq i.e. the name of the source file system itself
+is used as the target file system name.
+.Bl -tag -width "-F"
+.It Fl F
+Force a rollback of the file system to the most recent snapshot before
+performing the receive operation.
+If receiving an incremental replication stream
+.Po for example, one generated by
+.Nm zfs Cm send Fl R Op Fl i Ns | Ns Fl I
+.Pc ,
+destroy snapshots and file systems that do not exist on the sending side.
+.It Fl d
+Discard the first element of the sent snapshot's file system name, using the
+remaining elements to determine the name of the target file system for the new
+snapshot as described in the paragraph above.
+.It Fl e
+Discard all but the last element of the sent snapshot's file system name, using
+that element to determine the name of the target file system for the new
+snapshot as described in the paragraph above.
+.It Fl h
+Skip the receive of holds.
+There is no effect if holds are not sent.
+.It Fl M
+Force an unmount of the file system while receiving a snapshot.
+This option is not supported on Linux.
+.It Fl n
+Do not actually receive the stream.
+This can be useful in conjunction with the
+.Fl v
+option to verify the name the receive operation would use.
+.It Fl o Sy origin Ns = Ns Ar snapshot
+Forces the stream to be received as a clone of the given snapshot.
+If the stream is a full send stream, this will create the filesystem
+described by the stream as a clone of the specified snapshot.
+Which snapshot was specified will not affect the success or failure of the
+receive, as long as the snapshot does exist.
+If the stream is an incremental send stream, all the normal verification will be
+performed.
+.It Fl o Em property Ns = Ns Ar value
+Sets the specified property as if the command
+.Nm zfs Cm set Em property Ns = Ns Ar value
+was invoked immediately before the receive.
+When receiving a stream from
+.Nm zfs Cm send Fl R ,
+causes the property to be inherited by all descendant datasets, as though
+.Nm zfs Cm inherit Em property
+was run on any descendant datasets that have this property set on the
+sending system.
+.Pp
+If the send stream was sent with
+.Fl c
+then overriding the
+.Sy compression
+property will have no effect on received data but the
+.Sy compression
+property will be set.
+To have the data recompressed on receive remove the
+.Fl c
+flag from the send stream.
+.Pp
+Any editable property can be set at receive time.
+Set-once properties bound
+to the received data, such as
+.Sy normalization
+and
+.Sy casesensitivity ,
+cannot be set at receive time even when the datasets are newly created by
+.Nm zfs Cm receive .
+Additionally both settable properties
+.Sy version
+and
+.Sy volsize
+cannot be set at receive time.
+.Pp
+The
+.Fl o
+option may be specified multiple times, for different properties.
+An error results if the same property is specified in multiple
+.Fl o
+or
+.Fl x
+options.
+.Pp
+The
+.Fl o
+option may also be used to override encryption properties upon initial receive.
+This allows unencrypted streams to be received as encrypted datasets.
+To cause the received dataset (or root dataset of a recursive stream) to be
+received as an encryption root, specify encryption properties in the same
+manner as is required for
+.Nm zfs Cm create .
+For instance:
+.Dl # Nm zfs Cm send Pa tank/test@snap1 | Nm zfs Cm recv Fl o Sy encryption Ns = Ns Sy on Fl o Sy keyformat Ns = Ns Sy passphrase Fl o Sy keylocation Ns = Ns Pa file:///path/to/keyfile
+.Pp
+Note that
+.Fl o Sy keylocation Ns = Ns Sy prompt
+may not be specified here, since the standard input
+is already being utilized for the send stream.
+Once the receive has completed, you can use
+.Nm zfs Cm set
+to change this setting after the fact.
+Similarly, you can receive a dataset as an encrypted child by specifying
+.Fl x Sy encryption
+to force the property to be inherited.
+Overriding encryption properties (except for
+.Sy keylocation )
+is not possible with raw send streams.
+.It Fl s
+If the receive is interrupted, save the partially received state, rather
+than deleting it.
+Interruption may be due to premature termination of the stream
+.Po e.g. due to network failure or failure of the remote system
+if the stream is being read over a network connection
+.Pc ,
+a checksum error in the stream, termination of the
+.Nm zfs Cm receive
+process, or unclean shutdown of the system.
+.Pp
+The receive can be resumed with a stream generated by
+.Nm zfs Cm send Fl t Ar token ,
+where the
+.Ar token
+is the value of the
+.Sy receive_resume_token
+property of the filesystem or volume which is received into.
+.Pp
+To use this flag, the storage pool must have the
+.Sy extensible_dataset
+feature enabled.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags.
+.It Fl u
+The file system associated with the received stream is not mounted.
+.It Fl v
+Print verbose information about the stream and the time required to perform the
+receive operation.
+.It Fl x Em property
+Ensures that the effective value of the specified property after the
+receive is unaffected by the value of that property in the send stream (if any),
+as if the property had been excluded from the send stream.
+.Pp
+If the specified property is not present in the send stream, this option does
+nothing.
+.Pp
+If a received property needs to be overridden, the effective value will be
+set or inherited, depending on whether the property is inheritable or not.
+.Pp
+In the case of an incremental update,
+.Fl x
+leaves any existing local setting or explicit inheritance unchanged.
+.Pp
+All
+.Fl o
+restrictions (e.g. set-once) apply equally to
+.Fl x .
+.El
+.It Xo
+.Nm zfs
+.Cm receive
+.Fl A
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+Abort an interrupted
+.Nm zfs Cm receive Fl s ,
+deleting its saved partially received state.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-send 8 ,
+.Xr zstream 8

diff --git a/zfs/man/man8/zfs-recv.8 b/zfs/man/man8/zfs-recv.8
new file mode 120000
index 0000000..f11b7ad
--- /dev/null
+++ b/zfs/man/man8/zfs-recv.8

@@ -0,0 +1 @@
+zfs-receive.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-redact.8 b/zfs/man/man8/zfs-redact.8
new file mode 120000
index 0000000..f7c6057
--- /dev/null
+++ b/zfs/man/man8/zfs-redact.8

@@ -0,0 +1 @@
+zfs-send.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-release.8 b/zfs/man/man8/zfs-release.8
new file mode 120000
index 0000000..58809d6
--- /dev/null
+++ b/zfs/man/man8/zfs-release.8

@@ -0,0 +1 @@
+zfs-hold.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-rename.8 b/zfs/man/man8/zfs-rename.8
new file mode 100644
index 0000000..6caee50
--- /dev/null
+++ b/zfs/man/man8/zfs-rename.8

@@ -0,0 +1,123 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd September 1, 2020
+.Dt ZFS-RENAME 8
+.Os
+.
+.Sh NAME
+.Nm zfs-rename
+.Nd rename ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm rename
+.Op Fl f
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Nm zfs
+.Cm rename
+.Fl p
+.Op Fl f
+.Ar filesystem Ns | Ns Ar volume
+.Ar filesystem Ns | Ns Ar volume
+.Nm zfs
+.Cm rename
+.Fl u
+.Op Fl f
+.Ar filesystem Ar filesystem
+.Nm zfs
+.Cm rename
+.Fl r
+.Ar snapshot Ar snapshot
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm rename
+.Op Fl f
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Xc
+.It Xo
+.Nm zfs
+.Cm rename
+.Fl p
+.Op Fl f
+.Ar filesystem Ns | Ns Ar volume
+.Ar filesystem Ns | Ns Ar volume
+.Xc
+.It Xo
+.Nm zfs
+.Cm rename
+.Fl u
+.Op Fl f
+.Ar filesystem
+.Ar filesystem
+.Xc
+Renames the given dataset.
+The new target can be located anywhere in the ZFS hierarchy, with the exception
+of snapshots.
+Snapshots can only be renamed within the parent file system or volume.
+When renaming a snapshot, the parent file system of the snapshot does not need
+to be specified as part of the second argument.
+Renamed file systems can inherit new mount points, in which case they are
+unmounted and remounted at the new mount point.
+.Bl -tag -width "-a"
+.It Fl f
+Force unmount any file systems that need to be unmounted in the process.
+This flag has no effect if used together with the
+.Fl u
+flag.
+.It Fl p
+Creates all the nonexistent parent datasets.
+Datasets created in this manner are automatically mounted according to the
+.Sy mountpoint
+property inherited from their parent.
+.It Fl u
+Do not remount file systems during rename.
+If a file system's
+.Sy mountpoint
+property is set to
+.Sy legacy
+or
+.Sy none ,
+the file system is not unmounted even if this option is not given.
+.El
+.It Xo
+.Nm zfs
+.Cm rename
+.Fl r
+.Ar snapshot Ar snapshot
+.Xc
+Recursively rename the snapshots of all descendant datasets.
+Snapshots are the only dataset that can be renamed recursively.
+.El

diff --git a/zfs/man/man8/zfs-rollback.8 b/zfs/man/man8/zfs-rollback.8
new file mode 100644
index 0000000..08e914b
--- /dev/null
+++ b/zfs/man/man8/zfs-rollback.8

@@ -0,0 +1,75 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-ROLLBACK 8
+.Os
+.
+.Sh NAME
+.Nm zfs-rollback
+.Nd roll ZFS dataset back to snapshot
+.Sh SYNOPSIS
+.Nm zfs
+.Cm rollback
+.Op Fl Rfr
+.Ar snapshot
+.
+.Sh DESCRIPTION
+When a dataset is rolled back, all data that has changed since the snapshot is
+discarded, and the dataset reverts to the state at the time of the snapshot.
+By default, the command refuses to roll back to a snapshot other than the most
+recent one.
+In order to do so, all intermediate snapshots and bookmarks must be destroyed by
+specifying the
+.Fl r
+option.
+.Pp
+The
+.Fl rR
+options do not recursively destroy the child snapshots of a recursive snapshot.
+Only direct snapshots of the specified filesystem are destroyed by either of
+these options.
+To completely roll back a recursive snapshot, you must roll back the individual
+child snapshots.
+.Bl -tag -width "-R"
+.It Fl R
+Destroy any more recent snapshots and bookmarks, as well as any clones of those
+snapshots.
+.It Fl f
+Used with the
+.Fl R
+option to force an unmount of any clone file systems that are to be destroyed.
+.It Fl r
+Destroy any snapshots and bookmarks more recent than the one specified.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-snapshot 8

diff --git a/zfs/man/man8/zfs-send.8 b/zfs/man/man8/zfs-send.8
new file mode 100644
index 0000000..3280a1e
--- /dev/null
+++ b/zfs/man/man8/zfs-send.8

@@ -0,0 +1,654 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd January 12, 2023
+.Dt ZFS-SEND 8
+.Os
+.
+.Sh NAME
+.Nm zfs-send
+.Nd generate backup stream of ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm send
+.Op Fl DLPVRbcehnpsvw
+.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
+.Ar snapshot
+.Nm zfs
+.Cm send
+.Op Fl DLPVcensvw
+.Op Fl i Ar snapshot Ns | Ns Ar bookmark
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Nm zfs
+.Cm send
+.Fl -redact Ar redaction_bookmark
+.Op Fl DLPVcenpv
+.Op Fl i Ar snapshot Ns | Ns Ar bookmark
+.Ar snapshot
+.Nm zfs
+.Cm send
+.Op Fl PVenv
+.Fl t
+.Ar receive_resume_token
+.Nm zfs
+.Cm send
+.Op Fl PVnv
+.Fl S Ar filesystem
+.Nm zfs
+.Cm redact
+.Ar snapshot redaction_bookmark
+.Ar redaction_snapshot Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm send
+.Op Fl DLPVRbcehnpsvw
+.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
+.Ar snapshot
+.Xc
+Creates a stream representation of the second
+.Ar snapshot ,
+which is written to standard output.
+The output can be redirected to a file or to a different system
+.Po for example, using
+.Xr ssh 1
+.Pc .
+By default, a full stream is generated.
+.Bl -tag -width "-D"
+.It Fl D , -dedup
+Deduplicated send is no longer supported.
+This flag is accepted for backwards compatibility, but a regular,
+non-deduplicated stream will be generated.
+.It Fl I Ar snapshot
+Generate a stream package that sends all intermediary snapshots from the first
+snapshot to the second snapshot.
+For example,
+.Fl I Em @a Em fs@d
+is similar to
+.Fl i Em @a Em fs@b Ns \&; Fl i Em @b Em fs@c Ns \&; Fl i Em @c Em fs@d .
+The incremental source may be specified as with the
+.Fl i
+option.
+.It Fl L , -large-block
+Generate a stream which may contain blocks larger than 128KB.
+This flag has no effect if the
+.Sy large_blocks
+pool feature is disabled, or if the
+.Sy recordsize
+property of this filesystem has never been set above 128KB.
+The receiving system must have the
+.Sy large_blocks
+pool feature enabled as well.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy large_blocks
+feature.
+.It Fl P , -parsable
+Print machine-parsable verbose information about the stream package generated.
+.It Fl R , -replicate
+Generate a replication stream package, which will replicate the specified
+file system, and all descendent file systems, up to the named snapshot.
+When received, all properties, snapshots, descendent file systems, and clones
+are preserved.
+.Pp
+If the
+.Fl i
+or
+.Fl I
+flags are used in conjunction with the
+.Fl R
+flag, an incremental replication stream is generated.
+The current values of properties, and current snapshot and file system names are
+set when the stream is received.
+If the
+.Fl F
+flag is specified when this stream is received, snapshots and file systems that
+do not exist on the sending side are destroyed.
+If the
+.Fl R
+flag is used to send encrypted datasets, then
+.Fl w
+must also be specified.
+.It Fl V , -proctitle
+Set the process title to a per-second report of how much data has been sent.
+.It Fl e , -embed
+Generate a more compact stream by using
+.Sy WRITE_EMBEDDED
+records for blocks which are stored more compactly on disk by the
+.Sy embedded_data
+pool feature.
+This flag has no effect if the
+.Sy embedded_data
+feature is disabled.
+The receiving system must have the
+.Sy embedded_data
+feature enabled.
+If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have
+that feature enabled as well.
+Datasets that are sent with this flag may not be
+received as an encrypted dataset, since encrypted datasets cannot use the
+.Sy embedded_data
+feature.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy embedded_data
+feature.
+.It Fl b , -backup
+Sends only received property values whether or not they are overridden by local
+settings, but only if the dataset has ever been received.
+Use this option when you want
+.Nm zfs Cm receive
+to restore received properties backed up on the sent dataset and to avoid
+sending local settings that may have nothing to do with the source dataset,
+but only with how the data is backed up.
+.It Fl c , -compressed
+Generate a more compact stream by using compressed WRITE records for blocks
+which are compressed on disk and in memory
+.Po see the
+.Sy compression
+property for details
+.Pc .
+If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have
+that feature enabled as well.
+If the
+.Sy large_blocks
+feature is enabled on the sending system but the
+.Fl L
+option is not supplied in conjunction with
+.Fl c ,
+then the data will be decompressed before sending so it can be split into
+smaller block sizes.
+Streams sent with
+.Fl c
+will not have their data recompressed on the receiver side using
+.Fl o Sy compress Ns = Ar value .
+The data will stay compressed as it was from the sender.
+The new compression property will be set for future data.
+.It Fl w , -raw
+For encrypted datasets, send data exactly as it exists on disk.
+This allows backups to be taken even if encryption keys are not currently loaded.
+The backup may then be received on an untrusted machine since that machine will
+not have the encryption keys to read the protected data or alter it without
+being detected.
+Upon being received, the dataset will have the same encryption
+keys as it did on the send side, although the
+.Sy keylocation
+property will be defaulted to
+.Sy prompt
+if not otherwise provided.
+For unencrypted datasets, this flag will be equivalent to
+.Fl Lec .
+Note that if you do not use this flag for sending encrypted datasets, data will
+be sent unencrypted and may be re-encrypted with a different encryption key on
+the receiving system, which will disable the ability to do a raw send to that
+system for incrementals.
+.It Fl h , -holds
+Generate a stream package that includes any snapshot holds (created with the
+.Nm zfs Cm hold
+command), and indicate to
+.Nm zfs Cm receive
+that the holds be applied to the dataset on the receiving system.
+.It Fl i Ar snapshot
+Generate an incremental stream from the first
+.Ar snapshot
+.Pq the incremental source
+to the second
+.Ar snapshot
+.Pq the incremental target .
+The incremental source can be specified as the last component of the snapshot
+name
+.Po the
+.Sy @
+character and following
+.Pc
+and it is assumed to be from the same file system as the incremental target.
+.Pp
+If the destination is a clone, the source may be the origin snapshot, which must
+be fully specified
+.Po for example,
+.Em pool/fs@origin ,
+not just
+.Em @origin
+.Pc .
+.It Fl n , -dryrun
+Do a dry-run
+.Pq Qq No-op
+send.
+Do not generate any actual send data.
+This is useful in conjunction with the
+.Fl v
+or
+.Fl P
+flags to determine what data will be sent.
+In this case, the verbose output will be written to standard output
+.Po contrast with a non-dry-run, where the stream is written to standard output
+and the verbose output goes to standard error
+.Pc .
+.It Fl p , -props
+Include the dataset's properties in the stream.
+This flag is implicit when
+.Fl R
+is specified.
+The receiving system must also support this feature.
+Sends of encrypted datasets must use
+.Fl w
+when using this flag.
+.It Fl s , -skip-missing
+Allows sending a replication stream even when there are snapshots missing in the
+hierarchy.
+When a snapshot is missing, instead of throwing an error and aborting the send,
+a warning is printed to the standard error stream and the dataset to which it belongs
+and its descendents are skipped.
+This flag can only be used in conjunction with
+.Fl R .
+.It Fl v , -verbose
+Print verbose information about the stream package generated.
+This information includes a per-second report of how much data has been sent.
+.Pp
+The format of the stream is committed.
+You will be able to receive your streams on future versions of ZFS.
+.El
+.It Xo
+.Nm zfs
+.Cm send
+.Op Fl DLPVcenvw
+.Op Fl i Ar snapshot Ns | Ns Ar bookmark
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
+.Xc
+Generate a send stream, which may be of a filesystem, and may be incremental
+from a bookmark.
+If the destination is a filesystem or volume, the pool must be read-only, or the
+filesystem must not be mounted.
+When the stream generated from a filesystem or volume is received, the default
+snapshot name will be
+.Qq --head-- .
+.Bl -tag -width "-D"
+.It Fl D , -dedup
+Deduplicated send is no longer supported.
+This flag is accepted for backwards compatibility, but a regular,
+non-deduplicated stream will be generated.
+.It Fl L , -large-block
+Generate a stream which may contain blocks larger than 128KB.
+This flag has no effect if the
+.Sy large_blocks
+pool feature is disabled, or if the
+.Sy recordsize
+property of this filesystem has never been set above 128KB.
+The receiving system must have the
+.Sy large_blocks
+pool feature enabled as well.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy large_blocks
+feature.
+.It Fl P , -parsable
+Print machine-parsable verbose information about the stream package generated.
+.It Fl c , -compressed
+Generate a more compact stream by using compressed WRITE records for blocks
+which are compressed on disk and in memory
+.Po see the
+.Sy compression
+property for details
+.Pc .
+If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have
+that feature enabled as well.
+If the
+.Sy large_blocks
+feature is enabled on the sending system but the
+.Fl L
+option is not supplied in conjunction with
+.Fl c ,
+then the data will be decompressed before sending so it can be split into
+smaller block sizes.
+.It Fl w , -raw
+For encrypted datasets, send data exactly as it exists on disk.
+This allows backups to be taken even if encryption keys are not currently loaded.
+The backup may then be received on an untrusted machine since that machine will
+not have the encryption keys to read the protected data or alter it without
+being detected.
+Upon being received, the dataset will have the same encryption
+keys as it did on the send side, although the
+.Sy keylocation
+property will be defaulted to
+.Sy prompt
+if not otherwise provided.
+For unencrypted datasets, this flag will be equivalent to
+.Fl Lec .
+Note that if you do not use this flag for sending encrypted datasets, data will
+be sent unencrypted and may be re-encrypted with a different encryption key on
+the receiving system, which will disable the ability to do a raw send to that
+system for incrementals.
+.It Fl e , -embed
+Generate a more compact stream by using
+.Sy WRITE_EMBEDDED
+records for blocks which are stored more compactly on disk by the
+.Sy embedded_data
+pool feature.
+This flag has no effect if the
+.Sy embedded_data
+feature is disabled.
+The receiving system must have the
+.Sy embedded_data
+feature enabled.
+If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have
+that feature enabled as well.
+Datasets that are sent with this flag may not be received as an encrypted dataset,
+since encrypted datasets cannot use the
+.Sy embedded_data
+feature.
+See
+.Xr zpool-features 7
+for details on ZFS feature flags and the
+.Sy embedded_data
+feature.
+.It Fl i Ar snapshot Ns | Ns Ar bookmark
+Generate an incremental send stream.
+The incremental source must be an earlier snapshot in the destination's history.
+It will commonly be an earlier snapshot in the destination's file system, in
+which case it can be specified as the last component of the name
+.Po the
+.Sy #
+or
+.Sy @
+character and following
+.Pc .
+.Pp
+If the incremental target is a clone, the incremental source can be the origin
+snapshot, or an earlier snapshot in the origin's filesystem, or the origin's
+origin, etc.
+.It Fl n , -dryrun
+Do a dry-run
+.Pq Qq No-op
+send.
+Do not generate any actual send data.
+This is useful in conjunction with the
+.Fl v
+or
+.Fl P
+flags to determine what data will be sent.
+In this case, the verbose output will be written to standard output
+.Po contrast with a non-dry-run, where the stream is written to standard output
+and the verbose output goes to standard error
+.Pc .
+.It Fl v , -verbose
+Print verbose information about the stream package generated.
+This information includes a per-second report of how much data has been sent.
+.El
+.It Xo
+.Nm zfs
+.Cm send
+.Fl -redact Ar redaction_bookmark
+.Op Fl DLPVcenpv
+.Op Fl i Ar snapshot Ns | Ns Ar bookmark
+.Ar snapshot
+.Xc
+Generate a redacted send stream.
+This send stream contains all blocks from the snapshot being sent that aren't
+included in the redaction list contained in the bookmark specified by the
+.Fl -redact
+(or
+.Fl d )
+flag.
+The resulting send stream is said to be redacted with respect to the snapshots
+the bookmark specified by the
+.Fl -redact No flag was created with.
+The bookmark must have been created by running
+.Nm zfs Cm redact
+on the snapshot being sent.
+.Pp
+This feature can be used to allow clones of a filesystem to be made available on
+a remote system, in the case where their parent need not (or needs to not) be
+usable.
+For example, if a filesystem contains sensitive data, and it has clones where
+that sensitive data has been secured or replaced with dummy data, redacted sends
+can be used to replicate the secured data without replicating the original
+sensitive data, while still sharing all possible blocks.
+A snapshot that has been redacted with respect to a set of snapshots will
+contain all blocks referenced by at least one snapshot in the set, but will
+contain none of the blocks referenced by none of the snapshots in the set.
+In other words, if all snapshots in the set have modified a given block in the
+parent, that block will not be sent; but if one or more snapshots have not
+modified a block in the parent, they will still reference the parent's block, so
+that block will be sent.
+Note that only user data will be redacted.
+.Pp
+When the redacted send stream is received, we will generate a redacted
+snapshot.
+Due to the nature of redaction, a redacted dataset can only be used in the
+following ways:
+.Bl -enum -width "a."
+.It
+To receive, as a clone, an incremental send from the original snapshot to one
+of the snapshots it was redacted with respect to.
+In this case, the stream will produce a valid dataset when received because all
+blocks that were redacted in the parent are guaranteed to be present in the
+child's send stream.
+This use case will produce a normal snapshot, which can be used just like other
+snapshots.
+.
+.It
+To receive an incremental send from the original snapshot to something
+redacted with respect to a subset of the set of snapshots the initial snapshot
+was redacted with respect to.
+In this case, each block that was redacted in the original is still redacted
+(redacting with respect to additional snapshots causes less data to be redacted
+(because the snapshots define what is permitted, and everything else is
+redacted)).
+This use case will produce a new redacted snapshot.
+.It
+To receive an incremental send from a redaction bookmark of the original
+snapshot that was created when redacting with respect to a subset of the set of
+snapshots the initial snapshot was created with respect to,
+to anything else.
+A send stream from such a redaction bookmark will contain all of the blocks
+necessary to fill in any redacted data, should it be needed, because the sending
+system is aware of what blocks were originally redacted.
+This will either produce a normal snapshot or a redacted one, depending on
+whether the new send stream is redacted.
+.It
+To receive an incremental send from a redacted version of the initial
+snapshot that is redacted with respect to a subset of the set of snapshots the
+initial snapshot was created with respect to.
+A send stream from a compatible redacted dataset will contain all of the blocks
+necessary to fill in any redacted data.
+This will either produce a normal snapshot or a redacted one, depending on
+whether the new send stream is redacted.
+.It
+To receive a full send as a clone of the redacted snapshot.
+Since the stream is a full send, it definitionally contains all the data needed
+to create a new dataset.
+This use case will either produce a normal snapshot or a redacted one, depending
+on whether the full send stream was redacted.
+.El
+.Pp
+These restrictions are detected and enforced by
+.Nm zfs Cm receive ;
+a redacted send stream will contain the list of snapshots that the stream is
+redacted with respect to.
+These are stored with the redacted snapshot, and are used to detect and
+correctly handle the cases above.
+Note that for technical reasons,
+raw sends and redacted sends cannot be combined at this time.
+.It Xo
+.Nm zfs
+.Cm send
+.Op Fl PVenv
+.Fl t
+.Ar receive_resume_token
+.Xc
+Creates a send stream which resumes an interrupted receive.
+The
+.Ar receive_resume_token
+is the value of this property on the filesystem or volume that was being
+received into.
+See the documentation for
+.Nm zfs Cm receive Fl s
+for more details.
+.It Xo
+.Nm zfs
+.Cm send
+.Op Fl PVnv
+.Op Fl i Ar snapshot Ns | Ns Ar bookmark
+.Fl S
+.Ar filesystem
+.Xc
+Generate a send stream from a dataset that has been partially received.
+.Bl -tag -width "-L"
+.It Fl S , -saved
+This flag requires that the specified filesystem previously received a resumable
+send that did not finish and was interrupted.
+In such scenarios this flag
+enables the user to send this partially received state.
+Using this flag will always use the last fully received snapshot
+as the incremental source if it exists.
+.El
+.It Xo
+.Nm zfs
+.Cm redact
+.Ar snapshot redaction_bookmark
+.Ar redaction_snapshot Ns …
+.Xc
+Generate a new redaction bookmark.
+In addition to the typical bookmark information, a redaction bookmark contains
+the list of redacted blocks and the list of redaction snapshots specified.
+The redacted blocks are blocks in the snapshot which are not referenced by any
+of the redaction snapshots.
+These blocks are found by iterating over the metadata in each redaction snapshot
+to determine what has been changed since the target snapshot.
+Redaction is designed to support redacted zfs sends; see the entry for
+.Nm zfs Cm send
+for more information on the purpose of this operation.
+If a redact operation fails partway through (due to an error or a system
+failure), the redaction can be resumed by rerunning the same command.
+.El
+.Ss Redaction
+ZFS has support for a limited version of data subsetting, in the form of
+redaction.
+Using the
+.Nm zfs Cm redact
+command, a
+.Sy redaction bookmark
+can be created that stores a list of blocks containing sensitive information.
+When provided to
+.Nm zfs Cm send ,
+this causes a
+.Sy redacted send
+to occur.
+Redacted sends omit the blocks containing sensitive information,
+replacing them with REDACT records.
+When these send streams are received, a
+.Sy redacted dataset
+is created.
+A redacted dataset cannot be mounted by default, since it is incomplete.
+It can be used to receive other send streams.
+In this way datasets can be used for data backup and replication,
+with all the benefits that zfs send and receive have to offer,
+while protecting sensitive information from being
+stored on less-trusted machines or services.
+.Pp
+For the purposes of redaction, there are two steps to the process:
+a redact step, and a send/receive step.
+First, a redaction bookmark is created.
+This is done by providing the
+.Nm zfs Cm redact
+command with a parent snapshot, a bookmark to be created, and a number of
+redaction snapshots.
+These redaction snapshots must be descendants of the parent snapshot,
+and they should modify data that is considered sensitive in some way.
+Any blocks of data modified by all of the redaction snapshots will
+be listed in the redaction bookmark, because they represent the truly sensitive
+information.
+When it comes to the send step, the send process will not send
+the blocks listed in the redaction bookmark, instead replacing them with
+REDACT records.
+When received on the target system, this will create a
+redacted dataset, missing the data that corresponds to the blocks in the
+redaction bookmark on the sending system.
+The incremental send streams from
+the original parent to the redaction snapshots can then also be received on
+the target system, and this will produce a complete snapshot that can be used
+normally.
+Incrementals from one snapshot on the parent filesystem and another
+can also be done by sending from the redaction bookmark, rather than the
+snapshots themselves.
+.Pp
+In order to make the purpose of the feature more clear, an example is provided.
+Consider a zfs filesystem containing four files.
+These files represent information for an online shopping service.
+One file contains a list of usernames and passwords, another contains purchase histories,
+a third contains click tracking data, and a fourth contains user preferences.
+The owner of this data wants to make it available for their development teams to
+test against, and their market research teams to do analysis on.
+The development teams need information about user preferences and the click
+tracking data, while the market research teams need information about purchase
+histories and user preferences.
+Neither needs access to the usernames and passwords.
+However, because all of this data is stored in one ZFS filesystem,
+it must all be sent and received together.
+In addition, the owner of the data
+wants to take advantage of features like compression, checksumming, and
+snapshots, so they do want to continue to use ZFS to store and transmit their data.
+Redaction can help them do so.
+First, they would make two clones of a snapshot of the data on the source.
+In one clone, they create the setup they want their market research team to see;
+they delete the usernames and passwords file,
+and overwrite the click tracking data with dummy information.
+In another, they create the setup they want the development teams
+to see, by replacing the passwords with fake information and replacing the
+purchase histories with randomly generated ones.
+They would then create a redaction bookmark on the parent snapshot,
+using snapshots on the two clones as redaction snapshots.
+The parent can then be sent, redacted, to the target
+server where the research and development teams have access.
+Finally, incremental sends from the parent snapshot to each of the clones can be sent
+to and received on the target server; these snapshots are identical to the
+ones on the source, and are ready to be used, while the parent snapshot on the
+target contains none of the username and password data present on the source,
+because it was removed by the redacted send operation.
+.
+.Sh SEE ALSO
+.Xr zfs-bookmark 8 ,
+.Xr zfs-receive 8 ,
+.Xr zfs-redact 8 ,
+.Xr zfs-snapshot 8

diff --git a/zfs/man/man8/zfs-set.8 b/zfs/man/man8/zfs-set.8
new file mode 100644
index 0000000..ccd90f0
--- /dev/null
+++ b/zfs/man/man8/zfs-set.8

@@ -0,0 +1,183 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 2, 2021
+.Dt ZFS-SET 8
+.Os
+.
+.Sh NAME
+.Nm zfs-set
+.Nd set properties on ZFS datasets
+.Sh SYNOPSIS
+.Nm zfs
+.Cm set
+.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns …
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns …
+.Nm zfs
+.Cm get
+.Op Fl r Ns | Ns Fl d Ar depth
+.Op Fl Hp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns … Oc
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Cm all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns …
+.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Oc Ns …
+.Nm zfs
+.Cm inherit
+.Op Fl rS
+.Ar property Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm set
+.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns …
+.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns …
+.Xc
+Sets the given property or list of properties on each specified dataset.
+Only some properties can be edited; see
+.Xr zfsprops 7
+for more information on what properties can be set and acceptable
+values.
+Numeric values can be specified as exact values, or in a human-readable form
+with a suffix of
+.Sy B , K , M , G , T , P , E , Z
+.Po for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes,
+or zettabytes, respectively
+.Pc .
+User properties can be set on snapshots.
+For more information, see the
+.Em User Properties
+section of
+.Xr zfsprops 7 .
+.It Xo
+.Nm zfs
+.Cm get
+.Op Fl r Ns | Ns Fl d Ar depth
+.Op Fl Hp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns … Oc
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Cm all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns …
+.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Oc Ns …
+.Xc
+Displays properties for the given datasets.
+If no datasets are specified, then the command displays properties for all
+datasets on the system.
+For each property, the following columns are displayed:
+.Bl -tag -compact -offset 4n -width "property"
+.It Sy name
+Dataset name
+.It Sy property
+Property name
+.It Sy value
+Property value
+.It Sy source
+Property source
+.Sy local , default , inherited , temporary , received , No or Sy - Pq none .
+.El
+.Pp
+All columns are displayed by default, though this can be controlled by using the
+.Fl o
+option.
+This command takes a comma-separated list of properties as described in the
+.Sx Native Properties
+and
+.Sx User Properties
+sections of
+.Xr zfsprops 7 .
+.Pp
+The value
+.Sy all
+can be used to display all properties that apply to the given dataset's type
+.Pq Sy filesystem , volume , snapshot , No or Sy bookmark .
+.Bl -tag -width "-s source"
+.It Fl H
+Display output in a form more easily parsed by scripts.
+Any headers are omitted, and fields are explicitly separated by a single tab
+instead of an arbitrary amount of space.
+.It Fl d Ar depth
+Recursively display any children of the dataset, limiting the recursion to
+.Ar depth .
+A depth of
+.Sy 1
+will display only the dataset and its direct children.
+.It Fl o Ar field
+A comma-separated list of columns to display; defaults to
+.Sy name , Ns Sy property , Ns Sy value , Ns Sy source .
+.It Fl p
+Display numbers in parsable
+.Pq exact
+values.
+.It Fl r
+Recursively display properties for any children.
+.It Fl s Ar source
+A comma-separated list of sources to display.
+Those properties coming from a source other than those in this list are ignored.
+Each source must be one of the following:
+.Sy local , default , inherited , temporary , received , No or Sy none .
+The default value is all sources.
+.It Fl t Ar type
+A comma-separated list of types to display, where
+.Ar type
+is one of
+.Sy filesystem , snapshot , volume , bookmark , No or Sy all .
+.El
+.It Xo
+.Nm zfs
+.Cm inherit
+.Op Fl rS
+.Ar property Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns …
+.Xc
+Clears the specified property, causing it to be inherited from an ancestor,
+restored to default if no ancestor has the property set, or with the
+.Fl S
+option reverted to the received value if one exists.
+See
+.Xr zfsprops 7
+for a listing of default values, and details on which properties can be
+inherited.
+.Bl -tag -width "-r"
+.It Fl r
+Recursively inherit the given property for all children.
+.It Fl S
+Revert the property to the received value, if one exists;
+otherwise, for non-inheritable properties, to the default;
+otherwise, operate as if the
+.Fl S
+option was not specified.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zfsprops 7 ,
+.Xr zfs-list 8

diff --git a/zfs/man/man8/zfs-share.8 b/zfs/man/man8/zfs-share.8
new file mode 100644
index 0000000..89121ea
--- /dev/null
+++ b/zfs/man/man8/zfs-share.8

@@ -0,0 +1,100 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 17, 2021
+.Dt ZFS-SHARE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-share
+.Nd share and unshare ZFS filesystems
+.Sh SYNOPSIS
+.Nm zfs
+.Cm share
+.Op Fl l
+.Fl a Ns | Ns Ar filesystem
+.Nm zfs
+.Cm unshare
+.Fl a Ns | Ns Ar filesystem Ns | Ns Ar mountpoint
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm share
+.Op Fl l
+.Fl a Ns | Ns Ar filesystem
+.Xc
+Shares available ZFS file systems.
+.Bl -tag -width "-a"
+.It Fl l
+Load keys for encrypted filesystems as they are being mounted.
+This is equivalent to executing
+.Nm zfs Cm load-key
+on each encryption root before mounting it.
+Note that if a filesystem has
+.Sy keylocation Ns = Ns Sy prompt ,
+this will cause the terminal to interactively block after asking for the key.
+.It Fl a
+Share all available ZFS file systems.
+Invoked automatically as part of the boot process.
+.It Ar filesystem
+Share the specified filesystem according to the
+.Sy sharenfs
+and
+.Sy sharesmb
+properties.
+File systems are shared when the
+.Sy sharenfs
+or
+.Sy sharesmb
+property is set.
+.El
+.It Xo
+.Nm zfs
+.Cm unshare
+.Fl a Ns | Ns Ar filesystem Ns | Ns Ar mountpoint
+.Xc
+Unshares currently shared ZFS file systems.
+.Bl -tag -width "-a"
+.It Fl a
+Unshare all available ZFS file systems.
+Invoked automatically as part of the shutdown process.
+.It Ar filesystem Ns | Ns Ar mountpoint
+Unshare the specified filesystem.
+The command can also be given a path to a ZFS file system shared on the system.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr exports 5 ,
+.Xr smb.conf 5 ,
+.Xr zfsprops 7

diff --git a/zfs/man/man8/zfs-snapshot.8 b/zfs/man/man8/zfs-snapshot.8
new file mode 100644
index 0000000..225123f
--- /dev/null
+++ b/zfs/man/man8/zfs-snapshot.8

@@ -0,0 +1,76 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd May 27, 2021
+.Dt ZFS-SNAPSHOT 8
+.Os
+.
+.Sh NAME
+.Nm zfs-snapshot
+.Nd create snapshots of ZFS datasets
+.Sh SYNOPSIS
+.Nm zfs
+.Cm snapshot
+.Op Fl r
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Ar dataset Ns @ Ns Ar snapname Ns …
+.
+.Sh DESCRIPTION
+All previous modifications by successful system calls to the file system are
+part of the snapshots.
+Snapshots are taken atomically, so that all snapshots correspond to the same
+moment in time.
+.Nm zfs Cm snap
+can be used as an alias for
+.Nm zfs Cm snapshot .
+See the
+.Sx Snapshots
+section of
+.Xr zfsconcepts 7
+for details.
+.Bl -tag -width "-o"
+.It Fl o Ar property Ns = Ns Ar value
+Set the specified property; see
+.Nm zfs Cm create
+for details.
+.It Fl r
+Recursively create snapshots of all descendent datasets.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-bookmark 8 ,
+.Xr zfs-clone 8 ,
+.Xr zfs-destroy 8 ,
+.Xr zfs-diff 8 ,
+.Xr zfs-hold 8 ,
+.Xr zfs-rename 8 ,
+.Xr zfs-rollback 8 ,
+.Xr zfs-send 8

diff --git a/zfs/man/man8/zfs-unallow.8 b/zfs/man/man8/zfs-unallow.8
new file mode 120000
index 0000000..8886f33
--- /dev/null
+++ b/zfs/man/man8/zfs-unallow.8

@@ -0,0 +1 @@
+zfs-allow.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-unjail.8 b/zfs/man/man8/zfs-unjail.8
new file mode 120000
index 0000000..04cc05a
--- /dev/null
+++ b/zfs/man/man8/zfs-unjail.8

@@ -0,0 +1 @@
+zfs-jail.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-unload-key.8 b/zfs/man/man8/zfs-unload-key.8
new file mode 120000
index 0000000..d027a41
--- /dev/null
+++ b/zfs/man/man8/zfs-unload-key.8

@@ -0,0 +1 @@
+zfs-load-key.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-unmount.8 b/zfs/man/man8/zfs-unmount.8
new file mode 120000
index 0000000..be0d9db
--- /dev/null
+++ b/zfs/man/man8/zfs-unmount.8

@@ -0,0 +1 @@
+zfs-mount.8
\ No newline at end of file

diff --git a/zfs/man/man8/zfs-upgrade.8 b/zfs/man/man8/zfs-upgrade.8
new file mode 100644
index 0000000..f3620fa
--- /dev/null
+++ b/zfs/man/man8/zfs-upgrade.8

@@ -0,0 +1,103 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFS-UPGRADE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-upgrade
+.Nd manage on-disk version of ZFS filesystems
+.Sh SYNOPSIS
+.Nm zfs
+.Cm upgrade
+.Nm zfs
+.Cm upgrade
+.Fl v
+.Nm zfs
+.Cm upgrade
+.Op Fl r
+.Op Fl V Ar version
+.Fl a Ns | Ns Ar filesystem
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm upgrade
+.Xc
+Displays a list of file systems that are not the most recent version.
+.It Xo
+.Nm zfs
+.Cm upgrade
+.Fl v
+.Xc
+Displays a list of currently supported file system versions.
+.It Xo
+.Nm zfs
+.Cm upgrade
+.Op Fl r
+.Op Fl V Ar version
+.Fl a Ns | Ns Ar filesystem
+.Xc
+Upgrades file systems to a new on-disk version.
+Once this is done, the file systems will no longer be accessible on systems
+running older versions of ZFS.
+.Nm zfs Cm send
+streams generated from new snapshots of these file systems cannot be accessed on
+systems running older versions of ZFS.
+.Pp
+In general, the file system version is independent of the pool version.
+See
+.Xr zpool-features 7
+for information on features of ZFS storage pools.
+.Pp
+In some cases, the file system version and the pool version are interrelated and
+the pool version must be upgraded before the file system version can be
+upgraded.
+.Bl -tag -width "filesystem"
+.It Fl V Ar version
+Upgrade to
+.Ar version .
+If not specified, upgrade to the most recent version.
+This
+option can only be used to increase the version number, and only up to the most
+recent version supported by this version of ZFS.
+.It Fl a
+Upgrade all file systems on all imported pools.
+.It Ar filesystem
+Upgrade the specified file system.
+.It Fl r
+Upgrade the specified file system and all descendent file systems.
+.El
+.El
+.Sh SEE ALSO
+.Xr zpool-upgrade 8

diff --git a/zfs/man/man8/zfs-userspace.8 b/zfs/man/man8/zfs-userspace.8
new file mode 100644
index 0000000..b7bd61b
--- /dev/null
+++ b/zfs/man/man8/zfs-userspace.8

@@ -0,0 +1,187 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\"
+.Dd June 30, 2019
+.Dt ZFS-USERSPACE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-userspace
+.Nd display space and quotas of ZFS dataset
+.Sh SYNOPSIS
+.Nm zfs
+.Cm userspace
+.Op Fl Hinp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.Nm zfs
+.Cm groupspace
+.Op Fl Hinp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.Nm zfs
+.Cm projectspace
+.Op Fl Hp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm userspace
+.Op Fl Hinp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.Xc
+Displays space consumed by, and quotas on, each user in the specified filesystem,
+snapshot, or path.
+If a path is given, the filesystem that contains that path will be used.
+This corresponds to the
+.Sy userused@ Ns Em user ,
+.Sy userobjused@ Ns Em user ,
+.Sy userquota@ Ns Em user ,
+and
+.Sy userobjquota@ Ns Em user
+properties.
+.Bl -tag -width "-S field"
+.It Fl H
+Do not print headers, use tab-delimited output.
+.It Fl S Ar field
+Sort by this field in reverse order.
+See
+.Fl s .
+.It Fl i
+Translate SID to POSIX ID.
+The POSIX ID may be ephemeral if no mapping exists.
+Normal POSIX interfaces
+.Pq like Xr stat 2 , Nm ls Fl l
+perform this translation, so the
+.Fl i
+option allows the output from
+.Nm zfs Cm userspace
+to be compared directly with those utilities.
+However,
+.Fl i
+may lead to confusion if some files were created by an SMB user before an
+SMB-to-POSIX name mapping was established.
+In such a case, some files will be owned by the SMB entity and some by the POSIX
+entity.
+However, the
+.Fl i
+option will report that the POSIX entity has the total usage and quota for both.
+.It Fl n
+Print numeric ID instead of user/group name.
+.It Fl o Ar field Ns Oo , Ns Ar field Oc Ns …
+Display only the specified fields from the following set:
+.Sy type ,
+.Sy name ,
+.Sy used ,
+.Sy quota .
+The default is to display all fields.
+.It Fl p
+Use exact
+.Pq parsable
+numeric output.
+.It Fl s Ar field
+Sort output by this field.
+The
+.Fl s
+and
+.Fl S
+flags may be specified multiple times to sort first by one field, then by
+another.
+The default is
+.Fl s Sy type Fl s Sy name .
+.It Fl t Ar type Ns Oo , Ns Ar type Oc Ns …
+Print only the specified types from the following set:
+.Sy all ,
+.Sy posixuser ,
+.Sy smbuser ,
+.Sy posixgroup ,
+.Sy smbgroup .
+The default is
+.Fl t Sy posixuser , Ns Sy smbuser .
+The default can be changed to include group types.
+.El
+.It Xo
+.Nm zfs
+.Cm groupspace
+.Op Fl Hinp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns … Oc
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.Xc
+Displays space consumed by, and quotas on, each group in the specified
+filesystem or snapshot.
+This subcommand is identical to
+.Cm userspace ,
+except that the default types to display are
+.Fl t Sy posixgroup , Ns Sy smbgroup .
+.It Xo
+.Nm zfs
+.Cm projectspace
+.Op Fl Hp
+.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns … Oc
+.Oo Fl s Ar field Oc Ns …
+.Oo Fl S Ar field Oc Ns …
+.Ar filesystem Ns | Ns Ar snapshot Ns | Ns Ar path
+.Xc
+Displays space consumed by, and quotas on, each project in the specified
+filesystem or snapshot.
+This subcommand is identical to
+.Cm userspace ,
+except that the project identifier is a numeral, not a name.
+It therefore needs neither the option
+.Fl i
+for SID to POSIX ID nor
+.Fl n
+for numeric ID, nor
+.Fl t
+for types.
+.El
+.
+.Sh SEE ALSO
+.Xr zfsprops 7 ,
+.Xr zfs-set 8

diff --git a/zfs/man/man8/zfs-wait.8 b/zfs/man/man8/zfs-wait.8
new file mode 100644
index 0000000..81bc156
--- /dev/null
+++ b/zfs/man/man8/zfs-wait.8

@@ -0,0 +1,65 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 31, 2021
+.Dt ZFS-WAIT 8
+.Os
+.
+.Sh NAME
+.Nm zfs-wait
+.Nd wait for activity in ZFS filesystem to stop
+.Sh SYNOPSIS
+.Nm zfs
+.Cm wait
+.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns …
+.Ar filesystem
+.
+.Sh DESCRIPTION
+Waits until all background activity of the given types has ceased in the given
+filesystem.
+The activity could cease because it has completed or because the filesystem has
+been destroyed or unmounted.
+If no activities are specified, the command waits until background activity of
+every type listed below has ceased.
+If there is no activity of the given types in progress, the command returns
+immediately.
+.Pp
+These are the possible values for
+.Ar activity ,
+along with what each one waits for:
+.Bl -tag -compact -offset Ds -width "deleteq"
+.It Sy deleteq
+The filesystem's internal delete queue to empty
+.El
+.Pp
+Note that the internal delete queue does not finish draining until
+all large files have had time to be fully destroyed and all open file
+handles to unlinked files are closed.
+.
+.Sh SEE ALSO
+.Xr lsof 8

diff --git a/zfs/man/man8/zfs.8 b/zfs/man/man8/zfs.8
index ec15e36..23220b7 100644
--- a/zfs/man/man8/zfs.8
+++ b/zfs/man/man8/zfs.8

@@ -18,318 +18,40 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
 .\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
+.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
+.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
 .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 .\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
 .\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
 .\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
 .\" Copyright 2019 Richard Laager. All rights reserved.
 .\" Copyright 2018 Nexenta Systems, Inc.
-.\" Copyright 2018 Joyent, Inc.
+.\" Copyright 2019 Joyent, Inc.
 .\"
-.Dd April 30, 2019
-.Dt ZFS 8 SMM
-.Os Linux
+.Dd June 30, 2019
+.Dt ZFS 8
+.Os
+.
 .Sh NAME
 .Nm zfs
-.Nd configures ZFS file systems
+.Nd configure ZFS datasets
 .Sh SYNOPSIS
 .Nm
 .Fl ?V
 .Nm
-.Cm create
-.Op Fl p
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem
-.Nm
-.Cm create
-.Op Fl ps
-.Op Fl b Ar blocksize
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Fl V Ar size Ar volume
-.Nm
-.Cm destroy
-.Op Fl Rfnprv
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm destroy
-.Op Fl Rdnprv
-.Ar filesystem Ns | Ns Ar volume Ns @ Ns Ar snap Ns
-.Oo % Ns Ar snap Ns Oo , Ns Ar snap Ns Oo % Ns Ar snap Oc Oc Oc Ns ...
-.Nm
-.Cm destroy
-.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark
-.Nm
-.Cm snapshot
-.Op Fl r
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem Ns @ Ns Ar snapname Ns | Ns Ar volume Ns @ Ns Ar snapname Ns ...
-.Nm
-.Cm rollback
-.Op Fl Rfr
-.Ar snapshot
-.Nm
-.Cm clone
-.Op Fl p
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar snapshot Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm promote
-.Ar clone-filesystem
-.Nm
-.Cm rename
-.Op Fl f
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Nm
-.Cm rename
-.Op Fl fp
-.Ar filesystem Ns | Ns Ar volume
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm rename
-.Fl r
-.Ar snapshot Ar snapshot
-.Nm
-.Cm list
-.Op Fl r Ns | Ns Fl d Ar depth
-.Op Fl Hp
-.Oo Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... Oc
-.Oo Fl s Ar property Oc Ns ...
-.Oo Fl S Ar property Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns ...
-.Nm
-.Cm set
-.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ...
-.Nm
-.Cm get
-.Op Fl r Ns | Ns Fl d Ar depth
-.Op Fl Hp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... Oc
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Cm all | Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Oc Ns ...
-.Nm
-.Cm inherit
-.Op Fl rS
-.Ar property Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ...
-.Nm
-.Cm upgrade
-.Nm
-.Cm upgrade
-.Fl v
-.Nm
-.Cm upgrade
-.Op Fl r
-.Op Fl V Ar version
-.Fl a | Ar filesystem
-.Nm
-.Cm userspace
-.Op Fl Hinp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar snapshot
-.Nm
-.Cm groupspace
-.Op Fl Hinp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar snapshot
-.Nm
-.Cm projectspace
-.Op Fl Hp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Ar filesystem Ns | Ns Ar snapshot
-.Nm
-.Cm project
-.Oo Fl d Ns | Ns Fl r Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Nm
-.Cm project
-.Fl C
-.Oo Fl kr Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Nm
-.Cm project
-.Fl c
-.Oo Fl 0 Ns Oc
-.Oo Fl d Ns | Ns Fl r Ns Oc
-.Op Fl p Ar id
-.Ar file Ns | Ns Ar directory Ns ...
-.Nm
-.Cm project
-.Op Fl p Ar id
-.Oo Fl rs Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Nm
-.Cm mount
-.Nm
-.Cm mount
-.Op Fl Olv
-.Op Fl o Ar options
-.Fl a | Ar filesystem
-.Nm
-.Cm unmount
-.Op Fl f
-.Fl a | Ar filesystem Ns | Ns Ar mountpoint
-.Nm
-.Cm share
-.Fl a | Ar filesystem
-.Nm
-.Cm unshare
-.Fl a | Ar filesystem Ns | Ns Ar mountpoint
-.Nm
-.Cm bookmark
-.Ar snapshot bookmark
-.Nm
-.Cm send
-.Op Fl DLPRbcehnpvw
-.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
-.Ar snapshot
-.Nm
-.Cm send
-.Op Fl LPcenvw
-.Op Fl i Ar snapshot Ns | Ns Ar bookmark
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Nm
-.Cm send
-.Op Fl Penv
-.Fl t Ar receive_resume_token
-.Nm
-.Cm receive
-.Op Fl Fhnsuv
-.Op Fl o Sy origin Ns = Ns Ar snapshot
-.Op Fl o Ar property Ns = Ns Ar value
-.Op Fl x Ar property
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Nm
-.Cm receive
-.Op Fl Fhnsuv
-.Op Fl d Ns | Ns Fl e
-.Op Fl o Sy origin Ns = Ns Ar snapshot
-.Op Fl o Ar property Ns = Ns Ar value
-.Op Fl x Ar property
-.Ar filesystem
-.Nm
-.Cm receive
-.Fl A
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm allow
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm allow
-.Op Fl dglu
-.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns ...
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm allow
-.Op Fl dl
-.Fl e Ns | Ns Sy everyone
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm allow
-.Fl c
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm allow
-.Fl s No @ Ns Ar setname
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm unallow
-.Op Fl dglru
-.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns ...
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm unallow
-.Op Fl dlr
-.Fl e Ns | Ns Sy everyone
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm unallow
-.Op Fl r
-.Fl c
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm unallow
-.Op Fl r
-.Fl s @ Ns Ar setname
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Nm
-.Cm hold
-.Op Fl r
-.Ar tag Ar snapshot Ns ...
-.Nm
-.Cm holds
-.Op Fl rH
-.Ar snapshot Ns ...
-.Nm
-.Cm release
-.Op Fl r
-.Ar tag Ar snapshot Ns ...
-.Nm
-.Cm diff
-.Op Fl FHt
-.Ar snapshot Ar snapshot Ns | Ns Ar filesystem
-.Nm
-.Cm program
-.Op Fl jn
-.Op Fl t Ar instruction-limit
-.Op Fl m Ar memory-limit
-.Ar pool script
-.Op --
-.Ar arg1 No ...
-.Nm
-.Cm load-key
-.Op Fl nr
-.Op Fl L Ar keylocation
-.Fl a | Ar filesystem
-.Nm
-.Cm unload-key
-.Op Fl r
-.Fl a | Ar filesystem
-.Nm
-.Cm change-key
-.Op Fl l
-.Op Fl o Ar keylocation Ns = Ns Ar value
-.Op Fl o Ar keyformat Ns = Ns Ar value
-.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
-.Ar filesystem
-.Nm
-.Cm change-key
-.Fl i
-.Op Fl l
-.Ar filesystem
-.Nm
 .Cm version
+.Nm
+.Cm subcommand
+.Op Ar arguments
+.
 .Sh DESCRIPTION
 The
 .Nm
@@ -337,23 +59,18 @@
 .Xr zpool 8 .
 A dataset is identified by a unique path within the ZFS namespace.
 For example:
-.Bd -literal
-pool/{filesystem,volume,snapshot}
-.Ed
+.Dl pool/{filesystem,volume,snapshot}
 .Pp
 where the maximum length of a dataset name is
-.Dv MAXNAMELEN
-.Pq 256 bytes
+.Sy MAXNAMELEN Pq 256B
 and the maximum amount of nesting allowed in a path is 50 levels deep.
 .Pp
 A dataset can be one of the following:
-.Bl -tag -width "file system"
+.Bl -tag -offset Ds -width "file system"
 .It Sy file system
-A ZFS dataset of type
-.Sy filesystem
-can be mounted within the standard system namespace and behaves like other file
+Can be mounted within the standard system namespace and behaves like other file
 systems.
-While ZFS file systems are designed to be POSIX compliant, known issues exist
+While ZFS file systems are designed to be POSIX-compliant, known issues exist
 that prevent compliance in some cases.
 Applications that depend on standards conformance might fail due to non-standard
 behavior when checking file system free space.
@@ -370,2091 +87,40 @@
 .It Sy bookmark
 Much like a
 .Sy snapshot ,
-but without the hold on on-disk data. It can be used as the source of a send
-(but not for a receive). It is specified as
+but without the hold on on-disk data.
+It can be used as the source of a send (but not for a receive).
+It is specified as
 .Ar filesystem Ns # Ns Ar name
 or
 .Ar volume Ns # Ns Ar name .
 .El
-.Ss ZFS File System Hierarchy
-A ZFS storage pool is a logical collection of devices that provide space for
-datasets.
-A storage pool is also the root of the ZFS file system hierarchy.
-.Pp
-The root of the pool can be accessed as a file system, such as mounting and
-unmounting, taking snapshots, and setting properties.
-The physical storage characteristics, however, are managed by the
-.Xr zpool 8
-command.
 .Pp
 See
-.Xr zpool 8
-for more information on creating and administering pools.
-.Ss Snapshots
-A snapshot is a read-only copy of a file system or volume.
-Snapshots can be created extremely quickly, and initially consume no additional
-space within the pool.
-As data within the active dataset changes, the snapshot consumes more data than
-would otherwise be shared with the active dataset.
-.Pp
-Snapshots can have arbitrary names.
-Snapshots of volumes can be cloned or rolled back, visibility is determined
-by the
-.Sy snapdev
-property of the parent volume.
-.Pp
-File system snapshots can be accessed under the
-.Pa .zfs/snapshot
-directory in the root of the file system.
-Snapshots are automatically mounted on demand and may be unmounted at regular
-intervals.
-The visibility of the
-.Pa .zfs
-directory can be controlled by the
-.Sy snapdir
-property.
-.Ss Bookmarks
-A bookmark is like a snapshot, a read-only copy of a file system or volume.
-Bookmarks can be created extremely quickly, compared to snapshots, and they
-consume no additional space within the pool. Bookmarks can also have arbitrary
-names, much like snapshots.
-.Pp
-Unlike snapshots, bookmarks can not be accessed through the filesystem in any
-way. From a storage standpoint a bookmark just provides a way to reference
-when a snapshot was created as a distinct object. Bookmarks are initially
-tied to a snapshot, not the filesystem or volume, and they will survive if the
-snapshot itself is destroyed. Since they are very light weight there's little
-incentive to destroy them.
-.Ss Clones
-A clone is a writable volume or file system whose initial contents are the same
-as another dataset.
-As with snapshots, creating a clone is nearly instantaneous, and initially
-consumes no additional space.
-.Pp
-Clones can only be created from a snapshot.
-When a snapshot is cloned, it creates an implicit dependency between the parent
-and child.
-Even though the clone is created somewhere else in the dataset hierarchy, the
-original snapshot cannot be destroyed as long as a clone exists.
-The
-.Sy origin
-property exposes this dependency, and the
-.Cm destroy
-command lists any such dependencies, if they exist.
-.Pp
-The clone parent-child dependency relationship can be reversed by using the
-.Cm promote
-subcommand.
-This causes the
-.Qq origin
-file system to become a clone of the specified file system, which makes it
-possible to destroy the file system that the clone was created from.
-.Ss "Mount Points"
-Creating a ZFS file system is a simple operation, so the number of file systems
-per system is likely to be numerous.
-To cope with this, ZFS automatically manages mounting and unmounting file
-systems without the need to edit the
-.Pa /etc/fstab
-file.
-All automatically managed file systems are mounted by ZFS at boot time.
-.Pp
-By default, file systems are mounted under
-.Pa /path ,
-where
-.Ar path
-is the name of the file system in the ZFS namespace.
-Directories are created and destroyed as needed.
-.Pp
-A file system can also have a mount point set in the
-.Sy mountpoint
-property.
-This directory is created as needed, and ZFS automatically mounts the file
-system when the
-.Nm zfs Cm mount Fl a
-command is invoked
-.Po without editing
-.Pa /etc/fstab
-.Pc .
-The
-.Sy mountpoint
-property can be inherited, so if
-.Em pool/home
-has a mount point of
-.Pa /export/stuff ,
-then
-.Em pool/home/user
-automatically inherits a mount point of
-.Pa /export/stuff/user .
-.Pp
-A file system
-.Sy mountpoint
-property of
-.Sy none
-prevents the file system from being mounted.
-.Pp
-If needed, ZFS file systems can also be managed with traditional tools
-.Po
-.Nm mount ,
-.Nm umount ,
-.Pa /etc/fstab
-.Pc .
-If a file system's mount point is set to
-.Sy legacy ,
-ZFS makes no attempt to manage the file system, and the administrator is
-responsible for mounting and unmounting the file system. Because pools must
-be imported before a legacy mount can succeed, administrators should ensure
-that legacy mounts are only attempted after the zpool import process
-finishes at boot time. For example, on machines using systemd, the mount
-option
-.Pp
-.Nm x-systemd.requires=zfs-import.target
-.Pp
-will ensure that the zfs-import completes before systemd attempts mounting
-the filesystem. See systemd.mount(5) for details.
-.Ss Deduplication
-Deduplication is the process for removing redundant data at the block level,
-reducing the total amount of data stored. If a file system has the
-.Sy dedup
-property enabled, duplicate data blocks are removed synchronously. The result
-is that only unique data is stored and common components are shared among files.
-.Pp
-Deduplicating data is a very resource-intensive operation. It is generally
-recommended that you have at least 1.25 GiB of RAM per 1 TiB of storage when
-you enable deduplication. Calculating the exact requirement depends heavily
-on the type of data stored in the pool.
-.Pp
-Enabling deduplication on an improperly-designed system can result in
-performance issues (slow IO and administrative operations). It can potentially
-lead to problems importing a pool due to memory exhaustion. Deduplication
-can consume significant processing power (CPU) and memory as well as generate
-additional disk IO.
-.Pp
-Before creating a pool with deduplication enabled, ensure that you have planned
-your hardware requirements appropriately and implemented appropriate recovery
-practices, such as regular backups. As an alternative to deduplication
-consider using
-.Sy compression=on ,
-as a less resource-intensive alternative.
-.Ss Native Properties
-Properties are divided into two types, native properties and user-defined
-.Po or
-.Qq user
-.Pc
+.Xr zfsconcepts 7
+for details.
+.
+.Ss Properties
+Properties are divided into two types: native properties and user-defined
+.Pq or Qq user
 properties.
 Native properties either export internal statistics or control ZFS behavior.
 In addition, native properties are either editable or read-only.
 User properties have no effect on ZFS behavior, but you can use them to annotate
 datasets in a way that is meaningful in your environment.
-For more information about user properties, see the
-.Sx User Properties
-section, below.
-.Pp
-Every dataset has a set of properties that export statistics about the dataset
-as well as control various behaviors.
-Properties are inherited from the parent unless overridden by the child.
-Some properties apply only to certain types of datasets
-.Pq file systems, volumes, or snapshots .
-.Pp
-The values of numeric properties can be specified using human-readable suffixes
-.Po for example,
-.Sy k ,
-.Sy KB ,
-.Sy M ,
-.Sy Gb ,
-and so forth, up to
-.Sy Z
-for zettabyte
-.Pc .
-The following are all valid
-.Pq and equal
-specifications:
-.Li 1536M, 1.5g, 1.50GB .
-.Pp
-The values of non-numeric properties are case sensitive and must be lowercase,
-except for
-.Sy mountpoint ,
-.Sy sharenfs ,
-and
-.Sy sharesmb .
-.Pp
-The following native properties consist of read-only statistics about the
-dataset.
-These properties can be neither set, nor inherited.
-Native properties apply to all dataset types unless otherwise noted.
-.Bl -tag -width "usedbyrefreservation"
-.It Sy available
-The amount of space available to the dataset and all its children, assuming that
-there is no other activity in the pool.
-Because space is shared within a pool, availability can be limited by any number
-of factors, including physical pool size, quotas, reservations, or other
-datasets within the pool.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy avail .
-.It Sy compressratio
-For non-snapshots, the compression ratio achieved for the
-.Sy used
-space of this dataset, expressed as a multiplier.
-The
-.Sy used
-property includes descendant datasets, and, for clones, does not include the
-space shared with the origin snapshot.
-For snapshots, the
-.Sy compressratio
-is the same as the
-.Sy refcompressratio
-property.
-Compression can be turned on by running:
-.Nm zfs Cm set Sy compression Ns = Ns Sy on Ar dataset .
-The default value is
-.Sy off .
-.It Sy createtxg
-The transaction group (txg) in which the dataset was created. Bookmarks have
-the same
-.Sy createtxg
-as the snapshot they are initially tied to. This property is suitable for
-ordering a list of snapshots, e.g. for incremental send and receive.
-.It Sy creation
-The time this dataset was created.
-.It Sy clones
-For snapshots, this property is a comma-separated list of filesystems or volumes
-which are clones of this snapshot.
-The clones'
-.Sy origin
-property is this snapshot.
-If the
-.Sy clones
-property is not empty, then this snapshot can not be destroyed
-.Po even with the
-.Fl r
-or
-.Fl f
-options
-.Pc .
-The roles of origin and clone can be swapped by promoting the clone with the
-.Nm zfs Cm promote
-command.
-.It Sy defer_destroy
-This property is
-.Sy on
-if the snapshot has been marked for deferred destroy by using the
-.Nm zfs Cm destroy Fl d
-command.
-Otherwise, the property is
-.Sy off .
-.It Sy encryptionroot
-For encrypted datasets, indicates where the dataset is currently inheriting its
-encryption key from. Loading or unloading a key for the
-.Sy encryptionroot
-will implicitly load / unload the key for any inheriting datasets (see
-.Nm zfs Cm load-key
-and
-.Nm zfs Cm unload-key
-for details).
-Clones will always share an
-encryption key with their origin. See the
-.Sx Encryption
-section for details.
-.It Sy filesystem_count
-The total number of filesystems and volumes that exist under this location in
-the dataset tree.
-This value is only available when a
-.Sy filesystem_limit
-has been set somewhere in the tree under which the dataset resides.
-.It Sy keystatus
-Indicates if an encryption key is currently loaded into ZFS. The possible
-values are
-.Sy none ,
-.Sy available ,
-and
-.Sy unavailable .
-See
-.Nm zfs Cm load-key
-and
-.Nm zfs Cm unload-key .
-.It Sy guid
-The 64 bit GUID of this dataset or bookmark which does not change over its
-entire lifetime. When a snapshot is sent to another pool, the received
-snapshot has the same GUID. Thus, the
-.Sy guid
-is suitable to identify a snapshot across pools.
-.It Sy logicalreferenced
-The amount of space that is
-.Qq logically
-accessible by this dataset.
-See the
-.Sy referenced
-property.
-The logical space ignores the effect of the
-.Sy compression
-and
-.Sy copies
-properties, giving a quantity closer to the amount of data that applications
-see.
-However, it does include space consumed by metadata.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy lrefer .
-.It Sy logicalused
-The amount of space that is
-.Qq logically
-consumed by this dataset and all its descendents.
-See the
-.Sy used
-property.
-The logical space ignores the effect of the
-.Sy compression
-and
-.Sy copies
-properties, giving a quantity closer to the amount of data that applications
-see.
-However, it does include space consumed by metadata.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy lused .
-.It Sy mounted
-For file systems, indicates whether the file system is currently mounted.
-This property can be either
-.Sy yes
-or
-.Sy no .
-.It Sy objsetid
-A unique identifier for this dataset within the pool. Unlike the dataset's
-.Sy guid ,
-the
-.Sy objsetid
-of a dataset is not transferred to other pools when the snapshot is copied
-with a send/receive operation.
-The
-.Sy objsetid
-can be reused (for a new dataset) after the dataset is deleted.
-.It Sy origin
-For cloned file systems or volumes, the snapshot from which the clone was
-created.
-See also the
-.Sy clones
-property.
-.It Sy receive_resume_token
-For filesystems or volumes which have saved partially-completed state from
-.Sy zfs receive -s ,
-this opaque token can be provided to
-.Sy zfs send -t
-to resume and complete the
-.Sy zfs receive .
-.It Sy referenced
-The amount of data that is accessible by this dataset, which may or may not be
-shared with other datasets in the pool.
-When a snapshot or clone is created, it initially references the same amount of
-space as the file system or snapshot it was created from, since its contents are
-identical.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy refer .
-.It Sy refcompressratio
-The compression ratio achieved for the
-.Sy referenced
-space of this dataset, expressed as a multiplier.
-See also the
-.Sy compressratio
-property.
-.It Sy snapshot_count
-The total number of snapshots that exist under this location in the dataset
-tree.
-This value is only available when a
-.Sy snapshot_limit
-has been set somewhere in the tree under which the dataset resides.
-.It Sy type
-The type of dataset:
-.Sy filesystem ,
-.Sy volume ,
-or
-.Sy snapshot .
-.It Sy used
-The amount of space consumed by this dataset and all its descendents.
-This is the value that is checked against this dataset's quota and reservation.
-The space used does not include this dataset's reservation, but does take into
-account the reservations of any descendent datasets.
-The amount of space that a dataset consumes from its parent, as well as the
-amount of space that is freed if this dataset is recursively destroyed, is the
-greater of its space used and its reservation.
-.Pp
-The used space of a snapshot
-.Po see the
-.Sx Snapshots
-section
-.Pc
-is space that is referenced exclusively by this snapshot.
-If this snapshot is destroyed, the amount of
-.Sy used
-space will be freed.
-Space that is shared by multiple snapshots isn't accounted for in this metric.
-When a snapshot is destroyed, space that was previously shared with this
-snapshot can become unique to snapshots adjacent to it, thus changing the used
-space of those snapshots.
-The used space of the latest snapshot can also be affected by changes in the
-file system.
-Note that the
-.Sy used
-space of a snapshot is a subset of the
-.Sy written
-space of the snapshot.
-.Pp
-The amount of space used, available, or referenced does not take into account
-pending changes.
-Pending changes are generally accounted for within a few seconds.
-Committing a change to a disk using
-.Xr fsync 2
-or
-.Dv O_SYNC
-does not necessarily guarantee that the space usage information is updated
-immediately.
-.It Sy usedby*
-The
-.Sy usedby*
-properties decompose the
-.Sy used
-properties into the various reasons that space is used.
-Specifically,
-.Sy used No =
-.Sy usedbychildren No +
-.Sy usedbydataset No +
-.Sy usedbyrefreservation No +
-.Sy usedbysnapshots .
-These properties are only available for datasets created on
-.Nm zpool
-.Qo version 13 Qc
-pools.
-.It Sy usedbychildren
-The amount of space used by children of this dataset, which would be freed if
-all the dataset's children were destroyed.
-.It Sy usedbydataset
-The amount of space used by this dataset itself, which would be freed if the
-dataset were destroyed
-.Po after first removing any
-.Sy refreservation
-and destroying any necessary snapshots or descendents
-.Pc .
-.It Sy usedbyrefreservation
-The amount of space used by a
-.Sy refreservation
-set on this dataset, which would be freed if the
-.Sy refreservation
-was removed.
-.It Sy usedbysnapshots
-The amount of space consumed by snapshots of this dataset.
-In particular, it is the amount of space that would be freed if all of this
-dataset's snapshots were destroyed.
-Note that this is not simply the sum of the snapshots'
-.Sy used
-properties because space can be shared by multiple snapshots.
-.It Sy userused Ns @ Ns Em user
-The amount of space consumed by the specified user in this dataset.
-Space is charged to the owner of each file, as displayed by
-.Nm ls Fl l .
-The amount of space charged is displayed by
-.Nm du
-and
-.Nm ls Fl s .
-See the
-.Nm zfs Cm userspace
-subcommand for more information.
-.Pp
-Unprivileged users can access only their own space usage.
-The root user, or a user who has been granted the
-.Sy userused
-privilege with
-.Nm zfs Cm allow ,
-can access everyone's usage.
-.Pp
-The
-.Sy userused Ns @ Ns Em ...
-properties are not displayed by
-.Nm zfs Cm get Sy all .
-The user's name must be appended after the @ symbol, using one of the following
-forms:
-.Bl -bullet -width ""
-.It
-.Em POSIX name
-.Po for example,
-.Sy joe
-.Pc
-.It
-.Em POSIX numeric ID
-.Po for example,
-.Sy 789
-.Pc
-.It
-.Em SID name
-.Po for example,
-.Sy joe.smith@mydomain
-.Pc
-.It
-.Em SID numeric ID
-.Po for example,
-.Sy S-1-123-456-789
-.Pc
-.El
-.Pp
-Files created on Linux always have POSIX owners.
-.It Sy userobjused Ns @ Ns Em user
-The
-.Sy userobjused
-property is similar to
-.Sy userused
-but instead it counts the number of objects consumed by a user. This property
-counts all objects allocated on behalf of the user, it may differ from the
-results of system tools such as
-.Nm df Fl i .
-.Pp
-When the property
-.Sy xattr=on
-is set on a file system additional objects will be created per-file to store
-extended attributes. These additional objects are reflected in the
-.Sy userobjused
-value and are counted against the user's
-.Sy userobjquota .
-When a file system is configured to use
-.Sy xattr=sa
-no additional internal objects are normally required.
-.It Sy userrefs
-This property is set to the number of user holds on this snapshot.
-User holds are set by using the
-.Nm zfs Cm hold
-command.
-.It Sy groupused Ns @ Ns Em group
-The amount of space consumed by the specified group in this dataset.
-Space is charged to the group of each file, as displayed by
-.Nm ls Fl l .
-See the
-.Sy userused Ns @ Ns Em user
-property for more information.
-.Pp
-Unprivileged users can only access their own groups' space usage.
-The root user, or a user who has been granted the
-.Sy groupused
-privilege with
-.Nm zfs Cm allow ,
-can access all groups' usage.
-.It Sy groupobjused Ns @ Ns Em group
-The number of objects consumed by the specified group in this dataset.
-Multiple objects may be charged to the group for each file when extended
-attributes are in use. See the
-.Sy userobjused Ns @ Ns Em user
-property for more information.
-.Pp
-Unprivileged users can only access their own groups' space usage.
-The root user, or a user who has been granted the
-.Sy groupobjused
-privilege with
-.Nm zfs Cm allow ,
-can access all groups' usage.
-.It Sy projectused Ns @ Ns Em project
-The amount of space consumed by the specified project in this dataset. Project
-is identified via the project identifier (ID) that is object-based numeral
-attribute. An object can inherit the project ID from its parent object (if the
-parent has the flag of inherit project ID that can be set and changed via
-.Nm chattr Fl /+P
-or
-.Nm zfs project Fl s )
-when being created. The privileged user can set and change object's project
-ID via
-.Nm chattr Fl p
-or
-.Nm zfs project Fl s
-anytime. Space is charged to the project of each file, as displayed by
-.Nm lsattr Fl p
-or
-.Nm zfs project .
-See the
-.Sy userused Ns @ Ns Em user
-property for more information.
-.Pp
-The root user, or a user who has been granted the
-.Sy projectused
-privilege with
-.Nm zfs allow ,
-can access all projects' usage.
-.It Sy projectobjused Ns @ Ns Em project
-The
-.Sy projectobjused
-is similar to
-.Sy projectused
-but instead it counts the number of objects consumed by project. When the
-property
-.Sy xattr=on
-is set on a fileset, ZFS will create additional objects per-file to store
-extended attributes. These additional objects are reflected in the
-.Sy projectobjused
-value and are counted against the project's
-.Sy projectobjquota .
-When a filesystem is configured to use
-.Sy xattr=sa
-no additional internal objects are required. See the
-.Sy userobjused Ns @ Ns Em user
-property for more information.
-.Pp
-The root user, or a user who has been granted the
-.Sy projectobjused
-privilege with
-.Nm zfs allow ,
-can access all projects' objects usage.
-.It Sy volblocksize
-For volumes, specifies the block size of the volume.
-The
-.Sy blocksize
-cannot be changed once the volume has been written, so it should be set at
-volume creation time.
-The default
-.Sy blocksize
-for volumes is 8 Kbytes.
-Any power of 2 from 512 bytes to 128 Kbytes is valid.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy volblock .
-.It Sy written
-The amount of space
-.Sy referenced
-by this dataset, that was written since the previous snapshot
-.Pq i.e. that is not referenced by the previous snapshot .
-.It Sy written Ns @ Ns Em snapshot
-The amount of
-.Sy referenced
-space written to this dataset since the specified snapshot.
-This is the space that is referenced by this dataset but was not referenced by
-the specified snapshot.
-.Pp
-The
-.Em snapshot
-may be specified as a short snapshot name
-.Po just the part after the
-.Sy @
-.Pc ,
-in which case it will be interpreted as a snapshot in the same filesystem as
-this dataset.
-The
-.Em snapshot
-may be a full snapshot name
-.Po Em filesystem Ns @ Ns Em snapshot Pc ,
-which for clones may be a snapshot in the origin's filesystem
-.Pq or the origin of the origin's filesystem, etc.
-.El
-.Pp
-The following native properties can be used to change the behavior of a ZFS
-dataset.
-.Bl -tag -width ""
-.It Xo
-.Sy aclinherit Ns = Ns Sy discard Ns | Ns Sy noallow Ns | Ns
-.Sy restricted Ns | Ns Sy passthrough Ns | Ns Sy passthrough-x
-.Xc
-Controls how ACEs are inherited when files and directories are created.
-.Bl -tag -width "passthrough-x"
-.It Sy discard
-does not inherit any ACEs.
-.It Sy noallow
-only inherits inheritable ACEs that specify
-.Qq deny
-permissions.
-.It Sy restricted
-default, removes the
-.Sy write_acl
-and
-.Sy write_owner
-permissions when the ACE is inherited.
-.It Sy passthrough
-inherits all inheritable ACEs without any modifications.
-.It Sy passthrough-x
-same meaning as
-.Sy passthrough ,
-except that the
-.Sy owner@ ,
-.Sy group@ ,
-and
-.Sy everyone@
-ACEs inherit the execute permission only if the file creation mode also requests
-the execute bit.
-.El
-.Pp
-When the property value is set to
-.Sy passthrough ,
-files are created with a mode determined by the inheritable ACEs.
-If no inheritable ACEs exist that affect the mode, then the mode is set in
-accordance to the requested mode from the application.
-.Pp
-The
-.Sy aclinherit
-property does not apply to POSIX ACLs.
-.It Sy acltype Ns = Ns Sy off Ns | Ns Sy noacl Ns | Ns Sy posixacl
-Controls whether ACLs are enabled and if so what type of ACL to use.
-.Bl -tag -width "posixacl"
-.It Sy off
-default, when a file system has the
-.Sy acltype
-property set to off then ACLs are disabled.
-.It Sy noacl
-an alias for
-.Sy off
-.It Sy posixacl
-indicates POSIX ACLs should be used. POSIX ACLs are specific to Linux and are
-not functional on other platforms. POSIX ACLs are stored as an extended
-attribute and therefore will not overwrite any existing NFSv4 ACLs which
-may be set.
-.El
-.Pp
-To obtain the best performance when setting
-.Sy posixacl
-users are strongly encouraged to set the
-.Sy xattr=sa
-property. This will result in the POSIX ACL being stored more efficiently on
-disk. But as a consequence, all new extended attributes will only be
-accessible from OpenZFS implementations which support the
-.Sy xattr=sa
-property. See the
-.Sy xattr
-property for more details.
-.It Sy atime Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the access time for files is updated when they are read.
-Turning this property off avoids producing write traffic when reading files and
-can result in significant performance gains, though it might confuse mailers
-and other similar utilities. The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy atime
-and
-.Sy noatime
-mount options. The default value is
-.Sy on .
-See also
-.Sy relatime
-below.
-.It Sy canmount Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy noauto
-If this property is set to
-.Sy off ,
-the file system cannot be mounted, and is ignored by
-.Nm zfs Cm mount Fl a .
-Setting this property to
-.Sy off
-is similar to setting the
-.Sy mountpoint
-property to
-.Sy none ,
-except that the dataset still has a normal
-.Sy mountpoint
-property, which can be inherited.
-Setting this property to
-.Sy off
-allows datasets to be used solely as a mechanism to inherit properties.
-One example of setting
-.Sy canmount Ns = Ns Sy off
-is to have two datasets with the same
-.Sy mountpoint ,
-so that the children of both datasets appear in the same directory, but might
-have different inherited characteristics.
-.Pp
-When set to
-.Sy noauto ,
-a dataset can only be mounted and unmounted explicitly.
-The dataset is not mounted automatically when the dataset is created or
-imported, nor is it mounted by the
-.Nm zfs Cm mount Fl a
-command or unmounted by the
-.Nm zfs Cm unmount Fl a
-command.
-.Pp
-This property is not inherited.
-.It Xo
-.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns
-.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns
-.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr
-.Xc
-Controls the checksum used to verify data integrity.
-The default value is
-.Sy on ,
-which automatically selects an appropriate algorithm
-.Po currently,
-.Sy fletcher4 ,
-but this may change in future releases
-.Pc .
-The value
-.Sy off
-disables integrity checking on user data.
-The value
-.Sy noparity
-not only disables integrity but also disables maintaining parity for user data.
-This setting is used internally by a dump device residing on a RAID-Z pool and
-should not be used by any other dataset.
-Disabling checksums is
-.Sy NOT
-a recommended practice.
-.Pp
-The
-.Sy sha512 ,
-.Sy skein ,
-and
-.Sy edonr
-checksum algorithms require enabling the appropriate features on the pool.
-These pool features are not supported by GRUB and must not be used on the
-pool if GRUB needs to access the pool (e.g. for /boot).
-.Pp
-Please see
-.Xr zpool-features 5
-for more information on these algorithms.
-.Pp
-Changing this property affects only newly-written data.
-.It Xo
-.Sy compression Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy gzip Ns | Ns
-.Sy gzip- Ns Em N Ns | Ns Sy lz4 Ns | Ns Sy lzjb Ns | Ns Sy zle
-.Xc
-Controls the compression algorithm used for this dataset.
-.Pp
-Setting compression to
-.Sy on
-indicates that the current default compression algorithm should be used.
-The default balances compression and decompression speed, with compression ratio
-and is expected to work well on a wide variety of workloads.
-Unlike all other settings for this property,
-.Sy on
-does not select a fixed compression type.
-As new compression algorithms are added to ZFS and enabled on a pool, the
-default compression algorithm may change.
-The current default compression algorithm is either
-.Sy lzjb
-or, if the
-.Sy lz4_compress
-feature is enabled,
-.Sy lz4 .
-.Pp
-The
-.Sy lz4
-compression algorithm is a high-performance replacement for the
-.Sy lzjb
-algorithm.
-It features significantly faster compression and decompression, as well as a
-moderately higher compression ratio than
-.Sy lzjb ,
-but can only be used on pools with the
-.Sy lz4_compress
-feature set to
-.Sy enabled .
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy lz4_compress
-feature.
-.Pp
-The
-.Sy lzjb
-compression algorithm is optimized for performance while providing decent data
-compression.
-.Pp
-The
-.Sy gzip
-compression algorithm uses the same compression as the
-.Xr gzip 1
-command.
-You can specify the
-.Sy gzip
-level by using the value
-.Sy gzip- Ns Em N ,
-where
-.Em N
-is an integer from 1
-.Pq fastest
-to 9
-.Pq best compression ratio .
-Currently,
-.Sy gzip
-is equivalent to
-.Sy gzip-6
-.Po which is also the default for
-.Xr gzip 1
-.Pc .
-.Pp
-The
-.Sy zle
-compression algorithm compresses runs of zeros.
-.Pp
-This property can also be referred to by its shortened column name
-.Sy compress .
-Changing this property affects only newly-written data.
-.Pp
-When any setting except
-.Sy off
-is selected, compression will explicitly check for blocks consisting of only
-zeroes (the NUL byte).  When a zero-filled block is detected, it is stored as
-a hole and not compressed using the indicated compression algorithm.
-.Pp
-Any block being compressed must be no larger than 7/8 of its original size
-after compression, otherwise the compression will not be considered worthwhile
-and the block saved uncompressed. Note that when the logical block is less than
-8 times the disk sector size this effectively reduces the necessary compression
-ratio; for example 8k blocks on disks with 4k disk sectors must compress to 1/2
-or less of their original size.
-.It Xo
-.Sy context Ns = Ns Sy none Ns | Ns
-.Em SELinux_User:SElinux_Role:Selinux_Type:Sensitivity_Level
-.Xc
-This flag sets the SELinux context for all files in the file system under
-a mount point for that file system. See
-.Xr selinux 8
-for more information.
-.It Xo
-.Sy fscontext Ns = Ns Sy none Ns | Ns
-.Em SELinux_User:SElinux_Role:Selinux_Type:Sensitivity_Level
-.Xc
-This flag sets the SELinux context for the file system being
-mounted. See
-.Xr selinux 8
-for more information.
-.It Xo
-.Sy defcontext Ns = Ns Sy none Ns | Ns
-.Em SELinux_User:SElinux_Role:Selinux_Type:Sensitivity_Level
-.Xc
-This flag sets the SELinux default context for unlabeled files. See
-.Xr selinux 8
-for more information.
-.It Xo
-.Sy rootcontext Ns = Ns Sy none Ns | Ns
-.Em SELinux_User:SElinux_Role:Selinux_Type:Sensitivity_Level
-.Xc
-This flag sets the SELinux context for the root inode of the file system. See
-.Xr selinux 8
-for more information.
-.It Sy copies Ns = Ns Sy 1 Ns | Ns Sy 2 Ns | Ns Sy 3
-Controls the number of copies of data stored for this dataset.
-These copies are in addition to any redundancy provided by the pool, for
-example, mirroring or RAID-Z.
-The copies are stored on different disks, if possible.
-The space used by multiple copies is charged to the associated file and dataset,
-changing the
-.Sy used
-property and counting against quotas and reservations.
-.Pp
-Changing this property only affects newly-written data.
-Therefore, set this property at file system creation time by using the
-.Fl o Sy copies Ns = Ns Ar N
-option.
-.Pp
-Remember that ZFS will not import a pool with a missing top-level vdev. Do
-.Sy NOT
-create, for example a two-disk striped pool and set
-.Sy copies=2
-on some datasets thinking you have setup redundancy for them. When a disk
-fails you will not be able to import the pool and will have lost all of your
-data.
-.Pp
-Encrypted datasets may not have
-.Sy copies Ns = Ns Em 3
-since the implementation stores some encryption metadata where the third copy
-would normally be.
-.It Sy devices Ns = Ns Sy on Ns | Ns Sy off
-Controls whether device nodes can be opened on this file system.
-The default value is
-.Sy on .
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy dev
-and
-.Sy nodev
-mount options.
-.It Xo
-.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns
-.Sy sha256[,verify] Ns | Ns Sy sha512[,verify] Ns | Ns Sy skein[,verify] Ns | Ns
-.Sy edonr,verify
-.Xc
-Configures deduplication for a dataset. The default value is
-.Sy off .
-The default deduplication checksum is
-.Sy sha256
-(this may change in the future). When
-.Sy dedup
-is enabled, the checksum defined here overrides the
-.Sy checksum
-property. Setting the value to
-.Sy verify
-has the same effect as the setting
-.Sy sha256,verify .
-.Pp
-If set to
-.Sy verify ,
-ZFS will do a byte-to-byte comparison in case of two blocks having the same
-signature to make sure the block contents are identical. Specifying
-.Sy verify
-is mandatory for the
-.Sy edonr
-algorithm.
-.Pp
-Unless necessary, deduplication should NOT be enabled on a system. See
-.Sx Deduplication
-above.
-.It Xo
-.Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns
-.Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k
-.Xc
-Specifies a compatibility mode or literal value for the size of dnodes in the
-file system. The default value is
-.Sy legacy .
-Setting this property to a value other than
-.Sy legacy
-requires the large_dnode pool feature to be enabled.
-.Pp
-Consider setting
-.Sy dnodesize
-to
-.Sy auto
-if the dataset uses the
-.Sy xattr=sa
-property setting and the workload makes heavy use of extended attributes. This
-may be applicable to SELinux-enabled systems, Lustre servers, and Samba
-servers, for example. Literal values are supported for cases where the optimal
-size is known in advance and for performance testing.
-.Pp
-Leave
-.Sy dnodesize
-set to
-.Sy legacy
-if you need to receive a send stream of this dataset on a pool that doesn't
-enable the large_dnode feature, or if you need to import this pool on a system
-that doesn't support the large_dnode feature.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy dnsize .
-.It Xo
-.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns
-.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns
-.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm
-.Xc
-Controls the encryption cipher suite (block cipher, key length, and mode) used
-for this dataset. Requires the
-.Sy encryption
-feature to be enabled on the pool.
-Requires a
-.Sy keyformat
-to be set at dataset creation time.
-.Pp
-Selecting
-.Sy encryption Ns = Ns Sy on
-when creating a dataset indicates that the default encryption suite will be
-selected, which is currently
-.Sy aes-256-gcm .
-In order to provide consistent data protection, encryption must be specified at
-dataset creation time and it cannot be changed afterwards.
-.Pp
-For more details and caveats about encryption see the
-.Sy Encryption
-section.
-.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase
-Controls what format the user's encryption key will be provided as. This
-property is only set when the dataset is encrypted.
-.Pp
-Raw keys and hex keys must be 32 bytes long (regardless of the chosen
-encryption suite) and must be randomly generated. A raw key can be generated
-with the following command:
-.Bd -literal
-# dd if=/dev/urandom of=/path/to/output/key bs=32 count=1
-.Ed
-.Pp
-Passphrases must be between 8 and 512 bytes long and will be processed through
-PBKDF2 before being used (see the
-.Sy pbkdf2iters
-property). Even though the
-encryption suite cannot be changed after dataset creation, the keyformat can be
-with
-.Nm zfs Cm change-key .
-.It Xo
-.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Em </absolute/file/path>
-.Xc
-Controls where the user's encryption key will be loaded from by default for
-commands such as
-.Nm zfs Cm load-key
-and
-.Nm zfs Cm mount Cm -l .
-This property is only set for encrypted datasets which are encryption roots. If
-unspecified, the default is
-.Sy prompt .
-.Pp
-Even though the encryption suite cannot be changed after dataset creation, the
-keylocation can be with either
-.Nm zfs Cm set
-or
-.Nm zfs Cm change-key .
-If
-.Sy prompt
-is selected ZFS will ask for the key at the command prompt when it is required
-to access the encrypted data (see
-.Nm zfs Cm load-key
-for details). This setting will also allow the key to be passed in via STDIN,
-but users should be careful not to place keys which should be kept secret on
-the command line. If a file URI is selected, the key will be loaded from the
-specified absolute file path.
-.It Sy pbkdf2iters Ns = Ns Ar iterations
-Controls the number of PBKDF2 iterations that a
-.Sy passphrase
-encryption key should be run through when processing it into an encryption key.
-This property is only defined when encryption is enabled and a keyformat of
-.Sy passphrase
-is selected. The goal of PBKDF2 is to significantly increase the
-computational difficulty needed to brute force a user's passphrase. This is
-accomplished by forcing the attacker to run each passphrase through a
-computationally expensive hashing function many times before they arrive at the
-resulting key. A user who actually knows the passphrase will only have to pay
-this cost once. As CPUs become better at processing, this number should be
-raised to ensure that a brute force attack is still not possible. The current
-default is
-.Sy 350000
-and the minimum is
-.Sy 100000 .
-This property may be changed with
-.Nm zfs Cm change-key .
-.It Sy exec Ns = Ns Sy on Ns | Ns Sy off
-Controls whether processes can be executed from within this file system.
-The default value is
-.Sy on .
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy exec
-and
-.Sy noexec
-mount options.
-.It Sy filesystem_limit Ns = Ns Em count Ns | Ns Sy none
-Limits the number of filesystems and volumes that can exist under this point in
-the dataset tree.
-The limit is not enforced if the user is allowed to change the limit.
-Setting a
-.Sy filesystem_limit
-on
-a descendent of a filesystem that already has a
-.Sy filesystem_limit
-does not override the ancestor's
-.Sy filesystem_limit ,
-but rather imposes an additional limit.
-This feature must be enabled to be used
-.Po see
-.Xr zpool-features 5
-.Pc .
-.It Sy special_small_blocks Ns = Ns Em size
-This value represents the threshold block size for including small file
-blocks into the special allocation class. Blocks smaller than or equal to this
-value will be assigned to the special allocation class while greater blocks
-will be assigned to the regular class. Valid values are zero or a power of two
-from 512B up to 1M. The default size is 0 which means no small file blocks
-will be allocated in the special class.
-.Pp
-Before setting this property, a special class vdev must be added to the
-pool. See
-.Xr zpool 8
-for more details on the special allocation class.
-.It Sy mountpoint Ns = Ns Pa path Ns | Ns Sy none Ns | Ns Sy legacy
-Controls the mount point used for this file system.
-See the
-.Sx Mount Points
-section for more information on how this property is used.
-.Pp
-When the
-.Sy mountpoint
-property is changed for a file system, the file system and any children that
-inherit the mount point are unmounted.
-If the new value is
-.Sy legacy ,
-then they remain unmounted.
-Otherwise, they are automatically remounted in the new location if the property
-was previously
-.Sy legacy
-or
-.Sy none ,
-or if they were mounted before the property was changed.
-In addition, any shared file systems are unshared and shared in the new
-location.
-.It Sy nbmand Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the file system should be mounted with
-.Sy nbmand
-.Pq Non Blocking mandatory locks .
-This is used for SMB clients.
-Changes to this property only take effect when the file system is unmounted and
-remounted.
-See
-.Xr mount 8
-for more information on
-.Sy nbmand
-mounts. This property is not used on Linux.
-.It Sy overlay Ns = Ns Sy off Ns | Ns Sy on
-Allow mounting on a busy directory or a directory which already contains
-files or directories. This is the default mount behavior for Linux file systems.
-For consistency with OpenZFS on other platforms overlay mounts are
-.Sy off
-by default. Set to
-.Sy on
-to enable overlay mounts.
-.It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
-Controls what is cached in the primary cache
-.Pq ARC .
-If this property is set to
-.Sy all ,
-then both user data and metadata is cached.
-If this property is set to
-.Sy none ,
-then neither user data nor metadata is cached.
-If this property is set to
-.Sy metadata ,
-then only metadata is cached.
-The default value is
-.Sy all .
-.It Sy quota Ns = Ns Em size Ns | Ns Sy none
-Limits the amount of space a dataset and its descendents can consume.
-This property enforces a hard limit on the amount of space used.
-This includes all space consumed by descendents, including file systems and
-snapshots.
-Setting a quota on a descendent of a dataset that already has a quota does not
-override the ancestor's quota, but rather imposes an additional limit.
-.Pp
-Quotas cannot be set on volumes, as the
-.Sy volsize
-property acts as an implicit quota.
-.It Sy snapshot_limit Ns = Ns Em count Ns | Ns Sy none
-Limits the number of snapshots that can be created on a dataset and its
-descendents.
-Setting a
-.Sy snapshot_limit
-on a descendent of a dataset that already has a
-.Sy snapshot_limit
-does not override the ancestor's
-.Sy snapshot_limit ,
-but rather imposes an additional limit.
-The limit is not enforced if the user is allowed to change the limit.
-For example, this means that recursive snapshots taken from the global zone are
-counted against each delegated dataset within a zone.
-This feature must be enabled to be used
-.Po see
-.Xr zpool-features 5
-.Pc .
-.It Sy userquota@ Ns Em user Ns = Ns Em size Ns | Ns Sy none
-Limits the amount of space consumed by the specified user.
-User space consumption is identified by the
-.Sy userspace@ Ns Em user
-property.
-.Pp
-Enforcement of user quotas may be delayed by several seconds.
-This delay means that a user might exceed their quota before the system notices
-that they are over quota and begins to refuse additional writes with the
-.Er EDQUOT
-error message.
-See the
-.Nm zfs Cm userspace
-subcommand for more information.
-.Pp
-Unprivileged users can only access their own groups' space usage.
-The root user, or a user who has been granted the
-.Sy userquota
-privilege with
-.Nm zfs Cm allow ,
-can get and set everyone's quota.
-.Pp
-This property is not available on volumes, on file systems before version 4, or
-on pools before version 15.
-The
-.Sy userquota@ Ns Em ...
-properties are not displayed by
-.Nm zfs Cm get Sy all .
-The user's name must be appended after the
-.Sy @
-symbol, using one of the following forms:
-.Bl -bullet
-.It
-.Em POSIX name
-.Po for example,
-.Sy joe
-.Pc
-.It
-.Em POSIX numeric ID
-.Po for example,
-.Sy 789
-.Pc
-.It
-.Em SID name
-.Po for example,
-.Sy joe.smith@mydomain
-.Pc
-.It
-.Em SID numeric ID
-.Po for example,
-.Sy S-1-123-456-789
-.Pc
-.El
-.Pp
-Files created on Linux always have POSIX owners.
-.It Sy userobjquota@ Ns Em user Ns = Ns Em size Ns | Ns Sy none
-The
-.Sy userobjquota
-is similar to
-.Sy userquota
-but it limits the number of objects a user can create. Please refer to
-.Sy userobjused
-for more information about how objects are counted.
-.It Sy groupquota@ Ns Em group Ns = Ns Em size Ns | Ns Sy none
-Limits the amount of space consumed by the specified group.
-Group space consumption is identified by the
-.Sy groupused@ Ns Em group
-property.
-.Pp
-Unprivileged users can access only their own groups' space usage.
-The root user, or a user who has been granted the
-.Sy groupquota
-privilege with
-.Nm zfs Cm allow ,
-can get and set all groups' quotas.
-.It Sy groupobjquota@ Ns Em group Ns = Ns Em size Ns | Ns Sy none
-The
-.Sy groupobjquota
-is similar to
-.Sy groupquota
-but it limits number of objects a group can consume. Please refer to
-.Sy userobjused
-for more information about how objects are counted.
-.It Sy projectquota@ Ns Em project Ns = Ns Em size Ns | Ns Sy none
-Limits the amount of space consumed by the specified project. Project
-space consumption is identified by the
-.Sy projectused@ Ns Em project
-property. Please refer to
-.Sy projectused
-for more information about how project is identified and set/changed.
-.Pp
-The root user, or a user who has been granted the
-.Sy projectquota
-privilege with
-.Nm zfs Cm allow ,
-can access all projects' quota.
-.It Sy projectobjquota@ Ns Em project Ns = Ns Em size Ns | Ns Sy none
-The
-.Sy projectobjquota
-is similar to
-.Sy projectquota
-but it limits number of objects a project can consume. Please refer to
-.Sy userobjused
-for more information about how objects are counted.
-.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
-Controls whether this dataset can be modified.
-The default value is
-.Sy off .
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy ro
-and
-.Sy rw
-mount options.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy rdonly .
-.It Sy recordsize Ns = Ns Em size
-Specifies a suggested block size for files in the file system.
-This property is designed solely for use with database workloads that access
-files in fixed-size records.
-ZFS automatically tunes block sizes according to internal algorithms optimized
-for typical access patterns.
-.Pp
-For databases that create very large files but access them in small random
-chunks, these algorithms may be suboptimal.
-Specifying a
-.Sy recordsize
-greater than or equal to the record size of the database can result in
-significant performance gains.
-Use of this property for general purpose file systems is strongly discouraged,
-and may adversely affect performance.
-.Pp
-The size specified must be a power of two greater than or equal to 512 and less
-than or equal to 128 Kbytes.
-If the
-.Sy large_blocks
-feature is enabled on the pool, the size may be up to 1 Mbyte.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags.
-.Pp
-Changing the file system's
-.Sy recordsize
-affects only files created afterward; existing files are unaffected.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy recsize .
-.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most
-Controls what types of metadata are stored redundantly.
-ZFS stores an extra copy of metadata, so that if a single block is corrupted,
-the amount of user data lost is limited.
-This extra copy is in addition to any redundancy provided at the pool level
-.Pq e.g. by mirroring or RAID-Z ,
-and is in addition to an extra copy specified by the
-.Sy copies
-property
-.Pq up to a total of 3 copies .
-For example if the pool is mirrored,
-.Sy copies Ns = Ns 2 ,
-and
-.Sy redundant_metadata Ns = Ns Sy most ,
-then ZFS stores 6 copies of most metadata, and 4 copies of data and some
-metadata.
-.Pp
-When set to
-.Sy all ,
-ZFS stores an extra copy of all metadata.
-If a single on-disk block is corrupt, at worst a single block of user data
-.Po which is
-.Sy recordsize
-bytes long
-.Pc
-can be lost.
-.Pp
-When set to
-.Sy most ,
-ZFS stores an extra copy of most types of metadata.
-This can improve performance of random writes, because less metadata must be
-written.
-In practice, at worst about 100 blocks
-.Po of
-.Sy recordsize
-bytes each
-.Pc
-of user data can be lost if a single on-disk block is corrupt.
-The exact behavior of which metadata blocks are stored redundantly may change in
-future releases.
-.Pp
-The default value is
-.Sy all .
-.It Sy refquota Ns = Ns Em size Ns | Ns Sy none
-Limits the amount of space a dataset can consume.
-This property enforces a hard limit on the amount of space used.
-This hard limit does not include space used by descendents, including file
-systems and snapshots.
-.It Sy refreservation Ns = Ns Em size Ns | Ns Sy none Ns | Ns Sy auto
-The minimum amount of space guaranteed to a dataset, not including its
-descendents.
-When the amount of space used is below this value, the dataset is treated as if
-it were taking up the amount of space specified by
-.Sy refreservation .
-The
-.Sy refreservation
-reservation is accounted for in the parent datasets' space used, and counts
-against the parent datasets' quotas and reservations.
-.Pp
-If
-.Sy refreservation
-is set, a snapshot is only allowed if there is enough free pool space outside of
-this reservation to accommodate the current number of
-.Qq referenced
-bytes in the dataset.
-.Pp
-If
-.Sy refreservation
-is set to
-.Sy auto ,
-a volume is thick provisioned
-.Po or
-.Qq not sparse
-.Pc .
-.Sy refreservation Ns = Ns Sy auto
-is only supported on volumes.
-See
-.Sy volsize
-in the
-.Sx Native Properties
-section for more information about sparse volumes.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy refreserv .
-.It Sy relatime Ns = Ns Sy on Ns | Ns Sy off
-Controls the manner in which the access time is updated when
-.Sy atime=on
-is set. Turning this property on causes the access time to be updated relative
-to the modify or change time. Access time is only updated if the previous
-access time was earlier than the current modify or change time or if the
-existing access time hasn't been updated within the past 24 hours. The default
-value is
-.Sy off .
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy relatime
-and
-.Sy norelatime
-mount options.
-.It Sy reservation Ns = Ns Em size Ns | Ns Sy none
-The minimum amount of space guaranteed to a dataset and its descendants.
-When the amount of space used is below this value, the dataset is treated as if
-it were taking up the amount of space specified by its reservation.
-Reservations are accounted for in the parent datasets' space used, and count
-against the parent datasets' quotas and reservations.
-.Pp
-This property can also be referred to by its shortened column name,
-.Sy reserv .
-.It Sy secondarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata
-Controls what is cached in the secondary cache
-.Pq L2ARC .
-If this property is set to
-.Sy all ,
-then both user data and metadata is cached.
-If this property is set to
-.Sy none ,
-then neither user data nor metadata is cached.
-If this property is set to
-.Sy metadata ,
-then only metadata is cached.
-The default value is
-.Sy all .
-.It Sy setuid Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the setuid bit is respected for the file system.
-The default value is
-.Sy on .
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy suid
-and
-.Sy nosuid
-mount options.
-.It Sy sharesmb Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Em opts
-Controls whether the file system is shared by using
-.Sy Samba USERSHARES
-and what options are to be used. Otherwise, the file system is automatically
-shared and unshared with the
-.Nm zfs Cm share
-and
-.Nm zfs Cm unshare
-commands. If the property is set to on, the
-.Xr net 8
-command is invoked to create a
-.Sy USERSHARE .
-.Pp
-Because SMB shares require a resource name, a unique resource name is
-constructed from the dataset name. The constructed name is a copy of the
-dataset name except that the characters in the dataset name, which would be
-invalid in the resource name, are replaced with underscore (_) characters.
-Linux does not currently support additional options which might be available
-on Solaris.
-.Pp
-If the
-.Sy sharesmb
-property is set to
-.Sy off ,
-the file systems are unshared.
-.Pp
-The share is created with the ACL (Access Control List) "Everyone:F" ("F"
-stands for "full permissions", i.e., read and write permissions) and no guest
-access (which means Samba must be able to authenticate a real user, system
-passwd/shadow, LDAP or smbpasswd based) by default. This means that any
-additional access control (disallow specific user specific access etc) must
-be done on the underlying file system.
-.It Sy sharenfs Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Em opts
-Controls whether the file system is shared via NFS, and what options are to be
-used.
-A file system with a
-.Sy sharenfs
-property of
-.Sy off
-is managed with the
-.Xr exportfs 8
-command and entries in the
-.Em /etc/exports
-file.
-Otherwise, the file system is automatically shared and unshared with the
-.Nm zfs Cm share
-and
-.Nm zfs Cm unshare
-commands.
-If the property is set to
-.Sy on ,
-the dataset is shared using the default options:
-.Pp
-.Em sec=sys,rw,crossmnt,no_subtree_check
-.Pp
-See
-.Xr exports 5
-for the meaning of the default options. Otherwise, the
-.Xr exportfs 8
-command is invoked with options equivalent to the contents of this property.
-.Pp
-When the
-.Sy sharenfs
-property is changed for a dataset, the dataset and any children inheriting the
-property are re-shared with the new options, only if the property was previously
-.Sy off ,
-or if they were shared before the property was changed.
-If the new property is
-.Sy off ,
-the file systems are unshared.
-.It Sy logbias Ns = Ns Sy latency Ns | Ns Sy throughput
-Provide a hint to ZFS about handling of synchronous requests in this dataset.
-If
-.Sy logbias
-is set to
-.Sy latency
-.Pq the default ,
-ZFS will use pool log devices
-.Pq if configured
-to handle the requests at low latency.
-If
-.Sy logbias
-is set to
-.Sy throughput ,
-ZFS will not use configured pool log devices.
-ZFS will instead optimize synchronous operations for global pool throughput and
-efficient use of resources.
-.It Sy snapdev Ns = Ns Sy hidden Ns | Ns Sy visible
-Controls whether the volume snapshot devices under
-.Em /dev/zvol/<pool>
-are hidden or visible. The default value is
-.Sy hidden .
-.It Sy snapdir Ns = Ns Sy hidden Ns | Ns Sy visible
-Controls whether the
-.Pa .zfs
-directory is hidden or visible in the root of the file system as discussed in
-the
-.Sx Snapshots
-section.
-The default value is
-.Sy hidden .
-.It Sy sync Ns = Ns Sy standard Ns | Ns Sy always Ns | Ns Sy disabled
-Controls the behavior of synchronous requests
-.Pq e.g. fsync, O_DSYNC .
-.Sy standard
-is the
-.Tn POSIX
-specified behavior of ensuring all synchronous requests are written to stable
-storage and all devices are flushed to ensure data is not cached by device
-controllers
-.Pq this is the default .
-.Sy always
-causes every file system transaction to be written and flushed before its
-system call returns.
-This has a large performance penalty.
-.Sy disabled
-disables synchronous requests.
-File system transactions are only committed to stable storage periodically.
-This option will give the highest performance.
-However, it is very dangerous as ZFS would be ignoring the synchronous
-transaction demands of applications such as databases or NFS.
-Administrators should only use this option when the risks are understood.
-.It Sy version Ns = Ns Em N Ns | Ns Sy current
-The on-disk version of this file system, which is independent of the pool
-version.
-This property can only be set to later supported versions.
-See the
-.Nm zfs Cm upgrade
-command.
-.It Sy volsize Ns = Ns Em size
-For volumes, specifies the logical size of the volume.
-By default, creating a volume establishes a reservation of equal size.
-For storage pools with a version number of 9 or higher, a
-.Sy refreservation
-is set instead.
-Any changes to
-.Sy volsize
-are reflected in an equivalent change to the reservation
-.Po or
-.Sy refreservation
-.Pc .
-The
-.Sy volsize
-can only be set to a multiple of
-.Sy volblocksize ,
-and cannot be zero.
-.Pp
-The reservation is kept equal to the volume's logical size to prevent unexpected
-behavior for consumers.
-Without the reservation, the volume could run out of space, resulting in
-undefined behavior or data corruption, depending on how the volume is used.
-These effects can also occur when the volume size is changed while it is in use
-.Pq particularly when shrinking the size .
-Extreme care should be used when adjusting the volume size.
-.Pp
-Though not recommended, a
-.Qq sparse volume
-.Po also known as
-.Qq thin provisioned
-.Pc
-can be created by specifying the
-.Fl s
-option to the
-.Nm zfs Cm create Fl V
-command, or by changing the value of the
-.Sy refreservation
-property
-.Po or
-.Sy reservation
-property on pool version 8 or earlier
-.Pc
-after the volume has been created.
-A
-.Qq sparse volume
-is a volume where the value of
-.Sy refreservation
-is less than the size of the volume plus the space required to store its
-metadata.
-Consequently, writes to a sparse volume can fail with
-.Er ENOSPC
-when the pool is low on space.
-For a sparse volume, changes to
-.Sy volsize
-are not reflected in the
-.Sy refreservation .
-A volume that is not sparse is said to be
-.Qq thick provisioned .
-A sparse volume can become thick provisioned by setting
-.Sy refreservation
-to
-.Sy auto .
-.It Sy volmode Ns = Ns Cm default | full | geom | dev | none
-This property specifies how volumes should be exposed to the OS.
-Setting it to
-.Sy full
-exposes volumes as fully fledged block devices, providing maximal
-functionality. The value
-.Sy geom
-is just an alias for
-.Sy full
-and is kept for compatibility.
-Setting it to
-.Sy dev
-hides its partitions.
-Volumes with property set to
-.Sy none
-are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc,
-that can be suitable for backup purposes.
-Value
-.Sy default
-means that volumes exposition is controlled by system-wide tunable
-.Va zvol_volmode ,
-where
-.Sy full ,
-.Sy dev
-and
-.Sy none
-are encoded as 1, 2 and 3 respectively.
-The default value is
-.Sy full .
-.It Sy vscan Ns = Ns Sy on Ns | Ns Sy off
-Controls whether regular files should be scanned for viruses when a file is
-opened and closed.
-In addition to enabling this property, the virus scan service must also be
-enabled for virus scanning to occur.
-The default value is
-.Sy off .
-This property is not used on Linux.
-.It Sy xattr Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy sa
-Controls whether extended attributes are enabled for this file system. Two
-styles of extended attributes are supported either directory based or system
-attribute based.
-.Pp
-The default value of
-.Sy on
-enables directory based extended attributes. This style of extended attribute
-imposes no practical limit on either the size or number of attributes which
-can be set on a file. Although under Linux the
-.Xr getxattr 2
-and
-.Xr setxattr 2
-system calls limit the maximum size to 64K. This is the most compatible
-style of extended attribute and is supported by all OpenZFS implementations.
-.Pp
-System attribute based xattrs can be enabled by setting the value to
-.Sy sa .
-The key advantage of this type of xattr is improved performance. Storing
-extended attributes as system attributes significantly decreases the amount of
-disk IO required. Up to 64K of data may be stored per-file in the space
-reserved for system attributes. If there is not enough space available for
-an extended attribute then it will be automatically written as a directory
-based xattr. System attribute based extended attributes are not accessible
-on platforms which do not support the
-.Sy xattr=sa
-feature.
-.Pp
-The use of system attribute based xattrs is strongly encouraged for users of
-SELinux or POSIX ACLs. Both of these features rely heavily on extended
-attributes and benefit significantly from the reduced access time.
-.Pp
-The values
-.Sy on
-and
-.Sy off
-are equivalent to the
-.Sy xattr
-and
-.Sy noxattr
-mount options.
-.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the dataset is managed from a non-global zone. Zones are a
-Solaris feature and are not relevant on Linux. The default value is
-.Sy off .
-.El
-.Pp
-The following three properties cannot be changed after the file system is
-created, and therefore, should be set when the file system is created.
-If the properties are not set with the
-.Nm zfs Cm create
-or
-.Nm zpool Cm create
-commands, these properties are inherited from the parent dataset.
-If the parent dataset lacks these properties due to having been created prior to
-these features being supported, the new file system will have the default values
-for these properties.
-.Bl -tag -width ""
-.It Xo
-.Sy casesensitivity Ns = Ns Sy sensitive Ns | Ns
-.Sy insensitive Ns | Ns Sy mixed
-.Xc
-Indicates whether the file name matching algorithm used by the file system
-should be case-sensitive, case-insensitive, or allow a combination of both
-styles of matching.
-The default value for the
-.Sy casesensitivity
-property is
-.Sy sensitive .
-Traditionally,
-.Ux
-and
-.Tn POSIX
-file systems have case-sensitive file names.
-.Pp
-The
-.Sy mixed
-value for the
-.Sy casesensitivity
-property indicates that the file system can support requests for both
-case-sensitive and case-insensitive matching behavior.
-Currently, case-insensitive matching behavior on a file system that supports
-mixed behavior is limited to the SMB server product.
-For more information about the
-.Sy mixed
-value behavior, see the "ZFS Administration Guide".
-.It Xo
-.Sy normalization Ns = Ns Sy none Ns | Ns Sy formC Ns | Ns
-.Sy formD Ns | Ns Sy formKC Ns | Ns Sy formKD
-.Xc
-Indicates whether the file system should perform a
-.Sy unicode
-normalization of file names whenever two file names are compared, and which
-normalization algorithm should be used.
-File names are always stored unmodified, names are normalized as part of any
-comparison process.
-If this property is set to a legal value other than
-.Sy none ,
-and the
-.Sy utf8only
-property was left unspecified, the
-.Sy utf8only
-property is automatically set to
-.Sy on .
-The default value of the
-.Sy normalization
-property is
-.Sy none .
-This property cannot be changed after the file system is created.
-.It Sy utf8only Ns = Ns Sy on Ns | Ns Sy off
-Indicates whether the file system should reject file names that include
-characters that are not present in the
-.Sy UTF-8
-character code set.
-If this property is explicitly set to
-.Sy off ,
-the normalization property must either not be explicitly set or be set to
-.Sy none .
-The default value for the
-.Sy utf8only
-property is
-.Sy off .
-This property cannot be changed after the file system is created.
-.El
-.Pp
-The
-.Sy casesensitivity ,
-.Sy normalization ,
-and
-.Sy utf8only
-properties are also new permissions that can be assigned to non-privileged users
-by using the ZFS delegated administration feature.
-.Ss "Temporary Mount Point Properties"
-When a file system is mounted, either through
-.Xr mount 8
-for legacy mounts or the
-.Nm zfs Cm mount
-command for normal file systems, its mount options are set according to its
-properties.
-The correlation between properties and mount options is as follows:
-.Bd -literal
-    PROPERTY                MOUNT OPTION
-    atime                   atime/noatime
-    canmount                auto/noauto
-    devices                 dev/nodev
-    exec                    exec/noexec
-    readonly                ro/rw
-    relatime                relatime/norelatime
-    setuid                  suid/nosuid
-    xattr                   xattr/noxattr
-.Ed
-.Pp
-In addition, these options can be set on a per-mount basis using the
-.Fl o
-option, without affecting the property that is stored on disk.
-The values specified on the command line override the values stored in the
-dataset.
-The
-.Sy nosuid
-option is an alias for
-.Sy nodevices Ns \&, Ns Sy nosetuid .
-These properties are reported as
-.Qq temporary
-by the
-.Nm zfs Cm get
-command.
-If the properties are changed while the dataset is mounted, the new setting
-overrides any temporary settings.
-.Ss "User Properties"
-In addition to the standard native properties, ZFS supports arbitrary user
-properties.
-User properties have no effect on ZFS behavior, but applications or
-administrators can use them to annotate datasets
-.Pq file systems, volumes, and snapshots .
-.Pp
-User property names must contain a colon
-.Pq Qq Sy \&:
-character to distinguish them from native properties.
-They may contain lowercase letters, numbers, and the following punctuation
-characters: colon
-.Pq Qq Sy \&: ,
-dash
-.Pq Qq Sy - ,
-period
-.Pq Qq Sy \&. ,
-and underscore
-.Pq Qq Sy _ .
-The expected convention is that the property name is divided into two portions
-such as
-.Em module Ns \&: Ns Em property ,
-but this namespace is not enforced by ZFS.
-User property names can be at most 256 characters, and cannot begin with a dash
-.Pq Qq Sy - .
-.Pp
-When making programmatic use of user properties, it is strongly suggested to use
-a reversed
-.Sy DNS
-domain name for the
-.Em module
-component of property names to reduce the chance that two
-independently-developed packages use the same property name for different
-purposes.
-.Pp
-The values of user properties are arbitrary strings, are always inherited, and
-are never validated.
-All of the commands that operate on properties
-.Po Nm zfs Cm list ,
-.Nm zfs Cm get ,
-.Nm zfs Cm set ,
-and so forth
-.Pc
-can be used to manipulate both native properties and user properties.
-Use the
-.Nm zfs Cm inherit
-command to clear a user property.
-If the property is not defined in any parent dataset, it is removed entirely.
-Property values are limited to 8192 bytes.
-.Ss ZFS Volumes as Swap
-ZFS volumes may be used as swap devices. After creating the volume with the
-.Nm zfs Cm create Fl V
-command, set up and enable the swap area using the
-.Xr mkswap 8
-and
-.Xr swapon 8
-commands. Do not swap to a file on a ZFS file system. A ZFS swap file
-configuration is not supported.
+For more information about properties, see
+.Xr zfsprops 7 .
+.
 .Ss Encryption
 Enabling the
 .Sy encryption
-feature allows for the creation of encrypted filesystems and volumes.  ZFS
-will encrypt file and zvol data, file attributes, ACLs, permission bits,
+feature allows for the creation of encrypted filesystems and volumes.
+ZFS will encrypt file and zvol data, file attributes, ACLs, permission bits,
 directory listings, FUID mappings, and
-.Sy userused
-/
-.Sy groupused
-data.  ZFS will not encrypt metadata related to the pool structure, including
-dataset and snapshot names, dataset hierarchy, properties, file size, file
-holes, and deduplication tables (though the deduplicated data itself is
-encrypted).
-.Pp
-Key rotation is managed by ZFS.  Changing the user's key (e.g. a passphrase)
-does not require re-encrypting the entire dataset.  Datasets can be scrubbed,
-resilvered, renamed, and deleted without the encryption keys being loaded (see the
-.Nm zfs Cm load-key
-subcommand for more info on key loading).
-.Pp
-Creating an encrypted dataset requires specifying the
-.Sy encryption
-and
-.Sy keyformat
-properties at creation time, along with an optional
-.Sy keylocation
-and
-.Sy pbkdf2iters .
-After entering an encryption key, the
-created dataset will become an encryption root. Any descendant datasets will
-inherit their encryption key from the encryption root by default, meaning that
-loading, unloading, or changing the key for the encryption root will implicitly
-do the same for all inheriting datasets. If this inheritance is not desired,
-simply supply a
-.Sy keyformat
-when creating the child dataset or use
-.Nm zfs Cm change-key
-to break an existing relationship, creating a new encryption root on the child.
-Note that the child's
-.Sy keyformat
-may match that of the parent while still creating a new encryption root, and
-that changing the
-.Sy encryption
-property alone does not create a new encryption root; this would simply use a
-different cipher suite with the same key as its encryption root. The one
-exception is that clones will always use their origin's encryption key.
-As a result of this exception, some encryption-related properties (namely
-.Sy keystatus ,
-.Sy keyformat ,
-.Sy keylocation ,
-and
-.Sy pbkdf2iters )
-do not inherit like other ZFS properties and instead use the value determined
-by their encryption root. Encryption root inheritance can be tracked via the
-read-only
-.Sy encryptionroot
-property.
-.Pp
-Encryption changes the behavior of a few ZFS
-operations. Encryption is applied after compression so compression ratios are
-preserved. Normally checksums in ZFS are 256 bits long, but for encrypted data
-the checksum is 128 bits of the user-chosen checksum and 128 bits of MAC from
-the encryption suite, which provides additional protection against maliciously
-altered data. Deduplication is still possible with encryption enabled but for
-security, datasets will only dedup against themselves, their snapshots, and
-their clones.
-.Pp
-There are a few limitations on encrypted datasets. Encrypted data cannot be
-embedded via the
-.Sy embedded_data
-feature. Encrypted datasets may not have
-.Sy copies Ns = Ns Em 3
-since the implementation stores some encryption metadata where the third copy
-would normally be. Since compression is applied before encryption, datasets may
-be vulnerable to a CRIME-like attack if applications accessing the data allow
-for it. Deduplication with encryption will leak information about which blocks
-are equivalent in a dataset and will incur an extra CPU cost per block written.
+.Sy userused Ns / Ns Sy groupused Ns / Ns Sy projectused
+data.
+For an overview of encryption, see
+.Xr zfs-load-key 8 .
+.
 .Sh SUBCOMMANDS
 All subcommands that modify state are logged persistently to the pool in their
 original form.
@@ -2463,2200 +129,8 @@
 Displays a help message.
 .It Xo
 .Nm
-.Fl V, -version
+.Fl V , -version
 .Xc
-An alias for the
-.Nm zfs Cm version
-subcommand.
-.It Xo
-.Nm
-.Cm create
-.Op Fl p
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem
-.Xc
-Creates a new ZFS file system.
-The file system is automatically mounted according to the
-.Sy mountpoint
-property inherited from the parent.
-.Bl -tag -width "-o"
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property as if the command
-.Nm zfs Cm set Ar property Ns = Ns Ar value
-was invoked at the same time the dataset was created.
-Any editable ZFS property can also be set at creation time.
-Multiple
-.Fl o
-options can be specified.
-An error results if the same property is specified in multiple
-.Fl o
-options.
-.It Fl p
-Creates all the non-existing parent datasets.
-Datasets created in this manner are automatically mounted according to the
-.Sy mountpoint
-property inherited from their parent.
-Any property specified on the command line using the
-.Fl o
-option is ignored.
-If the target filesystem already exists, the operation completes successfully.
-.El
-.It Xo
-.Nm
-.Cm create
-.Op Fl ps
-.Op Fl b Ar blocksize
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Fl V Ar size Ar volume
-.Xc
-Creates a volume of the given size.
-The volume is exported as a block device in
-.Pa /dev/zvol/path ,
-where
-.Em path
-is the name of the volume in the ZFS namespace.
-The size represents the logical size as exported by the device.
-By default, a reservation of equal size is created.
-.Pp
-.Ar size
-is automatically rounded up to the nearest 128 Kbytes to ensure that the volume
-has an integral number of blocks regardless of
-.Sy blocksize .
-.Bl -tag -width "-b"
-.It Fl b Ar blocksize
-Equivalent to
-.Fl o Sy volblocksize Ns = Ns Ar blocksize .
-If this option is specified in conjunction with
-.Fl o Sy volblocksize ,
-the resulting behavior is undefined.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property as if the
-.Nm zfs Cm set Ar property Ns = Ns Ar value
-command was invoked at the same time the dataset was created.
-Any editable ZFS property can also be set at creation time.
-Multiple
-.Fl o
-options can be specified.
-An error results if the same property is specified in multiple
-.Fl o
-options.
-.It Fl p
-Creates all the non-existing parent datasets.
-Datasets created in this manner are automatically mounted according to the
-.Sy mountpoint
-property inherited from their parent.
-Any property specified on the command line using the
-.Fl o
-option is ignored.
-If the target filesystem already exists, the operation completes successfully.
-.It Fl s
-Creates a sparse volume with no reservation.
-See
-.Sy volsize
-in the
-.Sx Native Properties
-section for more information about sparse volumes.
-.El
-.It Xo
-.Nm
-.Cm destroy
-.Op Fl Rfnprv
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Destroys the given dataset.
-By default, the command unshares any file systems that are currently shared,
-unmounts any file systems that are currently mounted, and refuses to destroy a
-dataset that has active dependents
-.Pq children or clones .
-.Bl -tag -width "-R"
-.It Fl R
-Recursively destroy all dependents, including cloned file systems outside the
-target hierarchy.
-.It Fl f
-Force an unmount of any file systems using the
-.Nm unmount Fl f
-command.
-This option has no effect on non-file systems or unmounted file systems.
-.It Fl n
-Do a dry-run
-.Pq Qq No-op
-deletion.
-No data will be deleted.
-This is useful in conjunction with the
-.Fl v
-or
-.Fl p
-flags to determine what data would be deleted.
-.It Fl p
-Print machine-parsable verbose information about the deleted data.
-.It Fl r
-Recursively destroy all children.
-.It Fl v
-Print verbose information about the deleted data.
-.El
-.Pp
-Extreme care should be taken when applying either the
-.Fl r
-or the
-.Fl R
-options, as they can destroy large portions of a pool and cause unexpected
-behavior for mounted file systems in use.
-.It Xo
-.Nm
-.Cm destroy
-.Op Fl Rdnprv
-.Ar filesystem Ns | Ns Ar volume Ns @ Ns Ar snap Ns
-.Oo % Ns Ar snap Ns Oo , Ns Ar snap Ns Oo % Ns Ar snap Oc Oc Oc Ns ...
-.Xc
-The given snapshots are destroyed immediately if and only if the
-.Nm zfs Cm destroy
-command without the
-.Fl d
-option would have destroyed it.
-Such immediate destruction would occur, for example, if the snapshot had no
-clones and the user-initiated reference count were zero.
-.Pp
-If a snapshot does not qualify for immediate destruction, it is marked for
-deferred deletion.
-In this state, it exists as a usable, visible snapshot until both of the
-preconditions listed above are met, at which point it is destroyed.
-.Pp
-An inclusive range of snapshots may be specified by separating the first and
-last snapshots with a percent sign.
-The first and/or last snapshots may be left blank, in which case the
-filesystem's oldest or newest snapshot will be implied.
-.Pp
-Multiple snapshots
-.Pq or ranges of snapshots
-of the same filesystem or volume may be specified in a comma-separated list of
-snapshots.
-Only the snapshot's short name
-.Po the part after the
-.Sy @
-.Pc
-should be specified when using a range or comma-separated list to identify
-multiple snapshots.
-.Bl -tag -width "-R"
-.It Fl R
-Recursively destroy all clones of these snapshots, including the clones,
-snapshots, and children.
-If this flag is specified, the
-.Fl d
-flag will have no effect.
-.It Fl d
-Destroy immediately. If a snapshot cannot be destroyed now, mark it for
-deferred destruction.
-.It Fl n
-Do a dry-run
-.Pq Qq No-op
-deletion.
-No data will be deleted.
-This is useful in conjunction with the
-.Fl p
-or
-.Fl v
-flags to determine what data would be deleted.
-.It Fl p
-Print machine-parsable verbose information about the deleted data.
-.It Fl r
-Destroy
-.Pq or mark for deferred deletion
-all snapshots with this name in descendent file systems.
-.It Fl v
-Print verbose information about the deleted data.
-.Pp
-Extreme care should be taken when applying either the
-.Fl r
-or the
-.Fl R
-options, as they can destroy large portions of a pool and cause unexpected
-behavior for mounted file systems in use.
-.El
-.It Xo
-.Nm
-.Cm destroy
-.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark
-.Xc
-The given bookmark is destroyed.
-.It Xo
-.Nm
-.Cm snapshot
-.Op Fl r
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem Ns @ Ns Ar snapname Ns | Ns Ar volume Ns @ Ns Ar snapname Ns ...
-.Xc
-Creates snapshots with the given names.
-All previous modifications by successful system calls to the file system are
-part of the snapshots.
-Snapshots are taken atomically, so that all snapshots correspond to the same
-moment in time.
-.Nm zfs Cm snap
-can be used as an alias for
-.Nm zfs Cm snapshot .
-See the
-.Sx Snapshots
-section for details.
-.Bl -tag -width "-o"
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property; see
-.Nm zfs Cm create
-for details.
-.It Fl r
-Recursively create snapshots of all descendent datasets.
-.El
-.It Xo
-.Nm
-.Cm rollback
-.Op Fl Rfr
-.Ar snapshot
-.Xc
-Roll back the given dataset to a previous snapshot.
-When a dataset is rolled back, all data that has changed since the snapshot is
-discarded, and the dataset reverts to the state at the time of the snapshot.
-By default, the command refuses to roll back to a snapshot other than the most
-recent one.
-In order to do so, all intermediate snapshots and bookmarks must be destroyed by
-specifying the
-.Fl r
-option.
-.Pp
-The
-.Fl rR
-options do not recursively destroy the child snapshots of a recursive snapshot.
-Only direct snapshots of the specified filesystem are destroyed by either of
-these options.
-To completely roll back a recursive snapshot, you must rollback the individual
-child snapshots.
-.Bl -tag -width "-R"
-.It Fl R
-Destroy any more recent snapshots and bookmarks, as well as any clones of those
-snapshots.
-.It Fl f
-Used with the
-.Fl R
-option to force an unmount of any clone file systems that are to be destroyed.
-.It Fl r
-Destroy any snapshots and bookmarks more recent than the one specified.
-.El
-.It Xo
-.Nm
-.Cm clone
-.Op Fl p
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Ar snapshot Ar filesystem Ns | Ns Ar volume
-.Xc
-Creates a clone of the given snapshot.
-See the
-.Sx Clones
-section for details.
-The target dataset can be located anywhere in the ZFS hierarchy, and is created
-as the same type as the original.
-.Bl -tag -width "-o"
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property; see
-.Nm zfs Cm create
-for details.
-.It Fl p
-Creates all the non-existing parent datasets.
-Datasets created in this manner are automatically mounted according to the
-.Sy mountpoint
-property inherited from their parent.
-If the target filesystem or volume already exists, the operation completes
-successfully.
-.El
-.It Xo
-.Nm
-.Cm promote
-.Ar clone-filesystem
-.Xc
-Promotes a clone file system to no longer be dependent on its
-.Qq origin
-snapshot.
-This makes it possible to destroy the file system that the clone was created
-from.
-The clone parent-child dependency relationship is reversed, so that the origin
-file system becomes a clone of the specified file system.
-.Pp
-The snapshot that was cloned, and any snapshots previous to this snapshot, are
-now owned by the promoted clone.
-The space they use moves from the origin file system to the promoted clone, so
-enough space must be available to accommodate these snapshots.
-No new space is consumed by this operation, but the space accounting is
-adjusted.
-The promoted clone must not have any conflicting snapshot names of its own.
-The
-.Cm rename
-subcommand can be used to rename any conflicting snapshots.
-.It Xo
-.Nm
-.Cm rename
-.Op Fl f
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Xc
-.It Xo
-.Nm
-.Cm rename
-.Op Fl fp
-.Ar filesystem Ns | Ns Ar volume
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Renames the given dataset.
-The new target can be located anywhere in the ZFS hierarchy, with the exception
-of snapshots.
-Snapshots can only be renamed within the parent file system or volume.
-When renaming a snapshot, the parent file system of the snapshot does not need
-to be specified as part of the second argument.
-Renamed file systems can inherit new mount points, in which case they are
-unmounted and remounted at the new mount point.
-.Bl -tag -width "-a"
-.It Fl f
-Force unmount any filesystems that need to be unmounted in the process.
-.It Fl p
-Creates all the nonexistent parent datasets.
-Datasets created in this manner are automatically mounted according to the
-.Sy mountpoint
-property inherited from their parent.
-.El
-.It Xo
-.Nm
-.Cm rename
-.Fl r
-.Ar snapshot Ar snapshot
-.Xc
-Recursively rename the snapshots of all descendent datasets.
-Snapshots are the only dataset that can be renamed recursively.
-.It Xo
-.Nm
-.Cm list
-.Op Fl r Ns | Ns Fl d Ar depth
-.Op Fl Hp
-.Oo Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... Oc
-.Oo Fl s Ar property Oc Ns ...
-.Oo Fl S Ar property Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Oc Ns ...
-.Xc
-Lists the property information for the given datasets in tabular form.
-If specified, you can list property information by the absolute pathname or the
-relative pathname.
-By default, all file systems and volumes are displayed.
-Snapshots are displayed if the
-.Sy listsnaps
-property is
-.Sy on
-.Po the default is
-.Sy off
-.Pc .
-The following fields are displayed:
-.Sy name Ns \&, Sy used Ns \&, Sy available Ns \&, Sy referenced Ns \&, Sy mountpoint Ns .
-.Bl -tag -width "-H"
-.It Fl H
-Used for scripting mode.
-Do not print headers and separate fields by a single tab instead of arbitrary
-white space.
-.It Fl S Ar property
-Same as the
-.Fl s
-option, but sorts by property in descending order.
-.It Fl d Ar depth
-Recursively display any children of the dataset, limiting the recursion to
-.Ar depth .
-A
-.Ar depth
-of
-.Sy 1
-will display only the dataset and its direct children.
-.It Fl o Ar property
-A comma-separated list of properties to display.
-The property must be:
-.Bl -bullet
-.It
-One of the properties described in the
-.Sx Native Properties
-section
-.It
-A user property
-.It
-The value
-.Sy name
-to display the dataset name
-.It
-The value
-.Sy space
-to display space usage properties on file systems and volumes.
-This is a shortcut for specifying
-.Fl o Sy name Ns \&, Ns Sy avail Ns \&, Ns Sy used Ns \&, Ns Sy usedsnap Ns \&, Ns
-.Sy usedds Ns \&, Ns Sy usedrefreserv Ns \&, Ns Sy usedchild Fl t
-.Sy filesystem Ns \&, Ns Sy volume
-syntax.
-.El
-.It Fl p
-Display numbers in parsable
-.Pq exact
-values.
-.It Fl r
-Recursively display any children of the dataset on the command line.
-.It Fl s Ar property
-A property for sorting the output by column in ascending order based on the
-value of the property.
-The property must be one of the properties described in the
-.Sx Properties
-section or the value
-.Sy name
-to sort by the dataset name.
-Multiple properties can be specified at one time using multiple
-.Fl s
-property options.
-Multiple
-.Fl s
-options are evaluated from left to right in decreasing order of importance.
-The following is a list of sorting criteria:
-.Bl -bullet
-.It
-Numeric types sort in numeric order.
-.It
-String types sort in alphabetical order.
-.It
-Types inappropriate for a row sort that row to the literal bottom, regardless of
-the specified ordering.
-.El
-.Pp
-If no sorting options are specified the existing behavior of
-.Nm zfs Cm list
-is preserved.
-.It Fl t Ar type
-A comma-separated list of types to display, where
-.Ar type
-is one of
-.Sy filesystem ,
-.Sy snapshot ,
-.Sy volume ,
-.Sy bookmark ,
-or
-.Sy all .
-For example, specifying
-.Fl t Sy snapshot
-displays only snapshots.
-.El
-.It Xo
-.Nm
-.Cm set
-.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ...
-.Xc
-Sets the property or list of properties to the given value(s) for each dataset.
-Only some properties can be edited.
-See the
-.Sx Properties
-section for more information on what properties can be set and acceptable
-values.
-Numeric values can be specified as exact values, or in a human-readable form
-with a suffix of
-.Sy B , K , M , G , T , P , E , Z
-.Po for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes,
-or zettabytes, respectively
-.Pc .
-User properties can be set on snapshots.
-For more information, see the
-.Sx User Properties
-section.
-.It Xo
-.Nm
-.Cm get
-.Op Fl r Ns | Ns Fl d Ar depth
-.Op Fl Hp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... Oc
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Cm all | Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Oo Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Oc Ns ...
-.Xc
-Displays properties for the given datasets.
-If no datasets are specified, then the command displays properties for all
-datasets on the system.
-For each property, the following columns are displayed:
-.Bd -literal
-    name      Dataset name
-    property  Property name
-    value     Property value
-    source    Property source  \fBlocal\fP, \fBdefault\fP, \fBinherited\fP,
-              \fBtemporary\fP, \fBreceived\fP or none (\fB-\fP).
-.Ed
-.Pp
-All columns are displayed by default, though this can be controlled by using the
-.Fl o
-option.
-This command takes a comma-separated list of properties as described in the
-.Sx Native Properties
-and
-.Sx User Properties
-sections.
-.Pp
-The value
-.Sy all
-can be used to display all properties that apply to the given dataset's type
-.Pq filesystem, volume, snapshot, or bookmark .
-.Bl -tag -width "-H"
-.It Fl H
-Display output in a form more easily parsed by scripts.
-Any headers are omitted, and fields are explicitly separated by a single tab
-instead of an arbitrary amount of space.
-.It Fl d Ar depth
-Recursively display any children of the dataset, limiting the recursion to
-.Ar depth .
-A depth of
-.Sy 1
-will display only the dataset and its direct children.
-.It Fl o Ar field
-A comma-separated list of columns to display.
-.Sy name Ns \&, Ns Sy property Ns \&, Ns Sy value Ns \&, Ns Sy source
-is the default value.
-.It Fl p
-Display numbers in parsable
-.Pq exact
-values.
-.It Fl r
-Recursively display properties for any children.
-.It Fl s Ar source
-A comma-separated list of sources to display.
-Those properties coming from a source other than those in this list are ignored.
-Each source must be one of the following:
-.Sy local ,
-.Sy default ,
-.Sy inherited ,
-.Sy temporary ,
-.Sy received ,
-and
-.Sy none .
-The default value is all sources.
-.It Fl t Ar type
-A comma-separated list of types to display, where
-.Ar type
-is one of
-.Sy filesystem ,
-.Sy snapshot ,
-.Sy volume ,
-.Sy bookmark ,
-or
-.Sy all .
-.El
-.It Xo
-.Nm
-.Cm inherit
-.Op Fl rS
-.Ar property Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ...
-.Xc
-Clears the specified property, causing it to be inherited from an ancestor,
-restored to default if no ancestor has the property set, or with the
-.Fl S
-option reverted to the received value if one exists.
-See the
-.Sx Properties
-section for a listing of default values, and details on which properties can be
-inherited.
-.Bl -tag -width "-r"
-.It Fl r
-Recursively inherit the given property for all children.
-.It Fl S
-Revert the property to the received value if one exists; otherwise operate as
-if the
-.Fl S
-option was not specified.
-.El
-.It Xo
-.Nm
-.Cm upgrade
-.Xc
-Displays a list of file systems that are not the most recent version.
-.It Xo
-.Nm
-.Cm upgrade
-.Fl v
-.Xc
-Displays a list of currently supported file system versions.
-.It Xo
-.Nm
-.Cm upgrade
-.Op Fl r
-.Op Fl V Ar version
-.Fl a | Ar filesystem
-.Xc
-Upgrades file systems to a new on-disk version.
-Once this is done, the file systems will no longer be accessible on systems
-running older versions of the software.
-.Nm zfs Cm send
-streams generated from new snapshots of these file systems cannot be accessed on
-systems running older versions of the software.
-.Pp
-In general, the file system version is independent of the pool version.
-See
-.Xr zpool 8
-for information on the
-.Nm zpool Cm upgrade
-command.
-.Pp
-In some cases, the file system version and the pool version are interrelated and
-the pool version must be upgraded before the file system version can be
-upgraded.
-.Bl -tag -width "-V"
-.It Fl V Ar version
-Upgrade to the specified
-.Ar version .
-If the
-.Fl V
-flag is not specified, this command upgrades to the most recent version.
-This
-option can only be used to increase the version number, and only up to the most
-recent version supported by this software.
-.It Fl a
-Upgrade all file systems on all imported pools.
-.It Ar filesystem
-Upgrade the specified file system.
-.It Fl r
-Upgrade the specified file system and all descendent file systems.
-.El
-.It Xo
-.Nm
-.Cm userspace
-.Op Fl Hinp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar snapshot
-.Xc
-Displays space consumed by, and quotas on, each user in the specified filesystem
-or snapshot.
-This corresponds to the
-.Sy userused@ Ns Em user ,
-.Sy userobjused@ Ns Em user ,
-.Sy userquota@ Ns Em user ,
-and
-.Sy userobjquota@ Ns Em user
-properties.
-.Bl -tag -width "-H"
-.It Fl H
-Do not print headers, use tab-delimited output.
-.It Fl S Ar field
-Sort by this field in reverse order.
-See
-.Fl s .
-.It Fl i
-Translate SID to POSIX ID.
-The POSIX ID may be ephemeral if no mapping exists.
-Normal POSIX interfaces
-.Po for example,
-.Xr stat 2 ,
-.Nm ls Fl l
-.Pc
-perform this translation, so the
-.Fl i
-option allows the output from
-.Nm zfs Cm userspace
-to be compared directly with those utilities.
-However,
-.Fl i
-may lead to confusion if some files were created by an SMB user before an
-SMB-to-POSIX name mapping was established.
-In such a case, some files will be owned by the SMB entity and some by the POSIX
-entity.
-However, the
-.Fl i
-option will report that the POSIX entity has the total usage and quota for both.
-.It Fl n
-Print numeric ID instead of user/group name.
-.It Fl o Ar field Ns Oo , Ns Ar field Oc Ns ...
-Display only the specified fields from the following set:
-.Sy type ,
-.Sy name ,
-.Sy used ,
-.Sy quota .
-The default is to display all fields.
-.It Fl p
-Use exact
-.Pq parsable
-numeric output.
-.It Fl s Ar field
-Sort output by this field.
-The
-.Fl s
-and
-.Fl S
-flags may be specified multiple times to sort first by one field, then by
-another.
-The default is
-.Fl s Sy type Fl s Sy name .
-.It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ...
-Print only the specified types from the following set:
-.Sy all ,
-.Sy posixuser ,
-.Sy smbuser ,
-.Sy posixgroup ,
-.Sy smbgroup .
-The default is
-.Fl t Sy posixuser Ns \&, Ns Sy smbuser .
-The default can be changed to include group types.
-.El
-.It Xo
-.Nm
-.Cm groupspace
-.Op Fl Hinp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Oo Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar snapshot
-.Xc
-Displays space consumed by, and quotas on, each group in the specified
-filesystem or snapshot.
-This subcommand is identical to
-.Nm zfs Cm userspace ,
-except that the default types to display are
-.Fl t Sy posixgroup Ns \&, Ns Sy smbgroup .
-.It Xo
-.Nm
-.Cm projectspace
-.Op Fl Hp
-.Oo Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... Oc
-.Oo Fl s Ar field Oc Ns ...
-.Oo Fl S Ar field Oc Ns ...
-.Ar filesystem Ns | Ns Ar snapshot
-.Xc
-Displays space consumed by, and quotas on, each project in the specified
-filesystem or snapshot. This subcommand is identical to
-.Nm zfs Cm userspace ,
-except that the project identifier is a number, not a name.
-Consequently it needs neither the option
-.Sy -i
-for SID to POSIX ID nor
-.Sy -n
-for numeric ID, nor
-.Sy -t
-for types.
-.It Xo
-.Nm
-.Cm project
-.Oo Fl d Ns | Ns Fl r Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Xc
-List project identifier (ID) and inherit flag of file(s) or directories.
-.Bl -tag -width "-d"
-.It Fl d
-Show the directory project ID and inherit flag, not those of its children. It will
-override any previously specified
-.Fl r
-option.
-.It Fl r
-Show on subdirectories recursively. It will override any previously specified
-.Fl d
-option.
-.El
-.It Xo
-.Nm
-.Cm project
-.Fl C
-.Oo Fl kr Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Xc
-Clear project inherit flag and/or ID on the file(s) or directories.
-.Bl -tag -width "-k"
-.It Fl k
-Keep the project ID unchanged. If not specified, the project ID will be reset
-as zero.
-.It Fl r
-Clear on subdirectories recursively.
-.El
-.It Xo
-.Nm
-.Cm project
-.Fl c
-.Oo Fl 0 Ns Oc
-.Oo Fl d Ns | Ns Fl r Ns Oc
-.Op Fl p Ar id
-.Ar file Ns | Ns Ar directory Ns ...
-.Xc
-Check project ID and inherit flag on the file(s) or directories, report the
-entries without project inherit flag or with different project IDs from the
-specified (via
-.Fl p
-option) value or the target directory's project ID.
-.Bl -tag -width "-0"
-.It Fl 0
-Print file name with a trailing NUL instead of newline (by default), like
-"find -print0".
-.It Fl d
-Check the directory project ID and inherit flag, not those of its children. It will
-override any previously specified
-.Fl r
-option.
-.It Fl p
-Specify the referenced ID for comparing with the target file(s) or directories'
-project IDs. If not specified, the target (top) directory's project ID will be
-used as the referenced one.
-.It Fl r
-Check on subdirectories recursively. It will override any previously specified
-.Fl d
-option.
-.El
-.It Xo
-.Nm
-.Cm project
-.Op Fl p Ar id
-.Oo Fl rs Ns Oc
-.Ar file Ns | Ns Ar directory Ns ...
-.Xc
-Set project ID and/or inherit flag on the file(s) or directories.
-.Bl -tag -width "-p"
-.It Fl p
-Set the file(s)' or directories' project ID with the given value.
-.It Fl r
-Set on subdirectories recursively.
-.It Fl s
-Set project inherit flag on the given file(s) or directories. It is usually used
-to set up a tree quota on the target directory, together with the
-.Fl r
-option. When setting up a tree quota, by default the directory's
-project ID will be set to all its descendants unless you specify the project
-ID via
-.Fl p
-option explicitly.
-.El
-.It Xo
-.Nm
-.Cm mount
-.Xc
-Displays all ZFS file systems currently mounted.
-.It Xo
-.Nm
-.Cm mount
-.Op Fl Olv
-.Op Fl o Ar options
-.Fl a | Ar filesystem
-.Xc
-Mount ZFS filesystem on a path described by its
-.Sy mountpoint
-property, if the path exists and is empty. If
-.Sy mountpoint
-is set to
-.Em legacy ,
-the filesystem should be instead mounted using
-.Xr mount 8 .
-.Bl -tag -width "-O"
-.It Fl O
-Perform an overlay mount. Allows mounting in non-empty
-.Sy mountpoint .
-See
-.Xr mount 8
-for more information.
-.It Fl a
-Mount all available ZFS file systems.
-Invoked automatically as part of the boot process if configured.
-.It Ar filesystem
-Mount the specified filesystem.
-.It Fl o Ar options
-An optional, comma-separated list of mount options to use temporarily for the
-duration of the mount.
-See the
-.Sx Temporary Mount Point Properties
-section for details.
-.It Fl l
-Load keys for encrypted filesystems as they are being mounted. This is
-equivalent to executing
-.Nm zfs Cm load-key
-on each encryption root before mounting it. Note that if a filesystem has a
-.Sy keylocation
-of
-.Sy prompt
-this will cause the terminal to interactively block after asking for the key.
-.It Fl v
-Report mount progress.
-.El
-.It Xo
-.Nm
-.Cm unmount
-.Op Fl f
-.Fl a | Ar filesystem Ns | Ns Ar mountpoint
-.Xc
-Unmounts currently mounted ZFS file systems.
-.Bl -tag -width "-a"
-.It Fl a
-Unmount all available ZFS file systems.
-Invoked automatically as part of the shutdown process.
-.It Ar filesystem Ns | Ns Ar mountpoint
-Unmount the specified filesystem.
-The command can also be given a path to a ZFS file system mount point on the
-system.
-.It Fl f
-Forcefully unmount the file system, even if it is currently in use.
-.El
-.It Xo
-.Nm
-.Cm share
-.Fl a | Ar filesystem
-.Xc
-Shares available ZFS file systems.
-.Bl -tag -width "-a"
-.It Fl a
-Share all available ZFS file systems.
-Invoked automatically as part of the boot process.
-.It Ar filesystem
-Share the specified filesystem according to the
-.Sy sharenfs
-and
-.Sy sharesmb
-properties.
-File systems are shared when the
-.Sy sharenfs
-or
-.Sy sharesmb
-property is set.
-.El
-.It Xo
-.Nm
-.Cm unshare
-.Fl a | Ar filesystem Ns | Ns Ar mountpoint
-.Xc
-Unshares currently shared ZFS file systems.
-.Bl -tag -width "-a"
-.It Fl a
-Unshare all available ZFS file systems.
-Invoked automatically as part of the shutdown process.
-.It Ar filesystem Ns | Ns Ar mountpoint
-Unshare the specified filesystem.
-The command can also be given a path to a ZFS file system shared on the system.
-.El
-.It Xo
-.Nm
-.Cm bookmark
-.Ar snapshot bookmark
-.Xc
-Creates a bookmark of the given snapshot.
-Bookmarks mark the point in time when the snapshot was created, and can be used
-as the incremental source for a
-.Nm zfs Cm send
-command.
-.Pp
-This feature must be enabled to be used.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy bookmarks
-feature.
-.It Xo
-.Nm
-.Cm send
-.Op Fl DLPRbcehnpvw
-.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
-.Ar snapshot
-.Xc
-Creates a stream representation of the second
-.Ar snapshot ,
-which is written to standard output.
-The output can be redirected to a file or to a different system
-.Po for example, using
-.Xr ssh 1
-.Pc .
-By default, a full stream is generated.
-.Bl -tag -width "-D"
-.It Fl D, -dedup
-Generate a deduplicated stream.
-\fBDeduplicated send is deprecated and will be removed in a future release.\fR
-(In the future, the flag will be accepted but a regular, non-deduplicated
-stream will be generated.)
-Blocks which would have been sent multiple times in the send stream will only be
-sent once.
-The receiving system must also support this feature to receive a deduplicated
-stream.
-This flag can be used regardless of the dataset's
-.Sy dedup
-property, but performance will be much better if the filesystem uses a
-dedup-capable checksum
-.Po for example,
-.Sy sha256
-.Pc .
-.It Fl I Ar snapshot
-Generate a stream package that sends all intermediary snapshots from the first
-snapshot to the second snapshot.
-For example,
-.Fl I Em @a Em fs@d
-is similar to
-.Fl i Em @a Em fs@b Ns \&; Fl i Em @b Em fs@c Ns \&; Fl i Em @c Em fs@d .
-The incremental source may be specified as with the
-.Fl i
-option.
-.It Fl L, -large-block
-Generate a stream which may contain blocks larger than 128KB.
-This flag has no effect if the
-.Sy large_blocks
-pool feature is disabled, or if the
-.Sy recordsize
-property of this filesystem has never been set above 128KB.
-The receiving system must have the
-.Sy large_blocks
-pool feature enabled as well.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy large_blocks
-feature.
-.It Fl P, -parsable
-Print machine-parsable verbose information about the stream package generated.
-.It Fl R, -replicate
-Generate a replication stream package, which will replicate the specified
-file system, and all descendent file systems, up to the named snapshot.
-When received, all properties, snapshots, descendent file systems, and clones
-are preserved.
-.Pp
-If the
-.Fl i
-or
-.Fl I
-flags are used in conjunction with the
-.Fl R
-flag, an incremental replication stream is generated.
-The current values of properties, and current snapshot and file system names are
-set when the stream is received.
-If the
-.Fl F
-flag is specified when this stream is received, snapshots and file systems that
-do not exist on the sending side are destroyed. If the
-.Fl R
-flag is used to send encrypted datasets, then
-.Fl w
-must also be specified.
-.It Fl e, -embed
-Generate a more compact stream by using
-.Sy WRITE_EMBEDDED
-records for blocks which are stored more compactly on disk by the
-.Sy embedded_data
-pool feature.
-This flag has no effect if the
-.Sy embedded_data
-feature is disabled.
-The receiving system must have the
-.Sy embedded_data
-feature enabled.
-If the
-.Sy lz4_compress
-feature is active on the sending system, then the receiving system must have
-that feature enabled as well. Datasets that are sent with this flag may not be
-received as an encrypted dataset, since encrypted datasets cannot use the
-.Sy embedded_data
-feature.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy embedded_data
-feature.
-.It Fl b, -backup
-Sends only received property values whether or not they are overridden by local
-settings, but only if the dataset has ever been received. Use this option when
-you want
-.Nm zfs Cm receive
-to restore received properties backed up on the sent dataset and to avoid
-sending local settings that may have nothing to do with the source dataset,
-but only with how the data is backed up.
-.It Fl c, -compressed
-Generate a more compact stream by using compressed WRITE records for blocks
-which are compressed on disk and in memory
-.Po see the
-.Sy compression
-property for details
-.Pc .
-If the
-.Sy lz4_compress
-feature is active on the sending system, then the receiving system must have
-that feature enabled as well.
-If the
-.Sy large_blocks
-feature is enabled on the sending system but the
-.Fl L
-option is not supplied in conjunction with
-.Fl c ,
-then the data will be decompressed before sending so it can be split into
-smaller block sizes.
-.It Fl w, -raw
-For encrypted datasets, send data exactly as it exists on disk. This allows
-backups to be taken even if encryption keys are not currently loaded. The
-backup may then be received on an untrusted machine since that machine will
-not have the encryption keys to read the protected data or alter it without
-being detected. Upon being received, the dataset will have the same encryption
-keys as it did on the send side, although the
-.Sy keylocation
-property will be defaulted to
-.Sy prompt
-if not otherwise provided. For unencrypted datasets, this flag will be
-equivalent to
-.Fl Lec .
-Note that if you do not use this flag for sending encrypted datasets, data will
-be sent unencrypted and may be re-encrypted with a different encryption key on
-the receiving system, which will disable the ability to do a raw send to that
-system for incrementals.
-.It Fl h, -holds
-Generate a stream package that includes any snapshot holds (created with the
-.Sy zfs hold
-command) and indicates to
-.Sy zfs receive
-that the holds be applied to the dataset on the receiving system.
-.It Fl i Ar snapshot
-Generate an incremental stream from the first
-.Ar snapshot
-.Pq the incremental source
-to the second
-.Ar snapshot
-.Pq the incremental target .
-The incremental source can be specified as the last component of the snapshot
-name
-.Po the
-.Sy @
-character and following
-.Pc
-and it is assumed to be from the same file system as the incremental target.
-.Pp
-If the destination is a clone, the source may be the origin snapshot, which must
-be fully specified
-.Po for example,
-.Em pool/fs@origin ,
-not just
-.Em @origin
-.Pc .
-.It Fl n, -dryrun
-Do a dry-run
-.Pq Qq No-op
-send.
-Do not generate any actual send data.
-This is useful in conjunction with the
-.Fl v
-or
-.Fl P
-flags to determine what data will be sent.
-In this case, the verbose output will be written to standard output
-.Po contrast with a non-dry-run, where the stream is written to standard output
-and the verbose output goes to standard error
-.Pc .
-.It Fl p, -props
-Include the dataset's properties in the stream.
-This flag is implicit when
-.Fl R
-is specified.
-The receiving system must also support this feature. Sends of encrypted datasets
-must use
-.Fl w
-when using this flag.
-.It Fl v, -verbose
-Print verbose information about the stream package generated.
-This information includes a per-second report of how much data has been sent.
-.Pp
-The format of the stream is committed.
-You will be able to receive your streams on future versions of ZFS.
-.El
-.It Xo
-.Nm
-.Cm send
-.Op Fl LPcenvw
-.Op Fl i Ar snapshot Ns | Ns Ar bookmark
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Xc
-Generate a send stream, which may be of a filesystem, and may be incremental
-from a bookmark.
-If the destination is a filesystem or volume, the pool must be read-only, or the
-filesystem must not be mounted.
-When the stream generated from a filesystem or volume is received, the default
-snapshot name will be
-.Qq --head-- .
-.Bl -tag -width "-L"
-.It Fl L, -large-block
-Generate a stream which may contain blocks larger than 128KB.
-This flag has no effect if the
-.Sy large_blocks
-pool feature is disabled, or if the
-.Sy recordsize
-property of this filesystem has never been set above 128KB.
-The receiving system must have the
-.Sy large_blocks
-pool feature enabled as well.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy large_blocks
-feature.
-.It Fl P, -parsable
-Print machine-parsable verbose information about the stream package generated.
-.It Fl c, -compressed
-Generate a more compact stream by using compressed WRITE records for blocks
-which are compressed on disk and in memory
-.Po see the
-.Sy compression
-property for details
-.Pc .
-If the
-.Sy lz4_compress
-feature is active on the sending system, then the receiving system must have
-that feature enabled as well.
-If the
-.Sy large_blocks
-feature is enabled on the sending system but the
-.Fl L
-option is not supplied in conjunction with
-.Fl c ,
-then the data will be decompressed before sending so it can be split into
-smaller block sizes.
-.It Fl w, -raw
-For encrypted datasets, send data exactly as it exists on disk. This allows
-backups to be taken even if encryption keys are not currently loaded. The
-backup may then be received on an untrusted machine since that machine will
-not have the encryption keys to read the protected data or alter it without
-being detected. Upon being received, the dataset will have the same encryption
-keys as it did on the send side, although the
-.Sy keylocation
-property will be defaulted to
-.Sy prompt
-if not otherwise provided. For unencrypted datasets, this flag will be
-equivalent to
-.Fl Lec .
-Note that if you do not use this flag for sending encrypted datasets, data will
-be sent unencrypted and may be re-encrypted with a different encryption key on
-the receiving system, which will disable the ability to do a raw send to that
-system for incrementals.
-.It Fl e, -embed
-Generate a more compact stream by using
-.Sy WRITE_EMBEDDED
-records for blocks which are stored more compactly on disk by the
-.Sy embedded_data
-pool feature.
-This flag has no effect if the
-.Sy embedded_data
-feature is disabled.
-The receiving system must have the
-.Sy embedded_data
-feature enabled.
-If the
-.Sy lz4_compress
-feature is active on the sending system, then the receiving system must have
-that feature enabled as well. Datasets that are sent with this flag may not be
-received as an encrypted dataset, since encrypted datasets cannot use the
-.Sy embedded_data
-feature.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags and the
-.Sy embedded_data
-feature.
-.It Fl i Ar snapshot Ns | Ns Ar bookmark
-Generate an incremental send stream.
-The incremental source must be an earlier snapshot in the destination's history.
-It will commonly be an earlier snapshot in the destination's file system, in
-which case it can be specified as the last component of the name
-.Po the
-.Sy #
-or
-.Sy @
-character and following
-.Pc .
-.Pp
-If the incremental target is a clone, the incremental source can be the origin
-snapshot, or an earlier snapshot in the origin's filesystem, or the origin's
-origin, etc.
-.It Fl n, -dryrun
-Do a dry-run
-.Pq Qq No-op
-send.
-Do not generate any actual send data.
-This is useful in conjunction with the
-.Fl v
-or
-.Fl P
-flags to determine what data will be sent.
-In this case, the verbose output will be written to standard output
-.Po contrast with a non-dry-run, where the stream is written to standard output
-and the verbose output goes to standard error
-.Pc .
-.It Fl v, -verbose
-Print verbose information about the stream package generated.
-This information includes a per-second report of how much data has been sent.
-.El
-.It Xo
-.Nm
-.Cm send
-.Op Fl Penv
-.Fl t
-.Ar receive_resume_token
-.Xc
-Creates a send stream which resumes an interrupted receive.
-The
-.Ar receive_resume_token
-is the value of this property on the filesystem or volume that was being
-received into.
-See the documentation for
-.Sy zfs receive -s
-for more details.
-.It Xo
-.Nm
-.Cm receive
-.Op Fl Fhnsuv
-.Op Fl o Sy origin Ns = Ns Ar snapshot
-.Op Fl o Ar property Ns = Ns Ar value
-.Op Fl x Ar property
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
-.Xc
-.It Xo
-.Nm
-.Cm receive
-.Op Fl Fhnsuv
-.Op Fl d Ns | Ns Fl e
-.Op Fl o Sy origin Ns = Ns Ar snapshot
-.Op Fl o Ar property Ns = Ns Ar value
-.Op Fl x Ar property
-.Ar filesystem
-.Xc
-Creates a snapshot whose contents are as specified in the stream provided on
-standard input.
-If a full stream is received, then a new file system is created as well.
-Streams are created using the
-.Nm zfs Cm send
-subcommand, which by default creates a full stream.
-.Nm zfs Cm recv
-can be used as an alias for
-.Nm zfs Cm receive .
-.Pp
-If an incremental stream is received, then the destination file system must
-already exist, and its most recent snapshot must match the incremental stream's
-source.
-For
-.Sy zvols ,
-the destination device link is destroyed and recreated, which means the
-.Sy zvol
-cannot be accessed during the
-.Cm receive
-operation.
-.Pp
-When a snapshot replication package stream that is generated by using the
-.Nm zfs Cm send Fl R
-command is received, any snapshots that do not exist on the sending location are
-destroyed by using the
-.Nm zfs Cm destroy Fl d
-command.
-.Pp
-Deduplicated send streams can be generated by using the
-.Nm zfs Cm send Fl D
-command.
-\fBThe ability to send and receive deduplicated send streams is deprecated.\fR
-In the future, the ability to receive a deduplicated send stream with
-.Nm zfs Cm receive
-will be removed.
-However, in the future, a utility will be provided to convert a
-deduplicated send stream to a regular (non-deduplicated) stream.
-This future utility will require that the send stream be located in a
-seek-able file, rather than provided by a pipe.
-.Pp
-If
-.Fl o Em property Ns = Ns Ar value
-or
-.Fl x Em property
-is specified, it applies to the effective value of the property throughout
-the entire subtree of replicated datasets. Effective property values will be
-set
-.Pq Fl o
-or inherited
-.Pq Fl x
-on the topmost dataset in the replicated subtree. In descendant datasets, if the
-property is set by the send stream, it will be overridden by forcing the
-property to be inherited from the topmost file system. Received properties
-are retained in spite of being overridden and may be restored with
-.Nm zfs Cm inherit Fl S .
-Specifying
-.Fl o Sy origin Ns = Ns Em snapshot
-is a special case because, even if
-.Sy origin
-is a read-only property and cannot be set, it's allowed to receive the send
-stream as a clone of the given snapshot.
-.Pp
-Raw encrypted send streams
-.Pq created with Nm zfs Cm send Fl w
-may only be received as is, and cannot be re-encrypted, decrypted, or
-recompressed by the receive process. Unencrypted streams can be received as
-encrypted datasets, either through inheritance or by specifying encryption
-parameters with the
-.Fl o
-options. Note that the
-.Sy keylocation
-property cannot be overridden to
-.Sy prompt
-during a receive. This is because the receive process itself is already using
-stdin for the send stream. Instead, the property can be overridden after the
-receive completes.
-.Pp
-The added security provided by raw sends adds some restrictions to the send
-and receive process. ZFS will not allow a mix of raw receives and non-raw
-receives. Specifically, any raw incremental receives that are attempted after
-a non-raw receive will fail. Non-raw receives do not have this restriction and,
-therefore, are always possible. Because of this, it is best practice to always
-use either raw sends for their security benefits or non-raw sends for their
-flexibility when working with encrypted datasets, but not a combination.
-.Pp
-The reason for this restriction stems from the inherent restrictions of the
-AEAD ciphers that ZFS uses to encrypt data. When using ZFS native encryption,
-each block of data is encrypted against a randomly generated number known as
-the "initialization vector" (IV), which is stored in the filesystem metadata.
-This number is required by the encryption algorithms whenever the data is to
-be decrypted. Together, all of the IVs provided for all of the blocks in a
-given snapshot are collectively called an "IV set". When ZFS performs a raw
-send, the IV set is transferred from the source to the destination in the send
-stream. When ZFS performs a non-raw send, the data is decrypted by the source
-system and re-encrypted by the destination system, creating a snapshot with
-effectively the same data, but a different IV set. In order for decryption to
-work after a raw send, ZFS must ensure that the IV set used on both the source
-and destination side match. When an incremental raw receive is performed on
-top of an existing snapshot, ZFS will check to confirm that the "from"
-snapshot on both the source and destination were using the same IV set,
-ensuring the new IV set is consistent.
-.Pp
-The name of the snapshot
-.Pq and file system, if a full stream is received
-that this subcommand creates depends on the argument type and the use of the
-.Fl d
-or
-.Fl e
-options.
-.Pp
-If the argument is a snapshot name, the specified
-.Ar snapshot
-is created.
-If the argument is a file system or volume name, a snapshot with the same name
-as the sent snapshot is created within the specified
-.Ar filesystem
-or
-.Ar volume .
-If neither of the
-.Fl d
-or
-.Fl e
-options are specified, the provided target snapshot name is used exactly as
-provided.
-.Pp
-The
-.Fl d
-and
-.Fl e
-options cause the file system name of the target snapshot to be determined by
-appending a portion of the sent snapshot's name to the specified target
-.Ar filesystem .
-If the
-.Fl d
-option is specified, all but the first element of the sent snapshot's file
-system path
-.Pq usually the pool name
-is used and any required intermediate file systems within the specified one are
-created.
-If the
-.Fl e
-option is specified, then only the last element of the sent snapshot's file
-system name
-.Pq i.e. the name of the source file system itself
-is used as the target file system name.
-.Bl -tag -width "-F"
-.It Fl F
-Force a rollback of the file system to the most recent snapshot before
-performing the receive operation.
-If receiving an incremental replication stream
-.Po for example, one generated by
-.Nm zfs Cm send Fl R Op Fl i Ns | Ns Fl I
-.Pc ,
-destroy snapshots and file systems that do not exist on the sending side.
-.It Fl d
-Discard the first element of the sent snapshot's file system name, using the
-remaining elements to determine the name of the target file system for the new
-snapshot as described in the paragraph above.
-.It Fl e
-Discard all but the last element of the sent snapshot's file system name, using
-that element to determine the name of the target file system for the new
-snapshot as described in the paragraph above.
-.It Fl h
-Skip the receive of holds.  There is no effect if holds are not sent.
-.It Fl n
-Do not actually receive the stream.
-This can be useful in conjunction with the
-.Fl v
-option to verify the name the receive operation would use.
-.It Fl o Sy origin Ns = Ns Ar snapshot
-Forces the stream to be received as a clone of the given snapshot.
-If the stream is a full send stream, this will create the filesystem
-described by the stream as a clone of the specified snapshot.
-Which snapshot was specified will not affect the success or failure of the
-receive, as long as the snapshot does exist.
-If the stream is an incremental send stream, all the normal verification will be
-performed.
-.It Fl o Em property Ns = Ns Ar value
-Sets the specified property as if the command
-.Nm zfs Cm set Em property Ns = Ns Ar value
-was invoked immediately before the receive. When receiving a stream from
-.Nm zfs Cm send Fl R ,
-causes the property to be inherited by all descendant datasets, as though
-.Nm zfs Cm inherit Em property
-was run on any descendant datasets that have this property set on the
-sending system.
-.Pp
-Any editable property can be set at receive time. Set-once properties bound
-to the received data, such as
-.Sy normalization
-and
-.Sy casesensitivity ,
-cannot be set at receive time even when the datasets are newly created by
-.Nm zfs Cm receive .
-Additionally both settable properties
-.Sy version
-and
-.Sy volsize
-cannot be set at receive time.
-.Pp
-The
-.Fl o
-option may be specified multiple times, for different properties. An error
-results if the same property is specified in multiple
-.Fl o
-or
-.Fl x
-options.
-.Pp
-The
-.Fl o
-option may also be used to override encryption properties upon initial
-receive. This allows unencrypted streams to be received as encrypted datasets.
-To cause the received dataset (or root dataset of a recursive stream) to be
-received as an encryption root, specify encryption properties in the same
-manner as is required for
-.Nm
-.Cm create .
-For instance:
-.Bd -literal
-# zfs send tank/test@snap1 | zfs recv -o encryption=on -o keyformat=passphrase -o keylocation=file:///path/to/keyfile
-.Ed
-.Pp
-Note that
-.Op Fl o Ar keylocation Ns = Ns Ar prompt
-may not be specified here, since stdin is already being utilized for the send
-stream. Once the receive has completed, you can use
-.Nm
-.Cm set
-to change this setting after the fact. Similarly, you can receive a dataset as
-an encrypted child by specifying
-.Op Fl x Ar encryption
-to force the property to be inherited. Overriding encryption properties (except
-for
-.Sy keylocation Ns )
-is not possible with raw send streams.
-.It Fl s
-If the receive is interrupted, save the partially received state, rather
-than deleting it.
-Interruption may be due to premature termination of the stream
-.Po e.g. due to network failure or failure of the remote system
-if the stream is being read over a network connection
-.Pc ,
-a checksum error in the stream, termination of the
-.Nm zfs Cm receive
-process, or unclean shutdown of the system.
-.Pp
-The receive can be resumed with a stream generated by
-.Nm zfs Cm send Fl t Ar token ,
-where the
-.Ar token
-is the value of the
-.Sy receive_resume_token
-property of the filesystem or volume which is received into.
-.Pp
-To use this flag, the storage pool must have the
-.Sy extensible_dataset
-feature enabled.
-See
-.Xr zpool-features 5
-for details on ZFS feature flags.
-.It Fl u
-File system that is associated with the received stream is not mounted.
-.It Fl v
-Print verbose information about the stream and the time required to perform the
-receive operation.
-.It Fl x Em property
-Ensures that the effective value of the specified property after the
-receive is unaffected by the value of that property in the send stream (if any),
-as if the property had been excluded from the send stream.
-.Pp
-If the specified property is not present in the send stream, this option does
-nothing.
-.Pp
-If a received property needs to be overridden, the effective value will be
-set or inherited, depending on whether the property is inheritable or not.
-.Pp
-In the case of an incremental update,
-.Fl x
-leaves any existing local setting or explicit inheritance unchanged.
-.Pp
-All
-.Fl o
-restrictions (e.g. set-once) apply equally to
-.Fl x .
-.El
-.It Xo
-.Nm
-.Cm receive
-.Fl A
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Abort an interrupted
-.Nm zfs Cm receive Fl s ,
-deleting its saved partially received state.
-.It Xo
-.Nm
-.Cm allow
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Displays permissions that have been delegated on the specified filesystem or
-volume.
-See the other forms of
-.Nm zfs Cm allow
-for more information.
-.Pp
-Delegations are supported under Linux with the exception of
-.Sy mount ,
-.Sy unmount ,
-.Sy mountpoint ,
-.Sy canmount ,
-.Sy rename ,
-and
-.Sy share .
-These permissions cannot be delegated because the Linux
-.Xr mount 8
-command restricts modifications of the global namespace to the root user.
-.It Xo
-.Nm
-.Cm allow
-.Op Fl dglu
-.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns ...
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-.It Xo
-.Nm
-.Cm allow
-.Op Fl dl
-.Fl e Ns | Ns Sy everyone
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Delegates ZFS administration permission for the file systems to non-privileged
-users.
-.Bl -tag -width "-d"
-.It Fl d
-Allow only for the descendent file systems.
-.It Fl e Ns | Ns Sy everyone
-Specifies that the permissions be delegated to everyone.
-.It Fl g Ar group Ns Oo , Ns Ar group Oc Ns ...
-Explicitly specify that permissions are delegated to the group.
-.It Fl l
-Allow
-.Qq locally
-only for the specified file system.
-.It Fl u Ar user Ns Oo , Ns Ar user Oc Ns ...
-Explicitly specify that permissions are delegated to the user.
-.It Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns ...
-Specifies to whom the permissions are delegated.
-Multiple entities can be specified as a comma-separated list.
-If neither of the
-.Fl gu
-options are specified, then the argument is interpreted preferentially as the
-keyword
-.Sy everyone ,
-then as a user name, and lastly as a group name.
-To specify a user or group named
-.Qq everyone ,
-use the
-.Fl g
-or
-.Fl u
-options.
-To specify a group with the same name as a user, use the
-.Fl g
-option.
-.It Xo
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Xc
-The permissions to delegate.
-Multiple permissions may be specified as a comma-separated list.
-Permission names are the same as ZFS subcommand and property names.
-See the property list below.
-Property set names, which begin with
-.Sy @ ,
-may be specified.
-See the
-.Fl s
-form below for details.
-.El
-.Pp
-If neither of the
-.Fl dl
-options are specified, or both are, then the permissions are allowed for the
-file system or volume, and all of its descendents.
-.Pp
-Permissions are generally the ability to use a ZFS subcommand or change a ZFS
-property.
-The following permissions are available:
-.Bd -literal
-NAME             TYPE           NOTES
-allow            subcommand     Must also have the permission that is
-                                being allowed
-clone            subcommand     Must also have the 'create' ability and
-                                'mount' ability in the origin file system
-create           subcommand     Must also have the 'mount' ability.
-                                Must also have the 'refreservation' ability to
-                                create a non-sparse volume.
-destroy          subcommand     Must also have the 'mount' ability
-diff             subcommand     Allows lookup of paths within a dataset
-                                given an object number, and the ability
-                                to create snapshots necessary to
-                                'zfs diff'.
-load-key         subcommand     Allows loading and unloading of encryption key
-                                (see 'zfs load-key' and 'zfs unload-key').
-change-key       subcommand     Allows changing an encryption key via
-                                'zfs change-key'.
-mount            subcommand     Allows mount/umount of ZFS datasets
-promote          subcommand     Must also have the 'mount' and 'promote'
-                                ability in the origin file system
-receive          subcommand     Must also have the 'mount' and 'create'
-                                ability
-rename           subcommand     Must also have the 'mount' and 'create'
-                                ability in the new parent
-rollback         subcommand     Must also have the 'mount' ability
-send             subcommand
-share            subcommand     Allows sharing file systems over NFS
-                                or SMB protocols
-snapshot         subcommand     Must also have the 'mount' ability
-
-groupquota       other          Allows accessing any groupquota@...
-                                property
-groupused        other          Allows reading any groupused@... property
-userprop         other          Allows changing any user property
-userquota        other          Allows accessing any userquota@...
-                                property
-userused         other          Allows reading any userused@... property
-projectobjquota  other          Allows accessing any projectobjquota@...
-                                property
-projectquota     other          Allows accessing any projectquota@... property
-projectobjused   other          Allows reading any projectobjused@... property
-projectused      other          Allows reading any projectused@... property
-
-aclinherit       property
-acltype          property
-atime            property
-canmount         property
-casesensitivity  property
-checksum         property
-compression      property
-copies           property
-devices          property
-exec             property
-filesystem_limit property
-mountpoint       property
-nbmand           property
-normalization    property
-primarycache     property
-quota            property
-readonly         property
-recordsize       property
-refquota         property
-refreservation   property
-reservation      property
-secondarycache   property
-setuid           property
-sharenfs         property
-sharesmb         property
-snapdir          property
-snapshot_limit   property
-utf8only         property
-version          property
-volblocksize     property
-volsize          property
-vscan            property
-xattr            property
-zoned            property
-.Ed
-.It Xo
-.Nm
-.Cm allow
-.Fl c
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Sets
-.Qq create time
-permissions.
-These permissions are granted
-.Pq locally
-to the creator of any newly-created descendent file system.
-.It Xo
-.Nm
-.Cm allow
-.Fl s No @ Ns Ar setname
-.Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ...
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Defines or adds permissions to a permission set.
-The set can be used by other
-.Nm zfs Cm allow
-commands for the specified file system and its descendents.
-Sets are evaluated dynamically, so changes to a set are immediately reflected.
-Permission sets follow the same naming restrictions as ZFS file systems, but the
-name must begin with
-.Sy @ ,
-and can be no more than 64 characters long.
-.It Xo
-.Nm
-.Cm unallow
-.Op Fl dglru
-.Ar user Ns | Ns Ar group Ns Oo , Ns Ar user Ns | Ns Ar group Oc Ns ...
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-.It Xo
-.Nm
-.Cm unallow
-.Op Fl dlr
-.Fl e Ns | Ns Sy everyone
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-.It Xo
-.Nm
-.Cm unallow
-.Op Fl r
-.Fl c
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Removes permissions that were granted with the
-.Nm zfs Cm allow
-command.
-No permissions are explicitly denied, so other permissions granted are still in
-effect,
-for example when the same permission is also granted by an ancestor.
-If no permissions are specified, then all permissions for the specified
-.Ar user ,
-.Ar group ,
-or
-.Sy everyone
-are removed.
-Specifying
-.Sy everyone
-.Po or using the
-.Fl e
-option
-.Pc
-only removes the permissions that were granted to everyone, not all permissions
-for every user and group.
-See the
-.Nm zfs Cm allow
-command for a description of the
-.Fl ldugec
-options.
-.Bl -tag -width "-r"
-.It Fl r
-Recursively remove the permissions from this file system and all descendents.
-.El
-.It Xo
-.Nm
-.Cm unallow
-.Op Fl r
-.Fl s No @ Ns Ar setname
-.Oo Ar perm Ns | Ns @ Ns Ar setname Ns Oo , Ns Ar perm Ns | Ns @ Ns
-.Ar setname Oc Ns ... Oc
-.Ar filesystem Ns | Ns Ar volume
-.Xc
-Removes permissions from a permission set.
-If no permissions are specified, then all permissions are removed, thus removing
-the set entirely.
-.It Xo
-.Nm
-.Cm hold
-.Op Fl r
-.Ar tag Ar snapshot Ns ...
-.Xc
-Adds a single reference, named with the
-.Ar tag
-argument, to the specified snapshot or snapshots.
-Each snapshot has its own tag namespace, and tags must be unique within that
-space.
-.Pp
-If a hold exists on a snapshot, attempts to destroy that snapshot by using the
-.Nm zfs Cm destroy
-command return
-.Er EBUSY .
-.Bl -tag -width "-r"
-.It Fl r
-Specifies that a hold with the given tag is applied recursively to the snapshots
-of all descendent file systems.
-.El
-.It Xo
-.Nm
-.Cm holds
-.Op Fl rH
-.Ar snapshot Ns ...
-.Xc
-Lists all existing user references for the given snapshot or snapshots.
-.Bl -tag -width "-r"
-.It Fl r
-Lists the holds that are set on the named descendent snapshots, in addition to
-listing the holds on the named snapshot.
-.It Fl H
-Do not print headers, use tab-delimited output.
-.El
-.It Xo
-.Nm
-.Cm release
-.Op Fl r
-.Ar tag Ar snapshot Ns ...
-.Xc
-Removes a single reference, named with the
-.Ar tag
-argument, from the specified snapshot or snapshots.
-The tag must already exist for each snapshot.
-If a hold exists on a snapshot, attempts to destroy that snapshot by using the
-.Nm zfs Cm destroy
-command return
-.Er EBUSY .
-.Bl -tag -width "-r"
-.It Fl r
-Recursively releases a hold with the given tag on the snapshots of all
-descendent file systems.
-.El
-.It Xo
-.Nm
-.Cm diff
-.Op Fl FHt
-.Ar snapshot Ar snapshot Ns | Ns Ar filesystem
-.Xc
-Display the difference between a snapshot of a given filesystem and another
-snapshot of that filesystem from a later time or the current contents of the
-filesystem.
-The first column is a character indicating the type of change, the other columns
-indicate pathname, new pathname
-.Pq in case of rename ,
-change in link count, and optionally file type and/or change time.
-The types of change are:
-.Bd -literal
--       The path has been removed
-+       The path has been created
-M       The path has been modified
-R       The path has been renamed
-.Ed
-.Bl -tag -width "-F"
-.It Fl F
-Display an indication of the type of file, in a manner similar to the
-.Fl
-option of
-.Xr ls 1 .
-.Bd -literal
-B       Block device
-C       Character device
-/       Directory
->       Door
-|       Named pipe
-@       Symbolic link
-P       Event port
-=       Socket
-F       Regular file
-.Ed
-.It Fl H
-Give more parsable tab-separated output, without header lines and without
-arrows.
-.It Fl t
-Display the path's inode change time as the first column of output.
-.El
-.It Xo
-.Nm
-.Cm program
-.Op Fl jn
-.Op Fl t Ar instruction-limit
-.Op Fl m Ar memory-limit
-.Ar pool script
-.Op --
-.Ar arg1 No ...
-.Xc
-Executes
-.Ar script
-as a ZFS channel program on
-.Ar pool .
-The ZFS channel
-program interface allows ZFS administrative operations to be run
-programmatically via a Lua script.
-The entire script is executed atomically, with no other administrative
-operations taking effect concurrently.
-A library of ZFS calls is made available to channel program scripts.
-Channel programs may only be run with root privileges.
-.sp
-For full documentation of the ZFS channel program interface, see the manual
-page for
-.Xr zfs-program 8 .
-.Bl -tag -width ""
-.It Fl j
-Display channel program output in JSON format. When this flag is specified and
-standard output is empty - channel program encountered an error. The details of
-such an error will be printed to standard error in plain text.
-.It Fl n
-Executes a read-only channel program, which runs faster.
-The program cannot change on-disk state by calling functions from
-the zfs.sync submodule.
-The program can be used to gather information such as properties and
-determining if changes would succeed (zfs.check.*).
-Without this flag, all pending changes must be synced to disk before
-a channel program can complete.
-.It Fl t Ar instruction-limit
-Limit the number of Lua instructions to execute.
-If a channel program executes more than the specified number of instructions,
-it will be stopped and an error will be returned.
-The default limit is 10 million instructions, and it can be set to a maximum of
-100 million instructions.
-.It Fl m Ar memory-limit
-Memory limit, in bytes.
-If a channel program attempts to allocate more memory than the given limit,
-it will be stopped and an error returned.
-The default memory limit is 10 MB, and can be set to a maximum of 100 MB.
-.sp
-All remaining argument strings are passed directly to the channel program as
-arguments.
-See
-.Xr zfs-program 8
-for more information.
-.El
-.It Xo
-.Nm
-.Cm load-key
-.Op Fl nr
-.Op Fl L Ar keylocation
-.Fl a | Ar filesystem
-.Xc
-Load the key for
-.Ar filesystem ,
-allowing it and all children that inherit the
-.Sy keylocation
-property to be accessed. The key will be expected in the format specified by the
-.Sy keyformat
-and location specified by the
-.Sy keylocation
-property. Note that if the
-.Sy keylocation
-is set to
-.Sy prompt
-the terminal will interactively wait for the key to be entered. Loading a key
-will not automatically mount the dataset. If that functionality is desired,
-.Nm zfs Cm mount Sy -l
-will ask for the key and mount the dataset. Once the key is loaded the
-.Sy keystatus
-property will become
-.Sy available .
-.Bl -tag -width "-r"
-.It Fl r
-Recursively loads the keys for the specified filesystem and all descendent
-encryption roots.
-.It Fl a
-Loads the keys for all encryption roots in all imported pools.
-.It Fl n
-Do a dry-run
-.Pq Qq No-op
-load-key. This will cause zfs to simply check that the
-provided key is correct. This command may be run even if the key is already
-loaded.
-.It Fl L Ar keylocation
-Use
-.Ar keylocation
-instead of the
-.Sy keylocation
-property. This will not change the value of the property on the dataset. Note
-that if used with either
-.Fl r
-or
-.Fl a ,
-.Ar keylocation
-may only be given as
-.Sy prompt .
-.El
-.It Xo
-.Nm
-.Cm unload-key
-.Op Fl r
-.Fl a | Ar filesystem
-.Xc
-Unloads a key from ZFS, removing the ability to access the dataset and all of
-its children that inherit the
-.Sy keylocation
-property. This requires that the dataset is not currently open or mounted. Once
-the key is unloaded the
-.Sy keystatus
-property will become
-.Sy unavailable .
-.Bl -tag -width "-r"
-.It Fl r
-Recursively unloads the keys for the specified filesystem and all descendent
-encryption roots.
-.It Fl a
-Unloads the keys for all encryption roots in all imported pools.
-.El
-.It Xo
-.Nm
-.Cm change-key
-.Op Fl l
-.Op Fl o Ar keylocation Ns = Ns Ar value
-.Op Fl o Ar keyformat Ns = Ns Ar value
-.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
-.Ar filesystem
-.Xc
-.It Xo
-.Nm
-.Cm change-key
-.Fl i
-.Op Fl l
-.Ar filesystem
-.Xc
-Allows a user to change the encryption key used to access a dataset. This
-command requires that the existing key for the dataset is already loaded into
-ZFS. This command may also be used to change the
-.Sy keylocation ,
-.Sy keyformat ,
-and
-.Sy pbkdf2iters
-properties as needed. If the dataset was not previously an encryption root it
-will become one. Alternatively, the
-.Fl i
-flag may be provided to cause an encryption root to inherit the parent's key
-instead.
-.Bl -tag -width "-r"
-.It Fl l
-Ensures the key is loaded before attempting to change the key. This is
-effectively equivalent to
-.Qq Nm zfs Cm load-key Ar filesystem ; Nm zfs Cm change-key Ar filesystem
-.It Fl o Ar property Ns = Ns Ar value
-Allows the user to set encryption key properties (
-.Sy keyformat ,
-.Sy keylocation ,
-and
-.Sy pbkdf2iters
-) while changing the key. This is the only way to alter
-.Sy keyformat
-and
-.Sy pbkdf2iters
-after the dataset has been created.
-.It Fl i
-Indicates that zfs should make
-.Ar filesystem
-inherit the key of its parent. Note that this command can only be run on an
-encryption root that has an encrypted parent.
-.El
 .It Xo
 .Nm
 .Cm version
@@ -4665,95 +139,242 @@
 .Nm
 userland utility and the zfs kernel module.
 .El
+.
+.Ss Dataset Management
+.Bl -tag -width ""
+.It Xr zfs-list 8
+Lists the property information for the given datasets in tabular form.
+.It Xr zfs-create 8
+Creates a new ZFS file system or volume.
+.It Xr zfs-destroy 8
+Destroys the given dataset(s), snapshot(s), or bookmark.
+.It Xr zfs-rename 8
+Renames the given dataset (filesystem or snapshot).
+.It Xr zfs-upgrade 8
+Manage upgrading the on-disk version of filesystems.
+.El
+.
+.Ss Snapshots
+.Bl -tag -width ""
+.It Xr zfs-snapshot 8
+Creates snapshots with the given names.
+.It Xr zfs-rollback 8
+Roll back the given dataset to a previous snapshot.
+.It Xr zfs-hold 8 Ns / Ns Xr zfs-release 8
+Add or remove a hold reference to the specified snapshot or snapshots.
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the
+.Nm zfs Cm destroy
+command return
+.Sy EBUSY .
+.It Xr zfs-diff 8
+Display the difference between a snapshot of a given filesystem and another
+snapshot of that filesystem from a later time or the current contents of the
+filesystem.
+.El
+.
+.Ss Clones
+.Bl -tag -width ""
+.It Xr zfs-clone 8
+Creates a clone of the given snapshot.
+.It Xr zfs-promote 8
+Promotes a clone file system to no longer be dependent on its
+.Qq origin
+snapshot.
+.El
+.
+.Ss Send & Receive
+.Bl -tag -width ""
+.It Xr zfs-send 8
+Generate a send stream, which may be of a filesystem, and may be incremental
+from a bookmark.
+.It Xr zfs-receive 8
+Creates a snapshot whose contents are as specified in the stream provided on
+standard input.
+If a full stream is received, then a new file system is created as well.
+Streams are created using the
+.Xr zfs-send 8
+subcommand, which by default creates a full stream.
+.It Xr zfs-bookmark 8
+Creates a new bookmark of the given snapshot or bookmark.
+Bookmarks mark the point in time when the snapshot was created, and can be used
+as the incremental source for a
+.Nm zfs Cm send
+command.
+.It Xr zfs-redact 8
+Generate a new redaction bookmark.
+This feature can be used to allow clones of a filesystem to be made available on
+a remote system, in the case where their parent need not (or needs to not) be
+usable.
+.El
+.
+.Ss Properties
+.Bl -tag -width ""
+.It Xr zfs-get 8
+Displays properties for the given datasets.
+.It Xr zfs-set 8
+Sets the property or list of properties to the given value(s) for each dataset.
+.It Xr zfs-inherit 8
+Clears the specified property, causing it to be inherited from an ancestor,
+restored to default if no ancestor has the property set, or with the
+.Fl S
+option reverted to the received value if one exists.
+.El
+.
+.Ss Quotas
+.Bl -tag -width ""
+.It Xr zfs-userspace 8 Ns / Ns Xr zfs-groupspace 8 Ns / Ns Xr zfs-projectspace 8
+Displays space consumed by, and quotas on, each user, group, or project
+in the specified filesystem or snapshot.
+.It Xr zfs-project 8
+List, set, or clear project ID and/or inherit flag on the file(s) or directories.
+.El
+.
+.Ss Mountpoints
+.Bl -tag -width ""
+.It Xr zfs-mount 8
+Displays all ZFS file systems currently mounted, or mounts a ZFS filesystem
+on a path described by its
+.Sy mountpoint
+property.
+.It Xr zfs-unmount 8
+Unmounts currently mounted ZFS file systems.
+.El
+.
+.Ss Shares
+.Bl -tag -width ""
+.It Xr zfs-share 8
+Shares available ZFS file systems.
+.It Xr zfs-unshare 8
+Unshares currently shared ZFS file systems.
+.El
+.
+.Ss Delegated Administration
+.Bl -tag -width ""
+.It Xr zfs-allow 8
+Delegate permissions on the specified filesystem or volume.
+.It Xr zfs-unallow 8
+Remove delegated permissions on the specified filesystem or volume.
+.El
+.
+.Ss Encryption
+.Bl -tag -width ""
+.It Xr zfs-change-key 8
+Add or change an encryption key on the specified dataset.
+.It Xr zfs-load-key 8
+Load the key for the specified encrypted dataset, enabling access.
+.It Xr zfs-unload-key 8
+Unload a key for the specified dataset, removing the ability to access the dataset.
+.El
+.
+.Ss Channel Programs
+.Bl -tag -width ""
+.It Xr zfs-program 8
+Execute ZFS administrative operations
+programmatically via a Lua script-language channel program.
+.El
+.
+.Ss Jails
+.Bl -tag -width ""
+.It Xr zfs-jail 8
+Attaches a filesystem to a jail.
+.It Xr zfs-unjail 8
+Detaches a filesystem from a jail.
+.El
+.
+.Ss Waiting
+.Bl -tag -width ""
+.It Xr zfs-wait 8
+Wait for background activity in a filesystem to complete.
+.El
+.
 .Sh EXIT STATUS
 The
 .Nm
-utility exits 0 on success, 1 if an error occurs, and 2 if invalid command line
-options were specified.
+utility exits
+.Sy 0
+on success,
+.Sy 1
+if an error occurs, and
+.Sy 2
+if invalid command line options were specified.
+.
 .Sh EXAMPLES
 .Bl -tag -width ""
-.It Sy Example 1 No Creating a ZFS File System Hierarchy
+.
+.It Sy Example 1 : No Creating a ZFS File System Hierarchy
 The following commands create a file system named
-.Em pool/home
+.Ar pool/home
 and a file system named
-.Em pool/home/bob .
+.Ar pool/home/bob .
 The mount point
 .Pa /export/home
 is set for the parent file system, and is automatically inherited by the child
 file system.
-.Bd -literal
-# zfs create pool/home
-# zfs set mountpoint=/export/home pool/home
-# zfs create pool/home/bob
-.Ed
-.It Sy Example 2 No Creating a ZFS Snapshot
+.Dl # Nm zfs Cm create Ar pool/home
+.Dl # Nm zfs Cm set Sy mountpoint Ns = Ns Ar /export/home pool/home
+.Dl # Nm zfs Cm create Ar pool/home/bob
+.
+.It Sy Example 2 : No Creating a ZFS Snapshot
 The following command creates a snapshot named
-.Sy yesterday .
+.Ar yesterday .
 This snapshot is mounted on demand in the
 .Pa .zfs/snapshot
 directory at the root of the
-.Em pool/home/bob
+.Ar pool/home/bob
 file system.
-.Bd -literal
-# zfs snapshot pool/home/bob@yesterday
-.Ed
-.It Sy Example 3 No Creating and Destroying Multiple Snapshots
+.Dl # Nm zfs Cm snapshot Ar pool/home/bob Ns @ Ns Ar yesterday
+.
+.It Sy Example 3 : No Creating and Destroying Multiple Snapshots
 The following command creates snapshots named
-.Sy yesterday
-of
-.Em pool/home
+.Ar yesterday No of Ar pool/home
 and all of its descendent file systems.
 Each snapshot is mounted on demand in the
 .Pa .zfs/snapshot
 directory at the root of its file system.
 The second command destroys the newly created snapshots.
-.Bd -literal
-# zfs snapshot -r pool/home@yesterday
-# zfs destroy -r pool/home@yesterday
-.Ed
-.It Sy Example 4 No Disabling and Enabling File System Compression
+.Dl # Nm zfs Cm snapshot Fl r Ar pool/home Ns @ Ns Ar yesterday
+.Dl # Nm zfs Cm destroy Fl r Ar pool/home Ns @ Ns Ar yesterday
+.
+.It Sy Example 4 : No Disabling and Enabling File System Compression
 The following command disables the
 .Sy compression
 property for all file systems under
-.Em pool/home .
+.Ar pool/home .
 The next command explicitly enables
 .Sy compression
 for
-.Em pool/home/anne .
-.Bd -literal
-# zfs set compression=off pool/home
-# zfs set compression=on pool/home/anne
-.Ed
-.It Sy Example 5 No Listing ZFS Datasets
+.Ar pool/home/anne .
+.Dl # Nm zfs Cm set Sy compression Ns = Ns Sy off Ar pool/home
+.Dl # Nm zfs Cm set Sy compression Ns = Ns Sy on Ar pool/home/anne
+.
+.It Sy Example 5 : No Listing ZFS Datasets
 The following command lists all active file systems and volumes in the system.
-Snapshots are displayed if the
-.Sy listsnaps
-property is
-.Sy on .
+Snapshots are displayed if
+.Sy listsnaps Ns = Ns Sy on .
 The default is
 .Sy off .
 See
-.Xr zpool 8
+.Xr zpoolprops 7
 for more information on pool properties.
-.Bd -literal
-# zfs list
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm list
 NAME                      USED  AVAIL  REFER  MOUNTPOINT
 pool                      450K   457G    18K  /pool
 pool/home                 315K   457G    21K  /export/home
 pool/home/anne             18K   457G    18K  /export/home/anne
 pool/home/bob             276K   457G   276K  /export/home/bob
 .Ed
-.It Sy Example 6 No Setting a Quota on a ZFS File System
+.
+.It Sy Example 6 : No Setting a Quota on a ZFS File System
 The following command sets a quota of 50 Gbytes for
-.Em pool/home/bob .
-.Bd -literal
-# zfs set quota=50G pool/home/bob
-.Ed
-.It Sy Example 7 No Listing ZFS Properties
+.Ar pool/home/bob :
+.Dl # Nm zfs Cm set Sy quota Ns = Ns Ar 50G pool/home/bob
+.
+.It Sy Example 7 : No Listing ZFS Properties
 The following command lists all properties for
-.Em pool/home/bob .
-.Bd -literal
-# zfs get all pool/home/bob
+.Ar pool/home/bob :
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm get Sy all Ar pool/home/bob
 NAME           PROPERTY              VALUE                  SOURCE
 pool/home/bob  type                  filesystem             -
 pool/home/bob  creation              Tue Jul 21 15:53 2009  -
@@ -4777,6 +398,7 @@
 pool/home/bob  zoned                 off                    default
 pool/home/bob  snapdir               hidden                 default
 pool/home/bob  acltype               off                    default
+pool/home/bob  aclmode               discard                default
 pool/home/bob  aclinherit            restricted             default
 pool/home/bob  canmount              on                     default
 pool/home/bob  xattr                 on                     default
@@ -4798,63 +420,61 @@
 pool/home/bob  usedbyrefreservation  0                      -
 .Ed
 .Pp
-The following command gets a single property value.
-.Bd -literal
-# zfs get -H -o value compression pool/home/bob
+The following command gets a single property value:
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm get Fl H o Sy value compression Ar pool/home/bob
 on
 .Ed
+.Pp
 The following command lists all properties with local settings for
-.Em pool/home/bob .
-.Bd -literal
-# zfs get -r -s local -o name,property,value all pool/home/bob
+.Ar pool/home/bob :
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm get Fl r s Sy local Fl o Sy name , Ns Sy property , Ns Sy value all Ar pool/home/bob
 NAME           PROPERTY              VALUE
 pool/home/bob  quota                 20G
 pool/home/bob  compression           on
 .Ed
-.It Sy Example 8 No Rolling Back a ZFS File System
+.
+.It Sy Example 8 : No Rolling Back a ZFS File System
 The following command reverts the contents of
-.Em pool/home/anne
+.Ar pool/home/anne
 to the snapshot named
-.Sy yesterday ,
-deleting all intermediate snapshots.
-.Bd -literal
-# zfs rollback -r pool/home/anne@yesterday
-.Ed
-.It Sy Example 9 No Creating a ZFS Clone
+.Ar yesterday ,
+deleting all intermediate snapshots:
+.Dl # Nm zfs Cm rollback Fl r Ar pool/home/anne Ns @ Ns Ar yesterday
+.
+.It Sy Example 9 : No Creating a ZFS Clone
 The following command creates a writable file system whose initial contents are
 the same as
-.Em pool/home/bob@yesterday .
-.Bd -literal
-# zfs clone pool/home/bob@yesterday pool/clone
-.Ed
-.It Sy Example 10 No Promoting a ZFS Clone
+.Ar pool/home/bob@yesterday .
+.Dl # Nm zfs Cm clone Ar pool/home/bob@yesterday pool/clone
+.
+.It Sy Example 10 : No Promoting a ZFS Clone
 The following commands illustrate how to test out changes to a file system, and
 then replace the original file system with the changed one, using clones, clone
 promotion, and renaming:
-.Bd -literal
-# zfs create pool/project/production
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm create Ar pool/project/production
   populate /pool/project/production with data
-# zfs snapshot pool/project/production@today
-# zfs clone pool/project/production@today pool/project/beta
+.No # Nm zfs Cm snapshot Ar pool/project/production Ns @ Ns Ar today
+.No # Nm zfs Cm clone Ar pool/project/production@today pool/project/beta
   make changes to /pool/project/beta and test them
-# zfs promote pool/project/beta
-# zfs rename pool/project/production pool/project/legacy
-# zfs rename pool/project/beta pool/project/production
+.No # Nm zfs Cm promote Ar pool/project/beta
+.No # Nm zfs Cm rename Ar pool/project/production pool/project/legacy
+.No # Nm zfs Cm rename Ar pool/project/beta pool/project/production
   once the legacy version is no longer needed, it can be destroyed
-# zfs destroy pool/project/legacy
+.No # Nm zfs Cm destroy Ar pool/project/legacy
 .Ed
-.It Sy Example 11 No Inheriting ZFS Properties
+.
+.It Sy Example 11 : No Inheriting ZFS Properties
 The following command causes
-.Em pool/home/bob
-and
-.Em pool/home/anne
+.Ar pool/home/bob No and Ar pool/home/anne
 to inherit the
 .Sy checksum
 property from their parent.
-.Bd -literal
-# zfs inherit checksum pool/home/bob pool/home/anne
-.Ed
-.It Sy Example 12 No Remotely Replicating ZFS Data
+.Dl # Nm zfs Cm inherit Sy checksum Ar pool/home/bob pool/home/anne
+.
+.It Sy Example 12 : No Remotely Replicating ZFS Data
 The following commands send a full stream and then an incremental stream to a
 remote machine, restoring them into
 .Em poolB/received/fs@a
@@ -4866,147 +486,145 @@
 .Em poolB/received ,
 and must not initially contain
 .Em poolB/received/fs .
-.Bd -literal
-# zfs send pool/fs@a | \e
-  ssh host zfs receive poolB/received/fs@a
-# zfs send -i a pool/fs@b | \e
-  ssh host zfs receive poolB/received/fs
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm send Ar pool/fs@a |
+.No "   " Nm ssh Ar host Nm zfs Cm receive Ar poolB/received/fs Ns @ Ns Ar a
+.No # Nm zfs Cm send Fl i Ar a pool/fs@b |
+.No "   " Nm ssh Ar host Nm zfs Cm receive Ar poolB/received/fs
 .Ed
-.It Sy Example 13 No Using the zfs receive -d Option
+.
+.It Sy Example 13 : No Using the Nm zfs Cm receive Fl d No Option
 The following command sends a full stream of
-.Em poolA/fsA/fsB@snap
+.Ar poolA/fsA/fsB@snap
 to a remote machine, receiving it into
-.Em poolB/received/fsA/fsB@snap .
+.Ar poolB/received/fsA/fsB@snap .
 The
-.Em fsA/fsB@snap
+.Ar fsA/fsB@snap
 portion of the received snapshot's name is determined from the name of the sent
 snapshot.
-.Em poolB
+.Ar poolB
 must contain the file system
-.Em poolB/received .
+.Ar poolB/received .
 If
-.Em poolB/received/fsA
+.Ar poolB/received/fsA
 does not exist, it is created as an empty file system.
-.Bd -literal
-# zfs send poolA/fsA/fsB@snap | \e
-  ssh host zfs receive -d poolB/received
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm send Ar poolA/fsA/fsB@snap |
+.No "   " Nm ssh Ar host Nm zfs Cm receive Fl d Ar poolB/received
 .Ed
-.It Sy Example 14 No Setting User Properties
+.
+.It Sy Example 14 : No Setting User Properties
 The following example sets the user-defined
-.Sy com.example:department
-property for a dataset.
-.Bd -literal
-# zfs set com.example:department=12345 tank/accounting
-.Ed
-.It Sy Example 15 No Performing a Rolling Snapshot
+.Ar com.example : Ns Ar department
+property for a dataset:
+.Dl # Nm zfs Cm set Ar com.example : Ns Ar department Ns = Ns Ar 12345 tank/accounting
+.
+.It Sy Example 15 : No Performing a Rolling Snapshot
 The following example shows how to maintain a history of snapshots with a
 consistent naming scheme.
 To keep a week's worth of snapshots, the user destroys the oldest snapshot,
 renames the remaining snapshots, and then creates a new snapshot, as follows:
-.Bd -literal
-# zfs destroy -r pool/users@7daysago
-# zfs rename -r pool/users@6daysago @7daysago
-# zfs rename -r pool/users@5daysago @6daysago
-# zfs rename -r pool/users@4daysago @5daysago
-# zfs rename -r pool/users@3daysago @4daysago
-# zfs rename -r pool/users@2daysago @3daysago
-# zfs rename -r pool/users@yesterday @2daysago
-# zfs rename -r pool/users@today @yesterday
-# zfs snapshot -r pool/users@today
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm destroy Fl r Ar pool/users@7daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@6daysago No @ Ns Ar 7daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@5daysago No @ Ns Ar 6daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@4daysago No @ Ns Ar 5daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@3daysago No @ Ns Ar 4daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@2daysago No @ Ns Ar 3daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@yesterday No @ Ns Ar 2daysago
+.No # Nm zfs Cm rename Fl r Ar pool/users@today No @ Ns Ar yesterday
+.No # Nm zfs Cm snapshot Fl r Ar pool/users Ns @ Ns Ar today
 .Ed
-.It Sy Example 16 No Setting sharenfs Property Options on a ZFS File System
+.
+.It Sy Example 16 : No Setting sharenfs Property Options on a ZFS File System
 The following commands show how to set
 .Sy sharenfs
-property options to enable
-.Sy rw
-access for a set of
-.Sy IP
-addresses and to enable root access for system
-.Sy neo
+property options to enable read-write
+access for a set of IP addresses and to enable root access for system
+.Qq neo
 on the
-.Em tank/home
-file system.
-.Bd -literal
-# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home
-.Ed
+.Ar tank/home
+file system:
+.Dl # Nm zfs Cm set Sy sharenfs Ns = Ns ' Ns Ar rw Ns =@123.123.0.0/16,root= Ns Ar neo Ns ' tank/home
 .Pp
-If you are using
-.Sy DNS
-for host name resolution, specify the fully qualified hostname.
-.It Sy Example 17 No Delegating ZFS Administration Permissions on a ZFS Dataset
+If you are using DNS for host name resolution,
+specify the fully-qualified hostname.
+.
+.It Sy Example 17 : No Delegating ZFS Administration Permissions on a ZFS Dataset
 The following example shows how to set permissions so that user
-.Sy cindys
+.Ar cindys
 can create, destroy, mount, and take snapshots on
-.Em tank/cindys .
+.Ar tank/cindys .
 The permissions on
-.Em tank/cindys
+.Ar tank/cindys
 are also displayed.
-.Bd -literal
-# zfs allow cindys create,destroy,mount,snapshot tank/cindys
-# zfs allow tank/cindys
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm allow Sy cindys create , Ns Sy destroy , Ns Sy mount , Ns Sy snapshot Ar tank/cindys
+.No # Nm zfs Cm allow Ar tank/cindys
 ---- Permissions on tank/cindys --------------------------------------
 Local+Descendent permissions:
         user cindys create,destroy,mount,snapshot
 .Ed
 .Pp
 Because the
-.Em tank/cindys
+.Ar tank/cindys
 mount point permission is set to 755 by default, user
-.Sy cindys
+.Ar cindys
 will be unable to mount file systems under
-.Em tank/cindys .
+.Ar tank/cindys .
 Add an ACE similar to the following syntax to provide mount point access:
-.Bd -literal
-# chmod A+user:cindys:add_subdirectory:allow /tank/cindys
-.Ed
-.It Sy Example 18 No Delegating Create Time Permissions on a ZFS Dataset
+.Dl # Cm chmod No A+user: Ns Ar cindys Ns :add_subdirectory:allow Ar /tank/cindys
+.
+.It Sy Example 18 : No Delegating Create Time Permissions on a ZFS Dataset
 The following example shows how to grant anyone in the group
-.Sy staff
+.Ar staff
 to create file systems in
-.Em tank/users .
+.Ar tank/users .
 This syntax also allows staff members to destroy their own file systems, but not
 destroy anyone else's file system.
 The permissions on
-.Em tank/users
+.Ar tank/users
 are also displayed.
-.Bd -literal
-# zfs allow staff create,mount tank/users
-# zfs allow -c destroy tank/users
-# zfs allow tank/users
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm allow Ar staff Sy create , Ns Sy mount Ar tank/users
+.No # Nm zfs Cm allow Fl c Sy destroy Ar tank/users
+.No # Nm zfs Cm allow Ar tank/users
 ---- Permissions on tank/users ---------------------------------------
 Permission sets:
         destroy
 Local+Descendent permissions:
         group staff create,mount
 .Ed
-.It Sy Example 19 No Defining and Granting a Permission Set on a ZFS Dataset
+.
+.It Sy Example 19 : No Defining and Granting a Permission Set on a ZFS Dataset
 The following example shows how to define and grant a permission set on the
-.Em tank/users
+.Ar tank/users
 file system.
 The permissions on
-.Em tank/users
+.Ar tank/users
 are also displayed.
-.Bd -literal
-# zfs allow -s @pset create,destroy,snapshot,mount tank/users
-# zfs allow staff @pset tank/users
-# zfs allow tank/users
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm allow Fl s No @ Ns Ar pset Sy create , Ns Sy destroy , Ns Sy snapshot , Ns Sy mount Ar tank/users
+.No # Nm zfs Cm allow Ar staff No @ Ns Ar pset tank/users
+.No # Nm zfs Cm allow Ar tank/users
 ---- Permissions on tank/users ---------------------------------------
 Permission sets:
         @pset create,destroy,mount,snapshot
 Local+Descendent permissions:
         group staff @pset
 .Ed
-.It Sy Example 20 No Delegating Property Permissions on a ZFS Dataset
+.
+.It Sy Example 20 : No Delegating Property Permissions on a ZFS Dataset
 The following example shows to grant the ability to set quotas and reservations
 on the
-.Em users/home
+.Ar users/home
 file system.
 The permissions on
-.Em users/home
+.Ar users/home
 are also displayed.
-.Bd -literal
-# zfs allow cindys quota,reservation users/home
-# zfs allow users/home
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm allow Ar cindys Sy quota , Ns Sy reservation Ar users/home
+.No # Nm zfs Cm allow Ar users/home
 ---- Permissions on users/home ---------------------------------------
 Local+Descendent permissions:
         user cindys quota,reservation
@@ -5015,32 +633,34 @@
 NAME              PROPERTY  VALUE  SOURCE
 users/home/marks  quota     10G    local
 .Ed
-.It Sy Example 21 No Removing ZFS Delegated Permissions on a ZFS Dataset
+.
+.It Sy Example 21 : No Removing ZFS Delegated Permissions on a ZFS Dataset
 The following example shows how to remove the snapshot permission from the
-.Sy staff
+.Ar staff
 group on the
-.Em tank/users
+.Ar tank/users
 file system.
 The permissions on
-.Em tank/users
+.Ar tank/users
 are also displayed.
-.Bd -literal
-# zfs unallow staff snapshot tank/users
-# zfs allow tank/users
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm unallow Ar staff Sy snapshot Ar tank/users
+.No # Nm zfs Cm allow Ar tank/users
 ---- Permissions on tank/users ---------------------------------------
 Permission sets:
         @pset create,destroy,mount,snapshot
 Local+Descendent permissions:
         group staff @pset
 .Ed
-.It Sy Example 22 No Showing the differences between a snapshot and a ZFS Dataset
+.
+.It Sy Example 22 : No Showing the differences between a snapshot and a ZFS Dataset
 The following example shows how to see what has changed between a prior
 snapshot of a ZFS dataset and its current state.
 The
 .Fl F
 option is used to indicate type information for the files affected.
-.Bd -literal
-# zfs diff -F tank/test@before tank/test
+.Bd -literal -compact -offset Ds
+.No # Nm zfs Cm diff Fl F Ar tank/test@before tank/test
 M       /       /tank/test/
 M       F       /tank/test/linked      (+1)
 R       F       /tank/test/oldname -> /tank/test/newname
@@ -5048,46 +668,71 @@
 +       F       /tank/test/created
 M       F       /tank/test/modified
 .Ed
-.It Sy Example 23 No Creating a bookmark
-The following example create a bookmark to a snapshot. This bookmark
-can then be used instead of snapshot in send streams.
-.Bd -literal
-# zfs bookmark rpool@snapshot rpool#bookmark
-.Ed
-.It Sy Example 24 No Setting sharesmb Property Options on a ZFS File System
-The following example show how to share SMB filesystem through ZFS. Note that
-that a user and his/her password must be given.
-.Bd -literal
-# smbmount //127.0.0.1/share_tmp /mnt/tmp \\
-  -o user=workgroup/turbo,password=obrut,uid=1000
-.Ed
+.
+.It Sy Example 23 : No Creating a bookmark
+The following example creates a bookmark to a snapshot.
+This bookmark can then be used instead of snapshot in send streams.
+.Dl # Nm zfs Cm bookmark Ar rpool Ns @ Ns Ar snapshot rpool Ns # Ns Ar bookmark
+.
+.It Sy Example 24 : No Setting Sy sharesmb No Property Options on a ZFS File System
+The following example shows how to share an SMB filesystem through ZFS.
+Note that a user and their password must be given.
+.Dl # Nm smbmount Ar //127.0.0.1/share_tmp /mnt/tmp Fl o No user=workgroup/turbo,password=obrut,uid=1000
 .Pp
 Minimal
-.Em /etc/samba/smb.conf
-configuration required:
+.Pa /etc/samba/smb.conf
+configuration is required, as follows.
 .Pp
-Samba will need to listen to 'localhost' (127.0.0.1) for the ZFS utilities to
-communicate with Samba. This is the default behavior for most Linux
-distributions.
+Samba will need to bind to the loopback interface for the ZFS utilities to
+communicate with Samba.
+This is the default behavior for most Linux distributions.
 .Pp
-Samba must be able to authenticate a user. This can be done in a number of
-ways, depending on if using the system password file, LDAP or the Samba
-specific smbpasswd file. How to do this is outside the scope of this manual.
-Please refer to the
+Samba must be able to authenticate a user.
+This can be done in a number of ways
+.Pq Xr passwd 5 , LDAP , Xr smbpasswd 5 , &c.\& .
+How to do this is outside the scope of this document – refer to
 .Xr smb.conf 5
-man page for more information.
+for more information.
 .Pp
 See the
-.Sy USERSHARE section
-of the
-.Xr smb.conf 5
-man page for all configuration options in case you need to modify any options
-to the share afterwards. Do note that any changes done with the
+.Sx USERSHARES
+section for all configuration options,
+in case you need to modify any options of the share afterwards.
+Do note that any changes done with the
 .Xr net 8
-command will be undone if the share is ever unshared (such as at a reboot etc).
+command will be undone if the share is ever unshared (like via a reboot).
 .El
+.
+.Sh ENVIRONMENT VARIABLES
+.Bl -tag -width "ZFS_COLOR"
+.It Sy ZFS_COLOR
+Use ANSI color in
+.Nm zfs Cm diff
+and
+.Nm zfs Cm list
+output.
+.El
+.Bl -tag -width "ZFS_MOUNT_HELPER"
+.It Sy ZFS_MOUNT_HELPER
+Cause
+.Nm zfs Cm mount
+to use
+.Xr mount 8
+to mount ZFS datasets.
+This option is provided for backwards compatibility with older ZFS versions.
+.El
+.Bl -tag -width "ZFS_SET_PIPE_MAX"
+.It Sy ZFS_SET_PIPE_MAX
+Tells
+.Nm zfs
+to set the maximum pipe size for sends/receives.
+Disabled by default on Linux
+due to an unfixed deadlock in Linux's pipe size handling code.
+.El
+.
 .Sh INTERFACE STABILITY
 .Sy Committed .
+.
 .Sh SEE ALSO
 .Xr attr 1 ,
 .Xr gzip 1 ,
@@ -5099,9 +744,46 @@
 .Xr acl 5 ,
 .Xr attributes 5 ,
 .Xr exports 5 ,
+.Xr zfsconcepts 7 ,
+.Xr zfsprops 7 ,
 .Xr exportfs 8 ,
 .Xr mount 8 ,
 .Xr net 8 ,
 .Xr selinux 8 ,
+.Xr zfs-allow 8 ,
+.Xr zfs-bookmark 8 ,
+.Xr zfs-change-key 8 ,
+.Xr zfs-clone 8 ,
+.Xr zfs-create 8 ,
+.Xr zfs-destroy 8 ,
+.Xr zfs-diff 8 ,
+.Xr zfs-get 8 ,
+.Xr zfs-groupspace 8 ,
+.Xr zfs-hold 8 ,
+.Xr zfs-inherit 8 ,
+.Xr zfs-jail 8 ,
+.Xr zfs-list 8 ,
+.Xr zfs-load-key 8 ,
+.Xr zfs-mount 8 ,
 .Xr zfs-program 8 ,
+.Xr zfs-project 8 ,
+.Xr zfs-projectspace 8 ,
+.Xr zfs-promote 8 ,
+.Xr zfs-receive 8 ,
+.Xr zfs-redact 8 ,
+.Xr zfs-release 8 ,
+.Xr zfs-rename 8 ,
+.Xr zfs-rollback 8 ,
+.Xr zfs-send 8 ,
+.Xr zfs-set 8 ,
+.Xr zfs-share 8 ,
+.Xr zfs-snapshot 8 ,
+.Xr zfs-unallow 8 ,
+.Xr zfs-unjail 8 ,
+.Xr zfs-unload-key 8 ,
+.Xr zfs-unmount 8 ,
+.Xr zfs-unshare 8 ,
+.Xr zfs-upgrade 8 ,
+.Xr zfs-userspace 8 ,
+.Xr zfs-wait 8 ,
 .Xr zpool 8

diff --git a/zfs/man/man8/zfs_ids_to_path.8 b/zfs/man/man8/zfs_ids_to_path.8
new file mode 100644
index 0000000..d5b7467
--- /dev/null
+++ b/zfs/man/man8/zfs_ids_to_path.8

@@ -0,0 +1,51 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2020 by Delphix. All rights reserved.
+.\"
+.Dd April 17, 2020
+.Dt ZFS_IDS_TO_PATH 8
+.Os
+.
+.Sh NAME
+.Nm zfs_ids_to_path
+.Nd convert objset and object ids to names and paths
+.Sh SYNOPSIS
+.Nm
+.Op Fl v
+.Ar pool
+.Ar objset-id
+.Ar object-id
+.
+.Sh DESCRIPTION
+The
+.Sy zfs_ids_to_path
+utility converts a provided objset and object ids
+into a path to the file they refer to.
+.Bl -tag -width "-D"
+.It Fl v
+Verbose.
+Print the dataset name and the file path within the dataset separately.
+This will work correctly even if the dataset is not mounted.
+.El
+.
+.Sh SEE ALSO
+.Xr zdb 8 ,
+.Xr zfs 8

diff --git a/zfs/man/man8/zfsprops.8 b/zfs/man/man8/zfsprops.8
deleted file mode 100644
index e69de29..0000000
--- a/zfs/man/man8/zfsprops.8
+++ /dev/null


diff --git a/zfs/man/man8/zgenhostid.8 b/zfs/man/man8/zgenhostid.8
index 607efe1..e157578 100644
--- a/zfs/man/man8/zgenhostid.8
+++ b/zfs/man/man8/zgenhostid.8

@@ -18,54 +18,83 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
 .\"
-.Dd September 16, 2017
-.Dt ZGENHOSTID 8 SMM
-.Os Linux
+.Dd May 26, 2021
+.Dt ZGENHOSTID 8
+.Os
+.
 .Sh NAME
 .Nm zgenhostid
-.Nd generate and store a hostid in
-.Em /etc/hostid
+.Nd generate host ID into /etc/hostid
 .Sh SYNOPSIS
 .Nm
+.Op Fl f
+.Op Fl o Ar filename
 .Op Ar hostid
+.
 .Sh DESCRIPTION
+Creates
+.Pa /etc/hostid
+file and stores the host ID in it.
 If
-.Em /etc/hostid
-does not exist, create it and store a hostid in it.  If the user provides
-.Op Ar hostid
-on the command line, store that value.  Otherwise, randomly generate a
-value to store.
-.Pp
-This emulates the
-.Xr genhostid 1
-utility and is provided for use on systems which do not include the utility.
+.Ar hostid
+was provided, validate and store that value.
+Otherwise, randomly generate an ID.
+.
 .Sh OPTIONS
-.Op Ar hostid
+.Bl -tag -width "-o filename"
+.It Fl h
+Display a summary of the command-line options.
+.It Fl f
+Allow output overwrite.
+.It Fl o Ar filename
+Write to
+.Pa filename
+instead of the default
+.Pa /etc/hostid .
+.It Ar hostid
 Specifies the value to be placed in
-.Em /etc/hostid .
-It must be a number with a value between 1 and 2^32-1.  This value
-.Sy must
-be unique among your systems.  It must be expressed in hexadecimal and be
-exactly 8 digits long.
-.Sh EXAMPLES
-.Bl -tag -width Ds
-.It Generate a random hostid and store it
-.Bd -literal
-# zgenhostid
-.Ed
-.It Record the libc-generated hostid in Em /etc/hostid
-.Bd -literal
-# zgenhostid $(hostid)
-.Ed
-.It Record a custom hostid (0xdeadbeef) in Em etc/hostid
-.Bd -literal
-# zgenhostid deadbeef
-.Ed
+.Pa /etc/hostid .
+It should be a number with a value between 1 and 2^32-1.
+If
+.Sy 0 ,
+generate a random ID.
+This value
+.Em must
+be unique among your systems.
+It
+.Em must
+be an 8-digit-long hexadecimal number, optionally prefixed by
+.Qq 0x .
 .El
+.
+.Sh FILES
+.Pa /etc/hostid
+.
+.Sh EXAMPLES
+.Bl -tag -width Bd
+.It Generate a random hostid and store it
+.Dl # Nm
+.It Record the libc-generated hostid in Pa /etc/hostid
+.Dl # Nm Qq $ Ns Pq Nm hostid
+.It Record a custom hostid Po Ar 0xdeadbeef Pc in Pa /etc/hostid
+.Dl # Nm Ar deadbeef
+.It Record a custom hostid Po Ar 0x01234567 Pc in Pa /tmp/hostid No and overwrite the file if it exists
+.Dl # Nm Fl f o Ar /tmp/hostid 0x01234567
+.El
+.
 .Sh SEE ALSO
 .Xr genhostid 1 ,
 .Xr hostid 1 ,
-.Xr spl-module-parameters 5
+.Xr sethostid 3 ,
+.Xr spl 4
+.
+.Sh HISTORY
+.Nm
+emulates the
+.Xr genhostid 1
+utility and is provided for use on systems which
+do not include the utility or do not provide the
+.Xr sethostid 3
+function.

diff --git a/zfs/man/man8/zinject.8 b/zfs/man/man8/zinject.8
index f02e78c..a293469 100644
--- a/zfs/man/man8/zinject.8
+++ b/zfs/man/man8/zinject.8

@@ -1,4 +1,3 @@
-'\" t
 .\"
 .\" CDDL HEADER START
 .\"
@@ -19,180 +18,279 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright 2013 Darik Horn <dajhorn@vanadac.com>. All rights reserved.
 .\"
-.TH zinject 8 "2013 FEB 28" "ZFS on Linux" "System Administration Commands"
-
-.SH NAME
-zinject \- ZFS Fault Injector
-.SH DESCRIPTION
-.BR zinject
-creates artificial problems in a ZFS pool by simulating data corruption or device failures. This program is dangerous.
-.SH SYNOPSIS
-.TP
-.B "zinject"
+.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
+.\"
+.Dd May 26, 2021
+.Dt ZINJECT 8
+.Os
+.
+.Sh NAME
+.Nm zinject
+.Nd ZFS Fault Injector
+.Sh DESCRIPTION
+.Nm
+creates artificial problems in a ZFS pool by simulating data corruption
+or device failures.
+This program is dangerous.
+.
+.Sh SYNOPSIS
+.Bl -tag -width Ds
+.It Xo
+.Nm zinject
+.Xc
 List injection records.
-.TP
-.B "zinject \-b \fIobjset:object:level:blkd\fB [\-f \fIfrequency\fB] [\-amu] \fIpool\fB"
+.
+.It Xo
+.Nm zinject
+.Fl b Ar objset : Ns Ar object : Ns Ar level : Ns Ar start : Ns Ar end
+.Op Fl f Ar frequency
+.Op Fl amu
+.Ar pool
+.Xc
 Force an error into the pool at a bookmark.
-.TP
-.B "zinject \-c <\fIid\fB | all>
+.
+.It Xo
+.Nm zinject
+.Fl c Ar id Ns | Ns Sy all
+.Xc
 Cancel injection records.
-.TP
-.B "zinject \-d \fIvdev\fB \-A <degrade|fault> \fIpool\fB
+.
+.It Xo
+.Nm zinject
+.Fl d Ar vdev
+.Fl A Sy degrade Ns | Ns Sy fault
+.Ar pool
+.Xc
 Force a vdev into the DEGRADED or FAULTED state.
-.TP
-.B "zinject -d \fIvdev\fB -D latency:lanes \fIpool\fB
-
+.
+.It Xo
+.Nm zinject
+.Fl d Ar vdev
+.Fl D Ar latency : Ns Ar lanes
+.Ar pool
+.Xc
 Add an artificial delay to IO requests on a particular
-device, such that the requests take a minimum of 'latency'
-milliseconds to complete. Each delay has an associated
-number of 'lanes' which defines the number of concurrent
+device, such that the requests take a minimum of
+.Ar latency
+milliseconds to complete.
+Each delay has an associated number of
+.Ar lanes
+which defines the number of concurrent
 IO requests that can be processed.
-
-For example, with a single lane delay of 10 ms (-D 10:1),
+.Pp
+For example, with a single lane delay of 10 ms
+.No (\& Ns Fl D Ar 10 : Ns Ar 1 ) ,
 the device will only be able to service a single IO request
-at a time with each request taking 10 ms to complete. So,
-if only a single request is submitted every 10 ms, the
+at a time with each request taking 10 ms to complete.
+So, if only a single request is submitted every 10 ms, the
 average latency will be 10 ms; but if more than one request
 is submitted every 10 ms, the average latency will be more
 than 10 ms.
-
+.Pp
 Similarly, if a delay of 10 ms is specified to have two
-lanes (-D 10:2), then the device will be able to service
-two requests at a time, each with a minimum latency of
-10 ms. So, if two requests are submitted every 10 ms, then
+lanes
+.No (\& Ns Fl D Ar 10 : Ns Ar 2 ) ,
+then the device will be able to service
+two requests at a time, each with a minimum latency of 10 ms.
+So, if two requests are submitted every 10 ms, then
 the average latency will be 10 ms; but if more than two
 requests are submitted every 10 ms, the average latency
 will be more than 10 ms.
-
-Also note, these delays are additive. So two invocations
-of '-D 10:1', is roughly equivalent to a single invocation
-of '-D 10:2'. This also means, one can specify multiple
-lanes with differing target latencies. For example, an
-invocation of '-D 10:1' followed by '-D 25:2' will
-create 3 lanes on the device; one lane with a latency
+.Pp
+Also note, these delays are additive.
+So two invocations of
+.Fl D Ar 10 : Ns Ar 1
+are roughly equivalent to a single invocation of
+.Fl D Ar 10 : Ns Ar 2 .
+This also means that one can specify multiple
+lanes with differing target latencies.
+For example, an invocation of
+.Fl D Ar 10 : Ns Ar 1
+followed by
+.Fl D Ar 25 : Ns Ar 2
+will create 3 lanes on the device: one lane with a latency
 of 10 ms and two lanes with a 25 ms latency.
-
-.TP
-.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-f \fIfrequency\fB] [\-F] \fIpool\fB"
+.
+.It Xo
+.Nm zinject
+.Fl d Ar vdev
+.Op Fl e Ar device_error
+.Op Fl L Ar label_error
+.Op Fl T Ar failure
+.Op Fl f Ar frequency
+.Op Fl F
+.Ar pool
+.Xc
 Force a vdev error.
-.TP
-.B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB"
+.
+.It Xo
+.Nm zinject
+.Fl I
+.Op Fl s Ar seconds Ns | Ns Fl g Ar txgs
+.Ar pool
+.Xc
 Simulate a hardware failure that fails to honor a cache flush.
-.TP
-.B "zinject \-p \fIfunction\fB \fIpool\fB
+.
+.It Xo
+.Nm zinject
+.Fl p Ar function
+.Ar pool
+.Xc
 Panic inside the specified function.
-.TP
-.B "zinject \-t data [\-C \fIdvas\fB] [\-e \fIdevice_error\fB] [\-f \fIfrequency\fB] [\-l \fIlevel\fB] [\-r \fIrange\fB] [\-amq] \fIpath\fB"
+.
+.It Xo
+.Nm zinject
+.Fl t Sy data
+.Op Fl C Ar dvas
+.Op Fl e Ar device_error
+.Op Fl f Ar frequency
+.Op Fl l Ar level
+.Op Fl r Ar range
+.Op Fl amq
+.Ar path
+.Xc
 Force an error into the contents of a file.
-.TP
-.B "zinject \-t dnode [\-C \fIdvas\fB] [\-e \fIdevice_error\fB] [\-f \fIfrequency\fB] [\-l \fIlevel\fB] [\-amq] \fIpath\fB"
+.
+.It Xo
+.Nm zinject
+.Fl t Sy dnode
+.Op Fl C Ar dvas
+.Op Fl e Ar device_error
+.Op Fl f Ar frequency
+.Op Fl l Ar level
+.Op Fl amq
+.Ar path
+.Xc
 Force an error into the metadnode for a file or directory.
-.TP
-.B "zinject \-t \fImos_type\fB [\-C \fIdvas\fB] [\-e \fIdevice_error\fB] [\-f \fIfrequency\fB] [\-l \fIlevel\fB] [\-r \fIrange\fB] [\-amqu] \fIpool\fB"
+.
+.It Xo
+.Nm zinject
+.Fl t Ar mos_type
+.Op Fl C Ar dvas
+.Op Fl e Ar device_error
+.Op Fl f Ar frequency
+.Op Fl l Ar level
+.Op Fl r Ar range
+.Op Fl amqu
+.Ar pool
+.Xc
 Force an error into the MOS of a pool.
-.SH OPTIONS
-.TP
-.BI "\-a"
+.El
+.Sh OPTIONS
+.Bl -tag -width "-C dvas"
+.It Fl a
 Flush the ARC before injection.
-.TP
-.BI "\-b" " objset:object:level:start:end"
-Force an error into the pool at this bookmark tuple. Each number is
-in hexadecimal, and only one block can be specified.
-.TP
-.BI "\-C" " dvas"
-Inject the given error only into specific DVAs. The mask should be
-specified as a list of 0-indexed DVAs separated by commas (ex. '0,2'). This
-option is not applicable to logical data errors such as
-.BR "decompress"
+.It Fl b Ar objset : Ns Ar object : Ns Ar level : Ns Ar start : Ns Ar end
+Force an error into the pool at this bookmark tuple.
+Each number is in hexadecimal, and only one block can be specified.
+.It Fl C Ar dvas
+Inject the given error only into specific DVAs.
+The mask should be specified as a list of 0-indexed DVAs separated by commas
+.No (ex. Ar 0,2 Ns No ).
+This option is not applicable to logical data errors such as
+.Sy decompress
 and
-.BR "decrypt" .
-.TP
-.BI "\-d" " vdev"
+.Sy decrypt .
+.It Fl d Ar vdev
 A vdev specified by path or GUID.
-.TP
-.BI "\-e" " device_error"
+.It Fl e Ar device_error
 Specify
-.BR "checksum" " for an ECKSUM error,"
-.BR "decompress" " for a data decompression error,"
-.BR "decrypt" " for a data decryption error,"
-.BR "corrupt" " to flip a bit in the data after a read,"
-.BR "dtl" " for an ECHILD error,"
-.BR "io" " for an EIO error where reopening the device will succeed, or"
-.BR "nxio" " for an ENXIO error where reopening the device will fail."
-For EIO and ENXIO, the "failed" reads or writes still occur.  The probe simply
-sets the error value reported by the I/O pipeline so it appears the read or
-write failed.  Decryption errors only currently work with file data.
-.TP
-.BI "\-f" " frequency"
-Only inject errors a fraction of the time. Expressed as a real number
-percentage between 0.0001 and 100.
-.TP
-.BI "\-F"
-Fail faster. Do fewer checks.
-.TP
-.BI "\-g" " txgs"
+.Bl -tag -compact -width "decompress"
+.It Sy checksum
+for an ECKSUM error,
+.It Sy decompress
+for a data decompression error,
+.It Sy decrypt
+for a data decryption error,
+.It Sy corrupt
+to flip a bit in the data after a read,
+.It Sy dtl
+for an ECHILD error,
+.It Sy io
+for an EIO error where reopening the device will succeed, or
+.It Sy nxio
+for an ENXIO error where reopening the device will fail.
+.El
+.Pp
+For EIO and ENXIO, the "failed" reads or writes still occur.
+The probe simply sets the error value reported by the I/O pipeline
+so it appears the read or write failed.
+Decryption errors only currently work with file data.
+.It Fl f Ar frequency
+Only inject errors a fraction of the time.
+Expressed as a real number percentage between
+.Sy 0.0001
+and
+.Sy 100 .
+.It Fl F
+Fail faster.
+Do fewer checks.
+.It Fl g Ar txgs
 Run for this many transaction groups before reporting failure.
-.TP
-.BI "\-h"
+.It Fl h
 Print the usage message.
-.TP
-.BI "\-l" " level"
-Inject an error at a particular block level. The default is 0.
-.TP
-.BI "\-L" " label_error"
+.It Fl l Ar level
+Inject an error at a particular block level.
+The default is
+.Sy 0 .
+.It Fl L Ar label_error
 Set the label error region to one of
-.BR " nvlist" ","
-.BR " pad1" ","
-.BR " pad2" ", or"
-.BR " uber" "."
-.TP
-.BI "\-m"
+.Sy nvlist ,
+.Sy pad1 ,
+.Sy pad2 ,
+or
+.Sy uber .
+.It Fl m
 Automatically remount the underlying filesystem.
-.TP
-.BI "\-q"
-Quiet mode. Only print the handler number added.
-.TP
-.BI "\-r" " range"
+.It Fl q
+Quiet mode.
+Only print the handler number added.
+.It Fl r Ar range
 Inject an error over a particular logical range of an object, which
 will be translated to the appropriate blkid range according to the
 object's properties.
-.TP
-.BI "\-s" " seconds"
+.It Fl s Ar seconds
 Run for this many seconds before reporting failure.
-.TP
-.BI "\-T" " failure"
+.It Fl T Ar failure
 Set the failure type to one of
-.BR " all" ","
-.BR " claim" ","
-.BR " free" ","
-.BR " read" ", or"
-.BR " write" "."
-.TP
-.BI "\-t" " mos_type"
+.Sy all ,
+.Sy claim ,
+.Sy free ,
+.Sy read ,
+or
+.Sy write .
+.It Fl t Ar mos_type
 Set this to
-.BR "mos " "for any data in the MOS,"
-.BR "mosdir " "for an object directory,"
-.BR "config " "for the pool configuration,"
-.BR "bpobj " "for the block pointer list,"
-.BR "spacemap " "for the space map,"
-.BR "metaslab " "for the metaslab, or"
-.BR "errlog " "for the persistent error log."
-.TP
-.BI "\-u"
+.Bl -tag -compact -width "spacemap"
+.It Sy mos
+for any data in the MOS,
+.It Sy mosdir
+for an object directory,
+.It Sy config
+for the pool configuration,
+.It Sy bpobj
+for the block pointer list,
+.It Sy spacemap
+for the space map,
+.It Sy metaslab
+for the metaslab, or
+.It Sy errlog
+for the persistent error log.
+.El
+.It Fl u
 Unload the pool after injection.
-
-.SH "ENVIRONMENT VARIABLES"
-.TP
-.B "ZINJECT_DEBUG"
-Run \fBzinject\fR in debug mode.
-
-.SH "AUTHORS"
-This man page was written by Darik Horn <dajhorn@vanadac.com>
-excerpting the \fBzinject\fR usage message and source code.
-
-.SH "SEE ALSO"
-.BR zpool (8),
-.BR zfs (8)
+.El
+.
+.Sh ENVIRONMENT VARIABLES
+.Bl -tag -width "ZF"
+.It Ev ZINJECT_DEBUG
+Run
+.Nm
+in debug mode.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs 8 ,
+.Xr zpool 8

diff --git a/zfs/man/man8/zpool-add.8 b/zfs/man/man8/zpool-add.8
new file mode 100644
index 0000000..26cf33c
--- /dev/null
+++ b/zfs/man/man8/zpool-add.8

@@ -0,0 +1,101 @@
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-ADD 8
+.Os
+.
+.Sh NAME
+.Nm zpool-add
+.Nd add vdevs to ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm add
+.Op Fl fgLnP
+.Oo Fl o Ar property Ns = Ns Ar value Oc
+.Ar pool vdev Ns …
+.
+.Sh DESCRIPTION
+Adds the specified virtual devices to the given pool.
+The
+.Ar vdev
+specification is described in the
+.Em Virtual Devices
+section of
+.Xr zpoolconcepts 7 .
+The behavior of the
+.Fl f
+option, and the device checks performed are described in the
+.Nm zpool Cm create
+subcommand.
+.Bl -tag -width Ds
+.It Fl f
+Forces use of
+.Ar vdev Ns s ,
+even if they appear in use or specify a conflicting replication level.
+Not all devices can be overridden in this manner.
+.It Fl g
+Display
+.Ar vdev
+GUIDs instead of the normal device names.
+These GUIDs can be used in place of
+device names for the zpool detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for
+.Ar vdev Ns s
+resolving all symbolic links.
+This can be used to look up the current block
+device name regardless of the
+.Pa /dev/disk
+path used to open it.
+.It Fl n
+Displays the configuration that would be used without actually adding the
+.Ar vdev Ns s .
+The actual pool creation can still fail due to insufficient privileges or
+device sharing.
+.It Fl P
+Display real paths for
+.Ar vdev Ns s
+instead of only the last component of the path.
+This can be used in conjunction with the
+.Fl L
+flag.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the given pool properties.
+See the
+.Xr zpoolprops 7
+manual page for a list of valid properties that can be set.
+The only property supported at the moment is
+.Sy ashift .
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-attach 8 ,
+.Xr zpool-import 8 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-remove 8

diff --git a/zfs/man/man8/zpool-attach.8 b/zfs/man/man8/zpool-attach.8
new file mode 100644
index 0000000..9dfa35a
--- /dev/null
+++ b/zfs/man/man8/zpool-attach.8

@@ -0,0 +1,98 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 15, 2020
+.Dt ZPOOL-ATTACH 8
+.Os
+.
+.Sh NAME
+.Nm zpool-attach
+.Nd attach new device to existing ZFS vdev
+.Sh SYNOPSIS
+.Nm zpool
+.Cm attach
+.Op Fl fsw
+.Oo Fl o Ar property Ns = Ns Ar value Oc
+.Ar pool device new_device
+.
+.Sh DESCRIPTION
+Attaches
+.Ar new_device
+to the existing
+.Ar device .
+The existing device cannot be part of a raidz configuration.
+If
+.Ar device
+is not currently part of a mirrored configuration,
+.Ar device
+automatically transforms into a two-way mirror of
+.Ar device
+and
+.Ar new_device .
+If
+.Ar device
+is part of a two-way mirror, attaching
+.Ar new_device
+creates a three-way mirror, and so on.
+In either case,
+.Ar new_device
+begins to resilver immediately and any running scrub is cancelled.
+.Bl -tag -width Ds
+.It Fl f
+Forces use of
+.Ar new_device ,
+even if it appears to be in use.
+Not all devices can be overridden in this manner.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the given pool properties.
+See the
+.Xr zpoolprops 7
+manual page for a list of valid properties that can be set.
+The only property supported at the moment is
+.Sy ashift .
+.It Fl s
+The
+.Ar new_device
+is reconstructed sequentially to restore redundancy as quickly as possible.
+Checksums are not verified during sequential reconstruction so a scrub is
+started when the resilver completes.
+Sequential reconstruction is not supported for raidz configurations.
+.It Fl w
+Waits until
+.Ar new_device
+has finished resilvering before returning.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-add 8 ,
+.Xr zpool-detach 8 ,
+.Xr zpool-import 8 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-resilver 8

diff --git a/zfs/man/man8/zpool-checkpoint.8 b/zfs/man/man8/zpool-checkpoint.8
new file mode 100644
index 0000000..d5add14
--- /dev/null
+++ b/zfs/man/man8/zpool-checkpoint.8

@@ -0,0 +1,72 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-CHECKPOINT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-checkpoint
+.Nd check-point current ZFS storage pool state
+.Sh SYNOPSIS
+.Nm zpool
+.Cm checkpoint
+.Op Fl d Op Fl w
+.Ar pool
+.
+.Sh DESCRIPTION
+Checkpoints the current state of
+.Ar pool ,
+which can be later restored by
+.Nm zpool Cm import --rewind-to-checkpoint .
+The existence of a checkpoint in a pool prohibits the following
+.Nm zpool
+subcommands:
+.Cm remove , attach , detach , split , No and Cm reguid .
+In addition, it may break reservation boundaries if the pool lacks free
+space.
+The
+.Nm zpool Cm status
+command indicates the existence of a checkpoint or the progress of discarding a
+checkpoint from a pool.
+.Nm zpool Cm list
+can be used to check how much space the checkpoint takes from the pool.
+.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl d , -discard
+Discards an existing checkpoint from
+.Ar pool .
+.It Fl w , -wait
+Waits until the checkpoint has finished being discarded before returning.
+.El
+.
+.Sh SEE ALSO
+.Xr zfs-snapshot 8 ,
+.Xr zpool-import 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-clear.8 b/zfs/man/man8/zpool-clear.8
new file mode 100644
index 0000000..0b256b2
--- /dev/null
+++ b/zfs/man/man8/zpool-clear.8

@@ -0,0 +1,59 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-CLEAR 8
+.Os
+.
+.Sh NAME
+.Nm zpool-clear
+.Nd clear device errors in ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm clear
+.Ar pool
+.Oo Ar device Oc Ns …
+.
+.Sh DESCRIPTION
+Clears device errors in a pool.
+If no arguments are specified, all device errors within the pool are cleared.
+If one or more devices is specified, only those errors associated with the
+specified device or devices are cleared.
+.Pp
+If the pool was suspended it will be brought back online provided the
+devices can be accessed.
+Pools with
+.Sy multihost
+enabled which have been suspended cannot be resumed.
+While the pool was suspended, it may have been imported on
+another host, and resuming I/O could result in pool damage.
+.
+.Sh SEE ALSO
+.Xr zdb 8 ,
+.Xr zpool-reopen 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-create.8 b/zfs/man/man8/zpool-create.8
new file mode 100644
index 0000000..e902c77
--- /dev/null
+++ b/zfs/man/man8/zpool-create.8

@@ -0,0 +1,211 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+.\"
+.Dd June 2, 2021
+.Dt ZPOOL-CREATE 8
+.Os
+.
+.Sh NAME
+.Nm zpool-create
+.Nd create ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm create
+.Op Fl dfn
+.Op Fl m Ar mountpoint
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Oo Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value Oc
+.Op Fl o Ar compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns …
+.Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Op Fl t Ar tname
+.Ar pool
+.Ar vdev Ns …
+.
+.Sh DESCRIPTION
+Creates a new storage pool containing the virtual devices specified on the
+command line.
+The pool name must begin with a letter, and can only contain
+alphanumeric characters as well as the underscore
+.Pq Qq Sy _ ,
+dash
+.Pq Qq Sy \&- ,
+colon
+.Pq Qq Sy \&: ,
+space
+.Pq Qq Sy \&\  ,
+and period
+.Pq Qq Sy \&. .
+The pool names
+.Sy mirror ,
+.Sy raidz ,
+.Sy draid ,
+.Sy spare
+and
+.Sy log
+are reserved, as are names beginning with
+.Sy mirror ,
+.Sy raidz ,
+.Sy draid ,
+and
+.Sy spare .
+The
+.Ar vdev
+specification is described in the
+.Sx Virtual Devices
+section of
+.Xr zpoolconcepts 7 .
+.Pp
+The command attempts to verify that each device specified is accessible and not
+currently in use by another subsystem.
+However this check is not robust enough
+to detect simultaneous attempts to use a new device in different pools, even if
+.Sy multihost Ns = Sy enabled .
+The administrator must ensure that simultaneous invocations of any combination of
+.Nm zpool Cm replace ,
+.Nm zpool Cm create ,
+.Nm zpool Cm add ,
+or
+.Nm zpool Cm labelclear ,
+do not refer to the same device.
+Using the same device in two pools will result in pool corruption.
+.Pp
+There are some uses, such as being currently mounted, or specified as the
+dedicated dump device, that prevent a device from ever being used by ZFS.
+Other uses, such as having a preexisting UFS file system, can be overridden with
+.Fl f .
+.Pp
+The command also checks that the replication strategy for the pool is
+consistent.
+An attempt to combine redundant and non-redundant storage in a single pool,
+or to mix disks and files, results in an error unless
+.Fl f
+is specified.
+The use of differently-sized devices within a single raidz or mirror group is
+also flagged as an error unless
+.Fl f
+is specified.
+.Pp
+Unless the
+.Fl R
+option is specified, the default mount point is
+.Pa / Ns Ar pool .
+The mount point must not exist or must be empty, or else the root dataset
+will not be able to be mounted.
+This can be overridden with the
+.Fl m
+option.
+.Pp
+By default all supported features are enabled on the new pool.
+The
+.Fl d
+option and the
+.Fl o Ar compatibility
+property
+.Pq e.g. Fl o Sy compatibility Ns = Ns Ar 2020
+can be used to restrict the features that are enabled, so that the
+pool can be imported on other releases of ZFS.
+.Bl -tag -width "-t tname"
+.It Fl d
+Do not enable any features on the new pool.
+Individual features can be enabled by setting their corresponding properties to
+.Sy enabled
+with
+.Fl o .
+See
+.Xr zpool-features 7
+for details about feature properties.
+.It Fl f
+Forces use of
+.Ar vdev Ns s ,
+even if they appear in use or specify a conflicting replication level.
+Not all devices can be overridden in this manner.
+.It Fl m Ar mountpoint
+Sets the mount point for the root dataset.
+The default mount point is
+.Pa /pool
+or
+.Pa altroot/pool
+if
+.Sy altroot
+is specified.
+The mount point must be an absolute path,
+.Sy legacy ,
+or
+.Sy none .
+For more information on dataset mount points, see
+.Xr zfsprops 7 .
+.It Fl n
+Displays the configuration that would be used without actually creating the
+pool.
+The actual pool creation can still fail due to insufficient privileges or
+device sharing.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the given pool properties.
+See
+.Xr zpoolprops 7
+for a list of valid properties that can be set.
+.It Fl o Ar compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns …
+Specifies compatibility feature sets.
+See
+.Xr zpool-features 7
+for more information about compatibility feature sets.
+.It Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value
+Sets the given pool feature.
+See
+.Xr zpool-features 7
+for a list of valid features that can be set.
+Value can be either disabled or enabled.
+.It Fl O Ar file-system-property Ns = Ns Ar value
+Sets the given file system properties in the root file system of the pool.
+See
+.Xr zfsprops 7
+for a list of valid properties that can be set.
+.It Fl R Ar root
+Equivalent to
+.Fl o Sy cachefile Ns = Ns Sy none Fl o Sy altroot Ns = Ns Ar root
+.It Fl t Ar tname
+Sets the in-core pool name to
+.Ar tname
+while the on-disk name will be the name specified as
+.Ar pool .
+This will set the default of the
+.Sy cachefile
+property to
+.Sy none .
+This is intended
+to handle name space collisions when creating pools for other systems,
+such as virtual machines or physical machines whose pools live on network
+block devices.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-destroy 8 ,
+.Xr zpool-export 8 ,
+.Xr zpool-import 8

diff --git a/zfs/man/man8/zpool-destroy.8 b/zfs/man/man8/zpool-destroy.8
new file mode 100644
index 0000000..a2f6729
--- /dev/null
+++ b/zfs/man/man8/zpool-destroy.8

@@ -0,0 +1,48 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 31, 2021
+.Dt ZPOOL-DESTROY 8
+.Os
+.
+.Sh NAME
+.Nm zpool-destroy
+.Nd destroy ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm destroy
+.Op Fl f
+.Ar pool
+.
+.Sh DESCRIPTION
+Destroys the given pool, freeing up any devices for other use.
+This command tries to unmount any active datasets before destroying the pool.
+.Bl -tag -width Ds
+.It Fl f
+Forcefully unmount all active datasets.
+.El

diff --git a/zfs/man/man8/zpool-detach.8 b/zfs/man/man8/zpool-detach.8
new file mode 100644
index 0000000..952dd78
--- /dev/null
+++ b/zfs/man/man8/zpool-detach.8

@@ -0,0 +1,58 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-DETACH 8
+.Os
+.
+.Sh NAME
+.Nm zpool-detach
+.Nd detach device from ZFS mirror
+.Sh SYNOPSIS
+.Nm zpool
+.Cm detach
+.Ar pool device
+.
+.Sh DESCRIPTION
+Detaches
+.Ar device
+from a mirror.
+The operation is refused if there are no other valid replicas of the data.
+If
+.Ar device
+may be re-added to the pool later on then consider the
+.Nm zpool Cm offline
+command instead.
+.
+.Sh SEE ALSO
+.Xr zpool-attach 8 ,
+.Xr zpool-labelclear 8 ,
+.Xr zpool-offline 8 ,
+.Xr zpool-remove 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-split 8

diff --git a/zfs/man/man8/zpool-events.8 b/zfs/man/man8/zpool-events.8
new file mode 100644
index 0000000..55f7bab
--- /dev/null
+++ b/zfs/man/man8/zpool-events.8

@@ -0,0 +1,483 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-EVENTS 8
+.Os
+.
+.Sh NAME
+.Nm zpool-events
+.Nd list recent events generated by kernel
+.Sh SYNOPSIS
+.Nm zpool
+.Cm events
+.Op Fl vHf
+.Op Ar pool
+.Nm zpool
+.Cm events
+.Fl c
+.
+.Sh DESCRIPTION
+Lists all recent events generated by the ZFS kernel modules.
+These events are consumed by the
+.Xr zed 8
+and used to automate administrative tasks such as replacing a failed device
+with a hot spare.
+For more information about the subclasses and event payloads
+that can be generated see
+.Sx EVENTS
+and the following sections.
+.
+.Sh OPTIONS
+.Bl -tag -compact -width Ds
+.It Fl c
+Clear all previous events.
+.It Fl f
+Follow mode.
+.It Fl H
+Scripted mode.
+Do not display headers, and separate fields by a
+single tab instead of arbitrary space.
+.It Fl v
+Print the entire payload for each event.
+.El
+.
+.Sh EVENTS
+These are the different event subclasses.
+The full event name would be
+.Sy ereport.fs.zfs.\& Ns Em SUBCLASS ,
+but only the last part is listed here.
+.Pp
+.Bl -tag -compact -width "vdev.bad_guid_sum"
+.It Sy checksum
+Issued when a checksum error has been detected.
+.It Sy io
+Issued when there is an I/O error in a vdev in the pool.
+.It Sy data
+Issued when there have been data errors in the pool.
+.It Sy deadman
+Issued when an I/O request is determined to be "hung"; this can be caused
+by lost completion events due to flaky hardware or drivers.
+See
+.Sy zfs_deadman_failmode
+in
+.Xr zfs 4
+for additional information regarding "hung" I/O detection and configuration.
+.It Sy delay
+Issued when a completed I/O request exceeds the maximum allowed time
+specified by the
+.Sy zio_slow_io_ms
+module parameter.
+This can be an indicator of problems with the underlying storage device.
+The number of delay events is ratelimited by the
+.Sy zfs_slow_io_events_per_second
+module parameter.
+.It Sy config
+Issued every time a vdev change has been made to the pool.
+.It Sy zpool
+Issued when a pool cannot be imported.
+.It Sy zpool.destroy
+Issued when a pool is destroyed.
+.It Sy zpool.export
+Issued when a pool is exported.
+.It Sy zpool.import
+Issued when a pool is imported.
+.It Sy zpool.reguid
+Issued when a REGUID (new unique identifier for the pool has been regenerated) has been detected.
+.It Sy vdev.unknown
+Issued when the vdev is unknown.
+Such as trying to clear device errors on a vdev that has failed/been kicked
+from the system/pool and is no longer available.
+.It Sy vdev.open_failed
+Issued when a vdev could not be opened (because it didn't exist for example).
+.It Sy vdev.corrupt_data
+Issued when corrupt data have been detected on a vdev.
+.It Sy vdev.no_replicas
+Issued when there are no more replicas to sustain the pool.
+This would lead to the pool being
+.Em DEGRADED .
+.It Sy vdev.bad_guid_sum
+Issued when a missing device in the pool has been detected.
+.It Sy vdev.too_small
+Issued when the system (kernel) has removed a device, and ZFS
+notices that the device isn't there any more.
+This is usually followed by a
+.Sy probe_failure
+event.
+.It Sy vdev.bad_label
+Issued when the label is OK but invalid.
+.It Sy vdev.bad_ashift
+Issued when the ashift alignment requirement has increased.
+.It Sy vdev.remove
+Issued when a vdev is detached from a mirror (or a spare detached from a
+vdev where it has been used to replace a failed drive - only works if
+the original drive has been re-added).
+.It Sy vdev.clear
+Issued when clearing device errors in a pool.
+Such as running
+.Nm zpool Cm clear
+on a device in the pool.
+.It Sy vdev.check
+Issued when a check to see if a given vdev could be opened is started.
+.It Sy vdev.spare
+Issued when a spare has kicked in to replace a failed device.
+.It Sy vdev.autoexpand
+Issued when a vdev can be automatically expanded.
+.It Sy io_failure
+Issued when there is an I/O failure in a vdev in the pool.
+.It Sy probe_failure
+Issued when a probe fails on a vdev.
+This would occur if a vdev
+has been kicked from the system outside of ZFS (such as the kernel
+having removed the device).
+.It Sy log_replay
+Issued when the intent log cannot be replayed.
+This can occur in the case of a missing or damaged log device.
+.It Sy resilver.start
+Issued when a resilver is started.
+.It Sy resilver.finish
+Issued when the running resilver has finished.
+.It Sy scrub.start
+Issued when a scrub is started on a pool.
+.It Sy scrub.finish
+Issued when a pool has finished scrubbing.
+.It Sy scrub.abort
+Issued when a scrub is aborted on a pool.
+.It Sy scrub.resume
+Issued when a scrub is resumed on a pool.
+.It Sy scrub.paused
+Issued when a scrub is paused on a pool.
+.It Sy bootfs.vdev.attach
+.El
+.
+.Sh PAYLOADS
+This is the payload (data, information) that accompanies an
+event.
+.Pp
+For
+.Xr zed 8 ,
+these are set to uppercase and prefixed with
+.Sy ZEVENT_ .
+.Pp
+.Bl -tag -compact -width "vdev_cksum_errors"
+.It Sy pool
+Pool name.
+.It Sy pool_failmode
+Failmode -
+.Sy wait ,
+.Sy continue ,
+or
+.Sy panic .
+See the
+.Sy failmode
+property in
+.Xr zpoolprops 7
+for more information.
+.It Sy pool_guid
+The GUID of the pool.
+.It Sy pool_context
+The load state for the pool (0=none, 1=open, 2=import, 3=tryimport, 4=recover,
+5=error).
+.It Sy vdev_guid
+The GUID of the vdev in question (the vdev failing or operated upon with
+.Nm zpool Cm clear ,
+etc.).
+.It Sy vdev_type
+Type of vdev -
+.Sy disk ,
+.Sy file ,
+.Sy mirror ,
+etc.
+See the
+.Sy Virtual Devices
+section of
+.Xr zpoolconcepts 7
+for more information on possible values.
+.It Sy vdev_path
+Full path of the vdev, including any
+.Em -partX .
+.It Sy vdev_devid
+ID of vdev (if any).
+.It Sy vdev_fru
+Physical FRU location.
+.It Sy vdev_state
+State of vdev (0=uninitialized, 1=closed, 2=offline, 3=removed, 4=failed to open, 5=faulted, 6=degraded, 7=healthy).
+.It Sy vdev_ashift
+The ashift value of the vdev.
+.It Sy vdev_complete_ts
+The time the last I/O request completed for the specified vdev.
+.It Sy vdev_delta_ts
+The time since the last I/O request completed for the specified vdev.
+.It Sy vdev_spare_paths
+List of spares, including full path and any
+.Em -partX .
+.It Sy vdev_spare_guids
+GUID(s) of spares.
+.It Sy vdev_read_errors
+The number of read errors that have been detected on the vdev.
+.It Sy vdev_write_errors
+The number of write errors that have been detected on the vdev.
+.It Sy vdev_cksum_errors
+The number of checksum errors that have been detected on the vdev.
+.It Sy parent_guid
+GUID of the vdev parent.
+.It Sy parent_type
+Type of parent.
+See
+.Sy vdev_type .
+.It Sy parent_path
+Path of the vdev parent (if any).
+.It Sy parent_devid
+ID of the vdev parent (if any).
+.It Sy zio_objset
+The object set number for a given I/O request.
+.It Sy zio_object
+The object number for a given I/O request.
+.It Sy zio_level
+The indirect level for the block.
+Level 0 is the lowest level and includes data blocks.
+Values > 0 indicate metadata blocks at the appropriate level.
+.It Sy zio_blkid
+The block ID for a given I/O request.
+.It Sy zio_err
+The error number for a failure when handling a given I/O request,
+compatible with
+.Xr errno 3
+with the value of
+.Sy EBADE
+used to indicate a ZFS checksum error.
+.It Sy zio_offset
+The offset in bytes of where to write the I/O request for the specified vdev.
+.It Sy zio_size
+The size in bytes of the I/O request.
+.It Sy zio_flags
+The current flags describing how the I/O request should be handled.
+See the
+.Sy I/O FLAGS
+section for the full list of I/O flags.
+.It Sy zio_stage
+The current stage of the I/O in the pipeline.
+See the
+.Sy I/O STAGES
+section for a full list of all the I/O stages.
+.It Sy zio_pipeline
+The valid pipeline stages for the I/O.
+See the
+.Sy I/O STAGES
+section for a full list of all the I/O stages.
+.It Sy zio_delay
+The time elapsed (in nanoseconds) waiting for the block layer to complete the
+I/O request.
+Unlike
+.Sy zio_delta ,
+this does not include any vdev queuing time and is
+therefore solely a measure of the block layer performance.
+.It Sy zio_timestamp
+The time when a given I/O request was submitted.
+.It Sy zio_delta
+The time required to service a given I/O request.
+.It Sy prev_state
+The previous state of the vdev.
+.It Sy cksum_expected
+The expected checksum value for the block.
+.It Sy cksum_actual
+The actual checksum value for an errant block.
+.It Sy cksum_algorithm
+Checksum algorithm used.
+See
+.Xr zfsprops 7
+for more information on the available checksum algorithms.
+.It Sy cksum_byteswap
+Whether or not the data is byteswapped.
+.It Sy bad_ranges
+.No [\& Ns Ar start , end )
+pairs of corruption offsets.
+Offsets are always aligned on a 64-bit boundary,
+and can include some gaps of non-corruption.
+(See
+.Sy bad_ranges_min_gap )
+.It Sy bad_ranges_min_gap
+In order to bound the size of the
+.Sy bad_ranges
+array, gaps of non-corruption
+less than or equal to
+.Sy bad_ranges_min_gap
+bytes have been merged with
+adjacent corruption.
+Always at least 8 bytes, since corruption is detected on a 64-bit word basis.
+.It Sy bad_range_sets
+This array has one element per range in
+.Sy bad_ranges .
+Each element contains
+the count of bits in that range which were clear in the good data and set
+in the bad data.
+.It Sy bad_range_clears
+This array has one element per range in
+.Sy bad_ranges .
+Each element contains
+the count of bits for that range which were set in the good data and clear in
+the bad data.
+.It Sy bad_set_bits
+If this field exists, it is an array of
+.Pq Ar bad data No & ~( Ns Ar good data ) ;
+that is, the bits set in the bad data which are cleared in the good data.
+Each element corresponds to a byte whose offset is in a range in
+.Sy bad_ranges ,
+and the array is ordered by offset.
+Thus, the first element is the first byte in the first
+.Sy bad_ranges
+range, and the last element is the last byte in the last
+.Sy bad_ranges
+range.
+.It Sy bad_cleared_bits
+Like
+.Sy bad_set_bits ,
+but contains
+.Pq Ar good data No & ~( Ns Ar bad data ) ;
+that is, the bits set in the good data which are cleared in the bad data.
+.It Sy bad_set_histogram
+If this field exists, it is an array of counters.
+Each entry counts bits set in a particular bit of a big-endian uint64 type.
+The first entry counts bits
+set in the high-order bit of the first byte, the 9th byte, etc, and the last
+entry counts bits set in the low-order bit of the 8th byte, the 16th byte, etc.
+This information is useful for observing a stuck bit in a parallel data path,
+such as IDE or parallel SCSI.
+.It Sy bad_cleared_histogram
+If this field exists, it is an array of counters.
+Each entry counts bit clears in a particular bit of a big-endian uint64 type.
+The first entry counts bit
+clears of the high-order bit of the first byte, the 9th byte, etc, and the
+last entry counts clears of the low-order bit of the 8th byte, the 16th byte, etc.
+This information is useful for observing a stuck bit in a parallel data
+path, such as IDE or parallel SCSI.
+.El
+.
+.Sh I/O STAGES
+The ZFS I/O pipeline is comprised of various stages which are defined below.
+The individual stages are used to construct these basic I/O
+operations: Read, Write, Free, Claim, and Ioctl.
+These stages may be
+set on an event to describe the life cycle of a given I/O request.
+.Pp
+.TS
+tab(:);
+l l l .
+Stage:Bit Mask:Operations
+_:_:_
+ZIO_STAGE_OPEN:0x00000001:RWFCI
+
+ZIO_STAGE_READ_BP_INIT:0x00000002:R----
+ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W---
+ZIO_STAGE_FREE_BP_INIT:0x00000008:--F--
+ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF--
+ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W---
+
+ZIO_STAGE_ENCRYPT:0x00000040:-W---
+ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W---
+
+ZIO_STAGE_NOP_WRITE:0x00000100:-W---
+
+ZIO_STAGE_DDT_READ_START:0x00000200:R----
+ZIO_STAGE_DDT_READ_DONE:0x00000400:R----
+ZIO_STAGE_DDT_WRITE:0x00000800:-W---
+ZIO_STAGE_DDT_FREE:0x00001000:--F--
+
+ZIO_STAGE_GANG_ASSEMBLE:0x00002000:RWFC-
+ZIO_STAGE_GANG_ISSUE:0x00004000:RWFC-
+
+ZIO_STAGE_DVA_THROTTLE:0x00008000:-W---
+ZIO_STAGE_DVA_ALLOCATE:0x00010000:-W---
+ZIO_STAGE_DVA_FREE:0x00020000:--F--
+ZIO_STAGE_DVA_CLAIM:0x00040000:---C-
+
+ZIO_STAGE_READY:0x00080000:RWFCI
+
+ZIO_STAGE_VDEV_IO_START:0x00100000:RW--I
+ZIO_STAGE_VDEV_IO_DONE:0x00200000:RW--I
+ZIO_STAGE_VDEV_IO_ASSESS:0x00400000:RW--I
+
+ZIO_STAGE_CHECKSUM_VERIFY:0x00800000:R----
+
+ZIO_STAGE_DONE:0x01000000:RWFCI
+.TE
+.
+.Sh I/O FLAGS
+Every I/O request in the pipeline contains a set of flags which describe its
+function and are used to govern its behavior.
+These flags will be set in an event as a
+.Sy zio_flags
+payload entry.
+.Pp
+.TS
+tab(:);
+l l .
+Flag:Bit Mask
+_:_
+ZIO_FLAG_DONT_AGGREGATE:0x00000001
+ZIO_FLAG_IO_REPAIR:0x00000002
+ZIO_FLAG_SELF_HEAL:0x00000004
+ZIO_FLAG_RESILVER:0x00000008
+ZIO_FLAG_SCRUB:0x00000010
+ZIO_FLAG_SCAN_THREAD:0x00000020
+ZIO_FLAG_PHYSICAL:0x00000040
+
+ZIO_FLAG_CANFAIL:0x00000080
+ZIO_FLAG_SPECULATIVE:0x00000100
+ZIO_FLAG_CONFIG_WRITER:0x00000200
+ZIO_FLAG_DONT_RETRY:0x00000400
+ZIO_FLAG_DONT_CACHE:0x00000800
+ZIO_FLAG_NODATA:0x00001000
+ZIO_FLAG_INDUCE_DAMAGE:0x00002000
+
+ZIO_FLAG_IO_ALLOCATING:0x00004000
+ZIO_FLAG_IO_RETRY:0x00008000
+ZIO_FLAG_PROBE:0x00010000
+ZIO_FLAG_TRYHARD:0x00020000
+ZIO_FLAG_OPTIONAL:0x00040000
+
+ZIO_FLAG_DONT_QUEUE:0x00080000
+ZIO_FLAG_DONT_PROPAGATE:0x00100000
+ZIO_FLAG_IO_BYPASS:0x00200000
+ZIO_FLAG_IO_REWRITE:0x00400000
+ZIO_FLAG_RAW_COMPRESS:0x00800000
+ZIO_FLAG_RAW_ENCRYPT:0x01000000
+
+ZIO_FLAG_GANG_CHILD:0x02000000
+ZIO_FLAG_DDT_CHILD:0x04000000
+ZIO_FLAG_GODFATHER:0x08000000
+ZIO_FLAG_NOPWRITE:0x10000000
+ZIO_FLAG_REEXECUTED:0x20000000
+ZIO_FLAG_DELEGATED:0x40000000
+ZIO_FLAG_FASTWRITE:0x80000000
+.TE
+.
+.Sh SEE ALSO
+.Xr zfs 4 ,
+.Xr zed 8 ,
+.Xr zpool-wait 8

diff --git a/zfs/man/man8/zpool-export.8 b/zfs/man/man8/zpool-export.8
new file mode 100644
index 0000000..a15291a
--- /dev/null
+++ b/zfs/man/man8/zpool-export.8

@@ -0,0 +1,72 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd February 16, 2020
+.Dt ZPOOL-EXPORT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-export
+.Nd export ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm export
+.Op Fl f
+.Fl a Ns | Ns Ar pool Ns …
+.
+.Sh DESCRIPTION
+Exports the given pools from the system.
+All devices are marked as exported, but are still considered in use by other
+subsystems.
+The devices can be moved between systems
+.Pq even those of different endianness
+and imported as long as a sufficient number of devices are present.
+.Pp
+Before exporting the pool, all datasets within the pool are unmounted.
+A pool cannot be exported if it has a shared spare that is currently being
+used.
+.Pp
+For pools to be portable, you must give the
+.Nm zpool
+command whole disks, not just partitions, so that ZFS can label the disks with
+portable EFI labels.
+Otherwise, disk drivers on platforms of different endianness will not recognize
+the disks.
+.Bl -tag -width Ds
+.It Fl a
+Exports all pools imported on the system.
+.It Fl f
+Forcefully unmount all datasets, and allow export of pools with active shared spares.
+.Pp
+This command will forcefully export the pool even if it has a shared spare that
+is currently being used.
+This may lead to potential data corruption.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-import 8

diff --git a/zfs/man/man8/zpool-get.8 b/zfs/man/man8/zpool-get.8
new file mode 100644
index 0000000..55904f1
--- /dev/null
+++ b/zfs/man/man8/zpool-get.8

@@ -0,0 +1,108 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-GET 8
+.Os
+.
+.Sh NAME
+.Nm zpool-get
+.Nd retrieve properties of ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm get
+.Op Fl Hp
+.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns …
+.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns …
+.Oo Ar pool Oc Ns …
+.Nm zpool
+.Cm set
+.Ar property Ns = Ns Ar value
+.Ar pool
+.
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm get
+.Op Fl Hp
+.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns …
+.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns …
+.Oo Ar pool Oc Ns …
+.Xc
+Retrieves the given list of properties
+.Po
+or all properties if
+.Sy all
+is used
+.Pc
+for the specified storage pool(s).
+These properties are displayed with the following fields:
+.Bl -tag -compact -offset Ds -width "property"
+.It Sy name
+Name of storage pool.
+.It Sy property
+Property name.
+.It Sy value
+Property value.
+.It Sy source
+Property source, either
+.Sy default No or Sy local .
+.El
+.Pp
+See the
+.Xr zpoolprops 7
+manual page for more information on the available pool properties.
+.Bl -tag -compact -offset Ds -width "-o field"
+.It Fl H
+Scripted mode.
+Do not display headers, and separate fields by a single tab instead of arbitrary
+space.
+.It Fl o Ar field
+A comma-separated list of columns to display, defaults to
+.Sy name , Ns Sy property , Ns Sy value , Ns Sy source .
+.It Fl p
+Display numbers in parsable (exact) values.
+.El
+.It Xo
+.Nm zpool
+.Cm set
+.Ar property Ns = Ns Ar value
+.Ar pool
+.Xc
+Sets the given property on the specified pool.
+See the
+.Xr zpoolprops 7
+manual page for more information on what properties can be set and acceptable
+values.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-features 7 ,
+.Xr zpoolprops 7 ,
+.Xr zpool-list 8

diff --git a/zfs/man/man8/zpool-history.8 b/zfs/man/man8/zpool-history.8
new file mode 100644
index 0000000..2a2d500
--- /dev/null
+++ b/zfs/man/man8/zpool-history.8

@@ -0,0 +1,58 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-HISTORY 8
+.Os
+.
+.Sh NAME
+.Nm zpool-history
+.Nd inspect command history of ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm history
+.Op Fl il
+.Oo Ar pool Oc Ns …
+.
+.Sh DESCRIPTION
+Displays the command history of the specified pool(s) or all pools if no pool is
+specified.
+.Bl -tag -width Ds
+.It Fl i
+Displays internally logged ZFS events in addition to user initiated events.
+.It Fl l
+Displays log records in long format, which in addition to standard format
+includes the user name, the hostname, and the zone in which the operation was
+performed.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-checkpoint 8 ,
+.Xr zpool-events 8 ,
+.Xr zpool-status 8 ,
+.Xr zpool-wait 8

diff --git a/zfs/man/man8/zpool-import.8 b/zfs/man/man8/zpool-import.8
new file mode 100644
index 0000000..5462e4e
--- /dev/null
+++ b/zfs/man/man8/zpool-import.8

@@ -0,0 +1,411 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-IMPORT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-import
+.Nd import ZFS storage pools or list available pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm import
+.Op Fl D
+.Oo Fl d Ar dir Ns | Ns Ar device Oc Ns …
+.Nm zpool
+.Cm import
+.Fl a
+.Op Fl DflmN
+.Op Fl F Op Fl nTX
+.Op Fl -rewind-to-checkpoint
+.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
+.Op Fl o Ar mntopts
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Nm zpool
+.Cm import
+.Op Fl Dflmt
+.Op Fl F Op Fl nTX
+.Op Fl -rewind-to-checkpoint
+.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
+.Op Fl o Ar mntopts
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Op Fl s
+.Ar pool Ns | Ns Ar id
+.Op Ar newpool
+.
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm import
+.Op Fl D
+.Oo Fl d Ar dir Ns | Ns Ar device Oc Ns …
+.Xc
+Lists pools available to import.
+If the
+.Fl d
+or
+.Fl c
+options are not specified, this command searches for devices using libblkid
+on Linux and geom on
+.Fx .
+The
+.Fl d
+option can be specified multiple times, and all directories are searched.
+If the device appears to be part of an exported pool, this command displays a
+summary of the pool with the name of the pool, a numeric identifier, as well as
+the vdev layout and current health of the device for each device or file.
+Destroyed pools, pools that were previously destroyed with the
+.Nm zpool Cm destroy
+command, are not listed unless the
+.Fl D
+option is specified.
+.Pp
+The numeric identifier is unique, and can be used instead of the pool name when
+multiple exported pools of the same name are available.
+.Bl -tag -width Ds
+.It Fl c Ar cachefile
+Reads configuration from the given
+.Ar cachefile
+that was created with the
+.Sy cachefile
+pool property.
+This
+.Ar cachefile
+is used instead of searching for devices.
+.It Fl d Ar dir Ns | Ns Ar device
+Uses
+.Ar device
+or searches for devices or files in
+.Ar dir .
+The
+.Fl d
+option can be specified multiple times.
+.It Fl D
+Lists destroyed pools only.
+.El
+.It Xo
+.Nm zpool
+.Cm import
+.Fl a
+.Op Fl DflmN
+.Op Fl F Op Fl nTX
+.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
+.Op Fl o Ar mntopts
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Op Fl s
+.Xc
+Imports all pools found in the search directories.
+Identical to the previous command, except that all pools with a sufficient
+number of devices available are imported.
+Destroyed pools, pools that were previously destroyed with the
+.Nm zpool Cm destroy
+command, will not be imported unless the
+.Fl D
+option is specified.
+.Bl -tag -width Ds
+.It Fl a
+Searches for and imports all pools found.
+.It Fl c Ar cachefile
+Reads configuration from the given
+.Ar cachefile
+that was created with the
+.Sy cachefile
+pool property.
+This
+.Ar cachefile
+is used instead of searching for devices.
+.It Fl d Ar dir Ns | Ns Ar device
+Uses
+.Ar device
+or searches for devices or files in
+.Ar dir .
+The
+.Fl d
+option can be specified multiple times.
+This option is incompatible with the
+.Fl c
+option.
+.It Fl D
+Imports destroyed pools only.
+The
+.Fl f
+option is also required.
+.It Fl f
+Forces import, even if the pool appears to be potentially active.
+.It Fl F
+Recovery mode for a non-importable pool.
+Attempt to return the pool to an importable state by discarding the last few
+transactions.
+Not all damaged pools can be recovered by using this option.
+If successful, the data from the discarded transactions is irretrievably lost.
+This option is ignored if the pool is importable or already imported.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the pool online.
+Note that if any datasets have a
+.Sy keylocation
+of
+.Sy prompt
+this command will block waiting for the keys to be entered.
+Without this flag
+encrypted datasets will be left unavailable until the keys are loaded.
+.It Fl m
+Allows a pool to import when there is a missing log device.
+Recent transactions can be lost because the log device will be discarded.
+.It Fl n
+Used with the
+.Fl F
+recovery option.
+Determines whether a non-importable pool can be made importable again, but does
+not actually perform the pool recovery.
+For more details about pool recovery mode, see the
+.Fl F
+option, above.
+.It Fl N
+Import the pool without mounting any file systems.
+.It Fl o Ar mntopts
+Comma-separated list of mount options to use when mounting datasets within the
+pool.
+See
+.Xr zfs 8
+for a description of dataset properties and mount options.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property on the imported pool.
+See the
+.Xr zpoolprops 7
+manual page for more information on the available pool properties.
+.It Fl R Ar root
+Sets the
+.Sy cachefile
+property to
+.Sy none
+and the
+.Sy altroot
+property to
+.Ar root .
+.It Fl -rewind-to-checkpoint
+Rewinds pool to the checkpointed state.
+Once the pool is imported with this flag there is no way to undo the rewind.
+All changes and data that were written after the checkpoint are lost!
+The only exception is when the
+.Sy readonly
+mounting option is enabled.
+In this case, the checkpointed state of the pool is opened and an
+administrator can see how the pool would look if they were
+to fully rewind.
+.It Fl s
+Scan using the default search path; the libblkid cache will not be
+consulted.
+A custom search path may be specified by setting the
+.Sy ZPOOL_IMPORT_PATH
+environment variable.
+.It Fl X
+Used with the
+.Fl F
+recovery option.
+Determines whether extreme measures to find a valid txg should take place.
+This allows the pool to
+be rolled back to a txg which is no longer guaranteed to be consistent.
+Pools imported at an inconsistent txg may contain uncorrectable checksum errors.
+For more details about pool recovery mode, see the
+.Fl F
+option, above.
+WARNING: This option can be extremely hazardous to the
+health of your pool and should only be used as a last resort.
+.It Fl T
+Specify the txg to use for rollback.
+Implies
+.Fl FX .
+For more details
+about pool recovery mode, see the
+.Fl X
+option, above.
+WARNING: This option can be extremely hazardous to the
+health of your pool and should only be used as a last resort.
+.El
+.It Xo
+.Nm zpool
+.Cm import
+.Op Fl Dflmt
+.Op Fl F Op Fl nTX
+.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device
+.Op Fl o Ar mntopts
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Op Fl s
+.Ar pool Ns | Ns Ar id
+.Op Ar newpool
+.Xc
+Imports a specific pool.
+A pool can be identified by its name or the numeric identifier.
+If
+.Ar newpool
+is specified, the pool is imported using the name
+.Ar newpool .
+Otherwise, it is imported with the same name as its exported name.
+.Pp
+If a device is removed from a system without running
+.Nm zpool Cm export
+first, the device appears as potentially active.
+It cannot be determined if this was a failed export, or whether the device is
+really in use from another host.
+To import a pool in this state, the
+.Fl f
+option is required.
+.Bl -tag -width Ds
+.It Fl c Ar cachefile
+Reads configuration from the given
+.Ar cachefile
+that was created with the
+.Sy cachefile
+pool property.
+This
+.Ar cachefile
+is used instead of searching for devices.
+.It Fl d Ar dir Ns | Ns Ar device
+Uses
+.Ar device
+or searches for devices or files in
+.Ar dir .
+The
+.Fl d
+option can be specified multiple times.
+This option is incompatible with the
+.Fl c
+option.
+.It Fl D
+Imports a destroyed pool.
+The
+.Fl f
+option is also required.
+.It Fl f
+Forces import, even if the pool appears to be potentially active.
+.It Fl F
+Recovery mode for a non-importable pool.
+Attempt to return the pool to an importable state by discarding the last few
+transactions.
+Not all damaged pools can be recovered by using this option.
+If successful, the data from the discarded transactions is irretrievably lost.
+This option is ignored if the pool is importable or already imported.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the pool online.
+Note that if any datasets have a
+.Sy keylocation
+of
+.Sy prompt
+this command will block waiting for the keys to be entered.
+Without this flag
+encrypted datasets will be left unavailable until the keys are loaded.
+.It Fl m
+Allows a pool to import when there is a missing log device.
+Recent transactions can be lost because the log device will be discarded.
+.It Fl n
+Used with the
+.Fl F
+recovery option.
+Determines whether a non-importable pool can be made importable again, but does
+not actually perform the pool recovery.
+For more details about pool recovery mode, see the
+.Fl F
+option, above.
+.It Fl o Ar mntopts
+Comma-separated list of mount options to use when mounting datasets within the
+pool.
+See
+.Xr zfs 8
+for a description of dataset properties and mount options.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property on the imported pool.
+See the
+.Xr zpoolprops 7
+manual page for more information on the available pool properties.
+.It Fl R Ar root
+Sets the
+.Sy cachefile
+property to
+.Sy none
+and the
+.Sy altroot
+property to
+.Ar root .
+.It Fl s
+Scan using the default search path; the libblkid cache will not be
+consulted.
+A custom search path may be specified by setting the
+.Sy ZPOOL_IMPORT_PATH
+environment variable.
+.It Fl X
+Used with the
+.Fl F
+recovery option.
+Determines whether extreme measures to find a valid txg should take place.
+This allows the pool to
+be rolled back to a txg which is no longer guaranteed to be consistent.
+Pools imported at an inconsistent txg may contain uncorrectable
+checksum errors.
+For more details about pool recovery mode, see the
+.Fl F
+option, above.
+WARNING: This option can be extremely hazardous to the
+health of your pool and should only be used as a last resort.
+.It Fl T
+Specify the txg to use for rollback.
+Implies
+.Fl FX .
+For more details
+about pool recovery mode, see the
+.Fl X
+option, above.
+.Em WARNING :
+This option can be extremely hazardous to the
+health of your pool and should only be used as a last resort.
+.It Fl t
+Used with
+.Ar newpool .
+Specifies that
+.Ar newpool
+is temporary.
+Temporary pool names last until export.
+Ensures that the original pool name will be used
+in all label updates and therefore is retained upon export.
+Will also set
+.Fl o Sy cachefile Ns = Ns Sy none
+when not explicitly specified.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-export 8 ,
+.Xr zpool-list 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-initialize.8 b/zfs/man/man8/zpool-initialize.8
new file mode 100644
index 0000000..ada00bb
--- /dev/null
+++ b/zfs/man/man8/zpool-initialize.8

@@ -0,0 +1,81 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-INITIALIZE 8
+.Os
+.
+.Sh NAME
+.Nm zpool-initialize
+.Nd write to unallocated regions of ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm initialize
+.Op Fl c Ns | Ns Fl s Ns | Ns Fl u
+.Op Fl w
+.Ar pool
+.Oo Ar device Oc Ns …
+.
+.Sh DESCRIPTION
+Begins initializing by writing to all unallocated regions on the specified
+devices, or all eligible devices in the pool if no individual devices are
+specified.
+Only leaf data or log devices may be initialized.
+.Bl -tag -width Ds
+.It Fl c , -cancel
+Cancel initializing on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+initialized, the command will fail and no cancellation will occur on any device.
+.It Fl s , -suspend
+Suspend initializing on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+initialized, the command will fail and no suspension will occur on any device.
+Initializing can then be resumed by running
+.Nm zpool Cm initialize
+with no flags on the relevant target devices.
+.It Fl u , -uninit
+Clears the initialization state on the specified devices, or all eligible
+devices if none are specified.
+If the devices are being actively initialized the command will fail.
+After being cleared
+.Nm zpool Cm initialize
+with no flags can be used to re-initialize all unallocated regions on
+the relevant target devices.
+.It Fl w , -wait
+Wait until the devices have finished initializing before returning.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-add 8 ,
+.Xr zpool-attach 8 ,
+.Xr zpool-create 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-trim 8

diff --git a/zfs/man/man8/zpool-iostat.8 b/zfs/man/man8/zpool-iostat.8
new file mode 100644
index 0000000..0e64aa7
--- /dev/null
+++ b/zfs/man/man8/zpool-iostat.8

@@ -0,0 +1,257 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-IOSTAT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-iostat
+.Nd display logical I/O statistics for ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm iostat
+.Op Oo Oo Fl c Ar SCRIPT Oc Oo Fl lq Oc Oc Ns | Ns Fl rw
+.Op Fl T Sy u Ns | Ns Sy d
+.Op Fl ghHLnpPvy
+.Oo Ar pool Ns … Ns | Ns Oo Ar pool vdev Ns … Oc Ns | Ns Ar vdev Ns … Oc
+.Op Ar interval Op Ar count
+.
+.Sh DESCRIPTION
+Displays logical I/O statistics for the given pools/vdevs.
+Physical I/O statistics may be observed via
+.Xr iostat 1 .
+If writes are located nearby, they may be merged into a single
+larger operation.
+Additional I/O may be generated depending on the level of vdev redundancy.
+To filter output, you may pass in a list of pools, a pool and list of vdevs
+in that pool, or a list of any vdevs from any pool.
+If no items are specified, statistics for every pool in the system are shown.
+When given an
+.Ar interval ,
+the statistics are printed every
+.Ar interval
+seconds until killed.
+If the
+.Fl n
+flag is specified the headers are displayed only once, otherwise they are
+displayed periodically.
+If
+.Ar count
+is specified, the command exits after
+.Ar count
+reports are printed.
+The first report printed is always the statistics since boot regardless of whether
+.Ar interval
+and
+.Ar count
+are passed.
+However, this behavior can be suppressed with the
+.Fl y
+flag.
+Also note that the units of
+.Sy K ,
+.Sy M ,
+.Sy G Ns …
+that are printed in the report are in base 1024.
+To get the raw values, use the
+.Fl p
+flag.
+.Bl -tag -width Ds
+.It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
+Run a script (or scripts) on each vdev and include the output as a new column
+in the
+.Nm zpool Cm iostat
+output.
+Users can run any script found in their
+.Pa ~/.zpool.d
+directory or from the system
+.Pa /etc/zfs/zpool.d
+directory.
+Script names containing the slash
+.Pq Sy /
+character are not allowed.
+The default search path can be overridden by setting the
+.Sy ZPOOL_SCRIPTS_PATH
+environment variable.
+A privileged user can only run
+.Fl c
+if they have the
+.Sy ZPOOL_SCRIPTS_AS_ROOT
+environment variable set.
+If a script requires the use of a privileged command, like
+.Xr smartctl 8 ,
+then it's recommended you allow the user access to it in
+.Pa /etc/sudoers
+or add the user to the
+.Pa /etc/sudoers.d/zfs
+file.
+.Pp
+If
+.Fl c
+is passed without a script name, it prints a list of all scripts.
+.Fl c
+also sets verbose mode
+.No \&( Ns Fl v Ns No \&).
+.Pp
+Script output should be in the form of "name=value".
+The column name is set to "name" and the value is set to "value".
+Multiple lines can be used to output multiple columns.
+The first line of output not in the
+"name=value" format is displayed without a column title,
+and no more output after that is displayed.
+This can be useful for printing error messages.
+Blank or NULL values are printed as a '-' to make output AWKable.
+.Pp
+The following environment variables are set before running each script:
+.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH"
+.It Sy VDEV_PATH
+Full path to the vdev
+.It Sy VDEV_UPATH
+Underlying path to the vdev
+.Pq Pa /dev/sd* .
+For use with device mapper, multipath, or partitioned vdevs.
+.It Sy VDEV_ENC_SYSFS_PATH
+The sysfs path to the enclosure for the vdev (if any).
+.El
+.It Fl T Sy u Ns | Ns Sy d
+Display a time stamp.
+Specify
+.Sy u
+for a printed representation of the internal representation of time.
+See
+.Xr time 2 .
+Specify
+.Sy d
+for standard date format.
+See
+.Xr date 1 .
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl H
+Scripted mode.
+Do not display headers, and separate fields by a
+single tab instead of arbitrary space.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
+.It Fl n
+Print headers only once when passed.
+.It Fl p
+Display numbers in parsable (exact) values.
+Time values are in nanoseconds.
+.It Fl P
+Display full paths for vdevs instead of only the last component of the path.
+This can be used in conjunction with the
+.Fl L
+flag.
+.It Fl r
+Print request size histograms for the leaf vdev's I/O.
+This includes histograms of individual I/O (ind) and aggregate I/O (agg).
+These stats can be useful for observing how well I/O aggregation is working.
+Note that TRIM I/O may exceed 16M, but will be counted as 16M.
+.It Fl v
+Verbose statistics. Reports usage statistics for individual vdevs within the
+pool, in addition to the pool-wide statistics.
+.It Fl y
+Normally the first line of output reports the statistics since boot:
+suppress it.
+.It Fl w
+Display latency histograms:
+.Bl -tag -compact -width "asyncq_read/write"
+.It Sy total_wait
+Total I/O time (queuing + disk I/O time).
+.It Sy disk_wait
+Disk I/O time (time reading/writing the disk).
+.It Sy syncq_wait
+Amount of time I/O spent in synchronous priority queues.
+Does not include disk time.
+.It Sy asyncq_wait
+Amount of time I/O spent in asynchronous priority queues.
+Does not include disk time.
+.It Sy scrub
+Amount of time I/O spent in scrub queue.
+Does not include disk time.
+.El
+.It Fl l
+Include average latency statistics:
+.Bl -tag -compact -width "asyncq_read/write"
+.It Sy total_wait
+Average total I/O time (queuing + disk I/O time).
+.It Sy disk_wait
+Average disk I/O time (time reading/writing the disk).
+.It Sy syncq_wait
+Average amount of time I/O spent in synchronous priority queues.
+Does not include disk time.
+.It Sy asyncq_wait
+Average amount of time I/O spent in asynchronous priority queues.
+Does not include disk time.
+.It Sy scrub
+Average queuing time in scrub queue.
+Does not include disk time.
+.It Sy trim
+Average queuing time in trim queue.
+Does not include disk time.
+.El
+.It Fl q
+Include active queue statistics.
+Each priority queue has both pending
+.Sy ( pend )
+and active
+.Sy ( activ )
+I/O requests.
+Pending requests are waiting to be issued to the disk,
+and active requests have been issued to disk and are waiting for completion.
+These stats are broken out by priority queue:
+.Bl -tag -compact -width "asyncq_read/write"
+.It Sy syncq_read/write
+Current number of entries in synchronous priority
+queues.
+.It Sy asyncq_read/write
+Current number of entries in asynchronous priority queues.
+.It Sy scrubq_read
+Current number of entries in scrub queue.
+.It Sy trimq_write
+Current number of entries in trim queue.
+.El
+.Pp
+All queue statistics are instantaneous measurements of the number of
+entries in the queues.
+If you specify an interval,
+the measurements will be sampled from the end of the interval.
+.El
+.
+.Sh SEE ALSO
+.Xr iostat 1 ,
+.Xr smartctl 8 ,
+.Xr zpool-list 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-labelclear.8 b/zfs/man/man8/zpool-labelclear.8
new file mode 100644
index 0000000..c7edc91
--- /dev/null
+++ b/zfs/man/man8/zpool-labelclear.8

@@ -0,0 +1,60 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 31, 2021
+.Dt ZPOOL-LABELCLEAR 8
+.Os
+.
+.Sh NAME
+.Nm zpool-labelclear
+.Nd remove ZFS label information from device
+.Sh SYNOPSIS
+.Nm zpool
+.Cm labelclear
+.Op Fl f
+.Ar device
+.
+.Sh DESCRIPTION
+Removes ZFS label information from the specified
+.Ar device .
+If the
+.Ar device
+is a cache device, it also removes the L2ARC header
+(persistent L2ARC). The
+.Ar device
+must not be part of an active pool configuration.
+.Bl -tag -width Ds
+.It Fl f
+Treat exported or foreign devices as inactive.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-destroy 8 ,
+.Xr zpool-detach 8 ,
+.Xr zpool-remove 8 ,
+.Xr zpool-replace 8

diff --git a/zfs/man/man8/zpool-list.8 b/zfs/man/man8/zpool-list.8
new file mode 100644
index 0000000..dd4e13c
--- /dev/null
+++ b/zfs/man/man8/zpool-list.8

@@ -0,0 +1,112 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-LIST 8
+.Os
+.
+.Sh NAME
+.Nm zpool-list
+.Nd list information about ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm list
+.Op Fl HgLpPv
+.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns …
+.Op Fl T Sy u Ns | Ns Sy d
+.Oo Ar pool Oc Ns …
+.Op Ar interval Op Ar count
+.
+.Sh DESCRIPTION
+Lists the given pools along with a health status and space usage.
+If no
+.Ar pool Ns s
+are specified, all pools in the system are listed.
+When given an
+.Ar interval ,
+the information is printed every
+.Ar interval
+seconds until killed.
+If
+.Ar count
+is specified, the command exits after
+.Ar count
+reports are printed.
+.Bl -tag -width Ds
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl H
+Scripted mode.
+Do not display headers, and separate fields by a single tab instead of arbitrary
+space.
+.It Fl o Ar property
+Comma-separated list of properties to display.
+See the
+.Xr zpoolprops 7
+manual page for a list of valid properties.
+The default list is
+.Sy name , size , allocated , free , checkpoint , expandsize , fragmentation ,
+.Sy capacity , dedupratio , health , altroot .
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk
+path used to open it.
+.It Fl p
+Display numbers in parsable
+.Pq exact
+values.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
+.It Fl T Sy u Ns | Ns Sy d
+Display a time stamp.
+Specify
+.Sy u
+for a printed representation of the internal representation of time.
+See
+.Xr time 2 .
+Specify
+.Sy d
+for standard date format.
+See
+.Xr date 1 .
+.It Fl v
+Verbose statistics.
+Reports usage statistics for individual vdevs within the pool, in addition to
+the pool-wide statistics.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-import 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-offline.8 b/zfs/man/man8/zpool-offline.8
new file mode 100644
index 0000000..9b2cf59
--- /dev/null
+++ b/zfs/man/man8/zpool-offline.8

@@ -0,0 +1,94 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-OFFLINE 8
+.Os
+.
+.Sh NAME
+.Nm zpool-offline
+.Nd take physical devices offline in ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm offline
+.Op Fl ft
+.Ar pool
+.Ar device Ns …
+.Nm zpool
+.Cm online
+.Op Fl e
+.Ar pool
+.Ar device Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm offline
+.Op Fl ft
+.Ar pool
+.Ar device Ns …
+.Xc
+Takes the specified physical device offline.
+While the
+.Ar device
+is offline, no attempt is made to read or write to the device.
+This command is not applicable to spares.
+.Bl -tag -width Ds
+.It Fl f
+Force fault.
+Instead of offlining the disk, put it into a faulted state.
+The fault will persist across imports unless the
+.Fl t
+flag was specified.
+.It Fl t
+Temporary.
+Upon reboot, the specified physical device reverts to its previous state.
+.El
+.It Xo
+.Nm zpool
+.Cm online
+.Op Fl e
+.Ar pool
+.Ar device Ns …
+.Xc
+Brings the specified physical device online.
+This command is not applicable to spares.
+.Bl -tag -width Ds
+.It Fl e
+Expand the device to use all available space.
+If the device is part of a mirror or raidz then all devices must be expanded
+before the new space will become available to the pool.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-detach 8 ,
+.Xr zpool-remove 8 ,
+.Xr zpool-reopen 8 ,
+.Xr zpool-resilver 8

diff --git a/zfs/man/man8/zpool-online.8 b/zfs/man/man8/zpool-online.8
new file mode 120000
index 0000000..537e00e
--- /dev/null
+++ b/zfs/man/man8/zpool-online.8

@@ -0,0 +1 @@
+zpool-offline.8
\ No newline at end of file

diff --git a/zfs/man/man8/zpool-reguid.8 b/zfs/man/man8/zpool-reguid.8
new file mode 100644
index 0000000..7bb7c1c
--- /dev/null
+++ b/zfs/man/man8/zpool-reguid.8

@@ -0,0 +1,48 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 31, 2021
+.Dt ZPOOL-REGUID 8
+.Os
+.
+.Sh NAME
+.Nm zpool-reguid
+.Nd generate new unique identifier for ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm reguid
+.Ar pool
+.
+.Sh DESCRIPTION
+Generates a new unique identifier for the pool.
+You must ensure that all devices in this pool are online and healthy before
+performing this action.
+.
+.Sh SEE ALSO
+.Xr zpool-export 8 ,
+.Xr zpool-import 8

diff --git a/zfs/man/man8/zpool-remove.8 b/zfs/man/man8/zpool-remove.8
new file mode 100644
index 0000000..a14218e
--- /dev/null
+++ b/zfs/man/man8/zpool-remove.8

@@ -0,0 +1,111 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-REMOVE 8
+.Os
+.Sh NAME
+.Nm zpool-remove
+.Nd remove devices from ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm remove
+.Op Fl npw
+.Ar pool Ar device Ns …
+.Nm zpool
+.Cm remove
+.Fl s
+.Ar pool
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm remove
+.Op Fl npw
+.Ar pool Ar device Ns …
+.Xc
+Removes the specified device from the pool.
+This command supports removing hot spare, cache, log, and both mirrored and
+non-redundant primary top-level vdevs, including dedup and special vdevs.
+.Pp
+Top-level vdevs can only be removed if the primary pool storage does not contain
+a top-level raidz vdev, all top-level vdevs have the same sector size, and the
+keys for all encrypted datasets are loaded.
+.Pp
+Removing a top-level vdev reduces the total amount of space in the storage pool.
+The specified device will be evacuated by copying all allocated space from it to
+the other devices in the pool.
+In this case, the
+.Nm zpool Cm remove
+command initiates the removal and returns, while the evacuation continues in
+the background.
+The removal progress can be monitored with
+.Nm zpool Cm status .
+If an IO error is encountered during the removal process it will be cancelled.
+The
+.Sy device_removal
+feature flag must be enabled to remove a top-level vdev, see
+.Xr zpool-features 7 .
+.Pp
+A mirrored top-level device (log or data) can be removed by specifying the top-level mirror for the
+same.
+Non-log devices or data devices that are part of a mirrored configuration can be removed using
+the
+.Nm zpool Cm detach
+command.
+.Bl -tag -width Ds
+.It Fl n
+Do not actually perform the removal
+.Pq Qq No-op .
+Instead, print the estimated amount of memory that will be used by the
+mapping table after the removal completes.
+This is nonzero only for top-level vdevs.
+.El
+.Bl -tag -width Ds
+.It Fl p
+Used in conjunction with the
+.Fl n
+flag, displays numbers as parsable (exact) values.
+.It Fl w
+Waits until the removal has completed before returning.
+.El
+.It Xo
+.Nm zpool
+.Cm remove
+.Fl s
+.Ar pool
+.Xc
+Stops and cancels an in-progress removal of a top-level vdev.
+.El
+.Sh SEE ALSO
+.Xr zpool-add 8 ,
+.Xr zpool-detach 8 ,
+.Xr zpool-labelclear 8 ,
+.Xr zpool-offline 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-split 8

diff --git a/zfs/man/man8/zpool-reopen.8 b/zfs/man/man8/zpool-reopen.8
new file mode 100644
index 0000000..f1f8606
--- /dev/null
+++ b/zfs/man/man8/zpool-reopen.8

@@ -0,0 +1,52 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd June 2, 2021
+.Dt ZPOOL-REOPEN 8
+.Os
+.
+.Sh NAME
+.Nm zpool-reopen
+.Nd reopen vdevs associated with ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm reopen
+.Op Fl n
+.Oo Ar pool Oc Ns …
+.
+.Sh DESCRIPTION
+Reopen all vdevs associated with the specified pools,
+or all pools if none specified.
+.
+.Sh OPTIONS
+.Bl -tag -width "-n"
+.It Fl n
+Do not restart an in-progress scrub operation.
+This is not recommended and can
+result in partially resilvered devices unless a second scrub is performed.
+.El

diff --git a/zfs/man/man8/zpool-replace.8 b/zfs/man/man8/zpool-replace.8
new file mode 100644
index 0000000..cc61fa3
--- /dev/null
+++ b/zfs/man/man8/zpool-replace.8

@@ -0,0 +1,99 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 29, 2021
+.Dt ZPOOL-REPLACE 8
+.Os
+.
+.Sh NAME
+.Nm zpool-replace
+.Nd replace one device with another in ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm replace
+.Op Fl fsw
+.Oo Fl o Ar property Ns = Ns Ar value Oc
+.Ar pool Ar device Op Ar new-device
+.
+.Sh DESCRIPTION
+Replaces
+.Ar device
+with
+.Ar new-device .
+This is equivalent to attaching
+.Ar new-device ,
+waiting for it to resilver, and then detaching
+.Ar device .
+Any in-progress scrub will be cancelled.
+.Pp
+The size of
+.Ar new-device
+must be greater than or equal to the minimum size of all the devices in a mirror
+or raidz configuration.
+.Pp
+.Ar new-device
+is required if the pool is not redundant.
+If
+.Ar new-device
+is not specified, it defaults to
+.Ar device .
+This form of replacement is useful after an existing disk has failed and has
+been physically replaced.
+In this case, the new disk may have the same
+.Pa /dev
+path as the old device, even though it is actually a different disk.
+ZFS recognizes this.
+.Bl -tag -width Ds
+.It Fl f
+Forces use of
+.Ar new-device ,
+even if it appears to be in use.
+Not all devices can be overridden in this manner.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the given pool properties.
+See the
+.Xr zpoolprops 7
+manual page for a list of valid properties that can be set.
+The only property supported at the moment is
+.Sy ashift .
+.It Fl s
+The
+.Ar new-device
+is reconstructed sequentially to restore redundancy as quickly as possible.
+Checksums are not verified during sequential reconstruction so a scrub is
+started when the resilver completes.
+Sequential reconstruction is not supported for raidz configurations.
+.It Fl w
+Waits until the replacement has completed before returning.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-detach 8 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-resilver 8

diff --git a/zfs/man/man8/zpool-resilver.8 b/zfs/man/man8/zpool-resilver.8
new file mode 100644
index 0000000..1ef316a
--- /dev/null
+++ b/zfs/man/man8/zpool-resilver.8

@@ -0,0 +1,56 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-RESILVER 8
+.Os
+.
+.Sh NAME
+.Nm zpool-resilver
+.Nd resilver devices in ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm resilver
+.Ar pool Ns …
+.
+.Sh DESCRIPTION
+Starts a resilver of the specified pools.
+If an existing resilver is already running it will be restarted from the beginning.
+Any drives that were scheduled for a deferred
+resilver will be added to the new one.
+This requires the
+.Sy resilver_defer
+pool feature.
+.
+.Sh SEE ALSO
+.Xr zpool-iostat 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-reopen 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-scrub 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-scrub.8 b/zfs/man/man8/zpool-scrub.8
new file mode 100644
index 0000000..69ae825
--- /dev/null
+++ b/zfs/man/man8/zpool-scrub.8

@@ -0,0 +1,144 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018, 2021 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd July 25, 2021
+.Dt ZPOOL-SCRUB 8
+.Os
+.
+.Sh NAME
+.Nm zpool-scrub
+.Nd begin or resume scrub of ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm scrub
+.Op Fl s Ns | Ns Fl p
+.Op Fl w
+.Ar pool Ns …
+.
+.Sh DESCRIPTION
+Begins a scrub or resumes a paused scrub.
+The scrub examines all data in the specified pools to verify that it checksums
+correctly.
+For replicated
+.Pq mirror, raidz, or draid
+devices, ZFS automatically repairs any damage discovered during the scrub.
+The
+.Nm zpool Cm status
+command reports the progress of the scrub and summarizes the results of the
+scrub upon completion.
+.Pp
+Scrubbing and resilvering are very similar operations.
+The difference is that resilvering only examines data that ZFS knows to be out
+of date
+.Po
+for example, when attaching a new device to a mirror or replacing an existing
+device
+.Pc ,
+whereas scrubbing examines all data to discover silent errors due to hardware
+faults or disk failure.
+.Pp
+Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows
+one at a time.
+.Pp
+A scrub is split into two parts: metadata scanning and block scrubbing.
+The metadata scanning sorts blocks into large sequential ranges which can then
+be read much more efficiently from disk when issuing the scrub I/O.
+.Pp
+If a scrub is paused, running
+.Nm zpool Cm scrub
+resumes it.
+If a resilver is in progress, ZFS does not allow a scrub to be started until the
+resilver completes.
+.Pp
+Note that, due to changes in pool data on a live system, it is possible for
+scrubs to progress slightly beyond 100% completion.
+During this period, no completion time estimate will be provided.
+.
+.Sh OPTIONS
+.Bl -tag -width "-s"
+.It Fl s
+Stop scrubbing.
+.It Fl p
+Pause scrubbing.
+Scrub pause state and progress are periodically synced to disk.
+If the system is restarted or pool is exported during a paused scrub,
+even after import, scrub will remain paused until it is resumed.
+Once resumed the scrub will pick up from the place where it was last
+checkpointed to disk.
+To resume a paused scrub issue
+.Nm zpool Cm scrub
+again.
+.It Fl w
+Wait until scrub has completed before returning.
+.El
+.Sh EXAMPLES
+.Bl -tag -width "Exam"
+.It Sy Example 1 : Status of pool with ongoing scrub:
+Output:
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm status
+  ...
+  scan: scrub in progress since Sun Jul 25 16:07:49 2021
+        403M scanned at 100M/s, 68.4M issued at 10.0M/s, 405M total
+        0B repaired, 16.91% done, 00:00:04 to go
+  ...
+.Ed
+Where:
+.Bl -dash -offset indent
+.It
+Metadata which references 403M of file data has been
+scanned at 100M/s, and 68.4M of that file data has been
+scrubbed sequentially at 10.0M/s.
+.El
+.El
+.Sh PERIODIC SCRUB
+On machines using systemd, scrub timers can be enabled on a per-pool basis.
+.Nm weekly
+and
+.Nm monthly
+timer units are provided.
+.Bl -tag -width Ds
+.It Xo
+.Xc
+.Nm systemctl
+.Cm enable
+.Cm zfs-scrub-\fIweekly\fB@\fIrpool\fB.timer
+.Cm --now
+.It Xo
+.Xc
+.Nm systemctl
+.Cm enable
+.Cm zfs-scrub-\fImonthly\fB@\fIotherpool\fB.timer
+.Cm --now
+.El
+.
+.Sh SEE ALSO
+.Xr systemd.timer 5 ,
+.Xr zpool-iostat 8 ,
+.Xr zpool-resilver 8 ,
+.Xr zpool-status 8

diff --git a/zfs/man/man8/zpool-set.8 b/zfs/man/man8/zpool-set.8
new file mode 120000
index 0000000..2b8b8cf
--- /dev/null
+++ b/zfs/man/man8/zpool-set.8

@@ -0,0 +1 @@
+zpool-get.8
\ No newline at end of file

diff --git a/zfs/man/man8/zpool-split.8 b/zfs/man/man8/zpool-split.8
new file mode 100644
index 0000000..c3b05c2
--- /dev/null
+++ b/zfs/man/man8/zpool-split.8

@@ -0,0 +1,116 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd June 2, 2021
+.Dt ZPOOL-SPLIT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-split
+.Nd split devices off ZFS storage pool, creating new pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm split
+.Op Fl gLlnP
+.Oo Fl o Ar property Ns = Ns Ar value Oc Ns …
+.Op Fl R Ar root
+.Ar pool newpool
+.Oo Ar device Oc Ns …
+.
+.Sh DESCRIPTION
+Splits devices off
+.Ar pool
+creating
+.Ar newpool .
+All vdevs in
+.Ar pool
+must be mirrors and the pool must not be in the process of resilvering.
+At the time of the split,
+.Ar newpool
+will be a replica of
+.Ar pool .
+By default, the
+last device in each mirror is split from
+.Ar pool
+to create
+.Ar newpool .
+.Pp
+The optional device specification causes the specified device(s) to be
+included in
+.Ar newpool
+and, should any devices remain unspecified,
+the last device in each mirror is used as would be by default.
+.Bl -tag -width Ds
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the new pool online.
+Note that if any datasets have
+.Sy keylocation Ns = Ns Sy prompt ,
+this command will block waiting for the keys to be entered.
+Without this flag, encrypted datasets will be left unavailable until the keys are loaded.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+split: do not actually perform it.
+Print out the expected configuration of
+.Ar newpool .
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
+.It Fl o Ar property Ns = Ns Ar value
+Sets the specified property for
+.Ar newpool .
+See the
+.Xr zpoolprops 7
+manual page for more information on the available pool properties.
+.It Fl R Ar root
+Set
+.Sy altroot
+for
+.Ar newpool
+to
+.Ar root
+and automatically import it.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-import 8 ,
+.Xr zpool-list 8 ,
+.Xr zpool-remove 8

diff --git a/zfs/man/man8/zpool-status.8 b/zfs/man/man8/zpool-status.8
new file mode 100644
index 0000000..7c825f6
--- /dev/null
+++ b/zfs/man/man8/zpool-status.8

@@ -0,0 +1,134 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd June 2, 2021
+.Dt ZPOOL-STATUS 8
+.Os
+.
+.Sh NAME
+.Nm zpool-status
+.Nd show detailed health status for ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm status
+.Op Fl DigLpPstvx
+.Op Fl T Sy u Ns | Ns Sy d
+.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
+.Oo Ar pool Oc Ns …
+.Op Ar interval Op Ar count
+.
+.Sh DESCRIPTION
+Displays the detailed health status for the given pools.
+If no
+.Ar pool
+is specified, then the status of each pool in the system is displayed.
+For more information on pool and device health, see the
+.Sx Device Failure and Recovery
+section of
+.Xr zpoolconcepts 7 .
+.Pp
+If a scrub or resilver is in progress, this command reports the percentage done
+and the estimated time to completion.
+Both of these are only approximate, because the amount of data in the pool and
+the other workloads on the system can change.
+.Bl -tag -width Ds
+.It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
+Run a script (or scripts) on each vdev and include the output as a new column
+in the
+.Nm zpool Cm status
+output.
+See the
+.Fl c
+option of
+.Nm zpool Cm iostat
+for complete details.
+.It Fl i
+Display vdev initialization status.
+.It Fl g
+Display vdev GUIDs instead of the normal device names.
+These GUIDs can be used in place of device names for the zpool
+detach/offline/remove/replace commands.
+.It Fl L
+Display real paths for vdevs resolving all symbolic links.
+This can be used to look up the current block device name regardless of the
+.Pa /dev/disk/
+path used to open it.
+.It Fl p
+Display numbers in parsable (exact) values.
+.It Fl P
+Display full paths for vdevs instead of only the last component of
+the path.
+This can be used in conjunction with the
+.Fl L
+flag.
+.It Fl D
+Display a histogram of deduplication statistics, showing the allocated
+.Pq physically present on disk
+and referenced
+.Pq logically referenced in the pool
+block counts and sizes by reference count.
+.It Fl s
+Display the number of leaf VDEV slow IOs.
+This is the number of IOs that
+didn't complete in
+.Sy zio_slow_io_ms
+milliseconds (default 30 seconds).
+This does not necessarily mean the IOs failed to complete, just that they took an
+unreasonably long amount of time.
+This may indicate a problem with the underlying storage.
+.It Fl t
+Display vdev TRIM status.
+.It Fl T Sy u Ns | Ns Sy d
+Display a time stamp.
+Specify
+.Sy u
+for a printed representation of the internal representation of time.
+See
+.Xr time 2 .
+Specify
+.Sy d
+for standard date format.
+See
+.Xr date 1 .
+.It Fl v
+Displays verbose data error information, printing out a complete list of all
+data errors since the last complete pool scrub.
+.It Fl x
+Only display status for pools that are exhibiting errors or are otherwise
+unavailable.
+Warnings about pools not using the latest on-disk format will not be included.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-events 8 ,
+.Xr zpool-history 8 ,
+.Xr zpool-iostat 8 ,
+.Xr zpool-list 8 ,
+.Xr zpool-resilver 8 ,
+.Xr zpool-scrub 8 ,
+.Xr zpool-wait 8

diff --git a/zfs/man/man8/zpool-sync.8 b/zfs/man/man8/zpool-sync.8
new file mode 100644
index 0000000..aa68a57
--- /dev/null
+++ b/zfs/man/man8/zpool-sync.8

@@ -0,0 +1,53 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-SYNC 8
+.Os
+.
+.Sh NAME
+.Nm zpool-sync
+.Nd flush data to primary storage of ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm sync
+.Oo Ar pool Oc Ns …
+.
+.Sh DESCRIPTION
+This command forces all in-core dirty data to be written to the primary
+pool storage and not the ZIL.
+It will also update administrative information including quota reporting.
+Without arguments,
+.Nm zpool Cm sync
+will sync all pools on the system.
+Otherwise, it will sync only the specified pools.
+.
+.Sh SEE ALSO
+.Xr zpoolconcepts 7 ,
+.Xr zpool-export 8 ,
+.Xr zpool-iostat 8

diff --git a/zfs/man/man8/zpool-trim.8 b/zfs/man/man8/zpool-trim.8
new file mode 100644
index 0000000..d9a7b44
--- /dev/null
+++ b/zfs/man/man8/zpool-trim.8

@@ -0,0 +1,91 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-TRIM 8
+.Os
+.
+.Sh NAME
+.Nm zpool-trim
+.Nd initiate TRIM of free space in ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm trim
+.Op Fl dw
+.Op Fl r Ar rate
+.Op Fl c Ns | Ns Fl s
+.Ar pool
+.Oo Ar device Ns Oc Ns …
+.
+.Sh DESCRIPTION
+Initiates an immediate on-demand TRIM operation for all of the free space in
+a pool.
+This operation informs the underlying storage devices of all blocks
+in the pool which are no longer allocated and allows thinly provisioned
+devices to reclaim the space.
+.Pp
+A manual on-demand TRIM operation can be initiated irrespective of the
+.Sy autotrim
+pool property setting.
+See the documentation for the
+.Sy autotrim No property in Xr zpoolprops 7
+for the types of vdev devices which can be trimmed.
+.Bl -tag -width Ds
+.It Fl d , -secure
+Causes a secure TRIM to be initiated.
+When performing a secure TRIM, the
+device guarantees that data stored on the trimmed blocks has been erased.
+This requires support from the device and is not supported by all SSDs.
+.It Fl r , -rate Ar rate
+Controls the rate at which the TRIM operation progresses.
+Without this
+option TRIM is executed as quickly as possible.
+The rate, expressed in bytes
+per second, is applied on a per-vdev basis and may be set differently for
+each leaf vdev.
+.It Fl c , -cancel
+Cancel trimming on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+trimmed, the command will fail and no cancellation will occur on any device.
+.It Fl s , -suspend
+Suspend trimming on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+trimmed, the command will fail and no suspension will occur on any device.
+Trimming can then be resumed by running
+.Nm zpool Cm trim
+with no flags on the relevant target devices.
+.It Fl w , -wait
+Wait until the devices are done being trimmed before returning.
+.El
+.
+.Sh SEE ALSO
+.Xr zpoolprops 7 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-wait 8

diff --git a/zfs/man/man8/zpool-upgrade.8 b/zfs/man/man8/zpool-upgrade.8
new file mode 100644
index 0000000..1b13bad
--- /dev/null
+++ b/zfs/man/man8/zpool-upgrade.8

@@ -0,0 +1,109 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+.\"
+.Dd August 9, 2019
+.Dt ZPOOL-UPGRADE 8
+.Os
+.
+.Sh NAME
+.Nm zpool-upgrade
+.Nd manage version and feature flags of ZFS storage pools
+.Sh SYNOPSIS
+.Nm zpool
+.Cm upgrade
+.Nm zpool
+.Cm upgrade
+.Fl v
+.Nm zpool
+.Cm upgrade
+.Op Fl V Ar version
+.Fl a Ns | Ns Ar pool Ns …
+.
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm zpool
+.Cm upgrade
+.Xc
+Displays pools which do not have all supported features enabled and pools
+formatted using a legacy ZFS version number.
+These pools can continue to be used, but some features may not be available.
+Use
+.Nm zpool Cm upgrade Fl a
+to enable all features on all pools (subject to the
+.Fl o Sy compatibility
+property).
+.It Xo
+.Nm zpool
+.Cm upgrade
+.Fl v
+.Xc
+Displays legacy ZFS versions supported by this version of ZFS.
+See
+.Xr zpool-features 7
+for a description of the feature flags supported by this version of ZFS.
+.It Xo
+.Nm zpool
+.Cm upgrade
+.Op Fl V Ar version
+.Fl a Ns | Ns Ar pool Ns …
+.Xc
+Enables all supported features on the given pool.
+.Pp
+If the pool has specified compatibility feature sets using the
+.Fl o Sy compatibility
+property, only the features present in all requested compatibility sets will be
+enabled.
+If this property is set to
+.Ar legacy
+then no upgrade will take place.
+.Pp
+Once this is done, the pool will no longer be accessible on systems that do not
+support feature flags.
+See
+.Xr zpool-features 7
+for details on compatibility with systems that support feature flags, but do not
+support all features enabled on the pool.
+.Bl -tag -width Ds
+.It Fl a
+Enables all supported features (from specified compatibility sets, if any) on all
+pools.
+.It Fl V Ar version
+Upgrade to the specified legacy version.
+If specified, no features will be enabled on the pool.
+This option can only be used to increase the version number up to the last
+supported legacy version number.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-features 7 ,
+.Xr zpoolconcepts 7 ,
+.Xr zpoolprops 7 ,
+.Xr zpool-history 8

diff --git a/zfs/man/man8/zpool-wait.8 b/zfs/man/man8/zpool-wait.8
new file mode 100644
index 0000000..38f4812
--- /dev/null
+++ b/zfs/man/man8/zpool-wait.8

@@ -0,0 +1,116 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd May 27, 2021
+.Dt ZPOOL-WAIT 8
+.Os
+.
+.Sh NAME
+.Nm zpool-wait
+.Nd wait for activity to stop in a ZFS storage pool
+.Sh SYNOPSIS
+.Nm zpool
+.Cm wait
+.Op Fl Hp
+.Op Fl T Sy u Ns | Ns Sy d
+.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns …
+.Ar pool
+.Op Ar interval
+.
+.Sh DESCRIPTION
+Waits until all background activity of the given types has ceased in the given
+pool.
+The activity could cease because it has completed, or because it has been
+paused or canceled by a user, or because the pool has been exported or
+destroyed.
+If no activities are specified, the command waits until background activity of
+every type listed below has ceased.
+If there is no activity of the given types in progress, the command returns
+immediately.
+.Pp
+These are the possible values for
+.Ar activity ,
+along with what each one waits for:
+.Bl -tag -compact -offset Ds -width "initialize"
+.It Sy discard
+Checkpoint to be discarded
+.It Sy free
+.Sy freeing
+property to become
+.Sy 0
+.It Sy initialize
+All initializations to cease
+.It Sy replace
+All device replacements to cease
+.It Sy remove
+Device removal to cease
+.It Sy resilver
+Resilver to cease
+.It Sy scrub
+Scrub to cease
+.It Sy trim
+Manual trim to cease
+.El
+.Pp
+If an
+.Ar interval
+is provided, the amount of work remaining, in bytes, for each activity is
+printed every
+.Ar interval
+seconds.
+.Bl -tag -width Ds
+.It Fl H
+Scripted mode.
+Do not display headers, and separate fields by a single tab instead of arbitrary
+space.
+.It Fl p
+Display numbers in parsable (exact) values.
+.It Fl T Sy u Ns | Ns Sy d
+Display a time stamp.
+Specify
+.Sy u
+for a printed representation of the internal representation of time.
+See
+.Xr time 2 .
+Specify
+.Sy d
+for standard date format.
+See
+.Xr date 1 .
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-checkpoint 8 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-remove 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-resilver 8 ,
+.Xr zpool-scrub 8 ,
+.Xr zpool-status 8 ,
+.Xr zpool-trim 8

diff --git a/zfs/man/man8/zpool.8 b/zfs/man/man8/zpool.8
index adbb723..e5d7c85 100644
--- a/zfs/man/man8/zpool.8
+++ b/zfs/man/man8/zpool.8

@@ -18,7 +18,6 @@
 .\"
 .\" CDDL HEADER END
 .\"
-.\"
 .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
@@ -27,9 +26,10 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd May 2, 2019
-.Dt ZPOOL 8 SMM
-.Os Linux
+.Dd June 2, 2021
+.Dt ZPOOL 8
+.Os
+.
 .Sh NAME
 .Nm zpool
 .Nd configure ZFS storage pools
@@ -37,181 +37,11 @@
 .Nm
 .Fl ?V
 .Nm
-.Cm add
-.Op Fl fgLnP
-.Oo Fl o Ar property Ns = Ns Ar value Oc
-.Ar pool vdev Ns ...
-.Nm
-.Cm attach
-.Op Fl f
-.Oo Fl o Ar property Ns = Ns Ar value Oc
-.Ar pool device new_device
-.Nm
-.Cm checkpoint
-.Op Fl d, -discard
-.Ar pool
-.Nm
-.Cm clear
-.Ar pool
-.Op Ar device
-.Nm
-.Cm create
-.Op Fl dfn
-.Op Fl m Ar mountpoint
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Oo Fl o Ar feature@feature Ns = Ns Ar value Oc
-.Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Ar pool vdev Ns ...
-.Nm
-.Cm destroy
-.Op Fl f
-.Ar pool
-.Nm
-.Cm detach
-.Ar pool device
-.Nm
-.Cm events
-.Op Fl vHf Oo Ar pool Oc | Fl c
-.Nm
-.Cm export
-.Op Fl a
-.Op Fl f
-.Ar pool Ns ...
-.Nm
-.Cm get
-.Op Fl Hp
-.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ...
-.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Oo Ar pool Oc Ns ...
-.Nm
-.Cm history
-.Op Fl il
-.Oo Ar pool Oc Ns ...
-.Nm
-.Cm import
-.Op Fl D
-.Op Fl d Ar dir Ns | Ns device
-.Nm
-.Cm import
-.Fl a
-.Op Fl DflmN
-.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
-.Op Fl -rewind-to-checkpoint
-.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns device
-.Op Fl o Ar mntopts
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Nm
-.Cm import
-.Op Fl Dflm
-.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
-.Op Fl -rewind-to-checkpoint
-.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns device
-.Op Fl o Ar mntopts
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Op Fl s
-.Ar pool Ns | Ns Ar id
-.Op Ar newpool Oo Fl t Oc
-.Nm
-.Cm initialize
-.Op Fl c | Fl s
-.Ar pool
-.Op Ar device Ns ...
-.Nm
-.Cm iostat
-.Op Oo Oo Fl c Ar SCRIPT Oc Oo Fl lq Oc Oc Ns | Ns Fl rw
-.Op Fl T Sy u Ns | Ns Sy d
-.Op Fl ghHLnpPvy
-.Oo Oo Ar pool Ns ... Oc Ns | Ns Oo Ar pool vdev Ns ... Oc Ns | Ns Oo Ar vdev Ns ... Oc Oc
-.Op Ar interval Op Ar count
-.Nm
-.Cm labelclear
-.Op Fl f
-.Ar device
-.Nm
-.Cm list
-.Op Fl HgLpPv
-.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Op Fl T Sy u Ns | Ns Sy d
-.Oo Ar pool Oc Ns ...
-.Op Ar interval Op Ar count
-.Nm
-.Cm offline
-.Op Fl f
-.Op Fl t
-.Ar pool Ar device Ns ...
-.Nm
-.Cm online
-.Op Fl e
-.Ar pool Ar device Ns ...
-.Nm
-.Cm reguid
-.Ar pool
-.Nm
-.Cm reopen
-.Op Fl n
-.Ar pool
-.Nm
-.Cm remove
-.Op Fl np
-.Ar pool Ar device Ns ...
-.Nm
-.Cm remove
-.Fl s
-.Ar pool
-.Nm
-.Cm replace
-.Op Fl f
-.Oo Fl o Ar property Ns = Ns Ar value Oc
-.Ar pool Ar device Op Ar new_device
-.Nm
-.Cm resilver
-.Ar pool Ns ...
-.Nm
-.Cm scrub
-.Op Fl s | Fl p
-.Ar pool Ns ...
-.Nm
-.Cm trim
-.Op Fl d
-.Op Fl r Ar rate
-.Op Fl c | Fl s
-.Ar pool
-.Op Ar device Ns ...
-.Nm
-.Cm set
-.Ar property Ns = Ns Ar value
-.Ar pool
-.Nm
-.Cm split
-.Op Fl gLlnP
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Ar pool newpool
-.Oo Ar device Oc Ns ...
-.Nm
-.Cm status
-.Oo Fl c Ar SCRIPT Oc
-.Op Fl DigLpPstvx
-.Op Fl T Sy u Ns | Ns Sy d
-.Oo Ar pool Oc Ns ...
-.Op Ar interval Op Ar count
-.Nm
-.Cm sync
-.Oo Ar pool Oc Ns ...
-.Nm
-.Cm upgrade
-.Nm
-.Cm upgrade
-.Fl v
-.Nm
-.Cm upgrade
-.Op Fl V Ar version
-.Fl a Ns | Ns Ar pool Ns ...
-.Nm
 .Cm version
+.Nm
+.Cm subcommand
+.Op Ar arguments
+.
 .Sh DESCRIPTION
 The
 .Nm
@@ -222,706 +52,12 @@
 See
 .Xr zfs 8
 for information on managing datasets.
-.Ss Virtual Devices (vdevs)
-A "virtual device" describes a single device or a collection of devices
-organized according to certain performance and fault characteristics.
-The following virtual devices are supported:
-.Bl -tag -width Ds
-.It Sy disk
-A block device, typically located under
-.Pa /dev .
-ZFS can use individual slices or partitions, though the recommended mode of
-operation is to use whole disks.
-A disk can be specified by a full path, or it can be a shorthand name
-.Po the relative portion of the path under
-.Pa /dev
-.Pc .
-A whole disk can be specified by omitting the slice or partition designation.
-For example,
-.Pa sda
-is equivalent to
-.Pa /dev/sda .
-When given a whole disk, ZFS automatically labels the disk, if necessary.
-.It Sy file
-A regular file.
-The use of files as a backing store is strongly discouraged.
-It is designed primarily for experimental purposes, as the fault tolerance of a
-file is only as good as the file system of which it is a part.
-A file must be specified by a full path.
-.It Sy mirror
-A mirror of two or more devices.
-Data is replicated in an identical fashion across all components of a mirror.
-A mirror with N disks of size X can hold X bytes and can withstand (N-1) devices
-failing before data integrity is compromised.
-.It Sy raidz , raidz1 , raidz2 , raidz3
-A variation on RAID-5 that allows for better distribution of parity and
-eliminates the RAID-5
-.Qq write hole
-.Pq in which data and parity become inconsistent after a power loss .
-Data and parity is striped across all disks within a raidz group.
 .Pp
-A raidz group can have single-, double-, or triple-parity, meaning that the
-raidz group can sustain one, two, or three failures, respectively, without
-losing any data.
-The
-.Sy raidz1
-vdev type specifies a single-parity raidz group; the
-.Sy raidz2
-vdev type specifies a double-parity raidz group; and the
-.Sy raidz3
-vdev type specifies a triple-parity raidz group.
-The
-.Sy raidz
-vdev type is an alias for
-.Sy raidz1 .
-.Pp
-A raidz group with N disks of size X with P parity disks can hold approximately
-(N-P)*X bytes and can withstand P device(s) failing before data integrity is
-compromised.
-The minimum number of devices in a raidz group is one more than the number of
-parity disks.
-The recommended number is between 3 and 9 to help increase performance.
-.It Sy spare
-A pseudo-vdev which keeps track of available hot spares for a pool.
-For more information, see the
-.Sx Hot Spares
-section.
-.It Sy log
-A separate intent log device.
-If more than one log device is specified, then writes are load-balanced between
-devices.
-Log devices can be mirrored.
-However, raidz vdev types are not supported for the intent log.
-For more information, see the
-.Sx Intent Log
-section.
-.It Sy dedup
-A device dedicated solely for deduplication tables.
-The redundancy of this device should match the redundancy of the other normal
-devices in the pool. If more than one dedup device is specified, then
-allocations are load-balanced between those devices.
-.It Sy special
-A device dedicated solely for allocating various kinds of internal metadata,
-and optionally small file blocks.
-The redundancy of this device should match the redundancy of the other normal
-devices in the pool. If more than one special device is specified, then
-allocations are load-balanced between those devices.
-.Pp
-For more information on special allocations, see the
-.Sx Special Allocation Class
-section.
-.It Sy cache
-A device used to cache storage pool data.
-A cache device cannot be configured as a mirror or raidz group.
-For more information, see the
-.Sx Cache Devices
-section.
-.El
-.Pp
-Virtual devices cannot be nested, so a mirror or raidz virtual device can only
-contain files or disks.
-Mirrors of mirrors
-.Pq or other combinations
-are not allowed.
-.Pp
-A pool can have any number of virtual devices at the top of the configuration
-.Po known as
-.Qq root vdevs
-.Pc .
-Data is dynamically distributed across all top-level devices to balance data
-among devices.
-As new virtual devices are added, ZFS automatically places data on the newly
-available devices.
-.Pp
-Virtual devices are specified one at a time on the command line, separated by
-whitespace.
-The keywords
-.Sy mirror
-and
-.Sy raidz
-are used to distinguish where a group ends and another begins.
-For example, the following creates two root vdevs, each a mirror of two disks:
-.Bd -literal
-# zpool create mypool mirror sda sdb mirror sdc sdd
-.Ed
-.Ss Device Failure and Recovery
-ZFS supports a rich set of mechanisms for handling device failure and data
-corruption.
-All metadata and data is checksummed, and ZFS automatically repairs bad data
-from a good copy when corruption is detected.
-.Pp
-In order to take advantage of these features, a pool must make use of some form
-of redundancy, using either mirrored or raidz groups.
-While ZFS supports running in a non-redundant configuration, where each root
-vdev is simply a disk or file, this is strongly discouraged.
-A single case of bit corruption can render some or all of your data unavailable.
-.Pp
-A pool's health status is described by one of three states: online, degraded,
-or faulted.
-An online pool has all devices operating normally.
-A degraded pool is one in which one or more devices have failed, but the data is
-still available due to a redundant configuration.
-A faulted pool has corrupted metadata, or one or more faulted devices, and
-insufficient replicas to continue functioning.
-.Pp
-The health of the top-level vdev, such as mirror or raidz device, is
-potentially impacted by the state of its associated vdevs, or component
-devices.
-A top-level vdev or component device is in one of the following states:
-.Bl -tag -width "DEGRADED"
-.It Sy DEGRADED
-One or more top-level vdevs is in the degraded state because one or more
-component devices are offline.
-Sufficient replicas exist to continue functioning.
-.Pp
-One or more component devices is in the degraded or faulted state, but
-sufficient replicas exist to continue functioning.
-The underlying conditions are as follows:
-.Bl -bullet
-.It
-The number of checksum errors exceeds acceptable levels and the device is
-degraded as an indication that something may be wrong.
-ZFS continues to use the device as necessary.
-.It
-The number of I/O errors exceeds acceptable levels.
-The device could not be marked as faulted because there are insufficient
-replicas to continue functioning.
-.El
-.It Sy FAULTED
-One or more top-level vdevs is in the faulted state because one or more
-component devices are offline.
-Insufficient replicas exist to continue functioning.
-.Pp
-One or more component devices is in the faulted state, and insufficient
-replicas exist to continue functioning.
-The underlying conditions are as follows:
-.Bl -bullet
-.It
-The device could be opened, but the contents did not match expected values.
-.It
-The number of I/O errors exceeds acceptable levels and the device is faulted to
-prevent further use of the device.
-.El
-.It Sy OFFLINE
-The device was explicitly taken offline by the
-.Nm zpool Cm offline
-command.
-.It Sy ONLINE
-The device is online and functioning.
-.It Sy REMOVED
-The device was physically removed while the system was running.
-Device removal detection is hardware-dependent and may not be supported on all
-platforms.
-.It Sy UNAVAIL
-The device could not be opened.
-If a pool is imported when a device was unavailable, then the device will be
-identified by a unique identifier instead of its path since the path was never
-correct in the first place.
-.El
-.Pp
-If a device is removed and later re-attached to the system, ZFS attempts
-to put the device online automatically.
-Device attach detection is hardware-dependent and might not be supported on all
-platforms.
-.Ss Hot Spares
-ZFS allows devices to be associated with pools as
-.Qq hot spares .
-These devices are not actively used in the pool, but when an active device
-fails, it is automatically replaced by a hot spare.
-To create a pool with hot spares, specify a
-.Sy spare
-vdev with any number of devices.
-For example,
-.Bd -literal
-# zpool create pool mirror sda sdb spare sdc sdd
-.Ed
-.Pp
-Spares can be shared across multiple pools, and can be added with the
-.Nm zpool Cm add
-command and removed with the
-.Nm zpool Cm remove
-command.
-Once a spare replacement is initiated, a new
-.Sy spare
-vdev is created within the configuration that will remain there until the
-original device is replaced.
-At this point, the hot spare becomes available again if another device fails.
-.Pp
-If a pool has a shared spare that is currently being used, the pool can not be
-exported since other pools may use this shared spare, which may lead to
-potential data corruption.
-.Pp
-Shared spares add some risk.  If the pools are imported on different hosts, and
-both pools suffer a device failure at the same time, both could attempt to use
-the spare at the same time.  This may not be detected, resulting in data
-corruption.
-.Pp
-An in-progress spare replacement can be cancelled by detaching the hot spare.
-If the original faulted device is detached, then the hot spare assumes its
-place in the configuration, and is removed from the spare list of all active
-pools.
-.Pp
-Spares cannot replace log devices.
-.Ss Intent Log
-The ZFS Intent Log (ZIL) satisfies POSIX requirements for synchronous
-transactions.
-For instance, databases often require their transactions to be on stable storage
-devices when returning from a system call.
-NFS and other applications can also use
-.Xr fsync 2
-to ensure data stability.
-By default, the intent log is allocated from blocks within the main pool.
-However, it might be possible to get better performance using separate intent
-log devices such as NVRAM or a dedicated disk.
-For example:
-.Bd -literal
-# zpool create pool sda sdb log sdc
-.Ed
-.Pp
-Multiple log devices can also be specified, and they can be mirrored.
-See the
-.Sx EXAMPLES
-section for an example of mirroring multiple log devices.
-.Pp
-Log devices can be added, replaced, attached, detached and removed.  In
-addition, log devices are imported and exported as part of the pool
-that contains them.
-Mirrored devices can be removed by specifying the top-level mirror vdev.
-.Ss Cache Devices
-Devices can be added to a storage pool as
-.Qq cache devices .
-These devices provide an additional layer of caching between main memory and
-disk.
-For read-heavy workloads, where the working set size is much larger than what
-can be cached in main memory, using cache devices allow much more of this
-working set to be served from low latency media.
-Using cache devices provides the greatest performance improvement for random
-read-workloads of mostly static content.
-.Pp
-To create a pool with cache devices, specify a
-.Sy cache
-vdev with any number of devices.
-For example:
-.Bd -literal
-# zpool create pool sda sdb cache sdc sdd
-.Ed
-.Pp
-Cache devices cannot be mirrored or part of a raidz configuration.
-If a read error is encountered on a cache device, that read I/O is reissued to
-the original storage pool device, which might be part of a mirrored or raidz
-configuration.
-.Pp
-The content of the cache devices is considered volatile, as is the case with
-other system caches.
-.Ss Pool checkpoint
-Before starting critical procedures that include destructive actions (e.g
-.Nm zfs Cm destroy
-), an administrator can checkpoint the pool's state and in the case of a
-mistake or failure, rewind the entire pool back to the checkpoint.
-Otherwise, the checkpoint can be discarded when the procedure has completed
-successfully.
-.Pp
-A pool checkpoint can be thought of as a pool-wide snapshot and should be used
-with care as it contains every part of the pool's state, from properties to vdev
-configuration.
-Thus, while a pool has a checkpoint certain operations are not allowed.
-Specifically, vdev removal/attach/detach, mirror splitting, and
-changing the pool's guid.
-Adding a new vdev is supported but in the case of a rewind it will have to be
-added again.
-Finally, users of this feature should keep in mind that scrubs in a pool that
-has a checkpoint do not repair checkpointed data.
-.Pp
-To create a checkpoint for a pool:
-.Bd -literal
-# zpool checkpoint pool
-.Ed
-.Pp
-To later rewind to its checkpointed state, you need to first export it and
-then rewind it during import:
-.Bd -literal
-# zpool export pool
-# zpool import --rewind-to-checkpoint pool
-.Ed
-.Pp
-To discard the checkpoint from a pool:
-.Bd -literal
-# zpool checkpoint -d pool
-.Ed
-.Pp
-Dataset reservations (controlled by the
-.Nm reservation
-or
-.Nm refreservation
-zfs properties) may be unenforceable while a checkpoint exists, because the
-checkpoint is allowed to consume the dataset's reservation.
-Finally, data that is part of the checkpoint but has been freed in the
-current state of the pool won't be scanned during a scrub.
-.Ss Special Allocation Class
-The allocations in the special class are dedicated to specific block types.
-By default this includes all metadata, the indirect blocks of user data, and
-any deduplication tables.  The class can also be provisioned to accept
-small file blocks.
-.Pp
-A pool must always have at least one normal (non-dedup/special) vdev before
-other devices can be assigned to the special class. If the special class
-becomes full, then allocations intended for it will spill back into the
-normal class.
-.Pp
-Deduplication tables can be excluded from the special class by setting the
-.Sy zfs_ddt_data_is_special
-zfs module parameter to false (0).
-.Pp
-Inclusion of small file blocks in the special class is opt-in. Each dataset
-can control the size of small file blocks allowed in the special class by
-setting the
-.Sy special_small_blocks
-dataset property. It defaults to zero, so you must opt-in by setting it to a
-non-zero value. See
-.Xr zfs 8
-for more info on setting this property.
-.Ss Properties
-Each pool has several properties associated with it.
-Some properties are read-only statistics while others are configurable and
-change the behavior of the pool.
-.Pp
-The following are read-only properties:
-.Bl -tag -width Ds
-.It Cm allocated
-Amount of storage used within the pool.
-See
-.Sy fragmentation
-and
-.Sy free
-for more information.
-.It Sy capacity
-Percentage of pool space used.
-This property can also be referred to by its shortened column name,
-.Sy cap .
-.It Sy expandsize
-Amount of uninitialized space within the pool or device that can be used to
-increase the total capacity of the pool.
-Uninitialized space consists of any space on an EFI labeled vdev which has not
-been brought online
-.Po e.g, using
-.Nm zpool Cm online Fl e
-.Pc .
-This space occurs when a LUN is dynamically expanded.
-.It Sy fragmentation
-The amount of fragmentation in the pool. As the amount of space
-.Sy allocated
-increases, it becomes more difficult to locate
-.Sy free
-space. This may result in lower write performance compared to pools with more
-unfragmented free space.
-.It Sy free
-The amount of free space available in the pool.
-By contrast, the
-.Xr zfs 8
-.Sy available
-property describes how much new data can be written to ZFS filesystems/volumes.
-The zpool
-.Sy free
-property is not generally useful for this purpose, and can be substantially more than the zfs
-.Sy available
-space. This discrepancy is due to several factors, including raidz party; zfs
-reservation, quota, refreservation, and refquota properties; and space set aside by
-.Sy spa_slop_shift
-(see
-.Xr zfs-module-parameters 5
-for more information).
-.It Sy freeing
-After a file system or snapshot is destroyed, the space it was using is
-returned to the pool asynchronously.
-.Sy freeing
-is the amount of space remaining to be reclaimed.
-Over time
-.Sy freeing
-will decrease while
-.Sy free
-increases.
-.It Sy health
-The current health of the pool.
-Health can be one of
-.Sy ONLINE , DEGRADED , FAULTED , OFFLINE, REMOVED , UNAVAIL .
-.It Sy guid
-A unique identifier for the pool.
-.It Sy load_guid
-A unique identifier for the pool.
-Unlike the
-.Sy guid
-property, this identifier is generated every time we load the pool (e.g. does
-not persist across imports/exports) and never changes while the pool is loaded
-(even if a
-.Sy reguid
-operation takes place).
-.It Sy size
-Total size of the storage pool.
-.It Sy unsupported@ Ns Em feature_guid
-Information about unsupported features that are enabled on the pool.
-See
-.Xr zpool-features 5
-for details.
-.El
-.Pp
-The space usage properties report actual physical space available to the
-storage pool.
-The physical space can be different from the total amount of space that any
-contained datasets can actually use.
-The amount of space used in a raidz configuration depends on the characteristics
-of the data being written.
-In addition, ZFS reserves some space for internal accounting that the
-.Xr zfs 8
-command takes into account, but the
-.Nm
-command does not.
-For non-full pools of a reasonable size, these effects should be invisible.
-For small pools, or pools that are close to being completely full, these
-discrepancies may become more noticeable.
-.Pp
-The following property can be set at creation time and import time:
-.Bl -tag -width Ds
-.It Sy altroot
-Alternate root directory.
-If set, this directory is prepended to any mount points within the pool.
-This can be used when examining an unknown pool where the mount points cannot be
-trusted, or in an alternate boot environment, where the typical paths are not
-valid.
-.Sy altroot
-is not a persistent property.
-It is valid only while the system is up.
-Setting
-.Sy altroot
-defaults to using
-.Sy cachefile Ns = Ns Sy none ,
-though this may be overridden using an explicit setting.
-.El
-.Pp
-The following property can be set only at import time:
-.Bl -tag -width Ds
-.It Sy readonly Ns = Ns Sy on Ns | Ns Sy off
-If set to
-.Sy on ,
-the pool will be imported in read-only mode.
-This property can also be referred to by its shortened column name,
-.Sy rdonly .
-.El
-.Pp
-The following properties can be set at creation time and import time, and later
-changed with the
-.Nm zpool Cm set
-command:
-.Bl -tag -width Ds
-.It Sy ashift Ns = Ns Sy ashift
-Pool sector size exponent, to the power of
-.Sy 2
-(internally referred to as
-.Sy ashift ) .
-Values from 9 to 16, inclusive, are valid; also, the
-value 0 (the default) means to auto-detect using the kernel's block
-layer and a ZFS internal exception list. I/O operations will be aligned
-to the specified size boundaries. Additionally, the minimum (disk)
-write size will be set to the specified size, so this represents a
-space vs. performance trade-off. For optimal performance, the pool
-sector size should be greater than or equal to the sector size of the
-underlying disks. The typical case for setting this property is when
-performance is important and the underlying disks use 4KiB sectors but
-report 512B sectors to the OS (for compatibility reasons); in that
-case, set
-.Sy ashift=12
-(which is 1<<12 = 4096). When set, this property is
-used as the default hint value in subsequent vdev operations (add,
-attach and replace). Changing this value will not modify any existing
-vdev, not even on disk replacement; however it can be used, for
-instance, to replace a dying 512B sectors disk with a newer 4KiB
-sectors device: this will probably result in bad performance but at the
-same time could prevent loss of data.
-.It Sy autoexpand Ns = Ns Sy on Ns | Ns Sy off
-Controls automatic pool expansion when the underlying LUN is grown.
-If set to
-.Sy on ,
-the pool will be resized according to the size of the expanded device.
-If the device is part of a mirror or raidz then all devices within that
-mirror/raidz group must be expanded before the new space is made available to
-the pool.
-The default behavior is
-.Sy off .
-This property can also be referred to by its shortened column name,
-.Sy expand .
-.It Sy autoreplace Ns = Ns Sy on Ns | Ns Sy off
-Controls automatic device replacement.
-If set to
-.Sy off ,
-device replacement must be initiated by the administrator by using the
-.Nm zpool Cm replace
-command.
-If set to
-.Sy on ,
-any new device, found in the same physical location as a device that previously
-belonged to the pool, is automatically formatted and replaced.
-The default behavior is
-.Sy off .
-This property can also be referred to by its shortened column name,
-.Sy replace .
-Autoreplace can also be used with virtual disks (like device
-mapper) provided that you use the /dev/disk/by-vdev paths setup by
-vdev_id.conf. See the
-.Xr vdev_id 8
-man page for more details.
-Autoreplace and autoonline require the ZFS Event Daemon be configured and
-running.  See the
-.Xr zed 8
-man page for more details.
-.It Sy bootfs Ns = Ns Sy (unset) Ns | Ns Ar pool Ns / Ns Ar dataset
-Identifies the default bootable dataset for the root pool. This property is
-expected to be set mainly by the installation and upgrade programs.
-Not all Linux distribution boot processes use the bootfs property.
-.It Sy cachefile Ns = Ns Ar path Ns | Ns Sy none
-Controls the location of where the pool configuration is cached.
-Discovering all pools on system startup requires a cached copy of the
-configuration data that is stored on the root file system.
-All pools in this cache are automatically imported when the system boots.
-Some environments, such as install and clustering, need to cache this
-information in a different location so that pools are not automatically
-imported.
-Setting this property caches the pool configuration in a different location that
-can later be imported with
-.Nm zpool Cm import Fl c .
-Setting it to the value
-.Sy none
-creates a temporary pool that is never cached, and the
-.Qq
-.Pq empty string
-uses the default location.
-.Pp
-Multiple pools can share the same cache file.
-Because the kernel destroys and recreates this file when pools are added and
-removed, care should be taken when attempting to access this file.
-When the last pool using a
-.Sy cachefile
-is exported or destroyed, the file will be empty.
-.It Sy comment Ns = Ns Ar text
-A text string consisting of printable ASCII characters that will be stored
-such that it is available even if the pool becomes faulted.
-An administrator can provide additional information about a pool using this
-property.
-.It Sy dedupditto Ns = Ns Ar number
-This property is deprecated.  In a future release, it will no longer have any
-effect.
-.Pp
-Threshold for the number of block ditto copies.
-If the reference count for a deduplicated block increases above this number, a
-new ditto copy of this block is automatically stored.
-The default setting is
-.Sy 0
-which causes no ditto copies to be created for deduplicated blocks.
-The minimum legal nonzero setting is
-.Sy 100 .
-.It Sy delegation Ns = Ns Sy on Ns | Ns Sy off
-Controls whether a non-privileged user is granted access based on the dataset
-permissions defined on the dataset.
-See
-.Xr zfs 8
-for more information on ZFS delegated administration.
-.It Sy failmode Ns = Ns Sy wait Ns | Ns Sy continue Ns | Ns Sy panic
-Controls the system behavior in the event of catastrophic pool failure.
-This condition is typically a result of a loss of connectivity to the underlying
-storage device(s) or a failure of all devices within the pool.
-The behavior of such an event is determined as follows:
-.Bl -tag -width "continue"
-.It Sy wait
-Blocks all I/O access until the device connectivity is recovered and the errors
-are cleared.
-This is the default behavior.
-.It Sy continue
-Returns
-.Er EIO
-to any new write I/O requests but allows reads to any of the remaining healthy
-devices.
-Any write requests that have yet to be committed to disk would be blocked.
-.It Sy panic
-Prints out a message to the console and generates a system crash dump.
-.El
-.It Sy autotrim Ns = Ns Sy on Ns | Ns Sy off
-When set to
-.Sy on
-space which has been recently freed, and is no longer allocated by the pool,
-will be periodically trimmed.  This allows block device vdevs which support
-BLKDISCARD, such as SSDs, or file vdevs on which the underlying file system
-supports hole-punching, to reclaim unused blocks.  The default setting for
-this property is
-.Sy off .
-.Pp
-Automatic TRIM does not immediately reclaim blocks after a free. Instead,
-it will optimistically delay, allowing smaller ranges to be aggregated into
-a few larger ones.  These can then be issued more efficiently to the storage.
-.Pp
-Be aware that automatic trimming of recently freed data blocks can put
-significant stress on the underlying storage devices.  This will vary
-depending on how well the specific device handles these commands.  For
-lower end devices it is often possible to achieve most of the benefits
-of automatic trimming by running an on-demand (manual) TRIM periodically
-using the
-.Nm zpool Cm trim
-command.
-.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled
-The value of this property is the current state of
-.Ar feature_name .
-The only valid value when setting this property is
-.Sy enabled
-which moves
-.Ar feature_name
-to the enabled state.
-See
-.Xr zpool-features 5
-for details on feature states.
-.It Sy listsnapshots Ns = Ns Sy on Ns | Ns Sy off
-Controls whether information about snapshots associated with this pool is
-output when
-.Nm zfs Cm list
-is run without the
-.Fl t
-option.
-The default value is
-.Sy off .
-This property can also be referred to by its shortened name,
-.Sy listsnaps .
-.It Sy multihost Ns = Ns Sy on Ns | Ns Sy off
-Controls whether a pool activity check should be performed during
-.Nm zpool Cm import .
-When a pool is determined to be active it cannot be imported, even with the
-.Fl f
-option.  This property is intended to be used in failover configurations
-where multiple hosts have access to a pool on shared storage.
-.Pp
-Multihost provides protection on import only.  It does not protect against an
-individual device being used in multiple pools, regardless of the type of vdev.
-See the discussion under
-.Sy zpool create .
-.Pp
-When this property is on, periodic writes to storage occur to show the pool is
-in use.  See
-.Sy zfs_multihost_interval
-in the
-.Xr zfs-module-parameters 5
-man page.  In order to enable this property each host must set a unique hostid.
-See
-.Xr genhostid 1 ,
-.Xr zgenhostid 8 ,
-.Xr spl-module-parameters 5
-for additional details.  The default value is
-.Sy off .
-.It Sy version Ns = Ns Ar version
-The current on-disk version of the pool.
-This can be increased, but never decreased.
-The preferred method of updating pools is with the
-.Nm zpool Cm upgrade
-command, though this property can be used when a specific version is needed for
-backwards compatibility.
-Once feature flags are enabled on a pool this property will no longer have a
-value.
-.El
-.Ss Subcommands
+For an overview of creating and managing ZFS storage pools see the
+.Xr zpoolconcepts 7
+manual page.
+.
+.Sh SUBCOMMANDS
 All subcommands that modify state are logged persistently to the pool in their
 original form.
 .Pp
@@ -933,401 +69,70 @@
 .Bl -tag -width Ds
 .It Xo
 .Nm
-.Fl ?
+.Fl ?\&
 .Xc
 Displays a help message.
 .It Xo
 .Nm
-.Fl V, -version
+.Fl V , -version
 .Xc
-An alias for the
-.Nm zpool Cm version
-subcommand.
 .It Xo
 .Nm
-.Cm add
-.Op Fl fgLnP
-.Oo Fl o Ar property Ns = Ns Ar value Oc
-.Ar pool vdev Ns ...
+.Cm version
 .Xc
-Adds the specified virtual devices to the given pool.
-The
-.Ar vdev
-specification is described in the
-.Sx Virtual Devices
-section.
-The behavior of the
-.Fl f
-option, and the device checks performed are described in the
-.Nm zpool Cm create
-subcommand.
-.Bl -tag -width Ds
-.It Fl f
-Forces use of
-.Ar vdev Ns s ,
-even if they appear in use or specify a conflicting replication level.
-Not all devices can be overridden in this manner.
-.It Fl g
-Display
-.Ar vdev ,
-GUIDs instead of the normal device names. These GUIDs can be used in place of
-device names for the zpool detach/offline/remove/replace commands.
-.It Fl L
-Display real paths for
-.Ar vdev Ns s
-resolving all symbolic links. This can be used to look up the current block
-device name regardless of the /dev/disk/ path used to open it.
-.It Fl n
-Displays the configuration that would be used without actually adding the
-.Ar vdev Ns s .
-The actual pool creation can still fail due to insufficient privileges or
-device sharing.
-.It Fl P
-Display real paths for
-.Ar vdev Ns s
-instead of only the last component of the path. This can be used in
-conjunction with the
-.Fl L
-flag.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the given pool properties. See the
-.Sx Properties
-section for a list of valid properties that can be set. The only property
-supported at the moment is ashift.
+Displays the software version of the
+.Nm
+userland utility and the ZFS kernel module.
 .El
-.It Xo
-.Nm
-.Cm attach
-.Op Fl f
-.Oo Fl o Ar property Ns = Ns Ar value Oc
-.Ar pool device new_device
-.Xc
-Attaches
-.Ar new_device
-to the existing
-.Ar device .
-The existing device cannot be part of a raidz configuration.
-If
-.Ar device
-is not currently part of a mirrored configuration,
-.Ar device
-automatically transforms into a two-way mirror of
-.Ar device
-and
-.Ar new_device .
-If
-.Ar device
-is part of a two-way mirror, attaching
-.Ar new_device
-creates a three-way mirror, and so on.
-In either case,
-.Ar new_device
-begins to resilver immediately.
+.
+.Ss Creation
 .Bl -tag -width Ds
-.It Fl f
-Forces use of
-.Ar new_device ,
-even if it appears to be in use.
-Not all devices can be overridden in this manner.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the given pool properties. See the
-.Sx Properties
-section for a list of valid properties that can be set. The only property
-supported at the moment is ashift.
-.El
-.It Xo
-.Nm
-.Cm checkpoint
-.Op Fl d, -discard
-.Ar pool
-.Xc
-Checkpoints the current state of
-.Ar pool ,
-which can be later restored by
-.Nm zpool Cm import --rewind-to-checkpoint .
-The existence of a checkpoint in a pool prohibits the following
-.Nm zpool
-commands:
-.Cm remove ,
-.Cm attach ,
-.Cm detach ,
-.Cm split ,
-and
-.Cm reguid .
-In addition, it may break reservation boundaries if the pool lacks free
-space.
-The
-.Nm zpool Cm status
-command indicates the existence of a checkpoint or the progress of discarding a
-checkpoint from a pool.
-The
-.Nm zpool Cm list
-command reports how much space the checkpoint takes from the pool.
-.Bl -tag -width Ds
-.It Fl d, -discard
-Discards an existing checkpoint from
-.Ar pool .
-.El
-.It Xo
-.Nm
-.Cm clear
-.Ar pool
-.Op Ar device
-.Xc
-Clears device errors in a pool.
-If no arguments are specified, all device errors within the pool are cleared.
-If one or more devices is specified, only those errors associated with the
-specified device or devices are cleared.
-If multihost is enabled, and the pool has been suspended, this will not
-resume I/O.  While the pool was suspended, it may have been imported on
-another host, and resuming I/O could result in pool damage.
-.It Xo
-.Nm
-.Cm create
-.Op Fl dfn
-.Op Fl m Ar mountpoint
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Oo Fl o Ar feature@feature Ns = Ns Ar value Oc Ns ...
-.Oo Fl O Ar file-system-property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Op Fl t Ar tname
-.Ar pool vdev Ns ...
-.Xc
+.It Xr zpool-create 8
 Creates a new storage pool containing the virtual devices specified on the
 command line.
-The pool name must begin with a letter, and can only contain
-alphanumeric characters as well as underscore
-.Pq Qq Sy _ ,
-dash
-.Pq Qq Sy \&- ,
-colon
-.Pq Qq Sy \&: ,
-space
-.Pq Qq Sy \&\  ,
-and period
-.Pq Qq Sy \&. .
-The pool names
-.Sy mirror ,
-.Sy raidz ,
-.Sy spare
-and
-.Sy log
-are reserved, as are names beginning with
-.Sy mirror ,
-.Sy raidz ,
-.Sy spare ,
-and the pattern
-.Sy c[0-9] .
-The
-.Ar vdev
-specification is described in the
-.Sx Virtual Devices
-section.
-.Pp
-The command attempts to verify that each device specified is accessible and not
-currently in use by another subsystem.  However this check is not robust enough
-to detect simultaneous attempts to use a new device in different pools, even if
-.Sy multihost
-is
-.Sy enabled .
-The
-administrator must ensure that simultaneous invocations of any combination of
-.Sy zpool replace ,
-.Sy zpool create ,
-.Sy zpool add ,
-or
-.Sy zpool labelclear ,
-do not refer to the same device.  Using the same device in two pools will
-result in pool corruption.
-.Pp
-There are some uses, such as being currently mounted, or specified as the
-dedicated dump device, that prevent a device from ever being used by ZFS.
-Other uses, such as having a preexisting UFS file system, can be overridden with
-the
-.Fl f
-option.
-.Pp
-The command also checks that the replication strategy for the pool is
-consistent.
-An attempt to combine redundant and non-redundant storage in a single pool, or
-to mix disks and files, results in an error unless
-.Fl f
-is specified.
-The use of differently sized devices within a single raidz or mirror group is
-also flagged as an error unless
-.Fl f
-is specified.
-.Pp
-Unless the
-.Fl R
-option is specified, the default mount point is
-.Pa / Ns Ar pool .
-The mount point must not exist or must be empty, or else the root dataset
-cannot be mounted.
-This can be overridden with the
-.Fl m
-option.
-.Pp
-By default all supported features are enabled on the new pool unless the
-.Fl d
-option is specified.
-.Bl -tag -width Ds
-.It Fl d
-Do not enable any features on the new pool.
-Individual features can be enabled by setting their corresponding properties to
-.Sy enabled
-with the
-.Fl o
-option.
-See
-.Xr zpool-features 5
-for details about feature properties.
-.It Fl f
-Forces use of
-.Ar vdev Ns s ,
-even if they appear in use or specify a conflicting replication level.
-Not all devices can be overridden in this manner.
-.It Fl m Ar mountpoint
-Sets the mount point for the root dataset.
-The default mount point is
-.Pa /pool
-or
-.Pa altroot/pool
-if
-.Ar altroot
-is specified.
-The mount point must be an absolute path,
-.Sy legacy ,
-or
-.Sy none .
-For more information on dataset mount points, see
-.Xr zfs 8 .
-.It Fl n
-Displays the configuration that would be used without actually creating the
-pool.
-The actual pool creation can still fail due to insufficient privileges or
-device sharing.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the given pool properties.
-See the
-.Sx Properties
-section for a list of valid properties that can be set.
-.It Fl o Ar feature@feature Ns = Ns Ar value
-Sets the given pool feature. See the
-.Xr zpool-features 5
-section for a list of valid features that can be set.
-Value can be either disabled or enabled.
-.It Fl O Ar file-system-property Ns = Ns Ar value
-Sets the given file system properties in the root file system of the pool.
-See the
-.Sx Properties
-section of
-.Xr zfs 8
-for a list of valid properties that can be set.
-.It Fl R Ar root
-Equivalent to
-.Fl o Sy cachefile Ns = Ns Sy none Fl o Sy altroot Ns = Ns Ar root
-.It Fl t Ar tname
-Sets the in-core pool name to
-.Sy tname
-while the on-disk name will be the name specified as the pool name
-.Sy pool .
-This will set the default cachefile property to none. This is intended
-to handle name space collisions when creating pools for other systems,
-such as virtual machines or physical machines whose pools live on network
-block devices.
+.It Xr zpool-initialize 8
+Begins initializing by writing to all unallocated regions on the specified
+devices, or all eligible devices in the pool if no individual devices are
+specified.
 .El
-.It Xo
-.Nm
-.Cm destroy
-.Op Fl f
-.Ar pool
-.Xc
+.
+.Ss Destruction
+.Bl -tag -width Ds
+.It Xr zpool-destroy 8
 Destroys the given pool, freeing up any devices for other use.
-This command tries to unmount any active datasets before destroying the pool.
-.Bl -tag -width Ds
-.It Fl f
-Forces any active datasets contained within the pool to be unmounted.
+.It Xr zpool-labelclear 8
+Removes ZFS label information from the specified
+.Ar device .
 .El
-.It Xo
-.Nm
-.Cm detach
-.Ar pool device
-.Xc
-Detaches
-.Ar device
-from a mirror.
-The operation is refused if there are no other valid replicas of the data.
-If device may be re-added to the pool later on then consider the
-.Sy zpool offline
-command instead.
-.It Xo
-.Nm
-.Cm events
-.Op Fl vHf Oo Ar pool Oc | Fl c
-.Xc
-Lists all recent events generated by the ZFS kernel modules.  These events
-are consumed by the
-.Xr zed 8
-and used to automate administrative tasks such as replacing a failed device
-with a hot spare. For more information about the subclasses and event payloads
-that can be generated see the
-.Xr zfs-events 5
-man page.
+.
+.Ss Virtual Devices
 .Bl -tag -width Ds
-.It Fl c
-Clear all previous events.
-.It Fl f
-Follow mode.
-.It Fl H
-Scripted mode. Do not display headers, and separate fields by a
-single tab instead of arbitrary space.
-.It Fl v
-Print the entire payload for each event.
-.El
 .It Xo
-.Nm
-.Cm export
-.Op Fl a
-.Op Fl f
-.Ar pool Ns ...
+.Xr zpool-attach 8 Ns / Ns Xr zpool-detach 8
 .Xc
-Exports the given pools from the system.
-All devices are marked as exported, but are still considered in use by other
-subsystems.
-The devices can be moved between systems
-.Pq even those of different endianness
-and imported as long as a sufficient number of devices are present.
-.Pp
-Before exporting the pool, all datasets within the pool are unmounted.
-A pool can not be exported if it has a shared spare that is currently being
-used.
-.Pp
-For pools to be portable, you must give the
-.Nm
-command whole disks, not just partitions, so that ZFS can label the disks with
-portable EFI labels.
-Otherwise, disk drivers on platforms of different endianness will not recognize
-the disks.
-.Bl -tag -width Ds
-.It Fl a
-Exports all pools imported on the system.
-.It Fl f
-Forcefully unmount all datasets, using the
-.Nm unmount Fl f
-command.
-.Pp
-This command will forcefully export the pool even if it has a shared spare that
-is currently being used.
-This may lead to potential data corruption.
-.El
+Increases or decreases redundancy by
+.Cm attach Ns ing or
+.Cm detach Ns ing a device on an existing vdev (virtual device).
 .It Xo
-.Nm
-.Cm get
-.Op Fl Hp
-.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ...
-.Sy all Ns | Ns Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Oo Ar pool Oc Ns ...
+.Xr zpool-add 8 Ns / Ns Xr zpool-remove 8
+.Xc
+Adds the specified virtual devices to the given pool,
+or removes the specified device from the pool.
+.It Xr zpool-replace 8
+Replaces an existing device (which may be faulted) with a new one.
+.It Xr zpool-split 8
+Creates a new pool by splitting all mirrors in an existing pool (which decreases its redundancy).
+.El
+.
+.Ss Properties
+Available pool properties are listed in the
+.Xr zpoolprops 7
+manual page.
+.Bl -tag -width Ds
+.It Xr zpool-list 8
+Lists the given pools along with a health status and space usage.
+.It Xo
+.Xr zpool-get 8 Ns / Ns Xr zpool-set 8
 .Xc
 Retrieves the given list of properties
 .Po
@@ -1336,1164 +141,86 @@
 is used
 .Pc
 for the specified storage pool(s).
-These properties are displayed with the following fields:
-.Bd -literal
-        name          Name of storage pool
-        property      Property name
-        value         Property value
-        source        Property source, either 'default' or 'local'.
-.Ed
-.Pp
-See the
-.Sx Properties
-section for more information on the available pool properties.
-.Bl -tag -width Ds
-.It Fl H
-Scripted mode.
-Do not display headers, and separate fields by a single tab instead of arbitrary
-space.
-.It Fl o Ar field
-A comma-separated list of columns to display.
-.Sy name Ns \&, Ns Sy property Ns \&, Ns Sy value Ns \&, Ns Sy source
-is the default value.
-.It Fl p
-Display numbers in parsable (exact) values.
 .El
-.It Xo
-.Nm
-.Cm history
-.Op Fl il
-.Oo Ar pool Oc Ns ...
-.Xc
-Displays the command history of the specified pool(s) or all pools if no pool is
-specified.
+.
+.Ss Monitoring
 .Bl -tag -width Ds
-.It Fl i
-Displays internally logged ZFS events in addition to user initiated events.
-.It Fl l
-Displays log records in long format, which in addition to standard format
-includes the user name, the hostname, and the zone in which the operation was
-performed.
-.El
-.It Xo
-.Nm
-.Cm import
-.Op Fl D
-.Op Fl d Ar dir Ns | Ns device
-.Xc
-Lists pools available to import.
-If the
-.Fl d
-option is not specified, this command searches for devices in
-.Pa /dev .
-The
-.Fl d
-option can be specified multiple times, and all directories are searched.
-If the device appears to be part of an exported pool, this command displays a
-summary of the pool with the name of the pool, a numeric identifier, as well as
-the vdev layout and current health of the device for each device or file.
-Destroyed pools, pools that were previously destroyed with the
-.Nm zpool Cm destroy
-command, are not listed unless the
-.Fl D
-option is specified.
-.Pp
-The numeric identifier is unique, and can be used instead of the pool name when
-multiple exported pools of the same name are available.
-.Bl -tag -width Ds
-.It Fl c Ar cachefile
-Reads configuration from the given
-.Ar cachefile
-that was created with the
-.Sy cachefile
-pool property.
-This
-.Ar cachefile
-is used instead of searching for devices.
-.It Fl d Ar dir Ns | Ns Ar device
-Uses
-.Ar device
-or searches for devices or files in
-.Ar dir .
-The
-.Fl d
-option can be specified multiple times.
-.It Fl D
-Lists destroyed pools only.
-.El
-.It Xo
-.Nm
-.Cm import
-.Fl a
-.Op Fl DflmN
-.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
-.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns device
-.Op Fl o Ar mntopts
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Op Fl s
-.Xc
-Imports all pools found in the search directories.
-Identical to the previous command, except that all pools with a sufficient
-number of devices available are imported.
-Destroyed pools, pools that were previously destroyed with the
-.Nm zpool Cm destroy
-command, will not be imported unless the
-.Fl D
-option is specified.
-.Bl -tag -width Ds
-.It Fl a
-Searches for and imports all pools found.
-.It Fl c Ar cachefile
-Reads configuration from the given
-.Ar cachefile
-that was created with the
-.Sy cachefile
-pool property.
-This
-.Ar cachefile
-is used instead of searching for devices.
-.It Fl d Ar dir Ns | Ns Ar device
-Uses
-.Ar device
-or searches for devices or files in
-.Ar dir .
-The
-.Fl d
-option can be specified multiple times.
-This option is incompatible with the
-.Fl c
-option.
-.It Fl D
-Imports destroyed pools only.
-The
-.Fl f
-option is also required.
-.It Fl f
-Forces import, even if the pool appears to be potentially active.
-.It Fl F
-Recovery mode for a non-importable pool.
-Attempt to return the pool to an importable state by discarding the last few
-transactions.
-Not all damaged pools can be recovered by using this option.
-If successful, the data from the discarded transactions is irretrievably lost.
-This option is ignored if the pool is importable or already imported.
-.It Fl l
-Indicates that this command will request encryption keys for all encrypted
-datasets it attempts to mount as it is bringing the pool online. Note that if
-any datasets have a
-.Sy keylocation
-of
-.Sy prompt
-this command will block waiting for the keys to be entered. Without this flag
-encrypted datasets will be left unavailable until the keys are loaded.
-.It Fl m
-Allows a pool to import when there is a missing log device.
-Recent transactions can be lost because the log device will be discarded.
-.It Fl n
-Used with the
-.Fl F
-recovery option.
-Determines whether a non-importable pool can be made importable again, but does
-not actually perform the pool recovery.
-For more details about pool recovery mode, see the
-.Fl F
-option, above.
-.It Fl N
-Import the pool without mounting any file systems.
-.It Fl o Ar mntopts
-Comma-separated list of mount options to use when mounting datasets within the
-pool.
-See
-.Xr zfs 8
-for a description of dataset properties and mount options.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property on the imported pool.
-See the
-.Sx Properties
-section for more information on the available pool properties.
-.It Fl R Ar root
-Sets the
-.Sy cachefile
-property to
-.Sy none
-and the
-.Sy altroot
-property to
-.Ar root .
-.It Fl -rewind-to-checkpoint
-Rewinds pool to the checkpointed state.
-Once the pool is imported with this flag there is no way to undo the rewind.
-All changes and data that were written after the checkpoint are lost!
-The only exception is when the
-.Sy readonly
-mounting option is enabled.
-In this case, the checkpointed state of the pool is opened and an
-administrator can see what the pool would look like if they were
-to fully rewind.
-.It Fl s
-Scan using the default search path, the libblkid cache will not be
-consulted. A custom search path may be specified by setting the
-ZPOOL_IMPORT_PATH environment variable.
-.It Fl X
-Used with the
-.Fl F
-recovery option. Determines whether extreme
-measures to find a valid txg should take place. This allows the pool to
-be rolled back to a txg which is no longer guaranteed to be consistent.
-Pools imported at an inconsistent txg may contain uncorrectable
-checksum errors. For more details about pool recovery mode, see the
-.Fl F
-option, above. WARNING: This option can be extremely hazardous to the
-health of your pool and should only be used as a last resort.
-.It Fl T
-Specify the txg to use for rollback. Implies
-.Fl FX .
-For more details
-about pool recovery mode, see the
-.Fl X
-option, above. WARNING: This option can be extremely hazardous to the
-health of your pool and should only be used as a last resort.
-.El
-.It Xo
-.Nm
-.Cm import
-.Op Fl Dflm
-.Op Fl F Oo Fl n Oc Oo Fl t Oc Oo Fl T Oc Oo Fl X Oc
-.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns device
-.Op Fl o Ar mntopts
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Op Fl s
-.Ar pool Ns | Ns Ar id
-.Op Ar newpool
-.Xc
-Imports a specific pool.
-A pool can be identified by its name or the numeric identifier.
-If
-.Ar newpool
-is specified, the pool is imported using the name
-.Ar newpool .
-Otherwise, it is imported with the same name as its exported name.
-.Pp
-If a device is removed from a system without running
-.Nm zpool Cm export
-first, the device appears as potentially active.
-It cannot be determined if this was a failed export, or whether the device is
-really in use from another host.
-To import a pool in this state, the
-.Fl f
-option is required.
-.Bl -tag -width Ds
-.It Fl c Ar cachefile
-Reads configuration from the given
-.Ar cachefile
-that was created with the
-.Sy cachefile
-pool property.
-This
-.Ar cachefile
-is used instead of searching for devices.
-.It Fl d Ar dir Ns | Ns Ar device
-Uses
-.Ar device
-or searches for devices or files in
-.Ar dir .
-The
-.Fl d
-option can be specified multiple times.
-This option is incompatible with the
-.Fl c
-option.
-.It Fl D
-Imports destroyed pool.
-The
-.Fl f
-option is also required.
-.It Fl f
-Forces import, even if the pool appears to be potentially active.
-.It Fl F
-Recovery mode for a non-importable pool.
-Attempt to return the pool to an importable state by discarding the last few
-transactions.
-Not all damaged pools can be recovered by using this option.
-If successful, the data from the discarded transactions is irretrievably lost.
-This option is ignored if the pool is importable or already imported.
-.It Fl l
-Indicates that this command will request encryption keys for all encrypted
-datasets it attempts to mount as it is bringing the pool online. Note that if
-any datasets have a
-.Sy keylocation
-of
-.Sy prompt
-this command will block waiting for the keys to be entered. Without this flag
-encrypted datasets will be left unavailable until the keys are loaded.
-.It Fl m
-Allows a pool to import when there is a missing log device.
-Recent transactions can be lost because the log device will be discarded.
-.It Fl n
-Used with the
-.Fl F
-recovery option.
-Determines whether a non-importable pool can be made importable again, but does
-not actually perform the pool recovery.
-For more details about pool recovery mode, see the
-.Fl F
-option, above.
-.It Fl o Ar mntopts
-Comma-separated list of mount options to use when mounting datasets within the
-pool.
-See
-.Xr zfs 8
-for a description of dataset properties and mount options.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property on the imported pool.
-See the
-.Sx Properties
-section for more information on the available pool properties.
-.It Fl R Ar root
-Sets the
-.Sy cachefile
-property to
-.Sy none
-and the
-.Sy altroot
-property to
-.Ar root .
-.It Fl s
-Scan using the default search path, the libblkid cache will not be
-consulted. A custom search path may be specified by setting the
-ZPOOL_IMPORT_PATH environment variable.
-.It Fl X
-Used with the
-.Fl F
-recovery option. Determines whether extreme
-measures to find a valid txg should take place. This allows the pool to
-be rolled back to a txg which is no longer guaranteed to be consistent.
-Pools imported at an inconsistent txg may contain uncorrectable
-checksum errors. For more details about pool recovery mode, see the
-.Fl F
-option, above. WARNING: This option can be extremely hazardous to the
-health of your pool and should only be used as a last resort.
-.It Fl T
-Specify the txg to use for rollback. Implies
-.Fl FX .
-For more details
-about pool recovery mode, see the
-.Fl X
-option, above. WARNING: This option can be extremely hazardous to the
-health of your pool and should only be used as a last resort.
-.It Fl t
-Used with
-.Sy newpool .
-Specifies that
-.Sy newpool
-is temporary. Temporary pool names last until export. Ensures that
-the original pool name will be used in all label updates and therefore
-is retained upon export.
-Will also set -o cachefile=none when not explicitly specified.
-.El
-.It Xo
-.Nm
-.Cm initialize
-.Op Fl c | Fl s
-.Ar pool
-.Op Ar device Ns ...
-.Xc
-Begins initializing by writing to all unallocated regions on the specified
-devices, or all eligible devices in the pool if no individual devices are
-specified.
-Only leaf data or log devices may be initialized.
-.Bl -tag -width Ds
-.It Fl c, -cancel
-Cancel initializing on the specified devices, or all eligible devices if none
-are specified.
-If one or more target devices are invalid or are not currently being
-initialized, the command will fail and no cancellation will occur on any device.
-.It Fl s -suspend
-Suspend initializing on the specified devices, or all eligible devices if none
-are specified.
-If one or more target devices are invalid or are not currently being
-initialized, the command will fail and no suspension will occur on any device.
-Initializing can then be resumed by running
-.Nm zpool Cm initialize
-with no flags on the relevant target devices.
-.El
-.It Xo
-.Nm
-.Cm iostat
-.Op Oo Oo Fl c Ar SCRIPT Oc Oo Fl lq Oc Oc Ns | Ns Fl rw
-.Op Fl T Sy u Ns | Ns Sy d
-.Op Fl ghHLnpPvy
-.Oo Oo Ar pool Ns ... Oc Ns | Ns Oo Ar pool vdev Ns ... Oc Ns | Ns Oo Ar vdev Ns ... Oc Oc
-.Op Ar interval Op Ar count
-.Xc
+.It Xr zpool-status 8
+Displays the detailed health status for the given pools.
+.It Xr zpool-iostat 8
 Displays logical I/O statistics for the given pools/vdevs. Physical I/Os may
 be observed via
 .Xr iostat 1 .
-If writes are located nearby, they may be merged into a single
-larger operation. Additional I/O may be generated depending on the level of
-vdev redundancy.
-To filter output, you may pass in a list of pools, a pool and list of vdevs
-in that pool, or a list of any vdevs from any pool. If no items are specified,
-statistics for every pool in the system are shown.
-When given an
-.Ar interval ,
-the statistics are printed every
-.Ar interval
-seconds until ^C is pressed. If 
-.Fl n
-flag is specified the headers are displayed only once, otherwise they are 
-displayed periodically. If count is specified, the command exits
-after count reports are printed. The first report printed is always
-the statistics since boot regardless of whether
-.Ar interval
-and
-.Ar count
-are passed. However, this behavior can be suppressed with the
-.Fl y
-flag. Also note that the units of
-.Sy K ,
-.Sy M ,
-.Sy G ...
-that are printed in the report are in base 1024. To get the raw
-values, use the
-.Fl p
-flag.
+.It Xr zpool-events 8
+Lists all recent events generated by the ZFS kernel modules.
+These events are consumed by the
+.Xr zed 8
+and used to automate administrative tasks such as replacing a failed device
+with a hot spare.
+That manual page also describes the subclasses and event payloads
+that can be generated.
+.It Xr zpool-history 8
+Displays the command history of the specified pool(s) or all pools if no pool is
+specified.
+.El
+.
+.Ss Maintenance
 .Bl -tag -width Ds
-.It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns ...
-Run a script (or scripts) on each vdev and include the output as a new column
-in the
-.Nm zpool Cm iostat
-output. Users can run any script found in their
-.Pa ~/.zpool.d
-directory or from the system
-.Pa /etc/zfs/zpool.d
-directory. Script names containing the slash (/) character are not allowed.
-The default search path can be overridden by setting the
-ZPOOL_SCRIPTS_PATH environment variable. A privileged user can run
-.Fl c
-if they have the ZPOOL_SCRIPTS_AS_ROOT
-environment variable set. If a script requires the use of a privileged
-command, like
-.Xr smartctl 8 ,
-then it's recommended you allow the user access to it in
-.Pa /etc/sudoers
-or add the user to the
-.Pa /etc/sudoers.d/zfs
-file.
-.Pp
-If
-.Fl c
-is passed without a script name, it prints a list of all scripts.
-.Fl c
-also sets verbose mode
-.No \&( Ns Fl v Ns No \&).
-.Pp
-Script output should be in the form of "name=value". The column name is
-set to "name" and the value is set to "value". Multiple lines can be
-used to output multiple columns. The first line of output not in the
-"name=value" format is displayed without a column title, and no more
-output after that is displayed. This can be useful for printing error
-messages. Blank or NULL values are printed as a '-' to make output
-awk-able.
-.Pp
-The following environment variables are set before running each script:
-.Bl -tag -width "VDEV_PATH"
-.It Sy VDEV_PATH
-Full path to the vdev
-.El
-.Bl -tag -width "VDEV_UPATH"
-.It Sy VDEV_UPATH
-Underlying path to the vdev (/dev/sd*).  For use with device mapper,
-multipath, or partitioned vdevs.
-.El
-.Bl -tag -width "VDEV_ENC_SYSFS_PATH"
-.It Sy VDEV_ENC_SYSFS_PATH
-The sysfs path to the enclosure for the vdev (if any).
-.El
-.It Fl T Sy u Ns | Ns Sy d
-Display a time stamp.
-Specify
-.Sy u
-for a printed representation of the internal representation of time.
-See
-.Xr time 2 .
-Specify
-.Sy d
-for standard date format.
-See
-.Xr date 1 .
-.It Fl g
-Display vdev GUIDs instead of the normal device names. These GUIDs
-can be used in place of device names for the zpool
-detach/offline/remove/replace commands.
-.It Fl H
-Scripted mode. Do not display headers, and separate fields by a
-single tab instead of arbitrary space.
-.It Fl L
-Display real paths for vdevs resolving all symbolic links. This can
-be used to look up the current block device name regardless of the
-.Pa /dev/disk/
-path used to open it.
-.It Fl n
-Print headers only once when passed
-.It Fl p
-Display numbers in parsable (exact) values. Time values are in
-nanoseconds.
-.It Fl P
-Display full paths for vdevs instead of only the last component of
-the path. This can be used in conjunction with the
-.Fl L
-flag.
-.It Fl r
-Print request size histograms for the leaf vdev's IO. This includes
-histograms of individual IOs (ind) and aggregate IOs (agg). These stats
-can be useful for observing how well IO aggregation is working.  Note
-that TRIM IOs may exceed 16M, but will be counted as 16M.
-.It Fl v
-Verbose statistics Reports usage statistics for individual vdevs within the
-pool, in addition to the pool-wide statistics.
-.It Fl y
-Omit statistics since boot.
-Normally the first line of output reports the statistics since boot.
-This option suppresses that first line of output.
-.Ar interval
-.It Fl w
-Display latency histograms:
-.Pp
-.Ar total_wait :
-Total IO time (queuing + disk IO time).
-.Ar disk_wait :
-Disk IO time (time reading/writing the disk).
-.Ar syncq_wait :
-Amount of time IO spent in synchronous priority queues.  Does not include
-disk time.
-.Ar asyncq_wait :
-Amount of time IO spent in asynchronous priority queues.  Does not include
-disk time.
-.Ar scrub :
-Amount of time IO spent in scrub queue. Does not include disk time.
-.It Fl l
-Include average latency statistics:
-.Pp
-.Ar total_wait :
-Average total IO time (queuing + disk IO time).
-.Ar disk_wait :
-Average disk IO time (time reading/writing the disk).
-.Ar syncq_wait :
-Average amount of time IO spent in synchronous priority queues. Does
-not include disk time.
-.Ar asyncq_wait :
-Average amount of time IO spent in asynchronous priority queues.
-Does not include disk time.
-.Ar scrub :
-Average queuing time in scrub queue. Does not include disk time.
-.Ar trim :
-Average queuing time in trim queue. Does not include disk time.
-.It Fl q
-Include active queue statistics. Each priority queue has both
-pending (
-.Ar pend )
-and active (
-.Ar activ )
-IOs. Pending IOs are waiting to
-be issued to the disk, and active IOs have been issued to disk and are
-waiting for completion. These stats are broken out by priority queue:
-.Pp
-.Ar syncq_read/write :
-Current number of entries in synchronous priority
-queues.
-.Ar asyncq_read/write :
-Current number of entries in asynchronous priority queues.
-.Ar scrubq_read :
-Current number of entries in scrub queue.
-.Ar trimq_write :
-Current number of entries in trim queue.
-.Pp
-All queue statistics are instantaneous measurements of the number of
-entries in the queues. If you specify an interval, the measurements
-will be sampled from the end of the interval.
-.El
-.It Xo
-.Nm
-.Cm labelclear
-.Op Fl f
-.Ar device
-.Xc
-Removes ZFS label information from the specified
-.Ar device .
-The
-.Ar device
-must not be part of an active pool configuration.
-.Bl -tag -width Ds
-.It Fl f
-Treat exported or foreign devices as inactive.
-.El
-.It Xo
-.Nm
-.Cm list
-.Op Fl HgLpPv
-.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Op Fl T Sy u Ns | Ns Sy d
-.Oo Ar pool Oc Ns ...
-.Op Ar interval Op Ar count
-.Xc
-Lists the given pools along with a health status and space usage.
-If no
-.Ar pool Ns s
-are specified, all pools in the system are listed.
-When given an
-.Ar interval ,
-the information is printed every
-.Ar interval
-seconds until ^C is pressed.
-If
-.Ar count
-is specified, the command exits after
-.Ar count
-reports are printed.
-.Bl -tag -width Ds
-.It Fl g
-Display vdev GUIDs instead of the normal device names. These GUIDs
-can be used in place of device names for the zpool
-detach/offline/remove/replace commands.
-.It Fl H
-Scripted mode.
-Do not display headers, and separate fields by a single tab instead of arbitrary
-space.
-.It Fl o Ar property
-Comma-separated list of properties to display.
-See the
-.Sx Properties
-section for a list of valid properties.
-The default list is
-.Cm name , size , allocated , free , checkpoint, expandsize , fragmentation ,
-.Cm capacity , dedupratio , health , altroot .
-.It Fl L
-Display real paths for vdevs resolving all symbolic links. This can
-be used to look up the current block device name regardless of the
-/dev/disk/ path used to open it.
-.It Fl p
-Display numbers in parsable
-.Pq exact
-values.
-.It Fl P
-Display full paths for vdevs instead of only the last component of
-the path. This can be used in conjunction with the
-.Fl L
-flag.
-.It Fl T Sy u Ns | Ns Sy d
-Display a time stamp.
-Specify
-.Sy u
-for a printed representation of the internal representation of time.
-See
-.Xr time 2 .
-Specify
-.Sy d
-for standard date format.
-See
-.Xr date 1 .
-.It Fl v
-Verbose statistics.
-Reports usage statistics for individual vdevs within the pool, in addition to
-the pool-wise statistics.
-.El
-.It Xo
-.Nm
-.Cm offline
-.Op Fl f
-.Op Fl t
-.Ar pool Ar device Ns ...
-.Xc
-Takes the specified physical device offline.
-While the
-.Ar device
-is offline, no attempt is made to read or write to the device.
-This command is not applicable to spares.
-.Bl -tag -width Ds
-.It Fl f
-Force fault. Instead of offlining the disk, put it into a faulted
-state. The fault will persist across imports unless the
-.Fl t
-flag was specified.
-.It Fl t
-Temporary.
-Upon reboot, the specified physical device reverts to its previous state.
-.El
-.It Xo
-.Nm
-.Cm online
-.Op Fl e
-.Ar pool Ar device Ns ...
-.Xc
-Brings the specified physical device online.
-This command is not applicable to spares.
-.Bl -tag -width Ds
-.It Fl e
-Expand the device to use all available space.
-If the device is part of a mirror or raidz then all devices must be expanded
-before the new space will become available to the pool.
-.El
-.It Xo
-.Nm
-.Cm reguid
-.Ar pool
-.Xc
-Generates a new unique identifier for the pool.
-You must ensure that all devices in this pool are online and healthy before
-performing this action.
-.It Xo
-.Nm
-.Cm reopen
-.Op Fl n
-.Ar pool
-.Xc
-Reopen all the vdevs associated with the pool.
-.Bl -tag -width Ds
-.It Fl n
-Do not restart an in-progress scrub operation. This is not recommended and can
-result in partially resilvered devices unless a second scrub is performed.
-.El
-.It Xo
-.Nm
-.Cm remove
-.Op Fl np
-.Ar pool Ar device Ns ...
-.Xc
-Removes the specified device from the pool.
-This command supports removing hot spare, cache, log, and both mirrored and
-non-redundant primary top-level vdevs, including dedup and special vdevs.
-When the primary pool storage includes a top-level raidz vdev only hot spare,
-cache, and log devices can be removed.
-.sp
-Removing a top-level vdev reduces the total amount of space in the storage pool.
-The specified device will be evacuated by copying all allocated space from it to
-the other devices in the pool.
-In this case, the
-.Nm zpool Cm remove
-command initiates the removal and returns, while the evacuation continues in
-the background.
-The removal progress can be monitored with
-.Nm zpool Cm status .
-If an IO error is encountered during the removal process it will be
-cancelled. The
-.Sy device_removal
-feature flag must be enabled to remove a top-level vdev, see
-.Xr zpool-features 5 .
-.Pp
-A mirrored top-level device (log or data) can be removed by specifying the top-level mirror for the
-same.
-Non-log devices or data devices that are part of a mirrored configuration can be removed using
-the
-.Nm zpool Cm detach
-command.
-.Bl -tag -width Ds
-.It Fl n
-Do not actually perform the removal ("no-op").
-Instead, print the estimated amount of memory that will be used by the
-mapping table after the removal completes.
-This is nonzero only for top-level vdevs.
-.El
-.Bl -tag -width Ds
-.It Fl p
-Used in conjunction with the
-.Fl n
-flag, displays numbers as parsable (exact) values.
-.El
-.It Xo
-.Nm
-.Cm remove
-.Fl s
-.Ar pool
-.Xc
-Stops and cancels an in-progress removal of a top-level vdev.
-.It Xo
-.Nm
-.Cm replace
-.Op Fl f
-.Op Fl o Ar property Ns = Ns Ar value
-.Ar pool Ar device Op Ar new_device
-.Xc
-Replaces
-.Ar old_device
-with
-.Ar new_device .
-This is equivalent to attaching
-.Ar new_device ,
-waiting for it to resilver, and then detaching
-.Ar old_device .
-.Pp
-The size of
-.Ar new_device
-must be greater than or equal to the minimum size of all the devices in a mirror
-or raidz configuration.
-.Pp
-.Ar new_device
-is required if the pool is not redundant.
-If
-.Ar new_device
-is not specified, it defaults to
-.Ar old_device .
-This form of replacement is useful after an existing disk has failed and has
-been physically replaced.
-In this case, the new disk may have the same
-.Pa /dev
-path as the old device, even though it is actually a different disk.
-ZFS recognizes this.
-.Bl -tag -width Ds
-.It Fl f
-Forces use of
-.Ar new_device ,
-even if it appears to be in use.
-Not all devices can be overridden in this manner.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the given pool properties. See the
-.Sx Properties
-section for a list of valid properties that can be set.
-The only property supported at the moment is
-.Sy ashift .
-.El
-.It Xo
-.Nm
-.Cm scrub
-.Op Fl s | Fl p
-.Ar pool Ns ...
-.Xc
+.It Xr zpool-scrub 8
 Begins a scrub or resumes a paused scrub.
-The scrub examines all data in the specified pools to verify that it checksums
-correctly.
-For replicated
-.Pq mirror or raidz
-devices, ZFS automatically repairs any damage discovered during the scrub.
-The
-.Nm zpool Cm status
-command reports the progress of the scrub and summarizes the results of the
-scrub upon completion.
-.Pp
-Scrubbing and resilvering are very similar operations.
-The difference is that resilvering only examines data that ZFS knows to be out
-of date
-.Po
-for example, when attaching a new device to a mirror or replacing an existing
-device
-.Pc ,
-whereas scrubbing examines all data to discover silent errors due to hardware
-faults or disk failure.
-.Pp
-Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows
-one at a time.
-If a scrub is paused, the
-.Nm zpool Cm scrub
-resumes it.
-If a resilver is in progress, ZFS does not allow a scrub to be started until the
-resilver completes.
-.Pp
-Note that, due to changes in pool data on a live system, it is possible for
-scrubs to progress slightly beyond 100% completion. During this period, no
-completion time estimate will be provided.
-.Bl -tag -width Ds
-.It Fl s
-Stop scrubbing.
-.El
-.Bl -tag -width Ds
-.It Fl p
-Pause scrubbing.
-Scrub pause state and progress are periodically synced to disk.
-If the system is restarted or pool is exported during a paused scrub,
-even after import, scrub will remain paused until it is resumed.
-Once resumed the scrub will pick up from the place where it was last
-checkpointed to disk.
-To resume a paused scrub issue
-.Nm zpool Cm scrub
-again.
-.El
-.It Xo
-.Nm
-.Cm resilver
-.Ar pool Ns ...
-.Xc
-Starts a resilver. If an existing resilver is already running it will be
-restarted from the beginning. Any drives that were scheduled for a deferred
-resilver will be added to the new one. This requires the
-.Sy resilver_defer
-feature.
-.It Xo
-.Nm
-.Cm trim
-.Op Fl d
-.Op Fl c | Fl s
-.Ar pool
-.Op Ar device Ns ...
-.Xc
-Initiates an immediate on-demand TRIM operation for all of the free space in
-a pool.  This operation informs the underlying storage devices of all blocks
+.It Xr zpool-checkpoint 8
+Checkpoints the current state of
+.Ar pool ,
+which can be later restored by
+.Nm zpool Cm import Fl -rewind-to-checkpoint .
+.It Xr zpool-trim 8
+Initiates an immediate on-demand TRIM operation for all of the free space in a pool.
+This operation informs the underlying storage devices of all blocks
 in the pool which are no longer allocated and allows thinly provisioned
 devices to reclaim the space.
-.Pp
-A manual on-demand TRIM operation can be initiated irrespective of the
-.Sy autotrim
-pool property setting.  See the documentation for the
-.Sy autotrim
-property above for the types of vdev devices which can be trimmed.
-.Bl -tag -width Ds
-.It Fl d -secure
-Causes a secure TRIM to be initiated.  When performing a secure TRIM, the
-device guarantees that data stored on the trimmed blocks has been erased.
-This requires support from the device and is not supported by all SSDs.
-.It Fl r -rate Ar rate
-Controls the rate at which the TRIM operation progresses.  Without this
-option TRIM is executed as quickly as possible. The rate, expressed in bytes
-per second, is applied on a per-vdev basis and may be set differently for
-each leaf vdev.
-.It Fl c, -cancel
-Cancel trimming on the specified devices, or all eligible devices if none
-are specified.
-If one or more target devices are invalid or are not currently being
-trimmed, the command will fail and no cancellation will occur on any device.
-.It Fl s -suspend
-Suspend trimming on the specified devices, or all eligible devices if none
-are specified.
-If one or more target devices are invalid or are not currently being
-trimmed, the command will fail and no suspension will occur on any device.
-Trimming can then be resumed by running
-.Nm zpool Cm trim
-with no flags on the relevant target devices.
-.El
-.It Xo
-.Nm
-.Cm set
-.Ar property Ns = Ns Ar value
-.Ar pool
-.Xc
-Sets the given property on the specified pool.
-See the
-.Sx Properties
-section for more information on what properties can be set and acceptable
-values.
-.It Xo
-.Nm
-.Cm split
-.Op Fl gLlnP
-.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
-.Op Fl R Ar root
-.Ar pool newpool
-.Op Ar device ...
-.Xc
-Splits devices off
-.Ar pool
-creating
-.Ar newpool .
-All vdevs in
-.Ar pool
-must be mirrors and the pool must not be in the process of resilvering.
-At the time of the split,
-.Ar newpool
-will be a replica of
-.Ar pool .
-By default, the
-last device in each mirror is split from
-.Ar pool
-to create
-.Ar newpool .
-.Pp
-The optional device specification causes the specified device(s) to be
-included in the new
-.Ar pool
-and, should any devices remain unspecified,
-the last device in each mirror is used as would be by default.
-.Bl -tag -width Ds
-.It Fl g
-Display vdev GUIDs instead of the normal device names. These GUIDs
-can be used in place of device names for the zpool
-detach/offline/remove/replace commands.
-.It Fl L
-Display real paths for vdevs resolving all symbolic links. This can
-be used to look up the current block device name regardless of the
-.Pa /dev/disk/
-path used to open it.
-.It Fl l
-Indicates that this command will request encryption keys for all encrypted
-datasets it attempts to mount as it is bringing the new pool online. Note that
-if any datasets have a
-.Sy keylocation
-of
-.Sy prompt
-this command will block waiting for the keys to be entered. Without this flag
-encrypted datasets will be left unavailable until the keys are loaded.
-.It Fl n
-Do dry run, do not actually perform the split.
-Print out the expected configuration of
-.Ar newpool .
-.It Fl P
-Display full paths for vdevs instead of only the last component of
-the path. This can be used in conjunction with the
-.Fl L
-flag.
-.It Fl o Ar property Ns = Ns Ar value
-Sets the specified property for
-.Ar newpool .
-See the
-.Sx Properties
-section for more information on the available pool properties.
-.It Fl R Ar root
-Set
-.Sy altroot
-for
-.Ar newpool
-to
-.Ar root
-and automatically import it.
-.El
-.It Xo
-.Nm
-.Cm status
-.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns ...
-.Op Fl DigLpPstvx
-.Op Fl T Sy u Ns | Ns Sy d
-.Oo Ar pool Oc Ns ...
-.Op Ar interval Op Ar count
-.Xc
-Displays the detailed health status for the given pools.
-If no
-.Ar pool
-is specified, then the status of each pool in the system is displayed.
-For more information on pool and device health, see the
-.Sx Device Failure and Recovery
-section.
-.Pp
-If a scrub or resilver is in progress, this command reports the percentage done
-and the estimated time to completion.
-Both of these are only approximate, because the amount of data in the pool and
-the other workloads on the system can change.
-.Bl -tag -width Ds
-.It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns ...
-Run a script (or scripts) on each vdev and include the output as a new column
-in the
-.Nm zpool Cm status
-output.  See the
-.Fl c
-option of
-.Nm zpool Cm iostat
-for complete details.
-.It Fl i
-Display vdev initialization status.
-.It Fl g
-Display vdev GUIDs instead of the normal device names. These GUIDs
-can be used in place of device names for the zpool
-detach/offline/remove/replace commands.
-.It Fl L
-Display real paths for vdevs resolving all symbolic links. This can
-be used to look up the current block device name regardless of the
-.Pa /dev/disk/
-path used to open it.
-.It Fl p
-Display numbers in parsable (exact) values.
-.It Fl P
-Display full paths for vdevs instead of only the last component of
-the path. This can be used in conjunction with the
-.Fl L
-flag.
-.It Fl D
-Display a histogram of deduplication statistics, showing the allocated
-.Pq physically present on disk
-and referenced
-.Pq logically referenced in the pool
-block counts and sizes by reference count.
-.It Fl s
-Display the number of leaf VDEV slow IOs.  This is the number of IOs that
-didn't complete in \fBzio_slow_io_ms\fR milliseconds (default 30 seconds).
-This does not necessarily mean the IOs failed to complete, just took an
-unreasonably long amount of time.  This may indicate a problem with the
-underlying storage.
-.It Fl t
-Display vdev TRIM status.
-.It Fl T Sy u Ns | Ns Sy d
-Display a time stamp.
-Specify
-.Sy u
-for a printed representation of the internal representation of time.
-See
-.Xr time 2 .
-Specify
-.Sy d
-for standard date format.
-See
-.Xr date 1 .
-.It Fl v
-Displays verbose data error information, printing out a complete list of all
-data errors since the last complete pool scrub.
-.It Fl x
-Only display status for pools that are exhibiting errors or are otherwise
-unavailable.
-Warnings about pools not using the latest on-disk format will not be included.
-.El
-.It Xo
-.Nm
-.Cm sync
-.Op Ar pool ...
-.Xc
+.It Xr zpool-sync 8
 This command forces all in-core dirty data to be written to the primary
-pool storage and not the ZIL. It will also update administrative
-information including quota reporting. Without arguments,
-.Sy zpool sync
-will sync all pools on the system. Otherwise, it will sync only the
-specified pool(s).
-.It Xo
-.Nm
-.Cm upgrade
-.Xc
-Displays pools which do not have all supported features enabled and pools
-formatted using a legacy ZFS version number.
-These pools can continue to be used, but some features may not be available.
-Use
-.Nm zpool Cm upgrade Fl a
-to enable all features on all pools.
-.It Xo
-.Nm
-.Cm upgrade
-.Fl v
-.Xc
-Displays legacy ZFS versions supported by the current software.
-See
-.Xr zpool-features 5
-for a description of feature flags features supported by the current software.
-.It Xo
-.Nm
-.Cm upgrade
-.Op Fl V Ar version
-.Fl a Ns | Ns Ar pool Ns ...
-.Xc
-Enables all supported features on the given pool.
-Once this is done, the pool will no longer be accessible on systems that do not
-support feature flags.
-See
-.Xr zpool-features 5
-for details on compatibility with systems that support feature flags, but do not
-support all features enabled on the pool.
+pool storage and not the ZIL.
+It will also update administrative information including quota reporting.
+Without arguments,
+.Nm zpool Cm sync
+will sync all pools on the system.
+Otherwise, it will sync only the specified pool(s).
+.It Xr zpool-upgrade 8
+Manage the on-disk format version of storage pools.
+.It Xr zpool-wait 8
+Waits until all background activity of the given types has ceased in the given
+pool.
+.El
+.
+.Ss Fault Resolution
 .Bl -tag -width Ds
-.It Fl a
-Enables all supported features on all pools.
-.It Fl V Ar version
-Upgrade to the specified legacy version.
-If the
-.Fl V
-flag is specified, no features will be enabled on the pool.
-This option can only be used to increase the version number up to the last
-supported legacy version number.
-.El
 .It Xo
-.Nm
-.Cm version
+.Xr zpool-offline 8 Ns / Ns Xr zpool-online 8
 .Xc
-Displays the software version of the
-.Nm
-userland utility and the zfs kernel module.
+Takes the specified physical device offline or brings it online.
+.It Xr zpool-resilver 8
+Starts a resilver.
+If an existing resilver is already running, it will be restarted from the beginning.
+.It Xr zpool-reopen 8
+Reopen all the vdevs associated with the pool.
+.It Xr zpool-clear 8
+Clears device errors in a pool.
 .El
+.
+.Ss Import & Export
+.Bl -tag -width Ds
+.It Xr zpool-import 8
+Make disks containing ZFS storage pools available for use on the system.
+.It Xr zpool-export 8
+Exports the given pools from the system.
+.It Xr zpool-reguid 8
+Generates a new unique identifier for the pool.
+.El
+.
 .Sh EXIT STATUS
 The following exit values are returned:
-.Bl -tag -width Ds
+.Bl -tag -compact -offset 4n -width "a"
 .It Sy 0
 Successful completion.
 .It Sy 1
@@ -2501,74 +228,69 @@
 .It Sy 2
 Invalid command line options were specified.
 .El
+.
 .Sh EXAMPLES
-.Bl -tag -width Ds
-.It Sy Example 1 No Creating a RAID-Z Storage Pool
+.Bl -tag -width "Exam"
+.It Sy Example 1 : No Creating a RAID-Z Storage Pool
 The following command creates a pool with a single raidz root vdev that
-consists of six disks.
-.Bd -literal
-# zpool create tank raidz sda sdb sdc sdd sde sdf
-.Ed
-.It Sy Example 2 No Creating a Mirrored Storage Pool
+consists of six disks:
+.Dl # Nm zpool Cm create Ar tank Sy raidz Ar sda sdb sdc sdd sde sdf
+.
+.It Sy Example 2 : No Creating a Mirrored Storage Pool
 The following command creates a pool with two mirrors, where each mirror
-contains two disks.
-.Bd -literal
-# zpool create tank mirror sda sdb mirror sdc sdd
-.Ed
-.It Sy Example 3 No Creating a ZFS Storage Pool by Using Partitions
-The following command creates an unmirrored pool using two disk partitions.
-.Bd -literal
-# zpool create tank sda1 sdb2
-.Ed
-.It Sy Example 4 No Creating a ZFS Storage Pool by Using Files
+contains two disks:
+.Dl # Nm zpool Cm create Ar tank Sy mirror Ar sda sdb Sy mirror Ar sdc sdd
+.
+.It Sy Example 3 : No Creating a ZFS Storage Pool by Using Partitions
+The following command creates an unmirrored pool using two disk partitions:
+.Dl # Nm zpool Cm create Ar tank sda1 sdb2
+.
+.It Sy Example 4 : No Creating a ZFS Storage Pool by Using Files
 The following command creates an unmirrored pool using files.
 While not recommended, a pool based on files can be useful for experimental
 purposes.
-.Bd -literal
-# zpool create tank /path/to/file/a /path/to/file/b
-.Ed
-.It Sy Example 5 No Adding a Mirror to a ZFS Storage Pool
+.Dl # Nm zpool Cm create Ar tank /path/to/file/a /path/to/file/b
+.
+.It Sy Example 5 : No Adding a Mirror to a ZFS Storage Pool
 The following command adds two mirrored disks to the pool
-.Em tank ,
+.Ar tank ,
 assuming the pool is already made up of two-way mirrors.
 The additional space is immediately available to any datasets within the pool.
-.Bd -literal
-# zpool add tank mirror sda sdb
-.Ed
-.It Sy Example 6 No Listing Available ZFS Storage Pools
+.Dl # Nm zpool Cm add Ar tank Sy mirror Ar sda sdb
+.
+.It Sy Example 6 : No Listing Available ZFS Storage Pools
 The following command lists all available pools on the system.
 In this case, the pool
-.Em zion
+.Ar zion
 is faulted due to a missing device.
 The results from this command are similar to the following:
-.Bd -literal
-# zpool list
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm list
 NAME    SIZE  ALLOC   FREE  EXPANDSZ   FRAG    CAP  DEDUP  HEALTH  ALTROOT
 rpool  19.9G  8.43G  11.4G         -    33%    42%  1.00x  ONLINE  -
 tank   61.5G  20.0G  41.5G         -    48%    32%  1.00x  ONLINE  -
 zion       -      -      -         -      -      -      -  FAULTED -
 .Ed
-.It Sy Example 7 No Destroying a ZFS Storage Pool
+.
+.It Sy Example 7 : No Destroying a ZFS Storage Pool
 The following command destroys the pool
-.Em tank
-and any datasets contained within.
-.Bd -literal
-# zpool destroy -f tank
-.Ed
-.It Sy Example 8 No Exporting a ZFS Storage Pool
+.Ar tank
+and any datasets contained within:
+.Dl # Nm zpool Cm destroy Fl f Ar tank
+.
+.It Sy Example 8 : No Exporting a ZFS Storage Pool
 The following command exports the devices in pool
-.Em tank
-so that they can be relocated or later imported.
-.Bd -literal
-# zpool export tank
-.Ed
-.It Sy Example 9 No Importing a ZFS Storage Pool
+.Ar tank
+so that they can be relocated or later imported:
+.Dl # Nm zpool Cm export Ar tank
+.
+.It Sy Example 9 : No Importing a ZFS Storage Pool
 The following command displays available pools, and then imports the pool
-.Em tank
+.Ar tank
 for use on the system.
 The results from this command are similar to the following:
-.Bd -literal
-# zpool import
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm import
   pool: tank
     id: 15451357997522795478
  state: ONLINE
@@ -2580,66 +302,58 @@
             sda     ONLINE
             sdb     ONLINE
 
-# zpool import tank
+.No # Nm zpool Cm import Ar tank
 .Ed
-.It Sy Example 10 No Upgrading All ZFS Storage Pools to the Current Version
+.
+.It Sy Example 10 : No Upgrading All ZFS Storage Pools to the Current Version
 The following command upgrades all ZFS Storage pools to the current version of
-the software.
-.Bd -literal
-# zpool upgrade -a
+the software:
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm upgrade Fl a
 This system is currently running ZFS version 2.
 .Ed
-.It Sy Example 11 No Managing Hot Spares
+.
+.It Sy Example 11 : No Managing Hot Spares
 The following command creates a new pool with an available hot spare:
-.Bd -literal
-# zpool create tank mirror sda sdb spare sdc
-.Ed
+.Dl # Nm zpool Cm create Ar tank Sy mirror Ar sda sdb Sy spare Ar sdc
 .Pp
 If one of the disks were to fail, the pool would be reduced to the degraded
 state.
 The failed device can be replaced using the following command:
-.Bd -literal
-# zpool replace tank sda sdd
-.Ed
+.Dl # Nm zpool Cm replace Ar tank sda sdd
 .Pp
 Once the data has been resilvered, the spare is automatically removed and is
 made available for use should another device fail.
 The hot spare can be permanently removed from the pool using the following
 command:
-.Bd -literal
-# zpool remove tank sdc
-.Ed
-.It Sy Example 12 No Creating a ZFS Pool with Mirrored Separate Intent Logs
+.Dl # Nm zpool Cm remove Ar tank sdc
+.
+.It Sy Example 12 : No Creating a ZFS Pool with Mirrored Separate Intent Logs
 The following command creates a ZFS storage pool consisting of two, two-way
 mirrors and mirrored log devices:
-.Bd -literal
-# zpool create pool mirror sda sdb mirror sdc sdd log mirror \\
-  sde sdf
-.Ed
-.It Sy Example 13 No Adding Cache Devices to a ZFS Pool
+.Dl # Nm zpool Cm create Ar pool Sy mirror Ar sda sdb Sy mirror Ar sdc sdd Sy log mirror Ar sde sdf
+.
+.It Sy Example 13 : No Adding Cache Devices to a ZFS Pool
 The following command adds two disks for use as cache devices to a ZFS storage
 pool:
-.Bd -literal
-# zpool add pool cache sdc sdd
-.Ed
+.Dl # Nm zpool Cm add Ar pool Sy cache Ar sdc sdd
 .Pp
 Once added, the cache devices gradually fill with content from main memory.
 Depending on the size of your cache devices, it could take over an hour for
 them to fill.
 Capacity and reads can be monitored using the
 .Cm iostat
-option as follows:
-.Bd -literal
-# zpool iostat -v pool 5
-.Ed
-.It Sy Example 14 No Removing a Mirrored top-level (Log or Data) Device
+subcommand as follows:
+.Dl # Nm zpool Cm iostat Fl v Ar pool 5
+.
+.It Sy Example 14 : No Removing a Mirrored top-level (Log or Data) Device
 The following commands remove the mirrored log device
 .Sy mirror-2
 and mirrored top-level data device
 .Sy mirror-1 .
 .Pp
 Given this configuration:
-.Bd -literal
+.Bd -literal -compact -offset Ds
   pool: tank
  state: ONLINE
  scrub: none requested
@@ -2660,27 +374,22 @@
 .Ed
 .Pp
 The command to remove the mirrored log
-.Sy mirror-2
-is:
-.Bd -literal
-# zpool remove tank mirror-2
-.Ed
+.Ar mirror-2 No is:
+.Dl # Nm zpool Cm remove Ar tank mirror-2
 .Pp
 The command to remove the mirrored data
-.Sy mirror-1
-is:
-.Bd -literal
-# zpool remove tank mirror-1
-.Ed
-.It Sy Example 15 No Displaying expanded space on a device
+.Ar mirror-1 No is:
+.Dl # Nm zpool Cm remove Ar tank mirror-1
+.
+.It Sy Example 15 : No Displaying expanded space on a device
 The following command displays the detailed information for the pool
-.Em data .
+.Ar data .
 This pool is comprised of a single raidz vdev where one of its devices
 increased its capacity by 10GB.
 In this example, the pool will not be able to utilize this extra capacity until
 all the devices under the raidz vdev have been expanded.
-.Bd -literal
-# zpool list -v data
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm list Fl v Ar data
 NAME         SIZE  ALLOC   FREE  EXPANDSZ   FRAG    CAP  DEDUP  HEALTH  ALTROOT
 data        23.9G  14.6G  9.30G         -    48%    61%  1.00x  ONLINE  -
   raidz1    23.9G  14.6G  9.30G         -    48%
@@ -2688,16 +397,12 @@
     sdb         -      -      -       10G      -
     sdc         -      -      -         -      -
 .Ed
-.It Sy Example 16 No Adding output columns
+.
+.It Sy Example 16 : No Adding output columns
 Additional columns can be added to the
-.Nm zpool Cm status
-and
-.Nm zpool Cm iostat
-output with
-.Fl c
-option.
-.Bd -literal
-# zpool status -c vendor,model,size
+.Nm zpool Cm status No and Nm zpool Cm iostat No output with Fl c .
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm status Fl c Ar vendor , Ns Ar model , Ns Ar size
    NAME     STATE  READ WRITE CKSUM vendor  model        size
    tank     ONLINE 0    0     0
    mirror-0 ONLINE 0    0     0
@@ -2708,129 +413,150 @@
    U13      ONLINE 0    0     0     SEAGATE ST8000NM0075 7.3T
    U14      ONLINE 0    0     0     SEAGATE ST8000NM0075 7.3T
 
-# zpool iostat -vc slaves
-   capacity operations bandwidth
-   pool       alloc free  read  write read  write slaves
-   ---------- ----- ----- ----- ----- ----- ----- ---------
-   tank       20.4G 7.23T 26    152   20.7M 21.6M
-   mirror     20.4G 7.23T 26    152   20.7M 21.6M
-   U1         -     -     0     31    1.46K 20.6M sdb sdff
-   U10        -     -     0     1     3.77K 13.3K sdas sdgw
-   U11        -     -     0     1     288K  13.3K sdat sdgx
-   U12        -     -     0     1     78.4K 13.3K sdau sdgy
-   U13        -     -     0     1     128K  13.3K sdav sdgz
-   U14        -     -     0     1     63.2K 13.3K sdfk sdg
+.No # Nm zpool Cm iostat Fl vc Ar size
+              capacity     operations     bandwidth
+pool        alloc   free   read  write   read  write  size
+----------  -----  -----  -----  -----  -----  -----  ----
+rpool       14.6G  54.9G      4     55   250K  2.69M
+  sda1      14.6G  54.9G      4     55   250K  2.69M   70G
+----------  -----  -----  -----  -----  -----  -----  ----
 .Ed
 .El
+.
 .Sh ENVIRONMENT VARIABLES
-.Bl -tag -width "ZFS_ABORT"
-.It Ev ZFS_ABORT
+.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS"
+.It Sy ZFS_ABORT
 Cause
-.Nm zpool
+.Nm
 to dump core on exit for the purposes of running
 .Sy ::findleaks .
-.El
-.Bl -tag -width "ZPOOL_IMPORT_PATH"
-.It Ev ZPOOL_IMPORT_PATH
-The search path for devices or files to use with the pool. This is a colon-separated list of directories in which
-.Nm zpool
+.It Sy ZFS_COLOR
+Use ANSI color in
+.Nm zpool Cm status
+and
+.Nm zpool Cm iostat
+output.
+.It Sy ZPOOL_IMPORT_PATH
+The search path for devices or files to use with the pool.
+This is a colon-separated list of directories in which
+.Nm
 looks for device nodes and files.
 Similar to the
 .Fl d
 option in
 .Nm zpool import .
-.El
-.Bl -tag -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS"
-.It Ev ZPOOL_IMPORT_UDEV_TIMEOUT_MS
+.It Sy ZPOOL_IMPORT_UDEV_TIMEOUT_MS
 The maximum time in milliseconds that
 .Nm zpool import
 will wait for an expected device to be available.
-.El
-.Bl -tag -width "ZPOOL_VDEV_NAME_GUID"
-.It Ev ZPOOL_VDEV_NAME_GUID
+.It Sy ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE
+If set, suppress warning about non-native vdev ashift in
+.Nm zpool Cm status .
+The value is not used, only the presence or absence of the variable matters.
+.It Sy ZPOOL_VDEV_NAME_GUID
 Cause
-.Nm zpool
-subcommands to output vdev guids by default.  This behavior is identical to the
-.Nm zpool status -g
+.Nm
+subcommands to output vdev guids by default.
+This behavior is identical to the
+.Nm zpool Cm status Fl g
 command line option.
-.El
-.Bl -tag -width "ZPOOL_VDEV_NAME_FOLLOW_LINKS"
-.It Ev ZPOOL_VDEV_NAME_FOLLOW_LINKS
+.It Sy ZPOOL_VDEV_NAME_FOLLOW_LINKS
 Cause
-.Nm zpool
-subcommands to follow links for vdev names by default.  This behavior is identical to the
-.Nm zpool status -L
+.Nm
+subcommands to follow links for vdev names by default.
+This behavior is identical to the
+.Nm zpool Cm status Fl L
 command line option.
-.El
-.Bl -tag -width "ZPOOL_VDEV_NAME_PATH"
-.It Ev ZPOOL_VDEV_NAME_PATH
+.It Sy ZPOOL_VDEV_NAME_PATH
 Cause
-.Nm zpool
-subcommands to output full vdev path names by default.  This
-behavior is identical to the
-.Nm zpool status -p
+.Nm
+subcommands to output full vdev path names by default.
+This behavior is identical to the
+.Nm zpool Cm status Fl P
 command line option.
-.El
-.Bl -tag -width "ZFS_VDEV_DEVID_OPT_OUT"
-.It Ev ZFS_VDEV_DEVID_OPT_OUT
-Older ZFS on Linux implementations had issues when attempting to display pool
+.It Sy ZFS_VDEV_DEVID_OPT_OUT
+Older OpenZFS implementations had issues when attempting to display pool
 config VDEV names if a
 .Sy devid
 NVP value is present in the pool's config.
 .Pp
-For example, a pool that originated on illumos platform would have a devid
+For example, a pool that originated on illumos platform would have a
+.Sy devid
 value in the config and
 .Nm zpool status
 would fail when listing the config.
-This would also be true for future Linux based pools.
+This would also be true for future Linux-based pools.
 .Pp
 A pool can be stripped of any
 .Sy devid
 values on import or prevented from adding
 them on
-.Nm zpool create
+.Nm zpool Cm create
 or
-.Nm zpool add
+.Nm zpool Cm add
 by setting
 .Sy ZFS_VDEV_DEVID_OPT_OUT .
-.El
-.Bl -tag -width "ZPOOL_SCRIPTS_AS_ROOT"
-.It Ev ZPOOL_SCRIPTS_AS_ROOT
-Allow a privileged user to run the
-.Nm zpool status/iostat
-with the
-.Fl c
-option.  Normally, only unprivileged users are allowed to run
+.Pp
+.It Sy ZPOOL_SCRIPTS_AS_ROOT
+Allow a privileged user to run
+.Nm zpool Cm status Ns / Ns Cm iostat Fl c .
+Normally, only unprivileged users are allowed to run
 .Fl c .
-.El
-.Bl -tag -width "ZPOOL_SCRIPTS_PATH"
-.It Ev ZPOOL_SCRIPTS_PATH
+.It Sy ZPOOL_SCRIPTS_PATH
 The search path for scripts when running
-.Nm zpool status/iostat
-with the
-.Fl c
-option. This is a colon-separated list of directories and overrides the default
+.Nm zpool Cm status Ns / Ns Cm iostat Fl c .
+This is a colon-separated list of directories and overrides the default
 .Pa ~/.zpool.d
 and
 .Pa /etc/zfs/zpool.d
 search paths.
-.El
-.Bl -tag -width "ZPOOL_SCRIPTS_ENABLED"
-.It Ev ZPOOL_SCRIPTS_ENABLED
+.It Sy ZPOOL_SCRIPTS_ENABLED
 Allow a user to run
-.Nm zpool status/iostat
-with the
-.Fl c
-option. If
+.Nm zpool Cm status Ns / Ns Cm iostat Fl c .
+If
 .Sy ZPOOL_SCRIPTS_ENABLED
 is not set, it is assumed that the user is allowed to run
-.Nm zpool status/iostat -c .
+.Nm zpool Cm status Ns / Ns Cm iostat Fl c .
 .El
+.
 .Sh INTERFACE STABILITY
 .Sy Evolving
+.
 .Sh SEE ALSO
-.Xr zfs-events 5 ,
-.Xr zfs-module-parameters 5 ,
-.Xr zpool-features 5 ,
+.Xr zfs 4 ,
+.Xr zpool-features 7 ,
+.Xr zpoolconcepts 7 ,
+.Xr zpoolprops 7 ,
 .Xr zed 8 ,
-.Xr zfs 8
+.Xr zfs 8 ,
+.Xr zpool-add 8 ,
+.Xr zpool-attach 8 ,
+.Xr zpool-checkpoint 8 ,
+.Xr zpool-clear 8 ,
+.Xr zpool-create 8 ,
+.Xr zpool-destroy 8 ,
+.Xr zpool-detach 8 ,
+.Xr zpool-events 8 ,
+.Xr zpool-export 8 ,
+.Xr zpool-get 8 ,
+.Xr zpool-history 8 ,
+.Xr zpool-import 8 ,
+.Xr zpool-initialize 8 ,
+.Xr zpool-iostat 8 ,
+.Xr zpool-labelclear 8 ,
+.Xr zpool-list 8 ,
+.Xr zpool-offline 8 ,
+.Xr zpool-online 8 ,
+.Xr zpool-reguid 8 ,
+.Xr zpool-remove 8 ,
+.Xr zpool-reopen 8 ,
+.Xr zpool-replace 8 ,
+.Xr zpool-resilver 8 ,
+.Xr zpool-scrub 8 ,
+.Xr zpool-set 8 ,
+.Xr zpool-split 8 ,
+.Xr zpool-status 8 ,
+.Xr zpool-sync 8 ,
+.Xr zpool-trim 8 ,
+.Xr zpool-upgrade 8 ,
+.Xr zpool-wait 8

diff --git a/zfs/man/man8/zpool_influxdb.8 b/zfs/man/man8/zpool_influxdb.8
new file mode 100644
index 0000000..021fbde
--- /dev/null
+++ b/zfs/man/man8/zpool_influxdb.8

@@ -0,0 +1,98 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at
+.\" https://opensource.org/licenses/CDDL-1.0
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright 2020 Richard Elling
+.\"
+.Dd May 26, 2021
+.Dt ZPOOL_INFLUXDB 8
+.Os
+.
+.Sh NAME
+.Nm zpool_influxdb
+.Nd collect ZFS pool statistics in InfluxDB line protocol format
+.Sh SYNOPSIS
+.Nm
+.Op Fl e Ns | Ns Fl -execd
+.Op Fl n Ns | Ns Fl -no-histogram
+.Op Fl s Ns | Ns Fl -sum-histogram-buckets
+.Op Fl t Ns | Ns Fl -tags Ar key Ns = Ns Ar value Ns Oo , Ns Ar key Ns = Ns Ar value Oc Ns …
+.Op Ar pool
+.
+.Sh DESCRIPTION
+.Nm
+produces InfluxDB-line-protocol-compatible metrics from zpools.
+Like the
+.Nm zpool
+command,
+.Nm
+reads the current pool status and statistics.
+Unlike the
+.Nm zpool
+command which is intended for humans,
+.Nm
+formats the output in the InfluxDB line protocol.
+The expected use is as a plugin to a
+metrics collector or aggregator, such as Telegraf.
+.Pp
+By default,
+.Nm
+prints pool metrics and status in the InfluxDB line protocol format.
+All pools are printed, similar to the
+.Nm zpool Cm status
+command.
+Providing a pool name restricts the output to the named pool.
+.
+.Sh OPTIONS
+.Bl -tag -width "-e, --execd"
+.It Fl e , -execd
+Run in daemon mode compatible with Telegraf's
+.Nm execd
+plugin.
+In this mode, the pools are sampled every time a
+newline appears on the standard input.
+.It Fl n , -no-histogram
+Do not print latency and I/O size histograms.
+This can reduce the total
+amount of data, but one should consider the value brought by the insights
+that latency and I/O size distributions provide.
+The resulting values
+are suitable for graphing with Grafana's heatmap plugin.
+.It Fl s , -sum-histogram-buckets
+Accumulates bucket values.
+By default, the values are not accumulated and the raw data appears as shown by
+.Nm zpool Cm iostat .
+This works well for Grafana's heatmap plugin.
+Summing the buckets produces output similar to Prometheus histograms.
+.It Fl t , Fl -tags Ar key Ns = Ns Ar value Ns Oo , Ns Ar key Ns = Ns Ar value Oc Ns …
+Adds specified tags to the tag set.
+No sanity checking is performed.
+See the InfluxDB Line Protocol format documentation for details on escaping
+special characters used in tags.
+.It Fl h , -help
+Print a usage summary.
+.El
+.
+.Sh SEE ALSO
+.Xr zpool-iostat 8 ,
+.Xr zpool-status 8 ,
+.Lk https://github.com/influxdata/influxdb "InfluxDB" ,
+.Lk https://github.com/influxdata/telegraf "Telegraf" ,
+.Lk https://grafana.com "Grafana" ,
+.Lk https://prometheus.io "Prometheus"

diff --git a/zfs/man/man8/zstream.8 b/zfs/man/man8/zstream.8
new file mode 100644
index 0000000..c0322ee
--- /dev/null
+++ b/zfs/man/man8/zstream.8

@@ -0,0 +1,117 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2020 by Delphix. All rights reserved.
+.\"
+.Dd May 8, 2021
+.Dt ZSTREAM 8
+.Os
+.
+.Sh NAME
+.Nm zstream
+.Nd manipulate ZFS send streams
+.Sh SYNOPSIS
+.Nm
+.Cm dump
+.Op Fl Cvd
+.Op Ar file
+.Nm
+.Cm redup
+.Op Fl v
+.Ar file
+.Nm
+.Cm token
+.Ar resume_token
+.
+.Sh DESCRIPTION
+The
+.Nm
+utility manipulates ZFS send streams output by the
+.Nm zfs Cm send
+command.
+.Bl -tag -width ""
+.It Xo
+.Nm
+.Cm dump
+.Op Fl Cvd
+.Op Ar file
+.Xc
+Print information about the specified send stream, including headers and
+record counts.
+The send stream may either be in the specified
+.Ar file ,
+or provided on standard input.
+.Bl -tag -width "-D"
+.It Fl C
+Suppress the validation of checksums.
+.It Fl v
+Verbose.
+Print metadata for each record.
+.It Fl d
+Dump data contained in each record.
+Implies verbose.
+.El
+.Pp
+The
+.Nm zstreamdump
+alias is provided for compatibility and is equivalent to running
+.Nm
+.Cm dump .
+.It Xo
+.Nm
+.Cm token
+.Ar resume_token
+.Xc
+Dumps zfs resume token information.
+.It Xo
+.Nm
+.Cm redup
+.Op Fl v
+.Ar file
+.Xc
+Deduplicated send streams can be generated by using the
+.Nm zfs Cm send Fl D
+command.
+The ability to send deduplicated send streams is deprecated.
+In the future, the ability to receive a deduplicated send stream with
+.Nm zfs Cm receive
+will be removed.
+However, deduplicated send streams can still be received by utilizing
+.Nm zstream Cm redup .
+.Pp
+The
+.Nm zstream Cm redup
+command is provided a
+.Ar file
+containing a deduplicated send stream, and outputs an equivalent
+non-deduplicated send stream on standard output.
+Therefore, a deduplicated send stream can be received by running:
+.Dl # Nm zstream Cm redup Pa DEDUP_STREAM_FILE | Nm zfs Cm receive No …
+.Bl -tag -width "-D"
+.It Fl v
+Verbose.
+Print summary of converted records.
+.El
+.El
+.
+.Sh SEE ALSO
+.Xr zfs 8 ,
+.Xr zfs-receive 8 ,
+.Xr zfs-send 8

diff --git a/zfs/man/man8/zstreamdump.8 b/zfs/man/man8/zstreamdump.8
deleted file mode 100644
index 33cd047..0000000
--- a/zfs/man/man8/zstreamdump.8
+++ /dev/null

@@ -1,58 +0,0 @@
-'\" te
-.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
-.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
-.\"  See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with
-.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zstreamdump 8 "29 Aug 2012" "ZFS pool 28, filesystem 5" "System Administration Commands"
-.SH NAME
-zstreamdump \- filter data in zfs send stream
-.SH SYNOPSIS
-.LP
-.nf
-\fBzstreamdump\fR [\fB-C\fR] [\fB-v\fR] [\fB-d\fR]
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBzstreamdump\fR utility reads from the output of the \fBzfs send\fR
-command, then displays headers and some statistics from that output.  See
-\fBzfs\fR(8).
-.SH OPTIONS
-.sp
-.LP
-The following options are supported:
-.sp
-.ne 2
-.na
-\fB-C\fR
-.ad
-.sp .6
-.RS 4n
-Suppress the validation of checksums.
-.RE
-
-.sp
-.ne 2
-.na
-\fB-v\fR
-.ad
-.sp .6
-.RS 4n
-Verbose. Dump all headers, not only begin and end headers.
-.RE
-
-.sp
-.ne 2
-.na
-\fB-d\fR
-.ad
-.sp .6
-.RS 4n
-Dump contents of blocks modified. Implies verbose.
-.RE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBzfs\fR(8)

diff --git a/zfs/man/man8/zstreamdump.8 b/zfs/man/man8/zstreamdump.8
new file mode 120000
index 0000000..c6721da
--- /dev/null
+++ b/zfs/man/man8/zstreamdump.8

@@ -0,0 +1 @@
+zstream.8
\ No newline at end of file

diff --git a/zfs/module/.gitignore b/zfs/module/.gitignore
index 45e5f99..0ec6052 100644
--- a/zfs/module/.gitignore
+++ b/zfs/module/.gitignore

@@ -2,14 +2,26 @@
 *.ko.unsigned
 *.ko.out
 *.ko.out.sig
+*.ko.debug
+*.ko.full
 *.dwo
 .*.cmd
 .*.d
 *.mod
 
+/Kbuild
 /.cache.mk
 /.tmp_versions
 /Module.markers
 /Module.symvers
+/vnode_if*
+/bus_if.h
+/device_if.h
+/opt_global.h
+
+/export_syms
+/machine
+/x86
+/i386
 
 !Makefile.in

diff --git a/zfs/module/Kbuild.in b/zfs/module/Kbuild.in
new file mode 100644
index 0000000..7675d61
--- /dev/null
+++ b/zfs/module/Kbuild.in

@@ -0,0 +1,48 @@
+# When integrated into a monolithic kernel the spl module must appear
+# first.  This ensures its module initialization function is run before
+# any of the other module initialization functions which depend on it.
+ZFS_MODULES += spl/
+ZFS_MODULES += avl/
+ZFS_MODULES += icp/
+ZFS_MODULES += lua/
+ZFS_MODULES += nvpair/
+ZFS_MODULES += unicode/
+ZFS_MODULES += zcommon/
+ZFS_MODULES += zfs/
+ZFS_MODULES += zstd/
+
+# The rest is only relevant when run by kbuild
+ifneq ($(KERNELRELEASE),)
+
+obj-$(CONFIG_ZFS) := $(ZFS_MODULES)
+
+ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
+ZFS_MODULE_CFLAGS += -Wmissing-prototypes
+ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@  @NO_FORMAT_ZERO_LENGTH@
+
+ifneq ($(KBUILD_EXTMOD),)
+zfs_include = @abs_top_srcdir@/include
+ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
+ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include
+else
+zfs_include = $(srctree)/include/zfs
+ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h
+endif
+
+ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/kernel
+ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl
+ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
+ZFS_MODULE_CFLAGS += -I$(zfs_include)
+ZFS_MODULE_CPPFLAGS += -D_KERNEL
+ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
+
+ifneq ($(KBUILD_EXTMOD),)
+@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
+@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
+endif
+
+subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+
+
+endif

diff --git a/zfs/module/Makefile.bsd b/zfs/module/Makefile.bsd
new file mode 100644
index 0000000..6bd7869
--- /dev/null
+++ b/zfs/module/Makefile.bsd

@@ -0,0 +1,370 @@
+.if !defined(WITH_CTF)
+WITH_CTF=1
+.endif
+
+.include <bsd.sys.mk>
+
+SRCDIR=${.CURDIR}
+INCDIR=${.CURDIR:H}/include
+
+KMOD=	openzfs
+
+.PATH:	${SRCDIR}/avl \
+	${SRCDIR}/lua \
+	${SRCDIR}/nvpair \
+	${SRCDIR}/os/freebsd/spl \
+	${SRCDIR}/os/freebsd/zfs \
+	${SRCDIR}/unicode \
+	${SRCDIR}/zcommon \
+	${SRCDIR}/zfs \
+	${SRCDIR}/zstd \
+	${SRCDIR}/zstd/lib
+
+
+
+CFLAGS+= -I${.OBJDIR:H}/include
+CFLAGS+= -I${INCDIR}
+CFLAGS+= -I${INCDIR}/os/freebsd
+CFLAGS+= -I${INCDIR}/os/freebsd/spl
+CFLAGS+= -I${INCDIR}/os/freebsd/zfs
+CFLAGS+= -I${SRCDIR}/zstd/include
+CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
+
+CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS  -D__BSD_VISIBLE=1 \
+	 -DHAVE_UIO_ZEROCOPY -DWITHOUT_NETDUMP -D__KERNEL -D_SYS_CONDVAR_H_ \
+	 -D_SYS_VMEM_H_ -DKDTRACE_HOOKS -DSMP -DCOMPAT_FREEBSD11
+
+.if ${MACHINE_ARCH} == "amd64"
+CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F -DHAVE_SSSE3
+.endif
+
+.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
+CFLAGS+= -DZFS_DEBUG -g
+.if defined(WITH_INVARIANTS) && ${WITH_INVARIANTS} == "true"
+ CFLAGS+= -DINVARIANTS -DWITNESS -DOPENSOLARIS_WITNESS
+.endif
+.if defined(WITH_O0) && ${WITH_O0} == "true"
+ CFLAGS+= -O0
+.endif
+.else
+CFLAGS += -DNDEBUG
+.endif
+
+.if defined(WITH_VFS_DEBUG) && ${WITH_VFS_DEBUG} == "true"
+# kernel must also be built with this option for this to work
+CFLAGS+= -DDEBUG_VFS_LOCKS
+.endif
+
+.if defined(WITH_GCOV) && ${WITH_GCOV} == "true"
+CFLAGS+=	 -fprofile-arcs -ftest-coverage
+.endif
+
+DEBUG_FLAGS=-g
+
+.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
+	${MACHINE_ARCH} == "arm"
+CFLAGS+= -DBITS_PER_LONG=32
+.else
+CFLAGS+= -DBITS_PER_LONG=64
+.endif
+
+SRCS=	vnode_if.h device_if.h bus_if.h
+
+# avl
+SRCS+=	avl.c
+
+#lua
+SRCS+=	lapi.c \
+	lauxlib.c \
+	lbaselib.c \
+	lcode.c \
+	lcompat.c \
+	lcorolib.c \
+	lctype.c \
+	ldebug.c \
+	ldo.c \
+	lfunc.c \
+	lgc.c \
+	llex.c \
+	lmem.c \
+	lobject.c \
+	lopcodes.c \
+	lparser.c \
+	lstate.c \
+	lstring.c \
+	lstrlib.c \
+	ltable.c \
+	ltablib.c \
+	ltm.c \
+	lvm.c \
+	lzio.c
+
+#nvpair
+SRCS+=	nvpair.c \
+	fnvpair.c \
+	nvpair_alloc_spl.c \
+	nvpair_alloc_fixed.c
+
+#os/freebsd/spl
+SRCS+=	acl_common.c \
+	callb.c \
+	list.c \
+	sha256c.c \
+	sha512c.c \
+	spl_acl.c \
+	spl_cmn_err.c \
+	spl_dtrace.c \
+	spl_kmem.c \
+	spl_kstat.c \
+	spl_misc.c \
+	spl_policy.c \
+	spl_procfs_list.c \
+	spl_string.c \
+	spl_sunddi.c \
+	spl_sysevent.c \
+	spl_taskq.c \
+	spl_uio.c \
+	spl_vfs.c \
+	spl_vm.c \
+	spl_zlib.c \
+	spl_zone.c
+
+
+.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
+	${MACHINE_ARCH} == "arm"
+SRCS+= spl_atomic.c
+.endif
+
+#os/freebsd/zfs
+SRCS+=	abd_os.c \
+	arc_os.c \
+	crypto_os.c \
+	dmu_os.c \
+	hkdf.c \
+	kmod_core.c \
+	spa_os.c \
+	sysctl_os.c \
+	vdev_file.c \
+	vdev_geom.c \
+	vdev_label_os.c \
+	zfs_acl.c \
+	zfs_ctldir.c \
+	zfs_debug.c \
+	zfs_dir.c \
+	zfs_ioctl_compat.c \
+	zfs_ioctl_os.c \
+	zfs_racct.c \
+	zfs_vfsops.c \
+	zfs_vnops_os.c \
+	zfs_znode.c \
+	zio_crypt.c \
+	zvol_os.c
+
+#unicode
+SRCS+=	uconv.c \
+	u8_textprep.c
+
+#zcommon
+SRCS+=	zfeature_common.c \
+	zfs_comutil.c \
+	zfs_deleg.c \
+	zfs_fletcher.c \
+	zfs_fletcher_avx512.c \
+	zfs_fletcher_intel.c \
+	zfs_fletcher_sse.c \
+	zfs_fletcher_superscalar.c \
+	zfs_fletcher_superscalar4.c \
+	zfs_namecheck.c \
+	zfs_prop.c \
+	zpool_prop.c \
+	zprop_common.c
+
+#zfs
+SRCS+=	abd.c \
+	aggsum.c \
+	arc.c \
+	blkptr.c \
+	bplist.c \
+	bpobj.c \
+	btree.c \
+	cityhash.c \
+	dbuf.c \
+	dbuf_stats.c \
+	bptree.c \
+	bqueue.c \
+	dataset_kstats.c \
+	ddt.c \
+	ddt_zap.c \
+	dmu.c \
+	dmu_diff.c \
+	dmu_object.c \
+	dmu_objset.c \
+	dmu_recv.c \
+	dmu_redact.c \
+	dmu_send.c \
+	dmu_traverse.c \
+	dmu_tx.c \
+	dmu_zfetch.c \
+	dnode.c \
+	dnode_sync.c \
+	dsl_dataset.c \
+	dsl_deadlist.c \
+	dsl_deleg.c \
+	dsl_bookmark.c \
+	dsl_dir.c \
+	dsl_crypt.c \
+	dsl_destroy.c \
+	dsl_pool.c \
+	dsl_prop.c \
+	dsl_scan.c \
+	dsl_synctask.c \
+	dsl_userhold.c \
+	fm.c \
+	gzip.c \
+	lzjb.c \
+	lz4.c \
+	metaslab.c \
+	mmp.c \
+	multilist.c \
+	objlist.c \
+	pathname.c \
+	range_tree.c \
+	refcount.c \
+	rrwlock.c \
+	sa.c \
+	sha256.c \
+	skein_zfs.c \
+	spa.c \
+	spa_boot.c \
+	spa_checkpoint.c \
+	spa_config.c \
+	spa_errlog.c \
+	spa_history.c \
+	spa_log_spacemap.c \
+	spa_misc.c \
+	spa_stats.c \
+	space_map.c \
+	space_reftree.c \
+	txg.c \
+	uberblock.c \
+	unique.c \
+	vdev.c \
+	vdev_cache.c \
+	vdev_draid.c \
+	vdev_draid_rand.c \
+	vdev_indirect.c \
+	vdev_indirect_births.c \
+	vdev_indirect_mapping.c \
+	vdev_initialize.c \
+	vdev_label.c \
+	vdev_mirror.c \
+	vdev_missing.c \
+	vdev_queue.c \
+	vdev_raidz.c \
+	vdev_raidz_math.c \
+	vdev_raidz_math_scalar.c \
+	vdev_rebuild.c \
+	vdev_raidz_math_avx2.c \
+	vdev_raidz_math_avx512bw.c \
+	vdev_raidz_math_avx512f.c \
+	vdev_raidz_math_sse2.c \
+	vdev_raidz_math_ssse3.c \
+	vdev_removal.c \
+	vdev_root.c \
+	vdev_trim.c \
+	zap.c \
+	zap_leaf.c \
+	zap_micro.c \
+	zcp.c \
+	zcp_get.c \
+	zcp_global.c \
+	zcp_iter.c \
+	zcp_set.c \
+	zcp_synctask.c \
+	zfeature.c \
+	zfs_byteswap.c \
+	zfs_file_os.c \
+	zfs_fm.c \
+	zfs_fuid.c \
+	zfs_ioctl.c \
+	zfs_log.c \
+	zfs_onexit.c \
+	zfs_quota.c \
+	zfs_ratelimit.c \
+	zfs_replay.c \
+	zfs_rlock.c \
+	zfs_sa.c \
+	zfs_vnops.c \
+	zil.c \
+	zio.c \
+	zio_checksum.c \
+	zio_compress.c \
+	zio_inject.c \
+	zle.c \
+	zrlock.c \
+	zthr.c \
+	zvol.c
+
+#zstd
+SRCS+=	zfs_zstd.c \
+	zstd.c
+
+beforeinstall:
+.if ${MK_DEBUG_FILES} != "no"
+	mtree -eu \
+	    -f /etc/mtree/BSD.debug.dist \
+	    -p ${DESTDIR}/usr/lib
+.endif
+
+.include <bsd.kmod.mk>
+
+
+CFLAGS.gcc+= -Wno-pointer-to-int-cast
+CFLAGS.clang+= ${NO_WUNUSED_BUT_SET_VARIABLE}
+
+CFLAGS.lapi.c= -Wno-cast-qual
+CFLAGS.lcompat.c= -Wno-cast-qual
+CFLAGS.lobject.c= -Wno-cast-qual
+CFLAGS.ltable.c= -Wno-cast-qual
+CFLAGS.lvm.c= -Wno-cast-qual
+CFLAGS.nvpair.c= -DHAVE_RPC_TYPES -Wno-cast-qual
+CFLAGS.spl_string.c= -Wno-cast-qual
+CFLAGS.spl_vm.c= -Wno-cast-qual
+CFLAGS.spl_zlib.c= -Wno-cast-qual
+CFLAGS.abd.c= -Wno-cast-qual
+CFLAGS.zfs_log.c= -Wno-cast-qual
+CFLAGS.zfs_vnops_os.c= -Wno-pointer-arith
+CFLAGS.u8_textprep.c= -Wno-cast-qual
+CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zfs_fletcher_intel.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zfs_fletcher_sse.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zprop_common.c= -Wno-cast-qual
+CFLAGS.ddt.c= -Wno-cast-qual
+CFLAGS.dmu.c= -Wno-cast-qual
+CFLAGS.dmu_traverse.c= -Wno-cast-qual
+CFLAGS.dsl_dir.c= -Wno-cast-qual
+CFLAGS.dsl_deadlist.c= -Wno-cast-qual
+CFLAGS.dsl_prop.c= -Wno-cast-qual
+CFLAGS.fm.c= -Wno-cast-qual
+CFLAGS.lz4.c= -Wno-cast-qual
+CFLAGS.spa.c= -Wno-cast-qual
+CFLAGS.spa_misc.c= -Wno-cast-qual
+CFLAGS.sysctl_os.c= -include ../zfs_config.h
+CFLAGS.vdev_draid.c= -Wno-cast-qual
+CFLAGS.vdev_raidz.c= -Wno-cast-qual
+CFLAGS.vdev_raidz_math.c= -Wno-cast-qual
+CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual
+CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
+CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
+CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
+CFLAGS.zap_leaf.c= -Wno-cast-qual
+CFLAGS.zap_micro.c= -Wno-cast-qual
+CFLAGS.zcp.c= -Wno-cast-qual
+CFLAGS.zfs_fm.c= -Wno-cast-qual
+CFLAGS.zfs_ioctl.c= -Wno-cast-qual
+CFLAGS.zil.c= -Wno-cast-qual
+CFLAGS.zio.c= -Wno-cast-qual
+CFLAGS.zrlock.c= -Wno-cast-qual
+CFLAGS.zfs_zstd.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zstd.c= -fno-tree-vectorize -U__BMI__

diff --git a/zfs/module/Makefile.in b/zfs/module/Makefile.in
index ea8b834..b633843 100644
--- a/zfs/module/Makefile.in
+++ b/zfs/module/Makefile.in

@@ -1,78 +1,149 @@
-obj-m += avl/
-obj-m += icp/
-obj-m += lua/
-obj-m += nvpair/
-obj-m += spl/
-obj-m += unicode/
-obj-m += zcommon/
-obj-m += zfs/
+include Kbuild
 
 INSTALL_MOD_DIR ?= extra
+INSTALL_MOD_PATH ?= $(DESTDIR)
 
-ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
-ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
-ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
-ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include/spl
-ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include
+SUBDIR_TARGETS = icp lua zstd
 
-ZFS_MODULE_CPPFLAGS += -D_KERNEL
-ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
+all: modules
+distclean maintainer-clean: clean
+install: modules_install
+uninstall: modules_uninstall
+check:
 
-@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
-@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
+.PHONY: all distclean maintainer-clean install uninstall check distdir \
+	modules modules-Linux modules-FreeBSD modules-unknown \
+	clean clean-Linux clean-FreeBSD \
+	modules_install modules_install-Linux modules_install-FreeBSD \
+	modules_uninstall modules_uninstall-Linux modules_uninstall-FreeBSD \
+	cppcheck cppcheck-Linux cppcheck-FreeBSD
 
-export ZFS_MODULE_CFLAGS ZFS_MODULE_CPPFLAGS
+# For FreeBSD, use debug options from ./configure if not overridden.
+export WITH_DEBUG ?= @WITH_DEBUG@
+export WITH_INVARIANTS ?= @WITH_INVARIANTS@
 
-SUBDIR_TARGETS = icp lua
+# Filter out options that FreeBSD make doesn't understand
+getflags = ( \
+set -- \
+  $(filter-out --%,$(firstword $(MFLAGS))) \
+  $(filter -I%,$(MFLAGS)) \
+  $(filter -j%,$(MFLAGS)); \
+fmakeflags=""; \
+while getopts :deiI:j:knqrstw flag; do \
+  case $$flag in \
+    \?) :;; \
+    :) if [ $$OPTARG = "j" ]; then \
+	 ncpus=$$(sysctl -n kern.smp.cpus 2>/dev/null || :); \
+	 if [ -n "$$ncpus" ]; then fmakeflags="$$fmakeflags -j$$ncpus"; fi; \
+       fi;; \
+    d) fmakeflags="$$fmakeflags -dA";; \
+    *) fmakeflags="$$fmakeflags -$$flag$$OPTARG";; \
+  esac; \
+done; \
+echo $$fmakeflags \
+)
+FMAKEFLAGS = -C @abs_srcdir@ -f Makefile.bsd $(shell $(getflags))
 
-modules:
-	list='$(SUBDIR_TARGETS)'; for targetdir in $$list; do \
-		$(MAKE) -C $$targetdir; \
-	done
-	$(MAKE) -C @LINUX_OBJ@ M=`pwd` @KERNEL_MAKE@ CONFIG_ZFS=m $@
+ifneq (@abs_srcdir@,@abs_builddir@)
+FMAKEFLAGS += MAKEOBJDIR=@abs_builddir@
+endif
 
-clean:
+FMAKE = env -u MAKEFLAGS make $(FMAKEFLAGS)
+
+modules-Linux:
+	list='$(SUBDIR_TARGETS)'; for td in $$list; do $(MAKE) -C $$td; done
+	$(MAKE) -C @LINUX_OBJ@ $(if @KERNEL_CC@,CC=@KERNEL_CC@) \
+		$(if @KERNEL_LD@,LD=@KERNEL_LD@) $(if @KERNEL_LLVM@,LLVM=@KERNEL_LLVM@) \
+		M="$$PWD" @KERNEL_MAKE@ CONFIG_ZFS=m modules
+
+modules-FreeBSD:
+	+$(FMAKE)
+
+modules-unknown:
+	@true
+
+modules: modules-@ac_system@
+
+clean-Linux:
 	@# Only cleanup the kernel build directories when CONFIG_KERNEL
 	@# is defined.  This indicates that kernel modules should be built.
-@CONFIG_KERNEL_TRUE@	$(MAKE) -C @LINUX_OBJ@ M=`pwd` @KERNEL_MAKE@ $@
+@CONFIG_KERNEL_TRUE@	$(MAKE) -C @LINUX_OBJ@ M="$$PWD" @KERNEL_MAKE@ clean
 
-	if [ -f @LINUX_SYMBOLS@ ]; then $(RM) @LINUX_SYMBOLS@; fi
-	if [ -f Module.markers ]; then $(RM) Module.markers; fi
+	$(RM) @LINUX_SYMBOLS@ Module.markers
+	find . -name '*.ur-safe' -type f -delete
 
-	find . -name '*.ur-safe' -type f -print | xargs $(RM)
+clean-FreeBSD:
+	+$(FMAKE) clean
 
-modules_install:
+clean: clean-@ac_system@
+
+modules_install-Linux:
 	@# Install the kernel modules
-	$(MAKE) -C @LINUX_OBJ@ M=`pwd` $@ \
-		INSTALL_MOD_PATH=$(DESTDIR)$(INSTALL_MOD_PATH) \
+	$(MAKE) -C @LINUX_OBJ@ M="$$PWD" modules_install \
+		INSTALL_MOD_PATH=$(INSTALL_MOD_PATH) \
 		INSTALL_MOD_DIR=$(INSTALL_MOD_DIR) \
 		KERNELRELEASE=@LINUX_VERSION@
 	@# Remove extraneous build products when packaging
-	kmoddir=$(DESTDIR)$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@; \
+	kmoddir=$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@; \
 	if [ -n "$(DESTDIR)" ]; then \
-		find $$kmoddir -name 'modules.*' | xargs $(RM); \
+		find $$kmoddir -name 'modules.*' -delete; \
 	fi
-	sysmap=$(DESTDIR)$(INSTALL_MOD_PATH)/boot/System.map-@LINUX_VERSION@; \
+	@# Debian ships tiny fake System.map files that are
+	@# syntactically valid but just say
+	@# "if you want system.map go install this package"
+	@# Naturally, depmod is less than amused by this.
+	@# So if we find it missing or with one of these present,
+	@# we check for the alternate path for the System.map
+	sysmap=$(INSTALL_MOD_PATH)/boot/System.map-@LINUX_VERSION@; \
+	{ [ -f "$$sysmap" ] && [ $$(wc -l < "$$sysmap") -ge 100 ]; } || \
+		sysmap=$(INSTALL_MOD_PATH)/usr/lib/debug/boot/System.map-@LINUX_VERSION@; \
 	if [ -f $$sysmap ]; then \
 		depmod -ae -F $$sysmap @LINUX_VERSION@; \
 	fi
 
-modules_uninstall:
+modules_install-FreeBSD:
+	@# Install the kernel modules
+	+$(FMAKE) install
+
+modules_install: modules_install-@ac_system@
+
+modules_uninstall-Linux:
 	@# Uninstall the kernel modules
-	kmoddir=$(DESTDIR)$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@
-	list='$(obj-m)'; for objdir in $$list; do \
+	kmoddir=$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@; \
+	for objdir in $(ZFS_MODULES); do \
 		$(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \
 	done
 
-distdir:
-	list='$(obj-m)'; for objdir in $$list; do \
-		(cd @top_srcdir@/module && find $$objdir \
-		-name '*.c' -o -name '*.h' -o -name '*.S' | \
-		xargs -r cp --parents -t @abs_top_builddir@/module/$$distdir); \
-	done
+modules_uninstall-FreeBSD:
+	@false
 
-distclean maintainer-clean: clean
-install: modules_install
-uninstall: modules_uninstall
-all: modules
-check:
+modules_uninstall: modules_uninstall-@ac_system@
+
+cppcheck-Linux:
+	@CPPCHECK@ -j@CPU_COUNT@ --std=c99 --quiet --force --error-exitcode=2 \
+		--inline-suppr \
+		--suppress=unmatchedSuppression \
+		--suppress=noValidConfiguration \
+		--enable=warning,information -D_KERNEL \
+		--include=@LINUX_OBJ@/include/generated/autoconf.h \
+		--include=@top_srcdir@/zfs_config.h \
+		--config-exclude=@LINUX_OBJ@/include \
+		-I @LINUX_OBJ@/include \
+		-I @top_srcdir@/include/os/linux/kernel \
+		-I @top_srcdir@/include/os/linux/spl \
+		-I @top_srcdir@/include/os/linux/zfs \
+		-I @top_srcdir@/include \
+		avl icp lua nvpair spl unicode zcommon zfs zstd os/linux
+
+cppcheck-FreeBSD:
+	@true
+
+cppcheck: cppcheck-@ac_system@
+
+distdir:
+	(cd @srcdir@ && find $(ZFS_MODULES) os -name '*.[chS]') | \
+	while read path; do \
+		mkdir -p $$distdir/$${path%/*}; \
+		cp @srcdir@/$$path $$distdir/$$path; \
+	done; \
+	cp @srcdir@/Makefile.bsd $$distdir/Makefile.bsd

diff --git a/zfs/module/avl/Makefile.in b/zfs/module/avl/Makefile.in
index 217fa3c..991d5f9 100644
--- a/zfs/module/avl/Makefile.in
+++ b/zfs/module/avl/Makefile.in

@@ -1,10 +1,10 @@
-src = @abs_top_srcdir@/module/avl
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+endif
 
 MODULE := zavl
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
 $(MODULE)-objs += avl.o

diff --git a/zfs/module/avl/avl.c b/zfs/module/avl/avl.c
index 1d2843f..f761a8a 100644
--- a/zfs/module/avl/avl.c
+++ b/zfs/module/avl/avl.c

@@ -96,6 +96,9 @@
  * which each have their own compilation environments and subsequent
  * requirements. Each of these environments must be considered when adding
  * dependencies from avl.c.
+ *
+ * See the illumos manual page for more information on the AVL functions:
+ * [1] https://illumos.org/man/9f/avl
  */
 
 #include <sys/types.h>
@@ -103,21 +106,7 @@
 #include <sys/debug.h>
 #include <sys/avl.h>
 #include <sys/cmn_err.h>
-
-/*
- * Small arrays to translate between balance (or diff) values and child indices.
- *
- * Code that deals with binary tree data structures will randomly use
- * left and right children when examining a tree.  C "if()" statements
- * which evaluate randomly suffer from very poor hardware branch prediction.
- * In this code we avoid some of the branch mispredictions by using the
- * following translation arrays. They replace random branches with an
- * additional memory reference. Since the translation arrays are both very
- * small the data should remain efficiently in cache.
- */
-static const int  avl_child2balance[2]	= {-1, 1};
-static const int  avl_balance2child[]	= {0, 0, 1};
-
+#include <sys/mod.h>
 
 /*
  * Walk from one node to the previous valued node (ie. an infix walk
@@ -268,14 +257,13 @@
 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
 		ASSERT(-1 <= diff && diff <= 1);
 		if (diff == 0) {
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 			if (where != NULL)
 				*where = 0;
 #endif
 			return (AVL_NODE2DATA(node, off));
 		}
-		child = avl_balance2child[1 + diff];
-
+		child = (diff > 0);
 	}
 
 	if (where != NULL)
@@ -488,7 +476,6 @@
 	int which_child = AVL_INDEX2CHILD(where);
 	size_t off = tree->avl_offset;
 
-	ASSERT(tree);
 #ifdef _LP64
 	ASSERT(((uintptr_t)new_data & 0x7) == 0);
 #endif
@@ -528,7 +515,7 @@
 		 * Compute the new balance
 		 */
 		old_balance = AVL_XBALANCE(node);
-		new_balance = old_balance + avl_child2balance[which_child];
+		new_balance = old_balance + (which_child ? 1 : -1);
 
 		/*
 		 * If we introduced equal balance, then we are done immediately
@@ -577,7 +564,7 @@
 {
 	avl_node_t *node;
 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	int diff;
 #endif
 
@@ -592,7 +579,7 @@
 	 */
 	node = AVL_DATA2NODE(here, tree->avl_offset);
 
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	diff = tree->avl_compar(new_data, here);
 	ASSERT(-1 <= diff && diff <= 1);
 	ASSERT(diff != 0);
@@ -603,7 +590,7 @@
 		node = node->avl_child[child];
 		child = 1 - child;
 		while (node->avl_child[child] != NULL) {
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 			diff = tree->avl_compar(new_data,
 			    AVL_NODE2DATA(node, tree->avl_offset));
 			ASSERT(-1 <= diff && diff <= 1);
@@ -612,7 +599,7 @@
 #endif
 			node = node->avl_child[child];
 		}
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 		diff = tree->avl_compar(new_data,
 		    AVL_NODE2DATA(node, tree->avl_offset));
 		ASSERT(-1 <= diff && diff <= 1);
@@ -676,8 +663,6 @@
 	int which_child;
 	size_t off = tree->avl_offset;
 
-	ASSERT(tree);
-
 	delete = AVL_DATA2NODE(data, off);
 
 	/*
@@ -696,7 +681,7 @@
 		 * choose node to swap from whichever side is taller
 		 */
 		old_balance = AVL_XBALANCE(delete);
-		left = avl_balance2child[old_balance + 1];
+		left = (old_balance > 0);
 		right = 1 - left;
 
 		/*
@@ -780,7 +765,7 @@
 		 */
 		node = parent;
 		old_balance = AVL_XBALANCE(node);
-		new_balance = old_balance - avl_child2balance[which_child];
+		new_balance = old_balance - (which_child ? 1 : -1);
 		parent = AVL_XPARENT(node);
 		which_child = AVL_XCHILD(node);
 
@@ -808,6 +793,92 @@
 	} while (parent != NULL);
 }
 
+/*
+ * Remove obj from the tree and add it back, so that it ends up at the
+ * position its current comparison value calls for.  The do/while (0)
+ * wrapper makes the macro expand to exactly one statement, so it stays
+ * correct even as the body of an unbraced if/else.
+ */
+#define	AVL_REINSERT(tree, obj)			\
+	do {					\
+		avl_remove((tree), (obj));	\
+		avl_add((tree), (obj));		\
+	} while (0)
+
+/*
+ * Restore the ordering of the tree after obj may have become smaller.
+ * The caller guarantees obj still does not compare greater than its
+ * successor (checked by the ASSERT), so only the predecessor is examined.
+ * Returns B_TRUE if obj had to be reinserted, B_FALSE if it was already
+ * in the right place.
+ */
+boolean_t
+avl_update_lt(avl_tree_t *t, void *obj)
+{
+	void *neighbor;
+
+	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
+	    (t->avl_compar(obj, neighbor) <= 0));
+
+	neighbor = AVL_PREV(t, obj);
+	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
+		AVL_REINSERT(t, obj);
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Restore the ordering of the tree after obj may have become larger.
+ * The caller guarantees obj still does not compare less than its
+ * predecessor (checked by the ASSERT), so only the successor is examined.
+ * Returns B_TRUE if obj had to be reinserted, B_FALSE if it was already
+ * in the right place.
+ */
+boolean_t
+avl_update_gt(avl_tree_t *t, void *obj)
+{
+	void *neighbor;
+
+	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
+	    (t->avl_compar(obj, neighbor) >= 0));
+
+	neighbor = AVL_NEXT(t, obj);
+	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
+		AVL_REINSERT(t, obj);
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Restore the ordering of the tree after obj may have changed in either
+ * direction: obj is reinserted if it now compares less than its
+ * predecessor or greater than its successor.  Returns B_TRUE if obj was
+ * reinserted, B_FALSE otherwise.
+ */
+boolean_t
+avl_update(avl_tree_t *t, void *obj)
+{
+	void *neighbor;
+
+	neighbor = AVL_PREV(t, obj);
+	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
+		AVL_REINSERT(t, obj);
+		return (B_TRUE);
+	}
+
+	neighbor = AVL_NEXT(t, obj);
+	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
+		AVL_REINSERT(t, obj);
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
 void
 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
 {
@@ -949,7 +992,7 @@
 	--tree->avl_numnodes;
 
 	/*
-	 * If we just did a right child or there isn't one, go up to parent.
+	 * If we just removed a right child or there isn't one, go up to parent.
 	 */
 	if (child == 1 || parent->avl_child[1] == NULL) {
 		node = parent;
@@ -993,7 +1036,6 @@
 }
 
 #if defined(_KERNEL)
-#include <linux/module.h>
 
 static int __init
 avl_init(void)
@@ -1008,11 +1050,12 @@
 
 module_init(avl_init);
 module_exit(avl_fini);
+#endif
 
-MODULE_DESCRIPTION("Generic AVL tree implementation");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+ZFS_MODULE_DESCRIPTION("Generic AVL tree implementation");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 
 EXPORT_SYMBOL(avl_create);
 EXPORT_SYMBOL(avl_find);
@@ -1029,4 +1072,6 @@
 EXPORT_SYMBOL(avl_numnodes);
 EXPORT_SYMBOL(avl_destroy_nodes);
 EXPORT_SYMBOL(avl_destroy);
-#endif
+EXPORT_SYMBOL(avl_update_lt);
+EXPORT_SYMBOL(avl_update_gt);
+EXPORT_SYMBOL(avl_update);

diff --git a/zfs/module/icp/Makefile.in b/zfs/module/icp/Makefile.in
index 8ce60de..ce84999 100644
--- a/zfs/module/icp/Makefile.in
+++ b/zfs/module/icp/Makefile.in

@@ -1,44 +1,17 @@
-src = @abs_top_srcdir@/module/icp
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+icp_include = $(src)/include
+else
+icp_include = $(srctree)/$(src)/include
+endif
 
 MODULE := icp
 
-TARGET_ASM_DIR = @TARGET_ASM_DIR@
-
-ifeq ($(TARGET_ASM_DIR), asm-x86_64)
-ASM_SOURCES := asm-x86_64/aes/aeskey.o
-ASM_SOURCES += asm-x86_64/aes/aes_amd64.o
-ASM_SOURCES += asm-x86_64/aes/aes_aesni.o
-ASM_SOURCES += asm-x86_64/modes/gcm_pclmulqdq.o
-ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o
-ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o
-ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o
-ASM_SOURCES += asm-x86_64/aes/aeskey.o
-ASM_SOURCES += asm-x86_64/aes/aes_amd64.o
-ASM_SOURCES += asm-x86_64/aes/aes_aesni.o
-ASM_SOURCES += asm-x86_64/modes/gcm_pclmulqdq.o
-ASM_SOURCES += asm-x86_64/modes/aesni-gcm-x86_64.o
-ASM_SOURCES += asm-x86_64/modes/ghash-x86_64.o
-ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o
-ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o
-ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o
-
-endif
-
-ifeq ($(TARGET_ASM_DIR), asm-i386)
-ASM_SOURCES :=
-endif
-
-ifeq ($(TARGET_ASM_DIR), asm-generic)
-ASM_SOURCES :=
-endif
-
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-asflags-y := -I@abs_top_srcdir@/module/icp/include
-asflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-ccflags-y := -I@abs_top_srcdir@/module/icp/include
-ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+asflags-y := -I$(icp_include)
+ccflags-y := -I$(icp_include)
 
 $(MODULE)-objs += illumos-crypto.o
 $(MODULE)-objs += api/kcf_cipher.o
@@ -54,7 +27,6 @@
 $(MODULE)-objs += spi/kcf_spi.o
 $(MODULE)-objs += io/aes.o
 $(MODULE)-objs += io/edonr_mod.o
-$(MODULE)-objs += io/sha1_mod.o
 $(MODULE)-objs += io/sha2_mod.o
 $(MODULE)-objs += io/skein_mod.o
 $(MODULE)-objs += os/modhash.o
@@ -70,24 +42,32 @@
 $(MODULE)-objs += algs/aes/aes_impl.o
 $(MODULE)-objs += algs/aes/aes_modes.o
 $(MODULE)-objs += algs/edonr/edonr.o
-$(MODULE)-objs += algs/sha1/sha1.o
 $(MODULE)-objs += algs/sha2/sha2.o
-$(MODULE)-objs += algs/sha1/sha1.o
 $(MODULE)-objs += algs/skein/skein.o
 $(MODULE)-objs += algs/skein/skein_block.o
 $(MODULE)-objs += algs/skein/skein_iv.o
-$(MODULE)-objs += $(ASM_SOURCES)
+
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aeskey.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_amd64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o
 
 $(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o
 $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o
 $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o
 
-# Suppress objtool "can't find jump dest instruction at" warnings.  They
-# are caused by the constants which are defined in the text section of the
-# assembly file using .byte instructions (e.g. bswap_mask).  The objtool
-# utility tries to interpret them as opcodes and obviously fails doing so.
+# Suppress objtool "return with modified stack frame" warnings.
 OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
-OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
+
+# Suppress objtool "unsupported stack pointer realignment" warnings. We are
+# not using a DRAP register while aligning the stack to a 64 byte boundary.
+# See #6950 for the reasoning.
+OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
+OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
 
 ICP_DIRS = \
 	api \
@@ -99,13 +79,11 @@
 	algs/aes \
 	algs/edonr \
 	algs/modes \
-	algs/sha1 \
 	algs/sha2 \
 	algs/skein \
 	asm-x86_64 \
 	asm-x86_64/aes \
 	asm-x86_64/modes \
-	asm-x86_64/sha1 \
 	asm-x86_64/sha2 \
 	asm-i386 \
 	asm-generic

diff --git a/zfs/module/icp/algs/aes/aes_impl.c b/zfs/module/icp/algs/aes/aes_impl.c
index 571fdd3..037be0d 100644
--- a/zfs/module/icp/algs/aes/aes_impl.c
+++ b/zfs/module/icp/algs/aes/aes_impl.c

@@ -25,9 +25,9 @@
 #include <sys/zfs_context.h>
 #include <sys/crypto/icp.h>
 #include <sys/crypto/spi.h>
+#include <sys/simd.h>
 #include <modes/modes.h>
 #include <aes/aes_impl.h>
-#include <linux/simd.h>
 
 /*
  * Initialize AES encryption and decryption key schedules.
@@ -406,7 +406,6 @@
 }
 
 #if defined(_KERNEL) && defined(__linux__)
-#include <linux/mod_compat.h>
 
 static int
 icp_aes_impl_set(const char *val, zfs_kernel_param_t *kp)

diff --git a/zfs/module/icp/algs/aes/aes_impl_aesni.c b/zfs/module/icp/algs/aes/aes_impl_aesni.c
index 222c176..4b5eefd 100644
--- a/zfs/module/icp/algs/aes/aes_impl_aesni.c
+++ b/zfs/module/icp/algs/aes/aes_impl_aesni.c

@@ -24,7 +24,8 @@
 
 #if defined(__x86_64) && defined(HAVE_AES)
 
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
+#include <sys/types.h>
 
 /* These functions are used to execute AES-NI instructions: */
 extern int rijndael_key_setup_enc_intel(uint32_t rk[],

diff --git a/zfs/module/icp/algs/aes/aes_impl_generic.c b/zfs/module/icp/algs/aes/aes_impl_generic.c
index a3b75db..427c096 100644
--- a/zfs/module/icp/algs/aes/aes_impl_generic.c
+++ b/zfs/module/icp/algs/aes/aes_impl_generic.c

@@ -1233,7 +1233,7 @@
 	.encrypt = &aes_generic_encrypt,
 	.decrypt = &aes_generic_decrypt,
 	.is_supported = &aes_generic_will_work,
-#if defined(_LITTLE_ENDIAN)
+#if defined(_ZFS_LITTLE_ENDIAN)
 	.needs_byteswap = B_TRUE,
 #else
 	.needs_byteswap = B_FALSE,

diff --git a/zfs/module/icp/algs/aes/aes_impl_x86-64.c b/zfs/module/icp/algs/aes/aes_impl_x86-64.c
index b4515fa..19f8fd5 100644
--- a/zfs/module/icp/algs/aes/aes_impl_x86-64.c
+++ b/zfs/module/icp/algs/aes/aes_impl_x86-64.c

@@ -24,19 +24,7 @@
 
 #if defined(__x86_64)
 
-#include <linux/simd_x86.h>
-
-/* These functions are used to execute amd64 instructions for AMD or Intel: */
-extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
-	const uint32_t cipherKey[], int keyBits);
-extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
-	const uint32_t cipherKey[], int keyBits);
-extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
-	const uint32_t pt[4], uint32_t ct[4]);
-extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
-	const uint32_t ct[4], uint32_t pt[4]);
-
-
+#include <sys/simd.h>
 #include <aes/aes_impl.h>
 
 /*

diff --git a/zfs/module/icp/algs/edonr/edonr.c b/zfs/module/icp/algs/edonr/edonr.c
index 7c67709..baf8bb8 100644
--- a/zfs/module/icp/algs/edonr/edonr.c
+++ b/zfs/module/icp/algs/edonr/edonr.c

@@ -337,15 +337,17 @@
  *
  * Checksum functions like this one can go over the stack frame size check
  * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024).  We can
- * safely ignore the compiler error since we know that in ZoL, that
+ * safely ignore the compiler error since we know that in OpenZFS, that
  * the function will be called from a worker thread that won't be using
  * much stack.  The only function that goes over the 1k limit is Q512(),
  * which only goes over it by a hair (1248 bytes on ARM32).
  */
 #include <sys/isa_defs.h>	/* for _ILP32 */
-#ifdef _ILP32   /* We're 32-bit, assume small stack frames */
+#if defined(_ILP32)   /* We're 32-bit, assume small stack frames */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
 #endif
+#endif
 
 #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
 static inline size_t
@@ -488,7 +490,7 @@
 		state->hashbitlen = 512;
 		state->bits_processed = 0;
 		state->unprocessed_bits = 0;
-		bcopy(i512p2, hashState224(state)->DoublePipe,
+		bcopy(i512p2, hashState512(state)->DoublePipe,
 		    16 * sizeof (uint64_t));
 		break;
 	}

diff --git a/zfs/module/icp/algs/edonr/edonr_byteorder.h b/zfs/module/icp/algs/edonr/edonr_byteorder.h
index 532dfd7..2b5d482 100644
--- a/zfs/module/icp/algs/edonr/edonr_byteorder.h
+++ b/zfs/module/icp/algs/edonr/edonr_byteorder.h

@@ -52,10 +52,10 @@
 #endif /* __BYTE_ORDER || BYTE_ORDER */
 
 #if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
-#if defined(_BIG_ENDIAN) || defined(_MIPSEB)
+#if defined(_ZFS_BIG_ENDIAN) || defined(_MIPSEB)
 #define	MACHINE_IS_BIG_ENDIAN
 #endif
-#if defined(_LITTLE_ENDIAN) || defined(_MIPSEL)
+#if defined(_ZFS_LITTLE_ENDIAN) || defined(_MIPSEL)
 #define	MACHINE_IS_LITTLE_ENDIAN
 #endif
 #endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */

diff --git a/zfs/module/icp/algs/modes/cbc.c b/zfs/module/icp/algs/modes/cbc.c
index 2cc94ec..85864f5 100644
--- a/zfs/module/icp/algs/modes/cbc.c
+++ b/zfs/module/icp/algs/modes/cbc.c

@@ -60,8 +60,7 @@
 	}
 
 	lastp = (uint8_t *)ctx->cbc_iv;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	do {
 		/* Unprocessed data from last call. */
@@ -79,47 +78,28 @@
 			blockp = datap;
 		}
 
-		if (out == NULL) {
-			/*
-			 * XOR the previous cipher block or IV with the
-			 * current clear block.
-			 */
-			xor_block(lastp, blockp);
-			encrypt(ctx->cbc_keysched, blockp, blockp);
+		/*
+		 * XOR the previous cipher block or IV with the
+		 * current clear block.
+		 */
+		xor_block(blockp, lastp);
+		encrypt(ctx->cbc_keysched, lastp, lastp);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			ctx->cbc_lastp = blockp;
-			lastp = blockp;
-
-			if (ctx->cbc_remainder_len > 0) {
-				bcopy(blockp, ctx->cbc_copy_to,
-				    ctx->cbc_remainder_len);
-				bcopy(blockp + ctx->cbc_remainder_len, datap,
-				    need);
-			}
+		/* copy block to where it belongs */
+		if (out_data_1_len == block_size) {
+			copy_block(lastp, out_data_1);
 		} else {
-			/*
-			 * XOR the previous cipher block or IV with the
-			 * current clear block.
-			 */
-			xor_block(blockp, lastp);
-			encrypt(ctx->cbc_keysched, lastp, lastp);
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
-
-			/* copy block to where it belongs */
-			if (out_data_1_len == block_size) {
-				copy_block(lastp, out_data_1);
-			} else {
-				bcopy(lastp, out_data_1, out_data_1_len);
-				if (out_data_2 != NULL) {
-					bcopy(lastp + out_data_1_len,
-					    out_data_2,
-					    block_size - out_data_1_len);
-				}
+			bcopy(lastp, out_data_1, out_data_1_len);
+			if (out_data_2 != NULL) {
+				bcopy(lastp + out_data_1_len,
+				    out_data_2,
+				    block_size - out_data_1_len);
 			}
-			/* update offset */
-			out->cd_offset += block_size;
 		}
+		/* update offset */
+		out->cd_offset += block_size;
 
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->cbc_remainder_len != 0) {
@@ -187,8 +167,7 @@
 	}
 
 	lastp = ctx->cbc_lastp;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	do {
 		/* Unprocessed data from last call. */
@@ -209,13 +188,9 @@
 		/* LINTED: pointer alignment */
 		copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx));
 
-		if (out != NULL) {
-			decrypt(ctx->cbc_keysched, blockp,
-			    (uint8_t *)ctx->cbc_remainder);
-			blockp = (uint8_t *)ctx->cbc_remainder;
-		} else {
-			decrypt(ctx->cbc_keysched, blockp, blockp);
-		}
+		decrypt(ctx->cbc_keysched, blockp,
+		    (uint8_t *)ctx->cbc_remainder);
+		blockp = (uint8_t *)ctx->cbc_remainder;
 
 		/*
 		 * XOR the previous cipher block or IV with the
@@ -226,25 +201,18 @@
 		/* LINTED: pointer alignment */
 		lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx);
 
-		if (out != NULL) {
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			bcopy(blockp, out_data_1, out_data_1_len);
-			if (out_data_2 != NULL) {
-				bcopy(blockp + out_data_1_len, out_data_2,
-				    block_size - out_data_1_len);
-			}
-
-			/* update offset */
-			out->cd_offset += block_size;
-
-		} else if (ctx->cbc_remainder_len > 0) {
-			/* copy temporary block to where it belongs */
-			bcopy(blockp, ctx->cbc_copy_to, ctx->cbc_remainder_len);
-			bcopy(blockp + ctx->cbc_remainder_len, datap, need);
+		bcopy(blockp, out_data_1, out_data_1_len);
+		if (out_data_2 != NULL) {
+			bcopy(blockp + out_data_1_len, out_data_2,
+			    block_size - out_data_1_len);
 		}
 
+		/* update offset */
+		out->cd_offset += block_size;
+
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->cbc_remainder_len != 0) {
 			datap += need;

diff --git a/zfs/module/icp/algs/modes/ccm.c b/zfs/module/icp/algs/modes/ccm.c
index f4075f5..5d6507c 100644
--- a/zfs/module/icp/algs/modes/ccm.c
+++ b/zfs/module/icp/algs/modes/ccm.c

@@ -68,8 +68,7 @@
 	}
 
 	lastp = (uint8_t *)ctx->ccm_cb;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	mac_buf = (uint8_t *)ctx->ccm_mac_buf;
 
@@ -108,13 +107,13 @@
 		 * Increment counter. Counter bits are confined
 		 * to the bottom 64 bits of the counter block.
 		 */
-#ifdef _LITTLE_ENDIAN
+#ifdef _ZFS_LITTLE_ENDIAN
 		counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
 		counter = htonll(counter + 1);
 #else
 		counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
 		counter++;
-#endif	/* _LITTLE_ENDIAN */
+#endif	/* _ZFS_LITTLE_ENDIAN */
 		counter &= ctx->ccm_counter_mask;
 		ctx->ccm_cb[1] =
 		    (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
@@ -126,31 +125,22 @@
 
 		ctx->ccm_processed_data_len += block_size;
 
-		if (out == NULL) {
-			if (ctx->ccm_remainder_len > 0) {
-				bcopy(blockp, ctx->ccm_copy_to,
-				    ctx->ccm_remainder_len);
-				bcopy(blockp + ctx->ccm_remainder_len, datap,
-				    need);
-			}
-		} else {
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			/* copy block to where it belongs */
-			if (out_data_1_len == block_size) {
-				copy_block(lastp, out_data_1);
-			} else {
-				bcopy(lastp, out_data_1, out_data_1_len);
-				if (out_data_2 != NULL) {
-					bcopy(lastp + out_data_1_len,
-					    out_data_2,
-					    block_size - out_data_1_len);
-				}
+		/* copy block to where it belongs */
+		if (out_data_1_len == block_size) {
+			copy_block(lastp, out_data_1);
+		} else {
+			bcopy(lastp, out_data_1, out_data_1_len);
+			if (out_data_2 != NULL) {
+				bcopy(lastp + out_data_1_len,
+				    out_data_2,
+				    block_size - out_data_1_len);
 			}
-			/* update offset */
-			out->cd_offset += block_size;
 		}
+		/* update offset */
+		out->cd_offset += block_size;
 
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->ccm_remainder_len != 0) {
@@ -328,7 +318,7 @@
  * This will only deal with decrypting the last block of the input that
  * might not be a multiple of block length.
  */
-void
+static void
 ccm_decrypt_incomplete_block(ccm_ctx_t *ctx,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
 {
@@ -468,13 +458,13 @@
 		 * Increment counter.
 		 * Counter bits are confined to the bottom 64 bits
 		 */
-#ifdef _LITTLE_ENDIAN
+#ifdef _ZFS_LITTLE_ENDIAN
 		counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
 		counter = htonll(counter + 1);
 #else
 		counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
 		counter++;
-#endif	/* _LITTLE_ENDIAN */
+#endif	/* _ZFS_LITTLE_ENDIAN */
 		counter &= ctx->ccm_counter_mask;
 		ctx->ccm_cb[1] =
 		    (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
@@ -583,7 +573,7 @@
 	return (CRYPTO_SUCCESS);
 }
 
-int
+static int
 ccm_validate_args(CK_AES_CCM_PARAMS *ccm_param, boolean_t is_encrypt_init)
 {
 	size_t macSize, nonceSize;
@@ -694,7 +684,7 @@
 		mask |= (1ULL << q);
 	}
 
-#ifdef _LITTLE_ENDIAN
+#ifdef _ZFS_LITTLE_ENDIAN
 	mask = htonll(mask);
 #endif
 	aes_ctx->ccm_counter_mask = mask;
@@ -768,11 +758,7 @@
 	}
 }
 
-/*
- * The following function should be call at encrypt or decrypt init time
- * for AES CCM mode.
- */
-int
+static int
 ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
@@ -856,6 +842,10 @@
 	return (CRYPTO_SUCCESS);
 }
 
+/*
+ * The following function should be called at encrypt or decrypt init time
+ * for AES CCM mode.
+ */
 int
 ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag,
     boolean_t is_encrypt_init, size_t block_size,

diff --git a/zfs/module/icp/algs/modes/ctr.c b/zfs/module/icp/algs/modes/ctr.c
index e3b0e12..0188bdd 100644
--- a/zfs/module/icp/algs/modes/ctr.c
+++ b/zfs/module/icp/algs/modes/ctr.c

@@ -61,8 +61,7 @@
 	}
 
 	lastp = (uint8_t *)ctx->ctr_cb;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	do {
 		/* Unprocessed data from last call. */
@@ -111,26 +110,17 @@
 		 */
 		xor_block(blockp, lastp);
 
-		if (out == NULL) {
-			if (ctx->ctr_remainder_len > 0) {
-				bcopy(lastp, ctx->ctr_copy_to,
-				    ctx->ctr_remainder_len);
-				bcopy(lastp + ctx->ctr_remainder_len, datap,
-				    need);
-			}
-		} else {
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			/* copy block to where it belongs */
-			bcopy(lastp, out_data_1, out_data_1_len);
-			if (out_data_2 != NULL) {
-				bcopy(lastp + out_data_1_len, out_data_2,
-				    block_size - out_data_1_len);
-			}
-			/* update offset */
-			out->cd_offset += block_size;
+		/* copy block to where it belongs */
+		bcopy(lastp, out_data_1, out_data_1_len);
+		if (out_data_2 != NULL) {
+			bcopy(lastp + out_data_1_len, out_data_2,
+			    block_size - out_data_1_len);
 		}
+		/* update offset */
+		out->cd_offset += block_size;
 
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->ctr_remainder_len != 0) {

diff --git a/zfs/module/icp/algs/modes/ecb.c b/zfs/module/icp/algs/modes/ecb.c
index 04e6c5e..025f582 100644
--- a/zfs/module/icp/algs/modes/ecb.c
+++ b/zfs/module/icp/algs/modes/ecb.c

@@ -58,8 +58,7 @@
 	}
 
 	lastp = (uint8_t *)ctx->ecb_iv;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	do {
 		/* Unprocessed data from last call. */
@@ -77,32 +76,18 @@
 			blockp = datap;
 		}
 
-		if (out == NULL) {
-			cipher(ctx->ecb_keysched, blockp, blockp);
+		cipher(ctx->ecb_keysched, blockp, lastp);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			ctx->ecb_lastp = blockp;
-			lastp = blockp;
-
-			if (ctx->ecb_remainder_len > 0) {
-				bcopy(blockp, ctx->ecb_copy_to,
-				    ctx->ecb_remainder_len);
-				bcopy(blockp + ctx->ecb_remainder_len, datap,
-				    need);
-			}
-		} else {
-			cipher(ctx->ecb_keysched, blockp, lastp);
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
-
-			/* copy block to where it belongs */
-			bcopy(lastp, out_data_1, out_data_1_len);
-			if (out_data_2 != NULL) {
-				bcopy(lastp + out_data_1_len, out_data_2,
-				    block_size - out_data_1_len);
-			}
-			/* update offset */
-			out->cd_offset += block_size;
+		/* copy block to where it belongs */
+		bcopy(lastp, out_data_1, out_data_1_len);
+		if (out_data_2 != NULL) {
+			bcopy(lastp + out_data_1_len, out_data_2,
+			    block_size - out_data_1_len);
 		}
+		/* update offset */
+		out->cd_offset += block_size;
 
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->ecb_remainder_len != 0) {

diff --git a/zfs/module/icp/algs/modes/gcm.c b/zfs/module/icp/algs/modes/gcm.c
index 323c0dd..dc8dd92 100644
--- a/zfs/module/icp/algs/modes/gcm.c
+++ b/zfs/module/icp/algs/modes/gcm.c

@@ -28,8 +28,8 @@
 #include <sys/crypto/icp.h>
 #include <sys/crypto/impl.h>
 #include <sys/byteorder.h>
+#include <sys/simd.h>
 #include <modes/gcm_impl.h>
-#include <linux/simd.h>
 #ifdef CAN_USE_GCM_ASM
 #include <aes/aes_impl.h>
 #endif
@@ -59,10 +59,12 @@
 static boolean_t gcm_use_avx = B_FALSE;
 #define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)
 
+extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+
 static inline boolean_t gcm_avx_will_work(void);
 static inline void gcm_set_avx(boolean_t);
 static inline boolean_t gcm_toggle_avx(void);
-extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+static inline size_t gcm_simd_get_htab_size(boolean_t);
 
 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t);
@@ -110,13 +112,14 @@
 		    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
 		    length);
 		ctx->gcm_remainder_len += length;
-		ctx->gcm_copy_to = datap;
+		if (ctx->gcm_copy_to == NULL) {
+			ctx->gcm_copy_to = datap;
+		}
 		return (CRYPTO_SUCCESS);
 	}
 
 	lastp = (uint8_t *)ctx->gcm_cb;
-	if (out != NULL)
-		crypto_init_ptrs(out, &iov_or_mp, &offset);
+	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	gops = gcm_impl_get_ops();
 	do {
@@ -152,39 +155,22 @@
 
 		ctx->gcm_processed_data_len += block_size;
 
-		/*
-		 * The following copies a complete GCM block back to where it
-		 * came from if there was a remainder in the last call and out
-		 * is NULL. That doesn't seem to make sense. So we assert this
-		 * can't happen and leave the code in for reference.
-		 * See https://github.com/zfsonlinux/zfs/issues/9661
-		 */
-		ASSERT(out != NULL);
-		if (out == NULL) {
-			if (ctx->gcm_remainder_len > 0) {
-				bcopy(blockp, ctx->gcm_copy_to,
-				    ctx->gcm_remainder_len);
-				bcopy(blockp + ctx->gcm_remainder_len, datap,
-				    need);
-			}
-		} else {
-			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
-			    &out_data_1_len, &out_data_2, block_size);
+		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+		    &out_data_1_len, &out_data_2, block_size);
 
-			/* copy block to where it belongs */
-			if (out_data_1_len == block_size) {
-				copy_block(lastp, out_data_1);
-			} else {
-				bcopy(lastp, out_data_1, out_data_1_len);
-				if (out_data_2 != NULL) {
-					bcopy(lastp + out_data_1_len,
-					    out_data_2,
-					    block_size - out_data_1_len);
-				}
+		/* copy block to where it belongs */
+		if (out_data_1_len == block_size) {
+			copy_block(lastp, out_data_1);
+		} else {
+			bcopy(lastp, out_data_1, out_data_1_len);
+			if (out_data_2 != NULL) {
+				bcopy(lastp + out_data_1_len,
+				    out_data_2,
+				    block_size - out_data_1_len);
 			}
-			/* update offset */
-			out->cd_offset += block_size;
 		}
+		/* update offset */
+		out->cd_offset += block_size;
 
 		/* add ciphertext to the hash */
 		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
@@ -356,11 +342,13 @@
 	if (length > 0) {
 		new_len = ctx->gcm_pt_buf_len + length;
 		new = vmem_alloc(new_len, ctx->gcm_kmflag);
+		if (new == NULL) {
+			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+			ctx->gcm_pt_buf = NULL;
+			return (CRYPTO_HOST_MEMORY);
+		}
 		bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
 		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
-		if (new == NULL)
-			return (CRYPTO_HOST_MEMORY);
-
 		ctx->gcm_pt_buf = new;
 		ctx->gcm_pt_buf_len = new_len;
 		bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
@@ -532,11 +520,7 @@
 	}
 }
 
-/*
- * The following function is called at encrypt or decrypt init time
- * for AES GCM mode.
- */
-int
+static int
 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
@@ -588,6 +572,9 @@
 }
 
 /*
+ * The following function is called at encrypt or decrypt init time
+ * for AES GCM mode.
+ *
  * Init the GCM context struct. Handle the cycle and avx implementations here.
  */
 int
@@ -644,6 +631,21 @@
 			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
 		}
 	}
+	/* Allocate Htab memory as needed. */
+	if (gcm_ctx->gcm_use_avx == B_TRUE) {
+		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
+
+		if (htab_len == 0) {
+			return (CRYPTO_MECHANISM_PARAM_INVALID);
+		}
+		gcm_ctx->gcm_htab_len = htab_len;
+		gcm_ctx->gcm_Htable =
+		    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
+
+		if (gcm_ctx->gcm_Htable == NULL) {
+			return (CRYPTO_HOST_MEMORY);
+		}
+	}
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif /* ifdef CAN_USE_GCM_ASM */
@@ -704,6 +706,22 @@
 	if (ks->ops->needs_byteswap == B_TRUE) {
 		gcm_ctx->gcm_use_avx = B_FALSE;
 	}
+	/* Allocate Htab memory as needed. */
+	if (gcm_ctx->gcm_use_avx == B_TRUE) {
+		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
+
+		if (htab_len == 0) {
+			return (CRYPTO_MECHANISM_PARAM_INVALID);
+		}
+		gcm_ctx->gcm_htab_len = htab_len;
+		gcm_ctx->gcm_Htable =
+		    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
+
+		if (gcm_ctx->gcm_Htable == NULL) {
+			return (CRYPTO_HOST_MEMORY);
+		}
+	}
+
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif	/* ifdef CAN_USE_GCM_ASM */
@@ -780,7 +798,7 @@
  * fallback to the fastest generic implementation.
  */
 const gcm_impl_ops_t *
-gcm_impl_get_ops()
+gcm_impl_get_ops(void)
 {
 	if (!kfpu_allowed())
 		return (&gcm_generic_impl);
@@ -970,7 +988,6 @@
 }
 
 #if defined(_KERNEL) && defined(__linux__)
-#include <linux/mod_compat.h>
 
 static int
 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
@@ -1034,7 +1051,7 @@
 /* Clear the FPU registers since they hold sensitive internal state. */
 #define	clear_fpu_regs() clear_fpu_regs_avx()
 #define	GHASH_AVX(ctx, in, len) \
-    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \
+    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
     in, len)
 
 #define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
@@ -1052,8 +1069,8 @@
 extern void aes_encrypt_intel(const uint32_t rk[], int nr,
     const uint32_t pt[4], uint32_t ct[4]);
 
-extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]);
-extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2],
+extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
+extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
     const uint8_t *in, size_t len);
 
 extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
@@ -1089,8 +1106,20 @@
 	}
 }
 
+static inline size_t
+gcm_simd_get_htab_size(boolean_t simd_mode)
+{
+	switch (simd_mode) {
+	case B_TRUE:
+		return (2 * 6 * 2 * sizeof (uint64_t));
+
+	default:
+		return (0);
+	}
+}
+
 /*
- * Clear senssitve data in the context.
+ * Clear sensitive data in the context.
  *
  * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
  * ctx->gcm_Htable contain the hash sub key which protects authentication.
@@ -1104,7 +1133,6 @@
 {
 	bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
 	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
-	bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable));
 	bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
 	bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
 }
@@ -1186,13 +1214,6 @@
 		GHASH_AVX(ctx, tmp, block_size);
 		clear_fpu_regs();
 		kfpu_end();
-		/*
-		 * We don't follow gcm_mode_encrypt_contiguous_blocks() here
-		 * but assert that out is not null.
-		 * See gcm_mode_encrypt_contiguous_blocks() above and
-		 * https://github.com/zfsonlinux/zfs/issues/9661
-		 */
-		ASSERT(out != NULL);
 		rv = crypto_put_output_data(tmp, out, block_size);
 		out->cd_offset += block_size;
 		gcm_incr_counter_block(ctx);
@@ -1214,13 +1235,11 @@
 			rv = CRYPTO_FAILED;
 			goto out_nofpu;
 		}
-		if (out != NULL) {
-			rv = crypto_put_output_data(ct_buf, out, chunk_size);
-			if (rv != CRYPTO_SUCCESS) {
-				goto out_nofpu;
-			}
-			out->cd_offset += chunk_size;
+		rv = crypto_put_output_data(ct_buf, out, chunk_size);
+		if (rv != CRYPTO_SUCCESS) {
+			goto out_nofpu;
 		}
+		out->cd_offset += chunk_size;
 		datap += chunk_size;
 		ctx->gcm_processed_data_len += chunk_size;
 	}
@@ -1236,13 +1255,11 @@
 			rv = CRYPTO_FAILED;
 			goto out;
 		}
-		if (out != NULL) {
-			rv = crypto_put_output_data(ct_buf, out, done);
-			if (rv != CRYPTO_SUCCESS) {
-				goto out;
-			}
-			out->cd_offset += done;
+		rv = crypto_put_output_data(ct_buf, out, done);
+		if (rv != CRYPTO_SUCCESS) {
+			goto out;
 		}
+		out->cd_offset += done;
 		ctx->gcm_processed_data_len += done;
 		datap += done;
 		bleft -= done;
@@ -1262,13 +1279,11 @@
 
 		gcm_xor_avx(datap, tmp);
 		GHASH_AVX(ctx, tmp, block_size);
-		if (out != NULL) {
-			rv = crypto_put_output_data(tmp, out, block_size);
-			if (rv != CRYPTO_SUCCESS) {
-				goto out;
-			}
-			out->cd_offset += block_size;
+		rv = crypto_put_output_data(tmp, out, block_size);
+		if (rv != CRYPTO_SUCCESS) {
+			goto out;
 		}
+		out->cd_offset += block_size;
 		gcm_incr_counter_block(ctx);
 		ctx->gcm_processed_data_len += block_size;
 		datap += block_size;
@@ -1384,7 +1399,7 @@
 		}
 		datap += done;
 	}
-	/* Decrypt remainder, which is less then chunk size, in one go. */
+	/* Decrypt remainder, which is less than chunk size, in one go. */
 	kfpu_begin();
 	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
 		done = aesni_gcm_decrypt(datap, datap, bleft,
@@ -1400,7 +1415,7 @@
 	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
 
 	/*
-	 * Now less then GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
+	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
 	 * decrypt them block by block.
 	 */
 	while (bleft > 0) {

diff --git a/zfs/module/icp/algs/modes/gcm_pclmulqdq.c b/zfs/module/icp/algs/modes/gcm_pclmulqdq.c
index 8a43ba3..0592011 100644
--- a/zfs/module/icp/algs/modes/gcm_pclmulqdq.c
+++ b/zfs/module/icp/algs/modes/gcm_pclmulqdq.c

@@ -24,12 +24,12 @@
 
 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
 
-#include <linux/simd_x86.h>
+#include <sys/types.h>
+#include <sys/simd.h>
 
 /* These functions are used to execute pclmulqdq based assembly methods */
 extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
 
-
 #include <modes/gcm_impl.h>
 
 /*

diff --git a/zfs/module/icp/algs/modes/modes.c b/zfs/module/icp/algs/modes/modes.c
index 1d33c42..59743c7 100644
--- a/zfs/module/icp/algs/modes/modes.c
+++ b/zfs/module/icp/algs/modes/modes.c

@@ -43,17 +43,14 @@
 		break;
 
 	case CRYPTO_DATA_UIO: {
-		uio_t *uiop = out->cd_uio;
-		uintptr_t vec_idx;
+		zfs_uio_t *uiop = out->cd_uio;
+		uint_t vec_idx;
 
 		offset = out->cd_offset;
-		for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
-		    offset >= uiop->uio_iov[vec_idx].iov_len;
-		    offset -= uiop->uio_iov[vec_idx++].iov_len)
-			;
+		offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
 
 		*current_offset = offset;
-		*iov_or_mp = (void *)vec_idx;
+		*iov_or_mp = (void *)(uintptr_t)vec_idx;
 		break;
 	}
 	} /* end switch */
@@ -88,34 +85,35 @@
 	}
 
 	case CRYPTO_DATA_UIO: {
-		uio_t *uio = out->cd_uio;
-		iovec_t *iov;
+		zfs_uio_t *uio = out->cd_uio;
 		offset_t offset;
-		uintptr_t vec_idx;
+		uint_t vec_idx;
 		uint8_t *p;
+		uint64_t iov_len;
+		void *iov_base;
 
 		offset = *current_offset;
 		vec_idx = (uintptr_t)(*iov_or_mp);
-		iov = (iovec_t *)&uio->uio_iov[vec_idx];
-		p = (uint8_t *)iov->iov_base + offset;
+		zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+		p = (uint8_t *)iov_base + offset;
 		*out_data_1 = p;
 
-		if (offset + amt <= iov->iov_len) {
+		if (offset + amt <= iov_len) {
 			/* can fit one block into this iov */
 			*out_data_1_len = amt;
 			*out_data_2 = NULL;
 			*current_offset = offset + amt;
 		} else {
 			/* one block spans two iovecs */
-			*out_data_1_len = iov->iov_len - offset;
-			if (vec_idx == uio->uio_iovcnt)
+			*out_data_1_len = iov_len - offset;
+			if (vec_idx == zfs_uio_iovcnt(uio))
 				return;
 			vec_idx++;
-			iov = (iovec_t *)&uio->uio_iov[vec_idx];
-			*out_data_2 = (uint8_t *)iov->iov_base;
+			zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+			*out_data_2 = (uint8_t *)iov_base;
 			*current_offset = amt - *out_data_1_len;
 		}
-		*iov_or_mp = (void *)vec_idx;
+		*iov_or_mp = (void *)(uintptr_t)vec_idx;
 		break;
 	}
 	} /* end switch */
@@ -154,6 +152,14 @@
 			vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf,
 			    ((gcm_ctx_t *)ctx)->gcm_pt_buf_len);
 
+#ifdef CAN_USE_GCM_ASM
+		if (((gcm_ctx_t *)ctx)->gcm_Htable != NULL) {
+			gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)ctx;
+			bzero(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
+			kmem_free(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
+		}
+#endif
+
 		kmem_free(ctx, sizeof (gcm_ctx_t));
 	}
 }

diff --git a/zfs/module/icp/algs/sha1/sha1.c b/zfs/module/icp/algs/sha1/sha1.c
deleted file mode 100644
index 7f28b37..0000000
--- a/zfs/module/icp/algs/sha1/sha1.c
+++ /dev/null

@@ -1,838 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * The basic framework for this code came from the reference
- * implementation for MD5.  That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * NOTE: Cleaned-up and optimized, version of SHA1, based on the FIPS 180-1
- * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
- * Not as fast as one would like -- further optimizations are encouraged
- * and appreciated.
- */
-
-#include <sys/zfs_context.h>
-#include <sha1/sha1.h>
-#include <sha1/sha1_consts.h>
-
-#ifdef _LITTLE_ENDIAN
-#include <sys/byteorder.h>
-#define	HAVE_HTONL
-#endif
-
-#define	_RESTRICT_KYWD
-
-static void Encode(uint8_t *, const uint32_t *, size_t);
-
-#if	defined(__sparc)
-
-#define	SHA1_TRANSFORM(ctx, in) \
-	SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
-		(ctx)->state[3], (ctx)->state[4], (ctx), (in))
-
-static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
-	SHA1_CTX *, const uint8_t *);
-
-#elif	defined(__amd64)
-
-#define	SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
-#define	SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
-		(in), (num))
-
-void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
-
-#else
-
-#define	SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
-
-static void SHA1Transform(SHA1_CTX *, const uint8_t *);
-
-#endif
-
-
-static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
-
-/*
- * F, G, and H are the basic SHA1 functions.
- */
-#define	F(b, c, d)	(((b) & (c)) | ((~b) & (d)))
-#define	G(b, c, d)	((b) ^ (c) ^ (d))
-#define	H(b, c, d)	(((b) & (c)) | (((b)|(c)) & (d)))
-
-/*
- * ROTATE_LEFT rotates x left n bits.
- */
-
-#if	defined(__GNUC__) && defined(_LP64)
-static __inline__ uint64_t
-ROTATE_LEFT(uint64_t value, uint32_t n)
-{
-	uint32_t t32;
-
-	t32 = (uint32_t)value;
-	return ((t32 << n) | (t32 >> (32 - n)));
-}
-
-#else
-
-#define	ROTATE_LEFT(x, n)	\
-	(((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
-
-#endif
-
-
-/*
- * SHA1Init()
- *
- * purpose: initializes the sha1 context and begins and sha1 digest operation
- *   input: SHA1_CTX *	: the context to initializes.
- *  output: void
- */
-
-void
-SHA1Init(SHA1_CTX *ctx)
-{
-	ctx->count[0] = ctx->count[1] = 0;
-
-	/*
-	 * load magic initialization constants. Tell lint
-	 * that these constants are unsigned by using U.
-	 */
-
-	ctx->state[0] = 0x67452301U;
-	ctx->state[1] = 0xefcdab89U;
-	ctx->state[2] = 0x98badcfeU;
-	ctx->state[3] = 0x10325476U;
-	ctx->state[4] = 0xc3d2e1f0U;
-}
-
-void
-SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
-{
-	uint32_t i, buf_index, buf_len;
-	const uint8_t *input = inptr;
-#if defined(__amd64)
-	uint32_t	block_count;
-#endif	/* __amd64 */
-
-	/* check for noop */
-	if (input_len == 0)
-		return;
-
-	/* compute number of bytes mod 64 */
-	buf_index = (ctx->count[1] >> 3) & 0x3F;
-
-	/* update number of bits */
-	if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
-		ctx->count[0]++;
-
-	ctx->count[0] += (input_len >> 29);
-
-	buf_len = 64 - buf_index;
-
-	/* transform as many times as possible */
-	i = 0;
-	if (input_len >= buf_len) {
-
-		/*
-		 * general optimization:
-		 *
-		 * only do initial bcopy() and SHA1Transform() if
-		 * buf_index != 0.  if buf_index == 0, we're just
-		 * wasting our time doing the bcopy() since there
-		 * wasn't any data left over from a previous call to
-		 * SHA1Update().
-		 */
-
-		if (buf_index) {
-			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
-			SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
-			i = buf_len;
-		}
-
-#if !defined(__amd64)
-		for (; i + 63 < input_len; i += 64)
-			SHA1_TRANSFORM(ctx, &input[i]);
-#else
-		block_count = (input_len - i) >> 6;
-		if (block_count > 0) {
-			SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
-			i += block_count << 6;
-		}
-#endif	/* !__amd64 */
-
-		/*
-		 * general optimization:
-		 *
-		 * if i and input_len are the same, return now instead
-		 * of calling bcopy(), since the bcopy() in this case
-		 * will be an expensive nop.
-		 */
-
-		if (input_len == i)
-			return;
-
-		buf_index = 0;
-	}
-
-	/* buffer remaining input */
-	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
-}
-
-/*
- * SHA1Final()
- *
- * purpose: ends an sha1 digest operation, finalizing the message digest and
- *          zeroing the context.
- *   input: uchar_t *	: A buffer to store the digest.
- *			: The function actually uses void* because many
- *			: callers pass things other than uchar_t here.
- *          SHA1_CTX *  : the context to finalize, save, and zero
- *  output: void
- */
-
-void
-SHA1Final(void *digest, SHA1_CTX *ctx)
-{
-	uint8_t		bitcount_be[sizeof (ctx->count)];
-	uint32_t	index = (ctx->count[1] >> 3) & 0x3f;
-
-	/* store bit count, big endian */
-	Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
-
-	/* pad out to 56 mod 64 */
-	SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
-
-	/* append length (before padding) */
-	SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
-
-	/* store state in digest */
-	Encode(digest, ctx->state, sizeof (ctx->state));
-
-	/* zeroize sensitive information */
-	bzero(ctx, sizeof (*ctx));
-}
-
-
-#if !defined(__amd64)
-
-typedef uint32_t sha1word;
-
-/*
- * sparc optimization:
- *
- * on the sparc, we can load big endian 32-bit data easily.  note that
- * special care must be taken to ensure the address is 32-bit aligned.
- * in the interest of speed, we don't check to make sure, since
- * careful programming can guarantee this for us.
- */
-
-#if	defined(_BIG_ENDIAN)
-#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
-
-#elif	defined(HAVE_HTONL)
-#define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
-
-#else
-/* little endian -- will work on big endian, but slowly */
-#define	LOAD_BIG_32(addr)	\
-	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
-#endif	/* _BIG_ENDIAN */
-
-/*
- * SHA1Transform()
- */
-#if	defined(W_ARRAY)
-#define	W(n) w[n]
-#else	/* !defined(W_ARRAY) */
-#define	W(n) w_ ## n
-#endif	/* !defined(W_ARRAY) */
-
-#if	defined(__sparc)
-
-
-/*
- * sparc register window optimization:
- *
- * `a', `b', `c', `d', and `e' are passed into SHA1Transform
- * explicitly since it increases the number of registers available to
- * the compiler.  under this scheme, these variables can be held in
- * %i0 - %i4, which leaves more local and out registers available.
- *
- * purpose: sha1 transformation -- updates the digest based on `block'
- *   input: uint32_t	: bytes  1 -  4 of the digest
- *          uint32_t	: bytes  5 -  8 of the digest
- *          uint32_t	: bytes  9 - 12 of the digest
- *          uint32_t	: bytes 12 - 16 of the digest
- *          uint32_t	: bytes 16 - 20 of the digest
- *          SHA1_CTX *	: the context to update
- *          uint8_t [64]: the block to use to update the digest
- *  output: void
- */
-
-
-void
-SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
-    SHA1_CTX *ctx, const uint8_t blk[64])
-{
-	/*
-	 * sparc optimization:
-	 *
-	 * while it is somewhat counter-intuitive, on sparc, it is
-	 * more efficient to place all the constants used in this
-	 * function in an array and load the values out of the array
-	 * than to manually load the constants.  this is because
-	 * setting a register to a 32-bit value takes two ops in most
-	 * cases: a `sethi' and an `or', but loading a 32-bit value
-	 * from memory only takes one `ld' (or `lduw' on v9).  while
-	 * this increases memory usage, the compiler can find enough
-	 * other things to do while waiting to keep the pipeline does
-	 * not stall.  additionally, it is likely that many of these
-	 * constants are cached so that later accesses do not even go
-	 * out to the bus.
-	 *
-	 * this array is declared `static' to keep the compiler from
-	 * having to bcopy() this array onto the stack frame of
-	 * SHA1Transform() each time it is called -- which is
-	 * unacceptably expensive.
-	 *
-	 * the `const' is to ensure that callers are good citizens and
-	 * do not try to munge the array.  since these routines are
-	 * going to be called from inside multithreaded kernelland,
-	 * this is a good safety check. -- `sha1_consts' will end up in
-	 * .rodata.
-	 *
-	 * unfortunately, loading from an array in this manner hurts
-	 * performance under Intel.  So, there is a macro,
-	 * SHA1_CONST(), used in SHA1Transform(), that either expands to
-	 * a reference to this array, or to the actual constant,
-	 * depending on what platform this code is compiled for.
-	 */
-
-
-	static const uint32_t sha1_consts[] = {
-		SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
-	};
-
-
-	/*
-	 * general optimization:
-	 *
-	 * use individual integers instead of using an array.  this is a
-	 * win, although the amount it wins by seems to vary quite a bit.
-	 */
-
-
-	uint32_t	w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
-	uint32_t	w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-
-
-	/*
-	 * sparc optimization:
-	 *
-	 * if `block' is already aligned on a 4-byte boundary, use
-	 * LOAD_BIG_32() directly.  otherwise, bcopy() into a
-	 * buffer that *is* aligned on a 4-byte boundary and then do
-	 * the LOAD_BIG_32() on that buffer.  benchmarks have shown
-	 * that using the bcopy() is better than loading the bytes
-	 * individually and doing the endian-swap by hand.
-	 *
-	 * even though it's quite tempting to assign to do:
-	 *
-	 * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
-	 *
-	 * and only have one set of LOAD_BIG_32()'s, the compiler
-	 * *does not* like that, so please resist the urge.
-	 */
-
-
-	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
-		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
-		w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
-		w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
-		w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
-		w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
-		w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
-		w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
-		w_9  = LOAD_BIG_32(ctx->buf_un.buf32 +  9);
-		w_8  = LOAD_BIG_32(ctx->buf_un.buf32 +  8);
-		w_7  = LOAD_BIG_32(ctx->buf_un.buf32 +  7);
-		w_6  = LOAD_BIG_32(ctx->buf_un.buf32 +  6);
-		w_5  = LOAD_BIG_32(ctx->buf_un.buf32 +  5);
-		w_4  = LOAD_BIG_32(ctx->buf_un.buf32 +  4);
-		w_3  = LOAD_BIG_32(ctx->buf_un.buf32 +  3);
-		w_2  = LOAD_BIG_32(ctx->buf_un.buf32 +  2);
-		w_1  = LOAD_BIG_32(ctx->buf_un.buf32 +  1);
-		w_0  = LOAD_BIG_32(ctx->buf_un.buf32 +  0);
-	} else {
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_15 = LOAD_BIG_32(blk + 60);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_14 = LOAD_BIG_32(blk + 56);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_13 = LOAD_BIG_32(blk + 52);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_12 = LOAD_BIG_32(blk + 48);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_11 = LOAD_BIG_32(blk + 44);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_10 = LOAD_BIG_32(blk + 40);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_9  = LOAD_BIG_32(blk + 36);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_8  = LOAD_BIG_32(blk + 32);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_7  = LOAD_BIG_32(blk + 28);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_6  = LOAD_BIG_32(blk + 24);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_5  = LOAD_BIG_32(blk + 20);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_4  = LOAD_BIG_32(blk + 16);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_3  = LOAD_BIG_32(blk + 12);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_2  = LOAD_BIG_32(blk +  8);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_1  = LOAD_BIG_32(blk +  4);
-		/* LINTED E_BAD_PTR_CAST_ALIGN */
-		w_0  = LOAD_BIG_32(blk +  0);
-	}
-#else	/* !defined(__sparc) */
-
-void /* CSTYLED */
-SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
-{
-	/* CSTYLED */
-	sha1word a = ctx->state[0];
-	sha1word b = ctx->state[1];
-	sha1word c = ctx->state[2];
-	sha1word d = ctx->state[3];
-	sha1word e = ctx->state[4];
-
-#if	defined(W_ARRAY)
-	sha1word	w[16];
-#else	/* !defined(W_ARRAY) */
-	sha1word	w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
-	sha1word	w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-#endif	/* !defined(W_ARRAY) */
-
-	W(0)  = LOAD_BIG_32((void *)(blk +  0));
-	W(1)  = LOAD_BIG_32((void *)(blk +  4));
-	W(2)  = LOAD_BIG_32((void *)(blk +  8));
-	W(3)  = LOAD_BIG_32((void *)(blk + 12));
-	W(4)  = LOAD_BIG_32((void *)(blk + 16));
-	W(5)  = LOAD_BIG_32((void *)(blk + 20));
-	W(6)  = LOAD_BIG_32((void *)(blk + 24));
-	W(7)  = LOAD_BIG_32((void *)(blk + 28));
-	W(8)  = LOAD_BIG_32((void *)(blk + 32));
-	W(9)  = LOAD_BIG_32((void *)(blk + 36));
-	W(10) = LOAD_BIG_32((void *)(blk + 40));
-	W(11) = LOAD_BIG_32((void *)(blk + 44));
-	W(12) = LOAD_BIG_32((void *)(blk + 48));
-	W(13) = LOAD_BIG_32((void *)(blk + 52));
-	W(14) = LOAD_BIG_32((void *)(blk + 56));
-	W(15) = LOAD_BIG_32((void *)(blk + 60));
-
-#endif /* !defined(__sparc) */
-
-	/*
-	 * general optimization:
-	 *
-	 * even though this approach is described in the standard as
-	 * being slower algorithmically, it is 30-40% faster than the
-	 * "faster" version under SPARC, because this version has more
-	 * of the constraints specified at compile-time and uses fewer
-	 * variables (and therefore has better register utilization)
-	 * than its "speedier" brother.  (i've tried both, trust me)
-	 *
-	 * for either method given in the spec, there is an "assignment"
-	 * phase where the following takes place:
-	 *
-	 *	tmp = (main_computation);
-	 *	e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
-	 *
-	 * we can make the algorithm go faster by not doing this work,
-	 * but just pretending that `d' is now `e', etc. this works
-	 * really well and obviates the need for a temporary variable.
-	 * however, we still explicitly perform the rotate action,
-	 * since it is cheaper on SPARC to do it once than to have to
-	 * do it over and over again.
-	 */
-
-	/* round 1 */
-	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
-	b = ROTATE_LEFT(b, 30);
-
-	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
-	a = ROTATE_LEFT(a, 30);
-
-	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
-	e = ROTATE_LEFT(e, 30);
-
-	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
-	d = ROTATE_LEFT(d, 30);
-
-	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
-	c = ROTATE_LEFT(c, 30);
-
-	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
-	b = ROTATE_LEFT(b, 30);
-
-	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
-	a = ROTATE_LEFT(a, 30);
-
-	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
-	e = ROTATE_LEFT(e, 30);
-
-	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
-	d = ROTATE_LEFT(d, 30);
-
-	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
-	c = ROTATE_LEFT(c, 30);
-
-	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
-	b = ROTATE_LEFT(b, 30);
-
-	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
-	a = ROTATE_LEFT(a, 30);
-
-	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
-	e = ROTATE_LEFT(e, 30);
-
-	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
-	d = ROTATE_LEFT(d, 30);
-
-	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
-	c = ROTATE_LEFT(c, 30);
-
-	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
-	b = ROTATE_LEFT(b, 30);
-
-	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);		/* 16 */
-	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
-	a = ROTATE_LEFT(a, 30);
-
-	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);		/* 17 */
-	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
-	e = ROTATE_LEFT(e, 30);
-
-	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 18 */
-	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
-	d = ROTATE_LEFT(d, 30);
-
-	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);		/* 19 */
-	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
-	c = ROTATE_LEFT(c, 30);
-
-	/* round 2 */
-	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);		/* 20 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
-	b = ROTATE_LEFT(b, 30);
-
-	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);		/* 21 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
-	a = ROTATE_LEFT(a, 30);
-
-	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);		/* 22 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
-	e = ROTATE_LEFT(e, 30);
-
-	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);		/* 23 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
-	d = ROTATE_LEFT(d, 30);
-
-	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);		/* 24 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
-	c = ROTATE_LEFT(c, 30);
-
-	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);		/* 25 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
-	b = ROTATE_LEFT(b, 30);
-
-	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 26 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
-	a = ROTATE_LEFT(a, 30);
-
-	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 27 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
-	e = ROTATE_LEFT(e, 30);
-
-	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 28 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
-	d = ROTATE_LEFT(d, 30);
-
-	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 29 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
-	c = ROTATE_LEFT(c, 30);
-
-	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 30 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
-	b = ROTATE_LEFT(b, 30);
-
-	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 31 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
-	a = ROTATE_LEFT(a, 30);
-
-	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);		/* 32 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
-	e = ROTATE_LEFT(e, 30);
-
-	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);		/* 33 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
-	d = ROTATE_LEFT(d, 30);
-
-	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 34 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
-	c = ROTATE_LEFT(c, 30);
-
-	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);		/* 35 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
-	b = ROTATE_LEFT(b, 30);
-
-	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);		/* 36 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
-	a = ROTATE_LEFT(a, 30);
-
-	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);		/* 37 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
-	e = ROTATE_LEFT(e, 30);
-
-	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);		/* 38 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
-	d = ROTATE_LEFT(d, 30);
-
-	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);		/* 39 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
-	c = ROTATE_LEFT(c, 30);
-
-	/* round 3 */
-	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);		/* 40 */
-	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
-	b = ROTATE_LEFT(b, 30);
-
-	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);		/* 41 */
-	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
-	a = ROTATE_LEFT(a, 30);
-
-	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 42 */
-	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
-	e = ROTATE_LEFT(e, 30);
-
-	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 43 */
-	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
-	d = ROTATE_LEFT(d, 30);
-
-	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 44 */
-	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
-	c = ROTATE_LEFT(c, 30);
-
-	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 45 */
-	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
-	b = ROTATE_LEFT(b, 30);
-
-	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 46 */
-	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
-	a = ROTATE_LEFT(a, 30);
-
-	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 47 */
-	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
-	e = ROTATE_LEFT(e, 30);
-
-	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);		/* 48 */
-	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
-	d = ROTATE_LEFT(d, 30);
-
-	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);		/* 49 */
-	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
-	c = ROTATE_LEFT(c, 30);
-
-	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 50 */
-	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
-	b = ROTATE_LEFT(b, 30);
-
-	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);		/* 51 */
-	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
-	a = ROTATE_LEFT(a, 30);
-
-	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);		/* 52 */
-	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
-	e = ROTATE_LEFT(e, 30);
-
-	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);		/* 53 */
-	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
-	d = ROTATE_LEFT(d, 30);
-
-	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);		/* 54 */
-	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
-	c = ROTATE_LEFT(c, 30);
-
-	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);		/* 55 */
-	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
-	b = ROTATE_LEFT(b, 30);
-
-	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);		/* 56 */
-	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
-	a = ROTATE_LEFT(a, 30);
-
-	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);		/* 57 */
-	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
-	e = ROTATE_LEFT(e, 30);
-
-	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 58 */
-	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
-	d = ROTATE_LEFT(d, 30);
-
-	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 59 */
-	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
-	c = ROTATE_LEFT(c, 30);
-
-	/* round 4 */
-	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 60 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
-	b = ROTATE_LEFT(b, 30);
-
-	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 61 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
-	a = ROTATE_LEFT(a, 30);
-
-	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 62 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
-	e = ROTATE_LEFT(e, 30);
-
-	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 63 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
-	d = ROTATE_LEFT(d, 30);
-
-	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);		/* 64 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
-	c = ROTATE_LEFT(c, 30);
-
-	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);		/* 65 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
-	b = ROTATE_LEFT(b, 30);
-
-	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 66 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
-	a = ROTATE_LEFT(a, 30);
-
-	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);		/* 67 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
-	e = ROTATE_LEFT(e, 30);
-
-	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);		/* 68 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
-	d = ROTATE_LEFT(d, 30);
-
-	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);		/* 69 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
-	c = ROTATE_LEFT(c, 30);
-
-	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);		/* 70 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
-	b = ROTATE_LEFT(b, 30);
-
-	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);		/* 71 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
-	a = ROTATE_LEFT(a, 30);
-
-	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);		/* 72 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
-	e = ROTATE_LEFT(e, 30);
-
-	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);		/* 73 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
-	d = ROTATE_LEFT(d, 30);
-
-	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 74 */
-	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
-	c = ROTATE_LEFT(c, 30);
-
-	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 75 */
-	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
-	b = ROTATE_LEFT(b, 30);
-
-	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 76 */
-	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
-	a = ROTATE_LEFT(a, 30);
-
-	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 77 */
-	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
-	e = ROTATE_LEFT(e, 30);
-
-	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 78 */
-	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
-	d = ROTATE_LEFT(d, 30);
-
-	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 79 */
-
-	ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
-	    SHA1_CONST(3);
-	ctx->state[1] += b;
-	ctx->state[2] += ROTATE_LEFT(c, 30);
-	ctx->state[3] += d;
-	ctx->state[4] += e;
-
-	/* zeroize sensitive information */
-	W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
-	W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
-}
-#endif	/* !__amd64 */
-
-
-/*
- * Encode()
- *
- * purpose: to convert a list of numbers from little endian to big endian
- *   input: uint8_t *	: place to store the converted big endian numbers
- *	    uint32_t *	: place to get numbers to convert from
- *          size_t	: the length of the input in bytes
- *  output: void
- */
-
-static void
-Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
-    size_t len)
-{
-	size_t		i, j;
-
-#if defined(__sparc)
-	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
-		for (i = 0, j = 0; j < len; i++, j += 4) {
-			/* LINTED E_BAD_PTR_CAST_ALIGN */
-			*((uint32_t *)(output + j)) = input[i];
-		}
-	} else {
-#endif /* little endian -- will work on big endian, but slowly */
-
-		for (i = 0, j = 0; j < len; i++, j += 4) {
-			output[j]	= (input[i] >> 24) & 0xff;
-			output[j + 1]	= (input[i] >> 16) & 0xff;
-			output[j + 2]	= (input[i] >>  8) & 0xff;
-			output[j + 3]	= input[i] & 0xff;
-		}
-#if defined(__sparc)
-	}
-#endif
-}

diff --git a/zfs/module/icp/algs/sha2/sha2.c b/zfs/module/icp/algs/sha2/sha2.c
index 05a2e6a..75f6a3c 100644
--- a/zfs/module/icp/algs/sha2/sha2.c
+++ b/zfs/module/icp/algs/sha2/sha2.c

@@ -43,7 +43,7 @@
 
 #define	_RESTRICT_KYWD
 
-#ifdef _LITTLE_ENDIAN
+#ifdef _ZFS_LITTLE_ENDIAN
 #include <sys/byteorder.h>
 #define	HAVE_HTONL
 #endif
@@ -123,7 +123,7 @@
  * careful programming can guarantee this for us.
  */
 
-#if	defined(_BIG_ENDIAN)
+#if	defined(_ZFS_BIG_ENDIAN)
 #define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
 #define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))
 

diff --git a/zfs/module/icp/algs/skein/skein.c b/zfs/module/icp/algs/skein/skein.c
index 0187f7b..83fe842 100644
--- a/zfs/module/icp/algs/skein/skein.c
+++ b/zfs/module/icp/algs/skein/skein.c

@@ -5,21 +5,11 @@
  */
 /* Copyright 2013 Doug Whiting. This code is released to the public domain. */
 
-#define	SKEIN_PORT_CODE		/* instantiate any code in skein_port.h */
-
 #include <sys/sysmacros.h>
 #include <sys/types.h>
 #include <sys/skein.h>		/* get the Skein API definitions   */
 #include "skein_impl.h"		/* get internal definitions */
 
-/* External function to process blkCnt (nonzero) full block(s) of data. */
-void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
-    size_t blkCnt, size_t byteCntAdd);
-void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
-    size_t blkCnt, size_t byteCntAdd);
-void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
-    size_t blkCnt, size_t byteCntAdd);
-
 /* 256-bit Skein */
 /* init the context for a straight hashing operation  */
 int

diff --git a/zfs/module/icp/algs/skein/skein_block.c b/zfs/module/icp/algs/skein/skein_block.c
index 7ba165a..3ad52da 100644
--- a/zfs/module/icp/algs/skein/skein_block.c
+++ b/zfs/module/icp/algs/skein/skein_block.c

@@ -30,7 +30,9 @@
  * the #pragma here to ignore the warning.
  */
 #if defined(_ILP32) || defined(__powerpc)	/* Assume small stack */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
 /*
  * We're running on 32-bit, don't unroll loops to save stack frame space
  *

diff --git a/zfs/module/icp/algs/skein/skein_impl.h b/zfs/module/icp/algs/skein/skein_impl.h
index ea834e6..205a517 100644
--- a/zfs/module/icp/algs/skein/skein_impl.h
+++ b/zfs/module/icp/algs/skein/skein_impl.h

@@ -281,4 +281,12 @@
 extern const uint64_t SKEIN1024_IV_512[];
 extern const uint64_t SKEIN1024_IV_1024[];
 
+/* Functions to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+
 #endif	/* _SKEIN_IMPL_H_ */

diff --git a/zfs/module/icp/algs/skein/skein_port.h b/zfs/module/icp/algs/skein/skein_port.h
index 4fe268b..ce43530 100644
--- a/zfs/module/icp/algs/skein/skein_port.h
+++ b/zfs/module/icp/algs/skein/skein_port.h

@@ -44,19 +44,16 @@
 
 #include <sys/isa_defs.h>	/* get endianness selection */
 
-#define	PLATFORM_MUST_ALIGN	_ALIGNMENT_REQUIRED
-#if	defined(_BIG_ENDIAN)
+#if	defined(_ZFS_BIG_ENDIAN)
 /* here for big-endian CPUs */
 #define	SKEIN_NEED_SWAP   (1)
 #else
 /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
 #define	SKEIN_NEED_SWAP   (0)
-#if	PLATFORM_MUST_ALIGN == 0	/* ok to use "fast" versions? */
 #define	Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt)
 #define	Skein_Get64_LSB_First(dst64, src08, wCnt) \
 	bcopy(src08, dst64, 8 * (wCnt))
 #endif
-#endif
 
 #endif				/* ifndef SKEIN_NEED_SWAP */
 
@@ -80,9 +77,8 @@
 #endif				/* ifndef Skein_Swap64 */
 
 #ifndef	Skein_Put64_LSB_First
-void
+static inline void
 Skein_Put64_LSB_First(uint8_t *dst, const uint64_t *src, size_t bCnt)
-#ifdef	SKEIN_PORT_CODE		/* instantiate the function code here? */
 {
 	/*
 	 * this version is fully portable (big-endian or little-endian),
@@ -93,15 +89,11 @@
 	for (n = 0; n < bCnt; n++)
 		dst[n] = (uint8_t)(src[n >> 3] >> (8 * (n & 7)));
 }
-#else
-;				/* output only the function prototype */
-#endif
 #endif				/* ifndef Skein_Put64_LSB_First */
 
 #ifndef	Skein_Get64_LSB_First
-void
+static inline void
 Skein_Get64_LSB_First(uint64_t *dst, const uint8_t *src, size_t wCnt)
-#ifdef	SKEIN_PORT_CODE		/* instantiate the function code here? */
 {
 	/*
 	 * this version is fully portable (big-endian or little-endian),
@@ -119,9 +111,6 @@
 		    (((uint64_t)src[n + 6]) << 48) +
 		    (((uint64_t)src[n + 7]) << 56);
 }
-#else
-;				/* output only the function prototype */
-#endif
 #endif				/* ifndef Skein_Get64_LSB_First */
 
 #endif	/* _SKEIN_PORT_H_ */

diff --git a/zfs/module/icp/api/kcf_cipher.c b/zfs/module/icp/api/kcf_cipher.c
index d66c1aa..d6aa481 100644
--- a/zfs/module/icp/api/kcf_cipher.c
+++ b/zfs/module/icp/api/kcf_cipher.c

@@ -30,9 +30,6 @@
 #include <sys/crypto/spi.h>
 #include <sys/crypto/sched_impl.h>
 
-#define	CRYPTO_OPS_OFFSET(f)		offsetof(crypto_ops_t, co_##f)
-#define	CRYPTO_CIPHER_OFFSET(f)		offsetof(crypto_cipher_ops_t, f)
-
 /*
  * Encryption and decryption routines.
  */

diff --git a/zfs/module/icp/api/kcf_digest.c b/zfs/module/icp/api/kcf_digest.c
index 87090fd..aa68d69 100644
--- a/zfs/module/icp/api/kcf_digest.c
+++ b/zfs/module/icp/api/kcf_digest.c

@@ -30,9 +30,6 @@
 #include <sys/crypto/spi.h>
 #include <sys/crypto/sched_impl.h>
 
-#define	CRYPTO_OPS_OFFSET(f)		offsetof(crypto_ops_t, co_##f)
-#define	CRYPTO_DIGEST_OFFSET(f)		offsetof(crypto_digest_ops_t, f)
-
 /*
  * Message digest routines
  */

diff --git a/zfs/module/icp/api/kcf_mac.c b/zfs/module/icp/api/kcf_mac.c
index 21ab94f..a7722d8 100644
--- a/zfs/module/icp/api/kcf_mac.c
+++ b/zfs/module/icp/api/kcf_mac.c

@@ -30,9 +30,6 @@
 #include <sys/crypto/spi.h>
 #include <sys/crypto/sched_impl.h>
 
-#define	CRYPTO_OPS_OFFSET(f)		offsetof(crypto_ops_t, co_##f)
-#define	CRYPTO_MAC_OFFSET(f)		offsetof(crypto_mac_ops_t, f)
-
 /*
  * Message authentication codes routines.
  */

diff --git a/zfs/module/icp/asm-x86_64/aes/aes_aesni.S b/zfs/module/icp/asm-x86_64/aes/aes_aesni.S
index 4a80c62..b0d9f03 100644
--- a/zfs/module/icp/asm-x86_64/aes/aes_aesni.S
+++ b/zfs/module/icp/asm-x86_64/aes/aes_aesni.S

@@ -208,7 +208,7 @@
 	pxor	%xmm1, %xmm0
 	movups	%xmm0, (%rcx)
 	add	$0x10, %rcx
-	ret
+	RET
 	nop
 SET_SIZE(_key_expansion_128)
 SET_SIZE(_key_expansion_256a)
@@ -236,7 +236,7 @@
 	shufps	$0b01001110, %xmm2, %xmm1
 	movups	%xmm1, 0x10(%rcx)
 	add	$0x20, %rcx
-	ret
+	RET
 SET_SIZE(_key_expansion_192a)
 
 
@@ -257,7 +257,7 @@
 
 	movups	%xmm0, (%rcx)
 	add	$0x10, %rcx
-	ret
+	RET
 SET_SIZE(_key_expansion_192b)
 
 
@@ -271,7 +271,7 @@
 	pxor	%xmm1, %xmm2
 	movups	%xmm2, (%rcx)
 	add	$0x10, %rcx
-	ret
+	RET
 SET_SIZE(_key_expansion_256b)
 
 
@@ -376,7 +376,7 @@
 	mov	$14, %rax			// return # rounds = 14
 #endif
 	FRAME_END
-	ret
+	RET
 
 .align 4
 .Lenc_key192:
@@ -413,7 +413,7 @@
 	mov	$12, %rax			// return # rounds = 12
 #endif
 	FRAME_END
-	ret
+	RET
 
 .align 4
 .Lenc_key128:
@@ -453,13 +453,13 @@
 	mov	$10, %rax			// return # rounds = 10
 #endif
 	FRAME_END
-	ret
+	RET
 
 .Lenc_key_invalid_param:
 #ifdef	OPENSSL_INTERFACE
 	mov	$-1, %rax	// user key or AES key pointer is NULL
 	FRAME_END
-	ret
+	RET
 #else
 	/* FALLTHROUGH */
 #endif	/* OPENSSL_INTERFACE */
@@ -471,7 +471,7 @@
 	xor	%rax, %rax	// a key pointer is NULL or invalid keysize
 #endif	/* OPENSSL_INTERFACE */
 	FRAME_END
-	ret
+	RET
 	SET_SIZE(rijndael_key_setup_enc_intel)
 
 
@@ -548,7 +548,7 @@
 	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
 	// OpenSSL: rax = 0 for OK, or non-zero for error
 	FRAME_END
-	ret
+	RET
 	SET_SIZE(rijndael_key_setup_dec_intel)
 
 
@@ -655,7 +655,7 @@
 	aesenclast	 %KEY, %STATE		// last round
 	movups	%STATE, (%OUTP)			// output
 
-	ret
+	RET
 	SET_SIZE(aes_encrypt_intel)
 
 
@@ -738,7 +738,7 @@
 	aesdeclast	%KEY, %STATE		// last round
 	movups	%STATE, (%OUTP)			// output
 
-	ret
+	RET
 	SET_SIZE(aes_decrypt_intel)
 
 #endif	/* lint || __lint */

diff --git a/zfs/module/icp/asm-x86_64/aes/aes_amd64.S b/zfs/module/icp/asm-x86_64/aes/aes_amd64.S
index 9db3a31..e631752 100644
--- a/zfs/module/icp/asm-x86_64/aes/aes_amd64.S
+++ b/zfs/module/icp/asm-x86_64/aes/aes_amd64.S

@@ -704,6 +704,7 @@
 
 
 ENTRY_NP(aes_encrypt_amd64)
+	ENDBR
 #ifdef	GLADMAN_INTERFACE
 	// Original interface
 	sub	$[4*8], %rsp	// gnu/linux/opensolaris binary interface
@@ -785,7 +786,7 @@
 	mov	2*8(%rsp), %rbp
 	mov	3*8(%rsp), %r12
 	add	$[4*8], %rsp
-	ret
+	RET
 
 	SET_SIZE(aes_encrypt_amd64)
 
@@ -809,6 +810,7 @@
 
 
 ENTRY_NP(aes_decrypt_amd64)
+	ENDBR
 #ifdef	GLADMAN_INTERFACE
 	// Original interface
 	sub	$[4*8], %rsp	// gnu/linux/opensolaris binary interface
@@ -896,7 +898,7 @@
 	mov	2*8(%rsp), %rbp
 	mov	3*8(%rsp), %r12
 	add	$[4*8], %rsp
-	ret
+	RET
 
 	SET_SIZE(aes_decrypt_amd64)
 #endif	/* lint || __lint */

diff --git a/zfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/zfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index ed9f660..cf17b37 100644
--- a/zfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/zfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S

@@ -47,6 +47,9 @@
 #if defined(__x86_64__) && defined(HAVE_AVX) && \
     defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
+#define _ASM
+#include <sys/asm_linkage.h>
+
 .extern gcm_avx_can_use_movbe
 
 .text
@@ -55,6 +58,8 @@
 .type	_aesni_ctr32_ghash_6x,@function
 .align	32
 _aesni_ctr32_ghash_6x:
+.cfi_startproc
+	ENDBR
 	vmovdqu	32(%r11),%xmm2
 	subq	$6,%rdx
 	vpxor	%xmm4,%xmm4,%xmm4
@@ -362,13 +367,16 @@
 	vpxor	16+8(%rsp),%xmm8,%xmm8
 	vpxor	%xmm4,%xmm8,%xmm8
 
-	.byte	0xf3,0xc3
+	RET
+.cfi_endproc
 .size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
 #endif /* ifdef HAVE_MOVBE */
 
 .type	_aesni_ctr32_ghash_no_movbe_6x,@function
 .align	32
 _aesni_ctr32_ghash_no_movbe_6x:
+.cfi_startproc
+	ENDBR
 	vmovdqu	32(%r11),%xmm2
 	subq	$6,%rdx
 	vpxor	%xmm4,%xmm4,%xmm4
@@ -688,7 +696,8 @@
 	vpxor	16+8(%rsp),%xmm8,%xmm8
 	vpxor	%xmm4,%xmm8,%xmm8
 
-	.byte	0xf3,0xc3
+	RET
+.cfi_endproc
 .size	_aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
 
 .globl	aesni_gcm_decrypt
@@ -696,6 +705,7 @@
 .align	32
 aesni_gcm_decrypt:
 .cfi_startproc
+	ENDBR
 	xorq	%r10,%r10
 	cmpq	$0x60,%rdx
 	jb	.Lgcm_dec_abort
@@ -714,6 +724,8 @@
 .cfi_offset	%r14,-48
 	pushq	%r15
 .cfi_offset	%r15,-56
+	pushq	%r9
+.cfi_offset	%r9,-64
 	vzeroupper
 
 	vmovdqu	(%r8),%xmm1
@@ -726,7 +738,8 @@
 	andq	$-128,%rsp
 	vmovdqu	(%r11),%xmm0
 	leaq	128(%rcx),%rcx
-	leaq	32+32(%r9),%r9
+	movq	32(%r9),%r9
+	leaq	32(%r9),%r9
 	movl	504-128(%rcx),%ebp	// ICP has a larger offset for rounds.
 	vpshufb	%xmm0,%xmm8,%xmm8
 
@@ -782,7 +795,9 @@
 	vmovups	%xmm14,-16(%rsi)
 
 	vpshufb	(%r11),%xmm8,%xmm8
-	vmovdqu	%xmm8,-64(%r9)
+	movq	-56(%rax),%r9
+.cfi_restore	%r9
+	vmovdqu	%xmm8,(%r9)
 
 	vzeroupper
 	movq	-48(%rax),%r15
@@ -801,12 +816,14 @@
 .cfi_def_cfa_register	%rsp
 .Lgcm_dec_abort:
 	movq	%r10,%rax
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
 .type	_aesni_ctr32_6x,@function
 .align	32
 _aesni_ctr32_6x:
+.cfi_startproc
+	ENDBR
 	vmovdqu	0-128(%rcx),%xmm4
 	vmovdqu	32(%r11),%xmm2
 	leaq	-2(%rbp),%r13	// ICP uses 10,12,14 not 9,11,13 for rounds.
@@ -870,7 +887,7 @@
 	vmovups	%xmm14,80(%rsi)
 	leaq	96(%rsi),%rsi
 
-	.byte	0xf3,0xc3
+	RET
 .align	32
 .Lhandle_ctr32_2:
 	vpshufb	%xmm0,%xmm1,%xmm6
@@ -893,6 +910,7 @@
 	vpshufb	%xmm0,%xmm1,%xmm1
 	vpxor	%xmm4,%xmm14,%xmm14
 	jmp	.Loop_ctr32
+.cfi_endproc
 .size	_aesni_ctr32_6x,.-_aesni_ctr32_6x
 
 .globl	aesni_gcm_encrypt
@@ -900,6 +918,7 @@
 .align	32
 aesni_gcm_encrypt:
 .cfi_startproc
+	ENDBR
 	xorq	%r10,%r10
 	cmpq	$288,%rdx
 	jb	.Lgcm_enc_abort
@@ -918,6 +937,8 @@
 .cfi_offset	%r14,-48
 	pushq	%r15
 .cfi_offset	%r15,-56
+	pushq	%r9
+.cfi_offset	%r9,-64
 	vzeroupper
 
 	vmovdqu	(%r8),%xmm1
@@ -960,7 +981,8 @@
 	call	_aesni_ctr32_6x
 
 	vmovdqu	(%r9),%xmm8
-	leaq	32+32(%r9),%r9
+	movq	32(%r9),%r9
+	leaq	32(%r9),%r9
 	subq	$12,%rdx
 	movq	$192,%r10
 	vpshufb	%xmm0,%xmm8,%xmm8
@@ -1151,7 +1173,9 @@
 	vpxor	%xmm7,%xmm2,%xmm2
 	vpxor	%xmm2,%xmm8,%xmm8
 	vpshufb	(%r11),%xmm8,%xmm8
-	vmovdqu	%xmm8,-64(%r9)
+	movq	-56(%rax),%r9
+.cfi_restore	%r9
+	vmovdqu	%xmm8,(%r9)
 
 	vzeroupper
 	movq	-48(%rax),%r15
@@ -1170,7 +1194,7 @@
 .cfi_def_cfa_register	%rsp
 .Lgcm_enc_abort:
 	movq	%r10,%rax
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
 
@@ -1185,7 +1209,7 @@
 .align	32
 clear_fpu_regs_avx:
 	vzeroall
-	ret
+	RET
 .size	clear_fpu_regs_avx,.-clear_fpu_regs_avx
 
 /*
@@ -1203,7 +1227,7 @@
 	movdqu  (%rsi), %xmm1
 	pxor    %xmm1, %xmm0
 	movdqu  %xmm0, (%rsi)
-	ret
+	RET
 .size	gcm_xor_avx,.-gcm_xor_avx
 
 /*
@@ -1220,9 +1244,10 @@
 	jz	1f
 	movl	$1, %eax
 1:
-	ret
+	RET
 .size	atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
 
+.pushsection .rodata
 .align	64
 .Lbswap_mask:
 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -1236,6 +1261,7 @@
 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 .byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	64
+.popsection
 
 /* Mark the stack non-executable. */
 #if defined(__linux__) && defined(__ELF__)

diff --git a/zfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/zfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
index 59edc4c..df7f188 100644
--- a/zfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
+++ b/zfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S

@@ -244,7 +244,7 @@
 	//
 	// Return
 	//
-	ret
+	RET
 	SET_SIZE(gcm_mul_pclmulqdq)
 
 #endif	/* lint || __lint */

diff --git a/zfs/module/icp/asm-x86_64/modes/ghash-x86_64.S b/zfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
index 90cc36b..bf3724a 100644
--- a/zfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/zfs/module/icp/asm-x86_64/modes/ghash-x86_64.S

@@ -97,6 +97,9 @@
 #if defined(__x86_64__) && defined(HAVE_AVX) && \
     defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
+#define _ASM
+#include <sys/asm_linkage.h>
+
 .text
 
 .globl	gcm_gmult_clmul
@@ -104,6 +107,7 @@
 .align	16
 gcm_gmult_clmul:
 .cfi_startproc
+	ENDBR
 .L_gmult_clmul:
 	movdqu	(%rdi),%xmm0
 	movdqa	.Lbswap_mask(%rip),%xmm5
@@ -149,7 +153,7 @@
 	pxor	%xmm1,%xmm0
 .byte	102,15,56,0,197
 	movdqu	%xmm0,(%rdi)
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
 
@@ -158,6 +162,7 @@
 .align	32
 gcm_init_htab_avx:
 .cfi_startproc
+	ENDBR
 	vzeroupper
 
 	vmovdqu	(%rsi),%xmm2
@@ -262,7 +267,7 @@
 	vmovdqu	%xmm5,-16(%rdi)
 
 	vzeroupper
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_init_htab_avx,.-gcm_init_htab_avx
 
@@ -271,6 +276,7 @@
 .align	32
 gcm_gmult_avx:
 .cfi_startproc
+	ENDBR
 	jmp	.L_gmult_clmul
 .cfi_endproc
 .size	gcm_gmult_avx,.-gcm_gmult_avx
@@ -279,6 +285,7 @@
 .align	32
 gcm_ghash_avx:
 .cfi_startproc
+	ENDBR
 	vzeroupper
 
 	vmovdqu	(%rdi),%xmm10
@@ -649,9 +656,11 @@
 	vpshufb	%xmm13,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%rdi)
 	vzeroupper
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_ghash_avx,.-gcm_ghash_avx
+
+.pushsection .rodata
 .align	64
 .Lbswap_mask:
 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -705,6 +714,7 @@
 
 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	64
+.popsection
 
 /* Mark the stack non-executable. */
 #if defined(__linux__) && defined(__ELF__)

diff --git a/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
deleted file mode 100644
index cb92378..0000000
--- a/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
+++ /dev/null

@@ -1,1353 +0,0 @@
-/*
- * !/usr/bin/env perl
- *
- *  ====================================================================
- *  Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- *  project. The module is, however, dual licensed under OpenSSL and
- *  CRYPTOGAMS licenses depending on where you obtain it. For further
- *  details see http://www.openssl.org/~appro/cryptogams/.
- *  ====================================================================
- *
- *  sha1_block procedure for x86_64.
- *
- *  It was brought to my attention that on EM64T compiler-generated code
- *  was far behind 32-bit assembler implementation. This is unlike on
- *  Opteron where compiler-generated code was only 15% behind 32-bit
- *  assembler, which originally made it hard to motivate the effort.
- *  There was suggestion to mechanically translate 32-bit code, but I
- *  dismissed it, reasoning that x86_64 offers enough register bank
- *  capacity to fully utilize SHA-1 parallelism. Therefore this fresh
- *  implementation:-) However! While 64-bit code does performs better
- *  on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
- *  x86_64 does offer larger *addressable* bank, but out-of-order core
- *  reaches for even more registers through dynamic aliasing, and EM64T
- *  core must have managed to run-time optimize even 32-bit code just as
- *  good as 64-bit one. Performance improvement is summarized in the
- *  following table:
- *
- * 		gcc 3.4		32-bit asm	cycles/byte
- *  Opteron	+45%		+20%		6.8
- *  Xeon P4	+65%		+0%		9.9
- *  Core2		+60%		+10%		7.0
- *
- *
- *  OpenSolaris OS modifications
- *
- *  Sun elects to use this software under the BSD license.
- *
- *  This source originates from OpenSSL file sha1-x86_64.pl at
- *  ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- *  (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- *  1. Added perl "use strict" and declared variables.
- *
- *  2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- *  /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- *  3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- *  assemblers).
- *
- */
-
-/*
- * This file was generated by a perl script (sha1-x86_64.pl). The comments from
- * the original file have been pasted above.
- */
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sys/sha1.h>
-
-
-/* ARGSUSED */
-void
-sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks)
-{
-}
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-ENTRY_NP(sha1_block_data_order)
-	push	%rbx
-	push	%rbp
-	push	%r12
-	mov	%rsp,%rax
-	mov	%rdi,%r8	# reassigned argument
-	sub	$72,%rsp
-	mov	%rsi,%r9	# reassigned argument
-	and	$-64,%rsp
-	mov	%rdx,%r10	# reassigned argument
-	mov	%rax,64(%rsp)
-
-	mov	0(%r8),%edx
-	mov	4(%r8),%esi
-	mov	8(%r8),%edi
-	mov	12(%r8),%ebp
-	mov	16(%r8),%r11d
-.align	4
-.Lloop:
-	mov	0(%r9),%eax
-	bswap	%eax
-	mov	%eax,0(%rsp)
-	lea	0x5a827999(%eax,%r11d),%r12d
-	mov	%edi,%ebx
-	mov	4(%r9),%eax
-	mov	%edx,%r11d
-	xor	%ebp,%ebx
-	bswap	%eax
-	rol	$5,%r11d
-	and	%esi,%ebx
-	mov	%eax,4(%rsp)
-	add	%r11d,%r12d
-	xor	%ebp,%ebx
-	rol	$30,%esi
-	add	%ebx,%r12d
-	lea	0x5a827999(%eax,%ebp),%r11d
-	mov	%esi,%ebx
-	mov	8(%r9),%eax
-	mov	%r12d,%ebp
-	xor	%edi,%ebx
-	bswap	%eax
-	rol	$5,%ebp
-	and	%edx,%ebx
-	mov	%eax,8(%rsp)
-	add	%ebp,%r11d
-	xor	%edi,%ebx
-	rol	$30,%edx
-	add	%ebx,%r11d
-	lea	0x5a827999(%eax,%edi),%ebp
-	mov	%edx,%ebx
-	mov	12(%r9),%eax
-	mov	%r11d,%edi
-	xor	%esi,%ebx
-	bswap	%eax
-	rol	$5,%edi
-	and	%r12d,%ebx
-	mov	%eax,12(%rsp)
-	add	%edi,%ebp
-	xor	%esi,%ebx
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	lea	0x5a827999(%eax,%esi),%edi
-	mov	%r12d,%ebx
-	mov	16(%r9),%eax
-	mov	%ebp,%esi
-	xor	%edx,%ebx
-	bswap	%eax
-	rol	$5,%esi
-	and	%r11d,%ebx
-	mov	%eax,16(%rsp)
-	add	%esi,%edi
-	xor	%edx,%ebx
-	rol	$30,%r11d
-	add	%ebx,%edi
-	lea	0x5a827999(%eax,%edx),%esi
-	mov	%r11d,%ebx
-	mov	20(%r9),%eax
-	mov	%edi,%edx
-	xor	%r12d,%ebx
-	bswap	%eax
-	rol	$5,%edx
-	and	%ebp,%ebx
-	mov	%eax,20(%rsp)
-	add	%edx,%esi
-	xor	%r12d,%ebx
-	rol	$30,%ebp
-	add	%ebx,%esi
-	lea	0x5a827999(%eax,%r12d),%edx
-	mov	%ebp,%ebx
-	mov	24(%r9),%eax
-	mov	%esi,%r12d
-	xor	%r11d,%ebx
-	bswap	%eax
-	rol	$5,%r12d
-	and	%edi,%ebx
-	mov	%eax,24(%rsp)
-	add	%r12d,%edx
-	xor	%r11d,%ebx
-	rol	$30,%edi
-	add	%ebx,%edx
-	lea	0x5a827999(%eax,%r11d),%r12d
-	mov	%edi,%ebx
-	mov	28(%r9),%eax
-	mov	%edx,%r11d
-	xor	%ebp,%ebx
-	bswap	%eax
-	rol	$5,%r11d
-	and	%esi,%ebx
-	mov	%eax,28(%rsp)
-	add	%r11d,%r12d
-	xor	%ebp,%ebx
-	rol	$30,%esi
-	add	%ebx,%r12d
-	lea	0x5a827999(%eax,%ebp),%r11d
-	mov	%esi,%ebx
-	mov	32(%r9),%eax
-	mov	%r12d,%ebp
-	xor	%edi,%ebx
-	bswap	%eax
-	rol	$5,%ebp
-	and	%edx,%ebx
-	mov	%eax,32(%rsp)
-	add	%ebp,%r11d
-	xor	%edi,%ebx
-	rol	$30,%edx
-	add	%ebx,%r11d
-	lea	0x5a827999(%eax,%edi),%ebp
-	mov	%edx,%ebx
-	mov	36(%r9),%eax
-	mov	%r11d,%edi
-	xor	%esi,%ebx
-	bswap	%eax
-	rol	$5,%edi
-	and	%r12d,%ebx
-	mov	%eax,36(%rsp)
-	add	%edi,%ebp
-	xor	%esi,%ebx
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	lea	0x5a827999(%eax,%esi),%edi
-	mov	%r12d,%ebx
-	mov	40(%r9),%eax
-	mov	%ebp,%esi
-	xor	%edx,%ebx
-	bswap	%eax
-	rol	$5,%esi
-	and	%r11d,%ebx
-	mov	%eax,40(%rsp)
-	add	%esi,%edi
-	xor	%edx,%ebx
-	rol	$30,%r11d
-	add	%ebx,%edi
-	lea	0x5a827999(%eax,%edx),%esi
-	mov	%r11d,%ebx
-	mov	44(%r9),%eax
-	mov	%edi,%edx
-	xor	%r12d,%ebx
-	bswap	%eax
-	rol	$5,%edx
-	and	%ebp,%ebx
-	mov	%eax,44(%rsp)
-	add	%edx,%esi
-	xor	%r12d,%ebx
-	rol	$30,%ebp
-	add	%ebx,%esi
-	lea	0x5a827999(%eax,%r12d),%edx
-	mov	%ebp,%ebx
-	mov	48(%r9),%eax
-	mov	%esi,%r12d
-	xor	%r11d,%ebx
-	bswap	%eax
-	rol	$5,%r12d
-	and	%edi,%ebx
-	mov	%eax,48(%rsp)
-	add	%r12d,%edx
-	xor	%r11d,%ebx
-	rol	$30,%edi
-	add	%ebx,%edx
-	lea	0x5a827999(%eax,%r11d),%r12d
-	mov	%edi,%ebx
-	mov	52(%r9),%eax
-	mov	%edx,%r11d
-	xor	%ebp,%ebx
-	bswap	%eax
-	rol	$5,%r11d
-	and	%esi,%ebx
-	mov	%eax,52(%rsp)
-	add	%r11d,%r12d
-	xor	%ebp,%ebx
-	rol	$30,%esi
-	add	%ebx,%r12d
-	lea	0x5a827999(%eax,%ebp),%r11d
-	mov	%esi,%ebx
-	mov	56(%r9),%eax
-	mov	%r12d,%ebp
-	xor	%edi,%ebx
-	bswap	%eax
-	rol	$5,%ebp
-	and	%edx,%ebx
-	mov	%eax,56(%rsp)
-	add	%ebp,%r11d
-	xor	%edi,%ebx
-	rol	$30,%edx
-	add	%ebx,%r11d
-	lea	0x5a827999(%eax,%edi),%ebp
-	mov	%edx,%ebx
-	mov	60(%r9),%eax
-	mov	%r11d,%edi
-	xor	%esi,%ebx
-	bswap	%eax
-	rol	$5,%edi
-	and	%r12d,%ebx
-	mov	%eax,60(%rsp)
-	add	%edi,%ebp
-	xor	%esi,%ebx
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	lea	0x5a827999(%eax,%esi),%edi
-	mov	0(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	8(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%esi
-	xor	32(%rsp),%eax
-	and	%r11d,%ebx
-	add	%esi,%edi
-	xor	52(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,0(%rsp)
-	lea	0x5a827999(%eax,%edx),%esi
-	mov	4(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	12(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edx
-	xor	36(%rsp),%eax
-	and	%ebp,%ebx
-	add	%edx,%esi
-	xor	56(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,4(%rsp)
-	lea	0x5a827999(%eax,%r12d),%edx
-	mov	8(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	16(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%r12d
-	xor	40(%rsp),%eax
-	and	%edi,%ebx
-	add	%r12d,%edx
-	xor	60(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,8(%rsp)
-	lea	0x5a827999(%eax,%r11d),%r12d
-	mov	12(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	20(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%r11d
-	xor	44(%rsp),%eax
-	and	%esi,%ebx
-	add	%r11d,%r12d
-	xor	0(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,12(%rsp)
-	lea	0x5a827999(%eax,%ebp),%r11d
-	mov	16(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	24(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%ebp
-	xor	48(%rsp),%eax
-	and	%edx,%ebx
-	add	%ebp,%r11d
-	xor	4(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,16(%rsp)
-	lea	0x6ed9eba1(%eax,%edi),%ebp
-	mov	20(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	28(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	52(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	8(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,20(%rsp)
-	lea	0x6ed9eba1(%eax,%esi),%edi
-	mov	24(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	32(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	56(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	12(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,24(%rsp)
-	lea	0x6ed9eba1(%eax,%edx),%esi
-	mov	28(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	36(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	60(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	16(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,28(%rsp)
-	lea	0x6ed9eba1(%eax,%r12d),%edx
-	mov	32(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	40(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	0(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	20(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,32(%rsp)
-	lea	0x6ed9eba1(%eax,%r11d),%r12d
-	mov	36(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	44(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	4(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	24(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,36(%rsp)
-	lea	0x6ed9eba1(%eax,%ebp),%r11d
-	mov	40(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	48(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	8(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	28(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,40(%rsp)
-	lea	0x6ed9eba1(%eax,%edi),%ebp
-	mov	44(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	52(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	12(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	32(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,44(%rsp)
-	lea	0x6ed9eba1(%eax,%esi),%edi
-	mov	48(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	56(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	16(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	36(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,48(%rsp)
-	lea	0x6ed9eba1(%eax,%edx),%esi
-	mov	52(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	60(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	20(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	40(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,52(%rsp)
-	lea	0x6ed9eba1(%eax,%r12d),%edx
-	mov	56(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	0(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	24(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	44(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,56(%rsp)
-	lea	0x6ed9eba1(%eax,%r11d),%r12d
-	mov	60(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	4(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	28(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	48(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,60(%rsp)
-	lea	0x6ed9eba1(%eax,%ebp),%r11d
-	mov	0(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	8(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	32(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	52(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,0(%rsp)
-	lea	0x6ed9eba1(%eax,%edi),%ebp
-	mov	4(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	12(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	36(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	56(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,4(%rsp)
-	lea	0x6ed9eba1(%eax,%esi),%edi
-	mov	8(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	16(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	40(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	60(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,8(%rsp)
-	lea	0x6ed9eba1(%eax,%edx),%esi
-	mov	12(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	20(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	44(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	0(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,12(%rsp)
-	lea	0x6ed9eba1(%eax,%r12d),%edx
-	mov	16(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	24(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	48(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	4(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,16(%rsp)
-	lea	0x6ed9eba1(%eax,%r11d),%r12d
-	mov	20(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	28(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	52(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	8(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,20(%rsp)
-	lea	0x6ed9eba1(%eax,%ebp),%r11d
-	mov	24(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	32(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	56(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	12(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,24(%rsp)
-	lea	0x6ed9eba1(%eax,%edi),%ebp
-	mov	28(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	36(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	60(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	16(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,28(%rsp)
-	lea	0x6ed9eba1(%eax,%esi),%edi
-	mov	32(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	40(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	0(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	20(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,32(%rsp)
-	lea	-0x70e44324(%eax,%edx),%esi
-	mov	36(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%ebp,%ecx
-	xor	44(%rsp),%eax
-	mov	%edi,%edx
-	and	%r11d,%ebx
-	xor	4(%rsp),%eax
-	or	%r11d,%ecx
-	rol	$5,%edx
-	xor	24(%rsp),%eax
-	and	%r12d,%ecx
-	add	%edx,%esi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%ebp
-	mov	%eax,36(%rsp)
-	add	%ebx,%esi
-	lea	-0x70e44324(%eax,%r12d),%edx
-	mov	40(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edi,%ecx
-	xor	48(%rsp),%eax
-	mov	%esi,%r12d
-	and	%ebp,%ebx
-	xor	8(%rsp),%eax
-	or	%ebp,%ecx
-	rol	$5,%r12d
-	xor	28(%rsp),%eax
-	and	%r11d,%ecx
-	add	%r12d,%edx
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edi
-	mov	%eax,40(%rsp)
-	add	%ebx,%edx
-	lea	-0x70e44324(%eax,%r11d),%r12d
-	mov	44(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%esi,%ecx
-	xor	52(%rsp),%eax
-	mov	%edx,%r11d
-	and	%edi,%ebx
-	xor	12(%rsp),%eax
-	or	%edi,%ecx
-	rol	$5,%r11d
-	xor	32(%rsp),%eax
-	and	%ebp,%ecx
-	add	%r11d,%r12d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%esi
-	mov	%eax,44(%rsp)
-	add	%ebx,%r12d
-	lea	-0x70e44324(%eax,%ebp),%r11d
-	mov	48(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%edx,%ecx
-	xor	56(%rsp),%eax
-	mov	%r12d,%ebp
-	and	%esi,%ebx
-	xor	16(%rsp),%eax
-	or	%esi,%ecx
-	rol	$5,%ebp
-	xor	36(%rsp),%eax
-	and	%edi,%ecx
-	add	%ebp,%r11d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edx
-	mov	%eax,48(%rsp)
-	add	%ebx,%r11d
-	lea	-0x70e44324(%eax,%edi),%ebp
-	mov	52(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%r12d,%ecx
-	xor	60(%rsp),%eax
-	mov	%r11d,%edi
-	and	%edx,%ebx
-	xor	20(%rsp),%eax
-	or	%edx,%ecx
-	rol	$5,%edi
-	xor	40(%rsp),%eax
-	and	%esi,%ecx
-	add	%edi,%ebp
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r12d
-	mov	%eax,52(%rsp)
-	add	%ebx,%ebp
-	lea	-0x70e44324(%eax,%esi),%edi
-	mov	56(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%r11d,%ecx
-	xor	0(%rsp),%eax
-	mov	%ebp,%esi
-	and	%r12d,%ebx
-	xor	24(%rsp),%eax
-	or	%r12d,%ecx
-	rol	$5,%esi
-	xor	44(%rsp),%eax
-	and	%edx,%ecx
-	add	%esi,%edi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r11d
-	mov	%eax,56(%rsp)
-	add	%ebx,%edi
-	lea	-0x70e44324(%eax,%edx),%esi
-	mov	60(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%ebp,%ecx
-	xor	4(%rsp),%eax
-	mov	%edi,%edx
-	and	%r11d,%ebx
-	xor	28(%rsp),%eax
-	or	%r11d,%ecx
-	rol	$5,%edx
-	xor	48(%rsp),%eax
-	and	%r12d,%ecx
-	add	%edx,%esi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%ebp
-	mov	%eax,60(%rsp)
-	add	%ebx,%esi
-	lea	-0x70e44324(%eax,%r12d),%edx
-	mov	0(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edi,%ecx
-	xor	8(%rsp),%eax
-	mov	%esi,%r12d
-	and	%ebp,%ebx
-	xor	32(%rsp),%eax
-	or	%ebp,%ecx
-	rol	$5,%r12d
-	xor	52(%rsp),%eax
-	and	%r11d,%ecx
-	add	%r12d,%edx
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edi
-	mov	%eax,0(%rsp)
-	add	%ebx,%edx
-	lea	-0x70e44324(%eax,%r11d),%r12d
-	mov	4(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%esi,%ecx
-	xor	12(%rsp),%eax
-	mov	%edx,%r11d
-	and	%edi,%ebx
-	xor	36(%rsp),%eax
-	or	%edi,%ecx
-	rol	$5,%r11d
-	xor	56(%rsp),%eax
-	and	%ebp,%ecx
-	add	%r11d,%r12d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%esi
-	mov	%eax,4(%rsp)
-	add	%ebx,%r12d
-	lea	-0x70e44324(%eax,%ebp),%r11d
-	mov	8(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%edx,%ecx
-	xor	16(%rsp),%eax
-	mov	%r12d,%ebp
-	and	%esi,%ebx
-	xor	40(%rsp),%eax
-	or	%esi,%ecx
-	rol	$5,%ebp
-	xor	60(%rsp),%eax
-	and	%edi,%ecx
-	add	%ebp,%r11d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edx
-	mov	%eax,8(%rsp)
-	add	%ebx,%r11d
-	lea	-0x70e44324(%eax,%edi),%ebp
-	mov	12(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%r12d,%ecx
-	xor	20(%rsp),%eax
-	mov	%r11d,%edi
-	and	%edx,%ebx
-	xor	44(%rsp),%eax
-	or	%edx,%ecx
-	rol	$5,%edi
-	xor	0(%rsp),%eax
-	and	%esi,%ecx
-	add	%edi,%ebp
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r12d
-	mov	%eax,12(%rsp)
-	add	%ebx,%ebp
-	lea	-0x70e44324(%eax,%esi),%edi
-	mov	16(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%r11d,%ecx
-	xor	24(%rsp),%eax
-	mov	%ebp,%esi
-	and	%r12d,%ebx
-	xor	48(%rsp),%eax
-	or	%r12d,%ecx
-	rol	$5,%esi
-	xor	4(%rsp),%eax
-	and	%edx,%ecx
-	add	%esi,%edi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r11d
-	mov	%eax,16(%rsp)
-	add	%ebx,%edi
-	lea	-0x70e44324(%eax,%edx),%esi
-	mov	20(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%ebp,%ecx
-	xor	28(%rsp),%eax
-	mov	%edi,%edx
-	and	%r11d,%ebx
-	xor	52(%rsp),%eax
-	or	%r11d,%ecx
-	rol	$5,%edx
-	xor	8(%rsp),%eax
-	and	%r12d,%ecx
-	add	%edx,%esi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%ebp
-	mov	%eax,20(%rsp)
-	add	%ebx,%esi
-	lea	-0x70e44324(%eax,%r12d),%edx
-	mov	24(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edi,%ecx
-	xor	32(%rsp),%eax
-	mov	%esi,%r12d
-	and	%ebp,%ebx
-	xor	56(%rsp),%eax
-	or	%ebp,%ecx
-	rol	$5,%r12d
-	xor	12(%rsp),%eax
-	and	%r11d,%ecx
-	add	%r12d,%edx
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edi
-	mov	%eax,24(%rsp)
-	add	%ebx,%edx
-	lea	-0x70e44324(%eax,%r11d),%r12d
-	mov	28(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%esi,%ecx
-	xor	36(%rsp),%eax
-	mov	%edx,%r11d
-	and	%edi,%ebx
-	xor	60(%rsp),%eax
-	or	%edi,%ecx
-	rol	$5,%r11d
-	xor	16(%rsp),%eax
-	and	%ebp,%ecx
-	add	%r11d,%r12d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%esi
-	mov	%eax,28(%rsp)
-	add	%ebx,%r12d
-	lea	-0x70e44324(%eax,%ebp),%r11d
-	mov	32(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%edx,%ecx
-	xor	40(%rsp),%eax
-	mov	%r12d,%ebp
-	and	%esi,%ebx
-	xor	0(%rsp),%eax
-	or	%esi,%ecx
-	rol	$5,%ebp
-	xor	20(%rsp),%eax
-	and	%edi,%ecx
-	add	%ebp,%r11d
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edx
-	mov	%eax,32(%rsp)
-	add	%ebx,%r11d
-	lea	-0x70e44324(%eax,%edi),%ebp
-	mov	36(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%r12d,%ecx
-	xor	44(%rsp),%eax
-	mov	%r11d,%edi
-	and	%edx,%ebx
-	xor	4(%rsp),%eax
-	or	%edx,%ecx
-	rol	$5,%edi
-	xor	24(%rsp),%eax
-	and	%esi,%ecx
-	add	%edi,%ebp
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r12d
-	mov	%eax,36(%rsp)
-	add	%ebx,%ebp
-	lea	-0x70e44324(%eax,%esi),%edi
-	mov	40(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%r11d,%ecx
-	xor	48(%rsp),%eax
-	mov	%ebp,%esi
-	and	%r12d,%ebx
-	xor	8(%rsp),%eax
-	or	%r12d,%ecx
-	rol	$5,%esi
-	xor	28(%rsp),%eax
-	and	%edx,%ecx
-	add	%esi,%edi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%r11d
-	mov	%eax,40(%rsp)
-	add	%ebx,%edi
-	lea	-0x70e44324(%eax,%edx),%esi
-	mov	44(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%ebp,%ecx
-	xor	52(%rsp),%eax
-	mov	%edi,%edx
-	and	%r11d,%ebx
-	xor	12(%rsp),%eax
-	or	%r11d,%ecx
-	rol	$5,%edx
-	xor	32(%rsp),%eax
-	and	%r12d,%ecx
-	add	%edx,%esi
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%ebp
-	mov	%eax,44(%rsp)
-	add	%ebx,%esi
-	lea	-0x70e44324(%eax,%r12d),%edx
-	mov	48(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edi,%ecx
-	xor	56(%rsp),%eax
-	mov	%esi,%r12d
-	and	%ebp,%ebx
-	xor	16(%rsp),%eax
-	or	%ebp,%ecx
-	rol	$5,%r12d
-	xor	36(%rsp),%eax
-	and	%r11d,%ecx
-	add	%r12d,%edx
-	rol	$1,%eax
-	or	%ecx,%ebx
-	rol	$30,%edi
-	mov	%eax,48(%rsp)
-	add	%ebx,%edx
-	lea	-0x359d3e2a(%eax,%r11d),%r12d
-	mov	52(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	60(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	20(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	40(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,52(%rsp)
-	lea	-0x359d3e2a(%eax,%ebp),%r11d
-	mov	56(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	0(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	24(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	44(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,56(%rsp)
-	lea	-0x359d3e2a(%eax,%edi),%ebp
-	mov	60(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	4(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	28(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	48(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,60(%rsp)
-	lea	-0x359d3e2a(%eax,%esi),%edi
-	mov	0(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	8(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	32(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	52(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,0(%rsp)
-	lea	-0x359d3e2a(%eax,%edx),%esi
-	mov	4(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	12(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	36(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	56(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,4(%rsp)
-	lea	-0x359d3e2a(%eax,%r12d),%edx
-	mov	8(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	16(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	40(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	60(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,8(%rsp)
-	lea	-0x359d3e2a(%eax,%r11d),%r12d
-	mov	12(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	20(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	44(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	0(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,12(%rsp)
-	lea	-0x359d3e2a(%eax,%ebp),%r11d
-	mov	16(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	24(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	48(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	4(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,16(%rsp)
-	lea	-0x359d3e2a(%eax,%edi),%ebp
-	mov	20(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	28(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	52(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	8(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,20(%rsp)
-	lea	-0x359d3e2a(%eax,%esi),%edi
-	mov	24(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	32(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	56(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	12(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,24(%rsp)
-	lea	-0x359d3e2a(%eax,%edx),%esi
-	mov	28(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	36(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	60(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	16(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	mov	%eax,28(%rsp)
-	lea	-0x359d3e2a(%eax,%r12d),%edx
-	mov	32(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	40(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	0(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	20(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	mov	%eax,32(%rsp)
-	lea	-0x359d3e2a(%eax,%r11d),%r12d
-	mov	36(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	44(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	4(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	24(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	mov	%eax,36(%rsp)
-	lea	-0x359d3e2a(%eax,%ebp),%r11d
-	mov	40(%rsp),%eax
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	48(%rsp),%eax
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	8(%rsp),%eax
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	xor	28(%rsp),%eax
-	rol	$30,%edx
-	add	%ebx,%r11d
-	rol	$1,%eax
-	mov	%eax,40(%rsp)
-	lea	-0x359d3e2a(%eax,%edi),%ebp
-	mov	44(%rsp),%eax
-	mov	%edx,%ebx
-	mov	%r11d,%edi
-	xor	52(%rsp),%eax
-	xor	%r12d,%ebx
-	rol	$5,%edi
-	xor	12(%rsp),%eax
-	xor	%esi,%ebx
-	add	%edi,%ebp
-	xor	32(%rsp),%eax
-	rol	$30,%r12d
-	add	%ebx,%ebp
-	rol	$1,%eax
-	mov	%eax,44(%rsp)
-	lea	-0x359d3e2a(%eax,%esi),%edi
-	mov	48(%rsp),%eax
-	mov	%r12d,%ebx
-	mov	%ebp,%esi
-	xor	56(%rsp),%eax
-	xor	%r11d,%ebx
-	rol	$5,%esi
-	xor	16(%rsp),%eax
-	xor	%edx,%ebx
-	add	%esi,%edi
-	xor	36(%rsp),%eax
-	rol	$30,%r11d
-	add	%ebx,%edi
-	rol	$1,%eax
-	mov	%eax,48(%rsp)
-	lea	-0x359d3e2a(%eax,%edx),%esi
-	mov	52(%rsp),%eax
-	mov	%r11d,%ebx
-	mov	%edi,%edx
-	xor	60(%rsp),%eax
-	xor	%ebp,%ebx
-	rol	$5,%edx
-	xor	20(%rsp),%eax
-	xor	%r12d,%ebx
-	add	%edx,%esi
-	xor	40(%rsp),%eax
-	rol	$30,%ebp
-	add	%ebx,%esi
-	rol	$1,%eax
-	lea	-0x359d3e2a(%eax,%r12d),%edx
-	mov	56(%rsp),%eax
-	mov	%ebp,%ebx
-	mov	%esi,%r12d
-	xor	0(%rsp),%eax
-	xor	%edi,%ebx
-	rol	$5,%r12d
-	xor	24(%rsp),%eax
-	xor	%r11d,%ebx
-	add	%r12d,%edx
-	xor	44(%rsp),%eax
-	rol	$30,%edi
-	add	%ebx,%edx
-	rol	$1,%eax
-	lea	-0x359d3e2a(%eax,%r11d),%r12d
-	mov	60(%rsp),%eax
-	mov	%edi,%ebx
-	mov	%edx,%r11d
-	xor	4(%rsp),%eax
-	xor	%esi,%ebx
-	rol	$5,%r11d
-	xor	28(%rsp),%eax
-	xor	%ebp,%ebx
-	add	%r11d,%r12d
-	xor	48(%rsp),%eax
-	rol	$30,%esi
-	add	%ebx,%r12d
-	rol	$1,%eax
-	lea	-0x359d3e2a(%eax,%ebp),%r11d
-	mov	%esi,%ebx
-	mov	%r12d,%ebp
-	xor	%edx,%ebx
-	rol	$5,%ebp
-	xor	%edi,%ebx
-	add	%ebp,%r11d
-	rol	$30,%edx
-	add	%ebx,%r11d
-	// Update and save state information in SHA-1 context
-	add	0(%r8),%r11d
-	add	4(%r8),%r12d
-	add	8(%r8),%edx
-	add	12(%r8),%esi
-	add	16(%r8),%edi
-	mov	%r11d,0(%r8)
-	mov	%r12d,4(%r8)
-	mov	%edx,8(%r8)
-	mov	%esi,12(%r8)
-	mov	%edi,16(%r8)
-
-	xchg	%r11d,%edx	# mov	%r11d,%edx
-	xchg	%r12d,%esi	# mov	%r12d,%esi
-	xchg	%r11d,%edi	# mov	%edx,%edi
-	xchg	%r12d,%ebp	# mov	%esi,%ebp
-			# mov	%edi,%r11d
-	lea	64(%r9),%r9
-	sub	$1,%r10
-	jnz	.Lloop
-	mov	64(%rsp),%rsp
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	ret
-SET_SIZE(sha1_block_data_order)
-
-.data
-.asciz	"SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
-
-#endif /* lint || __lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif

diff --git a/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S
index 766b753..fc0c68b 100644
--- a/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ b/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S

@@ -83,12 +83,22 @@
 #include <sys/asm_linkage.h>
 
 ENTRY_NP(SHA256TransformBlocks)
+.cfi_startproc
+	ENDBR
+	movq	%rsp, %rax
+.cfi_def_cfa_register %rax
 	push	%rbx
+.cfi_offset	%rbx,-16
 	push	%rbp
+.cfi_offset	%rbp,-24
 	push	%r12
+.cfi_offset	%r12,-32
 	push	%r13
+.cfi_offset	%r13,-40
 	push	%r14
+.cfi_offset	%r14,-48
 	push	%r15
+.cfi_offset	%r15,-56
 	mov	%rsp,%rbp		# copy %rsp
 	shl	$4,%rdx		# num*16
 	sub	$16*4+4*8,%rsp
@@ -99,6 +109,9 @@
 	mov	%rsi,16*4+1*8(%rsp)		# save inp, 2nd arg
 	mov	%rdx,16*4+2*8(%rsp)		# save end pointer, "3rd" arg
 	mov	%rbp,16*4+3*8(%rsp)		# save copy of %rsp
+# echo ".cfi_cfa_expression %rsp+88,deref,+56" |
+#	openssl/crypto/perlasm/x86_64-xlate.pl
+.cfi_escape	0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38
 
 	#.picmeup %rbp
 	# The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
@@ -2026,14 +2039,28 @@
 	jb	.Lloop
 
 	mov	16*4+3*8(%rsp),%rsp
+.cfi_def_cfa	%rsp,56
 	pop	%r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r15
 	pop	%r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r14
 	pop	%r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r13
 	pop	%r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r12
 	pop	%rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%rbp
 	pop	%rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%rbx
 
-	ret
+	RET
+.cfi_endproc
 SET_SIZE(SHA256TransformBlocks)
 
 .data

diff --git a/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S
index 6e37618..ab9474a 100644
--- a/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ b/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S

@@ -84,12 +84,22 @@
 #include <sys/asm_linkage.h>
 
 ENTRY_NP(SHA512TransformBlocks)
+.cfi_startproc
+	ENDBR
+	movq	%rsp, %rax
+.cfi_def_cfa_register %rax
 	push	%rbx
+.cfi_offset	%rbx,-16
 	push	%rbp
+.cfi_offset	%rbp,-24
 	push	%r12
+.cfi_offset	%r12,-32
 	push	%r13
+.cfi_offset	%r13,-40
 	push	%r14
+.cfi_offset	%r14,-48
 	push	%r15
+.cfi_offset	%r15,-56
 	mov	%rsp,%rbp		# copy %rsp
 	shl	$4,%rdx		# num*16
 	sub	$16*8+4*8,%rsp
@@ -100,6 +110,9 @@
 	mov	%rsi,16*8+1*8(%rsp)		# save inp, 2nd arg
 	mov	%rdx,16*8+2*8(%rsp)		# save end pointer, "3rd" arg
 	mov	%rbp,16*8+3*8(%rsp)		# save copy of %rsp
+# echo ".cfi_cfa_expression %rsp+152,deref,+56" |
+#	openssl/crypto/perlasm/x86_64-xlate.pl
+.cfi_escape	0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
 
 	#.picmeup %rbp
 	# The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
@@ -2027,14 +2040,28 @@
 	jb	.Lloop
 
 	mov	16*8+3*8(%rsp),%rsp
+.cfi_def_cfa	%rsp,56
 	pop	%r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r15
 	pop	%r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r14
 	pop	%r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r13
 	pop	%r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%r12
 	pop	%rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%rbp
 	pop	%rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore	%rbx
 
-	ret
+	RET
+.cfi_endproc
 SET_SIZE(SHA512TransformBlocks)
 
 .data

diff --git a/zfs/module/icp/core/kcf_mech_tabs.c b/zfs/module/icp/core/kcf_mech_tabs.c
index 2642b31..00a7621 100644
--- a/zfs/module/icp/core/kcf_mech_tabs.c
+++ b/zfs/module/icp/core/kcf_mech_tabs.c

@@ -720,8 +720,8 @@
 	}
 
 	/* free entry  */
-	KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
 	KCF_PROV_IREFRELE(prov_mech->pm_prov_desc);
+	KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
 	kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
 }
 

diff --git a/zfs/module/icp/core/kcf_prov_lib.c b/zfs/module/icp/core/kcf_prov_lib.c
index 3cae872..1b115d9 100644
--- a/zfs/module/icp/core/kcf_prov_lib.c
+++ b/zfs/module/icp/core/kcf_prov_lib.c

@@ -40,7 +40,7 @@
 crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
     void *digest_ctx, void (*update)(void))
 {
-	uio_t *uiop = data->cd_uio;
+	zfs_uio_t *uiop = data->cd_uio;
 	off_t offset = data->cd_offset;
 	size_t length = len;
 	uint_t vec_idx;
@@ -48,7 +48,7 @@
 	uchar_t *datap;
 
 	ASSERT(data->cd_format == CRYPTO_DATA_UIO);
-	if (uiop->uio_segflg != UIO_SYSSPACE) {
+	if (zfs_uio_segflg(uiop) != UIO_SYSSPACE) {
 		return (CRYPTO_ARGUMENTS_BAD);
 	}
 
@@ -56,12 +56,9 @@
 	 * Jump to the first iovec containing data to be
 	 * processed.
 	 */
-	for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
-	    offset >= uiop->uio_iov[vec_idx].iov_len;
-	    offset -= uiop->uio_iov[vec_idx++].iov_len)
-		;
+	offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
 
-	if (vec_idx == uiop->uio_iovcnt && length > 0) {
+	if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
 		/*
 		 * The caller specified an offset that is larger than
 		 * the total size of the buffers it provided.
@@ -69,12 +66,11 @@
 		return (CRYPTO_DATA_LEN_RANGE);
 	}
 
-	while (vec_idx < uiop->uio_iovcnt && length > 0) {
-		cur_len = MIN(uiop->uio_iov[vec_idx].iov_len -
+	while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
+		cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) -
 		    offset, length);
 
-		datap = (uchar_t *)(uiop->uio_iov[vec_idx].iov_base +
-		    offset);
+		datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset);
 		switch (cmd) {
 		case COPY_FROM_DATA:
 			bcopy(datap, buf, cur_len);
@@ -101,7 +97,7 @@
 		offset = 0;
 	}
 
-	if (vec_idx == uiop->uio_iovcnt && length > 0) {
+	if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
 		/*
 		 * The end of the specified iovec's was reached but
 		 * the length requested could not be processed.
@@ -149,6 +145,7 @@
 	common_ctx_t *common_ctx = ctx;
 	int rv;
 
+	ASSERT(input != output);
 	if (input->cd_miscdata != NULL) {
 		copy_block((uint8_t *)input->cd_miscdata,
 		    &common_ctx->cc_iv[0]);
@@ -158,7 +155,7 @@
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset,
-	    input->cd_length, (input == output) ? NULL : output);
+	    input->cd_length, output);
 
 	return (rv);
 }
@@ -169,18 +166,19 @@
     void (*copy_block)(uint8_t *, uint64_t *))
 {
 	common_ctx_t *common_ctx = ctx;
-	uio_t *uiop = input->cd_uio;
+	zfs_uio_t *uiop = input->cd_uio;
 	off_t offset = input->cd_offset;
 	size_t length = input->cd_length;
 	uint_t vec_idx;
 	size_t cur_len;
 
+	ASSERT(input != output);
 	if (input->cd_miscdata != NULL) {
 		copy_block((uint8_t *)input->cd_miscdata,
 		    &common_ctx->cc_iv[0]);
 	}
 
-	if (input->cd_uio->uio_segflg != UIO_SYSSPACE) {
+	if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) {
 		return (CRYPTO_ARGUMENTS_BAD);
 	}
 
@@ -188,11 +186,8 @@
 	 * Jump to the first iovec containing data to be
 	 * processed.
 	 */
-	for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
-	    offset >= uiop->uio_iov[vec_idx].iov_len;
-	    offset -= uiop->uio_iov[vec_idx++].iov_len)
-		;
-	if (vec_idx == uiop->uio_iovcnt && length > 0) {
+	offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
+	if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
 		/*
 		 * The caller specified an offset that is larger than the
 		 * total size of the buffers it provided.
@@ -203,19 +198,22 @@
 	/*
 	 * Now process the iovecs.
 	 */
-	while (vec_idx < uiop->uio_iovcnt && length > 0) {
-		cur_len = MIN(uiop->uio_iov[vec_idx].iov_len -
+	while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
+		cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) -
 		    offset, length);
 
-		(cipher)(ctx, uiop->uio_iov[vec_idx].iov_base + offset,
-		    cur_len, (input == output) ? NULL : output);
+		int rv = (cipher)(ctx, zfs_uio_iovbase(uiop, vec_idx) + offset,
+		    cur_len, output);
 
+		if (rv != CRYPTO_SUCCESS) {
+			return (rv);
+		}
 		length -= cur_len;
 		vec_idx++;
 		offset = 0;
 	}
 
-	if (vec_idx == uiop->uio_iovcnt && length > 0) {
+	if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
 		/*
 		 * The end of the specified iovec's was reached but
 		 * the length requested could not be processed, i.e.

diff --git a/zfs/module/icp/core/kcf_prov_tabs.c b/zfs/module/icp/core/kcf_prov_tabs.c
index 94e6937..3b0bf12 100644
--- a/zfs/module/icp/core/kcf_prov_tabs.c
+++ b/zfs/module/icp/core/kcf_prov_tabs.c

@@ -171,8 +171,8 @@
 	 * at that time.
 	 */
 
-	KCF_PROV_REFRELE(prov_desc);
 	KCF_PROV_IREFRELE(prov_desc);
+	KCF_PROV_REFRELE(prov_desc);
 
 	return (CRYPTO_SUCCESS);
 }
@@ -377,7 +377,7 @@
 			mutex_exit(&desc->pd_lock);
 			break;
 		}
-		/* FALLTHRU */
+		fallthrough;
 
 	case CRYPTO_HW_PROVIDER:
 	case CRYPTO_LOGICAL_PROVIDER:

diff --git a/zfs/module/icp/core/kcf_sched.c b/zfs/module/icp/core/kcf_sched.c
index c8c2bbd..fa44a4d 100644
--- a/zfs/module/icp/core/kcf_sched.c
+++ b/zfs/module/icp/core/kcf_sched.c

@@ -872,7 +872,7 @@
  * Utility routine to remove a request from the chain of requests
  * hanging off a context.
  */
-void
+static void
 kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq)
 {
 	kcf_areq_node_t *cur, *prev;
@@ -909,7 +909,7 @@
  *
  * The caller must hold the queue lock and request lock (an_lock).
  */
-void
+static void
 kcf_remove_node(kcf_areq_node_t *node)
 {
 	kcf_areq_node_t *nextp = node->an_next;
@@ -1274,7 +1274,7 @@
  * Allocate the thread pool and initialize all the fields.
  */
 static void
-kcfpool_alloc()
+kcfpool_alloc(void)
 {
 	kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP);
 
@@ -1308,9 +1308,7 @@
 	kcf_areq_node_t *headp;
 	kcf_reqid_table_t *rt;
 
-	kpreempt_disable();
-	rt = kcf_reqid_table[CPU_SEQID & REQID_TABLE_MASK];
-	kpreempt_enable();
+	rt = kcf_reqid_table[CPU_SEQID_UNSTABLE & REQID_TABLE_MASK];
 
 	mutex_enter(&rt->rt_lock);
 

diff --git a/zfs/module/icp/illumos-crypto.c b/zfs/module/icp/illumos-crypto.c
index 3c5ef43..cc990f1 100644
--- a/zfs/module/icp/illumos-crypto.c
+++ b/zfs/module/icp/illumos-crypto.c

@@ -111,7 +111,6 @@
 {
 	skein_mod_fini();
 	sha2_mod_fini();
-	sha1_mod_fini();
 	edonr_mod_fini();
 	aes_mod_fini();
 	kcf_sched_destroy();
@@ -142,7 +141,6 @@
 	/* initialize algorithms */
 	aes_mod_init();
 	edonr_mod_init();
-	sha1_mod_init();
 	sha2_mod_init();
 	skein_mod_init();
 

diff --git a/zfs/module/icp/include/aes/aes_impl.h b/zfs/module/icp/include/aes/aes_impl.h
index 0484462..41dccaa 100644
--- a/zfs/module/icp/include/aes/aes_impl.h
+++ b/zfs/module/icp/include/aes/aes_impl.h

@@ -195,6 +195,16 @@
 extern const aes_impl_ops_t aes_generic_impl;
 #if defined(__x86_64)
 extern const aes_impl_ops_t aes_x86_64_impl;
+
+/* These functions are used to execute amd64 instructions for AMD or Intel: */
+extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
+	const uint32_t cipherKey[], int keyBits);
+extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
+	const uint32_t cipherKey[], int keyBits);
+extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
+	const uint32_t pt[4], uint32_t ct[4]);
+extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
+	const uint32_t ct[4], uint32_t pt[4]);
 #endif
 #if defined(__x86_64) && defined(HAVE_AES)
 extern const aes_impl_ops_t aes_aesni_impl;

diff --git a/zfs/module/icp/include/modes/modes.h b/zfs/module/icp/include/modes/modes.h
index 57a211c..ab71197 100644
--- a/zfs/module/icp/include/modes/modes.h
+++ b/zfs/module/icp/include/modes/modes.h

@@ -219,14 +219,14 @@
 	size_t gcm_pt_buf_len;
 	uint32_t gcm_tmp[4];
 	/*
-	 * The relative positions of gcm_ghash, gcm_H and pre-computed
-	 * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S,
-	 * so please don't change (or adjust accordingly).
+	 * The offset of gcm_Htable relative to gcm_ghash, (32), is hard coded
+	 * in aesni-gcm-x86_64.S, so please don't change (or adjust there).
 	 */
 	uint64_t gcm_ghash[2];
 	uint64_t gcm_H[2];
 #ifdef CAN_USE_GCM_ASM
-	uint64_t gcm_Htable[12][2];
+	uint64_t *gcm_Htable;
+	size_t gcm_htab_len;
 #endif
 	uint64_t gcm_J0[2];
 	uint64_t gcm_len_a_len_c[2];

diff --git a/zfs/module/icp/include/sha1/sha1.h b/zfs/module/icp/include/sha1/sha1.h
deleted file mode 100644
index 251b64f..0000000
--- a/zfs/module/icp/include/sha1/sha1.h
+++ /dev/null

@@ -1,61 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SHA1_H
-#define	_SYS_SHA1_H
-
-#include <sys/types.h>		/* for uint_* */
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * NOTE: n2rng (Niagara2 RNG driver) accesses the state field of
- * SHA1_CTX directly.  NEVER change this structure without verifying
- * compatibility with n2rng.  The important thing is that the state
- * must be in a field declared as uint32_t state[5].
- */
-/* SHA-1 context. */
-typedef struct 	{
-	uint32_t state[5];	/* state (ABCDE) */
-	uint32_t count[2];	/* number of bits, modulo 2^64 (msb first) */
-	union 	{
-		uint8_t		buf8[64];	/* undigested input */
-		uint32_t	buf32[16];	/* realigned input */
-	} buf_un;
-} SHA1_CTX;
-
-#define	SHA1_DIGEST_LENGTH 20
-
-void SHA1Init(SHA1_CTX *);
-void SHA1Update(SHA1_CTX *, const void *, size_t);
-void SHA1Final(void *, SHA1_CTX *);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_H */

diff --git a/zfs/module/icp/include/sha1/sha1_consts.h b/zfs/module/icp/include/sha1/sha1_consts.h
deleted file mode 100644
index 848d25e..0000000
--- a/zfs/module/icp/include/sha1/sha1_consts.h
+++ /dev/null

@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1998, by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#ifndef	_SYS_SHA1_CONSTS_H
-#define	_SYS_SHA1_CONSTS_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * as explained in sha1.c, loading 32-bit constants on a sparc is expensive
- * since it involves both a `sethi' and an `or'.  thus, we instead use `ld'
- * to load the constants from an array called `sha1_consts'.  however, on
- * intel (and perhaps other processors), it is cheaper to load the constant
- * directly.  thus, the c code in SHA1Transform() uses the macro SHA1_CONST()
- * which either expands to a constant or an array reference, depending on
- * the architecture the code is being compiled for.
- */
-
-#include <sys/types.h>		/* uint32_t */
-
-extern	const uint32_t	sha1_consts[];
-
-#if	defined(__sparc)
-#define	SHA1_CONST(x)		(sha1_consts[x])
-#else
-#define	SHA1_CONST(x)		(SHA1_CONST_ ## x)
-#endif
-
-/* constants, as provided in FIPS 180-1 */
-
-#define	SHA1_CONST_0		0x5a827999U
-#define	SHA1_CONST_1		0x6ed9eba1U
-#define	SHA1_CONST_2		0x8f1bbcdcU
-#define	SHA1_CONST_3		0xca62c1d6U
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_CONSTS_H */

diff --git a/zfs/module/icp/include/sha1/sha1_impl.h b/zfs/module/icp/include/sha1/sha1_impl.h
deleted file mode 100644
index 1c1f872..0000000
--- a/zfs/module/icp/include/sha1/sha1_impl.h
+++ /dev/null

@@ -1,73 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef	_SHA1_IMPL_H
-#define	_SHA1_IMPL_H
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define	SHA1_HASH_SIZE		20	/* SHA_1 digest length in bytes */
-#define	SHA1_DIGEST_LENGTH	20	/* SHA1 digest length in bytes */
-#define	SHA1_HMAC_BLOCK_SIZE	64	/* SHA1-HMAC block size */
-#define	SHA1_HMAC_MIN_KEY_LEN	1	/* SHA1-HMAC min key length in bytes */
-#define	SHA1_HMAC_MAX_KEY_LEN	INT_MAX /* SHA1-HMAC max key length in bytes */
-#define	SHA1_HMAC_INTS_PER_BLOCK	(SHA1_HMAC_BLOCK_SIZE/sizeof (uint32_t))
-
-/*
- * CSPI information (entry points, provider info, etc.)
- */
-typedef enum sha1_mech_type {
-	SHA1_MECH_INFO_TYPE,		/* SUN_CKM_SHA1 */
-	SHA1_HMAC_MECH_INFO_TYPE,	/* SUN_CKM_SHA1_HMAC */
-	SHA1_HMAC_GEN_MECH_INFO_TYPE	/* SUN_CKM_SHA1_HMAC_GENERAL */
-} sha1_mech_type_t;
-
-/*
- * Context for SHA1 mechanism.
- */
-typedef struct sha1_ctx {
-	sha1_mech_type_t	sc_mech_type;	/* type of context */
-	SHA1_CTX		sc_sha1_ctx;	/* SHA1 context */
-} sha1_ctx_t;
-
-/*
- * Context for SHA1-HMAC and SHA1-HMAC-GENERAL mechanisms.
- */
-typedef struct sha1_hmac_ctx {
-	sha1_mech_type_t	hc_mech_type;	/* type of context */
-	uint32_t		hc_digest_len;	/* digest len in bytes */
-	SHA1_CTX		hc_icontext;	/* inner SHA1 context */
-	SHA1_CTX		hc_ocontext;	/* outer SHA1 context */
-} sha1_hmac_ctx_t;
-
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _SHA1_IMPL_H */

diff --git a/zfs/module/icp/include/sys/bitmap.h b/zfs/module/icp/include/sys/bitmap.h
index b1f6823..4e86ee7 100644
--- a/zfs/module/icp/include/sys/bitmap.h
+++ b/zfs/module/icp/include/sys/bitmap.h

@@ -157,9 +157,9 @@
  * to 0 otherwise.
  */
 #define	BT_ATOMIC_SET(bitmap, bitindex) \
-	{ atomic_or_long(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
+	{ atomic_or_ulong(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
 #define	BT_ATOMIC_CLEAR(bitmap, bitindex) \
-	{ atomic_and_long(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
+	{ atomic_and_ulong(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
 
 #define	BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \
 	{ result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)),	\

diff --git a/zfs/module/icp/include/sys/crypto/ioctl.h b/zfs/module/icp/include/sys/crypto/ioctl.h
index dd59ca7..6e371e3 100644
--- a/zfs/module/icp/include/sys/crypto/ioctl.h
+++ b/zfs/module/icp/include/sys/crypto/ioctl.h

@@ -241,9 +241,6 @@
 #define	CRYPTO_LOGIN			CRYPTO(40)
 #define	CRYPTO_LOGOUT			CRYPTO(41)
 
-/* flag for encrypt and decrypt operations */
-#define	CRYPTO_INPLACE_OPERATION	0x00000001
-
 /*
  * Cryptographic Ioctls
  */

diff --git a/zfs/module/icp/include/sys/ia32/asm_linkage.h b/zfs/module/icp/include/sys/ia32/asm_linkage.h
index f2dae70..beb3592 100644
--- a/zfs/module/icp/include/sys/ia32/asm_linkage.h
+++ b/zfs/module/icp/include/sys/ia32/asm_linkage.h

@@ -30,6 +30,32 @@
 #include <sys/stack.h>
 #include <sys/trap.h>
 
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
+
+#ifndef ENDBR
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+/* CSTYLED */
+#if __has_include(<cet.h>)
+
+#include <cet.h>
+
+#ifdef _CET_ENDBR
+#define	ENDBR	_CET_ENDBR
+#endif /* _CET_ENDBR */
+
+#endif /* <cet.h> */
+#endif /* __ELF__ && __CET__ && __has_include */
+#endif /* !ENDBR */
+
+#ifndef ENDBR
+#define	ENDBR
+#endif
+#ifndef RET
+#define	RET	ret
+#endif
+
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -198,6 +224,7 @@
  * insert the calls to mcount for profiling. ENTRY_NP is identical, but
  * never calls mcount.
  */
+#undef ENTRY
 #define	ENTRY(x) \
 	.text; \
 	.align	ASM_ENTRY_ALIGN; \

diff --git a/zfs/module/icp/include/sys/ia32/stack.h b/zfs/module/icp/include/sys/ia32/stack.h
index c4deb7b..9e7c089 100644
--- a/zfs/module/icp/include/sys/ia32/stack.h
+++ b/zfs/module/icp/include/sys/ia32/stack.h

@@ -126,7 +126,7 @@
 
 #if defined(_KERNEL) && !defined(_ASM)
 
-#if defined(DEBUG)
+#if defined(ZFS_DEBUG)
 #if STACK_ALIGN == 4
 #define	ASSERT_STACK_ALIGNED()						\
 	{								\

diff --git a/zfs/module/icp/include/sys/modctl.h b/zfs/module/icp/include/sys/modctl.h
index a0b94ef..6c26ad6 100644
--- a/zfs/module/icp/include/sys/modctl.h
+++ b/zfs/module/icp/include/sys/modctl.h

@@ -398,7 +398,7 @@
 	char		mod_delay_unload;	/* deferred unload */
 
 	struct modctl_list *mod_requisites;	/* mods this one depends on. */
-	void		*__unused;	/* NOTE: reuse (same size) is OK, */
+	void		*____unused;	/* NOTE: reuse (same size) is OK, */
 					/* deletion causes mdb.vs.core issues */
 	int		mod_loadcnt;	/* number of times mod was loaded */
 	int		mod_nenabled;	/* # of enabled DTrace probes in mod */

diff --git a/zfs/module/icp/io/aes.c b/zfs/module/icp/io/aes.c
index 788bcef..c47c756 100644
--- a/zfs/module/icp/io/aes.c
+++ b/zfs/module/icp/io/aes.c

@@ -92,11 +92,6 @@
 	    AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}
 };
 
-/* operations are in-place if the output buffer is NULL */
-#define	AES_ARG_INPLACE(input, output)				\
-	if ((output) == NULL)					\
-		(output) = (input);
-
 static void aes_provider_status(crypto_provider_handle_t, uint_t *);
 
 static crypto_control_ops_t aes_control_ops = {
@@ -413,7 +408,7 @@
 	    == 0) && (plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0)
 		return (CRYPTO_DATA_LEN_RANGE);
 
-	AES_ARG_INPLACE(plaintext, ciphertext);
+	ASSERT(ciphertext != NULL);
 
 	/*
 	 * We need to just return the length needed to store the output.
@@ -530,7 +525,7 @@
 		return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
 	}
 
-	AES_ARG_INPLACE(ciphertext, plaintext);
+	ASSERT(plaintext != NULL);
 
 	/*
 	 * Return length needed to store the output.
@@ -635,7 +630,7 @@
 	ASSERT(ctx->cc_provider_private != NULL);
 	aes_ctx = ctx->cc_provider_private;
 
-	AES_ARG_INPLACE(plaintext, ciphertext);
+	ASSERT(ciphertext != NULL);
 
 	/* compute number of bytes that will hold the ciphertext */
 	out_len = aes_ctx->ac_remainder_len;
@@ -705,7 +700,7 @@
 	ASSERT(ctx->cc_provider_private != NULL);
 	aes_ctx = ctx->cc_provider_private;
 
-	AES_ARG_INPLACE(ciphertext, plaintext);
+	ASSERT(plaintext != NULL);
 
 	/*
 	 * Compute number of bytes that will hold the plaintext.
@@ -947,7 +942,7 @@
 	size_t length_needed;
 	int ret;
 
-	AES_ARG_INPLACE(plaintext, ciphertext);
+	ASSERT(ciphertext != NULL);
 
 	/*
 	 * CTR, CCM, GCM, and GMAC modes do not require that plaintext
@@ -981,7 +976,7 @@
 	case AES_GMAC_MECH_INFO_TYPE:
 		if (plaintext->cd_length != 0)
 			return (CRYPTO_ARGUMENTS_BAD);
-		/* FALLTHRU */
+		fallthrough;
 	case AES_GCM_MECH_INFO_TYPE:
 		length_needed = plaintext->cd_length + aes_ctx.ac_tag_len;
 		break;
@@ -1056,6 +1051,16 @@
 		bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
 		kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
 	}
+#ifdef CAN_USE_GCM_ASM
+	if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) &&
+	    ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
+
+		gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
+
+		bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
+		kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+	}
+#endif
 
 	return (ret);
 }
@@ -1073,7 +1078,7 @@
 	size_t length_needed;
 	int ret;
 
-	AES_ARG_INPLACE(ciphertext, plaintext);
+	ASSERT(plaintext != NULL);
 
 	/*
 	 * CCM, GCM, CTR, and GMAC modes do not require that ciphertext
@@ -1214,6 +1219,14 @@
 			vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf,
 			    ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len);
 		}
+#ifdef CAN_USE_GCM_ASM
+		if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
+			gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
+
+			bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
+			kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+		}
+#endif
 	}
 
 	return (ret);

diff --git a/zfs/module/icp/io/edonr_mod.c b/zfs/module/icp/io/edonr_mod.c
index 544814a..a806af6 100644
--- a/zfs/module/icp/io/edonr_mod.c
+++ b/zfs/module/icp/io/edonr_mod.c

@@ -25,6 +25,7 @@
 
 #include <sys/modctl.h>
 #include <sys/crypto/common.h>
+#include <sys/crypto/icp.h>
 #include <sys/crypto/spi.h>
 #include <sys/sysmacros.h>
 #include <sys/edonr.h>

diff --git a/zfs/module/icp/io/sha1_mod.c b/zfs/module/icp/io/sha1_mod.c
deleted file mode 100644
index e7c3854..0000000
--- a/zfs/module/icp/io/sha1_mod.c
+++ /dev/null

@@ -1,1241 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/modctl.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/spi.h>
-
-#include <sha1/sha1.h>
-#include <sha1/sha1_impl.h>
-
-/*
- * The sha1 module is created with two modlinkages:
- * - a modlmisc that allows consumers to directly call the entry points
- *   SHA1Init, SHA1Update, and SHA1Final.
- * - a modlcrypto that allows the module to register with the Kernel
- *   Cryptographic Framework (KCF) as a software provider for the SHA1
- *   mechanisms.
- */
-
-static struct modlcrypto modlcrypto = {
-	&mod_cryptoops,
-	"SHA1 Kernel SW Provider 1.1"
-};
-
-static struct modlinkage modlinkage = {
-	MODREV_1, { &modlcrypto, NULL }
-};
-
-
-/*
- * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed
- * by KCF to one of the entry points.
- */
-
-#define	PROV_SHA1_CTX(ctx)	((sha1_ctx_t *)(ctx)->cc_provider_private)
-#define	PROV_SHA1_HMAC_CTX(ctx)	((sha1_hmac_ctx_t *)(ctx)->cc_provider_private)
-
-/* to extract the digest length passed as mechanism parameter */
-#define	PROV_SHA1_GET_DIGEST_LEN(m, len) {				\
-	if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t)))		\
-		(len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \
-	else {								\
-		ulong_t tmp_ulong;					\
-		bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t));	\
-		(len) = (uint32_t)tmp_ulong;				\
-	}								\
-}
-
-#define	PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) {	\
-	SHA1Init(ctx);					\
-	SHA1Update(ctx, key, len);			\
-	SHA1Final(digest, ctx);				\
-}
-
-/*
- * Mechanism info structure passed to KCF during registration.
- */
-static crypto_mech_info_t sha1_mech_info_tab[] = {
-	/* SHA1 */
-	{SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE,
-	    CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
-	    0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
-	/* SHA1-HMAC */
-	{SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE,
-	    CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
-	    SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
-	    CRYPTO_KEYSIZE_UNIT_IN_BYTES},
-	/* SHA1-HMAC GENERAL */
-	{SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE,
-	    CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
-	    SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
-	    CRYPTO_KEYSIZE_UNIT_IN_BYTES}
-};
-
-static void sha1_provider_status(crypto_provider_handle_t, uint_t *);
-
-static crypto_control_ops_t sha1_control_ops = {
-	sha1_provider_status
-};
-
-static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
-    crypto_req_handle_t);
-static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
-    crypto_req_handle_t);
-static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *,
-    crypto_req_handle_t);
-static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *,
-    crypto_req_handle_t);
-static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
-    crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
-    crypto_req_handle_t);
-
-static crypto_digest_ops_t sha1_digest_ops = {
-	.digest_init = sha1_digest_init,
-	.digest = sha1_digest,
-	.digest_update = sha1_digest_update,
-	.digest_key = NULL,
-	.digest_final = sha1_digest_final,
-	.digest_atomic = sha1_digest_atomic
-};
-
-static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
-    crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *,
-    crypto_req_handle_t);
-static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
-    crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
-    crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
-    crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
-    crypto_spi_ctx_template_t, crypto_req_handle_t);
-
-static crypto_mac_ops_t sha1_mac_ops = {
-	.mac_init = sha1_mac_init,
-	.mac = NULL,
-	.mac_update = sha1_mac_update,
-	.mac_final = sha1_mac_final,
-	.mac_atomic = sha1_mac_atomic,
-	.mac_verify_atomic = sha1_mac_verify_atomic
-};
-
-static int sha1_create_ctx_template(crypto_provider_handle_t,
-    crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
-    size_t *, crypto_req_handle_t);
-static int sha1_free_context(crypto_ctx_t *);
-
-static crypto_ctx_ops_t sha1_ctx_ops = {
-	.create_ctx_template = sha1_create_ctx_template,
-	.free_context = sha1_free_context
-};
-
-static crypto_ops_t sha1_crypto_ops = {{{{{
-	&sha1_control_ops,
-	&sha1_digest_ops,
-	NULL,
-	&sha1_mac_ops,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	&sha1_ctx_ops,
-}}}}};
-
-static crypto_provider_info_t sha1_prov_info = {{{{
-	CRYPTO_SPI_VERSION_1,
-	"SHA1 Software Provider",
-	CRYPTO_SW_PROVIDER,
-	NULL,
-	&sha1_crypto_ops,
-	sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t),
-	sha1_mech_info_tab
-}}}};
-
-static crypto_kcf_provider_handle_t sha1_prov_handle = 0;
-
-int
-sha1_mod_init(void)
-{
-	int ret;
-
-	if ((ret = mod_install(&modlinkage)) != 0)
-		return (ret);
-
-	/*
-	 * Register with KCF. If the registration fails, log an
-	 * error but do not uninstall the module, since the functionality
-	 * provided by misc/sha1 should still be available.
-	 */
-	if ((ret = crypto_register_provider(&sha1_prov_info,
-	    &sha1_prov_handle)) != CRYPTO_SUCCESS)
-		cmn_err(CE_WARN, "sha1 _init: "
-		    "crypto_register_provider() failed (0x%x)", ret);
-
-	return (0);
-}
-
-int
-sha1_mod_fini(void)
-{
-	int ret;
-
-	if (sha1_prov_handle != 0) {
-		if ((ret = crypto_unregister_provider(sha1_prov_handle)) !=
-		    CRYPTO_SUCCESS) {
-			cmn_err(CE_WARN,
-			    "sha1 _fini: crypto_unregister_provider() "
-			    "failed (0x%x)", ret);
-			return (EBUSY);
-		}
-		sha1_prov_handle = 0;
-	}
-
-	return (mod_remove(&modlinkage));
-}
-
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-sha1_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
-	*status = CRYPTO_PROVIDER_READY;
-}
-
-/*
- * KCF software provider digest entry points.
- */
-
-static int
-sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
-    crypto_req_handle_t req)
-{
-	if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
-		return (CRYPTO_MECHANISM_INVALID);
-
-	/*
-	 * Allocate and initialize SHA1 context.
-	 */
-	ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t),
-	    crypto_kmflag(req));
-	if (ctx->cc_provider_private == NULL)
-		return (CRYPTO_HOST_MEMORY);
-
-	PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE;
-	SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
-
-	return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest update function for uio data.
- */
-static int
-sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
-{
-	off_t offset = data->cd_offset;
-	size_t length = data->cd_length;
-	uint_t vec_idx = 0;
-	size_t cur_len;
-
-	/* we support only kernel buffer */
-	if (data->cd_uio->uio_segflg != UIO_SYSSPACE)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	/*
-	 * Jump to the first iovec containing data to be
-	 * digested.
-	 */
-	while (vec_idx < data->cd_uio->uio_iovcnt &&
-	    offset >= data->cd_uio->uio_iov[vec_idx].iov_len) {
-		offset -= data->cd_uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == data->cd_uio->uio_iovcnt) {
-		/*
-		 * The caller specified an offset that is larger than the
-		 * total size of the buffers it provided.
-		 */
-		return (CRYPTO_DATA_LEN_RANGE);
-	}
-
-	/*
-	 * Now do the digesting on the iovecs.
-	 */
-	while (vec_idx < data->cd_uio->uio_iovcnt && length > 0) {
-		cur_len = MIN(data->cd_uio->uio_iov[vec_idx].iov_len -
-		    offset, length);
-
-		SHA1Update(sha1_ctx,
-		    (uint8_t *)data->cd_uio->uio_iov[vec_idx].iov_base + offset,
-		    cur_len);
-
-		length -= cur_len;
-		vec_idx++;
-		offset = 0;
-	}
-
-	if (vec_idx == data->cd_uio->uio_iovcnt && length > 0) {
-		/*
-		 * The end of the specified iovec's was reached but
-		 * the length requested could not be processed, i.e.
-		 * The caller requested to digest more data than it provided.
-		 */
-		return (CRYPTO_DATA_LEN_RANGE);
-	}
-
-	return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest final function for uio data.
- * digest_len is the length of the desired digest. If digest_len
- * is smaller than the default SHA1 digest length, the caller
- * must pass a scratch buffer, digest_scratch, which must
- * be at least SHA1_DIGEST_LENGTH bytes.
- */
-static int
-sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
-    ulong_t digest_len, uchar_t *digest_scratch)
-{
-	off_t offset = digest->cd_offset;
-	uint_t vec_idx = 0;
-
-	/* we support only kernel buffer */
-	if (digest->cd_uio->uio_segflg != UIO_SYSSPACE)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	/*
-	 * Jump to the first iovec containing ptr to the digest to
-	 * be returned.
-	 */
-	while (vec_idx < digest->cd_uio->uio_iovcnt &&
-	    offset >= digest->cd_uio->uio_iov[vec_idx].iov_len) {
-		offset -= digest->cd_uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == digest->cd_uio->uio_iovcnt) {
-		/*
-		 * The caller specified an offset that is
-		 * larger than the total size of the buffers
-		 * it provided.
-		 */
-		return (CRYPTO_DATA_LEN_RANGE);
-	}
-
-	if (offset + digest_len <=
-	    digest->cd_uio->uio_iov[vec_idx].iov_len) {
-		/*
-		 * The computed SHA1 digest will fit in the current
-		 * iovec.
-		 */
-		if (digest_len != SHA1_DIGEST_LENGTH) {
-			/*
-			 * The caller requested a short digest. Digest
-			 * into a scratch buffer and return to
-			 * the user only what was requested.
-			 */
-			SHA1Final(digest_scratch, sha1_ctx);
-			bcopy(digest_scratch, (uchar_t *)digest->
-			    cd_uio->uio_iov[vec_idx].iov_base + offset,
-			    digest_len);
-		} else {
-			SHA1Final((uchar_t *)digest->
-			    cd_uio->uio_iov[vec_idx].iov_base + offset,
-			    sha1_ctx);
-		}
-	} else {
-		/*
-		 * The computed digest will be crossing one or more iovec's.
-		 * This is bad performance-wise but we need to support it.
-		 * Allocate a small scratch buffer on the stack and
-		 * copy it piece meal to the specified digest iovec's.
-		 */
-		uchar_t digest_tmp[SHA1_DIGEST_LENGTH];
-		off_t scratch_offset = 0;
-		size_t length = digest_len;
-		size_t cur_len;
-
-		SHA1Final(digest_tmp, sha1_ctx);
-
-		while (vec_idx < digest->cd_uio->uio_iovcnt && length > 0) {
-			cur_len = MIN(digest->cd_uio->uio_iov[vec_idx].iov_len -
-			    offset, length);
-			bcopy(digest_tmp + scratch_offset,
-			    digest->cd_uio->uio_iov[vec_idx].iov_base + offset,
-			    cur_len);
-
-			length -= cur_len;
-			vec_idx++;
-			scratch_offset += cur_len;
-			offset = 0;
-		}
-
-		if (vec_idx == digest->cd_uio->uio_iovcnt && length > 0) {
-			/*
-			 * The end of the specified iovec's was reached but
-			 * the length requested could not be processed, i.e.
-			 * The caller requested to digest more data than it
-			 * provided.
-			 */
-			return (CRYPTO_DATA_LEN_RANGE);
-		}
-	}
-
-	return (CRYPTO_SUCCESS);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
-    crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-
-	ASSERT(ctx->cc_provider_private != NULL);
-
-	/*
-	 * We need to just return the length needed to store the output.
-	 * We should not destroy the context for the following cases.
-	 */
-	if ((digest->cd_length == 0) ||
-	    (digest->cd_length < SHA1_DIGEST_LENGTH)) {
-		digest->cd_length = SHA1_DIGEST_LENGTH;
-		return (CRYPTO_BUFFER_TOO_SMALL);
-	}
-
-	/*
-	 * Do the SHA1 update on the specified input data.
-	 */
-	switch (data->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
-		    data->cd_length);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    data);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	if (ret != CRYPTO_SUCCESS) {
-		/* the update failed, free context and bail */
-		kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
-		ctx->cc_provider_private = NULL;
-		digest->cd_length = 0;
-		return (ret);
-	}
-
-	/*
-	 * Do a SHA1 final, must be done separately since the digest
-	 * type can be different than the input data type.
-	 */
-	switch (digest->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Final((unsigned char *)digest->cd_raw.iov_base +
-		    digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    digest, SHA1_DIGEST_LENGTH, NULL);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	/* all done, free context and return */
-
-	if (ret == CRYPTO_SUCCESS) {
-		digest->cd_length = SHA1_DIGEST_LENGTH;
-	} else {
-		digest->cd_length = 0;
-	}
-
-	kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
-	ctx->cc_provider_private = NULL;
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
-    crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-
-	ASSERT(ctx->cc_provider_private != NULL);
-
-	/*
-	 * Do the SHA1 update on the specified input data.
-	 */
-	switch (data->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
-		    data->cd_length);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    data);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
-    crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-
-	ASSERT(ctx->cc_provider_private != NULL);
-
-	/*
-	 * We need to just return the length needed to store the output.
-	 * We should not destroy the context for the following cases.
-	 */
-	if ((digest->cd_length == 0) ||
-	    (digest->cd_length < SHA1_DIGEST_LENGTH)) {
-		digest->cd_length = SHA1_DIGEST_LENGTH;
-		return (CRYPTO_BUFFER_TOO_SMALL);
-	}
-
-	/*
-	 * Do a SHA1 final.
-	 */
-	switch (digest->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Final((unsigned char *)digest->cd_raw.iov_base +
-		    digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
-		    digest, SHA1_DIGEST_LENGTH, NULL);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	/* all done, free context and return */
-
-	if (ret == CRYPTO_SUCCESS) {
-		digest->cd_length = SHA1_DIGEST_LENGTH;
-	} else {
-		digest->cd_length = 0;
-	}
-
-	kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
-	ctx->cc_provider_private = NULL;
-
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_atomic(crypto_provider_handle_t provider,
-    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
-    crypto_data_t *data, crypto_data_t *digest,
-    crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-	SHA1_CTX sha1_ctx;
-
-	if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
-		return (CRYPTO_MECHANISM_INVALID);
-
-	/*
-	 * Do the SHA1 init.
-	 */
-	SHA1Init(&sha1_ctx);
-
-	/*
-	 * Do the SHA1 update on the specified input data.
-	 */
-	switch (data->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Update(&sha1_ctx,
-		    (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
-		    data->cd_length);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_update_uio(&sha1_ctx, data);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	if (ret != CRYPTO_SUCCESS) {
-		/* the update failed, bail */
-		digest->cd_length = 0;
-		return (ret);
-	}
-
-	/*
-	 * Do a SHA1 final, must be done separately since the digest
-	 * type can be different than the input data type.
-	 */
-	switch (digest->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Final((unsigned char *)digest->cd_raw.iov_base +
-		    digest->cd_offset, &sha1_ctx);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_final_uio(&sha1_ctx, digest,
-		    SHA1_DIGEST_LENGTH, NULL);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	if (ret == CRYPTO_SUCCESS) {
-		digest->cd_length = SHA1_DIGEST_LENGTH;
-	} else {
-		digest->cd_length = 0;
-	}
-
-	return (ret);
-}
-
-/*
- * KCF software provider mac entry points.
- *
- * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text))
- *
- * Init:
- * The initialization routine initializes what we denote
- * as the inner and outer contexts by doing
- * - for inner context: SHA1(key XOR ipad)
- * - for outer context: SHA1(key XOR opad)
- *
- * Update:
- * Each subsequent SHA1 HMAC update will result in an
- * update of the inner context with the specified data.
- *
- * Final:
- * The SHA1 HMAC final will do a SHA1 final operation on the
- * inner context, and the resulting digest will be used
- * as the data for an update on the outer context. Last
- * but not least, a SHA1 final on the outer context will
- * be performed to obtain the SHA1 HMAC digest to return
- * to the user.
- */
-
-/*
- * Initialize a SHA1-HMAC context.
- */
-static void
-sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
-{
-	uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK];
-	uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK];
-	uint_t i;
-
-	bzero(ipad, SHA1_HMAC_BLOCK_SIZE);
-	bzero(opad, SHA1_HMAC_BLOCK_SIZE);
-
-	bcopy(keyval, ipad, length_in_bytes);
-	bcopy(keyval, opad, length_in_bytes);
-
-	/* XOR key with ipad (0x36) and opad (0x5c) */
-	for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) {
-		ipad[i] ^= 0x36363636;
-		opad[i] ^= 0x5c5c5c5c;
-	}
-
-	/* perform SHA1 on ipad */
-	SHA1Init(&ctx->hc_icontext);
-	SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE);
-
-	/* perform SHA1 on opad */
-	SHA1Init(&ctx->hc_ocontext);
-	SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE);
-}
-
-/*
- */
-static int
-sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
-    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
-    crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
-	if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
-	    mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
-		return (CRYPTO_MECHANISM_INVALID);
-
-	/* Add support for key by attributes (RFE 4706552) */
-	if (key->ck_format != CRYPTO_KEY_RAW)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
-	    crypto_kmflag(req));
-	if (ctx->cc_provider_private == NULL)
-		return (CRYPTO_HOST_MEMORY);
-
-	if (ctx_template != NULL) {
-		/* reuse context template */
-		bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
-		    sizeof (sha1_hmac_ctx_t));
-	} else {
-		/* no context template, compute context */
-		if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
-			uchar_t digested_key[SHA1_DIGEST_LENGTH];
-			sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;
-
-			/*
-			 * Hash the passed-in key to get a smaller key.
-			 * The inner context is used since it hasn't been
-			 * initialized yet.
-			 */
-			PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
-			    key->ck_data, keylen_in_bytes, digested_key);
-			sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
-			    digested_key, SHA1_DIGEST_LENGTH);
-		} else {
-			sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
-			    key->ck_data, keylen_in_bytes);
-		}
-	}
-
-	/*
-	 * Get the mechanism parameters, if applicable.
-	 */
-	PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
-	if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
-		if (mechanism->cm_param == NULL ||
-		    mechanism->cm_param_len != sizeof (ulong_t))
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-		PROV_SHA1_GET_DIGEST_LEN(mechanism,
-		    PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
-		if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
-		    SHA1_DIGEST_LENGTH)
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-	}
-
-	if (ret != CRYPTO_SUCCESS) {
-		bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
-		kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
-		ctx->cc_provider_private = NULL;
-	}
-
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-
-	ASSERT(ctx->cc_provider_private != NULL);
-
-	/*
-	 * Do a SHA1 update of the inner context using the specified
-	 * data.
-	 */
-	switch (data->cd_format) {
-	case CRYPTO_DATA_RAW:
-		SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext,
-		    (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
-		    data->cd_length);
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_update_uio(
-		    &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-	uchar_t digest[SHA1_DIGEST_LENGTH];
-	uint32_t digest_len = SHA1_DIGEST_LENGTH;
-
-	ASSERT(ctx->cc_provider_private != NULL);
-
-	if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type ==
-	    SHA1_HMAC_GEN_MECH_INFO_TYPE)
-		digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len;
-
-	/*
-	 * We need to just return the length needed to store the output.
-	 * We should not destroy the context for the following cases.
-	 */
-	if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) {
-		mac->cd_length = digest_len;
-		return (CRYPTO_BUFFER_TOO_SMALL);
-	}
-
-	/*
-	 * Do a SHA1 final on the inner context.
-	 */
-	SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext);
-
-	/*
-	 * Do a SHA1 update on the outer context, feeding the inner
-	 * digest as data.
-	 */
-	SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest,
-	    SHA1_DIGEST_LENGTH);
-
-	/*
-	 * Do a SHA1 final on the outer context, storing the computing
-	 * digest in the users buffer.
-	 */
-	switch (mac->cd_format) {
-	case CRYPTO_DATA_RAW:
-		if (digest_len != SHA1_DIGEST_LENGTH) {
-			/*
-			 * The caller requested a short digest. Digest
-			 * into a scratch buffer and return to
-			 * the user only what was requested.
-			 */
-			SHA1Final(digest,
-			    &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
-			bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
-			    mac->cd_offset, digest_len);
-		} else {
-			SHA1Final((unsigned char *)mac->cd_raw.iov_base +
-			    mac->cd_offset,
-			    &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
-		}
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_final_uio(
-		    &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac,
-		    digest_len, digest);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	if (ret == CRYPTO_SUCCESS) {
-		mac->cd_length = digest_len;
-	} else {
-		mac->cd_length = 0;
-	}
-
-	bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
-	kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
-	ctx->cc_provider_private = NULL;
-
-	return (ret);
-}
-
-#define	SHA1_MAC_UPDATE(data, ctx, ret) {				\
-	switch (data->cd_format) {					\
-	case CRYPTO_DATA_RAW:						\
-		SHA1Update(&(ctx).hc_icontext,				\
-		    (uint8_t *)data->cd_raw.iov_base +			\
-		    data->cd_offset, data->cd_length);			\
-		break;							\
-	case CRYPTO_DATA_UIO:						\
-		ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \
-		break;							\
-	default:							\
-		ret = CRYPTO_ARGUMENTS_BAD;				\
-	}								\
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_atomic(crypto_provider_handle_t provider,
-    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
-    crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
-    crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-	uchar_t digest[SHA1_DIGEST_LENGTH];
-	sha1_hmac_ctx_t sha1_hmac_ctx;
-	uint32_t digest_len = SHA1_DIGEST_LENGTH;
-	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
-	if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
-	    mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
-		return (CRYPTO_MECHANISM_INVALID);
-
-	/* Add support for key by attributes (RFE 4706552) */
-	if (key->ck_format != CRYPTO_KEY_RAW)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	if (ctx_template != NULL) {
-		/* reuse context template */
-		bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-	} else {
-		/* no context template, initialize context */
-		if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
-			/*
-			 * Hash the passed-in key to get a smaller key.
-			 * The inner context is used since it hasn't been
-			 * initialized yet.
-			 */
-			PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
-			    key->ck_data, keylen_in_bytes, digest);
-			sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
-			    SHA1_DIGEST_LENGTH);
-		} else {
-			sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
-			    keylen_in_bytes);
-		}
-	}
-
-	/* get the mechanism parameters, if applicable */
-	if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
-		if (mechanism->cm_param == NULL ||
-		    mechanism->cm_param_len != sizeof (ulong_t)) {
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-			goto bail;
-		}
-		PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
-		if (digest_len > SHA1_DIGEST_LENGTH) {
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-			goto bail;
-		}
-	}
-
-	/* do a SHA1 update of the inner context using the specified data */
-	SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
-	if (ret != CRYPTO_SUCCESS)
-		/* the update failed, free context and bail */
-		goto bail;
-
-	/*
-	 * Do a SHA1 final on the inner context.
-	 */
-	SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
-	/*
-	 * Do an SHA1 update on the outer context, feeding the inner
-	 * digest as data.
-	 */
-	SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
-	/*
-	 * Do a SHA1 final on the outer context, storing the computed
-	 * digest in the users buffer.
-	 */
-	switch (mac->cd_format) {
-	case CRYPTO_DATA_RAW:
-		if (digest_len != SHA1_DIGEST_LENGTH) {
-			/*
-			 * The caller requested a short digest. Digest
-			 * into a scratch buffer and return to
-			 * the user only what was requested.
-			 */
-			SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
-			bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
-			    mac->cd_offset, digest_len);
-		} else {
-			SHA1Final((unsigned char *)mac->cd_raw.iov_base +
-			    mac->cd_offset, &sha1_hmac_ctx.hc_ocontext);
-		}
-		break;
-	case CRYPTO_DATA_UIO:
-		ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac,
-		    digest_len, digest);
-		break;
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	if (ret == CRYPTO_SUCCESS) {
-		mac->cd_length = digest_len;
-	} else {
-		mac->cd_length = 0;
-	}
-	/* Extra paranoia: zeroize the context on the stack */
-	bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-
-	return (ret);
-bail:
-	bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-	mac->cd_length = 0;
-	return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_verify_atomic(crypto_provider_handle_t provider,
-    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
-    crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
-    crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
-	int ret = CRYPTO_SUCCESS;
-	uchar_t digest[SHA1_DIGEST_LENGTH];
-	sha1_hmac_ctx_t sha1_hmac_ctx;
-	uint32_t digest_len = SHA1_DIGEST_LENGTH;
-	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
-	if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
-	    mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
-		return (CRYPTO_MECHANISM_INVALID);
-
-	/* Add support for key by attributes (RFE 4706552) */
-	if (key->ck_format != CRYPTO_KEY_RAW)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	if (ctx_template != NULL) {
-		/* reuse context template */
-		bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-	} else {
-		/* no context template, initialize context */
-		if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
-			/*
-			 * Hash the passed-in key to get a smaller key.
-			 * The inner context is used since it hasn't been
-			 * initialized yet.
-			 */
-			PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
-			    key->ck_data, keylen_in_bytes, digest);
-			sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
-			    SHA1_DIGEST_LENGTH);
-		} else {
-			sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
-			    keylen_in_bytes);
-		}
-	}
-
-	/* get the mechanism parameters, if applicable */
-	if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
-		if (mechanism->cm_param == NULL ||
-		    mechanism->cm_param_len != sizeof (ulong_t)) {
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-			goto bail;
-		}
-		PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
-		if (digest_len > SHA1_DIGEST_LENGTH) {
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-			goto bail;
-		}
-	}
-
-	if (mac->cd_length != digest_len) {
-		ret = CRYPTO_INVALID_MAC;
-		goto bail;
-	}
-
-	/* do a SHA1 update of the inner context using the specified data */
-	SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
-	if (ret != CRYPTO_SUCCESS)
-		/* the update failed, free context and bail */
-		goto bail;
-
-	/* do a SHA1 final on the inner context */
-	SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
-	/*
-	 * Do an SHA1 update on the outer context, feeding the inner
-	 * digest as data.
-	 */
-	SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
-	/*
-	 * Do a SHA1 final on the outer context, storing the computed
-	 * digest in the users buffer.
-	 */
-	SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
-
-	/*
-	 * Compare the computed digest against the expected digest passed
-	 * as argument.
-	 */
-
-	switch (mac->cd_format) {
-
-	case CRYPTO_DATA_RAW:
-		if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
-		    mac->cd_offset, digest_len) != 0)
-			ret = CRYPTO_INVALID_MAC;
-		break;
-
-	case CRYPTO_DATA_UIO: {
-		off_t offset = mac->cd_offset;
-		uint_t vec_idx = 0;
-		off_t scratch_offset = 0;
-		size_t length = digest_len;
-		size_t cur_len;
-
-		/* we support only kernel buffer */
-		if (mac->cd_uio->uio_segflg != UIO_SYSSPACE)
-			return (CRYPTO_ARGUMENTS_BAD);
-
-		/* jump to the first iovec containing the expected digest */
-		while (vec_idx < mac->cd_uio->uio_iovcnt &&
-		    offset >= mac->cd_uio->uio_iov[vec_idx].iov_len) {
-			offset -= mac->cd_uio->uio_iov[vec_idx].iov_len;
-			vec_idx++;
-		}
-		if (vec_idx == mac->cd_uio->uio_iovcnt) {
-			/*
-			 * The caller specified an offset that is
-			 * larger than the total size of the buffers
-			 * it provided.
-			 */
-			ret = CRYPTO_DATA_LEN_RANGE;
-			break;
-		}
-
-		/* do the comparison of computed digest vs specified one */
-		while (vec_idx < mac->cd_uio->uio_iovcnt && length > 0) {
-			cur_len = MIN(mac->cd_uio->uio_iov[vec_idx].iov_len -
-			    offset, length);
-
-			if (bcmp(digest + scratch_offset,
-			    mac->cd_uio->uio_iov[vec_idx].iov_base + offset,
-			    cur_len) != 0) {
-				ret = CRYPTO_INVALID_MAC;
-				break;
-			}
-
-			length -= cur_len;
-			vec_idx++;
-			scratch_offset += cur_len;
-			offset = 0;
-		}
-		break;
-	}
-
-	default:
-		ret = CRYPTO_ARGUMENTS_BAD;
-	}
-
-	bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-	return (ret);
-bail:
-	bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-	mac->cd_length = 0;
-	return (ret);
-}
-
-/*
- * KCF software provider context management entry points.
- */
-
-/* ARGSUSED */
-static int
-sha1_create_ctx_template(crypto_provider_handle_t provider,
-    crypto_mechanism_t *mechanism, crypto_key_t *key,
-    crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
-    crypto_req_handle_t req)
-{
-	sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl;
-	uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
-	if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) &&
-	    (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) {
-		return (CRYPTO_MECHANISM_INVALID);
-	}
-
-	/* Add support for key by attributes (RFE 4706552) */
-	if (key->ck_format != CRYPTO_KEY_RAW)
-		return (CRYPTO_ARGUMENTS_BAD);
-
-	/*
-	 * Allocate and initialize SHA1 context.
-	 */
-	sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t),
-	    crypto_kmflag(req));
-	if (sha1_hmac_ctx_tmpl == NULL)
-		return (CRYPTO_HOST_MEMORY);
-
-	if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
-		uchar_t digested_key[SHA1_DIGEST_LENGTH];
-
-		/*
-		 * Hash the passed-in key to get a smaller key.
-		 * The inner context is used since it hasn't been
-		 * initialized yet.
-		 */
-		PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext,
-		    key->ck_data, keylen_in_bytes, digested_key);
-		sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key,
-		    SHA1_DIGEST_LENGTH);
-	} else {
-		sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data,
-		    keylen_in_bytes);
-	}
-
-	sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;
-	*ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl;
-	*ctx_template_size = sizeof (sha1_hmac_ctx_t);
-
-
-	return (CRYPTO_SUCCESS);
-}
-
-static int
-sha1_free_context(crypto_ctx_t *ctx)
-{
-	uint_t ctx_len;
-	sha1_mech_type_t mech_type;
-
-	if (ctx->cc_provider_private == NULL)
-		return (CRYPTO_SUCCESS);
-
-	/*
-	 * We have to free either SHA1 or SHA1-HMAC contexts, which
-	 * have different lengths.
-	 */
-
-	mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type;
-	if (mech_type == SHA1_MECH_INFO_TYPE)
-		ctx_len = sizeof (sha1_ctx_t);
-	else {
-		ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE ||
-		    mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE);
-		ctx_len = sizeof (sha1_hmac_ctx_t);
-	}
-
-	bzero(ctx->cc_provider_private, ctx_len);
-	kmem_free(ctx->cc_provider_private, ctx_len);
-	ctx->cc_provider_private = NULL;
-
-	return (CRYPTO_SUCCESS);
-}

diff --git a/zfs/module/icp/io/sha2_mod.c b/zfs/module/icp/io/sha2_mod.c
index 3254f55..f3125ad 100644
--- a/zfs/module/icp/io/sha2_mod.c
+++ b/zfs/module/icp/io/sha2_mod.c

@@ -296,19 +296,15 @@
 	size_t cur_len;
 
 	/* we support only kernel buffer */
-	if (data->cd_uio->uio_segflg != UIO_SYSSPACE)
+	if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE)
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	/*
 	 * Jump to the first iovec containing data to be
 	 * digested.
 	 */
-	while (vec_idx < data->cd_uio->uio_iovcnt &&
-	    offset >= data->cd_uio->uio_iov[vec_idx].iov_len) {
-		offset -= data->cd_uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == data->cd_uio->uio_iovcnt) {
+	offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx);
+	if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) {
 		/*
 		 * The caller specified an offset that is larger than the
 		 * total size of the buffers it provided.
@@ -319,18 +315,18 @@
 	/*
 	 * Now do the digesting on the iovecs.
 	 */
-	while (vec_idx < data->cd_uio->uio_iovcnt && length > 0) {
-		cur_len = MIN(data->cd_uio->uio_iov[vec_idx].iov_len -
+	while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) {
+		cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) -
 		    offset, length);
 
-		SHA2Update(sha2_ctx, (uint8_t *)data->cd_uio->
-		    uio_iov[vec_idx].iov_base + offset, cur_len);
+		SHA2Update(sha2_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio,
+		    vec_idx) + offset, cur_len);
 		length -= cur_len;
 		vec_idx++;
 		offset = 0;
 	}
 
-	if (vec_idx == data->cd_uio->uio_iovcnt && length > 0) {
+	if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) {
 		/*
 		 * The end of the specified iovec's was reached but
 		 * the length requested could not be processed, i.e.
@@ -357,19 +353,15 @@
 	uint_t vec_idx = 0;
 
 	/* we support only kernel buffer */
-	if (digest->cd_uio->uio_segflg != UIO_SYSSPACE)
+	if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	/*
 	 * Jump to the first iovec containing ptr to the digest to
 	 * be returned.
 	 */
-	while (vec_idx < digest->cd_uio->uio_iovcnt &&
-	    offset >= digest->cd_uio->uio_iov[vec_idx].iov_len) {
-		offset -= digest->cd_uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == digest->cd_uio->uio_iovcnt) {
+	offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
+	if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) {
 		/*
 		 * The caller specified an offset that is
 		 * larger than the total size of the buffers
@@ -379,7 +371,7 @@
 	}
 
 	if (offset + digest_len <=
-	    digest->cd_uio->uio_iov[vec_idx].iov_len) {
+	    zfs_uio_iovlen(digest->cd_uio, vec_idx)) {
 		/*
 		 * The computed SHA2 digest will fit in the current
 		 * iovec.
@@ -395,12 +387,12 @@
 			 */
 			SHA2Final(digest_scratch, sha2_ctx);
 
-			bcopy(digest_scratch, (uchar_t *)digest->
-			    cd_uio->uio_iov[vec_idx].iov_base + offset,
+			bcopy(digest_scratch, (uchar_t *)
+			    zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
 			    digest_len);
 		} else {
-			SHA2Final((uchar_t *)digest->
-			    cd_uio->uio_iov[vec_idx].iov_base + offset,
+			SHA2Final((uchar_t *)zfs_uio_iovbase(digest->
+			    cd_uio, vec_idx) + offset,
 			    sha2_ctx);
 
 		}
@@ -418,12 +410,12 @@
 
 		SHA2Final(digest_tmp, sha2_ctx);
 
-		while (vec_idx < digest->cd_uio->uio_iovcnt && length > 0) {
+		while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
 			cur_len =
-			    MIN(digest->cd_uio->uio_iov[vec_idx].iov_len -
+			    MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
 			    offset, length);
 			bcopy(digest_tmp + scratch_offset,
-			    digest->cd_uio->uio_iov[vec_idx].iov_base + offset,
+			    zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
 			    cur_len);
 
 			length -= cur_len;
@@ -432,7 +424,7 @@
 			offset = 0;
 		}
 
-		if (vec_idx == digest->cd_uio->uio_iovcnt && length > 0) {
+		if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
 			/*
 			 * The end of the specified iovec's was reached but
 			 * the length requested could not be processed, i.e.
@@ -831,12 +823,15 @@
 	 */
 	if (mechanism->cm_type % 3 == 2) {
 		if (mechanism->cm_param == NULL ||
-		    mechanism->cm_param_len != sizeof (ulong_t))
+		    mechanism->cm_param_len != sizeof (ulong_t)) {
 			ret = CRYPTO_MECHANISM_PARAM_INVALID;
-		PROV_SHA2_GET_DIGEST_LEN(mechanism,
-		    PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
-		if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len)
-			ret = CRYPTO_MECHANISM_PARAM_INVALID;
+		} else {
+			PROV_SHA2_GET_DIGEST_LEN(mechanism,
+			    PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
+			if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len >
+			    sha_digest_len)
+				ret = CRYPTO_MECHANISM_PARAM_INVALID;
+		}
 	}
 
 	if (ret != CRYPTO_SUCCESS) {
@@ -1259,16 +1254,12 @@
 		size_t cur_len;
 
 		/* we support only kernel buffer */
-		if (mac->cd_uio->uio_segflg != UIO_SYSSPACE)
+		if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
 			return (CRYPTO_ARGUMENTS_BAD);
 
 		/* jump to the first iovec containing the expected digest */
-		while (vec_idx < mac->cd_uio->uio_iovcnt &&
-		    offset >= mac->cd_uio->uio_iov[vec_idx].iov_len) {
-			offset -= mac->cd_uio->uio_iov[vec_idx].iov_len;
-			vec_idx++;
-		}
-		if (vec_idx == mac->cd_uio->uio_iovcnt) {
+		offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
+		if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) {
 			/*
 			 * The caller specified an offset that is
 			 * larger than the total size of the buffers
@@ -1279,12 +1270,12 @@
 		}
 
 		/* do the comparison of computed digest vs specified one */
-		while (vec_idx < mac->cd_uio->uio_iovcnt && length > 0) {
-			cur_len = MIN(mac->cd_uio->uio_iov[vec_idx].iov_len -
+		while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) {
+			cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
 			    offset, length);
 
 			if (bcmp(digest + scratch_offset,
-			    mac->cd_uio->uio_iov[vec_idx].iov_base + offset,
+			    zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
 			    cur_len) != 0) {
 				ret = CRYPTO_INVALID_MAC;
 				break;

diff --git a/zfs/module/icp/io/skein_mod.c b/zfs/module/icp/io/skein_mod.c
index afd7f56..8992c58 100644
--- a/zfs/module/icp/io/skein_mod.c
+++ b/zfs/module/icp/io/skein_mod.c

@@ -25,6 +25,7 @@
 
 #include <sys/modctl.h>
 #include <sys/crypto/common.h>
+#include <sys/crypto/icp.h>
 #include <sys/crypto/spi.h>
 #include <sys/sysmacros.h>
 #define	SKEIN_MODULE_IMPL
@@ -271,22 +272,18 @@
 	size_t		length = data->cd_length;
 	uint_t		vec_idx = 0;
 	size_t		cur_len;
-	const uio_t	*uio = data->cd_uio;
+	zfs_uio_t	*uio = data->cd_uio;
 
 	/* we support only kernel buffer */
-	if (uio->uio_segflg != UIO_SYSSPACE)
+	if (zfs_uio_segflg(uio) != UIO_SYSSPACE)
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	/*
 	 * Jump to the first iovec containing data to be
 	 * digested.
 	 */
-	while (vec_idx < uio->uio_iovcnt &&
-	    offset >= uio->uio_iov[vec_idx].iov_len) {
-		offset -= uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == uio->uio_iovcnt) {
+	offset = zfs_uio_index_at_offset(uio, offset, &vec_idx);
+	if (vec_idx == zfs_uio_iovcnt(uio)) {
 		/*
 		 * The caller specified an offset that is larger than the
 		 * total size of the buffers it provided.
@@ -297,16 +294,16 @@
 	/*
 	 * Now do the digesting on the iovecs.
 	 */
-	while (vec_idx < uio->uio_iovcnt && length > 0) {
-		cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset, length);
-		SKEIN_OP(ctx, Update, (uint8_t *)uio->uio_iov[vec_idx].iov_base
+	while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
+		cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length);
+		SKEIN_OP(ctx, Update, (uint8_t *)zfs_uio_iovbase(uio, vec_idx)
 		    + offset, cur_len);
 		length -= cur_len;
 		vec_idx++;
 		offset = 0;
 	}
 
-	if (vec_idx == uio->uio_iovcnt && length > 0) {
+	if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) {
 		/*
 		 * The end of the specified iovec's was reached but
 		 * the length requested could not be processed, i.e.
@@ -325,23 +322,19 @@
 skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
     crypto_req_handle_t req)
 {
-	off_t	offset = digest->cd_offset;
-	uint_t	vec_idx = 0;
-	uio_t	*uio = digest->cd_uio;
+	off_t offset = digest->cd_offset;
+	uint_t vec_idx = 0;
+	zfs_uio_t *uio = digest->cd_uio;
 
 	/* we support only kernel buffer */
-	if (uio->uio_segflg != UIO_SYSSPACE)
+	if (zfs_uio_segflg(uio) != UIO_SYSSPACE)
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	/*
 	 * Jump to the first iovec containing ptr to the digest to be returned.
 	 */
-	while (vec_idx < uio->uio_iovcnt &&
-	    offset >= uio->uio_iov[vec_idx].iov_len) {
-		offset -= uio->uio_iov[vec_idx].iov_len;
-		vec_idx++;
-	}
-	if (vec_idx == uio->uio_iovcnt) {
+	offset = zfs_uio_index_at_offset(uio, offset, &vec_idx);
+	if (vec_idx == zfs_uio_iovcnt(uio)) {
 		/*
 		 * The caller specified an offset that is larger than the
 		 * total size of the buffers it provided.
@@ -349,10 +342,10 @@
 		return (CRYPTO_DATA_LEN_RANGE);
 	}
 	if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <=
-	    uio->uio_iov[vec_idx].iov_len) {
+	    zfs_uio_iovlen(uio, vec_idx)) {
 		/* The computed digest will fit in the current iovec. */
 		SKEIN_OP(ctx, Final,
-		    (uchar_t *)uio->uio_iov[vec_idx].iov_base + offset);
+		    (uchar_t *)zfs_uio_iovbase(uio, vec_idx) + offset);
 	} else {
 		uint8_t *digest_tmp;
 		off_t scratch_offset = 0;
@@ -364,11 +357,11 @@
 		if (digest_tmp == NULL)
 			return (CRYPTO_HOST_MEMORY);
 		SKEIN_OP(ctx, Final, digest_tmp);
-		while (vec_idx < uio->uio_iovcnt && length > 0) {
-			cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset,
+		while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
+			cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset,
 			    length);
 			bcopy(digest_tmp + scratch_offset,
-			    uio->uio_iov[vec_idx].iov_base + offset, cur_len);
+			    zfs_uio_iovbase(uio, vec_idx) + offset, cur_len);
 
 			length -= cur_len;
 			vec_idx++;
@@ -377,7 +370,7 @@
 		}
 		kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen));
 
-		if (vec_idx == uio->uio_iovcnt && length > 0) {
+		if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) {
 			/*
 			 * The end of the specified iovec's was reached but
 			 * the length requested could not be processed, i.e.
@@ -501,7 +494,8 @@
  */
 /*ARGSUSED*/
 static int
-skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+skein_final_nofree(crypto_ctx_t *ctx, crypto_data_t *digest,
+    crypto_req_handle_t req)
 {
 	int error = CRYPTO_SUCCESS;
 
@@ -532,6 +526,17 @@
 	else
 		digest->cd_length = 0;
 
+	return (error);
+}
+
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+{
+	int error = skein_final_nofree(ctx, digest, req);
+
+	if (error == CRYPTO_BUFFER_TOO_SMALL)
+		return (error);
+
 	bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
 	kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx))));
 	SKEIN_CTX_LVALUE(ctx) = NULL;
@@ -567,7 +572,7 @@
 
 	if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS)
 		goto out;
-	if ((error = skein_final(&ctx, data, digest)) != CRYPTO_SUCCESS)
+	if ((error = skein_final_nofree(&ctx, data, digest)) != CRYPTO_SUCCESS)
 		goto out;
 
 out:
@@ -676,7 +681,7 @@
 
 	if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
 		goto errout;
-	if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS)
+	if ((error = skein_final_nofree(&ctx, mac, req)) != CRYPTO_SUCCESS)
 		goto errout;
 
 	return (CRYPTO_SUCCESS);

diff --git a/zfs/module/icp/spi/kcf_spi.c b/zfs/module/icp/spi/kcf_spi.c
index e438b58..34b36b8 100644
--- a/zfs/module/icp/spi/kcf_spi.c
+++ b/zfs/module/icp/spi/kcf_spi.c

@@ -261,7 +261,7 @@
 			prov_desc->pd_kstat->ks_update = kcf_prov_kstat_update;
 			kstat_install(prov_desc->pd_kstat);
 		}
-		strfree(ks_name);
+		kmem_strfree(ks_name);
 	}
 
 	if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)

diff --git a/zfs/module/lua/Makefile.in b/zfs/module/lua/Makefile.in
index d49065f..0a74c17 100644
--- a/zfs/module/lua/Makefile.in
+++ b/zfs/module/lua/Makefile.in

@@ -1,16 +1,13 @@
-src = @abs_top_srcdir@/module/lua
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+endif
 
 MODULE := zlua
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-asflags-y += $(ZFS_MODULE_CFLAGS)
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-ccflags-y += -DLUA_USE_LONGLONG
-
-# Suppress unused but set variable warnings often due to ASSERTs
-ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
+ccflags-y := -DLUA_USE_LONGLONG
 
 $(MODULE)-objs += lapi.o
 $(MODULE)-objs += lauxlib.o

diff --git a/zfs/module/lua/lapi.c b/zfs/module/lua/lapi.c
index 8196967..75f8b2a 100644
--- a/zfs/module/lua/lapi.c
+++ b/zfs/module/lua/lapi.c

@@ -251,6 +251,8 @@
 
 LUA_API const char *lua_typename (lua_State *L, int t) {
   UNUSED(L);
+  if (t > 8 || t < 0)
+    return "internal_type_error";
   return ttypename(t);
 }
 
@@ -1295,10 +1297,13 @@
 module_init(lua_init);
 module_exit(lua_fini);
 
-MODULE_DESCRIPTION("Lua Interpreter for ZFS");
-MODULE_AUTHOR("Lua.org");
-MODULE_LICENSE("MIT");
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+#endif
+/* END CSTYLED */
+
+ZFS_MODULE_DESCRIPTION("Lua Interpreter for ZFS");
+ZFS_MODULE_AUTHOR("Lua.org");
+ZFS_MODULE_LICENSE("Dual MIT/GPL");
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 
 EXPORT_SYMBOL(lua_absindex);
 EXPORT_SYMBOL(lua_atpanic);
@@ -1340,6 +1345,3 @@
 EXPORT_SYMBOL(lua_touserdata);
 EXPORT_SYMBOL(lua_type);
 EXPORT_SYMBOL(lua_typename);
-
-#endif
-/* END CSTYLED */

diff --git a/zfs/module/lua/lcode.c b/zfs/module/lua/lcode.c
index ae9a3d9..4d88c79 100644
--- a/zfs/module/lua/lcode.c
+++ b/zfs/module/lua/lcode.c

@@ -8,6 +8,10 @@
 #define lcode_c
 #define LUA_CORE
 
+#if defined(HAVE_IMPLICIT_FALLTHROUGH)
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#endif
+
 #include <sys/lua/lua.h>
 
 #include "lcode.h"

diff --git a/zfs/module/lua/ldebug.c b/zfs/module/lua/ldebug.c
index 2e1efa4..da005c4 100644
--- a/zfs/module/lua/ldebug.c
+++ b/zfs/module/lua/ldebug.c

@@ -324,7 +324,6 @@
   if (ISK(c)) {  /* is 'c' a constant? */
     TValue *kvalue = &p->k[INDEXK(c)];
     if (ttisstring(kvalue)) {  /* literal constant? */
-      // cppcheck-suppress autoVariables
       *name = svalue(kvalue);  /* it is its own name */
       return;
     }

diff --git a/zfs/module/lua/ldo.c b/zfs/module/lua/ldo.c
index bddab5f..e4abe04 100644
--- a/zfs/module/lua/ldo.c
+++ b/zfs/module/lua/ldo.c

@@ -33,14 +33,16 @@
 #if defined (_KERNEL) && defined(__linux__)
 #include <asm/current.h>
 static intptr_t stack_remaining(void) {
-  char local;
-  return (intptr_t)(&local - (char *)current->stack);
+  intptr_t local;
+  local = (intptr_t)&local - (intptr_t)current->stack;
+  return local;
 }
 #elif defined (_KERNEL) && defined(__FreeBSD__)
 #include <sys/pcpu.h>
 static intptr_t stack_remaining(void) {
-  char local;
-  return (intptr_t)(&local - (char *)curthread->td_kstack);
+  intptr_t local;
+  local = (intptr_t)&local - (intptr_t)curthread->td_kstack;
+  return local;
 }
 #else
 static intptr_t stack_remaining(void) {
@@ -64,6 +66,7 @@
 
 #ifdef _KERNEL
 
+#ifdef __linux__
 #if defined(__i386__)
 #define	JMP_BUF_CNT	6
 #elif defined(__x86_64__)
@@ -80,6 +83,8 @@
 #define JMP_BUF_CNT	12
 #elif defined(__s390x__)
 #define JMP_BUF_CNT	18
+#elif defined(__riscv)
+#define JMP_BUF_CNT     64
 #else
 #define	JMP_BUF_CNT	1
 #endif
@@ -93,7 +98,7 @@
 #define LUAI_TRY(L,c,a)		if (setjmp(&(c)->b) == 0) { a }
 #define luai_jmpbuf		label_t
 
-/* unsupported archs will build but not be able to run lua programs */
+/* unsupported arches will build but not be able to run lua programs */
 #if JMP_BUF_CNT == 1
 int setjmp (label_t *buf) {
 	return 1;
@@ -103,6 +108,11 @@
 	for (;;);
 }
 #endif
+#else
+#define LUAI_THROW(L,c)		longjmp((c)->b, 1)
+#define LUAI_TRY(L,c,a)		if (setjmp((c)->b) == 0) { a }
+#define luai_jmpbuf		jmp_buf
+#endif
 
 #else /* _KERNEL */
 
@@ -158,6 +168,13 @@
   L->top = oldtop + 1;
 }
 
+/*
+ * Silence infinite recursion warning which was added to -Wall in gcc 12.1
+ */
+#if defined(HAVE_INFINITE_RECURSION)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winfinite-recursion"
+#endif
 
 l_noret luaD_throw (lua_State *L, int errcode) {
   if (L->errorJmp) {  /* thread has an error handler? */
@@ -180,13 +197,17 @@
   }
 }
 
+#if defined(__GNUC__) && !defined(__clang__) && \
+	defined(HAVE_INFINITE_RECURSION)
+#pragma GCC diagnostic pop
+#endif
+
 
 int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {
   unsigned short oldnCcalls = L->nCcalls;
   struct lua_longjmp lj;
   lj.status = LUA_OK;
   lj.previous = L->errorJmp;  /* chain new error handler */
-  // cppcheck-suppress autoVariables
   L->errorJmp = &lj;
   LUAI_TRY(L, &lj,
     (*f)(L, ud);
@@ -386,7 +407,7 @@
       StkId base;
       Proto *p = clLvalue(func)->p;
       n = cast_int(L->top - func) - 1;  /* number of real arguments */
-      luaD_checkstack(L, p->maxstacksize);
+      luaD_checkstack(L, p->maxstacksize + p->numparams);
       for (; n < p->numparams; n++)
         setnilvalue(L->top++);  /* complete missing arguments */
       if (!p->is_vararg) {

diff --git a/zfs/module/lua/lfunc.h b/zfs/module/lua/lfunc.h
index 59a4fa7..638971b 100644
--- a/zfs/module/lua/lfunc.h
+++ b/zfs/module/lua/lfunc.h

@@ -13,10 +13,10 @@
 
 
 #define sizeCclosure(n)	(cast(int, sizeof(CClosure)) + \
-                         cast(int, sizeof(TValue)*((n)-1)))
+                         cast(int, sizeof(TValue)*((n))))
 
 #define sizeLclosure(n)	(cast(int, sizeof(LClosure)) + \
-                         cast(int, sizeof(TValue *)*((n)-1)))
+                         cast(int, sizeof(TValue *)*((n))))
 
 
 LUAI_FUNC Proto *luaF_newproto (lua_State *L);

diff --git a/zfs/module/lua/lgc.c b/zfs/module/lua/lgc.c
index 55feb24..227ad72 100644
--- a/zfs/module/lua/lgc.c
+++ b/zfs/module/lua/lgc.c

@@ -676,7 +676,7 @@
     case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
     case LUA_TSHRSTR:
       G(L)->strt.nuse--;
-      /* FALLTHROUGH */
+      fallthrough;
     case LUA_TLNGSTR: {
       luaM_freemem(L, o, sizestring(gco2ts(o)));
       break;

diff --git a/zfs/module/lua/llex.c b/zfs/module/lua/llex.c
index 50c301f..f2c9bf8 100644
--- a/zfs/module/lua/llex.c
+++ b/zfs/module/lua/llex.c

@@ -477,7 +477,7 @@
         else if (!lisdigit(ls->current)) return '.';
         /* else go through */
       }
-      /* FALLTHROUGH */
+        fallthrough;
       case '0': case '1': case '2': case '3': case '4':
       case '5': case '6': case '7': case '8': case '9': {
         read_numeral(ls, seminfo);

diff --git a/zfs/module/lua/llimits.h b/zfs/module/lua/llimits.h
index 0587d3b..177092f 100644
--- a/zfs/module/lua/llimits.h
+++ b/zfs/module/lua/llimits.h

@@ -287,8 +287,6 @@
 
 #if defined(ltable_c) && !defined(luai_hashnum)
 
-extern int lcompat_hashnum(int64_t);
-
 #define luai_hashnum(i,n) (i = lcompat_hashnum(n))
 
 #endif

diff --git a/zfs/module/lua/lobject.c b/zfs/module/lua/lobject.c
index 024d319..5d88d36 100644
--- a/zfs/module/lua/lobject.c
+++ b/zfs/module/lua/lobject.c

@@ -144,7 +144,7 @@
   *endptr = cast(char *, s);  /* valid up to here */
  ret:
   if (neg) r = -r;
-  return (r * (1 << e));
+  return ((e >= 0) ? (r * (1ULL << e)) : (r / (1ULL << -e)));
 }
 
 #endif

diff --git a/zfs/module/lua/lobject.h b/zfs/module/lua/lobject.h
index a16b8d6..ede697c 100644
--- a/zfs/module/lua/lobject.h
+++ b/zfs/module/lua/lobject.h

@@ -514,14 +514,14 @@
 typedef struct CClosure {
   ClosureHeader;
   lua_CFunction f;
-  TValue upvalue[1];  /* list of upvalues */
+  TValue upvalue[];  /* list of upvalues */
 } CClosure;
 
 
 typedef struct LClosure {
   ClosureHeader;
   struct Proto *p;
-  UpVal *upvals[1];  /* list of upvalues */
+  UpVal *upvals[];  /* list of upvalues */
 } LClosure;
 
 

diff --git a/zfs/module/lua/lstrlib.c b/zfs/module/lua/lstrlib.c
index 1202775..46e3d8f 100644
--- a/zfs/module/lua/lstrlib.c
+++ b/zfs/module/lua/lstrlib.c

@@ -501,7 +501,7 @@
             }
             case '+':  /* 1 or more repetitions */
               s++;  /* 1 match already done */
-              /* FALLTHROUGH */
+              fallthrough;
             case '*':  /* 0 or more repetitions */
               s = max_expand(ms, s, p, ep);
               break;

diff --git a/zfs/module/lua/ltable.c b/zfs/module/lua/ltable.c
index f604187..f6872ba 100644
--- a/zfs/module/lua/ltable.c
+++ b/zfs/module/lua/ltable.c

@@ -492,7 +492,7 @@
         return luaH_getint(t, k);  /* use specialized version */
       /* else go through */
     }
-    /* FALLTHROUGH */
+      fallthrough;
     default: {
       Node *n = mainposition(t, key);
       do {  /* check whether `key' is somewhere in the chain */

diff --git a/zfs/module/lua/ltablib.c b/zfs/module/lua/ltablib.c
index be5b637..51cafff 100644
--- a/zfs/module/lua/ltablib.c
+++ b/zfs/module/lua/ltablib.c

@@ -244,7 +244,7 @@
   }  /* repeat the routine for the larger one */
 }
 
-static int sort (lua_State *L) {
+static int tsort (lua_State *L) {
   int n = aux_getn(L, 1);
   luaL_checkstack(L, 40, "");  /* assume array is smaller than 2^40 */
   if (!lua_isnoneornil(L, 2))  /* is there a 2nd argument? */
@@ -266,7 +266,7 @@
   {"pack", pack},
   {"unpack", unpack},
   {"remove", tremove},
-  {"sort", sort},
+  {"sort", tsort},
   {NULL, NULL}
 };
 

diff --git a/zfs/module/lua/setjmp/setjmp.S b/zfs/module/lua/setjmp/setjmp.S
index 8d06d3f..1f461a0 100644
--- a/zfs/module/lua/setjmp/setjmp.S
+++ b/zfs/module/lua/setjmp/setjmp.S

@@ -14,4 +14,6 @@
 #include "setjmp_mips.S"
 #elif defined(__s390x__)
 #include "setjmp_s390x.S"
+#elif defined(__riscv)
+#include "setjmp_rv64g.S"
 #endif

diff --git a/zfs/module/lua/setjmp/setjmp_rv64g.S b/zfs/module/lua/setjmp/setjmp_rv64g.S
new file mode 100644
index 0000000..7f6c50d
--- /dev/null
+++ b/zfs/module/lua/setjmp/setjmp_rv64g.S

@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * Portions of this software were developed by SRI International and the
+ * University of Cambridge Computer Laboratory under DARPA/AFRL contract
+ * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Portions of this software were developed by the University of Cambridge
+ * Computer Laboratory as part of the CTSRD Project, with support from the
+ * UK Higher Education Innovation Fund (HEIF).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* ENTRY starts a global function symbol in .text; END records its size. */
+#define ENTRY(sym)                                              \
+        .text; .globl sym; .type sym,@function; sym:
+#define END(sym) .size sym, . - sym
+
+ENTRY(setjmp)
+	/*
+	 * a0 points at the jump buffer, an array of 8-byte slots:
+	 * slot 0 receives sp, slots 1-12 receive s0-s11 and slot 13
+	 * receives ra.
+	 */
+	sd	sp, (0 * 8)(a0)
+	sd	s0, (1 * 8)(a0)
+	sd	s1, (2 * 8)(a0)
+	sd	s2, (3 * 8)(a0)
+	sd	s3, (4 * 8)(a0)
+	sd	s4, (5 * 8)(a0)
+	sd	s5, (6 * 8)(a0)
+	sd	s6, (7 * 8)(a0)
+	sd	s7, (8 * 8)(a0)
+	sd	s8, (9 * 8)(a0)
+	sd	s9, (10 * 8)(a0)
+	sd	s10, (11 * 8)(a0)
+	sd	s11, (12 * 8)(a0)
+	sd	ra, (13 * 8)(a0)
+
+	/* The direct return from setjmp is always 0 */
+	li	a0, 0
+	ret
+END(setjmp)
+
+ENTRY(longjmp)
+	/* a0 = jump buffer filled in by setjmp; slot 0 holds the saved sp */
+	ld	t0, 0(a0)
+	mv	sp, t0
+
+	/* Restore s0-s11 (slots 1-12) and ra (slot 13) */
+	ld	s0, (1 * 8)(a0)
+	ld	s1, (2 * 8)(a0)
+	ld	s2, (3 * 8)(a0)
+	ld	s3, (4 * 8)(a0)
+	ld	s4, (5 * 8)(a0)
+	ld	s5, (6 * 8)(a0)
+	ld	s6, (7 * 8)(a0)
+	ld	s7, (8 * 8)(a0)
+	ld	s8, (9 * 8)(a0)
+	ld	s9, (10 * 8)(a0)
+	ld	s10, (11 * 8)(a0)
+	ld	s11, (12 * 8)(a0)
+	ld	ra, (13 * 8)(a0)
+
+	/* Return a1, but never 0: a 0 would look like the direct return */
+	/* of setjmp and make the caller re-enter the protected code. */
+	seqz	a0, a1
+	add	a0, a0, a1
+	ret
+END(longjmp)

diff --git a/zfs/module/lua/setjmp/setjmp_x86_64.S b/zfs/module/lua/setjmp/setjmp_x86_64.S
index a469cba..47c8867 100644
--- a/zfs/module/lua/setjmp/setjmp_x86_64.S
+++ b/zfs/module/lua/setjmp/setjmp_x86_64.S

@@ -23,7 +23,15 @@
  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
 
+#ifndef RET
+#define	RET	ret
+#endif
+
+#undef ENTRY
 #define	ENTRY(x) \
 	.text; \
 	.align	8; \
@@ -34,7 +42,6 @@
 #define	SET_SIZE(x) \
 	.size	x, [.-x]
 
-
 /*
  * Setjmp and longjmp implement non-local gotos using state vectors
  * type label_t.
@@ -52,7 +59,7 @@
 	movq	0(%rsp), %rdx		/* return address */
 	movq	%rdx, 56(%rdi)		/* rip */
 	xorl	%eax, %eax		/* return 0 */
-	ret
+	RET
 	SET_SIZE(setjmp)
 
 	ENTRY(longjmp)
@@ -67,7 +74,7 @@
 	movq	%rdx, 0(%rsp)
 	xorl	%eax, %eax
 	incl	%eax			/* return 1 */
-	ret
+	RET
 	SET_SIZE(longjmp)
 
 #ifdef __ELF__

diff --git a/zfs/module/nvpair/Makefile.in b/zfs/module/nvpair/Makefile.in
index f420ef9..d814523 100644
--- a/zfs/module/nvpair/Makefile.in
+++ b/zfs/module/nvpair/Makefile.in

@@ -1,12 +1,12 @@
-src = @abs_top_srcdir@/module/nvpair
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+endif
 
 MODULE := znvpair
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
 $(MODULE)-objs += nvpair.o
 $(MODULE)-objs += fnvpair.o
 $(MODULE)-objs += nvpair_alloc_spl.o

diff --git a/zfs/module/nvpair/nvpair.c b/zfs/module/nvpair/nvpair.c
index a47b94c..9834ded 100644
--- a/zfs/module/nvpair/nvpair.c
+++ b/zfs/module/nvpair/nvpair.c

@@ -25,13 +25,24 @@
  * Copyright 2018 RackTop Systems.
  */
 
+/*
+ * Links to Illumos.org for more information on Interface Libraries:
+ * [1] https://illumos.org/man/3lib/libnvpair
+ * [2] https://illumos.org/man/3nvpair/nvlist_alloc
+ * [3] https://illumos.org/man/9f/nvlist_alloc
+ * [4] https://illumos.org/man/9f/nvlist_next_nvpair
+ * [5] https://illumos.org/man/9f/nvpair_value_byte
+ */
+
 #include <sys/debug.h>
 #include <sys/isa_defs.h>
 #include <sys/nvpair.h>
 #include <sys/nvpair_impl.h>
 #include <sys/types.h>
+#include <sys/param.h>
 #include <sys/strings.h>
 #include <rpc/xdr.h>
+#include <sys/mod.h>
 
 #if defined(_KERNEL)
 #include <sys/sunddi.h>
@@ -522,12 +533,14 @@
 	uint64_t index = hash & (priv->nvp_nbuckets - 1);
 
 	ASSERT3U(index, <, priv->nvp_nbuckets);
+	// cppcheck-suppress nullPointerRedundantCheck
 	i_nvp_t *bucket = tab[index];
 
 	/* insert link at the beginning of the bucket */
 	i_nvp_t *new_entry = NVPAIR2I_NVP(nvp);
 	ASSERT3P(new_entry->nvi_hashtable_next, ==, NULL);
 	new_entry->nvi_hashtable_next = bucket;
+	// cppcheck-suppress nullPointerRedundantCheck
 	tab[index] = new_entry;
 
 	priv->nvp_nentries++;
@@ -557,10 +570,10 @@
 	switch (kmflag) {
 	case KM_SLEEP:
 		return (nv_alloc_sleep);
-	case KM_PUSHPAGE:
-		return (nv_alloc_pushpage);
-	default:
+	case KM_NOSLEEP:
 		return (nv_alloc_nosleep);
+	default:
+		return (nv_alloc_pushpage);
 	}
 #else
 	return (nv_alloc_nosleep);
@@ -2553,11 +2566,13 @@
 	int err = 0;
 	nvstream_t nvs;
 	int nvl_endian;
-#ifdef	_LITTLE_ENDIAN
+#if defined(_ZFS_LITTLE_ENDIAN)
 	int host_endian = 1;
-#else
+#elif defined(_ZFS_BIG_ENDIAN)
 	int host_endian = 0;
-#endif	/* _LITTLE_ENDIAN */
+#else
+#error "No endian defined!"
+#endif	/* _ZFS_LITTLE_ENDIAN */
 	nvs_header_t *nvh;
 
 	if (buflen == NULL || nvl == NULL ||
@@ -3201,12 +3216,64 @@
 }
 
 /*
+ * xdrproc_t-compatible callbacks for xdr_array()
+ */
+
+#if defined(_KERNEL) && defined(__linux__) /* Linux kernel */
+
+#define	NVS_BUILD_XDRPROC_T(type)		\
+static bool_t					\
+nvs_xdr_nvp_##type(XDR *xdrs, void *ptr)	\
+{						\
+	return (xdr_##type(xdrs, ptr));		\
+}
+
+#elif !defined(_KERNEL) && defined(XDR_CONTROL) /* tirpc */
+
+#define	NVS_BUILD_XDRPROC_T(type)		\
+static bool_t					\
+nvs_xdr_nvp_##type(XDR *xdrs, ...)		\
+{						\
+	va_list args;				\
+	void *ptr;				\
+						\
+	va_start(args, xdrs);			\
+	ptr = va_arg(args, void *);		\
+	va_end(args);				\
+						\
+	return (xdr_##type(xdrs, ptr));		\
+}
+
+#else /* FreeBSD, sunrpc */
+
+#define	NVS_BUILD_XDRPROC_T(type)		\
+static bool_t					\
+nvs_xdr_nvp_##type(XDR *xdrs, void *ptr, ...)	\
+{						\
+	return (xdr_##type(xdrs, ptr));		\
+}
+
+#endif
+
+/* BEGIN CSTYLED */
+NVS_BUILD_XDRPROC_T(char);
+NVS_BUILD_XDRPROC_T(short);
+NVS_BUILD_XDRPROC_T(u_short);
+NVS_BUILD_XDRPROC_T(int);
+NVS_BUILD_XDRPROC_T(u_int);
+NVS_BUILD_XDRPROC_T(longlong_t);
+NVS_BUILD_XDRPROC_T(u_longlong_t);
+/* END CSTYLED */
+
+/*
  * The format of xdr encoded nvpair is:
  * encode_size, decode_size, name string, data type, nelem, data
  */
 static int
 nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
 {
+	ASSERT(nvs != NULL && nvp != NULL);
+
 	data_type_t type;
 	char	*buf;
 	char	*buf_end = (char *)nvp + nvp->nvp_size;
@@ -3215,7 +3282,7 @@
 	bool_t	ret = FALSE;
 	XDR	*xdr = nvs->nvs_private;
 
-	ASSERT(xdr != NULL && nvp != NULL);
+	ASSERT(xdr != NULL);
 
 	/* name string */
 	if ((buf = NVP_NAME(nvp)) >= buf_end)
@@ -3322,38 +3389,38 @@
 	case DATA_TYPE_INT8_ARRAY:
 	case DATA_TYPE_UINT8_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t),
-		    (xdrproc_t)xdr_char);
+		    nvs_xdr_nvp_char);
 		break;
 
 	case DATA_TYPE_INT16_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t),
-		    sizeof (int16_t), (xdrproc_t)xdr_short);
+		    sizeof (int16_t), nvs_xdr_nvp_short);
 		break;
 
 	case DATA_TYPE_UINT16_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t),
-		    sizeof (uint16_t), (xdrproc_t)xdr_u_short);
+		    sizeof (uint16_t), nvs_xdr_nvp_u_short);
 		break;
 
 	case DATA_TYPE_BOOLEAN_ARRAY:
 	case DATA_TYPE_INT32_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t),
-		    sizeof (int32_t), (xdrproc_t)xdr_int);
+		    sizeof (int32_t), nvs_xdr_nvp_int);
 		break;
 
 	case DATA_TYPE_UINT32_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t),
-		    sizeof (uint32_t), (xdrproc_t)xdr_u_int);
+		    sizeof (uint32_t), nvs_xdr_nvp_u_int);
 		break;
 
 	case DATA_TYPE_INT64_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t),
-		    sizeof (int64_t), (xdrproc_t)xdr_longlong_t);
+		    sizeof (int64_t), nvs_xdr_nvp_longlong_t);
 		break;
 
 	case DATA_TYPE_UINT64_ARRAY:
 		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t),
-		    sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t);
+		    sizeof (uint64_t), nvs_xdr_nvp_u_longlong_t);
 		break;
 
 	case DATA_TYPE_STRING_ARRAY: {
@@ -3603,11 +3670,12 @@
 
 module_init(nvpair_init);
 module_exit(nvpair_fini);
+#endif
 
-MODULE_DESCRIPTION("Generic name/value pair implementation");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+ZFS_MODULE_DESCRIPTION("Generic name/value pair implementation");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 
 EXPORT_SYMBOL(nv_alloc_init);
 EXPORT_SYMBOL(nv_alloc_reset);
@@ -3722,5 +3790,3 @@
 EXPORT_SYMBOL(nvpair_value_string_array);
 EXPORT_SYMBOL(nvpair_value_nvlist_array);
 EXPORT_SYMBOL(nvpair_value_hrtime);
-
-#endif

diff --git a/zfs/module/os/freebsd/spl/acl_common.c b/zfs/module/os/freebsd/spl/acl_common.c
new file mode 100644
index 0000000..7fd0e36
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/acl_common.c

@@ -0,0 +1,1709 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/avl.h>
+#include <sys/misc.h>
+#if defined(_KERNEL)
+#include <sys/kmem.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <acl/acl_common.h>
+#include <sys/debug.h>
+#else
+#include <errno.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <grp.h>
+#include <pwd.h>
+#include <acl_common.h>
+#endif
+
+/* The permission bits ace_make_deny() inverts to turn ALLOW into DENY. */
+#define	ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \
+    ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \
+    ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL)
+
+/* Policy flags tested by access_mask_set(); it reads only the *_SET_* ones. */
+#define	ACL_SYNCHRONIZE_SET_DENY		0x0000001
+#define	ACL_SYNCHRONIZE_SET_ALLOW		0x0000002
+#define	ACL_SYNCHRONIZE_ERR_DENY		0x0000004
+#define	ACL_SYNCHRONIZE_ERR_ALLOW		0x0000008
+
+#define	ACL_WRITE_OWNER_SET_DENY		0x0000010
+#define	ACL_WRITE_OWNER_SET_ALLOW		0x0000020
+#define	ACL_WRITE_OWNER_ERR_DENY		0x0000040
+#define	ACL_WRITE_OWNER_ERR_ALLOW		0x0000080
+
+#define	ACL_DELETE_SET_DENY			0x0000100
+#define	ACL_DELETE_SET_ALLOW			0x0000200
+#define	ACL_DELETE_ERR_DENY			0x0000400
+#define	ACL_DELETE_ERR_ALLOW			0x0000800
+
+#define	ACL_WRITE_ATTRS_OWNER_SET_DENY		0x0001000
+#define	ACL_WRITE_ATTRS_OWNER_SET_ALLOW		0x0002000
+#define	ACL_WRITE_ATTRS_OWNER_ERR_DENY		0x0004000
+#define	ACL_WRITE_ATTRS_OWNER_ERR_ALLOW		0x0008000
+
+#define	ACL_WRITE_ATTRS_WRITER_SET_DENY		0x0010000
+#define	ACL_WRITE_ATTRS_WRITER_SET_ALLOW	0x0020000
+#define	ACL_WRITE_ATTRS_WRITER_ERR_DENY		0x0040000
+#define	ACL_WRITE_ATTRS_WRITER_ERR_ALLOW	0x0080000
+
+#define	ACL_WRITE_NAMED_WRITER_SET_DENY		0x0100000
+#define	ACL_WRITE_NAMED_WRITER_SET_ALLOW	0x0200000
+#define	ACL_WRITE_NAMED_WRITER_ERR_DENY		0x0400000
+#define	ACL_WRITE_NAMED_WRITER_ERR_ALLOW	0x0800000
+
+#define	ACL_READ_NAMED_READER_SET_DENY		0x1000000
+#define	ACL_READ_NAMED_READER_SET_ALLOW		0x2000000
+#define	ACL_READ_NAMED_READER_ERR_DENY		0x4000000
+#define	ACL_READ_NAMED_READER_ERR_ALLOW		0x8000000
+
+#define	ACE_VALID_MASK_BITS (\
+    ACE_READ_DATA | \
+    ACE_LIST_DIRECTORY | \
+    ACE_WRITE_DATA | \
+    ACE_ADD_FILE | \
+    ACE_APPEND_DATA | \
+    ACE_ADD_SUBDIRECTORY | \
+    ACE_READ_NAMED_ATTRS | \
+    ACE_WRITE_NAMED_ATTRS | \
+    ACE_EXECUTE | \
+    ACE_DELETE_CHILD | \
+    ACE_READ_ATTRIBUTES | \
+    ACE_WRITE_ATTRIBUTES | \
+    ACE_DELETE | \
+    ACE_READ_ACL | \
+    ACE_WRITE_ACL | \
+    ACE_WRITE_OWNER | \
+    ACE_SYNCHRONIZE)
+
+#define	ACE_MASK_UNDEFINED			0x80000000
+
+#define	ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \
+    ACE_DIRECTORY_INHERIT_ACE | \
+    ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \
+    ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \
+    ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE)
+
+/*
+ * ACL conversion helpers
+ */
+
+typedef enum {
+	ace_unused,
+	ace_user_obj,
+	ace_user,
+	ace_group, /* includes GROUP and GROUP_OBJ */
+	ace_other_obj
+} ace_to_aent_state_t;
+
+typedef struct acevals {
+	uid_t key;
+	avl_node_t avl;
+	uint32_t mask;
+	uint32_t allowed;
+	uint32_t denied;
+	int aent_type;
+} acevals_t;
+
+typedef struct ace_list {
+	acevals_t user_obj;
+	avl_tree_t user;
+	int numusers;
+	acevals_t group_obj;
+	avl_tree_t group;
+	int numgroups;
+	acevals_t other_obj;
+	uint32_t acl_mask;
+	int hasmask;
+	int dfacl_flag;
+	ace_to_aent_state_t state;
+	int seen; /* bitmask of all aclent_t a_type values seen */
+} ace_list_t;
+
+/*
+ * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified.
+ * v = Ptr to array/vector of objs (must be 4-byte aligned)
+ * n = # objs in the array
+ * s = size of each obj (must be a positive multiple of 4 bytes)
+ * f = ptr to function to compare two objs; two objs are swapped
+ *	only when it returns exactly 1 (-1 = less than, 0 = equal,
+ *	1 = greater than)
+ */
+void
+ksort(caddr_t v, int n, int s, int (*f)(void *, void *))
+{
+	int gap, hi, lo, w;
+	unsigned int *lhs, *rhs, word;
+
+	/* No array, or fewer than two objs: nothing to sort */
+	if (v == NULL || n <= 1)
+		return;
+
+	/* Objs are swapped one 32-bit word at a time, hence these checks */
+	ASSERT3U(((uintptr_t)v & 0x3), ==, 0);
+	ASSERT3S((s & 0x3), ==, 0);
+	ASSERT3S(s, >, 0);
+	for (gap = n / 2; gap > 0; gap /= 2) {
+		for (hi = gap; hi < n; hi++) {
+			for (lo = hi - gap; lo >= 0; lo -= gap) {
+				lhs = (void *)(v + lo * s);
+				rhs = (void *)(v + (lo + gap) * s);
+				if ((*f)(lhs, rhs) != 1)
+					break;
+				for (w = s / 4; w > 0; w--) {
+					word = *lhs;
+					*lhs++ = *rhs;
+					*rhs++ = word;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Compare two aclent_t entries, all fields; used as the ksort() comparator.
+ * a, b = pointers to the two aclent_t entries
+ * a_type is the most significant field, then a_id, then a_perm; the first
+ * field that differs decides the result.  Returns:
+ * -1 (less than)
+ *  0 (equal)
+ * +1 (greater than)
+ */
+int
+cmp2acls(void *a, void *b)
+{
+	aclent_t *lhs = a;
+	aclent_t *rhs = b;
+
+	/* Entry type is the primary key */
+	if (lhs->a_type != rhs->a_type)
+		return ((lhs->a_type < rhs->a_type) ? -1 : 1);
+
+	/* Same type; fall back to the id */
+	if (lhs->a_id != rhs->a_id)
+		return ((lhs->a_id < rhs->a_id) ? -1 : 1);
+
+	/* Same id; fall back to the permission bits */
+	if (lhs->a_perm != rhs->a_perm)
+		return ((lhs->a_perm < rhs->a_perm) ? -1 : 1);
+
+	/* Every field matches */
+	return (0);
+}
+
+/* Store a kmem_zalloc(size, KM_SLEEP) buffer in *ptr; always returns 0. */
+static int
+cacl_malloc(void **ptr, size_t size)
+{
+	*ptr = kmem_zalloc(size, KM_SLEEP);
+	return (0);
+}
+
+#if !defined(_KERNEL)
+/*
+ * Allocate an entry-less acl_t of the given type; NULL if type is unknown.
+ */
+acl_t *
+acl_alloc(enum acl_type type)
+{
+	acl_t *newacl;
+
+	if (cacl_malloc((void **)&newacl, sizeof (acl_t)) != 0)
+		return (NULL);
+
+	newacl->acl_aclp = NULL;
+	newacl->acl_cnt = 0;
+
+	if (type == ACE_T) {
+		newacl->acl_type = ACE_T;
+		newacl->acl_entry_size = sizeof (ace_t);
+	} else if (type == ACLENT_T) {
+		newacl->acl_type = ACLENT_T;
+		newacl->acl_entry_size = sizeof (aclent_t);
+	} else {
+		acl_free(newacl);
+		newacl = NULL;
+	}
+	return (newacl);
+}
+
+/*
+ * Free an acl_t and its entry array; a NULL aclp is a no-op.
+ */
+void
+acl_free(acl_t *aclp)
+{
+	if (aclp == NULL)
+		return;
+
+	/* The entry array goes first, sized as count * entry size */
+	if (aclp->acl_aclp != NULL) {
+		int nbytes = aclp->acl_cnt * aclp->acl_entry_size;
+
+		cacl_free(aclp->acl_aclp, nbytes);
+	}
+
+	cacl_free(aclp, sizeof (acl_t));
+}
+
+/*
+ * Return the ACE access-mask bits that do not come straight from the
+ * rwx mode bits.
+ *	haswriteperm, hasreadperm = only tested for zero/non-zero
+ *	isowner = non-zero if the ACE is for the owner
+ *	isallow = non-zero to build an ALLOW mask, zero for a DENY mask
+ * Apart from the one unconditional case noted below, a permission is
+ * returned only when the ACL_*_SET_* flag selected for it is part of
+ * "policy".
+ */
+static uint32_t
+access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow)
+{
+	const int policy = (ACL_SYNCHRONIZE_SET_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+	    ACL_WRITE_ATTRS_WRITER_SET_DENY);
+	uint32_t bits = 0;
+	int sync_sel, wowner_sel, delete_sel;
+	int wattrs_sel = 0, rnamed_sel = 0, wnamed_sel = 0;
+
+	if (!isallow) {
+		sync_sel = ACL_SYNCHRONIZE_SET_DENY;
+		wowner_sel = ACL_WRITE_OWNER_SET_DENY;
+		delete_sel = ACL_DELETE_SET_DENY;
+		if (hasreadperm)
+			rnamed_sel = ACL_READ_NAMED_READER_SET_DENY;
+		if (haswriteperm)
+			wnamed_sel = ACL_WRITE_NAMED_WRITER_SET_DENY;
+		if (isowner)
+			wattrs_sel = ACL_WRITE_ATTRS_OWNER_SET_DENY;
+		else if (haswriteperm)
+			wattrs_sel = ACL_WRITE_ATTRS_WRITER_SET_DENY;
+		else
+			/*
+			 * Neither the owner nor a writer: the DENY ACE
+			 * always carries ACE_WRITE_ATTRIBUTES.
+			 */
+			bits |= ACE_WRITE_ATTRIBUTES;
+	} else {
+		sync_sel = ACL_SYNCHRONIZE_SET_ALLOW;
+		wowner_sel = ACL_WRITE_OWNER_SET_ALLOW;
+		delete_sel = ACL_DELETE_SET_ALLOW;
+		if (hasreadperm)
+			rnamed_sel = ACL_READ_NAMED_READER_SET_ALLOW;
+		if (haswriteperm)
+			wnamed_sel = ACL_WRITE_NAMED_WRITER_SET_ALLOW;
+		if (isowner)
+			wattrs_sel = ACL_WRITE_ATTRS_OWNER_SET_ALLOW;
+		else if (haswriteperm)
+			wattrs_sel = ACL_WRITE_ATTRS_WRITER_SET_ALLOW;
+	}
+
+	/* Keep only the permissions whose selector the policy enables */
+	bits |= (policy & sync_sel) ? ACE_SYNCHRONIZE : 0;
+	bits |= (policy & wowner_sel) ? ACE_WRITE_OWNER : 0;
+	bits |= (policy & delete_sel) ? ACE_DELETE : 0;
+	bits |= (policy & wattrs_sel) ? ACE_WRITE_ATTRIBUTES : 0;
+	bits |= (policy & rnamed_sel) ? ACE_READ_NAMED_ATTRS : 0;
+	bits |= (policy & wnamed_sel) ? ACE_WRITE_NAMED_ATTRS : 0;
+
+	return (bits);
+}
+
+/*
+ * Given a mode_t, convert it into an access_mask as used by nfsace,
+ * assuming aclent_t -> nfsace semantics.
+ *
+ *	mode = permission bits; only S_IROTH, S_IWOTH and S_IXOTH are read
+ *	isdir = true if the ACE is for a directory
+ *	isowner = non-zero if the ACE is for the owning user
+ *	isallow = non-zero for an ALLOW mask, zero for a DENY mask
+ *
+ * Returns the computed access mask.
+ */
+static uint32_t
+mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow)
+{
+	int wbit = (mode & S_IWOTH);
+	int rbit = (mode & S_IROTH);
+	uint32_t acc;
+
+	/*
+	 * access_mask_set() supplies ACE_SYNCHRONIZE, ACE_WRITE_OWNER,
+	 * ACE_DELETE, ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS and
+	 * ACE_READ_NAMED_ATTRS.  For a DENY mask it is handed the negated
+	 * read/write bits.
+	 */
+	if (isallow)
+		acc = access_mask_set(wbit, rbit, isowner, isallow);
+	else
+		acc = access_mask_set(!wbit, !rbit, isowner, isallow);
+
+	/* Reading the ACL and attributes is part of every ALLOW mask */
+	if (isallow)
+		acc |= ACE_READ_ACL | ACE_READ_ATTRIBUTES;
+
+	/* ACE_WRITE_ACL: allowed to the owner, denied to everyone else */
+	if (!isallow == !isowner)
+		acc |= ACE_WRITE_ACL;
+
+	/* read */
+	if (rbit)
+		acc |= ACE_READ_DATA;
+
+	/* write */
+	if (wbit) {
+		acc |= ACE_WRITE_DATA | ACE_APPEND_DATA;
+		if (isdir)
+			acc |= ACE_DELETE_CHILD;
+	}
+
+	/* exec */
+	if (mode & S_IXOTH)
+		acc |= ACE_EXECUTE;
+
+	return (acc);
+}
+
+/*
+ * Build at *deny the DENY counterpart of the nfsace *allow (presumably
+ * an ALLOW entry): who and flags are copied, the mask is recomputed.
+ */
+static void
+ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner)
+{
+	(void) memcpy(deny, allow, sizeof (ace_t));
+
+	deny->a_who = allow->a_who;
+
+	deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+	deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS;
+	if (isdir)
+		deny->a_access_mask ^= ACE_DELETE_CHILD;
+	/* Clear the six bits access_mask_set() decides, then re-add them */
+	deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER |
+	    ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS |
+	    ACE_WRITE_NAMED_ATTRS);
+	deny->a_access_mask |= access_mask_set((allow->a_access_mask &
+	    ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner,
+	    B_FALSE);
+}
+/*
+ * Make an initial pass over an array of n aclent_t's and report:
+ *	*hasmask, *mask = CLASS_OBJ entry seen, and its a_perm (else 07)
+ *	*numuser, *numgroup = number of USER and of GROUP/GROUP_OBJ entries
+ *	*needsort = 1 if an entry's a_type is lower than an earlier one's
+ * Returns 0, or EINVAL if there are two CLASS_OBJ entries or if there is
+ * no CLASS_OBJ entry but more than one user/group entry.
+ */
+static int
+ln_aent_preprocess(aclent_t *aclent, int n,
+    int *hasmask, mode_t *mask,
+    int *numuser, int *numgroup, int *needsort)
+{
+	int hightype = 0;
+	int idx;
+
+	*hasmask = 0;
+	*mask = 07;
+	*needsort = 0;
+	*numuser = 0;
+	*numgroup = 0;
+
+	for (idx = 0; idx < n; idx++) {
+		aclent_t *ent = &aclent[idx];
+
+		if (ent->a_type < hightype)
+			*needsort = 1;
+		else if (ent->a_type > hightype)
+			hightype = ent->a_type;
+		if (ent->a_type & USER)
+			(*numuser)++;
+		if (ent->a_type & (GROUP | GROUP_OBJ))
+			(*numgroup)++;
+		if (ent->a_type & CLASS_OBJ) {
+			/* A second mask entry is an error */
+			if (*hasmask)
+				return (EINVAL);
+			*hasmask = 1;
+			*mask = ent->a_perm;
+		}
+	}
+
+	/* Several user/group entries require a mask entry */
+	if (!*hasmask && (*numuser + *numgroup > 1))
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Convert an array of aclent_t into an array of nfsace entries,
+ * following POSIX draft -> nfsv4 conversion semantics as outlined in
+ * the IETF draft.
+ */
+static int
+ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir)
+{
+	int error = 0;
+	mode_t mask;
+	int numuser, numgroup, needsort;
+	int resultsize = 0;
+	int i, groupi = 0, skip;
+	ace_t *acep, *result = NULL;
+	int hasmask;
+
+	error = ln_aent_preprocess(aclent, n, &hasmask, &mask,
+	    &numuser, &numgroup, &needsort);
+	if (error != 0)
+		goto out;
+
+	/* allow + deny for each aclent */
+	resultsize = n * 2;
+	if (hasmask) {
+		/*
+		 * stick extra deny on the group_obj and on each
+		 * user|group for the mask (the group_obj was added
+		 * into the count for numgroup)
+		 */
+		resultsize += numuser + numgroup;
+		/* ... and don't count the mask itself */
+		resultsize -= 2;
+	}
+
+	/* sort the source if necessary */
+	if (needsort)
+		ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls);
+
+	if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0)
+		goto out;
+
+	acep = result;
+
+	for (i = 0; i < n; i++) {
+		/*
+		 * don't process CLASS_OBJ (mask); mask was grabbed in
+		 * ln_aent_preprocess()
+		 */
+		if (aclent[i].a_type & CLASS_OBJ)
+			continue;
+
+		/* If we need an ACL_MASK emulator, prepend it now */
+		if ((hasmask) &&
+		    (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) {
+			acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+			acep->a_flags = 0;
+			if (aclent[i].a_type & GROUP_OBJ) {
+				acep->a_who = (uid_t)-1;
+				acep->a_flags |=
+				    (ACE_IDENTIFIER_GROUP|ACE_GROUP);
+			} else if (aclent[i].a_type & USER) {
+				acep->a_who = aclent[i].a_id;
+			} else {
+				acep->a_who = aclent[i].a_id;
+				acep->a_flags |= ACE_IDENTIFIER_GROUP;
+			}
+			if (aclent[i].a_type & ACL_DEFAULT) {
+				acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+				    ACE_FILE_INHERIT_ACE |
+				    ACE_DIRECTORY_INHERIT_ACE;
+			}
+			/*
+			 * Set the access mask for the prepended deny
+			 * ace.  To do this, we invert the mask (found
+			 * in ln_aent_preprocess()) then convert it to an
+			 * DENY ace access_mask.
+			 */
+			acep->a_access_mask = mode_to_ace_access((mask ^ 07),
+			    isdir, 0, 0);
+			acep += 1;
+		}
+
+		/* handle a_perm -> access_mask */
+		acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm,
+		    isdir, aclent[i].a_type & USER_OBJ, 1);
+
+		/* emulate a default aclent */
+		if (aclent[i].a_type & ACL_DEFAULT) {
+			acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+			    ACE_FILE_INHERIT_ACE |
+			    ACE_DIRECTORY_INHERIT_ACE;
+		}
+
+		/*
+		 * handle a_perm and a_id
+		 *
+		 * this must be done last, since it involves the
+		 * corresponding deny aces, which are handled
+		 * differently for each different a_type.
+		 */
+		if (aclent[i].a_type & USER_OBJ) {
+			acep->a_who = (uid_t)-1;
+			acep->a_flags |= ACE_OWNER;
+			ace_make_deny(acep, acep + 1, isdir, B_TRUE);
+			acep += 2;
+		} else if (aclent[i].a_type & USER) {
+			acep->a_who = aclent[i].a_id;
+			ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+			acep += 2;
+		} else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) {
+			if (aclent[i].a_type & GROUP_OBJ) {
+				acep->a_who = (uid_t)-1;
+				acep->a_flags |= ACE_GROUP;
+			} else {
+				acep->a_who = aclent[i].a_id;
+			}
+			acep->a_flags |= ACE_IDENTIFIER_GROUP;
+			/*
+			 * Set the corresponding deny for the group ace.
+			 *
+			 * The deny aces go after all of the groups, unlike
+			 * everything else, where they immediately follow
+			 * the allow ace.
+			 *
+			 * We calculate "skip", the number of slots to
+			 * skip ahead for the deny ace, here.
+			 *
+			 * The pattern is:
+			 * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3
+			 * thus, skip is
+			 * (2 * numgroup) - 1 - groupi
+			 * (2 * numgroup) to account for MD + A
+			 * - 1 to account for the fact that we're on the
+			 * access (A), not the mask (MD)
+			 * - groupi to account for the fact that we have
+			 * passed up groupi number of MD's.
+			 */
+			skip = (2 * numgroup) - 1 - groupi;
+			ace_make_deny(acep, acep + skip, isdir, B_FALSE);
+			/*
+			 * If we just did the last group, skip acep past
+			 * all of the denies; else, just move ahead one.
+			 */
+			if (++groupi >= numgroup)
+				acep += numgroup + 1;
+			else
+				acep += 1;
+		} else if (aclent[i].a_type & OTHER_OBJ) {
+			acep->a_who = (uid_t)-1;
+			acep->a_flags |= ACE_EVERYONE;
+			ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+			acep += 2;
+		} else {
+			error = EINVAL;
+			goto out;
+		}
+	}
+
+	*acepp = result;
+	*rescount = resultsize;
+
+out:
+	if (error != 0) {
+		if ((result != NULL) && (resultsize > 0)) {
+			cacl_free(result, resultsize * sizeof (ace_t));
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Convert an aclent_t list into a single ace_t array.
+ *
+ * The input is sorted in place with cmp2acls, split at the first entry
+ * carrying ACL_DEFAULT, and each half is translated by ln_aent_to_ace().
+ * The two resulting arrays are then concatenated (access ACEs first).
+ *
+ * aclentp/aclcnt	source entries (reordered by the sort)
+ * isdir		default entries are rejected (EINVAL) unless set
+ * retacep/retacecnt	receive the combined array and its length; written
+ *			only on success
+ *
+ * Returns 0 on success, EINVAL for default entries on a non-directory,
+ * ENOMEM if the arrays cannot be merged, or the error reported by
+ * ln_aent_to_ace().
+ */
+static int
+convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir,
+    ace_t **retacep, int *retacecnt)
+{
+	ace_t *acep;
+	ace_t *dfacep = NULL;
+	ace_t *merged;
+	int acecnt = 0;
+	int dfacecnt = 0;
+	int dfaclstart = 0;
+	int dfaclcnt = 0;
+	aclent_t *aclp;
+	int i;
+	int error;
+	int acesz, dfacesz;
+
+	ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls);
+
+	/* after sorting, find where the default entries (if any) begin */
+	for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) {
+		if (aclp->a_type & ACL_DEFAULT)
+			break;
+	}
+
+	if (i < aclcnt) {
+		dfaclstart = i;
+		dfaclcnt = aclcnt - i;
+	}
+
+	if (dfaclcnt && !isdir) {
+		return (EINVAL);
+	}
+
+	error = ln_aent_to_ace(aclentp, i,  &acep, &acecnt, isdir);
+	if (error)
+		return (error);
+
+	if (dfaclcnt) {
+		error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt,
+		    &dfacep, &dfacecnt, isdir);
+		if (error) {
+			if (acep) {
+				cacl_free(acep, acecnt * sizeof (ace_t));
+			}
+			return (error);
+		}
+	}
+
+	/* dfacecnt can only be non-zero when default entries were converted */
+	if (dfacecnt != 0) {
+		acesz = sizeof (ace_t) * acecnt;
+		dfacesz = sizeof (ace_t) * dfacecnt;
+		/*
+		 * Grow through a temporary so that a failed reallocation
+		 * does not lose the only reference to the access ACEs.
+		 * NOTE(review): assumes cacl_realloc() leaves the original
+		 * buffer valid when it returns NULL (realloc semantics) --
+		 * confirm against its definition.
+		 */
+		merged = cacl_realloc(acep, acesz, acesz + dfacesz);
+		if (merged == NULL) {
+			cacl_free(dfacep, dfacesz);
+			if (acep != NULL)
+				cacl_free(acep, acesz);
+			return (ENOMEM);
+		}
+		acep = merged;
+		(void) memcpy(acep + acecnt, dfacep, dfacesz);
+	}
+	if (dfaclcnt)
+		cacl_free(dfacep, dfacecnt * sizeof (ace_t));
+
+	*retacecnt = acecnt + dfacecnt;
+	*retacep = acep;
+	return (0);
+}
+
+/*
+ * Reduce an ACE access mask to "other"-class permission bits
+ * (S_IROTH, S_IWOTH, S_IXOTH) and store them in *modep.
+ *
+ * Write permission requires ACE_WRITE_DATA and ACE_APPEND_DATA together
+ * (plus ACE_DELETE_CHILD when isdir is set); a mask holding only part of
+ * that set yields ENOTSUP and leaves *modep untouched.
+ */
+static int
+ace_mask_to_mode(uint32_t  mask, o_mode_t *modep, boolean_t isdir)
+{
+	uint32_t write_set = ACE_WRITE_DATA | ACE_APPEND_DATA;
+	uint32_t write_have;
+	o_mode_t perms = 0;
+
+	if (isdir)
+		write_set |= ACE_DELETE_CHILD;
+	write_have = mask & write_set;
+
+	/* the write bits are all-or-nothing */
+	if (write_have != 0 && write_have != write_set)
+		return (ENOTSUP);
+
+	if (mask & ACE_READ_DATA)
+		perms |= S_IROTH;
+	if (write_have != 0)
+		perms |= S_IWOTH;
+	if (mask & ACE_EXECUTE)
+		perms |= S_IXOTH;
+
+	*modep = perms;
+	return (0);
+}
+
+/*
+ * Reset an acevals_t: zero the whole structure, record the lookup key and
+ * mark the allowed/denied/mask values as not yet seen.
+ */
+static void
+acevals_init(acevals_t *vals, uid_t key)
+{
+	bzero(vals, sizeof (*vals));
+	vals->key = key;
+	vals->allowed = vals->denied = vals->mask = ACE_MASK_UNDEFINED;
+}
+
+/*
+ * Put an ace_list_t into its initial, empty state.  dfacl_flag is stored
+ * as given (callers in this file pass 0 or ACL_DEFAULT).  The user/group
+ * AVL trees are not touched here.
+ */
+static void
+ace_list_init(ace_list_t *al, int dfacl_flag)
+{
+	/* bookkeeping */
+	al->dfacl_flag = dfacl_flag;
+	al->state = ace_unused;
+	al->seen = 0;
+	al->hasmask = 0;
+	al->acl_mask = 0;
+	al->numgroups = 0;
+	al->numusers = 0;
+
+	/* the three fixed entries */
+	acevals_init(&al->user_obj, 0);
+	acevals_init(&al->group_obj, 0);
+	acevals_init(&al->other_obj, 0);
+}
+
+/*
+ * Look up the acevals holder for ace->a_who in the given avl tree,
+ * creating and inserting a fresh one (and bumping *num) when none exists.
+ * Returns NULL only if the allocation for a new holder fails.
+ *
+ * Note that only one thread will ever touch these avl trees, so
+ * there is no need for locking.
+ */
+static acevals_t *
+acevals_find(ace_t *ace, avl_tree_t *avl, int *num)
+{
+	acevals_t probe;
+	acevals_t *entry;
+	avl_index_t slot;
+
+	probe.key = ace->a_who;
+	entry = avl_find(avl, &probe, &slot);
+	if (entry == NULL) {
+		/* this memory is freed by ln_ace_to_aent()->ace_list_free() */
+		if (cacl_malloc((void **)&entry, sizeof (acevals_t)) != 0)
+			return (NULL);
+
+		acevals_init(entry, ace->a_who);
+		avl_insert(avl, entry, slot);
+		*num += 1;
+	}
+
+	return (entry);
+}
+
+/*
+ * Check one "special" access-mask bit of an ACE against the fixed policy
+ * encoded in acl_consume.
+ *
+ * acep		the ACE being examined (only read)
+ * mask_bit	one of ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE,
+ *		ACE_WRITE_ATTRIBUTES, ACE_READ_NAMED_ATTRS or
+ *		ACE_WRITE_NAMED_ATTRS; anything else yields EINVAL
+ * isowner	non-zero when the ACE applies to the owner
+ *
+ * For the selected bit, a *_SET_* policy flag means the bit must be present
+ * in the ACE and a *_ERR_* flag means it must be absent; a violation
+ * returns ENOTSUP.  Returns 0 when the ACE is acceptable for this bit.
+ */
+static int
+access_mask_check(ace_t *acep, int mask_bit, int isowner)
+{
+	int set_deny, err_deny;
+	int set_allow, err_allow;
+	int acl_consume;
+	int haswriteperm, hasreadperm;
+
+	/*
+	 * For a DENY entry the subject "has" a permission when the entry
+	 * does not deny it; for an ALLOW entry, when the entry grants it.
+	 */
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1;
+	} else {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 1 : 0;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0;
+	}
+
+	acl_consume = (ACL_SYNCHRONIZE_ERR_DENY |
+	    ACL_DELETE_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_ERR_DENY |
+	    ACL_WRITE_ATTRS_WRITER_SET_DENY |
+	    ACL_WRITE_ATTRS_WRITER_ERR_ALLOW |
+	    ACL_WRITE_NAMED_WRITER_ERR_DENY |
+	    ACL_READ_NAMED_READER_ERR_DENY);
+
+	/* pick the four policy flags that govern this mask bit */
+	if (mask_bit == ACE_SYNCHRONIZE) {
+		set_deny = ACL_SYNCHRONIZE_SET_DENY;
+		err_deny =  ACL_SYNCHRONIZE_ERR_DENY;
+		set_allow = ACL_SYNCHRONIZE_SET_ALLOW;
+		err_allow = ACL_SYNCHRONIZE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_OWNER) {
+		set_deny = ACL_WRITE_OWNER_SET_DENY;
+		err_deny =  ACL_WRITE_OWNER_ERR_DENY;
+		set_allow = ACL_WRITE_OWNER_SET_ALLOW;
+		err_allow = ACL_WRITE_OWNER_ERR_ALLOW;
+	} else if (mask_bit == ACE_DELETE) {
+		set_deny = ACL_DELETE_SET_DENY;
+		err_deny =  ACL_DELETE_ERR_DENY;
+		set_allow = ACL_DELETE_SET_ALLOW;
+		err_allow = ACL_DELETE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_ATTRIBUTES) {
+		if (isowner) {
+			set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY;
+			err_deny =  ACL_WRITE_ATTRS_OWNER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW;
+		} else if (haswriteperm) {
+			set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY;
+			err_deny =  ACL_WRITE_ATTRS_WRITER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW;
+		} else {
+			/*
+			 * Neither owner nor writer: an ALLOW entry must not
+			 * grant write_attributes.  The ACE type is a value,
+			 * not a flag bit (it is compared with == everywhere
+			 * else in this file), and the ALLOW type is 0 in the
+			 * NFSv4 ACE encoding, so it has to be tested with
+			 * == rather than a bitwise AND, which could never
+			 * be true.
+			 */
+			if ((acep->a_access_mask & mask_bit) &&
+			    (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE)) {
+				return (ENOTSUP);
+			}
+			return (0);
+		}
+	} else if (mask_bit == ACE_READ_NAMED_ATTRS) {
+		if (!hasreadperm)
+			return (0);
+
+		set_deny = ACL_READ_NAMED_READER_SET_DENY;
+		err_deny = ACL_READ_NAMED_READER_ERR_DENY;
+		set_allow = ACL_READ_NAMED_READER_SET_ALLOW;
+		err_allow = ACL_READ_NAMED_READER_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_NAMED_ATTRS) {
+		if (!haswriteperm)
+			return (0);
+
+		set_deny = ACL_WRITE_NAMED_WRITER_SET_DENY;
+		err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY;
+		set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW;
+		err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW;
+	} else {
+		return (EINVAL);
+	}
+
+	/* SET policy: bit must be present; ERR policy: bit must be absent */
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		if (acl_consume & set_deny) {
+			if (!(acep->a_access_mask & mask_bit)) {
+				return (ENOTSUP);
+			}
+		} else if (acl_consume & err_deny) {
+			if (acep->a_access_mask & mask_bit) {
+				return (ENOTSUP);
+			}
+		}
+	} else {
+		/* ACE_ACCESS_ALLOWED_ACE_TYPE */
+		if (acl_consume & set_allow) {
+			if (!(acep->a_access_mask & mask_bit)) {
+				return (ENOTSUP);
+			}
+		} else if (acl_consume & err_allow) {
+			if (acep->a_access_mask & mask_bit) {
+				return (ENOTSUP);
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Quick legality screen for a single ACE before it is folded into an
+ * aclent_t.  Returns 0 if the ACE may be translatable, EINVAL for flag or
+ * mask bits outside the valid sets, ENOTSUP for anything that is valid
+ * but has no aclent_t equivalent, or whatever access_mask_check() reports.
+ */
+static int
+ace_to_aent_legal(ace_t *acep)
+{
+	/* special bits vetted by access_mask_check(), in this order */
+	static const int special_bits[] = {
+		ACE_SYNCHRONIZE,
+		ACE_WRITE_OWNER,
+		ACE_DELETE,
+		ACE_WRITE_ATTRIBUTES,
+		ACE_READ_NAMED_ATTRS,
+		ACE_WRITE_NAMED_ATTRS
+	};
+	int isallow = (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE);
+	int isdeny = (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE);
+	int isowner;
+	int error;
+	size_t b;
+
+	/* only ALLOW or DENY */
+	if (!isallow && !isdeny)
+		return (ENOTSUP);
+
+	/* check for invalid flags */
+	if (acep->a_flags & ~(ACE_VALID_FLAG_BITS))
+		return (EINVAL);
+
+	/* some flags are illegal */
+	if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG |
+	    ACE_FAILED_ACCESS_ACE_FLAG |
+	    ACE_NO_PROPAGATE_INHERIT_ACE))
+		return (ENOTSUP);
+
+	/* check for invalid masks */
+	if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS))
+		return (EINVAL);
+
+	isowner = (acep->a_flags & ACE_OWNER) ? 1 : 0;
+
+	for (b = 0; b < sizeof (special_bits) / sizeof (special_bits[0]);
+	    b++) {
+		error = access_mask_check(acep, special_bits[b], isowner);
+		if (error)
+			return (error);
+	}
+
+	/* more detailed checking of masks */
+	if (isallow) {
+		if (!(acep->a_access_mask & ACE_READ_ATTRIBUTES))
+			return (ENOTSUP);
+		/* write_data and append_data must travel together */
+		if (!(acep->a_access_mask & ACE_WRITE_DATA) !=
+		    !(acep->a_access_mask & ACE_APPEND_DATA))
+			return (ENOTSUP);
+	}
+
+	/* ACL enforcement: read_acl may never be denied */
+	if ((acep->a_access_mask & ACE_READ_ACL) && !isallow)
+		return (ENOTSUP);
+
+	/* write_acl: allowed for the owner only, and never denied to it */
+	if (acep->a_access_mask & ACE_WRITE_ACL) {
+		if (isdeny && isowner)
+			return (ENOTSUP);
+		if (isallow && !isowner)
+			return (ENOTSUP);
+	}
+
+	return (0);
+}
+
+/*
+ * Convert the mask of an ALLOW entry to mode bits via ace_mask_to_mode().
+ * The mask must carry both ACE_READ_ACL and ACE_READ_ATTRIBUTES,
+ * otherwise ENOTSUP is returned.
+ */
+static int
+ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
+{
+	const uint32_t required = ACE_READ_ACL | ACE_READ_ATTRIBUTES;
+
+	if ((mask & required) == required)
+		return (ace_mask_to_mode(mask, modep, isdir));
+
+	return (ENOTSUP);
+}
+
+/*
+ * Fill in one aclent_t (dest) from the collected allow/deny values.
+ *
+ * The allowed mask must be the exact complement of the denied mask within
+ * the POSIX-supported bits (plus ACE_DELETE_CHILD for directories), and,
+ * when the list has a mask, USER/GROUP/GROUP_OBJ entries must agree with
+ * it; otherwise ENOTSUP.  EINVAL is returned for an unrecognised entry
+ * type.  a_id is taken from the key, the owner, the group, or 0 depending
+ * on the entry type.
+ */
+static int
+acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list,
+    uid_t owner, gid_t group, boolean_t isdir)
+{
+	uint32_t flips = isdir ?
+	    (ACE_POSIX_SUPPORTED_BITS | ACE_DELETE_CHILD) :
+	    ACE_POSIX_SUPPORTED_BITS;
+	int error;
+
+	if (vals->allowed != (vals->denied ^ flips))
+		return (ENOTSUP);
+
+	if (list->hasmask && list->acl_mask != vals->mask &&
+	    (vals->aent_type & (USER | GROUP | GROUP_OBJ)))
+		return (ENOTSUP);
+
+	error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir);
+	if (error != 0)
+		return (error);
+
+	dest->a_type = vals->aent_type;
+
+	if (dest->a_type & (USER | GROUP))
+		dest->a_id = vals->key;
+	else if (dest->a_type & USER_OBJ)
+		dest->a_id = owner;
+	else if (dest->a_type & GROUP_OBJ)
+		dest->a_id = group;
+	else if (dest->a_type & OTHER_OBJ)
+		dest->a_id = 0;
+	else
+		return (EINVAL);
+
+	return (0);
+}
+
+
+/*
+ * Produce an aclent_t array from a fully collated ace_list_t.
+ *
+ * Entries are emitted in the order USER_OBJ, USER..., GROUP_OBJ, GROUP...,
+ * CLASS_OBJ (when applicable), OTHER_OBJ.  On success *aclentp receives
+ * the newly allocated array and *aclcnt its length; on failure neither is
+ * written and any partial array is freed.
+ *
+ * Returns 0, ENOTSUP when the list cannot be expressed as aclent_t
+ * entries, EINVAL when a collected entry has an unexpected type, ENOMEM
+ * on allocation failure, or an error from acevals_to_aent() /
+ * ace_mask_to_mode().
+ */
+static int
+ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt,
+    uid_t owner, gid_t group, boolean_t isdir)
+{
+	int error = 0;
+	aclent_t *aent, *result = NULL;
+	acevals_t *vals;
+	int resultcount;
+
+	/* all three of owner, owning group and everyone must have been seen */
+	if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) !=
+	    (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) {
+		error = ENOTSUP;
+		goto out;
+	}
+	/* named users/groups are only representable together with a mask */
+	if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) {
+		error = ENOTSUP;
+		goto out;
+	}
+
+	/* 3 == USER_OBJ + GROUP_OBJ + OTHER_OBJ */
+	resultcount = 3 + list->numusers + list->numgroups;
+	/*
+	 * This must be the same condition as below, when we add the CLASS_OBJ
+	 * (aka ACL mask)
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag))
+		resultcount += 1;
+
+	if (cacl_malloc((void **)&result,
+	    resultcount * sizeof (aclent_t)) != 0) {
+		error = ENOMEM;
+		goto out;
+	}
+	/* aent is the write cursor into result */
+	aent = result;
+
+	/* USER_OBJ */
+	if (!(list->user_obj.aent_type & USER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = acevals_to_aent(&list->user_obj, aent, list, owner, group,
+	    isdir);
+
+	if (error != 0)
+		goto out;
+	++aent;
+	/* USER */
+	vals = NULL;
+	for (vals = avl_first(&list->user); vals != NULL;
+	    vals = AVL_NEXT(&list->user, vals)) {
+		if (!(vals->aent_type & USER)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/* GROUP_OBJ */
+	if (!(list->group_obj.aent_type & GROUP_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->group_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+	/* GROUP */
+	vals = NULL;
+	for (vals = avl_first(&list->group); vals != NULL;
+	    vals = AVL_NEXT(&list->group, vals)) {
+		if (!(vals->aent_type & GROUP)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/*
+	 * CLASS_OBJ (aka ACL_MASK)
+	 *
+	 * An ACL_MASK is not fabricated if the ACL is a default ACL.
+	 * This is to follow UFS's behavior.
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag)) {
+		if (list->hasmask) {
+			/*
+			 * acl_mask holds the mask of a DENY entry, so flip it
+			 * within the supported bits before converting to
+			 * mode bits.
+			 */
+			uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
+			if (isdir)
+				flips |= ACE_DELETE_CHILD;
+			error = ace_mask_to_mode(list->acl_mask ^ flips,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		} else {
+			/* fabricate the ACL_MASK from the group permissions */
+			error = ace_mask_to_mode(list->group_obj.allowed,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		}
+		aent->a_id = 0;
+		aent->a_type = CLASS_OBJ | list->dfacl_flag;
+		++aent;
+	}
+	/* OTHER_OBJ */
+	if (!(list->other_obj.aent_type & OTHER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->other_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+
+	*aclentp = result;
+	*aclcnt = resultcount;
+
+out:
+	/*
+	 * result is only non-NULL after resultcount has been computed, so
+	 * resultcount is always initialized when it is read here.
+	 */
+	if (error != 0) {
+		if (result != NULL)
+			cacl_free(result, resultcount * sizeof (aclent_t));
+	}
+
+	return (error);
+}
+
+
+/*
+ * Release an ace_list_t: every node held by its user and group avl trees,
+ * the trees themselves, and finally the list structure.  NULL is a no-op.
+ */
+static void
+ace_list_free(ace_list_t *al)
+{
+	avl_tree_t *trees[2];
+	acevals_t *entry;
+	void *cookie;
+	int t;
+
+	if (al == NULL)
+		return;
+
+	trees[0] = &al->user;
+	trees[1] = &al->group;
+
+	/* drain both trees first ... */
+	for (t = 0; t < 2; t++) {
+		cookie = NULL;
+		for (;;) {
+			entry = avl_destroy_nodes(trees[t], &cookie);
+			if (entry == NULL)
+				break;
+			cacl_free(entry, sizeof (acevals_t));
+		}
+	}
+
+	/* ... then tear down the (now empty) trees */
+	for (t = 0; t < 2; t++)
+		avl_destroy(trees[t]);
+
+	/* free the container itself */
+	cacl_free(al, sizeof (ace_list_t));
+}
+
+/*
+ * Three-way comparison of two acevals_t by key; used as the avl tree
+ * comparator.  Returns -1, 0 or 1.
+ */
+static int
+acevals_compare(const void *va, const void *vb)
+{
+	const acevals_t *lhs = va;
+	const acevals_t *rhs = vb;
+
+	if (lhs->key > rhs->key)
+		return (1);
+	if (lhs->key < rhs->key)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Convert a list of ace_t entries to equivalent regular and default
+ * aclent_t lists.  Return error (ENOTSUP) when conversion is not possible.
+ *
+ * ace/n		source ACEs; note that the special mask bits
+ *			(write_owner, delete, synchronize, write_attributes,
+ *			read/write_named_attrs) are cleared in place
+ * owner/group		ids stored in the USER_OBJ / GROUP_OBJ entries
+ * aclentp/aclcnt	receive the regular list (NULL/0 if there is none)
+ * dfaclentp/dfaclcnt	receive the default list (NULL/0 if there is none)
+ * isdir		whether the object is a directory
+ *
+ * Returns 0 on success.  On any error all four output parameters are left
+ * as NULL/0 and nothing allocated here is handed back to the caller.
+ * Other errors: EINVAL (NULL input or malformed ACE), ENOMEM.
+ */
+static int
+ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group,
+    aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt,
+    boolean_t isdir)
+{
+	int error = 0;
+	ace_t *acep;
+	uint32_t bits;
+	int i;
+	ace_list_t *normacl = NULL, *dfacl = NULL, *acl;
+	acevals_t *vals;
+
+	*aclentp = NULL;
+	*aclcnt = 0;
+	*dfaclentp = NULL;
+	*dfaclcnt = 0;
+
+	/* we need at least user_obj, group_obj, and other_obj */
+	if (n < 6) {
+		error = ENOTSUP;
+		goto out;
+	}
+	if (ace == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = cacl_malloc((void **)&normacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&normacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&normacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+
+	ace_list_init(normacl, 0);
+
+	error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	ace_list_init(dfacl, ACL_DEFAULT);
+
+	/* process every ace_t... */
+	for (i = 0; i < n; i++) {
+		acep = &ace[i];
+
+		/* rule out certain cases quickly */
+		error = ace_to_aent_legal(acep);
+		if (error != 0)
+			goto out;
+
+		/*
+		 * Turn off these bits in order to not have to worry about
+		 * them when doing the checks for complements.
+		 */
+		acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE |
+		    ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES |
+		    ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS);
+
+		/* see if this should be a regular or default acl */
+		bits = acep->a_flags &
+		    (ACE_INHERIT_ONLY_ACE |
+		    ACE_FILE_INHERIT_ACE |
+		    ACE_DIRECTORY_INHERIT_ACE);
+		if (bits != 0) {
+			/* all or nothing on these inherit bits */
+			if (bits != (ACE_INHERIT_ONLY_ACE |
+			    ACE_FILE_INHERIT_ACE |
+			    ACE_DIRECTORY_INHERIT_ACE)) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl = dfacl;
+		} else {
+			acl = normacl;
+		}
+
+		/*
+		 * Classify the ACE and advance the per-list state; the
+		 * state comparisons reject ACEs that appear out of the
+		 * expected user_obj, user, group, other order.
+		 */
+		if ((acep->a_flags & ACE_OWNER)) {
+			if (acl->state > ace_user_obj) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user_obj;
+			acl->seen |= USER_OBJ;
+			vals = &acl->user_obj;
+			vals->aent_type = USER_OBJ | acl->dfacl_flag;
+		} else if ((acep->a_flags & ACE_EVERYONE)) {
+			acl->state = ace_other_obj;
+			acl->seen |= OTHER_OBJ;
+			vals = &acl->other_obj;
+			vals->aent_type = OTHER_OBJ | acl->dfacl_flag;
+		} else if (acep->a_flags & ACE_IDENTIFIER_GROUP) {
+			if (acl->state > ace_group) {
+				error = ENOTSUP;
+				goto out;
+			}
+			if ((acep->a_flags & ACE_GROUP)) {
+				acl->seen |= GROUP_OBJ;
+				vals = &acl->group_obj;
+				vals->aent_type = GROUP_OBJ | acl->dfacl_flag;
+			} else {
+				acl->seen |= GROUP;
+				vals = acevals_find(acep, &acl->group,
+				    &acl->numgroups);
+				if (vals == NULL) {
+					error = ENOMEM;
+					goto out;
+				}
+				vals->aent_type = GROUP | acl->dfacl_flag;
+			}
+			acl->state = ace_group;
+		} else {
+			if (acl->state > ace_user) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user;
+			acl->seen |= USER;
+			vals = acevals_find(acep, &acl->user,
+			    &acl->numusers);
+			if (vals == NULL) {
+				error = ENOMEM;
+				goto out;
+			}
+			vals->aent_type = USER | acl->dfacl_flag;
+		}
+
+		if (!(acl->state > ace_unused)) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) {
+			/* no more than one allowed per aclent_t */
+			if (vals->allowed != ACE_MASK_UNDEFINED) {
+				error = ENOTSUP;
+				goto out;
+			}
+			vals->allowed = acep->a_access_mask;
+		} else {
+			/*
+			 * it's a DENY; if there was a previous DENY, it
+			 * must have been an ACL_MASK.
+			 */
+			if (vals->denied != ACE_MASK_UNDEFINED) {
+				/* ACL_MASK is for USER and GROUP only */
+				if ((acl->state != ace_user) &&
+				    (acl->state != ace_group)) {
+					error = ENOTSUP;
+					goto out;
+				}
+
+				if (! acl->hasmask) {
+					acl->hasmask = 1;
+					acl->acl_mask = vals->denied;
+				/* check for mismatched ACL_MASK emulations */
+				} else if (acl->acl_mask != vals->denied) {
+					error = ENOTSUP;
+					goto out;
+				}
+				vals->mask = vals->denied;
+			}
+			vals->denied = acep->a_access_mask;
+		}
+	}
+
+	/* done collating; produce the aclent_t lists */
+	if (normacl->state != ace_unused) {
+		error = ace_list_to_aent(normacl, aclentp, aclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+	if (dfacl->state != ace_unused) {
+		error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+
+out:
+	/*
+	 * If the default list failed after the regular list had already
+	 * been produced, release the regular list: the caller only looks
+	 * at the outputs on success, so it would otherwise be leaked.
+	 */
+	if (error != 0 && *aclentp != NULL) {
+		cacl_free(*aclentp, *aclcnt * sizeof (aclent_t));
+		*aclentp = NULL;
+		*aclcnt = 0;
+	}
+
+	if (normacl != NULL)
+		ace_list_free(normacl);
+	if (dfacl != NULL)
+		ace_list_free(dfacl);
+
+	return (error);
+}
+
+/*
+ * Convert an ace_t list to one aclent_t array holding the regular entries
+ * followed by the default entries.
+ *
+ * On success *retaclentp / *retaclcnt receive the combined array and its
+ * length.  On failure they are not written and every intermediate buffer
+ * is released.  Returns 0, ENOMEM if the two lists cannot be merged, or
+ * the error from ln_ace_to_aent().
+ */
+static int
+convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir,
+    uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt)
+{
+	int error = 0;
+	aclent_t *aclentp, *dfaclentp;
+	aclent_t *merged;
+	int aclcnt, dfaclcnt;
+	int aclsz, dfaclsz;
+
+	error = ln_ace_to_aent(acebufp, acecnt, owner, group,
+	    &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir);
+
+	if (error)
+		return (error);
+
+
+	if (dfaclcnt != 0) {
+		/*
+		 * Slap aclentp and dfaclentp into a single array.
+		 *
+		 * The result goes through a temporary so the regular list
+		 * can still be freed if the reallocation fails.
+		 * NOTE(review): assumes cacl_realloc() leaves the original
+		 * buffer valid when it returns NULL (realloc semantics) --
+		 * confirm against its definition.
+		 */
+		aclsz = sizeof (aclent_t) * aclcnt;
+		dfaclsz = sizeof (aclent_t) * dfaclcnt;
+		merged = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz);
+		if (merged != NULL) {
+			(void) memcpy(merged + aclcnt, dfaclentp, dfaclsz);
+			aclentp = merged;
+		} else {
+			if (aclentp != NULL)
+				cacl_free(aclentp, aclsz);
+			aclentp = NULL;
+			error = ENOMEM;
+		}
+	}
+
+	if (aclentp) {
+		*retaclentp = aclentp;
+		*retaclcnt = aclcnt + dfaclcnt;
+	}
+
+	/* size recomputed here so no possibly-unset local is read */
+	if (dfaclentp)
+		cacl_free(dfaclentp, sizeof (aclent_t) * dfaclcnt);
+
+	return (error);
+}
+
+
+/*
+ * Translate aclp in place to the flavor named by target_flavor
+ * (_ACL_ACE_ENABLED or _ACL_ACLENT_ENABLED).
+ *
+ * Nothing is done (and 0 returned) when the ACL already has the requested
+ * flavor.  On success the old entry buffer is freed and replaced, and the
+ * count, type and entry size are updated.  On failure aclp is unchanged;
+ * outside the kernel errno is set and -1 returned, inside the kernel the
+ * error number itself is returned.
+ */
+int
+acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner,
+    gid_t group)
+{
+	int want_ace = (target_flavor == _ACL_ACE_ENABLED);
+	int want_aent = (target_flavor == _ACL_ACLENT_ENABLED);
+	void *newdata;
+	int newcnt;
+	int error;
+
+	/*
+	 * See if we need to translate
+	 */
+	if (want_ace && aclp->acl_type == ACE_T)
+		return (0);
+	if (want_aent && aclp->acl_type == ACLENT_T)
+		return (0);
+
+	if (target_flavor == -1) {
+		error = EINVAL;
+	} else if (want_ace && aclp->acl_type == ACLENT_T) {
+		error = convert_aent_to_ace(aclp->acl_aclp,
+		    aclp->acl_cnt, isdir, (ace_t **)&newdata, &newcnt);
+	} else if (want_aent && aclp->acl_type == ACE_T) {
+		error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt,
+		    isdir, owner, group, (aclent_t **)&newdata, &newcnt);
+	} else {
+		error = ENOTSUP;
+	}
+
+	if (error == 0) {
+		/*
+		 * replace old acl with newly translated acl
+		 */
+		cacl_free(aclp->acl_aclp,
+		    aclp->acl_cnt * aclp->acl_entry_size);
+		aclp->acl_aclp = newdata;
+		aclp->acl_cnt = newcnt;
+		if (want_ace) {
+			aclp->acl_type = ACE_T;
+			aclp->acl_entry_size = sizeof (ace_t);
+		} else {
+			aclp->acl_type = ACLENT_T;
+			aclp->acl_entry_size = sizeof (aclent_t);
+		}
+		return (0);
+	}
+
+#if !defined(_KERNEL)
+	errno = error;
+	return (-1);
+#else
+	return (error);
+#endif
+}
+#endif /* !_KERNEL */
+
+#define	SET_ACE(acl, index, who, mask, type, flags) { \
+	acl[0][index].a_who = (uint32_t)who; \
+	acl[0][index].a_type = type; \
+	acl[0][index].a_flags = flags; \
+	acl[0][index++].a_access_mask = mask; \
+}
+
+/*
+ * Compute the six access masks of the trivial ACL that corresponds to a
+ * permission mode and store them in *masks:
+ *
+ *   allow0	extra owner allow, for bits the group lacks but other has
+ *   deny1	owner deny, for bits the owner lacks but group/other has
+ *   deny2	group deny, for bits the group lacks but other has
+ *   owner, group, everyone
+ *		the allow masks for the three classes
+ *
+ * isdir is currently unused.
+ */
+void
+acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
+{
+	const uint32_t rd = ACE_READ_DATA;
+	const uint32_t wr = ACE_WRITE_DATA|ACE_APPEND_DATA;
+	const uint32_t ex = ACE_EXECUTE;
+	/* bits every class is always allowed */
+	const uint32_t common = ACE_READ_ACL|ACE_READ_ATTRIBUTES|
+	    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;
+
+	const int ur = (mode & S_IRUSR) != 0;
+	const int uw = (mode & S_IWUSR) != 0;
+	const int ux = (mode & S_IXUSR) != 0;
+	const int gr = (mode & S_IRGRP) != 0;
+	const int gw = (mode & S_IWGRP) != 0;
+	const int gx = (mode & S_IXGRP) != 0;
+	const int or = (mode & S_IROTH) != 0;
+	const int ow = (mode & S_IWOTH) != 0;
+	const int ox = (mode & S_IXOTH) != 0;
+
+	uint32_t deny1 = 0, deny2 = 0, allow0 = 0;
+
+	(void) isdir;	/* will need this later */
+
+	/* owner is missing something that group or other has */
+	if (!ur && (gr || or))
+		deny1 |= rd;
+	if (!uw && (gw || ow))
+		deny1 |= wr;
+	if (!ux && (gx || ox))
+		deny1 |= ex;
+
+	/* group is missing something that other has */
+	if (!gr && or)
+		deny2 |= rd;
+	if (!gw && ow)
+		deny2 |= wr;
+	if (!gx && ox)
+		deny2 |= ex;
+
+	/* owner has it, group does not, other does */
+	if (ur && !gr && or)
+		allow0 |= rd;
+	if (uw && !gw && ow)
+		allow0 |= wr;
+	if (ux && !gx && ox)
+		allow0 |= ex;
+
+	masks->deny1 = deny1;
+	masks->deny2 = deny2;
+	masks->allow0 = allow0;
+
+	masks->owner = common|ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|
+	    ACE_WRITE_ACL|ACE_WRITE_NAMED_ATTRS|
+	    (ur ? rd : 0)|(uw ? wr : 0)|(ux ? ex : 0);
+
+	masks->group = common|
+	    (gr ? rd : 0)|(gw ? wr : 0)|(gx ? ex : 0);
+
+	masks->everyone = common|
+	    (or ? rd : 0)|(ow ? wr : 0)|(ox ? ex : 0);
+}
+
+/*
+ * Allocate and fill the trivial ACL for a permission mode.
+ *
+ * The three optional entries (owner allow0, owner deny1, group deny2) are
+ * emitted only when their mask is non-zero, followed by the owner, group
+ * and everyone allow entries.  *count is set to the number of entries and
+ * *acl to the new array; if the allocation fails its error is returned.
+ */
+int
+acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count)
+{
+	trivial_acl_t	masks;
+	int		slot = 0;
+	int		error;
+
+	acl_trivial_access_masks(mode, isdir, &masks);
+
+	/* three mandatory entries plus one per non-empty optional mask */
+	*count = 3 + (masks.allow0 ? 1 : 0) + (masks.deny1 ? 1 : 0) +
+	    (masks.deny2 ? 1 : 0);
+
+	error = cacl_malloc((void **)acl, *count * sizeof (ace_t));
+	if (error != 0)
+		return (error);
+
+	if (masks.allow0) {
+		SET_ACE(acl, slot, -1, masks.allow0,
+		    ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
+	}
+	if (masks.deny1) {
+		SET_ACE(acl, slot, -1, masks.deny1,
+		    ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER);
+	}
+	if (masks.deny2) {
+		SET_ACE(acl, slot, -1, masks.deny2,
+		    ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP);
+	}
+
+	SET_ACE(acl, slot, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_OWNER);
+	SET_ACE(acl, slot, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_IDENTIFIER_GROUP|ACE_GROUP);
+	SET_ACE(acl, slot, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_EVERYONE);
+
+	return (0);
+}
+
+/*
+ * ace_trivial_common:
+ * decide whether an ACL, visited through the supplied walk callback, is
+ * trivial.  Returns 0 when it is trivial and 1 when it is not.
+ *
+ * walk is called repeatedly with the previous cookie (starting at 0) and
+ * fills in the flags, type and mask of the next entry; iteration stops
+ * when it returns 0.
+ *
+ * An ACL is trivial when every entry is for owner@, group@ or everyone@,
+ * carries no inheritance flags, does not deny read_acl/read_attributes,
+ * grants or denies no delete permissions, and grants
+ * write_owner/write_acl/write_attributes/write_xattr to owner@ only.
+ */
+int
+ace_trivial_common(void *acep, int aclcnt,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt,
+    uint16_t *, uint16_t *, uint32_t *))
+{
+	uint64_t cookie = 0;
+	uint16_t flags;
+	uint16_t type;
+	uint32_t mask;
+	int who;
+
+	for (;;) {
+		cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask);
+		if (cookie == 0)
+			break;
+
+		/* only the three abstract identities are permitted */
+		who = flags & ACE_TYPE_FLAGS;
+		if (who != ACE_OWNER &&
+		    who != (ACE_GROUP|ACE_IDENTIFIER_GROUP) &&
+		    who != ACE_EVERYONE)
+			return (1);
+
+		/* any inheritance flag makes the ACL non-trivial */
+		if (flags & (ACE_FILE_INHERIT_ACE|
+		    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|
+		    ACE_INHERIT_ONLY_ACE))
+			return (1);
+
+		/*
+		 * Nobody may be denied reading the basic attributes or
+		 * the file's ACL.
+		 */
+		if (type == ACE_ACCESS_DENIED_ACE_TYPE &&
+		    (mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)))
+			return (1);
+
+		/* delete permissions are never set by default */
+		if (mask & (ACE_DELETE|ACE_DELETE_CHILD))
+			return (1);
+
+		/*
+		 * write_acl/write_owner/write_attributes/write_xattr may
+		 * be allowed for owner@ only.
+		 */
+		if (type == ACE_ACCESS_ALLOWED_ACE_TYPE &&
+		    !(flags & ACE_OWNER) &&
+		    (mask & (ACE_WRITE_OWNER|ACE_WRITE_ACL|
+		    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)))
+			return (1);
+	}
+
+	return (0);
+}

diff --git a/zfs/module/os/freebsd/spl/callb.c b/zfs/module/os/freebsd/spl/callb.c
new file mode 100644
index 0000000..0b7fefc
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/callb.c

@@ -0,0 +1,373 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/callb.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/kobj.h>
+#include <sys/systm.h>	/* for delay() */
+#include <sys/taskq.h>  /* For TASKQ_NAMELEN */
+#include <sys/kernel.h>
+
+#define	CB_MAXNAME	TASKQ_NAMELEN
+
+/*
+ * The callb mechanism provides generic event scheduling/echoing.
+ * A callb function is registered and called on behalf of the event.
+ *
+ * One callb_t describes one registered callback.  Entries are chained
+ * through c_next, either on the list of their class or on the table's
+ * freelist.
+ */
+typedef struct callb {
+	struct callb	*c_next; 	/* next in class or on freelist */
+	kthread_id_t	c_thread;	/* ptr to caller's thread struct */
+	char		c_flag;		/* info about the callb state */
+	uchar_t		c_class;	/* this callb's class */
+	kcondvar_t	c_done_cv;	/* signal callb completion */
+	boolean_t	(*c_func)(void *, int);
+					/* cb function: returns true if ok */
+	void		*c_arg;		/* arg to c_func */
+	/* One extra byte so a CB_MAXNAME-long name can be NUL terminated. */
+	char		c_name[CB_MAXNAME+1]; /* debug:max func name length */
+} callb_t;
+
+/*
+ * callb c_flag bitmap definitions
+ */
+#define	CALLB_FREE		0x0
+#define	CALLB_TAKEN		0x1
+#define	CALLB_EXECUTING		0x2
+
+/*
+ * Basic structure for a callb table.
+ * All callbs are organized into different class groups described
+ * by ct_class array.
+ * The callbs within a class are single-linked and normally run by a
+ * serial execution.
+ *
+ * This file keeps exactly one such table (callb_table) and all of its
+ * fields are read and written with ct_lock held.
+ */
+typedef struct callb_table {
+	kmutex_t ct_lock;		/* protect all callb states */
+	callb_t	*ct_freelist; 		/* free callb structures */
+	boolean_t ct_busy;		/* B_TRUE prevents additions */
+	kcondvar_t ct_busy_cv;		/* to wait for not busy    */
+	/* Counts entries on the freelist as well as registered ones. */
+	int	ct_ncallb; 		/* num of callbs allocated */
+	callb_t	*ct_first_cb[NCBCLASS];	/* ptr to 1st callb in a class */
+} callb_table_t;
+
+/*
+ * Timeout, in seconds, used by callb_generic_cpr() (scaled by hz) when it
+ * waits for a thread to become CPR-safe; only consulted when
+ * CPR_NOT_THREAD_SAFE is defined.
+ */
+int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC;
+
+/* Shared implementation behind callb_add() and callb_add_thread(). */
+static callb_id_t callb_add_common(boolean_t (*)(void *, int),
+    void *, int, char *, kthread_id_t);
+
+static callb_table_t callb_table;	/* system level callback table */
+/* Shorthand for &callb_table; never reassigned in this file. */
+static callb_table_t *ct = &callb_table;
+/* Lock referenced by callb_cprinfo_safe; set up in callb_init(). */
+static kmutex_t	callb_safe_mutex;
+/* Pre-built CPR info marked CALLB_CPR_ALWAYS_SAFE. */
+callb_cpr_t	callb_cprinfo_safe = {
+	&callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} };
+
+/*
+ * SYSINIT hook: bring the system callb table into its initial state.
+ */
+static void
+callb_init(void *dummy __unused)
+{
+	/* Set up the two mutexes owned by this file. */
+	mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL);
+
+	/* The table starts out open for additions. */
+	callb_table.ct_busy = B_FALSE;
+}
+
+/*
+ * SYSUNINIT hook: release every callb on the freelist and tear down the
+ * locks.  If some callbs are still registered, the freelist is re-checked
+ * for up to 16 rounds, sleeping hz / 4 ticks between rounds; whatever is
+ * still outstanding after that is reported as leaked.
+ */
+static void
+callb_fini(void *dummy __unused)
+{
+	callb_t *victim;
+	int round = 0;
+
+	mutex_enter(&ct->ct_lock);
+	while (round < 16) {
+		/* Drain whatever is on the freelist right now. */
+		for (victim = ct->ct_freelist; victim != NULL;
+		    victim = ct->ct_freelist) {
+			ct->ct_freelist = victim->c_next;
+			ct->ct_ncallb--;
+			kmem_free(victim, sizeof (callb_t));
+		}
+		if (ct->ct_ncallb == 0)
+			break;
+
+		/* Not all callbacks finished, waiting for the rest. */
+		mutex_exit(&ct->ct_lock);
+		tsleep(ct, 0, "callb", hz / 4);
+		mutex_enter(&ct->ct_lock);
+		round++;
+	}
+	if (ct->ct_ncallb > 0)
+		printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb);
+	mutex_exit(&ct->ct_lock);
+
+	mutex_destroy(&callb_safe_mutex);
+	mutex_destroy(&callb_table.ct_lock);
+}
+
+/*
+ * callb_add_common() registers func() to be called later with arg for the
+ * given class, on behalf of thread t.  It waits while the table is locked
+ * against additions, reuses a callb from the freelist when one is
+ * available, and links the entry at the head of its class list.
+ * The returned id is the callb pointer itself.
+ */
+static callb_id_t
+callb_add_common(boolean_t (*func)(void *arg, int code),
+    void *arg, int class, char *name, kthread_id_t t)
+{
+	callb_t *entry;
+
+	ASSERT3S(class, <, NCBCLASS);
+
+	mutex_enter(&ct->ct_lock);
+
+	/* Additions are held off while the table is marked busy. */
+	while (ct->ct_busy)
+		cv_wait(&ct->ct_busy_cv, &ct->ct_lock);
+
+	entry = ct->ct_freelist;
+	if (entry == NULL) {
+		ct->ct_ncallb++;
+		entry = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP);
+	}
+	/* A freshly zeroed entry has c_next == NULL, leaving the list empty. */
+	ct->ct_freelist = entry->c_next;
+
+	entry->c_flag |= CALLB_TAKEN;
+	entry->c_class = (uchar_t)class;
+	entry->c_arg = arg;
+	entry->c_func = func;
+	entry->c_thread = t;
+
+#ifdef ZFS_DEBUG
+	if (strlen(name) > CB_MAXNAME)
+		cmn_err(CE_WARN, "callb_add: name of callback function '%s' "
+		    "too long -- truncated to %d chars",
+		    name, CB_MAXNAME);
+#endif
+	/* c_name holds CB_MAXNAME + 1 bytes; always terminate it. */
+	(void) strncpy(entry->c_name, name, CB_MAXNAME);
+	entry->c_name[CB_MAXNAME] = '\0';
+
+	/*
+	 * Push the new callb onto the front of its class list.
+	 */
+	entry->c_next = ct->ct_first_cb[class];
+	ct->ct_first_cb[class] = entry;
+
+	mutex_exit(&ct->ct_lock);
+	return ((callb_id_t)entry);
+}
+
+/*
+ * The default function to add an entry to the callback table.  Since
+ * it uses curthread as the thread identifier to store in the table,
+ * it should be used for the normal case of a thread which is calling
+ * to add ITSELF to the table.
+ */
+callb_id_t
+callb_add(boolean_t (*func)(void *arg, int code),
+    void *arg, int class, char *name)
+{
+	/* The calling thread registers itself. */
+	kthread_id_t self = curthread;
+
+	return (callb_add_common(func, arg, class, name, self));
+}
+
+/*
+ * A special version of callb_add() above for use by threads which
+ * might be adding an entry to the table on behalf of some other
+ * thread (for example, one which is constructed but not yet running).
+ * In this version the thread id is an argument.
+ */
+callb_id_t
+callb_add_thread(boolean_t (*func)(void *arg, int code),
+    void *arg, int class, char *name, kthread_id_t t)
+{
+	callb_id_t id;
+
+	/* Same as callb_add(), but for the thread named by the caller. */
+	id = callb_add_common(func, arg, class, name, t);
+	return (id);
+}
+
+/*
+ * callb_delete() removes the entry identified by id, which must have been
+ * returned by callb_add() or callb_add_thread().  If the callb is being
+ * executed, the caller sleeps until the execution has finished.
+ *
+ * Returns 0 on success, or -1 if the entry is not on the list of its
+ * class; in that case nothing is modified.
+ */
+int
+callb_delete(callb_id_t id)
+{
+	callb_t **pp;
+	callb_t *me = (callb_t *)id;
+
+	mutex_enter(&ct->ct_lock);
+
+	for (;;) {
+		/* Find the link that points at this entry. */
+		pp = &ct->ct_first_cb[me->c_class];
+		while (*pp != NULL && *pp != me)
+			pp = &(*pp)->c_next;
+
+		/*
+		 * Refuse to unlink an entry that is not on the list.
+		 * Continuing would store through the list's terminating
+		 * NULL link and put a foreign entry on the freelist.
+		 */
+		if (*pp != me) {
+#ifdef ZFS_DEBUG
+			cmn_err(CE_WARN, "callb delete bogus entry 0x%p",
+			    (void *)me);
+#endif /* ZFS_DEBUG */
+			mutex_exit(&ct->ct_lock);
+			return (-1);
+		}
+
+		/*
+		 * It is not allowed to delete a callb in the middle of
+		 * executing otherwise, the callb_execute() will be confused.
+		 */
+		if (!(me->c_flag & CALLB_EXECUTING))
+			break;
+
+		/* The list may change while we sleep, so search again. */
+		cv_wait(&me->c_done_cv, &ct->ct_lock);
+	}
+	/* relink the class list */
+	*pp = me->c_next;
+
+	/* clean up myself and return the free callb to the head of freelist */
+	me->c_flag = CALLB_FREE;
+	me->c_next = ct->ct_freelist;
+	ct->ct_freelist = me;
+
+	mutex_exit(&ct->ct_lock);
+	return (0);
+}
+
+/*
+ * class:	indicates to execute all callbs in the same class;
+ * code:	optional argument for the callb functions.
+ * return:	 = 0: success
+ *		!= 0: ptr to string supplied when callback was registered
+ */
+void *
+callb_execute_class(int class, int code)
+{
+	void *failed = NULL;
+	callb_t *cp;
+
+	ASSERT3S(class, <, NCBCLASS);
+
+	mutex_enter(&ct->ct_lock);
+
+	/* Walk the class list until it ends or a callback reports failure. */
+	cp = ct->ct_first_cb[class];
+	while (cp != NULL && failed == NULL) {
+		/* Let an invocation that is already running finish first. */
+		while (cp->c_flag & CALLB_EXECUTING)
+			cv_wait(&cp->c_done_cv, &ct->ct_lock);
+
+		/*
+		 * Skip the callb if it was deleted while we were sleeping.
+		 */
+		if (cp->c_flag != CALLB_FREE) {
+			cp->c_flag |= CALLB_EXECUTING;
+
+#ifdef CALLB_DEBUG
+			printf("callb_execute: name=%s func=%p arg=%p\n",
+			    cp->c_name, (void *)cp->c_func,
+			    (void *)cp->c_arg);
+#endif /* CALLB_DEBUG */
+
+			/* The callback itself runs without ct_lock held. */
+			mutex_exit(&ct->ct_lock);
+			/* On failure, hand back the client's name. */
+			if (!(*cp->c_func)(cp->c_arg, code))
+				failed = cp->c_name;
+			mutex_enter(&ct->ct_lock);
+
+			/* Wake anybody waiting for this callb to finish. */
+			cp->c_flag &= ~CALLB_EXECUTING;
+			cv_broadcast(&cp->c_done_cv);
+		}
+
+		cp = cp->c_next;
+	}
+
+	mutex_exit(&ct->ct_lock);
+	return (failed);
+}
+
+/*
+ * callers make sure no recursive entries to this func.
+ * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure.
+ *
+ * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we
+ * use a cv_timedwait() in case the kernel thread is blocked.
+ *
+ * Note that this is a generic callback handler for daemon CPR and
+ * should NOT be changed to accommodate any specific requirement in a daemon.
+ * Individual daemons that require changes to the handler shall write
+ * callback routines in their own daemon modules.
+ */
+boolean_t
+callb_generic_cpr(void *arg, int code)
+{
+	callb_cpr_t *cpr = (callb_cpr_t *)arg;
+	clock_t rv = 0;			/* stays 0 unless a wait times out */
+
+	mutex_enter(cpr->cc_lockp);
+
+	if (code == CB_CODE_CPR_CHKPT) {
+		/* Checkpoint: flag the start of CPR for this thread. */
+		cpr->cc_events |= CALLB_CPR_START;
+#ifdef CPR_NOT_THREAD_SAFE
+		while (!(cpr->cc_events & CALLB_CPR_SAFE)) {
+			/* A return value of -1 means the wait timed out. */
+			rv = cv_reltimedwait(&cpr->cc_callb_cv,
+			    cpr->cc_lockp, (callb_timeout_sec * hz),
+			    TR_CLOCK_TICK);
+			if (rv == -1)
+				break;
+		}
+#endif
+	} else if (code == CB_CODE_CPR_RESUME) {
+		/* Resume: clear the flag and wake the stopped thread. */
+		cpr->cc_events &= ~CALLB_CPR_START;
+		cv_signal(&cpr->cc_stop_cv);
+	}
+
+	mutex_exit(cpr->cc_lockp);
+	return (rv != -1);
+}
+
+/*
+ * The generic callback function associated with kernel threads which
+ * are always considered safe.
+ */
+/* ARGSUSED */
+boolean_t
+callb_generic_cpr_safe(void *arg, int code)
+{
+	/* Neither argument is consulted: the answer is always "safe". */
+	(void) arg;
+	(void) code;
+
+	return (B_TRUE);
+}
+/*
+ * Prevent additions to callback table.
+ */
+void
+callb_lock_table(void)
+{
+	mutex_enter(&ct->ct_lock);
+
+	/* The table must not already be locked against additions. */
+	ASSERT(!ct->ct_busy);
+	ct->ct_busy = B_TRUE;
+
+	mutex_exit(&ct->ct_lock);
+}
+
+/*
+ * Allow additions to callback table.
+ */
+void
+callb_unlock_table(void)
+{
+	mutex_enter(&ct->ct_lock);
+
+	/* Only a table locked by callb_lock_table() may be unlocked. */
+	ASSERT(ct->ct_busy);
+	ct->ct_busy = B_FALSE;
+
+	/* Release every callb_add*() caller waiting for the table. */
+	cv_broadcast(&ct->ct_busy_cv);
+
+	mutex_exit(&ct->ct_lock);
+}
+
+/*
+ * Register callb_init()/callb_fini() as the setup and teardown hooks of
+ * the callb table, both at SI_SUB_DRIVERS / SI_ORDER_FIRST.
+ */
+SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL);
+SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL);

diff --git a/zfs/module/os/freebsd/spl/list.c b/zfs/module/os/freebsd/spl/list.c
new file mode 100644
index 0000000..62374a4
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/list.c

@@ -0,0 +1,243 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/param.h>
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/debug.h>
+
+#define	list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
+#define	list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
+#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+#define	list_insert_after_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_prev = (node);			\
+	lnew->list_next = (node)->list_next;		\
+	(node)->list_next->list_prev = lnew;		\
+	(node)->list_next = lnew;			\
+}
+
+#define	list_insert_before_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_next = (node);			\
+	lnew->list_prev = (node)->list_prev;		\
+	(node)->list_prev->list_next = lnew;		\
+	(node)->list_prev = lnew;			\
+}
+
+#define	list_remove_node(node)					\
+	(node)->list_prev->list_next = (node)->list_next;	\
+	(node)->list_next->list_prev = (node)->list_prev;	\
+	(node)->list_next = (node)->list_prev = NULL
+
+/*
+ * Initialise an empty list of objects that are size bytes long and embed
+ * their list_node_t at the given offset.
+ */
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+	list_node_t *sentinel;
+
+	ASSERT3P(list, !=, NULL);
+	ASSERT3U(size, >=, offset + sizeof (list_node_t));
+
+	/* An empty list is a head node linked to itself both ways. */
+	sentinel = &list->list_head;
+	sentinel->list_prev = sentinel;
+	sentinel->list_next = sentinel;
+
+	list->list_offset = offset;
+	list->list_size = size;
+}
+
+/*
+ * Tear down a list; the list is asserted to be empty.
+ */
+void
+list_destroy(list_t *list)
+{
+	list_node_t *node = &list->list_head;
+
+	ASSERT3P(list, !=, NULL);
+	ASSERT3P(list->list_head.list_next, ==, node);
+	ASSERT3P(list->list_head.list_prev, ==, node);
+
+	/* Leave the head with NULL links. */
+	node->list_prev = NULL;
+	node->list_next = NULL;
+}
+
+/*
+ * Insert nobject after object; a NULL object means "at the head".
+ */
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+	list_node_t *anchor;
+
+	if (object == NULL) {
+		list_insert_head(list, nobject);
+		return;
+	}
+
+	anchor = list_d2l(list, object);
+	list_insert_after_node(list, anchor, nobject);
+}
+
+/*
+ * Insert nobject before object; a NULL object means "at the tail".
+ */
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+	list_node_t *anchor;
+
+	if (object == NULL) {
+		list_insert_tail(list, nobject);
+		return;
+	}
+
+	anchor = list_d2l(list, object);
+	list_insert_before_node(list, anchor, nobject);
+}
+
+void
+list_insert_head(list_t *list, void *object)
+{
+	/* The first element is whatever follows the head sentinel. */
+	list_insert_after_node(list, &list->list_head, object);
+}
+
+void
+list_insert_tail(list_t *list, void *object)
+{
+	/* The last element is whatever precedes the head sentinel. */
+	list_insert_before_node(list, &list->list_head, object);
+}
+
+/*
+ * Unlink object from list.  The list is asserted to be non-empty and the
+ * object's node to be linked.
+ */
+void
+list_remove(list_t *list, void *object)
+{
+	list_node_t *lold;
+
+	lold = list_d2l(list, object);
+
+	ASSERT(!list_empty(list));
+	ASSERT3P(lold->list_next, !=, NULL);
+
+	list_remove_node(lold);
+}
+
+/*
+ * Unlink and return the first object, or NULL if the list is empty.
+ */
+void *
+list_remove_head(list_t *list)
+{
+	list_node_t *first = list->list_head.list_next;
+
+	if (first != &list->list_head) {
+		list_remove_node(first);
+		return (list_object(list, first));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Unlink and return the last object, or NULL if the list is empty.
+ */
+void *
+list_remove_tail(list_t *list)
+{
+	list_node_t *last = list->list_head.list_prev;
+
+	if (last != &list->list_head) {
+		list_remove_node(last);
+		return (list_object(list, last));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the first object without removing it, or NULL if there is none.
+ */
+void *
+list_head(list_t *list)
+{
+	return (list_empty(list) ? NULL :
+	    list_object(list, list->list_head.list_next));
+}
+
+/*
+ * Return the last object without removing it, or NULL if there is none.
+ */
+void *
+list_tail(list_t *list)
+{
+	return (list_empty(list) ? NULL :
+	    list_object(list, list->list_head.list_prev));
+}
+
+/*
+ * Return the object that follows object, or NULL if object is the last.
+ */
+void *
+list_next(list_t *list, void *object)
+{
+	list_node_t *succ = list_d2l(list, object)->list_next;
+
+	/* Reaching the head sentinel means we ran off the end. */
+	if (succ == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, succ));
+}
+
+/*
+ * Return the object that precedes object, or NULL if object is the first.
+ */
+void *
+list_prev(list_t *list, void *object)
+{
+	list_node_t *pred = list_d2l(list, object)->list_prev;
+
+	/* Reaching the head sentinel means we ran off the front. */
+	if (pred == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, pred));
+}
+
+/*
+ * Append all elements of src to the end of dst, leaving src empty.
+ * Both lists are asserted to have the same object size and node offset.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+	list_node_t *dsthead = &dst->list_head;
+	list_node_t *srchead = &src->list_head;
+	list_node_t *dsttail, *first, *last;
+
+	ASSERT3U(dst->list_size, ==, src->list_size);
+	ASSERT3U(dst->list_offset, ==, src->list_offset);
+
+	if (list_empty(src))
+		return;
+
+	dsttail = dsthead->list_prev;
+	first = srchead->list_next;
+	last = srchead->list_prev;
+
+	/* Splice the chain first..last between dst's tail and dst's head. */
+	dsttail->list_next = first;
+	first->list_prev = dsttail;
+	last->list_next = dsthead;
+	dsthead->list_prev = last;
+
+	/* src is now an empty list again */
+	srchead->list_prev = srchead;
+	srchead->list_next = srchead;
+}
+
+/*
+ * Put lnew in the list position currently held by lold.  lold is asserted
+ * to be linked and lnew to be unlinked; afterwards lold is unlinked.
+ */
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+	ASSERT(list_link_active(lold));
+	ASSERT(!list_link_active(lnew));
+
+	/* lnew takes over lold's neighbours ... */
+	lnew->list_next = lold->list_next;
+	lnew->list_prev = lold->list_prev;
+
+	/* ... and the neighbours are pointed at lnew. */
+	lold->list_prev->list_next = lnew;
+	lold->list_next->list_prev = lnew;
+
+	/* lold is no longer on any list. */
+	lold->list_prev = NULL;
+	lold->list_next = NULL;
+}
+
+/*
+ * Mark a node as not being on any list.
+ */
+void
+list_link_init(list_node_t *link)
+{
+	link->list_prev = NULL;
+	link->list_next = NULL;
+}
+
+/*
+ * Return non-zero if the node is currently linked into a list.
+ */
+int
+list_link_active(list_node_t *link)
+{
+	int linked;
+
+	/* The two links must be NULL together or non-NULL together. */
+	EQUIV(link->list_next == NULL, link->list_prev == NULL);
+
+	linked = (link->list_next != NULL);
+	return (linked);
+}
+
+/*
+ * Function form of the list_empty() test: non-zero if the list is empty.
+ */
+int
+list_is_empty(list_t *list)
+{
+	/* Empty means the head sentinel points back at itself. */
+	return (list->list_head.list_next == &list->list_head);
+}

diff --git a/zfs/module/os/freebsd/spl/sha224.h b/zfs/module/os/freebsd/spl/sha224.h
new file mode 100644
index 0000000..0abd430
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha224.h

@@ -0,0 +1,96 @@
+/*
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SHA224_H_
+#define	_SHA224_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define	SHA224_BLOCK_LENGTH		64
+#define	SHA224_DIGEST_LENGTH		28
+#define	SHA224_DIGEST_STRING_LENGTH	(SHA224_DIGEST_LENGTH * 2 + 1)
+
+/*
+ * SHA-224 hashing context.  sha256c.c casts SHA224_CTX pointers to
+ * SHA256_CTX pointers, so the layout must stay identical to
+ * struct SHA256Context.
+ */
+typedef struct SHA224Context {
+	uint32_t state[8];	/* working hash state */
+	uint64_t count;		/* number of message bits hashed so far */
+	uint8_t buf[SHA224_BLOCK_LENGTH];	/* partial input block */
+} SHA224_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+
+#ifndef SHA224_Init
+#define	SHA224_Init		_libmd_SHA224_Init
+#endif
+#ifndef SHA224_Update
+#define	SHA224_Update		_libmd_SHA224_Update
+#endif
+#ifndef SHA224_Final
+#define	SHA224_Final		_libmd_SHA224_Final
+#endif
+#ifndef SHA224_End
+#define	SHA224_End		_libmd_SHA224_End
+#endif
+#ifndef SHA224_Fd
+#define	SHA224_Fd		_libmd_SHA224_Fd
+#endif
+#ifndef SHA224_FdChunk
+#define	SHA224_FdChunk		_libmd_SHA224_FdChunk
+#endif
+#ifndef SHA224_File
+#define	SHA224_File		_libmd_SHA224_File
+#endif
+#ifndef SHA224_FileChunk
+#define	SHA224_FileChunk	_libmd_SHA224_FileChunk
+#endif
+#ifndef SHA224_Data
+#define	SHA224_Data		_libmd_SHA224_Data
+#endif
+
+#ifndef SHA224_version
+#define	SHA224_version		_libmd_SHA224_version
+#endif
+
+void	SHA224_Init(SHA224_CTX *);
+void	SHA224_Update(SHA224_CTX *, const void *, size_t);
+void	SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)],
+    SHA224_CTX *);
+#ifndef _KERNEL
+char   *SHA224_End(SHA224_CTX *, char *);
+char   *SHA224_Data(const void *, unsigned int, char *);
+char   *SHA224_Fd(int, char *);
+char   *SHA224_FdChunk(int, char *, off_t, off_t);
+char   *SHA224_File(const char *, char *);
+char   *SHA224_FileChunk(const char *, char *, off_t, off_t);
+#endif
+__END_DECLS
+
+#endif /* !_SHA224_H_ */

diff --git a/zfs/module/os/freebsd/spl/sha256.h b/zfs/module/os/freebsd/spl/sha256.h
new file mode 100644
index 0000000..193c0c0
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha256.h

@@ -0,0 +1,99 @@
+/*
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA256_H_
+#define	_SHA256_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define	SHA256_BLOCK_LENGTH		64
+#define	SHA256_DIGEST_LENGTH		32
+#define	SHA256_DIGEST_STRING_LENGTH	(SHA256_DIGEST_LENGTH * 2 + 1)
+
+/*
+ * SHA-256 hashing context, set up by SHA256_Init() and cleared again by
+ * SHA256_Final().
+ */
+typedef struct SHA256Context {
+	uint32_t state[8];	/* working hash state */
+	uint64_t count;		/* number of message bits hashed so far */
+	uint8_t buf[SHA256_BLOCK_LENGTH];	/* partial input block */
+} SHA256_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+
+#ifndef SHA256_Init
+#define	SHA256_Init		_libmd_SHA256_Init
+#endif
+#ifndef SHA256_Update
+#define	SHA256_Update		_libmd_SHA256_Update
+#endif
+#ifndef SHA256_Final
+#define	SHA256_Final		_libmd_SHA256_Final
+#endif
+#ifndef SHA256_End
+#define	SHA256_End		_libmd_SHA256_End
+#endif
+#ifndef SHA256_Fd
+#define	SHA256_Fd		_libmd_SHA256_Fd
+#endif
+#ifndef SHA256_FdChunk
+#define	SHA256_FdChunk		_libmd_SHA256_FdChunk
+#endif
+#ifndef SHA256_File
+#define	SHA256_File		_libmd_SHA256_File
+#endif
+#ifndef SHA256_FileChunk
+#define	SHA256_FileChunk	_libmd_SHA256_FileChunk
+#endif
+#ifndef SHA256_Data
+#define	SHA256_Data		_libmd_SHA256_Data
+#endif
+
+#ifndef SHA256_Transform
+#define	SHA256_Transform	_libmd_SHA256_Transform
+#endif
+#ifndef SHA256_version
+#define	SHA256_version		_libmd_SHA256_version
+#endif
+
+void	SHA256_Init(SHA256_CTX *);
+void	SHA256_Update(SHA256_CTX *, const void *, size_t);
+void	SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)],
+    SHA256_CTX *);
+#ifndef _KERNEL
+char   *SHA256_End(SHA256_CTX *, char *);
+char   *SHA256_Data(const void *, unsigned int, char *);
+char   *SHA256_Fd(int, char *);
+char   *SHA256_FdChunk(int, char *, off_t, off_t);
+char   *SHA256_File(const char *, char *);
+char   *SHA256_FileChunk(const char *, char *, off_t, off_t);
+#endif
+__END_DECLS
+
+#endif /* !_SHA256_H_ */

diff --git a/zfs/module/os/freebsd/spl/sha256c.c b/zfs/module/os/freebsd/spl/sha256c.c
new file mode 100644
index 0000000..241cf8c
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha256c.c

@@ -0,0 +1,378 @@
+/*
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+
+#include <sys/byteorder.h>
+#include <sys/endian.h>
+#include "sha224.h"
+#include "sha256.h"
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/*
+ * On a big-endian host the in-memory layout of uint32_t already is the
+ * big-endian byte stream, so both conversions are plain copies.  The
+ * arguments are parenthesised so that the casts apply to the whole
+ * argument expression.
+ */
+
+/* Copy a vector of big-endian uint32_t into a vector of bytes */
+#define	be32enc_vect(dst, src, len)	\
+	memcpy((void *)(dst), (const void *)(src), (size_t)(len))
+
+/* Copy a vector of bytes into a vector of big-endian uint32_t */
+#define	be32dec_vect(dst, src, len)	\
+	memcpy((void *)(dst), (const void *)(src), (size_t)(len))
+
+#else /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * Store len/4 host-order (uint32_t) words from src into dst as len bytes
+ * in big-endian order.  len is assumed to be a multiple of 4.
+ */
+static void
+be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
+{
+	const size_t nwords = len / 4;
+	size_t w;
+
+	for (w = 0; w < nwords; w++)
+		be32enc(&dst[w * 4], src[w]);
+}
+
+/*
+ * Load len big-endian bytes from src into dst as len/4 host-order
+ * (uint32_t) words.  len is assumed to be a multiple of 4.
+ */
+static void
+be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	const size_t nwords = len / 4;
+	size_t w;
+
+	for (w = 0; w < nwords; w++)
+		dst[w] = be32dec(&src[w * 4]);
+}
+
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/* SHA256 round constants. */
+static const uint32_t K[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * Elementary functions used by SHA256
+ *
+ * The arguments are not parenthesised inside these macros, so they must
+ * only be invoked with simple operands (as SHA256_Transform() does).
+ */
+#define	Ch(x, y, z)	((x & (y ^ z)) ^ z)
+#define	Maj(x, y, z)	((x & (y | z)) | (y & z))
+#define	SHR(x, n)	(x >> n)
+#define	ROTR(x, n)	((x >> n) | (x << (32 - n)))
+#define	S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define	S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define	s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define	s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/*
+ * SHA256 round function
+ *
+ * Expands to three statements that update d and h in place; k is the sum
+ * of the message schedule word and the round constant.
+ */
+#define	RND(a, b, c, d, e, f, g, h, k)			\
+	h += S1(e) + Ch(e, f, g) + k;			\
+	d += h;						\
+	h += S0(a) + Maj(a, b, c);
+
+/*
+ * Adjusted round function for rotating state
+ *
+ * Instead of shifting the eight working variables after each round, the
+ * indices into S[] are rotated by i.  The round number is i + ii.
+ */
+#define	RNDr(S, W, i, ii)			\
+	RND(S[(64 - i) % 8], S[(65 - i) % 8],	\
+	    S[(66 - i) % 8], S[(67 - i) % 8],	\
+	    S[(68 - i) % 8], S[(69 - i) % 8],	\
+	    S[(70 - i) % 8], S[(71 - i) % 8],	\
+	    W[i + ii] + K[i + ii])
+
+/*
+ * Message schedule computation
+ *
+ * Computes W[i + ii + 16] from four earlier schedule words.  Note that
+ * the parameter order (ii, i) is the reverse of RNDr's (i, ii); only the
+ * sum i + ii is ever used, so call sites may pass the pair in either
+ * order.
+ */
+#define	MSCH(W, ii, i)				\
+	W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] +	\
+		s0(W[i + ii + 1]) + W[i + ii]
+
+/*
+ * SHA256 block compression function.  The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ *
+ * state: eight 32-bit words, updated in place.
+ * block: 64 bytes of input, read as 16 big-endian 32-bit words.
+ */
+static void
+SHA256_Transform(uint32_t *state, const unsigned char block[64])
+{
+	uint32_t W[64];
+	uint32_t S[8];
+	int i;
+
+	/* 1. Prepare the first part of the message schedule W. */
+	be32dec_vect(W, block, 64);
+
+	/* 2. Initialize working variables. */
+	memcpy(S, state, 32);
+
+	/*
+	 * 3. Mix.
+	 *
+	 * Four passes (i = 0, 16, 32, 48) of 16 unrolled rounds each.  After
+	 * each of the first three passes the next 16 schedule words
+	 * W[i + 16 .. i + 31] are derived for the following pass.
+	 */
+	for (i = 0; i < 64; i += 16) {
+		RNDr(S, W, 0, i);
+		RNDr(S, W, 1, i);
+		RNDr(S, W, 2, i);
+		RNDr(S, W, 3, i);
+		RNDr(S, W, 4, i);
+		RNDr(S, W, 5, i);
+		RNDr(S, W, 6, i);
+		RNDr(S, W, 7, i);
+		RNDr(S, W, 8, i);
+		RNDr(S, W, 9, i);
+		RNDr(S, W, 10, i);
+		RNDr(S, W, 11, i);
+		RNDr(S, W, 12, i);
+		RNDr(S, W, 13, i);
+		RNDr(S, W, 14, i);
+		RNDr(S, W, 15, i);
+
+		/* Last pass: no further schedule words are needed. */
+		if (i == 48)
+			break;
+		MSCH(W, 0, i);
+		MSCH(W, 1, i);
+		MSCH(W, 2, i);
+		MSCH(W, 3, i);
+		MSCH(W, 4, i);
+		MSCH(W, 5, i);
+		MSCH(W, 6, i);
+		MSCH(W, 7, i);
+		MSCH(W, 8, i);
+		MSCH(W, 9, i);
+		MSCH(W, 10, i);
+		MSCH(W, 11, i);
+		MSCH(W, 12, i);
+		MSCH(W, 13, i);
+		MSCH(W, 14, i);
+		MSCH(W, 15, i);
+	}
+
+	/* 4. Mix local working variables into global state */
+	for (i = 0; i < 8; i++)
+		state[i] += S[i];
+}
+
+/*
+ * Padding source: a single 0x80 byte followed by zeroes.  It is only ever
+ * read (as a memcpy() source), so it is const.
+ */
+static const unsigned char PAD[64] = {
+	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Append the padding (0x80, then zeroes up to 56 mod 64) and the 64-bit
+ * big-endian bit count, and compress the resulting final block(s).
+ */
+static void
+SHA256_Pad(SHA256_CTX * ctx)
+{
+	/* Bytes currently sitting in ctx->buf. */
+	const size_t used = (ctx->count >> 3) & 0x3f;
+
+	if (used >= 56) {
+		/*
+		 * No room left for the bit count: pad out this block,
+		 * mix it in, and start the final block with zeroes.
+		 */
+		memcpy(&ctx->buf[used], PAD, 64 - used);
+		SHA256_Transform(ctx->state, ctx->buf);
+		memset(&ctx->buf[0], 0, 56);
+	} else {
+		/* Pad this block up to the bit-count field. */
+		memcpy(&ctx->buf[used], PAD, 56 - used);
+	}
+
+	/* The last eight bytes carry the message length in bits. */
+	be64enc(&ctx->buf[56], ctx->count);
+
+	/* Mix in the final block. */
+	SHA256_Transform(ctx->state, ctx->buf);
+}
+
+/* SHA-256 initialization.  Puts ctx into the state of an empty message. */
+void
+SHA256_Init(SHA256_CTX * ctx)
+{
+	/* Magic initialization constants */
+	static const uint32_t iv[8] = {
+		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+	};
+	int i;
+
+	for (i = 0; i < 8; i++)
+		ctx->state[i] = iv[i];
+
+	/* No bits have been processed yet */
+	ctx->count = 0;
+}
+
+/*
+ * Add bytes into the hash.
+ *
+ * ctx: context prepared by SHA256_Init().
+ * in:  input bytes; not dereferenced when len is 0.
+ * len: number of input bytes.
+ */
+void
+SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
+{
+	uint64_t bitlen;
+	uint32_t r;
+	const unsigned char *src = in;
+
+	/* Nothing to add; this also keeps a NULL "in" away from memcpy(). */
+	if (len == 0)
+		return;
+
+	/* Number of bytes left in the buffer from previous updates */
+	r = (ctx->count >> 3) & 0x3f;
+
+	/*
+	 * Convert the length into a number of bits.  Widen first: where
+	 * size_t is 32 bits, shifting before the conversion would drop
+	 * the top three bits of the length.
+	 */
+	bitlen = (uint64_t)len << 3;
+
+	/* Update number of bits */
+	ctx->count += bitlen;
+
+	/* Handle the case where we don't need to perform any transforms */
+	if (len < 64 - r) {
+		memcpy(&ctx->buf[r], src, len);
+		return;
+	}
+
+	/* Finish the current block */
+	memcpy(&ctx->buf[r], src, 64 - r);
+	SHA256_Transform(ctx->state, ctx->buf);
+	src += 64 - r;
+	len -= 64 - r;
+
+	/* Perform complete blocks straight from the caller's buffer */
+	while (len >= 64) {
+		SHA256_Transform(ctx->state, src);
+		src += 64;
+		len -= 64;
+	}
+
+	/* Copy left over data into buffer */
+	memcpy(ctx->buf, src, len);
+}
+
+/*
+ * SHA-256 finalization.  Pads the message, writes the
+ * SHA256_DIGEST_LENGTH-byte digest in big-endian form, and then wipes
+ * the context.
+ */
+void
+SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
+{
+	/* Pad and compress whatever is still buffered */
+	SHA256_Pad(ctx);
+
+	/* Export the state words as the digest */
+	be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the context */
+	explicit_bzero(ctx, sizeof (SHA256_CTX));
+}
+
+/* SHA-224: ******************************************************* */
+/*
+ * the SHA224 and SHA256 transforms are identical
+ */
+
+/* SHA-224 initialization.  Puts ctx into the state of an empty message. */
+void
+SHA224_Init(SHA224_CTX * ctx)
+{
+	/* Magic initialization constants */
+	static const uint32_t iv[8] = {
+		0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
+		0xFFC00B31, 0x68581511, 0x64f98FA7, 0xBEFA4FA4
+	};
+	int i;
+
+	for (i = 0; i < 8; i++)
+		ctx->state[i] = iv[i];
+
+	/* No bits have been processed yet */
+	ctx->count = 0;
+}
+
+/*
+ * Add bytes into the SHA-224 hash.  The work is done by SHA256_Update()
+ * on the same context, viewed as a SHA256_CTX.
+ */
+void
+SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len)
+{
+	SHA256_CTX *ctx256 = (SHA256_CTX *)ctx;
+
+	SHA256_Update(ctx256, in, len);
+}
+
+/*
+ * SHA-224 finalization.  Pads the message, writes the
+ * SHA224_DIGEST_LENGTH-byte digest in big-endian form, and then wipes
+ * the context.
+ */
+void
+SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx)
+{
+	/* Padding is shared with SHA-256 */
+	SHA256_Pad((SHA256_CTX *)ctx);
+
+	/* Only the first SHA224_DIGEST_LENGTH bytes of state are exported */
+	be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the context */
+	explicit_bzero(ctx, sizeof (SHA224_CTX));
+}
+
+#ifdef WEAK_REFS
+/*
+ * When building libmd, provide weak references. Note: this is not
+ * activated in the context of compiling these sources for internal
+ * use in libcrypt.
+ */
+#undef SHA256_Init
+__weak_reference(_libmd_SHA256_Init, SHA256_Init);
+#undef SHA256_Update
+__weak_reference(_libmd_SHA256_Update, SHA256_Update);
+#undef SHA256_Final
+__weak_reference(_libmd_SHA256_Final, SHA256_Final);
+#undef SHA256_Transform
+__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
+
+#undef SHA224_Init
+__weak_reference(_libmd_SHA224_Init, SHA224_Init);
+#undef SHA224_Update
+__weak_reference(_libmd_SHA224_Update, SHA224_Update);
+#undef SHA224_Final
+__weak_reference(_libmd_SHA224_Final, SHA224_Final);
+#endif

diff --git a/zfs/module/os/freebsd/spl/sha384.h b/zfs/module/os/freebsd/spl/sha384.h
new file mode 100644
index 0000000..67250ce
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha384.h

@@ -0,0 +1,96 @@
+/*
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA384_H_
+#define	_SHA384_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+/* Sizes in bytes of one input block and of the final digest. */
+#define	SHA384_BLOCK_LENGTH		128
+#define	SHA384_DIGEST_LENGTH		48
+/* Two characters per digest byte plus one (presumably hex text and a NUL). */
+#define	SHA384_DIGEST_STRING_LENGTH	(SHA384_DIGEST_LENGTH * 2 + 1)
+
+/*
+ * SHA-384 hashing context.  sha512c.c casts a pointer to this structure to
+ * SHA512_CTX *, so the member layout has to stay the same as SHA512_CTX.
+ */
+typedef struct SHA384Context {
+	uint64_t state[8];	/* running hash state words */
+	uint64_t count[2];	/* bits hashed so far; both zeroed by Init */
+	uint8_t buf[SHA384_BLOCK_LENGTH];	/* buffered partial block */
+} SHA384_CTX;
+
+__BEGIN_DECLS
+
+/*
+ * Ensure libmd symbols do not clash with libcrypto.
+ * Every public name below is mapped to a _libmd_-prefixed symbol unless
+ * the includer has already defined a macro of that name.
+ */
+#ifndef SHA384_Init
+#define	SHA384_Init		_libmd_SHA384_Init
+#endif
+#ifndef SHA384_Update
+#define	SHA384_Update		_libmd_SHA384_Update
+#endif
+#ifndef SHA384_Final
+#define	SHA384_Final		_libmd_SHA384_Final
+#endif
+#ifndef SHA384_End
+#define	SHA384_End		_libmd_SHA384_End
+#endif
+#ifndef SHA384_Fd
+#define	SHA384_Fd		_libmd_SHA384_Fd
+#endif
+#ifndef SHA384_FdChunk
+#define	SHA384_FdChunk		_libmd_SHA384_FdChunk
+#endif
+#ifndef SHA384_File
+#define	SHA384_File		_libmd_SHA384_File
+#endif
+#ifndef SHA384_FileChunk
+#define	SHA384_FileChunk	_libmd_SHA384_FileChunk
+#endif
+#ifndef SHA384_Data
+#define	SHA384_Data		_libmd_SHA384_Data
+#endif
+
+#ifndef SHA384_version
+#define	SHA384_version		_libmd_SHA384_version
+#endif
+
+/* Streaming interface: Init, then any number of Updates, then Final. */
+void	SHA384_Init(SHA384_CTX *);
+void	SHA384_Update(SHA384_CTX *, const void *, size_t);
+void	SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)],
+    SHA384_CTX *);
+/* The remaining helpers are declared only when _KERNEL is not defined. */
+#ifndef _KERNEL
+char   *SHA384_End(SHA384_CTX *, char *);
+char   *SHA384_Data(const void *, unsigned int, char *);
+char   *SHA384_Fd(int, char *);
+char   *SHA384_FdChunk(int, char *, off_t, off_t);
+char   *SHA384_File(const char *, char *);
+char   *SHA384_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA384_H_ */

diff --git a/zfs/module/os/freebsd/spl/sha512.h b/zfs/module/os/freebsd/spl/sha512.h
new file mode 100644
index 0000000..b6fb733
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha512.h

@@ -0,0 +1,101 @@
+/*
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA512_H_
+#define	_SHA512_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+/* Sizes in bytes of one input block and of the final digest. */
+#define	SHA512_BLOCK_LENGTH		128
+#define	SHA512_DIGEST_LENGTH		64
+/* Two characters per digest byte plus one (presumably hex text and a NUL). */
+#define	SHA512_DIGEST_STRING_LENGTH	(SHA512_DIGEST_LENGTH * 2 + 1)
+
+/* SHA-512 hashing context, also used by the SHA-512/224 and /256 variants. */
+typedef struct SHA512Context {
+	uint64_t state[8];	/* running hash state words */
+	uint64_t count[2];	/* bits hashed so far: [0] high, [1] low word */
+	uint8_t buf[SHA512_BLOCK_LENGTH];	/* buffered partial block */
+} SHA512_CTX;
+
+__BEGIN_DECLS
+
+/*
+ * Ensure libmd symbols do not clash with libcrypto.
+ * The Init/Update/Final renames are compiled out with #if 0, so those
+ * three functions keep their plain names; the other names are still mapped
+ * to _libmd_-prefixed symbols.
+ */
+#if 0
+#ifndef SHA512_Init
+#define	SHA512_Init		_libmd_SHA512_Init
+#endif
+#ifndef SHA512_Update
+#define	SHA512_Update		_libmd_SHA512_Update
+#endif
+#ifndef SHA512_Final
+#define	SHA512_Final		_libmd_SHA512_Final
+#endif
+#endif
+#ifndef SHA512_End
+#define	SHA512_End		_libmd_SHA512_End
+#endif
+#ifndef SHA512_Fd
+#define	SHA512_Fd		_libmd_SHA512_Fd
+#endif
+#ifndef SHA512_FdChunk
+#define	SHA512_FdChunk		_libmd_SHA512_FdChunk
+#endif
+#ifndef SHA512_File
+#define	SHA512_File		_libmd_SHA512_File
+#endif
+#ifndef SHA512_FileChunk
+#define	SHA512_FileChunk	_libmd_SHA512_FileChunk
+#endif
+#ifndef SHA512_Data
+#define	SHA512_Data		_libmd_SHA512_Data
+#endif
+
+#ifndef SHA512_Transform
+#define	SHA512_Transform	_libmd_SHA512_Transform
+#endif
+#ifndef SHA512_version
+#define	SHA512_version		_libmd_SHA512_version
+#endif
+
+/* Streaming interface: Init, then any number of Updates, then Final. */
+void	SHA512_Init(SHA512_CTX *);
+void	SHA512_Update(SHA512_CTX *, const void *, size_t);
+void	SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)],
+    SHA512_CTX *);
+/* The remaining helpers are declared only when _KERNEL is not defined. */
+#ifndef _KERNEL
+char   *SHA512_End(SHA512_CTX *, char *);
+char   *SHA512_Data(const void *, unsigned int, char *);
+char   *SHA512_Fd(int, char *);
+char   *SHA512_FdChunk(int, char *, off_t, off_t);
+char   *SHA512_File(const char *, char *);
+char   *SHA512_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA512_H_ */

diff --git a/zfs/module/os/freebsd/spl/sha512c.c b/zfs/module/os/freebsd/spl/sha512c.c
new file mode 100644
index 0000000..146f338
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha512c.c

@@ -0,0 +1,508 @@
+/*
+ * Copyright 2005 Colin Percival
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include "sha512.h"
+#include "sha512t.h"
+#include "sha384.h"
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/*
+ * Big-endian build: both conversions are a plain memcpy() of len bytes.
+ */
+
+/* Copy a vector of big-endian uint64_t into a vector of bytes */
+#define	be64enc_vect(dst, src, len)	\
+	memcpy((void *)dst, (const void *)src, (size_t)len)
+
+/* Copy a vector of bytes into a vector of big-endian uint64_t */
+#define	be64dec_vect(dst, src, len)	\
+	memcpy((void *)dst, (const void *)src, (size_t)len)
+
+#else /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * Encode the leading len bytes' worth of the (uint64_t) vector src into
+ * the (unsigned char) vector dst in big-endian form.  len must be a
+ * multiple of 4: every full group of 8 bytes is one source word, and a
+ * trailing group of 4 bytes (as in the 28-byte SHA-512/224 digest) is the
+ * most significant half of the following source word.
+ */
+static void
+be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len / 8; i++)
+		be64enc(dst + i * 8, src[i]);
+
+	/*
+	 * A length that is 4 mod 8 still owes the caller four more bytes;
+	 * with only the loop above they were never written and the tail of
+	 * the output buffer was left untouched.
+	 */
+	if (len % 8 == 4)
+		be32enc(dst + i * 8, (uint32_t)(src[i] >> 32));
+}
+
+/*
+ * Decode len bytes of big-endian data at src into len/8 (uint64_t) words
+ * at dst.  Assumes len is a multiple of 8.
+ */
+static void
+be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
+{
+	const size_t nwords = len / 8;
+	size_t w = 0;
+
+	while (w < nwords) {
+		dst[w] = be64dec(src + w * 8);
+		w++;
+	}
+}
+
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * SHA512 round constants: 80 entries, one per round, read by RNDr as
+ * K[i + ii].
+ */
+static const uint64_t K[80] = {
+	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/*
+ * Elementary functions used by SHA512.
+ * Arguments are substituted without parentheses, so pass simple operands.
+ * ROTR rotates right within 64 bits; SHR is a plain right shift.
+ */
+#define	Ch(x, y, z)	((x & (y ^ z)) ^ z)
+#define	Maj(x, y, z)	((x & (y | z)) | (y & z))
+#define	SHR(x, n)	(x >> n)
+#define	ROTR(x, n)	((x >> n) | (x << (64 - n)))
+#define	S0(x)		(ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
+#define	S1(x)		(ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
+#define	s0(x)		(ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
+#define	s1(x)		(ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
+
+/*
+ * SHA512 round function.  Expands to three statements that update h and d
+ * in place; k is the combined schedule word plus round constant.
+ */
+#define	RND(a, b, c, d, e, f, g, h, k)			\
+	h += S1(e) + Ch(e, f, g) + k;			\
+	d += h;						\
+	h += S0(a) + Maj(a, b, c);
+
+/*
+ * Adjusted round function for rotating state.  Rather than moving the
+ * eight working variables after each round, the index of each one within
+ * S[] is shifted by i (mod 8).  i is the position within the current group
+ * of 16 rounds and ii is the first round of that group.
+ */
+#define	RNDr(S, W, i, ii)			\
+	RND(S[(80 - i) % 8], S[(81 - i) % 8],	\
+	    S[(82 - i) % 8], S[(83 - i) % 8],	\
+	    S[(84 - i) % 8], S[(85 - i) % 8],	\
+	    S[(86 - i) % 8], S[(87 - i) % 8],	\
+	    W[i + ii] + K[i + ii])
+
+/*
+ * Message schedule computation: fills W[i + ii + 16] from four earlier
+ * entries.  Note the parameter order is (W, ii, i), the reverse of RNDr;
+ * only the sum i + ii is used, so the result does not depend on it.
+ */
+#define	MSCH(W, ii, i)				\
+	W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] +	\
+		s0(W[i + ii + 1]) + W[i + ii]
+
+/*
+ * SHA512 block compression function.  The 512-bit state (eight 64-bit
+ * words) is transformed via the 1024-bit (SHA512_BLOCK_LENGTH bytes) input
+ * block to produce a new state, which is accumulated back into state[].
+ */
+static void
+SHA512_Transform(uint64_t *state,
+    const unsigned char block[SHA512_BLOCK_LENGTH])
+{
+	uint64_t W[80];
+	uint64_t S[8];
+	int i;
+
+	/* 1. Prepare the first part of the message schedule W. */
+	/* Only W[0..15] are filled here; MSCH below produces the rest. */
+	be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
+
+	/* 2. Initialize working variables. */
+	memcpy(S, state, SHA512_DIGEST_LENGTH);
+
+	/* 3. Mix. */
+	/* Each pass runs 16 rounds, then extends W by 16 more entries. */
+	for (i = 0; i < 80; i += 16) {
+		RNDr(S, W, 0, i);
+		RNDr(S, W, 1, i);
+		RNDr(S, W, 2, i);
+		RNDr(S, W, 3, i);
+		RNDr(S, W, 4, i);
+		RNDr(S, W, 5, i);
+		RNDr(S, W, 6, i);
+		RNDr(S, W, 7, i);
+		RNDr(S, W, 8, i);
+		RNDr(S, W, 9, i);
+		RNDr(S, W, 10, i);
+		RNDr(S, W, 11, i);
+		RNDr(S, W, 12, i);
+		RNDr(S, W, 13, i);
+		RNDr(S, W, 14, i);
+		RNDr(S, W, 15, i);
+
+		/* Rounds 64..79 were the last; W[80..] must not be written. */
+		if (i == 64)
+			break;
+		MSCH(W, 0, i);
+		MSCH(W, 1, i);
+		MSCH(W, 2, i);
+		MSCH(W, 3, i);
+		MSCH(W, 4, i);
+		MSCH(W, 5, i);
+		MSCH(W, 6, i);
+		MSCH(W, 7, i);
+		MSCH(W, 8, i);
+		MSCH(W, 9, i);
+		MSCH(W, 10, i);
+		MSCH(W, 11, i);
+		MSCH(W, 12, i);
+		MSCH(W, 13, i);
+		MSCH(W, 14, i);
+		MSCH(W, 15, i);
+	}
+
+	/* 4. Mix local working variables into global state */
+	for (i = 0; i < 8; i++)
+		state[i] += S[i];
+}
+
+/*
+ * Padding source for SHA512_Pad(): a single 0x80 byte followed by zeroes.
+ * It is only ever used as the source of memcpy(), so it is const and
+ * cannot be modified by accident.
+ */
+static const unsigned char PAD[SHA512_BLOCK_LENGTH] = {
+	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Append the padding and the 16-byte terminating bit count to the buffered
+ * data, running the compression function over the block(s) this completes.
+ */
+static void
+SHA512_Pad(SHA512_CTX * ctx)
+{
+	/* Bytes currently sitting in ctx->buf. */
+	const size_t used = (ctx->count[1] >> 3) & 0x7f;
+
+	if (used >= 112) {
+		/*
+		 * The bit count does not fit behind the data: pad out this
+		 * block, mix it, and start the final block with zeroes.
+		 */
+		memcpy(&ctx->buf[used], PAD, 128 - used);
+		SHA512_Transform(ctx->state, ctx->buf);
+		memset(&ctx->buf[0], 0, 112);
+	} else {
+		/* Pad up to byte 112 of the current block. */
+		memcpy(&ctx->buf[used], PAD, 112 - used);
+	}
+
+	/* The last 16 bytes of the block hold the bit count. */
+	be64enc_vect(&ctx->buf[112], ctx->count, 16);
+
+	/* Mix the final block into the state. */
+	SHA512_Transform(ctx->state, ctx->buf);
+}
+
+/*
+ * Start a new SHA-512 computation on ctx: clear both words of the bit
+ * counter and load the eight SHA-512 initial state words.
+ */
+void
+SHA512_Init(SHA512_CTX * ctx)
+{
+	/* SHA-512 initial state, words 0 through 7. */
+	static const uint64_t sha512_iv[8] = {
+		0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+		0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+		0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+		0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+	};
+	unsigned int w;
+
+	/* Nothing has been hashed yet. */
+	ctx->count[1] = 0;
+	ctx->count[0] = 0;
+
+	for (w = 0; w < 8; w++)
+		ctx->state[w] = sha512_iv[w];
+}
+
+/*
+ * Absorb len bytes starting at in.  Input is gathered in ctx->buf until a
+ * whole block is available; whole blocks are then compressed straight from
+ * the caller's memory and any remainder is kept for the next call.
+ */
+void
+SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+	const unsigned char *p = in;
+	uint64_t lo_bits, hi_bits;
+	uint64_t used, room;
+
+	/* Bytes already buffered, and the space left in the block. */
+	used = (ctx->count[1] >> 3) & 0x7f;
+	room = SHA512_BLOCK_LENGTH - used;
+
+	/* len expressed in bits, as a 128-bit quantity split in two. */
+	lo_bits = ((uint64_t)len) << 3;
+	hi_bits = ((uint64_t)len) >> 61;
+
+	/* Add to the running bit count, carrying out of the low word. */
+	ctx->count[1] += lo_bits;
+	if (ctx->count[1] < lo_bits)
+		ctx->count[0]++;
+	ctx->count[0] += hi_bits;
+
+	/* Not enough to complete a block: just buffer it. */
+	if (len < room) {
+		memcpy(&ctx->buf[used], p, len);
+		return;
+	}
+
+	/* Top up the buffered block and compress it. */
+	memcpy(&ctx->buf[used], p, room);
+	SHA512_Transform(ctx->state, ctx->buf);
+	p += room;
+	len -= room;
+
+	/* Compress every further whole block directly from the input. */
+	for (; len >= SHA512_BLOCK_LENGTH; len -= SHA512_BLOCK_LENGTH) {
+		SHA512_Transform(ctx->state, p);
+		p += SHA512_BLOCK_LENGTH;
+	}
+
+	/* Keep the tail for later. */
+	memcpy(ctx->buf, p, len);
+}
+
+/*
+ * Finish a SHA-512 computation: pad the message, store the
+ * SHA512_DIGEST_LENGTH-byte digest in big-endian form, then wipe the
+ * whole context.
+ */
+void
+SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
+{
+	uint64_t *const words = ctx->state;
+
+	/* Padding and bit count. */
+	SHA512_Pad(ctx);
+
+	/* Export the digest bytes. */
+	be64enc_vect(digest, words, SHA512_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the caller's context. */
+	explicit_bzero(ctx, sizeof (*ctx));
+}
+
+/* SHA-512t: ******************************************************** */
+/*
+ * the SHA512t transforms are identical to SHA512 so reuse the existing function
+ */
+/*
+ * Start a new SHA-512/224 computation on ctx: clear both words of the bit
+ * counter and load the eight SHA-512/224 initial state words.
+ */
+void
+SHA512_224_Init(SHA512_CTX * ctx)
+{
+	/* SHA-512/224 initial state, words 0 through 7. */
+	static const uint64_t sha512_224_iv[8] = {
+		0x8c3d37c819544da2ULL, 0x73e1996689dcd4d6ULL,
+		0x1dfab7ae32ff9c82ULL, 0x679dd514582f9fcfULL,
+		0x0f6d2b697bd44da8ULL, 0x77e36f7304c48942ULL,
+		0x3f9d85a86a1d36c8ULL, 0x1112e6ad91d692a1ULL
+	};
+	unsigned int w;
+
+	/* Nothing has been hashed yet. */
+	ctx->count[1] = 0;
+	ctx->count[0] = 0;
+
+	for (w = 0; w < 8; w++)
+		ctx->state[w] = sha512_224_iv[w];
+}
+
+/* Absorb len bytes at in; SHA-512/224 shares SHA512_Update() unchanged. */
+void
+SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+	const void *const data = in;
+
+	SHA512_Update(ctx, data, len);
+}
+
+/*
+ * Finish a SHA-512/224 computation: pad the message, export
+ * SHA512_224_DIGEST_LENGTH bytes of the state in big-endian form, then
+ * wipe the whole context.
+ *
+ * NOTE(review): SHA512_224_DIGEST_LENGTH is 28, which is not a multiple of
+ * 8; confirm that be64enc_vect() emits the trailing 4 bytes on
+ * little-endian builds.
+ */
+void
+SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH],
+    SHA512_CTX *ctx)
+{
+	uint64_t *const words = ctx->state;
+
+	/* Padding and bit count. */
+	SHA512_Pad(ctx);
+
+	/* Export the digest bytes. */
+	be64enc_vect(digest, words, SHA512_224_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the caller's context. */
+	explicit_bzero(ctx, sizeof (*ctx));
+}
+
+/*
+ * Start a new SHA-512/256 computation on ctx: clear both words of the bit
+ * counter and load the eight SHA-512/256 initial state words.
+ */
+void
+SHA512_256_Init(SHA512_CTX * ctx)
+{
+	/* SHA-512/256 initial state, words 0 through 7. */
+	static const uint64_t sha512_256_iv[8] = {
+		0x22312194fc2bf72cULL, 0x9f555fa3c84c64c2ULL,
+		0x2393b86b6f53b151ULL, 0x963877195940eabdULL,
+		0x96283ee2a88effe3ULL, 0xbe5e1e2553863992ULL,
+		0x2b0199fc2c85b8aaULL, 0x0eb72ddc81c52ca2ULL
+	};
+	unsigned int w;
+
+	/* Nothing has been hashed yet. */
+	ctx->count[1] = 0;
+	ctx->count[0] = 0;
+
+	for (w = 0; w < 8; w++)
+		ctx->state[w] = sha512_256_iv[w];
+}
+
+/* Absorb len bytes at in; SHA-512/256 shares SHA512_Update() unchanged. */
+void
+SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+	const void *const data = in;
+
+	SHA512_Update(ctx, data, len);
+}
+
+/*
+ * Finish a SHA-512/256 computation: pad the message, export
+ * SHA512_256_DIGEST_LENGTH bytes of the state in big-endian form, then
+ * wipe the whole context.
+ */
+void
+SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH],
+    SHA512_CTX * ctx)
+{
+	uint64_t *const words = ctx->state;
+
+	/* Padding and bit count. */
+	SHA512_Pad(ctx);
+
+	/* Export the digest bytes. */
+	be64enc_vect(digest, words, SHA512_256_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the caller's context. */
+	explicit_bzero(ctx, sizeof (*ctx));
+}
+
+/* ** SHA-384: ******************************************************** */
+/*
+ * the SHA384 and SHA512 transforms are identical, so SHA384 is skipped
+ */
+
+/*
+ * Start a new SHA-384 computation on ctx: clear both words of the bit
+ * counter and load the eight SHA-384 initial state words.
+ */
+void
+SHA384_Init(SHA384_CTX * ctx)
+{
+	/* SHA-384 initial state, words 0 through 7. */
+	static const uint64_t sha384_iv[8] = {
+		0xcbbb9d5dc1059ed8ULL, 0x629a292a367cd507ULL,
+		0x9159015a3070dd17ULL, 0x152fecd8f70e5939ULL,
+		0x67332667ffc00b31ULL, 0x8eb44a8768581511ULL,
+		0xdb0c2e0d64f98fa7ULL, 0x47b5481dbefa4fa4ULL
+	};
+	unsigned int w;
+
+	/* Nothing has been hashed yet. */
+	ctx->count[1] = 0;
+	ctx->count[0] = 0;
+
+	for (w = 0; w < 8; w++)
+		ctx->state[w] = sha384_iv[w];
+}
+
+/*
+ * Feed len bytes starting at in into the SHA-384 hash.  The work is done
+ * by SHA512_Update() on the same context, viewed as a SHA512_CTX.
+ */
+void
+SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len)
+{
+	SHA512_CTX *sha512_view = (SHA512_CTX *)ctx;
+
+	SHA512_Update(sha512_view, in, len);
+}
+
+/*
+ * Finish a SHA-384 computation: pad the message through SHA512_Pad(),
+ * store SHA384_DIGEST_LENGTH bytes of the state into digest in big-endian
+ * form, then wipe the whole context.
+ */
+void
+SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
+{
+	SHA512_CTX *sha512_view = (SHA512_CTX *)ctx;
+
+	/* Padding is shared with SHA-512. */
+	SHA512_Pad(sha512_view);
+
+	/* Export the digest bytes. */
+	be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
+
+	/* Leave no hash state behind in the caller's context. */
+	explicit_bzero(ctx, sizeof (*ctx));
+}
+
+#if 0
+/*
+ * When building libmd, provide weak references. Note: this is not
+ * activated in the context of compiling these sources for internal
+ * use in libcrypt.
+ *
+ * The whole section is compiled out here by the enclosing #if 0.
+ */
+#undef SHA512_Init
+__weak_reference(_libmd_SHA512_Init, SHA512_Init);
+#undef SHA512_Update
+__weak_reference(_libmd_SHA512_Update, SHA512_Update);
+#undef SHA512_Final
+__weak_reference(_libmd_SHA512_Final, SHA512_Final);
+#undef SHA512_Transform
+__weak_reference(_libmd_SHA512_Transform, SHA512_Transform);
+
+/* SHA-512/224 entry points. */
+#undef SHA512_224_Init
+__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init);
+#undef SHA512_224_Update
+__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update);
+#undef SHA512_224_Final
+__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final);
+
+/* SHA-512/256 entry points. */
+#undef SHA512_256_Init
+__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init);
+#undef SHA512_256_Update
+__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update);
+#undef SHA512_256_Final
+__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final);
+
+/* SHA-384 entry points. */
+#undef SHA384_Init
+__weak_reference(_libmd_SHA384_Init, SHA384_Init);
+#undef SHA384_Update
+__weak_reference(_libmd_SHA384_Update, SHA384_Update);
+#undef SHA384_Final
+__weak_reference(_libmd_SHA384_Final, SHA384_Final);
+#endif /* 0 */

diff --git a/zfs/module/os/freebsd/spl/sha512t.h b/zfs/module/os/freebsd/spl/sha512t.h
new file mode 100644
index 0000000..703867f
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/sha512t.h

@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA512T_H_
+#define	_SHA512T_H_
+
+#include "sha512.h"
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+/*
+ * Digest sizes in bytes for the truncated SHA-512 variants.  Each string
+ * length is two characters per digest byte plus one (presumably hex text
+ * and a terminating NUL).  Note that 28 is not a multiple of 8.
+ */
+#define	SHA512_224_DIGEST_LENGTH	28
+#define	SHA512_224_DIGEST_STRING_LENGTH	(SHA512_224_DIGEST_LENGTH * 2 + 1)
+#define	SHA512_256_DIGEST_LENGTH	32
+#define	SHA512_256_DIGEST_STRING_LENGTH	(SHA512_256_DIGEST_LENGTH * 2 + 1)
+
+__BEGIN_DECLS
+
+/*
+ * Ensure libmd symbols do not clash with libcrypto.
+ * Every public name below is mapped to a _libmd_-prefixed symbol unless
+ * the includer has already defined a macro of that name.
+ */
+#ifndef SHA512_224_Init
+#define	SHA512_224_Init		_libmd_SHA512_224_Init
+#endif
+#ifndef SHA512_224_Update
+#define	SHA512_224_Update	_libmd_SHA512_224_Update
+#endif
+#ifndef SHA512_224_Final
+#define	SHA512_224_Final	_libmd_SHA512_224_Final
+#endif
+#ifndef SHA512_224_End
+#define	SHA512_224_End		_libmd_SHA512_224_End
+#endif
+#ifndef SHA512_224_Fd
+#define	SHA512_224_Fd		_libmd_SHA512_224_Fd
+#endif
+#ifndef SHA512_224_FdChunk
+#define	SHA512_224_FdChunk	_libmd_SHA512_224_FdChunk
+#endif
+#ifndef SHA512_224_File
+#define	SHA512_224_File		_libmd_SHA512_224_File
+#endif
+#ifndef SHA512_224_FileChunk
+#define	SHA512_224_FileChunk	_libmd_SHA512_224_FileChunk
+#endif
+#ifndef SHA512_224_Data
+#define	SHA512_224_Data		_libmd_SHA512_224_Data
+#endif
+
+#ifndef SHA512_224_Transform
+#define	SHA512_224_Transform	_libmd_SHA512_224_Transform
+#endif
+#ifndef SHA512_224_version
+#define	SHA512_224_version	_libmd_SHA512_224_version
+#endif
+
+/* The same set of renames for the SHA-512/256 names. */
+#ifndef SHA512_256_Init
+#define	SHA512_256_Init		_libmd_SHA512_256_Init
+#endif
+#ifndef SHA512_256_Update
+#define	SHA512_256_Update	_libmd_SHA512_256_Update
+#endif
+#ifndef SHA512_256_Final
+#define	SHA512_256_Final	_libmd_SHA512_256_Final
+#endif
+#ifndef SHA512_256_End
+#define	SHA512_256_End		_libmd_SHA512_256_End
+#endif
+#ifndef SHA512_256_Fd
+#define	SHA512_256_Fd		_libmd_SHA512_256_Fd
+#endif
+#ifndef SHA512_256_FdChunk
+#define	SHA512_256_FdChunk	_libmd_SHA512_256_FdChunk
+#endif
+#ifndef SHA512_256_File
+#define	SHA512_256_File		_libmd_SHA512_256_File
+#endif
+#ifndef SHA512_256_FileChunk
+#define	SHA512_256_FileChunk	_libmd_SHA512_256_FileChunk
+#endif
+#ifndef SHA512_256_Data
+#define	SHA512_256_Data		_libmd_SHA512_256_Data
+#endif
+
+#ifndef SHA512_256_Transform
+#define	SHA512_256_Transform	_libmd_SHA512_256_Transform
+#endif
+#ifndef SHA512_256_version
+#define	SHA512_256_version	_libmd_SHA512_256_version
+#endif
+
+/*
+ * Both variants operate on a SHA512_CTX.  The streaming interface is
+ * Init, then any number of Updates, then Final; the char * helpers are
+ * declared only when _KERNEL is not defined.
+ */
+void	SHA512_224_Init(SHA512_CTX *);
+void	SHA512_224_Update(SHA512_CTX *, const void *, size_t);
+void	SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)],
+    SHA512_CTX *);
+#ifndef _KERNEL
+char   *SHA512_224_End(SHA512_CTX *, char *);
+char   *SHA512_224_Data(const void *, unsigned int, char *);
+char   *SHA512_224_Fd(int, char *);
+char   *SHA512_224_FdChunk(int, char *, off_t, off_t);
+char   *SHA512_224_File(const char *, char *);
+char   *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
+#endif
+void	SHA512_256_Init(SHA512_CTX *);
+void	SHA512_256_Update(SHA512_CTX *, const void *, size_t);
+void	SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)],
+    SHA512_CTX *);
+#ifndef _KERNEL
+char   *SHA512_256_End(SHA512_CTX *, char *);
+char   *SHA512_256_Data(const void *, unsigned int, char *);
+char   *SHA512_256_Fd(int, char *);
+char   *SHA512_256_FdChunk(int, char *, off_t, off_t);
+char   *SHA512_256_File(const char *, char *);
+char   *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA512T_H_ */

diff --git a/zfs/module/os/freebsd/spl/spl_acl.c b/zfs/module/os/freebsd/spl/spl_acl.c
new file mode 100644
index 0000000..74c26d0
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_acl.c

@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2008, 2009 Edward Tomasz Napierała <trasz@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/errno.h>
+#include <sys/zfs_acl.h>
+#include <sys/acl.h>
+
+/*
+ * One row of a ZFS <-> FreeBSD bit translation table.  A row whose members
+ * are both 0 terminates the table.
+ */
+struct zfs2bsd {
+	uint32_t	zb_zfs;		/* ZFS-side (ACE_*) bit */
+	int		zb_bsd;		/* matching FreeBSD-side (ACL_*) bit */
+};
+
+/*
+ * Permission bits: ZFS access-mask bits paired with the FreeBSD ae_perm
+ * bits they translate to.  Terminated by {0, 0}.
+ */
+struct zfs2bsd perms[] = {{ACE_READ_DATA, ACL_READ_DATA},
+			{ACE_WRITE_DATA, ACL_WRITE_DATA},
+			{ACE_EXECUTE, ACL_EXECUTE},
+			{ACE_APPEND_DATA, ACL_APPEND_DATA},
+			{ACE_DELETE_CHILD, ACL_DELETE_CHILD},
+			{ACE_DELETE, ACL_DELETE},
+			{ACE_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES},
+			{ACE_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES},
+			{ACE_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS},
+			{ACE_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS},
+			{ACE_READ_ACL, ACL_READ_ACL},
+			{ACE_WRITE_ACL, ACL_WRITE_ACL},
+			{ACE_WRITE_OWNER, ACL_WRITE_OWNER},
+			{ACE_SYNCHRONIZE, ACL_SYNCHRONIZE},
+			{0, 0}};
+
+/*
+ * Entry flag bits: ZFS a_flags bits paired with the FreeBSD ae_flags bits
+ * they translate to.  Terminated by {0, 0}.
+ */
+struct zfs2bsd flags[] = {{ACE_FILE_INHERIT_ACE,
+			    ACL_ENTRY_FILE_INHERIT},
+			{ACE_DIRECTORY_INHERIT_ACE,
+			    ACL_ENTRY_DIRECTORY_INHERIT},
+			{ACE_NO_PROPAGATE_INHERIT_ACE,
+			    ACL_ENTRY_NO_PROPAGATE_INHERIT},
+			{ACE_INHERIT_ONLY_ACE,
+			    ACL_ENTRY_INHERIT_ONLY},
+			{ACE_INHERITED_ACE,
+			    ACL_ENTRY_INHERITED},
+			{ACE_SUCCESSFUL_ACCESS_ACE_FLAG,
+			    ACL_ENTRY_SUCCESSFUL_ACCESS},
+			{ACE_FAILED_ACCESS_ACE_FLAG,
+			    ACL_ENTRY_FAILED_ACCESS},
+			{0, 0}};
+
+/*
+ * Translate a ZFS bit mask to its FreeBSD equivalent: for every table row
+ * whose zb_zfs value shares a bit with zfs, OR the row's zb_bsd value into
+ * the result.  The walk stops at the first row with zb_zfs == 0.
+ */
+static int
+_bsd_from_zfs(uint32_t zfs, const struct zfs2bsd *table)
+{
+	const struct zfs2bsd *row = table;
+	int result = 0;
+
+	while (row->zb_zfs != 0) {
+		if ((zfs & row->zb_zfs) != 0)
+			result |= row->zb_bsd;
+		row++;
+	}
+
+	return (result);
+}
+
+/*
+ * Translate a FreeBSD bit mask to its ZFS equivalent: for every table row
+ * whose zb_bsd value shares a bit with bsd, OR the row's zb_zfs value into
+ * the result.  The walk stops at the first row with zb_bsd == 0.
+ */
+static uint32_t
+_zfs_from_bsd(int bsd, const struct zfs2bsd *table)
+{
+	const struct zfs2bsd *row = table;
+	uint32_t result = 0;
+
+	while (row->zb_bsd != 0) {
+		if ((bsd & row->zb_bsd) != 0)
+			result |= row->zb_zfs;
+		row++;
+	}
+
+	return (result);
+}
+
+/*
+ * Fill the FreeBSD ACL *aclp from an array of nentries ZFS ACEs.
+ *
+ * Returns 0 on success, or EINVAL (after logging a message) when nentries
+ * is below 1 or above ACL_MAX_ENTRIES.  Panics on an ACE whose a_type is
+ * not one of the four handled types.
+ */
+int
+acl_from_aces(struct acl *aclp, const ace_t *aces, int nentries)
+{
+	const ace_t *src;
+	struct acl_entry *dst;
+	int idx;
+
+	if (nentries < 1) {
+		printf("acl_from_aces: empty ZFS ACL; returning EINVAL.\n");
+		return (EINVAL);
+	}
+
+	if (nentries > ACL_MAX_ENTRIES) {
+		/*
+		 * I believe it may happen only when moving a pool
+		 * from SunOS to FreeBSD.
+		 */
+		printf("acl_from_aces: ZFS ACL too big to fit "
+		    "into 'struct acl'; returning EINVAL.\n");
+		return (EINVAL);
+	}
+
+	/* Start from an all-zero ACL and record its sizes. */
+	bzero(aclp, sizeof (*aclp));
+	aclp->acl_cnt = nentries;
+	aclp->acl_maxcnt = ACL_MAX_ENTRIES;
+
+	for (idx = 0; idx < nentries; idx++) {
+		src = &aces[idx];
+		dst = &aclp->acl_entry[idx];
+
+		/* Who the entry applies to; the first matching flag wins. */
+		if (src->a_flags & ACE_OWNER)
+			dst->ae_tag = ACL_USER_OBJ;
+		else if (src->a_flags & ACE_GROUP)
+			dst->ae_tag = ACL_GROUP_OBJ;
+		else if (src->a_flags & ACE_EVERYONE)
+			dst->ae_tag = ACL_EVERYONE;
+		else if (src->a_flags & ACE_IDENTIFIER_GROUP)
+			dst->ae_tag = ACL_GROUP;
+		else
+			dst->ae_tag = ACL_USER;
+
+		/* Only named users and groups carry an id. */
+		if (dst->ae_tag != ACL_USER && dst->ae_tag != ACL_GROUP)
+			dst->ae_id = ACL_UNDEFINED_ID;
+		else
+			dst->ae_id = src->a_who;
+
+		dst->ae_flags = _bsd_from_zfs(src->a_flags, flags);
+		dst->ae_perm = _bsd_from_zfs(src->a_access_mask, perms);
+
+		switch (src->a_type) {
+		case ACE_ACCESS_DENIED_ACE_TYPE:
+			dst->ae_entry_type = ACL_ENTRY_TYPE_DENY;
+			break;
+		case ACE_ACCESS_ALLOWED_ACE_TYPE:
+			dst->ae_entry_type = ACL_ENTRY_TYPE_ALLOW;
+			break;
+		case ACE_SYSTEM_ALARM_ACE_TYPE:
+			dst->ae_entry_type = ACL_ENTRY_TYPE_ALARM;
+			break;
+		case ACE_SYSTEM_AUDIT_ACE_TYPE:
+			dst->ae_entry_type = ACL_ENTRY_TYPE_AUDIT;
+			break;
+		default:
+			panic("acl_from_aces: a_type is 0x%x", src->a_type);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Fill the ZFS ACE array aces from the FreeBSD ACL *aclp and store the
+ * entry count in *nentries.  aces must have room for aclp->acl_cnt
+ * entries; that many are zeroed before being filled in.  Panics on an
+ * entry whose ae_entry_type is not one of the four handled types.
+ */
+void
+aces_from_acl(ace_t *aces, int *nentries, const struct acl *aclp)
+{
+	const struct acl_entry *src;
+	ace_t *dst;
+	int idx;
+
+	bzero(aces, sizeof (*aces) * aclp->acl_cnt);
+
+	*nentries = aclp->acl_cnt;
+
+	for (idx = 0; idx < aclp->acl_cnt; idx++) {
+		src = &aclp->acl_entry[idx];
+		dst = &aces[idx];
+
+		dst->a_who = src->ae_id;
+
+		/* Identity bits implied by the tag. */
+		switch (src->ae_tag) {
+		case ACL_USER_OBJ:
+			dst->a_flags = ACE_OWNER;
+			break;
+		case ACL_GROUP_OBJ:
+			dst->a_flags = (ACE_GROUP | ACE_IDENTIFIER_GROUP);
+			break;
+		case ACL_GROUP:
+			dst->a_flags = ACE_IDENTIFIER_GROUP;
+			break;
+		case ACL_EVERYONE:
+			dst->a_flags = ACE_EVERYONE;
+			break;
+		default: /* ACL_USER */
+			dst->a_flags = 0;
+			break;
+		}
+
+		/* Add the translated entry flags, then the permissions. */
+		dst->a_flags |= _zfs_from_bsd(src->ae_flags, flags);
+		dst->a_access_mask = _zfs_from_bsd(src->ae_perm, perms);
+
+		switch (src->ae_entry_type) {
+		case ACL_ENTRY_TYPE_DENY:
+			dst->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+			break;
+		case ACL_ENTRY_TYPE_ALLOW:
+			dst->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
+			break;
+		case ACL_ENTRY_TYPE_AUDIT:
+			dst->a_type = ACE_SYSTEM_AUDIT_ACE_TYPE;
+			break;
+		case ACL_ENTRY_TYPE_ALARM:
+			dst->a_type = ACE_SYSTEM_ALARM_ACE_TYPE;
+			break;
+		default:
+			panic("aces_from_acl: ae_entry_type is 0x%x",
+			    src->ae_entry_type);
+		}
+	}
+}

diff --git a/zfs/module/os/freebsd/spl/spl_atomic.c b/zfs/module/os/freebsd/spl/spl_atomic.c
new file mode 100644
index 0000000..80040fc
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_atomic.c

@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/atomic.h>
+
+#if !defined(__LP64__) && !defined(__mips_n32) && \
+	!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64) && \
+	!defined(HAS_EMULATED_ATOMIC64)
+
+/*
+ * A single global lock serializes every emulated 64-bit atomic operation
+ * defined below.  In the kernel it is a mutex set up via MTX_SYSINIT;
+ * otherwise it is a pthread mutex initialized from a constructor, with
+ * mtx_lock()/mtx_unlock() mapped onto the pthread calls.
+ */
+#ifdef _KERNEL
+#include <sys/kernel.h>
+
+struct mtx atomic_mtx;
+MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF);
+#else
+#include <pthread.h>
+
+#define	mtx_lock(lock)		pthread_mutex_lock(lock)
+#define	mtx_unlock(lock)	pthread_mutex_unlock(lock)
+
+static pthread_mutex_t atomic_mtx;
+
+/* Initialize atomic_mtx with default attributes. */
+static __attribute__((constructor)) void
+atomic_init(void)
+{
+	pthread_mutex_init(&atomic_mtx, NULL);
+}
+#endif
+
+/* Add delta to *target while holding atomic_mtx. */
+void
+atomic_add_64(volatile uint64_t *target, int64_t delta)
+{
+
+	mtx_lock(&atomic_mtx);
+	*target = *target + delta;
+	mtx_unlock(&atomic_mtx);
+}
+
+/* Subtract one from *target while holding atomic_mtx. */
+void
+atomic_dec_64(volatile uint64_t *target)
+{
+
+	mtx_lock(&atomic_mtx);
+	*target = *target - 1;
+	mtx_unlock(&atomic_mtx);
+}
+
+/* Store value into *a under atomic_mtx and return what was there before. */
+uint64_t
+atomic_swap_64(volatile uint64_t *a, uint64_t value)
+{
+	uint64_t previous;
+
+	mtx_lock(&atomic_mtx);
+	previous = *a;
+	*a = value;
+	mtx_unlock(&atomic_mtx);
+
+	return (previous);
+}
+
+/* Read *a under atomic_mtx and return the value seen. */
+uint64_t
+atomic_load_64(volatile uint64_t *a)
+{
+	uint64_t snapshot;
+
+	mtx_lock(&atomic_mtx);
+	snapshot = *a;
+	mtx_unlock(&atomic_mtx);
+
+	return (snapshot);
+}
+
+/* Add delta to *target under atomic_mtx and return the resulting value. */
+uint64_t
+atomic_add_64_nv(volatile uint64_t *target, int64_t delta)
+{
+	uint64_t sum;
+
+	mtx_lock(&atomic_mtx);
+	sum = *target + delta;
+	*target = sum;
+	mtx_unlock(&atomic_mtx);
+
+	return (sum);
+}
+
+/*
+ * Compare-and-swap under atomic_mtx: when *target equals cmp, replace it
+ * with newval.  The value found in *target is returned either way, so the
+ * swap happened exactly when the return value equals cmp.
+ */
+uint64_t
+atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval)
+{
+	uint64_t seen;
+
+	mtx_lock(&atomic_mtx);
+	seen = *target;
+	if (seen == cmp)
+		*target = newval;
+	mtx_unlock(&atomic_mtx);
+
+	return (seen);
+}
+#endif
+#endif

diff --git a/zfs/module/os/freebsd/spl/spl_cmn_err.c b/zfs/module/os/freebsd/spl/spl_cmn_err.c
new file mode 100644
index 0000000..22c7338
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_cmn_err.c

@@ -0,0 +1,77 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * $FreeBSD$
+ */
+/*
+ * Copyright 2007 John Birrell <jb@FreeBSD.org>. All rights reserved.
+ * Copyright 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+
+/*
+ * Emit a message at severity ce, formatted from fmt and adx.
+ *
+ * CE_PANIC formats the message into a local 256-byte buffer and panics
+ * with it; CE_IGNORE produces no output; every other known level prints
+ * a severity prefix, the formatted message and a newline.  An unknown
+ * severity level panics.
+ */
+void
+vcmn_err(int ce, const char *fmt, va_list adx)
+{
+	const char *prefix = NULL; /* silence unwitty compilers */
+	char buf[256];
+
+	switch (ce) {
+	case CE_CONT:
+		prefix = "Solaris(cont): ";
+		break;
+	case CE_NOTE:
+		prefix = "Solaris: NOTICE: ";
+		break;
+	case CE_WARN:
+		prefix = "Solaris: WARNING: ";
+		break;
+	case CE_PANIC:
+		prefix = "Solaris(panic): ";
+		break;
+	case CE_IGNORE:
+		break;
+	default:
+		panic("Solaris: unknown severity level");
+	}
+
+	if (ce == CE_PANIC) {
+		vsnprintf(buf, sizeof (buf), fmt, adx);
+		panic("%s%s", prefix, buf);
+	}
+
+	/* CE_IGNORE has no prefix and must stay silent. */
+	if (ce == CE_IGNORE)
+		return;
+
+	printf("%s", prefix);
+	vprintf(fmt, adx);
+	printf("\n");
+}
+
+/* Variadic front end: bundle the arguments and forward to vcmn_err(). */
+void
+cmn_err(int type, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vcmn_err(type, fmt, args);
+	va_end(args);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_dtrace.c b/zfs/module/os/freebsd/spl/spl_dtrace.c
new file mode 100644
index 0000000..6b2872b
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_dtrace.c

@@ -0,0 +1,38 @@
+/*
+ * Copyright 2014 The FreeBSD Project.
+ * All rights reserved.
+ *
+ * This software was developed by Steven Hartland.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
+
+/*
+ * Define the probe named set__error in the sdt provider (module and
+ * function fields left empty), taking a single "int" argument.
+ */
+/* CSTYLED */
+SDT_PROBE_DEFINE1(sdt, , , set__error, "int");

diff --git a/zfs/module/os/freebsd/spl/spl_kmem.c b/zfs/module/os/freebsd/spl/spl_kmem.c
new file mode 100644
index 0000000..ee8f1d8
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_kmem.c

@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/byteorder.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/debug.h>
+#include <sys/mutex.h>
+#include <sys/vmmeter.h>
+
+
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#ifdef KMEM_DEBUG
+#include <sys/queue.h>
+#include <sys/stack.h>
+#endif
+
+/*
+ * In the kernel, allocations in this file are accounted to the M_SOLARIS
+ * malloc type.  Outside the kernel, the three-argument malloc() and
+ * two-argument free() calls used below are mapped onto the plain libc
+ * forms, dropping the type and flags arguments.
+ */
+#ifdef _KERNEL
+MALLOC_DEFINE(M_SOLARIS, "solaris", "Solaris");
+#else
+#define	malloc(size, type, flags)	malloc(size)
+#define	free(addr, type)		free(addr)
+#endif
+
+#ifdef KMEM_DEBUG
+/*
+ * Tracking header placed in front of every allocation when KMEM_DEBUG is
+ * defined: the saved allocation stack plus linkage on the kmem_items list.
+ */
+struct kmem_item {
+	struct stack	stack;
+	LIST_ENTRY(kmem_item) next;
+};
+/* List of all outstanding tracked allocations, guarded by kmem_items_mtx. */
+static LIST_HEAD(, kmem_item) kmem_items;
+static struct mtx kmem_items_mtx;
+MTX_SYSINIT(kmem_items_mtx, &kmem_items_mtx, "kmem_items", MTX_DEF);
+#endif	/* KMEM_DEBUG */
+
+#include <sys/vmem.h>
+
+/*
+ * Allocate size bytes (never fewer than 16) with the given kmflags.
+ *
+ * Outside the kernel, a KM_SLEEP allocation is asserted to be non-NULL.
+ * With KMEM_DEBUG, the request is grown by a struct kmem_item header that
+ * records the allocating stack and is linked onto kmem_items; the pointer
+ * returned to the caller is the address just past that header.
+ */
+void *
+zfs_kmem_alloc(size_t size, int kmflags)
+{
+	void *p;
+#ifdef KMEM_DEBUG
+	struct kmem_item *i;
+
+	/* Make room for the tracking header in front of the user data. */
+	size += sizeof (struct kmem_item);
+#endif
+	p = malloc(MAX(size, 16), M_SOLARIS, kmflags);
+#ifndef _KERNEL
+	if (kmflags & KM_SLEEP)
+		assert(p != NULL);
+#endif
+#ifdef KMEM_DEBUG
+	if (p != NULL) {
+		/* The header sits at the start; the caller gets the rest. */
+		i = p;
+		p = (uint8_t *)p + sizeof (struct kmem_item);
+		stack_save(&i->stack);
+		mtx_lock(&kmem_items_mtx);
+		LIST_INSERT_HEAD(&kmem_items, i, next);
+		mtx_unlock(&kmem_items_mtx);
+	}
+#endif
+	return (p);
+}
+
+/*
+ * Release a buffer obtained from zfs_kmem_alloc().
+ *
+ * Without KMEM_DEBUG, buf is passed straight to free() and size is not
+ * used.  With KMEM_DEBUG, a NULL buf is reported and ignored; otherwise
+ * the tracking header in front of buf is looked up on kmem_items, removed
+ * from the list, and MAX(size, 16) bytes starting at the header are
+ * filled with 0xDC before the memory is freed.
+ */
+void
+zfs_kmem_free(void *buf, size_t size __unused)
+{
+#ifdef KMEM_DEBUG
+	if (buf == NULL) {
+		printf("%s: attempt to free NULL\n", __func__);
+		return;
+	}
+	struct kmem_item *i;
+
+	/* Step back from the user pointer to the tracking header. */
+	buf = (uint8_t *)buf - sizeof (struct kmem_item);
+	mtx_lock(&kmem_items_mtx);
+	LIST_FOREACH(i, &kmem_items, next) {
+		if (i == buf)
+			break;
+	}
+	/* i is NULL here if the buffer was never on the tracking list. */
+	ASSERT3P(i, !=, NULL);
+	LIST_REMOVE(i, next);
+	mtx_unlock(&kmem_items_mtx);
+	memset(buf, 0xDC, MAX(size, 16));
+#endif
+	free(buf, M_SOLARIS);
+}
+
+/* Value reported by kmem_size(); computed once by kmem_size_init(). */
+static uint64_t kmem_size_val;
+
+/*
+ * SYSINIT hook: set kmem_size_val to the total page count times PAGE_SIZE,
+ * capped at vm_kmem_size.
+ */
+static void
+kmem_size_init(void *unused __unused)
+{
+	uint64_t page_bytes;
+
+	page_bytes = (uint64_t)vm_cnt.v_page_count * PAGE_SIZE;
+	kmem_size_val = (page_bytes > vm_kmem_size) ?
+	    vm_kmem_size : page_bytes;
+}
+SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
+
+/* Return the size computed by kmem_size_init(). */
+uint64_t
+kmem_size(void)
+{
+
+	return (kmem_size_val);
+}
+
+/*
+ * Constructor trampoline: private is the kmem_cache, whose own constructor
+ * is invoked with the cache's private pointer.  Its return value is passed
+ * back unchanged; size is ignored.
+ */
+static int
+kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
+{
+	struct kmem_cache *kc = private;
+	int rv;
+
+	rv = kc->kc_constructor(mem, kc->kc_private, flags);
+	return (rv);
+}
+
+/*
+ * Destructor trampoline: private is the kmem_cache, whose own destructor
+ * is invoked with the cache's private pointer.  size is ignored.
+ */
+static void
+kmem_std_destructor(void *mem, int size __unused, void *private)
+{
+	struct kmem_cache *kc = private;
+
+	kc->kc_destructor(mem, kc->kc_private);
+}
+
+/*
+ * Create an object cache called name for objects of bufsize bytes.
+ *
+ * constructor/destructor (either may be NULL) and private are recorded in
+ * the cache.  reclaim is ignored and vmp must be NULL.  In a kernel build
+ * without KMEM_DEBUG the cache is backed by a zone from uma_zcreate();
+ * otherwise only the object size is remembered.
+ */
+kmem_cache_t *
+kmem_cache_create(char *name, size_t bufsize, size_t align,
+    int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
+    void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags)
+{
+	kmem_cache_t *cache;
+
+	ASSERT3P(vmp, ==, NULL);
+
+	cache = kmem_alloc(sizeof (*cache), KM_SLEEP);
+	strlcpy(cache->kc_name, name, sizeof (cache->kc_name));
+	cache->kc_constructor = constructor;
+	cache->kc_destructor = destructor;
+	cache->kc_private = private;
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
+	/*
+	 * Trampolines are installed only for callbacks that were supplied.
+	 * uma_zcreate() is handed align - 1 (or 0 when align is 0) --
+	 * presumably an alignment mask; confirm against uma_zcreate().
+	 */
+	cache->kc_zone = uma_zcreate(cache->kc_name, bufsize,
+	    constructor != NULL ? kmem_std_constructor : NULL,
+	    destructor != NULL ? kmem_std_destructor : NULL,
+	    NULL, NULL, align > 0 ? align - 1 : 0, cflags);
+#else
+	cache->kc_size = bufsize;
+#endif
+
+	return (cache);
+}
+
+/*
+ * Destroy a cache made by kmem_cache_create(): tear down the backing zone
+ * where one exists, then free the cache structure itself.
+ */
+void
+kmem_cache_destroy(kmem_cache_t *cache)
+{
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
+	uma_zdestroy(cache->kc_zone);
+#endif
+	kmem_free(cache, sizeof (*cache));
+}
+
+/*
+ * Allocate one object from cache.
+ *
+ * In a kernel build without KMEM_DEBUG this is a zone allocation, with
+ * the cache passed as the argument for the constructor trampoline.
+ * Otherwise the object comes from kmem_alloc() and the constructor, if
+ * any, is run by hand.  A constructor that reports failure (non-zero)
+ * makes the allocation fail: the memory is released and NULL is returned,
+ * so the caller never sees an object whose constructor failed.
+ */
+void *
+kmem_cache_alloc(kmem_cache_t *cache, int flags)
+{
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
+	return (uma_zalloc_arg(cache->kc_zone, cache, flags));
+#else
+	void *p;
+
+	p = kmem_alloc(cache->kc_size, flags);
+	if (p != NULL && cache->kc_constructor != NULL &&
+	    kmem_std_constructor(p, cache->kc_size, cache, flags) != 0) {
+		/* Do not hand out an object whose constructor failed. */
+		kmem_free(p, cache->kc_size);
+		p = NULL;
+	}
+	return (p);
+#endif
+}
+
+/*
+ * Return buf to cache.  In a kernel build without KMEM_DEBUG the object
+ * goes back to the zone; otherwise the destructor, if any, is run by hand
+ * and the memory is released with kmem_free().
+ */
+void
+kmem_cache_free(kmem_cache_t *cache, void *buf)
+{
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
+	uma_zfree_arg(cache->kc_zone, buf, cache);
+#else
+	if (cache->kc_destructor != NULL)
+		kmem_std_destructor(buf, cache->kc_size, cache);
+	kmem_free(buf, cache->kc_size);
+#endif
+}
+
+/*
+ * Allow our caller to determine if there are running reaps.
+ *
+ * The interface contract is conservative: B_TRUE may be returned even
+ * when no reaping is in progress, while B_FALSE means reaping is
+ * definitely inactive.  This implementation does not track reaping and
+ * unconditionally returns B_FALSE.
+ */
+boolean_t
+kmem_cache_reap_active(void)
+{
+
+	return (B_FALSE);
+}
+
+/*
+ * Reap (almost) everything soon.
+ *
+ * Note: this does not wait for the reap-tasks to complete. Caller
+ * should use kmem_cache_reap_active() (above) and/or moderation to
+ * avoid scheduling too many reap-tasks.
+ *
+ * In a kernel build the cache's zone is drained, unless KMEM_DEBUG is
+ * defined, in which case this is a no-op.  The call used depends on
+ * __FreeBSD_version.  Outside the kernel both functions are empty stubs.
+ */
+#ifdef _KERNEL
+void
+kmem_cache_reap_soon(kmem_cache_t *cache)
+{
+#ifndef KMEM_DEBUG
+#if __FreeBSD_version >= 1300043
+	uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN);
+#else
+	zone_drain(cache->kc_zone);
+#endif
+#endif
+}
+
+/*
+ * Ask UMA to reclaim memory globally; the uma_reclaim() signature differs
+ * across the __FreeBSD_version boundary checked below.
+ */
+void
+kmem_reap(void)
+{
+#if __FreeBSD_version >= 1300043
+	uma_reclaim(UMA_RECLAIM_TRIM);
+#else
+	uma_reclaim();
+#endif
+}
+#else
+/* No-op outside the kernel. */
+void
+kmem_cache_reap_soon(kmem_cache_t *cache __unused)
+{
+}
+
+/* No-op outside the kernel. */
+void
+kmem_reap(void)
+{
+}
+#endif
+
+/* Always returns 0. */
+int
+kmem_debugging(void)
+{
+	return (0);
+}
+
+/*
+ * Allocate zeroed storage for n elements of s bytes each, without
+ * sleeping.  Returns NULL if the allocation fails or if n * s does not
+ * fit in a size_t; without that check the product could wrap and yield a
+ * buffer far smaller than the caller expects.
+ */
+void *
+calloc(size_t n, size_t s)
+{
+	/* (size_t)-1 is the largest size_t value. */
+	if (s != 0 && n > ((size_t)-1) / s)
+		return (NULL);
+
+	return (kmem_zalloc(n * s, KM_NOSLEEP));
+}
+
+/*
+ * Format fmt/adx into a freshly allocated string and return it.
+ *
+ * The argument list is consumed twice: once to measure the output and
+ * once, through a copy, to produce it.  The buffer comes from
+ * kmem_alloc(KM_SLEEP) and holds the formatted length plus the
+ * terminating NUL.
+ */
+char *
+kmem_vasprintf(const char *fmt, va_list adx)
+{
+	va_list second_pass;
+	char *out;
+	int need;
+
+	va_copy(second_pass, adx);
+	need = vsnprintf(NULL, 0, fmt, adx) + 1;
+	out = kmem_alloc(need, KM_SLEEP);
+	(void) vsprintf(out, fmt, second_pass);
+	va_end(second_pass);
+
+	return (out);
+}
+
+#include <vm/uma.h>
+#include <vm/uma_int.h>
+#ifdef KMEM_DEBUG
+#error "KMEM_DEBUG not currently supported"
+#endif
+
+/* Return uma_zone_get_cur() for the zone backing cache. */
+uint64_t
+spl_kmem_cache_inuse(kmem_cache_t *cache)
+{
+	return (uma_zone_get_cur(cache->kc_zone));
+}
+
+/* Return the uz_size field of the zone backing cache. */
+uint64_t
+spl_kmem_cache_entry_size(kmem_cache_t *cache)
+{
+	return (cache->kc_zone->uz_size);
+}
+
+/*
+ * Register a move callback for cache defragmentation.
+ * XXX: Unimplemented but harmless to stub out for now.
+ *
+ * The callback is only asserted to be non-NULL; neither it nor skc is
+ * stored or used.
+ */
+void
+spl_kmem_cache_set_move(kmem_cache_t *skc,
+    kmem_cbrc_t (move)(void *, void *, size_t, void *))
+{
+	ASSERT3P(move, !=, NULL);
+}
+
+#ifdef KMEM_DEBUG
+void kmem_show(void *);
+
+/*
+ * SYSUNINIT hook: report every allocation still on the kmem_items list,
+ * printing the header address and the saved stack of each, or state that
+ * nothing was leaked.
+ */
+void
+kmem_show(void *dummy __unused)
+{
+	struct kmem_item *item;
+
+	mtx_lock(&kmem_items_mtx);
+	if (!LIST_EMPTY(&kmem_items)) {
+		printf("KMEM_DEBUG: Leaked elements:\n\n");
+		LIST_FOREACH(item, &kmem_items, next) {
+			printf("address=%p\n", item);
+			stack_print_ddb(&item->stack);
+			printf("\n");
+		}
+	} else {
+		printf("KMEM_DEBUG: No leaked elements.\n");
+	}
+	mtx_unlock(&kmem_items_mtx);
+}
+
+SYSUNINIT(sol_kmem, SI_SUB_CPU, SI_ORDER_FIRST, kmem_show, NULL);
+#endif	/* KMEM_DEBUG */

diff --git a/zfs/module/os/freebsd/spl/spl_kstat.c b/zfs/module/os/freebsd/spl/spl_kstat.c
new file mode 100644
index 0000000..9f5f92e
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_kstat.c

@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Links to Illumos.org for more information on kstat function:
+ * [1] https://illumos.org/man/1M/kstat
+ * [2] https://illumos.org/man/9f/kstat_create
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <sys/kstat.h>
+#include <sys/sbuf.h>
+#include <sys/zone.h>
+
+/* Malloc type used for the kstat_t structures allocated in this file. */
+static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
+
+/* Top-level "kstat" sysctl node; __kstat_create() adds its nodes below it. */
+SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
+
+/*
+ * Install the raw-kstat callbacks on ksp: headers fills a buffer with the
+ * header text, data formats one record into a buffer, and addr maps a
+ * record index to the record's address.  The seq_headers slot is left as
+ * it was.
+ */
+void
+__kstat_set_raw_ops(kstat_t *ksp,
+    int (*headers)(char *buf, size_t size),
+    int (*data)(char *buf, size_t size, void *data),
+    void *(*addr)(kstat_t *ksp, loff_t index))
+{
+	ksp->ks_raw_ops.headers = headers;
+	ksp->ks_raw_ops.data    = data;
+	ksp->ks_raw_ops.addr    = addr;
+}
+
+/*
+ * Same as __kstat_set_raw_ops(), except that the header callback takes a
+ * struct seq_file and is stored in the seq_headers slot.  The plain
+ * headers slot is left as it was.
+ */
+void
+__kstat_set_seq_raw_ops(kstat_t *ksp,
+    int (*headers)(struct seq_file *f),
+    int (*data)(char *buf, size_t size, void *data),
+    void *(*addr)(kstat_t *ksp, loff_t index))
+{
+	ksp->ks_raw_ops.seq_headers = headers;
+	ksp->ks_raw_ops.data    = data;
+	ksp->ks_raw_ops.addr    = addr;
+}
+
+/*
+ * Default ks_update callback, installed by __kstat_create(): writes are
+ * refused with EACCES and every other request succeeds without doing
+ * anything.
+ */
+static int
+kstat_default_update(kstat_t *ksp, int rw)
+{
+	ASSERT3P(ksp, !=, NULL);
+
+	return (rw == KSTAT_WRITE ? EACCES : 0);
+}
+
+/*
+ * Replace the raw scratch buffer of ksp with one twice the size, capped
+ * at KSTAT_RAW_MAX.  The old contents are discarded.  Returns 0 on
+ * success, or ENOMEM if the buffer is already at the maximum size.
+ */
+static int
+kstat_resize_raw(kstat_t *ksp)
+{
+	if (ksp->ks_raw_bufsize != KSTAT_RAW_MAX) {
+		free(ksp->ks_raw_buf, M_TEMP);
+		ksp->ks_raw_bufsize =
+		    MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
+		ksp->ks_raw_buf =
+		    malloc(ksp->ks_raw_bufsize, M_TEMP, M_WAITOK);
+		return (0);
+	}
+
+	return (ENOMEM);
+}
+
+/*
+ * Address callback used when a raw kstat supplies none: index 0 maps to
+ * ks_data and every other index ends the iteration.
+ */
+static void *
+kstat_raw_default_addr(kstat_t *ksp, loff_t n)
+{
+	return (n == 0 ? ksp->ks_data : NULL);
+}
+
+/*
+ * Sysctl handler for numeric named kstat entries.  arg1 is the kstat and
+ * arg2 the index of the entry within ks_data.  The entry's 64-bit value
+ * is copied to a local after ks_update() has run and is reported through
+ * sysctl_handle_64().
+ */
+static int
+kstat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	kstat_t *ksp = arg1;
+	/* Select the correct element */
+	kstat_named_t *entry = (kstat_named_t *)ksp->ks_data + arg2;
+	uint64_t snapshot;
+
+	/* Update the aggsums before reading */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+	snapshot = entry->value.ui64;
+
+	return (sysctl_handle_64(oidp, &snapshot, 0, req));
+}
+
+/*
+ * Sysctl handler for string-valued named kstat entries.  arg1 is the
+ * kstat and arg2 the index of the entry within ks_data.  After
+ * ks_update() has run, the entry's string buffer is forcibly
+ * NUL-terminated at its last byte and reported through
+ * sysctl_handle_string().
+ *
+ * An entry with a NULL string pointer or a zero buffer length is reported
+ * as an empty string.  Without that check the terminator write below
+ * would land at index (uint32_t)-1 of the buffer.
+ */
+static int
+kstat_sysctl_string(SYSCTL_HANDLER_ARGS)
+{
+	kstat_t *ksp = arg1;
+	kstat_named_t *ksent = ksp->ks_data;
+	char empty[1] = "";
+	char *val;
+	uint32_t len = 0;
+
+	/* Select the correct element */
+	ksent += arg2;
+	/* Update the aggsums before reading */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+	val = KSTAT_NAMED_STR_PTR(ksent);
+	len = KSTAT_NAMED_STR_BUFLEN(ksent);
+
+	/* No string attached: nothing to terminate, report "". */
+	if (val == NULL || len == 0) {
+		return (sysctl_handle_string(oidp, empty, sizeof (empty),
+		    req));
+	}
+	val[len-1] = '\0';
+
+	return (sysctl_handle_string(oidp, val, len, req));
+}
+
+/*
+ * Sysctl handler for 64-bit entries of "dataset" class kstats.  arg1 is
+ * the kstat and arg2 the index of the entry to report.  The first entry
+ * of ks_data is treated as the dataset name; the value is reported only
+ * if zone_dataset_visible() accepts that name, otherwise EPERM is
+ * returned.
+ *
+ * A first entry with a NULL string pointer or a zero buffer length also
+ * yields EPERM: visibility cannot be checked, and the terminator write
+ * below would otherwise land at index (uint32_t)-1 of the buffer.
+ */
+static int
+kstat_sysctl_dataset(SYSCTL_HANDLER_ARGS)
+{
+	kstat_t *ksp = arg1;
+	kstat_named_t *ksent;
+	kstat_named_t *ksent_ds;
+	uint64_t val;
+	char *ds_name;
+	uint32_t ds_len = 0;
+
+	ksent_ds = ksent = ksp->ks_data;
+	ds_name = KSTAT_NAMED_STR_PTR(ksent_ds);
+	ds_len = KSTAT_NAMED_STR_BUFLEN(ksent_ds);
+
+	/* Fail closed when there is no dataset name to check. */
+	if (ds_name == NULL || ds_len == 0)
+		return (EPERM);
+	ds_name[ds_len-1] = '\0';
+
+	if (!zone_dataset_visible(ds_name, NULL)) {
+		return (EPERM);
+	}
+
+	/* Select the correct element */
+	ksent += arg2;
+	/* Update the aggsums before reading */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+	val = ksent->value.ui64;
+
+	return (sysctl_handle_64(oidp, &val, 0, req));
+}
+
+/*
+ * Sysctl handler for string entries of "dataset" class kstats.  arg1 is
+ * the kstat and arg2 the index of the entry within ks_data.  The string
+ * is forcibly NUL-terminated at the last byte of its buffer and reported
+ * only if zone_dataset_visible() accepts it, otherwise EPERM is returned.
+ * Unlike the other handlers, ks_update() is not called here.
+ *
+ * An entry with a NULL string pointer or a zero buffer length also yields
+ * EPERM: visibility cannot be checked, and the terminator write below
+ * would otherwise land at index (uint32_t)-1 of the buffer.
+ */
+static int
+kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS)
+{
+	kstat_t *ksp = arg1;
+	kstat_named_t *ksent = ksp->ks_data;
+	char *val;
+	uint32_t len = 0;
+
+	/* Select the correct element */
+	ksent += arg2;
+	val = KSTAT_NAMED_STR_PTR(ksent);
+	len = KSTAT_NAMED_STR_BUFLEN(ksent);
+
+	/* Fail closed when there is no string to check. */
+	if (val == NULL || len == 0)
+		return (EPERM);
+	val[len-1] = '\0';
+
+	if (!zone_dataset_visible(val, NULL)) {
+		return (EPERM);
+	}
+
+	return (sysctl_handle_string(oidp, val, len, req));
+}
+
+/*
+ * Sysctl handler for I/O kstats.  arg1 is the kstat, whose ks_data is a
+ * kstat_io_t.  After ks_update() has run, the twelve counters are
+ * rendered as one line of left-justified columns into an auto-sized sbuf
+ * and copied out to the requester.  Returns ENOMEM if the sbuf cannot be
+ * created, otherwise the result of finishing and copying out the sbuf.
+ */
+static int
+kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
+{
+	kstat_t *ksp = arg1;
+	kstat_io_t *io = ksp->ks_data;
+	struct sbuf *out;
+	int error;
+
+	out = sbuf_new_auto();
+	if (out == NULL)
+		return (ENOMEM);
+
+	/* Update the aggsums before reading */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+
+	/* though wlentime & friends are signed, they will never be negative */
+	sbuf_printf(out,
+	    "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
+	    "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
+	    io->nread, io->nwritten,
+	    io->reads, io->writes,
+	    io->wtime, io->wlentime, io->wlastupdate,
+	    io->rtime, io->rlentime, io->rlastupdate,
+	    io->wcnt,  io->rcnt);
+
+	if ((error = sbuf_finish(out)) == 0)
+		error = SYSCTL_OUT(req, sbuf_data(out), sbuf_len(out));
+	sbuf_delete(out);
+
+	return (error);
+}
+
+/*
+ * Sysctl handler for raw kstats.  arg1 is the kstat.
+ *
+ * With ks_lock held, an optional header and then one record per address
+ * returned by the address callback are rendered into an sbuf.  Text is
+ * produced in the scratch buffer ks_raw_buf, which starts at PAGE_SIZE
+ * and is grown through kstat_resize_raw() whenever a callback reports
+ * ENOMEM.  When no data callback is set, ks_data is hex-dumped instead.
+ * The finished sbuf is copied out to the requester after the lock has
+ * been dropped.
+ */
+static int
+kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf *sb;
+	void *data;
+	kstat_t *ksp = arg1;
+	void *(*addr_op)(kstat_t *ksp, loff_t index);
+	int n, has_header, rc = 0;
+
+	sb = sbuf_new_auto();
+	if (sb == NULL)
+		return (ENOMEM);
+
+	/* Fall back to the single-record address callback if none is set. */
+	if (ksp->ks_raw_ops.addr)
+		addr_op = ksp->ks_raw_ops.addr;
+	else
+		addr_op = kstat_raw_default_addr;
+
+	mutex_enter(ksp->ks_lock);
+
+	/* Update the aggsums before reading */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+
+	ksp->ks_raw_bufsize = PAGE_SIZE;
+	ksp->ks_raw_buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
+
+	n = 0;
+	has_header = (ksp->ks_raw_ops.headers ||
+	    ksp->ks_raw_ops.seq_headers);
+
+restart_headers:
+	/* The plain headers callback takes precedence over seq_headers. */
+	if (ksp->ks_raw_ops.headers) {
+		rc = ksp->ks_raw_ops.headers(
+		    ksp->ks_raw_buf, ksp->ks_raw_bufsize);
+	} else if (ksp->ks_raw_ops.seq_headers) {
+		struct seq_file f;
+
+		/* Only sf_buf and sf_size are initialized here. */
+		f.sf_buf = ksp->ks_raw_buf;
+		f.sf_size = ksp->ks_raw_bufsize;
+		rc = ksp->ks_raw_ops.seq_headers(&f);
+	}
+	if (has_header) {
+		/* Too small: retry with a larger buffer unless at the max. */
+		if (rc == ENOMEM && !kstat_resize_raw(ksp))
+			goto restart_headers;
+		if (rc == 0)
+			sbuf_printf(sb, "\n%s", ksp->ks_raw_buf);
+	}
+
+	while ((data = addr_op(ksp, n)) != NULL) {
+restart:
+		if (ksp->ks_raw_ops.data) {
+			rc = ksp->ks_raw_ops.data(ksp->ks_raw_buf,
+			    ksp->ks_raw_bufsize, data);
+			/* Same grow-and-retry scheme as for the header. */
+			if (rc == ENOMEM && !kstat_resize_raw(ksp))
+				goto restart;
+			/* A record that still fails is silently skipped. */
+			if (rc == 0)
+				sbuf_printf(sb, "%s", ksp->ks_raw_buf);
+
+		} else {
+			/* No formatter: dump the raw bytes of ks_data. */
+			ASSERT3U(ksp->ks_ndata, ==, 1);
+			sbuf_hexdump(sb, ksp->ks_data,
+			    ksp->ks_data_size, NULL, 0);
+		}
+		n++;
+	}
+	free(ksp->ks_raw_buf, M_TEMP);
+	mutex_exit(ksp->ks_lock);
+	sbuf_trim(sb);
+	/* rc from the callbacks is discarded; only sbuf/copyout errors count. */
+	rc = sbuf_finish(sb);
+	if (rc == 0)
+		rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
+	sbuf_delete(sb);
+	return (rc);
+}
+
+/*
+ * Allocate and initialize a kstat and create its sysctl nodes.
+ *
+ * module may take the form "<module>/<pool>", in which case an extra
+ * sysctl level is created for the pool.  instance must be 0.  class
+ * defaults to "misc" when NULL.  For KSTAT_TYPE_RAW, ks_ndata is the data
+ * size in bytes; for the other types it is the number of records.  Unless
+ * KSTAT_FLAG_VIRTUAL is set in flags, a zeroed data area is allocated.
+ *
+ * Returns the new kstat, or NULL if a sysctl node could not be created.
+ * On failure everything set up so far is released: the sysctl context,
+ * the private lock, the data area and the kstat itself.  Panics on an
+ * unknown ks_type.
+ */
+kstat_t *
+__kstat_create(const char *module, int instance, const char *name,
+    const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
+{
+	char buf[KSTAT_STRLEN];
+	struct sysctl_oid *root;
+	kstat_t *ksp;
+	char *pool;
+
+	KASSERT(instance == 0, ("instance=%d", instance));
+	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
+		ASSERT3U(ks_ndata, ==, 1);
+
+	if (class == NULL)
+		class = "misc";
+
+	/*
+	 * Allocate the main structure. We don't need to keep a copy of
+	 * module in here, because it is only used for sysctl node creation
+	 * done in this function.
+	 */
+	ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
+
+	ksp->ks_crtime = gethrtime();
+	ksp->ks_snaptime = ksp->ks_crtime;
+	ksp->ks_instance = instance;
+	(void) strlcpy(ksp->ks_name, name, KSTAT_STRLEN);
+	(void) strlcpy(ksp->ks_class, class, KSTAT_STRLEN);
+	ksp->ks_type = ks_type;
+	ksp->ks_flags = flags;
+	ksp->ks_update = kstat_default_update;
+
+	mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
+	ksp->ks_lock = &ksp->ks_private_lock;
+
+	switch (ksp->ks_type) {
+	case KSTAT_TYPE_RAW:
+		ksp->ks_ndata = 1;
+		ksp->ks_data_size = ks_ndata;
+		break;
+	case KSTAT_TYPE_NAMED:
+		ksp->ks_ndata = ks_ndata;
+		ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
+		break;
+	case KSTAT_TYPE_INTR:
+		ksp->ks_ndata = ks_ndata;
+		ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
+		break;
+	case KSTAT_TYPE_IO:
+		ksp->ks_ndata = ks_ndata;
+		ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
+		break;
+	case KSTAT_TYPE_TIMER:
+		ksp->ks_ndata = ks_ndata;
+		ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
+		break;
+	default:
+		panic("Undefined kstat type %d\n", ksp->ks_type);
+	}
+
+	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL)
+		ksp->ks_data = NULL;
+	else
+		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
+
+	/*
+	 * Some kstats use a module name like "zfs/poolname" to distinguish a
+	 * set of kstats belonging to a specific pool.  Split on '/' to add an
+	 * extra node for the pool name if needed.
+	 */
+	(void) strlcpy(buf, module, KSTAT_STRLEN);
+	module = buf;
+	pool = strchr(module, '/');
+	if (pool != NULL)
+		*pool++ = '\0';
+
+	/*
+	 * Create sysctl tree for those statistics:
+	 *
+	 *	kstat.<module>[.<pool>].<class>.<name>
+	 */
+	sysctl_ctx_init(&ksp->ks_sysctl_ctx);
+	root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
+	    SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
+	    "");
+	if (root == NULL) {
+		printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
+		goto fail;
+	}
+	if (pool != NULL) {
+		root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
+		    SYSCTL_CHILDREN(root), OID_AUTO, pool, CTLFLAG_RW, 0, "");
+		if (root == NULL) {
+			printf("%s: Cannot create kstat.%s.%s tree!\n",
+			    __func__, module, pool);
+			goto fail;
+		}
+	}
+	root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
+	    OID_AUTO, class, CTLFLAG_RW, 0, "");
+	if (root == NULL) {
+		if (pool != NULL)
+			printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
+			    __func__, module, pool, class);
+		else
+			printf("%s: Cannot create kstat.%s.%s tree!\n",
+			    __func__, module, class);
+		goto fail;
+	}
+	/* Only named kstats get a node of their own at this point. */
+	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
+		root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
+		    SYSCTL_CHILDREN(root),
+		    OID_AUTO, name, CTLFLAG_RW, 0, "");
+		if (root == NULL) {
+			if (pool != NULL)
+				printf("%s: Cannot create kstat.%s.%s.%s.%s "
+				    "tree!\n", __func__, module, pool, class,
+				    name);
+			else
+				printf("%s: Cannot create kstat.%s.%s.%s "
+				    "tree!\n", __func__, module, class, name);
+			goto fail;
+		}
+
+	}
+	ksp->ks_sysctl_root = root;
+
+	return (ksp);
+
+fail:
+	/*
+	 * Undo everything done above, in the same way kstat_delete() does,
+	 * so that neither the private lock nor the data area is leaked.
+	 */
+	sysctl_ctx_free(&ksp->ks_sysctl_ctx);
+	ksp->ks_lock = NULL;
+	mutex_destroy(&ksp->ks_private_lock);
+	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
+		kmem_free(ksp->ks_data, ksp->ks_data_size);
+	free(ksp, M_KSTAT);
+	return (NULL);
+}
+
+/*
+ * Register one read-only sysctl per entry of a named kstat, below the
+ * kstat's sysctl root.  The sysctl type and format string follow the
+ * entry's data type.  For "dataset" class kstats, 64-bit unsigned and
+ * string entries use the handlers that check dataset visibility.
+ * KSTAT_DATA_CHAR entries are skipped; any other unknown type panics.
+ */
+static void
+kstat_install_named(kstat_t *ksp)
+{
+	kstat_named_t *ksent;
+	char *namelast;
+	int typelast;
+
+	ksent = ksp->ks_data;
+
+	VERIFY((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) || ksent != NULL);
+
+	typelast = 0;
+	namelast = NULL;
+
+	for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
+		/*
+		 * An entry whose data_type is 0 reuses the type and name of
+		 * the most recent entry that had a non-zero data_type.
+		 */
+		if (ksent->data_type != 0) {
+			typelast = ksent->data_type;
+			namelast = ksent->name;
+		}
+		/* arg2 of each handler is i, the entry's index in ks_data. */
+		switch (typelast) {
+		case KSTAT_DATA_CHAR:
+			/* Not Implemented */
+			break;
+		case KSTAT_DATA_INT32:
+			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, namelast,
+			    CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+			    ksp, i, kstat_sysctl, "I", namelast);
+			break;
+		case KSTAT_DATA_UINT32:
+			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, namelast,
+			    CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+			    ksp, i, kstat_sysctl, "IU", namelast);
+			break;
+		case KSTAT_DATA_INT64:
+			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, namelast,
+			    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+			    ksp, i, kstat_sysctl, "Q", namelast);
+			break;
+		case KSTAT_DATA_UINT64:
+			/* Dataset kstats go through the visibility check. */
+			if (strcmp(ksp->ks_class, "dataset") == 0) {
+				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+				    OID_AUTO, namelast,
+				    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+				    ksp, i, kstat_sysctl_dataset, "QU",
+				    namelast);
+			} else {
+				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+				    OID_AUTO, namelast,
+				    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+				    ksp, i, kstat_sysctl, "QU", namelast);
+			}
+			break;
+		case KSTAT_DATA_LONG:
+			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, namelast,
+			    CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
+			    ksp, i, kstat_sysctl, "L", namelast);
+			break;
+		case KSTAT_DATA_ULONG:
+			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, namelast,
+			    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
+			    ksp, i, kstat_sysctl, "LU", namelast);
+			break;
+		case KSTAT_DATA_STRING:
+			/* Dataset kstats go through the visibility check. */
+			if (strcmp(ksp->ks_class, "dataset") == 0) {
+				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+				    OID_AUTO, namelast, CTLTYPE_STRING |
+				    CTLFLAG_RD | CTLFLAG_MPSAFE,
+				    ksp, i, kstat_sysctl_dataset_string, "A",
+				    namelast);
+			} else {
+				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+				    OID_AUTO, namelast, CTLTYPE_STRING |
+				    CTLFLAG_RD | CTLFLAG_MPSAFE,
+				    ksp, i, kstat_sysctl_string, "A",
+				    namelast);
+			}
+			break;
+		default:
+			panic("unsupported type: %d", typelast);
+		}
+	}
+}
+
+/*
+ * Publish a kstat created by __kstat_create() through sysctl.
+ *
+ * Named kstats get one sysctl per entry via kstat_install_named().  Raw
+ * and I/O kstats get a single sysctl named after the kstat, which then
+ * replaces ks_sysctl_root.  Raw kstats are exported as a string when a
+ * data callback is set and as opaque data otherwise.  Timer and interrupt
+ * kstats are not supported and panic, as does any unknown type.
+ */
+void
+kstat_install(kstat_t *ksp)
+{
+	struct sysctl_oid *root;
+
+	if (ksp->ks_ndata == UINT32_MAX)
+		VERIFY3U(ksp->ks_type, ==, KSTAT_TYPE_RAW);
+
+	switch (ksp->ks_type) {
+	case KSTAT_TYPE_NAMED:
+		/*
+		 * Call, then return: ISO C does not allow a return statement
+		 * with an expression in a function returning void.
+		 */
+		kstat_install_named(ksp);
+		return;
+	case KSTAT_TYPE_RAW:
+		if (ksp->ks_raw_ops.data) {
+			root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, ksp->ks_name, CTLTYPE_STRING | CTLFLAG_RD
+			    | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+			    ksp, 0, kstat_sysctl_raw, "A", ksp->ks_name);
+		} else {
+			root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+			    OID_AUTO, ksp->ks_name, CTLTYPE_OPAQUE | CTLFLAG_RD
+			    | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+			    ksp, 0, kstat_sysctl_raw, "", ksp->ks_name);
+		}
+		break;
+	case KSTAT_TYPE_IO:
+		root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
+		    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
+		    OID_AUTO, ksp->ks_name,
+		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+		    ksp, 0, kstat_sysctl_io, "A", ksp->ks_name);
+		break;
+	case KSTAT_TYPE_TIMER:
+	case KSTAT_TYPE_INTR:
+	default:
+		panic("unsupported kstat type %d\n", ksp->ks_type);
+	}
+	VERIFY3P(root, !=, NULL);
+	ksp->ks_sysctl_root = root;
+}
+
+/*
+ * Tear down a kstat: remove its sysctl nodes, destroy its private lock,
+ * free the data area unless the kstat is virtual (the data area was then
+ * not allocated by __kstat_create()), and free the kstat itself.
+ */
+void
+kstat_delete(kstat_t *ksp)
+{
+
+	/* Remove the sysctl nodes first so no handler can be reached. */
+	sysctl_ctx_free(&ksp->ks_sysctl_ctx);
+	ksp->ks_lock = NULL;
+	mutex_destroy(&ksp->ks_private_lock);
+	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
+		kmem_free(ksp->ks_data, ksp->ks_data_size);
+	free(ksp, M_KSTAT);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_misc.c b/zfs/module/os/freebsd/spl/spl_misc.c
new file mode 100644
index 0000000..0354b98
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_misc.c

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/misc.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include <sys/zfs_context.h>
+
+/*
+ * System identification record returned by utsname().  Only .machine is set
+ * statically; the remaining fields are filled in by
+ * opensolaris_utsname_init().
+ */
+static struct opensolaris_utsname hw_utsname = {
+	.machine = MACHINE
+};
+
+/* hw_serial and utsname() are only defined when KERNEL_STATIC is not set. */
+#ifndef KERNEL_STATIC
+/* ddi_strtoul() special-cases this buffer by address and returns the hostid. */
+char hw_serial[11] = "0";
+
+/* Return a pointer to the file-local hw_utsname record. */
+utsname_t *
+utsname(void)
+{
+	return (&hw_utsname);
+}
+#endif
+
+/*
+ * SYSINIT hook: populate the dynamic fields of hw_utsname from the kernel's
+ * ostype, prison0 hostname, osrelease and osreldate.  The argument is unused.
+ */
+static void
+opensolaris_utsname_init(void *arg)
+{
+	struct opensolaris_utsname *uts = &hw_utsname;
+
+	uts->sysname = ostype;
+	uts->nodename = prison0.pr_hostname;
+	uts->release = osrelease;
+
+	/* The version string is the numeric osreldate rendered in decimal. */
+	snprintf(uts->version, sizeof (uts->version), "%d", osreldate);
+}
+
+/*
+ * Return a KM_SLEEP kmem_alloc()ed copy of the NUL-terminated string s.
+ * The allocation is exactly strlen(s) + 1 bytes.
+ */
+char *
+kmem_strdup(const char *s)
+{
+	size_t size = strlen(s) + 1;
+	char *copy = kmem_alloc(size, KM_SLEEP);
+
+	/* size includes the terminator, so the copy stays NUL-terminated. */
+	memcpy(copy, s, size);
+	return (copy);
+}
+
+/*
+ * Copy len bytes from 'from' into the kernel buffer 'to'.  Without FKIOCTL
+ * in flags the copy goes through copyin() and its result is returned; with
+ * FKIOCTL the source is treated as a kernel address, copied with memcpy(),
+ * and 0 is returned.
+ */
+int
+ddi_copyin(const void *from, void *to, size_t len, int flags)
+{
+	if ((flags & FKIOCTL) == 0)
+		return (copyin(from, to, len));
+
+	/* Fake ioctl() issued by the kernel: 'from' is a kernel address. */
+	memcpy(to, from, len);
+	return (0);
+}
+
+/*
+ * Copy len bytes from the kernel buffer 'from' out to 'to'.  Without FKIOCTL
+ * in flags the copy goes through copyout() and its result is returned; with
+ * FKIOCTL the destination is treated as a kernel address, copied with
+ * memcpy(), and 0 is returned.
+ */
+int
+ddi_copyout(const void *from, void *to, size_t len, int flags)
+{
+	if ((flags & FKIOCTL) == 0)
+		return (copyout(from, to, len));
+
+	/* Fake ioctl() issued by the kernel: 'to' is a kernel address. */
+	memcpy(to, from, len);
+	return (0);
+}
+
+/*
+ * Panic with a printf-style message.  The file, func and line arguments are
+ * accepted but not used by this implementation; only fmt and the variadic
+ * arguments are forwarded to vpanic().
+ *
+ * NOTE(review): no value is returned although the function is declared int;
+ * presumably vpanic() never returns -- confirm.
+ */
+int
+spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vpanic(fmt, ap);
+	va_end(ap);
+}
+
+
+/*
+ * Register opensolaris_utsname_init() (with a NULL argument) as a SYSINIT
+ * at SI_SUB_TUNABLES / SI_ORDER_ANY.
+ */
+SYSINIT(opensolaris_utsname_init, SI_SUB_TUNABLES, SI_ORDER_ANY,
+    opensolaris_utsname_init, NULL);

diff --git a/zfs/module/os/freebsd/spl/spl_policy.c b/zfs/module/os/freebsd/spl/spl_policy.c
new file mode 100644
index 0000000..5ecd3d3
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_policy.c

@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/mntent.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/jail.h>
+#include <sys/policy.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
+
+
+/* Check cr for PRIV_NFS_DAEMON; returns spl_priv_check_cred()'s result. */
+int
+secpolicy_nfs(cred_t *cr)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_NFS_DAEMON));
+}
+
+/* Check cr for PRIV_VFS_MOUNT; returns spl_priv_check_cred()'s result. */
+int
+secpolicy_zfs(cred_t *cr)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
+}
+
+/*
+ * Check cr for PRIV_VFS_MOUNT.  The proc argument is not consulted, so this
+ * performs the same check as secpolicy_zfs().
+ */
+int
+secpolicy_zfs_proc(cred_t *cr, proc_t *proc)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
+}
+
+/*
+ * Check cr for PRIV_ZFS_POOL_CONFIG; returns spl_priv_check_cred()'s result.
+ * The checkonly argument is unused.
+ */
+int
+secpolicy_sys_config(cred_t *cr, int checkonly __unused)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG));
+}
+
+/* Check cr for PRIV_ZFS_INJECT; returns spl_priv_check_cred()'s result. */
+int
+secpolicy_zinject(cred_t *cr)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_ZFS_INJECT));
+}
+
+/*
+ * Check cr for PRIV_VFS_UNMOUNT; returns spl_priv_check_cred()'s result.
+ * The mount argument is unused.
+ */
+int
+secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp __unused)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_VFS_UNMOUNT));
+}
+
+/*
+ * Return 0 when zfs_super_owner is enabled and cr has the same uid and the
+ * same prison as the credential that mounted mp; otherwise return EPERM.
+ */
+int
+secpolicy_fs_owner(struct mount *mp, cred_t *cr)
+{
+
+	if (!zfs_super_owner)
+		return (EPERM);
+	if (cr->cr_uid != mp->mnt_cred->cr_uid)
+		return (EPERM);
+	if (cr->cr_prison != mp->mnt_cred->cr_prison)
+		return (EPERM);
+	return (0);
+}
+
+/*
+ * Hard-link policy.  kern_link() already performs this check, so returning 0
+ * unconditionally would also be acceptable.
+ *
+ * Returns 0 when hardlink_check_uid is off or when cr passes
+ * secpolicy_fs_owner() for the vnode's mount; otherwise returns the result
+ * of the PRIV_VFS_LINK privilege check.
+ */
+extern int hardlink_check_uid;
+int
+secpolicy_basic_link(vnode_t *vp, cred_t *cr)
+{
+
+	if (!hardlink_check_uid || secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+	return (spl_priv_check_cred(cr, PRIV_VFS_LINK));
+}
+
+/* Always returns EPERM; the credential is not examined. */
+int
+secpolicy_vnode_stky_modify(cred_t *cr)
+{
+
+	return (EPERM);
+}
+
+/*
+ * Returns 0 if cr passes secpolicy_fs_owner() for the vnode's mount,
+ * otherwise the result of the PRIV_VFS_ADMIN privilege check.
+ */
+int
+secpolicy_vnode_remove(vnode_t *vp, cred_t *cr)
+{
+	int error;
+
+	error = secpolicy_fs_owner(vp->v_mount, cr);
+	if (error != 0)
+		error = spl_priv_check_cred(cr, PRIV_VFS_ADMIN);
+	return (error);
+}
+
+/*
+ * Decide whether cr may be granted the access bits in accmode on vp.
+ * Returns 0 if cr passes secpolicy_fs_owner() for the mount, or if every
+ * requested bit is covered by the matching privilege: PRIV_VFS_READ for
+ * VREAD, PRIV_VFS_WRITE for VWRITE, and for VEXEC either PRIV_VFS_LOOKUP
+ * (directories) or PRIV_VFS_EXEC (everything else).  Returns EACCES as soon
+ * as one privilege check fails.  The owner argument is not used.
+ */
+int
+secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner, accmode_t accmode)
+{
+	int exec_priv;
+
+	if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+
+	if ((accmode & VREAD) != 0 &&
+	    spl_priv_check_cred(cr, PRIV_VFS_READ) != 0)
+		return (EACCES);
+
+	if ((accmode & VWRITE) != 0 &&
+	    spl_priv_check_cred(cr, PRIV_VFS_WRITE) != 0)
+		return (EACCES);
+
+	if ((accmode & VEXEC) != 0) {
+		/* "Execute" on a directory means lookup. */
+		exec_priv = (vp->v_type == VDIR) ?
+		    PRIV_VFS_LOOKUP : PRIV_VFS_EXEC;
+		if (spl_priv_check_cred(cr, exec_priv) != 0)
+			return (EACCES);
+	}
+	return (0);
+}
+
+/*
+ * Variant of secpolicy_vnode_access() that takes the mode the file already
+ * grants (curmode) and the mode the caller wants (wantmode) rather than the
+ * missing bits.  Only the bits in wantmode that curmode lacks are checked;
+ * if there are none, 0 is returned without any privilege check.
+ */
+int
+secpolicy_vnode_access2(cred_t *cr, vnode_t *vp, uid_t owner,
+    accmode_t curmode, accmode_t wantmode)
+{
+	accmode_t missing = wantmode & ~curmode;
+
+	if (missing != 0)
+		return (secpolicy_vnode_access(cr, vp, owner, missing));
+	return (0);
+}
+
+/*
+ * Return 0 if cr has any kind of access to vp: it passes
+ * secpolicy_fs_owner() for the mount, it is the file owner, or it holds at
+ * least one of the VFS privileges in the table below.  PRIV_VFS_EXEC is not
+ * tried for directories and PRIV_VFS_LOOKUP is tried only for directories.
+ * Returns EPERM otherwise.
+ */
+int
+secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner)
+{
+	static int privs[] = {
+	    PRIV_VFS_ADMIN,
+	    PRIV_VFS_READ,
+	    PRIV_VFS_WRITE,
+	    PRIV_VFS_EXEC,
+	    PRIV_VFS_LOOKUP
+	};
+	int i;
+
+	if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+
+	/* Same as secpolicy_vnode_setdac */
+	if (owner == cr->cr_uid)
+		return (0);
+
+	for (i = 0; i < sizeof (privs)/sizeof (int); i++) {
+		int priv = privs[i];
+
+		/* Skip the privilege that does not apply to this vnode type. */
+		if (priv == PRIV_VFS_EXEC && vp->v_type == VDIR)
+			continue;
+		if (priv == PRIV_VFS_LOOKUP && vp->v_type != VDIR)
+			continue;
+
+		if (spl_priv_check_cred(cr, priv) == 0)
+			return (0);
+	}
+	return (EPERM);
+}
+
+/*
+ * Returns 0 if cr is the file owner or passes secpolicy_fs_owner() for the
+ * vnode's mount; otherwise returns the result of the PRIV_VFS_ADMIN
+ * privilege check.
+ */
+int
+secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner)
+{
+
+	if (owner == cr->cr_uid || secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+	return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
+}
+
+/*
+ * Policy check for a setattr request.  vap holds the requested attributes
+ * (va_mask selects which ones), ovap the current ones.  Each selected group
+ * is checked in turn and the first error is returned; 0 means everything
+ * requested is allowed.
+ *
+ *  AT_SIZE:   EISDIR for directories, otherwise write access is required
+ *             via unlocked_access(node, VWRITE, cr).
+ *  AT_MODE:   secpolicy_vnode_setdac() plus
+ *             secpolicy_setid_setsticky_clear().  When AT_MODE is not set,
+ *             vap->va_mode is overwritten with the current mode.
+ *  AT_UID/AT_GID: secpolicy_vnode_setdac(), a PRIV_VFS_CHOWN check where
+ *             needed, and secpolicy_setid_clear() on an actual id change
+ *             (which may modify vap).
+ *  AT_ATIME/AT_MTIME: secpolicy_vnode_setdac(), falling back to write
+ *             access when VA_UTIMES_NULL is set.
+ *
+ * The flags argument is not used.
+ */
+int
+secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap,
+    const struct vattr *ovap, int flags,
+    int unlocked_access(void *, int, cred_t *), void *node)
+{
+	int mask = vap->va_mask;
+	int error;
+
+	if (mask & AT_SIZE) {
+		if (vp->v_type == VDIR)
+			return (EISDIR);
+		error = unlocked_access(node, VWRITE, cr);
+		if (error)
+			return (error);
+	}
+	if (mask & AT_MODE) {
+		/*
+		 * If not the owner of the file then check privilege
+		 * for two things: the privilege to set the mode at all
+		 * and, if we're setting setuid, we also need permissions
+		 * to add the set-uid bit, if we're not the owner.
+		 * In the specific case of creating a set-uid root
+		 * file, we need even more permissions.
+		 */
+		error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
+		if (error)
+			return (error);
+		error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cr);
+		if (error)
+			return (error);
+	} else {
+		/* Mode not being set: carry the current mode over into vap. */
+		vap->va_mode = ovap->va_mode;
+	}
+	if (mask & (AT_UID | AT_GID)) {
+		error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
+		if (error)
+			return (error);
+
+		/*
+		 * To change the owner of a file, or change the group of
+		 * a file to a group of which we are not a member, the
+		 * caller must have privilege.
+		 */
+		if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
+		    ((mask & AT_GID) && vap->va_gid != ovap->va_gid &&
+		    !groupmember(vap->va_gid, cr))) {
+			if (secpolicy_fs_owner(vp->v_mount, cr) != 0) {
+				error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN);
+				if (error)
+					return (error);
+			}
+		}
+
+		/* On an actual uid/gid change, setid bits may be stripped. */
+		if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
+		    ((mask & AT_GID) && vap->va_gid != ovap->va_gid)) {
+			secpolicy_setid_clear(vap, vp, cr);
+		}
+	}
+	if (mask & (AT_ATIME | AT_MTIME)) {
+		/*
+		 * From utimes(2):
+		 * If times is NULL, ... The caller must be the owner of
+		 * the file, have permission to write the file, or be the
+		 * super-user.
+		 * If times is non-NULL, ... The caller must be the owner of
+		 * the file or be the super-user.
+		 */
+		error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
+		if (error && (vap->va_vaflags & VA_UTIMES_NULL))
+			error = unlocked_access(node, VWRITE, cr);
+		if (error)
+			return (error);
+	}
+	return (0);
+}
+
+/* Always returns EPERM; the credential is not examined. */
+int
+secpolicy_vnode_create_gid(cred_t *cr)
+{
+
+	return (EPERM);
+}
+
+/*
+ * Returns 0 if cr is a member of group gid or passes secpolicy_fs_owner()
+ * for the vnode's mount; otherwise returns the result of the
+ * PRIV_VFS_SETGID privilege check.
+ */
+int
+secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid)
+{
+
+	if (groupmember(gid, cr) || secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+	return (spl_priv_check_cred(cr, PRIV_VFS_SETGID));
+}
+
+/*
+ * Returns 0 if cr passes secpolicy_fs_owner() for the znode's mount,
+ * otherwise the result of the PRIV_VFS_RETAINSUGID privilege check.
+ * The issuidroot argument is unused.
+ */
+int
+secpolicy_vnode_setid_retain(znode_t *zp, cred_t *cr,
+    boolean_t issuidroot __unused)
+{
+	int error;
+
+	error = secpolicy_fs_owner(ZTOV(zp)->v_mount, cr);
+	if (error != 0)
+		error = spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID);
+	return (error);
+}
+
+/*
+ * Strip S_ISUID and S_ISGID from vap->va_mode (and flag AT_MODE in
+ * vap->va_mask) when either bit is set and cr fails the
+ * PRIV_VFS_RETAINSUGID check.  Nothing is changed if cr passes
+ * secpolicy_fs_owner() for the vnode's mount.
+ */
+void
+secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr)
+{
+
+	if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return;
+
+	/* Nothing to strip. */
+	if ((vap->va_mode & (S_ISUID | S_ISGID)) == 0)
+		return;
+
+	/* Privileged callers keep the bits. */
+	if (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID) == 0)
+		return;
+
+	vap->va_mask |= AT_MODE;
+	vap->va_mode &= ~(S_ISUID|S_ISGID);
+}
+
+/*
+ * Check the sticky, set-gid and set-uid bits requested in vap->va_mode
+ * against cr.  Returns 0 if cr passes secpolicy_fs_owner() for the mount or
+ * all applicable checks succeed; EFTYPE if the sticky bit is requested on a
+ * non-directory without PRIV_VFS_STICKYFILE; otherwise the error from the
+ * failing set-gid or set-uid check.
+ */
+int
+secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap,
+    const struct vattr *ovap, cred_t *cr)
+{
+	int error;
+
+	if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+
+	/*
+	 * Privileged processes may set the sticky bit on non-directories,
+	 * as well as set the setgid bit on a file with a group that the process
+	 * is not a member of. Both of these are allowed in jail(8).
+	 */
+	if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT) &&
+	    spl_priv_check_cred(cr, PRIV_VFS_STICKYFILE))
+		return (EFTYPE);
+
+	/* Setting the group-id bit needs membership or privilege. */
+	if ((vap->va_mode & S_ISGID) != 0) {
+		error = secpolicy_vnode_setids_setgids(vp, cr, ovap->va_gid);
+		if (error)
+			return (error);
+	}
+
+	/* Deny setting setuid if we are not the file owner. */
+	if ((vap->va_mode & S_ISUID) && ovap->va_uid != cr->cr_uid)
+		return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
+
+	return (0);
+}
+
+/*
+ * Check cr for PRIV_VFS_MOUNT; returns spl_priv_check_cred()'s result.
+ * The mvp and vfsp arguments are not consulted.
+ */
+int
+secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
+}
+
+/*
+ * Returns 0 if cr is the file owner or passes secpolicy_fs_owner() for the
+ * vnode's mount; otherwise returns the result of the PRIV_VFS_MOUNT_OWNER
+ * privilege check.
+ */
+int
+secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner)
+{
+
+	if (owner == cr->cr_uid || secpolicy_fs_owner(vp->v_mount, cr) == 0)
+		return (0);
+
+	/* XXX: vfs_suser()? */
+	return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER));
+}
+
+/*
+ * Returns 0 if cr passes secpolicy_fs_owner() for the vnode's mount,
+ * otherwise the result of the PRIV_VFS_CHOWN privilege check.  The owner
+ * argument is not consulted.
+ */
+int
+secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner)
+{
+	int error;
+
+	error = secpolicy_fs_owner(vp->v_mount, cr);
+	if (error != 0)
+		error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN);
+	return (error);
+}
+
+/*
+ * For callers lacking PRIV_VFS_MOUNT_NONUSER, force the mount to nosetuid:
+ * under the mount interlock, set VFS_NOSETUID and MNT_USER in vfs_flag,
+ * clear the setuid mount option and set the nosetuid one.  Callers holding
+ * the privilege leave the mount untouched.
+ */
+void
+secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp)
+{
+
+	if (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) == 0)
+		return;
+
+	MNT_ILOCK(vfsp);
+	vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER;
+	vfs_clearmntopt(vfsp, MNTOPT_SETUID);
+	vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
+	MNT_IUNLOCK(vfsp);
+}
+
+/*
+ * Check privileges for setting xvattr attributes.
+ *
+ * Returns 0 if cr passes secpolicy_fs_owner() for the vnode's mount,
+ * otherwise the result of the PRIV_VFS_SYSFLAGS privilege check.  The xvap,
+ * owner and vtype arguments are not consulted.
+ */
+int
+secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr,
+    vtype_t vtype)
+{
+	int error;
+
+	error = secpolicy_fs_owner(vp->v_mount, cr);
+	if (error != 0)
+		error = spl_priv_check_cred(cr, PRIV_VFS_SYSFLAGS);
+	return (error);
+}
+
+/* Check cr for PRIV_NETSMB; returns spl_priv_check_cred()'s result. */
+int
+secpolicy_smb(cred_t *cr)
+{
+
+	return (spl_priv_check_cred(cr, PRIV_NETSMB));
+}

diff --git a/zfs/module/os/freebsd/spl/spl_procfs_list.c b/zfs/module/os/freebsd/spl/spl_procfs_list.c
new file mode 100644
index 0000000..e8448ce
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_procfs_list.c

@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/list.h>
+#include <sys/mutex.h>
+#include <sys/procfs_list.h>
+
+/*
+ * Per-element cursor: allocated by procfs_list_addr() and consumed (and
+ * freed) by procfs_list_data().
+ */
+typedef struct procfs_list_iter {
+	procfs_list_t *pli_pl;	/* list the element belongs to */
+	void *pli_elt;		/* element to pass to pl_show */
+} pli_t;
+
+/*
+ * Format into the seq_file's buffer.  Output is written at the start of
+ * f->sf_buf and limited to f->sf_size bytes, so each call replaces whatever
+ * an earlier call wrote rather than appending to it.  The vsnprintf()
+ * result is discarded.
+ */
+void
+seq_printf(struct seq_file *f, const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	(void) vsnprintf(f->sf_buf, f->sf_size, fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * kstat update callback.  A KSTAT_WRITE invokes the list's pl_clear
+ * callback (its result is ignored); any other rw value does nothing.
+ * Always returns 0.
+ */
+static int
+procfs_list_update(kstat_t *ksp, int rw)
+{
+	procfs_list_t *list = ksp->ks_private;
+
+	if (rw != KSTAT_WRITE)
+		return (0);
+
+	list->pl_clear(list);
+	return (0);
+}
+
+/*
+ * Raw-ops data callback.  'data' is a pli_t cursor; it is unpacked and
+ * freed, then the list's pl_show callback is run on the element with a
+ * seq_file wrapping buf/size.  Returns pl_show's result.
+ */
+static int
+procfs_list_data(char *buf, size_t size, void *data)
+{
+	pli_t *cursor = data;
+	procfs_list_t *list = cursor->pli_pl;
+	void *element = cursor->pli_elt;
+	struct seq_file sf;
+
+	/* The cursor is single-use; release it before showing the element. */
+	free(cursor, M_TEMP);
+
+	sf.sf_buf = buf;
+	sf.sf_size = size;
+	return (list->pl_show(&sf, element));
+}
+
+/*
+ * Raw-ops address callback.  Moves the iteration position kept in
+ * ksp->ks_private1: n == 0 restarts at the list head, otherwise the position
+ * advances to the next element if there is a current one.  Returns a newly
+ * allocated pli_t cursor for the resulting element, or NULL when there is
+ * no element.
+ */
+static void *
+procfs_list_addr(kstat_t *ksp, loff_t n)
+{
+	procfs_list_t *list = ksp->ks_private;
+	void *current = ksp->ks_private1;
+	pli_t *cursor;
+
+	if (n == 0)
+		ksp->ks_private1 = list_head(&list->pl_list);
+	else if (current != NULL)
+		ksp->ks_private1 = list_next(&list->pl_list, current);
+
+	if (ksp->ks_private1 == NULL)
+		return (NULL);
+
+	cursor = malloc(sizeof (*cursor), M_TEMP, M_WAITOK);
+	cursor->pli_pl = list;
+	cursor->pli_elt = ksp->ks_private1;
+	return (cursor);
+}
+
+/*
+ * Initialize procfs_list (lock, list, callbacks, id counter, node offset)
+ * and publish it as a raw, virtual kstat named module/submodule/name whose
+ * raw ops walk the list.  If kstat_create() fails the list is still
+ * initialized but nothing is installed and pl_private is not assigned.
+ * The mode argument is not used.
+ */
+void
+procfs_list_install(const char *module,
+    const char *submodule,
+    const char *name,
+    mode_t mode,
+    procfs_list_t *procfs_list,
+    int (*show)(struct seq_file *f, void *p),
+    int (*show_header)(struct seq_file *f),
+    int (*clear)(procfs_list_t *procfs_list),
+    size_t procfs_list_node_off)
+{
+	kstat_t *ks;
+
+	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&procfs_list->pl_list,
+	    procfs_list_node_off + sizeof (procfs_list_node_t),
+	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+
+	procfs_list->pl_show = show;
+	procfs_list->pl_show_header = show_header;
+	procfs_list->pl_clear = clear;
+	procfs_list->pl_next_id = 1;
+	procfs_list->pl_node_offset = procfs_list_node_off;
+
+	ks = kstat_create(module, 0, name, submodule, KSTAT_TYPE_RAW, 0,
+	    KSTAT_FLAG_VIRTUAL);
+	if (ks == NULL)
+		return;
+
+	/* The kstat shares the list's lock and reports UINT32_MAX entries. */
+	ks->ks_lock = &procfs_list->pl_lock;
+	ks->ks_ndata = UINT32_MAX;
+	ks->ks_private = procfs_list;
+	ks->ks_update = procfs_list_update;
+	kstat_set_seq_raw_ops(ks, show_header, procfs_list_data,
+	    procfs_list_addr);
+	kstat_install(ks);
+
+	procfs_list->pl_private = ks;
+}
+
+/* Intentionally empty: the kstat is deleted in procfs_list_destroy(). */
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{}
+
+/*
+ * Tear down a procfs list: delete its kstat, then destroy the list and the
+ * lock.  The list is asserted to be empty.
+ */
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+	ASSERT(list_is_empty(&procfs_list->pl_list));
+	/* pl_private holds the kstat stored by procfs_list_install(). */
+	kstat_delete(procfs_list->pl_private);
+	list_destroy(&procfs_list->pl_list);
+	mutex_destroy(&procfs_list->pl_lock);
+}
+
+/*
+ * Lvalue for the pln_id of the procfs_list_node_t embedded in obj at byte
+ * offset (procfs_list)->pl_node_offset.
+ */
+#define	NODE_ID(procfs_list, obj) \
+		(((procfs_list_node_t *)(((char *)obj) + \
+		(procfs_list)->pl_node_offset))->pln_id)
+
+/*
+ * Append p to the list, stamping its embedded node with the next id and
+ * advancing the id counter.  The caller must hold pl_lock (asserted).
+ */
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+
+	NODE_ID(procfs_list, p) = procfs_list->pl_next_id;
+	procfs_list->pl_next_id++;
+
+	list_insert_tail(&procfs_list->pl_list, p);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_string.c b/zfs/module/os/freebsd/spl/spl_string.c
new file mode 100644
index 0000000..00b1df7
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_string.c

@@ -0,0 +1,107 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * $FreeBSD$
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/string.h>
+#include <sys/kmem.h>
+#include <machine/stdarg.h>
+
+/* ASCII range tests; note that both macros evaluate c more than once. */
+#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')
+
+#define	IS_ALPHA(c)	\
+	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+
+/*
+ * Return a pointer to the first character in s that also occurs in b, or
+ * NULL if s contains none of them.  The terminating NUL of s never matches.
+ */
+char *
+strpbrk(const char *s, const char *b)
+{
+	const char *candidate;
+
+	for (;;) {
+		/* Scan the break set for the current character of s. */
+		candidate = b;
+		while (*candidate != '\0' && *candidate != *s)
+			candidate++;
+		if (*candidate != '\0')
+			return ((char *)s);
+
+		/* Stop once the terminator of s has been examined. */
+		if (*s++ == '\0')
+			break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Convert a string into a valid C identifier by replacing invalid
+ * characters with '_'.  Also makes sure the string is nul-terminated
+ * and takes up at most n bytes.
+ *
+ * The first character may be a letter or '_'; later characters may also be
+ * digits.  An empty string is left untouched.  With n == 0 there is no room
+ * for even the terminator, so the buffer is not touched at all.
+ */
+void
+strident_canon(char *s, size_t n)
+{
+	char c;
+	char *end;
+
+	/* Avoid forming s - 1 and writing through s when nothing fits. */
+	if (n == 0)
+		return;
+
+	/* Last byte the result may occupy; reserved for the terminator. */
+	end = s + n - 1;
+
+	if ((c = *s) == 0)
+		return;
+
+	if (!IS_ALPHA(c) && c != '_')
+		*s = '_';
+
+	while (s < end && ((c = *(++s)) != 0)) {
+		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
+			*s = '_';
+	}
+	/* Either the original NUL was reached or the n-byte limit was hit. */
+	*s = 0;
+}
+
+/*
+ * Format into a freshly kmem_alloc()ed string and return it.  The buffer is
+ * sized to exactly fit the formatted text plus its terminator, so the
+ * caller must not change the string's length; release it with
+ * kmem_strfree().
+ */
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+	va_list ap;
+	char *str;
+	int len;
+
+	/* First pass: measure the output, including the terminating NUL. */
+	va_start(ap, fmt);
+	len = vsnprintf(NULL, 0, fmt, ap) + 1;
+	va_end(ap);
+
+	str = kmem_alloc(len, KM_SLEEP);
+
+	/* Second pass: render into the buffer just allocated. */
+	va_start(ap, fmt);
+	(void) vsnprintf(str, len, fmt, ap);
+	va_end(ap);
+
+	return (str);
+}
+
+/*
+ * Free a string by handing kmem_free() its current strlen() + 1 as the
+ * size.  str must not be NULL (asserted).
+ */
+void
+kmem_strfree(char *str)
+{
+	size_t size;
+
+	ASSERT3P(str, !=, NULL);
+
+	/* The size is recomputed from the string itself. */
+	size = strlen(str) + 1;
+	kmem_free(str, size);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_sunddi.c b/zfs/module/os/freebsd/spl/spl_sunddi.c
new file mode 100644
index 0000000..ebec77b
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_sunddi.c

@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/misc.h>
+#include <sys/sunddi.h>
+#include <sys/sysctl.h>
+
+/*
+ * Store strtol(str, nptr, base) in *result.  Always returns 0; no parse or
+ * range error is reported to the caller.
+ */
+int
+ddi_strtol(const char *str, char **nptr, int base, long *result)
+{
+
+	*result = strtol(str, nptr, base);
+	return (0);
+}
+
+/*
+ * Store strtoul(str, nptr, base) in *result, except when str is the
+ * hw_serial buffer itself (compared by address): then *result is set to
+ * prison0.pr_hostid and nptr is left untouched.  Always returns 0.
+ */
+int
+ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
+{
+
+	if (str == hw_serial) {
+		*result = prison0.pr_hostid;
+		return (0);
+	}
+
+	*result = strtoul(str, nptr, base);
+	return (0);
+}
+
+/*
+ * Store strtouq(str, nptr, base), cast to unsigned long long, in *result.
+ * Always returns 0; no parse or range error is reported to the caller.
+ */
+int
+ddi_strtoull(const char *str, char **nptr, int base, unsigned long long *result)
+{
+
+	*result = (unsigned long long)strtouq(str, nptr, base);
+	return (0);
+}
+
+/*
+ * Store strtoq(str, nptr, base), cast to long long, in *result.
+ * Always returns 0; no parse or range error is reported to the caller.
+ */
+int
+ddi_strtoll(const char *str, char **nptr, int base, long long *result)
+{
+
+	*result = (long long)strtoq(str, nptr, base);
+	return (0);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_sysevent.c b/zfs/module/os/freebsd/spl/spl_sysevent.c
new file mode 100644
index 0000000..16188c7
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_sysevent.c

@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#include <sys/list.h>
+#include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/nvpair.h>
+#include <sys/sunddi.h>
+#include <sys/sysevent.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
+#include <sys/bus.h>
+
+/*
+ * Flatten an event nvlist into a " name=value ..." string and pass it to
+ * devctl_notify() under system/subsystem "ZFS".
+ *
+ * The notification type is taken from the string pair named FM_CLASS (or an
+ * empty string if there is none); a leading "ESC_ZFS_" is rewritten to
+ * "misc.fs.zfs.".  Nested nvlists are skipped and any other unhandled pair
+ * type only produces a console message.
+ *
+ * Returns 0 on success, or ENOMEM if the sbuf cannot be allocated or
+ * finished.
+ */
+static int
+log_sysevent(nvlist_t *event)
+{
+	struct sbuf *sb;
+	const char *type;
+	char typestr[128];
+	nvpair_t *elem = NULL;
+
+	sb = sbuf_new_auto();
+	if (sb == NULL)
+		return (ENOMEM);
+	type = NULL;
+
+	while ((elem = nvlist_next_nvpair(event, elem)) != NULL) {
+		switch (nvpair_type(elem)) {
+		case DATA_TYPE_BOOLEAN:
+			/*
+			 * A DATA_TYPE_BOOLEAN pair carries no value; its
+			 * presence is the flag.  nvpair_value_boolean_value()
+			 * only accepts DATA_TYPE_BOOLEAN_VALUE pairs, so
+			 * calling it here would leave the local unset.
+			 */
+			sbuf_printf(sb, " %s=true", nvpair_name(elem));
+			break;
+		case DATA_TYPE_BOOLEAN_VALUE:
+		{
+			boolean_t value;
+
+			(void) nvpair_value_boolean_value(elem, &value);
+			sbuf_printf(sb, " %s=%s", nvpair_name(elem),
+			    value ? "true" : "false");
+			break;
+		}
+		case DATA_TYPE_UINT8:
+		{
+			uint8_t value;
+
+			(void) nvpair_value_uint8(elem, &value);
+			sbuf_printf(sb, " %s=%hhu", nvpair_name(elem), value);
+			break;
+		}
+		case DATA_TYPE_INT32:
+		{
+			int32_t value;
+
+			(void) nvpair_value_int32(elem, &value);
+			sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
+			    (intmax_t)value);
+			break;
+		}
+		case DATA_TYPE_UINT32:
+		{
+			uint32_t value;
+
+			(void) nvpair_value_uint32(elem, &value);
+			sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
+			    (uintmax_t)value);
+			break;
+		}
+		case DATA_TYPE_INT64:
+		{
+			int64_t value;
+
+			(void) nvpair_value_int64(elem, &value);
+			sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
+			    (intmax_t)value);
+			break;
+		}
+		case DATA_TYPE_UINT64:
+		{
+			uint64_t value;
+
+			(void) nvpair_value_uint64(elem, &value);
+			sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
+			    (uintmax_t)value);
+			break;
+		}
+		case DATA_TYPE_STRING:
+		{
+			char *value;
+
+			(void) nvpair_value_string(elem, &value);
+			sbuf_printf(sb, " %s=%s", nvpair_name(elem), value);
+			/* The FM_CLASS pair supplies the notification type. */
+			if (strcmp(FM_CLASS, nvpair_name(elem)) == 0)
+				type = value;
+			break;
+		}
+		case DATA_TYPE_UINT8_ARRAY:
+		{
+			uint8_t *value;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_uint8_array(elem, &value, &nelem);
+			sbuf_printf(sb, " %s=", nvpair_name(elem));
+			for (ii = 0; ii < nelem; ii++)
+				sbuf_printf(sb, "%02hhx", value[ii]);
+			break;
+		}
+		case DATA_TYPE_UINT16_ARRAY:
+		{
+			uint16_t *value;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_uint16_array(elem, &value, &nelem);
+			sbuf_printf(sb, " %s=", nvpair_name(elem));
+			for (ii = 0; ii < nelem; ii++)
+				sbuf_printf(sb, "%04hx", value[ii]);
+			break;
+		}
+		case DATA_TYPE_UINT32_ARRAY:
+		{
+			uint32_t *value;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_uint32_array(elem, &value, &nelem);
+			sbuf_printf(sb, " %s=", nvpair_name(elem));
+			for (ii = 0; ii < nelem; ii++)
+				sbuf_printf(sb, "%08jx", (uintmax_t)value[ii]);
+			break;
+		}
+		case DATA_TYPE_INT64_ARRAY:
+		{
+			int64_t *value;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_int64_array(elem, &value, &nelem);
+			sbuf_printf(sb, " %s=", nvpair_name(elem));
+			for (ii = 0; ii < nelem; ii++)
+				sbuf_printf(sb, "%016lld",
+				    (long long)value[ii]);
+			break;
+		}
+		case DATA_TYPE_UINT64_ARRAY:
+		{
+			uint64_t *value;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_uint64_array(elem, &value, &nelem);
+			sbuf_printf(sb, " %s=", nvpair_name(elem));
+			for (ii = 0; ii < nelem; ii++)
+				sbuf_printf(sb, "%016jx", (uintmax_t)value[ii]);
+			break;
+		}
+		case DATA_TYPE_STRING_ARRAY:
+		{
+			char **strarr;
+			uint_t ii, nelem;
+
+			(void) nvpair_value_string_array(elem, &strarr, &nelem);
+
+			for (ii = 0; ii < nelem; ii++) {
+				if (strarr[ii] == NULL)  {
+					sbuf_printf(sb, " <NULL>");
+					continue;
+				}
+
+				sbuf_printf(sb, " %s", strarr[ii]);
+				if (strcmp(FM_CLASS, strarr[ii]) == 0)
+					type = strarr[ii];
+			}
+			break;
+		}
+		case DATA_TYPE_NVLIST:
+			/* XXX - requires recursing in log_sysevent */
+			break;
+		default:
+			printf("%s: type %d is not implemented\n", __func__,
+			    nvpair_type(elem));
+			break;
+		}
+	}
+
+	if (sbuf_finish(sb) != 0) {
+		sbuf_delete(sb);
+		return (ENOMEM);
+	}
+
+	if (type == NULL)
+		type = "";
+	/* Map "ESC_ZFS_<x>" onto "misc.fs.zfs.<x>" (8 == strlen prefix). */
+	if (strncmp(type, "ESC_ZFS_", 8) == 0) {
+		snprintf(typestr, sizeof (typestr), "misc.fs.zfs.%s", type + 8);
+		type = typestr;
+	}
+	devctl_notify("ZFS", "ZFS", type, sbuf_data(sb));
+	sbuf_delete(sb);
+
+	return (0);
+}
+
+/*
+ * Kernel thread body: pull zevents one at a time and forward each to
+ * log_sysevent().  When no event can be fetched the thread waits in
+ * zfs_zevent_wait(); the loop ends once that wait returns ESHUTDOWN.
+ * The return value of log_sysevent() is ignored.  The argument is unused.
+ */
+static void
+sysevent_worker(void *arg __unused)
+{
+	zfs_zevent_t *ze;
+	nvlist_t *event;
+	uint64_t dropped = 0;
+	uint64_t dst_size;
+	int error;
+
+	zfs_zevent_init(&ze);
+	for (;;) {
+		/* NOTE(review): presumably the size limit for one event. */
+		dst_size = 131072;
+		dropped = 0;
+		event = NULL;
+		error = zfs_zevent_next(ze, &event,
+		    &dst_size, &dropped);
+		if (error) {
+			/* Nothing fetched: block until woken or shut down. */
+			error = zfs_zevent_wait(ze);
+			if (error == ESHUTDOWN)
+				break;
+		} else {
+			VERIFY3P(event, !=, NULL);
+			log_sysevent(event);
+			nvlist_free(event);
+		}
+	}
+
+	/*
+	 * We avoid zfs_zevent_destroy() here because we're otherwise racing
+	 * against fm_fini() destroying the zevent_lock.  zfs_zevent_destroy()
+	 * will currently only clear `ze->ze_zevent` from an event list then
+	 * free `ze`, so just inline the free() here -- events have already
+	 * been drained.
+	 */
+	VERIFY3P(ze->ze_zevent, ==, NULL);
+	kmem_free(ze, sizeof (zfs_zevent_t));
+
+	kthread_exit();
+}
+
+/*
+ * Start sysevent_worker() as a kernel thread named "sysevent" in the
+ * "zfskern" process (tracked through system_proc).  The result of
+ * kproc_kthread_add() is not checked.
+ */
+void
+ddi_sysevent_init(void)
+{
+	kproc_kthread_add(sysevent_worker, NULL, &system_proc, NULL, 0, 0,
+	    "zfskern", "sysevent");
+}

diff --git a/zfs/module/os/freebsd/spl/spl_taskq.c b/zfs/module/os/freebsd/spl/spl_taskq.c
new file mode 100644
index 0000000..3fa7939
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_taskq.c

@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2012 Spectra Logic Corporation.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/taskq.h>
+#include <sys/taskqueue.h>
+#include <sys/zfs_context.h>
+
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
+#include <machine/pcb.h>
+#endif
+
+#include <vm/uma.h>
+
+#if __FreeBSD_version < 1201522
+#define	taskqueue_start_threads_in_proc(tqp, count, pri, proc, name, ...) \
+    taskqueue_start_threads(tqp, count, pri, name, __VA_ARGS__)
+#endif
+
+static uint_t taskq_tsd;
+static uma_zone_t taskq_zone;
+
+taskq_t *system_taskq = NULL;
+taskq_t *system_delay_taskq = NULL;
+taskq_t *dynamic_taskq = NULL;
+
+proc_t *system_proc;
+
+extern int uma_align_cache;
+
+static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures");
+
+static CK_LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
+static unsigned long tqenthash;
+static unsigned long tqenthashlock;
+static struct sx *tqenthashtbl_lock;
+
+static taskqid_t tqidnext;
+
+#define	TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
+#define	TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
+
+#define	TIMEOUT_TASK 1
+#define	NORMAL_TASK 2
+
+/*
+ * SYSINIT hook: creates the taskq TSD key, the taskqid hash table with its
+ * array of sx locks, the UMA zone that taskq entries are allocated from, and
+ * finally the system_taskq and system_delay_taskq queues.
+ *
+ * tqenthash and tqenthashlock are both used as masks (see TQIDHASH and
+ * TQIDHASHLOCK), so the lock array holds tqenthashlock + 1 entries.
+ */
+static void
+system_taskq_init(void *arg)
+{
+	unsigned long nlocks;
+	unsigned long idx;
+
+	tsd_create(&taskq_tsd, NULL);
+
+	tqenthashtbl = hashinit(mp_ncpus * 8, M_TASKQ, &tqenthash);
+	tqenthashlock = (tqenthash + 1) / 8;
+	if (tqenthashlock > 0)
+		tqenthashlock--;
+	nlocks = tqenthashlock + 1;
+	tqenthashtbl_lock = malloc(sizeof (*tqenthashtbl_lock) * nlocks,
+	    M_TASKQ, M_WAITOK | M_ZERO);
+	for (idx = 0; idx < nlocks; idx++)
+		sx_init_flags(&tqenthashtbl_lock[idx], "tqenthash", SX_DUPOK);
+
+	taskq_zone = uma_zcreate("taskq_zone", sizeof (taskq_ent_t),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
+
+	system_taskq = taskq_create("system_taskq", mp_ncpus, minclsyspri,
+	    0, 0, 0);
+	system_delay_taskq = taskq_create("system_delay_taskq", mp_ncpus,
+	    minclsyspri, 0, 0, 0);
+}
+SYSINIT(system_taskq_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_init,
+    NULL);
+
+/*
+ * SYSUNINIT hook: undoes system_taskq_init().  Destroys both system taskqs,
+ * the entry zone and the TSD key, then destroys the hash locks, verifies
+ * that every hash bucket is empty, and frees the lock array and hash table.
+ */
+static void
+system_taskq_fini(void *arg)
+{
+	unsigned long nlocks = tqenthashlock + 1;
+	unsigned long nbuckets = tqenthash + 1;
+	unsigned long idx;
+
+	taskq_destroy(system_delay_taskq);
+	taskq_destroy(system_taskq);
+	uma_zdestroy(taskq_zone);
+	tsd_destroy(&taskq_tsd);
+
+	for (idx = 0; idx < nlocks; idx++)
+		sx_destroy(&tqenthashtbl_lock[idx]);
+	for (idx = 0; idx < nbuckets; idx++)
+		VERIFY(CK_LIST_EMPTY(&tqenthashtbl[idx]));
+
+	free(tqenthashtbl_lock, M_TASKQ);
+	free(tqenthashtbl, M_TASKQ);
+}
+SYSUNINIT(system_taskq_fini, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_fini,
+    NULL);
+
+/*
+ * Hand out the next task id by atomically incrementing tqidnext.  Zero is
+ * never returned; callers in this file treat a zero id as "no task".
+ */
+#ifdef __LP64__
+static taskqid_t
+__taskq_genid(void)
+{
+	taskqid_t next_id = atomic_add_64_nv(&tqidnext, 1);
+
+	/* A 64-bit counter is assumed never to wrap in practice. */
+	VERIFY(next_id);
+	return (next_id);
+}
+#else
+static taskqid_t
+__taskq_genid(void)
+{
+	taskqid_t next_id;
+
+	/* The 32-bit counter may wrap; step over zero when it does. */
+	do {
+		next_id = atomic_add_32_nv(&tqidnext, 1);
+	} while (__predict_false(next_id == 0));
+	VERIFY(next_id);
+	return (next_id);
+}
+#endif
+
+/*
+ * Find the registered entry whose id is `tqid`.  On a hit, a reference is
+ * taken on the entry (while the bucket lock is held) and the entry is
+ * returned; the caller drops that reference with taskq_free().  Returns NULL
+ * when no entry with that id is in the hash.
+ */
+static taskq_ent_t *
+taskq_lookup(taskqid_t tqid)
+{
+	struct sx *lock = TQIDHASHLOCK(tqid);
+	taskq_ent_t *cur;
+
+	sx_xlock(lock);
+	/* `cur` is NULL if the traversal runs off the end of the bucket. */
+	CK_LIST_FOREACH(cur, TQIDHASH(tqid), tqent_hash) {
+		if (cur->tqent_id == tqid) {
+			refcount_acquire(&cur->tqent_rc);
+			break;
+		}
+	}
+	sx_xunlock(lock);
+	return (cur);
+}
+
+/*
+ * Assign a fresh id to `ent`, mark it registered, and link it into the id
+ * hash under the matching bucket lock.  Returns the new id.
+ */
+static taskqid_t
+taskq_insert(taskq_ent_t *ent)
+{
+	taskqid_t new_id = __taskq_genid();
+	struct sx *lock = TQIDHASHLOCK(new_id);
+
+	ent->tqent_id = new_id;
+	ent->tqent_registered = B_TRUE;
+
+	sx_xlock(lock);
+	CK_LIST_INSERT_HEAD(TQIDHASH(new_id), ent, tqent_hash);
+	sx_xunlock(lock);
+
+	return (new_id);
+}
+
+/*
+ * Unlink `ent` from the id hash if it is currently registered and clear its
+ * registered flag.  Entries that were never inserted (or were already
+ * removed) are left untouched.
+ */
+static void
+taskq_remove(taskq_ent_t *ent)
+{
+	struct sx *lock;
+
+	if (ent->tqent_registered) {
+		lock = TQIDHASHLOCK(ent->tqent_id);
+		sx_xlock(lock);
+		CK_LIST_REMOVE(ent, tqent_hash);
+		sx_xunlock(lock);
+		ent->tqent_registered = B_FALSE;
+	}
+}
+
+/*
+ * taskqueue thread callback, registered by taskq_create_impl() for both the
+ * INIT callback (context = the owning taskq_t) and the SHUTDOWN callback
+ * (context = NULL).  Stores `context` in the taskq_tsd slot, which is what
+ * taskq_of_curthread() reads back.
+ */
+static void
+taskq_tsd_set(void *context)
+{
+	taskq_t *tq = context;
+
+#if defined(__amd64__) || defined(__i386__) || defined(__aarch64__)
+	/*
+	 * Only when a taskq is being attached and the slot is still empty:
+	 * call fpu_kern_thread() for this thread on the listed architectures.
+	 */
+	if (context != NULL && tsd_get(taskq_tsd) == NULL)
+		fpu_kern_thread(FPU_KERN_NORMAL);
+#endif
+	tsd_set(taskq_tsd, tq);
+}
+
+/*
+ * Shared constructor behind taskq_create() and taskq_create_proc().
+ * Allocates a taskq_t, creates its backing taskqueue, registers
+ * taskq_tsd_set() as the thread INIT and SHUTDOWN callback, and starts the
+ * worker threads.  When TASKQ_THREADS_CPU_PCT is set, `nthreads` is read as
+ * a percentage of mp_ncpus, with a minimum of one thread.
+ */
+static taskq_t *
+taskq_create_impl(const char *name, int nthreads, pri_t pri,
+    proc_t *proc __maybe_unused, uint_t flags)
+{
+	taskq_t *newtq;
+	int nworkers = nthreads;
+
+	if (flags & TASKQ_THREADS_CPU_PCT)
+		nworkers = MAX((mp_ncpus * nthreads) / 100, 1);
+
+	newtq = kmem_alloc(sizeof (*newtq), KM_SLEEP);
+	newtq->tq_queue = taskqueue_create(name, M_WAITOK,
+	    taskqueue_thread_enqueue, &newtq->tq_queue);
+
+	/* INIT stores the taskq in the thread's TSD; SHUTDOWN clears it. */
+	taskqueue_set_callback(newtq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT,
+	    taskq_tsd_set, newtq);
+	taskqueue_set_callback(newtq->tq_queue,
+	    TASKQUEUE_CALLBACK_TYPE_SHUTDOWN, taskq_tsd_set, NULL);
+
+	(void) taskqueue_start_threads_in_proc(&newtq->tq_queue, nworkers,
+	    pri, proc, "%s", name);
+
+	return (newtq);
+}
+
+/*
+ * Create a taskq whose threads are started with system_proc as the process
+ * argument.  minalloc and maxalloc are accepted but ignored here.
+ */
+taskq_t *
+taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused,
+    int maxalloc __unused, uint_t flags)
+{
+	return (taskq_create_impl(name, nthreads, pri, system_proc, flags));
+}
+
+/*
+ * Same as taskq_create(), except the caller supplies the process (`proc`)
+ * passed on when starting the threads.  minalloc and maxalloc are ignored.
+ */
+taskq_t *
+taskq_create_proc(const char *name, int nthreads, pri_t pri,
+    int minalloc __unused, int maxalloc __unused, proc_t *proc, uint_t flags)
+{
+	return (taskq_create_impl(name, nthreads, pri, proc, flags));
+}
+
+/*
+ * Free the backing taskqueue and then the taskq_t itself.  `tq` must not be
+ * used afterwards.
+ */
+void
+taskq_destroy(taskq_t *tq)
+{
+
+	taskqueue_free(tq->tq_queue);
+	kmem_free(tq, sizeof (*tq));
+}
+
+/*
+ * Report whether `thread` belongs to `tq`; returns the result of
+ * taskqueue_member() on the backing taskqueue unchanged.
+ */
+int
+taskq_member(taskq_t *tq, kthread_t *thread)
+{
+
+	return (taskqueue_member(tq->tq_queue, thread));
+}
+
+/*
+ * Return the value stored in the taskq_tsd slot: the taskq_t set by
+ * taskq_tsd_set() for taskq worker threads, or whatever tsd_get() yields
+ * when nothing was stored (presumably NULL -- confirm against tsd_get()).
+ */
+taskq_t *
+taskq_of_curthread(void)
+{
+	return (tsd_get(taskq_tsd));
+}
+
+/*
+ * Drop one reference on `task`.  The entry is first unlinked from the id
+ * hash (a no-op if it is not registered); it is returned to taskq_zone only
+ * when refcount_release() reports that this was the last reference.
+ */
+static void
+taskq_free(taskq_ent_t *task)
+{
+	taskq_remove(task);
+	if (refcount_release(&task->tqent_rc))
+		uma_zfree(taskq_zone, task);
+}
+
+/*
+ * Cancel the task with id `tid` on `tq`.
+ *
+ * Returns 0 when `tid` is 0 or no registered entry has that id; otherwise
+ * returns the result of taskqueue_cancel() / taskqueue_cancel_timeout()
+ * (EBUSY when the task could not be cancelled because it is executing, in
+ * which case this function waits for it to finish before returning).
+ *
+ * The entry is flagged as cancelled first so that taskq_run() skips the user
+ * callback if the task still gets to run.
+ */
+int
+taskq_cancel_id(taskq_t *tq, taskqid_t tid)
+{
+	uint32_t pend;
+	int rc;
+	taskq_ent_t *ent;
+
+	if (tid == 0)
+		return (0);
+
+	if ((ent = taskq_lookup(tid)) == NULL)
+		return (0);
+
+	ent->tqent_cancelled = B_TRUE;
+	if (ent->tqent_type == TIMEOUT_TASK) {
+		rc = taskqueue_cancel_timeout(tq->tq_queue,
+		    &ent->tqent_timeout_task, &pend);
+		/*
+		 * A timeout task was initialised and queued through
+		 * tqent_timeout_task, so it must be drained through that
+		 * member as well; tqent_task is not the queued object.
+		 */
+		if (rc == EBUSY) {
+			taskqueue_drain_timeout(tq->tq_queue,
+			    &ent->tqent_timeout_task);
+		}
+	} else {
+		rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
+		if (rc == EBUSY)
+			taskqueue_drain(tq->tq_queue, &ent->tqent_task);
+	}
+	if (rc != EBUSY && pend) {
+		/*
+		 * Tasks normally free themselves when run, but here the task
+		 * was cancelled so it did not free itself.
+		 */
+		taskq_free(ent);
+	}
+	/* Free the extra reference we added with taskq_lookup. */
+	taskq_free(ent);
+	return (rc);
+}
+
+/*
+ * taskqueue handler for entries allocated by taskq_dispatch() and
+ * taskq_dispatch_delay().  Invokes the user callback unless the entry was
+ * flagged cancelled, then drops the entry's reference via taskq_free().
+ */
+static void
+taskq_run(void *arg, int pending __unused)
+{
+	taskq_ent_t *task = arg;
+
+	if (!task->tqent_cancelled)
+		task->tqent_func(task->tqent_arg);
+	taskq_free(task);
+}
+
+/*
+ * Schedule func(arg) to run on `tq` once the lbolt clock reaches
+ * `expire_time`.  If that time has already been reached the call is handed
+ * to taskq_dispatch() instead.  Returns the id of the new task, or 0 if the
+ * entry could not be allocated.
+ *
+ * The allocation may sleep only when TQ_SLEEP is set and TQ_NOQUEUE is not.
+ */
+taskqid_t
+taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
+    uint_t flags, clock_t expire_time)
+{
+	clock_t ticks_left = expire_time - ddi_get_lbolt();
+	taskq_ent_t *ent;
+	taskqid_t new_id;
+	int waitflag;
+
+	if (ticks_left <= 0)
+		return (taskq_dispatch(tq, func, arg, flags));
+
+	waitflag = ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP) ?
+	    M_WAITOK : M_NOWAIT;
+	ent = uma_zalloc(taskq_zone, waitflag);
+	if (ent == NULL)
+		return (0);
+
+	/* The single initial reference is dropped by taskq_run(). */
+	refcount_init(&ent->tqent_rc, 1);
+	ent->tqent_type = TIMEOUT_TASK;
+	ent->tqent_cancelled = B_FALSE;
+	ent->tqent_func = func;
+	ent->tqent_arg = arg;
+	new_id = taskq_insert(ent);
+
+	TIMEOUT_TASK_INIT(tq->tq_queue, &ent->tqent_timeout_task, 0,
+	    taskq_run, ent);
+	taskqueue_enqueue_timeout(tq->tq_queue, &ent->tqent_timeout_task,
+	    ticks_left);
+	return (new_id);
+}
+
+/*
+ * Queue func(arg) for execution on `tq`.  Returns the id of the new task,
+ * or 0 if the entry could not be allocated.
+ *
+ * The allocation may sleep only when TQ_SLEEP is set and TQ_NOQUEUE is not.
+ * TQ_FRONT raises the task's priority to 1 so that it is placed ahead of
+ * ordinary (priority 0) tasks.
+ */
+taskqid_t
+taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
+{
+	taskq_ent_t *ent;
+	taskqid_t new_id;
+	int waitflag;
+	int front;
+
+	waitflag = ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP) ?
+	    M_WAITOK : M_NOWAIT;
+	front = (flags & TQ_FRONT) != 0;
+
+	ent = uma_zalloc(taskq_zone, waitflag);
+	if (ent == NULL)
+		return (0);
+
+	/* The single initial reference is dropped by taskq_run(). */
+	refcount_init(&ent->tqent_rc, 1);
+	ent->tqent_type = NORMAL_TASK;
+	ent->tqent_cancelled = B_FALSE;
+	ent->tqent_func = func;
+	ent->tqent_arg = arg;
+	new_id = taskq_insert(ent);
+
+	TASK_INIT(&ent->tqent_task, front, taskq_run, ent);
+	taskqueue_enqueue(tq->tq_queue, &ent->tqent_task);
+	return (new_id);
+}
+
+/*
+ * taskqueue handler for caller-owned entries queued by taskq_dispatch_ent().
+ * Unlike taskq_run() it neither checks the cancelled flag nor frees the
+ * entry; it only invokes the callback.
+ */
+static void
+taskq_run_ent(void *arg, int pending __unused)
+{
+	taskq_ent_t *task = arg;
+
+	task->tqent_func(task->tqent_arg);
+}
+
+/*
+ * Queue func(arg) on `tq` using the caller-provided entry `task`; nothing is
+ * allocated.  The entry is not registered in the id hash (its id is set to
+ * 0), so it cannot be looked up by id.  TQ_FRONT raises the task's priority
+ * to 1 so that it is placed ahead of ordinary tasks.
+ */
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
+    taskq_ent_t *task)
+{
+	int front = (flags & TQ_FRONT) != 0;
+
+	task->tqent_func = func;
+	task->tqent_arg = arg;
+	task->tqent_id = 0;
+	task->tqent_registered = B_FALSE;
+	task->tqent_cancelled = B_FALSE;
+
+	TASK_INIT(&task->tqent_task, front, taskq_run_ent, task);
+	taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
+}
+
+/*
+ * Wait on the whole queue by calling taskqueue_quiesce() on the backing
+ * taskqueue.
+ */
+void
+taskq_wait(taskq_t *tq)
+{
+	taskqueue_quiesce(tq->tq_queue);
+}
+
+/*
+ * Wait for the task with id `tid` on `tq` to finish.  Returns immediately
+ * when `tid` is 0 or no registered entry has that id (for example because
+ * the task already ran and freed itself).
+ */
+void
+taskq_wait_id(taskq_t *tq, taskqid_t tid)
+{
+	taskq_ent_t *ent;
+
+	if (tid == 0)
+		return;
+	if ((ent = taskq_lookup(tid)) == NULL)
+		return;
+
+	/*
+	 * Drain through the member the entry was actually queued with: a
+	 * TIMEOUT_TASK is initialised via tqent_timeout_task, and draining
+	 * tqent_task for it would operate on the wrong object.
+	 * NOTE(review): taskqueue_drain_timeout() also drains the callout;
+	 * confirm the desired behaviour for a timer that has not fired yet.
+	 */
+	if (ent->tqent_type == TIMEOUT_TASK) {
+		taskqueue_drain_timeout(tq->tq_queue,
+		    &ent->tqent_timeout_task);
+	} else {
+		taskqueue_drain(tq->tq_queue, &ent->tqent_task);
+	}
+	/* Drop the reference taken by taskq_lookup(). */
+	taskq_free(ent);
+}
+
+/*
+ * Wait by calling taskqueue_drain_all() on the backing taskqueue.  The `id`
+ * argument is ignored in this implementation.
+ */
+void
+taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused)
+{
+	taskqueue_drain_all(tq->tq_queue);
+}
+
+/*
+ * Return non-zero when the entry's embedded task has a pending count of
+ * zero, i.e. it is not currently waiting in a queue.
+ */
+int
+taskq_empty_ent(taskq_ent_t *t)
+{
+	return (t->tqent_task.ta_pending == 0);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_uio.c b/zfs/module/os/freebsd/spl/spl_uio.c
new file mode 100644
index 0000000..0bf251a
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_uio.c

@@ -0,0 +1,107 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved   */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/uio_impl.h>
+#include <sys/vnode.h>
+#include <sys/zfs_znode.h>
+
+/*
+ * Move n bytes between cp and the uio by calling uiomove() on the wrapped
+ * struct uio.  `dir` is only checked (by assertion) against the uio's own
+ * direction.  Note that n is narrowed to int for uiomove().
+ */
+int
+zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
+{
+	ASSERT3U(zfs_uio_rw(uio), ==, dir);
+	return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio)));
+}
+
+/*
+ * Like zfs_uiomove(), but the caller's uio structure is not modified: the
+ * transfer runs on a private clone of it.  The number of bytes actually
+ * copied is stored in *cbytes.  Returns the result of vn_io_fault_uiomove().
+ */
+int
+zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
+{
+	struct uio *orig_uio;
+	struct uio stack_uio;
+	struct iovec stack_iov;
+	struct uio *work;
+	int error;
+
+	ASSERT3U(zfs_uio_rw(uio), ==, rw);
+	orig_uio = GET_UIO_STRUCT(uio);
+	if (zfs_uio_iovcnt(uio) != 1) {
+		work = cloneuio(orig_uio);
+	} else {
+		/* Single segment: clone on the stack, no allocation. */
+		stack_iov = *orig_uio->uio_iov;
+		stack_uio = *orig_uio;
+		stack_uio.uio_iov = &stack_iov;
+		work = &stack_uio;
+	}
+
+	error = vn_io_fault_uiomove(p, n, work);
+	/* Bytes copied = how far the clone's residual count dropped. */
+	*cbytes = zfs_uio_resid(uio) - work->uio_resid;
+	if (work != &stack_uio)
+		free(work, M_IOV);
+	return (error);
+}
+
+/*
+ * Drop the next n chars out of *uiop.
+ *
+ * Nothing happens when n exceeds the uio's residual count.  Otherwise the
+ * uio's segment flag is temporarily switched to UIO_NOCOPY, zfs_uiomove()
+ * is called with a NULL buffer to advance the uio by n bytes, and the
+ * original segment flag is restored.
+ */
+void
+zfs_uioskip(zfs_uio_t *uio, size_t n)
+{
+	zfs_uio_seg_t segflg;
+
+	/* For the full compatibility with illumos. */
+	if (n > zfs_uio_resid(uio))
+		return;
+
+	segflg = zfs_uio_segflg(uio);
+	zfs_uio_segflg(uio) = UIO_NOCOPY;
+	/* The return value of zfs_uiomove() is intentionally not examined. */
+	zfs_uiomove(NULL, n, zfs_uio_rw(uio), uio);
+	zfs_uio_segflg(uio) = segflg;
+}
+
+/*
+ * Move n bytes between p and the uio by calling vn_io_fault_uiomove() on the
+ * wrapped struct uio.  `dir` is only checked (by assertion) against the
+ * uio's own direction.
+ */
+int
+zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
+{
+	ASSERT3U(zfs_uio_rw(uio), ==, dir);
+	return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio)));
+}

diff --git a/zfs/module/os/freebsd/spl/spl_vfs.c b/zfs/module/os/freebsd/spl/spl_vfs.c
new file mode 100644
index 0000000..53ef46f
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_vfs.c

@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/cred.h>
+#include <sys/vfs.h>
+#include <sys/priv.h>
+#include <sys/libkern.h>
+
+#include <sys/mutex.h>
+#include <sys/vnode.h>
+#include <sys/taskq.h>
+
+#include <sys/ccompat.h>
+
+MALLOC_DECLARE(M_MOUNT);
+
+/*
+ * Append a mount option named `name`, with an optional string value `arg`
+ * (may be NULL), to vfsp->mnt_opt.  The option list is created first if it
+ * does not exist yet.  Both the name and the value are copied.  `flags` is
+ * ignored.
+ *
+ * Locking: the mount interlock may or may not be held by the caller.  Either
+ * way it is dropped around the M_WAITOK allocations and re-taken for the
+ * list insertion; on return it is held only if it was held on entry.
+ */
+void
+vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
+    int flags __unused)
+{
+	struct vfsopt *opt;
+	size_t namesize;
+	int locked;
+
+	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
+		MNT_ILOCK(vfsp);
+
+	if (vfsp->mnt_opt == NULL) {
+		void *opts;
+
+		/* Allocate unlocked, then re-check in case of a race. */
+		MNT_IUNLOCK(vfsp);
+		opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
+		MNT_ILOCK(vfsp);
+		if (vfsp->mnt_opt == NULL) {
+			vfsp->mnt_opt = opts;
+			TAILQ_INIT(vfsp->mnt_opt);
+		} else {
+			free(opts, M_MOUNT);
+		}
+	}
+
+	/* Build the new option with the interlock released. */
+	MNT_IUNLOCK(vfsp);
+
+	opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
+	namesize = strlen(name) + 1;
+	opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
+	strlcpy(opt->name, name, namesize);
+	opt->pos = -1;
+	opt->seen = 1;
+	if (arg == NULL) {
+		opt->value = NULL;
+		opt->len = 0;
+	} else {
+		/* len counts the terminating NUL, which is copied too. */
+		opt->len = strlen(arg) + 1;
+		opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
+		bcopy(arg, opt->value, opt->len);
+	}
+
+	MNT_ILOCK(vfsp);
+	TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
+	if (!locked)
+		MNT_IUNLOCK(vfsp);
+}
+
+/*
+ * Delete the option named `name` from vfsp->mnt_opt via vfs_deleteopt().
+ * The mount interlock is taken for the duration unless the caller already
+ * owns it, in which case it is left held.
+ * NOTE(review): vfsp->mnt_opt is passed on without a NULL check -- confirm
+ * callers only use this after the option list exists.
+ */
+void
+vfs_clearmntopt(vfs_t *vfsp, const char *name)
+{
+	int locked;
+
+	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
+		MNT_ILOCK(vfsp);
+	vfs_deleteopt(vfsp->mnt_opt, name);
+	if (!locked)
+		MNT_IUNLOCK(vfsp);
+}
+
+/*
+ * Return 1 if option `opt` can be fetched from vfsp->mnt_optnew with
+ * vfs_getopt(), 0 otherwise (including when there is no mnt_optnew list).
+ * `argp` is handed to vfs_getopt() as the place to store the option value.
+ */
+int
+vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
+{
+	struct vfsoptlist *newopts;
+
+	newopts = vfsp->mnt_optnew;
+	if (newopts == NULL)
+		return (0);
+	return (vfs_getopt(newopts, opt, (void **)argp, NULL) == 0);
+}
+
+/*
+ * Mount a filesystem of type `fstype` on the directory vnode *vpp, which the
+ * caller must pass exclusively locked.
+ *
+ *   td      - thread passed to vfs_byname_kld() when resolving `fstype`
+ *   vpp     - in: vnode to cover; out: root vnode of the new mount on
+ *             success, NULL on failure
+ *   fstype  - filesystem type name (must be shorter than MFSNAMELEN)
+ *   fspath  - mount point path (must be shorter than MNAMELEN)
+ *   fspec   - stored as the mount's "from" option
+ *   fsflags - mount flags, masked with MNT_UPDATEMASK; MNT_RDONLY,
+ *             MNT_NOSUID and MNT_IGNORE are always added
+ *
+ * Returns 0 on success.  On failure returns an errno value (ENAMETOOLONG,
+ * ENODEV, ENOTDIR, EBUSY, or whatever vfs_byname_kld() / VFS_MOUNT()
+ * reported) after releasing the covered vnode with vput().
+ */
+int
+mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
+    char *fspec, int fsflags)
+{
+	struct vfsconf *vfsp;
+	struct mount *mp;
+	vnode_t *vp, *mvp;
+	int error;
+
+	ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
+
+	/* Take over the caller's vnode; *vpp is only refilled on success. */
+	vp = *vpp;
+	*vpp = NULL;
+	error = 0;
+
+	/*
+	 * Be ultra-paranoid about making sure the type and fspath
+	 * variables will fit in our mp buffers, including the
+	 * terminating NUL.
+	 */
+	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
+		error = ENAMETOOLONG;
+	if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
+		error = ENODEV;
+	if (error == 0 && vp->v_type != VDIR)
+		error = ENOTDIR;
+	/*
+	 * We need vnode lock to protect v_mountedhere and vnode interlock
+	 * to protect v_iflag.
+	 */
+	if (error == 0) {
+		VI_LOCK(vp);
+		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
+			vp->v_iflag |= VI_MOUNT;
+		else
+			error = EBUSY;
+		VI_UNLOCK(vp);
+	}
+	if (error != 0) {
+		vput(vp);
+		return (error);
+	}
+	vn_seqc_write_begin(vp);
+	VOP_UNLOCK1(vp);
+
+	/*
+	 * Allocate and initialize the filesystem.
+	 * We don't want regular user that triggered snapshot mount to be able
+	 * to unmount it, so pass credentials of the parent mount.
+	 */
+	mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
+
+	/*
+	 * vfs_setmntopt() appends to mnt_opt, so build the "from" option
+	 * there and then move the resulting list over to mnt_optnew.
+	 */
+	mp->mnt_optnew = NULL;
+	vfs_setmntopt(mp, "from", fspec, 0);
+	mp->mnt_optnew = mp->mnt_opt;
+	mp->mnt_opt = NULL;
+
+	/*
+	 * Set the mount level flags.
+	 */
+	mp->mnt_flag = fsflags & MNT_UPDATEMASK;
+	/*
+	 * Snapshots are always read-only.
+	 */
+	mp->mnt_flag |= MNT_RDONLY;
+	/*
+	 * We don't want snapshots to allow access to vulnerable setuid
+	 * programs, so we turn off setuid when mounting snapshots.
+	 */
+	mp->mnt_flag |= MNT_NOSUID;
+	/*
+	 * We don't want snapshots to be visible in regular
+	 * mount(8) and df(1) output.
+	 */
+	mp->mnt_flag |= MNT_IGNORE;
+
+	error = VFS_MOUNT(mp);
+	if (error != 0) {
+		/*
+		 * Clear VI_MOUNT and decrement the use count "atomically",
+		 * under the vnode lock.  This is not strictly required,
+		 * but makes it easier to reason about the life-cycle and
+		 * ownership of the covered vnode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		VI_LOCK(vp);
+		vp->v_iflag &= ~VI_MOUNT;
+		VI_UNLOCK(vp);
+		vn_seqc_write_end(vp);
+		vput(vp);
+		vfs_unbusy(mp);
+		vfs_freeopts(mp->mnt_optnew);
+		mp->mnt_vnodecovered = NULL;
+		vfs_mount_destroy(mp);
+		return (error);
+	}
+
+	/* The mount succeeded: promote the new options to current ones. */
+	if (mp->mnt_opt != NULL)
+		vfs_freeopts(mp->mnt_opt);
+	mp->mnt_opt = mp->mnt_optnew;
+	(void) VFS_STATFS(mp, &mp->mnt_stat);
+
+	/*
+	 * Prevent external consumers of mount options from reading
+	 * mnt_optnew.
+	 */
+	mp->mnt_optnew = NULL;
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+#ifdef FREEBSD_NAMECACHE
+	cache_purge(vp);
+#endif
+	VI_LOCK(vp);
+	vp->v_iflag &= ~VI_MOUNT;
+#ifdef VIRF_MOUNTPOINT
+	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
+#endif
+	vp->v_mountedhere = mp;
+	VI_UNLOCK(vp);
+	/* Put the new filesystem on the mount list. */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	vfs_event_signal(NULL, VQ_MOUNT, 0);
+	if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
+		panic("mount: lost mount");
+	vn_seqc_write_end(vp);
+	VOP_UNLOCK1(vp);
+#if __FreeBSD_version >= 1300048
+	vfs_op_exit(mp);
+#endif
+	vfs_unbusy(mp);
+	/* Hand the root vnode of the new mount back to the caller. */
+	*vpp = mvp;
+	return (0);
+}
+
+/*
+ * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
+ * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
+ * the file system as a result of releasing the vnode. Note, file systems
+ * already have to handle the race where the vnode is incremented before the
+ * inactive routine is called and does its locking.
+ *
+ * Warning: Excessive use of this routine can lead to performance problems.
+ * This is because taskqs throttle back allocation if too many are created.
+ */
+void
+vn_rele_async(vnode_t *vp, taskq_t *taskq)
+{
+	VERIFY3U(vp->v_usecount, >, 0);
+	/*
+	 * Fast path: the use count was dropped without reaching the last
+	 * reference, so nothing needs to be deferred.
+	 */
+	if (refcount_release_if_not_last(&vp->v_usecount)) {
+#if __FreeBSD_version < 1300045
+		vdrop(vp);
+#endif
+		return;
+	}
+	/*
+	 * Otherwise hand the final vrele() to the taskq.  TQ_SLEEP lets the
+	 * dispatch sleep for memory; a zero task id is treated as fatal.
+	 */
+	VERIFY3U(taskq_dispatch((taskq_t *)taskq,
+	    (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_vm.c b/zfs/module/os/freebsd/spl/spl_vm.c
new file mode 100644
index 0000000..739ddb0
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_vm.c

@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/counter.h>
+
+#include <sys/byteorder.h>
+#include <sys/lock.h>
+#include <sys/freebsd_rwlock.h>
+#include <sys/vm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
+const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
+const int zfs_vm_pagerret_ok = VM_PAGER_OK;
+const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
+const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
+
+/*
+ * Out-of-line wrapper around the VM_OBJECT_ASSERT_WLOCKED() macro.
+ */
+void
+zfs_vmobject_assert_wlocked(vm_object_t object)
+{
+
+	/*
+	 * This is not ideal because FILE/LINE used by assertions will not
+	 * be too helpful, but it must be a hard function for
+	 * compatibility reasons.
+	 */
+	VM_OBJECT_ASSERT_WLOCKED(object);
+}
+
+/*
+ * Out-of-line wrapper around the VM_OBJECT_WLOCK() macro.
+ */
+void
+zfs_vmobject_wlock(vm_object_t object)
+{
+
+	VM_OBJECT_WLOCK(object);
+}
+
+/*
+ * Out-of-line wrapper around the VM_OBJECT_WUNLOCK() macro.
+ */
+void
+zfs_vmobject_wunlock(vm_object_t object)
+{
+
+	VM_OBJECT_WUNLOCK(object);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_zlib.c b/zfs/module/os/freebsd/spl/spl_zlib.c
new file mode 100644
index 0000000..3644eba
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_zlib.c

@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/zmod.h>
+#if __FreeBSD_version >= 1300041
+#include <contrib/zlib/zlib.h>
+#else
+#include <sys/zlib.h>
+#endif
+#include <sys/kobj.h>
+
+
+/*
+ * zlib allocation callback: allocate items * size bytes from M_SOLARIS.
+ * The request uses M_NOWAIT without M_ZERO, so the memory is not cleared
+ * here.  `opaque` is unused.
+ */
+/*ARGSUSED*/
+static void *
+zcalloc(void *opaque, uint_t items, uint_t size)
+{
+
+	return (malloc((size_t)items*size, M_SOLARIS, M_NOWAIT));
+}
+
+/*
+ * zlib free callback: release memory obtained from zcalloc().  `opaque` is
+ * unused.
+ */
+/*ARGSUSED*/
+static void
+zcfree(void *opaque, void *ptr)
+{
+
+	free(ptr, M_SOLARIS);
+}
+
+/*
+ * Install zcalloc()/zcfree() as the stream's allocator callbacks, clear its
+ * opaque pointer, and call deflateInit() with the given level.  Returns the
+ * result of deflateInit().
+ */
+static int
+zlib_deflateInit(z_stream *stream, int level)
+{
+
+	stream->zalloc = zcalloc;
+	stream->opaque = NULL;
+	stream->zfree = zcfree;
+
+	return (deflateInit(stream, level));
+}
+
+/* Thin wrapper: returns deflate(stream, flush). */
+static int
+zlib_deflate(z_stream *stream, int flush)
+{
+	return (deflate(stream, flush));
+}
+
+/* Thin wrapper: returns deflateEnd(stream). */
+static int
+zlib_deflateEnd(z_stream *stream)
+{
+	return (deflateEnd(stream));
+}
+
+/*
+ * Install zcalloc()/zcfree() as the stream's allocator callbacks, clear its
+ * opaque pointer, and call inflateInit().  Returns the result of
+ * inflateInit().
+ */
+static int
+zlib_inflateInit(z_stream *stream)
+{
+	stream->zalloc = zcalloc;
+	stream->opaque = NULL;
+	stream->zfree = zcfree;
+
+	return (inflateInit(stream));
+}
+
+/*
+ * Run one inflate step on the stream.  Which entry point is called depends
+ * on the FreeBSD version: inflate() from 1300024 on, _zlib104_inflate()
+ * before that.
+ */
+static int
+zlib_inflate(z_stream *stream, int finish)
+{
+#if __FreeBSD_version >= 1300024
+	return (inflate(stream, finish));
+#else
+	return (_zlib104_inflate(stream, finish));
+#endif
+}
+
+
+/* Thin wrapper: returns inflateEnd(stream). */
+static int
+zlib_inflateEnd(z_stream *stream)
+{
+	return (inflateEnd(stream));
+}
+
+/*
+ * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
+ * and vfree for every call.  Using a kmem_cache also has the advantage
+ * that improves the odds that the memory used will be local to this cpu.
+ * To further improve things it might be wise to create a dedicated per-cpu
+ * workspace for use.  This would take some additional care because we then
+ * must disable preemption around the critical section, and verify that
+ * zlib_deflate* and zlib_inflate* never internally call schedule().
+ */
+/*
+ * Stub: the kmem_cache-backed allocation is commented out, so this always
+ * returns NULL and `flags` is unused.
+ */
+static void *
+zlib_workspace_alloc(int flags)
+{
+	// return (kmem_cache_alloc(zlib_workspace_cache, flags));
+	return (NULL);
+}
+
+/*
+ * Stub counterpart of zlib_workspace_alloc(): the kmem_cache free is
+ * commented out, so this does nothing.
+ */
+static void
+zlib_workspace_free(void *workspace)
+{
+	// kmem_cache_free(zlib_workspace_cache, workspace);
+}
+
+/*
+ * Compresses the source buffer into the destination buffer. The level
+ * parameter has the same meaning as in deflateInit.  sourceLen is the byte
+ * length of the source buffer. Upon entry, destLen is the total size of the
+ * destination buffer, which must be at least 0.1% larger than sourceLen plus
+ * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
+ *
+ * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ * memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+ * Z_STREAM_ERROR if the level parameter is invalid.
+ */
+int
+z_compress_level(void *dest, size_t *destLen, const void *source,
+    size_t sourceLen, int level)
+{
+	z_stream stream;
+	int err;
+
+	bzero(&stream, sizeof (stream));
+	stream.next_in = (Byte *)source;
+	stream.avail_in = (uInt)sourceLen;
+	stream.next_out = dest;
+	stream.avail_out = (uInt)*destLen;
+	stream.opaque = NULL;
+
+	/* Reject an output size that does not fit in zlib's uInt. */
+	if ((size_t)stream.avail_out != *destLen)
+		return (Z_BUF_ERROR);
+
+	stream.opaque = zlib_workspace_alloc(KM_SLEEP);
+#if 0
+	if (!stream.opaque)
+		return (Z_MEM_ERROR);
+#endif
+	err = zlib_deflateInit(&stream, level);
+	if (err == Z_OK) {
+		err = zlib_deflate(&stream, Z_FINISH);
+		if (err == Z_STREAM_END) {
+			/* Whole input consumed: report the compressed size. */
+			*destLen = stream.total_out;
+			err = zlib_deflateEnd(&stream);
+		} else {
+			zlib_deflateEnd(&stream);
+			/* Z_OK here means the output buffer filled up. */
+			if (err == Z_OK)
+				err = Z_BUF_ERROR;
+		}
+	}
+
+	zlib_workspace_free(stream.opaque);
+	return (err);
+}
+
+/*
+ * Decompresses the source buffer into the destination buffer.  sourceLen is
+ * the byte length of the source buffer. Upon entry, destLen is the total
+ * size of the destination buffer, which must be large enough to hold the
+ * entire uncompressed data. (The size of the uncompressed data must have
+ * been saved previously by the compressor and transmitted to the decompressor
+ * by some mechanism outside the scope of this compression library.)
+ * Upon exit, destLen is the actual size of the uncompressed data.
+ * This function can be used to decompress a whole file at once if the
+ * input file is mmap'ed.
+ *
+ * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ * enough memory, Z_BUF_ERROR if there was not enough room in the output
+ * buffer, or Z_DATA_ERROR if the input data was corrupted.
+ */
+int
+z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
+{
+	z_stream stream;
+	int err;
+
+	bzero(&stream, sizeof (stream));
+
+	stream.next_in = (Byte *)source;
+	stream.avail_in = (uInt)sourceLen;
+	stream.next_out = dest;
+	stream.avail_out = (uInt)*destLen;
+
+	/* Reject an output size that does not fit in zlib's uInt. */
+	if ((size_t)stream.avail_out != *destLen)
+		return (Z_BUF_ERROR);
+
+	stream.opaque = zlib_workspace_alloc(KM_SLEEP);
+#if 0
+	if (!stream.opaque)
+		return (Z_MEM_ERROR);
+#endif
+	err = zlib_inflateInit(&stream);
+	if (err == Z_OK) {
+		err = zlib_inflate(&stream, Z_FINISH);
+		if (err == Z_STREAM_END) {
+			/* Stream fully decoded: report the output size. */
+			*destLen = stream.total_out;
+			err = zlib_inflateEnd(&stream);
+		} else {
+			zlib_inflateEnd(&stream);
+			/*
+			 * A dictionary request, or running out of input
+			 * before the end of the stream, is reported as
+			 * corrupt data.
+			 */
+			if (err == Z_NEED_DICT ||
+			    (err == Z_BUF_ERROR && stream.avail_in == 0))
+				err = Z_DATA_ERROR;
+		}
+	}
+
+	zlib_workspace_free(stream.opaque);
+	return (err);
+}

diff --git a/zfs/module/os/freebsd/spl/spl_zone.c b/zfs/module/os/freebsd/spl/spl_zone.c
new file mode 100644
index 0000000..bd3f019
--- /dev/null
+++ b/zfs/module/os/freebsd/spl/spl_zone.c

@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/jail.h>
+#include <sys/osd.h>
+#include <sys/priv.h>
+#include <sys/zone.h>
+
+#include <sys/policy.h>
+
+static MALLOC_DEFINE(M_ZONES, "zones_data", "Zones data");
+
+/*
+ * Structure to record list of ZFS datasets exported to a zone.
+ *
+ * Each entry is allocated with room for the dataset name directly behind
+ * the list linkage, so zd_dataset holds a NUL-terminated copy of the name.
+ */
+typedef struct zone_dataset {
+	LIST_ENTRY(zone_dataset) zd_next;
+	char	zd_dataset[0];
+} zone_dataset_t;
+
+/* Head type for the per-jail list of exported datasets. */
+LIST_HEAD(zone_dataset_head, zone_dataset);
+
+/* OSD slot number returned by osd_jail_register() in zone_sysinit(). */
+static int zone_slot;
+
+/*
+ * Record `dataset` as exported to the jail identified by `jailid`.
+ *
+ * Returns 0 on success, the error from spl_priv_check_cred() when `cred`
+ * lacks PRIV_ZFS_JAIL, ENOENT when prison_find() finds no such jail, or
+ * EEXIST when the dataset name is already on the jail's list.
+ */
+int
+zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
+{
+	struct zone_dataset_head *head;
+	zone_dataset_t *zd, *zd2;
+	struct prison *pr;
+	int dofree, error;
+
+	if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0)
+		return (error);
+
+	/* Allocate memory before we grab prison's mutex. */
+	zd = malloc(sizeof (*zd) + strlen(dataset) + 1, M_ZONES, M_WAITOK);
+
+	sx_slock(&allprison_lock);
+	pr = prison_find(jailid);	/* Locks &pr->pr_mtx. */
+	sx_sunlock(&allprison_lock);
+	if (pr == NULL) {
+		free(zd, M_ZONES);
+		return (ENOENT);
+	}
+
+	head = osd_jail_get(pr, zone_slot);
+	if (head != NULL) {
+		/* The jail already has a list: refuse duplicate names. */
+		dofree = 0;
+		LIST_FOREACH(zd2, head, zd_next) {
+			if (strcmp(dataset, zd2->zd_dataset) == 0) {
+				free(zd, M_ZONES);
+				error = EEXIST;
+				goto end;
+			}
+		}
+	} else {
+		/*
+		 * First dataset for this jail.  The prison mutex is dropped
+		 * around the M_WAITOK allocation of the list head and taken
+		 * again before the head is stored in the jail's OSD slot;
+		 * prison_hold_locked() presumably keeps the prison alive
+		 * across that window -- TODO confirm.
+		 *
+		 * NOTE(review): osd_jail_get() is not repeated after the
+		 * mutex is retaken, and the osd_jail_set() result is only
+		 * examined by KASSERT -- confirm both are acceptable.
+		 */
+		dofree = 1;
+		prison_hold_locked(pr);
+		mtx_unlock(&pr->pr_mtx);
+		head = malloc(sizeof (*head), M_ZONES, M_WAITOK);
+		LIST_INIT(head);
+		mtx_lock(&pr->pr_mtx);
+		error = osd_jail_set(pr, zone_slot, head);
+		KASSERT(error == 0, ("osd_jail_set() failed (error=%d)",
+		    error));
+	}
+	/* The entry was sized for the name above, so strcpy() fits. */
+	strcpy(zd->zd_dataset, dataset);
+	LIST_INSERT_HEAD(head, zd, zd_next);
+end:
+	/*
+	 * dofree is set only on the path that called prison_hold_locked();
+	 * prison_free_locked() is presumably what releases pr_mtx there --
+	 * TODO confirm.
+	 */
+	if (dofree)
+		prison_free_locked(pr);
+	else
+		mtx_unlock(&pr->pr_mtx);
+	return (error);
+}
+
+/*
+ * Remove `dataset` from the list of datasets exported to jail `jailid`.
+ *
+ * Returns 0 when an entry was removed, the error from
+ * spl_priv_check_cred() when `cred` lacks PRIV_ZFS_JAIL, or ENOENT when
+ * the jail, its dataset list, or the named entry is not found.
+ */
+int
+zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid)
+{
+	struct zone_dataset_head *exported;
+	zone_dataset_t *entry;
+	struct prison *jail;
+	int rc;
+
+	rc = spl_priv_check_cred(cred, PRIV_ZFS_JAIL);
+	if (rc != 0)
+		return (rc);
+
+	sx_slock(&allprison_lock);
+	jail = prison_find(jailid);
+	sx_sunlock(&allprison_lock);
+	if (jail == NULL)
+		return (ENOENT);
+
+	/* Assume "not found" until a matching entry is removed. */
+	rc = ENOENT;
+	exported = osd_jail_get(jail, zone_slot);
+	if (exported == NULL)
+		goto out;
+
+	LIST_FOREACH(entry, exported, zd_next) {
+		if (strcmp(dataset, entry->zd_dataset) != 0)
+			continue;
+
+		LIST_REMOVE(entry, zd_next);
+		free(entry, M_ZONES);
+		/* Drop the OSD slot once the last dataset is gone. */
+		if (LIST_EMPTY(exported))
+			osd_jail_del(jail, zone_slot);
+		rc = 0;
+		break;
+	}
+out:
+	mtx_unlock(&jail->pr_mtx);
+	return (rc);
+}
+
+/*
+ * Returns 1 if the named dataset is visible in the current zone and 0
+ * otherwise.  When the dataset is visible and 'write' is not NULL, *write
+ * is set to 1 if the dataset is also writable and to 0 if it is read-only.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	struct zone_dataset_head *exported;
+	zone_dataset_t *entry;
+	struct prison *jail;
+	size_t namelen, parentlen, explen;
+	int visible = 0;
+
+	if (dataset[0] == '\0')
+		return (0);
+	if (INGLOBALZONE(curproc)) {
+		if (write != NULL)
+			*write = 1;
+		return (1);
+	}
+
+	/*
+	 * The name is known to be non-empty here.  parentlen is its length
+	 * with one trailing slash ignored, so that forms such as
+	 * 'pool/dataset/' are supported by the parent check below.
+	 */
+	namelen = strlen(dataset);
+	parentlen = namelen;
+	if (dataset[namelen - 1] == '/')
+		parentlen--;
+
+	jail = curthread->td_ucred->cr_prison;
+	mtx_lock(&jail->pr_mtx);
+	exported = osd_jail_get(jail, zone_slot);
+	if (exported == NULL)
+		goto out;
+
+	/*
+	 * First pass: the name matches an exported dataset exactly, or names
+	 * something underneath one ('/' or '@' follows the exported name).
+	 * Such datasets are visible and writable.
+	 */
+	LIST_FOREACH(entry, exported, zd_next) {
+		explen = strlen(entry->zd_dataset);
+		if (namelen < explen)
+			continue;
+		if (bcmp(dataset, entry->zd_dataset, explen) != 0)
+			continue;
+		if (dataset[explen] != '\0' && dataset[explen] != '/' &&
+		    dataset[explen] != '@')
+			continue;
+
+		if (write != NULL)
+			*write = 1;
+		visible = 1;
+		goto out;
+	}
+
+	/*
+	 * Second pass: the name is a parent of an exported dataset.  These
+	 * should be visible, but read-only.
+	 */
+	LIST_FOREACH(entry, exported, zd_next) {
+		if (parentlen >= strlen(entry->zd_dataset))
+			continue;
+		if (bcmp(dataset, entry->zd_dataset, parentlen) != 0)
+			continue;
+		if (entry->zd_dataset[parentlen] != '/')
+			continue;
+
+		if (write != NULL)
+			*write = 0;
+		visible = 1;
+		goto out;
+	}
+out:
+	mtx_unlock(&jail->pr_mtx);
+	return (visible);
+}
+
+/*
+ * Destructor registered for the zone OSD slot: frees every dataset entry
+ * on the list passed as `arg`, then the list head itself.
+ */
+static void
+zone_destroy(void *arg)
+{
+	struct zone_dataset_head *exported = arg;
+	zone_dataset_t *entry;
+
+	/* Keep popping the first entry until the list is empty. */
+	for (entry = LIST_FIRST(exported); entry != NULL;
+	    entry = LIST_FIRST(exported)) {
+		LIST_REMOVE(entry, zd_next);
+		free(entry, M_ZONES);
+	}
+	free(exported, M_ZONES);
+}
+
+/*
+ * Return the pr_hostid of the calling thread's prison, truncated to 32
+ * bits.  `ptr` must be NULL; any other value trips the KASSERT.
+ */
+uint32_t
+zone_get_hostid(void *ptr)
+{
+
+	KASSERT(ptr == NULL, ("only NULL pointer supported in %s", __func__));
+
+	return ((uint32_t)curthread->td_ucred->cr_prison->pr_hostid);
+}
+
+/*
+ * SYSINIT hook: register the jail OSD slot used for the exported-dataset
+ * lists, with zone_destroy() as its destructor.
+ */
+static void
+zone_sysinit(void *arg __unused)
+{
+
+	zone_slot = osd_jail_register(zone_destroy, NULL);
+}
+
+/*
+ * SYSUNINIT hook: release the jail OSD slot obtained by zone_sysinit().
+ */
+static void
+zone_sysuninit(void *arg __unused)
+{
+
+	osd_jail_deregister(zone_slot);
+}
+
+/* Run the slot registration and deregistration with the driver subsystem. */
+SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysinit, NULL);
+SYSUNINIT(zone_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysuninit, NULL);

diff --git a/zfs/module/os/freebsd/zfs/abd_os.c b/zfs/module/os/freebsd/zfs/abd_os.c
new file mode 100644
index 0000000..ff7f112
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/abd_os.c

@@ -0,0 +1,505 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+/*
+ * See abd.c for a general overview of the arc buffered data (ABD).
+ *
+ * Using a large proportion of scattered ABDs decreases ARC fragmentation since
+ * when we are at the limit of allocatable space, using equal-size chunks will
+ * allow us to quickly reclaim enough space for a new large allocation (assuming
+ * it is also scattered).
+ *
+ * ABDs are allocated scattered by default unless the caller uses
+ * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
+ */
+
+#include <sys/abd_impl.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/zio.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+
+/*
+ * Layout of the named kstat entries published as "abdstats".  The values
+ * are refreshed from abd_sums by abd_kstats_update().
+ */
+typedef struct abd_stats {
+	kstat_named_t abdstat_struct_size;
+	kstat_named_t abdstat_scatter_cnt;
+	kstat_named_t abdstat_scatter_data_size;
+	kstat_named_t abdstat_scatter_chunk_waste;
+	kstat_named_t abdstat_linear_cnt;
+	kstat_named_t abdstat_linear_data_size;
+} abd_stats_t;
+
+/*
+ * kstat name and type for each statistic.  The initializers are positional,
+ * so their order must follow the member order of abd_stats_t.
+ */
+static abd_stats_t abd_stats = {
+	/* Amount of memory occupied by all of the abd_t struct allocations */
+	{ "struct_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset()).
+	 */
+	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
+	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The amount of space wasted at the end of the last chunk across all
+	 * scatter ABDs tracked by scatter_cnt.
+	 */
+	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of linear ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
+	 * ABD takes ownership of its buf then it will become tracked.
+	 */
+	{ "linear_cnt",				KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
+	{ "linear_data_size",			KSTAT_DATA_UINT64 },
+};
+
+/*
+ * Live counters behind the kstats, one wmsum per abd_stats_t member.  They
+ * are set up in abd_init(), torn down in abd_fini() and read by
+ * abd_kstats_update().
+ */
+struct {
+	wmsum_t abdstat_struct_size;
+	wmsum_t abdstat_scatter_cnt;
+	wmsum_t abdstat_scatter_data_size;
+	wmsum_t abdstat_scatter_chunk_waste;
+	wmsum_t abdstat_linear_cnt;
+	wmsum_t abdstat_linear_data_size;
+} abd_sums;
+
+/*
+ * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
+ * ABD's for.  Smaller allocations will use linear ABD's which use
+ * zio_[data_]buf_alloc().
+ *
+ * Scatter ABD's use at least one page each, so sub-page allocations waste
+ * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
+ * half of each page).  Using linear ABD's for small allocations means that
+ * they will be put on slabs which contain many allocations.
+ *
+ * Linear ABDs for multi-page allocations are easier to use, and in some cases
+ * it allows to avoid buffer copying.  But allocation and especially free
+ * of multi-page linear ABDs are expensive operations due to KVA mapping and
+ * unmapping, and with time they cause KVA fragmentations.
+ */
+size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
+
+#if defined(_KERNEL)
+SYSCTL_DECL(_vfs_zfs);
+
+/* Expose both scatter knobs under vfs.zfs as read-write tunables. */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
+	&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
+SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
+	&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
+#endif
+
+/* Cache of PAGE_SIZE chunks backing scatter ABDs; created in abd_init(). */
+kmem_cache_t *abd_chunk_cache;
+/* Handle of the installed "abdstats" kstat, or NULL if creation failed. */
+static kstat_t *abd_ksp;
+
+/*
+ * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
+ * just a single zero'd page-sized buffer. This allows us to conserve
+ * memory by only using a single zero buffer for the scatter chunks.
+ */
+abd_t *abd_zero_scatter = NULL;
+
+/*
+ * Number of page-sized chunks needed to hold `size` bytes (rounds up,
+ * assuming PAGE_MASK is PAGE_SIZE - 1).
+ */
+static uint_t
+abd_chunkcnt_for_bytes(size_t size)
+{
+	size_t rounded = size + PAGE_MASK;
+
+	return (rounded >> PAGE_SHIFT);
+}
+
+/*
+ * Number of chunks referenced by a scatter ABD: the chunks covering its
+ * data plus the offset into its first chunk.
+ */
+static inline uint_t
+abd_scatter_chunkcnt(abd_t *abd)
+{
+	size_t span;
+
+	ASSERT(!abd_is_linear(abd));
+	span = ABD_SCATTER(abd).abd_offset + abd->abd_size;
+	return (abd_chunkcnt_for_bytes(span));
+}
+
+/*
+ * Decide whether an allocation of `size` bytes should be linear: always
+ * when scatter ABDs are disabled, otherwise only below
+ * zfs_abd_scatter_min_size.
+ */
+boolean_t
+abd_size_alloc_linear(size_t size)
+{
+	if (!zfs_abd_scatter_enabled)
+		return (B_TRUE);
+
+	return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Add (ABDSTAT_INCR) or remove (ABDSTAT_DECR) a scatter ABD from the
+ * statistics: count, data size and chunk waste.  The waste is also
+ * charged to, or returned from, ARC_SPACE_ABD_CHUNK_WASTE.
+ */
+void
+abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
+{
+	uint_t chunks = abd_scatter_chunkcnt(abd);
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+	/* Bytes held by the chunks beyond the ABD's own size. */
+	int slack = (chunks << PAGE_SHIFT) - abd->abd_size;
+
+	if (op != ABDSTAT_INCR) {
+		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
+		ABDSTAT_INCR(abdstat_scatter_chunk_waste, -slack);
+		arc_space_return(slack, ARC_SPACE_ABD_CHUNK_WASTE);
+		return;
+	}
+
+	ABDSTAT_BUMP(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
+	ABDSTAT_INCR(abdstat_scatter_chunk_waste, slack);
+	arc_space_consume(slack, ARC_SPACE_ABD_CHUNK_WASTE);
+}
+
+/*
+ * Add (ABDSTAT_INCR) or remove (ABDSTAT_DECR) a linear ABD from the
+ * linear count and data-size statistics.
+ */
+void
+abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
+{
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+
+	if (op != ABDSTAT_INCR) {
+		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
+		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+		return;
+	}
+
+	ABDSTAT_BUMP(abdstat_linear_cnt);
+	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
+}
+
+/*
+ * Sanity-check a scatter ABD: it must not be a linear page, its offset
+ * must lie within the first page, and every chunk pointer must be set.
+ */
+void
+abd_verify_scatter(abd_t *abd)
+{
+	uint_t idx = 0;
+	uint_t chunks;
+
+	/*
+	 * There is no scatter linear pages in FreeBSD so there is
+	 * an error if the ABD has been marked as a linear page.
+	 */
+	ASSERT(!abd_is_linear_page(abd));
+	ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
+	chunks = abd_scatter_chunkcnt(abd);
+	while (idx < chunks) {
+		ASSERT3P(ABD_SCATTER(abd).abd_chunks[idx], !=, NULL);
+		idx++;
+	}
+}
+
+/*
+ * Fill the chunk array of `abd` with enough chunks from abd_chunk_cache
+ * to hold `size` bytes.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	uint_t chunks = abd_chunkcnt_for_bytes(size);
+	uint_t idx = 0;
+
+	while (idx < chunks) {
+		ABD_SCATTER(abd).abd_chunks[idx] =
+		    kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
+		idx++;
+	}
+}
+
+/*
+ * Return every chunk referenced by the scatter ABD to abd_chunk_cache.
+ */
+void
+abd_free_chunks(abd_t *abd)
+{
+	uint_t chunks = abd_scatter_chunkcnt(abd);
+	uint_t idx = 0;
+
+	while (idx < chunks) {
+		kmem_cache_free(abd_chunk_cache,
+		    ABD_SCATTER(abd).abd_chunks[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Allocate an abd_t large enough to carry the chunk pointers for `size`
+ * bytes, and account for it in abdstat_struct_size.
+ */
+abd_t *
+abd_alloc_struct_impl(size_t size)
+{
+	uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
+	size_t scatter_size =
+	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
+	/*
+	 * In the event we are allocating a gang ABD, the size passed in
+	 * will be 0. We must make sure to set abd_size to the size of an
+	 * ABD struct as opposed to an ABD scatter with 0 chunks. The gang
+	 * ABD struct allocation accounts for an additional 24 bytes over
+	 * a scatter ABD with 0 chunks.
+	 */
+	size_t abd_size = MAX(sizeof (abd_t), scatter_size);
+	abd_t *abd;
+
+	abd = kmem_alloc(abd_size, KM_PUSHPAGE);
+	ASSERT3P(abd, !=, NULL);
+	ABDSTAT_INCR(abdstat_struct_size, abd_size);
+
+	return (abd);
+}
+
+/*
+ * Free an abd_t, recomputing the size it was allocated with so the free
+ * and the abdstat_struct_size adjustment match the allocation.
+ */
+void
+abd_free_struct_impl(abd_t *abd)
+{
+	uint_t chunkcnt = 0;
+	ssize_t size;
+
+	/* Linear and gang ABDs carry no chunk pointers. */
+	if (!abd_is_linear(abd) && !abd_is_gang(abd))
+		chunkcnt = abd_scatter_chunkcnt(abd);
+
+	size = MAX(sizeof (abd_t),
+	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
+	kmem_free(abd, size);
+	ABDSTAT_INCR(abdstat_struct_size, -size);
+}
+
+/*
+ * Build abd_zero_scatter: a scatter ABD of size SPA_MAXBLOCKSIZE in which
+ * every chunk pointer refers to the same zero_region buffer.
+ */
+_Static_assert(ZERO_REGION_SIZE >= PAGE_SIZE, "zero_region too small");
+static void
+abd_alloc_zero_scatter(void)
+{
+	uint_t chunks = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
+	uint_t idx = 0;
+
+	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
+	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
+	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
+
+	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
+
+	while (idx < chunks) {
+		ABD_SCATTER(abd_zero_scatter).abd_chunks[idx] =
+		    __DECONST(void *, zero_region);
+		idx++;
+	}
+
+	/* Only one page of data is accounted for this ABD. */
+	ABDSTAT_BUMP(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
+}
+
+/*
+ * Undo abd_alloc_zero_scatter(): reverse its statistics and free the
+ * struct.  No chunks are freed; they all point at zero_region.
+ */
+static void
+abd_free_zero_scatter(void)
+{
+	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
+
+	abd_free_struct(abd_zero_scatter);
+	abd_zero_scatter = NULL;
+}
+
+/*
+ * kstat update callback: copy the current wmsum totals into the named
+ * kstat entries.  Writes are refused with EACCES; reads return 0.
+ */
+static int
+abd_kstats_update(kstat_t *ksp, int rw)
+{
+	abd_stats_t *stats;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	stats = ksp->ks_data;
+	stats->abdstat_struct_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_struct_size);
+	stats->abdstat_scatter_cnt.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_cnt);
+	stats->abdstat_scatter_data_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
+	stats->abdstat_scatter_chunk_waste.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
+	stats->abdstat_linear_cnt.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_linear_cnt);
+	stats->abdstat_linear_data_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_linear_data_size);
+	return (0);
+}
+
+/*
+ * Set up the ABD layer: the chunk cache, the statistic counters, the
+ * "abdstats" kstat (skipped if it cannot be created) and the shared
+ * zero-filled scatter ABD.
+ */
+void
+abd_init(void)
+{
+	abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
+	    NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
+
+	wmsum_init(&abd_sums.abdstat_struct_size, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
+	wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
+	wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
+
+	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
+	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+	if (abd_ksp != NULL) {
+		abd_ksp->ks_data = &abd_stats;
+		abd_ksp->ks_update = abd_kstats_update;
+		kstat_install(abd_ksp);
+	}
+
+	/* Updates the counters, so it runs after they are initialized. */
+	abd_alloc_zero_scatter();
+}
+
+/*
+ * Tear down everything abd_init() created, in reverse order.
+ */
+void
+abd_fini(void)
+{
+	/* Updates the counters, so it runs before they are destroyed. */
+	abd_free_zero_scatter();
+
+	if (abd_ksp != NULL) {
+		kstat_delete(abd_ksp);
+		abd_ksp = NULL;
+	}
+
+	wmsum_fini(&abd_sums.abdstat_struct_size);
+	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
+	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
+	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
+	wmsum_fini(&abd_sums.abdstat_linear_cnt);
+	wmsum_fini(&abd_sums.abdstat_linear_data_size);
+
+	kmem_cache_destroy(abd_chunk_cache);
+	abd_chunk_cache = NULL;
+}
+
+/*
+ * Must never be called on FreeBSD; reaching it fails the VERIFY.
+ */
+void
+abd_free_linear_page(abd_t *abd)
+{
+	/*
+	 * FreeBSD does not have scatter linear pages
+	 * so there is an error.
+	 */
+	VERIFY(0);
+}
+
+/*
+ * If we're going to use this ABD for doing I/O using the block layer, the
+ * consumer of the ABD data doesn't care if it's scattered or not, and we don't
+ * plan to store this ABD in memory for a long period of time, we should
+ * allocate the ABD type that requires the least data copying to do the I/O.
+ *
+ * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
+ * using a scatter/gather list we should switch to that and replace this call
+ * with vanilla abd_alloc().
+ */
+abd_t *
+abd_alloc_for_io(size_t size, boolean_t is_metadata)
+{
+	/* Always a linear ABD, whatever the size. */
+	return (abd_alloc_linear(size, is_metadata));
+}
+
+/*
+ * Make `abd` (or a newly allocated struct) a scatter ABD that refers to
+ * the chunks of `sabd` starting `off` bytes in and spanning `size` bytes.
+ * No data is copied, only chunk pointers.  Returns the ABD that was
+ * filled in, which may differ from the `abd` passed in.
+ */
+abd_t *
+abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
+    size_t size)
+{
+	abd_verify(sabd);
+	ASSERT3U(off, <=, sabd->abd_size);
+
+	/* Split the absolute offset into a chunk index and an in-page part. */
+	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
+	size_t page_off = new_offset & PAGE_MASK;
+	size_t first_chunk = new_offset >> PAGE_SHIFT;
+	size_t chunkcnt = abd_chunkcnt_for_bytes(page_off + size);
+
+	ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
+
+	/*
+	 * If an abd struct is provided, it is only the minimum size.  If we
+	 * need additional chunks, we need to allocate a new struct.
+	 */
+	if (abd == NULL ||
+	    offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
+	    sizeof (abd_t)) {
+		abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
+	}
+
+	/*
+	 * Even if this buf is filesystem metadata, we only track that
+	 * if we own the underlying data buffer, which is not true in
+	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
+	 */
+
+	ABD_SCATTER(abd).abd_offset = page_off;
+
+	/* Copy the scatterlist starting at the correct offset */
+	(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
+	    &ABD_SCATTER(sabd).abd_chunks[first_chunk],
+	    chunkcnt * sizeof (void *));
+
+	return (abd);
+}
+
+/*
+ * Point the iterator at the start of `abd` with nothing mapped.  Gang
+ * ABDs are not accepted here.
+ */
+void
+abd_iter_init(struct abd_iter *aiter, abd_t *abd)
+{
+	ASSERT(!abd_is_gang(abd));
+	abd_verify(abd);
+
+	aiter->iter_mapsize = 0;
+	aiter->iter_mapaddr = NULL;
+	aiter->iter_pos = 0;
+	aiter->iter_abd = abd;
+}
+
+/*
+ * This is just a helper function to see if we have exhausted the
+ * abd_iter and reached the end.  True only when the position equals the
+ * ABD's size exactly.
+ */
+boolean_t
+abd_iter_at_end(struct abd_iter *aiter)
+{
+	return (aiter->iter_pos == aiter->iter_abd->abd_size);
+}
+
+/*
+ * Move the iterator forward by `amount` bytes.  No chunk may be mapped
+ * when this is called.  An iterator that is already at the end is left
+ * untouched.
+ */
+void
+abd_iter_advance(struct abd_iter *aiter, size_t amount)
+{
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* Only advance while there is something left to advance to. */
+	if (!abd_iter_at_end(aiter))
+		aiter->iter_pos += amount;
+}
+
+/*
+ * Set iter_mapaddr and iter_mapsize to the data at the iterator's current
+ * position.  For a linear ABD that is the rest of the buffer; for a
+ * scatter ABD it is limited to the rest of the current chunk.  An
+ * iterator that is already at the end is left untouched.
+ */
+void
+abd_iter_map(struct abd_iter *aiter)
+{
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* There's nothing left to iterate over, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	abd_t *abd = aiter->iter_abd;
+	size_t pos = aiter->iter_pos;
+
+	if (abd_is_linear(abd)) {
+		void *buf = ABD_LINEAR_BUF(abd);
+
+		aiter->iter_mapsize = abd->abd_size - pos;
+		aiter->iter_mapaddr = (char *)buf + pos;
+		return;
+	}
+
+	/* Locate the chunk and the byte within it for this position. */
+	size_t abs_off = pos + ABD_SCATTER(abd).abd_offset;
+	void *chunk = ABD_SCATTER(abd).abd_chunks[abs_off >> PAGE_SHIFT];
+	size_t in_page = abs_off & PAGE_MASK;
+
+	aiter->iter_mapsize = MIN(PAGE_SIZE - in_page,
+	    abd->abd_size - pos);
+	aiter->iter_mapaddr = (char *)chunk + in_page;
+}
+
+/*
+ * Unmap the current chunk from aiter. This can be safely called when the aiter
+ * has already exhausted, in which case this does nothing.
+ */
+void
+abd_iter_unmap(struct abd_iter *aiter)
+{
+	/* A mapping is only expected if the iterator was not at the end. */
+	if (!abd_iter_at_end(aiter)) {
+		ASSERT3P(aiter->iter_mapaddr, !=, NULL);
+		ASSERT3U(aiter->iter_mapsize, >, 0);
+	}
+
+	aiter->iter_mapaddr = NULL;
+	aiter->iter_mapsize = 0;
+}
+
+/* Ask the kmem layer to reap abd_chunk_cache. */
+void
+abd_cache_reap_now(void)
+{
+	kmem_cache_reap_soon(abd_chunk_cache);
+}

diff --git a/zfs/module/os/freebsd/zfs/arc_os.c b/zfs/module/os/freebsd/zfs/arc_os.c
new file mode 100644
index 0000000..3dd49f0
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/arc_os.c

@@ -0,0 +1,278 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+#include <sys/spa_impl.h>
+#include <sys/counter.h>
+#include <sys/zio_compress.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_context.h>
+#include <sys/arc.h>
+#include <sys/zfs_refcount.h>
+#include <sys/vdev.h>
+#include <sys/vdev_trim.h>
+#include <sys/vdev_impl.h>
+#include <sys/dsl_pool.h>
+#include <sys/zio_checksum.h>
+#include <sys/multilist.h>
+#include <sys/abd.h>
+#include <sys/zil.h>
+#include <sys/fm/fs/zfs.h>
+#include <sys/eventhandler.h>
+#include <sys/callb.h>
+#include <sys/kstat.h>
+#include <sys/zthr.h>
+#include <zfs_fletcher.h>
+#include <sys/arc_impl.h>
+#include <sys/sdt.h>
+#include <sys/aggsum.h>
+#include <sys/vnode.h>
+#include <cityhash.h>
+#include <machine/vmparam.h>
+#include <sys/vm.h>
+#include <sys/vmmeter.h>
+
+#if __FreeBSD_version >= 1300139
+/* Held exclusively around vnlru_free_vfsops() in arc_prune_task(). */
+static struct sx arc_vnlru_lock;
+/* Marker vnode passed to vnlru_free_vfsops(); set in arc_lowmem_init(). */
+static struct vnode *arc_vnlru_marker;
+#endif
+
+extern struct vfsops zfs_vfsops;
+
+/*
+ * Free-page count used as the reference point in arc_available_memory().
+ * Zero until arc_free_target_init() runs.
+ */
+uint_t zfs_arc_free_target = 0;
+
+/*
+ * SYSINIT hook: seed zfs_arc_free_target from vm_cnt.v_free_target.
+ */
+static void
+arc_free_target_init(void *unused __unused)
+{
+	zfs_arc_free_target = vm_cnt.v_free_target;
+}
+SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+    arc_free_target_init, NULL);
+
+/*
+ * Handler for the vfs.zfs.arc_free_target sysctl.  A new value is accepted
+ * only if it lies between minfree and vm_cnt.v_page_count inclusive;
+ * anything else yields EINVAL.
+ *
+ * We don't have a tunable for arc_free_target due to the dependency on
+ * pagedaemon initialisation.
+ */
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	uint_t val = zfs_arc_free_target;
+	int err;
+
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	/* Done on error, or when the request supplied no new value. */
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree || val > vm_cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+SYSCTL_DECL(_vfs_zfs);
+/* BEGIN CSTYLED */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint_t),
+    sysctl_vfs_zfs_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim");
+/* END CSTYLED */
+
+/*
+ * Return the smallest memory headroom, in bytes, among the limits checked
+ * below.  The result is negative when a limit has already been crossed.
+ */
+int64_t
+arc_available_memory(void)
+{
+	int64_t lowest = INT64_MAX;
+	int64_t n __unused;
+
+	/*
+	 * Cooperate with pagedaemon when it's time for it to scan
+	 * and reclaim some pages.
+	 */
+	n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+	lowest = MIN(lowest, n);
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+	/*
+	 * If we're on an i386 platform, it's possible that we'll exhaust the
+	 * kernel heap space before we ever run out of available physical
+	 * memory.  Most checks of the size of the heap_area compare against
+	 * tune.t_minarmem, which is the minimum available real memory that we
+	 * can have in the system.  However, this is generally fixed at 25 pages
+	 * which is so low that it's useless.  In this comparison, we seek to
+	 * calculate the total heap-size, and reclaim if more than 3/4ths of the
+	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
+	 * free)
+	 */
+	n = uma_avail() - (long)(uma_limit() / 4);
+	lowest = MIN(lowest, n);
+#endif
+
+	DTRACE_PROBE1(arc__available_memory, int64_t, lowest);
+	return (lowest);
+}
+
+/*
+ * Return a default max arc size based on the amount of physical memory:
+ * the larger of 5/8 of `allmem` and either `allmem` minus 1 GiB (when at
+ * least 1 GiB is present) or `min`.
+ */
+uint64_t
+arc_default_max(uint64_t min, uint64_t allmem)
+{
+	uint64_t five_eighths = allmem * 5 / 8;
+	uint64_t candidate = min;
+
+	if (allmem >= 1 << 30)
+		candidate = allmem - (1 << 30);
+
+	return (five_eighths > candidate ? five_eighths : candidate);
+}
+
+/*
+ * Taskq callback dispatched by arc_prune_async().  `arg` carries the scan
+ * count packed into a pointer.  The count is clamped to INT_MAX (except on
+ * ILP32 builds) and handed to vnlru_free_vfsops() under arc_vnlru_lock, or
+ * to vnlru_free() on older FreeBSD versions.
+ */
+static void
+arc_prune_task(void *arg)
+{
+	int64_t nr_scan = (intptr_t)arg;
+
+#ifndef __ILP32__
+	if (nr_scan > INT_MAX)
+		nr_scan = INT_MAX;
+#endif
+
+#if __FreeBSD_version >= 1300139
+	sx_xlock(&arc_vnlru_lock);
+	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
+	sx_xunlock(&arc_vnlru_lock);
+#else
+	vnlru_free(nr_scan, &zfs_vfsops);
+#endif
+}
+
+/*
+ * Notify registered consumers they must drop holds on a portion of the ARC
+ * buffered they reference.  This provides a mechanism to ensure the ARC can
+ * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers.  This
+ * is analogous to dnlc_reduce_cache() but more generic.
+ *
+ * This operation is performed asynchronously so it may be safely called
+ * in the context of the arc_reclaim_thread().  A reference is taken here
+ * for each registered arc_prune_t and the arc_prune_task() is responsible
+ * for releasing it once the registered arc_prune_func_t has completed.
+ */
+void
+arc_prune_async(int64_t adjust)
+{
+
+	/* The count travels as a pointer, so keep it within intptr_t. */
+#ifndef __LP64__
+	if (adjust > INTPTR_MAX)
+		adjust = INTPTR_MAX;
+#endif
+	taskq_dispatch(arc_prune_taskq, arc_prune_task,
+	    (void *)(intptr_t)adjust, TQ_SLEEP);
+	ARCSTAT_BUMP(arcstat_prune);
+}
+
+/* Physical memory (physmem pages) converted to bytes. */
+uint64_t
+arc_all_memory(void)
+{
+	return (ptob(physmem));
+}
+
+/* No throttling is done here: always returns 0, ignoring its arguments. */
+int
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
+{
+	return (0);
+}
+
+/* Free memory (freemem pages) converted to bytes. */
+uint64_t
+arc_free_memory(void)
+{
+	return (ptob(freemem));
+}
+
+/* Tag of the registered vm_lowmem handler, or NULL if none. */
+static eventhandler_tag arc_event_lowmem = NULL;
+
+/*
+ * vm_lowmem event handler: stop ARC growth for arc_grow_retry seconds and
+ * lower the ARC target size.  Does nothing further when the ARC target is
+ * already at or below its minimum.
+ */
+static void
+arc_lowmem(void *arg __unused, int howto __unused)
+{
+	int64_t free_memory, to_free, headroom, deficit;
+
+	arc_no_grow = B_TRUE;
+	arc_warm = B_TRUE;
+	arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+	free_memory = arc_available_memory();
+
+	headroom = arc_c - arc_c_min;
+	if (headroom <= 0)
+		return;
+
+	/* A negative free_memory is added on top as a deficit to cover. */
+	deficit = MIN(free_memory, 0);
+	to_free = (headroom >> arc_shrink_shift) - deficit;
+	DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
+	arc_reduce_target_size(to_free);
+
+	/*
+	 * It is unsafe to block here in arbitrary threads, because we can come
+	 * here from ARC itself and may hold ARC locks and thus risk a deadlock
+	 * with ARC reclaim thread.
+	 */
+	if (curproc == pageproc)
+		arc_wait_for_eviction(to_free, B_FALSE);
+}
+
+/*
+ * Register arc_lowmem() for vm_lowmem events and, on FreeBSD versions
+ * that have it, set up the vnlru marker and its lock.
+ */
+void
+arc_lowmem_init(void)
+{
+	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
+	    EVENTHANDLER_PRI_FIRST);
+#if __FreeBSD_version >= 1300139
+	arc_vnlru_marker = vnlru_alloc_marker();
+	sx_init(&arc_vnlru_lock, "arc vnlru lock");
+#endif
+}
+
+/*
+ * Undo arc_lowmem_init(): deregister the handler if one was registered,
+ * then release the vnlru marker and its lock.
+ */
+void
+arc_lowmem_fini(void)
+{
+	if (arc_event_lowmem != NULL)
+		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
+#if __FreeBSD_version >= 1300139
+	/*
+	 * NOTE(review): the lock is destroyed only when the marker is
+	 * non-NULL, although arc_lowmem_init() initializes it
+	 * unconditionally -- confirm vnlru_alloc_marker() cannot return NULL.
+	 */
+	if (arc_vnlru_marker != NULL) {
+		vnlru_free_marker(arc_vnlru_marker);
+		sx_destroy(&arc_vnlru_lock);
+	}
+#endif
+}
+
+/* Intentionally empty in this file. */
+void
+arc_register_hotplug(void)
+{
+}
+
+/* Intentionally empty in this file. */
+void
+arc_unregister_hotplug(void)
+{
+}

diff --git a/zfs/module/os/freebsd/zfs/crypto_os.c b/zfs/module/os/freebsd/zfs/crypto_os.c
new file mode 100644
index 0000000..f342c5e
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/crypto_os.c

@@ -0,0 +1,641 @@
+/*
+ * Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2018 Sean Eric Fagan <sef@ixsystems.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Portions of this file are derived from sys/geom/eli/g_eli_hmac.c
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+
+#ifdef _KERNEL
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/xform.h>
+#else
+#include <strings.h>
+#endif
+
+#include <sys/zio_crypt.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+
+#include <sys/freebsd_crypto.h>
+
+#define	SHA512_HMAC_BLOCK_SIZE	128
+
+static int crypt_sessions = 0;
+SYSCTL_DECL(_vfs_zfs);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, crypt_sessions, CTLFLAG_RD,
+	&crypt_sessions, 0, "Number of cryptographic sessions created");
+
+/*
+ * Begin an HMAC-SHA512 computation.
+ *
+ * ctx:   context to initialize; its inner and outer SHA512 states are
+ *        seeded with the key XORed with the ipad (0x36) and opad (0x5c)
+ *        bytes respectively.
+ * c_key: key; ck_length is in bits.  A zero-length key is treated as all
+ *        zeroes and a key longer than the 128-byte block is replaced by
+ *        its SHA512 digest.
+ *
+ * All temporary key material on the stack is wiped before returning.
+ * Based on the similar code in geom/eli/g_eli_hmac.c.
+ */
+void
+crypto_mac_init(struct hmac_ctx *ctx, const crypto_key_t *c_key)
+{
+	uint8_t padded_key[SHA512_HMAC_BLOCK_SIZE];
+	uint8_t inner_pad[SHA512_HMAC_BLOCK_SIZE];
+	uint8_t outer_pad[SHA512_HMAC_BLOCK_SIZE];
+	SHA512_CTX key_hash;
+	size_t key_bytes = CRYPTO_BITS2BYTES(c_key->ck_length);
+	int n;
+
+	/* Build the block-sized key, zero padded on the right. */
+	explicit_bzero(padded_key, sizeof (padded_key));
+	if (c_key->ck_length != 0) {
+		if (key_bytes > SHA512_HMAC_BLOCK_SIZE) {
+			/* Oversized key: use SHA512(key) instead. */
+			SHA512_Init(&key_hash);
+			SHA512_Update(&key_hash, c_key->ck_data, key_bytes);
+			SHA512_Final(padded_key, &key_hash);
+		} else {
+			bcopy(c_key->ck_data, padded_key, key_bytes);
+		}
+	}
+
+	/* Derive the inner and outer pads from the padded key. */
+	for (n = 0; n < sizeof (padded_key); n++) {
+		inner_pad[n] = padded_key[n] ^ 0x36;
+		outer_pad[n] = padded_key[n] ^ 0x5c;
+	}
+	explicit_bzero(padded_key, sizeof (padded_key));
+
+	/* Prime the inner hash with the inner pad. */
+	SHA512_Init(&ctx->innerctx);
+	SHA512_Update(&ctx->innerctx, inner_pad, sizeof (inner_pad));
+	explicit_bzero(inner_pad, sizeof (inner_pad));
+
+	/* Prime the outer hash with the outer pad. */
+	SHA512_Init(&ctx->outerctx);
+	SHA512_Update(&ctx->outerctx, outer_pad, sizeof (outer_pad));
+	explicit_bzero(outer_pad, sizeof (outer_pad));
+}
+
+/*
+ * Feed datasize bytes of message data into an HMAC computation started
+ * with crypto_mac_init().  Only the inner SHA512 state is updated here.
+ */
+void
+crypto_mac_update(struct hmac_ctx *ctx, const void *data, size_t datasize)
+{
+	SHA512_Update(&ctx->innerctx, data, datasize);
+}
+
+/*
+ * Finish an HMAC-SHA512 computation and write the result to md.
+ *
+ * ctx:    context from crypto_mac_init(); it is wiped here and must be
+ *         re-initialized before it is used again.
+ * md:     output buffer.
+ * mdsize: number of digest bytes to store.  0 requests the whole digest
+ *         (SHA512_DIGEST_LENGTH bytes).  A value larger than the digest
+ *         is clamped to the digest length, since there are no further
+ *         bytes to hand out and copying more would read past the local
+ *         digest buffer.
+ */
+void
+crypto_mac_final(struct hmac_ctx *ctx, void *md, size_t mdsize)
+{
+	uint8_t digest[SHA512_DIGEST_LENGTH];
+
+	/* Complete inner hash */
+	SHA512_Final(digest, &ctx->innerctx);
+
+	/* Complete outer hash */
+	SHA512_Update(&ctx->outerctx, digest, sizeof (digest));
+	SHA512_Final(digest, &ctx->outerctx);
+
+	explicit_bzero(ctx, sizeof (*ctx));
+	/* mdsize == 0 means "Give me the whole hash!" */
+	if (mdsize == 0 || mdsize > sizeof (digest))
+		mdsize = sizeof (digest);
+	bcopy(digest, md, mdsize);
+	explicit_bzero(digest, sizeof (digest));
+}
+
+/*
+ * One-shot HMAC-SHA512: MAC in_data_size bytes of in_data under key and
+ * store out_data_size bytes of the result in out_data (see
+ * crypto_mac_final() for the meaning of a zero out_data_size).
+ */
+void
+crypto_mac(const crypto_key_t *key, const void *in_data, size_t in_data_size,
+    void *out_data, size_t out_data_size)
+{
+	struct hmac_ctx hmac;
+
+	crypto_mac_init(&hmac, key);
+	crypto_mac_update(&hmac, in_data, in_data_size);
+	crypto_mac_final(&hmac, out_data, out_data_size);
+}
+
+/*
+ * Completion callback used for asynchronous requests.  Marks the owning
+ * session (stored in crp_opaque by zfs_crypto_dispatch()) as done under
+ * its lock and wakes any thread sleeping on the request.  Always
+ * returns 0.
+ */
+static int
+freebsd_zfs_crypt_done(struct cryptop *crp)
+{
+	freebsd_crypt_session_t *session = crp->crp_opaque;
+
+	mtx_lock(&session->fs_lock);
+	session->fs_done = true;
+	mtx_unlock(&session->fs_lock);
+
+	wakeup(crp);
+	return (0);
+}
+
+/*
+ * Completion callback used for synchronous sessions; there is nobody to
+ * wake up, so it does nothing and returns 0.
+ */
+static int
+freebsd_zfs_crypt_done_sync(struct cryptop *crp)
+{
+
+	return (0);
+}
+
+/*
+ * Tear down a session: destroy its lock, release the opencrypto session
+ * handle, then scrub the structure.  The structure itself is not freed
+ * here.
+ */
+void
+freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
+{
+	mtx_destroy(&sess->fs_lock);
+	crypto_freesession(sess->fs_sid);
+	explicit_bzero(sess, sizeof (*sess));
+}
+
+/*
+ * Submit crp to the opencrypto framework and wait for it to finish.
+ *
+ * The request is retried for as long as the result is ENOMEM (after a
+ * short pause) or EAGAIN; any other result, including success, ends the
+ * loop.  For sessions that are not synchronous the caller sleeps on crp
+ * until freebsd_zfs_crypt_done() sets session->fs_done.
+ *
+ * Returns 0 on success, otherwise the error from crypto_dispatch() or
+ * the request's crp_etype.
+ *
+ * NOTE(review): fs_done is only cleared on the retry path at the bottom
+ * of the loop, not before the first dispatch -- confirm that callers
+ * always pass a session whose fs_done is false.
+ */
+static int
+zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
+{
+	int error;
+
+	/* Lets the completion callback find the session again. */
+	crp->crp_opaque = session;
+	for (;;) {
+#if __FreeBSD_version < 1400004
+		boolean_t async = ((crypto_ses2caps(crp->crp_session) &
+		    CRYPTOCAP_F_SYNC) == 0);
+#else
+		boolean_t async = !CRYPTO_SESS_SYNC(crp->crp_session);
+#endif
+		crp->crp_callback = async ? freebsd_zfs_crypt_done :
+		    freebsd_zfs_crypt_done_sync;
+		error = crypto_dispatch(crp);
+		if (error == 0) {
+			if (async) {
+				/* Wait for the callback to flag completion. */
+				mtx_lock(&session->fs_lock);
+				while (session->fs_done == false) {
+					msleep(crp, &session->fs_lock, 0,
+					    "zfs_crypto", 0);
+				}
+				mtx_unlock(&session->fs_lock);
+			}
+			error = crp->crp_etype;
+		}
+
+		if (error == ENOMEM) {
+			pause("zcrnomem", 1);
+		} else if (error != EAGAIN) {
+			break;
+		}
+		/* Reset the request and completion state before retrying. */
+		crp->crp_etype = 0;
+		crp->crp_flags &= ~CRYPTO_F_DONE;
+		session->fs_done = false;
+#if __FreeBSD_version < 1300087
+		/*
+		 * Session ID changed, so we should record that,
+		 * and try again
+		 */
+		session->fs_sid = crp->crp_session;
+#endif
+	}
+	return (error);
+}
+
+/*
+ * Debug trace for freebsd_crypt_uio().  Unless FCRYPTO_DEBUG is defined
+ * the body is empty.  When it is defined, the arguments, the raw key
+ * bytes and every iovec of data_uio are printed.
+ *
+ * Side effect (debug builds only): the residual count of data_uio is
+ * set to the sum of its iovec lengths.
+ */
+static void
+freebsd_crypt_uio_debug_log(boolean_t encrypt,
+    freebsd_crypt_session_t *input_sessionp,
+    struct zio_crypt_info *c_info,
+    zfs_uio_t *data_uio,
+    crypto_key_t *key,
+    uint8_t *ivbuf,
+    size_t datalen,
+    size_t auth_len)
+{
+#ifdef FCRYPTO_DEBUG
+	size_t total = 0;
+
+	printf("%s(%s, %p, { %s, %d, %d, %s }, %p, { %d, %p, %u }, "
+	    "%p, %u, %u)\n",
+	    __FUNCTION__, encrypt ? "encrypt" : "decrypt", input_sessionp,
+	    c_info->ci_algname, c_info->ci_crypt_type,
+	    (unsigned int)c_info->ci_keylen, c_info->ci_name,
+	    data_uio, key->ck_format, key->ck_data,
+	    (unsigned int)key->ck_length,
+	    ivbuf, (unsigned int)datalen, (unsigned int)auth_len);
+	printf("\tkey = { ");
+	for (int i = 0; i < key->ck_length / 8; i++) {
+		uint8_t *b = (uint8_t *)key->ck_data;
+		printf("%02x ", b[i]);
+	}
+	printf("}\n");
+	for (int i = 0; i < zfs_uio_iovcnt(data_uio); i++) {
+		printf("\tiovec #%d: <%p, %u>\n", i,
+		    zfs_uio_iovbase(data_uio, i),
+		    (unsigned int)zfs_uio_iovlen(data_uio, i));
+		total += zfs_uio_iovlen(data_uio, i);
+	}
+	zfs_uio_resid(data_uio) = total;
+#endif
+}
+
+/*
+ * Create a new cryptographic session.  This should
+ * happen every time the key changes (including when
+ * it's first loaded).
+ */
+#if __FreeBSD_version >= 1300087
+/*
+ * FreeBSD >= 1300087 variant.
+ *
+ * sessp:  session to fill in (fs_sid and fs_lock).
+ * c_info: selects AES-GCM or AES-CCM through ci_crypt_type.
+ * key:    raw key; ck_length is in bits.
+ *
+ * Returns 0 on success, EINVAL for an unsupported key length, ENOTSUP for
+ * an unsupported cipher type, or the error from crypto_newsession().
+ *
+ * NOTE(review): once the parameters have been validated, fs_lock is
+ * initialized and crypt_sessions is incremented even when
+ * crypto_newsession() itself fails -- confirm callers expect that.
+ */
+int
+freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
+    struct zio_crypt_info *c_info, crypto_key_t *key)
+{
+	struct crypto_session_params params;
+	const size_t key_bytes = key->ck_length / 8;
+	int rc = 0;
+
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
+	    __FUNCTION__, sessp,
+	    c_info->ci_algname, c_info->ci_crypt_type,
+	    (unsigned int)c_info->ci_keylen, c_info->ci_name,
+	    key->ck_format, key->ck_data, (unsigned int)key->ck_length);
+	printf("\tkey = { ");
+	for (int i = 0; i < key->ck_length / 8; i++) {
+		uint8_t *b = (uint8_t *)key->ck_data;
+		printf("%02x ", b[i]);
+	}
+	printf("}\n");
+#endif
+	/* Describe an AEAD session keyed with the caller's key bytes. */
+	bzero(&params, sizeof (params));
+	params.csp_mode = CSP_MODE_AEAD;
+	params.csp_cipher_key = key->ck_data;
+	params.csp_cipher_klen = key_bytes;
+
+	if (c_info->ci_crypt_type == ZC_TYPE_GCM) {
+		params.csp_cipher_alg = CRYPTO_AES_NIST_GCM_16;
+		params.csp_ivlen = AES_GCM_IV_LEN;
+		if (key_bytes != AES_128_GMAC_KEY_LEN &&
+		    key_bytes != AES_192_GMAC_KEY_LEN &&
+		    key_bytes != AES_256_GMAC_KEY_LEN) {
+			rc = EINVAL;
+			goto done;
+		}
+	} else if (c_info->ci_crypt_type == ZC_TYPE_CCM) {
+		params.csp_cipher_alg = CRYPTO_AES_CCM_16;
+		params.csp_ivlen = AES_CCM_IV_LEN;
+		if (key_bytes != AES_128_CBC_MAC_KEY_LEN &&
+		    key_bytes != AES_192_CBC_MAC_KEY_LEN &&
+		    key_bytes != AES_256_CBC_MAC_KEY_LEN) {
+			rc = EINVAL;
+			goto done;
+		}
+	} else {
+		rc = ENOTSUP;
+		goto done;
+	}
+
+	/*
+	 * Only software drivers are requested on FreeBSD 13 and later:
+	 * common crypto offload drivers impose constraints on AES-GCM AAD
+	 * lengths that make them unusable for ZFS, and there is currently
+	 * no mechanism to fall back to a software driver for requests a
+	 * hardware driver does not handle.
+	 *
+	 * On 12 hardware drivers remain permitted, since CPU-accelerated
+	 * drivers such as aesni(4) register themselves as hardware drivers.
+	 */
+	rc = crypto_newsession(&sessp->fs_sid, &params, CRYPTOCAP_F_SOFTWARE);
+	mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
+	    NULL, MTX_DEF);
+	crypt_sessions++;
+done:
+#ifdef FCRYPTO_DEBUG
+	if (rc)
+		printf("%s: returning error %d\n", __FUNCTION__, rc);
+#endif
+	return (rc);
+}
+
+/*
+ * Encrypt or decrypt the data described by data_uio (FreeBSD >= 1300087
+ * variant).
+ *
+ * encrypt:         B_TRUE to encrypt and compute the digest, B_FALSE to
+ *                  decrypt and verify it.
+ * input_sessionp:  session to use; if NULL a temporary session is created
+ *                  from c_info/key and released before returning.
+ * data_uio:        buffers laid out as auth_len bytes of additional
+ *                  authenticated data, then datalen bytes of payload,
+ *                  then the digest.  Its residual count is reset to the
+ *                  sum of its iovec lengths.
+ * ivbuf:           IV; ZIO_DATA_IV_LEN bytes are copied from it.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+freebsd_crypt_uio(boolean_t encrypt,
+    freebsd_crypt_session_t *input_sessionp,
+    struct zio_crypt_info *c_info,
+    zfs_uio_t *data_uio,
+    crypto_key_t *key,
+    uint8_t *ivbuf,
+    size_t datalen,
+    size_t auth_len)
+{
+	struct cryptop *crp;
+	freebsd_crypt_session_t *session = NULL;
+	int error = 0;
+	size_t total = 0;
+
+	freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
+	    key, ivbuf, datalen, auth_len);
+	for (int i = 0; i < zfs_uio_iovcnt(data_uio); i++)
+		total += zfs_uio_iovlen(data_uio, i);
+	zfs_uio_resid(data_uio) = total;
+	if (input_sessionp == NULL) {
+		session = kmem_zalloc(sizeof (*session), KM_SLEEP);
+		error = freebsd_crypt_newsession(session, c_info, key);
+		if (error)
+			goto out;
+	} else
+		session = input_sessionp;
+
+	crp = crypto_getreq(session->fs_sid, M_WAITOK);
+	if (encrypt) {
+		crp->crp_op = CRYPTO_OP_ENCRYPT |
+		    CRYPTO_OP_COMPUTE_DIGEST;
+	} else {
+		crp->crp_op = CRYPTO_OP_DECRYPT |
+		    CRYPTO_OP_VERIFY_DIGEST;
+	}
+	crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_IV_SEPARATE;
+	crypto_use_uio(crp, GET_UIO_STRUCT(data_uio));
+
+	crp->crp_aad_start = 0;
+	crp->crp_aad_length = auth_len;
+	crp->crp_payload_start = auth_len;
+	crp->crp_payload_length = datalen;
+	crp->crp_digest_start = auth_len + datalen;
+
+	bcopy(ivbuf, crp->crp_iv, ZIO_DATA_IV_LEN);
+	error = zfs_crypto_dispatch(session, crp);
+	crypto_freereq(crp);
+out:
+#ifdef FCRYPTO_DEBUG
+	if (error)
+		printf("%s: returning error %d\n", __FUNCTION__, error);
+#endif
+	if (input_sessionp == NULL) {
+		/*
+		 * freebsd_crypt_newsession() can fail before it initializes
+		 * fs_lock (unsupported cipher type or key length).  The
+		 * session memory is zeroed on allocation, so only run the
+		 * full teardown, which destroys the lock, when the lock
+		 * really exists.
+		 */
+		if (mtx_initialized(&session->fs_lock))
+			freebsd_crypt_freesession(session);
+		kmem_free(session, sizeof (*session));
+	}
+	return (error);
+}
+
+#else
+/*
+ * FreeBSD < 1300087 variant, built on the cryptoini interface.
+ *
+ * sessp:  session to fill in (fs_sid and fs_lock); left untouched on
+ *         failure.
+ * c_info: selects AES-GCM or AES-CCM through ci_crypt_type.
+ * key:    raw key; ck_length is in bits.
+ *
+ * Returns 0 on success, EINVAL for an unsupported key length, ENOTSUP for
+ * an unsupported cipher type, or the error from crypto_newsession().
+ */
+int
+freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
+    struct zio_crypt_info *c_info, crypto_key_t *key)
+{
+	struct cryptoini auth_ini, enc_ini;
+	struct enc_xform *xform;
+	struct auth_hash *xauth;
+	const size_t key_bytes = key->ck_length / 8;
+	crypto_session_t sid;
+	int rc;
+
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
+	    __FUNCTION__, sessp,
+	    c_info->ci_algname, c_info->ci_crypt_type,
+	    (unsigned int)c_info->ci_keylen, c_info->ci_name,
+	    key->ck_format, key->ck_data, (unsigned int)key->ck_length);
+	printf("\tkey = { ");
+	for (int i = 0; i < key->ck_length / 8; i++) {
+		uint8_t *b = (uint8_t *)key->ck_data;
+		printf("%02x ", b[i]);
+	}
+	printf("}\n");
+#endif
+	/* Pick the cipher and the matching authentication transform. */
+	if (c_info->ci_crypt_type == ZC_TYPE_GCM) {
+		xform = &enc_xform_aes_nist_gcm;
+		if (key_bytes == AES_128_GMAC_KEY_LEN)
+			xauth = &auth_hash_nist_gmac_aes_128;
+		else if (key_bytes == AES_192_GMAC_KEY_LEN)
+			xauth = &auth_hash_nist_gmac_aes_192;
+		else if (key_bytes == AES_256_GMAC_KEY_LEN)
+			xauth = &auth_hash_nist_gmac_aes_256;
+		else
+			return (EINVAL);
+	} else if (c_info->ci_crypt_type == ZC_TYPE_CCM) {
+		xform = &enc_xform_ccm;
+		if (key_bytes == AES_128_CBC_MAC_KEY_LEN)
+			xauth = &auth_hash_ccm_cbc_mac_128;
+		else if (key_bytes == AES_192_CBC_MAC_KEY_LEN)
+			xauth = &auth_hash_ccm_cbc_mac_192;
+		else if (key_bytes == AES_256_CBC_MAC_KEY_LEN)
+			xauth = &auth_hash_ccm_cbc_mac_256;
+		else
+			return (EINVAL);
+	} else {
+		return (ENOTSUP);
+	}
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
+	    "auth %s (key length %d)\n",
+	    __FUNCTION__, __LINE__,
+	    xform->name, (unsigned int)key->ck_length,
+	    (unsigned int)key->ck_length/8,
+	    xauth->name, xauth->keysize);
+#endif
+
+	/*
+	 * Chain the descriptors authentication first, encryption second.
+	 * Both use the same key; every other field stays zeroed.
+	 */
+	bzero(&auth_ini, sizeof (auth_ini));
+	auth_ini.cri_alg = xauth->type;
+	auth_ini.cri_key = key->ck_data;
+	auth_ini.cri_klen = key->ck_length;
+	auth_ini.cri_next = &enc_ini;
+
+	bzero(&enc_ini, sizeof (enc_ini));
+	enc_ini.cri_alg = xform->type;
+	enc_ini.cri_key = key->ck_data;
+	enc_ini.cri_klen = key->ck_length;
+	enc_ini.cri_next = NULL;
+
+	rc = crypto_newsession(&sid, &auth_ini,
+	    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
+	if (rc != 0) {
+		printf("%s(%d):  crypto_newsession failed with %d\n",
+		    __FUNCTION__, __LINE__, rc);
+		return (rc);
+	}
+
+	sessp->fs_sid = sid;
+	mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
+	    NULL, MTX_DEF);
+	crypt_sessions++;
+	return (0);
+}
+
+/*
+ * The meat of encryption/decryption (FreeBSD < 1300087 variant).
+ *
+ * encrypt:         B_TRUE to encrypt, B_FALSE to decrypt.
+ * input_sessionp:  session to use.  If it is NULL, a temporary
+ *                  cryptographic session is created from c_info/key and
+ *                  released when done.
+ * data_uio:        buffers laid out as auth_len bytes of authenticated
+ *                  data followed by datalen bytes of payload; the digest
+ *                  is placed at offset auth_len + datalen.
+ * ivbuf:           IV; ZIO_DATA_IV_LEN bytes are copied from it.
+ *
+ * Returns 0 on success, EINVAL/ENOTSUP for an unsupported key length or
+ * cipher type, ENOMEM if no request could be allocated, or the error
+ * from session creation or dispatch.
+ */
+int
+freebsd_crypt_uio(boolean_t encrypt,
+    freebsd_crypt_session_t *input_sessionp,
+    struct zio_crypt_info *c_info,
+    zfs_uio_t *data_uio,
+    crypto_key_t *key,
+    uint8_t *ivbuf,
+    size_t datalen,
+    size_t auth_len)
+{
+	struct cryptop *crp;
+	struct cryptodesc *enc_desc, *auth_desc;
+	struct enc_xform *xform;
+	struct auth_hash *xauth;
+	freebsd_crypt_session_t *session = NULL;
+	int error = 0;
+
+	freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
+	    key, ivbuf, datalen, auth_len);
+	switch (c_info->ci_crypt_type) {
+	case ZC_TYPE_GCM:
+		xform = &enc_xform_aes_nist_gcm;
+		switch (key->ck_length/8) {
+		case AES_128_GMAC_KEY_LEN:
+			xauth = &auth_hash_nist_gmac_aes_128;
+			break;
+		case AES_192_GMAC_KEY_LEN:
+			xauth = &auth_hash_nist_gmac_aes_192;
+			break;
+		case AES_256_GMAC_KEY_LEN:
+			xauth = &auth_hash_nist_gmac_aes_256;
+			break;
+		default:
+			error = EINVAL;
+			goto bad;
+		}
+		break;
+	case ZC_TYPE_CCM:
+		xform = &enc_xform_ccm;
+		switch (key->ck_length/8) {
+		case AES_128_CBC_MAC_KEY_LEN:
+			xauth = &auth_hash_ccm_cbc_mac_128;
+			break;
+		case AES_192_CBC_MAC_KEY_LEN:
+			xauth = &auth_hash_ccm_cbc_mac_192;
+			break;
+		case AES_256_CBC_MAC_KEY_LEN:
+			xauth = &auth_hash_ccm_cbc_mac_256;
+			break;
+		default:
+			error = EINVAL;
+			goto bad;
+		}
+		break;
+	default:
+		error = ENOTSUP;
+		goto bad;
+	}
+
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
+	    "auth %s (key length %d)\n",
+	    __FUNCTION__, __LINE__,
+	    xform->name, (unsigned int)key->ck_length,
+	    (unsigned int)key->ck_length/8,
+	    xauth->name, xauth->keysize);
+#endif
+
+	if (input_sessionp == NULL) {
+		session = kmem_zalloc(sizeof (*session), KM_SLEEP);
+		error = freebsd_crypt_newsession(session, c_info, key);
+		if (error)
+			goto out;
+	} else
+		session = input_sessionp;
+
+	crp = crypto_getreq(2);
+	if (crp == NULL) {
+		/*
+		 * Leave through "out" so that a temporary session created
+		 * above is released instead of leaked.
+		 */
+		error = ENOMEM;
+		goto out;
+	}
+
+	auth_desc = crp->crp_desc;
+	enc_desc = auth_desc->crd_next;
+
+	crp->crp_session = session->fs_sid;
+	crp->crp_ilen = auth_len + datalen;
+	crp->crp_buf = (void*)GET_UIO_STRUCT(data_uio);
+	crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC;
+
+	auth_desc->crd_skip = 0;
+	auth_desc->crd_len = auth_len;
+	auth_desc->crd_inject = auth_len + datalen;
+	auth_desc->crd_alg = xauth->type;
+#ifdef FCRYPTO_DEBUG
+	printf("%s: auth: skip = %u, len = %u, inject = %u\n",
+	    __FUNCTION__, auth_desc->crd_skip, auth_desc->crd_len,
+	    auth_desc->crd_inject);
+#endif
+
+	enc_desc->crd_skip = auth_len;
+	enc_desc->crd_len = datalen;
+	enc_desc->crd_inject = auth_len;
+	enc_desc->crd_alg = xform->type;
+	enc_desc->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
+	bcopy(ivbuf, enc_desc->crd_iv, ZIO_DATA_IV_LEN);
+	enc_desc->crd_next = NULL;
+
+#ifdef FCRYPTO_DEBUG
+	printf("%s: enc: skip = %u, len = %u, inject = %u\n",
+	    __FUNCTION__, enc_desc->crd_skip, enc_desc->crd_len,
+	    enc_desc->crd_inject);
+#endif
+
+	if (encrypt)
+		enc_desc->crd_flags |= CRD_F_ENCRYPT;
+
+	error = zfs_crypto_dispatch(session, crp);
+	crypto_freereq(crp);
+out:
+	if (input_sessionp == NULL) {
+		/*
+		 * If creating the temporary session failed, its lock was
+		 * never initialized.  The session memory is zeroed on
+		 * allocation, so only run the full teardown, which destroys
+		 * the lock, when the lock really exists.
+		 */
+		if (mtx_initialized(&session->fs_lock))
+			freebsd_crypt_freesession(session);
+		kmem_free(session, sizeof (*session));
+	}
+bad:
+#ifdef FCRYPTO_DEBUG
+	if (error)
+		printf("%s: returning error %d\n", __FUNCTION__, error);
+#endif
+	return (error);
+}
+#endif

diff --git a/zfs/module/os/freebsd/zfs/dmu_os.c b/zfs/module/os/freebsd/zfs/dmu_os.c
new file mode 100644
index 0000000..2cf54a3
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/dmu_os.c

@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/dmu.h>
+#include <sys/dmu_impl.h>
+#include <sys/dmu_tx.h>
+#include <sys/dbuf.h>
+#include <sys/dnode.h>
+#include <sys/zfs_context.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_prop.h>
+#include <sys/dmu_zfetch.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zap.h>
+#include <sys/zio_checksum.h>
+#include <sys/zio_compress.h>
+#include <sys/sa.h>
+#include <sys/zfeature.h>
+#include <sys/abd.h>
+#include <sys/zfs_rlock.h>
+#include <sys/racct.h>
+#include <sys/vm.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+
+#include <sys/ccompat.h>
+
+#ifndef IDX_TO_OFF
+#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
+#endif
+
+#if  __FreeBSD_version < 1300051
+#define	VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY
+#else
+#define	VM_ALLOC_BUSY_FLAGS  VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY
+#endif
+
+
+#if __FreeBSD_version < 1300072
+#define	dmu_page_lock(m)	vm_page_lock(m)
+#define	dmu_page_unlock(m)	vm_page_unlock(m)
+#else
+#define	dmu_page_lock(m)
+#define	dmu_page_unlock(m)
+#endif
+
+/*
+ * Hold the buffers backing [offset, offset + length) of an object that is
+ * identified by number rather than by dnode.  The dnode is held only for
+ * the duration of the dmu_buf_hold_array_by_dnode() call, which is made
+ * with DMU_READ_PREFETCH.
+ *
+ * Returns 0 on success or the error from dnode_hold() or
+ * dmu_buf_hold_array_by_dnode().
+ */
+static int
+dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+{
+	dnode_t *dnp;
+	int rc = dnode_hold(os, object, FTAG, &dnp);
+
+	if (rc == 0) {
+		rc = dmu_buf_hold_array_by_dnode(dnp, offset, length, read,
+		    tag, numbufsp, dbpp, DMU_READ_PREFETCH);
+		dnode_rele(dnp, FTAG);
+	}
+
+	return (rc);
+}
+
+/*
+ * Copy the contents of the pages in ma[] into the buffers that back
+ * [offset, offset + size) of the given object, as part of transaction tx.
+ *
+ * Returns 0 on success (including when size is 0) or the error from
+ * dmu_buf_hold_array().
+ */
+int
+dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+    vm_page_t *ma, dmu_tx_t *tx)
+{
+	dmu_buf_t **bufs;
+	struct sf_buf *sf;
+	int nbufs, rc;
+
+	if (size == 0)
+		return (0);
+
+	rc = dmu_buf_hold_array(os, object, offset, size,
+	    FALSE, FTAG, &nbufs, &bufs);
+	if (rc != 0)
+		return (rc);
+
+	for (int b = 0; b < nbufs; b++) {
+		dmu_buf_t *db = bufs[b];
+		int bufoff, tocpy;
+
+		ASSERT3U(size, >, 0);
+		ASSERT3U(db->db_size, >=, PAGESIZE);
+
+		bufoff = offset - db->db_offset;
+		tocpy = (int)MIN(db->db_size - bufoff, size);
+
+		/* Only the first and last buffers may be partially written. */
+		ASSERT(b == 0 || b == nbufs-1 || tocpy == db->db_size);
+
+		/*
+		 * Whole-buffer copies use the will_fill/fill_done pair;
+		 * partial ones only dirty the buffer.
+		 */
+		if (tocpy != db->db_size)
+			dmu_buf_will_dirty(db, tx);
+		else
+			dmu_buf_will_fill(db, tx);
+
+		/* Copy one page per iteration, advancing through ma[]. */
+		for (int done = 0; done < tocpy; done += PAGESIZE) {
+			vm_page_t pg = *ma++;
+			int chunk = MIN(PAGESIZE, tocpy - done);
+			caddr_t va;
+
+			ASSERT3U(ptoa(pg->pindex), ==,
+			    db->db_offset + bufoff);
+			va = zfs_map_page(pg, &sf);
+			bcopy(va, (char *)db->db_data + bufoff, chunk);
+			zfs_unmap_page(sf);
+			bufoff += PAGESIZE;
+		}
+
+		if (tocpy == db->db_size)
+			dmu_buf_fill_done(db, tx);
+
+		offset += tocpy;
+		size -= tocpy;
+	}
+	dmu_buf_rele_array(bufs, nbufs, FTAG);
+	return (rc);
+}
+
+/*
+ * Fill the pages in ma[] with object data, optionally populating extra
+ * pages before and after the requested range.
+ *
+ * ma, count: the requested pages; their page indexes are asserted to be
+ *            consecutive.
+ * rbehind:   in: number of read-behind pages to attempt;
+ *            out: number of read-behind pages actually filled.
+ * rahead:    in: number of read-ahead pages to attempt;
+ *            out: number of read-ahead pages actually filled.
+ * last_size: number of bytes wanted from the last requested page
+ *            (at most PAGE_SIZE).
+ *
+ * Returns 0 on success or the error from dmu_buf_hold_array().
+ */
+int
+dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
+    int *rbehind, int *rahead, int last_size)
+{
+	struct sf_buf *sf;
+	vm_object_t vmobj;
+	vm_page_t m;
+	dmu_buf_t **dbp;
+	dmu_buf_t *db;
+	caddr_t va;
+	int numbufs, i;
+	int bufoff, pgoff, tocpy;
+	int mi, di;
+	int err;
+
+	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
+	ASSERT3S(last_size, <=, PAGE_SIZE);
+
+	/* Hold the buffers that cover the requested pages only. */
+	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
+	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
+	if (err != 0)
+		return (err);
+
+#ifdef ZFS_DEBUG
+	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
+	if (dbp[0]->db_offset != 0 || numbufs > 1) {
+		for (i = 0; i < numbufs; i++) {
+			ASSERT(ISP2(dbp[i]->db_size));
+			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
+			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
+		}
+	}
+#endif
+
+	vmobj = ma[0]->object;
+	zfs_vmobject_wlock_12(vmobj);
+
+	/*
+	 * Read-behind: walk backwards from the first requested page, copying
+	 * from the first held buffer.  Stop at the first page that cannot be
+	 * grabbed or that already holds valid data.
+	 */
+	db = dbp[0];
+	for (i = 0; i < *rbehind; i++) {
+		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
+		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
+		if (m == NULL)
+			break;
+		if (!vm_page_none_valid(m)) {
+			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_do_sunbusy(m);
+			break;
+		}
+		ASSERT3U(m->dirty, ==, 0);
+		ASSERT(!pmap_page_is_write_mapped(m));
+
+		ASSERT3U(db->db_size, >, PAGE_SIZE);
+		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
+		va = zfs_map_page(m, &sf);
+		bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
+		zfs_unmap_page(sf);
+		vm_page_valid(m);
+		dmu_page_lock(m);
+		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
+			vm_page_activate(m);
+		else
+			vm_page_deactivate(m);
+		dmu_page_unlock(m);
+		vm_page_do_sunbusy(m);
+	}
+	*rbehind = i;
+
+	/*
+	 * Main copy: mi/pgoff are the cursor into the page array and
+	 * di/bufoff the cursor into the buffer array.  Each iteration copies
+	 * up to whichever of the current page or current buffer ends first.
+	 * Pages equal to bogus_page are skipped over without being mapped or
+	 * written.
+	 */
+	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
+	pgoff = 0;
+	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
+		if (pgoff == 0) {
+			m = ma[mi];
+			if (m != bogus_page) {
+				vm_page_assert_xbusied(m);
+				ASSERT(vm_page_none_valid(m));
+				ASSERT3U(m->dirty, ==, 0);
+				ASSERT(!pmap_page_is_write_mapped(m));
+				va = zfs_map_page(m, &sf);
+			}
+		}
+		if (bufoff == 0)
+			db = dbp[di];
+
+		if (m != bogus_page) {
+			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
+			    db->db_offset + bufoff);
+		}
+
+		/*
+		 * We do not need to clamp the copy size by the file
+		 * size as the last block is zero-filled beyond the
+		 * end of file anyway.
+		 */
+		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
+		ASSERT3S(tocpy, >=, 0);
+		if (m != bogus_page)
+			bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
+
+		pgoff += tocpy;
+		ASSERT3S(pgoff, >=, 0);
+		ASSERT3S(pgoff, <=, PAGESIZE);
+		if (pgoff == PAGESIZE) {
+			/* Page complete: unmap it and move to the next one. */
+			if (m != bogus_page) {
+				zfs_unmap_page(sf);
+				vm_page_valid(m);
+			}
+			ASSERT3S(mi, <, count);
+			mi++;
+			pgoff = 0;
+		}
+
+		bufoff += tocpy;
+		ASSERT3S(bufoff, >=, 0);
+		ASSERT3S(bufoff, <=, db->db_size);
+		if (bufoff == db->db_size) {
+			/* Buffer exhausted: move to the next one. */
+			ASSERT3S(di, <, numbufs);
+			di++;
+			bufoff = 0;
+		}
+	}
+
+#ifdef ZFS_DEBUG
+	/*
+	 * Three possibilities:
+	 * - last requested page ends at a buffer boundary and, thus,
+	 *   all pages and buffers have been iterated;
+	 * - all requested pages are filled, but the last buffer
+	 *   has not been exhausted;
+	 *   the read-ahead is possible only in this case;
+	 * - all buffers have been read, but the last page has not been
+	 *   fully filled;
+	 *   this is only possible if the file has only a single buffer
+	 *   with a size that is not a multiple of the page size.
+	 */
+	if (mi == count) {
+		ASSERT3S(di, >=, numbufs - 1);
+		IMPLY(*rahead != 0, di == numbufs - 1);
+		IMPLY(*rahead != 0, bufoff != 0);
+		ASSERT0(pgoff);
+	}
+	if (di == numbufs) {
+		ASSERT3S(mi, >=, count - 1);
+		ASSERT0(*rahead);
+		IMPLY(pgoff == 0, mi == count);
+		if (pgoff != 0) {
+			ASSERT3S(mi, ==, count - 1);
+			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
+		}
+	}
+#endif
+	/* Zero the unfilled tail of a partially filled last page. */
+	if (pgoff != 0) {
+		ASSERT3P(m, !=, bogus_page);
+		bzero(va + pgoff, PAGESIZE - pgoff);
+		zfs_unmap_page(sf);
+		vm_page_valid(m);
+	}
+
+	/*
+	 * Read-ahead: walk forwards from the last requested page, copying
+	 * from the buffer the main loop left in db.  Stop at the first page
+	 * that cannot be grabbed or that already holds valid data.
+	 */
+	for (i = 0; i < *rahead; i++) {
+		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
+		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
+		if (m == NULL)
+			break;
+		if (!vm_page_none_valid(m)) {
+			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_do_sunbusy(m);
+			break;
+		}
+		ASSERT3U(m->dirty, ==, 0);
+		ASSERT(!pmap_page_is_write_mapped(m));
+
+		ASSERT3U(db->db_size, >, PAGE_SIZE);
+		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
+		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
+		va = zfs_map_page(m, &sf);
+		bcopy((char *)db->db_data + bufoff, va, tocpy);
+		if (tocpy < PAGESIZE) {
+			ASSERT3S(i, ==, *rahead - 1);
+			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
+			bzero(va + tocpy, PAGESIZE - tocpy);
+		}
+		zfs_unmap_page(sf);
+		vm_page_valid(m);
+		dmu_page_lock(m);
+		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
+			vm_page_activate(m);
+		else
+			vm_page_deactivate(m);
+		dmu_page_unlock(m);
+		vm_page_do_sunbusy(m);
+	}
+	*rahead = i;
+	zfs_vmobject_wunlock_12(vmobj);
+
+	dmu_buf_rele_array(dbp, numbufs, FTAG);
+	return (0);
+}

diff --git a/zfs/module/os/freebsd/zfs/hkdf.c b/zfs/module/os/freebsd/zfs/hkdf.c
new file mode 100644
index 0000000..8324ff2
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/hkdf.c

@@ -0,0 +1,102 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/dmu.h>
+#include <sys/hkdf.h>
+#include <sys/freebsd_crypto.h>
+#include <sys/hkdf.h>
+
+/*
+ * HKDF "extract" step: compute HMAC-SHA512 over key_material, keyed with
+ * salt, and store the SHA512_DIGEST_LENGTH-byte result in out_buf.
+ * Always returns 0.
+ */
+static int
+hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material,
+    uint_t km_len, uint8_t *out_buf)
+{
+	crypto_key_t salt_key;
+
+	/* The salt plays the role of the HMAC key. */
+	salt_key.ck_data = salt;
+	salt_key.ck_length = CRYPTO_BYTES2BITS(salt_len);
+	salt_key.ck_format = CRYPTO_KEY_RAW;
+
+	crypto_mac(&salt_key, key_material, km_len, out_buf,
+	    SHA512_DIGEST_LENGTH);
+
+	return (0);
+}
+
+/*
+ * HKDF "expand" step (RFC 5869, section 2.3) using HMAC-SHA512.
+ *
+ * extract_key: pseudorandom key from the extract step,
+ *              SHA512_DIGEST_LENGTH bytes.
+ * info:        context bytes mixed into every block (info_len bytes).
+ * out_buf:     receives out_len bytes of output keying material.
+ *
+ * The output is the concatenation T(1) | T(2) | ... truncated to out_len,
+ * where T(0) is empty and T(i) = HMAC(extract_key, T(i-1) | info | i).
+ *
+ * Returns 0 on success or EINVAL if more than 255 blocks would be needed.
+ */
+static int
+hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len,
+    uint8_t *out_buf, uint_t out_len)
+{
+	struct hmac_ctx ctx;
+	crypto_key_t key;
+	uint_t i, T_len = 0, pos = 0;
+	uint8_t c;
+	/*
+	 * Number of digest-sized blocks needed, rounded up.  Written as a
+	 * quotient plus remainder test so that it cannot overflow and so that
+	 * an exact multiple of the digest length does not cost an extra block.
+	 */
+	uint_t N = out_len / SHA512_DIGEST_LENGTH +
+	    ((out_len % SHA512_DIGEST_LENGTH) != 0);
+	uint8_t T[SHA512_DIGEST_LENGTH];
+
+	/* The block counter is a single byte. */
+	if (N > 255)
+		return (SET_ERROR(EINVAL));
+
+	/* initialize the extracted key as a crypto key */
+	key.ck_format = CRYPTO_KEY_RAW;
+	key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH);
+	key.ck_data = extract_key;
+
+	for (i = 1; i <= N; i++) {
+		c = i;
+
+		/* T(i) = HMAC(key, T(i-1) | info | i); T(0) is empty. */
+		crypto_mac_init(&ctx, &key);
+		crypto_mac_update(&ctx, T, T_len);
+		crypto_mac_update(&ctx, info, info_len);
+		crypto_mac_update(&ctx, &c, 1);
+		crypto_mac_final(&ctx, T, SHA512_DIGEST_LENGTH);
+		/* From now on T holds a full block to chain into the next. */
+		T_len = SHA512_DIGEST_LENGTH;
+
+		/* The final block may be truncated to fit out_len. */
+		bcopy(T, out_buf + pos,
+		    (i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos));
+		pos += SHA512_DIGEST_LENGTH;
+	}
+
+	return (0);
+}
+
+/*
+ * HKDF is designed to be a relatively fast function for deriving keys from a
+ * master key + a salt. We use this function to generate new encryption keys
+ * so as to avoid hitting the cryptographic limits of the underlying
+ * encryption modes. Note that, for the sake of deriving encryption keys, the
+ * info parameter is called the "salt" everywhere else in the code.
+ */
+int
+hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
+    uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
+    uint_t out_len)
+{
+	uint8_t prk[SHA512_DIGEST_LENGTH];
+	int rc;
+
+	/* Extract a pseudorandom key, then expand it to out_len bytes. */
+	rc = hkdf_sha512_extract(salt, salt_len, key_material, km_len, prk);
+	if (rc == 0) {
+		rc = hkdf_sha512_expand(prk, info, info_len, output_key,
+		    out_len);
+	}
+
+	return (rc);
+}

diff --git a/zfs/module/os/freebsd/zfs/kmod_core.c b/zfs/module/os/freebsd/zfs/kmod_core.c
new file mode 100644
index 0000000..6b23bff
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/kmod_core.c

@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/dmu.h>
+#include <sys/dmu_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_send.h>
+#include <sys/dmu_tx.h>
+#include <sys/dsl_bookmark.h>
+#include <sys/dsl_crypt.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_deleg.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_scan.h>
+#include <sys/dsl_userhold.h>
+#include <sys/errno.h>
+#include <sys/eventhandler.h>
+#include <sys/file.h>
+#include <sys/fm/util.h>
+#include <sys/fs/zfs.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/nvpair.h>
+#include <sys/policy.h>
+#include <sys/proc.h>
+#include <sys/sdt.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/uio.h>
+#include <sys/vdev.h>
+#include <sys/vdev_removal.h>
+#include <sys/zap.h>
+#include <sys/zcp.h>
+#include <sys/zfeature.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_ioctl_compat.h>
+#include <sys/zfs_ioctl_impl.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
+#include <sys/zio_checksum.h>
+#include <sys/zone.h>
+#include <sys/zvol.h>
+
+#include "zfs_comutil.h"
+#include "zfs_deleg.h"
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+
+SYSCTL_DECL(_vfs_zfs);
+SYSCTL_DECL(_vfs_zfs_vdev);
+
+/* ioctl ABI version, exported read-only under the _vfs_zfs_version node. */
+static int zfs_version_ioctl = ZFS_IOCVER_OZFS;
+SYSCTL_DECL(_vfs_zfs_version);
+SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl,
+    0, "ZFS_IOCTL_VERSION");
+
+/* Control device node created by zfsdev_attach(). */
+static struct cdev *zfsdev;
+
+/* Root-mount hold taken and released by zfs__init(). */
+static struct root_hold_token *zfs_root_token;
+
+/* Keys defined in other files (declared once; a repeat was redundant). */
+extern uint_t rrw_tsd_key;
+extern uint_t zfs_allow_log_key;
+extern uint_t zfs_geom_probe_vdev_key;
+
+static int zfs__init(void);
+static int zfs__fini(void);
+static void zfs_shutdown(void *, int);
+
+/* Tag returned when zfs_shutdown() is registered in zfs_modevent(). */
+static eventhandler_tag zfs_shutdown_event_tag;
+extern zfsdev_state_t *zfsdev_state_list;
+
+/* Below this many kernel stack pages zfs__init() prints a notice. */
+#define	ZFS_MIN_KSTACK_PAGES 4
+
+/*
+ * ioctl entry point of the ZFS control device.
+ *
+ * The ioctl argument is a zfs_iocparm_t carrying the user address of the
+ * real command structure and the ioctl ABI version the caller uses.  The
+ * command is copied in (converted from the legacy layout when the caller
+ * reports ZFS_IOCVER_LEGACY), dispatched through zfsdev_ioctl_common(), and
+ * copied back out to the same user address.
+ *
+ * Returns EINVAL when the ioctl parameter length is not
+ * sizeof (zfs_iocparm_t), ENOTSUP when a legacy command number has no
+ * mapping, EFAULT when copying from user space fails (or copying back fails
+ * after an otherwise successful command), and otherwise the result of
+ * zfsdev_ioctl_common().
+ */
+static int
+zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
+    struct thread *td)
+{
+	uint_t len;
+	int vecnum;
+	zfs_iocparm_t *zp;
+	zfs_cmd_t *zc;
+	zfs_cmd_legacy_t *zcl;
+	int rc, error;
+	void *uaddr;
+
+	len = IOCPARM_LEN(zcmd);
+	/* The low byte of the command word is the ioctl vector number. */
+	vecnum = zcmd & 0xff;
+	zp = (void *)arg;
+	error = 0;
+	zcl = NULL;
+
+	/* Checked before anything is allocated, so a plain return is safe. */
+	if (len != sizeof (zfs_iocparm_t))
+		return (EINVAL);
+
+	uaddr = (void *)zp->zfs_cmd;
+	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+	/*
+	 * Remap ioctl code for legacy user binaries
+	 */
+	if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) {
+		vecnum = zfs_ioctl_legacy_to_ozfs(vecnum);
+		if (vecnum < 0) {
+			/* zcl has not been allocated yet; only zc to free. */
+			kmem_free(zc, sizeof (zfs_cmd_t));
+			return (ENOTSUP);
+		}
+		zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
+		if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
+			error = SET_ERROR(EFAULT);
+			goto out;
+		}
+		zfs_cmd_legacy_to_ozfs(zcl, zc);
+	} else if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
+		error = SET_ERROR(EFAULT);
+		goto out;
+	}
+	error = zfsdev_ioctl_common(vecnum, zc, 0);
+	/* The command is copied back even when the handler failed. */
+	if (zcl) {
+		zfs_cmd_ozfs_to_legacy(zc, zcl);
+		rc = copyout(zcl, uaddr, sizeof (*zcl));
+	} else {
+		rc = copyout(zc, uaddr, sizeof (*zc));
+	}
+	/* A copyout failure never hides an error from the handler itself. */
+	if (error == 0 && rc != 0)
+		error = SET_ERROR(EFAULT);
+out:
+	if (zcl)
+		kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
+	kmem_free(zc, sizeof (zfs_cmd_t));
+	MPASS(tsd_get(rrw_tsd_key) == NULL);
+	return (error);
+}
+
+/*
+ * Destructor registered with devfs_set_cdevpriv() in zfs_ctldev_init().
+ *
+ * data is the zfsdev_state_t attached to the open.  Under
+ * zfsdev_state_lock the slot's minor is set to -1 (the value
+ * zfs_ctldev_init() looks for when searching for a slot to reuse) and its
+ * onexit and zevent state are destroyed.  The structure itself is not
+ * freed here.
+ */
+static void
+zfsdev_close(void *data)
+{
+	zfsdev_state_t *zs = data;
+
+	ASSERT(zs != NULL);
+
+	mutex_enter(&zfsdev_state_lock);
+
+	ASSERT(zs->zs_minor != 0);
+
+	zs->zs_minor = -1;
+	zfs_onexit_destroy(zs->zs_onexit);
+	zfs_zevent_destroy(zs->zs_zevent);
+	/* Clear the pointers so the slot holds no stale references. */
+	zs->zs_onexit = NULL;
+	zs->zs_zevent = NULL;
+
+	mutex_exit(&zfsdev_state_lock);
+}
+
+/*
+ * Set up per-open state for the ZFS control device.
+ *
+ * Allocates a minor number, then either reuses a state slot whose minor is
+ * -1 (released by zfsdev_close()) or allocates a new one and appends it to
+ * zfsdev_state_list.  The state is attached to the open file with
+ * devfs_set_cdevpriv() so that zfsdev_close() runs as its destructor.
+ *
+ * The caller must hold zfsdev_state_lock.  devp is unused.
+ *
+ * Returns 0 on success, ENXIO when no minor number is available, or the
+ * error from devfs_set_cdevpriv().
+ */
+static int
+zfs_ctldev_init(struct cdev *devp)
+{
+	boolean_t newzs = B_FALSE;
+	minor_t minor;
+	zfsdev_state_t *zs, *zsprev = NULL;
+	int error;
+
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+
+	minor = zfsdev_minor_alloc();
+	if (minor == 0)
+		return (SET_ERROR(ENXIO));
+
+	/* Look for a released slot; remember the tail for appending. */
+	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+		if (zs->zs_minor == -1)
+			break;
+		zsprev = zs;
+	}
+
+	if (!zs) {
+		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+		newzs = B_TRUE;
+	}
+
+	/*
+	 * If the state cannot be attached to this open, no destructor will
+	 * ever run for it: back out before the slot is published.  A reused
+	 * slot has not been modified yet and simply stays available.
+	 */
+	error = devfs_set_cdevpriv(zs, zfsdev_close);
+	if (error != 0) {
+		if (newzs)
+			kmem_free(zs, sizeof (zfsdev_state_t));
+		return (error);
+	}
+
+	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
+	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
+
+	/*
+	 * The write barrier orders initialization before the store that
+	 * makes the slot visible: the list link for a new entry, the minor
+	 * number for a reused one.
+	 * NOTE(review): appending assumes the list already has at least one
+	 * entry (zsprev != NULL) -- confirm against the list's initializer.
+	 */
+	if (newzs) {
+		zs->zs_minor = minor;
+		wmb();
+		zsprev->zs_next = zs;
+	} else {
+		wmb();
+		zs->zs_minor = minor;
+	}
+	return (0);
+}
+
+/*
+ * d_open handler: create the per-open state while holding
+ * zfsdev_state_lock and hand back whatever zfs_ctldev_init() reports.
+ */
+static int
+zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
+{
+	int rc;
+
+	mutex_enter(&zfsdev_state_lock);
+	rc = zfs_ctldev_init(devp);
+	mutex_exit(&zfsdev_state_lock);
+	return (rc);
+}
+
+/*
+ * Character-device switch for the ZFS control device.  Only open and ioctl
+ * handlers are supplied; per-open teardown is done by the destructor that
+ * zfs_ctldev_init() registers with devfs_set_cdevpriv().
+ */
+static struct cdevsw zfs_cdevsw = {
+	.d_version =	D_VERSION,
+	.d_open =	zfsdev_open,
+	.d_ioctl =	zfsdev_ioctl,
+	.d_name =	ZFS_DRIVER
+};
+
+/*
+ * Create the ZFS control device node, named ZFS_DRIVER, with owner
+ * UID_ROOT, group GID_OPERATOR and mode 0666, and remember it in zfsdev.
+ * Always returns 0.
+ */
+int
+zfsdev_attach(void)
+{
+	struct cdev *node;
+
+	node = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
+	    ZFS_DRIVER);
+	zfsdev = node;
+
+	return (0);
+}
+
+/*
+ * Destroy the control device node created by zfsdev_attach(), if any.
+ */
+void
+zfsdev_detach(void)
+{
+	if (zfsdev == NULL)
+		return;
+
+	destroy_dev(zfsdev);
+}
+
+/*
+ * Module load work: initialize ZFS via zfs_kmod_init() while holding a
+ * root-mount hold, so the hold is released on both the success and the
+ * failure path.  Returns 0 or the error from zfs_kmod_init().
+ */
+int
+zfs__init(void)
+{
+	int rc;
+
+#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
+	printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
+	    "overflow panic!\nPlease consider adding "
+	    "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
+	    ZFS_MIN_KSTACK_PAGES);
+#endif
+	zfs_root_token = root_mount_hold("ZFS");
+
+	rc = zfs_kmod_init();
+	if (rc == 0) {
+		tsd_create(&zfs_geom_probe_vdev_key, NULL);
+
+		printf("ZFS storage pool version: features support ("
+		    SPA_VERSION_STRING ")\n");
+		root_mount_rel(zfs_root_token);
+		ddi_sysevent_init();
+	} else {
+		printf("ZFS: Failed to Load ZFS Filesystem"
+		    ", rc = %d\n", rc);
+		root_mount_rel(zfs_root_token);
+	}
+
+	return (rc);
+}
+
+/*
+ * Module unload work.  Refuses with EBUSY while ZFS or zvols are busy or
+ * while zio injection is enabled; otherwise tears ZFS down and returns 0.
+ * The checks run in the same order and stop at the first one that hits.
+ */
+int
+zfs__fini(void)
+{
+	if (zfs_busy())
+		return (EBUSY);
+	if (zvol_busy())
+		return (EBUSY);
+	if (zio_injection_enabled)
+		return (EBUSY);
+
+	zfs_kmod_fini();
+	tsd_destroy(&zfs_geom_probe_vdev_key);
+
+	return (0);
+}
+
+/*
+ * Shutdown event handler: run the module teardown unless the system has
+ * panicked.  The result of zfs__fini() is deliberately not used.
+ */
+static void
+zfs_shutdown(void *arg __unused, int howto __unused)
+{
+	/* ZFS fini routines cannot work properly in a panicked system. */
+	if (panicstr != NULL)
+		return;
+
+	(void) zfs__fini();
+}
+
+/*
+ * Module event handler.
+ *
+ * MOD_LOAD initializes ZFS and, on success, registers zfs_shutdown() for
+ * the shutdown_post_sync event.  MOD_UNLOAD tears ZFS down and, on
+ * success, removes that registration if one exists.  MOD_SHUTDOWN is
+ * accepted and does nothing.  Any other event yields EOPNOTSUPP.
+ */
+static int
+zfs_modevent(module_t mod, int type, void *unused __unused)
+{
+	int rc;
+
+	if (type == MOD_LOAD) {
+		rc = zfs__init();
+		if (rc == 0) {
+			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
+			    shutdown_post_sync, zfs_shutdown, NULL,
+			    SHUTDOWN_PRI_FIRST);
+		}
+		return (rc);
+	}
+
+	if (type == MOD_UNLOAD) {
+		rc = zfs__fini();
+		if (rc == 0 && zfs_shutdown_event_tag != NULL) {
+			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
+			    zfs_shutdown_event_tag);
+		}
+		return (rc);
+	}
+
+	if (type == MOD_SHUTDOWN)
+		return (0);
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Module descriptor: the module is called "zfsctrl" and its events are
+ * delivered to zfs_modevent(); the last initializer is left as 0.
+ */
+static moduledata_t zfs_mod = {
+	"zfsctrl",
+	zfs_modevent,
+	0
+};
+
+/* Hook spa_boot_init() onto the mountroot event in kernel builds. */
+#ifdef _KERNEL
+EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
+#endif
+
+DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
+MODULE_VERSION(zfsctrl, 1);
+/* The dependency is named xdr after __FreeBSD_version 1300092, else krpc. */
+#if __FreeBSD_version > 1300092
+MODULE_DEPEND(zfsctrl, xdr, 1, 1, 1);
+#else
+MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
+#endif
+MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
+MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1);
+MODULE_DEPEND(zfsctrl, zlib, 1, 1, 1);

diff --git a/zfs/module/os/freebsd/zfs/spa_os.c b/zfs/module/os/freebsd/zfs/spa_os.c
new file mode 100644
index 0000000..9032e64
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/spa_os.c

@@ -0,0 +1,272 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+
+#include <sys/zfs_context.h>
+#include <sys/fm/fs/zfs.h>
+#include <sys/spa_impl.h>
+#include <sys/zio.h>
+#include <sys/zio_checksum.h>
+#include <sys/dmu.h>
+#include <sys/dmu_tx.h>
+#include <sys/zap.h>
+#include <sys/zil.h>
+#include <sys/ddt.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_os.h>
+#include <sys/vdev_removal.h>
+#include <sys/vdev_indirect_mapping.h>
+#include <sys/vdev_indirect_births.h>
+#include <sys/metaslab.h>
+#include <sys/metaslab_impl.h>
+#include <sys/uberblock_impl.h>
+#include <sys/txg.h>
+#include <sys/avl.h>
+#include <sys/bpobj.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dmu_objset.h>
+#include <sys/unique.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_synctask.h>
+#include <sys/fs/zfs.h>
+#include <sys/arc.h>
+#include <sys/callb.h>
+#include <sys/spa_boot.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/dsl_scan.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
+#include <sys/zfeature.h>
+#include <sys/zvol.h>
+#include <sys/abd.h>
+#include <sys/callb.h>
+#include <sys/zone.h>
+
+#include "zfs_prop.h"
+#include "zfs_comutil.h"
+
+/*
+ * Build a pool-level configuration for the pool called name from the
+ * labels returned by vdev_geom_read_pool_label().
+ *
+ * The label with the highest ZPOOL_CONFIG_POOL_TXG is used as the base.
+ * An array of top-level vdev configs is assembled from the labels' vdev
+ * trees; slots listed in the hole array become VDEV_TYPE_HOLE entries and
+ * any slot still empty becomes VDEV_TYPE_MISSING.  These are wrapped in a
+ * new root vdev that replaces the vdev tree of a copy of the base config.
+ *
+ * Returns a newly allocated nvlist, or NULL when the labels cannot be read
+ * or none of them carries a usable (non-zero txg) configuration.
+ */
+static nvlist_t *
+spa_generate_rootconf(const char *name)
+{
+	nvlist_t **configs, **tops;
+	nvlist_t *config;
+	nvlist_t *best_cfg, *nvtop, *nvroot;
+	uint64_t *holes;
+	uint64_t best_txg;
+	uint64_t nchildren;
+	uint64_t pgid;
+	uint64_t count;
+	uint64_t i;
+	uint_t   nholes;
+
+	if (vdev_geom_read_pool_label(name, &configs, &count) != 0)
+		return (NULL);
+
+	ASSERT3U(count, !=, 0);
+	best_cfg = NULL;
+	best_txg = 0;
+	for (i = 0; i < count; i++) {
+		uint64_t txg;
+
+		if (configs[i] == NULL)
+			continue;
+		txg = fnvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG);
+		if (txg > best_txg) {
+			best_txg = txg;
+			best_cfg = configs[i];
+		}
+	}
+
+	/*
+	 * Every entry was NULL or had txg 0: there is nothing to build
+	 * from.  Release what was read instead of using best_cfg unset.
+	 */
+	if (best_cfg == NULL) {
+		for (i = 0; i < count; i++)
+			fnvlist_free(configs[i]);
+		kmem_free(configs, count * sizeof (void *));
+		return (NULL);
+	}
+
+	/* Both lookups are optional; the defaults stay in place on failure. */
+	nchildren = 1;
+	nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren);
+	holes = NULL;
+	nholes = 0;
+	nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY,
+	    &holes, &nholes);
+
+	tops = kmem_zalloc(nchildren * sizeof (void *), KM_SLEEP);
+	for (i = 0; i < nchildren; i++) {
+		if (i >= count)
+			break;
+		if (configs[i] == NULL)
+			continue;
+		nvtop = fnvlist_lookup_nvlist(configs[i],
+		    ZPOOL_CONFIG_VDEV_TREE);
+		tops[i] = fnvlist_dup(nvtop);
+	}
+	for (i = 0; holes != NULL && i < nholes; i++) {
+		/*
+		 * holes[i] is the index used into tops[], so it is the
+		 * value that has to be bounds-checked.
+		 */
+		if (holes[i] >= nchildren)
+			continue;
+		if (tops[holes[i]] != NULL)
+			continue;
+		tops[holes[i]] = fnvlist_alloc();
+		fnvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_HOLE);
+		fnvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, holes[i]);
+		fnvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 0);
+	}
+	for (i = 0; i < nchildren; i++) {
+		if (tops[i] != NULL)
+			continue;
+		tops[i] = fnvlist_alloc();
+		fnvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_MISSING);
+		fnvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, i);
+		fnvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 0);
+	}
+
+	/*
+	 * Create pool config based on the best vdev config.
+	 */
+	config = fnvlist_dup(best_cfg);
+
+	/*
+	 * Put this pool's top-level vdevs into a root vdev.
+	 */
+	pgid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID);
+	nvroot = fnvlist_alloc();
+	fnvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
+	fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL);
+	fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid);
+	fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, tops,
+	    nchildren);
+
+	/*
+	 * Replace the existing vdev_tree with the new root vdev in
+	 * this pool's configuration (remove the old, add the new).
+	 */
+	fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot);
+
+	/*
+	 * Drop vdev config elements that should not be present at pool level.
+	 */
+	fnvlist_remove(config, ZPOOL_CONFIG_GUID);
+	fnvlist_remove(config, ZPOOL_CONFIG_TOP_GUID);
+
+	/* Everything below was copied into config; free the working data. */
+	for (i = 0; i < count; i++)
+		fnvlist_free(configs[i]);
+	kmem_free(configs, count * sizeof (void *));
+	for (i = 0; i < nchildren; i++)
+		fnvlist_free(tops[i]);
+	kmem_free(tops, nchildren * sizeof (void *));
+	fnvlist_free(nvroot);
+	return (config);
+}
+
+/*
+ * Add the root pool called name to the pool namespace.
+ *
+ * The configuration is generated from the on-disk labels; when that fails,
+ * a pool of the same name that is already in the namespace is used
+ * instead.  The pool is marked as the root pool with ZFS_IMPORT_VERBATIM,
+ * plus ZFS_IMPORT_CHECKPOINT when checkpointrewind is set.  The vdev tree
+ * of the configuration is then parsed and the parsed tree is freed again.
+ *
+ * Returns 0 on success or when the pool is already active, EIO when no
+ * label configuration could be generated and no pool of that name exists,
+ * or the error from spa_config_parse().
+ */
+int
+spa_import_rootpool(const char *name, bool checkpointrewind)
+{
+	spa_t *spa;
+	vdev_t *rvd;
+	nvlist_t *config, *nvtop;
+	char *pname;
+	int error;
+
+	/*
+	 * Read the label from the boot device and generate a configuration.
+	 */
+	config = spa_generate_rootconf(name);
+
+	mutex_enter(&spa_namespace_lock);
+	if (config != NULL) {
+		/* The labels must describe the pool that was asked for. */
+		pname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
+		VERIFY0(strcmp(name, pname));
+
+		if ((spa = spa_lookup(pname)) != NULL) {
+			/*
+			 * The pool could already be imported,
+			 * e.g., after reboot -r.
+			 */
+			if (spa->spa_state == POOL_STATE_ACTIVE) {
+				mutex_exit(&spa_namespace_lock);
+				fnvlist_free(config);
+				return (0);
+			}
+
+			/*
+			 * Remove the existing root pool from the namespace so
+			 * that we can replace it with the correct config
+			 * we just read in.
+			 */
+			spa_remove(spa);
+		}
+		spa = spa_add(pname, config, NULL);
+
+		/*
+		 * Set spa_ubsync.ub_version as it can be used in vdev_alloc()
+		 * via spa_version().
+		 */
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+		    &spa->spa_ubsync.ub_version) != 0)
+			spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
+	} else if ((spa = spa_lookup(name)) == NULL) {
+		/* No labels and no known pool of this name: give up. */
+		mutex_exit(&spa_namespace_lock);
+		fnvlist_free(config);
+		cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
+		    name);
+		return (EIO);
+	} else {
+		/* Work on a private copy; it is freed before returning. */
+		config = fnvlist_dup(spa->spa_config);
+	}
+	spa->spa_is_root = B_TRUE;
+	spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
+	if (checkpointrewind) {
+		spa->spa_import_flags |= ZFS_IMPORT_CHECKPOINT;
+	}
+
+	/*
+	 * Build up a vdev tree based on the boot device's label config.
+	 */
+	nvtop = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
+	    VDEV_ALLOC_ROOTPOOL);
+	spa_config_exit(spa, SCL_ALL, FTAG);
+	if (error) {
+		mutex_exit(&spa_namespace_lock);
+		fnvlist_free(config);
+		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
+		    name);
+		return (error);
+	}
+
+	/* The parsed tree is not kept; release it under the config lock. */
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	vdev_free(rvd);
+	spa_config_exit(spa, SCL_ALL, FTAG);
+	mutex_exit(&spa_namespace_lock);
+
+	fnvlist_free(config);
+	return (0);
+}
+
+/*
+ * Zone name reported for this platform; always the string "freebsd".
+ */
+const char *
+spa_history_zone(void)
+{
+	static const char zone_name[] = "freebsd";
+
+	return (zone_name);
+}

diff --git a/zfs/module/os/freebsd/zfs/sysctl_os.c b/zfs/module/os/freebsd/zfs/sysctl_os.c
new file mode 100644
index 0000000..b5db3f8
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/sysctl_os.c

@@ -0,0 +1,735 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/stat.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
+#include <sys/zap.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
+#include <sys/dmu.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_deleg.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_impl.h>
+#include <sys/dmu_tx.h>
+#include <sys/sunddi.h>
+#include <sys/policy.h>
+#include <sys/zone.h>
+#include <sys/nvpair.h>
+#include <sys/mount.h>
+#include <sys/taskqueue.h>
+#include <sys/sdt.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zvol.h>
+#include <sys/dsl_scan.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_bookmark.h>
+#include <sys/dsl_userhold.h>
+#include <sys/zfeature.h>
+#include <sys/zcp.h>
+#include <sys/zio_checksum.h>
+#include <sys/vdev_removal.h>
+#include <sys/dsl_crypt.h>
+
+#include <sys/zfs_ioctl_compat.h>
+#include <sys/zfs_context.h>
+
+#include <sys/arc_impl.h>
+#include <sys/dsl_pool.h>
+
+
+/* BEGIN CSTYLED */
+SYSCTL_DECL(_vfs_zfs);
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS adaptive replacement cache");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS disk buf cache");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0, "ZFS disk buf cache");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, deadman, CTLFLAG_RW, 0, "ZFS deadman");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS dedup");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, l2arc, CTLFLAG_RW, 0, "ZFS l2arc");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, livelist, CTLFLAG_RW, 0, "ZFS livelist");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, lua, CTLFLAG_RW, 0, "ZFS lua");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, mg, CTLFLAG_RW, 0, "ZFS metaslab group");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, multihost, CTLFLAG_RW, 0, "ZFS multihost protection");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, prefetch, CTLFLAG_RW, 0, "ZFS prefetch");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, reconstruct, CTLFLAG_RW, 0, "ZFS reconstruct");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, recv, CTLFLAG_RW, 0, "ZFS receive");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, send, CTLFLAG_RW, 0, "ZFS send");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "ZFS space allocation");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS transaction group");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, vnops, CTLFLAG_RW, 0, "ZFS VNOPS");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS event");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
+
+SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
+    "ZFS livelist condense");
+SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
+SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
+SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
+    "ZFS VDEV mirror");
+
+SYSCTL_DECL(_vfs_zfs_version);
+SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
+    (ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
+
+extern arc_state_t ARC_anon;
+extern arc_state_t ARC_mru;
+extern arc_state_t ARC_mru_ghost;
+extern arc_state_t ARC_mfu;
+extern arc_state_t ARC_mfu_ghost;
+extern arc_state_t ARC_l2c_only;
+
+/*
+ * minimum lifespan of a prefetch block in clock ticks
+ * (initialized in arc_init())
+ */
+
+/* arc.c */
+
+/*
+ * sysctl handler for the ARC maximum size.
+ *
+ * Reads report the current zfs_arc_max.  A written value must be 0 or lie
+ * at or above MIN_ARC_MAX, above arc_c_min and below arc_all_memory();
+ * anything else is rejected with EINVAL.  After an accepted write the ARC
+ * is retuned and, for a non-zero value, zfs_arc_max is set to the
+ * resulting arc_c_max.
+ *
+ * Returns 0 on success, EINVAL for an out-of-range value, or the error
+ * from sysctl_handle_long().
+ */
+int
+param_set_arc_max(SYSCTL_HANDLER_ARGS)
+{
+	/*
+	 * sysctl_handle_long() transfers exactly one long, so the scratch
+	 * value must have that type (the OID is declared CTLTYPE_ULONG); a
+	 * 64-bit local is only the same size where long is 64 bits wide.
+	 */
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_max;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	/* Stop on error, or when this was a read (no new value supplied). */
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < MIN_ARC_MAX || val <= arc_c_min ||
+	    val >= arc_all_memory()))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_max = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_max = arc_c_max;
+
+	return (0);
+}
+
+/*
+ * sysctl handler for the ARC minimum size.
+ *
+ * Reads report the current zfs_arc_min.  A written value must be 0 or lie
+ * between 2 << SPA_MAXBLOCKSHIFT and arc_c_max inclusive; anything else is
+ * rejected with EINVAL.  After an accepted write the ARC is retuned and,
+ * for a non-zero value, zfs_arc_min is set to the resulting arc_c_min.
+ *
+ * Returns 0 on success, EINVAL for an out-of-range value, or the error
+ * from sysctl_handle_long().
+ */
+int
+param_set_arc_min(SYSCTL_HANDLER_ARGS)
+{
+	/*
+	 * The OID is declared CTLTYPE_ULONG, so the value is moved as a
+	 * long with sysctl_handle_long(), the same way param_set_arc_max()
+	 * and param_set_arc_long() do.  A 64-bit handler only matches where
+	 * long is 64 bits wide.
+	 */
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_min;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	/* Stop on error, or when this was a read (no new value supplied). */
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < 2ULL << SPA_MAXBLOCKSHIFT || val > arc_c_max))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_min = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_min = arc_c_min;
+
+	return (0);
+}
+
+/* legacy compat */
+extern uint64_t l2arc_write_max;	/* def max write size */
+extern uint64_t l2arc_write_boost;	/* extra warmup write */
+extern uint64_t l2arc_headroom;		/* # of dev writes */
+extern uint64_t l2arc_headroom_boost;
+extern uint64_t l2arc_feed_secs;	/* interval seconds */
+extern uint64_t l2arc_feed_min_ms;	/* min interval msecs */
+extern int l2arc_noprefetch;			/* don't cache prefetch bufs */
+extern int l2arc_feed_again;			/* turbo warmup */
+extern int l2arc_norw;			/* no reads during writes */
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
+    &l2arc_write_max, 0, "max write size (LEGACY)");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
+    &l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
+    &l2arc_headroom, 0, "number of dev writes (LEGACY)");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
+    &l2arc_feed_secs, 0, "interval seconds (LEGACY)");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
+    &l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
+    &l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
+    &l2arc_feed_again, 0, "turbo warmup (LEGACY)");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
+    &l2arc_norw, 0, "no reads during writes (LEGACY)");
+#if 0
+extern int zfs_compressed_arc_enabled;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RW,
+    &zfs_compressed_arc_enabled, 1, "compressed arc buffers (LEGACY)");
+#endif
+
+/*
+ * Read-only counters for each ARC state: the total size plus the
+ * metadata and data "esize" counters.  Each description names the state
+ * and the counter it exports.
+ */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
+    &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
+    &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of metadata in anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
+    &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of data in anonymous state");
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
+    &ARC_mru.arcs_size.rc_count, 0, "size of mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
+    &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of metadata in mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
+    &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of data in mru state");
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
+    &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
+    &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of metadata in mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
+    &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of data in mru ghost state");
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
+    &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
+    &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of metadata in mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
+    &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of data in mfu state");
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
+    &ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
+    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of metadata in mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
+    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of data in mfu ghost state");
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
+    &ARC_l2c_only.arcs_size.rc_count, 0, "size of l2c_only state");
+
+/*
+ * sysctl handler for arc_no_grow_shift.  Reads report the current value;
+ * a written value is stored only when it is non-negative and smaller than
+ * arc_shrink_shift, otherwise EINVAL is returned.
+ */
+static int
+sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
+{
+	int shift = arc_no_grow_shift;
+	int rc;
+
+	rc = sysctl_handle_int(oidp, &shift, 0, req);
+	if (rc != 0)
+		return (rc);
+
+	/* A pure read supplies no new value; nothing left to do. */
+	if (req->newptr == NULL)
+		return (0);
+
+	if (!(shift >= 0 && shift < arc_shrink_shift))
+		return (EINVAL);
+
+	arc_no_grow_shift = shift;
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
+    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, sizeof (int),
+    sysctl_vfs_zfs_arc_no_grow_shift, "I",
+    "log2(fraction of ARC which must be free to allow growing)");
+
+/*
+ * Generic sysctl handler for long-sized ARC tunables: transfers the value
+ * behind arg1 and retunes the ARC after a successful write.  Returns the
+ * result of sysctl_handle_long().
+ */
+int
+param_set_arc_long(SYSCTL_HANDLER_ARGS)
+{
+	int rc = sysctl_handle_long(oidp, arg1, 0, req);
+
+	/* Only a successful write (a new value was supplied) needs a retune. */
+	if (rc == 0 && req->newptr != NULL)
+		arc_tuning_update(B_TRUE);
+
+	return (rc);
+}
+
+/*
+ * Generic sysctl handler for int-sized ARC tunables: transfers the value
+ * behind arg1 and retunes the ARC after a successful write.  Returns the
+ * result of sysctl_handle_int().
+ */
+int
+param_set_arc_int(SYSCTL_HANDLER_ARGS)
+{
+	int rc = sysctl_handle_int(oidp, arg1, 0, req);
+
+	/* Only a successful write (a new value was supplied) needs a retune. */
+	if (rc == 0 && req->newptr != NULL)
+		arc_tuning_update(B_TRUE);
+
+	return (rc);
+}
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
+    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_min, "LU",
+    "min arc size (LEGACY)");
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
+    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_max, "LU",
+    "max arc size (LEGACY)");
+
+/* dbuf.c */
+
+
+/* dmu.c */
+
+/* dmu_zfetch.c */
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
+
+/* max bytes to prefetch per stream (default 8MB) */
+extern uint32_t	zfetch_max_distance;
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
+    &zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
+
+/* max bytes to prefetch indirects for per stream (default 64MB) */
+extern uint32_t	zfetch_max_idistance;
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
+    &zfetch_max_idistance, 0,
+    "Max bytes to prefetch indirects for per stream (LEGACY)");
+
+/* dsl_pool.c */
+
+/* dnode.c */
+
+/* dsl_scan.c */
+
+/* metaslab.c */
+
+/*
+ * In pools where the log space map feature is not enabled we touch
+ * multiple metaslabs (and their respective space maps) with each
+ * transaction group. Thus, we benefit from having a small space map
+ * block size since it allows us to issue more I/O operations scattered
+ * around the disk. So a sane default for the space map block size
+ * is 8~16K.
+ */
+extern int zfs_metaslab_sm_blksz_no_log;
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
+    &zfs_metaslab_sm_blksz_no_log, 0,
+    "Block size for space map in pools with log space map disabled.  "
+    "Power of 2 and greater than 4096.");
+
+/*
+ * When the log space map feature is enabled, we accumulate a lot of
+ * changes per metaslab that are flushed once in a while so we benefit
+ * from a bigger block size like 128K for the metaslab space maps.
+ */
+extern int zfs_metaslab_sm_blksz_with_log;
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
+    &zfs_metaslab_sm_blksz_with_log, 0,
+    "Block size for space map in pools with log space map enabled.  "
+    "Power of 2 and greater than 4096.");
+
+/*
+ * The in-core space map representation is more compact than its on-disk form.
+ * The zfs_condense_pct determines how much more compact the in-core
+ * space map representation must be before we compact it on-disk.
+ * Values should be greater than or equal to 100.
+ */
+extern int zfs_condense_pct;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
+    &zfs_condense_pct, 0,
+    "Condense on-disk spacemap when it is more than this many percents"
+    " of in-memory counterpart");
+
+/*
+ * Device removal: upper bound on a single contiguous segment that will be
+ * allocated while evacuating a device (per the sysctl description).
+ * NOTE(review): the unit is presumably bytes -- confirm against the
+ * definition of zfs_remove_max_segment.
+ */
+extern int zfs_remove_max_segment;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
+    &zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to"
+    " allocate when removing a device");
+
+/*
+ * Device removal: knob described as letting certain actions happen while a
+ * removal is in progress.  NOTE(review): it looks like a test hook that
+ * pauses removal progress when non-zero -- confirm against the removal code.
+ */
+extern int zfs_removal_suspend_progress;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
+    &zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while"
+    " in the middle of a removal");
+
+
+/*
+ * Minimum size which forces the dynamic allocator to change
+ * its allocation strategy.  Once the space map cannot satisfy
+ * an allocation of this size then it switches to using more
+ * aggressive strategy (i.e. search by size rather than offset).
+ */
+extern uint64_t metaslab_df_alloc_threshold;
+/*
+ * NOTE(review): the variable is declared uint64_t but exported with the
+ * signed SYSCTL_QUAD macro; other 64-bit knobs in this file use
+ * SYSCTL_UQUAD -- confirm which is intended.
+ */
+SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
+    &metaslab_df_alloc_threshold, 0,
+    "Minimum size which forces the dynamic allocator to change it's allocation strategy");
+
+/*
+ * The minimum free space, in percent, which must be available
+ * in a space map to continue allocations in a first-fit fashion.
+ * Once the space map's free space drops below this level we dynamically
+ * switch to using best-fit allocations.
+ */
+extern int metaslab_df_free_pct;
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
+    &metaslab_df_free_pct, 0,
+    "The minimum free space, in percent, which must be available in a "
+    "space map to continue allocations in a first-fit fashion");
+
+/*
+ * Percentage of all cpus that can be used by the metaslab taskq.
+ */
+extern int metaslab_load_pct;
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
+    &metaslab_load_pct, 0,
+    "Percentage of cpus that can be used by the metaslab taskq");
+
+/*
+ * Max number of metaslabs per group to preload.
+ */
+extern int metaslab_preload_limit;
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
+    &metaslab_preload_limit, 0,
+    "Max number of metaslabs per group to preload");
+
+/* spa.c */
+/*
+ * Interval, in seconds, before a failed configuration cache file write is
+ * retried (per the sysctl description).
+ */
+extern int zfs_ccw_retry_interval;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
+    &zfs_ccw_retry_interval, 0,
+    "Configuration cache file write, retry after failure, interval (seconds)");
+
+/*
+ * Tolerance for missing top-level vdevs when importing a pool from the
+ * cache file.  NOTE(review): presumably a count of vdevs -- confirm.
+ */
+extern uint64_t zfs_max_missing_tvds_cachefile;
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
+    &zfs_max_missing_tvds_cachefile, 0,
+    "allow importing pools with missing top-level vdevs in cache file");
+
+/*
+ * Same tolerance as above, applied when the configuration comes from a
+ * device scan rather than the cache file (per the sysctl description).
+ */
+extern uint64_t zfs_max_missing_tvds_scan;
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
+    &zfs_max_missing_tvds_scan, 0,
+    "allow importing pools with missing top-level vdevs during scan");
+
+/* spa_misc.c */
+extern int zfs_flags;
+
+/*
+ * Handler behind vfs.zfs.debugflags.
+ *
+ * Reports the current value of zfs_flags and, when the request carries a
+ * new value, stores it.  Returns 0 on success or the error produced by
+ * sysctl_handle_int().
+ */
+static int
+sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
+{
+	int requested = zfs_flags;
+	int error;
+
+	error = sysctl_handle_int(oidp, &requested, 0, req);
+	if (error != 0)
+		return (error);
+	if (req->newptr == NULL)
+		return (0);
+
+	/*
+	 * ZFS_DEBUG_MODIFY can only take effect if it was enabled before
+	 * boot, because every arc buffer needs the extra checksum data.
+	 * Turning it off later is always safe, so the bit is dropped from
+	 * the request unless it is already set.
+	 */
+	if ((zfs_flags & ZFS_DEBUG_MODIFY) == 0)
+		requested &= ~ZFS_DEBUG_MODIFY;
+	zfs_flags = requested;
+
+	return (0);
+}
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0,
+    sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing.");
+
+/*
+ * Sysctl handler for the deadman sync timeout (milliseconds).
+ *
+ * Exposes zfs_deadman_synctime_ms; on a successful write the new value is
+ * stored and pushed to the pools via spa_set_deadman_synctime() after
+ * conversion to nanoseconds.  Returns 0 or the sysctl_handle_long() error.
+ */
+int
+param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long synctime_ms = zfs_deadman_synctime_ms;
+	int error;
+
+	error = sysctl_handle_long(oidp, &synctime_ms, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	zfs_deadman_synctime_ms = synctime_ms;
+	spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
+
+	return (0);
+}
+
+/*
+ * Sysctl handler for the deadman per-zio timeout (milliseconds).
+ *
+ * Exposes zfs_deadman_ziotime_ms; on a successful write the new value is
+ * stored and propagated through spa_set_deadman_ziotime() after conversion
+ * to nanoseconds.  Returns 0 or the error from sysctl_handle_long().
+ */
+int
+param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long val;
+	int err;
+
+	val = zfs_deadman_ziotime_ms;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	zfs_deadman_ziotime_ms = val;
+
+	/*
+	 * Propagate the zio timeout that was just stored.  The sync timeout
+	 * (zfs_deadman_synctime_ms) is a different tunable and must not be
+	 * used here.
+	 */
+	spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_ziotime_ms));
+
+	return (0);
+}
+
+/*
+ * Sysctl handler for the deadman failure mode string.
+ *
+ * Reports the current zfs_deadman_failmode and, on a write, accepts one of
+ * "wait", "continue" or "panic".  The new string is then handed to
+ * param_set_deadman_failmode_common(), whose result is negated and
+ * returned.  Returns the sysctl_handle_string() error if the request itself
+ * fails, and 0 if the requested mode equals the current one.
+ */
+int
+param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
+{
+	char buf[16];
+	int rc;
+
+	/*
+	 * Always seed the scratch buffer with the current mode.
+	 * sysctl_handle_string() reports the buffer's existing contents as
+	 * the old value before it copies in a new one, so leaving buf
+	 * uninitialized on writes would expose (and strlen() over) stack
+	 * garbage.
+	 */
+	strlcpy(buf, zfs_deadman_failmode, sizeof (buf));
+
+	rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+	if (rc || req->newptr == NULL)
+		return (rc);
+	if (strcmp(buf, zfs_deadman_failmode) == 0)
+		return (0);
+	/* Point the tunable at a string literal; buf lives on the stack. */
+	if (!strcmp(buf,  "wait"))
+		zfs_deadman_failmode = "wait";
+	if (!strcmp(buf,  "continue"))
+		zfs_deadman_failmode = "continue";
+	if (!strcmp(buf,  "panic"))
+		zfs_deadman_failmode = "panic";
+
+	return (-param_set_deadman_failmode_common(buf));
+}
+
+
+/* spacemap.c */
+/* Indirect block shift used for space map objects (per the description). */
+extern int space_map_ibs;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
+    &space_map_ibs, 0, "Space map indirect block shift");
+
+
+/* vdev.c */
+/*
+ * Sysctl handler for the minimum automatic ashift.
+ *
+ * A new value is accepted only when it lies in the closed range
+ * [ASHIFT_MIN, zfs_vdev_max_auto_ashift]; anything else yields EINVAL.
+ * Errors from sysctl_handle_64() are passed through SET_ERROR().
+ */
+int
+param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t ashift = zfs_vdev_min_auto_ashift;
+	int error;
+
+	error = sysctl_handle_64(oidp, &ashift, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (SET_ERROR(error));
+
+	if (!(ashift >= ASHIFT_MIN && ashift <= zfs_vdev_max_auto_ashift))
+		return (SET_ERROR(EINVAL));
+
+	zfs_vdev_min_auto_ashift = ashift;
+	return (0);
+}
+
+/*
+ * Sysctl handler for the maximum automatic ashift.
+ *
+ * A new value is accepted only when it lies in the closed range
+ * [zfs_vdev_min_auto_ashift, ASHIFT_MAX]; anything else yields EINVAL.
+ * Errors from sysctl_handle_64() are passed through SET_ERROR().
+ */
+int
+param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t ashift = zfs_vdev_max_auto_ashift;
+	int error;
+
+	error = sysctl_handle_64(oidp, &ashift, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (SET_ERROR(error));
+
+	if (!(ashift <= ASHIFT_MAX && ashift >= zfs_vdev_min_auto_ashift))
+		return (SET_ERROR(EINVAL));
+
+	zfs_vdev_max_auto_ashift = ashift;
+	return (0);
+}
+
+/*
+ * Legacy vfs.zfs.{min,max}_auto_ashift nodes.  Writes go through the
+ * range-checking handlers param_set_{min,max}_auto_ashift() above.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
+    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+    param_set_min_auto_ashift, "QU",
+    "Min ashift used when creating new top-level vdev. (LEGACY)");
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
+    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
+    param_set_max_auto_ashift, "QU",
+    "Max ashift used when optimizing for logical -> physical sector size on "
+    "new top-level vdevs. (LEGACY)");
+
+/*
+ * Since the DTL space map of a vdev is not expected to have a lot of
+ * entries, we default its block size to 4K.
+ *
+ * NOTE(review): the description below says "greater than 4096" although
+ * the stated default is 4K; presumably "greater than or equal to" is
+ * meant -- confirm before relying on the wording.
+ */
+extern int zfs_vdev_dtl_sm_blksz;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
+    &zfs_vdev_dtl_sm_blksz, 0,
+    "Block size for DTL space map.  Power of 2 and greater than 4096.");
+
+/*
+ * vdev-wide space maps that have lots of entries written to them at
+ * the end of each transaction can benefit from a higher I/O bandwidth
+ * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
+ */
+extern int zfs_vdev_standard_sm_blksz;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
+    &zfs_vdev_standard_sm_blksz, 0,
+    "Block size for standard space map.  Power of 2 and greater than 4096.");
+
+/* When enabled, vdev_validate() is bypassed (per the sysctl description). */
+extern int vdev_validate_skip;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
+    &vdev_validate_skip, 0,
+    "Enable to bypass vdev_validate().");
+
+
+/* vdev_cache.c */
+
+/* vdev_mirror.c */
+/*
+ * The load configuration settings below are tuned by default for
+ * the case where all devices are of the same rotational type.
+ *
+ * If there is a mixture of rotating and non-rotating media, setting
+ * non_rotating_seek_inc to 0 may well provide better results as it
+ * will direct more reads to the non-rotating vdevs which are more
+ * likely to have a higher performance.
+ */
+
+
+/* vdev_queue.c */
+/*
+ * Helper macros that declare the zfs_vdev_<name>_{min,max}_active
+ * variable and export it as vfs.zfs.vdev.<name>_{min,max}_active.
+ * They are scoped to this section and undefined again below.
+ */
+#define	ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
+extern uint32_t zfs_vdev_ ## name ## _min_active;				\
+SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\
+    &zfs_vdev_ ## name ## _min_active, 0,				\
+    "Initial number of I/O requests of type " #name			\
+    " active for each device");
+
+#define	ZFS_VDEV_QUEUE_KNOB_MAX(name)					\
+extern uint32_t zfs_vdev_ ## name ## _max_active;				\
+SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN, \
+    &zfs_vdev_ ## name ## _max_active, 0,				\
+    "Maximum number of I/O requests of type " #name			\
+    " active for each device");
+
+
+/*
+ * Undefine the helpers that were actually defined above.  The previous
+ * "#undef ZFS_VDEV_QUEUE_KNOB" named a macro that does not exist and so
+ * left both helpers defined.
+ */
+#undef ZFS_VDEV_QUEUE_KNOB_MIN
+#undef ZFS_VDEV_QUEUE_KNOB_MAX
+
+/*
+ * Exported under the legacy name vfs.zfs.top_maxinflight; the backing
+ * variable is zfs_vdev_max_active.
+ */
+extern uint32_t zfs_vdev_max_active;
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
+    &zfs_vdev_max_active, 0,
+    "The maximum number of I/Os of all types active for each device. (LEGACY)");
+
+/* Default queue depth for each allocator (per the sysctl description). */
+extern int zfs_vdev_def_queue_depth;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
+    &zfs_vdev_def_queue_depth, 0,
+    "Default queue depth for each allocator");
+
+/* Disabled: the multihost history knob is currently commented out. */
+/*extern uint64_t zfs_multihost_history;
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, multihost_history, CTLFLAG_RWTUN,
+    &zfs_multihost_history, 0,
+    "Historical staticists for the last N multihost updates");*/
+
+/* Compiled out until "notyet" is defined. */
+#ifdef notyet
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
+    &vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
+#endif
+
+
+/* zio.c */
+/*
+ * Whether ZIO allocations use uma(9).  Defaults to 1 on LP64 platforms
+ * and 0 elsewhere.
+ */
+#if defined(__LP64__)
+int zio_use_uma = 1;
+#else
+int zio_use_uma = 0;
+#endif
+
+SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
+    "Use uma(9) for ZIO allocations");
+/*
+ * zio_exclude_metadata is not declared in this section.
+ * NOTE(review): presumably it comes from an included header -- confirm.
+ */
+SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN, &zio_exclude_metadata, 0,
+    "Exclude metadata buffers from dumps as well");
+
+/*
+ * Sysctl handler for an int "slop shift" tunable reached through arg1.
+ *
+ * Reports the current value and, on a write, stores the new one only if
+ * it lies in the range 1..31 inclusive; otherwise EINVAL is returned.
+ * Errors from sysctl_handle_int() are returned unchanged.
+ */
+int
+param_set_slop_shift(SYSCTL_HANDLER_ARGS)
+{
+	int *targetp = (int *)arg1;
+	int shift = *targetp;
+	int error;
+
+	error = sysctl_handle_int(oidp, &shift, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (!(shift >= 1 && shift <= 31))
+		return (EINVAL);
+
+	*targetp = shift;
+	return (0);
+}
+
+/*
+ * Sysctl handler for the multihost interval.
+ *
+ * sysctl_handle_long() reads and writes the long that arg1 points to.
+ * After a successful write, mmp_signal_all_threads() is called unless
+ * spa_mode_global is still SPA_MODE_UNINIT.  Returns 0 or the error from
+ * sysctl_handle_long().
+ */
+int
+param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
+{
+	int error = sysctl_handle_long(oidp, arg1, 0, req);
+
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (spa_mode_global != SPA_MODE_UNINIT)
+		mmp_signal_all_threads();
+	return (0);
+}

diff --git a/zfs/module/os/freebsd/zfs/vdev_file.c b/zfs/module/os/freebsd/zfs/vdev_file.c
new file mode 100644
index 0000000..2d92681
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/vdev_file.c

@@ -0,0 +1,356 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/file.h>
+#include <sys/vdev_file.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/fs/zfs.h>
+#include <sys/abd.h>
+#include <sys/stat.h>
+
+/*
+ * Virtual device vector for files.
+ */
+
+/* Task queue on which file vdev read/write requests are executed. */
+static taskq_t *vdev_file_taskq;
+
+/*
+ * Logical and physical ashift values reported by vdev_file_open() for
+ * every file vdev.  Both default to SPA_MINBLOCKSHIFT and are exported as
+ * module parameters at the end of this file.
+ */
+unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+
+/*
+ * Create the "z_vdev_file" task queue used for file vdev I/O.  The thread
+ * count argument is max_ncpus, but never fewer than 16.
+ */
+void
+vdev_file_init(void)
+{
+	taskq_t *tq;
+
+	tq = taskq_create("z_vdev_file", MAX(max_ncpus, 16), minclsyspri,
+	    max_ncpus, INT_MAX, 0);
+	vdev_file_taskq = tq;
+}
+
+/* Destroy the task queue created by vdev_file_init(). */
+void
+vdev_file_fini(void)
+{
+	taskq_destroy(vdev_file_taskq);
+}
+
+/*
+ * vdev_op_hold callback for file vdevs.  No reference is taken; the body
+ * only asserts that the vdev has a path.
+ */
+static void
+vdev_file_hold(vdev_t *vd)
+{
+	ASSERT3P(vd->vdev_path, !=, NULL);
+}
+
+/*
+ * vdev_op_rele callback for file vdevs.  Nothing is released; the body
+ * only asserts that the vdev has a path.
+ */
+static void
+vdev_file_rele(vdev_t *vd)
+{
+	ASSERT3P(vd->vdev_path, !=, NULL);
+}
+
+/*
+ * Translate a pool open mode into open(2)-style access flags.
+ *
+ * READ and WRITE together give O_RDWR, READ alone O_RDONLY, WRITE alone
+ * O_WRONLY, and neither contributes no access flag.  O_LARGEFILE is
+ * always OR-ed into the result.
+ */
+static mode_t
+vdev_file_open_mode(spa_mode_t spa_mode)
+{
+	const int can_read = (spa_mode & SPA_MODE_READ) != 0;
+	const int can_write = (spa_mode & SPA_MODE_WRITE) != 0;
+	mode_t access;
+
+	if (can_read && can_write)
+		access = O_RDWR;
+	else if (can_read)
+		access = O_RDONLY;
+	else if (can_write)
+		access = O_WRONLY;
+	else
+		access = 0;
+
+	return (access | O_LARGEFILE);
+}
+
+/*
+ * vdev_op_open callback for file vdevs.
+ *
+ * Opens vd->vdev_path (which must be an absolute path) unless the vdev is
+ * being reopened with its file still open, and then reports the file size
+ * and ashift values.
+ *
+ *	vd		vdev to open; per-vdev state is kept in vd->vdev_tsd
+ *	psize		out: file size from zfs_file_getattr()
+ *	max_psize	out: same value as *psize
+ *	logical_ashift	out: vdev_file_logical_ashift
+ *	physical_ashift	out: vdev_file_physical_ashift
+ *
+ * Returns 0 on success, EINVAL for a missing or relative path, ENODEV
+ * (kernel only) when the file attributes cannot be read or the path is not
+ * a regular file, or the error from zfs_file_open()/zfs_file_getattr().
+ *
+ * NOTE(review): on the error returns after vd->vdev_tsd has been set, the
+ * vdev_file_t (and any open file) is left attached to the vdev; presumably
+ * the caller follows up with vdev_file_close() -- confirm.
+ */
+static int
+vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	vdev_file_t *vf;
+	zfs_file_t *fp;
+	zfs_file_attr_t zfa;
+	int error;
+
+	/*
+	 * Rotational optimizations only make sense on block devices.
+	 */
+	vd->vdev_nonrot = B_TRUE;
+
+	/*
+	 * Allow TRIM on file based vdevs.  This may not always be supported,
+	 * since it depends on your kernel version and underlying filesystem
+	 * type but it is always safe to attempt.
+	 */
+	vd->vdev_has_trim = B_TRUE;
+
+	/*
+	 * Disable secure TRIM on file based vdevs.  There is no way to
+	 * request this behavior from the underlying filesystem.
+	 */
+	vd->vdev_has_securetrim = B_FALSE;
+
+	/*
+	 * We must have a pathname, and it must be absolute.
+	 */
+	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
+		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Reopen the device if it's not currently open.  Otherwise,
+	 * just update the physical size of the device.
+	 */
+	if (vd->vdev_tsd != NULL) {
+		ASSERT(vd->vdev_reopening);
+		vf = vd->vdev_tsd;
+		goto skip_open;
+	}
+
+	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
+
+	/*
+	 * We always open the files from the root of the global zone, even if
+	 * we're in a local zone.  If the user has gotten to this point, the
+	 * administrator has already decided that the pool should be available
+	 * to local zone users, so the underlying devices should be as well.
+	 */
+	ASSERT3P(vd->vdev_path, !=, NULL);
+	ASSERT(vd->vdev_path[0] == '/');
+
+	error = zfs_file_open(vd->vdev_path,
+	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
+	if (error) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		return (error);
+	}
+
+	vf->vf_file = fp;
+
+#ifdef _KERNEL
+	/*
+	 * Make sure it's a regular file.
+	 */
+	/*
+	 * NOTE(review): unlike the other failure paths, this getattr failure
+	 * does not set vd->vdev_stat.vs_aux -- confirm that is intended.
+	 */
+	if (zfs_file_getattr(fp, &zfa)) {
+		return (SET_ERROR(ENODEV));
+	}
+	if (!S_ISREG(zfa.zfa_mode)) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		return (SET_ERROR(ENODEV));
+	}
+#endif
+
+skip_open:
+
+	/* Reached on both first open and reopen: refresh the size. */
+	error =  zfs_file_getattr(vf->vf_file, &zfa);
+	if (error) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		return (error);
+	}
+
+	*max_psize = *psize = zfa.zfa_size;
+	*logical_ashift = vdev_file_logical_ashift;
+	*physical_ashift = vdev_file_physical_ashift;
+
+	return (0);
+}
+
+/*
+ * vdev_op_close callback for file vdevs.
+ *
+ * Does nothing while the vdev is being reopened or when it has no per-vdev
+ * state.  Otherwise closes the backing file (if one is open), clears the
+ * delayed-close flag, frees the vdev_file_t and detaches it from the vdev.
+ */
+static void
+vdev_file_close(vdev_t *vd)
+{
+	vdev_file_t *state = vd->vdev_tsd;
+
+	if (state == NULL || vd->vdev_reopening)
+		return;
+
+	if (state->vf_file != NULL)
+		zfs_file_close(state->vf_file);
+
+	vd->vdev_delayed_close = B_FALSE;
+	kmem_free(state, sizeof (vdev_file_t));
+	vd->vdev_tsd = NULL;
+}
+
+/*
+ * Implements the interrupt side for file vdev types. This routine will be
+ * called when the I/O completes allowing us to transfer the I/O to the
+ * interrupt taskqs. For consistency, the code structure mimics disk vdev
+ * types.
+ *
+ * The zio is handed straight to zio_delay_interrupt(); it is called at the
+ * end of vdev_file_io_strategy().
+ */
+static void
+vdev_file_io_intr(zio_t *zio)
+{
+	zio_delay_interrupt(zio);
+}
+
+/*
+ * Task queue callback that performs one read or write against the backing
+ * file of a file vdev.
+ *
+ * arg is the zio to service.  The data moves through a linear buffer
+ * borrowed from the zio's ABD.  The result is stored in zio->io_error: the
+ * error from the file operation, or ENOSPC if the operation succeeded but
+ * left a non-zero residual.  Completion is signalled through
+ * vdev_file_io_intr().
+ */
+static void
+vdev_file_io_strategy(void *arg)
+{
+	zio_t *zio = arg;
+	vdev_file_t *vf = zio->io_vd->vdev_tsd;
+	loff_t offset = zio->io_offset;
+	ssize_t length = zio->io_size;
+	ssize_t resid = 0;
+	void *data;
+	int error;
+
+	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
+	if (zio->io_type == ZIO_TYPE_READ) {
+		/* Read into a scratch buffer, then copy back into the ABD. */
+		data = abd_borrow_buf(zio->io_abd, zio->io_size);
+		error = zfs_file_pread(vf->vf_file, data, length, offset,
+		    &resid);
+		abd_return_buf_copy(zio->io_abd, data, length);
+	} else {
+		/* Copy the ABD contents out, then write them to the file. */
+		data = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+		error = zfs_file_pwrite(vf->vf_file, data, length, offset,
+		    &resid);
+		abd_return_buf(zio->io_abd, data, length);
+	}
+
+	/* A short transfer without an explicit error becomes ENOSPC. */
+	if (error == 0 && resid != 0)
+		zio->io_error = ENOSPC;
+	else
+		zio->io_error = error;
+
+	vdev_file_io_intr(zio);
+}
+
+/*
+ * vdev_op_io_start callback for file vdevs.
+ *
+ * - ZIO_TYPE_IOCTL: DKIOCFLUSHWRITECACHE is served with zfs_file_fsync();
+ *   any other command fails with ENOTSUP.  An unreadable vdev fails with
+ *   ENXIO via zio_interrupt().
+ * - ZIO_TYPE_TRIM: always completes with ENOTSUP (see below).
+ * - READ/WRITE: dispatched to vdev_file_taskq, where
+ *   vdev_file_io_strategy() does the actual I/O.
+ */
+static void
+vdev_file_io_start(zio_t *zio)
+{
+	vdev_t *vd = zio->io_vd;
+	vdev_file_t *vf = vd->vdev_tsd;
+
+	if (zio->io_type == ZIO_TYPE_IOCTL) {
+		/* XXPOLICY */
+		if (!vdev_readable(vd)) {
+			zio->io_error = SET_ERROR(ENXIO);
+			zio_interrupt(zio);
+			return;
+		}
+
+		switch (zio->io_cmd) {
+		case DKIOCFLUSHWRITECACHE:
+			zio->io_error = zfs_file_fsync(vf->vf_file,
+			    O_SYNC|O_DSYNC);
+			break;
+		default:
+			zio->io_error = SET_ERROR(ENOTSUP);
+		}
+
+		zio_execute(zio);
+		return;
+	} else if (zio->io_type == ZIO_TYPE_TRIM) {
+#ifdef notyet
+		int mode = 0;
+
+		ASSERT3U(zio->io_size, !=, 0);
+
+		/* XXX FreeBSD has no fallocate routine in file ops */
+		zio->io_error = zfs_file_fallocate(vf->vf_file,
+		    mode, zio->io_offset, zio->io_size);
+#endif
+		/*
+		 * This assignment is outside the #ifdef, so TRIM reports
+		 * ENOTSUP even if the fallocate path above is compiled in.
+		 */
+		zio->io_error = SET_ERROR(ENOTSUP);
+		zio_execute(zio);
+		return;
+	}
+	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
+	zio->io_target_timestamp = zio_handle_io_delay(zio);
+
+	/* The dispatch must succeed: a zero return trips the VERIFY. */
+	VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
+	    TQ_SLEEP), !=, 0);
+}
+
+/*
+ * vdev_op_io_done callback for file vdevs.  Intentionally a no-op; the
+ * cast only marks the parameter as unused.
+ */
+static void
+vdev_file_io_done(zio_t *zio)
+{
+	(void) zio;
+}
+
+/*
+ * Operations vector for VDEV_TYPE_FILE leaf vdevs.  Open, close, I/O start
+ * and done, hold and rele use the vdev_file_* functions in this file;
+ * asize, min_asize and xlate use the vdev_default_* implementations; the
+ * remaining callbacks are not provided (NULL).
+ */
+vdev_ops_t vdev_file_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_file_open,
+	.vdev_op_close = vdev_file_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_file_io_start,
+	.vdev_op_io_done = vdev_file_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_file_hold,
+	.vdev_op_rele = vdev_file_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_FILE,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+};
+
+/*
+ * From userland we access disks just like files.
+ *
+ * Outside the kernel, VDEV_TYPE_DISK therefore gets an operations vector
+ * that is identical to vdev_file_ops apart from the type name.
+ */
+#ifndef _KERNEL
+
+vdev_ops_t vdev_disk_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_file_open,
+	.vdev_op_close = vdev_file_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_file_io_start,
+	.vdev_op_io_done = vdev_file_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_file_hold,
+	.vdev_op_rele = vdev_file_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+};
+
+#endif
+
+/*
+ * Export vdev_file_logical_ashift and vdev_file_physical_ashift as
+ * read-write (ZMOD_RW) unsigned long module parameters.
+ */
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+	"Logical ashift for file-based devices");
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+	"Physical ashift for file-based devices");

diff --git a/zfs/module/os/freebsd/zfs/vdev_geom.c b/zfs/module/os/freebsd/zfs/vdev_geom.c
new file mode 100644
index 0000000..c8fa2b0
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/vdev_geom.c

@@ -0,0 +1,1327 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_os.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <vm/vm_page.h>
+#include <geom/geom.h>
+#include <geom/geom_disk.h>
+#include <geom/geom_int.h>
+
+/*
+ * Fallback for headers that do not provide g_topology_locked(): true when
+ * topology_lock is exclusively held, as reported by sx_xlocked().
+ */
+#ifndef g_topology_locked
+#define	g_topology_locked()	sx_xlocked(&topology_lock)
+#endif
+
+/*
+ * Virtual device vector for GEOM.
+ */
+
+/* Forward declaration; the class below refers to it before its definition. */
+static g_attrchanged_t vdev_geom_attrchanged;
+/*
+ * GEOM class under which ZFS attaches its consumers.  Only the attribute
+ * change callback is set at class level.
+ */
+struct g_class zfs_vdev_class = {
+	.name = "ZFS::VDEV",
+	.version = G_VERSION,
+	.attrchanged = vdev_geom_attrchanged,
+};
+
+/*
+ * One entry in a consumer's list of vdevs.  Code in this file iterates the
+ * list to reach every vdev recorded for a given GEOM consumer.
+ */
+struct consumer_vdev_elem {
+	SLIST_ENTRY(consumer_vdev_elem)	elems;	/* list linkage */
+	vdev_t	*vd;				/* vdev using the consumer */
+};
+
+/*
+ * List head type.  The head is stored directly in the g_consumer's
+ * "private" pointer field (accessed as &cp->private), which is why the
+ * assertion below requires the two to have the same size.
+ */
+SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
+/* BEGIN CSTYLED */
+_Static_assert(sizeof (((struct g_consumer *)NULL)->private)
+	== sizeof (struct consumer_priv_t*),
+	"consumer_priv_t* can't be stored in g_consumer.private");
+
+DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
+
+SYSCTL_DECL(_vfs_zfs_vdev);
+/* Don't send BIO_FLUSH. */
+/* Exported as vfs.zfs.vdev.bio_flush_disable; zero-initialized static. */
+static int vdev_geom_bio_flush_disable;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
+    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
+/* Don't send BIO_DELETE. */
+/* Exported as vfs.zfs.vdev.bio_delete_disable; zero-initialized static. */
+static int vdev_geom_bio_delete_disable;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
+    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
+/* END CSTYLED */
+
+/* Declare local functions */
+/* vdev_geom_attach() calls this on its error paths before the definition. */
+static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);
+
+/*
+ * Thread local storage used to indicate when a thread is probing geoms
+ * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
+ * it is looking for a replacement for the vdev_t* that is its value.
+ */
+uint_t zfs_geom_probe_vdev_key;
+
+/*
+ * Refresh vd->vdev_physpath from the consumer's "GEOM::physpath" attribute.
+ *
+ *	vd		vdev whose physical path is updated
+ *	cp		consumer queried through g_io_getattr()
+ *	do_null_update	whether to request a config update when the vdev
+ *			had no physical path before
+ *
+ * If the attribute cannot be read, the vdev is left untouched.  Otherwise
+ * the old path is replaced by a copy of the new one, and a
+ * SPA_ASYNC_CONFIG_UPDATE is requested when the path changed (or, for a
+ * previously unset path, when do_null_update is set).
+ */
+static void
+vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
+    boolean_t do_null_update)
+{
+	boolean_t needs_update = B_FALSE;
+	char *physpath;
+	int error, physpath_len;
+
+	/* Scratch buffer for the attribute value; freed below. */
+	physpath_len = MAXPATHLEN;
+	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
+	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
+	if (error == 0) {
+		char *old_physpath;
+
+		/* g_topology lock ensures that vdev has not been closed */
+		g_topology_assert();
+		old_physpath = vd->vdev_physpath;
+		vd->vdev_physpath = spa_strdup(physpath);
+
+		if (old_physpath != NULL) {
+			needs_update = (strcmp(old_physpath,
+			    vd->vdev_physpath) != 0);
+			spa_strfree(old_physpath);
+		} else
+			needs_update = do_null_update;
+	}
+	g_free(physpath);
+
+	/*
+	 * If the physical path changed, update the config.
+	 * Only request an update for previously unset physpaths if
+	 * requested by the caller.
+	 */
+	if (needs_update)
+		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);
+
+}
+
+/*
+ * GEOM attribute-change callback.
+ *
+ * Only "GEOM::physpath" is acted on: the physical path of the first vdev
+ * on the consumer's list is refreshed, asking for a config update even if
+ * no path was set before.  Other attributes, and consumers with no vdevs,
+ * are ignored.
+ *
+ * NOTE(review): only the first list entry is updated, exactly as before;
+ * confirm whether the other vdevs sharing the consumer should be refreshed
+ * too.
+ */
+static void
+vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
+{
+	struct consumer_priv_t *vdev_list;
+	struct consumer_vdev_elem *first;
+
+	vdev_list = (struct consumer_priv_t *)&cp->private;
+	first = SLIST_FIRST(vdev_list);
+
+	if (first == NULL || strcmp(attr, "GEOM::physpath") != 0)
+		return;
+
+	vdev_geom_set_physpath(first->vd, cp, /* null_update */B_TRUE);
+}
+
+/*
+ * GEOM resize callback.
+ *
+ * For every vdev on the consumer's list that is healthy and belongs to a
+ * pool with autoexpand enabled, request an expanding online through
+ * vdev_online(..., ZFS_ONLINE_EXPAND, ...).
+ */
+static void
+vdev_geom_resize(struct g_consumer *cp)
+{
+	struct consumer_priv_t *vdev_list;
+	struct consumer_vdev_elem *entry;
+
+	vdev_list = (struct consumer_priv_t *)&cp->private;
+	if (SLIST_EMPTY(vdev_list))
+		return;
+
+	SLIST_FOREACH(entry, vdev_list, elems) {
+		vdev_t *vd = entry->vd;
+		spa_t *spa;
+
+		if (vd->vdev_state != VDEV_STATE_HEALTHY)
+			continue;
+
+		spa = vd->vdev_spa;
+		if (spa->spa_autoexpand) {
+			vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND,
+			    NULL);
+		}
+	}
+}
+
+/*
+ * GEOM orphan callback, installed on the geom by vdev_geom_attach().
+ *
+ * Marks every vdev on the consumer's list as wanting removal and requests
+ * an asynchronous SPA_ASYNC_REMOVE from its pool.  The consumer itself is
+ * not detached here; see the comment in the body for why.
+ */
+static void
+vdev_geom_orphan(struct g_consumer *cp)
+{
+	struct consumer_priv_t *priv;
+	// cppcheck-suppress uninitvar
+	struct consumer_vdev_elem *elem;
+
+	g_topology_assert();
+
+	priv = (struct consumer_priv_t *)&cp->private;
+	if (SLIST_EMPTY(priv))
+		/* Vdev close in progress.  Ignore the event. */
+		return;
+
+	/*
+	 * Orphan callbacks occur from the GEOM event thread.
+	 * Concurrent with this call, new I/O requests may be
+	 * working their way through GEOM about to find out
+	 * (only once executed by the g_down thread) that we've
+	 * been orphaned from our disk provider.  These I/Os
+	 * must be retired before we can detach our consumer.
+	 * This is most easily achieved by acquiring the
+	 * SPA ZIO configuration lock as a writer, but doing
+	 * so with the GEOM topology lock held would cause
+	 * a lock order reversal.  Instead, rely on the SPA's
+	 * async removal support to invoke a close on this
+	 * vdev once it is safe to do so.
+	 */
+	SLIST_FOREACH(elem, priv, elems) {
+		// cppcheck-suppress uninitvar
+		vdev_t *vd = elem->vd;
+
+		vd->vdev_remove_wanted = B_TRUE;
+		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
+	}
+}
+
+/*
+ * Attach to a GEOM provider and open it for reading with exclusive access.
+ *
+ *	pp	provider to attach to
+ *	vd	vdev that will use the consumer, or NULL; when non-NULL the
+ *		consumer is stored in vd->vdev_tsd
+ *	sanity	when set, reject providers whose sector size is not a power
+ *		of two or exceeds VDEV_PAD_SIZE, or whose media size is
+ *		below SPA_MINDEVSIZE
+ *
+ * A single "zfs::vdev" geom is shared by all consumers and is created on
+ * first use; an existing consumer for the same provider is reused.
+ * Returns the consumer, or NULL on failure.  The topology lock must be
+ * held.
+ */
+static struct g_consumer *
+vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error;
+
+	g_topology_assert();
+
+	ZFS_LOG(1, "Attaching to %s.", pp->name);
+
+	if (sanity) {
+		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
+			ZFS_LOG(1, "Failing attach of %s. "
+			    "Incompatible sectorsize %d\n",
+			    pp->name, pp->sectorsize);
+			return (NULL);
+		} else if (pp->mediasize < SPA_MINDEVSIZE) {
+			ZFS_LOG(1, "Failing attach of %s. "
+			    "Incompatible mediasize %ju\n",
+			    pp->name, pp->mediasize);
+			return (NULL);
+		}
+	}
+
+	/* Do we have geom already? No? Create one. */
+	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
+		if (gp->flags & G_GEOM_WITHER)
+			continue;
+		if (strcmp(gp->name, "zfs::vdev") != 0)
+			continue;
+		break;
+	}
+	if (gp == NULL) {
+		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
+		gp->orphan = vdev_geom_orphan;
+		gp->attrchanged = vdev_geom_attrchanged;
+		gp->resize = vdev_geom_resize;
+		cp = g_new_consumer(gp);
+		error = g_attach(cp, pp);
+		if (error != 0) {
+			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
+			    __LINE__, error);
+			vdev_geom_detach(cp, B_FALSE);
+			return (NULL);
+		}
+		/* Request read (+1) and exclusive (+1) access, no write. */
+		error = g_access(cp, 1, 0, 1);
+		if (error != 0) {
+			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
+			    __LINE__, error);
+			vdev_geom_detach(cp, B_FALSE);
+			return (NULL);
+		}
+		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
+	} else {
+		/* Check if we are already connected to this provider. */
+		LIST_FOREACH(cp, &gp->consumer, consumer) {
+			if (cp->provider == pp) {
+				ZFS_LOG(1, "Found consumer for %s.", pp->name);
+				break;
+			}
+		}
+		if (cp == NULL) {
+			cp = g_new_consumer(gp);
+			error = g_attach(cp, pp);
+			if (error != 0) {
+				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
+				    __func__, __LINE__, error);
+				vdev_geom_detach(cp, B_FALSE);
+				return (NULL);
+			}
+			error = g_access(cp, 1, 0, 1);
+			if (error != 0) {
+				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
+				    __func__, __LINE__, error);
+				vdev_geom_detach(cp, B_FALSE);
+				return (NULL);
+			}
+			ZFS_LOG(1, "Created consumer for %s.", pp->name);
+		} else {
+			/*
+			 * Reusing an existing consumer: on failure it is
+			 * left in place (no detach) since it was not created
+			 * here.
+			 */
+			error = g_access(cp, 1, 0, 1);
+			if (error != 0) {
+				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
+				    __func__, __LINE__, error);
+				return (NULL);
+			}
+			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
+		}
+	}
+
+	if (vd != NULL)
+		vd->vdev_tsd = cp;
+
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
+	return (cp);
+}
+
+/*
+ * Release one use of a consumer and tear down whatever becomes unused.
+ *
+ *	cp		consumer to release
+ *	open_for_read	when set, first drop the read and exclusive access
+ *			counts taken by vdev_geom_attach()
+ *
+ * Once the consumer has no read or exclusive references left, any
+ * remaining write references are dropped, it is detached from its provider
+ * (if attached) and destroyed.  If the geom then has no consumers, it is
+ * withered.  The topology lock must be held.
+ */
+static void
+vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
+{
+	struct g_geom *gp;
+
+	g_topology_assert();
+
+	ZFS_LOG(1, "Detaching from %s.",
+	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");
+
+	/* Remember the geom now; cp may be destroyed below. */
+	gp = cp->geom;
+	if (open_for_read)
+		g_access(cp, -1, 0, -1);
+	/* Destroy consumer on last close. */
+	if (cp->acr == 0 && cp->ace == 0) {
+		if (cp->acw > 0)
+			g_access(cp, 0, -cp->acw, 0);
+		if (cp->provider != NULL) {
+			ZFS_LOG(1, "Destroying consumer for %s.",
+			    cp->provider->name ? cp->provider->name : "NULL");
+			g_detach(cp);
+		}
+		g_destroy_consumer(cp);
+	}
+	/* Destroy geom if there are no consumers left. */
+	if (LIST_EMPTY(&gp->consumer)) {
+		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
+		g_wither_geom(gp, ENXIO);
+	}
+}
+
+/*
+ * Close a vdev's GEOM consumer.  The topology lock must be held.
+ *
+ * Clears the delayed-close flag, disconnects the vdev from its consumer,
+ * removes and frees the vdev's entries on the consumer's vdev list, and
+ * releases the consumer through vdev_geom_detach().  Does nothing more if
+ * the vdev has no consumer.
+ */
+static void
+vdev_geom_close_locked(vdev_t *vd)
+{
+	struct g_consumer *consumer;
+	struct consumer_priv_t *vdev_list;
+	struct consumer_vdev_elem *cur, *next;
+
+	g_topology_assert();
+
+	consumer = vd->vdev_tsd;
+	vd->vdev_delayed_close = B_FALSE;
+	if (consumer == NULL)
+		return;
+
+	ZFS_LOG(1, "Closing access to %s.", consumer->provider->name);
+	KASSERT(consumer->private != NULL,
+	    ("%s: cp->private is NULL", __func__));
+	vdev_list = (struct consumer_priv_t *)&consumer->private;
+	vd->vdev_tsd = NULL;
+
+	/* The _SAFE variant is needed because matching entries are freed. */
+	SLIST_FOREACH_SAFE(cur, vdev_list, elems, next) {
+		if (cur->vd != vd)
+			continue;
+		SLIST_REMOVE(vdev_list, cur, consumer_vdev_elem, elems);
+		g_free(cur);
+	}
+
+	vdev_geom_detach(consumer, B_TRUE);
+}
+
+/*
+ * Issue one or more bios to the vdev in parallel
+ * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
+ * operation is described by parallel entries from each array.  There may be
+ * more bios actually issued than entries in the array
+ *
+ * Each command is split into pieces of at most maxio bytes, where maxio is
+ * the largest multiple of the provider's sector size that does not exceed
+ * maxphys/MAXPHYS.  All bios are issued first and then waited for in issue
+ * order.
+ *
+ * errors[i] is combined with "||", so on return it is 0 if every piece of
+ * command i succeeded and 1 otherwise (not an errno value).  Callers must
+ * initialize errors[] before the call, since the old value is OR-ed in.
+ */
+static void
+vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
+    off_t *sizes, int *errors, int ncmds)
+{
+	struct bio **bios;
+	uint8_t *p;
+	off_t off, maxio, s, end;
+	int i, n_bios, j;
+	size_t bios_size;
+
+#if __FreeBSD_version > 1300130
+	maxio = maxphys - (maxphys % cp->provider->sectorsize);
+#else
+	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
+#endif
+	n_bios = 0;
+
+	/* How many bios are required for all commands ? */
+	for (i = 0; i < ncmds; i++)
+		n_bios += (sizes[i] + maxio - 1) / maxio;
+
+	/* Allocate memory for the bios */
+	bios_size = n_bios * sizeof (struct bio *);
+	bios = kmem_zalloc(bios_size, KM_SLEEP);
+
+	/* Prepare and issue all of the bios */
+	for (i = j = 0; i < ncmds; i++) {
+		off = offsets[i];
+		p = datas[i];
+		s = sizes[i];
+		end = off + s;
+		ASSERT0(off % cp->provider->sectorsize);
+		ASSERT0(s % cp->provider->sectorsize);
+
+		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
+			bios[j] = g_alloc_bio();
+			bios[j]->bio_cmd = cmds[i];
+			/* No completion callback; biowait() is used below. */
+			bios[j]->bio_done = NULL;
+			bios[j]->bio_offset = off;
+			bios[j]->bio_length = MIN(s, maxio);
+			bios[j]->bio_data = (caddr_t)p;
+			g_io_request(bios[j], cp);
+		}
+	}
+	ASSERT3S(j, ==, n_bios);
+
+	/* Wait for all of the bios to complete, and clean them up */
+	for (i = j = 0; i < ncmds; i++) {
+		off = offsets[i];
+		s = sizes[i];
+		end = off + s;
+
+		/* Same stepping as the issue loop, so j tracks the same bio. */
+		for (; off < end; off += maxio, s -= maxio, j++) {
+			errors[i] = biowait(bios[j], "vdev_geom_io") ||
+			    errors[i];
+			g_destroy_bio(bios[j]);
+		}
+	}
+	kmem_free(bios, bios_size);
+}
+
+/*
+ * Read the vdev config from a device.  Return the number of valid labels that
+ * were found.  The vdev config will be returned in config if and only if at
+ * least one valid label was found.
+ *
+ * All VDEV_LABELS label regions are read in one vdev_geom_io() batch.  A
+ * label counts as valid when its nvlist unpacks, its pool state is no
+ * greater than POOL_STATE_L2CACHE and, unless the state is spare or
+ * l2cache, it carries a non-zero pool txg.  When several labels are valid,
+ * *configp ends up holding the config from the last one and the earlier
+ * ones are freed.  Must be called without the topology lock held.
+ */
+static int
+vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
+{
+	struct g_provider *pp;
+	nvlist_t *config;
+	vdev_phys_t *vdev_lists[VDEV_LABELS];
+	char *buf;
+	size_t buflen;
+	uint64_t psize, state, txg;
+	off_t offsets[VDEV_LABELS];
+	off_t size;
+	off_t sizes[VDEV_LABELS];
+	int cmds[VDEV_LABELS];
+	int errors[VDEV_LABELS];
+	int l, nlabels;
+
+	g_topology_assert_not();
+
+	pp = cp->provider;
+	ZFS_LOG(1, "Reading config from %s...", pp->name);
+
+	psize = pp->mediasize;
+	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
+
+	/* sizeof (vdev_phys_t) rounded up to a whole number of sectors. */
+	size = sizeof (*vdev_lists[0]) + pp->sectorsize -
+	    ((sizeof (*vdev_lists[0]) - 1) % pp->sectorsize) - 1;
+
+	buflen = sizeof (vdev_lists[0]->vp_nvlist);
+
+	/* Create all of the IO requests */
+	for (l = 0; l < VDEV_LABELS; l++) {
+		cmds[l] = BIO_READ;
+		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
+		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
+		sizes[l] = size;
+		errors[l] = 0;
+		ASSERT0(offsets[l] % pp->sectorsize);
+	}
+
+	/* Issue the IO requests */
+	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
+	    VDEV_LABELS);
+
+	/* Parse the labels */
+	config = *configp = NULL;
+	nlabels = 0;
+	for (l = 0; l < VDEV_LABELS; l++) {
+		if (errors[l] != 0)
+			continue;
+
+		buf = vdev_lists[l]->vp_nvlist;
+
+		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state > POOL_STATE_L2CACHE) {
+			nvlist_free(config);
+			continue;
+		}
+
+		if (state != POOL_STATE_SPARE &&
+		    state != POOL_STATE_L2CACHE &&
+		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(config);
+			continue;
+		}
+
+		/* Keep only the most recently parsed valid config. */
+		if (*configp != NULL)
+			nvlist_free(*configp);
+		*configp = config;
+		nlabels++;
+	}
+
+	/* Free the label storage */
+	for (l = 0; l < VDEV_LABELS; l++)
+		kmem_free(vdev_lists[l], size);
+
+	return (nlabels);
+}
+
+/*
+ * Make sure the *configs array has a slot for index id.
+ *
+ * When id is already below *count nothing happens.  Otherwise a zeroed array
+ * of id + 1 pointers is allocated, the existing entries are copied over, the
+ * old array (if any) is freed, and *configs / *count are updated.
+ */
+static void
+resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
+{
+	nvlist_t **grown;
+	nvlist_t **previous = *configs;
+	uint64_t old_count = *count;
+	uint64_t slot;
+
+	if (id < old_count)
+		return;
+
+	grown = kmem_zalloc((id + 1) * sizeof (nvlist_t *), KM_SLEEP);
+	for (slot = 0; slot < old_count; slot++)
+		grown[slot] = previous[slot];
+
+	if (previous != NULL)
+		kmem_free(previous, old_count * sizeof (void *));
+
+	*configs = grown;
+	*count = id + 1;
+}
+
+/*
+ * Decide whether a freshly read vdev config belongs to the pool being
+ * searched for and, if so, file it in the per-top-level-vdev configs array.
+ *
+ * configs / count	growable array indexed by the vdev tree's
+ *			ZPOOL_CONFIG_ID, and its length
+ * cfg			config to examine; ownership passes to this function,
+ *			which either stores it in *configs or frees it
+ * name			pool name the config must carry
+ * known_pool_guid	in/out: 0 until the first accepted config, after
+ *			which every later config must have the same pool guid
+ *
+ * When a slot is already occupied, the config with the higher pool txg wins.
+ */
+static void
+process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
+    const char *name, uint64_t *known_pool_guid)
+{
+	nvlist_t *vdev_tree;
+	uint64_t pool_guid;
+	uint64_t vdev_guid;
+	uint64_t id, txg, known_txg;
+	char *pname;
+
+	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
+	    strcmp(pname, name) != 0)
+		goto ignore;
+
+	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
+		goto ignore;
+
+	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
+		goto ignore;
+
+	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
+		goto ignore;
+
+	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
+		goto ignore;
+
+	/*
+	 * vdev_geom_read_config() accepts spare and L2ARC labels that carry
+	 * no pool txg, so the key may legitimately be absent here.  Use the
+	 * fallible lookup and skip such a label instead of asserting on
+	 * on-disk contents.
+	 */
+	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) != 0)
+		goto ignore;
+
+	if (*known_pool_guid != 0) {
+		if (pool_guid != *known_pool_guid)
+			goto ignore;
+	} else
+		*known_pool_guid = pool_guid;
+
+	resize_configs(configs, count, id);
+
+	if ((*configs)[id] != NULL) {
+		/* Every stored config passed the txg lookup above. */
+		known_txg = fnvlist_lookup_uint64((*configs)[id],
+		    ZPOOL_CONFIG_POOL_TXG);
+		if (txg <= known_txg)
+			goto ignore;
+		nvlist_free((*configs)[id]);
+	}
+
+	(*configs)[id] = cfg;
+	return;
+
+ignore:
+	nvlist_free(cfg);
+}
+
+/*
+ * Taste every GEOM provider in the system and collect the vdev configs that
+ * belong to the pool called name.
+ *
+ * On return *configs points to an array of *count config pointers filled in
+ * by process_vdev_config(); both are reset to NULL / 0 before the scan.
+ * Returns 0 when *count ends up greater than zero, ENOENT otherwise.
+ */
+int
+vdev_geom_read_pool_label(const char *name,
+    nvlist_t ***configs, uint64_t *count)
+{
+	struct g_class *mp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct g_consumer *zcp;
+	nvlist_t *vdev_cfg;
+	uint64_t pool_guid;
+	int nlabels;
+
+	DROP_GIANT();
+	g_topology_lock();
+
+	*configs = NULL;
+	*count = 0;
+	pool_guid = 0;
+	LIST_FOREACH(mp, &g_classes, class) {
+		/* Skip the ZFS vdev class itself. */
+		if (mp == &zfs_vdev_class)
+			continue;
+		LIST_FOREACH(gp, &mp->geom, geom) {
+			if (gp->flags & G_GEOM_WITHER)
+				continue;
+			LIST_FOREACH(pp, &gp->provider, provider) {
+				if (pp->flags & G_PF_WITHER)
+					continue;
+				/* Attach a temporary tasting consumer. */
+				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
+				if (zcp == NULL)
+					continue;
+				/*
+				 * The label read runs with the topology lock
+				 * dropped (vdev_geom_read_config() asserts
+				 * that it is not held).
+				 */
+				g_topology_unlock();
+				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
+				g_topology_lock();
+				vdev_geom_detach(zcp, B_TRUE);
+				if (nlabels == 0)
+					continue;
+				ZFS_LOG(1, "successfully read vdev config");
+
+				/* Takes ownership of vdev_cfg. */
+				process_vdev_config(configs, count,
+				    vdev_cfg, name, &pool_guid);
+			}
+		}
+	}
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	return (*count > 0 ? 0 : ENOENT);
+}
+
+/*
+ * Quality of the match between a provider's labels and a vdev, ordered so
+ * that a larger value is a better match.  TOPGUID_MATCH and ZERO_MATCH
+ * share the value 1: vdev_attach_ok() returns ZERO_MATCH + <number of valid
+ * labels> for a vdev guid match, which yields ONE_MATCH..FULL_MATCH.
+ */
+enum match {
+	NO_MATCH = 0,		/* No matching labels found */
+	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid */
+	ZERO_MATCH = 1,		/* Should never be returned */
+	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
+	TWO_MATCH = 3,		/* 2 label matching the vdev_guid */
+	THREE_MATCH = 4,	/* 3 label matching the vdev_guid */
+	FULL_MATCH = 5		/* all labels match the vdev_guid */
+};
+
+/*
+ * Taste provider pp and grade how well its labels match vdev vd.
+ *
+ * A temporary consumer is attached, the labels are read with the topology
+ * lock dropped, and the consumer is detached again.  Returns NO_MATCH when
+ * the provider cannot be attached or read, or when the pool or vdev guids
+ * disagree; ZERO_MATCH plus the number of valid labels when the vdev guid
+ * matches; TOPGUID_MATCH when only the top-level guid matches and vd is its
+ * own top-level vdev.
+ */
+static enum match
+vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
+{
+	struct g_consumer *taster;
+	nvlist_t *label;
+	uint64_t label_pool = 0, label_top = 0, label_vdev = 0;
+	int found;
+
+	taster = vdev_geom_attach(pp, NULL, B_TRUE);
+	if (taster == NULL) {
+		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
+		    pp->name);
+		return (NO_MATCH);
+	}
+
+	/* The label read must not hold the topology lock. */
+	g_topology_unlock();
+	found = vdev_geom_read_config(taster, &label);
+	g_topology_lock();
+	vdev_geom_detach(taster, B_TRUE);
+
+	if (found == 0) {
+		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
+		return (NO_MATCH);
+	}
+
+	/* A missing key simply leaves the corresponding guid at zero. */
+	(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
+	    &label_pool);
+	(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID, &label_top);
+	(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &label_vdev);
+	nvlist_free(label);
+
+	/*
+	 * The pool guid has to agree with the pool we are opening.  Inactive
+	 * spares and L2ARCs have no pool guid in their label, so zero is
+	 * let through.
+	 */
+	if (label_pool != 0 && label_pool != spa_guid(vd->vdev_spa)) {
+		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
+		    pp->name,
+		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)label_pool);
+		return (NO_MATCH);
+	}
+
+	/* Exact vdev guid match: grade by the number of valid labels. */
+	if (label_vdev == vd->vdev_guid) {
+		ZFS_LOG(1, "guids match for provider %s.", pp->name);
+		return (ZERO_MATCH + found);
+	}
+
+	/*
+	 * Handles a possible race on vdev detach, when the remaining vdev
+	 * receives the GUID of the destroyed top level mirror vdev.
+	 */
+	if (label_top == vd->vdev_guid && vd == vd->vdev_top) {
+		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
+		return (TOPGUID_MATCH);
+	}
+
+	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
+	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)label_vdev);
+	return (NO_MATCH);
+}
+
+/*
+ * Search all GEOM providers for the one whose labels best match vd and
+ * attach to it.
+ *
+ * Every provider of every non-withering geom outside zfs_vdev_class is
+ * graded with vdev_attach_ok().  The highest grade wins; on a tie the
+ * provider whose name equals the device part of vd->vdev_path is preferred.
+ * The scan stops early at the first FULL_MATCH.  Returns the attached
+ * consumer, or NULL when nothing was selected or the attach failed.
+ *
+ * NOTE(review): the tie rule also fires while best_match is still NO_MATCH,
+ * so a provider that merely has the recorded name but no matching label can
+ * become best_pp -- confirm this is intended.
+ */
+static struct g_consumer *
+vdev_geom_attach_by_guids(vdev_t *vd)
+{
+	struct g_class *mp;
+	struct g_geom *gp;
+	struct g_provider *pp, *best_pp;
+	struct g_consumer *cp;
+	const char *vdpath;
+	enum match match, best_match;
+
+	g_topology_assert();
+
+	/* Provider names do not include the "/dev/" prefix. */
+	vdpath = vd->vdev_path + sizeof ("/dev/") - 1;
+	cp = NULL;
+	best_pp = NULL;
+	best_match = NO_MATCH;
+	LIST_FOREACH(mp, &g_classes, class) {
+		if (mp == &zfs_vdev_class)
+			continue;
+		LIST_FOREACH(gp, &mp->geom, geom) {
+			if (gp->flags & G_GEOM_WITHER)
+				continue;
+			LIST_FOREACH(pp, &gp->provider, provider) {
+				match = vdev_attach_ok(vd, pp);
+				if (match > best_match) {
+					best_match = match;
+					best_pp = pp;
+				} else if (match == best_match) {
+					/* Tie: prefer the recorded path. */
+					if (strcmp(pp->name, vdpath) == 0) {
+						best_pp = pp;
+					}
+				}
+				/* Nothing can beat a full match. */
+				if (match == FULL_MATCH)
+					goto out;
+			}
+		}
+	}
+
+out:
+	if (best_pp) {
+		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
+		if (cp == NULL) {
+			printf("ZFS WARNING: Unable to attach to %s.\n",
+			    best_pp->name);
+		}
+	}
+	return (cp);
+}
+
+/*
+ * Locate the provider for vd by its guids rather than by its path.
+ *
+ * On success vd->vdev_path is replaced with "/dev/<provider name>" of the
+ * provider that was attached and the consumer is returned; on failure NULL
+ * is returned and vd->vdev_path is left alone.
+ */
+static struct g_consumer *
+vdev_geom_open_by_guids(vdev_t *vd)
+{
+	struct g_consumer *cp;
+	char *newpath;
+	size_t pathlen;
+
+	g_topology_assert();
+
+	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
+	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
+
+	cp = vdev_geom_attach_by_guids(vd);
+	if (cp == NULL) {
+		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
+		    (uintmax_t)spa_guid(vd->vdev_spa),
+		    (uintmax_t)vd->vdev_guid);
+		return (cp);
+	}
+
+	/* Record where the device was actually found. */
+	pathlen = strlen("/dev/") + strlen(cp->provider->name) + 1;
+	newpath = kmem_alloc(pathlen, KM_SLEEP);
+	snprintf(newpath, pathlen, "/dev/%s", cp->provider->name);
+
+	spa_strfree(vd->vdev_path);
+	vd->vdev_path = newpath;
+
+	ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
+	    (uintmax_t)spa_guid(vd->vdev_spa),
+	    (uintmax_t)vd->vdev_guid, cp->provider->name);
+
+	return (cp);
+}
+
+/*
+ * Look the provider up by the name recorded in vd->vdev_path and attach to
+ * it.  When check_guid is non-zero the provider is accepted only if
+ * vdev_attach_ok() grades it FULL_MATCH.  Returns the consumer, or NULL when
+ * the provider does not exist, is rejected, or cannot be attached.
+ */
+static struct g_consumer *
+vdev_geom_open_by_path(vdev_t *vd, int check_guid)
+{
+	struct g_provider *pp;
+
+	g_topology_assert();
+
+	/* Provider names do not include the "/dev/" prefix. */
+	pp = g_provider_by_name(vd->vdev_path + sizeof ("/dev/") - 1);
+	if (pp == NULL)
+		return (NULL);
+
+	ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
+	if (check_guid && vdev_attach_ok(vd, pp) != FULL_MATCH)
+		return (NULL);
+
+	return (vdev_geom_attach(pp, vd, B_FALSE));
+}
+
+/*
+ * Open the GEOM provider backing a disk vdev, or refresh its properties
+ * when the vdev is being reopened with a consumer already attached.
+ *
+ * On success *psize and *max_psize are set to the provider's media size,
+ * *logical_ashift is derived from the sector size (at least
+ * SPA_MINBLOCKSIZE), *physical_ashift from the stripe size (0 when the
+ * stripe size is unusable), and the vdev's nowritecache, nonrot, has_trim
+ * and has_securetrim fields are updated.
+ *
+ * Returns 0 on success; EINVAL when vdev_path is missing or not under
+ * "/dev/", or when the sector size is unsupported; ENOENT when no provider
+ * could be found; otherwise the g_access() error from the write open.
+ */
+static int
+vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	int error, has_trim;
+	uint16_t rate;
+
+	/*
+	 * We must have a pathname, and it must be absolute.
+	 */
+	if (vd->vdev_path == NULL || strncmp(vd->vdev_path, "/dev/", 5) != 0) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+		return (EINVAL);
+	}
+
+	/*
+	 * Reopen the device if it's not currently open. Otherwise,
+	 * just update the physical size of the device.
+	 */
+	if ((cp = vd->vdev_tsd) != NULL) {
+		ASSERT(vd->vdev_reopening);
+		goto skip_open;
+	}
+
+	/*
+	 * Set the TLS to indicate downstack that we
+	 * should not access zvols.  This is done only after the two early
+	 * exits above, so that every path which sets the key also reaches
+	 * the matching clear below and the marker cannot be left behind on
+	 * this thread.
+	 */
+	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, vd));
+
+	DROP_GIANT();
+	g_topology_lock();
+	error = 0;
+
+	if (vd->vdev_spa->spa_is_splitting ||
+	    ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
+	    (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
+	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)))) {
+		/*
+		 * We are dealing with a vdev that hasn't been previously
+		 * opened (since boot), and we are not loading an
+		 * existing pool configuration.  This looks like a
+		 * vdev add operation to a new or existing pool.
+		 * Assume the user really wants to do this, and find
+		 * GEOM provider by its name, ignoring GUID mismatches.
+		 *
+		 * XXPOLICY: It would be safer to only allow a device
+		 *           that is unlabeled or labeled but missing
+		 *           GUID information to be opened in this fashion,
+		 *           unless we are doing a split, in which case we
+		 *           should allow any guid.
+		 */
+		cp = vdev_geom_open_by_path(vd, 0);
+	} else {
+		/*
+		 * Try using the recorded path for this device, but only
+		 * accept it if its label data contains the expected GUIDs.
+		 */
+		cp = vdev_geom_open_by_path(vd, 1);
+		if (cp == NULL) {
+			/*
+			 * The device at vd->vdev_path doesn't have the
+			 * expected GUIDs. The disks might have merely
+			 * moved around so try all other GEOM providers
+			 * to find one with the right GUIDs.
+			 */
+			cp = vdev_geom_open_by_guids(vd);
+		}
+	}
+
+	/* Clear the TLS now that tasting is done */
+	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, NULL));
+
+	if (cp == NULL) {
+		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
+		error = ENOENT;
+	} else {
+		struct consumer_priv_t *priv;
+		struct consumer_vdev_elem *elem;
+		int spamode;
+
+		/* Record this vdev in the consumer's private list. */
+		priv = (struct consumer_priv_t *)&cp->private;
+		if (cp->private == NULL)
+			SLIST_INIT(priv);
+		elem = g_malloc(sizeof (*elem), M_WAITOK|M_ZERO);
+		elem->vd = vd;
+		SLIST_INSERT_HEAD(priv, elem, elems);
+
+		spamode = spa_mode(vd->vdev_spa);
+		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
+		    !ISP2(cp->provider->sectorsize)) {
+			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
+			    cp->provider->name);
+
+			vdev_geom_close_locked(vd);
+			error = EINVAL;
+			cp = NULL;
+		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
+			int i;
+
+			/*
+			 * Try up to five times to gain write access,
+			 * sleeping hz / 2 ticks with the topology lock
+			 * dropped between attempts.
+			 */
+			for (i = 0; i < 5; i++) {
+				error = g_access(cp, 0, 1, 0);
+				if (error == 0)
+					break;
+				g_topology_unlock();
+				tsleep(vd, 0, "vdev", hz / 2);
+				g_topology_lock();
+			}
+			if (error != 0) {
+				printf("ZFS WARNING: Unable to open %s for "
+				    "writing (error=%d).\n",
+				    cp->provider->name, error);
+				vdev_geom_close_locked(vd);
+				cp = NULL;
+			}
+		}
+	}
+
+	/* Fetch initial physical path information for this device. */
+	if (cp != NULL) {
+		vdev_geom_attrchanged(cp, "GEOM::physpath");
+
+		/* Set other GEOM characteristics */
+		vdev_geom_set_physpath(vd, cp, /* do_null_update */B_FALSE);
+	}
+
+	g_topology_unlock();
+	PICKUP_GIANT();
+	if (cp == NULL) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]",
+		    error);
+		return (error);
+	}
+skip_open:
+	pp = cp->provider;
+
+	/*
+	 * Determine the actual size of the device.
+	 */
+	*max_psize = *psize = pp->mediasize;
+
+	/*
+	 * Determine the device's minimum transfer size and preferred
+	 * transfer size.
+	 */
+	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+	*physical_ashift = 0;
+	if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
+	    ISP2(pp->stripesize) && pp->stripeoffset == 0)
+		*physical_ashift = highbit(pp->stripesize) - 1;
+
+	/*
+	 * Clear the nowritecache settings, so that on a vdev_reopen()
+	 * we will try again.
+	 */
+	vd->vdev_nowritecache = B_FALSE;
+
+	/* Inform the ZIO pipeline that we are non-rotational. */
+	error = g_getattr("GEOM::rotation_rate", cp, &rate);
+	if (error == 0 && rate == DISK_RR_NON_ROTATING)
+		vd->vdev_nonrot = B_TRUE;
+	else
+		vd->vdev_nonrot = B_FALSE;
+
+	/* Set when device reports it supports TRIM. */
+	error = g_getattr("GEOM::candelete", cp, &has_trim);
+	vd->vdev_has_trim = (error == 0 && has_trim);
+
+	/* Set when device reports it supports secure TRIM. */
+	/* unavailable on FreeBSD */
+	vd->vdev_has_securetrim = B_FALSE;
+
+	return (0);
+}
+
+/*
+ * Close the GEOM consumer of vd, taking the topology lock if the caller does
+ * not already hold it.
+ *
+ * During a reopen the consumer is kept unless it has been orphaned or its
+ * provider reports an error; outside a reopen it is always closed.
+ */
+static void
+vdev_geom_close(vdev_t *vd)
+{
+	struct g_consumer *cp = vd->vdev_tsd;
+	boolean_t was_locked;
+	boolean_t must_close;
+
+	DROP_GIANT();
+	was_locked = g_topology_locked();
+	if (!was_locked)
+		g_topology_lock();
+
+	must_close = !vd->vdev_reopening;
+	if (!must_close && cp != NULL) {
+		if ((cp->flags & G_CF_ORPHAN) != 0)
+			must_close = B_TRUE;
+		else if (cp->provider != NULL && cp->provider->error != 0)
+			must_close = B_TRUE;
+	}
+	if (must_close)
+		vdev_geom_close_locked(vd);
+
+	/* Leave the lock in the state the caller had it. */
+	if (!was_locked)
+		g_topology_unlock();
+	PICKUP_GIANT();
+}
+
+/*
+ * bio completion callback (installed as bio_done by vdev_geom_io_start()).
+ *
+ * Copies the bio's error into the zio (turning a short transfer into EIO),
+ * reacts to ENOTSUP and ENXIO, frees the bio for non-read/write zios, and
+ * hands the zio to zio_delay_interrupt().
+ */
+static void
+vdev_geom_io_intr(struct bio *bp)
+{
+	vdev_t *vd;
+	zio_t *zio;
+
+	/* vdev_geom_io_start() stored the zio in bio_caller1. */
+	zio = bp->bio_caller1;
+	vd = zio->io_vd;
+	zio->io_error = bp->bio_error;
+	/* No error but leftover residual: report as EIO. */
+	if (zio->io_error == 0 && bp->bio_resid != 0)
+		zio->io_error = SET_ERROR(EIO);
+
+	switch (zio->io_error) {
+	case ENOTSUP:
+		/*
+		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
+		 * that future attempts will never succeed. In this case
+		 * we set a persistent flag so that we don't bother with
+		 * requests in the future.
+		 */
+		switch (bp->bio_cmd) {
+		case BIO_FLUSH:
+			vd->vdev_nowritecache = B_TRUE;
+			break;
+		case BIO_DELETE:
+			/* No flag is recorded for BIO_DELETE here. */
+			break;
+		}
+		break;
+	case ENXIO:
+		if (!vd->vdev_remove_wanted) {
+			/*
+			 * If provider's error is set we assume it is being
+			 * removed.
+			 */
+			if (bp->bio_to->error != 0) {
+				vd->vdev_remove_wanted = B_TRUE;
+				spa_async_request(zio->io_spa,
+				    SPA_ASYNC_REMOVE);
+			} else if (!vd->vdev_delayed_close) {
+				vd->vdev_delayed_close = B_TRUE;
+			}
+		}
+		break;
+	}
+
+	/*
+	 * We have to split bio freeing into two parts, because the ABD code
+	 * cannot be called in this context and vdev_op_io_done is not called
+	 * for ZIO_TYPE_IOCTL zio-s.
+	 */
+	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
+		g_destroy_bio(bp);
+		zio->io_bio = NULL;
+	}
+	zio_delay_interrupt(zio);
+}
+
+/* Accumulator passed to vdev_geom_check_unmapped_cb() via abd_iterate_func(). */
+struct vdev_geom_check_unmapped_cb_state {
+	int	pages;	/* running count of pages spanned by the segments */
+	uint_t	end;	/* in-page offset where the previous segment ended */
+};
+
+/*
+ * ABD iteration callback: verify that this segment keeps the buffer
+ * "virtually contiguous" for GEOM and add its pages to the running count.
+ * Only the first segment may begin away from a page boundary and only the
+ * last one may end away from one; every page in between must be full.
+ * Returns 1 to stop the iteration when the rule is broken, 0 otherwise.
+ */
+static int
+vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
+{
+	struct vdev_geom_check_unmapped_cb_state *st = priv;
+	vm_offset_t start = (vm_offset_t)buf & PAGE_MASK;
+	vm_offset_t stop = start + len;
+
+	/*
+	 * Reject a later segment that starts mid-page, and any segment that
+	 * follows one which ended mid-page.
+	 */
+	if ((st->pages != 0 && start != 0) || st->end != 0)
+		return (1);
+
+	st->end = stop & PAGE_MASK;
+	st->pages += (stop + PAGE_MASK) >> PAGE_SHIFT;
+	return (0);
+}
+
+/*
+ * Decide whether this zio can be sent to the device as unmapped I/O, which
+ * avoids copying a scattered and/or gang ABD into a linear buffer.
+ * Returns the number of pages to describe, or 0 when unmapped I/O cannot or
+ * need not be used.
+ */
+static int
+vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
+{
+	struct vdev_geom_check_unmapped_cb_state state;
+
+	/*
+	 * Nothing to do when unmapped I/O is administratively disabled, when
+	 * the buffer is already linear, or when the GEOM provider does not
+	 * accept unmapped requests (the data then has to be copied).
+	 */
+	if (!unmapped_buf_allowed ||
+	    abd_is_linear(zio->io_abd) ||
+	    (cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
+		return (0);
+
+	/* Validate the chunk sizes/alignments while counting the pages. */
+	state.pages = state.end = 0;
+	if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
+	    vdev_geom_check_unmapped_cb, &state) != 0)
+		return (0);
+
+	return (state.pages);
+}
+
+/*
+ * ABD iteration callback: append the physical pages backing this segment to
+ * the bio's bio_ma array.  For the first segment the in-page offset is saved
+ * in bio_ma_offset; later segments are asserted to be page aligned.
+ * Always returns 0.
+ */
+static int
+vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
+{
+	struct bio *bp = priv;
+	vm_offset_t va = (vm_offset_t)buf;
+	vm_offset_t limit = va + len;
+
+	if (bp->bio_ma_n != 0) {
+		ASSERT0(P2PHASE(va, PAGE_SIZE));
+	} else {
+		bp->bio_ma_offset = va & PAGE_MASK;
+		va &= ~PAGE_MASK;
+	}
+
+	/* At least one page is always recorded, then one per page step. */
+	for (;;) {
+		bp->bio_ma[bp->bio_ma_n++] =
+		    PHYS_TO_VM_PAGE(pmap_kextract(va));
+		va += PAGE_SIZE;
+		if (va >= limit)
+			break;
+	}
+	return (0);
+}
+
+/*
+ * Start a zio on a GEOM-backed vdev.
+ *
+ * Read, write, TRIM and cache-flush zios are translated into a bio and
+ * handed to g_io_request(); completion is reported through
+ * vdev_geom_io_intr().  IOCTL and TRIM zios that are not sent to the device
+ * are completed directly with zio_execute(), or with zio_interrupt() when
+ * the vdev is not readable.
+ */
+static void
+vdev_geom_io_start(zio_t *zio)
+{
+	vdev_t *vd;
+	struct g_consumer *cp;
+	struct bio *bp;
+
+	vd = zio->io_vd;
+
+	switch (zio->io_type) {
+	case ZIO_TYPE_IOCTL:
+		/* XXPOLICY */
+		if (!vdev_readable(vd)) {
+			zio->io_error = SET_ERROR(ENXIO);
+			zio_interrupt(zio);
+			return;
+		} else {
+			switch (zio->io_cmd) {
+			case DKIOCFLUSHWRITECACHE:
+				/* Flushes disabled: complete without error. */
+				if (zfs_nocacheflush ||
+				    vdev_geom_bio_flush_disable)
+					break;
+				/* A previous flush came back ENOTSUP. */
+				if (vd->vdev_nowritecache) {
+					zio->io_error = SET_ERROR(ENOTSUP);
+					break;
+				}
+				goto sendreq;
+			default:
+				zio->io_error = SET_ERROR(ENOTSUP);
+			}
+		}
+
+		zio_execute(zio);
+		return;
+	case ZIO_TYPE_TRIM:
+		if (!vdev_geom_bio_delete_disable) {
+			goto sendreq;
+		}
+		/* BIO_DELETE disabled: complete without touching the device. */
+		zio_execute(zio);
+		return;
+	default:
+			;
+		/* PASSTHROUGH --- placate compiler */
+	}
+sendreq:
+	ASSERT(zio->io_type == ZIO_TYPE_READ ||
+	    zio->io_type == ZIO_TYPE_WRITE ||
+	    zio->io_type == ZIO_TYPE_TRIM ||
+	    zio->io_type == ZIO_TYPE_IOCTL);
+
+	cp = vd->vdev_tsd;
+	if (cp == NULL) {
+		/* No consumer attached to this vdev. */
+		zio->io_error = SET_ERROR(ENXIO);
+		zio_interrupt(zio);
+		return;
+	}
+	bp = g_alloc_bio();
+	/* Lets vdev_geom_io_intr() find the zio again. */
+	bp->bio_caller1 = zio;
+	switch (zio->io_type) {
+	case ZIO_TYPE_READ:
+	case ZIO_TYPE_WRITE:
+		zio->io_target_timestamp = zio_handle_io_delay(zio);
+		bp->bio_offset = zio->io_offset;
+		bp->bio_length = zio->io_size;
+		if (zio->io_type == ZIO_TYPE_READ)
+			bp->bio_cmd = BIO_READ;
+		else
+			bp->bio_cmd = BIO_WRITE;
+
+		/*
+		 * If possible, represent scattered and/or gang ABD buffer to
+		 * GEOM as an array of physical pages.  It allows to satisfy
+		 * requirement of virtually contiguous buffer without copying.
+		 */
+		int pgs = vdev_geom_check_unmapped(zio, cp);
+		if (pgs > 0) {
+			/* Freed again in vdev_geom_io_done(). */
+			bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
+			    M_DEVBUF, M_WAITOK);
+			bp->bio_ma_n = 0;
+			bp->bio_ma_offset = 0;
+			abd_iterate_func(zio->io_abd, 0, zio->io_size,
+			    vdev_geom_fill_unmap_cb, bp);
+			bp->bio_data = unmapped_buf;
+			bp->bio_flags |= BIO_UNMAPPED;
+		} else {
+			/*
+			 * Mapped I/O: borrow a linear buffer from the ABD
+			 * (with the data copied in for writes); it is
+			 * returned in vdev_geom_io_done().
+			 */
+			if (zio->io_type == ZIO_TYPE_READ) {
+				bp->bio_data = abd_borrow_buf(zio->io_abd,
+				    zio->io_size);
+			} else {
+				bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
+				    zio->io_size);
+			}
+		}
+		break;
+	case ZIO_TYPE_TRIM:
+		bp->bio_cmd = BIO_DELETE;
+		bp->bio_data = NULL;
+		bp->bio_offset = zio->io_offset;
+		bp->bio_length = zio->io_size;
+		break;
+	case ZIO_TYPE_IOCTL:
+		/* Only DKIOCFLUSHWRITECACHE reaches this point. */
+		bp->bio_cmd = BIO_FLUSH;
+		bp->bio_data = NULL;
+		bp->bio_offset = cp->provider->mediasize;
+		bp->bio_length = 0;
+		break;
+	default:
+		panic("invalid zio->io_type: %d\n", zio->io_type);
+	}
+	bp->bio_done = vdev_geom_io_intr;
+	zio->io_bio = bp;
+
+	g_io_request(bp, cp);
+}
+
+/*
+ * Second half of bio teardown for read and write zios: release the page
+ * array or hand the borrowed linear buffer back to the ABD, then destroy the
+ * bio.  For every other zio type the bio is asserted to be gone already.
+ */
+static void
+vdev_geom_io_done(zio_t *zio)
+{
+	struct bio *bp = zio->io_bio;
+	boolean_t is_read = (zio->io_type == ZIO_TYPE_READ);
+
+	if (!is_read && zio->io_type != ZIO_TYPE_WRITE) {
+		ASSERT3P(bp, ==, NULL);
+		return;
+	}
+
+	/* A read/write zio without a bio must have failed with ENXIO. */
+	if (bp == NULL) {
+		ASSERT3S(zio->io_error, ==, ENXIO);
+		return;
+	}
+
+	if (bp->bio_ma != NULL) {
+		/* Unmapped I/O: only the page array needs freeing. */
+		free(bp->bio_ma, M_DEVBUF);
+	} else if (is_read) {
+		/* Copy the data that was read back into the ABD. */
+		abd_return_buf_copy(zio->io_abd, bp->bio_data,
+		    zio->io_size);
+	} else {
+		abd_return_buf(zio->io_abd, bp->bio_data,
+		    zio->io_size);
+	}
+
+	g_destroy_bio(bp);
+	zio->io_bio = NULL;
+}
+
+/* vdev_op_hold callback for vdev_disk_ops; intentionally does nothing. */
+static void
+vdev_geom_hold(vdev_t *vd)
+{
+}
+
+/* vdev_op_rele callback for vdev_disk_ops; intentionally does nothing. */
+static void
+vdev_geom_rele(vdev_t *vd)
+{
+}
+
+/*
+ * Operations vector for disk vdevs, backed by the vdev_geom_* functions
+ * in this file.  Callbacks left NULL are not provided by this vdev type.
+ */
+vdev_ops_t vdev_disk_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_geom_open,
+	.vdev_op_close = vdev_geom_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_geom_io_start,
+	.vdev_op_io_done = vdev_geom_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_geom_hold,
+	.vdev_op_rele = vdev_geom_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+};

diff --git a/zfs/module/os/freebsd/zfs/vdev_label_os.c b/zfs/module/os/freebsd/zfs/vdev_label_os.c
new file mode 100644
index 0000000..48f5880
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/vdev_label_os.c

@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu.h>
+#include <sys/zap.h>
+#include <sys/vdev.h>
+#include <sys/vdev_os.h>
+#include <sys/vdev_impl.h>
+#include <sys/uberblock_impl.h>
+#include <sys/metaslab.h>
+#include <sys/metaslab_impl.h>
+#include <sys/zio.h>
+#include <sys/dsl_scan.h>
+#include <sys/abd.h>
+#include <sys/fs/zfs.h>
+
+/*
+ * Write buf into the label region at offsetof(vdev_label_t, vl_be) of
+ * leaf vdev vd, zero-padded to VDEV_PAD_SIZE bytes.
+ *
+ * Returns EINVAL when size exceeds VDEV_PAD_SIZE, ENODEV when vd is not a
+ * leaf, ENXIO when vd is dead, and otherwise the zio_wait() result.  A
+ * failed write is retried once with ZIO_FLAG_TRYHARD added.
+ */
+int
+vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
+{
+	spa_t *spa = vd->vdev_spa;
+	abd_t *payload;
+	zio_t *root;
+	int zflags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+	int error;
+
+	if (size > VDEV_PAD_SIZE)
+		return (EINVAL);
+
+	if (!vd->vdev_ops->vdev_op_leaf)
+		return (ENODEV);
+	if (vdev_is_dead(vd))
+		return (ENXIO);
+
+	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL);
+
+	/* Build a zero-filled pad area with the caller's bytes in front. */
+	payload = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
+	abd_zero(payload, VDEV_PAD_SIZE);
+	abd_copy_from_buf(payload, buf, size);
+
+	for (;;) {
+		root = zio_root(spa, NULL, NULL, zflags);
+		vdev_label_write(root, vd, 0, payload,
+		    offsetof(vdev_label_t, vl_be),
+		    VDEV_PAD_SIZE, NULL, NULL, zflags);
+		error = zio_wait(root);
+
+		/* Done on success, or once the TRYHARD attempt was made. */
+		if (error == 0 || (zflags & ZIO_FLAG_TRYHARD))
+			break;
+		zflags |= ZIO_FLAG_TRYHARD;
+	}
+
+	abd_free(payload);
+	return (error);
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_acl.c b/zfs/module/os/freebsd/zfs/zfs_acl.c
new file mode 100644
index 0000000..fe0f691
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_acl.c

@@ -0,0 +1,2674 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/unistd.h>
+#include <sys/sdt.h>
+#include <sys/fs/zfs.h>
+#include <sys/policy.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/dmu.h>
+#include <sys/dnode.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <acl/acl_common.h>
+
+
+/* Short names for the basic ACE types and the range of known types. */
+#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
+#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
+#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
+#define	MIN_ACE_TYPE	ALLOW
+
+/* Flag combination identifying the owning-group entry, and access masks. */
+#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
+#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
+    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
+#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
+#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
+
+#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
+    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
+    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
+    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
+
+/* Write permissions, split into data-related and attribute-related bits. */
+#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
+#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
+    ACE_DELETE|ACE_DELETE_CHILD)
+#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
+
+/* Note: OGE_CLEAR and OKAY_MASK_BITS currently expand to the same bit set. */
+#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
+
+#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
+
+/* Every inheritance-related ACE flag. */
+#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
+    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
+
+#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)
+
+/* ACL-wide (as opposed to per-ACE) flag sets. */
+#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
+    ZFS_ACL_PROTECTED)
+
+#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
+    ZFS_ACL_OBJ_ACE)
+
+/* Execute permission bits for user, group and other. */
+#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
+
+/* Return the z_type field of an old-format (zfs_oldace_t) ACE. */
+static uint16_t
+zfs_ace_v0_get_type(void *acep)
+{
+	zfs_oldace_t *ace = acep;
+
+	return (ace->z_type);
+}
+
+/* Return the z_flags field of an old-format ACE. */
+static uint16_t
+zfs_ace_v0_get_flags(void *acep)
+{
+	zfs_oldace_t *ace = acep;
+
+	return (ace->z_flags);
+}
+
+/* Return the z_access_mask field of an old-format ACE. */
+static uint32_t
+zfs_ace_v0_get_mask(void *acep)
+{
+	zfs_oldace_t *ace = acep;
+
+	return (ace->z_access_mask);
+}
+
+/* Return the z_fuid field of an old-format ACE. */
+static uint64_t
+zfs_ace_v0_get_who(void *acep)
+{
+	zfs_oldace_t *ace = acep;
+
+	return (ace->z_fuid);
+}
+
+/* Store type in the z_type field of an old-format (zfs_oldace_t) ACE. */
+static void
+zfs_ace_v0_set_type(void *acep, uint16_t type)
+{
+	zfs_oldace_t *ace = acep;
+
+	ace->z_type = type;
+}
+
+/* Store flags in the z_flags field of an old-format ACE. */
+static void
+zfs_ace_v0_set_flags(void *acep, uint16_t flags)
+{
+	zfs_oldace_t *ace = acep;
+
+	ace->z_flags = flags;
+}
+
+/* Store mask in the z_access_mask field of an old-format ACE. */
+static void
+zfs_ace_v0_set_mask(void *acep, uint32_t mask)
+{
+	zfs_oldace_t *ace = acep;
+
+	ace->z_access_mask = mask;
+}
+
+/* Store who in the z_fuid field of an old-format ACE. */
+static void
+zfs_ace_v0_set_who(void *acep, uint64_t who)
+{
+	zfs_oldace_t *ace = acep;
+
+	ace->z_fuid = who;
+}
+
+/*
+ * Size in bytes of an old-format ACE.  The argument is not examined: every
+ * old-format entry is a zfs_oldace_t.
+ */
+/*ARGSUSED*/
+static size_t
+zfs_ace_v0_size(void *acep)
+{
+	return (sizeof (zfs_oldace_t));
+}
+
+/* Size reported as the "abstract" size of an old-format ACE. */
+static size_t
+zfs_ace_v0_abstract_size(void)
+{
+	return (sizeof (zfs_oldace_t));
+}
+
+/* Byte offset of the access mask inside an old-format ACE. */
+static int
+zfs_ace_v0_mask_off(void)
+{
+	return (offsetof(zfs_oldace_t, z_access_mask));
+}
+
+/* Old-format ACEs have no extra data: *datap is NULL and 0 is returned. */
+/*ARGSUSED*/
+static int
+zfs_ace_v0_data(void *acep, void **datap)
+{
+	*datap = NULL;
+	return (0);
+}
+
+/*
+ * Accessor table for old-format (zfs_oldace_t) ACEs.  The initializer is
+ * positional, so the order here must follow the member order of acl_ops_t.
+ */
+static acl_ops_t zfs_acl_v0_ops = {
+	zfs_ace_v0_get_mask,
+	zfs_ace_v0_set_mask,
+	zfs_ace_v0_get_flags,
+	zfs_ace_v0_set_flags,
+	zfs_ace_v0_get_type,
+	zfs_ace_v0_set_type,
+	zfs_ace_v0_get_who,
+	zfs_ace_v0_set_who,
+	zfs_ace_v0_size,
+	zfs_ace_v0_abstract_size,
+	zfs_ace_v0_mask_off,
+	zfs_ace_v0_data
+};
+
+/* Return the z_type field from the header of a FUID-format ACE. */
+static uint16_t
+zfs_ace_fuid_get_type(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_type);
+}
+
+/* Return the z_flags field from the header of a FUID-format ACE. */
+static uint16_t
+zfs_ace_fuid_get_flags(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_flags);
+}
+
+/* Return the z_access_mask field from the header of a FUID-format ACE. */
+static uint32_t
+zfs_ace_fuid_get_mask(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_access_mask);
+}
+
+/*
+ * Return the FUID stored in a FUID-format ACE.  Owner, owning-group and
+ * everyone entries yield -1 (converted to uint64_t) instead of reading
+ * z_fuid.
+ */
+static uint64_t
+zfs_ace_fuid_get_who(void *args)
+{
+	zfs_ace_t *ace = args;
+	uint16_t who_kind = ace->z_hdr.z_flags & ACE_TYPE_FLAGS;
+
+	if (who_kind != ACE_OWNER && who_kind != OWNING_GROUP &&
+	    who_kind != ACE_EVERYONE)
+		return (ace->z_fuid);
+
+	return (-1);
+}
+
+/* Store type in the z_type field of a FUID-format ACE header. */
+static void
+zfs_ace_fuid_set_type(void *acep, uint16_t type)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_type = type;
+}
+
+/* Store flags in the z_flags field of a FUID-format ACE header. */
+static void
+zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_flags = flags;
+}
+
+/* Store mask in the z_access_mask field of a FUID-format ACE header. */
+static void
+zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_access_mask = mask;
+}
+
+/*
+ * Store who as the FUID of a FUID-format ACE.  Owner, owning-group and
+ * everyone entries are left untouched.
+ */
+static void
+zfs_ace_fuid_set_who(void *arg, uint64_t who)
+{
+	zfs_ace_t *ace = arg;
+	uint16_t who_kind = ace->z_hdr.z_flags & ACE_TYPE_FLAGS;
+
+	if (who_kind != ACE_OWNER && who_kind != OWNING_GROUP &&
+	    who_kind != ACE_EVERYONE)
+		ace->z_fuid = who;
+}
+
+/*
+ * Size in bytes of a FUID-format ACE, which depends on its type:
+ *  - the four object ACE types occupy a zfs_object_ace_t;
+ *  - ALLOW/DENY entries for owner, owning group or everyone occupy only
+ *    the header (zfs_ace_hdr_t);
+ *  - everything else occupies a zfs_ace_t.
+ */
+static size_t
+zfs_ace_fuid_size(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+	uint16_t type = hdr->z_type;
+
+	if (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+	    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE)
+		return (sizeof (zfs_object_ace_t));
+
+	if (type == ALLOW || type == DENY) {
+		uint16_t who_kind = hdr->z_flags & ACE_TYPE_FLAGS;
+
+		if (who_kind == ACE_OWNER ||
+		    who_kind == OWNING_GROUP ||
+		    who_kind == ACE_EVERYONE)
+			return (sizeof (zfs_ace_hdr_t));
+	}
+
+	return (sizeof (zfs_ace_t));
+}
+
+/* Size reported as the "abstract" size of a FUID-format ACE: the header. */
+static size_t
+zfs_ace_fuid_abstract_size(void)
+{
+	return (sizeof (zfs_ace_hdr_t));
+}
+
+/* Byte offset of the access mask inside a FUID-format ACE header. */
+static int
+zfs_ace_fuid_mask_off(void)
+{
+	return (offsetof(zfs_ace_hdr_t, z_access_mask));
+}
+
+/*
+ * Locate the extra payload that follows the zfs_ace_t portion of an object
+ * ACE.  For the four object ACE types *datap is pointed just past the
+ * zfs_ace_t and the payload length is returned; for every other type
+ * *datap is set to NULL and 0 is returned.
+ */
+static int
+zfs_ace_fuid_data(void *acep, void **datap)
+{
+	zfs_ace_t *ace = acep;
+
+	switch (ace->z_hdr.z_type) {
+	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+		break;
+	default:
+		*datap = NULL;
+		return (0);
+	}
+
+	*datap = (caddr_t)acep + sizeof (zfs_ace_t);
+	return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
+}
+
+/*
+ * Operations vector for FUID-format ACEs.  zfs_acl_alloc() installs it when
+ * the requested ACL version is ZFS_ACL_VERSION_FUID, and zfs_acl_xform()
+ * switches an ACL over to it during conversion.
+ *
+ * The initializer is positional, so the order of the entries must match the
+ * member order of acl_ops_t (declared outside this view -- verify against
+ * the structure definition before reordering or inserting entries).
+ */
+static acl_ops_t zfs_acl_fuid_ops = {
+	zfs_ace_fuid_get_mask,
+	zfs_ace_fuid_set_mask,
+	zfs_ace_fuid_get_flags,
+	zfs_ace_fuid_set_flags,
+	zfs_ace_fuid_get_type,
+	zfs_ace_fuid_set_type,
+	zfs_ace_fuid_get_who,
+	zfs_ace_fuid_set_who,
+	zfs_ace_fuid_size,
+	zfs_ace_fuid_abstract_size,
+	zfs_ace_fuid_mask_off,
+	zfs_ace_fuid_data
+};
+
+/*
+ * The following three functions are provided for compatibility with
+ * older ZPL versions, in order to determine whether the file used to have
+ * an external ACL and which version of ACL previously existed on the
+ * file.  Would really be nice to not need this, sigh.
+ *
+ * zfs_external_acl() returns the object number of the znode's external
+ * ACL object, or 0 when the znode uses system attributes (or was upgraded
+ * to them while we were looking).
+ */
+uint64_t
+zfs_external_acl(znode_t *zp)
+{
+	zfs_acl_phys_t acl_phys;
+	int error;
+
+	if (zp->z_is_sa)
+		return (0);
+
+	/*
+	 * zfs_sa_upgrade could race with us and flip z_is_sa after the
+	 * check above.  A failed lookup is therefore only acceptable if
+	 * z_is_sa is now set.
+	 */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
+	    &acl_phys, sizeof (acl_phys));
+	if (error == 0)
+		return (acl_phys.z_acl_extern_obj);
+
+	/*
+	 * After an upgrade the SA_ZPL_ZNODE_ACL attribute should have been
+	 * removed, so the only tolerated failure is ENOENT.
+	 */
+	VERIFY(zp->z_is_sa);
+	VERIFY3S(error, ==, ENOENT);
+	return (0);
+}
+
+/*
+ * Report the ACL's size in bytes (*aclsize) and its entry count (*aclcount).
+ *
+ * SA znodes answer from the DACL_ACES / DACL_COUNT attributes.  Legacy
+ * znodes read the zfs_acl_phys_t into *aclphys; for the initial ACL version
+ * the z_acl_size field holds the entry count, so the byte size is derived
+ * with ZFS_ACL_SIZE().
+ *
+ * Caller must hold zp->z_acl_lock.  Returns 0 or the error from the
+ * underlying SA call.
+ */
+static int
+zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
+    zfs_acl_phys_t *aclphys)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	uint64_t acl_count;
+	int size;
+	int error;
+
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+
+	if (!zp->z_is_sa) {
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    aclphys, sizeof (*aclphys));
+		if (error != 0)
+			return (error);
+
+		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
+			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
+			*aclcount = aclphys->z_acl_size;
+		} else {
+			*aclsize = aclphys->z_acl_size;
+			*aclcount = aclphys->z_acl_count;
+		}
+		return (0);
+	}
+
+	error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), &size);
+	if (error != 0)
+		return (error);
+	*aclsize = size;
+
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
+	    &acl_count, sizeof (acl_count));
+	if (error != 0)
+		return (error);
+	*aclcount = acl_count;
+
+	return (0);
+}
+
+/*
+ * Return the ACL version stored for this znode.  SA znodes are always
+ * ZFS_ACL_VERSION_FUID; legacy znodes report the version recorded in their
+ * zfs_acl_phys_t.
+ */
+int
+zfs_znode_acl_version(znode_t *zp)
+{
+	zfs_acl_phys_t acl_phys;
+	int error;
+
+	if (zp->z_is_sa)
+		return (ZFS_ACL_VERSION_FUID);
+
+	/*
+	 * zfs_sa_upgrade could race with us and flip z_is_sa after the
+	 * check above.  A failed lookup is therefore only acceptable if
+	 * z_is_sa is now set.
+	 */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
+	    &acl_phys, sizeof (acl_phys));
+	if (error == 0)
+		return (acl_phys.z_acl_version);
+
+	/*
+	 * After an upgrade SA_ZPL_ZNODE_ACL should have been removed, so
+	 * the only tolerated failure is ENOENT.
+	 */
+	VERIFY(zp->z_is_sa);
+	VERIFY3S(error, ==, ENOENT);
+	return (ZFS_ACL_VERSION_FUID);
+}
+
+/*
+ * Map a ZPL version number to the ACL version it uses: anything older than
+ * ZPL_VERSION_FUID gets the initial ACL format, otherwise the FUID format.
+ */
+static int
+zfs_acl_version(int version)
+{
+	return ((version < ZPL_VERSION_FUID) ?
+	    ZFS_ACL_VERSION_INITIAL : ZFS_ACL_VERSION_FUID);
+}
+
+/*
+ * ACL version implied by the ZPL version of the filesystem that owns zp.
+ */
+static int
+zfs_acl_version_zp(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	return (zfs_acl_version(zfsvfs->z_version));
+}
+
+/*
+ * Allocate an empty, zero-filled ACL of the given version, with its node
+ * list initialized and the ops vector matching that version (FUID ops for
+ * ZFS_ACL_VERSION_FUID, v0 ops for anything else).
+ */
+zfs_acl_t *
+zfs_acl_alloc(int vers)
+{
+	zfs_acl_t *acl = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
+
+	list_create(&acl->z_acl, sizeof (zfs_acl_node_t),
+	    offsetof(zfs_acl_node_t, z_next));
+	acl->z_version = vers;
+	acl->z_ops = (vers == ZFS_ACL_VERSION_FUID) ?
+	    &zfs_acl_fuid_ops : &zfs_acl_v0_ops;
+
+	return (acl);
+}
+
+/*
+ * Allocate a zero-filled ACL node.  When bytes is non-zero a data buffer of
+ * that size is attached and recorded as both the allocation and the
+ * current data size; with bytes == 0 the node has no buffer.
+ */
+zfs_acl_node_t *
+zfs_acl_node_alloc(size_t bytes)
+{
+	zfs_acl_node_t *node = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
+
+	if (bytes == 0)
+		return (node);
+
+	node->z_acldata = kmem_alloc(bytes, KM_SLEEP);
+	node->z_allocdata = node->z_acldata;
+	node->z_allocsize = bytes;
+	node->z_size = bytes;
+
+	return (node);
+}
+
+/*
+ * Free an ACL node along with its data buffer, if one was allocated.
+ */
+static void
+zfs_acl_node_free(zfs_acl_node_t *aclnode)
+{
+	if (aclnode->z_allocsize != 0) {
+		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
+	}
+
+	kmem_free(aclnode, sizeof (zfs_acl_node_t));
+}
+
+/*
+ * Detach and free every node on the ACL's list, then reset the ACL's
+ * entry count and byte count to zero.
+ */
+static void
+zfs_acl_release_nodes(zfs_acl_t *aclp)
+{
+	zfs_acl_node_t *node;
+
+	for (node = list_head(&aclp->z_acl); node != NULL;
+	    node = list_head(&aclp->z_acl)) {
+		list_remove(&aclp->z_acl, node);
+		zfs_acl_node_free(node);
+	}
+
+	aclp->z_acl_bytes = 0;
+	aclp->z_acl_count = 0;
+}
+
+/*
+ * Destroy an ACL: release all of its nodes, tear down the (now empty)
+ * node list, and free the ACL structure itself.
+ */
+void
+zfs_acl_free(zfs_acl_t *aclp)
+{
+	/* The list must be emptied before it can be destroyed. */
+	zfs_acl_release_nodes(aclp);
+	list_destroy(&aclp->z_acl);
+
+	kmem_free(aclp, sizeof (*aclp));
+}
+
+/*
+ * Check that an ACE's type and "who" flags form a recognized combination.
+ *
+ * ALLOW, DENY, audit and alarm entries must name owner@, group@,
+ * everyone@, a user (no type flag bits set) or an identifier group.  Any
+ * other type is accepted as long as it lies within
+ * [MIN_ACE_TYPE, MAX_ACE_TYPE].
+ */
+static boolean_t
+zfs_acl_valid_ace_type(uint_t type, uint_t flags)
+{
+	uint16_t who_kind;
+
+	if (type != ALLOW && type != DENY &&
+	    type != ACE_SYSTEM_AUDIT_ACE_TYPE &&
+	    type != ACE_SYSTEM_ALARM_ACE_TYPE) {
+		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
+			return (B_TRUE);
+		return (B_FALSE);
+	}
+
+	who_kind = flags & ACE_TYPE_FLAGS;
+	return (who_kind == ACE_OWNER ||
+	    who_kind == OWNING_GROUP ||
+	    who_kind == ACE_EVERYONE || who_kind == 0 ||
+	    who_kind == ACE_IDENTIFIER_GROUP);
+}
+
+/*
+ * Validate a single ACE and update the ACL hints as a side effect.
+ *
+ * Returns B_FALSE when the type/who combination is not recognized, when an
+ * object ACE is used on a pre-FUID ACL, or when INHERIT_ONLY /
+ * NO_PROPAGATE is set without FILE_INHERIT or DIRECTORY_INHERIT.
+ * Otherwise returns B_TRUE.  Object ACEs set ZFS_ACL_OBJ_ACE and
+ * inheritable ACEs on directories set ZFS_INHERIT_ACE in aclp->z_hints.
+ */
+static boolean_t
+zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
+{
+	boolean_t inheritable;
+
+	/* Step 1: the entry type itself. */
+	if (!zfs_acl_valid_ace_type(type, iflags))
+		return (B_FALSE);
+
+	if (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+	    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE) {
+		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
+			return (B_FALSE);
+		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
+	}
+
+	/* Step 2: the inheritance flags. */
+	inheritable = (iflags &
+	    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)) != 0 ?
+	    B_TRUE : B_FALSE;
+
+	if (obj_type == VDIR && inheritable)
+		aclp->z_hints |= ZFS_INHERIT_ACE;
+
+	if ((iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) &&
+	    !inheritable)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Step through the ACEs of aclp, one per call.
+ *
+ * start == NULL rewinds the cursor to the first ACE of the first node.  Any
+ * non-NULL start continues from the cursor kept inside aclp (z_curr_node,
+ * z_next_ace) and in the node (z_ace_idx); the value of start is not used
+ * for anything other than the NULL test.
+ *
+ * On success the ACE's who, access mask, flags and type are returned through
+ * the out parameters (read via aclp->z_ops) and a pointer to the ACE is
+ * returned.  NULL is returned when the ACL has no nodes, when all nodes are
+ * exhausted, or when the next ACE would extend past the end of the current
+ * node's data (z_acldata + z_size); the out parameters are not written in
+ * those cases.
+ */
+static void *
+zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
+    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
+{
+	zfs_acl_node_t *aclnode;
+
+	ASSERT3P(aclp, !=, NULL);
+
+	/* Fresh walk: position the cursor on the first node. */
+	if (start == NULL) {
+		aclnode = list_head(&aclp->z_acl);
+		if (aclnode == NULL)
+			return (NULL);
+
+		aclp->z_next_ace = aclnode->z_acldata;
+		aclp->z_curr_node = aclnode;
+		aclnode->z_ace_idx = 0;
+	}
+
+	aclnode = aclp->z_curr_node;
+
+	if (aclnode == NULL)
+		return (NULL);
+
+	/* Current node consumed: advance to the next node, if any. */
+	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
+		aclnode = list_next(&aclp->z_acl, aclnode);
+		if (aclnode == NULL)
+			return (NULL);
+		else {
+			aclp->z_curr_node = aclnode;
+			aclnode->z_ace_idx = 0;
+			aclp->z_next_ace = aclnode->z_acldata;
+		}
+	}
+
+	/* Re-checked because the node advanced to may itself be empty. */
+	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
+		void *acep = aclp->z_next_ace;
+		size_t ace_size;
+
+		/*
+		 * Make sure we don't overstep our bounds
+		 */
+		ace_size = aclp->z_ops->ace_size(acep);
+
+		if (((caddr_t)acep + ace_size) >
+		    ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
+			return (NULL);
+		}
+
+		*iflags = aclp->z_ops->ace_flags_get(acep);
+		*type = aclp->z_ops->ace_type_get(acep);
+		*access_mask = aclp->z_ops->ace_mask_get(acep);
+		*who = aclp->z_ops->ace_who_get(acep);
+		/* Move the cursor past this ACE for the next call. */
+		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
+		aclnode->z_ace_idx++;
+
+		return ((void *)acep);
+	}
+	return (NULL);
+}
+
+/*
+ * Cookie-based adapter around zfs_acl_next_ace().  The cookie is the
+ * previously returned ACE pointer (0 to start); the return value is the
+ * next ACE pointer as a cookie, or 0 when the walk is finished.  The "who"
+ * value is fetched but discarded, and aclcnt is unused.
+ */
+/*ARGSUSED*/
+static uint64_t
+zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
+    uint16_t *flags, uint16_t *type, uint32_t *mask)
+{
+	zfs_acl_t *acl = datap;
+	void *prev = (void *)(uintptr_t)cookie;
+	void *next;
+	uint64_t who_unused;
+
+	next = zfs_acl_next_ace(acl, prev, &who_unused, mask, flags, type);
+
+	return ((uint64_t)(uintptr_t)next);
+}
+
+/*
+ * Copy ACE to internal ZFS format.
+ * While processing the ACL each ACE will be validated for correctness.
+ * ACE FUIDs will be created later.
+ *
+ * datap is walked as a sequence of aclcnt source entries: an entry whose
+ * type is one of the object ACE types is stepped over as an ace_object_t,
+ * every other entry as an ace_t.  Output is written starting at z_acl and
+ * advanced by aclp->z_ops->ace_size() of each entry just written, so the
+ * caller's buffer must be large enough for the resulting entries.
+ *
+ * For entries that are not owner@/group@/everyone@, z_fuid is filled in via
+ * zfs_fuid_create() (ZFS_ACE_USER when no type flag bits are set, otherwise
+ * ZFS_ACE_GROUP).
+ *
+ * On success *size receives the number of bytes written and 0 is returned.
+ * Returns EINVAL as soon as zfs_ace_valid() rejects an entry; *size is not
+ * written in that case.
+ */
+static int
+zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
+    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
+    zfs_fuid_info_t **fuidp, cred_t *cr)
+{
+	int i;
+	uint16_t entry_type;
+	zfs_ace_t *aceptr = z_acl;
+	ace_t *acep = datap;
+	zfs_object_ace_t *zobjacep;
+	ace_object_t *aceobjp;
+
+	for (i = 0; i != aclcnt; i++) {
+		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
+		aceptr->z_hdr.z_flags = acep->a_flags;
+		aceptr->z_hdr.z_type = acep->a_type;
+		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
+		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
+		    entry_type != ACE_EVERYONE) {
+			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
+			    cr, (entry_type == 0) ?
+			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
+		}
+
+		/*
+		 * Make sure ACE is valid
+		 */
+		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
+		    aceptr->z_hdr.z_flags) != B_TRUE)
+			return (SET_ERROR(EINVAL));
+
+		/* Object ACEs carry two extra type fields; copy them too. */
+		switch (acep->a_type) {
+		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+			zobjacep = (zfs_object_ace_t *)aceptr;
+			aceobjp = (ace_object_t *)acep;
+
+			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
+			    sizeof (aceobjp->a_obj_type));
+			bcopy(aceobjp->a_inherit_obj_type,
+			    zobjacep->z_inherit_type,
+			    sizeof (aceobjp->a_inherit_obj_type));
+			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
+			break;
+		default:
+			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
+		}
+
+		/* Destination entries are variable-sized; step by actual size. */
+		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
+		    aclp->z_ops->ace_size(aceptr));
+	}
+
+	*size = (caddr_t)aceptr - (caddr_t)z_acl;
+
+	return (0);
+}
+
+/*
+ * Copy ZFS ACEs to fixed size ace_t layout
+ *
+ * Walks every ACE of aclp and writes the external form into the buffer at
+ * datap.  Object ACEs are written as ace_object_t (including their two type
+ * fields) unless filter is non-zero, in which case they are skipped
+ * entirely; all other ACEs are written as ace_t.  No bounds checking is done
+ * on datap here, so the caller must size the buffer.
+ *
+ * For owner@/group@/everyone@ entries a_who is the raw "who" value truncated
+ * to uid_t; for all others it is the result of zfs_fuid_map_id().
+ */
+static void
+zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
+    void *datap, int filter)
+{
+	uint64_t who;
+	uint32_t access_mask;
+	uint16_t iflags, type;
+	zfs_ace_hdr_t *zacep = NULL;
+	ace_t *acep = datap;
+	ace_object_t *objacep;
+	zfs_object_ace_t *zobjacep;
+	size_t ace_size;
+	uint16_t entry_type;
+
+	while ((zacep = zfs_acl_next_ace(aclp, zacep,
+	    &who, &access_mask, &iflags, &type))) {
+
+		switch (type) {
+		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+			/* Caller asked for object ACEs to be left out. */
+			if (filter) {
+				continue;
+			}
+			zobjacep = (zfs_object_ace_t *)zacep;
+			objacep = (ace_object_t *)acep;
+			bcopy(zobjacep->z_object_type,
+			    objacep->a_obj_type,
+			    sizeof (zobjacep->z_object_type));
+			bcopy(zobjacep->z_inherit_type,
+			    objacep->a_inherit_obj_type,
+			    sizeof (zobjacep->z_inherit_type));
+			ace_size = sizeof (ace_object_t);
+			break;
+		default:
+			ace_size = sizeof (ace_t);
+			break;
+		}
+
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+		if ((entry_type != ACE_OWNER &&
+		    entry_type != OWNING_GROUP &&
+		    entry_type != ACE_EVERYONE)) {
+			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
+			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
+			    ZFS_ACE_GROUP : ZFS_ACE_USER);
+		} else {
+			acep->a_who = (uid_t)(int64_t)who;
+		}
+		acep->a_access_mask = access_mask;
+		acep->a_flags = iflags;
+		acep->a_type = type;
+		/* Advance by the size of the external record just written. */
+		acep = (ace_t *)((caddr_t)acep + ace_size);
+	}
+}
+
+/*
+ * Copy aclcnt ace_t entries into the old (pre-FUID) on-disk ACE layout at
+ * z_acl, validating each entry after it has been copied.
+ *
+ * On success *size receives the number of bytes written and 0 is returned.
+ * Returns EINVAL at the first entry zfs_ace_valid() rejects; *size is not
+ * written in that case.
+ */
+static int
+zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
+    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
+{
+	zfs_oldace_t *dst = z_acl;
+	int idx;
+
+	for (idx = 0; idx != aclcnt; idx++) {
+		ace_t *src = &acep[idx];
+
+		dst->z_access_mask = src->a_access_mask;
+		dst->z_type = src->a_type;
+		dst->z_flags = src->a_flags;
+		dst->z_fuid = src->a_who;
+
+		/* Reject the whole copy as soon as one ACE is invalid. */
+		if (zfs_ace_valid(obj_type, aclp, dst->z_type,
+		    dst->z_flags) != B_TRUE)
+			return (SET_ERROR(EINVAL));
+
+		dst++;
+	}
+
+	*size = (caddr_t)dst - (caddr_t)z_acl;
+	return (0);
+}
+
+/*
+ * convert old ACL format to new
+ *
+ * aclp must be a ZFS_ACL_VERSION_INITIAL ACL (asserted).  Its ACEs are first
+ * flattened into a temporary zfs_oldace_t array, then re-encoded into a
+ * single new node using the FUID ops.  On return aclp uses
+ * zfs_acl_fuid_ops, has z_version == ZFS_ACL_VERSION, and holds exactly one
+ * node containing the converted entries.  A conversion failure is fatal
+ * (VERIFY0).
+ */
+void
+zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
+{
+	zfs_oldace_t *oldaclp;
+	int i;
+	uint16_t type, iflags;
+	uint32_t access_mask;
+	uint64_t who;
+	void *cookie = NULL;
+	zfs_acl_node_t *newaclnode;
+
+	ASSERT3U(aclp->z_version, ==, ZFS_ACL_VERSION_INITIAL);
+	/*
+	 * First create the ACE in a contiguous piece of memory
+	 * for zfs_copy_ace_2_fuid().
+	 *
+	 * We only convert an ACL once, so this won't happen
+	 * everytime.
+	 */
+	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
+	    KM_SLEEP);
+	i = 0;
+	while ((cookie = zfs_acl_next_ace(aclp, cookie, &who,
+	    &access_mask, &iflags, &type))) {
+		oldaclp[i].z_flags = iflags;
+		oldaclp[i].z_type = type;
+		oldaclp[i].z_fuid = who;
+		oldaclp[i++].z_access_mask = access_mask;
+	}
+
+	/*
+	 * Size the new node for the largest FUID-format ACE so that every
+	 * converted entry is guaranteed to fit; the real size is filled in
+	 * by zfs_copy_ace_2_fuid() through &newaclnode->z_size.
+	 *
+	 * NOTE(review): oldaclp (zfs_oldace_t[]) is handed to
+	 * zfs_copy_ace_2_fuid(), which reads it as ace_t -- this relies on
+	 * the two layouts being compatible; confirm against the structure
+	 * definitions.
+	 */
+	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
+	    sizeof (zfs_object_ace_t));
+	/* Switch ops first: the copy below sizes entries with the new ops. */
+	aclp->z_ops = &zfs_acl_fuid_ops;
+	VERIFY0(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
+	    oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
+	    &newaclnode->z_size, NULL, cr));
+	newaclnode->z_ace_count = aclp->z_acl_count;
+	aclp->z_version = ZFS_ACL_VERSION;
+	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
+
+	/*
+	 * Release all previous ACL nodes
+	 */
+
+	zfs_acl_release_nodes(aclp);
+
+	list_insert_head(&aclp->z_acl, newaclnode);
+
+	/* zfs_acl_release_nodes() zeroed these; restore from the new node. */
+	aclp->z_acl_bytes = newaclnode->z_size;
+	aclp->z_acl_count = newaclnode->z_ace_count;
+
+}
+
+/*
+ * Translate the "other" permission bits of a unix mode (S_IROTH, S_IWOTH,
+ * S_IXOTH) into the corresponding NFSv4-style access mask bits.
+ */
+static uint32_t
+zfs_unix_to_v4(uint32_t access_mask)
+{
+	uint32_t v4_mask = 0;
+
+	v4_mask |= (access_mask & S_IROTH) ? ACE_READ_DATA : 0;
+	v4_mask |= (access_mask & S_IWOTH) ? ACE_WRITE_DATA : 0;
+	v4_mask |= (access_mask & S_IXOTH) ? ACE_EXECUTE : 0;
+
+	return (v4_mask);
+}
+
+/*
+ * Fill in an ACE through the ACL's ops vector: mask, type and flags are
+ * always written; the "who" field is written only for entries that are not
+ * owner@, group@ or everyone@.
+ */
+static void
+zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
+    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
+{
+	uint16_t who_kind = entry_type & ACE_TYPE_FLAGS;
+
+	aclp->z_ops->ace_mask_set(acep, access_mask);
+	aclp->z_ops->ace_type_set(acep, access_type);
+	aclp->z_ops->ace_flags_set(acep, entry_type);
+
+	if (who_kind == ACE_OWNER || who_kind == OWNING_GROUP ||
+	    who_kind == ACE_EVERYONE)
+		return;
+
+	aclp->z_ops->ace_who_set(acep, fuid);
+}
+
+/*
+ * Determine mode of file based on ACL.
+ *
+ * fmode  - current mode; only the file type and setuid/setgid/sticky bits
+ *	    are carried over into the result.
+ * aclp   - ACL to evaluate.
+ * pflags - in/out; ZFS_NO_EXECS_DENIED is set or cleared according to
+ *	    whether execute could be denied to anyone.
+ * fuid   - file owner; a user ACE whose "who" equals it counts as owner@.
+ * fgid   - file group; an identifier-group ACE whose "who" equals it counts
+ *	    as group@.
+ *
+ * For each of the nine rwx permission bits the first ACE that mentions the
+ * bit decides it: the bit is recorded in "seen", and set in the mode only
+ * if that ACE is an ALLOW.  Later ACEs cannot change a bit already seen.
+ * everyone@ ACEs apply to the user, group and other classes alike.
+ *
+ * Returns the computed mode.
+ */
+uint64_t
+zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
+    uint64_t *pflags, uint64_t fuid, uint64_t fgid)
+{
+	int		entry_type;
+	mode_t		mode;
+	mode_t		seen = 0;
+	zfs_ace_hdr_t 	*acep = NULL;
+	uint64_t	who;
+	uint16_t	iflags, type;
+	uint32_t	access_mask;
+	boolean_t	an_exec_denied = B_FALSE;
+
+	/* Start from the non-permission bits of the existing mode. */
+	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
+
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who,
+	    &access_mask, &iflags, &type))) {
+
+		/* Unrecognized ACEs do not contribute to the mode. */
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+
+		/*
+		 * Skip over any inherit_only ACEs
+		 */
+		if (iflags & ACE_INHERIT_ONLY_ACE)
+			continue;
+
+		/* owner@, or an explicit user ACE naming the file owner */
+		if (entry_type == ACE_OWNER || (entry_type == 0 &&
+		    who == fuid)) {
+			if ((access_mask & ACE_READ_DATA) &&
+			    (!(seen & S_IRUSR))) {
+				seen |= S_IRUSR;
+				if (type == ALLOW) {
+					mode |= S_IRUSR;
+				}
+			}
+			if ((access_mask & ACE_WRITE_DATA) &&
+			    (!(seen & S_IWUSR))) {
+				seen |= S_IWUSR;
+				if (type == ALLOW) {
+					mode |= S_IWUSR;
+				}
+			}
+			if ((access_mask & ACE_EXECUTE) &&
+			    (!(seen & S_IXUSR))) {
+				seen |= S_IXUSR;
+				if (type == ALLOW) {
+					mode |= S_IXUSR;
+				}
+			}
+		/* group@, or an explicit group ACE naming the file group */
+		} else if (entry_type == OWNING_GROUP ||
+		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
+			if ((access_mask & ACE_READ_DATA) &&
+			    (!(seen & S_IRGRP))) {
+				seen |= S_IRGRP;
+				if (type == ALLOW) {
+					mode |= S_IRGRP;
+				}
+			}
+			if ((access_mask & ACE_WRITE_DATA) &&
+			    (!(seen & S_IWGRP))) {
+				seen |= S_IWGRP;
+				if (type == ALLOW) {
+					mode |= S_IWGRP;
+				}
+			}
+			if ((access_mask & ACE_EXECUTE) &&
+			    (!(seen & S_IXGRP))) {
+				seen |= S_IXGRP;
+				if (type == ALLOW) {
+					mode |= S_IXGRP;
+				}
+			}
+		/* everyone@ decides any user/group/other bit not yet seen */
+		} else if (entry_type == ACE_EVERYONE) {
+			if ((access_mask & ACE_READ_DATA)) {
+				if (!(seen & S_IRUSR)) {
+					seen |= S_IRUSR;
+					if (type == ALLOW) {
+						mode |= S_IRUSR;
+					}
+				}
+				if (!(seen & S_IRGRP)) {
+					seen |= S_IRGRP;
+					if (type == ALLOW) {
+						mode |= S_IRGRP;
+					}
+				}
+				if (!(seen & S_IROTH)) {
+					seen |= S_IROTH;
+					if (type == ALLOW) {
+						mode |= S_IROTH;
+					}
+				}
+			}
+			if ((access_mask & ACE_WRITE_DATA)) {
+				if (!(seen & S_IWUSR)) {
+					seen |= S_IWUSR;
+					if (type == ALLOW) {
+						mode |= S_IWUSR;
+					}
+				}
+				if (!(seen & S_IWGRP)) {
+					seen |= S_IWGRP;
+					if (type == ALLOW) {
+						mode |= S_IWGRP;
+					}
+				}
+				if (!(seen & S_IWOTH)) {
+					seen |= S_IWOTH;
+					if (type == ALLOW) {
+						mode |= S_IWOTH;
+					}
+				}
+			}
+			if ((access_mask & ACE_EXECUTE)) {
+				if (!(seen & S_IXUSR)) {
+					seen |= S_IXUSR;
+					if (type == ALLOW) {
+						mode |= S_IXUSR;
+					}
+				}
+				if (!(seen & S_IXGRP)) {
+					seen |= S_IXGRP;
+					if (type == ALLOW) {
+						mode |= S_IXGRP;
+					}
+				}
+				if (!(seen & S_IXOTH)) {
+					seen |= S_IXOTH;
+					if (type == ALLOW) {
+						mode |= S_IXOTH;
+					}
+				}
+			}
+		} else {
+			/*
+			 * Only care if this IDENTIFIER_GROUP or
+			 * USER ACE denies execute access to someone,
+			 * mode is not affected
+			 */
+			if ((access_mask & ACE_EXECUTE) && type == DENY)
+				an_exec_denied = B_TRUE;
+		}
+	}
+
+	/*
+	 * Failure to allow is effectively a deny, so execute permission
+	 * is denied if it was never mentioned or if we explicitly
+	 * weren't allowed it.
+	 */
+	if (!an_exec_denied &&
+	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
+	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
+		an_exec_denied = B_TRUE;
+
+	if (an_exec_denied)
+		*pflags &= ~ZFS_NO_EXECS_DENIED;
+	else
+		*pflags |= ZFS_NO_EXECS_DENIED;
+
+	return (mode);
+}
+
+/*
+ * Read an external acl object.  If the intent is to modify, always
+ * create a new acl and leave any cached acl in place.
+ *
+ * zp          - znode whose ACL is wanted; z_acl_lock must be held.
+ * have_lock   - not referenced in this function body.
+ * aclpp       - out; receives the ACL on success.
+ * will_modify - B_FALSE: return zp->z_acl_cached if present, otherwise read
+ *		 the ACL and cache it on the znode.  B_TRUE: always read a
+ *		 fresh copy and do not touch the cache.
+ *
+ * Returns 0 on success or an error; ECKSUM from the read is reported as
+ * EIO.  On error *aclpp is not written.
+ */
+int
+zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp,
+    boolean_t will_modify)
+{
+	zfs_acl_t	*aclp;
+	int		aclsize;
+	int		acl_count;
+	zfs_acl_node_t	*aclnode;
+	zfs_acl_phys_t	znode_acl;
+	int		version;
+	int		error;
+
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+	if (zp->z_zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
+
+	/* Read-only callers can share the cached copy. */
+	if (zp->z_acl_cached && !will_modify) {
+		*aclpp = zp->z_acl_cached;
+		return (0);
+	}
+
+	version = zfs_znode_acl_version(zp);
+
+	if ((error = zfs_acl_znode_info(zp, &aclsize,
+	    &acl_count, &znode_acl)) != 0) {
+		goto done;
+	}
+
+	aclp = zfs_acl_alloc(version);
+
+	aclp->z_acl_count = acl_count;
+	aclp->z_acl_bytes = aclsize;
+
+	/* The whole ACL is loaded into a single node. */
+	aclnode = zfs_acl_node_alloc(aclsize);
+	aclnode->z_ace_count = aclp->z_acl_count;
+	aclnode->z_size = aclsize;
+
+	/*
+	 * Three possible sources: an external ACL object, ACE data embedded
+	 * in the legacy zfs_acl_phys_t, or the SA DACL_ACES attribute.
+	 * error is still 0 here, so the embedded (bcopy) path succeeds
+	 * without assigning it.
+	 */
+	if (!zp->z_is_sa) {
+		if (znode_acl.z_acl_extern_obj) {
+			error = dmu_read(zp->z_zfsvfs->z_os,
+			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
+			    aclnode->z_acldata, DMU_READ_PREFETCH);
+		} else {
+			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
+			    aclnode->z_size);
+		}
+	} else {
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
+		    aclnode->z_acldata, aclnode->z_size);
+	}
+
+	if (error != 0) {
+		/* aclnode was never linked into aclp, so free both. */
+		zfs_acl_free(aclp);
+		zfs_acl_node_free(aclnode);
+		/* convert checksum errors into IO errors */
+		if (error == ECKSUM)
+			error = SET_ERROR(EIO);
+		goto done;
+	}
+
+	list_insert_head(&aclp->z_acl, aclnode);
+
+	*aclpp = aclp;
+	if (!will_modify)
+		zp->z_acl_cached = aclp;
+done:
+	return (error);
+}
+
+/*
+ * Data-locator callback used when the ACL is written as a system attribute
+ * (see the SA_ZPL_DACL_ACES entry built in zfs_aclset_common()).  Each call
+ * hands back the data pointer and length of one ACL node.
+ *
+ * dataptr  - out; set to the node's ACE data.
+ * length   - out; set to the node's data size in bytes.
+ * buflen   - unused.
+ * start    - B_TRUE to begin at the first node of the ACL, B_FALSE to
+ *	      advance to the node after the one returned last time.
+ * userdata - zfs_acl_locator_cb_t carrying the ACL (cb_aclp) and the
+ *	      iteration cursor (cb_acl_node).
+ *
+ * The caller must not ask for more segments than the ACL has nodes; the
+ * assertion below catches that in debug builds instead of letting it show
+ * up as an unexplained NULL dereference.
+ */
+/*ARGSUSED*/
+void
+zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
+    boolean_t start, void *userdata)
+{
+	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
+
+	if (start) {
+		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
+	} else {
+		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
+		    cb->cb_acl_node);
+	}
+	/* list_head()/list_next() return NULL at the end of the list. */
+	ASSERT3P(cb->cb_acl_node, !=, NULL);
+	*dataptr = cb->cb_acl_node->z_acldata;
+	*length = cb->cb_acl_node->z_size;
+}
+
+/*
+ * Recompute zp->z_mode (and the exec-denied flag in zp->z_pflags) from the
+ * znode's ACL using the znode's current uid/gid.  The caller must hold
+ * zp->z_acl_lock.  Returns 0, or the error from reading the ACL, in which
+ * case the mode is left unchanged.
+ */
+int
+zfs_acl_chown_setattr(znode_t *zp)
+{
+	zfs_acl_t *aclp;
+	int error;
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE) {
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+		ASSERT_VOP_IN_SEQC(ZTOV(zp));
+	}
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+
+	error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
+	if (error != 0)
+		return (error);
+
+	zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
+	    &zp->z_pflags, zp->z_uid, zp->z_gid);
+
+	return (0);
+}
+
+/*
+ * common code for setting ACLs.
+ *
+ * Recomputes the znode's mode from aclp, drops any cached ACL, upgrades an
+ * initial-version ACL to the FUID format when the filesystem supports it,
+ * and writes mode, flags, ctime and the ACL itself in a single
+ * sa_bulk_update() under tx.
+ *
+ * NOTE(review): an earlier version of this comment said that zfs_setacl
+ * passes a non-NULL inherit pointer (ihp); this signature has no such
+ * parameter, so that remark appears to be stale.  The listed callers
+ * (zfs_mode_update, zfs_perm_init, zfs_setacl) are outside this view.
+ *
+ * Returns 0 or an error from sa_lookup(), dmu_object_free() or
+ * sa_bulk_update().  Note that zp->z_mode, zp->z_pflags and the cached ACL
+ * have already been modified by the time a later step can fail.
+ */
+int
+zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
+{
+	int			error;
+	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
+	dmu_object_type_t	otype;
+	zfs_acl_locator_cb_t	locate = { 0 };
+	uint64_t		mode;
+	sa_bulk_attr_t		bulk[5];
+	uint64_t		ctime[2];
+	int			count = 0;
+	zfs_acl_phys_t		acl_phys;
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE) {
+		ASSERT_VOP_IN_SEQC(ZTOV(zp));
+	}
+
+	mode = zp->z_mode;
+
+	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
+	    zp->z_uid, zp->z_gid);
+
+	zp->z_mode = mode;
+	/*
+	 * The bulk entries hold pointers, so ctime and z_pflags may still be
+	 * updated below before sa_bulk_update() reads them.
+	 */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, sizeof (ctime));
+
+	/* The cached ACL is about to become stale. */
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
+	/*
+	 * Upgrade needed?
+	 */
+	if (!zfsvfs->z_use_fuids) {
+		otype = DMU_OT_OLDACL;
+	} else {
+		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
+		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
+			zfs_acl_xform(zp, aclp, cr);
+		ASSERT3U(aclp->z_version, >=, ZFS_ACL_VERSION_FUID);
+		otype = DMU_OT_ACL;
+	}
+
+	/*
+	 * Arrgh, we have to handle old on disk format
+	 * as well as newer (preferred) SA format.
+	 */
+
+	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
+		locate.cb_aclp = aclp;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
+		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
+	} else { /* Painful legacy way */
+		zfs_acl_node_t *aclnode;
+		uint64_t off = 0;
+		uint64_t aoid;
+
+		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    &acl_phys, sizeof (acl_phys))) != 0)
+			return (error);
+
+		aoid = acl_phys.z_acl_extern_obj;
+
+		/* Too big to embed in the znode: store in an external object. */
+		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			/*
+			 * If ACL was previously external and we are now
+			 * converting to new ACL format then release old
+			 * ACL object and create a new one.
+			 */
+			if (aoid &&
+			    aclp->z_version != acl_phys.z_acl_version) {
+				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
+				if (error)
+					return (error);
+				aoid = 0;
+			}
+			if (aoid == 0) {
+				aoid = dmu_object_alloc(zfsvfs->z_os,
+				    otype, aclp->z_acl_bytes,
+				    otype == DMU_OT_ACL ?
+				    DMU_OT_SYSACL : DMU_OT_NONE,
+				    otype == DMU_OT_ACL ?
+				    DN_OLD_MAX_BONUSLEN : 0, tx);
+			} else {
+				/* Reuse the object; result deliberately ignored. */
+				(void) dmu_object_set_blocksize(zfsvfs->z_os,
+				    aoid, aclp->z_acl_bytes, 0, tx);
+			}
+			acl_phys.z_acl_extern_obj = aoid;
+			/* Write the nodes back to back, skipping empty ones. */
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				dmu_write(zfsvfs->z_os, aoid, off,
+				    aclnode->z_size, aclnode->z_acldata, tx);
+				off += aclnode->z_size;
+			}
+		} else {
+			void *start = acl_phys.z_ace_data;
+			/*
+			 * Migrating back embedded?
+			 */
+			if (acl_phys.z_acl_extern_obj) {
+				error = dmu_object_free(zfsvfs->z_os,
+				    acl_phys.z_acl_extern_obj, tx);
+				if (error)
+					return (error);
+				acl_phys.z_acl_extern_obj = 0;
+			}
+
+			/* Pack the nodes into the embedded ACE area. */
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+		}
+		/*
+		 * If Old version then swap count/bytes to match old
+		 * layout of znode_acl_phys_t.
+		 */
+		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
+			acl_phys.z_acl_size = aclp->z_acl_count;
+			acl_phys.z_acl_count = aclp->z_acl_bytes;
+		} else {
+			acl_phys.z_acl_size = aclp->z_acl_bytes;
+			acl_phys.z_acl_count = aclp->z_acl_count;
+		}
+		acl_phys.z_acl_version = aclp->z_version;
+
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (acl_phys));
+	}
+
+	/*
+	 * Replace ACL wide bits, but first clear them.
+	 */
+	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
+
+	zp->z_pflags |= aclp->z_hints;
+
+	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
+		zp->z_pflags |= ZFS_ACL_TRIVIAL;
+
+	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
+	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
+}
+
+/*
+ * Rewrite aclp in place so that it reflects the requested mode.
+ *
+ * vtype - vnode type; only whether it is VDIR matters here.
+ * mode  - requested mode, converted to ACE masks by
+ *	   acl_trivial_access_masks().
+ * split - when set, existing non-inheritable owner@/group@/everyone@ ACEs
+ *	   are dropped, and inheritable ones (directories only) are kept as
+ *	   inherit-only ACEs.
+ * trim  - when set, ALLOW ACEs that are kept have their mask limited to
+ *	   the group mask of the new mode.
+ *
+ * The result is built in one new node laid out as:
+ *   [allow0 owner@] [deny1 owner@] [deny2 group@]   (each only if non-zero)
+ *   surviving ACEs from the old ACL
+ *   owner@ ALLOW, group@ ALLOW, everyone@ ALLOW
+ * The node is sized for six header-only ACEs plus all of the old ACL's
+ * bytes.  All previous nodes are released and replaced by the new one.
+ *
+ * NOTE(review): surviving ACEs are re-created with zfs_set_ace(), which
+ * writes only mask/type/flags/who, while the cursor advances by the full
+ * size of the source ACE.  Any extra object-ACE payload is therefore not
+ * copied -- confirm whether that is intended.
+ */
+static void
+zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
+    zfs_acl_t *aclp)
+{
+	void		*acep = NULL;
+	uint64_t	who;
+	int		new_count, new_bytes;
+	int		ace_size;
+	int 		entry_type;
+	uint16_t	iflags, type;
+	uint32_t	access_mask;
+	zfs_acl_node_t	*newnode;
+	size_t 		abstract_size = aclp->z_ops->ace_abstract_size();
+	void 		*zacep;
+	boolean_t	isdir;
+	trivial_acl_t	masks;
+
+	new_count = new_bytes = 0;
+
+	isdir = (vtype == VDIR);
+
+	acl_trivial_access_masks((mode_t)mode, isdir, &masks);
+
+	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
+
+	/* Leading ACEs derived from the mode; emitted only when non-empty. */
+	zacep = newnode->z_acldata;
+	if (masks.allow0) {
+		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+	if (masks.deny1) {
+		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+	if (masks.deny2) {
+		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+
+	/* Carry over the ACEs of the existing ACL that survive the chmod. */
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
+	    &iflags, &type))) {
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+		/*
+		 * ACEs used to represent the file mode may be divided
+		 * into an equivalent pair of inherit-only and regular
+		 * ACEs, if they are inheritable.
+		 * Skip regular ACEs, which are replaced by the new mode.
+		 */
+		if (split && (entry_type == ACE_OWNER ||
+		    entry_type == OWNING_GROUP ||
+		    entry_type == ACE_EVERYONE)) {
+			if (!isdir || !(iflags &
+			    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
+				continue;
+			/*
+			 * We preserve owner@, group@, or @everyone
+			 * permissions, if they are inheritable, by
+			 * copying them to inherit_only ACEs. This
+			 * prevents inheritable permissions from being
+			 * altered along with the file mode.
+			 */
+			iflags |= ACE_INHERIT_ONLY_ACE;
+		}
+
+		/*
+		 * If this ACL has any inheritable ACEs, mark that in
+		 * the hints (which are later masked into the pflags)
+		 * so create knows to do inheritance.
+		 */
+		if (isdir && (iflags &
+		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
+			aclp->z_hints |= ZFS_INHERIT_ACE;
+
+		if ((type != ALLOW && type != DENY) ||
+		    (iflags & ACE_INHERIT_ONLY_ACE)) {
+			switch (type) {
+			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
+				break;
+			}
+		} else {
+			/*
+			 * Limit permissions granted by ACEs to be no greater
+			 * than permissions of the requested group mode.
+			 * Applies when the "aclmode" property is set to
+			 * "groupmask".
+			 */
+			if ((type == ALLOW) && trim)
+				access_mask &= masks.group;
+		}
+		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
+		/* Advance by the size of the source ACE. */
+		ace_size = aclp->z_ops->ace_size(acep);
+		zacep = (void *)((uintptr_t)zacep + ace_size);
+		new_count++;
+		new_bytes += ace_size;
+	}
+	/* Trailing owner@/group@/everyone@ ALLOW ACEs are always emitted. */
+	zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
+	zacep = (void *)((uintptr_t)zacep + abstract_size);
+	zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
+	zacep = (void *)((uintptr_t)zacep + abstract_size);
+	zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
+
+	new_count += 3;
+	new_bytes += abstract_size * 3;
+	/* Drop the old nodes (this zeroes count/bytes) and install the new. */
+	zfs_acl_release_nodes(aclp);
+	aclp->z_acl_count = new_count;
+	aclp->z_acl_bytes = new_bytes;
+	newnode->z_ace_count = new_count;
+	newnode->z_size = new_bytes;
+	list_insert_tail(&aclp->z_acl, newnode);
+}
+
+/*
+ * Produce, in *aclp, the ACL that results from applying mode to zp.
+ *
+ * With aclmode == discard the starting point is a fresh empty ACL;
+ * otherwise it is a private (modifiable) copy of the znode's current ACL.
+ * The ACL is then rewritten by zfs_acl_chmod(), trimming ALLOW masks when
+ * aclmode == groupmask.  z_acl_lock is taken for the duration.
+ *
+ * Returns 0 or the error from reading the current ACL, in which case *aclp
+ * is not set by this function.
+ */
+int
+zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
+{
+	int error = 0;
+
+	mutex_enter(&zp->z_acl_lock);
+	if (zp->z_zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+
+	if (zp->z_zfsvfs->z_acl_mode != ZFS_ACL_DISCARD)
+		error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
+	else
+		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
+
+	if (error == 0) {
+		boolean_t trim =
+		    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK);
+
+		(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
+		zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE, trim, *aclp);
+	}
+	mutex_exit(&zp->z_acl_lock);
+
+	return (error);
+}
+
+/*
+ * Should ACE be inherited?
+ *
+ * Returns 1 when a new object of type vtype should inherit an ACE carrying
+ * acep_flags, 0 otherwise.  Only the low four flag bits are examined:
+ *   - a directory inherits any ACE marked DIRECTORY_INHERIT;
+ *   - otherwise an ACE marked FILE_INHERIT is inherited, except by a
+ *     directory when NO_PROPAGATE_INHERIT is also set.
+ */
+static int
+zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
+{
+	int	iflags = (acep_flags & 0xf);
+
+	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
+		return (1);
+
+	if (!(iflags & ACE_FILE_INHERIT_ACE))
+		return (0);
+
+	return ((vtype != VDIR) ||
+	    !(iflags & ACE_NO_PROPAGATE_INHERIT_ACE));
+}
+
+/*
+ * inherit inheritable ACEs from parent
+ *
+ * Allocates and returns a new ACL (same version as paclp) holding the
+ * entries of paclp that a new object of type vtype inherits under the
+ * filesystem's z_acl_inherit setting.  The returned ACL is empty when
+ * that setting is ZFS_ACL_DISCARD or when vtype is VLNK.
+ *
+ * mode is consulted only to decide whether ACE_EXECUTE is stripped from
+ * inherited ALLOW entries under ZFS_ACL_PASSTHROUGH_X.
+ *
+ * *need_chmod is set to B_TRUE on entry and cleared to B_FALSE when the
+ * loop, or the z_acl_mode check after it, decides zfs_acl_chmod() isn't
+ * needed.
+ */
+static zfs_acl_t *
+zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
+    uint64_t mode, boolean_t *need_chmod)
+{
+	void		*pacep = NULL;
+	void		*acep;
+	zfs_acl_node_t  *aclnode;
+	zfs_acl_t	*aclp = NULL;
+	uint64_t	who;
+	uint32_t	access_mask;
+	uint16_t	iflags, newflags, type;
+	size_t		ace_size;
+	void		*data1, *data2;
+	size_t		data1sz, data2sz;
+	uint_t		aclinherit;
+	boolean_t	isdir = (vtype == VDIR);
+	boolean_t	isreg = (vtype == VREG);
+
+	*need_chmod = B_TRUE;
+
+	aclp = zfs_acl_alloc(paclp->z_version);
+	aclinherit = zfsvfs->z_acl_inherit;
+	if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
+		return (aclp);
+
+	while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
+	    &access_mask, &iflags, &type))) {
+
+		/*
+		 * don't inherit bogus ACEs
+		 */
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		/*
+		 * Check if ACE is inheritable by this vnode
+		 */
+		if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
+		    !zfs_ace_can_use(vtype, iflags))
+			continue;
+
+		/*
+		 * If owner@, group@, or everyone@ inheritable
+		 * then zfs_acl_chmod() isn't needed.
+		 */
+		if ((aclinherit == ZFS_ACL_PASSTHROUGH ||
+		    aclinherit == ZFS_ACL_PASSTHROUGH_X) &&
+		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
+		    ((iflags & OWNING_GROUP) == OWNING_GROUP)) &&
+		    (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE))))
+			*need_chmod = B_FALSE;
+
+		/*
+		 * Strip inherited execute permission from file if
+		 * not in mode
+		 */
+		if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
+		    !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
+			access_mask &= ~ACE_EXECUTE;
+		}
+
+		/*
+		 * Strip write_acl and write_owner from permissions
+		 * when inheriting an ACE
+		 */
+		if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
+			access_mask &= ~RESTRICTED_CLEAR;
+		}
+
+		/*
+		 * Append a node sized for this one ACE to the new ACL and
+		 * write the (possibly reduced) entry into it, flagged as
+		 * inherited.
+		 */
+		ace_size = aclp->z_ops->ace_size(pacep);
+		aclnode = zfs_acl_node_alloc(ace_size);
+		list_insert_tail(&aclp->z_acl, aclnode);
+		acep = aclnode->z_acldata;
+
+		zfs_set_ace(aclp, acep, access_mask, type,
+		    who, iflags|ACE_INHERITED_ACE);
+
+		/*
+		 * Copy special opaque data if any
+		 */
+		if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) {
+			data2sz = aclp->z_ops->ace_data(acep, &data2);
+			VERIFY3U(data2sz, ==, data1sz);
+			bcopy(data1, data2, data2sz);
+		}
+
+		/* Account for the new entry in the ACL and node totals */
+		aclp->z_acl_count++;
+		aclnode->z_ace_count++;
+		aclp->z_acl_bytes += aclnode->z_size;
+		newflags = aclp->z_ops->ace_flags_get(acep);
+
+		/*
+		 * If ACE is not to be inherited further, or if the vnode is
+		 * not a directory, remove all inheritance flags
+		 */
+		if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
+			newflags &= ~ALL_INHERIT;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+			continue;
+		}
+
+		/*
+		 * This directory has an inheritable ACE
+		 */
+		aclp->z_hints |= ZFS_INHERIT_ACE;
+
+		/*
+		 * If only FILE_INHERIT is set then turn on
+		 * inherit_only
+		 */
+		if ((iflags & (ACE_FILE_INHERIT_ACE |
+		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
+			newflags |= ACE_INHERIT_ONLY_ACE;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+		} else {
+			newflags &= ~ACE_INHERIT_ONLY_ACE;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+		}
+	}
+	/*
+	 * When z_acl_mode is ZFS_ACL_RESTRICTED and at least one ACE was
+	 * inherited, clear *need_chmod regardless of what the loop decided.
+	 */
+	if (zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
+	    aclp->z_acl_count != 0) {
+		*need_chmod = B_FALSE;
+	}
+
+	return (aclp);
+}
+
+/*
+ * Create file system object initial permissions
+ * including inheritable ACEs.
+ * Also, create FUIDs for owner and group.
+ *
+ *	dzp	- parent directory znode; its z_gid, z_mode and z_pflags are
+ *		  read, and its ACL is read when it has ZFS_INHERIT_ACE set
+ *	flag	- IS_ROOT_NODE and IS_XATTR are the bits tested here
+ *	vap	- attributes of the new object (va_type, va_mode, va_uid,
+ *		  va_gid and va_mask are read)
+ *	cr	- credentials of the creating caller
+ *	vsecp	- optional explicit ACL; when non-NULL it is converted with
+ *		  zfs_vsec_2_aclp() and no ACL is inherited from dzp
+ *	acl_ids	- output; zeroed first, then z_mode, z_fuid, z_fgid,
+ *		  z_fuidp and z_aclp are filled in
+ *
+ * Returns 0, or the error from zfs_vsec_2_aclp() when vsecp could not
+ * be converted.
+ */
+int
+zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
+    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
+{
+	int		error;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zfs_acl_t	*paclp;
+	gid_t		gid;
+	boolean_t	need_chmod = B_TRUE;
+	boolean_t	trim = B_FALSE;
+	boolean_t	inherited = B_FALSE;
+
+	if ((flag & IS_ROOT_NODE) == 0) {
+		if (zfsvfs->z_replay == B_FALSE)
+			ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+	} else
+		ASSERT3P(dzp->z_vnode, ==, NULL);
+	bzero(acl_ids, sizeof (zfs_acl_ids_t));
+	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+
+	if (vsecp)
+		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
+		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
+			return (error);
+	/*
+	 * Determine uid and gid.
+	 *
+	 * For the root node, during replay, and for xattr directories the
+	 * ids come straight from vap.  Otherwise the owner is the caller's
+	 * uid and the group is either the requested va_gid (if permitted)
+	 * or the parent directory's group.
+	 */
+	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
+	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
+		acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
+		    (uint64_t)vap->va_uid, cr,
+		    ZFS_OWNER, &acl_ids->z_fuidp);
+		acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
+		    (uint64_t)vap->va_gid, cr,
+		    ZFS_GROUP, &acl_ids->z_fuidp);
+		gid = vap->va_gid;
+	} else {
+		uid_t id = crgetuid(cr);
+		if (IS_EPHEMERAL(id))
+			id = UID_NOBODY;
+		acl_ids->z_fuid = (uint64_t)id;
+		acl_ids->z_fgid = 0;
+		if (vap->va_mask & AT_GID)  {
+			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
+			    (uint64_t)vap->va_gid,
+			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
+			gid = vap->va_gid;
+			/*
+			 * Drop the requested group (falling back to the
+			 * parent's below) unless it matches the parent's
+			 * group, the caller is a member of it, or the
+			 * caller passes secpolicy_vnode_create_gid().
+			 */
+			if (acl_ids->z_fgid != dzp->z_gid &&
+			    !groupmember(vap->va_gid, cr) &&
+			    secpolicy_vnode_create_gid(cr) != 0)
+				acl_ids->z_fgid = 0;
+		}
+		if (acl_ids->z_fgid == 0) {
+			char		*domain;
+			uint32_t	rid;
+
+			acl_ids->z_fgid = dzp->z_gid;
+			gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
+			    cr, ZFS_GROUP);
+
+			if (zfsvfs->z_use_fuids &&
+			    IS_EPHEMERAL(acl_ids->z_fgid)) {
+				domain =
+				    zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx,
+				    FUID_INDEX(acl_ids->z_fgid));
+				rid = FUID_RID(acl_ids->z_fgid);
+				zfs_fuid_node_add(&acl_ids->z_fuidp,
+				    domain, rid, FUID_INDEX(acl_ids->z_fgid),
+				    acl_ids->z_fgid, ZFS_GROUP);
+			}
+		}
+	}
+
+	/*
+	 * If we're creating a directory, and the parent directory has the
+	 * set-GID bit set, set it on the new directory.
+	 * Otherwise, if the user is neither privileged nor a member of the
+	 * file's new group, clear the file's set-GID bit.
+	 */
+
+	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
+	    (vap->va_type == VDIR)) {
+		acl_ids->z_mode |= S_ISGID;
+	} else {
+		if ((acl_ids->z_mode & S_ISGID) &&
+		    secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0)
+			acl_ids->z_mode &= ~S_ISGID;
+	}
+
+	/*
+	 * No explicit ACL was supplied: either inherit from the parent
+	 * (when it has inheritable ACEs and is not an xattr object) or
+	 * start from an empty ACL flagged trivial.
+	 */
+	if (acl_ids->z_aclp == NULL) {
+		mutex_enter(&dzp->z_acl_lock);
+		if (!(flag & IS_ROOT_NODE) &&
+		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
+		    !(dzp->z_pflags & ZFS_XATTR)) {
+			VERIFY0(zfs_acl_node_read(dzp, B_TRUE,
+			    &paclp, B_FALSE));
+			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
+			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
+			inherited = B_TRUE;
+		} else {
+			acl_ids->z_aclp =
+			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
+		}
+		mutex_exit(&dzp->z_acl_lock);
+
+		if (need_chmod) {
+			if (vap->va_type == VDIR)
+				acl_ids->z_aclp->z_hints |=
+				    ZFS_ACL_AUTO_INHERIT;
+
+			/*
+			 * trim is requested only for aclmode groupmask
+			 * combined with a non-passthrough aclinherit.
+			 */
+			if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
+			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
+			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
+				trim = B_TRUE;
+			zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE,
+			    trim, acl_ids->z_aclp);
+		}
+	}
+
+	/*
+	 * For an inherited or caller-supplied ACL, recompute the mode from
+	 * the ACL and flag the ACL trivial when ace_trivial_common()
+	 * returns 0 for it.
+	 */
+	if (inherited || vsecp) {
+		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
+		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
+		    acl_ids->z_fuid, acl_ids->z_fgid);
+		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
+	}
+
+	return (0);
+}
+
+/*
+ * Release the ACL and the FUID info held by acl_ids and reset both
+ * pointers to NULL.  The acl_ids structure itself is not freed.
+ */
+void
+zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
+{
+	if (acl_ids->z_aclp != NULL) {
+		zfs_acl_free(acl_ids->z_aclp);
+		acl_ids->z_aclp = NULL;
+	}
+
+	if (acl_ids->z_fuidp != NULL) {
+		zfs_fuid_info_free(acl_ids->z_fuidp);
+		acl_ids->z_fuidp = NULL;
+	}
+}
+
+/*
+ * Report whether the owner, the group, or the project the new object
+ * would be charged to is over quota.  The project is checked only when
+ * projid is neither ZFS_DEFAULT_PROJID nor ZFS_INVALID_PROJID.
+ */
+boolean_t
+zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
+{
+	if (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid))
+		return (B_TRUE);
+
+	if (zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid))
+		return (B_TRUE);
+
+	/* These two project ids are never checked against a quota. */
+	if (projid == ZFS_DEFAULT_PROJID || projid == ZFS_INVALID_PROJID)
+		return (B_FALSE);
+
+	return (zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid) ?
+	    B_TRUE : B_FALSE);
+}
+
+/*
+ * Retrieve a file's ACL
+ *
+ *	zp		- znode whose ACL is read
+ *	vsecp		- in: vsa_mask selects what to return (VSA_ACE,
+ *			  VSA_ACECNT, VSA_ACE_ACLFLAGS, VSA_ACE_ALLTYPES);
+ *			  out: vsa_aclcnt, vsa_aclentp/vsa_aclentsz and
+ *			  vsa_aclflags as requested
+ *	skipaclchk	- passed through to zfs_zaccess()
+ *	cr		- caller's credentials
+ *
+ * Returns 0 on success, ENOSYS when none of the supported mask bits is
+ * set, or the error from zfs_zaccess() / zfs_acl_node_read().
+ *
+ * For VSA_ACE the entry buffer is allocated here with kmem_alloc() and
+ * left in vsecp->vsa_aclentp (size in vsa_aclentsz); this function does
+ * not free it.
+ */
+int
+zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
+{
+	zfs_acl_t	*aclp;
+	ulong_t		mask;
+	int		error;
+	int 		count = 0;
+	int		largeace = 0;
+
+	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
+	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
+
+	if (mask == 0)
+		return (SET_ERROR(ENOSYS));
+
+	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
+		return (error);
+
+	mutex_enter(&zp->z_acl_lock);
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
+	error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
+	if (error != 0) {
+		mutex_exit(&zp->z_acl_lock);
+		return (error);
+	}
+
+	/*
+	 * Scan ACL to determine number of ACEs
+	 *
+	 * When the znode carries object ACEs and the caller did not ask
+	 * for all types, the object-type entries are tallied separately in
+	 * largeace and only the remaining entries go into count.  Note
+	 * that vsa_aclcnt is written on this path even if VSA_ACECNT was
+	 * not requested.
+	 */
+	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
+		void *zacep = NULL;
+		uint64_t who;
+		uint32_t access_mask;
+		uint16_t type, iflags;
+
+		while ((zacep = zfs_acl_next_ace(aclp, zacep,
+		    &who, &access_mask, &iflags, &type))) {
+			switch (type) {
+			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+				largeace++;
+				continue;
+			default:
+				count++;
+			}
+		}
+		vsecp->vsa_aclcnt = count;
+	} else
+		count = (int)aclp->z_acl_count;
+
+	if (mask & VSA_ACECNT) {
+		vsecp->vsa_aclcnt = count;
+	}
+
+	if (mask & VSA_ACE) {
+		size_t aclsz;
+
+		/* Room for count plain ACEs plus largeace object ACEs */
+		aclsz = count * sizeof (ace_t) +
+		    sizeof (ace_object_t) * largeace;
+
+		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
+		vsecp->vsa_aclentsz = aclsz;
+
+		/*
+		 * FUID-version ACLs are converted entry by entry; older
+		 * ACLs are copied out node by node as raw bytes.
+		 */
+		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
+			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
+			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
+		else {
+			zfs_acl_node_t *aclnode;
+			void *start = vsecp->vsa_aclentp;
+
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+			ASSERT3U((caddr_t)start - (caddr_t)vsecp->vsa_aclentp,
+			    ==, aclp->z_acl_bytes);
+		}
+	}
+	/* Translate the znode's ACL-wide pflags into vsa_aclflags bits */
+	if (mask & VSA_ACE_ACLFLAGS) {
+		vsecp->vsa_aclflags = 0;
+		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
+			vsecp->vsa_aclflags |= ACL_DEFAULTED;
+		if (zp->z_pflags & ZFS_ACL_PROTECTED)
+			vsecp->vsa_aclflags |= ACL_PROTECTED;
+		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
+			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
+	}
+
+	mutex_exit(&zp->z_acl_lock);
+
+	return (0);
+}
+
+/*
+ * Build a new zfs_acl_t from the ACE array described by vsecp and hand
+ * it back through *zaclp.
+ *
+ * The entry count must be in the range 1..MAX_ACL_ENTRIES, otherwise
+ * EINVAL is returned.  Depending on the ACL version chosen for the
+ * filesystem the entries are converted with zfs_copy_ace_2_oldace() or
+ * zfs_copy_ace_2_fuid(); a conversion failure frees the partially built
+ * ACL and returns that error.  When vsa_mask has VSA_ACE_ACLFLAGS set,
+ * the ACL-wide flags in vsa_aclflags are mirrored into z_hints.
+ *
+ * Returns 0 on success; *zaclp is written only in that case.
+ */
+int
+zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_type,
+    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
+{
+	zfs_acl_t *new_acl;
+	zfs_acl_node_t *node;
+	int nentries = vsecp->vsa_aclcnt;
+	int error;
+
+	if (vsecp->vsa_aclcnt <= 0 || vsecp->vsa_aclcnt > MAX_ACL_ENTRIES)
+		return (SET_ERROR(EINVAL));
+
+	new_acl = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
+	new_acl->z_hints = 0;
+
+	/* One node, sized for the largest possible form of every entry */
+	node = zfs_acl_node_alloc(nentries * sizeof (zfs_object_ace_t));
+
+	if (new_acl->z_version == ZFS_ACL_VERSION_INITIAL) {
+		error = zfs_copy_ace_2_oldace(obj_type, new_acl,
+		    (ace_t *)vsecp->vsa_aclentp, node->z_acldata,
+		    nentries, &node->z_size);
+	} else {
+		error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, new_acl,
+		    vsecp->vsa_aclentp, node->z_acldata, nentries,
+		    &node->z_size, fuidp, cr);
+	}
+
+	/* The node is not on the ACL's list yet, so free it separately */
+	if (error != 0) {
+		zfs_acl_free(new_acl);
+		zfs_acl_node_free(node);
+		return (error);
+	}
+
+	node->z_ace_count = nentries;
+	new_acl->z_acl_count = nentries;
+	new_acl->z_acl_bytes = node->z_size;
+	list_insert_head(&new_acl->z_acl, node);
+
+	/*
+	 * Carry over ACL-wide flags when the caller supplied them
+	 */
+	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
+		if (vsecp->vsa_aclflags & ACL_PROTECTED)
+			new_acl->z_hints |= ZFS_ACL_PROTECTED;
+		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
+			new_acl->z_hints |= ZFS_ACL_DEFAULTED;
+		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
+			new_acl->z_hints |= ZFS_ACL_AUTO_INHERIT;
+	}
+
+	*zaclp = new_acl;
+
+	return (0);
+}
+
+/*
+ * Set a file's ACL
+ *
+ *	zp		- znode whose ACL is replaced
+ *	vsecp		- new ACL; vsa_mask must include VSA_ACE or
+ *			  VSA_ACECNT
+ *	skipaclchk	- passed through to zfs_zaccess()
+ *	cr		- caller's credentials
+ *
+ * Returns 0 on success, ENOSYS when neither VSA_ACE nor VSA_ACECNT is
+ * set, EPERM when the znode is ZFS_IMMUTABLE, or the error from
+ * zfs_zaccess(), zfs_vsec_2_aclp() or dmu_tx_assign().
+ *
+ * On success the new zfs_acl_t becomes zp->z_acl_cached.  On failure of
+ * the transaction assignment both the new ACL and any FUID info built
+ * while converting it are released.
+ */
+int
+zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zilog_t		*zilog = zfsvfs->z_log;
+	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
+	dmu_tx_t	*tx;
+	int		error;
+	zfs_acl_t	*aclp;
+	zfs_fuid_info_t	*fuidp = NULL;
+	boolean_t	fuid_dirtied;
+	uint64_t	acl_obj;
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+	if (mask == 0)
+		return (SET_ERROR(ENOSYS));
+
+	if (zp->z_pflags & ZFS_IMMUTABLE)
+		return (SET_ERROR(EPERM));
+
+	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
+		return (error);
+
+	error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
+	    &aclp);
+	if (error)
+		return (error);
+
+	/*
+	 * If ACL wide flags aren't being set then preserve any
+	 * existing flags.
+	 */
+	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
+		aclp->z_hints |=
+		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
+	}
+top:
+	mutex_enter(&zp->z_acl_lock);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+
+	/*
+	 * If old version and ACL won't fit in bonus and we aren't
+	 * upgrading then take out necessary DMU holds
+	 */
+
+	if ((acl_obj = zfs_external_acl(zp)) != 0) {
+		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
+		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
+			dmu_tx_hold_free(tx, acl_obj, 0,
+			    DMU_OBJECT_END);
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+			    aclp->z_acl_bytes);
+		} else {
+			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
+		}
+	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
+	}
+
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (error) {
+		mutex_exit(&zp->z_acl_lock);
+
+		/* ERESTART: wait for the next txg and rebuild the tx */
+		if (error == ERESTART) {
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		zfs_acl_free(aclp);
+		/*
+		 * The FUID info from zfs_vsec_2_aclp() is only released
+		 * on the success path below, so free it here as well.
+		 */
+		if (fuidp)
+			zfs_fuid_info_free(fuidp);
+		return (error);
+	}
+
+	error = zfs_aclset_common(zp, aclp, cr, tx);
+	ASSERT0(error);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	zp->z_acl_cached = aclp;
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
+
+	if (fuidp)
+		zfs_fuid_info_free(fuidp);
+	dmu_tx_commit(tx);
+	mutex_exit(&zp->z_acl_lock);
+
+	return (error);
+}
+
+/*
+ * Check accesses of interest (AoI) against attributes of the dataset
+ * such as read-only.  Returns zero if no AoI conflict with dataset
+ * attributes, otherwise an appropriate errno is returned:
+ *	EROFS	a WRITE_MASK access on a VFS_RDONLY filesystem
+ *	EPERM	a WRITE_MASK_DATA access to a ZFS_IMMUTABLE object, or
+ *		ACE_DELETE on a ZFS_NOUNLINK object
+ *	EACCES	read or execute of a ZFS_AV_QUARANTINED object
+ */
+static int
+zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
+{
+	/*
+	 * On a read-only filesystem every write-class access is refused,
+	 * except that for device vnodes only the attribute-writing subset
+	 * (WRITE_MASK_ATTRS) is refused.
+	 */
+	if ((v4_mode & WRITE_MASK) &&
+	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
+	    (!IS_DEVVP(ZTOV(zp)) || (v4_mode & WRITE_MASK_ATTRS))) {
+		return (SET_ERROR(EROFS));
+	}
+
+	/*
+	 * Intentionally allow ZFS_READONLY through here.
+	 * See zfs_zaccess_common().
+	 */
+	if ((v4_mode & WRITE_MASK_DATA) &&
+	    (zp->z_pflags & ZFS_IMMUTABLE)) {
+		return (SET_ERROR(EPERM));
+	}
+
+	/*
+	 * In FreeBSD we allow modifying a directory's content if
+	 * ZFS_NOUNLINK (sunlnk) is set. We just don't allow directory
+	 * removal, which is handled in zfs_zaccess_delete().
+	 */
+	if ((v4_mode & ACE_DELETE) &&
+	    (zp->z_pflags & ZFS_NOUNLINK)) {
+		return (SET_ERROR(EPERM));
+	}
+
+	if ((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
+	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
+		return (SET_ERROR(EACCES));
+	}
+
+	return (0);
+}
+
+/*
+ * The primary usage of this function is to loop through all of the
+ * ACEs in the znode, determining what accesses of interest (AoI) to
+ * the caller are allowed or denied.  The AoI are expressed as bits in
+ * the working_mode parameter.  As each ACE is processed, bits covered
+ * by that ACE are removed from the working_mode.  This removal
+ * facilitates two things.  The first is that when the working mode is
+ * empty (= 0), we know we've looked at all the AoI. The second is
+ * that the ACE interpretation rules don't allow a later ACE to undo
+ * something granted or denied by an earlier ACE.  Removing the
+ * discovered access or denial enforces this rule.  At the end of
+ * processing the ACEs, all AoI that were found to be denied are
+ * placed into the working_mode, giving the caller a mask of denied
+ * accesses.  Returns:
+ *	0		if all AoI granted
+ *	EACCES		if the denied mask is non-zero
+ *	-1		if nothing was denied but some AoI were matched by
+ *			no applicable ACE (they remain set in working_mode)
+ *	EIO		if an ACE carries an unrecognized entry type
+ *	other error	if abnormal failure (e.g., IO error)
+ *
+ * A secondary usage of the function is to determine if any of the
+ * AoI are granted.  If an ACE grants any access in
+ * the working_mode, we immediately short circuit out of the function.
+ * This mode is chosen by setting anyaccess to B_TRUE.  The
+ * working_mode is not a denied access mask upon exit if the function
+ * is used in this manner.
+ */
+static int
+zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
+    boolean_t anyaccess, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zfs_acl_t	*aclp;
+	int		error;
+	uid_t		uid = crgetuid(cr);
+	uint64_t 	who;
+	uint16_t	type, iflags;
+	uint16_t	entry_type;
+	uint32_t	access_mask;
+	uint32_t	deny_mask = 0;
+	zfs_ace_hdr_t	*acep = NULL;
+	boolean_t	checkit;
+	uid_t		gowner;
+	uid_t		fowner;
+
+	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
+
+	mutex_enter(&zp->z_acl_lock);
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
+	error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
+	if (error != 0) {
+		mutex_exit(&zp->z_acl_lock);
+		return (error);
+	}
+
+	ASSERT3P(zp->z_acl_cached, !=, NULL);
+
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
+	    &iflags, &type))) {
+		uint32_t mask_matched;
+
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		/* Inherit-only ACEs on a directory don't apply to it */
+		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
+			continue;
+
+		/* Skip ACE if it does not affect any AoI */
+		mask_matched = (access_mask & *working_mode);
+		if (!mask_matched)
+			continue;
+
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+
+		checkit = B_FALSE;
+
+		/* Decide whether this ACE's subject matches the caller */
+		switch (entry_type) {
+		case ACE_OWNER:
+			if (uid == fowner)
+				checkit = B_TRUE;
+			break;
+		case OWNING_GROUP:
+			/* group@ is checked as the file's owning group */
+			who = gowner;
+			fallthrough;
+		case ACE_IDENTIFIER_GROUP:
+			checkit = zfs_groupmember(zfsvfs, who, cr);
+			break;
+		case ACE_EVERYONE:
+			checkit = B_TRUE;
+			break;
+
+		/* USER Entry */
+		default:
+			if (entry_type == 0) {
+				uid_t newid;
+
+				newid = zfs_fuid_map_id(zfsvfs, who, cr,
+				    ZFS_ACE_USER);
+				if (newid !=  UID_NOBODY &&
+				    uid == newid)
+					checkit = B_TRUE;
+				break;
+			} else {
+				mutex_exit(&zp->z_acl_lock);
+				return (SET_ERROR(EIO));
+			}
+		}
+
+		if (checkit) {
+			if (type == DENY) {
+				DTRACE_PROBE3(zfs__ace__denies,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				deny_mask |= mask_matched;
+			} else {
+				DTRACE_PROBE3(zfs__ace__allows,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				if (anyaccess) {
+					mutex_exit(&zp->z_acl_lock);
+					return (0);
+				}
+			}
+			/* Decided either way: later ACEs can't change it */
+			*working_mode &= ~mask_matched;
+		}
+
+		/* Are we done? */
+		if (*working_mode == 0)
+			break;
+	}
+
+	mutex_exit(&zp->z_acl_lock);
+
+	/* Put the found 'denies' back on the working mode */
+	if (deny_mask) {
+		*working_mode |= deny_mask;
+		return (SET_ERROR(EACCES));
+	} else if (*working_mode) {
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Report whether the caller is granted any access at all to zp; which
+ * particular access is granted does not matter.  The ACL is consulted
+ * first, and if it yields nothing the decision falls to
+ * secpolicy_vnode_any_access().
+ */
+boolean_t
+zfs_has_access(znode_t *zp, cred_t *cr)
+{
+	uint32_t wanted = ACE_ALL_PERMS;
+	uid_t fowner;
+
+	if (zfs_zaccess_aces_check(zp, &wanted, B_TRUE, cr) == 0)
+		return (B_TRUE);
+
+	fowner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
+
+	return (secpolicy_vnode_any_access(cr, ZTOV(zp), fowner) == 0);
+}
+
+/*
+ * Common access check: dataset-level checks first, then the ACL.
+ *
+ * On return *working_mode is 0 when the request was empty, the
+ * filesystem is replaying, or skipaclchk was set; otherwise it starts
+ * as v4_mode and is updated by zfs_zaccess_aces_check().  *check_privs
+ * is B_TRUE unless the dataset check itself failed.
+ *
+ * Returns 0 when access is allowed, otherwise the error from the
+ * dataset check, EPERM for a DOS read-only file, or the result of
+ * zfs_zaccess_aces_check().
+ */
+static int
+zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
+    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+{
+	int rc;
+
+	*check_privs = B_TRUE;
+
+	/*
+	 * Nothing requested, or replaying the log: allow without checks.
+	 */
+	if (v4_mode == 0 || zp->z_zfsvfs->z_replay) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	*working_mode = v4_mode;
+
+	rc = zfs_zaccess_dataset_check(zp, v4_mode);
+	if (rc != 0) {
+		*check_privs = B_FALSE;
+		return (rc);
+	}
+
+	/*
+	 * The caller asked for the ACL check to be skipped.  That only
+	 * happens when the caller already checked VOP_ACCESS() with a
+	 * 32 bit ACE mask and had the appropriate permissions.
+	 */
+	if (skipaclchk) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	/*
+	 * ZFS_READONLY is the "DOS R/O" attribute.  With it set we act
+	 * as though nothing in the ACL grants write access.  Writes must
+	 * still work after a file was opened r/w and DOS R/O was set
+	 * afterwards (much like writing after fchmod(fd, 0444)), which
+	 * is why the flag is ignored by the dataset check above and
+	 * handled here as part of the ACL check instead.  Directories
+	 * ignore DOS R/O.
+	 */
+	if ((v4_mode & WRITE_MASK_DATA) &&
+	    (ZTOV(zp)->v_type != VDIR) &&
+	    (zp->z_pflags & ZFS_READONLY)) {
+		return (SET_ERROR(EPERM));
+	}
+
+	rc = zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr);
+
+	return (rc);
+}
+
+/*
+ * Retry a failed check as an append: only when ACE_WRITE_DATA is the
+ * sole bit left in *working_mode is ACE_APPEND_DATA checked in its
+ * place.  Any other remaining mode fails with EACCES.
+ */
+static int
+zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
+    cred_t *cr)
+{
+	if (*working_mode == ACE_WRITE_DATA) {
+		return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
+		    check_privs, B_FALSE, cr));
+	}
+
+	return (SET_ERROR(EACCES));
+}
+
+/*
+ * Fast check for VEXEC.
+ *
+ * The routine of the same name on other platforms falls back to
+ * zfs_zaccess() on its slow path, which would be incorrect on FreeBSD
+ * (see zfs_freebsd_access for the difference).  This variant therefore
+ * leaves the slow path to the caller.
+ *
+ * In addition, ZFS_NO_EXECS_DENIED is tested here without taking any
+ * lock; safe access to the znode_t is provided by the vnode lock.
+ *
+ * Returns 0 only when the znode is neither quarantined nor an extended
+ * attribute directory and has ZFS_NO_EXECS_DENIED set; returns 1 in
+ * every other case (presumably the caller's cue to take its slow path).
+ */
+int
+zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
+{
+	if ((zdp->z_pflags & ZFS_AV_QUARANTINED) ||
+	    ((zdp->z_pflags & ZFS_XATTR) && (ZTOV(zdp)->v_type == VDIR)))
+		return (1);
+
+	return ((zdp->z_pflags & ZFS_NO_EXECS_DENIED) ? 0 : 1);
+}
+
+
+/*
+ * Determine whether Access should be granted/denied.
+ *
+ * The least priv subsystem is always consulted as a basic privilege
+ * can define any form of access.
+ *
+ *	zp		- znode being accessed
+ *	mode		- requested access as an NFSv4-style ACE mask
+ *	flags		- V_APPEND triggers the append retry; no other bit
+ *			  is examined here
+ *	skipaclchk	- passed through to zfs_zaccess_common()
+ *	cr		- caller's credentials
+ *
+ * Returns 0 when access is granted, otherwise an errno.
+ */
+int
+zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
+{
+	uint32_t	working_mode;
+	int		error;
+	boolean_t	check_privs;
+	mode_t		needed_bits;
+	uid_t		owner;
+
+	/*
+	 * In FreeBSD, we don't care about permissions of individual ADS.
+	 * Note that not checking them is not just an optimization - without
+	 * this shortcut, EA operations may bogusly fail with EACCES.
+	 *
+	 * Since every ZFS_XATTR znode returns here, nothing below ever
+	 * deals with an extended attribute directory.
+	 */
+	if (zp->z_pflags & ZFS_XATTR)
+		return (0);
+
+	owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
+
+	/*
+	 * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
+	 * in needed_bits.  Later, the bits still left in working_mode
+	 * (currently missing) are mapped the same way into checkmode, and
+	 * secpolicy_vnode_access2() is called with
+	 * (needed_bits & ~checkmode), needed_bits.
+	 */
+	needed_bits = 0;
+
+	/* The owner implicitly holds read_acl/read_attributes */
+	working_mode = mode;
+	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
+	    owner == crgetuid(cr))
+		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
+
+	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
+	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
+		needed_bits |= VREAD;
+	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
+	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
+		needed_bits |= VWRITE;
+	if (working_mode & ACE_EXECUTE)
+		needed_bits |= VEXEC;
+
+	/* working_mode is overwritten by zfs_zaccess_common() from here on */
+	if ((error = zfs_zaccess_common(zp, mode, &working_mode,
+	    &check_privs, skipaclchk, cr)) == 0) {
+		return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
+		    needed_bits, needed_bits));
+	}
+
+	/* A dataset-level refusal is final; privileges can't override it */
+	if (error && !check_privs)
+		return (error);
+
+	if (error && (flags & V_APPEND)) {
+		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
+	}
+
+	if (error && check_privs) {
+		mode_t		checkmode = 0;
+		vnode_t *check_vp = ZTOV(zp);
+
+		/*
+		 * First check for implicit owner permission on
+		 * read_acl/read_attributes
+		 */
+
+		error = 0;
+		ASSERT3U(working_mode, !=, 0);
+
+		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
+		    owner == crgetuid(cr)))
+			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
+
+		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
+		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
+			checkmode |= VREAD;
+		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
+		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
+			checkmode |= VWRITE;
+		if (working_mode & ACE_EXECUTE)
+			checkmode |= VEXEC;
+
+		error = secpolicy_vnode_access2(cr, check_vp, owner,
+		    needed_bits & ~checkmode, needed_bits);
+
+		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
+			error = secpolicy_vnode_chown(check_vp, cr, owner);
+		if (error == 0 && (working_mode & ACE_WRITE_ACL))
+			error = secpolicy_vnode_setdac(check_vp, cr, owner);
+
+		if (error == 0 && (working_mode &
+		    (ACE_DELETE|ACE_DELETE_CHILD)))
+			error = secpolicy_vnode_remove(check_vp, cr);
+
+		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
+			error = secpolicy_vnode_chown(check_vp, cr, owner);
+		}
+		if (error == 0) {
+			/*
+			 * See if any bits other than those already checked
+			 * for are still present.  If so then return EACCES
+			 */
+			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
+				error = SET_ERROR(EACCES);
+			}
+		}
+	} else if (error == 0) {
+		/* The append retry succeeded */
+		error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
+		    needed_bits, needed_bits);
+	}
+
+	return (error);
+}
+
+/*
+ * Convert a traditional unix VREAD/VWRITE/VEXEC mode (taken from the
+ * owner position, hence the shift by 6) into the NFSv4-style mask and
+ * pass it to zfs_zaccess() together with the caller's flags.
+ */
+int
+zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
+{
+	int v4_mode = zfs_unix_to_v4(mode >> 6);
+
+	return (zfs_zaccess(zp, v4_mode, flags, B_FALSE, cr));
+}
+
+/*
+ * Access callback for secpolicy_vnode_setattr: translate the unix mode
+ * to an NFSv4-style mask and check it with zfs_zaccess(), no flags.
+ */
+int
+zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
+{
+	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), 0, B_FALSE, cr));
+}
+
+/*
+ * Last step of the delete check: ask secpolicy_vnode_access2() whether
+ * the caller has VWRITE|VEXEC on directory dzp, given the permissions
+ * in available_perms, and if so apply the sticky-bit rule for removing
+ * zp from dzp.  Returns 0 when the delete may proceed.
+ */
+static int
+zfs_delete_final_check(znode_t *zp, znode_t *dzp,
+    mode_t available_perms, cred_t *cr)
+{
+	uid_t dir_owner;
+	int rc;
+
+	dir_owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER);
+
+	rc = secpolicy_vnode_access2(cr, ZTOV(dzp), dir_owner,
+	    available_perms, VWRITE|VEXEC);
+	if (rc != 0)
+		return (rc);
+
+	return (zfs_sticky_remove_access(dzp, zp, cr));
+}
+
+/*
+ * Determine whether Access should be granted/deny, without
+ * consulting least priv subsystem.
+ *
+ * NOTE(review): despite the sentence above, the code below does call
+ * secpolicy_vnode_remove() and, through zfs_delete_final_check(),
+ * secpolicy_vnode_access2().
+ *
+ * The following chart is the recommended NFSv4 enforcement for
+ * ability to delete an object.
+ *
+ *      -------------------------------------------------------
+ *      |   Parent Dir  |           Target Object Permissions |
+ *      |  permissions  |                                     |
+ *      -------------------------------------------------------
+ *      |               | ACL Allows | ACL Denies| Delete     |
+ *      |               |  Delete    |  Delete   | unspecified|
+ *      -------------------------------------------------------
+ *      |  ACL Allows   | Permit     | Permit    | Permit     |
+ *      |  DELETE_CHILD |                                     |
+ *      -------------------------------------------------------
+ *      |  ACL Denies   | Permit     | Deny      | Deny       |
+ *      |  DELETE_CHILD |            |           |            |
+ *      -------------------------------------------------------
+ *      | ACL specifies |            |           |            |
+ *      | only allow    | Permit     | Permit    | Permit     |
+ *      | write and     |            |           |            |
+ *      | execute       |            |           |            |
+ *      -------------------------------------------------------
+ *      | ACL denies    |            |           |            |
+ *      | write and     | Permit     | Deny      | Deny       |
+ *      | execute       |            |           |            |
+ *      -------------------------------------------------------
+ *         ^
+ *         |
+ *         No search privilege, can't even look up file?
+ *
+ *	dzp	- parent directory of the object
+ *	zp	- object to be deleted
+ *	cr	- caller's credentials
+ *
+ * Returns 0 when the delete is permitted, EPERM when zp is
+ * ZFS_IMMUTABLE or ZFS_NOUNLINK, otherwise the error from the failing
+ * check.
+ */
+int
+zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
+{
+	uint32_t dzp_working_mode = 0;
+	uint32_t zp_working_mode = 0;
+	int dzp_error, zp_error;
+	mode_t available_perms;
+	boolean_t dzpcheck_privs = B_TRUE;
+	boolean_t zpcheck_privs = B_TRUE;
+
+	/*
+	 * We want specific DELETE permissions to
+	 * take precedence over WRITE/EXECUTE.  We don't
+	 * want an ACL such as this to mess us up.
+	 * user:joe:write_data:deny,user:joe:delete:allow
+	 *
+	 * However, deny permissions may ultimately be overridden
+	 * by secpolicy_vnode_access().
+	 *
+	 * We will ask for all of the necessary permissions and then
+	 * look at the working modes from the directory and target object
+	 * to determine what was found.
+	 */
+
+	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
+		return (SET_ERROR(EPERM));
+
+	/*
+	 * First row
+	 * If the directory permissions allow the delete, we are done.
+	 */
+	if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
+	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
+		return (0);
+
+	/*
+	 * If target object has delete permission then we are done
+	 */
+	if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
+	    &zpcheck_privs, B_FALSE, cr)) == 0)
+		return (0);
+
+	ASSERT(dzp_error);
+	ASSERT(zp_error);
+
+	/*
+	 * check_privs is cleared only when the dataset-level check failed;
+	 * such an error is returned as is, without any privilege check.
+	 */
+	if (!dzpcheck_privs)
+		return (dzp_error);
+	if (!zpcheck_privs)
+		return (zp_error);
+
+	/*
+	 * Second row
+	 *
+	 * If directory returns EACCES then delete_child was denied
+	 * due to deny delete_child.  In this case send the request through
+	 * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
+	 * since that *could* allow the delete based on write/execute permission
+	 * and we want delete permissions to override write/execute.
+	 */
+
+	if (dzp_error == EACCES) {
+		/* XXXPJD: s/dzp/zp/ ? */
+		return (secpolicy_vnode_remove(ZTOV(dzp), cr));
+	}
+	/*
+	 * Third Row
+	 * only need to see if we have write/execute on directory.
+	 */
+
+	dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
+	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
+
+	if (dzp_error != 0 && !dzpcheck_privs)
+		return (dzp_error);
+
+	/*
+	 * Fourth row
+	 *
+	 * A bit still set in dzp_working_mode was not granted by the
+	 * directory's ACL, so only the cleared bits count as available.
+	 */
+
+	available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE;
+	available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC;
+
+	return (zfs_delete_final_check(zp, dzp, available_perms, cr));
+
+}
+
+/*
+ * Check whether szp may be renamed from directory sdzp into directory
+ * tdzp, replacing tzp when tzp is non-NULL.
+ *
+ * Rename needs delete permission on the source plus add-file or
+ * add-subdirectory permission on the target directory.  BSD operating
+ * systems additionally require write permission on a directory that is
+ * moved from one parent directory to another.
+ *
+ * Returns 0 when permitted, EACCES when szp is quarantined, otherwise
+ * the error of the first failing check.
+ */
+int
+zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
+    znode_t *tzp, cred_t *cr)
+{
+	boolean_t moving_dir;
+	int add_perm;
+	int error;
+
+	if (szp->z_pflags & ZFS_AV_QUARANTINED)
+		return (SET_ERROR(EACCES));
+
+	moving_dir = (ZTOV(szp)->v_type == VDIR);
+	add_perm = moving_dir ? ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
+
+	/*
+	 * A directory changing parents must itself be writable.
+	 */
+	if (moving_dir && ZTOV(sdzp) != ZTOV(tdzp)) {
+		error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr);
+		if (error != 0)
+			return (error);
+	}
+
+	/*
+	 * The delete half comes first: the source must be removable
+	 * from its current directory.
+	 */
+	error = zfs_zaccess_delete(sdzp, szp, cr);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * An existing target has to be deletable too.
+	 */
+	if (tzp != NULL) {
+		error = zfs_zaccess_delete(tdzp, tzp, cr);
+		if (error != 0)
+			return (error);
+	}
+
+	/*
+	 * Finally the add half, checked against the target directory.
+	 */
+	return (zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr));
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_ctldir.c b/zfs/module/os/freebsd/zfs/zfs_ctldir.c
new file mode 100644
index 0000000..cfc4bab
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_ctldir.c

@@ -0,0 +1,1397 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
+ */
+
+/*
+ * ZFS control directory (a.k.a. ".zfs")
+ *
+ * This directory provides a common location for all ZFS meta-objects.
+ * Currently, this is only the 'snapshot' directory, but this may expand in the
+ * future.  The elements are built using the GFS primitives, as the hierarchy
+ * does not actually exist on disk.
+ *
+ * For 'snapshot', we don't want to have all snapshots always mounted, because
+ * this would take up a huge amount of space in /etc/mnttab.  We have three
+ * types of objects:
+ *
+ * 	ctldir ------> snapshotdir -------> snapshot
+ *                                             |
+ *                                             |
+ *                                             V
+ *                                         mounted fs
+ *
+ * The 'snapshot' node contains just enough information to lookup '..' and act
+ * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
+ * perform an automount of the underlying filesystem and return the
+ * corresponding vnode.
+ *
+ * All mounts are handled automatically by the kernel, but unmounts are
+ * (currently) handled from user land.  The main reason is that there is no
+ * reliable way to auto-unmount the filesystem when it's "no longer in use".
+ * When the user unmounts a filesystem, we call zfsctl_unmount(), which
+ * unmounts any snapshots within the snapshot directory.
+ *
+ * The '.zfs', '.zfs/snapshot', and all directories created under
+ * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
+ * share the same vfs_t as the head filesystem (what '.zfs' lives under).
+ *
+ * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
+ * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
+ * However, vnodes within these mounted on file systems have their v_vfsp
+ * fields set to the head filesystem to make NFS happy (see
+ * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
+ * so that it cannot be freed until all snapshots have been unmounted.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <sys/dirent.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/namei.h>
+#include <sys/stat.h>
+#include <sys/dmu.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_deleg.h>
+#include <sys/mount.h>
+#include <sys/zap.h>
+#include <sys/sysproto.h>
+
+#include "zfs_namecheck.h"
+
+#include <sys/kernel.h>
+#include <sys/ccompat.h>
+
+/* Common access mode for all virtual directories under the ctldir */
+const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
+    S_IROTH | S_IXOTH;
+
+/*
+ * "Synthetic" filesystem implementation.
+ */
+
+/*
+ * Assert that A implies B, i.e. that B holds whenever A does.
+ *
+ * The expansion deliberately carries no trailing semicolon: the call site
+ * supplies it, so the macro behaves like a single statement and remains
+ * safe inside an unbraced if/else.
+ */
+#define	KASSERT_IMPLY(A, B, msg)	KASSERT(!(A) || (B), (msg))
+
+static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
+
+/*
+ * A synthetic-fs node.  A node is identified by its own ID together with
+ * the ID of its parent (see sfs_compare_ids()).
+ */
+typedef struct sfs_node {
+	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];
+	uint64_t	sn_parent_id;
+	uint64_t	sn_id;
+} sfs_node_t;
+
+/*
+ * Check the parent's ID as well as the node's to account for a chance
+ * that IDs originating from different domains (snapshot IDs, artificial
+ * IDs, znode IDs) may clash.
+ */
+static int
+sfs_compare_ids(struct vnode *vp, void *arg)
+{
+	const sfs_node_t *have = vp->v_data;
+	const sfs_node_t *want = arg;
+
+	/* Zero means equality; both the node ID and the parent ID count. */
+	if (have->sn_id != want->sn_id)
+		return (1);
+	if (have->sn_parent_id != want->sn_parent_id)
+		return (1);
+	return (0);
+}
+
+/*
+ * Look up the vnode for (parent_id, id) in the vfs hash of 'mp'.  The low
+ * 32 bits of 'id' are the hash value; sfs_compare_ids() resolves clashes.
+ */
+static int
+sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
+    uint64_t id, struct vnode **vpp)
+{
+	sfs_node_t key;
+
+	/* Only the two ID fields are consulted by the comparator. */
+	key.sn_parent_id = parent_id;
+	key.sn_id = id;
+
+	return (vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp,
+	    sfs_compare_ids, &key));
+}
+
+/*
+ * Insert 'vp' into the vfs hash under the low 32 bits of 'id'.  The node
+ * already attached to vp->v_data is what sfs_compare_ids() matches
+ * against; 'parent_id' itself is not used here.
+ */
+static int
+sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
+    uint64_t id, struct vnode **vpp)
+{
+	KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
+
+	return (vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp,
+	    sfs_compare_ids, vp->v_data));
+}
+
+/* Remove 'vp' from the vfs hash; thin wrapper around vfs_hash_remove(). */
+static void
+sfs_vnode_remove(struct vnode *vp)
+{
+	vfs_hash_remove(vp);
+}
+
+typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
+
+/*
+ * Get the vnode identified by (parent_id, id) on mount 'mp', creating it
+ * if the vfs hash does not hold one yet.
+ *
+ * For a new vnode, 'setup' is called with the vnode and 'arg' before the
+ * vnode is inserted into the hash; it is expected to attach v_data (the
+ * insert asserts that v_data is non-NULL).
+ *
+ * Returns 0 with the vnode in *vpp, or an error.  On an allocation or
+ * insmntque() failure *vpp is set to NULL.
+ */
+static int
+sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
+    const char *tag, struct vop_vector *vops,
+    sfs_vnode_setup_fn setup, void *arg,
+    struct vnode **vpp)
+{
+	struct vnode *vp;
+	int error;
+
+	/* Fast path: the vnode is already hashed (or the lookup failed). */
+	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
+	if (error != 0 || *vpp != NULL) {
+		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
+		    "sfs vnode with no data");
+		return (error);
+	}
+
+	/* Allocate a new vnode/inode. */
+	error = getnewvnode(tag, mp, vops, &vp);
+	if (error != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * Exclusively lock the vnode while it's being constructed.
+	 */
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	setup(vp, arg);
+
+	/*
+	 * A non-NULL *vpp after the insert means a different vnode is being
+	 * returned instead of ours -- presumably one that a racing thread
+	 * hashed first; confirm against vfs_hash_insert().
+	 */
+	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
+	if (error != 0 || *vpp != NULL) {
+		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
+		    "sfs vnode with no data");
+		return (error);
+	}
+
+#if __FreeBSD_version >= 1400077
+	vn_set_state(vp, VSTATE_CONSTRUCTED);
+#endif
+
+	*vpp = vp;
+	return (0);
+}
+
+/* Print the name, parent ID and ID of 'node', one tab-indented line each. */
+static void
+sfs_print_node(sfs_node_t *node)
+{
+	printf("\tname = %s\n", node->sn_name);
+	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
+	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
+}
+
+/*
+ * Allocate a zeroed node of 'size' bytes and fill in its sfs_node_t
+ * header.  'size' may exceed sizeof (sfs_node_t) so that a larger
+ * structure embedding the header as its first member can be allocated.
+ * The allocation uses M_WAITOK.
+ */
+static sfs_node_t *
+sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
+{
+	sfs_node_t *np;
+
+	KASSERT(strlen(name) < sizeof (np->sn_name),
+	    ("sfs node name is too long"));
+	KASSERT(size >= sizeof (*np), ("sfs node size is too small"));
+
+	np = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
+	np->sn_id = id;
+	np->sn_parent_id = parent_id;
+	strlcpy(np->sn_name, name, sizeof (np->sn_name));
+
+	return (np);
+}
+
+/* Free a node that was allocated by sfs_alloc_node(). */
+static void
+sfs_destroy_node(sfs_node_t *node)
+{
+	free(node, M_SFSNODES);
+}
+
+/*
+ * Detach 'vp' from the synthetic fs: unhash it, clear its v_data and hand
+ * the former v_data back to the caller, who decides whether to free it.
+ */
+static void *
+sfs_reclaim_vnode(vnode_t *vp)
+{
+	void *detached;
+
+	sfs_vnode_remove(vp);
+
+	detached = vp->v_data;
+	vp->v_data = NULL;
+
+	return (detached);
+}
+
+/*
+ * Emit the "." and ".." entries of a synthetic directory.
+ *
+ * "." (inode 'id') is emitted when the uio offset is 0 and ".." (inode
+ * 'parent_id') when the offset is sizeof (struct dirent).  On success
+ * *offp, if given, is set to the offset just past both entries.
+ *
+ * Returns EINVAL when the residual is smaller than one dirent or when the
+ * offset is negative or falls inside the "." record; otherwise 0 or the
+ * error reported by vfs_read_dirent().
+ */
+static int
+sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
+    zfs_uio_t *uio, off_t *offp)
+{
+	struct dirent entry;
+	int error;
+
+	/* Reset ncookies for subsequent use of vfs_read_dirent. */
+	if (ap->a_ncookies != NULL)
+		*ap->a_ncookies = 0;
+
+	if (zfs_uio_resid(uio) < sizeof (entry))
+		return (SET_ERROR(EINVAL));
+
+	if (zfs_uio_offset(uio) < 0)
+		return (SET_ERROR(EINVAL));
+	if (zfs_uio_offset(uio) == 0) {
+		entry.d_fileno = id;
+		entry.d_type = DT_DIR;
+		entry.d_name[0] = '.';
+		entry.d_name[1] = '\0';
+		entry.d_namlen = 1;
+		entry.d_reclen = sizeof (entry);
+		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
+		if (error != 0)
+			return (SET_ERROR(error));
+	}
+
+	/*
+	 * NOTE(review): the checks below assume that a successful
+	 * vfs_read_dirent() advanced the uio offset by d_reclen -- confirm.
+	 */
+	if (zfs_uio_offset(uio) < sizeof (entry))
+		return (SET_ERROR(EINVAL));
+	if (zfs_uio_offset(uio) == sizeof (entry)) {
+		entry.d_fileno = parent_id;
+		entry.d_type = DT_DIR;
+		entry.d_name[0] = '.';
+		entry.d_name[1] = '.';
+		entry.d_name[2] = '\0';
+		entry.d_namlen = 2;
+		entry.d_reclen = sizeof (entry);
+		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
+		if (error != 0)
+			return (SET_ERROR(error));
+	}
+
+	if (offp != NULL)
+		*offp = 2 * sizeof (entry);
+	return (0);
+}
+
+
+/*
+ * .zfs inode namespace
+ *
+ * We need to generate unique inode numbers for all files and directories
+ * within the .zfs pseudo-filesystem.  We use the following scheme:
+ *
+ * 	ENTRY			ZFSCTL_INODE
+ * 	.zfs			1
+ * 	.zfs/snapshot		2
+ * 	.zfs/snapshot/<snap>	objectid(snap)
+ */
+#define	ZFSCTL_INO_SNAP(id)	(id)
+
+static struct vop_vector zfsctl_ops_root;
+static struct vop_vector zfsctl_ops_snapdir;
+static struct vop_vector zfsctl_ops_snapshot;
+
+/* Module-level initialization hook; this implementation has nothing to do. */
+void
+zfsctl_init(void)
+{
+}
+
+/* Module-level teardown hook; this implementation has nothing to do. */
+void
+zfsctl_fini(void)
+{
+}
+
+/*
+ * Tell whether 'vp' is one of the control directory vnodes, i.e. whether
+ * its operations vector is that of '.zfs', '.zfs/snapshot' or a snapshot
+ * mountpoint node.
+ */
+boolean_t
+zfsctl_is_node(vnode_t *vp)
+{
+	if (vn_matchops(vp, zfsctl_ops_root))
+		return (B_TRUE);
+	if (vn_matchops(vp, zfsctl_ops_snapdir))
+		return (B_TRUE);
+	if (vn_matchops(vp, zfsctl_ops_snapshot))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * The '.zfs' node.  The sfs_node_t header must stay the first member
+ * because the structure is allocated through sfs_alloc_node() and freed
+ * through sfs_destroy_node() as an sfs_node_t.
+ */
+typedef struct zfsctl_root {
+	sfs_node_t	node;
+	sfs_node_t	*snapdir;	/* the '.zfs/snapshot' node */
+	timestruc_t	cmtime;		/* reported as ctime/mtime/birthtime */
+} zfsctl_root_t;
+
+
+/*
+ * Create the '.zfs' directory.
+ *
+ * Allocates the '.zfs' node and its 'snapshot' child node, stamps '.zfs'
+ * with the creation time of the filesystem's root znode and publishes the
+ * result in zfsvfs->z_ctldir, which must still be NULL on entry.
+ */
+void
+zfsctl_create(zfsvfs_t *zfsvfs)
+{
+	zfsctl_root_t *dot_zfs;
+	sfs_node_t *snapdir;
+	vnode_t *rvp;
+	uint64_t crtime[2];
+
+	ASSERT3P(zfsvfs->z_ctldir, ==, NULL);
+
+	snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT,
+	    ZFSCTL_INO_SNAPDIR);
+	dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0,
+	    ZFSCTL_INO_ROOT);
+	dot_zfs->snapdir = snapdir;
+
+	/* Borrow the root znode just long enough to read its crtime. */
+	VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp));
+	VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
+	    &crtime, sizeof (crtime)));
+	ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
+	vput(rvp);
+
+	zfsvfs->z_ctldir = dot_zfs;
+}
+
+/*
+ * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
+ * The nodes must not have any associated vnodes by now as they should be
+ * vflush-ed.
+ */
+void
+zfsctl_destroy(zfsvfs_t *zfsvfs)
+{
+	/* Free the child first: its pointer is stored inside the parent. */
+	sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
+	sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
+	zfsvfs->z_ctldir = NULL;
+}
+
+/*
+ * vn_vget_ino_gen() callback returning the root vnode of the filesystem
+ * 'mp'; used to resolve the parent of '.zfs'.
+ */
+static int
+zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
+{
+	return (VFS_ROOT(mp, flags, vpp));
+}
+
+/*
+ * sfs_vgetx() setup callback: turn the freshly allocated, exclusively
+ * locked vnode into a directory whose v_data is the node passed as 'arg'.
+ */
+static void
+zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
+{
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
+	/* We support shared locking. */
+	VN_LOCK_ASHARE(vp);
+	vp->v_type = VDIR;
+	vp->v_data = arg;
+}
+
+/*
+ * Get (creating it if needed) the vnode of the '.zfs' directory of the
+ * filesystem mounted as 'mp'.  The signature also fits the callback
+ * expected by vn_vget_ino_gen(); 'arg' is unused.
+ */
+static int
+zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
+{
+	zfsvfs_t *zfsvfs = (zfsvfs_t *)mp->mnt_data;
+	void *dot_zfs = zfsvfs->z_ctldir;
+
+	return (sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs",
+	    &zfsctl_ops_root, zfsctl_common_vnode_setup, dot_zfs, vpp));
+}
+
+/*
+ * Get (creating it if needed) the vnode of the '.zfs/snapshot' directory
+ * of the filesystem mounted as 'mp'.  'arg' is unused.
+ */
+static int
+zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
+{
+	zfsvfs_t *zfsvfs = (zfsvfs_t *)mp->mnt_data;
+	void *snapdir = zfsvfs->z_ctldir->snapdir;
+
+	return (sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR,
+	    "zfs", &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, snapdir,
+	    vpp));
+}
+
+/*
+ * Given a root znode, retrieve the associated .zfs directory.
+ * Add a hold to the vnode and return it.
+ */
+int
+zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
+{
+	/* 'flags' are the lock flags handed down to the vnode lookup. */
+	return (zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp));
+}
+
+/*
+ * Common open routine.  Disallow any write access.
+ */
+static int
+zfsctl_common_open(struct vop_open_args *ap)
+{
+	/* The control directory is read-only: refuse opens for writing. */
+	if ((ap->a_mode & FWRITE) != 0)
+		return (SET_ERROR(EACCES));
+	return (0);
+}
+
+/*
+ * Common close routine.  Nothing to do here.
+ */
+/* ARGSUSED */
+static int
+zfsctl_common_close(struct vop_close_args *ap)
+{
+	/* No per-open state is kept, so closing always succeeds. */
+	return (0);
+}
+
+/*
+ * Common access routine.  Disallow writes.
+ */
+static int
+zfsctl_common_access(struct vop_access_args *ap)
+{
+	/* Every mode except write access is granted unconditionally. */
+	if ((ap->a_accmode & VWRITE) != 0)
+		return (SET_ERROR(EACCES));
+	return (0);
+}
+
+/*
+ * Common getattr function.  Fill in basic information.
+ */
+static void
+zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
+{
+	sfs_node_t *sn = vp->v_data;
+	timestruc_t ts;
+
+	/* Identity: a directory with uid/gid 0 and the shared ctldir mode. */
+	vap->va_type = VDIR;
+	vap->va_mode = zfsctl_ctldir_mode;
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_nodeid = sn->sn_id;
+	vn_fsid(vp, vap);
+
+	/*
+	 * A purely virtual object: no device, no block size, no allocated
+	 * blocks and no generation number.
+	 */
+	vap->va_rdev = 0;
+	vap->va_blksize = 0;
+	vap->va_nblocks = 0;
+	vap->va_gen = 0;
+
+	/* FreeBSD: Reset chflags(2) flags. */
+	vap->va_flags = 0;
+
+	/* At least '.' and '..'; callers add their own entries on top. */
+	vap->va_nlink = 2;
+
+	/* The access time is always "now". */
+	gethrestime(&ts);
+	vap->va_atime = ts;
+}
+
+/* Declared here only when _OPENSOLARIS_SYS_VNODE_H_ is not defined. */
+#ifndef _OPENSOLARIS_SYS_VNODE_H_
+struct vop_fid_args {
+	struct vnode *a_vp;
+	struct fid *a_fid;
+};
+#endif
+
+/*
+ * Build a short file ID for a control directory node: the node ID is
+ * stored least significant byte first and the generation is all zeroes.
+ */
+static int
+zfsctl_common_fid(struct vop_fid_args *ap)
+{
+	sfs_node_t	*sn = ap->a_vp->v_data;
+	zfid_short_t	*zfid = (zfid_short_t *)(void *)ap->a_fid;
+	uint64_t	remaining = sn->sn_id;
+	int		i;
+
+	zfid->zf_len = SHORT_FID_LEN;
+
+	/* Peel off one byte of the object number per iteration. */
+	for (i = 0; i < sizeof (zfid->zf_object); i++) {
+		zfid->zf_object[i] = (uint8_t)remaining;
+		remaining >>= 8;
+	}
+
+	/* .zfs nodes always have a generation number of 0 */
+	for (i = 0; i < sizeof (zfid->zf_gen); i++)
+		zfid->zf_gen[i] = 0;
+
+	return (0);
+}
+
+/*
+ * Declared here only when _SYS_SYSPROTO_H_ is not defined.
+ * NOTE(review): <sys/sysproto.h> is included above, so this presumably
+ * serves as documentation of the argument layout only -- confirm.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct vop_reclaim_args {
+	struct vnode *a_vp;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * Reclaim a '.zfs' or '.zfs/snapshot' vnode.  The node behind v_data is
+ * only detached here, not freed.
+ */
+static int
+zfsctl_common_reclaim(struct vop_reclaim_args *ap)
+{
+	(void) sfs_reclaim_vnode(ap->a_vp);
+	return (0);
+}
+
+/* Declared here only when _SYS_SYSPROTO_H_ is not defined. */
+#ifndef _SYS_SYSPROTO_H_
+struct vop_print_args {
+	struct vnode *a_vp;
+};
+#endif
+
+/* VOP_PRINT: dump the sfs node attached to the vnode. */
+static int
+zfsctl_common_print(struct vop_print_args *ap)
+{
+	sfs_print_node(ap->a_vp->v_data);
+	return (0);
+}
+
+/* Declared here only when _SYS_SYSPROTO_H_ is not defined. */
+#ifndef _SYS_SYSPROTO_H_
+struct vop_getattr_args {
+	struct vnode *a_vp;
+	struct vattr *a_vap;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * Get root directory attributes.
+ */
+static int
+zfsctl_root_getattr(struct vop_getattr_args *ap)
+{
+	zfsctl_root_t *dot_zfs = ap->a_vp->v_data;
+	struct vattr *vap = ap->a_vap;
+
+	zfsctl_common_getattr(ap->a_vp, vap);
+
+	/* One link more than the common count, for the snapdir. */
+	vap->va_nlink += 1;
+	vap->va_size = vap->va_nlink;
+
+	/* All three timestamps report the time recorded at creation. */
+	vap->va_ctime = dot_zfs->cmtime;
+	vap->va_mtime = dot_zfs->cmtime;
+	vap->va_birthtime = dot_zfs->cmtime;
+
+	return (0);
+}
+
+/*
+ * When we lookup "." we still can be asked to lock it
+ * differently, can't we?
+ */
+static int
+zfsctl_relock_dot(vnode_t *dvp, int ltype)
+{
+	/* The caller receives its own reference to the directory. */
+	vref(dvp);
+
+	/* Already locked the way the caller wants it: nothing more to do. */
+	if (ltype == VOP_ISLOCKED(dvp))
+		return (0);
+
+	/* Anything but LK_EXCLUSIVE is treated as a request for LK_SHARED. */
+	if (ltype == LK_EXCLUSIVE)
+		vn_lock(dvp, LK_UPGRADE | LK_RETRY);
+	else
+		vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
+
+	/* Relocking for the "." case may leave us with a reclaimed vnode. */
+	if (VN_IS_DOOMED(dvp)) {
+		vrele(dvp);
+		return (SET_ERROR(ENOENT));
+	}
+	return (0);
+}
+
+/*
+ * Lookup entry point for the '.zfs' directory.
+ *
+ * Resolves exactly three names:
+ *   "."        - the directory itself, relocked as requested;
+ *   ".."       - the root vnode of the filesystem '.zfs' lives in;
+ *   "snapshot" - the '.zfs/snapshot' directory.
+ * Any other name yields ENOENT.  Operations other than LOOKUP on the last
+ * path component are refused with ENOTSUP.
+ *
+ * On failure *vpp is set to NULL.
+ */
+static int
+zfsctl_root_lookup(struct vop_lookup_args *ap)
+{
+	static const char snapdir_name[] = "snapshot";
+	struct componentname *cnp = ap->a_cnp;
+	vnode_t *dvp = ap->a_dvp;
+	vnode_t **vpp = ap->a_vpp;
+	int flags = ap->a_cnp->cn_flags;
+	int lkflags = ap->a_cnp->cn_lkflags;
+	int nameiop = ap->a_cnp->cn_nameiop;
+	int err;
+
+	ASSERT3S(dvp->v_type, ==, VDIR);
+
+	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
+		return (SET_ERROR(ENOTSUP));
+
+	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
+		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
+		if (err == 0)
+			*vpp = dvp;
+	} else if ((flags & ISDOTDOT) != 0) {
+		err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
+		    lkflags, vpp);
+	} else if (cnp->cn_namelen == sizeof (snapdir_name) - 1 &&
+	    strncmp(cnp->cn_nameptr, snapdir_name, cnp->cn_namelen) == 0) {
+		/*
+		 * The length has to match as well: strncmp() bounded by
+		 * cn_namelen alone would accept every prefix of "snapshot"
+		 * (e.g. "snap") as the snapshot directory.
+		 */
+		err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
+	} else {
+		err = SET_ERROR(ENOENT);
+	}
+	if (err != 0)
+		*vpp = NULL;
+	return (err);
+}
+
+/*
+ * Read the '.zfs' directory, which holds ".", ".." and "snapshot".
+ *
+ * ENAMETOOLONG coming back from the entry helpers is treated as "out of
+ * destination space" and mapped to success.  *a_eofflag, if supplied, is
+ * set only once the "snapshot" entry has been emitted.
+ */
+static int
+zfsctl_root_readdir(struct vop_readdir_args *ap)
+{
+	struct dirent entry;
+	vnode_t *vp = ap->a_vp;
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfsctl_root_t *node = vp->v_data;
+	zfs_uio_t uio;
+	int *eofp = ap->a_eofflag;
+	off_t dots_offset;
+	int error;
+
+	zfs_uio_init(&uio, ap->a_uio);
+
+	ASSERT3S(vp->v_type, ==, VDIR);
+
+	/*
+	 * FIXME: this routine only ever emits 3 entries and does not tolerate
+	 * being called with a buffer too small to handle all of them.
+	 *
+	 * The check below facilitates the idiom of repeating calls until the
+	 * count to return is 0.
+	 */
+	if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) {
+		return (0);
+	}
+
+	/* "." is '.zfs' itself; ".." is the root of the filesystem. */
+	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
+	    &dots_offset);
+	if (error != 0) {
+		if (error == ENAMETOOLONG) /* ran out of destination space */
+			error = 0;
+		return (error);
+	}
+	/* Only the offset right after the dot entries is supported here. */
+	if (zfs_uio_offset(&uio) != dots_offset)
+		return (SET_ERROR(EINVAL));
+
+	/* The compile-time check below is what makes the strcpy() safe. */
+	CTASSERT(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name));
+	entry.d_fileno = node->snapdir->sn_id;
+	entry.d_type = DT_DIR;
+	strcpy(entry.d_name, node->snapdir->sn_name);
+	entry.d_namlen = strlen(entry.d_name);
+	entry.d_reclen = sizeof (entry);
+	error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
+	if (error != 0) {
+		if (error == ENAMETOOLONG)
+			error = 0;
+		return (SET_ERROR(error));
+	}
+	if (eofp != NULL)
+		*eofp = 1;
+	return (0);
+}
+
+/*
+ * VOP_VPTOCNP for '.zfs': report the filesystem root as the parent and
+ * place the name ".zfs" at the end of the caller's buffer by lowering
+ * *a_buflen by the name length and copying the name to that position.
+ * Returns ENOMEM when fewer than 4 bytes of buffer remain.
+ */
+static int
+zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
+{
+	/* Exactly four characters: deliberately not NUL-terminated. */
+	static const char dotzfs_name[4] = ".zfs";
+	vnode_t *dvp;
+	int error;
+
+	if (*ap->a_buflen < sizeof (dotzfs_name))
+		return (SET_ERROR(ENOMEM));
+
+	error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
+	    LK_SHARED, &dvp);
+	if (error != 0)
+		return (SET_ERROR(error));
+
+	/* The parent is handed back referenced; its lock is dropped here. */
+	VOP_UNLOCK1(dvp);
+	*ap->a_vpp = dvp;
+	*ap->a_buflen -= sizeof (dotzfs_name);
+	bcopy(dotzfs_name, ap->a_buf + *ap->a_buflen, sizeof (dotzfs_name));
+	return (0);
+}
+
+/*
+ * VOP_PATHCONF for the control directories.  The variables listed in the
+ * switch are answered here; everything else goes to vop_stdpathconf().
+ */
+static int
+zfsctl_common_pathconf(struct vop_pathconf_args *ap)
+{
+	/*
+	 * We care about ACL variables so that user land utilities like ls
+	 * can display them correctly.  Since the ctldir's st_dev is set to be
+	 * the same as the parent dataset, we must support all variables that
+	 * it supports.
+	 */
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX);
+		return (0);
+
+	case _PC_FILESIZEBITS:
+		*ap->a_retval = 64;
+		return (0);
+
+	case _PC_MIN_HOLE_SIZE:
+		*ap->a_retval = (int)SPA_MINBLOCKSIZE;
+		return (0);
+
+	/* Only NFSv4 ACLs are reported as supported. */
+	case _PC_ACL_EXTENDED:
+		*ap->a_retval = 0;
+		return (0);
+
+	case _PC_ACL_NFS4:
+		*ap->a_retval = 1;
+		return (0);
+
+	case _PC_ACL_PATH_MAX:
+		*ap->a_retval = ACL_MAX_ENTRIES;
+		return (0);
+
+	case _PC_NAME_MAX:
+		*ap->a_retval = NAME_MAX;
+		return (0);
+
+	default:
+		return (vop_stdpathconf(ap));
+	}
+}
+
+/*
+ * Returns a trivial ACL.
+ *
+ * Only NFSv4 ACLs can be requested; any other type fails with EINVAL.
+ * The ACL is derived from the common ctldir mode and then stripped of the
+ * permissions that do not apply to the control directory.
+ */
+static int
+zfsctl_common_getacl(struct vop_getacl_args *ap)
+{
+	int i;
+
+	/* Use SET_ERROR() like every other error originated in this file. */
+	if (ap->a_type != ACL_TYPE_NFS4)
+		return (SET_ERROR(EINVAL));
+
+	acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0);
+	/*
+	 * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify
+	 * attributes.  That is not the case for the ctldir, so we must clear
+	 * those bits.  We also must clear ACL_READ_NAMED_ATTRS, because xattrs
+	 * aren't supported by the ctldir.
+	 */
+	for (i = 0; i < ap->a_aclp->acl_cnt; i++) {
+		struct acl_entry *entry;
+		entry = &(ap->a_aclp->acl_entry[i]);
+		entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER |
+		    ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS |
+		    ACL_READ_NAMED_ATTRS);
+	}
+
+	return (0);
+}
+
+/*
+ * Vnode operations of the '.zfs' directory.  Operations not listed here
+ * fall back to default_vnodeops.
+ */
+static struct vop_vector zfsctl_ops_root = {
+	.vop_default =	&default_vnodeops,
+#if __FreeBSD_version >= 1300121
+	.vop_fplookup_vexec = VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
+#endif
+	.vop_open =	zfsctl_common_open,
+	.vop_close =	zfsctl_common_close,
+	.vop_ioctl =	VOP_EINVAL,
+	.vop_getattr =	zfsctl_root_getattr,
+	.vop_access =	zfsctl_common_access,
+	.vop_readdir =	zfsctl_root_readdir,
+	.vop_lookup =	zfsctl_root_lookup,
+	.vop_inactive =	VOP_NULL,
+	.vop_reclaim =	zfsctl_common_reclaim,
+	.vop_fid =	zfsctl_common_fid,
+	.vop_print =	zfsctl_common_print,
+	.vop_vptocnp =	zfsctl_root_vptocnp,
+	.vop_pathconf =	zfsctl_common_pathconf,
+	.vop_getacl =	zfsctl_common_getacl,
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root);
+
+/*
+ * Build the full snapshot name "<dataset>@<name>" in 'zname', a buffer of
+ * 'len' bytes, for the dataset that 'vp' belongs to.
+ *
+ * Returns ENAMETOOLONG when the result, including its terminating NUL,
+ * does not fit in 'len' bytes.  Note that the dataset name is written to
+ * 'zname' before the length is checked.
+ */
+static int
+zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
+{
+	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
+
+	dmu_objset_name(os, zname);
+	/* The "+ 1" accounts for the '@' separator. */
+	if (strlen(zname) + 1 + strlen(name) >= len)
+		return (SET_ERROR(ENAMETOOLONG));
+	(void) strcat(zname, "@");
+	(void) strcat(zname, name);
+	return (0);
+}
+
+/*
+ * Look up the snapshot called 'name' in the dataset that 'vp' belongs to
+ * and store its ID in *id.
+ */
+static int
+zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
+{
+	zfsvfs_t *zfsvfs = (zfsvfs_t *)vp->v_vfsp->vfs_data;
+
+	return (dsl_dataset_snap_lookup(dmu_objset_ds(zfsvfs->z_os), name,
+	    id));
+}
+
+/*
+ * Given a vnode get a root vnode of a filesystem mounted on top of
+ * the vnode, if any.  The root vnode is referenced and locked.
+ * If no filesystem is mounted then the original vnode remains referenced
+ * and locked and EJUSTRETURN is returned.  If any error happens the
+ * original vnode is unlocked and released.
+ */
+static int
+zfsctl_mounted_here(vnode_t **vpp, int flags)
+{
+	struct mount *mp;
+	int err;
+
+	ASSERT_VOP_LOCKED(*vpp, __func__);
+	ASSERT3S((*vpp)->v_type, ==, VDIR);
+
+	if ((mp = (*vpp)->v_mountedhere) != NULL) {
+		err = vfs_busy(mp, 0);
+		KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
+		KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
+		/* Trade the covered vnode for the root of the mount. */
+		vput(*vpp);
+		err = VFS_ROOT(mp, flags, vpp);
+		vfs_unbusy(mp);
+		return (err);
+	}
+	return (EJUSTRETURN);
+}
+
+/* Argument bundle passed to zfsctl_snapshot_vnode_setup(). */
+typedef struct {
+	const char *snap_name;
+	uint64_t    snap_id;
+} snapshot_setup_arg_t;
+
+/*
+ * sfs_vgetx() setup callback for a snapshot mountpoint vnode.  'arg' is a
+ * snapshot_setup_arg_t; a new node is allocated for the vnode here and is
+ * freed again by zfsctl_snapshot_reclaim().
+ */
+static void
+zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
+{
+	snapshot_setup_arg_t *ssa = arg;
+	sfs_node_t *node;
+
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
+	node = sfs_alloc_node(sizeof (sfs_node_t),
+	    ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
+	zfsctl_common_vnode_setup(vp, node);
+
+	/* We have to support recursive locking. */
+	VN_LOCK_AREC(vp);
+}
+
+/*
+ * Lookup entry point for the 'snapshot' directory.  Try to open the
+ * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
+ * Perform a mount of the associated dataset on top of the vnode.
+ * There are four possibilities:
+ * - the snapshot node and vnode do not exist
+ * - the snapshot vnode is covered by the mounted snapshot
+ * - the snapshot vnode is not covered yet, the mount operation is in progress
+ * - the snapshot vnode is not covered, because the snapshot has been unmounted
+ * The last two states are transient and should be relatively short-lived.
+ *
+ * Returns ENOTSUP for anything but LOOKUP on the last component,
+ * ENAMETOOLONG for a component longer than NAME_MAX and ENOENT when no
+ * snapshot of that name exists.
+ */
+static int
+zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
+{
+	vnode_t *dvp = ap->a_dvp;
+	vnode_t **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	char name[NAME_MAX + 1];
+	char fullname[ZFS_MAX_DATASET_NAME_LEN];
+	char *mountpoint;
+	size_t mountpoint_len;
+	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
+	uint64_t snap_id;
+	int nameiop = cnp->cn_nameiop;
+	int lkflags = cnp->cn_lkflags;
+	int flags = cnp->cn_flags;
+	int err;
+
+	ASSERT3S(dvp->v_type, ==, VDIR);
+
+	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
+		return (SET_ERROR(ENOTSUP));
+
+	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
+		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
+		if (err == 0)
+			*vpp = dvp;
+		return (err);
+	}
+	if (flags & ISDOTDOT) {
+		err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
+		    vpp);
+		return (err);
+	}
+
+	if (cnp->cn_namelen >= sizeof (name))
+		return (SET_ERROR(ENAMETOOLONG));
+
+	/* Make a NUL-terminated copy of just this path component. */
+	strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
+	err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
+	if (err != 0)
+		return (SET_ERROR(ENOENT));
+
+	/*
+	 * The loop is left (break) with *vpp exclusively locked and not
+	 * covered, or the function returns from inside it.
+	 */
+	for (;;) {
+		snapshot_setup_arg_t ssa;
+
+		ssa.snap_name = name;
+		ssa.snap_id = snap_id;
+		err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
+		    snap_id, "zfs", &zfsctl_ops_snapshot,
+		    zfsctl_snapshot_vnode_setup, &ssa, vpp);
+		if (err != 0)
+			return (err);
+
+		/* Check if a new vnode has just been created. */
+		if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
+			break;
+
+		/*
+		 * Check if a snapshot is already mounted on top of the vnode.
+		 */
+		err = zfsctl_mounted_here(vpp, lkflags);
+		if (err != EJUSTRETURN)
+			return (err);
+
+		/*
+		 * If the vnode is not covered, then either the mount operation
+		 * is in progress or the snapshot has already been unmounted
+		 * but the vnode hasn't been inactivated and reclaimed yet.
+		 * We can try to re-use the vnode in the latter case.
+		 */
+		VI_LOCK(*vpp);
+		if (((*vpp)->v_iflag & VI_MOUNT) == 0) {
+			VI_UNLOCK(*vpp);
+			/*
+			 * Upgrade to exclusive lock in order to:
+			 * - avoid race conditions
+			 * - satisfy the contract of mount_snapshot()
+			 */
+			err = VOP_LOCK(*vpp, LK_TRYUPGRADE);
+			if (err == 0)
+				break;
+		} else {
+			VI_UNLOCK(*vpp);
+		}
+
+		/*
+		 * In this state we can loop on uncontested locks and starve
+		 * the thread doing the lengthy, non-trivial mount operation.
+		 * So, yield to prevent that from happening.
+		 */
+		vput(*vpp);
+		kern_yield(PRI_USER);
+	}
+
+	VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname));
+
+	/* Mountpoint: "<f_mntonname>/" ZFS_CTLDIR_NAME "/snapshot/<name>". */
+	mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
+	    strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
+	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
+	(void) snprintf(mountpoint, mountpoint_len,
+	    "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
+	    dvp->v_vfsp->mnt_stat.f_mntonname, name);
+
+	err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
+	kmem_free(mountpoint, mountpoint_len);
+	if (err == 0) {
+		/*
+		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
+		 *
+		 * This is where we lie about our v_vfsp in order to
+		 * make .zfs/snapshot/<snapname> accessible over NFS
+		 * without requiring manual mounts of <snapname>.
+		 */
+		ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs);
+		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
+
+		/* Clear the root flag (set via VFS_ROOT) as well. */
+		(*vpp)->v_vflag &= ~VV_ROOT;
+	}
+
+	if (err != 0)
+		*vpp = NULL;
+	return (err);
+}
+
+/*
+ * Read the '.zfs/snapshot' directory: ".", ".." and then one directory
+ * entry per snapshot, named after the snapshot and numbered with its ID.
+ *
+ * Past the dot entries the uio offset is dots_offset plus the snapshot
+ * list cookie.  ENAMETOOLONG from the entry helpers is mapped to success;
+ * ENOENT from the snapshot iterator marks the end of the directory.
+ */
+static int
+zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
+{
+	char snapname[ZFS_MAX_DATASET_NAME_LEN];
+	struct dirent entry;
+	vnode_t *vp = ap->a_vp;
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfs_uio_t uio;
+	int *eofp = ap->a_eofflag;
+	off_t dots_offset;
+	int error;
+
+	zfs_uio_init(&uio, ap->a_uio);
+
+	ASSERT3S(vp->v_type, ==, VDIR);
+
+	error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap,
+	    &uio, &dots_offset);
+	if (error != 0) {
+		if (error == ENAMETOOLONG) /* ran out of destination space */
+			error = 0;
+		return (error);
+	}
+
+	ZFS_ENTER(zfsvfs);
+	for (;;) {
+		uint64_t cookie;
+		uint64_t id;
+
+		/* Translate the directory offset back to a list cookie. */
+		cookie = zfs_uio_offset(&uio) - dots_offset;
+
+		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
+		    snapname, &id, &cookie, NULL);
+		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		if (error != 0) {
+			if (error == ENOENT) {
+				if (eofp != NULL)
+					*eofp = 1;
+				error = 0;
+			}
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+
+		entry.d_fileno = id;
+		entry.d_type = DT_DIR;
+		strcpy(entry.d_name, snapname);
+		entry.d_namlen = strlen(entry.d_name);
+		entry.d_reclen = sizeof (entry);
+		error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
+		if (error != 0) {
+			if (error == ENAMETOOLONG)
+				error = 0;
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(error));
+		}
+		/* Resume after this snapshot on the next iteration or call. */
+		zfs_uio_setoffset(&uio, cookie + dots_offset);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Get the attributes of the '.zfs/snapshot' directory.  The timestamps
+ * come from dmu_objset_snap_cmtime(); the link count is the common count
+ * plus the number of entries in the dataset's snapshot-names ZAP object,
+ * and the size mirrors the link count.
+ */
+static int
+zfsctl_snapdir_getattr(struct vop_getattr_args *ap)
+{
+	vnode_t *vp = ap->a_vp;
+	vattr_t *vap = ap->a_vap;
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	dsl_dataset_t *ds;
+	uint64_t snap_count;
+	int err;
+
+	ZFS_ENTER(zfsvfs);
+	ds = dmu_objset_ds(zfsvfs->z_os);
+	zfsctl_common_getattr(vp, vap);
+	vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
+	vap->va_mtime = vap->va_ctime;
+	vap->va_birthtime = vap->va_ctime;
+	/* A zero ZAP object number is skipped: nothing to count. */
+	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
+		err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
+		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
+		if (err != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+		vap->va_nlink += snap_count;
+	}
+	vap->va_size = vap->va_nlink;
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Vnode operations of the '.zfs/snapshot' directory.  Operations not
+ * listed here fall back to default_vnodeops.
+ */
+static struct vop_vector zfsctl_ops_snapdir = {
+	.vop_default =	&default_vnodeops,
+#if __FreeBSD_version >= 1300121
+	.vop_fplookup_vexec = VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
+#endif
+	.vop_open =	zfsctl_common_open,
+	.vop_close =	zfsctl_common_close,
+	.vop_getattr =	zfsctl_snapdir_getattr,
+	.vop_access =	zfsctl_common_access,
+	.vop_readdir =	zfsctl_snapdir_readdir,
+	.vop_lookup =	zfsctl_snapdir_lookup,
+	.vop_reclaim =	zfsctl_common_reclaim,
+	.vop_fid =	zfsctl_common_fid,
+	.vop_print =	zfsctl_common_print,
+	.vop_pathconf =	zfsctl_common_pathconf,
+	.vop_getacl =	zfsctl_common_getacl,
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir);
+
+
+/* VOP_INACTIVE for a snapshot mountpoint vnode: ask for it to be recycled. */
+static int
+zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
+{
+	vrecycle(ap->a_vp);
+	return (0);
+}
+
+/*
+ * VOP_RECLAIM for a snapshot mountpoint vnode.  Unlike the common reclaim
+ * routine this one also frees the node, which was allocated for this
+ * vnode alone in zfsctl_snapshot_vnode_setup().
+ */
+static int
+zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap)
+{
+	vnode_t *vp = ap->a_vp;
+	void *data = vp->v_data;
+
+	sfs_reclaim_vnode(vp);
+	sfs_destroy_node(data);
+	return (0);
+}
+
+/*
+ * VOP_VPTOCNP for a snapshot mountpoint vnode: report '.zfs/snapshot' as
+ * the parent and place the snapshot's name at the end of the caller's
+ * buffer, lowering *a_buflen by the name length.
+ *
+ * Returns ENOMEM when the buffer is too small and ENOENT when nothing is
+ * mounted on the vnode.  The vnode lock is dropped and re-acquired while
+ * the parent is looked up.
+ */
+static int
+zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
+{
+	struct mount *mp;
+	vnode_t *dvp;
+	vnode_t *vp;
+	sfs_node_t *node;
+	size_t len;
+	int locked;
+	int error;
+
+	vp = ap->a_vp;
+	node = vp->v_data;
+	len = strlen(node->sn_name);
+	if (*ap->a_buflen < len)
+		return (SET_ERROR(ENOMEM));
+
+	/*
+	 * Prevent unmounting of the snapshot while the vnode lock
+	 * is not held.  That is not strictly required, but allows
+	 * us to assert that an uncovered snapshot vnode is never
+	 * "leaked".
+	 */
+	mp = vp->v_mountedhere;
+	if (mp == NULL)
+		return (SET_ERROR(ENOENT));
+	error = vfs_busy(mp, 0);
+	KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));
+
+	/*
+	 * We can vput the vnode as we can now depend on the reference owned
+	 * by the busied mp.  But we also need to hold the vnode, because
+	 * the reference may go after vfs_unbusy() which has to be called
+	 * before we can lock the vnode again.
+	 */
+	/* Remember the current lock type so it can be restored below. */
+	locked = VOP_ISLOCKED(vp);
+#if __FreeBSD_version >= 1300045
+	enum vgetstate vs = vget_prep(vp);
+#else
+	vhold(vp);
+#endif
+	vput(vp);
+
+	/* Look up .zfs/snapshot, our parent. */
+	error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
+	if (error == 0) {
+		VOP_UNLOCK1(dvp);
+		*ap->a_vpp = dvp;
+		*ap->a_buflen -= len;
+		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
+	}
+	vfs_unbusy(mp);
+	/* Re-acquire the vnode with the lock type it had on entry. */
+#if __FreeBSD_version >= 1300045
+	vget_finish(vp, locked | LK_RETRY, vs);
+#else
+	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
+#endif
+	return (error);
+}
+
+/*
+ * These VP's should never see the light of day.  They should always
+ * be covered.
+ *
+ * There is no default vector (vop_default is NULL), so only the
+ * operations listed explicitly below are available on these vnodes.
+ */
+static struct vop_vector zfsctl_ops_snapshot = {
+	.vop_default =		NULL, /* ensure very restricted access */
+#if __FreeBSD_version >= 1300121
+	.vop_fplookup_vexec =	VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
+#endif
+	.vop_open =		zfsctl_common_open,
+	.vop_close =		zfsctl_common_close,
+	.vop_inactive =		zfsctl_snapshot_inactive,
+#if __FreeBSD_version >= 1300045
+	.vop_need_inactive = vop_stdneed_inactive,
+#endif
+	.vop_reclaim =		zfsctl_snapshot_reclaim,
+	.vop_vptocnp =		zfsctl_snapshot_vptocnp,
+	.vop_lock1 =		vop_stdlock,
+	.vop_unlock =		vop_stdunlock,
+	.vop_islocked =		vop_stdislocked,
+	.vop_advlockpurge =	vop_stdadvlockpurge, /* called by vgone */
+	.vop_print =		zfsctl_common_print,
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot);
+
+/*
+ * Find the zfsvfs of the snapshot with object set id 'objsetid' that is
+ * mounted under the control directory of 'vfsp'.
+ *
+ * On success *zfsvfsp is set to the mnt_data of the mount covering the
+ * snapshot's mountpoint vnode and 0 is returned.  In every other case
+ * (lookup failure, no vnode, nothing mounted) *zfsvfsp is NULL and
+ * EINVAL is returned.
+ */
+int
+zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
+{
+	zfsvfs_t *zfsvfs __unused = vfsp->vfs_data;
+	struct mount *snapmp;
+	vnode_t *vp;
+
+	ASSERT3P(zfsvfs->z_ctldir, !=, NULL);
+	*zfsvfsp = NULL;
+
+	if (sfs_vnode_get(vfsp, LK_EXCLUSIVE, ZFSCTL_INO_SNAPDIR, objsetid,
+	    &vp) == 0 && vp != NULL) {
+		/*
+		 * XXX Probably need to at least reference, if not busy, the mp.
+		 */
+		snapmp = vp->v_mountedhere;
+		if (snapmp != NULL)
+			*zfsvfsp = snapmp->mnt_data;
+		vput(vp);
+	}
+
+	if (*zfsvfsp != NULL)
+		return (0);
+	return (SET_ERROR(EINVAL));
+}
+
+/*
+ * Unmount any snapshots for the given filesystem.  This is called from
+ * zfs_umount() - if we have a ctldir, then go through and unmount all the
+ * snapshots.
+ *
+ * fflags is passed through to dounmount() for every snapshot.  Returns 0
+ * once the snapshot list has been exhausted, otherwise the first error
+ * from listing, vnode lookup or unmounting; iteration stops at that
+ * error.  The 'cr' argument is not used by this function.
+ */
+int
+zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
+{
+	char snapname[ZFS_MAX_DATASET_NAME_LEN];
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	struct mount *mp;
+	vnode_t *vp;
+	uint64_t cookie;
+	int error;
+
+	ASSERT3P(zfsvfs->z_ctldir, !=, NULL);
+
+	cookie = 0;
+	for (;;) {
+		uint64_t id;
+
+		/* Fetch the next snapshot's object set id. */
+		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
+		    snapname, &id, &cookie, NULL);
+		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		if (error != 0) {
+			/* ENOENT is treated as the end of the list. */
+			if (error == ENOENT)
+				error = 0;
+			break;
+		}
+
+		for (;;) {
+			error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
+			    ZFSCTL_INO_SNAPDIR, id, &vp);
+			if (error != 0 || vp == NULL)
+				break;
+
+			mp = vp->v_mountedhere;
+
+			/*
+			 * v_mountedhere being NULL means that the
+			 * (uncovered) vnode is in a transient state
+			 * (mounting or unmounting), so loop until it
+			 * settles down.
+			 */
+			if (mp != NULL)
+				break;
+			vput(vp);
+		}
+		if (error != 0)
+			break;
+		if (vp == NULL)
+			continue;	/* no mountpoint, nothing to do */
+
+		/*
+		 * The mount-point vnode is kept locked to avoid spurious EBUSY
+		 * from a concurrent umount.
+		 * The vnode lock must have recursive locking enabled.
+		 */
+		vfs_ref(mp);
+		error = dounmount(mp, fflags, curthread);
+		KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
+		    ("extra references after unmount"));
+		vput(vp);
+		if (error != 0)
+			break;
+	}
+	KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
+	    ("force unmounting failed"));
+	return (error);
+}
+
+/*
+ * Forcibly unmount the snapshot named 'snapname' if it is mounted.
+ *
+ * Names without an '@' are ignored, as are names for which getzfsvfs()
+ * fails; both cases return 0.  Otherwise the result of dounmount() with
+ * MS_FORCE is returned.  The 'flags' argument is unused.
+ */
+int
+zfsctl_snapshot_unmount(const char *snapname, int flags __unused)
+{
+	vfs_t *vfsp = NULL;
+	zfsvfs_t *zfsvfs = NULL;
+
+	/* Only snapshot names (containing '@') are of interest. */
+	if (strchr(snapname, '@') == NULL)
+		return (0);
+
+	int err = getzfsvfs(snapname, &zfsvfs);
+	if (err != 0) {
+		ASSERT3P(zfsvfs, ==, NULL);
+		return (0);
+	}
+	vfsp = zfsvfs->z_vfs;
+
+	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
+
+	/*
+	 * Take a reference before dropping the busy state.
+	 * NOTE(review): presumably getzfsvfs() returns with the mount
+	 * busied and the vfs_unbusy() below pairs with that - confirm.
+	 */
+	vfs_ref(vfsp);
+	vfs_unbusy(vfsp);
+	return (dounmount(vfsp, MS_FORCE, curthread));
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_debug.c b/zfs/module/os/freebsd/zfs/zfs_debug.c
new file mode 100644
index 0000000..b75cf09
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_debug.c

@@ -0,0 +1,254 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/kstat.h>
+
+/* One entry in the in-memory ZFS debug message log. */
+typedef struct zfs_dbgmsg {
+	list_node_t zdm_node;	/* linkage on the zfs_dbgmsgs list */
+	time_t zdm_timestamp;	/* gethrestime_sec() when logged */
+	int zdm_size;		/* bytes allocated: header + text + NUL */
+	char zdm_msg[];		/* NUL-terminated message text */
+} zfs_dbgmsg_t;
+
+/*
+ * State of the debug message log.  New entries are appended at the tail
+ * and purged from the head; the list and zfs_dbgmsg_size are updated
+ * while holding zfs_dbgmsgs_lock.
+ */
+list_t zfs_dbgmsgs;
+int zfs_dbgmsg_size = 0;	/* sum of zdm_size over the list */
+kmutex_t zfs_dbgmsgs_lock;
+int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
+kstat_t *zfs_dbgmsg_kstat;	/* NULL if kstat_create() failed */
+
+/*
+ * Internal ZFS debug messages are enabled by default.
+ *
+ * # Print debug messages as they're logged
+ * dtrace -n 'zfs-dbgmsg { print(stringof(arg0)); }'
+ *
+ * # Print all logged dbgmsg entries
+ * sysctl kstat.zfs.misc.dbgmsg
+ *
+ * # Disable the kernel debug message log.
+ * sysctl vfs.zfs.dbgmsg_enable=0
+ */
+int zfs_dbgmsg_enable = 1;
+
+/*
+ * Raw kstat "headers" callback: format the column header line of the
+ * dbgmsg kstat into buf (at most size bytes).  Always returns 0.
+ */
+static int
+zfs_dbgmsg_headers(char *buf, size_t size)
+{
+	const char *ts_col = "timestamp";
+	const char *msg_col = "message";
+
+	(void) snprintf(buf, size, "%-12s %-8s\n", ts_col, msg_col);
+	return (0);
+}
+
+/*
+ * Raw kstat "data" callback: format one log entry (timestamp followed by
+ * the message text) into buf (at most size bytes).  'data' is the
+ * zfs_dbgmsg_t to print.  Always returns 0.
+ */
+static int
+zfs_dbgmsg_data(char *buf, size_t size, void *data)
+{
+	zfs_dbgmsg_t *entry = data;
+	u_longlong_t when = (u_longlong_t)entry->zdm_timestamp;
+
+	(void) snprintf(buf, size, "%-12llu %-s\n", when, entry->zdm_msg);
+	return (0);
+}
+
+/*
+ * Raw kstat "addr" callback: advance the iteration cursor kept in
+ * ksp->ks_private and return it.  Index 0 restarts at the head of the
+ * log; any other index steps to the entry after the current one.  If the
+ * cursor is already NULL it stays NULL.
+ *
+ * Must be called with zfs_dbgmsgs_lock held.
+ */
+static void *
+zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
+{
+	zfs_dbgmsg_t *cur;
+
+	ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
+
+	cur = (zfs_dbgmsg_t *)ksp->ks_private;
+	if (n == 0) {
+		ksp->ks_private = list_head(&zfs_dbgmsgs);
+	} else if (cur != NULL) {
+		ksp->ks_private = list_next(&zfs_dbgmsgs, cur);
+	}
+
+	return (ksp->ks_private);
+}
+
+/*
+ * Free the oldest log entries until the accounted log size is no larger
+ * than max_size or the list is empty.
+ *
+ * Must be called with zfs_dbgmsgs_lock held.
+ */
+static void
+zfs_dbgmsg_purge(int max_size)
+{
+	ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
+
+	for (;;) {
+		zfs_dbgmsg_t *oldest;
+		int freed;
+
+		if (zfs_dbgmsg_size <= max_size)
+			break;
+
+		oldest = list_remove_head(&zfs_dbgmsgs);
+		if (oldest == NULL)
+			break;
+
+		/* Read the size before the entry is released. */
+		freed = oldest->zdm_size;
+		kmem_free(oldest, freed);
+		zfs_dbgmsg_size -= freed;
+	}
+}
+
+/*
+ * kstat update callback: a write to the kstat discards every logged
+ * message; any other access is a no-op.  Always returns 0.
+ *
+ * NOTE(review): zfs_dbgmsg_purge() asserts that zfs_dbgmsgs_lock is held;
+ * presumably the kstat framework takes ks_lock around this call - confirm.
+ */
+static int
+zfs_dbgmsg_update(kstat_t *ksp, int rw)
+{
+	if (rw != KSTAT_WRITE)
+		return (0);
+
+	zfs_dbgmsg_purge(0);
+	return (0);
+}
+
+/*
+ * Initialise the debug message log: create the message list and its lock,
+ * then create and install the raw "dbgmsg" kstat that exposes the log.
+ * If the kstat cannot be created the log still works, it is just not
+ * exported.
+ */
+void
+zfs_dbgmsg_init(void)
+{
+	list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
+	    offsetof(zfs_dbgmsg_t, zdm_node));
+	mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
+	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+	if (zfs_dbgmsg_kstat) {
+		/* The kstat shares the lock that protects the list. */
+		zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
+		zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
+		/* ks_private is the iteration cursor of zfs_dbgmsg_addr(). */
+		zfs_dbgmsg_kstat->ks_private = NULL;
+		zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
+		kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
+		    zfs_dbgmsg_data, zfs_dbgmsg_addr);
+		kstat_install(zfs_dbgmsg_kstat);
+	}
+}
+
+/*
+ * Tear down the debug message log: delete the kstat (if one was created)
+ * and, in kernel builds only, free every logged message and destroy the
+ * lock.
+ *
+ * NOTE(review): the list itself is not list_destroy()ed here, and in
+ * non-kernel builds the messages and the lock are left in place.
+ */
+void
+zfs_dbgmsg_fini(void)
+{
+	if (zfs_dbgmsg_kstat)
+		kstat_delete(zfs_dbgmsg_kstat);
+	/*
+	 * TODO - decide how to make this permanent
+	 */
+#ifdef _KERNEL
+	mutex_enter(&zfs_dbgmsgs_lock);
+	zfs_dbgmsg_purge(0);
+	mutex_exit(&zfs_dbgmsgs_lock);
+	mutex_destroy(&zfs_dbgmsgs_lock);
+#endif
+}
+
+/*
+ * Append a copy of the NUL-terminated string 'buf' to the debug message
+ * log.  The zfs__dbgmsg DTrace probe fires first with the original
+ * string.  After insertion the log is trimmed, oldest entries first, to
+ * at most zfs_dbgmsg_maxsize bytes (a negative limit is treated as 0).
+ *
+ * The entry is allocated with KM_SLEEP.
+ */
+void
+__zfs_dbgmsg(char *buf)
+{
+	zfs_dbgmsg_t *zdm;
+	int size;
+
+	DTRACE_PROBE1(zfs__dbgmsg, char *, buf);
+
+	/* Header plus message text plus the terminating NUL. */
+	size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1;
+	zdm = kmem_zalloc(size, KM_SLEEP);
+	zdm->zdm_size = size;
+	zdm->zdm_timestamp = gethrestime_sec();
+	strcpy(zdm->zdm_msg, buf);
+
+	mutex_enter(&zfs_dbgmsgs_lock);
+	list_insert_tail(&zfs_dbgmsgs, zdm);
+	zfs_dbgmsg_size += size;
+	zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
+	mutex_exit(&zfs_dbgmsgs_lock);
+}
+
+/*
+ * Record that an error value was set at file:func:line.  When the
+ * ZFS_DEBUG_SET_ERROR bit of zfs_flags is set, an "error <n>" message is
+ * logged through __dprintf(); otherwise this does nothing.
+ */
+void
+__set_error(const char *file, const char *func, int line, int err)
+{
+	/*
+	 * To enable this:
+	 *
+	 * $ echo 512 >/sys/module/zfs/parameters/zfs_flags
+	 *
+	 * NOTE(review): the path above is the Linux module-parameter
+	 * interface; on FreeBSD the equivalent is presumably a sysctl
+	 * (e.g. vfs.zfs.flags) - confirm.
+	 */
+	if (zfs_flags & ZFS_DEBUG_SET_ERROR)
+		__dprintf(B_FALSE, file, func, line, "error %lu", (ulong_t)err);
+}
+
+#ifdef _KERNEL
+/*
+ * Format a debug message and append it to the debug message log.
+ *
+ * The logged text is "<file>:<line>:<func>(): " followed by the
+ * printf-style message built from fmt and its arguments, where <file> is
+ * 'file' with any leading directory components removed.  The text is
+ * built in a 1024-byte buffer, so longer messages are truncated.  The
+ * buffer is cut at the last newline it contains, if any.
+ *
+ * The 'dprint' argument is not used by this implementation.
+ */
+void
+__dprintf(boolean_t dprint, const char *file, const char *func,
+    int line, const char *fmt, ...)
+{
+	const char *newfile;
+	va_list adx;
+	size_t size;
+	char *buf;
+	char *nl;
+	int i;
+
+	size = 1024;
+	buf = kmem_alloc(size, KM_SLEEP);
+
+	/*
+	 * Get rid of annoying prefix to filename.
+	 */
+	newfile = strrchr(file, '/');
+	if (newfile != NULL) {
+		newfile = newfile + 1; /* Get rid of leading / */
+	} else {
+		newfile = file;
+	}
+
+	i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
+
+	/* Only append the message if the prefix left room for it. */
+	if (i < size) {
+		va_start(adx, fmt);
+		(void) vsnprintf(buf + i, size - i, fmt, adx);
+		va_end(adx);
+	}
+
+	/*
+	 * Get rid of trailing newline.
+	 */
+	nl = strrchr(buf, '\n');
+	if (nl != NULL)
+		*nl = '\0';
+
+	__zfs_dbgmsg(buf);
+
+	kmem_free(buf, size);
+}
+
+#else
+
+/*
+ * Print a "ZFS_DBGMSG(<tag>):" banner followed by every logged message,
+ * one per line, oldest first.  The log lock is held while the list is
+ * walked.
+ */
+void
+zfs_dbgmsg_print(const char *tag)
+{
+	zfs_dbgmsg_t *entry;
+
+	(void) printf("ZFS_DBGMSG(%s):\n", tag);
+
+	mutex_enter(&zfs_dbgmsgs_lock);
+	entry = list_head(&zfs_dbgmsgs);
+	while (entry != NULL) {
+		(void) printf("%s\n", entry->zdm_msg);
+		entry = list_next(&zfs_dbgmsgs, entry);
+	}
+	mutex_exit(&zfs_dbgmsgs_lock);
+}
+#endif /* _KERNEL */
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, dbgmsg_enable, INT, ZMOD_RW,
+    "Enable ZFS debug message log");
+
+ZFS_MODULE_PARAM(zfs, zfs_, dbgmsg_maxsize, INT, ZMOD_RW,
+    "Maximum ZFS debug log size");
+/* END CSTYLED */

diff --git a/zfs/module/os/freebsd/zfs/zfs_dir.c b/zfs/module/os/freebsd/zfs/zfs_dir.c
new file mode 100644
index 0000000..7fff329
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_dir.c

@@ -0,0 +1,963 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/extdirent.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/uio.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/unistd.h>
+#include <sys/sunddi.h>
+#include <sys/random.h>
+#include <sys/policy.h>
+#include <sys/condvar.h>
+#include <sys/callb.h>
+#include <sys/smp.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/fs/zfs.h>
+#include <sys/zap.h>
+#include <sys/dmu.h>
+#include <sys/atomic.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+
+#include <sys/ccompat.h>
+
+/*
+ * zfs_match_find() is used by zfs_dirent_lookup() to perform zap lookups
+ * of names after deciding which is the appropriate lookup interface.
+ *
+ * Looks 'name' up in directory dzp, using the normalizing ZAP lookup with
+ * match type 'mt' when the file system has normalization enabled
+ * (zfsvfs->z_norm != 0) and the plain lookup otherwise.
+ *
+ * On success returns 0 and stores the object number of the entry (with
+ * the non-object bits of the directory entry value masked off) in *zoid.
+ * On failure returns the ZAP error and leaves *zoid exactly as the ZAP
+ * layer left it.
+ */
+static int
+zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
+    matchtype_t mt, uint64_t *zoid)
+{
+	int error;
+
+	if (zfsvfs->z_norm) {
+
+		/*
+		 * In the non-mixed case we only expect there would ever
+		 * be one match, but we need to use the normalizing lookup.
+		 */
+		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
+		    zoid, mt, NULL, 0, NULL);
+	} else {
+		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
+	}
+
+	/*
+	 * Only post-process the value when the lookup produced one; on
+	 * failure *zoid may be uninitialized caller storage and must not
+	 * be read.
+	 */
+	if (error == 0)
+		*zoid = ZFS_DIRENT_OBJ(*zoid);
+
+	return (error);
+}
+
+/*
+ * Look up a directory entry under a locked vnode.
+ * dvp being locked gives us a guarantee that there are no concurrent
+ * modification of the directory and, thus, if a node can be found in
+ * the directory, then it must not be unlinked.
+ *
+ * Input arguments:
+ *	dzp	- znode for directory
+ *	name	- name of entry to look up
+ *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
+ *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
+ *		  ZXATTR: we want dzp's xattr directory
+ *
+ * Output arguments:
+ *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
+ *
+ * Return value: 0 on success or errno on failure.  Note that a missing
+ * entry is a success (with *zpp == NULL) unless ZEXISTS was given.
+ *
+ * NOTE: Always checks for, and rejects, '.' and '..'.
+ */
+int
+zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
+{
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	znode_t		*zp;
+	matchtype_t	mt = 0;
+	uint64_t	zoid;
+	int		error = 0;
+
+	if (zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
+
+	*zpp = NULL;
+
+	/*
+	 * Verify that we are not trying to look up '.', '..', or '.zfs'
+	 */
+	if (name[0] == '.' &&
+	    (((name[1] == '\0') || (name[1] == '.' && name[2] == '\0')) ||
+	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)))
+		return (SET_ERROR(EEXIST));
+
+	/*
+	 * Case sensitivity and normalization preferences are set when
+	 * the file system is created.  These are stored in the
+	 * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
+	 * affect how we perform zap lookups.
+	 *
+	 * When matching we may need to normalize & change case according to
+	 * FS settings.
+	 *
+	 * Note that a normalized match is necessary for a case insensitive
+	 * filesystem when the lookup request is not exact because normalization
+	 * can fold case independent of normalizing code point sequences.
+	 *
+	 * See the table above zfs_dropname().
+	 */
+	if (zfsvfs->z_norm != 0) {
+		mt = MT_NORMALIZE;
+
+		/*
+		 * Determine if the match needs to honor the case specified in
+		 * lookup, and if so keep track of that so that during
+		 * normalization we don't fold case.
+		 */
+		if (zfsvfs->z_case == ZFS_CASE_MIXED) {
+			mt |= MT_MATCH_CASE;
+		}
+	}
+
+	/*
+	 * An unlinked directory has no entries left to find; only its
+	 * xattr directory may still be requested.
+	 */
+	if (dzp->z_unlinked && !(flag & ZXATTR))
+		return (SET_ERROR(ENOENT));
+	if (flag & ZXATTR) {
+		/* The xattr directory is recorded in an SA, not in the ZAP. */
+		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
+		    sizeof (zoid));
+		if (error == 0)
+			error = (zoid == 0 ? ENOENT : 0);
+	} else {
+		error = zfs_match_find(zfsvfs, dzp, name, mt, &zoid);
+	}
+	if (error) {
+		/* "Not found" is only an error if the caller demanded it. */
+		if (error != ENOENT || (flag & ZEXISTS)) {
+			return (error);
+		}
+	} else {
+		if (flag & ZNEW) {
+			return (SET_ERROR(EEXIST));
+		}
+		error = zfs_zget(zfsvfs, zoid, &zp);
+		if (error)
+			return (error);
+		ASSERT(!zp->z_unlinked);
+		*zpp = zp;
+	}
+
+	return (0);
+}
+
+/*
+ * Look up the parent ("..") of directory dzp.
+ *
+ * The parent's object number is read from dzp's SA_ZPL_PARENT attribute
+ * and the corresponding znode is obtained with zfs_zget().  On success 0
+ * is returned and *zpp is set to the parent znode; on failure *zpp is
+ * left untouched.  Returns ENOENT if dzp has been unlinked.
+ */
+static int
+zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
+{
+	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+	znode_t *zp;
+	uint64_t parent;
+	int error;
+
+#ifdef ZFS_DEBUG
+	if (zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
+#endif
+	if (dzp->z_unlinked)
+		return (SET_ERROR(ENOENT));
+
+	if ((error = sa_lookup(dzp->z_sa_hdl,
+	    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
+		return (error);
+
+	error = zfs_zget(zfsvfs, parent, &zp);
+	if (error == 0)
+		*zpp = zp;
+	return (error);
+}
+
+/*
+ * Resolve 'name' relative to directory dzp.
+ *
+ * An empty name or "." yields dzp itself, ".." yields the parent via
+ * zfs_dd_lookup(), and anything else is looked up with
+ * zfs_dirent_lookup(ZEXISTS).  On success 0 is returned and *zpp holds
+ * the result; on failure *zpp is not written.  Returns ENOENT if dzp has
+ * been unlinked.
+ */
+int
+zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
+{
+	zfsvfs_t *zfsvfs __unused = dzp->z_zfsvfs;
+	znode_t *found = NULL;
+	int error;
+
+#ifdef ZFS_DEBUG
+	if (zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
+#endif
+	if (dzp->z_unlinked)
+		return (SET_ERROR(ENOENT));
+
+	/* "" and "." both name the directory itself. */
+	if (name[0] == '\0' || strcmp(name, ".") == 0) {
+		*zpp = dzp;
+		return (0);
+	}
+
+	if (strcmp(name, "..") == 0) {
+		error = zfs_dd_lookup(dzp, &found);
+		if (error != 0)
+			return (error);
+		*zpp = found;
+		return (0);
+	}
+
+	error = zfs_dirent_lookup(dzp, name, &found, ZEXISTS);
+	if (error != 0)
+		return (error);
+
+	dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
+	*zpp = found;
+	return (0);
+}
+
+/*
+ * unlinked Set (formerly known as the "delete queue") Error Handling
+ *
+ * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
+ * don't specify the name of the entry that we will be manipulating.  We
+ * also fib and say that we won't be adding any new entries to the
+ * unlinked set, even though we might (this is to lower the minimum file
+ * size that can be deleted in a full filesystem).  So on the small
+ * chance that the nlink list is using a fat zap (ie. has more than
+ * 2000 entries), we *may* not pre-read a block that's needed.
+ * Therefore it is remotely possible for some of the assertions
+ * regarding the unlinked set below to fail due to i/o error.  On a
+ * nondebug system, this will result in the space being leaked.
+ */
+void
+zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
+{
+	zfsvfs_t *fs = zp->z_zfsvfs;
+
+	/* Only znodes already marked unlinked, with no links, belong here. */
+	ASSERT(zp->z_unlinked);
+	ASSERT3U(zp->z_links, ==, 0);
+
+	/* Record the object in the on-disk unlinked set as part of tx. */
+	VERIFY0(zap_add_int(fs->z_os, fs->z_unlinkedobj, zp->z_id, tx));
+
+	dataset_kstats_update_nunlinks_kstat(&fs->z_kstat, 1);
+}
+
+/*
+ * Clean up any znodes that had no links when we either crashed or
+ * (force) umounted the file system.
+ *
+ * Every object recorded in the unlinked set is instantiated with
+ * zfs_zget(), its link count is forced to zero if necessary, it is
+ * marked z_unlinked, and its vnode is released with vput().  Objects
+ * that cannot be inspected or instantiated are skipped.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+	zap_cursor_t	zc;
+	zap_attribute_t zap;
+	dmu_object_info_t doi;
+	znode_t		*zp;
+	dmu_tx_t	*tx;
+	int		error;
+
+	/*
+	 * Iterate over the contents of the unlinked set.
+	 */
+	for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
+	    zap_cursor_retrieve(&zc, &zap) == 0;
+	    zap_cursor_advance(&zc)) {
+
+		/*
+		 * See what kind of object we have in list
+		 */
+
+		error = dmu_object_info(zfsvfs->z_os,
+		    zap.za_first_integer, &doi);
+		if (error != 0)
+			continue;
+
+		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
+		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
+		/*
+		 * We need to re-mark these list entries for deletion,
+		 * so we pull them back into core and set zp->z_unlinked.
+		 */
+		error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
+
+		/*
+		 * We may pick up znodes that are already marked for deletion.
+		 * This could happen during the purge of an extended attribute
+		 * directory.  All we need to do is skip over them, since they
+		 * are already in the system marked z_unlinked.
+		 */
+		if (error != 0)
+			continue;
+
+		vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
+
+		/*
+		 * Due to changes in zfs_rmnode we need to make sure the
+		 * link count is set to zero here.
+		 */
+		if (zp->z_links != 0) {
+			tx = dmu_tx_create(zfsvfs->z_os);
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+			error = dmu_tx_assign(tx, TXG_WAIT);
+			if (error != 0) {
+				/* Leave this object for a later drain. */
+				dmu_tx_abort(tx);
+				vput(ZTOV(zp));
+				continue;
+			}
+			zp->z_links = 0;
+			VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+			    &zp->z_links, sizeof (zp->z_links), tx));
+			dmu_tx_commit(tx);
+		}
+
+		zp->z_unlinked = B_TRUE;
+		vput(ZTOV(zp));
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*
+ * Delete the entire contents of a directory.  Return a count
+ * of the number of entries that could not be deleted. If we encounter
+ * an error, return a count of at least one so that the directory stays
+ * in the unlinked set.
+ *
+ * Each entry is removed in its own transaction through
+ * zfs_link_destroy().
+ *
+ * NOTE: this function assumes that the directory is inactive,
+ *	so there is no need to lock its entries before deletion.
+ *	Also, it assumes the directory contents is *only* regular
+ *	files.
+ */
+static int
+zfs_purgedir(znode_t *dzp)
+{
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	znode_t		*xzp;
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	int skipped = 0;
+	int error;
+
+	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
+	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
+	    zap_cursor_advance(&zc)) {
+		error = zfs_zget(zfsvfs,
+		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
+		if (error) {
+			skipped += 1;
+			continue;
+		}
+
+		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
+		ASSERT((ZTOV(xzp)->v_type == VREG) ||
+		    (ZTOV(xzp)->v_type == VLNK));
+
+		tx = dmu_tx_create(zfsvfs->z_os);
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+		/* Is this really needed ? */
+		zfs_sa_upgrade_txholds(tx, xzp);
+		dmu_tx_mark_netfree(tx);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			dmu_tx_abort(tx);
+			vput(ZTOV(xzp));
+			skipped += 1;
+			continue;
+		}
+
+		error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
+		if (error)
+			skipped += 1;
+		dmu_tx_commit(tx);
+
+		vput(ZTOV(xzp));
+	}
+	zap_cursor_fini(&zc);
+	/*
+	 * 'error' here is the result of the zap_cursor_retrieve() call that
+	 * ended the loop.  Anything other than ENOENT (presumably the normal
+	 * end-of-directory result) counts as a failure.
+	 */
+	if (error != ENOENT)
+		skipped += 1;
+	return (skipped);
+}
+
+extern taskq_t *zfsvfs_taskq;
+
+/*
+ * Destroy the on-disk object behind a znode whose link count is zero.
+ *
+ * For an xattr directory the contents are purged first; for everything
+ * else the object's data is freed.  Then, in a single transaction, the
+ * file's xattr directory (if any) is added to the unlinked set, the znode
+ * is removed from the unlinked set and the object is deleted with
+ * zfs_znode_delete().
+ *
+ * If any step fails for lack of space (or similar), the object is left in
+ * the unlinked set and only the in-core znode is torn down.
+ */
+void
+zfs_rmnode(znode_t *zp)
+{
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	objset_t	*os = zfsvfs->z_os;
+	dmu_tx_t	*tx;
+	uint64_t	acl_obj;
+	uint64_t	xattr_obj;
+	uint64_t	count;
+	int		error;
+
+	ASSERT3U(zp->z_links, ==, 0);
+	if (zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+
+	/*
+	 * If this is an attribute directory, purge its contents.
+	 */
+	if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
+	    (zp->z_pflags & ZFS_XATTR)) {
+		if (zfs_purgedir(zp) != 0) {
+			/*
+			 * Not enough space to delete some xattrs.
+			 * Leave it in the unlinked set.
+			 */
+			zfs_znode_dmu_fini(zp);
+			zfs_znode_free(zp);
+			return;
+		}
+	} else {
+		/*
+		 * Free up all the data in the file.  We don't do this for
+		 * XATTR directories because we need truncate and remove to be
+		 * in the same tx, like in zfs_znode_delete(). Otherwise, if
+		 * we crash here we'll end up with an inconsistent truncated
+		 * zap object in the delete queue.  Note a truncated file is
+		 * harmless since it only contains user data.
+		 */
+		error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
+		if (error) {
+			/*
+			 * Not enough space or we were interrupted by unmount.
+			 * Leave the file in the unlinked set.
+			 */
+			zfs_znode_dmu_fini(zp);
+			zfs_znode_free(zp);
+			return;
+		}
+	}
+
+	/*
+	 * If the file has extended attributes, we're going to unlink
+	 * the xattr dir.
+	 */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	/* A failed lookup is treated as "no xattr directory". */
+	if (error)
+		xattr_obj = 0;
+
+	acl_obj = zfs_external_acl(zp);
+
+	/*
+	 * Set up the final transaction.
+	 */
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	if (xattr_obj)
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
+	if (acl_obj)
+		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		/*
+		 * Not enough space to delete the file.  Leave it in the
+		 * unlinked set, leaking it until the fs is remounted (at
+		 * which point we'll call zfs_unlinked_drain() to process it).
+		 */
+		dmu_tx_abort(tx);
+		zfs_znode_dmu_fini(zp);
+		zfs_znode_free(zp);
+		return;
+	}
+
+	/*
+	 * FreeBSD's implementation of zfs_zget requires a vnode to back it.
+	 * This means that we could end up calling into getnewvnode while
+	 * calling zfs_rmnode as a result of a prior call to getnewvnode
+	 * trying to clear vnodes out of the cache. If this repeats we can
+	 * recurse enough that we overflow our stack. To avoid this, we
+	 * avoid calling zfs_zget on the xattr znode and instead simply add
+	 * it to the unlinked set and schedule a call to zfs_unlinked_drain.
+	 */
+	if (xattr_obj) {
+		/* Add extended attribute directory to the unlinked set. */
+		VERIFY3U(0, ==,
+		    zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx));
+	}
+
+	mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
+	/* Remove this znode from the unlinked set */
+	VERIFY3U(0, ==,
+	    zap_remove_int(os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+	/* Wake dd_activity_cv waiters once the unlinked set is empty. */
+	if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
+		cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
+	}
+
+	mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
+	dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
+	zfs_znode_delete(zp, tx);
+
+	dmu_tx_commit(tx);
+
+	if (xattr_obj) {
+		/*
+		 * We're using the FreeBSD taskqueue API here instead of
+		 * the Solaris taskq API since the FreeBSD API allows for a
+		 * task to be enqueued multiple times but executed once.
+		 */
+		taskqueue_enqueue(zfsvfs_taskq->tq_queue,
+		    &zfsvfs->z_unlinked_drain_task);
+	}
+}
+
+/*
+ * Build the 64-bit value stored in a directory ZAP for znode zp: the
+ * object number, with the directory-entry type derived from 'mode' placed
+ * in the top four bits when the file system's ZPL version is at least
+ * ZPL_VERSION_DIRENT_TYPE.
+ */
+static uint64_t
+zfs_dirent(znode_t *zp, uint64_t mode)
+{
+	uint64_t entry = zp->z_id;
+
+	if (zp->z_zfsvfs->z_version < ZPL_VERSION_DIRENT_TYPE)
+		return (entry);
+
+	return (entry | (IFTODT(mode) << 60));
+}
+
+/*
+ * Link zp into dzp under 'name'.
+ *
+ * Unless ZRENAMING is set, zp's link count is incremented.  The entry is
+ * then added to dzp's ZAP, zp's parent/flags (and, unless ZNEW, ctime)
+ * attributes are updated, and dzp's size, link count and timestamps are
+ * updated, all within tx.
+ *
+ * Returns 0 on success; ENOENT if zp has been unlinked (non-rename case);
+ * EMLINK if a link-count limit on dzp or zp would be exceeded; or the
+ * error returned by zap_add().
+ */
+int
+zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
+    int flag)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	vnode_t *vp = ZTOV(zp);
+	uint64_t value;
+	int zp_is_dir = (vp->v_type == VDIR);
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	int count = 0;
+	int error;
+
+	if (zfsvfs->z_replay == B_FALSE) {
+		ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+	}
+	/* A new subdirectory adds a ".." link to the parent. */
+	if (zp_is_dir) {
+		if (dzp->z_links >= ZFS_LINK_MAX)
+			return (SET_ERROR(EMLINK));
+	}
+	if (!(flag & ZRENAMING)) {
+		if (zp->z_unlinked) {	/* no new links to unlinked zp */
+			ASSERT(!(flag & (ZNEW | ZEXISTS)));
+			return (SET_ERROR(ENOENT));
+		}
+		if (zp->z_links >= ZFS_LINK_MAX - zp_is_dir) {
+			return (SET_ERROR(EMLINK));
+		}
+		zp->z_links++;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+		    &zp->z_links, sizeof (zp->z_links));
+
+	} else {
+		ASSERT(!zp->z_unlinked);
+	}
+	value = zfs_dirent(zp, zp->z_mode);
+	error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
+	    8, 1, &value, tx);
+
+	/*
+	 * zap_add could fail to add the entry if it exceeds the capacity of the
+	 * leaf-block and zap_leaf_split() failed to help.
+	 * The caller of this routine is responsible for failing the transaction
+	 * which will rollback the SA updates done above.
+	 *
+	 * NOTE(review): z_links was incremented above for every
+	 * non-ZRENAMING call, but is rolled back here only when ZNEW is
+	 * also clear - confirm that ZNEW callers discard zp on failure.
+	 */
+	if (error != 0) {
+		if (!(flag & ZRENAMING) && !(flag & ZNEW))
+			zp->z_links--;
+		return (error);
+	}
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
+	    &dzp->z_id, sizeof (dzp->z_id));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+
+	if (!(flag & ZNEW)) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+		    ctime);
+	}
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT0(error);
+
+	/* Now account for the new entry in the parent directory. */
+	dzp->z_size++;
+	dzp->z_links += zp_is_dir;
+	count = 0;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &dzp->z_links, sizeof (dzp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT0(error);
+	return (0);
+}
+
+/*
+ * The match type in the code for this function should conform to:
+ *
+ * ------------------------------------------------------------------------
+ * fs type  | z_norm      | lookup type | match type
+ * ---------|-------------|-------------|----------------------------------
+ * CS !norm | 0           |           0 | 0 (exact)
+ * CS  norm | formX       |           0 | MT_NORMALIZE
+ * CI !norm | upper       |   !ZCIEXACT | MT_NORMALIZE
+ * CI !norm | upper       |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CI  norm | upper|formX |   !ZCIEXACT | MT_NORMALIZE
+ * CI  norm | upper|formX |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper       |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper       |     ZCILOOK | MT_NORMALIZE
+ * CM  norm | upper|formX |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM  norm | upper|formX |     ZCILOOK | MT_NORMALIZE
+ *
+ * Abbreviations:
+ *    CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
+ *    upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
+ *    formX = unicode normalization form set on fs creation
+ */
+static int
+zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
+    int flag)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	matchtype_t mt;
+
+	/*
+	 * Remove 'name' from dzp's ZAP in tx and return the ZAP error.
+	 * Without normalization an exact removal is all that is needed;
+	 * the 'flag' argument does not influence the match type.
+	 */
+	if (!zfsvfs->z_norm)
+		return (zap_remove(zfsvfs->z_os, dzp->z_id, name, tx));
+
+	/* Normalizing file system: mixed case additionally matches case. */
+	mt = MT_NORMALIZE;
+	if (zfsvfs->z_case == ZFS_CASE_MIXED)
+		mt |= MT_MATCH_CASE;
+
+	return (zap_remove_norm(zfsvfs->z_os, dzp->z_id, name, mt, tx));
+}
+
+/*
+ * Unlink zp from dzp, and mark zp for deletion if this was the last link.
+ * Fails with ENOTEMPTY if zp is a non-empty directory (non-rename case),
+ * or with the error from removing the name from dzp's ZAP.
+ * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
+ * If it's non-NULL, we use it to indicate whether the znode needs deletion,
+ * and it's the caller's job to do it.
+ *
+ * With ZRENAMING set only the directory entry is removed; zp's link count
+ * is left alone.  In all successful cases dzp's size, link count and
+ * timestamps are updated within tx.
+ *
+ * NOTE(review): earlier documentation also listed EBUSY for mount points;
+ * no such check is made in this function.
+ */
+int
+zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
+    int flag, boolean_t *unlinkedp)
+{
+	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+	vnode_t *vp = ZTOV(zp);
+	int zp_is_dir = (vp->v_type == VDIR);
+	boolean_t unlinked = B_FALSE;
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	int count = 0;
+	int error;
+
+	if (zfsvfs->z_replay == B_FALSE) {
+		ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+	}
+	if (!(flag & ZRENAMING)) {
+
+		if (zp_is_dir && !zfs_dirempty(zp))
+			return (SET_ERROR(ENOTEMPTY));
+
+		/*
+		 * If we get here, we are going to try to remove the object.
+		 * First try removing the name from the directory; if that
+		 * fails, return the error.
+		 */
+		error = zfs_dropname(dzp, name, zp, tx, flag);
+		if (error != 0) {
+			return (error);
+		}
+
+		/*
+		 * A directory always carries one extra link (zp_is_dir), so
+		 * the count must be above that before it is decremented.
+		 */
+		if (zp->z_links <= zp_is_dir) {
+			zfs_panic_recover("zfs: link count on vnode %p is %u, "
+			    "should be at least %u", zp->z_vnode,
+			    (unsigned int)zp->z_links,
+			    (unsigned int)(zp_is_dir + 1));
+			zp->z_links = zp_is_dir + 1;
+		}
+		if (--zp->z_links == zp_is_dir) {
+			/* Last real link is gone: mark for deletion. */
+			zp->z_unlinked = B_TRUE;
+			zp->z_links = 0;
+			unlinked = B_TRUE;
+		} else {
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+			    NULL, ctime, sizeof (ctime));
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+			    ctime);
+		}
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+		    NULL, &zp->z_links, sizeof (zp->z_links));
+		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		count = 0;
+		ASSERT0(error);
+	} else {
+		ASSERT(!zp->z_unlinked);
+		error = zfs_dropname(dzp, name, zp, tx, flag);
+		if (error != 0)
+			return (error);
+	}
+
+	dzp->z_size--;		/* one dirent removed */
+	dzp->z_links -= zp_is_dir;	/* ".." link from zp */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+	    NULL, &dzp->z_links, sizeof (dzp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
+	    NULL, &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+	    NULL, ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
+	    NULL, mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT0(error);
+
+	/* Either report the unlinked state or queue the znode ourselves. */
+	if (unlinkedp != NULL)
+		*unlinkedp = unlinked;
+	else if (unlinked)
+		zfs_unlinked_add(zp, tx);
+
+	return (0);
+}
+
+/*
+ * Report whether a directory znode is empty.  A directory whose z_size is
+ * exactly 2 is treated as empty.
+ */
+boolean_t
+zfs_dirempty(znode_t *dzp)
+{
+	boolean_t empty;
+
+	empty = (dzp->z_size == 2);
+
+	return (empty);
+}
+
+/*
+ * Create the extended attribute directory znode for zp and record its object
+ * id in zp's SA_ZPL_XATTR attribute.
+ *
+ *	IN:	zp	- znode that will own the attribute directory
+ *		vap	- attributes for the new directory
+ *		cr	- credentials of caller
+ *
+ *	OUT:	xvpp	- the new attribute directory znode; set to NULL
+ *			  on entry and only filled in on success
+ *
+ *	RETURN:	0 on success
+ *		EDQUOT if zfs_acl_ids_overquota() reports over quota
+ *		otherwise the error from zfs_acl_ids_create() or
+ *		dmu_tx_assign()
+ */
+int
+zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	znode_t *xzp;
+	dmu_tx_t *tx;
+	int error;
+	zfs_acl_ids_t acl_ids;
+	boolean_t fuid_dirtied;
+	/* Only read by the assertions below, hence __maybe_unused. */
+	uint64_t parent __maybe_unused;
+
+	*xvpp = NULL;
+
+	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
+	    &acl_ids)) != 0)
+		return (error);
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0)) {
+		zfs_acl_ids_free(&acl_ids);
+		return (SET_ERROR(EDQUOT));
+	}
+
+	/*
+	 * Paired with getnewvnode_drop_reserve() on every exit path below.
+	 * NOTE(review): presumably reserves a vnode ahead of the
+	 * transaction -- confirm against the helper's definition.
+	 */
+	getnewvnode_reserve_();
+
+	/* Declare everything the transaction will touch, then assign it. */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		/* Undo everything acquired so far, in this order. */
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		getnewvnode_drop_reserve();
+		return (error);
+	}
+	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	/* Sanity check: the new node's parent attribute must be zp. */
+	ASSERT0(sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), &parent,
+	    sizeof (parent)));
+	ASSERT3U(parent, ==, zp->z_id);
+
+	/* Point zp at its new attribute directory. */
+	VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
+	    sizeof (xzp->z_id), tx));
+
+	zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL,
+	    acl_ids.z_fuidp, vap);
+
+	zfs_acl_ids_free(&acl_ids);
+	dmu_tx_commit(tx);
+
+	getnewvnode_drop_reserve();
+
+	*xvpp = xzp;
+
+	return (0);
+}
+
+/*
+ * Return a znode for the extended attribute directory for zp.
+ * ** If the directory does not already exist, it is created, but only
+ * when CREATE_XATTR_DIR is set in flags **
+ *
+ *	IN:	zp	- znode to obtain attribute directory from
+ *		cr	- credentials of caller
+ *		flags	- flags from the VOP_LOOKUP call
+ *
+ *	OUT:	xzpp	- pointer to extended attribute znode
+ *
+ *	RETURN:	0 on success
+ *		ENOATTR if the directory does not exist and
+ *		CREATE_XATTR_DIR is not set
+ *		EROFS if it would have to be created on a read-only
+ *		file system
+ *		other error number on failure
+ */
+int
+zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags)
+{
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	znode_t		*xzp;
+	vattr_t		va;
+	int		error;
+top:
+	/* Look for an existing attribute directory first. */
+	error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
+	if (error)
+		return (error);
+
+	if (xzp != NULL) {
+		*xzpp = xzp;
+		return (0);
+	}
+
+
+	/* Nothing there yet: create it only if the caller asked for that. */
+	if (!(flags & CREATE_XATTR_DIR))
+		return (SET_ERROR(ENOATTR));
+
+	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
+		return (SET_ERROR(EROFS));
+	}
+
+	/*
+	 * The ability to 'create' files in an attribute
+	 * directory comes from the write_xattr permission on the base file.
+	 *
+	 * The ability to 'search' an attribute directory requires
+	 * read_xattr permission on the base file.
+	 *
+	 * Once in a directory the ability to read/write attributes
+	 * is controlled by the permissions on the attribute file.
+	 */
+	va.va_mask = AT_MODE | AT_UID | AT_GID;
+	va.va_type = VDIR;
+	va.va_mode = S_IFDIR | S_ISVTX | 0777;
+	zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
+
+	error = zfs_make_xattrdir(zp, &va, xzpp, cr);
+
+	if (error == ERESTART) {
+		/* NB: we already did dmu_tx_wait() if necessary */
+		goto top;
+	}
+	/* A newly created directory is unlocked before being handed back. */
+	if (error == 0)
+		VOP_UNLOCK1(ZTOV(*xzpp));
+
+	return (error);
+}
+
+/*
+ * Decide whether it is okay to remove within a sticky directory.
+ *
+ * Write access alone is not enough in a sticky directory.  An entry may be
+ * removed only when one of the following holds:
+ *
+ *	the caller owns the directory,
+ *	the caller owns the entry,
+ *	the entry is a plain file and the caller has write access to it,
+ *	or the caller is privileged (checked in secpolicy...).
+ *
+ * During replay, and for directories without the sticky bit, access is
+ * always granted.
+ *
+ * The function returns 0 if remove access is granted.
+ */
+int
+zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = zdp->z_zfsvfs;
+	uid_t		dir_owner;
+	uid_t		entry_owner;
+	uid_t		caller;
+
+	if (zfsvfs->z_replay)
+		return (0);
+
+	if (!(zdp->z_mode & S_ISVTX))
+		return (0);
+
+	dir_owner = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
+	entry_owner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
+	caller = crgetuid(cr);
+
+	/* Owner of the directory or of the entry itself. */
+	if (caller == dir_owner || caller == entry_owner)
+		return (0);
+
+	/* Plain file the caller is allowed to write. */
+	if (ZTOV(zp)->v_type == VREG &&
+	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
+		return (0);
+
+	/* Last resort: let the security policy decide. */
+	return (secpolicy_vnode_remove(ZTOV(zp), cr));
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_file_os.c b/zfs/module/os/freebsd/zfs/zfs_file_os.c
new file mode 100644
index 0000000..60c9ff0
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_file_os.c

@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/dmu.h>
+#include <sys/dmu_impl.h>
+#include <sys/dmu_recv.h>
+#include <sys/dmu_tx.h>
+#include <sys/dbuf.h>
+#include <sys/dnode.h>
+#include <sys/zfs_context.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_synctask.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zap.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_file.h>
+#include <sys/buf.h>
+#include <sys/stat.h>
+
+/*
+ * Open 'path' from kernel context and return a reference to the resulting
+ * file.
+ *
+ *	IN:	path	- path to open (kernel address space)
+ *		flags	- open flags passed to kern_openat()
+ *		mode	- creation mode passed to kern_openat()
+ *
+ *	OUT:	fpp	- the opened file; only valid when 0 is returned
+ *
+ *	RETURN:	0 on success
+ *		error from kern_openat() or fget() on failure
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	struct thread *td;
+	int rc, fd;
+
+	td = curthread;
+	pwd_ensure_dirs();
+	/* 12.x doesn't take a const char * */
+	rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path),
+	    UIO_SYSSPACE, flags, mode);
+	if (rc)
+		return (SET_ERROR(rc));
+	/* kern_openat() hands the new descriptor back through td_retval. */
+	fd = td->td_retval[0];
+	td->td_retval[0] = 0;
+	/*
+	 * If the descriptor cannot be turned into a file reference, close it
+	 * and report the failure; returning 0 here would leave the caller
+	 * believing *fpp is usable.
+	 */
+	rc = fget(td, fd, &cap_no_rights, fpp);
+	if (rc != 0) {
+		kern_close(td, fd);
+		return (SET_ERROR(rc));
+	}
+	return (0);
+}
+
+/*
+ * Close a file obtained from zfs_file_open() by calling fo_close() on it for
+ * the current thread.  The result of fo_close() is not reported.
+ */
+void
+zfs_file_close(zfs_file_t *fp)
+{
+	struct thread *td = curthread;
+
+	fo_close(fp, td);
+}
+
+/*
+ * Common write path: write 'count' bytes from the kernel buffer 'buf' to fp
+ * at offset *offp.
+ *
+ *	OUT:	offp	- advanced by the number of bytes transferred
+ *		resid	- if non-NULL, receives the number of bytes that
+ *			  were not written
+ *
+ *	RETURN:	0 on success
+ *		EBADF if fp was not opened for writing
+ *		EIO on a short write when resid is NULL
+ *		otherwise the error from fo_write()
+ */
+static int
+zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *offp,
+    ssize_t *resid)
+{
+	struct thread *td = curthread;
+	struct iovec vec;
+	struct uio io;
+	ssize_t error;
+
+	if (!(fp->f_flag & FWRITE))
+		return (SET_ERROR(EBADF));
+
+	/* Describe the caller's buffer as a single kernel-space segment. */
+	vec.iov_base = (void *)(uintptr_t)buf;
+	vec.iov_len = count;
+
+	io.uio_iov = &vec;
+	io.uio_iovcnt = 1;
+	io.uio_offset = *offp;
+	io.uio_resid = count;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_WRITE;
+	io.uio_td = td;
+
+	if (fp->f_type == DTYPE_VNODE)
+		bwillwrite();
+
+	error = fo_write(fp, &io, td->td_ucred, FOF_OFFSET, td);
+	if (error != 0)
+		return (SET_ERROR(error));
+
+	/* Without a resid pointer a short write cannot be reported, so fail. */
+	if (resid != NULL)
+		*resid = io.uio_resid;
+	else if (io.uio_resid != 0)
+		return (SET_ERROR(EIO));
+
+	*offp += count - io.uio_resid;
+
+	return (0);
+}
+
+/*
+ * Write at the file's current offset.  fp->f_offset is advanced only when
+ * the write succeeds.  See zfs_file_write_impl() for 'resid' and the
+ * possible errors.
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t error;
+	loff_t pos;
+
+	pos = fp->f_offset;
+	error = zfs_file_write_impl(fp, buf, count, &pos, resid);
+	if (!error)
+		fp->f_offset = pos;
+
+	return (SET_ERROR(error));
+}
+
+/*
+ * Write at the explicit offset 'off'.  fp->f_offset is not modified by this
+ * function.  See zfs_file_write_impl() for 'resid' and the possible errors.
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	int error;
+
+	error = zfs_file_write_impl(fp, buf, count, &off, resid);
+
+	return (error);
+}
+
+/*
+ * Common read path: read up to 'count' bytes from fp at offset *offp into
+ * the kernel buffer 'buf'.
+ *
+ *	OUT:	offp	- advanced by the number of bytes transferred
+ *		resid	- if non-NULL, receives the number of bytes that
+ *			  were not read
+ *
+ *	RETURN:	0 on success (a short read is not an error)
+ *		EBADF if fp was not opened for reading
+ *		otherwise the error from fo_read()
+ */
+static int
+zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *offp,
+    ssize_t *resid)
+{
+	struct thread *td = curthread;
+	struct iovec vec;
+	struct uio io;
+	ssize_t error;
+
+	if (!(fp->f_flag & FREAD))
+		return (SET_ERROR(EBADF));
+
+	/* Describe the caller's buffer as a single kernel-space segment. */
+	vec.iov_base = (void *)(uintptr_t)buf;
+	vec.iov_len = count;
+
+	io.uio_iov = &vec;
+	io.uio_iovcnt = 1;
+	io.uio_offset = *offp;
+	io.uio_resid = count;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_td = td;
+
+	error = fo_read(fp, &io, td->td_ucred, FOF_OFFSET, td);
+	if (error != 0)
+		return (SET_ERROR(error));
+
+	if (resid != NULL)
+		*resid = io.uio_resid;
+	*offp += count - io.uio_resid;
+
+	return (SET_ERROR(0));
+}
+
+/*
+ * Read at the file's current offset.  fp->f_offset is advanced only when the
+ * read succeeds.  See zfs_file_read_impl() for 'resid' and the possible
+ * errors.
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	ssize_t error;
+	loff_t pos;
+
+	pos = fp->f_offset;
+	error = zfs_file_read_impl(fp, buf, count, &pos, resid);
+	if (!error)
+		fp->f_offset = pos;
+
+	return (error);
+}
+
+/*
+ * Read at the explicit offset 'off'.  fp->f_offset is not modified by this
+ * function.  See zfs_file_read_impl() for 'resid' and the possible errors.
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	int error;
+
+	error = zfs_file_read_impl(fp, buf, count, &off, resid);
+
+	return (error);
+}
+
+/*
+ * Reposition fp.
+ *
+ *	IN/OUT:	offp	- requested offset on entry; on success replaced by
+ *			  the offset fo_seek() left in td_uretoff
+ *	IN:	whence	- passed through to fo_seek()
+ *
+ *	RETURN:	0 on success
+ *		ESPIPE if the file's ops are not flagged DFLAG_SEEKABLE
+ *		otherwise the error from fo_seek()
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	struct thread *td = curthread;
+	int error;
+
+	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
+		return (SET_ERROR(ESPIPE));
+
+	error = fo_seek(fp, *offp, whence, td);
+	if (!error)
+		*offp = td->td_uretoff.tdu_off;
+
+	return (SET_ERROR(error));
+}
+
+/*
+ * Fetch the size and mode of fp via fo_stat().
+ *
+ *	OUT:	zfattr	- zfa_size and zfa_mode are filled in on success
+ *
+ *	RETURN:	0 on success, otherwise the error from fo_stat()
+ */
+int
+zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
+{
+	struct thread *td = curthread;
+	struct stat st;
+	int error;
+
+	/* fo_stat() takes a thread argument only before 1400037. */
+#if __FreeBSD_version < 1400037
+	error = fo_stat(fp, &st, td->td_ucred, td);
+#else
+	error = fo_stat(fp, &st, td->td_ucred);
+#endif
+	if (error != 0)
+		return (SET_ERROR(error));
+
+	zfattr->zfa_mode = st.st_mode;
+	zfattr->zfa_size = st.st_size;
+
+	return (0);
+}
+
+/*
+ * Run VOP_FSYNC(MNT_WAIT) on vp with the vnode exclusively locked, bracketed
+ * by vn_start_write()/vn_finished_write().  If vn_start_write() fails,
+ * nothing else is attempted and its error is returned.
+ */
+static __inline int
+zfs_vop_fsync(vnode_t *vp)
+{
+	struct mount *mp;
+	int error;
+
+	/* The catch-signals flag is spelled differently from 1400068 on. */
+#if __FreeBSD_version < 1400068
+	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+#else
+	error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
+#endif
+	if (error == 0) {
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		error = VOP_FSYNC(vp, MNT_WAIT, curthread);
+		VOP_UNLOCK1(vp);
+		vn_finished_write(mp);
+	}
+
+	return (SET_ERROR(error));
+}
+
+/*
+ * Flush fp to stable storage.  Only vnode-backed files are supported; any
+ * other file type yields EINVAL.  'flags' is not used by this function.
+ */
+int
+zfs_file_fsync(zfs_file_t *fp, int flags)
+{
+	if (fp->f_type == DTYPE_VNODE)
+		return (zfs_vop_fsync(fp->f_vnode));
+
+	return (EINVAL);
+}
+
+/*
+ * Look up descriptor 'fd' in the current thread via fget().  Returns the
+ * file on success, or NULL if fget() fails.
+ */
+zfs_file_t *
+zfs_file_get(int fd)
+{
+	struct file *found;
+	int error;
+
+	error = fget(curthread, fd, &cap_no_rights, &found);
+
+	return (error != 0 ? NULL : found);
+}
+
+/*
+ * Drop a file reference with fdrop() on behalf of the current thread.
+ */
+void
+zfs_file_put(zfs_file_t *fp)
+{
+	struct thread *td = curthread;
+
+	fdrop(fp, td);
+}
+
+/*
+ * Return the current offset stored in fp (fp->f_offset).
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	loff_t pos = fp->f_offset;
+
+	return (pos);
+}
+
+/*
+ * Return the data devfs_get_cdevpriv() reports while fp is installed as the
+ * current thread's td_fpop, or NULL if that call fails.  The previous
+ * td_fpop value is restored before returning.
+ */
+void *
+zfs_file_private(zfs_file_t *fp)
+{
+	file_t *saved_fpop;
+	void *priv;
+	int rc;
+
+	/*
+	 * NOTE(review): presumably devfs_get_cdevpriv() consults td_fpop to
+	 * find the file, which is why fp is swapped in around the call.
+	 */
+	saved_fpop = curthread->td_fpop;
+	curthread->td_fpop = fp;
+	rc = devfs_get_cdevpriv(&priv);
+	curthread->td_fpop = saved_fpop;
+
+	return (rc != 0 ? NULL : priv);
+}
+
+/*
+ * Remove the file named by the kernel-space path 'fnamep', resolved relative
+ * to AT_FDCWD.  Which kernel unlink routine is called, and with which
+ * arguments, depends on __FreeBSD_version.
+ *
+ *	RETURN:	0 on success, otherwise the error from the unlink call
+ */
+int
+zfs_file_unlink(const char *fnamep)
+{
+	struct thread *td = curthread;
+	zfs_uio_seg_t pathseg = UIO_SYSSPACE;
+	int error;
+
+#if __FreeBSD_version >= 1300018
+	error = kern_funlinkat(td, AT_FDCWD, fnamep, FD_NONE, pathseg, 0, 0);
+#elif __FreeBSD_version >= 1202504 || defined(AT_BENEATH)
+	error = kern_unlinkat(td, AT_FDCWD, __DECONST(char *, fnamep),
+	    pathseg, 0, 0);
+#else
+	error = kern_unlinkat(td, AT_FDCWD, __DECONST(char *, fnamep),
+	    pathseg, 0);
+#endif
+
+	return (SET_ERROR(error));
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_ioctl_compat.c b/zfs/module/os/freebsd/zfs/zfs_ioctl_compat.c
new file mode 100644
index 0000000..43156f2
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_ioctl_compat.c

@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/zfs_ioctl_compat.h>
+
+/*
+ * Legacy ioctl request numbers.  Apart from the explicit values given to
+ * the first few names, the enumerators are numbered consecutively from
+ * ZFS_IOC_LEGACY_FIRST (0), so the order of this list is significant.
+ * ZFS_IOC_LEGACY_NONE (-1) is the placeholder used in this file's
+ * translation tables for requests that have no legacy number.
+ */
+enum zfs_ioc_legacy {
+	ZFS_IOC_LEGACY_NONE =	-1,
+	ZFS_IOC_LEGACY_FIRST =	0,
+	ZFS_LEGACY_IOC = ZFS_IOC_LEGACY_FIRST,
+	ZFS_IOC_LEGACY_POOL_CREATE = ZFS_IOC_LEGACY_FIRST,
+	ZFS_IOC_LEGACY_POOL_DESTROY,
+	ZFS_IOC_LEGACY_POOL_IMPORT,
+	ZFS_IOC_LEGACY_POOL_EXPORT,
+	ZFS_IOC_LEGACY_POOL_CONFIGS,
+	ZFS_IOC_LEGACY_POOL_STATS,
+	ZFS_IOC_LEGACY_POOL_TRYIMPORT,
+	ZFS_IOC_LEGACY_POOL_SCAN,
+	ZFS_IOC_LEGACY_POOL_FREEZE,
+	ZFS_IOC_LEGACY_POOL_UPGRADE,
+	ZFS_IOC_LEGACY_POOL_GET_HISTORY,
+	ZFS_IOC_LEGACY_VDEV_ADD,
+	ZFS_IOC_LEGACY_VDEV_REMOVE,
+	ZFS_IOC_LEGACY_VDEV_SET_STATE,
+	ZFS_IOC_LEGACY_VDEV_ATTACH,
+	ZFS_IOC_LEGACY_VDEV_DETACH,
+	ZFS_IOC_LEGACY_VDEV_SETPATH,
+	ZFS_IOC_LEGACY_VDEV_SETFRU,
+	ZFS_IOC_LEGACY_OBJSET_STATS,
+	ZFS_IOC_LEGACY_OBJSET_ZPLPROPS,
+	ZFS_IOC_LEGACY_DATASET_LIST_NEXT,
+	ZFS_IOC_LEGACY_SNAPSHOT_LIST_NEXT,
+	ZFS_IOC_LEGACY_SET_PROP,
+	ZFS_IOC_LEGACY_CREATE,
+	ZFS_IOC_LEGACY_DESTROY,
+	ZFS_IOC_LEGACY_ROLLBACK,
+	ZFS_IOC_LEGACY_RENAME,
+	ZFS_IOC_LEGACY_RECV,
+	ZFS_IOC_LEGACY_SEND,
+	ZFS_IOC_LEGACY_INJECT_FAULT,
+	ZFS_IOC_LEGACY_CLEAR_FAULT,
+	ZFS_IOC_LEGACY_INJECT_LIST_NEXT,
+	ZFS_IOC_LEGACY_ERROR_LOG,
+	ZFS_IOC_LEGACY_CLEAR,
+	ZFS_IOC_LEGACY_PROMOTE,
+	ZFS_IOC_LEGACY_DESTROY_SNAPS,
+	ZFS_IOC_LEGACY_SNAPSHOT,
+	ZFS_IOC_LEGACY_DSOBJ_TO_DSNAME,
+	ZFS_IOC_LEGACY_OBJ_TO_PATH,
+	ZFS_IOC_LEGACY_POOL_SET_PROPS,
+	ZFS_IOC_LEGACY_POOL_GET_PROPS,
+	ZFS_IOC_LEGACY_SET_FSACL,
+	ZFS_IOC_LEGACY_GET_FSACL,
+	ZFS_IOC_LEGACY_SHARE,
+	ZFS_IOC_LEGACY_INHERIT_PROP,
+	ZFS_IOC_LEGACY_SMB_ACL,
+	ZFS_IOC_LEGACY_USERSPACE_ONE,
+	ZFS_IOC_LEGACY_USERSPACE_MANY,
+	ZFS_IOC_LEGACY_USERSPACE_UPGRADE,
+	ZFS_IOC_LEGACY_HOLD,
+	ZFS_IOC_LEGACY_RELEASE,
+	ZFS_IOC_LEGACY_GET_HOLDS,
+	ZFS_IOC_LEGACY_OBJSET_RECVD_PROPS,
+	ZFS_IOC_LEGACY_VDEV_SPLIT,
+	ZFS_IOC_LEGACY_NEXT_OBJ,
+	ZFS_IOC_LEGACY_DIFF,
+	ZFS_IOC_LEGACY_TMP_SNAPSHOT,
+	ZFS_IOC_LEGACY_OBJ_TO_STATS,
+	ZFS_IOC_LEGACY_JAIL,
+	ZFS_IOC_LEGACY_UNJAIL,
+	ZFS_IOC_LEGACY_POOL_REGUID,
+	ZFS_IOC_LEGACY_SPACE_WRITTEN,
+	ZFS_IOC_LEGACY_SPACE_SNAPS,
+	ZFS_IOC_LEGACY_SEND_PROGRESS,
+	ZFS_IOC_LEGACY_POOL_REOPEN,
+	ZFS_IOC_LEGACY_LOG_HISTORY,
+	ZFS_IOC_LEGACY_SEND_NEW,
+	ZFS_IOC_LEGACY_SEND_SPACE,
+	ZFS_IOC_LEGACY_CLONE,
+	ZFS_IOC_LEGACY_BOOKMARK,
+	ZFS_IOC_LEGACY_GET_BOOKMARKS,
+	ZFS_IOC_LEGACY_DESTROY_BOOKMARKS,
+	ZFS_IOC_LEGACY_NEXTBOOT,
+	ZFS_IOC_LEGACY_CHANNEL_PROGRAM,
+	ZFS_IOC_LEGACY_REMAP,
+	ZFS_IOC_LEGACY_POOL_CHECKPOINT,
+	ZFS_IOC_LEGACY_POOL_DISCARD_CHECKPOINT,
+	ZFS_IOC_LEGACY_POOL_INITIALIZE,
+	ZFS_IOC_LEGACY_POOL_SYNC,
+	/* One past the last legacy request number. */
+	ZFS_IOC_LEGACY_LAST
+};
+
+/*
+ * Legacy -> OpenZFS request translation, indexed by legacy request number.
+ * Up to 0x22 both numberings agree and each entry carries a single number.
+ * From "start of mismatch" on, each annotation reads
+ * "legacy index:OpenZFS number".
+ * NOTE(review): the annotations are informational only; the OpenZFS half
+ * should be checked against the current ZFS_IOC_* values.
+ * Legacy numbers past the end of this table are rejected by
+ * zfs_ioctl_legacy_to_ozfs().
+ */
+unsigned static long zfs_ioctl_legacy_to_ozfs_[] = {
+	ZFS_IOC_POOL_CREATE,			/* 0x00 */
+	ZFS_IOC_POOL_DESTROY,			/* 0x01 */
+	ZFS_IOC_POOL_IMPORT,			/* 0x02 */
+	ZFS_IOC_POOL_EXPORT,			/* 0x03 */
+	ZFS_IOC_POOL_CONFIGS,			/* 0x04 */
+	ZFS_IOC_POOL_STATS,			/* 0x05 */
+	ZFS_IOC_POOL_TRYIMPORT,			/* 0x06 */
+	ZFS_IOC_POOL_SCAN,			/* 0x07 */
+	ZFS_IOC_POOL_FREEZE,			/* 0x08 */
+	ZFS_IOC_POOL_UPGRADE,			/* 0x09 */
+	ZFS_IOC_POOL_GET_HISTORY,		/* 0x0a */
+	ZFS_IOC_VDEV_ADD,			/* 0x0b */
+	ZFS_IOC_VDEV_REMOVE,			/* 0x0c */
+	ZFS_IOC_VDEV_SET_STATE,			/* 0x0d */
+	ZFS_IOC_VDEV_ATTACH,			/* 0x0e */
+	ZFS_IOC_VDEV_DETACH,			/* 0x0f */
+	ZFS_IOC_VDEV_SETPATH,			/* 0x10 */
+	ZFS_IOC_VDEV_SETFRU,			/* 0x11 */
+	ZFS_IOC_OBJSET_STATS,			/* 0x12 */
+	ZFS_IOC_OBJSET_ZPLPROPS,		/* 0x13 */
+	ZFS_IOC_DATASET_LIST_NEXT,		/* 0x14 */
+	ZFS_IOC_SNAPSHOT_LIST_NEXT,		/* 0x15 */
+	ZFS_IOC_SET_PROP,			/* 0x16 */
+	ZFS_IOC_CREATE,				/* 0x17 */
+	ZFS_IOC_DESTROY,			/* 0x18 */
+	ZFS_IOC_ROLLBACK,			/* 0x19 */
+	ZFS_IOC_RENAME,				/* 0x1a */
+	ZFS_IOC_RECV,				/* 0x1b */
+	ZFS_IOC_SEND,				/* 0x1c */
+	ZFS_IOC_INJECT_FAULT,			/* 0x1d */
+	ZFS_IOC_CLEAR_FAULT,			/* 0x1e */
+	ZFS_IOC_INJECT_LIST_NEXT,		/* 0x1f */
+	ZFS_IOC_ERROR_LOG,			/* 0x20 */
+	ZFS_IOC_CLEAR,				/* 0x21 */
+	ZFS_IOC_PROMOTE,			/* 0x22 */
+	/* start of mismatch */
+
+	ZFS_IOC_DESTROY_SNAPS,			/* 0x23:0x3b */
+	ZFS_IOC_SNAPSHOT,			/* 0x24:0x23 */
+	ZFS_IOC_DSOBJ_TO_DSNAME,		/* 0x25:0x24 */
+	ZFS_IOC_OBJ_TO_PATH,			/* 0x26:0x25 */
+	ZFS_IOC_POOL_SET_PROPS,			/* 0x27:0x26 */
+	ZFS_IOC_POOL_GET_PROPS,			/* 0x28:0x27 */
+	ZFS_IOC_SET_FSACL,			/* 0x29:0x28 */
+	ZFS_IOC_GET_FSACL,			/* 0x2a:0x29 */
+	ZFS_IOC_SHARE,				/* 0x2b:0x2a */
+	ZFS_IOC_INHERIT_PROP,			/* 0x2c:0x2b */
+	ZFS_IOC_SMB_ACL,			/* 0x2d:0x2c */
+	ZFS_IOC_USERSPACE_ONE,			/* 0x2e:0x2d */
+	ZFS_IOC_USERSPACE_MANY,			/* 0x2f:0x2e */
+	ZFS_IOC_USERSPACE_UPGRADE,		/* 0x30:0x2f */
+	ZFS_IOC_HOLD,				/* 0x31:0x30 */
+	ZFS_IOC_RELEASE,			/* 0x32:0x31 */
+	ZFS_IOC_GET_HOLDS,			/* 0x33:0x32 */
+	ZFS_IOC_OBJSET_RECVD_PROPS,		/* 0x34:0x33 */
+	ZFS_IOC_VDEV_SPLIT,			/* 0x35:0x34 */
+	ZFS_IOC_NEXT_OBJ,			/* 0x36:0x35 */
+	ZFS_IOC_DIFF,				/* 0x37:0x36 */
+	ZFS_IOC_TMP_SNAPSHOT,			/* 0x38:0x37 */
+	ZFS_IOC_OBJ_TO_STATS,			/* 0x39:0x38 */
+	ZFS_IOC_JAIL,			/* 0x3a:0xc2 */
+	ZFS_IOC_UNJAIL,			/* 0x3b:0xc3 */
+	ZFS_IOC_POOL_REGUID,			/* 0x3c:0x3c */
+	ZFS_IOC_SPACE_WRITTEN,			/* 0x3d:0x39 */
+	ZFS_IOC_SPACE_SNAPS,			/* 0x3e:0x3a */
+	ZFS_IOC_SEND_PROGRESS,			/* 0x3f:0x3e */
+	ZFS_IOC_POOL_REOPEN,			/* 0x40:0x3d */
+	ZFS_IOC_LOG_HISTORY,			/* 0x41:0x3f */
+	ZFS_IOC_SEND_NEW,			/* 0x42:0x40 */
+	ZFS_IOC_SEND_SPACE,			/* 0x43:0x41 */
+	ZFS_IOC_CLONE,				/* 0x44:0x42 */
+	ZFS_IOC_BOOKMARK,			/* 0x45:0x43 */
+	ZFS_IOC_GET_BOOKMARKS,			/* 0x46:0x44 */
+	ZFS_IOC_DESTROY_BOOKMARKS,		/* 0x47:0x45 */
+	ZFS_IOC_NEXTBOOT,			/* 0x48:0xc1 */
+	ZFS_IOC_CHANNEL_PROGRAM,		/* 0x49:0x48 */
+	ZFS_IOC_REMAP,				/* 0x4a:0x4c */
+	ZFS_IOC_POOL_CHECKPOINT,		/* 0x4b:0x4d */
+	ZFS_IOC_POOL_DISCARD_CHECKPOINT,	/* 0x4c:0x4e */
+	ZFS_IOC_POOL_INITIALIZE,		/* 0x4d:0x4f */
+};
+
+/*
+ * OpenZFS -> legacy request translation for the common (non-platform)
+ * requests, indexed by OpenZFS request number as annotated on each entry.
+ * Up to 0x22 both numberings agree, so those entries use the ZFS_IOC_*
+ * names directly.  Entries holding ZFS_IOC_LEGACY_NONE mark OpenZFS
+ * requests (named in the trailing comment) that have no legacy number;
+ * note that this -1 is stored in an unsigned long element and converted
+ * back to int by zfs_ioctl_ozfs_to_legacy().
+ */
+unsigned static long zfs_ioctl_ozfs_to_legacy_common_[] = {
+	ZFS_IOC_POOL_CREATE,			/* 0x00 */
+	ZFS_IOC_POOL_DESTROY,			/* 0x01 */
+	ZFS_IOC_POOL_IMPORT,			/* 0x02 */
+	ZFS_IOC_POOL_EXPORT,			/* 0x03 */
+	ZFS_IOC_POOL_CONFIGS,			/* 0x04 */
+	ZFS_IOC_POOL_STATS,			/* 0x05 */
+	ZFS_IOC_POOL_TRYIMPORT,			/* 0x06 */
+	ZFS_IOC_POOL_SCAN,			/* 0x07 */
+	ZFS_IOC_POOL_FREEZE,			/* 0x08 */
+	ZFS_IOC_POOL_UPGRADE,			/* 0x09 */
+	ZFS_IOC_POOL_GET_HISTORY,		/* 0x0a */
+	ZFS_IOC_VDEV_ADD,			/* 0x0b */
+	ZFS_IOC_VDEV_REMOVE,			/* 0x0c */
+	ZFS_IOC_VDEV_SET_STATE,			/* 0x0d */
+	ZFS_IOC_VDEV_ATTACH,			/* 0x0e */
+	ZFS_IOC_VDEV_DETACH,			/* 0x0f */
+	ZFS_IOC_VDEV_SETPATH,			/* 0x10 */
+	ZFS_IOC_VDEV_SETFRU,			/* 0x11 */
+	ZFS_IOC_OBJSET_STATS,			/* 0x12 */
+	ZFS_IOC_OBJSET_ZPLPROPS,		/* 0x13 */
+	ZFS_IOC_DATASET_LIST_NEXT,		/* 0x14 */
+	ZFS_IOC_SNAPSHOT_LIST_NEXT,		/* 0x15 */
+	ZFS_IOC_SET_PROP,			/* 0x16 */
+	ZFS_IOC_CREATE,				/* 0x17 */
+	ZFS_IOC_DESTROY,			/* 0x18 */
+	ZFS_IOC_ROLLBACK,			/* 0x19 */
+	ZFS_IOC_RENAME,				/* 0x1a */
+	ZFS_IOC_RECV,				/* 0x1b */
+	ZFS_IOC_SEND,				/* 0x1c */
+	ZFS_IOC_INJECT_FAULT,			/* 0x1d */
+	ZFS_IOC_CLEAR_FAULT,			/* 0x1e */
+	ZFS_IOC_INJECT_LIST_NEXT,		/* 0x1f */
+	ZFS_IOC_ERROR_LOG,			/* 0x20 */
+	ZFS_IOC_CLEAR,				/* 0x21 */
+	ZFS_IOC_PROMOTE,			/* 0x22 */
+	/* start of mismatch */
+	ZFS_IOC_LEGACY_SNAPSHOT,		/* 0x23 */
+	ZFS_IOC_LEGACY_DSOBJ_TO_DSNAME,		/* 0x24 */
+	ZFS_IOC_LEGACY_OBJ_TO_PATH,		/* 0x25 */
+	ZFS_IOC_LEGACY_POOL_SET_PROPS,		/* 0x26 */
+	ZFS_IOC_LEGACY_POOL_GET_PROPS,		/* 0x27 */
+	ZFS_IOC_LEGACY_SET_FSACL,		/* 0x28 */
+	ZFS_IOC_LEGACY_GET_FSACL,		/* 0x29 */
+	ZFS_IOC_LEGACY_SHARE,			/* 0x2a */
+	ZFS_IOC_LEGACY_INHERIT_PROP,		/* 0x2b */
+	ZFS_IOC_LEGACY_SMB_ACL,			/* 0x2c */
+	ZFS_IOC_LEGACY_USERSPACE_ONE,		/* 0x2d */
+	ZFS_IOC_LEGACY_USERSPACE_MANY,		/* 0x2e */
+	ZFS_IOC_LEGACY_USERSPACE_UPGRADE,	/* 0x2f */
+	ZFS_IOC_LEGACY_HOLD,			/* 0x30 */
+	ZFS_IOC_LEGACY_RELEASE,			/* 0x31 */
+	ZFS_IOC_LEGACY_GET_HOLDS,		/* 0x32 */
+	ZFS_IOC_LEGACY_OBJSET_RECVD_PROPS,	/* 0x33 */
+	ZFS_IOC_LEGACY_VDEV_SPLIT,		/* 0x34 */
+	ZFS_IOC_LEGACY_NEXT_OBJ,		/* 0x35 */
+	ZFS_IOC_LEGACY_DIFF,			/* 0x36 */
+	ZFS_IOC_LEGACY_TMP_SNAPSHOT,		/* 0x37 */
+	ZFS_IOC_LEGACY_OBJ_TO_STATS,		/* 0x38 */
+	ZFS_IOC_LEGACY_SPACE_WRITTEN,		/* 0x39 */
+	ZFS_IOC_LEGACY_SPACE_SNAPS,		/* 0x3a */
+	ZFS_IOC_LEGACY_DESTROY_SNAPS,		/* 0x3b */
+	ZFS_IOC_LEGACY_POOL_REGUID,		/* 0x3c */
+	ZFS_IOC_LEGACY_POOL_REOPEN,		/* 0x3d */
+	ZFS_IOC_LEGACY_SEND_PROGRESS,		/* 0x3e */
+	ZFS_IOC_LEGACY_LOG_HISTORY,		/* 0x3f */
+	ZFS_IOC_LEGACY_SEND_NEW,		/* 0x40 */
+	ZFS_IOC_LEGACY_SEND_SPACE,		/* 0x41 */
+	ZFS_IOC_LEGACY_CLONE,			/* 0x42 */
+	ZFS_IOC_LEGACY_BOOKMARK,		/* 0x43 */
+	ZFS_IOC_LEGACY_GET_BOOKMARKS,		/* 0x44 */
+	ZFS_IOC_LEGACY_DESTROY_BOOKMARKS,	/* 0x45 */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_RECV_NEW */
+	ZFS_IOC_LEGACY_POOL_SYNC,		/* 0x47 */
+	ZFS_IOC_LEGACY_CHANNEL_PROGRAM,		/* 0x48 */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_LOAD_KEY */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_UNLOAD_KEY */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_CHANGE_KEY */
+	ZFS_IOC_LEGACY_REMAP,			/* 0x4c */
+	ZFS_IOC_LEGACY_POOL_CHECKPOINT,		/* 0x4d */
+	ZFS_IOC_LEGACY_POOL_DISCARD_CHECKPOINT,	/* 0x4e */
+	ZFS_IOC_LEGACY_POOL_INITIALIZE,		/* 0x4f  */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_POOL_TRIM */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_REDACT */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_GET_BOOKMARK_PROPS */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_WAIT */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_WAIT_FS */
+};
+
+/*
+ * OpenZFS -> legacy request translation for the platform-specific requests.
+ * zfs_ioctl_ozfs_to_legacy() indexes this table with
+ * (request - ZFS_IOC_PLATFORM - 1), so entry 0 belongs to the first request
+ * after ZFS_IOC_PLATFORM.  ZFS_IOC_LEGACY_NONE marks requests (named in the
+ * trailing comment) that have no legacy number.
+ */
+unsigned static long zfs_ioctl_ozfs_to_legacy_platform_[] = {
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_EVENTS_NEXT */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_EVENTS_CLEAR */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_EVENTS_SEEK */
+	ZFS_IOC_LEGACY_NEXTBOOT,
+	ZFS_IOC_LEGACY_JAIL,
+	ZFS_IOC_LEGACY_UNJAIL,
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_SET_BOOTENV */
+	ZFS_IOC_LEGACY_NONE, /* ZFS_IOC_GET_BOOTENV */
+};
+
+/*
+ * Translate a legacy ioctl request number into its OpenZFS counterpart.
+ * Returns -1 when 'request' does not index zfs_ioctl_legacy_to_ozfs_[].
+ * The comparison is done in an unsigned type, so a negative request is
+ * rejected as well.
+ */
+int
+zfs_ioctl_legacy_to_ozfs(int request)
+{
+	const size_t nentries =
+	    sizeof (zfs_ioctl_legacy_to_ozfs_) / sizeof (long);
+
+	if (request < nentries)
+		return (zfs_ioctl_legacy_to_ozfs_[request]);
+
+	return (-1);
+}
+
+/*
+ * Translate an OpenZFS ioctl request number into its legacy counterpart.
+ *
+ * Requests above ZFS_IOC_PLATFORM are looked up in the platform table,
+ * everything else in the common table.
+ *
+ *	RETURN:	the legacy request number
+ *		-1 if 'request' is out of range for the table it selects, or
+ *		if the selected entry is ZFS_IOC_LEGACY_NONE
+ */
+int
+zfs_ioctl_ozfs_to_legacy(int request)
+{
+	if (request >= ZFS_IOC_LAST)
+		return (-1);
+
+	if (request > ZFS_IOC_PLATFORM) {
+		request -= ZFS_IOC_PLATFORM + 1;
+		/*
+		 * Check against the table's own length rather than relying
+		 * on ZFS_IOC_LAST matching it, mirroring the check done for
+		 * the common table below.
+		 */
+		if (request >= sizeof (zfs_ioctl_ozfs_to_legacy_platform_) /
+		    sizeof (zfs_ioctl_ozfs_to_legacy_platform_[0]))
+			return (-1);
+		return (zfs_ioctl_ozfs_to_legacy_platform_[request]);
+	}
+	/*
+	 * The comparison is unsigned, so this also rejects negative request
+	 * numbers and the gap between the end of the common table and
+	 * ZFS_IOC_PLATFORM.
+	 */
+	if (request >= sizeof (zfs_ioctl_ozfs_to_legacy_common_)/sizeof (long))
+		return (-1);
+	return (zfs_ioctl_ozfs_to_legacy_common_[request]);
+}
+
+/*
+ * Convert a legacy zfs_cmd_legacy_t into the OpenZFS zfs_cmd_t layout.
+ * The structure is copied in pieces, with every length computed from
+ * zfs_cmd_t field offsets, and zc_jailid is carried over as zc_zoneid.
+ */
+void
+zfs_cmd_legacy_to_ozfs(zfs_cmd_legacy_t *src, zfs_cmd_t *dst)
+{
+	/* Everything that precedes zc_objset_stats. */
+	memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
+	*&dst->zc_objset_stats = *&src->zc_objset_stats;
+	/* From zc_begin_record up to, but not including, zc_sendobj. */
+	memcpy(&dst->zc_begin_record, &src->zc_begin_record,
+	    offsetof(zfs_cmd_t, zc_sendobj) -
+	    offsetof(zfs_cmd_t, zc_begin_record));
+	/*
+	 * From zc_sendobj to the end of zfs_cmd_t, less 8 bytes.
+	 * NOTE(review): the "- 8" presumably skips trailing data handled
+	 * separately (zc_zoneid below) -- confirm against the struct layout.
+	 */
+	memcpy(&dst->zc_sendobj, &src->zc_sendobj,
+	    sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
+	dst->zc_zoneid = src->zc_jailid;
+}
+
+/*
+ * Convert an OpenZFS zfs_cmd_t into the legacy zfs_cmd_legacy_t layout.
+ * The structure is copied in pieces, with every length computed from
+ * zfs_cmd_t field offsets.  The begin record is stored inside the legacy
+ * structure's larger record, zc_resumable is cleared, and zc_zoneid is
+ * carried over as zc_jailid.
+ */
+void
+zfs_cmd_ozfs_to_legacy(zfs_cmd_t *src, zfs_cmd_legacy_t *dst)
+{
+	/* Everything that precedes zc_objset_stats. */
+	memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
+	*&dst->zc_objset_stats = *&src->zc_objset_stats;
+	/*
+	 * The legacy zc_begin_record wraps the begin record in drr_u; the
+	 * wrapper's drr_payloadlen and drr_type are zeroed.
+	 */
+	*&dst->zc_begin_record.drr_u.drr_begin = *&src->zc_begin_record;
+	dst->zc_begin_record.drr_payloadlen = 0;
+	dst->zc_begin_record.drr_type = 0;
+
+	/* From zc_inject_record up to, but not including, zc_sendobj. */
+	memcpy(&dst->zc_inject_record, &src->zc_inject_record,
+	    offsetof(zfs_cmd_t, zc_sendobj) -
+	    offsetof(zfs_cmd_t, zc_inject_record));
+	dst->zc_resumable = B_FALSE;
+	/*
+	 * From zc_sendobj to the end of zfs_cmd_t, less 8 bytes.
+	 * NOTE(review): the "- 8" presumably skips trailing data handled
+	 * separately (zc_jailid below) -- confirm against the struct layout.
+	 */
+	memcpy(&dst->zc_sendobj, &src->zc_sendobj,
+	    sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
+	dst->zc_jailid = src->zc_zoneid;
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_ioctl_os.c b/zfs/module/os/freebsd/zfs/zfs_ioctl_os.c
new file mode 100644
index 0000000..effc115
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_ioctl_os.c

@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2020 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/nvpair.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_os.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zone.h>
+#include <vm/vm_pageout.h>
+
+#include <sys/zfs_ioctl_impl.h>
+
+#if __FreeBSD_version < 1201517
+#define	vm_page_max_user_wired	vm_page_max_wired
+#endif
+
+/*
+ * Take a busy reference on the VFS behind *zfvp using vfs_busy().
+ *
+ *	IN/OUT:	zfvp	- zfsvfs to reference; reset to NULL if vfs_busy()
+ *			  fails
+ *
+ *	RETURN:	0 on success
+ *		ESRCH if *zfvp is NULL or vfs_busy() fails
+ */
+int
+zfs_vfs_ref(zfsvfs_t **zfvp)
+{
+	zfsvfs_t *zfsvfs = *zfvp;
+
+	if (zfsvfs == NULL)
+		return (SET_ERROR(ESRCH));
+
+	if (vfs_busy(zfsvfs->z_vfs, 0) == 0)
+		return (0);
+
+	*zfvp = NULL;
+	return (SET_ERROR(ESRCH));
+}
+
+/*
+ * Report whether zfsvfs has a VFS attached, i.e. whether z_vfs is non-NULL.
+ */
+boolean_t
+zfs_vfs_held(zfsvfs_t *zfsvfs)
+{
+	boolean_t held;
+
+	held = (zfsvfs->z_vfs != NULL);
+
+	return (held);
+}
+
+/*
+ * Release a busy reference on the VFS behind zfsvfs with vfs_unbusy().
+ */
+void
+zfs_vfs_rele(zfsvfs_t *zfsvfs)
+{
+	struct mount *mp = zfsvfs->z_vfs;
+
+	vfs_unbusy(mp);
+}
+
+/*
+ * Input nvlist keys for the nextboot ioctl: the command string plus the pool
+ * and vdev GUIDs.  zfs_ioctl_init_os() passes the entry count (3) alongside
+ * this table, so the two must be kept in sync.
+ */
+static const zfs_ioc_key_t zfs_keys_nextboot[] = {
+	{"command",		DATA_TYPE_STRING,	0},
+	{ ZPOOL_CONFIG_POOL_GUID,		DATA_TYPE_UINT64,	0},
+	{ ZPOOL_CONFIG_GUID,		DATA_TYPE_UINT64,	0}
+};
+
+/*
+ * ZFS_IOC_JAIL handler: attach the dataset named by zc_name via
+ * zone_dataset_attach(), using the calling thread's credentials and
+ * zc_zoneid (narrowed to int) as the target id.
+ */
+static int
+zfs_ioc_jail(zfs_cmd_t *zc)
+{
+	int id = (int)zc->zc_zoneid;
+	int error;
+
+	error = zone_dataset_attach(curthread->td_ucred, zc->zc_name, id);
+
+	return (error);
+}
+
+/*
+ * ZFS_IOC_UNJAIL handler: detach the dataset named by zc_name via
+ * zone_dataset_detach(), using the calling thread's credentials and
+ * zc_zoneid (narrowed to int) as the target id.
+ */
+static int
+zfs_ioc_unjail(zfs_cmd_t *zc)
+{
+	int id = (int)zc->zc_zoneid;
+	int error;
+
+	error = zone_dataset_detach(curthread->td_ucred, zc->zc_name, id);
+
+	return (error);
+}
+
+/*
+ * ZFS_IOC_NEXTBOOT handler: write the "command" string from innvl into the
+ * label of the vdev identified by the pool and vdev GUIDs in innvl, using
+ * vdev_label_write_pad2().
+ *
+ *	IN:	unused	- not used
+ *		innvl	- must hold ZPOOL_CONFIG_POOL_GUID, ZPOOL_CONFIG_GUID
+ *			  and "command"
+ *		outnvl	- not used
+ *
+ *	RETURN:	EINVAL if any of the three keys is missing
+ *		ENOENT if no pool matches the GUIDs
+ *		ENODEV if the vdev cannot be found in the opened pool
+ *		the error from spa_open(), or otherwise the result of
+ *		vdev_label_write_pad2()
+ */
+static int
+zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	char name[MAXNAMELEN];
+	spa_t *spa;
+	vdev_t *vd;
+	char *command;
+	uint64_t pool_guid;
+	uint64_t vdev_guid;
+	int error;
+
+	if (nvlist_lookup_uint64(innvl,
+	    ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
+		return (EINVAL);
+	if (nvlist_lookup_uint64(innvl,
+	    ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
+		return (EINVAL);
+	if (nvlist_lookup_string(innvl,
+	    "command", &command) != 0)
+		return (EINVAL);
+
+	/*
+	 * Resolve the GUIDs to a pool name under the namespace lock and keep
+	 * only a private copy of the name; the pool is opened by name once
+	 * the lock has been dropped.  The copy is bounded by the size of
+	 * 'name' so it can never overrun the buffer.
+	 */
+	mutex_enter(&spa_namespace_lock);
+	spa = spa_by_guid(pool_guid, vdev_guid);
+	if (spa != NULL)
+		(void) strlcpy(name, spa_name(spa), sizeof (name));
+	mutex_exit(&spa_namespace_lock);
+	if (spa == NULL)
+		return (ENOENT);
+
+	if ((error = spa_open(name, &spa, FTAG)) != 0)
+		return (error);
+	spa_vdev_state_enter(spa, SCL_ALL);
+	vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
+	if (vd == NULL) {
+		(void) spa_vdev_state_exit(spa, NULL, ENXIO);
+		spa_close(spa, FTAG);
+		return (ENODEV);
+	}
+	error = vdev_label_write_pad2(vd, command, strlen(command));
+	(void) spa_vdev_state_exit(spa, NULL, 0);
+	/* Wait for the pool to sync before dropping our reference. */
+	txg_wait_synced(spa->spa_dsl_pool, 0);
+	spa_close(spa, FTAG);
+	return (error);
+}
+
+/*
+ * Update the VFS's cache of mountpoint properties for the dataset 'dsname'
+ * by running VFS_STATFS() into the mount's own mnt_stat.
+ *
+ * Errors are ignored; we can't do anything useful if either getzfsvfs or
+ * VFS_STATFS fails.
+ */
+void
+zfs_ioctl_update_mount_cache(const char *dsname)
+{
+	zfsvfs_t *zfsvfs;
+	struct mount *mp;
+
+	if (getzfsvfs(dsname, &zfsvfs) != 0)
+		return;
+
+	mp = zfsvfs->z_vfs;
+	VFS_STATFS(mp, &mp->mnt_stat);
+	zfs_vfs_rele(zfsvfs);
+}
+
+/*
+ * Return the limit on the size of an ioctl's source nvlist.  A non-zero
+ * zfs_max_nvlist_src_size is used as is; otherwise the limit is a quarter of
+ * ptob(vm_page_max_user_wired).
+ */
+uint64_t
+zfs_max_nvlist_src_size_os(void)
+{
+	if (zfs_max_nvlist_src_size == 0)
+		return (ptob(vm_page_max_user_wired) / 4);
+
+	return (zfs_max_nvlist_src_size);
+}
+
+/*
+ * Register the FreeBSD-specific ioctl handlers: jail, unjail and nextboot.
+ * All three use zfs_secpolicy_config and POOL_CHECK_NONE.
+ */
+void
+zfs_ioctl_init_os(void)
+{
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
+	    zfs_secpolicy_config, POOL_CHECK_NONE);
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
+	    zfs_secpolicy_config, POOL_CHECK_NONE);
+	/* The trailing 3 is the number of entries in zfs_keys_nextboot[]. */
+	zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
+	    zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
+	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_nextboot, 3);
+
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_racct.c b/zfs/module/os/freebsd/zfs/zfs_racct.c
new file mode 100644
index 0000000..883255b
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_racct.c

@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/zfs_racct.h>
+#include <sys/racct.h>
+
+/*
+ * Account a read: add `iops` to the current thread's ru_inblock counter and,
+ * when the kernel is built with RACCT and racct_enable is set, charge `size`
+ * and `iops` to the current process as RACCT_READBPS / RACCT_READIOPS.
+ */
+void
+zfs_racct_read(uint64_t size, uint64_t iops)
+{
+	struct thread *td = curthread;
+
+	td->td_ru.ru_inblock += iops;
+#ifndef RACCT
+	/* Without RACCT the byte count is unused. */
+	(void) size;
+#else
+	if (!racct_enable)
+		return;
+
+	PROC_LOCK(curproc);
+	racct_add_force(curproc, RACCT_READBPS, size);
+	racct_add_force(curproc, RACCT_READIOPS, iops);
+	PROC_UNLOCK(curproc);
+#endif /* RACCT */
+}
+
+/*
+ * Account a write: add `iops` to the current thread's ru_oublock counter and,
+ * when the kernel is built with RACCT and racct_enable is set, charge `size`
+ * and `iops` to the current process as RACCT_WRITEBPS / RACCT_WRITEIOPS.
+ */
+void
+zfs_racct_write(uint64_t size, uint64_t iops)
+{
+	struct thread *td = curthread;
+
+	td->td_ru.ru_oublock += iops;
+#ifndef RACCT
+	/* Without RACCT the byte count is unused. */
+	(void) size;
+#else
+	if (!racct_enable)
+		return;
+
+	PROC_LOCK(curproc);
+	racct_add_force(curproc, RACCT_WRITEBPS, size);
+	racct_add_force(curproc, RACCT_WRITEIOPS, iops);
+	PROC_UNLOCK(curproc);
+#endif /* RACCT */
+}

diff --git a/zfs/module/os/freebsd/zfs/zfs_vfsops.c b/zfs/module/os/freebsd/zfs/zfs_vfsops.c
new file mode 100644
index 0000000..85449eb
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_vfsops.c

@@ -0,0 +1,2616 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/* Portions Copyright 2010 Robert Milkowski */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/acl.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/mntent.h>
+#include <sys/mount.h>
+#include <sys/cmn_err.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_dir.h>
+#include <sys/zil.h>
+#include <sys/fs/zfs.h>
+#include <sys/dmu.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_deleg.h>
+#include <sys/spa.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
+#include <sys/policy.h>
+#include <sys/atomic.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/sunddi.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+#include <sys/spa_boot.h>
+#include <sys/jail.h>
+#include <sys/osd.h>
+#include <ufs/ufs/quota.h>
+#include <sys/zfs_quota.h>
+
+#include "zfs_comutil.h"
+
+/*
+ * Fallback definitions for mount kernel flags, used only when the system
+ * headers included above do not already define them.
+ */
+#ifndef	MNTK_VMSETSIZE_BUG
+#define	MNTK_VMSETSIZE_BUG	0
+#endif
+#ifndef	MNTK_NOMSYNC
+#define	MNTK_NOMSYNC	8
+#endif
+
+/* BEGIN CSTYLED */
+/* Mutex named "zfs_debug", initialised at boot through MTX_SYSINIT. */
+struct mtx zfs_debug_mtx;
+MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
+
+/* Root of the vfs.zfs sysctl tree. */
+SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
+
+/* vfs.zfs.super_owner: read/write integer knob, initially 0. */
+int zfs_super_owner;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
+    "File system owner can perform privileged operation on his file systems");
+
+/* vfs.zfs.debug: debug level, also settable as a loader tunable (RWTUN). */
+int zfs_debug_level;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
+	"Debug level");
+
+/* ZFS parameters kept per jail. */
+struct zfs_jailparam {
+	int mount_snapshot;
+};
+
+/* Parameter set with mount_snapshot cleared. */
+static struct zfs_jailparam zfs_jailparam0 = {
+	.mount_snapshot = 0,
+};
+
+/*
+ * NOTE(review): presumably the OSD slot used to attach a zfs_jailparam to a
+ * jail; it is assigned outside this part of the file - confirm.
+ */
+static int zfs_jailparam_slot;
+
+/* Jail parameter node "zfs" and its "mount_snapshot" integer parameter. */
+SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
+SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
+	"Allow mounting snapshots in the .zfs directory for unjailed datasets");
+
+/* Read-only vfs.zfs.version.{acl,spa,zpl} nodes exporting build constants. */
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
+static int zfs_version_acl = ZFS_ACL_VERSION;
+SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
+    "ZFS_ACL_VERSION");
+static int zfs_version_spa = SPA_VERSION;
+SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
+    "SPA_VERSION");
+static int zfs_version_zpl = ZPL_VERSION;
+SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
+    "ZPL_VERSION");
+/* END CSTYLED */
+
+/*
+ * Forward declarations of the file-local VFS entry points.  The signatures
+ * of zfs_quotactl() and zfs_checkexp() depend on __FreeBSD_version.
+ */
+#if __FreeBSD_version >= 1400018
+static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
+    bool *mp_busy);
+#else
+static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
+#endif
+static int zfs_mount(vfs_t *vfsp);
+static int zfs_umount(vfs_t *vfsp, int fflag);
+static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
+static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
+static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
+static int zfs_sync(vfs_t *vfsp, int waitfor);
+#if __FreeBSD_version >= 1300098
+static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors);
+#else
+static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int **secflavors);
+#endif
+static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
+static void zfs_freevfs(vfs_t *vfsp);
+
+/*
+ * VFS operations vector for ZFS.  From __FreeBSD_version 1300049 on, the
+ * root vnode is served through vfs_cache_root() with zfs_root() installed
+ * as the vfs_cachedroot hook; on older versions zfs_root() is called
+ * directly.
+ */
+struct vfsops zfs_vfsops = {
+	.vfs_mount =		zfs_mount,
+	.vfs_unmount =		zfs_umount,
+#if __FreeBSD_version >= 1300049
+	.vfs_root =		vfs_cache_root,
+	.vfs_cachedroot = zfs_root,
+#else
+	.vfs_root =		zfs_root,
+#endif
+	.vfs_statfs =		zfs_statfs,
+	.vfs_vget =		zfs_vget,
+	.vfs_sync =		zfs_sync,
+	.vfs_checkexp =		zfs_checkexp,
+	.vfs_fhtovp =		zfs_fhtovp,
+	.vfs_quotactl =		zfs_quotactl,
+};
+
+/* Register the vector as file system type "zfs" (jail and delegadmin flags). */
+VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
+
+/*
+ * Count of active ZFS file systems.  It is kept so that the module cannot
+ * be unloaded after a forced unmount (umount -f) while file systems are
+ * still active.
+ */
+static uint32_t	zfs_active_fs_count = 0;
+
+/*
+ * Report the value a property currently has on the mounted file system of
+ * dataset `ds` when a mount option temporarily overrides it.
+ *
+ * On entry *val holds the property's stored value.  If the mount state
+ * yields a different value, *val is replaced and "temporary" is copied into
+ * `setpoint`; otherwise both are left untouched.
+ *
+ * Returns 0 on success, the error from dmu_objset_from_ds() or
+ * getzfsvfs_impl(), or ENOENT when no zfsvfs is found or the property is
+ * not one handled here.  The mount is passed to vfs_unbusy() on every path
+ * once a zfsvfs has been obtained.
+ */
+int
+zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
+    char *setpoint)
+{
+	const char *opt_off = NULL;	/* option that forces the value to 0 */
+	const char *opt_on = NULL;	/* option that forces the value to 1 */
+	uint64_t effective = *val;
+	objset_t *os;
+	zfsvfs_t *zfvp;
+	vfs_t *vfsp;
+	int error;
+
+	if ((error = dmu_objset_from_ds(ds, &os)) != 0)
+		return (error);
+	if ((error = getzfsvfs_impl(os, &zfvp)) != 0)
+		return (error);
+	if (zfvp == NULL)
+		return (ENOENT);
+	vfsp = zfvp->z_vfs;
+
+	/* Pick the off/on mount option pair for on/off style properties. */
+	switch (zfs_prop) {
+	case ZFS_PROP_ATIME:
+		opt_off = MNTOPT_NOATIME;
+		opt_on = MNTOPT_ATIME;
+		break;
+	case ZFS_PROP_DEVICES:
+		opt_off = MNTOPT_NODEVICES;
+		opt_on = MNTOPT_DEVICES;
+		break;
+	case ZFS_PROP_EXEC:
+		opt_off = MNTOPT_NOEXEC;
+		opt_on = MNTOPT_EXEC;
+		break;
+	case ZFS_PROP_SETUID:
+		opt_off = MNTOPT_NOSETUID;
+		opt_on = MNTOPT_SETUID;
+		break;
+	case ZFS_PROP_READONLY:
+		/* For readonly, "rw" means 0 and "ro" means 1. */
+		opt_off = MNTOPT_RW;
+		opt_on = MNTOPT_RO;
+		break;
+	case ZFS_PROP_NBMAND:
+		opt_off = MNTOPT_NONBMAND;
+		opt_on = MNTOPT_NBMAND;
+		break;
+	case ZFS_PROP_XATTR:
+		/* xattr is taken from the zfsvfs rather than a mount option. */
+		if (zfvp->z_flags & ZSB_XATTR)
+			effective = zfvp->z_xattr;
+		break;
+	default:
+		vfs_unbusy(vfsp);
+		return (ENOENT);
+	}
+
+	/* The "on" option is tested last, so it wins if both are set. */
+	if (opt_off != NULL) {
+		if (vfs_optionisset(vfsp, opt_off, NULL))
+			effective = 0;
+		if (vfs_optionisset(vfsp, opt_on, NULL))
+			effective = 1;
+	}
+
+	vfs_unbusy(vfsp);
+	if (effective != *val) {
+		(void) strcpy(setpoint, "temporary");
+		*val = effective;
+	}
+	return (0);
+}
+
+/*
+ * Fill in `dqp` with the block quota and current usage of user or group
+ * `id` on `zfsvfs`.
+ *
+ *	isgroup	non-zero selects the group quota/usage objects, zero the
+ *		user ones.
+ *
+ * Returns 0 on success, ENOENT when the file system has no quota object of
+ * the requested kind (or is replaying), or the zap_lookup() error.  An id
+ * with a quota but no usage entry is reported as using zero blocks.
+ */
+static int
+zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
+{
+	int error = 0;
+	char buf[32];
+	uint64_t usedobj, quotaobj;
+	uint64_t quota, used = 0;
+	timespec_t now;
+
+	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
+
+	if (quotaobj == 0 || zfsvfs->z_replay) {
+		error = ENOENT;
+		goto done;
+	}
+	/* Both ZAP objects are keyed by the id printed in hexadecimal. */
+	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);
+	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
+	    buf, sizeof (quota), 1, &quota)) != 0) {
+		dprintf("%s(%d): quotaobj lookup failed\n",
+		    __FUNCTION__, __LINE__);
+		goto done;
+	}
+	/*
+	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
+	 * So we set them to be the same.
+	 */
+	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
+	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
+	if (error == ENOENT) {
+		/*
+		 * No usage entry for this id.  That is not a failure: report
+		 * zero usage and make sure ENOENT is not handed back to the
+		 * caller, which would otherwise discard the filled-in dqp.
+		 */
+		used = 0;
+		error = 0;
+	} else if (error != 0) {
+		dprintf("%s(%d):  usedobj failed; %d\n",
+		    __FUNCTION__, __LINE__, error);
+		goto done;
+	}
+	dqp->dqb_curblocks = btodb(used);
+	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
+	vfs_timestamp(&now);
+	/*
+	 * Setting this to 0 causes FreeBSD quota(8) to print
+	 * the number of days since the epoch, which isn't
+	 * particularly useful.
+	 */
+	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
+done:
+	return (error);
+}
+
+/*
+ * VFS quotactl entry point: map BSD quotactl(2) requests onto ZFS user and
+ * group quotas.
+ *
+ *	cmds	command (upper bits, >> SUBCMDSHIFT) and quota type
+ *		(& SUBCMDMASK)
+ *	id	user/group id; -1 selects the calling thread's real uid/gid
+ *	arg	user-space buffer for the command
+ *
+ * Q_GETQUOTASIZE copies out 64, Q_QUOTAON succeeds without doing anything,
+ * Q_QUOTAOFF fails with ENOTSUP, Q_SETQUOTA/Q_GETQUOTA set/read the block
+ * limit; everything else is EINVAL.
+ *
+ * Before __FreeBSD_version 1400018 this function must call vfs_unbusy() on
+ * the mount for Q_QUOTAON and Q_QUOTAOFF on every return path, including
+ * the error paths.
+ */
+static int
+#if __FreeBSD_version >= 1400018
+zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
+#else
+zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
+#endif
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	struct thread *td;
+	int cmd, type, error = 0;
+	int bitsize;
+	zfs_userquota_prop_t quota_type;
+	struct dqblk64 dqblk = { 0 };
+
+	td = curthread;
+	cmd = cmds >> SUBCMDSHIFT;
+	type = cmds & SUBCMDMASK;
+
+	ZFS_ENTER(zfsvfs);
+	if (id == -1) {
+		switch (type) {
+		case USRQUOTA:
+			id = td->td_ucred->cr_ruid;
+			break;
+		case GRPQUOTA:
+			id = td->td_ucred->cr_rgid;
+			break;
+		default:
+			error = EINVAL;
+#if __FreeBSD_version < 1400018
+			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
+				vfs_unbusy(vfsp);
+#endif
+			goto done;
+		}
+	}
+	/*
+	 * Map BSD type to:
+	 * ZFS_PROP_USERUSED,
+	 * ZFS_PROP_USERQUOTA,
+	 * ZFS_PROP_GROUPUSED,
+	 * ZFS_PROP_GROUPQUOTA
+	 *
+	 * An unknown type leaves quota_type unset here; it is rejected by
+	 * the MAXQUOTAS check below before quota_type can be used.
+	 */
+	switch (cmd) {
+	case Q_SETQUOTA:
+	case Q_SETQUOTA32:
+		if (type == USRQUOTA)
+			quota_type = ZFS_PROP_USERQUOTA;
+		else if (type == GRPQUOTA)
+			quota_type = ZFS_PROP_GROUPQUOTA;
+		else
+			error = EINVAL;
+		break;
+	case Q_GETQUOTA:
+	case Q_GETQUOTA32:
+		if (type == USRQUOTA)
+			quota_type = ZFS_PROP_USERUSED;
+		else if (type == GRPQUOTA)
+			quota_type = ZFS_PROP_GROUPUSED;
+		else
+			error = EINVAL;
+		break;
+	}
+
+	/*
+	 * Depending on the cmd, we may need to get
+	 * the ruid and domain (see fuidstr_to_sid?),
+	 * the fuid (how?), or other information.
+	 * Create fuid using zfs_fuid_create(zfsvfs, id,
+	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
+	 * I think I can use just the id?
+	 *
+	 * Look at zfs_id_overquota() to look up a quota.
+	 * zap_lookup(something, quotaobj, fuidstring,
+	 *     sizeof (long long), 1, &quota)
+	 *
+	 * See zfs_set_userquota() to set a quota.
+	 */
+	if ((uint32_t)type >= MAXQUOTAS) {
+		error = EINVAL;
+#if __FreeBSD_version < 1400018
+		/*
+		 * Same obligation as on the EINVAL path above: Q_QUOTAON and
+		 * Q_QUOTAOFF must return with the mount unbusied, otherwise
+		 * the busy reference is leaked.
+		 */
+		if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
+			vfs_unbusy(vfsp);
+#endif
+		goto done;
+	}
+
+	switch (cmd) {
+	case Q_GETQUOTASIZE:
+		bitsize = 64;
+		error = copyout(&bitsize, arg, sizeof (int));
+		break;
+	case Q_QUOTAON:
+		// As far as I can tell, you can't turn quotas on or off on zfs
+		error = 0;
+#if __FreeBSD_version < 1400018
+		vfs_unbusy(vfsp);
+#endif
+		break;
+	case Q_QUOTAOFF:
+		error = ENOTSUP;
+#if __FreeBSD_version < 1400018
+		vfs_unbusy(vfsp);
+#endif
+		break;
+	case Q_SETQUOTA:
+		error = copyin(arg, &dqblk, sizeof (dqblk));
+		if (error == 0)
+			error = zfs_set_userquota(zfsvfs, quota_type,
+			    "", id, dbtob(dqblk.dqb_bhardlimit));
+		break;
+	case Q_GETQUOTA:
+		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
+		if (error == 0)
+			error = copyout(&dqblk, arg, sizeof (dqblk));
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+done:
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+
+/*
+ * Return non-zero when the mount backing `zfsvfs` has VFS_RDONLY set.
+ */
+boolean_t
+zfs_is_readonly(zfsvfs_t *zfsvfs)
+{
+	vfs_t *vfsp = zfsvfs->z_vfs;
+
+	return ((vfsp->vfs_flag & VFS_RDONLY) != 0);
+}
+
+/*
+ * VFS sync entry point.
+ *
+ * With a NULL vfsp every pool is synced (this is what sync(8) triggers:
+ * unlike other file systems, ZFS waits for all pools to commit all dirty
+ * data).  Otherwise the standard sync is run on the given file system and
+ * its intent log is committed.
+ */
+/*ARGSUSED*/
+static int
+zfs_sync(vfs_t *vfsp, int waitfor)
+{
+	zfsvfs_t *zfsvfs;
+	dsl_pool_t *pool;
+	int err;
+
+	/*
+	 * Data integrity is job one: a compromised kernel must not write
+	 * to the storage pool, so never sync during panic.
+	 */
+	if (panicstr)
+		return (0);
+
+	/*
+	 * The system syncher is ignored; ZFS already commits async data
+	 * at zfs_txg_timeout intervals.
+	 */
+	if (waitfor == MNT_LAZY)
+		return (0);
+
+	/* No specific file system: sync all ZFS pools. */
+	if (vfsp == NULL) {
+		spa_sync_allpools();
+		return (0);
+	}
+
+	/* Sync one specific file system. */
+	zfsvfs = vfsp->vfs_data;
+	err = vfs_stdsync(vfsp, waitfor);
+	if (err != 0)
+		return (err);
+
+	ZFS_ENTER(zfsvfs);
+	pool = dmu_objset_pool(zfsvfs->z_os);
+
+	/*
+	 * While the system is shutting down, file systems that may live
+	 * on a suspended pool are skipped.
+	 */
+	if (rebooting && spa_suspended(pool->dp_spa)) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	if (zfsvfs->z_log != NULL)
+		zil_commit(zfsvfs->z_log, 0);
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * Property callback for "atime": mirror the new value into z_atime, the
+ * MNT_NOATIME mount flag and the atime/noatime mount options.
+ */
+static void
+atime_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfvp = arg;
+	vfs_t *vfsp = zfvp->z_vfs;
+
+	if (newval != TRUE) {
+		/* atime off */
+		zfvp->z_atime = FALSE;
+		vfsp->vfs_flag |= MNT_NOATIME;
+		vfs_clearmntopt(vfsp, MNTOPT_ATIME);
+		vfs_setmntopt(vfsp, MNTOPT_NOATIME, NULL, 0);
+		return;
+	}
+
+	/* atime on */
+	zfvp->z_atime = TRUE;
+	vfsp->vfs_flag &= ~MNT_NOATIME;
+	vfs_clearmntopt(vfsp, MNTOPT_NOATIME);
+	vfs_setmntopt(vfsp, MNTOPT_ATIME, NULL, 0);
+}
+
+/*
+ * Property callback for "xattr": maintain the ZSB_XATTR flag and record in
+ * z_xattr_sa whether the new value is ZFS_XATTR_SA.  z_xattr_sa is left
+ * untouched when xattrs are switched off.
+ */
+static void
+xattr_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfvp = arg;
+
+	if (newval == ZFS_XATTR_OFF) {
+		zfvp->z_flags &= ~ZSB_XATTR;
+		return;
+	}
+
+	zfvp->z_flags |= ZSB_XATTR;
+	zfvp->z_xattr_sa = (newval == ZFS_XATTR_SA) ? B_TRUE : B_FALSE;
+}
+
+/*
+ * Property callback for "recordsize": store the new maximum block size in
+ * the zfsvfs and advertise it as the mount's f_iosize.  The value is
+ * asserted to be a power of two between SPA_MINBLOCKSIZE and the pool's
+ * maximum block size.
+ */
+static void
+blksz_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfvp = arg;
+
+	ASSERT(ISP2(newval));
+	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
+	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfvp->z_os)));
+
+	zfvp->z_vfs->mnt_stat.f_iosize = newval;
+	zfvp->z_max_blksz = newval;
+}
+
+/*
+ * Property callback for "readonly": mirror the new value into the
+ * VFS_RDONLY flag and the ro/rw mount options.
+ */
+static void
+readonly_changed_cb(void *arg, uint64_t newval)
+{
+	vfs_t *vfsp = ((zfsvfs_t *)arg)->z_vfs;
+
+	/* XXX locking on vfs_flag? */
+	if (!newval) {
+		vfsp->vfs_flag &= ~VFS_RDONLY;
+		vfs_clearmntopt(vfsp, MNTOPT_RO);
+		vfs_setmntopt(vfsp, MNTOPT_RW, NULL, 0);
+		return;
+	}
+
+	vfsp->vfs_flag |= VFS_RDONLY;
+	vfs_clearmntopt(vfsp, MNTOPT_RW);
+	vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
+}
+
+/*
+ * Property callback for "setuid": mirror the new value into the
+ * VFS_NOSETUID flag and the setuid/nosetuid mount options.
+ */
+static void
+setuid_changed_cb(void *arg, uint64_t newval)
+{
+	vfs_t *vfsp = ((zfsvfs_t *)arg)->z_vfs;
+	boolean_t allow = (newval != FALSE);
+
+	if (allow)
+		vfsp->vfs_flag &= ~VFS_NOSETUID;
+	else
+		vfsp->vfs_flag |= VFS_NOSETUID;
+
+	/* Drop the stale option, then record the one now in effect. */
+	vfs_clearmntopt(vfsp, allow ? MNTOPT_NOSETUID : MNTOPT_SETUID);
+	vfs_setmntopt(vfsp, allow ? MNTOPT_SETUID : MNTOPT_NOSETUID, NULL, 0);
+}
+
+/*
+ * Property callback for "exec": mirror the new value into the VFS_NOEXEC
+ * flag and the exec/noexec mount options.
+ */
+static void
+exec_changed_cb(void *arg, uint64_t newval)
+{
+	vfs_t *vfsp = ((zfsvfs_t *)arg)->z_vfs;
+	boolean_t allow = (newval != FALSE);
+
+	if (allow)
+		vfsp->vfs_flag &= ~VFS_NOEXEC;
+	else
+		vfsp->vfs_flag |= VFS_NOEXEC;
+
+	/* Drop the stale option, then record the one now in effect. */
+	vfs_clearmntopt(vfsp, allow ? MNTOPT_NOEXEC : MNTOPT_EXEC);
+	vfs_setmntopt(vfsp, allow ? MNTOPT_EXEC : MNTOPT_NOEXEC, NULL, 0);
+}
+
+/*
+ * Callback for the nbmand mount option, which can only be changed at mount
+ * time.  Toggling it on a live file system is not allowed, because cifs
+ * clients could then see incorrect behavior.
+ *
+ * The property is not registered through dsl_prop_register(); this callback
+ * is instead invoked when a file system is first mounted.
+ */
+static void
+nbmand_changed_cb(void *arg, uint64_t newval)
+{
+	vfs_t *vfsp = ((zfsvfs_t *)arg)->z_vfs;
+	boolean_t enabled = (newval != FALSE);
+
+	/* Drop the stale option, then record the one now in effect. */
+	vfs_clearmntopt(vfsp, enabled ? MNTOPT_NONBMAND : MNTOPT_NBMAND);
+	vfs_setmntopt(vfsp, enabled ? MNTOPT_NBMAND : MNTOPT_NONBMAND,
+	    NULL, 0);
+}
+
+/* Property callback for "snapdir": store the new value in z_show_ctldir. */
+static void
+snapdir_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_show_ctldir = newval;
+}
+
+/* Property callback for "vscan": store the new value in z_vscan. */
+static void
+vscan_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_vscan = newval;
+}
+
+/* Property callback for "aclmode": store the new value in z_acl_mode. */
+static void
+acl_mode_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_acl_mode = newval;
+}
+
+/* Property callback for "aclinherit": store the new value in z_acl_inherit. */
+static void
+acl_inherit_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_acl_inherit = newval;
+}
+
+/* Property callback for "acltype": store the new value in z_acl_type. */
+static void
+acl_type_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_acl_type = newval;
+}
+
+/*
+ * Register the dataset property callbacks for the file system mounted on
+ * `vfsp`, then re-apply any values that were given explicitly as mount
+ * options so that they act as temporary overrides.
+ *
+ * Returns 0 on success, EOPNOTSUPP for a snapshot, or the error from
+ * dsl_prop_get_int_ds() / dsl_prop_register().  If a registration fails,
+ * all callbacks registered for this zfsvfs are unregistered again.
+ */
+static int
+zfs_register_callbacks(vfs_t *vfsp)
+{
+	struct dsl_dataset *ds = NULL;
+	objset_t *os = NULL;
+	zfsvfs_t *zfsvfs = NULL;
+	uint64_t nbmand;
+	boolean_t readonly = B_FALSE;
+	boolean_t do_readonly = B_FALSE;
+	boolean_t setuid = B_FALSE;
+	boolean_t do_setuid = B_FALSE;
+	boolean_t exec = B_FALSE;
+	boolean_t do_exec = B_FALSE;
+	/*
+	 * NOTE(review): xattr is declared boolean_t but is assigned
+	 * ZFS_XATTR_OFF/DIR/SA below; confirm boolean_t can hold all three.
+	 */
+	boolean_t xattr = B_FALSE;
+	boolean_t atime = B_FALSE;
+	boolean_t do_atime = B_FALSE;
+	boolean_t do_xattr = B_FALSE;
+	int error = 0;
+
+	ASSERT3P(vfsp, !=, NULL);
+	zfsvfs = vfsp->vfs_data;
+	ASSERT3P(zfsvfs, !=, NULL);
+	os = zfsvfs->z_os;
+
+	/*
+	 * This function can be called for a snapshot when we update snapshot's
+	 * mount point, which isn't really supported.
+	 */
+	if (dmu_objset_is_snapshot(os))
+		return (EOPNOTSUPP);
+
+	/*
+	 * The act of registering our callbacks will destroy any mount
+	 * options we may have.  In order to enable temporary overrides
+	 * of mount options, we stash away the current values and
+	 * restore them after we register the callbacks.
+	 */
+	/* A pool that is not writeable forces readonly regardless of options. */
+	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
+	    !spa_writeable(dmu_objset_spa(os))) {
+		readonly = B_TRUE;
+		do_readonly = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
+		readonly = B_FALSE;
+		do_readonly = B_TRUE;
+	}
+	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
+		setuid = B_FALSE;
+		do_setuid = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
+		setuid = B_TRUE;
+		do_setuid = B_TRUE;
+	}
+	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
+		exec = B_FALSE;
+		do_exec = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
+		exec = B_TRUE;
+		do_exec = B_TRUE;
+	}
+	/* The plain "xattr" option is treated like "dirxattr". */
+	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
+		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
+		do_xattr = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
+		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
+		do_xattr = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
+		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
+		do_xattr = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
+		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
+		do_xattr = B_TRUE;
+	}
+	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
+		atime = B_FALSE;
+		do_atime = B_TRUE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
+		atime = B_TRUE;
+		do_atime = B_TRUE;
+	}
+
+	/*
+	 * We need to enter pool configuration here, so that we can use
+	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
+	 * dsl_prop_get_integer() can not be used, because it has to acquire
+	 * spa_namespace_lock and we can not do that because we already hold
+	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
+	 * with spa_namespace_lock held and the function calls ZFS vnode
+	 * operations to write the cache file and thus z_teardown_lock is
+	 * acquired after spa_namespace_lock.
+	 */
+	ds = dmu_objset_ds(os);
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+
+	/*
+	 * nbmand is a special property.  It can only be changed at
+	 * mount time.
+	 *
+	 * This is weird, but it is documented to only be changeable
+	 * at mount time.
+	 */
+	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
+		nbmand = B_FALSE;
+	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
+		nbmand = B_TRUE;
+	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
+		/* Nothing has been registered yet, so there is nothing to undo. */
+		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+		return (error);
+	}
+
+	/*
+	 * Register property callbacks.
+	 *
+	 * It would probably be fine to just check for i/o error from
+	 * the first prop_register(), but I guess I like to go
+	 * overboard...
+	 */
+	/* Each step runs only while error is still 0; the first failure sticks. */
+	error = dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
+	    zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+	if (error)
+		goto unregister;
+
+	/*
+	 * Invoke our callbacks to restore temporary mount options.
+	 */
+	if (do_readonly)
+		readonly_changed_cb(zfsvfs, readonly);
+	if (do_setuid)
+		setuid_changed_cb(zfsvfs, setuid);
+	if (do_exec)
+		exec_changed_cb(zfsvfs, exec);
+	if (do_xattr)
+		xattr_changed_cb(zfsvfs, xattr);
+	if (do_atime)
+		atime_changed_cb(zfsvfs, atime);
+
+	/* nbmand has no registered callback, so it is always applied here. */
+	nbmand_changed_cb(zfsvfs, nbmand);
+
+	return (0);
+
+unregister:
+	dsl_prop_unregister_all(ds, zfsvfs);
+	return (error);
+}
+
+/*
+ * Look up the optional 8-byte entry `name` in the master node ZAP of `os`.
+ * A missing entry is not an error: *objp is set to 0 and 0 is returned.
+ * Any other zap_lookup() failure is returned unchanged.
+ */
+static int
+zfsvfs_lookup_optional_obj(objset_t *os, const char *name, uint64_t *objp)
+{
+	int error;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, name, 8, 1, objp);
+	if (error == ENOENT) {
+		*objp = 0;
+		error = 0;
+	}
+	return (error);
+}
+
+/*
+ * Associate this zfsvfs with the given objset, which must be owned.
+ * This will cache a bunch of on-disk state from the objset in the
+ * zfsvfs.
+ *
+ * Returns 0 on success, ENOTSUP when the file system's ZPL version is newer
+ * than the pool supports, or the error of the first failing lookup.  The
+ * quota, FUID table and shares directory objects are optional and are
+ * recorded as 0 when absent.
+ */
+static int
+zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
+{
+	int error;
+	uint64_t val;
+
+	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
+	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
+	zfsvfs->z_os = os;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
+	if (error != 0)
+		return (error);
+	if (zfsvfs->z_version >
+	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+		/* Both arguments are cast to u_longlong_t, hence %llu. */
+		(void) printf("Can't mount a version %llu file system "
+		    "on a version %llu pool.\nPool must be upgraded to mount "
+		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
+		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
+		return (SET_ERROR(ENOTSUP));
+	}
+	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_norm = (int)val;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_utf8 = (val != 0);
+
+	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_case = (uint_t)val;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_acl_type = (uint_t)val;
+
+	/*
+	 * Fold case on file systems that are always or sometimes case
+	 * insensitive.
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+	    zfsvfs->z_case == ZFS_CASE_MIXED)
+		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
+
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+	uint64_t sa_obj = 0;
+	if (zfsvfs->z_use_sa) {
+		/* should either have both of these objects or none */
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+		    &sa_obj);
+		if (error != 0)
+			return (error);
+
+		/* A failure to read the xattr property is ignored here. */
+		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
+		if (error == 0 && val == ZFS_XATTR_SA)
+			zfsvfs->z_xattr_sa = B_TRUE;
+	}
+
+	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+	    &zfsvfs->z_attr_table);
+	if (error != 0)
+		return (error);
+
+	if (zfsvfs->z_version >= ZPL_VERSION_SA)
+		sa_register_update_callback(os, zfs_sa_upgrade);
+
+	/* The root directory and the unlinked set are mandatory. */
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
+	    &zfsvfs->z_root);
+	if (error != 0)
+		return (error);
+	ASSERT3U(zfsvfs->z_root, !=, 0);
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+	    &zfsvfs->z_unlinkedobj);
+	if (error != 0)
+		return (error);
+
+	/* Optional objects: a missing entry is recorded as object 0. */
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
+	    &zfsvfs->z_userquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
+	    &zfsvfs->z_groupquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
+	    &zfsvfs->z_projectquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
+	    &zfsvfs->z_userobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
+	    &zfsvfs->z_groupobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
+	    &zfsvfs->z_projectobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os, ZFS_FUID_TABLES,
+	    &zfsvfs->z_fuid_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os, ZFS_SHARES_DIR,
+	    &zfsvfs->z_shares_dir);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Only use the name cache if we are looking for a
+	 * name on a file system that does not require normalization
+	 * or case folding.  We can also look there if we happen to be
+	 * on a non-normalizing, mixed sensitivity file system IF we
+	 * are looking for the exact name (which is always the case on
+	 * FreeBSD).
+	 */
+	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
+	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
+	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
+
+	return (0);
+}
+
+taskq_t *zfsvfs_taskq;
+
+/*
+ * Task handler that drains the unlinked set of the zfsvfs passed as
+ * `context`; the pending count is unused.
+ */
+static void
+zfsvfs_task_unlinked_drain(void *context, int pending __unused)
+{
+	zfsvfs_t *zfvp = context;
+
+	zfs_unlinked_drain(zfvp);
+}
+
+/*
+ * Allocate a zfsvfs for the objset named `osname`, take ownership of the
+ * objset and initialise the zfsvfs through zfsvfs_create_impl().
+ *
+ * The objset is owned read-only when `readonly` is set or when the name
+ * contains '@'.  On success *zfvp receives the new zfsvfs.  Returns
+ * ENAMETOOLONG when the name does not fit in MNAMELEN, otherwise the
+ * result of dmu_objset_own() / zfsvfs_create_impl().
+ */
+int
+zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
+{
+	zfsvfs_t *zfsvfs;
+	objset_t *os;
+	boolean_t own_ro;
+	int error;
+
+	/*
+	 * XXX: Fix struct statfs so this isn't necessary!
+	 *
+	 * 'osname' becomes the file system's special node and therefore
+	 * has to fit in statfs.f_mntfromname.  Otherwise the mount cannot
+	 * be enumerated, libzfs_mnttab_find() returns NULL, and
+	 * 'zfs unmount' believes a mounted file system is not mounted.
+	 */
+	if (strlen(osname) >= MNAMELEN)
+		return (SET_ERROR(ENAMETOOLONG));
+
+	own_ro = (readonly || (strchr(osname, '@') != NULL));
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+
+	error = dmu_objset_own(osname, DMU_OST_ZFS, own_ro, B_TRUE, zfsvfs,
+	    &os);
+	if (error == 0)
+		return (zfsvfs_create_impl(zfvp, zfsvfs, os));
+
+	/* Ownership failed: release the zfsvfs allocated above. */
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+	return (error);
+}
+
+
+/*
+ * Initialise the locks, znode list and unlinked-drain task of an allocated
+ * `zfsvfs`, then load its state from the owned objset `os` with
+ * zfsvfs_init().
+ *
+ * On success *zfvp is set to `zfsvfs` and 0 is returned.  On failure the
+ * objset is disowned, `zfsvfs` is freed, *zfvp is set to NULL and the
+ * zfsvfs_init() error is returned.
+ */
+int
+zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
+{
+	int error;
+
+	zfsvfs->z_vfs = NULL;
+	zfsvfs->z_parent = zfsvfs;
+
+	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
+	    offsetof(znode_t, z_link_node));
+	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
+	    zfsvfs_task_unlinked_drain, zfsvfs);
+	ZFS_TEARDOWN_INIT(zfsvfs);
+	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
+	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
+	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+
+	error = zfsvfs_init(zfsvfs, os);
+	if (error != 0) {
+		/*
+		 * NOTE(review): the mutexes, rwlock, list and teardown locks
+		 * initialised above are not destroyed before the memory is
+		 * freed on this path - confirm whether that is acceptable.
+		 */
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
+		*zfvp = NULL;
+		kmem_free(zfsvfs, sizeof (zfsvfs_t));
+		return (error);
+	}
+
+	*zfvp = zfsvfs;
+	return (0);
+}
+
+/*
+ * Bring a zfsvfs_t into service.
+ *
+ * Checks the encryption on-disk version for writable mounts, registers the
+ * property callbacks and opens the ZIL.  When 'mounting' is B_TRUE it also
+ * creates the dataset kstats, drains the unlinked set (writable mounts
+ * only) and replays or destroys the intent log.  Finally the objset's user
+ * pointer is set to this zfsvfs.
+ *
+ * Returns 0 on success, EROFS when the dataset has an incompatible
+ * encryption version and the mount is not read-only, or the error returned
+ * by zfs_register_callbacks().
+ */
+static int
+zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
+{
+	int error;
+
+	/*
+	 * Check for a bad on-disk format version now since we
+	 * lied about owning the dataset readonly before.
+	 */
+	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
+	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
+		return (SET_ERROR(EROFS));
+
+	error = zfs_register_callbacks(zfsvfs->z_vfs);
+	if (error)
+		return (error);
+
+	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
+
+	/*
+	 * If we are not mounting (ie: online recv), then we don't
+	 * have to worry about replaying the log as we blocked all
+	 * operations out since we closed the ZIL.
+	 */
+	if (mounting) {
+		boolean_t readonly;
+
+		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
+		/*
+		 * During replay we remove the read only flag to
+		 * allow replays to succeed.
+		 */
+		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
+		if (readonly != 0) {
+			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
+		} else {
+			dsl_dir_t *dd;
+			zap_stats_t zs;
+
+			/*
+			 * Publish the current size of the unlinked set in
+			 * the kstats before draining it.
+			 */
+			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+			    &zs) == 0) {
+				dataset_kstats_update_nunlinks_kstat(
+				    &zfsvfs->z_kstat, zs.zs_num_entries);
+				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+				    "num_entries in unlinked set: %llu",
+				    (u_longlong_t)zs.zs_num_entries);
+			}
+
+			zfs_unlinked_drain(zfsvfs);
+			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
+			dd->dd_activity_cancelled = B_FALSE;
+		}
+
+		/*
+		 * Parse and replay the intent log.
+		 *
+		 * Because of ziltest, this must be done after
+		 * zfs_unlinked_drain().  (Further note: ziltest
+		 * doesn't use readonly mounts, where
+		 * zfs_unlinked_drain() isn't called.)  This is because
+		 * ziltest causes spa_sync() to think it's committed,
+		 * but actually it is not, so the intent log contains
+		 * many txg's worth of changes.
+		 *
+		 * In particular, if object N is in the unlinked set in
+		 * the last txg to actually sync, then it could be
+		 * actually freed in a later txg and then reallocated
+		 * in a yet later txg.  This would write a "create
+		 * object N" record to the intent log.  Normally, this
+		 * would be fine because the spa_sync() would have
+		 * written out the fact that object N is free, before
+		 * we could write the "create object N" intent log
+		 * record.
+		 *
+		 * But when we are in ziltest mode, we advance the "open
+		 * txg" without actually spa_sync()-ing the changes to
+		 * disk.  So we would see that object N is still
+		 * allocated and in the unlinked set, and there is an
+		 * intent log record saying to allocate it.
+		 */
+		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
+			if (zil_replay_disable) {
+				/* Replay is disabled: call zil_destroy(). */
+				zil_destroy(zfsvfs->z_log, B_FALSE);
+			} else {
+				/*
+				 * Replay with the name cache switched off
+				 * and z_replay set; both are restored once
+				 * zil_replay() returns.
+				 */
+				boolean_t use_nc = zfsvfs->z_use_namecache;
+				zfsvfs->z_use_namecache = B_FALSE;
+				zfsvfs->z_replay = B_TRUE;
+				zil_replay(zfsvfs->z_os, zfsvfs,
+				    zfs_replay_vector);
+				zfsvfs->z_replay = B_FALSE;
+				zfsvfs->z_use_namecache = use_nc;
+			}
+		}
+
+		/* restore readonly bit */
+		if (readonly != 0)
+			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
+	}
+
+	/*
+	 * Set the objset user_ptr to track its zfsvfs.
+	 */
+	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
+
+	return (0);
+}
+
+/*
+ * Release everything held by a zfsvfs_t and free the structure itself:
+ * the FUID state, its locks, the list of znodes and the dataset kstats.
+ * The filesystem is expected to have no znodes left (asserted).
+ */
+void
+zfsvfs_free(zfsvfs_t *zfsvfs)
+{
+	zfs_fuid_destroy(zfsvfs);
+
+	/* Tear down the locks and the list of znodes. */
+	mutex_destroy(&zfsvfs->z_znodes_lock);
+	mutex_destroy(&zfsvfs->z_lock);
+	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
+	list_destroy(&zfsvfs->z_all_znodes);
+	ZFS_TEARDOWN_DESTROY(zfsvfs);
+	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
+	rw_destroy(&zfsvfs->z_fuid_lock);
+	for (int mtx = 0; mtx < ZFS_OBJ_MTX_SZ; mtx++)
+		mutex_destroy(&zfsvfs->z_hold_mtx[mtx]);
+
+	dataset_kstats_destroy(&zfsvfs->z_kstat);
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+}
+
+/*
+ * Recompute z_use_fuids and z_use_sa from the filesystem version and, when
+ * the zfsvfs is attached to a mount, set or clear the matching VFS features
+ * according to z_use_fuids.
+ */
+static void
+zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
+{
+	vfs_t *vfsp;
+
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+
+	vfsp = zfsvfs->z_vfs;
+	if (vfsp != NULL && zfsvfs->z_use_fuids) {
+		vfs_set_feature(vfsp, VFSFT_XVATTR);
+		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
+		vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
+		vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
+		vfs_set_feature(vfsp, VFSFT_ACCESS_FILTER);
+		vfs_set_feature(vfsp, VFSFT_REPARSE);
+	} else if (vfsp != NULL) {
+		vfs_clear_feature(vfsp, VFSFT_XVATTR);
+		vfs_clear_feature(vfsp, VFSFT_SYSATTR_VIEWS);
+		vfs_clear_feature(vfsp, VFSFT_ACEMASKONACCESS);
+		vfs_clear_feature(vfsp, VFSFT_ACLONCREATE);
+		vfs_clear_feature(vfsp, VFSFT_ACCESS_FILTER);
+		vfs_clear_feature(vfsp, VFSFT_REPARSE);
+	}
+
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+}
+
+/*
+ * Mount the dataset named 'osname' on 'vfsp'.
+ *
+ * Creates the zfsvfs, fills in the mount's block sizes, flags, fsid and
+ * features, and then either applies the snapshot-specific settings or runs
+ * zfsvfs_setup().
+ *
+ * Returns 0 on success, in which case zfs_active_fs_count is incremented.
+ * If a step after zfsvfs_create() fails, the objset is disowned, the zfsvfs
+ * is freed and the error is returned.
+ */
+static int
+zfs_domount(vfs_t *vfsp, char *osname)
+{
+	uint64_t recordsize, fsid_guid;
+	int error = 0;
+	zfsvfs_t *zfsvfs;
+
+	ASSERT3P(vfsp, !=, NULL);
+	ASSERT3P(osname, !=, NULL);
+
+	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
+	if (error)
+		return (error);
+	zfsvfs->z_vfs = vfsp;
+
+	/* The dataset's recordsize is reported as the mount's I/O size. */
+	if ((error = dsl_prop_get_integer(osname,
+	    "recordsize", &recordsize, NULL)))
+		goto out;
+	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
+	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
+
+	vfsp->vfs_data = zfsvfs;
+	vfsp->mnt_flag |= MNT_LOCAL;
+	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
+	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
+	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
+	/*
+	 * This can cause a loss of coherence between ARC and page cache
+	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
+	 */
+	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
+	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
+	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
+
+#if defined(_KERNEL) && !defined(KMEM_DEBUG)
+	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
+#endif
+	/*
+	 * The fsid is 64 bits, composed of an 8-bit fs type, which
+	 * separates our fsid from any other filesystem types, and a
+	 * 56-bit objset unique ID.  The objset unique ID is unique to
+	 * all objsets open on this system, provided by unique_create().
+	 * The 8-bit fs type must be put in the low bits of fsid[1]
+	 * because that's where other Solaris filesystems put it.
+	 */
+	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
+	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
+	vfsp->vfs_fsid.val[0] = fsid_guid;
+	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
+	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);
+
+	/*
+	 * Set features for file system.
+	 */
+	zfs_set_fuid_feature(zfsvfs);
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
+		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
+		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
+	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
+		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
+		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
+	}
+	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
+
+	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
+		uint64_t pval;
+
+		/*
+		 * Snapshots skip zfsvfs_setup(): the property callbacks are
+		 * invoked directly (atime off, readonly on, xattr and acltype
+		 * from the dataset properties) and the objset user pointer
+		 * is set here.
+		 */
+		atime_changed_cb(zfsvfs, B_FALSE);
+		readonly_changed_cb(zfsvfs, B_TRUE);
+		if ((error = dsl_prop_get_integer(osname,
+		    "xattr", &pval, NULL)))
+			goto out;
+		xattr_changed_cb(zfsvfs, pval);
+		if ((error = dsl_prop_get_integer(osname,
+		    "acltype", &pval, NULL)))
+			goto out;
+		acl_type_changed_cb(zfsvfs, pval);
+		zfsvfs->z_issnap = B_TRUE;
+		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
+
+		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
+	} else {
+		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
+			goto out;
+	}
+
+	vfs_mountedfrom(vfsp, osname);
+
+	/* The .zfs control directory is only created for non-snapshots. */
+	if (!zfsvfs->z_issnap)
+		zfsctl_create(zfsvfs);
+out:
+	if (error) {
+		/*
+		 * NOTE(review): vfsp->vfs_data may still point at the zfsvfs
+		 * freed below - confirm callers do not use it after a failed
+		 * mount.
+		 */
+		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
+		zfsvfs_free(zfsvfs);
+	} else {
+		atomic_inc_32(&zfs_active_fs_count);
+	}
+
+	return (error);
+}
+
+/*
+ * Unregister every dsl property callback that was registered with this
+ * zfsvfs as its argument.  Nothing is done when the objset is a snapshot.
+ */
+static void
+zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
+{
+	objset_t *objset = zfsvfs->z_os;
+
+	if (dmu_objset_is_snapshot(objset))
+		return;
+
+	dsl_prop_unregister_all(dmu_objset_ds(objset), zfsvfs);
+}
+
+/*
+ * Copy the pool component of the dataset name 'osname' (everything before
+ * the first '/', or the whole name when there is none) into 'poolname' as a
+ * NUL-terminated string.
+ *
+ * Returns 0 on success or ENAMETOOLONG when the pool component is
+ * MAXNAMELEN characters or longer, in which case 'poolname' is untouched.
+ */
+static int
+getpoolname(const char *osname, char *poolname)
+{
+	const char *slash = strchr(osname, '/');
+	size_t len;
+
+	len = (slash != NULL) ? (size_t)(slash - osname) : strlen(osname);
+	if (len >= MAXNAMELEN)
+		return (ENAMETOOLONG);
+
+	/* Copies exactly 'len' characters and appends the terminator. */
+	(void) strlcpy(poolname, osname, len + 1);
+	return (0);
+}
+
+/*
+ * Parse the option prefix of a dataset name.  A leading '!' sets
+ * *checkpointrewind to true and is removed from 'name' in place; otherwise
+ * *checkpointrewind is set to false and 'name' is left unchanged.
+ */
+static void
+fetch_osname_options(char *name, bool *checkpointrewind)
+{
+	bool has_marker = (name[0] == '!');
+
+	*checkpointrewind = has_marker;
+
+	/* strlen(name) bytes from name + 1 include the terminating NUL. */
+	if (has_marker)
+		memmove(name, name + 1, strlen(name));
+}
+
+/*
+ * VFS mount entry point.
+ *
+ * Reads the dataset name from the "from" mount option, performs the
+ * privilege, delegation and jail visibility checks, and then either
+ * refreshes the property callbacks (remount) or mounts the dataset through
+ * zfs_domount().  For the initial root mount the root pool is imported
+ * first.
+ *
+ * Returns 0 on success or an errno value; EINVAL when the "from" option is
+ * missing and EPERM when the dataset is not visible from the current jail.
+ */
+/*ARGSUSED*/
+static int
+zfs_mount(vfs_t *vfsp)
+{
+	kthread_t	*td = curthread;
+	vnode_t		*mvp = vfsp->mnt_vnodecovered;
+	cred_t		*cr = td->td_ucred;
+	char		*osname;
+	int		error = 0;
+	int		canwrite;
+	bool		checkpointrewind, isctlsnap = false;
+
+	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * If full-owner-access is enabled and delegated administration is
+	 * turned on, we must set nosuid.
+	 */
+	if (zfs_super_owner &&
+	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
+		secpolicy_fs_mount_clearopts(cr, vfsp);
+	}
+
+	/*
+	 * Strip a leading '!' from osname (recording it in checkpointrewind)
+	 * and detect a snapshot being mounted on a .zfs control node.
+	 */
+	fetch_osname_options(osname, &checkpointrewind);
+	isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
+	    strchr(osname, '@') != NULL);
+
+	/*
+	 * Check for mount privilege?
+	 *
+	 * If we don't have privilege then see if
+	 * we have local permission to allow it
+	 */
+	error = secpolicy_fs_mount(cr, mvp, vfsp);
+	if (error && isctlsnap) {
+		secpolicy_fs_mount_clearopts(cr, vfsp);
+	} else if (error) {
+		/*
+		 * Every "goto out" in this branch returns the error that
+		 * secpolicy_fs_mount() produced.
+		 */
+		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
+			goto out;
+
+		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
+			vattr_t		vattr;
+
+			/*
+			 * Make sure user is the owner of the mount point
+			 * or has sufficient privileges.
+			 */
+
+			vattr.va_mask = AT_UID;
+
+			vn_lock(mvp, LK_SHARED | LK_RETRY);
+			if (VOP_GETATTR(mvp, &vattr, cr)) {
+				VOP_UNLOCK1(mvp);
+				goto out;
+			}
+
+			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
+			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
+				VOP_UNLOCK1(mvp);
+				goto out;
+			}
+			VOP_UNLOCK1(mvp);
+		}
+
+		secpolicy_fs_mount_clearopts(cr, vfsp);
+	}
+
+	/*
+	 * Refuse to mount a filesystem if we are in a local zone and the
+	 * dataset is not visible.
+	 */
+	if (!INGLOBALZONE(curproc) &&
+	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
+		boolean_t mount_snapshot = B_FALSE;
+
+		/*
+		 * Snapshots may be mounted in .zfs for unjailed datasets
+		 * if allowed by the jail param zfs.mount_snapshot.
+		 */
+		if (isctlsnap) {
+			struct prison *pr;
+			struct zfs_jailparam *zjp;
+
+			pr = curthread->td_ucred->cr_prison;
+			mtx_lock(&pr->pr_mtx);
+			zjp = osd_jail_get(pr, zfs_jailparam_slot);
+			mtx_unlock(&pr->pr_mtx);
+			if (zjp && zjp->mount_snapshot)
+				mount_snapshot = B_TRUE;
+		}
+		if (!mount_snapshot) {
+			error = SET_ERROR(EPERM);
+			goto out;
+		}
+	}
+
+	vfsp->vfs_flag |= MNT_NFS4ACLS;
+
+	/*
+	 * When doing a remount, we simply refresh our temporary properties
+	 * according to those options set in the current VFS options.
+	 */
+	if (vfsp->vfs_flag & MS_REMOUNT) {
+		zfsvfs_t *zfsvfs = vfsp->vfs_data;
+
+		/*
+		 * Refresh mount options with z_teardown_lock blocking I/O while
+		 * the filesystem is in an inconsistent state.
+		 * The lock also serializes this code with filesystem
+		 * manipulations between entry to zfs_suspend_fs() and return
+		 * from zfs_resume_fs().
+		 */
+		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
+		zfs_unregister_callbacks(zfsvfs);
+		error = zfs_register_callbacks(vfsp);
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+		goto out;
+	}
+
+	/* Initial root mount: try hard to import the requested root pool. */
+	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
+	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
+		char pname[MAXNAMELEN];
+
+		error = getpoolname(osname, pname);
+		if (error == 0)
+			error = spa_import_rootpool(pname, checkpointrewind);
+		if (error)
+			goto out;
+	}
+	/* zfs_domount() runs between DROP_GIANT() and PICKUP_GIANT(). */
+	DROP_GIANT();
+	error = zfs_domount(vfsp, osname);
+	PICKUP_GIANT();
+
+out:
+	return (error);
+}
+
+/*
+ * VFS statfs entry point: fill 'statp' with the space and object usage of
+ * the mounted dataset, together with its type and mount names.
+ *
+ * Returns 0 on the path visible here.  ZFS_ENTER() is a macro that may
+ * presumably leave the function early when the filesystem is unavailable.
+ */
+static int
+zfs_statfs(vfs_t *vfsp, struct statfs *statp)
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	uint64_t used_bytes, free_bytes, used_objs, free_objs;
+
+	statp->f_version = STATFS_VERSION;
+
+	ZFS_ENTER(zfsvfs);
+
+	dmu_objset_space(zfsvfs->z_os,
+	    &used_bytes, &free_bytes, &used_objs, &free_objs);
+
+	/*
+	 * A storage pool mixes several block sizes.  The smallest supported
+	 * block size is reported as f_bsize, and the I/O size recorded on
+	 * the mount is reported as f_iosize.
+	 */
+	statp->f_bsize = SPA_MINBLOCKSIZE;
+	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
+
+	/* Block totals, counted in units of the minimum block size. */
+	statp->f_blocks = (used_bytes + free_bytes) >> SPA_MINBLOCKSHIFT;
+	statp->f_bfree = free_bytes / statp->f_bsize;
+	statp->f_bavail = statp->f_bfree;	/* no root reservation */
+
+	/*
+	 * ZFS does not preallocate file metadata, so the free file count is
+	 * the smaller of the available objects and the free blocks (every
+	 * object takes at least one block); the total is that figure plus
+	 * the objects already in use.
+	 */
+	statp->f_ffree = MIN(free_objs, statp->f_bfree);
+	statp->f_files = statp->f_ffree + used_objs;
+
+	/* Filesystem type, then the mount's source and mount point names. */
+	strlcpy(statp->f_fstypename, "zfs",
+	    sizeof (statp->f_fstypename));
+	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
+	    sizeof (statp->f_mntfromname));
+	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
+	    sizeof (statp->f_mntonname));
+
+	statp->f_namemax = MAXNAMELEN - 1;
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * VFS root entry point: look up the root znode of the filesystem and return
+ * its vnode through 'vpp', locked according to 'flags'.
+ *
+ * Returns 0 on success.  If zfs_zget() fails its error is returned and *vpp
+ * is not written; if vn_lock() fails the vnode is released, *vpp is set to
+ * NULL and the lock error is returned.
+ */
+static int
+zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	znode_t *root_znode;
+	int rc;
+
+	ZFS_ENTER(zfsvfs);
+	rc = zfs_zget(zfsvfs, zfsvfs->z_root, &root_znode);
+	if (rc != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (rc);
+	}
+	*vpp = ZTOV(root_znode);
+	ZFS_EXIT(zfsvfs);
+
+	/* The vnode is locked only after leaving the filesystem. */
+	rc = vn_lock(*vpp, flags);
+	if (rc != 0) {
+		VN_RELE(*vpp);
+		*vpp = NULL;
+	}
+	return (rc);
+}
+
+/*
+ * Teardown the zfsvfs::z_os.
+ *
+ * Waits for outstanding zrele tasks, takes the teardown locks for writing,
+ * closes the ZIL, releases the dbuf holds of every znode, unregisters the
+ * property callbacks and evicts the cached data of the objset.
+ *
+ * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
+ * and 'z_teardown_inactive_lock' held.
+ *
+ * Returns 0 on success, or EIO when not unmounting and the filesystem was
+ * already unmounted or has no objset (both locks are dropped in that case).
+ */
+static int
+zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
+{
+	znode_t	*zp;
+	dsl_dir_t *dd;
+
+	/*
+	 * If someone has not already unmounted this file system,
+	 * drain the zrele_taskq to ensure all active references to the
+	 * zfsvfs_t have been handled; only then can it be safely destroyed.
+	 */
+	if (zfsvfs->z_os) {
+		/*
+		 * If we're unmounting we have to wait for the list to
+		 * drain completely.
+		 *
+		 * If we're not unmounting there's no guarantee the list
+		 * will drain completely, but zreles run from the taskq
+		 * may add the parents of dir-based xattrs to the taskq
+		 * so we want to wait for these.
+		 *
+		 * We can safely read z_nr_znodes without locking because the
+		 * VFS has already blocked operations which add to the
+		 * z_all_znodes list and thus increment z_nr_znodes.
+		 */
+		int round = 0;
+		while (zfsvfs->z_nr_znodes > 0) {
+			taskq_wait_outstanding(dsl_pool_zrele_taskq(
+			    dmu_objset_pool(zfsvfs->z_os)), 0);
+			/* When not unmounting, give up after two rounds. */
+			if (++round > 1 && !unmounting)
+				break;
+		}
+	}
+	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
+
+	if (!unmounting) {
+		/*
+		 * We purge the parent filesystem's vfsp as the parent
+		 * filesystem and all of its snapshots have their vnode's
+		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
+		 * 'z_parent' is self referential for non-snapshots.
+		 */
+#ifdef FREEBSD_NAMECACHE
+#if __FreeBSD_version >= 1300117
+		cache_purgevfs(zfsvfs->z_parent->z_vfs);
+#else
+		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
+#endif
+#endif
+	}
+
+	/*
+	 * Close the zil. NB: Can't close the zil while zfs_inactive
+	 * threads are blocked as zil_close can call zfs_inactive.
+	 */
+	if (zfsvfs->z_log) {
+		zil_close(zfsvfs->z_log);
+		zfsvfs->z_log = NULL;
+	}
+
+	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);
+
+	/*
+	 * If we are not unmounting (ie: online recv) and someone already
+	 * unmounted this file system while we were doing the switcheroo,
+	 * or a reopen of z_os failed then just bail out now.
+	 */
+	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
+		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+		return (SET_ERROR(EIO));
+	}
+
+	/*
+	 * At this point there are no vops active, and any new vops will
+	 * fail with EIO since we have z_teardown_lock for writer (only
+	 * relevant for forced unmount).
+	 *
+	 * Release all holds on dbufs.
+	 */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
+	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+		if (zp->z_sa_hdl != NULL) {
+			zfs_znode_dmu_fini(zp);
+		}
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	/*
+	 * If we are unmounting, set the unmounted flag and let new vops
+	 * unblock.  zfs_inactive will have the unmounted behavior, and all
+	 * other vops will fail with EIO.
+	 */
+	if (unmounting) {
+		zfsvfs->z_unmounted = B_TRUE;
+		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+	}
+
+	/*
+	 * z_os will be NULL if there was an error in attempting to reopen
+	 * zfsvfs, so just return as the properties had already been
+	 * unregistered and cached data had been evicted before.
+	 */
+	if (zfsvfs->z_os == NULL)
+		return (0);
+
+	/*
+	 * Unregister properties.
+	 */
+	zfs_unregister_callbacks(zfsvfs);
+
+	/*
+	 * Evict cached data
+	 */
+	if (!zfs_is_readonly(zfsvfs))
+		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+	dmu_objset_evict_dbufs(zfsvfs->z_os);
+	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
+	dsl_dir_cancel_waiters(dd);
+
+	return (0);
+}
+
+/*
+ * VFS unmount entry point.
+ *
+ * Checks that the caller may unmount, unmounts snapshots mounted under
+ * .zfs, flushes the vnodes, cancels the unlinked-drain task, tears the
+ * zfsvfs down, disowns the objset and finally frees the zfsvfs through
+ * zfs_freevfs().
+ *
+ * Returns 0 on success.  A non-zero return comes from the permission
+ * check, zfsctl_umount_snapshots() or vflush(); the zfsvfs is not freed on
+ * those paths.
+ */
+/*ARGSUSED*/
+static int
+zfs_umount(vfs_t *vfsp, int fflag)
+{
+	kthread_t *td = curthread;
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+	objset_t *os;
+	cred_t *cr = td->td_ucred;
+	int ret;
+
+	/*
+	 * Without the unmount privilege, the caller may still proceed if
+	 * dsl_deleg_access() returns 0 for ZFS_DELEG_PERM_MOUNT.
+	 */
+	ret = secpolicy_fs_unmount(cr, vfsp);
+	if (ret) {
+		if (dsl_deleg_access((char *)vfsp->vfs_resource,
+		    ZFS_DELEG_PERM_MOUNT, cr))
+			return (ret);
+	}
+
+	/*
+	 * Unmount any snapshots mounted under .zfs before unmounting the
+	 * dataset itself.
+	 */
+	if (zfsvfs->z_ctldir != NULL) {
+		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
+			return (ret);
+	}
+
+	if (fflag & MS_FORCE) {
+		/*
+		 * Mark file system as unmounted before calling
+		 * vflush(FORCECLOSE). This way we ensure no future vnops
+		 * will be called and risk operating on DOOMED vnodes.
+		 */
+		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
+		zfsvfs->z_unmounted = B_TRUE;
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+	}
+
+	/*
+	 * Flush all the files.
+	 */
+	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
+	if (ret != 0)
+		return (ret);
+	/*
+	 * Cancel the unlinked-drain task; as long as the cancel call reports
+	 * a non-zero result, drain the task and try again.
+	 */
+	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
+	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
+		taskqueue_drain(zfsvfs_taskq->tq_queue,
+		    &zfsvfs->z_unlinked_drain_task);
+
+	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
+	os = zfsvfs->z_os;
+
+	/*
+	 * z_os will be NULL if there was an error in
+	 * attempting to reopen zfsvfs.
+	 */
+	if (os != NULL) {
+		/*
+		 * Unset the objset user_ptr.
+		 */
+		mutex_enter(&os->os_user_ptr_lock);
+		dmu_objset_set_user(os, NULL);
+		mutex_exit(&os->os_user_ptr_lock);
+
+		/*
+		 * Finally release the objset
+		 */
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
+	}
+
+	/*
+	 * We can now safely destroy the '.zfs' directory node.
+	 */
+	if (zfsvfs->z_ctldir != NULL)
+		zfsctl_destroy(zfsvfs);
+	/* Frees the zfsvfs; it must not be referenced after this call. */
+	zfs_freevfs(vfsp);
+
+	return (0);
+}
+
+/*
+ * VFS vget entry point: return the vnode for inode number 'ino' through
+ * 'vpp', locked according to 'flags'.
+ *
+ * Returns 0 on success.  EOPNOTSUPP is returned for the virtual .zfs
+ * entries (without touching *vpp), EINVAL for an unlinked znode, and
+ * otherwise the error from zfs_zget() or vn_lock(); *vpp is NULL on those
+ * failures.
+ */
+static int
+zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
+{
+	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
+	znode_t		*zp;
+	int		rc;
+
+	/*
+	 * Virtual entries such as .zfs/ and .zfs/snapshot/ cannot be handled
+	 * by zfs_zget(), so EOPNOTSUPP is returned for them.  This makes
+	 * NFS switch to LOOKUP instead of using VGET.
+	 */
+	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
+	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
+		return (EOPNOTSUPP);
+
+	ZFS_ENTER(zfsvfs);
+	rc = zfs_zget(zfsvfs, ino, &zp);
+	if (rc == 0) {
+		if (zp->z_unlinked) {
+			vrele(ZTOV(zp));
+			rc = EINVAL;
+		} else {
+			*vpp = ZTOV(zp);
+		}
+	}
+	ZFS_EXIT(zfsvfs);
+
+	/* Lock outside the filesystem; drop the reference on failure. */
+	if (rc == 0 && (rc = vn_lock(*vpp, flags)) != 0)
+		vrele(*vpp);
+	if (rc != 0)
+		*vpp = NULL;
+	return (rc);
+}
+
+/*
+ * VFS checkexp entry point: forward the export check to
+ * vfs_stdcheckexp(), always using the parent filesystem's mount.  The
+ * prototype differs depending on __FreeBSD_version (types of 'extflagsp'
+ * and 'secflavors').
+ */
+static int
+#if __FreeBSD_version >= 1300098
+zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors)
+#else
+zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int **secflavors)
+#endif
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+
+	/*
+	 * If this is regular file system vfsp is the same as
+	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
+	 * zfsvfs->z_parent->z_vfs represents parent file system
+	 * which we have to use here, because only this file system
+	 * has mnt_export configured.
+	 */
+	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
+	    credanonp, numsecflavors, secflavors));
+}
+
+/*
+ * Compile-time checks that both the short and the long ZFS file id lengths
+ * fit in a struct fid.
+ */
+CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
+CTASSERT(LONG_FID_LEN <= sizeof (struct fid));
+
+/*
+ * VFS fhtovp entry point: translate the file handle 'fidp' into a vnode
+ * returned through 'vpp'.
+ *
+ * A short fid carries an object number and a generation.  A long fid also
+ * carries an objset id and generation; when received on a non-snapshot
+ * zfsvfs, the objset id is used to look up the zfsvfs to operate on.
+ * Handles that name the .zfs control directory nodes are resolved through
+ * zfsctl_root() instead of zfs_zget().
+ *
+ * Returns 0 with *vpp set on success.  On failure *vpp is NULL and an errno
+ * value is returned: EINVAL for an unknown objset, a bad fid length, a
+ * non-zero set generation or a generation mismatch, otherwise the error
+ * from zfs_zget() or vn_lock().
+ */
+static int
+zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
+{
+	struct componentname cn;
+	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
+	znode_t		*zp;
+	vnode_t		*dvp;
+	uint64_t	object = 0;
+	uint64_t	fid_gen = 0;
+	uint64_t	setgen = 0;
+	uint64_t	gen_mask;
+	uint64_t	zp_gen;
+	int		i, err;
+
+	*vpp = NULL;
+
+	ZFS_ENTER(zfsvfs);
+
+	/*
+	 * On FreeBSD we can get snapshot's mount point or its parent file
+	 * system mount point depending if snapshot is already mounted or not.
+	 */
+	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
+		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
+		uint64_t	objsetid = 0;
+
+		/* Both fields are stored as little-endian byte arrays. */
+		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
+			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
+
+		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
+			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
+
+		ZFS_EXIT(zfsvfs);
+
+		/* Continue on the zfsvfs that belongs to that objset. */
+		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
+		if (err)
+			return (SET_ERROR(EINVAL));
+		ZFS_ENTER(zfsvfs);
+	}
+
+	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
+		zfid_short_t	*zfid = (zfid_short_t *)fidp;
+
+		for (i = 0; i < sizeof (zfid->zf_object); i++)
+			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
+
+		/* 'i' is reused below to build gen_mask. */
+		for (i = 0; i < sizeof (zfid->zf_gen); i++)
+			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
+	} else {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (fidp->fid_len == LONG_FID_LEN && setgen != 0) {
+		ZFS_EXIT(zfsvfs);
+		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
+		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
+	 * directory tree. If the object == zfsvfs->z_shares_dir, then
+	 * we are in the .zfs/shares directory tree.
+	 */
+	if ((fid_gen == 0 &&
+	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
+	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
+		ZFS_EXIT(zfsvfs);
+		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
+		if (object == ZFSCTL_INO_SNAPDIR) {
+			cn.cn_nameptr = "snapshot";
+			cn.cn_namelen = strlen(cn.cn_nameptr);
+			cn.cn_nameiop = LOOKUP;
+			cn.cn_flags = ISLASTCN | LOCKLEAF;
+			cn.cn_lkflags = flags;
+			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
+			vput(dvp);
+		} else if (object == zfsvfs->z_shares_dir) {
+			/*
+			 * XXX This branch must not be taken,
+			 * if it is, then the lookup below will
+			 * explode.
+			 */
+			cn.cn_nameptr = "shares";
+			cn.cn_namelen = strlen(cn.cn_nameptr);
+			cn.cn_nameiop = LOOKUP;
+			cn.cn_flags = ISLASTCN;
+			cn.cn_lkflags = flags;
+			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
+			vput(dvp);
+		} else {
+			*vpp = dvp;
+		}
+		/*
+		 * Every step above is wrapped in VERIFY0(), so reaching this
+		 * point means success.  'err' is not returned because it is
+		 * only assigned on the long-fid path.
+		 */
+		return (0);
+	}
+
+	/* Mask covering as many bytes as the fid's generation field. */
+	gen_mask = -1ULL >> (64 - 8 * i);
+
+	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
+	    (u_longlong_t)fid_gen,
+	    (u_longlong_t)gen_mask);
+	if ((err = zfs_zget(zfsvfs, object, &zp))) {
+		ZFS_EXIT(zfsvfs);
+		return (err);
+	}
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+	    sizeof (uint64_t));
+	zp_gen = zp_gen & gen_mask;
+	if (zp_gen == 0)
+		zp_gen = 1;
+	if (zp->z_unlinked || zp_gen != fid_gen) {
+		dprintf("znode gen (%llu) != fid gen (%llu)\n",
+		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
+		vrele(ZTOV(zp));
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	*vpp = ZTOV(zp);
+	ZFS_EXIT(zfsvfs);
+	err = vn_lock(*vpp, flags);
+	if (err == 0) {
+		vnode_create_vobject(*vpp, zp->z_size, curthread);
+	} else {
+		/*
+		 * Drop the reference obtained from zfs_zget() so the vnode
+		 * is not leaked, matching what zfs_vget() does.
+		 */
+		vrele(*vpp);
+		*vpp = NULL;
+	}
+	return (err);
+}
+
+/*
+ * Suspend a mounted filesystem: block out VOPs and close zfsvfs_t::z_os by
+ * running zfsvfs_teardown() in its non-unmounting mode.
+ *
+ * On success (return value 0) the caller holds 'z_teardown_lock' and
+ * 'z_teardown_inactive_lock' for writing.  Ownership of the underlying
+ * dataset and objset is left intact so that they can be handed off
+ * atomically during a subsequent rollback or recv operation and the resume
+ * that follows.  On failure the error from zfsvfs_teardown() is returned.
+ */
+int
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
+{
+	return (zfsvfs_teardown(zfsvfs, B_FALSE));
+}
+
+/*
+ * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
+ * is an invariant across any of the operations that can be performed while the
+ * filesystem was suspended.  Whether it succeeded or failed, the preconditions
+ * are the same: the relevant objset and associated dataset are owned by
+ * zfsvfs, held, and long held on entry.
+ *
+ * Both teardown locks must be write held on entry (asserted) and are
+ * released before returning.  Returns 0 on success or the error from
+ * zfsvfs_init(); in the error case a forced unmount of the filesystem is
+ * attempted.
+ */
+int
+zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
+{
+	int err;
+	znode_t *zp;
+
+	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
+	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
+
+	/*
+	 * We already own this, so just update the objset_t, as the one we
+	 * had before may have been evicted.
+	 */
+	objset_t *os;
+	VERIFY3P(ds->ds_owner, ==, zfsvfs);
+	VERIFY(dsl_dataset_long_held(ds));
+	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
+	dsl_pool_config_enter(dp, FTAG);
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	dsl_pool_config_exit(dp, FTAG);
+
+	err = zfsvfs_init(zfsvfs, os);
+	if (err != 0)
+		goto bail;
+
+	ds->ds_dir->dd_activity_cancelled = B_FALSE;
+	/* Not mounting: zfsvfs_setup() skips the kstat and replay work. */
+	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));
+
+	zfs_set_fuid_feature(zfsvfs);
+
+	/*
+	 * Attempt to re-establish all the active znodes with
+	 * their dbufs.  If a zfs_rezget() fails, then we'll let
+	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
+	 * when they try to use their znode.
+	 */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
+	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+		(void) zfs_rezget(zp);
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+bail:
+	/* release the VOPs */
+	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
+	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+
+	if (err) {
+		/*
+		 * Since we couldn't setup the sa framework, try to force
+		 * unmount this file system.
+		 */
+		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
+			vfs_ref(zfsvfs->z_vfs);
+			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
+		}
+	}
+	return (err);
+}
+
+/*
+ * Free the zfsvfs attached to 'vfsp' and account for one fewer active ZFS
+ * filesystem.
+ */
+static void
+zfs_freevfs(vfs_t *vfsp)
+{
+	zfsvfs_free(vfsp->vfs_data);
+	atomic_dec_32(&zfs_active_fs_count);
+}
+
+#ifdef __i386__
+/*
+ * Value of desiredvnodes saved by zfs_vnodes_adjust() so that
+ * zfs_vnodes_adjust_back() can restore it.
+ */
+static int desiredvnodes_backup;
+#include <sys/vmmeter.h>
+
+
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#endif
+
+/*
+ * On i386 only: remember the current desiredvnodes and, if it still equals
+ * the value computed below, lower it to three quarters of that value.  On
+ * every other architecture this function does nothing.
+ */
+static void
+zfs_vnodes_adjust(void)
+{
+#ifdef __i386__
+	int newdesiredvnodes;
+
+	desiredvnodes_backup = desiredvnodes;
+
+	/*
+	 * We calculate newdesiredvnodes the same way it is done in
+	 * vntblinit(). If it is equal to desiredvnodes, it means that
+	 * it wasn't tuned by the administrator and we can tune it down.
+	 */
+	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
+	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
+	    sizeof (struct vnode))));
+	if (newdesiredvnodes == desiredvnodes)
+		desiredvnodes = (3 * newdesiredvnodes) / 4;
+#endif
+}
+
+/*
+ * On i386 only: restore desiredvnodes to the value saved by
+ * zfs_vnodes_adjust().  A no-op on every other architecture.
+ */
+static void
+zfs_vnodes_adjust_back(void)
+{
+
+#ifdef __i386__
+	desiredvnodes = desiredvnodes_backup;
+#endif
+}
+
+/*
+ * Module-level initialization of the ZFS VFS layer: prints the ZPL version,
+ * initializes the .zfs control directory and znode subsystems, adjusts the
+ * vnode limit, registers the DMU_OST_ZFS objset type and creates the
+ * "zfsvfs" taskq.  zfs_fini() undoes this.
+ */
+void
+zfs_init(void)
+{
+
+	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
+
+	/*
+	 * Initialize .zfs directory structures
+	 */
+	zfsctl_init();
+
+	/*
+	 * Initialize znode cache, vnode ops, etc...
+	 */
+	zfs_znode_init();
+
+	/*
+	 * Reduce number of vnodes. Originally number of vnodes is calculated
+	 * with UFS inode in mind. We reduce it here, because it's too big for
+	 * ZFS/i386.
+	 */
+	zfs_vnodes_adjust();
+
+	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
+
+	/* Taskq with a single thread, used for the unlinked-drain tasks. */
+	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
+}
+
+/*
+ * Module-level teardown, the counterpart of zfs_init(): destroys the
+ * "zfsvfs" taskq, shuts down the .zfs control directory and znode
+ * subsystems and restores the vnode limit.
+ */
+void
+zfs_fini(void)
+{
+	taskq_destroy(zfsvfs_taskq);
+	zfsctl_fini();
+	zfs_znode_fini();
+	zfs_vnodes_adjust_back();
+}
+
+/*
+ * Report whether any ZFS filesystem is currently counted as active.
+ * Returns 1 when zfs_active_fs_count is non-zero and 0 otherwise.
+ */
+int
+zfs_busy(void)
+{
+	if (zfs_active_fs_count == 0)
+		return (0);
+	return (1);
+}
+
+/*
+ * Release VOPs and unmount a suspended filesystem.
+ *
+ * Both teardown locks must be write held on entry (asserted); they are
+ * released here.  The objset of 'ds', which must be owned by zfsvfs and
+ * long held, is looked up again and stored in z_os before the unmount is
+ * attempted.
+ *
+ * Always returns 0.  When zfs_umount() succeeds the zfsvfs has been freed
+ * and must not be used by the caller afterwards.
+ */
+int
+zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
+{
+	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
+	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
+
+	/*
+	 * We already own this, so just hold and rele it to update the
+	 * objset_t, as the one we had before may have been evicted.
+	 */
+	objset_t *os;
+	VERIFY3P(ds->ds_owner, ==, zfsvfs);
+	VERIFY(dsl_dataset_long_held(ds));
+	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
+	dsl_pool_config_enter(dp, FTAG);
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	dsl_pool_config_exit(dp, FTAG);
+	zfsvfs->z_os = os;
+
+	/* release the VOPs */
+	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
+	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+
+	/*
+	 * Try to unmount this file system.
+	 *
+	 * A successful zfs_umount() ends in zfs_freevfs(), which frees the
+	 * zfsvfs (after zfsvfs_teardown() already set z_unmounted), so the
+	 * structure must not be written to afterwards.  Only when the
+	 * unmount fails is the zfsvfs still alive and in need of being
+	 * marked unmounted here.
+	 */
+	if (zfs_umount(zfsvfs->z_vfs, 0) != 0)
+		zfsvfs->z_unmounted = B_TRUE;
+	return (0);
+}
+
+/*
+ * Upgrade the ZPL version of a filesystem to 'newvers'.
+ *
+ * The new version is written to the master node inside a single
+ * transaction.  When the upgrade crosses ZPL_VERSION_SA and SA is not yet
+ * in use, the SA master node is created and registered in the same
+ * transaction.  Afterwards the cached versions and the FUID/SA feature
+ * flags are refreshed.
+ *
+ * Returns 0 on success; EINVAL if 'newvers' is outside
+ * [ZPL_VERSION_INITIAL, ZPL_VERSION] or lower than the current version;
+ * ENOTSUP if the pool's SPA version is lower than the one 'newvers' maps
+ * to; otherwise the error from dmu_tx_assign() or zap_update().
+ */
+int
+zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
+{
+	int error;
+	objset_t *os = zfsvfs->z_os;
+	dmu_tx_t *tx;
+
+	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
+		return (SET_ERROR(EINVAL));
+
+	/* Downgrades are rejected. */
+	if (newvers < zfsvfs->z_version)
+		return (SET_ERROR(EINVAL));
+
+	if (zfs_spa_version_map(newvers) >
+	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
+		return (SET_ERROR(ENOTSUP));
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		/* Extra holds for the SA master node created below. */
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+		    ZFS_SA_ATTRS);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	}
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		return (error);
+	}
+
+	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+	    8, 1, &newvers, tx);
+
+	if (error) {
+		/* The assigned tx is still committed on this failure path. */
+		dmu_tx_commit(tx);
+		return (error);
+	}
+
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		uint64_t sa_obj;
+
+		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+		    SPA_VERSION_SA);
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+
+		/*
+		 * NOTE(review): a zap_add() failure is only caught by
+		 * ASSERT0(), i.e. presumably only in debug builds - confirm.
+		 */
+		error = zap_add(os, MASTER_NODE_OBJ,
+		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT0(error);
+
+		VERIFY0(sa_set_sa_object(os, sa_obj));
+		sa_register_update_callback(os, zfs_sa_upgrade);
+	}
+
+	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
+	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
+	    (uintmax_t)newvers);
+	dmu_tx_commit(tx);
+
+	/* Update the cached versions and the features derived from them. */
+	zfsvfs->z_version = newvers;
+	os->os_version = newvers;
+
+	zfs_set_fuid_feature(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * Read a property stored within the master node.
+ *
+ * The lookup order is: the copy cached in the objset_t (version,
+ * normalization, utf8only and case only), then the master node ZAP, then a
+ * built-in default.  'os' may be NULL, in which case only the built-in
+ * default is available.  A value obtained from the ZAP or from the defaults
+ * is stored back into the objset_t cache when the property has a cache
+ * slot.
+ *
+ * Returns 0 with *value set on success, ENOENT when the property is not
+ * set and has no built-in default, or another error from zap_lookup().
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	/* Fast path: the cache slot already holds a value. */
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
+
+	/*
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
+	 */
+	const char *pname;
+	int error = ENOENT;
+	if (prop == ZFS_PROP_VERSION) {
+		pname = ZPL_VERSION_STR;
+	} else {
+		pname = zfs_prop_to_name(prop);
+	}
+
+	/* With no objset, error stays ENOENT and the defaults below apply. */
+	if (os != NULL) {
+		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+	}
+
+	if (error == ENOENT) {
+		/* No value set, use the default value */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			*value = ZPL_VERSION;
+			break;
+		case ZFS_PROP_NORMALIZE:
+		case ZFS_PROP_UTF8ONLY:
+			*value = 0;
+			break;
+		case ZFS_PROP_CASE:
+			*value = ZFS_CASE_SENSITIVE;
+			break;
+		case ZFS_PROP_ACLTYPE:
+			*value = ZFS_ACLTYPE_NFSV4;
+			break;
+		default:
+			return (error);
+		}
+		error = 0;
+	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
+	return (error);
+}
+
+/*
+ * Report whether VFS unmount has been initiated for the file system that is
+ * attached to this objset.
+ *
+ * Returns B_TRUE only when the objset has a zfsvfs user pointer, that zfsvfs
+ * has a vfs, and the vfs has MNTK_UNMOUNT set in mnt_kern_flag; otherwise
+ * B_FALSE.  The check is made under os_user_ptr_lock.
+ */
+boolean_t
+zfs_get_vfs_flag_unmounted(objset_t *os)
+{
+	boolean_t result;
+	zfsvfs_t *zfsvfs;
+
+	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);
+
+	mutex_enter(&os->os_user_ptr_lock);
+	zfsvfs = dmu_objset_get_user(os);
+	if (zfsvfs == NULL || zfsvfs->z_vfs == NULL)
+		result = B_FALSE;
+	else if (zfsvfs->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)
+		result = B_TRUE;
+	else
+		result = B_FALSE;
+	mutex_exit(&os->os_user_ptr_lock);
+
+	return (result);
+}
+
+#ifdef _KERNEL
+/*
+ * Rewrite the recorded "mounted from" name of every entry on the mount list
+ * after a rename from oldname to newname.
+ *
+ * An entry whose f_mntfromname equals oldname is replaced by newname.  An
+ * entry that starts with oldname immediately followed by '/' or '@' keeps
+ * its suffix and gets newname as the new prefix.  The walk is done with
+ * mountlist_mtx held.
+ */
+void
+zfsvfs_update_fromname(const char *oldname, const char *newname)
+{
+	char renamed[MAXPATHLEN];
+	struct mount *mp;
+	size_t prefixlen = strlen(oldname);
+
+	mtx_lock(&mountlist_mtx);
+	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+		char *from = mp->mnt_stat.f_mntfromname;
+
+		if (strcmp(from, oldname) == 0) {
+			/* Exact match: take the new name as is. */
+			(void) strlcpy(from, newname,
+			    sizeof (mp->mnt_stat.f_mntfromname));
+		} else if (strncmp(from, oldname, prefixlen) == 0 &&
+		    (from[prefixlen] == '/' || from[prefixlen] == '@')) {
+			/* Prefix match: swap the prefix, keep the suffix. */
+			(void) snprintf(renamed, sizeof (renamed), "%s%s",
+			    newname, from + prefixlen);
+			(void) strlcpy(from, renamed,
+			    sizeof (mp->mnt_stat.f_mntfromname));
+		}
+	}
+	mtx_unlock(&mountlist_mtx);
+}
+#endif
+
+/*
+ * Walk from "spr" up through its ancestors until a prison that carries ZFS
+ * info is found.  prison0 always qualifies and supplies zfs_jailparam0.
+ *
+ * Returns the ZFS info and stores the prison it belongs to in *prp.  That
+ * prison's pr_mtx is held on return; every prison visited before it has been
+ * unlocked again.
+ */
+static struct zfs_jailparam *
+zfs_jailparam_find(struct prison *spr, struct prison **prp)
+{
+	struct prison *cur = spr;
+	struct zfs_jailparam *found;
+
+	for (;;) {
+		mtx_lock(&cur->pr_mtx);
+		if (cur == &prison0)
+			found = &zfs_jailparam0;
+		else
+			found = osd_jail_get(cur, zfs_jailparam_slot);
+		if (found != NULL)
+			break;
+		/* Nothing here: release this prison and try its parent. */
+		mtx_unlock(&cur->pr_mtx);
+		cur = cur->pr_parent;
+	}
+
+	*prp = cur;
+	return (found);
+}
+
+/*
+ * Ensure a prison has its own ZFS info.  If zjpp is non-null, point it to the
+ * ZFS info and lock the prison.
+ *
+ * On return pr->pr_mtx is held only when zjpp is non-NULL, in which case the
+ * caller must unlock it.  With a NULL zjpp the prison is unlocked before
+ * returning.
+ */
+static void
+zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
+{
+	struct prison *ppr;
+	struct zfs_jailparam *zjp, *nzjp;
+	void **rsv;
+
+	/*
+	 * If this prison already has ZFS info, return that.
+	 * zfs_jailparam_find() hands back ppr locked, so when ppr == pr the
+	 * prison is already locked for the "done" path.
+	 */
+	zjp = zfs_jailparam_find(pr, &ppr);
+	if (ppr == pr)
+		goto done;
+
+	/*
+	 * Allocate a new info record.  Then check again, in case something
+	 * changed during the allocation.  The ancestor's lock is dropped
+	 * before the M_WAITOK allocation and the OSD slot reservation.
+	 */
+	mtx_unlock(&ppr->pr_mtx);
+	nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
+	rsv = osd_reserve(zfs_jailparam_slot);
+	zjp = zfs_jailparam_find(pr, &ppr);
+	if (ppr == pr) {
+		/* The prison gained its own info meanwhile; discard ours. */
+		free(nzjp, M_PRISON);
+		osd_free_reserved(rsv);
+		goto done;
+	}
+	/*
+	 * Inherit the initial values from the ancestor.  Both pr and the
+	 * ancestor ppr are locked while the new record is attached and
+	 * filled in; only pr remains locked afterwards.
+	 */
+	mtx_lock(&pr->pr_mtx);
+	(void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
+	(void) memcpy(nzjp, zjp, sizeof (*zjp));
+	zjp = nzjp;
+	mtx_unlock(&ppr->pr_mtx);
+done:
+	/* pr is locked here on every path. */
+	if (zjpp != NULL)
+		*zjpp = zjp;
+	else
+		mtx_unlock(&pr->pr_mtx);
+}
+
+/*
+ * Jail OSD methods for ZFS VFS info.
+ */
+/*
+ * PR_METHOD_CREATE handler.  Unless the new prison was created with
+ * "zfs" set to JAIL_SYS_INHERIT, give it its own ZFS info record.
+ * Always returns 0.
+ */
+static int
+zfs_jailparam_create(void *obj, void *data)
+{
+	struct vfsoptlist *opts = data;
+	struct prison *pr = obj;
+	int jsys;
+
+	if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) != 0 ||
+	    jsys != JAIL_SYS_INHERIT) {
+		/*
+		 * Inherit a prison's initial values from its parent
+		 * (different from JAIL_SYS_INHERIT which also inherits
+		 * changes).
+		 */
+		zfs_jailparam_alloc(pr, NULL);
+	}
+	return (0);
+}
+
+/*
+ * PR_METHOD_GET handler.  Reports "zfs" as JAIL_SYS_NEW when the prison has
+ * its own ZFS info and as JAIL_SYS_INHERIT otherwise, and reports
+ * "zfs.mount_snapshot" from the prison's own info, or as zero when the info
+ * is inherited.  ENOENT from vfs_setopt() is not treated as an error.
+ *
+ * Returns 0 or the first other error from vfs_setopt().
+ */
+static int
+zfs_jailparam_get(void *obj, void *data)
+{
+	/* Value reported for prisons that inherit their ZFS info. */
+	static int inherited_mount_snapshot = 0;
+	struct prison *pr = obj;
+	struct prison *holder;
+	struct vfsoptlist *opts = data;
+	struct zfs_jailparam *zjp;
+	int error, jsys;
+
+	/* "holder" is the prison supplying the info; it comes back locked. */
+	zjp = zfs_jailparam_find(pr, &holder);
+	jsys = (holder == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+
+	error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys));
+	if (error == 0 || error == ENOENT) {
+		if (jsys == JAIL_SYS_NEW) {
+			error = vfs_setopt(opts, "zfs.mount_snapshot",
+			    &zjp->mount_snapshot,
+			    sizeof (zjp->mount_snapshot));
+		} else {
+			error = vfs_setopt(opts, "zfs.mount_snapshot",
+			    &inherited_mount_snapshot,
+			    sizeof (inherited_mount_snapshot));
+		}
+		if (error == ENOENT)
+			error = 0;
+	}
+
+	mtx_unlock(&holder->pr_mtx);
+	return (error);
+}
+
+/*
+ * PR_METHOD_SET handler.
+ *
+ * With "zfs=new", or when "zfs.mount_snapshot" is present, the prison gets
+ * its own ZFS info record (created from its ancestor's values if it had
+ * none) and mount_snapshot is updated when a value was supplied.  Otherwise
+ * the prison's own record is removed so that it inherits again.
+ *
+ * Returns 0 on success, or EPERM when the request would give a child jail a
+ * larger mount_snapshot value than the info that applies to its parent.
+ */
+static int
+zfs_jailparam_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *ppr;
+	struct vfsoptlist *opts = data;
+	int error, jsys, mount_snapshot;
+
+	/* Set the parameters, which should be correct. */
+	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
+	if (error == ENOENT)
+		jsys = -1;
+	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
+	    sizeof (mount_snapshot));
+	if (error == ENOENT)
+		mount_snapshot = -1;
+	else
+		jsys = JAIL_SYS_NEW;
+	if (jsys == JAIL_SYS_NEW) {
+		/* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
+		struct zfs_jailparam *zjp;
+
+		/*
+		 * A child jail cannot have more permissions than its parent
+		 */
+		if (pr->pr_parent != &prison0) {
+			int parent_mount_snapshot;
+
+			/*
+			 * Copy the ancestor's value while its prison is
+			 * still locked; the record must not be dereferenced
+			 * once the lock returned by zfs_jailparam_find()
+			 * has been dropped.
+			 */
+			zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
+			parent_mount_snapshot = zjp->mount_snapshot;
+			mtx_unlock(&ppr->pr_mtx);
+			if (parent_mount_snapshot < mount_snapshot) {
+				return (EPERM);
+			}
+		}
+		/* Returns with pr locked because zjpp is non-NULL. */
+		zfs_jailparam_alloc(pr, &zjp);
+		if (mount_snapshot != -1)
+			zjp->mount_snapshot = mount_snapshot;
+		mtx_unlock(&pr->pr_mtx);
+	} else {
+		/* "zfs=inherit": inherit the parent's ZFS info. */
+		mtx_lock(&pr->pr_mtx);
+		osd_jail_del(pr, zfs_jailparam_slot);
+		mtx_unlock(&pr->pr_mtx);
+	}
+	return (0);
+}
+
+/*
+ * PR_METHOD_CHECK handler.  Validates the ZFS jail parameters that are
+ * present in the option list: "zfs" must be JAIL_SYS_NEW or JAIL_SYS_INHERIT
+ * and "zfs.mount_snapshot" must be 0 or 1.  Absent parameters are accepted.
+ *
+ * Returns 0, EINVAL for an out-of-range value, or the vfs_copyopt() error.
+ */
+static int
+zfs_jailparam_check(void *obj __unused, void *data)
+{
+	struct vfsoptlist *opts = data;
+	int error, jsys, mount_snapshot;
+
+	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
+	if (error == 0) {
+		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
+			return (EINVAL);
+	} else if (error != ENOENT) {
+		return (error);
+	}
+
+	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
+	    sizeof (mount_snapshot));
+	if (error == 0) {
+		if (mount_snapshot != 0 && mount_snapshot != 1)
+			return (EINVAL);
+	} else if (error != ENOENT) {
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * OSD destructor registered with osd_jail_register(): releases a ZFS info
+ * record that was allocated from M_PRISON.
+ */
+static void
+zfs_jailparam_destroy(void *data)
+{
+	free(data, M_PRISON);
+}
+
+/*
+ * Register the ZFS jail OSD slot together with its create/get/set/check
+ * methods, then make sure every prison that already exists has its own ZFS
+ * info record.
+ */
+static void
+zfs_jailparam_sysinit(void *arg __unused)
+{
+	osd_method_t methods[PR_MAXMETHOD] = {
+		[PR_METHOD_CHECK] = zfs_jailparam_check,
+		[PR_METHOD_SET] = zfs_jailparam_set,
+		[PR_METHOD_GET] = zfs_jailparam_get,
+		[PR_METHOD_CREATE] = zfs_jailparam_create,
+	};
+	struct prison *pr;
+
+	zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods);
+
+	/* Copy the defaults to any existing prisons. */
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		zfs_jailparam_alloc(pr, NULL);
+	}
+	sx_sunlock(&allprison_lock);
+}
+
+/*
+ * Counterpart of zfs_jailparam_sysinit(): deregister the ZFS jail OSD slot.
+ */
+static void
+zfs_jailparam_sysuninit(void *arg __unused)
+{
+	osd_jail_deregister(zfs_jailparam_slot);
+}
+
+/*
+ * Hook the jail parameter setup and teardown routines into the
+ * SI_SUB_DRIVERS stage with SI_ORDER_ANY.
+ */
+SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
+	zfs_jailparam_sysinit, NULL);
+SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
+	zfs_jailparam_sysuninit, NULL);

diff --git a/zfs/module/os/freebsd/zfs/zfs_vnops_os.c b/zfs/module/os/freebsd/zfs/zfs_vnops_os.c
new file mode 100644
index 0000000..ea6388d
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_vnops_os.c

@@ -0,0 +1,6265 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 Nexenta Systems, Inc.
+ */
+
+/* Portions Copyright 2007 Jeremy Teo */
+/* Portions Copyright 2010 Robert Milkowski */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <sys/endian.h>
+#include <sys/vm.h>
+#include <sys/vnode.h>
+#if __FreeBSD_version >= 1300102
+#include <sys/smr.h>
+#endif
+#include <sys/dirent.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/taskq.h>
+#include <sys/uio.h>
+#include <sys/atomic.h>
+#include <sys/namei.h>
+#include <sys/mman.h>
+#include <sys/cmn_err.h>
+#include <sys/kdb.h>
+#include <sys/sysproto.h>
+#include <sys/errno.h>
+#include <sys/unistd.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/spa.h>
+#include <sys/txg.h>
+#include <sys/dbuf.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+#include <sys/filio.h>
+#include <sys/sid.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_rlock.h>
+#include <sys/extdirent.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/sched.h>
+#include <sys/acl.h>
+#include <sys/vmmeter.h>
+#include <vm/vm_param.h>
+#include <sys/zil.h>
+#include <sys/zfs_vnops.h>
+
+#include <vm/vm_object.h>
+
+#include <sys/extattr.h>
+#include <sys/priv.h>
+
+/* Fall back to an empty flag when the headers do not define VN_OPEN_INVFS. */
+#ifndef VN_OPEN_INVFS
+#define	VN_OPEN_INVFS	0x0
+#endif
+
+VFS_SMR_DECLARE;
+
+/* Before 1300103, spell NDFREE_PNBUF() in terms of NDFREE(). */
+#if __FreeBSD_version < 1300103
+#define	NDFREE_PNBUF(ndp)	NDFREE((ndp), NDF_ONLY_PNBUF)
+#endif
+
+/*
+ * From 1300047 on the wire lock/unlock wrappers expand to nothing; older
+ * versions take and release the page lock.
+ */
+#if __FreeBSD_version >= 1300047
+#define	vm_page_wire_lock(pp)
+#define	vm_page_wire_unlock(pp)
+#else
+#define	vm_page_wire_lock(pp) vm_page_lock(pp)
+#define	vm_page_wire_unlock(pp) vm_page_unlock(pp)
+#endif
+
+/*
+ * With DEBUG_VFS_LOCKS, assert that a vnode has both a hold count and a use
+ * count; otherwise the check compiles away.
+ */
+#ifdef DEBUG_VFS_LOCKS
+#define	VNCHECKREF(vp)				  \
+	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
+	    ("%s: wrong ref counts", __func__));
+#else
+#define	VNCHECKREF(vp)
+#endif
+
+/* Integer type used for cookies; 64-bit from 1400045 on. */
+#if __FreeBSD_version >= 1400045
+typedef uint64_t cookie_t;
+#else
+typedef ulong_t cookie_t;
+#endif
+
+/*
+ * Programming rules.
+ *
+ * Each vnode op performs some logical unit of work.  To do this, the ZPL must
+ * properly lock its in-core state, create a DMU transaction, do the work,
+ * record this work in the intent log (ZIL), commit the DMU transaction,
+ * and wait for the intent log to commit if it is a synchronous operation.
+ * Moreover, the vnode ops must work in both normal and log replay context.
+ * The ordering of events is important to avoid deadlocks and references
+ * to freed memory.  The example below illustrates the following Big Rules:
+ *
+ *  (1)	A check must be made in each zfs thread for a mounted file system.
+ *	This is done avoiding races using ZFS_ENTER(zfsvfs).
+ *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
+ *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *	can return EIO from the calling function.
+ *
+ *  (2)	VN_RELE() should always be the last thing except for zil_commit()
+ *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
+ *	First, if it's the last reference, the vnode/znode
+ *	can be freed, so the zp may point to freed memory.  Second, the last
+ *	reference will call zfs_zinactive(), which may induce a lot of work --
+ *	pushing cached pages (which acquires range locks) and syncing out
+ *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
+ *	which could deadlock the system if you were already holding one.
+ *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
+ *
+ *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
+ *	as they can span dmu_tx_assign() calls.
+ *
+ *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
+ *      dmu_tx_assign().  This is critical because we don't want to block
+ *      while holding locks.
+ *
+ *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *	reduces lock contention and CPU usage when we must wait (note that if
+ *	throughput is constrained by the storage, nearly every transaction
+ *	must wait).
+ *
+ *      Note, in particular, that if a lock is sometimes acquired before
+ *      the tx assigns, and sometimes after (e.g. z_lock), then failing
+ *      to use a non-blocking assign can deadlock the system.  The scenario:
+ *
+ *	Thread A has grabbed a lock before calling dmu_tx_assign().
+ *	Thread B is in an already-assigned tx, and blocks for this lock.
+ *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
+ *	forever, because the previous txg can't quiesce until B's tx commits.
+ *
+ *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
+ *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
+ *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
+ *	to indicate that this operation has already called dmu_tx_wait().
+ *	This will ensure that we don't retry forever, waiting a short bit
+ *	each time.
+ *
+ *  (5)	If the operation succeeded, generate the intent log entry for it
+ *	before dropping locks.  This ensures that the ordering of events
+ *	in the intent log matches the order in which they actually occurred.
+ *	During ZIL replay the zfs_log_* functions will update the sequence
+ *	number to indicate the zil transaction has replayed.
+ *
+ *  (6)	At the end of each vnode op, the DMU tx must always commit,
+ *	regardless of whether there were any errors.
+ *
+ *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
+ *	to ensure that synchronous semantics are provided when necessary.
+ *
+ * In general, this is how things should be ordered in each vnode op:
+ *
+ *	ZFS_ENTER(zfsvfs);		// exit if unmounted
+ * top:
+ *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
+ *	rw_enter(...);			// grab any other locks you need
+ *	tx = dmu_tx_create(...);	// get DMU tx
+ *	dmu_tx_hold_*();		// hold each object you might modify
+ *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ *	if (error) {
+ *		rw_exit(...);		// drop locks
+ *		zfs_dirent_unlock(dl);	// unlock directory entry
+ *		VN_RELE(...);		// release held vnodes
+ *		if (error == ERESTART) {
+ *			waited = B_TRUE;
+ *			dmu_tx_wait(tx);
+ *			dmu_tx_abort(tx);
+ *			goto top;
+ *		}
+ *		dmu_tx_abort(tx);	// abort DMU tx
+ *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		return (error);		// really out of space
+ *	}
+ *	error = do_real_work();		// do whatever this VOP does
+ *	if (error == 0)
+ *		zfs_log_*(...);		// on success, make ZIL entry
+ *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
+ *	rw_exit(...);			// drop locks
+ *	zfs_dirent_unlock(dl);		// unlock directory entry
+ *	VN_RELE(...);			// release held vnodes
+ *	zil_commit(zilog, foid);	// synchronous when necessary
+ *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	return (error);			// done, report error
+ */
+
+/*
+ * Open a ZFS vnode.
+ *
+ * Refuses (EPERM) to open an append-only file for writing without FAPPEND.
+ * When virus scanning is enabled on the file system, a non-empty,
+ * non-quarantined regular file is passed to fs_vscan(); a scan failure
+ * yields EACCES.  Opens with FSYNC or FDSYNC bump the znode's z_sync_cnt.
+ *
+ * Returns 0 on success or an errno value.
+ */
+/* ARGSUSED */
+static int
+zfs_open(vnode_t **vpp, int flag, cred_t *cr)
+{
+	znode_t *zp = VTOZ(*vpp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	boolean_t scan_candidate;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/* Append-only files may be written only through FAPPEND opens. */
+	if ((flag & FWRITE) && !(flag & FAPPEND) &&
+	    (zp->z_pflags & ZFS_APPENDONLY)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	scan_candidate = !zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
+	    ZTOV(zp)->v_type == VREG &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0;
+	if (scan_candidate && fs_vscan(*vpp, cr, 0) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EACCES));
+	}
+
+	/* Keep a count of the synchronous opens in the znode */
+	if ((flag & (FSYNC | FDSYNC)) != 0)
+		atomic_inc_32(&zp->z_sync_cnt);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Close a ZFS vnode.
+ *
+ * On the last close (count == 1) of an FSYNC/FDSYNC open the znode's
+ * z_sync_cnt is decremented.  When virus scanning is enabled on the file
+ * system, a non-empty, non-quarantined regular file is passed to fs_vscan(),
+ * whose result is verified to be 0.  Always returns 0 once the file system
+ * has been entered.
+ */
+/* ARGSUSED */
+static int
+zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
+{
+	znode_t *zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	boolean_t scan_candidate;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/* Decrement the synchronous opens in the znode */
+	if (count == 1 && (flag & (FSYNC | FDSYNC)) != 0)
+		atomic_dec_32(&zp->z_sync_cnt);
+
+	scan_candidate = !zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
+	    ZTOV(zp)->v_type == VREG &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0;
+	if (scan_candidate)
+		VERIFY0(fs_vscan(vp, cr, 1));
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * ioctl handler for ZFS vnodes.
+ *
+ * _FIOFFS, _FIOGDIO and _FIOSDIO succeed without doing anything.
+ * F_SEEK_DATA and F_SEEK_HOLE treat "data" as a pointer to an offset_t that
+ * is both input and output: the offset is passed to zfs_holey() and, on
+ * success, the result is written back.  Any other command yields ENOTTY.
+ */
+/* ARGSUSED */
+static int
+zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
+    int *rvalp)
+{
+	offset_t *offp;
+	loff_t off;
+	int error;
+
+	/*
+	 * _FIOFFS is accepted as a no-op.  _FIOGDIO and _FIOSDIO are used by
+	 * bfu.  Faking out, necessary to avoid bfu errors.
+	 */
+	if (com == _FIOFFS || com == _FIOGDIO || com == _FIOSDIO)
+		return (0);
+
+	if (com != F_SEEK_DATA && com != F_SEEK_HOLE)
+		return (SET_ERROR(ENOTTY));
+
+	/* offset parameter is in/out */
+	offp = (offset_t *)data;
+	off = *offp;
+	error = zfs_holey(VTOZ(vp), com, &off);
+	if (error == 0)
+		*offp = off;
+	return (error);
+}
+
+/*
+ * Look up the page of vp's VM object at index OFF_TO_IDX(start) and, if a
+ * fully valid page is found, return it shared-busied after adding a
+ * paging-in-progress count to the object, removing write mappings of the
+ * page and clearing its dirty bits for the given range.  "off" and "nbytes"
+ * are the range handed to vm_page_clear_dirty() once it has been shrunk to
+ * DEV_BSIZE boundaries.  Returns NULL when no valid page is present.
+ */
+static vm_page_t
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
+{
+	vm_object_t obj;
+	vm_page_t pp;
+	int64_t end;
+
+	/*
+	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
+	 * aligned boundaries, if the range is not aligned.  As a result a
+	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
+	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
+	 * the whole page would be considered clean despite having some
+	 * dirty data.
+	 * For this reason we should shrink the range to DEV_BSIZE aligned
+	 * boundaries before calling vm_page_clear_dirty.
+	 */
+	end = rounddown2(off + nbytes, DEV_BSIZE);
+	off = roundup2(off, DEV_BSIZE);
+	nbytes = end - off;
+
+	obj = vp->v_object;
+	zfs_vmobject_assert_wlocked_12(obj);
+#if __FreeBSD_version < 1300050
+	/* Older kernels: look the page up by hand under the object lock. */
+	for (;;) {
+		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+		    pp->valid) {
+			if (vm_page_xbusied(pp)) {
+				/*
+				 * Reference the page before unlocking and
+				 * sleeping so that the page daemon is less
+				 * likely to reclaim it.
+				 */
+				vm_page_reference(pp);
+				vm_page_lock(pp);
+				zfs_vmobject_wunlock(obj);
+				vm_page_busy_sleep(pp, "zfsmwb", true);
+				zfs_vmobject_wlock(obj);
+				/* Retry the lookup after sleeping. */
+				continue;
+			}
+			vm_page_sbusy(pp);
+		} else if (pp != NULL) {
+			/* A page exists but holds no valid data; ignore it. */
+			ASSERT(!pp->valid);
+			pp = NULL;
+		}
+		if (pp != NULL) {
+			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			vm_object_pip_add(obj, 1);
+			pmap_remove_write(pp);
+			/* The shrunk range may be empty; then nothing to clear. */
+			if (nbytes != 0)
+				vm_page_clear_dirty(pp, off, nbytes);
+		}
+		break;
+	}
+#else
+	/*
+	 * Newer kernels: let vm_page_grab_valid_unlocked() do the lookup.
+	 * NOTE(review): VM_ALLOC_NOCREAT presumably means an absent page is
+	 * not created and pp comes back NULL -- confirm against vm_page(9).
+	 */
+	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
+	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
+	    VM_ALLOC_IGN_SBUSY);
+	if (pp != NULL) {
+		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+		vm_object_pip_add(obj, 1);
+		pmap_remove_write(pp);
+		/* The shrunk range may be empty; then nothing to clear. */
+		if (nbytes != 0)
+			vm_page_clear_dirty(pp, off, nbytes);
+	}
+#endif
+	return (pp);
+}
+
+/*
+ * Release a page obtained from page_busy(): drop the shared busy state and
+ * give back the paging-in-progress count taken on the page's object.
+ */
+static void
+page_unbusy(vm_page_t pp)
+{
+
+	vm_page_sunbusy(pp);
+#if __FreeBSD_version >= 1300041
+	vm_object_pip_wakeup(pp->object);
+#else
+	vm_object_pip_subtract(pp->object, 1);
+#endif
+}
+
+/*
+ * page_hold(): return the valid page of vp's VM object at index
+ * OFF_TO_IDX(start) with a reference that keeps it in place (wired on
+ * kernels after 1300051, held on older ones), or NULL if there is no valid
+ * page.  The reference is dropped with page_unhold().
+ */
+#if __FreeBSD_version > 1300051
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+	vm_object_t obj;
+	vm_page_t m;
+
+	obj = vp->v_object;
+	/*
+	 * NOTE(review): the flags request a wired, non-busied page and
+	 * presumably no creation of a missing one -- confirm against
+	 * vm_page(9).
+	 */
+	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
+	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
+	    VM_ALLOC_NOBUSY);
+	return (m);
+}
+#else
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+	vm_object_t obj;
+	vm_page_t pp;
+
+	obj = vp->v_object;
+	zfs_vmobject_assert_wlocked(obj);
+
+	for (;;) {
+		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+		    pp->valid) {
+			if (vm_page_xbusied(pp)) {
+				/*
+				 * Reference the page before unlocking and
+				 * sleeping so that the page daemon is less
+				 * likely to reclaim it.
+				 */
+				vm_page_reference(pp);
+				vm_page_lock(pp);
+				zfs_vmobject_wunlock(obj);
+				vm_page_busy_sleep(pp, "zfsmwb", true);
+				zfs_vmobject_wlock(obj);
+				/* Retry the lookup after sleeping. */
+				continue;
+			}
+
+			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_wire_lock(pp);
+			vm_page_hold(pp);
+			vm_page_wire_unlock(pp);
+
+		} else
+			/* Absent or not valid: report no page. */
+			pp = NULL;
+		break;
+	}
+	return (pp);
+}
+#endif
+
+/*
+ * Drop the reference taken by page_hold(): unwire the page (to PQ_ACTIVE)
+ * from 1300035 on, unhold it on older kernels.
+ */
+static void
+page_unhold(vm_page_t pp)
+{
+
+	vm_page_wire_lock(pp);
+#if __FreeBSD_version >= 1300035
+	vm_page_unwire(pp, PQ_ACTIVE);
+#else
+	vm_page_unhold(pp);
+#endif
+	vm_page_wire_unlock(pp);
+}
+
+/*
+ * When a file is memory mapped, we must keep the IO data synchronized
+ * between the DMU cache and the memory mapped pages.  What this means:
+ *
+ * On Write:	If we find a memory mapped page, we refresh the affected part
+ *		of that page from the DMU, so that both hold the new data.
+ *
+ * zp is the file, [start, start + len) the byte range to refresh and os the
+ * objset the data is read from.  Pages that are not resident and valid are
+ * skipped.
+ */
+void
+update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
+{
+	vm_object_t obj;
+	struct sf_buf *sf;
+	vnode_t *vp = ZTOV(zp);
+	caddr_t va;
+	int off;
+
+	ASSERT3P(vp->v_mount, !=, NULL);
+	obj = vp->v_object;
+	ASSERT3P(obj, !=, NULL);
+
+	/* Offset of the range within its first page. */
+	off = start & PAGEOFFSET;
+	zfs_vmobject_wlock_12(obj);
+#if __FreeBSD_version >= 1300041
+	vm_object_pip_add(obj, 1);
+#endif
+	/* Walk the range one page at a time, starting page aligned. */
+	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
+		vm_page_t pp;
+		int nbytes = imin(PAGESIZE - off, len);
+
+		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
+			zfs_vmobject_wunlock_12(obj);
+
+			/*
+			 * Copy the range from the DMU into the page.  The
+			 * result of dmu_read() is deliberately discarded.
+			 */
+			va = zfs_map_page(pp, &sf);
+			(void) dmu_read(os, zp->z_id, start + off, nbytes,
+			    va + off, DMU_READ_PREFETCH);
+			zfs_unmap_page(sf);
+
+			zfs_vmobject_wlock_12(obj);
+			page_unbusy(pp);
+		}
+		len -= nbytes;
+		/* Only the first page can start mid-page. */
+		off = 0;
+	}
+#if __FreeBSD_version >= 1300041
+	vm_object_pip_wakeup(obj);
+#else
+	vm_object_pip_wakeupn(obj, 0);
+#endif
+	zfs_vmobject_wunlock_12(obj);
+}
+
+/*
+ * Read with UIO_NOCOPY flag means that sendfile(2) requests
+ * ZFS to populate a range of page cache pages with data.
+ *
+ * Starting at the (page aligned) uio offset, each page that has no valid
+ * data is filled from the DMU and marked valid; pages that are already
+ * valid are left alone.  The uio is advanced past every page processed.
+ * Returns 0 or the first dmu_read() error, at which point the loop stops.
+ *
+ * NOTE: this function could be optimized to pre-allocate
+ * all pages in advance, drain exclusive busy on all of them,
+ * map them into contiguous KVA region and populate them
+ * in one single dmu_read() call.
+ */
+int
+mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
+{
+	vnode_t *vp = ZTOV(zp);
+	objset_t *os = zp->z_zfsvfs->z_os;
+	struct sf_buf *sf;
+	vm_object_t obj;
+	vm_page_t pp;
+	int64_t start;
+	caddr_t va;
+	int len = nbytes;
+	int error = 0;
+
+	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
+	ASSERT3P(vp->v_mount, !=, NULL);
+	obj = vp->v_object;
+	ASSERT3P(obj, !=, NULL);
+	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
+
+	zfs_vmobject_wlock_12(obj);
+	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
+		int bytes = MIN(PAGESIZE, len);
+
+		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
+		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
+		if (vm_page_none_valid(pp)) {
+			/* Nothing valid in the page yet: fill it from the DMU. */
+			zfs_vmobject_wunlock_12(obj);
+			va = zfs_map_page(pp, &sf);
+			error = dmu_read(os, zp->z_id, start, bytes, va,
+			    DMU_READ_PREFETCH);
+			/* Zero the tail of a partially filled last page. */
+			if (bytes != PAGESIZE && error == 0)
+				bzero(va + bytes, PAGESIZE - bytes);
+			zfs_unmap_page(sf);
+			zfs_vmobject_wlock_12(obj);
+#if  __FreeBSD_version >= 1300081
+			if (error == 0) {
+				vm_page_valid(pp);
+				vm_page_activate(pp);
+				vm_page_do_sunbusy(pp);
+			} else {
+				/*
+				 * The read failed: free the page if it is
+				 * unwired, still has no valid data and the
+				 * busy state can be upgraded; otherwise just
+				 * drop the shared busy state.
+				 */
+				zfs_vmobject_wlock(obj);
+				if (!vm_page_wired(pp) && pp->valid == 0 &&
+				    vm_page_busy_tryupgrade(pp))
+					vm_page_free(pp);
+				else
+					vm_page_sunbusy(pp);
+				zfs_vmobject_wunlock(obj);
+			}
+#else
+			vm_page_do_sunbusy(pp);
+			vm_page_lock(pp);
+			if (error) {
+				/* Free the unfilled page if nobody uses it. */
+				if (pp->wire_count == 0 && pp->valid == 0 &&
+				    !vm_page_busied(pp))
+					vm_page_free(pp);
+			} else {
+				pp->valid = VM_PAGE_BITS_ALL;
+				vm_page_activate(pp);
+			}
+			vm_page_unlock(pp);
+#endif
+		} else {
+			/* Already populated: only release the busy state. */
+			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_do_sunbusy(pp);
+		}
+		if (error)
+			break;
+		zfs_uio_advance(uio, bytes);
+		len -= bytes;
+	}
+	zfs_vmobject_wunlock_12(obj);
+	return (error);
+}
+
+/*
+ * When a file is memory mapped, we must keep the IO data synchronized
+ * between the DMU cache and the memory mapped pages.  What this means:
+ *
+ * On Read:	We "read" preferentially from memory mapped pages,
+ *		else we default from the dmu buffer.
+ *
+ * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
+ *	 the file is memory mapped.
+ *
+ * Copies up to nbytes starting at the uio offset into the uio and returns 0
+ * or the first error from the copy or the DMU read.
+ */
+int
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
+{
+	vnode_t *vp = ZTOV(zp);
+	vm_object_t obj;
+	int64_t start;
+	int len = nbytes;
+	int off;
+	int error = 0;
+
+	ASSERT3P(vp->v_mount, !=, NULL);
+	obj = vp->v_object;
+	ASSERT3P(obj, !=, NULL);
+
+	start = zfs_uio_offset(uio);
+	/* Offset of the read within its first page. */
+	off = start & PAGEOFFSET;
+	zfs_vmobject_wlock_12(obj);
+	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
+		vm_page_t pp;
+		uint64_t bytes = MIN(PAGESIZE - off, len);
+
+		if ((pp = page_hold(vp, start))) {
+			/* A valid page is resident: copy straight from it. */
+			struct sf_buf *sf;
+			caddr_t va;
+
+			zfs_vmobject_wunlock_12(obj);
+			va = zfs_map_page(pp, &sf);
+			error = vn_io_fault_uiomove(va + off, bytes,
+			    GET_UIO_STRUCT(uio));
+			zfs_unmap_page(sf);
+			zfs_vmobject_wlock_12(obj);
+			page_unhold(pp);
+		} else {
+			/* No usable page: read this piece from the DMU. */
+			zfs_vmobject_wunlock_12(obj);
+			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
+			    uio, bytes);
+			zfs_vmobject_wlock_12(obj);
+		}
+		len -= bytes;
+		/* Only the first page can start mid-page. */
+		off = 0;
+		if (error)
+			break;
+	}
+	zfs_vmobject_wunlock_12(obj);
+	return (error);
+}
+
+/*
+ * Synchronously write "len" bytes from the kernel buffer "data" to the
+ * znode at offset "pos" using vn_rdwr().
+ *
+ * If presid is non-NULL the residual count is stored there and a short write
+ * is not an error.  If presid is NULL, any residual is reported as EIO.
+ * Returns 0 on success or an errno value.
+ */
+int
+zfs_write_simple(znode_t *zp, const void *data, size_t len,
+    loff_t pos, size_t *presid)
+{
+	ssize_t left;
+	int rc;
+
+	rc = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
+	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &left, curthread);
+	if (rc != 0)
+		return (SET_ERROR(rc));
+
+	if (presid != NULL) {
+		*presid = left;
+		return (0);
+	}
+
+	/* The caller cannot see the residual, so a short write must fail. */
+	if (left != 0)
+		return (SET_ERROR(EIO));
+	return (0);
+}
+
+/*
+ * Release a reference on the znode's vnode asynchronously, handing the
+ * release to the zrele taskq of the pool that owns the znode's objset.
+ */
+void
+zfs_zrele_async(znode_t *zp)
+{
+	vnode_t *vnode = ZTOV(zp);
+	objset_t *objset = ITOZSB(vnode)->z_os;
+
+	VN_RELE_ASYNC(vnode, dsl_pool_zrele_taskq(dmu_objset_pool(objset)));
+}
+
+/*
+ * Callback handed to vn_vget_ino_gen(): passes the vnode given in "arg"
+ * back through *vpp and locks it with lkflags.  If the lock cannot be
+ * obtained, the vnode reference is released and the error is returned.
+ */
+static int
+zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
+{
+	struct vnode *target = arg;
+	int error;
+
+	*vpp = target;
+	error = vn_lock(target, lkflags);
+	if (error == 0)
+		return (0);
+
+	vrele(target);
+	return (error);
+}
+
+/*
+ * Lock the vnode "vp" that a lookup of "name" in directory "dvp" produced,
+ * using the lock type requested in lkflags.
+ *
+ *  - "" or ".": vp is dvp itself; an extra reference is taken and the
+ *    existing lock is upgraded or downgraded to the requested type.
+ *  - "..": the lock is taken through vn_vget_ino_gen() with
+ *    zfs_dd_callback().
+ *  - anything else: vp is locked directly.
+ *
+ * Returns 0 on success.  On failure the reference on the vnode is released
+ * in the "." and plain-name cases (vrele) and by zfs_dd_callback() when its
+ * lock attempt fails; ENOENT is returned when the "." relock leaves a doomed
+ * vnode.
+ */
+static int
+zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
+{
+	znode_t *zdp = VTOZ(dvp);
+	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
+	int error;
+	int ltype;
+
+	/* The directory lock is only asserted outside of replay. */
+	if (zfsvfs->z_replay == B_FALSE)
+		ASSERT_VOP_LOCKED(dvp, __func__);
+
+	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
+		ASSERT3P(dvp, ==, vp);
+		vref(dvp);
+		ltype = lkflags & LK_TYPE_MASK;
+		/* Adjust the lock only if it differs from the requested type. */
+		if (ltype != VOP_ISLOCKED(dvp)) {
+			if (ltype == LK_EXCLUSIVE)
+				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
+			else /* if (ltype == LK_SHARED) */
+				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
+
+			/*
+			 * Relock for the "." case could leave us with
+			 * reclaimed vnode.
+			 */
+			if (VN_IS_DOOMED(dvp)) {
+				vrele(dvp);
+				return (SET_ERROR(ENOENT));
+			}
+		}
+		return (0);
+	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
+		/*
+		 * Note that in this case, dvp is the child vnode, and we
+		 * are looking up the parent vnode - exactly reverse from
+		 * normal operation.  Unlocking dvp requires some rather
+		 * tricky unlock/relock dance to prevent mp from being freed;
+		 * use vn_vget_ino_gen() which takes care of all that.
+		 *
+		 * XXX Note that there is a time window when both vnodes are
+		 * unlocked.  It is possible, although highly unlikely, that
+		 * during that window the parent-child relationship between
+		 * the vnodes may change, for example, get reversed.
+		 * In that case we would have a wrong lock order for the vnodes.
+		 * All other filesystems seem to ignore this problem, so we
+		 * do the same here.
+		 * A potential solution could be implemented as follows:
+		 * - using LK_NOWAIT when locking the second vnode and retrying
+		 *   if necessary
+		 * - checking that the parent-child relationship still holds
+		 *   after locking both vnodes and retrying if it doesn't
+		 */
+		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
+		return (error);
+	} else {
+		/* Ordinary child: lock it, dropping the reference on failure. */
+		error = vn_lock(vp, lkflags);
+		if (error != 0)
+			vrele(vp);
+		return (error);
+	}
+}
+
+/*
+ * Lookup an entry in a directory, or an extended attribute directory.
+ * If it exists, return a held vnode reference for it.
+ *
+ *	IN:	dvp	- vnode of directory to search.
+ *		nm	- name of entry to lookup.
+ *		cnp	- componentname of the lookup; cn_flags (ISDOTDOT,
+ *			  ISLASTCN, MAKEENTRY, NOEXECCHECK) and cn_lkflags
+ *			  are consulted, and cn_flags may be modified.
+ *		nameiop	- lookup operation; CREATE, RENAME and DELETE get
+ *			  special treatment on the last component.
+ *		cr	- credentials of caller.
+ *		flags	- LOOKUP_XATTR set if looking for an attribute.
+ *		cached	- B_TRUE when coming in via VOP_CACHEDLOOKUP, in
+ *			  which case the directory execute check is skipped.
+ *
+ *	OUT:	vpp	- vnode of located entry, NULL if not found.
+ *
+ *	RETURN:	0 on success, error code on failure.  For the last
+ *		component of a CREATE or RENAME, ENOENT is translated
+ *		to EJUSTRETURN.
+ *
+ * Timestamps:
+ *	NA
+ */
+/* ARGSUSED */
+static int
+zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
+    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
+    boolean_t cached)
+{
+	znode_t *zdp = VTOZ(dvp);
+	znode_t *zp;
+	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
+#if	__FreeBSD_version > 1300124
+	seqc_t dvp_seqc;
+#endif
+	int	error = 0;
+
+	/*
+	 * For ordinary (non-xattr) lookups, dvp must be a directory and
+	 * its znode must still have an SA handle.
+	 *
+	 * NOTE(review): this block previously carried a comment about a
+	 * DNLC fast path; no DNLC lookup is performed in this function.
+	 */
+	if (!(flags & LOOKUP_XATTR)) {
+		if (dvp->v_type != VDIR) {
+			return (SET_ERROR(ENOTDIR));
+		} else if (zdp->z_sa_hdl == NULL) {
+			return (SET_ERROR(EIO));
+		}
+	}
+
+	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
+	    const char *, nm);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zdp);
+
+#if	__FreeBSD_version > 1300124
+	dvp_seqc = vn_seqc_read_notmodify(dvp);
+#endif
+
+	*vpp = NULL;
+
+	if (flags & LOOKUP_XATTR) {
+		/*
+		 * If the xattr property is off, refuse the lookup request.
+		 */
+		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EOPNOTSUPP));
+		}
+
+		/*
+		 * We don't allow recursive attributes..
+		 * Maybe someday we will.
+		 */
+		if (zdp->z_pflags & ZFS_XATTR) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EINVAL));
+		}
+
+		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+		*vpp = ZTOV(zp);
+
+		/*
+		 * Do we have permission to get into attribute directory?
+		 */
+		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
+		if (error) {
+			vrele(ZTOV(zp));
+		}
+
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Check accessibility of directory if we're not coming in via
+	 * VOP_CACHEDLOOKUP.
+	 */
+	if (!cached) {
+#ifdef NOEXECCHECK
+		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
+			cnp->cn_flags &= ~NOEXECCHECK;
+		} else
+#endif
+		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+
+	/*
+	 * First handle the special cases.
+	 */
+	if ((cnp->cn_flags & ISDOTDOT) != 0) {
+		/*
+		 * If we are a snapshot mounted under .zfs, return
+		 * the vp for the snapshot directory.
+		 */
+		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
+			struct componentname cn;
+			vnode_t *zfsctl_vp;
+			int ltype;
+
+			ZFS_EXIT(zfsvfs);
+			ltype = VOP_ISLOCKED(dvp);
+			VOP_UNLOCK1(dvp);
+			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
+			    &zfsctl_vp);
+			if (error == 0) {
+				cn.cn_nameptr = "snapshot";
+				cn.cn_namelen = strlen(cn.cn_nameptr);
+				cn.cn_nameiop = cnp->cn_nameiop;
+				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
+				cn.cn_lkflags = cnp->cn_lkflags;
+				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
+				vput(zfsctl_vp);
+			}
+			vn_lock(dvp, ltype | LK_RETRY);
+			return (error);
+		}
+	}
+	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
+		ZFS_EXIT(zfsvfs);
+		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
+			return (SET_ERROR(ENOTSUP));
+		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
+		return (error);
+	}
+
+	/*
+	 * The loop is to retry the lookup if the parent-child relationship
+	 * changes during the dot-dot locking complexities.
+	 */
+	for (;;) {
+		uint64_t parent;
+
+		error = zfs_dirlook(zdp, nm, &zp);
+		if (error == 0)
+			*vpp = ZTOV(zp);
+
+		ZFS_EXIT(zfsvfs);
+		if (error != 0)
+			break;
+
+		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
+		if (error != 0) {
+			/*
+			 * If we've got a locking error, then the vnode
+			 * got reclaimed because of a force unmount.
+			 * We never enter doomed vnodes into the name cache.
+			 */
+			*vpp = NULL;
+			return (error);
+		}
+
+		if ((cnp->cn_flags & ISDOTDOT) == 0)
+			break;
+
+		ZFS_ENTER(zfsvfs);
+		if (zdp->z_sa_hdl == NULL) {
+			error = SET_ERROR(EIO);
+		} else {
+			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+			    &parent, sizeof (parent));
+		}
+		if (error != 0) {
+			ZFS_EXIT(zfsvfs);
+			vput(ZTOV(zp));
+			break;
+		}
+		if (zp->z_id == parent) {
+			ZFS_EXIT(zfsvfs);
+			break;
+		}
+		vput(ZTOV(zp));
+	}
+
+	if (error != 0)
+		*vpp = NULL;
+
+	/* Translate errors and add SAVENAME when needed. */
+	if (cnp->cn_flags & ISLASTCN) {
+		switch (nameiop) {
+		case CREATE:
+		case RENAME:
+			if (error == ENOENT) {
+				error = EJUSTRETURN;
+#if __FreeBSD_version < 1400068
+				cnp->cn_flags |= SAVENAME;
+#endif
+				break;
+			}
+			fallthrough;
+		case DELETE:
+#if __FreeBSD_version < 1400068
+			if (error == 0)
+				cnp->cn_flags |= SAVENAME;
+#endif
+			break;
+		}
+	}
+
+#if	__FreeBSD_version > 1300124
+	if ((cnp->cn_flags & ISDOTDOT) != 0) {
+		/*
+		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
+		 * handle races. In particular different callers may end up
+		 * with different vnodes and will try to add conflicting
+		 * entries to the namecache.
+		 *
+		 * While finding different result may be acceptable in face
+		 * of concurrent modification, adding conflicting entries
+		 * trips over an assert in the namecache.
+		 *
+		 * Ultimately let an entry through once everything settles.
+		 */
+		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
+			cnp->cn_flags &= ~MAKEENTRY;
+		}
+	}
+#endif
+
+	/* Insert name into cache (as non-existent) if appropriate. */
+	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
+	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
+		cache_enter(dvp, NULL, cnp);
+
+	/* Insert name into cache if appropriate. */
+	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
+	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
+		if (!(cnp->cn_flags & ISLASTCN) ||
+		    (nameiop != DELETE && nameiop != RENAME)) {
+			cache_enter(dvp, *vpp, cnp);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Attempt to create a new entry in a directory and return the znode
+ * of the created file.
+ *
+ * NOTE(review): no truncate-existing-file path is visible in this
+ * function; the ZNEW lookup is presumably expected to fail when the
+ * name already exists -- confirm against zfs_dirent_lookup().
+ *
+ *	IN:	dzp	- znode of directory to put new file entry in.
+ *		name	- name of new file entry.
+ *		vap	- attributes of new file.
+ *		excl	- flag indicating exclusive or non-exclusive mode
+ *			  [UNUSED].
+ *		mode	- mode to open file with [UNUSED].
+ *		cr	- credentials of caller.
+ *		flag	- large file flag [UNUSED].
+ *		vsecp	- ACL to be set
+ *
+ *	OUT:	zpp	- znode of created entry; set to NULL before the
+ *			  entry lookup and only filled in on success.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated if new entry created
+ *	 vp - ctime|mtime always, atime if new
+ */
+
+/* ARGSUSED */
+int
+zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
+    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+{
+	znode_t		*zp;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	objset_t	*os;
+	dmu_tx_t	*tx;
+	int		error;
+	uid_t		uid = crgetuid(cr);
+	gid_t		gid = crgetgid(cr);
+	uint64_t	projid = ZFS_DEFAULT_PROJID;
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+	uint64_t	txtype;
+#ifdef DEBUG_VFS_LOCKS
+	vnode_t	*dvp = ZTOV(dzp);
+#endif
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (vsecp || (vap->va_mask & AT_XVATTR) ||
+	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	os = zfsvfs->z_os;
+	zilog = zfsvfs->z_log;
+
+	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	if (vap->va_mask & AT_XVATTR) {
+		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_type)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	*zpp = NULL;
+
+	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
+		vap->va_mode &= ~S_ISVTX;
+
+	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
+	if (error) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	ASSERT3P(zp, ==, NULL);
+
+	/*
+	 * Create a new file object and update the directory
+	 * to reference it.
+	 */
+	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		goto out;
+	}
+
+	/*
+	 * We only support the creation of regular files in
+	 * extended attribute directories.
+	 */
+
+	if ((dzp->z_pflags & ZFS_XATTR) &&
+	    (vap->va_type != VREG)) {
+		error = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	if ((error = zfs_acl_ids_create(dzp, 0, vap,
+	    cr, vsecp, &acl_ids)) != 0)
+		goto out;
+
+	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
+		projid = zfs_inherit_projid(dzp);
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
+		zfs_acl_ids_free(&acl_ids);
+		error = SET_ERROR(EDQUOT);
+		goto out;
+	}
+
+	getnewvnode_reserve_();
+
+	tx = dmu_tx_create(os);
+
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+	if (!zfsvfs->z_use_sa &&
+	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+		    0, acl_ids.z_aclp->z_acl_bytes);
+	}
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		getnewvnode_drop_reserve();
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
+	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
+	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
+	    vsecp, acl_ids.z_fuidp, vap);
+	zfs_acl_ids_free(&acl_ids);
+	dmu_tx_commit(tx);
+
+	getnewvnode_drop_reserve();
+
+out:
+	VNCHECKREF(dvp);
+	if (error == 0) {
+		*zpp = zp;
+	}
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Remove an entry from a directory.
+ *
+ *	IN:	dvp	- vnode of directory to remove entry from.
+ *		vp	- vnode of the entry being removed.
+ *		name	- name of entry to remove.
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *		EPERM is returned if vp is a directory.
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime
+ *	 vp - ctime (if nlink > 0)
+ */
+
+/*ARGSUSED*/
+static int
+zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
+{
+	znode_t		*dzp = VTOZ(dvp);
+	znode_t		*zp;
+	znode_t		*xzp;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	uint64_t	xattr_obj;
+	uint64_t	obj = 0;
+	dmu_tx_t	*tx;
+	boolean_t	unlinked;
+	uint64_t	txtype;
+	int		error;
+
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zp = VTOZ(vp);
+	ZFS_VERIFY_ZP(zp);
+	zilog = zfsvfs->z_log;
+
+	xattr_obj = 0;
+	xzp = NULL;
+
+	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+		goto out;
+	}
+
+	/*
+	 * Need to use rmdir for removing directories.
+	 */
+	if (vp->v_type == VDIR) {
+		error = SET_ERROR(EPERM);
+		goto out;
+	}
+
+	vnevent_remove(vp, dvp, name, ct);
+
+	obj = zp->z_id;
+
+	/* are there any extended attributes? */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (error == 0 && xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
+		ASSERT0(error);
+	}
+
+	/*
+	 * We may delete the znode now, or we may put it in the unlinked set;
+	 * it depends on whether we're the last link, and on whether there are
+	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
+	 * allow for either case.
+	 */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+
+	if (xzp) {
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+	}
+
+	/* charge as an update -- would be nice not to charge at all */
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+	/*
+	 * Mark this transaction as typically resulting in a net free of space
+	 */
+	dmu_tx_mark_netfree(tx);
+
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		/*
+		 * This path bypasses the "out" label, so drop the hold
+		 * taken on the xattr directory by zfs_zget() here;
+		 * otherwise the vnode reference is leaked.
+		 */
+		if (xzp)
+			vrele(ZTOV(xzp));
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Remove the directory entry.
+	 */
+	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
+
+	if (error) {
+		dmu_tx_commit(tx);
+		goto out;
+	}
+
+	if (unlinked) {
+		zfs_unlinked_add(zp, tx);
+		vp->v_vflag |= VV_NOSYNC;
+	}
+	/* XXX check changes to linux vnops */
+	txtype = TX_REMOVE;
+	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
+
+	dmu_tx_commit(tx);
+out:
+
+	if (xzp)
+		vrele(ZTOV(xzp));
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+
+/*
+ * Look up "name" in directory dzp on behalf of an in-kernel caller.
+ *
+ * The caller-supplied componentname is filled in to describe a final
+ * path component, looked up with kcred and an exclusive lock request.
+ * When the name cache is enabled and z_replay is clear, the lookup is
+ * routed through vfs_cache_lookup(); otherwise zfs_lookup() is called
+ * directly.
+ *
+ *	IN:	dzp	- znode of directory to search.
+ *		name	- name of entry to lookup.
+ *		nameiop	- lookup operation, stored in cn_nameiop.
+ *
+ *	OUT:	vpp	- vnode produced by the lookup.
+ *		cnp	- componentname initialized by this function.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ */
+static int
+zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
+    struct componentname *cnp, int nameiop)
+{
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	int		rc;
+
+	/* Describe the request as the last component of a path. */
+#if __FreeBSD_version < 1400068
+	cnp->cn_flags = ISLASTCN | SAVENAME;
+#else
+	cnp->cn_flags = ISLASTCN;
+#endif
+	cnp->cn_nameiop = nameiop;
+	cnp->cn_nameptr = __DECONST(char *, name);
+	cnp->cn_namelen = strlen(name);
+	cnp->cn_cred = kcred;
+	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
+#if __FreeBSD_version < 1400037
+	cnp->cn_thread = curthread;
+#endif
+
+	if (!zfsvfs->z_use_namecache || zfsvfs->z_replay) {
+		/* Name cache not usable: ask the filesystem directly. */
+		rc = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred,
+		    0, B_FALSE);
+	} else {
+		struct vop_lookup_args args;
+
+		args.a_gen.a_desc = &vop_lookup_desc;
+		args.a_dvp = ZTOV(dzp);
+		args.a_vpp = vpp;
+		args.a_cnp = cnp;
+		rc = vfs_cache_lookup(&args);
+	}
+#ifdef ZFS_DEBUG
+	if (rc != 0) {
+		printf("got error %d on name %s on op %d\n", rc, name,
+		    nameiop);
+		kdb_backtrace();
+	}
+#endif
+	return (rc);
+}
+
+/*
+ * Remove the entry "name" from directory dzp.
+ *
+ * The entry is first resolved with a DELETE lookup; the resulting
+ * vnode is handed to zfs_remove_() and then released with vput().
+ *
+ *	IN:	dzp	- znode of directory to remove entry from.
+ *		name	- name of entry to remove.
+ *		cr	- credentials of caller.
+ *		flags	- case flags [UNUSED].
+ *
+ *	RETURN:	0 on success, error code on failure.
+ */
+int
+zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
+{
+	struct componentname	cn;
+	vnode_t			*vp;
+	int			rc;
+
+	rc = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE);
+	if (rc == 0) {
+		rc = zfs_remove_(ZTOV(dzp), vp, name, cr);
+		vput(vp);
+	}
+	return (rc);
+}
+/*
+ * Create a new directory and insert it into dzp using the name
+ * provided.  Return a pointer to the inserted directory.
+ *
+ *	IN:	dzp	- znode of directory to add subdir to.
+ *		dirname	- name of new directory.
+ *		vap	- attributes of new directory; va_type must be VDIR.
+ *		cr	- credentials of caller.
+ *		flags	- case flags [UNUSED].
+ *		vsecp	- ACL to be set [UNUSED; NULL is passed to
+ *			  zfs_acl_ids_create() and zfs_log_create()].
+ *
+ *	OUT:	zpp	- znode of created directory.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated
+ *	 vp - ctime|mtime|atime updated
+ */
+/*ARGSUSED*/
+int
+zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
+    cred_t *cr, int flags, vsecattr_t *vsecp)
+{
+	znode_t		*zp;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	uint64_t	txtype;
+	dmu_tx_t	*tx;
+	int		error;
+	uid_t		uid = crgetuid(cr);
+	gid_t		gid = crgetgid(cr);
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+
+	ASSERT3U(vap->va_type, ==, VDIR);
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    ((vap->va_mask & AT_XVATTR) ||
+	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (dzp->z_pflags & ZFS_XATTR) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(dirname,
+	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	if (vap->va_mask & AT_XVATTR) {
+		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_type)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
+	    NULL, &acl_ids)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * First make sure the new directory doesn't exist.
+	 *
+	 * Existence is checked first to make sure we don't return
+	 * EACCES instead of EEXIST which can cause some applications
+	 * to fail.
+	 */
+	*zpp = NULL;
+
+	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	ASSERT3P(zp, ==, NULL);
+
+	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EDQUOT));
+	}
+
+	/*
+	 * Add a new entry to the directory.
+	 */
+	getnewvnode_reserve_();
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		getnewvnode_drop_reserve();
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Create new node.
+	 */
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	/*
+	 * Now put new name in parent dir.
+	 */
+	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
+
+	*zpp = zp;
+
+	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
+	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
+	    acl_ids.z_fuidp, vap);
+
+	zfs_acl_ids_free(&acl_ids);
+
+	dmu_tx_commit(tx);
+
+	getnewvnode_drop_reserve();
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+#if	__FreeBSD_version < 1300124
+/*
+ * Local definition of cache_vop_rmdir(), compiled only when
+ * __FreeBSD_version is below 1300124: calls cache_purge() on both the
+ * parent directory vnode and the vnode of the directory being removed.
+ * NOTE(review): presumably newer kernels provide this function
+ * themselves -- confirm.
+ */
+static void
+cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
+{
+
+	cache_purge(dvp);
+	cache_purge(vp);
+}
+#endif
+
+/*
+ * Remove a directory subdir entry.
+ *
+ * NOTE(review): no current-working-directory check is performed in
+ * this function; if removing the cwd must fail, that has to be
+ * enforced elsewhere -- confirm.
+ *
+ *	IN:	dvp	- vnode of directory to remove from.
+ *		vp	- vnode of directory to be removed.
+ *		name	- name of directory to be removed.
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *		ENOTDIR is returned if vp is not a directory.
+ *
+ * Timestamps:
+ *	dvp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+static int
+zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
+{
+	znode_t		*dzp = VTOZ(dvp);
+	znode_t		*zp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	dmu_tx_t	*tx;
+	int		error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	ZFS_VERIFY_ZP(zp);
+	zilog = zfsvfs->z_log;
+
+
+	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+		goto out;
+	}
+
+	if (vp->v_type != VDIR) {
+		error = SET_ERROR(ENOTDIR);
+		goto out;
+	}
+
+	vnevent_rmdir(vp, dvp, name, ct);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+	dmu_tx_mark_netfree(tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
+
+	if (error == 0) {
+		uint64_t txtype = TX_RMDIR;
+		zfs_log_remove(zilog, tx, txtype, dzp, name,
+		    ZFS_NO_OBJECT, B_FALSE);
+	}
+
+	dmu_tx_commit(tx);
+
+	cache_vop_rmdir(dvp, vp);
+out:
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Remove the directory "name" from directory dzp.
+ *
+ * The entry is first resolved with a DELETE lookup; the resulting
+ * vnode is handed to zfs_rmdir_() and then released with vput().
+ *
+ *	IN:	dzp	- znode of directory to remove from.
+ *		name	- name of directory to be removed.
+ *		cwd	- current working directory [UNUSED].
+ *		cr	- credentials of caller.
+ *		flags	- case flags [UNUSED].
+ *
+ *	RETURN:	0 on success, error code on failure.
+ */
+int
+zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
+{
+	struct componentname	cn;
+	vnode_t			*vp;
+	int			rc;
+
+	rc = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE);
+	if (rc == 0) {
+		rc = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
+		vput(vp);
+	}
+	return (rc);
+}
+
+/*
+ * Read as many directory entries as will fit into the provided
+ * buffer from the given directory cursor position (specified in
+ * the uio structure).
+ *
+ *	IN:	vp	- vnode of directory to read.
+ *		uio	- structure supplying read location, range info,
+ *			  and return buffer.
+ *		cr	- credentials of caller.
+ *
+ *	OUT:	uio	- updated offset and range, buffer filled.
+ *		eofp	- set to true if end-of-file detected; may be
+ *			  NULL, in which case a local is used instead.
+ *		ncookies - if non-NULL, receives the number of cookies
+ *			  stored in *cookies.
+ *		cookies	- if ncookies is non-NULL, receives an M_TEMP
+ *			  array holding the offset following each
+ *			  entry; freed and set to NULL on error.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	vp - atime updated
+ *
+ * Note that the low 4 bits of the cookie returned by zap is always zero.
+ * This allows us to use the low range for "special" directory entries:
+ * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
+ * we use the offset 2 for the '.zfs' directory.
+ *
+ * The local "flags" is initialized to 0 and never changed, so the
+ * V_RDDIR_ACCFILTER and V_RDDIR_ENTFLAGS branches below are never taken.
+ */
+/* ARGSUSED */
+static int
+zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
+    int *ncookies, cookie_t **cookies)
+{
+	znode_t		*zp = VTOZ(vp);
+	iovec_t		*iovp;
+	edirent_t	*eodp;
+	dirent64_t	*odp;
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	objset_t	*os;
+	caddr_t		outbuf;
+	size_t		bufsize;
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	uint_t		bytes_wanted;
+	uint64_t	offset; /* must be unsigned; checks for < 1 */
+	uint64_t	parent;
+	int		local_eof;
+	int		outcount;
+	int		error;
+	uint8_t		prefetch;
+	boolean_t	check_sysattrs;
+	uint8_t		type;
+	int		ncooks;
+	cookie_t	*cooks = NULL;
+	int		flags = 0;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent))) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * If we are not given an eof variable,
+	 * use a local one.
+	 */
+	if (eofp == NULL)
+		eofp = &local_eof;
+
+	/*
+	 * Check for valid iov_len.
+	 */
+	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Quit if directory has been removed (posix)
+	 */
+	if ((*eofp = zp->z_unlinked) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	error = 0;
+	os = zfsvfs->z_os;
+	offset = zfs_uio_offset(uio);
+	prefetch = zp->z_zn_prefetch;
+
+	/*
+	 * Initialize the iterator cursor.
+	 */
+	if (offset <= 3) {
+		/*
+		 * Start iteration from the beginning of the directory.
+		 */
+		zap_cursor_init(&zc, os, zp->z_id);
+	} else {
+		/*
+		 * The offset is a serialized cursor.
+		 */
+		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
+	}
+
+	/*
+	 * Get space to change directory entries into fs independent format.
+	 */
+	iovp = GET_UIO_STRUCT(uio)->uio_iov;
+	bytes_wanted = iovp->iov_len;
+	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
+		bufsize = bytes_wanted;
+		outbuf = kmem_alloc(bufsize, KM_SLEEP);
+		odp = (struct dirent64 *)outbuf;
+	} else {
+		bufsize = bytes_wanted;
+		outbuf = NULL;
+		odp = (struct dirent64 *)iovp->iov_base;
+	}
+	eodp = (struct edirent *)odp;
+
+	if (ncookies != NULL) {
+		/*
+		 * Minimum entry size is dirent size and 1 byte for a file name.
+		 */
+		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
+		    sizeof (((struct dirent *)NULL)->d_name) + 1);
+		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
+		*cookies = cooks;
+		*ncookies = ncooks;
+	}
+	/*
+	 * If this VFS supports the system attribute view interface; and
+	 * we're looking at an extended attribute directory; and we care
+	 * about normalization conflicts on this vfs; then we must check
+	 * for normalization conflicts with the sysattr name space.
+	 */
+#ifdef TODO
+	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
+	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
+	    (flags & V_RDDIR_ENTFLAGS);
+#else
+	check_sysattrs = 0;
+#endif
+
+	/*
+	 * Transform to file-system independent format
+	 */
+	outcount = 0;
+	while (outcount < bytes_wanted) {
+		ino64_t objnum;
+		ushort_t reclen;
+		off64_t *next = NULL;
+
+		/*
+		 * Special case `.', `..', and `.zfs'.
+		 */
+		if (offset == 0) {
+			(void) strcpy(zap.za_name, ".");
+			zap.za_normalization_conflict = 0;
+			objnum = zp->z_id;
+			type = DT_DIR;
+		} else if (offset == 1) {
+			(void) strcpy(zap.za_name, "..");
+			zap.za_normalization_conflict = 0;
+			objnum = parent;
+			type = DT_DIR;
+		} else if (offset == 2 && zfs_show_ctldir(zp)) {
+			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
+			zap.za_normalization_conflict = 0;
+			objnum = ZFSCTL_INO_ROOT;
+			type = DT_DIR;
+		} else {
+			/*
+			 * Grab next entry.
+			 */
+			if ((error = zap_cursor_retrieve(&zc, &zap))) {
+				if ((*eofp = (error == ENOENT)) != 0)
+					break;
+				else
+					goto update;
+			}
+
+			if (zap.za_integer_length != 8 ||
+			    zap.za_num_integers != 1) {
+				cmn_err(CE_WARN, "zap_readdir: bad directory "
+				    "entry, obj = %lld, offset = %lld\n",
+				    (u_longlong_t)zp->z_id,
+				    (u_longlong_t)offset);
+				error = SET_ERROR(ENXIO);
+				goto update;
+			}
+
+			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
+			/*
+			 * MacOS X can extract the object type here such as:
+			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
+			 */
+			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
+
+			if (check_sysattrs && !zap.za_normalization_conflict) {
+#ifdef TODO
+				zap.za_normalization_conflict =
+				    xattr_sysattr_casechk(zap.za_name);
+#else
+				panic("%s:%u: TODO", __func__, __LINE__);
+#endif
+			}
+		}
+
+		if (flags & V_RDDIR_ACCFILTER) {
+			/*
+			 * If we have no access at all, don't include
+			 * this entry in the returned information
+			 */
+			znode_t	*ezp;
+			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
+				goto skip_entry;
+			if (!zfs_has_access(ezp, cr)) {
+				vrele(ZTOV(ezp));
+				goto skip_entry;
+			}
+			vrele(ZTOV(ezp));
+		}
+
+		if (flags & V_RDDIR_ENTFLAGS)
+			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
+		else
+			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
+
+		/*
+		 * Will this entry fit in the buffer?
+		 */
+		if (outcount + reclen > bufsize) {
+			/*
+			 * Did we manage to fit anything in the buffer?
+			 */
+			if (!outcount) {
+				error = SET_ERROR(EINVAL);
+				goto update;
+			}
+			break;
+		}
+		if (flags & V_RDDIR_ENTFLAGS) {
+			/*
+			 * Add extended flag entry:
+			 */
+			eodp->ed_ino = objnum;
+			eodp->ed_reclen = reclen;
+			/* NOTE: ed_off is the offset for the *next* entry */
+			next = &(eodp->ed_off);
+			eodp->ed_eflags = zap.za_normalization_conflict ?
+			    ED_CASE_CONFLICT : 0;
+			(void) strncpy(eodp->ed_name, zap.za_name,
+			    EDIRENT_NAMELEN(reclen));
+			eodp = (edirent_t *)((intptr_t)eodp + reclen);
+		} else {
+			/*
+			 * Add normal entry:
+			 */
+			odp->d_ino = objnum;
+			odp->d_reclen = reclen;
+			odp->d_namlen = strlen(zap.za_name);
+			/* NOTE: d_off is the offset for the *next* entry. */
+			next = &odp->d_off;
+			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
+			odp->d_type = type;
+			dirent_terminate(odp);
+			odp = (dirent64_t *)((intptr_t)odp + reclen);
+		}
+		outcount += reclen;
+
+		ASSERT3S(outcount, <=, bufsize);
+
+		/* Prefetch znode */
+		if (prefetch)
+			dmu_prefetch(os, objnum, 0, 0, 0,
+			    ZIO_PRIORITY_SYNC_READ);
+
+	skip_entry:
+		/*
+		 * Move to the next entry, fill in the previous offset.
+		 */
+		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
+			zap_cursor_advance(&zc);
+			offset = zap_cursor_serialize(&zc);
+		} else {
+			offset += 1;
+		}
+
+		/* Fill the offset right after advancing the cursor. */
+		if (next != NULL)
+			*next = offset;
+		if (cooks != NULL) {
+			*cooks++ = offset;
+			ncooks--;
+			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
+		}
+	}
+	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
+
+	/* Subtract unused cookies */
+	if (ncookies != NULL)
+		*ncookies -= ncooks;
+
+	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
+		iovp->iov_base += outcount;
+		iovp->iov_len -= outcount;
+		zfs_uio_resid(uio) -= outcount;
+	} else if ((error =
+	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
+		/*
+		 * Reset the pointer.
+		 */
+		offset = zfs_uio_offset(uio);
+	}
+
+update:
+	zap_cursor_fini(&zc);
+	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
+		kmem_free(outbuf, bufsize);
+
+	if (error == ENOENT)
+		error = 0;
+
+	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
+
+	zfs_uio_setoffset(uio, offset);
+	ZFS_EXIT(zfsvfs);
+	if (error != 0 && cookies != NULL) {
+		free(*cookies, M_TEMP);
+		*cookies = NULL;
+		*ncookies = 0;
+	}
+	return (error);
+}
+
+/*
+ * Get the requested file attributes and place them in the provided
+ * vattr structure.
+ *
+ *	IN:	vp	- vnode of file.
+ *		vap	- va_mask identifies requested attributes.
+ *			  If AT_XVATTR set, then optional attrs are requested
+ *		flags	- ATTR_NOACLCHECK (CIFS server context)
+ *		cr	- credentials of caller.
+ *
+ *	OUT:	vap	- attribute values.
+ *
+ *	RETURN:	0 on success, error code on failure (the SA bulk
+ *		lookup failed, or the ACE_READ_ATTRIBUTES access check
+ *		was denied).
+ */
+/* ARGSUSED */
+static int
+zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
+{
+	znode_t *zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int	error = 0;
+	uint32_t blksize;
+	u_longlong_t nblocks;
+	uint64_t mtime[2], ctime[2], crtime[2], rdev;
+	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
+	xoptattr_t *xoap = NULL;
+	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	sa_bulk_attr_t bulk[4];
+	int count = 0;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
+		    &rdev, 8);
+
+	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
+	 * Also, if we are the owner don't bother, since owner should
+	 * always be allowed to read basic attributes of file.
+	 */
+	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
+	    (vap->va_uid != crgetuid(cr))) {
+		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
+		    skipaclchk, cr))) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	/*
+	 * Return all attributes.  It's cheaper to provide the answer
+	 * than to determine whether we were asked the question.
+	 */
+
+	vap->va_type = IFTOVT(zp->z_mode);
+	vap->va_mode = zp->z_mode & ~S_IFMT;
+	vn_fsid(vp, vap);
+	vap->va_nodeid = zp->z_id;
+	vap->va_nlink = zp->z_links;
+	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
+	    zp->z_links < ZFS_LINK_MAX)
+		vap->va_nlink++;
+	vap->va_size = zp->z_size;
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		vap->va_rdev = zfs_cmpldev(rdev);
+	vap->va_gen = zp->z_gen;
+	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
+	vap->va_filerev = zp->z_seq;
+
+	/*
+	 * Add in any requested optional attributes and the create time.
+	 * Also set the corresponding bits in the returned attribute bitmap.
+	 */
+	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
+		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+			xoap->xoa_archive =
+			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
+			XVA_SET_RTN(xvap, XAT_ARCHIVE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+			xoap->xoa_readonly =
+			    ((zp->z_pflags & ZFS_READONLY) != 0);
+			XVA_SET_RTN(xvap, XAT_READONLY);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+			xoap->xoa_system =
+			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
+			XVA_SET_RTN(xvap, XAT_SYSTEM);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+			xoap->xoa_hidden =
+			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
+			XVA_SET_RTN(xvap, XAT_HIDDEN);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+			xoap->xoa_nounlink =
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
+			XVA_SET_RTN(xvap, XAT_NOUNLINK);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+			xoap->xoa_immutable =
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
+			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+			xoap->xoa_appendonly =
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
+			XVA_SET_RTN(xvap, XAT_APPENDONLY);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+			xoap->xoa_nodump =
+			    ((zp->z_pflags & ZFS_NODUMP) != 0);
+			XVA_SET_RTN(xvap, XAT_NODUMP);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+			xoap->xoa_opaque =
+			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
+			XVA_SET_RTN(xvap, XAT_OPAQUE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+			xoap->xoa_av_quarantined =
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
+			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+			xoap->xoa_av_modified =
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
+			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
+		    vp->v_type == VREG) {
+			zfs_sa_get_scanstamp(zp, xvap);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
+			XVA_SET_RTN(xvap, XAT_REPARSE);
+		}
+		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
+			xoap->xoa_generation = zp->z_gen;
+			XVA_SET_RTN(xvap, XAT_GEN);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+			xoap->xoa_offline =
+			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
+			XVA_SET_RTN(xvap, XAT_OFFLINE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+			xoap->xoa_sparse =
+			    ((zp->z_pflags & ZFS_SPARSE) != 0);
+			XVA_SET_RTN(xvap, XAT_SPARSE);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
+			xoap->xoa_projinherit =
+			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
+			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
+			xoap->xoa_projid = zp->z_projid;
+			XVA_SET_RTN(xvap, XAT_PROJID);
+		}
+	}
+
+	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
+	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
+	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
+	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
+
+
+	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
+	vap->va_blksize = blksize;
+	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
+
+	if (zp->z_blksz == 0) {
+		/*
+		 * Block size hasn't been set; suggest maximal I/O transfers.
+		 */
+		vap->va_blksize = zfsvfs->z_max_blksz;
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Set the file attributes to the values contained in the
+ * vattr structure.
+ *
+ *	IN:	zp	- znode of file to be modified.
+ *		vap	- new attribute values.
+ *			  If AT_XVATTR set, then optional attrs are being set
+ *		flags	- ATTR_UTIME set if non-default time values provided.
+ *			- ATTR_NOACLCHECK (CIFS context only).
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	zp - ctime updated, mtime updated if size changed.
+ *
+ * Outline:
+ *	1. Reject requests that can never be satisfied (unsupported mask
+ *	   bits, immutable file, timestamp overflow, read-only file system).
+ *	2. Decide, attribute by attribute, whether the caller is allowed to
+ *	   make the change directly or whether secpolicy_vnode_setattr()
+ *	   has to be consulted (need_policy).
+ *	3. Create and assign a transaction, then collect every changed
+ *	   attribute in bulk[] (and xattr_bulk[] for the xattr directory
+ *	   znode, if there is one).
+ *	4. Write both bulk arrays, commit (or abort) the transaction and
+ *	   release everything that was acquired.
+ */
+/* ARGSUSED */
+int
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+{
+	vnode_t		*vp = ZTOV(zp);
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	objset_t	*os;
+	zilog_t		*zilog;
+	dmu_tx_t	*tx;
+	vattr_t		oldva;
+	xvattr_t	tmpxvattr;
+	uint_t		mask = vap->va_mask;
+	uint_t		saved_mask = 0;
+	uint64_t	saved_mode;
+	int		trim_mask = 0;
+	uint64_t	new_mode;
+	uint64_t	new_uid, new_gid;
+	uint64_t	xattr_obj;
+	uint64_t	mtime[2], ctime[2];
+	uint64_t	projid = ZFS_INVALID_PROJID;
+	znode_t		*attrzp;
+	int		need_policy = FALSE;
+	int		err, err2;
+	zfs_fuid_info_t *fuidp = NULL;
+	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
+	xoptattr_t	*xoap;
+	zfs_acl_t	*aclp;
+	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	boolean_t	fuid_dirtied = B_FALSE;
+	/*
+	 * bulk[] may receive up to eight entries in a single call: FLAGS,
+	 * UID, GID, MODE, ATIME, MTIME, PROJID and CTIME.  It must therefore
+	 * hold eight elements; seven would be overrun when a project ID
+	 * change is combined with owner, group, mode and both timestamps.
+	 * xattr_bulk[] receives at most five entries (FLAGS, PROJID, UID,
+	 * GID, CTIME).
+	 */
+	sa_bulk_attr_t	bulk[8], xattr_bulk[7];
+	int		count = 0, xattr_count = 0;
+
+	/* Nothing requested, nothing to do. */
+	if (mask == 0)
+		return (0);
+
+	if (mask & AT_NOSET)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	os = zfsvfs->z_os;
+	zilog = zfsvfs->z_log;
+
+	/*
+	 * Make sure that if we have ephemeral uid/gid or xvattr specified
+	 * that file system is at proper version level
+	 */
+
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
+	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
+	    (mask & AT_XVATTR))) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/* A size change is only accepted for regular files and FIFOs. */
+	if (mask & AT_SIZE && vp->v_type == VDIR) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EISDIR));
+	}
+
+	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * If this is an xvattr_t, then get a pointer to the structure of
+	 * optional attributes.  If this is NULL, then we have a vattr_t.
+	 */
+	xoap = xva_getxoptattr(xvap);
+
+	/*
+	 * tmpxvattr remembers which optional attributes are temporarily
+	 * removed from the request below because their value is unchanged.
+	 */
+	xva_init(&tmpxvattr);
+
+	/*
+	 * Immutable files can only alter immutable bit and atime
+	 */
+	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
+	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
+	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	/*
+	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
+	 */
+
+	/*
+	 * Verify that the timestamps don't overflow 32 bits.
+	 * ZFS can handle large timestamps, but 32bit syscalls can't
+	 * handle times greater than 2039.  This check should be removed
+	 * once large timestamps are fully supported.
+	 */
+	if (mask & (AT_ATIME | AT_MTIME)) {
+		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
+		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EOVERFLOW));
+		}
+	}
+	if (xoap != NULL && (mask & AT_XVATTR)) {
+		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
+		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EOVERFLOW));
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
+			/*
+			 * Project IDs require project quota support on the
+			 * dataset and only apply to regular files and
+			 * directories.
+			 */
+			if (!dmu_objset_projectquota_enabled(os) ||
+			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
+				ZFS_EXIT(zfsvfs);
+				return (SET_ERROR(EOPNOTSUPP));
+			}
+
+			projid = xoap->xoa_projid;
+			if (unlikely(projid == ZFS_INVALID_PROJID)) {
+				ZFS_EXIT(zfsvfs);
+				return (SET_ERROR(EINVAL));
+			}
+
+			/*
+			 * An unchanged project ID is dropped from the
+			 * request; from here on projid != ZFS_INVALID_PROJID
+			 * means "the project ID must be written".
+			 */
+			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
+				projid = ZFS_INVALID_PROJID;
+			else
+				need_policy = TRUE;
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
+		    (xoap->xoa_projinherit !=
+		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
+		    (!dmu_objset_projectquota_enabled(os) ||
+		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EOPNOTSUPP));
+		}
+	}
+
+	attrzp = NULL;
+	aclp = NULL;
+
+	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EROFS));
+	}
+
+	/*
+	 * First validate permissions
+	 */
+
+	if (mask & AT_SIZE) {
+		/*
+		 * XXX - Note, we are not providing any open
+		 * mode flags here (like FNDELAY), so we may
+		 * block if there are locks present... this
+		 * should be addressed in openat().
+		 */
+		/* XXX - would it be OK to generate a log record here? */
+		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
+		if (err) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+	}
+
+	/*
+	 * For these attributes a failed ACE_WRITE_ATTRIBUTES check does not
+	 * fail the call outright; the non-zero result is stored in
+	 * need_policy so that secpolicy_vnode_setattr() decides below.
+	 */
+	if (mask & (AT_ATIME|AT_MTIME) ||
+	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
+	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
+	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
+	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
+	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
+	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
+	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
+		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
+		    skipaclchk, cr);
+	}
+
+	if (mask & (AT_UID|AT_GID)) {
+		int	idmask = (mask & (AT_UID|AT_GID));
+		int	take_owner;
+		int	take_group;
+
+		/*
+		 * NOTE: even if a new mode is being set,
+		 * we may clear S_ISUID/S_ISGID bits.
+		 */
+
+		if (!(mask & AT_MODE))
+			vap->va_mode = zp->z_mode;
+
+		/*
+		 * Take ownership or chgrp to group we are a member of
+		 */
+
+		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
+		take_group = (mask & AT_GID) &&
+		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
+
+		/*
+		 * If both AT_UID and AT_GID are set then take_owner and
+		 * take_group must both be set in order to allow taking
+		 * ownership.
+		 *
+		 * Otherwise, send the check through secpolicy_vnode_setattr()
+		 *
+		 */
+
+		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
+		    ((idmask == AT_UID) && take_owner) ||
+		    ((idmask == AT_GID) && take_group)) {
+			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
+			    skipaclchk, cr) == 0) {
+				/*
+				 * Remove setuid/setgid for non-privileged users
+				 */
+				secpolicy_setid_clear(vap, vp, cr);
+				trim_mask = (mask & (AT_UID|AT_GID));
+			} else {
+				need_policy =  TRUE;
+			}
+		} else {
+			need_policy =  TRUE;
+		}
+	}
+
+	/* Snapshot of the current mode and owner for the policy checks. */
+	oldva.va_mode = zp->z_mode;
+	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
+	if (mask & AT_XVATTR) {
+		/*
+		 * Update xvattr mask to include only those attributes
+		 * that are actually changing.
+		 *
+		 * the bits will be restored prior to actually setting
+		 * the attributes so the caller thinks they were set.
+		 */
+		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+			if (xoap->xoa_appendonly !=
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
+				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
+			if (xoap->xoa_projinherit !=
+			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
+				XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+			if (xoap->xoa_nounlink !=
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
+				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+			if (xoap->xoa_immutable !=
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
+				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+			if (xoap->xoa_nodump !=
+			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_NODUMP);
+				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+			if (xoap->xoa_av_modified !=
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
+				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+			if ((vp->v_type != VREG &&
+			    xoap->xoa_av_quarantined) ||
+			    xoap->xoa_av_quarantined !=
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
+				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
+			}
+		}
+
+		/* The reparse attribute can never be set through here. */
+		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EPERM));
+		}
+
+		if (need_policy == FALSE &&
+		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
+		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
+			need_policy = TRUE;
+		}
+	}
+
+	if (mask & AT_MODE) {
+		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+			err = secpolicy_setid_setsticky_clear(vp, vap,
+			    &oldva, cr);
+			if (err) {
+				ZFS_EXIT(zfsvfs);
+				return (err);
+			}
+			trim_mask |= AT_MODE;
+		} else {
+			need_policy = TRUE;
+		}
+	}
+
+	if (need_policy) {
+		/*
+		 * If trim_mask is set then take ownership
+		 * has been granted or write_acl is present and user
+		 * has the ability to modify mode.  In that case remove
+		 * UID|GID and or MODE from mask so that
+		 * secpolicy_vnode_setattr() doesn't revoke it.
+		 */
+
+		if (trim_mask) {
+			saved_mask = vap->va_mask;
+			vap->va_mask &= ~trim_mask;
+			if (trim_mask & AT_MODE) {
+				/*
+				 * Save the mode, as secpolicy_vnode_setattr()
+				 * will overwrite it with ova.va_mode.
+				 */
+				saved_mode = vap->va_mode;
+			}
+		}
+		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
+		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
+		if (err) {
+			ZFS_EXIT(zfsvfs);
+			return (err);
+		}
+
+		if (trim_mask) {
+			vap->va_mask |= saved_mask;
+			if (trim_mask & AT_MODE) {
+				/*
+				 * Recover the mode after
+				 * secpolicy_vnode_setattr().
+				 */
+				vap->va_mode = saved_mode;
+			}
+		}
+	}
+
+	/*
+	 * secpolicy_vnode_setattr, or take ownership may have
+	 * changed va_mask
+	 */
+	mask = vap->va_mask;
+
+	if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
+		/*
+		 * Owner, group and project ID are also applied to the
+		 * object named by SA_ZPL_XATTR, if there is one.  Look it
+		 * up and lock it exclusively; from here on every exit path
+		 * has to vput() attrzp.
+		 */
+		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+		    &xattr_obj, sizeof (xattr_obj));
+
+		if (err == 0 && xattr_obj) {
+			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
+			if (err == 0) {
+				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
+				if (err != 0)
+					vrele(ZTOV(attrzp));
+			}
+			if (err)
+				goto out2;
+		}
+		if (mask & AT_UID) {
+			new_uid = zfs_fuid_create(zfsvfs,
+			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
+			if (new_uid != zp->z_uid &&
+			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
+			    new_uid)) {
+				if (attrzp)
+					vput(ZTOV(attrzp));
+				err = SET_ERROR(EDQUOT);
+				goto out2;
+			}
+		}
+
+		if (mask & AT_GID) {
+			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
+			    cr, ZFS_GROUP, &fuidp);
+			if (new_gid != zp->z_gid &&
+			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
+			    new_gid)) {
+				if (attrzp)
+					vput(ZTOV(attrzp));
+				err = SET_ERROR(EDQUOT);
+				goto out2;
+			}
+		}
+
+		if (projid != ZFS_INVALID_PROJID &&
+		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
+			if (attrzp)
+				vput(ZTOV(attrzp));
+			err = SET_ERROR(EDQUOT);
+			goto out2;
+		}
+	}
+	tx = dmu_tx_create(os);
+
+	if (mask & AT_MODE) {
+		uint64_t pmode = zp->z_mode;
+		uint64_t acl_obj;
+		/* Keep the file type bits, replace only the permission bits. */
+		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
+
+		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
+		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
+			err = SET_ERROR(EPERM);
+			goto out;
+		}
+
+		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
+			goto out;
+
+		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
+			/*
+			 * Are we upgrading ACL from old V0 format
+			 * to V1 format?
+			 */
+			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
+			    zfs_znode_acl_version(zp) ==
+			    ZFS_ACL_VERSION_INITIAL) {
+				dmu_tx_hold_free(tx, acl_obj, 0,
+				    DMU_OBJECT_END);
+				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+				    0, aclp->z_acl_bytes);
+			} else {
+				dmu_tx_hold_write(tx, acl_obj, 0,
+				    aclp->z_acl_bytes);
+			}
+		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, aclp->z_acl_bytes);
+		}
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+	} else {
+		if (((mask & AT_XVATTR) &&
+		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
+		    (projid != ZFS_INVALID_PROJID &&
+		    !(zp->z_pflags & ZFS_PROJID)))
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		else
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	}
+
+	if (attrzp) {
+		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
+	}
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+
+	zfs_sa_upgrade_txholds(tx, zp);
+
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err)
+		goto out;
+
+	count = 0;
+	/*
+	 * Set each attribute requested.
+	 * We group settings according to the locks they need to acquire.
+	 *
+	 * Note: you cannot set ctime directly, although it will be
+	 * updated as a side-effect of calling this function.
+	 */
+
+	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
+		/*
+		 * For an existing object that was upgraded from an old system,
+		 * its on-disk layout has no slot for the project ID attribute.
+		 * But quota accounting logic needs to access related slots by
+		 * offset directly. So we need to adjust old objects' layout
+		 * to make the project ID to some unified and fixed offset.
+		 */
+		if (attrzp)
+			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
+		if (err == 0)
+			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
+
+		/*
+		 * On EEXIST the project ID is still written through the
+		 * bulk update below; on success it has been stored already,
+		 * so projid is reset to skip the bulk entries.
+		 */
+		if (unlikely(err == EEXIST))
+			err = 0;
+		else if (err != 0)
+			goto out;
+		else
+			projid = ZFS_INVALID_PROJID;
+	}
+
+	if (mask & (AT_UID|AT_GID|AT_MODE))
+		mutex_enter(&zp->z_acl_lock);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+
+	if (attrzp) {
+		if (mask & (AT_UID|AT_GID|AT_MODE))
+			mutex_enter(&attrzp->z_acl_lock);
+		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
+		    sizeof (attrzp->z_pflags));
+		if (projid != ZFS_INVALID_PROJID) {
+			attrzp->z_projid = projid;
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
+			    sizeof (attrzp->z_projid));
+		}
+	}
+
+	if (mask & (AT_UID|AT_GID)) {
+
+		if (mask & AT_UID) {
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+			    &new_uid, sizeof (new_uid));
+			zp->z_uid = new_uid;
+			if (attrzp) {
+				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
+				    sizeof (new_uid));
+				attrzp->z_uid = new_uid;
+			}
+		}
+
+		if (mask & AT_GID) {
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
+			    NULL, &new_gid, sizeof (new_gid));
+			zp->z_gid = new_gid;
+			if (attrzp) {
+				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
+				    sizeof (new_gid));
+				attrzp->z_gid = new_gid;
+			}
+		}
+		if (!(mask & AT_MODE)) {
+			/*
+			 * The bulk entry stores the address of new_mode, so
+			 * the value may be filled in after it was registered.
+			 */
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
+			    NULL, &new_mode, sizeof (new_mode));
+			new_mode = zp->z_mode;
+		}
+		err = zfs_acl_chown_setattr(zp);
+		ASSERT0(err);
+		if (attrzp) {
+			vn_seqc_write_begin(ZTOV(attrzp));
+			err = zfs_acl_chown_setattr(attrzp);
+			vn_seqc_write_end(ZTOV(attrzp));
+			ASSERT0(err);
+		}
+	}
+
+	if (mask & AT_MODE) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+		    &new_mode, sizeof (new_mode));
+		zp->z_mode = new_mode;
+		ASSERT3P(aclp, !=, NULL);
+		err = zfs_aclset_common(zp, aclp, cr, tx);
+		ASSERT0(err);
+		/* The new ACL replaces the cached one; zp now owns aclp. */
+		if (zp->z_acl_cached)
+			zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = aclp;
+		aclp = NULL;
+	}
+
+
+	if (mask & AT_ATIME) {
+		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+		    &zp->z_atime, sizeof (zp->z_atime));
+	}
+
+	if (mask & AT_MTIME) {
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		    mtime, sizeof (mtime));
+	}
+
+	if (projid != ZFS_INVALID_PROJID) {
+		zp->z_projid = projid;
+		SA_ADD_BULK_ATTR(bulk, count,
+		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
+		    sizeof (zp->z_projid));
+	}
+
+	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
+	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
+		    NULL, mtime, sizeof (mtime));
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
+	} else if (mask != 0) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
+		if (attrzp) {
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_CTIME(zfsvfs), NULL,
+			    &ctime, sizeof (ctime));
+			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
+			    mtime, ctime);
+		}
+	}
+
+	/*
+	 * Do this after setting timestamps to prevent timestamp
+	 * update from toggling bit
+	 */
+
+	if (xoap && (mask & AT_XVATTR)) {
+
+		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
+			xoap->xoa_createtime = vap->va_birthtime;
+		/*
+		 * restore trimmed off masks
+		 * so that return masks can be set for caller.
+		 */
+
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
+			XVA_SET_REQ(xvap, XAT_APPENDONLY);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
+			XVA_SET_REQ(xvap, XAT_NOUNLINK);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
+			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
+			XVA_SET_REQ(xvap, XAT_NODUMP);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
+			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
+			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
+		}
+		if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
+			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
+			ASSERT3S(vp->v_type, ==, VREG);
+
+		zfs_xvattr_set(zp, xvap, tx);
+	}
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	if (mask != 0)
+		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
+
+	if (mask & (AT_UID|AT_GID|AT_MODE))
+		mutex_exit(&zp->z_acl_lock);
+
+	if (attrzp) {
+		if (mask & (AT_UID|AT_GID|AT_MODE))
+			mutex_exit(&attrzp->z_acl_lock);
+	}
+out:
+	/*
+	 * Common exit once a transaction exists: flush the xattr znode's
+	 * attributes on success, drop attrzp, free whatever is still owned
+	 * locally, then commit or abort the transaction.
+	 */
+	if (err == 0 && attrzp) {
+		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
+		    xattr_count, tx);
+		ASSERT0(err2);
+	}
+
+	if (attrzp)
+		vput(ZTOV(attrzp));
+
+	if (aclp)
+		zfs_acl_free(aclp);
+
+	if (fuidp) {
+		zfs_fuid_info_free(fuidp);
+		fuidp = NULL;
+	}
+
+	if (err) {
+		dmu_tx_abort(tx);
+	} else {
+		/*
+		 * NOTE(review): the result stored in err2 here is neither
+		 * asserted nor returned to the caller.
+		 */
+		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		dmu_tx_commit(tx);
+	}
+
+out2:
+	/* Exit used when no transaction has been created yet. */
+	if (os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (err);
+}
+
+/*
+ * Resolve the source and target names of a rename inside their (already
+ * locked) directories.
+ *
+ *	IN:	sdzp	- source directory znode.
+ *		scnp	- source component name.
+ *		tdzp	- target directory znode.
+ *		tcnp	- target component name.
+ *	OUT:	szpp	- znode found for the source name.
+ *		tzpp	- znode found for the target name; written only
+ *			  when both lookups succeed.
+ *
+ *	RETURN:	0 on success, error code on failure.  When the target
+ *		lookup fails, the reference obtained on the source is
+ *		released again before returning.
+ */
+static int
+zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
+    znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
+    znode_t **tzpp)
+{
+	zfsvfs_t *zfsvfs = sdzp->z_zfsvfs;
+	znode_t *src, *dst;
+	int rc;
+
+	/*
+	 * sdzp and tdzp may only be used while they are known to be live.
+	 * Two things inherited from the illumos port need attention here:
+	 * the usual FreeBSD concern that the vnode has not been reclaimed
+	 * (doomed), and the liveness of the znode itself.  A rollback or
+	 * receive operation can invalidate a znode without taking the
+	 * vnode lock when the underlying object is no longer valid.  The
+	 * z_teardown_lock taken and dropped by ZFS_ENTER and ZFS_EXIT is
+	 * what guards the znodes against that invalidation.
+	 */
+	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(sdzp);
+	ZFS_VERIFY_ZP(tdzp);
+
+	/*
+	 * Look the source up again so that we know it still exists and
+	 * pick up the vnode that currently carries the name.
+	 */
+	rc = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &src, ZEXISTS);
+	if (rc == 0) {
+		*szpp = src;
+
+		/*
+		 * Look the target up again as well.  ZEXISTS is not passed
+		 * here; the caller copes with a NULL target znode.
+		 */
+		rc = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &dst, 0);
+		if (rc == 0) {
+			*tzpp = dst;
+		} else {
+			vrele(ZTOV(src));
+			if ((tcnp->cn_flags & ISDOTDOT) != 0)
+				rc = SET_ERROR(EINVAL);
+		}
+	} else if ((scnp->cn_flags & ISDOTDOT) != 0 ||
+	    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) {
+		/* The source is "." or "..": report that as invalid. */
+		rc = SET_ERROR(EINVAL);
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (rc);
+}
+
+/*
+ * Re-acquire the locks on all vnodes taking part in a rename.
+ *
+ * We acquire all but fdvp locks using non-blocking acquisitions.  If we
+ * fail to acquire any lock in the path we will drop all held locks,
+ * acquire the new lock in a blocking fashion, and then release it and
+ * restart the rename.  This acquire/release step ensures that we do not
+ * spin on a lock waiting for release.  On error release all vnode locks
+ * and decrement references the way tmpfs_rename() would do.
+ *
+ *	IN:	sdvp	- source directory vnode.
+ *		tdvp	- target directory vnode; its lock is dropped on
+ *			  entry and taken again here.
+ *		scnp	- source component name.
+ *		tcnp	- target component name.
+ *	INOUT:	svpp	- source vnode; replaced by the vnode the source
+ *			  name resolves to after relocking.
+ *		tvpp	- target vnode; replaced by the vnode the target
+ *			  name resolves to, or set to NULL.
+ *
+ *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
+ *		exclusively; otherwise an error code with none of the
+ *		locks taken by this function still held.
+ */
+static int
+zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
+    struct vnode *tdvp, struct vnode **tvpp,
+    const struct componentname *scnp, const struct componentname *tcnp)
+{
+	struct vnode	*nvp, *svp, *tvp;
+	znode_t		*sdzp, *tdzp, *szp, *tzp;
+	int		error;
+
+	/* Start from a state in which this function holds no vnode locks. */
+	VOP_UNLOCK1(tdvp);
+	if (*tvpp != NULL && *tvpp != tdvp)
+		VOP_UNLOCK1(*tvpp);
+
+relock:
+	/* The source directory is the only lock taken with blocking. */
+	error = vn_lock(sdvp, LK_EXCLUSIVE);
+	if (error)
+		goto out;
+	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
+	if (error != 0) {
+		VOP_UNLOCK1(sdvp);
+		if (error != EBUSY)
+			goto out;
+		/*
+		 * tdvp is busy: wait for it while holding nothing else,
+		 * then drop it again and start over.
+		 */
+		error = vn_lock(tdvp, LK_EXCLUSIVE);
+		if (error)
+			goto out;
+		VOP_UNLOCK1(tdvp);
+		goto relock;
+	}
+	tdzp = VTOZ(tdvp);
+	sdzp = VTOZ(sdvp);
+
+	/* Both directories are locked; resolve the two names again. */
+	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
+	if (error != 0) {
+		VOP_UNLOCK1(sdvp);
+		VOP_UNLOCK1(tdvp);
+		goto out;
+	}
+	svp = ZTOV(szp);
+	tvp = tzp != NULL ? ZTOV(tzp) : NULL;
+
+	/*
+	 * Now try acquire locks on svp and tvp.
+	 */
+	nvp = svp;
+	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
+	if (error != 0) {
+		VOP_UNLOCK1(sdvp);
+		VOP_UNLOCK1(tdvp);
+		/* The target is looked up again after the restart. */
+		if (tvp != NULL)
+			vrele(tvp);
+		if (error != EBUSY) {
+			vrele(nvp);
+			goto out;
+		}
+		error = vn_lock(nvp, LK_EXCLUSIVE);
+		if (error != 0) {
+			vrele(nvp);
+			goto out;
+		}
+		VOP_UNLOCK1(nvp);
+		/*
+		 * Concurrent rename race.
+		 * XXX ?
+		 */
+		if (nvp == tdvp) {
+			vrele(nvp);
+			error = SET_ERROR(EINVAL);
+			goto out;
+		}
+		/* Hand the newly resolved source vnode back to the caller. */
+		vrele(*svpp);
+		*svpp = nvp;
+		goto relock;
+	}
+	/* Source locked: swap the caller's reference for the new one. */
+	vrele(*svpp);
+	*svpp = nvp;
+
+	/* Drop the caller's old target reference before installing a new one. */
+	if (*tvpp != NULL)
+		vrele(*tvpp);
+	*tvpp = NULL;
+	if (tvp != NULL) {
+		nvp = tvp;
+		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
+		if (error != 0) {
+			VOP_UNLOCK1(sdvp);
+			VOP_UNLOCK1(tdvp);
+			VOP_UNLOCK1(*svpp);
+			if (error != EBUSY) {
+				vrele(nvp);
+				goto out;
+			}
+			error = vn_lock(nvp, LK_EXCLUSIVE);
+			if (error != 0) {
+				vrele(nvp);
+				goto out;
+			}
+			/* Unlock and release; the restart looks it up again. */
+			vput(nvp);
+			goto relock;
+		}
+		*tvpp = nvp;
+	}
+
+	return (0);
+
+out:
+	return (error);
+}
+
+/*
+ * Verify that moving szp from sdzp into tdzp would not place a directory
+ * underneath itself, by following parent links upwards from tdzp.
+ *
+ * The references taken on the way up are dropped with VN_RELE_ASYNC
+ * because the walk climbs the directory tree and a plain vrele may have
+ * to take an exclusive lock when it drops the last reference to a vnode.
+ *
+ *	RETURN:	0 if the rename is acceptable, EINVAL if szp is tdzp or
+ *		one of its ancestors, otherwise the error of a failed
+ *		lookup.
+ */
+static int
+zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
+{
+	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
+	znode_t		*cur, *up;
+	uint64_t	parent_id;
+	int		rc;
+
+	/* Renaming a directory into itself is never valid. */
+	if (tdzp == szp)
+		return (SET_ERROR(EINVAL));
+
+	/* Same directory, or target is the root: nothing to walk. */
+	if (tdzp == sdzp || tdzp->z_id == zfsvfs->z_root)
+		return (0);
+
+	for (cur = tdzp; ; cur = up) {
+		ASSERT(!cur->z_unlinked);
+
+		rc = sa_lookup(cur->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+		    &parent_id, sizeof (parent_id));
+		if (rc != 0)
+			break;
+
+		/* The source is an ancestor of the target directory. */
+		if (parent_id == szp->z_id) {
+			rc = SET_ERROR(EINVAL);
+			break;
+		}
+
+		/* Reached the root or the source directory: path is fine. */
+		if (parent_id == zfsvfs->z_root || parent_id == sdzp->z_id)
+			break;
+
+		rc = zfs_zget(zfsvfs, parent_id, &up);
+		if (rc != 0)
+			break;
+
+		/* tdzp belongs to the caller; release only what we took. */
+		if (cur != tdzp)
+			VN_RELE_ASYNC(ZTOV(cur),
+			    dsl_pool_zrele_taskq(
+			    dmu_objset_pool(zfsvfs->z_os)));
+	}
+
+	if (rc == ENOTDIR)
+		panic("checkpath: .. not a directory\n");
+
+	/* Release the last intermediate znode still referenced, if any. */
+	if (cur != tdzp)
+		VN_RELE_ASYNC(ZTOV(cur),
+		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
+
+	return (rc);
+}
+
+#if	__FreeBSD_version < 1300124
+/*
+ * Local definition of cache_vop_rename(), compiled only when
+ * __FreeBSD_version is below 1300124.  It purges the name cache entries
+ * of the source vnode and, if present, the target vnode, plus the
+ * negative entries of the target directory.  fdvp, fcnp and tcnp are
+ * accepted but not used.
+ */
+static void
+cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
+    struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
+{
+	cache_purge(fvp);
+	if (tvp != NULL) {
+		cache_purge(tvp);
+	}
+	cache_purge_negative(tdvp);
+}
+#endif	/* __FreeBSD_version < 1300124 */
+
+/*
+ * Forward declaration: zfs_do_rename() calls zfs_do_rename_impl() before
+ * the latter is defined further down in this file.
+ */
+static int
+zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
+    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
+    cred_t *cr);
+
+/*
+ * Rename an entry: move it from the source directory to the target
+ * directory under the new name.
+ *
+ *	IN:	sdvp	- Source directory containing the "old entry".
+ *		scnp	- Old entry name.
+ *		tdvp	- Target directory to contain the "new entry".
+ *		tcnp	- New entry name.
+ *		cr	- credentials of caller.
+ *	INOUT:	svpp	- Source file
+ *		tvpp	- Target file, may point to NULL initially
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Locking:
+ *	tdvp and, if present, *tvpp are asserted to be exclusively locked
+ *	on entry.  Every lock held here is dropped before returning.
+ *
+ * Timestamps:
+ *	sdvp,tdvp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+static int
+zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
+    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
+    cred_t *cr)
+{
+	int	rc;
+
+	ASSERT_VOP_ELOCKED(tdvp, __func__);
+	if (*tvpp != NULL)
+		ASSERT_VOP_ELOCKED(*tvpp, __func__);
+
+	/*
+	 * Refuse renames that cross file systems, as well as renames whose
+	 * target directory is a control node.
+	 */
+	if ((*svpp)->v_mount != tdvp->v_mount ||
+	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount) ||
+	    zfsctl_is_node(tdvp)) {
+		rc = SET_ERROR(EXDEV);
+		goto unlock_target;
+	}
+
+	/*
+	 * Take the locks on all four vnodes; the rename is only safe and
+	 * well-defined with every one of them held.
+	 */
+	rc = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
+	if (rc != 0) {
+		/* a failed relock leaves no vnode locked */
+		return (rc);
+	}
+
+	rc = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
+
+	/* Source side first, then fall through to the target side. */
+	VOP_UNLOCK1(sdvp);
+	VOP_UNLOCK1(*svpp);
+unlock_target:
+	if (*tvpp != NULL)
+		VOP_UNLOCK1(*tvpp);
+	if (tdvp != *tvpp)
+		VOP_UNLOCK1(tdvp);
+
+	return (rc);
+}
+
+static int
+zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
+    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
+    cred_t *cr)
+{
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs;
+	zilog_t		*zilog;
+	znode_t		*tdzp, *sdzp, *tzp, *szp;
+	const char	*snm = scnp->cn_nameptr;
+	const char	*tnm = tcnp->cn_nameptr;
+	int		error;
+
+	tdzp = VTOZ(tdvp);
+	sdzp = VTOZ(sdvp);
+	zfsvfs = tdzp->z_zfsvfs;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(tdzp);
+	ZFS_VERIFY_ZP(sdzp);
+	zilog = zfsvfs->z_log;
+
+	if (zfsvfs->z_utf8 && u8_validate(tnm,
+	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		error = SET_ERROR(EILSEQ);
+		goto out;
+	}
+
+	/* If source and target are the same file, there is nothing to do. */
+	if ((*svpp) == (*tvpp)) {
+		error = 0;
+		goto out;
+	}
+
+	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
+	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
+	    (*tvpp)->v_mountedhere != NULL)) {
+		error = SET_ERROR(EXDEV);
+		goto out;
+	}
+
+	szp = VTOZ(*svpp);
+	ZFS_VERIFY_ZP(szp);
+	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
+	if (tzp != NULL)
+		ZFS_VERIFY_ZP(tzp);
+
+	/*
+	 * This is to prevent the creation of links into attribute space
+	 * by renaming a linked file into/outof an attribute directory.
+	 * See the comment in zfs_link() for why this is considered bad.
+	 */
+	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
+		error = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/*
+	 * If we are using project inheritance, means if the directory has
+	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
+	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
+	 * such case, we only allow renames into our tree when the project
+	 * IDs are the same.
+	 */
+	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
+	    tdzp->z_projid != szp->z_projid) {
+		error = SET_ERROR(EXDEV);
+		goto out;
+	}
+
+	/*
+	 * Must have write access at the source to remove the old entry
+	 * and write access at the target to create the new entry.
+	 * Note that if target and source are the same, this can be
+	 * done in a single check.
+	 */
+	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+		goto out;
+
+	if ((*svpp)->v_type == VDIR) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 */
+		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
+		    sdzp == szp ||
+		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Check to make sure rename is valid.
+		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
+		 */
+		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
+			goto out;
+	}
+
+	/*
+	 * Does target exist?
+	 */
+	if (tzp) {
+		/*
+		 * Source and target must be the same type.
+		 */
+		if ((*svpp)->v_type == VDIR) {
+			if ((*tvpp)->v_type != VDIR) {
+				error = SET_ERROR(ENOTDIR);
+				goto out;
+			} else {
+				cache_purge(tdvp);
+				if (sdvp != tdvp)
+					cache_purge(sdvp);
+			}
+		} else {
+			if ((*tvpp)->v_type == VDIR) {
+				error = SET_ERROR(EISDIR);
+				goto out;
+			}
+		}
+	}
+
+	vn_seqc_write_begin(*svpp);
+	vn_seqc_write_begin(sdvp);
+	if (*tvpp != NULL)
+		vn_seqc_write_begin(*tvpp);
+	if (tdvp != *tvpp)
+		vn_seqc_write_begin(tdvp);
+
+	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
+	if (tzp)
+		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
+
+	/*
+	 * notify the target directory if it is not the same
+	 * as source directory.
+	 */
+	if (tdvp != sdvp) {
+		vnevent_rename_dest_dir(tdvp, ct);
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
+	if (sdzp != tdzp) {
+		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tdzp);
+	}
+	if (tzp) {
+		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tzp);
+	}
+
+	zfs_sa_upgrade_txholds(tx, szp);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		goto out_seq;
+	}
+
+	if (tzp)	/* Attempt to remove the existing target */
+		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
+
+	if (error == 0) {
+		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
+		if (error == 0) {
+			szp->z_pflags |= ZFS_AV_MODIFIED;
+
+			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+			ASSERT0(error);
+
+			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
+			    NULL);
+			if (error == 0) {
+				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
+				    snm, tdzp, tnm, szp);
+
+				/*
+				 * Update path information for the target vnode
+				 */
+				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
+			} else {
+				/*
+				 * At this point, we have successfully created
+				 * the target name, but have failed to remove
+				 * the source name.  Since the create was done
+				 * with the ZRENAMING flag, there are
+				 * complications; for one, the link count is
+				 * wrong.  The easiest way to deal with this
+				 * is to remove the newly created target, and
+				 * return the original error.  This must
+				 * succeed; fortunately, it is very unlikely to
+				 * fail, since we just created it.
+				 */
+				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
+				    ZRENAMING, NULL));
+			}
+		}
+		if (error == 0) {
+			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
+		}
+	}
+
+	dmu_tx_commit(tx);
+
+out_seq:
+	vn_seqc_write_end(*svpp);
+	vn_seqc_write_end(sdvp);
+	if (*tvpp != NULL)
+		vn_seqc_write_end(*tvpp);
+	if (tdvp != *tvpp)
+		vn_seqc_write_end(tdvp);
+
+out:
+	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Rename the entry "sname" in directory sdzp to "tname" in directory tdzp.
+ *
+ * This function resolves the source and (optional) target entries to
+ * vnodes and then hands the actual work to zfs_do_rename().
+ *
+ *	IN:	sdzp	- Source directory.
+ *		sname	- Name of the existing entry.
+ *		tdzp	- Target directory.
+ *		tname	- Name the entry should have afterwards.
+ *		cr	- credentials of caller.
+ *		flags	- not referenced by this function.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ */
+int
+zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
+    cred_t *cr, int flags)
+{
+	struct componentname scn, tcn;
+	vnode_t *sdvp, *tdvp;
+	vnode_t *svp, *tvp;
+	int error;
+	/* Start from NULL so the common exit path knows what to release. */
+	svp = tvp = NULL;
+
+	sdvp = ZTOV(sdzp);
+	tdvp = ZTOV(tdzp);
+	/* Resolve the source entry. */
+	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
+	/* The source directory is unlocked here except during replay. */
+	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
+		VOP_UNLOCK1(sdvp);
+	if (error != 0)
+		goto fail;
+	VOP_UNLOCK1(svp);
+
+	/* Lock the target directory before resolving the target entry. */
+	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
+	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
+	/* EJUSTRETURN is treated as "no existing target", not as a failure. */
+	if (error == EJUSTRETURN)
+		tvp = NULL;
+	else if (error != 0) {
+		VOP_UNLOCK1(tdvp);
+		goto fail;
+	}
+
+	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
+fail:
+	/* Drop the references on whichever vnodes were obtained. */
+	if (svp != NULL)
+		vrele(svp);
+	if (tvp != NULL)
+		vrele(tvp);
+
+	return (error);
+}
+
+/*
+ * Insert the indicated symbolic reference entry into the directory.
+ *
+ *	IN:	dzp	- Directory to contain new symbolic link.
+ *		name	- Name for new symlink entry.
+ *		vap	- Attributes of new entry; va_type must be VLNK.
+ *		link	- Target path stored in the symlink.
+ *		cr	- credentials of caller.
+ *		flags	- not referenced by this function.
+ *
+ *	OUT:	zpp	- znode created for the new symlink.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *		EILSEQ if the filesystem is UTF-8 only and name is not valid.
+ *		ENAMETOOLONG if link is longer than MAXPATHLEN.
+ *		EDQUOT if zfs_acl_ids_overquota() reports over quota.
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+int
+zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
+    const char *link, znode_t **zpp, cred_t *cr, int flags)
+{
+	znode_t		*zp;
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	zilog_t		*zilog;
+	uint64_t	len = strlen(link);
+	int		error;
+	zfs_acl_ids_t	acl_ids;
+	boolean_t	fuid_dirtied;
+	uint64_t	txtype = TX_SYMLINK;
+
+	ASSERT3S(vap->va_type, ==, VLNK);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	/* On UTF-8-only filesystems the entry name must validate. */
+	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	if (len > MAXPATHLEN) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENAMETOOLONG));
+	}
+
+	/* From here on every exit path must free acl_ids. */
+	if ((error = zfs_acl_ids_create(dzp, 0,
+	    vap, cr, NULL, &acl_ids)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Attempt to lock directory; fail if entry already exists.
+	 */
+	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
+	    0 /* projid */)) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EDQUOT));
+	}
+
+	/*
+	 * The vnode reservation taken here is dropped again on both the
+	 * tx-assign failure path and the normal exit path.
+	 */
+	getnewvnode_reserve_();
+	tx = dmu_tx_create(zfsvfs->z_os);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE + len);
+	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		getnewvnode_drop_reserve();
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Create a new object for the symlink.
+	 * for version 4 ZPL datasets the symlink will be an SA attribute
+	 */
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	/*
+	 * Store the link target.  Only the SA branch assigns "error"; on the
+	 * other branch it keeps the 0 left by dmu_tx_assign() above.
+	 */
+	if (zp->z_is_sa)
+		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
+		    __DECONST(void *, link), len, tx);
+	else
+		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
+
+	zp->z_size = len;
+	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx);
+	/*
+	 * Insert the new object into the directory.
+	 *
+	 * NOTE(review): the return value of zfs_link_create() is discarded,
+	 * so a failure to insert the entry is not reported to the caller and
+	 * the symlink is still logged and returned via *zpp - confirm whether
+	 * this can fail here and whether cleanup is needed.
+	 */
+	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
+
+	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
+	*zpp = zp;
+
+	zfs_acl_ids_free(&acl_ids);
+
+	dmu_tx_commit(tx);
+
+	getnewvnode_drop_reserve();
+
+	/* With sync=always the intent log is committed before returning. */
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Copy the path stored in the symbolic link vp into the caller's uio.
+ *
+ *	IN:	vp	- vnode of symbolic link.
+ *		uio	- structure to contain the link path.
+ *		cr	- credentials of caller (unused).
+ *		ct	- caller context (unused).
+ *
+ *	OUT:	uio	- structure containing the link path.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	vp - atime updated
+ */
+/* ARGSUSED */
+static int
+zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
+{
+	znode_t		*linkzp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = linkzp->z_zfsvfs;
+	int		rc;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(linkzp);
+
+	/* The target lives either in an SA attribute or in the old layout. */
+	rc = linkzp->z_is_sa ?
+	    sa_lookup_uio(linkzp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), uio) :
+	    zfs_sa_readlink(linkzp, uio);
+
+	ZFS_ACCESSTIME_STAMP(zfsvfs, linkzp);
+
+	ZFS_EXIT(zfsvfs);
+	return (rc);
+}
+
+/*
+ * Insert a new entry into directory tdzp referencing szp.
+ *
+ *	IN:	tdzp	- Directory to contain new entry.
+ *		szp	- znode the new entry refers to.
+ *		name	- name of new entry.
+ *		cr	- credentials of caller.
+ *		flags	- not referenced by this function.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *		EPERM if szp is a directory, carries an append-only,
+ *		immutable or read-only flag, lives in the shares directory,
+ *		or the caller is neither owner nor passes
+ *		secpolicy_basic_link().
+ *		EXDEV if project inheritance is on and project IDs differ.
+ *		EILSEQ if the filesystem is UTF-8 only and name is not valid.
+ *		EINVAL if exactly one of szp/tdzp is in attribute space.
+ *
+ * Timestamps:
+ *	tdzp - ctime|mtime updated
+ *	 szp - ctime updated
+ */
+/* ARGSUSED */
+int
+zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
+    int flags)
+{
+	znode_t		*tzp;
+	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
+	zilog_t		*zilog;
+	dmu_tx_t	*tx;
+	int		error;
+	uint64_t	parent;
+	uid_t		owner;
+
+	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(tdzp);
+	zilog = zfsvfs->z_log;
+
+	/*
+	 * POSIX dictates that we return EPERM here.
+	 * Better choices include ENOTSUP or EISDIR.
+	 */
+	if (ZTOV(szp)->v_type == VDIR) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	ZFS_VERIFY_ZP(szp);
+
+	/*
+	 * If we are using project inheritance, means if the directory has
+	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
+	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
+	 * such case, we only allow hard link creation in our tree when the
+	 * project IDs are the same.
+	 */
+	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
+	    tdzp->z_projid != szp->z_projid) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EXDEV));
+	}
+
+	/* Files carrying any of these flags cannot gain additional links. */
+	if (szp->z_pflags & (ZFS_APPENDONLY |
+	    ZFS_IMMUTABLE | ZFS_READONLY)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	/* Prevent links to .zfs/shares files */
+
+	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (uint64_t))) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	if (parent == zfsvfs->z_shares_dir) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	/* On UTF-8-only filesystems the entry name must validate. */
+	if (zfsvfs->z_utf8 && u8_validate(name,
+	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	/*
+	 * We do not support links between attributes and non-attributes
+	 * because of the potential security risk of creating links
+	 * into "normal" file space in order to circumvent restrictions
+	 * imposed in attribute space.
+	 */
+	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+
+	/* Non-owners additionally have to pass secpolicy_basic_link(). */
+	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
+	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Attempt to lock directory; fail if entry already exists.
+	 */
+	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
+	if (error) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
+	zfs_sa_upgrade_txholds(tx, szp);
+	zfs_sa_upgrade_txholds(tx, tdzp);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = zfs_link_create(tdzp, name, szp, tx, 0);
+
+	/* Only a successfully created link is recorded in the intent log. */
+	if (error == 0) {
+		uint64_t txtype = TX_LINK;
+		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
+	}
+
+	/* The transaction is committed whether or not the link was created. */
+	dmu_tx_commit(tx);
+
+	/*
+	 * NOTE(review): "ct" is not declared in this function; this can only
+	 * build if vnevent_link() is a macro that discards its arguments -
+	 * confirm against the platform's vnevent definitions.
+	 */
+	if (error == 0) {
+		vnevent_link(ZTOV(szp), ct);
+	}
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Free or allocate space in a file.  Currently, this function only
+ * supports the `F_FREESP' command.  However, this command is somewhat
+ * misnamed, as its functionality includes the ability to allocate as
+ * well as free space.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		cmd	- action to take (only F_FREESP supported).
+ *		bfp	- section of file to free/alloc; l_start is the
+ *			  offset and l_len the length (0 means to end of
+ *			  file, negative is rejected).
+ *		flag	- current file open mode flags.
+ *		offset	- current file offset (not referenced here).
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *		EINVAL if cmd is not F_FREESP or bfp->l_len is negative.
+ *		EROFS if the filesystem is read-only.
+ *
+ * Timestamps:
+ *	zp - ctime|mtime updated
+ */
+/* ARGSUSED */
+int
+zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
+    offset_t offset, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	uint64_t	off, len;
+	int		error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if (cmd != F_FREESP) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Callers might not be able to detect properly that we are read-only,
+	 * so check it explicitly here.
+	 */
+	if (zfs_is_readonly(zfsvfs)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EROFS));
+	}
+
+	/* Reject negative lengths before they are stored in an unsigned. */
+	if (bfp->l_len < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Permissions aren't checked on Solaris because on this OS
+	 * zfs_space() can only be called with an opened file handle.
+	 * On Linux we can get here through truncate_range() which
+	 * operates directly on inodes, so we need to check access rights.
+	 */
+	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	off = bfp->l_start;
+	len = bfp->l_len; /* 0 means from off to end of file */
+
+	error = zfs_freesp(zp, off, len, flag, TRUE);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Called when vp is no longer in use: recycle it right away when there is
+ * nothing left to preserve, otherwise write back a dirty access time.
+ */
+/*ARGSUSED*/
+static void
+zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
+
+	/*
+	 * Two situations need nothing more than a recycle:
+	 *  - no SA handle: the fs has been unmounted, or we did a
+	 *    suspend/resume and this file no longer exists;
+	 *  - the file was removed (fast path).
+	 */
+	if (zp->z_sa_hdl == NULL || zp->z_unlinked) {
+		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
+		vrecycle(vp);
+		return;
+	}
+
+	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
+		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
+		int error;
+
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, zp);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error == 0) {
+			/* Persist the cached atime and mark it clean. */
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
+			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
+			zp->z_atime_dirty = 0;
+			dmu_tx_commit(tx);
+		} else {
+			dmu_tx_abort(tx);
+		}
+	}
+	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
+}
+
+
+/* Both ZFS fid layouts are written into a struct fid, so they must fit. */
+CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid));
+CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid));
+
+/*
+ * Build the file identifier for vp in *fidp.
+ *
+ * The object number and generation are stored one byte at a time, least
+ * significant byte first.  When the filesystem's z_parent differs from
+ * the filesystem itself the long form is used, which also carries the
+ * objset id and a zeroed objset generation.
+ *
+ * Returns 0 on success or the error from the generation lookup.
+ */
+/*ARGSUSED*/
+static int
+zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
+{
+	znode_t		*zp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	uint64_t	object = zp->z_id;
+	uint64_t	gen64;
+	uint32_t	gen;
+	zfid_short_t	*zfid;
+	int		size, b, error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
+	    &gen64, sizeof (uint64_t));
+	if (error != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/* Must have a non-zero generation number to distinguish from .zfs */
+	gen = (uint32_t)gen64;
+	if (gen == 0)
+		gen = 1;
+
+	if (zfsvfs->z_parent != zfsvfs)
+		size = LONG_FID_LEN;
+	else
+		size = SHORT_FID_LEN;
+	fidp->fid_len = size;
+
+	zfid = (zfid_short_t *)fidp;
+	zfid->zf_len = size;
+
+	for (b = 0; b < sizeof (zfid->zf_object); b++)
+		zfid->zf_object[b] = (uint8_t)(object >> (8 * b));
+
+	for (b = 0; b < sizeof (zfid->zf_gen); b++)
+		zfid->zf_gen[b] = (uint8_t)(gen >> (8 * b));
+
+	if (size == LONG_FID_LEN) {
+		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
+		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
+
+		for (b = 0; b < sizeof (zlfid->zf_setid); b++)
+			zlfid->zf_setid[b] = (uint8_t)(objsetid >> (8 * b));
+
+		/* XXX - this should be the generation number for the objset */
+		for (b = 0; b < sizeof (zlfid->zf_setgen); b++)
+			zlfid->zf_setgen[b] = 0;
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Answer a pathconf query for vp.
+ *
+ *	IN:	vp	- vnode being queried (only dereferenced for the
+ *			  ACL queries).
+ *		cmd	- _PC_* variable being requested.
+ *		cr	- credentials of caller (not referenced).
+ *		ct	- caller context (not referenced).
+ *
+ *	OUT:	valp	- value of the requested variable.
+ *
+ *	RETURN:	0 if cmd is handled here, EOPNOTSUPP otherwise.
+ */
+static int
+zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+    caller_context_t *ct)
+{
+	znode_t *zp;
+	zfsvfs_t *zfsvfs;
+
+	switch (cmd) {
+	case _PC_LINK_MAX:
+		*valp = MIN(LONG_MAX, ZFS_LINK_MAX);
+		return (0);
+
+	case _PC_FILESIZEBITS:
+		*valp = 64;
+		return (0);
+	case _PC_MIN_HOLE_SIZE:
+		*valp = (int)SPA_MINBLOCKSIZE;
+		return (0);
+	case _PC_ACL_EXTENDED:
+		/* The per-filesystem check is compiled out; always report 0. */
+#if 0		/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
+		zp = VTOZ(vp);
+		zfsvfs = zp->z_zfsvfs;
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
+		*valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
+		ZFS_EXIT(zfsvfs);
+#else
+		*valp = 0;
+#endif
+		return (0);
+
+	case _PC_ACL_NFS4:
+		/* Reports 1 only when the filesystem's ACL type is NFSv4. */
+		zp = VTOZ(vp);
+		zfsvfs = zp->z_zfsvfs;
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
+		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
+		ZFS_EXIT(zfsvfs);
+		return (0);
+
+	case _PC_ACL_PATH_MAX:
+		*valp = ACL_MAX_ENTRIES;
+		return (0);
+
+	default:
+		/* Anything else is left to the caller to resolve. */
+		return (EOPNOTSUPP);
+	}
+}
+
+/*
+ * Read the pages ma[0 .. count-1] of vp from the DMU.
+ *
+ *	IN:	vp	- vnode being paged in.
+ *		ma	- array of pages to fill.
+ *		count	- number of pages in ma.
+ *		rbehind	- optional; on entry the maximum number of pages
+ *			  that may be read before ma[0].
+ *		rahead	- optional; on entry the maximum number of pages
+ *			  that may be read after ma[count-1].
+ *
+ *	OUT:	rbehind	- number of read-behind pages actually read.
+ *		rahead	- number of read-ahead pages actually read.
+ *
+ *	RETURN:	zfs_vm_pagerret_ok on success, zfs_vm_pagerret_bad if the
+ *		last requested page starts at or beyond the object size,
+ *		zfs_vm_pagerret_error otherwise.
+ */
+static int
+zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
+    int *rahead)
+{
+	znode_t *zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	zfs_locked_range_t *lr;
+	vm_object_t object;
+	off_t start, end, obj_size;
+	uint_t blksz;
+	int pgsin_b, pgsin_a;
+	int error;
+
+	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
+	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
+
+	/* Byte range [start, end) covered by the required pages. */
+	start = IDX_TO_OFF(ma[0]->pindex);
+	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
+
+	/*
+	 * Lock a range covering all required and optional pages.
+	 * Note that we need to handle the case of the block size growing.
+	 */
+	for (;;) {
+		blksz = zp->z_blksz;
+		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
+		    rounddown(start, blksz),
+		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+		if (lr == NULL) {
+			/*
+			 * The lock could not be taken: continue without it,
+			 * but give up on read-ahead and read-behind.
+			 */
+			if (rahead != NULL) {
+				*rahead = 0;
+				rahead = NULL;
+			}
+			if (rbehind != NULL) {
+				*rbehind = 0;
+				rbehind = NULL;
+			}
+			break;
+		}
+		/* Block size unchanged since sampling: the lock is good. */
+		if (blksz == zp->z_blksz)
+			break;
+		/* Block size changed underneath us: drop the lock and retry. */
+		zfs_rangelock_exit(lr);
+	}
+
+	object = ma[0]->object;
+	zfs_vmobject_wlock(object);
+	obj_size = object->un_pager.vnp.vnp_size;
+	zfs_vmobject_wunlock(object);
+	/* The last requested page must start inside the object. */
+	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
+		if (lr != NULL)
+			zfs_rangelock_exit(lr);
+		ZFS_EXIT(zfsvfs);
+		return (zfs_vm_pagerret_bad);
+	}
+
+	/* Read-behind is limited to the start of the containing block. */
+	pgsin_b = 0;
+	if (rbehind != NULL) {
+		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
+		pgsin_b = MIN(*rbehind, pgsin_b);
+	}
+
+	/*
+	 * Read-ahead is limited to the end of the containing block and to
+	 * the page-rounded object size.
+	 */
+	pgsin_a = 0;
+	if (rahead != NULL) {
+		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
+		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
+			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
+		pgsin_a = MIN(*rahead, pgsin_a);
+	}
+
+	/*
+	 * NB: we need to pass the exact byte size of the data that we expect
+	 * to read after accounting for the file size.  This is required because
+	 * ZFS will panic if we request DMU to read beyond the end of the last
+	 * allocated block.
+	 */
+	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
+	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
+
+	if (lr != NULL)
+		zfs_rangelock_exit(lr);
+	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
+
+	/* The read kstat is updated regardless of the read's outcome. */
+	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
+
+	ZFS_EXIT(zfsvfs);
+
+	if (error != 0)
+		return (zfs_vm_pagerret_error);
+
+	VM_CNT_INC(v_vnodein);
+	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
+	/* Report how many optional pages were actually brought in. */
+	if (rbehind != NULL)
+		*rbehind = pgsin_b;
+	if (rahead != NULL)
+		*rahead = pgsin_a;
+	return (zfs_vm_pagerret_ok);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_getpages_args {
+	struct vnode *a_vp;
+	vm_page_t *a_m;
+	int a_count;
+	int *a_rbehind;
+	int *a_rahead;
+};
+#endif
+
+/*
+ * Adapter from the vop_getpages_args calling convention to zfs_getpages().
+ */
+static int
+zfs_freebsd_getpages(struct vop_getpages_args *ap)
+{
+	int rv;
+
+	rv = zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
+	    ap->a_rahead);
+	return (rv);
+}
+
+/*
+ * Write the pages ma[0 .. len/PAGE_SIZE - 1] of vp back to the DMU.
+ *
+ *	IN:	vp	- vnode whose pages are being written.
+ *		ma	- array of pages; ma[0] must belong to vp->v_object.
+ *		len	- byte length covered by ma; a non-zero multiple of
+ *			  PAGE_SIZE.
+ *		flags	- pager flags; zfs_vm_pagerput_sync or
+ *			  zfs_vm_pagerput_inval force a zil_commit().
+ *
+ *	OUT:	rtvals	- one zfs_vm_pagerret_* status per page.
+ *
+ *	RETURN:	rtvals[0].
+ */
+static int
+zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
+    int *rtvals)
+{
+	znode_t		*zp = VTOZ(vp);
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zfs_locked_range_t		*lr;
+	dmu_tx_t	*tx;
+	struct sf_buf	*sf;
+	vm_object_t	object;
+	vm_page_t	m;
+	caddr_t		va;
+	size_t		tocopy;
+	size_t		lo_len;
+	vm_ooffset_t	lo_off;
+	vm_ooffset_t	off;
+	uint_t		blksz;
+	int		ncount;
+	int		pcount;
+	int		err;
+	int		i;
+
+	object = vp->v_object;
+	KASSERT(ma[0]->object == object, ("mismatching object"));
+	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
+
+	/* Every page starts out as "error" until proven otherwise. */
+	pcount = btoc(len);
+	ncount = pcount;
+	for (i = 0; i < pcount; i++)
+		rtvals[i] = zfs_vm_pagerret_error;
+
+	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
+	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
+
+	/* Take a writer range lock on the block-aligned range being written. */
+	off = IDX_TO_OFF(ma[0]->pindex);
+	blksz = zp->z_blksz;
+	lo_off = rounddown(off, blksz);
+	lo_len = roundup(len + (off - lo_off), blksz);
+	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
+
+	/*
+	 * Clamp the write to the current object size.  Pages that lie
+	 * entirely beyond it are reported as zfs_vm_pagerret_bad.
+	 */
+	zfs_vmobject_wlock(object);
+	if (len + off > object->un_pager.vnp.vnp_size) {
+		if (object->un_pager.vnp.vnp_size > off) {
+			int pgoff;
+
+			len = object->un_pager.vnp.vnp_size - off;
+			ncount = btoc(len);
+			if ((pgoff = (int)len & PAGE_MASK) != 0) {
+				/*
+				 * If the object is locked and the following
+				 * conditions hold, then the page's dirty
+				 * field cannot be concurrently changed by a
+				 * pmap operation.
+				 */
+				m = ma[ncount - 1];
+				vm_page_assert_sbusied(m);
+				KASSERT(!pmap_page_is_write_mapped(m),
+				    ("zfs_putpages: page %p is not read-only",
+				    m));
+				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
+				    pgoff);
+			}
+		} else {
+			len = 0;
+			ncount = 0;
+		}
+		if (ncount < pcount) {
+			for (i = ncount; i < pcount; i++) {
+				rtvals[i] = zfs_vm_pagerret_bad;
+			}
+		}
+	}
+	zfs_vmobject_wunlock(object);
+
+	if (ncount == 0)
+		goto out;
+
+	/* Refuse the write when the user, group or project is over quota. */
+	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
+	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
+	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
+	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
+	    zp->z_projid))) {
+		goto out;
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_write(tx, zp->z_id, off, len);
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err != 0) {
+		dmu_tx_abort(tx);
+		goto out;
+	}
+
+	if (zp->z_blksz < PAGE_SIZE) {
+		/*
+		 * Copy page by page using private cursors.  "off" and "len"
+		 * must keep describing the whole write because they are
+		 * passed to zfs_log_write() and to the write kstat below;
+		 * consuming them here would log a zero-length write at the
+		 * end offset.
+		 */
+		vm_ooffset_t woff = off;
+		size_t wlen = len;
+
+		for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
+			tocopy = wlen > PAGE_SIZE ? PAGE_SIZE : wlen;
+			va = zfs_map_page(ma[i], &sf);
+			dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va,
+			    tx);
+			zfs_unmap_page(sf);
+		}
+	} else {
+		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
+	}
+
+	if (err == 0) {
+		uint64_t mtime[2], ctime[2];
+		sa_bulk_attr_t bulk[3];
+		int count = 0;
+
+		/* Update mtime, ctime and flags in one bulk SA update. */
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		    &mtime, 16);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, 16);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+		    &zp->z_pflags, 8);
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
+		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		ASSERT0(err);
+		/*
+		 * XXX we should be passing a callback to undirty
+		 * but that would make the locking messier
+		 */
+		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
+		    len, 0, NULL, NULL);
+
+		/* Mark the written pages clean and report them as ok. */
+		zfs_vmobject_wlock(object);
+		for (i = 0; i < ncount; i++) {
+			rtvals[i] = zfs_vm_pagerret_ok;
+			vm_page_undirty(ma[i]);
+		}
+		zfs_vmobject_wunlock(object);
+		VM_CNT_INC(v_vnodeout);
+		VM_CNT_ADD(v_vnodepgsout, ncount);
+	}
+	dmu_tx_commit(tx);
+
+out:
+	zfs_rangelock_exit(lr);
+	/* Synchronous or invalidating requests, and sync=always, commit. */
+	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
+	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+
+	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
+
+	ZFS_EXIT(zfsvfs);
+	return (rtvals[0]);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_putpages_args {
+	struct vnode *a_vp;
+	vm_page_t *a_m;
+	int a_count;
+	int a_sync;
+	int *a_rtvals;
+};
+#endif
+
+/*
+ * Adapter from the vop_putpages_args calling convention to zfs_putpages().
+ */
+static int
+zfs_freebsd_putpages(struct vop_putpages_args *ap)
+{
+	int rv;
+
+	rv = zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
+	    ap->a_rtvals);
+	return (rv);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_bmap_args {
+	struct vnode *a_vp;
+	daddr_t  a_bn;
+	struct bufobj **a_bop;
+	daddr_t *a_bnp;
+	int *a_runp;
+	int *a_runb;
+};
+#endif
+
+/*
+ * Identity block mapping: each requested output, if present, receives the
+ * vnode's own bufobj, the unchanged block number, and zero-length runs.
+ */
+static int
+zfs_freebsd_bmap(struct vop_bmap_args *ap)
+{
+	struct bufobj **bopp = ap->a_bop;
+	daddr_t *bnp = ap->a_bnp;
+	int *runp = ap->a_runp;
+	int *runb = ap->a_runb;
+
+	if (bopp != NULL)
+		*bopp = &ap->a_vp->v_bufobj;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	if (runp != NULL)
+		*runp = 0;
+	if (runb != NULL)
+		*runb = 0;
+
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_open_args {
+	struct vnode *a_vp;
+	int a_mode;
+	struct ucred *a_cred;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * Open the vnode through zfs_open() and, on success, create its VM object
+ * sized to the znode's current z_size.
+ */
+static int
+zfs_freebsd_open(struct vop_open_args *ap)
+{
+	vnode_t	*vp = ap->a_vp;
+	znode_t *zp = VTOZ(vp);
+	int error = zfs_open(&vp, ap->a_mode, ap->a_cred);
+
+	if (error != 0)
+		return (error);
+
+	vnode_create_vobject(vp, zp->z_size, ap->a_td);
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_close_args {
+	struct vnode *a_vp;
+	int  a_fflag;
+	struct ucred *a_cred;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * Adapter from the vop_close_args calling convention to zfs_close(); the
+ * count and offset arguments are fixed at 1 and 0.
+ */
+static int
+zfs_freebsd_close(struct vop_close_args *ap)
+{
+	int error;
+
+	error = zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred);
+	return (error);
+}
+
+/*
+ * Fallback declaration of the argument structure.  The credential and
+ * thread members carry the a_ prefix because zfs_freebsd_ioctl() below
+ * reads ap->a_cred.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct vop_ioctl_args {
+	struct vnode *a_vp;
+	ulong_t a_command;
+	caddr_t a_data;
+	int a_fflag;
+	struct ucred *a_cred;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * Adapter from the vop_ioctl_args calling convention to zfs_ioctl().  The
+ * data pointer is passed on as an intptr_t and the final argument is NULL.
+ */
+static int
+zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
+{
+
+	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
+	    ap->a_fflag, ap->a_cred, NULL));
+}
+
+/*
+ * Translate IO_* flags into the corresponding F* file flags:
+ * IO_APPEND -> FAPPEND, IO_NDELAY -> FNONBLOCK,
+ * IO_SYNC -> FSYNC | FDSYNC | FRSYNC.  Other bits are dropped.
+ */
+static int
+ioflags(int ioflags)
+{
+	int translated;
+
+	translated = (ioflags & IO_APPEND) ? FAPPEND : 0;
+	translated |= (ioflags & IO_NDELAY) ? FNONBLOCK : 0;
+	translated |= (ioflags & IO_SYNC) ? (FSYNC | FDSYNC | FRSYNC) : 0;
+
+	return (translated);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_read_args {
+	struct vnode *a_vp;
+	struct uio *a_uio;
+	int a_ioflag;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * Wrap the caller's uio in a zfs_uio_t, translate the I/O flags and read
+ * through zfs_read().
+ */
+static int
+zfs_freebsd_read(struct vop_read_args *ap)
+{
+	zfs_uio_t uio;
+	znode_t *zp = VTOZ(ap->a_vp);
+	int flags = ioflags(ap->a_ioflag);
+
+	zfs_uio_init(&uio, ap->a_uio);
+	return (zfs_read(zp, &uio, flags, ap->a_cred));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_write_args {
+	struct vnode *a_vp;
+	struct uio *a_uio;
+	int a_ioflag;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * Wrap the caller's uio in a zfs_uio_t, translate the I/O flags and write
+ * through zfs_write().
+ */
+static int
+zfs_freebsd_write(struct vop_write_args *ap)
+{
+	zfs_uio_t uio;
+	znode_t *zp = VTOZ(ap->a_vp);
+	int flags = ioflags(ap->a_ioflag);
+
+	zfs_uio_init(&uio, ap->a_uio);
+	return (zfs_write(zp, &uio, flags, ap->a_cred));
+}
+
+#if __FreeBSD_version >= 1300102
+/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ *
+ * Returns 0 only when the znode is present, is neither quarantined nor an
+ * extended attribute, and has ZFS_NO_EXECS_DENIED set; every other case
+ * yields EAGAIN.
+ */
+static int
+zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
+{
+	znode_t *zp = VTOZ_SMR(v->a_vp);
+	uint64_t pflags;
+
+	if (__predict_false(zp == NULL))
+		return (EAGAIN);
+
+	pflags = atomic_load_64(&zp->z_pflags);
+	if ((pflags & (ZFS_AV_QUARANTINED | ZFS_XATTR)) != 0 ||
+	    (pflags & ZFS_NO_EXECS_DENIED) == 0)
+		return (EAGAIN);
+
+	return (0);
+}
+#endif
+
+#if __FreeBSD_version >= 1300139
+/*
+ * Resolve a symlink from the znode's cached target string.  Returns EAGAIN
+ * when either the znode or the cached target is not available.
+ */
+static int
+zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
+{
+	znode_t *zp = VTOZ_SMR(v->a_vp);
+	char *cached;
+
+	if (__predict_false(zp == NULL))
+		return (EAGAIN);
+
+	cached = atomic_load_consume_ptr(&zp->z_cached_symlink);
+	if (cached == NULL)
+		return (EAGAIN);
+
+	return (cache_symlink_resolve(v->a_fpl, cached, strlen(cached)));
+}
+#endif
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_access_args {
+	struct vnode *a_vp;
+	accmode_t a_accmode;
+	struct ucred *a_cred;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * Check the requested access mode against vp.
+ *
+ * A pure VEXEC request first tries zfs_fastaccesschk_execute().  Otherwise
+ * the bits ZFS understands go through zfs_access(), the remaining bits
+ * through vaccess(), and finally VEXEC on a non-directory requires at least
+ * one execute bit in the mode.
+ */
+static int
+zfs_freebsd_access(struct vop_access_args *ap)
+{
+	/* ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND. */
+	const accmode_t zfs_bits = VREAD | VWRITE | VEXEC | VAPPEND;
+	vnode_t *vp = ap->a_vp;
+	znode_t *zp = VTOZ(vp);
+	accmode_t requested = ap->a_accmode;
+	accmode_t subset;
+	int error;
+
+	if (requested == VEXEC &&
+	    zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
+		return (0);
+
+	subset = requested & zfs_bits;
+	if (subset != 0) {
+		error = zfs_access(zp, subset, 0, ap->a_cred);
+		if (error != 0)
+			return (error);
+	}
+
+	/*
+	 * VADMIN has to be handled by vaccess().
+	 */
+	subset = requested & ~zfs_bits;
+	if (subset != 0) {
+#if __FreeBSD_version >= 1300105
+		error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
+		    zp->z_gid, subset, ap->a_cred);
+#else
+		error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
+		    zp->z_gid, subset, ap->a_cred, NULL);
+#endif
+		if (error != 0)
+			return (error);
+	}
+
+	/*
+	 * For VEXEC, ensure that at least one execute bit is set for
+	 * non-directories.
+	 */
+	if ((requested & VEXEC) != 0 && vp->v_type != VDIR &&
+	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
+		return (EACCES);
+
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_lookup_args {
+	struct vnode *a_dvp;
+	struct vnode **a_vpp;
+	struct componentname *a_cnp;
+};
+#endif
+
+/*
+ * Copy the component name into a NUL-terminated local buffer and look it
+ * up with zfs_lookup().  "cached" is passed through unchanged.
+ */
+static int
+zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
+{
+	struct componentname *cnp = ap->a_cnp;
+	char nm[NAME_MAX + 1];
+	size_t copylen;
+
+	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
+	/* Room for the name plus terminator, capped at the buffer size. */
+	copylen = MIN(cnp->cn_namelen + 1, sizeof (nm));
+	strlcpy(nm, cnp->cn_nameptr, copylen);
+
+	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
+	    cnp->cn_cred, 0, cached));
+}
+
+/*
+ * Cached-lookup variant: reuses zfs_freebsd_lookup() with cached = B_TRUE.
+ */
+static int
+zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
+{
+	struct vop_lookup_args *lap = (struct vop_lookup_args *)ap;
+
+	return (zfs_freebsd_lookup(lap, B_TRUE));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_lookup_args {
+	struct vnode *a_dvp;
+	struct vnode **a_vpp;
+	struct componentname *a_cnp;
+};
+#endif
+
+/*
+ * Lookup entry point: go through vfs_cache_lookup() when the filesystem
+ * has z_use_namecache set, otherwise do an uncached ZFS lookup.
+ */
+static int
+zfs_cache_lookup(struct vop_lookup_args *ap)
+{
+	zfsvfs_t *zfsvfs = ap->a_dvp->v_mount->mnt_data;
+
+	if (!zfsvfs->z_use_namecache)
+		return (zfs_freebsd_lookup(ap, B_FALSE));
+
+	return (vfs_cache_lookup(ap));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_create_args {
+	struct vnode *a_dvp;
+	struct vnode **a_vpp;
+	struct componentname *a_cnp;
+	struct vattr *a_vap;
+};
+#endif
+
+/*
+ * Create a file through zfs_create().  On success the new vnode is stored
+ * in *a_vpp and, when the name cache is in use and MAKEENTRY is set, a
+ * name cache entry is added.  On failure *a_vpp is left NULL.
+ */
+static int
+zfs_freebsd_create(struct vop_create_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	vattr_t *vap = ap->a_vap;
+	zfsvfs_t *zfsvfs;
+	znode_t *newzp = NULL;
+	int error, perm;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	vattr_init_mask(vap);
+	perm = vap->va_mode & ALLPERMS;
+	zfsvfs = ap->a_dvp->v_mount->mnt_data;
+	*ap->a_vpp = NULL;
+
+	error = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, perm,
+	    &newzp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
+	if (error != 0)
+		return (error);
+
+	*ap->a_vpp = ZTOV(newzp);
+	if (zfsvfs->z_use_namecache && (cnp->cn_flags & MAKEENTRY) != 0)
+		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
+
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_remove_args {
+	struct vnode *a_dvp;
+	struct vnode *a_vp;
+	struct componentname *a_cnp;
+};
+#endif
+
+/*
+ * Adapter from the vop_remove_args calling convention to zfs_remove_().
+ */
+static int
+zfs_freebsd_remove(struct vop_remove_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	return (zfs_remove_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr,
+	    cnp->cn_cred));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_mkdir_args {
+	struct vnode *a_dvp;
+	struct vnode **a_vpp;
+	struct componentname *a_cnp;
+	struct vattr *a_vap;
+};
+#endif
+
+/*
+ * Create a directory through zfs_mkdir().  *a_vpp receives the new vnode
+ * on success and stays NULL on failure.
+ */
+static int
+zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	vattr_t *vap = ap->a_vap;
+	znode_t *newzp = NULL;
+	int error;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	vattr_init_mask(vap);
+	*ap->a_vpp = NULL;
+
+	error = zfs_mkdir(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, &newzp,
+	    cnp->cn_cred, 0, NULL);
+	if (error != 0)
+		return (error);
+
+	*ap->a_vpp = ZTOV(newzp);
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_rmdir_args {
+	struct vnode *a_dvp;
+	struct vnode *a_vp;
+	struct componentname *a_cnp;
+};
+#endif
+
+/*
+ * Adapter from the vop_rmdir_args calling convention to zfs_rmdir_().
+ */
+static int
+zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	int error;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	error = zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_readdir_args {
+	struct vnode *a_vp;
+	struct uio *a_uio;
+	struct ucred *a_cred;
+	int *a_eofflag;
+	int *a_ncookies;
+	cookie_t **a_cookies;
+};
+#endif
+
+/*
+ * VOP_READDIR entry point: wrap the caller's struct uio in a zfs_uio_t and
+ * delegate to zfs_readdir(), passing the EOF flag and cookie outputs through.
+ */
+static int
+zfs_freebsd_readdir(struct vop_readdir_args *ap)
+{
+	zfs_uio_t zuio;
+	int error;
+
+	zfs_uio_init(&zuio, ap->a_uio);
+	error = zfs_readdir(ap->a_vp, &zuio, ap->a_cred, ap->a_eofflag,
+	    ap->a_ncookies, ap->a_cookies);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_fsync_args {
+	struct vnode *a_vp;
+	int a_waitfor;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * VOP_FSYNC entry point: run the generic vop_stdfsync() first (its result
+ * is not examined), then return whatever zfs_fsync() reports for the znode
+ * using the calling thread's credential.
+ */
+static int
+zfs_freebsd_fsync(struct vop_fsync_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	(void) vop_stdfsync(ap);
+
+	return (zfs_fsync(VTOZ(vp), 0, ap->a_td->td_ucred));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_getattr_args {
+	struct vnode *a_vp;
+	struct vattr *a_vap;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * VOP_GETATTR entry point.
+ *
+ * Fetches the attributes of ap->a_vp through zfs_getattr() using an
+ * xvattr_t that additionally requests the ZFS optional attributes listed
+ * below, then folds the returned optional attributes into FreeBSD chflags
+ * bits which are stored in ap->a_vap->va_flags.
+ *
+ * Returns 0 on success or the error from zfs_getattr(); on error *ap->a_vap
+ * is not written back.
+ */
+static int
+zfs_freebsd_getattr(struct vop_getattr_args *ap)
+{
+	vattr_t *vap = ap->a_vap;
+	xvattr_t xvap;
+	ulong_t fflags = 0;
+	int error;
+
+	/* Seed the extended vattr from the caller's vattr and flag it as such. */
+	xva_init(&xvap);
+	xvap.xva_vattr = *vap;
+	xvap.xva_vattr.va_mask |= AT_XVATTR;
+
+	/* Convert chflags into ZFS-type flags. */
+	/* XXX: what about SF_SETTABLE?. */
+	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
+	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
+	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
+	XVA_SET_REQ(&xvap, XAT_NODUMP);
+	XVA_SET_REQ(&xvap, XAT_READONLY);
+	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
+	XVA_SET_REQ(&xvap, XAT_SYSTEM);
+	XVA_SET_REQ(&xvap, XAT_HIDDEN);
+	XVA_SET_REQ(&xvap, XAT_REPARSE);
+	XVA_SET_REQ(&xvap, XAT_OFFLINE);
+	XVA_SET_REQ(&xvap, XAT_SPARSE);
+
+	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
+	if (error != 0)
+		return (error);
+
+	/* Convert ZFS xattr into chflags. */
+	/*
+	 * FLAG_CHECK ORs fflag into fflags only when XVA_ISSET_RTN() reports
+	 * xflag for xvap and the corresponding optional-attribute field is
+	 * non-zero.
+	 */
+#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
+	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
+		fflags |= (fflag);					\
+} while (0)
+	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
+	    xvap.xva_xoptattrs.xoa_immutable);
+	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
+	    xvap.xva_xoptattrs.xoa_appendonly);
+	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
+	    xvap.xva_xoptattrs.xoa_nounlink);
+	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
+	    xvap.xva_xoptattrs.xoa_archive);
+	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
+	    xvap.xva_xoptattrs.xoa_nodump);
+	FLAG_CHECK(UF_READONLY, XAT_READONLY,
+	    xvap.xva_xoptattrs.xoa_readonly);
+	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
+	    xvap.xva_xoptattrs.xoa_system);
+	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
+	    xvap.xva_xoptattrs.xoa_hidden);
+	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
+	    xvap.xva_xoptattrs.xoa_reparse);
+	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
+	    xvap.xva_xoptattrs.xoa_offline);
+	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
+	    xvap.xva_xoptattrs.xoa_sparse);
+
+#undef	FLAG_CHECK
+	/* Copy the plain vattr back, then overwrite va_flags with chflags. */
+	*vap = xvap.xva_vattr;
+	vap->va_flags = fflags;
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_setattr_args {
+	struct vnode *a_vp;
+	struct vattr *a_vap;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * VOP_SETATTR entry point.
+ *
+ * Copies the caller's vattr into an xvattr_t and, when va_flags is being
+ * set, validates the requested chflags bits, applies the privilege rules
+ * described below, and converts every flag whose state differs from the
+ * znode's current z_pflags into an optional-attribute request.  A birthtime
+ * change is turned into an XAT_CREATETIME request.  The resulting xvattr is
+ * applied with zfs_setattr().
+ *
+ * Returns EOPNOTSUPP when flags are requested but z_use_fuids is false or an
+ * unsupported flag bit is present, EPERM or another error from the privilege
+ * checks, or otherwise the result of zfs_setattr().
+ */
+static int
+zfs_freebsd_setattr(struct vop_setattr_args *ap)
+{
+	vnode_t *vp = ap->a_vp;
+	vattr_t *vap = ap->a_vap;
+	cred_t *cred = ap->a_cred;
+	xvattr_t xvap;
+	ulong_t fflags;
+	uint64_t zflags;
+
+	vattr_init_mask(vap);
+	/* Strip the mask bits covered by AT_NOSET before copying. */
+	vap->va_mask &= ~AT_NOSET;
+
+	xva_init(&xvap);
+	xvap.xva_vattr = *vap;
+
+	/* Current on-znode flags, compared against the requested chflags. */
+	zflags = VTOZ(vp)->z_pflags;
+
+	if (vap->va_flags != VNOVAL) {
+		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
+		int error;
+
+		if (zfsvfs->z_use_fuids == B_FALSE)
+			return (EOPNOTSUPP);
+
+		fflags = vap->va_flags;
+		/*
+		 * XXX KDM
+		 * We need to figure out whether it makes sense to allow
+		 * UF_REPARSE through, since we don't really have other
+		 * facilities to handle reparse points and zfs_setattr()
+		 * doesn't currently allow setting that attribute anyway.
+		 */
+		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
+		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
+		    UF_OFFLINE|UF_SPARSE)) != 0)
+			return (EOPNOTSUPP);
+		/*
+		 * Unprivileged processes are not permitted to unset system
+		 * flags, or modify flags if any system flags are set.
+		 * Privileged non-jail processes may not modify system flags
+		 * if securelevel > 0 and any existing system flags are set.
+		 * Privileged jail processes behave like privileged non-jail
+		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
+		 * otherwise, they behave like unprivileged processes.
+		 */
+		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
+		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
+			if (zflags &
+			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
+				error = securelevel_gt(cred, 0);
+				if (error != 0)
+					return (error);
+			}
+		} else {
+			/*
+			 * Callers may only modify the file flags on
+			 * objects they have VADMIN rights for.
+			 */
+			if ((error = VOP_ACCESS(vp, VADMIN, cred,
+			    curthread)) != 0)
+				return (error);
+			if (zflags &
+			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
+			    ZFS_NOUNLINK)) {
+				return (EPERM);
+			}
+			if (fflags &
+			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
+				return (EPERM);
+			}
+		}
+
+		/*
+		 * FLAG_CHANGE requests xflag only when the requested chflags
+		 * bit and the current znode flag disagree, and records the
+		 * requested state (0 or 1) in the optional-attribute field.
+		 */
+#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
+	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
+	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
+		XVA_SET_REQ(&xvap, (xflag));				\
+		(xfield) = ((fflags & (fflag)) != 0);			\
+	}								\
+} while (0)
+		/* Convert chflags into ZFS-type flags. */
+		/* XXX: what about SF_SETTABLE?. */
+		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
+		    xvap.xva_xoptattrs.xoa_immutable);
+		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
+		    xvap.xva_xoptattrs.xoa_appendonly);
+		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
+		    xvap.xva_xoptattrs.xoa_nounlink);
+		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
+		    xvap.xva_xoptattrs.xoa_archive);
+		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
+		    xvap.xva_xoptattrs.xoa_nodump);
+		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
+		    xvap.xva_xoptattrs.xoa_readonly);
+		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
+		    xvap.xva_xoptattrs.xoa_system);
+		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
+		    xvap.xva_xoptattrs.xoa_hidden);
+		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
+		    xvap.xva_xoptattrs.xoa_reparse);
+		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
+		    xvap.xva_xoptattrs.xoa_offline);
+		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
+		    xvap.xva_xoptattrs.xoa_sparse);
+#undef	FLAG_CHANGE
+	}
+	/* A birthtime update is expressed as an optional create-time request. */
+	if (vap->va_birthtime.tv_sec != VNOVAL) {
+		xvap.xva_vattr.va_mask |= AT_XVATTR;
+		XVA_SET_REQ(&xvap, XAT_CREATETIME);
+	}
+	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_rename_args {
+	struct vnode *a_fdvp;
+	struct vnode *a_fvp;
+	struct componentname *a_fcnp;
+	struct vnode *a_tdvp;
+	struct vnode *a_tvp;
+	struct componentname *a_tcnp;
+};
+#endif
+
+/*
+ * VOP_RENAME entry point: perform the rename with zfs_do_rename() and then
+ * drop one reference on each of the four vnodes (the target vnode only if
+ * it is non-NULL after the call).  fvp and tvp are passed by address, so
+ * the vnodes released are whatever zfs_do_rename() left in them.
+ */
+static int
+zfs_freebsd_rename(struct vop_rename_args *ap)
+{
+	struct componentname *fcnp = ap->a_fcnp;
+	struct componentname *tcnp = ap->a_tcnp;
+	vnode_t *fdvp = ap->a_fdvp;
+	vnode_t *fvp = ap->a_fvp;
+	vnode_t *tdvp = ap->a_tdvp;
+	vnode_t *tvp = ap->a_tvp;
+	int rc;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(fcnp->cn_flags & (SAVENAME|SAVESTART));
+	ASSERT(tcnp->cn_flags & (SAVENAME|SAVESTART));
+#endif
+
+	rc = zfs_do_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp,
+	    fcnp->cn_cred);
+
+	vrele(fdvp);
+	vrele(fvp);
+	vrele(tdvp);
+	if (tvp != NULL)
+		vrele(tvp);
+
+	return (rc);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_symlink_args {
+	struct vnode *a_dvp;
+	struct vnode **a_vpp;
+	struct componentname *a_cnp;
+	struct vattr *a_vap;
+	char *a_target;
+};
+#endif
+
+/*
+ * VOP_SYMLINK entry point.
+ *
+ * Creates a symbolic link named by ap->a_cnp in ap->a_dvp pointing at
+ * ap->a_target via zfs_symlink().  On success the new vnode is returned in
+ * *ap->a_vpp and, on FreeBSD >= 1300139, a NUL-terminated copy of the target
+ * is published in zp->z_cached_symlink.  On failure *ap->a_vpp is NULL.
+ *
+ * Returns 0 or the error from zfs_symlink().
+ */
+static int
+zfs_freebsd_symlink(struct vop_symlink_args *ap)
+{
+	struct componentname *cnp = ap->a_cnp;
+	vattr_t *vap = ap->a_vap;
+	znode_t *zp = NULL;
+#if __FreeBSD_version >= 1300139
+	char *symlink;
+	size_t symlink_len;
+#endif
+	int rc;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
+	vattr_init_mask(vap);
+	*ap->a_vpp = NULL;
+
+	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
+	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
+	if (rc == 0) {
+		*ap->a_vpp = ZTOV(zp);
+		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+#if __FreeBSD_version >= 1300139
+		/* A freshly created link must not have a cached target yet. */
+		MPASS(zp->z_cached_symlink == NULL);
+		symlink_len = strlen(ap->a_target);
+		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
+		if (symlink != NULL) {
+			memcpy(symlink, ap->a_target, symlink_len);
+			symlink[symlink_len] = '\0';
+			/* Release store: the buffer is filled before publishing. */
+			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
+			    (uintptr_t)symlink);
+		}
+#endif
+	}
+	return (rc);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_readlink_args {
+	struct vnode *a_vp;
+	struct uio *a_uio;
+	struct ucred *a_cred;
+};
+#endif
+
+/*
+ * VOP_READLINK entry point.
+ *
+ * Reads the link target into ap->a_uio via zfs_readlink().  On FreeBSD >=
+ * 1300139, when the read succeeded into a single kernel-space iovec and the
+ * znode has no cached target yet, a NUL-terminated copy of the bytes just
+ * read is installed in zp->z_cached_symlink.
+ *
+ * Returns the result of zfs_readlink(); a failure to populate the cache does
+ * not change the return value.
+ */
+static int
+zfs_freebsd_readlink(struct vop_readlink_args *ap)
+{
+	zfs_uio_t uio;
+	int error;
+#if __FreeBSD_version >= 1300139
+	znode_t	*zp = VTOZ(ap->a_vp);
+	char *symlink, *base;
+	size_t symlink_len;
+	bool trycache;
+#endif
+
+	zfs_uio_init(&uio, ap->a_uio);
+#if __FreeBSD_version >= 1300139
+	/*
+	 * Remember the destination buffer before the read consumes the uio;
+	 * base and symlink_len are only valid when trycache is true.
+	 */
+	trycache = false;
+	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
+	    zfs_uio_iovcnt(&uio) == 1) {
+		base = zfs_uio_iovbase(&uio, 0);
+		symlink_len = zfs_uio_iovlen(&uio, 0);
+		trycache = true;
+	}
+#endif
+	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
+#if __FreeBSD_version >= 1300139
+	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
+	    error != 0 || !trycache) {
+		return (error);
+	}
+	/* Initial iovec length minus the residual left after the read. */
+	symlink_len -= zfs_uio_resid(&uio);
+	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
+	if (symlink != NULL) {
+		memcpy(symlink, base, symlink_len);
+		symlink[symlink_len] = '\0';
+		/* Install only if still NULL; otherwise discard our copy. */
+		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
+		    (uintptr_t)NULL, (uintptr_t)symlink)) {
+			cache_symlink_free(symlink, symlink_len + 1);
+		}
+	}
+#endif
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_link_args {
+	struct vnode *a_tdvp;
+	struct vnode *a_vp;
+	struct componentname *a_cnp;
+};
+#endif
+
+/*
+ * VOP_LINK entry point: refuse with EXDEV when the source vnode and the
+ * target directory belong to different mounts, otherwise create the hard
+ * link through zfs_link() and return its result.
+ */
+static int
+zfs_freebsd_link(struct vop_link_args *ap)
+{
+	vnode_t *tdvp = ap->a_tdvp;
+	vnode_t *vp = ap->a_vp;
+	struct componentname *cnp;
+
+	if (vp->v_mount != tdvp->v_mount)
+		return (EXDEV);
+
+	cnp = ap->a_cnp;
+
+#if __FreeBSD_version < 1400068
+	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
+
+	return (zfs_link(VTOZ(tdvp), VTOZ(vp), cnp->cn_nameptr,
+	    cnp->cn_cred, 0));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_inactive_args {
+	struct vnode *a_vp;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * VOP_INACTIVE entry point: call zfs_inactive() on the vnode with the
+ * credential of curthread (FreeBSD >= 1300123) or of ap->a_td (older
+ * versions).  Always returns 0.
+ */
+static int
+zfs_freebsd_inactive(struct vop_inactive_args *ap)
+{
+	struct ucred *cr;
+
+#if __FreeBSD_version >= 1300123
+	cr = curthread->td_ucred;
+#else
+	cr = ap->a_td->td_ucred;
+#endif
+	zfs_inactive(ap->a_vp, cr, NULL);
+
+	return (0);
+}
+
+#if __FreeBSD_version >= 1300042
+#ifndef _SYS_SYSPROTO_H_
+struct vop_need_inactive_args {
+	struct vnode *a_vp;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * VOP_NEED_INACTIVE entry point: report (non-zero) whether inactive
+ * processing is wanted for the vnode.  The answer is 1 when the page queue
+ * needs flushing, when the teardown-inactive read lock cannot be taken
+ * without blocking, or when the znode has no SA handle, is unlinked, or has
+ * a dirty atime; otherwise 0.
+ */
+static int
+zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
+{
+	vnode_t *vp = ap->a_vp;
+	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int need;
+
+	if (vn_need_pageq_flush(vp) ||
+	    !ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
+		return (1);
+
+	/* Inspect the znode state only while the read lock is held. */
+	need = 1;
+	if (zp->z_sa_hdl != NULL && !zp->z_unlinked && !zp->z_atime_dirty)
+		need = 0;
+	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
+
+	return (need);
+}
+#endif
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_reclaim_args {
+	struct vnode *a_vp;
+	struct thread *a_td;
+};
+#endif
+
+/*
+ * VOP_RECLAIM entry point.
+ *
+ * Detaches the znode from the vnode being reclaimed: under the
+ * teardown-inactive read lock the znode is either freed outright (no SA
+ * handle) or passed to zfs_zinactive(), after which vp->v_data is cleared.
+ * Always returns 0.
+ */
+static int
+zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
+{
+	vnode_t	*vp = ap->a_vp;
+	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs;
+
+	/*
+	 * Validate zp before it is dereferenced, so the assertion (rather
+	 * than a NULL dereference) is what fires on a vnode without a znode.
+	 */
+	ASSERT3P(zp, !=, NULL);
+	zfsvfs = zp->z_zfsvfs;
+
+#if __FreeBSD_version < 1300042
+	/* Destroy the vm object and flush associated pages. */
+	vnode_destroy_vobject(vp);
+#endif
+	/*
+	 * z_teardown_inactive_lock protects from a race with
+	 * zfs_znode_dmu_fini in zfsvfs_teardown during
+	 * force unmount.
+	 */
+	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
+	if (zp->z_sa_hdl == NULL)
+		zfs_znode_free(zp);
+	else
+		zfs_zinactive(zp);
+	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
+
+	vp->v_data = NULL;
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_fid_args {
+	struct vnode *a_vp;
+	struct fid *a_fid;
+};
+#endif
+
+/*
+ * VOP_FID entry point: have zfs_fid() fill in the caller-supplied file
+ * identifier for the vnode and return its result.
+ */
+static int
+zfs_freebsd_fid(struct vop_fid_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	void *fidp = ap->a_fid;
+
+	return (zfs_fid(vp, fidp, NULL));
+}
+
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_pathconf_args {
+	struct vnode *a_vp;
+	int a_name;
+	register_t *a_retval;
+};
+#endif
+
+/*
+ * VOP_PATHCONF entry point.
+ *
+ * Asks zfs_pathconf() first.  If it answers, the value is stored in
+ * *ap->a_retval.  If it fails with anything other than EOPNOTSUPP that error
+ * is returned.  On EOPNOTSUPP a small set of names is answered locally
+ * (_PC_NAME_MAX, _PC_DEALLOC_PRESENT on FreeBSD >= 1400032, and _PC_PIPE_BUF
+ * for directories and FIFOs only, EINVAL otherwise); every other name is
+ * delegated to vop_stdpathconf().
+ */
+static int
+zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
+{
+	ulong_t val;
+	int error;
+
+	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
+	    curthread->td_ucred, NULL);
+	if (error == 0) {
+		*ap->a_retval = val;
+		return (error);
+	}
+	if (error != EOPNOTSUPP)
+		return (error);
+
+	/* zfs_pathconf() does not know this name; try the local fallbacks. */
+	switch (ap->a_name) {
+	case _PC_NAME_MAX:
+		*ap->a_retval = NAME_MAX;
+		return (0);
+#if __FreeBSD_version >= 1400032
+	case _PC_DEALLOC_PRESENT:
+		*ap->a_retval = 1;
+		return (0);
+#endif
+	case _PC_PIPE_BUF:
+		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
+			*ap->a_retval = PIPE_BUF;
+			return (0);
+		}
+		return (EINVAL);
+	default:
+		return (vop_stdpathconf(ap));
+	}
+}
+
+/*
+ * Build the ZFS-side attribute name for a FreeBSD extended attribute.
+ *
+ * FreeBSD's extended attribute namespaces map onto a file name prefix:
+ *
+ *	NAMESPACE	PREFIX
+ *	system		freebsd:system:
+ *	user		(none, can be used to access ZFS fsattr(5) attributes
+ *			created on Solaris)
+ *
+ * The result is written to attrname (capacity size bytes).  Returns 0 on
+ * success, EINVAL for a name containing '/' or starting with "freebsd:" or
+ * for an unsupported namespace, and ENAMETOOLONG when the composed name does
+ * not fit.
+ */
+static int
+zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
+    size_t size)
+{
+	const char *pfx, *ns, *sfx;
+	int written;
+
+	/* Neither a '/' nor a leading "freebsd:" is acceptable in a name. */
+	if (strchr(name, '/') != NULL)
+		return (SET_ERROR(EINVAL));
+	if (strncmp(name, "freebsd:", 8) == 0)
+		return (SET_ERROR(EINVAL));
+
+	bzero(attrname, size);
+
+	if (attrnamespace == EXTATTR_NAMESPACE_USER) {
+#if 0
+		pfx = "freebsd:";
+		ns = EXTATTR_NAMESPACE_USER_STRING;
+		sfx = ":";
+#else
+		/*
+		 * This is the default namespace by which we can access all
+		 * attributes created on Solaris.
+		 */
+		pfx = ns = sfx = "";
+#endif
+	} else if (attrnamespace == EXTATTR_NAMESPACE_SYSTEM) {
+		pfx = "freebsd:";
+		ns = EXTATTR_NAMESPACE_SYSTEM_STRING;
+		sfx = ":";
+	} else {
+		/* EXTATTR_NAMESPACE_EMPTY and anything unknown. */
+		return (SET_ERROR(EINVAL));
+	}
+
+	written = snprintf(attrname, size, "%s%s%s%s", pfx, ns, sfx, name);
+	if (written >= size)
+		return (SET_ERROR(ENAMETOOLONG));
+
+	return (0);
+}
+
+/*
+ * Make sure zp->z_xattr_cached is populated.
+ *
+ * The caller must hold zp->z_xattr_lock (as reader or writer); the lock is
+ * held in the same mode on return.  If the cache is already present nothing
+ * is done.  A writer loads it directly with zfs_sa_get_xattr().  A reader is
+ * upgraded to writer for the load -- dropping and re-taking the lock if the
+ * in-place upgrade fails -- and downgraded again afterwards.
+ *
+ * Returns 0 or the error from zfs_sa_get_xattr().
+ */
+static int
+zfs_ensure_xattr_cached(znode_t *zp)
+{
+	int error = 0;
+
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+	if (zp->z_xattr_cached != NULL)
+		return (0);
+
+	if (rw_write_held(&zp->z_xattr_lock))
+		return (zfs_sa_get_xattr(zp));
+
+	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
+		rw_exit(&zp->z_xattr_lock);
+		rw_enter(&zp->z_xattr_lock, RW_WRITER);
+	}
+	/*
+	 * Re-check: the lock may have been released above, so the cache could
+	 * have been filled in the meantime.
+	 */
+	if (zp->z_xattr_cached == NULL)
+		error = zfs_sa_get_xattr(zp);
+	rw_downgrade(&zp->z_xattr_lock);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_getextattr {
+	IN struct vnode *a_vp;
+	IN int a_attrnamespace;
+	IN const char *a_name;
+	INOUT struct uio *a_uio;
+	OUT size_t *a_size;
+	IN struct ucred *a_cred;
+	IN struct thread *a_td;
+};
+#endif
+
+/*
+ * Read an extended attribute that is stored as a file in the vnode's
+ * extended-attribute directory.
+ *
+ * Looks up the attribute directory with zfs_lookup(LOOKUP_XATTR), opens
+ * attrname relative to it for reading, and then either reports the file
+ * size through *ap->a_size (when a_size is non-NULL) or reads the contents
+ * into ap->a_uio (when only a_uio is non-NULL).
+ *
+ * Returns 0 or the first error from lookup, open, VOP_GETATTR or VOP_READ.
+ *
+ * NOTE(review): xvp is not explicitly released in this function -- confirm
+ * how the reference returned by zfs_lookup() is disposed of.
+ */
+static int
+zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
+{
+	struct thread *td = ap->a_td;
+	struct nameidata nd;
+	struct vattr va;
+	vnode_t *xvp = NULL, *vp;
+	int error, flags;
+
+	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
+	    LOOKUP_XATTR, B_FALSE);
+	if (error != 0)
+		return (error);
+
+	flags = FREAD;
+	/* NDINIT_ATVP lost its thread argument in FreeBSD 1400043. */
+#if __FreeBSD_version < 1400043
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
+	    xvp, td);
+#else
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
+#endif
+	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
+
+	/* A size query takes precedence over reading the data. */
+	if (ap->a_size != NULL) {
+		error = VOP_GETATTR(vp, &va, ap->a_cred);
+		if (error == 0)
+			*ap->a_size = (size_t)va.va_size;
+	} else if (ap->a_uio != NULL)
+		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
+
+	VOP_UNLOCK1(vp);
+	vn_close(vp, flags, ap->a_cred, td);
+	return (error);
+}
+
+/*
+ * Read an extended attribute from the znode's cached xattr nvlist.
+ *
+ * The caller holds zp->z_xattr_lock.  When ap->a_size is non-NULL only the
+ * value length is reported; otherwise, if ap->a_uio is non-NULL, the value
+ * is copied out with uiomove().  Returns 0, the error from populating the
+ * cache, the nvlist lookup error, or the uiomove() error.
+ */
+static int
+zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	uchar_t *data;
+	uint_t datalen;
+	int rc;
+
+	rc = zfs_ensure_xattr_cached(zp);
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+	ASSERT3P(zp->z_xattr_cached, !=, NULL);
+
+	rc = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
+	    &data, &datalen);
+	if (rc)
+		return (rc);
+
+	/* A size query takes precedence over copying the value out. */
+	if (ap->a_size != NULL) {
+		*ap->a_size = datalen;
+		return (0);
+	}
+	if (ap->a_uio != NULL)
+		return (uiomove(data, datalen, ap->a_uio));
+
+	return (0);
+}
+
+/*
+ * Vnode operation to retrieve a named extended attribute.
+ *
+ * After checking that xattrs are enabled (ZSB_XATTR), that the credential
+ * may read the namespace, and that the name can be translated by
+ * zfs_create_attrname(), the attribute is looked up in the SA-based store
+ * (only when both the filesystem and the znode use SA) and, if that yields
+ * ENOENT or was skipped, in the attribute directory.  A final ENOENT is
+ * reported to the caller as ENOATTR.
+ */
+static int
+zfs_getextattr(struct vop_getextattr_args *ap)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	char attrname[EXTATTR_MAXNAMELEN+1];
+	int error;
+
+	/*
+	 * If the xattr property is off, refuse the request.
+	 */
+	if (!(zfsvfs->z_flags & ZSB_XATTR))
+		return (SET_ERROR(EOPNOTSUPP));
+
+	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
+	    ap->a_cred, ap->a_td, VREAD);
+	if (error != 0)
+		return (error);
+
+	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
+	    sizeof (attrname));
+	if (error != 0)
+		return (error);
+
+	/* Start from "not found" so the directory lookup runs if SA is skipped. */
+	error = ENOENT;
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_READER);
+	if (zfsvfs->z_use_sa && zp->z_is_sa)
+		error = zfs_getextattr_sa(ap, attrname);
+	if (error == ENOENT)
+		error = zfs_getextattr_dir(ap, attrname);
+	rw_exit(&zp->z_xattr_lock);
+	ZFS_EXIT(zfsvfs);
+	if (error == ENOENT)
+		error = SET_ERROR(ENOATTR);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_deleteextattr {
+	IN struct vnode *a_vp;
+	IN int a_attrnamespace;
+	IN const char *a_name;
+	IN struct ucred *a_cred;
+	IN struct thread *a_td;
+};
+#endif
+
+/*
+ * Remove an extended attribute that is stored as a file in the vnode's
+ * extended-attribute directory.
+ *
+ * Looks up the attribute directory with zfs_lookup(LOOKUP_XATTR), resolves
+ * attrname inside it with a DELETE namei() that locks both parent and leaf,
+ * and removes the entry with VOP_REMOVE().
+ *
+ * Returns 0 or the first error from lookup, namei() or VOP_REMOVE().
+ */
+static int
+zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
+{
+	struct nameidata nd;
+	vnode_t *xvp = NULL, *vp;
+	int error;
+
+	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
+	    LOOKUP_XATTR, B_FALSE);
+	if (error != 0)
+		return (error);
+
+	/* NDINIT_ATVP lost its thread argument in FreeBSD 1400043. */
+#if __FreeBSD_version < 1400043
+	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
+	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
+#else
+	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
+	    UIO_SYSSPACE, attrname, xvp);
+#endif
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+
+	vp = nd.ni_vp;
+	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
+	NDFREE_PNBUF(&nd);
+
+	/*
+	 * Release parent and leaf.  If they are the same vnode it has already
+	 * been unlocked by the vput() of the parent, so only drop the
+	 * remaining reference.
+	 */
+	vput(nd.ni_dvp);
+	if (vp == nd.ni_dvp)
+		vrele(vp);
+	else
+		vput(vp);
+
+	return (error);
+}
+
+/*
+ * Remove an extended attribute from the znode's cached xattr nvlist and
+ * write the updated list back with zfs_sa_set_xattr().
+ *
+ * The caller holds zp->z_xattr_lock as writer.  If either the removal or
+ * the write-back fails, the cached nvlist is detached from the znode and
+ * freed.  Returns 0 or the first error encountered.
+ */
+static int
+zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	nvlist_t *cache;
+	int rc;
+
+	rc = zfs_ensure_xattr_cached(zp);
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
+	ASSERT3P(zp->z_xattr_cached, !=, NULL);
+
+	cache = zp->z_xattr_cached;
+	rc = nvlist_remove(cache, attrname, DATA_TYPE_BYTE_ARRAY);
+	if (rc != 0 || (rc = zfs_sa_set_xattr(zp)) != 0) {
+		/* Drop the cache on any failure. */
+		zp->z_xattr_cached = NULL;
+		nvlist_free(cache);
+	}
+	return (rc);
+}
+
+/*
+ * Vnode operation to remove a named attribute.
+ *
+ * After checking that xattrs are enabled (ZSB_XATTR), that the credential
+ * may write the namespace, and that the name can be translated by
+ * zfs_create_attrname(), the attribute is probed and removed first in the
+ * SA-based store (only when both the filesystem and the znode use SA) and,
+ * if that yields ENOENT or was skipped, in the attribute directory.  A final
+ * ENOENT is reported to the caller as ENOATTR.
+ */
+static int
+zfs_deleteextattr(struct vop_deleteextattr_args *ap)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	char attrname[EXTATTR_MAXNAMELEN+1];
+	int error;
+
+	/*
+	 * If the xattr property is off, refuse the request.
+	 */
+	if (!(zfsvfs->z_flags & ZSB_XATTR))
+		return (SET_ERROR(EOPNOTSUPP));
+
+	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
+	    ap->a_cred, ap->a_td, VWRITE);
+	if (error != 0)
+		return (error);
+
+	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
+	    sizeof (attrname));
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Existence probe: a get request with only a_size set (no a_uio), so
+	 * the get helpers report the size without copying any data.
+	 */
+	size_t size = 0;
+	struct vop_getextattr_args vga = {
+		.a_vp = ap->a_vp,
+		.a_size = &size,
+		.a_cred = ap->a_cred,
+		.a_td = ap->a_td,
+	};
+	/* Start from "not found" so the directory path runs if SA is skipped. */
+	error = ENOENT;
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_WRITER);
+	if (zfsvfs->z_use_sa && zp->z_is_sa) {
+		error = zfs_getextattr_sa(&vga, attrname);
+		if (error == 0)
+			error = zfs_deleteextattr_sa(ap, attrname);
+	}
+	if (error == ENOENT) {
+		error = zfs_getextattr_dir(&vga, attrname);
+		if (error == 0)
+			error = zfs_deleteextattr_dir(ap, attrname);
+	}
+	rw_exit(&zp->z_xattr_lock);
+	ZFS_EXIT(zfsvfs);
+	if (error == ENOENT)
+		error = SET_ERROR(ENOATTR);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_setextattr {
+	IN struct vnode *a_vp;
+	IN int a_attrnamespace;
+	IN const char *a_name;
+	INOUT struct uio *a_uio;
+	IN struct ucred *a_cred;
+	IN struct thread *a_td;
+};
+#endif
+
+/*
+ * Store an extended attribute as a file in the vnode's extended-attribute
+ * directory.
+ *
+ * Looks up the attribute directory with zfs_lookup(), asking for it to be
+ * created if missing (CREATE_XATTR_DIR), opens attrname inside it with
+ * O_WRONLY | O_CREAT and mode 0600, truncates the file to zero length and
+ * writes the contents of ap->a_uio.
+ *
+ * Returns 0 or the first error from lookup, open, VOP_SETATTR or VOP_WRITE.
+ */
+static int
+zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
+{
+	struct thread *td = ap->a_td;
+	struct nameidata nd;
+	struct vattr va;
+	vnode_t *xvp = NULL, *vp;
+	int error, flags;
+
+	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
+	    LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
+	if (error != 0)
+		return (error);
+
+	flags = FFLAGS(O_WRONLY | O_CREAT);
+	/* NDINIT_ATVP lost its thread argument in FreeBSD 1400043. */
+#if __FreeBSD_version < 1400043
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
+#else
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
+#endif
+	error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
+	    NULL);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
+
+	/* Truncate any previous value before writing the new one. */
+	VATTR_NULL(&va);
+	va.va_size = 0;
+	error = VOP_SETATTR(vp, &va, ap->a_cred);
+	if (error == 0) {
+		/*
+		 * Propagate write failures; previously the result was
+		 * dropped and a failed write was reported as success.
+		 */
+		error = VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
+	}
+
+	VOP_UNLOCK1(vp);
+	vn_close(vp, flags, ap->a_cred, td);
+	return (error);
+}
+
+/*
+ * Store an extended attribute in the znode's cached xattr nvlist and write
+ * the updated list back with zfs_sa_set_xattr().
+ *
+ * The caller holds zp->z_xattr_lock as writer.  The value (ap->a_uio's
+ * residual bytes) is rejected with EFBIG when it exceeds
+ * DXATTR_MAX_ENTRY_SIZE or when the XDR-encoded size of the existing list
+ * already exceeds DXATTR_MAX_SA_SIZE.  If copying in, adding to the nvlist
+ * or the write-back fails, the cached nvlist is detached from the znode and
+ * freed.
+ *
+ * Returns 0 or the first error encountered.
+ */
+static int
+zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	nvlist_t *nvl;
+	size_t sa_size;
+	int error;
+
+	error = zfs_ensure_xattr_cached(zp);
+	if (error != 0)
+		return (error);
+
+	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
+	ASSERT3P(zp->z_xattr_cached, !=, NULL);
+
+	nvl = zp->z_xattr_cached;
+	size_t entry_size = ap->a_uio->uio_resid;
+	if (entry_size > DXATTR_MAX_ENTRY_SIZE)
+		return (SET_ERROR(EFBIG));
+	/* Size of the list as it stands, before the new entry is added. */
+	error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
+	if (error != 0)
+		return (error);
+	if (sa_size > DXATTR_MAX_SA_SIZE)
+		return (SET_ERROR(EFBIG));
+	/* Stage the value in a temporary buffer; it is freed right after use. */
+	uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
+	error = uiomove(buf, entry_size, ap->a_uio);
+	if (error == 0)
+		error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
+	kmem_free(buf, entry_size);
+	if (error == 0)
+		error = zfs_sa_set_xattr(zp);
+	if (error != 0) {
+		/* Drop the cache on any failure past this point. */
+		zp->z_xattr_cached = NULL;
+		nvlist_free(nvl);
+	}
+	return (error);
+}
+
+/*
+ * Vnode operation to set a named attribute.
+ *
+ * After checking that xattrs are enabled (ZSB_XATTR), that the credential
+ * may write the namespace, and that the name can be translated by
+ * zfs_create_attrname(), the value is stored in the SA-based store when the
+ * filesystem uses SA, the znode is an SA znode and z_xattr_sa is set.  If
+ * that path is skipped or fails, the value is stored in the attribute
+ * directory instead.  Whichever store succeeds, a copy of the same name in
+ * the other store is removed on a best-effort basis (the result of that
+ * removal is ignored).
+ *
+ * Returns 0 or the error from the last store attempted.
+ */
+static int
+zfs_setextattr(struct vop_setextattr_args *ap)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	char attrname[EXTATTR_MAXNAMELEN+1];
+	int error;
+
+	/*
+	 * If the xattr property is off, refuse the request.
+	 */
+	if (!(zfsvfs->z_flags & ZSB_XATTR))
+		return (SET_ERROR(EOPNOTSUPP));
+
+	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
+	    ap->a_cred, ap->a_td, VWRITE);
+	if (error != 0)
+		return (error);
+
+	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
+	    sizeof (attrname));
+	if (error != 0)
+		return (error);
+
+	/* Arguments for removing the stale copy from the other store. */
+	struct vop_deleteextattr_args vda = {
+		.a_vp = ap->a_vp,
+		.a_cred = ap->a_cred,
+		.a_td = ap->a_td,
+	};
+	/* Start from an error so the directory path runs if SA is skipped. */
+	error = ENOENT;
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_WRITER);
+	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
+		error = zfs_setextattr_sa(ap, attrname);
+		if (error == 0)
+			/*
+			 * Successfully put into SA, we need to clear the one
+			 * in dir if present.
+			 */
+			zfs_deleteextattr_dir(&vda, attrname);
+	}
+	if (error) {
+		error = zfs_setextattr_dir(ap, attrname);
+		if (error == 0 && zp->z_is_sa)
+			/*
+			 * Successfully put into dir, we need to clear the one
+			 * in SA if present.
+			 */
+			zfs_deleteextattr_sa(&vda, attrname);
+	}
+	rw_exit(&zp->z_xattr_lock);
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_listextattr {
+	IN struct vnode *a_vp;
+	IN int a_attrnamespace;
+	INOUT struct uio *a_uio;
+	OUT size_t *a_size;
+	IN struct ucred *a_cred;
+	IN struct thread *a_td;
+};
+#endif
+
+/*
+ * List the extended attributes stored as files in the vnode's
+ * extended-attribute directory.
+ *
+ * The directory is read one struct dirent-sized buffer at a time.  Entries
+ * are reported when they are regular files (or DT_UNKNOWN) whose names
+ * start with attrprefix; with an empty prefix, names starting with
+ * "freebsd:" are skipped.  For each match either *ap->a_size is increased
+ * by 1 + name length (when a_size is non-NULL) or a one-byte length followed
+ * by the name, both with attrprefix stripped, is copied to ap->a_uio.
+ *
+ * A missing attribute directory (ENOATTR from zfs_lookup()) is treated as an
+ * empty list.  Returns 0 or the first error encountered.
+ */
+static int
+zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
+{
+	struct thread *td = ap->a_td;
+	struct nameidata nd;
+	uint8_t dirbuf[sizeof (struct dirent)];
+	struct iovec aiov;
+	struct uio auio;
+	vnode_t *xvp = NULL, *vp;
+	int error, eof;
+
+	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
+	    LOOKUP_XATTR, B_FALSE);
+	if (error != 0) {
+		/*
+		 * ENOATTR means that the EA directory does not yet exist,
+		 * i.e. there are no extended attributes there.
+		 */
+		if (error == ENOATTR)
+			error = 0;
+		return (error);
+	}
+
+	/* Resolve "." relative to xvp to obtain it share-locked. */
+#if __FreeBSD_version < 1400043
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
+	    UIO_SYSSPACE, ".", xvp, td);
+#else
+	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
+	    UIO_SYSSPACE, ".", xvp);
+#endif
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
+
+	/* Kernel-space uio over dirbuf; uio_offset carries on between reads. */
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_td = td;
+	auio.uio_rw = UIO_READ;
+	auio.uio_offset = 0;
+
+	size_t plen = strlen(attrprefix);
+
+	do {
+		/* Reset the iovec to the whole buffer for every read. */
+		aiov.iov_base = (void *)dirbuf;
+		aiov.iov_len = sizeof (dirbuf);
+		auio.uio_resid = sizeof (dirbuf);
+		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
+		if (error != 0)
+			break;
+		/* Number of bytes VOP_READDIR placed in dirbuf. */
+		int done = sizeof (dirbuf) - auio.uio_resid;
+		for (int pos = 0; pos < done; ) {
+			struct dirent *dp = (struct dirent *)(dirbuf + pos);
+			pos += dp->d_reclen;
+			/*
+			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
+			 * is what we get when attribute was created on Solaris.
+			 */
+			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
+				continue;
+			else if (plen == 0 &&
+			    strncmp(dp->d_name, "freebsd:", 8) == 0)
+				continue;
+			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
+				continue;
+			/* Length of the name with the prefix stripped. */
+			uint8_t nlen = dp->d_namlen - plen;
+			if (ap->a_size != NULL) {
+				*ap->a_size += 1 + nlen;
+			} else if (ap->a_uio != NULL) {
+				/*
+				 * Format of extattr name entry is one byte for
+				 * length and the rest for name.
+				 */
+				error = uiomove(&nlen, 1, ap->a_uio);
+				if (error == 0) {
+					char *namep = dp->d_name + plen;
+					error = uiomove(namep, nlen, ap->a_uio);
+				}
+				/* Leaves the for loop; the do/while then stops on error. */
+				if (error != 0)
+					break;
+			}
+		}
+	} while (!eof && error == 0);
+
+	vput(vp);
+	return (error);
+}
+
+/*
+ * List the extended attributes held in the znode's cached xattr nvlist.
+ *
+ * The caller holds zp->z_xattr_lock.  Names are reported when they start
+ * with attrprefix; with an empty prefix, names starting with "freebsd:" are
+ * skipped.  For each match either *ap->a_size is increased by 1 + name
+ * length (when a_size is non-NULL) or a one-byte length followed by the
+ * name, both with attrprefix stripped, is copied to ap->a_uio.
+ *
+ * Returns 0, the error from populating the cache, or a uiomove() error.
+ */
+static int
+zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	nvpair_t *pair;
+	size_t prefixlen;
+	int rc;
+
+	rc = zfs_ensure_xattr_cached(zp);
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+	ASSERT3P(zp->z_xattr_cached, !=, NULL);
+
+	prefixlen = strlen(attrprefix);
+	for (pair = nvlist_next_nvpair(zp->z_xattr_cached, NULL);
+	    pair != NULL;
+	    pair = nvlist_next_nvpair(zp->z_xattr_cached, pair)) {
+		const char *name;
+		uint8_t nlen;
+
+		ASSERT3U(nvpair_type(pair), ==, DATA_TYPE_BYTE_ARRAY);
+
+		name = nvpair_name(pair);
+		if (prefixlen == 0 && strncmp(name, "freebsd:", 8) == 0)
+			continue;
+		if (strncmp(name, attrprefix, prefixlen) != 0)
+			continue;
+
+		/* Length of the name with the prefix stripped. */
+		nlen = strlen(name) - prefixlen;
+		if (ap->a_size != NULL) {
+			*ap->a_size += 1 + nlen;
+			continue;
+		}
+		if (ap->a_uio == NULL)
+			continue;
+
+		/*
+		 * Format of extattr name entry is one byte for
+		 * length and the rest for name.
+		 */
+		rc = uiomove(&nlen, 1, ap->a_uio);
+		if (rc == 0) {
+			char *stripped = __DECONST(char *, name) + prefixlen;
+			rc = uiomove(stripped, nlen, ap->a_uio);
+		}
+		if (rc != 0)
+			break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Vnode operation to retrieve extended attributes on a vnode.
+ *
+ * *ap->a_size (when provided) is zeroed up front.  After checking that
+ * xattrs are enabled (ZSB_XATTR) and that the credential may read the
+ * namespace, the namespace prefix is built by passing an empty name to
+ * zfs_create_attrname().  Matching names are then listed from the SA-based
+ * store (only when both the filesystem and the znode use SA) and, provided
+ * no error occurred, from the attribute directory as well.
+ */
+static int
+zfs_listextattr(struct vop_listextattr_args *ap)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	char attrprefix[16];
+	int error;
+
+	if (ap->a_size != NULL)
+		*ap->a_size = 0;
+
+	/*
+	 * If the xattr property is off, refuse the request.
+	 */
+	if (!(zfsvfs->z_flags & ZSB_XATTR))
+		return (SET_ERROR(EOPNOTSUPP));
+
+	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
+	    ap->a_cred, ap->a_td, VREAD);
+	if (error != 0)
+		return (error);
+
+	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
+	    sizeof (attrprefix));
+	if (error != 0)
+		return (error);
+
+	/* error is 0 here, so the directory listing runs even if SA is skipped. */
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_READER);
+	if (zfsvfs->z_use_sa && zp->z_is_sa)
+		error = zfs_listextattr_sa(ap, attrprefix);
+	if (error == 0)
+		error = zfs_listextattr_dir(ap, attrprefix);
+	rw_exit(&zp->z_xattr_lock);
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_getacl_args {
+	struct vnode *vp;
+	acl_type_t type;
+	struct acl *aclp;
+	struct ucred *cred;
+	struct thread *td;
+};
+#endif
+
+/*
+ * VOP_GETACL entry point: only ACL_TYPE_NFS4 is accepted (EINVAL
+ * otherwise).  The ACEs and their count are fetched with zfs_getsecattr(),
+ * converted into the caller's struct acl by acl_from_aces(), and the
+ * temporary ACE buffer is freed.  Returns 0 or the first error.
+ */
+static int
+zfs_freebsd_getacl(struct vop_getacl_args *ap)
+{
+	vsecattr_t	vsa;
+	int		rc;
+
+	if (ap->a_type != ACL_TYPE_NFS4)
+		return (EINVAL);
+
+	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
+	rc = zfs_getsecattr(VTOZ(ap->a_vp), &vsa, 0, ap->a_cred);
+	if (rc != 0)
+		return (rc);
+
+	rc = acl_from_aces(ap->a_aclp, vsa.vsa_aclentp, vsa.vsa_aclcnt);
+
+	/* The ACE buffer is released whether or not the conversion worked. */
+	if (vsa.vsa_aclentp != NULL)
+		kmem_free(vsa.vsa_aclentp, vsa.vsa_aclentsz);
+
+	return (rc);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_setacl_args {
+	struct vnode *vp;
+	acl_type_t type;
+	struct acl *aclp;
+	struct ucred *cred;
+	struct thread *td;
+};
+#endif
+
+/*
+ * VOP_SETACL entry point: only a non-NULL ACL_TYPE_NFS4 ACL with between 1
+ * and MAX_ACL_ENTRIES entries is accepted (EINVAL otherwise).  The ACL is
+ * validated with acl_nfs4_check(), converted to an ACE array by
+ * aces_from_acl() and applied with zfs_setsecattr(); the temporary array is
+ * freed afterwards.  Returns 0 or the first error.
+ */
+static int
+zfs_freebsd_setacl(struct vop_setacl_args *ap)
+{
+	struct acl	*acl = ap->a_aclp;
+	vsecattr_t	vsa;
+	aclent_t	*entries;
+	int		nbytes;		/* size of acl list in bytes */
+	int		rc;
+
+	if (ap->a_type != ACL_TYPE_NFS4 || acl == NULL)
+		return (EINVAL);
+
+	if (acl->acl_cnt < 1 || acl->acl_cnt > MAX_ACL_ENTRIES)
+		return (EINVAL);
+
+	/*
+	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
+	 * splitting every entry into two and appending "canonical six"
+	 * entries at the end.  Don't allow for setting an ACL that would
+	 * cause chmod(2) to run out of ACL entries.
+	 */
+	if (acl->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
+		return (ENOSPC);
+
+	rc = acl_nfs4_check(acl, ap->a_vp->v_type == VDIR);
+	if (rc != 0)
+		return (rc);
+
+	nbytes = acl->acl_cnt * sizeof (ace_t);
+	entries = kmem_alloc(nbytes, KM_SLEEP);
+
+	vsa.vsa_mask = VSA_ACE;
+	vsa.vsa_aclentp = entries;
+	vsa.vsa_aclentsz = nbytes;
+
+	aces_from_acl(vsa.vsa_aclentp, &vsa.vsa_aclcnt, acl);
+	rc = zfs_setsecattr(VTOZ(ap->a_vp), &vsa, 0, ap->a_cred);
+
+	/* Free through the saved pointer and size, not through vsa. */
+	kmem_free(entries, nbytes);
+
+	return (rc);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct vop_aclcheck_args {
+	struct vnode *vp;
+	acl_type_t type;
+	struct acl *aclp;
+	struct ucred *cred;
+	struct thread *td;
+};
+#endif
+
+/*
+ * VOP_ACLCHECK entry point: not implemented; the arguments are ignored and
+ * EOPNOTSUPP is returned unconditionally.
+ */
+static int
+zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
+{
+	const int unsupported = EOPNOTSUPP;
+
+	return (unsupported);
+}
+
+/*
+ * VOP_VPTOCNP entry point: translate a vnode into its parent directory vnode
+ * and the name it has there.
+ *
+ * In the common case the parent znode and name come from
+ * zfs_znode_parent_and_name(); the name is copied to the tail of ap->a_buf
+ * (with *ap->a_buflen reduced by its length) and the parent vnode is
+ * returned in *ap->a_vpp.  ENOMEM is returned when the name does not fit.
+ *
+ * When vp is the root znode of a filesystem whose z_parent is not itself,
+ * the request is instead forwarded to the vnode this mount covers, with vp
+ * unlocked for the duration and relocked before returning.  ENOENT is
+ * returned if vp was doomed in the meantime.
+ */
+static int
+zfs_vptocnp(struct vop_vptocnp_args *ap)
+{
+	vnode_t *covered_vp;
+	vnode_t *vp = ap->a_vp;
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	znode_t *zp = VTOZ(vp);
+	int ltype;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/*
+	 * If we are a snapshot mounted under .zfs, run the operation
+	 * on the covered vnode.
+	 *
+	 * The test below selects the opposite case: anything that is not the
+	 * root znode, or whose filesystem is its own z_parent, is resolved
+	 * here and returns from inside this block.
+	 */
+	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
+		char name[MAXNAMLEN + 1];
+		znode_t *dzp;
+		size_t len;
+
+		error = zfs_znode_parent_and_name(zp, &dzp, name);
+		if (error == 0) {
+			len = strlen(name);
+			if (*ap->a_buflen < len)
+				error = SET_ERROR(ENOMEM);
+		}
+		if (error == 0) {
+			/* The name is stored at the end of the remaining buffer. */
+			*ap->a_buflen -= len;
+			bcopy(name, ap->a_buf + *ap->a_buflen, len);
+			*ap->a_vpp = ZTOV(dzp);
+		}
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	ZFS_EXIT(zfsvfs);
+
+	/*
+	 * Take a hold on the covered vnode before vp is unlocked, then finish
+	 * acquiring it share-locked once vp's lock has been dropped.
+	 */
+	covered_vp = vp->v_mount->mnt_vnodecovered;
+#if __FreeBSD_version >= 1300045
+	enum vgetstate vs = vget_prep(covered_vp);
+#else
+	vhold(covered_vp);
+#endif
+	/* Remember vp's lock type so it can be re-taken in the same mode. */
+	ltype = VOP_ISLOCKED(vp);
+	VOP_UNLOCK1(vp);
+#if __FreeBSD_version >= 1300045
+	error = vget_finish(covered_vp, LK_SHARED, vs);
+#else
+	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
+#endif
+	if (error == 0) {
+#if __FreeBSD_version >= 1300123
+		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
+		    ap->a_buflen);
+#else
+		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
+		    ap->a_buf, ap->a_buflen);
+#endif
+		vput(covered_vp);
+	}
+	vn_lock(vp, ltype | LK_RETRY);
+	/* vp may have been reclaimed while it was unlocked. */
+	if (VN_IS_DOOMED(vp))
+		error = SET_ERROR(ENOENT);
+	return (error);
+}
+
+#if __FreeBSD_version >= 1400032
+/*
+ * VOP_DEALLOCATE handler: free the byte range described by *ap->a_offset and
+ * *ap->a_len, clamped to the current file size.
+ *
+ * On success *ap->a_offset is advanced past the freed range and *ap->a_len
+ * is set to zero.  A request that lies at or beyond end of file succeeds
+ * without freeing anything; in that case only *ap->a_len is reset.
+ * Returns EROFS on a read-only file system, otherwise the result of
+ * zfs_freesp().
+ */
+static int
+zfs_deallocate(struct vop_deallocate_args *ap)
+{
+	znode_t *zp = VTOZ(ap->a_vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	zilog_t *zilog;
+	off_t start, nbytes, eof;
+	int rc;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/*
+	 * Callers might not be able to detect properly that we are read-only,
+	 * so check it explicitly here.
+	 */
+	if (zfs_is_readonly(zfsvfs)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EROFS));
+	}
+
+	zilog = zfsvfs->z_log;
+	start = *ap->a_offset;
+	nbytes = *ap->a_len;
+	eof = zp->z_size;
+
+	/* Never free past the end of the file. */
+	if (start + nbytes > eof)
+		nbytes = eof - start;
+
+	/* Fast path for out-of-range request. */
+	if (nbytes <= 0) {
+		*ap->a_len = 0;
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	rc = zfs_freesp(zp, start, nbytes, O_RDWR, TRUE);
+	if (rc != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (rc);
+	}
+
+	/* Commit the intent log when synchronous semantics are required. */
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
+	    (ap->a_ioflag & IO_SYNC) != 0)
+		zil_commit(zilog, zp->z_id);
+
+	*ap->a_offset = start + nbytes;
+	*ap->a_len = 0;
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+#endif
+
+/* Tentative definitions of the three operation vectors initialized below. */
+struct vop_vector zfs_vnodeops;
+struct vop_vector zfs_fifoops;
+struct vop_vector zfs_shareops;
+
+/*
+ * Main vnode operations vector.  Operations without an entry here fall
+ * back to default_vnodeops; several entries are only present from the
+ * FreeBSD version named in the surrounding #if.
+ */
+struct vop_vector zfs_vnodeops = {
+	.vop_default =		&default_vnodeops,
+	.vop_inactive =		zfs_freebsd_inactive,
+#if __FreeBSD_version >= 1300042
+	.vop_need_inactive =	zfs_freebsd_need_inactive,
+#endif
+	.vop_reclaim =		zfs_freebsd_reclaim,
+#if __FreeBSD_version >= 1300102
+	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
+#endif
+	.vop_access =		zfs_freebsd_access,
+	.vop_allocate =		VOP_EINVAL,
+#if __FreeBSD_version >= 1400032
+	.vop_deallocate =	zfs_deallocate,
+#endif
+	.vop_lookup =		zfs_cache_lookup,
+	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
+	.vop_getattr =		zfs_freebsd_getattr,
+	.vop_setattr =		zfs_freebsd_setattr,
+	.vop_create =		zfs_freebsd_create,
+	/* mknod shares the create handler, hence the function-type cast. */
+	.vop_mknod =		(vop_mknod_t *)zfs_freebsd_create,
+	.vop_mkdir =		zfs_freebsd_mkdir,
+	.vop_readdir =		zfs_freebsd_readdir,
+	.vop_fsync =		zfs_freebsd_fsync,
+	.vop_open =		zfs_freebsd_open,
+	.vop_close =		zfs_freebsd_close,
+	.vop_rmdir =		zfs_freebsd_rmdir,
+	.vop_ioctl =		zfs_freebsd_ioctl,
+	.vop_link =		zfs_freebsd_link,
+	.vop_symlink =		zfs_freebsd_symlink,
+	.vop_readlink =		zfs_freebsd_readlink,
+	.vop_read =		zfs_freebsd_read,
+	.vop_write =		zfs_freebsd_write,
+	.vop_remove =		zfs_freebsd_remove,
+	.vop_rename =		zfs_freebsd_rename,
+	.vop_pathconf =		zfs_freebsd_pathconf,
+	.vop_bmap =		zfs_freebsd_bmap,
+	.vop_fid =		zfs_freebsd_fid,
+	.vop_getextattr =	zfs_getextattr,
+	.vop_deleteextattr =	zfs_deleteextattr,
+	.vop_setextattr =	zfs_setextattr,
+	.vop_listextattr =	zfs_listextattr,
+	.vop_getacl =		zfs_freebsd_getacl,
+	.vop_setacl =		zfs_freebsd_setacl,
+	.vop_aclcheck =		zfs_freebsd_aclcheck,
+	.vop_getpages =		zfs_freebsd_getpages,
+	.vop_putpages =		zfs_freebsd_putpages,
+	.vop_vptocnp =		zfs_vptocnp,
+#if __FreeBSD_version >= 1300064
+	.vop_lock1 =		vop_lock,
+	.vop_unlock =		vop_unlock,
+	.vop_islocked =		vop_islocked,
+#endif
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
+
+/*
+ * Vnode operations vector for FIFOs.  Unlisted operations fall back to
+ * fifo_specops; read and write are wired to VOP_PANIC.
+ */
+struct vop_vector zfs_fifoops = {
+	.vop_default =		&fifo_specops,
+	.vop_fsync =		zfs_freebsd_fsync,
+#if __FreeBSD_version >= 1300102
+	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
+#endif
+	.vop_access =		zfs_freebsd_access,
+	.vop_getattr =		zfs_freebsd_getattr,
+	.vop_inactive =		zfs_freebsd_inactive,
+	.vop_read =		VOP_PANIC,
+	.vop_reclaim =		zfs_freebsd_reclaim,
+	.vop_setattr =		zfs_freebsd_setattr,
+	.vop_write =		VOP_PANIC,
+	.vop_pathconf = 	zfs_freebsd_pathconf,
+	.vop_fid =		zfs_freebsd_fid,
+	.vop_getacl =		zfs_freebsd_getacl,
+	.vop_setacl =		zfs_freebsd_setacl,
+	.vop_aclcheck =		zfs_freebsd_aclcheck,
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
+
+/*
+ * Vnode operations template for the special hidden share files.  Only
+ * access, inactive, reclaim, fid and pathconf are handled by ZFS routines;
+ * the fplookup entries are set to VOP_EAGAIN and everything else falls back
+ * to default_vnodeops.
+ */
+struct vop_vector zfs_shareops = {
+	.vop_default =		&default_vnodeops,
+#if __FreeBSD_version >= 1300121
+	.vop_fplookup_vexec =	VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink =	VOP_EAGAIN,
+#endif
+	.vop_access =		zfs_freebsd_access,
+	.vop_inactive =		zfs_freebsd_inactive,
+	.vop_reclaim =		zfs_freebsd_reclaim,
+	.vop_fid =		zfs_freebsd_fid,
+	.vop_pathconf =		zfs_freebsd_pathconf,
+#if __FreeBSD_version >= 1400043
+	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
+#endif
+};
+VFS_VOP_VECTOR_REGISTER(zfs_shareops);

diff --git a/zfs/module/os/freebsd/zfs/zfs_znode.c b/zfs/module/os/freebsd/zfs/zfs_znode.c
new file mode 100644
index 0000000..92e3bdd
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zfs_znode.c

@@ -0,0 +1,2119 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ */
+
+/* Portions Copyright 2007 Jeremy Teo */
+/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/resource.h>
+#include <sys/mntent.h>
+#include <sys/u8_textprep.h>
+#include <sys/dsl_dataset.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/unistd.h>
+#include <sys/atomic.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_rlock.h>
+#include <sys/zfs_fuid.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#endif /* _KERNEL */
+
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
+#include <sys/zfs_refcount.h>
+#include <sys/stat.h>
+#include <sys/zap.h>
+#include <sys/zfs_znode.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_stat.h>
+
+#include "zfs_prop.h"
+#include "zfs_comutil.h"
+
+/*
+ * Read-only sysctl node "znode" under _debug_sizeof reporting
+ * sizeof (znode_t).  Used by fstat(1).
+ */
+SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
+	SYSCTL_NULL_INT_PTR, sizeof (znode_t), "sizeof(znode_t)");
+
+/*
+ * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
+ * turned on when ZFS_DEBUG is also defined.
+ */
+#ifdef	ZFS_DEBUG
+#define	ZNODE_STATS
+#endif	/* ZFS_DEBUG */
+
+/* Increment a statistics counter; expands to nothing without ZNODE_STATS. */
+#ifdef	ZNODE_STATS
+#define	ZNODE_STAT_ADD(stat)			((stat)++)
+#else
+#define	ZNODE_STAT_ADD(stat)			/* nothing */
+#endif	/* ZNODE_STATS */
+
+/*
+ * Functions needed for userland (i.e. libzpool) are not put under
+ * #ifdef _KERNEL; the rest of the functions have dependencies
+ * (such as VFS logic) that will not compile easily in userland.
+ */
+#ifdef _KERNEL
+/*
+ * Backing allocator for znodes: a UMA zone (with _ZFS_USE_SMR defined) when
+ * KMEM_DEBUG is off and __FreeBSD_version >= 1300102, a kmem cache
+ * otherwise.
+ */
+#if !defined(KMEM_DEBUG) && __FreeBSD_version >= 1300102
+#define	_ZFS_USE_SMR
+static uma_zone_t znode_uma_zone;
+#else
+static kmem_cache_t *znode_cache = NULL;
+#endif
+
+/* Vnode operation vectors installed on vnodes by zfs_znode_alloc(). */
+extern struct vop_vector zfs_vnodeops;
+extern struct vop_vector zfs_fifoops;
+extern struct vop_vector zfs_shareops;
+
+
+/*
+ * Range lock callback, invoked when a RL_WRITER or RL_APPEND lock is being
+ * acquired on z_rangelock.  It adjusts the offset and length of the lock
+ * using znode-specific information and turns RL_APPEND into RL_WRITER.
+ * It is called with the rangelock_t's rl_lock held, which avoids races.
+ *
+ *	new	- lock request; modified in place
+ *	arg	- the znode owning the range lock
+ */
+static void
+zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
+{
+	znode_t *zp = arg;
+	uint64_t end_size;
+
+	/*
+	 * An append becomes a write that starts at the current end of file.
+	 */
+	if (new->lr_type == RL_APPEND) {
+		new->lr_offset = zp->z_size;
+		new->lr_type = RL_WRITER;
+	}
+
+	/*
+	 * The lock only has to be widened when the block size needs to
+	 * grow; bail out in every case where it does not.
+	 */
+	end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
+	if (end_size <= zp->z_blksz)
+		return;
+	if (ISP2(zp->z_blksz) && zp->z_blksz >= ZTOZSB(zp)->z_max_blksz)
+		return;
+
+	/* Growing the block size: lock the whole file range. */
+	new->lr_offset = 0;
+	new->lr_length = UINT64_MAX;
+}
+
+/*
+ * Cache constructor for znode_t: puts a freshly allocated buffer into the
+ * idle state that zfs_znode_cache_destructor() later asserts.  Neither arg
+ * nor kmflags is used.  Always returns 0.
+ */
+static int
+zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
+{
+	znode_t *zp = buf;
+
+	/* Plain fields: no owner, no vnode, no cached data, no writers. */
+	POINTER_INVALIDATE(&zp->z_zfsvfs);
+	zp->z_vnode = NULL;
+	zp->z_acl_cached = NULL;
+	zp->z_xattr_cached = NULL;
+	zp->z_xattr_parent = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
+	/* List linkage and synchronization primitives. */
+	list_link_init(&zp->z_link_node);
+	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
+	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
+	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
+
+	return (0);
+}
+
+/*
+ * Cache destructor for znode_t: checks that the znode is idle (no file
+ * system, vnode, list membership, cached ACL/xattr data or outstanding
+ * writes) and tears down the primitives set up by the constructor.
+ */
+/*ARGSUSED*/
+static void
+zfs_znode_cache_destructor(void *buf, void *arg)
+{
+	znode_t *zp = buf;
+
+	/* The znode must be completely detached by now. */
+	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
+	ASSERT3P(zp->z_vnode, ==, NULL);
+	ASSERT(!list_link_active(&zp->z_link_node));
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
+
+	mutex_destroy(&zp->z_lock);
+	mutex_destroy(&zp->z_acl_lock);
+	rw_destroy(&zp->z_xattr_lock);
+	zfs_rangelock_fini(&zp->z_rangelock);
+}
+
+
+#ifdef _ZFS_USE_SMR
+VFS_SMR_DECLARE;
+
+/*
+ * UMA constructor adapter: drops the unused size argument and forwards to
+ * zfs_znode_cache_constructor().
+ */
+static int
+zfs_znode_cache_constructor_smr(void *mem, int size __unused, void *private,
+    int flags)
+{
+	int rc;
+
+	rc = zfs_znode_cache_constructor(mem, private, flags);
+	return (rc);
+}
+
+/*
+ * UMA destructor adapter: drops the unused size argument and forwards to
+ * zfs_znode_cache_destructor().
+ */
+static void
+zfs_znode_cache_destructor_smr(void *mem, int size __unused, void *private)
+{
+	zfs_znode_cache_destructor(mem, private);
+}
+
+/*
+ * Create the UMA zone that znodes are allocated from (SMR build) and attach
+ * it to VFS SMR.  The zone must not exist yet.
+ */
+void
+zfs_znode_init(void)
+{
+	uma_zone_t zone;
+
+	ASSERT3P(znode_uma_zone, ==, NULL);
+
+	/* Initialize zcache */
+	zone = uma_zcreate("zfs_znode_cache",
+	    sizeof (znode_t), zfs_znode_cache_constructor_smr,
+	    zfs_znode_cache_destructor_smr, NULL, NULL, 0, 0);
+	znode_uma_zone = zone;
+	VFS_SMR_ZONE_SET(znode_uma_zone);
+}
+
+/*
+ * Allocate a constructed znode from the SMR-enabled UMA zone, passing
+ * flags through to the allocator.
+ */
+static znode_t *
+zfs_znode_alloc_kmem(int flags)
+{
+	znode_t *zp;
+
+	zp = uma_zalloc_smr(znode_uma_zone, flags);
+	return (zp);
+}
+
+/*
+ * Release a znode to the SMR-enabled UMA zone, dropping any cached xattr
+ * nvlist first.
+ */
+static void
+zfs_znode_free_kmem(znode_t *zp)
+{
+	nvlist_t *cached = zp->z_xattr_cached;
+
+	if (cached != NULL) {
+		nvlist_free(cached);
+		zp->z_xattr_cached = NULL;
+	}
+	uma_zfree_smr(znode_uma_zone, zp);
+}
+#else
+/*
+ * Create the kmem cache that znodes are allocated from (non-SMR build).
+ * The cache must not exist yet.
+ */
+void
+zfs_znode_init(void)
+{
+	kmem_cache_t *cache;
+
+	ASSERT3P(znode_cache, ==, NULL);
+
+	/* Initialize zcache */
+	cache = kmem_cache_create("zfs_znode_cache",
+	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
+	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
+	znode_cache = cache;
+}
+
+/*
+ * Allocate a constructed znode from the kmem cache, passing flags through
+ * to the allocator.
+ */
+static znode_t *
+zfs_znode_alloc_kmem(int flags)
+{
+	znode_t *zp;
+
+	zp = kmem_cache_alloc(znode_cache, flags);
+	return (zp);
+}
+
+/*
+ * Release a znode to the kmem cache, dropping any cached xattr nvlist
+ * first.
+ */
+static void
+zfs_znode_free_kmem(znode_t *zp)
+{
+	nvlist_t *cached = zp->z_xattr_cached;
+
+	if (cached != NULL) {
+		nvlist_free(cached);
+		zp->z_xattr_cached = NULL;
+	}
+	kmem_cache_free(znode_cache, zp);
+}
+#endif
+
+/*
+ * Destroy the znode allocator created by zfs_znode_init(), if it exists,
+ * and clear the global handle.
+ */
+void
+zfs_znode_fini(void)
+{
+	/* Cleanup zcache */
+#ifdef _ZFS_USE_SMR
+	if (znode_uma_zone != NULL) {
+		uma_zdestroy(znode_uma_zone);
+		znode_uma_zone = NULL;
+	}
+#else
+	if (znode_cache != NULL) {
+		kmem_cache_destroy(znode_cache);
+		znode_cache = NULL;
+	}
+#endif
+}
+
+
+/*
+ * Create the shares directory object inside transaction tx and register it
+ * in the master node under ZFS_SHARES_DIR.
+ *
+ * A temporary znode is used as the IS_ROOT_NODE "parent" for zfs_mknode()
+ * and released again before returning.  zfsvfs->z_shares_dir is set to the
+ * new object id.  Returns the result of zap_add().
+ */
+static int
+zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
+{
+	zfs_acl_ids_t ids;
+	vattr_t va;
+	znode_t *sharezp;
+	znode_t *created;
+	int rc;
+
+	/* A directory with mode 0555, owned by the kernel credential. */
+	va.va_type = VDIR;
+	va.va_mode = S_IFDIR|0555;
+	va.va_uid = crgetuid(kcred);
+	va.va_gid = crgetgid(kcred);
+	va.va_mask = AT_MODE|AT_UID|AT_GID;
+
+	sharezp = zfs_znode_alloc_kmem(KM_SLEEP);
+	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
+	sharezp->z_zfsvfs = zfsvfs;
+	sharezp->z_is_sa = zfsvfs->z_use_sa;
+	sharezp->z_unlinked = 0;
+	sharezp->z_atime_dirty = 0;
+
+	VERIFY0(zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &va,
+	    kcred, NULL, &ids));
+	zfs_mknode(sharezp, &va, tx, kcred, IS_ROOT_NODE, &created, &ids);
+	ASSERT3P(created, ==, sharezp);
+
+	/* Detach the temporary znode from the file system again. */
+	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
+
+	rc = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
+	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
+	zfsvfs->z_shares_dir = sharezp->z_id;
+
+	zfs_acl_ids_free(&ids);
+	sa_handle_destroy(sharezp->z_sa_hdl);
+	zfs_znode_free_kmem(sharezp);
+
+	return (rc);
+}
+
+/*
+ * Define a couple of values we need available
+ * for both 64 and 32 bit environments.  Each one is only defined here when
+ * it has not been defined already.
+ */
+#ifndef NBITSMINOR64
+#define	NBITSMINOR64	32
+#endif
+#ifndef MAXMAJ64
+#define	MAXMAJ64	0xffffffffUL
+#endif
+#ifndef	MAXMIN64
+#define	MAXMIN64	0xffffffffUL
+#endif
+
+/*
+ * Expand a dev_t into the 64-bit form ZFS uses privately: the major number
+ * is shifted left by NBITSMINOR64 and OR-ed with the minor number.
+ *
+ * The standard expldev() cannot be used since it doesn't do what we want:
+ * it takes a dev32_t in LP64 and expands it to a long dev_t, whereas we
+ * need an interface that takes a dev32_t in ILP32 and expands it to a long
+ * dev_t.
+ */
+static uint64_t
+zfs_expldev(dev_t dev)
+{
+	uint64_t mj = major(dev);
+	uint64_t expanded;
+
+	expanded = (mj << NBITSMINOR64) | minor(dev);
+	return (expanded);
+}
+/*
+ * Compress a 64-bit device number into a dev_t: the bits above
+ * NBITSMINOR64 become the major number, the bits selected by MAXMIN64 the
+ * minor number.
+ *
+ * The standard cmpldev() cannot be used since it takes a long dev_t and
+ * compresses it to dev32_t in LP64, whereas we need to compact a long dev_t
+ * to a dev32_t in ILP32.
+ */
+dev_t
+zfs_cmpldev(uint64_t dev)
+{
+	uint64_t mj = dev >> NBITSMINOR64;
+	uint64_t mn = dev & MAXMIN64;
+
+	return (makedev(mj, mn));
+}
+
+/*
+ * Attach a system-attribute handle to a znode.
+ *
+ * When sa_hdl is supplied it is adopted and pointed back at the znode;
+ * otherwise a shared handle is obtained from db.  z_is_sa records whether
+ * the object uses the DMU_OT_SA bonus type.  The caller must hold the
+ * object mutex for zp->z_id.
+ */
+static void
+zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
+    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
+{
+	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
+	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
+
+	ASSERT3P(zp->z_sa_hdl, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+
+	if (sa_hdl != NULL) {
+		/* Adopt the caller's handle. */
+		zp->z_sa_hdl = sa_hdl;
+		sa_set_userp(sa_hdl, zp);
+	} else {
+		VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp,
+		    SA_HDL_SHARED, &zp->z_sa_hdl));
+	}
+
+	if (obj_type == DMU_OT_SA)
+		zp->z_is_sa = B_TRUE;
+	else
+		zp->z_is_sa = B_FALSE;
+
+	/*
+	 * The root znode gets VROOT, except when it is the root node of a
+	 * snapshot mounted under .zfs.
+	 */
+	if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs)
+		ZTOV(zp)->v_flag |= VROOT;
+
+	vn_exists(ZTOV(zp));
+}
+
+/*
+ * Destroy the znode's SA handle and clear z_sa_hdl.
+ *
+ * The caller is expected to hold the object mutex for zp->z_id, unless the
+ * znode is unlinked or the teardown-inactive lock is write-held.
+ */
+void
+zfs_znode_dmu_fini(znode_t *zp)
+{
+	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
+	    zp->z_unlinked ||
+	    ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zp->z_zfsvfs));
+
+	sa_handle_destroy(zp->z_sa_hdl);
+	zp->z_sa_hdl = NULL;
+}
+
+/*
+ * Dispose of a vnode that is not going to be used: detach its private data,
+ * switch it to dead_vnodeops, then vgone() and vput() it.
+ */
+static void
+zfs_vnode_forget(vnode_t *vp)
+{
+
+	/* copied from insmntque_stddtr */
+	vp->v_data = NULL;
+	vp->v_op = &dead_vnodeops;
+	vgone(vp);
+	vput(vp);
+}
+
+/*
+ * Construct a new znode/vnode pair and initialize it from the object's
+ * system attributes.
+ *
+ * This does not call dmu_set_user(); that is left to the caller, in case
+ * the znode is not going to be returned.
+ *
+ *	IN:	zfsvfs	 - file system the znode belongs to
+ *		db	 - buffer of the backing object; db->db_object
+ *			   becomes the znode id
+ *		blksz	 - initial value for z_blksz
+ *		obj_type - bonus type (DMU_OT_SA selects the SA layout)
+ *		hdl	 - existing SA handle to adopt, or NULL to obtain
+ *			   one from db
+ *
+ *	RETURN:	the new znode with its vnode exclusively locked, or NULL
+ *		when no vnode could be obtained, the attributes could not
+ *		be read, or the generation number is zero.
+ */
+static znode_t *
+zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+    dmu_object_type_t obj_type, sa_handle_t *hdl)
+{
+	znode_t	*zp;
+	vnode_t *vp;
+	uint64_t mode;
+	uint64_t parent;
+	uint64_t projid = ZFS_DEFAULT_PROJID;
+	/*
+	 * bulk[] must have room for every SA_ADD_BULK_ATTR() below: nine
+	 * unconditional attributes plus MTIME and CTIME under "notyet".
+	 */
+#ifdef notyet
+	uint64_t mtime[2], ctime[2];
+	sa_bulk_attr_t bulk[11];
+#else
+	sa_bulk_attr_t bulk[9];
+#endif
+	int count = 0;
+	int error;
+
+	zp = zfs_znode_alloc_kmem(KM_SLEEP);
+
+#ifndef _ZFS_USE_SMR
+	KASSERT((zfsvfs->z_parent->z_vfs->mnt_kern_flag & MNTK_FPLOOKUP) == 0,
+	    ("%s: fast path lookup enabled without smr", __func__));
+#endif
+
+#if __FreeBSD_version >= 1300076
+	KASSERT(curthread->td_vp_reserved != NULL,
+	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
+#else
+	KASSERT(curthread->td_vp_reserv > 0,
+	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
+#endif
+	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
+	if (error != 0) {
+		zfs_znode_free_kmem(zp);
+		return (NULL);
+	}
+	zp->z_vnode = vp;
+	vp->v_data = zp;
+
+	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
+
+	zp->z_sa_hdl = NULL;
+	zp->z_unlinked = 0;
+	zp->z_atime_dirty = 0;
+	zp->z_mapcnt = 0;
+	zp->z_id = db->db_object;
+	zp->z_blksz = blksz;
+	zp->z_seq = 0x7A4653;
+	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+#if __FreeBSD_version >= 1300139
+	atomic_store_ptr(&zp->z_cached_symlink, NULL);
+#endif
+
+	vp = ZTOV(zp);
+
+	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &zp->z_links, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &zp->z_atime, 16);
+#ifdef notyet
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, 16);
+#endif
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+	    &zp->z_uid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+	    &zp->z_gid, 8);
+
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 ||
+	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
+	    (zp->z_pflags & ZFS_PROJID) &&
+	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
+		/*
+		 * Only destroy a handle that was obtained here; a handle
+		 * passed in by the caller is left alone.
+		 */
+		if (hdl == NULL)
+			sa_handle_destroy(zp->z_sa_hdl);
+		zfs_vnode_forget(vp);
+		zp->z_vnode = NULL;
+		zfs_znode_free_kmem(zp);
+		return (NULL);
+	}
+
+	zp->z_projid = projid;
+	zp->z_mode = mode;
+
+	/* Cache the xattr parent id */
+	if (zp->z_pflags & ZFS_XATTR)
+		zp->z_xattr_parent = parent;
+
+	vp->v_type = IFTOVT((mode_t)mode);
+
+	/* Pick per-type settings; FIFOs and share files get other vectors. */
+	switch (vp->v_type) {
+	case VDIR:
+		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
+		break;
+	case VFIFO:
+		vp->v_op = &zfs_fifoops;
+		break;
+	case VREG:
+		if (parent == zfsvfs->z_shares_dir) {
+			ASSERT0(zp->z_uid);
+			ASSERT0(zp->z_gid);
+			vp->v_op = &zfs_shareops;
+		}
+		break;
+	default:
+		break;
+	}
+
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	list_insert_tail(&zfsvfs->z_all_znodes, zp);
+	zfsvfs->z_nr_znodes++;
+	zp->z_zfsvfs = zfsvfs;
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	/*
+	 * Acquire vnode lock before making it available to the world.
+	 */
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+#if __FreeBSD_version >= 1400077
+	vn_set_state(vp, VSTATE_CONSTRUCTED);
+#endif
+	VN_LOCK_AREC(vp);
+	if (vp->v_type != VFIFO)
+		VN_LOCK_ASHARE(vp);
+
+	return (zp);
+}
+
+/*
+ * Static placeholders that zfs_mknode() passes as the values of the XATTR,
+ * PAD and ZNODE_ACL attributes when laying out a DMU_OT_ZNODE object.
+ */
+static uint64_t empty_xattr;
+static uint64_t pad[4];
+static zfs_acl_phys_t acl_phys;
+/*
+ * Create a new DMU object to hold a zfs znode.
+ *
+ *	IN:	dzp	- parent directory for new znode
+ *		vap	- file attributes for new znode
+ *		tx	- dmu transaction id for zap operations
+ *		cr	- credentials of caller
+ *		flag	- flags:
+ *			  IS_ROOT_NODE	- new object will be root
+ *			  IS_XATTR	- new object is an attribute
+ *		acl_ids	- mode, owner fuid/fgid and initial ACL to apply
+ *
+ *	OUT:	zpp	- allocated znode (dzp itself for IS_ROOT_NODE)
+ *
+ */
+void
+zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
+    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
+{
+	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
+	uint64_t	mode, size, links, parent, pflags;
+	uint64_t	dzp_pflags = 0;
+	uint64_t	rdev = 0;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
+	dmu_buf_t	*db;
+	timestruc_t	now;
+	uint64_t	gen, obj;
+	int		bonuslen;
+	int		dnodesize;
+	sa_handle_t	*sa_hdl;
+	dmu_object_type_t obj_type;
+	sa_bulk_attr_t	*sa_attrs;
+	int		cnt = 0;
+	zfs_acl_locator_cb_t locate = { 0 };
+
+	ASSERT3P(vap, !=, NULL);
+	ASSERT3U((vap->va_mask & AT_MODE), ==, AT_MODE);
+
+	/*
+	 * During replay the object id, timestamp, generation and dnode size
+	 * are taken from fields of vap instead of being freshly chosen.
+	 */
+	if (zfsvfs->z_replay) {
+		obj = vap->va_nodeid;
+		now = vap->va_ctime;		/* see zfs_replay_create() */
+		gen = vap->va_nblocks;		/* ditto */
+		dnodesize = vap->va_fsid;	/* ditto */
+	} else {
+		obj = 0;
+		vfs_timestamp(&now);
+		gen = dmu_tx_get_txg(tx);
+		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
+	}
+
+	if (dnodesize == 0)
+		dnodesize = DNODE_MIN_SIZE;
+
+	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+	bonuslen = (obj_type == DMU_OT_SA) ?
+	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
+
+	/*
+	 * Create a new DMU object.
+	 */
+	/*
+	 * There's currently no mechanism for pre-reading the blocks that will
+	 * be needed to allocate a new object, so we accept the small chance
+	 * that there will be an i/o error and we will fail one of the
+	 * assertions below.
+	 */
+	if (vap->va_type == VDIR) {
+		if (zfsvfs->z_replay) {
+			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
+			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+			    obj_type, bonuslen, dnodesize, tx));
+		} else {
+			obj = zap_create_norm_dnsize(zfsvfs->z_os,
+			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+			    obj_type, bonuslen, dnodesize, tx);
+		}
+	} else {
+		if (zfsvfs->z_replay) {
+			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
+			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
+			    obj_type, bonuslen, dnodesize, tx));
+		} else {
+			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
+			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
+			    obj_type, bonuslen, dnodesize, tx);
+		}
+	}
+
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
+	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
+
+	/*
+	 * If this is the root, fix up the half-initialized parent pointer
+	 * to reference the just-allocated physical data area.
+	 */
+	if (flag & IS_ROOT_NODE) {
+		dzp->z_id = obj;
+	} else {
+		dzp_pflags = dzp->z_pflags;
+	}
+
+	/*
+	 * If parent is an xattr, so am I.
+	 */
+	if (dzp_pflags & ZFS_XATTR) {
+		flag |= IS_XATTR;
+	}
+
+	if (zfsvfs->z_use_fuids)
+		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+	else
+		pflags = 0;
+
+	if (vap->va_type == VDIR) {
+		size = 2;		/* contents ("." and "..") */
+		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
+	} else {
+		size = links = 0;
+	}
+
+	if (vap->va_type == VBLK || vap->va_type == VCHR) {
+		rdev = zfs_expldev(vap->va_rdev);
+	}
+
+	parent = dzp->z_id;
+	mode = acl_ids->z_mode;
+	if (flag & IS_XATTR)
+		pflags |= ZFS_XATTR;
+
+	/*
+	 * No execs denied will be determined when zfs_mode_compute() is called.
+	 */
+	pflags |= acl_ids->z_aclp->z_hints &
+	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
+	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
+
+	ZFS_TIME_ENCODE(&now, crtime);
+	ZFS_TIME_ENCODE(&now, ctime);
+
+	/* atime and mtime come from vap when requested, otherwise from now. */
+	if (vap->va_mask & AT_ATIME) {
+		ZFS_TIME_ENCODE(&vap->va_atime, atime);
+	} else {
+		ZFS_TIME_ENCODE(&now, atime);
+	}
+
+	if (vap->va_mask & AT_MTIME) {
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+	} else {
+		ZFS_TIME_ENCODE(&now, mtime);
+	}
+
+	/* Now add in all of the "SA" attributes */
+	VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
+	    &sa_hdl));
+
+	/*
+	 * Setup the array of attributes to be replaced/set on the new file
+	 *
+	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
+	 * in the old znode_phys_t format.  Don't change this ordering
+	 */
+	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+	} else {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
+		    NULL, &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
+		    NULL, &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+	}
+
+	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
+		    &empty_xattr, 8);
+	}
+	if (obj_type == DMU_OT_ZNODE ||
+	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
+		    NULL, &rdev, 8);
+
+	}
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
+		    &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
+		    &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
+		    sizeof (uint64_t) * 4);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (zfs_acl_phys_t));
+	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
+		    &acl_ids->z_aclp->z_acl_count, 8);
+		locate.cb_aclp = acl_ids->z_aclp;
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate,
+		    acl_ids->z_aclp->z_acl_bytes);
+		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
+		    acl_ids->z_fuid, acl_ids->z_fgid);
+	}
+
+	VERIFY0(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx));
+
+	if (!(flag & IS_ROOT_NODE)) {
+		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
+		ASSERT3P(*zpp, !=, NULL);
+	} else {
+		/*
+		 * If we are creating the root node, the "parent" we
+		 * passed in is the znode for the root.
+		 */
+		*zpp = dzp;
+
+		(*zpp)->z_sa_hdl = sa_hdl;
+	}
+
+	(*zpp)->z_pflags = pflags;
+	(*zpp)->z_mode = mode;
+	(*zpp)->z_dnodesize = dnodesize;
+
+	if (vap->va_mask & AT_XVATTR)
+		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
+
+	if (obj_type == DMU_OT_ZNODE ||
+	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
+		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
+	}
+	if (!(flag & IS_ROOT_NODE)) {
+		vnode_t *vp = ZTOV(*zpp);
+		/* VV_FORCEINSMQ is set only for the insmntque() call. */
+		vp->v_vflag |= VV_FORCEINSMQ;
+		int err = insmntque(vp, zfsvfs->z_vfs);
+		vp->v_vflag &= ~VV_FORCEINSMQ;
+		(void) err;
+		KASSERT(err == 0, ("insmntque() failed: error %d", err));
+	}
+	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
+}
+
+/*
+ * Update in-core attributes.  It is assumed the caller will be doing an
+ * sa_bulk_update to push the changes out.
+ *
+ * For every optional attribute requested in xvap, the matching flag is
+ * applied to zp->z_pflags through ZFS_ATTR_SET() and the attribute is marked
+ * as returned in xvap.  The creation time and the AV scanstamp are the
+ * exceptions: they go through sa_update() and zfs_sa_set_scanstamp().
+ *
+ *	zp	- znode to update
+ *	xvap	- extended attribute request; must carry optional attributes
+ *	tx	- transaction the updates belong to
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	xoptattr_t *xoap;
+
+	xoap = xva_getxoptattr(xvap);
+	ASSERT3P(xoap, !=, NULL);
+
+	if (zp->z_zfsvfs->z_replay == B_FALSE) {
+		ASSERT_VOP_IN_SEQC(ZTOV(zp));
+	}
+
+	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+		    &times, sizeof (times), tx);
+		XVA_SET_RTN(xvap, XAT_CREATETIME);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_READONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_HIDDEN);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SYSTEM);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_ARCHIVE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NOUNLINK);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_APPENDONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NODUMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OPAQUE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+		zfs_sa_set_scanstamp(zp, xvap, tx);
+		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_REPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OFFLINE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SPARSE);
+	}
+}
+
+/*
+ * Look up the in-core znode for object obj_num, creating a new znode/vnode
+ * pair when the object's bonus buffer has no SA handle attached yet.
+ *
+ *	IN:	zfsvfs	- file system the object belongs to.
+ *		obj_num	- object number to look up.
+ *
+ *	OUT:	zpp	- the znode on success, NULL otherwise.
+ *
+ *	RETURN:	0 on success; the error from sa_buf_hold(); EINVAL if the
+ *		bonus buffer is neither DMU_OT_SA nor a DMU_OT_ZNODE of at
+ *		least sizeof (znode_phys_t) bytes; ENOENT if the znode is
+ *		unlinked or zfs_znode_alloc() returns NULL; or the error
+ *		from insmntque().
+ *
+ * getnewvnode_reserve_() is called once on entry and
+ * getnewvnode_drop_reserve() on every return path.
+ */
+int
+zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
+{
+	dmu_object_info_t doi;
+	dmu_buf_t	*db;
+	znode_t		*zp;
+	vnode_t		*vp;
+	sa_handle_t	*hdl;
+	int locked;
+	int err;
+
+	getnewvnode_reserve_();
+again:
+	/* Restart point used when the vnode found below turns out doomed. */
+	*zpp = NULL;
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
+
+	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
+	if (err) {
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		getnewvnode_drop_reserve();
+		return (err);
+	}
+
+	/*
+	 * Accept only a DMU_OT_SA bonus buffer, or a DMU_OT_ZNODE one that
+	 * is large enough to hold a znode_phys_t.
+	 */
+	dmu_object_info_from_db(db, &doi);
+	if (doi.doi_bonus_type != DMU_OT_SA &&
+	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
+	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
+		sa_buf_rele(db, NULL);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		getnewvnode_drop_reserve();
+		return (SET_ERROR(EINVAL));
+	}
+
+	/* A user pointer on the buffer means a znode already exists. */
+	hdl = dmu_buf_get_user(db);
+	if (hdl != NULL) {
+		zp = sa_get_userdata(hdl);
+
+		/*
+		 * Since "SA" does immediate eviction we
+		 * should never find a sa handle that doesn't
+		 * know about the znode.
+		 */
+		ASSERT3P(zp, !=, NULL);
+		ASSERT3U(zp->z_id, ==, obj_num);
+		if (zp->z_unlinked) {
+			err = SET_ERROR(ENOENT);
+		} else {
+			vp = ZTOV(zp);
+			/*
+			 * Don't let the vnode disappear after
+			 * ZFS_OBJ_HOLD_EXIT.
+			 */
+			VN_HOLD(vp);
+			*zpp = zp;
+			err = 0;
+		}
+
+		sa_buf_rele(db, NULL);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+
+		if (err) {
+			getnewvnode_drop_reserve();
+			return (err);
+		}
+
+		/* vp is only reached here on the err == 0 path above. */
+		locked = VOP_ISLOCKED(vp);
+		VI_LOCK(vp);
+		if (VN_IS_DOOMED(vp) && locked != LK_EXCLUSIVE) {
+			/*
+			 * The vnode is doomed and this thread doesn't
+			 * hold the exclusive lock on it, so the vnode
+			 * must be being reclaimed by another thread.
+			 * Otherwise the doomed vnode is being reclaimed
+			 * by this thread and zfs_zget is called from
+			 * ZIL internals.
+			 */
+			VI_UNLOCK(vp);
+
+			/*
+			 * XXX vrele() locks the vnode when the last reference
+			 * is dropped.  Although in this case the vnode is
+			 * doomed / dead and so no inactivation is required,
+			 * the vnode lock is still acquired.  That could result
+			 * in a LOR with z_teardown_lock if another thread holds
+			 * the vnode's lock and tries to take z_teardown_lock.
+			 * But that is only possible if the other thread performs
+			 * a ZFS vnode operation on the vnode.  That either
+			 * should not happen if the vnode is dead or the thread
+			 * should also have a reference to the vnode and thus
+			 * our reference is not last.
+			 */
+			VN_RELE(vp);
+			goto again;
+		}
+		VI_UNLOCK(vp);
+		getnewvnode_drop_reserve();
+		return (err);
+	}
+
+	/*
+	 * Not found create new znode/vnode
+	 * but only if file exists.
+	 *
+	 * There is a small window where zfs_vget() could
+	 * find this object while a file create is still in
+	 * progress.  This is checked for in zfs_znode_alloc()
+	 *
+	 * if zfs_znode_alloc() fails it will drop the hold on the
+	 * bonus buffer.
+	 */
+	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
+	    doi.doi_bonus_type, NULL);
+	if (zp == NULL) {
+		err = SET_ERROR(ENOENT);
+	} else {
+		*zpp = zp;
+	}
+	/* err is still 0 from sa_buf_hold() unless it was set just above. */
+	if (err == 0) {
+		/* Note: this declaration shadows the function-level vp. */
+		vnode_t *vp = ZTOV(zp);
+
+		err = insmntque(vp, zfsvfs->z_vfs);
+		if (err == 0) {
+			vp->v_hash = obj_num;
+			VOP_UNLOCK1(vp);
+		} else {
+			/* Undo the allocation and report no znode. */
+			zp->z_vnode = NULL;
+			zfs_znode_dmu_fini(zp);
+			zfs_znode_free(zp);
+			*zpp = NULL;
+		}
+	}
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+	getnewvnode_drop_reserve();
+	return (err);
+}
+
+/*
+ * Re-attach an existing in-core znode to its on-disk object and reload the
+ * attribute values cached in the znode.
+ *
+ *	IN:	zp	- znode to refresh; its z_sa_hdl is asserted to be
+ *			  NULL on entry.
+ *
+ *	RETURN:	0 on success (including the case where the object has zero
+ *		links and the znode is detached again); the error from
+ *		sa_buf_hold(); EINVAL for an unusable bonus buffer; EIO if
+ *		the bulk attribute lookup fails, the generation differs from
+ *		zp->z_gen, or the vnode type no longer matches the mode.
+ */
+int
+zfs_rezget(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	dmu_object_info_t doi;
+	dmu_buf_t *db;
+	vnode_t *vp;
+	uint64_t obj_num = zp->z_id;
+	uint64_t mode, size;
+	sa_bulk_attr_t bulk[8];
+	int err;
+	int count = 0;
+	uint64_t gen;
+
+	/*
+	 * Remove cached pages before reloading the znode, so that they are not
+	 * lingering after we run into any error.  Ideally, we should vgone()
+	 * the vnode in case of error, but currently we cannot do that
+	 * because of the LOR between the vnode lock and z_teardown_lock.
+	 * So, instead, we have to "doom" the znode in the illumos style.
+	 *
+	 * Ignore invalid pages during the scan.  This is to avoid deadlocks
+	 * between page busying and the teardown lock, as pages are busied prior
+	 * to a VOP_GETPAGES operation, which acquires the teardown read lock.
+	 * Such pages will be invalid and can safely be skipped here.
+	 */
+	vp = ZTOV(zp);
+#if __FreeBSD_version >= 1400042
+	vn_pages_remove_valid(vp, 0, 0);
+#else
+	vn_pages_remove(vp, 0, 0);
+#endif
+
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
+
+	/* Drop the cached ACL and xattr nvlist, each under its own lock. */
+	mutex_enter(&zp->z_acl_lock);
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+	mutex_exit(&zp->z_acl_lock);
+
+	rw_enter(&zp->z_xattr_lock, RW_WRITER);
+	if (zp->z_xattr_cached) {
+		nvlist_free(zp->z_xattr_cached);
+		zp->z_xattr_cached = NULL;
+	}
+	rw_exit(&zp->z_xattr_lock);
+
+	ASSERT3P(zp->z_sa_hdl, ==, NULL);
+	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
+	if (err) {
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (err);
+	}
+
+	/*
+	 * Accept only a DMU_OT_SA bonus buffer, or a DMU_OT_ZNODE one that
+	 * is large enough to hold a znode_phys_t.
+	 */
+	dmu_object_info_from_db(db, &doi);
+	if (doi.doi_bonus_type != DMU_OT_SA &&
+	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
+	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
+		sa_buf_rele(db, NULL);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (SET_ERROR(EINVAL));
+	}
+
+	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
+	/* Remember the old size so the pager is only resized on a change. */
+	size = zp->z_size;
+
+	/* reload cached values */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
+	    &gen, sizeof (gen));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, sizeof (zp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &zp->z_links, sizeof (zp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &zp->z_atime, sizeof (zp->z_atime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+	    &zp->z_uid, sizeof (zp->z_uid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+	    &zp->z_gid, sizeof (zp->z_gid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (SET_ERROR(EIO));
+	}
+
+	zp->z_mode = mode;
+
+	/* A different generation means the object is not the same file. */
+	if (gen != zp->z_gen) {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (SET_ERROR(EIO));
+	}
+
+	/*
+	 * It is highly improbable but still quite possible that two
+	 * objects in different datasets are created with the same
+	 * object numbers and in transaction groups with the same
+	 * numbers.  znodes corresponding to those objects would
+	 * have the same z_id and z_gen, but their other attributes
+	 * may be different.
+	 * zfs recv -F may replace one of such objects with the other.
+	 * As a result file properties recorded in the replaced
+	 * object's vnode may no longer match the received object's
+	 * properties.  At present the only cached property is the
+	 * files type recorded in v_type.
+	 * So, handle this case by leaving the old vnode and znode
+	 * disassociated from the actual object.  A new vnode and a
+	 * znode will be created if the object is accessed
+	 * (e.g. via a look-up).  The old vnode and znode will be
+	 * recycled when the last vnode reference is dropped.
+	 */
+	if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (SET_ERROR(EIO));
+	}
+
+	/*
+	 * If the file has zero links, then it has been unlinked on the send
+	 * side and it must be in the received unlinked set.
+	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
+	 * stale data and to prevent automatically removal of the file in
+	 * zfs_zinactive().  The file will be removed either when it is removed
+	 * on the send side and the next incremental stream is received or
+	 * when the unlinked set gets processed.
+	 */
+	zp->z_unlinked = (zp->z_links == 0);
+	if (zp->z_unlinked) {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (0);
+	}
+
+	zp->z_blksz = doi.doi_data_block_size;
+	/* Tell the pager only when the reloaded size actually differs. */
+	if (zp->z_size != size)
+		vnode_pager_setsize(vp, zp->z_size);
+
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+
+	return (0);
+}
+
+/*
+ * Destroy the on-disk object behind a znode and then release the znode.
+ *
+ * If zfs_external_acl() reports a separate ACL object, it is freed in the
+ * same transaction; the code verifies that such a znode is not an SA znode.
+ */
+void
+zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
+{
+	zfsvfs_t *zfsvfs;
+	objset_t *os;
+	uint64_t obj, acl_obj;
+
+	zfsvfs = zp->z_zfsvfs;
+	os = zfsvfs->z_os;
+	obj = zp->z_id;
+	acl_obj = zfs_external_acl(zp);
+
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
+
+	if (acl_obj != 0) {
+		VERIFY(!zp->z_is_sa);
+		VERIFY0(dmu_object_free(os, acl_obj, tx));
+	}
+	VERIFY0(dmu_object_free(os, obj, tx));
+
+	/* Detach from the DMU while the object hold is still in place. */
+	zfs_znode_dmu_fini(zp);
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
+
+	zfs_znode_free(zp);
+}
+
+/*
+ * Release a znode whose SA handle is still attached.
+ *
+ * An unlinked znode on a file system that is not mounted read-only is
+ * handed to zfs_rmnode().  In every other case the znode is detached from
+ * the DMU and freed; an unlinked file on a read-only mount therefore stays
+ * in the unlinked set (e.g. the file system was read-write when the file
+ * was opened and unlinked, and was made read-only before the final close).
+ */
+void
+zfs_zinactive(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	uint64_t objid = zp->z_id;
+	boolean_t remove_file = B_FALSE;
+
+	ASSERT3P(zp->z_sa_hdl, !=, NULL);
+
+	/* Keep zfs_zget() away from this object while we release it. */
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, objid);
+
+	if (zp->z_unlinked) {
+		ASSERT(!zfsvfs->z_issnap);
+		if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0)
+			remove_file = B_TRUE;
+	}
+
+	if (remove_file) {
+		/* Last reference to a link-less file: remove it for good. */
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, objid);
+		zfs_rmnode(zp);
+	} else {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, objid);
+		zfs_znode_free(zp);
+	}
+}
+
+/*
+ * Release an in-core znode that has already been detached from its SA
+ * handle: unlink it from the per-filesystem znode list, drop the cached
+ * symlink target (on FreeBSD versions that have one) and the cached ACL,
+ * and hand the memory back through zfs_znode_free_kmem().
+ */
+void
+zfs_znode_free(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	ASSERT3P(zp->z_sa_hdl, ==, NULL);
+	zp->z_vnode = NULL;
+
+	/* Take the znode off z_all_znodes under the list lock. */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	POINTER_INVALIDATE(&zp->z_zfsvfs);
+	list_remove(&zfsvfs->z_all_znodes, zp);
+	zfsvfs->z_nr_znodes--;
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+#if __FreeBSD_version >= 1300139
+	{
+		char *cached_link = atomic_load_ptr(&zp->z_cached_symlink);
+
+		if (cached_link != NULL) {
+			atomic_store_rel_ptr(
+			    (uintptr_t *)&zp->z_cached_symlink,
+			    (uintptr_t)NULL);
+			cache_symlink_free(cached_link,
+			    strlen(cached_link) + 1);
+		}
+	}
+#endif
+
+	if (zp->z_acl_cached != NULL) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
+	zfs_znode_free_kmem(zp);
+}
+
+/*
+ * Stamp the current time into the timestamps selected by flag.
+ *
+ *	IN:	zp	- znode being updated.
+ *		flag	- any combination of AT_ATIME, AT_MTIME, AT_CTIME.
+ *		have_tx	- whether the caller will write the attributes out
+ *			  right away; if not, the atime is marked dirty.
+ *
+ *	OUT:	mtime	- encoded current time, written when AT_MTIME is set.
+ *		ctime	- encoded current time, written when AT_CTIME is set.
+ *
+ * When the file system uses FUIDs, an mtime update also sets ZFS_ARCHIVE
+ * and ZFS_AV_MODIFIED in z_pflags, and a ctime update sets ZFS_ARCHIVE.
+ */
+void
+zfs_tstamp_update_setup_ext(znode_t *zp, uint_t flag, uint64_t mtime[2],
+    uint64_t ctime[2], boolean_t have_tx)
+{
+	timestruc_t	curtime;
+
+	vfs_timestamp(&curtime);
+
+	if (!have_tx) {
+		/* Nothing will write the atime out soon; remember that. */
+		zp->z_atime_dirty = 1;
+	} else {
+		zp->z_atime_dirty = 0;
+		zp->z_seq++;
+	}
+
+	if ((flag & AT_ATIME) != 0) {
+		ZFS_TIME_ENCODE(&curtime, zp->z_atime);
+	}
+
+	if ((flag & AT_MTIME) != 0) {
+		ZFS_TIME_ENCODE(&curtime, mtime);
+		if (zp->z_zfsvfs->z_use_fuids) {
+			zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
+		}
+	}
+
+	if ((flag & AT_CTIME) != 0) {
+		ZFS_TIME_ENCODE(&curtime, ctime);
+		if (zp->z_zfsvfs->z_use_fuids) {
+			zp->z_pflags |= ZFS_ARCHIVE;
+		}
+	}
+}
+
+
+/*
+ * Convenience form of zfs_tstamp_update_setup_ext() that always passes
+ * have_tx = B_TRUE.
+ */
+void
+zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
+    uint64_t ctime[2])
+{
+	const boolean_t have_tx = B_TRUE;
+
+	zfs_tstamp_update_setup_ext(zp, flag, mtime, ctime, have_tx);
+}
+/*
+ * Grow the block size for a file.
+ *
+ *	IN:	zp	- znode of the file whose block size should grow.
+ *		size	- requested block size
+ *		tx	- open transaction.
+ *
+ * Nothing is done when the request is not larger than the current block
+ * size, when the file already extends past its (non-zero) current block
+ * size, or when the DMU reports ENOTSUP.
+ *
+ * NOTE: this function assumes that the znode is write locked.
+ */
+void
+zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
+{
+	int		error;
+	u_longlong_t	ignored;
+
+	/*
+	 * Only ever grow, and only while the file still fits in a single
+	 * block: once there is more than one block the size is fixed.
+	 */
+	if (size <= zp->z_blksz ||
+	    (zp->z_blksz && zp->z_size > zp->z_blksz))
+		return;
+
+	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
+	    size, 0, tx);
+
+	if (error != ENOTSUP) {
+		ASSERT0(error);
+
+		/* Pick up whatever block size the DMU actually chose. */
+		dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl),
+		    &zp->z_blksz, &ignored);
+	}
+}
+
+/*
+ * Increase the file length
+ *
+ *	IN:	zp	- znode of the file to lengthen.
+ *		end	- new end-of-file
+ *
+ *	RETURN:	0 on success (also when the file is already at least
+ *		"end" bytes long), error code on failure
+ */
+static int
+zfs_extend(znode_t *zp, uint64_t end)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	zfs_locked_range_t *lr;
+	dmu_tx_t *tx;
+	uint64_t newblksz = 0;
+	int error;
+
+	/* The size is about to change, so take the whole file. */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
+
+	if (end <= zp->z_size) {
+		/* Already long enough. */
+		zfs_rangelock_exit(lr);
+		return (0);
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+
+	/*
+	 * newblksz stays 0 unless the file grows past its current block
+	 * size and that block size may still change.
+	 */
+	if (end > zp->z_blksz &&
+	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
+		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
+			/*
+			 * The block size already exceeds the "recordsize"
+			 * property; let it grow to the next power of 2
+			 * only.
+			 */
+			ASSERT(!ISP2(zp->z_blksz));
+			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
+		} else {
+			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
+		}
+		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
+	}
+
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0) {
+		dmu_tx_abort(tx);
+		zfs_rangelock_exit(lr);
+		return (error);
+	}
+
+	if (newblksz != 0)
+		zfs_grow_blocksize(zp, newblksz, tx);
+
+	zp->z_size = end;
+	VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx));
+
+	vnode_pager_setsize(ZTOV(zp), end);
+
+	zfs_rangelock_exit(lr);
+	dmu_tx_commit(tx);
+
+	return (0);
+}
+
+/*
+ * Free space in a file.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		off	- start of section to free.
+ *		len	- length of section to free.
+ *
+ *	RETURN:	0 on success (also when "off" is at or past end-of-file),
+ *		error code on failure
+ */
+static int
+zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	zfs_locked_range_t *lr;
+	int error = 0;
+
+	/* Lock exactly the range that was asked for. */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
+
+	/* There is only something to free when "off" lies inside the file. */
+	if (off < zp->z_size) {
+		/* Clip the request at end-of-file. */
+		if (off + len > zp->z_size)
+			len = zp->z_size - off;
+
+		error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
+
+		if (error == 0) {
+#if __FreeBSD_version >= 1400032
+			vnode_pager_purge_range(ZTOV(zp), off, off + len);
+#else
+			/*
+			 * Before __FreeBSD_version 1400032 a block can only
+			 * be freed at the end of a file, not in the middle,
+			 * so this code path should never happen.
+			 */
+			vnode_pager_setsize(ZTOV(zp), off);
+#endif
+		}
+	}
+
+	zfs_rangelock_exit(lr);
+
+	return (error);
+}
+
+/*
+ * Truncate a file
+ *
+ *	IN:	zp	- znode of file to truncate.
+ *		end	- new end-of-file.
+ *
+ *	RETURN:	0 on success (also when the file is already no longer
+ *		than "end"), error code on failure
+ */
+static int
+zfs_trunc(znode_t *zp, uint64_t end)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	vnode_t *vp = ZTOV(zp);
+	zfs_locked_range_t *lr;
+	dmu_tx_t *tx;
+	sa_bulk_attr_t bulk[2];
+	int count = 0;
+	int error;
+
+	/* The size is about to change, so take the whole file. */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
+
+	if (end >= zp->z_size) {
+		/* Already short enough. */
+		error = 0;
+		goto out;
+	}
+
+	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
+	    DMU_OBJECT_END);
+	if (error != 0)
+		goto out;
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	dmu_tx_mark_netfree(tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0) {
+		dmu_tx_abort(tx);
+		goto out;
+	}
+
+	zp->z_size = end;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
+	    NULL, &zp->z_size, sizeof (zp->z_size));
+
+	/* An empty file is no longer flagged sparse. */
+	if (end == 0) {
+		zp->z_pflags &= ~ZFS_SPARSE;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &zp->z_pflags, 8);
+	}
+	VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
+
+	dmu_tx_commit(tx);
+
+	/*
+	 * Drop any mapped pages beyond the new end-of-file.  This is done
+	 * after the transaction has been committed so that we cannot
+	 * deadlock with someone pushing a page we are about to invalidate.
+	 */
+	vnode_pager_setsize(vp, end);
+
+out:
+	/* error is 0 here on both the no-op and the success path. */
+	zfs_rangelock_exit(lr);
+
+	return (error);
+}
+
+/*
+ * Free space in a file
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		off	- start of range
+ *		len	- length of range (0 => truncate the file at off)
+ *		flag	- current file open mode flags (not used in this
+ *			  function).
+ *		log	- TRUE if this action should be logged
+ *
+ *	RETURN:	0 on success, error code on failure
+ *
+ * If off lies beyond the current end-of-file the file is extended to
+ * off + len instead.  When log is set and the operation succeeded, the
+ * mtime, ctime and flags are updated in a transaction of their own and a
+ * TX_TRUNCATE record is logged.
+ */
+int
+zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
+{
+	dmu_tx_t *tx;
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	zilog_t *zilog = zfsvfs->z_log;
+	uint64_t mode;
+	uint64_t mtime[2], ctime[2];
+	sa_bulk_attr_t bulk[3];
+	int count = 0;
+	int error;
+
+	/*
+	 * The mode value itself is not used below; only a failure of the
+	 * lookup matters, in which case the call is abandoned.
+	 */
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
+	    sizeof (mode))) != 0)
+		return (error);
+
+	/* Starting past end-of-file: this is a pure extension. */
+	if (off > zp->z_size) {
+		error =  zfs_extend(zp, off+len);
+		if (error == 0 && log)
+			goto log;
+		else
+			return (error);
+	}
+
+	if (len == 0) {
+		error = zfs_trunc(zp, off);
+	} else {
+		/*
+		 * Free the range, then lengthen the file if the range
+		 * reached past the current end-of-file.
+		 */
+		if ((error = zfs_free_range(zp, off, len)) == 0 &&
+		    off + len > zp->z_size)
+			error = zfs_extend(zp, off+len);
+	}
+	if (error || !log)
+		return (error);
+log:
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		return (error);
+	}
+
+	/*
+	 * The bulk entries point at mtime/ctime/z_pflags; the values are
+	 * filled in by zfs_tstamp_update_setup() before the update runs.
+	 */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+	    NULL, &zp->z_pflags, 8);
+	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT0(error);
+
+	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
+
+	dmu_tx_commit(tx);
+	return (0);
+}
+
+/*
+ * Populate a freshly created objset with the initial ZPL on-disk state.
+ *
+ *	IN:	os	 - the (empty) objset to initialise.
+ *		cr	 - credentials; supply the root directory's uid/gid.
+ *		zplprops - uint64 ZPL properties to store in the master node.
+ *			   A "version" entry only lowers the version that is
+ *			   derived from the pool; "normalization" and
+ *			   "casesensitivity" entries also steer z_norm.
+ *		tx	 - open transaction all objects are created in.
+ *
+ * Created, in order: the master node, the property entries, the version
+ * entry, the SA attribute registration object (for versions that support
+ * SA), the unlinked set, the root directory and the shares directory.
+ *
+ * A throw-away zfsvfs_t and root znode are built by hand so that
+ * zfs_mknode() can be used; both are released again before returning.
+ * Failures are only caught by assertions, on the grounds that an empty
+ * objset has no blocks to read and thus cannot produce i/o errors.
+ */
+void
+zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
+{
+	uint64_t	moid, obj, sa_obj, version;
+	uint64_t	sense = ZFS_CASE_SENSITIVE;
+	uint64_t	norm = 0;
+	nvpair_t	*elem;
+	int		error;
+	int		i;
+	znode_t		*rootzp = NULL;
+	zfsvfs_t	*zfsvfs;
+	vattr_t		vattr;
+	znode_t		*zp;
+	zfs_acl_ids_t	acl_ids;
+
+	/*
+	 * First attempt to create master node.
+	 */
+	/*
+	 * In an empty objset, there are no blocks to read and thus
+	 * there can be no i/o errors (which we assert below).
+	 */
+	moid = MASTER_NODE_OBJ;
+	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
+	    DMU_OT_NONE, 0, tx);
+	ASSERT0(error);
+
+	/*
+	 * Set starting attributes.
+	 */
+	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
+		/* For the moment we expect all zpl props to be uint64_ts */
+		uint64_t val;
+		char *name;
+
+		ASSERT3S(nvpair_type(elem), ==, DATA_TYPE_UINT64);
+		val = fnvpair_value_uint64(elem);
+		name = nvpair_name(elem);
+		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
+			/* A requested version may only lower the default. */
+			if (val < version)
+				version = val;
+		} else {
+			error = zap_update(os, moid, name, 8, 1, &val, tx);
+		}
+		ASSERT0(error);
+		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
+			norm = val;
+		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
+			sense = val;
+	}
+	ASSERT3U(version, !=, 0);
+	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
+	/*
+	 * Check this update like every other ZAP call in this function;
+	 * previously its result was overwritten without being looked at.
+	 */
+	ASSERT0(error);
+
+	/*
+	 * Create zap object used for SA attribute registration
+	 */
+
+	if (version >= ZPL_VERSION_SA) {
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT0(error);
+	} else {
+		sa_obj = 0;
+	}
+	/*
+	 * Create a delete queue.
+	 */
+	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
+
+	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
+	ASSERT0(error);
+
+	/*
+	 * Create root znode.  Create minimal znode/vnode/zfsvfs
+	 * to allow zfs_mknode to work.
+	 */
+	VATTR_NULL(&vattr);
+	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
+	vattr.va_type = VDIR;
+	vattr.va_mode = S_IFDIR|0755;
+	vattr.va_uid = crgetuid(cr);
+	vattr.va_gid = crgetgid(cr);
+
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+
+	rootzp = zfs_znode_alloc_kmem(KM_SLEEP);
+	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
+	rootzp->z_unlinked = 0;
+	rootzp->z_atime_dirty = 0;
+	rootzp->z_is_sa = USE_SA(version, os);
+
+	zfsvfs->z_os = os;
+	zfsvfs->z_parent = zfsvfs;
+	zfsvfs->z_version = version;
+	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
+	zfsvfs->z_use_sa = USE_SA(version, os);
+	zfsvfs->z_norm = norm;
+
+	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+	    &zfsvfs->z_attr_table);
+
+	ASSERT0(error);
+
+	/*
+	 * Fold case on file systems that are always or sometimes case
+	 * insensitive.
+	 */
+	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
+		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
+
+	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
+	    offsetof(znode_t, z_link_node));
+
+	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+
+	/* Build the root directory through the regular mknode path. */
+	rootzp->z_zfsvfs = zfsvfs;
+	VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
+	    cr, NULL, &acl_ids));
+	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
+	ASSERT3P(zp, ==, rootzp);
+	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
+	ASSERT0(error);
+	zfs_acl_ids_free(&acl_ids);
+	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
+
+	sa_handle_destroy(rootzp->z_sa_hdl);
+	zfs_znode_free_kmem(rootzp);
+
+	/*
+	 * Create shares directory
+	 */
+
+	error = zfs_create_share_dir(zfsvfs, tx);
+
+	ASSERT0(error);
+
+	/* Tear the temporary zfsvfs down again. */
+	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+}
+#endif /* _KERNEL */
+
+/*
+ * Set up the SA attribute table for an objset.
+ *
+ * The SA registration object number is read from the master node; if the
+ * entry does not exist (ENOENT), object number 0 is passed to sa_setup().
+ * Any other lookup error is returned unchanged.
+ */
+static int
+zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
+{
+	uint64_t sa_attrs_obj = 0;
+	int error;
+
+	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+	    &sa_attrs_obj);
+	if (error == 0 || error == ENOENT) {
+		error = sa_setup(osp, sa_attrs_obj, zfs_attr_table, ZPL_END,
+		    sa_table);
+	}
+
+	return (error);
+}
+
+/*
+ * Hold the bonus buffer of object obj and obtain a private SA handle for it.
+ *
+ *	OUT:	hdlp	- the SA handle.
+ *		db	- the held bonus buffer.
+ *
+ *	RETURN:	0 on success; the error from sa_buf_hold() or
+ *		sa_handle_get(); ENOTSUP if the bonus buffer is neither
+ *		DMU_OT_SA nor a DMU_OT_ZNODE of at least
+ *		sizeof (znode_phys_t) bytes.
+ *
+ * On failure the buffer hold has been dropped again.  On success the pair
+ * is given back with zfs_release_sa_handle().
+ */
+static int
+zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
+    dmu_buf_t **db, void *tag)
+{
+	dmu_object_info_t doi;
+	boolean_t is_sa, is_znode;
+	int error;
+
+	error = sa_buf_hold(osp, obj, tag, db);
+	if (error != 0)
+		return (error);
+
+	dmu_object_info_from_db(*db, &doi);
+	is_sa = (doi.doi_bonus_type == DMU_OT_SA);
+	is_znode = (doi.doi_bonus_type == DMU_OT_ZNODE);
+
+	if ((!is_sa && !is_znode) ||
+	    (is_znode && doi.doi_bonus_size < sizeof (znode_phys_t))) {
+		error = SET_ERROR(ENOTSUP);
+	} else {
+		error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
+	}
+
+	/* Whatever went wrong, do not leave the buffer held. */
+	if (error != 0)
+		sa_buf_rele(*db, tag);
+
+	return (error);
+}
+
+/*
+ * Counterpart of zfs_grab_sa_handle(): destroy the SA handle first, then
+ * drop the hold on the bonus buffer taken under the same tag.
+ */
+static void
+zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
+{
+	sa_handle_destroy(hdl);
+
+	sa_buf_rele(db, tag);
+}
+
+/*
+ * Given an object number, return its parent object number and whether
+ * or not the object is an extended attribute directory.
+ *
+ *	IN:	osp	    - objset the object lives in.
+ *		hdl	    - SA handle of the object itself.
+ *		sa_table    - attribute table from sa_setup(); maps the
+ *			      ZPL_* indices to registered attribute ids.
+ *
+ *	OUT:	pobjp	    - parent object number.
+ *		is_xattrdir - non-zero if the object is a directory with
+ *			      ZFS_XATTR set.
+ *
+ *	RETURN:	0 on success; an error from the attribute lookups or from
+ *		grabbing the parent's SA handle; EINVAL if the object is
+ *		not an xattr directory and the recorded parent is not a
+ *		directory.
+ */
+static int
+zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
+    uint64_t *pobjp, int *is_xattrdir)
+{
+	uint64_t parent;
+	uint64_t pflags;
+	uint64_t mode;
+	uint64_t parent_mode;
+	sa_bulk_attr_t bulk[3];
+	sa_handle_t *sa_hdl;
+	dmu_buf_t *sa_db;
+	int count = 0;
+	int error;
+
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
+	    &parent, sizeof (parent));
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
+	    &pflags, sizeof (pflags));
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
+	    &mode, sizeof (mode));
+
+	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
+		return (error);
+
+	/*
+	 * When a link is removed its parent pointer is not changed and will
+	 * be invalid.  There are two cases where a link is removed but the
+	 * file stays around, when it goes to the delete queue and when there
+	 * are additional links.
+	 */
+	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Look the parent's mode up through sa_table, like the lookups on
+	 * the object itself above: sa_lookup() takes a registered attribute
+	 * id, not the raw ZPL_MODE index.
+	 */
+	error = sa_lookup(sa_hdl, sa_table[ZPL_MODE], &parent_mode,
+	    sizeof (parent_mode));
+	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
+	if (error != 0)
+		return (error);
+
+	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
+
+	/*
+	 * Extended attributes can be applied to files, directories, etc.
+	 * Otherwise the parent must be a directory.
+	 */
+	if (!*is_xattrdir && !S_ISDIR(parent_mode))
+		return (SET_ERROR(EINVAL));
+
+	*pobjp = parent;
+
+	return (0);
+}
+
+/*
+ * Given the SA handle of an object, fill in some zpl level statistics:
+ * mode, generation, link count and ctime.
+ *
+ *	RETURN:	the result of the bulk attribute lookup.
+ */
+static int
+zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
+    zfs_stat_t *sb)
+{
+	sa_bulk_attr_t attrs[4];
+	int nattrs = 0;
+	int error;
+
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_MODE], NULL,
+	    &sb->zs_mode, sizeof (sb->zs_mode));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_GEN], NULL,
+	    &sb->zs_gen, sizeof (sb->zs_gen));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_LINKS], NULL,
+	    &sb->zs_links, sizeof (sb->zs_links));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_CTIME], NULL,
+	    &sb->zs_ctime, sizeof (sb->zs_ctime));
+
+	error = sa_bulk_lookup(hdl, attrs, nattrs);
+
+	return (error);
+}
+
+/*
+ * Build the path of object obj, relative to the root of objset osp, in buf.
+ *
+ *	IN:	osp	 - objset the object lives in.
+ *		obj	 - object to name.
+ *		hdl	 - SA handle of obj; owned by the caller and never
+ *			   released here.
+ *		sa_table - attribute table from sa_setup().
+ *		len	 - size of buf in bytes.
+ *
+ *	OUT:	buf	 - NUL-terminated path on success.  The last byte is
+ *			   overwritten even on failure.
+ *
+ *	RETURN:	0 on success; ESTALE if obj is in the unlinked set;
+ *		ENAMETOOLONG if the path does not fit in buf; otherwise the
+ *		error from the failing lookup.
+ *
+ * The path is assembled backwards from the end of buf, one component per
+ * parent hop, and moved to the front of buf once the root is reached (the
+ * object that is its own parent).
+ */
+static int
+zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
+    sa_attr_type_t *sa_table, char *buf, int len)
+{
+	sa_handle_t *sa_hdl;
+	sa_handle_t *prevhdl = NULL;
+	dmu_buf_t *prevdb = NULL;
+	dmu_buf_t *sa_db = NULL;
+	char *path = buf + len - 1;
+	int error;
+
+	*path = '\0';
+	sa_hdl = hdl;
+
+	/* An object sitting in the unlinked set has no path any more. */
+	uint64_t deleteq_obj;
+	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
+	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
+	error = zap_lookup_int(osp, deleteq_obj, obj);
+	if (error == 0) {
+		return (ESTALE);
+	} else if (error != ENOENT) {
+		return (error);
+	}
+	error = 0;
+
+	for (;;) {
+		uint64_t pobj;
+		char component[MAXNAMELEN + 2];
+		size_t complen;
+		int is_xattrdir;
+
+		/* The handle from two hops back is no longer needed. */
+		if (prevdb) {
+			ASSERT3P(prevhdl, !=, NULL);
+			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
+		}
+
+		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
+		    &is_xattrdir)) != 0)
+			break;
+
+		/* Reached the root: make sure the path starts with '/'. */
+		if (pobj == obj) {
+			if (path[0] != '/') {
+				/*
+				 * Nothing has been prepended yet, so this
+				 * needs one byte in front of the NUL.
+				 */
+				if (path == buf) {
+					error = SET_ERROR(ENAMETOOLONG);
+					break;
+				}
+				*--path = '/';
+			}
+			break;
+		}
+
+		component[0] = '/';
+		if (is_xattrdir) {
+			(void) sprintf(component + 1, "<xattrdir>");
+		} else {
+			error = zap_value_search(osp, pobj, obj,
+			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
+			if (error != 0)
+				break;
+		}
+
+		complen = strlen(component);
+		/*
+		 * Refuse to write in front of buf.  This used to be guarded
+		 * by an assertion only, which is compiled out of non-debug
+		 * kernels.  sa_hdl is still the current handle here, so the
+		 * clean-up after the loop releases it as usual.
+		 */
+		if (complen > (size_t)(path - buf)) {
+			error = SET_ERROR(ENAMETOOLONG);
+			break;
+		}
+		path -= complen;
+		ASSERT3P(path, >=, buf);
+		bcopy(component, path, complen);
+		obj = pobj;
+
+		/* Never queue the caller's handle for release. */
+		if (sa_hdl != hdl) {
+			prevhdl = sa_hdl;
+			prevdb = sa_db;
+		}
+		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
+		if (error != 0) {
+			/* Fall back so the clean-up below releases it. */
+			sa_hdl = prevhdl;
+			sa_db = prevdb;
+			break;
+		}
+	}
+
+	if (sa_hdl != NULL && sa_hdl != hdl) {
+		ASSERT3P(sa_db, !=, NULL);
+		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
+	}
+
+	/* Slide the finished path, including its NUL, to the front. */
+	if (error == 0)
+		(void) memmove(buf, path, buf + len - path);
+
+	return (error);
+}
+
+/*
+ * Translate an object number into a path relative to the root of its
+ * objset.
+ *
+ *	IN:	osp	- objset the object lives in.
+ *		obj	- object to name.
+ *		len	- size of buf in bytes.
+ *
+ *	OUT:	buf	- the path, on success.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+int
+zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
+{
+	sa_attr_type_t *sa_table;
+	sa_handle_t *hdl;
+	dmu_buf_t *db;
+	int error;
+
+	if ((error = zfs_sa_setup(osp, &sa_table)) != 0)
+		return (error);
+
+	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
+		return (error);
+
+	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
+
+	/* The handle is ours, so it goes back whatever the outcome. */
+	zfs_release_sa_handle(hdl, db, FTAG);
+
+	return (error);
+}
+
+/*
+ * Like zfs_obj_to_path(), but additionally fill in the zpl level
+ * statistics of the object.
+ *
+ *	OUT:	sb	- statistics of the object.
+ *		buf	- the path; its last byte is set to NUL up front,
+ *			  before anything that can fail.
+ *
+ *	RETURN:	0 on success, error code on failure.  The path is only
+ *		looked up once the statistics have been read successfully.
+ */
+int
+zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
+    char *buf, int len)
+{
+	sa_attr_type_t *sa_table;
+	sa_handle_t *hdl;
+	dmu_buf_t *db;
+	int error;
+
+	buf[len - 1] = '\0';
+
+	if ((error = zfs_sa_setup(osp, &sa_table)) != 0)
+		return (error);
+
+	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
+		return (error);
+
+	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
+	if (error == 0) {
+		error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table,
+		    buf, len);
+	}
+
+	zfs_release_sa_handle(hdl, db, FTAG);
+
+	return (error);
+}
+
+
+/*
+ * Bring the vnode pager's idea of the file size in line with zp->z_size.
+ * Nothing happens when the vnode has no VM object or the sizes already
+ * agree.
+ */
+void
+zfs_znode_update_vfs(znode_t *zp)
+{
+	vnode_t *vp = ZTOV(zp);
+	vm_object_t vmobj = vp->v_object;
+
+	if (vmobj != NULL && zp->z_size != vmobj->un_pager.vnp.vnp_size)
+		vnode_pager_setsize(vp, zp->z_size);
+}
+
+
+#ifdef _KERNEL
+/*
+ * Find the parent directory of a znode and the name the znode has in it.
+ *
+ *	IN:	zp	- znode to look up.
+ *
+ *	OUT:	dzpp	- znode of the parent, as returned by zfs_zget().
+ *		buf	- entry name of zp within the parent.
+ *
+ *	RETURN:	0 on success; EINVAL if zp has ZFS_XATTR set or is its own
+ *		parent (a root object has no name); otherwise the error
+ *		from the failing lookup.
+ */
+int
+zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	uint64_t parent;
+	int is_xattrdir;
+	int err;
+
+	/* Extended attributes should not be visible as regular files. */
+	if (zp->z_pflags & ZFS_XATTR)
+		return (SET_ERROR(EINVAL));
+
+	err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl,
+	    zfsvfs->z_attr_table, &parent, &is_xattrdir);
+	if (err != 0)
+		return (err);
+	ASSERT0(is_xattrdir);
+
+	/* An object that is its own parent is a root and has no name. */
+	if (parent == zp->z_id)
+		return (SET_ERROR(EINVAL));
+
+	err = zap_value_search(zfsvfs->z_os, parent, zp->z_id,
+	    ZFS_DIRENT_OBJ(-1ULL), buf);
+	if (err != 0)
+		return (err);
+
+	return (zfs_zget(zfsvfs, parent, dzpp));
+}
+#endif /* _KERNEL */

diff --git a/zfs/module/os/freebsd/zfs/zio_crypt.c b/zfs/module/os/freebsd/zfs/zio_crypt.c
new file mode 100644
index 0000000..9e0ab52
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zio_crypt.c

@@ -0,0 +1,1840 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/zio_crypt.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/zil.h>
+#include <sys/sha2.h>
+#include <sys/hkdf.h>
+
+/*
+ * This file is responsible for handling all of the details of generating
+ * encryption parameters and performing encryption and authentication.
+ *
+ * BLOCK ENCRYPTION PARAMETERS:
+ * Encryption/Authentication Algorithm Suite (crypt):
+ * The encryption algorithm, mode, and key length we are going to use. We
+ * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
+ * keys. All authentication is currently done with SHA512-HMAC.
+ *
+ * Plaintext:
+ * The unencrypted data that we want to encrypt.
+ *
+ * Initialization Vector (IV):
+ * An initialization vector for the encryption algorithms. This is used to
+ * "tweak" the encryption algorithms so that two blocks of the same data are
+ * encrypted into different ciphertext outputs, thus obfuscating block patterns.
+ * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
+ * never reused with the same encryption key. This value is stored unencrypted
+ * and must simply be provided to the decryption function. We use a 96 bit IV
+ * (as recommended by NIST) for all block encryption. For non-dedup blocks we
+ * derive the IV randomly. The first 64 bits of the IV are stored in the second
+ * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
+ * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
+ * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
+ * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
+ * level 0 blocks is the number of allocated dnodes in that block. The on-disk
+ * format supports at most 2^15 slots per L0 dnode block, because the maximum
+ * block size is 16MB (2^24). In either case, for level 0 blocks this number
+ * will still be smaller than UINT32_MAX so it is safe to store the IV in the
+ * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
+ * for the dnode code.
+ *
+ * Master key:
+ * This is the most important secret data of an encrypted dataset. It is used
+ * along with the salt to generate the actual encryption keys via HKDF. We
+ * do not use the master key to directly encrypt any data because there are
+ * theoretical limits on how much data can actually be safely encrypted with
+ * any encryption mode. The master key is stored encrypted on disk with the
+ * user's wrapping key. Its length is determined by the encryption algorithm.
+ * For details on how this is stored see the block comment in dsl_crypt.c
+ *
+ * Salt:
+ * Used as an input to the HKDF function, along with the master key. We use a
+ * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
+ * can be used for encrypting many blocks, so we cache the current salt and the
+ * associated derived key in zio_crypt_t so we do not need to derive it again
+ * needlessly.
+ *
+ * Encryption Key:
+ * A secret binary key, generated from an HKDF function used to encrypt and
+ * decrypt data.
+ *
+ * Message Authentication Code (MAC)
+ * The MAC is an output of authenticated encryption modes such as AES-GCM and
+ * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
+ * data on disk and return garbage to the application. Effectively, it is a
+ * checksum that can not be reproduced by an attacker. We store the MAC in the
+ * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
+ * regular checksum of the ciphertext which can be used for scrubbing.
+ *
+ * OBJECT AUTHENTICATION:
+ * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
+ * they contain some info that always needs to be readable. To prevent this
+ * data from being altered, we authenticate this data using SHA512-HMAC. This
+ * will produce a MAC (similar to the one produced via encryption) which can
+ * be used to verify the object was not modified. HMACs do not require key
+ * rotation or IVs, so we can keep up to the full 3 copies of authenticated
+ * data.
+ *
+ * ZIL ENCRYPTION:
+ * ZIL blocks have their bp written to disk ahead of the associated data, so we
+ * cannot store the MAC there as we normally do. For these blocks the MAC is
+ * stored in the embedded checksum within the zil_chain_t header. The salt and
+ * IV are generated for the block on bp allocation instead of at encryption
+ * time. In addition, ZIL blocks have some pieces that must be left in plaintext
+ * for claiming even though all of the sensitive user data still needs to be
+ * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
+ * pieces of the block need to be encrypted. All data that is not encrypted is
+ * authenticated using the AAD mechanisms that the supported encryption modes
+ * provide for. In order to preserve the semantics of the ZIL for encrypted
+ * datasets, the ZIL is not protected at the objset level as described below.
+ *
+ * DNODE ENCRYPTION:
+ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
+ * in plaintext for scrubbing and claiming, but the bonus buffers might contain
+ * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
+ * which pieces of the block need to be encrypted. For more details about
+ * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
+ *
+ * OBJECT SET AUTHENTICATION:
+ * Up to this point, everything we have encrypted and authenticated has been
+ * at level 0 (or -2 for the ZIL). If we did not do any further work the
+ * on-disk format would be susceptible to attacks that deleted or rearranged
+ * the order of level 0 blocks. Ideally, the cleanest solution would be to
+ * maintain a tree of authentication MACs going up the bp tree. However, this
+ * presents a problem for raw sends. Send files do not send information about
+ * indirect blocks so there would be no convenient way to transfer the MACs and
+ * they cannot be recalculated on the receive side without the master key which
+ * would defeat one of the purposes of raw sends in the first place. Instead,
+ * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
+ * from the level below. We also include some portable fields from blk_prop such
+ * as the lsize and compression algorithm to prevent the data from being
+ * misinterpreted.
+ *
+ * At the objset level, we maintain 2 separate 256 bit MACs in the
+ * objset_phys_t. The first one is "portable" and is the logical root of the
+ * MAC tree maintained in the metadnode's bps. The second, is "local" and is
+ * used as the root MAC for the user accounting objects, which are also not
+ * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
+ * of the send file. The useraccounting code ensures that the useraccounting
+ * info is not present upon a receive, so the local MAC can simply be cleared
+ * out at that time. For more info about objset_phys_t authentication, see
+ * zio_crypt_do_objset_hmacs().
+ *
+ * CONSIDERATIONS FOR DEDUP:
+ * In order for dedup to work, blocks that we want to dedup with one another
+ * need to use the same IV and encryption key, so that they will have the same
+ * ciphertext. Normally, one should never reuse an IV with the same encryption
+ * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
+ * blocks. In this case, however, since we are using the same plaintext as
+ * well all that we end up with is a duplicate of the original ciphertext we
+ * already had. As a result, an attacker with read access to the raw disk will
+ * be able to tell which blocks are the same but this information is given away
+ * by dedup anyway. In order to get the same IVs and encryption keys for
+ * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC
+ * here so that a reproducible checksum of the plaintext is never available to
+ * the attacker. The HMAC key is kept alongside the master key, encrypted on
+ * disk. The first 64 bits of the HMAC are used in place of the random salt, and
+ * the next 96 bits are used as the IV. As a result of this mechanism, dedup
+ * will only work within a clone family since encrypted dedup requires use of
+ * the same master and HMAC keys.
+ */
+
+/*
+ * After encrypting many blocks with the same key we may start to run up
+ * against the theoretical limits of how much data can securely be encrypted
+ * with a single key using the supported encryption modes. The most obvious
+ * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
+ * the more IVs we generate (which both GCM and CCM modes strictly forbid).
+ * This risk actually grows surprisingly quickly over time according to the
+ * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
+ * generated n IVs with a cryptographically secure RNG, the approximate
+ * probability p(n) of a collision is given as:
+ *
+ * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96)))
+ *
+ * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
+ *
+ * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
+ * we must not write more than 398,065,730 blocks with the same encryption key.
+ * Therefore, we rotate our keys after 400,000,000 blocks have been written by
+ * generating a new random 64 bit salt for our HKDF encryption key generation
+ * function.
+ */
+#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000
+/*
+ * Effective rotation threshold: the smaller of the variable below and the
+ * compiled-in default, so the variable can lower the limit but not raise it.
+ */
+#define	ZFS_CURRENT_MAX_SALT_USES	\
+	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
+/* requested limit; the code in this file reads it only through the macro */
+unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
+
+/*
+ * Set to a nonzero value to cause zio_do_crypt_uio() to fail 1/this many
+ * calls, to test decryption error handling code paths.
+ */
+uint64_t zio_decrypt_fail_fraction = 0;
+
+/*
+ * Per-blkptr authentication record. zio_crypt_bp_auth_init() fills it in
+ * and the zio_crypt_bp_do_*_updates() helpers feed it to an HMAC, a SHA2
+ * checksum or an AAD buffer. For key version 0 the trailing bab_pad word
+ * is excluded from the authenticated length.
+ */
+typedef struct blkptr_auth_buf {
+	uint64_t bab_prop;			/* blk_prop - portable mask */
+	uint8_t bab_mac[ZIO_DATA_MAC_LEN];	/* MAC from blk_cksum */
+	uint64_t bab_pad;			/* reserved for future use */
+} blkptr_auth_buf_t;
+
+/*
+ * Parameters of every encryption suite, indexed by the `crypt` value
+ * (callers assert crypt < ZIO_CRYPT_FUNCTIONS). Code in this file reads
+ * ci_crypt_type (the ZC_TYPE_* column) and ci_keylen (presumably the
+ * numeric column, i.e. the key length in bytes -- confirm against the
+ * zio_crypt_info_t declaration). Rows of type ZC_TYPE_NONE are rejected
+ * with ENOTSUP by zio_crypt_key_init() and zio_do_crypt_uio_opencrypto().
+ */
+zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
+	{"",			ZC_TYPE_NONE,	0,	"inherit"},
+	{"",			ZC_TYPE_NONE,	0,	"on"},
+	{"",			ZC_TYPE_NONE,	0,	"off"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
+};
+
+/*
+ * Tear down the parts of a key that do not involve freeing an OpenCrypto
+ * session: destroy the salt lock and wipe the structure. Used directly on
+ * the error paths of zio_crypt_key_init() / zio_crypt_key_unwrap(), and
+ * by zio_crypt_key_destroy() after it has freed the session.
+ */
+static void
+zio_crypt_key_destroy_early(zio_crypt_key_t *key)
+{
+	rw_destroy(&key->zk_salt_lock);
+
+	/* clear the session state (the session itself is not freed here) */
+	bzero(&key->zk_session, sizeof (key->zk_session));
+
+	/* wipe the whole structure, including all key material */
+	bzero(key, sizeof (*key));
+}
+
+/*
+ * Fully destroy a key: free its OpenCrypto session, then destroy the salt
+ * lock and zero the structure via zio_crypt_key_destroy_early().
+ */
+void
+zio_crypt_key_destroy(zio_crypt_key_t *key)
+{
+
+	freebsd_crypt_freesession(&key->zk_session);
+	zio_crypt_key_destroy_early(key);
+}
+
+/*
+ * Initialize `key` as a brand new key for encryption suite `crypt`.
+ *
+ * The structure is zeroed, its salt lock is initialized, and the guid,
+ * master key, HMAC key and salt are filled with random bytes. The current
+ * encryption key is then derived from the master key and the salt with
+ * HKDF-SHA512 and an OpenCrypto session is opened for it.
+ *
+ * Returns 0 on success, ENOTSUP if `crypt` is not an AES-GCM or AES-CCM
+ * suite (in which case `key` is left untouched), or the error of the
+ * failing step. On any failure after the lock has been initialized the
+ * lock is destroyed and `key` is zeroed again.
+ */
+int
+zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
+{
+	int ret;
+	uint_t keydata_len;
+	zio_crypt_info_t *ci = NULL;
+
+	ASSERT3P(key, !=, NULL);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	/*
+	 * Reject unsupported suites before touching `key`, so that no
+	 * lock needs to be torn down on this path.
+	 */
+	ci = &zio_crypt_table[crypt];
+	if (ci->ci_crypt_type != ZC_TYPE_GCM &&
+	    ci->ci_crypt_type != ZC_TYPE_CCM)
+		return (ENOTSUP);
+
+	keydata_len = ci->ci_keylen;
+	bzero(key, sizeof (zio_crypt_key_t));
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	/* fill keydata buffers and salt with random data */
+	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for the ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	/*
+	 * The HMAC key descriptor must point at the random key bytes
+	 * generated above (as zio_crypt_key_unwrap() does), not at the
+	 * descriptor itself.
+	 */
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	ret = freebsd_crypt_newsession(&key->zk_session, ci,
+	    &key->zk_current_key);
+	if (ret)
+		goto error;
+
+	key->zk_crypt = crypt;
+	key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	/* no session exists on any path that gets here */
+	zio_crypt_key_destroy_early(key);
+	return (ret);
+}
+
+/*
+ * Rotate the salt of `key`: draw a new random salt, re-derive the current
+ * encryption key from the master key with it, reset the use counter and
+ * replace the OpenCrypto session. Everything except drawing the random
+ * salt happens with zk_salt_lock held as writer. If the use counter is
+ * already below the limit once the lock is held, nothing is changed and
+ * 0 is returned. Returns 0 on success or the error of the failing step.
+ */
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret = 0;
+	uint8_t new_salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech __unused;
+	uint_t keylen = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/* the candidate salt is generated before the lock is taken */
+	ret = random_get_bytes(new_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		return (ret);
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* someone beat us to the salt rotation; ret is still 0 here */
+	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
+		goto out_unlock;
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keylen, NULL, 0,
+	    new_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keylen);
+	if (ret != 0)
+		goto out_unlock;
+
+	/* publish the new salt and restart the usage count */
+	bcopy(new_salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/* swap the session for one built on the re-derived key */
+	freebsd_crypt_freesession(&key->zk_session);
+	ret = freebsd_crypt_newsession(&key->zk_session,
+	    &zio_crypt_table[key->zk_crypt], &key->zk_current_key);
+
+out_unlock:
+	rw_exit(&key->zk_salt_lock);
+	return (ret);
+}
+
+/*
+ * Copy the key's current salt into `salt` and count one more use of it.
+ * When the incremented count reaches ZFS_CURRENT_MAX_SALT_USES the salt
+ * is rotated afterwards, so this caller still gets the old salt.
+ * See comment above zfs_key_max_salt_uses definition for details.
+ * Returns 0, or the error from zio_crypt_key_change_salt().
+ */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	boolean_t rotate;
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	rotate = (atomic_inc_64_nv(&key->zk_salt_count) >=
+	    ZFS_CURRENT_MAX_SALT_USES);
+	rw_exit(&key->zk_salt_lock);
+
+	if (!rotate)
+		return (0);
+
+	/* the reader lock is dropped first; rotation takes it as writer */
+	return (zio_crypt_key_change_salt(key));
+}
+
+/*
+ * NOTE(review): neither variable is referenced in the part of this file
+ * visible here; presumably debugging aids for failed decryptions --
+ * confirm against the rest of the tree.
+ */
+void *failed_decrypt_buf;
+int failed_decrypt_size;
+
+/*
+ * This function handles all encryption and decryption in zfs. When
+ * encrypting it expects puio to reference the plaintext and cuio to
+ * reference the ciphertext. cuio must have enough space for the
+ * ciphertext + room for a MAC. datalen should be the length of the
+ * plaintext / ciphertext alone.
+ */
+/*
+ * The implementation for FreeBSD's OpenCrypto.
+ *
+ * The big difference between ICP and FOC is that FOC uses a single
+ * buffer for input and output.  This means that (for AES-GCM, the
+ * only one supported right now) the source must be copied into the
+ * destination, and the destination must have the AAD, and the tag/MAC,
+ * already associated with it.  (Both implementations can use a uio.)
+ *
+ * Since the auth data is part of the iovec array, all we need to know
+ * is the length:  0 means there's no AAD.
+ *
+ */
+static int
+zio_do_crypt_uio_opencrypto(boolean_t encrypt, freebsd_crypt_session_t *sess,
+    uint64_t crypt, crypto_key_t *key, uint8_t *ivbuf, uint_t datalen,
+    zfs_uio_t *uio, uint_t auth_len)
+{
+	zio_crypt_info_t *ci = &zio_crypt_table[crypt];
+
+	/* only the AES-GCM and AES-CCM suites are handled */
+	if (!(ci->ci_crypt_type == ZC_TYPE_GCM ||
+	    ci->ci_crypt_type == ZC_TYPE_CCM))
+		return (ENOTSUP);
+
+	if (freebsd_crypt_uio(encrypt, sess, ci, uio, key, ivbuf,
+	    datalen, auth_len) == 0)
+		return (0);
+
+	/*
+	 * Whatever the backend reported is collapsed into EIO for a
+	 * failed encryption and ECKSUM for a failed decryption.
+	 */
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%d):  Returning error %s\n",
+	    __FUNCTION__, __LINE__, encrypt ? "EIO" : "ECKSUM");
+#endif
+	return (SET_ERROR(encrypt ? EIO : ECKSUM));
+}
+
+/*
+ * Encrypt ("wrap") the master and HMAC key material of `key` with the
+ * wrapping key `cwkey`.
+ *
+ * A fresh WRAPPING_IV_LEN-byte IV is written to `iv`. The plaintext keys
+ * are first copied into `keydata_out` / `hmac_keydata_out` and are then
+ * processed in place there; `mac` is the WRAPPING_MAC_LEN-byte slot for
+ * the MAC. The AAD is the guid alone for version 0 keys, and guid, crypt
+ * and version otherwise, each passed through LE_64().
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+int
+zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
+{
+	int ret;
+	uint64_t aad[3];
+	/*
+	 * With OpenCrypto in FreeBSD, the same buffer is used for
+	 * input and output.  Also, the AAD (for AES-GCM at least)
+	 * needs to logically go in front.
+	 */
+	zfs_uio_t cuio;
+	struct uio cuio_s;
+	iovec_t iovecs[4];
+	uint64_t crypt = key->zk_crypt;
+	uint_t enc_len, keydata_len, aad_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	zfs_uio_init(&cuio, &cuio_s);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* generate iv for wrapping the master and hmac key */
+	ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
+	if (ret != 0)
+		goto error;
+
+	/*
+	 * Since we only support one buffer, we need to copy
+	 * the plain text (source) to the cipher buffer (dest).
+	 * We set iovecs[0] -- the authentication data -- below.
+	 */
+	bcopy((void*)key->zk_master_keydata, keydata_out, keydata_len);
+	bcopy((void*)key->zk_hmac_keydata, hmac_keydata_out,
+	    SHA512_HMAC_KEYLEN);
+	/* iovec layout: [0] AAD, [1] master key, [2] HMAC key, [3] MAC */
+	iovecs[1].iov_base = keydata_out;
+	iovecs[1].iov_len = keydata_len;
+	iovecs[2].iov_base = hmac_keydata_out;
+	iovecs[2].iov_len = SHA512_HMAC_KEYLEN;
+	iovecs[3].iov_base = mac;
+	iovecs[3].iov_len = WRAPPING_MAC_LEN;
+
+	/*
+	 * Although we don't support writing to the old format, we do
+	 * support rewrapping the key so that the user can move and
+	 * quarantine datasets on the old format.
+	 */
+	if (key->zk_version == 0) {
+		aad_len = sizeof (uint64_t);
+		aad[0] = LE_64(key->zk_guid);
+	} else {
+		ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad_len = sizeof (uint64_t) * 3;
+		aad[0] = LE_64(key->zk_guid);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(key->zk_version);
+	}
+
+	iovecs[0].iov_base = aad;
+	iovecs[0].iov_len = aad_len;
+	/* only the two key buffers count as data; AAD and MAC do not */
+	enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
+
+	GET_UIO_STRUCT(&cuio)->uio_iov = iovecs;
+	zfs_uio_iovcnt(&cuio) = 4;
+	zfs_uio_segflg(&cuio) = UIO_SYSSPACE;
+
+	/* encrypt the keys and store the resulting ciphertext and mac */
+	/* no session is supplied (NULL); the wrapping key is passed directly */
+	ret = zio_do_crypt_uio_opencrypto(B_TRUE, NULL, crypt, cwkey,
+	    iv, enc_len, &cuio, aad_len);
+	if (ret != 0)
+		goto error;
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * Decrypt ("unwrap") wrapped key material into `key` and make the key
+ * ready for use.
+ *
+ * `keydata` and `hmac_keydata` hold the wrapped master and HMAC keys,
+ * `iv` and `mac` the values stored alongside them, and `crypt`, `version`
+ * and `guid` the parameters that were authenticated as AAD when wrapping.
+ * The wrapped bytes are copied into `key` and processed in place there.
+ * After that a fresh random salt is generated, the current key is derived
+ * from the master key via HKDF-SHA512, and an OpenCrypto session is
+ * opened.
+ *
+ * Returns 0 on success. On any failure the salt lock is destroyed, `key`
+ * is zeroed and the error of the failing step is returned.
+ */
+int
+zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
+    uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
+    uint8_t *mac, zio_crypt_key_t *key)
+{
+	int ret;
+	uint64_t aad[3];
+	/*
+	 * With OpenCrypto in FreeBSD, the same buffer is used for
+	 * input and output.  Also, the AAD (for AES-GCM at least)
+	 * needs to logically go in front.
+	 */
+	zfs_uio_t cuio;
+	struct uio cuio_s;
+	iovec_t iovecs[4];
+	void *src, *dst;
+	uint_t enc_len, keydata_len, aad_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+	/* initialized before any failure point: the error path destroys it */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	zfs_uio_init(&cuio, &cuio_s);
+
+	/*
+	 * Since we only support one buffer, we need to copy
+	 * the encrypted buffer (source) to the plain buffer
+	 * (dest).  We set iovecs[0] -- the authentication data --
+	 * below.
+	 */
+	dst = key->zk_master_keydata;
+	src = keydata;
+
+	bcopy(src, dst, keydata_len);
+
+	dst = key->zk_hmac_keydata;
+	src = hmac_keydata;
+	bcopy(src, dst, SHA512_HMAC_KEYLEN);
+
+	/* iovec layout: [0] AAD, [1] master key, [2] HMAC key, [3] MAC */
+	iovecs[1].iov_base = key->zk_master_keydata;
+	iovecs[1].iov_len = keydata_len;
+	iovecs[2].iov_base = key->zk_hmac_keydata;
+	iovecs[2].iov_len = SHA512_HMAC_KEYLEN;
+	iovecs[3].iov_base = mac;
+	iovecs[3].iov_len = WRAPPING_MAC_LEN;
+
+	/* rebuild the same AAD that zio_crypt_key_wrap() builds */
+	if (version == 0) {
+		aad_len = sizeof (uint64_t);
+		aad[0] = LE_64(guid);
+	} else {
+		ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad_len = sizeof (uint64_t) * 3;
+		aad[0] = LE_64(guid);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(version);
+	}
+
+	/* only the two key buffers count as data; AAD and MAC do not */
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+	iovecs[0].iov_base = aad;
+	iovecs[0].iov_len = aad_len;
+
+	GET_UIO_STRUCT(&cuio)->uio_iov = iovecs;
+	zfs_uio_iovcnt(&cuio) = 4;
+	zfs_uio_segflg(&cuio) = UIO_SYSSPACE;
+
+	/* decrypt the keys and store the result in the output buffers */
+	ret = zio_do_crypt_uio_opencrypto(B_FALSE, NULL, crypt, cwkey,
+	    iv, enc_len, &cuio, aad_len);
+
+	if (ret != 0)
+		goto error;
+
+	/* generate a fresh salt */
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	ret = freebsd_crypt_newsession(&key->zk_session,
+	    &zio_crypt_table[crypt], &key->zk_current_key);
+	if (ret != 0)
+		goto error;
+
+	key->zk_crypt = crypt;
+	key->zk_version = version;
+	key->zk_guid = guid;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	/* destroys the salt lock and zeroes *key, wiping any key bytes */
+	zio_crypt_key_destroy_early(key);
+	return (ret);
+}
+
+/*
+ * Fill `ivbuf` with ZIO_DATA_IV_LEN random bytes. If the generator
+ * reports an error the buffer is zeroed and that error is returned;
+ * otherwise 0 is returned.
+ */
+int
+zio_crypt_generate_iv(uint8_t *ivbuf)
+{
+	/* randomly generate the IV */
+	int ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
+
+	/* never hand back a partially filled IV */
+	if (ret != 0)
+		bzero(ivbuf, ZIO_DATA_IV_LEN);
+
+	return (ret);
+}
+
+/*
+ * Compute a MAC of `data` with the key's HMAC key and copy its first
+ * `digestlen` bytes (at most SHA512_DIGEST_LENGTH) into `digestbuf`.
+ * Always returns 0.
+ */
+int
+zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen)
+{
+	uint8_t full_digest[SHA512_DIGEST_LENGTH];
+
+	ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH);
+
+	/* always produce the full digest, then truncate for the caller */
+	crypto_mac(&key->zk_hmac_key, data, datalen,
+	    full_digest, SHA512_DIGEST_LENGTH);
+	bcopy(full_digest, digestbuf, digestlen);
+
+	return (0);
+}
+
+/*
+ * Derive the salt and IV for a dedup block from an HMAC of its plaintext:
+ * the first ZIO_DATA_SALT_LEN bytes of the digest become the salt and the
+ * following ZIO_DATA_IV_LEN bytes the IV. Returns 0, or the error from
+ * zio_crypt_do_hmac() (in which case neither output is written).
+ */
+int
+zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
+{
+	uint8_t hmac[SHA512_DIGEST_LENGTH];
+	int ret;
+
+	ret = zio_crypt_do_hmac(key, data, datalen,
+	    hmac, SHA512_DIGEST_LENGTH);
+	if (ret == 0) {
+		bcopy(hmac, salt, ZIO_DATA_SALT_LEN);
+		bcopy(hmac + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN);
+	}
+
+	return (ret);
+}
+
+/*
+ * The following functions are used to encode and decode encryption parameters
+ * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
+ * byte strings, which normally means that these strings would not need to deal
+ * with byteswapping at all. However, both blkptr_t and zil_header_t may be
+ * byteswapped by lower layers and so we must "undo" that byteswap here upon
+ * decoding and encoding in a non-native byteorder. These functions require
+ * that the byteorder bit is correct before being called.
+ */
+/*
+ * Store the salt and IV byte strings in an encrypted bp: the salt goes to
+ * word 0 of DVA[2], the first 8 IV bytes to word 1 of DVA[2] and the
+ * remaining 4 IV bytes to the IV2 field. When the bp is flagged for
+ * byteswapping each value is byteswapped before it is stored.
+ */
+void
+zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t salt64, iv_head64;
+	uint32_t iv_tail32;
+
+	ASSERT(BP_IS_ENCRYPTED(bp));
+
+	/* lift the byte strings into integers of the matching width */
+	bcopy(salt, &salt64, sizeof (uint64_t));
+	bcopy(iv, &iv_head64, sizeof (uint64_t));
+	bcopy(iv + sizeof (uint64_t), &iv_tail32, sizeof (uint32_t));
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		salt64 = BSWAP_64(salt64);
+		iv_head64 = BSWAP_64(iv_head64);
+		iv_tail32 = BSWAP_32(iv_tail32);
+	}
+
+	bp->blk_dva[2].dva_word[0] = salt64;
+	bp->blk_dva[2].dva_word[1] = iv_head64;
+	BP_SET_IV2(bp, iv_tail32);
+}
+
+/*
+ * Extract the salt and IV byte strings from a protected bp; the inverse
+ * of zio_crypt_encode_params_bp(). For bps that are only authenticated
+ * both outputs are zero-filled instead.
+ */
+void
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t salt64, iv_head64;
+	uint32_t iv_tail32;
+
+	ASSERT(BP_IS_PROTECTED(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		bzero(salt, ZIO_DATA_SALT_LEN);
+		bzero(iv, ZIO_DATA_IV_LEN);
+		return;
+	}
+
+	salt64 = bp->blk_dva[2].dva_word[0];
+	iv_head64 = bp->blk_dva[2].dva_word[1];
+	iv_tail32 = (uint32_t)BP_GET_IV2(bp);
+
+	/* undo the byteswap applied to the bp by lower layers */
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		salt64 = BSWAP_64(salt64);
+		iv_head64 = BSWAP_64(iv_head64);
+		iv_tail32 = BSWAP_32(iv_tail32);
+	}
+
+	bcopy(&salt64, salt, sizeof (uint64_t));
+	bcopy(&iv_head64, iv, sizeof (uint64_t));
+	bcopy(&iv_tail32, iv + sizeof (uint64_t), sizeof (uint32_t));
+}
+
+/*
+ * Store a 16-byte MAC in words 2 and 3 of the bp's checksum, byteswapping
+ * each word when the bp is flagged for byteswapping. Must not be used on
+ * DMU_OT_OBJSET bps.
+ */
+void
+zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t first64, second64;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
+
+	bcopy(mac, &first64, sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &second64, sizeof (uint64_t));
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		first64 = BSWAP_64(first64);
+		second64 = BSWAP_64(second64);
+	}
+
+	bp->blk_cksum.zc_word[2] = first64;
+	bp->blk_cksum.zc_word[3] = second64;
+}
+
+/*
+ * Read the 16-byte MAC out of words 2 and 3 of the bp's checksum; the
+ * inverse of zio_crypt_encode_mac_bp(). For DMU_OT_OBJSET bps the output
+ * is zero-filled (ZIO_DATA_MAC_LEN bytes) instead.
+ */
+void
+zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t first64, second64;
+
+	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		bzero(mac, ZIO_DATA_MAC_LEN);
+		return;
+	}
+
+	first64 = bp->blk_cksum.zc_word[2];
+	second64 = bp->blk_cksum.zc_word[3];
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		first64 = BSWAP_64(first64);
+		second64 = BSWAP_64(second64);
+	}
+
+	bcopy(&first64, mac, sizeof (uint64_t));
+	bcopy(&second64, mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * Store a 16-byte MAC in words 2 and 3 of the embedded checksum of the
+ * zil_chain_t found at the start of `data`. No byteswapping is done.
+ */
+void
+zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
+{
+	zil_chain_t *zilc = data;
+	uint64_t *cksum_words = zilc->zc_eck.zec_cksum.zc_word;
+
+	bcopy(mac, &cksum_words[2], sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &cksum_words[3], sizeof (uint64_t));
+}
+
+/*
+ * Read the 16-byte MAC from words 2 and 3 of the embedded checksum of the
+ * zil_chain_t found at the start of `data`.
+ */
+void
+zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
+{
+	const zil_chain_t *zilc = data;
+	const uint64_t *cksum_words = zilc->zc_eck.zec_cksum.zc_word;
+
+	/*
+	 * The ZIL MAC is embedded in the block it protects, which will
+	 * not have been byteswapped by the time this function has been called.
+	 * As a result, we don't need to worry about byteswapping the MAC.
+	 */
+	bcopy(&cksum_words[2], mac, sizeof (uint64_t));
+	bcopy(&cksum_words[3], mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * This routine takes a block of dnodes (src_abd) and copies only the bonus
+ * buffers to the same offsets in the dst buffer. datalen should be the size
+ * of both the src_abd and the dst buffer (not just the length of the bonus
+ * buffers).
+ */
+void
+zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
+{
+	uint_t slot = 0, nslots = datalen >> DNODE_SHIFT;
+	uint8_t *src = abd_borrow_buf_copy(src_abd, datalen);
+	dnode_phys_t *src_dn = (dnode_phys_t *)src;
+	dnode_phys_t *dst_dn = (dnode_phys_t *)dst;
+
+	while (slot < nslots) {
+		dnode_phys_t *dnp = &src_dn[slot];
+
+		/* only allocated dnodes with a non-empty encrypted bonus */
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&dst_dn[slot]),
+			    DN_MAX_BONUS_LEN(dnp));
+		}
+
+		/* step over this dnode and the extra slots it occupies */
+		slot += dnp->dn_extra_slots + 1;
+	}
+
+	/* the borrowed copy was only read, so it is returned unchanged */
+	abd_return_buf(src_abd, src, datalen);
+}
+
+/*
+ * This function decides which fields from blk_prop are included in
+ * the various on-disk MAC algorithms. It modifies `bp` in place, so
+ * callers pass a scratch copy (see zio_crypt_bp_auth_init()). `version`
+ * is the key version: 0 selects the legacy masking, anything else must
+ * be ZIO_CRYPT_KEY_CURRENT_VERSION.
+ */
+static void
+zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version)
+{
+	/*
+	 * NOTE(review): presumably a named local is used instead of the
+	 * constant so that lint does not flag BP_SET_PSIZE() -- confirm.
+	 */
+	int avoidlint = SPA_MINBLOCKSIZE;
+	/*
+	 * Version 0 did not properly zero out all non-portable fields
+	 * as it should have done. We maintain this code so that we can
+	 * do read-only imports of pools on this version.
+	 */
+	if (version == 0) {
+		BP_SET_DEDUP(bp, 0);
+		BP_SET_CHECKSUM(bp, 0);
+		BP_SET_PSIZE(bp, avoidlint);
+		return;
+	}
+
+	ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+
+	/*
+	 * The hole_birth feature might set these fields even if this bp
+	 * is a hole. We zero them out here to guarantee that raw sends
+	 * will function with or without the feature.
+	 */
+	if (BP_IS_HOLE(bp)) {
+		bp->blk_prop = 0ULL;
+		return;
+	}
+
+	/*
+	 * At L0 we want to verify these fields to ensure that data blocks
+	 * can not be reinterpreted. For instance, we do not want an attacker
+	 * to trick us into returning raw lz4 compressed data to the user
+	 * by modifying the compression bits. At higher levels, we cannot
+	 * enforce this policy since raw sends do not convey any information
+	 * about indirect blocks, so these values might be different on the
+	 * receive side. Fortunately, this does not open any new attack
+	 * vectors, since any alterations that can be made to a higher level
+	 * bp must still verify the correct order of the layer below it.
+	 */
+	if (BP_GET_LEVEL(bp) != 0) {
+		BP_SET_BYTEORDER(bp, 0);
+		BP_SET_COMPRESS(bp, 0);
+
+		/*
+		 * psize cannot be set to zero or it will trigger
+		 * asserts, but the value doesn't really matter as
+		 * long as it is constant.
+		 */
+		BP_SET_PSIZE(bp, avoidlint);
+	}
+
+	/* dedup and checksum are masked out at every level */
+	BP_SET_DEDUP(bp, 0);
+	BP_SET_CHECKSUM(bp, 0);
+}
+
+/*
+ * Build the authentication record for `bp` in `bab` and report in
+ * `*bab_len` how many of its bytes are to be authenticated. The bp is
+ * copied (and byteswapped if `should_bswap`) before it is examined, so
+ * the caller's bp is never modified.
+ */
+static void
+zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp,
+    blkptr_auth_buf_t *bab, uint_t *bab_len)
+{
+	blkptr_t scratch = *bp;
+
+	if (should_bswap)
+		byteswap_uint64_array(&scratch, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&scratch) || BP_IS_HOLE(&scratch));
+	ASSERT0(BP_IS_EMBEDDED(&scratch));
+
+	zio_crypt_decode_mac_bp(&scratch, bab->bab_mac);
+
+	/*
+	 * We always MAC blk_prop in LE to ensure portability. This
+	 * must be done after decoding the mac, since the endianness
+	 * will get zero'd out here.
+	 */
+	zio_crypt_bp_zero_nonportable_blkprop(&scratch, version);
+	bab->bab_pad = 0ULL;
+	bab->bab_prop = LE_64(scratch.blk_prop);
+
+	/* version 0 did not include the padding */
+	*bab_len = (version == 0) ?
+	    sizeof (blkptr_auth_buf_t) - sizeof (uint64_t) :
+	    sizeof (blkptr_auth_buf_t);
+}
+
+/*
+ * Feed the authentication record of `bp` into the MAC context `ctx`.
+ * Always returns 0.
+ */
+static int
+zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t auth;
+	uint_t auth_len;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &auth, &auth_len);
+	crypto_mac_update(ctx, &auth, auth_len);
+
+	return (0);
+}
+
+/*
+ * Feed the authentication record of `bp` into the SHA2 context `ctx`.
+ */
+static void
+zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t auth;
+	uint_t auth_len;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &auth, &auth_len);
+	SHA2Update(ctx, &auth, auth_len);
+}
+
+/*
+ * Append the authentication record of `bp` to the AAD buffer at `*aadp`,
+ * then advance `*aadp` and grow `*aad_len` by the number of bytes written.
+ */
+static void
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t auth;
+	uint_t auth_len;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &auth, &auth_len);
+
+	bcopy(&auth, *aadp, auth_len);
+	*aad_len += auth_len;
+	*aadp += auth_len;
+}
+
+/*
+ * Feed one dnode into the running MAC in ctx: first the core of the dnode
+ * (everything before dn_blkptr, with the non-portable bits masked out),
+ * then the authentication buffer of each of its block pointers, including
+ * the spill block pointer when DNODE_FLAG_SPILL_BLKPTR is set. Returns 0,
+ * or the first non-zero value handed back by
+ * zio_crypt_bp_do_hmac_updates().
+ */
+static int
+zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, dnode_phys_t *dnp)
+{
+	uint8_t core[offsetof(dnode_phys_t, dn_blkptr)];
+	dnode_phys_t *cdnp = (dnode_phys_t *)core;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	int err, b;
+
+	/* authenticate a scratch copy of the core dnode */
+	bcopy(dnp, core, sizeof (core));
+	if (le_bswap) {
+		cdnp->dn_datablkszsec = BSWAP_16(cdnp->dn_datablkszsec);
+		cdnp->dn_bonuslen = BSWAP_16(cdnp->dn_bonuslen);
+		cdnp->dn_maxblkid = BSWAP_64(cdnp->dn_maxblkid);
+		cdnp->dn_used = BSWAP_64(cdnp->dn_used);
+	}
+
+	/* dn_used and the non-portable flags never contribute to the MAC */
+	cdnp->dn_used = 0;
+	cdnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+
+	crypto_mac_update(ctx, cdnp, sizeof (core));
+
+	for (b = 0; b < dnp->dn_nblkptr; b++) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, version,
+		    should_bswap, &dnp->dn_blkptr[b]);
+		if (err != 0)
+			return (err);
+	}
+
+	if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) == 0)
+		return (0);
+
+	return (zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap,
+	    DN_SPILL_BLKPTR(dnp)));
+}
+
+/*
+ * objset_phys_t blocks introduce a number of exceptions to the normal
+ * authentication process. objset_phys_t's contain 2 separate HMACs for
+ * protecting the integrity of their data. The portable_mac protects the
+ * metadnode. This MAC can be sent with a raw send and protects against
+ * reordering of data within the metadnode. The local_mac protects the user
+ * accounting objects which are not sent from one system to another.
+ *
+ * In addition, objset blocks are the only blocks that can be modified and
+ * written to disk without the key loaded under certain circumstances. During
+ * zil_claim() we need to be able to update the zil_header_t to complete
+ * claiming log blocks and during raw receives we need to write out the
+ * portable_mac from the send file. Both of these actions are possible
+ * because these fields are not protected by either MAC so neither one will
+ * need to modify the MACs without the key. However, when the modified blocks
+ * are written out they will be byteswapped into the host machine's native
+ * endianness which will modify fields protected by the MAC. As a result, MAC
+ * calculation for objset blocks works slightly differently from other block
+ * types. Where other block types MAC the data in whatever endianness is
+ * written to disk, objset blocks always MAC little endian version of their
+ * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
+ * and le_bswap indicates whether a byteswap is needed to get this block
+ * into little endian format.
+ *
+ * Arguments:
+ *   key          - supplies zk_hmac_key (the HMAC key) and zk_version
+ *   data         - the objset_phys_t to authenticate
+ *   datalen      - size of data; compared against OBJSET_PHYS_SIZE_V1/V2/V3
+ *                  to decide which accounting dnodes are looked at
+ *   should_bswap - see above
+ *   portable_mac - out: first ZIO_OBJSET_MAC_LEN bytes of the portable HMAC
+ *   local_mac    - out: first ZIO_OBJSET_MAC_LEN bytes of the local HMAC,
+ *                  or all zeroes when the local MAC does not apply
+ *
+ * Returns 0 on success. If a dnode helper fails, both output MACs are
+ * zeroed and the helper's error is returned.
+ */
+int
+zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
+{
+	int ret;
+	struct hmac_ctx hash_ctx;
+	struct hmac_ctx *ctx = &hash_ctx;
+	objset_phys_t *osp = data;
+	uint64_t intval;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
+	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];
+
+
+	/* calculate the portable MAC from the portable fields and metadnode */
+	crypto_mac_init(ctx, &key->zk_hmac_key);
+
+	/*
+	 * add in the os_type
+	 *
+	 * NOTE(review): os_type is swapped when le_bswap is *false*, while
+	 * os_flags below is normalised via should_bswap and then
+	 * ZFS_HOST_BYTEORDER -- confirm the asymmetry is intentional before
+	 * touching either expression, since changing it changes the MAC.
+	 */
+	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
+	crypto_mac_update(ctx, &intval, sizeof (uint64_t));
+
+	/*
+	 * add in the portable os_flags: bring the flags into host order so
+	 * the mask can be applied, then convert to little endian for the MAC
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	crypto_mac_update(ctx, &intval, sizeof (uint64_t));
+
+	/* add in fields from the metadnode */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+	    should_bswap, &osp->os_meta_dnode);
+	if (ret)
+		goto error;
+
+	/* the full SHA512 digest is computed, but only a prefix is kept */
+	crypto_mac_final(ctx, raw_portable_mac, SHA512_DIGEST_LENGTH);
+
+	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
+
+	/*
+	 * This is necessary here as we check next whether
+	 * OBJSET_FLAG_USERACCOUNTING_COMPLETE is set in order to
+	 * decide if the local_mac should be zeroed out. That flag will always
+	 * be set by dmu_objset_id_quota_upgrade_cb() and
+	 * dmu_objset_userspace_upgrade_cb() if useraccounting has been
+	 * completed.
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	boolean_t uacct_incomplete =
+	    !(intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+
+	/*
+	 * The local MAC protects the user, group and project accounting.
+	 * If these objects are not present, the local MAC is zeroed out.
+	 */
+	if (uacct_incomplete ||
+	    (datalen >= OBJSET_PHYS_SIZE_V3 &&
+	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_projectused_dnode.dn_type == DMU_OT_NONE) ||
+	    (datalen >= OBJSET_PHYS_SIZE_V2 &&
+	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) ||
+	    (datalen <= OBJSET_PHYS_SIZE_V1)) {
+		/* portable_mac has already been written and is kept */
+		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+		return (0);
+	}
+
+	/* calculate the local MAC from the userused and groupused dnodes */
+	crypto_mac_init(ctx, &key->zk_hmac_key);
+
+	/*
+	 * add in the non-portable os_flags (the complement of the mask used
+	 * for the portable MAC), again converted to little endian
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	crypto_mac_update(ctx, &intval, sizeof (uint64_t));
+
+	/* XXX check dnode type ... */
+	/* add in fields from the user accounting dnodes */
+	if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_userused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_groupused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	/* the project dnode is only considered for V3-sized objsets */
+	if (osp->os_projectused_dnode.dn_type != DMU_OT_NONE &&
+	    datalen >= OBJSET_PHYS_SIZE_V3) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_projectused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	crypto_mac_final(ctx, raw_local_mac, SHA512_DIGEST_LENGTH);
+
+	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
+
+	return (0);
+
+error:
+	/* never hand back a partially computed MAC */
+	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+	return (ret);
+}
+
+/*
+ * Release the iovec array attached to uio, if there is one. The array is
+ * assumed to hold zfs_uio_iovcnt(uio) entries. The buffers the iovecs
+ * point at are not freed here.
+ */
+static void
+zio_crypt_destroy_uio(zfs_uio_t *uio)
+{
+	if (!GET_UIO_STRUCT(uio)->uio_iov)
+		return;
+
+	kmem_free(GET_UIO_STRUCT(uio)->uio_iov,
+	    zfs_uio_iovcnt(uio) * sizeof (iovec_t));
+}
+
+/*
+ * Parse an uncompressed indirect block and checksum the portable fields of
+ * every bp it contains. The portable fields are the MAC and all of the
+ * fields from blk_prop except for the dedup, checksum, and psize bits. For
+ * an explanation of the purpose of this, see the comment block on object
+ * set authentication.
+ *
+ * When generate is set the first ZIO_DATA_MAC_LEN bytes of the digest are
+ * written to cksum and 0 is returned. Otherwise the digest is compared
+ * against cksum, yielding 0 on a match and ECKSUM on a mismatch.
+ */
+static int
+zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf,
+    uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum)
+{
+	blkptr_t *bps = buf;
+	int nbps = datalen >> SPA_BLKPTRSHIFT;
+	uint8_t digest[SHA512_DIGEST_LENGTH];
+	SHA2_CTX sha;
+
+	/* checksum all of the MACs from the layer below */
+	SHA2Init(SHA512, &sha);
+	for (int idx = 0; idx < nbps; idx++) {
+		zio_crypt_bp_do_indrect_checksum_updates(&sha, version,
+		    byteswap, &bps[idx]);
+	}
+	SHA2Final(digest, &sha);
+
+	if (!generate) {
+		if (bcmp(digest, cksum, ZIO_DATA_MAC_LEN) == 0)
+			return (0);
+#ifdef FCRYPTO_DEBUG
+		printf("%s(%d): Setting ECKSUM\n", __FUNCTION__, __LINE__);
+#endif
+		return (SET_ERROR(ECKSUM));
+	}
+
+	bcopy(digest, cksum, ZIO_DATA_MAC_LEN);
+	return (0);
+}
+
+/*
+ * Generate or verify the checksum-of-MACs for an indirect block. See the
+ * comment in the body for why verification may run twice.
+ */
+int
+zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	int err;
+
+	/*
+	 * Unfortunately, callers of this function will not always have
+	 * easy access to the on-disk format version. This info is
+	 * normally found in the DSL Crypto Key, but the checksum-of-MACs
+	 * is expected to be verifiable even when the key isn't loaded.
+	 * Here, instead of doing a ZAP lookup for the version for each
+	 * zio, we simply try both existing formats.
+	 */
+	err = zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
+	    datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum);
+	if (err != ECKSUM)
+		return (err);
+
+	/* only a verification can mismatch; retry with format version 0 */
+	ASSERT(!generate);
+	return (zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
+	    datalen, 0, byteswap, cksum));
+}
+
+/*
+ * abd flavour of zio_crypt_do_indirect_mac_checksum(): borrows a linear
+ * copy of the first datalen bytes of abd, runs the checksum over it, and
+ * hands the borrowed buffer back before returning the result.
+ */
+int
+zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	void *linear = abd_borrow_buf_copy(abd, datalen);
+	int err = zio_crypt_do_indirect_mac_checksum(generate, linear,
+	    datalen, byteswap, cksum);
+
+	abd_return_buf(abd, linear, datalen);
+
+	return (err);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting ZIL blocks.
+ * We do not check for the older ZIL chain because the encryption feature
+ * was not available before the newer ZIL chain was introduced. The goal
+ * here is to encrypt everything except the blkptr_t of a lr_write_t and
+ * the zil_chain_t header. Everything that is not encrypted is authenticated.
+ */
+/*
+ * The OpenCrypto used in FreeBSD does not use separate source and
+ * destination buffers; instead, the same buffer is used.  Further, to
+ * accommodate some of the drivers, the authbuf needs to be logically before
+ * the data.  This means that we need to copy the source to the destination,
+ * and set up an extra iovec_t at the beginning to handle the authbuf.
+ * It also means we'll only return one zfs_uio_t.
+ *
+ * Outputs of zio_crypt_init_uios_zil():
+ *   out_uio  - receives a newly allocated iovec array laid out as
+ *              [0] = AAD, [1 .. n-2] = regions of the destination buffer to
+ *              be encrypted / decrypted in place, [n-1] = MAC placeholder
+ *              (left zeroed here)
+ *   enc_len  - total number of bytes covered by the data iovecs
+ *   authbuf  - the AAD buffer, allocated here with zio_buf_alloc(datalen)
+ *   auth_len - number of AAD bytes actually used
+ *   no_crypt - B_TRUE when no data iovec was produced at all
+ *
+ * puio is unused. Always returns 0.
+ */
+
+static int
+zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio,
+    zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+    boolean_t *no_crypt)
+{
+	(void) puio;
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+	uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
+	iovec_t *dst_iovecs;
+	zil_chain_t *zilc;
+	lr_t *lr;
+	uint64_t txtype, lr_len;
+	uint_t crypt_len, nr_iovecs, vec;
+	uint_t aad_len = 0, total_len = 0;
+
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+	}
+	/* the crypto operates in place on dst, so start from a full copy */
+	bcopy(src, dst, datalen);
+
+	/* Find the start and end record of the log block. */
+	zilc = (zil_chain_t *)src;
+	slrp = src + sizeof (zil_chain_t);
+	aadp = aadbuf;
+	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+
+	/*
+	 * Calculate the number of encrypted iovecs we will need.
+	 */
+
+	/* We need at least two iovecs -- one for the AAD, one for the MAC. */
+	nr_iovecs = 2;
+
+	/*
+	 * NOTE(review): lr_len is taken straight from the record and is not
+	 * range-checked in this function -- presumably the block has been
+	 * validated before it gets here; confirm against the callers.
+	 */
+	for (; slrp < blkend; slrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (byteswap) {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		} else {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		}
+
+		/*
+		 * one iovec per record, plus a second one for a TX_WRITE
+		 * that carries data after its lr_write_t
+		 */
+		nr_iovecs++;
+		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
+			nr_iovecs++;
+	}
+
+	dst_iovecs = kmem_alloc(nr_iovecs * sizeof (iovec_t), KM_SLEEP);
+
+	/*
+	 * Copy the plain zil header over and authenticate everything except
+	 * the checksum that will store our MAC. If we are writing the data
+	 * the embedded checksum will not have been calculated yet, so we don't
+	 * authenticate that.
+	 */
+	bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
+	aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+	aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+
+	/* rewind to the first record for the second pass */
+	slrp = src + sizeof (zil_chain_t);
+	dlrp = dst + sizeof (zil_chain_t);
+
+	/*
+	 * Loop over records again, filling in iovecs.
+	 */
+
+	/* The first iovec will contain the authbuf. */
+	vec = 1;
+
+	for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (!byteswap) {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		} else {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		}
+
+		/* copy the common lr_t */
+		bcopy(slrp, dlrp, sizeof (lr_t));
+		bcopy(slrp, aadp, sizeof (lr_t));
+		aadp += sizeof (lr_t);
+		aad_len += sizeof (lr_t);
+
+		/*
+		 * If this is a TX_WRITE record we want to encrypt everything
+		 * except the bp if exists. If the bp does exist we want to
+		 * authenticate it.
+		 */
+		if (txtype == TX_WRITE) {
+			/* the part of lr_write_t between the lr_t and the bp */
+			crypt_len = sizeof (lr_write_t) -
+			    sizeof (lr_t) - sizeof (blkptr_t);
+			dst_iovecs[vec].iov_base = (char *)dlrp +
+			    sizeof (lr_t);
+			dst_iovecs[vec].iov_len = crypt_len;
+
+			/* copy the bp now since it will not be encrypted */
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    sizeof (blkptr_t));
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    aadp, sizeof (blkptr_t));
+			aadp += sizeof (blkptr_t);
+			aad_len += sizeof (blkptr_t);
+			vec++;
+			total_len += crypt_len;
+
+			/* any payload following the lr_write_t */
+			if (lr_len != sizeof (lr_write_t)) {
+				crypt_len = lr_len - sizeof (lr_write_t);
+				dst_iovecs[vec].iov_base = (char *)
+				    dlrp + sizeof (lr_write_t);
+				dst_iovecs[vec].iov_len = crypt_len;
+				vec++;
+				total_len += crypt_len;
+			}
+		} else {
+			/* every other record: everything after the lr_t */
+			crypt_len = lr_len - sizeof (lr_t);
+			dst_iovecs[vec].iov_base = (char *)dlrp +
+			    sizeof (lr_t);
+			dst_iovecs[vec].iov_len = crypt_len;
+			vec++;
+			total_len += crypt_len;
+		}
+	}
+
+	/* The last iovec will contain the MAC. */
+	ASSERT3U(vec, ==, nr_iovecs - 1);
+
+	/* AAD */
+	dst_iovecs[0].iov_base = aadbuf;
+	dst_iovecs[0].iov_len = aad_len;
+	/* MAC */
+	dst_iovecs[vec].iov_base = 0;
+	dst_iovecs[vec].iov_len = 0;
+
+	/* vec is still 1 only if the loop above added no data iovec */
+	*no_crypt = (vec == 1);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+	GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
+	zfs_uio_iovcnt(out_uio) = nr_iovecs;
+
+	return (0);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting dnode blocks.
+ *
+ * The source buffer is copied to the destination buffer and the returned
+ * out_uio describes, in order: the AAD buffer, one iovec per bonus buffer
+ * that has to be encrypted / decrypted in place in the destination, and a
+ * trailing MAC placeholder (left zeroed here). Everything else in the
+ * block that is to be authenticated is concatenated into the AAD buffer,
+ * which is allocated here with zio_buf_alloc(datalen) and returned through
+ * authbuf / auth_len. enc_len receives the total length of the bonus
+ * iovecs and no_crypt is set to B_TRUE when there are none.
+ *
+ * puio is not used by this function. Always returns 0.
+ */
+static int
+zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    zfs_uio_t *puio, zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf,
+    uint_t *auth_len, boolean_t *no_crypt)
+{
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+	uint8_t *src, *dst, *aadp;
+	dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
+	iovec_t *dst_iovecs;
+	uint_t nr_iovecs, crypt_len, vec;
+	uint_t aad_len = 0, total_len = 0;
+	uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;
+
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+	}
+	/* the crypto operates in place on dst, so start from a full copy */
+	bcopy(src, dst, datalen);
+
+	sdnp = (dnode_phys_t *)src;
+	ddnp = (dnode_phys_t *)dst;
+	aadp = aadbuf;
+
+	/*
+	 * Count the number of iovecs we will need to do the encryption by
+	 * counting the number of bonus buffers that need to be encrypted.
+	 */
+
+	/* We need at least two iovecs -- one for the AAD, one for the MAC. */
+	nr_iovecs = 2;
+
+	/* a dnode occupies dn_extra_slots + 1 slots, so step by that much */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		/*
+		 * This block may still be byteswapped. However, all of the
+		 * values we use are either uint8_t's (for which byteswapping
+		 * is a noop) or a * != 0 check, which will work regardless
+		 * of whether or not we byteswap.
+		 */
+		if (sdnp[i].dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
+		    sdnp[i].dn_bonuslen != 0) {
+			nr_iovecs++;
+		}
+	}
+
+	dst_iovecs = kmem_alloc(nr_iovecs * sizeof (iovec_t), KM_SLEEP);
+
+	/*
+	 * Iterate through the dnodes again, this time filling in the uios
+	 * we allocated earlier. We also concatenate any data we want to
+	 * authenticate onto aadbuf.
+	 */
+
+	/* The first iovec will contain the authbuf. */
+	vec = 1;
+
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		dnp = &sdnp[i];
+
+		/* copy over the core fields and blkptrs (kept as plaintext) */
+		bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
+			    sizeof (blkptr_t));
+		}
+
+		/*
+		 * Handle authenticated data. We authenticate everything in
+		 * the dnode that can be brought over when we do a raw send.
+		 * This includes all of the core fields as well as the MACs
+		 * stored in the bp checksums and all of the portable bits
+		 * from blk_prop. We include the dnode padding here in case it
+		 * ever gets used in the future. Some dn_flags and dn_used are
+		 * not portable so we mask those values out of the
+		 * authenticated data.
+		 */
+		crypt_len = offsetof(dnode_phys_t, dn_blkptr);
+		bcopy(dnp, aadp, crypt_len);
+		/* the masking is applied to the AAD copy, not to src or dst */
+		adnp = (dnode_phys_t *)aadp;
+		adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+		adnp->dn_used = 0;
+		aadp += crypt_len;
+		aad_len += crypt_len;
+
+		/* each call appends to aadbuf and advances aadp / aad_len */
+		for (j = 0; j < dnp->dn_nblkptr; j++) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, &dnp->dn_blkptr[j]);
+		}
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, DN_SPILL_BLKPTR(dnp));
+		}
+
+		/*
+		 * If this bonus buffer needs to be encrypted, we prepare an
+		 * iovec_t. The encryption / decryption functions will fill
+		 * this in for us with the encrypted or decrypted data.
+		 * Otherwise we add the bonus buffer to the authenticated
+		 * data buffer and copy it over to the destination. The
+		 * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
+		 * we can guarantee alignment with the AES block size
+		 * (128 bits).
+		 */
+		crypt_len = DN_MAX_BONUS_LEN(dnp);
+		/* same predicate as the counting pass above */
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			dst_iovecs[vec].iov_base = DN_BONUS(&ddnp[i]);
+			dst_iovecs[vec].iov_len = crypt_len;
+
+			vec++;
+			total_len += crypt_len;
+		} else {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
+			bcopy(DN_BONUS(dnp), aadp, crypt_len);
+			aadp += crypt_len;
+			aad_len += crypt_len;
+		}
+	}
+
+	/* The last iovec will contain the MAC. */
+	ASSERT3U(vec, ==, nr_iovecs - 1);
+
+	/* AAD */
+	dst_iovecs[0].iov_base = aadbuf;
+	dst_iovecs[0].iov_len = aad_len;
+	/* MAC */
+	dst_iovecs[vec].iov_base = 0;
+	dst_iovecs[vec].iov_len = 0;
+
+	/* vec is still 1 only if no bonus buffer needed encryption */
+	*no_crypt = (vec == 1);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+	GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
+	zfs_uio_iovcnt(out_uio) = nr_iovecs;
+
+	return (0);
+}
+
+/*
+ * Set up the uio for a block that is encrypted / decrypted as a whole.
+ *
+ * The source buffer is copied to the destination buffer (the crypto works
+ * in place on the destination) and out_uio receives a two-entry iovec
+ * array: entry 0 covers the whole destination buffer, entry 1 is left
+ * zeroed for the caller to point at the MAC. *enc_len is set to datalen.
+ *
+ * puio is unused. Returns 0 on success. If the iovec array cannot be
+ * allocated, out_uio is left with no iovecs, *enc_len is set to 0 and
+ * ENOMEM is returned.
+ */
+static int
+zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *out_uio,
+    uint_t *enc_len)
+{
+	(void) puio;
+	const uint_t nr_cipher = 2;	/* data + MAC */
+	iovec_t *cipher_iovecs;
+	void *src, *dst;
+
+	cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t),
+	    KM_SLEEP);
+	if (!cipher_iovecs) {
+		/* nothing has been allocated, so there is nothing to free */
+		*enc_len = 0;
+		GET_UIO_STRUCT(out_uio)->uio_iov = NULL;
+		zfs_uio_iovcnt(out_uio) = 0;
+		return (SET_ERROR(ENOMEM));
+	}
+	bzero(cipher_iovecs, nr_cipher * sizeof (iovec_t));
+
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+	}
+	bcopy(src, dst, datalen);
+	cipher_iovecs[0].iov_base = dst;
+	cipher_iovecs[0].iov_len = datalen;
+
+	*enc_len = datalen;
+	GET_UIO_STRUCT(out_uio)->uio_iov = cipher_iovecs;
+	zfs_uio_iovcnt(out_uio) = nr_cipher;
+
+	return (0);
+}
+
+/*
+ * Build the uio that zio_do_crypt_data() hands to the crypto layer. Most
+ * blocks go through zio_crypt_init_uios_normal(); ZIL and dnode blocks
+ * need special handling to pick out the pieces that are to be encrypted,
+ * and use authbuf to carry additional authenticated data (AAD) for the
+ * encryption modes. For every other object type authbuf is set to NULL,
+ * auth_len to 0 and no_crypt to B_FALSE.
+ *
+ * On success the last iovec of cuio is pointed at mac (ZIO_DATA_MAC_LEN
+ * bytes) and 0 is returned; otherwise the handler's error is returned.
+ */
+static int
+zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len,
+    uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt)
+{
+	int err;
+	iovec_t *last_iov;
+
+	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);
+
+	/* route to handler */
+	if (ot == DMU_OT_INTENT_LOG) {
+		err = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
+		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
+		    no_crypt);
+	} else if (ot == DMU_OT_DNODE) {
+		err = zio_crypt_init_uios_dnode(encrypt, version, plainbuf,
+		    cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf,
+		    auth_len, no_crypt);
+	} else {
+		err = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
+		    datalen, puio, cuio, enc_len);
+		*authbuf = NULL;
+		*auth_len = 0;
+		*no_crypt = B_FALSE;
+	}
+
+	if (err != 0)
+		return (err);
+
+	/* populate the uios */
+	zfs_uio_segflg(cuio) = UIO_SYSSPACE;
+
+	/* every handler reserves the final iovec for the MAC */
+	last_iov = (iovec_t *)&(GET_UIO_STRUCT(cuio)->
+	    uio_iov[zfs_uio_iovcnt(cuio) - 1]);
+	last_iov->iov_base = (void *)mac;
+	last_iov->iov_len = ZIO_DATA_MAC_LEN;
+
+	return (0);
+}
+
+/*
+ * Copy of the ciphertext from the most recent failed decryption, and its
+ * size in bytes.
+ */
+void *failed_decrypt_buf;
+int failed_decrypt_size;
+
+/*
+ * Primary encryption / decryption entrypoint for zio data.
+ *
+ * Encrypts plainbuf into cipherbuf (encrypt set) or decrypts cipherbuf into
+ * plainbuf (encrypt clear); both buffers are datalen bytes. The key used
+ * is key->zk_current_key when salt matches key->zk_salt, otherwise a
+ * temporary key derived from the master key and salt with hkdf_sha512().
+ * mac is wired in as the last iovec of the uio handed to the crypto layer
+ * and *no_crypt is filled in by zio_crypt_init_uios().
+ *
+ * Returns 0 on success or an error code. On a failed decryption a copy of
+ * cipherbuf is stashed in the file-scope failed_decrypt_buf.
+ */
+int
+zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
+    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
+    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
+    boolean_t *no_crypt)
+{
+	int ret;
+	boolean_t locked = B_FALSE;
+	uint64_t crypt = key->zk_crypt;
+	uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
+	uint_t enc_len, auth_len;
+	zfs_uio_t puio, cuio;
+	struct uio puio_s, cuio_s;
+	uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
+	crypto_key_t tmp_ckey, *ckey = NULL;
+	freebsd_crypt_session_t *tmpl = NULL;
+	uint8_t *authbuf = NULL;
+
+
+	/*
+	 * Both uios start out zeroed, so zio_crypt_destroy_uio() is a no-op
+	 * on any uio that never had an iovec array attached to it.
+	 */
+	zfs_uio_init(&puio, &puio_s);
+	zfs_uio_init(&cuio, &cuio_s);
+	bzero(GET_UIO_STRUCT(&puio), sizeof (struct uio));
+	bzero(GET_UIO_STRUCT(&cuio), sizeof (struct uio));
+
+#ifdef FCRYPTO_DEBUG
+	printf("%s(%s, %p, %p, %d, %p, %p, %u, %s, %p, %p, %p)\n",
+	    __FUNCTION__,
+	    encrypt ? "encrypt" : "decrypt",
+	    key, salt, ot, iv, mac, datalen,
+	    byteswap ? "byteswap" : "native_endian", plainbuf,
+	    cipherbuf, no_crypt);
+
+	printf("\tkey = {");
+	for (int i = 0; i < key->zk_current_key.ck_length/8; i++)
+		printf("%02x ", ((uint8_t *)key->zk_current_key.ck_data)[i]);
+	printf("}\n");
+#endif
+	/* create uios for encryption */
+	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
+	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
+	    &authbuf, &auth_len, no_crypt);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * If the needed key is the current one, just use it. Otherwise we
+	 * need to generate a temporary one from the given salt + master key.
+	 * If we are encrypting, we must return a copy of the current salt
+	 * so that it can be stored in the blkptr_t.
+	 */
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	locked = B_TRUE;
+
+	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) {
+		/* the lock stays held while zk_current_key is in use */
+		ckey = &key->zk_current_key;
+		tmpl = &key->zk_session;
+	} else {
+		/* the temporary key is private, so the lock can be dropped */
+		rw_exit(&key->zk_salt_lock);
+		locked = B_FALSE;
+
+		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+		    salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len);
+		if (ret != 0)
+			goto error;
+		tmp_ckey.ck_format = CRYPTO_KEY_RAW;
+		tmp_ckey.ck_data = enc_keydata;
+		tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+		ckey = &tmp_ckey;
+		tmpl = NULL;
+	}
+
+	/* perform the encryption / decryption */
+	ret = zio_do_crypt_uio_opencrypto(encrypt, tmpl, key->zk_crypt,
+	    ckey, iv, enc_len, &cuio, auth_len);
+	if (ret != 0)
+		goto error;
+	if (locked) {
+		rw_exit(&key->zk_salt_lock);
+		locked = B_FALSE;
+	}
+
+	/* authbuf was allocated with zio_buf_alloc(datalen) by the helpers */
+	if (authbuf != NULL)
+		zio_buf_free(authbuf, datalen);
+	/* scrub the derived key material from the stack */
+	if (ckey == &tmp_ckey)
+		bzero(enc_keydata, keydata_len);
+	zio_crypt_destroy_uio(&puio);
+	zio_crypt_destroy_uio(&cuio);
+
+	return (0);
+
+error:
+	/*
+	 * Keep a copy of the ciphertext that failed to decrypt, replacing
+	 * any copy kept from an earlier failure.
+	 *
+	 * NOTE(review): these file-scope variables are updated without any
+	 * locking visible here -- confirm whether concurrent failures are
+	 * possible.
+	 */
+	if (!encrypt) {
+		if (failed_decrypt_buf != NULL)
+			kmem_free(failed_decrypt_buf, failed_decrypt_size);
+		failed_decrypt_buf = kmem_alloc(datalen, KM_SLEEP);
+		failed_decrypt_size = datalen;
+		bcopy(cipherbuf, failed_decrypt_buf, datalen);
+	}
+	if (locked)
+		rw_exit(&key->zk_salt_lock);
+	if (authbuf != NULL)
+		zio_buf_free(authbuf, datalen);
+	/* ckey is still NULL if hkdf_sha512() failed, so this is skipped */
+	if (ckey == &tmp_ckey)
+		bzero(enc_keydata, keydata_len);
+	zio_crypt_destroy_uio(&puio);
+	zio_crypt_destroy_uio(&cuio);
+	return (SET_ERROR(ret));
+}
+
+/*
+ * Simple wrapper around zio_do_crypt_data() to work with abd's instead of
+ * linear buffers.
+ *
+ * The input side (plaintext when encrypting, ciphertext when decrypting)
+ * is borrowed as a copy of the abd's contents and the output side is
+ * copied back into its abd when the buffers are returned. The buffers are
+ * returned the same way whether or not the crypto operation succeeded.
+ */
+int
+zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot,
+    boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac,
+    uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt)
+{
+	void *plain, *cipher;
+	int err;
+
+	if (!encrypt) {
+		plain = abd_borrow_buf(pabd, datalen);
+		cipher = abd_borrow_buf_copy(cabd, datalen);
+	} else {
+		plain = abd_borrow_buf_copy(pabd, datalen);
+		cipher = abd_borrow_buf(cabd, datalen);
+	}
+
+	err = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac,
+	    datalen, plain, cipher, no_crypt);
+
+	if (!encrypt) {
+		abd_return_buf_copy(pabd, plain, datalen);
+		abd_return_buf(cabd, cipher, datalen);
+	} else {
+		abd_return_buf(pabd, plain, datalen);
+		abd_return_buf_copy(cabd, cipher, datalen);
+	}
+
+	if (err != 0)
+		return (SET_ERROR(err));
+
+	return (0);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/*
+ * Register zfs_key_max_salt_uses as a module parameter (type ulong,
+ * mode 0644). Only compiled for kernel builds with HAVE_SPL defined.
+ */
+/* BEGIN CSTYLED */
+module_param(zfs_key_max_salt_uses, ulong, 0644);
+MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value "
+	"can be used for generating encryption keys before it is rotated");
+/* END CSTYLED */
+#endif

diff --git a/zfs/module/os/freebsd/zfs/zvol_os.c b/zfs/module/os/freebsd/zfs/zvol_os.c
new file mode 100644
index 0000000..ec80bd7
--- /dev/null
+++ b/zfs/module/os/freebsd/zfs/zvol_os.c

@@ -0,0 +1,1573 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions Copyright 2010 Robert Milkowski
+ *
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ */
+
+/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
+
+/*
+ * ZFS volume emulation driver.
+ *
+ * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
+ * Volumes are accessed through the symbolic links named:
+ *
+ * /dev/zvol/<pool_name>/<dataset_name>
+ *
+ * Volumes are persistent through reboot.  No user command needs to be
+ * run before opening and using a device.
+ *
+ * On FreeBSD, ZVOLs are simply GEOM providers like any other storage device
+ * in the system, except when they are character devices (volmode=dev).
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/stat.h>
+#include <sys/proc.h>
+#include <sys/zap.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/zio.h>
+#include <sys/disk.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dnode.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dir.h>
+#include <sys/byteorder.h>
+#include <sys/sunddi.h>
+#include <sys/dirent.h>
+#include <sys/policy.h>
+#include <sys/queue.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zil.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_rlock.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_raidz.h>
+#include <sys/zvol.h>
+#include <sys/zil_impl.h>
+#include <sys/dataset_kstats.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_tx.h>
+#include <sys/zfeature.h>
+#include <sys/zio_checksum.h>
+#include <sys/zil_impl.h>
+#include <sys/filio.h>
+
+#include <geom/geom.h>
+#include <sys/zvol.h>
+#include <sys/zvol_impl.h>
+
+#include "zfs_namecheck.h"
+
+/* NOTE(review): not referenced in the visible part of this file. */
+#define	ZVOL_DUMPSIZE		"dumpsize"
+
+/*
+ * Lock mode and held-check used for the "reader" acquisitions of
+ * zvol_state_lock and zv_suspend_lock in this file.  When ZVOL_LOCK_DEBUG
+ * is defined, those acquisitions are made as writer instead.
+ */
+#ifdef ZVOL_LOCK_DEBUG
+#define	ZVOL_RW_READER		RW_WRITER
+#define	ZVOL_RW_READ_HELD	RW_WRITE_HELD
+#else
+#define	ZVOL_RW_READER		RW_READER
+#define	ZVOL_RW_READ_HELD	RW_READ_HELD
+#endif
+
+/*
+ * State of a volmode=geom zvol, stored in zvol_state_geom.zsg_state.
+ * zvol_geom_worker() exits when it finds the state STOPPED with an empty
+ * queue (setting it back to RUNNING first), and zvol_geom_destroy()
+ * verifies that the state is RUNNING.
+ */
+enum zvol_geom_state {
+	ZVOL_GEOM_UNINIT,
+	ZVOL_GEOM_STOPPED,
+	ZVOL_GEOM_RUNNING,
+};
+
+/*
+ * FreeBSD-specific per-zvol state, reached through zv->zv_zso.  Which
+ * union member is in use depends on the volume's volmode.
+ */
+struct zvol_state_os {
+	/* Shorthand for the two union members below. */
+#define	zso_dev		_zso_state._zso_dev
+#define	zso_geom	_zso_state._zso_geom
+	union {
+		/* volmode=dev */
+		struct zvol_state_dev {
+			/* character device node; NULL when none exists */
+			struct cdev *zsd_cdev;
+			/* opens made with FSYNC/FDSYNC and not yet closed */
+			uint64_t zsd_sync_cnt;
+		} _zso_dev;
+
+		/* volmode=geom */
+		struct zvol_state_geom {
+			/* GEOM provider whose ->private points at the zvol */
+			struct g_provider *zsg_provider;
+			/* bios deferred to zvol_geom_worker() */
+			struct bio_queue_head zsg_queue;
+			/* held around every zsg_queue access in this file */
+			struct mtx zsg_queue_mtx;
+			enum zvol_geom_state zsg_state;
+		} _zso_geom;
+	} _zso_state;
+	/* set by zvol_wait_close(); opens then fail with ENXIO */
+	int zso_dying;
+};
+
+/* NOTE(review): presumably the number of minors; unused in the visible code. */
+static uint32_t zvol_minors;
+
+/* Tunables published under the vfs.zfs.vol sysctl node. */
+SYSCTL_DECL(_vfs_zfs);
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME");
+SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &zvol_volmode, 0,
+	"Expose as GEOM providers (1), device files (2) or neither");
+/*
+ * When B_FALSE, zvol_geom_open() refuses opens made while
+ * zfs_geom_probe_vdev_key is set for the calling thread.
+ */
+static boolean_t zpool_on_zvol = B_FALSE;
+SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0,
+	"Allow zpools to use zvols as vdevs (DANGEROUS)");
+
+/*
+ * Toggle unmap functionality.  In the visible code this is consulted by
+ * the DIOCGDELETE ioctl only.
+ */
+boolean_t zvol_unmap_enabled = B_TRUE;
+
+SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN,
+	&zvol_unmap_enabled, 0, "Enable UNMAP functionality");
+
+/*
+ * zvol maximum transfer in one DMU tx.  zvol_geom_bio_strategy() splits
+ * reads and writes into chunks of at most this many bytes.
+ */
+int zvol_maxphys = DMU_MAX_ACCESS / 2;
+
+static void zvol_ensure_zilog(zvol_state_t *zv);
+
+/* Entry points for volmode=dev character devices. */
+static d_open_t		zvol_cdev_open;
+static d_close_t	zvol_cdev_close;
+static d_ioctl_t	zvol_cdev_ioctl;
+static d_read_t		zvol_cdev_read;
+static d_write_t	zvol_cdev_write;
+static d_strategy_t	zvol_geom_bio_strategy;
+
+/*
+ * Character device switch for volmode=dev zvols.  The strategy routine is
+ * shared with the GEOM path.
+ */
+static struct cdevsw zvol_cdevsw = {
+	.d_name =	"zvol",
+	.d_version =	D_VERSION,
+	.d_flags =	D_DISK | D_TRACKCLOSE,
+	.d_open =	zvol_cdev_open,
+	.d_close =	zvol_cdev_close,
+	.d_ioctl =	zvol_cdev_ioctl,
+	.d_read =	zvol_cdev_read,
+	.d_write =	zvol_cdev_write,
+	.d_strategy =	zvol_geom_bio_strategy,
+};
+
+/* Thread-specific-data key defined elsewhere; read in zvol_geom_open(). */
+extern uint_t zfs_geom_probe_vdev_key;
+
+/* GEOM class under which volmode=geom zvols are registered. */
+struct g_class zfs_zvol_class = {
+	.name = "ZFS::ZVOL",
+	.version = G_VERSION,
+};
+
+DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
+
+/* GEOM-mode helpers defined below. */
+static int zvol_geom_open(struct g_provider *pp, int flag, int count);
+static int zvol_geom_close(struct g_provider *pp, int flag, int count);
+static void zvol_geom_run(zvol_state_t *zv);
+static void zvol_geom_destroy(zvol_state_t *zv);
+static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
+static void zvol_geom_worker(void *arg);
+static void zvol_geom_bio_start(struct bio *bp);
+static int zvol_geom_bio_getattr(struct bio *bp);
+/* static d_strategy_t	zvol_geom_bio_strategy; (declared elsewhere) */
+
+/*
+ * GEOM mode implementation
+ */
+
+/*
+ * Open a volmode=geom zvol on behalf of GEOM.
+ *
+ *	pp	provider being opened; pp->private is the zvol_state_t
+ *	flag	open flags (FWRITE, and FEXCL where defined, are examined)
+ *	count	number of references to add to zv_open_count
+ *
+ * Returns 0 on success, or: EOPNOTSUPP when called during vdev probing
+ * while zpool_on_zvol is off; ENXIO when the zvol is gone or dying; EROFS
+ * for a write open of a read-only volume or one with an incompatible
+ * encryption version; EBUSY on an exclusive-open conflict; or the error
+ * from zvol_first_open().
+ */
+/*ARGSUSED*/
+static int
+zvol_geom_open(struct g_provider *pp, int flag, int count)
+{
+	zvol_state_t *zv;
+	int err = 0;
+	boolean_t drop_suspend = B_FALSE;
+
+	if (!zpool_on_zvol && tsd_get(zfs_geom_probe_vdev_key) != NULL) {
+		/*
+		 * If zfs_geom_probe_vdev_key is set, ZFS is probing GEOM
+		 * providers while looking for a replacement for a missing
+		 * VDEV.  In this case, the spa_namespace_lock will not be
+		 * held, but it is still illegal to use a zvol as a vdev.
+		 * Deadlocks can result if another thread has
+		 * spa_namespace_lock.
+		 */
+		return (SET_ERROR(EOPNOTSUPP));
+	}
+
+retry:
+	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
+	/*
+	 * Obtain a copy of private under zvol_state_lock to make sure either
+	 * the result of zvol free code setting private to NULL is observed,
+	 * or the zv is protected from being freed because of the positive
+	 * zv_open_count.
+	 */
+	zv = pp->private;
+	if (zv == NULL) {
+		rw_exit(&zvol_state_lock);
+		err = SET_ERROR(ENXIO);
+		goto out_locked;
+	}
+
+	mutex_enter(&zv->zv_state_lock);
+	if (zv->zv_zso->zso_dying) {
+		rw_exit(&zvol_state_lock);
+		err = SET_ERROR(ENXIO);
+		goto out_zv_locked;
+	}
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
+
+	/*
+	 * make sure zvol is not suspended during first open
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	if (zv->zv_open_count == 0) {
+		drop_suspend = B_TRUE;
+		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/* check to see if zv_suspend_lock is needed */
+			if (zv->zv_open_count != 0) {
+				rw_exit(&zv->zv_suspend_lock);
+				drop_suspend = B_FALSE;
+			}
+		}
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	if (zv->zv_open_count == 0) {
+		boolean_t drop_namespace = B_FALSE;
+
+		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
+
+		/*
+		 * Take spa_namespace_lock to prevent lock inversion when
+		 * zvols from one pool are opened as vdevs in another.
+		 */
+		if (!mutex_owned(&spa_namespace_lock)) {
+			if (!mutex_tryenter(&spa_namespace_lock)) {
+				mutex_exit(&zv->zv_state_lock);
+				rw_exit(&zv->zv_suspend_lock);
+				/*
+				 * zv_suspend_lock was just released, so
+				 * forget that we held it.  Otherwise an
+				 * exit taken on the next pass that did not
+				 * re-acquire it (zv gone, zvol dying, or
+				 * already opened by someone else) would
+				 * release a lock we do not hold.
+				 */
+				drop_suspend = B_FALSE;
+				kern_yield(PRI_USER);
+				goto retry;
+			} else {
+				drop_namespace = B_TRUE;
+			}
+		}
+		err = zvol_first_open(zv, !(flag & FWRITE));
+		if (drop_namespace)
+			mutex_exit(&spa_namespace_lock);
+		if (err)
+			goto out_zv_locked;
+		pp->mediasize = zv->zv_volsize;
+		pp->stripeoffset = 0;
+		pp->stripesize = zv->zv_volblocksize;
+	}
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	/*
+	 * Check for a bad on-disk format version now since we
+	 * lied about owning the dataset readonly before.
+	 */
+	if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) ||
+	    dmu_objset_incompatible_encryption_version(zv->zv_objset))) {
+		err = SET_ERROR(EROFS);
+		goto out_opened;
+	}
+	if (zv->zv_flags & ZVOL_EXCL) {
+		err = SET_ERROR(EBUSY);
+		goto out_opened;
+	}
+#ifdef FEXCL
+	if (flag & FEXCL) {
+		if (zv->zv_open_count != 0) {
+			err = SET_ERROR(EBUSY);
+			goto out_opened;
+		}
+		zv->zv_flags |= ZVOL_EXCL;
+	}
+#endif
+
+	zv->zv_open_count += count;
+out_opened:
+	/*
+	 * Reached with a zero count only when a check above failed after
+	 * zvol_first_open() succeeded; undo the first open in that case.
+	 */
+	if (zv->zv_open_count == 0) {
+		zvol_last_close(zv);
+		wakeup(zv);
+	}
+out_zv_locked:
+	mutex_exit(&zv->zv_state_lock);
+out_locked:
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+	return (err);
+}
+
+/*
+ * Drop `count` references from a volmode=geom zvol on behalf of GEOM.
+ * When the count reaches zero, zvol_last_close() is called and threads
+ * sleeping on the zvol (see zvol_wait_close()) are woken.
+ *
+ * Returns ENXIO if the provider no longer has a zvol attached, else 0.
+ */
+/*ARGSUSED*/
+static int
+zvol_geom_close(struct g_provider *pp, int flag, int count)
+{
+	zvol_state_t *zv;
+	boolean_t drop_suspend = B_TRUE;
+	int new_open_count;
+
+	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
+	zv = pp->private;
+	if (zv == NULL) {
+		rw_exit(&zvol_state_lock);
+		return (SET_ERROR(ENXIO));
+	}
+
+	mutex_enter(&zv->zv_state_lock);
+	/* An exclusive open implies a single opener; closing clears it. */
+	if (zv->zv_flags & ZVOL_EXCL) {
+		ASSERT3U(zv->zv_open_count, ==, 1);
+		zv->zv_flags &= ~ZVOL_EXCL;
+	}
+
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
+
+	/*
+	 * If the open count is zero, this is a spurious close.
+	 * That indicates a bug in the kernel / DDI framework.
+	 */
+	ASSERT3U(zv->zv_open_count, >, 0);
+
+	/*
+	 * make sure zvol is not suspended during last close
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	new_open_count = zv->zv_open_count - count;
+	if (new_open_count == 0) {
+		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/*
+			 * zv_state_lock was dropped, so the count may have
+			 * changed; check to see if zv_suspend_lock is
+			 * still needed.
+			 */
+			new_open_count = zv->zv_open_count - count;
+			if (new_open_count != 0) {
+				rw_exit(&zv->zv_suspend_lock);
+				drop_suspend = B_FALSE;
+			}
+		}
+	} else {
+		drop_suspend = B_FALSE;
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	/*
+	 * Commit the new reference count; the last reference going away
+	 * triggers the last-close processing.
+	 */
+	zv->zv_open_count = new_open_count;
+	if (zv->zv_open_count == 0) {
+		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
+		zvol_last_close(zv);
+		wakeup(zv);
+	}
+
+	mutex_exit(&zv->zv_state_lock);
+
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+	return (0);
+}
+
+/*
+ * Clear the error on the provider of a volmode=geom zvol and start its
+ * zvol_geom_worker() thread in the "zfskern" process.
+ */
+static void
+zvol_geom_run(zvol_state_t *zv)
+{
+	struct g_provider *provider = zv->zv_zso->zso_geom.zsg_provider;
+
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
+
+	g_error_provider(provider, 0);
+
+	/* The thread name skips the first sizeof (ZVOL_DRIVER) bytes. */
+	kproc_kthread_add(zvol_geom_worker, zv, &system_proc, NULL, 0, 0,
+	    "zfskern", "zvol %s", provider->name + sizeof (ZVOL_DRIVER));
+}
+
+/*
+ * Detach the GEOM provider from a volmode=geom zvol and wither its geom.
+ * Must be called with the GEOM topology lock held (g_topology_assert()).
+ */
+static void
+zvol_geom_destroy(zvol_state_t *zv)
+{
+	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+	/* Saved before zsg_provider is cleared below. */
+	struct g_provider *pp = zsg->zsg_provider;
+
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
+
+	g_topology_assert();
+
+	/*
+	 * NOTE(review): zvol_geom_worker() sets the state back to RUNNING
+	 * just before it exits, so this presumably checks that the worker
+	 * has already been stopped - confirm against the code that sets
+	 * ZVOL_GEOM_STOPPED.
+	 */
+	mutex_enter(&zv->zv_state_lock);
+	VERIFY3S(zsg->zsg_state, ==, ZVOL_GEOM_RUNNING);
+	mutex_exit(&zv->zv_state_lock);
+	zsg->zsg_provider = NULL;
+	g_wither_geom(pp->geom, ENXIO);
+}
+
+/*
+ * For a volmode=geom zvol, mark it dying so that new opens fail, and if
+ * it is still open sleep once on the zvol for up to 10 seconds (10 * hz)
+ * waiting for a wakeup from the close path.  Other volmodes are left
+ * untouched.
+ */
+void
+zvol_wait_close(zvol_state_t *zv)
+{
+	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
+		mutex_enter(&zv->zv_state_lock);
+		zv->zv_zso->zso_dying = B_TRUE;
+
+		if (zv->zv_open_count != 0) {
+			msleep(zv, &zv->zv_state_lock, PRIBIO, "zvol:dying",
+			    10 * hz);
+		}
+		mutex_exit(&zv->zv_state_lock);
+	}
+}
+
+
+/*
+ * GEOM access method: translate a change in the read/write/exclusive
+ * access counts of a provider into an open or a close of the zvol.
+ *
+ * Called with the topology lock held; the lock is dropped around the
+ * open/close call and re-taken before returning.
+ */
+static int
+zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
+{
+	int delta, fflags, rc;
+
+	g_topology_assert();
+
+	/*
+	 * A single request is expected to be either an open (all deltas
+	 * non-negative) or a close (all deltas non-positive), never a mix.
+	 */
+	KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
+	    (acr <= 0 && acw <= 0 && ace <= 0),
+	    ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
+	    pp->name, acr, acw, ace));
+
+	/* No zvol attached: closes succeed, opens get the provider error. */
+	if (pp->private == NULL)
+		return ((acr <= 0 && acw <= 0 && ace <= 0) ? 0 : pp->error);
+
+	/*
+	 * FEXCL is deliberately not passed down when ace != 0.  GEOM already
+	 * arbitrates exclusive access, and does so differently from ZFS:
+	 * GEOM permits several read/exclusive consumers, whereas ZFS permits
+	 * a single exclusive consumer regardless of reader or writer.  The
+	 * decision is left to GEOM.
+	 */
+	delta = acr + acw + ace;
+	if (delta == 0)
+		return (0);
+
+	fflags = (acw != 0) ? FWRITE : 0;
+	if (acr != 0 || ace != 0)
+		fflags |= FREAD;
+
+	g_topology_unlock();
+	rc = (delta > 0) ? zvol_geom_open(pp, fflags, delta) :
+	    zvol_geom_close(pp, fflags, -delta);
+	g_topology_lock();
+	return (rc);
+}
+
+/*
+ * Per-zvol kernel thread (started by zvol_geom_run()) that services bios
+ * queued by zvol_geom_bio_start().  `arg` is the zvol_state_t.
+ */
+static void
+zvol_geom_worker(void *arg)
+{
+	zvol_state_t *zv = arg;
+	struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+	struct bio *bp;
+
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM);
+
+	/* Run at block-I/O priority. */
+	thread_lock(curthread);
+	sched_prio(curthread, PRIBIO);
+	thread_unlock(curthread);
+
+	for (;;) {
+		mtx_lock(&zsg->zsg_queue_mtx);
+		bp = bioq_takefirst(&zsg->zsg_queue);
+		if (bp == NULL) {
+			/*
+			 * Queue drained.  If a stop was requested, flip the
+			 * state back to RUNNING, wake whoever sleeps on
+			 * zsg_state, and exit the thread.
+			 */
+			if (zsg->zsg_state == ZVOL_GEOM_STOPPED) {
+				zsg->zsg_state = ZVOL_GEOM_RUNNING;
+				wakeup(&zsg->zsg_state);
+				mtx_unlock(&zsg->zsg_queue_mtx);
+				kthread_exit();
+			}
+			/*
+			 * Sleep until zvol_geom_bio_start() queues work.
+			 * PDROP is passed and the mutex is taken again at
+			 * the top of the loop.
+			 */
+			msleep(&zsg->zsg_queue, &zsg->zsg_queue_mtx,
+			    PRIBIO | PDROP, "zvol:io", 0);
+			continue;
+		}
+		mtx_unlock(&zsg->zsg_queue_mtx);
+		zvol_geom_bio_strategy(bp);
+	}
+}
+
+/*
+ * GEOM start routine for zvol providers.  BIO_GETATTR is answered
+ * directly.  Any other request is handled inline when the calling thread
+ * may sleep, and is otherwise queued for zvol_geom_worker().
+ */
+static void
+zvol_geom_bio_start(struct bio *bp)
+{
+	zvol_state_t *zv = bp->bio_to->private;
+	struct zvol_state_geom *geom_state;
+	boolean_t was_empty;
+
+	if (zv == NULL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+
+	if (bp->bio_cmd == BIO_GETATTR) {
+		/* A nonzero return means the attribute was not handled. */
+		if (zvol_geom_bio_getattr(bp))
+			g_io_deliver(bp, EOPNOTSUPP);
+		return;
+	}
+
+	if (THREAD_CAN_SLEEP()) {
+		zvol_geom_bio_strategy(bp);
+		return;
+	}
+
+	/* Cannot sleep here: defer to the worker thread. */
+	geom_state = &zv->zv_zso->zso_geom;
+	mtx_lock(&geom_state->zsg_queue_mtx);
+	was_empty = (bioq_first(&geom_state->zsg_queue) == NULL);
+	bioq_insert_tail(&geom_state->zsg_queue, bp);
+	mtx_unlock(&geom_state->zsg_queue_mtx);
+
+	/* Only the empty -> non-empty transition needs a wakeup. */
+	if (was_empty)
+		wakeup_one(&geom_state->zsg_queue);
+}
+
+/*
+ * Answer a BIO_GETATTR request for a zvol provider.
+ *
+ * Handles "GEOM::candelete", "blocksavail", "blocksused",
+ * "poolblocksavail" and "poolblocksused"; block counts are expressed in
+ * DEV_BSIZE units.
+ *
+ * Returns 0 if the attribute was handled, 1 otherwise.
+ */
+static int
+zvol_geom_bio_getattr(struct bio *bp)
+{
+	zvol_state_t *zv = bp->bio_to->private;
+
+	ASSERT3P(zv, !=, NULL);
+
+	spa_t *spa = dmu_objset_spa(zv->zv_objset);
+	uint64_t refd, avail, usedobjs, availobjs;
+	const char *attr;
+
+	if (g_handleattr_int(bp, "GEOM::candelete", 1))
+		return (0);
+
+	attr = bp->bio_attribute;
+
+	if (strcmp(attr, "blocksavail") == 0) {
+		dmu_objset_space(zv->zv_objset, &refd, &avail,
+		    &usedobjs, &availobjs);
+		return (g_handleattr_off_t(bp, "blocksavail",
+		    avail / DEV_BSIZE) ? 0 : 1);
+	}
+
+	if (strcmp(attr, "blocksused") == 0) {
+		dmu_objset_space(zv->zv_objset, &refd, &avail,
+		    &usedobjs, &availobjs);
+		return (g_handleattr_off_t(bp, "blocksused",
+		    refd / DEV_BSIZE) ? 0 : 1);
+	}
+
+	if (strcmp(attr, "poolblocksavail") == 0) {
+		/* Space of the normal class minus what is allocated. */
+		avail = metaslab_class_get_space(spa_normal_class(spa));
+		avail -= metaslab_class_get_alloc(spa_normal_class(spa));
+		return (g_handleattr_off_t(bp, "poolblocksavail",
+		    avail / DEV_BSIZE) ? 0 : 1);
+	}
+
+	if (strcmp(attr, "poolblocksused") == 0) {
+		refd = metaslab_class_get_alloc(spa_normal_class(spa));
+		return (g_handleattr_off_t(bp, "poolblocksused",
+		    refd / DEV_BSIZE) ? 0 : 1);
+	}
+
+	return (1);
+}
+
+/*
+ * Carry out one bio against a zvol.  Used for GEOM bios (bp->bio_to set;
+ * completed with g_io_deliver()) and for character-device bios (zvol taken
+ * from bp->bio_dev->si_drv2; completed with biofinish()).
+ *
+ * BIO_READ, BIO_WRITE, BIO_DELETE and BIO_FLUSH are supported; anything
+ * else completes with EOPNOTSUPP.  zv_suspend_lock is held as reader for
+ * the duration of the operation.
+ */
+static void
+zvol_geom_bio_strategy(struct bio *bp)
+{
+	zvol_state_t *zv;
+	uint64_t off, volsize;
+	size_t resid;
+	char *addr;
+	objset_t *os;
+	zfs_locked_range_t *lr;
+	int error = 0;
+	boolean_t doread = B_FALSE;
+	boolean_t is_dumpified;
+	boolean_t sync;
+
+	if (bp->bio_to)
+		zv = bp->bio_to->private;
+	else
+		zv = bp->bio_dev->si_drv2;
+
+	if (zv == NULL) {
+		error = SET_ERROR(ENXIO);
+		goto out;
+	}
+
+	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		doread = B_TRUE;
+		break;
+	case BIO_WRITE:
+	case BIO_FLUSH:
+	case BIO_DELETE:
+		if (zv->zv_flags & ZVOL_RDONLY) {
+			error = SET_ERROR(EROFS);
+			goto resume;
+		}
+		zvol_ensure_zilog(zv);
+		/* A flush only needs the ZIL commit at the sync: label. */
+		if (bp->bio_cmd == BIO_FLUSH)
+			goto sync;
+		break;
+	default:
+		error = SET_ERROR(EOPNOTSUPP);
+		goto resume;
+	}
+
+	off = bp->bio_offset;
+	volsize = zv->zv_volsize;
+
+	os = zv->zv_objset;
+	ASSERT3P(os, !=, NULL);
+
+	addr = bp->bio_data;
+	resid = bp->bio_length;
+
+	/* A non-empty request starting at or beyond the end is an error. */
+	if (resid > 0 && off >= volsize) {
+		error = SET_ERROR(EIO);
+		goto resume;
+	}
+
+	/* is_dumpified is always B_FALSE here. */
+	is_dumpified = B_FALSE;
+	sync = !doread && !is_dumpified &&
+	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+
+	/*
+	 * There must be no buffer changes when doing a dmu_sync() because
+	 * we can't change the data whilst calculating the checksum.
+	 */
+	lr = zfs_rangelock_enter(&zv->zv_rangelock, off, resid,
+	    doread ? RL_READER : RL_WRITER);
+
+	if (bp->bio_cmd == BIO_DELETE) {
+		/* Log the truncate in its own tx, then free the range. */
+		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error != 0) {
+			dmu_tx_abort(tx);
+		} else {
+			zvol_log_truncate(zv, tx, off, resid, sync);
+			dmu_tx_commit(tx);
+			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
+			    off, resid);
+			resid = 0;
+		}
+		goto unlock;
+	}
+	/* Transfer in chunks of at most zvol_maxphys bytes. */
+	while (resid != 0 && off < volsize) {
+		size_t size = MIN(resid, zvol_maxphys);
+		if (doread) {
+			error = dmu_read(os, ZVOL_OBJ, off, size, addr,
+			    DMU_READ_PREFETCH);
+		} else {
+			dmu_tx_t *tx = dmu_tx_create(os);
+			dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, size);
+			error = dmu_tx_assign(tx, TXG_WAIT);
+			if (error) {
+				dmu_tx_abort(tx);
+			} else {
+				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
+				zvol_log_write(zv, tx, off, size, sync);
+				dmu_tx_commit(tx);
+			}
+		}
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = SET_ERROR(EIO);
+			break;
+		}
+		off += size;
+		addr += size;
+		resid -= size;
+	}
+unlock:
+	zfs_rangelock_exit(lr);
+
+	/* Report how much was done; a short transfer past the end fails. */
+	bp->bio_completed = bp->bio_length - resid;
+	if (bp->bio_completed < bp->bio_length && off > volsize)
+		error = SET_ERROR(EINVAL);
+
+	/* Only reads and writes are accounted in the dataset kstats. */
+	switch (bp->bio_cmd) {
+	case BIO_FLUSH:
+		break;
+	case BIO_READ:
+		dataset_kstats_update_read_kstats(&zv->zv_kstat,
+		    bp->bio_completed);
+		break;
+	case BIO_WRITE:
+		dataset_kstats_update_write_kstats(&zv->zv_kstat,
+		    bp->bio_completed);
+		break;
+	case BIO_DELETE:
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * The sync: label sits inside the if body so that BIO_FLUSH, which
+	 * jumps here before `sync` is assigned, always commits the ZIL.
+	 */
+	if (sync) {
+sync:
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+	}
+resume:
+	rw_exit(&zv->zv_suspend_lock);
+out:
+	if (bp->bio_to)
+		g_io_deliver(bp, error);
+	else
+		biofinish(bp, NULL, error);
+}
+
+/*
+ * Character device mode implementation
+ */
+
+/*
+ * read(2) entry point for volmode=dev zvols.  Copies data from the volume
+ * into the caller's uio under a reader range lock, stopping at the end of
+ * the volume.
+ *
+ * Returns 0 on success, EIO if a non-empty request starts at a negative
+ * offset or beyond the end of the volume (or on a checksum error), or the
+ * error from dmu_read_uio_dnode().
+ */
+static int
+zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
+{
+	zvol_state_t *zv;
+	uint64_t volsize;
+	zfs_locked_range_t *lr;
+	int error = 0;
+	zfs_uio_t uio;
+
+	zfs_uio_init(&uio, uio_s);
+
+	zv = dev->si_drv2;
+
+	volsize = zv->zv_volsize;
+	/*
+	 * uio_loffset == volsize isn't an error as
+	 * it's required for EOF processing.
+	 */
+	if (zfs_uio_resid(&uio) > 0 &&
+	    (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
+		return (SET_ERROR(EIO));
+
+	/* Remember the request size so the bytes read can be computed. */
+	ssize_t start_resid = zfs_uio_resid(&uio);
+	lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
+	    zfs_uio_resid(&uio), RL_READER);
+	while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
+		/* At most DMU_MAX_ACCESS / 2 bytes per DMU call. */
+		uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
+
+		/* don't read past the end */
+		if (bytes > volsize - zfs_uio_offset(&uio))
+			bytes = volsize - zfs_uio_offset(&uio);
+
+		error =  dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = SET_ERROR(EIO);
+			break;
+		}
+	}
+	zfs_rangelock_exit(lr);
+	int64_t nread = start_resid - zfs_uio_resid(&uio);
+	dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
+
+	return (error);
+}
+
+/*
+ * write(2) entry point for volmode=dev zvols.  Writes the caller's uio to
+ * the volume in per-chunk DMU transactions under a writer range lock,
+ * logging each chunk to the ZIL, and commits the ZIL when the write is
+ * synchronous (IO_SYNC in ioflag, or sync=always on the objset).
+ *
+ * Returns 0 on success, EIO if a non-empty request starts at a negative
+ * offset or beyond the end of the volume, or the error from the DMU.
+ */
+static int
+zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
+{
+	zvol_state_t *zv;
+	uint64_t volsize;
+	zfs_locked_range_t *lr;
+	int error = 0;
+	boolean_t sync;
+	zfs_uio_t uio;
+
+	zv = dev->si_drv2;
+
+	volsize = zv->zv_volsize;
+
+	zfs_uio_init(&uio, uio_s);
+
+	if (zfs_uio_resid(&uio) > 0 &&
+	    (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
+		return (SET_ERROR(EIO));
+
+	/* Remember the request size so the bytes written can be computed. */
+	ssize_t start_resid = zfs_uio_resid(&uio);
+	sync = (ioflag & IO_SYNC) ||
+	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
+
+	/* zvol_ensure_zilog() requires zv_suspend_lock to be held. */
+	rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+	zvol_ensure_zilog(zv);
+
+	lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
+	    zfs_uio_resid(&uio), RL_WRITER);
+	while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
+		/* At most DMU_MAX_ACCESS / 2 bytes per transaction. */
+		uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
+		uint64_t off = zfs_uio_offset(&uio);
+		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
+
+		if (bytes > volsize - off)	/* don't write past the end */
+			bytes = volsize - off;
+
+		dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			dmu_tx_abort(tx);
+			break;
+		}
+		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
+		/* Only successfully written chunks are logged. */
+		if (error == 0)
+			zvol_log_write(zv, tx, off, bytes, sync);
+		dmu_tx_commit(tx);
+
+		if (error)
+			break;
+	}
+	zfs_rangelock_exit(lr);
+	int64_t nwritten = start_resid - zfs_uio_resid(&uio);
+	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
+	if (sync)
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+	rw_exit(&zv->zv_suspend_lock);
+	return (error);
+}
+
+/*
+ * open(2) entry point for volmode=dev zvols.
+ *
+ *	dev	character device; dev->si_drv2 is the zvol_state_t
+ *	flags	open flags (FWRITE, FSYNC, FDSYNC, and FEXCL where defined,
+ *		are examined)
+ *
+ * Each successful call adds one reference to zv_open_count.  Returns 0 on
+ * success, or: ENXIO when the zvol is gone or dying; EROFS for a write
+ * open of a read-only volume; EBUSY on an exclusive-open conflict; or the
+ * error from zvol_first_open().
+ */
+static int
+zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+	zvol_state_t *zv;
+	struct zvol_state_dev *zsd;
+	int err = 0;
+	boolean_t drop_suspend = B_FALSE;
+
+retry:
+	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
+	/*
+	 * Obtain a copy of si_drv2 under zvol_state_lock to make sure either
+	 * the result of zvol free code setting si_drv2 to NULL is observed,
+	 * or the zv is protected from being freed because of the positive
+	 * zv_open_count.
+	 */
+	zv = dev->si_drv2;
+	if (zv == NULL) {
+		rw_exit(&zvol_state_lock);
+		err = SET_ERROR(ENXIO);
+		goto out_locked;
+	}
+
+	mutex_enter(&zv->zv_state_lock);
+	if (zv->zv_zso->zso_dying) {
+		rw_exit(&zvol_state_lock);
+		err = SET_ERROR(ENXIO);
+		goto out_zv_locked;
+	}
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);
+
+	/*
+	 * make sure zvol is not suspended during first open
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	if (zv->zv_open_count == 0) {
+		drop_suspend = B_TRUE;
+		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/* check to see if zv_suspend_lock is needed */
+			if (zv->zv_open_count != 0) {
+				rw_exit(&zv->zv_suspend_lock);
+				drop_suspend = B_FALSE;
+			}
+		}
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	if (zv->zv_open_count == 0) {
+		boolean_t drop_namespace = B_FALSE;
+
+		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
+
+		/*
+		 * Take spa_namespace_lock to prevent lock inversion when
+		 * zvols from one pool are opened as vdevs in another.
+		 */
+		if (!mutex_owned(&spa_namespace_lock)) {
+			if (!mutex_tryenter(&spa_namespace_lock)) {
+				mutex_exit(&zv->zv_state_lock);
+				rw_exit(&zv->zv_suspend_lock);
+				/*
+				 * zv_suspend_lock was just released, so
+				 * forget that we held it.  Otherwise an
+				 * exit taken on the next pass that did not
+				 * re-acquire it (zv gone, zvol dying, or
+				 * already opened by someone else) would
+				 * release a lock we do not hold.
+				 */
+				drop_suspend = B_FALSE;
+				kern_yield(PRI_USER);
+				goto retry;
+			} else {
+				drop_namespace = B_TRUE;
+			}
+		}
+		err = zvol_first_open(zv, !(flags & FWRITE));
+		if (drop_namespace)
+			mutex_exit(&spa_namespace_lock);
+		if (err)
+			goto out_zv_locked;
+	}
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+		err = SET_ERROR(EROFS);
+		goto out_opened;
+	}
+	if (zv->zv_flags & ZVOL_EXCL) {
+		err = SET_ERROR(EBUSY);
+		goto out_opened;
+	}
+#ifdef FEXCL
+	if (flags & FEXCL) {
+		if (zv->zv_open_count != 0) {
+			err = SET_ERROR(EBUSY);
+			goto out_opened;
+		}
+		zv->zv_flags |= ZVOL_EXCL;
+	}
+#endif
+
+	zv->zv_open_count++;
+	/*
+	 * Track synchronous openers.  When the first one arrives on a
+	 * volume that has already been written to, convert the pending
+	 * async ZIL records to sync.
+	 */
+	if (flags & (FSYNC | FDSYNC)) {
+		zsd = &zv->zv_zso->zso_dev;
+		zsd->zsd_sync_cnt++;
+		if (zsd->zsd_sync_cnt == 1 &&
+		    (zv->zv_flags & ZVOL_WRITTEN_TO) != 0)
+			zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ);
+	}
+out_opened:
+	/*
+	 * Reached with a zero count only when a check above failed after
+	 * zvol_first_open() succeeded; undo the first open in that case.
+	 */
+	if (zv->zv_open_count == 0) {
+		zvol_last_close(zv);
+		wakeup(zv);
+	}
+out_zv_locked:
+	mutex_exit(&zv->zv_state_lock);
+out_locked:
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+	return (err);
+}
+
+/*
+ * close(2) entry point for volmode=dev zvols.  Drops one reference from
+ * zv_open_count; when the count reaches zero, zvol_last_close() is called
+ * and threads sleeping on the zvol are woken.
+ *
+ * Returns ENXIO if the device no longer has a zvol attached, else 0.
+ */
+static int
+zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+	zvol_state_t *zv;
+	struct zvol_state_dev *zsd;
+	boolean_t drop_suspend = B_TRUE;
+
+	rw_enter(&zvol_state_lock, ZVOL_RW_READER);
+	zv = dev->si_drv2;
+	if (zv == NULL) {
+		rw_exit(&zvol_state_lock);
+		return (SET_ERROR(ENXIO));
+	}
+
+	mutex_enter(&zv->zv_state_lock);
+	/* An exclusive open implies a single opener; closing clears it. */
+	if (zv->zv_flags & ZVOL_EXCL) {
+		ASSERT3U(zv->zv_open_count, ==, 1);
+		zv->zv_flags &= ~ZVOL_EXCL;
+	}
+
+	ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV);
+
+	/*
+	 * If the open count is zero, this is a spurious close.
+	 * That indicates a bug in the kernel / DDI framework.
+	 */
+	ASSERT3U(zv->zv_open_count, >, 0);
+	/*
+	 * make sure zvol is not suspended during last close
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	if (zv->zv_open_count == 1) {
+		if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/*
+			 * zv_state_lock was dropped, so the count may have
+			 * changed; check to see if zv_suspend_lock is
+			 * still needed.
+			 */
+			if (zv->zv_open_count != 1) {
+				rw_exit(&zv->zv_suspend_lock);
+				drop_suspend = B_FALSE;
+			}
+		}
+	} else {
+		drop_suspend = B_FALSE;
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	/*
+	 * You may get multiple opens, but only one close.
+	 */
+	zv->zv_open_count--;
+	/* Mirror of the FSYNC/FDSYNC accounting done in zvol_cdev_open(). */
+	if (flags & (FSYNC | FDSYNC)) {
+		zsd = &zv->zv_zso->zso_dev;
+		zsd->zsd_sync_cnt--;
+	}
+
+	if (zv->zv_open_count == 0) {
+		ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
+		zvol_last_close(zv);
+		wakeup(zv);
+	}
+
+	mutex_exit(&zv->zv_state_lock);
+
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+	return (0);
+}
+
+/*
+ * ioctl(2) entry point for volmode=dev zvols.
+ *
+ * Handles the disk ioctls DIOCGSECTORSIZE, DIOCGMEDIASIZE, DIOCGFLUSH,
+ * DIOCGDELETE, DIOCGSTRIPESIZE, DIOCGSTRIPEOFFSET and DIOCGATTR, plus
+ * FIOSEEKHOLE / FIOSEEKDATA.  `data` is the in/out argument buffer whose
+ * layout depends on `cmd`.
+ *
+ * Returns 0 on success, EINVAL for a malformed DIOCGDELETE range,
+ * ENOIOCTL for an unknown command or attribute, or an error from the DMU.
+ */
+static int
+zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data,
+    int fflag, struct thread *td)
+{
+	zvol_state_t *zv;
+	zfs_locked_range_t *lr;
+	off_t offset, length;
+	int error;
+	boolean_t sync;
+
+	zv = dev->si_drv2;
+
+	error = 0;
+	KASSERT(zv->zv_open_count > 0,
+	    ("Device with zero access count in %s", __func__));
+
+	switch (cmd) {
+	case DIOCGSECTORSIZE:
+		*(uint32_t *)data = DEV_BSIZE;
+		break;
+	case DIOCGMEDIASIZE:
+		*(off_t *)data = zv->zv_volsize;
+		break;
+	case DIOCGFLUSH:
+		/* Nothing to commit if the ZIL was never opened. */
+		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+		if (zv->zv_zilog != NULL)
+			zil_commit(zv->zv_zilog, ZVOL_OBJ);
+		rw_exit(&zv->zv_suspend_lock);
+		break;
+	case DIOCGDELETE:
+		/* With unmap disabled the request succeeds as a no-op. */
+		if (!zvol_unmap_enabled)
+			break;
+
+		/* data holds two off_t values: offset, then length. */
+		offset = ((off_t *)data)[0];
+		length = ((off_t *)data)[1];
+		if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 ||
+		    offset < 0 || offset >= zv->zv_volsize ||
+		    length <= 0) {
+			printf("%s: offset=%jd length=%jd\n", __func__, offset,
+			    length);
+			error = SET_ERROR(EINVAL);
+			break;
+		}
+		rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
+		zvol_ensure_zilog(zv);
+		lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, length,
+		    RL_WRITER);
+		/* Log the truncate in its own tx, then free the range. */
+		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error != 0) {
+			sync = FALSE;
+			dmu_tx_abort(tx);
+		} else {
+			sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
+			zvol_log_truncate(zv, tx, offset, length, sync);
+			dmu_tx_commit(tx);
+			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
+			    offset, length);
+		}
+		zfs_rangelock_exit(lr);
+		if (sync)
+			zil_commit(zv->zv_zilog, ZVOL_OBJ);
+		rw_exit(&zv->zv_suspend_lock);
+		break;
+	case DIOCGSTRIPESIZE:
+		*(off_t *)data = zv->zv_volblocksize;
+		break;
+	case DIOCGSTRIPEOFFSET:
+		*(off_t *)data = 0;
+		break;
+	case DIOCGATTR: {
+		/* Same attribute set as zvol_geom_bio_getattr(). */
+		spa_t *spa = dmu_objset_spa(zv->zv_objset);
+		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;
+		uint64_t refd, avail, usedobjs, availobjs;
+
+		if (strcmp(arg->name, "GEOM::candelete") == 0)
+			arg->value.i = 1;
+		else if (strcmp(arg->name, "blocksavail") == 0) {
+			dmu_objset_space(zv->zv_objset, &refd, &avail,
+			    &usedobjs, &availobjs);
+			arg->value.off = avail / DEV_BSIZE;
+		} else if (strcmp(arg->name, "blocksused") == 0) {
+			dmu_objset_space(zv->zv_objset, &refd, &avail,
+			    &usedobjs, &availobjs);
+			arg->value.off = refd / DEV_BSIZE;
+		} else if (strcmp(arg->name, "poolblocksavail") == 0) {
+			avail = metaslab_class_get_space(spa_normal_class(spa));
+			avail -= metaslab_class_get_alloc(
+			    spa_normal_class(spa));
+			arg->value.off = avail / DEV_BSIZE;
+		} else if (strcmp(arg->name, "poolblocksused") == 0) {
+			refd = metaslab_class_get_alloc(spa_normal_class(spa));
+			arg->value.off = refd / DEV_BSIZE;
+		} else
+			error = SET_ERROR(ENOIOCTL);
+		break;
+	}
+	case FIOSEEKHOLE:
+	case FIOSEEKDATA: {
+		/* *off is the starting offset on input, the result on output. */
+		off_t *off = (off_t *)data;
+		uint64_t noff;
+		boolean_t hole;
+
+		hole = (cmd == FIOSEEKHOLE);
+		noff = *off;
+		/* Lock the whole volume against writers while searching. */
+		lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX,
+		    RL_READER);
+		error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff);
+		zfs_rangelock_exit(lr);
+		*off = noff;
+		break;
+	}
+	default:
+		error = SET_ERROR(ENOIOCTL);
+	}
+
+	return (error);
+}
+
+/*
+ * Misc. helpers
+ */
+
+/*
+ * Make sure zv->zv_zilog is open before a write-type operation.
+ *
+ * The caller must hold zv_suspend_lock as reader.  The lock is still held
+ * as reader on return, but it may have been dropped and re-acquired in
+ * between.
+ */
+static void
+zvol_ensure_zilog(zvol_state_t *zv)
+{
+	ASSERT(ZVOL_RW_READ_HELD(&zv->zv_suspend_lock));
+
+	/*
+	 * Open a ZIL if this is the first time we have written to this
+	 * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
+	 * than zv_state_lock so that we don't need to acquire an
+	 * additional lock in this path.
+	 */
+	if (zv->zv_zilog == NULL) {
+		/* Get the lock as writer, by upgrade if possible. */
+		if (!rw_tryupgrade(&zv->zv_suspend_lock)) {
+			rw_exit(&zv->zv_suspend_lock);
+			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
+		}
+		/*
+		 * Re-check: if the lock was dropped above, another thread
+		 * may have opened the ZIL in the meantime.
+		 */
+		if (zv->zv_zilog == NULL) {
+			zv->zv_zilog = zil_open(zv->zv_objset,
+			    zvol_get_data);
+			zv->zv_flags |= ZVOL_WRITTEN_TO;
+			/* replay / destroy done in zvol_create_minor_impl() */
+			VERIFY0(zv->zv_zilog->zl_header->zh_flags &
+			    ZIL_REPLAY_NEEDED);
+		}
+		rw_downgrade(&zv->zv_suspend_lock);
+	}
+}
+
+/*
+ * Report whether `device` is a path that begins with ZVOL_DIR.
+ * A NULL path is not a zvol.
+ */
+static boolean_t
+zvol_is_zvol_impl(const char *device)
+{
+	if (device == NULL)
+		return (B_FALSE);
+
+	return (strncmp(device, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
+}
+
+/*
+ * Rename the minor of a zvol to `newname`: move the zvol to the hash
+ * bucket of the new name, replace its GEOM provider or character device
+ * with one carrying the new name, and record the new name in zv_name.
+ *
+ * The caller must hold zvol_state_lock and zv->zv_state_lock.
+ */
+static void
+zvol_rename_minor(zvol_state_t *zv, const char *newname)
+{
+	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	/*
+	 * Move to the new hashtable entry.  The hash must be computed from
+	 * newname: zv_name still holds the old name here and is only
+	 * updated at the end of this function.
+	 */
+	zv->zv_hash = zvol_name_hash(newname);
+	hlist_del(&zv->zv_hlink);
+	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
+
+	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
+		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+		struct g_provider *pp = zsg->zsg_provider;
+		struct g_geom *gp;
+
+		g_topology_lock();
+		gp = pp->geom;
+		ASSERT3P(gp, !=, NULL);
+
+		/* Retire the old provider, keeping its geom. */
+		zsg->zsg_provider = NULL;
+		g_wither_provider(pp, ENXIO);
+
+		/* Create the replacement provider under the new name. */
+		pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
+		pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
+		pp->sectorsize = DEV_BSIZE;
+		pp->mediasize = zv->zv_volsize;
+		pp->private = zv;
+		zsg->zsg_provider = pp;
+		g_error_provider(pp, 0);
+		g_topology_unlock();
+	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
+		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
+		struct cdev *dev;
+		struct make_dev_args args;
+
+		dev = zsd->zsd_cdev;
+		if (dev != NULL) {
+			destroy_dev(dev);
+			dev = zsd->zsd_cdev = NULL;
+			/*
+			 * The old device node is gone, so drop any opens
+			 * that were made through it.
+			 */
+			if (zv->zv_open_count > 0) {
+				zv->zv_flags &= ~ZVOL_EXCL;
+				zv->zv_open_count = 0;
+				/* XXX  need suspend lock but lock order */
+				zvol_last_close(zv);
+			}
+		}
+
+		/* Create the replacement node; on failure zsd_cdev stays NULL. */
+		make_dev_args_init(&args);
+		args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
+		args.mda_devsw = &zvol_cdevsw;
+		args.mda_cr = NULL;
+		args.mda_uid = UID_ROOT;
+		args.mda_gid = GID_OPERATOR;
+		args.mda_mode = 0640;
+		args.mda_si_drv2 = zv;
+		if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname)
+		    == 0) {
+#if __FreeBSD_version > 1300130
+			dev->si_iosize_max = maxphys;
+#else
+			dev->si_iosize_max = MAXPHYS;
+#endif
+			zsd->zsd_cdev = dev;
+		}
+	}
+	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+}
+
+/*
+ * Remove minor node for the specified volume.
+ *
+ * Tears down the locks, the GEOM geom or character device, and the kstats,
+ * then frees the OS-specific state and the zvol_state_t itself and
+ * decrements zvol_minors.  The caller must not hold zv_suspend_lock or
+ * zv_state_lock, and the volume must have no opens (all asserted).
+ */
+static void
+zvol_free(zvol_state_t *zv)
+{
+	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
+	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
+	ASSERT0(zv->zv_open_count);
+
+	ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
+
+	rw_destroy(&zv->zv_suspend_lock);
+	zfs_rangelock_fini(&zv->zv_rangelock);
+
+	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
+		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+		struct g_provider *pp __maybe_unused = zsg->zsg_provider;
+
+		/* the provider must already be detached from this zvol */
+		ASSERT3P(pp->private, ==, NULL);
+
+		g_topology_lock();
+		zvol_geom_destroy(zv);
+		g_topology_unlock();
+		mtx_destroy(&zsg->zsg_queue_mtx);
+	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
+		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
+		struct cdev *dev = zsd->zsd_cdev;
+
+		/* dev is NULL when no device node was ever created */
+		if (dev != NULL) {
+			ASSERT3P(dev->si_drv2, ==, NULL);
+			destroy_dev(dev);
+		}
+	}
+
+	mutex_destroy(&zv->zv_state_lock);
+	dataset_kstats_destroy(&zv->zv_kstat);
+	kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
+	kmem_free(zv, sizeof (zvol_state_t));
+	zvol_minors--;
+}
+
+/*
+ * Create a minor node (plus a whole lot more) for the specified volume.
+ *
+ * Returns EEXIST if a zvol with this name is already registered, the error
+ * from dmu_objset_own(), dmu_object_info() or zap_lookup() if one of them
+ * fails, and 0 once the new zvol_state_t has been inserted into the global
+ * table.  A failure to read the volmode property is not fatal: the
+ * zvol_volmode default is used instead.
+ */
+static int
+zvol_create_minor_impl(const char *name)
+{
+	zvol_state_t *zv;
+	objset_t *os;
+	dmu_object_info_t *doi;
+	uint64_t volsize;
+	uint64_t volmode, hash;
+	int error;
+
+	ZFS_LOG(1, "Creating ZVOL %s...", name);
+	hash = zvol_name_hash(name);
+	if ((zv = zvol_find_by_name_hash(name, hash, RW_NONE)) != NULL) {
+		/* the lookup returns with zv_state_lock held; release it */
+		ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+		mutex_exit(&zv->zv_state_lock);
+		return (SET_ERROR(EEXIST));
+	}
+
+	DROP_GIANT();
+
+	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
+
+	/* lie and say we're read-only */
+	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
+	if (error)
+		goto out_doi;
+
+	error = dmu_object_info(os, ZVOL_OBJ, doi);
+	if (error)
+		goto out_dmu_objset_disown;
+
+	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
+	if (error)
+		goto out_dmu_objset_disown;
+
+	/* fall back to the global default when unset or unreadable */
+	error = dsl_prop_get_integer(name,
+	    zfs_prop_to_name(ZFS_PROP_VOLMODE), &volmode, NULL);
+	if (error || volmode == ZFS_VOLMODE_DEFAULT)
+		volmode = zvol_volmode;
+	error = 0;
+
+	/*
+	 * zvol_alloc equivalent ...
+	 */
+	zv = kmem_zalloc(sizeof (*zv), KM_SLEEP);
+	zv->zv_hash = hash;
+	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
+	zv->zv_zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
+	zv->zv_volmode = volmode;
+	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
+		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+		struct g_provider *pp;
+		struct g_geom *gp;
+
+		zsg->zsg_state = ZVOL_GEOM_UNINIT;
+		mtx_init(&zsg->zsg_queue_mtx, "zvol", NULL, MTX_DEF);
+
+		/* stays held until after zvol_geom_run() further down */
+		g_topology_lock();
+		gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
+		gp->start = zvol_geom_bio_start;
+		gp->access = zvol_geom_access;
+		pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
+		pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
+		pp->sectorsize = DEV_BSIZE;
+		pp->mediasize = 0;
+		pp->private = zv;
+
+		zsg->zsg_provider = pp;
+		bioq_init(&zsg->zsg_queue);
+	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
+		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
+		struct cdev *dev;
+		struct make_dev_args args;
+
+		make_dev_args_init(&args);
+		args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
+		args.mda_devsw = &zvol_cdevsw;
+		args.mda_cr = NULL;
+		args.mda_uid = UID_ROOT;
+		args.mda_gid = GID_OPERATOR;
+		args.mda_mode = 0640;
+		args.mda_si_drv2 = zv;
+		/* on failure zsd_cdev simply stays NULL */
+		if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name)
+		    == 0) {
+#if __FreeBSD_version > 1300130
+			dev->si_iosize_max = maxphys;
+#else
+			dev->si_iosize_max = MAXPHYS;
+#endif
+			zsd->zsd_cdev = dev;
+		}
+	}
+	/* bound the copy by the destination, as zvol_rename_minor() does */
+	(void) strlcpy(zv->zv_name, name, sizeof (zv->zv_name));
+	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
+	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
+
+	if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
+		zv->zv_flags |= ZVOL_RDONLY;
+
+	zv->zv_volblocksize = doi->doi_data_block_size;
+	zv->zv_volsize = volsize;
+	zv->zv_objset = os;
+
+	/*
+	 * Open the ZIL only long enough to replay it (or destroy it when
+	 * replay is disabled), then close it again.
+	 */
+	ASSERT3P(zv->zv_zilog, ==, NULL);
+	zv->zv_zilog = zil_open(os, zvol_get_data);
+	if (spa_writeable(dmu_objset_spa(os))) {
+		if (zil_replay_disable)
+			zil_destroy(zv->zv_zilog, B_FALSE);
+		else
+			zil_replay(os, zv, zvol_replay_vector);
+	}
+	zil_close(zv->zv_zilog);
+	zv->zv_zilog = NULL;
+	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
+	dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
+
+	/* TODO: prefetch for geom tasting */
+
+	/* the objset is disowned just below, so do not keep a pointer */
+	zv->zv_objset = NULL;
+out_dmu_objset_disown:
+	dmu_objset_disown(os, B_TRUE, FTAG);
+
+	if (error == 0 && volmode == ZFS_VOLMODE_GEOM) {
+		zvol_geom_run(zv);
+		g_topology_unlock();
+	}
+out_doi:
+	kmem_free(doi, sizeof (dmu_object_info_t));
+	if (error == 0) {
+		rw_enter(&zvol_state_lock, RW_WRITER);
+		zvol_insert(zv);
+		zvol_minors++;
+		rw_exit(&zvol_state_lock);
+		ZFS_LOG(1, "ZVOL %s created.", name);
+	}
+	PICKUP_GIANT();
+	return (error);
+}
+
+/*
+ * Detach the OS device object from its zvol_state_t by clearing the
+ * back-pointer (pp->private for GEOM, dev->si_drv2 for a character device).
+ * The caller must hold zvol_state_lock (asserted).
+ */
+static void
+zvol_clear_private(zvol_state_t *zv)
+{
+	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
+		struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
+		struct g_provider *pp = zsg->zsg_provider;
+
+		if (pp->private == NULL) /* already cleared */
+			return;
+
+		/*
+		 * Mark the queue stopped, wake one sleeper on zsg_queue and
+		 * wait until zsg_state reads ZVOL_GEOM_RUNNING again.
+		 * NOTE(review): presumably the GEOM worker thread flips the
+		 * state back as its acknowledgement before exiting --
+		 * confirm against the worker implementation.
+		 */
+		mtx_lock(&zsg->zsg_queue_mtx);
+		zsg->zsg_state = ZVOL_GEOM_STOPPED;
+		pp->private = NULL;
+		wakeup_one(&zsg->zsg_queue);
+		while (zsg->zsg_state != ZVOL_GEOM_RUNNING)
+			msleep(&zsg->zsg_state, &zsg->zsg_queue_mtx,
+			    0, "zvol:w", 0);
+		mtx_unlock(&zsg->zsg_queue_mtx);
+		ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
+	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
+		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
+		struct cdev *dev = zsd->zsd_cdev;
+
+		if (dev != NULL)
+			dev->si_drv2 = NULL;
+	}
+}
+
+/*
+ * Record the new volume size and, for GEOM-mode volumes, push it to the
+ * provider.  Returns ENXIO when the provider is no longer attached to this
+ * zvol (pp->private == NULL) and 0 in every other case.
+ */
+static int
+zvol_update_volsize(zvol_state_t *zv, uint64_t volsize)
+{
+	struct g_provider *provider;
+
+	zv->zv_volsize = volsize;
+
+	/* only GEOM providers carry a media size that needs updating */
+	if (zv->zv_volmode != ZFS_VOLMODE_GEOM)
+		return (0);
+
+	provider = zv->zv_zso->zso_geom.zsg_provider;
+
+	g_topology_lock();
+
+	if (provider->private == NULL) {
+		g_topology_unlock();
+		return (SET_ERROR(ENXIO));
+	}
+
+	/*
+	 * A zero media size means the size was never set: the ZVOL
+	 * initializes it on first open, which is not a real resize, so no
+	 * resize event is raised in that case.
+	 */
+	if (provider->mediasize != 0)
+		g_resize_provider(provider, zv->zv_volsize);
+	else
+		provider->mediasize = zv->zv_volsize;
+
+	g_topology_unlock();
+
+	return (0);
+}
+
+/*
+ * Platform hook for toggling the read-only state of the disk.  Currently a
+ * no-op here: the body only carries a commented-out call.
+ */
+static void
+zvol_set_disk_ro_impl(zvol_state_t *zv, int flags)
+{
+	// XXX? set_disk_ro(zv->zv_zso->zvo_disk, flags);
+}
+
+/*
+ * Platform hook for updating the disk capacity.  Currently a no-op here:
+ * the body only carries a commented-out call.
+ */
+static void
+zvol_set_capacity_impl(zvol_state_t *zv, uint64_t capacity)
+{
+	// XXX? set_capacity(zv->zv_zso->zvo_disk, capacity);
+}
+
+/*
+ * Table of the platform-specific zvol operations defined in this file;
+ * handed to zvol_register_ops() from zvol_init().
+ */
+static const zvol_platform_ops_t zvol_freebsd_ops = {
+	.zv_free = zvol_free,
+	.zv_rename_minor = zvol_rename_minor,
+	.zv_create_minor = zvol_create_minor_impl,
+	.zv_update_volsize = zvol_update_volsize,
+	.zv_clear_private = zvol_clear_private,
+	.zv_is_zvol = zvol_is_zvol_impl,
+	.zv_set_disk_ro = zvol_set_disk_ro_impl,
+	.zv_set_capacity = zvol_set_capacity_impl,
+};
+
+/*
+ * Public interfaces
+ */
+
+/*
+ * Return non-zero while at least one zvol minor exists (zvol_minors != 0).
+ */
+int
+zvol_busy(void)
+{
+	return (zvol_minors != 0);
+}
+
+/*
+ * Run the common zvol initialization and register this platform's
+ * operations table.  Always returns 0.
+ */
+int
+zvol_init(void)
+{
+	zvol_init_impl();
+	zvol_register_ops(&zvol_freebsd_ops);
+	return (0);
+}
+
+/*
+ * Tear down the zvol subsystem; delegates entirely to zvol_fini_impl().
+ */
+void
+zvol_fini(void)
+{
+	zvol_fini_impl();
+}

diff --git a/zfs/module/os/linux/spl/Makefile.in b/zfs/module/os/linux/spl/Makefile.in
new file mode 100644
index 0000000..b2325f9
--- /dev/null
+++ b/zfs/module/os/linux/spl/Makefile.in

@@ -0,0 +1,17 @@
+$(MODULE)-objs += ../os/linux/spl/spl-atomic.o
+$(MODULE)-objs += ../os/linux/spl/spl-condvar.o
+$(MODULE)-objs += ../os/linux/spl/spl-cred.o
+$(MODULE)-objs += ../os/linux/spl/spl-err.o
+$(MODULE)-objs += ../os/linux/spl/spl-generic.o
+$(MODULE)-objs += ../os/linux/spl/spl-kmem.o
+$(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o
+$(MODULE)-objs += ../os/linux/spl/spl-kstat.o
+$(MODULE)-objs += ../os/linux/spl/spl-proc.o
+$(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o
+$(MODULE)-objs += ../os/linux/spl/spl-taskq.o
+$(MODULE)-objs += ../os/linux/spl/spl-thread.o
+$(MODULE)-objs += ../os/linux/spl/spl-trace.o
+$(MODULE)-objs += ../os/linux/spl/spl-tsd.o
+$(MODULE)-objs += ../os/linux/spl/spl-vmem.o
+$(MODULE)-objs += ../os/linux/spl/spl-xdr.o
+$(MODULE)-objs += ../os/linux/spl/spl-zlib.o

diff --git a/zfs/module/os/linux/spl/README.md b/zfs/module/os/linux/spl/README.md
new file mode 100644
index 0000000..906530b
--- /dev/null
+++ b/zfs/module/os/linux/spl/README.md

@@ -0,0 +1,16 @@
+The Solaris Porting Layer, SPL, is a Linux kernel module which provides a
+compatibility layer used by the [OpenZFS](https://github.com/openzfs/zfs) project.
+
+# Installation
+
+The latest version of the SPL is maintained as part of this repository.
+Only when building ZFS version 0.7.x or earlier must an external SPL release
+be used.  These releases can be found at:
+
+  * Version 0.7.x: https://github.com/zfsonlinux/spl/tree/spl-0.7-release  
+  * Version 0.6.5.x: https://github.com/zfsonlinux/spl/tree/spl-0.6.5-release  
+
+# Release
+
+The SPL is released under a GPLv2 license.  
+For more details see the NOTICE and THIRDPARTYLICENSE files; `UCRL-CODE-235197`

diff --git a/zfs/module/spl/THIRDPARTYLICENSE.gplv2 b/zfs/module/os/linux/spl/THIRDPARTYLICENSE.gplv2
similarity index 100%
rename from zfs/module/spl/THIRDPARTYLICENSE.gplv2
rename to zfs/module/os/linux/spl/THIRDPARTYLICENSE.gplv2


diff --git a/zfs/module/spl/THIRDPARTYLICENSE.gplv2.descrip b/zfs/module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
similarity index 100%
rename from zfs/module/spl/THIRDPARTYLICENSE.gplv2.descrip
rename to zfs/module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip


diff --git a/zfs/module/os/linux/spl/spl-atomic.c b/zfs/module/os/linux/spl/spl-atomic.c
new file mode 100644
index 0000000..accf656
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-atomic.c

@@ -0,0 +1,35 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Atomic Implementation.
+ */
+
+#include <sys/atomic.h>
+
+#ifdef ATOMIC_SPINLOCK
+/*
+ * Global atomic lock declarations.  Defined and exported only when
+ * ATOMIC_SPINLOCK is set.
+ * NOTE(review): presumably taken by the spinlock-based atomic32/atomic64
+ * fallbacks in <sys/atomic.h> -- confirm against that header.
+ */
+DEFINE_SPINLOCK(atomic32_lock);
+DEFINE_SPINLOCK(atomic64_lock);
+
+EXPORT_SYMBOL(atomic32_lock);
+EXPORT_SYMBOL(atomic64_lock);
+#endif /* ATOMIC_SPINLOCK */

diff --git a/zfs/module/os/linux/spl/spl-condvar.c b/zfs/module/os/linux/spl/spl-condvar.c
new file mode 100644
index 0000000..d0461a9
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-condvar.c

@@ -0,0 +1,509 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Condition Variable Implementation.
+ */
+
+#include <sys/condvar.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <linux/hrtimer.h>
+#include <linux/compiler_compat.h>
+#include <linux/mod_compat.h>
+
+#include <linux/sched.h>
+
+#ifdef HAVE_SCHED_SIGNAL_HEADER
+#include <linux/sched/signal.h>
+#endif
+
+#define	MAX_HRTIMEOUT_SLACK_US	1000
+unsigned int spl_schedule_hrtimeout_slack_us = 0;
+
+/*
+ * Setter for the spl_schedule_hrtimeout_slack_us module parameter: parse
+ * the value, reject anything above MAX_HRTIMEOUT_SLACK_US with -EINVAL,
+ * and otherwise hand the string to param_set_uint().
+ */
+static int
+param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
+{
+	unsigned long slack_us;
+	int rc;
+
+	rc = kstrtoul(buf, 0, &slack_us);
+	if (rc)
+		return (rc);
+
+	if (slack_us > MAX_HRTIMEOUT_SLACK_US)
+		return (-EINVAL);
+
+	rc = param_set_uint(buf, kp);
+
+	/* only a negative result is reported as a failure */
+	return (rc < 0 ? rc : 0);
+}
+
+module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
+	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
+MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
+	"schedule_hrtimeout_range() delta/slack value in us, default(0)");
+
+/*
+ * Initialize a condition variable.  Only the default form is supported:
+ * 'name' and 'arg' must be NULL and 'type' must be CV_DEFAULT (asserted).
+ * The cv starts with no waiters, no associated mutex, and one reference,
+ * which __cv_destroy() drops.
+ */
+void
+__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
+{
+	ASSERT(cvp);
+	ASSERT(name == NULL);
+	ASSERT(type == CV_DEFAULT);
+	ASSERT(arg == NULL);
+
+	/* wait queues: one for waiters, one for a pending destroy */
+	init_waitqueue_head(&cvp->cv_event);
+	init_waitqueue_head(&cvp->cv_destroy);
+
+	/* counters and mutex association */
+	atomic_set(&cvp->cv_waiters, 0);
+	atomic_set(&cvp->cv_refs, 1);
+	cvp->cv_mutex = NULL;
+
+	cvp->cv_magic = CV_MAGIC;
+}
+EXPORT_SYMBOL(__cv_init);
+
+/*
+ * Wake-up condition used by __cv_destroy(): returns 1 once the cv has
+ * neither waiters nor references left, 0 otherwise.
+ */
+static int
+cv_destroy_wakeup(kcondvar_t *cvp)
+{
+	/* still in use by a waiter or a signaller */
+	if (atomic_read(&cvp->cv_waiters) || atomic_read(&cvp->cv_refs))
+		return (0);
+
+	ASSERT(cvp->cv_mutex == NULL);
+	ASSERT(!waitqueue_active(&cvp->cv_event));
+
+	return (1);
+}
+
+/*
+ * Destroy a condition variable.  Marks it CV_DESTROY, drops one reference
+ * (cv_refs is set to 1 by __cv_init()) and then blocks until
+ * cv_destroy_wakeup() reports that no waiters or references remain.
+ */
+void
+__cv_destroy(kcondvar_t *cvp)
+{
+	ASSERT(cvp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+
+	cvp->cv_magic = CV_DESTROY;
+	atomic_dec(&cvp->cv_refs);
+
+	/* Block until all waiters are woken and references dropped. */
+	/* The timeout of 1 makes this a re-check loop, not one long sleep. */
+	while (cv_destroy_wakeup(cvp) == 0)
+		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
+
+	ASSERT3P(cvp->cv_mutex, ==, NULL);
+	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
+	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
+	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
+}
+EXPORT_SYMBOL(__cv_destroy);
+
+/*
+ * Common body of the untimed cv_wait variants.
+ *
+ * cvp   - condition variable to wait on
+ * mp    - mutex held by the caller (asserted); released while sleeping and
+ *         re-acquired before returning
+ * state - task state passed to prepare_to_wait_exclusive()
+ * io    - non-zero to sleep with io_schedule() instead of schedule()
+ */
+static void
+cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
+{
+	DEFINE_WAIT(wait);
+	kmutex_t *m;
+
+	ASSERT(cvp);
+	ASSERT(mp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	ASSERT(mutex_owned(mp));
+	atomic_inc(&cvp->cv_refs);
+
+	/* remember the first mutex used with this cv (debug check only) */
+	m = READ_ONCE(cvp->cv_mutex);
+	if (!m)
+		m = xchg(&cvp->cv_mutex, mp);
+	/* Ensure the same mutex is used by all callers */
+	ASSERT(m == NULL || m == mp);
+
+	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
+	atomic_inc(&cvp->cv_waiters);
+
+	/*
+	 * Mutex should be dropped after prepare_to_wait() this
+	 * ensures we're linked in to the waiters list and avoids the
+	 * race where 'cvp->cv_waiters > 0' but the list is empty.
+	 */
+	mutex_exit(mp);
+	if (io)
+		io_schedule();
+	else
+		schedule();
+
+	/* No more waiters a different mutex could be used */
+	if (atomic_dec_and_test(&cvp->cv_waiters)) {
+		/*
+		 * This is set without any lock, so it's racy. But this is
+		 * just for debug anyway, so make it best-effort
+		 */
+		cvp->cv_mutex = NULL;
+		wake_up(&cvp->cv_destroy);
+	}
+
+	finish_wait(&cvp->cv_event, &wait);
+	atomic_dec(&cvp->cv_refs);
+
+	/*
+	 * Hold mutex after we release the cvp, otherwise we could dead lock
+	 * with a thread holding the mutex and call cv_destroy.
+	 */
+	mutex_enter(mp);
+}
+
+/*
+ * Wait on cvp in TASK_UNINTERRUPTIBLE state; mp must be held by the caller.
+ */
+void
+__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
+{
+	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
+}
+EXPORT_SYMBOL(__cv_wait);
+
+/*
+ * Same as __cv_wait(), but the sleep is done with io_schedule().
+ */
+void
+__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
+{
+	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
+}
+EXPORT_SYMBOL(__cv_wait_io);
+
+/*
+ * Interruptible wait that sleeps with io_schedule().  Returns 0 when a
+ * signal is pending for the current task on wake-up, 1 otherwise.
+ */
+int
+__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
+{
+	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
+
+	if (signal_pending(current))
+		return (0);
+
+	return (1);
+}
+EXPORT_SYMBOL(__cv_wait_io_sig);
+
+/*
+ * Interruptible wait.  Returns 0 when a signal is pending for the current
+ * task on wake-up, 1 otherwise.
+ */
+int
+__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
+{
+	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
+
+	if (signal_pending(current))
+		return (0);
+
+	return (1);
+}
+EXPORT_SYMBOL(__cv_wait_sig);
+
+/*
+ * Wait in TASK_INTERRUPTIBLE state with every signal blocked for the
+ * duration of the wait; the caller's signal mask is restored afterwards.
+ */
+void
+__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
+{
+	sigset_t blocked, saved;
+
+	sigfillset(&blocked);
+	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
+	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
+	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
+}
+EXPORT_SYMBOL(__cv_wait_idle);
+
+#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
+#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
+#else
+
+/*
+ * Pairs a timer with the task to wake when it fires; used by the fallback
+ * spl_io_schedule_timeout() below.
+ */
+struct spl_task_timer {
+	struct timer_list timer;	/* timer armed for the timeout */
+	struct task_struct *task;	/* task woken by __cv_wakeup() */
+};
+
+/*
+ * Timer callback: recover the enclosing spl_task_timer from the timer and
+ * wake the task recorded in it.
+ */
+static void
+__cv_wakeup(spl_timer_list_t t)
+{
+	struct timer_list *tmr = (struct timer_list *)t;
+	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);
+
+	wake_up_process(task_timer->task);
+}
+
+/*
+ * Fallback used when HAVE_IO_SCHEDULE_TIMEOUT is not defined: sleep with
+ * io_schedule() while an on-stack timer is armed to wake the current task
+ * after 'time_left' jiffies.  Returns the number of jiffies remaining until
+ * the deadline, clamped to 0.
+ */
+static long
+spl_io_schedule_timeout(long time_left)
+{
+	long expire_time = jiffies + time_left;
+	struct spl_task_timer task_timer;
+	struct timer_list *timer = &task_timer.timer;
+
+	task_timer.task = current;
+
+	timer_setup(timer, __cv_wakeup, 0);
+
+	timer->expires = expire_time;
+	add_timer(timer);
+
+	io_schedule();
+
+	/* the timer lives on this stack frame, so stop it before leaving */
+	del_timer_sync(timer);
+
+	time_left = expire_time - jiffies;
+
+	return (time_left < 0 ? 0 : time_left);
+}
+#endif
+
+/*
+ * 'expire_time' argument is an absolute wall clock time in jiffies.
+ * Returns -1 if the deadline has already passed on entry or no time is
+ * left after the sleep (timeout), and 1 if the sleep ended with time still
+ * remaining.  The remaining time itself is not returned.
+ *
+ * 'state' is the task state used for the wait; a non-zero 'io' selects
+ * spl_io_schedule_timeout() over schedule_timeout().  'mp' must be held by
+ * the caller (asserted) and is held again on return, except on the early
+ * -1 return where it is never released.
+ */
+static clock_t
+__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
+    int state, int io)
+{
+	DEFINE_WAIT(wait);
+	kmutex_t *m;
+	clock_t time_left;
+
+	ASSERT(cvp);
+	ASSERT(mp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	ASSERT(mutex_owned(mp));
+
+	/* XXX - Does not handle jiffie wrap properly */
+	time_left = expire_time - jiffies;
+	if (time_left <= 0)
+		return (-1);
+
+	atomic_inc(&cvp->cv_refs);
+	m = READ_ONCE(cvp->cv_mutex);
+	if (!m)
+		m = xchg(&cvp->cv_mutex, mp);
+	/* Ensure the same mutex is used by all callers */
+	ASSERT(m == NULL || m == mp);
+
+	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
+	atomic_inc(&cvp->cv_waiters);
+
+	/*
+	 * Mutex should be dropped after prepare_to_wait() this
+	 * ensures we're linked in to the waiters list and avoids the
+	 * race where 'cvp->cv_waiters > 0' but the list is empty.
+	 */
+	mutex_exit(mp);
+	if (io)
+		time_left = spl_io_schedule_timeout(time_left);
+	else
+		time_left = schedule_timeout(time_left);
+
+	/* No more waiters a different mutex could be used */
+	if (atomic_dec_and_test(&cvp->cv_waiters)) {
+		/*
+		 * This is set without any lock, so it's racy. But this is
+		 * just for debug anyway, so make it best-effort
+		 */
+		cvp->cv_mutex = NULL;
+		wake_up(&cvp->cv_destroy);
+	}
+
+	finish_wait(&cvp->cv_event, &wait);
+	atomic_dec(&cvp->cv_refs);
+
+	/*
+	 * Hold mutex after we release the cvp, otherwise we could dead lock
+	 * with a thread holding the mutex and call cv_destroy.
+	 */
+	mutex_enter(mp);
+	return (time_left > 0 ? 1 : -1);
+}
+
+/*
+ * Timed wait in TASK_UNINTERRUPTIBLE state; 'exp_time' is an absolute time
+ * in jiffies.  Returns the result of __cv_timedwait_common() (1 or -1).
+ */
+int
+__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
+{
+	return (__cv_timedwait_common(cvp, mp, exp_time,
+	    TASK_UNINTERRUPTIBLE, 0));
+}
+EXPORT_SYMBOL(__cv_timedwait);
+
+/*
+ * Same as __cv_timedwait(), but requests the I/O flavour of the sleep
+ * (io = 1) from __cv_timedwait_common().
+ */
+int
+__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
+{
+	return (__cv_timedwait_common(cvp, mp, exp_time,
+	    TASK_UNINTERRUPTIBLE, 1));
+}
+EXPORT_SYMBOL(__cv_timedwait_io);
+
+/*
+ * Interruptible timed wait; 'exp_time' is an absolute time in jiffies.
+ * Returns 0 when a signal is pending for the current task after the wait,
+ * otherwise the result of __cv_timedwait_common() (1 or -1).
+ */
+int
+__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
+{
+	int wait_rc;
+
+	wait_rc = __cv_timedwait_common(cvp, mp, exp_time,
+	    TASK_INTERRUPTIBLE, 0);
+
+	/* a pending signal takes precedence over the wait result */
+	if (signal_pending(current))
+		return (0);
+
+	return (wait_rc);
+}
+EXPORT_SYMBOL(__cv_timedwait_sig);
+
+/*
+ * Timed wait in TASK_INTERRUPTIBLE state with every signal blocked for the
+ * duration of the wait; the caller's signal mask is restored afterwards.
+ * Returns the result of __cv_timedwait_common() (1 or -1).
+ */
+int
+__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
+{
+	sigset_t blocked, saved;
+	int rc;
+
+	sigfillset(&blocked);
+	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
+	rc = __cv_timedwait_common(cvp, mp, exp_time,
+	    TASK_INTERRUPTIBLE, 0);
+	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
+
+	return (rc);
+}
+EXPORT_SYMBOL(__cv_timedwait_idle);
+/*
+ * 'expire_time' argument is an absolute clock time in nanoseconds.
+ * Returns -1 if the deadline has already passed on entry, 1 if
+ * schedule_hrtimeout_range() returned -EINTR, and -1 for any other result
+ * of that call.  The remaining time itself is not returned.
+ * NOTE(review): per the kernel documentation -EINTR means the task was
+ * woken before the timer expired -- confirm for the kernels supported.
+ *
+ * 'res' is the caller's requested resolution; it feeds the slack passed to
+ * the hrtimer (see below).  'mp' must be held by the caller (asserted).
+ */
+static clock_t
+__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
+    hrtime_t res, int state)
+{
+	DEFINE_WAIT(wait);
+	kmutex_t *m;
+	hrtime_t time_left;
+	ktime_t ktime_left;
+	u64 slack = 0;
+	int rc;
+
+	ASSERT(cvp);
+	ASSERT(mp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	ASSERT(mutex_owned(mp));
+
+	time_left = expire_time - gethrtime();
+	if (time_left <= 0)
+		return (-1);
+
+	atomic_inc(&cvp->cv_refs);
+	m = READ_ONCE(cvp->cv_mutex);
+	if (!m)
+		m = xchg(&cvp->cv_mutex, mp);
+	/* Ensure the same mutex is used by all callers */
+	ASSERT(m == NULL || m == mp);
+
+	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
+	atomic_inc(&cvp->cv_waiters);
+
+	/*
+	 * Mutex should be dropped after prepare_to_wait() this
+	 * ensures we're linked in to the waiters list and avoids the
+	 * race where 'cvp->cv_waiters > 0' but the list is empty.
+	 */
+	mutex_exit(mp);
+
+	ktime_left = ktime_set(0, time_left);
+	/*
+	 * Slack is the larger of 'res' and the module parameter, capped at
+	 * MAX_HRTIMEOUT_SLACK_US (both converted to nanoseconds).
+	 */
+	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
+	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
+	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
+
+	/* No more waiters a different mutex could be used */
+	if (atomic_dec_and_test(&cvp->cv_waiters)) {
+		/*
+		 * This is set without any lock, so it's racy. But this is
+		 * just for debug anyway, so make it best-effort
+		 */
+		cvp->cv_mutex = NULL;
+		wake_up(&cvp->cv_destroy);
+	}
+
+	finish_wait(&cvp->cv_event, &wait);
+	atomic_dec(&cvp->cv_refs);
+
+	mutex_enter(mp);
+	return (rc == -EINTR ? 1 : -1);
+}
+
+/*
+ * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
+ *
+ * 'tim' is treated as an absolute time when CALLOUT_FLAG_ABSOLUTE is set in
+ * 'flag'; otherwise it is relative and the current gethrtime() value is
+ * added to it before waiting.
+ */
+static int
+cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag, int state)
+{
+	hrtime_t deadline = tim;
+
+	if ((flag & CALLOUT_FLAG_ABSOLUTE) == 0)
+		deadline += gethrtime();
+
+	return (__cv_timedwait_hires(cvp, mp, deadline, res, state));
+}
+
+/*
+ * High-resolution timed wait in TASK_UNINTERRUPTIBLE state.  See
+ * cv_timedwait_hires_common() for how 'tim' and 'flag' are interpreted.
+ */
+int
+cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+	    TASK_UNINTERRUPTIBLE));
+}
+EXPORT_SYMBOL(cv_timedwait_hires);
+
+/*
+ * Interruptible high-resolution timed wait.  Returns 0 when a signal is
+ * pending for the current task after the wait, otherwise the result of
+ * cv_timedwait_hires_common().
+ */
+int
+cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag)
+{
+	int wait_rc;
+
+	wait_rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+	    TASK_INTERRUPTIBLE);
+
+	/* a pending signal takes precedence over the wait result */
+	if (signal_pending(current))
+		return (0);
+
+	return (wait_rc);
+}
+EXPORT_SYMBOL(cv_timedwait_sig_hires);
+
+/*
+ * High-resolution timed wait in TASK_INTERRUPTIBLE state with every signal
+ * blocked for the duration of the wait; the caller's signal mask is
+ * restored afterwards.  Returns the result of cv_timedwait_hires_common().
+ */
+int
+cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag)
+{
+	sigset_t blocked, saved;
+	int rc;
+
+	sigfillset(&blocked);
+	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
+	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+	    TASK_INTERRUPTIBLE);
+	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
+
+	return (rc);
+}
+EXPORT_SYMBOL(cv_timedwait_idle_hires);
+
+/*
+ * Wake one waiter on cvp, if there is any.  A reference is held on the cv
+ * for the duration of the call.
+ */
+void
+__cv_signal(kcondvar_t *cvp)
+{
+	ASSERT(cvp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	atomic_inc(&cvp->cv_refs);
+
+	/*
+	 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
+	 * waiter will be set runnable with each call to wake_up().
+	 * Additionally wake_up() holds a spin_lock associated with
+	 * the wait queue to ensure we don't race waking up processes.
+	 */
+	if (atomic_read(&cvp->cv_waiters) > 0)
+		wake_up(&cvp->cv_event);
+
+	atomic_dec(&cvp->cv_refs);
+}
+EXPORT_SYMBOL(__cv_signal);
+
+/*
+ * Wake every waiter on cvp, if there is any.  A reference is held on the
+ * cv for the duration of the call.
+ */
+void
+__cv_broadcast(kcondvar_t *cvp)
+{
+	ASSERT(cvp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	atomic_inc(&cvp->cv_refs);
+
+	/*
+	 * Wake_up_all() will wake up all waiters even those which
+	 * have the WQ_FLAG_EXCLUSIVE flag set.
+	 */
+	if (atomic_read(&cvp->cv_waiters) > 0)
+		wake_up_all(&cvp->cv_event);
+
+	atomic_dec(&cvp->cv_refs);
+}
+EXPORT_SYMBOL(__cv_broadcast);

diff --git a/zfs/module/os/linux/spl/spl-cred.c b/zfs/module/os/linux/spl/spl-cred.c
new file mode 100644
index 0000000..d407fc6
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-cred.c

@@ -0,0 +1,167 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Credential Implementation.
+ */
+
+#include <sys/cred.h>
+
+/*
+ * Binary-search the supplemental group list for 'grp'.  Returns 1 when the
+ * gid is present, 0 when it is not or when 'group_info' is NULL.  The list
+ * is assumed to be sorted in ascending gid order, as the search requires.
+ */
+static int
+cr_groups_search(const struct group_info *group_info, kgid_t grp)
+{
+	unsigned int left, right, mid;
+	gid_t want, have;
+
+	if (!group_info)
+		return (0);
+
+	want = KGID_TO_SGID(grp);
+	left = 0;
+	right = group_info->ngroups;
+	while (left < right) {
+		mid = (left + right) / 2;
+		have = KGID_TO_SGID(GROUP_AT(group_info, mid));
+
+		/*
+		 * Compare the ids directly.  Subtracting two unsigned gids
+		 * and storing the difference in an int gives the wrong sign
+		 * once the ids are more than INT_MAX apart, which would send
+		 * the search into the wrong half.
+		 */
+		if (want > have)
+			left = mid + 1;
+		else if (want < have)
+			right = mid;
+		else
+			return (1);
+	}
+	return (0);
+}
+
+/* Hold a reference on the credential (wraps get_cred()) */
+void
+crhold(cred_t *cr)
+{
+	(void) get_cred((const cred_t *)cr);
+}
+
+/* Release a reference on the credential (wraps put_cred()) */
+void
+crfree(cred_t *cr)
+{
+	put_cred((const cred_t *)cr);
+}
+
+/*
+ * Return the number of supplemental groups that crgetgroups() exposes for
+ * this credential.
+ */
+int
+crgetngroups(const cred_t *cr)
+{
+	int ngroups = cr->group_info->ngroups;
+
+#ifndef HAVE_GROUP_INFO_GID
+	/*
+	 * For Linux <= 4.8, crgetgroups() only returns gi->blocks[0], which
+	 * holds just the first NGROUPS_PER_BLOCK groups, so the count is
+	 * capped to match (with a one-time warning).
+	 */
+	if (ngroups > NGROUPS_PER_BLOCK) {
+		WARN_ON_ONCE(1);
+		ngroups = NGROUPS_PER_BLOCK;
+	}
+#endif
+	return (ngroups);
+}
+
+/*
+ * Return an array of supplemental gids.  The returned address is safe
+ * to use as long as the caller has taken a reference with crhold().
+ *
+ * Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d
+ * array via ->gid.
+ *
+ * On the older layout only the first block is returned, and NULL is
+ * returned when the credential has no blocks at all.
+ */
+gid_t *
+crgetgroups(const cred_t *cr)
+{
+	struct group_info *gi;
+	gid_t *gids = NULL;
+
+	gi = cr->group_info;
+#ifdef HAVE_GROUP_INFO_GID
+	gids = KGIDP_TO_SGIDP(gi->gid);
+#else
+	if (gi->nblocks > 0)
+		gids = KGIDP_TO_SGIDP(gi->blocks[0]);
+#endif
+	return (gids);
+}
+
+/*
+ * Return 1 when 'gid' is one of the supplemental groups of the supplied
+ * credential, 0 otherwise.
+ */
+int
+groupmember(gid_t gid, const cred_t *cr)
+{
+	return (cr_groups_search(cr->group_info, SGID_TO_KGID(gid)));
+}
+
+/* Return the effective user id; the value is taken from cr->fsuid */
+uid_t
+crgetuid(const cred_t *cr)
+{
+	return (KUID_TO_SUID(cr->fsuid));
+}
+
+/* Return the real user id; the value is taken from cr->uid */
+uid_t
+crgetruid(const cred_t *cr)
+{
+	return (KUID_TO_SUID(cr->uid));
+}
+
+/* Return the effective group id; the value is taken from cr->fsgid */
+gid_t
+crgetgid(const cred_t *cr)
+{
+	return (KGID_TO_SGID(cr->fsgid));
+}
+
+/*
+ * Return the initial user ns or nop_mnt_idmap, cast to zidmap_t.
+ * nop_mnt_idmap is used when HAVE_IOPS_CREATE_IDMAP is defined,
+ * init_user_ns otherwise.
+ */
+zidmap_t *
+zfs_get_init_idmap(void)
+{
+#ifdef HAVE_IOPS_CREATE_IDMAP
+	return ((zidmap_t *)&nop_mnt_idmap);
+#else
+	return ((zidmap_t *)&init_user_ns);
+#endif
+}
+
+EXPORT_SYMBOL(zfs_get_init_idmap);
+EXPORT_SYMBOL(crhold);
+EXPORT_SYMBOL(crfree);
+EXPORT_SYMBOL(crgetuid);
+EXPORT_SYMBOL(crgetruid);
+EXPORT_SYMBOL(crgetgid);
+EXPORT_SYMBOL(crgetngroups);
+EXPORT_SYMBOL(crgetgroups);
+EXPORT_SYMBOL(groupmember);

diff --git a/zfs/module/os/linux/spl/spl-err.c b/zfs/module/os/linux/spl/spl-err.c
new file mode 100644
index 0000000..10b768d
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-err.c

@@ -0,0 +1,123 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Error Implementation.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+
+/*
+ * It is often useful to actually have the panic crash the node so you
+ * can then get notified of the event, get the crashdump for later
+ * analysis and other such goodies.
+ * But we would still default to the current default of not to do that.
+ */
+/* BEGIN CSTYLED */
+unsigned int spl_panic_halt;
+module_param(spl_panic_halt, uint, 0644);
+MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
+/* END CSTYLED */
+
+/*
+ * Log the pid of the current task and then dump its kernel stack to the
+ * kernel log via dump_stack().
+ */
+void
+spl_dumpstack(void)
+{
+	/* No explicit KERN_* level is given, so the default level applies. */
+	printk("Showing stack for process %d\n", current->pid);
+	dump_stack();
+}
+EXPORT_SYMBOL(spl_dumpstack);
+
+/*
+ * Report an assertion-style failure.  The formatted message and the
+ * location (basename of 'file', 'line', 'func') are logged at KERN_EMERG.
+ * When the spl_panic_halt module parameter is non-zero the kernel is
+ * panicked; otherwise the stack is dumped and the calling thread is parked
+ * forever in an uninterruptible sleep so it can be inspected.
+ *
+ * The function never actually returns; the int return type only exists
+ * for the benefit of callers that use it in an expression.
+ */
+int
+spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	char msg[MAXMSGLEN];
+	const char *slash, *basename;
+	va_list args;
+
+	/* Strip any leading directories from the source file name. */
+	slash = strrchr(file, '/');
+	basename = (slash != NULL) ? slash + 1 : file;
+
+	va_start(args, fmt);
+	(void) vsnprintf(msg, sizeof (msg), fmt, args);
+	va_end(args);
+
+	printk(KERN_EMERG "%s", msg);
+	printk(KERN_EMERG "PANIC at %s:%d:%s()\n", basename, line, func);
+	if (spl_panic_halt)
+		panic("%s", msg);
+
+	spl_dumpstack();
+
+	/* Halt the thread to facilitate further debugging */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	for (;;)
+		schedule();
+
+	/* Unreachable */
+	return (1);
+}
+EXPORT_SYMBOL(spl_panic);
+
+/*
+ * va_list flavour of cmn_err().  The message is formatted into a local
+ * MAXMSGLEN buffer and then emitted according to the severity 'ce':
+ *
+ *   CE_IGNORE - nothing is printed
+ *   CE_CONT   - printed as-is, without prefix or trailing newline
+ *   CE_NOTE   - KERN_NOTICE, prefixed with "NOTICE: "
+ *   CE_WARN   - KERN_WARNING, prefixed with "WARNING: "
+ *   CE_PANIC  - KERN_EMERG, prefixed with "PANIC: "; the stack is dumped
+ *               and the calling thread is halted forever
+ *
+ * Any other severity value is silently ignored.
+ */
+void
+vcmn_err(int ce, const char *fmt, va_list ap)
+{
+	char text[MAXMSGLEN];
+
+	vsnprintf(text, sizeof (text), fmt, ap);
+
+	switch (ce) {
+	case CE_IGNORE:
+		break;
+
+	case CE_CONT:
+		printk("%s", text);
+		break;
+
+	case CE_NOTE:
+		printk(KERN_NOTICE "NOTICE: %s\n", text);
+		break;
+
+	case CE_WARN:
+		printk(KERN_WARNING "WARNING: %s\n", text);
+		break;
+
+	case CE_PANIC:
+		printk(KERN_EMERG "PANIC: %s\n", text);
+		spl_dumpstack();
+
+		/* Halt the thread to facilitate further debugging */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		for (;;)
+			schedule();
+	}
+} /* vcmn_err() */
+EXPORT_SYMBOL(vcmn_err);
+
+/*
+ * printf-style front end for vcmn_err(): collect the variadic arguments
+ * into a va_list and forward them together with the severity 'ce'.
+ */
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vcmn_err(ce, fmt, ap);
+	va_end(ap);
+} /* cmn_err() */
+EXPORT_SYMBOL(cmn_err);

diff --git a/zfs/module/os/linux/spl/spl-generic.c b/zfs/module/os/linux/spl/spl-generic.c
new file mode 100644
index 0000000..2cb5251
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-generic.c

@@ -0,0 +1,881 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Generic Implementation.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/systeminfo.h>
+#include <sys/vmsystm.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/vmem.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/taskq.h>
+#include <sys/tsd.h>
+#include <sys/zmod.h>
+#include <sys/debug.h>
+#include <sys/proc.h>
+#include <sys/kstat.h>
+#include <sys/file.h>
+#include <sys/sunddi.h>
+#include <linux/ctype.h>
+#include <sys/disp.h>
+#include <sys/random.h>
+#include <sys/strings.h>
+#include <linux/kmod.h>
+#include "zfs_gitrev.h"
+#include <linux/mod_compat.h>
+#include <sys/cred.h>
+#include <sys/vnode.h>
+#include <sys/misc.h>
+
+char spl_gitrev[64] = ZFS_META_GITREV;
+
+/* BEGIN CSTYLED */
+unsigned long spl_hostid = 0;
+EXPORT_SYMBOL(spl_hostid);
+/* BEGIN CSTYLED */
+module_param(spl_hostid, ulong, 0644);
+MODULE_PARM_DESC(spl_hostid, "The system hostid.");
+/* END CSTYLED */
+
+proc_t p0;
+EXPORT_SYMBOL(p0);
+
+/*
+ * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ *
+ * "Further scramblings of Marsaglia's xorshift generators"
+ * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ *
+ * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
+ * is to provide bytes containing random numbers. It is mapped to /dev/urandom
+ * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
+ * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
+ * we can implement it using a fast PRNG that we seed using Linux' actual
+ * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
+ * with an independent seed so that all calls to random_get_pseudo_bytes() are
+ * free of atomic instructions.
+ *
+ * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
+ * to generate words larger than 128 bits will paradoxically be limited to
+ * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
+ * 128-bit words and selecting the first will implicitly select the second. If
+ * a caller finds this behavior undesirable, random_get_bytes() should be used
+ * instead.
+ *
+ * XXX: Linux interrupt handlers that trigger within the critical section
+ * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * see the same numbers. Nothing in the code currently calls this in an
+ * interrupt handler, so this is considered to be okay. If that becomes a
+ * problem, we could create a set of per-cpu variables for interrupt handlers
+ * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
+ * true.
+ */
+void __percpu *spl_pseudo_entropy;
+
+/*
+ * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
+ * file:
+ *
+ * http://xorshift.di.unimi.it/xorshift128plus.c
+ */
+
+/*
+ * Advance the two-word generator state 's' by one step and return the next
+ * 64-bit output.  Both s[0] and s[1] are overwritten with the new state.
+ */
+static inline uint64_t
+spl_rand_next(uint64_t *s)
+{
+	uint64_t s1 = s[0];
+	const uint64_t s0 = s[1];
+	s[0] = s0;		/* old s[1] becomes the new s[0] */
+	s1 ^= s1 << 23; // a
+	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
+	return (s[1] + s0);	/* output is the sum of new s[1] and old s[1] */
+}
+
+/*
+ * Jump the generator state 's' far ahead in its sequence.  For every bit of
+ * the JUMP constants the current state is XOR-accumulated into (s0, s1) if
+ * the bit is set, and the generator is stepped once regardless; the
+ * accumulated value then replaces the state.  spl_random_init() uses this
+ * to hand each CPU its own starting point in the sequence.
+ */
+static inline void
+spl_rand_jump(uint64_t *s)
+{
+	static const uint64_t JUMP[] =
+	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+
+	uint64_t s0 = 0;
+	uint64_t s1 = 0;
+	int i, b;
+	/* Walk all 128 bits of JUMP, least significant bit of each word first */
+	for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
+		for (b = 0; b < 64; b++) {
+			if (JUMP[i] & 1ULL << b) {
+				s0 ^= s[0];
+				s1 ^= s[1];
+			}
+			(void) spl_rand_next(s);
+		}
+
+	s[0] = s0;
+	s[1] = s1;
+}
+
+/*
+ * Fill 'ptr' with 'len' pseudo-random bytes produced by the calling CPU's
+ * private generator.  The per-cpu seed is copied to the stack, advanced
+ * there, and written back once the buffer is full; get_cpu_ptr() /
+ * put_cpu_ptr() bracket the whole operation.  Always returns 0.
+ */
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+	uint64_t *seed, state[2];
+
+	ASSERT(ptr);
+
+	seed = get_cpu_ptr(spl_pseudo_entropy);
+
+	state[0] = seed[0];
+	state[1] = seed[1];
+
+	while (len != 0) {
+		union {
+			uint64_t ui64;
+			uint8_t byte[sizeof (uint64_t)];
+		} word;
+		int chunk = MIN(len, sizeof (uint64_t));
+		int k;
+
+		len -= chunk;
+		word.ui64 = spl_rand_next(state);
+
+		/* Emit the low 'chunk' bytes, highest index first. */
+		for (k = chunk; k > 0; k--)
+			*ptr++ = word.byte[k - 1];
+	}
+
+	seed[0] = state[0];
+	seed[1] = state[1];
+
+	put_cpu_ptr(spl_pseudo_entropy);
+
+	return (0);
+}
+
+
+EXPORT_SYMBOL(random_get_pseudo_bytes);
+
+#if BITS_PER_LONG == 32
+
+/*
+ * Support 64/64 => 64 division on a 32-bit platform.  While the kernel
+ * provides a div64_u64() function for this we do not use it because the
+ * implementation is flawed.  There are cases which return incorrect
+ * results as late as linux-2.6.35.  Until this is fixed upstream the
+ * spl must provide its own implementation.
+ *
+ * This implementation is a slightly modified version of the algorithm
+ * proposed by the book 'Hacker's Delight'.  The original source can be
+ * found here and is available for use without restriction.
+ *
+ * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
+ */
+
+/*
+ * Calculate the number of leading zero bits in a 64-bit value using a
+ * binary search over the bit positions.  Returns 64 when x is zero.
+ */
+static int
+nlz64(uint64_t x)
+{
+	int zeros = 0;
+
+	if (x == 0)
+		return (64);
+
+	/*
+	 * At each step, if the top half of the remaining window is empty,
+	 * count it and shift the interesting bits up into its place.
+	 */
+	if ((x >> 32) == 0) {
+		zeros += 32;
+		x <<= 32;
+	}
+	if ((x >> 48) == 0) {
+		zeros += 16;
+		x <<= 16;
+	}
+	if ((x >> 56) == 0) {
+		zeros += 8;
+		x <<= 8;
+	}
+	if ((x >> 60) == 0) {
+		zeros += 4;
+		x <<= 4;
+	}
+	if ((x >> 62) == 0) {
+		zeros += 2;
+		x <<= 2;
+	}
+	if ((x >> 63) == 0)
+		zeros += 1;
+
+	return (zeros);
+}
+
+/*
+ * Newer kernels have a div_u64() function but we define our own
+ * to simplify portability between kernel versions.
+ *
+ * Divides the 64-bit value u by the 32-bit value v and returns the
+ * quotient.  do_div() updates u in place with the quotient; the remainder
+ * it returns is discarded.
+ */
+static inline uint64_t
+__div_u64(uint64_t u, uint32_t v)
+{
+	(void) do_div(u, v);
+	return (u);
+}
+
+/*
+ * Turn off missing prototypes warning for these functions. They are
+ * replacements for libgcc-provided functions and will never be called
+ * directly.
+ */
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#endif
+
+/*
+ * Implementation of 64-bit unsigned division for 32-bit machines.
+ *
+ * First the procedure takes care of the case in which the divisor is a
+ * 32-bit quantity. There are two subcases: (1) If the left half of the
+ * dividend is less than the divisor, one execution of do_div() is all that
+ * is required (overflow is not possible). (2) Otherwise it does two
+ * divisions, using the grade school method.
+ *
+ * When the divisor does not fit in 32 bits, the divisor is normalized so
+ * that its top 32 bits can be used as a 32-bit divisor, an estimate of the
+ * quotient is computed from the halved dividend, and the estimate is then
+ * corrected by at most one.
+ */
+uint64_t
+__udivdi3(uint64_t u, uint64_t v)
+{
+	uint64_t u0, u1, v1, q0, q1, k;
+	int n;
+
+	if (v >> 32 == 0) {			// If v < 2**32:
+		if (u >> 32 < v) {		// If u/v cannot overflow,
+			return (__div_u64(u, v)); // just do one division.
+		} else {			// If u/v would overflow:
+			u1 = u >> 32;		// Break u into two halves.
+			u0 = u & 0xFFFFFFFF;
+			q1 = __div_u64(u1, v);	// First quotient digit.
+			k  = u1 - q1 * v;	// First remainder, < v.
+			u0 += (k << 32);
+			q0 = __div_u64(u0, v);	// Second quotient digit.
+			return ((q1 << 32) + q0);
+		}
+	} else {				// If v >= 2**32:
+		n = nlz64(v);			// 0 <= n <= 31.
+		v1 = (v << n) >> 32;		// Normalize divisor, MSB is 1.
+		u1 = u >> 1;			// To ensure no overflow.
+		q1 = __div_u64(u1, v1);		// Get quotient from
+		q0 = (q1 << n) >> 31;		// Undo normalization and
+						// division of u by 2.
+		if (q0 != 0)			// Make q0 correct or
+			q0 = q0 - 1;		// too small by 1.
+		if ((u - q0 * v) >= v)
+			q0 = q0 + 1;		// Now q0 is correct.
+
+		return (q0);
+	}
+}
+EXPORT_SYMBOL(__udivdi3);
+
+/* BEGIN CSTYLED */
+#ifndef abs64
+#define	abs64(x)	({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
+#endif
+/* END CSTYLED */
+
+/*
+ * Implementation of 64-bit signed division for 32-bit machines.
+ *
+ * The magnitudes of both operands are divided with __udivdi3() and the
+ * quotient is negated when the operands have different signs.
+ */
+int64_t
+__divdi3(int64_t u, int64_t v)
+{
+	int64_t q, t;
+	q = __udivdi3(abs64(u), abs64(v));
+	t = (u ^ v) >> 63;	// If u, v have different
+	return ((q ^ t) - t);	// signs, negate q.
+}
+EXPORT_SYMBOL(__divdi3);
+
+/*
+ * Implementation of 64-bit unsigned modulo for 32-bit machines.
+ *
+ * The remainder is derived from the quotient computed by __udivdi3():
+ * dividend - divisor * (dividend / divisor).
+ */
+uint64_t
+__umoddi3(uint64_t dividend, uint64_t divisor)
+{
+	return (dividend - (divisor * __udivdi3(dividend, divisor)));
+}
+EXPORT_SYMBOL(__umoddi3);
+
+/*
+ * 64-bit signed modulo for 32-bit machines.
+ *
+ * The remainder of the operands' magnitudes is computed with __umoddi3()
+ * and then given the sign of the dividend 'n'; the sign of the divisor 'd'
+ * does not affect the result.
+ */
+int64_t
+__moddi3(int64_t n, int64_t d)
+{
+	const boolean_t negative = (n < 0) ? B_TRUE : B_FALSE;
+	int64_t rem;
+
+	if (negative)
+		n = -n;
+	if (d < 0)
+		d = -d;
+
+	rem = __umoddi3(n, d);
+	if (negative)
+		return (-rem);
+
+	return (rem);
+}
+EXPORT_SYMBOL(__moddi3);
+
+/*
+ * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
+ *
+ * Returns n / d.  When 'r' is non-NULL the remainder n % d is stored
+ * through it as well.
+ */
+uint64_t
+__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
+{
+	uint64_t q = __udivdi3(n, d);
+	if (r)
+		*r = n - d * q;	/* remainder derived from the quotient */
+	return (q);
+}
+EXPORT_SYMBOL(__udivmoddi4);
+
+/*
+ * Implementation of 64-bit signed division/modulo for 32-bit machines.
+ *
+ * The division is carried out on the magnitudes of 'n' and 'd' by
+ * __udivmoddi4().  The quotient is negated when the operands' signs
+ * differ, and the remainder takes the sign of the dividend.  The remainder
+ * is stored through 'r' only when 'r' is non-NULL.
+ */
+int64_t
+__divmoddi4(int64_t n, int64_t d, int64_t *r)
+{
+	const boolean_t neg_n = (n < 0) ? B_TRUE : B_FALSE;
+	const boolean_t neg_d = (d < 0) ? B_TRUE : B_FALSE;
+	int64_t quot, rem;
+
+	if (neg_n)
+		n = -n;
+	if (neg_d)
+		d = -d;
+
+	quot = __udivmoddi4(n, d, (uint64_t *)&rem);
+
+	if (neg_n != neg_d)
+		quot = -quot;
+	if (neg_n)
+		rem = -rem;
+
+	if (r)
+		*r = rem;
+
+	return (quot);
+}
+EXPORT_SYMBOL(__divmoddi4);
+
+#if defined(__arm) || defined(__arm__)
+/*
+ * Implementation of 64-bit (un)signed division for 32-bit arm machines.
+ *
+ * Run-time ABI for the ARM Architecture (page 20).  A pair of (unsigned)
+ * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
+ * and the remainder in {r2, r3}.  The return type is specifically left
+ * set to 'void' to ensure the compiler does not overwrite these registers
+ * during the return.  All results are in registers as per ABI
+ */
+void
+__aeabi_uldivmod(uint64_t u, uint64_t v)
+{
+	uint64_t res;
+	uint64_t mod;
+
+	res = __udivdi3(u, v);
+	mod = __umoddi3(u, v);
+	{
+		/* Pin the low/high halves of both results to r0-r3. */
+		register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
+		register uint32_t r1 asm("r1") = (res >> 32);
+		register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
+		register uint32_t r3 asm("r3") = (mod >> 32);
+
+		/*
+		 * Empty asm that lists r0-r3 as both inputs and outputs;
+		 * presumably this keeps the compiler from discarding the
+		 * register assignments above before the return.
+		 */
+		/* BEGIN CSTYLED */
+		asm volatile(""
+		    : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
+		    : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
+		/* END CSTYLED */
+
+		return; /* r0; */
+	}
+}
+EXPORT_SYMBOL(__aeabi_uldivmod);
+
+/*
+ * Signed counterpart of __aeabi_uldivmod(): the signed quotient is left in
+ * {r0, r1} and the signed remainder in {r2, r3}.  The return type is 'void'
+ * so the compiler does not overwrite these registers during the return.
+ */
+void
+__aeabi_ldivmod(int64_t u, int64_t v)
+{
+	int64_t res;
+	int64_t mod;
+
+	res =  __divdi3(u, v);
+	/*
+	 * The remainder has to be computed with the signed helper as well.
+	 * __umoddi3() would reinterpret a negative operand as a huge unsigned
+	 * value and return a remainder unrelated to the signed quotient.
+	 */
+	mod = __moddi3(u, v);
+	{
+		/* Pin the low/high halves of both results to r0-r3. */
+		register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
+		register uint32_t r1 asm("r1") = (res >> 32);
+		register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
+		register uint32_t r3 asm("r3") = (mod >> 32);
+
+		/* BEGIN CSTYLED */
+		asm volatile(""
+		    : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
+		    : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
+		/* END CSTYLED */
+
+		return; /* r0; */
+	}
+}
+EXPORT_SYMBOL(__aeabi_ldivmod);
+#endif /* __arm || __arm__ */
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#endif /* BITS_PER_LONG */
+
+/*
+ * NOTE: The strtoxx behavior is solely based on my reading of the Solaris
+ * ddi_strtol(9F) man page.  I have not verified the behavior of these
+ * functions against their Solaris counterparts.  It is possible that I
+ * may have misinterpreted the man page or the man page is incorrect.
+ */
+int ddi_strtoul(const char *, char **, int, unsigned long *);
+int ddi_strtol(const char *, char **, int, long *);
+int ddi_strtoull(const char *, char **, int, unsigned long long *);
+int ddi_strtoll(const char *, char **, int, long long *);
+
+/*
+ * Generate ddi_strtou<type>(): parse an unsigned integer of type 'valtype'
+ * from 'str' in the given 'base' and store it in *result.
+ *
+ * A base of 0 auto-detects the radix from the prefix: "0x"/"0X" followed
+ * by a hex digit selects base 16, a leading '0' followed by an octal digit
+ * ('0'..'7') selects base 8, any other string starting with '0' is rejected
+ * with EINVAL, and everything else is parsed as decimal.
+ *
+ * Returns 0 on success, EINVAL for an empty string or an unrecognized
+ * prefix, and ERANGE when adding a digit makes the accumulated value
+ * smaller than it was before (wrap-around).  On success *endptr, when
+ * endptr is non-NULL, points at the first character that was not consumed.
+ * On failure neither *result nor *endptr is written.
+ *
+ * NOTE(review): 'flag' starts out as 1, so a string without any valid digit
+ * still succeeds with *result == 0; confirm whether that is intended.
+ */
+#define	define_ddi_strtoux(type, valtype)				\
+int ddi_strtou##type(const char *str, char **endptr,			\
+    int base, valtype *result)						\
+{									\
+	valtype last_value, value = 0;					\
+	char *ptr = (char *)str;					\
+	int flag = 1, digit;						\
+									\
+	if (strlen(ptr) == 0)						\
+		return (EINVAL);					\
+									\
+	/* Auto-detect base based on prefix */				\
+	if (!base) {							\
+		if (str[0] == '0') {					\
+			if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \
+				base = 16; /* hex */			\
+				ptr += 2;				\
+			} else if (str[1] >= '0' && str[1] < '8') {	\
+				base = 8; /* octal */			\
+				ptr += 1;				\
+			} else {					\
+				return (EINVAL);			\
+			}						\
+		} else {						\
+			base = 10; /* decimal */			\
+		}							\
+	}								\
+									\
+	while (1) {							\
+		if (isdigit(*ptr))					\
+			digit = *ptr - '0';				\
+		else if (isalpha(*ptr))					\
+			digit = tolower(*ptr) - 'a' + 10;		\
+		else							\
+			break;						\
+									\
+		/* Stop at the first character not valid in this base */ \
+		if (digit >= base)					\
+			break;						\
+									\
+		last_value = value;					\
+		value = value * base + digit;				\
+		if (last_value > value) /* Overflow */			\
+			return (ERANGE);				\
+									\
+		flag = 1;						\
+		ptr++;							\
+	}								\
+									\
+	if (flag)							\
+		*result = value;					\
+									\
+	if (endptr)							\
+		*endptr = (char *)(flag ? ptr : str);			\
+									\
+	return (0);							\
+}
+
+/*
+ * Generate ddi_strto<type>(): the signed variant built on top of
+ * ddi_strtou<type>().  An optional leading '-' is consumed here and the
+ * parsed magnitude is negated.  If nothing follows the sign that the
+ * unsigned parser could consume, the end pointer is moved back to 'str' so
+ * the '-' is not reported as consumed and *result is left as stored by the
+ * unsigned parser.
+ *
+ * The end position is tracked in a local variable so that callers may pass
+ * endptr == NULL; *endptr is only written on success, as before.
+ * Returns whatever ddi_strtou<type>() returns (0, EINVAL or ERANGE).
+ */
+#define	define_ddi_strtox(type, valtype)				\
+int ddi_strto##type(const char *str, char **endptr,			\
+    int base, valtype *result)						\
+{									\
+	char *end = NULL;						\
+	int rc;								\
+									\
+	if (*str == '-') {						\
+		rc = ddi_strtou##type(str + 1, &end, base, result);	\
+		if (!rc) {						\
+			if (end == str + 1)				\
+				end = (char *)str;			\
+			else						\
+				*result = -*result;			\
+		}							\
+	} else {							\
+		rc = ddi_strtou##type(str, &end, base, result);		\
+	}								\
+									\
+	if (!rc && endptr)						\
+		*endptr = end;						\
+									\
+	return (rc);							\
+}
+
+define_ddi_strtoux(l, unsigned long)
+define_ddi_strtox(l, long)
+define_ddi_strtoux(ll, unsigned long long)
+define_ddi_strtox(ll, long long)
+
+EXPORT_SYMBOL(ddi_strtoul);
+EXPORT_SYMBOL(ddi_strtol);
+EXPORT_SYMBOL(ddi_strtoll);
+EXPORT_SYMBOL(ddi_strtoull);
+
+/*
+ * Copy 'len' bytes from 'from' to the kernel buffer 'to'.  When FKIOCTL is
+ * set in 'flags' the request is a fake ioctl() issued by the kernel, so
+ * 'from' is a kernel address and a plain memcpy() is used; otherwise the
+ * copy is delegated to copyin().  Returns 0 for the memcpy() path, else
+ * the result of copyin().
+ */
+int
+ddi_copyin(const void *from, void *to, size_t len, int flags)
+{
+	if (!(flags & FKIOCTL))
+		return (copyin(from, to, len));
+
+	/* Fake ioctl() issued by kernel, 'from' is a kernel address */
+	memcpy(to, from, len);
+
+	return (0);
+}
+EXPORT_SYMBOL(ddi_copyin);
+
+/*
+ * Post a uevent to userspace whenever a new vdev is added to the pool. It
+ * is necessary to sync blkid information with udev, which the zed daemon
+ * uses during device hotplug to identify the vdev.
+ *
+ * A KOBJ_CHANGE event is sent for the kobject that belongs to 'bdev'; a
+ * failure to send it is only logged with pr_warn().
+ */
+void
+spl_signal_kobj_evt(struct block_device *bdev)
+{
+#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
+	/* Look up the kobject using whichever helper this kernel provides. */
+#ifdef HAVE_BDEV_KOBJ
+	struct kobject *disk_kobj = bdev_kobj(bdev);
+#else
+	struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj;
+#endif
+	if (disk_kobj) {
+		int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE);
+		if (ret) {
+			pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
+			    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
+			    kobject_name(disk_kobj), disk_kobj, ret);
+		}
+	}
+#else
+/*
+ * This is encountered if neither bdev_kobj() nor part_to_dev() is available
+ * in the kernel - likely due to an API change that needs to be chased down.
+ */
+#error "Unsupported kernel: unable to get struct kobj from bdev"
+#endif
+}
+EXPORT_SYMBOL(spl_signal_kobj_evt);
+
+/*
+ * Copy 'len' bytes from the kernel buffer 'from' to 'to'.  When FKIOCTL is
+ * set in 'flags' the request is a fake ioctl() issued by the kernel, so
+ * 'to' is a kernel address as well and a plain memcpy() is used; otherwise
+ * the copy is delegated to copyout().  Returns 0 for the memcpy() path,
+ * else the result of copyout().
+ */
+int
+ddi_copyout(const void *from, void *to, size_t len, int flags)
+{
+	if (!(flags & FKIOCTL))
+		return (copyout(from, to, len));
+
+	/* Fake ioctl() issued by kernel, 'to' is a kernel address */
+	memcpy(to, from, len);
+
+	return (0);
+}
+EXPORT_SYMBOL(ddi_copyout);
+
+/*
+ * Read up to 'count' bytes from 'file' at offset *pos into the kernel
+ * buffer 'buf' and return the result of the underlying read call.
+ *
+ * With HAVE_KERNEL_READ_PPOS the kernel's kernel_read() is used directly.
+ * Otherwise vfs_read() is called with the address limit temporarily
+ * switched to KERNEL_DS, and the previous limit is restored afterwards.
+ */
+static ssize_t
+spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
+{
+#if defined(HAVE_KERNEL_READ_PPOS)
+	return (kernel_read(file, buf, count, pos));
+#else
+	mm_segment_t saved_fs;
+	ssize_t ret;
+
+	/* Let vfs_read() accept a kernel pointer as its "user" buffer. */
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	ret = vfs_read(file, (void __user *)buf, count, pos);
+
+	set_fs(saved_fs);
+
+	return (ret);
+#endif
+}
+
+/*
+ * Fetch the attributes of the open file 'filp' into 'stat' using whichever
+ * vfs_getattr() signature this kernel provides.  Returns 0 on success;
+ * otherwise the negated vfs_getattr() result (presumably turning a negative
+ * kernel errno into a positive one).
+ */
+static int
+spl_getattr(struct file *filp, struct kstat *stat)
+{
+	int err;
+
+	ASSERT(filp);
+	ASSERT(stat);
+
+#if defined(HAVE_4ARGS_VFS_GETATTR)
+	err = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS,
+	    AT_STATX_SYNC_AS_STAT);
+#elif defined(HAVE_2ARGS_VFS_GETATTR)
+	err = vfs_getattr(&filp->f_path, stat);
+#elif defined(HAVE_3ARGS_VFS_GETATTR)
+	err = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat);
+#else
+#error "No available vfs_getattr()"
+#endif
+
+	/* Negating zero still yields zero, so no separate success path. */
+	return (-err);
+}
+
+/*
+ * Read the unique system identifier from the /etc/hostid file.
+ *
+ * The behavior of /usr/bin/hostid on Linux systems with the
+ * regular eglibc and coreutils is:
+ *
+ *   1. Generate the value if the /etc/hostid file does not exist
+ *      or if the /etc/hostid file is less than four bytes in size.
+ *
+ *   2. If the /etc/hostid file is at least 4 bytes, then return
+ *      the first four bytes [0..3] in native endian order.
+ *
+ *   3. Always ignore bytes [4..] if they exist in the file.
+ *
+ * Only the first four bytes are significant, even on systems that
+ * have a 64-bit word size.
+ *
+ * See:
+ *
+ *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
+ *   coreutils: src/hostid.c
+ *
+ * Notes:
+ *
+ * The /etc/hostid file on Solaris is a text file that often reads:
+ *
+ *   # DO NOT EDIT
+ *   "0123456789"
+ *
+ * Directly copying this file to Linux results in a constant
+ * hostid of 4f442023 because the default comment constitutes
+ * the first four bytes of the file.
+ *
+ */
+
+char *spl_hostid_path = HW_HOSTID_PATH;
+module_param(spl_hostid_path, charp, 0444);
+MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
+
+/*
+ * Read the hostid from the file named by spl_hostid_path and store it,
+ * masked with HW_HOSTID_MASK, in *hostid.
+ *
+ * Returns 0 on success, ENOENT if the file cannot be opened, the error
+ * from spl_getattr() if its attributes cannot be read, EINVAL if the file
+ * is smaller than sizeof (HW_HOSTID_MASK), and EIO if the read fails.
+ * *hostid is only written on success.
+ */
+static int
+hostid_read(uint32_t *hostid)
+{
+	struct file *filp;
+	struct kstat stat;
+	uint32_t value = 0;
+	loff_t off = 0;
+	int rc;
+
+	filp = filp_open(spl_hostid_path, 0, 0);
+	if (IS_ERR(filp))
+		return (ENOENT);
+
+	rc = spl_getattr(filp, &stat);
+	if (rc != 0)
+		goto out;
+
+	// cppcheck-suppress sizeofwithnumericparameter
+	if ((uint64_t)stat.size < sizeof (HW_HOSTID_MASK)) {
+		rc = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Read directly into the variable like eglibc does.
+	 * Short reads are okay; native behavior is preserved.
+	 */
+	if (spl_kernel_read(filp, &value, sizeof (value), &off) < 0) {
+		rc = EIO;
+		goto out;
+	}
+
+	/* Mask down to 32 bits like coreutils does. */
+	*hostid = (value & HW_HOSTID_MASK);
+	rc = 0;
+
+out:
+	filp_close(filp, 0);
+
+	return (rc);
+}
+
+/*
+ * Return the system hostid.  The spl_hostid module option takes precedence
+ * when it is non-zero; otherwise the value is read from the hostid file.
+ * Returns 0 if neither source provides a hostid.  'zone' must be NULL.
+ */
+uint32_t
+zone_get_hostid(void *zone)
+{
+	uint32_t from_file;
+
+	ASSERT3P(zone, ==, NULL);
+
+	if (spl_hostid != 0)
+		return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
+
+	if (hostid_read(&from_file) != 0)
+		return (0);
+
+	return (from_file);
+}
+EXPORT_SYMBOL(zone_get_hostid);
+
+/*
+ * Initialize the kmem and then the vmem subsystem.  If vmem initialization
+ * fails, kmem is torn down again.  Returns 0 on success or the error of
+ * the step that failed.
+ */
+static int
+spl_kvmem_init(void)
+{
+	int rc;
+
+	rc = spl_kmem_init();
+	if (rc == 0) {
+		rc = spl_vmem_init();
+		if (rc != 0)
+			spl_kmem_fini();
+	}
+
+	return (rc);
+}
+
+/*
+ * We initialize the random number generator with 128 bits of entropy from the
+ * system random number generator. In the improbable case that we have a zero
+ * seed, we fallback to the system jiffies, unless it is also zero, in which
+ * situation we use a preprogrammed seed. We step forward by 2^64 iterations to
+ * initialize each of the per-cpu seeds so that the sequences generated on each
+ * CPU are guaranteed to never overlap in practice.
+ *
+ * Returns 0 on success or -ENOMEM when the per-cpu seed storage cannot be
+ * allocated, in which case nothing needs to be undone by the caller.
+ */
+static int __init
+spl_random_init(void)
+{
+	uint64_t s[2];
+	int i = 0;
+
+	spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
+	    sizeof (uint64_t));
+
+	/* Without this check the seeding loop below would dereference NULL. */
+	if (spl_pseudo_entropy == NULL)
+		return (-ENOMEM);
+
+	get_random_bytes(s, sizeof (s));
+
+	if (s[0] == 0 && s[1] == 0) {
+		if (jiffies != 0) {
+			s[0] = jiffies;
+			s[1] = ~0 - jiffies;
+		} else {
+			/* The literal is exactly sizeof (s) bytes with NUL. */
+			(void) memcpy(s, "improbable seed", sizeof (s));
+		}
+		printk("SPL: get_random_bytes() returned 0 "
+		    "when generating random seed. Setting initial seed to "
+		    "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
+		    cpu_to_be64(s[1]));
+	}
+
+	for_each_possible_cpu(i) {
+		uint64_t *wordp = per_cpu_ptr(spl_pseudo_entropy, i);
+
+		/* Jump ahead so every CPU gets its own stretch of output. */
+		spl_rand_jump(s);
+
+		wordp[0] = s[0];
+		wordp[1] = s[1];
+	}
+
+	return (0);
+}
+
+/* Release the per-cpu seed storage allocated by spl_random_init(). */
+static void
+spl_random_fini(void)
+{
+	free_percpu(spl_pseudo_entropy);
+}
+
+/* Tear down vmem and kmem, in the reverse order of spl_kvmem_init(). */
+static void
+spl_kvmem_fini(void)
+{
+	spl_vmem_fini();
+	spl_kmem_fini();
+}
+
+/*
+ * Module entry point.  The subsystems are brought up in order; when one of
+ * them fails, everything initialized before it is torn down in reverse
+ * order through the outN labels and the failing step's error is returned.
+ * That unwinding includes the per-cpu random seed storage, which would
+ * otherwise be leaked on a failed load.
+ */
+static int __init
+spl_init(void)
+{
+	int rc = 0;
+
+	bzero(&p0, sizeof (proc_t));
+
+	if ((rc = spl_random_init()))
+		goto out0;
+
+	if ((rc = spl_kvmem_init()))
+		goto out1;
+
+	if ((rc = spl_tsd_init()))
+		goto out2;
+
+	if ((rc = spl_taskq_init()))
+		goto out3;
+
+	if ((rc = spl_kmem_cache_init()))
+		goto out4;
+
+	if ((rc = spl_proc_init()))
+		goto out5;
+
+	if ((rc = spl_kstat_init()))
+		goto out6;
+
+	if ((rc = spl_zlib_init()))
+		goto out7;
+
+	return (rc);
+
+out7:
+	spl_kstat_fini();
+out6:
+	spl_proc_fini();
+out5:
+	spl_kmem_cache_fini();
+out4:
+	spl_taskq_fini();
+out3:
+	spl_tsd_fini();
+out2:
+	spl_kvmem_fini();
+out1:
+	spl_random_fini();
+out0:
+	return (rc);
+}
+
+/*
+ * Module exit point: tear down every subsystem in the reverse order of its
+ * initialization in spl_init(), finishing with the per-cpu random seeds.
+ */
+static void __exit
+spl_fini(void)
+{
+	spl_zlib_fini();
+	spl_kstat_fini();
+	spl_proc_fini();
+	spl_kmem_cache_fini();
+	spl_taskq_fini();
+	spl_tsd_fini();
+	spl_kvmem_fini();
+	spl_random_fini();
+}
+
+module_init(spl_init);
+module_exit(spl_fini);
+
+ZFS_MODULE_DESCRIPTION("Solaris Porting Layer");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE("GPL");
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);

diff --git a/zfs/module/os/linux/spl/spl-kmem-cache.c b/zfs/module/os/linux/spl/spl-kmem-cache.c
new file mode 100644
index 0000000..c7fc3c8
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-kmem-cache.c

@@ -0,0 +1,1476 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/percpu_compat.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/taskq.h>
+#include <sys/timer.h>
+#include <sys/vmem.h>
+#include <sys/wait.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/prefetch.h>
+
+/*
+ * Within the scope of this file the kmem_cache_* definitions
+ * are removed to allow access to the real Linux slab allocator.
+ */
+#undef kmem_cache_destroy
+#undef kmem_cache_create
+#undef kmem_cache_alloc
+#undef kmem_cache_free
+
+
+/*
+ * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}()
+ * with smp_mb__{before,after}_atomic() because they were redundant. This is
+ * only used inside our SLAB allocator, so we implement an internal wrapper
+ * here to give us smp_mb__{before,after}_atomic() on older kernels.
+ *
+ * The fallbacks are only defined when the kernel headers have not already
+ * provided a macro of the same name.  They are declared with one parameter
+ * and forward it to smp_mb__{before,after}_clear_bit(); the call sites in
+ * this file invoke them with an empty argument list.
+ */
+#ifndef smp_mb__before_atomic
+#define	smp_mb__before_atomic(x) smp_mb__before_clear_bit(x)
+#endif
+
+#ifndef smp_mb__after_atomic
+#define	smp_mb__after_atomic(x) smp_mb__after_clear_bit(x)
+#endif
+
+/* BEGIN CSTYLED */
+
+/*
+ * Cache magazines are an optimization designed to minimize the cost of
+ * allocating memory.  They do this by keeping a per-cpu cache of recently
+ * freed objects, which can then be reallocated without taking a lock. This
+ * can improve performance on highly contended caches.  However, because
+ * objects in magazines will prevent otherwise empty slabs from being
+ * immediately released this may not be ideal for low memory machines.
+ *
+ * For this reason spl_kmem_cache_magazine_size can be used to set a maximum
+ * magazine size.  When this value is set to 0 the magazine size will be
+ * automatically determined based on the object size.  Otherwise magazines
+ * will be limited to 2-256 objects per magazine (i.e. per cpu).  Magazines
+ * may never be entirely disabled in this implementation.
+ */
+unsigned int spl_kmem_cache_magazine_size = 0;
+module_param(spl_kmem_cache_magazine_size, uint, 0444);
+MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
+	"Default magazine size (2-256), set automatically (0)");
+
+/*
+ * The default behavior is to report the number of objects remaining in the
+ * cache.  This allows the Linux VM to repeatedly reclaim objects from the
+ * cache when memory is low to satisfy other memory allocations.  Alternately,
+ * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
+ * is reclaimed.  This may increase the likelihood of out of memory events.
+ */
+unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
+module_param(spl_kmem_cache_reclaim, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
+
+/*
+ * Target number of objects placed on each slab.  spl_slab_size() uses it
+ * to compute the preferred slab size before applying the size cap below.
+ */
+unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
+
+/*
+ * Upper bound on the size of a single slab, in MB.  spl_slab_size()
+ * multiplies it by 1024 * 1024 and, when the preferred slab size exceeds
+ * the result, reduces the number of objects per slab to fit.
+ */
+unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE;
+module_param(spl_kmem_cache_max_size, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
+
+/*
+ * For small objects the Linux slab allocator should be used to make the most
+ * efficient use of the memory.  However, large objects are not supported by
+ * the Linux slab and therefore the SPL implementation is preferred.  A cutoff
+ * of 16K was determined to be optimal for architectures using 4K pages and
+ * to also work well on architectures using larger 64K page sizes.
+ *
+ * The cutoff is inclusive: spl_kmem_cache_create() selects the Linux slab
+ * when size <= spl_kmem_cache_slab_limit, and a limit of 0 disables that
+ * automatic selection.
+ */
+unsigned int spl_kmem_cache_slab_limit = 16384;
+module_param(spl_kmem_cache_slab_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
+	"Objects less than N bytes use the Linux slab");
+
+/*
+ * The number of threads available to allocate new slabs for caches.  This
+ * should not need to be tuned but it is available for performance analysis.
+ */
+unsigned int spl_kmem_cache_kmem_threads = 4;
+module_param(spl_kmem_cache_kmem_threads, uint, 0444);
+MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
+	"Number of spl_kmem_cache threads");
+/* END CSTYLED */
+
+/*
+ * Slab allocation interfaces
+ *
+ * While the Linux slab implementation was inspired by the Solaris
+ * implementation I cannot use it to emulate the Solaris APIs.  I
+ * require two features which are not provided by the Linux slab.
+ *
+ * 1) Constructors AND destructors.  Recent versions of the Linux
+ *    kernel have removed support for destructors.  This is a deal
+ *    breaker for the SPL which contains particularly expensive
+ *    initializers for mutex's, condition variables, etc.  We also
+ *    require a minimal level of cleanup for these data types unlike
+ *    many Linux data types which do not need to be explicitly destroyed.
+ *
+ * 2) Virtual address space backed slab.  Callers of the Solaris slab
+ *    expect it to work well for both small and very large allocations.
+ *    Because of memory fragmentation the Linux slab which is backed
+ *    by kmalloc'ed memory performs very badly when confronted with
+ *    large numbers of large allocations.  Basing the slab on the
+ *    virtual address space removes the need for contiguous pages
+ *    and greatly improves performance for large allocations.
+ *
+ * For these reasons, the SPL has its own slab implementation with
+ * the needed features.  It is not as highly optimized as either the
+ * Solaris or Linux slabs, but it should get me most of what is
+ * needed until it can be optimized or obsoleted by another approach.
+ *
+ * One serious concern I do have about this method is the relatively
+ * small virtual address space on 32bit arches.  This will seriously
+ * constrain the size of the slab caches and their performance.
+ */
+
+struct list_head spl_kmem_cache_list;   /* List of caches */
+struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
+taskq_t *spl_kmem_cache_taskq;		/* Task queue for aging / reclaim */
+
+/* Forward declaration: spl_cache_flush() calls this before it is defined. */
+static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
+
+/*
+ * Allocate 'size' bytes of virtually contiguous memory for a slab.  The
+ * KM_* flags are translated to gfp flags and __GFP_HIGHMEM is added.  The
+ * 'skc' argument is not used.  Returns the spl_vmalloc() result as is.
+ */
+static void *
+kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
+{
+	void *mem = spl_vmalloc(size,
+	    kmem_flags_convert(flags) | __GFP_HIGHMEM);
+
+	/* The backing allocator is expected to hand back whole pages */
+	ASSERT(IS_P2ALIGNED(mem, PAGE_SIZE));
+
+	return (mem);
+}
+
+/*
+ * Return a slab obtained from kv_alloc() to the system with vfree().  The
+ * 'skc' argument is not used; 'size' is the slab size in bytes.
+ */
+static void
+kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
+{
+	int pages = size >> PAGE_SHIFT;
+
+	ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
+
+	/*
+	 * Direct reclaim measures forward progress through the current
+	 * task's reclaim_state.  The Linux slab implementations update it
+	 * from kmem_freepages(); since none of that code is involved here
+	 * the freed page count has to be credited by hand.  The name of
+	 * the counter depends on the kernel version.
+	 */
+	if (current->reclaim_state != NULL) {
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += pages;
+#else
+		current->reclaim_state->reclaimed_slab += pages;
+#endif
+	}
+
+	vfree(ptr);
+}
+
+/*
+ * Bytes reserved at the start of a slab for its spl_kmem_slab_t header,
+ * rounded up to the cache's object alignment.
+ */
+static inline uint32_t
+spl_sks_size(spl_kmem_cache_t *skc)
+{
+	uint32_t hdr_bytes;
+
+	hdr_bytes = P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t),
+	    skc->skc_obj_align, uint32_t);
+
+	return (hdr_bytes);
+}
+
+/*
+ * Bytes consumed by one object slot on a slab: the aligned object buffer
+ * plus the aligned spl_kmem_obj_t tracking structure.
+ */
+static inline uint32_t
+spl_obj_size(spl_kmem_cache_t *skc)
+{
+	uint32_t buf_bytes, sko_bytes;
+
+	buf_bytes = P2ROUNDUP_TYPED(skc->skc_obj_size,
+	    skc->skc_obj_align, uint32_t);
+	sko_bytes = P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t),
+	    skc->skc_obj_align, uint32_t);
+
+	return (buf_bytes + sko_bytes);
+}
+
+/*
+ * Return the cache's skc_obj_total counter.
+ * NOTE(review): despite the "inuse" name this is the total object count
+ * (grown by sks_objs for every new slab), not skc_obj_alloc -- confirm
+ * that callers expect the total.
+ */
+uint64_t
+spl_kmem_cache_inuse(kmem_cache_t *cache)
+{
+	return (cache->skc_obj_total);
+}
+EXPORT_SYMBOL(spl_kmem_cache_inuse);
+
+/*
+ * Return the object size the cache was created with (skc_obj_size),
+ * without any alignment padding or per-object tracking overhead.
+ */
+uint64_t
+spl_kmem_cache_entry_size(kmem_cache_t *cache)
+{
+	return (cache->skc_obj_size);
+}
+EXPORT_SYMBOL(spl_kmem_cache_entry_size);
+
+/*
+ * Map an object buffer to its spl_kmem_obj_t, which is stored directly
+ * after the buffer at the next skc_obj_align boundary.
+ */
+static inline spl_kmem_obj_t *
+spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj)
+{
+	uint32_t sko_offset = P2ROUNDUP_TYPED(skc->skc_obj_size,
+	    skc->skc_obj_align, uint32_t);
+
+	return (obj + sko_offset);
+}
+
+/*
+ * It's important that we pack the spl_kmem_obj_t structure and the
+ * actual objects in to one large address space to minimize the number
+ * of calls to the allocator.  It is far better to do a few large
+ * allocations and then subdivide it ourselves.  Now which allocator
+ * we use requires balancing a few trade offs.
+ *
+ * For small objects we use kmem_alloc() because as long as you are
+ * only requesting a small number of pages (ideally just one) it's cheap.
+ * However, when you start requesting multiple pages with kmem_alloc()
+ * it gets increasingly expensive since it requires contiguous pages.
+ * For this reason we shift to vmem_alloc() for slabs of large objects
+ * which removes the need for contiguous pages.  We do not use
+ * vmem_alloc() in all cases because there is significant locking
+ * overhead in __get_vm_area_node().  This function takes a single
+ * global lock when acquiring an available virtual address range which
+ * serializes all vmem_alloc()'s for all slab caches.  Using slightly
+ * different allocation functions for small and large objects should
+ * give us the best of both worlds.
+ *
+ * +------------------------+
+ * | spl_kmem_slab_t --+-+  |
+ * | skc_obj_size    <-+ |  |
+ * | spl_kmem_obj_t      |  |
+ * | skc_obj_size    <---+  |
+ * | spl_kmem_obj_t      |  |
+ * | ...                 v  |
+ * +------------------------+
+ */
+/*
+ * Allocate one slab for 'skc' and carve it into objects.  The slab header
+ * sits at the start of the allocation, followed by skc_slab_objs slots of
+ * spl_obj_size() bytes each.  Every slot's spl_kmem_obj_t is initialized
+ * and queued on the slab's free list.  Returns NULL if the backing
+ * allocation fails.  The slab is not linked into the cache here.
+ */
+static spl_kmem_slab_t *
+spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_slab_t *slab;
+	uint32_t slot_bytes;
+	void *mem;
+
+	mem = kv_alloc(skc, skc->skc_slab_size, flags);
+	if (mem == NULL)
+		return (NULL);
+
+	/* The slab header occupies the first bytes of the allocation */
+	slab = (spl_kmem_slab_t *)mem;
+	slab->sks_magic = SKS_MAGIC;
+	slab->sks_objs = skc->skc_slab_objs;
+	slab->sks_age = jiffies;
+	slab->sks_cache = skc;
+	INIT_LIST_HEAD(&slab->sks_list);
+	INIT_LIST_HEAD(&slab->sks_free_list);
+	slab->sks_ref = 0;
+	slot_bytes = spl_obj_size(skc);
+
+	for (int n = 0; n < slab->sks_objs; n++) {
+		void *buf = mem + spl_sks_size(skc) + (n * slot_bytes);
+		spl_kmem_obj_t *track;
+
+		ASSERT(IS_P2ALIGNED(buf, skc->skc_obj_align));
+		track = spl_sko_from_obj(skc, buf);
+		track->sko_addr = buf;
+		track->sko_magic = SKO_MAGIC;
+		track->sko_slab = slab;
+		INIT_LIST_HEAD(&track->sko_list);
+		list_add_tail(&track->sko_list, &slab->sks_free_list);
+	}
+
+	return (slab);
+}
+
+/*
+ * Detach an unreferenced slab from its cache.  Must be called with
+ * 'skc->skc_lock' held.  Nothing is freed here: the slab is moved to
+ * 'sks_list' and its free objects are spliced on to 'sko_list' so the
+ * caller can release the memory after dropping the lock, which avoids
+ * deadlocking on vmem addresses.
+ */
+static void
+spl_slab_free(spl_kmem_slab_t *sks,
+    struct list_head *sks_list, struct list_head *sko_list)
+{
+	spl_kmem_cache_t *cache;
+
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+	ASSERT(sks->sks_ref == 0);
+
+	cache = sks->sks_cache;
+	ASSERT(cache->skc_magic == SKC_MAGIC);
+
+	/* Account for the departing slab and every object it carries */
+	cache->skc_slab_total--;
+	cache->skc_obj_total -= sks->sks_objs;
+
+	/* Hand the slab and its objects over to the caller's work lists */
+	list_move(&sks->sks_list, sks_list);
+	list_splice_init(&sks->sks_free_list, sko_list);
+}
+
+/*
+ * Release every empty slab found at the tail of the partial list.
+ */
+static void
+spl_slab_reclaim(spl_kmem_cache_t *skc)
+{
+	spl_kmem_slab_t *slab = NULL, *slab_tmp = NULL;
+	spl_kmem_obj_t *track = NULL, *track_tmp = NULL;
+	LIST_HEAD(dead_slabs);
+	LIST_HEAD(dead_objs);
+
+	/*
+	 * Walk skc->skc_partial_list backwards under the spin lock.  Empty
+	 * slabs are kept at the end of that list, so the walk stops at the
+	 * first slab which still has a reference.  The slabs collected on
+	 * the way are parked on private lists so the actual free can happen
+	 * once the lock has been dropped.
+	 */
+	spin_lock(&skc->skc_lock);
+	list_for_each_entry_safe_reverse(slab, slab_tmp,
+	    &skc->skc_partial_list, sks_list) {
+
+		if (slab->sks_ref > 0)
+			break;
+
+		spl_slab_free(slab, &dead_slabs, &dead_objs);
+	}
+	spin_unlock(&skc->skc_lock);
+
+	/*
+	 * Outside skc->skc_lock: sanity check the collected objects and
+	 * then give each slab's memory back to the system.
+	 */
+	list_for_each_entry_safe(track, track_tmp, &dead_objs, sko_list) {
+		ASSERT(track->sko_magic == SKO_MAGIC);
+	}
+
+	list_for_each_entry_safe(slab, slab_tmp, &dead_slabs, sks_list) {
+		ASSERT(slab->sks_magic == SKS_MAGIC);
+		kv_free(skc, slab, skc->skc_slab_size);
+	}
+}
+
+/*
+ * Look up the emergency object whose address equals 'obj' in the red
+ * black tree rooted at 'root'.  Returns the tracking entry, or NULL when
+ * the address is not present.
+ */
+static spl_kmem_emergency_t *
+spl_emergency_search(struct rb_root *root, void *obj)
+{
+	unsigned long key = (unsigned long)obj;
+	struct rb_node *cur = root->rb_node;
+
+	while (cur != NULL) {
+		spl_kmem_emergency_t *entry =
+		    container_of(cur, spl_kmem_emergency_t, ske_node);
+
+		if (key == entry->ske_obj)
+			return (entry);
+
+		/* The tree is ordered by object address */
+		cur = (key < entry->ske_obj) ? cur->rb_left : cur->rb_right;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Insert 'ske' into the red black tree rooted at 'root', keyed by its
+ * object address.  Returns 1 when the entry was linked in and 0 when an
+ * entry with the same address already exists (the tree is left as is).
+ */
+static int
+spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
+{
+	struct rb_node **link = &(root->rb_node);
+	struct rb_node *parent = NULL;
+	unsigned long key = ske->ske_obj;
+
+	while (*link != NULL) {
+		spl_kmem_emergency_t *cur =
+		    container_of(*link, spl_kmem_emergency_t, ske_node);
+
+		if (key == cur->ske_obj)
+			return (0);
+
+		parent = *link;
+		if (key < cur->ske_obj)
+			link = &(parent->rb_left);
+		else
+			link = &(parent->rb_right);
+	}
+
+	rb_link_node(&ske->ske_node, parent, link);
+	rb_insert_color(&ske->ske_node, root);
+
+	return (1);
+}
+
+/*
+ * Allocate one emergency object straight from the page allocator and
+ * record it in the cache's red black tree.
+ *
+ * Returns 0 and stores the object in '*obj' on success.  Returns -EEXIST
+ * when a partial slab is available instead, -ENOMEM when either
+ * allocation fails, and -EINVAL when the address is already tracked.
+ */
+static int
+spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
+{
+	gfp_t lflags = kmem_flags_convert(flags);
+	int order = get_order(skc->skc_obj_size);
+	spl_kmem_emergency_t *ske;
+	int no_partial, inserted;
+
+	/* A partial slab may have appeared; if so let the caller use it */
+	spin_lock(&skc->skc_lock);
+	no_partial = list_empty(&skc->skc_partial_list);
+	spin_unlock(&skc->skc_lock);
+	if (!no_partial)
+		return (-EEXIST);
+
+	ske = kmalloc(sizeof (*ske), lflags);
+	if (ske == NULL)
+		return (-ENOMEM);
+
+	ske->ske_obj = __get_free_pages(lflags, order);
+	if (ske->ske_obj == 0) {
+		kfree(ske);
+		return (-ENOMEM);
+	}
+
+	/* Track the object and update the statistics under the lock */
+	spin_lock(&skc->skc_lock);
+	inserted = spl_emergency_insert(&skc->skc_emergency_tree, ske);
+	if (likely(inserted)) {
+		skc->skc_obj_total++;
+		skc->skc_obj_emergency++;
+		if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
+			skc->skc_obj_emergency_max = skc->skc_obj_emergency;
+	}
+	spin_unlock(&skc->skc_lock);
+
+	if (unlikely(!inserted)) {
+		free_pages(ske->ske_obj, order);
+		kfree(ske);
+		return (-EINVAL);
+	}
+
+	*obj = (void *)ske->ske_obj;
+
+	return (0);
+}
+
+/*
+ * Free an emergency object.  The object is looked up in the cache's red
+ * black tree; when found it is unlinked, the counters are adjusted and
+ * its pages and tracking entry are released.  Returns 0 on success or
+ * -ENOENT when 'obj' is not a tracked emergency object.
+ */
+static int
+spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
+{
+	int order = get_order(skc->skc_obj_size);
+	spl_kmem_emergency_t *ske;
+
+	spin_lock(&skc->skc_lock);
+	ske = spl_emergency_search(&skc->skc_emergency_tree, obj);
+	if (ske == NULL) {
+		spin_unlock(&skc->skc_lock);
+		return (-ENOENT);
+	}
+
+	rb_erase(&ske->ske_node, &skc->skc_emergency_tree);
+	skc->skc_obj_emergency--;
+	skc->skc_obj_total--;
+	spin_unlock(&skc->skc_lock);
+
+	/* The memory itself is released outside the lock */
+	free_pages(ske->ske_obj, order);
+	kfree(ske);
+
+	return (0);
+}
+
+/*
+ * Hand objects from a per-cpu magazine back to their slabs.  At most
+ * 'flush' entries are removed, taken from the front of the magazine; the
+ * entries that remain are shifted down to index zero.  The work is done
+ * under skc->skc_lock.
+ */
+static void
+spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
+{
+	int released;
+
+	spin_lock(&skc->skc_lock);
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	released = MIN(flush, skm->skm_avail);
+	for (int slot = 0; slot < released; slot++)
+		spl_cache_shrink(skc, skm->skm_objs[slot]);
+
+	/* Close the gap left at the front of the magazine */
+	skm->skm_avail -= released;
+	memmove(skm->skm_objs, &(skm->skm_objs[released]),
+	    sizeof (void *) * skm->skm_avail);
+
+	spin_unlock(&skc->skc_lock);
+}
+
+/*
+ * Work out how many objects go on a slab and how large the slab is.
+ *
+ * The preferred layout holds spl_kmem_cache_obj_per_slab objects plus the
+ * slab header.  If that exceeds spl_kmem_cache_max_size (in MB) the
+ * object count is reduced to what fits under the cap.  On success the
+ * results are stored in '*objs' and '*size' and 0 is returned; -ENOSPC
+ * is returned, with the outputs untouched, when no object fits.
+ */
+static int
+spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
+{
+	uint32_t hdr_bytes = spl_sks_size(skc);
+	uint32_t slot_bytes = spl_obj_size(skc);
+	uint32_t cap_bytes = (spl_kmem_cache_max_size * 1024 * 1024);
+	uint32_t slab_bytes =
+	    (spl_kmem_cache_obj_per_slab * slot_bytes + hdr_bytes);
+	uint32_t slab_objs;
+
+	if (slab_bytes > cap_bytes) {
+		/* Too big: keep only as many objects as the cap allows */
+		slab_objs = (cap_bytes - hdr_bytes) / slot_bytes;
+		slab_bytes = (slab_objs * slot_bytes) + hdr_bytes;
+	} else {
+		slab_objs = (slab_bytes - hdr_bytes) / slot_bytes;
+	}
+
+	if (slab_objs == 0)
+		return (-ENOSPC);
+
+	*objs = slab_objs;
+	*size = slab_bytes;
+
+	return (0);
+}
+
+/*
+ * Pick the number of objects a per-cpu magazine may hold.  A non-zero
+ * spl_kmem_cache_magazine_size tunable wins and is clamped to 2-256.
+ * Otherwise the size is chosen from the per-object footprint: the larger
+ * the object, the fewer of them are cached per cpu.
+ */
+static int
+spl_magazine_size(spl_kmem_cache_t *skc)
+{
+	uint32_t slot_bytes = spl_obj_size(skc);
+
+	if (spl_kmem_cache_magazine_size > 0)
+		return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
+
+	/* The per-magazine figures below assume a 4Kib page size */
+	if (slot_bytes > (PAGE_SIZE * 256))
+		return (4);	/* Minimum 4Mib per-magazine */
+
+	if (slot_bytes > (PAGE_SIZE * 32))
+		return (16);	/* Minimum 2Mib per-magazine */
+
+	if (slot_bytes > (PAGE_SIZE))
+		return (64);	/* Minimum 256Kib per-magazine */
+
+	if (slot_bytes > (PAGE_SIZE / 4))
+		return (128);	/* Minimum 128Kib per-magazine */
+
+	return (256);
+}
+
+/*
+ * Allocate and initialize the magazine for one cpu.  The magazine header
+ * is followed by skc_mag_size object pointers and the memory is requested
+ * from the NUMA node of 'cpu'.  Returns NULL if the allocation fails.
+ */
+static spl_kmem_magazine_t *
+spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
+{
+	int bytes = sizeof (spl_kmem_magazine_t) +
+	    sizeof (void *) * skc->skc_mag_size;
+	spl_kmem_magazine_t *mag;
+
+	mag = kmalloc_node(bytes, GFP_KERNEL, cpu_to_node(cpu));
+	if (mag == NULL)
+		return (NULL);
+
+	mag->skm_magic = SKM_MAGIC;
+	mag->skm_avail = 0;
+	mag->skm_size = skc->skc_mag_size;
+	mag->skm_refill = skc->skc_mag_refill;
+	mag->skm_cache = skc;
+	mag->skm_cpu = cpu;
+
+	return (mag);
+}
+
+/*
+ * Free a per-cpu magazine associated with a specific core.  The magazine
+ * must already be empty (skm_avail == 0); the caller is responsible for
+ * flushing its objects first.
+ */
+static void
+spl_magazine_free(spl_kmem_magazine_t *skm)
+{
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+	ASSERT(skm->skm_avail == 0);
+	kfree(skm);
+}
+
+/*
+ * Create all per-cpu magazines of reasonable sizes.
+ *
+ * Allocates the skc_mag pointer array, derives the magazine size and
+ * refill count for the cache, and then allocates one magazine for every
+ * possible cpu.  Returns 0 on success.  On failure -ENOMEM is returned,
+ * everything allocated here has been released again and skc_mag is NULL.
+ */
+static int
+spl_magazine_create(spl_kmem_cache_t *skc)
+{
+	int i = 0;
+
+	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
+
+	skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
+	    num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
+	/* Without this check the loop below would write through NULL */
+	if (skc->skc_mag == NULL)
+		return (-ENOMEM);
+
+	skc->skc_mag_size = spl_magazine_size(skc);
+	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
+
+	for_each_possible_cpu(i) {
+		skc->skc_mag[i] = spl_magazine_alloc(skc, i);
+		if (!skc->skc_mag[i]) {
+			/*
+			 * Unwind the magazines created so far.  The array
+			 * was zeroed by kzalloc(), so any index skipped by
+			 * for_each_possible_cpu() still holds NULL and
+			 * must not be passed to spl_magazine_free().
+			 */
+			for (i--; i >= 0; i--) {
+				if (skc->skc_mag[i] != NULL)
+					spl_magazine_free(skc->skc_mag[i]);
+			}
+
+			kfree(skc->skc_mag);
+			/* Do not leave a dangling pointer in the cache */
+			skc->skc_mag = NULL;
+			return (-ENOMEM);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Destroy all per-cpu magazines.  Each magazine is flushed completely so
+ * its objects return to their slabs, then freed; finally the pointer
+ * array itself is released.
+ */
+static void
+spl_magazine_destroy(spl_kmem_cache_t *skc)
+{
+	int cpu = 0;
+
+	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
+
+	for_each_possible_cpu(cpu) {
+		spl_kmem_magazine_t *mag = skc->skc_mag[cpu];
+
+		spl_cache_flush(skc, mag, mag->skm_avail);
+		spl_magazine_free(mag);
+	}
+
+	kfree(skc->skc_mag);
+}
+
+/*
+ * Create an object cache based on the following arguments:
+ * name		cache name
+ * size		cache object size
+ * align	cache object alignment
+ * ctor		cache object constructor
+ * dtor		cache object destructor
+ * reclaim	cache object reclaim
+ * priv		cache private data for ctor/dtor/reclaim
+ * vmp		unused must be NULL
+ * flags
+ *	KMC_KVMEM       Force kvmem backed SPL cache
+ *	KMC_SLAB        Force Linux slab backed cache
+ *	KMC_NODEBUG	Disable debugging (unsupported)
+ *
+ * Returns the new cache, already linked on spl_kmem_cache_list, or NULL
+ * if any allocation or sizing step fails.  Every failure path releases
+ * all resources acquired up to that point.  May sleep.
+ */
+spl_kmem_cache_t *
+spl_kmem_cache_create(char *name, size_t size, size_t align,
+    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, void *reclaim,
+    void *priv, void *vmp, int flags)
+{
+	gfp_t lflags = kmem_flags_convert(KM_SLEEP);
+	spl_kmem_cache_t *skc;
+	int rc;
+
+	/*
+	 * Unsupported flags
+	 */
+	ASSERT(vmp == NULL);
+	ASSERT(reclaim == NULL);
+
+	might_sleep();
+
+	skc = kzalloc(sizeof (*skc), lflags);
+	if (skc == NULL)
+		return (NULL);
+
+	skc->skc_magic = SKC_MAGIC;
+	skc->skc_name_size = strlen(name) + 1;
+	skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
+	if (skc->skc_name == NULL) {
+		kfree(skc);
+		return (NULL);
+	}
+	/* skc_name_size includes the terminator, so the copy is complete */
+	strncpy(skc->skc_name, name, skc->skc_name_size);
+
+	skc->skc_ctor = ctor;
+	skc->skc_dtor = dtor;
+	skc->skc_private = priv;
+	skc->skc_vmp = vmp;
+	skc->skc_linux_cache = NULL;
+	skc->skc_flags = flags;
+	skc->skc_obj_size = size;
+	skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
+	atomic_set(&skc->skc_ref, 0);
+
+	INIT_LIST_HEAD(&skc->skc_list);
+	INIT_LIST_HEAD(&skc->skc_complete_list);
+	INIT_LIST_HEAD(&skc->skc_partial_list);
+	skc->skc_emergency_tree = RB_ROOT;
+	spin_lock_init(&skc->skc_lock);
+	init_waitqueue_head(&skc->skc_waitq);
+	skc->skc_slab_fail = 0;
+	skc->skc_slab_create = 0;
+	skc->skc_slab_destroy = 0;
+	skc->skc_slab_total = 0;
+	skc->skc_slab_alloc = 0;
+	skc->skc_slab_max = 0;
+	skc->skc_obj_total = 0;
+	skc->skc_obj_alloc = 0;
+	skc->skc_obj_max = 0;
+	skc->skc_obj_deadlock = 0;
+	skc->skc_obj_emergency = 0;
+	skc->skc_obj_emergency_max = 0;
+
+	rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
+	    GFP_KERNEL);
+	if (rc != 0) {
+		/*
+		 * The counter was never set up, so the shared 'out' label
+		 * (which destroys it) cannot be used here.  Release the
+		 * name buffer by hand rather than leaking it.
+		 */
+		kfree(skc->skc_name);
+		kfree(skc);
+		return (NULL);
+	}
+
+	/*
+	 * Verify the requested alignment restriction is sane.
+	 */
+	if (align) {
+		VERIFY(ISP2(align));
+		VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
+		VERIFY3U(align, <=, PAGE_SIZE);
+		skc->skc_obj_align = align;
+	}
+
+	/*
+	 * When no specific type of slab is requested (kmem, vmem, or
+	 * linuxslab) then select a cache type based on the object size
+	 * and default tunables.
+	 */
+	if (!(skc->skc_flags & (KMC_SLAB | KMC_KVMEM))) {
+		if (spl_kmem_cache_slab_limit &&
+		    size <= (size_t)spl_kmem_cache_slab_limit) {
+			/*
+			 * Objects smaller than spl_kmem_cache_slab_limit can
+			 * use the Linux slab for better space-efficiency.
+			 */
+			skc->skc_flags |= KMC_SLAB;
+		} else {
+			/*
+			 * All other objects are considered large and are
+			 * placed on kvmem backed slabs.
+			 */
+			skc->skc_flags |= KMC_KVMEM;
+		}
+	}
+
+	/*
+	 * Given the type of slab allocate the required resources.
+	 */
+	if (skc->skc_flags & KMC_KVMEM) {
+		rc = spl_slab_size(skc,
+		    &skc->skc_slab_objs, &skc->skc_slab_size);
+		if (rc)
+			goto out;
+
+		/* On failure this has already released what it allocated */
+		rc = spl_magazine_create(skc);
+		if (rc)
+			goto out;
+	} else {
+		unsigned long slabflags = 0;
+
+		/*
+		 * rc is only tested for non-zero below, but negative errno
+		 * values are used to match the rest of this file.
+		 */
+		if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
+			rc = -EINVAL;
+			goto out;
+		}
+
+#if defined(SLAB_USERCOPY)
+		/*
+		 * Required for PAX-enabled kernels if the slab is to be
+		 * used for copying between user and kernel space.
+		 */
+		slabflags |= SLAB_USERCOPY;
+#endif
+
+#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
+		/*
+		 * Newer grsec patchset uses kmem_cache_create_usercopy()
+		 * instead of SLAB_USERCOPY flag
+		 */
+		skc->skc_linux_cache = kmem_cache_create_usercopy(
+		    skc->skc_name, size, align, slabflags, 0, size, NULL);
+#else
+		skc->skc_linux_cache = kmem_cache_create(
+		    skc->skc_name, size, align, slabflags, NULL);
+#endif
+		if (skc->skc_linux_cache == NULL) {
+			rc = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Publish the fully constructed cache on the global list */
+	down_write(&spl_kmem_cache_sem);
+	list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
+	up_write(&spl_kmem_cache_sem);
+
+	return (skc);
+out:
+	kfree(skc->skc_name);
+	percpu_counter_destroy(&skc->skc_linux_alloc);
+	kfree(skc);
+	return (NULL);
+}
+EXPORT_SYMBOL(spl_kmem_cache_create);
+
+/*
+ * Register a move callback for cache defragmentation.
+ * XXX: Unimplemented but harmless to stub out for now.
+ *
+ * The callback is not stored or invoked; the only effect is the assertion
+ * that 'move' is non-NULL.  'skc' is not used.
+ */
+void
+spl_kmem_cache_set_move(spl_kmem_cache_t *skc,
+    kmem_cbrc_t (move)(void *, void *, size_t, void *))
+{
+	ASSERT(move != NULL);
+}
+EXPORT_SYMBOL(spl_kmem_cache_set_move);
+
+/*
+ * Destroy a cache and all objects associated with the cache.
+ *
+ * The cache is first unlinked from spl_kmem_cache_list and flagged with
+ * KMC_BIT_DESTROY, then any task recorded in skc_taskqid is cancelled and
+ * outstanding references are waited for.  Only after that are the
+ * backing resources, the name and the cache structure itself released.
+ * The caller must have freed every object beforehand; this is asserted.
+ */
+void
+spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
+{
+	DECLARE_WAIT_QUEUE_HEAD(wq);
+	taskqid_t id;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skc->skc_flags & (KMC_KVMEM | KMC_SLAB));
+
+	/* Make the cache unreachable through the global list */
+	down_write(&spl_kmem_cache_sem);
+	list_del_init(&skc->skc_list);
+	up_write(&spl_kmem_cache_sem);
+
+	/*
+	 * Mark the cache as being destroyed (destroying it twice is a
+	 * fatal error), then cancel and wait for any pending delayed task.
+	 */
+	VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	spin_lock(&skc->skc_lock);
+	id = skc->skc_taskqid;
+	spin_unlock(&skc->skc_lock);
+
+	taskq_cancel_id(spl_kmem_cache_taskq, id);
+
+	/*
+	 * Wait until all current callers complete, this is mainly
+	 * to catch the case where a low memory situation triggers a
+	 * cache reaping action which races with this destroy.
+	 *
+	 * NOTE(review): 'wq' is local to this function and nothing visible
+	 * here ever wakes it, so the wait only returns promptly when
+	 * skc_ref is already zero -- confirm how a non-zero reference is
+	 * expected to wake this waiter.
+	 */
+	wait_event(wq, atomic_read(&skc->skc_ref) == 0);
+
+	if (skc->skc_flags & KMC_KVMEM) {
+		/* Flush the magazines first so their slabs become empty */
+		spl_magazine_destroy(skc);
+		spl_slab_reclaim(skc);
+	} else {
+		ASSERT(skc->skc_flags & KMC_SLAB);
+		kmem_cache_destroy(skc->skc_linux_cache);
+	}
+
+	spin_lock(&skc->skc_lock);
+
+	/*
+	 * Validate there are no objects in use and free all the
+	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
+	 */
+	ASSERT3U(skc->skc_slab_alloc, ==, 0);
+	ASSERT3U(skc->skc_obj_alloc, ==, 0);
+	ASSERT3U(skc->skc_slab_total, ==, 0);
+	ASSERT3U(skc->skc_obj_total, ==, 0);
+	ASSERT3U(skc->skc_obj_emergency, ==, 0);
+	ASSERT(list_empty(&skc->skc_complete_list));
+
+	ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
+	percpu_counter_destroy(&skc->skc_linux_alloc);
+
+	spin_unlock(&skc->skc_lock);
+
+	kfree(skc->skc_name);
+	kfree(skc);
+}
+EXPORT_SYMBOL(spl_kmem_cache_destroy);
+
+/*
+ * Take the first free object off slab 'sks' and return its address.  The
+ * slab's age and reference count and the cache's allocation statistics
+ * are updated along the way.  The free list is not checked for emptiness,
+ * so the caller must only pass a slab which still has a free object.
+ */
+static void *
+spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
+{
+	spl_kmem_obj_t *track;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+
+	track = list_first_entry(&sks->sks_free_list, spl_kmem_obj_t,
+	    sko_list);
+	ASSERT(track->sko_magic == SKO_MAGIC);
+	ASSERT(track->sko_addr != NULL);
+
+	/* The object is no longer free */
+	list_del_init(&track->sko_list);
+
+	sks->sks_age = jiffies;
+	sks->sks_ref++;
+	skc->skc_obj_alloc++;
+
+	/* Remember the high water mark for allocated objects */
+	if (skc->skc_obj_alloc > skc->skc_obj_max)
+		skc->skc_obj_max = skc->skc_obj_alloc;
+
+	/* First object from this slab: the slab itself is now in use */
+	if (sks->sks_ref == 1) {
+		skc->skc_slab_alloc++;
+
+		if (skc->skc_slab_alloc > skc->skc_slab_max)
+			skc->skc_slab_max = skc->skc_slab_alloc;
+	}
+
+	return (track->sko_addr);
+}
+
+/*
+ * Generic slab allocation function to run by the global work queues.
+ * It is responsible for allocating a new slab and linking it in to the
+ * list of partial slabs.  Waking any waiters is left to the caller.
+ *
+ * Returns 0 when a slab was added and -ENOMEM when the slab allocation
+ * failed.
+ */
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_slab_t *sks;
+
+	/* The slab allocation runs inside an spl_fstrans_mark() section */
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	sks = spl_slab_alloc(skc, flags);
+	spl_fstrans_unmark(cookie);
+
+	spin_lock(&skc->skc_lock);
+	if (sks) {
+		skc->skc_slab_total++;
+		skc->skc_obj_total += sks->sks_objs;
+		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+		/* A slab arrived, so the cache is no longer deadlocked */
+		smp_mb__before_atomic();
+		clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+		smp_mb__after_atomic();
+	}
+	spin_unlock(&skc->skc_lock);
+
+	return (sks == NULL ? -ENOMEM : 0);
+}
+
+/*
+ * Task queue callback dispatched by spl_cache_grow().  'data' is the
+ * spl_kmem_alloc_t describing the request; it is freed here.  The cache is
+ * grown by one slab, the reference taken at dispatch time is dropped,
+ * KMC_BIT_GROWING is cleared whether or not the grow succeeded, and the
+ * waiters on skc_waitq are woken only on success.
+ *
+ * NOTE(review): skc_ref is dropped before the flag is cleared and the
+ * waiters are woken, while spl_kmem_cache_destroy() frees the cache once
+ * skc_ref reads zero -- confirm the cache cannot be freed in between.
+ */
+static void
+spl_cache_grow_work(void *data)
+{
+	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+	spl_kmem_cache_t *skc = ska->ska_cache;
+
+	int error = __spl_cache_grow(skc, ska->ska_flags);
+
+	atomic_dec(&skc->skc_ref);
+	smp_mb__before_atomic();
+	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
+	smp_mb__after_atomic();
+	if (error == 0)
+		wake_up_all(&skc->skc_waitq);
+
+	kfree(ska);
+}
+
+/*
+ * Wait condition used by spl_cache_grow().  Returns non-zero once
+ * KMC_BIT_GROWING is clear, i.e. no grow request is in flight any more.
+ * The bit is cleared whether or not the grow succeeded, so a non-zero
+ * return means a new slab should be, but is not guaranteed to be,
+ * available.
+ */
+static int
+spl_cache_grow_wait(spl_kmem_cache_t *skc)
+{
+	return (!test_bit(KMC_BIT_GROWING, &skc->skc_flags));
+}
+
+/*
+ * No available objects on any slabs, create a new slab.  Note that this
+ * functionality is disabled for KMC_SLAB caches which are backed by the
+ * Linux slab.
+ *
+ * '*obj' is always reset to NULL first and is only set when an emergency
+ * object was allocated for immediate use, in which case 0 is returned.
+ * Every other path returns a non-zero value: -EAGAIN (or the wait error)
+ * after waiting for a reap to finish, -ENOMEM when the request could not
+ * be queued or after waiting on the asynchronous grow, or the error from
+ * spl_emergency_alloc().
+ */
+static int
+spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
+{
+	int remaining, rc = 0;
+
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
+
+	*obj = NULL;
+
+	/*
+	 * Since we can't sleep attempt an emergency allocation to satisfy
+	 * the request.  The only alternative is to fail the allocation but
+	 * it's preferable to try.  The use of KM_NOSLEEP is expected to be
+	 * rare.
+	 */
+	if (flags & KM_NOSLEEP)
+		return (spl_emergency_alloc(skc, flags, obj));
+
+	might_sleep();
+
+	/*
+	 * Before allocating a new slab wait for any reaping to complete and
+	 * then return so the local magazine can be rechecked for new objects.
+	 */
+	if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
+		rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING,
+		    TASK_UNINTERRUPTIBLE);
+		return (rc ? rc : -EAGAIN);
+	}
+
+	/*
+	 * Note: It would be nice to reduce the overhead of context switch
+	 * and improve NUMA locality, by trying to allocate a new slab in the
+	 * current process context with KM_NOSLEEP flag.
+	 *
+	 * However, this can't be applied to vmem/kvmem due to a bug that
+	 * spl_vmalloc() doesn't honor gfp flags in page table allocation.
+	 */
+
+	/*
+	 * This is handled by dispatching a work request to the global work
+	 * queue.  This allows us to asynchronously allocate a new slab while
+	 * retaining the ability to safely fall back to a smaller synchronous
+	 * allocations to ensure forward progress is always maintained.
+	 *
+	 * Only the caller which wins the KMC_BIT_GROWING test-and-set queues
+	 * a request; everyone else just waits below.
+	 */
+	if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) {
+		spl_kmem_alloc_t *ska;
+
+		ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags));
+		if (ska == NULL) {
+			/* Nothing was queued: undo the flag, wake waiters */
+			clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags);
+			smp_mb__after_atomic();
+			wake_up_all(&skc->skc_waitq);
+			return (-ENOMEM);
+		}
+
+		/* Reference is dropped again by spl_cache_grow_work() */
+		atomic_inc(&skc->skc_ref);
+		ska->ska_cache = skc;
+		ska->ska_flags = flags;
+		taskq_init_ent(&ska->ska_tqe);
+		taskq_dispatch_ent(spl_kmem_cache_taskq,
+		    spl_cache_grow_work, ska, 0, &ska->ska_tqe);
+	}
+
+	/*
+	 * The goal here is to only detect the rare case where a virtual slab
+	 * allocation has deadlocked.  We must be careful to minimize the use
+	 * of emergency objects which are more expensive to track.  Therefore,
+	 * we set a very long timeout for the asynchronous allocation and if
+	 * the timeout is reached the cache is flagged as deadlocked.  From
+	 * this point only new emergency objects will be allocated until the
+	 * asynchronous allocation completes and clears the deadlocked flag.
+	 *
+	 * NOTE(review): the timeout actually passed below is HZ / 10, i.e.
+	 * a tenth of a second -- confirm that matches "very long".
+	 */
+	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
+		rc = spl_emergency_alloc(skc, flags, obj);
+	} else {
+		remaining = wait_event_timeout(skc->skc_waitq,
+		    spl_cache_grow_wait(skc), HZ / 10);
+
+		if (!remaining) {
+			/* Timed out: flag a deadlock if still growing */
+			spin_lock(&skc->skc_lock);
+			if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
+				set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+				skc->skc_obj_deadlock++;
+			}
+			spin_unlock(&skc->skc_lock);
+		}
+
+		/* Always non-zero here, even if the grow completed in time */
+		rc = -ENOMEM;
+	}
+
+	return (rc);
+}
+
+/*
+ * Refill a per-cpu magazine with objects from the slabs for this cache.
+ * Ideally the magazine can be repopulated using existing objects which have
+ * been released, however if we are unable to locate enough free objects new
+ * slabs of objects will be created.  On success NULL is returned, otherwise
+ * the address of a single emergency object is returned for use by the caller.
+ *
+ * skc   - cache whose partial slabs supply the objects
+ * skm   - magazine to be refilled
+ * flags - KM_* allocation flags, forwarded to spl_cache_grow()
+ *
+ * The function re-enables interrupts around spl_cache_grow() and disables
+ * them again afterwards, so it expects to be entered with local interrupts
+ * disabled.  skc->skc_lock is not held on any return path.
+ */
+static void *
+spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
+{
+	spl_kmem_slab_t *sks;
+	int count = 0, rc, refill;
+	void *obj = NULL;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	/* Never move more objects than the magazine has room for */
+	refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
+	spin_lock(&skc->skc_lock);
+
+	while (refill > 0) {
+		/* No slabs available we may need to grow the cache */
+		if (list_empty(&skc->skc_partial_list)) {
+			/* Drop the lock before calling spl_cache_grow() */
+			spin_unlock(&skc->skc_lock);
+
+			local_irq_enable();
+			rc = spl_cache_grow(skc, flags, &obj);
+			local_irq_disable();
+
+			/* Emergency object for immediate use by caller */
+			if (rc == 0 && obj != NULL)
+				return (obj);
+
+			/* Any non-zero rc ends the refill; caller sees NULL */
+			if (rc)
+				goto out;
+
+			/* Rescheduled to different CPU skm is not local */
+			if (skm != skc->skc_mag[smp_processor_id()])
+				goto out;
+
+			/*
+			 * Potentially rescheduled to the same CPU but
+			 * allocations may have occurred from this CPU while
+			 * we were sleeping so recalculate max refill.
+			 */
+			refill = MIN(refill, skm->skm_size - skm->skm_avail);
+
+			/* Retake the lock and re-test the partial list */
+			spin_lock(&skc->skc_lock);
+			continue;
+		}
+
+		/* Grab the next available slab */
+		sks = list_entry((&skc->skc_partial_list)->next,
+		    spl_kmem_slab_t, sks_list);
+		ASSERT(sks->sks_magic == SKS_MAGIC);
+		ASSERT(sks->sks_ref < sks->sks_objs);
+		ASSERT(!list_empty(&sks->sks_free_list));
+
+		/*
+		 * Consume as many objects as needed to refill the requested
+		 * cache.  We must also be careful not to overfill it.
+		 *
+		 * ++count is always non-zero here, so it never terminates
+		 * the loop; it only maintains the counter used by the
+		 * ASSERT below.
+		 */
+		while (sks->sks_ref < sks->sks_objs && refill-- > 0 &&
+		    ++count) {
+			ASSERT(skm->skm_avail < skm->skm_size);
+			ASSERT(count < skm->skm_size);
+			skm->skm_objs[skm->skm_avail++] =
+			    spl_cache_obj(skc, sks);
+		}
+
+		/* Move slab to skc_complete_list when full */
+		if (sks->sks_ref == sks->sks_objs) {
+			list_del(&sks->sks_list);
+			list_add(&sks->sks_list, &skc->skc_complete_list);
+		}
+	}
+
+	spin_unlock(&skc->skc_lock);
+out:
+	/* Reached with skc_lock already released */
+	return (NULL);
+}
+
+/*
+ * Hand one object back to the slab that owns it and reposition that slab
+ * on the cache's partial list according to how full it now is.
+ */
+static void
+spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
+{
+	spl_kmem_obj_t *entry;
+	spl_kmem_slab_t *slab;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+
+	entry = spl_sko_from_obj(skc, obj);
+	ASSERT(entry->sko_magic == SKO_MAGIC);
+
+	slab = entry->sko_slab;
+	ASSERT(slab->sks_magic == SKS_MAGIC);
+	ASSERT(slab->sks_cache == skc);
+
+	/* Object goes back on the slab free list; update the accounting */
+	list_add(&entry->sko_list, &slab->sks_free_list);
+	slab->sks_age = jiffies;
+	slab->sks_ref--;
+	skc->skc_obj_alloc--;
+
+	/*
+	 * A slab which has just stopped being full is placed at the head
+	 * of skc_partial_list, keeping that list roughly ordered from
+	 * fullest (head) to emptiest (tail).
+	 */
+	if (slab->sks_ref == (slab->sks_objs - 1)) {
+		list_del(&slab->sks_list);
+		list_add(&slab->sks_list, &skc->skc_partial_list);
+	}
+
+	/*
+	 * A slab with no remaining references is moved to the tail of
+	 * the partial list where reclamation can find it cheaply.
+	 */
+	if (slab->sks_ref == 0) {
+		list_del(&slab->sks_list);
+		list_add_tail(&slab->sks_list, &skc->skc_partial_list);
+		skc->skc_slab_alloc--;
+	}
+}
+
+/*
+ * Hand out one object from the cache.  The per-cpu magazine is tried first;
+ * when it is empty it is refilled from the slabs.  Caches flagged KMC_SLAB
+ * bypass the magazines and allocate straight from the backing Linux slab.
+ * Unless KM_NOSLEEP is passed the allocation is retried until it succeeds.
+ */
+void *
+spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_magazine_t *mag;
+	void *buf = NULL;
+
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	/*
+	 * Linux slab backed cache: the underlying allocator does all the
+	 * work, we merely loop so that sleeping callers never see NULL.
+	 */
+	if (skc->skc_flags & KMC_SLAB) {
+		struct kmem_cache *slc = skc->skc_linux_cache;
+
+		for (;;) {
+			buf = kmem_cache_alloc(slc, kmem_flags_convert(flags));
+			if (buf != NULL || (flags & KM_NOSLEEP))
+				break;
+		}
+
+		/* Count the object purely for debuggability */
+		if (buf != NULL)
+			percpu_counter_inc(&skc->skc_linux_alloc);
+
+		goto ret;
+	}
+
+	local_irq_disable();
+
+	for (;;) {
+		/*
+		 * The per-cpu magazine may be touched without a lock, but
+		 * it has to be looked up again on every pass because
+		 * growing the cache may have changed which one is local.
+		 */
+		mag = skc->skc_mag[smp_processor_id()];
+		ASSERT(mag->skm_magic == SKM_MAGIC);
+
+		if (likely(mag->skm_avail)) {
+			/* Fast path, take the object from the magazine */
+			buf = mag->skm_objs[--mag->skm_avail];
+			local_irq_enable();
+			ASSERT(buf);
+			ASSERT(IS_P2ALIGNED(buf, skc->skc_obj_align));
+			break;
+		}
+
+		buf = spl_cache_refill(skc, mag, flags);
+		if (buf != NULL || (flags & KM_NOSLEEP)) {
+			local_irq_enable();
+			break;
+		}
+	}
+
+ret:
+	/* Run the constructor, or else just warm the CPU cache */
+	if (buf != NULL) {
+		if (skc->skc_ctor)
+			skc->skc_ctor(buf, skc->skc_private, flags);
+		else
+			prefetchw(buf);
+	}
+
+	return (buf);
+}
+EXPORT_SYMBOL(spl_kmem_cache_alloc);
+
+/*
+ * Return an object to the cache by pushing it onto the magazine of the
+ * CPU we are currently running on, which is not necessarily the magazine
+ * it was allocated from.  When that magazine is full part of it is first
+ * flushed back to the slabs to make room.
+ */
+void
+spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+{
+	spl_kmem_magazine_t *mag;
+	unsigned long irq_state;
+	int need_reclaim = 0;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	/* Destructor, if any, runs before the object is released */
+	if (skc->skc_dtor)
+		skc->skc_dtor(obj, skc->skc_private);
+
+	/* Linux slab backed caches free directly to the Linux slab */
+	if (skc->skc_flags & KMC_SLAB) {
+		kmem_cache_free(skc->skc_linux_cache, obj);
+		percpu_counter_dec(&skc->skc_linux_alloc);
+		return;
+	}
+
+	/*
+	 * Emergency objects never live at a virtual address, so only
+	 * non-vmalloc addresses need to be checked against the set of
+	 * outstanding emergency objects.
+	 */
+	if (!is_vmalloc_addr(obj)) {
+		int have_emergency;
+
+		spin_lock(&skc->skc_lock);
+		have_emergency = (skc->skc_obj_emergency > 0);
+		spin_unlock(&skc->skc_lock);
+
+		if (have_emergency && spl_emergency_free(skc, obj) == 0)
+			return;
+	}
+
+	local_irq_save(irq_state);
+
+	/*
+	 * The per-cpu magazine can be updated without a lock.  No remote
+	 * allocation tracking is done, so an object may be allocated on
+	 * one CPU and returned on another.
+	 */
+	mag = skc->skc_mag[smp_processor_id()];
+	ASSERT(mag->skm_magic == SKM_MAGIC);
+
+	/*
+	 * A full magazine is partially flushed first; this can leave
+	 * empty slabs behind which are reclaimed once interrupts have
+	 * been restored.
+	 */
+	if (unlikely(mag->skm_avail >= mag->skm_size)) {
+		spl_cache_flush(skc, mag, mag->skm_refill);
+		need_reclaim = 1;
+	}
+
+	/* There is room now, store the object */
+	mag->skm_objs[mag->skm_avail++] = obj;
+
+	local_irq_restore(irq_state);
+
+	if (need_reclaim)
+		spl_slab_reclaim(skc);
+}
+EXPORT_SYMBOL(spl_kmem_cache_free);
+
+/*
+ * Best-effort reap of a single cache.  The magazine of the current CPU is
+ * flushed back to the slabs and any slabs eligible for release are then
+ * reclaimed.  Released objects may simply land back in a magazine or slab
+ * and have to age out later; the aim is to avoid thrashing slab creation
+ * and destruction.  Caches backed by a Linux slab are left alone.
+ */
+void
+spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
+{
+	spl_kmem_magazine_t *mag;
+	unsigned long irq_state;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	if (skc->skc_flags & KMC_SLAB)
+		return;
+
+	atomic_inc(&skc->skc_ref);
+
+	/* Only one reaper at a time; back off if the bit was already set */
+	if (!test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
+		/* Empty the local magazine, then free now-empty slabs */
+		local_irq_save(irq_state);
+		mag = skc->skc_mag[smp_processor_id()];
+		spl_cache_flush(skc, mag, mag->skm_avail);
+		local_irq_restore(irq_state);
+
+		spl_slab_reclaim(skc);
+
+		/* Drop the reaping bit and wake anyone waiting on it */
+		clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags);
+		smp_mb__after_atomic();
+		wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
+	}
+
+	atomic_dec(&skc->skc_ref);
+}
+EXPORT_SYMBOL(spl_kmem_cache_reap_now);
+
+/*
+ * This is stubbed out for code consistency with other platforms.  There
+ * is existing logic to prevent concurrent reaping so while this is ugly
+ * it should do no harm.
+ *
+ * Always returns 0.
+ */
+int
+spl_kmem_cache_reap_active(void)
+{
+	return (0);
+}
+EXPORT_SYMBOL(spl_kmem_cache_reap_active);
+
+/*
+ * Walk the list of registered caches, holding spl_kmem_cache_sem for
+ * reading, and reap each one in turn.
+ */
+void
+spl_kmem_reap(void)
+{
+	spl_kmem_cache_t *cache = NULL;
+
+	down_read(&spl_kmem_cache_sem);
+
+	list_for_each_entry(cache, &spl_kmem_cache_list, skc_list)
+		spl_kmem_cache_reap_now(cache);
+
+	up_read(&spl_kmem_cache_sem);
+}
+EXPORT_SYMBOL(spl_kmem_reap);
+
+/*
+ * Initialise the global cache list, its rw-semaphore, and the taskq that
+ * spl_cache_grow() dispatches slab allocations to.
+ *
+ * Returns 0 on success or -ENOMEM when the taskq could not be created.
+ */
+int
+spl_kmem_cache_init(void)
+{
+	init_rwsem(&spl_kmem_cache_sem);
+	INIT_LIST_HEAD(&spl_kmem_cache_list);
+	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
+	    spl_kmem_cache_kmem_threads, maxclsyspri,
+	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
+	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+
+	/*
+	 * Without a taskq every later taskq_dispatch_ent() on it would
+	 * dereference NULL, so report the failure to the caller instead.
+	 */
+	if (spl_kmem_cache_taskq == NULL)
+		return (-ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Tear down the taskq created by spl_kmem_cache_init().
+ */
+void
+spl_kmem_cache_fini(void)
+{
+	taskq_destroy(spl_kmem_cache_taskq);
+}

diff --git a/zfs/module/os/linux/spl/spl-kmem.c b/zfs/module/os/linux/spl/spl-kmem.c
new file mode 100644
index 0000000..943966c
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-kmem.c

@@ -0,0 +1,617 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/vmem.h>
+
+/*
+ * As a general rule kmem_alloc() allocations should be small, preferably
+ * just a few pages since they must be physically contiguous.  Therefore, a
+ * rate limited warning will be printed to the console for any kmem_alloc()
+ * which exceeds a reasonable threshold.
+ *
+ * The default warning threshold is set to sixteen pages but capped at 64K to
+ * accommodate systems using large pages.  This value was selected to be small
+ * enough to ensure the largest allocations are quickly noticed and fixed.
+ * But large enough to avoid logging any warnings when an allocation size is
+ * larger than optimal but not a serious concern.  Since this value is tunable,
+ * developers are encouraged to set it lower when testing so any new largish
+ * allocations are quickly caught.  These warnings may be disabled by setting
+ * the threshold to zero.
+ */
+/* BEGIN CSTYLED */
+unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
+module_param(spl_kmem_alloc_warn, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_alloc_warn,
+	"Warning threshold in bytes for a kmem_alloc()");
+EXPORT_SYMBOL(spl_kmem_alloc_warn);
+
+/*
+ * Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
+ * Allocations which are marginally smaller than this limit may succeed but
+ * should still be avoided due to the expense of locating a contiguous range
+ * of free pages.  Therefore, a maximum kmem size with a reasonable safety
+ * margin of 4x is set.  Kmem_alloc() allocations larger than this maximum
+ * will quickly fail.  Vmem_alloc() allocations less than or equal to this
+ * value will use kmalloc(), but shift to vmalloc() when exceeding this value.
+ */
+unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
+module_param(spl_kmem_alloc_max, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_alloc_max,
+	"Maximum size in bytes for a kmem_alloc()");
+EXPORT_SYMBOL(spl_kmem_alloc_max);
+/* END CSTYLED */
+
+/*
+ * Always returns 0.
+ * NOTE(review): presumably kept for Illumos API compatibility - confirm.
+ */
+int
+kmem_debugging(void)
+{
+	return (0);
+}
+EXPORT_SYMBOL(kmem_debugging);
+
+/*
+ * Format into a freshly allocated string using a va_list.  The allocation
+ * is retried until it succeeds, so NULL is never returned.  The caller's
+ * va_list is left untouched because each attempt works on a copy.
+ */
+char *
+kmem_vasprintf(const char *fmt, va_list ap)
+{
+	char *out;
+
+	for (;;) {
+		va_list attempt;
+
+		va_copy(attempt, ap);
+		out = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, attempt);
+		va_end(attempt);
+
+		if (out != NULL)
+			break;
+	}
+
+	return (out);
+}
+EXPORT_SYMBOL(kmem_vasprintf);
+
+/*
+ * printf-style formatting into a freshly allocated string.  The allocation
+ * is retried until it succeeds, so NULL is never returned.
+ */
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+	char *out;
+
+	for (;;) {
+		va_list args;
+
+		va_start(args, fmt);
+		out = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, args);
+		va_end(args);
+
+		if (out != NULL)
+			break;
+	}
+
+	return (out);
+}
+EXPORT_SYMBOL(kmem_asprintf);
+
+/*
+ * Duplicate a NUL terminated string into kmalloc()ed memory.
+ *
+ * str   - string to copy
+ * flags - KM_* flags, translated with kmem_flags_convert()
+ *
+ * Returns the copy, or NULL when kmalloc() fails.
+ */
+static char *
+__strdup(const char *str, int flags)
+{
+	char *ptr;
+	size_t n;
+
+	/*
+	 * Keep the length in size_t, the type strlen() returns and
+	 * kmalloc()/memcpy() take, rather than narrowing it through int.
+	 */
+	n = strlen(str) + 1;
+	ptr = kmalloc(n, kmem_flags_convert(flags));
+	if (ptr != NULL)
+		memcpy(ptr, str, n);
+
+	return (ptr);
+}
+
+/*
+ * Duplicate str via __strdup() using KM_SLEEP.  The result is released
+ * with kmem_strfree().
+ */
+char *
+kmem_strdup(const char *str)
+{
+	return (__strdup(str, KM_SLEEP));
+}
+EXPORT_SYMBOL(kmem_strdup);
+
+/*
+ * Release a string with kfree().
+ */
+void
+kmem_strfree(char *str)
+{
+	kfree(str);
+}
+EXPORT_SYMBOL(kmem_strfree);
+
+/*
+ * Allocate size bytes, preferring physically contiguous memory and falling
+ * back to virtual memory.
+ *
+ * When HAVE_KVMALLOC is defined and lflags contains all GFP_KERNEL bits the
+ * request is handed to kvmalloc().  Otherwise kmalloc_node() is tried first
+ * and spl_vmalloc() is used only if that fails, size exceeds PAGE_SIZE and
+ * lflags contains all __GFP_RECLAIM bits.
+ *
+ * Returns the allocation or NULL.
+ */
+void *
+spl_kvmalloc(size_t size, gfp_t lflags)
+{
+#ifdef HAVE_KVMALLOC
+	/*
+	 * GFP_KERNEL allocations can safely use kvmalloc which may
+	 * improve performance by avoiding a) high latency caused by
+	 * vmalloc's on-access allocation, b) performance loss due to
+	 * MMU memory address mapping and c) vmalloc locking overhead.
+	 * This has the side-effect that the slab statistics will
+	 * incorrectly report this as a vmem allocation, but that is
+	 * purely cosmetic.
+	 */
+	if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+		return (kvmalloc(size, lflags));
+#endif
+
+	/* Flags for the kmalloc attempt only; lflags itself is kept as-is */
+	gfp_t kmalloc_lflags = lflags;
+
+	if (size > PAGE_SIZE) {
+		/*
+		 * We need to set __GFP_NOWARN here since spl_kvmalloc is not
+		 * only called by spl_kmem_alloc_impl but can be called
+		 * directly with custom lflags, too. In that case
+		 * kmem_flags_convert does not get called, which would
+		 * implicitly set __GFP_NOWARN.
+		 */
+		kmalloc_lflags |= __GFP_NOWARN;
+
+		/*
+		 * N.B. __GFP_RETRY_MAYFAIL is supported only for large
+		 * (>32kB) allocations.
+		 *
+		 * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY
+		 * for !costly requests because there is no other way to tell
+		 * the allocator that we want to fail rather than retry
+		 * endlessly.
+		 */
+		if (!(kmalloc_lflags & __GFP_RETRY_MAYFAIL) ||
+		    (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+			kmalloc_lflags |= __GFP_NORETRY;
+		}
+	}
+
+	/*
+	 * We first try kmalloc - even for big sizes - and fall back to
+	 * spl_vmalloc if that fails.
+	 *
+	 * For non-__GFP_RECLAIM allocations we always stick to
+	 * kmalloc_node, and fail when kmalloc is not successful (returns
+	 * NULL).
+	 * We cannot fall back to spl_vmalloc in this case because spl_vmalloc
+	 * internally uses GFP_KERNEL allocations.
+	 */
+	void *ptr = kmalloc_node(size, kmalloc_lflags, NUMA_NO_NODE);
+	if (ptr || size <= PAGE_SIZE ||
+	    (lflags & __GFP_RECLAIM) != __GFP_RECLAIM) {
+		return (ptr);
+	}
+
+	/* The fallback uses the caller's original lflags */
+	return (spl_vmalloc(size, lflags | __GFP_HIGHMEM));
+}
+
+/*
+ * Common back end for kmem_alloc() and friends, blending the Linux and
+ * Illumos allocator conventions.  It must not be exported: consumers are
+ * expected to go through the wrappers, which enforce the common flags and
+ * so keep callers of kmem_alloc()/kmem_zalloc() reasonably portable.
+ */
+inline void *
+spl_kmem_alloc_impl(size_t size, int flags, int node)
+{
+	gfp_t lflags = kmem_flags_convert(flags);
+	void *ptr;
+
+	/*
+	 * Complain about unusually large requests.  Anything bigger than
+	 * spl_kmem_alloc_warn ought to be using the vmem_alloc() or
+	 * vmem_zalloc() interfaces instead.
+	 */
+	if (!(flags & KM_VMEM) && (spl_kmem_alloc_warn > 0) &&
+	    (size > spl_kmem_alloc_warn)) {
+		printk(KERN_WARNING
+		    "Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
+		    "https://github.com/openzfs/zfs/issues/new\n",
+		    (unsigned long)size, flags);
+		dump_stack();
+	}
+
+	/*
+	 * kmalloc_node() may fail even with GFP_KERNEL, whereas KM_SLEEP on
+	 * Illumos may not, hence the retry loop.
+	 */
+	for (;;) {
+		if (size > spl_kmem_alloc_max) {
+			/*
+			 * Too large for kmalloc_node().  kmem callers fail
+			 * outright; vmem callers may use spl_vmalloc(),
+			 * although it is discouraged since it takes a
+			 * global lock and contention on it hurts
+			 * performance.
+			 */
+			if (!(flags & KM_VMEM))
+				return (NULL);
+
+			ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM);
+		} else if (flags & KM_VMEM) {
+			ptr = spl_kvmalloc(size, lflags);
+		} else {
+			ptr = kmalloc_node(size, lflags, node);
+		}
+
+		if (likely(ptr) || (flags & KM_NOSLEEP))
+			return (ptr);
+
+		/*
+		 * From now on ask the allocator to try hard, while still
+		 * permitting failure when no progress can be made.
+		 */
+		if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+			lflags |= __GFP_RETRY_MAYFAIL;
+
+		/*
+		 * cond_resched() rather than congestion_wait() so systems
+		 * without block devices cannot deadlock here.
+		 */
+		cond_resched();
+	}
+}
+
+/*
+ * Release memory obtained from spl_kmem_alloc_impl(), choosing between
+ * kfree() and vfree() by looking at the address.  The size is unused.
+ */
+inline void
+spl_kmem_free_impl(const void *buf, size_t size)
+{
+	if (!is_vmalloc_addr(buf)) {
+		kfree(buf);
+		return;
+	}
+
+	vfree(buf);
+}
+
+/*
+ * Memory allocation and accounting for kmem_* style allocations.  When
+ * DEBUG_KMEM is enabled the total memory allocated will be tracked and
+ * any memory leaked will be reported during module unload.
+ *
+ * ./configure --enable-debug-kmem
+ */
+#ifdef DEBUG_KMEM
+
+/* Shim layer memory accounting */
+/* Only the type of kmem_alloc_used differs between the two branches */
+#ifdef HAVE_ATOMIC64_T
+atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
+unsigned long long kmem_alloc_max = 0;
+#else  /* HAVE_ATOMIC64_T */
+atomic_t kmem_alloc_used = ATOMIC_INIT(0);
+unsigned long long kmem_alloc_max = 0;
+#endif /* HAVE_ATOMIC64_T */
+
+EXPORT_SYMBOL(kmem_alloc_used);
+EXPORT_SYMBOL(kmem_alloc_max);
+
+/*
+ * Accounting wrapper around spl_kmem_alloc_impl().  A successful allocation
+ * is added to kmem_alloc_used and kmem_alloc_max is raised whenever the
+ * running total exceeds it.
+ */
+inline void *
+spl_kmem_alloc_debug(size_t size, int flags, int node)
+{
+	void *buf = spl_kmem_alloc_impl(size, flags, node);
+
+	/* Nothing to account for when the allocation failed */
+	if (!buf)
+		return (buf);
+
+	kmem_alloc_used_add(size);
+	if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
+		kmem_alloc_max = kmem_alloc_used_read();
+
+	return (buf);
+}
+
+/*
+ * Accounting wrapper around spl_kmem_free_impl(): subtracts size from
+ * kmem_alloc_used and then releases the memory.
+ */
+inline void
+spl_kmem_free_debug(const void *ptr, size_t size)
+{
+	kmem_alloc_used_sub(size);
+	spl_kmem_free_impl(ptr, size);
+}
+
+/*
+ * When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
+ * but also the location of every alloc and free.  When the SPL module is
+ * unloaded a list of all leaked addresses and where they were allocated
+ * will be dumped to the console.  Enabling this feature has a significant
+ * impact on performance but it makes finding memory leaks straight forward.
+ *
+ * Not surprisingly with debugging enabled the xmem_locks are very highly
+ * contended particularly on xfree().  If we want to run with this detailed
+ * debugging enabled for anything other than debugging we need to minimize
+ * the contention by moving to a lock per xmem_table entry model.
+ *
+ * ./configure --enable-debug-kmem-tracking
+ */
+#ifdef DEBUG_KMEM_TRACKING
+
+#include <linux/hash.h>
+#include <linux/ctype.h>
+
+/* The tracking hash table has 2^KMEM_HASH_BITS buckets */
+#define	KMEM_HASH_BITS		10
+#define	KMEM_TABLE_SIZE		(1 << KMEM_HASH_BITS)
+
+/* One record per tracked allocation */
+typedef struct kmem_debug {
+	struct hlist_node kd_hlist;	/* Hash node linkage */
+	struct list_head kd_list;	/* List of all allocations */
+	void *kd_addr;			/* Allocation pointer */
+	size_t kd_size;			/* Allocation size */
+	const char *kd_func;		/* Allocation function */
+	int kd_line;			/* Allocation line */
+} kmem_debug_t;
+
+/* kmem_lock is held whenever kmem_table or kmem_list is modified or walked */
+static spinlock_t kmem_lock;
+static struct hlist_head kmem_table[KMEM_TABLE_SIZE];
+static struct list_head kmem_list;
+
+/*
+ * Look up the tracking record for addr and, if one exists, unlink it from
+ * both its hash chain and the global allocation list.  The search and the
+ * unlink happen under the supplied lock with interrupts saved.
+ *
+ * Returns the unlinked record, or NULL when addr is not tracked.
+ */
+static kmem_debug_t *
+kmem_del_init(spinlock_t *lock, struct hlist_head *table,
+    int bits, const void *addr)
+{
+	struct hlist_head *bucket;
+	struct hlist_node *pos = NULL;
+	struct kmem_debug *found = NULL;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(lock, irq_state);
+
+	bucket = &table[hash_ptr((void *)addr, bits)];
+	hlist_for_each(pos, bucket) {
+		struct kmem_debug *cur;
+
+		cur = list_entry(pos, struct kmem_debug, kd_hlist);
+		if (cur->kd_addr != addr)
+			continue;
+
+		hlist_del_init(&cur->kd_hlist);
+		list_del_init(&cur->kd_list);
+		found = cur;
+		break;
+	}
+
+	spin_unlock_irqrestore(lock, irq_state);
+
+	return (found);
+}
+
+/*
+ * Tracking allocator.  In addition to the accounting performed by
+ * spl_kmem_alloc_debug() a kmem_debug_t record holding the address, size,
+ * calling function and line is created and linked into kmem_table and
+ * kmem_list.  Returns NULL if the record, the copy of the function name or
+ * the allocation itself cannot be obtained.
+ */
+inline void *
+spl_kmem_alloc_track(size_t size, int flags,
+    const char *func, int line, int node)
+{
+	kmem_debug_t *rec;
+	unsigned long irq_state;
+	void *buf;
+
+	rec = kmalloc(sizeof (kmem_debug_t), kmem_flags_convert(flags));
+	if (rec == NULL)
+		return (NULL);
+
+	rec->kd_func = __strdup(func, flags);
+	if (rec->kd_func == NULL)
+		goto free_record;
+
+	buf = spl_kmem_alloc_debug(size, flags, node);
+	if (buf == NULL)
+		goto free_name;
+
+	INIT_HLIST_NODE(&rec->kd_hlist);
+	INIT_LIST_HEAD(&rec->kd_list);
+
+	rec->kd_addr = buf;
+	rec->kd_size = size;
+	rec->kd_line = line;
+
+	/* Publish the record in the hash table and on the global list */
+	spin_lock_irqsave(&kmem_lock, irq_state);
+	hlist_add_head(&rec->kd_hlist,
+	    &kmem_table[hash_ptr(buf, KMEM_HASH_BITS)]);
+	list_add_tail(&rec->kd_list, &kmem_list);
+	spin_unlock_irqrestore(&kmem_lock, irq_state);
+
+	return (buf);
+
+free_name:
+	kfree(rec->kd_func);
+free_record:
+	kfree(rec);
+	return (NULL);
+}
+
+/*
+ * Tracking free.  Removes the kmem_debug_t record for ptr from the tracking
+ * structures, frees the record and its function-name copy, then releases
+ * the allocation itself through spl_kmem_free_debug().  A NULL ptr is a
+ * no-op.
+ */
+inline void
+spl_kmem_free_track(const void *ptr, size_t size)
+{
+	kmem_debug_t *dptr;
+
+	/* Ignore NULL pointer since we haven't tracked it at all */
+	if (ptr == NULL)
+		return;
+
+	/* Must exist in hash due to kmem_alloc() */
+	dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
+	ASSERT3P(dptr, !=, NULL);
+	/* The size passed to free must match the size recorded at alloc */
+	ASSERT3S(dptr->kd_size, ==, size);
+
+	kfree(dptr->kd_func);
+	kfree(dptr);
+
+	spl_kmem_free_debug(ptr, size);
+}
+#endif /* DEBUG_KMEM_TRACKING */
+#endif /* DEBUG_KMEM */
+
+/*
+ * Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
+ */
+
+/*
+ * Allocate size bytes.  The build configuration selects the back end:
+ * plain (no DEBUG_KMEM), accounting (DEBUG_KMEM only) or tracking
+ * (DEBUG_KMEM and DEBUG_KMEM_TRACKING).  func and line are used only by
+ * the tracking back end.
+ */
+void *
+spl_kmem_alloc(size_t size, int flags, const char *func, int line)
+{
+	/* Only flags within KM_PUBLIC_MASK are accepted */
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+
+#if !defined(DEBUG_KMEM)
+	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
+#elif !defined(DEBUG_KMEM_TRACKING)
+	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
+#else
+	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
+#endif
+}
+EXPORT_SYMBOL(spl_kmem_alloc);
+
+/*
+ * Same as spl_kmem_alloc() except that KM_ZERO is added to the flags
+ * before the request is passed to the selected back end.
+ */
+void *
+spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
+{
+	/* Only flags within KM_PUBLIC_MASK are accepted */
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+
+	flags |= KM_ZERO;
+
+#if !defined(DEBUG_KMEM)
+	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
+#elif !defined(DEBUG_KMEM_TRACKING)
+	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
+#else
+	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
+#endif
+}
+EXPORT_SYMBOL(spl_kmem_zalloc);
+
+/*
+ * Free memory through the back end matching the build configuration:
+ * plain (no DEBUG_KMEM), accounting (DEBUG_KMEM only) or tracking
+ * (DEBUG_KMEM and DEBUG_KMEM_TRACKING).
+ */
+void
+spl_kmem_free(const void *buf, size_t size)
+{
+#if !defined(DEBUG_KMEM)
+	return (spl_kmem_free_impl(buf, size));
+#elif !defined(DEBUG_KMEM_TRACKING)
+	return (spl_kmem_free_debug(buf, size));
+#else
+	return (spl_kmem_free_track(buf, size));
+#endif
+}
+EXPORT_SYMBOL(spl_kmem_free);
+
+#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
+/*
+ * Render the first bytes of a tracked allocation into str for the leak
+ * report.
+ *
+ * kd  - tracking record describing the allocation
+ * str - output buffer, at least 17 bytes
+ * len - size of str in bytes
+ * min - index that the first non-printable byte must exceed for the data
+ *       to be shown as text
+ *
+ * Printable leading bytes are copied as text.  If a non-printable byte is
+ * found at an index no greater than min, a hex dump of the bytes at even
+ * offsets 0..14 is produced instead.  Returns str.
+ */
+static char *
+spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
+{
+	const uint8_t *data = (const uint8_t *)kd->kd_addr;
+	int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
+	int i, flag = 1;
+
+	ASSERT(str != NULL && len >= 17);
+	memset(str, 0, len);
+
+	/*
+	 * Check for a fully printable string, and while we are at
+	 * it place the printable characters in the passed buffer.
+	 */
+	for (i = 0; i < size; i++) {
+		str[i] = ((char *)(kd->kd_addr))[i];
+		if (isprint(str[i]))
+			continue;
+
+		/*
+		 * Do not leave the non-printable byte in the text; the
+		 * string ends at the last printable character.
+		 */
+		str[i] = '\0';
+
+		/*
+		 * Too few printable characters were found to make it
+		 * worthwhile to print this as ascii.
+		 */
+		if (i <= min)
+			flag = 0;
+
+		break;
+	}
+
+	if (!flag) {
+		uint8_t sample[8];
+
+		/*
+		 * The allocation may be shorter than 15 bytes, so never
+		 * read past kd_size; missing bytes are shown as 00.
+		 */
+		for (i = 0; i < 8; i++) {
+			size_t off = (size_t)i * 2;
+
+			sample[i] = (off < kd->kd_size) ? data[off] : 0;
+		}
+
+		/* Bounded by len so the caller's buffer cannot overflow */
+		snprintf(str, len, "%02x%02x%02x%02x%02x%02x%02x%02x",
+		    sample[0], sample[1], sample[2], sample[3],
+		    sample[4], sample[5], sample[6], sample[7]);
+	}
+
+	return (str);
+}
+
+/*
+ * Prepare the tracking state: the lock, the list of all allocations and
+ * the first size buckets of kmem_table.  Always returns 0.
+ */
+static int
+spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
+{
+	int bucket = 0;
+
+	spin_lock_init(lock);
+	INIT_LIST_HEAD(list);
+
+	while (bucket < size) {
+		INIT_HLIST_HEAD(&kmem_table[bucket]);
+		bucket++;
+	}
+
+	return (0);
+}
+
+/*
+ * Print one line for every allocation still present on the tracking list,
+ * preceded by a column header when the list is not empty.  The list is
+ * walked with the lock held and interrupts saved.
+ */
+static void
+spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
+{
+	kmem_debug_t *leak = NULL;
+	unsigned long irq_state;
+	char data[17];
+
+	spin_lock_irqsave(lock, irq_state);
+
+	if (!list_empty(list)) {
+		printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
+		    "size", "data", "func", "line");
+	}
+
+	list_for_each_entry(leak, list, kd_list)
+		printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", leak->kd_addr,
+		    (int)leak->kd_size, spl_sprintf_addr(leak, data, 17, 8),
+		    leak->kd_func, leak->kd_line);
+
+	spin_unlock_irqrestore(lock, irq_state);
+}
+#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
+
+/*
+ * Module-load initialisation for the kmem layer.  With DEBUG_KMEM the
+ * usage counter is reset, and with DEBUG_KMEM_TRACKING the tracking
+ * structures are initialised as well.  Always returns 0.
+ */
+int
+spl_kmem_init(void)
+{
+
+#ifdef DEBUG_KMEM
+	kmem_alloc_used_set(0);
+
+
+
+#ifdef DEBUG_KMEM_TRACKING
+	spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
+#endif /* DEBUG_KMEM_TRACKING */
+#endif /* DEBUG_KMEM */
+
+	return (0);
+}
+
+/*
+ * Module-unload teardown for the kmem layer.  With DEBUG_KMEM a warning is
+ * logged if any accounted memory is still outstanding, and with
+ * DEBUG_KMEM_TRACKING every leaked allocation is listed.
+ */
+void
+spl_kmem_fini(void)
+{
+#ifdef DEBUG_KMEM
+	/*
+	 * Display all unreclaimed memory addresses, including the
+	 * allocation size and the first few bytes of what's located
+	 * at that address to aid in debugging.  Performance is not
+	 * a serious concern here since it is module unload time.
+	 */
+	if (kmem_alloc_used_read() != 0)
+		/* %lu matches the unsigned long cast of the argument */
+		printk(KERN_WARNING "kmem leaked %lu/%llu bytes\n",
+		    (unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
+
+#ifdef DEBUG_KMEM_TRACKING
+	spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
+#endif /* DEBUG_KMEM_TRACKING */
+#endif /* DEBUG_KMEM */
+}

diff --git a/zfs/module/os/linux/spl/spl-kstat.c b/zfs/module/os/linux/spl/spl-kstat.c
new file mode 100644
index 0000000..b5666e7
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-kstat.c

@@ -0,0 +1,715 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Kstat Implementation.
+ *
+ *  Links to Illumos.org for more information on kstat function:
+ *  [1] https://illumos.org/man/1M/kstat
+ *  [2] https://illumos.org/man/9f/kstat_create
+ */
+
+#include <linux/seq_file.h>
+#include <sys/kstat.h>
+#include <sys/vmem.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+
+/* Held while kstat_module_list or kstat_id is read or modified. */
+static kmutex_t kstat_module_lock;
+/* All kstat_module_t entries, linked through ksm_module_list. */
+static struct list_head kstat_module_list;
+/* Next id to hand out; copied into ks_kid by __kstat_create(). */
+static kid_t kstat_id;
+
+/*
+ * Replace the raw output buffer with one twice as large, capped at
+ * KSTAT_RAW_MAX.  The previous contents are discarded, not copied.
+ * Returns 0 on success or ENOMEM when the buffer is already at the cap.
+ */
+static int
+kstat_resize_raw(kstat_t *ksp)
+{
+	if (ksp->ks_raw_bufsize != KSTAT_RAW_MAX) {
+		vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
+		ksp->ks_raw_bufsize =
+		    MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
+		ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
+		return (0);
+	}
+
+	return (ENOMEM);
+}
+
+/*
+ * Emit the header lines for a kstat file: first a summary line built from
+ * the kstat's id, type, flags, ndata, data size, creation time and
+ * snapshot time, then a column header that depends on ks_type.  For raw
+ * kstats the optional headers() callback fills ks_raw_buf, which is then
+ * written out; without a callback the fixed text "raw data" is used.
+ *
+ * Returns the negated result of the raw headers() callback, otherwise 0.
+ */
+static int
+kstat_seq_show_headers(struct seq_file *f)
+{
+	kstat_t *ksp = (kstat_t *)f->private;
+	int rc = 0;
+
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
+	    ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
+	    ksp->ks_ndata, (int)ksp->ks_data_size,
+	    ksp->ks_crtime, ksp->ks_snaptime);
+
+	switch (ksp->ks_type) {
+		case KSTAT_TYPE_RAW:
+restart:
+			if (ksp->ks_raw_ops.headers) {
+				rc = ksp->ks_raw_ops.headers(
+				    ksp->ks_raw_buf, ksp->ks_raw_bufsize);
+				/* On ENOMEM grow the buffer and retry. */
+				if (rc == ENOMEM && !kstat_resize_raw(ksp))
+					goto restart;
+				if (!rc)
+					seq_puts(f, ksp->ks_raw_buf);
+			} else {
+				seq_printf(f, "raw data\n");
+			}
+			break;
+		case KSTAT_TYPE_NAMED:
+			seq_printf(f, "%-31s %-4s %s\n",
+			    "name", "type", "data");
+			break;
+		case KSTAT_TYPE_INTR:
+			seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
+			    "hard", "soft", "watchdog",
+			    "spurious", "multsvc");
+			break;
+		case KSTAT_TYPE_IO:
+			seq_printf(f,
+			    "%-8s %-8s %-8s %-8s %-8s %-8s "
+			    "%-8s %-8s %-8s %-8s %-8s %-8s\n",
+			    "nread", "nwritten", "reads", "writes",
+			    "wtime", "wlentime", "wupdate",
+			    "rtime", "rlentime", "rupdate",
+			    "wcnt", "rcnt");
+			break;
+		case KSTAT_TYPE_TIMER:
+			seq_printf(f,
+			    "%-31s %-8s "
+			    "%-8s %-8s %-8s %-8s %-8s\n",
+			    "name", "events", "elapsed",
+			    "min", "max", "start", "stop");
+			break;
+		default:
+			PANIC("Undefined kstat type %d\n", ksp->ks_type);
+	}
+
+	return (-rc);
+}
+
+/*
+ * Hex-dump 'l' bytes starting at 'p', sixteen bytes per row, each row
+ * prefixed with its row index in hex.  A final, possibly empty, row is
+ * always emitted once the data runs out.  Always returns 0.
+ */
+static int
+kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
+{
+	int row = 0;
+	int col;
+
+	for (;;) {
+		seq_printf(f, "%03x:", row);
+
+		col = 0;
+		while (col < 16 && row * 16 + col < l) {
+			seq_printf(f, " %02x",
+			    (unsigned char)p[row * 16 + col]);
+			col++;
+		}
+		seq_printf(f, "\n");
+
+		/* A short row means the data was exhausted. */
+		if (col < 16)
+			break;
+		row++;
+	}
+
+	return (0);
+}
+
+/*
+ * Print one named kstat record: its name and numeric data type followed
+ * by the value formatted according to data_type.  Note that the CHAR and
+ * STRING cases write a terminating NUL into the record before printing.
+ * Always returns 0.
+ */
+static int
+kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
+{
+	seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);
+
+	switch (knp->data_type) {
+		case KSTAT_DATA_CHAR:
+			knp->value.c[15] = '\0'; /* NULL terminate */
+			seq_printf(f, "%-16s", knp->value.c);
+			break;
+		/*
+		 * NOTE - We need to be more careful about what tokens are
+		 * used for each arch, for now this is correct for x86_64.
+		 */
+		case KSTAT_DATA_INT32:
+			seq_printf(f, "%d", knp->value.i32);
+			break;
+		case KSTAT_DATA_UINT32:
+			seq_printf(f, "%u", knp->value.ui32);
+			break;
+		case KSTAT_DATA_INT64:
+			seq_printf(f, "%lld", (signed long long)knp->value.i64);
+			break;
+		case KSTAT_DATA_UINT64:
+			seq_printf(f, "%llu",
+			    (unsigned long long)knp->value.ui64);
+			break;
+		case KSTAT_DATA_LONG:
+			seq_printf(f, "%ld", knp->value.l);
+			break;
+		case KSTAT_DATA_ULONG:
+			seq_printf(f, "%lu", knp->value.ul);
+			break;
+		case KSTAT_DATA_STRING:
+			/* Force termination at the end of the buffer. */
+			KSTAT_NAMED_STR_PTR(knp)
+				[KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
+			seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
+			break;
+		default:
+			PANIC("Undefined kstat data type %d\n", knp->data_type);
+	}
+
+	seq_printf(f, "\n");
+
+	return (0);
+}
+
+/*
+ * Print one interrupt kstat record as five left-aligned counters in the
+ * order hard, soft, watchdog, spurious, multsvc.  Always returns 0.
+ */
+static int
+kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
+{
+	seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
+	    kip->intrs[KSTAT_INTR_HARD],
+	    kip->intrs[KSTAT_INTR_SOFT],
+	    kip->intrs[KSTAT_INTR_WATCHDOG],
+	    kip->intrs[KSTAT_INTR_SPURIOUS],
+	    kip->intrs[KSTAT_INTR_MULTSVC]);
+
+	return (0);
+}
+
+/*
+ * Print one I/O kstat record as twelve left-aligned columns, in the same
+ * order as the IO column header written by kstat_seq_show_headers().
+ * Always returns 0.
+ */
+static int
+kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
+{
+	/* though wlentime & friends are signed, they will never be negative */
+	seq_printf(f,
+	    "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
+	    "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
+	    kip->nread, kip->nwritten,
+	    kip->reads, kip->writes,
+	    kip->wtime, kip->wlentime, kip->wlastupdate,
+	    kip->rtime, kip->rlentime, kip->rlastupdate,
+	    kip->wcnt,  kip->rcnt);
+
+	return (0);
+}
+
+/*
+ * Print one timer kstat record: the name followed by the event count and
+ * the elapsed, min, max, start and stop times.  Always returns 0.
+ */
+static int
+kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
+{
+	seq_printf(f,
+	    "%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n",
+	    ktp->name, ktp->num_events, ktp->elapsed_time,
+	    ktp->min_time, ktp->max_time,
+	    ktp->start_time, ktp->stop_time);
+
+	return (0);
+}
+
+/*
+ * seq_file show callback: format the single record 'p' according to the
+ * kstat's type.  Raw kstats use the data() callback when one is set,
+ * growing ks_raw_buf and retrying on ENOMEM; otherwise the whole ks_data
+ * area is hex-dumped.  Returns the negated result of the helper that
+ * produced the output.
+ */
+static int
+kstat_seq_show(struct seq_file *f, void *p)
+{
+	kstat_t *ksp = (kstat_t *)f->private;
+	int rc = 0;
+
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	switch (ksp->ks_type) {
+		case KSTAT_TYPE_RAW:
+restart:
+			if (ksp->ks_raw_ops.data) {
+				rc = ksp->ks_raw_ops.data(
+				    ksp->ks_raw_buf, ksp->ks_raw_bufsize, p);
+				/* On ENOMEM grow the buffer and retry. */
+				if (rc == ENOMEM && !kstat_resize_raw(ksp))
+					goto restart;
+				if (!rc)
+					seq_puts(f, ksp->ks_raw_buf);
+			} else {
+				ASSERT(ksp->ks_ndata == 1);
+				rc = kstat_seq_show_raw(f, ksp->ks_data,
+				    ksp->ks_data_size);
+			}
+			break;
+		case KSTAT_TYPE_NAMED:
+			rc = kstat_seq_show_named(f, (kstat_named_t *)p);
+			break;
+		case KSTAT_TYPE_INTR:
+			rc = kstat_seq_show_intr(f, (kstat_intr_t *)p);
+			break;
+		case KSTAT_TYPE_IO:
+			rc = kstat_seq_show_io(f, (kstat_io_t *)p);
+			break;
+		case KSTAT_TYPE_TIMER:
+			rc = kstat_seq_show_timer(f, (kstat_timer_t *)p);
+			break;
+		default:
+			PANIC("Undefined kstat type %d\n", ksp->ks_type);
+	}
+
+	return (-rc);
+}
+
+/*
+ * Default ks_update handler installed by __kstat_create(): reads need no
+ * refresh and succeed with 0, writes are refused with EACCES.
+ */
+static int
+kstat_default_update(kstat_t *ksp, int rw)
+{
+	ASSERT(ksp != NULL);
+
+	return ((rw == KSTAT_WRITE) ? EACCES : 0);
+}
+
+/*
+ * Return the address of record 'n' of a kstat.  Raw kstats delegate to
+ * the addr() callback when one is set and otherwise always yield
+ * ks_data; every other type indexes ks_data by the size of its record
+ * structure.
+ */
+static void *
+kstat_seq_data_addr(kstat_t *ksp, loff_t n)
+{
+	size_t stride;
+
+	switch (ksp->ks_type) {
+		case KSTAT_TYPE_RAW:
+			if (ksp->ks_raw_ops.addr)
+				return (ksp->ks_raw_ops.addr(ksp, n));
+			return (ksp->ks_data);
+		case KSTAT_TYPE_NAMED:
+			stride = sizeof (kstat_named_t);
+			break;
+		case KSTAT_TYPE_INTR:
+			stride = sizeof (kstat_intr_t);
+			break;
+		case KSTAT_TYPE_IO:
+			stride = sizeof (kstat_io_t);
+			break;
+		case KSTAT_TYPE_TIMER:
+			stride = sizeof (kstat_timer_t);
+			break;
+		default:
+			PANIC("Undefined kstat type %d\n", ksp->ks_type);
+			return (NULL);
+	}
+
+	return (ksp->ks_data + n * stride);
+}
+
+/*
+ * seq_file start callback.  Takes ks_lock, allocates a PAGE_SIZE scratch
+ * buffer for raw kstats, refreshes the kstat through ks_update(), stamps
+ * ks_snaptime and, for position 0, prints the headers unless
+ * KSTAT_FLAG_NO_HEADERS is set.  Returns the address of record *pos, or
+ * NULL when the headers fail or *pos is past the last record.
+ *
+ * Every return path leaves ks_lock held and the raw buffer allocated;
+ * both are released in kstat_seq_stop().
+ */
+static void *
+kstat_seq_start(struct seq_file *f, loff_t *pos)
+{
+	loff_t n = *pos;
+	kstat_t *ksp = (kstat_t *)f->private;
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	mutex_enter(ksp->ks_lock);
+
+	if (ksp->ks_type == KSTAT_TYPE_RAW) {
+		ksp->ks_raw_bufsize = PAGE_SIZE;
+		ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
+	}
+
+	/* Dynamically update kstat, on error existing kstats are used */
+	(void) ksp->ks_update(ksp, KSTAT_READ);
+
+	ksp->ks_snaptime = gethrtime();
+
+	if (!(ksp->ks_flags & KSTAT_FLAG_NO_HEADERS) && !n &&
+	    kstat_seq_show_headers(f))
+		return (NULL);
+
+	if (n >= ksp->ks_ndata)
+		return (NULL);
+
+	return (kstat_seq_data_addr(ksp, n));
+}
+
+/*
+ * seq_file next callback: advance *pos by one and return the address of
+ * the record at the new position, or NULL once ks_ndata is reached.
+ */
+static void *
+kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	kstat_t *ksp = (kstat_t *)f->private;
+	loff_t idx;
+
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	idx = ++*pos;
+
+	return ((idx >= ksp->ks_ndata) ?
+	    NULL : kstat_seq_data_addr(ksp, idx));
+}
+
+/*
+ * seq_file stop callback: undo kstat_seq_start() by freeing the raw
+ * scratch buffer (raw kstats only) and dropping ks_lock.
+ */
+static void
+kstat_seq_stop(struct seq_file *f, void *v)
+{
+	kstat_t *ksp = (kstat_t *)f->private;
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	if (ksp->ks_type == KSTAT_TYPE_RAW)
+		vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
+
+	mutex_exit(ksp->ks_lock);
+}
+
+/* seq_file iterator for kstat files; passed to seq_open() on open. */
+static struct seq_operations kstat_seq_ops = {
+	.show  = kstat_seq_show,
+	.start = kstat_seq_start,
+	.next  = kstat_seq_next,
+	.stop  = kstat_seq_stop,
+};
+
+/*
+ * Search kstat_module_list for a module whose name equals 'name' within
+ * the first KSTAT_STRLEN characters.  Returns the first match or NULL.
+ */
+static kstat_module_t *
+kstat_find_module(char *name)
+{
+	kstat_module_t *cur = NULL;
+	kstat_module_t *match = NULL;
+
+	list_for_each_entry(cur, &kstat_module_list, ksm_module_list) {
+		if (strncmp(name, cur->ksm_name, KSTAT_STRLEN) != 0)
+			continue;
+		match = cur;
+		break;
+	}
+
+	return (match);
+}
+
+/*
+ * Create the proc directory 'name' beneath proc_spl_kstat together with
+ * a kstat_module_t that tracks it, and append the module to
+ * kstat_module_list.  Returns the new module, or NULL when the directory
+ * could not be created.
+ */
+static kstat_module_t *
+kstat_create_module(char *name)
+{
+	struct proc_dir_entry *dir = proc_mkdir(name, proc_spl_kstat);
+	kstat_module_t *mod;
+
+	if (dir == NULL)
+		return (NULL);
+
+	mod = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
+	mod->ksm_proc = dir;
+	strlcpy(mod->ksm_name, name, KSTAT_STRLEN+1);
+	INIT_LIST_HEAD(&mod->ksm_kstat_list);
+	list_add_tail(&mod->ksm_module_list, &kstat_module_list);
+
+	return (mod);
+}
+
+/*
+ * Destroy a module that no longer has any entries: remove its proc
+ * directory, unlink it from kstat_module_list and free it.
+ */
+static void
+kstat_delete_module(kstat_module_t *module)
+{
+	ASSERT(list_empty(&module->ksm_kstat_list));
+	remove_proc_entry(module->ksm_name, proc_spl_kstat);
+	list_del(&module->ksm_module_list);
+	kmem_free(module, sizeof (kstat_module_t));
+}
+
+/*
+ * Open handler for kstat proc files: start a seq_file session using
+ * kstat_seq_ops and store the proc entry's data pointer in the seq_file's
+ * private field.  Returns the seq_open() result.
+ */
+static int
+proc_kstat_open(struct inode *inode, struct file *filp)
+{
+	int err = seq_open(filp, &kstat_seq_ops);
+
+	if (err == 0) {
+		struct seq_file *sf = filp->private_data;
+
+		sf->private = SPL_PDE_DATA(inode);
+	}
+
+	return (err);
+}
+
+/*
+ * Write handler for kstat proc files.  The written bytes themselves are
+ * never examined; the write only triggers ks_update(ksp, KSTAT_WRITE)
+ * under ks_lock.  On success the whole length is reported as consumed,
+ * otherwise the negated update error is returned.
+ */
+static ssize_t
+proc_kstat_write(struct file *filp, const char __user *buf, size_t len,
+    loff_t *ppos)
+{
+	struct seq_file *sf = filp->private_data;
+	kstat_t *ksp = sf->private;
+	int err;
+
+	ASSERT(ksp->ks_magic == KS_MAGIC);
+
+	mutex_enter(ksp->ks_lock);
+	err = ksp->ks_update(ksp, KSTAT_WRITE);
+	mutex_exit(ksp->ks_lock);
+
+	if (err == 0) {
+		*ppos += len;
+		return (len);
+	}
+
+	return (-err);
+}
+
+/*
+ * File operations for kstat proc entries.  The proc_* member names are
+ * used when HAVE_PROC_OPS_STRUCT is defined, the plain member names
+ * otherwise; both variants install the same handlers.
+ */
+static const kstat_proc_op_t proc_kstat_operations = {
+#ifdef HAVE_PROC_OPS_STRUCT
+	.proc_open	= proc_kstat_open,
+	.proc_write	= proc_kstat_write,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+#else
+	.open		= proc_kstat_open,
+	.write		= proc_kstat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+#endif
+};
+
+/*
+ * Install the three raw-kstat callbacks: headers() fills a buffer with
+ * header text, data() fills a buffer with the text for one record, and
+ * addr() maps a record index to the record pointer handed to data().
+ * Any of them may be NULL; the seq_file code above checks each one
+ * before calling it.
+ */
+void
+__kstat_set_raw_ops(kstat_t *ksp,
+    int (*headers)(char *buf, size_t size),
+    int (*data)(char *buf, size_t size, void *data),
+    void *(*addr)(kstat_t *ksp, loff_t index))
+{
+	ksp->ks_raw_ops.headers = headers;
+	ksp->ks_raw_ops.data    = data;
+	ksp->ks_raw_ops.addr    = addr;
+}
+EXPORT_SYMBOL(__kstat_set_raw_ops);
+
+/*
+ * Initialise a proc entry descriptor: no owner, no proc node, an empty
+ * list link, and copies of the module and entry names.
+ *
+ * The names are copied with strlcpy() bounded by the destination size so
+ * that they are always NUL terminated, even when the caller passes a
+ * descriptor that was not zeroed beforehand; over-long names are
+ * truncated.
+ */
+void
+kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
+    const char *name)
+{
+	kpep->kpe_owner = NULL;
+	kpep->kpe_proc = NULL;
+	INIT_LIST_HEAD(&kpep->kpe_list);
+	strlcpy(kpep->kpe_module, module, sizeof (kpep->kpe_module));
+	strlcpy(kpep->kpe_name, name, sizeof (kpep->kpe_name));
+}
+EXPORT_SYMBOL(kstat_proc_entry_init);
+
+/*
+ * Allocate and initialise a kstat of the given type.  A ks_kid is taken
+ * from kstat_id under kstat_module_lock, ks_lock is pointed at the
+ * kstat's own private mutex, and ks_update defaults to
+ * kstat_default_update().  ks_ndata and ks_data_size are derived from
+ * ks_type and the ks_ndata argument; for KSTAT_TYPE_RAW the argument is
+ * taken as the data size in bytes and the record count is fixed at 1.
+ *
+ * Unless KSTAT_FLAG_VIRTUAL is set a zeroed data buffer of ks_data_size
+ * bytes is allocated as well.  No proc entry is created here; that is
+ * done by __kstat_install().  Returns the new kstat, or NULL if an
+ * allocation returns NULL.
+ */
+kstat_t *
+__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
+    const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
+    uchar_t ks_flags)
+{
+	kstat_t *ksp;
+
+	ASSERT(ks_module);
+	ASSERT(ks_instance == 0);
+	ASSERT(ks_name);
+
+	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
+		ASSERT(ks_ndata == 1);
+
+	ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP);
+	if (ksp == NULL)
+		return (ksp);
+
+	mutex_enter(&kstat_module_lock);
+	ksp->ks_kid = kstat_id;
+	kstat_id++;
+	mutex_exit(&kstat_module_lock);
+
+	ksp->ks_magic = KS_MAGIC;
+	mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
+	ksp->ks_lock = &ksp->ks_private_lock;
+
+	ksp->ks_crtime = gethrtime();
+	ksp->ks_snaptime = ksp->ks_crtime;
+	ksp->ks_instance = ks_instance;
+	/* ksp was zero-filled above, so the copy stays NUL terminated. */
+	strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
+	ksp->ks_type = ks_type;
+	ksp->ks_flags = ks_flags;
+	ksp->ks_update = kstat_default_update;
+	ksp->ks_private = NULL;
+	ksp->ks_raw_ops.headers = NULL;
+	ksp->ks_raw_ops.data = NULL;
+	ksp->ks_raw_ops.addr = NULL;
+	ksp->ks_raw_buf = NULL;
+	ksp->ks_raw_bufsize = 0;
+	kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
+
+	switch (ksp->ks_type) {
+		case KSTAT_TYPE_RAW:
+			ksp->ks_ndata = 1;
+			ksp->ks_data_size = ks_ndata;
+			break;
+		case KSTAT_TYPE_NAMED:
+			ksp->ks_ndata = ks_ndata;
+			ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
+			break;
+		case KSTAT_TYPE_INTR:
+			ksp->ks_ndata = ks_ndata;
+			ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
+			break;
+		case KSTAT_TYPE_IO:
+			ksp->ks_ndata = ks_ndata;
+			ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
+			break;
+		case KSTAT_TYPE_TIMER:
+			ksp->ks_ndata = ks_ndata;
+			ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
+			break;
+		default:
+			PANIC("Undefined kstat type %d\n", ksp->ks_type);
+	}
+
+	/* Virtual kstats get no buffer here; ks_data is left NULL. */
+	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
+		ksp->ks_data = NULL;
+	} else {
+		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
+		if (ksp->ks_data == NULL) {
+			kmem_free(ksp, sizeof (*ksp));
+			ksp = NULL;
+		}
+	}
+
+	return (ksp);
+}
+EXPORT_SYMBOL(__kstat_create);
+
+/*
+ * Check whether creating a directory for kpep->kpe_module would clash
+ * with an existing kstat file.  The module name is split at its last
+ * '/'; if the part before it names a known module and that module
+ * already holds an entry named like the part after it, EEXIST is
+ * returned.  In every other case, including a module name without a
+ * '/', the result is 0.
+ */
+static int
+kstat_detect_collision(kstat_proc_entry_t *kpep)
+{
+	kstat_module_t *owner;
+	kstat_proc_entry_t *entry = NULL;
+	char *path;
+	char *slash;
+	int rc = 0;
+
+	path = kmem_asprintf("%s", kpep->kpe_module);
+
+	slash = strrchr(path, '/');
+	if (slash != NULL) {
+		*slash = '\0';
+		owner = kstat_find_module(path);
+		if (owner != NULL) {
+			list_for_each_entry(entry, &owner->ksm_kstat_list,
+			    kpe_list) {
+				if (strncmp(entry->kpe_name, slash + 1,
+				    KSTAT_STRLEN) == 0) {
+					rc = EEXIST;
+					break;
+				}
+			}
+		}
+	}
+
+	kmem_strfree(path);
+	return (rc);
+}
+
+/*
+ * Add a file to the proc filesystem under the kstat namespace (i.e.
+ * /proc/spl/kstat/). The file need not necessarily be implemented as a
+ * kstat.
+ *
+ * The module directory named by kpep->kpe_module is created on demand.
+ * The function returns nothing: on a namespace collision, a failure to
+ * create the directory, a duplicate entry name or a failure to create
+ * the proc file it simply returns, and kpep->kpe_proc remains NULL.
+ */
+void
+kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
+    const kstat_proc_op_t *proc_ops, void *data)
+{
+	kstat_module_t *module;
+	kstat_proc_entry_t *tmp = NULL;
+
+	ASSERT(kpep);
+
+	mutex_enter(&kstat_module_lock);
+
+	module = kstat_find_module(kpep->kpe_module);
+	if (module == NULL) {
+		if (kstat_detect_collision(kpep) != 0) {
+			cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
+			    " collision", kpep->kpe_module, kpep->kpe_name);
+			goto out;
+		}
+		module = kstat_create_module(kpep->kpe_module);
+		if (module == NULL)
+			goto out;
+	}
+
+	/*
+	 * Only one entry by this name per-module, on failure the module
+	 * shouldn't be deleted because we know it has at least one entry.
+	 */
+	list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
+		if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
+			goto out;
+	}
+
+	list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
+
+	kpep->kpe_owner = module;
+	kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode,
+	    module->ksm_proc, proc_ops, data);
+	if (kpep->kpe_proc == NULL) {
+		/* Roll back; drop the module too if it is now empty. */
+		list_del_init(&kpep->kpe_list);
+		if (list_empty(&module->ksm_kstat_list))
+			kstat_delete_module(module);
+	}
+out:
+	mutex_exit(&kstat_module_lock);
+
+}
+EXPORT_SYMBOL(kstat_proc_entry_install);
+
+/*
+ * Publish a kstat created by __kstat_create() as a proc file.  The kstat
+ * named "dbufs" is installed with mode 0600, every other kstat with
+ * mode 0644.
+ */
+void
+__kstat_install(kstat_t *ksp)
+{
+	mode_t mode;
+
+	ASSERT(ksp);
+
+	/* Specify permission modes for different kstats */
+	mode = (strncmp(ksp->ks_proc.kpe_name, "dbufs", KSTAT_STRLEN) == 0) ?
+	    0600 : 0644;
+
+	kstat_proc_entry_install(
+	    &ksp->ks_proc, mode, &proc_kstat_operations, ksp);
+}
+EXPORT_SYMBOL(__kstat_install);
+
+/*
+ * Remove a proc entry previously set up with kstat_proc_entry_init() and,
+ * possibly, kstat_proc_entry_install().  The proc file is removed only
+ * if it was actually created (kpe_proc != NULL), and the owning module
+ * is dereferenced only in that case.  The proc file removal happens
+ * before kstat_module_lock is taken; the list manipulation happens
+ * under it.
+ */
+void
+kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
+{
+	kstat_module_t *module = kpep->kpe_owner;
+	if (kpep->kpe_proc)
+		remove_proc_entry(kpep->kpe_name, module->ksm_proc);
+
+	mutex_enter(&kstat_module_lock);
+	list_del_init(&kpep->kpe_list);
+
+	/*
+	 * Remove top level module directory if it wasn't empty before, but now
+	 * is.
+	 */
+	if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
+		kstat_delete_module(module);
+	mutex_exit(&kstat_module_lock);
+
+}
+EXPORT_SYMBOL(kstat_proc_entry_delete);
+
+/*
+ * Destroy a kstat: remove its proc entry, free the data buffer unless
+ * the kstat is virtual (KSTAT_FLAG_VIRTUAL), destroy the private mutex
+ * and free the kstat itself.
+ */
+void
+__kstat_delete(kstat_t *ksp)
+{
+	kstat_proc_entry_delete(&ksp->ks_proc);
+
+	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
+		kmem_free(ksp->ks_data, ksp->ks_data_size);
+
+	ksp->ks_lock = NULL;
+	mutex_destroy(&ksp->ks_private_lock);
+	kmem_free(ksp, sizeof (*ksp));
+}
+EXPORT_SYMBOL(__kstat_delete);
+
+/*
+ * Set up the kstat subsystem state: the module lock, an empty module
+ * list and a kstat id counter starting at 0.  Always returns 0.
+ */
+int
+spl_kstat_init(void)
+{
+	mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL);
+	INIT_LIST_HEAD(&kstat_module_list);
+	kstat_id = 0;
+	return (0);
+}
+
+/*
+ * Tear down the kstat subsystem state.  Asserts that every module has
+ * already been removed from kstat_module_list.
+ */
+void
+spl_kstat_fini(void)
+{
+	ASSERT(list_empty(&kstat_module_list));
+	mutex_destroy(&kstat_module_lock);
+}

diff --git a/zfs/module/os/linux/spl/spl-proc.c b/zfs/module/os/linux/spl/spl-proc.c
new file mode 100644
index 0000000..81dd5d2
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-proc.c

@@ -0,0 +1,766 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Proc Implementation.
+ */
+
+#include <sys/systeminfo.h>
+#include <sys/kstat.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/vmem.h>
+#include <sys/taskq.h>
+#include <sys/proc.h>
+#include <linux/ctype.h>
+#include <linux/kmod.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+
+/*
+ * Type used for the local, modifiable copies of a ctl_table entry made
+ * by the handlers below; carries __no_const when CONSTIFY_PLUGIN is
+ * defined on kernels 3.8 and later.
+ */
+#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+typedef struct ctl_table __no_const spl_ctl_table;
+#else
+typedef struct ctl_table spl_ctl_table;
+#endif
+
+/* Bounds referenced through .extra1/.extra2 by the unsigned long entries. */
+static unsigned long table_min = 0;
+static unsigned long table_max = ~0;
+
+static struct ctl_table_header *spl_header = NULL;
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+static struct ctl_table_header *spl_kmem = NULL;
+static struct ctl_table_header *spl_kstat = NULL;
+#endif
+static struct proc_dir_entry *proc_spl = NULL;
+static struct proc_dir_entry *proc_spl_kmem = NULL;
+static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
+static struct proc_dir_entry *proc_spl_taskq_all = NULL;
+static struct proc_dir_entry *proc_spl_taskq = NULL;
+/* Not static: defined here for use by code outside this file. */
+struct proc_dir_entry *proc_spl_kstat = NULL;
+
+#ifdef DEBUG_KMEM
+/*
+ * sysctl handler reporting the atomic counter that table->data points
+ * to.  Reads snapshot the counter into a local unsigned long and format
+ * it with proc_doulongvec_minmax() through a modified copy of the table
+ * entry.  Writes are accepted but ignored: the file position is advanced
+ * and 0 is returned.
+ */
+static int
+proc_domemused(struct ctl_table *table, int write,
+    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned long val;
+	spl_ctl_table dummy = *table;
+
+	dummy.data = &val;
+	dummy.proc_handler = &proc_dointvec;
+	dummy.extra1 = &table_min;
+	dummy.extra2 = &table_max;
+
+	if (write) {
+		*ppos += *lenp;
+		return (0);
+	}
+
+#ifdef HAVE_ATOMIC64_T
+	val = atomic64_read((atomic64_t *)table->data);
+#else
+	val = atomic_read((atomic_t *)table->data);
+#endif /* HAVE_ATOMIC64_T */
+
+	return (proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos));
+}
+#endif /* DEBUG_KMEM */
+
+/*
+ * sysctl handler reporting an aggregate over all SPL kmem caches.  Here
+ * table->data is not a pointer but a flag mask: caches whose skc_flags
+ * share no bit with the mask are skipped, and the KMC_TOTAL, KMC_ALLOC
+ * or KMC_MAX bit selects which per-cache quantity is summed.  The walk
+ * is done under spl_kmem_cache_sem held for reading.  Writes are
+ * accepted but ignored.
+ */
+static int
+proc_doslab(struct ctl_table *table, int write,
+    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int rc = 0;
+	unsigned long val = 0, mask;
+	spl_ctl_table dummy = *table;
+	spl_kmem_cache_t *skc = NULL;
+
+	/* Point the copied entry at the local sum instead of the mask. */
+	dummy.data = &val;
+	dummy.proc_handler = &proc_dointvec;
+	dummy.extra1 = &table_min;
+	dummy.extra2 = &table_max;
+
+	if (write) {
+		*ppos += *lenp;
+	} else {
+		down_read(&spl_kmem_cache_sem);
+		mask = (unsigned long)table->data;
+
+		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+
+			/* Only use slabs of the correct kmem/vmem type */
+			if (!(skc->skc_flags & mask))
+				continue;
+
+			/* Sum the specified field for selected slabs */
+			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
+			case KMC_TOTAL:
+				val += skc->skc_slab_size * skc->skc_slab_total;
+				break;
+			case KMC_ALLOC:
+				val += skc->skc_obj_size * skc->skc_obj_alloc;
+				break;
+			case KMC_MAX:
+				val += skc->skc_obj_size * skc->skc_obj_max;
+				break;
+			}
+		}
+
+		up_read(&spl_kmem_cache_sem);
+		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
+	}
+
+	return (rc);
+}
+
+/*
+ * sysctl handler for the host id, which is exchanged as a hexadecimal
+ * string without a leading "0x".  Reads format zone_get_hostid(NULL);
+ * writes parse the supplied string and store the result in spl_hostid.
+ * Returns 0 on success or -EINVAL when a write contains no hex digits.
+ */
+static int
+proc_dohostid(struct ctl_table *table, int write,
+    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	/*
+	 * Zero-fill the buffer: if proc_dostring() stores nothing (for
+	 * example on a zero-length write) the parse below must see an
+	 * empty string, not leftover stack contents.
+	 */
+	char *end, str[32] = "";
+	unsigned long hid;
+	spl_ctl_table dummy = *table;
+
+	dummy.data = str;
+	dummy.maxlen = sizeof (str) - 1;
+
+	if (!write)
+		snprintf(str, sizeof (str), "%lx",
+		    (unsigned long) zone_get_hostid(NULL));
+
+	/* always returns 0 */
+	proc_dostring(&dummy, write, buffer, lenp, ppos);
+
+	if (write) {
+		/*
+		 * We can't use proc_doulongvec_minmax() in the write
+		 * case here because hostid, while a hex value, has no
+		 * leading 0x, which confuses the helper function.
+		 */
+
+		hid = simple_strtoul(str, &end, 16);
+		if (str == end)
+			return (-EINVAL);
+		spl_hostid = hid;
+	}
+
+	return (0);
+}
+
+/*
+ * Print the column header line for the taskq proc files; the widths
+ * match the per-taskq line printed by taskq_seq_show_impl().
+ */
+static void
+taskq_seq_show_headers(struct seq_file *f)
+{
+	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
+	    "taskq", "act", "nthr", "spwn", "maxt", "pri",
+	    "mina", "maxa", "cura", "flags");
+}
+
+/*
+ * indices into the lheads array below; LHEAD_SIZE is the number of
+ * entries in that array
+ */
+#define	LHEAD_PEND	0
+#define	LHEAD_PRIO	1
+#define	LHEAD_DELAY	2
+#define	LHEAD_WAIT	3
+#define	LHEAD_ACTIVE	4
+#define	LHEAD_SIZE	5
+
+/*
+ * Per-list cap on the entries printed for the pend, prio, delay and wait
+ * lists; a value of 0 disables the cap.
+ */
+/* BEGIN CSTYLED */
+static unsigned int spl_max_show_tasks = 512;
+module_param(spl_max_show_tasks, uint, 0644);
+MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
+/* END CSTYLED */
+
+/*
+ * Print one taskq.  A summary line with the taskq's counters is followed
+ * by one section for each non-empty list: the active threads, then the
+ * pend, prio, delay and wait lists.  When 'allflag' is false a taskq
+ * whose lists are all empty produces no output at all.
+ *
+ * tq_lock is held for the whole function.  The wait queue lock is taken
+ * as well and released early when the wait list is empty, otherwise
+ * only after the wait list has been printed.  Always returns 0.
+ */
+static int
+taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
+{
+	taskq_t *tq = p;
+	taskq_thread_t *tqt = NULL;
+	spl_wait_queue_entry_t *wq;
+	struct task_struct *tsk;
+	taskq_ent_t *tqe;
+	char name[100];
+	struct list_head *lheads[LHEAD_SIZE], *lh;
+	static char *list_names[LHEAD_SIZE] =
+	    {"pend", "prio", "delay", "wait", "active" };
+	int i, j, have_lheads = 0;
+	unsigned long wflags, flags;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+	spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
+
+	/* get the various lists and check whether they're empty */
+	lheads[LHEAD_PEND] = &tq->tq_pend_list;
+	lheads[LHEAD_PRIO] = &tq->tq_prio_list;
+	lheads[LHEAD_DELAY] = &tq->tq_delay_list;
+#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
+	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
+#else
+	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
+#endif
+	lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
+
+	/* From here on a NULL slot in lheads[] marks an empty list. */
+	for (i = 0; i < LHEAD_SIZE; ++i) {
+		if (list_empty(lheads[i]))
+			lheads[i] = NULL;
+		else
+			++have_lheads;
+	}
+
+	/* early return in non-"all" mode if lists are all empty */
+	if (!allflag && !have_lheads) {
+		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+		spin_unlock_irqrestore(&tq->tq_lock, flags);
+		return (0);
+	}
+
+	/* unlock the waitq quickly */
+	if (!lheads[LHEAD_WAIT])
+		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+
+	/* show the base taskq contents */
+	snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
+	seq_printf(f, "%-25s ", name);
+	seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
+	    tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
+	    tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
+	    tq->tq_nalloc, tq->tq_flags);
+
+	/* show the active list */
+	if (lheads[LHEAD_ACTIVE]) {
+		j = 0;
+		list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
+			/* two entries per output line */
+			if (j == 0)
+				seq_printf(f, "\t%s:",
+				    list_names[LHEAD_ACTIVE]);
+			else if (j == 2) {
+				seq_printf(f, "\n\t       ");
+				j = 0;
+			}
+			seq_printf(f, " [%d]%pf(%ps)",
+			    tqt->tqt_thread->pid,
+			    tqt->tqt_task->tqent_func,
+			    tqt->tqt_task->tqent_arg);
+			++j;
+		}
+		seq_printf(f, "\n");
+	}
+
+	for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
+		if (lheads[i]) {
+			j = 0;
+			list_for_each(lh, lheads[i]) {
+				/* stop at the cap unless it is disabled (0) */
+				if (spl_max_show_tasks != 0 &&
+				    j >= spl_max_show_tasks) {
+					seq_printf(f, "\n\t(truncated)");
+					break;
+				}
+				/* show the wait waitq list */
+				if (i == LHEAD_WAIT) {
+#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
+					wq = list_entry(lh,
+					    spl_wait_queue_entry_t, entry);
+#else
+					wq = list_entry(lh,
+					    spl_wait_queue_entry_t, task_list);
+#endif
+					if (j == 0)
+						seq_printf(f, "\t%s:",
+						    list_names[i]);
+					else if (j % 8 == 0)
+						seq_printf(f, "\n\t     ");
+
+					tsk = wq->private;
+					seq_printf(f, " %d", tsk->pid);
+				/* pend, prio and delay lists */
+				} else {
+					tqe = list_entry(lh, taskq_ent_t,
+					    tqent_list);
+					if (j == 0)
+						seq_printf(f, "\t%s:",
+						    list_names[i]);
+					else if (j % 2 == 0)
+						seq_printf(f, "\n\t     ");
+
+					seq_printf(f, " %pf(%ps)",
+					    tqe->tqent_func,
+					    tqe->tqent_arg);
+				}
+				++j;
+			}
+			seq_printf(f, "\n");
+		}
+	/* The waitq lock is still held only if the wait list was shown. */
+	if (lheads[LHEAD_WAIT])
+		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+	spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+	return (0);
+}
+
+/* show callback that prints every taskq, including those with no work. */
+static int
+taskq_all_seq_show(struct seq_file *f, void *p)
+{
+	return (taskq_seq_show_impl(f, p, B_TRUE));
+}
+
+/* show callback that skips taskqs whose lists are all empty. */
+static int
+taskq_seq_show(struct seq_file *f, void *p)
+{
+	return (taskq_seq_show_impl(f, p, B_FALSE));
+}
+
+/*
+ * seq_file start callback for the taskq proc files.  Takes tq_list_sem
+ * for reading (dropped again in taskq_seq_stop()), prints the column
+ * headers when starting at position 0, and returns the taskq at index
+ * *pos of tq_list, or NULL when the list holds no such entry.
+ */
+static void *
+taskq_seq_start(struct seq_file *f, loff_t *pos)
+{
+	struct list_head *p;
+	loff_t n = *pos;
+
+	down_read(&tq_list_sem);
+	if (!n)
+		taskq_seq_show_headers(f);
+
+	/*
+	 * Walk forward to entry n.  The list head is checked before every
+	 * step, including the first, so an empty list or a position past
+	 * the end yields NULL rather than a taskq_t pointer computed from
+	 * the list head itself.
+	 */
+	for (p = tq_list.next; p != &tq_list; p = p->next) {
+		if (n-- == 0)
+			return (list_entry(p, taskq_t, tq_taskqs));
+	}
+
+	return (NULL);
+}
+
+/*
+ * seq_file next callback: advance *pos and return the taskq following
+ * 'p' on tq_list, or NULL when 'p' is the last one.
+ */
+static void *
+taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	struct list_head *following = ((taskq_t *)p)->tq_taskqs.next;
+
+	++*pos;
+
+	if (following == &tq_list)
+		return (NULL);
+
+	return (list_entry(following, taskq_t, tq_taskqs));
+}
+
+/*
+ * Print the two header lines of the slab proc file: a row of group
+ * titles (cache, slab, object, emergency) and a row of column names.
+ */
+static void
+slab_seq_show_headers(struct seq_file *f)
+{
+	seq_printf(f,
+	    "--------------------- cache ----------"
+	    "---------------------------------------------  "
+	    "----- slab ------  "
+	    "---- object -----  "
+	    "--- emergency ---\n");
+	seq_printf(f,
+	    "name                                  "
+	    "  flags      size     alloc slabsize  objsize  "
+	    "total alloc   max  "
+	    "total alloc   max  "
+	    "dlock alloc   max\n");
+}
+
+/*
+ * seq_file show callback: print one line of statistics for the cache
+ * 'p', read while holding skc_lock.  Caches flagged KMC_SLAB report only
+ * the flags, object size, allocated object count and allocated bytes,
+ * with "-" in every other column; all other caches report the full set
+ * of slab, object and emergency counters.  Always returns 0.
+ */
+static int
+slab_seq_show(struct seq_file *f, void *p)
+{
+	spl_kmem_cache_t *skc = p;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+
+	if (skc->skc_flags & KMC_SLAB) {
+		/*
+		 * This cache is backed by a generic Linux kmem cache which
+		 * has its own accounting. For these caches we only track
+		 * the number of active allocated objects that exist within
+		 * the underlying Linux slabs. For the overall statistics of
+		 * the underlying Linux cache please refer to /proc/slabinfo.
+		 */
+		spin_lock(&skc->skc_lock);
+		uint64_t objs_allocated =
+		    percpu_counter_sum(&skc->skc_linux_alloc);
+		seq_printf(f, "%-36s  ", skc->skc_name);
+		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
+		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
+		    (long unsigned)skc->skc_flags,
+		    "-",
+		    (long unsigned)(skc->skc_obj_size * objs_allocated),
+		    "-",
+		    (unsigned)skc->skc_obj_size,
+		    "-", "-", "-", "-",
+		    (long unsigned)objs_allocated,
+		    "-", "-", "-", "-");
+		spin_unlock(&skc->skc_lock);
+		return (0);
+	}
+
+	spin_lock(&skc->skc_lock);
+	seq_printf(f, "%-36s  ", skc->skc_name);
+	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
+	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
+	    (long unsigned)skc->skc_flags,
+	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
+	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
+	    (unsigned)skc->skc_slab_size,
+	    (unsigned)skc->skc_obj_size,
+	    (long unsigned)skc->skc_slab_total,
+	    (long unsigned)skc->skc_slab_alloc,
+	    (long unsigned)skc->skc_slab_max,
+	    (long unsigned)skc->skc_obj_total,
+	    (long unsigned)skc->skc_obj_alloc,
+	    (long unsigned)skc->skc_obj_max,
+	    (long unsigned)skc->skc_obj_deadlock,
+	    (long unsigned)skc->skc_obj_emergency,
+	    (long unsigned)skc->skc_obj_emergency_max);
+	spin_unlock(&skc->skc_lock);
+	return (0);
+}
+
+/*
+ * seq_file start callback for the slab proc file.  Takes
+ * spl_kmem_cache_sem for reading (dropped again in slab_seq_stop()),
+ * prints the headers when starting at position 0, and returns the cache
+ * at index *pos of spl_kmem_cache_list, or NULL when the list holds no
+ * such entry.
+ */
+static void *
+slab_seq_start(struct seq_file *f, loff_t *pos)
+{
+	struct list_head *p;
+	loff_t n = *pos;
+
+	down_read(&spl_kmem_cache_sem);
+	if (!n)
+		slab_seq_show_headers(f);
+
+	/*
+	 * Walk forward to entry n.  The list head is checked before every
+	 * step, including the first, so an empty list or a position past
+	 * the end yields NULL rather than a cache pointer computed from
+	 * the list head itself.
+	 */
+	for (p = spl_kmem_cache_list.next; p != &spl_kmem_cache_list;
+	    p = p->next) {
+		if (n-- == 0)
+			return (list_entry(p, spl_kmem_cache_t, skc_list));
+	}
+
+	return (NULL);
+}
+
+/*
+ * seq_file next callback: advance *pos and return the cache following
+ * 'p' on spl_kmem_cache_list, or NULL when 'p' is the last one.
+ */
+static void *
+slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	struct list_head *following = ((spl_kmem_cache_t *)p)->skc_list.next;
+
+	++*pos;
+
+	if (following == &spl_kmem_cache_list)
+		return (NULL);
+
+	return (list_entry(following, spl_kmem_cache_t, skc_list));
+}
+
+/* seq_file stop callback: drop the read lock taken in slab_seq_start(). */
+static void
+slab_seq_stop(struct seq_file *f, void *v)
+{
+	up_read(&spl_kmem_cache_sem);
+}
+
+/* seq_file iterator over spl_kmem_cache_list; used by proc_slab_open(). */
+static struct seq_operations slab_seq_ops = {
+	.show  = slab_seq_show,
+	.start = slab_seq_start,
+	.next  = slab_seq_next,
+	.stop  = slab_seq_stop,
+};
+
+/* Open handler for the slab proc file: start a slab_seq_ops session. */
+static int
+proc_slab_open(struct inode *inode, struct file *filp)
+{
+	return (seq_open(filp, &slab_seq_ops));
+}
+
+/*
+ * Read-only file operations for the slab proc file.  The proc_* member
+ * names are used when HAVE_PROC_OPS_STRUCT is defined, the plain member
+ * names otherwise.
+ */
+static const kstat_proc_op_t proc_slab_operations = {
+#ifdef HAVE_PROC_OPS_STRUCT
+	.proc_open	= proc_slab_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+#else
+	.open		= proc_slab_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+#endif
+};
+
+/* seq_file stop callback: drop the read lock taken in taskq_seq_start(). */
+static void
+taskq_seq_stop(struct seq_file *f, void *v)
+{
+	up_read(&tq_list_sem);
+}
+
+/* seq_file iterator over tq_list that shows every taskq. */
+static struct seq_operations taskq_all_seq_ops = {
+	.show	= taskq_all_seq_show,
+	.start	= taskq_seq_start,
+	.next	= taskq_seq_next,
+	.stop	= taskq_seq_stop,
+};
+
+/* seq_file iterator over tq_list that shows only taskqs with entries. */
+static struct seq_operations taskq_seq_ops = {
+	.show	= taskq_seq_show,
+	.start	= taskq_seq_start,
+	.next	= taskq_seq_next,
+	.stop	= taskq_seq_stop,
+};
+
+/* Open handler that starts a taskq_all_seq_ops session. */
+static int
+proc_taskq_all_open(struct inode *inode, struct file *filp)
+{
+	return (seq_open(filp, &taskq_all_seq_ops));
+}
+
+/* Open handler that starts a taskq_seq_ops session. */
+static int
+proc_taskq_open(struct inode *inode, struct file *filp)
+{
+	return (seq_open(filp, &taskq_seq_ops));
+}
+
+/*
+ * Read-only file operations that open with proc_taskq_all_open().  The
+ * proc_* member names are used when HAVE_PROC_OPS_STRUCT is defined, the
+ * plain member names otherwise.
+ */
+static const kstat_proc_op_t proc_taskq_all_operations = {
+#ifdef HAVE_PROC_OPS_STRUCT
+	.proc_open	= proc_taskq_all_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+#else
+	.open		= proc_taskq_all_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+#endif
+};
+
+/*
+ * File operations for the "taskq" proc entry.  The member names depend on
+ * HAVE_PROC_OPS_STRUCT; the handlers are the same in both cases.
+ */
+static const kstat_proc_op_t proc_taskq_operations = {
+#ifdef HAVE_PROC_OPS_STRUCT
+	.proc_open	= proc_taskq_open,
+	.proc_release	= seq_release,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+#else
+	.open		= proc_taskq_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+#endif
+};
+
+/*
+ * sysctl entries for the "kmem" directory.  The table is either hooked in
+ * as the child of the "kmem" entry in spl_table or registered directly as
+ * "kernel/spl/kmem" by spl_proc_init().  Every entry is read-only (0444).
+ */
+static struct ctl_table spl_kmem_table[] = {
+#ifdef DEBUG_KMEM
+	/* Allocation accounting; only built when DEBUG_KMEM is defined. */
+	{
+		.procname	= "kmem_used",
+		.data		= &kmem_alloc_used,
+#ifdef HAVE_ATOMIC64_T
+		.maxlen		= sizeof (atomic64_t),
+#else
+		.maxlen		= sizeof (atomic_t),
+#endif /* HAVE_ATOMIC64_T */
+		.mode		= 0444,
+		.proc_handler	= &proc_domemused,
+	},
+	{
+		.procname	= "kmem_max",
+		.data		= &kmem_alloc_max,
+		.maxlen		= sizeof (unsigned long),
+		.extra1		= &table_min,
+		.extra2		= &table_max,
+		.mode		= 0444,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+#endif /* DEBUG_KMEM */
+	/*
+	 * For the slab_* entries .data is not an address: it carries a
+	 * KMC_* flag mask cast to a pointer.
+	 * NOTE(review): presumably proc_doslab() decodes that mask to pick
+	 * which statistic to report; confirm against the handler.
+	 */
+	{
+		.procname	= "slab_kvmem_total",
+		.data		= (void *)(KMC_KVMEM | KMC_TOTAL),
+		.maxlen		= sizeof (unsigned long),
+		.extra1		= &table_min,
+		.extra2		= &table_max,
+		.mode		= 0444,
+		.proc_handler	= &proc_doslab,
+	},
+	{
+		.procname	= "slab_kvmem_alloc",
+		.data		= (void *)(KMC_KVMEM | KMC_ALLOC),
+		.maxlen		= sizeof (unsigned long),
+		.extra1		= &table_min,
+		.extra2		= &table_max,
+		.mode		= 0444,
+		.proc_handler	= &proc_doslab,
+	},
+	{
+		.procname	= "slab_kvmem_max",
+		.data		= (void *)(KMC_KVMEM | KMC_MAX),
+		.maxlen		= sizeof (unsigned long),
+		.extra1		= &table_min,
+		.extra2		= &table_max,
+		.mode		= 0444,
+		.proc_handler	= &proc_doslab,
+	},
+	/* Empty terminating entry. */
+	{},
+};
+
+/*
+ * sysctl table for the "kstat" directory.  It holds only the empty
+ * terminating entry; it is either the child of the "kstat" entry in
+ * spl_table or registered as "kernel/spl/kstat" by spl_proc_init().
+ */
+static struct ctl_table spl_kstat_table[] = {
+	{},
+};
+
+/*
+ * Top-level sysctl entries for the "spl" directory: a read-only "gitrev"
+ * string and a writable (0644) "hostid".  When HAVE_REGISTER_SYSCTL_TABLE
+ * is defined the "kmem" and "kstat" subdirectories are nested here through
+ * .child; otherwise spl_proc_init() registers those tables separately by
+ * path.
+ */
+static struct ctl_table spl_table[] = {
+	/*
+	 * NB No .strategy entries have been provided since
+	 * sysctl(8) prefers to go via /proc for portability.
+	 */
+	{
+		.procname	= "gitrev",
+		.data		= spl_gitrev,
+		.maxlen		= sizeof (spl_gitrev),
+		.mode		= 0444,
+		.proc_handler	= &proc_dostring,
+	},
+	{
+		.procname	= "hostid",
+		.data		= &spl_hostid,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_dohostid,
+	},
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
+	{
+		.procname	= "kmem",
+		.mode		= 0555,
+		.child		= spl_kmem_table,
+	},
+	{
+		.procname	= "kstat",
+		.mode		= 0555,
+		.child		= spl_kstat_table,
+	},
+#endif
+	/* Empty terminating entry. */
+	{},
+};
+
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
+/*
+ * Directory wrappers used only with register_sysctl_table(): spl_root
+ * describes "kernel", whose child spl_dir describes "spl", whose child is
+ * spl_table.  spl_proc_init() registers spl_root.
+ */
+static struct ctl_table spl_dir[] = {
+	{
+		.procname	= "spl",
+		.mode		= 0555,
+		.child		= spl_table,
+	},
+	{}
+};
+
+static struct ctl_table spl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= spl_dir,
+	},
+	{}
+};
+#endif
+
+/*
+ * Undo whatever spl_proc_init() managed to set up.
+ *
+ * This runs both on normal teardown and on the error path of a partially
+ * completed spl_proc_init().  Each proc entry is therefore removed only if
+ * its handle is non-NULL, so a partial failure neither asks the kernel to
+ * remove entries that were never created nor passes a NULL parent (which
+ * resolves to the proc root) for entries that belong under "spl".  Handles
+ * are cleared after removal so a repeated call is harmless.
+ *
+ * Entries are removed child-first: "slab" before "kmem", and everything
+ * before the "spl" directory itself.
+ */
+static void spl_proc_cleanup(void)
+{
+	if (proc_spl_kstat != NULL) {
+		remove_proc_entry("kstat", proc_spl);
+		proc_spl_kstat = NULL;
+	}
+	if (proc_spl_kmem_slab != NULL) {
+		remove_proc_entry("slab", proc_spl_kmem);
+		proc_spl_kmem_slab = NULL;
+	}
+	if (proc_spl_kmem != NULL) {
+		remove_proc_entry("kmem", proc_spl);
+		proc_spl_kmem = NULL;
+	}
+	if (proc_spl_taskq_all != NULL) {
+		remove_proc_entry("taskq-all", proc_spl);
+		proc_spl_taskq_all = NULL;
+	}
+	if (proc_spl_taskq != NULL) {
+		remove_proc_entry("taskq", proc_spl);
+		proc_spl_taskq = NULL;
+	}
+	if (proc_spl != NULL) {
+		remove_proc_entry("spl", NULL);
+		proc_spl = NULL;
+	}
+
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+	/* These tables are only registered separately in this configuration. */
+	if (spl_kstat) {
+		unregister_sysctl_table(spl_kstat);
+		spl_kstat = NULL;
+	}
+	if (spl_kmem) {
+		unregister_sysctl_table(spl_kmem);
+		spl_kmem = NULL;
+	}
+#endif
+	if (spl_header) {
+		unregister_sysctl_table(spl_header);
+		spl_header = NULL;
+	}
+}
+
+/*
+ * Register the SPL sysctl tables and create the "spl" proc directory with
+ * its "taskq-all", "taskq", "kmem/slab" and "kstat" entries.
+ *
+ * Returns 0 on success.  Any failure yields -EUNATCH; except when the very
+ * first sysctl registration fails (nothing to undo yet), the failure path
+ * calls spl_proc_cleanup() before returning.
+ */
+int
+spl_proc_init(void)
+{
+	/* Every failure below reports this same error code. */
+	const int err = -EUNATCH;
+
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
+	spl_header = register_sysctl_table(spl_root);
+	if (spl_header == NULL)
+		return (err);
+#else
+	spl_header = register_sysctl("kernel/spl", spl_table);
+	if (spl_header == NULL)
+		return (err);
+
+	spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table);
+	if (spl_kmem == NULL)
+		goto fail;
+
+	spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table);
+	if (spl_kstat == NULL)
+		goto fail;
+#endif
+
+	proc_spl = proc_mkdir("spl", NULL);
+	if (proc_spl == NULL)
+		goto fail;
+
+	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
+	    &proc_taskq_all_operations, NULL);
+	if (proc_spl_taskq_all == NULL)
+		goto fail;
+
+	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
+	    &proc_taskq_operations, NULL);
+	if (proc_spl_taskq == NULL)
+		goto fail;
+
+	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
+	if (proc_spl_kmem == NULL)
+		goto fail;
+
+	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
+	    &proc_slab_operations, NULL);
+	if (proc_spl_kmem_slab == NULL)
+		goto fail;
+
+	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
+	if (proc_spl_kstat == NULL)
+		goto fail;
+
+	return (0);
+
+fail:
+	spl_proc_cleanup();
+	return (err);
+}
+
+void
+spl_proc_fini(void)
+{
+	spl_proc_cleanup();
+}

diff --git a/zfs/module/os/linux/spl/spl-procfs-list.c b/zfs/module/os/linux/spl/spl-procfs-list.c
new file mode 100644
index 0000000..71b7f75
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-procfs-list.c

@@ -0,0 +1,284 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include <sys/list.h>
+#include <sys/procfs_list.h>
+#include <linux/proc_fs.h>
+#include <sys/mutex.h>
+
+/*
+ * A procfs_list is a wrapper around a linked list which implements the seq_file
+ * interface, allowing the contents of the list to be exposed through procfs.
+ * The kernel already has some utilities to help implement the seq_file
+ * interface for linked lists (seq_list_*), but they aren't appropriate for use
+ * with lists that have many entries, because seq_list_start walks the list at
+ * the start of each read syscall to find where it left off, so reading a file
+ * ends up being quadratic in the number of entries in the list.
+ *
+ * This implementation avoids this penalty by maintaining a separate cursor into
+ * the list per instance of the file that is open. It also maintains some extra
+ * information in each node of the list to prevent reads of entries that have
+ * been dropped from the list.
+ *
+ * Callers should only add elements to the list using procfs_list_add, which
+ * adds an element to the tail of the list. Other operations can be performed
+ * directly on the wrapped list using the normal list manipulation functions,
+ * but elements should only be removed from the head of the list.
+ */
+
+/*
+ * Lvalue accessor for the pln_id field of the procfs_list_node_t embedded
+ * in `obj` at byte offset (procfs_list)->pl_node_offset.  Both arguments
+ * are parenthesized so that arbitrary expressions can be passed safely.
+ */
+#define	NODE_ID(procfs_list, obj) \
+		(((procfs_list_node_t *)(((char *)(obj)) + \
+		(procfs_list)->pl_node_offset))->pln_id)
+
+/*
+ * Per-open-file iteration state.  procfs_list_open() allocates one of these
+ * as the seq_file private data; the seq_file callbacks use it to resume
+ * from the last visited node instead of re-walking the list.
+ * cached_node is SEQ_START_TOKEN before the first element has been visited
+ * and NULL once the end of the list has been reached.
+ */
+typedef struct procfs_list_cursor {
+	procfs_list_t	*procfs_list;	/* List into which this cursor points */
+	void		*cached_node;	/* Most recently accessed node */
+	loff_t		cached_pos;	/* Position of cached_node */
+} procfs_list_cursor_t;
+
+/*
+ * seq_file ->show() callback.  For the start token the list's optional
+ * header callback is invoked (0 is returned when there is none); for any
+ * other element the list's pl_show callback renders it.  The list lock
+ * must be held.
+ */
+static int
+procfs_list_seq_show(struct seq_file *f, void *p)
+{
+	procfs_list_cursor_t *cur = f->private;
+	procfs_list_t *pl = cur->procfs_list;
+
+	ASSERT(MUTEX_HELD(&pl->pl_lock));
+
+	/* Ordinary element: delegate to the per-list show callback. */
+	if (p != SEQ_START_TOKEN)
+		return (pl->pl_show(f, p));
+
+	/* Start token: emit the header only if the list supplies one. */
+	if (pl->pl_show_header == NULL)
+		return (0);
+
+	return (pl->pl_show_header(f));
+}
+
+/*
+ * Advance the cursor by one element and publish the new position in *pos.
+ * From the start token the next element is the list head; otherwise it is
+ * the successor of the cached node.  Returns the new node, or NULL when the
+ * list is exhausted.
+ */
+static void *
+procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
+{
+	procfs_list_t *pl = cursor->procfs_list;
+	void *node;
+
+	node = (cursor->cached_node == SEQ_START_TOKEN) ?
+	    list_head(&pl->pl_list) :
+	    list_next(&pl->pl_list, cursor->cached_node);
+
+	if (node == NULL) {
+		/*
+		 * seq_read() expects ->next() to move the position forward
+		 * even when nothing is left; bump it so that no warning is
+		 * logged.
+		 */
+		cursor->cached_node = NULL;
+		cursor->cached_pos++;
+	} else {
+		/* A node's position in the file is its id. */
+		cursor->cached_node = node;
+		cursor->cached_pos = NODE_ID(pl, node);
+	}
+	*pos = cursor->cached_pos;
+
+	return (node);
+}
+
+/*
+ * seq_file ->start() callback.  Takes the list lock (released again in
+ * procfs_list_seq_stop()) and returns the element to resume from:
+ *  - SEQ_START_TOKEN when reading from position 0 (the cursor is reset);
+ *  - NULL when the cursor already ran off the end of the list;
+ *  - ERR_PTR(-EIO) when the cached node is no longer on the list;
+ *  - otherwise the cached node, or the node that follows it.
+ */
+static void *
+procfs_list_seq_start(struct seq_file *f, loff_t *pos)
+{
+	procfs_list_cursor_t *cur = f->private;
+	procfs_list_t *pl = cur->procfs_list;
+	void *head;
+
+	mutex_enter(&pl->pl_lock);
+
+	if (*pos == 0) {
+		cur->cached_node = SEQ_START_TOKEN;
+		cur->cached_pos = 0;
+		return (SEQ_START_TOKEN);
+	}
+
+	if (cur->cached_node == NULL)
+		return (NULL);
+
+	/*
+	 * The cached pointer is stale if the entry where we left off has
+	 * been dropped from the list since the last read syscall completed.
+	 * Entries are only dropped from the head, so it is enough to compare
+	 * against the id of the oldest remaining entry.
+	 */
+	head = list_head(&pl->pl_list);
+	if (cur->cached_node != SEQ_START_TOKEN) {
+		if (head == NULL || NODE_ID(pl, head) > cur->cached_pos)
+			return (ERR_PTR(-EIO));
+	}
+
+	/*
+	 * When not starting from the beginning of the file, the seq_file
+	 * code resumes either at the position it visited last or at the one
+	 * right after it.
+	 */
+	if (*pos == cur->cached_pos)
+		return (cur->cached_node);
+
+	ASSERT3U(*pos, ==, cur->cached_pos + 1);
+	return (procfs_list_next_node(cur, pos));
+}
+
+/*
+ * seq_file ->next() callback: advances the per-file cursor by one node via
+ * procfs_list_next_node() and returns that node (NULL at end of list).
+ * The `p` argument is unused; the cursor already tracks the current node.
+ * The list lock must be held.
+ */
+static void *
+procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	procfs_list_cursor_t *cursor = f->private;
+	ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
+	return (procfs_list_next_node(cursor, pos));
+}
+
+/*
+ * seq_file ->stop() callback: drops the list lock that
+ * procfs_list_seq_start() acquired.
+ */
+static void
+procfs_list_seq_stop(struct seq_file *f, void *p)
+{
+	procfs_list_cursor_t *cur = f->private;
+
+	mutex_exit(&cur->procfs_list->pl_lock);
+}
+
+/* seq_file iterator callbacks installed by procfs_list_open(). */
+static struct seq_operations procfs_list_seq_ops = {
+	.start = procfs_list_seq_start,
+	.next  = procfs_list_seq_next,
+	.stop  = procfs_list_seq_stop,
+	.show  = procfs_list_seq_show,
+};
+
+/*
+ * open() handler: sets up a seq_file whose private data is a fresh
+ * procfs_list_cursor_t bound to the procfs_list stored with the proc
+ * entry.  Returns 0 on success or the error from seq_open_private().
+ */
+static int
+procfs_list_open(struct inode *inode, struct file *filp)
+{
+	struct seq_file *f;
+	procfs_list_cursor_t *cursor;
+	int rc;
+
+	rc = seq_open_private(filp, &procfs_list_seq_ops,
+	    sizeof (procfs_list_cursor_t));
+	if (rc != 0)
+		return (rc);
+
+	f = filp->private_data;
+	cursor = f->private;
+
+	/* Nothing has been visited yet. */
+	cursor->cached_pos = 0;
+	cursor->cached_node = NULL;
+	cursor->procfs_list = SPL_PDE_DATA(inode);
+
+	return (0);
+}
+
+/*
+ * write() handler.  The written bytes are ignored; any write invokes the
+ * list's optional pl_clear callback.  Returns `len` on success (or when
+ * there is no callback), otherwise the negated callback return value.
+ */
+static ssize_t
+procfs_list_write(struct file *filp, const char __user *buf, size_t len,
+    loff_t *ppos)
+{
+	struct seq_file *f = filp->private_data;
+	procfs_list_cursor_t *cursor = f->private;
+	procfs_list_t *pl = cursor->procfs_list;
+	int rc;
+
+	/* Lists without a clear callback accept the write as a no-op. */
+	if (pl->pl_clear == NULL)
+		return (len);
+
+	rc = pl->pl_clear(pl);
+	if (rc != 0)
+		return (-rc);
+
+	return (len);
+}
+
+/*
+ * File operations for every procfs_list file.  The member names depend on
+ * HAVE_PROC_OPS_STRUCT; the handlers are the same in both cases.  Release
+ * uses seq_release_private to match the seq_open_private() call in
+ * procfs_list_open().
+ */
+static const kstat_proc_op_t procfs_list_operations = {
+#ifdef HAVE_PROC_OPS_STRUCT
+	.proc_open	= procfs_list_open,
+	.proc_release	= seq_release_private,
+	.proc_read	= seq_read,
+	.proc_write	= procfs_list_write,
+	.proc_lseek	= seq_lseek,
+#else
+	.open		= procfs_list_open,
+	.release	= seq_release_private,
+	.read		= seq_read,
+	.write		= procfs_list_write,
+	.llseek		= seq_lseek,
+#endif
+};
+
+/*
+ * Set up `procfs_list` (lock, list, id counter and callbacks) and publish
+ * it as a proc file named `name` in the kstat namespace, under `module` or
+ * `module/submodule` when a submodule is given.
+ *
+ * `show` renders one element, `show_header` (may be NULL) renders the
+ * header, and `clear` (may be NULL) is run when the file is written.
+ * `procfs_list_node_off` is the byte offset of the procfs_list_node_t
+ * inside each element.
+ */
+void
+procfs_list_install(const char *module,
+    const char *submodule,
+    const char *name,
+    mode_t mode,
+    procfs_list_t *procfs_list,
+    int (*show)(struct seq_file *f, void *p),
+    int (*show_header)(struct seq_file *f),
+    int (*clear)(procfs_list_t *procfs_list),
+    size_t procfs_list_node_off)
+{
+	char *path = (submodule != NULL) ?
+	    kmem_asprintf("%s/%s", module, submodule) :
+	    kmem_asprintf("%s", module);
+
+	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&procfs_list->pl_list,
+	    procfs_list_node_off + sizeof (procfs_list_node_t),
+	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+
+	procfs_list->pl_node_offset = procfs_list_node_off;
+	procfs_list->pl_show = show;
+	procfs_list->pl_show_header = show_header;
+	procfs_list->pl_clear = clear;
+	/* Id 0 is reserved for SEQ_START_TOKEN, so real nodes start at 1. */
+	procfs_list->pl_next_id = 1;
+
+	kstat_proc_entry_init(&procfs_list->pl_kstat_entry, path, name);
+	kstat_proc_entry_install(&procfs_list->pl_kstat_entry, mode,
+	    &procfs_list_operations, procfs_list);
+
+	kmem_strfree(path);
+}
+EXPORT_SYMBOL(procfs_list_install);
+
+/*
+ * Remove the proc filesystem file corresponding to the given list.  Only
+ * the kstat proc entry is deleted here; the list and its lock are left
+ * alone and are released separately by procfs_list_destroy().
+ */
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{
+	kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
+}
+EXPORT_SYMBOL(procfs_list_uninstall);
+
+/*
+ * Destroy the list and lock that procfs_list_install() created.  The list
+ * must already be empty (asserted); callers are expected to have removed
+ * every element beforehand.
+ */
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+	ASSERT(list_is_empty(&procfs_list->pl_list));
+	list_destroy(&procfs_list->pl_list);
+	mutex_destroy(&procfs_list->pl_lock);
+}
+EXPORT_SYMBOL(procfs_list_destroy);
+
+/*
+ * Add a new node to the tail of the list. While the standard list
+ * manipulation functions can be used for all other operations, adding
+ * elements to the list should only be done using this helper so that the id
+ * of the new node is set correctly.
+ *
+ * The caller must hold procfs_list->pl_lock (asserted). `p` is the element
+ * to add; it receives the next id from pl_next_id, which is then
+ * incremented.
+ */
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
+	list_insert_tail(&procfs_list->pl_list, p);
+}
+EXPORT_SYMBOL(procfs_list_add);

diff --git a/zfs/module/os/linux/spl/spl-taskq.c b/zfs/module/os/linux/spl/spl-taskq.c
new file mode 100644
index 0000000..fb25a41
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-taskq.c

@@ -0,0 +1,1429 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Task Queue Implementation.
+ */
+
+#include <sys/timer.h>
+#include <sys/taskq.h>
+#include <sys/kmem.h>
+#include <sys/tsd.h>
+#include <sys/trace_spl.h>
+#ifdef HAVE_CPU_HOTPLUG
+#include <linux/cpuhotplug.h>
+#endif
+
+/*
+ * Module parameters.  The third module_param() argument is the permission
+ * mode of the parameter file: 0644 entries are writable by root, the 0444
+ * entry is read-only.
+ */
+
+/* Off (0) by default. */
+int spl_taskq_thread_bind = 0;
+module_param(spl_taskq_thread_bind, int, 0644);
+MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
+
+
+/* On (1) by default; exposed read-only (0444). */
+int spl_taskq_thread_dynamic = 1;
+module_param(spl_taskq_thread_dynamic, int, 0444);
+MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
+
+/* On (1) by default. */
+int spl_taskq_thread_priority = 1;
+module_param(spl_taskq_thread_priority, int, 0644);
+MODULE_PARM_DESC(spl_taskq_thread_priority,
+	"Allow non-default priority for taskq threads");
+
+/* Defaults to 4 sequential tasks. */
+int spl_taskq_thread_sequential = 4;
+module_param(spl_taskq_thread_sequential, int, 0644);
+MODULE_PARM_DESC(spl_taskq_thread_sequential,
+	"Create new taskq threads after N sequential tasks");
+
+/* Global system-wide dynamic task queue available for all consumers */
+taskq_t *system_taskq;
+EXPORT_SYMBOL(system_taskq);
+/* Global dynamic task queue for long delay */
+taskq_t *system_delay_taskq;
+EXPORT_SYMBOL(system_delay_taskq);
+
+/* Private dedicated taskq for creating new taskq threads on demand. */
+static taskq_t *dynamic_taskq;
+static taskq_thread_t *taskq_thread_create(taskq_t *);
+
+#ifdef HAVE_CPU_HOTPLUG
+/* Multi-callback id for cpu hotplugging. */
+static int spl_taskq_cpuhp_state;
+#endif
+
+/* List of all taskqs */
+LIST_HEAD(tq_list);
+/*
+ * Read/write semaphore associated with tq_list.
+ * NOTE(review): presumably guards tq_list against concurrent modification;
+ * confirm against the code that takes it.
+ */
+struct rw_semaphore tq_list_sem;
+/*
+ * Thread-specific-data key; taskq_member() and taskq_of_curthread() use it
+ * to look up the taskq associated with a thread.
+ */
+static uint_t taskq_tsd;
+
+/*
+ * Translate taskq dispatch flags into kmem allocation flags.  TQ_NOSLEEP
+ * takes precedence over TQ_PUSHPAGE; with neither set the allocation may
+ * sleep (KM_SLEEP).
+ */
+static int
+task_km_flags(uint_t flags)
+{
+	int km = KM_SLEEP;
+
+	if (flags & TQ_NOSLEEP)
+		km = KM_NOSLEEP;
+	else if (flags & TQ_PUSHPAGE)
+		km = KM_PUSHPAGE;
+
+	return (km);
+}
+
+/*
+ * taskq_find_by_name - Find the largest instance number of a named taskq.
+ *
+ * tq_list is walked from tail to head and the instance number of the first
+ * taskq whose name matches is returned; -1 means no taskq has that name.
+ */
+static int
+taskq_find_by_name(const char *name)
+{
+	taskq_t *tq;
+
+	list_for_each_entry_reverse(tq, &tq_list, tq_taskqs) {
+		if (strcmp(name, tq->tq_name) == 0)
+			return (tq->tq_instance);
+	}
+
+	return (-1);
+}
+
+/*
+ * NOTE: Must be called with tq->tq_lock held, returns a taskq_ent_t which
+ * is not attached to the free, work, or pending taskq lists.
+ *
+ * tq->tq_lock may be dropped and re-acquired while sleeping or allocating;
+ * `irqflags` points at the caller's saved interrupt flags, which are
+ * updated accordingly.
+ *
+ * Returns NULL when no entry can be obtained: TQ_NOALLOC with an empty free
+ * list, TQ_NOSLEEP with the pool at tq_maxalloc, or allocation failure.
+ */
+static taskq_ent_t *
+task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
+{
+	taskq_ent_t *t;
+	int count = 0;
+
+	ASSERT(tq);
+retry:
+	/* Acquire taskq_ent_t's from free list if available */
+	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
+		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
+
+		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+		ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL));
+		ASSERT(!timer_pending(&t->tqent_timer));
+
+		list_del_init(&t->tqent_list);
+		return (t);
+	}
+
+	/* Free list is empty and memory allocations are prohibited */
+	if (flags & TQ_NOALLOC)
+		return (NULL);
+
+	/* Hit maximum taskq_ent_t pool size */
+	if (tq->tq_nalloc >= tq->tq_maxalloc) {
+		if (flags & TQ_NOSLEEP)
+			return (NULL);
+
+		/*
+		 * Sleep periodically polling the free list for an available
+		 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
+		 * but we cannot block forever waiting for a taskq_ent_t to
+		 * show up in the free list, otherwise a deadlock can happen.
+		 *
+		 * Therefore, we need to allocate a new task even if the number
+		 * of allocated tasks is above tq->tq_maxalloc, but we still
+		 * end up delaying the task allocation by one second, thereby
+		 * throttling the task dispatch rate.
+		 *
+		 * schedule_timeout_interruptible() is used rather than a bare
+		 * schedule_timeout(): the latter returns immediately unless
+		 * the caller has first changed the task state, which would
+		 * turn this throttle into a busy retry loop.
+		 */
+		spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
+		schedule_timeout_interruptible(HZ / 100);
+		spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
+		    tq->tq_lock_class);
+		if (count < 100) {
+			count++;
+			goto retry;
+		}
+	}
+
+	/* The allocation may sleep, so it is done without the spinlock. */
+	spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
+	t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
+	spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
+
+	if (t) {
+		taskq_init_ent(t);
+		tq->tq_nalloc++;
+	}
+
+	return (t);
+}
+
+/*
+ * Release a taskq_ent_t back to the allocator and drop it from the taskq's
+ * allocation count.
+ *
+ * NOTE: tq->tq_lock must be held, and the entry must already be off the
+ * free, work, and pending lists with no timer pending.
+ */
+static void
+task_free(taskq_t *tq, taskq_ent_t *t)
+{
+	ASSERT(tq);
+	ASSERT(t);
+	ASSERT(list_empty(&t->tqent_list));
+	ASSERT(!timer_pending(&t->tqent_timer));
+
+	tq->tq_nalloc--;
+	kmem_free(t, sizeof (taskq_ent_t));
+}
+
+/*
+ * Retire a finished (or cancelled) taskq_ent_t: wake anyone waiting on it,
+ * unlink it, and then either free it (when more than tq_minalloc entries
+ * are allocated) or reset it and park it on the free list for reuse.
+ *
+ * NOTE: tq->tq_lock must be held.
+ */
+static void
+task_done(taskq_t *tq, taskq_ent_t *t)
+{
+	ASSERT(tq);
+	ASSERT(t);
+
+	/* Wake tasks blocked in taskq_wait_id() */
+	wake_up_all(&t->tqent_waitq);
+
+	list_del_init(&t->tqent_list);
+
+	/* Above the minimum pool size: give the memory back. */
+	if (tq->tq_nalloc > tq->tq_minalloc) {
+		task_free(tq, t);
+		return;
+	}
+
+	/* Otherwise scrub the entry and keep it for a later dispatch. */
+	t->tqent_flags = 0;
+	t->tqent_arg = NULL;
+	t->tqent_func = NULL;
+	t->tqent_id = TASKQID_INVALID;
+
+	list_add_tail(&t->tqent_list, &tq->tq_free_list);
+}
+
+/*
+ * Timer expiry for a delayed task: take the entry off the delay list and
+ * insert it into the priority list so a worker picks it up promptly.  If
+ * the entry was cancelled in the meantime nothing is done.
+ */
+static void
+task_expire_impl(taskq_ent_t *t)
+{
+	taskq_t *tq = t->tqent_taskq;
+	struct list_head *pos = NULL;
+	taskq_ent_t *queued;
+	unsigned long irqflags;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+
+	if (t->tqent_flags & TQENT_FLAG_CANCEL) {
+		ASSERT(list_empty(&t->tqent_list));
+		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+		return;
+	}
+
+	t->tqent_birth = jiffies;
+	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
+
+	/*
+	 * The priority list is kept in strictly increasing task id order so
+	 * that the lowest id is always at its head.  Scan from the tail for
+	 * the last entry with a smaller id and insert right after it.
+	 */
+	list_del(&t->tqent_list);
+	list_for_each_prev(pos, &tq->tq_prio_list) {
+		queued = list_entry(pos, taskq_ent_t, tqent_list);
+		if (queued->tqent_id < t->tqent_id) {
+			list_add(&t->tqent_list, pos);
+			break;
+		}
+	}
+	/* No smaller id was found: this entry becomes the new head. */
+	if (pos == &tq->tq_prio_list)
+		list_add(&t->tqent_list, &tq->tq_prio_list);
+
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	wake_up(&tq->tq_work_waitq);
+}
+
+/*
+ * Timer callback installed by taskq_dispatch_delay().  Converts the timer
+ * argument back into the taskq_ent_t that embeds it (as tqent_timer) and
+ * hands that entry to task_expire_impl().
+ */
+static void
+task_expire(spl_timer_list_t tl)
+{
+	struct timer_list *tmr = (struct timer_list *)tl;
+	taskq_ent_t *t = from_timer(t, tmr, tqent_timer);
+	task_expire_impl(t);
+}
+
+/*
+ * Compute the lowest task id that has not fully completed.  Candidates are
+ * the head entries of the pending, priority, and delay lists plus the head
+ * of the active-thread list; if all of them are empty the result is
+ * tq->tq_next_id.
+ */
+static taskqid_t
+taskq_lowest_id(taskq_t *tq)
+{
+	taskqid_t lowest = tq->tq_next_id;
+	taskq_thread_t *worker;
+	taskq_ent_t *ent;
+
+	if (!list_empty(&tq->tq_pend_list)) {
+		ent = list_first_entry(&tq->tq_pend_list, taskq_ent_t,
+		    tqent_list);
+		lowest = MIN(lowest, ent->tqent_id);
+	}
+
+	if (!list_empty(&tq->tq_prio_list)) {
+		ent = list_first_entry(&tq->tq_prio_list, taskq_ent_t,
+		    tqent_list);
+		lowest = MIN(lowest, ent->tqent_id);
+	}
+
+	if (!list_empty(&tq->tq_delay_list)) {
+		ent = list_first_entry(&tq->tq_delay_list, taskq_ent_t,
+		    tqent_list);
+		lowest = MIN(lowest, ent->tqent_id);
+	}
+
+	/* A task being executed right now still counts as outstanding. */
+	if (!list_empty(&tq->tq_active_list)) {
+		worker = list_first_entry(&tq->tq_active_list, taskq_thread_t,
+		    tqt_active_list);
+		ASSERT(worker->tqt_id != TASKQID_INVALID);
+		lowest = MIN(lowest, worker->tqt_id);
+	}
+
+	return (lowest);
+}
+
+/*
+ * Insert a worker thread into tq->tq_active_list, keeping that list sorted
+ * by increasing tqt_id.
+ */
+static void
+taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
+{
+	struct list_head *pos = NULL;
+	taskq_thread_t *other;
+
+	ASSERT(tq);
+	ASSERT(tqt);
+
+	/* Scan from the tail for the last thread with a smaller id. */
+	list_for_each_prev(pos, &tq->tq_active_list) {
+		other = list_entry(pos, taskq_thread_t, tqt_active_list);
+		if (other->tqt_id < tqt->tqt_id) {
+			list_add(&tqt->tqt_active_list, pos);
+			return;
+		}
+	}
+
+	/* Nothing smaller on the list: this thread goes to the head. */
+	list_add(&tqt->tqt_active_list, &tq->tq_active_list);
+}
+
+/*
+ * Look up the task with the given id on list `lh`, which must be sorted
+ * from lowest to highest task id.  The scan stops as soon as a larger id is
+ * seen.  Returns the entry, or NULL if it is not on the list.
+ */
+static taskq_ent_t *
+taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id)
+{
+	taskq_ent_t *ent;
+
+	list_for_each_entry(ent, lh, tqent_list) {
+		if (ent->tqent_id == id)
+			return (ent);
+
+		/* Sorted list: everything past this point is larger still. */
+		if (ent->tqent_id > id)
+			break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Locate an already dispatched task by id, whatever state it is in.
+ *  - Still queued (delay, priority, or pending list): the entry is returned.
+ *  - Currently executing: ERR_PTR(-EBUSY) is returned.
+ *  - Not found (already run): NULL is returned.
+ */
+static taskq_ent_t *
+taskq_find(taskq_t *tq, taskqid_t id)
+{
+	struct list_head *queues[] = {
+		&tq->tq_delay_list,
+		&tq->tq_prio_list,
+		&tq->tq_pend_list,
+	};
+	taskq_thread_t *worker;
+	taskq_ent_t *ent;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(queues); i++) {
+		ent = taskq_find_list(tq, queues[i], id);
+		if (ent)
+			return (ent);
+	}
+
+	list_for_each_entry(worker, &tq->tq_active_list, tqt_active_list) {
+		/*
+		 * The thread's tqt_task is deliberately not returned: only
+		 * two of its fields are valid, so a plain non-NULL marker
+		 * is handed back to prevent misuse.
+		 */
+		if (worker->tqt_id == id)
+			return (ERR_PTR(-EBUSY));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
+ * taskq_wait() functions below.
+ *
+ * Taskq waiting is accomplished by tracking the lowest outstanding task
+ * id and the next available task id.  As tasks are dispatched they are
+ * added to the tail of the pending, priority, or delay lists.  As worker
+ * threads become available the tasks are removed from the heads of these
+ * lists and linked to the worker threads.  This ensures the lists are
+ * kept sorted by lowest to highest task id.
+ *
+ * Therefore the lowest outstanding task id can be quickly determined by
+ * checking the head item from all of these lists.  This value is stored
+ * with the taskq as the lowest id.  It only needs to be recalculated when
+ * either the task with the current lowest id completes or is canceled.
+ *
+ * By blocking until the lowest task id exceeds the passed task id the
+ * taskq_wait_outstanding() function can be easily implemented.  Similarly,
+ * by blocking until the lowest task id matches the next task id taskq_wait()
+ * can be implemented.
+ *
+ * Callers should be aware that when there are multiple worker threads it
+ * is possible for larger task ids to complete before smaller ones.  Also
+ * when the taskq contains delay tasks with small task ids callers may
+ * block for a considerable length of time waiting for them to expire and
+ * execute.
+ */
+/*
+ * Wait condition for taskq_wait_id(): non-zero once task `id` can no longer
+ * be found on any queue or active thread.  The lookup runs under
+ * tq->tq_lock.
+ */
+static int
+taskq_wait_id_check(taskq_t *tq, taskqid_t id)
+{
+	unsigned long irqflags;
+	int gone;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+	gone = (taskq_find(tq, id) == NULL);
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	return (gone);
+}
+
+/*
+ * The taskq_wait_id() function blocks until the passed task id completes.
+ * This does not guarantee that all lower task ids have completed.
+ *
+ * The caller sleeps on tq->tq_wait_waitq until taskq_wait_id_check()
+ * reports that the task can no longer be found.
+ */
+void
+taskq_wait_id(taskq_t *tq, taskqid_t id)
+{
+	wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id));
+}
+EXPORT_SYMBOL(taskq_wait_id);
+
+/*
+ * Wait condition for taskq_wait_outstanding(): non-zero once the taskq's
+ * lowest outstanding id has moved past `id`.  Read under tq->tq_lock.
+ */
+static int
+taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
+{
+	unsigned long irqflags;
+	int passed;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+	passed = (tq->tq_lowest_id > id);
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	return (passed);
+}
+
+/*
+ * Block until the taskq's lowest outstanding task id is greater than `id`.
+ * Task ids are handed out monotonically at dispatch time.  Passing zero
+ * selects the most recently dispatched id (tq_next_id - 1), i.e. wait for
+ * everything dispatched up to this point but nothing dispatched afterwards.
+ */
+void
+taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
+{
+	if (id == 0)
+		id = tq->tq_next_id - 1;
+
+	wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id));
+}
+EXPORT_SYMBOL(taskq_wait_outstanding);
+
+/*
+ * Wait condition for taskq_wait(): non-zero when the lowest outstanding id
+ * equals the next id to be assigned, i.e. nothing is outstanding.  Read
+ * under tq->tq_lock.
+ */
+static int
+taskq_wait_check(taskq_t *tq)
+{
+	unsigned long irqflags;
+	int drained;
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+	drained = (tq->tq_next_id == tq->tq_lowest_id);
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	return (drained);
+}
+
+/*
+ * The taskq_wait() function will block until the taskq is empty.
+ * This means that if a taskq re-dispatches work to itself taskq_wait()
+ * callers will block indefinitely.
+ *
+ * The caller sleeps on tq->tq_wait_waitq until taskq_wait_check() reports
+ * that no task is outstanding.
+ */
+void
+taskq_wait(taskq_t *tq)
+{
+	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
+}
+EXPORT_SYMBOL(taskq_wait);
+
+/*
+ * Returns non-zero if the value stored under the taskq_tsd key for thread
+ * `t` is `tq`, and zero otherwise.
+ */
+int
+taskq_member(taskq_t *tq, kthread_t *t)
+{
+	return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t));
+}
+EXPORT_SYMBOL(taskq_member);
+
+/*
+ * Returns the value stored under the taskq_tsd key for the calling thread.
+ * NOTE(review): presumably NULL when the caller is not a taskq thread;
+ * confirm against tsd_get().
+ */
+taskq_t *
+taskq_of_curthread(void)
+{
+	return (tsd_get(taskq_tsd));
+}
+EXPORT_SYMBOL(taskq_of_curthread);
+
+/*
+ * Cancel an already dispatched task given the task id.  Still pending tasks
+ * will be immediately canceled, and if the task is active the function will
+ * block until it completes.  Preallocated tasks which are canceled must be
+ * freed by the caller.
+ *
+ * Returns 0 if a queued task was cancelled, EBUSY if the task was executing
+ * (after waiting for it to finish), and ENOENT if no such task was found.
+ * Note that the error values are positive.
+ */
+int
+taskq_cancel_id(taskq_t *tq, taskqid_t id)
+{
+	taskq_ent_t *t;
+	int rc = ENOENT;
+	unsigned long flags;
+
+	ASSERT(tq);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+	t = taskq_find(tq, id);
+	if (t && t != ERR_PTR(-EBUSY)) {
+		/* Unlink first so that a racing timer expiry sees CANCEL. */
+		list_del_init(&t->tqent_list);
+		t->tqent_flags |= TQENT_FLAG_CANCEL;
+
+		/*
+		 * When canceling the lowest outstanding task id we
+		 * must recalculate the new lowest outstanding id.
+		 */
+		if (tq->tq_lowest_id == t->tqent_id) {
+			tq->tq_lowest_id = taskq_lowest_id(tq);
+			ASSERT3S(tq->tq_lowest_id, >, t->tqent_id);
+		}
+
+		/*
+		 * The task_expire() function takes the tq->tq_lock, so
+		 * drop the lock before synchronously cancelling the timer.
+		 */
+		if (timer_pending(&t->tqent_timer)) {
+			spin_unlock_irqrestore(&tq->tq_lock, flags);
+			del_timer_sync(&t->tqent_timer);
+			spin_lock_irqsave_nested(&tq->tq_lock, flags,
+			    tq->tq_lock_class);
+		}
+
+		/* Preallocated entries belong to the caller; leave them. */
+		if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
+			task_done(tq, t);
+
+		rc = 0;
+	}
+	spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+	/* The task is running: wait (without the lock) for it to finish. */
+	if (t == ERR_PTR(-EBUSY)) {
+		taskq_wait_id(tq, id);
+		rc = EBUSY;
+	}
+
+	return (rc);
+}
+EXPORT_SYMBOL(taskq_cancel_id);
+
+/* Defined later in this file; needed by the dispatch functions below. */
+static int taskq_thread_spawn(taskq_t *tq);
+
+/*
+ * Queue `func(arg)` for execution on `tq`.
+ *
+ * `flags` selects the allocation behaviour (see task_alloc()) and the queue:
+ * TQ_NOQUEUE puts the task at the front of the priority list, TQ_FRONT at
+ * its tail, and anything else at the tail of the pending list.
+ *
+ * Returns the id assigned to the task, or TASKQID_INVALID if the taskq is
+ * no longer active, if TQ_NOQUEUE was requested while every thread is busy
+ * and no additional thread could be spawned, or if no entry could be
+ * allocated.
+ */
+taskqid_t
+taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
+{
+	taskq_ent_t *t;
+	taskqid_t rc = TASKQID_INVALID;
+	unsigned long irqflags;
+
+	ASSERT(tq);
+	ASSERT(func);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+
+	/* Taskq being destroyed and all tasks drained */
+	if (!(tq->tq_flags & TASKQ_ACTIVE))
+		goto out;
+
+	/* Do not queue the task unless there is idle thread for it */
+	ASSERT(tq->tq_nactive <= tq->tq_nthreads);
+	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
+		/* Dynamic taskq may be able to spawn another thread */
+		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
+		    taskq_thread_spawn(tq) == 0)
+			goto out;
+	}
+
+	/* May drop and re-acquire tq->tq_lock; see task_alloc(). */
+	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
+		goto out;
+
+	spin_lock(&t->tqent_lock);
+
+	/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
+	if (flags & TQ_NOQUEUE)
+		list_add(&t->tqent_list, &tq->tq_prio_list);
+	/* Queue to the priority list instead of the pending list */
+	else if (flags & TQ_FRONT)
+		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
+	else
+		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+
+	/* Ids are handed out sequentially from tq_next_id. */
+	t->tqent_id = rc = tq->tq_next_id;
+	tq->tq_next_id++;
+	t->tqent_func = func;
+	t->tqent_arg = arg;
+	t->tqent_taskq = tq;
+	/* Not a delayed task: no timer callback or expiry. */
+	t->tqent_timer.function = NULL;
+	t->tqent_timer.expires = 0;
+
+	t->tqent_birth = jiffies;
+	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
+
+	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
+	spin_unlock(&t->tqent_lock);
+
+	wake_up(&tq->tq_work_waitq);
+out:
+	/* Spawn additional taskq threads if required. */
+	if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
+		(void) taskq_thread_spawn(tq);
+
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+	return (rc);
+}
+EXPORT_SYMBOL(taskq_dispatch);
+
+/*
+ * Queue `func(arg)` on `tq` for delayed execution.  The entry is placed on
+ * the delay list and its timer is armed with task_expire() as the callback;
+ * `expire_time` is stored directly as the timer's expiry value.
+ *
+ * Returns the id assigned to the task, or TASKQID_INVALID if the taskq is
+ * no longer active or no entry could be allocated.
+ */
+taskqid_t
+taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
+    uint_t flags, clock_t expire_time)
+{
+	taskqid_t rc = TASKQID_INVALID;
+	taskq_ent_t *t;
+	unsigned long irqflags;
+
+	ASSERT(tq);
+	ASSERT(func);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+
+	/* Taskq being destroyed and all tasks drained */
+	if (!(tq->tq_flags & TASKQ_ACTIVE))
+		goto out;
+
+	/* May drop and re-acquire tq->tq_lock; see task_alloc(). */
+	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
+		goto out;
+
+	spin_lock(&t->tqent_lock);
+
+	/* Queue to the delay list for subsequent execution */
+	list_add_tail(&t->tqent_list, &tq->tq_delay_list);
+
+	/* Ids are handed out sequentially from tq_next_id. */
+	t->tqent_id = rc = tq->tq_next_id;
+	tq->tq_next_id++;
+	t->tqent_func = func;
+	t->tqent_arg = arg;
+	t->tqent_taskq = tq;
+	t->tqent_timer.function = task_expire;
+	t->tqent_timer.expires = (unsigned long)expire_time;
+	add_timer(&t->tqent_timer);
+
+	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
+	spin_unlock(&t->tqent_lock);
+out:
+	/* Spawn additional taskq threads if required. */
+	if (tq->tq_nactive == tq->tq_nthreads)
+		(void) taskq_thread_spawn(tq);
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+	return (rc);
+}
+EXPORT_SYMBOL(taskq_dispatch_delay);
+
+/*
+ * Dispatch func(arg) on tq using the caller-supplied entry 't' instead of
+ * allocating one.  The entry is flagged TQENT_FLAG_PREALLOC so that it is
+ * not freed once the task has run.
+ *
+ * Nothing is returned.  The outcomes visible to the caller are:
+ *  - taskq not TASKQ_ACTIVE: t->tqent_id is set to TASKQID_INVALID and the
+ *    entry is not queued;
+ *  - TQ_NOQUEUE with every thread busy and no new thread being spawned:
+ *    the entry is left completely untouched and is not queued;
+ *  - otherwise the entry is queued (on the priority list for TQ_FRONT, or
+ *    when TQ_NOQUEUE triggered a thread spawn) and a worker is woken.
+ */
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	unsigned long irqflags;
+	ASSERT(tq);
+	ASSERT(func);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
+	    tq->tq_lock_class);
+
+	/* Taskq being destroyed and all tasks drained */
+	if (!(tq->tq_flags & TASKQ_ACTIVE)) {
+		t->tqent_id = TASKQID_INVALID;
+		goto out;
+	}
+
+	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
+		/* Dynamic taskq may be able to spawn another thread */
+		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
+		    taskq_thread_spawn(tq) == 0)
+			goto out2;
+		/* A thread is on its way; put the entry on the priority list */
+		flags |= TQ_FRONT;
+	}
+
+	spin_lock(&t->tqent_lock);
+
+	/*
+	 * Make sure the entry is not on some other taskq; it is important to
+	 * ASSERT() under lock
+	 */
+	ASSERT(taskq_empty_ent(t));
+
+	/*
+	 * Mark it as a prealloc'd task.  This is important
+	 * to ensure that we don't free it later.
+	 */
+	t->tqent_flags |= TQENT_FLAG_PREALLOC;
+
+	/* Queue to the priority list instead of the pending list */
+	if (flags & TQ_FRONT)
+		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
+	else
+		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+
+	t->tqent_id = tq->tq_next_id;
+	tq->tq_next_id++;
+	t->tqent_func = func;
+	t->tqent_arg = arg;
+	t->tqent_taskq = tq;
+
+	t->tqent_birth = jiffies;
+	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
+
+	spin_unlock(&t->tqent_lock);
+
+	wake_up(&tq->tq_work_waitq);
+out:
+	/* Spawn additional taskq threads if required. */
+	if (tq->tq_nactive == tq->tq_nthreads)
+		(void) taskq_thread_spawn(tq);
+out2:
+	/* out2 skips the spawn attempt: the TQ_NOQUEUE path already made it */
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+}
+EXPORT_SYMBOL(taskq_dispatch_ent);
+
+/*
+ * Report whether the entry is currently unlinked, i.e. its tqent_list
+ * node is not part of any list.  Returns non-zero when it is unlinked.
+ */
+int
+taskq_empty_ent(taskq_ent_t *t)
+{
+	const struct list_head *link = &t->tqent_list;
+
+	return (list_empty(link));
+}
+EXPORT_SYMBOL(taskq_empty_ent);
+
+/*
+ * Bring a caller-owned taskq_ent_t into its initial, un-queued state:
+ * no function, no argument, no flags, no owning taskq, id 0, an empty
+ * list node, an unarmed timer without callback, and freshly initialised
+ * lock and wait queue.
+ */
+void
+taskq_init_ent(taskq_ent_t *t)
+{
+	/* Plain bookkeeping fields. */
+	t->tqent_taskq = NULL;
+	t->tqent_flags = 0;
+	t->tqent_arg = NULL;
+	t->tqent_func = NULL;
+	t->tqent_id = 0;
+
+	/* Embedded kernel primitives. */
+	INIT_LIST_HEAD(&t->tqent_list);
+	timer_setup(&t->tqent_timer, NULL, 0);
+	init_waitqueue_head(&t->tqent_waitq);
+	spin_lock_init(&t->tqent_lock);
+}
+EXPORT_SYMBOL(taskq_init_ent);
+
+/*
+ * Peek at the task that should run next without unlinking it.  Entries on
+ * the priority list (TQ_FRONT dispatches) win over entries on the pending
+ * list.  Returns NULL when both lists are empty.
+ */
+static taskq_ent_t *
+taskq_next_ent(taskq_t *tq)
+{
+	struct list_head *head = NULL;
+
+	if (!list_empty(&tq->tq_prio_list))
+		head = &tq->tq_prio_list;
+	else if (!list_empty(&tq->tq_pend_list))
+		head = &tq->tq_pend_list;
+
+	if (head == NULL)
+		return (NULL);
+
+	return (list_first_entry(head, taskq_ent_t, tqent_list));
+}
+
+/*
+ * Task body used to create one more worker thread for the taskq passed
+ * in 'arg'.  If the thread cannot be created, the spawn count that was
+ * bumped when this task was requested is handed back under tq_lock.
+ */
+static void
+taskq_thread_spawn_task(void *arg)
+{
+	taskq_t *owner = arg;
+	unsigned long irqflags;
+
+	if (taskq_thread_create(owner) != NULL)
+		return;
+
+	/* Creation failed: undo the pending-spawn accounting. */
+	spin_lock_irqsave_nested(&owner->tq_lock, irqflags,
+	    owner->tq_lock_class);
+	owner->tq_nspawn--;
+	spin_unlock_irqrestore(&owner->tq_lock, irqflags);
+}
+
+/*
+ * Request an additional thread for a dynamic taskq (TASKQ_DYNAMIC) when
+ * the current number of threads is insufficient to handle the pending
+ * tasks.  These new threads must be created by the dedicated dynamic_taskq
+ * to avoid deadlocks between thread creation and memory reclaim.  The
+ * system_taskq, which is also a dynamic taskq, cannot be safely used for
+ * this.
+ *
+ * The callers visible in this file all invoke this function with
+ * tq->tq_lock held, which is what serialises the tq_nspawn updates below.
+ *
+ * Returns the new (non-zero) tq_nspawn count when a spawn request was
+ * queued, and 0 when the taskq is not dynamic, is at its thread limit,
+ * is no longer active, or the request could not be queued.
+ */
+static int
+taskq_thread_spawn(taskq_t *tq)
+{
+	int spawning = 0;
+
+	if (!(tq->tq_flags & TASKQ_DYNAMIC))
+		return (0);
+
+	if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
+	    (tq->tq_flags & TASKQ_ACTIVE)) {
+		spawning = (++tq->tq_nspawn);
+		if (taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
+		    tq, TQ_NOSLEEP) == TASKQID_INVALID) {
+			/*
+			 * The spawn task was never queued, so nobody will
+			 * ever decrement tq_nspawn on our behalf.  Undo the
+			 * increment here; otherwise taskq_destroy() would
+			 * wait forever for tq_nspawn to drop to zero.
+			 */
+			tq->tq_nspawn--;
+			spawning = 0;
+		}
+	}
+
+	return (spawning);
+}
+
+/*
+ * Decide whether the worker 'tqt' of a dynamic taskq may exit.  A worker
+ * only retires once the queue is fully drained and no other thread is
+ * busy, so that threads are not created and destroyed more often than
+ * necessary.
+ *
+ * The first thread in the thread list is treated as the primary thread.
+ * Nothing else is special about it, but it is kept running so that not
+ * every pid belonging to the taskq changes over time.
+ *
+ * Returns 1 when the thread should stop, 0 otherwise.
+ */
+static int
+taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
+{
+	taskq_thread_t *primary;
+
+	if (!(tq->tq_flags & TASKQ_DYNAMIC))
+		return (0);
+
+	primary = list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
+	    tqt_thread_list);
+	if (primary == tqt)
+		return (0);
+
+	/* Threads are still being spawned */
+	if (tq->tq_nspawn != 0)
+		return (0);
+
+	/* Some thread is handling a task */
+	if (tq->tq_nactive != 0)
+		return (0);
+
+	/* Only one thread is running */
+	if (!(tq->tq_nthreads > 1))
+		return (0);
+
+	/* There is still pending work */
+	if (taskq_next_ent(tq))
+		return (0);
+
+	/* Retire only while dynamic taskqs are allowed */
+	return (spl_taskq_thread_dynamic ? 1 : 0);
+}
+
+/*
+ * Main loop of a taskq worker thread.  'args' is the taskq_thread_t that
+ * taskq_thread_create() allocated for this thread; it is freed here before
+ * the thread exits.
+ *
+ * After registering itself in tq->tq_thread_list the thread repeatedly
+ * takes the next entry (priority list first, then pending list), runs it
+ * with tq_lock dropped, and sleeps on tq_work_waitq when both lists are
+ * empty.  The loop ends when kthread_should_stop() is true or when
+ * taskq_thread_should_stop() decides this thread is surplus.
+ */
+static int
+taskq_thread(void *args)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	sigset_t blocked;
+	taskq_thread_t *tqt = args;
+	taskq_t *tq;
+	taskq_ent_t *t;
+	int seq_tasks = 0;
+	unsigned long flags;
+	taskq_ent_t dup_task = {};
+
+	ASSERT(tqt);
+	ASSERT(tqt->tqt_tq);
+	tq = tqt->tqt_tq;
+	current->flags |= PF_NOFREEZE;
+
+	(void) spl_fstrans_mark();
+
+	/* Block every signal and discard anything already pending */
+	sigfillset(&blocked);
+	sigprocmask(SIG_BLOCK, &blocked, NULL);
+	flush_signals(current);
+
+	tsd_set(taskq_tsd, tq);
+	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+	/*
+	 * If we are dynamically spawned, decrease spawning count. Note that
+	 * we could be created during taskq_create, in which case we shouldn't
+	 * do the decrement. But it's fine because taskq_create will reset
+	 * tq_nspawn later.
+	 */
+	if (tq->tq_flags & TASKQ_DYNAMIC)
+		tq->tq_nspawn--;
+
+	/* Immediately exit if more threads than allowed were created. */
+	if (tq->tq_nthreads >= tq->tq_maxthreads)
+		goto error;
+
+	tq->tq_nthreads++;
+	list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
+	/* Lets taskq_create(), which waits on tq_wait_waitq, re-check */
+	wake_up(&tq->tq_wait_waitq);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+
+		if (list_empty(&tq->tq_pend_list) &&
+		    list_empty(&tq->tq_prio_list)) {
+
+			if (taskq_thread_should_stop(tq, tqt)) {
+				wake_up_all(&tq->tq_wait_waitq);
+				break;
+			}
+
+			/* Nothing to do: sleep with tq_lock released */
+			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
+			spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+			schedule();
+			seq_tasks = 0;
+
+			spin_lock_irqsave_nested(&tq->tq_lock, flags,
+			    tq->tq_lock_class);
+			remove_wait_queue(&tq->tq_work_waitq, &wait);
+		} else {
+			__set_current_state(TASK_RUNNING);
+		}
+
+		if ((t = taskq_next_ent(tq)) != NULL) {
+			list_del_init(&t->tqent_list);
+
+			/*
+			 * A TQENT_FLAG_PREALLOC task may be reused or freed
+			 * during the task function call. Store tqent_id and
+			 * tqent_flags here.
+			 *
+			 * Also use an on stack taskq_ent_t for tqt_task
+			 * assignment in this case; we want to make sure
+			 * to duplicate all fields, so the values are
+			 * correct when it's accessed via DTRACE_PROBE*.
+			 */
+			tqt->tqt_id = t->tqent_id;
+			tqt->tqt_flags = t->tqent_flags;
+
+			if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
+				dup_task = *t;
+				t = &dup_task;
+			}
+			tqt->tqt_task = t;
+
+			taskq_insert_in_order(tq, tqt);
+			tq->tq_nactive++;
+			spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+			DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
+
+			/* Perform the requested task */
+			t->tqent_func(t->tqent_arg);
+
+			DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
+
+			spin_lock_irqsave_nested(&tq->tq_lock, flags,
+			    tq->tq_lock_class);
+			tq->tq_nactive--;
+			list_del_init(&tqt->tqt_active_list);
+			tqt->tqt_task = NULL;
+
+			/* For prealloc'd tasks, we don't free anything. */
+			if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
+				task_done(tq, t);
+
+			/*
+			 * When the current lowest outstanding taskqid is
+			 * done calculate the new lowest outstanding id
+			 */
+			if (tq->tq_lowest_id == tqt->tqt_id) {
+				tq->tq_lowest_id = taskq_lowest_id(tq);
+				ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
+			}
+
+			/* Spawn additional taskq threads if required. */
+			if ((++seq_tasks) > spl_taskq_thread_sequential &&
+			    taskq_thread_spawn(tq))
+				seq_tasks = 0;
+
+			tqt->tqt_id = TASKQID_INVALID;
+			tqt->tqt_flags = 0;
+			wake_up_all(&tq->tq_wait_waitq);
+		} else {
+			if (taskq_thread_should_stop(tq, tqt))
+				break;
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+	}
+
+	__set_current_state(TASK_RUNNING);
+	tq->tq_nthreads--;
+	list_del_init(&tqt->tqt_thread_list);
+error:
+	/*
+	 * Also reached directly when the thread limit was already met; in
+	 * that case this thread was never counted in tq_nthreads nor linked
+	 * into tq_thread_list, so only the taskq_thread_t needs releasing.
+	 */
+	kmem_free(tqt, sizeof (taskq_thread_t));
+	spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+	tsd_set(taskq_tsd, NULL);
+	thread_exit();
+
+	return (0);
+}
+
+/*
+ * Allocate the per-thread bookkeeping structure for 'tq', create the
+ * kernel thread that runs taskq_thread() on it, optionally bind it to a
+ * CPU (round-robin) and adjust its nice value, then wake it up.
+ *
+ * Returns the new taskq_thread_t, or NULL when the kernel thread could
+ * not be created.  Once the thread has been woken it owns the returned
+ * structure and releases it itself when it exits.
+ */
+static taskq_thread_t *
+taskq_thread_create(taskq_t *tq)
+{
+	static int last_used_cpu = 0;
+	taskq_thread_t *worker;
+	struct task_struct *task;
+
+	worker = kmem_alloc(sizeof (*worker), KM_PUSHPAGE);
+	worker->tqt_id = TASKQID_INVALID;
+	worker->tqt_tq = tq;
+	INIT_LIST_HEAD(&worker->tqt_active_list);
+	INIT_LIST_HEAD(&worker->tqt_thread_list);
+
+	task = spl_kthread_create(taskq_thread, worker, "%s", tq->tq_name);
+	worker->tqt_thread = task;
+	if (task == NULL) {
+		kmem_free(worker, sizeof (taskq_thread_t));
+		return (NULL);
+	}
+
+	/* Spread bound threads over the online CPUs in turn. */
+	if (spl_taskq_thread_bind) {
+		last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
+		kthread_bind(task, last_used_cpu);
+	}
+
+	if (spl_taskq_thread_priority)
+		set_user_nice(task, PRIO_TO_NICE(tq->tq_pri));
+
+	/* Do not touch *worker beyond this point; the thread may free it. */
+	wake_up_process(task);
+
+	return (worker);
+}
+
+/*
+ * Create a taskq named 'name' and start its worker threads.
+ *
+ * threads_arg is the number of threads, or -- when TASKQ_THREADS_CPU_PCT is
+ * set in flags -- a percentage (0..100) of the online CPUs, with a minimum
+ * of one thread.  'pri' is recorded as the thread priority, minalloc and
+ * maxalloc are recorded as the entry allocation bounds, and with
+ * TASKQ_PREPOPULATE minalloc entries are allocated and released up front.
+ * TASKQ_CPR_SAFE is not supported.
+ *
+ * A dynamic taskq (TASKQ_DYNAMIC with spl_taskq_thread_dynamic set) starts
+ * with a single thread; tq_maxthreads still holds the full thread count.
+ *
+ * Returns the new taskq, or NULL if allocation, hotplug registration or
+ * the creation of any worker thread failed.
+ */
+taskq_t *
+taskq_create(const char *name, int threads_arg, pri_t pri,
+    int minalloc, int maxalloc, uint_t flags)
+{
+	taskq_t *tq;
+	taskq_thread_t *tqt;
+	int count = 0, rc = 0, i;
+	unsigned long irqflags;
+	int nthreads = threads_arg;
+
+	ASSERT(name != NULL);
+	ASSERT(minalloc >= 0);
+	ASSERT(maxalloc <= INT_MAX);
+	ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
+
+	/* Scale the number of threads using nthreads as a percentage */
+	if (flags & TASKQ_THREADS_CPU_PCT) {
+		ASSERT(nthreads <= 100);
+		ASSERT(nthreads >= 0);
+		nthreads = MIN(threads_arg, 100);
+		nthreads = MAX(nthreads, 0);
+		nthreads = MAX((num_online_cpus() * nthreads) /100, 1);
+	}
+
+	tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
+	if (tq == NULL)
+		return (NULL);
+
+	tq->tq_hp_support = B_FALSE;
+#ifdef HAVE_CPU_HOTPLUG
+	/* Only CPU-percentage taskqs are registered for hotplug callbacks */
+	if (flags & TASKQ_THREADS_CPU_PCT) {
+		tq->tq_hp_support = B_TRUE;
+		if (cpuhp_state_add_instance_nocalls(spl_taskq_cpuhp_state,
+		    &tq->tq_hp_cb_node) != 0) {
+			kmem_free(tq, sizeof (*tq));
+			return (NULL);
+		}
+	}
+#endif
+
+	spin_lock_init(&tq->tq_lock);
+	INIT_LIST_HEAD(&tq->tq_thread_list);
+	INIT_LIST_HEAD(&tq->tq_active_list);
+	tq->tq_name = kmem_strdup(name);
+	tq->tq_nactive = 0;
+	tq->tq_nthreads = 0;
+	tq->tq_nspawn = 0;
+	tq->tq_maxthreads = nthreads;
+	tq->tq_cpu_pct = threads_arg;
+	tq->tq_pri = pri;
+	tq->tq_minalloc = minalloc;
+	tq->tq_maxalloc = maxalloc;
+	tq->tq_nalloc = 0;
+	tq->tq_flags = (flags | TASKQ_ACTIVE);
+	tq->tq_next_id = TASKQID_INITIAL;
+	tq->tq_lowest_id = TASKQID_INITIAL;
+	INIT_LIST_HEAD(&tq->tq_free_list);
+	INIT_LIST_HEAD(&tq->tq_pend_list);
+	INIT_LIST_HEAD(&tq->tq_prio_list);
+	INIT_LIST_HEAD(&tq->tq_delay_list);
+	init_waitqueue_head(&tq->tq_work_waitq);
+	init_waitqueue_head(&tq->tq_wait_waitq);
+	tq->tq_lock_class = TQ_LOCK_GENERAL;
+	INIT_LIST_HEAD(&tq->tq_taskqs);
+
+	if (flags & TASKQ_PREPOPULATE) {
+		spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
+		    tq->tq_lock_class);
+
+		/* Allocate and immediately release minalloc entries */
+		for (i = 0; i < minalloc; i++)
+			task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
+			    &irqflags));
+
+		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+	}
+
+	/* Dynamic taskqs start with one thread and grow on demand */
+	if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
+		nthreads = 1;
+
+	for (i = 0; i < nthreads; i++) {
+		tqt = taskq_thread_create(tq);
+		if (tqt == NULL)
+			rc = 1;
+		else
+			count++;
+	}
+
+	/* Wait for all threads to be started before potential destroy */
+	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
+	/*
+	 * taskq_thread might have touched nspawn, but we don't want them to
+	 * because they're not dynamically spawned. So we reset it to 0
+	 */
+	tq->tq_nspawn = 0;
+
+	if (rc) {
+		/* At least one thread failed to start: tear everything down */
+		taskq_destroy(tq);
+		tq = NULL;
+	} else {
+		/* Publish on the global list; instance = count of same name */
+		down_write(&tq_list_sem);
+		tq->tq_instance = taskq_find_by_name(name) + 1;
+		list_add_tail(&tq->tq_taskqs, &tq_list);
+		up_write(&tq_list_sem);
+	}
+
+	return (tq);
+}
+EXPORT_SYMBOL(taskq_create);
+
+/*
+ * Tear down a taskq: mark it inactive so no new work or threads are
+ * accepted, wait for outstanding work, unlink it from the global list,
+ * stop every worker thread, free the cached entries on the free list and
+ * finally release the name and the taskq itself.
+ */
+void
+taskq_destroy(taskq_t *tq)
+{
+	struct task_struct *thread;
+	taskq_thread_t *tqt;
+	taskq_ent_t *t;
+	unsigned long flags;
+
+	ASSERT(tq);
+	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+	tq->tq_flags &= ~TASKQ_ACTIVE;
+	spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+#ifdef HAVE_CPU_HOTPLUG
+	if (tq->tq_hp_support) {
+		VERIFY0(cpuhp_state_remove_instance_nocalls(
+		    spl_taskq_cpuhp_state, &tq->tq_hp_cb_node));
+	}
+#endif
+	/*
+	 * When TASKQ_ACTIVE is clear new tasks may not be added nor may
+	 * new worker threads be spawned for dynamic taskq.
+	 */
+	if (dynamic_taskq != NULL)
+		taskq_wait_outstanding(dynamic_taskq, 0);
+
+	taskq_wait(tq);
+
+	/* remove taskq from global list used by the kstats */
+	down_write(&tq_list_sem);
+	list_del(&tq->tq_taskqs);
+	up_write(&tq_list_sem);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+	/* wait for spawning threads to insert themselves to the list */
+	while (tq->tq_nspawn) {
+		spin_unlock_irqrestore(&tq->tq_lock, flags);
+		schedule_timeout_interruptible(1);
+		spin_lock_irqsave_nested(&tq->tq_lock, flags,
+		    tq->tq_lock_class);
+	}
+
+	/*
+	 * Signal each thread to exit and block until it does.  Each thread
+	 * is responsible for removing itself from the list and freeing its
+	 * taskq_thread_t.  This allows for idle threads to opt to remove
+	 * themselves from the taskq.  They can be recreated as needed.
+	 */
+	while (!list_empty(&tq->tq_thread_list)) {
+		tqt = list_entry(tq->tq_thread_list.next,
+		    taskq_thread_t, tqt_thread_list);
+		thread = tqt->tqt_thread;
+		/* kthread_stop() blocks, so tq_lock must not be held */
+		spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+		kthread_stop(thread);
+
+		spin_lock_irqsave_nested(&tq->tq_lock, flags,
+		    tq->tq_lock_class);
+	}
+
+	/* Release every cached entry remaining on the free list */
+	while (!list_empty(&tq->tq_free_list)) {
+		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
+
+		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
+		list_del_init(&t->tqent_list);
+		task_free(tq, t);
+	}
+
+	ASSERT0(tq->tq_nthreads);
+	ASSERT0(tq->tq_nalloc);
+	ASSERT0(tq->tq_nspawn);
+	ASSERT(list_empty(&tq->tq_thread_list));
+	ASSERT(list_empty(&tq->tq_active_list));
+	ASSERT(list_empty(&tq->tq_free_list));
+	ASSERT(list_empty(&tq->tq_pend_list));
+	ASSERT(list_empty(&tq->tq_prio_list));
+	ASSERT(list_empty(&tq->tq_delay_list));
+
+	spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+	kmem_strfree(tq->tq_name);
+	kmem_free(tq, sizeof (taskq_t));
+}
+EXPORT_SYMBOL(taskq_destroy);
+
+/* Backing storage for the spl_taskq_kick parameter; reset to 0 on use. */
+static unsigned int spl_taskq_kick = 0;
+
+/*
+ * Setter for the spl_taskq_kick module parameter.
+ *
+ * Writing a nonzero value walks every registered taskq and, for each one
+ * whose next runnable entry was dispatched more than 5 seconds ago, asks
+ * for an additional worker thread and logs the kick.  The stored value is
+ * reset to 0 before the walk.  Returns the result of param_set_uint().
+ *
+ * 2.6.36 API Change
+ * module_param_cb is introduced to take kernel_param_ops and
+ * module_param_call is marked as obsolete. Also set and get operations
+ * were changed to take a 'const struct kernel_param *'.
+ */
+static int
+#ifdef module_param_cb
+param_set_taskq_kick(const char *val, const struct kernel_param *kp)
+#else
+param_set_taskq_kick(const char *val, struct kernel_param *kp)
+#endif
+{
+	int ret;
+	taskq_t *tq = NULL;
+	taskq_ent_t *t;
+	unsigned long flags;
+
+	ret = param_set_uint(val, kp);
+	/* Nothing to do on a parse error or when 0 was written */
+	if (ret < 0 || !spl_taskq_kick)
+		return (ret);
+	/* reset value */
+	spl_taskq_kick = 0;
+
+	down_read(&tq_list_sem);
+	list_for_each_entry(tq, &tq_list, tq_taskqs) {
+		spin_lock_irqsave_nested(&tq->tq_lock, flags,
+		    tq->tq_lock_class);
+		/* Check if the first pending is older than 5 seconds */
+		t = taskq_next_ent(tq);
+		if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
+			(void) taskq_thread_spawn(tq);
+			printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
+			    tq->tq_name, tq->tq_instance);
+		}
+		spin_unlock_irqrestore(&tq->tq_lock, flags);
+	}
+	up_read(&tq_list_sem);
+	return (ret);
+}
+
+/*
+ * Register spl_taskq_kick (mode 0644) with param_set_taskq_kick() as its
+ * setter and the stock param_get_uint() as its getter, using
+ * module_param_cb when it is available and module_param_call otherwise.
+ */
+#ifdef module_param_cb
+static const struct kernel_param_ops param_ops_taskq_kick = {
+	.set = param_set_taskq_kick,
+	.get = param_get_uint,
+};
+module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
+#else
+module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
+	&spl_taskq_kick, 0644);
+#endif
+MODULE_PARM_DESC(spl_taskq_kick,
+	"Write nonzero to kick stuck taskqs to spawn more threads");
+
+#ifdef HAVE_CPU_HOTPLUG
+/*
+ * CPU hotplug "online" callback, invoked for each taskq instance that was
+ * registered with the hotplug state (those created with
+ * TASKQ_THREADS_CPU_PCT).  The thread limit is recomputed from the stored
+ * percentage on every call, using num_online_cpus() + 1, and one more
+ * worker is created when the taskq is not growing dynamically and is
+ * below the new limit.
+ *
+ * Returns 0 on success and -1 when the additional thread could not be
+ * created.
+ */
+static int
+spl_taskq_expand(unsigned int cpu, struct hlist_node *node)
+{
+	taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
+	unsigned long irqflags;
+	int grow = 0;
+	int limit;
+
+	ASSERT(tq);
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+
+	if (tq->tq_flags & TASKQ_ACTIVE) {
+		ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
+
+		limit = MIN(tq->tq_cpu_pct, 100);
+		limit = MAX(((num_online_cpus() + 1) * limit) / 100, 1);
+		tq->tq_maxthreads = limit;
+
+		/* Dynamic taskqs add their own threads on demand. */
+		if (!((tq->tq_flags & TASKQ_DYNAMIC) &&
+		    spl_taskq_thread_dynamic) &&
+		    tq->tq_maxthreads > tq->tq_nthreads)
+			grow = 1;
+	}
+
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	/* Thread creation happens with tq_lock released. */
+	if (grow && taskq_thread_create(tq) == NULL)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * CPU hotplug teardown callback.  Offlining CPUs is not supported, but a
+ * CPU may fail to come online, and that case has to be handled gracefully.
+ * The thread limit is recomputed from the stored percentage and, when a
+ * non-dynamic taskq ends up with one thread too many, the first thread in
+ * its thread list is stopped.
+ *
+ * Always returns 0.
+ */
+static int
+spl_taskq_prepare_down(unsigned int cpu, struct hlist_node *node)
+{
+	taskq_t *tq = list_entry(node, taskq_t, tq_hp_cb_node);
+	struct task_struct *victim = NULL;
+	taskq_thread_t *first;
+	unsigned long irqflags;
+	int shrink = 0;
+	int limit;
+
+	ASSERT(tq);
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+
+	if (tq->tq_flags & TASKQ_ACTIVE) {
+		ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
+
+		limit = MIN(tq->tq_cpu_pct, 100);
+		limit = MAX(((num_online_cpus()) * limit) / 100, 1);
+		tq->tq_maxthreads = limit;
+
+		if (!((tq->tq_flags & TASKQ_DYNAMIC) &&
+		    spl_taskq_thread_dynamic) &&
+		    tq->tq_maxthreads < tq->tq_nthreads) {
+			ASSERT3U(tq->tq_maxthreads, ==, tq->tq_nthreads - 1);
+			first = list_first_entry(&tq->tq_thread_list,
+			    taskq_thread_t, tqt_thread_list);
+			victim = first->tqt_thread;
+			shrink = 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+
+	/* kthread_stop() blocks, so it runs with tq_lock released. */
+	if (shrink)
+		kthread_stop(victim);
+
+	return (0);
+}
+#endif
+
+/*
+ * Set up the taskq subsystem: the global taskq list lock, the taskq TSD
+ * key, the CPU hotplug state (when available) and the three built-in
+ * taskqs (system_taskq, system_delay_taskq and dynamic_taskq).
+ *
+ * Returns 0 on success and 1 on failure.  On failure every taskq created
+ * so far is destroyed, its global pointer is reset to NULL and the hotplug
+ * state is removed again, whichever taskq failed to come up.
+ *
+ * NOTE(review): taskq_tsd is not destroyed on the failure paths; presumably
+ * the caller's TSD teardown covers it -- confirm.
+ */
+int
+spl_taskq_init(void)
+{
+	init_rwsem(&tq_list_sem);
+	tsd_create(&taskq_tsd, NULL);
+
+#ifdef HAVE_CPU_HOTPLUG
+	spl_taskq_cpuhp_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+	    "fs/spl_taskq:online", spl_taskq_expand, spl_taskq_prepare_down);
+#endif
+
+	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
+	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+	if (system_taskq == NULL)
+		goto fail;
+
+	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
+	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+	if (system_delay_taskq == NULL)
+		goto fail_system;
+
+	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
+	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
+	if (dynamic_taskq == NULL)
+		goto fail_delay;
+
+	/*
+	 * This is used to annotate tq_lock, so
+	 *   taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
+	 * does not trigger a lockdep warning re: possible recursive locking
+	 */
+	dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
+
+	return (0);
+
+fail_delay:
+	taskq_destroy(system_delay_taskq);
+	system_delay_taskq = NULL;
+fail_system:
+	taskq_destroy(system_taskq);
+	system_taskq = NULL;
+fail:
+	/* Undo the hotplug registration on every failure path. */
+#ifdef HAVE_CPU_HOTPLUG
+	cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
+	spl_taskq_cpuhp_state = 0;
+#endif
+	return (1);
+}
+
+/*
+ * Tear down what spl_taskq_init() set up.  The built-in taskqs are
+ * destroyed in the reverse order of their creation and each global pointer
+ * is cleared right after its taskq is destroyed; then the taskq TSD key
+ * and, when available, the CPU hotplug state are released.
+ */
+void
+spl_taskq_fini(void)
+{
+	taskq_destroy(dynamic_taskq);
+	/* taskq_destroy() tests dynamic_taskq for NULL before waiting on it */
+	dynamic_taskq = NULL;
+
+	taskq_destroy(system_delay_taskq);
+	system_delay_taskq = NULL;
+
+	taskq_destroy(system_taskq);
+	system_taskq = NULL;
+
+	tsd_destroy(&taskq_tsd);
+
+#ifdef HAVE_CPU_HOTPLUG
+	cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
+	spl_taskq_cpuhp_state = 0;
+#endif
+}

diff --git a/zfs/module/os/linux/spl/spl-thread.c b/zfs/module/os/linux/spl/spl-thread.c
new file mode 100644
index 0000000..16d2ca1
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-thread.c

@@ -0,0 +1,216 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) Thread Implementation.
+ */
+
+#include <sys/thread.h>
+#include <sys/kmem.h>
+#include <sys/tsd.h>
+
+/*
+ * Thread interfaces
+ *
+ * thread_priv_t carries the start-up parameters from __thread_create() to
+ * thread_generic_wrapper(), which releases it before calling tp_func.
+ */
+typedef struct thread_priv_s {
+	unsigned long tp_magic;		/* Magic */
+	int tp_name_size;		/* Allocated size of tp_name */
+	char *tp_name;			/* Name (without _thread suffix) */
+	void (*tp_func)(void *);	/* Registered function */
+	void *tp_args;			/* Args to be passed to function */
+	size_t tp_len;			/* Len to be passed to function */
+	int tp_state;			/* State to start thread at */
+	pri_t tp_pri;			/* Priority to start thread at */
+} thread_priv_t;
+
+/*
+ * Entry point of every thread started through __thread_create().  It
+ * copies the function and argument out of the thread_priv_t, applies the
+ * requested task state and nice value, releases the thread_priv_t together
+ * with its name buffer, and only then runs the registered function (if
+ * one was given).  Always returns 0.
+ */
+static int
+thread_generic_wrapper(void *arg)
+{
+	thread_priv_t *priv = arg;
+	void (*entry)(void *);
+	void *entry_arg;
+
+	ASSERT(priv->tp_magic == TP_MAGIC);
+
+	/* Everything needed later is read before priv is freed. */
+	entry = priv->tp_func;
+	entry_arg = priv->tp_args;
+	set_current_state(priv->tp_state);
+	set_user_nice((kthread_t *)current, PRIO_TO_NICE(priv->tp_pri));
+
+	kmem_free(priv->tp_name, priv->tp_name_size);
+	kmem_free(priv, sizeof (thread_priv_t));
+
+	if (entry != NULL)
+		entry(entry_arg);
+
+	return (0);
+}
+
+/*
+ * Terminate the calling thread: run tsd_exit() first, then exit through
+ * SPL_KTHREAD_COMPLETE_AND_EXIT().  Control does not return to the caller.
+ */
+void
+__thread_exit(void)
+{
+	tsd_exit();
+	SPL_KTHREAD_COMPLETE_AND_EXIT(NULL, 0);
+	/* Unreachable */
+}
+EXPORT_SYMBOL(__thread_exit);
+
+/*
+ * Create and start a kernel thread that runs func(args).
+ *
+ * thread_create() may block forever if it cannot create a thread or
+ * allocate memory.  This is preferable to returning a NULL which Solaris
+ * style callers likely never check for... since it can't fail.
+ *
+ *  stk      must be NULL; a caller-supplied stack is not supported
+ *  stksize  unused
+ *  func     function the new thread runs
+ *  name     thread name; anything from the first "_thread" on is dropped
+ *  args     argument handed to func
+ *  len      recorded in the private start-up data
+ *  pp       ignored
+ *  state    task state the new thread sets on itself before running func
+ *  pri      priority, converted to a nice value by the new thread
+ *
+ * Returns the new thread, or NULL if the start-up data could not be
+ * allocated or the kernel thread could not be created.  In the NULL case
+ * nothing stays allocated.
+ */
+kthread_t *
+__thread_create(caddr_t stk, size_t  stksize, thread_func_t func,
+    const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri)
+{
+	thread_priv_t *tp;
+	struct task_struct *tsk;
+	char *p;
+
+	/* Option pp is simply ignored */
+	/* Variable stack size unsupported */
+	ASSERT(stk == NULL);
+
+	tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE);
+	if (tp == NULL)
+		return (NULL);
+
+	tp->tp_magic = TP_MAGIC;
+	tp->tp_name_size = strlen(name) + 1;
+
+	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE);
+	if (tp->tp_name == NULL) {
+		kmem_free(tp, sizeof (thread_priv_t));
+		return (NULL);
+	}
+
+	/* tp_name_size includes the terminating NUL, so it is copied too */
+	strncpy(tp->tp_name, name, tp->tp_name_size);
+
+	/*
+	 * Strip trailing "_thread" from passed name which will be the func
+	 * name since the exposed API has no parameter for passing a name.
+	 */
+	p = strstr(tp->tp_name, "_thread");
+	if (p)
+		p[0] = '\0';
+
+	tp->tp_func  = func;
+	tp->tp_args  = args;
+	tp->tp_len   = len;
+	tp->tp_state = state;
+	tp->tp_pri   = pri;
+
+	tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp,
+	    "%s", tp->tp_name);
+	/*
+	 * spl_kthread_create() reports failure as NULL rather than as an
+	 * ERR_PTR, so both forms are checked before the task is woken.  The
+	 * wrapper never ran in that case, which means the start-up data is
+	 * still owned here and must be released.
+	 */
+	if (IS_ERR_OR_NULL(tsk)) {
+		kmem_free(tp->tp_name, tp->tp_name_size);
+		kmem_free(tp, sizeof (thread_priv_t));
+		return (NULL);
+	}
+
+	wake_up_process(tsk);
+	return ((kthread_t *)tsk);
+}
+EXPORT_SYMBOL(__thread_create);
+
+/*
+ * spl_kthread_create - kthread_create() wrapper providing pre-3.13
+ * semantics, i.e. one that is not killable and is less likely to return
+ * -ENOMEM.
+ *
+ * The name is formatted into a TASK_COMM_LEN sized buffer.  Creation is
+ * retried for as long as it fails while the caller has a signal pending
+ * (TIF_SIGPENDING is cleared before retrying) or fails with -ENOMEM.
+ *
+ * Returns the created (not yet woken) task, or NULL on any other error.
+ */
+struct task_struct *
+spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...)
+{
+	char name[TASK_COMM_LEN];
+	struct task_struct *tsk;
+	va_list ap;
+
+	va_start(ap, namefmt);
+	vsnprintf(name, sizeof (name), namefmt, ap);
+	va_end(ap);
+
+	for (;;) {
+		tsk = kthread_create(func, data, "%s", name);
+		if (!IS_ERR(tsk))
+			return (tsk);
+
+		/* Interrupted by a signal: drop it and try again. */
+		if (signal_pending(current)) {
+			clear_thread_flag(TIF_SIGPENDING);
+			continue;
+		}
+
+		/* Only memory pressure is worth another attempt. */
+		if (PTR_ERR(tsk) != -ENOMEM)
+			return (NULL);
+	}
+}
+EXPORT_SYMBOL(spl_kthread_create);
+
+/*
+ * The "why" argument indicates the allowable side-effects of the call:
+ *
+ * FORREAL:  Extract the next pending signal from p_sig into p_cursig;
+ * stop the process if a stop has been requested or if a traced signal
+ * is pending.
+ *
+ * JUSTLOOKING:  Don't stop the process, just indicate whether or not
+ * a signal might be pending (FORREAL is needed to tell for sure).
+ *
+ * Return value of this implementation: 0 when no signal is pending, or
+ * when a signal was dequeued with FORREAL and the stop handling below has
+ * completed; 1 when a signal is pending and either why is JUSTLOOKING or
+ * nothing could be dequeued.
+ */
+int
+issig(int why)
+{
+	ASSERT(why == FORREAL || why == JUSTLOOKING);
+
+	if (!signal_pending(current))
+		return (0);
+
+	if (why != FORREAL)
+		return (1);
+
+	struct task_struct *task = current;
+	spl_kernel_siginfo_t __info;
+	sigset_t set;
+	/*
+	 * 'set' is initialised to every signal except SIGSTOP and SIGTSTP and
+	 * then OR'ed with the task's blocked set before being handed to
+	 * dequeue_signal() as its mask -- presumably so that only an unblocked
+	 * SIGSTOP/SIGTSTP can be dequeued here; confirm against the kernel's
+	 * dequeue_signal() contract.
+	 */
+	siginitsetinv(&set, 1ULL << (SIGSTOP - 1) | 1ULL << (SIGTSTP - 1));
+	sigorsets(&set, &task->blocked, &set);
+
+	spin_lock_irq(&task->sighand->siglock);
+	int ret;
+	/* The opening brace of this 'if' is supplied by either #ifdef arm */
+#ifdef HAVE_DEQUEUE_SIGNAL_4ARG
+	enum pid_type __type;
+	if ((ret = dequeue_signal(task, &set, &__info, &__type)) != 0) {
+#else
+	if ((ret = dequeue_signal(task, &set, &__info)) != 0) {
+#endif
+#ifdef HAVE_SIGNAL_STOP
+		spin_unlock_irq(&task->sighand->siglock);
+		kernel_signal_stop();
+#else
+		if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+			spl_set_special_state(TASK_STOPPED);
+
+		spin_unlock_irq(&current->sighand->siglock);
+
+		schedule();
+#endif
+		return (0);
+	}
+
+	spin_unlock_irq(&task->sighand->siglock);
+
+	return (1);
+}
+
+EXPORT_SYMBOL(issig);

diff --git a/zfs/module/os/linux/spl/spl-trace.c b/zfs/module/os/linux/spl/spl-trace.c
new file mode 100644
index 0000000..7912a38
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-trace.c

@@ -0,0 +1,33 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Each DTRACE_PROBE must define its trace point in one (and only one)
+ * source file, so this dummy file exists for that purpose.
+ */
+
+#include <sys/taskq.h>
+
+#ifdef _KERNEL
+#define	CREATE_TRACE_POINTS
+#include <sys/trace.h>
+#include <sys/trace_taskq.h>
+#endif

diff --git a/zfs/module/os/linux/spl/spl-tsd.c b/zfs/module/os/linux/spl/spl-tsd.c
new file mode 100644
index 0000000..546db9a
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-tsd.c

@@ -0,0 +1,719 @@
+/*
+ *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ *  Solaris Porting Layer (SPL) Thread Specific Data Implementation.
+ *
+ *  Thread specific data is implemented using a hash table; this avoids
+ *  the need to add a member to the task structure and allows maximum
+ *  portability between kernels.  This implementation has been optimized
+ *  to keep the tsd_set() and tsd_get() times as small as possible.
+ *
+ *  The majority of the entries in the hash table are for specific tsd
+ *  entries.  These entries are hashed by the product of their key and
+ *  pid because by design the key and pid are guaranteed to be unique.
+ *  Their product also has the desirable property that it will be uniformly
+ *  distributed over the hash bins provided neither the pid nor key is zero.
+ *  Under Linux the zero pid belongs to the idle task and thus won't be
+ *  used, and this implementation is careful never to assign a zero key.
+ *  By default the hash table is sized to 512 bins which is expected to
+ *  be sufficient for light to moderate usage of thread specific data.
+ *
+ *  The hash table contains two additional types of entries.  The first
+ *  type of entry is called a 'key' entry and it is added to the hash during
+ *  tsd_create().  It is used to store the address of the destructor function
+ *  and it is used as an anchor point.  All tsd entries which use the same
+ *  key will be linked to this entry.  This is used during tsd_destroy() to
+ *  quickly call the destructor function for all tsd associated with the key.
+ *  The 'key' entry may be looked up with tsd_hash_search() by passing the
+ *  key you wish to lookup and DTOR_PID constant as the pid.
+ *
+ *  The second type of entry is called a 'pid' entry and it is added to the
+ *  hash the first time a process sets a key.  The 'pid' entry is also used
+ *  as an anchor and all tsd for the process will be linked to it.  This
+ *  list is used during tsd_exit() to ensure all registered destructors
+ *  are run for the process.  The 'pid' entry may be looked up with
+ *  tsd_hash_search() by passing the PID_KEY constant as the key, and
+ *  the process pid.  Note that tsd_exit() is called by thread_exit()
+ *  so if you're using the Solaris thread API you should not need to call
+ *  tsd_exit() directly.
+ *
+ */
+
+#include <sys/kmem.h>
+#include <sys/thread.h>
+#include <sys/tsd.h>
+#include <linux/hash.h>
+
+typedef struct tsd_hash_bin {
+	spinlock_t		hb_lock;	/* held while hb_head is used */
+	struct hlist_head	hb_head;	/* entries chained by he_list */
+} tsd_hash_bin_t;
+
+typedef struct tsd_hash_table {
+	spinlock_t		ht_lock;	/* table-wide lock */
+	uint_t			ht_bits;	/* table has 1 << ht_bits bins */
+	uint_t			ht_key;		/* last key handed out */
+	tsd_hash_bin_t		*ht_bins;	/* array of hash bins */
+} tsd_hash_table_t;
+
+typedef struct tsd_hash_entry {
+	uint_t			he_key;		/* key; PID_KEY on pid anchors */
+	pid_t			he_pid;		/* pid; DTOR_PID on key anchors */
+	dtor_func_t		he_dtor;	/* destructor, may be NULL */
+	void			*he_value;	/* stored value */
+	struct hlist_node	he_list;	/* linkage in the hash bin */
+	struct list_head	he_key_list;	/* linkage to the key anchor */
+	struct list_head	he_pid_list;	/* linkage to the pid anchor */
+} tsd_hash_entry_t;
+
+static tsd_hash_table_t *tsd_hash_table = NULL;
+
+
+/*
+ * tsd_hash_search - look up the entry matching a key/pid pair
+ * @table: hash table
+ * @key: search key
+ * @pid: search pid
+ *
+ * Only the bin selected by hashing key * pid is locked while it is
+ * walked.  Returns the matching entry, or NULL when the bin holds no
+ * entry with both the requested key and pid.  The bin lock is dropped
+ * before returning.
+ */
+static tsd_hash_entry_t *
+tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
+{
+	tsd_hash_entry_t *found = NULL;
+	struct hlist_node *pos = NULL;
+	tsd_hash_bin_t *bin;
+
+	bin = &table->ht_bins[hash_long((ulong_t)key * (ulong_t)pid,
+	    table->ht_bits)];
+
+	spin_lock(&bin->hb_lock);
+	hlist_for_each(pos, &bin->hb_head) {
+		tsd_hash_entry_t *cur;
+
+		cur = hlist_entry(pos, tsd_hash_entry_t, he_list);
+		if (cur->he_key == key && cur->he_pid == pid) {
+			found = cur;
+			break;
+		}
+	}
+	spin_unlock(&bin->hb_lock);
+
+	return (found);
+}
+
+/*
+ * tsd_hash_dtor - run destructors for, and free, a private list of entries
+ * @work: list of hash entries linked through he_list
+ *
+ * The entries are expected to have been unlinked from the hash already.
+ * Each one is taken off @work, its destructor (if any) is called with the
+ * stored value, and its memory is released.  Entries whose pid is DTOR_PID
+ * never have their destructor invoked; they are only freed.
+ */
+static void
+tsd_hash_dtor(struct hlist_head *work)
+{
+	struct hlist_node *node;
+
+	while ((node = work->first) != NULL) {
+		tsd_hash_entry_t *victim;
+
+		victim = hlist_entry(node, tsd_hash_entry_t, he_list);
+		hlist_del(node);
+
+		if (victim->he_pid != DTOR_PID && victim->he_dtor)
+			victim->he_dtor(victim->he_value);
+
+		kmem_free(victim, sizeof (tsd_hash_entry_t));
+	}
+}
+
+/*
+ * tsd_hash_add - adds an entry to hash table
+ * @table: hash table
+ * @key: search key
+ * @pid: search pid
+ * @value: value stored in the new entry
+ *
+ * The caller is responsible for ensuring the unique key/pid do not
+ * already exist in the hash table.  This is possible because all entries
+ * are thread specific thus a concurrent thread will never attempt to
+ * add this key/pid.  Because multiple bins must be checked to add
+ * links to the dtor and pid entries the entire table is locked.
+ *
+ * Both the key anchor (key, DTOR_PID) and the pid anchor (PID_KEY, pid)
+ * are looked up here and must already be present in the table.
+ *
+ * Returns 0 on success or ENOMEM when the entry cannot be allocated.
+ */
+static int
+tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
+{
+	tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
+	tsd_hash_bin_t *bin;
+	ulong_t hash;
+	int rc = 0;	/* never modified below; returned as the success code */
+
+	ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
+
+	/* New entry: allocate the structure, set its value, add it to hash */
+	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
+	if (entry == NULL)
+		return (ENOMEM);
+
+	entry->he_key = key;
+	entry->he_pid = pid;
+	entry->he_value = value;
+	INIT_HLIST_NODE(&entry->he_list);
+	INIT_LIST_HEAD(&entry->he_key_list);
+	INIT_LIST_HEAD(&entry->he_pid_list);
+
+	spin_lock(&table->ht_lock);
+
+	/* Destructor entry must exist for all valid keys */
+	dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
+	ASSERT3P(dtor_entry, !=, NULL);
+	entry->he_dtor = dtor_entry->he_dtor;	/* inherit the key's dtor */
+
+	/* Process entry must exist for all valid processes */
+	pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
+	ASSERT3P(pid_entry, !=, NULL);
+
+	hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
+	bin = &table->ht_bins[hash];
+	spin_lock(&bin->hb_lock);	/* taken while ht_lock is held */
+
+	/* Add to the hash, key, and pid lists */
+	hlist_add_head(&entry->he_list, &bin->hb_head);
+	list_add(&entry->he_key_list, &dtor_entry->he_key_list);
+	list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
+
+	spin_unlock(&bin->hb_lock);
+	spin_unlock(&table->ht_lock);
+
+	return (rc);
+}
+
+/*
+ * tsd_hash_add_key - adds a destructor entry to the hash table
+ * @table: hash table
+ * @keyp: receives the newly assigned key
+ * @dtor: key destructor, may be NULL
+ *
+ * For every unique key there is a single entry in the hash which is used
+ * as anchor.  All other thread specific entries for this key are linked
+ * to this anchor via the 'he_key_list' list head.  On success *keyp is
+ * set to the key which was assigned; on failure *keyp is left untouched.
+ *
+ * Keys are handed out from the range [1, TSD_KEYS_MAX], the same range
+ * accepted by tsd_set() and tsd_get().
+ *
+ * Returns 0 on success, ENOMEM when the anchor cannot be allocated, or
+ * ENOENT when every key in the range is already in use.
+ */
+static int
+tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
+{
+	tsd_hash_entry_t *tmp_entry, *entry;
+	tsd_hash_bin_t *bin;
+	ulong_t hash;
+	int keys_checked = 0;
+
+	ASSERT3P(table, !=, NULL);
+
+	/* Allocate entry to be used as a destructor for this key */
+	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
+	if (entry == NULL)
+		return (ENOMEM);
+
+	/* Determine next available key value */
+	spin_lock(&table->ht_lock);
+	do {
+		/*
+		 * Limited to TSD_KEYS_MAX concurrent unique keys.  Advance
+		 * first and then test, so the candidate never exceeds
+		 * TSD_KEYS_MAX (such a key would be rejected by tsd_set()).
+		 */
+		if (++table->ht_key > TSD_KEYS_MAX)
+			table->ht_key = 1;
+
+		/* Ensure failure when all TSD_KEYS_MAX keys are in use */
+		if (keys_checked++ >= TSD_KEYS_MAX) {
+			spin_unlock(&table->ht_lock);
+			/* The anchor was never published; do not leak it */
+			kmem_free(entry, sizeof (tsd_hash_entry_t));
+			return (ENOENT);
+		}
+
+		tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
+	} while (tmp_entry);
+
+	/* Add destructor entry in to hash table */
+	entry->he_key = *keyp = table->ht_key;
+	entry->he_pid = DTOR_PID;
+	entry->he_dtor = dtor;
+	entry->he_value = NULL;
+	INIT_HLIST_NODE(&entry->he_list);
+	INIT_LIST_HEAD(&entry->he_key_list);
+	INIT_LIST_HEAD(&entry->he_pid_list);
+
+	hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
+	bin = &table->ht_bins[hash];
+	spin_lock(&bin->hb_lock);
+
+	hlist_add_head(&entry->he_list, &bin->hb_head);
+
+	spin_unlock(&bin->hb_lock);
+	spin_unlock(&table->ht_lock);
+
+	return (0);
+}
+
+/*
+ * tsd_hash_add_pid - insert the anchor entry for a process
+ * @table: hash table
+ * @pid: pid the anchor is created for
+ *
+ * Each process owns exactly one anchor entry, stored under PID_KEY.
+ * Every thread specific entry belonging to the process hangs off the
+ * anchor's 'he_pid_list'.
+ *
+ * Returns 0 on success or ENOMEM when the anchor cannot be allocated.
+ */
+static int
+tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
+{
+	tsd_hash_entry_t *anchor;
+	tsd_hash_bin_t *bin;
+
+	anchor = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
+	if (anchor == NULL)
+		return (ENOMEM);
+
+	/* The anchor is private until it is linked in to its bin below */
+	anchor->he_key = PID_KEY;
+	anchor->he_pid = pid;
+	anchor->he_dtor = NULL;
+	anchor->he_value = NULL;
+	INIT_HLIST_NODE(&anchor->he_list);
+	INIT_LIST_HEAD(&anchor->he_key_list);
+	INIT_LIST_HEAD(&anchor->he_pid_list);
+
+	spin_lock(&table->ht_lock);
+
+	bin = &table->ht_bins[hash_long((ulong_t)PID_KEY * (ulong_t)pid,
+	    table->ht_bits)];
+
+	spin_lock(&bin->hb_lock);
+	hlist_add_head(&anchor->he_list, &bin->hb_head);
+	spin_unlock(&bin->hb_lock);
+
+	spin_unlock(&table->ht_lock);
+
+	return (0);
+}
+
+/*
+ * tsd_hash_del - unlink an entry from its hash bin, key list, and pid list
+ * @table: hash table (not referenced by this function)
+ * @entry: entry to unlink
+ *
+ * No locks are taken here and the entry is not freed.
+ */
+static void
+tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
+{
+	list_del_init(&entry->he_pid_list);
+	list_del_init(&entry->he_key_list);
+	hlist_del(&entry->he_list);
+}
+
+/*
+ * tsd_hash_table_init - allocate and initialize a hash table
+ * @bits: log2 of the number of bins
+ *
+ * The table is created with 2^bits bins and cannot be resized later.
+ * It must be released with tsd_hash_table_fini().  Returns the new
+ * table, or NULL if either allocation returns NULL.
+ */
+static tsd_hash_table_t *
+tsd_hash_table_init(uint_t bits)
+{
+	tsd_hash_table_t *tbl;
+	int idx, nbins = (1 << bits);
+
+	tbl = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
+	if (tbl == NULL)
+		return (NULL);
+
+	tbl->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * nbins, KM_SLEEP);
+	if (tbl->ht_bins == NULL) {
+		kmem_free(tbl, sizeof (tsd_hash_table_t));
+		return (NULL);
+	}
+
+	/* Table-wide state; the first key handed out follows ht_key */
+	spin_lock_init(&tbl->ht_lock);
+	tbl->ht_bits = bits;
+	tbl->ht_key = 1;
+
+	/* Per-bin state */
+	for (idx = 0; idx < nbins; idx++) {
+		tsd_hash_bin_t *bin = &tbl->ht_bins[idx];
+
+		spin_lock_init(&bin->hb_lock);
+		INIT_HLIST_HEAD(&bin->hb_head);
+	}
+
+	return (tbl);
+}
+
+/*
+ * tsd_hash_table_fini - tear down a hash table
+ * @table: hash table created by tsd_hash_table_init()
+ *
+ * Every entry still present is unlinked from its bin, with the table
+ * lock and the bin lock held, and parked on a private list.  Once the
+ * locks are released the parked entries are handed to tsd_hash_dtor(),
+ * and finally the bin array and the table itself are freed.
+ */
+static void
+tsd_hash_table_fini(tsd_hash_table_t *table)
+{
+	HLIST_HEAD(work);
+	struct hlist_node *node;
+	tsd_hash_entry_t *victim;
+	tsd_hash_bin_t *bin;
+	int nbins, idx;
+
+	ASSERT3P(table, !=, NULL);
+
+	spin_lock(&table->ht_lock);
+	nbins = (1 << table->ht_bits);
+	for (idx = 0; idx < nbins; idx++) {
+		bin = &table->ht_bins[idx];
+
+		spin_lock(&bin->hb_lock);
+		while ((node = bin->hb_head.first) != NULL) {
+			victim = hlist_entry(node, tsd_hash_entry_t, he_list);
+			tsd_hash_del(table, victim);
+			hlist_add_head(&victim->he_list, &work);
+		}
+		spin_unlock(&bin->hb_lock);
+	}
+	spin_unlock(&table->ht_lock);
+
+	tsd_hash_dtor(&work);
+	kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t) * nbins);
+	kmem_free(table, sizeof (tsd_hash_table_t));
+}
+
+/*
+ * tsd_remove_entry - remove a tsd entry for this thread
+ * @entry: entry to remove
+ *
+ * Remove the thread specific data @entry for this thread.
+ * If this is the last entry for this thread, also remove the PID entry.
+ *
+ * The neighbour following @entry on its he_pid_list is sampled before
+ * @entry is unlinked.  After the unlink, that neighbour is removed too
+ * when it is a PID_KEY anchor whose list has become empty.  Removed
+ * entries are collected on a private list and passed to tsd_hash_dtor()
+ * only after the table lock has been dropped.
+ */
+static void
+tsd_remove_entry(tsd_hash_entry_t *entry)
+{
+	HLIST_HEAD(work);
+	tsd_hash_table_t *table;
+	tsd_hash_entry_t *pid_entry;
+	tsd_hash_bin_t *pid_entry_bin, *entry_bin;
+	ulong_t hash;
+
+	table = tsd_hash_table;
+	ASSERT3P(table, !=, NULL);
+	ASSERT3P(entry, !=, NULL);
+
+	spin_lock(&table->ht_lock);
+
+	hash = hash_long((ulong_t)entry->he_key *
+	    (ulong_t)entry->he_pid, table->ht_bits);
+	entry_bin = &table->ht_bins[hash];
+
+	/* save the possible pid_entry; must be read before entry is unlinked */
+	pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
+	    he_pid_list);
+
+	/* remove entry */
+	spin_lock(&entry_bin->hb_lock);
+	tsd_hash_del(table, entry);
+	hlist_add_head(&entry->he_list, &work);
+	spin_unlock(&entry_bin->hb_lock);
+
+	/* if pid_entry is indeed pid_entry, then remove it if it's empty */
+	if (pid_entry->he_key == PID_KEY &&
+	    list_empty(&pid_entry->he_pid_list)) {
+		hash = hash_long((ulong_t)pid_entry->he_key *
+		    (ulong_t)pid_entry->he_pid, table->ht_bits);
+		pid_entry_bin = &table->ht_bins[hash];
+
+		spin_lock(&pid_entry_bin->hb_lock);
+		tsd_hash_del(table, pid_entry);
+		hlist_add_head(&pid_entry->he_list, &work);
+		spin_unlock(&pid_entry_bin->hb_lock);
+	}
+
+	spin_unlock(&table->ht_lock);
+
+	tsd_hash_dtor(&work);	/* runs with no tsd locks held */
+}
+
+/*
+ * tsd_set - set thread specific data
+ * @key: lookup key
+ * @value: value to set; NULL removes any existing entry
+ *
+ * Caller must prevent racing tsd_create() or tsd_destroy(); racing
+ * tsd_get() or tsd_set() is not a concern because the data is thread
+ * specific.  Updating an existing entry is the fast path.  The first
+ * set for a key is slower because it takes additional locks and may
+ * allocate memory.
+ *
+ * Returns 0 on success, EINVAL for a key outside [1, TSD_KEYS_MAX], or
+ * the error reported while adding the pid anchor or the new entry.
+ */
+int
+tsd_set(uint_t key, void *value)
+{
+	tsd_hash_table_t *table = tsd_hash_table;
+	pid_t pid = curthread->pid;
+	/* a NULL value requests removal rather than storage */
+	boolean_t clearing = (value == NULL);
+	tsd_hash_entry_t *existing;
+	int rc;
+
+	ASSERT3P(table, !=, NULL);
+
+	if (key == 0 || key > TSD_KEYS_MAX)
+		return (EINVAL);
+
+	/* Fast path: the entry is already in the hash table */
+	existing = tsd_hash_search(table, key, pid);
+	if (existing != NULL) {
+		existing->he_value = value;
+		if (clearing)
+			tsd_remove_entry(existing);
+		return (0);
+	}
+
+	/* Nothing stored and nothing to store */
+	if (clearing)
+		return (0);
+
+	/* Make sure the pid anchor exists before adding the entry */
+	if (tsd_hash_search(table, PID_KEY, pid) == NULL) {
+		rc = tsd_hash_add_pid(table, pid);
+		if (rc)
+			return (rc);
+	}
+
+	return (tsd_hash_add(table, key, pid, value));
+}
+EXPORT_SYMBOL(tsd_set);
+
+/*
+ * tsd_get - get thread specific data for the current thread
+ * @key: lookup key
+ *
+ * Caller must prevent racing tsd_create() or tsd_destroy().  Only a
+ * single hash bin is locked during the lookup, never the whole table.
+ *
+ * Returns the stored value, or NULL when the key is outside
+ * [1, TSD_KEYS_MAX] or no entry exists for this thread.
+ */
+void *
+tsd_get(uint_t key)
+{
+	tsd_hash_entry_t *hit;
+
+	ASSERT3P(tsd_hash_table, !=, NULL);
+
+	if (key == 0 || key > TSD_KEYS_MAX)
+		return (NULL);
+
+	hit = tsd_hash_search(tsd_hash_table, key, curthread->pid);
+
+	return (hit != NULL ? hit->he_value : NULL);
+}
+EXPORT_SYMBOL(tsd_get);
+
+/*
+ * tsd_get_by_thread - get thread specific data for the specified thread
+ * @key: lookup key
+ * @thread: thread whose data is looked up
+ *
+ * Caller must prevent racing tsd_create() or tsd_destroy().  Only a
+ * single hash bin is locked during the lookup, never the whole table.
+ *
+ * Returns the stored value, or NULL when the key is outside
+ * [1, TSD_KEYS_MAX] or no entry exists for @thread.
+ */
+void *
+tsd_get_by_thread(uint_t key, kthread_t *thread)
+{
+	tsd_hash_entry_t *hit;
+
+	ASSERT3P(tsd_hash_table, !=, NULL);
+
+	if (key == 0 || key > TSD_KEYS_MAX)
+		return (NULL);
+
+	hit = tsd_hash_search(tsd_hash_table, key, thread->pid);
+
+	return (hit != NULL ? hit->he_value : NULL);
+}
+EXPORT_SYMBOL(tsd_get_by_thread);
+
+/*
+ * tsd_create - create thread specific data key
+ * @keyp: lookup key address
+ * @dtor: destructor called during tsd_destroy() or tsd_exit()
+ *
+ * *keyp must be 0 on entry; a non-zero value is assumed to be a key
+ * which is already in use and the call does nothing.  The dtor may be
+ * NULL, in which case no additional cleanup of the data is performed
+ * during tsd_destroy() or tsd_exit().  Any error from adding the key is
+ * discarded.
+ *
+ * Caller must prevent racing tsd_set() or tsd_get(), this function is
+ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
+ */
+void
+tsd_create(uint_t *keyp, dtor_func_t dtor)
+{
+	ASSERT3P(keyp, !=, NULL);
+
+	if (*keyp == 0)
+		(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
+}
+EXPORT_SYMBOL(tsd_create);
+
+/*
+ * tsd_destroy - destroy thread specific data
+ * @keyp: lookup key address
+ *
+ * Destroys the thread specific data on all threads which use this key.
+ * The key anchor itself is removed as well and *keyp is reset to 0.
+ * If no anchor exists for *keyp the function returns without doing
+ * anything, leaving *keyp unchanged.
+ *
+ * Caller must prevent racing tsd_set() or tsd_get(), this function is
+ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
+ */
+void
+tsd_destroy(uint_t *keyp)
+{
+	HLIST_HEAD(work);
+	tsd_hash_table_t *table;
+	tsd_hash_entry_t *dtor_entry, *entry;
+	tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
+	ulong_t hash;
+
+	table = tsd_hash_table;
+	ASSERT3P(table, !=, NULL);
+
+	spin_lock(&table->ht_lock);
+	dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
+	if (dtor_entry == NULL) {
+		spin_unlock(&table->ht_lock);
+		return;
+	}
+
+	/*
+	 * All threads which use this key must be linked off of the
+	 * DTOR_PID entry.  They are removed from the hash table and
+	 * linked in to a private working list to be destroyed.
+	 */
+	while (!list_empty(&dtor_entry->he_key_list)) {
+		entry = list_entry(dtor_entry->he_key_list.next,
+		    tsd_hash_entry_t, he_key_list);
+		ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
+		ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
+
+		hash = hash_long((ulong_t)entry->he_key *
+		    (ulong_t)entry->he_pid, table->ht_bits);
+		entry_bin = &table->ht_bins[hash];
+
+		spin_lock(&entry_bin->hb_lock);
+		tsd_hash_del(table, entry);
+		hlist_add_head(&entry->he_list, &work);
+		spin_unlock(&entry_bin->hb_lock);
+	}
+
+	hash = hash_long((ulong_t)dtor_entry->he_key *
+	    (ulong_t)dtor_entry->he_pid, table->ht_bits);
+	dtor_entry_bin = &table->ht_bins[hash];
+
+	spin_lock(&dtor_entry_bin->hb_lock);
+	tsd_hash_del(table, dtor_entry);	/* finally drop the anchor */
+	hlist_add_head(&dtor_entry->he_list, &work);
+	spin_unlock(&dtor_entry_bin->hb_lock);
+	spin_unlock(&table->ht_lock);
+
+	tsd_hash_dtor(&work);	/* destructors run with no tsd locks held */
+	*keyp = 0;
+}
+EXPORT_SYMBOL(tsd_destroy);
+
+/*
+ * tsd_exit - destroys all thread specific data for this thread
+ *
+ * Destroys all the thread specific data for this thread.  The pid
+ * anchor for the current thread is removed as well.  If the current
+ * thread has no pid anchor the function returns without doing anything.
+ *
+ * Caller must prevent racing tsd_set() or tsd_get(), this function is
+ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
+ */
+void
+tsd_exit(void)
+{
+	HLIST_HEAD(work);
+	tsd_hash_table_t *table;
+	tsd_hash_entry_t *pid_entry, *entry;
+	tsd_hash_bin_t *pid_entry_bin, *entry_bin;
+	ulong_t hash;
+
+	table = tsd_hash_table;
+	ASSERT3P(table, !=, NULL);
+
+	spin_lock(&table->ht_lock);
+	pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
+	if (pid_entry == NULL) {
+		spin_unlock(&table->ht_lock);
+		return;
+	}
+
+	/*
+	 * All keys associated with this pid must be linked off of the
+	 * PID_KEY entry.  They are removed from the hash table and
+	 * linked in to a private working list to be destroyed.
+	 */
+
+	while (!list_empty(&pid_entry->he_pid_list)) {
+		entry = list_entry(pid_entry->he_pid_list.next,
+		    tsd_hash_entry_t, he_pid_list);
+		ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
+
+		hash = hash_long((ulong_t)entry->he_key *
+		    (ulong_t)entry->he_pid, table->ht_bits);
+		entry_bin = &table->ht_bins[hash];
+
+		spin_lock(&entry_bin->hb_lock);
+		tsd_hash_del(table, entry);
+		hlist_add_head(&entry->he_list, &work);
+		spin_unlock(&entry_bin->hb_lock);
+	}
+
+	hash = hash_long((ulong_t)pid_entry->he_key *
+	    (ulong_t)pid_entry->he_pid, table->ht_bits);
+	pid_entry_bin = &table->ht_bins[hash];
+
+	spin_lock(&pid_entry_bin->hb_lock);
+	tsd_hash_del(table, pid_entry);	/* finally drop the anchor */
+	hlist_add_head(&pid_entry->he_list, &work);
+	spin_unlock(&pid_entry_bin->hb_lock);
+	spin_unlock(&table->ht_lock);
+
+	tsd_hash_dtor(&work);	/* destructors run with no tsd locks held */
+}
+EXPORT_SYMBOL(tsd_exit);
+
+/*
+ * spl_tsd_init - create the global tsd hash table
+ *
+ * Returns 0 on success or 1 when the table could not be created.
+ */
+int
+spl_tsd_init(void)
+{
+	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
+
+	return (tsd_hash_table == NULL ? 1 : 0);
+}
+
+/*
+ * spl_tsd_fini - destroy the global tsd hash table
+ *
+ * The table is torn down first and the global pointer cleared afterwards.
+ */
+void
+spl_tsd_fini(void)
+{
+	tsd_hash_table_t *table = tsd_hash_table;
+
+	tsd_hash_table_fini(table);
+	tsd_hash_table = NULL;
+}

diff --git a/zfs/module/os/linux/spl/spl-vmem.c b/zfs/module/os/linux/spl/spl-vmem.c
new file mode 100644
index 0000000..cab3e95
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-vmem.c

@@ -0,0 +1,90 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/percpu_compat.h>
+#include <sys/debug.h>
+#include <sys/vmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/shrinker.h>
+#include <linux/module.h>
+
+/*
+ * Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
+ */
+/*
+ * spl_vmem_alloc - allocate memory with KM_VMEM added to the flags
+ * @size: number of bytes requested
+ * @flags: KM_* flags; only bits within KM_PUBLIC_MASK are allowed
+ * @func: caller's function name, used only by the tracking build
+ * @line: caller's line number, used only by the tracking build
+ *
+ * The request is forwarded to the kmem allocator variant selected at
+ * build time by DEBUG_KMEM and DEBUG_KMEM_TRACKING.
+ */
+void *
+spl_vmem_alloc(size_t size, int flags, const char *func, int line)
+{
+	int vflags;
+
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+
+	vflags = flags | KM_VMEM;
+
+#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
+	return (spl_kmem_alloc_track(size, vflags, func, line, NUMA_NO_NODE));
+#elif defined(DEBUG_KMEM)
+	return (spl_kmem_alloc_debug(size, vflags, NUMA_NO_NODE));
+#else
+	return (spl_kmem_alloc_impl(size, vflags, NUMA_NO_NODE));
+#endif
+}
+EXPORT_SYMBOL(spl_vmem_alloc);
+
+/*
+ * spl_vmem_zalloc - as spl_vmem_alloc(), with KM_ZERO added as well
+ * @size: number of bytes requested
+ * @flags: KM_* flags; only bits within KM_PUBLIC_MASK are allowed
+ * @func: caller's function name, used only by the tracking build
+ * @line: caller's line number, used only by the tracking build
+ *
+ * The request is forwarded to the kmem allocator variant selected at
+ * build time by DEBUG_KMEM and DEBUG_KMEM_TRACKING.
+ */
+void *
+spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
+{
+	int vflags;
+
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+
+	vflags = flags | (KM_VMEM | KM_ZERO);
+
+#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
+	return (spl_kmem_alloc_track(size, vflags, func, line, NUMA_NO_NODE));
+#elif defined(DEBUG_KMEM)
+	return (spl_kmem_alloc_debug(size, vflags, NUMA_NO_NODE));
+#else
+	return (spl_kmem_alloc_impl(size, vflags, NUMA_NO_NODE));
+#endif
+}
+EXPORT_SYMBOL(spl_vmem_zalloc);
+
+/*
+ * spl_vmem_free - release memory obtained from spl_vmem_alloc() or
+ * spl_vmem_zalloc()
+ * @buf: buffer to release
+ * @size: size which was passed when the buffer was allocated
+ *
+ * The request is forwarded to the kmem free variant selected at build
+ * time by DEBUG_KMEM and DEBUG_KMEM_TRACKING.  The helpers are called as
+ * plain statements: ISO C does not permit "return (expr);" in a function
+ * returning void.
+ */
+void
+spl_vmem_free(const void *buf, size_t size)
+{
+#if !defined(DEBUG_KMEM)
+	spl_kmem_free_impl(buf, size);
+#elif !defined(DEBUG_KMEM_TRACKING)
+	spl_kmem_free_debug(buf, size);
+#else
+	spl_kmem_free_track(buf, size);
+#endif
+}
+EXPORT_SYMBOL(spl_vmem_free);
+
+int
+spl_vmem_init(void)
+{
+	return (0);	/* no setup is performed; always reports success */
+}
+
+void
+spl_vmem_fini(void)
+{	/* empty body: no teardown is performed */
+}

diff --git a/zfs/module/os/linux/spl/spl-xdr.c b/zfs/module/os/linux/spl/spl-xdr.c
new file mode 100644
index 0000000..5e763c2
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-xdr.c

@@ -0,0 +1,512 @@
+/*
+ *  Copyright (c) 2008-2010 Sun Microsystems, Inc.
+ *  Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Solaris Porting Layer (SPL) XDR Implementation.
+ */
+
+#include <linux/string.h>
+#include <sys/kmem.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <rpc/xdr.h>
+
+/*
+ * SPL's XDR mem implementation.
+ *
+ * This is used by libnvpair to serialize/deserialize the name-value pair data
+ * structures into byte arrays in a well-defined and portable manner.
+ *
+ * These data structures are used by the DMU/ZFS to flexibly manipulate various
+ * information in memory and later serialize it/deserialize it to disk.
+ * Examples of usages include the pool configuration, lists of pool and dataset
+ * properties, etc.
+ *
+ * Reference documentation for the XDR representation and XDR operations can be
+ * found in RFC 1832 and xdr(3), respectively.
+ *
+ * ===  Implementation shortcomings ===
+ *
+ * It is assumed that the following C types have the following sizes:
+ *
+ * char/unsigned char:      1 byte
+ * short/unsigned short:    2 bytes
+ * int/unsigned int:        4 bytes
+ * longlong_t/u_longlong_t: 8 bytes
+ *
+ * The C standard allows these types to be larger (and in the case of ints,
+ * shorter), so if that is the case on some compiler/architecture, the build
+ * will fail (on purpose).
+ *
+ * If someone wants to fix the code to work properly on such environments, then:
+ *
+ * 1) Preconditions should be added to xdrmem_enc functions to make sure the
+ *    caller doesn't pass arguments which exceed the expected range.
+ * 2) Functions which take signed integers should be changed to properly do
+ *    sign extension.
+ * 3) For ints with less than 32 bits, well.. I suspect you'll have bigger
+ *    problems than this implementation.
+ *
+ * It is also assumed that:
+ *
+ * 1) Chars have 8 bits.
+ * 2) We can always do 32-bit-aligned int memory accesses and byte-aligned
+ *    memcpy, memset and memcmp.
+ * 3) Arrays passed to xdr_array() are packed and the compiler/architecture
+ *    supports element-sized-aligned memory accesses.
+ * 4) Negative integers are natively stored in two's complement binary
+ *    representation.
+ *
+ * No checks are done for the 4 assumptions above, though.
+ *
+ * === Caller expectations ===
+ *
+ * Existing documentation does not describe the semantics of XDR operations very
+ * well.  Therefore, some assumptions about failure semantics will be made and
+ * will be described below:
+ *
+ * 1) If any encoding operation fails (e.g., due to lack of buffer space),
+ * the stream should be considered valid only up to the encoding operation
+ * previous to the one that first failed. However, the stream size as returned
+ * by xdr_control() cannot be considered to be strictly correct (it may be
+ * bigger).
+ *
+ * Putting it another way, if there is an encoding failure it's undefined
+ * whether anything is added to the stream in that operation and therefore
+ * neither xdr_control() nor future encoding operations on the same stream can
+ * be relied upon to produce correct results.
+ *
+ * 2) If a decoding operation fails, it's undefined whether anything will be
+ * decoded into passed buffers/pointers during that operation, or what the
+ * values on those buffers will look like.
+ *
+ * Future decoding operations on the same stream will also have similar
+ * undefined behavior.
+ *
+ * 3) When the first decoding operation fails it is OK to trust the results of
+ * previous decoding operations on the same stream, as long as the caller
+ * expects a failure to be possible (e.g. due to end-of-stream).
+ *
+ * However, this is highly discouraged because the caller should know the
+ * stream size and should be coded to expect any decoding failure to be data
+ * corruption due to hardware, accidental or even malicious causes, which should
+ * be handled gracefully in all cases.
+ *
+ * In very rare situations where there are strong reasons to believe the data
+ * can be trusted to be valid and non-tampered with, then the caller may assume
+ * a decoding failure to be a bug (e.g. due to mismatched data types) and may
+ * fail non-gracefully.
+ *
+ * 4) Non-zero padding bytes will cause the decoding operation to fail.
+ *
+ * 5) Zero bytes on string types will also cause the decoding operation to fail.
+ *
+ * 6) It is assumed that either the pointer to the stream buffer given by the
+ * caller is 32-bit aligned or the architecture supports non-32-bit-aligned int
+ * memory accesses.
+ *
+ * 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap.
+ *
+ * 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user
+ * space or MMIO space), the computer may explode.
+ */
+
+static struct xdr_ops xdrmem_encode_ops;
+static struct xdr_ops xdrmem_decode_ops;
+
+/*
+ * xdrmem_create - initialize an XDR stream over a memory buffer
+ * @xdrs: stream to initialize
+ * @addr: start of the buffer
+ * @size: size of the buffer in bytes
+ * @op: XDR_ENCODE or XDR_DECODE
+ *
+ * Failure is reported by leaving xdrs->x_ops set to NULL.  That happens
+ * for any other @op, in which case no other field is written, and when
+ * addr + size wraps around.
+ */
+void
+xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
+    const enum xdr_op op)
+{
+	if (op == XDR_ENCODE) {
+		xdrs->x_ops = &xdrmem_encode_ops;
+	} else if (op == XDR_DECODE) {
+		xdrs->x_ops = &xdrmem_decode_ops;
+	} else {
+		xdrs->x_ops = NULL; /* Let the caller know we failed */
+		return;
+	}
+
+	xdrs->x_op = op;
+	xdrs->x_addr = addr;
+	xdrs->x_addr_end = addr + size;
+
+	/* The end of the buffer must not lie before its start */
+	if (xdrs->x_addr_end < xdrs->x_addr)
+		xdrs->x_ops = NULL;
+}
+EXPORT_SYMBOL(xdrmem_create);
+
+/*
+ * Handle a control request on an xdrmem stream.  Only
+ * XDR_GET_BYTES_AVAIL is supported: @info is filled in as a
+ * struct xdr_bytesrec with the number of bytes between the current
+ * position and the end of the buffer.  Any other request returns FALSE
+ * without touching @info.
+ */
+static bool_t
+xdrmem_control(XDR *xdrs, int req, void *info)
+{
+	struct xdr_bytesrec *rec;
+
+	if (req != XDR_GET_BYTES_AVAIL)
+		return (FALSE);
+
+	rec = (struct xdr_bytesrec *)info;
+	rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */
+	rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr;
+
+	return (TRUE);
+}
+
+/*
+ * Copy @cnt bytes from @cp in to the stream and zero-pad the stream up
+ * to the next multiple of 4 bytes.  Returns FALSE, writing nothing, when
+ * the padded length overflows or does not fit in the remaining buffer.
+ */
+static bool_t
+xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
+{
+	uint_t padded = roundup(cnt, 4);
+
+	if (padded < cnt)
+		return (FALSE); /* Integer overflow */
+
+	if (xdrs->x_addr > xdrs->x_addr_end ||
+	    xdrs->x_addr_end - xdrs->x_addr < padded)
+		return (FALSE);
+
+	memcpy(xdrs->x_addr, cp, cnt);
+	xdrs->x_addr += cnt;
+
+	if (padded - cnt > 0) {
+		memset(xdrs->x_addr, 0, padded - cnt);
+		xdrs->x_addr += padded - cnt;
+	}
+
+	return (TRUE);
+}
+
+/*
+ * Copy @cnt bytes from the stream in to @cp and consume the padding
+ * which rounds the length up to a multiple of 4 bytes.  Returns FALSE
+ * when the padded length overflows, does not fit in the remaining
+ * buffer, or when any padding byte is non-zero.  In the last case @cp
+ * has already been written and the stream advanced by @cnt.
+ */
+static bool_t
+xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
+{
+	static uint32_t zero = 0;
+	uint_t padded = roundup(cnt, 4);
+
+	if (padded < cnt)
+		return (FALSE); /* Integer overflow */
+
+	if (xdrs->x_addr > xdrs->x_addr_end ||
+	    xdrs->x_addr_end - xdrs->x_addr < padded)
+		return (FALSE);
+
+	memcpy(cp, xdrs->x_addr, cnt);
+	xdrs->x_addr += cnt;
+
+	if (padded - cnt > 0) {
+		/* At most 3 pad bytes, compared against a zeroed word */
+		if (memcmp(&zero, xdrs->x_addr, padded - cnt) != 0)
+			return (FALSE);
+
+		xdrs->x_addr += padded - cnt;
+	}
+
+	return (TRUE);
+}
+
+/*
+ * Append @val to the stream as a 32-bit big-endian word.  Returns FALSE,
+ * writing nothing, when fewer than 4 bytes remain.
+ */
+static bool_t
+xdrmem_enc_uint32(XDR *xdrs, uint32_t val)
+{
+	uint32_t *slot;
+
+	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
+		return (FALSE);
+
+	slot = (uint32_t *)xdrs->x_addr;
+	*slot = cpu_to_be32(val);
+	xdrs->x_addr += sizeof (uint32_t);
+
+	return (TRUE);
+}
+
+/*
+ * Read a 32-bit big-endian word from the stream in to @val.  Returns
+ * FALSE, leaving @val and the stream untouched, when fewer than 4 bytes
+ * remain.
+ */
+static bool_t
+xdrmem_dec_uint32(XDR *xdrs, uint32_t *val)
+{
+	uint32_t *slot;
+
+	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
+		return (FALSE);
+
+	slot = (uint32_t *)xdrs->x_addr;
+	*val = be32_to_cpu(*slot);
+	xdrs->x_addr += sizeof (uint32_t);
+
+	return (TRUE);
+}
+
+/*
+ * Encode a single char as a full 32-bit word.  The byte is read through an
+ * unsigned char pointer, so it is zero-extended rather than sign-extended.
+ */
+static bool_t
+xdrmem_enc_char(XDR *xdrs, char *cp)
+{
+	const unsigned char *bytep = (unsigned char *) cp;
+
+	BUILD_BUG_ON(sizeof (char) != 1);
+
+	return (xdrmem_enc_uint32(xdrs, (uint32_t)*bytep));
+}
+
+/*
+ * Decode a char that was encoded as a 32-bit word.  Fails if the word could
+ * not be read or if it does not fit in one byte.
+ */
+static bool_t
+xdrmem_dec_char(XDR *xdrs, char *cp)
+{
+	uint32_t word;
+
+	BUILD_BUG_ON(sizeof (char) != 1);
+
+	/*
+	 * A word above 0xff means one of the three upper bytes is non-zero;
+	 * according to the RFC such a block does not hold an encoded char.
+	 */
+	if (xdrmem_dec_uint32(xdrs, &word) && word <= 0xff) {
+		*((unsigned char *) cp) = word;
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+/*
+ * Encode an unsigned short by zero-extending it to a 32-bit word.
+ */
+static bool_t
+xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp)
+{
+	uint32_t widened;
+
+	BUILD_BUG_ON(sizeof (unsigned short) != 2);
+
+	widened = *usp;
+
+	return (xdrmem_enc_uint32(xdrs, widened));
+}
+
+/*
+ * Decode an unsigned short that was encoded as a 32-bit word.  Fails if the
+ * word could not be read or if it does not fit in 16 bits.
+ */
+static bool_t
+xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp)
+{
+	uint32_t word;
+
+	BUILD_BUG_ON(sizeof (unsigned short) != 2);
+
+	/*
+	 * The RFC does not define short ints; the same range rule as in
+	 * xdrmem_dec_char() is assumed.
+	 */
+	if (xdrmem_dec_uint32(xdrs, &word) && word <= 0xffff) {
+		*usp = word;
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+/*
+ * Encode an unsigned int; it is exactly one 32-bit word on the wire.
+ */
+static bool_t
+xdrmem_enc_uint(XDR *xdrs, unsigned *up)
+{
+	uint32_t word;
+
+	BUILD_BUG_ON(sizeof (unsigned) != 4);
+
+	word = *up;
+
+	return (xdrmem_enc_uint32(xdrs, word));
+}
+
+/*
+ * Decode one 32-bit word directly into the caller's unsigned int.
+ */
+static bool_t
+xdrmem_dec_uint(XDR *xdrs, unsigned *up)
+{
+	uint32_t *wordp = (uint32_t *)up;
+
+	BUILD_BUG_ON(sizeof (unsigned) != 4);
+
+	return (xdrmem_dec_uint32(xdrs, wordp));
+}
+
+/*
+ * Encode a 64-bit value as two 32-bit words, upper half first.
+ */
+static bool_t
+xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp)
+{
+	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
+
+	if (xdrmem_enc_uint32(xdrs, *ullp >> 32))
+		return (xdrmem_enc_uint32(xdrs, *ullp & 0xffffffff));
+
+	return (FALSE);
+}
+
+/*
+ * Decode a 64-bit value from two consecutive 32-bit words, upper half
+ * first.  *ullp is written only when both words were read.
+ */
+static bool_t
+xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp)
+{
+	uint32_t upper, lower;
+
+	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
+
+	if (!xdrmem_dec_uint32(xdrs, &upper) ||
+	    !xdrmem_dec_uint32(xdrs, &lower))
+		return (FALSE);
+
+	*ullp = ((u_longlong_t)upper << 32) | lower;
+
+	return (TRUE);
+}
+
+/*
+ * Encode a counted array: the element count *sizep as a 32-bit word,
+ * followed by each element encoded with 'elproc'.  Elements are read from
+ * consecutive 'elsize'-byte slots starting at *arrp.  Fails when the count
+ * exceeds 'maxsize' or when count * elsize would not fit in a uint_t.
+ */
+static bool_t
+xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
+    const uint_t elsize, const xdrproc_t elproc)
+{
+	caddr_t elem = *arrp;
+	uint_t idx;
+
+	if (*sizep > maxsize || *sizep > UINT_MAX / elsize)
+		return (FALSE);
+
+	if (!xdrmem_enc_uint(xdrs, sizep))
+		return (FALSE);
+
+	for (idx = 0; idx < *sizep; idx++, elem += elsize) {
+		if (!elproc(xdrs, elem))
+			return (FALSE);
+	}
+
+	return (TRUE);
+}
+
+/*
+ * Decode a counted array: a 32-bit element count followed by that many
+ * elements, each decoded by 'elproc' into consecutive 'elsize'-byte slots.
+ *
+ * The decoded count is stored in *sizep (even when decoding later fails).
+ * Fails when the count exceeds 'maxsize' or when count * elsize would not
+ * fit in a uint_t.  If *arrp is NULL on entry the array is allocated here
+ * with kmem_alloc(); should an element then fail to decode, the allocation
+ * is released and *arrp is reset to NULL so the caller is never left holding
+ * a pointer to freed memory.
+ */
+static bool_t
+xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
+    const uint_t elsize, const xdrproc_t elproc)
+{
+	uint_t i, size;
+	bool_t alloc = FALSE;
+	caddr_t addr;
+
+	if (!xdrmem_dec_uint(xdrs, sizep))
+		return (FALSE);
+
+	size = *sizep;
+
+	if (size > maxsize || size > UINT_MAX / elsize)
+		return (FALSE);
+
+	/*
+	 * The Solaris man page says: "If *arrp is NULL when decoding,
+	 * xdr_array() allocates memory and *arrp points to it".
+	 */
+	if (*arrp == NULL) {
+		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
+
+		*arrp = kmem_alloc(size * elsize, KM_NOSLEEP);
+		if (*arrp == NULL)
+			return (FALSE);
+
+		alloc = TRUE;
+	}
+
+	addr = *arrp;
+
+	for (i = 0; i < size; i++) {
+		if (!elproc(xdrs, addr)) {
+			if (alloc) {
+				kmem_free(*arrp, size * elsize);
+				/* restore the caller's NULL, not a stale ptr */
+				*arrp = NULL;
+			}
+			return (FALSE);
+		}
+		addr += elsize;
+	}
+
+	return (TRUE);
+}
+
+/*
+ * Encode the NUL-terminated string *sp as a 32-bit length followed by the
+ * string bytes (without the terminator), padded by xdrmem_enc_bytes().
+ * Fails when the string is longer than 'maxsize'.
+ */
+static bool_t
+xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize)
+{
+	const size_t nchars = strlen(*sp);
+	uint_t wire_len;
+
+	if (nchars <= maxsize) {
+		wire_len = nchars;
+
+		if (xdrmem_enc_uint(xdrs, &wire_len))
+			return (xdrmem_enc_bytes(xdrs, *sp, wire_len));
+	}
+
+	return (FALSE);
+}
+
+/*
+ * Decode a string: a 32-bit length followed by that many bytes plus padding.
+ * The result is NUL-terminated; input containing an embedded NUL byte is
+ * rejected.  Fails when the length exceeds 'maxsize'.
+ *
+ * If *sp is NULL on entry a buffer of length + 1 bytes is allocated with
+ * kmem_alloc().  When decoding then fails that buffer is released and *sp is
+ * reset to NULL so the caller is never left holding a pointer to freed
+ * memory.  A caller-supplied buffer is written to but never freed here.
+ */
+static bool_t
+xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize)
+{
+	uint_t size;
+	bool_t alloc = FALSE;
+
+	if (!xdrmem_dec_uint(xdrs, &size))
+		return (FALSE);
+
+	/* size + 1 below must not wrap */
+	if (size > maxsize || size > UINT_MAX - 1)
+		return (FALSE);
+
+	/*
+	 * Solaris man page: "If *sp is NULL when decoding, xdr_string()
+	 * allocates memory and *sp points to it".
+	 */
+	if (*sp == NULL) {
+		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
+
+		*sp = kmem_alloc(size + 1, KM_NOSLEEP);
+		if (*sp == NULL)
+			return (FALSE);
+
+		alloc = TRUE;
+	}
+
+	if (!xdrmem_dec_bytes(xdrs, *sp, size))
+		goto fail;
+
+	/* a NUL inside the payload would silently shorten the string */
+	if (memchr(*sp, 0, size) != NULL)
+		goto fail;
+
+	(*sp)[size] = '\0';
+
+	return (TRUE);
+
+fail:
+	if (alloc) {
+		kmem_free(*sp, size + 1);
+		/* restore the caller's NULL instead of a stale pointer */
+		*sp = NULL;
+	}
+
+	return (FALSE);
+}
+
+/* Operations table made of the encoding handlers defined in this file. */
+static struct xdr_ops xdrmem_encode_ops = {
+	.xdr_control		= xdrmem_control,
+	/* scalars */
+	.xdr_char		= xdrmem_enc_char,
+	.xdr_u_short		= xdrmem_enc_ushort,
+	.xdr_u_int		= xdrmem_enc_uint,
+	.xdr_u_longlong_t	= xdrmem_enc_ulonglong,
+	/* variable-length items */
+	.xdr_opaque		= xdrmem_enc_bytes,
+	.xdr_string		= xdr_enc_string,
+	.xdr_array		= xdr_enc_array,
+};
+
+/* Operations table made of the decoding handlers defined in this file. */
+static struct xdr_ops xdrmem_decode_ops = {
+	.xdr_control		= xdrmem_control,
+	/* scalars */
+	.xdr_char		= xdrmem_dec_char,
+	.xdr_u_short		= xdrmem_dec_ushort,
+	.xdr_u_int		= xdrmem_dec_uint,
+	.xdr_u_longlong_t	= xdrmem_dec_ulonglong,
+	/* variable-length items */
+	.xdr_opaque		= xdrmem_dec_bytes,
+	.xdr_string		= xdr_dec_string,
+	.xdr_array		= xdr_dec_array,
+};

diff --git a/zfs/module/os/linux/spl/spl-zlib.c b/zfs/module/os/linux/spl/spl-zlib.c
new file mode 100644
index 0000000..589496d
--- /dev/null
+++ b/zfs/module/os/linux/spl/spl-zlib.c

@@ -0,0 +1,217 @@
+/*
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ *  z_compress_level/z_uncompress are nearly identical copies of the
+ *  compress2/uncompress functions provided by the official zlib package
+ *  available at http://zlib.net/.  The only changes made were to slightly
+ *  adapt the functions called to match the linux kernel implementation
+ *  of zlib.  The full zlib license follows:
+ *
+ *  zlib.h -- interface of the 'zlib' general purpose compression library
+ *  version 1.2.5, April 19th, 2010
+ *
+ *  Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty.  In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  2. Altered source versions must be plainly marked as such, and must not be
+ *     misrepresented as being the original software.
+ *  3. This notice may not be removed or altered from any source distribution.
+ *
+ *  Jean-loup Gailly
+ *  Mark Adler
+ */
+
+
+#include <linux/percpu_compat.h>
+#include <sys/kmem.h>
+#include <sys/kmem_cache.h>
+#include <sys/zmod.h>
+
+static spl_kmem_cache_t *zlib_workspace_cache;
+
+/*
+ * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
+ * and vfree for every call.  Using a kmem_cache also has the advantage
+ * that it improves the odds that the memory used will be local to this cpu.
+ * To further improve things it might be wise to create a dedicated per-cpu
+ * workspace for use.  This would take some additional care because we then
+ * must disable preemption around the critical section, and verify that
+ * zlib_deflate* and zlib_inflate* never internally call schedule().
+ *
+ * The caller's flags are passed to the cache with __GFP_FS masked off.
+ */
+static void *
+zlib_workspace_alloc(int flags)
+{
+	int cache_flags = flags & ~(__GFP_FS);
+
+	return (kmem_cache_alloc(zlib_workspace_cache, cache_flags));
+}
+
+/*
+ * Return a workspace obtained from zlib_workspace_alloc() to the cache.
+ */
+static void
+zlib_workspace_free(void *workspace)
+{
+	kmem_cache_free(zlib_workspace_cache, workspace);
+}
+
+/*
+ * Compresses the source buffer into the destination buffer. The level
+ * parameter has the same meaning as in deflateInit.  sourceLen is the byte
+ * length of the source buffer. Upon entry, destLen is the total size of the
+ * destination buffer, which must be at least 0.1% larger than sourceLen plus
+ * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer;
+ * it is only updated when compression succeeds.
+ *
+ * z_compress_level returns Z_OK if success, Z_MEM_ERROR if there was not
+ * enough memory, Z_BUF_ERROR if there was not enough room in the output
+ * buffer or if sourceLen or *destLen is too large to be represented in the
+ * stream's uInt counters, Z_STREAM_ERROR if the level parameter is invalid.
+ */
+int
+z_compress_level(void *dest, size_t *destLen, const void *source,
+    size_t sourceLen, int level)
+{
+	z_stream stream;
+	int err;
+
+	stream.next_in = (Byte *)source;
+	stream.avail_in = (uInt)sourceLen;
+	stream.next_out = dest;
+	stream.avail_out = (uInt)*destLen;
+
+	/*
+	 * The stream counts bytes in uInt, which may be narrower than
+	 * size_t.  Refuse lengths that do not survive the narrowing rather
+	 * than silently compressing only part of the source or writing
+	 * into a destination believed to be smaller than it is.
+	 */
+	if ((size_t)stream.avail_in != sourceLen ||
+	    (size_t)stream.avail_out != *destLen)
+		return (Z_BUF_ERROR);
+
+	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
+	if (!stream.workspace)
+		return (Z_MEM_ERROR);
+
+	err = zlib_deflateInit(&stream, level);
+	if (err != Z_OK) {
+		zlib_workspace_free(stream.workspace);
+		return (err);
+	}
+
+	/* one-shot: anything other than Z_STREAM_END is a failure */
+	err = zlib_deflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END) {
+		zlib_deflateEnd(&stream);
+		zlib_workspace_free(stream.workspace);
+		/* Z_OK here means the output buffer filled up first */
+		return (err == Z_OK ? Z_BUF_ERROR : err);
+	}
+	*destLen = stream.total_out;
+
+	err = zlib_deflateEnd(&stream);
+	zlib_workspace_free(stream.workspace);
+
+	return (err);
+}
+EXPORT_SYMBOL(z_compress_level);
+
+/*
+ * Decompresses the source buffer into the destination buffer.  sourceLen is
+ * the byte length of the source buffer. Upon entry, destLen is the total
+ * size of the destination buffer, which must be large enough to hold the
+ * entire uncompressed data. (The size of the uncompressed data must have
+ * been saved previously by the compressor and transmitted to the decompressor
+ * by some mechanism outside the scope of this compression library.)
+ * Upon exit, destLen is the actual size of the uncompressed data; it is only
+ * updated when decompression succeeds.
+ * This function can be used to decompress a whole file at once if the
+ * input file is mmap'ed.
+ *
+ * z_uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ * enough memory, Z_BUF_ERROR if there was not enough room in the output
+ * buffer or if sourceLen or *destLen is too large to be represented in the
+ * stream's uInt counters, or Z_DATA_ERROR if the input data was corrupted.
+ */
+int
+z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
+{
+	z_stream stream;
+	int err;
+
+	stream.next_in = (Byte *)source;
+	stream.avail_in = (uInt)sourceLen;
+	stream.next_out = dest;
+	stream.avail_out = (uInt)*destLen;
+
+	/*
+	 * The stream counts bytes in uInt, which may be narrower than
+	 * size_t.  Refuse lengths that do not survive the narrowing rather
+	 * than silently inflating only a prefix of the source.
+	 */
+	if ((size_t)stream.avail_in != sourceLen ||
+	    (size_t)stream.avail_out != *destLen)
+		return (Z_BUF_ERROR);
+
+	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
+	if (!stream.workspace)
+		return (Z_MEM_ERROR);
+
+	err = zlib_inflateInit(&stream);
+	if (err != Z_OK) {
+		zlib_workspace_free(stream.workspace);
+		return (err);
+	}
+
+	/* one-shot: anything other than Z_STREAM_END is a failure */
+	err = zlib_inflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END) {
+		zlib_inflateEnd(&stream);
+		zlib_workspace_free(stream.workspace);
+
+		/*
+		 * A dictionary request, or running out of input before the
+		 * end of the stream, is reported as corrupted data.
+		 */
+		if (err == Z_NEED_DICT ||
+		    (err == Z_BUF_ERROR && stream.avail_in == 0))
+			return (Z_DATA_ERROR);
+
+		return (err);
+	}
+	*destLen = stream.total_out;
+
+	err = zlib_inflateEnd(&stream);
+	zlib_workspace_free(stream.workspace);
+
+	return (err);
+}
+EXPORT_SYMBOL(z_uncompress);
+
+/*
+ * Create the cache that backs zlib workspaces.  Each object is large enough
+ * for either a deflate or an inflate workspace.  Returns 0 on success and 1
+ * if the cache could not be created.
+ */
+int
+spl_zlib_init(void)
+{
+	int deflate_size, inflate_size;
+
+	deflate_size = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
+	inflate_size = zlib_inflate_workspacesize();
+
+	zlib_workspace_cache = kmem_cache_create(
+	    "spl_zlib_workspace_cache",
+	    MAX(deflate_size, inflate_size), 0,
+	    NULL, NULL, NULL, NULL, NULL,
+	    KMC_KVMEM);
+
+	return (zlib_workspace_cache ? 0 : 1);
+}
+
+/*
+ * Destroy the workspace cache created by spl_zlib_init() and clear the
+ * pointer to it.
+ */
+void
+spl_zlib_fini(void)
+{
+	kmem_cache_destroy(zlib_workspace_cache);
+	zlib_workspace_cache = NULL;
+}

diff --git a/zfs/module/os/linux/zfs/Makefile.in b/zfs/module/os/linux/zfs/Makefile.in
new file mode 100644
index 0000000..fa99077
--- /dev/null
+++ b/zfs/module/os/linux/zfs/Makefile.in

@@ -0,0 +1,38 @@
+#
+# Linux specific sources included from module/zfs/Makefile.in
+#
+# Each object below is appended to the $(MODULE)-objs list.  The
+# ../os/linux/zfs/ prefix is presumably resolved relative to module/zfs, the
+# directory of the including makefile -- confirm before moving this file.
+#
+
+# Suppress unused-value warnings in sparc64 architecture headers
+ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
+
+$(MODULE)-objs += ../os/linux/zfs/abd_os.o
+$(MODULE)-objs += ../os/linux/zfs/arc_os.o
+$(MODULE)-objs += ../os/linux/zfs/mmp_os.o
+$(MODULE)-objs += ../os/linux/zfs/policy.o
+$(MODULE)-objs += ../os/linux/zfs/trace.o
+$(MODULE)-objs += ../os/linux/zfs/qat.o
+$(MODULE)-objs += ../os/linux/zfs/qat_compress.o
+$(MODULE)-objs += ../os/linux/zfs/qat_crypt.o
+$(MODULE)-objs += ../os/linux/zfs/spa_misc_os.o
+$(MODULE)-objs += ../os/linux/zfs/vdev_disk.o
+$(MODULE)-objs += ../os/linux/zfs/vdev_file.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_acl.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_debug.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_dir.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_racct.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_uio.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_vfsops.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_vnops_os.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_znode.o
+$(MODULE)-objs += ../os/linux/zfs/zio_crypt.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_ctldir.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_export.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_file.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_inode.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_super.o
+$(MODULE)-objs += ../os/linux/zfs/zpl_xattr.o
+$(MODULE)-objs += ../os/linux/zfs/zvol_os.o

diff --git a/zfs/module/os/linux/zfs/abd_os.c b/zfs/module/os/linux/zfs/abd_os.c
new file mode 100644
index 0000000..6067950
--- /dev/null
+++ b/zfs/module/os/linux/zfs/abd_os.c

@@ -0,0 +1,1155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+
+/*
+ * See abd.c for a general overview of the arc buffered data (ABD).
+ *
+ * Linear buffers act exactly like normal buffers and are always mapped into the
+ * kernel's virtual memory space, while scattered ABD data chunks are allocated
+ * as physical pages and then mapped in only while they are actually being
+ * accessed through one of the abd_* library functions. Using scattered ABDs
+ * provides several benefits:
+ *
+ *  (1) They avoid use of kmem_*, preventing performance problems where running
+ *      kmem_reap on very large memory systems never finishes and causes
+ *      constant TLB shootdowns.
+ *
+ *  (2) Fragmentation is less of an issue since when we are at the limit of
+ *      allocatable space, we won't have to search around for a long free
+ *      hole in the VA space for large ARC allocations. Each chunk is mapped in
+ *      individually, so even if we are using HIGHMEM (see next point) we
+ *      wouldn't need to worry about finding a contiguous address range.
+ *
+ *  (3) If we are not using HIGHMEM, then all physical memory is always
+ *      mapped into the kernel's address space, so we also avoid the map /
+ *      unmap costs on each ABD access.
+ *
+ * If we are not using HIGHMEM, scattered buffers which have only one chunk
+ * can be treated as linear buffers, because they are contiguous in the
+ * kernel's virtual address space.  See abd_alloc_chunks() for details.
+ */
+
+#include <sys/abd_impl.h>
+#include <sys/param.h>
+#include <sys/zio.h>
+#include <sys/arc.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+#ifdef _KERNEL
+#include <linux/kmap_compat.h>
+#include <linux/scatterlist.h>
+#else
+#define	MAX_ORDER	1
+#endif
+
+/*
+ * Named kstat entries describing ABD allocations.  The abd_stats variable
+ * is initialized positionally, so the field order here must stay in step
+ * with that initializer.
+ */
+typedef struct abd_stats {
+	kstat_named_t abdstat_struct_size;
+	kstat_named_t abdstat_linear_cnt;
+	kstat_named_t abdstat_linear_data_size;
+	kstat_named_t abdstat_scatter_cnt;
+	kstat_named_t abdstat_scatter_data_size;
+	kstat_named_t abdstat_scatter_chunk_waste;
+	/* one entry per allocation order, indexed by the order */
+	kstat_named_t abdstat_scatter_orders[MAX_ORDER];
+	kstat_named_t abdstat_scatter_page_multi_chunk;
+	kstat_named_t abdstat_scatter_page_multi_zone;
+	kstat_named_t abdstat_scatter_page_alloc_retry;
+	kstat_named_t abdstat_scatter_sg_table_retry;
+} abd_stats_t;
+
+/*
+ * Names and data types of the ABD kstat entries, listed in the same order
+ * as the fields of abd_stats_t.
+ */
+static abd_stats_t abd_stats = {
+	/* Amount of memory occupied by all of the abd_t struct allocations */
+	{ "struct_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of linear ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
+	 * ABD takes ownership of its buf then it will become tracked.
+	 */
+	{ "linear_cnt",				KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
+	{ "linear_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset()).
+	 */
+	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
+	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The amount of space wasted at the end of the last chunk across all
+	 * scatter ABDs tracked by scatter_cnt.
+	 */
+	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of compound allocations of a given order.  These
+	 * allocations are spread over all currently allocated ABDs, and
+	 * act as a measure of memory fragmentation.
+	 *
+	 * Only the first of the MAX_ORDER array elements is initialized
+	 * here.  NOTE(review): the per-order names are presumably filled in
+	 * by the init code elsewhere in this file -- confirm.
+	 */
+	{ { "scatter_order_N",			KSTAT_DATA_UINT64 } },
+	/*
+	 * The number of scatter ABDs which contain multiple chunks.
+	 * ABDs are preferentially allocated from the minimum number of
+	 * contiguous multi-page chunks, a single chunk is optimal.
+	 */
+	{ "scatter_page_multi_chunk",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are split across memory zones.
+	 * ABDs are preferentially allocated using pages from a single zone.
+	 */
+	{ "scatter_page_multi_zone",		KSTAT_DATA_UINT64 },
+	/*
+	 *  The total number of retries encountered when attempting to
+	 *  allocate the pages to populate the scatter ABD.
+	 */
+	{ "scatter_page_alloc_retry",		KSTAT_DATA_UINT64 },
+	/*
+	 *  The total number of retries encountered when attempting to
+	 *  allocate the sg table for an ABD.
+	 */
+	{ "scatter_sg_table_retry",		KSTAT_DATA_UINT64 },
+};
+
+/*
+ * One wmsum counter for each entry of abd_stats_t, under the same field
+ * names.
+ */
+struct {
+	wmsum_t abdstat_struct_size;
+	wmsum_t abdstat_linear_cnt;
+	wmsum_t abdstat_linear_data_size;
+	wmsum_t abdstat_scatter_cnt;
+	wmsum_t abdstat_scatter_data_size;
+	wmsum_t abdstat_scatter_chunk_waste;
+	wmsum_t abdstat_scatter_orders[MAX_ORDER];
+	wmsum_t abdstat_scatter_page_multi_chunk;
+	wmsum_t abdstat_scatter_page_multi_zone;
+	wmsum_t abdstat_scatter_page_alloc_retry;
+	wmsum_t abdstat_scatter_sg_table_retry;
+} abd_sums;
+
+#define	abd_for_each_sg(abd, sg, n, i)	\
+	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
+
+unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
+
+/*
+ * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
+ * ABD's.  Smaller allocations will use linear ABD's which uses
+ * zio_[data_]buf_alloc().
+ *
+ * Scatter ABD's use at least one page each, so sub-page allocations waste
+ * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
+ * half of each page).  Using linear ABD's for small allocations means that
+ * they will be put on slabs which contain many allocations.  This can
+ * improve memory efficiency, but it also makes it much harder for ARC
+ * evictions to actually free pages, because all the buffers on one slab need
+ * to be freed in order for the slab (and underlying pages) to be freed.
+ * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
+ * possible for them to actually waste more memory than scatter (one page per
+ * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
+ *
+ * Spill blocks are typically 512B and are heavily used on systems running
+ * selinux with the default dnode size and the `xattr=sa` property set.
+ *
+ * By default we use linear allocations for 512B and 1KB, and scatter
+ * allocations for larger (1.5KB and up).
+ */
+int zfs_abd_scatter_min_size = 512 * 3;
+
+/*
+ * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose pages are
+ * just a single zero'd page. This allows us to conserve memory by
+ * only using a single zero page for the scatterlist.
+ */
+abd_t *abd_zero_scatter = NULL;
+
+struct page;
+/*
+ * _KERNEL   - Will point to ZERO_PAGE if it is available or it will be
+ *             an allocated zero'd PAGESIZE buffer.
+ * Userspace - Will be an allocated zero'ed PAGESIZE buffer.
+ *
+ * abd_zero_page is assigned to each of the pages of abd_zero_scatter.
+ */
+static struct page *abd_zero_page = NULL;
+
+static kmem_cache_t *abd_cache = NULL;
+static kstat_t *abd_ksp;
+
+/*
+ * Number of PAGESIZE pages needed to hold 'size' bytes, rounding up.
+ */
+static uint_t
+abd_chunkcnt_for_bytes(size_t size)
+{
+	size_t rounded = P2ROUNDUP(size, PAGESIZE);
+
+	return (rounded / PAGESIZE);
+}
+
+/*
+ * Allocate an abd_t from abd_cache and add its size to the struct_size
+ * statistic.  The 'size' argument is not used.
+ */
+abd_t *
+abd_alloc_struct_impl(size_t size)
+{
+	/*
+	 * In Linux we do not use the size passed in during ABD
+	 * allocation, so we just ignore it.
+	 */
+	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
+	ASSERT3P(abd, !=, NULL);
+	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
+
+	return (abd);
+}
+
+/*
+ * Return an abd_t to abd_cache and subtract its size from the struct_size
+ * statistic.
+ */
+void
+abd_free_struct_impl(abd_t *abd)
+{
+	kmem_cache_free(abd_cache, abd);
+	/* negative increment: undoes the accounting done at allocation */
+	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
+}
+
+#ifdef _KERNEL
+/*
+ * Mark zfs data pages so they can be excluded from kernel crash dumps
+ */
+#ifdef _LP64
+#define	ABD_FILE_CACHE_PAGE	0x2F5ABDF11ECAC4E
+
+/*
+ * Tag 'page' as ZFS data: take a page reference, set the Private flag and
+ * store ABD_FILE_CACHE_PAGE in the page's private field.
+ */
+static inline void
+abd_mark_zfs_page(struct page *page)
+{
+	get_page(page);
+	SetPagePrivate(page);
+	set_page_private(page, ABD_FILE_CACHE_PAGE);
+}
+
+/*
+ * Undo abd_mark_zfs_page(): clear the private field and the Private flag,
+ * then drop the page reference that marking took.
+ */
+static inline void
+abd_unmark_zfs_page(struct page *page)
+{
+	set_page_private(page, 0UL);
+	ClearPagePrivate(page);
+	put_page(page);
+}
+#else
+#define	abd_mark_zfs_page(page)
+#define	abd_unmark_zfs_page(page)
+#endif /* _LP64 */
+
+#ifndef CONFIG_HIGHMEM
+
+#ifndef __GFP_RECLAIM
+#define	__GFP_RECLAIM		__GFP_WAIT
+#endif
+
+/*
+ * The goal is to minimize fragmentation by preferentially populating ABDs
+ * with higher order compound pages from a single zone.  Allocation size is
+ * progressively decreased until it can be satisfied without performing
+ * reclaim or compaction.  When necessary this function will degenerate to
+ * allocating individual pages and allowing reclaim to satisfy allocations.
+ *
+ * This is the variant built when CONFIG_HIGHMEM is not defined.  It does
+ * not return until enough pages for 'size' bytes and the scatterlist table
+ * have been obtained; failed order-0 and table allocations are retried
+ * after a short sleep.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	struct list_head pages;
+	struct sg_table table;
+	struct scatterlist *sg;
+	struct page *page, *tmp_page = NULL;
+	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+	/* multi-page attempts: compound page, no retry, reclaim bits cleared */
+	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
+	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
+	int nr_pages = abd_chunkcnt_for_bytes(size);
+	int chunks = 0, zones = 0;
+	size_t remaining_size;
+	int nid = NUMA_NO_NODE;
+	int alloc_pages = 0;
+
+	INIT_LIST_HEAD(&pages);
+
+	while (alloc_pages < nr_pages) {
+		unsigned chunk_pages;
+		int order;
+
+		/*
+		 * Ask for the largest chunk that neither exceeds the pages
+		 * still needed nor max_order.  highbit64() is presumably the
+		 * 1-based index of the highest set bit -- confirm.
+		 */
+		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
+		chunk_pages = (1U << order);
+
+		page = alloc_pages_node(nid, order ? gfp_comp : gfp, order);
+		if (page == NULL) {
+			if (order == 0) {
+				/* nothing smaller to fall back to: wait, retry */
+				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+				schedule_timeout_interruptible(1);
+			} else {
+				/* lower the cap for this and later attempts */
+				max_order = MAX(0, order - 1);
+			}
+			continue;
+		}
+
+		list_add_tail(&page->lru, &pages);
+
+		/* count chunks whose node differs from the previous chunk's */
+		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
+			zones++;
+
+		/* request the next chunk from the node this one came from */
+		nid = page_to_nid(page);
+		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
+		chunks++;
+		alloc_pages += chunk_pages;
+	}
+
+	ASSERT3S(alloc_pages, ==, nr_pages);
+
+	/* one scatterlist entry per chunk; keep trying until it succeeds */
+	while (sg_alloc_table(&table, chunks, gfp)) {
+		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+		schedule_timeout_interruptible(1);
+	}
+
+	sg = table.sgl;
+	remaining_size = size;
+	/* move each chunk from the temporary list into the scatterlist */
+	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
+		/* the final entry may cover only part of its chunk */
+		size_t sg_size = MIN(PAGESIZE << compound_order(page),
+		    remaining_size);
+		sg_set_page(sg, page, sg_size, 0);
+		abd_mark_zfs_page(page);
+		remaining_size -= sg_size;
+
+		sg = sg_next(sg);
+		list_del(&page->lru);
+	}
+
+	/*
+	 * These conditions ensure that a possible transformation to a linear
+	 * ABD would be valid.
+	 */
+	ASSERT(!PageHighMem(sg_page(table.sgl)));
+	ASSERT0(ABD_SCATTER(abd).abd_offset);
+
+	if (table.nents == 1) {
+		/*
+		 * Since there is only one entry, this ABD can be represented
+		 * as a linear buffer.  All single-page (4K) ABD's can be
+		 * represented this way.  Some multi-page ABD's can also be
+		 * represented this way, if we were able to allocate a single
+		 * "chunk" (higher-order "page" which represents a power-of-2
+		 * series of physically-contiguous pages).  This is often the
+		 * case for 2-page (8K) ABD's.
+		 *
+		 * Representing a single-entry scatter ABD as a linear ABD
+		 * has the performance advantage of avoiding the copy (and
+		 * allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
+		 * A performance increase of around 5% has been observed for
+		 * ARC-cached reads (of small blocks which can take advantage
+		 * of this).
+		 *
+		 * Note that this optimization is only possible because the
+		 * pages are always mapped into the kernel's address space.
+		 * This is not the case for highmem pages, so the
+		 * optimization can not be made there.
+		 */
+		abd->abd_flags |= ABD_FLAG_LINEAR;
+		abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
+		abd->abd_u.abd_linear.abd_sgl = table.sgl;
+		ABD_LINEAR_BUF(abd) = page_address(sg_page(table.sgl));
+	} else if (table.nents > 1) {
+		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+
+		if (zones) {
+			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
+			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
+		}
+
+		ABD_SCATTER(abd).abd_sgl = table.sgl;
+		ABD_SCATTER(abd).abd_nents = table.nents;
+	}
+}
+#else
+
+/*
+ * Allocate N individual pages to construct a scatter ABD.  This function
+ * makes no attempt to request contiguous pages and requires the minimal
+ * number of kernel interfaces.  It's designed for maximum compatibility.
+ *
+ * This is the variant built when CONFIG_HIGHMEM is defined.  Both the
+ * scatterlist table and every page allocation are retried after a short
+ * sleep until they succeed.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	struct scatterlist *sg = NULL;
+	struct sg_table table;
+	struct page *page;
+	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+	int nr_pages = abd_chunkcnt_for_bytes(size);
+	int i = 0;
+
+	/* one scatterlist entry per page */
+	while (sg_alloc_table(&table, nr_pages, gfp)) {
+		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+		schedule_timeout_interruptible(1);
+	}
+
+	ASSERT3U(table.nents, ==, nr_pages);
+	ABD_SCATTER(abd).abd_sgl = table.sgl;
+	ABD_SCATTER(abd).abd_nents = nr_pages;
+
+	abd_for_each_sg(abd, sg, nr_pages, i) {
+		while ((page = __page_cache_alloc(gfp)) == NULL) {
+			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+			schedule_timeout_interruptible(1);
+		}
+
+		/* every page here is a single order-0 allocation */
+		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
+		sg_set_page(sg, page, PAGESIZE, 0);
+		abd_mark_zfs_page(page);
+	}
+
+	if (nr_pages > 1) {
+		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+	}
+}
+#endif /* !CONFIG_HIGHMEM */
+
+/*
+ * This must be called if any of the sg_table allocation functions
+ * are called.
+ *
+ * The ABD keeps only the scatterlist pointer and the entry count, so a
+ * temporary sg_table is rebuilt from those two values and handed to
+ * sg_free_table().
+ */
+static void
+abd_free_sg_table(abd_t *abd)
+{
+	struct sg_table table;
+
+	table.orig_nents = ABD_SCATTER(abd).abd_nents;
+	table.nents = table.orig_nents;
+	table.sgl = ABD_SCATTER(abd).abd_sgl;
+
+	sg_free_table(&table);
+}
+
+/*
+ * Release every chunk referenced by the ABD's scatterlist, then the
+ * scatterlist table itself, undoing the statistics and page marking done by
+ * abd_alloc_chunks().
+ */
+void
+abd_free_chunks(abd_t *abd)
+{
+	struct scatterlist *sg = NULL;
+	struct page *page;
+	int nr_pages = ABD_SCATTER(abd).abd_nents;
+	int order, i = 0;
+
+	/* these counters were bumped when the matching flag was set */
+	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
+		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);
+
+	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
+		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
+
+	abd_for_each_sg(abd, sg, nr_pages, i) {
+		page = sg_page(sg);
+		abd_unmark_zfs_page(page);
+		/* the order is read back from the page, not stored in the ABD */
+		order = compound_order(page);
+		__free_pages(page, order);
+		ASSERT3U(sg->length, <=, PAGE_SIZE << order);
+		ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
+	}
+	abd_free_sg_table(abd);
+}
+
+/*
+ * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where each page in
+ * the scatterlist will be set to the zero'd out buffer abd_zero_page.
+ *
+ * The result is stored in the global abd_zero_scatter.  Page and table
+ * allocations are retried after a short sleep until they succeed.
+ */
+static void
+abd_alloc_zero_scatter(void)
+{
+	struct scatterlist *sg = NULL;
+	struct sg_table table;
+	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+	int nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
+	int i = 0;
+
+#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
+	/* ZERO_PAGE() is not used in this build: allocate a zeroed page */
+	gfp_t gfp_zero_page = gfp | __GFP_ZERO;
+	while ((abd_zero_page = __page_cache_alloc(gfp_zero_page)) == NULL) {
+		ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+		schedule_timeout_interruptible(1);
+	}
+	abd_mark_zfs_page(abd_zero_page);
+#else
+	abd_zero_page = ZERO_PAGE(0);
+#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
+
+	while (sg_alloc_table(&table, nr_pages, gfp)) {
+		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+		schedule_timeout_interruptible(1);
+	}
+	ASSERT3U(table.nents, ==, nr_pages);
+
+	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
+	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
+	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
+	ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
+	ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
+	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
+	abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
+
+	/* every entry refers to the same single zero page */
+	abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
+		sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
+	}
+
+	ABDSTAT_BUMP(abdstat_scatter_cnt);
+	/* only one page of data backs the whole ABD */
+	ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE);
+	ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+}
+
+#else /* _KERNEL */
+
+#ifndef PAGE_SHIFT
+#define	PAGE_SHIFT (highbit64(PAGESIZE)-1)
+#endif
+
+#define	zfs_kmap_atomic(chunk)		((void *)chunk)
+#define	zfs_kunmap_atomic(addr)		do { (void)(addr); } while (0)
+#define	local_irq_save(flags)		do { (void)(flags); } while (0)
+#define	local_irq_restore(flags)	do { (void)(flags); } while (0)
+#define	nth_page(pg, i) \
+	((struct page *)((void *)(pg) + (i) * PAGESIZE))
+
+struct scatterlist {
+	struct page *page;
+	int length;
+	int end;
+};
+
+/*
+ * Userspace stand-in for the kernel function of the same name: zero 'nr'
+ * scatterlist entries and flag the last one as the end of the list.
+ * NOTE(review): indexes sg[nr - 1], so this assumes nr >= 1 -- confirm that
+ * no caller passes 0.
+ */
+static void
+sg_init_table(struct scatterlist *sg, int nr)
+{
+	memset(sg, 0, nr * sizeof (struct scatterlist));
+	sg[nr - 1].end = 1;
+}
+
+/*
+ * This must be called if any of the sg_table allocation functions
+ * are called.
+ *
+ * In userspace the scatterlist is a plain array obtained from vmem_alloc(),
+ * so it is released with vmem_free() using the size implied by abd_nents.
+ */
+static void
+abd_free_sg_table(abd_t *abd)
+{
+	int count = ABD_SCATTER(abd).abd_nents;
+	size_t bytes = count * sizeof (struct scatterlist);
+
+	vmem_free(ABD_SCATTER(abd).abd_sgl, bytes);
+}
+
+/*
+ * Visit the first nr entries of the table starting at sgl: i counts the
+ * entries and sg is stepped with sg_next() after each one.
+ */
+#define	for_each_sg(sgl, sg, nr, i)	\
+	for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))
+
+/*
+ * Store a page pointer and its length in a scatterlist entry.  The offset
+ * argument must be zero; it is not recorded.
+ */
+static inline void
+sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
+    unsigned int offset)
+{
+	/* offsets are not used at present */
+	ASSERT(offset == 0);
+	sg->length = len;
+	sg->page = page;
+}
+
+/* Return the page pointer stored in a scatterlist entry. */
+static inline struct page *
+sg_page(struct scatterlist *sg)
+{
+	struct page *pg = sg->page;
+
+	return (pg);
+}
+
+/*
+ * Step to the following scatterlist entry; yields NULL once the entry
+ * flagged as the end of the table has been reached.
+ */
+static inline struct scatterlist *
+sg_next(struct scatterlist *sg)
+{
+	return (sg->end ? NULL : sg + 1);
+}
+
+/*
+ * Back a scatter ABD with enough PAGESIZE chunks to hold size bytes: build
+ * the scatterlist table, then allocate one 64-byte-aligned chunk per entry.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	unsigned chunk_cnt = abd_chunkcnt_for_bytes(size);
+	struct scatterlist *table, *cur;
+	int idx;
+
+	table = vmem_alloc(chunk_cnt * sizeof (struct scatterlist), KM_SLEEP);
+	ABD_SCATTER(abd).abd_sgl = table;
+	sg_init_table(table, chunk_cnt);
+
+	abd_for_each_sg(abd, cur, chunk_cnt, idx) {
+		struct page *chunk;
+
+		chunk = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
+		sg_set_page(cur, chunk, PAGESIZE, 0);
+	}
+	ABD_SCATTER(abd).abd_nents = chunk_cnt;
+}
+
+/*
+ * Free every PAGESIZE chunk referenced by a scatter ABD, then the
+ * scatterlist table itself.
+ */
+void
+abd_free_chunks(abd_t *abd)
+{
+	int idx, nents = ABD_SCATTER(abd).abd_nents;
+	struct scatterlist *cur;
+
+	abd_for_each_sg(abd, cur, nents, idx) {
+		int done = 0;
+
+		/* an entry may cover several chunks; free them one by one */
+		while (done < cur->length) {
+			struct page *chunk;
+
+			chunk = nth_page(sg_page(cur), done >> PAGE_SHIFT);
+			umem_free(chunk, PAGESIZE);
+			done += PAGESIZE;
+		}
+	}
+	abd_free_sg_table(abd);
+}
+
+/*
+ * Build the shared all-zeros scatter ABD for user space.  A single
+ * zero-filled chunk is allocated and every entry of a SPA_MAXBLOCKSIZE
+ * scatterlist table is pointed at it.
+ */
+static void
+abd_alloc_zero_scatter(void)
+{
+	unsigned chunk_cnt = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
+	struct scatterlist *cur;
+	int idx;
+
+	/* the one chunk that backs the whole ABD */
+	abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
+	memset(abd_zero_page, 0, PAGESIZE);
+
+	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
+	abd_zero_scatter->abd_flags |=
+	    ABD_FLAG_OWNER | ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
+	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
+	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
+	ABD_SCATTER(abd_zero_scatter).abd_nents = chunk_cnt;
+	ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(chunk_cnt *
+	    sizeof (struct scatterlist), KM_SLEEP);
+	sg_init_table(ABD_SCATTER(abd_zero_scatter).abd_sgl, chunk_cnt);
+
+	abd_for_each_sg(abd_zero_scatter, cur, chunk_cnt, idx) {
+		sg_set_page(cur, abd_zero_page, PAGESIZE, 0);
+	}
+
+	/* only one chunk of real memory is accounted for */
+	ABDSTAT_BUMP(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE);
+	ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+}
+
+#endif /* _KERNEL */
+
+/*
+ * Decide whether an allocation of the given size should be linear: always
+ * when scatter ABDs are disabled, otherwise only below the scatter minimum.
+ */
+boolean_t
+abd_size_alloc_linear(size_t size)
+{
+	if (!zfs_abd_scatter_enabled)
+		return (B_TRUE);
+
+	return (size < zfs_abd_scatter_min_size);
+}
+
+/*
+ * Add or remove one scatter ABD from the statistics.  "waste" is the gap
+ * between abd_size and abd_size rounded up to a multiple of PAGESIZE; it is
+ * also charged to (or returned from) the ARC chunk-waste space accounting.
+ */
+void
+abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
+{
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+	int waste = P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size;
+
+	if (op != ABDSTAT_INCR) {
+		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
+		ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
+		arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
+		return;
+	}
+
+	ABDSTAT_BUMP(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
+	ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
+	arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
+}
+
+/*
+ * Add or remove one linear ABD from the linear count and data-size
+ * statistics.
+ */
+void
+abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
+{
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+
+	if (op != ABDSTAT_INCR) {
+		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
+		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+		return;
+	}
+
+	ABDSTAT_BUMP(abdstat_linear_cnt);
+	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
+}
+
+/*
+ * Sanity-check a scatter ABD using assertions only: it must have at least
+ * one entry, its starting offset must fall inside the first entry, and
+ * every entry must reference a page.
+ */
+void
+abd_verify_scatter(abd_t *abd)
+{
+	size_t n;
+	int i = 0;
+	struct scatterlist *sg = NULL;
+
+	ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
+	ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
+	    ABD_SCATTER(abd).abd_sgl->length);
+	n = ABD_SCATTER(abd).abd_nents;
+	abd_for_each_sg(abd, sg, n, i) {
+		ASSERT3P(sg_page(sg), !=, NULL);
+	}
+}
+
+/*
+ * Tear down the shared all-zeros scatter ABD: reverse the statistics that
+ * were recorded when it was created, free its scatterlist table and abd_t,
+ * and release the zero page where this build allocated one.
+ */
+static void
+abd_free_zero_scatter(void)
+{
+	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE);
+	ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
+
+	abd_free_sg_table(abd_zero_scatter);
+	abd_free_struct(abd_zero_scatter);
+	abd_zero_scatter = NULL;
+	ASSERT3P(abd_zero_page, !=, NULL);
+#if defined(_KERNEL)
+#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
+	/* without this define the kernel build frees nothing here */
+	abd_unmark_zfs_page(abd_zero_page);
+	__free_page(abd_zero_page);
+#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
+#else
+	umem_free(abd_zero_page, PAGESIZE);
+#endif /* _KERNEL */
+}
+
+/*
+ * kstat update callback for the ABD statistics.  A write request is
+ * rejected with EACCES; otherwise each wmsum counter in abd_sums is read
+ * into the matching field of the kstat data and 0 is returned.
+ */
+static int
+abd_kstats_update(kstat_t *ksp, int rw)
+{
+	abd_stats_t *as = ksp->ks_data;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	as->abdstat_struct_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_struct_size);
+	as->abdstat_linear_cnt.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_linear_cnt);
+	as->abdstat_linear_data_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_linear_data_size);
+	as->abdstat_scatter_cnt.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_cnt);
+	as->abdstat_scatter_data_size.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
+	as->abdstat_scatter_chunk_waste.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
+	/* one counter per allocation order */
+	for (int i = 0; i < MAX_ORDER; i++) {
+		as->abdstat_scatter_orders[i].value.ui64 =
+		    wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
+	}
+	as->abdstat_scatter_page_multi_chunk.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk);
+	as->abdstat_scatter_page_multi_zone.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone);
+	as->abdstat_scatter_page_alloc_retry.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry);
+	as->abdstat_scatter_sg_table_retry.value.ui64 =
+	    wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry);
+	return (0);
+}
+
+/*
+ * Set up the ABD subsystem: create the abd_t cache, initialise every
+ * statistics counter, publish the "abdstats" kstat when it can be created,
+ * and finally allocate the shared all-zeros scatter ABD.
+ */
+void
+abd_init(void)
+{
+	int i;
+
+	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
+	    0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	wmsum_init(&abd_sums.abdstat_struct_size, 0);
+	wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
+	wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
+	for (i = 0; i < MAX_ORDER; i++)
+		wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
+	wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0);
+	wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0);
+
+	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
+	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+	if (abd_ksp != NULL) {
+		/* the per-order entries get their names at run time */
+		for (i = 0; i < MAX_ORDER; i++) {
+			snprintf(abd_stats.abdstat_scatter_orders[i].name,
+			    KSTAT_STRLEN, "scatter_order_%d", i);
+			abd_stats.abdstat_scatter_orders[i].data_type =
+			    KSTAT_DATA_UINT64;
+		}
+		abd_ksp->ks_data = &abd_stats;
+		abd_ksp->ks_update = abd_kstats_update;
+		kstat_install(abd_ksp);
+	}
+
+	abd_alloc_zero_scatter();
+}
+
+/*
+ * Shut down the ABD subsystem: free the all-zeros scatter ABD, remove the
+ * kstat if one was created, finalise every statistics counter and destroy
+ * the abd_t cache.
+ */
+void
+abd_fini(void)
+{
+	abd_free_zero_scatter();
+
+	if (abd_ksp != NULL) {
+		kstat_delete(abd_ksp);
+		abd_ksp = NULL;
+	}
+
+	wmsum_fini(&abd_sums.abdstat_struct_size);
+	wmsum_fini(&abd_sums.abdstat_linear_cnt);
+	wmsum_fini(&abd_sums.abdstat_linear_data_size);
+	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
+	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
+	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
+	for (int i = 0; i < MAX_ORDER; i++)
+		wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
+	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
+	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
+	wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry);
+	wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry);
+
+	if (abd_cache) {
+		kmem_cache_destroy(abd_cache);
+		abd_cache = NULL;
+	}
+}
+
+/*
+ * Free a "linear page" ABD.  The ABD is first turned back into a
+ * single-entry scatter ABD so the ordinary chunk-freeing path and scatter
+ * statistics can be reused.
+ */
+void
+abd_free_linear_page(abd_t *abd)
+{
+	/* fetch the list pointer before the scatter fields are written */
+	struct scatterlist *sgl = abd->abd_u.abd_linear.abd_sgl;
+
+	abd->abd_flags &= ~(ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE);
+	ABD_SCATTER(abd).abd_sgl = sgl;
+	ABD_SCATTER(abd).abd_offset = 0;
+	ABD_SCATTER(abd).abd_nents = 1;
+
+	abd_free_chunks(abd);
+	abd_update_scatter_stats(abd, ABDSTAT_DECR);
+}
+
+/*
+ * Allocate an ABD that is about to be handed to the block layer for I/O.
+ * When the consumer does not care whether the data is scattered and the
+ * ABD is short-lived, the best choice is the ABD type that needs the least
+ * copying to perform the I/O.
+ *
+ * On Linux the ideal approach would be to use abd_get_offset() to build a
+ * new ABD that shares the original pages, removing the copy altogether.
+ * Until that optimization exists this simply defers to abd_alloc().
+ */
+abd_t *
+abd_alloc_for_io(size_t size, boolean_t is_metadata)
+{
+	abd_t *abd = abd_alloc(size, is_metadata);
+
+	return (abd);
+}
+
+/*
+ * Make abd (allocated here when NULL is passed) refer to the data of the
+ * scatter ABD sabd starting off bytes into it.  No data is copied: the new
+ * ABD points into sabd's scatterlist.  The size argument is not referenced
+ * by this function.
+ */
+abd_t *
+abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
+    size_t size)
+{
+	int i = 0;
+	struct scatterlist *sg = NULL;
+
+	abd_verify(sabd);
+	ASSERT3U(off, <=, sabd->abd_size);
+
+	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
+
+	if (abd == NULL)
+		abd = abd_alloc_struct(0);
+
+	/*
+	 * Even if this buf is filesystem metadata, we only track that
+	 * if we own the underlying data buffer, which is not true in
+	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
+	 */
+
+	/*
+	 * Skip the entries that lie wholly before the requested offset; on
+	 * exit sg is the entry holding it, i is the number of entries
+	 * skipped, and new_offset is the remaining offset within sg.
+	 */
+	abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
+		if (new_offset < sg->length)
+			break;
+		new_offset -= sg->length;
+	}
+
+	ABD_SCATTER(abd).abd_sgl = sg;
+	ABD_SCATTER(abd).abd_offset = new_offset;
+	ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;
+
+	return (abd);
+}
+
+/*
+ * Prepare an abd_iter to walk a (non-gang) ABD from its beginning.  Nothing
+ * is mapped yet.  A scatter ABD starts at its own offset within its first
+ * scatterlist entry; a linear ABD has no scatterlist and starts at 0.
+ */
+void
+abd_iter_init(struct abd_iter *aiter, abd_t *abd)
+{
+	ASSERT(!abd_is_gang(abd));
+	abd_verify(abd);
+
+	aiter->iter_abd = abd;
+	aiter->iter_pos = 0;
+	aiter->iter_mapsize = 0;
+	aiter->iter_mapaddr = NULL;
+
+	if (!abd_is_linear(abd)) {
+		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
+		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
+		return;
+	}
+
+	aiter->iter_sg = NULL;
+	aiter->iter_offset = 0;
+}
+
+/*
+ * Helper that reports whether the abd_iter has been exhausted, i.e. its
+ * position equals the size of the ABD it walks.
+ */
+boolean_t
+abd_iter_at_end(struct abd_iter *aiter)
+{
+	abd_t *abd = aiter->iter_abd;
+
+	return (aiter->iter_pos == abd->abd_size);
+}
+
+/*
+ * Advance the iterator by a certain amount. Cannot be called when a chunk is
+ * in use. This can be safely called when the aiter has already been
+ * exhausted, in which case this does nothing.
+ */
+void
+abd_iter_advance(struct abd_iter *aiter, size_t amount)
+{
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* There's nothing left to advance to, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	aiter->iter_pos += amount;
+	aiter->iter_offset += amount;
+	if (!abd_is_linear(aiter->iter_abd)) {
+		/*
+		 * Step over every scatterlist entry that the new offset has
+		 * moved past, keeping iter_offset relative to iter_sg.
+		 */
+		while (aiter->iter_offset >= aiter->iter_sg->length) {
+			aiter->iter_offset -= aiter->iter_sg->length;
+			aiter->iter_sg = sg_next(aiter->iter_sg);
+			if (aiter->iter_sg == NULL) {
+				ASSERT0(aiter->iter_offset);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Map the current chunk into aiter. This can be safely called when the aiter
+ * has already been exhausted, in which case this does nothing.
+ *
+ * On return iter_mapaddr points at the current position and iter_mapsize is
+ * the number of bytes usable from there.
+ */
+void
+abd_iter_map(struct abd_iter *aiter)
+{
+	void *paddr;
+	size_t offset = 0;
+
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* There's nothing left to iterate over, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	if (abd_is_linear(aiter->iter_abd)) {
+		/* linear: everything from the offset to the end is usable */
+		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
+		offset = aiter->iter_offset;
+		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
+		paddr = ABD_LINEAR_BUF(aiter->iter_abd);
+	} else {
+		/* scatter: limited by the current entry and by the ABD size */
+		offset = aiter->iter_offset;
+		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
+		    aiter->iter_abd->abd_size - aiter->iter_pos);
+
+		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg));
+	}
+
+	aiter->iter_mapaddr = (char *)paddr + offset;
+}
+
+/*
+ * Unmap the current chunk from aiter. This can be safely called when the aiter
+ * has already been exhausted, in which case this does nothing.
+ */
+void
+abd_iter_unmap(struct abd_iter *aiter)
+{
+	/* There's nothing left to unmap, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	if (!abd_is_linear(aiter->iter_abd)) {
+		/* subtract the offset to recover the address that was mapped */
+		/* LINTED E_FUNC_SET_NOT_USED */
+		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset);
+	}
+
+	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
+	ASSERT3U(aiter->iter_mapsize, >, 0);
+
+	aiter->iter_mapaddr = NULL;
+	aiter->iter_mapsize = 0;
+}
+
+/* Intentionally empty in this implementation: there is nothing to reap. */
+void
+abd_cache_reap_now(void)
+{
+}
+
+#if defined(_KERNEL)
+/*
+ * bio_nr_pages for ABD.
+ * @off is the offset in @abd
+ *
+ * Returns how many pages the @size bytes starting at @off touch.  For a
+ * gang ABD the count is the sum over the child ABDs that the range covers.
+ */
+unsigned long
+abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
+{
+	unsigned long pos;
+
+	if (abd_is_gang(abd)) {
+		unsigned long count = 0;
+
+		for (abd_t *cabd = abd_gang_get_offset(abd, &off);
+		    cabd != NULL && size != 0;
+		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+			ASSERT3U(off, <, cabd->abd_size);
+			int mysize = MIN(size, cabd->abd_size - off);
+			count += abd_nr_pages_off(cabd, mysize, off);
+			size -= mysize;
+			/* later children are consumed from their start */
+			off = 0;
+		}
+		return (count);
+	}
+
+	if (abd_is_linear(abd))
+		pos = (unsigned long)abd_to_buf(abd) + off;
+	else
+		pos = ABD_SCATTER(abd).abd_offset + off;
+
+	/* index of the page after the last byte minus index of the first */
+	return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
+	    (pos >> PAGE_SHIFT));
+}
+
+/*
+ * Add the pages behind a virtually contiguous buffer to a bio, one
+ * page-bounded piece at a time.  Stops when bio_size bytes have been added,
+ * after bi_max_vecs pieces, or when bio_add_page() does not take a whole
+ * piece.  Returns the number of bytes that were not added.
+ */
+static unsigned int
+bio_map(struct bio *bio, void *buf_ptr, unsigned int bio_size)
+{
+	unsigned int offset, size, i;
+	struct page *page;
+
+	offset = offset_in_page(buf_ptr);
+	for (i = 0; i < bio->bi_max_vecs; i++) {
+		size = PAGE_SIZE - offset;
+
+		if (bio_size <= 0)
+			break;
+
+		if (size > bio_size)
+			size = bio_size;
+
+		if (is_vmalloc_addr(buf_ptr))
+			page = vmalloc_to_page(buf_ptr);
+		else
+			page = virt_to_page(buf_ptr);
+
+		/*
+		 * Some network-related block devices use tcp_sendpage, which
+		 * doesn't behave well with 0-count pages; this is a safety
+		 * net to catch them.
+		 */
+		ASSERT3S(page_count(page), >, 0);
+
+		if (bio_add_page(bio, page, size, offset) != size)
+			break;
+
+		buf_ptr += size;
+		bio_size -= size;
+		/* only the first piece can start part-way into a page */
+		offset = 0;
+	}
+
+	return (bio_size);
+}
+
+/*
+ * bio_map for gang ABD.
+ *
+ * Maps each child ABD in turn through abd_bio_map_off(), starting at the
+ * child that contains @off.  Returns the number of bytes left unmapped; the
+ * walk stops early once everything is mapped or a child could not be mapped
+ * completely.
+ */
+static unsigned int
+abd_gang_bio_map_off(struct bio *bio, abd_t *abd,
+    unsigned int io_size, size_t off)
+{
+	ASSERT(abd_is_gang(abd));
+
+	for (abd_t *cabd = abd_gang_get_offset(abd, &off);
+	    cabd != NULL;
+	    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+		ASSERT3U(off, <, cabd->abd_size);
+		int size = MIN(io_size, cabd->abd_size - off);
+		int remainder = abd_bio_map_off(bio, cabd, size, off);
+		/* subtract only what was actually mapped for this child */
+		io_size -= (size - remainder);
+		if (io_size == 0 || remainder > 0)
+			return (io_size);
+		off = 0;
+	}
+	ASSERT0(io_size);
+	return (io_size);
+}
+
+/*
+ * bio_map for ABD.
+ * @off is the offset in @abd
+ * Remaining IO size is returned
+ *
+ * Linear ABDs are handed to bio_map() and gang ABDs to
+ * abd_gang_bio_map_off(); scatter ABDs are walked here page by page.
+ */
+unsigned int
+abd_bio_map_off(struct bio *bio, abd_t *abd,
+    unsigned int io_size, size_t off)
+{
+	struct abd_iter aiter;
+
+	ASSERT3U(io_size, <=, abd->abd_size - off);
+	if (abd_is_linear(abd))
+		return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, io_size));
+
+	ASSERT(!abd_is_linear(abd));
+	if (abd_is_gang(abd))
+		return (abd_gang_bio_map_off(bio, abd, io_size, off));
+
+	abd_iter_init(&aiter, abd);
+	abd_iter_advance(&aiter, off);
+
+	for (int i = 0; i < bio->bi_max_vecs; i++) {
+		struct page *pg;
+		size_t len, sgoff, pgoff;
+		struct scatterlist *sg;
+
+		if (io_size <= 0)
+			break;
+
+		sg = aiter.iter_sg;
+		sgoff = aiter.iter_offset;
+		/* offset within the page, and bytes left in that page */
+		pgoff = sgoff & (PAGESIZE - 1);
+		len = MIN(io_size, PAGESIZE - pgoff);
+		ASSERT(len > 0);
+
+		/* an entry may span several pages; pick the one at sgoff */
+		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
+		if (bio_add_page(bio, pg, len, pgoff) != len)
+			break;
+
+		io_size -= len;
+		abd_iter_advance(&aiter, len);
+	}
+
+	return (io_size);
+}
+
+/* Tunable Parameters (all registered with mode 0644) */
+module_param(zfs_abd_scatter_enabled, int, 0644);
+MODULE_PARM_DESC(zfs_abd_scatter_enabled,
+	"Toggle whether ABD allocations must be linear.");
+module_param(zfs_abd_scatter_min_size, int, 0644);
+MODULE_PARM_DESC(zfs_abd_scatter_min_size,
+	"Minimum size of scatter allocations.");
+/* CSTYLED */
+module_param(zfs_abd_scatter_max_order, uint, 0644);
+MODULE_PARM_DESC(zfs_abd_scatter_max_order,
+	"Maximum order allocation used for a scatter ABD.");
+#endif

diff --git a/zfs/module/os/linux/zfs/arc_os.c b/zfs/module/os/linux/zfs/arc_os.c
new file mode 100644
index 0000000..fc76fe0
--- /dev/null
+++ b/zfs/module/os/linux/zfs/arc_os.c

@@ -0,0 +1,545 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+#include <sys/spa_impl.h>
+#include <sys/zio_compress.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_context.h>
+#include <sys/arc.h>
+#include <sys/zfs_refcount.h>
+#include <sys/vdev.h>
+#include <sys/vdev_trim.h>
+#include <sys/vdev_impl.h>
+#include <sys/dsl_pool.h>
+#include <sys/multilist.h>
+#include <sys/abd.h>
+#include <sys/zil.h>
+#include <sys/fm/fs/zfs.h>
+#ifdef _KERNEL
+#include <sys/shrinker.h>
+#include <sys/vmsystm.h>
+#include <sys/zpl.h>
+#include <linux/page_compat.h>
+#include <linux/notifier.h>
+#include <linux/memory.h>
+#endif
+#include <sys/callb.h>
+#include <sys/kstat.h>
+#include <sys/zthr.h>
+#include <zfs_fletcher.h>
+#include <sys/arc_impl.h>
+#include <sys/trace_zfs.h>
+#include <sys/aggsum.h>
+
+/*
+ * This is a limit on how many pages the ARC shrinker makes available for
+ * eviction in response to one page allocation attempt.  Note that in
+ * practice, the kernel's shrinker can ask us to evict up to about 4x this
+ * for one allocation attempt.
+ *
+ * The default limit of 10,000 (in practice, 160MB per allocation attempt
+ * with 4K pages) limits the amount of time spent attempting to reclaim ARC
+ * memory to less than 100ms per allocation attempt, even with a small
+ * average compressed block size of ~8KB.
+ *
+ * See also the comment in arc_shrinker_count().
+ * Set to 0 to disable the limit.
+ */
+int zfs_arc_shrinker_limit = 10000;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static struct notifier_block arc_hotplug_callback_mem_nb;
+#endif
+
+/*
+ * Compute the default maximum ARC size from the amount of physical memory:
+ * half of all memory, but never less than min.
+ */
+uint64_t
+arc_default_max(uint64_t min, uint64_t allmem)
+{
+	uint64_t half = allmem / 2;
+
+	return (MAX(half, min));
+}
+
+#ifdef _KERNEL
+/*
+ * Return maximum amount of memory that we could possibly use.  Reduced
+ * to half of all memory in user space which is primarily used for testing.
+ *
+ * With CONFIG_HIGHMEM the high-memory pages are left out of the total.
+ */
+uint64_t
+arc_all_memory(void)
+{
+#ifdef CONFIG_HIGHMEM
+	return (ptob(zfs_totalram_pages - zfs_totalhigh_pages));
+#else
+	return (ptob(zfs_totalram_pages));
+#endif /* CONFIG_HIGHMEM */
+}
+
+/*
+ * Return the amount of memory that is considered free.  In user space
+ * which is primarily used for testing we pretend that free memory ranges
+ * from 0-20% of all memory.
+ *
+ * With CONFIG_HIGHMEM this is free RAM minus free high memory as reported
+ * by si_meminfo(); otherwise it is the free pages plus the inactive file
+ * pages.
+ */
+uint64_t
+arc_free_memory(void)
+{
+#ifdef CONFIG_HIGHMEM
+	struct sysinfo si;
+	si_meminfo(&si);
+	return (ptob(si.freeram - si.freehigh));
+#else
+	return (ptob(nr_free_pages() +
+	    nr_inactive_file_pages()));
+#endif /* CONFIG_HIGHMEM */
+}
+
+/*
+ * Report how much memory may still be consumed before reclaim becomes
+ * necessary.  A positive value means there is enough free memory; a
+ * negative value is the amount that has to be freed.
+ */
+int64_t
+arc_available_memory(void)
+{
+	uint64_t free_bytes = arc_free_memory();
+
+	return (free_bytes - arc_sys_free);
+}
+
+/*
+ * Estimate, in bytes, how much of the ARC to report as evictable to the
+ * shrinker.  arc_clean is the sum of the evictable sizes of the MRU and MFU
+ * states (data and metadata); arc_dirty is whatever remains of the ARC
+ * size, floored at zero.
+ */
+static uint64_t
+arc_evictable_memory(void)
+{
+	int64_t asize = aggsum_value(&arc_sums.arcstat_size);
+	uint64_t arc_clean =
+	    zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
+	    zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +
+	    zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) +
+	    zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
+	uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0);
+
+	/*
+	 * Scale reported evictable memory in proportion to page cache, cap
+	 * at specified min/max.
+	 */
+	uint64_t min = (ptob(nr_file_pages()) / 100) * zfs_arc_pc_percent;
+	min = MAX(arc_c_min, MIN(arc_c_max, min));
+
+	/* the non-evictable part already meets the floor */
+	if (arc_dirty >= min)
+		return (arc_clean);
+
+	/* otherwise report only what lies above the floor */
+	return (MAX((int64_t)asize - (int64_t)min, 0));
+}
+
+/*
+ * Shrinker callbacks: _count() reports the number of free-able objects and
+ * _scan() reports the number of objects that were freed.
+ */
+static unsigned long
+arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int64_t limit = INT64_MAX;
+
+	/*
+	 * __GFP_FS won't be set if we are called from ZFS code (see
+	 * kmem_flags_convert(), which removes it).  Evicting in that case
+	 * could deadlock, so it is not allowed.  0 is returned rather than
+	 * SHRINK_STOP so that the shrinker logic doesn't accumulate a
+	 * deficit against us.
+	 */
+	if ((sc->gfp_mask & __GFP_FS) == 0)
+		return (0);
+
+	/*
+	 * This code is reached in the "direct reclaim" case, where the
+	 * kernel (outside ZFS) is trying to allocate a page, and the system
+	 * is low on memory.
+	 *
+	 * The kernel's shrinker code doesn't understand how many pages the
+	 * ARC's callback actually frees, so it may ask the ARC to shrink a
+	 * lot for one page allocation. This is problematic because it may
+	 * take a long time, thus delaying the page allocation, and because
+	 * it may force the ARC to unnecessarily shrink very small.
+	 *
+	 * Therefore, the amount of data reported as evictable is limited,
+	 * which in turn limits the amount that the shrinker will ask us to
+	 * evict for one page allocation attempt.
+	 *
+	 * In practice, we may be asked to shrink 4x the limit to satisfy one
+	 * page allocation, before the kernel's shrinker code gives up on us.
+	 * When that happens, we rely on the kernel code to find the pages
+	 * that we freed before invoking the OOM killer.  This happens in
+	 * __alloc_pages_slowpath(), which retries and finds the pages we
+	 * freed when it calls get_page_from_freelist().
+	 *
+	 * See also the comment above zfs_arc_shrinker_limit.
+	 */
+	if (zfs_arc_shrinker_limit != 0)
+		limit = zfs_arc_shrinker_limit;
+
+	return (MIN(limit, btop((int64_t)arc_evictable_memory())));
+}
+
+/*
+ * Shrinker scan callback: lower the ARC target size by sc->nr_to_scan
+ * pages, wait for that much to be evicted, credit the pages to the task's
+ * reclaim state when it has one, and report sc->nr_to_scan as freed.
+ */
+static unsigned long
+arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	ASSERT((sc->gfp_mask & __GFP_FS) != 0);
+
+	/* The arc is considered warm once reclaim has occurred */
+	if (unlikely(arc_warm == B_FALSE))
+		arc_warm = B_TRUE;
+
+	/*
+	 * Evict the requested number of pages by reducing arc_c and waiting
+	 * for the requested amount of data to be evicted.
+	 */
+	arc_reduce_target_size(ptob(sc->nr_to_scan));
+	arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
+	/* the un-braced if governs whichever statement the #ifdef selects */
+	if (current->reclaim_state != NULL)
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += sc->nr_to_scan;
+#else
+		current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
+#endif
+
+	/*
+	 * We are experiencing memory pressure which the arc_evict_zthr was
+	 * unable to keep up with. Set arc_no_grow to briefly pause arc
+	 * growth to avoid compounding the memory pressure.
+	 */
+	arc_no_grow = B_TRUE;
+
+	/*
+	 * When direct reclaim is observed it usually indicates a rapid
+	 * increase in memory pressure.  This occurs because the kswapd
+	 * threads were unable to asynchronously keep enough free memory
+	 * available.
+	 */
+	if (current_is_kswapd()) {
+		ARCSTAT_BUMP(arcstat_memory_indirect_count);
+	} else {
+		ARCSTAT_BUMP(arcstat_memory_direct_count);
+	}
+
+	return (sc->nr_to_scan);
+}
+
+/* Declare arc_shrinker with the count and scan callbacks defined above. */
+SPL_SHRINKER_DECLARE(arc_shrinker,
+    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+
+/*
+ * Decide whether a transaction reserving "reserve" bytes in txg must be
+ * throttled because memory is low.  Returns 0 to proceed, ERESTART when
+ * called from kswapd with too much page load already pending for this txg,
+ * or EAGAIN when page load is pending and the ARC needs to reclaim.
+ */
+int
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
+{
+	uint64_t free_memory = arc_free_memory();
+
+	/* plenty of free memory: never throttle */
+	if (free_memory > arc_all_memory() * arc_lotsfree_percent / 100)
+		return (0);
+
+	/* the page-load counter is tracked per txg; reset it on a new one */
+	if (txg > spa->spa_lowmem_last_txg) {
+		spa->spa_lowmem_last_txg = txg;
+		spa->spa_lowmem_page_load = 0;
+	}
+	/*
+	 * If we are in pageout, we know that memory is already tight,
+	 * the arc is already going to be evicting, so we just want to
+	 * continue to let page writes occur as quickly as possible.
+	 */
+	if (current_is_kswapd()) {
+		if (spa->spa_lowmem_page_load >
+		    MAX(arc_sys_free / 4, free_memory) / 4) {
+			DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
+			return (SET_ERROR(ERESTART));
+		}
+		/* Note: reserve is inflated, so we deflate */
+		atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
+		return (0);
+	} else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
+		/* memory is low, delay before restarting */
+		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
+		DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
+		return (SET_ERROR(EAGAIN));
+	}
+	spa->spa_lowmem_page_load = 0;
+	return (0);
+}
+
+/*
+ * Set arc_sys_free from the total amount of memory.  The result is three
+ * times an estimated maximum high watermark plus 1/32 of all memory.
+ */
+static void
+arc_set_sys_free(uint64_t allmem)
+{
+	/*
+	 * The ARC tries to keep at least this much memory available for the
+	 * system.  This gives the ARC time to shrink in response to memory
+	 * pressure, before running completely out of memory and invoking the
+	 * direct-reclaim ARC shrinker.
+	 *
+	 * This should be more than twice high_wmark_pages(), so that
+	 * arc_wait_for_eviction() will wait until at least the
+	 * high_wmark_pages() are free (see arc_evict_state_impl()).
+	 *
+	 * Note: Even when the system is very low on memory, the kernel's
+	 * shrinker code may only ask for one "batch" of pages (512KB) to be
+	 * evicted.  If concurrent allocations consume these pages, there may
+	 * still be insufficient free pages, and the OOM killer takes action.
+	 *
+	 * By setting arc_sys_free large enough, and having
+	 * arc_wait_for_eviction() wait until there is at least arc_sys_free/2
+	 * free memory, it is much less likely that concurrent allocations can
+	 * consume all the memory that was evicted before checking for
+	 * OOM.
+	 *
+	 * It's hard to iterate the zones from a linux kernel module, which
+	 * makes it difficult to determine the watermark dynamically. Instead
+	 * we compute the maximum high watermark for this system, based
+	 * on the amount of memory, assuming default parameters on Linux kernel
+	 * 5.3.
+	 */
+
+	/*
+	 * Base wmark_low is 4 * the square root of Kbytes of RAM.
+	 */
+	long wmark = 4 * int_sqrt(allmem/1024) * 1024;
+
+	/*
+	 * Clamp to between 128K and 64MB.
+	 */
+	wmark = MAX(wmark, 128 * 1024);
+	wmark = MIN(wmark, 64 * 1024 * 1024);
+
+	/*
+	 * watermark_boost can increase the wmark by up to 150%.
+	 */
+	wmark += wmark * 150 / 100;
+
+	/*
+	 * arc_sys_free needs to be more than 2x the watermark, because
+	 * arc_wait_for_eviction() waits for half of arc_sys_free.  Bump this up
+	 * to 3x to ensure we're above it.
+	 */
+	arc_sys_free = wmark * 3 + allmem / 32;
+}
+
+/*
+ * Low-memory setup for the ARC: hook up the shrinker and derive
+ * arc_sys_free from the total amount of memory.
+ */
+void
+arc_lowmem_init(void)
+{
+	uint64_t total_bytes = arc_all_memory();
+
+	/*
+	 * The shrinker provides synchronous (direct) memory reclaim from
+	 * the arc, so that kswapd does not swap out pages when shrinking
+	 * the arc would be preferable.
+	 */
+	spl_register_shrinker(&arc_shrinker);
+
+	arc_set_sys_free(total_bytes);
+}
+
+/* Undo arc_lowmem_init() by unregistering the ARC shrinker. */
+void
+arc_lowmem_fini(void)
+{
+	spl_unregister_shrinker(&arc_shrinker);
+}
+
+/*
+ * Module-parameter setter for long-valued ARC tunables.  The value is
+ * stored with param_set_long(); on success the ARC tuning is refreshed.
+ * Returns 0 on success, otherwise the error from param_set_long().
+ */
+int
+param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
+{
+	int rc = param_set_long(buf, kp);
+
+	if (rc < 0)
+		return (SET_ERROR(rc));
+
+	arc_tuning_update(B_TRUE);
+	return (0);
+}
+
+/* Setter for the ARC minimum; forwards to param_set_arc_long(). */
+int
+param_set_arc_min(const char *buf, zfs_kernel_param_t *kp)
+{
+	return (param_set_arc_long(buf, kp));
+}
+
+/* Setter for the ARC maximum; forwards to param_set_arc_long(). */
+int
+param_set_arc_max(const char *buf, zfs_kernel_param_t *kp)
+{
+	return (param_set_arc_long(buf, kp));
+}
+
+/*
+ * Module-parameter setter for int-valued ARC tunables.  The value is
+ * stored with param_set_int(); on success the ARC tuning is refreshed.
+ * Returns 0 on success, otherwise the error from param_set_int().
+ */
+int
+param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
+{
+	int rc = param_set_int(buf, kp);
+
+	if (rc < 0)
+		return (SET_ERROR(rc));
+
+	arc_tuning_update(B_TRUE);
+	return (0);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Memory hotplug notifier.  Only MEM_ONLINE is acted upon: the ARC limits
+ * and arc_sys_free are recomputed from the new total, and
+ * zfs_dirty_data_max_max is given a value if it is still 0.  Always
+ * returns NOTIFY_OK.
+ */
+/* ARGSUSED */
+static int
+arc_hotplug_callback(struct notifier_block *self, unsigned long action,
+    void *arg)
+{
+	uint64_t allmem = arc_all_memory();
+	if (action != MEM_ONLINE)
+		return (NOTIFY_OK);
+
+	arc_set_limits(allmem);
+
+	/* the cap is 4 GiB with __LP64__ and 1 GiB without */
+#ifdef __LP64__
+	if (zfs_dirty_data_max_max == 0)
+		zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024,
+		    allmem * zfs_dirty_data_max_max_percent / 100);
+#else
+	if (zfs_dirty_data_max_max == 0)
+		zfs_dirty_data_max_max = MIN(1ULL * 1024 * 1024 * 1024,
+		    allmem * zfs_dirty_data_max_max_percent / 100);
+#endif
+
+	arc_set_sys_free(allmem);
+	return (NOTIFY_OK);
+}
+#endif
+
+/*
+ * Register arc_hotplug_callback() as a memory notifier.  Does nothing when
+ * CONFIG_MEMORY_HOTPLUG is not defined.
+ */
+void
+arc_register_hotplug(void)
+{
+#ifdef CONFIG_MEMORY_HOTPLUG
+	arc_hotplug_callback_mem_nb.notifier_call = arc_hotplug_callback;
+	/* There is no significance to the value 100 */
+	arc_hotplug_callback_mem_nb.priority = 100;
+	register_memory_notifier(&arc_hotplug_callback_mem_nb);
+#endif
+}
+
+/*
+ * Remove the memory notifier installed by arc_register_hotplug().  Does
+ * nothing when CONFIG_MEMORY_HOTPLUG is not defined.
+ */
+void
+arc_unregister_hotplug(void)
+{
+#ifdef CONFIG_MEMORY_HOTPLUG
+	unregister_memory_notifier(&arc_hotplug_callback_mem_nb);
+#endif
+}
+#else /* _KERNEL */
+int64_t
+arc_available_memory(void)
+{
+	int64_t lowest = INT64_MAX;
+
+	/* Every 100 calls, free a small amount */
+	if (random_in_range(100) == 0)
+		lowest = -1024;
+
+	return (lowest);
+}
+
+/* User-space version: never throttles, always returns 0. */
+int
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
+{
+	return (0);
+}
+
+/* User-space version: half of physmem, converted from pages to bytes. */
+uint64_t
+arc_all_memory(void)
+{
+	return (ptob(physmem) / 2);
+}
+
+/*
+ * User-space version: a random amount bounded by 20% of
+ * arc_all_memory().
+ */
+uint64_t
+arc_free_memory(void)
+{
+	return (random_in_range(arc_all_memory() * 20 / 100));
+}
+
+/* User-space version: intentionally a no-op. */
+void
+arc_register_hotplug(void)
+{
+}
+
+/* User-space version: intentionally a no-op. */
+void
+arc_unregister_hotplug(void)
+{
+}
+#endif /* _KERNEL */
+
+/*
+ * Helper function for arc_prune_async().  It is responsible for safely
+ * handling the execution of a registered arc_prune_func_t and for dropping
+ * the reference that arc_prune_async() took before dispatching it.
+ */
+static void
+arc_prune_task(void *ptr)
+{
+	arc_prune_t *ap = (arc_prune_t *)ptr;
+	arc_prune_func_t *func = ap->p_pfunc;
+
+	if (func != NULL)
+		func(ap->p_adjust, ap->p_private);
+
+	zfs_refcount_remove(&ap->p_refcnt, func);
+}
+
+/*
+ * Notify registered consumers they must drop holds on a portion of the ARC
+ * buffers they reference.  This provides a mechanism to ensure the ARC can
+ * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers.  This
+ * is analogous to dnlc_reduce_cache() but more generic.
+ *
+ * This operation is performed asynchronously so it may be safely called
+ * in the context of the arc_reclaim_thread().  A reference is taken here
+ * for each registered arc_prune_t and the arc_prune_task() is responsible
+ * for releasing it once the registered arc_prune_func_t has completed.
+ */
+void
+arc_prune_async(int64_t adjust)
+{
+	arc_prune_t *ap;
+
+	mutex_enter(&arc_prune_mtx);
+	for (ap = list_head(&arc_prune_list); ap != NULL;
+	    ap = list_next(&arc_prune_list, ap)) {
+
+		/* skip entries whose reference count is already 2 or more */
+		if (zfs_refcount_count(&ap->p_refcnt) >= 2)
+			continue;
+
+		zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
+		ap->p_adjust = adjust;
+		if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
+		    ap, TQ_SLEEP) == TASKQID_INVALID) {
+			/* dispatch failed, so give the reference back */
+			zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
+			continue;
+		}
+		ARCSTAT_BUMP(arcstat_prune);
+	}
+	mutex_exit(&arc_prune_mtx);
+}
+
+/* BEGIN CSTYLED */
+/* Expose zfs_arc_shrinker_limit as a read/write (ZMOD_RW) INT parameter. */
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
+	"Limit on number of pages that ARC shrinker can reclaim at once");
+/* END CSTYLED */

diff --git a/zfs/module/os/linux/zfs/mmp_os.c b/zfs/module/os/linux/zfs/mmp_os.c
new file mode 100644
index 0000000..ff3ef1b
--- /dev/null
+++ b/zfs/module/os/linux/zfs/mmp_os.c

@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/mmp.h>
+
+/*
+ * Module parameter setter for the multihost interval.  Stores the new
+ * value with param_set_ulong() and, unless that reported a negative error
+ * or the SPA mode is still SPA_MODE_UNINIT, signals all MMP threads.
+ * Returns whatever param_set_ulong() returned.
+ */
+int
+param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
+{
+	int error = param_set_ulong(val, kp);
+
+	if (error >= 0 && spa_mode_global != SPA_MODE_UNINIT)
+		mmp_signal_all_threads();
+
+	return (error);
+}

diff --git a/zfs/module/os/linux/zfs/policy.c b/zfs/module/os/linux/zfs/policy.c
new file mode 100644
index 0000000..8d508bc
--- /dev/null
+++ b/zfs/module/os/linux/zfs/policy.c

@@ -0,0 +1,375 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright (C) 2016 Lawrence Livermore National Security, LLC.
+ *
+ * For Linux the vast majority of this enforcement is already handled via
+ * the standard Linux VFS permission checks.  However certain administrative
+ * commands which bypass the standard mechanisms may need to make use of
+ * this functionality.
+ */
+
+#include <sys/policy.h>
+#include <linux/security.h>
+#include <linux/vfs_compat.h>
+
+/*
+ * The passed credentials cannot be directly verified because Linux only
+ * provides an interface to check the *current* process credentials.  In
+ * order to handle this the capable() test is only run when the passed
+ * credentials match the current process credentials or the kcred.  In
+ * all other cases this function must fail and return the passed err.
+ *
+ * When CONFIG_USER_NS is defined and ns is non-NULL the test is made with
+ * ns_capable() against that namespace; otherwise capable() is used.
+ * Returns 0 when the capability is held and err when it is not.
+ */
+static int
+priv_policy_ns(const cred_t *cr, int capability, int err,
+    struct user_namespace *ns)
+{
+	if (cr != CRED() && (cr != kcred))
+		return (err);
+
+#if defined(CONFIG_USER_NS)
+	if (!(ns ? ns_capable(ns, capability) : capable(capability)))
+#else
+	if (!capable(capability))
+#endif
+		return (err);
+
+	return (0);
+}
+
+/*
+ * Capability check without a user namespace: forwards to priv_policy_ns()
+ * with ns == NULL, so the plain capable() test is used.
+ */
+static int
+priv_policy(const cred_t *cr, int capability, int err)
+{
+	return (priv_policy_ns(cr, capability, err, NULL));
+}
+
+/*
+ * Capability check relative to the credential's own user namespace
+ * (cr->user_ns) when CONFIG_USER_NS is defined; without it this is the same
+ * as priv_policy().
+ */
+static int
+priv_policy_user(const cred_t *cr, int capability, int err)
+{
+	/*
+	 * All priv_policy_user checks are preceded by kuid/kgid_has_mapping()
+	 * checks. If we cannot do them, we shouldn't be using ns_capable()
+	 * since we don't know whether the affected files are valid in our
+	 * namespace.
+	 */
+#if defined(CONFIG_USER_NS)
+	return (priv_policy_ns(cr, capability, err, cr->user_ns));
+#else
+	return (priv_policy_ns(cr, capability, err, NULL));
+#endif
+}
+
+/*
+ * Checks for operations that are either client-only or are used by
+ * both clients and servers.
+ *
+ * Implemented as a CAP_SYS_ADMIN check; returns 0 when allowed and EPERM
+ * otherwise.
+ */
+int
+secpolicy_nfs(const cred_t *cr)
+{
+	return (priv_policy(cr, CAP_SYS_ADMIN, EPERM));
+}
+
+/*
+ * Catch all system configuration.
+ *
+ * Implemented as a CAP_SYS_ADMIN check; returns 0 when allowed and EPERM
+ * otherwise.  The checkonly argument is not examined.
+ */
+int
+secpolicy_sys_config(const cred_t *cr, boolean_t checkonly)
+{
+	return (priv_policy(cr, CAP_SYS_ADMIN, EPERM));
+}
+
+/*
+ * Like secpolicy_vnode_access() but we get the actual wanted mode and the
+ * current mode of the file, not the missing bits.
+ *
+ * Enforced in the Linux VFS.  This implementation examines none of its
+ * arguments and always returns 0.
+ */
+int
+secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner,
+    mode_t curmode, mode_t wantmode)
+{
+	return (0);
+}
+
+/*
+ * This is a special routine for ZFS; it is used to determine whether
+ * any of the privileges in effect allow any form of access to the
+ * file.  There's no reason to audit this or any reason to record
+ * this.  More work is needed to do the "KPLD" stuff.
+ *
+ * Returns 0 as soon as any one of the checks below succeeds, EPERM when
+ * none does.
+ */
+int
+secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
+{
+	/* The owner always has some form of access. */
+	if (crgetuid(cr) == owner)
+		return (0);
+
+	if (zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (0);
+
+#if defined(CONFIG_USER_NS)
+	/* The owner must be mapped in cr's namespace before using ns caps. */
+	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
+		return (EPERM);
+#endif
+
+	if (priv_policy_user(cr, CAP_DAC_OVERRIDE, EPERM) == 0)
+		return (0);
+
+	if (priv_policy_user(cr, CAP_DAC_READ_SEARCH, EPERM) == 0)
+		return (0);
+
+	return (EPERM);
+}
+
+/*
+ * Determine if subject can chown owner of a file.
+ *
+ * The owner itself is always allowed.  Anyone else needs CAP_FOWNER and,
+ * with CONFIG_USER_NS, the owner uid must be mapped in cr's user namespace.
+ * Returns 0 when allowed and EPERM otherwise.
+ */
+int
+secpolicy_vnode_chown(const cred_t *cr, uid_t owner)
+{
+	if (crgetuid(cr) == owner)
+		return (0);
+
+#if defined(CONFIG_USER_NS)
+	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
+		return (EPERM);
+#endif
+
+	return (priv_policy_user(cr, CAP_FOWNER, EPERM));
+}
+
+/*
+ * Determine if subject can change group ownership of a file.
+ *
+ * Implemented as a CAP_SETGID check; returns 0 when allowed and EPERM
+ * otherwise.
+ */
+int
+secpolicy_vnode_create_gid(const cred_t *cr)
+{
+	return (priv_policy(cr, CAP_SETGID, EPERM));
+}
+
+/*
+ * Policy determines whether we can remove an entry from a directory,
+ * regardless of permission bits.
+ *
+ * Implemented as a CAP_FOWNER check; returns 0 when allowed and EPERM
+ * otherwise.
+ */
+int
+secpolicy_vnode_remove(const cred_t *cr)
+{
+	return (priv_policy(cr, CAP_FOWNER, EPERM));
+}
+
+/*
+ * Determine that subject can modify the mode of a file.  allzone privilege
+ * needed when modifying root owned object.
+ *
+ * The owner itself is always allowed.  Anyone else needs CAP_FOWNER and,
+ * with CONFIG_USER_NS, the owner uid must be mapped in cr's user namespace.
+ * Returns 0 when allowed and EPERM otherwise.
+ */
+int
+secpolicy_vnode_setdac(const cred_t *cr, uid_t owner)
+{
+	if (crgetuid(cr) == owner)
+		return (0);
+
+#if defined(CONFIG_USER_NS)
+	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
+		return (EPERM);
+#endif
+
+	return (priv_policy_user(cr, CAP_FOWNER, EPERM));
+}
+
+/*
+ * Are we allowed to retain the set-uid/set-gid bits when
+ * changing ownership or when writing to a file?
+ * "issuid" should be true when set-uid; only in that case
+ * root ownership is checked (setgid is assumed).
+ *
+ * Enforced in the Linux VFS.
+ *
+ * This implementation checks CAP_FSETID via priv_policy_user() and returns
+ * 0 when it is held, EPERM otherwise.  Neither zp nor issuidroot is
+ * examined.
+ */
+int
+secpolicy_vnode_setid_retain(struct znode *zp __maybe_unused, const cred_t *cr,
+    boolean_t issuidroot)
+{
+	return (priv_policy_user(cr, CAP_FSETID, EPERM));
+}
+
+/*
+ * Determine that subject can set the file setgid flag.
+ *
+ * With CONFIG_USER_NS the gid must be mapped in cr's user namespace.  A
+ * caller whose gid matches, or who is a member of the group, is allowed;
+ * anyone else needs CAP_FSETID.  Returns 0 when allowed and EPERM otherwise.
+ */
+int
+secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
+{
+#if defined(CONFIG_USER_NS)
+	if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid)))
+		return (EPERM);
+#endif
+	if (crgetgid(cr) != gid && !groupmember(gid, cr))
+		return (priv_policy_user(cr, CAP_FSETID, EPERM));
+
+	return (0);
+}
+
+/*
+ * Determine if the subject can inject faults in the ZFS fault injection
+ * framework.  Requires all privileges.
+ *
+ * Here that is implemented as a CAP_SYS_ADMIN check; returns 0 when
+ * allowed and EACCES otherwise.
+ */
+int
+secpolicy_zinject(const cred_t *cr)
+{
+	return (priv_policy(cr, CAP_SYS_ADMIN, EACCES));
+}
+
+/*
+ * Determine if the subject has permission to manipulate ZFS datasets
+ * (not pools).  Equivalent to the SYS_MOUNT privilege.
+ *
+ * Implemented as a CAP_SYS_ADMIN check; returns 0 when allowed and EACCES
+ * otherwise.
+ */
+int
+secpolicy_zfs(const cred_t *cr)
+{
+	return (priv_policy(cr, CAP_SYS_ADMIN, EACCES));
+}
+
+/*
+ * Equivalent to secpolicy_zfs(), but works even if the cred_t is not that of
+ * the current process.  Takes both cred_t and proc_t so that this can work
+ * easily on all platforms.
+ *
+ * The has_capability() function was first exported in the 4.10 Linux kernel
+ * then backported to some LTS kernels.  Prior to this change there was no
+ * mechanism to perform this check therefore EACCES is returned when the
+ * functionality is not present in the kernel.
+ *
+ * Only proc is consulted here; cr is not examined.  Returns 0 when proc has
+ * CAP_SYS_ADMIN and EACCES otherwise.
+ */
+int
+secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
+{
+#if defined(HAVE_HAS_CAPABILITY)
+	if (!has_capability(proc, CAP_SYS_ADMIN))
+		return (EACCES);
+	return (0);
+#else
+	return (EACCES);
+#endif
+}
+
+/*
+ * Strip the set-uid and set-gid bits from vap->va_mode, and flag AT_MODE in
+ * va_mask, when at least one of them is present and
+ * secpolicy_vnode_setid_retain() does not allow the caller to keep them.
+ */
+void
+secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
+{
+	boolean_t issuidroot;
+
+	/* Nothing to clear when neither bit is set. */
+	if ((vap->va_mode & (S_ISUID | S_ISGID)) == 0)
+		return;
+
+	/* set-uid requested together with an explicit change to uid 0 */
+	issuidroot = (vap->va_mode & S_ISUID) != 0 &&
+	    (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0;
+
+	if (secpolicy_vnode_setid_retain(NULL, cr, issuidroot) == 0)
+		return;
+
+	vap->va_mask |= AT_MODE;
+	vap->va_mode &= ~(S_ISUID|S_ISGID);
+}
+
+/*
+ * Determine that subject can set the file setid flags.
+ *
+ * The owner itself is always allowed.  Anyone else needs CAP_FSETID and,
+ * with CONFIG_USER_NS, the owner uid must be mapped in cr's user namespace.
+ * Returns 0 when allowed and EPERM otherwise.
+ */
+static int
+secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
+{
+	if (crgetuid(cr) == owner)
+		return (0);
+
+#if defined(CONFIG_USER_NS)
+	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
+		return (EPERM);
+#endif
+
+	return (priv_policy_user(cr, CAP_FSETID, EPERM));
+}
+
+/*
+ * Determine that subject can make a file "sticky".
+ *
+ * Enforced in the Linux VFS.  This implementation does not examine cr and
+ * always returns 0.
+ */
+static int
+secpolicy_vnode_stky_modify(const cred_t *cr)
+{
+	return (0);
+}
+
+/*
+ * Vet the set-uid, sticky and set-gid bits requested in vap->va_mode.
+ *
+ * A set-uid request that secpolicy_vnode_setid_modify() rejects fails the
+ * whole call with that error.  Sticky (on a non-directory) and set-gid
+ * requests that are not permitted are silently dropped from va_mode
+ * instead.  Returns 0 unless the set-uid check failed.
+ */
+int
+secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
+    const vattr_t *ovap, cred_t *cr)
+{
+	/* set-uid: checked against the file's current owner */
+	if ((vap->va_mode & S_ISUID) != 0) {
+		int error = secpolicy_vnode_setid_modify(cr, ovap->va_uid);
+
+		if (error != 0)
+			return (error);
+	}
+
+	/* sticky bit on a non-directory: drop it when not permitted */
+	if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0) {
+		if (secpolicy_vnode_stky_modify(cr) != 0)
+			vap->va_mode &= ~S_ISVTX;
+	}
+
+	/* set-gid: checked against the file's current group */
+	if ((vap->va_mode & S_ISGID) != 0) {
+		if (secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0)
+			vap->va_mode &= ~S_ISGID;
+	}
+
+	return (0);
+}
+
+/*
+ * Check privileges for setting xvattr attributes
+ *
+ * Delegates to secpolicy_vnode_chown(cr, owner); xvap and type are not
+ * examined.
+ */
+int
+secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, mode_t type)
+{
+	return (secpolicy_vnode_chown(cr, owner));
+}
+
+/*
+ * Check privileges for setattr attributes.
+ *
+ * Enforced in the Linux VFS.  This implementation examines none of its
+ * arguments and always returns 0.
+ */
+int
+secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap,
+    const struct vattr *ovap, int flags,
+    int unlocked_access(void *, int, cred_t *), void *node)
+{
+	return (0);
+}
+
+/*
+ * Check privileges for links.
+ *
+ * Enforced in the Linux VFS.  This implementation does not examine cr and
+ * always returns 0.
+ */
+int
+secpolicy_basic_link(const cred_t *cr)
+{
+	return (0);
+}

diff --git a/zfs/module/os/linux/zfs/qat.c b/zfs/module/os/linux/zfs/qat.c
new file mode 100644
index 0000000..08613b3
--- /dev/null
+++ b/zfs/module/os/linux/zfs/qat.c

@@ -0,0 +1,105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <sys/zfs_context.h>
+#include <sys/qat.h>
+
+/*
+ * Named 64-bit counters for the QAT compression, encryption/decryption and
+ * checksum paths.  qat_init() installs this structure as the data of the
+ * "qat" kstat.
+ */
+qat_stats_t qat_stats = {
+	{ "comp_requests",			KSTAT_DATA_UINT64 },
+	{ "comp_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "comp_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "decomp_requests",			KSTAT_DATA_UINT64 },
+	{ "decomp_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "decomp_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "dc_fails",				KSTAT_DATA_UINT64 },
+	{ "encrypt_requests",			KSTAT_DATA_UINT64 },
+	{ "encrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "encrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "decrypt_requests",			KSTAT_DATA_UINT64 },
+	{ "decrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "decrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "crypt_fails",			KSTAT_DATA_UINT64 },
+	{ "cksum_requests",			KSTAT_DATA_UINT64 },
+	{ "cksum_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "cksum_fails",			KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *qat_ksp = NULL;
+
+/*
+ * Allocate size_bytes with kmalloc(GFP_KERNEL) and store the address in
+ * *pp_mem_addr.  Returns CPA_STATUS_SUCCESS, or CPA_STATUS_RESOURCE when
+ * the allocation fails (*pp_mem_addr is then NULL).  The memory is not
+ * zeroed.
+ */
+CpaStatus
+qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
+{
+	void *mem = kmalloc(size_bytes, GFP_KERNEL);
+
+	*pp_mem_addr = mem;
+	return (mem != NULL ? CPA_STATUS_SUCCESS : CPA_STATUS_RESOURCE);
+}
+
+/*
+ * Free the memory *pp_mem_addr points to with kfree() and reset the
+ * caller's pointer to NULL.  A NULL *pp_mem_addr is ignored.
+ */
+void
+qat_mem_free_contig(void **pp_mem_addr)
+{
+	void *mem = *pp_mem_addr;
+
+	if (mem == NULL)
+		return;
+
+	kfree(mem);
+	*pp_mem_addr = NULL;
+}
+
+/*
+ * Create and install the "qat" named kstat backed by qat_stats, then
+ * initialize the compression (qat_dc_init) and crypto (qat_cy_init) sides.
+ * A failure of either is not fatal: the matching zfs_qat_*_disable flags
+ * are set instead.  Always returns 0.
+ */
+int
+qat_init(void)
+{
+	qat_ksp = kstat_create("zfs", 0, "qat", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	/* kstat creation is best effort; a NULL qat_ksp is tolerated */
+	if (qat_ksp != NULL) {
+		qat_ksp->ks_data = &qat_stats;
+		kstat_install(qat_ksp);
+	}
+
+	/*
+	 * Just set the disable flag when qat init failed, qat can be
+	 * turned on again in post-process after zfs module is loaded, e.g.:
+	 * echo 0 > /sys/module/zfs/parameters/zfs_qat_compress_disable
+	 */
+	if (qat_dc_init() != 0)
+		zfs_qat_compress_disable = 1;
+
+	/* checksum and encryption share the crypto instances */
+	if (qat_cy_init() != 0) {
+		zfs_qat_checksum_disable = 1;
+		zfs_qat_encrypt_disable = 1;
+	}
+
+	return (0);
+}
+
+/*
+ * Undo qat_init(): delete the kstat if one was created, then shut down the
+ * crypto side followed by the compression side.
+ */
+void
+qat_fini(void)
+{
+	if (qat_ksp != NULL) {
+		kstat_delete(qat_ksp);
+		qat_ksp = NULL;
+	}
+
+	qat_cy_fini();
+	qat_dc_fini();
+}
+
+#endif

diff --git a/zfs/module/os/linux/zfs/qat_compress.c b/zfs/module/os/linux/zfs/qat_compress.c
new file mode 100644
index 0000000..64e19e0
--- /dev/null
+++ b/zfs/module/os/linux/zfs/qat_compress.c

@@ -0,0 +1,550 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/completion.h>
+#include <sys/zfs_context.h>
+#include <sys/byteorder.h>
+#include <sys/zio.h>
+#include <sys/qat.h>
+
+/*
+ * Max instances in a QAT device, each instance is a channel to submit
+ * jobs to QAT hardware, this is only for pre-allocating instance and
+ * session arrays; the actual number of instances is defined in the
+ * QAT driver's configuration file.
+ */
+#define	QAT_DC_MAX_INSTANCES	48
+
+/*
+ * ZLIB head and foot size
+ */
+#define	ZLIB_HEAD_SZ		2
+#define	ZLIB_FOOT_SZ		4
+
+static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
+static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
+static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
+static Cpa16U num_inst = 0;
+static Cpa32U inst_num = 0;
+static boolean_t qat_dc_init_done = B_FALSE;
+int zfs_qat_compress_disable = 0;
+
+/*
+ * Decide whether a buffer of s_len bytes should go through QAT
+ * compression: the feature must not be disabled, initialization must have
+ * completed, and s_len must lie within [QAT_MIN_BUF_SIZE, QAT_MAX_BUF_SIZE].
+ */
+boolean_t
+qat_dc_use_accel(size_t s_len)
+{
+	if (zfs_qat_compress_disable || !qat_dc_init_done)
+		return (B_FALSE);
+
+	if (s_len < QAT_MIN_BUF_SIZE || s_len > QAT_MAX_BUF_SIZE)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Completion callback registered with cpaDcInitSession().  p_callback is
+ * the struct completion the submitter passed along with the request (may be
+ * NULL, in which case nothing happens); status is not examined here.
+ */
+static void
+qat_dc_callback(void *p_callback, CpaStatus status)
+{
+	if (p_callback != NULL)
+		complete((struct completion *)p_callback);
+}
+
+/*
+ * Tear down every compression instance counted in num_inst: stop the
+ * instance, release its session memory, and free its intermediate buffer
+ * lists together with the pointer array that holds them.  Finally reset
+ * num_inst and qat_dc_init_done so a later qat_dc_init() starts over.
+ */
+static void
+qat_dc_clean(void)
+{
+	Cpa16U buff_num = 0;
+	Cpa16U num_inter_buff_lists = 0;
+
+	for (Cpa16U i = 0; i < num_inst; i++) {
+		cpaDcStopInstance(dc_inst_handles[i]);
+		QAT_PHYS_CONTIG_FREE(session_handles[i]);
+		/* free intermediate buffers  */
+		if (buffer_array[i] != NULL) {
+			cpaDcGetNumIntermediateBuffers(
+			    dc_inst_handles[i], &num_inter_buff_lists);
+			for (buff_num = 0; buff_num < num_inter_buff_lists;
+			    buff_num++) {
+				CpaBufferList *buffer_inter =
+				    buffer_array[i][buff_num];
+
+				/* defensive: skip slots holding no list */
+				if (buffer_inter == NULL)
+					continue;
+
+				if (buffer_inter->pBuffers) {
+					QAT_PHYS_CONTIG_FREE(
+					    buffer_inter->pBuffers->pData);
+					QAT_PHYS_CONTIG_FREE(
+					    buffer_inter->pBuffers);
+				}
+				QAT_PHYS_CONTIG_FREE(
+				    buffer_inter->pPrivateMetaData);
+				QAT_PHYS_CONTIG_FREE(buffer_inter);
+			}
+
+			/*
+			 * Release the pointer array itself as well; it used
+			 * to be leaked, and the stale pointer would survive
+			 * into the next initialization attempt.
+			 */
+			QAT_PHYS_CONTIG_FREE(buffer_array[i]);
+		}
+	}
+
+	num_inst = 0;
+	qat_dc_init_done = B_FALSE;
+}
+
+/*
+ * Discover the QAT compression instances and prepare each one for use:
+ * install the address translation hook, allocate the intermediate buffer
+ * lists the instance asks for, start the instance and create a stateless
+ * deflate session on it.
+ *
+ * Returns 0 on success, when already initialized, or when no compression
+ * instances are configured (qat_dc_init_done stays B_FALSE in that last
+ * case).  Returns -1 on failure; once per-instance setup has begun,
+ * qat_dc_clean() is run before returning.
+ */
+int
+qat_dc_init(void)
+{
+	CpaStatus status = CPA_STATUS_SUCCESS;
+	Cpa32U sess_size = 0;
+	Cpa32U ctx_size = 0;
+	Cpa16U num_inter_buff_lists = 0;
+	Cpa16U buff_num = 0;
+	Cpa32U buff_meta_size = 0;
+	CpaDcSessionSetupData sd = {0};
+
+	if (qat_dc_init_done)
+		return (0);
+
+	status = cpaDcGetNumInstances(&num_inst);
+	if (status != CPA_STATUS_SUCCESS)
+		return (-1);
+
+	/* if the user has configured no QAT compression units just return */
+	if (num_inst == 0)
+		return (0);
+
+	/* the handle/session/buffer arrays are statically sized */
+	if (num_inst > QAT_DC_MAX_INSTANCES)
+		num_inst = QAT_DC_MAX_INSTANCES;
+
+	status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
+	if (status != CPA_STATUS_SUCCESS)
+		return (-1);
+
+	for (Cpa16U i = 0; i < num_inst; i++) {
+		cpaDcSetAddressTranslation(dc_inst_handles[i],
+		    (void*)virt_to_phys);
+
+		status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
+		    1, &buff_meta_size);
+
+		if (status == CPA_STATUS_SUCCESS)
+			status = cpaDcGetNumIntermediateBuffers(
+			    dc_inst_handles[i], &num_inter_buff_lists);
+
+		if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0) {
+			status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
+			    num_inter_buff_lists *
+			    sizeof (CpaBufferList *));
+			if (status != CPA_STATUS_SUCCESS)
+				goto fail;
+
+			/*
+			 * Start with every slot NULL so that a later failure
+			 * does not leave uninitialized pointers behind for
+			 * the cleanup path to follow.
+			 */
+			memset(buffer_array[i], 0, num_inter_buff_lists *
+			    sizeof (CpaBufferList *));
+		}
+
+		/*
+		 * Populate the intermediate buffer lists.  As before, this is
+		 * skipped when the size queries above did not succeed.  Any
+		 * allocation failure now aborts initialization right away
+		 * instead of being overwritten by the cpaDcStartInstance()
+		 * result below.
+		 */
+		for (buff_num = 0; status == CPA_STATUS_SUCCESS &&
+		    buff_num < num_inter_buff_lists; buff_num++) {
+			CpaBufferList *inter;
+
+			status = QAT_PHYS_CONTIG_ALLOC(
+			    &buffer_array[i][buff_num],
+			    sizeof (CpaBufferList));
+			if (status != CPA_STATUS_SUCCESS)
+				goto fail;
+
+			/* member pointers stay NULL until allocated */
+			inter = buffer_array[i][buff_num];
+			memset(inter, 0, sizeof (CpaBufferList));
+
+			status = QAT_PHYS_CONTIG_ALLOC(
+			    &inter->pPrivateMetaData, buff_meta_size);
+			if (status != CPA_STATUS_SUCCESS)
+				goto fail;
+
+			status = QAT_PHYS_CONTIG_ALLOC(&inter->pBuffers,
+			    sizeof (CpaFlatBuffer));
+			if (status != CPA_STATUS_SUCCESS)
+				goto fail;
+			memset(inter->pBuffers, 0, sizeof (CpaFlatBuffer));
+
+			/*
+			 *  implementation requires an intermediate
+			 *  buffer approximately twice the size of
+			 *  output buffer, which is 2x max buffer
+			 *  size here.
+			 */
+			status = QAT_PHYS_CONTIG_ALLOC(
+			    &inter->pBuffers->pData, 2 * QAT_MAX_BUF_SIZE);
+			if (status != CPA_STATUS_SUCCESS)
+				goto fail;
+
+			inter->numBuffers = 1;
+			inter->pBuffers->dataLenInBytes =
+			    2 * QAT_MAX_BUF_SIZE;
+		}
+
+		status = cpaDcStartInstance(dc_inst_handles[i],
+		    num_inter_buff_lists, buffer_array[i]);
+		if (status != CPA_STATUS_SUCCESS)
+			goto fail;
+
+		sd.compLevel = CPA_DC_L1;
+		sd.compType = CPA_DC_DEFLATE;
+		sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
+		sd.sessDirection = CPA_DC_DIR_COMBINED;
+		sd.sessState = CPA_DC_STATELESS;
+		sd.deflateWindowSize = 7;
+		sd.checksum = CPA_DC_ADLER32;
+		status = cpaDcGetSessionSize(dc_inst_handles[i],
+		    &sd, &sess_size, &ctx_size);
+		if (status != CPA_STATUS_SUCCESS)
+			goto fail;
+
+		QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
+		if (session_handles[i] == NULL)
+			goto fail;
+
+		status = cpaDcInitSession(dc_inst_handles[i],
+		    session_handles[i],
+		    &sd, NULL, qat_dc_callback);
+		if (status != CPA_STATUS_SUCCESS)
+			goto fail;
+	}
+
+	qat_dc_init_done = B_TRUE;
+	return (0);
+fail:
+	qat_dc_clean();
+	return (-1);
+}
+
+/*
+ * Shut down QAT compression.  Does nothing unless qat_dc_init() completed
+ * successfully; otherwise hands off to qat_dc_clean().
+ */
+void
+qat_dc_fini(void)
+{
+	if (!qat_dc_init_done)
+		return;
+
+	qat_dc_clean();
+}
+
+/*
+ * Run one compression or decompression request through a QAT instance.
+ *
+ * The source and destination buffers are split into per-page flat buffers,
+ * each page is kmap()ed, and the request is submitted to an instance picked
+ * round-robin.  The caller sleeps until the completion callback fires.
+ *
+ * The "add" parameter is an additional buffer which is passed
+ * to QAT as a scratch buffer alongside the destination buffer
+ * in case the "compressed" data ends up being larger than the
+ * original source data. This is necessary to prevent QAT from
+ * generating buffer overflow warnings for incompressible data.
+ *
+ * dir		QAT_COMPRESS or QAT_DECOMPRESS
+ * src/src_len	input buffer and its length in bytes
+ * dst/dst_len	output buffer and its length in bytes
+ * add/add_len	scratch buffer appended to the destination list (may be
+ *		NULL/0)
+ * c_len	out: number of bytes produced in dst
+ *
+ * Returns CPA_STATUS_SUCCESS, CPA_STATUS_INCOMPRESSIBLE when the compressed
+ * stream plus zlib header/footer does not fit in dst_len, or another
+ * CpaStatus error.  All pages mapped here are unmapped and all temporary
+ * allocations freed on every path.
+ */
+static int
+qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
+    char *dst, int dst_len, char *add, int add_len, size_t *c_len)
+{
+	CpaInstanceHandle dc_inst_handle;
+	CpaDcSessionHandle session_handle;
+	CpaBufferList *buf_list_src = NULL;
+	CpaBufferList *buf_list_dst = NULL;
+	CpaFlatBuffer *flat_buf_src = NULL;
+	CpaFlatBuffer *flat_buf_dst = NULL;
+	Cpa8U *buffer_meta_src = NULL;
+	Cpa8U *buffer_meta_dst = NULL;
+	Cpa32U buffer_meta_size = 0;
+	CpaDcRqResults dc_results = {.checksum = 1};
+	CpaStatus status = CPA_STATUS_FAIL;
+	Cpa32U hdr_sz = 0;
+	Cpa32U compressed_sz;
+	Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2;
+	Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2;
+	Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2;
+	Cpa32U bytes_left;
+	/*
+	 * Number of pages actually kmap()ed for each of the three buffers.
+	 * The cleanup code relies on these rather than on the numBuffers
+	 * fields of the buffer lists, which may be unallocated or
+	 * uninitialized when an early allocation fails.
+	 */
+	Cpa32U src_page_cnt = 0;
+	Cpa32U dst_pages = 0;
+	Cpa32U add_page_cnt = 0;
+	Cpa32U adler32 = 0;
+	char *data;
+	struct page *page;
+	struct page **in_pages = NULL;
+	struct page **out_pages = NULL;
+	struct page **add_pages = NULL;
+	Cpa32U page_off = 0;
+	struct completion complete;
+	Cpa32U page_num = 0;
+	Cpa16U i;
+
+	/*
+	 * We increment num_src_buf and num_dst_buf by 2 to allow
+	 * us to handle non page-aligned buffer addresses and buffers
+	 * whose sizes are not divisible by PAGE_SIZE.
+	 */
+	Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
+	    (num_src_buf * sizeof (CpaFlatBuffer));
+	Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
+	    ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer));
+
+	status = QAT_PHYS_CONTIG_ALLOC(&in_pages,
+	    num_src_buf * sizeof (struct page *));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	status = QAT_PHYS_CONTIG_ALLOC(&out_pages,
+	    num_dst_buf * sizeof (struct page *));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	status = QAT_PHYS_CONTIG_ALLOC(&add_pages,
+	    num_add_buf * sizeof (struct page *));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* pick the next instance round-robin */
+	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
+	dc_inst_handle = dc_inst_handles[i];
+	session_handle = session_handles[i];
+
+	cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
+	    &buffer_meta_size);
+	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf,
+	    &buffer_meta_size);
+	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* build source buffer list */
+	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* the flat buffers live directly behind the list header */
+	flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
+
+	buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
+
+	/* build destination buffer list */
+	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
+
+	buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
+
+	/* map the source one page at a time */
+	buf_list_src->numBuffers = 0;
+	buf_list_src->pPrivateMetaData = buffer_meta_src;
+	bytes_left = src_len;
+	data = src;
+	page_num = 0;
+	while (bytes_left > 0) {
+		page_off = ((long)data & ~PAGE_MASK);
+		page = qat_mem_to_page(data);
+		in_pages[page_num] = page;
+		flat_buf_src->pData = kmap(page) + page_off;
+		flat_buf_src->dataLenInBytes =
+		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
+
+		bytes_left -= flat_buf_src->dataLenInBytes;
+		data += flat_buf_src->dataLenInBytes;
+		flat_buf_src++;
+		buf_list_src->numBuffers++;
+		page_num++;
+		src_page_cnt++;
+	}
+
+	/* map the destination one page at a time */
+	buf_list_dst->numBuffers = 0;
+	buf_list_dst->pPrivateMetaData = buffer_meta_dst;
+	bytes_left = dst_len;
+	data = dst;
+	page_num = 0;
+	while (bytes_left > 0) {
+		page_off = ((long)data & ~PAGE_MASK);
+		page = qat_mem_to_page(data);
+		flat_buf_dst->pData = kmap(page) + page_off;
+		out_pages[page_num] = page;
+		flat_buf_dst->dataLenInBytes =
+		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
+
+		bytes_left -= flat_buf_dst->dataLenInBytes;
+		data += flat_buf_dst->dataLenInBytes;
+		flat_buf_dst++;
+		buf_list_dst->numBuffers++;
+		page_num++;
+		dst_pages++;
+	}
+
+	/* map additional scratch pages into the destination buffer list */
+	bytes_left = add_len;
+	data = add;
+	page_num = 0;
+	while (bytes_left > 0) {
+		page_off = ((long)data & ~PAGE_MASK);
+		page = qat_mem_to_page(data);
+		flat_buf_dst->pData = kmap(page) + page_off;
+		add_pages[page_num] = page;
+		flat_buf_dst->dataLenInBytes =
+		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
+
+		bytes_left -= flat_buf_dst->dataLenInBytes;
+		data += flat_buf_dst->dataLenInBytes;
+		flat_buf_dst++;
+		buf_list_dst->numBuffers++;
+		page_num++;
+		add_page_cnt++;
+	}
+
+	init_completion(&complete);
+
+	if (dir == QAT_COMPRESS) {
+		QAT_STAT_BUMP(comp_requests);
+		QAT_STAT_INCR(comp_total_in_bytes, src_len);
+
+		/* reserve room for the header at the front of dst */
+		cpaDcGenerateHeader(session_handle,
+		    buf_list_dst->pBuffers, &hdr_sz);
+		buf_list_dst->pBuffers->pData += hdr_sz;
+		buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
+		status = cpaDcCompressData(
+		    dc_inst_handle, session_handle,
+		    buf_list_src, buf_list_dst,
+		    &dc_results, CPA_DC_FLUSH_FINAL,
+		    &complete);
+		if (status != CPA_STATUS_SUCCESS) {
+			goto fail;
+		}
+
+		/* we now wait until the completion of the operation. */
+		wait_for_completion(&complete);
+
+		if (dc_results.status != CPA_STATUS_SUCCESS) {
+			status = CPA_STATUS_FAIL;
+			goto fail;
+		}
+
+		/* output spilled into the scratch pages: treat as no gain */
+		compressed_sz = dc_results.produced;
+		if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
+			status = CPA_STATUS_INCOMPRESSIBLE;
+			goto fail;
+		}
+
+		/* get adler32 checksum and append footer */
+		*(Cpa32U*)(dst + hdr_sz + compressed_sz) =
+		    BSWAP_32(dc_results.checksum);
+
+		*c_len = hdr_sz + compressed_sz + ZLIB_FOOT_SZ;
+		QAT_STAT_INCR(comp_total_out_bytes, *c_len);
+	} else {
+		ASSERT3U(dir, ==, QAT_DECOMPRESS);
+		QAT_STAT_BUMP(decomp_requests);
+		QAT_STAT_INCR(decomp_total_in_bytes, src_len);
+
+		/* skip the zlib header at the front of the source */
+		buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
+		buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
+		status = cpaDcDecompressData(dc_inst_handle, session_handle,
+		    buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL,
+		    &complete);
+
+		if (CPA_STATUS_SUCCESS != status) {
+			status = CPA_STATUS_FAIL;
+			goto fail;
+		}
+
+		/* we now wait until the completion of the operation. */
+		wait_for_completion(&complete);
+
+		if (dc_results.status != CPA_STATUS_SUCCESS) {
+			status = CPA_STATUS_FAIL;
+			goto fail;
+		}
+
+		/*
+		 * verify adler checksum
+		 *
+		 * NOTE(review): this reads ZLIB_FOOT_SZ bytes at
+		 * src + consumed + ZLIB_HEAD_SZ without comparing against
+		 * src_len -- confirm callers guarantee the trailer is
+		 * inside the source buffer.
+		 */
+		adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ);
+		if (adler32 != BSWAP_32(dc_results.checksum)) {
+			status = CPA_STATUS_FAIL;
+			goto fail;
+		}
+		*c_len = dc_results.produced;
+		QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
+	}
+
+fail:
+	if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE)
+		QAT_STAT_BUMP(dc_fails);
+
+	/*
+	 * Unmap exactly the pages that were mapped above.  The counters are
+	 * zero when we arrive here before any mapping took place, so this is
+	 * safe even if the buffer lists were never allocated.
+	 */
+	if (in_pages) {
+		for (page_num = 0; page_num < src_page_cnt; page_num++) {
+			kunmap(in_pages[page_num]);
+		}
+		QAT_PHYS_CONTIG_FREE(in_pages);
+	}
+
+	if (out_pages) {
+		for (page_num = 0; page_num < dst_pages; page_num++) {
+			kunmap(out_pages[page_num]);
+		}
+		QAT_PHYS_CONTIG_FREE(out_pages);
+	}
+
+	if (add_pages) {
+		for (page_num = 0; page_num < add_page_cnt; page_num++) {
+			kunmap(add_pages[page_num]);
+		}
+		QAT_PHYS_CONTIG_FREE(add_pages);
+	}
+
+	QAT_PHYS_CONTIG_FREE(buffer_meta_src);
+	QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
+	QAT_PHYS_CONTIG_FREE(buf_list_src);
+	QAT_PHYS_CONTIG_FREE(buf_list_dst);
+
+	return (status);
+}
+
+/*
+ * Entry point for QAT accelerated compression / decompression.
+ *
+ * For compression a scratch buffer of dst_len bytes is allocated for the
+ * duration of the call and handed to qat_compress_impl() as the "add"
+ * buffer; decompression passes no scratch buffer.  Returns the status
+ * reported by qat_compress_impl().
+ */
+int
+qat_compress(qat_compress_dir_t dir, char *src, int src_len,
+    char *dst, int dst_len, size_t *c_len)
+{
+	size_t scratch_len;
+	void *scratch;
+	int ret;
+
+	if (dir != QAT_COMPRESS) {
+		return (qat_compress_impl(dir, src, src_len, dst,
+		    dst_len, NULL, 0, c_len));
+	}
+
+	scratch_len = dst_len;
+	scratch = zio_data_buf_alloc(scratch_len);
+
+	ret = qat_compress_impl(dir, src, src_len, dst,
+	    dst_len, scratch, scratch_len, c_len);
+
+	zio_data_buf_free(scratch, scratch_len);
+
+	return (ret);
+}
+
+/*
+ * Setter for the zfs_qat_compress_disable module parameter.
+ *
+ * After storing the new value, a value of 0 (compression enabled) triggers
+ * qat_dc_init() if initialization has not completed yet.  When that fails
+ * the parameter is forced back to 1 and the error is returned.
+ */
+static int
+param_set_qat_compress(const char *val, zfs_kernel_param_t *kp)
+{
+	int *disable = kp->arg;
+	int error = param_set_int(val, kp);
+
+	if (error != 0)
+		return (error);
+
+	/* Nothing more to do unless QAT was just enabled and is not set up. */
+	if (*disable != 0 || qat_dc_init_done)
+		return (error);
+
+	error = qat_dc_init();
+	if (error != 0)
+		zfs_qat_compress_disable = 1;
+
+	return (error);
+}
+
+/*
+ * Expose zfs_qat_compress_disable as a module parameter (mode 0644).
+ * Writes go through param_set_qat_compress(); reads use param_get_int.
+ */
+module_param_call(zfs_qat_compress_disable, param_set_qat_compress,
+    param_get_int, &zfs_qat_compress_disable, 0644);
+MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression");
+
+#endif

diff --git a/zfs/module/os/linux/zfs/qat_crypt.c b/zfs/module/os/linux/zfs/qat_crypt.c
new file mode 100644
index 0000000..4771b2f
--- /dev/null
+++ b/zfs/module/os/linux/zfs/qat_crypt.c

@@ -0,0 +1,630 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * This file represents the QAT implementation of checksums and encryption.
+ * Internally, QAT shares the same cryptographic instances for both of these
+ * operations, so the code has been combined here. QAT data compression uses
+ * compression instances, so that code is separated into qat_compress.c
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/completion.h>
+#include <sys/zfs_context.h>
+#include <sys/zio_crypt.h>
+#include "lac/cpa_cy_im.h"
+#include "lac/cpa_cy_common.h"
+#include <sys/qat.h>
+
+/*
+ * Maximum number of instances in a QAT device.  Each instance is a
+ * channel used to submit jobs to the QAT hardware.  This limit is only
+ * used to pre-allocate the instance and session arrays; the actual
+ * number of instances is defined in the QAT driver's configuration file.
+ */
+#define	QAT_CRYPT_MAX_INSTANCES		48
+
+/* Capacity of the on-stack page arrays in qat_crypt()/qat_checksum(). */
+#define	MAX_PAGE_NUM			1024
+
+/* Request counter; taken modulo num_inst to choose an instance. */
+static Cpa32U inst_num = 0;
+/* Number of usable entries in cy_inst_handles[]. */
+static Cpa16U num_inst = 0;
+static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
+/* Set by qat_cy_init() once every instance has been started. */
+static boolean_t qat_cy_init_done = B_FALSE;
+/* Module parameters: non-zero disables the corresponding QAT offload. */
+int zfs_qat_encrypt_disable = 0;
+int zfs_qat_checksum_disable = 0;
+
+/*
+ * Per-request state shared between the submitting thread and
+ * symcallback().
+ */
+typedef struct cy_callback {
+	CpaBoolean verify_result;	/* "verify" value passed to callback */
+	struct completion complete;	/* completed by symcallback() */
+} cy_callback_t;
+
+/*
+ * Completion callback handed to cpaCySymInitSession().
+ *
+ * p_callback is the cy_callback_t supplied with the request (may be
+ * NULL).  The verify flag is recorded in it and the submitter waiting on
+ * its completion is woken.  The remaining arguments are unused.
+ */
+static void
+symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
+    void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify)
+{
+	cy_callback_t *waiter = p_callback;
+
+	/* no callback state supplied: nobody to notify */
+	if (waiter == NULL)
+		return;
+
+	/* record the result first, then signal that we have been called */
+	waiter->verify_result = verify;
+	complete(&waiter->complete);
+}
+
+/*
+ * Report whether a buffer of s_len bytes should be encrypted/decrypted
+ * through QAT: offload must not be disabled, the crypto instances must
+ * be initialised, and s_len must lie within
+ * [QAT_MIN_BUF_SIZE, QAT_MAX_BUF_SIZE].
+ */
+boolean_t
+qat_crypt_use_accel(size_t s_len)
+{
+	if (zfs_qat_encrypt_disable || !qat_cy_init_done)
+		return (B_FALSE);
+
+	return (s_len >= QAT_MIN_BUF_SIZE && s_len <= QAT_MAX_BUF_SIZE);
+}
+
+/*
+ * Report whether a buffer of s_len bytes should be checksummed through
+ * QAT: offload must not be disabled, the crypto instances must be
+ * initialised, and s_len must lie within
+ * [QAT_MIN_BUF_SIZE, QAT_MAX_BUF_SIZE].
+ */
+boolean_t
+qat_checksum_use_accel(size_t s_len)
+{
+	if (zfs_qat_checksum_disable || !qat_cy_init_done)
+		return (B_FALSE);
+
+	return (s_len >= QAT_MIN_BUF_SIZE && s_len <= QAT_MAX_BUF_SIZE);
+}
+
+/*
+ * Stop every instance recorded in cy_inst_handles[] and reset the
+ * module state (num_inst, qat_cy_init_done) to "not initialised".
+ */
+void
+qat_cy_clean(void)
+{
+	Cpa16U idx = 0;
+
+	while (idx < num_inst) {
+		cpaCyStopInstance(cy_inst_handles[idx]);
+		idx++;
+	}
+
+	num_inst = 0;
+	qat_cy_init_done = B_FALSE;
+}
+
+/*
+ * Discover and start the QAT crypto instances.
+ *
+ * Returns 0 if the instances are (or already were) started, or if no
+ * instance is configured at all; in the latter case qat_cy_init_done
+ * stays B_FALSE.  Returns -1 when any QAT call fails; if that happens
+ * while starting instances, qat_cy_clean() is run first.
+ */
+int
+qat_cy_init(void)
+{
+	Cpa16U idx;
+
+	if (qat_cy_init_done)
+		return (0);
+
+	if (cpaCyGetNumInstances(&num_inst) != CPA_STATUS_SUCCESS)
+		return (-1);
+
+	/* the user has configured no QAT encryption units */
+	if (num_inst == 0)
+		return (0);
+
+	/* never use more instances than cy_inst_handles[] can hold */
+	if (num_inst > QAT_CRYPT_MAX_INSTANCES)
+		num_inst = QAT_CRYPT_MAX_INSTANCES;
+
+	if (cpaCyGetInstances(num_inst, &cy_inst_handles[0]) !=
+	    CPA_STATUS_SUCCESS)
+		return (-1);
+
+	for (idx = 0; idx < num_inst; idx++) {
+		CpaInstanceHandle handle = cy_inst_handles[idx];
+
+		/* translation must be set before the instance is started */
+		if (cpaCySetAddressTranslation(handle,
+		    (void *)virt_to_phys) != CPA_STATUS_SUCCESS ||
+		    cpaCyStartInstance(handle) != CPA_STATUS_SUCCESS) {
+			qat_cy_clean();
+			return (-1);
+		}
+	}
+
+	qat_cy_init_done = B_TRUE;
+	return (0);
+}
+
+/*
+ * Tear down the QAT crypto instances; does nothing if they were never
+ * initialised.
+ */
+void
+qat_cy_fini(void)
+{
+	if (qat_cy_init_done)
+		qat_cy_clean();
+}
+
+/*
+ * Build and initialise a QAT symmetric session on inst_handle for
+ * AES-GCM encryption or decryption (cipher and hash chained).
+ *
+ * CCM is rejected with CPA_STATUS_FAIL.  On success *cy_session_ctx
+ * points at a context allocated with QAT_PHYS_CONTIG_ALLOC() which the
+ * caller must release.  If session initialisation fails the context
+ * allocated here is freed again and the failing CpaStatus is returned.
+ */
+static CpaStatus
+qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
+    CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
+    Cpa64U crypt, Cpa32U aad_len)
+{
+	CpaCySymSessionSetupData setup = { 0 };
+	Cpa32U session_size;
+	CpaStatus rc;
+
+	if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM)
+		return (CPA_STATUS_FAIL);
+
+	/* session-wide settings */
+	setup.sessionPriority = CPA_CY_PRIORITY_NORMAL;
+	setup.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
+	setup.digestIsAppended = CPA_FALSE;
+	setup.verifyDigest = CPA_FALSE;
+
+	/* cipher part of the chain */
+	setup.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_GCM;
+	setup.cipherSetupData.pCipherKey = key->ck_data;
+	setup.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8;
+
+	/* hash part of the chain */
+	setup.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_GCM;
+	setup.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH;
+	setup.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN;
+	setup.hashSetupData.authModeSetupData.aadLenInBytes = aad_len;
+
+	/* direction decides both the cipher mode and the chain order */
+	if (dir != QAT_ENCRYPT) {
+		ASSERT3U(dir, ==, QAT_DECRYPT);
+		setup.cipherSetupData.cipherDirection =
+		    CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT;
+		setup.algChainOrder =
+		    CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
+	} else {
+		setup.cipherSetupData.cipherDirection =
+		    CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT;
+		setup.algChainOrder =
+		    CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
+	}
+
+	rc = cpaCySymSessionCtxGetSize(inst_handle, &setup, &session_size);
+	if (rc != CPA_STATUS_SUCCESS)
+		return (rc);
+
+	rc = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, session_size);
+	if (rc != CPA_STATUS_SUCCESS)
+		return (rc);
+
+	rc = cpaCySymInitSession(inst_handle, symcallback, &setup,
+	    *cy_session_ctx);
+	if (rc != CPA_STATUS_SUCCESS)
+		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
+
+	return (rc);
+}
+
+/*
+ * Build and initialise a QAT plain-hash session on inst_handle for the
+ * given ZFS checksum.
+ *
+ * Only ZIO_CHECKSUM_SHA256 is accepted; anything else returns
+ * CPA_STATUS_FAIL.  On success *cy_session_ctx points at a context
+ * allocated with QAT_PHYS_CONTIG_ALLOC() which the caller must release.
+ * If session initialisation fails the context allocated here is freed
+ * again and the failing CpaStatus is returned.
+ */
+static CpaStatus
+qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle,
+    CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum)
+{
+	CpaCySymSessionSetupData setup = { 0 };
+	Cpa32U session_size;
+	CpaStatus rc;
+
+	/*
+	 * ZFS's SHA512 checksum is actually SHA512/256, which uses
+	 * a different IV from standard SHA512. QAT does not support
+	 * SHA512/256, so we can only support SHA256.
+	 */
+	if (cksum != ZIO_CHECKSUM_SHA256)
+		return (CPA_STATUS_FAIL);
+
+	setup.sessionPriority = CPA_CY_PRIORITY_NORMAL;
+	setup.symOperation = CPA_CY_SYM_OP_HASH;
+	setup.digestIsAppended = CPA_FALSE;
+	setup.verifyDigest = CPA_FALSE;
+
+	setup.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA256;
+	setup.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN;
+	setup.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t);
+
+	rc = cpaCySymSessionCtxGetSize(inst_handle, &setup, &session_size);
+	if (rc != CPA_STATUS_SUCCESS)
+		return (rc);
+
+	rc = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, session_size);
+	if (rc != CPA_STATUS_SUCCESS)
+		return (rc);
+
+	rc = cpaCySymInitSession(inst_handle, symcallback, &setup,
+	    *cy_session_ctx);
+	if (rc != CPA_STATUS_SUCCESS)
+		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
+
+	return (rc);
+}
+
+/*
+ * Allocate the private metadata QAT needs for buffer lists holding up
+ * to nr_bufs flat buffers.
+ *
+ * src and dst may be the same list, in which case only one metadata
+ * area is allocated.  On failure the metadata of both lists is released
+ * and the failing CpaStatus is returned.
+ */
+static CpaStatus
+qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
+    CpaBufferList *src, CpaBufferList *dst)
+{
+	Cpa32U meta_len = 0;
+	CpaStatus rc;
+
+	rc = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_len);
+	if (rc != CPA_STATUS_SUCCESS)
+		return (rc);
+
+	rc = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_len);
+	if (rc == CPA_STATUS_SUCCESS && src != dst) {
+		rc = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData,
+		    meta_len);
+	}
+
+	if (rc == CPA_STATUS_SUCCESS)
+		return (CPA_STATUS_SUCCESS);
+
+	/* an allocation failed: release whatever metadata is attached */
+	QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData);
+	if (src != dst)
+		QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData);
+
+	return (rc);
+}
+
+/*
+ * Encrypt or decrypt enc_len bytes from src_buf into dst_buf on a QAT
+ * crypto instance, using a session built by qat_init_crypt_session_ctx()
+ * from key and crypt.
+ *
+ *	dir		QAT_ENCRYPT or QAT_DECRYPT
+ *	aad_buf		additional authenticated data; copied if aad_len > 0
+ *	iv_buf		ZIO_DATA_IV_LEN bytes of IV
+ *	digest_buf	ZIO_DATA_MAC_LEN bytes; read when decrypting,
+ *			written when encrypting
+ *
+ * The request is handed to cpaCySymPerformOp() and this function then
+ * sleeps until symcallback() completes it.  Returns CPA_STATUS_SUCCESS,
+ * or the CpaStatus of the step that failed.
+ */
+int
+qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
+    uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
+    crypto_key_t *key, uint64_t crypt, uint32_t enc_len)
+{
+	CpaStatus status = CPA_STATUS_SUCCESS;
+	Cpa16U i;
+	CpaInstanceHandle cy_inst_handle;
+	Cpa16U nr_bufs = (enc_len >> PAGE_SHIFT) + 2;
+	Cpa32U bytes_left = 0;
+	Cpa8S *data = NULL;
+	CpaCySymSessionCtx *cy_session_ctx = NULL;
+	cy_callback_t cb;
+	CpaCySymOpData op_data = { 0 };
+	CpaBufferList src_buffer_list = { 0 };
+	CpaBufferList dst_buffer_list = { 0 };
+	CpaFlatBuffer *flat_src_buf_array = NULL;
+	CpaFlatBuffer *flat_src_buf = NULL;
+	CpaFlatBuffer *flat_dst_buf_array = NULL;
+	CpaFlatBuffer *flat_dst_buf = NULL;
+	/*
+	 * NOTE(review): the mapping loops below do not check their index
+	 * against MAX_PAGE_NUM; presumably callers bound enc_len through
+	 * qat_crypt_use_accel() -- confirm.
+	 */
+	struct page *in_pages[MAX_PAGE_NUM];
+	struct page *out_pages[MAX_PAGE_NUM];
+	Cpa32U in_page_num = 0;
+	Cpa32U out_page_num = 0;
+	Cpa32U in_page_off = 0;
+	Cpa32U out_page_off = 0;
+
+	if (dir == QAT_ENCRYPT) {
+		QAT_STAT_BUMP(encrypt_requests);
+		QAT_STAT_INCR(encrypt_total_in_bytes, enc_len);
+	} else {
+		QAT_STAT_BUMP(decrypt_requests);
+		QAT_STAT_INCR(decrypt_total_in_bytes, enc_len);
+	}
+
+	/* pick an instance from the shared request counter */
+	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
+	cy_inst_handle = cy_inst_handles[i];
+
+	status = qat_init_crypt_session_ctx(dir, cy_inst_handle,
+	    &cy_session_ctx, key, crypt, aad_len);
+	if (status != CPA_STATUS_SUCCESS) {
+		/* don't count CCM as a failure since it's not supported */
+		if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM)
+			QAT_STAT_BUMP(crypt_fails);
+		return (status);
+	}
+
+	/*
+	 * We increment nr_bufs by 2 to allow us to handle non
+	 * page-aligned buffer addresses and buffers whose sizes
+	 * are not divisible by PAGE_SIZE.
+	 */
+	status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
+	    &src_buffer_list, &dst_buffer_list);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* flat buffer descriptors, digest, IV and (optional) AAD storage */
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&op_data.pDigestResult,
+	    ZIO_DATA_MAC_LEN);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&op_data.pIv,
+	    ZIO_DATA_IV_LEN);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	if (aad_len > 0) {
+		status = QAT_PHYS_CONTIG_ALLOC(&op_data.pAdditionalAuthData,
+		    aad_len);
+		if (status != CPA_STATUS_SUCCESS)
+			goto fail;
+		bcopy(aad_buf, op_data.pAdditionalAuthData, aad_len);
+	}
+
+	/*
+	 * Describe src_buf one page at a time.  Every page is kmap()ed
+	 * here and remembered in in_pages[] so it can be kunmap()ed at
+	 * the fail label.
+	 */
+	bytes_left = enc_len;
+	data = src_buf;
+	flat_src_buf = flat_src_buf_array;
+	while (bytes_left > 0) {
+		in_page_off = ((long)data & ~PAGE_MASK);
+		in_pages[in_page_num] = qat_mem_to_page(data);
+		flat_src_buf->pData = kmap(in_pages[in_page_num]) + in_page_off;
+		/* stop at the end of the page or of the data */
+		flat_src_buf->dataLenInBytes =
+		    min((long)PAGE_SIZE - in_page_off, (long)bytes_left);
+		data += flat_src_buf->dataLenInBytes;
+		bytes_left -= flat_src_buf->dataLenInBytes;
+		flat_src_buf++;
+		in_page_num++;
+	}
+	src_buffer_list.pBuffers = flat_src_buf_array;
+	src_buffer_list.numBuffers = in_page_num;
+
+	/* same again for dst_buf, tracked in out_pages[] */
+	bytes_left = enc_len;
+	data = dst_buf;
+	flat_dst_buf = flat_dst_buf_array;
+	while (bytes_left > 0) {
+		out_page_off = ((long)data & ~PAGE_MASK);
+		out_pages[out_page_num] = qat_mem_to_page(data);
+		flat_dst_buf->pData = kmap(out_pages[out_page_num]) +
+		    out_page_off;
+		flat_dst_buf->dataLenInBytes =
+		    min((long)PAGE_SIZE - out_page_off, (long)bytes_left);
+		data += flat_dst_buf->dataLenInBytes;
+		bytes_left -= flat_dst_buf->dataLenInBytes;
+		flat_dst_buf++;
+		out_page_num++;
+	}
+	dst_buffer_list.pBuffers = flat_dst_buf_array;
+	dst_buffer_list.numBuffers = out_page_num;
+
+	op_data.sessionCtx = cy_session_ctx;
+	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+	op_data.cryptoStartSrcOffsetInBytes = 0;
+	/* this zero is overwritten with enc_len a few lines below */
+	op_data.messageLenToCipherInBytes = 0;
+	op_data.hashStartSrcOffsetInBytes = 0;
+	op_data.messageLenToHashInBytes = 0;
+	op_data.messageLenToCipherInBytes = enc_len;
+	op_data.ivLenInBytes = ZIO_DATA_IV_LEN;
+	bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN);
+	/* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */
+	if (dir == QAT_DECRYPT)
+		bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN);
+
+	/* cb is the callback tag symcallback() receives as p_callback */
+	cb.verify_result = CPA_FALSE;
+	init_completion(&cb.complete);
+	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
+	    &src_buffer_list, &dst_buffer_list, NULL);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* we now wait until the completion of the operation. */
+	wait_for_completion(&cb.complete);
+
+	/* a CPA_FALSE verify flag from the callback is treated as failure */
+	if (cb.verify_result == CPA_FALSE) {
+		status = CPA_STATUS_FAIL;
+		goto fail;
+	}
+
+	if (dir == QAT_ENCRYPT) {
+		/* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */
+		bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN);
+		QAT_STAT_INCR(encrypt_total_out_bytes, enc_len);
+	} else {
+		QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
+	}
+
+	/* common exit path: also reached on success */
+fail:
+	if (status != CPA_STATUS_SUCCESS)
+		QAT_STAT_BUMP(crypt_fails);
+
+	/* only the pages actually mapped above are unmapped */
+	for (i = 0; i < in_page_num; i++)
+		kunmap(in_pages[i]);
+	for (i = 0; i < out_page_num; i++)
+		kunmap(out_pages[i]);
+
+	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
+	if (aad_len > 0)
+		QAT_PHYS_CONTIG_FREE(op_data.pAdditionalAuthData);
+	QAT_PHYS_CONTIG_FREE(op_data.pIv);
+	QAT_PHYS_CONTIG_FREE(op_data.pDigestResult);
+	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
+	QAT_PHYS_CONTIG_FREE(flat_dst_buf_array);
+
+	return (status);
+}
+
+/*
+ * Compute the checksum selected by cksum over size bytes at buf on a
+ * QAT crypto instance and store the digest in *zcp.
+ *
+ * The session is built by qat_init_checksum_session_ctx(), which only
+ * accepts ZIO_CHECKSUM_SHA256.  The request is handed to
+ * cpaCySymPerformOp() and this function then sleeps until symcallback()
+ * completes it.  Returns CPA_STATUS_SUCCESS, or the CpaStatus of the
+ * step that failed; *zcp is only written on success.
+ */
+int
+qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	CpaStatus status;
+	Cpa16U i;
+	CpaInstanceHandle cy_inst_handle;
+	Cpa16U nr_bufs = (size >> PAGE_SHIFT) + 2;
+	Cpa32U bytes_left = 0;
+	Cpa8S *data = NULL;
+	CpaCySymSessionCtx *cy_session_ctx = NULL;
+	cy_callback_t cb;
+	Cpa8U *digest_buffer = NULL;
+	CpaCySymOpData op_data = { 0 };
+	CpaBufferList src_buffer_list = { 0 };
+	CpaFlatBuffer *flat_src_buf_array = NULL;
+	CpaFlatBuffer *flat_src_buf = NULL;
+	/*
+	 * NOTE(review): the mapping loop below does not check its index
+	 * against MAX_PAGE_NUM; presumably callers bound size through
+	 * qat_checksum_use_accel() -- confirm.
+	 */
+	struct page *in_pages[MAX_PAGE_NUM];
+	Cpa32U page_num = 0;
+	Cpa32U page_off = 0;
+
+	QAT_STAT_BUMP(cksum_requests);
+	QAT_STAT_INCR(cksum_total_in_bytes, size);
+
+	/* pick an instance from the shared request counter */
+	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
+	cy_inst_handle = cy_inst_handles[i];
+
+	status = qat_init_checksum_session_ctx(cy_inst_handle,
+	    &cy_session_ctx, cksum);
+	if (status != CPA_STATUS_SUCCESS) {
+		/*
+		 * don't count unsupported checksums as a failure
+		 *
+		 * NOTE(review): ZIO_CHECKSUM_SHA512 is counted here even
+		 * though the session setup rejects it as unsupported --
+		 * confirm whether that is intended.
+		 */
+		if (cksum == ZIO_CHECKSUM_SHA256 ||
+		    cksum == ZIO_CHECKSUM_SHA512)
+			QAT_STAT_BUMP(cksum_fails);
+		return (status);
+	}
+
+	/*
+	 * We increment nr_bufs by 2 to allow us to handle non
+	 * page-aligned buffer addresses and buffers whose sizes
+	 * are not divisible by PAGE_SIZE.
+	 *
+	 * The same list is passed as source and destination, so only
+	 * one metadata area is allocated.
+	 */
+	status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
+	    &src_buffer_list, &src_buffer_list);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&digest_buffer,
+	    sizeof (zio_cksum_t));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/*
+	 * Describe buf one page at a time.  Every page is kmap()ed here
+	 * and remembered in in_pages[] so it can be kunmap()ed at the
+	 * fail label.
+	 */
+	bytes_left = size;
+	data = buf;
+	flat_src_buf = flat_src_buf_array;
+	while (bytes_left > 0) {
+		page_off = ((long)data & ~PAGE_MASK);
+		in_pages[page_num] = qat_mem_to_page(data);
+		flat_src_buf->pData = kmap(in_pages[page_num]) + page_off;
+		/* stop at the end of the page or of the data */
+		flat_src_buf->dataLenInBytes =
+		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
+		data += flat_src_buf->dataLenInBytes;
+		bytes_left -= flat_src_buf->dataLenInBytes;
+		flat_src_buf++;
+		page_num++;
+	}
+	src_buffer_list.pBuffers = flat_src_buf_array;
+	src_buffer_list.numBuffers = page_num;
+
+	op_data.sessionCtx = cy_session_ctx;
+	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+	op_data.hashStartSrcOffsetInBytes = 0;
+	op_data.messageLenToHashInBytes = size;
+	op_data.pDigestResult = digest_buffer;
+
+	/* cb is the callback tag symcallback() receives as p_callback */
+	cb.verify_result = CPA_FALSE;
+	init_completion(&cb.complete);
+	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
+	    &src_buffer_list, &src_buffer_list, NULL);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* we now wait until the completion of the operation. */
+	wait_for_completion(&cb.complete);
+
+	/* a CPA_FALSE verify flag from the callback is treated as failure */
+	if (cb.verify_result == CPA_FALSE) {
+		status = CPA_STATUS_FAIL;
+		goto fail;
+	}
+
+	bcopy(digest_buffer, zcp, sizeof (zio_cksum_t));
+
+	/* common exit path: also reached on success */
+fail:
+	if (status != CPA_STATUS_SUCCESS)
+		QAT_STAT_BUMP(cksum_fails);
+
+	/* only the pages actually mapped above are unmapped */
+	for (i = 0; i < page_num; i++)
+		kunmap(in_pages[i]);
+
+	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(digest_buffer);
+	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
+
+	return (status);
+}
+
+/*
+ * Setter for the zfs_qat_encrypt_disable module parameter.
+ *
+ * The value is stored with param_set_int().  If it was stored as 0
+ * (encryption offload enabled) and the crypto instances have not been
+ * initialised yet, qat_cy_init() is attempted; should that fail,
+ * zfs_qat_encrypt_disable is forced back to 1 and the error returned.
+ */
+static int
+param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp)
+{
+	int *disable = kp->arg;
+	int err = param_set_int(val, kp);
+
+	if (err != 0)
+		return (err);
+
+	/* only a first-time enable needs any further work */
+	if (*disable != 0 || qat_cy_init_done)
+		return (0);
+
+	err = qat_cy_init();
+	if (err != 0)
+		zfs_qat_encrypt_disable = 1;
+
+	return (err);
+}
+
+/*
+ * Setter for the zfs_qat_checksum_disable module parameter.
+ *
+ * The value is stored with param_set_int().  If it was stored as 0
+ * (checksum offload enabled) and the crypto instances have not been
+ * initialised yet, qat_cy_init() is attempted; should that fail,
+ * zfs_qat_checksum_disable is forced back to 1 and the error returned.
+ */
+static int
+param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp)
+{
+	int *disable = kp->arg;
+	int err = param_set_int(val, kp);
+
+	if (err != 0)
+		return (err);
+
+	/* only a first-time enable needs any further work */
+	if (*disable != 0 || qat_cy_init_done)
+		return (0);
+
+	err = qat_cy_init();
+	if (err != 0)
+		zfs_qat_checksum_disable = 1;
+
+	return (err);
+}
+
+/*
+ * Register the encryption and checksum disable switches as module
+ * parameters (mode 0644).  Writes go through the setters above, which
+ * may initialise the QAT crypto instances; reads use param_get_int().
+ */
+module_param_call(zfs_qat_encrypt_disable, param_set_qat_encrypt,
+    param_get_int, &zfs_qat_encrypt_disable, 0644);
+MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Enable/Disable QAT encryption");
+
+module_param_call(zfs_qat_checksum_disable, param_set_qat_checksum,
+    param_get_int, &zfs_qat_checksum_disable, 0644);
+MODULE_PARM_DESC(zfs_qat_checksum_disable, "Enable/Disable QAT checksumming");
+
+#endif

diff --git a/zfs/module/os/linux/zfs/spa_misc_os.c b/zfs/module/os/linux/zfs/spa_misc_os.c
new file mode 100644
index 0000000..5672cd6
--- /dev/null
+++ b/zfs/module/os/linux/zfs/spa_misc_os.c

@@ -0,0 +1,110 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, Intel Corporation.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa_impl.h>
+#include <sys/spa.h>
+#include <sys/txg.h>
+#include <sys/unique.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
+#include <sys/fm/util.h>
+#include <sys/dsl_scan.h>
+#include <sys/fs/zfs.h>
+#include <sys/kstat.h>
+#include "zfs_prop.h"
+
+
+/*
+ * Module parameter setter for the deadman failmode string.
+ *
+ * The value is first handed to param_set_deadman_failmode_common(); its
+ * result is negated and, if non-zero, returned as the error.  Otherwise
+ * the string is stored with param_set_charp() and that result returned.
+ */
+int
+param_set_deadman_failmode(const char *val, zfs_kernel_param_t *kp)
+{
+	int error = -param_set_deadman_failmode_common(val);
+
+	if (error != 0)
+		return (error);
+
+	return (param_set_charp(val, kp));
+}
+
+/*
+ * Module parameter setter for the deadman zio timeout.
+ *
+ * Stores the value with param_set_ulong() and then pushes
+ * zfs_deadman_ziotime_ms, converted to nanoseconds, to
+ * spa_set_deadman_ziotime().  Returns 0, or the negative error from
+ * param_set_ulong().
+ */
+int
+param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp)
+{
+	int rc = param_set_ulong(val, kp);
+
+	if (rc >= 0) {
+		spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_ziotime_ms));
+		return (0);
+	}
+
+	return (SET_ERROR(rc));
+}
+
+/*
+ * Module parameter setter for the deadman sync timeout.
+ *
+ * Stores the value with param_set_ulong() and then pushes
+ * zfs_deadman_synctime_ms, converted to nanoseconds, to
+ * spa_set_deadman_synctime().  Returns 0, or the negative error from
+ * param_set_ulong().
+ */
+int
+param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp)
+{
+	int rc = param_set_ulong(val, kp);
+
+	if (rc >= 0) {
+		spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
+		return (0);
+	}
+
+	return (SET_ERROR(rc));
+}
+
+/*
+ * Module parameter setter for the slop shift.
+ *
+ * The string is parsed with kstrtoul() and rejected with -EINVAL unless
+ * it lies in the range 1..31; only then is it stored with
+ * param_set_int().  Returns 0 on success or a negative error.
+ */
+int
+param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp)
+{
+	unsigned long shift;
+	int rc;
+
+	rc = kstrtoul(buf, 0, &shift);
+	if (rc != 0)
+		return (SET_ERROR(rc));
+
+	/* validate before anything is written to the parameter */
+	if (shift == 0 || shift > 31)
+		return (SET_ERROR(-EINVAL));
+
+	rc = param_set_int(buf, kp);
+	if (rc >= 0)
+		return (0);
+
+	return (SET_ERROR(rc));
+}
+
+/*
+ * Return the zone name for pool history records; on Linux this is
+ * always the constant string "linux".
+ */
+const char *
+spa_history_zone(void)
+{
+	return ("linux");
+}

diff --git a/zfs/module/os/linux/zfs/trace.c b/zfs/module/os/linux/zfs/trace.c
new file mode 100644
index 0000000..a690822
--- /dev/null
+++ b/zfs/module/os/linux/zfs/trace.c

@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Each DTRACE_PROBE must define its trace point in one (and only one)
+ * source file, so this dummy file exists for that purpose.
+ */
+
+#include <sys/multilist.h>
+#include <sys/arc_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dmu_tx.h>
+#include <sys/dnode.h>
+#include <sys/zfs_znode.h>
+#include <sys/zil_impl.h>
+
+#ifdef _KERNEL
+#define	CREATE_TRACE_POINTS
+#include <sys/trace.h>
+#include <sys/trace_acl.h>
+#include <sys/trace_arc.h>
+#include <sys/trace_dbgmsg.h>
+#include <sys/trace_dbuf.h>
+#include <sys/trace_dmu.h>
+#include <sys/trace_dnode.h>
+#include <sys/trace_multilist.h>
+#include <sys/trace_rrwlock.h>
+#include <sys/trace_txg.h>
+#include <sys/trace_vdev.h>
+#include <sys/trace_zil.h>
+#include <sys/trace_zio.h>
+#include <sys/trace_zrlock.h>
+#endif

diff --git a/zfs/module/os/linux/zfs/vdev_disk.c b/zfs/module/os/linux/zfs/vdev_disk.c
new file mode 100644
index 0000000..daf47a1
--- /dev/null
+++ b/zfs/module/os/linux/zfs/vdev_disk.c

@@ -0,0 +1,1094 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * LLNL-CODE-403049.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_disk.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_trim.h>
+#include <sys/abd.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <linux/blkpg.h>
+#include <linux/msdos_fs.h>
+#include <linux/vfs_compat.h>
+#ifdef HAVE_LINUX_BLK_CGROUP_HEADER
+#include <linux/blk-cgroup.h>
+#endif
+
+/*
+ * Per-vdev private state for disk vdevs, stored in vdev_t's vdev_tsd.
+ */
+typedef struct vdev_disk {
+	struct block_device		*vd_bdev;	/* open device or NULL */
+	krwlock_t			vd_lock;	/* guards vd_bdev */
+} vdev_disk_t;
+
+/*
+ * Unique identifier for the exclusive vdev holder.  It is passed as the
+ * holder argument whenever a block device is opened or released.
+ */
+static void *zfs_vdev_holder = VDEV_HOLDER;
+
+/*
+ * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the
+ * device is missing. The missing path may be transient since the links
+ * can be briefly removed and recreated in response to udev events.
+ */
+static uint_t zfs_vdev_open_timeout_ms = 1000;
+
+/*
+ * Size of the "reserved" partition, in blocks.  bdev_max_capacity()
+ * shifts it by SECTOR_BITS to obtain bytes.
+ */
+#define	EFI_MIN_RESV_SIZE	(16 * 1024)
+
+/*
+ * Tracks the bios attached to one parent ZIO.  dr_bio[] is a trailing
+ * variable-length array holding dr_bio_count entries.
+ */
+typedef struct dio_request {
+	zio_t			*dr_zio;	/* Parent ZIO */
+	atomic_t		dr_ref;		/* References */
+	int			dr_error;	/* Bio error */
+	int			dr_bio_count;	/* Count of bio's */
+	struct bio		*dr_bio[0];	/* Attached bio's */
+} dio_request_t;
+
+/*
+ * Translate a spa_mode_t into the open-mode flags expected by the block
+ * layer: blk_mode_t (BLK_OPEN_*) when HAVE_BLK_MODE_T is defined,
+ * fmode_t (FMODE_*) otherwise.  Only the read and write bits are mapped.
+ */
+#ifdef HAVE_BLK_MODE_T
+static blk_mode_t
+#else
+static fmode_t
+#endif
+vdev_bdev_mode(spa_mode_t spa_mode)
+{
+#ifdef HAVE_BLK_MODE_T
+	const blk_mode_t rd_flag = BLK_OPEN_READ;
+	const blk_mode_t wr_flag = BLK_OPEN_WRITE;
+	blk_mode_t bdev_mode = 0;
+#else
+	const fmode_t rd_flag = FMODE_READ;
+	const fmode_t wr_flag = FMODE_WRITE;
+	fmode_t bdev_mode = 0;
+#endif
+
+	if (spa_mode & SPA_MODE_READ)
+		bdev_mode |= rd_flag;
+
+	if (spa_mode & SPA_MODE_WRITE)
+		bdev_mode |= wr_flag;
+
+	return (bdev_mode);
+}
+
+/*
+ * Usable capacity of the partition or disk, in bytes, as reported by
+ * the size of the block device's inode.
+ */
+static uint64_t
+bdev_capacity(struct block_device *bdev)
+{
+	uint64_t nbytes = i_size_read(bdev->bd_inode);
+
+	return (nbytes);
+}
+
+#if !defined(HAVE_BDEV_WHOLE)
+/*
+ * Fallback used when HAVE_BDEV_WHOLE is not defined: return the block
+ * device that contains bdev, taken from its bd_contains field.
+ */
+static inline struct block_device *
+bdev_whole(struct block_device *bdev)
+{
+	return (bdev->bd_contains);
+}
+#endif
+
+/*
+ * vdev_bdevname() writes the name of bdev into name.  It maps to
+ * bdevname() when HAVE_BDEVNAME is defined; otherwise the name is
+ * formatted with the "%pg" specifier.  The fallback writes at most
+ * BDEVNAME_SIZE bytes, so name must be at least that large.
+ */
+#if defined(HAVE_BDEVNAME)
+#define	vdev_bdevname(bdev, name)	bdevname(bdev, name)
+#else
+static inline void
+vdev_bdevname(struct block_device *bdev, char *name)
+{
+	snprintf(name, BDEVNAME_SIZE, "%pg", bdev);
+}
+#endif
+
+/*
+ * Returns the maximum expansion capacity of the block device (in bytes).
+ *
+ * It is possible to expand a vdev when it has been created as a wholedisk
+ * and the containing block device has increased in capacity.  Or when the
+ * partition containing the pool has been manually increased in size.
+ *
+ * This function is only responsible for calculating the potential expansion
+ * size so it can be reported by 'zpool list'.  The efi_use_whole_disk() is
+ * responsible for verifying the expected partition layout in the wholedisk
+ * case, and updating the partition table if appropriate.  Once the partition
+ * size has been increased the additional capacity will be visible using
+ * bdev_capacity().
+ *
+ * The returned maximum expansion capacity is always expected to be larger, or
+ * at the very least equal, to its usable capacity to prevent overestimating
+ * the pool expandsize.
+ */
+static uint64_t
+bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
+{
+	uint64_t psize;
+	int64_t available;
+
+	if (wholedisk && bdev != bdev_whole(bdev)) {
+		/*
+		 * When reporting maximum expansion capacity for a wholedisk
+		 * deduct any capacity which is expected to be lost due to
+		 * alignment restrictions.  Over reporting this value isn't
+		 * harmful and would only result in slightly less capacity
+		 * than expected post expansion.
+		 * The estimated available space may be slightly smaller than
+		 * bdev_capacity() for devices where the number of sectors is
+		 * not a multiple of the alignment size and the partition layout
+		 * is keeping less than PARTITION_END_ALIGNMENT bytes after the
+		 * "reserved" EFI partition: in such cases return the device
+		 * usable capacity.
+		 */
+		available = i_size_read(bdev_whole(bdev)->bd_inode) -
+		    ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
+		    PARTITION_END_ALIGNMENT) << SECTOR_BITS);
+		/*
+		 * Compare as signed values.  If the whole device is smaller
+		 * than the deducted overhead, "available" is negative; a
+		 * mixed signed/unsigned MAX() would convert it to a huge
+		 * unsigned number and report that as the capacity.
+		 */
+		psize = MAX(available, (int64_t)bdev_capacity(bdev));
+	} else {
+		psize = bdev_capacity(bdev);
+	}
+
+	return (psize);
+}
+
+/*
+ * Log a kernel warning describing a failed zio: pool name, vdev path,
+ * error, I/O type, offset, size and flags.
+ */
+static void
+vdev_disk_error(zio_t *zio)
+{
+	/*
+	 * This function can be called in interrupt context, for instance while
+	 * handling IRQs coming from a misbehaving disk device; use printk()
+	 * which is safe from any context.
+	 */
+	printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
+	    "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
+	    zio->io_vd->vdev_path, zio->io_error, zio->io_type,
+	    (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
+	    zio->io_flags);
+}
+
+/*
+ * Signal a kobject event for the block device backing vdev v.  If the
+ * vdev has no private disk state, or no open block device, a debug
+ * message is logged instead.
+ */
+static void
+vdev_disk_kobj_evt_post(vdev_t *v)
+{
+	vdev_disk_t *disk = v->vdev_tsd;
+
+	if (disk == NULL || disk->vd_bdev == NULL) {
+		vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n",
+		    v->vdev_path);
+		return;
+	}
+
+	spl_signal_kobj_evt(disk->vd_bdev);
+}
+
+#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
+/*
+ * Define a dummy struct blk_holder_ops for kernel versions
+ * prior to 6.5, so that vdev_blkdev_get_by_path() can keep a single
+ * prototype regardless of the kernel version.
+ */
+struct blk_holder_ops {};
+#endif
+
+/*
+ * Open the block device at path exclusively on behalf of holder.
+ *
+ * mode is translated with vdev_bdev_mode() and combined with the
+ * exclusive-open flag of the running kernel's API.  hops is only passed
+ * on when blkdev_get_by_path() takes four arguments.  The result of
+ * blkdev_get_by_path() is returned unchanged, so callers must test it
+ * with IS_ERR().
+ */
+static struct block_device *
+vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
+    const struct blk_holder_ops *hops)
+{
+	struct block_device *bdev;
+
+#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
+	bdev = blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops);
+#else
+	bdev = blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | FMODE_EXCL, holder);
+#endif
+
+	return (bdev);
+}
+
+/*
+ * Release a block device obtained with vdev_blkdev_get_by_path().
+ *
+ * When HAVE_BLKDEV_PUT_HOLDER is defined blkdev_put() is given the
+ * holder; otherwise it is given the open mode, which is rebuilt from
+ * mode plus FMODE_EXCL.
+ *
+ * blkdev_put() is called as a plain statement: this function returns
+ * void, and ISO C forbids "return <expression>;" in a void function.
+ */
+static void
+vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
+{
+#ifdef HAVE_BLKDEV_PUT_HOLDER
+	blkdev_put(bdev, holder);
+#else
+	blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL);
+#endif
+}
+
+/*
+ * Open (or reopen) the block device backing the disk vdev 'v'.
+ *
+ * On success the device is stored in the vdev's vdev_disk_t (v->vdev_tsd)
+ * and the following outputs are filled in:
+ *   psize           - bdev_capacity() of the device
+ *   max_psize       - bdev_max_capacity() of the device
+ *   logical_ashift  - log2 of MAX(logical block size, SPA_MINBLOCKSIZE)
+ *   physical_ashift - log2 of MAX(physical block size, SPA_MINBLOCKSIZE)
+ *
+ * Returns 0 on success, EINVAL when vdev_path is missing or not absolute,
+ * or the (positive) errno derived from the failed open.  On an open
+ * failure v->vdev_tsd is still set, with vd_bdev == NULL.
+ */
+static int
+vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	struct block_device *bdev;
+	/*
+	 * Keep the pool's spa_mode_t here.  vdev_blkdev_get_by_path() and
+	 * vdev_blkdev_put() take a spa_mode_t and apply vdev_bdev_mode()
+	 * themselves, so passing an already translated value would convert
+	 * the mode twice.
+	 */
+	spa_mode_t smode = spa_mode(v->vdev_spa);
+	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
+	vdev_disk_t *vd;
+
+	/* Must have a pathname and it must be absolute. */
+	if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
+		v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+		vdev_dbgmsg(v, "invalid vdev_path");
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Reopen the device if it is currently open.  When expanding a
+	 * partition force re-scanning the partition table if userland
+	 * did not take care of this already. We need to do this while closed
+	 * in order to get an accurate updated block device size.  Then
+	 * since udev may need to recreate the device links increase the
+	 * open retry timeout before reporting the device as unavailable.
+	 */
+	vd = v->vdev_tsd;
+	if (vd) {
+		char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
+		boolean_t reread_part = B_FALSE;
+
+		rw_enter(&vd->vd_lock, RW_WRITER);
+		bdev = vd->vd_bdev;
+		vd->vd_bdev = NULL;
+
+		if (bdev) {
+			if (v->vdev_expanding && bdev != bdev_whole(bdev)) {
+				vdev_bdevname(bdev_whole(bdev), disk_name + 5);
+				/*
+				 * If userland has BLKPG_RESIZE_PARTITION,
+				 * then it should have updated the partition
+				 * table already. We can detect this by
+				 * comparing our current physical size
+				 * with that of the device. If they are
+				 * the same, then we must not have
+				 * BLKPG_RESIZE_PARTITION or it failed to
+				 * update the partition table online. We
+				 * fallback to rescanning the partition
+				 * table from the kernel below. However,
+				 * if the capacity already reflects the
+				 * updated partition, then we skip
+				 * rescanning the partition table here.
+				 */
+				if (v->vdev_psize == bdev_capacity(bdev))
+					reread_part = B_TRUE;
+			}
+
+			vdev_blkdev_put(bdev, smode, zfs_vdev_holder);
+		}
+
+		if (reread_part) {
+			bdev = vdev_blkdev_get_by_path(disk_name, smode,
+			    zfs_vdev_holder, NULL);
+			if (!IS_ERR(bdev)) {
+				int error = vdev_bdev_reread_part(bdev);
+				vdev_blkdev_put(bdev, smode, zfs_vdev_holder);
+				if (error == 0) {
+					timeout = MSEC2NSEC(
+					    zfs_vdev_open_timeout_ms * 2);
+				}
+			}
+		}
+	} else {
+		vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
+
+		rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
+		rw_enter(&vd->vd_lock, RW_WRITER);
+	}
+
+	/*
+	 * Devices are always opened by the path provided at configuration
+	 * time.  This means that if the provided path is a udev by-id path
+	 * then drives may be re-cabled without an issue.  If the provided
+	 * path is a udev by-path path, then the physical location information
+	 * will be preserved.  This can be critical for more complicated
+	 * configurations where drives are located in specific physical
+	 * locations to maximize the systems tolerance to component failure.
+	 *
+	 * Alternatively, you can provide your own udev rule to flexibly map
+	 * the drives as you see fit.  It is not advised that you use the
+	 * /dev/[hd]d devices which may be reordered due to probing order.
+	 * Devices in the wrong locations will be detected by the higher
+	 * level vdev validation.
+	 *
+	 * The specified paths may be briefly removed and recreated in
+	 * response to udev events.  This should be exceptionally unlikely
+	 * because the zpool command makes every effort to verify these paths
+	 * have already settled prior to reaching this point.  Therefore,
+	 * a ENOENT failure at this point is highly likely to be transient
+	 * and it is reasonable to sleep and retry before giving up.  In
+	 * practice delays have been observed to be on the order of 100ms.
+	 *
+	 * When ERESTARTSYS is returned it indicates the block device is
+	 * a zvol which could not be opened due to the deadlock detection
+	 * logic in zvol_open().  Extend the timeout and retry the open;
+	 * subsequent attempts are expected to eventually succeed.
+	 */
+	hrtime_t start = gethrtime();
+	bdev = ERR_PTR(-ENXIO);
+	while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
+		bdev = vdev_blkdev_get_by_path(v->vdev_path, smode,
+		    zfs_vdev_holder, NULL);
+		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
+			/*
+			 * There is no point of waiting since device is removed
+			 * explicitly
+			 */
+			if (v->vdev_removed)
+				break;
+
+			schedule_timeout(MSEC_TO_TICK(10));
+		} else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) {
+			timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10);
+			continue;
+		} else if (IS_ERR(bdev)) {
+			break;
+		}
+	}
+
+	if (IS_ERR(bdev)) {
+		/* PTR_ERR() is negative here; report a positive errno. */
+		int error = -PTR_ERR(bdev);
+		vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error,
+		    (u_longlong_t)(gethrtime() - start),
+		    (u_longlong_t)timeout);
+		vd->vd_bdev = NULL;
+		v->vdev_tsd = vd;
+		rw_exit(&vd->vd_lock);
+		return (SET_ERROR(error));
+	} else {
+		vd->vd_bdev = bdev;
+		v->vdev_tsd = vd;
+		rw_exit(&vd->vd_lock);
+	}
+
+	/*  Determine the physical block size */
+	int physical_block_size = bdev_physical_block_size(vd->vd_bdev);
+
+	/*  Determine the logical block size */
+	int logical_block_size = bdev_logical_block_size(vd->vd_bdev);
+
+	/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
+	v->vdev_nowritecache = B_FALSE;
+
+	/* Set when device reports it supports TRIM. */
+	v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev);
+
+	/* Set when device reports it supports secure TRIM. */
+	v->vdev_has_securetrim = bdev_secure_discard_supported(vd->vd_bdev);
+
+	/* Inform the ZIO pipeline that we are non-rotational */
+	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
+
+	/* Physical volume size in bytes for the partition */
+	*psize = bdev_capacity(vd->vd_bdev);
+
+	/* Physical volume size in bytes including possible expansion space */
+	*max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
+
+	/* Based on the minimum sector size set the block size */
+	*physical_ashift = highbit64(MAX(physical_block_size,
+	    SPA_MINBLOCKSIZE)) - 1;
+
+	*logical_ashift = highbit64(MAX(logical_block_size,
+	    SPA_MINBLOCKSIZE)) - 1;
+
+	return (0);
+}
+
+/*
+ * Close the disk vdev 'v': release its block device (if one is open),
+ * destroy the per-vdev lock and free the vdev_disk_t.
+ *
+ * Nothing is done while the vdev is being reopened or when it has no
+ * vdev_disk_t attached.
+ */
+static void
+vdev_disk_close(vdev_t *v)
+{
+	vdev_disk_t *disk = v->vdev_tsd;
+	struct block_device *bdev;
+
+	if (disk == NULL || v->vdev_reopening)
+		return;
+
+	bdev = disk->vd_bdev;
+	if (bdev != NULL)
+		vdev_blkdev_put(bdev, spa_mode(v->vdev_spa), zfs_vdev_holder);
+
+	rw_destroy(&disk->vd_lock);
+	v->vdev_tsd = NULL;
+	kmem_free(disk, sizeof (vdev_disk_t));
+}
+
+/*
+ * Allocate a dio_request_t with room for 'bio_count' bio pointers.
+ *
+ * The request starts with a reference count of zero, no error recorded
+ * and every bio slot set to NULL.  The allocation uses KM_SLEEP.
+ */
+static dio_request_t *
+vdev_disk_dio_alloc(int bio_count)
+{
+	dio_request_t *req;
+	int slot;
+
+	req = kmem_zalloc(sizeof (dio_request_t) +
+	    sizeof (struct bio *) * bio_count, KM_SLEEP);
+
+	req->dr_bio_count = bio_count;
+	req->dr_error = 0;
+	atomic_set(&req->dr_ref, 0);
+
+	slot = 0;
+	while (slot < req->dr_bio_count) {
+		req->dr_bio[slot] = NULL;
+		slot++;
+	}
+
+	return (req);
+}
+
+/*
+ * Drop the bio reference held in every populated slot of 'dr' and free
+ * the request itself.  The size passed to kmem_free() mirrors the one
+ * computed in vdev_disk_dio_alloc().
+ */
+static void
+vdev_disk_dio_free(dio_request_t *dr)
+{
+	for (int slot = 0; slot < dr->dr_bio_count; slot++) {
+		struct bio *bio = dr->dr_bio[slot];
+
+		if (bio != NULL)
+			bio_put(bio);
+	}
+
+	kmem_free(dr, sizeof (dio_request_t) +
+	    sizeof (struct bio *) * dr->dr_bio_count);
+}
+
+/*
+ * Take one reference on 'dr'.  Each reference is released with
+ * vdev_disk_dio_put().
+ */
+static void
+vdev_disk_dio_get(dio_request_t *dr)
+{
+	atomic_inc(&dr->dr_ref);
+}
+
+/*
+ * Release one reference on 'dr' and return the remaining count.
+ *
+ * When the last reference is dropped the request is freed and, if a zio
+ * is attached, the accumulated error is stored in it and the zio is
+ * handed to zio_delay_interrupt() exactly once.
+ */
+static int
+vdev_disk_dio_put(dio_request_t *dr)
+{
+	zio_t *zio;
+	int error;
+	int refs = atomic_dec_return(&dr->dr_ref);
+
+	/* Other references remain; nothing to complete yet. */
+	if (refs != 0)
+		return (refs);
+
+	/* Capture what we need before the request is freed. */
+	zio = dr->dr_zio;
+	error = dr->dr_error;
+	vdev_disk_dio_free(dr);
+
+	if (zio == NULL)
+		return (refs);
+
+	zio->io_error = error;
+	ASSERT3S(zio->io_error, >=, 0);
+	if (zio->io_error != 0)
+		vdev_disk_error(zio);
+
+	zio_delay_interrupt(zio);
+
+	return (refs);
+}
+
+/*
+ * Completion callback for bios issued by __vdev_disk_physio().
+ *
+ * Records the first error seen for the owning dio_request_t (later errors
+ * do not overwrite it) and drops the reference taken for this bio.  The
+ * 'error' argument is only consulted when HAVE_1ARG_BIO_END_IO_T is not
+ * defined.
+ */
+BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
+{
+	dio_request_t *dr = bio->bi_private;
+
+	if (dr->dr_error == 0) {
+#ifdef HAVE_1ARG_BIO_END_IO_T
+		dr->dr_error = BIO_END_IO_ERROR(bio);
+#else
+		if (error)
+			dr->dr_error = -(error);
+		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+			dr->dr_error = EIO;
+#endif
+	}
+
+	/*
+	 * Drop reference acquired by __vdev_disk_physio.  The remaining
+	 * count is of no interest here, so discard it explicitly instead
+	 * of storing it in an otherwise unused local.
+	 */
+	(void) vdev_disk_dio_put(dr);
+}
+
+/*
+ * Submit 'bio' using whichever submit_bio() signature the kernel
+ * provides.  With the two argument form the direction is taken from the
+ * bio itself via bio_data_dir().  Any return value is discarded.
+ */
+static inline void
+vdev_submit_bio_impl(struct bio *bio)
+{
+#ifdef HAVE_1ARG_SUBMIT_BIO
+	(void) submit_bio(bio);
+#else
+	(void) submit_bio(bio_data_dir(bio), bio);
+#endif
+}
+
+/*
+ * preempt_schedule_notrace is GPL-only which breaks the ZFS build, so
+ * replace it with preempt_schedule under the following condition:
+ * the kernel is built for ARM64 with both CONFIG_PREEMPTION and
+ * CONFIG_BLK_CGROUP enabled.  The macro only affects uses that are
+ * expanded after this point in the translation unit.
+ */
+#if defined(CONFIG_ARM64) && \
+    defined(CONFIG_PREEMPTION) && \
+    defined(CONFIG_BLK_CGROUP)
+#define	preempt_schedule_notrace(x) preempt_schedule(x)
+#endif
+
+/*
+ * As for the Linux 5.18 kernel bio_alloc() expects a block_device struct
+ * as an argument removing the need to set it with bio_set_dev().  This
+ * removes the need for all of the following compatibility code.
+ */
+#if !defined(HAVE_BIO_ALLOC_4ARG)
+
+#ifdef HAVE_BIO_SET_DEV
+#if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY)
+/*
+ * The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by
+ * blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched().
+ * As a side effect the function was converted to GPL-only.  Define our
+ * own version when needed which uses rcu_read_lock_sched().
+ *
+ * The Linux 5.17 kernel split linux/blk-cgroup.h into a private and a public
+ * part, moving blkg_tryget into the private one. Define our own version.
+ */
+#if defined(HAVE_BLKG_TRYGET_GPL_ONLY) || !defined(HAVE_BLKG_TRYGET)
+static inline bool
+vdev_blkg_tryget(struct blkcg_gq *blkg)
+{
+	struct percpu_ref *ref = &blkg->refcnt;
+	unsigned long __percpu *count;
+	bool rc;
+
+	/* The whole attempt runs inside an RCU-sched read-side section. */
+	rcu_read_lock_sched();
+
+	if (__ref_is_percpu(ref, &count)) {
+		/* Per-CPU mode: bump this CPU's counter; always succeeds. */
+		this_cpu_inc(*count);
+		rc = true;
+	} else {
+		/*
+		 * Otherwise fall back to the shared atomic counter and
+		 * only take a reference if it has not already hit zero.
+		 * The counter's location depends on the kernel layout.
+		 */
+#ifdef ZFS_PERCPU_REF_COUNT_IN_DATA
+		rc = atomic_long_inc_not_zero(&ref->data->count);
+#else
+		rc = atomic_long_inc_not_zero(&ref->count);
+#endif
+	}
+
+	rcu_read_unlock_sched();
+
+	/* true when a reference was obtained, false otherwise. */
+	return (rc);
+}
+#else
+#define	vdev_blkg_tryget(bg)	blkg_tryget(bg)
+#endif
+#ifdef HAVE_BIO_SET_DEV_MACRO
+/*
+ * The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the
+ * GPL-only bio_associate_blkg() symbol thus inadvertently converting
+ * the entire macro.  Provide a minimal version which always assigns the
+ * request queue's root_blkg to the bio.
+ */
+static inline void
+vdev_bio_associate_blkg(struct bio *bio)
+{
+	/* Locate the request queue through whichever field the bio has. */
+#if defined(HAVE_BIO_BDEV_DISK)
+	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+#else
+	struct request_queue *q = bio->bi_disk->queue;
+#endif
+
+	ASSERT3P(q, !=, NULL);
+	/* The bio must not already be associated with a blkg. */
+	ASSERT3P(bio->bi_blkg, ==, NULL);
+
+	/*
+	 * Attach the queue's root blkg only if a reference to it could be
+	 * taken; otherwise bi_blkg is left NULL.
+	 */
+	if (q->root_blkg && vdev_blkg_tryget(q->root_blkg))
+		bio->bi_blkg = q->root_blkg;
+}
+
+#define	bio_associate_blkg vdev_bio_associate_blkg
+#else
+/*
+ * Replacement for the kernel's bio_set_dev(): point 'bio' at 'bdev',
+ * clear the BIO_REMAPPED flag (and BIO_THROTTLED when the device
+ * changes), and associate the bio with the request queue's root blkg
+ * when a reference to it can be taken.
+ */
+static inline void
+vdev_bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+#if defined(HAVE_BIO_BDEV_DISK)
+	struct request_queue *q = bdev->bd_disk->queue;
+#else
+	struct request_queue *q = bio->bi_disk->queue;
+#endif
+	bio_clear_flag(bio, BIO_REMAPPED);
+	/* Throttle state only needs resetting if the device changes. */
+	if (bio->bi_bdev != bdev)
+		bio_clear_flag(bio, BIO_THROTTLED);
+	bio->bi_bdev = bdev;
+
+	ASSERT3P(q, !=, NULL);
+	ASSERT3P(bio->bi_blkg, ==, NULL);
+
+	if (q->root_blkg && vdev_blkg_tryget(q->root_blkg))
+		bio->bi_blkg = q->root_blkg;
+}
+#define	bio_set_dev		vdev_bio_set_dev
+#endif
+#endif
+#else
+/*
+ * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels.
+ */
+static inline void
+bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+	/* Only the target device needs recording in this fallback. */
+	bio->bi_bdev = bdev;
+}
+#endif /* HAVE_BIO_SET_DEV */
+#endif /* !HAVE_BIO_ALLOC_4ARG */
+
+/*
+ * Submit 'bio' with the current task's bio_list temporarily cleared, and
+ * restore the list afterwards.
+ *
+ * NOTE(review): presumably clearing current->bio_list makes the block
+ * layer dispatch the bio immediately instead of queueing it on the
+ * caller's pending list - confirm against the kernel's submit path.
+ */
+static inline void
+vdev_submit_bio(struct bio *bio)
+{
+	struct bio_list *saved_list = current->bio_list;
+
+	current->bio_list = NULL;
+	vdev_submit_bio_impl(bio);
+	current->bio_list = saved_list;
+}
+
+/*
+ * Allocate a bio with room for 'nr_vecs' vectors, targeting 'bdev'.
+ *
+ * With the four argument bio_alloc() the device is passed at allocation
+ * time; otherwise it is attached afterwards with bio_set_dev().  Returns
+ * the bio, or NULL when the allocation failed.
+ */
+static inline struct bio *
+vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
+    unsigned short nr_vecs)
+{
+#ifdef HAVE_BIO_ALLOC_4ARG
+	return (bio_alloc(bdev, nr_vecs, 0, gfp_mask));
+#else
+	struct bio *new_bio = bio_alloc(gfp_mask, nr_vecs);
+
+	if (likely(new_bio != NULL))
+		bio_set_dev(new_bio, bdev);
+
+	return (new_bio);
+#endif
+}
+
+/*
+ * Compute how many segments to request for a bio covering 'bio_size'
+ * bytes of zio->io_abd starting at 'abd_offset'.  The page count reported
+ * by abd_nr_pages_off() is capped by bio_max_segs() when the kernel has
+ * it, and by BIO_MAX_PAGES otherwise.
+ */
+static inline unsigned int
+vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
+{
+	unsigned long pages;
+
+	pages = abd_nr_pages_off(zio->io_abd, bio_size, abd_offset);
+
+#ifdef HAVE_BIO_MAX_SEGS
+	return (bio_max_segs(pages));
+#else
+	return (MIN(pages, BIO_MAX_PAGES));
+#endif
+}
+
+/*
+ * Issue the data I/O described by 'zio' to 'bdev'.
+ *
+ *   io_size, io_offset - byte length and byte offset on the device
+ *   rw, flags          - passed to bio_set_op_attrs() for every bio
+ *
+ * The buffer (zio->io_abd) is split across as many bios as needed; all of
+ * them share one dio_request_t whose last reference drop completes the
+ * zio (see vdev_disk_dio_put()).
+ *
+ * Returns 0 once the bios have been submitted, EIO when the range lies
+ * outside the device, or ENOMEM when a bio could not be allocated.  In
+ * the error cases no bio has been submitted.
+ */
+static int
+__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
+    size_t io_size, uint64_t io_offset, int rw, int flags)
+{
+	dio_request_t *dr;
+	uint64_t abd_offset;
+	uint64_t bio_offset;
+	int bio_size;
+	int bio_count = 16;
+	int error = 0;
+	struct blk_plug plug;
+	unsigned short nr_vecs;
+
+	/*
+	 * Accessing outside the block device is never allowed.  The casts
+	 * keep the arguments in step with the %llu conversions regardless
+	 * of how wide size_t and loff_t are on this platform.
+	 */
+	if (io_offset + io_size > i_size_read(bdev->bd_inode)) {
+		vdev_dbgmsg(zio->io_vd,
+		    "Illegal access %llu size %llu, device size %llu",
+		    (u_longlong_t)io_offset,
+		    (u_longlong_t)io_size,
+		    (u_longlong_t)i_size_read(bdev->bd_inode));
+		return (SET_ERROR(EIO));
+	}
+
+retry:
+	dr = vdev_disk_dio_alloc(bio_count);
+
+	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
+		bio_set_flags_failfast(bdev, &flags);
+
+	dr->dr_zio = zio;
+
+	/*
+	 * Since bio's can have up to BIO_MAX_PAGES=256 iovec's, each of which
+	 * is at least 512 bytes and at most PAGESIZE (typically 4K), one bio
+	 * can cover at least 128KB and at most 1MB.  When the required number
+	 * of iovec's exceeds this, we are forced to break the IO in multiple
+	 * bio's and wait for them all to complete.  This is likely if the
+	 * recordsize property is increased beyond 1MB.  The default
+	 * bio_count=16 should typically accommodate the maximum-size zio of
+	 * 16MB.
+	 */
+
+	abd_offset = 0;
+	bio_offset = io_offset;
+	bio_size = io_size;
+	/* '<=' is deliberate: the extra pass detects "out of bio slots". */
+	for (int i = 0; i <= dr->dr_bio_count; i++) {
+
+		/* Finished constructing bio's for given buffer */
+		if (bio_size <= 0)
+			break;
+
+		/*
+		 * If additional bio's are required, we have to retry, but
+		 * this should be rare - see the comment above.
+		 */
+		if (dr->dr_bio_count == i) {
+			vdev_disk_dio_free(dr);
+			bio_count *= 2;
+			goto retry;
+		}
+
+		nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset);
+		dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
+		if (unlikely(dr->dr_bio[i] == NULL)) {
+			vdev_disk_dio_free(dr);
+			return (SET_ERROR(ENOMEM));
+		}
+
+		/* Matching put called by vdev_disk_physio_completion */
+		vdev_disk_dio_get(dr);
+
+		/* The bio sector is expressed in 512-byte units. */
+		BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
+		dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
+		dr->dr_bio[i]->bi_private = dr;
+		bio_set_op_attrs(dr->dr_bio[i], rw, flags);
+
+		/* Remaining size is returned to become the new size */
+		bio_size = abd_bio_map_off(dr->dr_bio[i], zio->io_abd,
+		    bio_size, abd_offset);
+
+		/* Advance in buffer and construct another bio if needed */
+		abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
+		bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
+	}
+
+	/* Extra reference to protect dio_request during vdev_submit_bio */
+	vdev_disk_dio_get(dr);
+
+	if (dr->dr_bio_count > 1)
+		blk_start_plug(&plug);
+
+	/* Submit all bio's associated with this dio */
+	for (int i = 0; i < dr->dr_bio_count; i++) {
+		if (dr->dr_bio[i])
+			vdev_submit_bio(dr->dr_bio[i]);
+	}
+
+	if (dr->dr_bio_count > 1)
+		blk_finish_plug(&plug);
+
+	(void) vdev_disk_dio_put(dr);
+
+	return (error);
+}
+
+/*
+ * Completion callback for the flush bio issued by vdev_disk_io_flush().
+ *
+ * Stores the bio's result in the zio, remembers on the vdev that cache
+ * flushes are unsupported when the device answered EOPNOTSUPP, releases
+ * the bio and hands the zio to zio_interrupt().
+ */
+BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
+{
+	zio_t *zio = bio->bi_private;
+
+#ifdef HAVE_1ARG_BIO_END_IO_T
+	zio->io_error = BIO_END_IO_ERROR(bio);
+#else
+	zio->io_error = -error;
+#endif
+
+	/* Stop issuing flushes to a device that cannot honour them. */
+	if (zio->io_error == EOPNOTSUPP)
+		zio->io_vd->vdev_nowritecache = B_TRUE;
+
+	bio_put(bio);
+
+	ASSERT3S(zio->io_error, >=, 0);
+	if (zio->io_error != 0)
+		vdev_disk_error(zio);
+
+	zio_interrupt(zio);
+}
+
+/*
+ * Submit a cache flush for 'bdev' on behalf of 'zio'.
+ *
+ * Returns 0 once the flush bio has been submitted (the zio is then
+ * completed from vdev_disk_io_flush_completion()), ENXIO when the device
+ * has no request queue, or ENOMEM when no bio could be allocated.
+ */
+static int
+vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
+{
+	struct bio *flush_bio;
+
+	if (bdev_get_queue(bdev) == NULL)
+		return (SET_ERROR(ENXIO));
+
+	/* A flush carries no data, hence zero vectors. */
+	flush_bio = vdev_bio_alloc(bdev, GFP_NOIO, 0);
+	if (unlikely(flush_bio == NULL))
+		return (SET_ERROR(ENOMEM));
+
+	flush_bio->bi_private = zio;
+	flush_bio->bi_end_io = vdev_disk_io_flush_completion;
+	bio_set_flush(flush_bio);
+
+	vdev_submit_bio(flush_bio);
+	invalidate_bdev(bdev);
+
+	return (0);
+}
+
+/*
+ * Discard the range described by 'zio' on the vdev's block device.
+ *
+ * The zio's byte offset and size are converted to 512-byte sectors
+ * (>> 9).  A secure discard is requested when ZIO_TRIM_SECURE is set in
+ * zio->io_trim_flags and the kernel offers a way to ask for one.
+ *
+ * The kernel helper's return value is negated before being returned;
+ * presumably this turns the kernel's negative errno into the positive
+ * errno used by callers (0 stays 0).
+ */
+static int
+vdev_disk_io_trim(zio_t *zio)
+{
+	vdev_t *v = zio->io_vd;
+	vdev_disk_t *vd = v->vdev_tsd;
+
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
+	/* Kernels with a dedicated secure erase entry point. */
+	if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
+		return (-blkdev_issue_secure_erase(vd->vd_bdev,
+		    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
+	} else {
+		return (-blkdev_issue_discard(vd->vd_bdev,
+		    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
+	}
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
+	/* Kernels where secure discard is a flag to the discard call. */
+	unsigned long trim_flags = 0;
+#if defined(BLKDEV_DISCARD_SECURE)
+	if (zio->io_trim_flags & ZIO_TRIM_SECURE)
+		trim_flags |= BLKDEV_DISCARD_SECURE;
+#endif
+	return (-blkdev_issue_discard(vd->vd_bdev,
+	    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
+#else
+#error "Unsupported kernel"
+#endif
+}
+
+/*
+ * Start the I/O described by 'zio' on its disk vdev.
+ *
+ * The vdev's vd_lock is held as reader while the request is dispatched
+ * and is released on every path before returning.  Depending on
+ * zio->io_type the request is a cache flush (ZIO_TYPE_IOCTL with
+ * DKIOCFLUSHWRITECACHE), a read or write handed to
+ * __vdev_disk_physio(), or a TRIM.  Failures are reported through
+ * zio->io_error.
+ */
+static void
+vdev_disk_io_start(zio_t *zio)
+{
+	vdev_t *v = zio->io_vd;
+	vdev_disk_t *vd = v->vdev_tsd;
+	int rw, error;
+
+	/*
+	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
+	 * Nothing to be done here but return failure.
+	 */
+	if (vd == NULL) {
+		zio->io_error = ENXIO;
+		zio_interrupt(zio);
+		return;
+	}
+
+	rw_enter(&vd->vd_lock, RW_READER);
+
+	/*
+	 * If the vdev is closed, it's likely due to a failed reopen and is
+	 * in the UNAVAIL state.  Nothing to be done here but return failure.
+	 */
+	if (vd->vd_bdev == NULL) {
+		rw_exit(&vd->vd_lock);
+		zio->io_error = ENXIO;
+		zio_interrupt(zio);
+		return;
+	}
+
+	switch (zio->io_type) {
+	case ZIO_TYPE_IOCTL:
+
+		if (!vdev_readable(v)) {
+			rw_exit(&vd->vd_lock);
+			zio->io_error = SET_ERROR(ENXIO);
+			zio_interrupt(zio);
+			return;
+		}
+
+		switch (zio->io_cmd) {
+		case DKIOCFLUSHWRITECACHE:
+
+			/* Flushing disabled: complete with no error. */
+			if (zfs_nocacheflush)
+				break;
+
+			/* A previous flush reported it is unsupported. */
+			if (v->vdev_nowritecache) {
+				zio->io_error = SET_ERROR(ENOTSUP);
+				break;
+			}
+
+			/*
+			 * On success the zio is completed by the flush
+			 * bio's completion callback, not here.
+			 */
+			error = vdev_disk_io_flush(vd->vd_bdev, zio);
+			if (error == 0) {
+				rw_exit(&vd->vd_lock);
+				return;
+			}
+
+			zio->io_error = error;
+
+			break;
+
+		default:
+			zio->io_error = SET_ERROR(ENOTSUP);
+		}
+
+		/* No bio was submitted for the ioctl; continue the zio. */
+		rw_exit(&vd->vd_lock);
+		zio_execute(zio);
+		return;
+	case ZIO_TYPE_WRITE:
+		rw = WRITE;
+		break;
+
+	case ZIO_TYPE_READ:
+		rw = READ;
+		break;
+
+	case ZIO_TYPE_TRIM:
+		/* The discard result is recorded before completing the zio. */
+		zio->io_error = vdev_disk_io_trim(zio);
+		rw_exit(&vd->vd_lock);
+		zio_interrupt(zio);
+		return;
+
+	default:
+		rw_exit(&vd->vd_lock);
+		zio->io_error = SET_ERROR(ENOTSUP);
+		zio_interrupt(zio);
+		return;
+	}
+
+	/* Only reads and writes reach this point. */
+	zio->io_target_timestamp = zio_handle_io_delay(zio);
+	error = __vdev_disk_physio(vd->vd_bdev, zio,
+	    zio->io_size, zio->io_offset, rw, 0);
+	rw_exit(&vd->vd_lock);
+
+	/*
+	 * On success the zio is completed from the bio completion path;
+	 * only a submission failure is completed here.
+	 */
+	if (error) {
+		zio->io_error = error;
+		zio_interrupt(zio);
+		return;
+	}
+}
+
+/*
+ * Post-I/O hook for disk vdevs.
+ *
+ * Only an EIO result is of interest: the media is then revalidated with
+ * zfs_check_disk_status() and, if that check fails, the block device is
+ * invalidated and an asynchronous removal of the vdev is requested.
+ */
+static void
+vdev_disk_io_done(zio_t *zio)
+{
+	vdev_t *v;
+	vdev_disk_t *vd;
+
+	if (zio->io_error != EIO)
+		return;
+
+	v = zio->io_vd;
+	vd = v->vdev_tsd;
+
+	/* The disk still checks out; nothing further to do. */
+	if (zfs_check_disk_status(vd->vd_bdev))
+		return;
+
+	invalidate_bdev(vd->vd_bdev);
+	v->vdev_remove_wanted = B_TRUE;
+	spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
+}
+
+/*
+ * Hold hook for disk vdevs.  The caller must hold SCL_STATE as writer.
+ *
+ * The checks mirror the preconditions for prefetching path and devid
+ * information (an absolute pathname and a device that has never been
+ * opened), but no prefetch work is currently performed.
+ */
+static void
+vdev_disk_hold(vdev_t *vd)
+{
+	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
+
+	/*
+	 * Bail out unless there is an absolute pathname and the device
+	 * has never been opened.
+	 */
+	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/' ||
+	    vd->vdev_tsd != NULL)
+		return;
+}
+
+/*
+ * Release hook for disk vdevs.  The caller must hold SCL_STATE as
+ * writer.  Beyond that assertion this is currently a no-op.
+ */
+static void
+vdev_disk_rele(vdev_t *vd)
+{
+	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
+
+	/* XXX: Implement me as a vnode rele for the device */
+}
+
+/*
+ * Operations vector for disk vdevs.  Hooks set to NULL are not
+ * implemented by this vdev type; asize, min_asize and xlate use the
+ * vdev_default_* implementations.
+ */
+vdev_ops_t vdev_disk_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_disk_open,
+	.vdev_op_close = vdev_disk_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_disk_io_start,
+	.vdev_op_io_done = vdev_disk_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_disk_hold,
+	.vdev_op_rele = vdev_disk_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE,			/* leaf vdev */
+	.vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post
+};
+
+/*
+ * The zfs_vdev_scheduler module option has been deprecated. Setting this
+ * value no longer has any effect.  It has not yet been entirely removed
+ * to allow the module to be loaded if this option is specified in the
+ * /etc/modprobe.d/zfs.conf file.  The following warning will be logged.
+ */
+/*
+ * Setter for the deprecated zfs_vdev_scheduler option: the value is
+ * stored with param_set_charp() and, when that succeeds, a notice is
+ * logged that the option is not supported.  Returns the result of
+ * param_set_charp().
+ */
+static int
+param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
+{
+	int rc = param_set_charp(val, kp);
+
+	if (rc != 0)
+		return (rc);
+
+	printk(KERN_INFO "The 'zfs_vdev_scheduler' module option "
+	    "is not supported.\n");
+
+	return (rc);
+}
+
+/*
+ * Placeholder storage for the deprecated option, registered with the
+ * warning setter above and the stock charp getter (mode 0644).
+ */
+char *zfs_vdev_scheduler = "unused";
+module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
+    param_get_charp, &zfs_vdev_scheduler, 0644);
+MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
+
+/*
+ * Module parameter setter for the minimum automatic ashift.
+ *
+ * The new value must parse as an unsigned integer and lie within
+ * [ASHIFT_MIN, zfs_vdev_max_auto_ashift]; it is then stored with
+ * param_set_ulong().  Returns 0 on success or a negative errno.
+ */
+int
+param_set_min_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
+{
+	uint64_t ashift;
+	int rc;
+
+	rc = kstrtoull(buf, 0, &ashift);
+	if (rc < 0)
+		return (SET_ERROR(rc));
+
+	/* Reject values outside the currently permitted window. */
+	if (ashift < ASHIFT_MIN || ashift > zfs_vdev_max_auto_ashift)
+		return (SET_ERROR(-EINVAL));
+
+	rc = param_set_ulong(buf, kp);
+
+	return (rc < 0 ? SET_ERROR(rc) : 0);
+}
+
+/*
+ * Module parameter setter for the maximum automatic ashift.
+ *
+ * The new value must parse as an unsigned integer and lie within
+ * [zfs_vdev_min_auto_ashift, ASHIFT_MAX]; it is then stored with
+ * param_set_ulong().  Returns 0 on success or a negative errno.
+ */
+int
+param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
+{
+	uint64_t ashift;
+	int rc;
+
+	rc = kstrtoull(buf, 0, &ashift);
+	if (rc < 0)
+		return (SET_ERROR(rc));
+
+	/* Reject values outside the currently permitted window. */
+	if (ashift > ASHIFT_MAX || ashift < zfs_vdev_min_auto_ashift)
+		return (SET_ERROR(-EINVAL));
+
+	rc = param_set_ulong(buf, kp);
+
+	return (rc < 0 ? SET_ERROR(rc) : 0);
+}
+
+/*
+ * Expose the open timeout (zfs_vdev_open_timeout_ms, used by
+ * vdev_disk_open()) as a read-write unsigned integer module parameter.
+ */
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
+	"Timeout before determining that a device is missing");

diff --git a/zfs/module/os/linux/zfs/vdev_file.c b/zfs/module/os/linux/zfs/vdev_file.c
new file mode 100644
index 0000000..98338e6
--- /dev/null
+++ b/zfs/module/os/linux/zfs/vdev_file.c

@@ -0,0 +1,382 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_file.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_trim.h>
+#include <sys/zio.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/fs/zfs.h>
+#include <sys/abd.h>
+#include <sys/fcntl.h>
+#include <sys/vnode.h>
+#include <sys/zfs_file.h>
+#ifdef _KERNEL
+#include <linux/falloc.h>
+#endif
+/*
+ * Virtual device vector for files.
+ */
+
+/*
+ * Task queue on which file vdev reads, writes and deferred fsyncs are
+ * dispatched.  Created by vdev_file_init() and destroyed by
+ * vdev_file_fini().
+ */
+static taskq_t *vdev_file_taskq;
+
+/*
+ * By default, the logical/physical ashift for file vdevs is set to
+ * SPA_MINBLOCKSHIFT (9). This allows all file vdevs to use 512B (1 << 9)
+ * blocksizes. Users may opt to change one or both of these for testing
+ * or performance reasons. Care should be taken as these values will
+ * impact the vdev_ashift setting which can only be set at vdev creation
+ * time.
+ */
+unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
+unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
+
+/*
+ * Hold hook for file vdevs.  Only asserts that the vdev has a path.
+ */
+static void
+vdev_file_hold(vdev_t *vd)
+{
+	ASSERT(vd->vdev_path != NULL);
+}
+
+/*
+ * Release hook for file vdevs.  Only asserts that the vdev has a path.
+ */
+static void
+vdev_file_rele(vdev_t *vd)
+{
+	ASSERT(vd->vdev_path != NULL);
+}
+
+/*
+ * Translate a pool open mode into open(2) style flags.
+ *
+ * Read+write maps to O_RDWR, read-only to O_RDONLY and write-only to
+ * O_WRONLY; when neither bit is set no access flag is added.
+ * O_LARGEFILE is always included.
+ */
+static mode_t
+vdev_file_open_mode(spa_mode_t spa_mode)
+{
+	int readable = (spa_mode & SPA_MODE_READ) != 0;
+	int writable = (spa_mode & SPA_MODE_WRITE) != 0;
+	mode_t mode;
+
+	if (readable && writable)
+		mode = O_RDWR;
+	else if (readable)
+		mode = O_RDONLY;
+	else if (writable)
+		mode = O_WRONLY;
+	else
+		mode = 0;
+
+	return (mode | O_LARGEFILE);
+}
+
+/*
+ * Open (or, during a reopen, re-examine) the file backing a file vdev.
+ *
+ * On success *psize and *max_psize are both set to the file's current
+ * size and the ashift outputs are set from vdev_file_logical_ashift and
+ * vdev_file_physical_ashift.
+ *
+ * Returns 0 on success, EINVAL for a missing or relative path, ENODEV
+ * when (in the kernel) the path cannot be stat'ed or is not a regular
+ * file, or the error returned by zfs_file_open() / zfs_file_getattr().
+ */
+static int
+vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	vdev_file_t *vf;
+	zfs_file_attr_t zfa;
+	int error;
+
+	/* Rotational optimizations only make sense on block devices. */
+	vd->vdev_nonrot = B_TRUE;
+
+	/*
+	 * TRIM is always attempted on file vdevs; whether it works
+	 * depends on the kernel version and the underlying filesystem,
+	 * but trying is safe.
+	 */
+	vd->vdev_has_trim = B_TRUE;
+
+	/*
+	 * Secure TRIM cannot be requested from the underlying
+	 * filesystem, so it is never advertised.
+	 */
+	vd->vdev_has_securetrim = B_FALSE;
+
+	/* A pathname is required and it has to be absolute. */
+	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
+		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+		return (SET_ERROR(EINVAL));
+	}
+
+	vf = vd->vdev_tsd;
+	if (vf != NULL) {
+		/*
+		 * Already open: this must be a reopen, so only the size
+		 * is refreshed below.
+		 */
+		ASSERT(vd->vdev_reopening);
+	} else {
+		zfs_file_t *fp;
+
+		vf = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
+		vd->vdev_tsd = vf;
+
+		/*
+		 * Files are always opened from the root of the global
+		 * zone, even when running in a local zone.  By this point
+		 * the administrator has decided the pool should be
+		 * available to local zone users, so the underlying
+		 * devices should be as well.
+		 */
+		ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
+
+		error = zfs_file_open(vd->vdev_path,
+		    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
+		if (error != 0) {
+			vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+			return (error);
+		}
+
+		vf->vf_file = fp;
+
+#ifdef _KERNEL
+		/* Only regular files are acceptable. */
+		if (zfs_file_getattr(fp, &zfa) != 0)
+			return (SET_ERROR(ENODEV));
+
+		if (!S_ISREG(zfa.zfa_mode)) {
+			vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+			return (SET_ERROR(ENODEV));
+		}
+#endif
+	}
+
+	/* Common tail for open and reopen: report the current size. */
+	error = zfs_file_getattr(vf->vf_file, &zfa);
+	if (error != 0) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		return (error);
+	}
+
+	*psize = zfa.zfa_size;
+	*max_psize = zfa.zfa_size;
+	*logical_ashift = vdev_file_logical_ashift;
+	*physical_ashift = vdev_file_physical_ashift;
+
+	return (0);
+}
+
+/*
+ * Close a file vdev: close the backing file if one is open, clear the
+ * delayed-close flag and free the vdev_file_t.
+ *
+ * Nothing is done while the vdev is being reopened or when it has no
+ * vdev_file_t attached.
+ */
+static void
+vdev_file_close(vdev_t *vd)
+{
+	vdev_file_t *file = vd->vdev_tsd;
+
+	if (file == NULL || vd->vdev_reopening)
+		return;
+
+	if (file->vf_file != NULL)
+		(void) zfs_file_close(file->vf_file);
+
+	vd->vdev_delayed_close = B_FALSE;
+	vd->vdev_tsd = NULL;
+	kmem_free(file, sizeof (vdev_file_t));
+}
+
+/*
+ * Taskq callback performing the read or write for a file vdev zio.
+ *
+ * 'arg' is the zio.  Data moves through a linear buffer borrowed from the
+ * zio's abd.  A short transfer that reported no error is turned into
+ * ENOSPC.  The zio is completed with zio_delay_interrupt().
+ */
+static void
+vdev_file_io_strategy(void *arg)
+{
+	zio_t *zio = (zio_t *)arg;
+	vdev_file_t *vf = zio->io_vd->vdev_tsd;
+	loff_t off = zio->io_offset;
+	ssize_t size = zio->io_size;
+	ssize_t resid = 0;
+	void *buf;
+	int err;
+
+	if (zio->io_type == ZIO_TYPE_READ) {
+		/* Read into the borrowed buffer, then copy back to the abd. */
+		buf = abd_borrow_buf(zio->io_abd, zio->io_size);
+		err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
+		abd_return_buf_copy(zio->io_abd, buf, size);
+	} else {
+		/* Borrow a copy of the abd contents and write it out. */
+		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+		err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
+		abd_return_buf(zio->io_abd, buf, size);
+	}
+
+	if (err == 0 && resid != 0)
+		err = SET_ERROR(ENOSPC);
+	zio->io_error = err;
+
+	zio_delay_interrupt(zio);
+}
+
+/*
+ * Taskq callback that syncs the file backing the zio's vdev, records the
+ * result in the zio and completes it with zio_interrupt().
+ */
+static void
+vdev_file_io_fsync(void *arg)
+{
+	zio_t *zio = (zio_t *)arg;
+	vdev_t *vd = zio->io_vd;
+	vdev_file_t *vf = vd->vdev_tsd;
+
+	zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
+	zio_interrupt(zio);
+}
+
+/*
+ * Start the I/O described by 'zio' on a file vdev.
+ *
+ * Cache flush ioctls are turned into an fsync of the backing file, TRIM
+ * is turned into a fallocate() call, and reads and writes are dispatched
+ * to vdev_file_taskq.
+ */
+static void
+vdev_file_io_start(zio_t *zio)
+{
+	vdev_t *vd = zio->io_vd;
+	vdev_file_t *vf = vd->vdev_tsd;
+
+	if (zio->io_type == ZIO_TYPE_IOCTL) {
+		/* XXPOLICY */
+		if (!vdev_readable(vd)) {
+			zio->io_error = SET_ERROR(ENXIO);
+			zio_interrupt(zio);
+			return;
+		}
+
+		if (zio->io_cmd != DKIOCFLUSHWRITECACHE) {
+			zio->io_error = SET_ERROR(ENOTSUP);
+		} else if (!zfs_nocacheflush) {
+			/*
+			 * vfs_fsync() cannot safely be called while
+			 * PF_FSTRANS is set in the current context;
+			 * filesystems like XFS include sanity checks to
+			 * verify it is not already set, see
+			 * xfs_vm_writepage().  In that case the sync is
+			 * handed to a different context.
+			 */
+			if (__spl_pf_fstrans_check()) {
+				VERIFY3U(taskq_dispatch(vdev_file_taskq,
+				    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
+				    TASKQID_INVALID);
+				return;
+			}
+
+			zio->io_error = zfs_file_fsync(vf->vf_file,
+			    O_SYNC | O_DSYNC);
+		}
+
+		zio_execute(zio);
+		return;
+	}
+
+	if (zio->io_type == ZIO_TYPE_TRIM) {
+#ifdef __linux__
+		int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+#else
+		int mode = 0;
+#endif
+
+		ASSERT3U(zio->io_size, !=, 0);
+		zio->io_error = zfs_file_fallocate(vf->vf_file,
+		    mode, zio->io_offset, zio->io_size);
+		zio_execute(zio);
+		return;
+	}
+
+	/* Everything else is a read or write handled on the taskq. */
+	zio->io_target_timestamp = zio_handle_io_delay(zio);
+
+	VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
+	    TQ_SLEEP), !=, TASKQID_INVALID);
+}
+
+/*
+ * Post-I/O hook for file vdevs.  Intentionally does nothing; the cast
+ * merely marks the parameter as unused.
+ */
+static void
+vdev_file_io_done(zio_t *zio)
+{
+	(void) zio;
+}
+
+/*
+ * Operations vector for file vdevs.  Hooks set to NULL are not
+ * implemented by this vdev type; asize, min_asize and xlate use the
+ * vdev_default_* implementations.
+ */
+vdev_ops_t vdev_file_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_file_open,
+	.vdev_op_close = vdev_file_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_file_io_start,
+	.vdev_op_io_done = vdev_file_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_file_hold,
+	.vdev_op_rele = vdev_file_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_FILE,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+};
+
+/*
+ * Create the "z_vdev_file" dynamic task queue used for file vdev I/O.
+ * The thread count passed is the larger of boot_ncpus and 16.  Creation
+ * failure is fatal (VERIFY).
+ */
+void
+vdev_file_init(void)
+{
+	int nthreads = MAX(boot_ncpus, 16);
+
+	vdev_file_taskq = taskq_create("z_vdev_file", nthreads, minclsyspri,
+	    boot_ncpus, INT_MAX, TASKQ_DYNAMIC);
+
+	VERIFY(vdev_file_taskq);
+}
+
+/*
+ * Destroy the task queue created by vdev_file_init().
+ */
+void
+vdev_file_fini(void)
+{
+	taskq_destroy(vdev_file_taskq);
+}
+
+/*
+ * From userland we access disks just like files.
+ */
+#ifndef _KERNEL
+
+/*
+ * Userland-only (!_KERNEL) operations vector for disk vdevs: every hook
+ * is the file vdev implementation, only the reported type differs.
+ */
+vdev_ops_t vdev_disk_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
+	.vdev_op_open = vdev_file_open,
+	.vdev_op_close = vdev_file_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_file_io_start,
+	.vdev_op_io_done = vdev_file_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = vdev_file_hold,
+	.vdev_op_rele = vdev_file_rele,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
+	.vdev_op_leaf = B_TRUE			/* leaf vdev */
+};
+
+#endif
+
+/*
+ * Expose vdev_file_logical_ashift and vdev_file_physical_ashift as
+ * read-write unsigned long module parameters.
+ */
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW,
+	"Logical ashift for file-based devices");
+ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW,
+	"Physical ashift for file-based devices");

diff --git a/zfs/module/os/linux/zfs/zfs_acl.c b/zfs/module/os/linux/zfs/zfs_acl.c
new file mode 100644
index 0000000..cf37aec
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_acl.c

@@ -0,0 +1,2948 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/sid.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/fs/zfs.h>
+#include <sys/policy.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/dmu.h>
+#include <sys/dnode.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/trace_acl.h>
+#include <sys/zpl.h>
+
+/*
+ * Short names for the allow/deny ACE types.  MIN_ACE_TYPE/MAX_ACE_TYPE bound
+ * the range of type values accepted by zfs_acl_valid_ace_type().
+ */
+#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
+#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
+#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
+#define	MIN_ACE_TYPE	ALLOW
+
+/* Flag combination that marks an owning-group entry. */
+#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
+#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
+    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
+#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
+#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
+    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
+
+#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
+    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
+    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
+    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
+
+/* WRITE_MASK is the union of the data-write and attribute-write groups. */
+#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
+#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
+    ACE_DELETE|ACE_DELETE_CHILD)
+#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
+
+/* NOTE: OGE_CLEAR and OKAY_MASK_BITS currently expand to the same bits. */
+#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
+
+#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
+    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
+
+#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
+    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
+
+#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)
+
+#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
+    ZFS_ACL_PROTECTED)
+
+#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
+    ZFS_ACL_OBJ_ACE)
+
+/* Execute bits for user, group and other. */
+#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
+
+/* 2^31 */
+#define	IDMAP_WK_CREATOR_OWNER_UID	2147483648U
+
+/* Return the z_type field of an old-format (zfs_oldace_t) ACE. */
+static uint16_t
+zfs_ace_v0_get_type(void *acep)
+{
+	zfs_oldace_t *oldace = acep;
+
+	return (oldace->z_type);
+}
+
+/* Return the z_flags field of an old-format (zfs_oldace_t) ACE. */
+static uint16_t
+zfs_ace_v0_get_flags(void *acep)
+{
+	zfs_oldace_t *oldace = acep;
+
+	return (oldace->z_flags);
+}
+
+/* Return the z_access_mask field of an old-format (zfs_oldace_t) ACE. */
+static uint32_t
+zfs_ace_v0_get_mask(void *acep)
+{
+	zfs_oldace_t *oldace = acep;
+
+	return (oldace->z_access_mask);
+}
+
+/* Return the z_fuid field of an old-format (zfs_oldace_t) ACE. */
+static uint64_t
+zfs_ace_v0_get_who(void *acep)
+{
+	zfs_oldace_t *oldace = acep;
+
+	return (oldace->z_fuid);
+}
+
+/* Store 'type' into the z_type field of an old-format ACE. */
+static void
+zfs_ace_v0_set_type(void *acep, uint16_t type)
+{
+	zfs_oldace_t *oldace = acep;
+
+	oldace->z_type = type;
+}
+
+/* Store 'flags' into the z_flags field of an old-format ACE. */
+static void
+zfs_ace_v0_set_flags(void *acep, uint16_t flags)
+{
+	zfs_oldace_t *oldace = acep;
+
+	oldace->z_flags = flags;
+}
+
+/* Store 'mask' into the z_access_mask field of an old-format ACE. */
+static void
+zfs_ace_v0_set_mask(void *acep, uint32_t mask)
+{
+	zfs_oldace_t *oldace = acep;
+
+	oldace->z_access_mask = mask;
+}
+
+/* Store 'who' into the z_fuid field of an old-format ACE. */
+static void
+zfs_ace_v0_set_who(void *acep, uint64_t who)
+{
+	zfs_oldace_t *oldace = acep;
+
+	oldace->z_fuid = who;
+}
+
+/*
+ * Old-format ACEs are fixed size, so the entry itself is not examined.
+ */
+/*ARGSUSED*/
+static size_t
+zfs_ace_v0_size(void *acep)
+{
+	return (sizeof (zfs_oldace_t));
+}
+
+/*
+ * Size of the part of an old-format ACE that is always present; for this
+ * format that is the whole zfs_oldace_t.
+ */
+static size_t
+zfs_ace_v0_abstract_size(void)
+{
+	return (sizeof (zfs_oldace_t));
+}
+
+/* Byte offset of z_access_mask within zfs_oldace_t. */
+static int
+zfs_ace_v0_mask_off(void)
+{
+	return (offsetof(zfs_oldace_t, z_access_mask));
+}
+
+/*
+ * Old-format ACEs carry no trailing data: *datap is always set to NULL and
+ * the returned length is 0.
+ */
+/*ARGSUSED*/
+static int
+zfs_ace_v0_data(void *acep, void **datap)
+{
+	*datap = NULL;
+	return (0);
+}
+
+/*
+ * Accessor table for old-format (zfs_oldace_t) ACEs.  zfs_acl_alloc()
+ * installs it for every ACL version other than ZFS_ACL_VERSION_FUID.
+ */
+static acl_ops_t zfs_acl_v0_ops = {
+	.ace_mask_get = zfs_ace_v0_get_mask,
+	.ace_mask_set = zfs_ace_v0_set_mask,
+	.ace_flags_get = zfs_ace_v0_get_flags,
+	.ace_flags_set = zfs_ace_v0_set_flags,
+	.ace_type_get = zfs_ace_v0_get_type,
+	.ace_type_set = zfs_ace_v0_set_type,
+	.ace_who_get = zfs_ace_v0_get_who,
+	.ace_who_set = zfs_ace_v0_set_who,
+	.ace_size = zfs_ace_v0_size,
+	.ace_abstract_size = zfs_ace_v0_abstract_size,
+	.ace_mask_off = zfs_ace_v0_mask_off,
+	.ace_data = zfs_ace_v0_data
+};
+
+/* Return the z_type field from the header of a FUID-format ACE. */
+static uint16_t
+zfs_ace_fuid_get_type(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_type);
+}
+
+/* Return the z_flags field from the header of a FUID-format ACE. */
+static uint16_t
+zfs_ace_fuid_get_flags(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_flags);
+}
+
+/* Return the z_access_mask field from the header of a FUID-format ACE. */
+static uint32_t
+zfs_ace_fuid_get_mask(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	return (hdr->z_access_mask);
+}
+
+/*
+ * Return the FUID stored in a FUID-format ACE.  Owner, owning-group and
+ * everyone entries yield -1 (as a uint64_t) and z_fuid is not read.
+ */
+static uint64_t
+zfs_ace_fuid_get_who(void *args)
+{
+	zfs_ace_t *ace = args;
+
+	switch (ace->z_hdr.z_flags & ACE_TYPE_FLAGS) {
+	case ACE_OWNER:
+	case OWNING_GROUP:
+	case ACE_EVERYONE:
+		return (-1);
+	default:
+		return (ace->z_fuid);
+	}
+}
+
+/* Store 'type' into the header of a FUID-format ACE. */
+static void
+zfs_ace_fuid_set_type(void *acep, uint16_t type)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_type = type;
+}
+
+/* Store 'flags' into the header of a FUID-format ACE. */
+static void
+zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_flags = flags;
+}
+
+/* Store 'mask' into the header of a FUID-format ACE. */
+static void
+zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
+{
+	zfs_ace_hdr_t *hdr = acep;
+
+	hdr->z_access_mask = mask;
+}
+
+/*
+ * Store 'who' as the FUID of a FUID-format ACE.  Nothing is written for
+ * owner, owning-group and everyone entries; the ACE's flags must already
+ * be set because they decide which case applies.
+ */
+static void
+zfs_ace_fuid_set_who(void *arg, uint64_t who)
+{
+	zfs_ace_t *ace = arg;
+
+	switch (ace->z_hdr.z_flags & ACE_TYPE_FLAGS) {
+	case ACE_OWNER:
+	case OWNING_GROUP:
+	case ACE_EVERYONE:
+		return;
+	default:
+		ace->z_fuid = who;
+		return;
+	}
+}
+
+/*
+ * Return the size in bytes of a FUID-format ACE, derived from its header:
+ *   - the four *_OBJECT_ACE_TYPE types use sizeof (zfs_object_ace_t);
+ *   - ALLOW/DENY entries for owner, owning group or everyone consist of the
+ *     header only;
+ *   - everything else uses sizeof (zfs_ace_t).
+ */
+static size_t
+zfs_ace_fuid_size(void *acep)
+{
+	zfs_ace_hdr_t *hdr = acep;
+	uint16_t who_kind;
+
+	if (hdr->z_type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+	    hdr->z_type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+	    hdr->z_type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+	    hdr->z_type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE)
+		return (sizeof (zfs_object_ace_t));
+
+	if (hdr->z_type == ALLOW || hdr->z_type == DENY) {
+		who_kind = hdr->z_flags & ACE_TYPE_FLAGS;
+		if (who_kind == ACE_OWNER || who_kind == OWNING_GROUP ||
+		    who_kind == ACE_EVERYONE)
+			return (sizeof (zfs_ace_hdr_t));
+	}
+
+	return (sizeof (zfs_ace_t));
+}
+
+/*
+ * Size of the header (zfs_ace_hdr_t), the smallest size that
+ * zfs_ace_fuid_size() returns for a FUID-format ACE.
+ */
+static size_t
+zfs_ace_fuid_abstract_size(void)
+{
+	return (sizeof (zfs_ace_hdr_t));
+}
+
+/* Byte offset of z_access_mask within zfs_ace_hdr_t. */
+static int
+zfs_ace_fuid_mask_off(void)
+{
+	return (offsetof(zfs_ace_hdr_t, z_access_mask));
+}
+
+/*
+ * Locate the extra data that follows the zfs_ace_t part of an object ACE.
+ * For the four *_OBJECT_ACE_TYPE types *datap is pointed just past the
+ * zfs_ace_t and the number of extra bytes is returned; for every other type
+ * *datap is set to NULL and 0 is returned.
+ */
+static int
+zfs_ace_fuid_data(void *acep, void **datap)
+{
+	zfs_ace_t *ace = acep;
+	uint16_t acetype = ace->z_hdr.z_type;
+
+	if (acetype != ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE &&
+	    acetype != ACE_ACCESS_DENIED_OBJECT_ACE_TYPE &&
+	    acetype != ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE &&
+	    acetype != ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE) {
+		*datap = NULL;
+		return (0);
+	}
+
+	*datap = (caddr_t)acep + sizeof (zfs_ace_t);
+	return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
+}
+
+/*
+ * Accessor table for FUID-format ACEs (zfs_ace_hdr_t / zfs_ace_t /
+ * zfs_object_ace_t).  zfs_acl_alloc() installs it for
+ * ZFS_ACL_VERSION_FUID, and zfs_acl_xform() switches an ACL over to it.
+ */
+static acl_ops_t zfs_acl_fuid_ops = {
+	.ace_mask_get = zfs_ace_fuid_get_mask,
+	.ace_mask_set = zfs_ace_fuid_set_mask,
+	.ace_flags_get = zfs_ace_fuid_get_flags,
+	.ace_flags_set = zfs_ace_fuid_set_flags,
+	.ace_type_get = zfs_ace_fuid_get_type,
+	.ace_type_set = zfs_ace_fuid_set_type,
+	.ace_who_get = zfs_ace_fuid_get_who,
+	.ace_who_set = zfs_ace_fuid_set_who,
+	.ace_size = zfs_ace_fuid_size,
+	.ace_abstract_size = zfs_ace_fuid_abstract_size,
+	.ace_mask_off = zfs_ace_fuid_mask_off,
+	.ace_data = zfs_ace_fuid_data
+};
+
+/*
+ * The following three functions are provided for compatibility with
+ * older ZPL versions in order to determine if the file used to have
+ * an external ACL and what version of ACL previously existed on the
+ * file.  Would really be nice to not need this, sigh.
+ */
+uint64_t
+zfs_external_acl(znode_t *zp)
+{
+	zfs_acl_phys_t aclphys;
+	int err;
+
+	/* SA znodes report 0 without consulting SA_ZPL_ZNODE_ACL. */
+	if (zp->z_is_sa)
+		return (0);
+
+	/*
+	 * zfs_sa_upgrade() may race with us and change z_is_sa.  If the
+	 * lookup below fails, that upgrade is the only expected reason.
+	 */
+	err = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),
+	    &aclphys, sizeof (aclphys));
+	if (err != 0) {
+		/*
+		 * After an upgrade SA_ZPL_ZNODE_ACL should have been
+		 * removed.
+		 */
+		VERIFY(zp->z_is_sa && err == ENOENT);
+		return (0);
+	}
+
+	return (aclphys.z_acl_extern_obj);
+}
+
+/*
+ * Report the size of a znode's ACL in bytes (*aclsize) and its number of
+ * entries (*aclcount).  The caller must hold z_acl_lock.
+ *
+ * SA znodes get both values from the DACL attributes.  Other znodes read
+ * the zfs_acl_phys_t into *aclphys and interpret it by ACL version, which
+ * is what makes this more complicated than it should be.
+ *
+ * Returns 0 on success or the error from sa_size()/sa_lookup().
+ */
+static int
+zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
+    zfs_acl_phys_t *aclphys)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int error;
+
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+
+	if (!zp->z_is_sa) {
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    aclphys, sizeof (*aclphys));
+		if (error != 0)
+			return (error);
+
+		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
+			/* here z_acl_size is used as the entry count */
+			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
+			*aclcount = aclphys->z_acl_size;
+		} else {
+			*aclsize = aclphys->z_acl_size;
+			*aclcount = aclphys->z_acl_count;
+		}
+	} else {
+		uint64_t nentries;
+		int nbytes;
+
+		error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
+		    &nbytes);
+		if (error != 0)
+			return (error);
+		*aclsize = nbytes;
+
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
+		    &nentries, sizeof (nentries));
+		if (error != 0)
+			return (error);
+		*aclcount = nentries;
+	}
+
+	return (0);
+}
+
+/*
+ * Return the ACL version of a znode: ZFS_ACL_VERSION_FUID for SA znodes,
+ * otherwise the z_acl_version stored in its SA_ZPL_ZNODE_ACL attribute.
+ */
+int
+zfs_znode_acl_version(znode_t *zp)
+{
+	zfs_acl_phys_t aclphys;
+	int err;
+
+	if (zp->z_is_sa)
+		return (ZFS_ACL_VERSION_FUID);
+
+	/*
+	 * zfs_sa_upgrade() may race with us and change z_is_sa.  If the
+	 * lookup below fails, that upgrade is the only expected reason.
+	 */
+	err = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),
+	    &aclphys, sizeof (aclphys));
+	if (err == 0)
+		return (aclphys.z_acl_version);
+
+	/* After an upgrade SA_ZPL_ZNODE_ACL should have been removed. */
+	VERIFY(zp->z_is_sa && err == ENOENT);
+	return (ZFS_ACL_VERSION_FUID);
+}
+
+/*
+ * Map a ZPL version to an ACL version: versions below ZPL_VERSION_FUID use
+ * ZFS_ACL_VERSION_INITIAL, all others ZFS_ACL_VERSION_FUID.
+ */
+static int
+zfs_acl_version(int version)
+{
+	return ((version < ZPL_VERSION_FUID) ?
+	    ZFS_ACL_VERSION_INITIAL : ZFS_ACL_VERSION_FUID);
+}
+
+/* ACL version implied by the z_version of the znode's zfsvfs_t. */
+static int
+zfs_acl_version_zp(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+
+	return (zfs_acl_version(zfsvfs->z_version));
+}
+
+/*
+ * Allocate a zeroed zfs_acl_t with an empty node list for ACL version
+ * 'vers'.  ZFS_ACL_VERSION_FUID selects the FUID accessor table; any other
+ * version selects the old-format table.  Release with zfs_acl_free().
+ */
+zfs_acl_t *
+zfs_acl_alloc(int vers)
+{
+	zfs_acl_t *acl = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
+
+	acl->z_version = vers;
+	acl->z_ops = (vers == ZFS_ACL_VERSION_FUID) ?
+	    &zfs_acl_fuid_ops : &zfs_acl_v0_ops;
+	list_create(&acl->z_acl, sizeof (zfs_acl_node_t),
+	    offsetof(zfs_acl_node_t, z_next));
+
+	return (acl);
+}
+
+/*
+ * Allocate a zeroed ACL node.  When 'bytes' is non-zero a data buffer of
+ * that size is also allocated and recorded in z_acldata/z_allocdata, with
+ * z_allocsize and z_size both set to 'bytes'.  With bytes == 0 the node has
+ * no buffer.
+ */
+zfs_acl_node_t *
+zfs_acl_node_alloc(size_t bytes)
+{
+	zfs_acl_node_t *node = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
+
+	if (bytes == 0)
+		return (node);
+
+	node->z_allocdata = kmem_alloc(bytes, KM_SLEEP);
+	node->z_acldata = node->z_allocdata;
+	node->z_allocsize = bytes;
+	node->z_size = bytes;
+
+	return (node);
+}
+
+/*
+ * Free an ACL node, including its data buffer when one was allocated
+ * (z_allocsize != 0).
+ */
+static void
+zfs_acl_node_free(zfs_acl_node_t *aclnode)
+{
+	if (aclnode->z_allocsize != 0) {
+		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
+	}
+
+	kmem_free(aclnode, sizeof (zfs_acl_node_t));
+}
+
+/*
+ * Remove and free every node on the ACL's list, then reset the ACL's entry
+ * count and byte count to zero.  The zfs_acl_t itself stays allocated.
+ */
+static void
+zfs_acl_release_nodes(zfs_acl_t *aclp)
+{
+	zfs_acl_node_t *node;
+
+	for (node = list_head(&aclp->z_acl); node != NULL;
+	    node = list_head(&aclp->z_acl)) {
+		list_remove(&aclp->z_acl, node);
+		zfs_acl_node_free(node);
+	}
+
+	aclp->z_acl_bytes = 0;
+	aclp->z_acl_count = 0;
+}
+
+/*
+ * Free an ACL obtained from zfs_acl_alloc(): release all of its nodes,
+ * destroy the node list, then free the zfs_acl_t itself.
+ */
+void
+zfs_acl_free(zfs_acl_t *aclp)
+{
+	zfs_acl_release_nodes(aclp);
+	list_destroy(&aclp->z_acl);
+	kmem_free(aclp, sizeof (zfs_acl_t));
+}
+
+/*
+ * Check an ACE's type against its flags.
+ *
+ * For ALLOW, DENY, system-audit and system-alarm entries the "who" bits of
+ * 'flags' must name the owner, the owning group, everyone, a user (no bits
+ * set) or a group (ACE_IDENTIFIER_GROUP).  Any other type is accepted as
+ * long as it lies within [MIN_ACE_TYPE, MAX_ACE_TYPE].
+ */
+static boolean_t
+zfs_acl_valid_ace_type(uint_t type, uint_t flags)
+{
+	if (type == ALLOW || type == DENY ||
+	    type == ACE_SYSTEM_AUDIT_ACE_TYPE ||
+	    type == ACE_SYSTEM_ALARM_ACE_TYPE) {
+		uint16_t who_kind = flags & ACE_TYPE_FLAGS;
+
+		return (who_kind == ACE_OWNER ||
+		    who_kind == OWNING_GROUP ||
+		    who_kind == ACE_EVERYONE || who_kind == 0 ||
+		    who_kind == ACE_IDENTIFIER_GROUP);
+	}
+
+	if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Validate a single ACE (type plus flags) destined for 'aclp' and record
+ * hints on the ACL as a side effect:
+ *   - object ACE types are rejected for ACL versions older than
+ *     ZFS_ACL_VERSION_FUID, otherwise ZFS_ACL_OBJ_ACE is set;
+ *   - a directory ACE with file/directory inherit flags sets
+ *     ZFS_INHERIT_ACE;
+ *   - inherit-only / no-propagate without a file or directory inherit flag
+ *     is rejected.
+ * Hints set before a later check fails are left in place.
+ */
+static boolean_t
+zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
+{
+	boolean_t is_object_ace;
+
+	/* The type/flags combination must be acceptable at all. */
+	if (!zfs_acl_valid_ace_type(type, iflags))
+		return (B_FALSE);
+
+	is_object_ace = (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+	    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+	    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE);
+	if (is_object_ace) {
+		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
+			return (B_FALSE);
+		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
+	}
+
+	/* Inheritance-level flags. */
+	if (S_ISDIR(obj_mode) &&
+	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
+		aclp->z_hints |= ZFS_INHERIT_ACE;
+
+	if ((iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) &&
+	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)) == 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Step through the ACEs of an ACL, one per call.
+ *
+ * aclp		ACL to walk; the cursor (z_curr_node, z_next_ace and the
+ *		node's z_ace_idx) is kept in the ACL itself.
+ * start	NULL to restart at the first ACE of the first node; any
+ *		non-NULL value continues from the stored cursor.
+ * who, access_mask, iflags, type
+ *		filled in from the ACE through the ACL's accessor table.
+ *
+ * Returns a pointer to the ACE just decoded, or NULL when there are no more
+ * entries or the next entry would extend past the node's z_size bytes.
+ *
+ * The entry's fixed-size header is checked against the buffer before
+ * ace_size() is called, because ace_size() may read the header to decide
+ * how large the entry is.
+ */
+static void *
+zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
+    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
+{
+	zfs_acl_node_t *aclnode;
+
+	ASSERT(aclp);
+
+	if (start == NULL) {
+		aclnode = list_head(&aclp->z_acl);
+		if (aclnode == NULL)
+			return (NULL);
+
+		aclp->z_next_ace = aclnode->z_acldata;
+		aclp->z_curr_node = aclnode;
+		aclnode->z_ace_idx = 0;
+	}
+
+	aclnode = aclp->z_curr_node;
+
+	if (aclnode == NULL)
+		return (NULL);
+
+	/* Current node exhausted: advance to the next one, if any. */
+	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
+		aclnode = list_next(&aclp->z_acl, aclnode);
+		if (aclnode == NULL)
+			return (NULL);
+		else {
+			aclp->z_curr_node = aclnode;
+			aclnode->z_ace_idx = 0;
+			aclp->z_next_ace = aclnode->z_acldata;
+		}
+	}
+
+	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
+		void *acep = aclp->z_next_ace;
+		caddr_t limit = (caddr_t)aclnode->z_acldata + aclnode->z_size;
+		size_t ace_size;
+
+		/*
+		 * Make sure the header is inside the buffer before
+		 * ace_size() looks at it.  Every size ace_size() can return
+		 * is at least ace_abstract_size(), so this never rejects an
+		 * entry that the full-size check below would accept.
+		 */
+		if (acep == NULL ||
+		    ((caddr_t)acep + aclp->z_ops->ace_abstract_size()) > limit)
+			return (NULL);
+
+		/*
+		 * Make sure we don't overstep our bounds
+		 */
+		ace_size = aclp->z_ops->ace_size(acep);
+
+		if (((caddr_t)acep + ace_size) > limit)
+			return (NULL);
+
+		*iflags = aclp->z_ops->ace_flags_get(acep);
+		*type = aclp->z_ops->ace_type_get(acep);
+		*access_mask = aclp->z_ops->ace_mask_get(acep);
+		*who = aclp->z_ops->ace_who_get(acep);
+		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
+		aclnode->z_ace_idx++;
+
+		return ((void *)acep);
+	}
+	return (NULL);
+}
+
+/*
+ * Walker callback over a zfs_acl_t.  The cookie is the previously returned
+ * ACE pointer converted to an integer (0 starts the walk); the return value
+ * is the next ACE pointer in the same form, or 0 at the end.  aclcnt is
+ * not used.
+ */
+/*ARGSUSED*/
+static uint64_t
+zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
+    uint16_t *flags, uint16_t *type, uint32_t *mask)
+{
+	zfs_acl_t *aclp = datap;
+	void *prev = (void *)(uintptr_t)cookie;
+	void *next;
+	uint64_t who;
+
+	next = zfs_acl_next_ace(aclp, prev, &who, mask, flags, type);
+
+	return ((uint64_t)(uintptr_t)next);
+}
+
+/*
+ * Copy 'aclcnt' ACEs from the ace_t / ace_object_t layout at 'datap' into
+ * the internal FUID layout at 'z_acl'.
+ *
+ * Each entry is validated with zfs_ace_valid() as it is copied; the first
+ * invalid entry aborts the copy with EINVAL and leaves *size untouched.
+ * For entries that are not owner/owning-group/everyone, the FUID is
+ * obtained from zfs_fuid_create().  On success *size receives the number of
+ * bytes written to z_acl and 0 is returned.
+ */
+static int
+zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp,
+    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
+    zfs_fuid_info_t **fuidp, cred_t *cr)
+{
+	zfs_ace_t *dst = z_acl;
+	ace_t *src = datap;
+	int i;
+
+	for (i = 0; i != aclcnt; i++) {
+		uint16_t who_kind;
+		size_t src_size;
+
+		dst->z_hdr.z_access_mask = src->a_access_mask;
+		dst->z_hdr.z_flags = src->a_flags;
+		dst->z_hdr.z_type = src->a_type;
+
+		who_kind = dst->z_hdr.z_flags & ACE_TYPE_FLAGS;
+		if (!(who_kind == ACE_OWNER || who_kind == OWNING_GROUP ||
+		    who_kind == ACE_EVERYONE)) {
+			dst->z_fuid = zfs_fuid_create(zfsvfs, src->a_who,
+			    cr, (who_kind == 0) ?
+			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
+		}
+
+		/* Reject the whole ACL on the first invalid entry. */
+		if (zfs_ace_valid(obj_mode, aclp, dst->z_hdr.z_type,
+		    dst->z_hdr.z_flags) != B_TRUE)
+			return (SET_ERROR(EINVAL));
+
+		if (src->a_type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+		    src->a_type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+		    src->a_type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+		    src->a_type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE) {
+			zfs_object_ace_t *zobj = (zfs_object_ace_t *)dst;
+			ace_object_t *obj = (ace_object_t *)src;
+
+			bcopy(obj->a_obj_type, zobj->z_object_type,
+			    sizeof (obj->a_obj_type));
+			bcopy(obj->a_inherit_obj_type,
+			    zobj->z_inherit_type,
+			    sizeof (obj->a_inherit_obj_type));
+			src_size = sizeof (ace_object_t);
+		} else {
+			src_size = sizeof (ace_t);
+		}
+
+		/* Source and destination entries advance by different sizes. */
+		src = (ace_t *)((caddr_t)src + src_size);
+		dst = (zfs_ace_t *)((caddr_t)dst +
+		    aclp->z_ops->ace_size(dst));
+	}
+
+	*size = (caddr_t)dst - (caddr_t)z_acl;
+
+	return (0);
+}
+
+/*
+ * Copy the ACEs of 'aclp' into the ace_t / ace_object_t layout at 'datap'.
+ *
+ * When 'filter' is non-zero, object ACEs are skipped and produce no output.
+ * For entries that are not owner/owning-group/everyone, a_who is obtained
+ * from zfs_fuid_map_id(); for the others the raw "who" value is stored.
+ */
+static void
+zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
+    void *datap, int filter)
+{
+	zfs_ace_hdr_t *zacep = NULL;
+	ace_t *out = datap;
+	uint64_t who;
+	uint32_t access_mask;
+	uint16_t iflags, type;
+
+	while ((zacep = zfs_acl_next_ace(aclp, zacep,
+	    &who, &access_mask, &iflags, &type)) != NULL) {
+		uint16_t who_kind = (iflags & ACE_TYPE_FLAGS);
+		size_t out_size;
+
+		if (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
+		    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
+		    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
+		    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE) {
+			zfs_object_ace_t *zobj;
+			ace_object_t *obj;
+
+			if (filter)
+				continue;
+
+			zobj = (zfs_object_ace_t *)zacep;
+			obj = (ace_object_t *)out;
+			bcopy(zobj->z_object_type,
+			    obj->a_obj_type,
+			    sizeof (zobj->z_object_type));
+			bcopy(zobj->z_inherit_type,
+			    obj->a_inherit_obj_type,
+			    sizeof (zobj->z_inherit_type));
+			out_size = sizeof (ace_object_t);
+		} else {
+			out_size = sizeof (ace_t);
+		}
+
+		if (who_kind == ACE_OWNER || who_kind == OWNING_GROUP ||
+		    who_kind == ACE_EVERYONE) {
+			out->a_who = (uid_t)(int64_t)who;
+		} else {
+			out->a_who = zfs_fuid_map_id(zfsvfs, who,
+			    cr, (who_kind & ACE_IDENTIFIER_GROUP) ?
+			    ZFS_ACE_GROUP : ZFS_ACE_USER);
+		}
+		out->a_access_mask = access_mask;
+		out->a_flags = iflags;
+		out->a_type = type;
+
+		out = (ace_t *)((caddr_t)out + out_size);
+	}
+}
+
+/*
+ * Copy 'aclcnt' ace_t entries into old-format zfs_oldace_t entries at
+ * 'z_acl', validating each with zfs_ace_valid().  The first invalid entry
+ * aborts the copy with EINVAL and leaves *size untouched; on success *size
+ * receives the number of bytes written and 0 is returned.
+ */
+static int
+zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep,
+    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
+{
+	zfs_oldace_t *dst = z_acl;
+	int i = 0;
+
+	while (i != aclcnt) {
+		ace_t *src = &acep[i];
+
+		dst->z_access_mask = src->a_access_mask;
+		dst->z_type = src->a_type;
+		dst->z_flags = src->a_flags;
+		dst->z_fuid = src->a_who;
+
+		/* Reject the whole ACL on the first invalid entry. */
+		if (zfs_ace_valid(obj_mode, aclp, dst->z_type,
+		    dst->z_flags) != B_TRUE)
+			return (SET_ERROR(EINVAL));
+
+		i++;
+		dst++;
+	}
+
+	*size = (caddr_t)dst - (caddr_t)z_acl;
+	return (0);
+}
+
+/*
+ * Convert an ACL from the old (ZFS_ACL_VERSION_INITIAL) ACE format to the
+ * FUID format in place.  All existing nodes are replaced by a single new
+ * node and the ACL's ops table, version, byte count and entry count are
+ * updated to match.
+ */
+void
+zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
+{
+	zfs_oldace_t *staging;
+	zfs_acl_node_t *fuidnode;
+	size_t staging_bytes;
+	void *iter = NULL;
+	uint64_t who;
+	uint32_t access_mask;
+	uint16_t type, iflags;
+	int n = 0;
+
+	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
+
+	/*
+	 * zfs_copy_ace_2_fuid() wants its input in one contiguous buffer,
+	 * so gather the existing ACEs into a temporary array first.  An ACL
+	 * is only converted once, so this cost is not paid repeatedly.
+	 */
+	staging_bytes = sizeof (zfs_oldace_t) * aclp->z_acl_count;
+	staging = kmem_alloc(staging_bytes, KM_SLEEP);
+	while ((iter = zfs_acl_next_ace(aclp, iter, &who,
+	    &access_mask, &iflags, &type)) != NULL) {
+		zfs_oldace_t *slot = &staging[n++];
+
+		slot->z_flags = iflags;
+		slot->z_type = type;
+		slot->z_fuid = who;
+		slot->z_access_mask = access_mask;
+	}
+
+	/* The new node is sized for zfs_object_ace_t entries. */
+	fuidnode = zfs_acl_node_alloc(aclp->z_acl_count *
+	    sizeof (zfs_object_ace_t));
+
+	/* Switch accessors before copying: the copy uses the new ace_size. */
+	aclp->z_ops = &zfs_acl_fuid_ops;
+	VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode,
+	    aclp, staging, fuidnode->z_acldata, aclp->z_acl_count,
+	    &fuidnode->z_size, NULL, cr) == 0);
+	fuidnode->z_ace_count = aclp->z_acl_count;
+	aclp->z_version = ZFS_ACL_VERSION;
+	kmem_free(staging, staging_bytes);
+
+	/* Drop the old-format nodes and install the converted one. */
+	zfs_acl_release_nodes(aclp);
+	list_insert_head(&aclp->z_acl, fuidnode);
+
+	aclp->z_acl_bytes = fuidnode->z_size;
+	aclp->z_acl_count = fuidnode->z_ace_count;
+}
+
+/*
+ * Convert a unix access mask to a v4 access mask.  Only the "other"
+ * permission bits (S_IROTH, S_IWOTH, S_IXOTH) of the input are examined.
+ */
+static uint32_t
+zfs_unix_to_v4(uint32_t access_mask)
+{
+	uint32_t v4_mask = 0;
+
+	v4_mask |= (access_mask & S_IROTH) ? ACE_READ_DATA : 0;
+	v4_mask |= (access_mask & S_IWOTH) ? ACE_WRITE_DATA : 0;
+	v4_mask |= (access_mask & S_IXOTH) ? ACE_EXECUTE : 0;
+
+	return (v4_mask);
+}
+
+/*
+ * Fill in an ACE through the ACL's accessor table: access mask, type and
+ * flags are always written; the "who" is written only when entry_type does
+ * not denote owner, owning group or everyone.
+ */
+static void
+zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
+    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
+{
+	uint16_t who_kind = entry_type & ACE_TYPE_FLAGS;
+
+	aclp->z_ops->ace_mask_set(acep, access_mask);
+	aclp->z_ops->ace_type_set(acep, access_type);
+	aclp->z_ops->ace_flags_set(acep, entry_type);
+
+	if (who_kind == ACE_OWNER || who_kind == OWNING_GROUP ||
+	    who_kind == ACE_EVERYONE)
+		return;
+
+	aclp->z_ops->ace_who_set(acep, fuid);
+}
+
+/*
+ * Record the first decision made about permission bit 'bit'.  If the bit
+ * has not been seen yet it is marked seen and, when 'allowed' is true,
+ * added to *mode.  Later ACEs mentioning the same bit have no effect.
+ */
+static void
+zfs_mode_mark_bit(mode_t *seen, mode_t *mode, mode_t bit, boolean_t allowed)
+{
+	if (*seen & bit)
+		return;
+
+	*seen |= bit;
+	if (allowed)
+		*mode |= bit;
+}
+
+/*
+ * Determine mode of file based on ACL.
+ *
+ * fmode	current mode; only the file type, setuid, setgid and sticky
+ *		bits are carried over.
+ * aclp		ACL to evaluate.
+ * pflags	ZFS_NO_EXECS_DENIED is set or cleared here depending on
+ *		whether execute ended up allowed for user, group and other
+ *		without any deny of execute.
+ * fuid, fgid	owner and group ids; user/group ACEs whose "who" matches
+ *		are treated like owner/owning-group entries.
+ *
+ * Returns the computed mode.
+ */
+uint64_t
+zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
+    uint64_t *pflags, uint64_t fuid, uint64_t fgid)
+{
+	int		entry_type;
+	mode_t		mode;
+	mode_t		seen = 0;
+	zfs_ace_hdr_t 	*acep = NULL;
+	uint64_t	who;
+	uint16_t	iflags, type;
+	uint32_t	access_mask;
+	boolean_t	an_exec_denied = B_FALSE;
+
+	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
+
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who,
+	    &access_mask, &iflags, &type)) != NULL) {
+		boolean_t allowed;
+
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		/* inherit_only ACEs do not apply to this object */
+		if (iflags & ACE_INHERIT_ONLY_ACE)
+			continue;
+
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+		allowed = (type == ALLOW);
+
+		if (entry_type == ACE_OWNER || (entry_type == 0 &&
+		    who == fuid)) {
+			if (access_mask & ACE_READ_DATA)
+				zfs_mode_mark_bit(&seen, &mode, S_IRUSR,
+				    allowed);
+			if (access_mask & ACE_WRITE_DATA)
+				zfs_mode_mark_bit(&seen, &mode, S_IWUSR,
+				    allowed);
+			if (access_mask & ACE_EXECUTE)
+				zfs_mode_mark_bit(&seen, &mode, S_IXUSR,
+				    allowed);
+		} else if (entry_type == OWNING_GROUP ||
+		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
+			if (access_mask & ACE_READ_DATA)
+				zfs_mode_mark_bit(&seen, &mode, S_IRGRP,
+				    allowed);
+			if (access_mask & ACE_WRITE_DATA)
+				zfs_mode_mark_bit(&seen, &mode, S_IWGRP,
+				    allowed);
+			if (access_mask & ACE_EXECUTE)
+				zfs_mode_mark_bit(&seen, &mode, S_IXGRP,
+				    allowed);
+		} else if (entry_type == ACE_EVERYONE) {
+			/* everyone@ decides any class not decided yet */
+			if (access_mask & ACE_READ_DATA) {
+				zfs_mode_mark_bit(&seen, &mode, S_IRUSR,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IRGRP,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IROTH,
+				    allowed);
+			}
+			if (access_mask & ACE_WRITE_DATA) {
+				zfs_mode_mark_bit(&seen, &mode, S_IWUSR,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IWGRP,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IWOTH,
+				    allowed);
+			}
+			if (access_mask & ACE_EXECUTE) {
+				zfs_mode_mark_bit(&seen, &mode, S_IXUSR,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IXGRP,
+				    allowed);
+				zfs_mode_mark_bit(&seen, &mode, S_IXOTH,
+				    allowed);
+			}
+		} else {
+			/*
+			 * Any other user or group ACE does not affect the
+			 * mode; we only note whether it denies execute
+			 * access to someone.
+			 */
+			if ((access_mask & ACE_EXECUTE) && type == DENY)
+				an_exec_denied = B_TRUE;
+		}
+	}
+
+	/*
+	 * Failure to allow is effectively a deny, so execute permission
+	 * is denied if it was never mentioned or if we explicitly
+	 * weren't allowed it.
+	 */
+	if (!an_exec_denied &&
+	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
+	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
+		an_exec_denied = B_TRUE;
+
+	if (an_exec_denied)
+		*pflags &= ~ZFS_NO_EXECS_DENIED;
+	else
+		*pflags |= ZFS_NO_EXECS_DENIED;
+
+	return (mode);
+}
+
+/*
+ * Read an external acl object.  If the intent is to modify, always
+ * create a new acl and leave any cached acl in place.
+ *
+ * The caller must hold z_acl_lock.  'have_lock' tells whether the caller
+ * already holds z_lock; if not, and the znode is not an SA znode, z_lock is
+ * taken here for the duration of the read.
+ *
+ * On success *aclpp is set and 0 is returned.  When will_modify is false
+ * the result is the cached ACL (newly cached here if necessary); when it is
+ * true a fresh ACL is returned and the cache is left alone.  On failure an
+ * error is returned and *aclpp is not written.
+ */
+int
+zfs_acl_node_read(struct znode *zp, boolean_t have_lock, zfs_acl_t **aclpp,
+    boolean_t will_modify)
+{
+	zfs_acl_t	*aclp;
+	int		aclsize = 0;
+	int		acl_count = 0;
+	zfs_acl_node_t	*aclnode;
+	zfs_acl_phys_t	znode_acl;
+	int		version;
+	int		error;
+	boolean_t	drop_lock = B_FALSE;
+
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+
+	/* Read-only callers can share the cached copy. */
+	if (zp->z_acl_cached && !will_modify) {
+		*aclpp = zp->z_acl_cached;
+		return (0);
+	}
+
+	/*
+	 * close race where znode could be upgraded while trying to
+	 * read the znode attributes.
+	 *
+	 * But this could only happen if the file isn't already an SA
+	 * znode
+	 */
+	if (!zp->z_is_sa && !have_lock) {
+		mutex_enter(&zp->z_lock);
+		drop_lock = B_TRUE;
+	}
+	version = zfs_znode_acl_version(zp);
+
+	if ((error = zfs_acl_znode_info(zp, &aclsize,
+	    &acl_count, &znode_acl)) != 0) {
+		goto done;
+	}
+
+	aclp = zfs_acl_alloc(version);
+
+	aclp->z_acl_count = acl_count;
+	aclp->z_acl_bytes = aclsize;
+
+	/* A single node holds the whole ACL as read from disk. */
+	aclnode = zfs_acl_node_alloc(aclsize);
+	aclnode->z_ace_count = aclp->z_acl_count;
+	aclnode->z_size = aclsize;
+
+	if (!zp->z_is_sa) {
+		if (znode_acl.z_acl_extern_obj) {
+			/* ACL data lives in a separate object */
+			error = dmu_read(ZTOZSB(zp)->z_os,
+			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
+			    aclnode->z_acldata, DMU_READ_PREFETCH);
+		} else {
+			/* ACL data is embedded in the zfs_acl_phys_t */
+			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
+			    aclnode->z_size);
+		}
+	} else {
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)),
+		    aclnode->z_acldata, aclnode->z_size);
+	}
+
+	if (error != 0) {
+		/* aclnode was never linked into aclp, so free both */
+		zfs_acl_free(aclp);
+		zfs_acl_node_free(aclnode);
+		/* convert checksum errors into IO errors */
+		if (error == ECKSUM)
+			error = SET_ERROR(EIO);
+		goto done;
+	}
+
+	list_insert_head(&aclp->z_acl, aclnode);
+
+	*aclpp = aclp;
+	if (!will_modify)
+		zp->z_acl_cached = aclp;
+done:
+	/* release z_lock only if it was taken above */
+	if (drop_lock)
+		mutex_exit(&zp->z_lock);
+	return (error);
+}
+
+/*
+ * Data locator callback that hands out an ACL's node buffers one at a
+ * time.  'userdata' is a zfs_acl_locator_cb_t; with 'start' set the first
+ * node is selected, otherwise the node after the one returned last time.
+ * *dataptr and *length receive that node's buffer and size.  buflen is not
+ * used.  The selected node is dereferenced without a NULL check, so callers
+ * must not ask for more nodes than the list holds.
+ */
+/*ARGSUSED*/
+void
+zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
+    boolean_t start, void *userdata)
+{
+	zfs_acl_locator_cb_t *cb = userdata;
+	zfs_acl_node_t *node;
+
+	node = start ? list_head(&cb->cb_aclp->z_acl) :
+	    list_next(&cb->cb_aclp->z_acl, cb->cb_acl_node);
+	cb->cb_acl_node = node;
+
+	*dataptr = node->z_acldata;
+	*length = node->z_size;
+}
+
+/*
+ * Recompute the znode's mode from its ACL (used on the chown/chgrp path,
+ * per the note below).  Does nothing for ZFS_ACLTYPE_POSIX filesystems.
+ * The caller must hold both z_lock and z_acl_lock.
+ *
+ * Returns 0 on success or when no ACL attribute exists (ENOENT), otherwise
+ * the error from zfs_acl_node_read().
+ */
+int
+zfs_acl_chown_setattr(znode_t *zp)
+{
+	zfs_acl_t *aclp;
+	int error;
+
+	if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIX)
+		return (0);
+
+	ASSERT(MUTEX_HELD(&zp->z_lock));
+	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+
+	error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
+	if (error == 0 && aclp->z_acl_count > 0) {
+		/* z_mode takes the value as stored in the inode's i_mode */
+		ZTOI(zp)->i_mode = zfs_mode_compute(zp->z_mode, aclp,
+		    &zp->z_pflags, KUID_TO_SUID(ZTOI(zp)->i_uid),
+		    KGID_TO_SGID(ZTOI(zp)->i_gid));
+		zp->z_mode = ZTOI(zp)->i_mode;
+	}
+
+	/*
+	 * Some ZFS implementations (ZEVO) create neither a ZNODE_ACL
+	 * nor a DACL_ACES SA, in which case zfs_acl_node_read() returns
+	 * ENOENT because the SA can't be located.  Let chown/chgrp
+	 * succeed in that case instead of returning an error that makes
+	 * no sense to the caller.
+	 */
+	return ((error == ENOENT) ? 0 : error);
+}
+
+typedef struct trivial_acl {
+	uint32_t	allow0;		/* allow: owner+other have, group lacks */
+	uint32_t	deny1;		/* deny: owner lacks, group/other has */
+	uint32_t	deny2;		/* deny: group lacks, other has */
+	uint32_t	owner;		/* allow mask built from S_I?USR bits */
+	uint32_t	group;		/* allow mask built from S_I?GRP bits */
+	uint32_t	everyone;	/* allow mask built from S_I?OTH bits */
+} trivial_acl_t;
+
+/*
+ * Fill in every field of 'masks' from the permission bits of 'mode'.
+ *
+ * Read maps to ACE_READ_DATA, write to ACE_WRITE_DATA|ACE_APPEND_DATA (plus
+ * ACE_DELETE_CHILD when isdir is set) and execute to ACE_EXECUTE.  The
+ * owner, group and everyone masks additionally always carry the read-ACL,
+ * read-attributes, read-named-attrs and synchronize bits; the owner mask
+ * also always carries the write-attributes/owner/ACL/named-attrs bits.
+ */
+static void
+acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
+{
+	const uint32_t rd = ACE_READ_DATA;
+	const uint32_t ex = ACE_EXECUTE;
+	const uint32_t base = ACE_READ_ACL|ACE_READ_ATTRIBUTES|
+	    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;
+	uint32_t wr = ACE_WRITE_DATA|ACE_APPEND_DATA;
+
+	if (isdir)
+		wr |= ACE_DELETE_CHILD;
+
+	/* Bits the owner lacks although group or other has them. */
+	masks->deny1 =
+	    ((!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) ? rd : 0) |
+	    ((!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) ? wr : 0) |
+	    ((!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) ? ex : 0);
+
+	/* Bits the group lacks although other has them. */
+	masks->deny2 =
+	    ((!(mode & S_IRGRP) && (mode & S_IROTH)) ? rd : 0) |
+	    ((!(mode & S_IWGRP) && (mode & S_IWOTH)) ? wr : 0) |
+	    ((!(mode & S_IXGRP) && (mode & S_IXOTH)) ? ex : 0);
+
+	/* Bits owner and other have but the group lacks. */
+	masks->allow0 =
+	    (((mode & S_IRUSR) && !(mode & S_IRGRP) && (mode & S_IROTH)) ?
+	    rd : 0) |
+	    (((mode & S_IWUSR) && !(mode & S_IWGRP) && (mode & S_IWOTH)) ?
+	    wr : 0) |
+	    (((mode & S_IXUSR) && !(mode & S_IXGRP) && (mode & S_IXOTH)) ?
+	    ex : 0);
+
+	masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
+	    ACE_WRITE_NAMED_ATTRS|base |
+	    ((mode & S_IRUSR) ? rd : 0) |
+	    ((mode & S_IWUSR) ? wr : 0) |
+	    ((mode & S_IXUSR) ? ex : 0);
+
+	masks->group = base |
+	    ((mode & S_IRGRP) ? rd : 0) |
+	    ((mode & S_IWGRP) ? wr : 0) |
+	    ((mode & S_IXGRP) ? ex : 0);
+
+	masks->everyone = base |
+	    ((mode & S_IROTH) ? rd : 0) |
+	    ((mode & S_IWOTH) ? wr : 0) |
+	    ((mode & S_IXOTH) ? ex : 0);
+}
+
+/*
+ * ace_trivial_common:
+ * decide whether an ACL, visited through the 'walk' callback, is trivial.
+ *
+ * A trivial ACL consists only of owner, owning-group and everyone entries
+ * without inheritance flags.  It may not deny read_acl or read_attributes,
+ * may not mention delete, may only grant delete_child together with
+ * write_data, and may only grant write_owner/write_acl/write_attributes/
+ * write_named_attrs to the owner entry.
+ *
+ * Returns 0 if the ACL is trivial and 1 otherwise.
+ */
+static int
+ace_trivial_common(void *acep, int aclcnt,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt,
+    uint16_t *, uint16_t *, uint32_t *))
+{
+	uint64_t cookie = 0;
+	uint32_t mask;
+	uint16_t flags;
+	uint16_t type;
+
+	while ((cookie = walk(acep, cookie, aclcnt, &flags, &type,
+	    &mask)) != 0) {
+		uint16_t who_kind = flags & ACE_TYPE_FLAGS;
+
+		/* Only owner, owning-group and everyone entries allowed. */
+		if (who_kind != ACE_OWNER &&
+		    who_kind != (ACE_GROUP|ACE_IDENTIFIER_GROUP) &&
+		    who_kind != ACE_EVERYONE)
+			return (1);
+
+		/* No inheritance of any kind. */
+		if (flags & (ACE_FILE_INHERIT_ACE|
+		    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|
+		    ACE_INHERIT_ONLY_ACE))
+			return (1);
+
+		/*
+		 * Nobody may be denied reading the basic attributes or
+		 * the file's ACL.
+		 */
+		if (type == ACE_ACCESS_DENIED_ACE_TYPE &&
+		    (mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)))
+			return (1);
+
+		/* Delete permission is never set by default. */
+		if (mask & ACE_DELETE)
+			return (1);
+
+		/* Child delete must be accompanied by write. */
+		if ((mask & ACE_DELETE_CHILD) && !(mask & ACE_WRITE_DATA))
+			return (1);
+
+		/*
+		 * Only the owner entry may be allowed
+		 * write_acl/write_owner/write_attributes/write_named_attrs.
+		 */
+		if (type == ACE_ACCESS_ALLOWED_ACE_TYPE &&
+		    !(flags & ACE_OWNER) &&
+		    (mask & (ACE_WRITE_OWNER|ACE_WRITE_ACL|
+		    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)))
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Common code for setting ACLs.
+ *
+ * Recomputes the mode from 'aclp', stores it in the znode and inode,
+ * drops the cached ACL, writes the ACL either as SA attributes or in the
+ * legacy znode_acl_phys_t layout, refreshes the ACL-wide pflags and
+ * finally pushes everything out with a single sa_bulk_update() in 'tx'.
+ *
+ * Within this part of the file the function is called from zfs_setacl().
+ * NOTE(review): the previous comment also listed zfs_mode_update and
+ * zfs_perm_init as callers and described a non-NULL inherit pointer (ihp)
+ * argument; no such parameter exists in this signature -- confirm and
+ * drop if stale.
+ *
+ * Returns 0 on success, or the error from sa_lookup(), dmu_object_free()
+ * or sa_bulk_update().
+ */
+int
+zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
+{
+	int			error;
+	zfsvfs_t		*zfsvfs = ZTOZSB(zp);
+	dmu_object_type_t	otype;
+	zfs_acl_locator_cb_t	locate = { 0 };
+	uint64_t		mode;
+	sa_bulk_attr_t		bulk[5];
+	uint64_t		ctime[2];
+	int			count = 0;
+	zfs_acl_phys_t		acl_phys;
+
+	mode = zp->z_mode;
+
+	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
+	    KUID_TO_SUID(ZTOI(zp)->i_uid), KGID_TO_SGID(ZTOI(zp)->i_gid));
+
+	zp->z_mode = ZTOI(zp)->i_mode = mode;
+	/*
+	 * The bulk array is handed the addresses of mode, z_pflags and
+	 * ctime; z_pflags and ctime are only given their final values
+	 * further down, before sa_bulk_update() is called.
+	 */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, sizeof (ctime));
+
+	/* The cached ACL no longer matches what is about to be written. */
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
+	/*
+	 * Upgrade needed?
+	 */
+	if (!zfsvfs->z_use_fuids) {
+		otype = DMU_OT_OLDACL;
+	} else {
+		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
+		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
+			zfs_acl_xform(zp, aclp, cr);
+		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
+		otype = DMU_OT_ACL;
+	}
+
+	/*
+	 * Arrgh, we have to handle old on disk format
+	 * as well as newer (preferred) SA format.
+	 */
+
+	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
+		locate.cb_aclp = aclp;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
+		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
+	} else { /* Painful legacy way */
+		zfs_acl_node_t *aclnode;
+		uint64_t off = 0;
+		uint64_t aoid;
+
+		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    &acl_phys, sizeof (acl_phys))) != 0)
+			return (error);
+
+		aoid = acl_phys.z_acl_extern_obj;
+
+		/* ACL too large to embed: keep it in an external object. */
+		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			/*
+			 * If ACL was previously external and we are now
+			 * converting to new ACL format then release old
+			 * ACL object and create a new one.
+			 */
+			if (aoid &&
+			    aclp->z_version != acl_phys.z_acl_version) {
+				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
+				if (error)
+					return (error);
+				aoid = 0;
+			}
+			if (aoid == 0) {
+				aoid = dmu_object_alloc(zfsvfs->z_os,
+				    otype, aclp->z_acl_bytes,
+				    otype == DMU_OT_ACL ?
+				    DMU_OT_SYSACL : DMU_OT_NONE,
+				    otype == DMU_OT_ACL ?
+				    DN_OLD_MAX_BONUSLEN : 0, tx);
+			} else {
+				(void) dmu_object_set_blocksize(zfsvfs->z_os,
+				    aoid, aclp->z_acl_bytes, 0, tx);
+			}
+			acl_phys.z_acl_extern_obj = aoid;
+			/* Write the non-empty ACL nodes back to back. */
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				dmu_write(zfsvfs->z_os, aoid, off,
+				    aclnode->z_size, aclnode->z_acldata, tx);
+				off += aclnode->z_size;
+			}
+		} else {
+			void *start = acl_phys.z_ace_data;
+			/*
+			 * Migrating back embedded?
+			 */
+			if (acl_phys.z_acl_extern_obj) {
+				error = dmu_object_free(zfsvfs->z_os,
+				    acl_phys.z_acl_extern_obj, tx);
+				if (error)
+					return (error);
+				acl_phys.z_acl_extern_obj = 0;
+			}
+
+			/* Copy the non-empty ACL nodes into z_ace_data. */
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+		}
+		/*
+		 * If Old version then swap count/bytes to match old
+		 * layout of znode_acl_phys_t.
+		 */
+		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
+			acl_phys.z_acl_size = aclp->z_acl_count;
+			acl_phys.z_acl_count = aclp->z_acl_bytes;
+		} else {
+			acl_phys.z_acl_size = aclp->z_acl_bytes;
+			acl_phys.z_acl_count = aclp->z_acl_count;
+		}
+		acl_phys.z_acl_version = aclp->z_version;
+
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (acl_phys));
+	}
+
+	/*
+	 * Replace ACL wide bits, but first clear them.
+	 */
+	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
+
+	zp->z_pflags |= aclp->z_hints;
+
+	/* ace_trivial_common() returns 0 when the ACL is trivial. */
+	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
+		zp->z_pflags |= ZFS_ACL_TRIVIAL;
+
+	/* Fills in ctime, whose address was queued in bulk[] above. */
+	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
+	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
+}
+
+/*
+ * Rewrite 'aclp' in place so that it reflects the permission bits in 'mode'.
+ *
+ * A single new ACL node is built holding, in order:
+ *   - the optional allow0, deny1 and deny2 ACEs computed by
+ *     acl_trivial_access_masks(),
+ *   - the existing ACEs of 'aclp', filtered and adjusted as described
+ *     below,
+ *   - the owner@, group@ and everyone@ allow ACEs for 'mode'.
+ * The old nodes are then released and replaced by the new node.
+ *
+ *	isdir	- non-zero if the object is a directory
+ *	mode	- new file mode
+ *	split	- if set, existing owner@/group@/everyone@ ACEs are dropped,
+ *		  or kept as inherit-only ACEs when they are inheritable
+ *		  and the object is a directory
+ *	trim	- if set, existing ALLOW ACEs that are not inherit-only are
+ *		  masked with the group permissions
+ *	aclp	- ACL to rewrite
+ */
+static void
+zfs_acl_chmod(boolean_t isdir, uint64_t mode, boolean_t split, boolean_t trim,
+    zfs_acl_t *aclp)
+{
+	void		*acep = NULL;
+	uint64_t	who;
+	int		new_count, new_bytes;
+	int		ace_size;
+	int		entry_type;
+	uint16_t	iflags, type;
+	uint32_t	access_mask;
+	zfs_acl_node_t	*newnode;
+	size_t		abstract_size = aclp->z_ops->ace_abstract_size();
+	void		*zacep;
+	trivial_acl_t	masks;
+
+	new_count = new_bytes = 0;
+
+	acl_trivial_access_masks((mode_t)mode, isdir, &masks);
+
+	/*
+	 * Room for the existing ACEs plus up to six abstract ACEs: three
+	 * optional ones in front and three mandatory ones at the end.
+	 */
+	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
+
+	/* zacep is the write cursor into the new node's ACE data. */
+	zacep = newnode->z_acldata;
+	if (masks.allow0) {
+		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+	if (masks.deny1) {
+		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+	if (masks.deny2) {
+		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
+		zacep = (void *)((uintptr_t)zacep + abstract_size);
+		new_count++;
+		new_bytes += abstract_size;
+	}
+
+	/* Carry over the existing ACEs. */
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
+	    &iflags, &type))) {
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+		/*
+		 * ACEs used to represent the file mode may be divided
+		 * into an equivalent pair of inherit-only and regular
+		 * ACEs, if they are inheritable.
+		 * Skip regular ACEs, which are replaced by the new mode.
+		 */
+		if (split && (entry_type == ACE_OWNER ||
+		    entry_type == OWNING_GROUP ||
+		    entry_type == ACE_EVERYONE)) {
+			if (!isdir || !(iflags &
+			    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
+				continue;
+			/*
+			 * We preserve owner@, group@, or @everyone
+			 * permissions, if they are inheritable, by
+			 * copying them to inherit_only ACEs. This
+			 * prevents inheritable permissions from being
+			 * altered along with the file mode.
+			 */
+			iflags |= ACE_INHERIT_ONLY_ACE;
+		}
+
+		/*
+		 * If this ACL has any inheritable ACEs, mark that in
+		 * the hints (which are later masked into the pflags)
+		 * so create knows to do inheritance.
+		 */
+		if (isdir && (iflags &
+		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
+			aclp->z_hints |= ZFS_INHERIT_ACE;
+
+		if ((type != ALLOW && type != DENY) ||
+		    (iflags & ACE_INHERIT_ONLY_ACE)) {
+			/* Only record the presence of object ACEs. */
+			switch (type) {
+			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
+				break;
+			}
+		} else {
+			/*
+			 * Limit permissions to be no greater than
+			 * group permissions.
+			 * The "aclinherit" and "aclmode" properties
+			 * affect policy for create and chmod(2),
+			 * respectively.
+			 */
+			if ((type == ALLOW) && trim)
+				access_mask &= masks.group;
+		}
+		/* The cursor advances by the size of the source ACE. */
+		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
+		ace_size = aclp->z_ops->ace_size(acep);
+		zacep = (void *)((uintptr_t)zacep + ace_size);
+		new_count++;
+		new_bytes += ace_size;
+	}
+	/* Finish with the owner@, group@ and everyone@ allow ACEs. */
+	zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
+	zacep = (void *)((uintptr_t)zacep + abstract_size);
+	zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
+	zacep = (void *)((uintptr_t)zacep + abstract_size);
+	zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
+
+	new_count += 3;
+	new_bytes += abstract_size * 3;
+	/* Swap the old nodes for the freshly built one. */
+	zfs_acl_release_nodes(aclp);
+	aclp->z_acl_count = new_count;
+	aclp->z_acl_bytes = new_bytes;
+	newnode->z_ace_count = new_count;
+	newnode->z_size = new_bytes;
+	list_insert_tail(&aclp->z_acl, newnode);
+}
+
+/*
+ * Produce in *aclp the ACL that 'zp' should carry after its mode is
+ * changed to 'mode'.
+ *
+ * With aclmode=discard a fresh, empty ACL is used as the starting point;
+ * otherwise the znode's current ACL is read.  The result is then run
+ * through zfs_acl_chmod().  Both z_acl_lock and z_lock are held while
+ * this happens.
+ *
+ * Returns 0 on success or the error from zfs_acl_node_read().
+ */
+int
+zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
+{
+	int error = 0;
+
+	mutex_enter(&zp->z_acl_lock);
+	mutex_enter(&zp->z_lock);
+
+	if (ZTOZSB(zp)->z_acl_mode != ZFS_ACL_DISCARD)
+		error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
+	else
+		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
+
+	if (error != 0)
+		goto out;
+
+	/* Carry the ACL-wide flags over before rewriting the entries. */
+	(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
+	zfs_acl_chmod(S_ISDIR(ZTOI(zp)->i_mode), mode, B_TRUE,
+	    (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
+
+out:
+	mutex_exit(&zp->z_lock);
+	mutex_exit(&zp->z_acl_lock);
+
+	return (error);
+}
+
+/*
+ * Should an ACE with the given flags be inherited by a new object of
+ * mode 'obj_mode'?
+ *
+ * Only the low four flag bits are examined.  Returns 1 if the ACE
+ * applies to the new object and 0 if it does not.
+ */
+static int
+zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags)
+{
+	int	inherit = (acep_flags & 0xf);
+
+	/* A directory always takes directory-inheritable ACEs. */
+	if (S_ISDIR(obj_mode) && (inherit & ACE_DIRECTORY_INHERIT_ACE))
+		return (1);
+
+	/* Nothing else qualifies unless the ACE is file-inheritable. */
+	if (!(inherit & ACE_FILE_INHERIT_ACE))
+		return (0);
+
+	/*
+	 * A file-inheritable ACE is usable, except by a directory
+	 * when further propagation is forbidden.
+	 */
+	if (S_ISDIR(obj_mode) && (inherit & ACE_NO_PROPAGATE_INHERIT_ACE))
+		return (0);
+
+	return (1);
+}
+
+/*
+ * inherit inheritable ACEs from parent
+ *
+ * Builds a new ACL for an object of mode 'va_mode' from the inheritable
+ * ACEs of the parent ACL 'paclp', honouring the dataset's aclinherit
+ * setting.  Each inherited ACE is placed in its own ACL node and tagged
+ * with ACE_INHERITED_ACE.
+ *
+ *	mode		- requested permission bits; only consulted for the
+ *			  execute bits under passthrough-x
+ *	need_chmod	- set to B_FALSE when the caller does not need to
+ *			  run zfs_acl_chmod() on the result, B_TRUE otherwise
+ *
+ * Returns the new ACL, which is empty when aclinherit is discard or the
+ * new object is a symlink.
+ */
+static zfs_acl_t *
+zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t va_mode, zfs_acl_t *paclp,
+    uint64_t mode, boolean_t *need_chmod)
+{
+	void		*pacep = NULL;
+	void		*acep;
+	zfs_acl_node_t  *aclnode;
+	zfs_acl_t	*aclp = NULL;
+	uint64_t	who;
+	uint32_t	access_mask;
+	uint16_t	iflags, newflags, type;
+	size_t		ace_size;
+	void		*data1, *data2;
+	size_t		data1sz, data2sz;
+	uint_t		aclinherit;
+	boolean_t	isdir = S_ISDIR(va_mode);
+	boolean_t	isreg = S_ISREG(va_mode);
+
+	*need_chmod = B_TRUE;
+
+	/* The new ACL uses the same version as the parent's. */
+	aclp = zfs_acl_alloc(paclp->z_version);
+	aclinherit = zfsvfs->z_acl_inherit;
+	if (aclinherit == ZFS_ACL_DISCARD || S_ISLNK(va_mode))
+		return (aclp);
+
+	while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
+	    &access_mask, &iflags, &type))) {
+
+		/*
+		 * don't inherit bogus ACEs
+		 */
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		/*
+		 * Check if ACE is inheritable by this vnode
+		 */
+		if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
+		    !zfs_ace_can_use(va_mode, iflags))
+			continue;
+
+		/*
+		 * If owner@, group@, or everyone@ inheritable
+		 * then zfs_acl_chmod() isn't needed.
+		 */
+		if ((aclinherit == ZFS_ACL_PASSTHROUGH ||
+		    aclinherit == ZFS_ACL_PASSTHROUGH_X) &&
+		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
+		    ((iflags & OWNING_GROUP) == OWNING_GROUP)) &&
+		    (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE))))
+			*need_chmod = B_FALSE;
+
+		/*
+		 * Strip inherited execute permission from file if
+		 * not in mode
+		 */
+		if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
+		    !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
+			access_mask &= ~ACE_EXECUTE;
+		}
+
+		/*
+		 * Strip write_acl and write_owner from permissions
+		 * when inheriting an ACE
+		 */
+		if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
+			access_mask &= ~RESTRICTED_CLEAR;
+		}
+
+		/* One new ACL node per inherited ACE. */
+		ace_size = aclp->z_ops->ace_size(pacep);
+		aclnode = zfs_acl_node_alloc(ace_size);
+		list_insert_tail(&aclp->z_acl, aclnode);
+		acep = aclnode->z_acldata;
+
+		zfs_set_ace(aclp, acep, access_mask, type,
+		    who, iflags|ACE_INHERITED_ACE);
+
+		/*
+		 * Copy special opaque data if any
+		 */
+		if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) {
+			VERIFY((data2sz = aclp->z_ops->ace_data(acep,
+			    &data2)) == data1sz);
+			bcopy(data1, data2, data2sz);
+		}
+
+		aclp->z_acl_count++;
+		aclnode->z_ace_count++;
+		aclp->z_acl_bytes += aclnode->z_size;
+		newflags = aclp->z_ops->ace_flags_get(acep);
+
+		/*
+		 * If ACE is not to be inherited further, or if the vnode is
+		 * not a directory, remove all inheritance flags
+		 */
+		if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
+			newflags &= ~ALL_INHERIT;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+			continue;
+		}
+
+		/*
+		 * This directory has an inheritable ACE
+		 */
+		aclp->z_hints |= ZFS_INHERIT_ACE;
+
+		/*
+		 * If only FILE_INHERIT is set then turn on
+		 * inherit_only
+		 */
+		if ((iflags & (ACE_FILE_INHERIT_ACE |
+		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
+			newflags |= ACE_INHERIT_ONLY_ACE;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+		} else {
+			newflags &= ~ACE_INHERIT_ONLY_ACE;
+			aclp->z_ops->ace_flags_set(acep,
+			    newflags|ACE_INHERITED_ACE);
+		}
+	}
+	/*
+	 * NOTE(review): this tests z_acl_mode (not z_acl_inherit) against
+	 * ZFS_ACL_RESTRICTED; when it matches and something was inherited
+	 * the caller is told not to chmod the result -- confirm intent.
+	 */
+	if (zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
+	    aclp->z_acl_count != 0) {
+		*need_chmod = B_FALSE;
+	}
+
+	return (aclp);
+}
+
+/*
+ * Create file system object initial permissions
+ * including inheritable ACEs.
+ * Also, create FUIDs for owner and group.
+ *
+ *	dzp	- parent directory znode
+ *	flag	- IS_ROOT_NODE / IS_XATTR creation flags
+ *	vap	- attributes requested for the new object
+ *	cr	- credentials of the caller
+ *	vsecp	- optional explicit ACL for the new object (may be NULL)
+ *	acl_ids	- output: zeroed, then filled with mode, fuid/fgid, ACL and
+ *		  FUID info
+ *
+ * When no explicit ACL is supplied the ACL is either inherited from the
+ * parent (if the parent carries inheritable ACEs) or starts out empty,
+ * and is then passed through zfs_acl_chmod() unless inheritance made that
+ * unnecessary.
+ *
+ * Returns 0 on success or the error from zfs_vsec_2_aclp().
+ */
+int
+zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
+    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
+{
+	int		error;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zfs_acl_t	*paclp;
+	gid_t		gid = vap->va_gid;
+	boolean_t	need_chmod = B_TRUE;
+	boolean_t	trim = B_FALSE;
+	boolean_t	inherited = B_FALSE;
+
+	bzero(acl_ids, sizeof (zfs_acl_ids_t));
+	acl_ids->z_mode = vap->va_mode;
+
+	if (vsecp) {
+		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_mode, vsecp,
+		    cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
+			return (error);
+	}
+
+	acl_ids->z_fuid = vap->va_uid;
+	acl_ids->z_fgid = vap->va_gid;
+#ifdef HAVE_KSID
+	/*
+	 * Determine uid and gid.
+	 */
+	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
+	    ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) {
+		acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid,
+		    cr, ZFS_OWNER, &acl_ids->z_fuidp);
+		acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
+		    cr, ZFS_GROUP, &acl_ids->z_fuidp);
+		gid = vap->va_gid;
+	} else {
+		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
+		    cr, &acl_ids->z_fuidp);
+		acl_ids->z_fgid = 0;
+		if (vap->va_mask & AT_GID)  {
+			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
+			    (uint64_t)vap->va_gid,
+			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
+			gid = vap->va_gid;
+			if (acl_ids->z_fgid != KGID_TO_SGID(ZTOI(dzp)->i_gid) &&
+			    !groupmember(vap->va_gid, cr) &&
+			    secpolicy_vnode_create_gid(cr) != 0)
+				acl_ids->z_fgid = 0;
+		}
+		if (acl_ids->z_fgid == 0) {
+			if (dzp->z_mode & S_ISGID) {
+				char		*domain;
+				uint32_t	rid;
+
+				acl_ids->z_fgid = KGID_TO_SGID(
+				    ZTOI(dzp)->i_gid);
+				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
+				    cr, ZFS_GROUP);
+
+				if (zfsvfs->z_use_fuids &&
+				    IS_EPHEMERAL(acl_ids->z_fgid)) {
+					domain = zfs_fuid_idx_domain(
+					    &zfsvfs->z_fuid_idx,
+					    FUID_INDEX(acl_ids->z_fgid));
+					rid = FUID_RID(acl_ids->z_fgid);
+					zfs_fuid_node_add(&acl_ids->z_fuidp,
+					    domain, rid,
+					    FUID_INDEX(acl_ids->z_fgid),
+					    acl_ids->z_fgid, ZFS_GROUP);
+				}
+			} else {
+				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
+				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
+				gid = crgetgid(cr);
+			}
+		}
+	}
+#endif /* HAVE_KSID */
+
+	/*
+	 * If we're creating a directory, and the parent directory has the
+	 * set-GID bit set, set in on the new directory.
+	 * Otherwise, if the user is neither privileged nor a member of the
+	 * file's new group, clear the file's set-GID bit.
+	 */
+
+	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
+	    (S_ISDIR(vap->va_mode))) {
+		acl_ids->z_mode |= S_ISGID;
+	} else {
+		if ((acl_ids->z_mode & S_ISGID) &&
+		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
+			acl_ids->z_mode &= ~S_ISGID;
+	}
+
+	/* No explicit ACL was supplied: inherit one or start empty. */
+	if (acl_ids->z_aclp == NULL) {
+		mutex_enter(&dzp->z_acl_lock);
+		mutex_enter(&dzp->z_lock);
+		if (!(flag & IS_ROOT_NODE) &&
+		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
+		    !(dzp->z_pflags & ZFS_XATTR)) {
+			VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
+			    &paclp, B_FALSE));
+			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
+			    vap->va_mode, paclp, acl_ids->z_mode, &need_chmod);
+			inherited = B_TRUE;
+		} else {
+			acl_ids->z_aclp =
+			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
+		}
+		mutex_exit(&dzp->z_lock);
+		mutex_exit(&dzp->z_acl_lock);
+
+		if (need_chmod) {
+			if (S_ISDIR(vap->va_mode))
+				acl_ids->z_aclp->z_hints |=
+				    ZFS_ACL_AUTO_INHERIT;
+
+			if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
+			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
+			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
+				trim = B_TRUE;
+			/*
+			 * zfs_acl_chmod() takes an "is a directory" boolean
+			 * as its first argument.  Passing the raw mode would
+			 * be non-zero for every object type and make regular
+			 * files look like directories, so test the type
+			 * explicitly.
+			 */
+			zfs_acl_chmod(S_ISDIR(vap->va_mode), acl_ids->z_mode,
+			    B_FALSE, trim, acl_ids->z_aclp);
+		}
+	}
+
+	/*
+	 * An inherited or caller-supplied ACL may imply a different mode
+	 * and may or may not be trivial; recompute both.
+	 */
+	if (inherited || vsecp) {
+		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
+		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
+		    acl_ids->z_fuid, acl_ids->z_fgid);
+		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
+	}
+
+	return (0);
+}
+
+/*
+ * Release the ACL and the FUID info held by 'acl_ids' and reset both
+ * pointers to NULL.  The acl_ids structure itself is not freed.
+ */
+void
+zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
+{
+	if (acl_ids->z_aclp != NULL) {
+		zfs_acl_free(acl_ids->z_aclp);
+		acl_ids->z_aclp = NULL;
+	}
+
+	if (acl_ids->z_fuidp != NULL) {
+		zfs_fuid_info_free(acl_ids->z_fuidp);
+		acl_ids->z_fuidp = NULL;
+	}
+}
+
+/*
+ * Report whether creating an object with the ids in 'acl_ids' would be
+ * over quota.  The user and group are always checked; the project is
+ * only checked when 'projid' is neither the default nor the invalid
+ * project id.
+ */
+boolean_t
+zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
+{
+	if (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid))
+		return (B_TRUE);
+
+	if (zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid))
+		return (B_TRUE);
+
+	if (projid == ZFS_DEFAULT_PROJID || projid == ZFS_INVALID_PROJID)
+		return (B_FALSE);
+
+	if (zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid))
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Retrieve a file's ACL
+ *
+ *	zp		- znode whose ACL is requested
+ *	vsecp		- in: vsa_mask selects what to return;
+ *			  out: vsa_aclcnt, vsa_aclentp/vsa_aclentsz and
+ *			  vsa_aclflags, depending on the mask
+ *	skipaclchk	- passed through to zfs_zaccess()
+ *	cr		- credentials of the caller
+ *
+ * The ACE buffer returned in vsa_aclentp is allocated here with
+ * kmem_alloc() and is not freed by this function.
+ *
+ * Returns 0 on success, ENOSYS if none of the supported mask bits is
+ * set, or the error from zfs_zaccess() / zfs_acl_node_read().
+ */
+int
+zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
+{
+	zfs_acl_t	*aclp;
+	ulong_t		mask;
+	int		error;
+	int 		count = 0;
+	int		largeace = 0;
+
+	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
+	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
+
+	if (mask == 0)
+		return (SET_ERROR(ENOSYS));
+
+	/* The caller needs read_acl permission on the file. */
+	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
+		return (error);
+
+	mutex_enter(&zp->z_acl_lock);
+
+	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
+	if (error != 0) {
+		mutex_exit(&zp->z_acl_lock);
+		return (error);
+	}
+
+	/*
+	 * Scan ACL to determine number of ACEs
+	 *
+	 * The scan is only done when the ACL contains object ACEs and the
+	 * caller did not ask for all types; object ACEs are then tallied
+	 * separately in largeace.  Otherwise the stored count is used.
+	 */
+	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
+		void *zacep = NULL;
+		uint64_t who;
+		uint32_t access_mask;
+		uint16_t type, iflags;
+
+		while ((zacep = zfs_acl_next_ace(aclp, zacep,
+		    &who, &access_mask, &iflags, &type))) {
+			switch (type) {
+			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+				largeace++;
+				continue;
+			default:
+				count++;
+			}
+		}
+		vsecp->vsa_aclcnt = count;
+	} else
+		count = (int)aclp->z_acl_count;
+
+	if (mask & VSA_ACECNT) {
+		vsecp->vsa_aclcnt = count;
+	}
+
+	if (mask & VSA_ACE) {
+		size_t aclsz;
+
+		/* Size the buffer for the regular and the object ACEs. */
+		aclsz = count * sizeof (ace_t) +
+		    sizeof (ace_object_t) * largeace;
+
+		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
+		vsecp->vsa_aclentsz = aclsz;
+
+		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
+			zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr,
+			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
+		else {
+			zfs_acl_node_t *aclnode;
+			void *start = vsecp->vsa_aclentp;
+
+			/* Other versions: copy the node data verbatim. */
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
+			    aclp->z_acl_bytes);
+		}
+	}
+	if (mask & VSA_ACE_ACLFLAGS) {
+		/* Translate the ACL-wide pflags into vsecattr flags. */
+		vsecp->vsa_aclflags = 0;
+		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
+			vsecp->vsa_aclflags |= ACL_DEFAULTED;
+		if (zp->z_pflags & ZFS_ACL_PROTECTED)
+			vsecp->vsa_aclflags |= ACL_PROTECTED;
+		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
+			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
+	}
+
+	mutex_exit(&zp->z_acl_lock);
+
+	return (0);
+}
+
+/*
+ * Convert the ACL described by 'vsecp' into a newly allocated zfs_acl_t,
+ * returned through 'zaclp'.
+ *
+ * The ACEs are copied into a single ACL node, in the old or the FUID
+ * layout depending on the ACL version chosen for the file system.  Any
+ * ACL-wide flags supplied in 'vsecp' are recorded in z_hints.
+ *
+ * Returns 0 on success, EINVAL if the ACE count is not within
+ * 1..MAX_ACL_ENTRIES, or the error from the copy routine, in which case
+ * the partially built ACL and node are freed.
+ */
+int
+zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode,
+    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
+{
+	zfs_acl_t *newacl;
+	zfs_acl_node_t *node;
+	int aclcnt = vsecp->vsa_aclcnt;
+	int error;
+
+	if (vsecp->vsa_aclcnt <= 0 || vsecp->vsa_aclcnt > MAX_ACL_ENTRIES)
+		return (SET_ERROR(EINVAL));
+
+	newacl = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
+	newacl->z_hints = 0;
+
+	/* Sized for the largest ACE layout. */
+	node = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
+
+	if (newacl->z_version == ZFS_ACL_VERSION_INITIAL) {
+		error = zfs_copy_ace_2_oldace(obj_mode, newacl,
+		    (ace_t *)vsecp->vsa_aclentp, node->z_acldata,
+		    aclcnt, &node->z_size);
+	} else {
+		error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, newacl,
+		    vsecp->vsa_aclentp, node->z_acldata, aclcnt,
+		    &node->z_size, fuidp, cr);
+	}
+	if (error != 0) {
+		zfs_acl_free(newacl);
+		zfs_acl_node_free(node);
+		return (error);
+	}
+
+	newacl->z_acl_bytes = node->z_size;
+	node->z_ace_count = aclcnt;
+	newacl->z_acl_count = aclcnt;
+	list_insert_head(&newacl->z_acl, node);
+
+	/*
+	 * When the caller supplies ACL-wide flags, fold them into z_hints.
+	 */
+	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
+		uint32_t vflags = vsecp->vsa_aclflags;
+
+		if (vflags & ACL_PROTECTED)
+			newacl->z_hints |= ZFS_ACL_PROTECTED;
+		if (vflags & ACL_DEFAULTED)
+			newacl->z_hints |= ZFS_ACL_DEFAULTED;
+		if (vflags & ACL_AUTO_INHERIT)
+			newacl->z_hints |= ZFS_ACL_AUTO_INHERIT;
+	}
+
+	*zaclp = newacl;
+
+	return (0);
+}
+
+/*
+ * Set a file's ACL
+ *
+ *	zp		- znode whose ACL is replaced
+ *	vsecp		- new ACL; vsa_mask must include VSA_ACE or VSA_ACECNT
+ *	skipaclchk	- passed through to zfs_zaccess()
+ *	cr		- credentials of the caller
+ *
+ * The new ACL is converted with zfs_vsec_2_aclp(), written inside a DMU
+ * transaction with zfs_aclset_common(), logged with zfs_log_acl() and then
+ * becomes the znode's cached ACL.  If the transaction cannot be assigned
+ * and the error is ERESTART, the assignment is retried after waiting.
+ *
+ * Returns 0 on success, ENOSYS for an unsupported mask, EPERM if the file
+ * is immutable, or the error from zfs_zaccess(), zfs_vsec_2_aclp(),
+ * dmu_tx_assign() or zfs_aclset_common().
+ */
+int
+zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	zilog_t		*zilog = zfsvfs->z_log;
+	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
+	dmu_tx_t	*tx;
+	int		error;
+	zfs_acl_t	*aclp;
+	zfs_fuid_info_t	*fuidp = NULL;
+	boolean_t	fuid_dirtied;
+	uint64_t	acl_obj;
+
+	if (mask == 0)
+		return (SET_ERROR(ENOSYS));
+
+	if (zp->z_pflags & ZFS_IMMUTABLE)
+		return (SET_ERROR(EPERM));
+
+	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
+		return (error);
+
+	error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
+	    &aclp);
+	if (error)
+		return (error);
+
+	/*
+	 * If ACL wide flags aren't being set then preserve any
+	 * existing flags.
+	 */
+	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
+		aclp->z_hints |=
+		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
+	}
+top:
+	mutex_enter(&zp->z_acl_lock);
+	mutex_enter(&zp->z_lock);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+
+	/*
+	 * If old version and ACL won't fit in bonus and we aren't
+	 * upgrading then take out necessary DMU holds
+	 */
+
+	if ((acl_obj = zfs_external_acl(zp)) != 0) {
+		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
+		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
+			dmu_tx_hold_free(tx, acl_obj, 0,
+			    DMU_OBJECT_END);
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+			    aclp->z_acl_bytes);
+		} else {
+			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
+		}
+	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
+	}
+
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (error) {
+		mutex_exit(&zp->z_acl_lock);
+		mutex_exit(&zp->z_lock);
+
+		if (error == ERESTART) {
+			/* Retry with the same aclp and fuidp. */
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		zfs_acl_free(aclp);
+		/*
+		 * The FUID info produced by zfs_vsec_2_aclp() is owned by
+		 * this function; release it here as well so that it is
+		 * not leaked on the failure path.
+		 */
+		if (fuidp)
+			zfs_fuid_info_free(fuidp);
+		return (error);
+	}
+
+	error = zfs_aclset_common(zp, aclp, cr, tx);
+	ASSERT(error == 0);
+	/* zfs_aclset_common() dropped the old cached ACL; cache the new. */
+	ASSERT(zp->z_acl_cached == NULL);
+	zp->z_acl_cached = aclp;
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
+
+	if (fuidp)
+		zfs_fuid_info_free(fuidp);
+	dmu_tx_commit(tx);
+
+	mutex_exit(&zp->z_lock);
+	mutex_exit(&zp->z_acl_lock);
+
+	return (error);
+}
+
+/*
+ * Check the accesses of interest (AoI) in 'v4_mode' against properties
+ * of the dataset and of the file that override the ACL, such as a
+ * read-only file system.
+ *
+ * Returns 0 if nothing conflicts, otherwise EROFS, EPERM or EACCES as
+ * determined by the first conflicting check below.
+ */
+static int
+zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
+{
+	/*
+	 * Writes to a read-only file system are refused; for device
+	 * nodes only attribute writes are refused.
+	 */
+	if ((v4_mode & WRITE_MASK) != 0 && zfs_is_readonly(ZTOZSB(zp))) {
+		if (!Z_ISDEV(ZTOI(zp)->i_mode) ||
+		    (v4_mode & WRITE_MASK_ATTRS) != 0)
+			return (SET_ERROR(EROFS));
+	}
+
+	/*
+	 * ZFS_READONLY is deliberately not tested here.
+	 * See zfs_zaccess_common().
+	 */
+	if ((zp->z_pflags & ZFS_IMMUTABLE) != 0 &&
+	    (v4_mode & WRITE_MASK_DATA) != 0)
+		return (SET_ERROR(EPERM));
+
+	/* Objects marked nounlink refuse both delete permissions. */
+	if ((zp->z_pflags & ZFS_NOUNLINK) != 0 &&
+	    (v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) != 0)
+		return (SET_ERROR(EPERM));
+
+	/* Quarantined objects may be neither read nor executed. */
+	if ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0 &&
+	    (v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) != 0)
+		return (SET_ERROR(EACCES));
+
+	return (0);
+}
+
+/*
+ * The primary usage of this function is to loop through all of the
+ * ACEs in the znode, determining what accesses of interest (AoI) to
+ * the caller are allowed or denied.  The AoI are expressed as bits in
+ * the working_mode parameter.  As each ACE is processed, bits covered
+ * by that ACE are removed from the working_mode.  This removal
+ * facilitates two things.  The first is that when the working mode is
+ * empty (= 0), we know we've looked at all the AoI. The second is
+ * that the ACE interpretation rules don't allow a later ACE to undo
+ * something granted or denied by an earlier ACE.  Removing the
+ * discovered access or denial enforces this rule.  At the end of
+ * processing the ACEs, all AoI that were found to be denied are
+ * placed into the working_mode, giving the caller a mask of denied
+ * accesses.  Returns:
+ *	0		if all AoI granted
+ *	EACCES		if the denied mask is non-zero
+ *	-1		if nothing was denied but some AoI were not
+ *			covered by any ACE (they remain in working_mode)
+ *	other error	if abnormal failure (e.g., IO error)
+ *
+ * A secondary usage of the function is to determine if any of the
+ * AoI are granted.  If an ACE grants any access in
+ * the working_mode, we immediately short circuit out of the function.
+ * This mode is chosen by setting anyaccess to B_TRUE.  The
+ * working_mode is not a denied access mask upon exit if the function
+ * is used in this manner.
+ */
+static int
+zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
+    boolean_t anyaccess, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	zfs_acl_t	*aclp;
+	int		error;
+	uid_t		uid = crgetuid(cr);
+	uint64_t	who;
+	uint16_t	type, iflags;
+	uint16_t	entry_type;
+	uint32_t	access_mask;
+	uint32_t	deny_mask = 0;
+	zfs_ace_hdr_t	*acep = NULL;
+	boolean_t	checkit;
+	uid_t		gowner;
+	uid_t		fowner;
+
+	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
+
+	mutex_enter(&zp->z_acl_lock);
+
+	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
+	if (error != 0) {
+		mutex_exit(&zp->z_acl_lock);
+		return (error);
+	}
+
+	ASSERT(zp->z_acl_cached);
+
+	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
+	    &iflags, &type))) {
+		uint32_t mask_matched;
+
+		if (!zfs_acl_valid_ace_type(type, iflags))
+			continue;
+
+		/* Inherit-only ACEs do not apply to the directory itself. */
+		if (S_ISDIR(ZTOI(zp)->i_mode) &&
+		    (iflags & ACE_INHERIT_ONLY_ACE))
+			continue;
+
+		/* Skip ACE if it does not affect any AoI */
+		mask_matched = (access_mask & *working_mode);
+		if (!mask_matched)
+			continue;
+
+		entry_type = (iflags & ACE_TYPE_FLAGS);
+
+		/* checkit is set when this ACE applies to the caller. */
+		checkit = B_FALSE;
+
+		switch (entry_type) {
+		case ACE_OWNER:
+			if (uid == fowner)
+				checkit = B_TRUE;
+			break;
+		case OWNING_GROUP:
+			/* Test membership in the file's owning group. */
+			who = gowner;
+			fallthrough;
+		case ACE_IDENTIFIER_GROUP:
+			checkit = zfs_groupmember(zfsvfs, who, cr);
+			break;
+		case ACE_EVERYONE:
+			checkit = B_TRUE;
+			break;
+
+		/* USER Entry */
+		default:
+			if (entry_type == 0) {
+				uid_t newid;
+
+				newid = zfs_fuid_map_id(zfsvfs, who, cr,
+				    ZFS_ACE_USER);
+				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
+				    uid == newid)
+					checkit = B_TRUE;
+				break;
+			} else {
+				/* Unrecognised entry type. */
+				mutex_exit(&zp->z_acl_lock);
+				return (SET_ERROR(EIO));
+			}
+		}
+
+		if (checkit) {
+			if (type == DENY) {
+				DTRACE_PROBE3(zfs__ace__denies,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				deny_mask |= mask_matched;
+			} else {
+				DTRACE_PROBE3(zfs__ace__allows,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				if (anyaccess) {
+					mutex_exit(&zp->z_acl_lock);
+					return (0);
+				}
+			}
+			/* These bits are decided; later ACEs cannot undo it. */
+			*working_mode &= ~mask_matched;
+		}
+
+		/* Are we done? */
+		if (*working_mode == 0)
+			break;
+	}
+
+	mutex_exit(&zp->z_acl_lock);
+
+	/* Put the found 'denies' back on the working mode */
+	if (deny_mask) {
+		*working_mode |= deny_mask;
+		return (SET_ERROR(EACCES));
+	} else if (*working_mode) {
+		/* Nothing denied, but some AoI were never granted. */
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Report whether 'cr' is granted any access at all to 'zp'; which
+ * particular access is granted does not matter.
+ *
+ * The ACL is consulted first.  If it grants nothing, the answer comes
+ * from secpolicy_vnode_any_access() for the file's owner.
+ */
+boolean_t
+zfs_has_access(znode_t *zp, cred_t *cr)
+{
+	uint32_t wanted = ACE_ALL_PERMS;
+	uid_t fowner;
+
+	if (zfs_zaccess_aces_check(zp, &wanted, B_TRUE, cr) == 0)
+		return (B_TRUE);
+
+	fowner = zfs_fuid_map_id(ZTOZSB(zp),
+	    KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER);
+
+	return (secpolicy_vnode_any_access(cr, ZTOI(zp), fowner) == 0);
+}
+
+/*
+ * Common access check used by the zfs_zaccess*() entry points.
+ *
+ *	zp		- znode being accessed
+ *	v4_mode		- requested access bits
+ *	working_mode	- out: starts as v4_mode; 0 when the request is
+ *			  short-circuited, otherwise as left by
+ *			  zfs_zaccess_aces_check()
+ *	check_privs	- out: B_FALSE if the dataset check refused the
+ *			  access, B_TRUE otherwise
+ *	skipaclchk	- skip the ACL evaluation
+ *	cr		- credentials of the caller
+ *
+ * Returns 0 if access is granted or the check is skipped, the error from
+ * zfs_zaccess_dataset_check(), EPERM for a data write to a non-directory
+ * carrying the DOS read-only attribute, or the result of
+ * zfs_zaccess_aces_check().
+ */
+static int
+zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
+    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int err;
+
+	*working_mode = v4_mode;
+	*check_privs = B_TRUE;
+
+	/*
+	 * Short circuit empty requests
+	 */
+	if (v4_mode == 0 || zfsvfs->z_replay) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
+		*check_privs = B_FALSE;
+		return (err);
+	}
+
+	/*
+	 * The caller requested that the ACL check be skipped.  This
+	 * would only happen if the caller checked VOP_ACCESS() with a
+	 * 32 bit ACE mask and already had the appropriate permissions.
+	 */
+	if (skipaclchk) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	/*
+	 * Note: ZFS_READONLY represents the "DOS R/O" attribute.
+	 * When that flag is set, we should behave as if write access
+	 * were not granted by anything in the ACL.  In particular:
+	 * We _must_ allow writes after opening the file r/w, then
+	 * setting the DOS R/O attribute, and writing some more.
+	 * (Similar to how you can write after fchmod(fd, 0444).)
+	 *
+	 * Therefore ZFS_READONLY is ignored in the dataset check
+	 * above, and checked here as if part of the ACL check.
+	 * Also note: DOS R/O is ignored for directories, so the
+	 * check below must only fire for non-directories.
+	 */
+	if ((v4_mode & WRITE_MASK_DATA) &&
+	    !S_ISDIR(ZTOI(zp)->i_mode) &&
+	    (zp->z_pflags & ZFS_READONLY)) {
+		return (SET_ERROR(EPERM));
+	}
+
+	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
+}
+
+/*
+ * Re-run the access check as an append request.  This is only attempted
+ * when ACE_WRITE_DATA is the sole permission left in *working_mode; any
+ * other outstanding permission yields EACCES.
+ */
+static int
+zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
+    cred_t *cr)
+{
+	if (*working_mode == ACE_WRITE_DATA) {
+		return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
+		    check_privs, B_FALSE, cr));
+	}
+
+	return (SET_ERROR(EACCES));
+}
+
+/*
+ * Fast-path check for ACE_EXECUTE permission on zdp.
+ *
+ * Returns EACCES when zdp carries ZFS_AV_QUARANTINED and 0 when one of the
+ * shortcuts below grants access.  Every other case reaches the "slow"
+ * label, which performs a full zfs_zaccess() check for ACE_EXECUTE.
+ */
+int
+zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
+{
+	boolean_t owner = B_FALSE;
+	boolean_t groupmbr = B_FALSE;
+	boolean_t is_attr;
+	uid_t uid = crgetuid(cr);
+	int error;
+
+	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
+		return (SET_ERROR(EACCES));
+
+	/* Extended attribute directories always take the full check. */
+	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
+	    (S_ISDIR(ZTOI(zdp)->i_mode)));
+	if (is_attr)
+		goto slow;
+
+
+	/* z_acl_lock is held while z_pflags and z_mode are inspected. */
+	mutex_enter(&zdp->z_acl_lock);
+
+	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
+		mutex_exit(&zdp->z_acl_lock);
+		return (0);
+	}
+
+	/*
+	 * The mode-bit shortcuts below are only used when both the owning
+	 * uid and gid are 0; anything else goes to the slow path.
+	 */
+	if (KUID_TO_SUID(ZTOI(zdp)->i_uid) != 0 ||
+	    KGID_TO_SGID(ZTOI(zdp)->i_gid) != 0) {
+		mutex_exit(&zdp->z_acl_lock);
+		goto slow;
+	}
+
+	if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) {
+		owner = B_TRUE;
+		if (zdp->z_mode & S_IXUSR) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		} else {
+			mutex_exit(&zdp->z_acl_lock);
+			goto slow;
+		}
+	}
+	if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) {
+		groupmbr = B_TRUE;
+		if (zdp->z_mode & S_IXGRP) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		} else {
+			mutex_exit(&zdp->z_acl_lock);
+			goto slow;
+		}
+	}
+	/*
+	 * Both branches above leave the function or jump to slow, so owner
+	 * and groupmbr are still B_FALSE whenever this point is reached.
+	 */
+	if (!owner && !groupmbr) {
+		if (zdp->z_mode & S_IXOTH) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		}
+	}
+
+	mutex_exit(&zdp->z_acl_lock);
+
+slow:
+	/* The lock has been dropped on every path that reaches this label. */
+	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
+	ZFS_ENTER(ZTOZSB(zdp));
+	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
+	ZFS_EXIT(ZTOZSB(zdp));
+	return (error);
+}
+
+/*
+ * Determine whether Access should be granted/denied.
+ *
+ * The least priv subsystem is always consulted as a basic privilege
+ * can define any form of access.
+ */
+int
+zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
+{
+	uint32_t	working_mode;
+	int		error;
+	int		is_attr;
+	boolean_t 	check_privs;
+	znode_t		*xzp;	/* only set (and held) when is_attr */
+	znode_t 	*check_zp = zp;
+	mode_t		needed_bits;
+	uid_t		owner;
+
+	is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode));
+
+	/*
+	 * If attribute then validate against base file
+	 */
+	if (is_attr) {
+		if ((error = zfs_zget(ZTOZSB(zp),
+		    zp->z_xattr_parent, &xzp)) != 0) {
+			return (error);
+		}
+
+		/* Every return below must zrele(xzp) when is_attr is set. */
+		check_zp = xzp;
+
+		/*
+		 * fixup mode to map to xattr perms
+		 */
+
+		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
+			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
+			mode |= ACE_WRITE_NAMED_ATTRS;
+		}
+
+		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
+			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
+			mode |= ACE_READ_NAMED_ATTRS;
+		}
+	}
+
+	owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
+	    cr, ZFS_OWNER);
+	/*
+	 * Map the bits required to the standard inode flags
+	 * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits.  The bits still
+	 * present in working_mode after the ACL check (i.e. the ones
+	 * currently missing) are mapped into checkmode further below.
+	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
+	 * needed_bits.
+	 */
+	needed_bits = 0;
+
+	/* The owner implicitly holds read_acl/read_attributes. */
+	working_mode = mode;
+	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
+	    owner == crgetuid(cr))
+		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
+
+	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
+	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
+		needed_bits |= S_IRUSR;
+	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
+	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
+		needed_bits |= S_IWUSR;
+	if (working_mode & ACE_EXECUTE)
+		needed_bits |= S_IXUSR;
+
+	/*
+	 * zfs_zaccess_common() resets working_mode from mode, so the
+	 * value computed above only feeds needed_bits.
+	 */
+	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
+	    &check_privs, skipaclchk, cr)) == 0) {
+		if (is_attr)
+			zrele(xzp);
+		return (secpolicy_vnode_access2(cr, ZTOI(zp), owner,
+		    needed_bits, needed_bits));
+	}
+
+	/* The dataset check failed: privileges cannot override it. */
+	if (error && !check_privs) {
+		if (is_attr)
+			zrele(xzp);
+		return (error);
+	}
+
+	/* For append requests retry the check as ACE_APPEND_DATA. */
+	if (error && (flags & V_APPEND)) {
+		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
+	}
+
+	if (error && check_privs) {
+		mode_t		checkmode = 0;
+
+		/*
+		 * First check for implicit owner permission on
+		 * read_acl/read_attributes
+		 */
+
+		error = 0;
+		ASSERT(working_mode != 0);
+
+		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
+		    owner == crgetuid(cr)))
+			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
+
+		/* checkmode collects the rwx bits the ACL did not grant. */
+		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
+		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
+			checkmode |= S_IRUSR;
+		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
+		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
+			checkmode |= S_IWUSR;
+		if (working_mode & ACE_EXECUTE)
+			checkmode |= S_IXUSR;
+
+		error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner,
+		    needed_bits & ~checkmode, needed_bits);
+
+		/* Each remaining special permission has its own policy. */
+		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
+			error = secpolicy_vnode_chown(cr, owner);
+		if (error == 0 && (working_mode & ACE_WRITE_ACL))
+			error = secpolicy_vnode_setdac(cr, owner);
+
+		if (error == 0 && (working_mode &
+		    (ACE_DELETE|ACE_DELETE_CHILD)))
+			error = secpolicy_vnode_remove(cr);
+
+		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
+			error = secpolicy_vnode_chown(cr, owner);
+		}
+		if (error == 0) {
+			/*
+			 * See if any bits other than those already checked
+			 * for are still present.  If so then return EACCES
+			 */
+			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
+				error = SET_ERROR(EACCES);
+			}
+		}
+	} else if (error == 0) {
+		/* Reached when the append retry above succeeded. */
+		error = secpolicy_vnode_access2(cr, ZTOI(zp), owner,
+		    needed_bits, needed_bits);
+	}
+
+	if (is_attr)
+		zrele(xzp);
+
+	return (error);
+}
+
+/*
+ * Convert a traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into the
+ * NFSv4-style ZFS ACL mask and hand it to zfs_zaccess().
+ */
+int
+zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
+{
+	int v4_mode = zfs_unix_to_v4(mode >> 6);
+
+	return (zfs_zaccess(zp, v4_mode, flags, B_FALSE, cr));
+}
+
+/*
+ * Access callback used by secpolicy_vnode_setattr: same translation as
+ * zfs_zaccess_rwx() but always with flags of 0.
+ */
+int
+zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
+{
+	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), 0, B_FALSE, cr));
+}
+
+/*
+ * Tunable consulted by zfs_zaccess_delete(): when non-zero, ACE_WRITE_DATA
+ * on the containing directory is accepted alongside ACE_DELETE_CHILD as
+ * permission to delete a child.  Enabled by default.
+ */
+int zfs_write_implies_delete_child = 1;
+
+/*
+ * Determine whether delete access should be granted.
+ *
+ * The following chart outlines how we handle delete permissions which is
+ * how recent versions of windows (Windows 2008) handles it.  The efficiency
+ * comes from not having to check the parent ACL where the object itself grants
+ * delete:
+ *
+ *      -------------------------------------------------------
+ *      |   Parent Dir  |      Target Object Permissions      |
+ *      |  permissions  |                                     |
+ *      -------------------------------------------------------
+ *      |               | ACL Allows | ACL Denies| Delete     |
+ *      |               |  Delete    |  Delete   | unspecified|
+ *      -------------------------------------------------------
+ *      | ACL Allows    | Permit     | Deny *    | Permit     |
+ *      | DELETE_CHILD  |            |           |            |
+ *      -------------------------------------------------------
+ *      | ACL Denies    | Permit     | Deny      | Deny       |
+ *      | DELETE_CHILD  |            |           |            |
+ *      -------------------------------------------------------
+ *      | ACL specifies |            |           |            |
+ *      | only allow    | Permit     | Deny *    | Permit     |
+ *      | write and     |            |           |            |
+ *      | execute       |            |           |            |
+ *      -------------------------------------------------------
+ *      | ACL denies    |            |           |            |
+ *      | write and     | Permit     | Deny      | Deny       |
+ *      | execute       |            |           |            |
+ *      -------------------------------------------------------
+ *         ^
+ *         |
+ *         Re. execute permission on the directory:  if that's missing,
+ *	   the vnode lookup of the target will fail before we get here.
+ *
+ * Re [*] in the table above:  NFSv4 would normally Permit delete for
+ * these two cells of the matrix.
+ * See acl.h for notes on which ACE_... flags should be checked for which
+ * operations.  Specifically, the NFSv4 committee recommendation is in
+ * conflict with the Windows interpretation of DENY ACEs, where DENY ACEs
+ * should take precedence ahead of ALLOW ACEs.
+ *
+ * This implementation always consults the target object's ACL first.
+ * If a DENY ACE is present on the target object that specifies ACE_DELETE,
+ * delete access is denied.  If an ALLOW ACE with ACE_DELETE is present on
+ * the target object, access is allowed.  If and only if no entries with
+ * ACE_DELETE are present in the object's ACL, check the container's ACL
+ * for entries with ACE_DELETE_CHILD.
+ *
+ * A summary of the logic implemented from the table above is as follows:
+ *
+ * First check for DENY ACEs that apply.
+ * If either target or container has a deny, EACCES.
+ *
+ * Delete access can then be summarized as follows:
+ * 1: The object to be deleted grants ACE_DELETE, or
+ * 2: The containing directory grants ACE_DELETE_CHILD.
+ * In a Windows system, that would be the end of the story.
+ * In this system, (2) has some complications...
+ * 2a: "sticky" bit on a directory adds restrictions, and
+ * 2b: existing ACEs from previous versions of ZFS may
+ * not carry ACE_DELETE_CHILD where they should, so we
+ * also allow delete when ACE_WRITE_DATA is granted.
+ *
+ * Note: 2b is technically a work-around for a prior bug,
+ * which hopefully can go away some day.  For those who
+ * no longer need the work around, and for testing, this
+ * work-around is made conditional via the tunable:
+ * zfs_write_implies_delete_child
+ */
+int
+zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
+{
+	uint32_t wanted_dirperms;
+	uint32_t dzp_working_mode = 0;
+	uint32_t zp_working_mode = 0;
+	int dzp_error, zp_error;
+	boolean_t dzpcheck_privs;
+	boolean_t zpcheck_privs;
+
+	/* These flags on the target forbid removal outright. */
+	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
+		return (SET_ERROR(EPERM));
+
+	/*
+	 * Case 1:
+	 * If target object grants ACE_DELETE then we are done.  This is
+	 * indicated by a return value of 0.  For this case we don't worry
+	 * about the sticky bit because sticky only applies to the parent
+	 * directory and this is the child access result.
+	 *
+	 * If we encounter a DENY ACE here, we're also done (EACCES).
+	 * Note that if we hit a DENY ACE here (on the target) it should
+	 * take precedence over a DENY ACE on the container, so that when
+	 * we have more complete auditing support we will be able to
+	 * report an access failure against the specific target.
+	 * (This is part of why we're checking the target first.)
+	 */
+	zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
+	    &zpcheck_privs, B_FALSE, cr);
+	if (zp_error == EACCES) {
+		/* We hit a DENY ACE. */
+		if (!zpcheck_privs)
+			return (SET_ERROR(zp_error));
+		/* Privileges may still override the denial. */
+		return (secpolicy_vnode_remove(cr));
+
+	}
+	if (zp_error == 0)
+		return (0);
+
+	/*
+	 * Case 2:
+	 * If the containing directory grants ACE_DELETE_CHILD,
+	 * or we're in backward compatibility mode and the
+	 * containing directory has ACE_WRITE_DATA, allow.
+	 * Case 2b is handled with wanted_dirperms.
+	 */
+	wanted_dirperms = ACE_DELETE_CHILD;
+	if (zfs_write_implies_delete_child)
+		wanted_dirperms |= ACE_WRITE_DATA;
+	dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
+	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
+	if (dzp_error == EACCES) {
+		/* We hit a DENY ACE. */
+		if (!dzpcheck_privs)
+			return (SET_ERROR(dzp_error));
+		return (secpolicy_vnode_remove(cr));
+	}
+
+	/*
+	 * Cases 2a, 2b (continued)
+	 *
+	 * Note: dzp_working_mode now contains any permissions
+	 * that were NOT granted.  Therefore, if any of the
+	 * wanted_dirperms WERE granted, we will have:
+	 *   dzp_working_mode != wanted_dirperms
+	 * We're really asking if ANY of those permissions
+	 * were granted, and if so, grant delete access.
+	 */
+	if (dzp_working_mode != wanted_dirperms)
+		dzp_error = 0;
+
+	/*
+	 * dzp_error is 0 if the container granted us permissions to "modify".
+	 * If we do not have permission via one or more ACEs, our current
+	 * privileges may still permit us to modify the container.
+	 *
+	 * dzpcheck_privs is false when e.g. the FS is read-only.
+	 * Otherwise, do privilege checks for the container.
+	 */
+	if (dzp_error != 0 && dzpcheck_privs) {
+		uid_t owner;
+
+		/*
+		 * The secpolicy call needs the requested access and
+		 * the current access mode of the container, but it
+		 * only knows about Unix-style modes (VEXEC, VWRITE),
+		 * so this must condense the fine-grained ACE bits into
+		 * Unix modes.
+		 *
+		 * The VEXEC flag is easy, because we know that has
+		 * always been checked before we get here (during the
+		 * lookup of the target vnode).  The container has not
+		 * granted us permissions to "modify", so we do not set
+		 * the VWRITE flag in the current access mode.
+		 */
+		owner = zfs_fuid_map_id(ZTOZSB(dzp),
+		    KUID_TO_SUID(ZTOI(dzp)->i_uid), cr, ZFS_OWNER);
+		dzp_error = secpolicy_vnode_access2(cr, ZTOI(dzp),
+		    owner, S_IXUSR, S_IWUSR|S_IXUSR);
+	}
+	if (dzp_error != 0) {
+		/*
+		 * Note: We may have dzp_error = -1 here (from
+		 * zfs_zaccess_common).  Don't return that.
+		 */
+		return (SET_ERROR(EACCES));
+	}
+
+
+	/*
+	 * At this point, we know that the directory permissions allow
+	 * us to modify, but we still need to check for the additional
+	 * restrictions that apply when the "sticky bit" is set.
+	 *
+	 * Yes, zfs_sticky_remove_access() also checks this bit, but
+	 * checking it here and skipping the call below is nice when
+	 * you're watching all of this with dtrace.
+	 */
+	if ((dzp->z_mode & S_ISVTX) == 0)
+		return (0);
+
+	/*
+	 * zfs_sticky_remove_access will succeed if:
+	 * 1. The sticky bit is absent.
+	 * 2. We pass the sticky bit restrictions.
+	 * 3. We have privileges that always allow file removal.
+	 */
+	return (zfs_sticky_remove_access(dzp, zp, cr));
+}
+
+/*
+ * Check permission to rename szp (in sdzp) to a name in tdzp, optionally
+ * replacing the existing target tzp.
+ *
+ * Rename is treated as delete permission on the source (and on any
+ * existing target) followed by add-file/add-subdirectory permission on
+ * the target directory.  Returns 0 or the first error encountered.
+ */
+int
+zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
+    znode_t *tzp, cred_t *cr)
+{
+	int add_perm = ACE_ADD_FILE;
+	int error;
+
+	if (szp->z_pflags & ZFS_AV_QUARANTINED)
+		return (SET_ERROR(EACCES));
+
+	if (S_ISDIR(ZTOI(szp)->i_mode))
+		add_perm = ACE_ADD_SUBDIRECTORY;
+
+	/* The delete half: the source must be removable from sdzp. */
+	error = zfs_zaccess_delete(sdzp, szp, cr);
+	if (error != 0)
+		return (error);
+
+	/* An existing target must be removable from tdzp as well. */
+	if (tzp != NULL) {
+		error = zfs_zaccess_delete(tdzp, tzp, cr);
+		if (error != 0)
+			return (error);
+	}
+
+	/* The add half: the target directory must accept the new entry. */
+	return (zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr));
+}

diff --git a/zfs/module/os/linux/zfs/zfs_ctldir.c b/zfs/module/os/linux/zfs/zfs_ctldir.c
new file mode 100644
index 0000000..0a30692
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_ctldir.c

@@ -0,0 +1,1294 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ *
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * LLNL-CODE-403049.
+ * Rewritten for Linux by:
+ *   Rohan Puri <rohan.puri15@gmail.com>
+ *   Brian Behlendorf <behlendorf1@llnl.gov>
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright (c) 2018 George Melikov. All Rights Reserved.
+ * Copyright (c) 2019 Datto, Inc. All rights reserved.
+ * Copyright (c) 2020 The MathWorks, Inc. All rights reserved.
+ */
+
+/*
+ * ZFS control directory (a.k.a. ".zfs")
+ *
+ * This directory provides a common location for all ZFS meta-objects.
+ * Currently, this is only the 'snapshot' and 'shares' directory, but this may
+ * expand in the future.  The elements are built dynamically, as the hierarchy
+ * does not actually exist on disk.
+ *
+ * For 'snapshot', we don't want to have all snapshots always mounted, because
+ * this would take up a huge amount of space in /etc/mnttab.  We have three
+ * types of objects:
+ *
+ *	ctldir ------> snapshotdir -------> snapshot
+ *                                             |
+ *                                             |
+ *                                             V
+ *                                         mounted fs
+ *
+ * The 'snapshot' node contains just enough information to lookup '..' and act
+ * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
+ * perform an automount of the underlying filesystem and return the
+ * corresponding inode.
+ *
+ * All mounts are handled automatically by a user mode helper which invokes
+ * the mount procedure.  Unmounts are handled by allowing the mount
+ * point to expire so the kernel may automatically unmount it.
+ *
+ * The '.zfs', '.zfs/snapshot', and all directories created under
+ * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
+ * zfsvfs_t as the head filesystem (what '.zfs' lives under).
+ *
+ * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
+ * (ie: snapshots) are complete ZFS filesystems and have their own unique
+ * zfsvfs_t.  However, the fsid reported by these mounts will be the same
+ * as that used by the parent zfsvfs_t to make NFS happy.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/stat.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_deleg.h>
+#include <sys/zpl.h>
+#include <sys/mntent.h>
+#include "zfs_namecheck.h"
+
+/*
+ * Two AVL trees are maintained which contain all currently automounted
+ * snapshots.  Every automounted snapshot maps to a single zfs_snapentry_t
+ * entry which MUST:
+ *
+ *   - be attached to both trees, and
+ *   - be unique, no duplicate entries are allowed.
+ *
+ * The zfs_snapshots_by_name tree is indexed by the full dataset name
+ * while the zfs_snapshots_by_objsetid tree is indexed by the unique
+ * objsetid.  This allows for fast lookups either by name or objsetid.
+ */
+static avl_tree_t zfs_snapshots_by_name;
+static avl_tree_t zfs_snapshots_by_objsetid;
+static krwlock_t zfs_snapshot_lock;
+
+/*
+ * Control Directory Tunables (.zfs)
+ *
+ * zfs_expire_snapshot: delay, in seconds, used by snapentry_expire() when
+ * it reschedules the unmount of an automounted snapshot; a value <= 0
+ * makes snapentry_expire() return without unmounting.
+ * zfs_admin_snapshot: NOTE(review): not referenced in this part of the
+ * file; presumably gates snapshot administration via .zfs - confirm.
+ */
+int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
+int zfs_admin_snapshot = 0;
+
+/*
+ * Bookkeeping for one automounted snapshot.  An entry is linked into both
+ * zfs_snapshots_by_name and zfs_snapshots_by_objsetid, is reference
+ * counted via se_refcount, and is freed when the last reference is dropped.
+ */
+typedef struct {
+	char		*se_name;	/* full snapshot name */
+	char		*se_path;	/* full mount path */
+	spa_t		*se_spa;	/* pool spa */
+	uint64_t	se_objsetid;	/* snapshot objset id */
+	struct dentry   *se_root_dentry; /* snapshot root dentry */
+	krwlock_t	se_taskqid_lock;  /* scheduled unmount taskqid lock */
+	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
+	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
+	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
+	zfs_refcount_t	se_refcount;	/* reference count */
+} zfs_snapentry_t;
+
+static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
+
+/*
+ * Build a new zfs_snapentry_t, duplicating both the snapshot name and the
+ * mount path so that the entry owns its own strings.  The returned entry
+ * carries no reference.
+ */
+static zfs_snapentry_t *
+zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
+    uint64_t objsetid, struct dentry *root_dentry)
+{
+	zfs_snapentry_t *entry = kmem_zalloc(sizeof (*entry), KM_SLEEP);
+
+	/* Identity of the snapshot. */
+	entry->se_spa = spa;
+	entry->se_objsetid = objsetid;
+	entry->se_root_dentry = root_dentry;
+
+	/* Private copies of the caller's strings. */
+	entry->se_name = kmem_strdup(full_name);
+	entry->se_path = kmem_strdup(full_path);
+
+	/* No unmount task is scheduled yet. */
+	rw_init(&entry->se_taskqid_lock, NULL, RW_DEFAULT, NULL);
+	entry->se_taskqid = TASKQID_INVALID;
+
+	zfs_refcount_create(&entry->se_refcount);
+
+	return (entry);
+}
+
+/*
+ * Free a zfs_snapentry_t and everything zfsctl_snapshot_alloc() set up for
+ * it: the reference count, the duplicated name and path strings, and the
+ * taskqid lock.  The caller must ensure there are no active references.
+ */
+static void
+zfsctl_snapshot_free(zfs_snapentry_t *se)
+{
+	zfs_refcount_destroy(&se->se_refcount);
+	kmem_strfree(se->se_name);
+	kmem_strfree(se->se_path);
+	/* Pass the lock by address, matching rw_init() in the allocator. */
+	rw_destroy(&se->se_taskqid_lock);
+
+	kmem_free(se, sizeof (zfs_snapentry_t));
+}
+
+/*
+ * Hold a reference on the zfs_snapentry_t.  Each hold must be balanced by
+ * a call to zfsctl_snapshot_rele().
+ */
+static void
+zfsctl_snapshot_hold(zfs_snapentry_t *se)
+{
+	zfs_refcount_add(&se->se_refcount, NULL);
+}
+
+/*
+ * Drop one reference on the zfs_snapentry_t.  The entry is freed once the
+ * reference count reaches zero.
+ */
+static void
+zfsctl_snapshot_rele(zfs_snapentry_t *se)
+{
+	/* Other holders remain: nothing more to do. */
+	if (zfs_refcount_remove(&se->se_refcount, NULL) != 0)
+		return;
+
+	zfsctl_snapshot_free(se);
+}
+
+/*
+ * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
+ * zfs_snapshots_by_objsetid trees.  While the zfs_snapentry_t is part
+ * of the trees a reference is held.  The caller must hold
+ * zfs_snapshot_lock as writer.
+ */
+static void
+zfsctl_snapshot_add(zfs_snapentry_t *se)
+{
+	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
+	/* This hold is dropped again by zfsctl_snapshot_remove(). */
+	zfsctl_snapshot_hold(se);
+	avl_add(&zfs_snapshots_by_name, se);
+	avl_add(&zfs_snapshots_by_objsetid, se);
+}
+
+/*
+ * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
+ * zfs_snapshots_by_objsetid trees.  Upon removal a reference is dropped,
+ * this can result in the structure being freed if that was the last
+ * remaining reference.  The caller must hold zfs_snapshot_lock as writer.
+ */
+static void
+zfsctl_snapshot_remove(zfs_snapentry_t *se)
+{
+	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
+	avl_remove(&zfs_snapshots_by_name, se);
+	avl_remove(&zfs_snapshots_by_objsetid, se);
+	/* Drops the hold taken in zfsctl_snapshot_add(); may free se. */
+	zfsctl_snapshot_rele(se);
+}
+
+/*
+ * Comparator for the zfs_snapshots_by_name tree: orders entries by
+ * se_name and normalises the strcmp() result to -1, 0 or 1.
+ */
+static int
+snapentry_compare_by_name(const void *a, const void *b)
+{
+	const zfs_snapentry_t *lhs = a;
+	const zfs_snapentry_t *rhs = b;
+	int cmp = strcmp(lhs->se_name, rhs->se_name);
+
+	if (cmp == 0)
+		return (0);
+
+	return (cmp < 0 ? -1 : 1);
+}
+
+/*
+ * Comparator for the zfs_snapshots_by_objsetid tree: orders entries first
+ * by the address of their spa and then by objset id.
+ */
+static int
+snapentry_compare_by_objsetid(const void *a, const void *b)
+{
+	const zfs_snapentry_t *lhs = a;
+	const zfs_snapentry_t *rhs = b;
+
+	/* Entries from different pools are ordered by spa pointer value. */
+	if (lhs->se_spa != rhs->se_spa) {
+		if ((ulong_t)lhs->se_spa < (ulong_t)rhs->se_spa)
+			return (-1);
+		return (1);
+	}
+
+	if (lhs->se_objsetid == rhs->se_objsetid)
+		return (0);
+
+	return (lhs->se_objsetid < rhs->se_objsetid ? -1 : 1);
+}
+
+/*
+ * Look up snapname in zfs_snapshots_by_name.  On success the matching
+ * zfs_snapentry_t is returned with a reference held, which the caller
+ * must drop with zfsctl_snapshot_rele().  NULL is returned when there is
+ * no such entry.  zfs_snapshot_lock must be held.
+ */
+static zfs_snapentry_t *
+zfsctl_snapshot_find_by_name(const char *snapname)
+{
+	zfs_snapentry_t key;
+	zfs_snapentry_t *found;
+
+	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
+
+	/* Only the field read by the name comparator needs to be set. */
+	key.se_name = (char *)snapname;
+	found = avl_find(&zfs_snapshots_by_name, &key, NULL);
+	if (found == NULL)
+		return (NULL);
+
+	zfsctl_snapshot_hold(found);
+
+	return (found);
+}
+
+/*
+ * Look up an entry in zfs_snapshots_by_objsetid by pool and objset id
+ * instead of by name.  As with zfsctl_snapshot_find_by_name(), a found
+ * entry is returned with a reference held and NULL means no match.
+ */
+static zfs_snapentry_t *
+zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
+{
+	zfs_snapentry_t key;
+	zfs_snapentry_t *found;
+
+	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
+
+	/* Only the fields read by the objsetid comparator need to be set. */
+	key.se_spa = spa;
+	key.se_objsetid = objsetid;
+	found = avl_find(&zfs_snapshots_by_objsetid, &key, NULL);
+	if (found == NULL)
+		return (NULL);
+
+	zfsctl_snapshot_hold(found);
+
+	return (found);
+}
+
+/*
+ * Rename a zfs_snapentry_t in the zfs_snapshots_by_name.  The structure is
+ * removed, renamed, and added back to the new correct location in the tree.
+ * Returns ENOENT when old_snapname is not present, otherwise 0.  The caller
+ * must hold zfs_snapshot_lock as writer.
+ */
+static int
+zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
+{
+	zfs_snapentry_t *se;
+
+	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
+
+	/* The lookup hold keeps se alive across the remove below. */
+	se = zfsctl_snapshot_find_by_name(old_snapname);
+	if (se == NULL)
+		return (SET_ERROR(ENOENT));
+
+	/* remove/add operate on both trees, not just the name tree. */
+	zfsctl_snapshot_remove(se);
+	kmem_strfree(se->se_name);
+	se->se_name = kmem_strdup(new_snapname);
+	zfsctl_snapshot_add(se);
+	/* Drop the lookup hold; the trees hold their own reference again. */
+	zfsctl_snapshot_rele(se);
+
+	return (0);
+}
+
+/*
+ * Delayed task responsible for unmounting an expired automounted snapshot.
+ * data is the zfs_snapentry_t that was held when the task was dispatched;
+ * that hold is dropped here on every path.
+ */
+static void
+snapentry_expire(void *data)
+{
+	zfs_snapentry_t *se = (zfs_snapentry_t *)data;
+	/* Saved up front: se may be freed by the rele below. */
+	spa_t *spa = se->se_spa;
+	uint64_t objsetid = se->se_objsetid;
+
+	/* Expiry is disabled: just drop the dispatch hold. */
+	if (zfs_expire_snapshot <= 0) {
+		zfsctl_snapshot_rele(se);
+		return;
+	}
+
+	/* Mark the task as no longer pending before attempting the unmount. */
+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
+	se->se_taskqid = TASKQID_INVALID;
+	rw_exit(&se->se_taskqid_lock);
+	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
+	zfsctl_snapshot_rele(se);
+
+	/*
+	 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
+	 * This can occur when the snapshot is busy.
+	 */
+	rw_enter(&zfs_snapshot_lock, RW_READER);
+	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
+		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
+		zfsctl_snapshot_rele(se);
+	}
+	rw_exit(&zfs_snapshot_lock);
+}
+
+/*
+ * Cancel an automatic unmount of a snapname.  When the pending task is
+ * successfully cancelled this function drops the reference on the
+ * zfs_snapentry_t which was taken during dispatch.
+ */
+static void
+zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
+{
+	int err = 0;
+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
+	err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
+	/*
+	 * if we get ENOENT, the taskq couldn't be found to be
+	 * canceled, so we can just mark it as invalid because
+	 * it's already gone. If we got EBUSY, then we already
+	 * blocked until it was gone _anyway_, so we don't care.
+	 */
+	se->se_taskqid = TASKQID_INVALID;
+	rw_exit(&se->se_taskqid_lock);
+	/*
+	 * Only a successful cancel leaves the dispatch hold to drop here;
+	 * snapentry_expire() releases it itself when the task runs.
+	 */
+	if (err == 0) {
+		zfsctl_snapshot_rele(se);
+	}
+}
+
+/*
+ * Dispatch the unmount task for delayed handling with a hold protecting it.
+ *
+ * se    - entry whose unmount is to be scheduled
+ * delay - delay in seconds; values <= 0 schedule nothing
+ */
+static void
+zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
+{
+
+	if (delay <= 0)
+		return;
+
+	/* This hold belongs to the task and is dropped when it runs. */
+	zfsctl_snapshot_hold(se);
+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
+	/*
+	 * If this condition happens, we managed to:
+	 * - dispatch once
+	 * - want to dispatch _again_ before it returned
+	 *
+	 * So let's just return - if that task fails at unmounting,
+	 * we'll eventually dispatch again, and if it succeeds,
+	 * no problem.
+	 */
+	if (se->se_taskqid != TASKQID_INVALID) {
+		rw_exit(&se->se_taskqid_lock);
+		/* Nothing was dispatched, so give the hold back. */
+		zfsctl_snapshot_rele(se);
+		return;
+	}
+	/* The expiry time is expressed in ticks: now + delay seconds. */
+	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
+	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
+	rw_exit(&se->se_taskqid_lock);
+}
+
+/*
+ * Schedule an automatic unmount of the snapshot identified by spa and
+ * objsetid to occur delay seconds from now.  Any previously scheduled
+ * unmount is cancelled in favor of the updated deadline.  The reference
+ * taken by zfsctl_snapshot_find_by_objsetid() is dropped before returning;
+ * the dispatched task is protected by its own hold, taken in
+ * zfsctl_snapshot_unmount_delay_impl().
+ *
+ * Returns 0 on success or ENOENT when no such snapshot entry exists.
+ */
+int
+zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
+{
+	zfs_snapentry_t *se;
+	int error = 0;
+
+	rw_enter(&zfs_snapshot_lock, RW_READER);
+	se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
+	if (se == NULL) {
+		error = ENOENT;
+	} else {
+		zfsctl_snapshot_unmount_cancel(se);
+		zfsctl_snapshot_unmount_delay_impl(se, delay);
+		zfsctl_snapshot_rele(se);
+	}
+	rw_exit(&zfs_snapshot_lock);
+
+	return (error);
+}
+
+/*
+ * Report whether snapname currently has an entry in the automounted
+ * snapshot trees: B_TRUE when it does, B_FALSE otherwise.
+ */
+static boolean_t
+zfsctl_snapshot_ismounted(const char *snapname)
+{
+	zfs_snapentry_t *se;
+	boolean_t mounted;
+
+	rw_enter(&zfs_snapshot_lock, RW_READER);
+	se = zfsctl_snapshot_find_by_name(snapname);
+	mounted = (se != NULL) ? B_TRUE : B_FALSE;
+	/* The lookup hold was only needed to establish existence. */
+	if (mounted)
+		zfsctl_snapshot_rele(se);
+	rw_exit(&zfs_snapshot_lock);
+
+	return (mounted);
+}
+
+/*
+ * Check if the given inode is a part of the virtual .zfs directory.
+ * This is the z_is_ctldir flag that zfsctl_inode_alloc() sets on every
+ * inode it creates.
+ */
+boolean_t
+zfsctl_is_node(struct inode *ip)
+{
+	return (ITOZ(ip)->z_is_ctldir);
+}
+
+/*
+ * Check if the given inode is a .zfs/snapshot/<snapname> directory, i.e.
+ * a control directory node whose inode number does not exceed
+ * ZFSCTL_INO_SNAPDIRS.
+ */
+boolean_t
+zfsctl_is_snapdir(struct inode *ip)
+{
+	return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
+}
+
+/*
+ * Allocate a new inode with the passed id and ops.
+ *
+ * The inode is set up as a root-owned directory flagged as part of the
+ * control directory, inserted into the inode hash and appended to
+ * zfsvfs->z_all_znodes.  Returns the new inode, or NULL when allocation
+ * or insertion fails.
+ */
+static struct inode *
+zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
+    const struct file_operations *fops, const struct inode_operations *ops)
+{
+	inode_timespec_t now;
+	struct inode *ip;
+	znode_t *zp;
+
+	ip = new_inode(zfsvfs->z_sb);
+	if (ip == NULL)
+		return (NULL);
+
+	now = current_time(ip);
+	zp = ITOZ(ip);
+	ASSERT3P(zp->z_dirlocks, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	/* Reset the znode fields to the values used for a ctldir node. */
+	zp->z_id = id;
+	zp->z_unlinked = B_FALSE;
+	zp->z_atime_dirty = B_FALSE;
+	zp->z_zn_prefetch = B_FALSE;
+	zp->z_is_sa = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	zp->z_is_mapped = B_FALSE;
+#endif
+	zp->z_is_ctldir = B_TRUE;
+	zp->z_sa_hdl = NULL;
+	zp->z_blksz = 0;
+	zp->z_seq = 0;
+	zp->z_mapcnt = 0;
+	zp->z_size = 0;
+	zp->z_pflags = 0;
+	zp->z_mode = 0;
+	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+	/* A directory owned by uid/gid 0 with all rwx bits set. */
+	ip->i_generation = 0;
+	ip->i_ino = id;
+	ip->i_mode = (S_IFDIR | S_IRWXUGO);
+	ip->i_uid = SUID_TO_KUID(0);
+	ip->i_gid = SGID_TO_KGID(0);
+	ip->i_blkbits = SPA_MINBLOCKSHIFT;
+	ip->i_atime = now;
+	ip->i_mtime = now;
+	ip->i_ctime = now;
+	ip->i_fop = fops;
+	ip->i_op = ops;
+#if defined(IOP_XATTR)
+	ip->i_opflags &= ~IOP_XATTR;
+#endif
+
+	/*
+	 * Insertion can fail; zfsctl_inode_lookup() attributes this to a
+	 * concurrent zfsctl_inode_alloc() of the same id and retries.
+	 */
+	if (insert_inode_locked(ip)) {
+		unlock_new_inode(ip);
+		iput(ip);
+		return (NULL);
+	}
+
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	list_insert_tail(&zfsvfs->z_all_znodes, zp);
+	zfsvfs->z_nr_znodes++;
+	membar_producer();
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	unlock_new_inode(ip);
+
+	return (ip);
+}
+
+/*
+ * Return the inode with the given id, allocating it when it is not already
+ * known to the superblock.  Keeps retrying until either the lookup or the
+ * allocation yields an inode.
+ */
+static struct inode *
+zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
+    const struct file_operations *fops, const struct inode_operations *ops)
+{
+	struct inode *found;
+
+	for (;;) {
+		found = ilookup(zfsvfs->z_sb, (unsigned long)id);
+		if (found != NULL)
+			return (found);
+
+		/*
+		 * Not present yet, so try to create it.  This may fail due
+		 * to a concurrent zfsctl_inode_alloc(), in which case the
+		 * lookup is attempted again.
+		 */
+		found = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
+		if (found != NULL)
+			return (found);
+	}
+}
+
+/*
+ * Create the '.zfs' directory and cache its inode in zfsvfs->z_ctldir.
+ * Caching it results in a hold on the zfsvfs_t.  The code in zfs_umount()
+ * therefore checks against a vfs_count of 2 instead of 1.  This reference
+ * is removed when the ctldir is destroyed in the unmount.  All other entities
+ * under the '.zfs' directory are created dynamically as needed.
+ *
+ * Because the dynamically created '.zfs' directory entries assume the use
+ * of 64-bit inode numbers this support must be disabled on 32-bit systems.
+ *
+ * Returns 0 on success or ENOENT when the inode could not be allocated.
+ */
+int
+zfsctl_create(zfsvfs_t *zfsvfs)
+{
+	struct inode *root_ip;
+
+	ASSERT(zfsvfs->z_ctldir == NULL);
+
+	root_ip = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
+	    &zpl_fops_root, &zpl_ops_root);
+	zfsvfs->z_ctldir = root_ip;
+
+	return (root_ip != NULL ? 0 : SET_ERROR(ENOENT));
+}
+
+/*
+ * Tear down the control directory state of a filesystem being unmounted.
+ * For a snapshot this removes its entry from the snapshot trees and cancels
+ * any pending unmount of that entry; otherwise it drops the cached '.zfs'
+ * inode.  Only called when the filesystem is unmounted.
+ */
+void
+zfsctl_destroy(zfsvfs_t *zfsvfs)
+{
+	zfs_snapentry_t *se;
+	spa_t *spa;
+	uint64_t objsetid;
+
+	if (!zfsvfs->z_issnap) {
+		/* Regular filesystem: release the '.zfs' inode, if any. */
+		if (zfsvfs->z_ctldir) {
+			iput(zfsvfs->z_ctldir);
+			zfsvfs->z_ctldir = NULL;
+		}
+		return;
+	}
+
+	spa = zfsvfs->z_os->os_spa;
+	objsetid = dmu_objset_id(zfsvfs->z_os);
+
+	rw_enter(&zfs_snapshot_lock, RW_WRITER);
+	se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
+	if (se != NULL)
+		zfsctl_snapshot_remove(se);
+	rw_exit(&zfs_snapshot_lock);
+
+	if (se == NULL)
+		return;
+
+	/* Finish with the entry only after the lock has been dropped. */
+	zfsctl_snapshot_unmount_cancel(se);
+	zfsctl_snapshot_rele(se);
+}
+
+/*
+ * Given a root znode, retrieve the associated .zfs directory.
+ * A hold is added to the inode with igrab() before it is returned.
+ */
+struct inode *
+zfsctl_root(znode_t *zp)
+{
+	struct inode *ctldir;
+
+	ASSERT(zfs_has_ctldir(zp));
+
+	ctldir = ZTOZSB(zp)->z_ctldir;
+
+	/* Must have an existing ref, so igrab() cannot return NULL */
+	VERIFY3P(igrab(ctldir), !=, NULL);
+
+	return (ctldir);
+}
+
+/*
+ * Generate a long fid to indicate a snapdir. We encode whether snapdir is
+ * already mounted in gen field. We do this because nfsd lookup will not
+ * trigger automount. Next time the nfsd does fh_to_dentry, we will notice
+ * this and do automount and return ESTALE to force nfsd revalidate and follow
+ * mount.
+ *
+ * Returns ENOSPC, after storing the required length in fidp->fid_len, when
+ * the supplied fid is shorter than LONG_FID_LEN.
+ */
+static int
+zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
+{
+	zfid_short_t *sfid = (zfid_short_t *)fidp;
+	zfid_long_t *lfid = (zfid_long_t *)fidp;
+	uint64_t object = ip->i_ino;
+	uint64_t objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
+	uint32_t gen;
+	struct dentry *alias;
+	int b;
+
+	if (fidp->fid_len < LONG_FID_LEN) {
+		fidp->fid_len = LONG_FID_LEN;
+		return (SET_ERROR(ENOSPC));
+	}
+
+	/* gen is 1 when something is mounted on the snapdir, otherwise 0. */
+	alias = d_obtain_alias(igrab(ip));
+	if (IS_ERR(alias)) {
+		gen = 0;
+	} else {
+		gen = !!d_mountpoint(alias);
+		dput(alias);
+	}
+
+	sfid->zf_len = LONG_FID_LEN;
+
+	/* Each field is stored one byte at a time, low byte first. */
+	for (b = 0; b < sizeof (sfid->zf_object); b++)
+		sfid->zf_object[b] = (uint8_t)(object >> (8 * b));
+
+	for (b = 0; b < sizeof (sfid->zf_gen); b++)
+		sfid->zf_gen[b] = (uint8_t)(gen >> (8 * b));
+
+	for (b = 0; b < sizeof (lfid->zf_setid); b++)
+		lfid->zf_setid[b] = (uint8_t)(objsetid >> (8 * b));
+
+	for (b = 0; b < sizeof (lfid->zf_setgen); b++)
+		lfid->zf_setgen[b] = 0;
+
+	return (0);
+}
+
+/*
+ * Generate an appropriate fid for an entry in the .zfs directory.
+ *
+ * Snapshot directories are delegated to zfsctl_snapdir_fid().  Every other
+ * node gets a short fid holding its object id and a generation of 0.
+ * Returns ENOSPC, after storing the required length in fidp->fid_len, when
+ * the supplied fid is shorter than SHORT_FID_LEN.
+ */
+int
+zfsctl_fid(struct inode *ip, fid_t *fidp)
+{
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	uint64_t	object = ITOZ(ip)->z_id;
+	zfid_short_t	*sfid = (zfid_short_t *)fidp;
+	int		b;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (zfsctl_is_snapdir(ip)) {
+		ZFS_EXIT(zfsvfs);
+		return (zfsctl_snapdir_fid(ip, fidp));
+	}
+
+	if (fidp->fid_len >= SHORT_FID_LEN) {
+		sfid->zf_len = SHORT_FID_LEN;
+
+		/* The object id is stored one byte at a time, low first. */
+		for (b = 0; b < sizeof (sfid->zf_object); b++)
+			sfid->zf_object[b] = (uint8_t)(object >> (8 * b));
+
+		/* .zfs znodes always have a generation number of 0 */
+		for (b = 0; b < sizeof (sfid->zf_gen); b++)
+			sfid->zf_gen[b] = 0;
+
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/* Buffer too small: tell the caller how much room is needed. */
+	fidp->fid_len = SHORT_FID_LEN;
+	ZFS_EXIT(zfsvfs);
+	return (SET_ERROR(ENOSPC));
+}
+
+/*
+ * Construct a full dataset name in full_name: "pool/dataset@snap_name"
+ *
+ * 'len' is the size of the full_name buffer.  Returns EILSEQ when snap_name
+ * fails zfs_component_namecheck() and ENAMETOOLONG when the combined name
+ * would not fit in the buffer.
+ */
+static int
+zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
+    char *full_name)
+{
+	size_t fs_len, snap_len;
+
+	if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
+		return (SET_ERROR(EILSEQ));
+
+	dmu_objset_name(zfsvfs->z_os, full_name);
+
+	/* Room is needed for both names, the '@' and the terminator. */
+	fs_len = strlen(full_name);
+	snap_len = strlen(snap_name);
+	if ((fs_len + 1 + snap_len) >= len)
+		return (SET_ERROR(ENAMETOOLONG));
+
+	(void) strcat(full_name, "@");
+	(void) strcat(full_name, snap_name);
+
+	return (0);
+}
+
+/*
+ * Returns full path in full_path: "<mountpoint>/.zfs/snapshot/snap_name"
+ *
+ * The snapshots of the dataset are walked until the one whose id equals
+ * objsetid is found.  Returns ENOENT when the filesystem has no recorded
+ * mount point, otherwise any error from dmu_snapshot_list_next().
+ */
+static int
+zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
+    int path_len, char *full_path)
+{
+	objset_t *os = zfsvfs->z_os;
+	fstrans_cookie_t cookie;
+	boolean_t case_conflict;
+	uint64_t id, pos = 0;
+	char *snapname;
+	int error;
+
+	if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
+		return (SET_ERROR(ENOENT));
+
+	cookie = spl_fstrans_mark();
+	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+
+	for (;;) {
+		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+		error = dmu_snapshot_list_next(zfsvfs->z_os,
+		    ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
+		    &case_conflict);
+		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+		if (error)
+			break;
+
+		if (id == objsetid) {
+			/* Found it: build the path from the mount point. */
+			snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
+			    zfsvfs->z_vfs->vfs_mntpoint, snapname);
+			break;
+		}
+	}
+
+	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
+	spl_fstrans_unmark(cookie);
+
+	return (error);
+}
+
+/*
+ * Lookup entry point for the '.zfs' directory.  ".." resolves to the root
+ * inode of the superblock, and the snapshot and shares directory names
+ * resolve to their control inodes.  Any other name yields ENOENT with *ipp
+ * set to NULL.
+ */
+int
+zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(dip);
+	struct inode *ip = NULL;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (strcmp(name, "..") == 0)
+		ip = dip->i_sb->s_root->d_inode;
+	else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0)
+		ip = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
+		    &zpl_fops_snapdir, &zpl_ops_snapdir);
+	else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0)
+		ip = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
+		    &zpl_fops_shares, &zpl_ops_shares);
+
+	*ipp = ip;
+	error = (ip == NULL) ? SET_ERROR(ENOENT) : 0;
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Lookup entry point for the 'snapshot' directory.  Try to open the
+ * snapshot if it exists, creating the pseudo filesystem inode as necessary.
+ *
+ * Returns the error from dmu_snapshot_lookup() when the name is unknown and
+ * ENOENT when no inode could be obtained for the snapshot.
+ */
+int
+zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(dip);
+	uint64_t snap_id;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &snap_id);
+	if (error == 0) {
+		/* Snapdir inode numbers count down from ZFSCTL_INO_SNAPDIRS. */
+		*ipp = zfsctl_inode_lookup(zfsvfs,
+		    ZFSCTL_INO_SNAPDIRS - snap_id,
+		    &simple_dir_operations, &simple_dir_inode_operations);
+		if (*ipp == NULL)
+			error = SET_ERROR(ENOENT);
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Renaming a directory under '.zfs/snapshot' will automatically trigger
+ * a rename of the snapshot to the new given name.  The rename is confined
+ * to the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere.
+ *
+ * Returns EACCES when zfs_admin_snapshot is disabled and EINVAL when the
+ * source and target directories differ; otherwise the first error reported
+ * while building the names, checking permissions or renaming the dataset.
+ * Renaming a snapshot to its current name succeeds without doing anything.
+ */
+int
+zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
+    struct inode *tdip, const char *tnm, cred_t *cr, int flags)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(sdip);
+	char *to, *from, *real, *fsname;
+	int error;
+
+	if (!zfs_admin_snapshot)
+		return (SET_ERROR(EACCES));
+
+	ZFS_ENTER(zfsvfs);
+
+	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+
+	/*
+	 * On a case-insensitive dataset use the name returned by
+	 * dmu_snapshot_realname() when it succeeds.  ENOTSUP is tolerated
+	 * and leaves the caller's spelling in place.
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+		error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
+		    ZFS_MAX_DATASET_NAME_LEN, NULL);
+		if (error == 0) {
+			snm = real;
+		} else if (error != ENOTSUP) {
+			goto out;
+		}
+	}
+
+	dmu_objset_name(zfsvfs->z_os, fsname);
+
+	/* Build both full names, then check the rename permission on them. */
+	error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
+	    ZFS_MAX_DATASET_NAME_LEN, from);
+	if (error == 0)
+		error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
+		    ZFS_MAX_DATASET_NAME_LEN, to);
+	if (error == 0)
+		error = zfs_secpolicy_rename_perms(from, to, cr);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Cannot move snapshots out of the snapdir.
+	 */
+	if (sdip != tdip) {
+		error = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/*
+	 * No-op when names are identical.
+	 */
+	if (strcmp(snm, tnm) == 0) {
+		error = 0;
+		goto out;
+	}
+
+	/*
+	 * Rename the dataset and, when that succeeds, the tracked snapshot
+	 * entry while holding zfs_snapshot_lock as writer.  The result of
+	 * zfsctl_snapshot_rename() is deliberately ignored.
+	 */
+	rw_enter(&zfs_snapshot_lock, RW_WRITER);
+
+	error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
+	if (error == 0)
+		(void) zfsctl_snapshot_rename(snm, tnm);
+
+	rw_exit(&zfs_snapshot_lock);
+out:
+	kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
+	kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
+	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
+	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Removing a directory under '.zfs/snapshot' will automatically trigger
+ * the removal of the snapshot with the given name.
+ *
+ * Returns EACCES when zfs_admin_snapshot is disabled.  The snapshot is only
+ * destroyed when the forced unmount succeeds or reports ENOENT.
+ */
+int
+zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
+    int flags)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(dip);
+	char *full_name, *stored_name;
+	int error;
+
+	if (!zfs_admin_snapshot)
+		return (SET_ERROR(EACCES));
+
+	ZFS_ENTER(zfsvfs);
+
+	full_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	stored_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+
+	/*
+	 * On a case-insensitive dataset use the name returned by
+	 * dmu_snapshot_realname() when it succeeds; ENOTSUP is tolerated.
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+		error = dmu_snapshot_realname(zfsvfs->z_os, name, stored_name,
+		    ZFS_MAX_DATASET_NAME_LEN, NULL);
+		if (error != 0 && error != ENOTSUP)
+			goto out;
+		if (error == 0)
+			name = stored_name;
+	}
+
+	error = zfsctl_snapshot_name(zfsvfs, name,
+	    ZFS_MAX_DATASET_NAME_LEN, full_name);
+	if (error != 0)
+		goto out;
+
+	error = zfs_secpolicy_destroy_perms(full_name, cr);
+	if (error != 0)
+		goto out;
+
+	error = zfsctl_snapshot_unmount(full_name, MNT_FORCE);
+	if (error != 0 && error != ENOENT)
+		goto out;
+
+	error = dsl_destroy_snapshot(full_name, B_FALSE);
+out:
+	kmem_free(stored_name, ZFS_MAX_DATASET_NAME_LEN);
+	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Creating a directory under '.zfs/snapshot' will automatically trigger
+ * the creation of a new snapshot with the given name.
+ *
+ * Returns EACCES when zfs_admin_snapshot is disabled and EILSEQ when dirname
+ * fails zfs_component_namecheck().  On success the new snapshot directory is
+ * looked up and returned through ipp.
+ */
+int
+zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap,
+    struct inode **ipp, cred_t *cr, int flags)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(dip);
+	char *fsname;
+	int error;
+
+	if (!zfs_admin_snapshot)
+		return (SET_ERROR(EACCES));
+
+	fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+
+	if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
+		error = SET_ERROR(EILSEQ);
+		goto out;
+	}
+
+	dmu_objset_name(zfsvfs->z_os, fsname);
+
+	/* Each step runs only when the previous one succeeded. */
+	error = zfs_secpolicy_snapshot_perms(fsname, cr);
+	if (error == 0)
+		error = dmu_objset_snapshot_one(fsname, dirname);
+	if (error == 0)
+		error = zfsctl_snapdir_lookup(dip, dirname, ipp,
+		    0, cr, NULL, NULL);
+out:
+	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
+
+	return (error);
+}
+
+/*
+ * Flush everything out of the kernel's export table and such by running
+ * "exportfs -f" in user space and waiting for it to finish.
+ * This is needed as once the snapshot is used over NFS, its
+ * entries in svc_export and svc_expkey caches hold reference
+ * to the snapshot mount point. There is no known way of flushing
+ * only the entries related to the snapshot.
+ */
+static void
+exportfs_flush(void)
+{
+	char *helper_argv[] = { "/usr/sbin/exportfs", "-f", NULL };
+	char *helper_envp[] = { NULL };
+
+	/* Best effort only: the helper's exit status is ignored. */
+	(void) call_usermodehelper(helper_argv[0], helper_argv, helper_envp,
+	    UMH_WAIT_PROC);
+}
+
+/*
+ * Attempt to unmount a snapshot by making a call to user space.
+ * There is no assurance that this can or will succeed, is just a
+ * best effort.  In the case where it does fail, perhaps because
+ * it's in use, the unmount will fail harmlessly.
+ *
+ * Returns ENOENT when no snapshot entry exists for snapname and EBUSY when
+ * the umount helper reports any failure.
+ */
+int
+zfsctl_snapshot_unmount(const char *snapname, int flags)
+{
+	char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", NULL, NULL,
+	    NULL };
+	char *envp[] = { NULL };
+	zfs_snapentry_t *se;
+	int status;
+
+	rw_enter(&zfs_snapshot_lock, RW_READER);
+	se = zfsctl_snapshot_find_by_name(snapname);
+	rw_exit(&zfs_snapshot_lock);
+	if (se == NULL)
+		return (SET_ERROR(ENOENT));
+
+	exportfs_flush();
+
+	/* A forced unmount adds 'f' to the option passed to umount. */
+	argv[4] = (flags & MNT_FORCE) ? "-fn" : "-n";
+	argv[5] = se->se_path;
+	dprintf("unmount; path=%s\n", se->se_path);
+	status = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+	zfsctl_snapshot_rele(se);
+
+	if (status == 0)
+		return (0);
+
+	/*
+	 * The umount system utility will return 256 on error.  We must
+	 * assume this error is because the file system is busy so it is
+	 * converted to the more sensible EBUSY.
+	 */
+	return (SET_ERROR(EBUSY));
+}
+
+/*
+ * Automount the snapshot named by the dentry of 'path' by invoking the
+ * user space mount helper.
+ *
+ * Returns 0 when the snapshot is already tracked as mounted, when the helper
+ * succeeds, or when the helper's status has the MOUNT_BUSY exit code bit
+ * set.  Returns EISDIR when the dentry has no inode or when the helper fails
+ * for any other reason, and the error from zfsctl_snapshot_name() when the
+ * dataset name cannot be built.  The 'flags' argument is not used.
+ */
+int
+zfsctl_snapshot_mount(struct path *path, int flags)
+{
+	struct dentry *dentry = path->dentry;
+	struct inode *ip = dentry->d_inode;
+	zfsvfs_t *zfsvfs;
+	zfsvfs_t *snap_zfsvfs;
+	zfs_snapentry_t *se;
+	char *full_name, *full_path;
+	char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL,
+	    NULL };
+	char *envp[] = { NULL };
+	int error;
+	struct path spath;
+
+	if (ip == NULL)
+		return (SET_ERROR(EISDIR));
+
+	zfsvfs = ITOZSB(ip);
+	ZFS_ENTER(zfsvfs);
+
+	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+	error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
+	    ZFS_MAX_DATASET_NAME_LEN, full_name);
+	if (error)
+		goto error;
+
+	/*
+	 * Construct a mount point path from sb of the ctldir inode and dirent
+	 * name, instead of from d_path(), so that chroot'd process doesn't fail
+	 * on mount.zfs(8).
+	 */
+	snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
+	    zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
+	    dname(dentry));
+
+	/*
+	 * Multiple concurrent automounts of a snapshot are never allowed.
+	 * The snapshot may be manually mounted as many times as desired.
+	 */
+	if (zfsctl_snapshot_ismounted(full_name)) {
+		error = 0;
+		goto error;
+	}
+
+	/*
+	 * Attempt to mount the snapshot from user space.  Normally this
+	 * would be done using the vfs_kern_mount() function, however that
+	 * function is marked GPL-only and cannot be used.  On error we are
+	 * careful to log the real error to the console and return EISDIR
+	 * to safely abort the automount.  This should be very rare.
+	 *
+	 * If the user mode helper happens to return EBUSY, a concurrent
+	 * mount is already in progress in which case the error is ignored.
+	 * Take note that if the program was executed successfully the return
+	 * value from call_usermodehelper() will be (exitcode << 8 + signal).
+	 */
+	dprintf("mount; name=%s path=%s\n", full_name, full_path);
+	argv[5] = full_name;
+	argv[6] = full_path;
+	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+	if (error) {
+		if (!(error & MOUNT_BUSY << 8)) {
+			zfs_dbgmsg("Unable to automount %s error=%d",
+			    full_path, error);
+			error = SET_ERROR(EISDIR);
+		} else {
+			/*
+			 * EBUSY, this could mean a concurrent mount, or the
+			 * snapshot has already been mounted at completely
+			 * different place. We return 0 so VFS will retry. For
+			 * the latter case the VFS will retry several times
+			 * and return ELOOP, which is probably not a very good
+			 * behavior.
+			 */
+			error = 0;
+		}
+		goto error;
+	}
+
+	/*
+	 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
+	 * to identify this as an automounted filesystem.
+	 */
+	spath = *path;
+	path_get(&spath);
+	if (follow_down_one(&spath)) {
+		snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
+		snap_zfsvfs->z_parent = zfsvfs;
+		dentry = spath.dentry;
+		spath.mnt->mnt_flags |= MNT_SHRINKABLE;
+
+		/*
+		 * Record the mounted snapshot and arm its delayed unmount
+		 * using the zfs_expire_snapshot setting, all under the
+		 * writer lock.
+		 */
+		rw_enter(&zfs_snapshot_lock, RW_WRITER);
+		se = zfsctl_snapshot_alloc(full_name, full_path,
+		    snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
+		    dentry);
+		zfsctl_snapshot_add(se);
+		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
+		rw_exit(&zfs_snapshot_lock);
+	}
+	path_put(&spath);
+error:
+	/* Common exit path, reached on success as well as on failure. */
+	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
+	kmem_free(full_path, MAXPATHLEN);
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Get the snapdir inode from fid
+ *
+ * Resolves objsetid to its "<mountpoint>/.zfs/snapshot/<name>" path, walks
+ * that path with kern_path() to trigger the automount, and then looks up
+ * the snapdir inode (ZFSCTL_INO_SNAPDIRS - objsetid) in 'sb'.  On success
+ * *ipp holds the reference returned by ilookup().  Returns ENOENT when the
+ * inode is not found or when 'gen' does not match the current mounted state
+ * of the snapdir (see zfsctl_snapdir_fid()); in the latter case *ipp is
+ * reset to NULL.
+ */
+int
+zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
+    struct inode **ipp)
+{
+	int error;
+	struct path path;
+	char *mnt;
+	struct dentry *dentry;
+
+	mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
+	    MAXPATHLEN, mnt);
+	if (error)
+		goto out;
+
+	/* Trigger automount */
+	/* kern_path() result is negated to get a positive error value. */
+	error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
+	if (error)
+		goto out;
+
+	path_put(&path);
+	/*
+	 * Get the snapdir inode. Note, we don't want to use the above
+	 * path because it contains the root of the snapshot rather
+	 * than the snapdir.
+	 */
+	*ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
+	if (*ipp == NULL) {
+		error = SET_ERROR(ENOENT);
+		goto out;
+	}
+
+	/* check gen, see zfsctl_snapdir_fid */
+	/* An extra igrab() reference is passed to d_obtain_alias() here. */
+	dentry = d_obtain_alias(igrab(*ipp));
+	if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
+		iput(*ipp);
+		*ipp = NULL;
+		error = SET_ERROR(ENOENT);
+	}
+	if (!IS_ERR(dentry))
+		dput(dentry);
+out:
+	kmem_free(mnt, MAXPATHLEN);
+	return (error);
+}
+
+/*
+ * Look up 'name' in the dataset's shares directory (z_shares_dir).
+ *
+ * On success *ipp is set to the inode of the entry found by zfs_lookup();
+ * the znode reference returned by zfs_lookup() (presumably held -- confirm
+ * against zfs_lookup()) is passed on to the caller through that inode.
+ * *ipp is left untouched on failure.
+ *
+ * Returns ENOTSUP when the dataset has no shares directory, otherwise the
+ * error from zfs_zget() or zfs_lookup().  The flags, direntflags and realpnp
+ * arguments are not used.
+ */
+int
+zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(dip);
+	znode_t *zp;
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (zfsvfs->z_shares_dir == 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
+		error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL);
+		zrele(dzp);
+
+		/*
+		 * Hand the result back to the caller.  Without this the
+		 * znode found above is never reported through ipp, so a
+		 * successful lookup would return nothing and lose track of
+		 * the znode it obtained.
+		 */
+		if (error == 0)
+			*ipp = ZTOI(zp);
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Initialize the various pieces we'll need to create and manipulate .zfs
+ * directories: the lock protecting the snapshot entries and the two AVL
+ * trees that index them by name and by objset id.
+ */
+void
+zfsctl_init(void)
+{
+	rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
+
+	avl_create(&zfs_snapshots_by_name,
+	    snapentry_compare_by_name,
+	    sizeof (zfs_snapentry_t),
+	    offsetof(zfs_snapentry_t, se_node_name));
+
+	avl_create(&zfs_snapshots_by_objsetid,
+	    snapentry_compare_by_objsetid,
+	    sizeof (zfs_snapentry_t),
+	    offsetof(zfs_snapentry_t, se_node_objsetid));
+}
+
+/*
+ * Cleanup the various pieces we needed for .zfs directories: destroy the
+ * snapshot lock and both snapshot AVL trees.
+ */
+void
+zfsctl_fini(void)
+{
+	avl_destroy(&zfs_snapshots_by_objsetid);
+	avl_destroy(&zfs_snapshots_by_name);
+
+	rw_destroy(&zfs_snapshot_lock);
+}
+
+/*
+ * Tunables exported as module parameters with mode 0644.
+ * zfs_admin_snapshot gates the mkdir/rmdir/rename handlers above, which
+ * return EACCES when it is zero.
+ */
+module_param(zfs_admin_snapshot, int, 0644);
+MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");
+
+/* Passed to zfsctl_snapshot_unmount_delay_impl() after an automount. */
+module_param(zfs_expire_snapshot, int, 0644);
+MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");

diff --git a/zfs/module/os/linux/zfs/zfs_debug.c b/zfs/module/os/linux/zfs/zfs_debug.c
new file mode 100644
index 0000000..5958063
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_debug.c

@@ -0,0 +1,256 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/trace_zfs.h>
+
+/*
+ * One entry in the debug message log.  Each entry is a single allocation
+ * with the message text stored inline after the header.
+ */
+typedef struct zfs_dbgmsg {
+	procfs_list_node_t	zdm_node;	/* linkage in zfs_dbgmsgs */
+	uint64_t		zdm_timestamp;	/* gethrestime_sec() at log */
+	int			zdm_size;	/* total allocation size */
+	char			zdm_msg[]; /* variable length allocation */
+} zfs_dbgmsg_t;
+
+/* The list of logged messages; its pl_lock guards the list and the size. */
+procfs_list_t zfs_dbgmsgs;
+/* Sum of zdm_size over all messages currently in the log. */
+int zfs_dbgmsg_size = 0;
+/* Oldest messages are purged once the log grows beyond this many bytes. */
+int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
+
+/*
+ * Internal ZFS debug messages are enabled by default.
+ *
+ * # Print debug messages
+ * cat /proc/spl/kstat/zfs/dbgmsg
+ *
+ * # Disable the kernel debug message log.
+ * echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
+ *
+ * # Clear the kernel debug message log.
+ * echo 0 >/proc/spl/kstat/zfs/dbgmsg
+ *
+ * NOTE(review): nothing in this file reads zfs_dbgmsg_enable; presumably
+ * the logging macros that call __zfs_dbgmsg() test it -- confirm.
+ */
+int zfs_dbgmsg_enable = 1;
+
+/*
+ * Emit the column headings of the debug message listing.  Always returns 0.
+ */
+static int
+zfs_dbgmsg_show_header(struct seq_file *f)
+{
+	static const char ts_title[] = "timestamp";
+	static const char msg_title[] = "message";
+
+	seq_printf(f, "%-12s %-8s\n", ts_title, msg_title);
+	return (0);
+}
+
+/*
+ * Emit one log entry (a zfs_dbgmsg_t passed as 'p') as a line holding its
+ * timestamp followed by the message text.  Always returns 0.
+ */
+static int
+zfs_dbgmsg_show(struct seq_file *f, void *p)
+{
+	zfs_dbgmsg_t *entry = (zfs_dbgmsg_t *)p;
+	u_longlong_t stamp = (u_longlong_t)entry->zdm_timestamp;
+
+	seq_printf(f, "%-12llu %-s\n", stamp, entry->zdm_msg);
+	return (0);
+}
+
+/*
+ * Discard messages from the head of the log until the accounted size no
+ * longer exceeds max_size or the list is empty.
+ */
+static void
+zfs_dbgmsg_purge(int max_size)
+{
+	zfs_dbgmsg_t *victim;
+
+	while (zfs_dbgmsg_size > max_size &&
+	    (victim = list_remove_head(&zfs_dbgmsgs.pl_list)) != NULL) {
+		/* Read the size before the entry is freed. */
+		int freed = victim->zdm_size;
+
+		kmem_free(victim, freed);
+		zfs_dbgmsg_size -= freed;
+	}
+}
+
+/*
+ * Clear callback for the procfs list: drop every logged message while
+ * holding the list lock.  The procfs_list argument is not consulted; the
+ * global zfs_dbgmsgs list is always the one cleared.  Always returns 0.
+ */
+static int
+zfs_dbgmsg_clear(procfs_list_t *procfs_list)
+{
+	procfs_list_t *msgs = &zfs_dbgmsgs;
+
+	mutex_enter(&msgs->pl_lock);
+	zfs_dbgmsg_purge(0);
+	mutex_exit(&msgs->pl_lock);
+
+	return (0);
+}
+
+/*
+ * Register the debug message log as the "dbgmsg" procfs list of the "zfs"
+ * module with mode 0600, wiring up the show, header and clear callbacks
+ * defined in this file.
+ */
+void
+zfs_dbgmsg_init(void)
+{
+	procfs_list_install("zfs", NULL, "dbgmsg", 0600, &zfs_dbgmsgs,
+	    zfs_dbgmsg_show, zfs_dbgmsg_show_header, zfs_dbgmsg_clear,
+	    offsetof(zfs_dbgmsg_t, zdm_node));
+}
+
+/*
+ * Tear down the debug message log: remove the procfs entry, free every
+ * remaining message and, in kernel builds, destroy the list itself.
+ */
+void
+zfs_dbgmsg_fini(void)
+{
+	procfs_list_uninstall(&zfs_dbgmsgs);
+	/* NOTE(review): pl_lock is not taken around this purge -- confirm. */
+	zfs_dbgmsg_purge(0);
+
+	/*
+	 * TODO - decide how to make this permanent
+	 */
+#ifdef _KERNEL
+	procfs_list_destroy(&zfs_dbgmsgs);
+#endif
+}
+
+/*
+ * Record that an error value was set at the given source location.  Nothing
+ * is logged unless ZFS_DEBUG_SET_ERROR is enabled in zfs_flags.
+ */
+void
+__set_error(const char *file, const char *func, int line, int err)
+{
+	/*
+	 * To enable this:
+	 *
+	 * $ echo 512 >/sys/module/zfs/parameters/zfs_flags
+	 */
+	if (!(zfs_flags & ZFS_DEBUG_SET_ERROR))
+		return;
+
+	__dprintf(B_FALSE, file, func, line, "error %lu", (ulong_t)err);
+}
+
+/*
+ * Append a copy of 'buf' to the debug message log, stamped with the current
+ * time, then trim the log back to zfs_dbgmsg_maxsize (a negative limit is
+ * treated as 0).
+ */
+void
+__zfs_dbgmsg(char *buf)
+{
+	zfs_dbgmsg_t *entry;
+	int size;
+
+	/* Header plus the message text and its terminating NUL. */
+	size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1;
+
+	entry = kmem_zalloc(size, KM_SLEEP);
+	entry->zdm_timestamp = gethrestime_sec();
+	entry->zdm_size = size;
+	strcpy(entry->zdm_msg, buf);
+
+	mutex_enter(&zfs_dbgmsgs.pl_lock);
+	procfs_list_add(&zfs_dbgmsgs, entry);
+	zfs_dbgmsg_size += size;
+	zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
+	mutex_exit(&zfs_dbgmsgs.pl_lock);
+}
+
+#ifdef _KERNEL
+
+/*
+ * Format a debug message prefixed with "file:line:func(): " (plus a leading
+ * "dprintf: " when dprint is set), fire the zfs__dprintf probe with it and
+ * append it to the debug message log.  The message is built in a 1024 byte
+ * buffer, so longer output is truncated.
+ */
+void
+__dprintf(boolean_t dprint, const char *file, const char *func,
+    int line, const char *fmt, ...)
+{
+	const char *shortname;
+	const char *prefix;
+	va_list adx;
+	size_t size = 1024;
+	size_t len;
+	char *buf;
+	int used;
+
+	if (dprint)
+		prefix = "dprintf: ";
+	else
+		prefix = "";
+
+	buf = kmem_alloc(size, KM_SLEEP);
+
+	/*
+	 * Get rid of annoying prefix to filename: keep only what follows
+	 * the last '/', if there is one.
+	 */
+	shortname = strrchr(file, '/');
+	shortname = (shortname != NULL) ? shortname + 1 : file;
+
+	used = snprintf(buf, size, "%s%s:%d:%s(): ", prefix, shortname, line,
+	    func);
+
+	/* Append the caller's message only if the header left room. */
+	if (used < size) {
+		va_start(adx, fmt);
+		(void) vsnprintf(buf + used, size - used, fmt, adx);
+		va_end(adx);
+	}
+
+	/*
+	 * Get rid of trailing newline for dprintf logs.
+	 */
+	if (dprint) {
+		len = strlen(buf);
+		if (len > 0 && buf[len - 1] == '\n')
+			buf[len - 1] = '\0';
+	}
+
+	/*
+	 * To get this data enable the zfs__dprintf trace point as shown:
+	 *
+	 * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
+	 * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
+	 * $ echo 0 > /sys/kernel/debug/tracing/trace
+	 *
+	 * # Dump the ring buffer.
+	 * $ cat /sys/kernel/debug/tracing/trace
+	 */
+	DTRACE_PROBE1(zfs__dprintf, char *, buf);
+
+	/*
+	 * To get this data:
+	 *
+	 * $ cat /proc/spl/kstat/zfs/dbgmsg
+	 *
+	 * To clear the buffer:
+	 * $ echo 0 > /proc/spl/kstat/zfs/dbgmsg
+	 */
+	__zfs_dbgmsg(buf);
+
+	kmem_free(buf, size);
+}
+
+#else
+
+/*
+ * Dump every logged message to standard output, bracketed by
+ * "ZFS_DBGMSG(<tag>) START:" and "ZFS_DBGMSG(<tag>) END" lines.
+ */
+void
+zfs_dbgmsg_print(const char *tag)
+{
+	ssize_t ret __attribute__((unused));
+	zfs_dbgmsg_t *cur;
+
+	/*
+	 * We use write() in this function instead of printf()
+	 * so it is safe to call from a signal handler.
+	 */
+	ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
+	ret = write(STDOUT_FILENO, tag, strlen(tag));
+	ret = write(STDOUT_FILENO, ") START:\n", 9);
+
+	mutex_enter(&zfs_dbgmsgs.pl_lock);
+
+	cur = list_head(&zfs_dbgmsgs.pl_list);
+	while (cur != NULL) {
+		ret = write(STDOUT_FILENO, cur->zdm_msg,
+		    strlen(cur->zdm_msg));
+		ret = write(STDOUT_FILENO, "\n", 1);
+		cur = list_next(&zfs_dbgmsgs.pl_list, cur);
+	}
+
+	ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
+	ret = write(STDOUT_FILENO, tag, strlen(tag));
+	ret = write(STDOUT_FILENO, ") END\n", 6);
+
+	mutex_exit(&zfs_dbgmsgs.pl_lock);
+}
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+/* Debug log tunables, exported as module parameters in kernel builds. */
+module_param(zfs_dbgmsg_enable, int, 0644);
+MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
+
+/* Size limit that __zfs_dbgmsg() passes to zfs_dbgmsg_purge(). */
+module_param(zfs_dbgmsg_maxsize, int, 0644);
+MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
+#endif

diff --git a/zfs/module/os/linux/zfs/zfs_dir.c b/zfs/module/os/linux/zfs/zfs_dir.c
new file mode 100644
index 0000000..8ad5454
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_dir.c

@@ -0,0 +1,1222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/uio.h>
+#include <sys/pathname.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/random.h>
+#include <sys/policy.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_vnops.h>
+#include <sys/fs/zfs.h>
+#include <sys/zap.h>
+#include <sys/dmu.h>
+#include <sys/atomic.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+
+/*
+ * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
+ * of names after deciding which is the appropriate lookup interface.
+ *
+ *	zfsvfs	- file system; a non-zero z_norm selects zap_lookup_norm()
+ *	dzp	- directory znode whose zap object (dzp->z_id) is searched
+ *	name	- entry name to look up
+ *	mt	- match type handed to zap_lookup_norm()
+ *	update	- not used by this function
+ *	deflags	- if non-NULL, set to ED_CASE_CONFLICT or 0 after a
+ *		  successful normalizing lookup
+ *	rpnp	- if non-NULL, its pn_buf/pn_bufsize are handed to
+ *		  zap_lookup_norm()
+ *	zoid	- on success receives ZFS_DIRENT_OBJ() of the value found;
+ *		  not touched by this function on failure
+ *
+ * Returns 0 on success (EOVERFLOW from the zap lookup is treated as
+ * success), otherwise the zap lookup error.
+ */
+static int
+zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
+    matchtype_t mt, boolean_t update, int *deflags, pathname_t *rpnp,
+    uint64_t *zoid)
+{
+	boolean_t conflict = B_FALSE;
+	int error;
+
+	if (zfsvfs->z_norm) {
+		size_t bufsz = 0;
+		char *buf = NULL;
+
+		if (rpnp) {
+			buf = rpnp->pn_buf;
+			bufsz = rpnp->pn_bufsize;
+		}
+
+		/*
+		 * In the non-mixed case we only expect there would ever
+		 * be one match, but we need to use the normalizing lookup.
+		 */
+		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
+		    zoid, mt, buf, bufsz, &conflict);
+	} else {
+		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
+	}
+
+	/*
+	 * Allow multiple entries provided the first entry is
+	 * the object id.  Non-zpl consumers may safely make
+	 * use of the additional space.
+	 *
+	 * XXX: This should be a feature flag for compatibility
+	 */
+	if (error == EOVERFLOW)
+		error = 0;
+
+	if (zfsvfs->z_norm && !error && deflags)
+		*deflags = conflict ? ED_CASE_CONFLICT : 0;
+
+	/*
+	 * Only strip the dirent type bits when the lookup produced a value.
+	 * On failure the caller's *zoid may never have been written, so
+	 * reading it here would use an indeterminate value.
+	 */
+	if (error == 0)
+		*zoid = ZFS_DIRENT_OBJ(*zoid);
+
+	return (error);
+}
+
+/*
+ * Lock a directory entry.  A dirlock on <dzp, name> protects that name
+ * in dzp's directory zap object.  As long as you hold a dirlock, you can
+ * assume two things: (1) dzp cannot be reaped, and (2) no other thread
+ * can change the zap entry for (i.e. link or unlink) this name.
+ *
+ * Input arguments:
+ *	dzp	- znode for directory
+ *	name	- name of entry to lock
+ *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
+ *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
+ *		  ZSHARED: allow concurrent access with other ZSHARED callers.
+ *		  ZXATTR: we want dzp's xattr directory
+ *		  ZCILOOK: On a mixed sensitivity file system,
+ *			   this lookup should be case-insensitive.
+ *		  ZCIEXACT: On a purely case-insensitive file system,
+ *			    this lookup should be case-sensitive.
+ *		  ZRENAMING: we are locking for renaming, force narrow locks
+ *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
+ *			     current thread already holds it.
+ *
+ * Output arguments:
+ *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
+ *	dlpp	- pointer to the dirlock for this entry (NULL on error)
+ *      direntflags - (case-insensitive lookup only)
+ *		flags if multiple case-sensitive matches exist in directory
+ *      realpnp     - (case-insensitive lookup only)
+ *		actual name matched within the directory
+ *
+ * Return value: 0 on success or errno on failure.
+ *
+ * NOTE: Always checks for, and rejects, '.' and '..', and also '.zfs'
+ *	 when dzp has a control directory (all fail with EEXIST).
+ * NOTE: For case-insensitive file systems we take wide locks (see below),
+ *	 but return znode pointers to a single match.
+ */
+int
+zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name,
+    znode_t **zpp, int flag, int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zfs_dirlock_t	*dl;
+	boolean_t	update;
+	matchtype_t	mt = 0;
+	uint64_t	zoid;
+	int		error = 0;
+	int		cmpflags;
+
+	/* Both outputs stay NULL on every early error return below. */
+	*zpp = NULL;
+	*dlpp = NULL;
+
+	/*
+	 * Verify that we are not trying to lock '.', '..', or '.zfs'
+	 */
+	if ((name[0] == '.' &&
+	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
+	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
+		return (SET_ERROR(EEXIST));
+
+	/*
+	 * Case sensitivity and normalization preferences are set when
+	 * the file system is created.  These are stored in the
+	 * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
+	 * affect what vnodes can be cached in the DNLC, how we
+	 * perform zap lookups, and the "width" of our dirlocks.
+	 *
+	 * A normal dirlock locks a single name.  Note that with
+	 * normalization a name can be composed multiple ways, but
+	 * when normalized, these names all compare equal.  A wide
+	 * dirlock locks multiple names.  We need these when the file
+	 * system is supporting mixed-mode access.  It is sometimes
+	 * necessary to lock all case permutations of file name at
+	 * once so that simultaneous case-insensitive/case-sensitive
+	 * behaves as rationally as possible.
+	 */
+
+	/*
+	 * When matching we may need to normalize & change case according to
+	 * FS settings.
+	 *
+	 * Note that a normalized match is necessary for a case insensitive
+	 * filesystem when the lookup request is not exact because normalization
+	 * can fold case independent of normalizing code point sequences.
+	 *
+	 * See the table above zfs_dropname().
+	 */
+	if (zfsvfs->z_norm != 0) {
+		mt = MT_NORMALIZE;
+
+		/*
+		 * Determine if the match needs to honor the case specified in
+		 * lookup, and if so keep track of that so that during
+		 * normalization we don't fold case.
+		 */
+		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE &&
+		    (flag & ZCIEXACT)) ||
+		    (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) {
+			mt |= MT_MATCH_CASE;
+		}
+	}
+
+	/*
+	 * Only look in or update the DNLC if we are looking for the
+	 * name on a file system that does not require normalization
+	 * or case folding.  We can also look there if we happen to be
+	 * on a non-normalizing, mixed sensitivity file system IF we
+	 * are looking for the exact name.
+	 *
+	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
+	 * case for performance improvement?
+	 */
+	update = !zfsvfs->z_norm ||
+	    (zfsvfs->z_case == ZFS_CASE_MIXED &&
+	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
+
+	/*
+	 * ZRENAMING indicates we are in a situation where we should
+	 * take narrow locks regardless of the file system's
+	 * preferences for normalizing and case folding.  This will
+	 * prevent us deadlocking trying to grab the same wide lock
+	 * twice if the two names happen to be case-insensitive
+	 * matches.
+	 */
+	if (flag & ZRENAMING)
+		cmpflags = 0;
+	else
+		cmpflags = zfsvfs->z_norm;
+
+	/*
+	 * Wait until there are no locks on this name.
+	 *
+	 * Don't grab the lock if it is already held. However, cannot
+	 * have both ZSHARED and ZHAVELOCK together.
+	 */
+	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
+	if (!(flag & ZHAVELOCK))
+		rw_enter(&dzp->z_name_lock, RW_READER);
+
+	mutex_enter(&dzp->z_lock);
+	for (;;) {
+		/* An unlinked directory can only be locked for ZXATTR. */
+		if (dzp->z_unlinked && !(flag & ZXATTR)) {
+			mutex_exit(&dzp->z_lock);
+			if (!(flag & ZHAVELOCK))
+				rw_exit(&dzp->z_name_lock);
+			return (SET_ERROR(ENOENT));
+		}
+		/* Search the existing dirlocks for one on a matching name. */
+		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
+			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
+			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
+				break;
+		}
+		/* A name comparison failure is reported to callers as ENOENT. */
+		if (error != 0) {
+			mutex_exit(&dzp->z_lock);
+			if (!(flag & ZHAVELOCK))
+				rw_exit(&dzp->z_name_lock);
+			return (SET_ERROR(ENOENT));
+		}
+		if (dl == NULL)	{
+			/*
+			 * Allocate a new dirlock and add it to the list.
+			 */
+			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
+			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
+			dl->dl_name = name;
+			dl->dl_sharecnt = 0;
+			dl->dl_namelock = 0;
+			dl->dl_namesize = 0;
+			dl->dl_dzp = dzp;
+			dl->dl_next = dzp->z_dirlocks;
+			dzp->z_dirlocks = dl;
+			break;
+		}
+		/* An existing shared lock can be joined by a ZSHARED caller. */
+		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
+			break;
+		/* Otherwise sleep until the holder broadcasts, then rescan. */
+		cv_wait(&dl->dl_cv, &dzp->z_lock);
+	}
+
+	/*
+	 * If the caller already held z_name_lock (ZHAVELOCK), we did not
+	 * take it above.  Record that in the dirlock so that
+	 * zfs_dirent_unlock() does not drop a lock we never acquired.
+	 */
+	if (flag & ZHAVELOCK)
+		dl->dl_namelock = 1;
+
+	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
+		/*
+		 * We're the second shared reference to dl.  Make a copy of
+		 * dl_name in case the first thread goes away before we do.
+		 * Note that we initialize the new name before storing its
+		 * pointer into dl_name, because the first thread may load
+		 * dl->dl_name at any time.  It'll either see the old value,
+		 * which belongs to it, or the new shared copy; either is OK.
+		 */
+		dl->dl_namesize = strlen(dl->dl_name) + 1;
+		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
+		bcopy(dl->dl_name, name, dl->dl_namesize);
+		dl->dl_name = name;
+	}
+
+	mutex_exit(&dzp->z_lock);
+
+	/*
+	 * We have a dirlock on the name.  (Note that it is the dirlock,
+	 * not the dzp's z_lock, that protects the name in the zap object.)
+	 * See if there's an object by this name; if so, put a hold on it.
+	 */
+	if (flag & ZXATTR) {
+		/* The xattr directory's object id lives in dzp's SA. */
+		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
+		    sizeof (zoid));
+		if (error == 0)
+			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
+	} else {
+		error = zfs_match_find(zfsvfs, dzp, name, mt,
+		    update, direntflags, realpnp, &zoid);
+	}
+	if (error) {
+		/*
+		 * ENOENT is tolerated unless the caller demanded that the
+		 * entry exist; in that case *zpp stays NULL and we still
+		 * return the dirlock.
+		 */
+		if (error != ENOENT || (flag & ZEXISTS)) {
+			zfs_dirent_unlock(dl);
+			return (error);
+		}
+	} else {
+		if (flag & ZNEW) {
+			zfs_dirent_unlock(dl);
+			return (SET_ERROR(EEXIST));
+		}
+		error = zfs_zget(zfsvfs, zoid, zpp);
+		if (error) {
+			zfs_dirent_unlock(dl);
+			return (error);
+		}
+	}
+
+	*dlpp = dl;
+
+	return (0);
+}
+
+/*
+ * Unlock this directory entry and wake anyone who was waiting for it.
+ */
+void
+zfs_dirent_unlock(zfs_dirlock_t *dl)
+{
+	znode_t *dzp = dl->dl_dzp;
+	zfs_dirlock_t **linkp;
+
+	mutex_enter(&dzp->z_lock);
+
+	/* Drop z_name_lock unless the dirlock says it is not ours to drop. */
+	if (dl->dl_namelock == 0)
+		rw_exit(&dzp->z_name_lock);
+
+	/* Other sharers remain: give up our reference and leave dl alone. */
+	if (dl->dl_sharecnt > 1) {
+		dl->dl_sharecnt--;
+		mutex_exit(&dzp->z_lock);
+		return;
+	}
+
+	/* Last holder: unlink dl from the directory's dirlock list. */
+	for (linkp = &dzp->z_dirlocks; *linkp != dl;
+	    linkp = &(*linkp)->dl_next)
+		continue;
+	*linkp = dl->dl_next;
+
+	/* Wake every thread waiting for this name, then release z_lock. */
+	cv_broadcast(&dl->dl_cv);
+	mutex_exit(&dzp->z_lock);
+
+	/* A non-zero dl_namesize means dl_name is a private copy to free. */
+	if (dl->dl_namesize != 0)
+		kmem_free(dl->dl_name, dl->dl_namesize);
+	cv_destroy(&dl->dl_cv);
+	kmem_free(dl, sizeof (*dl));
+}
+
+/*
+ * Look up an entry in a directory.
+ *
+ * NOTE: '.' and '..' are handled as special cases because
+ *	no directory entries are actually stored for them.  If this is
+ *	the root of a filesystem, then '.zfs' is also treated as a
+ *	special pseudo-directory.
+ *
+ *	IN:	dzp	- znode of the directory to search
+ *		name	- name to look up
+ *		flags	- FIGNORECASE requests a case-insensitive lookup
+ *
+ *	OUT:	zpp	- znode found; only written when 0 is returned
+ *		deflg	- passed to zfs_dirent_lock() as direntflags
+ *		rpnp	- passed to zfs_dirent_lock() as realpnp; for the
+ *			  special names it receives a copy of 'name' when
+ *			  FIGNORECASE is set
+ *
+ *	RETURN:	0 on success
+ *		error number on failure
+ */
+int
+zfs_dirlook(znode_t *dzp, char *name, znode_t **zpp, int flags,
+    int *deflg, pathname_t *rpnp)
+{
+	zfs_dirlock_t *dl;
+	znode_t *zp;
+	struct inode *ip;
+	int error = 0;
+	uint64_t parent;
+
+	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
+		/* "" and "." both name the directory itself. */
+		*zpp = dzp;
+		zhold(*zpp);
+	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
+		zfsvfs_t *zfsvfs = ZTOZSB(dzp);
+
+		/*
+		 * If we are a snapshot mounted under .zfs, return
+		 * the inode pointer for the snapshot directory.
+		 */
+		if ((error = sa_lookup(dzp->z_sa_hdl,
+		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
+			return (error);
+
+		if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
+			error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
+			    "snapshot", &ip, 0, kcred, NULL, NULL);
+			/*
+			 * Only publish the result on success: on failure
+			 * 'ip' may not hold a valid inode, and ITOZ() of it
+			 * would hand the caller a bogus znode pointer.
+			 */
+			if (error == 0)
+				*zpp = ITOZ(ip);
+			return (error);
+		}
+		/* Ordinary "..": fetch the parent znode by object id. */
+		rw_enter(&dzp->z_parent_lock, RW_READER);
+		error = zfs_zget(zfsvfs, parent, &zp);
+		if (error == 0)
+			*zpp = zp;
+		rw_exit(&dzp->z_parent_lock);
+	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
+		ip = zfsctl_root(dzp);
+		*zpp = ITOZ(ip);
+	} else {
+		int zf;
+
+		zf = ZEXISTS | ZSHARED;
+		if (flags & FIGNORECASE)
+			zf |= ZCILOOK;
+
+		error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
+		if (error == 0) {
+			*zpp = zp;
+			zfs_dirent_unlock(dl);
+			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
+		}
+		/*
+		 * rpnp was already handed to zfs_dirent_lock(); clear it so
+		 * the strlcpy() below does not overwrite pn_buf with 'name'.
+		 */
+		rpnp = NULL;
+	}
+
+	/* For the special names, the real name is the name as given. */
+	if ((flags & FIGNORECASE) && rpnp && !error)
+		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
+
+	return (error);
+}
+
+/*
+ * unlinked Set (formerly known as the "delete queue") Error Handling
+ *
+ * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
+ * don't specify the name of the entry that we will be manipulating.  We
+ * also fib and say that we won't be adding any new entries to the
+ * unlinked set, even though we might (this is to lower the minimum file
+ * size that can be deleted in a full filesystem).  So on the small
+ * chance that the nlink list is using a fat zap (ie. has more than
+ * 2000 entries), we *may* not pre-read a block that's needed.
+ * Therefore it is remotely possible for some of the assertions
+ * regarding the unlinked set below to fail due to i/o error.  On a
+ * nondebug system, this will result in the space being leaked.
+ */
+void
+zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+
+	/* The caller must already have marked zp unlinked with no links. */
+	ASSERT(zp->z_unlinked);
+	ASSERT(ZTOI(zp)->i_nlink == 0);
+
+	/* Record zp's object id in the unlinked-set zap; failure is fatal. */
+	VERIFY3U(0, ==,
+	    zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+	/* Report one added entry to the dataset kstats. */
+	dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
+}
+
+/*
+ * Clean up any znodes that had no links when we either crashed or
+ * (force) umounted the file system.
+ */
+static void
+zfs_unlinked_drain_task(void *arg)
+{
+	zfsvfs_t *zfsvfs = arg;
+	zap_cursor_t zc;
+	zap_attribute_t zap;
+	dmu_object_info_t doi;
+	znode_t *zp;
+
+	ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
+	/*
+	 * Walk the unlinked set until it is exhausted, a retrieve fails,
+	 * or a cancel has been requested.
+	 */
+	zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
+	while (zap_cursor_retrieve(&zc, &zap) == 0 &&
+	    !zfsvfs->z_drain_cancel) {
+		uint64_t obj = zap.za_first_integer;
+
+		/*
+		 * Find out what kind of object this entry refers to;
+		 * entries whose info cannot be read are passed over.
+		 */
+		if (dmu_object_info(zfsvfs->z_os, obj, &doi) == 0) {
+			ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
+			    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
+
+			/*
+			 * Pull the object back into core so it can be
+			 * re-marked for deletion.  A failed zfs_zget() is
+			 * not an error here: znodes already marked for
+			 * deletion (e.g. during the purge of an extended
+			 * attribute directory) are simply skipped.
+			 */
+			if (zfs_zget(zfsvfs, obj, &zp) == 0) {
+				zp->z_unlinked = B_TRUE;
+
+				/*
+				 * zrele() drops the reference and may free
+				 * the znode synchronously.  That freeing is
+				 * interrupted on unmount through the
+				 * dmu_objset_zfs_unmounting() check made in
+				 * dmu_free_long_range().
+				 */
+				zrele(zp);
+				ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+			}
+		}
+
+		zap_cursor_advance(&zc);
+	}
+	zap_cursor_fini(&zc);
+
+	/* Mark the drain as finished and forget the task id. */
+	zfsvfs->z_draining = B_FALSE;
+	zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Sets z_draining then tries to dispatch async unlinked drain.
+ * If that fails executes synchronous unlinked drain.
+ *
+ * The dispatch result is tested through a local variable rather than by
+ * re-reading zfsvfs->z_drain_task: zfs_unlinked_drain_task() resets that
+ * field to TASKQID_INVALID when it finishes, so a task that completes
+ * quickly could otherwise be mistaken for a failed dispatch and the drain
+ * run a second time.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+	taskqid_t id;
+
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+	ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+
+	zfsvfs->z_draining = B_TRUE;
+	zfsvfs->z_drain_cancel = B_FALSE;
+
+	id = taskq_dispatch(
+	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+	    zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+	zfsvfs->z_drain_task = id;
+	if (id == TASKQID_INVALID) {
+		/* Could not go async: drain in the caller's context. */
+		zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+		zfs_unlinked_drain_task(zfsvfs);
+	}
+}
+
+/*
+ * Wait for the unlinked drain taskq task to stop. This will interrupt the
+ * unlinked set processing if it is in progress.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+
+	/* Nothing to stop when no drain is in flight. */
+	if (!zfsvfs->z_draining)
+		return;
+
+	/* Ask the drain loop to stop, then cancel the taskq entry. */
+	zfsvfs->z_drain_cancel = B_TRUE;
+	taskq_cancel_id(
+	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+	    zfsvfs->z_drain_task);
+
+	/* Reset the bookkeeping to the "no drain" state. */
+	zfsvfs->z_drain_task = TASKQID_INVALID;
+	zfsvfs->z_draining = B_FALSE;
+}
+
+/*
+ * Delete the entire contents of a directory.  Return a count
+ * of the number of entries that could not be deleted. If we encounter
+ * an error, return a count of at least one so that the directory stays
+ * in the unlinked set.
+ *
+ * NOTE: this function assumes that the directory is inactive,
+ *	so there is no need to lock its entries before deletion.
+ *	Also, it assumes the directory contents is *only* regular
+ *	files.
+ */
+static int
+zfs_purgedir(znode_t *dzp)
+{
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	znode_t		*xzp;
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zfs_dirlock_t	dl;
+	int skipped = 0;
+	int error;
+
+	/* Visit every entry of the directory's zap, one tx per entry. */
+	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
+	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
+	    zap_cursor_advance(&zc)) {
+		error = zfs_zget(zfsvfs,
+		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
+		if (error) {
+			skipped += 1;
+			continue;
+		}
+
+		ASSERT(S_ISREG(ZTOI(xzp)->i_mode) ||
+		    S_ISLNK(ZTOI(xzp)->i_mode));
+
+		/* Hold everything zfs_link_destroy() will modify. */
+		tx = dmu_tx_create(zfsvfs->z_os);
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+		/* Is this really needed ? */
+		zfs_sa_upgrade_txholds(tx, xzp);
+		dmu_tx_mark_netfree(tx);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			dmu_tx_abort(tx);
+			zfs_zrele_async(xzp);
+			skipped += 1;
+			continue;
+		}
+		/*
+		 * Build a throwaway on-stack dirlock carrying just the
+		 * directory and entry name; no real lock is taken (see the
+		 * NOTE in the header comment).
+		 */
+		bzero(&dl, sizeof (dl));
+		dl.dl_dzp = dzp;
+		dl.dl_name = zap.za_name;
+
+		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
+		if (error)
+			skipped += 1;
+		dmu_tx_commit(tx);
+
+		zfs_zrele_async(xzp);
+	}
+	zap_cursor_fini(&zc);
+	/*
+	 * Here 'error' is the result of the last zap_cursor_retrieve().
+	 * Anything other than ENOENT counts as one more skipped entry so
+	 * that the caller sees a non-zero result.
+	 */
+	if (error != ENOENT)
+		skipped += 1;
+	return (skipped);
+}
+
+/*
+ * Remove a znode that has no links and no inode references: purge an
+ * xattr directory's contents, free a regular file's data, move any xattr
+ * directory to the unlinked set, take zp off the unlinked set, and delete
+ * it.  On any failure zp is left in the unlinked set.
+ */
+void
+zfs_rmnode(znode_t *zp)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	objset_t	*os = zfsvfs->z_os;
+	znode_t		*xzp = NULL;
+	dmu_tx_t	*tx;
+	uint64_t	acl_obj;
+	uint64_t	xattr_obj;
+	uint64_t	links;
+	int		error;
+
+	ASSERT(ZTOI(zp)->i_nlink == 0);
+	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
+
+	/*
+	 * If this is an attribute directory, purge its contents.
+	 */
+	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
+		if (zfs_purgedir(zp) != 0) {
+			/*
+			 * Not enough space to delete some xattrs.
+			 * Leave it in the unlinked set.
+			 */
+			zfs_znode_dmu_fini(zp);
+
+			return;
+		}
+	}
+
+	/*
+	 * Free up all the data in the file.  We don't do this for directories
+	 * because we need truncate and remove to be in the same tx, like in
+	 * zfs_znode_delete(). Otherwise, if we crash here we'll end up with
+	 * an inconsistent truncated zap object in the delete queue.  Note a
+	 * truncated file is harmless since it only contains user data.
+	 */
+	if (S_ISREG(ZTOI(zp)->i_mode)) {
+		error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
+		if (error) {
+			/*
+			 * Not enough space or we were interrupted by unmount.
+			 * Leave the file in the unlinked set.
+			 */
+			zfs_znode_dmu_fini(zp);
+			return;
+		}
+	}
+
+	/*
+	 * If the file has extended attributes, we're going to unlink
+	 * the xattr dir.
+	 */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (error == 0 && xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
+		ASSERT(error == 0);
+	}
+
+	acl_obj = zfs_external_acl(zp);
+
+	/*
+	 * Set up the final transaction.
+	 */
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	if (xzp) {
+		/* The xattr dir will be added to the unlinked set below. */
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+	}
+	if (acl_obj)
+		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		/*
+		 * Not enough space to delete the file.  Leave it in the
+		 * unlinked set, leaking it until the fs is remounted (at
+		 * which point we'll call zfs_unlinked_drain() to process it).
+		 */
+		dmu_tx_abort(tx);
+		zfs_znode_dmu_fini(zp);
+		goto out;
+	}
+
+	if (xzp) {
+		ASSERT(error == 0);
+		mutex_enter(&xzp->z_lock);
+		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
+		clear_nlink(ZTOI(xzp));		/* no more links to it */
+		links = 0;
+		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+		    &links, sizeof (links), tx));
+		mutex_exit(&xzp->z_lock);
+		zfs_unlinked_add(xzp, tx);
+	}
+
+	mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
+	/*
+	 * Remove this znode from the unlinked set.  If a rollback has
+	 * occurred while a file is open and unlinked, then when the file
+	 * is closed post rollback it will not exist in the rolled back
+	 * version of the unlinked object.  ENOENT is therefore tolerated.
+	 */
+	error = zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+	    zp->z_id, tx);
+	VERIFY(error == 0 || error == ENOENT);
+
+	/* Wake dd_activity_cv waiters once the unlinked set is empty. */
+	uint64_t count;
+	if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
+		cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
+	}
+
+	mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
+	dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
+	zfs_znode_delete(zp, tx);
+
+	dmu_tx_commit(tx);
+out:
+	/* Drop the hold taken by zfs_zget() on the xattr directory. */
+	if (xzp)
+		zfs_zrele_async(xzp);
+}
+
+/*
+ * Build the value stored in a directory zap entry for zp: the object id,
+ * with IFTODT(mode) placed in the top four bits when the file system
+ * version is at least ZPL_VERSION_DIRENT_TYPE.
+ */
+static uint64_t
+zfs_dirent(znode_t *zp, uint64_t mode)
+{
+	/* Older versions store the bare object id. */
+	if (ZTOZSB(zp)->z_version < ZPL_VERSION_DIRENT_TYPE)
+		return (zp->z_id);
+
+	return (zp->z_id | (IFTODT(mode) << 60));
+}
+
+/*
+ * Link zp into dl.  Can fail in the following cases :
+ * - if zp has been unlinked.
+ * - if the number of entries with the same hash (aka. colliding entries)
+ *    exceed the capacity of a leaf-block of fatzap and splitting of the
+ *    leaf-block does not help.
+ */
+int
+zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
+{
+	znode_t *dzp = dl->dl_dzp;
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	uint64_t value;
+	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	uint64_t links;
+	int count = 0;
+	int error;
+
+	mutex_enter(&zp->z_lock);
+
+	if (!(flag & ZRENAMING)) {
+		if (zp->z_unlinked) {	/* no new links to unlinked zp */
+			ASSERT(!(flag & (ZNEW | ZEXISTS)));
+			mutex_exit(&zp->z_lock);
+			return (SET_ERROR(ENOENT));
+		}
+		if (!(flag & ZNEW)) {
+			/*
+			 * ZNEW nodes come from zfs_mknode() where the link
+			 * count has already been initialised
+			 */
+			inc_nlink(ZTOI(zp));
+			links = ZTOI(zp)->i_nlink;
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+			    NULL, &links, sizeof (links));
+		}
+	}
+
+	/* Add the <name, dirent value> pair to the directory's zap. */
+	value = zfs_dirent(zp, zp->z_mode);
+	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
+	    &value, tx);
+
+	/*
+	 * zap_add could fail to add the entry if it exceeds the capacity of the
+	 * leaf-block and zap_leaf_split() failed to help.
+	 * The caller of this routine is responsible for failing the transaction
+	 * which will rollback the SA updates done above.
+	 */
+	if (error != 0) {
+		/* Undo the in-core link count bump made above, if any. */
+		if (!(flag & ZRENAMING) && !(flag & ZNEW))
+			drop_nlink(ZTOI(zp));
+		mutex_exit(&zp->z_lock);
+		return (error);
+	}
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
+	    &dzp->z_id, sizeof (dzp->z_id));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+
+	if (!(flag & ZNEW)) {
+		/*
+		 * The bulk entry records a pointer to ctime[]; the array is
+		 * filled in by zfs_tstamp_update_setup() before the update
+		 * below consumes it.
+		 */
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+		    ctime);
+	}
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+
+	mutex_exit(&zp->z_lock);
+
+	/* Now update the parent directory: size, links and timestamps. */
+	mutex_enter(&dzp->z_lock);
+	dzp->z_size++;
+	if (zp_is_dir)
+		inc_nlink(ZTOI(dzp));
+	links = ZTOI(dzp)->i_nlink;
+	count = 0;	/* reuse bulk[] for the directory's attributes */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &links, sizeof (links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+	mutex_exit(&dzp->z_lock);
+
+	return (0);
+}
+
+/*
+ * The match type in the code for this function should conform to:
+ *
+ * ------------------------------------------------------------------------
+ * fs type  | z_norm      | lookup type | match type
+ * ---------|-------------|-------------|----------------------------------
+ * CS !norm | 0           |           0 | 0 (exact)
+ * CS  norm | formX       |           0 | MT_NORMALIZE
+ * CI !norm | upper       |   !ZCIEXACT | MT_NORMALIZE
+ * CI !norm | upper       |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CI  norm | upper|formX |   !ZCIEXACT | MT_NORMALIZE
+ * CI  norm | upper|formX |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper       |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper       |     ZCILOOK | MT_NORMALIZE
+ * CM  norm | upper|formX |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM  norm | upper|formX |     ZCILOOK | MT_NORMALIZE
+ *
+ * Abbreviations:
+ *    CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
+ *    upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
+ *    formX = unicode normalization form set on fs creation
+ */
+static int
+zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
+    int flag)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	boolean_t match_case = B_FALSE;
+	matchtype_t mt = MT_NORMALIZE;
+
+	/* No normalization configured: remove the exact name. */
+	if (!zfsvfs->z_norm)
+		return (zap_remove(zfsvfs->z_os, dzp->z_id, dl->dl_name, tx));
+
+	/*
+	 * Case must be honored for an exact request on a case-insensitive
+	 * file system, and for anything but a ZCILOOK request on a
+	 * mixed-case one (see the table above).
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
+		match_case = (flag & ZCIEXACT) ? B_TRUE : B_FALSE;
+	else if (zfsvfs->z_case == ZFS_CASE_MIXED)
+		match_case = (flag & ZCILOOK) ? B_FALSE : B_TRUE;
+
+	if (match_case)
+		mt |= MT_MATCH_CASE;
+
+	return (zap_remove_norm(zfsvfs->z_os, dzp->z_id, dl->dl_name, mt,
+	    tx));
+}
+
+/*
+ * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
+ * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
+ * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
+ * If it's non-NULL, we use it to indicate whether the znode needs deletion,
+ * and it's the caller's job to do it.
+ */
+int
+zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
+    boolean_t *unlinkedp)
+{
+	znode_t *dzp = dl->dl_dzp;
+	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
+	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
+	boolean_t unlinked = B_FALSE;
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	uint64_t links;
+	int count = 0;
+	int error;
+
+	if (!(flag & ZRENAMING)) {
+		mutex_enter(&zp->z_lock);
+
+		if (zp_is_dir && !zfs_dirempty(zp)) {
+			mutex_exit(&zp->z_lock);
+			return (SET_ERROR(ENOTEMPTY));
+		}
+
+		/*
+		 * If we get here, we are going to try to remove the object.
+		 * First try removing the name from the directory; if that
+		 * fails, return the error.
+		 */
+		error = zfs_dropname(dl, zp, dzp, tx, flag);
+		if (error != 0) {
+			mutex_exit(&zp->z_lock);
+			return (error);
+		}
+
+		/*
+		 * A link count at or below zp_is_dir is unexpectedly low;
+		 * report it and reset the count to zp_is_dir + 1 so that
+		 * the decrement below cannot take it lower than zp_is_dir.
+		 *
+		 * NOTE(review): "%lu" is paired with zp->z_id, which looks
+		 * like a uint64_t; confirm this is right on 32-bit builds.
+		 */
+		if (ZTOI(zp)->i_nlink <= zp_is_dir) {
+			zfs_panic_recover("zfs: link count on %lu is %u, "
+			    "should be at least %u", zp->z_id,
+			    (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
+			set_nlink(ZTOI(zp), zp_is_dir + 1);
+		}
+		drop_nlink(ZTOI(zp));
+		if (ZTOI(zp)->i_nlink == zp_is_dir) {
+			/* That was the last link: mark zp for deletion. */
+			zp->z_unlinked = B_TRUE;
+			clear_nlink(ZTOI(zp));
+			unlinked = B_TRUE;
+		} else {
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+			    NULL, &ctime, sizeof (ctime));
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+			    ctime);
+		}
+		links = ZTOI(zp)->i_nlink;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+		    NULL, &links, sizeof (links));
+		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		count = 0;	/* reuse bulk[] for the directory below */
+		ASSERT(error == 0);
+		mutex_exit(&zp->z_lock);
+	} else {
+		/* Renaming: only the name is dropped; zp is not modified. */
+		error = zfs_dropname(dl, zp, dzp, tx, flag);
+		if (error != 0)
+			return (error);
+	}
+
+	mutex_enter(&dzp->z_lock);
+	dzp->z_size--;		/* one dirent removed */
+	if (zp_is_dir)
+		drop_nlink(ZTOI(dzp));	/* ".." link from zp */
+	links = ZTOI(dzp)->i_nlink;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+	    NULL, &links, sizeof (links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
+	    NULL, &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+	    NULL, ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
+	    NULL, mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+	mutex_exit(&dzp->z_lock);
+
+	/*
+	 * Either report the unlinked state to the caller, or, when the
+	 * caller did not ask for it, queue zp on the unlinked set here.
+	 */
+	if (unlinkedp != NULL)
+		*unlinkedp = unlinked;
+	else if (unlinked)
+		zfs_unlinked_add(zp, tx);
+
+	return (0);
+}
+
+/*
+ * Indicate whether the directory is empty.  Works with or without z_lock
+ * held, but can only be considered a hint in the latter case.  Returns true
+ * if only "." and ".." remain and there's no work in progress.
+ *
+ * The internal ZAP size, rather than zp->z_size, needs to be checked since
+ * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE.
+ */
+boolean_t
+zfs_dirempty(znode_t *dzp)
+{
+	uint64_t nentries;
+
+	/* Outstanding dirlocks mean work is still in progress. */
+	if (dzp->z_dirlocks != NULL)
+		return (B_FALSE);
+
+	/* A failed count is treated the same as a non-empty directory. */
+	if (zap_count(ZTOZSB(dzp)->z_os, dzp->z_id, &nentries) != 0)
+		return (B_FALSE);
+
+	return (nentries == 0 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Create the extended attribute directory for zp and record its object
+ * id in zp's SA_ZPL_XATTR attribute.
+ *
+ *	IN:	zp	- znode that will own the attribute directory
+ *		vap	- attributes for the new directory
+ *		cr	- credentials of caller
+ *
+ *	OUT:	xzpp	- new attribute directory znode (NULL on failure)
+ *
+ *	RETURN:	0 on success
+ *		error number on failure
+ */
+int
+zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	znode_t *xzp;
+	dmu_tx_t *tx;
+	int error;
+	zfs_acl_ids_t acl_ids;
+	boolean_t fuid_dirtied;
+#ifdef ZFS_DEBUG
+	uint64_t parent;
+#endif
+
+	*xzpp = NULL;
+
+	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
+	    &acl_ids)) != 0)
+		return (error);
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) {
+		zfs_acl_ids_free(&acl_ids);
+		return (SET_ERROR(EDQUOT));
+	}
+
+	/* Hold what the create will touch, then assign the transaction. */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		return (error);
+	}
+	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+#ifdef ZFS_DEBUG
+	/* Debug check: the new directory's parent must be zp. */
+	error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent));
+	ASSERT(error == 0 && parent == zp->z_id);
+#endif
+
+	/* Point zp at its new attribute directory. */
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
+	    sizeof (xzp->z_id), tx));
+
+	/* The create is only logged while zp is not unlinked. */
+	if (!zp->z_unlinked)
+		zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL,
+		    acl_ids.z_fuidp, vap);
+
+	zfs_acl_ids_free(&acl_ids);
+	dmu_tx_commit(tx);
+
+	*xzpp = xzp;
+
+	return (0);
+}
+
+/*
+ * Return a znode for the extended attribute directory for zp.
+ * ** If the directory does not already exist, it is created **
+ *
+ *	IN:	zp	- znode to obtain attribute directory from
+ *		cr	- credentials of caller
+ *		flags	- flags from the VOP_LOOKUP call
+ *
+ *	OUT:	xzpp	- pointer to extended attribute znode
+ *
+ *	RETURN:	0 on success
+ *		error number on failure
+ */
+int
+zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	znode_t		*xzp;
+	zfs_dirlock_t	*dl;
+	vattr_t		va;
+	int		error;
+
+	/* Repeat the whole sequence for as long as creation says ERESTART. */
+	for (;;) {
+		error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
+		if (error)
+			return (error);
+
+		if (xzp != NULL) {
+			/* The attribute directory already exists. */
+			*xzpp = xzp;
+			error = 0;
+		} else if (!(flags & CREATE_XATTR_DIR)) {
+			/* Missing, and the caller did not ask to create it. */
+			error = SET_ERROR(ENOENT);
+		} else if (zfs_is_readonly(zfsvfs)) {
+			error = SET_ERROR(EROFS);
+		} else {
+			/*
+			 * Permission to 'create' files in an attribute
+			 * directory derives from write_xattr on the base
+			 * file, and permission to 'search' it from
+			 * read_xattr on the base file.  Inside the directory,
+			 * reading and writing attributes is governed by the
+			 * permissions on the attribute file itself.
+			 */
+			va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
+			va.va_mode = S_IFDIR | S_ISVTX | 0777;
+			zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
+
+			va.va_dentry = NULL;
+			error = zfs_make_xattrdir(zp, &va, xzpp, cr);
+		}
+
+		zfs_dirent_unlock(dl);
+
+		/* NB: we already did dmu_tx_wait() if necessary */
+		if (error != ERESTART)
+			return (error);
+	}
+}
+
+/*
+ * Decide whether it is okay to remove within a sticky directory.
+ *
+ * In sticky directories, write access is not sufficient;
+ * you can remove entries from a directory only if:
+ *
+ *	you own the directory,
+ *	you own the entry,
+ *	you have write access to the entry,
+ *	or you are privileged (checked in secpolicy...).
+ *
+ * The function returns 0 if remove access is granted.
+ */
+int
+zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zdp);
+	uid_t		caller;
+	uid_t		dir_owner;
+	uid_t		file_owner;
+
+	/* Replay is never subject to the sticky check. */
+	if (zfsvfs->z_replay)
+		return (0);
+
+	/* Directory is not sticky: nothing further to enforce here. */
+	if (!(zdp->z_mode & S_ISVTX))
+		return (0);
+
+	dir_owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zdp)->i_uid),
+	    cr, ZFS_OWNER);
+	file_owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zp)->i_uid),
+	    cr, ZFS_OWNER);
+
+	/* Owning either the directory or the entry is sufficient. */
+	caller = crgetuid(cr);
+	if (caller == dir_owner || caller == file_owner)
+		return (0);
+
+	/* So is write access to the entry itself. */
+	if (zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
+		return (0);
+
+	/* Otherwise the decision is left to the privilege policy. */
+	return (secpolicy_vnode_remove(cr));
+}

diff --git a/zfs/module/os/linux/zfs/zfs_file_os.c b/zfs/module/os/linux/zfs/zfs_file_os.c
new file mode 100644
index 0000000..e12f7c3
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_file_os.c

@@ -0,0 +1,428 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zfs_file.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#ifdef HAVE_FDTABLE_HEADER
+#include <linux/fdtable.h>
+#endif
+
+/*
+ * Open file
+ *
+ * path - fully qualified path to file
+ * flags - open flags (O_CREAT, O_WRONLY, O_EXCL, ...) passed to filp_open()
+ * mode - file mode passed to filp_open()
+ * fpp - pointer to return file pointer
+ *
+ * Returns 0 on success, underlying error on failure.
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	struct file *filp;
+	int creating = (flags & O_CREAT) != 0;
+	int saved_umask = 0;
+
+	/* without O_CREAT, a write-only open also requests O_EXCL */
+	if (!creating && (flags & O_WRONLY))
+		flags |= O_EXCL;
+
+	/*
+	 * When creating, swap the task umask for 0 around filp_open() and
+	 * put the previous value back afterwards.
+	 */
+	if (creating)
+		saved_umask = xchg(&current->fs->umask, 0);
+
+	filp = filp_open(path, flags, mode);
+
+	if (creating)
+		(void) xchg(&current->fs->umask, saved_umask);
+
+	/* filp_open() encodes failure in the pointer; return it positive */
+	if (IS_ERR(filp))
+		return (-PTR_ERR(filp));
+
+	*fpp = filp;
+	return (0);
+}
+
+/*
+ * Close file
+ *
+ * fp - file pointer to close
+ *
+ * The result of filp_close() is not reported to the caller.
+ */
+void
+zfs_file_close(zfs_file_t *fp)
+{
+	filp_close(fp, 0);
+}
+
+/*
+ * Common write helper for zfs_file_write() and zfs_file_pwrite().
+ *
+ * fp - file to write to
+ * buf - kernel buffer holding the data
+ * count - # of bytes to write
+ * off - in/out file offset, passed through to the kernel write routine
+ *
+ * Returns the value of kernel_write() / vfs_write(): bytes written on
+ * success or a negative errno on failure.
+ */
+static ssize_t
+zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off)
+{
+#if defined(HAVE_KERNEL_WRITE_PPOS)
+	return (kernel_write(fp, buf, count, off));
+#else
+	mm_segment_t saved_fs;
+	ssize_t rc;
+
+	/*
+	 * vfs_write() expects a user pointer; widen the address limit to
+	 * KERNEL_DS for the duration of the call so a kernel buffer is
+	 * accepted, then restore the previous limit.
+	 */
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	rc = vfs_write(fp, (__force const char __user *)buf, count, off);
+
+	set_fs(saved_fs);
+
+	return (rc);
+#endif
+}
+
+/*
+ * Stateful write - use os internal file pointer to determine where to
+ * write and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * resid -  pointer to count of unwritten bytes  (if short write)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	loff_t pos = fp->f_pos;
+	ssize_t nwritten = zfs_file_write_impl(fp, buf, count, &pos);
+
+	/* negative values are errnos from the kernel; return them positive */
+	if (nwritten < 0)
+		return (-nwritten);
+
+	/* the file's own offset is only advanced once the write succeeded */
+	fp->f_pos = pos;
+
+	/* with resid the caller handles short writes itself */
+	if (resid) {
+		*resid = count - nwritten;
+		return (0);
+	}
+
+	/* without resid a short write is reported as an I/O error */
+	if (nwritten != count)
+		return (EIO);
+
+	return (0);
+}
+
+/*
+ * Stateless write - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * off - file offset to write to (only valid for seekable types)
+ * resid -  pointer to count of unwritten bytes
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	/* 'off' is a by-value copy, so fp->f_pos is never touched here */
+	ssize_t nwritten = zfs_file_write_impl(fp, buf, count, &off);
+
+	if (nwritten < 0)
+		return (-nwritten);
+
+	/* with resid the caller handles short writes itself */
+	if (resid) {
+		*resid = count - nwritten;
+		return (0);
+	}
+
+	/* without resid a short write is reported as an I/O error */
+	if (nwritten != count)
+		return (EIO);
+
+	return (0);
+}
+
+/*
+ * Common read helper for zfs_file_read() and zfs_file_pread().
+ *
+ * fp - file to read from
+ * buf - kernel buffer to fill
+ * count - # of bytes to read
+ * off - in/out file offset, passed through to the kernel read routine
+ *
+ * Returns the value of kernel_read() / vfs_read(): bytes read on success
+ * or a negative errno on failure.
+ */
+static ssize_t
+zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off)
+{
+#if defined(HAVE_KERNEL_READ_PPOS)
+	return (kernel_read(fp, buf, count, off));
+#else
+	mm_segment_t saved_fs;
+	ssize_t rc;
+
+	/*
+	 * vfs_read() expects a user pointer; switch to KERNEL_DS around the
+	 * call so a kernel buffer is accepted, then restore the old limit.
+	 */
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	rc = vfs_read(fp, (void __user *)buf, count, off);
+	set_fs(saved_fs);
+
+	return (rc);
+#endif
+}
+
+/*
+ * Stateful read - use os internal file pointer to determine where to
+ * read and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * resid -  pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	loff_t pos = fp->f_pos;
+	ssize_t nread = zfs_file_read_impl(fp, buf, count, &pos);
+
+	/* negative values are errnos from the kernel; return them positive */
+	if (nread < 0)
+		return (-nread);
+
+	/* the file's own offset is only advanced once the read succeeded */
+	fp->f_pos = pos;
+
+	/* with resid the caller handles short reads itself */
+	if (resid) {
+		*resid = count - nread;
+		return (0);
+	}
+
+	/* without resid a short read is reported as an I/O error */
+	if (nread != count)
+		return (EIO);
+
+	return (0);
+}
+
+/*
+ * Stateless read - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * off - file offset to read from (only valid for seekable types)
+ * resid -  pointer to count of unread bytes (if short read)
+ *
+ * Returns 0 on success errno on failure.
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	/* 'off' is a by-value copy, so fp->f_pos is never touched here */
+	ssize_t nread = zfs_file_read_impl(fp, buf, count, &off);
+
+	if (nread < 0)
+		return (-nread);
+
+	/* with resid the caller handles short reads itself */
+	if (resid) {
+		*resid = count - nread;
+		return (0);
+	}
+
+	/* without resid a short read is reported as an I/O error */
+	if (nread != count)
+		return (EIO);
+
+	return (0);
+}
+
+/*
+ * lseek - set / get file pointer
+ *
+ * fp -  pointer to file (pipe, socket, etc) to seek within
+ * offp - offset to seek to; on success set to the offset vfs_llseek() returns
+ * whence - see man pages for standard lseek whence values
+ *
+ * Returns 0 on success errno on failure (ESPIPE for non seekable types)
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	loff_t requested = *offp;
+	loff_t result;
+
+	/* reject offsets outside [0, MAXOFFSET_T] before asking the VFS */
+	if (requested < 0 || requested > MAXOFFSET_T)
+		return (EINVAL);
+
+	result = vfs_llseek(fp, requested, whence);
+
+	/* a negative result is an errno; return it positive */
+	if (result < 0)
+		return (-result);
+
+	/* hand the resulting position back through offp */
+	*offp = result;
+
+	return (0);
+}
+
+/*
+ * Get file attributes
+ *
+ * filp - file pointer
+ * zfattr - pointer to file attr structure
+ *
+ * Currently only used for fetching size and file mode.
+ *
+ * Returns 0 on success or error code of underlying getattr call on failure.
+ */
+int
+zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr)
+{
+	struct kstat stat;
+	int rc;
+
+	/*
+	 * vfs_getattr() is called in one of three argument forms; the
+	 * HAVE_*ARGS_VFS_GETATTR macros select which one is compiled, and
+	 * the build fails if none of them is defined.
+	 */
+#if defined(HAVE_4ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS,
+	    AT_STATX_SYNC_AS_STAT);
+#elif defined(HAVE_2ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, &stat);
+#elif defined(HAVE_3ARGS_VFS_GETATTR)
+	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat);
+#else
+#error "No available vfs_getattr()"
+#endif
+	/* the result is negated before being returned to the caller */
+	if (rc)
+		return (-rc);
+
+	/* only the size and mode are copied out of the kstat */
+	zfattr->zfa_size = stat.size;
+	zfattr->zfa_mode = stat.mode;
+
+	return (0);
+}
+
+/*
+ * Sync file to disk
+ *
+ * filp - file pointer
+ * flags - O_SYNC and/or O_DSYNC (O_DSYNC requests a data-only sync)
+ *
+ * Returns 0 on success or error code of underlying sync call on failure.
+ */
+int
+zfs_file_fsync(zfs_file_t *filp, int flags)
+{
+	int datasync = (flags & O_DSYNC) ? 1 : 0;
+	int had_fstrans;
+	int error;
+
+	/*
+	 * May enter XFS which generates a warning when PF_FSTRANS is set.
+	 * Drop the flag across the vfs_fsync() call and put it back after.
+	 */
+	had_fstrans = __spl_pf_fstrans_check();
+	if (had_fstrans)
+		current->flags &= ~(__SPL_PF_FSTRANS);
+
+	/* vfs_fsync() result is negated to give a positive errno */
+	error = -vfs_fsync(filp, datasync);
+
+	if (had_fstrans)
+		current->flags |= __SPL_PF_FSTRANS;
+
+	return (error);
+}
+
+/*
+ * fallocate - allocate or free space on disk
+ *
+ * fp - file pointer
+ * mode (non-standard options for hole punching etc)
+ * offset - offset to start allocating or freeing from
+ * len - length to free / allocate
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
+{
+	/*
+	 * May enter XFS which generates a warning when PF_FSTRANS is set.
+	 * To avoid this the flag is cleared over the fallocate() callback
+	 * and then reset.
+	 */
+	int fstrans = __spl_pf_fstrans_check();
+	if (fstrans)
+		current->flags &= ~(__SPL_PF_FSTRANS);
+
+	/*
+	 * When supported by the underlying file system preferentially
+	 * use the fallocate() callback to preallocate the space.
+	 *
+	 * The callback reports failure as a negative errno.  Negate it so
+	 * this function returns a positive errno, matching the EOPNOTSUPP
+	 * fallback below and the other zfs_file_*() routines in this file.
+	 */
+	int error = EOPNOTSUPP;
+	if (fp->f_op->fallocate)
+		error = -fp->f_op->fallocate(fp, mode, offset, len);
+
+	if (fstrans)
+		current->flags |= __SPL_PF_FSTRANS;
+
+	return (error);
+}
+
+/*
+ * Request current file pointer offset
+ *
+ * fp - pointer to file
+ *
+ * Returns current file offset.
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	/* plain read of the offset stored in the file struct */
+	return (fp->f_pos);
+}
+
+/*
+ * Request file pointer private data
+ *
+ * fp - pointer to file
+ *
+ * Returns pointer to file private data.
+ */
+void *
+zfs_file_private(zfs_file_t *fp)
+{
+	/* plain read of the private_data pointer; may be whatever was stored */
+	return (fp->private_data);
+}
+
+/*
+ * unlink file
+ *
+ * path - fully qualified file path
+ *
+ * Returns 0 on success.
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_unlink(const char *path)
+{
+	/* not implemented here: 'path' is unused and EOPNOTSUPP is returned */
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Get reference to file pointer
+ *
+ * fd - input file descriptor
+ *
+ * Returns pointer to file struct or NULL
+ */
+zfs_file_t *
+zfs_file_get(int fd)
+{
+	/* thin wrapper: the result of fget() is returned unchanged */
+	return (fget(fd));
+}
+
+/*
+ * Drop reference to file pointer
+ *
+ * fp - input file struct pointer
+ */
+void
+zfs_file_put(zfs_file_t *fp)
+{
+	/* thin wrapper around fput(); counterpart of zfs_file_get() */
+	fput(fp);
+}

diff --git a/zfs/module/os/linux/zfs/zfs_ioctl_os.c b/zfs/module/os/linux/zfs/zfs_ioctl_os.c
new file mode 100644
index 0000000..767d3a3
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_ioctl_os.c

@@ -0,0 +1,339 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright 2011 Martin Matuska
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
+ * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright 2017 RackTop Systems.
+ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2019 Datto Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zap.h>
+#include <sys/spa.h>
+#include <sys/nvpair.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zvol.h>
+#include <sys/fm/util.h>
+#include <sys/dsl_crypt.h>
+
+#include <sys/zfs_ioctl_impl.h>
+
+#include <sys/zfs_sysfs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+
+/*
+ * Report whether the zfsvfs has a super block attached (z_sb != NULL).
+ */
+boolean_t
+zfs_vfs_held(zfsvfs_t *zfsvfs)
+{
+	return (zfsvfs->z_sb != NULL);
+}
+
+/*
+ * Take an active reference on the super block behind *zfvp.
+ *
+ * Returns 0 when s_active was incremented, or ESRCH when *zfvp is NULL,
+ * has no super block, or the super block's s_active count is already zero.
+ */
+int
+zfs_vfs_ref(zfsvfs_t **zfvp)
+{
+	zfsvfs_t *zfsvfs = *zfvp;
+
+	if (zfsvfs == NULL || zfsvfs->z_sb == NULL)
+		return (SET_ERROR(ESRCH));
+
+	/* only succeeds while the super block is still active */
+	if (!atomic_inc_not_zero(&(zfsvfs->z_sb->s_active)))
+		return (SET_ERROR(ESRCH));
+
+	return (0);
+}
+
+/*
+ * Release the super block of 'zfsvfs' via deactivate_super(); z_sb is
+ * passed through unchecked.
+ */
+void
+zfs_vfs_rele(zfsvfs_t *zfsvfs)
+{
+	deactivate_super(zfsvfs->z_sb);
+}
+
+/*
+ * Attach a zfsdev_state_t to 'filp' for a newly opened device node.
+ *
+ * A fresh minor is allocated, then either an unused entry on
+ * zfsdev_state_list (zs_minor == -1) is recycled or a new entry is
+ * allocated and appended.  Must be called with zfsdev_state_lock held.
+ *
+ * Returns 0 on success or ENXIO when no minor could be allocated.
+ */
+static int
+zfsdev_state_init(struct file *filp)
+{
+	zfsdev_state_t *zs, *zsprev = NULL;
+	minor_t minor;
+	boolean_t newzs = B_FALSE;
+
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+
+	/* a minor of 0 from the allocator is treated as failure */
+	minor = zfsdev_minor_alloc();
+	if (minor == 0)
+		return (SET_ERROR(ENXIO));
+
+	/*
+	 * Search for a recyclable entry.  If none is found zs ends up NULL
+	 * and zsprev is left pointing at the last entry visited.
+	 */
+	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+		if (zs->zs_minor == -1)
+			break;
+		zsprev = zs;
+	}
+
+	if (!zs) {
+		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
+		newzs = B_TRUE;
+	}
+
+	filp->private_data = zs;
+
+	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
+	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
+
+	/*
+	 * In order to provide for lock-free concurrent read access
+	 * to the minor list in zfsdev_get_state_impl(), new entries
+	 * must be completely written before linking them into the
+	 * list whereas existing entries are already linked; the last
+	 * operation must be updating zs_minor (from -1 to the new
+	 * value).
+	 */
+	if (newzs) {
+		zs->zs_minor = minor;
+		smp_wmb();
+		/*
+		 * NOTE(review): zsprev is still NULL here if the list was
+		 * empty; this presumably relies on zfsdev_state_list always
+		 * holding at least one entry -- confirm where it is set up.
+		 */
+		zsprev->zs_next = zs;
+	} else {
+		smp_wmb();
+		zs->zs_minor = minor;
+	}
+
+	return (0);
+}
+
+/*
+ * Detach the per-open state from 'filp'.
+ *
+ * The entry is not freed or unlinked: it is marked unused by setting
+ * zs_minor to -1 (which zfsdev_state_init() looks for when recycling)
+ * and its onexit / zevent state is destroyed.  Must be called with
+ * zfsdev_state_lock held.  Always returns 0.
+ */
+static int
+zfsdev_state_destroy(struct file *filp)
+{
+	zfsdev_state_t *zs;
+
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+	ASSERT(filp->private_data != NULL);
+
+	zs = filp->private_data;
+	/* mark the slot unused before tearing down what it points to */
+	zs->zs_minor = -1;
+	zfs_onexit_destroy(zs->zs_onexit);
+	zfs_zevent_destroy(zs->zs_zevent);
+	zs->zs_onexit = NULL;
+	zs->zs_zevent = NULL;
+
+	return (0);
+}
+
+/*
+ * file_operations .open handler: set up per-open state under
+ * zfsdev_state_lock and return the result negated.
+ */
+static int
+zfsdev_open(struct inode *ino, struct file *filp)
+{
+	int error;
+
+	mutex_enter(&zfsdev_state_lock);
+	error = zfsdev_state_init(filp);
+	mutex_exit(&zfsdev_state_lock);
+
+	/* zfsdev_state_init() returns a positive errno; flip the sign */
+	return (-error);
+}
+
+/*
+ * file_operations .release handler: tear down per-open state under
+ * zfsdev_state_lock and return the result negated.
+ */
+static int
+zfsdev_release(struct inode *ino, struct file *filp)
+{
+	int error;
+
+	mutex_enter(&zfsdev_state_lock);
+	error = zfsdev_state_destroy(filp);
+	mutex_exit(&zfsdev_state_lock);
+
+	return (-error);
+}
+
+/*
+ * file_operations ioctl handler.
+ *
+ * Copies the zfs_cmd_t at user address 'arg' into a kernel buffer, runs
+ * the common ioctl code on it and copies the buffer back out.  Returns
+ * 0 or a negative errno.
+ */
+static long
+zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+	void *uaddr = (void *)(uintptr_t)arg;
+	uint_t vecnum = cmd - ZFS_IOC_FIRST;
+	zfs_cmd_t *zc;
+	int error;
+
+	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+
+	if (ddi_copyin(uaddr, zc, sizeof (zfs_cmd_t), 0)) {
+		error = -SET_ERROR(EFAULT);
+	} else {
+		int rc;
+
+		error = -zfsdev_ioctl_common(vecnum, zc, 0);
+
+		/*
+		 * The command is copied back out whether or not the ioctl
+		 * failed; a copyout failure is only reported when there is
+		 * no earlier error to return.
+		 */
+		rc = ddi_copyout(zc, uaddr, sizeof (zfs_cmd_t), 0);
+		if (error == 0 && rc != 0)
+			error = -SET_ERROR(EFAULT);
+	}
+
+	kmem_free(zc, sizeof (zfs_cmd_t));
+	return (error);
+}
+
+/*
+ * Upper bound for a source nvlist: the zfs_max_nvlist_src_size value
+ * when it is non-zero, otherwise a quarter of total RAM capped at 128 MiB.
+ */
+uint64_t
+zfs_max_nvlist_src_size_os(void)
+{
+	if (zfs_max_nvlist_src_size == 0)
+		return (MIN(ptob(zfs_totalram_pages) / 4, 128 * 1024 * 1024));
+
+	return (zfs_max_nvlist_src_size);
+}
+
+/*
+ * Update the VFS's cache of mountpoint properties
+ *
+ * This implementation has an empty body; 'dsname' is unused.
+ */
+void
+zfs_ioctl_update_mount_cache(const char *dsname)
+{
+}
+
+/*
+ * OS-specific ioctl initialization hook; empty in this implementation.
+ */
+void
+zfs_ioctl_init_os(void)
+{
+}
+
+/*
+ * With CONFIG_COMPAT the compat ioctl entry point forwards to the native
+ * handler unchanged; without it the name expands to NULL so the
+ * .compat_ioctl slot in zfsdev_fops is left empty.
+ */
+#ifdef CONFIG_COMPAT
+static long
+zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+	return (zfsdev_ioctl(filp, cmd, arg));
+}
+#else
+#define	zfsdev_compat_ioctl	NULL
+#endif
+
+/* File operations for the ZFS control device. */
+static const struct file_operations zfsdev_fops = {
+	.open		= zfsdev_open,
+	.release	= zfsdev_release,
+	.unlocked_ioctl	= zfsdev_ioctl,
+	.compat_ioctl	= zfsdev_compat_ioctl,
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * Misc device descriptor registered by zfsdev_attach().  Not const:
+ * .minor is rewritten to MISC_DYNAMIC_MINOR there if the static minor
+ * is busy.
+ */
+static struct miscdevice zfs_misc = {
+	.minor		= ZFS_DEVICE_MINOR,
+	.name		= ZFS_DRIVER,
+	.fops		= &zfsdev_fops,
+};
+
+MODULE_ALIAS_MISCDEV(ZFS_DEVICE_MINOR);
+MODULE_ALIAS("devname:zfs");
+
+/*
+ * Register the ZFS misc device.
+ *
+ * Returns the result of misc_register(): 0 on success, otherwise the
+ * error it reported.
+ */
+int
+zfsdev_attach(void)
+{
+	int error = misc_register(&zfs_misc);
+
+	if (error == -EBUSY) {
+		/*
+		 * The static minor collided with a reserved minor in
+		 * linux/miscdevice.h, so fall back to dynamic minor
+		 * allocation.  In this case the kernel modules must be
+		 * manually loaded.
+		 */
+		printk(KERN_INFO "ZFS: misc_register() with static minor %d "
+		    "failed %d, retrying with MISC_DYNAMIC_MINOR\n",
+		    ZFS_DEVICE_MINOR, error);
+
+		zfs_misc.minor = MISC_DYNAMIC_MINOR;
+		error = misc_register(&zfs_misc);
+	}
+
+	if (error != 0)
+		printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
+
+	return (error);
+}
+
+/*
+ * Unregister the misc device registered by zfsdev_attach().
+ */
+void
+zfsdev_detach(void)
+{
+	misc_deregister(&zfs_misc);
+}
+
+/* Suffix appended to the version in the load/unload log messages. */
+#ifdef ZFS_DEBUG
+#define	ZFS_DEBUG_STR	" (DEBUG mode)"
+#else
+#define	ZFS_DEBUG_STR	""
+#endif
+
+/* Set once in openzfs_init() from zfs_get_init_idmap(). */
+zidmap_t *zfs_init_idmap;
+
+/*
+ * Module entry point: run the common kmod initialization, set up the
+ * sysfs entries and log the loaded version.
+ *
+ * Returns 0 on success or the negated zfs_kmod_init() error.
+ */
+static int __init
+openzfs_init(void)
+{
+	int error;
+
+	if ((error = zfs_kmod_init()) != 0) {
+		printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
+		    ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
+		    ZFS_DEBUG_STR, error);
+
+		return (-error);
+	}
+
+	/* any result from zfs_sysfs_init() is not checked here */
+	zfs_sysfs_init();
+
+	printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
+	    "ZFS pool version %s, ZFS filesystem version %s\n",
+	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
+	    SPA_VERSION_STRING, ZPL_VERSION_STRING);
+#ifndef CONFIG_FS_POSIX_ACL
+	printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
+#endif /* CONFIG_FS_POSIX_ACL */
+
+	zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap();
+
+	return (0);
+}
+
+/*
+ * Module exit point: undo openzfs_init() in reverse order (sysfs first,
+ * then the common kmod state) and log the unload.
+ */
+static void __exit
+openzfs_fini(void)
+{
+	zfs_sysfs_fini();
+	zfs_kmod_fini();
+
+	printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
+	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
+}
+
+/* The init/exit hooks are only registered in _KERNEL builds. */
+#if defined(_KERNEL)
+module_init(openzfs_init);
+module_exit(openzfs_fini);
+#endif
+
+/* Module metadata, taken from the ZFS_META_* build definitions. */
+ZFS_MODULE_DESCRIPTION("ZFS");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);

diff --git a/zfs/module/os/linux/zfs/zfs_racct.c b/zfs/module/os/linux/zfs/zfs_racct.c
new file mode 100644
index 0000000..ce623ef
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_racct.c

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/zfs_racct.h>
+
+/*
+ * Read accounting hook.  This implementation does nothing with its
+ * arguments beyond marking them as used.
+ */
+void
+zfs_racct_read(uint64_t size, uint64_t iops)
+{
+	(void) size;
+	(void) iops;
+}
+
+/*
+ * Write accounting hook.  This implementation does nothing with its
+ * arguments beyond marking them as used.
+ */
+void
+zfs_racct_write(uint64_t size, uint64_t iops)
+{
+	(void) size;
+	(void) iops;
+}

diff --git a/zfs/module/os/linux/zfs/zfs_sysfs.c b/zfs/module/os/linux/zfs/zfs_sysfs.c
new file mode 100644
index 0000000..e73b34a
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_sysfs.c

@@ -0,0 +1,667 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/zfeature.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_sysfs.h>
+#include <sys/kmem.h>
+#include <sys/fs/zfs.h>
+#include <linux/kobject.h>
+
+#include "zfs_prop.h"
+
+#if !defined(_KERNEL)
+#error kernel builds only
+#endif
+
+/*
+ * ZFS Module sysfs support
+ *
+ * This extends our sysfs '/sys/module/zfs' entry to include feature
+ * and property attributes. The primary consumer of this information
+ * is user processes, like the zfs CLI, that need to know what the
+ * current loaded ZFS module supports. The libzfs binary will consult
+ * this information when instantiating the zfs|zpool property tables
+ * and the pool features table.
+ *
+ * The added top-level directories are:
+ * /sys/module/zfs
+ *		├── features.kernel
+ *		├── features.pool
+ *		├── properties.dataset
+ *		└── properties.pool
+ *
+ * The local interface for the zfs kobjects includes:
+ *	zfs_kobj_init()
+ *	zfs_kobj_add()
+ *	zfs_kobj_release()
+ *	zfs_kobj_add_attr()
+ *	zfs_kobj_fini()
+ */
+
+/*
+ * A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs'
+ */
+typedef struct zfs_mod_kobj zfs_mod_kobj_t;
+struct zfs_mod_kobj {
+	/* embedded kobject; zfs_kobj_release() recovers the container */
+	struct kobject		zko_kobj;
+	/* per-object type: release hook, sysfs ops and default attrs */
+	struct kobj_type	zko_kobj_type;
+	/* only .show is filled in by zfs_kobj_init() */
+	struct sysfs_ops	zko_sysfs_ops;
+	/* number of entries in zko_attr_list */
+	size_t			zko_attr_count;
+	struct attribute	*zko_attr_list;		/* allocated */
+	struct attribute_group	zko_default_group;	/* .attrs allocated */
+	/* slot 0 is pointed at zko_default_group by zfs_kobj_init() */
+	const struct attribute_group	*zko_default_groups[2];
+	/* number of entries in zko_children */
+	size_t			zko_child_count;
+	zfs_mod_kobj_t		*zko_children;		/* allocated */
+};
+
+/* Allocation sizes for the three per-kobject tables */
+#define	ATTR_TABLE_SIZE(cnt)	(sizeof (struct attribute) * (cnt))
+/* Note +1 for NULL terminator slot; cnt is parenthesized for safety */
+#define	DEFAULT_ATTR_SIZE(cnt)	(sizeof (struct attribute *) * ((cnt) + 1))
+#define	CHILD_TABLE_SIZE(cnt)	(sizeof (zfs_mod_kobj_t) * (cnt))
+
+/*
+ * These are the top-level kobjects under '/sys/module/zfs/'
+ */
+static zfs_mod_kobj_t kernel_features_kobj;
+static zfs_mod_kobj_t pool_features_kobj;
+static zfs_mod_kobj_t dataset_props_kobj;
+static zfs_mod_kobj_t pool_props_kobj;
+
+/*
+ * The show function is used to provide the content
+ * of an attribute into a PAGE_SIZE buffer.
+ */
+typedef ssize_t	(*sysfs_show_func)(struct kobject *, struct attribute *,
+    char *);
+
+/*
+ * Tear down a zfs kobject and, recursively, all of its children.
+ *
+ * Children are finalized first; the object itself is then removed with
+ * kobject_del() and its reference dropped with kobject_put().
+ */
+static void
+zfs_kobj_fini(zfs_mod_kobj_t *zkobj)
+{
+	/* finalize any child kobjects */
+	if (zkobj->zko_child_count != 0) {
+		ASSERT(zkobj->zko_children);
+		for (int i = 0; i < zkobj->zko_child_count; i++)
+			zfs_kobj_fini(&zkobj->zko_children[i]);
+	}
+
+	/* kobject_put() will call zfs_kobj_release() to release memory */
+	kobject_del(&zkobj->zko_kobj);
+	kobject_put(&zkobj->zko_kobj);
+}
+
+/*
+ * kobj_type release hook (installed by zfs_kobj_init()).
+ *
+ * Frees the attribute list, the default group's attrs array and the
+ * child table, resetting each pointer/count afterwards so a repeated
+ * call finds nothing left to free.  The zfs_mod_kobj_t itself is not
+ * freed here.
+ */
+static void
+zfs_kobj_release(struct kobject *kobj)
+{
+	zfs_mod_kobj_t *zkobj = container_of(kobj, zfs_mod_kobj_t, zko_kobj);
+
+	if (zkobj->zko_attr_list != NULL) {
+		ASSERT3S(zkobj->zko_attr_count, !=, 0);
+		kmem_free(zkobj->zko_attr_list,
+		    ATTR_TABLE_SIZE(zkobj->zko_attr_count));
+		zkobj->zko_attr_list = NULL;
+	}
+
+	/* sized from zko_attr_count, so that is only cleared at the end */
+	if (zkobj->zko_default_group.attrs != NULL) {
+		kmem_free(zkobj->zko_default_group.attrs,
+		    DEFAULT_ATTR_SIZE(zkobj->zko_attr_count));
+		zkobj->zko_default_group.attrs = NULL;
+	}
+
+	if (zkobj->zko_child_count != 0) {
+		ASSERT(zkobj->zko_children);
+
+		kmem_free(zkobj->zko_children,
+		    CHILD_TABLE_SIZE(zkobj->zko_child_count));
+		zkobj->zko_child_count = 0;
+		zkobj->zko_children = NULL;
+	}
+
+	zkobj->zko_attr_count = 0;
+}
+
+/* Fall back to a no-op when the headers do not define sysfs_attr_init */
+#ifndef sysfs_attr_init
+#define	sysfs_attr_init(attr) do {} while (0)
+#endif
+
+/*
+ * Fill in attribute slot 'attr_num' of a kobject prepared by
+ * zfs_kobj_init().
+ *
+ * The attribute is named 'attr_name' (the pointer is stored, not copied),
+ * given mode 0444 and linked into the default group's attrs array.
+ * 'attr_num' must be below zko_attr_count (enforced with VERIFY).
+ */
+static void
+zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name)
+{
+	VERIFY3U(attr_num, <, zkobj->zko_attr_count);
+	ASSERT(zkobj->zko_attr_list);
+	ASSERT(zkobj->zko_default_group.attrs);
+
+	zkobj->zko_attr_list[attr_num].name = attr_name;
+	zkobj->zko_attr_list[attr_num].mode = 0444;
+	zkobj->zko_default_group.attrs[attr_num] =
+	    &zkobj->zko_attr_list[attr_num];
+	sysfs_attr_init(&zkobj->zko_attr_list[attr_num]);
+}
+
+/*
+ * Allocate the tables of a zfs kobject and wire up its kobj_type.
+ *
+ * zkobj - object to initialize
+ * attr_cnt - number of attribute slots (may be zero)
+ * child_cnt - number of child slots (may be zero)
+ * show_func - sysfs show callback stored in the object's sysfs_ops
+ *
+ * Returns 0 on success or ENOMEM if an allocation returned NULL, in which
+ * case the tables allocated so far are freed again.
+ * NOTE(review): the allocations use KM_SLEEP, so the NULL checks are
+ * presumably defensive -- confirm against kmem_zalloc() semantics.
+ */
+static int
+zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
+    sysfs_show_func show_func)
+{
+	/*
+	 * Initialize object's attributes. Count can be zero.
+	 */
+	if (attr_cnt > 0) {
+		zkobj->zko_attr_list = kmem_zalloc(ATTR_TABLE_SIZE(attr_cnt),
+		    KM_SLEEP);
+		if (zkobj->zko_attr_list == NULL)
+			return (ENOMEM);
+	}
+	/* this will always have at least one slot for NULL termination */
+	zkobj->zko_default_group.attrs =
+	    kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt), KM_SLEEP);
+	if (zkobj->zko_default_group.attrs == NULL) {
+		if (zkobj->zko_attr_list != NULL) {
+			kmem_free(zkobj->zko_attr_list,
+			    ATTR_TABLE_SIZE(attr_cnt));
+		}
+		return (ENOMEM);
+	}
+	zkobj->zko_attr_count = attr_cnt;
+	zkobj->zko_default_groups[0] = &zkobj->zko_default_group;
+	/* the kobj_type field for default attributes differs by kernel */
+#ifdef HAVE_SYSFS_DEFAULT_GROUPS
+	zkobj->zko_kobj_type.default_groups = zkobj->zko_default_groups;
+#else
+	zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_group.attrs;
+#endif
+
+	if (child_cnt > 0) {
+		zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt),
+		    KM_SLEEP);
+		if (zkobj->zko_children == NULL) {
+			if (zkobj->zko_default_group.attrs != NULL) {
+				kmem_free(zkobj->zko_default_group.attrs,
+				    DEFAULT_ATTR_SIZE(attr_cnt));
+			}
+			if (zkobj->zko_attr_list != NULL) {
+				kmem_free(zkobj->zko_attr_list,
+				    ATTR_TABLE_SIZE(attr_cnt));
+			}
+			return (ENOMEM);
+		}
+		zkobj->zko_child_count = child_cnt;
+	}
+
+	/* each object carries its own kobj_type and sysfs_ops */
+	zkobj->zko_sysfs_ops.show = show_func;
+	zkobj->zko_kobj_type.sysfs_ops = &zkobj->zko_sysfs_ops;
+	zkobj->zko_kobj_type.release = zfs_kobj_release;
+
+	return (0);
+}
+
+/*
+ * Initialize the embedded kobject with the object's own kobj_type and
+ * add it under 'parent' as 'name'.
+ *
+ * Returns the result of kobject_add().
+ */
+static int
+zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
+{
+	/* zko_default_group.attrs must be NULL terminated */
+	ASSERT(zkobj->zko_default_group.attrs != NULL);
+	ASSERT(zkobj->zko_default_group.attrs[zkobj->zko_attr_count] == NULL);
+
+	kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type);
+	return (kobject_add(&zkobj->zko_kobj, parent, name));
+}
+
+/*
+ * Each zfs property has these common attributes
+ */
+static const char *zprop_attrs[]  = {
+	"type",
+	"readonly",
+	"setonce",
+	"visible",
+	"values",
+	"default",
+	"datasets"	/* zfs properties only */
+};
+
+/*
+ * "datasets" must stay last: the pool count is the zfs count minus one,
+ * i.e. every entry of zprop_attrs except the final one.
+ */
+#define	ZFS_PROP_ATTR_COUNT	ARRAY_SIZE(zprop_attrs)
+#define	ZPOOL_PROP_ATTR_COUNT	(ZFS_PROP_ATTR_COUNT - 1)
+
+/* Strings for the "type" attribute; indexed by a property's pd_proptype */
+static const char *zprop_types[]  = {
+	"number",
+	"string",
+	"index",
+};
+
+/* Pairs a dataset type bit with the name shown for it */
+typedef struct zfs_type_map {
+	zfs_type_t	ztm_type;
+	const char	*ztm_name;
+} zfs_type_map_t;
+
+/* Walked by zprop_sysfs_show() to build the "datasets" attribute */
+static zfs_type_map_t type_map[] = {
+	{ZFS_TYPE_FILESYSTEM,	"filesystem"},
+	{ZFS_TYPE_SNAPSHOT,	"snapshot"},
+	{ZFS_TYPE_VOLUME,	"volume"},
+	{ZFS_TYPE_BOOKMARK,	"bookmark"}
+};
+
+/*
+ * Show the content for a zfs property attribute
+ */
+/*
+ * attr_name - name of the attribute being read (see zprop_attrs)
+ * property - descriptor of the property the attribute belongs to
+ * buf, buflen - output buffer and its size
+ *
+ * Returns the number of bytes placed in buf (excluding the terminating
+ * NUL), or 0 for an unknown attribute name or property type.
+ *
+ * Output is formatted with scnprintf() rather than snprintf(): the
+ * kernel's snprintf() returns the length the output would have had,
+ * which on truncation would push 'len' past the end of the buffer and
+ * report more bytes than were actually written.
+ */
+static ssize_t
+zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property,
+    char *buf, size_t buflen)
+{
+	const char *show_str;
+	char number[32];
+
+	/* For dataset properties list the dataset types that apply */
+	if (strcmp(attr_name, "datasets") == 0 &&
+	    property->pd_types != ZFS_TYPE_POOL) {
+		int len = 0;
+
+		for (int i = 0; i < ARRAY_SIZE(type_map); i++) {
+			if (type_map[i].ztm_type & property->pd_types)  {
+				len += scnprintf(buf + len, buflen - len,
+				    "%s ", type_map[i].ztm_name);
+			}
+		}
+		len += scnprintf(buf + len, buflen - len, "\n");
+		return (len);
+	}
+
+	if (strcmp(attr_name, "type") == 0) {
+		show_str = zprop_types[property->pd_proptype];
+	} else if (strcmp(attr_name, "readonly") == 0) {
+		show_str = property->pd_attr == PROP_READONLY ? "1" : "0";
+	} else if (strcmp(attr_name, "setonce") == 0) {
+		show_str = property->pd_attr == PROP_ONETIME ? "1" : "0";
+	} else if (strcmp(attr_name, "visible") == 0) {
+		show_str = property->pd_visible ? "1" : "0";
+	} else if (strcmp(attr_name, "values") == 0) {
+		show_str = property->pd_values ? property->pd_values : "";
+	} else if (strcmp(attr_name, "default") == 0) {
+		switch (property->pd_proptype) {
+		case PROP_TYPE_NUMBER:
+			(void) snprintf(number, sizeof (number), "%llu",
+			    (u_longlong_t)property->pd_numdefault);
+			show_str = number;
+			break;
+		case PROP_TYPE_STRING:
+			show_str = property->pd_strdefault ?
+			    property->pd_strdefault : "";
+			break;
+		case PROP_TYPE_INDEX:
+			/* an index with no string form is shown as empty */
+			if (zprop_index_to_string(property->pd_propnum,
+			    property->pd_numdefault, &show_str,
+			    property->pd_types) != 0) {
+				show_str = "";
+			}
+			break;
+		default:
+			return (0);
+		}
+	} else {
+		return (0);
+	}
+
+	return (scnprintf(buf, buflen, "%s\n", show_str));
+}
+
+/*
+ * sysfs show callback for a dataset property attribute.  The kobject's
+ * name selects the property; the attribute's name selects what to print.
+ */
+static ssize_t
+dataset_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	zfs_prop_t prop = zfs_name_to_prop(kobject_name(kobj));
+	zprop_desc_t *table = zfs_prop_get_table();
+
+	ASSERT3U(prop, <, ZFS_NUM_PROPS);
+
+	return (zprop_sysfs_show(attr->name, &table[prop], buf, PAGE_SIZE));
+}
+
+/*
+ * sysfs show callback for a pool property attribute.  The kobject's
+ * name selects the property; the attribute's name selects what to print.
+ */
+static ssize_t
+pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	zpool_prop_t prop = zpool_name_to_prop(kobject_name(kobj));
+	zprop_desc_t *table = zpool_prop_get_table();
+
+	ASSERT3U(prop, <, ZPOOL_NUM_PROPS);
+
+	return (zprop_sysfs_show(attr->name, &table[prop], buf, PAGE_SIZE));
+}
+
+/*
+ * ZFS kernel feature attributes for '/sys/module/zfs/features.kernel'
+ *
+ * This list is intended for kernel features that don't have a pool feature
+ * association or that extend existing user kernel interfaces.
+ *
+ * A user process can easily check if the running zfs kernel module
+ * supports the new feature.
+ */
+static const char *zfs_kernel_features[] = {
+	/* --> Add new kernel features here */
+	"com.delphix:vdev_initialize",
+	"org.zfsonlinux:vdev_trim",
+	"org.openzfs:l2arc_persistent",
+};
+
+#define	KERNEL_FEATURE_COUNT	ARRAY_SIZE(zfs_kernel_features)
+
+/*
+ * sysfs show callback for kernel feature kobjects: the "supported"
+ * attribute reads as "yes"; any other attribute yields no output.
+ */
+static ssize_t
+kernel_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	if (strcmp(attr->name, "supported") != 0)
+		return (0);
+
+	return (snprintf(buf, PAGE_SIZE, "yes\n"));
+}
+
+/*
+ * Create the kobject for one kernel feature in child slot 'slot' of
+ * 'parent', with a single "supported" attribute.
+ *
+ * Failures are not reported to the caller: the function simply returns,
+ * releasing the slot's allocations if the kobject could not be added.
+ */
+static void
+kernel_feature_to_kobj(zfs_mod_kobj_t *parent, int slot, const char *name)
+{
+	zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[slot];
+
+	ASSERT3U(slot, <, KERNEL_FEATURE_COUNT);
+	ASSERT(name);
+
+	int err = zfs_kobj_init(zfs_kobj, 1, 0, kernel_feature_show);
+	if (err)
+		return;
+
+	zfs_kobj_add_attr(zfs_kobj, 0, "supported");
+
+	err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
+	/* on failure free what zfs_kobj_init() allocated for this slot */
+	if (err)
+		zfs_kobj_release(&zfs_kobj->zko_kobj);
+}
+
+/*
+ * Build the kernel features directory under 'parent' and one child
+ * kobject per entry of zfs_kernel_features.
+ *
+ * Returns 0 on success or the error from creating the directory kobject.
+ * Failures while creating individual feature kobjects are not reported.
+ */
+static int
+zfs_kernel_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
+{
+	/*
+	 * Create a parent kobject to host kernel features.
+	 *
+	 * '/sys/module/zfs/features.kernel'
+	 */
+	int err = zfs_kobj_init(zfs_kobj, 0, KERNEL_FEATURE_COUNT,
+	    kernel_feature_show);
+	if (err)
+		return (err);
+	err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_KERNEL_FEATURES);
+	if (err) {
+		/* free the tables allocated by zfs_kobj_init() above */
+		zfs_kobj_release(&zfs_kobj->zko_kobj);
+		return (err);
+	}
+
+	/*
+	 * Now create a kobject for each feature.
+	 *
+	 * '/sys/module/zfs/features.kernel/<feature>'
+	 */
+	for (int f = 0; f < KERNEL_FEATURE_COUNT; f++)
+		kernel_feature_to_kobj(zfs_kobj, f, zfs_kernel_features[f]);
+
+	return (0);
+}
+
+/*
+ * Each pool feature kobject exposes these common attributes.  The names
+ * here are the ones pool_feature_show() compares against when producing
+ * the attribute content.
+ */
+static const char *pool_feature_attrs[]  = {
+	"description",
+	"guid",
+	"uname",
+	"readonly_compatible",
+	"required_for_mos",
+	"activate_on_enable",
+	"per_dataset"
+};
+
+/* Number of entries in pool_feature_attrs[]. */
+#define	ZPOOL_FEATURE_ATTR_COUNT	ARRAY_SIZE(pool_feature_attrs)
+
+/*
+ * Show handler for the attributes of a pool feature kobject.
+ *
+ * The kobject's name is looked up as a feature guid; the matching
+ * spa_feature_table[] entry supplies the string attributes, and its
+ * flags are rendered as "1"/"0" for the boolean attributes.  An unknown
+ * feature or attribute name results in an empty read (0 bytes).
+ */
+static ssize_t
+pool_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	const char *attr_name = attr->name;
+	const char *text;
+	zfeature_flags_t flags;
+	spa_feature_t fid;
+
+	if (zfeature_lookup_guid(kobject_name(kobj), &fid) != 0)
+		return (0);
+
+	ASSERT3U(fid, <, SPA_FEATURES);
+
+	flags = spa_feature_table[fid].fi_flags;
+
+	if (strcmp(attr_name, "description") == 0)
+		text = spa_feature_table[fid].fi_desc;
+	else if (strcmp(attr_name, "guid") == 0)
+		text = spa_feature_table[fid].fi_guid;
+	else if (strcmp(attr_name, "uname") == 0)
+		text = spa_feature_table[fid].fi_uname;
+	else if (strcmp(attr_name, "readonly_compatible") == 0)
+		text = (flags & ZFEATURE_FLAG_READONLY_COMPAT) != 0 ?
+		    "1" : "0";
+	else if (strcmp(attr_name, "required_for_mos") == 0)
+		text = (flags & ZFEATURE_FLAG_MOS) != 0 ? "1" : "0";
+	else if (strcmp(attr_name, "activate_on_enable") == 0)
+		text = (flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE) != 0 ?
+		    "1" : "0";
+	else if (strcmp(attr_name, "per_dataset") == 0)
+		text = (flags & ZFEATURE_FLAG_PER_DATASET) != 0 ? "1" : "0";
+	else
+		text = NULL;
+
+	if (text == NULL)
+		return (0);
+
+	return (snprintf(buf, PAGE_SIZE, "%s\n", text));
+}
+
+/*
+ * Set up child slot 'fid' of 'parent' as a kobject called 'name' carrying
+ * every attribute listed in pool_feature_attrs[].  Failures are not
+ * reported to the caller; if adding the kobject fails it is released.
+ */
+static void
+pool_feature_to_kobj(zfs_mod_kobj_t *parent, spa_feature_t fid,
+    const char *name)
+{
+	zfs_mod_kobj_t *child = &parent->zko_children[fid];
+	int attr;
+
+	ASSERT3U(fid, <, SPA_FEATURES);
+	ASSERT(name);
+
+	if (zfs_kobj_init(child, ZPOOL_FEATURE_ATTR_COUNT, 0,
+	    pool_feature_show) != 0)
+		return;
+
+	for (attr = 0; attr < ZPOOL_FEATURE_ATTR_COUNT; attr++)
+		zfs_kobj_add_attr(child, attr, pool_feature_attrs[attr]);
+
+	if (zfs_kobj_add(child, &parent->zko_kobj, name) != 0)
+		zfs_kobj_release(&child->zko_kobj);
+}
+
+/*
+ * Build the pool-feature part of the sysfs tree beneath 'parent'.
+ *
+ * Returns 0 on success, or the error from initializing/adding the
+ * top-level kobject.  Errors on individual feature entries are not
+ * propagated.
+ */
+static int
+zfs_pool_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
+{
+	int rc;
+
+	/*
+	 * The container kobject comes first:
+	 *
+	 * '/sys/module/zfs/features.pool'
+	 */
+	rc = zfs_kobj_init(zfs_kobj, 0, SPA_FEATURES, pool_feature_show);
+	if (rc != 0)
+		return (rc);
+
+	rc = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_POOL_FEATURES);
+	if (rc != 0) {
+		zfs_kobj_release(&zfs_kobj->zko_kobj);
+		return (rc);
+	}
+
+	/*
+	 * Then one child kobject per spa_feature_table[] entry, named after
+	 * the feature's guid:
+	 *
+	 * '/sys/module/zfs/features.pool/<feature>'
+	 */
+	for (spa_feature_t fid = 0; fid < SPA_FEATURES; fid++) {
+		pool_feature_to_kobj(zfs_kobj, fid,
+		    spa_feature_table[fid].fi_guid);
+	}
+
+	return (0);
+}
+
+/*
+ * Context handed to zprop_to_kobj() through zprop_iter_common().
+ */
+typedef struct prop_to_kobj_arg {
+	zprop_desc_t	*p2k_table;	/* indexed by prop; supplies pd_name */
+	zfs_mod_kobj_t	*p2k_parent;	/* kobject whose children are filled */
+	sysfs_show_func	p2k_show_func;	/* show handler for each child */
+	int		p2k_attr_count;	/* attributes added to each child */
+} prop_to_kobj_arg_t;
+
+/*
+ * Property iterator callback: create the kobject for property 'prop' in
+ * the matching child slot of the parent given by 'args' (a
+ * prop_to_kobj_arg_t).  Always returns ZPROP_CONT so that a failure on
+ * one property does not stop the iteration.
+ */
+static int
+zprop_to_kobj(int prop, void *args)
+{
+	prop_to_kobj_arg_t *ctx = args;
+	zfs_mod_kobj_t *parent = ctx->p2k_parent;
+	zfs_mod_kobj_t *child = &parent->zko_children[prop];
+	const char *name = ctx->p2k_table[prop].pd_name;
+	int nattrs = ctx->p2k_attr_count;
+
+	ASSERT(name);
+
+	if (zfs_kobj_init(child, nattrs, 0, ctx->p2k_show_func) != 0)
+		return (ZPROP_CONT);
+
+	for (int a = 0; a < nattrs; a++)
+		zfs_kobj_add_attr(child, a, zprop_attrs[a]);
+
+	if (zfs_kobj_add(child, &parent->zko_kobj, name) != 0)
+		zfs_kobj_release(&child->zko_kobj);
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Build the properties part of the sysfs tree beneath 'parent' for either
+ * pool properties (type == ZFS_TYPE_POOL) or dataset properties (any
+ * other type).
+ *
+ * Returns 0 on success, or the error from initializing/adding the
+ * top-level kobject.  Errors on individual properties are not propagated.
+ */
+static int
+zfs_sysfs_properties_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent,
+    zfs_type_t type)
+{
+	prop_to_kobj_arg_t context;
+	const char *dirname;
+	int nprops;
+	int rc;
+
+	/*
+	 * Select the property table, attribute count and show handler that
+	 * go with the requested type.
+	 */
+	context.p2k_parent = zfs_kobj;
+	if (type == ZFS_TYPE_POOL) {
+		dirname = ZFS_SYSFS_POOL_PROPERTIES;
+		context.p2k_table = zpool_prop_get_table();
+		context.p2k_attr_count = ZPOOL_PROP_ATTR_COUNT;
+		context.p2k_show_func = pool_property_show;
+		nprops = ZPOOL_NUM_PROPS;
+	} else {
+		dirname = ZFS_SYSFS_DATASET_PROPERTIES;
+		context.p2k_table = zfs_prop_get_table();
+		context.p2k_attr_count = ZFS_PROP_ATTR_COUNT;
+		context.p2k_show_func = dataset_property_show;
+		nprops = ZFS_NUM_PROPS;
+	}
+
+	/*
+	 * The container kobject comes first:
+	 *
+	 * '/sys/module/zfs/properties.<type>'
+	 */
+	rc = zfs_kobj_init(zfs_kobj, 0, nprops, context.p2k_show_func);
+	if (rc != 0)
+		return (rc);
+
+	rc = zfs_kobj_add(zfs_kobj, parent, dirname);
+	if (rc != 0) {
+		zfs_kobj_release(&zfs_kobj->zko_kobj);
+		return (rc);
+	}
+
+	/*
+	 * Then one child kobject per property:
+	 *
+	 * '/sys/module/zfs/properties.<type>/<property>'
+	 */
+	(void) zprop_iter_common(zprop_to_kobj, &context, B_TRUE,
+	    B_FALSE, type);
+
+	return (0);
+}
+
+/*
+ * Create the zfs sysfs hierarchy: kernel features, pool features, pool
+ * properties and dataset properties, in that order.  If any stage fails,
+ * the stages already set up are torn down and the function returns
+ * without reporting an error.
+ *
+ * For a built-in zfs the tree hangs off a "zfs" kobject created under
+ * fs_kobj; otherwise it hangs off this module's own kobject.
+ */
+void
+zfs_sysfs_init(void)
+{
+	struct kobject *parent;
+
+#if defined(CONFIG_ZFS) && !defined(CONFIG_ZFS_MODULE)
+	parent = kobject_create_and_add("zfs", fs_kobj);
+#else
+	parent = &(((struct module *)(THIS_MODULE))->mkobj).kobj;
+#endif
+	if (parent == NULL)
+		return;
+
+	if (zfs_kernel_features_init(&kernel_features_kobj, parent) != 0)
+		return;
+
+	if (zfs_pool_features_init(&pool_features_kobj, parent) != 0) {
+		zfs_kobj_fini(&kernel_features_kobj);
+		return;
+	}
+
+	if (zfs_sysfs_properties_init(&pool_props_kobj, parent,
+	    ZFS_TYPE_POOL) != 0) {
+		zfs_kobj_fini(&kernel_features_kobj);
+		zfs_kobj_fini(&pool_features_kobj);
+		return;
+	}
+
+	if (zfs_sysfs_properties_init(&dataset_props_kobj, parent,
+	    ZFS_TYPE_FILESYSTEM) != 0) {
+		zfs_kobj_fini(&kernel_features_kobj);
+		zfs_kobj_fini(&pool_features_kobj);
+		zfs_kobj_fini(&pool_props_kobj);
+	}
+}
+
+/*
+ * Tear down the sysfs hierarchy built by zfs_sysfs_init().
+ */
+void
+zfs_sysfs_fini(void)
+{
+	/*
+	 * Remove top-level kobjects; each will remove any children kobjects
+	 */
+	zfs_kobj_fini(&kernel_features_kobj);
+	zfs_kobj_fini(&pool_features_kobj);
+	zfs_kobj_fini(&dataset_props_kobj);
+	zfs_kobj_fini(&pool_props_kobj);
+}

diff --git a/zfs/module/os/linux/zfs/zfs_uio.c b/zfs/module/os/linux/zfs/zfs_uio.c
new file mode 100644
index 0000000..11cd62f
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_uio.c

@@ -0,0 +1,330 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+/*
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ */
+
+#ifdef _KERNEL
+
+#include <sys/types.h>
+#include <sys/uio_impl.h>
+#include <sys/sysmacros.h>
+#include <sys/strings.h>
+#include <linux/kmap_compat.h>
+#include <linux/uaccess.h>
+
+/*
+ * Move "n" bytes at byte address "p"; "rw" indicates the direction
+ * of the move, and the I/O parameters are provided in "uio", which is
+ * updated to reflect the data which was moved.  Returns 0 on success or
+ * a non-zero errno on failure.
+ *
+ * UIO_READ copies from "p" into the iovec; any other "rw" value copies
+ * from the iovec into "p".  The loop stops once "n" bytes have been moved
+ * or uio_resid reaches zero, whichever comes first.
+ */
+static int
+zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+{
+	const struct iovec *iov = uio->uio_iov;
+	size_t skip = uio->uio_skip;
+	ulong_t cnt;
+
+	while (n && uio->uio_resid) {
+		/* Bytes to move this pass: rest of this iovec, capped at n. */
+		cnt = MIN(iov->iov_len - skip, n);
+		switch (uio->uio_segflg) {
+		case UIO_USERSPACE:
+			/*
+			 * p = kernel data pointer
+			 * iov->iov_base = user data pointer
+			 */
+			if (rw == UIO_READ) {
+				if (copy_to_user(iov->iov_base+skip, p, cnt))
+					return (EFAULT);
+			} else {
+				unsigned long b_left = 0;
+				if (uio->uio_fault_disable) {
+					/*
+					 * Caller asked for no page faults:
+					 * check access first, then copy with
+					 * page faults disabled.
+					 */
+					if (!zfs_access_ok(VERIFY_READ,
+					    (iov->iov_base + skip), cnt)) {
+						return (EFAULT);
+					}
+					pagefault_disable();
+					b_left =
+					    __copy_from_user_inatomic(p,
+					    (iov->iov_base + skip), cnt);
+					pagefault_enable();
+				} else {
+					b_left =
+					    copy_from_user(p,
+					    (iov->iov_base + skip), cnt);
+				}
+				if (b_left > 0) {
+					/*
+					 * Partial copy: account for the bytes
+					 * that did make it before failing.
+					 */
+					unsigned long c_bytes =
+					    cnt - b_left;
+					uio->uio_skip += c_bytes;
+					ASSERT3U(uio->uio_skip, <,
+					    iov->iov_len);
+					uio->uio_resid -= c_bytes;
+					uio->uio_loffset += c_bytes;
+					return (EFAULT);
+				}
+			}
+			break;
+		case UIO_SYSSPACE:
+			if (rw == UIO_READ)
+				bcopy(p, iov->iov_base + skip, cnt);
+			else
+				bcopy(iov->iov_base + skip, p, cnt);
+			break;
+		default:
+			ASSERT(0);
+		}
+		skip += cnt;
+		/* Current iovec fully consumed: step to the next one. */
+		if (skip == iov->iov_len) {
+			skip = 0;
+			uio->uio_iov = (++iov);
+			uio->uio_iovcnt--;
+		}
+		uio->uio_skip = skip;
+		uio->uio_resid -= cnt;
+		uio->uio_loffset += cnt;
+		p = (caddr_t)p + cnt;
+		n -= cnt;
+	}
+	return (0);
+}
+
+/*
+ * Variant of the uio move for a uio described by an array of bio_vec
+ * entries.  Each segment's page is mapped with zfs_kmap_atomic() for the
+ * duration of the copy.  UIO_READ copies from "p" into the page; any other
+ * "rw" value copies from the page into "p".  The uio is advanced by the
+ * number of bytes moved.  Always returns 0.
+ */
+static int
+zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+{
+	const struct bio_vec *bv = uio->uio_bvec;
+	size_t skip = uio->uio_skip;
+	ulong_t cnt;
+
+	while (n && uio->uio_resid) {
+		void *paddr;
+		/* Bytes to move this pass: rest of this bvec, capped at n. */
+		cnt = MIN(bv->bv_len - skip, n);
+
+		paddr = zfs_kmap_atomic(bv->bv_page);
+		if (rw == UIO_READ)
+			bcopy(p, paddr + bv->bv_offset + skip, cnt);
+		else
+			bcopy(paddr + bv->bv_offset + skip, p, cnt);
+		zfs_kunmap_atomic(paddr);
+
+		skip += cnt;
+		/* Current bvec fully consumed: step to the next one. */
+		if (skip == bv->bv_len) {
+			skip = 0;
+			uio->uio_bvec = (++bv);
+			uio->uio_iovcnt--;
+		}
+		uio->uio_skip = skip;
+		uio->uio_resid -= cnt;
+		uio->uio_loffset += cnt;
+		p = (caddr_t)p + cnt;
+		n -= cnt;
+	}
+	return (0);
+}
+
+#if defined(HAVE_VFS_IOV_ITER)
+/*
+ * Variant of the uio move for a uio backed by an iov_iter.  At most
+ * MIN(n, uio_resid) bytes are copied with copy_to_iter() (UIO_READ) or
+ * copy_from_iter() (any other "rw" value).  A non-zero uio_skip is first
+ * applied to the iterator with iov_iter_advance().
+ *
+ * When "revert" is set the iterator is rewound by the number of bytes
+ * copied.  uio_resid and uio_loffset of "uio" are updated either way.
+ *
+ * Returns 0 on success, or EFAULT when no bytes were copied.
+ */
+static int
+zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
+    boolean_t revert)
+{
+	size_t cnt = MIN(n, uio->uio_resid);
+
+	if (uio->uio_skip)
+		iov_iter_advance(uio->uio_iter, uio->uio_skip);
+
+	/* From here on cnt is the number of bytes actually copied. */
+	if (rw == UIO_READ)
+		cnt = copy_to_iter(p, cnt, uio->uio_iter);
+	else
+		cnt = copy_from_iter(p, cnt, uio->uio_iter);
+
+	/*
+	 * When operating on a full pipe no bytes are processed.
+	 * In which case return EFAULT which is converted to EAGAIN
+	 * by the kernel's generic_file_splice_read() function.
+	 */
+	if (cnt == 0)
+		return (EFAULT);
+
+	/*
+	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
+	 * to avoid consuming the uio and its iov_iter structure.
+	 */
+	if (revert)
+		iov_iter_revert(uio->uio_iter, cnt);
+
+	uio->uio_resid -= cnt;
+	uio->uio_loffset += cnt;
+
+	return (0);
+}
+#endif
+
+/*
+ * Move up to "n" bytes between "p" and "uio", consuming the uio.
+ * Dispatches on uio_segflg: UIO_BVEC and (when HAVE_VFS_IOV_ITER is
+ * defined) UIO_ITER have dedicated helpers; every other segment type is
+ * handled by the iovec helper.  Returns the helper's result (0 or an
+ * errno).
+ */
+int
+zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
+{
+	if (uio->uio_segflg == UIO_BVEC)
+		return (zfs_uiomove_bvec(p, n, rw, uio));
+#if defined(HAVE_VFS_IOV_ITER)
+	else if (uio->uio_segflg == UIO_ITER)
+		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
+#endif
+	else
+		return (zfs_uiomove_iov(p, n, rw, uio));
+}
+EXPORT_SYMBOL(zfs_uiomove);
+
+/*
+ * Fault in the pages of the first n bytes specified by the uio structure.
+ * 1 byte in each page is touched and the uio struct is unmodified. Any
+ * error stops the prefaulting early, as this is only a best attempt to
+ * get the pages resident.
+ *
+ * Returns 0 on success or EFAULT if a user page could not be read.
+ */
+int
+zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
+{
+	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
+		/* There's never a need to fault in kernel pages */
+		return (0);
+#if defined(HAVE_VFS_IOV_ITER)
+	} else if (uio->uio_segflg == UIO_ITER) {
+		/*
+		 * With at least a Linux 4.9 kernel,
+		 * iov_iter_fault_in_readable() can be relied on to fault in
+		 * user pages when referenced.
+		 */
+		if (iov_iter_fault_in_readable(uio->uio_iter, n))
+			return (EFAULT);
+#endif
+	} else {
+		/* Fault in all user pages */
+		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
+		const struct iovec *iov = uio->uio_iov;
+		int iovcnt = uio->uio_iovcnt;
+		size_t skip = uio->uio_skip;
+		uint8_t tmp;
+		caddr_t p;
+
+		/* Only the first iovec honors uio_skip; skip resets to 0. */
+		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
+			ulong_t cnt = MIN(iov->iov_len - skip, n);
+			/* empty iov */
+			if (cnt == 0)
+				continue;
+			n -= cnt;
+			/* touch each page in this segment. */
+			p = iov->iov_base + skip;
+			while (cnt) {
+				if (copy_from_user(&tmp, p, 1))
+					return (EFAULT);
+				ulong_t incr = MIN(cnt, PAGESIZE);
+				p += incr;
+				cnt -= incr;
+			}
+			/* touch the last byte in case it straddles a page. */
+			p--;
+			if (copy_from_user(&tmp, p, 1))
+				return (EFAULT);
+		}
+	}
+
+	return (0);
+}
+EXPORT_SYMBOL(zfs_uio_prefaultpages);
+
+/*
+ * The same as zfs_uiomove() but doesn't modify uio structure.
+ * return in cbytes how many bytes were copied.
+ *
+ * The move is carried out on a local copy of *uio, so the bookkeeping
+ * fields of the caller's uio are untouched.  For UIO_ITER the helper is
+ * additionally asked to revert the iterator it advanced.  *cbytes is set
+ * even when an error is returned.
+ */
+int
+zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
+{
+	zfs_uio_t uio_copy;
+	int ret;
+
+	bcopy(uio, &uio_copy, sizeof (zfs_uio_t));
+
+	if (uio->uio_segflg == UIO_BVEC)
+		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
+#if defined(HAVE_VFS_IOV_ITER)
+	else if (uio->uio_segflg == UIO_ITER)
+		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
+#endif
+	else
+		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
+
+	/* Bytes copied = how far the copy's resid dropped below the original. */
+	*cbytes = uio->uio_resid - uio_copy.uio_resid;
+
+	return (ret);
+}
+EXPORT_SYMBOL(zfs_uiocopy);
+
+/*
+ * Drop the next n chars out of *uio.
+ *
+ * If n exceeds uio_resid the request is ignored and the uio is left
+ * unchanged.  Otherwise the segment position is advanced past n bytes and
+ * uio_loffset/uio_resid are adjusted accordingly.
+ */
+void
+zfs_uioskip(zfs_uio_t *uio, size_t n)
+{
+	if (n > uio->uio_resid)
+		return;
+
+	if (uio->uio_segflg == UIO_BVEC) {
+		uio->uio_skip += n;
+		/* Step over every bvec that the new skip fully covers. */
+		while (uio->uio_iovcnt &&
+		    uio->uio_skip >= uio->uio_bvec->bv_len) {
+			uio->uio_skip -= uio->uio_bvec->bv_len;
+			uio->uio_bvec++;
+			uio->uio_iovcnt--;
+		}
+#if defined(HAVE_VFS_IOV_ITER)
+	} else if (uio->uio_segflg == UIO_ITER) {
+		iov_iter_advance(uio->uio_iter, n);
+#endif
+	} else {
+		uio->uio_skip += n;
+		/* Step over every iovec that the new skip fully covers. */
+		while (uio->uio_iovcnt &&
+		    uio->uio_skip >= uio->uio_iov->iov_len) {
+			uio->uio_skip -= uio->uio_iov->iov_len;
+			uio->uio_iov++;
+			uio->uio_iovcnt--;
+		}
+	}
+	uio->uio_loffset += n;
+	uio->uio_resid -= n;
+}
+EXPORT_SYMBOL(zfs_uioskip);
+
+#endif /* _KERNEL */

diff --git a/zfs/module/os/linux/zfs/zfs_vfsops.c b/zfs/module/os/linux/zfs/zfs_vfsops.c
new file mode 100644
index 0000000..e620eb4
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_vfsops.c

@@ -0,0 +1,2185 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ */
+
+/* Portions Copyright 2010 Robert Milkowski */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/mntent.h>
+#include <sys/cmn_err.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_dir.h>
+#include <sys/zil.h>
+#include <sys/fs/zfs.h>
+#include <sys/dmu.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_deleg.h>
+#include <sys/spa.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
+#include <sys/policy.h>
+#include <sys/atomic.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_quota.h>
+#include <sys/sunddi.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+#include <sys/spa_boot.h>
+#include <sys/objlist.h>
+#include <sys/zpl.h>
+#include <linux/vfs_compat.h>
+#include "zfs_comutil.h"
+
+/*
+ * Identifiers for the mount options listed in zpl_tokens[];
+ * zfsvfs_parse_option() switches on these values.  TOKEN_LAST marks the
+ * terminating entry of the table.
+ */
+enum {
+	TOKEN_RO,
+	TOKEN_RW,
+	TOKEN_SETUID,
+	TOKEN_NOSETUID,
+	TOKEN_EXEC,
+	TOKEN_NOEXEC,
+	TOKEN_DEVICES,
+	TOKEN_NODEVICES,
+	TOKEN_DIRXATTR,
+	TOKEN_SAXATTR,
+	TOKEN_XATTR,
+	TOKEN_NOXATTR,
+	TOKEN_ATIME,
+	TOKEN_NOATIME,
+	TOKEN_RELATIME,
+	TOKEN_NORELATIME,
+	TOKEN_NBMAND,
+	TOKEN_NONBMAND,
+	TOKEN_MNTPOINT,
+	TOKEN_LAST,
+};
+
+/*
+ * Table passed to match_token() by zfsvfs_parse_options(): pairs each
+ * token identifier with its mount option pattern.  Only the mntpoint
+ * option takes an argument ("=%s").
+ */
+static const match_table_t zpl_tokens = {
+	{ TOKEN_RO,		MNTOPT_RO },
+	{ TOKEN_RW,		MNTOPT_RW },
+	{ TOKEN_SETUID,		MNTOPT_SETUID },
+	{ TOKEN_NOSETUID,	MNTOPT_NOSETUID },
+	{ TOKEN_EXEC,		MNTOPT_EXEC },
+	{ TOKEN_NOEXEC,		MNTOPT_NOEXEC },
+	{ TOKEN_DEVICES,	MNTOPT_DEVICES },
+	{ TOKEN_NODEVICES,	MNTOPT_NODEVICES },
+	{ TOKEN_DIRXATTR,	MNTOPT_DIRXATTR },
+	{ TOKEN_SAXATTR,	MNTOPT_SAXATTR },
+	{ TOKEN_XATTR,		MNTOPT_XATTR },
+	{ TOKEN_NOXATTR,	MNTOPT_NOXATTR },
+	{ TOKEN_ATIME,		MNTOPT_ATIME },
+	{ TOKEN_NOATIME,	MNTOPT_NOATIME },
+	{ TOKEN_RELATIME,	MNTOPT_RELATIME },
+	{ TOKEN_NORELATIME,	MNTOPT_NORELATIME },
+	{ TOKEN_NBMAND,		MNTOPT_NBMAND },
+	{ TOKEN_NONBMAND,	MNTOPT_NONBMAND },
+	{ TOKEN_MNTPOINT,	MNTOPT_MNTPOINT "=%s" },
+	{ TOKEN_LAST,		NULL },
+};
+
+/*
+ * Release a vfs_t together with the mount point string it may own.
+ * Passing NULL is allowed and does nothing.
+ */
+static void
+zfsvfs_vfs_free(vfs_t *vfsp)
+{
+	if (vfsp == NULL)
+		return;
+
+	if (vfsp->vfs_mntpoint != NULL)
+		kmem_strfree(vfsp->vfs_mntpoint);
+
+	kmem_free(vfsp, sizeof (vfs_t));
+}
+
+/*
+ * Record a single parsed mount option in 'vfsp'.
+ *
+ * 'token' is the identifier match_token() produced for the option and
+ * 'args' holds its substring arguments (only used for TOKEN_MNTPOINT).
+ * For each on/off style option both the value and the matching vfs_do_*
+ * flag are set, the latter marking the option as explicitly given.
+ * Unrecognized tokens are ignored.
+ *
+ * Returns 0 on success or ENOMEM if the mount point string could not be
+ * duplicated.
+ */
+static int
+zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
+{
+	switch (token) {
+	case TOKEN_RO:
+		vfsp->vfs_readonly = B_TRUE;
+		vfsp->vfs_do_readonly = B_TRUE;
+		break;
+	case TOKEN_RW:
+		vfsp->vfs_readonly = B_FALSE;
+		vfsp->vfs_do_readonly = B_TRUE;
+		break;
+	case TOKEN_SETUID:
+		vfsp->vfs_setuid = B_TRUE;
+		vfsp->vfs_do_setuid = B_TRUE;
+		break;
+	case TOKEN_NOSETUID:
+		vfsp->vfs_setuid = B_FALSE;
+		vfsp->vfs_do_setuid = B_TRUE;
+		break;
+	case TOKEN_EXEC:
+		vfsp->vfs_exec = B_TRUE;
+		vfsp->vfs_do_exec = B_TRUE;
+		break;
+	case TOKEN_NOEXEC:
+		vfsp->vfs_exec = B_FALSE;
+		vfsp->vfs_do_exec = B_TRUE;
+		break;
+	case TOKEN_DEVICES:
+		vfsp->vfs_devices = B_TRUE;
+		vfsp->vfs_do_devices = B_TRUE;
+		break;
+	case TOKEN_NODEVICES:
+		vfsp->vfs_devices = B_FALSE;
+		vfsp->vfs_do_devices = B_TRUE;
+		break;
+	case TOKEN_DIRXATTR:
+		vfsp->vfs_xattr = ZFS_XATTR_DIR;
+		vfsp->vfs_do_xattr = B_TRUE;
+		break;
+	case TOKEN_SAXATTR:
+		vfsp->vfs_xattr = ZFS_XATTR_SA;
+		vfsp->vfs_do_xattr = B_TRUE;
+		break;
+	case TOKEN_XATTR:
+		/* Plain "xattr" selects the directory-based flavor. */
+		vfsp->vfs_xattr = ZFS_XATTR_DIR;
+		vfsp->vfs_do_xattr = B_TRUE;
+		break;
+	case TOKEN_NOXATTR:
+		vfsp->vfs_xattr = ZFS_XATTR_OFF;
+		vfsp->vfs_do_xattr = B_TRUE;
+		break;
+	case TOKEN_ATIME:
+		vfsp->vfs_atime = B_TRUE;
+		vfsp->vfs_do_atime = B_TRUE;
+		break;
+	case TOKEN_NOATIME:
+		vfsp->vfs_atime = B_FALSE;
+		vfsp->vfs_do_atime = B_TRUE;
+		break;
+	case TOKEN_RELATIME:
+		vfsp->vfs_relatime = B_TRUE;
+		vfsp->vfs_do_relatime = B_TRUE;
+		break;
+	case TOKEN_NORELATIME:
+		vfsp->vfs_relatime = B_FALSE;
+		vfsp->vfs_do_relatime = B_TRUE;
+		break;
+	case TOKEN_NBMAND:
+		vfsp->vfs_nbmand = B_TRUE;
+		vfsp->vfs_do_nbmand = B_TRUE;
+		break;
+	case TOKEN_NONBMAND:
+		vfsp->vfs_nbmand = B_FALSE;
+		vfsp->vfs_do_nbmand = B_TRUE;
+		break;
+	case TOKEN_MNTPOINT:
+		/*
+		 * The option may be given more than once; drop the string
+		 * from an earlier occurrence so it is not leaked.  The last
+		 * occurrence wins.
+		 */
+		if (vfsp->vfs_mntpoint != NULL) {
+			kmem_strfree(vfsp->vfs_mntpoint);
+			vfsp->vfs_mntpoint = NULL;
+		}
+		vfsp->vfs_mntpoint = match_strdup(&args[0]);
+		if (vfsp->vfs_mntpoint == NULL)
+			return (SET_ERROR(ENOMEM));
+
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Parse the raw mntopts and return a vfs_t describing the options.
+ *
+ * 'mntopts' is a comma separated option string and may be NULL, in which
+ * case a zeroed vfs_t is returned.  Empty elements are skipped.  The
+ * input string is not modified; parsing works on a private copy.
+ *
+ * On success 0 is returned and *vfsp owns the new vfs_t, which the caller
+ * releases with zfsvfs_vfs_free().  On failure an errno is returned,
+ * *vfsp is left untouched and nothing remains allocated.
+ */
+static int
+zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
+{
+	vfs_t *tmp_vfsp;
+	int error;
+
+	tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
+
+	if (mntopts != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		char *tmp_mntopts, *p, *t;
+		int token;
+
+		/* strsep() modifies its input, so work on a copy. */
+		tmp_mntopts = t = kmem_strdup(mntopts);
+		if (tmp_mntopts == NULL) {
+			/* Don't leak the vfs_t allocated above. */
+			zfsvfs_vfs_free(tmp_vfsp);
+			return (SET_ERROR(ENOMEM));
+		}
+
+		while ((p = strsep(&t, ",")) != NULL) {
+			if (!*p)
+				continue;
+
+			args[0].to = args[0].from = NULL;
+			token = match_token(p, zpl_tokens, args);
+			error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
+			if (error) {
+				kmem_strfree(tmp_mntopts);
+				zfsvfs_vfs_free(tmp_vfsp);
+				return (error);
+			}
+		}
+
+		kmem_strfree(tmp_mntopts);
+	}
+
+	*vfsp = tmp_vfsp;
+
+	return (0);
+}
+
+/*
+ * Report whether the super block of this filesystem has SB_RDONLY set.
+ */
+boolean_t
+zfs_is_readonly(zfsvfs_t *zfsvfs)
+{
+	struct super_block *sb = zfsvfs->z_sb;
+
+	return ((sb->s_flags & SB_RDONLY) != 0);
+}
+
+/*
+ * Sync handler.  With wait == 0 nothing is done.  Otherwise, if the super
+ * block has a zfsvfs attached, that filesystem's intent log (when it has
+ * one) is committed; if it has none, all pools are synced.  Returns 0 on
+ * every path visible here.
+ */
+/*ARGSUSED*/
+int
+zfs_sync(struct super_block *sb, int wait, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = sb->s_fs_info;
+
+	/*
+	 * Semantically, the only requirement is that the sync be initiated.
+	 * The DMU syncs out txgs frequently, so there's nothing to do.
+	 */
+	if (!wait)
+		return (0);
+
+	if (zfsvfs != NULL) {
+		/*
+		 * Sync a specific filesystem.
+		 */
+		dsl_pool_t *dp;
+
+		ZFS_ENTER(zfsvfs);
+		dp = dmu_objset_pool(zfsvfs->z_os);
+
+		/*
+		 * If the system is shutting down, then skip any
+		 * filesystems which may exist on a suspended pool.
+		 */
+		if (spa_suspended(dp->dp_spa)) {
+			ZFS_EXIT(zfsvfs);
+			return (0);
+		}
+
+		if (zfsvfs->z_log != NULL)
+			zil_commit(zfsvfs->z_log, 0);
+
+		ZFS_EXIT(zfsvfs);
+	} else {
+		/*
+		 * Sync all ZFS filesystems.  This is what happens when you
+		 * run sync(1).  Unlike other filesystems, ZFS honors the
+		 * request by waiting for all pools to commit all dirty data.
+		 */
+		spa_sync_allpools();
+	}
+
+	return (0);
+}
+
+/*
+ * Property callback for atime: mirror the setting into the SB_NOATIME
+ * bit of the VFS super block (cleared when atime is on, set when it is
+ * off).  Does nothing when no super block is attached.
+ */
+static void
+atime_changed_cb(void *arg, uint64_t newval)
+{
+	struct super_block *sb = ((zfsvfs_t *)arg)->z_sb;
+
+	if (sb == NULL)
+		return;
+
+	/*
+	 * Whether an atime update happens is decided by
+	 * atime_needs_update(), so the flag has to follow the property in
+	 * both directions.
+	 */
+	if (newval == 0)
+		sb->s_flags |= SB_NOATIME;
+	else
+		sb->s_flags &= ~SB_NOATIME;
+}
+
+/*
+ * Property callback for relatime: cache the new value in the zfsvfs.
+ */
+static void
+relatime_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	zfsvfs->z_relatime = newval;
+}
+
+/*
+ * Property callback for xattr.  ZFS_XATTR_OFF clears ZSB_XATTR and leaves
+ * z_xattr_sa as it was; any other value sets ZSB_XATTR and records
+ * whether the SA flavor was selected.
+ */
+static void
+xattr_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	if (newval == ZFS_XATTR_OFF) {
+		zfsvfs->z_flags &= ~ZSB_XATTR;
+		return;
+	}
+
+	zfsvfs->z_flags |= ZSB_XATTR;
+	zfsvfs->z_xattr_sa = (newval == ZFS_XATTR_SA) ? B_TRUE : B_FALSE;
+}
+
+/*
+ * Property callback for acltype.  Updates z_acl_type and the SB_POSIXACL
+ * bit of the super block.  NFSv4 is treated the same as off here, and
+ * POSIX ACLs are only enabled when the kernel is built with
+ * CONFIG_FS_POSIX_ACL.  Unknown values are ignored.
+ *
+ * NOTE(review): unlike the other callbacks that touch the super block,
+ * z_sb is dereferenced here without a NULL check - confirm callers
+ * guarantee it is set.
+ */
+static void
+acltype_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	switch (newval) {
+	case ZFS_ACLTYPE_NFSV4:
+	case ZFS_ACLTYPE_OFF:
+		zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
+		zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
+		break;
+	case ZFS_ACLTYPE_POSIX:
+#ifdef CONFIG_FS_POSIX_ACL
+		zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIX;
+		zfsvfs->z_sb->s_flags |= SB_POSIXACL;
+#else
+		zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
+		zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
+#endif /* CONFIG_FS_POSIX_ACL */
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Property callback for recordsize: cache the new maximum block size.
+ * The value is asserted to be a power of two between SPA_MINBLOCKSIZE
+ * and the pool's maximum block size.
+ */
+static void
+blksz_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
+	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
+	ASSERT(ISP2(newval));
+
+	zfsvfs->z_max_blksz = newval;
+}
+
+/*
+ * Property callback for readonly: mirror the setting into the SB_RDONLY
+ * bit of the super block.  Does nothing when no super block is attached.
+ */
+static void
+readonly_changed_cb(void *arg, uint64_t newval)
+{
+	struct super_block *sb = ((zfsvfs_t *)arg)->z_sb;
+
+	if (sb == NULL)
+		return;
+
+	if (newval == 0)
+		sb->s_flags &= ~SB_RDONLY;
+	else
+		sb->s_flags |= SB_RDONLY;
+}
+
+/*
+ * Property callback for devices.  No-op: nothing is updated here when
+ * the property changes.
+ */
+static void
+devices_changed_cb(void *arg, uint64_t newval)
+{
+}
+
+/*
+ * Property callback for setuid.  No-op: nothing is updated here when
+ * the property changes.
+ */
+static void
+setuid_changed_cb(void *arg, uint64_t newval)
+{
+}
+
+/*
+ * Property callback for exec.  No-op: nothing is updated here when
+ * the property changes.
+ */
+static void
+exec_changed_cb(void *arg, uint64_t newval)
+{
+}
+
+/*
+ * Property callback for nbmand: SB_MANDLOCK is set on the super block
+ * only when the new value equals TRUE, and cleared for every other
+ * value.  Does nothing when no super block is attached.
+ */
+static void
+nbmand_changed_cb(void *arg, uint64_t newval)
+{
+	struct super_block *sb = ((zfsvfs_t *)arg)->z_sb;
+
+	if (sb == NULL)
+		return;
+
+	if (newval != TRUE)
+		sb->s_flags &= ~SB_MANDLOCK;
+	else
+		sb->s_flags |= SB_MANDLOCK;
+}
+
+/*
+ * Property callback for snapdir: cache the new value in the zfsvfs.
+ */
+static void
+snapdir_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	zfsvfs->z_show_ctldir = newval;
+}
+
+/*
+ * Property callback for vscan: cache the new value in the zfsvfs.
+ */
+static void
+vscan_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	zfsvfs->z_vscan = newval;
+}
+
+/*
+ * Property callback for aclmode: cache the new value in the zfsvfs.
+ */
+static void
+acl_mode_changed_cb(void *arg, uint64_t newval)
+{
+	((zfsvfs_t *)arg)->z_acl_mode = newval;
+}
+
+/*
+ * Property callback for aclinherit: cache the new value in the zfsvfs.
+ */
+static void
+acl_inherit_changed_cb(void *arg, uint64_t newval)
+{
+	zfsvfs_t *zfsvfs = arg;
+
+	zfsvfs->z_acl_inherit = newval;
+}
+
+/*
+ * Register the dataset property callbacks for the filesystem behind
+ * 'vfsp', then re-apply any options that were explicitly given at mount
+ * time (the vfs_do_* flags) so they override the stored properties.
+ *
+ * Returns 0 on success.  If any registration fails, all callbacks
+ * registered for this zfsvfs are unregistered and the first error is
+ * returned.
+ */
+static int
+zfs_register_callbacks(vfs_t *vfsp)
+{
+	struct dsl_dataset *ds = NULL;
+	objset_t *os = NULL;
+	zfsvfs_t *zfsvfs = NULL;
+	int error = 0;
+
+	ASSERT(vfsp);
+	zfsvfs = vfsp->vfs_data;
+	ASSERT(zfsvfs);
+	os = zfsvfs->z_os;
+
+	/*
+	 * The act of registering our callbacks will destroy any mount
+	 * options we may have.  In order to enable temporary overrides
+	 * of mount options, we stash away the current values and
+	 * restore them after we register the callbacks.
+	 */
+	if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) {
+		vfsp->vfs_do_readonly = B_TRUE;
+		vfsp->vfs_readonly = B_TRUE;
+	}
+
+	/*
+	 * Register property callbacks.
+	 *
+	 * It would probably be fine to just check for i/o error from
+	 * the first prop_register(), but I guess I like to go
+	 * overboard...
+	 */
+	ds = dmu_objset_ds(os);
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+	/* Each step below is skipped once an earlier one has failed. */
+	error = dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
+	    zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
+	if (error)
+		goto unregister;
+
+	/*
+	 * Invoke our callbacks to restore temporary mount options.
+	 */
+	if (vfsp->vfs_do_readonly)
+		readonly_changed_cb(zfsvfs, vfsp->vfs_readonly);
+	if (vfsp->vfs_do_setuid)
+		setuid_changed_cb(zfsvfs, vfsp->vfs_setuid);
+	if (vfsp->vfs_do_exec)
+		exec_changed_cb(zfsvfs, vfsp->vfs_exec);
+	if (vfsp->vfs_do_devices)
+		devices_changed_cb(zfsvfs, vfsp->vfs_devices);
+	if (vfsp->vfs_do_xattr)
+		xattr_changed_cb(zfsvfs, vfsp->vfs_xattr);
+	if (vfsp->vfs_do_atime)
+		atime_changed_cb(zfsvfs, vfsp->vfs_atime);
+	if (vfsp->vfs_do_relatime)
+		relatime_changed_cb(zfsvfs, vfsp->vfs_relatime);
+	if (vfsp->vfs_do_nbmand)
+		nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand);
+
+	return (0);
+
+unregister:
+	dsl_prop_unregister_all(ds, zfsvfs);
+	return (error);
+}
+
+/*
+ * Takes a dataset, a property, a value and that value's setpoint as
+ * found in the ZAP. Checks if the property has been changed in the vfs.
+ * If so, val and setpoint will be overwritten with updated content.
+ * Otherwise, they are left unchanged.
+ *
+ * Returns 0 on success (whether or not anything was overwritten), the
+ * error from dmu_objset_from_ds(), EINVAL if the objset is not of type
+ * DMU_OST_ZFS, ESRCH if the objset has no user pointer set, or ENOENT
+ * for a property that has no temporary mount-option override.
+ */
+int
+zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
+    char *setpoint)
+{
+	int error;
+	zfsvfs_t *zfvp;
+	vfs_t *vfsp;
+	objset_t *os;
+	uint64_t tmp = *val;
+
+	error = dmu_objset_from_ds(ds, &os);
+	if (error != 0)
+		return (error);
+
+	if (dmu_objset_type(os) != DMU_OST_ZFS)
+		return (EINVAL);
+
+	mutex_enter(&os->os_user_ptr_lock);
+	zfvp = dmu_objset_get_user(os);
+	mutex_exit(&os->os_user_ptr_lock);
+	if (zfvp == NULL)
+		return (ESRCH);
+
+	vfsp = zfvp->z_vfs;
+
+	/*
+	 * A vfs_do_* flag marks an option that was explicitly given; only
+	 * then does the vfs value replace the one passed in.
+	 */
+	switch (zfs_prop) {
+	case ZFS_PROP_ATIME:
+		if (vfsp->vfs_do_atime)
+			tmp = vfsp->vfs_atime;
+		break;
+	case ZFS_PROP_RELATIME:
+		if (vfsp->vfs_do_relatime)
+			tmp = vfsp->vfs_relatime;
+		break;
+	case ZFS_PROP_DEVICES:
+		if (vfsp->vfs_do_devices)
+			tmp = vfsp->vfs_devices;
+		break;
+	case ZFS_PROP_EXEC:
+		if (vfsp->vfs_do_exec)
+			tmp = vfsp->vfs_exec;
+		break;
+	case ZFS_PROP_SETUID:
+		if (vfsp->vfs_do_setuid)
+			tmp = vfsp->vfs_setuid;
+		break;
+	case ZFS_PROP_READONLY:
+		if (vfsp->vfs_do_readonly)
+			tmp = vfsp->vfs_readonly;
+		break;
+	case ZFS_PROP_XATTR:
+		if (vfsp->vfs_do_xattr)
+			tmp = vfsp->vfs_xattr;
+		break;
+	case ZFS_PROP_NBMAND:
+		if (vfsp->vfs_do_nbmand)
+			tmp = vfsp->vfs_nbmand;
+		break;
+	default:
+		return (ENOENT);
+	}
+
+	/* Report an override only when it actually differs from *val. */
+	if (tmp != *val) {
+		(void) strcpy(setpoint, "temporary");
+		*val = tmp;
+	}
+	return (0);
+}
+
+/*
+ * Look up an optional 8-byte object number stored under 'name' in the
+ * master node of 'os'.
+ *
+ * A missing entry is not an error: ENOENT is translated into *objp = 0
+ * with a return value of 0.  Any other zap_lookup() error is returned
+ * unchanged.
+ */
+static int
+zfsvfs_lookup_optional_obj(objset_t *os, const char *name, uint64_t *objp)
+{
+	int error;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, name, 8, 1, objp);
+	if (error == ENOENT) {
+		*objp = 0;
+		error = 0;
+	}
+	return (error);
+}
+
+/*
+ * Associate this zfsvfs with the given objset, which must be owned.
+ * This will cache a bunch of on-disk state from the objset in the
+ * zfsvfs: the ZPL version, the normalization/utf8only/case/acltype
+ * properties, the root and unlinked-set objects, the optional quota,
+ * FUID and shares objects, and the SA attribute table.
+ *
+ * Returns 0 on success, ENOTSUP when the file system's ZPL version is
+ * newer than what the pool version maps to, or the error of the first
+ * mandatory lookup that fails.
+ */
+static int
+zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
+{
+	int error;
+	uint64_t val;
+
+	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
+	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
+	zfsvfs->z_os = os;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
+	if (error != 0)
+		return (error);
+	if (zfsvfs->z_version >
+	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+		/*
+		 * Both arguments are cast to u_longlong_t, so they are
+		 * printed with %llu.  The first sentence ends with its
+		 * period before the line break.
+		 */
+		(void) printk("Can't mount a version %llu file system "
+		    "on a version %llu pool.\nPool must be upgraded to mount "
+		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
+		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
+		return (SET_ERROR(ENOTSUP));
+	}
+	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_norm = (int)val;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_utf8 = (val != 0);
+
+	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
+	if (error != 0)
+		return (error);
+	zfsvfs->z_case = (uint_t)val;
+
+	if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0)
+		return (error);
+	zfsvfs->z_acl_type = (uint_t)val;
+
+	/*
+	 * Fold case on file systems that are always or sometimes case
+	 * insensitive.
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+	    zfsvfs->z_case == ZFS_CASE_MIXED)
+		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
+
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+	uint64_t sa_obj = 0;
+	if (zfsvfs->z_use_sa) {
+		/* should either have both of these objects or none */
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+		    &sa_obj);
+		if (error != 0)
+			return (error);
+
+		/* A failed xattr property lookup is deliberately ignored. */
+		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
+		if ((error == 0) && (val == ZFS_XATTR_SA))
+			zfsvfs->z_xattr_sa = B_TRUE;
+	}
+
+	/* The root directory and the unlinked set are mandatory. */
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
+	    &zfsvfs->z_root);
+	if (error != 0)
+		return (error);
+	ASSERT(zfsvfs->z_root != 0);
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+	    &zfsvfs->z_unlinkedobj);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Everything below is optional: a missing entry leaves the
+	 * corresponding object number at 0.
+	 */
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
+	    &zfsvfs->z_userquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
+	    &zfsvfs->z_groupquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
+	    &zfsvfs->z_projectquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
+	    &zfsvfs->z_userobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
+	    &zfsvfs->z_groupobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
+	    &zfsvfs->z_projectobjquota_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os, ZFS_FUID_TABLES,
+	    &zfsvfs->z_fuid_obj);
+	if (error != 0)
+		return (error);
+
+	error = zfsvfs_lookup_optional_obj(os, ZFS_SHARES_DIR,
+	    &zfsvfs->z_shares_dir);
+	if (error != 0)
+		return (error);
+
+	/* sa_obj is still 0 here when the file system does not use SA. */
+	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+	    &zfsvfs->z_attr_table);
+	if (error != 0)
+		return (error);
+
+	if (zfsvfs->z_version >= ZPL_VERSION_SA)
+		sa_register_update_callback(os, zfs_sa_upgrade);
+
+	return (0);
+}
+
+/*
+ * Allocate a zfsvfs_t, take ownership of the objset named 'osname' and
+ * hand both to zfsvfs_create_impl() to finish construction.
+ *
+ * The objset is owned read-only when 'readonly' is set or when the name
+ * contains a '@'.  If ownership cannot be obtained the freshly allocated
+ * zfsvfs_t is freed here and the error is returned.
+ */
+int
+zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
+{
+	zfsvfs_t *newfs;
+	objset_t *objset;
+	boolean_t own_ro;
+	int rc;
+
+	own_ro = ((strchr(osname, '@') != NULL) || readonly);
+	newfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+
+	rc = dmu_objset_own(osname, DMU_OST_ZFS, own_ro, B_TRUE, newfs,
+	    &objset);
+	if (rc == 0)
+		return (zfsvfs_create_impl(zfvp, newfs, objset));
+
+	kmem_free(newfs, sizeof (zfsvfs_t));
+	return (rc);
+}
+
+
+/*
+ * Finish constructing a zfsvfs_t around an objset that is already owned.
+ *
+ * The zfsvfs must have been heap allocated: when zfsvfs_init() fails,
+ * this function disowns the objset, frees the zfsvfs and stores NULL in
+ * *zfvp.  Never pass in a statically allocated zfsvfs.
+ */
+int
+zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
+{
+	int nholds, idx, rc;
+
+	zfsvfs->z_parent = zfsvfs;
+	zfsvfs->z_sb = NULL;
+	zfsvfs->z_vfs = NULL;
+
+	/* Locks and the list of all znodes. */
+	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
+	    offsetof(znode_t, z_link_node));
+	ZFS_TEARDOWN_INIT(zfsvfs);
+	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
+	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
+
+	/*
+	 * One AVL tree plus one mutex per znode hold bucket.  The bucket
+	 * count is derived from zfs_object_mutex_size via highbit64() and
+	 * capped at ZFS_OBJ_MTX_MAX.
+	 */
+	nholds = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
+	    ZFS_OBJ_MTX_MAX);
+	zfsvfs->z_hold_size = nholds;
+	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * nholds,
+	    KM_SLEEP);
+	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * nholds,
+	    KM_SLEEP);
+	for (idx = 0; idx != nholds; idx++) {
+		avl_create(&zfsvfs->z_hold_trees[idx], zfs_znode_hold_compare,
+		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
+		mutex_init(&zfsvfs->z_hold_locks[idx], NULL, MUTEX_DEFAULT,
+		    NULL);
+	}
+
+	rc = zfsvfs_init(zfsvfs, os);
+	if (rc == 0) {
+		zfsvfs->z_drain_task = TASKQID_INVALID;
+		zfsvfs->z_draining = B_FALSE;
+		zfsvfs->z_drain_cancel = B_TRUE;
+
+		*zfvp = zfsvfs;
+		return (0);
+	}
+
+	/* Initialization failed: give back the objset and the zfsvfs. */
+	dmu_objset_disown(os, B_TRUE, zfsvfs);
+	*zfvp = NULL;
+	zfsvfs_free(zfsvfs);
+	return (rc);
+}
+
+/*
+ * Bring an initialized zfsvfs into service: register the property
+ * callbacks, open the ZIL and, when 'mounting' is set, create the dataset
+ * kstats, drain the unlinked set (writable file systems only) and replay
+ * or destroy the intent log.  Finally the objset's user pointer is set to
+ * this zfsvfs.
+ *
+ * Returns the error from zfs_register_callbacks() if that fails (before
+ * the ZIL is opened), otherwise 0.
+ */
+static int
+zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
+{
+	int error;
+	boolean_t readonly = zfs_is_readonly(zfsvfs);
+
+	error = zfs_register_callbacks(zfsvfs->z_vfs);
+	if (error)
+		return (error);
+
+	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
+
+	/*
+	 * If we are not mounting (ie: online recv), then we don't
+	 * have to worry about replaying the log as we blocked all
+	 * operations out since we closed the ZIL.
+	 */
+	if (mounting) {
+		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
+		/*
+		 * During replay we remove the read only flag to
+		 * allow replays to succeed.
+		 */
+		if (readonly != 0) {
+			readonly_changed_cb(zfsvfs, B_FALSE);
+		} else {
+			zap_stats_t zs;
+			/*
+			 * Publish the current size of the unlinked set in
+			 * the kstats before starting to drain it.
+			 */
+			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+			    &zs) == 0) {
+				dataset_kstats_update_nunlinks_kstat(
+				    &zfsvfs->z_kstat, zs.zs_num_entries);
+				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+				    "num_entries in unlinked set: %llu",
+				    zs.zs_num_entries);
+			}
+			zfs_unlinked_drain(zfsvfs);
+			dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
+			dd->dd_activity_cancelled = B_FALSE;
+		}
+
+		/*
+		 * Parse and replay the intent log.
+		 *
+		 * Because of ziltest, this must be done after
+		 * zfs_unlinked_drain().  (Further note: ziltest
+		 * doesn't use readonly mounts, where
+		 * zfs_unlinked_drain() isn't called.)  This is because
+		 * ziltest causes spa_sync() to think it's committed,
+		 * but actually it is not, so the intent log contains
+		 * many txg's worth of changes.
+		 *
+		 * In particular, if object N is in the unlinked set in
+		 * the last txg to actually sync, then it could be
+		 * actually freed in a later txg and then reallocated
+		 * in a yet later txg.  This would write a "create
+		 * object N" record to the intent log.  Normally, this
+		 * would be fine because the spa_sync() would have
+		 * written out the fact that object N is free, before
+		 * we could write the "create object N" intent log
+		 * record.
+		 *
+		 * But when we are in ziltest mode, we advance the "open
+		 * txg" without actually spa_sync()-ing the changes to
+		 * disk.  So we would see that object N is still
+		 * allocated and in the unlinked set, and there is an
+		 * intent log record saying to allocate it.
+		 */
+		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
+			if (zil_replay_disable) {
+				zil_destroy(zfsvfs->z_log, B_FALSE);
+			} else {
+				/* z_replay is set only while replaying. */
+				zfsvfs->z_replay = B_TRUE;
+				zil_replay(zfsvfs->z_os, zfsvfs,
+				    zfs_replay_vector);
+				zfsvfs->z_replay = B_FALSE;
+			}
+		}
+
+		/* restore readonly bit */
+		if (readonly != 0)
+			readonly_changed_cb(zfsvfs, B_TRUE);
+	}
+
+	/*
+	 * Set the objset user_ptr to track its zfsvfs.
+	 */
+	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
+
+	return (0);
+}
+
+/*
+ * Release every resource attached to a zfsvfs_t (FUID state, locks, the
+ * znode list, the per-bucket hold trees and mutexes, the vfs_t and the
+ * dataset kstats) and then free the zfsvfs_t itself.
+ */
+void
+zfsvfs_free(zfsvfs_t *zfsvfs)
+{
+	int nholds = zfsvfs->z_hold_size;
+
+	zfs_fuid_destroy(zfsvfs);
+
+	mutex_destroy(&zfsvfs->z_znodes_lock);
+	mutex_destroy(&zfsvfs->z_lock);
+	list_destroy(&zfsvfs->z_all_znodes);
+	ZFS_TEARDOWN_DESTROY(zfsvfs);
+	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
+	rw_destroy(&zfsvfs->z_fuid_lock);
+
+	/* Tear down each hold bucket before releasing the two arrays. */
+	for (int idx = 0; idx != nholds; idx++) {
+		avl_destroy(&zfsvfs->z_hold_trees[idx]);
+		mutex_destroy(&zfsvfs->z_hold_locks[idx]);
+	}
+	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * nholds);
+	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * nholds);
+
+	zfsvfs_vfs_free(zfsvfs->z_vfs);
+	dataset_kstats_destroy(&zfsvfs->z_kstat);
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+}
+
+/*
+ * Recompute the cached z_use_sa and z_use_fuids flags from the file
+ * system's ZPL version and its objset.
+ */
+static void
+zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
+{
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+}
+
+/*
+ * Drop every property callback registered on behalf of this zfsvfs.
+ * Nothing is done when the objset is a snapshot.
+ */
+static void
+zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
+{
+	objset_t *objset = zfsvfs->z_os;
+
+	if (dmu_objset_is_snapshot(objset))
+		return;
+
+	dsl_prop_unregister_all(dmu_objset_ds(objset), zfsvfs);
+}
+
+#ifdef HAVE_MLSLABEL
+/*
+ * Validate the hex label string of a dataset that is being mounted into
+ * the global_zone proper.
+ *
+ * The default label and admin_high are always accepted.  admin_low is
+ * accepted only when the dataset's readonly property can be read and is
+ * set.  Everything else yields EACCES.
+ */
+int
+zfs_check_global_label(const char *dsname, const char *hexsl)
+{
+	uint64_t rdonly;
+
+	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0 ||
+	    strcasecmp(hexsl, ADMIN_HIGH) == 0)
+		return (0);
+
+	if (strcasecmp(hexsl, ADMIN_LOW) != 0)
+		return (SET_ERROR(EACCES));
+
+	/* admin_low: the dataset must be readonly */
+	if (dsl_prop_get_integer(dsname,
+	    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL) != 0)
+		return (SET_ERROR(EACCES));
+
+	if (rdonly == 0)
+		return (SET_ERROR(EACCES));
+	return (0);
+}
+#endif /* HAVE_MLSLABEL */
+
+/*
+ * Override the statfs numbers in 'statp' with the project quota of the
+ * project that 'zp' belongs to.
+ *
+ * When a space quota exists for the project, f_blocks/f_bfree/f_bavail
+ * are replaced; when an object quota exists, f_files/f_ffree are
+ * replaced.  A project without the respective quota entry leaves those
+ * fields untouched.  Returns 0 or the first unexpected lookup error.
+ */
+static int
+zfs_statfs_project(zfsvfs_t *zfsvfs, znode_t *zp, struct kstatfs *statp,
+    uint32_t bshift)
+{
+	/* "<object accounting prefix><project id>"; idstr skips the prefix */
+	char key[20 + DMU_OBJACCT_PREFIX_LEN];
+	char *idstr = key + DMU_OBJACCT_PREFIX_LEN;
+	uint64_t limit;
+	uint64_t consumed;
+	int rc;
+
+	strlcpy(key, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
+	rc = zfs_id_to_fuidstr(zfsvfs, NULL, zp->z_projid, idstr,
+	    sizeof (key) - DMU_OBJACCT_PREFIX_LEN, B_FALSE);
+	if (rc != 0)
+		return (rc);
+
+	/* Space quota, looked up by the bare project id string. */
+	if (zfsvfs->z_projectquota_obj != 0) {
+		rc = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectquota_obj,
+		    idstr, 8, 1, &limit);
+		if (rc != 0 && rc != ENOENT)
+			return (rc);
+
+		if (rc == 0) {
+			rc = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
+			    idstr, 8, 1, &consumed);
+			if (unlikely(rc == ENOENT)) {
+				uint32_t blksize;
+				u_longlong_t nblocks;
+
+				/*
+				 * Quota accounting is asynchronous, so the
+				 * usage entry may not exist yet.  At least
+				 * this object carries the project ID, so
+				 * fall back to its own size.
+				 */
+				sa_object_size(zp->z_sa_hdl, &blksize,
+				    &nblocks);
+				if (unlikely(zp->z_blksz == 0))
+					blksize = zfsvfs->z_max_blksz;
+
+				consumed = blksize * nblocks;
+			} else if (rc != 0) {
+				return (rc);
+			}
+
+			statp->f_blocks = limit >> bshift;
+			if (limit > consumed)
+				statp->f_bfree = (limit - consumed) >> bshift;
+			else
+				statp->f_bfree = 0;
+			statp->f_bavail = statp->f_bfree;
+		}
+	}
+
+	/* Object quota. */
+	if (zfsvfs->z_projectobjquota_obj == 0)
+		return (0);
+
+	rc = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectobjquota_obj,
+	    idstr, 8, 1, &limit);
+	if (rc == ENOENT)
+		return (0);
+	if (rc != 0)
+		return (rc);
+
+	/* Object usage is keyed by the prefixed name, hence 'key'. */
+	rc = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
+	    key, 8, 1, &consumed);
+	if (unlikely(rc == ENOENT)) {
+		/*
+		 * Same asynchronous-accounting race as above: at least one
+		 * object with the given project ID exists.
+		 */
+		consumed = 1;
+	} else if (rc != 0) {
+		return (rc);
+	}
+
+	statp->f_files = limit;
+	if (limit > consumed)
+		statp->f_ffree = limit - consumed;
+	else
+		statp->f_ffree = 0;
+
+	return (0);
+}
+
+/*
+ * Fill in 'statp' for the file system that inode 'ip' belongs to.
+ *
+ * Space and object counts come from dmu_objset_space().  When project
+ * quotas are enabled and present, and the inode inherits a valid non-zero
+ * project ID, the numbers are further overridden by
+ * zfs_statfs_project(), whose error (if any) is returned.
+ */
+int
+zfs_statvfs(struct inode *ip, struct kstatfs *statp)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	uint64_t refd, avail, nused, nfree;
+	int err = 0;
+
+	ZFS_ENTER(zfsvfs);
+
+	dmu_objset_space(zfsvfs->z_os, &refd, &avail, &nused, &nfree);
+
+	uint64_t guid = dmu_objset_fsid_guid(zfsvfs->z_os);
+
+	/*
+	 * A storage pool uses several block sizes.  Solaris reports the
+	 * smallest supported block size as frsize (fragment size) and the
+	 * file system's maximum block size as bsize.  Linux consumers
+	 * often treat the two interchangeably, so both are reported as
+	 * the file system's maximum block size.
+	 */
+	statp->f_bsize = zfsvfs->z_max_blksz;
+	statp->f_frsize = zfsvfs->z_max_blksz;
+	uint32_t bshift = fls(statp->f_bsize) - 1;
+
+	/*
+	 * Block totals, expressed in units of f_bsize (the "preferred"
+	 * size).  The referenced bytes are rounded up first so that a
+	 * file system never appears to use 0 blocks.
+	 */
+	refd = P2ROUNDUP(refd, statp->f_bsize);
+	statp->f_bfree = avail >> bshift;
+	statp->f_bavail = statp->f_bfree; /* no root reservation */
+	statp->f_blocks = (refd + avail) >> bshift;
+
+	/*
+	 * statvfs() assumes statically allocated metadata, which ZFS does
+	 * not have.  The best available answer is: f_ffree is the smaller
+	 * of the available objects and the available blocks (every object
+	 * needs at least one block), and f_files is that plus the number
+	 * of objects in use.
+	 */
+	statp->f_ffree = MIN(nfree, avail >> DNODE_SHIFT);
+	statp->f_files = statp->f_ffree + nused;
+
+	statp->f_type = ZFS_SUPER_MAGIC;
+	statp->f_namelen = MAXNAMELEN - 1;
+	statp->f_fsid.val[0] = (uint32_t)guid;
+	statp->f_fsid.val[1] = (uint32_t)(guid >> 32);
+
+	/*
+	 * There are 40 characters of spare room here; nothing useful is
+	 * stored in them at the moment.
+	 */
+	bzero(statp->f_spare, sizeof (statp->f_spare));
+
+	if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
+	    dmu_objset_projectquota_present(zfsvfs->z_os)) {
+		znode_t *zp = ITOZ(ip);
+
+		if ((zp->z_pflags & ZFS_PROJINHERIT) != 0 &&
+		    zp->z_projid != 0 &&
+		    zpl_is_valid_projid(zp->z_projid))
+			err = zfs_statfs_project(zfsvfs, zp, statp, bshift);
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (err);
+}
+
+/*
+ * Look up the znode of the file system's root object and return its
+ * inode through *ipp.  *ipp is left untouched when the lookup fails.
+ */
+static int
+zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
+{
+	znode_t *rootzp;
+	int rc;
+
+	ZFS_ENTER(zfsvfs);
+
+	rc = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
+	if (rc != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (rc);
+	}
+
+	*ipp = ZTOI(rootzp);
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Linux kernels older than 3.1 have no per-filesystem shrinker.  As a
+ * substitute, walk the list of znodes by hand and try to prune their
+ * dentries so that the inodes can be dropped.
+ *
+ * Every znode that is looked at is rotated to the tail of z_all_znodes.
+ * New znodes are appended at the tail as well, so the oldest znodes are
+ * always scanned first and the same znode is not revisited needlessly.
+ *
+ * Returns the number of inodes whose only remaining reference, after
+ * pruning, was the one taken here.
+ */
+static int
+zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
+{
+	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
+	znode_t **zp_array;
+	znode_t *zp;
+	int scanned = 0;
+	int held = 0;
+	int objects = 0;
+
+	zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+	/* Phase 1: under the list lock, collect and pin candidates. */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	for (;;) {
+		zp = list_head(&zfsvfs->z_all_znodes);
+		if (zp == NULL)
+			break;
+
+		/* The counter is bumped on every pass, even the last. */
+		if ((scanned++ > nr_to_scan) || (held >= max_array))
+			break;
+
+		ASSERT(list_link_active(&zp->z_link_node));
+		list_remove(&zfsvfs->z_all_znodes, zp);
+		list_insert_tail(&zfsvfs->z_all_znodes, zp);
+
+		/*
+		 * Active znodes and .zfs entries are skipped, as are inodes
+		 * that can no longer be grabbed.
+		 */
+		if (!MUTEX_HELD(&zp->z_lock) && !zp->z_is_ctldir &&
+		    igrab(ZTOI(zp)) != NULL)
+			zp_array[held++] = zp;
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	/* Phase 2: prune outside the lock and drop the references. */
+	for (int idx = 0; idx < held; idx++) {
+		zp = zp_array[idx];
+
+		ASSERT3P(zp, !=, NULL);
+		d_prune_aliases(ZTOI(zp));
+
+		if (atomic_read(&ZTOI(zp)->i_count) == 1)
+			objects++;
+
+		zrele(zp);
+	}
+
+	vmem_free(zp_array, max_array * sizeof (znode_t *));
+
+	return (objects);
+}
+
+/*
+ * The ARC has requested that the filesystem drop entries from the dentry
+ * and inode caches.  This can occur when the ARC needs to free meta data
+ * blocks but can't because they are all pinned by entries in these caches.
+ *
+ * sb         - super block whose caches should be pruned
+ * nr_to_scan - number of objects the pruning mechanism is asked to scan
+ * objects    - out: number of objects reported by the mechanism in use
+ *
+ * The mechanism is chosen at compile time: the kernel's per-superblock
+ * shrinker when available (per NUMA node if the shrinker is NUMA aware),
+ * otherwise zfs_prune_aliases().  'error' is never assigned after its
+ * initialization, so the value returned from the end of this function is
+ * always 0.
+ */
+int
+zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
+{
+	zfsvfs_t *zfsvfs = sb->s_fs_info;
+	int error = 0;
+	struct shrinker *shrinker = &sb->s_shrink;
+	struct shrink_control sc = {
+		.nr_to_scan = nr_to_scan,
+		.gfp_mask = GFP_KERNEL,
+	};
+
+	ZFS_ENTER(zfsvfs);
+
+#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
+	defined(SHRINK_CONTROL_HAS_NID) && \
+	defined(SHRINKER_NUMA_AWARE)
+	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
+		*objects = 0;
+		for_each_online_node(sc.nid) {
+			*objects += (*shrinker->scan_objects)(shrinker, &sc);
+			/*
+			 * reset sc.nr_to_scan, modified by
+			 * scan_objects == super_cache_scan
+			 */
+			sc.nr_to_scan = nr_to_scan;
+		}
+	} else {
+		*objects = (*shrinker->scan_objects)(shrinker, &sc);
+	}
+
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	*objects = (*shrinker->scan_objects)(shrinker, &sc);
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	*objects = (*shrinker->shrink)(shrinker, &sc);
+#elif defined(HAVE_D_PRUNE_ALIASES)
+#define	D_PRUNE_ALIASES_IS_DEFAULT
+	*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
+#else
+#error "No available dentry and inode cache pruning mechanism."
+#endif
+
+#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
+	/*
+	 * Fall back to zfs_prune_aliases if the kernel's per-superblock
+	 * shrinker couldn't free anything, possibly due to the inodes being
+	 * allocated in a different memcg.
+	 */
+	if (*objects == 0)
+		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
+#endif
+	/*
+	 * The helper macro is only meaningful inside this function.  Drop
+	 * it unconditionally, outside the conditional above, so that it is
+	 * removed in exactly the configuration that defined it.
+	 */
+#undef	D_PRUNE_ALIASES_IS_DEFAULT
+
+	ZFS_EXIT(zfsvfs);
+
+	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
+	    nr_to_scan, *objects, error);
+
+	return (error);
+}
+
+/*
+ * Teardown the zfsvfs_t.
+ *
+ * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
+ * and 'z_teardown_inactive_lock' held.
+ *
+ * Returns 0 on success.  When not unmounting, EIO is returned (with both
+ * locks released again) if the file system was already unmounted or has
+ * no objset.
+ */
+static int
+zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
+{
+	znode_t	*zp;
+
+	zfs_unlinked_drain_stop_wait(zfsvfs);
+
+	/*
+	 * If someone has not already unmounted this file system,
+	 * drain the zrele_taskq to ensure all active references to the
+	 * zfsvfs_t have been handled only then can it be safely destroyed.
+	 */
+	if (zfsvfs->z_os) {
+		/*
+		 * If we're unmounting we have to wait for the list to
+		 * drain completely.
+		 *
+		 * If we're not unmounting there's no guarantee the list
+		 * will drain completely, but iputs run from the taskq
+		 * may add the parents of dir-based xattrs to the taskq
+		 * so we want to wait for these.
+		 *
+		 * We can safely read z_nr_znodes without locking because the
+		 * VFS has already blocked operations which add to the
+		 * z_all_znodes list and thus increment z_nr_znodes.
+		 */
+		int round = 0;
+		while (zfsvfs->z_nr_znodes > 0) {
+			taskq_wait_outstanding(dsl_pool_zrele_taskq(
+			    dmu_objset_pool(zfsvfs->z_os)), 0);
+			/* Not unmounting: give up after the second wait. */
+			if (++round > 1 && !unmounting)
+				break;
+		}
+	}
+
+	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
+
+	if (!unmounting) {
+		/*
+		 * We purge the parent filesystem's super block as the
+		 * parent filesystem and all of its snapshots have their
+		 * inode's super block set to the parent's filesystem's
+		 * super block.  Note,  'z_parent' is self referential
+		 * for non-snapshots.
+		 */
+		shrink_dcache_sb(zfsvfs->z_parent->z_sb);
+	}
+
+	/*
+	 * Close the zil. NB: Can't close the zil while zfs_inactive
+	 * threads are blocked as zil_close can call zfs_inactive.
+	 */
+	if (zfsvfs->z_log) {
+		zil_close(zfsvfs->z_log);
+		zfsvfs->z_log = NULL;
+	}
+
+	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
+
+	/*
+	 * If we are not unmounting (ie: online recv) and someone already
+	 * unmounted this file system while we were doing the switcheroo,
+	 * or a reopen of z_os failed then just bail out now.
+	 */
+	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
+		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+		return (SET_ERROR(EIO));
+	}
+
+	/*
+	 * At this point there are no VFS ops active, and any new VFS ops
+	 * will fail with EIO since we have z_teardown_lock for writer (only
+	 * relevant for forced unmount).
+	 *
+	 * Release all holds on dbufs. We also grab an extra reference to all
+	 * the remaining inodes so that the kernel does not attempt to free
+	 * any inodes of a suspended fs. This can cause deadlocks since the
+	 * zfs_resume_fs() process may involve starting threads, which might
+	 * attempt to free unreferenced inodes to free up memory for the new
+	 * thread.
+	 */
+	if (!unmounting) {
+		mutex_enter(&zfsvfs->z_znodes_lock);
+		for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
+		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+			if (zp->z_sa_hdl)
+				zfs_znode_dmu_fini(zp);
+			/* z_suspended records that the extra ref was taken. */
+			if (igrab(ZTOI(zp)) != NULL)
+				zp->z_suspended = B_TRUE;
+
+		}
+		mutex_exit(&zfsvfs->z_znodes_lock);
+	}
+
+	/*
+	 * If we are unmounting, set the unmounted flag and let new VFS ops
+	 * unblock.  zfs_inactive will have the unmounted behavior, and all
+	 * other VFS ops will fail with EIO.
+	 */
+	if (unmounting) {
+		zfsvfs->z_unmounted = B_TRUE;
+		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+	}
+
+	/*
+	 * z_os will be NULL if there was an error in attempting to reopen
+	 * zfsvfs, so just return as the properties had already been
+	 * unregistered and cached data had been evicted before.
+	 */
+	if (zfsvfs->z_os == NULL)
+		return (0);
+
+	/*
+	 * Unregister properties.
+	 */
+	zfs_unregister_callbacks(zfsvfs);
+
+	/*
+	 * Evict cached data. We must write out any dirty data before
+	 * disowning the dataset.
+	 */
+	objset_t *os = zfsvfs->z_os;
+	boolean_t os_dirty = B_FALSE;
+	/* The objset counts as dirty if it is dirty in any of the txgs. */
+	for (int t = 0; t < TXG_SIZE; t++) {
+		if (dmu_objset_is_dirty(os, t)) {
+			os_dirty = B_TRUE;
+			break;
+		}
+	}
+	if (!zfs_is_readonly(zfsvfs) && os_dirty) {
+		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+	}
+	dmu_objset_evict_dbufs(zfsvfs->z_os);
+	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
+	dsl_dir_cancel_waiters(dd);
+
+	return (0);
+}
+
+#if defined(HAVE_SUPER_SETUP_BDI_NAME)
+/*
+ * Global atomic counter, starting at 0, that only exists on kernels
+ * providing super_setup_bdi_name().
+ * NOTE(review): presumably used to build unique BDI names in
+ * zpl_bdi_setup() - confirm against its definition.
+ */
+atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
+#endif
+
+/*
+ * Mount the dataset named by zm->mnt_osname on super block 'sb'.
+ *
+ * Parses the mount options, creates the zfsvfs_t, fills in the super
+ * block, sets up either the snapshot state or the regular file system
+ * state (zfsvfs_setup()), and finally allocates the root inode and root
+ * dentry.
+ *
+ * Returns 0 on success or an error code.  On failure sb->s_fs_info is
+ * reset to NULL and the zfsvfs_t is released, either through the out:
+ * path below or through zfs_umount() for the late failures.
+ *
+ * NOTE(review): 'silent' is not referenced in this function.
+ */
+int
+zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
+{
+	const char *osname = zm->mnt_osname;
+	struct inode *root_inode = NULL;
+	uint64_t recordsize;
+	int error = 0;
+	zfsvfs_t *zfsvfs = NULL;
+	vfs_t *vfs = NULL;
+
+	ASSERT(zm);
+	ASSERT(osname);
+
+	error = zfsvfs_parse_options(zm->mnt_data, &vfs);
+	if (error)
+		return (error);
+
+	/*
+	 * Until 'vfs' is attached to zfsvfs->z_vfs below it has to be
+	 * freed explicitly on error; afterwards zfsvfs_free() releases it.
+	 */
+	error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
+	if (error) {
+		zfsvfs_vfs_free(vfs);
+		goto out;
+	}
+
+	if ((error = dsl_prop_get_integer(osname, "recordsize",
+	    &recordsize, NULL))) {
+		zfsvfs_vfs_free(vfs);
+		goto out;
+	}
+
+	vfs->vfs_data = zfsvfs;
+	zfsvfs->z_vfs = vfs;
+	zfsvfs->z_sb = sb;
+	sb->s_fs_info = zfsvfs;
+	sb->s_magic = ZFS_SUPER_MAGIC;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_time_gran = 1;
+	sb->s_blocksize = recordsize;
+	sb->s_blocksize_bits = ilog2(recordsize);
+
+	/* The result is negated before being used as this function's error. */
+	error = -zpl_bdi_setup(sb, "zfs");
+	if (error)
+		goto out;
+
+	sb->s_bdi->ra_pages = 0;
+
+	/* Set callback operations for the file system. */
+	sb->s_op = &zpl_super_operations;
+	sb->s_xattr = zpl_xattr_handlers;
+	sb->s_export_op = &zpl_export_operations;
+
+	/* Set features for file system. */
+	zfs_set_fuid_feature(zfsvfs);
+
+	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
+		uint64_t pval;
+
+		/*
+		 * Snapshots do not go through zfsvfs_setup(): atime is
+		 * turned off, readonly is forced on, and the xattr and
+		 * acltype settings are read directly from the dataset
+		 * properties.
+		 */
+		atime_changed_cb(zfsvfs, B_FALSE);
+		readonly_changed_cb(zfsvfs, B_TRUE);
+		if ((error = dsl_prop_get_integer(osname,
+		    "xattr", &pval, NULL)))
+			goto out;
+		xattr_changed_cb(zfsvfs, pval);
+		if ((error = dsl_prop_get_integer(osname,
+		    "acltype", &pval, NULL)))
+			goto out;
+		acltype_changed_cb(zfsvfs, pval);
+		zfsvfs->z_issnap = B_TRUE;
+		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
+		zfsvfs->z_snap_defer_time = jiffies;
+
+		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
+	} else {
+		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
+			goto out;
+	}
+
+	/* Allocate a root inode for the filesystem. */
+	error = zfs_root(zfsvfs, &root_inode);
+	if (error) {
+		(void) zfs_umount(sb);
+		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
+		goto out;
+	}
+
+	/* Allocate a root dentry for the filesystem */
+	sb->s_root = d_make_root(root_inode);
+	if (sb->s_root == NULL) {
+		(void) zfs_umount(sb);
+		zfsvfs = NULL; /* avoid double-free; first in zfs_umount */
+		error = SET_ERROR(ENOMEM);
+		goto out;
+	}
+
+	if (!zfsvfs->z_issnap)
+		zfsctl_create(zfsvfs);
+
+	zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
+out:
+	if (error) {
+		/*
+		 * zfsvfs is non-NULL here only if it was created and has
+		 * not already been released by zfs_umount() above.
+		 */
+		if (zfsvfs != NULL) {
+			dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
+			zfsvfs_free(zfsvfs);
+		}
+		/*
+		 * make sure we don't have dangling sb->s_fs_info which
+		 * zfs_preumount will use.
+		 */
+		sb->s_fs_info = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Called when an unmount has been requested and the initial sanity
+ * checks have passed.  No dentries or inodes have been reclaimed from
+ * their caches yet.  The extra reference on the .zfs control directory is
+ * dropped here so that everything can be reclaimed.  All snapshots must
+ * already be unmounted by the time this runs.
+ */
+void
+zfs_preumount(struct super_block *sb)
+{
+	zfsvfs_t *zfsvfs = sb->s_fs_info;
+
+	/* s_fs_info is NULL when zfs_domount failed during mount */
+	if (zfsvfs == NULL)
+		return;
+
+	zfs_unlinked_drain_stop_wait(zfsvfs);
+	zfsctl_destroy(sb->s_fs_info);
+
+	/*
+	 * All zrele_async work must be finished before
+	 * generic_shutdown_super reaches evict_inodes.  With lazytime on,
+	 * or when zfs_purgedir calls zfs_zget, zrele would bump i_count
+	 * from 0 to 1 and race with the i_count check in evict_inodes,
+	 * which could then destroy an inode that is still in use.
+	 *
+	 * Two passes are needed: xattr directories handled in the first
+	 * pass may queue xattr entries from zfs_purgedir, and the second
+	 * pass waits for those.  taskq_wait is not used because the taskq
+	 * is pool wide; other mounted file systems may keep issuing
+	 * zrele_async, so it might never become empty.
+	 */
+	for (int pass = 0; pass < 2; pass++) {
+		taskq_wait_outstanding(dsl_pool_zrele_taskq(
+		    dmu_objset_pool(zfsvfs->z_os)), 0);
+	}
+}
+
+/*
+ * Called once all other unmount related teardown has occurred.  It is
+ * our responsibility to release any remaining infrastructure: the ARC
+ * prune callback, the BDI, the objset ownership and the zfsvfs_t itself.
+ * Always returns 0.
+ */
+/*ARGSUSED*/
+int
+zfs_umount(struct super_block *sb)
+{
+	zfsvfs_t *zfsvfs = sb->s_fs_info;
+	objset_t *objset;
+
+	if (zfsvfs->z_arc_prune != NULL)
+		arc_remove_prune_callback(zfsvfs->z_arc_prune);
+
+	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
+	objset = zfsvfs->z_os;
+	zpl_bdi_destroy(sb);
+
+	/*
+	 * z_os is NULL when an earlier attempt to reopen the zfsvfs
+	 * failed; there is nothing to disown in that case.
+	 */
+	if (objset == NULL) {
+		zfsvfs_free(zfsvfs);
+		return (0);
+	}
+
+	/* Detach the zfsvfs from the objset's user pointer ... */
+	mutex_enter(&objset->os_user_ptr_lock);
+	dmu_objset_set_user(objset, NULL);
+	mutex_exit(&objset->os_user_ptr_lock);
+
+	/* ... and finally give up ownership of the objset. */
+	dmu_objset_disown(objset, B_TRUE, zfsvfs);
+
+	zfsvfs_free(zfsvfs);
+	return (0);
+}
+
+/*
+ * Apply a new set of mount options to an already mounted file system.
+ *
+ * A read-write remount of a snapshot, or of a file system on a pool that
+ * is not writeable, is refused with EROFS after forcing SB_RDONLY into
+ * *flags.  Otherwise the options are parsed into a fresh vfs_t which
+ * replaces the current one; the property callbacks are re-registered
+ * against it unless the objset is a snapshot.
+ */
+int
+zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
+{
+	zfsvfs_t *zfsvfs = sb->s_fs_info;
+	boolean_t snap = dmu_objset_is_snapshot(zfsvfs->z_os);
+	vfs_t *newvfs;
+	int rc;
+
+	if (!(*flags & SB_RDONLY) &&
+	    (snap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os)))) {
+		*flags |= SB_RDONLY;
+		return (EROFS);
+	}
+
+	rc = zfsvfs_parse_options(zm->mnt_data, &newvfs);
+	if (rc != 0)
+		return (rc);
+
+	/* Going from read-write to read-only: wait for a sync first. */
+	if ((*flags & SB_RDONLY) && !zfs_is_readonly(zfsvfs))
+		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+
+	/* Swap the old vfs_t for the newly parsed one. */
+	zfs_unregister_callbacks(zfsvfs);
+	zfsvfs_vfs_free(zfsvfs->z_vfs);
+
+	newvfs->vfs_data = zfsvfs;
+	zfsvfs->z_vfs = newvfs;
+	if (!snap)
+		(void) zfs_register_callbacks(newvfs);
+
+	return (0);
+}
+
+/*
+ * Translate a file identifier (fid) into an inode.
+ *
+ * sb   - super block the fid belongs to
+ * ipp  - out: the resulting inode; set to NULL up front
+ * fidp - the fid; only SHORT_FID_LEN and LONG_FID_LEN are accepted
+ *
+ * A long fid refers to a snapshot directory and is handed to
+ * zfsctl_snapdir_vget().  A short fid is resolved either to one of the
+ * .zfs control directories or to a regular znode whose generation must
+ * match the one stored in the fid.
+ *
+ * Returns 0 on success, EINVAL for a malformed fid, ENOENT for xattr
+ * objects, unlinked znodes and generation mismatches, or the error from
+ * zfs_zget().
+ */
+int
+zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
+{
+	zfsvfs_t	*zfsvfs = sb->s_fs_info;
+	znode_t		*zp;
+	uint64_t	object = 0;
+	uint64_t	fid_gen = 0;
+	uint64_t	gen_mask;
+	uint64_t	zp_gen;
+	int		i, err;
+
+	*ipp = NULL;
+
+	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
+		zfid_short_t	*zfid = (zfid_short_t *)fidp;
+
+		/* Both fields are byte arrays, least significant byte first. */
+		for (i = 0; i < sizeof (zfid->zf_object); i++)
+			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
+
+		for (i = 0; i < sizeof (zfid->zf_gen); i++)
+			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
+	} else {
+		return (SET_ERROR(EINVAL));
+	}
+
+	/* LONG_FID_LEN means snapdirs */
+	if (fidp->fid_len == LONG_FID_LEN) {
+		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
+		uint64_t	objsetid = 0;
+		uint64_t	setgen = 0;
+
+		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
+			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
+
+		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
+			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
+
+		if (objsetid != ZFSCTL_INO_SNAPDIRS - object) {
+			dprintf("snapdir fid: objsetid (%llu) != "
+			    "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",
+			    objsetid, ZFSCTL_INO_SNAPDIRS, object);
+
+			return (SET_ERROR(EINVAL));
+		}
+
+		if (fid_gen > 1 || setgen != 0) {
+			dprintf("snapdir fid: fid_gen (%llu) and setgen "
+			    "(%llu)\n", fid_gen, setgen);
+			return (SET_ERROR(EINVAL));
+		}
+
+		/* Every path through the long-fid case returns here. */
+		return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
+	}
+
+	ZFS_ENTER(zfsvfs);
+	/* A zero fid_gen means we are in the .zfs control directories */
+	if (fid_gen == 0 &&
+	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
+		*ipp = zfsvfs->z_ctldir;
+		ASSERT(*ipp != NULL);
+		if (object == ZFSCTL_INO_SNAPDIR) {
+			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
+			    0, kcred, NULL, NULL) == 0);
+		} else {
+			/*
+			 * Must have an existing ref, so igrab()
+			 * cannot return NULL
+			 */
+			VERIFY3P(igrab(*ipp), !=, NULL);
+		}
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/*
+	 * Only short fids reach this point, so 'i' still holds
+	 * sizeof (zfid->zf_gen) from the decoding loop above.  The mask
+	 * therefore keeps exactly as many low-order bytes of the on-disk
+	 * generation as the fid can store.
+	 */
+	gen_mask = -1ULL >> (64 - 8 * i);
+
+	dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
+	if ((err = zfs_zget(zfsvfs, object, &zp))) {
+		ZFS_EXIT(zfsvfs);
+		return (err);
+	}
+
+	/* Don't export xattr stuff */
+	if (zp->z_pflags & ZFS_XATTR) {
+		zrele(zp);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOENT));
+	}
+
+	/*
+	 * NOTE(review): the sa_lookup() result is discarded.  If the lookup
+	 * can fail here, zp_gen would be read uninitialized below - confirm.
+	 */
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+	    sizeof (uint64_t));
+	zp_gen = zp_gen & gen_mask;
+	/* A masked generation of 0 is treated as generation 1. */
+	if (zp_gen == 0)
+		zp_gen = 1;
+	/* A zero fid generation always matches the root object. */
+	if ((fid_gen == 0) && (zfsvfs->z_root == object))
+		fid_gen = zp_gen;
+	if (zp->z_unlinked || zp_gen != fid_gen) {
+		dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
+		    fid_gen);
+		zrele(zp);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOENT));
+	}
+
+	*ipp = ZTOI(zp);
+	if (*ipp)
+		zfs_znode_update_vfs(ITOZ(*ipp));
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+/*
+ * Block out VFS ops and close the zfsvfs_t.
+ *
+ * On success this returns with both 'z_teardown_lock' and
+ * 'z_teardown_inactive_lock' write held.  Ownership of the underlying
+ * dataset and objset is left intact so that they can be handed off
+ * atomically to a subsequent rollback or recv operation and to the
+ * resume that follows it.
+ */
+int
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
+{
+	/* A non-unmounting teardown is all that suspending requires. */
+	return (zfsvfs_teardown(zfsvfs, B_FALSE));
+}
+
+/*
+ * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
+ * is an invariant across any of the operations that can be performed while the
+ * filesystem was suspended.  Whether it succeeded or failed, the preconditions
+ * are the same: the relevant objset and associated dataset are owned by
+ * zfsvfs, held, and long held on entry.
+ *
+ * The caller must hold both teardown locks as writer (asserted below); both
+ * are dropped before returning, on success and on failure alike.
+ *
+ * Returns 0 on success, or the error from zfsvfs_init().  In the error case
+ * the filesystem is flagged as unmounted and, if z_os is set, a forced
+ * unmount is attempted.
+ */
+int
+zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
+{
+	int err, err2;
+	znode_t *zp;
+
+	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
+	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+
+	/*
+	 * We already own this, so just update the objset_t, as the one we
+	 * had before may have been evicted.
+	 */
+	objset_t *os;
+	VERIFY3P(ds->ds_owner, ==, zfsvfs);
+	VERIFY(dsl_dataset_long_held(ds));
+	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
+	dsl_pool_config_enter(dp, FTAG);
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	dsl_pool_config_exit(dp, FTAG);
+
+	err = zfsvfs_init(zfsvfs, os);
+	if (err != 0)
+		goto bail;
+
+	ds->ds_dir->dd_activity_cancelled = B_FALSE;
+	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
+
+	zfs_set_fuid_feature(zfsvfs);
+	zfsvfs->z_rollback_time = jiffies;
+
+	/*
+	 * Attempt to re-establish all the active inodes with their
+	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
+	 * and mark it stale.  This prevents a collision if a new
+	 * inode/object is created which must use the same inode
+	 * number.  The stale inode will be released when the
+	 * VFS prunes the dentry holding the remaining references
+	 * on the stale inode.
+	 */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
+	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+		err2 = zfs_rezget(zp);
+		if (err2) {
+			zpl_d_drop_aliases(ZTOI(zp));
+			remove_inode_hash(ZTOI(zp));
+		}
+
+		/*
+		 * Release znodes that were flagged z_suspended and clear
+		 * the flag.
+		 * NOTE(review): this comment used to point at a comment in
+		 * zfs_suspend_fs(), which contains none on this subject; the
+		 * flag is presumably set during zfsvfs_teardown() -- confirm.
+		 */
+		if (zp->z_suspended) {
+			zfs_zrele_async(zp);
+			zp->z_suspended = B_FALSE;
+		}
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+		/*
+		 * zfs_suspend_fs() could have interrupted freeing
+		 * of dnodes. We need to restart this freeing so
+		 * that we don't "leak" the space.
+		 */
+		zfs_unlinked_drain(zfsvfs);
+	}
+
+	/*
+	 * Most of the time zfs_suspend_fs is used for changing the contents
+	 * of the underlying dataset. ZFS rollback and receive operations
+	 * might create files for which negative dentries are present in
+	 * the cache. Since walking the dcache would require a lot of GPL-only
+	 * code duplication, it's much easier on these rather rare occasions
+	 * just to flush the whole dcache for the given dataset/filesystem.
+	 */
+	shrink_dcache_sb(zfsvfs->z_sb);
+
+bail:
+	/* Reached on success as well; err is only non-zero after a failed init */
+	if (err != 0)
+		zfsvfs->z_unmounted = B_TRUE;
+
+	/* release the VFS ops */
+	rw_exit(&zfsvfs->z_teardown_inactive_lock);
+	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+
+	if (err != 0) {
+		/*
+		 * Since we couldn't setup the sa framework, try to force
+		 * unmount this file system.
+		 */
+		if (zfsvfs->z_os)
+			(void) zfs_umount(zfsvfs->z_sb);
+	}
+	return (err);
+}
+
+/*
+ * Release VOPs and unmount a suspended filesystem.
+ *
+ * The caller must hold both teardown locks as writer (asserted below); they
+ * are dropped here before the unmount is attempted.  Always returns 0; the
+ * result of zfs_umount() is discarded.
+ */
+int
+zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
+{
+	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
+	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
+
+	/*
+	 * We already own this, so just hold and rele it to update the
+	 * objset_t, as the one we had before may have been evicted.
+	 */
+	objset_t *os;
+	VERIFY3P(ds->ds_owner, ==, zfsvfs);
+	VERIFY(dsl_dataset_long_held(ds));
+	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
+	dsl_pool_config_enter(dp, FTAG);
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	dsl_pool_config_exit(dp, FTAG);
+	zfsvfs->z_os = os;
+
+	/* release the VOPs */
+	rw_exit(&zfsvfs->z_teardown_inactive_lock);
+	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
+
+	/*
+	 * Try to force unmount this file system.
+	 *
+	 * NOTE(review): zfsvfs is written after zfs_umount() returns, whereas
+	 * zfs_resume_fs() sets z_unmounted before calling zfs_umount().
+	 * Confirm that zfs_umount() cannot free zfsvfs, otherwise the store
+	 * below touches freed memory.
+	 */
+	(void) zfs_umount(zfsvfs->z_sb);
+	zfsvfs->z_unmounted = B_TRUE;
+	return (0);
+}
+
+/*
+ * Automounted snapshots stay mounted only as long as they keep being
+ * revalidated.  For a snapshot mount, once enough time has passed since
+ * the last deferral, record the current time and push the pending
+ * automatic unmount out by another zfs_expire_snapshot.  Mounts that are
+ * not snapshots are left alone.
+ */
+inline void
+zfs_exit_fs(zfsvfs_t *zfsvfs)
+{
+	/* Re-arm at most once per half expiry period, and at least HZ apart */
+	if (zfsvfs->z_issnap &&
+	    time_after(jiffies, zfsvfs->z_snap_defer_time +
+	    MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
+		objset_t *os = zfsvfs->z_os;
+
+		zfsvfs->z_snap_defer_time = jiffies;
+		zfsctl_snapshot_unmount_delay(os->os_spa, dmu_objset_id(os),
+		    zfs_expire_snapshot);
+	}
+}
+
+/*
+ * Raise the ZPL version of a filesystem to 'newvers' and record it in the
+ * master node.
+ *
+ * Returns EINVAL if newvers lies outside [ZPL_VERSION_INITIAL, ZPL_VERSION]
+ * or is lower than the current version, ENOTSUP if the pool's SPA version
+ * is older than zfs_spa_version_map(newvers), or the error returned by
+ * dmu_tx_assign() or zap_update().  Returns 0 once the new version has been
+ * written and the cached copies in zfsvfs and the objset are updated.
+ */
+int
+zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
+{
+	int error;
+	objset_t *os = zfsvfs->z_os;
+	dmu_tx_t *tx;
+
+	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
+		return (SET_ERROR(EINVAL));
+
+	/* Downgrades are rejected */
+	if (newvers < zfsvfs->z_version)
+		return (SET_ERROR(EINVAL));
+
+	if (zfs_spa_version_map(newvers) >
+	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
+		return (SET_ERROR(ENOTSUP));
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+	/* Crossing into an SA-capable version also creates the SA master node */
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+		    ZFS_SA_ATTRS);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	}
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		return (error);
+	}
+
+	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+	    8, 1, &newvers, tx);
+
+	if (error) {
+		/* The tx was assigned, so it is committed rather than aborted */
+		dmu_tx_commit(tx);
+		return (error);
+	}
+
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		uint64_t sa_obj;
+
+		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+		    SPA_VERSION_SA);
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+
+		/*
+		 * NOTE(review): the zap_add() result is only checked by
+		 * ASSERT0, so it goes unchecked where assertions are
+		 * compiled out.
+		 */
+		error = zap_add(os, MASTER_NODE_OBJ,
+		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT0(error);
+
+		VERIFY(0 == sa_set_sa_object(os, sa_obj));
+		sa_register_update_callback(os, zfs_sa_upgrade);
+	}
+
+	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
+	    "from %llu to %llu", zfsvfs->z_version, newvers);
+
+	dmu_tx_commit(tx);
+
+	/* Refresh the cached versions only after the tx has been committed */
+	zfsvfs->z_version = newvers;
+	os->os_version = newvers;
+
+	zfs_set_fuid_feature(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * Fetch a ZPL property that is kept in the master node.
+ *
+ * The value is taken, in order of preference, from the copy cached in the
+ * objset_t (for the properties that have one), from the master node ZAP,
+ * or from a built-in default when no entry exists.  A value obtained by
+ * either of the latter two routes is stored back into the objset_t cache.
+ * 'os' may be NULL, in which case only the built-in default is available.
+ *
+ * Returns 0 with *value filled in, or an error code.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	uint64_t *slot = NULL;
+	const char *zap_name;
+	int rc = ENOENT;
+
+	/* Find the objset_t field that caches this property, if there is one */
+	if (os != NULL) {
+		if (prop == ZFS_PROP_VERSION)
+			slot = &os->os_version;
+		else if (prop == ZFS_PROP_NORMALIZE)
+			slot = &os->os_normalization;
+		else if (prop == ZFS_PROP_UTF8ONLY)
+			slot = &os->os_utf8only;
+		else if (prop == ZFS_PROP_CASE)
+			slot = &os->os_casesensitivity;
+	}
+
+	/* Cache hit: hand back the cached value */
+	if (slot != NULL && *slot != OBJSET_PROP_UNINITIALIZED) {
+		*value = *slot;
+		return (0);
+	}
+
+	/* The version is stored under its own name in the master node */
+	zap_name = (prop == ZFS_PROP_VERSION) ?
+	    ZPL_VERSION_STR : zfs_prop_to_name(prop);
+
+	if (os != NULL) {
+		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+		rc = zap_lookup(os, MASTER_NODE_OBJ, zap_name, 8, 1, value);
+	}
+
+	if (rc == ENOENT) {
+		/* Nothing stored (or no objset given): use the default */
+		if (prop == ZFS_PROP_VERSION) {
+			*value = ZPL_VERSION;
+		} else if (prop == ZFS_PROP_NORMALIZE ||
+		    prop == ZFS_PROP_UTF8ONLY) {
+			*value = 0;
+		} else if (prop == ZFS_PROP_CASE) {
+			*value = ZFS_CASE_SENSITIVE;
+		} else if (prop == ZFS_PROP_ACLTYPE) {
+			*value = ZFS_ACLTYPE_OFF;
+		} else {
+			return (rc);
+		}
+		rc = 0;
+	}
+
+	/* Remember a successfully obtained value for the next caller */
+	if (rc == 0 && slot != NULL)
+		*slot = *value;
+
+	return (rc);
+}
+
+/*
+ * Report whether the zfsvfs_t registered as the user of this objset has
+ * its unmounted flag set.  B_TRUE means a VFS unmount has been initiated;
+ * B_FALSE is returned when the flag is clear or no user is registered.
+ */
+boolean_t
+zfs_get_vfs_flag_unmounted(objset_t *os)
+{
+	boolean_t result;
+	zfsvfs_t *owner;
+
+	ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
+
+	/* The user pointer is read under os_user_ptr_lock */
+	mutex_enter(&os->os_user_ptr_lock);
+	owner = dmu_objset_get_user(os);
+	result = (owner != NULL && owner->z_unmounted) ? B_TRUE : B_FALSE;
+	mutex_exit(&os->os_user_ptr_lock);
+
+	return (result);
+}
+
+/*
+ * Hook taking an old and a new name.  Intentionally a no-op here: neither
+ * argument is used.
+ */
+/*ARGSUSED*/
+void
+zfsvfs_update_fromname(const char *oldname, const char *newname)
+{
+	/*
+	 * We don't need to do anything here, the devname is always current by
+	 * virtue of zfsvfs->z_sb->s_op->show_devname.
+	 */
+}
+
+/*
+ * One-time ZPL initialization: set up the control directory and znode
+ * subsystems, register the file-info callback for DMU_OST_ZFS objsets,
+ * and register the filesystem type.  zfs_fini() undoes these steps.
+ */
+void
+zfs_init(void)
+{
+	zfsctl_init();
+	zfs_znode_init();
+	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
+	/* NOTE(review): the return value of register_filesystem() is discarded */
+	register_filesystem(&zpl_fs_type);
+}
+
+/*
+ * Tear down what zfs_init() set up: wait for the system taskqs, then
+ * unregister the filesystem type and shut down the znode and control
+ * directory subsystems.
+ */
+void
+zfs_fini(void)
+{
+	/*
+	 * We don't use outstanding because zpl_posix_acl_free might add more.
+	 * NOTE(review): "outstanding" presumably refers to a
+	 * wait-for-outstanding-tasks taskq variant -- confirm.
+	 */
+	taskq_wait(system_delay_taskq);
+	taskq_wait(system_taskq);
+	unregister_filesystem(&zpl_fs_type);
+	zfs_znode_fini();
+	zfsctl_fini();
+}
+
+#if defined(_KERNEL)
+/*
+ * Symbols exported from this file in kernel builds.  Note that zfs_end_fs()
+ * is defined above but is not part of this list.
+ */
+EXPORT_SYMBOL(zfs_suspend_fs);
+EXPORT_SYMBOL(zfs_resume_fs);
+EXPORT_SYMBOL(zfs_set_version);
+EXPORT_SYMBOL(zfsvfs_create);
+EXPORT_SYMBOL(zfsvfs_free);
+EXPORT_SYMBOL(zfs_is_readonly);
+EXPORT_SYMBOL(zfs_domount);
+EXPORT_SYMBOL(zfs_preumount);
+EXPORT_SYMBOL(zfs_umount);
+EXPORT_SYMBOL(zfs_remount);
+EXPORT_SYMBOL(zfs_statvfs);
+EXPORT_SYMBOL(zfs_vget);
+EXPORT_SYMBOL(zfs_prune);
+#endif

diff --git a/zfs/module/os/linux/zfs/zfs_vnops_os.c b/zfs/module/os/linux/zfs/zfs_vnops_os.c
new file mode 100644
index 0000000..9c87a0d
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_vnops_os.c

@@ -0,0 +1,4076 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ */
+
+/* Portions Copyright 2007 Jeremy Teo */
+/* Portions Copyright 2010 Robert Milkowski */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/taskq.h>
+#include <sys/uio.h>
+#include <sys/vmsystm.h>
+#include <sys/atomic.h>
+#include <sys/pathname.h>
+#include <sys/cmn_err.h>
+#include <sys/errno.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/spa.h>
+#include <sys/txg.h>
+#include <sys/dbuf.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+#include <sys/sid.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_rlock.h>
+#include <sys/cred.h>
+#include <sys/zpl.h>
+#include <sys/zil.h>
+#include <sys/sa_impl.h>
+
+/*
+ * Programming rules.
+ *
+ * Each vnode op performs some logical unit of work.  To do this, the ZPL must
+ * properly lock its in-core state, create a DMU transaction, do the work,
+ * record this work in the intent log (ZIL), commit the DMU transaction,
+ * and wait for the intent log to commit if it is a synchronous operation.
+ * Moreover, the vnode ops must work in both normal and log replay context.
+ * The ordering of events is important to avoid deadlocks and references
+ * to freed memory.  The example below illustrates the following Big Rules:
+ *
+ *  (1) A check must be made in each zfs thread for a mounted file system.
+ *	This is done avoiding races using ZFS_ENTER(zfsvfs).
+ *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
+ *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *      can return EIO from the calling function.
+ *
+ *  (2) zrele() should always be the last thing except for zil_commit() (if
+ *	necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
+ *	last reference, the vnode/znode can be freed, so the zp may point to
+ *	freed memory.  Second, the last reference will call zfs_zinactive(),
+ *	which may induce a lot of work -- pushing cached pages (which acquires
+ *	range locks) and syncing out cached atime changes.  Third,
+ *	zfs_zinactive() may require a new tx, which could deadlock the system
+ *	if you were already holding one. This deadlock occurs because the tx
+ *	currently being operated on prevents a txg from syncing, which
+ *	prevents the new tx from progressing, resulting in a deadlock.  If you
+ *	must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
+ *	is a synonym for zrele().
+ *
+ *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
+ *	as they can span dmu_tx_assign() calls.
+ *
+ *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
+ *      dmu_tx_assign().  This is critical because we don't want to block
+ *      while holding locks.
+ *
+ *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *	reduces lock contention and CPU usage when we must wait (note that if
+ *	throughput is constrained by the storage, nearly every transaction
+ *	must wait).
+ *
+ *      Note, in particular, that if a lock is sometimes acquired before
+ *      the tx assigns, and sometimes after (e.g. z_lock), then failing
+ *      to use a non-blocking assign can deadlock the system.  The scenario:
+ *
+ *	Thread A has grabbed a lock before calling dmu_tx_assign().
+ *	Thread B is in an already-assigned tx, and blocks for this lock.
+ *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
+ *	forever, because the previous txg can't quiesce until B's tx commits.
+ *
+ *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
+ *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
+ *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
+ *	to indicate that this operation has already called dmu_tx_wait().
+ *	This will ensure that we don't retry forever, waiting a short bit
+ *	each time.
+ *
+ *  (5)	If the operation succeeded, generate the intent log entry for it
+ *	before dropping locks.  This ensures that the ordering of events
+ *	in the intent log matches the order in which they actually occurred.
+ *	During ZIL replay the zfs_log_* functions will update the sequence
+ *	number to indicate the zil transaction has replayed.
+ *
+ *  (6)	At the end of each vnode op, the DMU tx must always commit,
+ *	regardless of whether there were any errors.
+ *
+ *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
+ *	to ensure that synchronous semantics are provided when necessary.
+ *
+ * In general, this is how things should be ordered in each vnode op:
+ *
+ *	ZFS_ENTER(zfsvfs);		// exit if unmounted
+ * top:
+ *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
+ *	rw_enter(...);			// grab any other locks you need
+ *	tx = dmu_tx_create(...);	// get DMU tx
+ *	dmu_tx_hold_*();		// hold each object you might modify
+ *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ *	if (error) {
+ *		rw_exit(...);		// drop locks
+ *		zfs_dirent_unlock(dl);	// unlock directory entry
+ *		zrele(...);		// release held znodes
+ *		if (error == ERESTART) {
+ *			waited = B_TRUE;
+ *			dmu_tx_wait(tx);
+ *			dmu_tx_abort(tx);
+ *			goto top;
+ *		}
+ *		dmu_tx_abort(tx);	// abort DMU tx
+ *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		return (error);		// really out of space
+ *	}
+ *	error = do_real_work();		// do whatever this VOP does
+ *	if (error == 0)
+ *		zfs_log_*(...);		// on success, make ZIL entry
+ *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
+ *	rw_exit(...);			// drop locks
+ *	zfs_dirent_unlock(dl);		// unlock directory entry
+ *	zrele(...);			// release held znodes
+ *	zil_commit(zilog, foid);	// synchronous when necessary
+ *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	return (error);			// done, report error
+ */
+
+/*
+ * Virus scanning is unsupported.  It would be possible to add a hook
+ * here to perform the required virus scan.  This could be done
+ * entirely in the kernel or potentially as an update to invoke a
+ * scanning utility.
+ *
+ * This stub ignores all of its arguments and always returns 0.
+ */
+static int
+zfs_vscan(struct inode *ip, cred_t *cr, int async)
+{
+	return (0);
+}
+
+/*
+ * Open-time checks and bookkeeping for an inode.
+ *
+ * Returns EPERM when an append-only file is opened for writing without
+ * O_APPEND, EACCES when the virus scan hook rejects an eligible file, and
+ * 0 otherwise.  Opens with O_SYNC are counted in zp->z_sync_cnt.
+ */
+/* ARGSUSED */
+int
+zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
+{
+	znode_t	*zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error = 0;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/* The ZFS_APPENDONLY attribute permits write opens only with O_APPEND */
+	if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
+	    !(flag & O_APPEND)) {
+		error = SET_ERROR(EPERM);
+		goto out;
+	}
+
+	/* Scan non-empty, non-quarantined regular files when scanning is on */
+	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0 &&
+	    zfs_vscan(ip, cr, 0) != 0) {
+		error = SET_ERROR(EACCES);
+		goto out;
+	}
+
+	/* Track how many synchronous opens the znode currently has */
+	if (flag & O_SYNC)
+		atomic_inc_32(&zp->z_sync_cnt);
+
+out:
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Close-time bookkeeping for an inode; the counterpart of zfs_open().
+ * Drops the synchronous-open count taken for O_SYNC opens and invokes the
+ * virus scan hook for eligible files.  Returns 0 unless ZFS_ENTER() or
+ * ZFS_VERIFY_ZP() bail out.
+ */
+/* ARGSUSED */
+int
+zfs_close(struct inode *ip, int flag, cred_t *cr)
+{
+	znode_t	*zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/* Decrement the synchronous opens in the znode */
+	if (flag & O_SYNC)
+		atomic_dec_32(&zp->z_sync_cnt);
+
+	/* Same eligibility test as in zfs_open(); here a scan failure is fatal */
+	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
+		VERIFY(zfs_vscan(ip, cr, 1) == 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+#if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
+/*
+ * When a file is memory mapped, we must keep the IO data synchronized
+ * between the DMU cache and the memory mapped pages.  Update all mapped
+ * pages with the contents of the corresponding dmu buffer.
+ *
+ *	IN:	zp	- znode whose page cache is refreshed
+ *		start	- byte offset of the first byte to refresh
+ *		len	- number of bytes to refresh
+ *		os	- objset the data is read from
+ *
+ * Pages that are not present in the mapping are skipped.
+ */
+void
+update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
+{
+	struct address_space *mp = ZTOI(zp)->i_mapping;
+	/* Offset into the first page; zero for every page after it */
+	int64_t off = start & (PAGE_SIZE - 1);
+
+	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
+		uint64_t nbytes = MIN(PAGE_SIZE - off, len);
+
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		if (pp) {
+			if (mapping_writably_mapped(mp))
+				flush_dcache_page(pp);
+
+			/* Copy the range straight from the DMU into the page */
+			void *pb = kmap(pp);
+			int error = dmu_read(os, zp->z_id, start + off,
+			    nbytes, pb + off, DMU_READ_PREFETCH);
+			kunmap(pp);
+
+			if (error) {
+				SetPageError(pp);
+				ClearPageUptodate(pp);
+			} else {
+				ClearPageError(pp);
+				SetPageUptodate(pp);
+
+				if (mapping_writably_mapped(mp))
+					flush_dcache_page(pp);
+
+				mark_page_accessed(pp);
+			}
+
+			unlock_page(pp);
+			put_page(pp);
+		}
+
+		len -= nbytes;
+		off = 0;
+	}
+}
+
+/*
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages.  Preferentially read
+ * from memory mapped pages, otherwise fallback to reading through the dmu.
+ *
+ *	IN:	zp	- znode of the file to read
+ *		nbytes	- number of bytes to read
+ *		uio	- destination; reading starts at uio->uio_loffset
+ *
+ *	RETURN:	0 on success, otherwise the first error reported by
+ *		zfs_fillpage(), zfs_uiomove() or dmu_read_uio_dbuf().
+ */
+int
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
+{
+	struct inode *ip = ZTOI(zp);
+	struct address_space *mp = ip->i_mapping;
+	int64_t start = uio->uio_loffset;
+	/* Offset into the first page; zero for every page after it */
+	int64_t off = start & (PAGE_SIZE - 1);
+	int len = nbytes;
+	int error = 0;
+
+	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
+		uint64_t bytes = MIN(PAGE_SIZE - off, len);
+
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		if (pp) {
+			/*
+			 * If filemap_fault() retries there exists a window
+			 * where the page will be unlocked and not up to date.
+			 * In this case we must try and fill the page.
+			 */
+			if (unlikely(!PageUptodate(pp))) {
+				error = zfs_fillpage(ip, pp);
+				if (error) {
+					unlock_page(pp);
+					put_page(pp);
+					return (error);
+				}
+			}
+
+			ASSERT(PageUptodate(pp) || PageDirty(pp));
+
+			/* The page lock is dropped before copying out */
+			unlock_page(pp);
+
+			void *pb = kmap(pp);
+			error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
+			kunmap(pp);
+
+			if (mapping_writably_mapped(mp))
+				flush_dcache_page(pp);
+
+			mark_page_accessed(pp);
+			put_page(pp);
+		} else {
+			/* No cached page for this range: read through the DMU */
+			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
+			    uio, bytes);
+		}
+
+		len -= bytes;
+		off = 0;
+
+		if (error)
+			break;
+	}
+
+	return (error);
+}
+#endif /* _KERNEL */
+
+/*
+ * Defaults to DMU_MAX_DELETEBLKCNT.  Not referenced in this part of the
+ * file; presumably a tunable block-count threshold used when deleting
+ * files -- TODO confirm against its users.
+ */
+unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
+
+/*
+ * Write a buffer to a file through zfs_write(), using kcred.
+ *
+ *	IN:	zp	- znode of the file being written
+ *		data	- source buffer (UIO_SYSSPACE)
+ *		len	- number of bytes to write
+ *		pos	- file offset at which writing starts
+ *
+ *	OUT:	residp	- when non-NULL, receives the number of bytes that
+ *			  were left unwritten
+ *
+ *	RETURN:	0 on success, positive error code on failure.  When residp
+ *		is NULL a short write is reported as EIO.
+ *
+ * Timestamps:
+ *	zp - ctime|mtime updated if byte count > 0
+ */
+int
+zfs_write_simple(znode_t *zp, const void *data, size_t len,
+    loff_t pos, size_t *residp)
+{
+	struct iovec iov = {
+		.iov_base = (void *)data,
+		.iov_len = len,
+	};
+	zfs_uio_t uio;
+	fstrans_cookie_t cookie;
+	int error;
+
+	zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
+
+	cookie = spl_fstrans_mark();
+	error = zfs_write(zp, &uio, 0, kcred);
+	spl_fstrans_unmark(cookie);
+
+	if (error != 0)
+		return (error);
+
+	/* Callers that ask for the residual handle short writes themselves */
+	if (residp != NULL) {
+		*residp = zfs_uio_resid(&uio);
+		return (0);
+	}
+
+	if (zfs_uio_resid(&uio) != 0)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * Taskq callback dispatched by zfs_zrele_async(): 'arg' is the inode whose
+ * reference is dropped with iput().
+ */
+static void
+zfs_rele_async_task(void *arg)
+{
+	iput(arg);
+}
+
+/*
+ * Release a hold on a znode without ever performing the final iput()
+ * synchronously in the calling context.  The caller must hold at least one
+ * reference on the inode (asserted below).
+ */
+void
+zfs_zrele_async(znode_t *zp)
+{
+	struct inode *ip = ZTOI(zp);
+	objset_t *os = ITOZSB(ip)->z_os;
+
+	ASSERT(atomic_read(&ip->i_count) > 0);
+	ASSERT(os != NULL);
+
+	/*
+	 * If decrementing the count would put us at 0, we can't do it inline
+	 * here, because that would be synchronous. Instead, dispatch an iput
+	 * to run later.
+	 *
+	 * For more information on the dangers of a synchronous iput, see the
+	 * header comment of this file.
+	 */
+	if (!atomic_add_unless(&ip->i_count, -1, 1)) {
+		/* This was the last reference: hand the iput to the zrele taskq */
+		VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
+		    zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
+	}
+}
+
+
+/*
+ * Lookup an entry in a directory, or an extended attribute directory.
+ * If it exists, return a held inode reference for it.
+ *
+ *	IN:	zdp	- znode of directory to search.
+ *		nm	- name of entry to lookup.
+ *		flags	- LOOKUP_XATTR set if looking for an attribute.
+ *		cr	- credentials of caller.
+ *		direntflags - directory lookup flags
+ *		realpnp - returned pathname.
+ *
+ *	OUT:	zpp	- znode of located entry, NULL if not found.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Note that the fast path below returns before *zpp is reset to NULL, so on
+ * a fast-path error *zpp is left untouched.
+ *
+ * Timestamps:
+ *	NA
+ */
+/* ARGSUSED */
+int
+zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
+    int *direntflags, pathname_t *realpnp)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zdp);
+	int error = 0;
+
+	/*
+	 * Fast path lookup, however we must skip DNLC lookup
+	 * for case folding or normalizing lookups because the
+	 * DNLC code only stores the passed in name.  This means
+	 * creating 'a' and removing 'A' on a case insensitive
+	 * file system would work, but DNLC still thinks 'a'
+	 * exists and won't let you create it again on the next
+	 * pass through fast path.
+	 */
+	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
+
+		if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
+			return (SET_ERROR(ENOTDIR));
+		} else if (zdp->z_sa_hdl == NULL) {
+			return (SET_ERROR(EIO));
+		}
+
+		/* "" and "." both resolve to the directory itself */
+		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
+			error = zfs_fastaccesschk_execute(zdp, cr);
+			if (!error) {
+				*zpp = zdp;
+				zhold(*zpp);
+				return (0);
+			}
+			return (error);
+		}
+	}
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zdp);
+
+	*zpp = NULL;
+
+	if (flags & LOOKUP_XATTR) {
+		/*
+		 * We don't allow recursive attributes..
+		 * Maybe someday we will.
+		 */
+		if (zdp->z_pflags & ZFS_XATTR) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(EINVAL));
+		}
+
+		if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+
+		/*
+		 * Do we have permission to get into attribute directory?
+		 */
+
+		if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
+		    B_FALSE, cr))) {
+			/* Access denied: drop the hold and report nothing found */
+			zrele(*zpp);
+			*zpp = NULL;
+		}
+
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOTDIR));
+	}
+
+	/*
+	 * Check accessibility of directory.
+	 */
+
+	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/* On utf8only filesystems the name must be valid UTF-8 */
+	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
+	if ((error == 0) && (*zpp))
+		zfs_znode_update_vfs(*zpp);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Attempt to create a new entry in a directory.  If the entry
+ * already exists, truncate the file if permissible, else return
+ * an error.  Return the ip of the created or trunc'd file.
+ *
+ *	IN:	dzp	- znode of directory to put new file entry in.
+ *		name	- name of new file entry.
+ *		vap	- attributes of new file.
+ *		excl	- flag indicating exclusive or non-exclusive mode.
+ *		mode	- mode to open file with.
+ *		cr	- credentials of caller.
+ *		flag	- file flag.
+ *		vsecp	- ACL to be set
+ *
+ *	OUT:	zpp	- znode of created or trunc'd entry.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime updated if new entry created
+ *	 zp - ctime|mtime always, atime if new
+ */
+
+/* ARGSUSED */
+int
+zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
+    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+{
+	znode_t		*zp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zilog_t		*zilog;
+	objset_t	*os;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	int		error;
+	uid_t		uid;
+	gid_t		gid;
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+	/* B_TRUE while acl_ids holds state that must be freed */
+	boolean_t	have_acl = B_FALSE;
+	/* B_TRUE once dmu_tx_wait() has been called for this operation */
+	boolean_t	waited = B_FALSE;
+	boolean_t	skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+
+	gid = crgetgid(cr);
+	uid = crgetuid(cr);
+
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (SET_ERROR(EINVAL));
+
+	if (name == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	os = zfsvfs->z_os;
+	zilog = zfsvfs->z_log;
+
+	/* On utf8only filesystems the name must be valid UTF-8 */
+	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	if (vap->va_mask & ATTR_XVATTR) {
+		if ((error = secpolicy_xvattr((xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_mode)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	/* Retry point after dmu_tx_assign() returned ERESTART */
+top:
+	*zpp = NULL;
+	if (*name == '\0') {
+		/*
+		 * Null component name refers to the directory itself.
+		 */
+		zhold(dzp);
+		zp = dzp;
+		dl = NULL;
+		error = 0;
+	} else {
+		/* possible igrab(zp) */
+		int zflg = 0;
+
+		if (flag & FIGNORECASE)
+			zflg |= ZCILOOK;
+
+		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+		    NULL, NULL);
+		if (error) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			if (strcmp(name, "..") == 0)
+				error = SET_ERROR(EISDIR);
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if (zp == NULL) {
+		uint64_t txtype;
+		uint64_t projid = ZFS_DEFAULT_PROJID;
+
+		/*
+		 * Create a new file object and update the directory
+		 * to reference it.
+		 */
+		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr))) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			goto out;
+		}
+
+		/*
+		 * We only support the creation of regular files in
+		 * extended attribute directories.
+		 */
+
+		if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			error = SET_ERROR(EINVAL);
+			goto out;
+		}
+
+		/* acl_ids is built once and reused across ERESTART retries */
+		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
+		    cr, vsecp, &acl_ids)) != 0)
+			goto out;
+		have_acl = B_TRUE;
+
+		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
+			projid = zfs_inherit_projid(dzp);
+		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
+			zfs_acl_ids_free(&acl_ids);
+			error = SET_ERROR(EDQUOT);
+			goto out;
+		}
+
+		tx = dmu_tx_create(os);
+
+		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+		    ZFS_SA_BASE_ATTR_SIZE);
+
+		fuid_dirtied = zfsvfs->z_fuid_dirty;
+		if (fuid_dirtied)
+			zfs_fuid_txhold(zfsvfs, tx);
+		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+		if (!zfsvfs->z_use_sa &&
+		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, acl_ids.z_aclp->z_acl_bytes);
+		}
+
+		/* The dirent lock is held here, so the assign must not block */
+		error = dmu_tx_assign(tx,
+		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+		if (error) {
+			zfs_dirent_unlock(dl);
+			if (error == ERESTART) {
+				/* acl_ids is kept (have_acl stays set) for the retry */
+				waited = B_TRUE;
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
+				goto top;
+			}
+			zfs_acl_ids_free(&acl_ids);
+			dmu_tx_abort(tx);
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+		error = zfs_link_create(dl, zp, tx, ZNEW);
+		if (error != 0) {
+			/*
+			 * Since, we failed to add the directory entry for it,
+			 * delete the newly created dnode.
+			 */
+			zfs_znode_delete(zp, tx);
+			remove_inode_hash(ZTOI(zp));
+			zfs_acl_ids_free(&acl_ids);
+			dmu_tx_commit(tx);
+			goto out;
+		}
+
+		if (fuid_dirtied)
+			zfs_fuid_sync(zfsvfs, tx);
+
+		/* Log the create before the tx commits and locks are dropped */
+		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
+		if (flag & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
+		    vsecp, acl_ids.z_fuidp, vap);
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_commit(tx);
+	} else {
+		int aflags = (flag & O_APPEND) ? V_APPEND : 0;
+
+		/* ACL ids left over from an earlier pass are not needed here */
+		if (have_acl)
+			zfs_acl_ids_free(&acl_ids);
+		have_acl = B_FALSE;
+
+		/*
+		 * A directory entry already exists for this name.
+		 */
+		/*
+		 * Can't truncate an existing file if in exclusive mode.
+		 */
+		if (excl) {
+			error = SET_ERROR(EEXIST);
+			goto out;
+		}
+		/*
+		 * Can't open a directory for writing.
+		 */
+		if (S_ISDIR(ZTOI(zp)->i_mode)) {
+			error = SET_ERROR(EISDIR);
+			goto out;
+		}
+		/*
+		 * Verify requested access to file.
+		 */
+		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+			goto out;
+		}
+
+		mutex_enter(&dzp->z_lock);
+		dzp->z_seq++;
+		mutex_exit(&dzp->z_lock);
+
+		/*
+		 * Truncate regular files if requested.
+		 */
+		if (S_ISREG(ZTOI(zp)->i_mode) &&
+		    (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
+			/* we can't hold any locks when calling zfs_freesp() */
+			if (dl) {
+				zfs_dirent_unlock(dl);
+				dl = NULL;
+			}
+			error = zfs_freesp(zp, 0, 0, mode, TRUE);
+		}
+	}
+out:
+
+	if (dl)
+		zfs_dirent_unlock(dl);
+
+	if (error) {
+		/* On failure drop the hold taken on zp, if there is one */
+		if (zp)
+			zrele(zp);
+	} else {
+		zfs_znode_update_vfs(dzp);
+		zfs_znode_update_vfs(zp);
+		*zpp = zp;
+	}
+
+	/* With sync=always the intent log is committed before returning */
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Create an unnamed file.
+ *
+ * The new znode is created with the IS_TMPFILE flag and is never entered
+ * into a directory; instead it is marked unlinked and added to the
+ * unlinked set in the same transaction that creates it.
+ *
+ *	IN:	dip	- inode of the directory the request is made against;
+ *			  used for the ACE_ADD_FILE access check, ACL
+ *			  creation and project id inheritance.
+ *		vap	- attributes of the new file.
+ *		excl	- not used by this function.
+ *		mode	- not used by this function.
+ *		cr	- credentials of caller.
+ *		flag	- not used by this function.
+ *		vsecp	- ACL to be set (may be NULL).
+ *
+ *	OUT:	ipp	- inode of the created file.  Reset to NULL at the
+ *			  start of every attempt, so it is NULL on any
+ *			  failure that happens after the initial checks.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ */
+/* ARGSUSED */
+int
+zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
+    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+{
+	zfsvfs_t	*zfsvfs = ITOZSB(dip);
+	znode_t		*dzp = ITOZ(dip);
+	znode_t		*zp = NULL;
+	objset_t	*os;
+	dmu_tx_t	*tx;
+	zfs_acl_ids_t	acl_ids;
+	uint64_t	projid = ZFS_DEFAULT_PROJID;
+	uid_t		uid = crgetuid(cr);
+	gid_t		gid = crgetgid(cr);
+	boolean_t	fuid_dirtied;
+	boolean_t	have_acl = B_FALSE;
+	boolean_t	waited = B_FALSE;
+	int		error;
+
+	/*
+	 * An ephemeral id or an explicit ACL is only acceptable when the
+	 * file system is at a version that uses FUIDs.
+	 */
+	if (!zfsvfs->z_use_fuids &&
+	    (vsecp != NULL || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	os = zfsvfs->z_os;
+
+	if (vap->va_mask & ATTR_XVATTR) {
+		error = secpolicy_xvattr((xvattr_t *)vap, crgetuid(cr), cr,
+		    vap->va_mode);
+		if (error != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	/*
+	 * Each pass builds and tries to assign one transaction.  The loop
+	 * is only repeated when the assignment fails with ERESTART; the
+	 * ACL ids created on the first pass are kept across retries.
+	 */
+	for (;;) {
+		*ipp = NULL;
+
+		error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr);
+		if (error != 0) {
+			if (have_acl)
+				zfs_acl_ids_free(&acl_ids);
+			goto out;
+		}
+
+		if (!have_acl) {
+			error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
+			    &acl_ids);
+			if (error != 0)
+				goto out;
+		}
+		have_acl = B_TRUE;
+
+		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
+			projid = zfs_inherit_projid(dzp);
+		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
+			zfs_acl_ids_free(&acl_ids);
+			error = SET_ERROR(EDQUOT);
+			goto out;
+		}
+
+		/*
+		 * Holds: the SA of the new object, the unlinked set ZAP,
+		 * the FUID table if it is dirty, and room for an external
+		 * ACL when SAs are not in use and the ACL is too large.
+		 */
+		tx = dmu_tx_create(os);
+		dmu_tx_hold_sa_create(tx,
+		    acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE);
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+		fuid_dirtied = zfsvfs->z_fuid_dirty;
+		if (fuid_dirtied)
+			zfs_fuid_txhold(zfsvfs, tx);
+		if (!zfsvfs->z_use_sa &&
+		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+			    acl_ids.z_aclp->z_acl_bytes);
+		}
+
+		error = dmu_tx_assign(tx,
+		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+		if (error == 0)
+			break;
+
+		if (error != ERESTART) {
+			/*
+			 * Hard failure.  zp is still NULL here, so the
+			 * common exit path only leaves the filesystem and
+			 * returns the error.
+			 */
+			zfs_acl_ids_free(&acl_ids);
+			dmu_tx_abort(tx);
+			goto out;
+		}
+
+		/* ERESTART: wait, throw this tx away and start over. */
+		waited = B_TRUE;
+		dmu_tx_wait(tx);
+		dmu_tx_abort(tx);
+	}
+
+	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	/* The file has no name: put it on the unlinked set right away. */
+	zp->z_unlinked = B_TRUE;
+	zfs_unlinked_add(zp, tx);
+	zfs_acl_ids_free(&acl_ids);
+	dmu_tx_commit(tx);
+out:
+
+	if (error == 0) {
+		zfs_znode_update_vfs(dzp);
+		zfs_znode_update_vfs(zp);
+		*ipp = ZTOI(zp);
+	} else if (zp != NULL) {
+		zrele(zp);
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Remove an entry from a directory.
+ *
+ *	IN:	dzp	- znode of directory to remove entry from.
+ *		name	- name of entry to remove.
+ *		cr	- credentials of caller.
+ *		flags	- case flags.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure (EINVAL for a NULL name, EPERM when
+ *		the entry is a directory, otherwise the error reported by
+ *		the failing helper)
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime
+ *	 ip - ctime (if nlink > 0)
+ */
+
+/*
+ * Zero value stored into SA_ZPL_XATTR by zfs_remove() when the znode is
+ * not SA based (z_is_sa clear) and the attribute is updated rather than
+ * removed.
+ */
+uint64_t null_xattr = 0;
+
+/*ARGSUSED*/
+int
+zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
+{
+	znode_t		*zp;
+	znode_t		*xzp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zilog_t		*zilog;
+	uint64_t	acl_obj, xattr_obj;
+	uint64_t	xattr_obj_unlinked = 0;
+	uint64_t	obj = 0;
+	uint64_t	links;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	boolean_t	may_delete_now, delete_now = FALSE;
+	boolean_t	unlinked, toobig = FALSE;
+	uint64_t	txtype;
+	pathname_t	*realnmp = NULL;
+	pathname_t	realnm;
+	int		error;
+	int		zflg = ZEXISTS;
+	boolean_t	waited = B_FALSE;
+
+	if (name == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (flags & FIGNORECASE) {
+		zflg |= ZCILOOK;
+		pn_alloc(&realnm);
+		realnmp = &realnm;
+	}
+
+	/*
+	 * Retry point: reached again when dmu_tx_assign() fails with
+	 * ERESTART, after the dirent lock and znode holds were dropped.
+	 */
+top:
+	xattr_obj = 0;
+	xzp = NULL;
+	/*
+	 * Attempt to lock directory; fail if entry doesn't exist.
+	 */
+	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+	    NULL, realnmp))) {
+		if (realnmp)
+			pn_free(realnmp);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+		goto out;
+	}
+
+	/*
+	 * Need to use rmdir for removing directories.
+	 */
+	if (S_ISDIR(ZTOI(zp)->i_mode)) {
+		error = SET_ERROR(EPERM);
+		goto out;
+	}
+
+	/*
+	 * First guess at whether the znode can be destroyed right away:
+	 * ours must be the only inode reference and there must be no
+	 * cached data.  This is re-checked once the link is gone.
+	 */
+	mutex_enter(&zp->z_lock);
+	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
+	    !zn_has_cached_data(zp, 0, LLONG_MAX);
+	mutex_exit(&zp->z_lock);
+
+	/*
+	 * We may delete the znode now, or we may put it in the unlinked set;
+	 * it depends on whether we're the last link, and on whether there are
+	 * other holds on the inode.  So we dmu_tx_hold() the right things to
+	 * allow for either case.
+	 */
+	obj = zp->z_id;
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+	if (may_delete_now) {
+		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
+		/* if the file is too big, only hold_free a token amount */
+		dmu_tx_hold_free(tx, zp->z_id, 0,
+		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
+	}
+
+	/* are there any extended attributes? */
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (error == 0 && xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
+		if (error != 0) {
+			/*
+			 * The xattr directory could not be instantiated.
+			 * Fail the remove instead of dereferencing a NULL
+			 * xzp below.  The tx has not been assigned yet, so
+			 * it is aborted; "out" releases the dirent lock
+			 * and the hold on zp.
+			 */
+			xzp = NULL;
+			dmu_tx_abort(tx);
+			goto out;
+		}
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
+	}
+
+	mutex_enter(&zp->z_lock);
+	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
+		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+	mutex_exit(&zp->z_lock);
+
+	/* charge as an update -- would be nice not to charge at all */
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+	/*
+	 * Mark this transaction as typically resulting in a net free of space
+	 */
+	dmu_tx_mark_netfree(tx);
+
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			zrele(zp);
+			if (xzp)
+				zrele(xzp);
+			goto top;
+		}
+		if (realnmp)
+			pn_free(realnmp);
+		dmu_tx_abort(tx);
+		zrele(zp);
+		if (xzp)
+			zrele(xzp);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Remove the directory entry.
+	 */
+	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
+
+	if (error) {
+		/* The tx is assigned at this point, so it must be committed. */
+		dmu_tx_commit(tx);
+		goto out;
+	}
+
+	if (unlinked) {
+		/*
+		 * Hold z_lock so that we can make sure that the ACL obj
+		 * hasn't changed.  Could have been deleted due to
+		 * zfs_sa_upgrade().
+		 *
+		 * Note that z_lock stays held past this block; it is
+		 * dropped in one of the two branches that follow.
+		 */
+		mutex_enter(&zp->z_lock);
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
+		delete_now = may_delete_now && !toobig &&
+		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
+		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+		    xattr_obj == xattr_obj_unlinked &&
+		    zfs_external_acl(zp) == acl_obj;
+	}
+
+	if (delete_now) {
+		if (xattr_obj_unlinked) {
+			/*
+			 * Unlink the xattr directory as well: zero its link
+			 * count, queue it on the unlinked set and detach it
+			 * from zp.
+			 */
+			ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
+			mutex_enter(&xzp->z_lock);
+			xzp->z_unlinked = B_TRUE;
+			clear_nlink(ZTOI(xzp));
+			links = 0;
+			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+			    &links, sizeof (links), tx);
+			ASSERT3U(error,  ==,  0);
+			mutex_exit(&xzp->z_lock);
+			zfs_unlinked_add(xzp, tx);
+
+			if (zp->z_is_sa)
+				error = sa_remove(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), tx);
+			else
+				error = sa_update(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
+				    sizeof (uint64_t), tx);
+			ASSERT0(error);
+		}
+		/*
+		 * Add to the unlinked set because a new reference could be
+		 * taken concurrently resulting in a deferred destruction.
+		 */
+		zfs_unlinked_add(zp, tx);
+		mutex_exit(&zp->z_lock);
+	} else if (unlinked) {
+		mutex_exit(&zp->z_lock);
+		zfs_unlinked_add(zp, tx);
+	}
+
+	txtype = TX_REMOVE;
+	if (flags & FIGNORECASE)
+		txtype |= TX_CI;
+	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
+
+	dmu_tx_commit(tx);
+out:
+	if (realnmp)
+		pn_free(realnmp);
+
+	zfs_dirent_unlock(dl);
+	zfs_znode_update_vfs(dzp);
+	zfs_znode_update_vfs(zp);
+
+	/*
+	 * Drop our hold synchronously only when the znode is being
+	 * destroyed now; otherwise (including every error path) the
+	 * release is handed to zfs_zrele_async().
+	 */
+	if (delete_now)
+		zrele(zp);
+	else
+		zfs_zrele_async(zp);
+
+	if (xzp) {
+		zfs_znode_update_vfs(xzp);
+		zfs_zrele_async(xzp);
+	}
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Create a new directory and insert it into dzp using the name
+ * provided.  Return a pointer to the inserted directory.
+ *
+ *	IN:	dzp	- znode of directory to add subdir to.
+ *		dirname	- name of new directory.
+ *		vap	- attributes of new directory.
+ *		cr	- credentials of caller.
+ *		flags	- case flags.
+ *		vsecp	- ACL to be set
+ *
+ *	OUT:	zpp	- znode of created directory.  Reset to NULL at the
+ *			  start of every attempt and only set once the new
+ *			  name has been linked into dzp.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure.  Errors raised directly here are
+ *		EINVAL (NULL dirname, dzp flagged ZFS_XATTR, or an ACL or
+ *		ephemeral id while z_use_fuids is off), EILSEQ (dirname
+ *		fails UTF-8 validation on a z_utf8 filesystem) and EDQUOT
+ *		(over quota); anything else comes from the failing helper.
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime updated
+ *	zpp - ctime|mtime|atime updated
+ */
+/*ARGSUSED*/
+int
+zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
+    cred_t *cr, int flags, vsecattr_t *vsecp)
+{
+	znode_t		*zp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zilog_t		*zilog;
+	zfs_dirlock_t	*dl;
+	uint64_t	txtype;
+	dmu_tx_t	*tx;
+	int		error;
+	int		zf = ZNEW;
+	uid_t		uid;
+	gid_t		gid = crgetgid(cr);
+	zfs_acl_ids_t   acl_ids;
+	boolean_t	fuid_dirtied;
+	boolean_t	waited = B_FALSE;
+
+	ASSERT(S_ISDIR(vap->va_mode));
+
+	/*
+	 * If we have an ephemeral id, ACL, or XVATTR then
+	 * make sure file system is at proper version
+	 */
+
+	uid = crgetuid(cr);
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+		return (SET_ERROR(EINVAL));
+
+	if (dirname == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (dzp->z_pflags & ZFS_XATTR) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(dirname,
+	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+	if (flags & FIGNORECASE)
+		zf |= ZCILOOK;
+
+	if (vap->va_mask & ATTR_XVATTR) {
+		if ((error = secpolicy_xvattr((xvattr_t *)vap,
+		    crgetuid(cr), cr, vap->va_mode)) != 0) {
+			ZFS_EXIT(zfsvfs);
+			return (error);
+		}
+	}
+
+	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
+	    vsecp, &acl_ids)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	/*
+	 * First make sure the new directory doesn't exist.
+	 *
+	 * Existence is checked first to make sure we don't return
+	 * EACCES instead of EEXIST which can cause some applications
+	 * to fail.
+	 *
+	 * 'top' is also the retry point: when dmu_tx_assign() fails with
+	 * ERESTART the dirent lock is dropped and everything from here on
+	 * is repeated, while acl_ids (created once above) is kept.
+	 */
+top:
+	*zpp = NULL;
+
+	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
+	    NULL, NULL))) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EDQUOT));
+	}
+
+	/*
+	 * Add a new entry to the directory.
+	 *
+	 * The tx holds the parent's ZAP for the new name, a ZAP for the
+	 * new object, the FUID table if it is dirty, room for an external
+	 * ACL when SAs are not in use and the ACL exceeds ZFS_ACE_SPACE,
+	 * and the SA of the new object.
+	 */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Create new node.
+	 */
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	/*
+	 * Now put new name in parent dir.
+	 *
+	 * If that fails, the znode just created is deleted in this same
+	 * tx and removed from the inode hash; 'out' then commits the tx
+	 * and drops the hold on zp.  *zpp is left NULL in that case.
+	 */
+	error = zfs_link_create(dl, zp, tx, ZNEW);
+	if (error != 0) {
+		zfs_znode_delete(zp, tx);
+		remove_inode_hash(ZTOI(zp));
+		goto out;
+	}
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	*zpp = zp;
+
+	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
+	if (flags & FIGNORECASE)
+		txtype |= TX_CI;
+	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
+	    acl_ids.z_fuidp, vap);
+
+out:
+	zfs_acl_ids_free(&acl_ids);
+
+	dmu_tx_commit(tx);
+
+	zfs_dirent_unlock(dl);
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	if (error != 0) {
+		zrele(zp);
+	} else {
+		zfs_znode_update_vfs(dzp);
+		zfs_znode_update_vfs(zp);
+	}
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Remove a directory subdir entry.  If the current working
+ * directory is the same as the subdir to be removed, the
+ * remove will fail.
+ *
+ *	IN:	dzp	- znode of directory to remove from.
+ *		name	- name of directory to be removed.
+ *		cwd	- znode of current working directory.
+ *		cr	- credentials of caller.
+ *		flags	- case flags
+ *
+ *	RETURN:	0 on success, error code on failure.  Errors raised
+ *		directly here are EINVAL (NULL name, or the entry is cwd)
+ *		and ENOTDIR (the entry is not a directory).
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+int
+zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
+    int flags)
+{
+	znode_t		*zp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zilog_t		*zilog;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	int		error;
+	int		zflg = ZEXISTS;
+	boolean_t	waited = B_FALSE;
+
+	if (name == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (flags & FIGNORECASE)
+		zflg |= ZCILOOK;
+top:
+	zp = NULL;
+
+	/*
+	 * Attempt to lock directory; fail if entry doesn't exist.
+	 *
+	 * This is also where the operation restarts ('top') after
+	 * dmu_tx_assign() returned ERESTART and all locks and the hold
+	 * on zp were dropped.
+	 */
+	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
+	    NULL, NULL))) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+		goto out;
+	}
+
+	if (!S_ISDIR(ZTOI(zp)->i_mode)) {
+		error = SET_ERROR(ENOTDIR);
+		goto out;
+	}
+
+	if (zp == cwd) {
+		error = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/*
+	 * Grab a lock on the directory to make sure that no one is
+	 * trying to add (or lookup) entries while we are removing it.
+	 */
+	rw_enter(&zp->z_name_lock, RW_WRITER);
+
+	/*
+	 * Grab a lock on the parent pointer to make sure we play well
+	 * with the treewalk and directory rename code.
+	 *
+	 * Both rwlocks are released in the reverse order they were taken,
+	 * either in the dmu_tx_assign() failure path or after the tx has
+	 * been committed.
+	 */
+	rw_enter(&zp->z_parent_lock, RW_WRITER);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
+	dmu_tx_mark_netfree(tx);
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		rw_exit(&zp->z_parent_lock);
+		rw_exit(&zp->z_name_lock);
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			zrele(zp);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		zrele(zp);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
+
+	if (error == 0) {	/* only a successful removal is logged */
+		uint64_t txtype = TX_RMDIR;
+		if (flags & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
+		    B_FALSE);
+	}
+
+	dmu_tx_commit(tx);	/* committed whether or not the unlink worked */
+
+	rw_exit(&zp->z_parent_lock);
+	rw_exit(&zp->z_name_lock);
+out:
+	zfs_dirent_unlock(dl);
+
+	zfs_znode_update_vfs(dzp);
+	zfs_znode_update_vfs(zp);
+	zrele(zp);
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Read directory entries from the given directory cursor position and emit
+ * name and position for each entry.
+ *
+ *	IN:	ip	- inode of directory to read.
+ *		ctx	- directory entry context; ctx->pos is read as the
+ *			  starting position and advanced after each entry
+ *			  that was emitted.
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	ip - atime updated
+ *
+ * Note that the low 4 bits of the cookie returned by zap are always zero.
+ * This allows us to use the low range for "special" directory entries:
+ * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
+ * we use the offset 2 for the '.zfs' directory.
+ */
+/* ARGSUSED */
+int
+zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
+{
+	znode_t		*zp = ITOZ(ip);
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	objset_t	*os;
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	int		error;
+	uint8_t		prefetch;
+	uint8_t		type;
+	int		done = 0;
+	uint64_t	parent;
+	uint64_t	offset; /* must be unsigned; checks for < 1 */
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent))) != 0)
+		goto out;
+
+	/*
+	 * Quit if directory has been removed (posix).  error is 0 here,
+	 * so the caller sees success with no entries emitted.
+	 */
+	if (zp->z_unlinked)
+		goto out;
+
+	error = 0;
+	os = zfsvfs->z_os;
+	offset = ctx->pos;
+	prefetch = zp->z_zn_prefetch;
+
+	/*
+	 * Initialize the iterator cursor.  Offsets 0 through 3 all start
+	 * the zap cursor at the beginning; anything larger is handed to
+	 * the zap code as a serialized cursor.
+	 */
+	if (offset <= 3) {
+		/*
+		 * Start iteration from the beginning of the directory.
+		 */
+		zap_cursor_init(&zc, os, zp->z_id);
+	} else {
+		/*
+		 * The offset is a serialized cursor.
+		 */
+		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
+	}
+
+	/*
+	 * Transform to file-system independent format
+	 */
+	while (!done) {
+		uint64_t objnum;
+		/*
+		 * Special case `.', `..', and `.zfs'.  These are
+		 * synthesized here and do not consume a zap entry.
+		 */
+		if (offset == 0) {
+			(void) strcpy(zap.za_name, ".");
+			zap.za_normalization_conflict = 0;
+			objnum = zp->z_id;
+			type = DT_DIR;
+		} else if (offset == 1) {
+			(void) strcpy(zap.za_name, "..");
+			zap.za_normalization_conflict = 0;
+			objnum = parent;
+			type = DT_DIR;
+		} else if (offset == 2 && zfs_show_ctldir(zp)) {
+			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
+			zap.za_normalization_conflict = 0;
+			objnum = ZFSCTL_INO_ROOT;
+			type = DT_DIR;
+		} else {
+			/*
+			 * Grab next entry.  ENOENT from the cursor ends the
+			 * loop normally and is turned into success at
+			 * 'update'; any other error skips straight there.
+			 */
+			if ((error = zap_cursor_retrieve(&zc, &zap))) {
+				if (error == ENOENT)
+					break;
+				else
+					goto update;
+			}
+
+			/*
+			 * Allow multiple entries provided the first entry is
+			 * the object id.  Non-zpl consumers may safely make
+			 * use of the additional space.
+			 *
+			 * XXX: This should be a feature flag for compatibility
+			 */
+			if (zap.za_integer_length != 8 ||
+			    zap.za_num_integers == 0) {
+				cmn_err(CE_WARN, "zap_readdir: bad directory "
+				    "entry, obj = %lld, offset = %lld, "
+				    "length = %d, num = %lld\n",
+				    (u_longlong_t)zp->z_id,
+				    (u_longlong_t)offset,
+				    zap.za_integer_length,
+				    (u_longlong_t)zap.za_num_integers);
+				error = SET_ERROR(ENXIO);
+				goto update;
+			}
+
+			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
+			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
+		}
+
+		done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name),
+		    objnum, type);
+		if (done)	/* not emitted: leave ctx->pos on this entry */
+			break;
+
+		/* Prefetch znode */
+		if (prefetch) {
+			dmu_prefetch(os, objnum, 0, 0, 0,
+			    ZIO_PRIORITY_SYNC_READ);
+		}
+
+		/*
+		 * Move to the next entry, fill in the previous offset.
+		 * The zap cursor only advances for entries that came from
+		 * the zap; the synthesized entries just bump the offset.
+		 */
+		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
+			zap_cursor_advance(&zc);
+			offset = zap_cursor_serialize(&zc);
+		} else {
+			offset += 1;
+		}
+		ctx->pos = offset;
+	}
+	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
+
+update:
+	zap_cursor_fini(&zc);
+	if (error == ENOENT)
+		error = 0;
+out:
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+
+/*
+ * Fill in the provided kstat structure with the basic attributes of a
+ * file.  Most values are taken from the inode by zpl_generic_fillattr();
+ * the block size and block count are then overwritten with the values
+ * reported by sa_object_size() for the znode.
+ *
+ *	IN:	user_ns	- passed through to zpl_generic_fillattr().
+ *		ip	- inode of file.
+ *
+ *	OUT:	sp	- kstat values.
+ *
+ *	RETURN:	0 (always succeeds)
+ *		NOTE(review): ZFS_ENTER()/ZFS_VERIFY_ZP() are macros defined
+ *		elsewhere; confirm whether they can return early with an
+ *		error.
+ */
+/* ARGSUSED */
+int
+zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+{
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	znode_t		*zp = ITOZ(ip);
+	u_longlong_t	blkcnt;
+	uint32_t	blksz;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	mutex_enter(&zp->z_lock);
+
+	zpl_generic_fillattr(user_ns, ip, sp);
+
+	/*
+	 * A visible '.zfs' directory counts as one extra link on the root
+	 * inode, as long as that does not push nlink past ZFS_LINK_MAX.
+	 */
+	if (zp->z_id == zfsvfs->z_root && zfs_show_ctldir(zp) &&
+	    sp->nlink < ZFS_LINK_MAX)
+		sp->nlink++;
+
+	sa_object_size(zp->z_sa_hdl, &blksz, &blkcnt);
+	sp->blocks = blkcnt;
+
+	/*
+	 * When the znode has no block size yet, suggest maximal I/O
+	 * transfers instead of the size reported by the SA layer.
+	 */
+	if (unlikely(zp->z_blksz == 0))
+		sp->blksize = zfsvfs->z_max_blksz;
+	else
+		sp->blksize = blksz;
+
+	mutex_exit(&zp->z_lock);
+
+	/*
+	 * Required to prevent NFS client from detecting different inode
+	 * numbers of snapshot root dentry before and after snapshot mount.
+	 */
+	if (zfsvfs->z_issnap && ip->i_sb->s_root->d_inode == ip)
+		sp->ino = ZFSCTL_INO_SNAPDIRS - dmu_objset_id(zfsvfs->z_os);
+
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * For the operation of changing a file's user/group/project, we need to
+ * handle not only the main object that is assigned to the file directly,
+ * but also the ones that are used by the file via the hidden xattr
+ * directory.
+ *
+ * Because the xattr directory may contain many EA entries, it may be
+ * impossible to change all of them in the same transaction that changes
+ * the main object's user/group/project attributes.  Instead they are
+ * changed one by one, each in its own independent transaction.  This may
+ * not be a good solution, but we have no better idea yet.
+ *
+ *	IN:	dzp	- znode of the directory to walk (presumably the
+ *			  hidden xattr directory; confirm against the
+ *			  caller).  Every entry whose uid, gid or project id
+ *			  differs from dzp's is updated to match dzp.
+ *
+ *	RETURN:	0 if success; ENOENT from the directory walk or from an
+ *		individual update is treated as success.
+ *		error code if failure
+ */
+static int
+zfs_setattr_dir(znode_t *dzp)
+{
+	struct inode	*dxip = ZTOI(dzp);
+	struct inode	*xip = NULL;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	objset_t	*os = zfsvfs->z_os;
+	zap_cursor_t	zc;
+	zap_attribute_t	zap;
+	zfs_dirlock_t	*dl;
+	znode_t		*zp = NULL;
+	dmu_tx_t	*tx = NULL;
+	uint64_t	uid, gid;
+	sa_bulk_attr_t	bulk[4];	/* at most uid, gid, flags, projid */
+	int		count;
+	int		err;
+
+	zap_cursor_init(&zc, os, dzp->z_id);
+	while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) {
+		count = 0;
+		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
+			err = ENXIO;
+			break;
+		}
+
+		err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp,
+		    ZEXISTS, NULL, NULL);
+		if (err == ENOENT)	/* entry is gone: skip it */
+			goto next;
+		if (err)
+			break;
+
+		xip = ZTOI(zp);
+		if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) &&
+		    KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) &&
+		    zp->z_projid == dzp->z_projid)
+			goto next;	/* already matches dzp: nothing to do */
+
+		tx = dmu_tx_create(os);
+		if (!(zp->z_pflags & ZFS_PROJID))
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		else
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+
+		err = dmu_tx_assign(tx, TXG_WAIT);
+		if (err)	/* tx and zp are cleaned up after the loop */
+			break;
+
+		mutex_enter(&dzp->z_lock);
+
+		if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) {
+			xip->i_uid = dxip->i_uid;
+			uid = zfs_uid_read(dxip);
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+			    &uid, sizeof (uid));
+		}
+
+		if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) {
+			xip->i_gid = dxip->i_gid;
+			gid = zfs_gid_read(dxip);
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+			    &gid, sizeof (gid));
+		}
+
+		if (zp->z_projid != dzp->z_projid) {
+			if (!(zp->z_pflags & ZFS_PROJID)) {
+				zp->z_pflags |= ZFS_PROJID;
+				SA_ADD_BULK_ATTR(bulk, count,
+				    SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags,
+				    sizeof (zp->z_pflags));
+			}
+
+			zp->z_projid = dzp->z_projid;
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs),
+			    NULL, &zp->z_projid, sizeof (zp->z_projid));
+		}
+
+		mutex_exit(&dzp->z_lock);
+
+		if (likely(count > 0)) {
+			err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+			dmu_tx_commit(tx);
+		} else {
+			dmu_tx_abort(tx); /* NOTE(review): tx is assigned here; confirm abort is valid */
+		}
+		tx = NULL;
+		if (err != 0 && err != ENOENT)
+			break;
+
+next:
+		if (zp) {
+			zrele(zp);
+			zp = NULL;
+			zfs_dirent_unlock(dl);
+		}
+		zap_cursor_advance(&zc);
+	}
+
+	if (tx)		/* left over from a failed dmu_tx_assign() */
+		dmu_tx_abort(tx);
+	if (zp) {	/* loop was left via break with an entry still held */
+		zrele(zp);
+		zfs_dirent_unlock(dl);
+	}
+	zap_cursor_fini(&zc);
+
+	return (err == ENOENT ? 0 : err);
+}
+
+/*
+ * Set the file attributes to the values contained in the
+ * vattr structure.
+ *
+ *	IN:	zp	- znode of file to be modified.
+ *		vap	- new attribute values.
+ *			  If ATTR_XVATTR set, then optional attrs are being set
+ *		flags	- ATTR_UTIME set if non-default time values provided.
+ *			- ATTR_NOACLCHECK (CIFS context only).
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Timestamps:
+ *	ip - ctime updated, mtime updated if size changed.
+ */
+/* ARGSUSED */
+int
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+{
+	struct inode	*ip;
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	objset_t	*os = zfsvfs->z_os;
+	zilog_t		*zilog;
+	dmu_tx_t	*tx;
+	vattr_t		oldva;
+	xvattr_t	*tmpxvattr;
+	uint_t		mask = vap->va_mask;
+	uint_t		saved_mask = 0;
+	int		trim_mask = 0;
+	uint64_t	new_mode;
+	uint64_t	new_kuid = 0, new_kgid = 0, new_uid, new_gid;
+	uint64_t	xattr_obj;
+	uint64_t	mtime[2], ctime[2], atime[2];
+	uint64_t	projid = ZFS_INVALID_PROJID;
+	znode_t		*attrzp;
+	int		need_policy = FALSE;
+	int		err, err2 = 0;
+	zfs_fuid_info_t *fuidp = NULL;
+	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
+	xoptattr_t	*xoap;
+	zfs_acl_t	*aclp;
+	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	boolean_t	fuid_dirtied = B_FALSE;
+	boolean_t	handle_eadir = B_FALSE;
+	sa_bulk_attr_t	*bulk, *xattr_bulk;
+	int		count = 0, xattr_count = 0, bulks = 8;
+
+	if (mask == 0)
+		return (0);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	ip = ZTOI(zp);
+
+	/*
+	 * If this is a xvattr_t, then get a pointer to the structure of
+	 * optional attributes.  If this is NULL, then we have a vattr_t.
+	 */
+	xoap = xva_getxoptattr(xvap);
+	if (xoap != NULL && (mask & ATTR_XVATTR)) {
+		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
+			if (!dmu_objset_projectquota_enabled(os) ||
+			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
+				ZFS_EXIT(zfsvfs);
+				return (SET_ERROR(ENOTSUP));
+			}
+
+			projid = xoap->xoa_projid;
+			if (unlikely(projid == ZFS_INVALID_PROJID)) {
+				ZFS_EXIT(zfsvfs);
+				return (SET_ERROR(EINVAL));
+			}
+
+			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
+				projid = ZFS_INVALID_PROJID;
+			else
+				need_policy = TRUE;
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
+		    (xoap->xoa_projinherit !=
+		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
+		    (!dmu_objset_projectquota_enabled(os) ||
+		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
+			ZFS_EXIT(zfsvfs);
+			return (SET_ERROR(ENOTSUP));
+		}
+	}
+
+	zilog = zfsvfs->z_log;
+
+	/*
+	 * Make sure that if we have ephemeral uid/gid or xvattr specified
+	 * that file system is at proper version level
+	 */
+
+	if (zfsvfs->z_use_fuids == B_FALSE &&
+	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
+	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
+	    (mask & ATTR_XVATTR))) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EISDIR));
+	}
+
+	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP);
+	xva_init(tmpxvattr);
+
+	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
+	xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
+
+	/*
+	 * Immutable files can only alter immutable bit and atime
+	 */
+	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
+	    ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) ||
+	    ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
+		err = SET_ERROR(EPERM);
+		goto out3;
+	}
+
+	if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
+		err = SET_ERROR(EPERM);
+		goto out3;
+	}
+
+	/*
+	 * Verify timestamps doesn't overflow 32 bits.
+	 * ZFS can handle large timestamps, but 32bit syscalls can't
+	 * handle times greater than 2039.  This check should be removed
+	 * once large timestamps are fully supported.
+	 */
+	if (mask & (ATTR_ATIME | ATTR_MTIME)) {
+		if (((mask & ATTR_ATIME) &&
+		    TIMESPEC_OVERFLOW(&vap->va_atime)) ||
+		    ((mask & ATTR_MTIME) &&
+		    TIMESPEC_OVERFLOW(&vap->va_mtime))) {
+			err = SET_ERROR(EOVERFLOW);
+			goto out3;
+		}
+	}
+
+top:
+	attrzp = NULL;
+	aclp = NULL;
+
+	/* Can this be moved to before the top label? */
+	if (zfs_is_readonly(zfsvfs)) {
+		err = SET_ERROR(EROFS);
+		goto out3;
+	}
+
+	/*
+	 * First validate permissions
+	 */
+
+	if (mask & ATTR_SIZE) {
+		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
+		if (err)
+			goto out3;
+
+		/*
+		 * XXX - Note, we are not providing any open
+		 * mode flags here (like FNDELAY), so we may
+		 * block if there are locks present... this
+		 * should be addressed in openat().
+		 */
+		/* XXX - would it be OK to generate a log record here? */
+		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
+		if (err)
+			goto out3;
+	}
+
+	if (mask & (ATTR_ATIME|ATTR_MTIME) ||
+	    ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
+	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
+	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
+	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
+	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
+	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
+	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
+		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
+		    skipaclchk, cr);
+	}
+
+	if (mask & (ATTR_UID|ATTR_GID)) {
+		int	idmask = (mask & (ATTR_UID|ATTR_GID));
+		int	take_owner;
+		int	take_group;
+
+		/*
+		 * NOTE: even if a new mode is being set,
+		 * we may clear S_ISUID/S_ISGID bits.
+		 */
+
+		if (!(mask & ATTR_MODE))
+			vap->va_mode = zp->z_mode;
+
+		/*
+		 * Take ownership or chgrp to group we are a member of
+		 */
+
+		take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
+		take_group = (mask & ATTR_GID) &&
+		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
+
+		/*
+		 * If both ATTR_UID and ATTR_GID are set then take_owner and
+		 * take_group must both be set in order to allow taking
+		 * ownership.
+		 *
+		 * Otherwise, send the check through secpolicy_vnode_setattr()
+		 *
+		 */
+
+		if (((idmask == (ATTR_UID|ATTR_GID)) &&
+		    take_owner && take_group) ||
+		    ((idmask == ATTR_UID) && take_owner) ||
+		    ((idmask == ATTR_GID) && take_group)) {
+			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
+			    skipaclchk, cr) == 0) {
+				/*
+				 * Remove setuid/setgid for non-privileged users
+				 */
+				(void) secpolicy_setid_clear(vap, cr);
+				trim_mask = (mask & (ATTR_UID|ATTR_GID));
+			} else {
+				need_policy =  TRUE;
+			}
+		} else {
+			need_policy =  TRUE;
+		}
+	}
+
+	mutex_enter(&zp->z_lock);
+	oldva.va_mode = zp->z_mode;
+	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
+	if (mask & ATTR_XVATTR) {
+		/*
+		 * Update xvattr mask to include only those attributes
+		 * that are actually changing.
+		 *
+		 * the bits will be restored prior to actually setting
+		 * the attributes so the caller thinks they were set.
+		 */
+		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+			if (xoap->xoa_appendonly !=
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
+				XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
+			if (xoap->xoa_projinherit !=
+			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
+				XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+			if (xoap->xoa_nounlink !=
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
+				XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+			if (xoap->xoa_immutable !=
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
+				XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+			if (xoap->xoa_nodump !=
+			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_NODUMP);
+				XVA_SET_REQ(tmpxvattr, XAT_NODUMP);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+			if (xoap->xoa_av_modified !=
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
+				XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+			if ((!S_ISREG(ip->i_mode) &&
+			    xoap->xoa_av_quarantined) ||
+			    xoap->xoa_av_quarantined !=
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
+				need_policy = TRUE;
+			} else {
+				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
+				XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED);
+			}
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+			mutex_exit(&zp->z_lock);
+			err = SET_ERROR(EPERM);
+			goto out3;
+		}
+
+		if (need_policy == FALSE &&
+		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
+		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
+			need_policy = TRUE;
+		}
+	}
+
+	mutex_exit(&zp->z_lock);
+
+	if (mask & ATTR_MODE) {
+		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+			err = secpolicy_setid_setsticky_clear(ip, vap,
+			    &oldva, cr);
+			if (err)
+				goto out3;
+
+			trim_mask |= ATTR_MODE;
+		} else {
+			need_policy = TRUE;
+		}
+	}
+
+	if (need_policy) {
+		/*
+		 * If trim_mask is set then take ownership
+		 * has been granted or write_acl is present and user
+		 * has the ability to modify mode.  In that case remove
+		 * UID|GID and or MODE from mask so that
+		 * secpolicy_vnode_setattr() doesn't revoke it.
+		 */
+
+		if (trim_mask) {
+			saved_mask = vap->va_mask;
+			vap->va_mask &= ~trim_mask;
+		}
+		err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
+		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
+		if (err)
+			goto out3;
+
+		if (trim_mask)
+			vap->va_mask |= saved_mask;
+	}
+
+	/*
+	 * secpolicy_vnode_setattr, or take ownership may have
+	 * changed va_mask
+	 */
+	mask = vap->va_mask;
+
+	if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) {
+		handle_eadir = B_TRUE;
+		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+		    &xattr_obj, sizeof (xattr_obj));
+
+		if (err == 0 && xattr_obj) {
+			err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp);
+			if (err)
+				goto out2;
+		}
+		if (mask & ATTR_UID) {
+			new_kuid = zfs_fuid_create(zfsvfs,
+			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
+			if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
+			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
+			    new_kuid)) {
+				if (attrzp)
+					zrele(attrzp);
+				err = SET_ERROR(EDQUOT);
+				goto out2;
+			}
+		}
+
+		if (mask & ATTR_GID) {
+			new_kgid = zfs_fuid_create(zfsvfs,
+			    (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
+			if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
+			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
+			    new_kgid)) {
+				if (attrzp)
+					zrele(attrzp);
+				err = SET_ERROR(EDQUOT);
+				goto out2;
+			}
+		}
+
+		if (projid != ZFS_INVALID_PROJID &&
+		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
+			if (attrzp)
+				zrele(attrzp);
+			err = EDQUOT;
+			goto out2;
+		}
+	}
+	tx = dmu_tx_create(os);
+
+	if (mask & ATTR_MODE) {
+		uint64_t pmode = zp->z_mode;
+		uint64_t acl_obj;
+		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
+
+		if (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_RESTRICTED &&
+		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
+			err = EPERM;
+			goto out;
+		}
+
+		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
+			goto out;
+
+		mutex_enter(&zp->z_lock);
+		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
+			/*
+			 * Are we upgrading ACL from old V0 format
+			 * to V1 format?
+			 */
+			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
+			    zfs_znode_acl_version(zp) ==
+			    ZFS_ACL_VERSION_INITIAL) {
+				dmu_tx_hold_free(tx, acl_obj, 0,
+				    DMU_OBJECT_END);
+				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+				    0, aclp->z_acl_bytes);
+			} else {
+				dmu_tx_hold_write(tx, acl_obj, 0,
+				    aclp->z_acl_bytes);
+			}
+		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, aclp->z_acl_bytes);
+		}
+		mutex_exit(&zp->z_lock);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+	} else {
+		if (((mask & ATTR_XVATTR) &&
+		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
+		    (projid != ZFS_INVALID_PROJID &&
+		    !(zp->z_pflags & ZFS_PROJID)))
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		else
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	}
+
+	if (attrzp) {
+		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
+	}
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+
+	zfs_sa_upgrade_txholds(tx, zp);
+
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err)
+		goto out;
+
+	count = 0;
+	/*
+	 * Set each attribute requested.
+	 * We group settings according to the locks they need to acquire.
+	 *
+	 * Note: you cannot set ctime directly, although it will be
+	 * updated as a side-effect of calling this function.
+	 */
+
+	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
+		/*
+		 * For the existed object that is upgraded from old system,
+		 * its on-disk layout has no slot for the project ID attribute.
+		 * But quota accounting logic needs to access related slots by
+		 * offset directly. So we need to adjust old objects' layout
+		 * to make the project ID to some unified and fixed offset.
+		 */
+		if (attrzp)
+			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
+		if (err == 0)
+			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
+
+		if (unlikely(err == EEXIST))
+			err = 0;
+		else if (err != 0)
+			goto out;
+		else
+			projid = ZFS_INVALID_PROJID;
+	}
+
+	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
+		mutex_enter(&zp->z_acl_lock);
+	mutex_enter(&zp->z_lock);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+
+	if (attrzp) {
+		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
+			mutex_enter(&attrzp->z_acl_lock);
+		mutex_enter(&attrzp->z_lock);
+		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
+		    sizeof (attrzp->z_pflags));
+		if (projid != ZFS_INVALID_PROJID) {
+			attrzp->z_projid = projid;
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
+			    sizeof (attrzp->z_projid));
+		}
+	}
+
+	if (mask & (ATTR_UID|ATTR_GID)) {
+
+		if (mask & ATTR_UID) {
+			ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
+			new_uid = zfs_uid_read(ZTOI(zp));
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+			    &new_uid, sizeof (new_uid));
+			if (attrzp) {
+				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
+				    sizeof (new_uid));
+				ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
+			}
+		}
+
+		if (mask & ATTR_GID) {
+			ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
+			new_gid = zfs_gid_read(ZTOI(zp));
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
+			    NULL, &new_gid, sizeof (new_gid));
+			if (attrzp) {
+				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
+				    sizeof (new_gid));
+				ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
+			}
+		}
+		if (!(mask & ATTR_MODE)) {
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
+			    NULL, &new_mode, sizeof (new_mode));
+			new_mode = zp->z_mode;
+		}
+		err = zfs_acl_chown_setattr(zp);
+		ASSERT(err == 0);
+		if (attrzp) {
+			err = zfs_acl_chown_setattr(attrzp);
+			ASSERT(err == 0);
+		}
+	}
+
+	if (mask & ATTR_MODE) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+		    &new_mode, sizeof (new_mode));
+		zp->z_mode = ZTOI(zp)->i_mode = new_mode;
+		ASSERT3P(aclp, !=, NULL);
+		err = zfs_aclset_common(zp, aclp, cr, tx);
+		ASSERT0(err);
+		if (zp->z_acl_cached)
+			zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = aclp;
+		aclp = NULL;
+	}
+
+	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
+		zp->z_atime_dirty = B_FALSE;
+		ZFS_TIME_ENCODE(&ip->i_atime, atime);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+		    &atime, sizeof (atime));
+	}
+
+	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+		ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
+		    vap->va_mtime, ZTOI(zp));
+
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		    mtime, sizeof (mtime));
+	}
+
+	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
+		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
+		ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
+		    ZTOI(zp));
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    ctime, sizeof (ctime));
+	}
+
+	if (projid != ZFS_INVALID_PROJID) {
+		zp->z_projid = projid;
+		SA_ADD_BULK_ATTR(bulk, count,
+		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
+		    sizeof (zp->z_projid));
+	}
+
+	if (attrzp && mask) {
+		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+		    SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
+		    sizeof (ctime));
+	}
+
+	/*
+	 * Do this after setting timestamps to prevent timestamp
+	 * update from toggling bit
+	 */
+
+	if (xoap && (mask & ATTR_XVATTR)) {
+
+		/*
+		 * restore trimmed off masks
+		 * so that return masks can be set for caller.
+		 */
+
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) {
+			XVA_SET_REQ(xvap, XAT_APPENDONLY);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) {
+			XVA_SET_REQ(xvap, XAT_NOUNLINK);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) {
+			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) {
+			XVA_SET_REQ(xvap, XAT_NODUMP);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) {
+			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) {
+			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
+		}
+		if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) {
+			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
+		}
+
+		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
+			ASSERT(S_ISREG(ip->i_mode));
+
+		zfs_xvattr_set(zp, xvap, tx);
+	}
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	if (mask != 0)
+		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
+
+	mutex_exit(&zp->z_lock);
+	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
+		mutex_exit(&zp->z_acl_lock);
+
+	if (attrzp) {
+		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
+			mutex_exit(&attrzp->z_acl_lock);
+		mutex_exit(&attrzp->z_lock);
+	}
+out:
+	if (err == 0 && xattr_count > 0) {
+		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
+		    xattr_count, tx);
+		ASSERT(err2 == 0);
+	}
+
+	if (aclp)
+		zfs_acl_free(aclp);
+
+	if (fuidp) {
+		zfs_fuid_info_free(fuidp);
+		fuidp = NULL;
+	}
+
+	if (err) {
+		dmu_tx_abort(tx);
+		if (attrzp)
+			zrele(attrzp);
+		if (err == ERESTART)
+			goto top;
+	} else {
+		if (count > 0)
+			err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		dmu_tx_commit(tx);
+		if (attrzp) {
+			if (err2 == 0 && handle_eadir)
+				err2 = zfs_setattr_dir(attrzp);
+			zrele(attrzp);
+		}
+		zfs_znode_update_vfs(zp);
+	}
+
+out2:
+	if (os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+out3:
+	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
+	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
+	kmem_free(tmpxvattr, sizeof (xvattr_t));
+	ZFS_EXIT(zfsvfs);
+	return (err);
+}
+
+/*
+ * One entry in the singly linked list of parent locks that is built by
+ * zfs_rename_lock() and torn down by zfs_rename_unlock().
+ */
+typedef struct zfs_zlock {
+	krwlock_t		*zl_rwlock;	/* rwlock held by this entry */
+	znode_t			*zl_znode;	/* held znode, or NULL */
+	struct zfs_zlock	*zl_next;	/* next entry in the list */
+} zfs_zlock_t;
+
+/*
+ * Tear down the lock list built by zfs_rename_lock().  For every entry the
+ * held znode (if any) is released asynchronously, the rwlock is dropped and
+ * the entry itself is freed.  On return *zlpp is NULL.
+ */
+static void
+zfs_rename_unlock(zfs_zlock_t **zlpp)
+{
+	zfs_zlock_t *entry = *zlpp;
+
+	while (entry != NULL) {
+		zfs_zlock_t *next = entry->zl_next;
+
+		if (entry->zl_znode != NULL)
+			zfs_zrele_async(entry->zl_znode);
+		rw_exit(entry->zl_rwlock);
+
+		/* Unlink first so *zlpp never points at freed memory. */
+		*zlpp = next;
+		kmem_free(entry, sizeof (*entry));
+		entry = next;
+	}
+}
+
+/*
+ * Search back through the directory tree, using the ".." entries.
+ * Lock each directory in the chain to prevent concurrent renames.
+ * Fail any attempt to move a directory into one of its own descendants.
+ * XXX - z_parent_lock can overlap with map or grow locks
+ *
+ *	IN:	szp	- znode being renamed; its z_parent_lock is taken
+ *			  as writer on the first pass.
+ *		tdzp	- target directory; the walk towards the root
+ *			  starts here.
+ *		sdzp	- source directory; the walk stops once the znode
+ *			  being examined has the same object id.
+ *
+ *	IN/OUT:	zlpp	- list of acquired locks.  Each new entry is linked
+ *			  in front of *zlpp, so *zlpp must be initialized
+ *			  (the caller in this file passes a NULL list).
+ *			  Entries are pushed before the checks below, so
+ *			  the list may be non-empty even when an error is
+ *			  returned; release it with zfs_rename_unlock().
+ *
+ *	RETURN:	0 if the walk reached the root or sdzp,
+ *		EINVAL if tdzp is szp or one of its descendants,
+ *		otherwise the error returned by zfs_zget().
+ */
+static int
+zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
+{
+	zfs_zlock_t	*zl;
+	znode_t		*zp = tdzp;
+	uint64_t	rootid = ZTOZSB(zp)->z_root;
+	uint64_t	oidp = zp->z_id;
+	krwlock_t	*rwlp = &szp->z_parent_lock;
+	krw_t		rw = RW_WRITER;
+
+	/*
+	 * First pass write-locks szp and compares to zp->z_id.
+	 * Later passes read-lock zp and compare to zp->z_parent.
+	 */
+	do {
+		if (!rw_tryenter(rwlp, rw)) {
+			/*
+			 * Another thread is renaming in this path.
+			 * Note that if we are a WRITER, we don't have any
+			 * parent_locks held yet.
+			 */
+			if (rw == RW_READER && zp->z_id > szp->z_id) {
+				/*
+				 * Drop our locks and restart.  rw is only
+				 * RW_READER after at least one pass has
+				 * pushed an entry, so zl is the head of the
+				 * list in *zlpp and unlocking from it
+				 * releases everything acquired so far.
+				 */
+				zfs_rename_unlock(&zl);
+				*zlpp = NULL;
+				zp = tdzp;
+				oidp = zp->z_id;
+				rwlp = &szp->z_parent_lock;
+				rw = RW_WRITER;
+				continue;
+			} else {
+				/*
+				 * Wait for other thread to drop its locks
+				 */
+				rw_enter(rwlp, rw);
+			}
+		}
+
+		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
+		zl->zl_rwlock = rwlp;
+		zl->zl_znode = NULL;
+		zl->zl_next = *zlpp;
+		*zlpp = zl;
+
+		if (oidp == szp->z_id)		/* We're a descendant of szp */
+			return (SET_ERROR(EINVAL));
+
+		if (oidp == rootid)		/* We've hit the top */
+			return (0);
+
+		if (rw == RW_READER) {		/* i.e. not the first pass */
+			int error = zfs_zget(ZTOZSB(zp), oidp, &zp);
+			if (error)
+				return (error);
+			zl->zl_znode = zp;
+		}
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)),
+		    &oidp, sizeof (oidp));
+		rwlp = &zp->z_parent_lock;
+		rw = RW_READER;
+
+	} while (zp->z_id != sdzp->z_id);
+
+	return (0);
+}
+
+/*
+ * Move an entry from the provided source directory to the target
+ * directory.  Change the entry name as indicated.
+ *
+ *	IN:	sdzp	- Source directory containing the "old entry".
+ *		snm	- Old entry name.
+ *		tdzp	- Target directory to contain the "new entry".
+ *		tnm	- New entry name.
+ *		cr	- credentials of caller.
+ *		flags	- case flags; FIGNORECASE requests a
+ *			  case-insensitive lookup (ZCILOOK).
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * 0 is also returned, without changing anything, when both directories
+ * are the same and the names compare equal, or when the source and the
+ * existing target entry refer to the same object.
+ *
+ * Errors raised directly by this function:
+ *	EINVAL	- snm or tnm is NULL; exactly one of sdzp and tdzp has
+ *		  ZFS_XATTR set; locking an entry failed and its name
+ *		  is "..".
+ *	EXDEV	- the directories are on different super blocks or
+ *		  zfsctl_is_node() is true for tdzp; tdzp has
+ *		  ZFS_PROJINHERIT set and its project ID differs from
+ *		  that of the source entry.
+ *	EILSEQ	- tnm fails UTF-8 validation on a z_utf8 file system.
+ *	ENOTDIR	- source is a directory but the existing target is not.
+ *	EISDIR	- existing target is a directory but the source is not.
+ * Errors from zfs_dirent_lock(), zfs_zaccess_rename(), zfs_rename_lock(),
+ * dmu_tx_assign(), zfs_link_destroy() and zfs_link_create() are returned
+ * unchanged.
+ *
+ * If dmu_tx_assign() returns ERESTART, every lock and hold taken since
+ * the "top" label is dropped, the function waits with dmu_tx_wait() and
+ * then starts over from "top".
+ *
+ * Timestamps:
+ *	sdzp,tdzp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+int
+zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
+    cred_t *cr, int flags)
+{
+	znode_t		*szp, *tzp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(sdzp);
+	zilog_t		*zilog;
+	zfs_dirlock_t	*sdl, *tdl;
+	dmu_tx_t	*tx;
+	zfs_zlock_t	*zl;
+	int		cmp, serr, terr;
+	int		error = 0;
+	int		zflg = 0;
+	boolean_t	waited = B_FALSE;
+
+	if (snm == NULL || tnm == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(sdzp);
+	zilog = zfsvfs->z_log;
+
+	ZFS_VERIFY_ZP(tdzp);
+
+	/*
+	 * We check i_sb because snapshots and the ctldir must have different
+	 * super blocks.
+	 */
+	if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
+	    zfsctl_is_node(ZTOI(tdzp))) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EXDEV));
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(tnm,
+	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+
+	if (flags & FIGNORECASE)
+		zflg |= ZCILOOK;
+
+top:
+	szp = NULL;
+	tzp = NULL;
+	zl = NULL;
+
+	/*
+	 * This is to prevent the creation of links into attribute space
+	 * by renaming a linked file into/outof an attribute directory.
+	 * See the comment in zfs_link() for why this is considered bad.
+	 */
+	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Lock source and target directory entries.  To prevent deadlock,
+	 * a lock ordering must be defined.  We lock the directory with
+	 * the smallest object id first, or if it's a tie, the one with
+	 * the lexically first name.
+	 */
+	if (sdzp->z_id < tdzp->z_id) {
+		cmp = -1;
+	} else if (sdzp->z_id > tdzp->z_id) {
+		cmp = 1;
+	} else {
+		/*
+		 * First compare the two name arguments without
+		 * considering any case folding.
+		 */
+		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
+
+		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
+		ASSERT(error == 0 || !zfsvfs->z_utf8);
+		if (cmp == 0) {
+			/*
+			 * POSIX: "If the old argument and the new argument
+			 * both refer to links to the same existing file,
+			 * the rename() function shall return successfully
+			 * and perform no other action."
+			 */
+			ZFS_EXIT(zfsvfs);
+			return (0);
+		}
+		/*
+		 * If the file system is case-folding, then we may
+		 * have some more checking to do.  A case-folding file
+		 * system is either supporting mixed case sensitivity
+		 * access or is completely case-insensitive.  Note
+		 * that the file system is always case preserving.
+		 *
+		 * In mixed sensitivity mode case sensitive behavior
+		 * is the default.  FIGNORECASE must be used to
+		 * explicitly request case insensitive behavior.
+		 *
+		 * If the source and target names provided differ only
+		 * by case (e.g., a request to rename 'tim' to 'Tim'),
+		 * we will treat this as a special case in the
+		 * case-insensitive mode: as long as the source name
+		 * is an exact match, we will allow this to proceed as
+		 * a name-change request.
+		 */
+		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
+		    flags & FIGNORECASE)) &&
+		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
+		    &error) == 0) {
+			/*
+			 * case preserving rename request, require exact
+			 * name matches
+			 */
+			zflg |= ZCIEXACT;
+			zflg &= ~ZCILOOK;
+		}
+	}
+
+	/*
+	 * If the source and destination directories are the same, we should
+	 * grab the z_name_lock of that directory only once.
+	 */
+	if (sdzp == tdzp) {
+		zflg |= ZHAVELOCK;
+		rw_enter(&sdzp->z_name_lock, RW_READER);
+	}
+
+	if (cmp < 0) {
+		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
+		    ZEXISTS | zflg, NULL, NULL);
+		terr = zfs_dirent_lock(&tdl,
+		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
+	} else {
+		terr = zfs_dirent_lock(&tdl,
+		    tdzp, tnm, &tzp, zflg, NULL, NULL);
+		serr = zfs_dirent_lock(&sdl,
+		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
+		    NULL, NULL);
+	}
+
+	if (serr) {
+		/*
+		 * Source entry invalid or not there.
+		 */
+		if (!terr) {
+			zfs_dirent_unlock(tdl);
+			if (tzp)
+				zrele(tzp);
+		}
+
+		if (sdzp == tdzp)
+			rw_exit(&sdzp->z_name_lock);
+
+		if (strcmp(snm, "..") == 0)
+			serr = EINVAL;
+		ZFS_EXIT(zfsvfs);
+		return (serr);
+	}
+	if (terr) {
+		zfs_dirent_unlock(sdl);
+		zrele(szp);
+
+		if (sdzp == tdzp)
+			rw_exit(&sdzp->z_name_lock);
+
+		if (strcmp(tnm, "..") == 0)
+			terr = EINVAL;
+		ZFS_EXIT(zfsvfs);
+		return (terr);
+	}
+
+	/*
+	 * If we are using project inheritance, means if the directory has
+	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
+	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
+	 * such case, we only allow renames into our tree when the project
+	 * IDs are the same.
+	 */
+	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
+	    tdzp->z_projid != szp->z_projid) {
+		error = SET_ERROR(EXDEV);
+		goto out;
+	}
+
+	/*
+	 * Must have write access at the source to remove the old entry
+	 * and write access at the target to create the new entry.
+	 * Note that if target and source are the same, this can be
+	 * done in a single check.
+	 */
+
+	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+		goto out;
+
+	if (S_ISDIR(ZTOI(szp)->i_mode)) {
+		/*
+		 * Check to make sure rename is valid.
+		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
+		 */
+		if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl)))
+			goto out;
+	}
+
+	/*
+	 * Does target exist?
+	 */
+	if (tzp) {
+		/*
+		 * Source and target must be the same type.
+		 */
+		if (S_ISDIR(ZTOI(szp)->i_mode)) {
+			if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
+				error = SET_ERROR(ENOTDIR);
+				goto out;
+			}
+		} else {
+			if (S_ISDIR(ZTOI(tzp)->i_mode)) {
+				error = SET_ERROR(EISDIR);
+				goto out;
+			}
+		}
+		/*
+		 * POSIX dictates that when the source and target
+		 * entries refer to the same file object, rename
+		 * must do nothing and exit without error.
+		 */
+		if (szp->z_id == tzp->z_id) {
+			error = 0;
+			goto out;
+		}
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
+	if (sdzp != tdzp) {
+		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tdzp);
+	}
+	if (tzp) {
+		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tzp);
+	}
+
+	zfs_sa_upgrade_txholds(tx, szp);
+	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		if (zl != NULL)
+			zfs_rename_unlock(&zl);
+		zfs_dirent_unlock(sdl);
+		zfs_dirent_unlock(tdl);
+
+		if (sdzp == tdzp)
+			rw_exit(&sdzp->z_name_lock);
+
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			zrele(szp);
+			if (tzp)
+				zrele(tzp);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		zrele(szp);
+		if (tzp)
+			zrele(tzp);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (tzp)	/* Attempt to remove the existing target */
+		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+
+	if (error == 0) {
+		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+		if (error == 0) {
+			szp->z_pflags |= ZFS_AV_MODIFIED;
+			if (tdzp->z_pflags & ZFS_PROJINHERIT)
+				szp->z_pflags |= ZFS_PROJINHERIT;
+
+			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+			ASSERT0(error);
+
+			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+			if (error == 0) {
+				zfs_log_rename(zilog, tx, TX_RENAME |
+				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
+				    sdl->dl_name, tdzp, tdl->dl_name, szp);
+			} else {
+				/*
+				 * At this point, we have successfully created
+				 * the target name, but have failed to remove
+				 * the source name.  Since the create was done
+				 * with the ZRENAMING flag, there are
+				 * complications; for one, the link count is
+				 * wrong.  The easiest way to deal with this
+				 * is to remove the newly created target, and
+				 * return the original error.  This must
+				 * succeed; fortunately, it is very unlikely to
+				 * fail, since we just created it.
+				 */
+				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
+				    ZRENAMING, NULL), ==, 0);
+			}
+		} else {
+			/*
+			 * If we had removed the existing target, the
+			 * subsequent call to zfs_link_create() that adds
+			 * back the same entry name, now referring to the
+			 * new dnode (szp), should not fail.
+			 */
+			ASSERT(tzp == NULL);
+		}
+	}
+
+	dmu_tx_commit(tx);
+out:
+	if (zl != NULL)
+		zfs_rename_unlock(&zl);
+
+	zfs_dirent_unlock(sdl);
+	zfs_dirent_unlock(tdl);
+
+	zfs_znode_update_vfs(sdzp);
+	if (sdzp == tdzp)
+		rw_exit(&sdzp->z_name_lock);
+
+	if (sdzp != tdzp)
+		zfs_znode_update_vfs(tdzp);
+
+	zfs_znode_update_vfs(szp);
+	zrele(szp);
+	if (tzp) {
+		zfs_znode_update_vfs(tzp);
+		zrele(tzp);
+	}
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Insert the indicated symbolic reference entry into the directory.
+ *
+ *	IN:	dzp	- Directory to contain new symbolic link.
+ *		name	- Name of directory entry in dzp.
+ *		vap	- Attributes of new entry.
+ *		link	- Name for new symlink entry.  Must not be NULL:
+ *			  strlen(link) is taken before any check is made.
+ *		cr	- credentials of caller.
+ *		flags	- case flags; FIGNORECASE requests a
+ *			  case-insensitive lookup and a TX_CI log record.
+ *
+ *	OUT:	zpp	- Znode for new symbolic link.  Set to NULL at the
+ *			  "top" label and to the new znode only on success.
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Errors raised directly by this function:
+ *	EINVAL		- name is NULL.
+ *	EILSEQ		- name fails UTF-8 validation on a z_utf8 file
+ *			  system.
+ *	ENAMETOOLONG	- strlen(link) is greater than MAXPATHLEN.
+ *	EDQUOT		- zfs_acl_ids_overquota() reports over quota.
+ * Errors from zfs_acl_ids_create(), zfs_dirent_lock(), zfs_zaccess(),
+ * dmu_tx_assign() and zfs_link_create() are returned unchanged.
+ *
+ * If dmu_tx_assign() returns ERESTART the directory entry lock is
+ * dropped, the function waits with dmu_tx_wait() and retries from "top";
+ * acl_ids is kept across the retry.
+ *
+ * Timestamps:
+ *	dzp - ctime|mtime updated
+ */
+/*ARGSUSED*/
+int
+zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
+    znode_t **zpp, cred_t *cr, int flags)
+{
+	znode_t		*zp;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	zilog_t		*zilog;
+	uint64_t	len = strlen(link);
+	int		error;
+	int		zflg = ZNEW;
+	zfs_acl_ids_t	acl_ids;
+	boolean_t	fuid_dirtied;
+	uint64_t	txtype = TX_SYMLINK;
+	boolean_t	waited = B_FALSE;
+
+	ASSERT(S_ISLNK(vap->va_mode));
+
+	if (name == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
+	zilog = zfsvfs->z_log;
+
+	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
+	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+	if (flags & FIGNORECASE)
+		zflg |= ZCILOOK;
+
+	if (len > MAXPATHLEN) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENAMETOOLONG));
+	}
+
+	if ((error = zfs_acl_ids_create(dzp, 0,
+	    vap, cr, NULL, &acl_ids)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+top:
+	*zpp = NULL;
+
+	/*
+	 * Attempt to lock directory; fail if entry already exists.
+	 */
+	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
+	if (error) {
+		zfs_acl_ids_free(&acl_ids);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EDQUOT));
+	}
+	tx = dmu_tx_create(zfsvfs->z_os);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
+	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE + len);
+	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		zfs_acl_ids_free(&acl_ids);
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	/*
+	 * Create a new object for the symlink.
+	 * for version 4 ZPL datasets the symlink will be an SA attribute
+	 */
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	mutex_enter(&zp->z_lock);
+	if (zp->z_is_sa)
+		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
+		    link, len, tx);
+	else
+		zfs_sa_symlink(zp, link, len, tx);
+	mutex_exit(&zp->z_lock);
+
+	zp->z_size = len;
+	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx);
+	/*
+	 * Insert the new object into the directory.
+	 *
+	 * NOTE(review): this assignment overwrites the value stored in
+	 * error by the SA_ZPL_SYMLINK sa_update() above before it has
+	 * been examined -- confirm that this is intended.
+	 */
+	error = zfs_link_create(dl, zp, tx, ZNEW);
+	if (error != 0) {
+		zfs_znode_delete(zp, tx);
+		remove_inode_hash(ZTOI(zp));
+	} else {
+		if (flags & FIGNORECASE)
+			txtype |= TX_CI;
+		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
+
+		zfs_znode_update_vfs(dzp);
+		zfs_znode_update_vfs(zp);
+	}
+
+	zfs_acl_ids_free(&acl_ids);
+
+	dmu_tx_commit(tx);
+
+	zfs_dirent_unlock(dl);
+
+	if (error == 0) {
+		*zpp = zp;
+
+		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+			zil_commit(zilog, 0);
+	} else {
+		zrele(zp);
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Copy the target path of the symbolic link ip into the buffer described
+ * by uio.
+ *
+ *	IN:	ip	- inode of the symbolic link.
+ *		uio	- destination for the link path.
+ *		cr	- credentials of caller (not used here).
+ *
+ *	RETURN:	0 on success, otherwise the error reported by the SA
+ *		lookup or by zfs_sa_readlink().
+ *
+ * The link text is read while holding the znode's z_lock.
+ *
+ * Timestamps:
+ *	ip - atime updated
+ *	     NOTE(review): no atime update is performed in this function
+ *	     itself; presumably it happens in the caller -- confirm.
+ */
+/* ARGSUSED */
+int
+zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	znode_t		*zp = ITOZ(ip);
+	int		error;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	mutex_enter(&zp->z_lock);
+	if (!zp->z_is_sa) {
+		/* Not an SA znode: use the zfs_sa_readlink() helper. */
+		error = zfs_sa_readlink(zp, uio);
+	} else {
+		/* SA znode: the link text is the SYMLINK attribute. */
+		error = sa_lookup_uio(zp->z_sa_hdl,
+		    SA_ZPL_SYMLINK(zfsvfs), uio);
+	}
+	mutex_exit(&zp->z_lock);
+
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Insert a new entry into directory tdzp referencing szp.
+ *
+ *	IN:	tdzp	- Directory to contain new entry.
+ *		szp	- znode of new entry.
+ *		name	- name of new entry.
+ *		cr	- credentials of caller.
+ *		flags	- case flags.
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * Errors raised directly by this function:
+ *	EINVAL	- name is NULL, or exactly one of szp and tdzp has
+ *		  ZFS_XATTR set.
+ *	EPERM	- szp is a directory; szp's parent is the shares
+ *		  directory; or the caller is not the owner of szp and
+ *		  secpolicy_basic_link() refuses.
+ *	EXDEV	- tdzp has ZFS_PROJINHERIT set and the project IDs
+ *		  differ; szp and tdzp are on different super blocks;
+ *		  or zfsctl_is_node() is true for szp.
+ *	EILSEQ	- name fails UTF-8 validation on a z_utf8 file system.
+ * Errors from sa_lookup(), zfs_zaccess(), zfs_dirent_lock(),
+ * dmu_tx_assign() and zfs_link_create() are returned unchanged.
+ *
+ * When HAVE_TMPFILE is defined and szp has no links but is marked
+ * I_LINKABLE, szp is treated as a tmpfile: on success it is removed from
+ * the unlinked set, nothing is written to the ZIL, and the function
+ * waits for the txg to sync unless sync is disabled.
+ *
+ * Timestamps:
+ *	tdzp - ctime|mtime updated
+ *	 szp - ctime updated
+ */
+/* ARGSUSED */
+int
+zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
+    int flags)
+{
+	struct inode *sip = ZTOI(szp);
+	znode_t		*tzp;
+	zfsvfs_t	*zfsvfs = ZTOZSB(tdzp);
+	zilog_t		*zilog;
+	zfs_dirlock_t	*dl;
+	dmu_tx_t	*tx;
+	int		error;
+	int		zf = ZNEW;
+	uint64_t	parent;
+	uid_t		owner;
+	boolean_t	waited = B_FALSE;
+	boolean_t	is_tmpfile = 0;
+	uint64_t	txg;
+#ifdef HAVE_TMPFILE
+	is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
+#endif
+	ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));
+
+	if (name == NULL)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(tdzp);
+	zilog = zfsvfs->z_log;
+
+	/*
+	 * POSIX dictates that we return EPERM here.
+	 * Better choices include ENOTSUP or EISDIR.
+	 */
+	if (S_ISDIR(sip->i_mode)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	ZFS_VERIFY_ZP(szp);
+
+	/*
+	 * If we are using project inheritance, means if the directory has
+	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
+	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
+	 * such case, we only allow hard link creation in our tree when the
+	 * project IDs are the same.
+	 */
+	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
+	    tdzp->z_projid != szp->z_projid) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EXDEV));
+	}
+
+	/*
+	 * We check i_sb because snapshots and the ctldir must have different
+	 * super blocks.
+	 */
+	if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EXDEV));
+	}
+
+	/* Prevent links to .zfs/shares files */
+
+	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (uint64_t))) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	if (parent == zfsvfs->z_shares_dir) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	if (zfsvfs->z_utf8 && u8_validate(name,
+	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EILSEQ));
+	}
+	if (flags & FIGNORECASE)
+		zf |= ZCILOOK;
+
+	/*
+	 * We do not support links between attributes and non-attributes
+	 * because of the potential security risk of creating links
+	 * into "normal" file space in order to circumvent restrictions
+	 * imposed in attribute space.
+	 */
+	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EINVAL));
+	}
+
+	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
+	    cr, ZFS_OWNER);
+	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EPERM));
+	}
+
+	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+top:
+	/*
+	 * Attempt to lock directory; fail if entry already exists.
+	 */
+	error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
+	if (error) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
+	if (is_tmpfile)
+		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+	zfs_sa_upgrade_txholds(tx, szp);
+	zfs_sa_upgrade_txholds(tx, tdzp);
+	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+	if (error) {
+		zfs_dirent_unlock(dl);
+		if (error == ERESTART) {
+			waited = B_TRUE;
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
+			goto top;
+		}
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+	/* unmark z_unlinked so zfs_link_create will not reject */
+	if (is_tmpfile)
+		szp->z_unlinked = B_FALSE;
+	error = zfs_link_create(dl, szp, tx, 0);
+
+	if (error == 0) {
+		uint64_t txtype = TX_LINK;
+		/*
+		 * tmpfile is created to be in z_unlinkedobj, so remove it.
+		 * Also, we don't log in ZIL, because all previous file
+		 * operation on the tmpfile are ignored by ZIL. Instead we
+		 * always wait for txg to sync to make sure all previous
+		 * operation are sync safe.
+		 */
+		if (is_tmpfile) {
+			VERIFY(zap_remove_int(zfsvfs->z_os,
+			    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
+		} else {
+			if (flags & FIGNORECASE)
+				txtype |= TX_CI;
+			zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
+		}
+	} else if (is_tmpfile) {
+		/* restore z_unlinked since linking failed */
+		szp->z_unlinked = B_TRUE;
+	}
+	txg = dmu_tx_get_txg(tx);
+	dmu_tx_commit(tx);
+
+	zfs_dirent_unlock(dl);
+
+	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
+
+	if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
+		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
+
+	zfs_znode_update_vfs(tdzp);
+	zfs_znode_update_vfs(szp);
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * Commit callback that zfs_putpage() hands to zfs_log_write() when the
+ * caller waits synchronously (for_sync): clear the page's error flag and
+ * end writeback on it.
+ *
+ *	IN:	arg	- the struct page that was written.
+ */
+static void
+zfs_putpage_sync_commit_cb(void *arg)
+{
+	struct page *page = (struct page *)arg;
+
+	ClearPageError(page);
+	end_page_writeback(page);
+}
+
+/*
+ * Commit callback that zfs_putpage() hands to zfs_log_write() when the
+ * caller does not wait synchronously.  In addition to ending writeback on
+ * the page it drops the znode's z_async_writes_cnt, which zfs_putpage()
+ * raised before starting the write.
+ *
+ *	IN:	arg	- the struct page that was written.
+ */
+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+	struct page *page = arg;
+	/* Resolve the owning znode before writeback is ended on the page. */
+	struct inode *host = page->mapping->host;
+	znode_t *owner = ITOZ(host);
+
+	ClearPageError(page);
+	end_page_writeback(page);
+	atomic_dec_32(&owner->z_async_writes_cnt);
+}
+
+/*
+ * Push a page out to disk, once the page is on stable storage the
+ * registered commit callback will be run as notification of completion.
+ *
+ *	IN:	ip	 - page mapped for inode.
+ *		pp	 - page to push (page is locked)
+ *		wbc	 - writeback control data
+ *		for_sync - does the caller intend to wait synchronously for the
+ *			   page writeback to complete?
+ *
+ *	RETURN:	0 if success
+ *		error code if failure
+ *
+ * The page is unlocked on every return path.  0 is also returned when
+ * there is nothing to write (page beyond EOF, page no longer dirty or no
+ * longer belonging to the same mapping, or already under writeback).
+ *
+ * Timestamps:
+ *	ip - ctime|mtime updated
+ */
+/* ARGSUSED */
+int
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+    boolean_t for_sync)
+{
+	znode_t		*zp = ITOZ(ip);
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	loff_t		offset;
+	loff_t		pgoff;
+	unsigned int	pglen;
+	dmu_tx_t	*tx;
+	caddr_t		va;
+	int		err = 0;
+	uint64_t	mtime[2], ctime[2];
+	sa_bulk_attr_t	bulk[3];
+	int		cnt = 0;
+	struct address_space *mapping;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	ASSERT(PageLocked(pp));
+
+	pgoff = page_offset(pp);	/* Page byte-offset in file */
+	offset = i_size_read(ip);	/* File length in bytes */
+	pglen = MIN(PAGE_SIZE,		/* Page length in bytes */
+	    P2ROUNDUP(offset, PAGE_SIZE)-pgoff);
+
+	/* Page is beyond end of file */
+	if (pgoff >= offset) {
+		unlock_page(pp);
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/* Truncate page length to end of file */
+	if (pgoff + pglen > offset)
+		pglen = offset - pgoff;
+
+#if 0
+	/*
+	 * FIXME: Allow mmap writes past its quota.  The correct fix
+	 * is to register a page_mkwrite() handler to count the page
+	 * against its quota when it is about to be dirtied.
+	 */
+	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
+	    KUID_TO_SUID(ip->i_uid)) ||
+	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
+	    KGID_TO_SGID(ip->i_gid)) ||
+	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
+	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
+	    zp->z_projid))) {
+		err = EDQUOT;
+	}
+#endif
+
+	/*
+	 * The ordering here is critical and must adhere to the following
+	 * rules in order to avoid deadlocking in either zfs_read() or
+	 * zfs_free_range() due to a lock inversion.
+	 *
+	 * 1) The page must be unlocked prior to acquiring the range lock.
+	 *    This is critical because zfs_read() calls find_lock_page()
+	 *    which may block on the page lock while holding the range lock.
+	 *
+	 * 2) Before setting or clearing write back on a page the range lock
+	 *    must be held in order to prevent a lock inversion with the
+	 *    zfs_free_range() function.
+	 *
+	 * This presents a problem because upon entering this function the
+	 * page lock is already held.  To safely acquire the range lock the
+	 * page lock must be dropped.  This creates a window where another
+	 * process could truncate, invalidate, dirty, or write out the page.
+	 *
+	 * Therefore, after successfully reacquiring the range and page locks
+	 * the current page state is checked.  In the common case everything
+	 * will be as is expected and it can be written out.  However, if
+	 * the page state has changed it must be handled accordingly.
+	 */
+	/*
+	 * Remember the mapping so a change can be detected after relocking,
+	 * and redirty the page before the page lock is dropped.
+	 */
+	mapping = pp->mapping;
+	redirty_page_for_writepage(wbc, pp);
+	unlock_page(pp);
+
+	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
+	    pgoff, pglen, RL_WRITER);
+	lock_page(pp);
+
+	/* Page mapping changed or it was no longer dirty, we're done */
+	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
+		unlock_page(pp);
+		zfs_rangelock_exit(lr);
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/* Another process started writeback; block on it if required */
+	if (PageWriteback(pp)) {
+		unlock_page(pp);
+		zfs_rangelock_exit(lr);
+
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			/*
+			 * Speed up any non-sync page writebacks since
+			 * they may take several seconds to complete.
+			 * Refer to the comment in zpl_fsync() (when
+			 * HAVE_FSYNC_RANGE is defined) for details.
+			 */
+			if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+				zil_commit(zfsvfs->z_log, zp->z_id);
+			}
+
+			/* Wait only if writeback is still in progress. */
+			if (PageWriteback(pp))
+#ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
+				folio_wait_bit(page_folio(pp), PG_writeback);
+#else
+				wait_on_page_bit(pp, PG_writeback);
+#endif
+		}
+
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/* Clear the dirty flag now that the required locks are held */
+	if (!clear_page_dirty_for_io(pp)) {
+		unlock_page(pp);
+		zfs_rangelock_exit(lr);
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+
+	/*
+	 * Counterpart for redirty_page_for_writepage() above.  This page
+	 * was in fact not skipped and should not be counted as if it were.
+	 */
+	wbc->pages_skipped--;
+	/* Balanced by zfs_putpage_async_commit_cb() or the error path below. */
+	if (!for_sync)
+		atomic_inc_32(&zp->z_async_writes_cnt);
+	set_page_writeback(pp);
+	unlock_page(pp);
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+
+	err = dmu_tx_assign(tx, TXG_NOWAIT);
+	if (err != 0) {
+		if (err == ERESTART)
+			dmu_tx_wait(tx);
+
+		/*
+		 * The tx could not be assigned: abort it, mark the page
+		 * dirty again, end writeback and undo the async write count
+		 * before returning the error to the caller.
+		 */
+		dmu_tx_abort(tx);
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
+#else
+		__set_page_dirty_nobuffers(pp);
+#endif
+		ClearPageError(pp);
+		end_page_writeback(pp);
+		if (!for_sync)
+			atomic_dec_32(&zp->z_async_writes_cnt);
+		zfs_rangelock_exit(lr);
+		ZFS_EXIT(zfsvfs);
+		return (err);
+	}
+
+	/* Copy the page contents into the object within the assigned tx. */
+	va = kmap(pp);
+	ASSERT3U(pglen, <=, PAGE_SIZE);
+	dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
+	kunmap(pp);
+
+	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+
+	/* Preserve the mtime and ctime provided by the inode */
+	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
+	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+	zp->z_atime_dirty = B_FALSE;
+	zp->z_seq++;
+
+	/* The result of this update is what the function finally returns. */
+	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
+
+	/* Pick the commit callback that matches the caller's for_sync intent. */
+	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
+	dmu_tx_commit(tx);
+
+	zfs_rangelock_exit(lr);
+
+	if (wbc->sync_mode != WB_SYNC_NONE) {
+		/*
+		 * Note that this is rarely called under writepages(), because
+		 * writepages() normally handles the entire commit for
+		 * performance reasons.
+		 */
+		zil_commit(zfsvfs->z_log, zp->z_id);
+	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+		/*
+		 * If the caller does not intend to wait synchronously
+		 * for this page writeback to complete and there are active
+		 * synchronous calls on this file, do a commit so that
+		 * the latter don't accidentally end up waiting for
+		 * our writeback to complete. Refer to the comment in
+		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+		 */
+		zil_commit(zfsvfs->z_log, zp->z_id);
+	}
+
+	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
+
+	ZFS_EXIT(zfsvfs);
+	return (err);
+}
+
+/*
+ * Write the inode's mode, atime, mtime and ctime back to the znode's
+ * system attributes after the inode has been dirtied.
+ *
+ *	IN:	ip	- inode that was dirtied.
+ *		flags	- dirty flags passed by the VFS; when exactly
+ *			  I_DIRTY_TIME the atime is only marked dirty.
+ *
+ *	RETURN:	0 on success (or when the filesystem is read-only or a
+ *		snapshot), error code on failure.
+ */
+int
+zfs_dirty_inode(struct inode *ip, int flags)
+{
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	znode_t		*zp = ITOZ(ip);
+	uint64_t	mode, atime[2], mtime[2], ctime[2];
+	sa_bulk_attr_t	attrs[4];
+	int		nattrs = 0;
+	int		err;
+	dmu_tx_t	*tx;
+
+	/* Nothing can be written on a read-only filesystem or a snapshot. */
+	if (zfs_is_readonly(zfsvfs))
+		return (0);
+	if (dmu_objset_is_snapshot(zfsvfs->z_os))
+		return (0);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+#ifdef I_DIRTY_TIME
+	/*
+	 * Lazytime semantic (introduced in Linux 4.0): update_time passes
+	 * this flag only when lazytime is set (I_DIRTY_SYNC is also set
+	 * when it is not).  ZFS manages mtime and ctime itself, so only
+	 * the atime needs to be remembered as dirty.
+	 */
+	if (flags == I_DIRTY_TIME) {
+		zp->z_atime_dirty = B_TRUE;
+		ZFS_EXIT(zfsvfs);
+		return (0);
+	}
+#endif
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err != 0) {
+		dmu_tx_abort(tx);
+		ZFS_EXIT(zfsvfs);
+		return (err);
+	}
+
+	mutex_enter(&zp->z_lock);
+	zp->z_atime_dirty = B_FALSE;
+
+	/* Take the mode and all three timestamps from the inode. */
+	mode = ip->i_mode;
+	ZFS_TIME_ENCODE(&ip->i_atime, atime);
+	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
+	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+	zp->z_mode = mode;
+
+	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &atime, 16);
+	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_MTIME(zfsvfs), NULL,
+	    &mtime, 16);
+	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, 16);
+
+	err = sa_bulk_update(zp->z_sa_hdl, attrs, nattrs, tx);
+	mutex_exit(&zp->z_lock);
+
+	dmu_tx_commit(tx);
+	ZFS_EXIT(zfsvfs);
+	return (err);
+}
+
+/*
+ * Called when an inode goes inactive: flush a dirty atime to the znode's
+ * system attributes (unless the file is unlinked) and then hand the znode
+ * to zfs_zinactive().  Nothing is done when the znode has no SA handle.
+ *
+ *	IN:	ip	- inode going inactive.
+ */
+/*ARGSUSED*/
+void
+zfs_inactive(struct inode *ip)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	znode_t	*zp = ITOZ(ip);
+	boolean_t took_lock = B_FALSE;
+
+	/* Only read lock if we haven't already write locked, e.g. rollback */
+	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
+		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+		took_lock = B_TRUE;
+	}
+
+	if (zp->z_sa_hdl != NULL) {
+		if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) {
+			uint64_t atime[2];
+			dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
+
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+			zfs_sa_upgrade_txholds(tx, zp);
+			if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
+				ZFS_TIME_ENCODE(&ip->i_atime, atime);
+				mutex_enter(&zp->z_lock);
+				/* Best effort: the result is ignored. */
+				(void) sa_update(zp->z_sa_hdl,
+				    SA_ZPL_ATIME(zfsvfs), (void *)&atime,
+				    sizeof (atime), tx);
+				zp->z_atime_dirty = B_FALSE;
+				mutex_exit(&zp->z_lock);
+				dmu_tx_commit(tx);
+			} else {
+				dmu_tx_abort(tx);
+			}
+		}
+
+		zfs_zinactive(zp);
+	}
+
+	if (took_lock)
+		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+}
+
+/*
+ * Fill a page with data read from disk.  When the page extends past the
+ * end of the file only the in-file part is read and the remainder of the
+ * page is zeroed.
+ *
+ *	IN:	ip	- inode of the file backing the page.
+ *		pp	- page to fill.
+ *
+ *	RETURN:	0 on success (page marked up to date), error code on
+ *		failure (page marked in error); ECKSUM is reported as EIO.
+ */
+static int
+zfs_fillpage(struct inode *ip, struct page *pp)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	loff_t i_size = i_size_read(ip);
+	u_offset_t io_off = page_offset(pp);
+	size_t io_len = PAGE_SIZE;
+	void *kaddr;
+	int rc;
+
+	ASSERT3U(io_off, <, i_size);
+
+	/* Clamp the read to the end of the file. */
+	if (io_off + io_len > i_size)
+		io_len = i_size - io_off;
+
+	kaddr = kmap(pp);
+	rc = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, io_len, kaddr,
+	    DMU_READ_PREFETCH);
+	if (io_len != PAGE_SIZE)
+		memset((char *)kaddr + io_len, 0, PAGE_SIZE - io_len);
+	kunmap(pp);
+
+	if (rc == 0) {
+		ClearPageError(pp);
+		SetPageUptodate(pp);
+		return (0);
+	}
+
+	/* convert checksum errors into IO errors */
+	if (rc == ECKSUM)
+		rc = SET_ERROR(EIO);
+
+	SetPageError(pp);
+	ClearPageUptodate(pp);
+	return (rc);
+}
+
+/*
+ * Read one page of file data by calling zfs_fillpage(), and account the
+ * read in the dataset kstats when it succeeds.
+ *
+ *	IN:	ip	 - inode of file to get data from.
+ *		pp	 - page to read
+ *
+ *	RETURN:	0 on success, error code on failure.
+ *
+ * Timestamps:
+ *	ip - atime updated (per the original header; this function does not
+ *	     touch atime directly)
+ */
+/* ARGSUSED */
+int
+zfs_getpage(struct inode *ip, struct page *pp)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int rc;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	rc = zfs_fillpage(ip, pp);
+	if (rc != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (rc);
+	}
+
+	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
+	ZFS_EXIT(zfsvfs);
+
+	return (0);
+}
+
+/*
+ * Check ZFS specific permissions to memory map a section of a file.
+ * Only validation is performed; no mapping is created here.
+ *
+ *	IN:	ip	- inode of the file to mmap
+ *		off	- file offset
+ *		addrp	- start address in memory region
+ *		len	- length of memory region
+ *		vm_flags- address flags
+ *
+ *	RETURN:	0 if success
+ *		EPERM  - writable mapping of an immutable, read-only or
+ *			 append-only file
+ *		EACCES - readable/executable mapping of a quarantined file
+ *		ENXIO  - negative offset, or off + len exceeds MAXOFFSET_T
+ */
+/*ARGSUSED*/
+int
+zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
+    unsigned long vm_flags)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	znode_t  *zp = ITOZ(ip);
+	int rc = 0;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
+	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
+		rc = SET_ERROR(EPERM);
+	} else if ((vm_flags & (VM_READ | VM_EXEC)) &&
+	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
+		rc = SET_ERROR(EACCES);
+	} else if (off < 0 || len > MAXOFFSET_T - off) {
+		rc = SET_ERROR(ENXIO);
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (rc);
+}
+
+/*
+ * Free or allocate space in a file.  Currently, this function only
+ * supports the `F_FREESP' command.  However, this command is somewhat
+ * misnamed, as its functionality includes the ability to allocate as
+ * well as free space.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		cmd	- action to take (only F_FREESP supported).
+ *		bfp	- section of file to free/alloc.
+ *		flag	- current file open mode flags.
+ *		offset	- current file offset.
+ *		cr	- credentials of caller.
+ *
+ *	RETURN:	0 on success, error code on failure (EINVAL for an
+ *		unsupported cmd or a negative length, EROFS on a read-only
+ *		filesystem, or whatever zfs_zaccess()/zfs_freesp() return).
+ *
+ * Timestamps:
+ *	zp - ctime|mtime updated
+ */
+/* ARGSUSED */
+int
+zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
+    offset_t offset, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
+	uint64_t	start, length;
+	int		rc;
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	if (cmd != F_FREESP) {
+		rc = SET_ERROR(EINVAL);
+	} else if (zfs_is_readonly(zfsvfs)) {
+		/*
+		 * Callers might not be able to detect properly that we are
+		 * read-only, so check it explicitly here.
+		 */
+		rc = SET_ERROR(EROFS);
+	} else if (bfp->l_len < 0) {
+		rc = SET_ERROR(EINVAL);
+	} else {
+		/*
+		 * Permissions aren't checked on Solaris because on this OS
+		 * zfs_space() can only be called with an opened file handle.
+		 * On Linux we can get here through truncate_range() which
+		 * operates directly on inodes, so we need to check access
+		 * rights.
+		 */
+		rc = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr);
+		if (rc == 0) {
+			start = bfp->l_start;
+			length = bfp->l_len; /* 0: from start to end of file */
+			rc = zfs_freesp(zp, start, length, flag, TRUE);
+		}
+	}
+
+	ZFS_EXIT(zfsvfs);
+	return (rc);
+}
+
+/*
+ * Build a short file identifier for an inode from its object number and
+ * generation, both stored least-significant byte first.
+ *
+ *	IN:	ip	- inode to identify.
+ *	IN/OUT:	fidp	- fid buffer; when fid_len is smaller than
+ *			  SHORT_FID_LEN it is set to the required length.
+ *
+ *	RETURN:	0 on success, ENOSPC when the buffer is too small, or the
+ *		error from looking up the generation attribute.
+ */
+/*ARGSUSED*/
+int
+zfs_fid(struct inode *ip, fid_t *fidp)
+{
+	znode_t		*zp = ITOZ(ip);
+	zfsvfs_t	*zfsvfs = ITOZSB(ip);
+	uint64_t	object = zp->z_id;
+	zfid_short_t	*zfid = (zfid_short_t *)fidp;
+	uint64_t	gen64;
+	uint32_t	gen;
+	int		idx, rc;
+
+	ZFS_ENTER(zfsvfs);
+
+	if (fidp->fid_len < SHORT_FID_LEN) {
+		fidp->fid_len = SHORT_FID_LEN;
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOSPC));
+	}
+
+	ZFS_VERIFY_ZP(zp);
+
+	rc = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &gen64,
+	    sizeof (gen64));
+	if (rc != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (rc);
+	}
+
+	/* Must have a non-zero generation number to distinguish from .zfs */
+	gen = (uint32_t)gen64;
+	if (gen == 0)
+		gen = 1;
+
+	zfid->zf_len = SHORT_FID_LEN;
+
+	/* Emit one byte per iteration, shifting the value down as we go. */
+	for (idx = 0; idx < sizeof (zfid->zf_object); idx++, object >>= 8)
+		zfid->zf_object[idx] = (uint8_t)object;
+
+	for (idx = 0; idx < sizeof (zfid->zf_gen); idx++, gen >>= 8)
+		zfid->zf_gen[idx] = (uint8_t)gen;
+
+	ZFS_EXIT(zfsvfs);
+	return (0);
+}
+
+#if defined(_KERNEL)
+/* Symbols exported from this file; only compiled for kernel builds. */
+EXPORT_SYMBOL(zfs_open);
+EXPORT_SYMBOL(zfs_close);
+EXPORT_SYMBOL(zfs_lookup);
+EXPORT_SYMBOL(zfs_create);
+EXPORT_SYMBOL(zfs_tmpfile);
+EXPORT_SYMBOL(zfs_remove);
+EXPORT_SYMBOL(zfs_mkdir);
+EXPORT_SYMBOL(zfs_rmdir);
+EXPORT_SYMBOL(zfs_readdir);
+EXPORT_SYMBOL(zfs_getattr_fast);
+EXPORT_SYMBOL(zfs_setattr);
+EXPORT_SYMBOL(zfs_rename);
+EXPORT_SYMBOL(zfs_symlink);
+EXPORT_SYMBOL(zfs_readlink);
+EXPORT_SYMBOL(zfs_link);
+EXPORT_SYMBOL(zfs_inactive);
+EXPORT_SYMBOL(zfs_space);
+EXPORT_SYMBOL(zfs_fid);
+EXPORT_SYMBOL(zfs_getpage);
+EXPORT_SYMBOL(zfs_putpage);
+EXPORT_SYMBOL(zfs_dirty_inode);
+EXPORT_SYMBOL(zfs_map);
+
+/* zfs_delete_blocks is registered as a ulong module parameter, mode 0644. */
+/* BEGIN CSTYLED */
+module_param(zfs_delete_blocks, ulong, 0644);
+MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
+/* END CSTYLED */
+
+#endif

diff --git a/zfs/module/os/linux/zfs/zfs_znode.c b/zfs/module/os/linux/zfs/zfs_znode.c
new file mode 100644
index 0000000..0236b32
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zfs_znode.c

@@ -0,0 +1,2262 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ */
+
+/* Portions Copyright 2007 Jeremy Teo */
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/mntent.h>
+#include <sys/u8_textprep.h>
+#include <sys/dsl_dataset.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/atomic.h>
+#include <sys/zfs_dir.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_rlock.h>
+#include <sys/zfs_fuid.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zpl.h>
+#endif /* _KERNEL */
+
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
+#include <sys/zfs_refcount.h>
+#include <sys/stat.h>
+#include <sys/zap.h>
+#include <sys/zfs_znode.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
+#include <sys/zfs_stat.h>
+
+#include "zfs_prop.h"
+#include "zfs_comutil.h"
+
+/*
+ * Functions needed for userland (ie: libzpool) are not put under
+ * #ifdef _KERNEL; the rest of the functions have dependencies
+ * (such as VFS logic) that will not compile easily in userland.
+ */
+#ifdef _KERNEL
+
+/* kmem cache of znode_t objects; created in zfs_znode_init(). */
+static kmem_cache_t *znode_cache = NULL;
+/* kmem cache of znode_hold_t objects; created in zfs_znode_init(). */
+static kmem_cache_t *znode_hold_cache = NULL;
+/* NOTE(review): presumably sizes the per-object hold lock array; confirm. */
+unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
+
+/*
+ * This is used by the test suite so that it can delay znodes from being
+ * freed in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
+/*
+ * Range lock callback registered for z_rangelock, invoked when an
+ * RL_WRITER or RL_APPEND lock is acquired.  It adjusts the requested
+ * offset and length using znode-specific information and turns RL_APPEND
+ * into RL_WRITER.  It is called with the rangelock_t's rl_lock held,
+ * which avoids races.
+ *
+ *	IN/OUT:	new	- the range being locked; may be rewritten.
+ *	IN:	arg	- the znode_t that owns the range lock.
+ */
+static void
+zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
+{
+	znode_t *zp = arg;
+	uint64_t end_size;
+
+	/*
+	 * An append becomes a write that starts at the current end of file.
+	 */
+	if (new->lr_type == RL_APPEND) {
+		new->lr_type = RL_WRITER;
+		new->lr_offset = zp->z_size;
+	}
+
+	/* The request fits in the current block: keep it as is. */
+	end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
+	if (end_size <= zp->z_blksz)
+		return;
+
+	/* A power-of-two block already at the maximum size cannot grow. */
+	if (ISP2(zp->z_blksz) && zp->z_blksz >= ZTOZSB(zp)->z_max_blksz)
+		return;
+
+	/*
+	 * The block size needs to grow, so lock the whole file range.
+	 */
+	new->lr_offset = 0;
+	new->lr_length = UINT64_MAX;
+}
+
+/*
+ * kmem cache constructor for znode_t: initializes the embedded inode, the
+ * list linkage, the locks and the range lock, and clears the cached
+ * pointers and counters.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
+{
+	znode_t *zp = buf;
+
+	inode_init_once(ZTOI(zp));
+	list_link_init(&zp->z_link_node);
+
+	/* Plain fields start out empty. */
+	zp->z_dirlocks = NULL;
+	zp->z_acl_cached = NULL;
+	zp->z_xattr_cached = NULL;
+	zp->z_xattr_parent = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
+	/* Locks; note that z_name_lock is created with RW_NOLOCKDEP. */
+	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
+	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
+	rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
+	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
+
+	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for znode_t: checks that the znode is unlinked
+ * from the znode list and holds no cached state or in-flight write
+ * counts, then tears down the locks set up by the constructor.
+ */
+/*ARGSUSED*/
+static void
+zfs_znode_cache_destructor(void *buf, void *arg)
+{
+	znode_t *zp = buf;
+
+	/* Sanity checks on the state being released. */
+	ASSERT(!list_link_active(&zp->z_link_node));
+	ASSERT3P(zp->z_dirlocks, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
+
+	mutex_destroy(&zp->z_lock);
+	mutex_destroy(&zp->z_acl_lock);
+	rw_destroy(&zp->z_parent_lock);
+	rw_destroy(&zp->z_name_lock);
+	rw_destroy(&zp->z_xattr_lock);
+	zfs_rangelock_fini(&zp->z_rangelock);
+}
+
+/*
+ * kmem cache constructor for znode_hold_t: starts with a zero reference
+ * count and an initialized zh_lock.  Always returns 0.
+ */
+static int
+zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
+{
+	znode_hold_t *hold = buf;
+
+	hold->zh_refcount = 0;
+	mutex_init(&hold->zh_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for znode_hold_t: destroys the zh_lock that the
+ * constructor initialized.
+ */
+static void
+zfs_znode_hold_cache_destructor(void *buf, void *arg)
+{
+	znode_hold_t *hold = buf;
+
+	mutex_destroy(&hold->zh_lock);
+}
+
+/*
+ * Create the two kmem caches used by this file: one for znode_t and one
+ * for znode_hold_t.  Both cache pointers must still be NULL on entry.
+ */
+void
+zfs_znode_init(void)
+{
+	/*
+	 * The znode cache is created with the KMC_SLAB hint so that, on the
+	 * Linux slab, it is backed by kmalloc() and any wait_on_bit()
+	 * operations on the related inode operate properly.
+	 */
+	ASSERT(znode_cache == NULL);
+	znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t),
+	    0, zfs_znode_cache_constructor, zfs_znode_cache_destructor,
+	    NULL, NULL, NULL, KMC_SLAB);
+
+	/* The hold cache needs no special flags. */
+	ASSERT(znode_hold_cache == NULL);
+	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
+	    sizeof (znode_hold_t), 0,
+	    zfs_znode_hold_cache_constructor,
+	    zfs_znode_hold_cache_destructor,
+	    NULL, NULL, NULL, 0);
+}
+
+/*
+ * Destroy the caches created by zfs_znode_init() and reset both cache
+ * pointers to NULL.  A cache that was never created is skipped.
+ */
+void
+zfs_znode_fini(void)
+{
+	if (znode_cache != NULL) {
+		kmem_cache_destroy(znode_cache);
+		znode_cache = NULL;
+	}
+
+	if (znode_hold_cache != NULL) {
+		kmem_cache_destroy(znode_hold_cache);
+		znode_hold_cache = NULL;
+	}
+}
+
+/*
+ * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
+ * serialize access to a znode and its SA buffer while the object is being
+ * created or destroyed.  This kind of locking would normally reside in the
+ * znode itself but in this case that's impossible because the znode and SA
+ * buffer may not yet exist.  Therefore the locking is handled externally
+ * with an array of mutexes and AVL trees which contain per-object locks.
+ *
+ * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
+ * in to the correct AVL tree and finally the per-object lock is held.  In
+ * zfs_znode_hold_exit() the process is reversed.  The per-object lock is
+ * released, removed from the AVL tree and destroyed if there are no waiters.
+ *
+ * This scheme has two important properties:
+ *
+ * 1) No memory allocations are performed while holding one of the z_hold_locks.
+ *    This ensures evict(), which can be called from direct memory reclaim, will
+ *    never block waiting on a z_hold_locks which just happens to have hashed
+ *    to the same index.
+ *
+ * 2) All locks used to serialize access to an object are per-object and never
+ *    shared.  This minimizes lock contention without creating a large number
+ *    of dedicated locks.
+ *
+ * On the downside it does require znode_hold_t structures to be frequently
+ * allocated and freed.  However, because these are backed by a kmem cache
+ * and very short lived this cost is minimal.
+ */
+/* AVL comparator for znode_hold_t nodes: orders them by zh_obj. */
+int
+zfs_znode_hold_compare(const void *a, const void *b)
+{
+	const znode_hold_t *lhs = a;
+	const znode_hold_t *rhs = b;
+
+	return (TREE_CMP(lhs->zh_obj, rhs->zh_obj));
+}
+
+/*
+ * Report whether a hold exists for the object and MUTEX_HELD() is true
+ * for its zh_lock.  Marked __maybe_unused; the callers visible here are
+ * all inside ASSERT()s.
+ *
+ *	RETURN:	B_TRUE if such a hold is found, B_FALSE otherwise.
+ */
+static boolean_t __maybe_unused
+zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
+{
+	int i = ZFS_OBJ_HASH(zfsvfs, obj);
+	boolean_t held = B_FALSE;
+	znode_hold_t search;
+	znode_hold_t *zh;
+
+	search.zh_obj = obj;
+
+	mutex_enter(&zfsvfs->z_hold_locks[i]);
+	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
+	if (zh != NULL && MUTEX_HELD(&zh->zh_lock))
+		held = B_TRUE;
+	mutex_exit(&zfsvfs->z_hold_locks[i]);
+
+	return (held);
+}
+
+/*
+ * Take the per-object hold for obj, creating it if it does not exist yet,
+ * and return it with zh_lock held.
+ *
+ * The candidate hold is allocated before the hash bucket lock is taken and
+ * freed after it is dropped, so no allocation or free happens while a
+ * z_hold_locks mutex is held.
+ */
+static znode_hold_t *
+zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
+{
+	int i = ZFS_OBJ_HASH(zfsvfs, obj);
+	znode_hold_t search;
+	znode_hold_t *zh_new;
+	znode_hold_t *zh;
+
+	search.zh_obj = obj;
+	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
+
+	mutex_enter(&zfsvfs->z_hold_locks[i]);
+	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
+	if (unlikely(zh != NULL)) {
+		/* Somebody already holds this object; share their entry. */
+		ASSERT3U(zh->zh_obj, ==, obj);
+	} else {
+		zh = zh_new;
+		zh->zh_obj = obj;
+		avl_add(&zfsvfs->z_hold_trees[i], zh);
+	}
+	zh->zh_refcount++;
+	ASSERT3S(zh->zh_refcount, >, 0);
+	mutex_exit(&zfsvfs->z_hold_locks[i]);
+
+	/* The preallocated entry was not needed. */
+	if (zh != zh_new)
+		kmem_cache_free(znode_hold_cache, zh_new);
+
+	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
+	mutex_enter(&zh->zh_lock);
+
+	return (zh);
+}
+
+/*
+ * Release a hold taken by zfs_znode_hold_enter(): drop zh_lock, drop one
+ * reference and, when it was the last one, remove the hold from its AVL
+ * tree and free it after the hash bucket lock has been released.
+ */
+static void
+zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
+{
+	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
+	boolean_t last_ref;
+
+	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
+	mutex_exit(&zh->zh_lock);
+
+	mutex_enter(&zfsvfs->z_hold_locks[i]);
+	ASSERT3S(zh->zh_refcount, >, 0);
+	zh->zh_refcount--;
+	last_ref = (zh->zh_refcount == 0) ? B_TRUE : B_FALSE;
+	if (last_ref)
+		avl_remove(&zfsvfs->z_hold_trees[i], zh);
+	mutex_exit(&zfsvfs->z_hold_locks[i]);
+
+	if (last_ref)
+		kmem_cache_free(znode_hold_cache, zh);
+}
+
+/* Return the 64-bit device number converted to the native dev_t type. */
+dev_t
+zfs_cmpldev(uint64_t dev)
+{
+	return ((dev_t)dev);
+}
+
+/*
+ * Attach an SA handle to a znode under z_lock and record whether the
+ * object is of type DMU_OT_SA.
+ *
+ *	IN:	zfsvfs	 - filesystem the znode belongs to.
+ *		zp	 - znode to initialize; must not have a handle yet.
+ *		db	 - dbuf used to obtain a handle when sa_hdl is NULL.
+ *		obj_type - DMU object type of the znode's object.
+ *		sa_hdl	 - existing handle to adopt, or NULL to get a
+ *			   shared one from db.
+ */
+static void
+zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
+    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
+{
+	ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
+
+	mutex_enter(&zp->z_lock);
+
+	ASSERT(zp->z_sa_hdl == NULL);
+	ASSERT(zp->z_acl_cached == NULL);
+	if (sa_hdl != NULL) {
+		/* Adopt the caller's handle and point it back at the znode. */
+		zp->z_sa_hdl = sa_hdl;
+		sa_set_userp(sa_hdl, zp);
+	} else {
+		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
+		    SA_HDL_SHARED, &zp->z_sa_hdl));
+	}
+
+	if (obj_type == DMU_OT_SA)
+		zp->z_is_sa = B_TRUE;
+	else
+		zp->z_is_sa = B_FALSE;
+
+	mutex_exit(&zp->z_lock);
+}
+
+/*
+ * Destroy the znode's SA handle and clear z_sa_hdl.
+ *
+ * The caller must hold the per-object znode hold, or the znode must be
+ * unlinked, or z_teardown_inactive_lock must be write-held (see the
+ * assertion below).
+ */
+void
+zfs_znode_dmu_fini(znode_t *zp)
+{
+	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
+	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
+
+	/* The pointer is cleared only after the handle has been destroyed. */
+	sa_handle_destroy(zp->z_sa_hdl);
+	zp->z_sa_hdl = NULL;
+}
+
+/*
+ * Called by new_inode() to allocate a new inode.  The inode handed back
+ * is the one embedded in a znode taken from znode_cache.
+ *
+ *	IN:	sb	- super block (not used here).
+ *	OUT:	ip	- set to the embedded inode.
+ *
+ *	RETURN:	always 0.
+ */
+int
+zfs_inode_alloc(struct super_block *sb, struct inode **ip)
+{
+	znode_t *node = kmem_cache_alloc(znode_cache, KM_SLEEP);
+
+	*ip = ZTOI(node);
+
+	return (0);
+}
+
+/*
+ * Called in multiple places when an inode should be destroyed.  Unlinks
+ * the znode from the filesystem's znode list (if it is on it), releases
+ * the cached ACL and xattr nvlist, and returns the znode to znode_cache.
+ */
+void
+zfs_inode_destroy(struct inode *ip)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+
+	/* List membership and the znode count change under z_znodes_lock. */
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	if (list_link_active(&zp->z_link_node)) {
+		list_remove(&zfsvfs->z_all_znodes, zp);
+		zfsvfs->z_nr_znodes--;
+	}
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	if (zp->z_acl_cached != NULL) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
+	if (zp->z_xattr_cached != NULL) {
+		nvlist_free(zp->z_xattr_cached);
+		zp->z_xattr_cached = NULL;
+	}
+
+	kmem_cache_free(znode_cache, zp);
+}
+
+/*
+ * Install the inode, file and address-space operation tables that match
+ * the file type encoded in ip->i_mode.  An unrecognized type is reported
+ * through zfs_panic_recover() and then treated as a regular file with
+ * mode 0644.
+ */
+static void
+zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
+{
+	znode_t *zp = ITOZ(ip);
+	uint64_t rdev = 0;
+
+	switch (ip->i_mode & S_IFMT) {
+	case S_IFDIR:
+		ip->i_op = &zpl_dir_inode_operations;
+		ip->i_fop = &zpl_dir_file_operations;
+		zp->z_zn_prefetch = B_TRUE;
+		break;
+
+	case S_IFLNK:
+		ip->i_op = &zpl_symlink_inode_operations;
+		break;
+
+	/*
+	 * rdev is only stored in a SA only for device files.
+	 */
+	case S_IFCHR:
+	case S_IFBLK:
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
+		    sizeof (rdev));
+		fallthrough;
+	case S_IFIFO:
+	case S_IFSOCK:
+		init_special_inode(ip, ip->i_mode, rdev);
+		ip->i_op = &zpl_special_inode_operations;
+		break;
+
+	default:
+		zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
+		    (u_longlong_t)ip->i_ino, ip->i_mode);
+
+		/* Assume the inode is a file and attempt to continue */
+		ip->i_mode = S_IFREG | 0644;
+		fallthrough;
+	case S_IFREG:
+		ip->i_op = &zpl_inode_operations;
+		ip->i_fop = &zpl_file_operations;
+		ip->i_mapping->a_ops = &zpl_address_space_operations;
+		break;
+	}
+}
+
+/*
+ * Mirror the znode's immutable and append-only flags into the inode.
+ *
+ * Linux and Solaris have different sets of file attributes, so the
+ * conversion is restricted to the intersection of the two: S_IMMUTABLE
+ * and S_APPEND.
+ */
+static void
+zfs_set_inode_flags(znode_t *zp, struct inode *ip)
+{
+#ifdef HAVE_INODE_SET_FLAGS
+	unsigned int flags = 0;
+
+	flags |= (zp->z_pflags & ZFS_IMMUTABLE) ? S_IMMUTABLE : 0;
+	flags |= (zp->z_pflags & ZFS_APPENDONLY) ? S_APPEND : 0;
+
+	inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
+#else
+	/* No inode_set_flags(): set or clear each bit by hand. */
+	if ((zp->z_pflags & ZFS_IMMUTABLE) == 0)
+		ip->i_flags &= ~S_IMMUTABLE;
+	else
+		ip->i_flags |= S_IMMUTABLE;
+
+	if ((zp->z_pflags & ZFS_APPENDONLY) == 0)
+		ip->i_flags &= ~S_APPEND;
+	else
+		ip->i_flags |= S_APPEND;
+#endif
+}
+
+/*
+ * Update the embedded inode given the znode.
+ *
+ * Copies the mode and size from the znode, and the block count reported
+ * by the DMU for the znode's object, into the Linux inode under i_lock.
+ * Nodes of the .zfs control directory are left untouched.
+ *
+ *	IN:	zp	- znode whose embedded inode is refreshed; must not
+ *			  be NULL.
+ */
+void
+zfs_znode_update_vfs(znode_t *zp)
+{
+	struct inode	*ip;
+	uint32_t	blksize;
+	u_longlong_t	i_blocks;
+
+	ASSERT(zp != NULL);
+	ip = ZTOI(zp);
+
+	/* Skip .zfs control nodes which do not exist on disk. */
+	if (zfsctl_is_node(ip))
+		return;
+
+	/* blksize is a required out-parameter; only i_blocks is used. */
+	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
+
+	spin_lock(&ip->i_lock);
+	ip->i_mode = zp->z_mode;
+	ip->i_blocks = i_blocks;
+	i_size_write(ip, zp->z_size);
+	spin_unlock(&ip->i_lock);
+}
+
+
+/*
+ * Construct a znode+inode and initialize.
+ *
+ * This does not do a call to dmu_set_user() that is
+ * up to the caller to do, in case you don't want to
+ * return the znode
+ *
+ *	IN:	zfsvfs	 - file system that owns the new znode
+ *		db	 - dbuf for the object; db->db_object becomes z_id
+ *		blksz	 - initial value for z_blksz
+ *		obj_type - bonus type, passed through to zfs_znode_sa_init()
+ *		hdl	 - SA handle passed through to zfs_znode_sa_init();
+ *			   when NULL, the handle left in zp->z_sa_hdl is
+ *			   destroyed here if the attribute lookup fails
+ *
+ *	RETURN:	the initialized znode, or NULL if new_inode() fails, the
+ *		attribute lookups fail, or the stored generation is 0.
+ *
+ * On success the znode is appended to zfsvfs->z_all_znodes.  The inode is
+ * only inserted into the inode hash when its link count is non-zero.
+ */
+static znode_t *
+zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+    dmu_object_type_t obj_type, sa_handle_t *hdl)
+{
+	znode_t	*zp;
+	struct inode *ip;
+	uint64_t mode;
+	uint64_t parent;
+	uint64_t tmp_gen;
+	uint64_t links;
+	uint64_t z_uid, z_gid;
+	uint64_t atime[2], mtime[2], ctime[2], btime[2];
+	uint64_t projid = ZFS_DEFAULT_PROJID;
+	sa_bulk_attr_t bulk[12];	/* one slot per attribute looked up */
+	int count = 0;
+
+	ASSERT(zfsvfs != NULL);
+
+	ip = new_inode(zfsvfs->z_sb);
+	if (ip == NULL)
+		return (NULL);
+
+	zp = ITOZ(ip);
+	ASSERT(zp->z_dirlocks == NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	zp->z_unlinked = B_FALSE;
+	zp->z_atime_dirty = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	zp->z_is_mapped = B_FALSE;
+#endif
+	zp->z_is_ctldir = B_FALSE;
+	zp->z_suspended = B_FALSE;
+	zp->z_sa_hdl = NULL;
+	zp->z_mapcnt = 0;
+	zp->z_id = db->db_object;
+	zp->z_blksz = blksz;
+	zp->z_seq = 0x7A4653;	/* bytes spell "zFS" in ASCII */
+	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
+	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
+	    &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
+
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
+	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
+	    (zp->z_pflags & ZFS_PROJID) &&
+	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
+		if (hdl == NULL)
+			sa_handle_destroy(zp->z_sa_hdl);
+		zp->z_sa_hdl = NULL;
+		goto error;
+	}
+
+	zp->z_projid = projid;
+	zp->z_mode = ip->i_mode = mode;
+	ip->i_generation = (uint32_t)tmp_gen;
+	ip->i_blkbits = SPA_MINBLOCKSHIFT;
+	set_nlink(ip, (uint32_t)links);
+	zfs_uid_write(ip, z_uid);
+	zfs_gid_write(ip, z_gid);
+	zfs_set_inode_flags(zp, ip);
+
+	/* Cache the xattr parent id */
+	if (zp->z_pflags & ZFS_XATTR)
+		zp->z_xattr_parent = parent;
+
+	ZFS_TIME_DECODE(&ip->i_atime, atime);
+	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
+	ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+	ZFS_TIME_DECODE(&zp->z_btime, btime);
+
+	ip->i_ino = zp->z_id;
+	zfs_znode_update_vfs(zp);
+	zfs_inode_set_ops(zfsvfs, ip);
+
+	/*
+	 * The only way insert_inode_locked() can fail is if the ip->i_ino
+	 * number is already hashed for this super block.  This can never
+	 * happen because the inode numbers map 1:1 with the object numbers.
+	 *
+	 * Exceptions include rolling back a mounted file system, either
+	 * from the zfs rollback or zfs recv command.
+	 *
+	 * Active inodes are unhashed during the rollback, but since zrele
+	 * can happen asynchronously, we can't guarantee they've been
+	 * unhashed.  This can cause hash collisions in unlinked drain
+	 * processing so do not hash unlinked znodes.
+	 */
+	if (links > 0)
+		VERIFY3S(insert_inode_locked(ip), ==, 0);
+
+	mutex_enter(&zfsvfs->z_znodes_lock);
+	list_insert_tail(&zfsvfs->z_all_znodes, zp);
+	zfsvfs->z_nr_znodes++;
+	mutex_exit(&zfsvfs->z_znodes_lock);
+
+	if (links > 0)
+		unlock_new_inode(ip);
+	return (zp);
+
+error:
+	iput(ip);	/* drop the inode obtained from new_inode() */
+	return (NULL);
+}
+
+/*
+ * Dirty an inode, but only when its dataset can actually be written.
+ * Inodes on a read-only file system or in a snapshot are left alone.
+ */
+void
+zfs_mark_inode_dirty(struct inode *ip)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	boolean_t writable;
+
+	writable = !zfs_is_readonly(zfsvfs) &&
+	    !dmu_objset_is_snapshot(zfsvfs->z_os);
+
+	if (writable)
+		mark_inode_dirty(ip);
+}
+
+static uint64_t empty_xattr;	/* SA_ZPL_XATTR value for DMU_OT_ZNODE */
+static uint64_t pad[4];		/* SA_ZPL_PAD value for DMU_OT_ZNODE */
+static zfs_acl_phys_t acl_phys;	/* SA_ZPL_ZNODE_ACL value, ditto */
+/*
+ * Create a new DMU object to hold a zfs znode.
+ *
+ *	IN:	dzp	- parent directory for new znode
+ *		vap	- file attributes for new znode
+ *		tx	- dmu transaction id for zap operations
+ *		cr	- credentials of caller
+ *		flag	- flags:
+ *			  IS_ROOT_NODE	- new object will be root
+ *			  IS_TMPFILE	- new object is of O_TMPFILE
+ *			  IS_XATTR	- new object is an attribute
+ *		acl_ids	- ACL related attributes
+ *
+ *	OUT:	zpp	- allocated znode (set to dzp if IS_ROOT_NODE)
+ *
+ * There is no error return: the calls below that report an error code are
+ * checked with VERIFY()/VERIFY0(), and zfs_znode_alloc() is retried until
+ * it succeeds.
+ *
+ * During replay (zfsvfs->z_replay) the object number, timestamp,
+ * generation and dnode size are taken from vap (va_nodeid, va_ctime,
+ * va_nblocks, va_fsid) instead of being generated here.
+ */
+void
+zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
+    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
+{
+	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
+	uint64_t	mode, size, links, parent, pflags;
+	uint64_t	projid = ZFS_DEFAULT_PROJID;
+	uint64_t	rdev = 0;
+	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
+	dmu_buf_t	*db;
+	inode_timespec_t now;
+	uint64_t	gen, obj;
+	int		bonuslen;
+	int		dnodesize;
+	sa_handle_t	*sa_hdl;
+	dmu_object_type_t obj_type;
+	sa_bulk_attr_t	*sa_attrs;
+	int		cnt = 0;
+	zfs_acl_locator_cb_t locate = { 0 };
+	znode_hold_t	*zh;
+
+	if (zfsvfs->z_replay) {
+		obj = vap->va_nodeid;
+		now = vap->va_ctime;		/* see zfs_replay_create() */
+		gen = vap->va_nblocks;		/* ditto */
+		dnodesize = vap->va_fsid;	/* ditto */
+	} else {
+		obj = 0;
+		gethrestime(&now);
+		gen = dmu_tx_get_txg(tx);
+		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
+	}
+
+	if (dnodesize == 0)
+		dnodesize = DNODE_MIN_SIZE;
+
+	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+
+	bonuslen = (obj_type == DMU_OT_SA) ?
+	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
+
+	/*
+	 * Create a new DMU object.
+	 */
+	/*
+	 * There's currently no mechanism for pre-reading the blocks that will
+	 * be needed to allocate a new object, so we accept the small chance
+	 * that there will be an i/o error and we will fail one of the
+	 * assertions below.
+	 */
+	if (S_ISDIR(vap->va_mode)) {
+		if (zfsvfs->z_replay) {
+			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
+			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+			    obj_type, bonuslen, dnodesize, tx));
+		} else {
+			obj = zap_create_norm_dnsize(zfsvfs->z_os,
+			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+			    obj_type, bonuslen, dnodesize, tx);
+		}
+	} else {
+		if (zfsvfs->z_replay) {
+			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
+			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
+			    obj_type, bonuslen, dnodesize, tx));
+		} else {
+			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
+			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
+			    obj_type, bonuslen, dnodesize, tx);
+		}
+	}
+
+	zh = zfs_znode_hold_enter(zfsvfs, obj);
+	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
+
+	/*
+	 * If this is the root, fix up the half-initialized parent pointer
+	 * to reference the just-allocated physical data area.
+	 */
+	if (flag & IS_ROOT_NODE) {
+		dzp->z_id = obj;
+	}
+
+	/*
+	 * If parent is an xattr, so am I.
+	 */
+	if (dzp->z_pflags & ZFS_XATTR) {
+		flag |= IS_XATTR;
+	}
+
+	if (zfsvfs->z_use_fuids)
+		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+	else
+		pflags = 0;
+
+	if (S_ISDIR(vap->va_mode)) {
+		size = 2;		/* contents ("." and "..") */
+		links = 2;
+	} else {
+		size = 0;
+		links = (flag & IS_TMPFILE) ? 0 : 1;
+	}
+
+	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
+		rdev = vap->va_rdev;
+
+	parent = dzp->z_id;
+	mode = acl_ids->z_mode;
+	if (flag & IS_XATTR)
+		pflags |= ZFS_XATTR;
+
+	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) {
+		/*
+		 * With ZFS_PROJID flag, we can easily know whether there is
+		 * project ID stored on disk or not. See zfs_space_delta_cb().
+		 */
+		if (obj_type != DMU_OT_ZNODE &&
+		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
+			pflags |= ZFS_PROJID;
+
+		/*
+		 * Inherit project ID from parent if required.
+		 */
+		projid = zfs_inherit_projid(dzp);
+		if (dzp->z_pflags & ZFS_PROJINHERIT)
+			pflags |= ZFS_PROJINHERIT;
+	}
+
+	/*
+	 * No execs denied will be determined when zfs_mode_compute() is called.
+	 */
+	pflags |= acl_ids->z_aclp->z_hints &
+	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
+	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
+
+	ZFS_TIME_ENCODE(&now, crtime);
+	ZFS_TIME_ENCODE(&now, ctime);
+
+	if (vap->va_mask & ATTR_ATIME) {
+		ZFS_TIME_ENCODE(&vap->va_atime, atime);
+	} else {
+		ZFS_TIME_ENCODE(&now, atime);
+	}
+
+	if (vap->va_mask & ATTR_MTIME) {
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+	} else {
+		ZFS_TIME_ENCODE(&now, mtime);
+	}
+
+	/* Now add in all of the "SA" attributes */
+	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
+	    &sa_hdl));
+
+	/*
+	 * Setup the array of attributes to be replaced/set on the new file
+	 *
+	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
+	 * in the old znode_phys_t format.  Don't change this ordering
+	 *
+	 * sa_attrs has room for ZPL_END entries and is freed at the end of
+	 * this function.  The entries store pointers to the locals, so the
+	 * later zfs_mode_compute() updates of mode and pflags are seen by
+	 * sa_replace_all_by_template().
+	 */
+	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+	} else {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
+		    NULL, &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
+		    NULL, &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+	}
+
+	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
+		    &empty_xattr, 8);
+	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
+	    pflags & ZFS_PROJID) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
+		    NULL, &projid, 8);
+	}
+	if (obj_type == DMU_OT_ZNODE ||
+	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
+		    NULL, &rdev, 8);
+	}
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
+		    &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
+		    &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
+		    sizeof (uint64_t) * 4);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (zfs_acl_phys_t));
+	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
+		    &acl_ids->z_aclp->z_acl_count, 8);
+		locate.cb_aclp = acl_ids->z_aclp;
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate,
+		    acl_ids->z_aclp->z_acl_bytes);
+		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
+		    acl_ids->z_fuid, acl_ids->z_fgid);
+	}
+
+	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
+
+	if (!(flag & IS_ROOT_NODE)) {
+		/*
+		 * The call to zfs_znode_alloc() may fail if memory is low
+		 * via the call path: alloc_inode() -> inode_init_always() ->
+		 * security_inode_alloc() -> inode_alloc_security().  Since
+		 * the existing code is written such that zfs_mknode() can
+		 * not fail retry until sufficient memory has been reclaimed.
+		 */
+		do {
+			*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
+		} while (*zpp == NULL);
+
+		VERIFY(*zpp != NULL);
+		VERIFY(dzp != NULL);
+	} else {
+		/*
+		 * If we are creating the root node, the "parent" we
+		 * passed in is the znode for the root.
+		 */
+		*zpp = dzp;
+
+		(*zpp)->z_sa_hdl = sa_hdl;
+	}
+
+	(*zpp)->z_pflags = pflags;
+	(*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
+	(*zpp)->z_dnodesize = dnodesize;
+	(*zpp)->z_projid = projid;
+
+	if (obj_type == DMU_OT_ZNODE ||
+	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
+		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
+	}
+	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
+	zfs_znode_hold_exit(zfsvfs, zh);
+}
+
+/*
+ * Update in-core attributes.  It is assumed the caller will be doing an
+ * sa_bulk_update to push the changes out.
+ *
+ *	IN:	zp	- znode whose z_pflags are updated
+ *		xvap	- requested optional attributes; each one handled
+ *			  here is also marked as returned via XVA_SET_RTN()
+ *		tx	- transaction handed to ZFS_ATTR_SET(), sa_update()
+ *			  and zfs_sa_set_scanstamp()
+ *
+ * XAT_CREATETIME is written with sa_update() directly and the result of
+ * that call is ignored.  When XAT_IMMUTABLE or XAT_APPENDONLY is requested
+ * the inode flags are refreshed through zfs_set_inode_flags() at the end.
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	xoptattr_t *xoap;
+	boolean_t update_inode = B_FALSE;
+
+	xoap = xva_getxoptattr(xvap);
+	ASSERT(xoap);
+
+	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+		    &times, sizeof (times), tx);
+		XVA_SET_RTN(xvap, XAT_CREATETIME);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_READONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_HIDDEN);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SYSTEM);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_ARCHIVE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+
+		update_inode = B_TRUE;
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NOUNLINK);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_APPENDONLY);
+
+		update_inode = B_TRUE;
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NODUMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OPAQUE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+		zfs_sa_set_scanstamp(zp, xvap, tx);
+		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_REPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OFFLINE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
+		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
+	}
+
+	if (update_inode)
+		zfs_set_inode_flags(zp, ZTOI(zp));
+}
+
+/*
+ * Look up the znode for an object number, taking a reference on its inode.
+ *
+ *	IN:	zfsvfs	- file system to search
+ *		obj_num	- object number of the wanted znode
+ *
+ *	OUT:	zpp	- the znode on success, NULL otherwise
+ *
+ *	RETURN:	0 on success; the sa_buf_hold() error if the object cannot
+ *		be held; EINVAL if its bonus buffer is neither DMU_OT_SA nor
+ *		a DMU_OT_ZNODE of at least sizeof (znode_phys_t); ENOENT if
+ *		the znode is unlinked and its inode can no longer be grabbed,
+ *		or if a new znode could not be constructed.
+ */
+int
+zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
+{
+	dmu_object_info_t doi;
+	dmu_buf_t	*db;
+	znode_t		*zp;
+	znode_hold_t	*zh;
+	sa_handle_t	*hdl;
+	int err;
+
+	*zpp = NULL;
+
+	/* Each pass retakes the hold; only an EAGAIN result loops again. */
+	for (;;) {
+		zh = zfs_znode_hold_enter(zfsvfs, obj_num);
+
+		err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
+		if (err != 0) {
+			zfs_znode_hold_exit(zfsvfs, zh);
+			return (err);
+		}
+
+		dmu_object_info_from_db(db, &doi);
+		if (doi.doi_bonus_type != DMU_OT_SA &&
+		    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+		    doi.doi_bonus_size < sizeof (znode_phys_t))) {
+			sa_buf_rele(db, NULL);
+			zfs_znode_hold_exit(zfsvfs, zh);
+			return (SET_ERROR(EINVAL));
+		}
+
+		hdl = dmu_buf_get_user(db);
+		if (hdl == NULL)
+			break;		/* no znode yet, build one below */
+
+		/*
+		 * Since "SA" does immediate eviction we should never find
+		 * a sa handle that doesn't know about the znode.
+		 */
+		zp = sa_get_userdata(hdl);
+		ASSERT3P(zp, !=, NULL);
+
+		mutex_enter(&zp->z_lock);
+		ASSERT3U(zp->z_id, ==, obj_num);
+
+		/*
+		 * A znode with z_unlinked set is already marked for
+		 * deletion and should not be discovered.  That is only
+		 * checked after igrab() has failed, because of fsetxattr()
+		 * and O_TMPFILE.
+		 *
+		 * A NULL return from igrab() means the VFS has decided on
+		 * its own to evict the inode and has called iput_final().
+		 * The SA handle is still valid, but the VFS requires the
+		 * eviction to succeed, so every lock and reference held
+		 * here is dropped to let it finish and the lookup is then
+		 * retried.
+		 *
+		 * This unlikely case could be optimized by registering a
+		 * sops->drop_inode() callback that detects the active SA
+		 * hold and tells the VFS not to evict the inode.
+		 */
+		if (igrab(ZTOI(zp)) != NULL) {
+			*zpp = zp;
+			err = 0;
+		} else if (zp->z_unlinked) {
+			err = SET_ERROR(ENOENT);
+		} else {
+			err = SET_ERROR(EAGAIN);
+		}
+
+		mutex_exit(&zp->z_lock);
+		sa_buf_rele(db, NULL);
+		zfs_znode_hold_exit(zfsvfs, zh);
+
+		if (err != EAGAIN)
+			return (err);
+
+		/* inode might need this to finish evict */
+		cond_resched();
+	}
+
+	/*
+	 * Not found create new znode/vnode but only if file exists.
+	 *
+	 * There is a small window where zfs_vget() could
+	 * find this object while a file create is still in
+	 * progress.  This is checked for in zfs_znode_alloc()
+	 *
+	 * if zfs_znode_alloc() fails it will drop the hold on the
+	 * bonus buffer.
+	 */
+	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
+	    doi.doi_bonus_type, NULL);
+	if (zp != NULL)
+		*zpp = zp;
+	else
+		err = SET_ERROR(ENOENT);
+
+	zfs_znode_hold_exit(zfsvfs, zh);
+	return (err);
+}
+
+/*
+ * Re-attach an existing znode to its object and reload the attributes that
+ * are cached in the znode and its inode.
+ *
+ *	IN:	zp	- znode to refresh; z_sa_hdl is asserted to be NULL
+ *
+ *	RETURN:	0 on success, and immediately for .zfs control nodes;
+ *		the sa_buf_hold() or project ID lookup error; EINVAL for an
+ *		unexpected bonus buffer; EIO if the attributes cannot be
+ *		read or the stored generation differs from the inode's.
+ */
+int
+zfs_rezget(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	struct inode *ip = ZTOI(zp);
+	uint64_t obj_num = zp->z_id;
+	uint64_t projid = ZFS_DEFAULT_PROJID;
+	uint64_t atime[2], mtime[2], ctime[2], btime[2];
+	uint64_t gen, mode, links;
+	uint64_t z_uid, z_gid;
+	sa_bulk_attr_t bulk[11];
+	dmu_object_info_t doi;
+	znode_hold_t *zh;
+	dmu_buf_t *db;
+	int count = 0;
+	int err;
+
+	/*
+	 * Control directory nodes are skipped, otherwise they would always
+	 * get invalidated.  That causes funny behaviour for the mounted
+	 * snapdirs: especially on Linux >= 3.18, d_invalidate will detach
+	 * the mountpoint and prevent anyone from automounting it again as
+	 * long as someone is still using the detached mount.
+	 */
+	if (zp->z_is_ctldir)
+		return (0);
+
+	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
+
+	/* Throw away the cached ACL and xattr list. */
+	mutex_enter(&zp->z_acl_lock);
+	if (zp->z_acl_cached != NULL) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+	mutex_exit(&zp->z_acl_lock);
+
+	rw_enter(&zp->z_xattr_lock, RW_WRITER);
+	if (zp->z_xattr_cached != NULL) {
+		nvlist_free(zp->z_xattr_cached);
+		zp->z_xattr_cached = NULL;
+	}
+	rw_exit(&zp->z_xattr_lock);
+
+	ASSERT(zp->z_sa_hdl == NULL);
+	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
+	if (err != 0) {
+		zfs_znode_hold_exit(zfsvfs, zh);
+		return (err);
+	}
+
+	dmu_object_info_from_db(db, &doi);
+	if (doi.doi_bonus_type != DMU_OT_SA &&
+	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
+		sa_buf_rele(db, NULL);
+		zfs_znode_hold_exit(zfsvfs, zh);
+		return (SET_ERROR(EINVAL));
+	}
+
+	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
+
+	/* reload cached values */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
+	    &gen, sizeof (gen));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, sizeof (zp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &links, sizeof (links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+	    &z_uid, sizeof (z_uid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+	    &z_gid, sizeof (z_gid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &atime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
+
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0) {
+		zfs_znode_dmu_fini(zp);
+		zfs_znode_hold_exit(zfsvfs, zh);
+		return (SET_ERROR(EIO));
+	}
+
+	/* A missing project ID (ENOENT) keeps the default value. */
+	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
+		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
+		    &projid, 8);
+		if (err != 0 && err != ENOENT) {
+			zfs_znode_dmu_fini(zp);
+			zfs_znode_hold_exit(zfsvfs, zh);
+			return (SET_ERROR(err));
+		}
+	}
+
+	zp->z_projid = projid;
+	zp->z_mode = ip->i_mode = mode;
+	zfs_uid_write(ip, z_uid);
+	zfs_gid_write(ip, z_gid);
+
+	ZFS_TIME_DECODE(&ip->i_atime, atime);
+	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
+	ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+	ZFS_TIME_DECODE(&zp->z_btime, btime);
+
+	/* A different generation means this is no longer the same file. */
+	if ((uint32_t)gen != ip->i_generation) {
+		zfs_znode_dmu_fini(zp);
+		zfs_znode_hold_exit(zfsvfs, zh);
+		return (SET_ERROR(EIO));
+	}
+
+	set_nlink(ip, (uint32_t)links);
+	zfs_set_inode_flags(zp, ip);
+
+	zp->z_blksz = doi.doi_data_block_size;
+	zp->z_atime_dirty = B_FALSE;
+	zfs_znode_update_vfs(zp);
+
+	/*
+	 * If the file has zero links, then it has been unlinked on the send
+	 * side and it must be in the received unlinked set.
+	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
+	 * stale data and to prevent automatic removal of the file in
+	 * zfs_zinactive().  The file will be removed either when it is removed
+	 * on the send side and the next incremental stream is received or
+	 * when the unlinked set gets processed.
+	 */
+	zp->z_unlinked = (ip->i_nlink == 0);
+	if (zp->z_unlinked)
+		zfs_znode_dmu_fini(zp);
+
+	zfs_znode_hold_exit(zfsvfs, zh);
+
+	return (0);
+}
+
+/*
+ * Free the DMU object behind a znode, plus its external ACL object when
+ * zfs_external_acl() reports one, then tear down the znode's DMU state.
+ * Everything after the ACL lookup runs under the znode hold for the
+ * object, and a failed free is fatal (VERIFY).
+ *
+ *	IN:	zp	- znode whose object is freed
+ *		tx	- transaction the frees are charged to
+ */
+void
+zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	objset_t *os = zfsvfs->z_os;
+	uint64_t obj = zp->z_id;
+	uint64_t acl_obj = zfs_external_acl(zp);
+	znode_hold_t *zh = zfs_znode_hold_enter(zfsvfs, obj);
+
+	/* An external ACL object is only expected on non-SA znodes. */
+	if (acl_obj != 0) {
+		VERIFY(!zp->z_is_sa);
+		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
+	}
+
+	VERIFY(0 == dmu_object_free(os, obj, tx));
+	zfs_znode_dmu_fini(zp);
+
+	zfs_znode_hold_exit(zfsvfs, zh);
+}
+
+/*
+ * Release a znode's DMU state, or remove the file outright when it has
+ * already been unlinked.
+ *
+ *	IN:	zp	- znode being released; z_sa_hdl is asserted to
+ *			  be set
+ */
+void
+zfs_zinactive(znode_t *zp)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	uint64_t z_id = zp->z_id;
+	znode_hold_t *zh;
+	boolean_t remove_now = B_FALSE;
+
+	ASSERT(zp->z_sa_hdl);
+
+	/*
+	 * Don't allow a zfs_zget() while were trying to release this znode.
+	 */
+	zh = zfs_znode_hold_enter(zfsvfs, z_id);
+
+	mutex_enter(&zp->z_lock);
+
+	/*
+	 * A file with no links whose last reference is going away gets
+	 * removed from the file system, unless the file system is mounted
+	 * read-only.  That can happen, for example, if the file system was
+	 * originally read-write, the file was opened, then unlinked and
+	 * the file system was made read-only before the file was finally
+	 * closed.  In that case the file stays in the unlinked set.
+	 */
+	if (zp->z_unlinked) {
+		ASSERT(!zfsvfs->z_issnap);
+		remove_now = !zfs_is_readonly(zfsvfs) &&
+		    !zfs_unlink_suspend_progress;
+	}
+
+	mutex_exit(&zp->z_lock);
+
+	if (remove_now) {
+		/* The hold is dropped before zfs_rmnode() is called. */
+		zfs_znode_hold_exit(zfsvfs, zh);
+		zfs_rmnode(zp);
+		return;
+	}
+
+	zfs_znode_dmu_fini(zp);
+	zfs_znode_hold_exit(zfsvfs, zh);
+}
+
+/*
+ * zfs_compare_timespec resolves to timespec64_compare when
+ * HAVE_INODE_TIMESPEC64_TIMES is defined and to timespec_compare otherwise.
+ */
+#if defined(HAVE_INODE_TIMESPEC64_TIMES)
+#define	zfs_compare_timespec timespec64_compare
+#else
+#define	zfs_compare_timespec timespec_compare
+#endif
+
+/*
+ * Decide whether the atime of an inode is due for an update under
+ * relatime rules.  The logic mostly duplicates the Linux kernel's
+ * relatime_need_update(), and this function is only called if the
+ * underlying filesystem actually has atime updates enabled.
+ *
+ * Returns B_TRUE when the atime is not newer than the mtime, is not newer
+ * than the ctime, or is at least a day (24*60*60 seconds) behind the
+ * current time; B_FALSE otherwise.
+ */
+boolean_t
+zfs_relatime_need_update(const struct inode *ip)
+{
+	inode_timespec_t now;
+	hrtime_t atime_age;
+
+	gethrestime(&now);
+
+	/* atime at or before mtime/ctime: the file changed since. */
+	if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0 ||
+	    zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
+		return (B_TRUE);
+
+	/* Otherwise refresh only once the atime is a day old. */
+	atime_age = (hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec;
+
+	return (atime_age >= 24*60*60 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Stamp a znode with the current time ahead of an attribute update.
+ *
+ *	IN:	zp	- znode requiring timestamp update
+ *		flag	- any combination of ATTR_MTIME and ATTR_CTIME
+ *
+ *	OUT:	zp	- z_seq is incremented; the inode's i_mtime/i_ctime
+ *			  and z_pflags are updated for each requested stamp
+ *		mtime	- encoded new mtime (written only for ATTR_MTIME)
+ *		ctime	- encoded new ctime (written only for ATTR_CTIME)
+ *
+ *	Note: atime is not handled here; the Linux VFS is relied upon to do
+ *	atime updating.
+ */
+void
+zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
+    uint64_t ctime[2])
+{
+	struct inode *ip = ZTOI(zp);
+	inode_timespec_t now;
+
+	gethrestime(&now);
+	zp->z_seq++;
+
+	if (flag & ATTR_MTIME) {
+		/* Encode, then decode so the inode gets the stored form. */
+		ZFS_TIME_ENCODE(&now, mtime);
+		ZFS_TIME_DECODE(&ip->i_mtime, mtime);
+		if (ZTOZSB(zp)->z_use_fuids)
+			zp->z_pflags |= ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+	}
+
+	if (flag & ATTR_CTIME) {
+		ZFS_TIME_ENCODE(&now, ctime);
+		ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+		if (ZTOZSB(zp)->z_use_fuids)
+			zp->z_pflags |= ZFS_ARCHIVE;
+	}
+}
+
+/*
+ * Grow the block size for a file.
+ *
+ *	IN:	zp	- znode of the file whose block size is grown
+ *		size	- requested block size
+ *		tx	- open transaction.
+ *
+ * Nothing happens when the request does not exceed the current block
+ * size, or when the file is already larger than its (non-zero) block
+ * size.  ENOTSUP from the DMU is silently ignored.  On success z_blksz is
+ * reloaded with the block size the object actually has.
+ *
+ * NOTE: this function assumes that the znode is write locked.
+ */
+void
+zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
+{
+	int		error;
+	u_longlong_t	dummy;
+
+	/*
+	 * Never shrink.  A file that is already bigger than its current
+	 * blocksize is not grown either: once there is more than one block
+	 * in a file, the blocksize cannot change.
+	 */
+	if (size <= zp->z_blksz ||
+	    (zp->z_blksz && zp->z_size > zp->z_blksz))
+		return;
+
+	error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
+	    size, 0, tx);
+
+	if (error != ENOTSUP) {
+		ASSERT0(error);
+
+		/* What blocksize did we actually get? */
+		dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl),
+		    &zp->z_blksz, &dummy);
+	}
+}
+
+/*
+ * Increase the file length
+ *
+ *	IN:	zp	- znode of file to extend.
+ *		end	- new end-of-file
+ *
+ *	RETURN:	0 on success, error code on failure
+ *
+ * The whole file is range-locked as writer for the duration.  If the file
+ * is already at least `end' bytes long nothing is changed and 0 is
+ * returned.  The only error returned is the one from dmu_tx_assign(); a
+ * failing sa_update() of the size is fatal (VERIFY).
+ */
+static int
+zfs_extend(znode_t *zp, uint64_t end)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	dmu_tx_t *tx;
+	zfs_locked_range_t *lr;
+	uint64_t newblksz;
+	int error;
+
+	/*
+	 * We will change z_size, lock the whole file.
+	 */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
+
+	/*
+	 * Nothing to do if file already at desired length.
+	 */
+	if (end <= zp->z_size) {
+		zfs_rangelock_exit(lr);
+		return (0);
+	}
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	if (end > zp->z_blksz &&
+	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
+		/*
+		 * We are growing the file past the current block size.
+		 */
+		if (zp->z_blksz > zfsvfs->z_max_blksz) {
+			/*
+			 * File's blocksize is already larger than the
+			 * "recordsize" property.  Only let it grow to
+			 * the next power of 2.
+			 */
+			ASSERT(!ISP2(zp->z_blksz));
+			/*
+			 * Shift a 64-bit one: the result is compared with
+			 * and stored into uint64_t values, so the shift
+			 * must not be carried out in plain int.
+			 */
+			newblksz = MIN(end, 1ULL << highbit64(zp->z_blksz));
+		} else {
+			newblksz = MIN(end, zfsvfs->z_max_blksz);
+		}
+		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
+	} else {
+		/* Zero means "leave the block size alone" below. */
+		newblksz = 0;
+	}
+
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		zfs_rangelock_exit(lr);
+		return (error);
+	}
+
+	if (newblksz)
+		zfs_grow_blocksize(zp, newblksz, tx);
+
+	zp->z_size = end;
+
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
+	    &zp->z_size, sizeof (zp->z_size), tx));
+
+	zfs_rangelock_exit(lr);
+
+	dmu_tx_commit(tx);
+
+	return (0);
+}
+
+/*
+ * zfs_zero_partial_page - Modeled after update_pages() but
+ * with different arguments and semantics for use by zfs_freesp().
+ *
+ * Zeroes a piece of a single page cache entry for zp at offset
+ * start and length len.
+ *
+ *	IN:	zp	- znode whose page cache entry is modified
+ *		start	- byte offset in the file where zeroing begins
+ *		len	- number of bytes to zero; start and start + len - 1
+ *			  are asserted to fall within the same page
+ *
+ * If find_lock_page() does not return a page for that offset nothing is
+ * done.  Otherwise the bytes are cleared through a temporary kmap() and
+ * the page is marked accessed and up to date, its error flag is cleared,
+ * and it is unlocked and released.
+ *
+ * Caller must acquire a range lock on the file for the region
+ * being zeroed in order that the ARC and page cache stay in sync.
+ */
+static void
+zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
+{
+	struct address_space *mp = ZTOI(zp)->i_mapping;
+	struct page *pp;
+	int64_t	off;
+	void *pb;
+
+	ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
+
+	off = start & (PAGE_SIZE - 1);	/* byte offset within the page */
+	start &= PAGE_MASK;		/* file offset of the page itself */
+
+	pp = find_lock_page(mp, start >> PAGE_SHIFT);
+	if (pp) {
+		if (mapping_writably_mapped(mp))
+			flush_dcache_page(pp);
+
+		pb = kmap(pp);
+		bzero(pb + off, len);
+		kunmap(pp);
+
+		if (mapping_writably_mapped(mp))
+			flush_dcache_page(pp);
+
+		mark_page_accessed(pp);
+		SetPageUptodate(pp);
+		ClearPageError(pp);
+		unlock_page(pp);
+		put_page(pp);
+	}
+}
+
+/*
+ * Free space in a file.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		off	- start of section to free.
+ *		len	- length of section to free.
+ *
+ *	RETURN:	0 on success, error code on failure
+ *
+ * The request is clamped to the current file size, and a range that starts
+ * at or beyond z_size is a no-op.  z_size itself is not changed here.  The
+ * page cache is cleaned up even when dmu_free_long_range() reports an
+ * error, and that error is what gets returned.
+ */
+static int
+zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	zfs_locked_range_t *lr;
+	int error;
+
+	/*
+	 * Lock the range being freed.
+	 */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
+
+	/*
+	 * Nothing to do if the range starts at or past the end of the file.
+	 */
+	if (off >= zp->z_size) {
+		zfs_rangelock_exit(lr);
+		return (0);
+	}
+
+	if (off + len > zp->z_size)
+		len = zp->z_size - off;
+
+	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
+
+	/*
+	 * Zero partial page cache entries.  This must be done under a
+	 * range lock in order to keep the ARC and page cache in sync.
+	 *
+	 * Pages that lie completely inside the hole are dropped with
+	 * truncate_inode_pages_range(); the partial pages at either end
+	 * of the hole are zeroed in place with zfs_zero_partial_page().
+	 */
+	if (zn_has_cached_data(zp, off, off + len - 1)) {
+		loff_t first_page, last_page, page_len;
+		loff_t first_page_offset, last_page_offset;
+
+		/* first possible full page in hole */
+		first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		/* last page of hole */
+		last_page = (off + len) >> PAGE_SHIFT;
+
+		/* offset of first_page */
+		first_page_offset = first_page << PAGE_SHIFT;
+		/* offset of last_page */
+		last_page_offset = last_page << PAGE_SHIFT;
+
+		/* truncate whole pages */
+		if (last_page_offset > first_page_offset) {
+			truncate_inode_pages_range(ZTOI(zp)->i_mapping,
+			    first_page_offset, last_page_offset - 1);
+		}
+
+		/* truncate sub-page ranges */
+		if (first_page > last_page) {
+			/* entire punched area within a single page */
+			zfs_zero_partial_page(zp, off, len);
+		} else {
+			/* beginning of punched area at the end of a page */
+			page_len  = first_page_offset - off;
+			if (page_len > 0)
+				zfs_zero_partial_page(zp, off, page_len);
+
+			/* end of punched area at the beginning of a page */
+			page_len = off + len - last_page_offset;
+			if (page_len > 0)
+				zfs_zero_partial_page(zp, last_page_offset,
+				    page_len);
+		}
+	}
+	zfs_rangelock_exit(lr);
+
+	return (error);
+}
+
+/*
+ * Shrink a file to a new end-of-file.
+ *
+ * With the whole file locked for writing, every block from 'end' onward
+ * is freed and the new size is recorded in the znode's system
+ * attributes.  When the file becomes empty the ZFS_SPARSE flag is
+ * cleared in the same update.  A request that would not shrink the
+ * file succeeds without changing anything.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		end	- new end-of-file.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+static int
+zfs_trunc(znode_t *zp, uint64_t end)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	sa_bulk_attr_t bulk[2];
+	zfs_locked_range_t *lr;
+	dmu_tx_t *tx;
+	int count = 0;
+	int error = 0;
+
+	/*
+	 * z_size is about to change, so take the range lock over the
+	 * entire file.
+	 */
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
+
+	/* The file is already no longer than requested. */
+	if (end >= zp->z_size)
+		goto unlock;
+
+	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
+	    DMU_OBJECT_END);
+	if (error != 0)
+		goto unlock;
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	dmu_tx_mark_netfree(tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0) {
+		dmu_tx_abort(tx);
+		goto unlock;
+	}
+
+	/* Publish the new size, plus the flags if the file is now empty. */
+	zp->z_size = end;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
+	    NULL, &zp->z_size, sizeof (zp->z_size));
+
+	if (end == 0) {
+		zp->z_pflags &= ~ZFS_SPARSE;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &zp->z_pflags, 8);
+	}
+	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
+
+	dmu_tx_commit(tx);
+
+unlock:
+	zfs_rangelock_exit(lr);
+	return (error);
+}
+
+/*
+ * Free space in a file
+ *
+ * Dispatches on the request: a start offset beyond the current size
+ * extends the file to off + len; len == 0 truncates the file at off;
+ * otherwise the range [off, off + len) is freed and the file is grown
+ * if that range reaches past the current size.  When 'log' is set and
+ * the operation succeeded, the timestamps and flags are updated and a
+ * TX_TRUNCATE record is written to the intent log.
+ *
+ *	IN:	zp	- znode of file to free data in.
+ *		off	- start of range
+ *		len	- length of range (0 => truncate the file at off)
+ *		flag	- current file open mode flags (not used by this
+ *			  implementation).
+ *		log	- TRUE if this action should be logged
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+int
+zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
+{
+	dmu_tx_t *tx;
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	zilog_t *zilog = zfsvfs->z_log;
+	uint64_t mode;
+	uint64_t mtime[2], ctime[2];
+	sa_bulk_attr_t bulk[3];
+	int count = 0;
+	int error;
+
+	/*
+	 * The looked-up mode is not used below; the call only serves to
+	 * fail early if the attribute cannot be read.
+	 */
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
+	    sizeof (mode))) != 0)
+		return (error);
+
+	/* Start is past end-of-file: nothing to free, only extend. */
+	if (off > zp->z_size) {
+		error =  zfs_extend(zp, off+len);
+		if (error == 0 && log)
+			goto log;
+		goto out;
+	}
+
+	if (len == 0) {
+		error = zfs_trunc(zp, off);
+	} else {
+		/* Punch the hole, then grow the file if the hole passes EOF. */
+		if ((error = zfs_free_range(zp, off, len)) == 0 &&
+		    off + len > zp->z_size)
+			error = zfs_extend(zp, off+len);
+	}
+	if (error || !log)
+		goto out;
+log:
+	/* Record new mtime/ctime and the flags, then log the operation. */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error) {
+		dmu_tx_abort(tx);
+		goto out;
+	}
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+	    NULL, &zp->z_pflags, 8);
+	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+
+	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
+
+	dmu_tx_commit(tx);
+
+	zfs_znode_update_vfs(zp);
+	error = 0;
+
+out:
+	/*
+	 * Truncate the page cache - for file truncate operations, use
+	 * the purpose-built API for truncations.  For punching operations,
+	 * the truncation is handled under a range lock in zfs_free_range.
+	 *
+	 * Note that this runs on every exit through 'out', including the
+	 * error paths above.
+	 */
+	if (len == 0)
+		truncate_setsize(ZTOI(zp), off);
+	return (error);
+}
+
+/*
+ * Populate a freshly created objset with the minimum ZPL metadata.
+ *
+ * Creates the master node, stores the supplied ZPL properties and the
+ * ZPL version in it, creates the SA registration object (for versions
+ * that use SA) and the unlinked set, and finally creates the root
+ * directory znode.  Because zfs_mknode() needs a znode, inode, zfsvfs
+ * and super block to operate on, throw-away instances are built for
+ * the duration of the call and released before returning.
+ *
+ *	IN:	os	 - objset to populate.
+ *		cr	 - credentials supplying the root directory's
+ *			   owner and group.
+ *		zplprops - nvlist of uint64 ZPL properties to record.
+ *		tx	 - transaction in which all objects are created.
+ *
+ * Failures are not reported to the caller; they are only caught by the
+ * ASSERT/VERIFY checks below.
+ */
+void
+zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
+{
+	struct super_block *sb;
+	zfsvfs_t	*zfsvfs;
+	uint64_t	moid, obj, sa_obj, version;
+	uint64_t	sense = ZFS_CASE_SENSITIVE;
+	uint64_t	norm = 0;
+	nvpair_t	*elem;
+	int		size;
+	int		error;
+	int		i;
+	znode_t		*rootzp = NULL;
+	vattr_t		vattr;
+	znode_t		*zp;
+	zfs_acl_ids_t	acl_ids;
+
+	/*
+	 * First attempt to create master node.
+	 */
+	/*
+	 * In an empty objset, there are no blocks to read and thus
+	 * there can be no i/o errors (which we assert below).
+	 */
+	moid = MASTER_NODE_OBJ;
+	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
+	    DMU_OT_NONE, 0, tx);
+	ASSERT(error == 0);
+
+	/*
+	 * Set starting attributes.
+	 */
+	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
+		/* For the moment we expect all zpl props to be uint64_ts */
+		uint64_t val;
+		char *name;
+
+		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
+		VERIFY(nvpair_value_uint64(elem, &val) == 0);
+		name = nvpair_name(elem);
+		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
+			/* A requested version may only lower the default. */
+			if (val < version)
+				version = val;
+		} else {
+			error = zap_update(os, moid, name, 8, 1, &val, tx);
+		}
+		ASSERT(error == 0);
+		/* Remember the two properties that shape z_norm below. */
+		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
+			norm = val;
+		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
+			sense = val;
+	}
+	ASSERT(version != 0);
+	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
+	/*
+	 * Check this write like every other one in this function; without
+	 * the assertion the result is silently overwritten below.
+	 */
+	ASSERT(error == 0);
+
+	/*
+	 * Create zap object used for SA attribute registration
+	 */
+
+	if (version >= ZPL_VERSION_SA) {
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT(error == 0);
+	} else {
+		sa_obj = 0;
+	}
+	/*
+	 * Create a delete queue.
+	 */
+	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
+
+	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
+	ASSERT(error == 0);
+
+	/*
+	 * Create root znode.  Create minimal znode/inode/zfsvfs/sb
+	 * to allow zfs_mknode to work.
+	 */
+	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
+	vattr.va_mode = S_IFDIR|0755;
+	vattr.va_uid = crgetuid(cr);
+	vattr.va_gid = crgetgid(cr);
+
+	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+	rootzp->z_unlinked = B_FALSE;
+	rootzp->z_atime_dirty = B_FALSE;
+	rootzp->z_is_sa = USE_SA(version, os);
+	rootzp->z_pflags = 0;
+
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+	zfsvfs->z_os = os;
+	zfsvfs->z_parent = zfsvfs;
+	zfsvfs->z_version = version;
+	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
+	zfsvfs->z_use_sa = USE_SA(version, os);
+	zfsvfs->z_norm = norm;
+
+	sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
+	sb->s_fs_info = zfsvfs;
+
+	ZTOI(rootzp)->i_sb = sb;
+
+	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+	    &zfsvfs->z_attr_table);
+
+	ASSERT(error == 0);
+
+	/*
+	 * Fold case on file systems that are always or sometimes case
+	 * insensitive.
+	 */
+	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
+		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
+
+	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
+	    offsetof(znode_t, z_link_node));
+
+	/*
+	 * Size the znode hold arrays from the zfs_object_mutex_size
+	 * tunable, capped at ZFS_OBJ_MTX_MAX.
+	 */
+	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
+	zfsvfs->z_hold_size = size;
+	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
+	    KM_SLEEP);
+	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+	for (i = 0; i != size; i++) {
+		avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
+		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
+		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+	}
+
+	/* Create the root directory and record it in the master node. */
+	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
+	    cr, NULL, &acl_ids));
+	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
+	ASSERT3P(zp, ==, rootzp);
+	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
+	ASSERT(error == 0);
+	zfs_acl_ids_free(&acl_ids);
+
+	/* Tear down the temporary znode, zfsvfs and super block. */
+	atomic_set(&ZTOI(rootzp)->i_count, 0);
+	sa_handle_destroy(rootzp->z_sa_hdl);
+	kmem_cache_free(znode_cache, rootzp);
+
+	for (i = 0; i != size; i++) {
+		avl_destroy(&zfsvfs->z_hold_trees[i]);
+		mutex_destroy(&zfsvfs->z_hold_locks[i]);
+	}
+
+	mutex_destroy(&zfsvfs->z_znodes_lock);
+
+	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
+	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
+	kmem_free(sb, sizeof (struct super_block));
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+}
+#endif /* _KERNEL */
+
+/*
+ * Build the SA attribute table for an objset.
+ *
+ * Looks up the SA registration object in the master node and hands it
+ * to sa_setup().  A missing ZFS_SA_ATTRS entry is not an error: in that
+ * case sa_setup() is called with object number 0.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+static int
+zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
+{
+	uint64_t sa_obj = 0;
+	int err;
+
+	err = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
+	if (err == 0 || err == ENOENT)
+		return (sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END,
+		    sa_table));
+
+	return (err);
+}
+
+/*
+ * Hold an object's bonus buffer and obtain a private SA handle for it.
+ *
+ * The object is accepted only if its bonus buffer is of type DMU_OT_SA,
+ * or of type DMU_OT_ZNODE and at least sizeof (znode_phys_t) bytes
+ * long; anything else yields ENOTSUP.  On success the caller owns both
+ * *hdlp and *db and releases them with zfs_release_sa_handle().  On
+ * failure nothing is left held.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+static int
+zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
+    dmu_buf_t **db, void *tag)
+{
+	dmu_object_info_t info;
+	boolean_t usable;
+	int err;
+
+	err = sa_buf_hold(osp, obj, tag, db);
+	if (err != 0)
+		return (err);
+
+	dmu_object_info_from_db(*db, &info);
+	usable = (info.doi_bonus_type == DMU_OT_SA) ||
+	    (info.doi_bonus_type == DMU_OT_ZNODE &&
+	    info.doi_bonus_size >= sizeof (znode_phys_t));
+	if (!usable) {
+		sa_buf_rele(*db, tag);
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	err = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
+	if (err != 0)
+		sa_buf_rele(*db, tag);
+
+	return (err);
+}
+
+/*
+ * Undo zfs_grab_sa_handle(): destroy the SA handle, then drop the hold
+ * on the bonus buffer.  'tag' must be the tag the buffer was held with.
+ */
+static void
+zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
+{
+	sa_handle_destroy(hdl);
+	sa_buf_rele(db, tag);
+}
+
+/*
+ * Report the parent of the object behind 'hdl'.
+ *
+ * On success *pobjp receives the parent object number and *is_xattrdir
+ * says whether the object is an extended attribute directory.  The
+ * recorded parent is sanity checked: unless the object is an xattr
+ * directory, its parent has to be a directory, otherwise EINVAL is
+ * returned.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+static int
+zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
+    uint64_t *pobjp, int *is_xattrdir)
+{
+	sa_bulk_attr_t attrs[3];
+	sa_handle_t *parent_hdl;
+	dmu_buf_t *parent_db;
+	uint64_t parent_obj;
+	uint64_t parent_mode;
+	uint64_t flags;
+	uint64_t mode;
+	int nattrs = 0;
+	int xattrdir;
+	int err;
+
+	/* Fetch the object's parent pointer, flags and mode in one lookup. */
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_PARENT], NULL,
+	    &parent_obj, sizeof (parent_obj));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_FLAGS], NULL,
+	    &flags, sizeof (flags));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_MODE], NULL,
+	    &mode, sizeof (mode));
+
+	err = sa_bulk_lookup(hdl, attrs, nattrs);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Removing a link leaves the parent pointer untouched, so it can
+	 * be stale.  That happens both when the file moves to the delete
+	 * queue and when other links to it remain.  Read the mode of the
+	 * recorded parent so that it can be validated below.
+	 *
+	 * NOTE(review): this lookup passes the raw ZPL_MODE index, whereas
+	 * the bulk lookup above goes through sa_table[] - confirm that the
+	 * difference is intended.
+	 */
+	err = zfs_grab_sa_handle(osp, parent_obj, &parent_hdl, &parent_db,
+	    FTAG);
+	if (err != 0)
+		return (err);
+
+	err = sa_lookup(parent_hdl, ZPL_MODE, &parent_mode,
+	    sizeof (parent_mode));
+	zfs_release_sa_handle(parent_hdl, parent_db, FTAG);
+	if (err != 0)
+		return (err);
+
+	xattrdir = ((flags & ZFS_XATTR) != 0) && S_ISDIR(mode);
+	*is_xattrdir = xattrdir;
+
+	/*
+	 * An xattr directory may hang off any kind of object; everything
+	 * else must have a directory as its parent.
+	 */
+	if (!xattrdir && !S_ISDIR(parent_mode))
+		return (SET_ERROR(EINVAL));
+
+	*pobjp = parent_obj;
+
+	return (0);
+}
+
+/*
+ * Fill 'sb' with ZPL level statistics for the object behind 'hdl':
+ * its mode, generation, link count and ctime, all read with a single
+ * bulk SA lookup.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+static int
+zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
+    zfs_stat_t *sb)
+{
+	sa_bulk_attr_t attrs[4];
+	int nattrs = 0;
+
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_MODE], NULL,
+	    &sb->zs_mode, sizeof (sb->zs_mode));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_GEN], NULL,
+	    &sb->zs_gen, sizeof (sb->zs_gen));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_LINKS], NULL,
+	    &sb->zs_links, sizeof (sb->zs_links));
+	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_CTIME], NULL,
+	    &sb->zs_ctime, sizeof (sb->zs_ctime));
+
+	return (sa_bulk_lookup(hdl, attrs, nattrs));
+}
+
+/*
+ * Reconstruct the path of object 'obj' into 'buf'.
+ *
+ * The path is assembled right to left at the tail of 'buf' by walking
+ * parent pointers until an object that is its own parent (the root) is
+ * reached, and is then moved to the front of the buffer.  Extended
+ * attribute directories appear as the component "<xattrdir>".
+ *
+ *	IN:	osp	 - objset containing the object.
+ *		obj	 - object number to resolve.
+ *		hdl	 - caller-owned SA handle for 'obj'; never released
+ *			   here.
+ *		sa_table - SA attribute table for 'osp'.
+ *		len	 - size of 'buf' in bytes.
+ *
+ *	OUT:	buf	 - NUL terminated path (valid only on success).
+ *
+ *	RETURN:	0 on success, ESTALE if the object is in the unlinked
+ *		set, ENAMETOOLONG if the path does not fit in 'buf', or
+ *		another error code on failure.
+ */
+static int
+zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
+    sa_attr_type_t *sa_table, char *buf, int len)
+{
+	sa_handle_t *sa_hdl;
+	sa_handle_t *prevhdl = NULL;
+	dmu_buf_t *prevdb = NULL;
+	dmu_buf_t *sa_db = NULL;
+	char *path = buf + len - 1;
+	int error;
+
+	*path = '\0';
+	sa_hdl = hdl;
+
+	/* An object sitting in the unlinked set no longer has a path. */
+	uint64_t deleteq_obj;
+	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
+	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
+	error = zap_lookup_int(osp, deleteq_obj, obj);
+	if (error == 0) {
+		return (ESTALE);
+	} else if (error != ENOENT) {
+		return (error);
+	}
+	error = 0;
+
+	for (;;) {
+		uint64_t pobj = 0;
+		char component[MAXNAMELEN + 2];
+		size_t complen;
+		int is_xattrdir = 0;
+
+		/* The handle from two levels down is no longer needed. */
+		if (prevdb) {
+			ASSERT(prevhdl != NULL);
+			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
+		}
+
+		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
+		    &is_xattrdir)) != 0)
+			break;
+
+		/* An object that is its own parent is the root: done. */
+		if (pobj == obj) {
+			if (path[0] != '/')
+				*--path = '/';
+			break;
+		}
+
+		component[0] = '/';
+		if (is_xattrdir) {
+			(void) sprintf(component + 1, "<xattrdir>");
+		} else {
+			error = zap_value_search(osp, pobj, obj,
+			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
+			if (error != 0)
+				break;
+		}
+
+		complen = strlen(component);
+		/*
+		 * Never step in front of the caller's buffer.  The ASSERT
+		 * below is compiled out of non-debug builds, so without
+		 * this check an over-long path would be copied to memory
+		 * preceding 'buf'.
+		 */
+		if (complen > (size_t)(path - buf)) {
+			error = SET_ERROR(ENAMETOOLONG);
+			break;
+		}
+		path -= complen;
+		ASSERT(path >= buf);
+		bcopy(component, path, complen);
+		obj = pobj;
+
+		/*
+		 * Move up one level.  A handle grabbed by this loop is
+		 * remembered so that it can be released on the next pass;
+		 * the caller's 'hdl' is never queued for release.
+		 */
+		if (sa_hdl != hdl) {
+			prevhdl = sa_hdl;
+			prevdb = sa_db;
+		}
+		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
+		if (error != 0) {
+			sa_hdl = prevhdl;
+			sa_db = prevdb;
+			break;
+		}
+	}
+
+	/* Release whatever handle this function still holds itself. */
+	if (sa_hdl != NULL && sa_hdl != hdl) {
+		ASSERT(sa_db != NULL);
+		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
+	}
+
+	/* Slide the assembled path, NUL included, to the front of buf. */
+	if (error == 0)
+		(void) memmove(buf, path, buf + len - path);
+
+	return (error);
+}
+
+/*
+ * Resolve object number 'obj' in 'osp' to a path, written to 'buf'
+ * (of 'len' bytes).  Sets up the SA attribute table and a private
+ * handle for the object, then defers to zfs_obj_to_path_impl().
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+int
+zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
+{
+	sa_attr_type_t *attr_table;
+	sa_handle_t *handle;
+	dmu_buf_t *bonus;
+	int err;
+
+	if ((err = zfs_sa_setup(osp, &attr_table)) != 0)
+		return (err);
+
+	if ((err = zfs_grab_sa_handle(osp, obj, &handle, &bonus,
+	    FTAG)) != 0)
+		return (err);
+
+	err = zfs_obj_to_path_impl(osp, obj, handle, attr_table, buf, len);
+	zfs_release_sa_handle(handle, bonus, FTAG);
+
+	return (err);
+}
+
+/*
+ * Resolve object number 'obj' in 'osp' to both its ZPL statistics
+ * ('sb') and its path ('buf', of 'len' bytes).  The last byte of 'buf'
+ * is set to NUL before anything else is attempted.  The path is only
+ * looked up once the statistics have been read successfully.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+int
+zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
+    char *buf, int len)
+{
+	sa_attr_type_t *attr_table;
+	sa_handle_t *handle;
+	dmu_buf_t *bonus;
+	int err;
+
+	buf[len - 1] = '\0';
+
+	if ((err = zfs_sa_setup(osp, &attr_table)) != 0)
+		return (err);
+
+	if ((err = zfs_grab_sa_handle(osp, obj, &handle, &bonus,
+	    FTAG)) != 0)
+		return (err);
+
+	err = zfs_obj_to_stats_impl(handle, attr_table, sb);
+	if (err == 0) {
+		err = zfs_obj_to_path_impl(osp, obj, handle, attr_table,
+		    buf, len);
+	}
+
+	zfs_release_sa_handle(handle, bonus, FTAG);
+	return (err);
+}
+
+/*
+ * Kernel-only section: symbols exported to other modules and the
+ * tunables registered as module parameters (mode 0644).
+ */
+#if defined(_KERNEL)
+EXPORT_SYMBOL(zfs_create_fs);
+EXPORT_SYMBOL(zfs_obj_to_path);
+
+/* zfs_object_mutex_size determines the size of the znode hold arrays. */
+/* CSTYLED */
+module_param(zfs_object_mutex_size, uint, 0644);
+MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+/* Debug switch; see the parameter description string below. */
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
+#endif

diff --git a/zfs/module/os/linux/zfs/zio_crypt.c b/zfs/module/os/linux/zfs/zio_crypt.c
new file mode 100644
index 0000000..50e9390
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zio_crypt.c

@@ -0,0 +1,2059 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/zio_crypt.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/zil.h>
+#include <sys/sha2.h>
+#include <sys/hkdf.h>
+#include <sys/qat.h>
+
+/*
+ * This file is responsible for handling all of the details of generating
+ * encryption parameters and performing encryption and authentication.
+ *
+ * BLOCK ENCRYPTION PARAMETERS:
+ * Encryption /Authentication Algorithm Suite (crypt):
+ * The encryption algorithm, mode, and key length we are going to use. We
+ * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
+ * keys. All authentication is currently done with SHA512-HMAC.
+ *
+ * Plaintext:
+ * The unencrypted data that we want to encrypt.
+ *
+ * Initialization Vector (IV):
+ * An initialization vector for the encryption algorithms. This is used to
+ * "tweak" the encryption algorithms so that two blocks of the same data are
+ * encrypted into different ciphertext outputs, thus obfuscating block patterns.
+ * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
+ * never reused with the same encryption key. This value is stored unencrypted
+ * and must simply be provided to the decryption function. We use a 96 bit IV
+ * (as recommended by NIST) for all block encryption. For non-dedup blocks we
+ * derive the IV randomly. The first 64 bits of the IV are stored in the second
+ * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
+ * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
+ * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
+ * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
+ * level 0 blocks is the number of allocated dnodes in that block. The on-disk
+ * format supports at most 2^15 slots per L0 dnode block, because the maximum
+ * block size is 16MB (2^24). In either case, for level 0 blocks this number
+ * will still be smaller than UINT32_MAX so it is safe to store the IV in the
+ * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
+ * for the dnode code.
+ *
+ * Master key:
+ * This is the most important secret data of an encrypted dataset. It is used
+ * along with the salt to generate the actual encryption keys via HKDF. We
+ * do not use the master key to directly encrypt any data because there are
+ * theoretical limits on how much data can actually be safely encrypted with
+ * any encryption mode. The master key is stored encrypted on disk with the
+ * user's wrapping key. Its length is determined by the encryption algorithm.
+ * For details on how this is stored see the block comment in dsl_crypt.c
+ *
+ * Salt:
+ * Used as an input to the HKDF function, along with the master key. We use a
+ * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
+ * can be used for encrypting many blocks, so we cache the current salt and the
+ * associated derived key in zio_crypt_t so we do not need to derive it again
+ * needlessly.
+ *
+ * Encryption Key:
+ * A secret binary key, generated from an HKDF function used to encrypt and
+ * decrypt data.
+ *
+ * Message Authentication Code (MAC)
+ * The MAC is an output of authenticated encryption modes such as AES-GCM and
+ * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
+ * data on disk and return garbage to the application. Effectively, it is a
+ * checksum that can not be reproduced by an attacker. We store the MAC in the
+ * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
+ * regular checksum of the ciphertext which can be used for scrubbing.
+ *
+ * OBJECT AUTHENTICATION:
+ * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
+ * they contain some info that always needs to be readable. To prevent this
+ * data from being altered, we authenticate this data using SHA512-HMAC. This
+ * will produce a MAC (similar to the one produced via encryption) which can
+ * be used to verify the object was not modified. HMACs do not require key
+ * rotation or IVs, so we can keep up to the full 3 copies of authenticated
+ * data.
+ *
+ * ZIL ENCRYPTION:
+ * ZIL blocks have their bp written to disk ahead of the associated data, so we
+ * cannot store the MAC there as we normally do. For these blocks the MAC is
+ * stored in the embedded checksum within the zil_chain_t header. The salt and
+ * IV are generated for the block on bp allocation instead of at encryption
+ * time. In addition, ZIL blocks have some pieces that must be left in plaintext
+ * for claiming even though all of the sensitive user data still needs to be
+ * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
+ * pieces of the block need to be encrypted. All data that is not encrypted is
+ * authenticated using the AAD mechanisms that the supported encryption modes
+ * provide for. In order to preserve the semantics of the ZIL for encrypted
+ * datasets, the ZIL is not protected at the objset level as described below.
+ *
+ * DNODE ENCRYPTION:
+ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
+ * in plaintext for scrubbing and claiming, but the bonus buffers might contain
+ * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
+ * which pieces of the block need to be encrypted. For more details about
+ * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
+ *
+ * OBJECT SET AUTHENTICATION:
+ * Up to this point, everything we have encrypted and authenticated has been
+ * at level 0 (or -2 for the ZIL). If we did not do any further work the
+ * on-disk format would be susceptible to attacks that deleted or rearranged
+ * the order of level 0 blocks. Ideally, the cleanest solution would be to
+ * maintain a tree of authentication MACs going up the bp tree. However, this
+ * presents a problem for raw sends. Send files do not send information about
+ * indirect blocks so there would be no convenient way to transfer the MACs and
+ * they cannot be recalculated on the receive side without the master key which
+ * would defeat one of the purposes of raw sends in the first place. Instead,
+ * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
+ * from the level below. We also include some portable fields from blk_prop such
+ * as the lsize and compression algorithm to prevent the data from being
+ * misinterpreted.
+ *
+ * At the objset level, we maintain 2 separate 256 bit MACs in the
+ * objset_phys_t. The first one is "portable" and is the logical root of the
+ * MAC tree maintained in the metadnode's bps. The second, is "local" and is
+ * used as the root MAC for the user accounting objects, which are also not
+ * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
+ * of the send file. The useraccounting code ensures that the useraccounting
+ * info is not present upon a receive, so the local MAC can simply be cleared
+ * out at that time. For more info about objset_phys_t authentication, see
+ * zio_crypt_do_objset_hmacs().
+ *
+ * CONSIDERATIONS FOR DEDUP:
+ * In order for dedup to work, blocks that we want to dedup with one another
+ * need to use the same IV and encryption key, so that they will have the same
+ * ciphertext. Normally, one should never reuse an IV with the same encryption
+ * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
+ * blocks. In this case, however, since we are using the same plaintext as
+ * well all that we end up with is a duplicate of the original ciphertext we
+ * already had. As a result, an attacker with read access to the raw disk will
+ * be able to tell which blocks are the same but this information is given away
+ * by dedup anyway. In order to get the same IVs and encryption keys for
+ * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC
+ * here so that a reproducible checksum of the plaintext is never available to
+ * the attacker. The HMAC key is kept alongside the master key, encrypted on
+ * disk. The first 64 bits of the HMAC are used in place of the random salt, and
+ * the next 96 bits are used as the IV. As a result of this mechanism, dedup
+ * will only work within a clone family since encrypted dedup requires use of
+ * the same master and HMAC keys.
+ */
+
+/*
+ * After encrypting many blocks with the same key we may start to run up
+ * against the theoretical limits of how much data can securely be encrypted
+ * with a single key using the supported encryption modes. The most obvious
+ * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
+ * the more IVs we generate (which both GCM and CCM modes strictly forbid).
+ * This risk actually grows surprisingly quickly over time according to the
+ * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
+ * generated n IVs with a cryptographically secure RNG, the approximate
+ * probability p(n) of a collision is given as:
+ *
+ * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96)))
+ *
+ * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
+ *
+ * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
+ * we must not write more than 398,065,730 blocks with the same encryption key.
+ * Therefore, we rotate our keys after 400,000,000 blocks have been written by
+ * generating a new random 64 bit salt for our HKDF encryption key generation
+ * function.
+ */
+/*
+ * Number of times a salt may be used before it is rotated.  The
+ * effective limit is the smaller of the zfs_key_max_salt_uses tunable
+ * and the built-in default, so the tunable can lower the limit but
+ * never raise it.
+ */
+#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000
+#define	ZFS_CURRENT_MAX_SALT_USES	\
+	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
+unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
+
+/*
+ * Authentication-relevant fields taken from a block pointer: the
+ * portable part of blk_prop and the MAC held in blk_cksum.
+ * NOTE(review): presumably the per-block-pointer record that is hashed
+ * for the indirect levels of the MAC tree - its users are not visible
+ * in this part of the file; confirm.
+ */
+typedef struct blkptr_auth_buf {
+	uint64_t bab_prop;			/* blk_prop - portable mask */
+	uint8_t bab_mac[ZIO_DATA_MAC_LEN];	/* MAC from blk_cksum */
+	uint64_t bab_pad;			/* reserved for future use */
+} blkptr_auth_buf_t;
+
+/*
+ * Table of encryption suites, indexed by the 'crypt' value.  Each row
+ * holds the ICP mechanism name, the cipher mode (ZC_TYPE_*), the key
+ * length in bytes and a name string (presumably the value shown for
+ * the encryption property - confirm).  The first three rows carry no
+ * mechanism and a key length of 0.
+ */
+zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
+	{"",			ZC_TYPE_NONE,	0,	"inherit"},
+	{"",			ZC_TYPE_NONE,	0,	"on"},
+	{"",			ZC_TYPE_NONE,	0,	"off"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
+};
+
+/*
+ * Tear down a zio_crypt_key_t: destroy its salt lock, free both crypto
+ * context templates and wipe the whole structure so that no key
+ * material is left behind.  The structure itself is not freed.
+ */
+void
+zio_crypt_key_destroy(zio_crypt_key_t *key)
+{
+	rw_destroy(&key->zk_salt_lock);
+
+	/* free crypto templates */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
+
+	/* zero out sensitive data */
+	bzero(key, sizeof (zio_crypt_key_t));
+}
+
+/*
+ * Initialize a new zio_crypt_key_t for encryption suite 'crypt'.
+ *
+ * The guid, master key, HMAC key and salt are filled with random
+ * bytes and the current encryption key is derived from the master key
+ * and salt via HKDF.  Crypto context templates are created on a best
+ * effort basis; failing to create one is not an error.
+ *
+ *	IN:	crypt	- index into zio_crypt_table.
+ *	OUT:	key	- key structure to initialize; wiped on failure.
+ *
+ *	RETURN:	0 on success, error code on failure
+ */
+int
+zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uint_t keydata_len;
+
+	ASSERT(key != NULL);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+	bzero(key, sizeof (zio_crypt_key_t));
+
+	/* fill keydata buffers and salt with random data */
+	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for the ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	/*
+	 * The HMAC key must reference the random key material generated
+	 * above, not the crypto_key_t structure that describes it.
+	 */
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
+	key->zk_salt_count = 0;
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	return (0);
+
+error:
+	/*
+	 * NOTE(review): zk_salt_lock has not been rw_init()ed on this
+	 * path, yet zio_crypt_key_destroy() calls rw_destroy() on it -
+	 * confirm that destroying a zeroed lock is harmless.
+	 */
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Rotate the salt of 'key' once it has been used too many times.
+ *
+ * A new random salt is generated and, with the salt lock held for
+ * writing, the current encryption key is re-derived from the master
+ * key and the new salt, the usage count is reset and the context
+ * template for the current key is rebuilt.  If another thread already
+ * rotated the salt, nothing is changed.
+ *
+ *	RETURN:	0 on success (including when no rotation was needed),
+ *		error code on failure
+ */
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret = 0;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech;
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/* generate a new salt */
+	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* someone beat us to the salt rotation, just unlock and return */
+	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
+		goto out_unlock;
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
+	if (ret != 0)
+		goto out_unlock;
+
+	/* assign the salt and reset the usage count */
+	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/*
+	 * Describe the key's cipher to the ICP.  The mechanism must be
+	 * filled in before it is used; it was previously passed to
+	 * crypto_create_ctx_template() uninitialized.
+	 */
+	mech.cm_type =
+	    crypto_mech2id(zio_crypt_table[key->zk_crypt].ci_mechname);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/*
+	 * Destroy the old context template and create the new one.  As
+	 * the template is only an optimization, a failure to create it
+	 * is not reported to the caller.
+	 */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	rw_exit(&key->zk_salt_lock);
+
+	return (0);
+
+out_unlock:
+	rw_exit(&key->zk_salt_lock);
+error:
+	return (ret);
+}
+
+/*
+ * Copy the key's current salt into 'salt' and count the use.  Once the
+ * use count reaches ZFS_CURRENT_MAX_SALT_USES a salt rotation is
+ * attempted.  See the comment above the zfs_key_max_salt_uses
+ * definition for details.
+ *
+ *	RETURN:	0 on success, error code if the rotation failed
+ */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	boolean_t rotate;
+
+	/* Snapshot the salt and bump its use count under the reader lock. */
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	rotate = (atomic_inc_64_nv(&key->zk_salt_count) >=
+	    ZFS_CURRENT_MAX_SALT_USES);
+	rw_exit(&key->zk_salt_lock);
+
+	if (!rotate)
+		return (0);
+
+	return (zio_crypt_key_change_salt(key));
+}
+
+/*
+ * Single entry point for every encryption and decryption performed by zfs.
+ * puio references the plaintext and cuio the ciphertext; when encrypting,
+ * cuio must have room for the ciphertext plus a MAC. The MAC is taken to
+ * be the last iovec of cuio. datalen is the length of the plaintext /
+ * ciphertext alone, ivbuf supplies the IV (ZIO_DATA_IV_LEN bytes) and
+ * authbuf / auth_len describe the additional authenticated data.
+ *
+ * Returns 0 on success, EIO if encryption fails and ECKSUM if decryption
+ * fails.
+ */
+static int
+zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
+    crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
+    zfs_uio_t *puio, zfs_uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
+{
+	int rc;
+	uint_t mac_len, plain_len;
+	zio_crypt_info_t info;
+	crypto_mechanism_t mech;
+	crypto_data_t plain, cipher;
+	CK_AES_CCM_PARAMS ccm_params;
+	CK_AES_GCM_PARAMS gcm_params;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/* fetch the table entry describing this encryption suite */
+	info = zio_crypt_table[crypt];
+
+	/* by convention the MAC occupies the final iovec of the cipher uio */
+	mac_len = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len;
+	ASSERT(mac_len <= ZIO_DATA_MAC_LEN);
+
+	/* the mechanism id is looked up from the suite's mechanism name */
+	mech.cm_type = crypto_mech2id(info.ci_mechname);
+
+	/*
+	 * The ICP wants the plaintext length to include the MAC length
+	 * when decrypting, even though the plain uio does not have to
+	 * provide that extra space.
+	 */
+	plain_len = (encrypt) ? datalen : datalen + mac_len;
+
+	/* only AES-CCM and AES-GCM parameter blocks are supported today */
+	if (info.ci_crypt_type != ZC_TYPE_CCM) {
+		gcm_params.pIv = ivbuf;
+		gcm_params.ulIvLen = ZIO_DATA_IV_LEN;
+		gcm_params.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN);
+		gcm_params.pAAD = authbuf;
+		gcm_params.ulAADLen = auth_len;
+		gcm_params.ulTagBits = CRYPTO_BYTES2BITS(mac_len);
+
+		mech.cm_param = (char *)(&gcm_params);
+		mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+	} else {
+		ccm_params.nonce = ivbuf;
+		ccm_params.ulNonceSize = ZIO_DATA_IV_LEN;
+		ccm_params.authData = authbuf;
+		ccm_params.ulAuthDataSize = auth_len;
+		ccm_params.ulMACSize = mac_len;
+		ccm_params.ulDataSize = plain_len;
+
+		mech.cm_param = (char *)(&ccm_params);
+		mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
+	}
+
+	/* describe the plaintext side for the ICP */
+	plain.cd_format = CRYPTO_DATA_UIO;
+	plain.cd_offset = 0;
+	plain.cd_length = plain_len;
+	plain.cd_miscdata = NULL;
+	plain.cd_uio = puio;
+
+	/* the ciphertext side always covers the data plus the MAC */
+	cipher.cd_format = CRYPTO_DATA_UIO;
+	cipher.cd_offset = 0;
+	cipher.cd_length = datalen + mac_len;
+	cipher.cd_miscdata = NULL;
+	cipher.cd_uio = cuio;
+
+	if (encrypt) {
+		rc = crypto_encrypt(&mech, &plain, key, tmpl, &cipher,
+		    NULL);
+		if (rc != CRYPTO_SUCCESS)
+			return (SET_ERROR(EIO));
+
+		return (0);
+	}
+
+	rc = crypto_decrypt(&mech, &cipher, key, tmpl, &plain, NULL);
+	if (rc != CRYPTO_SUCCESS) {
+		/* a MAC mismatch is the only decryption failure expected */
+		ASSERT3U(rc, ==, CRYPTO_INVALID_MAC);
+		return (SET_ERROR(ECKSUM));
+	}
+
+	return (0);
+}
+
+/*
+ * Wrap (encrypt and authenticate) the master key and HMAC key held in key
+ * using the wrapping key cwkey. A freshly generated IV of WRAPPING_IV_LEN
+ * bytes is written to iv, the wrapped keys to keydata_out and
+ * hmac_keydata_out, and WRAPPING_MAC_LEN bytes of MAC to mac. Returns 0 on
+ * success or the error from IV generation or from the encryption.
+ */
+int
+zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
+{
+	int err;
+	uint_t keydata_len, enc_len, aad_len;
+	uint64_t crypt = key->zk_crypt;
+	uint64_t aad[3];
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	zfs_uio_t puio, cuio;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* every wrap operation gets its own IV */
+	err = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
+	if (err != 0)
+		return (err);
+
+	/* plaintext: the master key followed by the hmac key */
+	plain_iovecs[0].iov_base = key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	/* ciphertext: both wrapped keys with the MAC in the last iovec */
+	cipher_iovecs[0].iov_base = keydata_out;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = hmac_keydata_out;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	/*
+	 * Writing to the old format is not supported, but rewrapping the
+	 * key is, so that the user can move and quarantine datasets that
+	 * are still on the old format. Version 0 authenticates the guid
+	 * alone; the current version adds the crypt suite and the version.
+	 */
+	aad[0] = LE_64(key->zk_guid);
+	if (key->zk_version == 0) {
+		aad_len = sizeof (uint64_t);
+	} else {
+		ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(key->zk_version);
+		aad_len = sizeof (uint64_t) * 3;
+	}
+
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+
+	puio.uio_iov = plain_iovecs;
+	puio.uio_iovcnt = 2;
+	puio.uio_segflg = UIO_SYSSPACE;
+
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* encrypt the keys, leaving the ciphertext and MAC in the outputs */
+	return (zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)aad, aad_len));
+}
+
+/*
+ * Unwrap (decrypt and authenticate) a wrapped master key / HMAC key pair
+ * with the wrapping key cwkey and use the result to populate key. keydata
+ * and hmac_keydata are the wrapped keys, iv and mac the values produced
+ * when they were wrapped, and crypt / version / guid must match what was
+ * authenticated at that time. On success key also receives a fresh salt,
+ * the derived current key and (best effort) crypto context templates.
+ * Returns 0 on success; on failure zio_crypt_key_destroy() is called on
+ * key and the error is returned.
+ */
+int
+zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
+    uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
+    uint8_t *mac, zio_crypt_key_t *key)
+{
+	int ret;
+	uint_t keydata_len, enc_len, aad_len;
+	uint64_t aad[3];
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	zfs_uio_t puio, cuio;
+	crypto_mechanism_t mech;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/* set up the lock first; the failure path hands key to destroy */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* the decrypted keys land directly in the key structure */
+	plain_iovecs[0].iov_base = key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	/* wrapped keys in, with the MAC in the last iovec */
+	cipher_iovecs[0].iov_base = keydata;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = hmac_keydata;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	/* version 0 authenticated the guid alone */
+	aad[0] = LE_64(guid);
+	if (version == 0) {
+		aad_len = sizeof (uint64_t);
+	} else {
+		ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(version);
+		aad_len = sizeof (uint64_t) * 3;
+	}
+
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+
+	puio.uio_iov = plain_iovecs;
+	puio.uio_iovcnt = 2;
+	puio.uio_segflg = UIO_SYSSPACE;
+
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* decrypt the keys and store the result in the key structure */
+	ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)aad, aad_len);
+	if (ret != 0)
+		goto fail;
+
+	/* every unwrapped key starts out with a fresh salt */
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto fail;
+
+	/* derive the current key from the master key and that salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto fail;
+
+	/* describe both raw keys to the ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * The context templates are only an optimization, so a failure
+	 * to create one is tolerated and simply leaves it NULL.
+	 */
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	if (crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP) != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	if (crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP) != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	/* record what this key belongs to and reset the salt use count */
+	key->zk_crypt = crypt;
+	key->zk_version = version;
+	key->zk_guid = guid;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+fail:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Fill ivbuf with ZIO_DATA_IV_LEN pseudo-random bytes. Returns 0 on
+ * success; on failure the buffer is zeroed and the error is returned.
+ */
+int
+zio_crypt_generate_iv(uint8_t *ivbuf)
+{
+	int err = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
+
+	/* never hand a partially generated IV back to the caller */
+	if (err != 0)
+		bzero(ivbuf, ZIO_DATA_IV_LEN);
+
+	return (err);
+}
+
+/*
+ * Compute the SHA512-HMAC of data (datalen bytes) with the key's HMAC key
+ * and copy the first digestlen bytes of it into digestbuf. digestlen may
+ * not exceed SHA512_DIGEST_LENGTH. Returns 0 on success; on failure
+ * digestbuf is zeroed and EIO is returned.
+ */
+int
+zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen)
+{
+	int err;
+	uint8_t full_digest[SHA512_DIGEST_LENGTH];
+	crypto_mechanism_t mech;
+	crypto_data_t msg, out;
+
+	ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH);
+
+	/* sha512-hmac takes no mechanism parameters */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* input: the caller's buffer */
+	msg.cd_format = CRYPTO_DATA_RAW;
+	msg.cd_offset = 0;
+	msg.cd_length = datalen;
+	msg.cd_raw.iov_base = (char *)data;
+	msg.cd_raw.iov_len = msg.cd_length;
+
+	/* output: a full-size digest that is truncated afterwards */
+	out.cd_format = CRYPTO_DATA_RAW;
+	out.cd_offset = 0;
+	out.cd_length = SHA512_DIGEST_LENGTH;
+	out.cd_raw.iov_base = (char *)full_digest;
+	out.cd_raw.iov_len = out.cd_length;
+
+	if (crypto_mac(&mech, &msg, &key->zk_hmac_key, key->zk_hmac_tmpl,
+	    &out, NULL) != CRYPTO_SUCCESS) {
+		err = SET_ERROR(EIO);
+		bzero(digestbuf, digestlen);
+		return (err);
+	}
+
+	bcopy(full_digest, digestbuf, digestlen);
+
+	return (0);
+}
+
+/*
+ * Derive the salt and IV from the HMAC of the data itself instead of
+ * generating them randomly: the first ZIO_DATA_SALT_LEN bytes of the
+ * digest become the salt and the following ZIO_DATA_IV_LEN bytes the IV.
+ * Returns 0 on success or the error from zio_crypt_do_hmac(), in which
+ * case salt and ivbuf are left untouched.
+ */
+int
+zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
+{
+	uint8_t hmac[SHA512_DIGEST_LENGTH];
+	int err;
+
+	err = zio_crypt_do_hmac(key, data, datalen, hmac,
+	    SHA512_DIGEST_LENGTH);
+	if (err == 0) {
+		bcopy(hmac, salt, ZIO_DATA_SALT_LEN);
+		bcopy(&hmac[ZIO_DATA_SALT_LEN], ivbuf, ZIO_DATA_IV_LEN);
+	}
+
+	return (err);
+}
+
+/*
+ * The following functions are used to encode and decode encryption parameters
+ * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
+ * byte strings, which normally means that these strings would not need to deal
+ * with byteswapping at all. However, both blkptr_t and zil_header_t may be
+ * byteswapped by lower layers and so we must "undo" that byteswap here upon
+ * decoding and encoding in a non-native byteorder. These functions require
+ * that the byteorder bit is correct before being called.
+ */
+/*
+ * Store the salt and IV in an encrypted bp: the salt goes into
+ * blk_dva[2].dva_word[0], the first 8 bytes of the IV into
+ * blk_dva[2].dva_word[1] and the remaining 4 bytes into the IV2 field.
+ * Each value is byteswapped first when BP_SHOULD_BYTESWAP(bp) is set.
+ */
+void
+zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t salt64, iv64;
+	uint32_t iv32;
+
+	ASSERT(BP_IS_ENCRYPTED(bp));
+
+	/* pull the byte strings into integers of the matching width */
+	bcopy(salt, &salt64, sizeof (uint64_t));
+	bcopy(iv, &iv64, sizeof (uint64_t));
+	bcopy(iv + sizeof (uint64_t), &iv32, sizeof (uint32_t));
+
+	/* undo, ahead of time, the byteswap lower layers will apply */
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		salt64 = BSWAP_64(salt64);
+		iv64 = BSWAP_64(iv64);
+		iv32 = BSWAP_32(iv32);
+	}
+
+	bp->blk_dva[2].dva_word[0] = salt64;
+	bp->blk_dva[2].dva_word[1] = iv64;
+	BP_SET_IV2(bp, iv32);
+}
+
+/*
+ * Extract the salt (8 bytes) and IV (8 + 4 bytes) stored in a protected
+ * bp, byteswapping each value when BP_SHOULD_BYTESWAP(bp) is set. For a
+ * bp that is only authenticated both outputs are zero filled instead.
+ */
+void
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t salt64, iv64;
+	uint32_t iv32;
+
+	ASSERT(BP_IS_PROTECTED(bp));
+
+	/* zero the outputs here so that callers do not need to check */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		bzero(salt, ZIO_DATA_SALT_LEN);
+		bzero(iv, ZIO_DATA_IV_LEN);
+		return;
+	}
+
+	salt64 = bp->blk_dva[2].dva_word[0];
+	iv64 = bp->blk_dva[2].dva_word[1];
+	iv32 = (uint32_t)BP_GET_IV2(bp);
+
+	/* restore the original byte strings if the bp was byteswapped */
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		salt64 = BSWAP_64(salt64);
+		iv64 = BSWAP_64(iv64);
+		iv32 = BSWAP_32(iv32);
+	}
+
+	bcopy(&salt64, salt, sizeof (uint64_t));
+	bcopy(&iv64, iv, sizeof (uint64_t));
+	bcopy(&iv32, iv + sizeof (uint64_t), sizeof (uint32_t));
+}
+
+/*
+ * Store a 16 byte MAC in words 2 and 3 of the bp's checksum, byteswapping
+ * each word when BP_SHOULD_BYTESWAP(bp) is set. Must not be used on
+ * DMU_OT_OBJSET bps.
+ */
+void
+zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t first, second;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
+
+	/* split the MAC into its two 64-bit halves */
+	bcopy(mac, &first, sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &second, sizeof (uint64_t));
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		first = BSWAP_64(first);
+		second = BSWAP_64(second);
+	}
+
+	bp->blk_cksum.zc_word[2] = first;
+	bp->blk_cksum.zc_word[3] = second;
+}
+
+/*
+ * Extract the 16 byte MAC stored in words 2 and 3 of the bp's checksum,
+ * byteswapping each word when BP_SHOULD_BYTESWAP(bp) is set. For
+ * DMU_OT_OBJSET bps the output is zero filled (ZIO_DATA_MAC_LEN bytes)
+ * instead.
+ */
+void
+zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t first, second;
+
+	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
+
+	/* zero the output here so that callers do not need to check */
+	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		bzero(mac, ZIO_DATA_MAC_LEN);
+		return;
+	}
+
+	first = bp->blk_cksum.zc_word[2];
+	second = bp->blk_cksum.zc_word[3];
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		first = BSWAP_64(first);
+		second = BSWAP_64(second);
+	}
+
+	bcopy(&first, mac, sizeof (uint64_t));
+	bcopy(&second, mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * Store a 16 byte MAC in words 2 and 3 of the embedded checksum of the
+ * zil_chain_t that data points to. No byteswapping is performed.
+ */
+void
+zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
+{
+	zil_chain_t *zilc = data;
+	uint64_t *words = zilc->zc_eck.zec_cksum.zc_word;
+
+	bcopy(mac, &words[2], sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &words[3], sizeof (uint64_t));
+}
+
+/*
+ * Extract the 16 byte MAC stored in words 2 and 3 of the embedded checksum
+ * of the zil_chain_t that data points to.
+ */
+void
+zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
+{
+	/*
+	 * The ZIL MAC lives inside the very block it protects, and that
+	 * block will not have been byteswapped by the time this function
+	 * is called, so the MAC can be copied out without byteswapping.
+	 */
+	const zil_chain_t *zilc = data;
+	const uint64_t *words = zilc->zc_eck.zec_cksum.zc_word;
+
+	bcopy(&words[2], mac, sizeof (uint64_t));
+	bcopy(&words[3], mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * Walk a block of dnodes (src_abd) and copy just the bonus buffers of the
+ * dnodes that carry an encrypted bonus type to the same offsets in dst.
+ * datalen is the size of both src_abd and the dst buffer, not merely the
+ * combined length of the bonus buffers.
+ */
+void
+zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
+{
+	uint_t slot, nslots = datalen >> DNODE_SHIFT;
+	uint8_t *srcbuf = abd_borrow_buf_copy(src_abd, datalen);
+	dnode_phys_t *src_dn = (dnode_phys_t *)srcbuf;
+	dnode_phys_t *dst_dn = (dnode_phys_t *)dst;
+
+	/* a dnode may span several slots, so step over its extra slots */
+	for (slot = 0; slot < nslots;
+	    slot += src_dn[slot].dn_extra_slots + 1) {
+		dnode_phys_t *dnp = &src_dn[slot];
+
+		/* skip free dnodes and those without an encrypted bonus */
+		if (dnp->dn_type == DMU_OT_NONE ||
+		    !DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) ||
+		    dnp->dn_bonuslen == 0)
+			continue;
+
+		bcopy(DN_BONUS(dnp), DN_BONUS(&dst_dn[slot]),
+		    DN_MAX_BONUS_LEN(dnp));
+	}
+
+	abd_return_buf(src_abd, srcbuf, datalen);
+}
+
+/*
+ * This function decides which fields of blk_prop are covered by the
+ * various on-disk MAC algorithms. It does so by resetting, in place, every
+ * field of bp's blk_prop that must not influence the MAC.
+ */
+static void
+zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version)
+{
+	/*
+	 * Version 0 failed to zero out all of the non-portable fields.
+	 * Its behavior is preserved here so that pools on that version
+	 * can still be imported read-only.
+	 */
+	if (version == 0) {
+		BP_SET_DEDUP(bp, 0);
+		BP_SET_CHECKSUM(bp, 0);
+		BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
+		return;
+	}
+
+	ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+
+	if (BP_IS_HOLE(bp)) {
+		/*
+		 * With the hole_birth feature a hole may still carry
+		 * values in these fields. Clearing all of blk_prop
+		 * guarantees that raw sends behave the same with or
+		 * without the feature.
+		 */
+		bp->blk_prop = 0ULL;
+	} else {
+		/*
+		 * At L0 these fields are verified so that data blocks
+		 * cannot be reinterpreted; for instance, an attacker must
+		 * not be able to make us return raw lz4 compressed data
+		 * to the user by flipping the compression bits. Above L0
+		 * the policy cannot be enforced, because raw sends carry
+		 * no information about indirect blocks and the values may
+		 * therefore differ on the receive side. This opens no new
+		 * attack vector: whatever is altered in a higher level bp
+		 * must still verify the correct order of the layer below.
+		 */
+		if (BP_GET_LEVEL(bp) != 0) {
+			BP_SET_BYTEORDER(bp, 0);
+			BP_SET_COMPRESS(bp, 0);
+
+			/*
+			 * A psize of zero would trigger asserts; any
+			 * constant value serves the purpose.
+			 */
+			BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
+		}
+
+		BP_SET_DEDUP(bp, 0);
+		BP_SET_CHECKSUM(bp, 0);
+	}
+}
+
+/*
+ * Build the authentication buffer (bab) for a bp: its MAC, its portable
+ * blk_prop bits in little endian, and zeroed padding. The bp itself is not
+ * modified; a local copy is byteswapped first when should_bswap is set.
+ * *bab_len receives the number of bytes of bab that are to be
+ * authenticated, which excludes the padding for version 0.
+ */
+static void
+zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp,
+    blkptr_auth_buf_t *bab, uint_t *bab_len)
+{
+	blkptr_t local = *bp;
+
+	if (should_bswap)
+		byteswap_uint64_array(&local, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&local) || BP_IS_HOLE(&local));
+	ASSERT0(BP_IS_EMBEDDED(&local));
+
+	/*
+	 * The MAC has to be decoded before the non-portable bits are
+	 * zeroed, because the zeroing also wipes the endianness.
+	 */
+	zio_crypt_decode_mac_bp(&local, bab->bab_mac);
+	zio_crypt_bp_zero_nonportable_blkprop(&local, version);
+
+	/* blk_prop is always authenticated in LE to ensure portability */
+	bab->bab_prop = LE_64(local.blk_prop);
+	bab->bab_pad = 0ULL;
+
+	/* version 0 did not include the padding */
+	if (version == 0)
+		*bab_len = sizeof (blkptr_auth_buf_t) - sizeof (uint64_t);
+	else
+		*bab_len = sizeof (blkptr_auth_buf_t);
+}
+
+/*
+ * Feed the authenticated portion of one bp (see zio_crypt_bp_auth_init())
+ * into the running MAC context ctx. Returns 0 on success or EIO if the
+ * MAC update fails.
+ */
+static int
+zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t authbuf;
+	uint_t authlen;
+	crypto_data_t chunk;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &authlen);
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = authlen;
+	chunk.cd_raw.iov_base = (char *)&authbuf;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * Feed the authenticated portion of one bp (see zio_crypt_bp_auth_init())
+ * into the running SHA2 context ctx.
+ */
+static void
+zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t authbuf;
+	uint_t authlen;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &authlen);
+	SHA2Update(ctx, &authbuf, authlen);
+}
+
+/*
+ * Append the authenticated portion of one bp (see zio_crypt_bp_auth_init())
+ * to the AAD buffer at *aadp, then advance *aadp and grow *aad_len by the
+ * number of bytes appended.
+ */
+static void
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t authbuf;
+	uint_t authlen;
+	uint8_t *out = *aadp;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &authlen);
+
+	bcopy(&authbuf, out, authlen);
+	*aadp = out + authlen;
+	*aad_len = *aad_len + authlen;
+}
+
+/*
+ * Feed one dnode into the running MAC context ctx: first its core (the
+ * part ahead of dn_blkptr, with non-portable bits masked out), then each
+ * of its dn_nblkptr block pointers and, if DNODE_FLAG_SPILL_BLKPTR is
+ * set, its spill block pointer. Returns 0 on success or EIO if a MAC
+ * update fails.
+ */
+static int
+zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, dnode_phys_t *dnp)
+{
+	int err, idx;
+	dnode_phys_t core;
+	size_t core_len = offsetof(dnode_phys_t, dn_blkptr);
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	crypto_data_t chunk;
+
+	/*
+	 * Work on a private copy of the dnode core so the caller's dnode
+	 * stays untouched. Only the bytes ahead of dn_blkptr are copied;
+	 * the rest of the local dnode_phys_t is never read.
+	 */
+	bcopy(dnp, &core, core_len);
+
+	if (le_bswap) {
+		core.dn_datablkszsec = BSWAP_16(core.dn_datablkszsec);
+		core.dn_bonuslen = BSWAP_16(core.dn_bonuslen);
+		core.dn_maxblkid = BSWAP_64(core.dn_maxblkid);
+		core.dn_used = BSWAP_64(core.dn_used);
+	}
+
+	/* keep only the portable flags and leave dn_used out of the MAC */
+	core.dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+	core.dn_used = 0;
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = core_len;
+	chunk.cd_raw.iov_base = (char *)&core;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	/* the block pointers are taken from the original dnode */
+	for (idx = 0; idx < dnp->dn_nblkptr; idx++) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, version,
+		    should_bswap, &dnp->dn_blkptr[idx]);
+		if (err != 0)
+			return (err);
+	}
+
+	if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) == 0)
+		return (0);
+
+	return (zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap,
+	    DN_SPILL_BLKPTR(dnp)));
+}
+
+/*
+ * objset_phys_t blocks introduce a number of exceptions to the normal
+ * authentication process. objset_phys_t's contain 2 separate HMACS for
+ * protecting the integrity of their data. The portable_mac protects the
+ * metadnode. This MAC can be sent with a raw send and protects against
+ * reordering of data within the metadnode. The local_mac protects the user
+ * accounting objects which are not sent from one system to another.
+ *
+ * In addition, objset blocks are the only blocks that can be modified and
+ * written to disk without the key loaded under certain circumstances. During
+ * zil_claim() we need to be able to update the zil_header_t to complete
+ * claiming log blocks and during raw receives we need to write out the
+ * portable_mac from the send file. Both of these actions are possible
+ * because these fields are not protected by either MAC so neither one will
+ * need to modify the MACs without the key. However, when the modified blocks
+ * are written out they will be byteswapped into the host machine's native
+ * endianness which will modify fields protected by the MAC. As a result, MAC
+ * calculation for objset blocks works slightly differently from other block
+ * types. Where other block types MAC the data in whatever endianness is
+ * written to disk, objset blocks always MAC little endian version of their
+ * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
+ * and le_bswap indicates whether a byteswap is needed to get this block
+ * into little endian format.
+ *
+ * key supplies the HMAC key and the on-disk format version, data / datalen
+ * describe the objset_phys_t buffer, and portable_mac / local_mac each
+ * receive ZIO_OBJSET_MAC_LEN bytes. Returns 0 on success. If any step of
+ * the MAC computation fails, both MAC buffers are zeroed and EIO is
+ * returned.
+ */
+int
+zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_data_t cd;
+	objset_phys_t *osp = data;
+	uint64_t intval;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
+	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	/* calculate the portable MAC from the portable fields and metadnode */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the os_type */
+	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the portable os_flags: bring the flags into host order to
+	 * apply the mask, then convert the result to little endian
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the metadnode */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+	    should_bswap, &osp->os_meta_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_portable_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
+
+	/*
+	 * This is necessary here as we check next whether
+	 * OBJSET_FLAG_USERACCOUNTING_COMPLETE is set in order to
+	 * decide if the local_mac should be zeroed out. That flag will always
+	 * be set by dmu_objset_id_quota_upgrade_cb() and
+	 * dmu_objset_userspace_upgrade_cb() if useraccounting has been
+	 * completed.
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	boolean_t uacct_incomplete =
+	    !(intval & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+
+	/*
+	 * The local MAC protects the user, group and project accounting.
+	 * If these objects are not present, the local MAC is zeroed out.
+	 */
+	if (uacct_incomplete ||
+	    (datalen >= OBJSET_PHYS_SIZE_V3 &&
+	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_projectused_dnode.dn_type == DMU_OT_NONE) ||
+	    (datalen >= OBJSET_PHYS_SIZE_V2 &&
+	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) ||
+	    (datalen <= OBJSET_PHYS_SIZE_V1)) {
+		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+		return (0);
+	}
+
+	/* calculate the local MAC from the userused and groupused dnodes */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the non-portable os_flags */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the user accounting dnodes */
+	if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_userused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_groupused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	/*
+	 * Test datalen before touching the project accounting dnode, so
+	 * that a buffer shorter than OBJSET_PHYS_SIZE_V3 is never read at
+	 * that dnode's offset (assumed to lie past the end of the shorter
+	 * layouts -- confirm against objset_phys_t).
+	 */
+	if (datalen >= OBJSET_PHYS_SIZE_V3 &&
+	    osp->os_projectused_dnode.dn_type != DMU_OT_NONE) {
+		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+		    should_bswap, &osp->os_projectused_dnode);
+		if (ret)
+			goto error;
+	}
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_local_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
+
+	return (0);
+
+error:
+	/* never leave a partially computed MAC in the caller's buffers */
+	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+	return (ret);
+}
+
+/*
+ * Free the iovec array of a uio, if it has one. The array is taken to
+ * hold uio_iovcnt entries; the uio structure itself is left untouched.
+ */
+static void
+zio_crypt_destroy_uio(zfs_uio_t *uio)
+{
+	if (!uio->uio_iov)
+		return;
+
+	kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
+}
+
+/*
+ * Parse an uncompressed indirect block and compute a checksum over the
+ * portable fields of every bp it contains. The portable fields are the MAC
+ * and all of the fields from blk_prop except for the dedup, checksum, and
+ * psize bits. For an explanation of the purpose of this, see the comment
+ * block on object set authentication.
+ *
+ * When generate is set the first ZIO_DATA_MAC_LEN bytes of the checksum
+ * are stored in cksum and 0 is returned. Otherwise they are compared with
+ * cksum and ECKSUM is returned on a mismatch.
+ */
+static int
+zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf,
+    uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum)
+{
+	blkptr_t *bps = buf;
+	int idx, nbps = datalen >> SPA_BLKPTRSHIFT;
+	SHA2_CTX sha;
+	uint8_t digest[SHA512_DIGEST_LENGTH];
+
+	/* fold the MAC of every bp of the layer below into one SHA512 */
+	SHA2Init(SHA512, &sha);
+	for (idx = 0; idx < nbps; idx++) {
+		zio_crypt_bp_do_indrect_checksum_updates(&sha, version,
+		    byteswap, &bps[idx]);
+	}
+	SHA2Final(digest, &sha);
+
+	if (!generate) {
+		/* verification: only the truncated digest is compared */
+		if (bcmp(digest, cksum, ZIO_DATA_MAC_LEN) != 0)
+			return (SET_ERROR(ECKSUM));
+
+		return (0);
+	}
+
+	bcopy(digest, cksum, ZIO_DATA_MAC_LEN);
+
+	return (0);
+}
+
+/*
+ * Generate or verify the checksum-of-MACs of an indirect block without
+ * knowing the on-disk format version: the current version is tried first
+ * and, if verification fails with ECKSUM, version 0 is tried as well.
+ */
+int
+zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	int err;
+
+	/*
+	 * Callers do not always have easy access to the on-disk format
+	 * version. It is normally found in the DSL Crypto Key, yet the
+	 * checksum-of-MACs has to be verifiable even when the key is not
+	 * loaded. Rather than doing a ZAP lookup of the version for each
+	 * zio, both existing formats are simply tried in turn.
+	 */
+	err = zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
+	    datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum);
+	if (err != ECKSUM)
+		return (err);
+
+	/* only verification can end in a checksum mismatch */
+	ASSERT(!generate);
+
+	return (zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
+	    datalen, 0, byteswap, cksum));
+}
+
+/*
+ * ABD flavor of zio_crypt_do_indirect_mac_checksum(): borrows a buffer
+ * holding a copy of the abd's contents for the duration of the call and
+ * returns it afterwards.
+ */
+int
+zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	void *linear = abd_borrow_buf_copy(abd, datalen);
+	int err = zio_crypt_do_indirect_mac_checksum(generate, linear,
+	    datalen, byteswap, cksum);
+
+	abd_return_buf(abd, linear, datalen);
+
+	return (err);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting ZIL blocks.
+ * We do not check for the older ZIL chain because the encryption feature
+ * was not available before the newer ZIL chain was introduced. The goal
+ * here is to encrypt everything except the blkptr_t of a lr_write_t and
+ * the zil_chain_t header. Everything that is not encrypted is authenticated.
+ */
+static int
+zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio,
+    zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+    boolean_t *no_crypt)
+{
+	int ret;
+	uint64_t txtype, lr_len;
+	uint_t nr_src, nr_dst, crypt_len;
+	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+	uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
+	zil_chain_t *zilc;
+	lr_t *lr;
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+	/* cipherbuf always needs an extra iovec for the MAC */
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+		nr_src = 0;
+		nr_dst = 1;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+		nr_src = 1;
+		nr_dst = 0;
+	}
+	bzero(dst, datalen);
+
+	/* find the start and end record of the log block */
+	zilc = (zil_chain_t *)src;
+	slrp = src + sizeof (zil_chain_t);
+	aadp = aadbuf;
+	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+
+	/* calculate the number of encrypted iovecs we will need */
+	for (; slrp < blkend; slrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (!byteswap) {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		} else {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		}
+
+		nr_iovecs++;
+		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
+			nr_iovecs++;
+	}
+
+	nr_src += nr_iovecs;
+	nr_dst += nr_iovecs;
+
+	/* allocate the iovec arrays */
+	if (nr_src != 0) {
+		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+		if (src_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	if (nr_dst != 0) {
+		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+		if (dst_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	/*
+	 * Copy the plain zil header over and authenticate everything except
+	 * the checksum that will store our MAC. If we are writing the data
+	 * the embedded checksum will not have been calculated yet, so we don't
+	 * authenticate that.
+	 */
+	bcopy(src, dst, sizeof (zil_chain_t));
+	bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
+	aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+	aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+
+	/* loop over records again, filling in iovecs */
+	nr_iovecs = 0;
+	slrp = src + sizeof (zil_chain_t);
+	dlrp = dst + sizeof (zil_chain_t);
+
+	for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (!byteswap) {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		} else {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		}
+
+		/* copy the common lr_t */
+		bcopy(slrp, dlrp, sizeof (lr_t));
+		bcopy(slrp, aadp, sizeof (lr_t));
+		aadp += sizeof (lr_t);
+		aad_len += sizeof (lr_t);
+
+		ASSERT3P(src_iovecs, !=, NULL);
+		ASSERT3P(dst_iovecs, !=, NULL);
+
+		/*
+		 * If this is a TX_WRITE record we want to encrypt everything
+		 * except the bp if exists. If the bp does exist we want to
+		 * authenticate it.
+		 */
+		if (txtype == TX_WRITE) {
+			crypt_len = sizeof (lr_write_t) -
+			    sizeof (lr_t) - sizeof (blkptr_t);
+			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+			/* copy the bp now since it will not be encrypted */
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    sizeof (blkptr_t));
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    aadp, sizeof (blkptr_t));
+			aadp += sizeof (blkptr_t);
+			aad_len += sizeof (blkptr_t);
+			nr_iovecs++;
+			total_len += crypt_len;
+
+			if (lr_len != sizeof (lr_write_t)) {
+				crypt_len = lr_len - sizeof (lr_write_t);
+				src_iovecs[nr_iovecs].iov_base =
+				    slrp + sizeof (lr_write_t);
+				src_iovecs[nr_iovecs].iov_len = crypt_len;
+				dst_iovecs[nr_iovecs].iov_base =
+				    dlrp + sizeof (lr_write_t);
+				dst_iovecs[nr_iovecs].iov_len = crypt_len;
+				nr_iovecs++;
+				total_len += crypt_len;
+			}
+		} else {
+			crypt_len = lr_len - sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+			nr_iovecs++;
+			total_len += crypt_len;
+		}
+	}
+
+	*no_crypt = (nr_iovecs == 0);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+
+	if (encrypt) {
+		puio->uio_iov = src_iovecs;
+		puio->uio_iovcnt = nr_src;
+		cuio->uio_iov = dst_iovecs;
+		cuio->uio_iovcnt = nr_dst;
+	} else {
+		puio->uio_iov = dst_iovecs;
+		puio->uio_iovcnt = nr_dst;
+		cuio->uio_iov = src_iovecs;
+		cuio->uio_iovcnt = nr_src;
+	}
+
+	return (0);
+
+error:
+	zio_buf_free(aadbuf, datalen);
+	if (src_iovecs != NULL)
+		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+	if (dst_iovecs != NULL)
+		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+	*enc_len = 0;
+	*authbuf = NULL;
+	*auth_len = 0;
+	*no_crypt = B_FALSE;
+	puio->uio_iov = NULL;
+	puio->uio_iovcnt = 0;
+	cuio->uio_iov = NULL;
+	cuio->uio_iovcnt = 0;
+	return (ret);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting dnode blocks.
+ *
+ * The source buffer (plainbuf when encrypting, cipherbuf when decrypting) is
+ * walked twice: first to count the bonus buffers that must be encrypted, then
+ * to fill in one iovec per such bonus buffer while copying every plaintext
+ * portion to the destination buffer and accumulating the authenticated data
+ * in a freshly allocated buffer.
+ *
+ * On success 0 is returned with:
+ *   puio / cuio - iovec arrays for the plaintext and ciphertext sides; the
+ *                 ciphertext side has one extra trailing iovec which this
+ *                 function leaves unset
+ *   enc_len     - total number of bytes covered by the encrypted iovecs
+ *   authbuf     - buffer of datalen bytes obtained from zio_buf_alloc() that
+ *                 holds the authenticated data; it is handed to the caller
+ *   auth_len    - number of bytes of authbuf that are in use
+ *   no_crypt    - B_TRUE if no bonus buffer needed to be encrypted
+ *
+ * On failure the buffers allocated here are released, every output is
+ * cleared and the error is returned.
+ */
+static int
+zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
+    uint_t *auth_len, boolean_t *no_crypt)
+{
+	int ret;
+	uint_t nr_src, nr_dst, crypt_len;
+	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+	uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;
+	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+	uint8_t *src, *dst, *aadp;
+	dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+	/* whichever side is the ciphertext starts with one extra iovec */
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+		nr_src = 0;
+		nr_dst = 1;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+		nr_src = 1;
+		nr_dst = 0;
+	}
+
+	sdnp = (dnode_phys_t *)src;
+	ddnp = (dnode_phys_t *)dst;
+	aadp = aadbuf;
+
+	/*
+	 * Count the number of iovecs we will need to do the encryption by
+	 * counting the number of bonus buffers that need to be encrypted.
+	 */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		/*
+		 * This block may still be byteswapped. However, all of the
+		 * values we use are either uint8_t's (for which byteswapping
+		 * is a noop) or a * != 0 check, which will work regardless
+		 * of whether or not we byteswap.
+		 */
+		if (sdnp[i].dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
+		    sdnp[i].dn_bonuslen != 0) {
+			nr_iovecs++;
+		}
+	}
+
+	nr_src += nr_iovecs;
+	nr_dst += nr_iovecs;
+
+	/* allocate the iovec arrays; a side with zero entries stays NULL */
+	if (nr_src != 0) {
+		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+		if (src_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	if (nr_dst != 0) {
+		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+		if (dst_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	/* restart the count; it now indexes the next iovec to fill in */
+	nr_iovecs = 0;
+
+	/*
+	 * Iterate through the dnodes again, this time filling in the uios
+	 * we allocated earlier. We also concatenate any data we want to
+	 * authenticate onto aadbuf.
+	 */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		dnp = &sdnp[i];
+
+		/* copy over the core fields and blkptrs (kept as plaintext) */
+		bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
+			    sizeof (blkptr_t));
+		}
+
+		/*
+		 * Handle authenticated data. We authenticate everything in
+		 * the dnode that can be brought over when we do a raw send.
+		 * This includes all of the core fields as well as the MACs
+		 * stored in the bp checksums and all of the portable bits
+		 * from blk_prop. We include the dnode padding here in case it
+		 * ever gets used in the future. Some dn_flags and dn_used are
+		 * not portable so we mask those values out of the
+		 * authenticated data.
+		 */
+		crypt_len = offsetof(dnode_phys_t, dn_blkptr);
+		bcopy(dnp, aadp, crypt_len);
+		adnp = (dnode_phys_t *)aadp;
+		adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+		adnp->dn_used = 0;
+		aadp += crypt_len;
+		aad_len += crypt_len;
+
+		/* each helper call advances aadp and aad_len itself */
+		for (j = 0; j < dnp->dn_nblkptr; j++) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, &dnp->dn_blkptr[j]);
+		}
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, DN_SPILL_BLKPTR(dnp));
+		}
+
+		/*
+		 * If this bonus buffer needs to be encrypted, we prepare an
+		 * iovec_t. The encryption / decryption functions will fill
+		 * this in for us with the encrypted or decrypted data.
+		 * Otherwise we add the bonus buffer to the authenticated
+		 * data buffer and copy it over to the destination. The
+		 * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
+		 * we can guarantee alignment with the AES block size
+		 * (128 bits).
+		 */
+		crypt_len = DN_MAX_BONUS_LEN(dnp);
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			ASSERT3U(nr_iovecs, <, nr_src);
+			ASSERT3U(nr_iovecs, <, nr_dst);
+			ASSERT3P(src_iovecs, !=, NULL);
+			ASSERT3P(dst_iovecs, !=, NULL);
+			src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+			nr_iovecs++;
+			total_len += crypt_len;
+		} else {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
+			bcopy(DN_BONUS(dnp), aadp, crypt_len);
+			aadp += crypt_len;
+			aad_len += crypt_len;
+		}
+	}
+
+	*no_crypt = (nr_iovecs == 0);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+
+	/* src/dst map onto plain/cipher according to the direction */
+	if (encrypt) {
+		puio->uio_iov = src_iovecs;
+		puio->uio_iovcnt = nr_src;
+		cuio->uio_iov = dst_iovecs;
+		cuio->uio_iovcnt = nr_dst;
+	} else {
+		puio->uio_iov = dst_iovecs;
+		puio->uio_iovcnt = nr_dst;
+		cuio->uio_iov = src_iovecs;
+		cuio->uio_iovcnt = nr_src;
+	}
+
+	return (0);
+
+error:
+	/* release everything allocated above and clear all outputs */
+	zio_buf_free(aadbuf, datalen);
+	if (src_iovecs != NULL)
+		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+	if (dst_iovecs != NULL)
+		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+	*enc_len = 0;
+	*authbuf = NULL;
+	*auth_len = 0;
+	*no_crypt = B_FALSE;
+	puio->uio_iov = NULL;
+	puio->uio_iovcnt = 0;
+	cuio->uio_iov = NULL;
+	cuio->uio_iovcnt = 0;
+	return (ret);
+}
+
+/*
+ * Build the uios for a block that needs no special parsing. The plaintext
+ * uio gets a single iovec covering plainbuf. The ciphertext uio gets two
+ * iovecs: the first covers cipherbuf and the second is left unset here.
+ * The whole buffer is encrypted, so *enc_len is set to datalen.
+ *
+ * Returns 0 on success. If an allocation fails the outputs are cleared and
+ * ENOMEM is returned.
+ */
+static int
+zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *cuio,
+    uint_t *enc_len)
+{
+	const uint_t plain_cnt = 1, cipher_cnt = 2;
+	iovec_t *piov, *ciov = NULL;
+
+	(void) encrypt;
+
+	/* the cipher array is only attempted once the plain one exists */
+	piov = kmem_alloc(plain_cnt * sizeof (iovec_t), KM_SLEEP);
+	if (piov != NULL)
+		ciov = kmem_alloc(cipher_cnt * sizeof (iovec_t), KM_SLEEP);
+
+	if (piov == NULL || ciov == NULL) {
+		int err = SET_ERROR(ENOMEM);
+
+		/* ciov can only be NULL here, so just undo the plain array */
+		if (piov != NULL)
+			kmem_free(piov, plain_cnt * sizeof (iovec_t));
+
+		*enc_len = 0;
+		puio->uio_iov = NULL;
+		puio->uio_iovcnt = 0;
+		cuio->uio_iov = NULL;
+		cuio->uio_iovcnt = 0;
+		return (err);
+	}
+
+	piov[0].iov_base = plainbuf;
+	piov[0].iov_len = datalen;
+	ciov[0].iov_base = cipherbuf;
+	ciov[0].iov_len = datalen;
+
+	puio->uio_iov = piov;
+	puio->uio_iovcnt = plain_cnt;
+	cuio->uio_iov = ciov;
+	cuio->uio_iovcnt = cipher_cnt;
+	*enc_len = datalen;
+
+	return (0);
+}
+
+/*
+ * This function builds up the plaintext (puio) and ciphertext (cuio) uios so
+ * that they can be used for encryption and decryption by zio_do_crypt_uio().
+ * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks
+ * requiring special handling to parse out pieces that are to be encrypted. The
+ * authbuf is used by these special cases to store additional authenticated
+ * data (AAD) for the encryption modes.
+ */
+static int
+zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len,
+    uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt)
+{
+	int err;
+	iovec_t *last_iov;
+
+	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);
+
+	/* ZIL and dnode blocks have dedicated builders; the rest are normal */
+	if (ot == DMU_OT_INTENT_LOG) {
+		err = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
+		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
+		    no_crypt);
+	} else if (ot == DMU_OT_DNODE) {
+		err = zio_crypt_init_uios_dnode(encrypt, version, plainbuf,
+		    cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf,
+		    auth_len, no_crypt);
+	} else {
+		err = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
+		    datalen, puio, cuio, enc_len);
+		/* the normal builder does not touch these outputs */
+		*authbuf = NULL;
+		*auth_len = 0;
+		*no_crypt = B_FALSE;
+	}
+
+	if (err != 0)
+		return (err);
+
+	puio->uio_segflg = UIO_SYSSPACE;
+	cuio->uio_segflg = UIO_SYSSPACE;
+
+	/* the final ciphertext iovec is pointed at the MAC */
+	last_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]);
+	last_iov->iov_base = mac;
+	last_iov->iov_len = ZIO_DATA_MAC_LEN;
+
+	return (0);
+}
+
+/*
+ * Primary encryption / decryption entrypoint for zio data.
+ *
+ * Encrypts plainbuf into cipherbuf (encrypt == B_TRUE) or decrypts cipherbuf
+ * into plainbuf, using the key selected by salt. QAT acceleration is tried
+ * first for block types that allow it; otherwise, or if the hardware fails,
+ * the work is done in software through zio_do_crypt_uio().
+ *
+ * Returns 0 on success or the error reported by key derivation, uio setup
+ * or the crypto operation. Every exit goes through the common cleanup at
+ * "out", so the salt lock is released and a temporary derived key is always
+ * zeroed before returning.
+ */
+int
+zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
+    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
+    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
+    boolean_t *no_crypt)
+{
+	int ret;
+	boolean_t locked = B_FALSE;
+	uint64_t crypt = key->zk_crypt;
+	uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
+	uint_t enc_len, auth_len;
+	zfs_uio_t puio, cuio;
+	uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
+	crypto_key_t tmp_ckey, *ckey = NULL;
+	crypto_ctx_template_t tmpl;
+	uint8_t *authbuf = NULL;
+
+	/* zeroed uios make the cleanup at "out" safe on every path */
+	memset(&puio, 0, sizeof (puio));
+	memset(&cuio, 0, sizeof (cuio));
+
+	/*
+	 * If the needed key is the current one, just use it. Otherwise we
+	 * need to generate a temporary one from the given salt + master key.
+	 * If we are encrypting, we must return a copy of the current salt
+	 * so that it can be stored in the blkptr_t.
+	 */
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	locked = B_TRUE;
+
+	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) {
+		ckey = &key->zk_current_key;
+		tmpl = key->zk_current_tmpl;
+	} else {
+		rw_exit(&key->zk_salt_lock);
+		locked = B_FALSE;
+
+		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+		    salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len);
+		if (ret != 0)
+			goto out;
+
+		tmp_ckey.ck_format = CRYPTO_KEY_RAW;
+		tmp_ckey.ck_data = enc_keydata;
+		tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+		ckey = &tmp_ckey;
+		tmpl = NULL;
+	}
+
+	/*
+	 * Attempt to use QAT acceleration if we can. We currently don't
+	 * do this for metadnode and ZIL blocks, since they have a much
+	 * more involved buffer layout and the qat_crypt() function only
+	 * works in-place.
+	 */
+	if (qat_crypt_use_accel(datalen) &&
+	    ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) {
+		uint8_t __attribute__((unused)) *srcbuf, *dstbuf;
+
+		if (encrypt) {
+			srcbuf = plainbuf;
+			dstbuf = cipherbuf;
+		} else {
+			srcbuf = cipherbuf;
+			dstbuf = plainbuf;
+		}
+
+		ret = qat_crypt((encrypt) ? QAT_ENCRYPT : QAT_DECRYPT, srcbuf,
+		    dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen);
+		if (ret == CPA_STATUS_SUCCESS) {
+			/*
+			 * Leave through the common exit so that a derived
+			 * temporary key does not stay behind on the stack.
+			 */
+			ret = 0;
+			goto out;
+		}
+		/* If the hardware implementation fails fall back to software */
+	}
+
+	/* create uios for encryption */
+	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
+	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
+	    &authbuf, &auth_len, no_crypt);
+	if (ret != 0)
+		goto out;
+
+	/* perform the encryption / decryption in software */
+	ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len,
+	    &puio, &cuio, authbuf, auth_len);
+
+out:
+	/* single teardown shared by the success and failure paths */
+	if (locked)
+		rw_exit(&key->zk_salt_lock);
+	if (authbuf != NULL)
+		zio_buf_free(authbuf, datalen);
+	if (ckey == &tmp_ckey)
+		bzero(enc_keydata, keydata_len);
+	zio_crypt_destroy_uio(&puio);
+	zio_crypt_destroy_uio(&cuio);
+
+	return (ret);
+}
+
+/*
+ * Simple wrapper around zio_do_crypt_data() to work with abd's instead of
+ * linear buffers.
+ */
+int
+zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot,
+    boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac,
+    uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt)
+{
+	void *plain, *cipher;
+	int ret;
+
+	/* the input side is borrowed with _copy, the output side without */
+	if (encrypt) {
+		plain = abd_borrow_buf_copy(pabd, datalen);
+		cipher = abd_borrow_buf(cabd, datalen);
+	} else {
+		plain = abd_borrow_buf(pabd, datalen);
+		cipher = abd_borrow_buf_copy(cabd, datalen);
+	}
+
+	ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac,
+	    datalen, plain, cipher, no_crypt);
+
+	/*
+	 * The buffers are handed back in exactly the same way whether the
+	 * crypto call succeeded or failed; only the return value differs.
+	 */
+	if (encrypt) {
+		abd_return_buf(pabd, plain, datalen);
+		abd_return_buf_copy(cabd, cipher, datalen);
+	} else {
+		abd_return_buf_copy(pabd, plain, datalen);
+		abd_return_buf(cabd, cipher, datalen);
+	}
+
+	return (ret);
+}
+
+#if defined(_KERNEL)
+/*
+ * Kernel builds only: register zfs_key_max_salt_uses as an unsigned long
+ * module parameter with mode 0644.
+ */
+/* BEGIN CSTYLED */
+module_param(zfs_key_max_salt_uses, ulong, 0644);
+MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value "
+	"can be used for generating encryption keys before it is rotated");
+/* END CSTYLED */
+#endif

diff --git a/zfs/module/os/linux/zfs/zpl_ctldir.c b/zfs/module/os/linux/zfs/zpl_ctldir.c
new file mode 100644
index 0000000..302f0bb
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_ctldir.c

@@ -0,0 +1,638 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * LLNL-CODE-403049.
+ * Rewritten for Linux by:
+ *   Rohan Puri <rohan.puri15@gmail.com>
+ *   Brian Behlendorf <behlendorf1@llnl.gov>
+ */
+
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zpl.h>
+
+/*
+ * Open handler shared by the control directory file operations. An open
+ * that asks for write access is refused with -EACCES; anything else is
+ * passed on to generic_file_open().
+ */
+/* ARGSUSED */
+static int
+zpl_common_open(struct inode *ip, struct file *filp)
+{
+	return (blk_mode_is_open_write(filp->f_mode) ?
+	    -EACCES : generic_file_open(ip, filp));
+}
+
+/*
+ * List the root control directory: the dot entries followed by the
+ * snapshot directory (position 2) and the shares directory (position 3).
+ * Emission stops as soon as an emit call reports failure, leaving ctx->pos
+ * at the entry that still has to be produced. Always returns 0.
+ */
+static int
+zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
+
+	ZPL_ENTER(zfsvfs);
+
+	if (zpl_dir_emit_dots(filp, ctx)) {
+		/*
+		 * A failed emit leaves pos unchanged, so the test for the
+		 * following entry is false and nothing more is emitted.
+		 */
+		if (ctx->pos == 2 &&
+		    zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME,
+		    strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR))
+			ctx->pos++;
+
+		if (ctx->pos == 3 &&
+		    zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME,
+		    strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR))
+			ctx->pos++;
+	}
+
+	ZPL_EXIT(zfsvfs);
+
+	return (0);
+}
+
+#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
+/*
+ * readdir() flavour of zpl_root_iterate(): wrap the filldir callback in a
+ * directory context, run the iterate routine and store the resulting
+ * position back into the file.
+ */
+static int
+zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	int rc;
+	zpl_dir_context_t dctx =
+	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+
+	rc = zpl_root_iterate(filp, &dctx);
+	filp->f_pos = dctx.pos;
+
+	return (rc);
+}
+#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
+
+/*
+ * Get root directory attributes.
+ *
+ * The signature and the generic_fillattr() call are selected at build time
+ * from the HAVE_*_IOPS_GETATTR and HAVE_GENERIC_FILLATTR_* macros. The
+ * access time is then set to the current time and 0 is returned.
+ */
+/* ARGSUSED */
+static int
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_root_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
+zpl_root_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
+zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
+    u32 request_mask, unsigned int query_flags)
+#endif
+{
+	struct inode *ip = path->dentry->d_inode;
+
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
+	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#else
+	/*
+	 * NOTE(review): in this configuration generic_fillattr() is not
+	 * called at all, so only atime is written to stat below - confirm
+	 * that this combination of macros cannot occur in practice.
+	 */
+	(void) user_ns;
+#endif
+#else
+	generic_fillattr(ip, stat);
+#endif
+	stat->atime = current_time(ip);
+
+	return (0);
+}
+ZPL_GETATTR_WRAPPER(zpl_root_getattr);
+
+/*
+ * Look up a name in the root control directory. -ENOENT from
+ * zfsctl_root_lookup() splices a NULL inode onto the dentry; any other
+ * failure is returned as an ERR_PTR.
+ */
+static struct dentry *
+zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
+{
+	struct inode *found;
+	cred_t *cred = CRED();
+	int error;
+
+	crhold(cred);
+	error = -zfsctl_root_lookup(dip, dname(dentry), &found, 0, cred,
+	    NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	crfree(cred);
+
+	if (error != 0 && error != -ENOENT)
+		return (ERR_PTR(error));
+
+	/* found is only read when the lookup succeeded */
+	return (d_splice_alias(error == 0 ? found : NULL, dentry));
+}
+
+/*
+ * The '.zfs' control directory file and inode operations.
+ *
+ * The directory listing callback is selected at build time: iterate_shared
+ * when HAVE_VFS_ITERATE_SHARED is defined, iterate when HAVE_VFS_ITERATE is
+ * defined, and readdir otherwise.
+ */
+const struct file_operations zpl_fops_root = {
+	.open		= zpl_common_open,
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE_SHARED
+	.iterate_shared	= zpl_root_iterate,
+#elif defined(HAVE_VFS_ITERATE)
+	.iterate	= zpl_root_iterate,
+#else
+	.readdir	= zpl_root_readdir,
+#endif
+};
+
+/* Inode operations of the '.zfs' control directory: lookup and getattr. */
+const struct inode_operations zpl_ops_root = {
+	.lookup		= zpl_root_lookup,
+	.getattr	= zpl_root_getattr,
+};
+
+/*
+ * d_automount callback for snapshot directories: ask zfsctl to mount the
+ * snapshot at path and report the outcome.
+ */
+static struct vfsmount *
+zpl_snapdir_automount(struct path *path)
+{
+	int rc = -zfsctl_snapshot_mount(path, 0);
+
+	/*
+	 * Rather than returning the new vfsmount for the snapshot we must
+	 * return NULL to indicate a mount collision.  This is done because
+	 * the user space mount calls do_add_mount() which adds the vfsmount
+	 * to the name space.  If we returned the new mount here it would be
+	 * added again to the vfsmount list resulting in list corruption.
+	 */
+	return (rc ? ERR_PTR(rc) : NULL);
+}
+
+/*
+ * Negative dentries must always be revalidated so newly created snapshots
+ * can be detected and automounted.  Normal dentries should be kept because
+ * as of the 3.18 kernel revalidating the mountpoint dentry will result in
+ * the snapshot being immediately unmounted.
+ */
+static int
+#ifdef HAVE_D_REVALIDATE_NAMEIDATA
+zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
+#else
+zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
+#endif
+{
+	/* a dentry without an inode is reported as 0, any other as 1 */
+	if (dentry->d_inode == NULL)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Dentry operations installed on snapshot directory entries by
+ * zpl_snapdir_lookup() and zpl_snapdir_mkdir().
+ */
+dentry_operations_t zpl_dops_snapdirs = {
+/*
+ * Auto mounting of snapshots is only supported for 2.6.37 and
+ * newer kernels.  Prior to this kernel the ops->follow_link()
+ * callback was used as a hack to trigger the mount.  The
+ * resulting vfsmount was then explicitly grafted in to the
+ * name space.  While it might be possible to add compatibility
+ * code to accomplish this it would require considerable care.
+ */
+	.d_automount	= zpl_snapdir_automount,
+	.d_revalidate	= zpl_snapdir_revalidate,
+};
+
+/*
+ * Look up a snapshot name below '.zfs/snapshot'. Unless the lookup fails
+ * with something other than -ENOENT, the dentry receives the snapdir
+ * dentry operations and is flagged DCACHE_NEED_AUTOMOUNT before the
+ * (possibly NULL) inode is spliced in.
+ */
+static struct dentry *
+zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
+    unsigned int flags)
+{
+	struct inode *ip = NULL;
+	cred_t *cred = CRED();
+	fstrans_cookie_t fst;
+	int error;
+
+	crhold(cred);
+	fst = spl_fstrans_mark();
+	error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip, 0, cred,
+	    NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	spl_fstrans_unmark(fst);
+	crfree(cred);
+
+	/* -ENOENT is not a failure here; the dentry is still set up below */
+	if (error != 0 && error != -ENOENT)
+		return (ERR_PTR(error));
+
+	ASSERT(error == 0 || ip == NULL);
+
+	/* replace whatever dentry ops were set with the snapdir ones */
+	d_clear_d_op(dentry);
+	d_set_d_op(dentry, &zpl_dops_snapdirs);
+	dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+
+	return (d_splice_alias(ip, dentry));
+}
+
+/*
+ * List '.zfs/snapshot': the dot entries followed by one directory entry per
+ * snapshot returned by dmu_snapshot_list_next(). Running out of snapshots
+ * (-ENOENT) is reported as success.
+ */
+static int
+zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
+	char name[MAXNAMELEN];
+	boolean_t conflict;
+	fstrans_cookie_t fst;
+	uint64_t snap_id, cursor;
+	int rc = 0;
+
+	ZPL_ENTER(zfsvfs);
+	fst = spl_fstrans_mark();
+
+	if (zpl_dir_emit_dots(filp, ctx)) {
+		/* Start the position at 0 if it already emitted . and .. */
+		cursor = (ctx->pos == 2) ? 0 : ctx->pos;
+
+		for (;;) {
+			dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os),
+			    FTAG);
+			rc = -dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
+			    name, &snap_id, &cursor, &conflict);
+			dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os),
+			    FTAG);
+			if (rc != 0)
+				break;
+
+			if (!zpl_dir_emit(ctx, name, strlen(name),
+			    ZFSCTL_INO_SHARES - snap_id, DT_DIR))
+				break;
+
+			/* only advance once the entry has been accepted */
+			ctx->pos = cursor;
+		}
+	}
+
+	spl_fstrans_unmark(fst);
+	ZPL_EXIT(zfsvfs);
+
+	return (rc == -ENOENT ? 0 : rc);
+}
+
+#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
+/*
+ * readdir() flavour of zpl_snapdir_iterate(): wrap the filldir callback in
+ * a directory context, run the iterate routine and store the resulting
+ * position back into the file.
+ */
+static int
+zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	int rc;
+	zpl_dir_context_t dctx =
+	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+
+	rc = zpl_snapdir_iterate(filp, &dctx);
+	filp->f_pos = dctx.pos;
+
+	return (rc);
+}
+#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
+
+/*
+ * Rename an entry of '.zfs/snapshot' through zfsctl_snapdir_rename(). Any
+ * non-zero rename flags are rejected with -EINVAL. Returns 0 or a negative
+ * errno.
+ */
+static int
+#ifdef HAVE_IOPS_RENAME_USERNS
+zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
+#else
+zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
+    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+#endif
+{
+	cred_t *cred;
+	int error;
+
+	/* We probably don't want to support renameat2(2) in ctldir */
+	if (flags != 0)
+		return (-EINVAL);
+
+	cred = CRED();
+	crhold(cred);
+	error = -zfsctl_snapdir_rename(sdip, dname(sdentry), tdip,
+	    dname(tdentry), cred, 0);
+	ASSERT3S(error, <=, 0);
+	crfree(cred);
+
+	return (error);
+}
+
+#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \
+	!defined(HAVE_IOPS_RENAME_USERNS) && \
+	!defined(HAVE_IOPS_RENAME_IDMAP))
+/*
+ * Flag-less rename entry point: forwards to zpl_snapdir_rename2() with no
+ * flags set.
+ */
+static int
+zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
+    struct inode *tdip, struct dentry *tdentry)
+{
+	const unsigned int no_flags = 0;
+
+	return (zpl_snapdir_rename2(sdip, sdentry, tdip, tdentry, no_flags));
+}
+#endif
+
+/*
+ * Remove an entry of '.zfs/snapshot' through zfsctl_snapdir_remove().
+ * Returns 0 or a negative errno.
+ */
+static int
+zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
+{
+	cred_t *cred = CRED();
+	int error;
+
+	/* hold the credential for the duration of the call */
+	crhold(cred);
+	error = -zfsctl_snapdir_remove(dip, dname(dentry), cred, 0);
+	ASSERT3S(error, <=, 0);
+	crfree(cred);
+
+	return (error);
+}
+
+/*
+ * Create an entry in '.zfs/snapshot' through zfsctl_snapdir_mkdir(). On
+ * success the dentry gets the snapdir dentry operations and is instantiated
+ * with the new inode. Returns 0 or a negative errno.
+ */
+static int
+#ifdef HAVE_IOPS_MKDIR_USERNS
+zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
+    struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip,
+    struct dentry *dentry, umode_t mode)
+#else
+zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+#endif
+{
+	struct inode *snap_ip;
+	vattr_t *attrs;
+	cred_t *cred = CRED();
+	int error;
+
+	crhold(cred);
+
+	/* temporary attribute block describing the new directory */
+	attrs = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+	zpl_vap_init(attrs, dip, mode | S_IFDIR, cred);
+
+	error = -zfsctl_snapdir_mkdir(dip, dname(dentry), attrs, &snap_ip,
+	    cred, 0);
+	if (error == 0) {
+		d_clear_d_op(dentry);
+		d_set_d_op(dentry, &zpl_dops_snapdirs);
+		d_instantiate(dentry, snap_ip);
+	}
+
+	kmem_free(attrs, sizeof (vattr_t));
+	ASSERT3S(error, <=, 0);
+	crfree(cred);
+
+	return (error);
+}
+
+/*
+ * Get snapshot directory attributes.
+ *
+ * The signature and the generic_fillattr() call are selected at build time
+ * from the HAVE_*_IOPS_GETATTR and HAVE_GENERIC_FILLATTR_* macros. The link
+ * count and size are then forced to 2, ctime and mtime are taken from
+ * dmu_objset_snap_cmtime() and atime is set to the current time. Returns 0.
+ */
+/* ARGSUSED */
+static int
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
+zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
+zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
+    u32 request_mask, unsigned int query_flags)
+#endif
+{
+	struct inode *ip = path->dentry->d_inode;
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+
+	ZPL_ENTER(zfsvfs);
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
+	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#else
+	/*
+	 * NOTE(review): in this configuration generic_fillattr() is not
+	 * called, so only the fields assigned below are written - confirm
+	 * that this combination of macros cannot occur in practice.
+	 */
+	(void) user_ns;
+#endif
+#else
+	generic_fillattr(ip, stat);
+#endif
+
+	stat->nlink = stat->size = 2;
+	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
+	stat->atime = current_time(ip);
+	ZPL_EXIT(zfsvfs);
+
+	return (0);
+}
+ZPL_GETATTR_WRAPPER(zpl_snapdir_getattr);
+
+/*
+ * The '.zfs/snapshot' directory file operations.  These mainly control
+ * generating the list of available snapshots when doing an 'ls' in the
+ * directory.  See zpl_snapdir_iterate(), which is installed directly as
+ * iterate_shared or iterate, or reached through zpl_snapdir_readdir() when
+ * neither of those callbacks is available.
+ */
+const struct file_operations zpl_fops_snapdir = {
+	.open		= zpl_common_open,
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE_SHARED
+	.iterate_shared	= zpl_snapdir_iterate,
+#elif defined(HAVE_VFS_ITERATE)
+	.iterate	= zpl_snapdir_iterate,
+#else
+	.readdir	= zpl_snapdir_readdir,
+#endif
+
+};
+
+/*
+ * The '.zfs/snapshot' directory inode operations.  These mainly control
+ * creating an inode for a snapshot directory and initializing the needed
+ * infrastructure to automount the snapshot.  See zpl_snapdir_lookup().
+ *
+ * The rename callback is zpl_snapdir_rename2() when the build provides a
+ * rename operation that takes flags, a user namespace or an idmap, and the
+ * flag-less zpl_snapdir_rename() wrapper otherwise.
+ */
+const struct inode_operations zpl_ops_snapdir = {
+	.lookup		= zpl_snapdir_lookup,
+	.getattr	= zpl_snapdir_getattr,
+#if (defined(HAVE_RENAME_WANTS_FLAGS) || \
+	defined(HAVE_IOPS_RENAME_USERNS) || \
+	defined(HAVE_IOPS_RENAME_IDMAP))
+	.rename		= zpl_snapdir_rename2,
+#else
+	.rename		= zpl_snapdir_rename,
+#endif
+	.rmdir		= zpl_snapdir_rmdir,
+	.mkdir		= zpl_snapdir_mkdir,
+};
+
+/*
+ * Look up a name below '.zfs/shares'. -ENOENT from zfsctl_shares_lookup()
+ * splices a NULL inode onto the dentry; any other failure is returned as an
+ * ERR_PTR.
+ */
+static struct dentry *
+zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
+    unsigned int flags)
+{
+	struct inode *found = NULL;
+	cred_t *cred = CRED();
+	fstrans_cookie_t fst;
+	int error;
+
+	crhold(cred);
+	fst = spl_fstrans_mark();
+	error = -zfsctl_shares_lookup(dip, dname(dentry), &found, 0, cred,
+	    NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	spl_fstrans_unmark(fst);
+	crfree(cred);
+
+	if (error != 0 && error != -ENOENT)
+		return (ERR_PTR(error));
+
+	/* the looked-up inode is only used when the lookup succeeded */
+	return (d_splice_alias(error == 0 ? found : NULL, dentry));
+}
+
+/*
+ * List '.zfs/shares'. When the filesystem has no shares directory object
+ * only the dot entries are emitted; otherwise the listing is delegated to
+ * zfs_readdir() on that object. Returns 0 or a negative errno.
+ */
+static int
+zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
+	cred_t *cred = CRED();
+	fstrans_cookie_t fst;
+	znode_t *shares_zp;
+	int error = 0;
+
+	ZPL_ENTER(zfsvfs);
+	fst = spl_fstrans_mark();
+
+	if (zfsvfs->z_shares_dir == 0) {
+		zpl_dir_emit_dots(filp, ctx);
+	} else {
+		error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &shares_zp);
+		if (error == 0) {
+			crhold(cred);
+			error = -zfs_readdir(ZTOI(shares_zp), ctx, cred);
+			crfree(cred);
+
+			/* drop the reference taken by zfs_zget() */
+			iput(ZTOI(shares_zp));
+		}
+	}
+
+	spl_fstrans_unmark(fst);
+	ZPL_EXIT(zfsvfs);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
+/*
+ * readdir() flavour of zpl_shares_iterate(): wrap the filldir callback in a
+ * directory context, run the iterate routine and store the resulting
+ * position back into the file.
+ */
+static int
+zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	int rc;
+	zpl_dir_context_t dctx =
+	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+
+	rc = zpl_shares_iterate(filp, &dctx);
+	filp->f_pos = dctx.pos;
+
+	return (rc);
+}
+#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
+
+/*
+ * Get '.zfs/shares' directory attributes.
+ *
+ * When the filesystem has no shares directory object the attributes are
+ * filled in generically with a link count and size of 2. Otherwise they are
+ * read from the shares directory znode with zfs_getattr_fast(). Returns 0
+ * or a negative errno.
+ */
+/* ARGSUSED */
+static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_shares_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_shares_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
+zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
+    u32 request_mask, unsigned int query_flags)
+#endif
+{
+	struct inode *ip = path->dentry->d_inode;
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	znode_t *dzp;
+	int error;
+
+	ZPL_ENTER(zfsvfs);
+
+	/* no shares directory object: report a synthetic directory */
+	if (zfsvfs->z_shares_dir == 0) {
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
+		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#else
+		/*
+		 * NOTE(review): generic_fillattr() is not called in this
+		 * configuration - confirm that this combination of macros
+		 * cannot occur in practice.
+		 */
+		(void) user_ns;
+#endif
+#else
+		generic_fillattr(path->dentry->d_inode, stat);
+#endif
+		stat->nlink = stat->size = 2;
+		stat->atime = current_time(ip);
+		ZPL_EXIT(zfsvfs);
+		return (0);
+	}
+
+	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
+	if (error == 0) {
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+		error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
+#else
+		error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
+#endif
+		/* release the reference obtained from zfs_zget() */
+		iput(ZTOI(dzp));
+	}
+
+	ZPL_EXIT(zfsvfs);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+ZPL_GETATTR_WRAPPER(zpl_shares_getattr);
+
+/*
+ * The '.zfs/shares' directory file operations.
+ *
+ * The directory listing callback is selected at build time: iterate_shared
+ * when HAVE_VFS_ITERATE_SHARED is defined, iterate when HAVE_VFS_ITERATE is
+ * defined, and readdir otherwise.
+ */
+const struct file_operations zpl_fops_shares = {
+	.open		= zpl_common_open,
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+#ifdef HAVE_VFS_ITERATE_SHARED
+	.iterate_shared	= zpl_shares_iterate,
+#elif defined(HAVE_VFS_ITERATE)
+	.iterate	= zpl_shares_iterate,
+#else
+	.readdir	= zpl_shares_readdir,
+#endif
+
+};
+
+/*
+ * The '.zfs/shares' directory inode operations: only lookup and getattr
+ * are provided.
+ */
+const struct inode_operations zpl_ops_shares = {
+	.lookup		= zpl_shares_lookup,
+	.getattr	= zpl_shares_getattr,
+};

diff --git a/zfs/module/os/linux/zfs/zpl_export.c b/zfs/module/os/linux/zfs/zpl_export.c
new file mode 100644
index 0000000..5be6353
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_export.c

@@ -0,0 +1,158 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Gunnar Beutner
+ * Copyright (c) 2012 Cyril Plisko. All rights reserved.
+ */
+
+
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zpl.h>
+
+
+/*
+ * Export hook: encode a file handle for 'ip' into the buffer 'fh'.
+ *
+ * On entry *max_len is the buffer size in 32-bit words.  If the buffer
+ * cannot even hold the fid_t header, a local placeholder with fid_len == 0
+ * is passed to the fid routine instead of 'fh'.  On return *max_len is set
+ * to the number of 32-bit words covering the header plus the resulting
+ * fid_len.  Returns FILEID_INO32_GEN when the fid routine succeeds and 255
+ * otherwise.
+ */
+static int
+#ifdef HAVE_ENCODE_FH_WITH_INODE
+zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
+{
+#else
+zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable)
+{
+	/* CSTYLED */
+	struct inode *ip = dentry->d_inode;
+#endif /* HAVE_ENCODE_FH_WITH_INODE */
+	const size_t fid_hdr = offsetof(fid_t, fid_data);
+	ushort_t empty_fid = 0;
+	fstrans_cookie_t cookie;
+	fid_t *fid;
+	int nbytes, err;
+
+	nbytes = *max_len * sizeof (__u32);
+
+	if (nbytes >= fid_hdr) {
+		fid = (fid_t *)fh;
+		fid->fid_len = nbytes - fid_hdr;
+	} else {
+		fid = (fid_t *)&empty_fid;
+	}
+
+	/* Control-directory nodes use their own fid routine. */
+	cookie = spl_fstrans_mark();
+	err = zfsctl_is_node(ip) ? zfsctl_fid(ip, fid) : zfs_fid(ip, fid);
+	spl_fstrans_unmark(cookie);
+
+	nbytes = fid_hdr + fid->fid_len;
+	*max_len = roundup(nbytes, sizeof (__u32)) / sizeof (__u32);
+
+	if (err != 0)
+		return (255);
+
+	return (FILEID_INO32_GEN);
+}
+
+/*
+ * Export hook: resolve the file handle 'fh' (fh_len 32-bit words, of type
+ * fh_type) on 'sb' to a dentry.  Returns ERR_PTR(-EINVAL) for a handle of
+ * the wrong type or one too short for the fid it claims to carry, and
+ * ERR_PTR(-errno) when zfs_vget() fails.
+ */
+static struct dentry *
+zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
+    int fh_len, int fh_type)
+{
+	fid_t *zfid = (fid_t *)fh;
+	int nbytes = fh_len * sizeof (__u32);
+	fstrans_cookie_t cookie;
+	struct inode *ip;
+	int err;
+
+	if (fh_type != FILEID_INO32_GEN)
+		return (ERR_PTR(-EINVAL));
+
+	/* The header must be present before fid_len may be read. */
+	if (nbytes < offsetof(fid_t, fid_data))
+		return (ERR_PTR(-EINVAL));
+
+	if (nbytes < offsetof(fid_t, fid_data) + zfid->fid_len)
+		return (ERR_PTR(-EINVAL));
+
+	cookie = spl_fstrans_mark();
+	err = zfs_vget(sb, &ip, zfid);
+	spl_fstrans_unmark(cookie);
+
+	/*
+	 * If we see ENOENT it might mean that an NFSv4 client is using a
+	 * cached inode value in a file handle and that the sought after
+	 * file has had its inode changed by a third party.  So change the
+	 * error to ESTALE, which will trigger a full lookup by the client
+	 * and will find the new filename/inode pair if it still exists.
+	 */
+	if (err == ENOENT)
+		return (ERR_PTR(-ESTALE));
+
+	if (err != 0)
+		return (ERR_PTR(-err));
+
+	ASSERT((ip != NULL) && !IS_ERR(ip));
+
+	return (d_obtain_alias(ip));
+}
+
+/*
+ * Export hook: return a dentry for the parent directory of 'child', found
+ * by looking up ".." through zfs_lookup().  On failure the negated
+ * zfs_lookup() error is returned wrapped in ERR_PTR().
+ */
+static struct dentry *
+zpl_get_parent(struct dentry *child)
+{
+	znode_t *parent_zp;
+	fstrans_cookie_t cookie;
+	cred_t *cr = CRED();
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_lookup(ITOZ(child->d_inode), "..", &parent_zp, 0, cr,
+	    NULL, NULL);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	if (error != 0)
+		return (ERR_PTR(error));
+
+	return (d_obtain_alias(ZTOI(parent_zp)));
+}
+
+/*
+ * Export hook: commit 'inode' by calling zfs_fsync() with a zero sync
+ * flag.  Control-directory nodes are skipped and report success.  Returns
+ * the negated zfs_fsync() result.
+ */
+static int
+zpl_commit_metadata(struct inode *inode)
+{
+	fstrans_cookie_t cookie;
+	cred_t *cr;
+	int error;
+
+	if (zfsctl_is_node(inode))
+		return (0);
+
+	cr = CRED();
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_fsync(ITOZ(inode), 0, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/* Export operations table wiring up the file-handle hooks defined above. */
+const struct export_operations zpl_export_operations = {
+	.commit_metadata	= zpl_commit_metadata,
+	.get_parent		= zpl_get_parent,
+	.fh_to_dentry		= zpl_fh_to_dentry,
+	.encode_fh		= zpl_encode_fh,
+};

diff --git a/zfs/module/os/linux/zfs/zpl_file.c b/zfs/module/os/linux/zfs/zpl_file.c
new file mode 100644
index 0000000..c880e01
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_file.c

@@ -0,0 +1,1191 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ */
+
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <sys/file.h>
+#include <sys/dmu_objset.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_project.h>
+#if defined(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS) || \
+    defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO)
+#include <linux/pagemap.h>
+#endif
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+#include <linux/writeback.h>
+#endif
+
+/*
+ * When using fallocate(2) to preallocate space, inflate the requested
+ * capacity check by 10% to account for the required metadata blocks.
+ */
+unsigned int zfs_fallocate_reserve_percent = 110;
+
+/*
+ * .open hook: run generic_file_open() first and, if that succeeds, pass
+ * the open on to zfs_open().  A generic_file_open() failure is returned
+ * unchanged; otherwise the negated zfs_open() result is returned.
+ */
+static int
+zpl_open(struct inode *ip, struct file *filp)
+{
+	fstrans_cookie_t cookie;
+	cred_t *cr = CRED();
+	int error = generic_file_open(ip, filp);
+
+	if (error != 0)
+		return (error);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * .release hook: mark the inode dirty when its znode has z_atime_dirty
+ * set, then call zfs_close().  Returns the negated zfs_close() result.
+ */
+static int
+zpl_release(struct inode *ip, struct file *filp)
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	int error;
+
+	if (ITOZ(ip)->z_atime_dirty)
+		zfs_mark_inode_dirty(ip);
+
+	crhold(cr);
+	error = -zfs_close(ip, filp->f_flags, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * Directory iteration hook: feed the entries of the directory behind
+ * 'filp' to 'ctx' via zfs_readdir().  Returns the negated zfs_readdir()
+ * result.
+ */
+static int
+zpl_iterate(struct file *filp, zpl_dir_context_t *ctx)
+{
+	struct inode *dir = file_inode(filp);
+	fstrans_cookie_t cookie;
+	cred_t *cr = CRED();
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_readdir(dir, ctx, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
+/*
+ * Legacy .readdir hook: wrap the filldir callback and current file
+ * position in a zpl_dir_context_t, run zpl_iterate(), and store the
+ * resulting position back into the file.
+ */
+static int
+zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	zpl_dir_context_t dctx =
+	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
+	int rc = zpl_iterate(filp, &dctx);
+
+	filp->f_pos = dctx.pos;
+
+	return (rc);
+}
+#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
+
+#if defined(HAVE_FSYNC_WITHOUT_DENTRY)
+/*
+ * Linux 2.6.35 - 3.0 API,
+ * From 2.6.35 on the fops->fsync() hook no longer receives a dentry
+ * argument, as it was deemed redundant.  The dentry remains reachable
+ * through filp->f_path.dentry, and filp is guaranteed to be non-NULL.
+ *
+ * Returns the negated zfs_fsync() result.
+ */
+static int
+zpl_fsync(struct file *filp, int datasync)
+{
+	znode_t *zp = ITOZ(filp->f_mapping->host);
+	fstrans_cookie_t cookie;
+	cred_t *cr = CRED();
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_fsync(zp, datasync, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#ifdef HAVE_FILE_AIO_FSYNC
+/* AIO fsync hook: sync the file behind the kiocb through zpl_fsync(). */
+static int
+zpl_aio_fsync(struct kiocb *kiocb, int datasync)
+{
+	struct file *filp = kiocb->ki_filp;
+
+	return (zpl_fsync(filp, datasync));
+}
+#endif
+
+#elif defined(HAVE_FSYNC_RANGE)
+/*
+ * Linux 3.1 API,
+ * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
+ * been pushed down in to the .fsync() vfs hook.  Additionally, the i_mutex
+ * lock is no longer held by the caller, for zfs we don't require the lock
+ * to be held so we don't acquire it.
+ *
+ * Flushes the page cache for [start, end] with
+ * filemap_write_and_wait_range() and then calls zfs_fsync().  A flush
+ * failure is returned unchanged; otherwise the negated zfs_fsync() result
+ * is returned.
+ */
+static int
+zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	znode_t *zp = ITOZ(inode);
+	zfsvfs_t *zfsvfs = ITOZSB(inode);
+	cred_t *cr = CRED();
+	int error;
+	fstrans_cookie_t cookie;
+
+	/*
+	 * The variables z_sync_writes_cnt and z_async_writes_cnt work in
+	 * tandem so that sync writes can detect if there are any non-sync
+	 * writes going on and vice-versa. The "vice-versa" part to this logic
+	 * is located in zfs_putpage() where non-sync writes check if there are
+	 * any ongoing sync writes. If any sync and non-sync writes overlap,
+	 * we do a commit to complete the non-sync writes since the latter can
+	 * potentially take several seconds to complete and thus block sync
+	 * writes in the upcoming call to filemap_write_and_wait_range().
+	 */
+	atomic_inc_32(&zp->z_sync_writes_cnt);
+	/*
+	 * If the following check does not detect an overlapping non-sync write
+	 * (say because it's just about to start), then it is guaranteed that
+	 * the non-sync write will detect this sync write. This is because we
+	 * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
+	 * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
+	 * zfs_putpage() respectively.
+	 */
+	if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+		ZPL_ENTER(zfsvfs);
+		zil_commit(zfsvfs->z_log, zp->z_id);
+		ZPL_EXIT(zfsvfs);
+	}
+
+	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+
+	/*
+	 * The sync write is not complete yet but we decrement
+	 * z_sync_writes_cnt since zfs_fsync() increments and decrements
+	 * it internally. If a non-sync write starts just after the decrement
+	 * operation but before we call zfs_fsync(), it may not detect this
+	 * overlapping sync write but it does not matter since we have already
+	 * gone past filemap_write_and_wait_range() and we won't block due to
+	 * the non-sync write.
+	 *
+	 * Note the decrement happens before the error check below, so the
+	 * counter is balanced on the early-return path as well.
+	 */
+	atomic_dec_32(&zp->z_sync_writes_cnt);
+
+	if (error)
+		return (error);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_fsync(zp, datasync, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#ifdef HAVE_FILE_AIO_FSYNC
+/*
+ * AIO fsync hook: sync from the kiocb's current position through an end
+ * offset of -1 via the range-based zpl_fsync().
+ */
+static int
+zpl_aio_fsync(struct kiocb *kiocb, int datasync)
+{
+	struct file *filp = kiocb->ki_filp;
+
+	return (zpl_fsync(filp, kiocb->ki_pos, -1, datasync));
+}
+#endif
+
+#else
+#error "Unsupported fops->fsync() implementation"
+#endif
+
+/*
+ * Translate the per-request IOCB_* bits in kiocb->ki_flags into the
+ * matching O_* open flags.  Each bit is only examined when the kernel
+ * headers define it.
+ */
+static inline int
+zfs_io_flags(struct kiocb *kiocb)
+{
+	int oflags = 0;
+
+#if defined(IOCB_DSYNC)
+	oflags |= (kiocb->ki_flags & IOCB_DSYNC) ? O_DSYNC : 0;
+#endif
+#if defined(IOCB_SYNC)
+	oflags |= (kiocb->ki_flags & IOCB_SYNC) ? O_SYNC : 0;
+#endif
+#if defined(IOCB_APPEND)
+	oflags |= (kiocb->ki_flags & IOCB_APPEND) ? O_APPEND : 0;
+#endif
+#if defined(IOCB_DIRECT)
+	oflags |= (kiocb->ki_flags & IOCB_DIRECT) ? O_DIRECT : 0;
+#endif
+	return (oflags);
+}
+
+/*
+ * With relatime enabled, only call file_accessed() when
+ * zfs_relatime_need_update() says an update is due.  This is required
+ * because a dataset with an inherited "relatime" property is not
+ * necessarily mounted with the MNT_RELATIME flag (e.g. after
+ * `zfs set relatime=...`), and that flag is what the VFS relatime test in
+ * relatime_need_update() is based on.  In every other case
+ * file_accessed() is called unconditionally.
+ */
+static inline void
+zpl_file_accessed(struct file *filp)
+{
+	struct inode *ip = filp->f_mapping->host;
+
+	if (IS_NOATIME(ip) || !ITOZSB(ip)->z_relatime ||
+	    zfs_relatime_need_update(ip))
+		file_accessed(filp);
+}
+
+#if defined(HAVE_VFS_RW_ITERATE)
+
+/*
+ * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports
+ * iovecs, kvecs, bvecs and pipes, and all the interfaces needed to
+ * manipulate the iov_iter are available.  In that case the whole iov_iter
+ * is attached to the uio and handled correctly in the lower layers.
+ * On older kernels the iovec is extracted and passed instead, tagged as
+ * kernel space for ITER_KVEC iterators and as user space otherwise.
+ */
+static void
+zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
+    loff_t pos, ssize_t count, size_t skip)
+{
+#if defined(HAVE_VFS_IOV_ITER)
+	zfs_uio_iov_iter_init(uio, to, pos, count, skip);
+#else
+	if (zfs_uio_iov_iter_type(to) & ITER_KVEC) {
+		zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
+		    UIO_SYSSPACE, count, skip);
+	} else {
+		zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
+		    UIO_USERSPACE, count, skip);
+	}
+#endif
+}
+
+/*
+ * .read_iter hook: read into 'to' starting at kiocb->ki_pos via
+ * zfs_read().  On success the file position is advanced by the number of
+ * bytes read, the access time is handled by zpl_file_accessed(), and the
+ * byte count is returned.  On failure the negated zfs_read() error is
+ * returned.
+ */
+static ssize_t
+zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
+{
+	struct file *filp = kiocb->ki_filp;
+	ssize_t want = iov_iter_count(to);
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	zfs_uio_t uio;
+	ssize_t nread;
+	int rc;
+
+	zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, want, 0);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	rc = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
+	    filp->f_flags | zfs_io_flags(kiocb), cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	if (rc < 0)
+		return (rc);
+
+	/* Whatever is left in uio_resid was not transferred. */
+	nread = want - uio.uio_resid;
+	kiocb->ki_pos += nread;
+
+	zpl_file_accessed(filp);
+
+	return (nread);
+}
+
+/*
+ * Compatibility wrapper around generic_write_checks().  On success the
+ * number of bytes to write is stored in *countp and 0 is returned.  With
+ * the kiocb-based kernel API a zero or negative result from
+ * generic_write_checks() is returned as-is and *countp is left untouched;
+ * with the older API any non-zero result is returned.
+ */
+static inline ssize_t
+zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from,
+    size_t *countp)
+{
+#ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB
+	ssize_t nbytes = generic_write_checks(kiocb, from);
+
+	if (nbytes > 0) {
+		*countp = nbytes;
+		return (0);
+	}
+
+	return (nbytes);
+#else
+	struct file *filp = kiocb->ki_filp;
+	struct inode *ip = filp->f_mapping->host;
+	ssize_t rc;
+
+	*countp = iov_iter_count(from);
+	rc = generic_write_checks(filp, &kiocb->ki_pos, countp,
+	    S_ISBLK(ip->i_mode));
+
+	return (rc);
+#endif
+}
+
+/*
+ * .write_iter hook: validate the request with
+ * zpl_generic_write_checks(), then write 'from' at kiocb->ki_pos via
+ * zfs_write().  On success the file position is advanced and the number
+ * of bytes written is returned; otherwise the result of the failed check
+ * or the negated zfs_write() error is returned.
+ */
+static ssize_t
+zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
+{
+	struct file *filp = kiocb->ki_filp;
+	struct inode *ip = filp->f_mapping->host;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	zfs_uio_t uio;
+	size_t nbytes = 0;
+	ssize_t rc, written;
+	int err;
+
+	rc = zpl_generic_write_checks(kiocb, from, &nbytes);
+	if (rc != 0)
+		return (rc);
+
+	zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, nbytes,
+	    from->iov_offset);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	err = -zfs_write(ITOZ(ip), &uio,
+	    filp->f_flags | zfs_io_flags(kiocb), cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	if (err < 0)
+		return (err);
+
+	/* Whatever is left in uio_resid was not transferred. */
+	written = nbytes - uio.uio_resid;
+	kiocb->ki_pos += written;
+
+	return (written);
+}
+
+#else /* !HAVE_VFS_RW_ITERATE */
+
+/*
+ * Legacy .aio_read hook: check the user iovec with
+ * generic_segment_checks(), then read at kiocb->ki_pos via zfs_read().
+ * On success the file position is advanced and the number of bytes read
+ * is returned; otherwise the failed check result or the negated
+ * zfs_read() error is returned.
+ */
+static ssize_t
+zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov,
+    unsigned long nr_segs, loff_t pos)
+{
+	struct file *filp = kiocb->ki_filp;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	zfs_uio_t uio;
+	size_t nbytes;
+	ssize_t rc, nread;
+	int err;
+
+	rc = generic_segment_checks(iov, &nr_segs, &nbytes, VERIFY_WRITE);
+	if (rc != 0)
+		return (rc);
+
+	zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
+	    nbytes, 0);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	err = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
+	    filp->f_flags | zfs_io_flags(kiocb), cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	if (err < 0)
+		return (err);
+
+	nread = nbytes - uio.uio_resid;
+	kiocb->ki_pos += nread;
+
+	zpl_file_accessed(filp);
+
+	return (nread);
+}
+
+/*
+ * Legacy .aio_write hook: check the user iovec and the write position,
+ * store the (possibly adjusted) position in the kiocb, then write via
+ * zfs_write().  On success the file position is advanced and the number
+ * of bytes written is returned; otherwise the failed check result or the
+ * negated zfs_write() error is returned.
+ */
+static ssize_t
+zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
+    unsigned long nr_segs, loff_t pos)
+{
+	struct file *filp = kiocb->ki_filp;
+	struct inode *ip = filp->f_mapping->host;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	zfs_uio_t uio;
+	size_t nbytes;
+	ssize_t rc, written;
+	int err;
+
+	rc = generic_segment_checks(iov, &nr_segs, &nbytes, VERIFY_READ);
+	if (rc != 0)
+		return (rc);
+
+	rc = generic_write_checks(filp, &pos, &nbytes, S_ISBLK(ip->i_mode));
+	if (rc != 0)
+		return (rc);
+
+	kiocb->ki_pos = pos;
+
+	zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
+	    nbytes, 0);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	err = -zfs_write(ITOZ(ip), &uio,
+	    filp->f_flags | zfs_io_flags(kiocb), cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	if (err < 0)
+		return (err);
+
+	written = nbytes - uio.uio_resid;
+	kiocb->ki_pos += written;
+
+	return (written);
+}
+#endif /* HAVE_VFS_RW_ITERATE */
+
+#if defined(HAVE_VFS_RW_ITERATE)
+/*
+ * Direct I/O dispatcher for iov_iter based kernels: a WRITE request goes
+ * to zpl_iter_write(), anything else to zpl_iter_read().
+ */
+static ssize_t
+zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter)
+{
+	if (rw != WRITE)
+		return (zpl_iter_read(kiocb, iter));
+
+	return (zpl_iter_write(kiocb, iter));
+}
+
+/* The .direct_IO prototype differs between kernel versions. */
+#if defined(HAVE_VFS_DIRECT_IO_ITER)
+static ssize_t
+zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter)
+{
+	int rw = iov_iter_rw(iter);
+
+	return (zpl_direct_IO_impl(rw, kiocb, iter));
+}
+#elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET)
+static ssize_t
+zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
+{
+	int rw = iov_iter_rw(iter);
+
+	/* The explicit offset must agree with the kiocb position. */
+	ASSERT3S(pos, ==, kiocb->ki_pos);
+	return (zpl_direct_IO_impl(rw, kiocb, iter));
+}
+#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
+static ssize_t
+zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
+{
+	/* The explicit offset must agree with the kiocb position. */
+	ASSERT3S(pos, ==, kiocb->ki_pos);
+	return (zpl_direct_IO_impl(rw, kiocb, iter));
+}
+#else
+#error "Unknown direct IO interface"
+#endif
+
+#else /* HAVE_VFS_RW_ITERATE */
+
+/* Without iov_iter read/write hooks, dispatch to the aio entry points. */
+#if defined(HAVE_VFS_DIRECT_IO_IOVEC)
+static ssize_t
+zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov,
+    loff_t pos, unsigned long nr_segs)
+{
+	if (rw != WRITE)
+		return (zpl_aio_read(kiocb, iov, nr_segs, pos));
+
+	return (zpl_aio_write(kiocb, iov, nr_segs, pos));
+}
+#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
+static ssize_t
+zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
+{
+	const struct iovec *iovp = iov_iter_iovec(iter);
+	unsigned long nr_segs = iter->nr_segs;
+
+	ASSERT3S(pos, ==, kiocb->ki_pos);
+	if (rw != WRITE)
+		return (zpl_aio_read(kiocb, iovp, nr_segs, pos));
+
+	return (zpl_aio_write(kiocb, iovp, nr_segs, pos));
+}
+#else
+#error "Unknown direct IO interface"
+#endif
+
+#endif /* HAVE_VFS_RW_ITERATE */
+
+/*
+ * .llseek hook.  SEEK_DATA and SEEK_HOLE (when the kernel defines them)
+ * are resolved through zfs_holey() while holding the shared inode lock,
+ * and the resulting offset is applied with lseek_execute().  Every other
+ * 'whence' value is left to generic_file_llseek().
+ */
+static loff_t
+zpl_llseek(struct file *filp, loff_t offset, int whence)
+{
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
+	if (whence == SEEK_HOLE || whence == SEEK_DATA) {
+		struct inode *ip = filp->f_mapping->host;
+		loff_t limit = ip->i_sb->s_maxbytes;
+		fstrans_cookie_t cookie;
+		loff_t rc;
+
+		spl_inode_lock_shared(ip);
+		cookie = spl_fstrans_mark();
+		rc = -zfs_holey(ITOZ(ip), whence, &offset);
+		spl_fstrans_unmark(cookie);
+		if (rc == 0)
+			rc = lseek_execute(filp, ip, offset, limit);
+		spl_inode_unlock_shared(ip);
+
+		return (rc);
+	}
+#endif /* SEEK_HOLE && SEEK_DATA */
+
+	return (generic_file_llseek(filp, offset, whence));
+}
+
+/*
+ * It's worth taking a moment to describe how mmap is implemented
+ * for zfs because it differs considerably from other Linux filesystems.
+ * However, this issue is handled the same way under OpenSolaris.
+ *
+ * The issue is that by design zfs bypasses the Linux page cache and
+ * leaves all caching up to the ARC.  This has been shown to work
+ * well for the common read(2)/write(2) case.  However, mmap(2)
+ * is a problem because it relies on being tightly integrated with the
+ * page cache.  To handle this we cache mmap'ed files twice, once in
+ * the ARC and a second time in the page cache.  The code is careful
+ * to keep both copies synchronized.
+ *
+ * When a file with an mmap'ed region is written to using write(2)
+ * both the data in the ARC and existing pages in the page cache
+ * are updated.  For a read(2) data will be read first from the page
+ * cache then the ARC if needed.  Neither a write(2) nor a read(2) will
+ * ever result in new pages being added to the page cache.
+ *
+ * New pages are added to the page cache only via .readpage() which
+ * is called when the vfs needs to read a page off disk to back the
+ * virtual memory region.  These pages may be modified without
+ * notifying the ARC and will be written out periodically via
+ * .writepage().  This will occur due to either a sync or the usual
+ * page aging behavior.  Note because a read(2) of a mmap'ed file
+ * will always check the page cache first even when the ARC is out
+ * of date correct data will still be returned.
+ *
+ * While this implementation ensures correct behavior it does
+ * have some drawbacks.  The most obvious of which is that it
+ * increases the required memory footprint when accessing mmap'ed
+ * files.  It also adds additional complexity to the code keeping
+ * both caches synchronized.
+ *
+ * Longer term it may be possible to cleanly resolve this wart by
+ * mapping page cache pages directly on to the ARC buffers.  The
+ * Linux address space operations are flexible enough to allow
+ * selection of which pages back a particular index.  The trick
+ * would be working out the details of which subsystem is in
+ * charge, the ARC, the page cache, or both.  It may also prove
+ * helpful to move the ARC buffers to a scatter-gather lists
+ * rather than a vmalloc'ed region.
+ */
+/*
+ * .mmap hook: let zfs_map() see the mapping request first, then set up
+ * the mapping with generic_file_mmap().  On kernels without
+ * filemap_range_has_page() the znode is additionally flagged as mapped
+ * under z_lock.  Returns 0 on success, the negated zfs_map() error, or
+ * the generic_file_mmap() error.
+ */
+static int
+zpl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct inode *ip = filp->f_mapping->host;
+	size_t maplen = (size_t)(vma->vm_end - vma->vm_start);
+	fstrans_cookie_t cookie;
+	int rc;
+
+	cookie = spl_fstrans_mark();
+	rc = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
+	    maplen, vma->vm_flags);
+	spl_fstrans_unmark(cookie);
+	if (rc != 0)
+		return (rc);
+
+	rc = generic_file_mmap(filp, vma);
+	if (rc != 0)
+		return (rc);
+
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	znode_t *zp = ITOZ(ip);
+	mutex_enter(&zp->z_lock);
+	zp->z_is_mapped = B_TRUE;
+	mutex_exit(&zp->z_lock);
+#endif
+
+	return (0);
+}
+
+/*
+ * Fill a single page for the Linux page cache.  This path exists only to
+ * support mmap(2); an identical copy of the data lives in the ARC and is
+ * kept current through .write() and .writepage().
+ *
+ * The page must be locked on entry and is unlocked before returning.
+ * Returns the negated zfs_getpage() result.
+ */
+static inline int
+zpl_readpage_common(struct page *pp)
+{
+	fstrans_cookie_t cookie;
+	int rc;
+
+	ASSERT(PageLocked(pp));
+
+	cookie = spl_fstrans_mark();
+	rc = -zfs_getpage(pp->mapping->host, pp);
+	spl_fstrans_unmark(cookie);
+
+	unlock_page(pp);
+
+	return (rc);
+}
+
+/* Single-page read hook; folio based when the kernel provides it. */
+#ifdef HAVE_VFS_READ_FOLIO
+static int
+zpl_read_folio(struct file *filp, struct folio *folio)
+{
+	struct page *pp = &folio->page;
+
+	return (zpl_readpage_common(pp));
+}
+#else
+static int
+zpl_readpage(struct file *filp, struct page *pp)
+{
+	int rc = zpl_readpage_common(pp);
+
+	return (rc);
+}
+#endif
+
+/*
+ * Page filler callback: reads one page through zpl_readpage_common().
+ * The 'data' cookie is not used.
+ */
+static int
+zpl_readpage_filler(void *data, struct page *pp)
+{
+	int rc = zpl_readpage_common(pp);
+
+	return (rc);
+}
+
+/*
+ * Fill a set of pages for the Linux page cache.  This is only invoked
+ * for read ahead, never for demand paging.  To keep things simple the
+ * legacy variant relies on read_cache_pages() to lock each page for IO
+ * and call the filler; the readahead variant walks the pages handed out
+ * by readahead_page() and stops at the first filler error.
+ */
+#ifdef HAVE_VFS_READPAGES
+static int
+zpl_readpages(struct file *filp, struct address_space *mapping,
+    struct list_head *pages, unsigned nr_pages)
+{
+	int rc = read_cache_pages(mapping, pages, zpl_readpage_filler, NULL);
+
+	return (rc);
+}
+#else
+static void
+zpl_readahead(struct readahead_control *ractl)
+{
+	struct page *pp;
+	int rc;
+
+	for (;;) {
+		pp = readahead_page(ractl);
+		if (pp == NULL)
+			break;
+
+		rc = zpl_readpage_filler(NULL, pp);
+		put_page(pp);
+		if (rc != 0)
+			break;
+	}
+}
+#endif
+
+/*
+ * write_cache_pages() callback: hand one locked page that is not under
+ * writeback to zfs_putpage().  'data' points to a boolean_t saying whether
+ * the writeback is synchronous.  The zfs_putpage() result is deliberately
+ * discarded and 0 is always returned.
+ */
+static int
+zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
+{
+	boolean_t *syncp = data;
+	struct inode *ip;
+	fstrans_cookie_t cookie;
+
+	ASSERT(PageLocked(pp));
+	ASSERT(!PageWriteback(pp));
+
+	cookie = spl_fstrans_mark();
+	ip = pp->mapping->host;
+	(void) zfs_putpage(ip, pp, wbc, *syncp);
+	spl_fstrans_unmark(cookie);
+
+	return (0);
+}
+
+#ifdef HAVE_WRITEPAGE_T_FOLIO
+/* Folio flavour of the writeback callback; forwards to zpl_putpage(). */
+static int
+zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
+{
+	struct page *page = &pp->page;
+
+	(void) zpl_putpage(page, wbc, data);
+
+	return (0);
+}
+#endif
+
+/*
+ * Run write_cache_pages() over 'mapping' with the callback type this
+ * kernel expects (folio or page based) and return its result.
+ */
+static inline int
+zpl_write_cache_pages(struct address_space *mapping,
+    struct writeback_control *wbc, void *data)
+{
+#ifdef HAVE_WRITEPAGE_T_FOLIO
+	return (write_cache_pages(mapping, wbc, zpl_putfolio, data));
+#else
+	return (write_cache_pages(mapping, wbc, zpl_putpage, data));
+#endif
+}
+
+/*
+ * .writepages hook: write out the dirty pages of 'mapping'.  When the
+ * objset's os_sync is ZFS_SYNC_ALWAYS the request is upgraded to
+ * WB_SYNC_ALL.  Returns the result of the last write_cache_pages() pass.
+ */
+static int
+zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	struct inode *ip = mapping->host;
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	enum writeback_sync_modes wanted_mode;
+	boolean_t for_sync;
+	int rc;
+
+	ZPL_ENTER(zfsvfs);
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		wbc->sync_mode = WB_SYNC_ALL;
+	ZPL_EXIT(zfsvfs);
+	wanted_mode = wbc->sync_mode;
+	for_sync = (wanted_mode == WB_SYNC_ALL);
+
+	/*
+	 * Running write_cache_pages() in SYNC mode here would make putpage()
+	 * wait for each individual page to reach disk, which performs
+	 * atrociously.  Instead it is run once in non-SYNC mode so that the
+	 * ZIL receives all the data, and everything is committed in one go.
+	 */
+	wbc->sync_mode = WB_SYNC_NONE;
+	rc = zpl_write_cache_pages(mapping, wbc, &for_sync);
+	if (wanted_mode == wbc->sync_mode)
+		return (rc);
+
+	ZPL_ENTER(zfsvfs);
+	ZPL_VERIFY_ZP(zp);
+	if (zfsvfs->z_log != NULL)
+		zil_commit(zfsvfs->z_log, zp->z_id);
+	ZPL_EXIT(zfsvfs);
+
+	/*
+	 * write_cache_pages() has to be called again (returning right after
+	 * the commit is not enough) because the earlier non-SYNC pass does
+	 * not guarantee that every dirty page was picked up (see the
+	 * implementation of write_cache_pages() for details).  That said,
+	 * this second pass is a no-op in most cases.
+	 */
+	wbc->sync_mode = wanted_mode;
+	rc = zpl_write_cache_pages(mapping, wbc, &for_sync);
+
+	return (rc);
+}
+
+/*
+ * Write a dirty page out to the ARC.  This hook is needed only to
+ * support mmap(2): mapped pages can be dirtied by plain memory
+ * operations that never go through .write(), and this is how such pages
+ * are kept in sync with the ARC buffers.
+ *
+ * When the objset's os_sync is ZFS_SYNC_ALWAYS the request is upgraded to
+ * WB_SYNC_ALL before the page is handed to zpl_putpage().
+ */
+static int
+zpl_writepage(struct page *pp, struct writeback_control *wbc)
+{
+	zfsvfs_t *zfsvfs = ITOZSB(pp->mapping->host);
+	boolean_t for_sync;
+
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		wbc->sync_mode = WB_SYNC_ALL;
+
+	for_sync = (wbc->sync_mode == WB_SYNC_ALL);
+
+	return (zpl_putpage(pp, wbc, &for_sync));
+}
+
+/*
+ * The flag combination which matches the behavior of zfs_space() is
+ * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
+ * flag was introduced in the 2.6.38 kernel.
+ *
+ * The original mode=0 (allocate space) behavior can be reasonably emulated
+ * by checking if enough space exists and creating a sparse file, as real
+ * persistent space reservation is not possible due to COW, snapshots, etc.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EOPNOTSUPP  unsupported mode bits, or mode=0 emulation disabled via
+ *                zfs_fallocate_reserve_percent == 0
+ *   -EINVAL      negative offset or non-positive length
+ *   -ENOSPC      the request exceeds the (deflated) available space
+ * plus the negated error of zfs_space(), zfs_statvfs() or zfs_freesp().
+ */
+static long
+zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
+{
+	cred_t *cr = CRED();
+	loff_t olen;
+	fstrans_cookie_t cookie;
+	int error = 0;
+
+	int test_mode = FALLOC_FL_PUNCH_HOLE;
+#ifdef HAVE_FALLOC_FL_ZERO_RANGE
+	test_mode |= FALLOC_FL_ZERO_RANGE;
+#endif
+
+	if ((mode & ~(FALLOC_FL_KEEP_SIZE | test_mode)) != 0)
+		return (-EOPNOTSUPP);
+
+	if (offset < 0 || len <= 0)
+		return (-EINVAL);
+
+	spl_inode_lock(ip);
+	olen = i_size_read(ip);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	if (mode & (test_mode)) {
+		flock64_t bf;
+
+		/* With KEEP_SIZE the freed range is clamped to the file. */
+		if (mode & FALLOC_FL_KEEP_SIZE) {
+			if (offset > olen)
+				goto out_unmark;
+
+			if (offset + len > olen)
+				len = olen - offset;
+		}
+		bf.l_type = F_WRLCK;
+		bf.l_whence = SEEK_SET;
+		bf.l_start = offset;
+		bf.l_len = len;
+		bf.l_pid = 0;
+
+		error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr);
+	} else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) {
+		unsigned int percent = zfs_fallocate_reserve_percent;
+		struct kstatfs statfs;
+
+		/* Legacy mode, disable fallocate compatibility. */
+		if (percent == 0) {
+			error = -EOPNOTSUPP;
+			goto out_unmark;
+		}
+
+		/*
+		 * Use zfs_statvfs() instead of dmu_objset_space() since it
+		 * also checks project quota limits, which are relevant here.
+		 *
+		 * Like the other zfs_*() entry points used in this file its
+		 * result is negated so that the VFS only ever sees 0 or a
+		 * negative errno.
+		 */
+		error = -zfs_statvfs(ip, &statfs);
+		if (error)
+			goto out_unmark;
+
+		/*
+		 * Shrink available space a bit to account for overhead/races.
+		 * We know the product previously fit into availbytes from
+		 * dmu_objset_space(), so the smaller product will also fit.
+		 */
+		if (len > statfs.f_bavail * (statfs.f_bsize * 100 / percent)) {
+			error = -ENOSPC;
+			goto out_unmark;
+		}
+		/* Extend the file; negate the error as for zfs_statvfs(). */
+		if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen)
+			error = -zfs_freesp(ITOZ(ip), offset + len, 0, 0,
+			    FALSE);
+	}
+out_unmark:
+	spl_fstrans_unmark(cookie);
+	spl_inode_unlock(ip);
+
+	crfree(cr);
+
+	return (error);
+}
+
+/* .fallocate hook: resolve the inode and defer to zpl_fallocate_common(). */
+static long
+zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
+{
+	struct inode *ip = file_inode(filp);
+
+	return (zpl_fallocate_common(ip, mode, offset, len));
+}
+
+#define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
+#define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
+
+/*
+ * Build the FS_*_FL style flag word for 'ip' from its znode's z_pflags.
+ * Only bits contained in ZFS_FL_USER_VISIBLE are reported.
+ */
+static uint32_t
+__zpl_ioctl_getflags(struct inode *ip)
+{
+	const uint64_t pflags = ITOZ(ip)->z_pflags;
+	uint32_t fl = 0;
+
+	fl |= (pflags & ZFS_IMMUTABLE) ? FS_IMMUTABLE_FL : 0;
+	fl |= (pflags & ZFS_APPENDONLY) ? FS_APPEND_FL : 0;
+	fl |= (pflags & ZFS_NODUMP) ? FS_NODUMP_FL : 0;
+	fl |= (pflags & ZFS_PROJINHERIT) ? ZFS_PROJINHERIT_FL : 0;
+
+	return (fl & ZFS_FL_USER_VISIBLE);
+}
+
+/*
+ * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
+ * attributes common to both Linux and Solaris are mapped.
+ *
+ * The resulting 32-bit flag word is copied to the user buffer 'arg'.
+ * Returns 0 on success or -EFAULT if the user buffer cannot be written.
+ */
+static int
+zpl_ioctl_getflags(struct file *filp, void __user *arg)
+{
+	uint32_t flags;
+
+	flags = __zpl_ioctl_getflags(file_inode(filp));
+
+	/*
+	 * copy_to_user() returns the number of bytes it could NOT copy, not
+	 * an errno, so a non-zero result has to be translated to -EFAULT
+	 * instead of being handed back to the ioctl caller as a positive
+	 * value.
+	 */
+	if (copy_to_user(arg, &flags, sizeof (flags)))
+		return (-EFAULT);
+
+	return (0);
+}
+
+/*
+ * fchange() is a helper macro to detect if we have been asked to change a
+ * flag. This is ugly, but the requirement that we do this is a consequence of
+ * how the Linux file attribute interface was designed. Another consequence is
+ * that concurrent modification of files suffers from a TOCTOU race. Neither
+ * are things we can fix without modifying the kernel-userland interface, which
+ * is outside of our jurisdiction.
+ */
+
+#define	fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))
+
+/*
+ * Validate the FS_*_FL style 'ioctl_flags' for 'ip' and translate them
+ * into an xvattr request in 'xva'.
+ *
+ * Returns 0 with 'xva' initialized, or:
+ *   -EOPNOTSUPP  a flag outside the supported set was given
+ *   -EACCES      a flag is not user modifiable, or the caller is neither
+ *                owner of the inode nor suitably capable
+ *   -EPERM       the immutable/append-only state would change without
+ *                CAP_LINUX_IMMUTABLE
+ */
+static int
+__zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
+{
+	const uint32_t supported = FS_IMMUTABLE_FL | FS_APPEND_FL |
+	    FS_NODUMP_FL | ZFS_PROJINHERIT_FL;
+	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
+	xoptattr_t *xoap;
+	int protected_change;
+
+	if ((ioctl_flags & ~supported) != 0)
+		return (-EOPNOTSUPP);
+
+	if ((ioctl_flags & ~ZFS_FL_USER_MODIFIABLE) != 0)
+		return (-EACCES);
+
+	/* Toggling immutable or append-only needs CAP_LINUX_IMMUTABLE. */
+	protected_change =
+	    fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
+	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY);
+	if (protected_change && !capable(CAP_LINUX_IMMUTABLE))
+		return (-EPERM);
+
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EACCES);
+
+	xva_init(xva);
+	xoap = xva_getxoptattr(xva);
+
+	/* Every attribute is requested; only the set ones become B_TRUE. */
+	XVA_SET_REQ(xva, XAT_IMMUTABLE);
+	XVA_SET_REQ(xva, XAT_APPENDONLY);
+	XVA_SET_REQ(xva, XAT_NODUMP);
+	XVA_SET_REQ(xva, XAT_PROJINHERIT);
+
+	if (ioctl_flags & FS_IMMUTABLE_FL)
+		xoap->xoa_immutable = B_TRUE;
+	if (ioctl_flags & FS_APPEND_FL)
+		xoap->xoa_appendonly = B_TRUE;
+	if (ioctl_flags & FS_NODUMP_FL)
+		xoap->xoa_nodump = B_TRUE;
+	if (ioctl_flags & ZFS_PROJINHERIT_FL)
+		xoap->xoa_projinherit = B_TRUE;
+
+	return (0);
+}
+
+/*
+ * FS_IOC_SETFLAGS handler: read the 32-bit flag word from user space,
+ * validate and translate it with __zpl_ioctl_setflags(), and apply it
+ * through zfs_setattr().  Returns -EFAULT if the flags cannot be read, the
+ * validation error, or the negated zfs_setattr() result.
+ */
+static int
+zpl_ioctl_setflags(struct file *filp, void __user *arg)
+{
+	struct inode *ip = file_inode(filp);
+	fstrans_cookie_t cookie;
+	cred_t *cr = CRED();
+	uint32_t uflags;
+	xvattr_t xva;
+	int rc;
+
+	if (copy_from_user(&uflags, arg, sizeof (uflags)))
+		return (-EFAULT);
+
+	rc = __zpl_ioctl_setflags(ip, uflags, &xva);
+	if (rc != 0)
+		return (rc);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	rc = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	return (rc);
+}
+
+/*
+ * ZFS_IOC_FSGETXATTR handler: report the inode's flag word (as computed
+ * by __zpl_ioctl_getflags()) and its project ID to user space in a
+ * zero-initialized zfsxattr_t.
+ *
+ * Returns 0 on success or -EFAULT if the structure could not be copied
+ * out to the user buffer.
+ */
+static int
+zpl_ioctl_getxattr(struct file *filp, void __user *arg)
+{
+	zfsxattr_t fsx = { 0 };
+	struct inode *ip = file_inode(filp);
+
+	fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
+	fsx.fsx_projid = ITOZ(ip)->z_projid;
+
+	/*
+	 * copy_to_user() returns the number of bytes it could NOT copy,
+	 * not an errno.  Translate any shortfall into -EFAULT rather than
+	 * handing a positive byte count back as the ioctl result.
+	 */
+	if (copy_to_user(arg, &fsx, sizeof (fsx)) != 0)
+		return (-EFAULT);
+
+	return (0);
+}
+
+/*
+ * ZFS_IOC_FSSETXATTR handler: copy a zfsxattr_t in from user space,
+ * reject an invalid project ID, build the flag request with
+ * __zpl_ioctl_setflags(), add the project ID to that same request and
+ * apply it through zfs_setattr().
+ *
+ * Returns -EFAULT if the copy-in fails, -EINVAL for a bad project ID, the
+ * flag validation error if there is one, and otherwise the negated
+ * zfs_setattr() result.
+ */
+static int
+zpl_ioctl_setxattr(struct file *filp, void __user *arg)
+{
+	struct inode *ip = file_inode(filp);
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	xoptattr_t *xoap;
+	zfsxattr_t fsx;
+	xvattr_t xva;
+	int error;
+
+	if (copy_from_user(&fsx, arg, sizeof (fsx)) != 0)
+		return (-EFAULT);
+
+	if (!zpl_is_valid_projid(fsx.fsx_projid))
+		return (-EINVAL);
+
+	error = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
+	if (error != 0)
+		return (error);
+
+	/* Piggyback the project ID on the xvattr prepared above. */
+	xoap = xva_getxoptattr(&xva);
+	XVA_SET_REQ(&xva, XAT_PROJID);
+	xoap->xoa_projid = fsx.fsx_projid;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	return (error);
+}
+
+/*
+ * ioctl entry point (installed as .unlocked_ioctl in the file operation
+ * tables): dispatch the supported flag and xattr commands to their
+ * handlers and answer -ENOTTY for every other command.
+ */
+static long
+zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	/*
+	 * Every handler is declared to take a "void __user *".  Convert
+	 * the raw argument once and keep that annotation, instead of
+	 * casting it away with a plain "void *" at each call site.
+	 */
+	void __user *uarg = (void __user *)arg;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return (zpl_ioctl_getflags(filp, uarg));
+	case FS_IOC_SETFLAGS:
+		return (zpl_ioctl_setflags(filp, uarg));
+	case ZFS_IOC_FSGETXATTR:
+		return (zpl_ioctl_getxattr(filp, uarg));
+	case ZFS_IOC_FSSETXATTR:
+		return (zpl_ioctl_setxattr(filp, uarg));
+	default:
+		return (-ENOTTY);
+	}
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * compat_ioctl entry point: map the 32-bit GETFLAGS/SETFLAGS command
+ * numbers onto their native counterparts and forward to zpl_ioctl() with
+ * the argument converted by compat_ptr().  Any other command yields
+ * -ENOTTY.
+ */
+static long
+zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == FS_IOC32_GETFLAGS)
+		cmd = FS_IOC_GETFLAGS;
+	else if (cmd == FS_IOC32_SETFLAGS)
+		cmd = FS_IOC_SETFLAGS;
+	else
+		return (-ENOTTY);
+
+	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Address space (page cache) callbacks for ZPL files.  Which read-side
+ * hooks exist is decided by the HAVE_VFS_* macros: readpages vs.
+ * readahead and read_folio vs. readpage are mutually exclusive pairs,
+ * while the two dirty-tracking hooks are each added only when their
+ * macro is defined.
+ */
+const struct address_space_operations zpl_address_space_operations = {
+#ifdef HAVE_VFS_READPAGES
+	.readpages	= zpl_readpages,
+#else
+	.readahead	= zpl_readahead,
+#endif
+#ifdef HAVE_VFS_READ_FOLIO
+	.read_folio	= zpl_read_folio,
+#else
+	.readpage	= zpl_readpage,
+#endif
+	.writepage	= zpl_writepage,
+	.writepages	= zpl_writepages,
+	.direct_IO	= zpl_direct_IO,
+#ifdef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS
+	.set_page_dirty = __set_page_dirty_nobuffers,
+#endif
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+	.dirty_folio	= filemap_dirty_folio,
+#endif
+};
+/*
+ * File operations for ZPL files.  With HAVE_VFS_RW_ITERATE the
+ * read_iter/write_iter entry points are used (plus the splice helpers
+ * when HAVE_VFS_IOV_ITER is defined); otherwise the table falls back to
+ * the do_sync_* and aio_* entry points.  Both ioctl hooks route to
+ * zpl_ioctl()/zpl_compat_ioctl().
+ */
+const struct file_operations zpl_file_operations = {
+	.open		= zpl_open,
+	.release	= zpl_release,
+	.llseek		= zpl_llseek,
+#ifdef HAVE_VFS_RW_ITERATE
+#ifdef HAVE_NEW_SYNC_READ
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+#endif
+	.read_iter	= zpl_iter_read,
+	.write_iter	= zpl_iter_write,
+#ifdef HAVE_VFS_IOV_ITER
+#ifdef HAVE_COPY_SPLICE_READ
+	.splice_read	= copy_splice_read,
+#else
+	.splice_read	= generic_file_splice_read,
+#endif
+	.splice_write	= iter_file_splice_write,
+#endif
+#else
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= zpl_aio_read,
+	.aio_write	= zpl_aio_write,
+#endif
+	.mmap		= zpl_mmap,
+	.fsync		= zpl_fsync,
+#ifdef HAVE_FILE_AIO_FSYNC
+	.aio_fsync	= zpl_aio_fsync,
+#endif
+	.fallocate	= zpl_fallocate,
+	.unlocked_ioctl	= zpl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= zpl_compat_ioctl,
+#endif
+};
+/*
+ * File operations for ZPL directories.  Exactly one directory iteration
+ * hook is installed (iterate_shared, iterate or readdir, in that order of
+ * preference according to the HAVE_VFS_ITERATE* macros); fsync and the
+ * ioctl hooks are shared with regular files.
+ */
+const struct file_operations zpl_dir_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+#if defined(HAVE_VFS_ITERATE_SHARED)
+	.iterate_shared	= zpl_iterate,
+#elif defined(HAVE_VFS_ITERATE)
+	.iterate	= zpl_iterate,
+#else
+	.readdir	= zpl_readdir,
+#endif
+	.fsync		= zpl_fsync,
+	.unlocked_ioctl = zpl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl   = zpl_compat_ioctl,
+#endif
+};
+/* Expose zfs_fallocate_reserve_percent as a uint module parameter, mode 0644. */
+/* BEGIN CSTYLED */
+module_param(zfs_fallocate_reserve_percent, uint, 0644);
+MODULE_PARM_DESC(zfs_fallocate_reserve_percent,
+    "Percentage of length to use for the available capacity check");
+/* END CSTYLED */

diff --git a/zfs/module/os/linux/zfs/zpl_inode.c b/zfs/module/os/linux/zfs/zpl_inode.c
new file mode 100644
index 0000000..6efaaf4
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_inode.c

@@ -0,0 +1,851 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ */
+
+
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_znode.h>
+#include <sys/dmu_objset.h>
+#include <sys/vfs.h>
+#include <sys/zpl.h>
+#include <sys/file.h>
+
+
+/*
+ * ->lookup() inode operation: resolve dentry's name in dir through
+ * zfs_lookup() and attach the result to the dentry.
+ *
+ * Names of ZAP_MAXNAMELEN or more characters fail with -ENAMETOOLONG.
+ * On a case-insensitive dataset the real on-disk name is requested as
+ * well, so that the dentry can be installed under that name with
+ * d_add_ci() when it differs from the name that was looked up.
+ *
+ * Returns the dentry produced by d_splice_alias()/d_add_ci(), NULL for a
+ * miss on a case-insensitive dataset, or an ERR_PTR() for other failures.
+ */
+static struct dentry *
+zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+	cred_t *cr = CRED();
+	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
+	fstrans_cookie_t cookie;
+	struct dentry *new_dentry;
+	struct qstr ci_name;
+	struct inode *ip;
+	pathname_t *ppn = NULL;
+	pathname_t pn;
+	znode_t *zp;
+	int zfs_flags = 0;
+	int error;
+
+	if (dlen(dentry) >= ZAP_MAXNAMELEN)
+		return (ERR_PTR(-ENAMETOOLONG));
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+
+	/* If we are a case insensitive fs, we need the real name */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+		zfs_flags = FIGNORECASE;
+		pn_alloc(&pn);
+		ppn = &pn;
+	}
+
+	error = -zfs_lookup(ITOZ(dir), dname(dentry), &zp,
+	    zfs_flags, cr, NULL, ppn);
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(error, <=, 0);
+	crfree(cr);
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_time = jiffies;
+	spin_unlock(&dentry->d_lock);
+
+	if (error != 0) {
+		/* The real-name buffer is no longer needed on any failure. */
+		if (ppn != NULL)
+			pn_free(ppn);
+
+		if (error != -ENOENT)
+			return (ERR_PTR(error));
+
+		/*
+		 * On a case insensitive fs we do not want to insert
+		 * negative entries, so ENOENT is reported as NULL there.
+		 * Otherwise d_splice_alias() records the miss.
+		 */
+		if (ppn != NULL)
+			return (NULL);
+		return (d_splice_alias(NULL, dentry));
+	}
+	ip = ZTOI(zp);
+
+	if (ppn == NULL)
+		return (d_splice_alias(ip, dentry));
+
+	/*
+	 * Case insensitive: install the dentry under the real name when
+	 * it is not byte-identical to the name that was requested.
+	 */
+	if (strcmp(dname(dentry), pn.pn_buf) != 0) {
+		ci_name.name = pn.pn_buf;
+		ci_name.len = strlen(pn.pn_buf);
+		new_dentry = d_add_ci(dentry, ip, &ci_name);
+	} else {
+		new_dentry = d_splice_alias(ip, dentry);
+	}
+	pn_free(ppn);
+
+	return (new_dentry);
+}
+
+/*
+ * Fill in the mode, uid and gid of *vap for a new object in dir.
+ *
+ * The uid always comes from the credential.  When dir is non-NULL and has
+ * S_ISGID set, the gid is taken from dir and a new directory also gets
+ * S_ISGID added to its mode; otherwise the gid comes from the credential.
+ * va_mask is set to ATTR_MODE.
+ */
+void
+zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
+{
+	vap->va_mask = ATTR_MODE;
+	vap->va_mode = mode;
+	vap->va_uid = crgetuid(cr);
+
+	if (dir == NULL || !(dir->i_mode & S_ISGID)) {
+		vap->va_gid = crgetgid(cr);
+		return;
+	}
+
+	/* setgid parent: take its group, and mark new directories setgid */
+	vap->va_gid = KGID_TO_SGID(dir->i_gid);
+	if (S_ISDIR(mode))
+		vap->va_mode |= S_ISGID;
+}
+
+/*
+ * ->create() inode operation: create the file named by dentry in dir with
+ * zfs_create(), then initialize its security xattr and ACL.  If either
+ * follow-up step fails the new entry is removed again and the inode
+ * reference dropped; otherwise the inode is attached to the dentry.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_IOPS_CREATE_USERNS
+zpl_create(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode, bool flag)
+#elif defined(HAVE_IOPS_CREATE_IDMAP)
+zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode, bool flag)
+#else
+zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
+#endif
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	vattr_t *vap;
+	znode_t *zp;
+	int error;
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+	zpl_vap_init(vap, dir, mode, cr);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
+	    mode, &zp, cr, 0, NULL);
+	if (error != 0)
+		goto out;
+
+	error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
+	if (error == 0)
+		error = zpl_init_acl(ZTOI(zp), dir);
+
+	if (error == 0) {
+		d_instantiate(dentry, ZTOI(zp));
+	} else {
+		/* Undo the create; the zfs_remove() result is ignored. */
+		(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
+		remove_inode_hash(ZTOI(zp));
+		iput(ZTOI(zp));
+	}
+
+out:
+	spl_fstrans_unmark(cookie);
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * ->mknod() inode operation: like zpl_create(), but the device number
+ * rdev is recorded in the attributes handed to zfs_create().  A failure
+ * while initializing the security xattr or the ACL removes the new entry
+ * again.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_IOPS_MKNOD_USERNS
+zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode,
+#elif defined(HAVE_IOPS_MKNOD_IDMAP)
+zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode,
+#else
+zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+#endif
+    dev_t rdev)
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	vattr_t *vap;
+	znode_t *zp;
+	int error;
+
+	/*
+	 * Linux is currently expected to pass rdev=0 for every socket and
+	 * fifo; assert it so that a change in that behavior is noticed.
+	 */
+	if (S_ISSOCK(mode) || S_ISFIFO(mode))
+		ASSERT(rdev == 0);
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+	zpl_vap_init(vap, dir, mode, cr);
+	vap->va_rdev = rdev;
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
+	    mode, &zp, cr, 0, NULL);
+	if (error != 0)
+		goto out;
+
+	error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
+	if (error == 0)
+		error = zpl_init_acl(ZTOI(zp), dir);
+
+	if (error == 0) {
+		d_instantiate(dentry, ZTOI(zp));
+	} else {
+		/* Undo the create; the zfs_remove() result is ignored. */
+		(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
+		remove_inode_hash(ZTOI(zp));
+		iput(ZTOI(zp));
+	}
+
+out:
+	spl_fstrans_unmark(cookie);
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#ifdef HAVE_TMPFILE
+/*
+ * ->tmpfile() inode operation: create an unnamed file in dir through
+ * zfs_tmpfile(), hand it to d_tmpfile() and initialize its security
+ * xattr and ACL.  In the struct file based variants (HAVE_TMPFILE_DENTRY
+ * undefined) the final result is whatever finish_open_simple() returns
+ * for the accumulated error.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_TMPFILE_IDMAP
+zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
+    struct file *file, umode_t mode)
+#elif !defined(HAVE_TMPFILE_DENTRY)
+zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
+    struct file *file, umode_t mode)
+#else
+#ifdef HAVE_TMPFILE_USERNS
+zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
+#else
+zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+#endif
+#endif
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	struct inode *ip;
+	vattr_t *vap;
+	int error;
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+
+	/*
+	 * The VFS does not apply the umask, therefore it is applied here
+	 * when POSIX ACLs are not enabled.
+	 */
+	if (!IS_POSIXACL(dir))
+		mode &= ~current_umask();
+	zpl_vap_init(vap, dir, mode, cr);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
+	if (error != 0)
+		goto out;
+
+	/* d_tmpfile will do drop_nlink, so we should set it first */
+	set_nlink(ip, 1);
+
+	/*
+	 * No cleanup is needed if a step below fails: the file is already
+	 * in the unlinked set.
+	 */
+#ifdef HAVE_TMPFILE_DENTRY
+	d_tmpfile(dentry, ip);
+
+	error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
+	if (error == 0)
+		error = zpl_init_acl(ip, dir);
+#else
+	d_tmpfile(file, ip);
+
+	error = zpl_xattr_security_init(ip, dir,
+	    &file->f_path.dentry->d_name);
+	if (error == 0)
+		error = zpl_init_acl(ip, dir);
+	error = finish_open_simple(file, error);
+#endif
+
+out:
+	spl_fstrans_unmark(cookie);
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+#endif
+
+/*
+ * ->unlink() inode operation: remove the entry named by dentry from dir
+ * via zfs_remove().  The result is asserted to be zero or negative.
+ */
+static int
+zpl_unlink(struct inode *dir, struct dentry *dentry)
+{
+	cred_t *cr = CRED();
+	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
+	fstrans_cookie_t cookie;
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
+
+	if (error == 0) {
+		/*
+		 * On a case-insensitive filesystem the dentry has to be
+		 * invalidated so that no negative entry is created.
+		 */
+		if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
+			d_invalidate(dentry);
+	}
+
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * ->mkdir() inode operation: create the directory named by dentry in dir
+ * with zfs_mkdir(), then initialize its security xattr and ACL.  If
+ * either follow-up step fails the directory is removed again with
+ * zfs_rmdir() and the inode reference dropped.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_IOPS_MKDIR_USERNS
+zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
+#else
+zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+#endif
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	vattr_t *vap;
+	znode_t *zp;
+	int error;
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+	zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL);
+	if (error != 0)
+		goto out;
+
+	error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
+	if (error == 0)
+		error = zpl_init_acl(ZTOI(zp), dir);
+
+	if (error == 0) {
+		d_instantiate(dentry, ZTOI(zp));
+	} else {
+		/* Undo the mkdir; the zfs_rmdir() result is ignored. */
+		(void) zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0);
+		remove_inode_hash(ZTOI(zp));
+		iput(ZTOI(zp));
+	}
+
+out:
+	spl_fstrans_unmark(cookie);
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * ->rmdir() inode operation: remove the directory named by dentry from
+ * dir via zfs_rmdir().  The result is asserted to be zero or negative.
+ */
+static int
+zpl_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	cred_t *cr = CRED();
+	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
+	fstrans_cookie_t cookie;
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0);
+
+	if (error == 0) {
+		/*
+		 * On a case-insensitive filesystem the dentry has to be
+		 * invalidated so that no negative entry is created.
+		 */
+		if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
+			d_invalidate(dentry);
+	}
+
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * getattr implementation behind ZPL_GETATTR_WRAPPER(): fill *stat through
+ * zfs_getattr_fast() and then, where the kernel defines the matching
+ * STATX_* symbols, add the birth time (if requested) and the immutable,
+ * append-only and nodump attribute bits taken from z_pflags.
+ *
+ * query_flags is currently ignored.  The result is asserted to be zero
+ * or negative.
+ */
+static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
+zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#endif
+{
+	struct inode *ip = path->dentry->d_inode;
+	znode_t *zp __maybe_unused = ITOZ(ip);
+	fstrans_cookie_t cookie;
+	int error;
+
+	cookie = spl_fstrans_mark();
+
+	/* XXX query_flags currently ignored. */
+
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+	error = -zfs_getattr_fast(user_ns, ip, stat);
+#else
+	error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
+#endif
+
+#ifdef STATX_BTIME
+	if ((request_mask & STATX_BTIME) != 0) {
+		stat->btime = zp->z_btime;
+		stat->result_mask |= STATX_BTIME;
+	}
+#endif
+
+	/*
+	 * Each supported attribute is always advertised in
+	 * attributes_mask and reported in attributes only when set.
+	 */
+#ifdef STATX_ATTR_IMMUTABLE
+	stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
+	if ((zp->z_pflags & ZFS_IMMUTABLE) != 0)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+#endif
+
+#ifdef STATX_ATTR_APPEND
+	stat->attributes_mask |= STATX_ATTR_APPEND;
+	if ((zp->z_pflags & ZFS_APPENDONLY) != 0)
+		stat->attributes |= STATX_ATTR_APPEND;
+#endif
+
+#ifdef STATX_ATTR_NODUMP
+	stat->attributes_mask |= STATX_ATTR_NODUMP;
+	if ((zp->z_pflags & ZFS_NODUMP) != 0)
+		stat->attributes |= STATX_ATTR_NODUMP;
+#endif
+
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+ZPL_GETATTR_WRAPPER(zpl_getattr);
+
+/*
+ * ->setattr() inode operation: after zpl_setattr_prepare() accepts the
+ * change, copy the iattr fields into a vattr_t and apply them with
+ * zfs_setattr().  A successful mode change is followed by
+ * zpl_chmod_acl().  When the access time is part of the request the
+ * in-core i_atime is updated (truncated via
+ * zpl_inode_timestamp_truncate()) before zfs_setattr() is called.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_USERNS_IOPS_SETATTR
+zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
+    struct iattr *ia)
+#elif defined(HAVE_IDMAP_IOPS_SETATTR)
+zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
+    struct iattr *ia)
+#else
+zpl_setattr(struct dentry *dentry, struct iattr *ia)
+#endif
+{
+	struct inode *ip = dentry->d_inode;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	vattr_t *vap;
+	int error;
+
+#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
+	error = zpl_setattr_prepare(user_ns, dentry, ia);
+#else
+	error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
+#endif
+	if (error != 0)
+		return (error);
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+
+	/* Mirror the iattr request into the vattr. */
+	vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
+	vap->va_mode = ia->ia_mode;
+	vap->va_uid = KUID_TO_SUID(ia->ia_uid);
+	vap->va_gid = KGID_TO_SGID(ia->ia_gid);
+	vap->va_size = ia->ia_size;
+	vap->va_atime = ia->ia_atime;
+	vap->va_mtime = ia->ia_mtime;
+	vap->va_ctime = ia->ia_ctime;
+
+	if ((vap->va_mask & ATTR_ATIME) != 0)
+		ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_setattr(ITOZ(ip), vap, 0, cr);
+	if (error == 0 && (ia->ia_valid & ATTR_MODE) != 0)
+		error = zpl_chmod_acl(ip);
+	spl_fstrans_unmark(cookie);
+
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * ->rename() inode operation (flag-taking form): rename sdentry in sdip
+ * to tdentry in tdip through zfs_rename().  No renameat2(2) flags are
+ * supported, so any non-zero rflags fails with -EINVAL.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_IOPS_RENAME_USERNS
+zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int rflags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int rflags)
+#else
+zpl_rename2(struct inode *sdip, struct dentry *sdentry,
+    struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
+#endif
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int error;
+
+	/* We don't have renameat2(2) support */
+	if (rflags != 0)
+		return (-EINVAL);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip),
+	    dname(tdentry), cr, 0);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+#if !(defined(HAVE_IOPS_RENAME_USERNS) || \
+	defined(HAVE_RENAME_WANTS_FLAGS) || \
+	defined(HAVE_IOPS_RENAME_IDMAP))
+/*
+ * ->rename() inode operation without a flags argument: forwards to
+ * zpl_rename2() with rflags set to 0.
+ */
+static int
+zpl_rename(struct inode *sdip, struct dentry *sdentry,
+    struct inode *tdip, struct dentry *tdentry)
+{
+	const unsigned int no_flags = 0;
+
+	return (zpl_rename2(sdip, sdentry, tdip, tdentry, no_flags));
+}
+#endif
+
+/*
+ * ->symlink() inode operation: create the symbolic link dentry in dir
+ * pointing at name through zfs_symlink(), then initialize its security
+ * xattr.  If that initialization fails the link is removed again and the
+ * inode reference dropped.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+#ifdef HAVE_IOPS_SYMLINK_USERNS
+zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, const char *name)
+#elif defined(HAVE_IOPS_SYMLINK_IDMAP)
+zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, const char *name)
+#else
+zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
+#endif
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	vattr_t *vap;
+	znode_t *zp;
+	int error;
+
+	crhold(cr);
+	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_symlink(ITOZ(dir), dname(dentry), vap,
+	    (char *)name, &zp, cr, 0);
+	if (error != 0)
+		goto out;
+
+	error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
+	if (error == 0) {
+		d_instantiate(dentry, ZTOI(zp));
+	} else {
+		/* Undo the symlink; the zfs_remove() result is ignored. */
+		(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
+		remove_inode_hash(ZTOI(zp));
+		iput(ZTOI(zp));
+	}
+
+out:
+	spl_fstrans_unmark(cookie);
+	kmem_free(vap, sizeof (vattr_t));
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * zpl_put_link() releases the MAXPATHLEN byte link buffer; which
+ * prototype is built depends on the HAVE_PUT_LINK_* macros.
+ */
+#if defined(HAVE_PUT_LINK_COOKIE)
+static void
+zpl_put_link(struct inode *unused, void *cookie)
+{
+	kmem_free(cookie, MAXPATHLEN);
+}
+#elif defined(HAVE_PUT_LINK_NAMEIDATA)
+static void
+zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
+{
+	const char *link = nd_get_link(nd);
+
+	/* An error pointer stored in nd is not a buffer to free. */
+	if (IS_ERR(link))
+		return;
+
+	kmem_free(link, MAXPATHLEN);
+}
+#elif defined(HAVE_PUT_LINK_DELAYED)
+static void
+zpl_put_link(void *ptr)
+{
+	kmem_free(ptr, MAXPATHLEN);
+}
+#endif
+
+/*
+ * Shared helper for the get_link/follow_link variants: read the target
+ * of symlink ip into a freshly allocated, zero-filled MAXPATHLEN buffer
+ * via zfs_readlink().
+ *
+ * On success *link points at the buffer and 0 is returned; the buffer is
+ * later released by zpl_put_link() in the variants that register it.  On
+ * failure the buffer is freed here, *link stays NULL and the negated
+ * zfs_readlink() result is returned.  The dentry argument is not used.
+ */
+static int
+zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	struct iovec iov;
+	zfs_uio_t uio;
+	int error;
+
+	crhold(cr);
+	*link = NULL;
+
+	iov.iov_len = MAXPATHLEN;
+	iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+	/* The uio is set up with a residual of MAXPATHLEN - 1 bytes. */
+	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_readlink(ip, &uio, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	if (error == 0)
+		*link = iov.iov_base;
+	else
+		kmem_free(iov.iov_base, MAXPATHLEN);
+
+	return (error);
+}
+
+/*
+ * Symlink resolution entry point.  Exactly one of the four variants
+ * below is built, selected by the HAVE_GET_LINK_* / HAVE_FOLLOW_LINK_*
+ * macros; all of them obtain the target through zpl_get_link_common().
+ * The two get_link variants fail with -ECHILD when called without a
+ * dentry.
+ */
+#if defined(HAVE_GET_LINK_DELAYED)
+static const char *
+zpl_get_link(struct dentry *dentry, struct inode *inode,
+    struct delayed_call *done)
+{
+	char *link = NULL;
+	int error;
+
+	if (dentry == NULL)
+		return (ERR_PTR(-ECHILD));
+
+	error = zpl_get_link_common(dentry, inode, &link);
+	if (error != 0)
+		return (ERR_PTR(error));
+
+	/* Have the buffer released through zpl_put_link() later. */
+	set_delayed_call(done, zpl_put_link, link);
+
+	return (link);
+}
+#elif defined(HAVE_GET_LINK_COOKIE)
+static const char *
+zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie)
+{
+	char *link = NULL;
+	int error;
+
+	if (dentry == NULL)
+		return (ERR_PTR(-ECHILD));
+
+	error = zpl_get_link_common(dentry, inode, &link);
+	if (error != 0)
+		return (ERR_PTR(error));
+
+	*cookie = link;
+	return (link);
+}
+#elif defined(HAVE_FOLLOW_LINK_COOKIE)
+static const char *
+zpl_follow_link(struct dentry *dentry, void **cookie)
+{
+	char *link = NULL;
+	int error;
+
+	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
+	if (error != 0)
+		return (ERR_PTR(error));
+
+	*cookie = link;
+	return (link);
+}
+#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA)
+static void *
+zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *link = NULL;
+	int error;
+
+	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
+	if (error == 0)
+		nd_set_link(nd, link);
+	else
+		nd_set_link(nd, ERR_PTR(error));
+
+	return (NULL);
+}
+#endif
+
+/*
+ * ->link() inode operation: add the hard link dentry in dir for the
+ * inode behind old_dentry through zfs_link().  Fails with -EMLINK once
+ * the inode already has ZFS_LINK_MAX links.  The in-core ctime is updated
+ * and an extra inode reference taken before zfs_link() is called; that
+ * reference is handed to the dentry on success and dropped on failure.
+ *
+ * The result is asserted to be zero or negative.
+ */
+static int
+zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+{
+	cred_t *cr = CRED();
+	struct inode *ip = old_dentry->d_inode;
+	fstrans_cookie_t cookie;
+	int error;
+
+	if (ip->i_nlink >= ZFS_LINK_MAX)
+		return (-EMLINK);
+
+	crhold(cr);
+	ip->i_ctime = current_time(ip);
+	/* Must have an existing ref, so igrab() cannot return NULL */
+	VERIFY3P(igrab(ip), !=, NULL);
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0);
+	if (error == 0)
+		d_instantiate(dentry, ip);
+	else
+		iput(ip);
+
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+/*
+ * Inode operations without any directory or symlink hooks: attribute
+ * get/set, xattr listing (plus the generic xattr entry points when
+ * HAVE_GENERIC_SETXATTR is defined) and, with CONFIG_FS_POSIX_ACL, the
+ * ACL accessors.
+ */
+const struct inode_operations zpl_inode_operations = {
+	.setattr	= zpl_setattr,
+	.getattr	= zpl_getattr,
+#ifdef HAVE_GENERIC_SETXATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.listxattr	= zpl_xattr_list,
+#if defined(CONFIG_FS_POSIX_ACL)
+#if defined(HAVE_SET_ACL)
+	.set_acl	= zpl_set_acl,
+#endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
+	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
+#endif /* CONFIG_FS_POSIX_ACL */
+};
+/*
+ * Inode operations for directories: the namespace hooks (create, lookup,
+ * link, unlink, symlink, mkdir, rmdir, mknod, rename and, with
+ * HAVE_TMPFILE, tmpfile) followed by the same attribute, xattr and ACL
+ * entries as the other tables.  .rename is zpl_rename2 whenever the
+ * kernel passes flags, a user namespace or an idmap, and zpl_rename
+ * otherwise.
+ */
+const struct inode_operations zpl_dir_inode_operations = {
+	.create		= zpl_create,
+	.lookup		= zpl_lookup,
+	.link		= zpl_link,
+	.unlink		= zpl_unlink,
+	.symlink	= zpl_symlink,
+	.mkdir		= zpl_mkdir,
+	.rmdir		= zpl_rmdir,
+	.mknod		= zpl_mknod,
+#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+	.rename		= zpl_rename2,
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+	.rename		= zpl_rename2,
+#else
+	.rename		= zpl_rename,
+#endif
+#ifdef HAVE_TMPFILE
+	.tmpfile	= zpl_tmpfile,
+#endif
+	.setattr	= zpl_setattr,
+	.getattr	= zpl_getattr,
+#ifdef HAVE_GENERIC_SETXATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.listxattr	= zpl_xattr_list,
+#if defined(CONFIG_FS_POSIX_ACL)
+#if defined(HAVE_SET_ACL)
+	.set_acl	= zpl_set_acl,
+#endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
+	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
+#endif /* CONFIG_FS_POSIX_ACL */
+};
+/*
+ * Inode operations for symbolic links: the link resolution hook
+ * (get_link or follow_link, whichever the HAVE_* macros select), an
+ * explicit put_link for the cookie and nameidata variants, and the
+ * attribute and xattr entries.  No ACL hooks are installed here.
+ */
+const struct inode_operations zpl_symlink_inode_operations = {
+#ifdef HAVE_GENERIC_READLINK
+	.readlink	= generic_readlink,
+#endif
+#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE)
+	.get_link	= zpl_get_link,
+#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA)
+	.follow_link	= zpl_follow_link,
+#endif
+#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA)
+	.put_link	= zpl_put_link,
+#endif
+	.setattr	= zpl_setattr,
+	.getattr	= zpl_getattr,
+#ifdef HAVE_GENERIC_SETXATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.listxattr	= zpl_xattr_list,
+};
+/*
+ * Inode operations table named for "special" inodes; its entries are the
+ * same attribute, xattr and ACL hooks as zpl_inode_operations.
+ */
+const struct inode_operations zpl_special_inode_operations = {
+	.setattr	= zpl_setattr,
+	.getattr	= zpl_getattr,
+#ifdef HAVE_GENERIC_SETXATTR
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.removexattr	= generic_removexattr,
+#endif
+	.listxattr	= zpl_xattr_list,
+#if defined(CONFIG_FS_POSIX_ACL)
+#if defined(HAVE_SET_ACL)
+	.set_acl	= zpl_set_acl,
+#endif /* HAVE_SET_ACL */
+#if defined(HAVE_GET_INODE_ACL)
+	.get_inode_acl	= zpl_get_acl,
+#else
+	.get_acl	= zpl_get_acl,
+#endif /* HAVE_GET_INODE_ACL */
+#endif /* CONFIG_FS_POSIX_ACL */
+};

diff --git a/zfs/module/os/linux/zfs/zpl_super.c b/zfs/module/os/linux/zfs/zpl_super.c
new file mode 100644
index 0000000..c2fd3fe
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_super.c

@@ -0,0 +1,365 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ */
+
+
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zpl.h>
+
+
+/*
+ * ->alloc_inode(): obtain an inode from zfs_inode_alloc() (a failure
+ * there trips the VERIFY) and start its iversion at 1.
+ */
+static struct inode *
+zpl_inode_alloc(struct super_block *sb)
+{
+	struct inode *new_inode;
+
+	VERIFY3S(zfs_inode_alloc(sb, &new_inode), ==, 0);
+	inode_set_iversion(new_inode, 1);
+
+	return (new_inode);
+}
+/*
+ * ->destroy_inode(): assert that no references to the inode remain, then
+ * hand it to zfs_inode_destroy().
+ */
+static void
+zpl_inode_destroy(struct inode *ip)
+{
+	ASSERT(atomic_read(&ip->i_count) == 0);
+	zfs_inode_destroy(ip);
+}
+
+/*
+ * Called from __mark_inode_dirty() to reflect that something in the
+ * inode has changed.  We use it to ensure the znode system attributes
+ * are always strictly up to date with respect to the inode.
+ *
+ * Kernels without the flags argument (HAVE_DIRTY_INODE_WITH_FLAGS
+ * undefined) pass 0 to zfs_dirty_inode().
+ */
+static void
+#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
+zpl_dirty_inode(struct inode *ip, int flags)
+#else
+zpl_dirty_inode(struct inode *ip)
+#endif
+{
+	fstrans_cookie_t cookie;
+
+	cookie = spl_fstrans_mark();
+#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
+	zfs_dirty_inode(ip, flags);
+#else
+	zfs_dirty_inode(ip, 0);
+#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
+	spl_fstrans_unmark(cookie);
+}
+
+/*
+ * When ->drop_inode() is called its return value indicates whether the
+ * inode should be evicted from the inode cache.  If the inode is
+ * unhashed and has no links the default policy is to evict it
+ * immediately.
+ *
+ * The ->evict_inode() callback must minimally truncate the inode pages
+ * and call clear_inode().  For 2.6.35 and later kernels this simply
+ * updates the inode state, with the sync occurring before the truncate
+ * in evict().  For earlier kernels clear_inode() maps to end_writeback(),
+ * which is responsible for completing all outstanding write back.  In
+ * either case, once this is done it is safe to clean up any remaining
+ * inode specific data via zfs_inactive().
+ */
+static void
+zpl_evict_inode(struct inode *ip)
+{
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+
+	/* Order matters: drop the pages, clear the inode, then tear down. */
+	truncate_setsize(ip, 0);
+	clear_inode(ip);
+	zfs_inactive(ip);
+
+	spl_fstrans_unmark(cookie);
+}
+
+/*
+ * ->put_super(): unmount through zfs_umount().  The negated result is
+ * only checked by an assertion (it must be zero or negative); nothing is
+ * reported to the caller.
+ */
+static void
+zpl_put_super(struct super_block *sb)
+{
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	int error;
+
+	error = -zfs_umount(sb);
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(error, <=, 0);
+}
+
+/*
+ * ->sync_fs(): forward the request, including the wait flag, to
+ * zfs_sync() under the caller's credential.  The result is asserted to
+ * be zero or negative.
+ */
+static int
+zpl_sync_fs(struct super_block *sb, int wait)
+{
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int rc;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	rc = -zfs_sync(sb, wait, cr);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(rc, <=, 0);
+
+	return (rc);
+}
+
+/*
+ * ->statfs(): fill *statp through zfs_statvfs() and, for callers using a
+ * 32-bit API, rescale the figures so they are more likely to fit in 32
+ * bits.  Returns the negated zfs_statvfs() result, asserted to be zero or
+ * negative.
+ */
+static int
+zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
+{
+	fstrans_cookie_t cookie;
+	uint64_t usedobjs;
+	int error;
+
+	cookie = spl_fstrans_mark();
+	error = -zfs_statvfs(dentry->d_inode, statp);
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(error, <=, 0);
+
+	/*
+	 * If required by a 32-bit system call, dynamically scale the
+	 * block size up to SPA_MAXBLOCKSIZE (16MiB) and decrease the block
+	 * counts to match.  With 2^32 blocks of 16MiB this allows sizes
+	 * up to roughly 64PiB to be reported.  The file counts must be
+	 * artificially capped at 2^32-1.
+	 */
+	if (unlikely(zpl_is_32bit_api())) {
+		while (statp->f_bsize < SPA_MAXBLOCKSIZE &&
+		    statp->f_blocks > UINT32_MAX) {
+			/* double the unit ... */
+			statp->f_bsize <<= 1;
+			statp->f_frsize <<= 1;
+
+			/* ... and halve every count expressed in it */
+			statp->f_blocks >>= 1;
+			statp->f_bfree >>= 1;
+			statp->f_bavail >>= 1;
+		}
+
+		/*
+		 * NOTE(review): if usedobjs itself exceeds UINT32_MAX the
+		 * subtraction below wraps and no cap is applied; confirm
+		 * whether that case needs handling.
+		 */
+		usedobjs = statp->f_files - statp->f_ffree;
+		statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
+		statp->f_files = statp->f_ffree + usedobjs;
+	}
+
+	return (error);
+}
+
+/*
+ * ->remount_fs(): wrap the option string in a zfs_mnt_t (no dataset
+ * name) and pass the request to zfs_remount().  The result is asserted
+ * to be zero or negative.
+ */
+static int
+zpl_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	int rc;
+
+	rc = -zfs_remount(sb, flags, &zm);
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(rc, <=, 0);
+
+	return (rc);
+}
+
+/*
+ * Write the dataset name of zfsvfs to seq, one character at a time, with
+ * every space replaced by the escape sequence "\040".  Always returns 0
+ * once past ZPL_ENTER().
+ */
+static int
+__zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
+{
+	ZPL_ENTER(zfsvfs);
+
+	char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+	dmu_objset_name(zfsvfs->z_os, fsname);
+
+	/*
+	 * Spaces in the dataset name must be converted to their octal
+	 * escape sequence for getmntent(3) to correctly parse the fsname
+	 * portion of /proc/self/mounts.
+	 */
+	for (const char *cp = fsname; *cp != 0; cp++) {
+		if (*cp != ' ')
+			seq_putc(seq, *cp);
+		else
+			seq_puts(seq, "\\040");
+	}
+
+	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
+
+	ZPL_EXIT(zfsvfs);
+
+	return (0);
+}
+/*
+ * ->show_devname(): look up the zfsvfs_t stored in the root dentry's
+ * super block and let __zpl_show_devname() print the dataset name.
+ */
+static int
+zpl_show_devname(struct seq_file *seq, struct dentry *root)
+{
+	return (__zpl_show_devname(seq, root->d_sb->s_fs_info));
+}
+
+/*
+ * Append the ZFS-specific mount options to seq: ",xattr" or ",noxattr"
+ * depending on ZSB_XATTR in z_flags and, when CONFIG_FS_POSIX_ACL is
+ * enabled, ",posixacl" for ZFS_ACLTYPE_POSIX or ",noacl" for any other
+ * ACL type.  Always returns 0.
+ */
+static int
+__zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
+{
+	seq_printf(seq, ",%s",
+	    zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
+
+#ifdef CONFIG_FS_POSIX_ACL
+	if (zfsvfs->z_acl_type == ZFS_ACLTYPE_POSIX)
+		seq_puts(seq, ",posixacl");
+	else
+		seq_puts(seq, ",noacl");
+#endif /* CONFIG_FS_POSIX_ACL */
+
+	return (0);
+}
+/*
+ * ->show_options(): look up the zfsvfs_t stored in the root dentry's
+ * super block and let __zpl_show_options() print the options.
+ */
+static int
+zpl_show_options(struct seq_file *seq, struct dentry *root)
+{
+	return (__zpl_show_options(seq, root->d_sb->s_fs_info));
+}
+
+/*
+ * Populate a new super block: data is the zfs_mnt_t describing the
+ * mount, which is passed to zfs_domount() together with the silent flag.
+ * The result is asserted to be zero or negative.
+ */
+static int
+zpl_fill_super(struct super_block *sb, void *data, int silent)
+{
+	zfs_mnt_t *zm = (zfs_mnt_t *)data;
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	int rc;
+
+	rc = -zfs_domount(sb, zm, silent);
+	spl_fstrans_unmark(cookie);
+	ASSERT3S(rc, <=, 0);
+
+	return (rc);
+}
+
+/*
+ * sget() comparison callback: a super block matches when it already has
+ * a zfsvfs_t whose objset is the one passed in data.  Returns 1 for a
+ * match and 0 otherwise, including when s_fs_info is still NULL.
+ */
+static int
+zpl_test_super(struct super_block *s, void *data)
+{
+	zfsvfs_t *zfsvfs = s->s_fs_info;
+	objset_t *os = data;
+
+	return (zfsvfs != NULL && os == zfsvfs->z_os);
+}
+
+/*
+ * Find or create the super block for the dataset named in zm.
+ *
+ * The objset is held only long enough to call sget().  A super block
+ * without a root is new and is filled in by zpl_fill_super() before
+ * being marked SB_ACTIVE; an existing one is reused unless its read-only
+ * state differs from the requested flags, which fails with -EBUSY.
+ * Failures are returned as ERR_PTR() values.
+ */
+static struct super_block *
+zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
+{
+	struct super_block *s;
+	objset_t *os;
+	int err;
+
+	err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
+	if (err != 0)
+		return (ERR_PTR(-err));
+
+	/*
+	 * The dsl pool lock must be released prior to calling sget().
+	 * It is possible sget() may block on the lock in grab_super()
+	 * while deactivate_super() holds that same lock and waits for
+	 * a txg sync.  If the dsl_pool lock is held over sget()
+	 * this can prevent the pool sync and cause a deadlock.
+	 */
+	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
+	dsl_pool_rele(dmu_objset_pool(os), FTAG);
+
+	s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
+
+	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
+	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
+
+	if (IS_ERR(s))
+		return (ERR_CAST(s));
+
+	if (s->s_root != NULL) {
+		/* Existing mount: the read-only state has to agree. */
+		if (((flags ^ s->s_flags) & SB_RDONLY) != 0) {
+			deactivate_locked_super(s);
+			return (ERR_PTR(-EBUSY));
+		}
+		return (s);
+	}
+
+	/* New super block: populate it before activating. */
+	err = zpl_fill_super(s, zm, (flags & SB_SILENT) != 0);
+	if (err != 0) {
+		deactivate_locked_super(s);
+		return (ERR_PTR(err));
+	}
+	s->s_flags |= SB_ACTIVE;
+
+	return (s);
+}
+
+/*
+ * ->mount(): bundle the dataset name and option data into a zfs_mnt_t,
+ * obtain the super block from zpl_mount_impl() and return a new
+ * reference to its root dentry, or the error from zpl_mount_impl().
+ */
+static struct dentry *
+zpl_mount(struct file_system_type *fs_type, int flags,
+    const char *osname, void *data)
+{
+	zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
+	struct super_block *sb;
+
+	sb = zpl_mount_impl(fs_type, flags, &zm);
+	if (IS_ERR(sb))
+		return (ERR_CAST(sb));
+
+	return (dget(sb->s_root));
+}
+/*
+ * ->kill_sb(): run zfs_preumount() on the super block first, then tear
+ * it down with kill_anon_super().
+ */
+static void
+zpl_kill_sb(struct super_block *sb)
+{
+	zfs_preumount(sb);
+	kill_anon_super(sb);
+}
+
+/*
+ * Prune callback: arg is the super block to prune; up to nr_to_scan
+ * objects are requested from zfs_prune().  Both the return value and the
+ * object count it reports are discarded.
+ */
+void
+zpl_prune_sb(int64_t nr_to_scan, void *arg)
+{
+	struct super_block *sb = arg;
+	int objects = 0;
+
+	(void) zfs_prune(sb, nr_to_scan, &objects);
+}
+/*
+ * Super block operations for ZPL.  write_inode and show_stats are
+ * explicitly left NULL; every other hook points at the zpl_* wrapper of
+ * the same purpose defined in this file.
+ */
+const struct super_operations zpl_super_operations = {
+	.alloc_inode		= zpl_inode_alloc,
+	.destroy_inode		= zpl_inode_destroy,
+	.dirty_inode		= zpl_dirty_inode,
+	.write_inode		= NULL,
+	.evict_inode		= zpl_evict_inode,
+	.put_super		= zpl_put_super,
+	.sync_fs		= zpl_sync_fs,
+	.statfs			= zpl_statfs,
+	.remount_fs		= zpl_remount_fs,
+	.show_devname		= zpl_show_devname,
+	.show_options		= zpl_show_options,
+	.show_stats		= NULL,
+};
+/*
+ * File system type description: named ZFS_DRIVER, mounted through
+ * zpl_mount() and torn down through zpl_kill_sb().
+ */
+struct file_system_type zpl_fs_type = {
+	.owner			= THIS_MODULE,
+	.name			= ZFS_DRIVER,
+	.mount			= zpl_mount,
+	.kill_sb		= zpl_kill_sb,
+};

diff --git a/zfs/module/os/linux/zfs/zpl_xattr.c b/zfs/module/os/linux/zfs/zpl_xattr.c
new file mode 100644
index 0000000..0848176
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zpl_xattr.c

@@ -0,0 +1,1549 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ *
+ * Extended attributes (xattr) on Solaris are implemented as files
+ * which exist in a hidden xattr directory.  These extended attributes
+ * can be accessed using the attropen() system call which opens
+ * the extended attribute.  It can then be manipulated just like
+ * a standard file descriptor.  This has a couple advantages such
+ * as practically no size limit on the file, and the extended
+ * attributes permissions may differ from those of the parent file.
+ * This interface is really quite clever, but it's also completely
+ * different than what is supported on Linux.  It also comes with a
+ * steep performance penalty when accessing small xattrs because they
+ * are not stored with the parent file.
+ *
+ * Under Linux extended attributes are manipulated by the system
+ * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
+ * extended attributes to be name/value pairs where the name is a
+ * NULL terminated string.  The name must also include one of the
+ * following namespace prefixes:
+ *
+ *   user     - No restrictions and is available to user applications.
+ *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
+ *   system   - Used for access control lists (system.nfs4_acl, etc).
+ *   security - Used by SELinux to store a file's security context.
+ *
+ * The value under Linux is limited to 65536 bytes of binary data.
+ * In practice, individual xattrs tend to be much smaller than this
+ * and are typically less than 100 bytes.  A good example of this
+ * are the security.selinux xattrs which are less than 100 bytes and
+ * exist for every file when xattr labeling is enabled.
+ *
+ * The Linux xattr implementation has been written to take advantage of
+ * this typical usage.  When the dataset property 'xattr=sa' is set,
+ * then xattrs will be preferentially stored as System Attributes (SA).
+ * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
+ * up to 64k of xattrs to be stored in the spill block.  If additional
+ * xattr space is required, which is unlikely under Linux, they will
+ * be stored using the traditional directory approach.
+ *
+ * This optimization results in roughly a 3x performance improvement
+ * when accessing xattrs because it avoids the need to perform a seek
+ * for every xattr value.  When multiple xattrs are stored per-file
+ * the performance improvements are even greater because all of the
+ * xattrs stored in the spill block will be cached.
+ *
+ * However, by default SA based xattrs are disabled in the Linux port
+ * to maximize compatibility with other implementations.  If you do
+ * enable SA based xattrs then they will not be visible on platforms
+ * which do not support this feature.
+ *
+ * NOTE: One additional consequence of the xattr directory implementation
+ * is that when an extended attribute is manipulated an inode is created.
+ * This inode will exist in the Linux inode cache but there will be no
+ * associated entry in the dentry cache which references it.  This is
+ * safe but it may result in some confusion.  Enabling SA based xattrs
+ * largely avoids the issue except in the overflow case.
+ */
+
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zap.h>
+#include <sys/vfs.h>
+#include <sys/zpl.h>
+
+/*
+ * Accumulator used while listing xattr names.  zpl_xattr_list() fills it
+ * with a positional initializer, so the field order below must not change.
+ */
+typedef struct xattr_filldir {
+	size_t size;		/* capacity of buf in bytes */
+	size_t offset;		/* bytes consumed (or required) so far */
+	char *buf;		/* output buffer, NULL for size-only queries */
+	struct dentry *dentry;	/* dentry whose xattrs are being listed */
+} xattr_filldir_t;
+
+static const struct xattr_handler *zpl_xattr_handler(const char *);
+
+/*
+ * Decide whether the xattr called 'name' may be reported when listing the
+ * xattrs of xf->dentry.
+ *
+ * The namespace handler is resolved with zpl_xattr_handler().  A name with
+ * no handler is hidden.  If the handler has a ->list callback it is asked
+ * (using whichever callback signature the kernel provides) and a zero
+ * answer hides the name.  A handler without ->list leaves the name visible.
+ *
+ * Returns 1 when the name is visible and 0 when it must be skipped.
+ */
+static int
+zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
+{
+	/*
+	 * This must be an automatic variable.  A function-level 'static'
+	 * pointer would be a single object shared and rewritten by every
+	 * concurrent caller, so one thread could end up invoking the ->list
+	 * callback of the handler another thread just looked up.
+	 */
+	const struct xattr_handler *handler;
+	struct dentry *d = xf->dentry;
+
+	handler = zpl_xattr_handler(name);
+	if (!handler)
+		return (0);
+
+	if (handler->list) {
+#if defined(HAVE_XATTR_LIST_SIMPLE)
+		if (!handler->list(d))
+			return (0);
+#elif defined(HAVE_XATTR_LIST_DENTRY)
+		if (!handler->list(d, NULL, 0, name, name_len, 0))
+			return (0);
+#elif defined(HAVE_XATTR_LIST_HANDLER)
+		if (!handler->list(handler, d, NULL, 0, name, name_len))
+			return (0);
+#endif
+	}
+
+	return (1);
+}
+
+/*
+ * Determine if a given xattr name should be visible and, if so, append it
+ * (NUL terminated) to the provided buffer (xf->buf).
+ *
+ * With a NULL xf->buf nothing is copied and only xf->offset is advanced,
+ * which yields the size a caller would need.  Returns 0 on success or when
+ * the name is hidden, and -ERANGE when the name does not fit in xf->size.
+ */
+static int
+zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
+{
+	char *dst;
+
+	/* Hidden names are silently skipped and consume no space. */
+	if (zpl_xattr_permission(xf, name, name_len) == 0)
+		return (0);
+
+	if (xf->buf != NULL) {
+		/* The name plus its terminator must fit in what is left. */
+		if (xf->offset + name_len + 1 > xf->size)
+			return (-ERANGE);
+
+		dst = xf->buf + xf->offset;
+		memcpy(dst, name, name_len);
+		dst[name_len] = '\0';
+	}
+
+	xf->offset += (name_len + 1);
+
+	return (0);
+}
+
+/*
+ * Walk the ZAP entries of the xattr directory 'dxip' and feed every entry
+ * name to zpl_xattr_filldir(), which either copies it into the caller's
+ * buffer or just accounts for its size.
+ *
+ * An entry that is not a single 8-byte integer ends the walk with -ENXIO.
+ * Running off the end of the ZAP (-ENOENT) is the normal termination and
+ * is reported as 0.
+ */
+static int
+zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
+{
+	zap_attribute_t	zap;
+	zap_cursor_t zc;
+	int rc;
+
+	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
+
+	for (;;) {
+		rc = -zap_cursor_retrieve(&zc, &zap);
+		if (rc != 0)
+			break;
+
+		if (!(zap.za_integer_length == 8 &&
+		    zap.za_num_integers == 1)) {
+			rc = -ENXIO;
+			break;
+		}
+
+		rc = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
+		if (rc != 0)
+			break;
+
+		zap_cursor_advance(&zc);
+	}
+
+	zap_cursor_fini(&zc);
+
+	return (rc == -ENOENT ? 0 : rc);
+}
+
+/*
+ * List the xattrs kept in the hidden xattr directory of xf->dentry.
+ *
+ * A file without an xattr directory (-ENOENT from the lookup) simply has
+ * nothing to list and yields 0.  Otherwise the directory is read with
+ * zpl_xattr_readdir() and the reference taken by the lookup is dropped.
+ */
+static ssize_t
+zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
+{
+	struct inode *xattr_dir_ip;
+	znode_t *xattr_dir_zp;
+	int rc;
+
+	/* Lookup the xattr directory */
+	rc = -zfs_lookup(ITOZ(xf->dentry->d_inode), NULL, &xattr_dir_zp,
+	    LOOKUP_XATTR, cr, NULL, NULL);
+	if (rc == -ENOENT)
+		return (0);
+	if (rc != 0)
+		return (rc);
+
+	xattr_dir_ip = ZTOI(xattr_dir_zp);
+	rc = zpl_xattr_readdir(xattr_dir_ip, xf);
+	iput(xattr_dir_ip);
+
+	return (rc);
+}
+
+/*
+ * List the xattrs stored in the znode's cached xattr nvlist.
+ *
+ * The nvlist is loaded with zfs_sa_get_xattr() under z_lock if it is not
+ * cached yet.  Every pair name is then passed to zpl_xattr_filldir(); the
+ * first non-zero result aborts the walk and is returned.
+ */
+static ssize_t
+zpl_xattr_list_sa(xattr_filldir_t *xf)
+{
+	znode_t *zp = ITOZ(xf->dentry->d_inode);
+	const char *attr_name;
+	nvpair_t *pair;
+	int rc = 0;
+
+	mutex_enter(&zp->z_lock);
+	if (zp->z_xattr_cached == NULL)
+		rc = -zfs_sa_get_xattr(zp);
+	mutex_exit(&zp->z_lock);
+
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(zp->z_xattr_cached);
+
+	pair = nvlist_next_nvpair(zp->z_xattr_cached, NULL);
+	while (pair != NULL) {
+		ASSERT3U(nvpair_type(pair), ==, DATA_TYPE_BYTE_ARRAY);
+
+		attr_name = nvpair_name(pair);
+		rc = zpl_xattr_filldir(xf, attr_name, strlen(attr_name));
+		if (rc != 0)
+			return (rc);
+
+		pair = nvlist_next_nvpair(zp->z_xattr_cached, pair);
+	}
+
+	return (0);
+}
+
+/*
+ * List the names of the xattrs visible on 'dentry'.
+ *
+ * Names are gathered first from the SA-backed store (only when both
+ * zfsvfs->z_use_sa and zp->z_is_sa are set) and then from the xattr
+ * directory, with z_xattr_lock held as reader for the whole walk.
+ * When 'buffer' is NULL the helpers only accumulate the space needed.
+ *
+ * Returns the accumulated xf.offset on success, or the negative error
+ * reported by either helper.
+ */
+ssize_t
+zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+	znode_t *zp = ITOZ(dentry->d_inode);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int error = 0;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	/*
+	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
+	 * credential hold and fstrans mark above would not be undone --
+	 * confirm against the macro definitions.
+	 */
+	ZPL_ENTER(zfsvfs);
+	ZPL_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_READER);
+
+	if (zfsvfs->z_use_sa && zp->z_is_sa) {
+		error = zpl_xattr_list_sa(&xf);
+		if (error)
+			goto out;
+	}
+
+	error = zpl_xattr_list_dir(&xf, cr);
+	if (error)
+		goto out;
+
+	/* Success: report how many bytes were (or would be) written. */
+	error = xf.offset;
+out:
+
+	rw_exit(&zp->z_xattr_lock);
+	ZPL_EXIT(zfsvfs);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	return (error);
+}
+
+/*
+ * Fetch the xattr 'name' of 'ip' from the hidden xattr directory.
+ *
+ * With size == 0 the size of the xattr file is returned and nothing is
+ * read.  A buffer smaller than the xattr file yields -ERANGE.  Otherwise
+ * the file is read into 'value' and the number of bytes read is returned.
+ * Lookup and read failures are returned as negative errors.
+ */
+static int
+zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
+    size_t size, cred_t *cr)
+{
+	znode_t *xattr_dir = NULL;
+	znode_t *xattr_file = NULL;
+	struct inode *xip;
+	struct iovec iov;
+	zfs_uio_t uio;
+	fstrans_cookie_t cookie;
+	int rc;
+
+	/* Lookup the xattr directory */
+	rc = -zfs_lookup(ITOZ(ip), NULL, &xattr_dir, LOOKUP_XATTR,
+	    cr, NULL, NULL);
+	if (rc != 0)
+		goto out;
+
+	/* Lookup a specific xattr name in the directory */
+	rc = -zfs_lookup(xattr_dir, (char *)name, &xattr_file, 0, cr,
+	    NULL, NULL);
+	if (rc != 0)
+		goto out;
+
+	xip = ZTOI(xattr_file);
+	if (size == 0) {
+		/* Size query only. */
+		rc = i_size_read(xip);
+	} else if (size < i_size_read(xip)) {
+		rc = -ERANGE;
+	} else {
+		iov.iov_base = (void *)value;
+		iov.iov_len = size;
+		zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
+
+		cookie = spl_fstrans_mark();
+		rc = -zfs_read(ITOZ(xip), &uio, 0, cr);
+		spl_fstrans_unmark(cookie);
+
+		/* On success report how much of the buffer was filled. */
+		if (rc == 0)
+			rc = size - zfs_uio_resid(&uio);
+	}
+out:
+	if (xattr_file)
+		zrele(xattr_file);
+
+	if (xattr_dir)
+		zrele(xattr_dir);
+
+	return (rc);
+}
+
+/*
+ * Fetch the xattr 'name' of 'ip' from the znode's cached xattr nvlist.
+ *
+ * The caller must hold z_xattr_lock.  The nvlist is loaded under z_lock
+ * if it is not cached yet.  Returns the value length on success (without
+ * copying when 'value' is NULL or 'size' is 0), -ERANGE when the buffer
+ * is too small, or the negated lookup/load error.
+ */
+static int
+zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
+{
+	znode_t *zp = ITOZ(ip);
+	uchar_t *stored;
+	uint_t stored_len;
+	int rc = 0;
+
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+	mutex_enter(&zp->z_lock);
+	if (zp->z_xattr_cached == NULL)
+		rc = -zfs_sa_get_xattr(zp);
+	mutex_exit(&zp->z_lock);
+
+	if (rc != 0)
+		return (rc);
+
+	ASSERT(zp->z_xattr_cached);
+	rc = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
+	    &stored, &stored_len);
+	if (rc != 0)
+		return (rc);
+
+	/* Only copy when the caller actually supplied a buffer. */
+	if (value != NULL && size != 0) {
+		if (size < stored_len)
+			return (-ERANGE);
+
+		memcpy(value, stored, stored_len);
+	}
+
+	return (stored_len);
+}
+
+/*
+ * Look up an xattr with z_xattr_lock already held by the caller.
+ *
+ * The SA store is consulted first when both z_use_sa and z_is_sa are set.
+ * Only a -ENOENT miss there (or the SA store being unused) falls through
+ * to the xattr directory.  A final -ENOENT is reported as -ENODATA.
+ */
+static int
+__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
+    cred_t *cr)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int rc = -ENOENT;
+
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+	if (zfsvfs->z_use_sa && zp->z_is_sa)
+		rc = zpl_xattr_get_sa(ip, name, value, size);
+
+	if (rc == -ENOENT)
+		rc = zpl_xattr_get_dir(ip, name, value, size, cr);
+
+	return (rc == -ENOENT ? -ENODATA : rc);
+}
+
+#define	XATTR_NOENT	0x0
+#define	XATTR_IN_SA	0x1
+#define	XATTR_IN_DIR	0x2
+/*
+ * Report where the xattr 'name' currently resides.
+ *
+ * '*where' is set to a mask of XATTR_IN_SA and/or XATTR_IN_DIR, or left
+ * at XATTR_NOENT.  The caller must hold z_xattr_lock.  Returns 0 when the
+ * xattr exists somewhere, -ENODATA when it exists nowhere, and any other
+ * negative error from the probes as-is.  A warning is logged if the xattr
+ * is found in both places.
+ */
+static int
+__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int rc;
+
+	ASSERT(where);
+	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
+
+	*where = XATTR_NOENT;
+
+	/* Size-only probe of the SA store, when it is in use. */
+	if (zfsvfs->z_use_sa && zp->z_is_sa) {
+		rc = zpl_xattr_get_sa(ip, name, NULL, 0);
+		if (rc < 0 && rc != -ENOENT)
+			return (rc);
+		if (rc >= 0)
+			*where |= XATTR_IN_SA;
+	}
+
+	/* Size-only probe of the xattr directory. */
+	rc = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
+	if (rc < 0 && rc != -ENOENT)
+		return (rc);
+	if (rc >= 0)
+		*where |= XATTR_IN_DIR;
+
+	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
+		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
+		    " in both SA and dir", ip, name);
+
+	return (*where == XATTR_NOENT ? -ENODATA : 0);
+}
+
+/*
+ * Read the xattr 'name' of 'ip' into 'value'.
+ *
+ * Wraps __zpl_xattr_get() with the credential hold, fstrans mark, ZPL
+ * enter/exit and the z_xattr_lock taken as reader.  The result of
+ * __zpl_xattr_get() is returned unchanged.
+ */
+static int
+zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	/*
+	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
+	 * credential hold and fstrans mark above would not be undone --
+	 * confirm against the macro definitions.
+	 */
+	ZPL_ENTER(zfsvfs);
+	ZPL_VERIFY_ZP(zp);
+	rw_enter(&zp->z_xattr_lock, RW_READER);
+	error = __zpl_xattr_get(ip, name, value, size, cr);
+	rw_exit(&zp->z_xattr_lock);
+	ZPL_EXIT(zfsvfs);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	return (error);
+}
+
+/*
+ * Create, overwrite or remove the xattr 'name' in the hidden xattr
+ * directory of 'ip'.
+ *
+ * A NULL 'value' requests removal; removing a name that is not present
+ * succeeds.  Otherwise the xattr file is created if missing (regular
+ * file, mode 0644), truncated to zero length and rewritten with 'size'
+ * bytes from 'value'.  On success the parent's ctime is updated and the
+ * inode is marked dirty.  'flags' is not consulted here.
+ *
+ * Returns 0 on success or a negative error; -ENOENT is reported as
+ * -ENODATA.
+ */
+static int
+zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
+    size_t size, int flags, cred_t *cr)
+{
+	znode_t *dxzp = NULL;
+	znode_t *xzp = NULL;
+	vattr_t *vap = NULL;
+	int lookup_flags, error;
+	const int xattr_mode = S_IFREG | 0644;
+	loff_t pos = 0;
+
+	/*
+	 * Lookup the xattr directory.  When we're adding an entry pass
+	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
+	 * When removing an entry this flag is not passed to avoid
+	 * unnecessarily creating a new xattr directory.
+	 */
+	lookup_flags = LOOKUP_XATTR;
+	if (value != NULL)
+		lookup_flags |= CREATE_XATTR_DIR;
+
+	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
+	    cr, NULL, NULL);
+	if (error)
+		goto out;
+
+	/* Lookup a specific xattr name in the directory */
+	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
+	if (error && (error != -ENOENT))
+		goto out;
+
+	/* A missing entry is not an error at this point. */
+	error = 0;
+
+	/* Remove a specific name xattr when value is set to NULL. */
+	if (value == NULL) {
+		if (xzp)
+			error = -zfs_remove(dxzp, (char *)name, cr, 0);
+
+		goto out;
+	}
+
+	/* Lookup failed create a new xattr. */
+	if (xzp == NULL) {
+		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+		vap->va_mode = xattr_mode;
+		vap->va_mask = ATTR_MODE;
+		vap->va_uid = crgetuid(cr);
+		vap->va_gid = crgetgid(cr);
+
+		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
+		    cr, ATTR_NOACLCHECK, NULL);
+		if (error)
+			goto out;
+	}
+
+	ASSERT(xzp != NULL);
+
+	/* Discard any previous contents before writing the new value. */
+	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
+	if (error)
+		goto out;
+
+	error = -zfs_write_simple(xzp, value, size, pos, NULL);
+out:
+	if (error == 0) {
+		ip->i_ctime = current_time(ip);
+		zfs_mark_inode_dirty(ip);
+	}
+
+	if (vap)
+		kmem_free(vap, sizeof (vattr_t));
+
+	if (xzp)
+		zrele(xzp);
+
+	if (dxzp)
+		zrele(dxzp);
+
+	if (error == -ENOENT)
+		error = -ENODATA;
+
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * Add, replace or remove the xattr 'name' in the znode's cached xattr
+ * nvlist and write the result back with zfs_sa_set_xattr().
+ *
+ * A NULL 'value' requests removal; if the name is not in the nvlist the
+ * removal is retried against the xattr directory.  Additions are refused
+ * with -EFBIG when 'size' exceeds DXATTR_MAX_ENTRY_SIZE or the encoded
+ * nvlist is already larger than DXATTR_MAX_SA_SIZE.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int
+zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
+    size_t size, int flags, cred_t *cr)
+{
+	znode_t *zp = ITOZ(ip);
+	nvlist_t *nvl;
+	size_t sa_size;
+	int error = 0;
+
+	mutex_enter(&zp->z_lock);
+	if (zp->z_xattr_cached == NULL)
+		error = -zfs_sa_get_xattr(zp);
+	mutex_exit(&zp->z_lock);
+
+	if (error)
+		return (error);
+
+	ASSERT(zp->z_xattr_cached);
+	nvl = zp->z_xattr_cached;
+
+	if (value == NULL) {
+		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
+		if (error == -ENOENT)
+			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
+	} else {
+		/*
+		 * The early returns below happen before the nvlist has been
+		 * modified, so the cached copy is left in place.
+		 */
+		/* Limited to 32k to keep nvpair memory allocations small */
+		if (size > DXATTR_MAX_ENTRY_SIZE)
+			return (-EFBIG);
+
+		/* Prevent the DXATTR SA from consuming the entire SA region */
+		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
+		if (error)
+			return (error);
+
+		if (sa_size > DXATTR_MAX_SA_SIZE)
+			return (-EFBIG);
+
+		error = -nvlist_add_byte_array(nvl, name,
+		    (uchar_t *)value, size);
+	}
+
+	/*
+	 * Update the SA for additions, modifications, and removals. On
+	 * error drop the inconsistent cached version of the nvlist, it
+	 * will be reconstructed from the ARC when next accessed.
+	 */
+	if (error == 0)
+		error = -zfs_sa_set_xattr(zp);
+
+	if (error) {
+		nvlist_free(nvl);
+		zp->z_xattr_cached = NULL;
+	}
+
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * Set or remove (value == NULL) the fully-prefixed xattr 'name' on 'ip'.
+ *
+ * Takes z_xattr_lock as writer, honours XATTR_CREATE / XATTR_REPLACE
+ * based on where the xattr currently lives, stores the value in the SA
+ * when that is enabled and possible, and otherwise in the xattr
+ * directory.  After a successful store the stale copy in the other
+ * location, if any, is removed; the result of that cleanup is ignored.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int
+zpl_xattr_set(struct inode *ip, const char *name, const void *value,
+    size_t size, int flags)
+{
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int where;
+	int error;
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+	/*
+	 * NOTE(review): if ZPL_ENTER/ZPL_VERIFY_ZP can return early, the
+	 * credential hold and fstrans mark above would not be undone --
+	 * confirm against the macro definitions.
+	 */
+	ZPL_ENTER(zfsvfs);
+	ZPL_VERIFY_ZP(zp);
+	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
+
+	/*
+	 * Before setting the xattr check to see if it already exists.
+	 * This is done to ensure the following optional flags are honored.
+	 *
+	 *   XATTR_CREATE: fail if xattr already exists
+	 *   XATTR_REPLACE: fail if xattr does not exist
+	 *
+	 * We also want to know if it resides in sa or dir, so we can make
+	 * sure we don't end up with duplicate in both places.
+	 */
+	error = __zpl_xattr_where(ip, name, &where, cr);
+	if (error < 0) {
+		if (error != -ENODATA)
+			goto out;
+		if (flags & XATTR_REPLACE)
+			goto out;
+
+		/* The xattr to be removed already doesn't exist */
+		error = 0;
+		if (value == NULL)
+			goto out;
+	} else {
+		error = -EEXIST;
+		if (flags & XATTR_CREATE)
+			goto out;
+	}
+
+	/* Preferentially store the xattr as a SA for better performance */
+	if (zfsvfs->z_use_sa && zp->z_is_sa &&
+	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
+		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
+		if (error == 0) {
+			/*
+			 * Successfully put into SA, we need to clear the one
+			 * in dir.
+			 */
+			if (where & XATTR_IN_DIR)
+				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
+			goto out;
+		}
+	}
+
+	/* SA store unused or failed: fall back to the xattr directory. */
+	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
+	/*
+	 * Successfully put into dir, we need to clear the one in SA.
+	 */
+	if (error == 0 && (where & XATTR_IN_SA))
+		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
+out:
+	rw_exit(&ITOZ(ip)->z_xattr_lock);
+	ZPL_EXIT(zfsvfs);
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+	ASSERT3S(error, <=, 0);
+
+	return (error);
+}
+
+/*
+ * Extended user attributes
+ *
+ * "Extended user attributes may be assigned to files and directories for
+ * storing arbitrary additional information such as the mime type,
+ * character set or encoding of a file.  The access permissions for user
+ * attributes are defined by the file permission bits: read permission
+ * is required to retrieve the attribute value, and writer permission is
+ * required to change it.
+ *
+ * The file permission bits of regular files and directories are
+ * interpreted differently from the file permission bits of special
+ * files and symbolic links.  For regular files and directories the file
+ * permission bits define access to the file's contents, while for
+ * device special files they define access to the device described by
+ * the special file.  The file permissions of symbolic links are not
+ * used in access checks.  These differences would allow users to
+ * consume filesystem resources in a way not controllable by disk quotas
+ * for group or world writable special files and directories.
+ *
+ * For this reason, extended user attributes are allowed only for
+ * regular files and directories, and access to extended user attributes
+ * is restricted to the owner and to users with appropriate capabilities
+ * for directories with the sticky bit set (see the chmod(1) manual page
+ * for an explanation of the sticky bit)." - xattr(7)
+ *
+ * ZFS allows extended user attributes to be disabled administratively
+ * by setting the 'xattr=off' property on the dataset.
+ */
+/*
+ * List predicate for the "user" namespace: non-zero only while the
+ * ZSB_XATTR flag is set in the super block's z_flags.  The list, size
+ * and name arguments are not used.
+ */
+static int
+__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
+    const char *name, size_t name_len)
+{
+	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
+}
+ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
+
+/*
+ * Read a "user." xattr.  'name' arrives without the namespace prefix; it
+ * is re-prefixed with XATTR_USER_PREFIX before the lookup.  Returns
+ * -EOPNOTSUPP when ZSB_XATTR is not set in the super block's z_flags,
+ * otherwise whatever zpl_xattr_get() returns.
+ */
+static int
+__zpl_xattr_user_get(struct inode *ip, const char *name,
+    void *value, size_t size)
+{
+	char *full_name;
+	int rc;
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	if ((ITOZSB(ip)->z_flags & ZSB_XATTR) == 0)
+		return (-EOPNOTSUPP);
+
+	full_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
+	rc = zpl_xattr_get(ip, full_name, value, size);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
+
+/*
+ * Write or remove a "user." xattr.  'name' arrives without the namespace
+ * prefix; it is re-prefixed with XATTR_USER_PREFIX before the update.
+ * Returns -EOPNOTSUPP when ZSB_XATTR is not set in the super block's
+ * z_flags, otherwise whatever zpl_xattr_set() returns.  'user_ns' is
+ * unused.
+ */
+static int
+__zpl_xattr_user_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
+    const void *value, size_t size, int flags)
+{
+	char *full_name;
+	int rc;
+
+	(void) user_ns;
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	if ((ITOZSB(ip)->z_flags & ZSB_XATTR) == 0)
+		return (-EOPNOTSUPP);
+
+	full_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
+	rc = zpl_xattr_set(ip, full_name, value, size, flags);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
+
+/*
+ * Handler for the "user" xattr namespace: binds XATTR_USER_PREFIX to the
+ * zpl_xattr_user_{list,get,set} callbacks (presumably emitted by the
+ * ZPL_XATTR_*_WRAPPER macros -- confirm against their definitions).
+ */
+xattr_handler_t zpl_xattr_user_handler =
+{
+	.prefix	= XATTR_USER_PREFIX,
+	.list	= zpl_xattr_user_list,
+	.get	= zpl_xattr_user_get,
+	.set	= zpl_xattr_user_set,
+};
+
+/*
+ * Trusted extended attributes
+ *
+ * "Trusted extended attributes are visible and accessible only to
+ * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
+ * class are used to implement mechanisms in user space (i.e., outside
+ * the kernel) which keep information in extended attributes to which
+ * ordinary processes should not have access." - xattr(7)
+ */
+/*
+ * List predicate for the "trusted" namespace: returns the result of
+ * capable(CAP_SYS_ADMIN).  None of the arguments are used.
+ */
+static int
+__zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
+    const char *name, size_t name_len)
+{
+	return (capable(CAP_SYS_ADMIN));
+}
+ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
+
+/*
+ * Read a "trusted." xattr.  Callers without CAP_SYS_ADMIN get -EACCES.
+ * 'name' arrives without the namespace prefix; it is re-prefixed with
+ * XATTR_TRUSTED_PREFIX before zpl_xattr_get() is called.
+ */
+static int
+__zpl_xattr_trusted_get(struct inode *ip, const char *name,
+    void *value, size_t size)
+{
+	char *full_name;
+	int rc;
+
+	if (capable(CAP_SYS_ADMIN) == 0)
+		return (-EACCES);
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	full_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
+	rc = zpl_xattr_get(ip, full_name, value, size);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
+
+/*
+ * Write or remove a "trusted." xattr.  Callers without CAP_SYS_ADMIN get
+ * -EACCES.  'name' arrives without the namespace prefix; it is re-prefixed
+ * with XATTR_TRUSTED_PREFIX before zpl_xattr_set() is called.  'user_ns'
+ * is unused.
+ */
+static int
+__zpl_xattr_trusted_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
+    const void *value, size_t size, int flags)
+{
+	char *full_name;
+	int rc;
+
+	(void) user_ns;
+
+	if (capable(CAP_SYS_ADMIN) == 0)
+		return (-EACCES);
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	full_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
+	rc = zpl_xattr_set(ip, full_name, value, size, flags);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
+
+/*
+ * Handler for the "trusted" xattr namespace: binds XATTR_TRUSTED_PREFIX
+ * to the zpl_xattr_trusted_{list,get,set} callbacks (presumably emitted
+ * by the ZPL_XATTR_*_WRAPPER macros -- confirm against their definitions).
+ */
+xattr_handler_t zpl_xattr_trusted_handler =
+{
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.list	= zpl_xattr_trusted_list,
+	.get	= zpl_xattr_trusted_get,
+	.set	= zpl_xattr_trusted_set,
+};
+
+/*
+ * Extended security attributes
+ *
+ * "The security attribute namespace is used by kernel security modules,
+ * such as Security Enhanced Linux, and also to implement file
+ * capabilities (see capabilities(7)).  Read and write access
+ * permissions to security attributes depend on the policy implemented
+ * for each security attribute by the security module.  When no security
+ * module is loaded, all processes have read access to extended security
+ * attributes, and write access is limited to processes that have the
+ * CAP_SYS_ADMIN capability." - xattr(7)
+ */
+/*
+ * List predicate for the "security" namespace: unconditionally returns 1,
+ * so these names are never filtered out here.  None of the arguments are
+ * used.
+ */
+static int
+__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
+    const char *name, size_t name_len)
+{
+	return (1);
+}
+ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
+
+/*
+ * Read a "security." xattr.  'name' arrives without the namespace prefix;
+ * it is re-prefixed with XATTR_SECURITY_PREFIX before zpl_xattr_get() is
+ * called.  No additional permission check is made here.
+ */
+static int
+__zpl_xattr_security_get(struct inode *ip, const char *name,
+    void *value, size_t size)
+{
+	char *full_name;
+	int rc;
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	full_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
+	rc = zpl_xattr_get(ip, full_name, value, size);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
+
+/*
+ * Write or remove a "security." xattr.  'name' arrives without the
+ * namespace prefix; it is re-prefixed with XATTR_SECURITY_PREFIX before
+ * zpl_xattr_set() is called.  'user_ns' is unused and no additional
+ * permission check is made here.
+ */
+static int
+__zpl_xattr_security_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
+    const void *value, size_t size, int flags)
+{
+	char *full_name;
+	int rc;
+
+	(void) user_ns;
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] == '\0')
+		return (-EINVAL);
+#endif
+	full_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
+	rc = zpl_xattr_set(ip, full_name, value, size, flags);
+	kmem_strfree(full_name);
+
+	return (rc);
+}
+ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
+
+/*
+ * Callback handed to security_inode_init_security(): stores every entry
+ * of the NULL-name-terminated 'xattrs' array on 'ip' as a security xattr.
+ * Stops at, and returns, the first negative result; otherwise returns the
+ * result of the last store (0 for an empty array).  'fs_info' is unused.
+ */
+static int
+zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
+    void *fs_info)
+{
+	const struct xattr *cur = xattrs;
+	int rc = 0;
+
+	while (cur->name != NULL) {
+		rc = __zpl_xattr_security_set(NULL, ip,
+		    cur->name, cur->value, cur->value_len, 0);
+		if (rc < 0)
+			return (rc);
+
+		cur++;
+	}
+
+	return (rc);
+}
+
+/*
+ * Initialise the security xattrs of the new inode 'ip' created in
+ * directory 'dip' under the name 'qstr'.  Delegates to
+ * security_inode_init_security() with zpl_xattr_security_init_impl() as
+ * the callback and returns its result.
+ */
+int
+zpl_xattr_security_init(struct inode *ip, struct inode *dip,
+    const struct qstr *qstr)
+{
+	int rc;
+
+	rc = security_inode_init_security(ip, dip, qstr,
+	    zpl_xattr_security_init_impl, NULL);
+
+	return (rc);
+}
+
+/*
+ * Security xattr namespace handlers.
+ *
+ * Binds XATTR_SECURITY_PREFIX to the zpl_xattr_security_{list,get,set}
+ * callbacks (presumably emitted by the ZPL_XATTR_*_WRAPPER macros --
+ * confirm against their definitions).
+ */
+xattr_handler_t zpl_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.list	= zpl_xattr_security_list,
+	.get	= zpl_xattr_security_get,
+	.set	= zpl_xattr_security_set,
+};
+
+/*
+ * Extended system attributes
+ *
+ * "Extended system attributes are used by the kernel to store system
+ * objects such as Access Control Lists.  Read and write access permissions
+ * to system attributes depend on the policy implemented for each system
+ * attribute implemented by filesystems in the kernel." - xattr(7)
+ */
+#ifdef CONFIG_FS_POSIX_ACL
+/*
+ * Store (or, with a NULL / mode-equivalent ACL, remove) the POSIX ACL of
+ * the given 'type' on 'ip' as a system xattr and keep the cached ACL in
+ * step.
+ *
+ * Symlinks are rejected with -EOPNOTSUPP and unknown types with -EINVAL.
+ * A default ACL on a non-directory yields -EACCES, or 0 when 'acl' is
+ * NULL.  Otherwise the result of the xattr update is returned.
+ */
+static int
+zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
+{
+	char *xattr_name;
+	char *buf = NULL;
+	size_t buf_len = 0;
+	umode_t mode;
+	int rc = 0;
+
+	if (S_ISLNK(ip->i_mode))
+		return (-EOPNOTSUPP);
+
+	if (type == ACL_TYPE_ACCESS) {
+		xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
+		if (acl != NULL) {
+			mode = ip->i_mode;
+			rc = posix_acl_equiv_mode(acl, &mode);
+			if (rc < 0)
+				return (rc);
+
+			/*
+			 * The mode bits will have been set by
+			 * ->zfs_setattr()->zfs_acl_chmod_setattr()
+			 * using the ZFS ACL conversion.  If they
+			 * differ from the Posix ACL conversion dirty
+			 * the inode to write the Posix mode bits.
+			 */
+			if (ip->i_mode != mode) {
+				ip->i_mode = mode;
+				ip->i_ctime = current_time(ip);
+				zfs_mark_inode_dirty(ip);
+			}
+
+			/* A zero result drops the ACL: the xattr is removed. */
+			if (rc == 0)
+				acl = NULL;
+		}
+	} else if (type == ACL_TYPE_DEFAULT) {
+		xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
+		if (!S_ISDIR(ip->i_mode))
+			return (acl ? -EACCES : 0);
+	} else {
+		return (-EINVAL);
+	}
+
+	/* Serialise the ACL into its xattr representation, if any. */
+	if (acl != NULL) {
+		buf_len = posix_acl_xattr_size(acl->a_count);
+		buf = kmem_alloc(buf_len, KM_SLEEP);
+
+		rc = zpl_acl_to_xattr(acl, buf, buf_len);
+		if (rc < 0) {
+			kmem_free(buf, buf_len);
+			return (rc);
+		}
+	}
+
+	rc = zpl_xattr_set(ip, xattr_name, buf, buf_len, 0);
+	if (buf != NULL)
+		kmem_free(buf, buf_len);
+
+	if (rc != 0)
+		return (rc);
+
+	if (acl != NULL)
+		zpl_set_cached_acl(ip, type, acl);
+	else
+		zpl_forget_cached_acl(ip, type);
+
+	return (0);
+}
+
+#ifdef HAVE_SET_ACL
+/*
+ * Set-ACL entry point.  The signature is selected at build time by the
+ * HAVE_SET_ACL_* feature macros; every variant forwards to
+ * zpl_set_acl_impl() with the inode (taken from the dentry where the
+ * signature passes one) and ignores the namespace / idmap argument.
+ */
+int
+#ifdef HAVE_SET_ACL_USERNS
+zpl_set_acl(struct user_namespace *userns, struct inode *ip,
+    struct posix_acl *acl, int type)
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type)
+#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
+zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type)
+#else
+zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
+#endif /* HAVE_SET_ACL_USERNS */
+{
+	/* Dentry-based variants resolve the inode with d_inode() first. */
+#ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
+	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+#else
+	return (zpl_set_acl_impl(ip, acl, type));
+#endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
+}
+#endif /* HAVE_SET_ACL */
+
+/*
+ * Load the POSIX ACL of the given 'type' for 'ip' from its system xattr.
+ *
+ * Returns the decoded ACL, NULL when the xattr does not exist (-ENODATA
+ * or -ENOSYS from the lookup), ERR_PTR(-EINVAL) for an unknown type and
+ * ERR_PTR(-EIO) for any other failure.  On kernels that do not manage the
+ * ACL cache themselves the cache is consulted first and updated afterwards.
+ */
+static struct posix_acl *
+zpl_get_acl_impl(struct inode *ip, int type)
+{
+	struct posix_acl *acl;
+	void *value = NULL;
+	size_t alloc_size = 0;
+	char *name;
+	int size;
+
+	/*
+	 * As of Linux 3.14, the kernel get_acl will check this for us.
+	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
+	 * as the kernel get_acl will set it to temporary sentinel value.
+	 */
+#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
+	acl = get_cached_acl(ip, type);
+	if (acl != ACL_NOT_CACHED)
+		return (acl);
+#endif
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = XATTR_NAME_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = XATTR_NAME_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		return (ERR_PTR(-EINVAL));
+	}
+
+	/* First call only asks for the size, second call fetches the data. */
+	size = zpl_xattr_get(ip, name, NULL, 0);
+	if (size > 0) {
+		/*
+		 * Remember the allocation size separately: 'size' is
+		 * overwritten by the second lookup, which may fail or return
+		 * a different length if the xattr changed in between.
+		 */
+		alloc_size = size;
+		value = kmem_alloc(alloc_size, KM_SLEEP);
+		size = zpl_xattr_get(ip, name, value, alloc_size);
+	}
+
+	if (size > 0) {
+		acl = zpl_acl_from_xattr(value, size);
+	} else if (size == -ENODATA || size == -ENOSYS) {
+		acl = NULL;
+	} else {
+		acl = ERR_PTR(-EIO);
+	}
+
+	/* Always release the buffer, with the size it was allocated with. */
+	if (value != NULL)
+		kmem_free(value, alloc_size);
+
+	/* As of Linux 4.7, the kernel get_acl will set this for us */
+#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
+	if (!IS_ERR(acl))
+		zpl_set_cached_acl(ip, type, acl);
+#endif
+
+	return (acl);
+}
+
+/*
+ * Get-ACL entry point.  Exactly one definition is compiled, chosen by the
+ * HAVE_GET_* feature macros; the build fails if none applies.
+ */
+#if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
+struct posix_acl *
+zpl_get_acl(struct inode *ip, int type, bool rcu)
+{
+	/* A request flagged 'rcu' is refused with -ECHILD. */
+	if (rcu)
+		return (ERR_PTR(-ECHILD));
+
+	return (zpl_get_acl_impl(ip, type));
+}
+#elif defined(HAVE_GET_ACL)
+struct posix_acl *
+zpl_get_acl(struct inode *ip, int type)
+{
+	return (zpl_get_acl_impl(ip, type));
+}
+#else
+#error "Unsupported iops->get_acl() implementation"
+#endif /* HAVE_GET_ACL_RCU */
+
+/*
+ * Set up the ACLs of the new inode 'ip' from the default ACL of its
+ * parent directory 'dir'.
+ *
+ * Does nothing unless the dataset's ACL type is ZFS_ACLTYPE_POSIX.  For a
+ * non-symlink whose parent has no default ACL, the current umask is
+ * applied to the mode instead.  Otherwise the parent's default ACL is
+ * stored as the default ACL (directories only), and the access ACL
+ * derived by __posix_acl_create() is stored when that call returns a
+ * positive value.  Returns 0 or a negative error.
+ */
+int
+zpl_init_acl(struct inode *ip, struct inode *dir)
+{
+	struct posix_acl *acl = NULL;
+	umode_t mode;
+	int rc = 0;
+
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (0);
+
+	if (!S_ISLNK(ip->i_mode)) {
+		acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return (PTR_ERR(acl));
+
+		if (acl == NULL) {
+			/* No default ACL to inherit: fall back to the umask. */
+			ip->i_mode &= ~current_umask();
+			ip->i_ctime = current_time(ip);
+			zfs_mark_inode_dirty(ip);
+			return (0);
+		}
+	}
+
+	/* Symlinks reach this point without an ACL. */
+	if (acl == NULL)
+		goto out;
+
+	if (S_ISDIR(ip->i_mode)) {
+		rc = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
+		if (rc != 0)
+			goto out;
+	}
+
+	mode = ip->i_mode;
+	rc = __posix_acl_create(&acl, GFP_KERNEL, &mode);
+	if (rc < 0)
+		goto out;
+
+	ip->i_mode = mode;
+	zfs_mark_inode_dirty(ip);
+	if (rc > 0)
+		rc = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
+out:
+	zpl_posix_acl_release(acl);
+
+	return (rc);
+}
+
+/*
+ * Bring the access ACL of 'ip' in line with its current i_mode.
+ *
+ * Does nothing unless the dataset's ACL type is ZFS_ACLTYPE_POSIX, and
+ * rejects symlinks with -EOPNOTSUPP.  An inode without an access ACL
+ * yields 0; a failed ACL lookup yields that error.  Otherwise the ACL is
+ * adjusted with __posix_acl_chmod() and, if that succeeds, stored again.
+ */
+int
+zpl_chmod_acl(struct inode *ip)
+{
+	struct posix_acl *acl;
+	int rc;
+
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (0);
+
+	if (S_ISLNK(ip->i_mode))
+		return (-EOPNOTSUPP);
+
+	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
+	/* PTR_ERR() of a NULL acl is 0, i.e. "nothing to do". */
+	if (acl == NULL || IS_ERR(acl))
+		return (PTR_ERR(acl));
+
+	rc = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
+	if (rc == 0)
+		rc = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
+
+	zpl_posix_acl_release(acl);
+
+	return (rc);
+}
+
+/*
+ * List callback for the access ACL xattr.
+ *
+ * Returns 0 unless the dataset's ACL type is ZFS_ACLTYPE_POSIX.
+ * Otherwise returns the size of the xattr name including its NUL, and
+ * copies the name into 'list' when a buffer of at least that size was
+ * supplied.  'name' and 'name_len' are unused.
+ */
+static int
+__zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
+    const char *name, size_t name_len)
+{
+	const size_t needed = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
+
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (0);
+
+	if (list != NULL && list_size >= needed)
+		memcpy(list, XATTR_NAME_POSIX_ACL_ACCESS, needed);
+
+	return (needed);
+}
+ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
+
+/*
+ * List callback for the default ACL xattr.
+ *
+ * Returns 0 unless the dataset's ACL type is ZFS_ACLTYPE_POSIX.
+ * Otherwise returns the size of the xattr name including its NUL, and
+ * copies the name into 'list' when a buffer of at least that size was
+ * supplied.  'name' and 'name_len' are unused.
+ */
+static int
+__zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
+    const char *name, size_t name_len)
+{
+	const size_t needed = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
+
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (0);
+
+	if (list != NULL && list_size >= needed)
+		memcpy(list, XATTR_NAME_POSIX_ACL_DEFAULT, needed);
+
+	return (needed);
+}
+ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
+
+/*
+ * Get callback for the access ACL xattr.
+ *
+ * Returns -EOPNOTSUPP unless the dataset's ACL type is
+ * ZFS_ACLTYPE_POSIX, -ENODATA when the inode has no access ACL, the
+ * lookup error when fetching the ACL fails, and otherwise the result of
+ * serialising the ACL into 'buffer' with zpl_acl_to_xattr().
+ */
+static int
+__zpl_xattr_acl_get_access(struct inode *ip, const char *name,
+    void *buffer, size_t size)
+{
+	struct posix_acl *acl;
+	int rc;
+
+	/* xattr_resolve_name will do this for us if this is defined */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (name[0] != '\0')
+		return (-EINVAL);
+#endif
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (-EOPNOTSUPP);
+
+	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return (PTR_ERR(acl));
+	if (acl == NULL)
+		return (-ENODATA);
+
+	rc = zpl_acl_to_xattr(acl, buffer, size);
+	zpl_posix_acl_release(acl);
+
+	return (rc);
+}
+ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
+
+static int
+__zpl_xattr_acl_get_default(struct inode *ip, const char *name,
+    void *buffer, size_t size)
+{
+	struct posix_acl *default_acl;
+	int rc;
+
+	/* xattr_resolve_name() performs this check when .name is used. */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (strcmp(name, "") != 0)
+		return (-EINVAL);
+#endif
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (-EOPNOTSUPP);
+
+	default_acl = zpl_get_acl_impl(ip, ACL_TYPE_DEFAULT);
+	if (IS_ERR(default_acl))
+		return (PTR_ERR(default_acl));
+	if (default_acl == NULL)
+		return (-ENODATA);
+
+	/* Serialize into the caller's buffer, then release the ACL. */
+	rc = zpl_acl_to_xattr(default_acl, buffer, size);
+	zpl_posix_acl_release(default_acl);
+	return (rc);
+}
+ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
+
+static int
+__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
+    const void *value, size_t size, int flags)
+{
+	struct posix_acl *new_acl = NULL;
+	int rc;
+
+	/* xattr_resolve_name() performs this check when .name is used. */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (strcmp(name, "") != 0)
+		return (-EINVAL);
+#endif
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (-EOPNOTSUPP);
+
+	/* Only callers passing the owner-or-capable check may proceed. */
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
+		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif
+
+	/* A NULL value leaves new_acl NULL for zpl_set_acl_impl(). */
+	if (value != NULL) {
+		new_acl = zpl_acl_from_xattr(value, size);
+		if (IS_ERR(new_acl))
+			return (PTR_ERR(new_acl));
+		if (new_acl != NULL) {
+			rc = zpl_posix_acl_valid(ip, new_acl);
+			if (rc != 0) {
+				zpl_posix_acl_release(new_acl);
+				return (rc);
+			}
+		}
+	}
+
+	rc = zpl_set_acl_impl(ip, new_acl, ACL_TYPE_ACCESS);
+	zpl_posix_acl_release(new_acl);
+	return (rc);
+}
+ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
+
+static int
+__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
+    const void *value, size_t size, int flags)
+{
+	struct posix_acl *new_acl = NULL;
+	int rc;
+
+	/* xattr_resolve_name() performs this check when .name is used. */
+#ifndef HAVE_XATTR_HANDLER_NAME
+	if (strcmp(name, "") != 0)
+		return (-EINVAL);
+#endif
+	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+		return (-EOPNOTSUPP);
+
+	/* Only callers passing the owner-or-capable check may proceed. */
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
+		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif
+
+	/* A NULL value leaves new_acl NULL for zpl_set_acl_impl(). */
+	if (value != NULL) {
+		new_acl = zpl_acl_from_xattr(value, size);
+		if (IS_ERR(new_acl))
+			return (PTR_ERR(new_acl));
+
+		if (new_acl != NULL) {
+			rc = zpl_posix_acl_valid(ip, new_acl);
+			if (rc != 0) {
+				zpl_posix_acl_release(new_acl);
+				return (rc);
+			}
+		}
+	}
+
+	rc = zpl_set_acl_impl(ip, new_acl, ACL_TYPE_DEFAULT);
+	zpl_posix_acl_release(new_acl);
+	return (rc);
+}
+ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
+
+/*
+ * Handler for the POSIX ACL access xattr (XATTR_NAME_POSIX_ACL_ACCESS).
+ *
+ * Use .name instead of .prefix when available: xattr_resolve_name() then
+ * matches the whole name and rejects names that merely start with .name.
+ */
+xattr_handler_t zpl_xattr_acl_access_handler =
+{
+#ifdef HAVE_XATTR_HANDLER_NAME
+	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
+#else
+	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
+#endif
+	.list	= zpl_xattr_acl_list_access,
+	.get	= zpl_xattr_acl_get_access,
+	.set	= zpl_xattr_acl_set_access,
+#if defined(HAVE_XATTR_LIST_SIMPLE) || \
+    defined(HAVE_XATTR_LIST_DENTRY) || \
+    defined(HAVE_XATTR_LIST_HANDLER)
+	.flags	= ACL_TYPE_ACCESS,
+#endif
+};
+
+/*
+ * Handler for the POSIX ACL default xattr (XATTR_NAME_POSIX_ACL_DEFAULT).
+ *
+ * Use .name instead of .prefix when available: xattr_resolve_name() then
+ * matches the whole name and rejects names that merely start with .name.
+ */
+xattr_handler_t zpl_xattr_acl_default_handler =
+{
+#ifdef HAVE_XATTR_HANDLER_NAME
+	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
+#else
+	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
+#endif
+	.list	= zpl_xattr_acl_list_default,
+	.get	= zpl_xattr_acl_get_default,
+	.set	= zpl_xattr_acl_set_default,
+#if defined(HAVE_XATTR_LIST_SIMPLE) || \
+    defined(HAVE_XATTR_LIST_DENTRY) || \
+    defined(HAVE_XATTR_LIST_HANDLER)
+	.flags	= ACL_TYPE_DEFAULT,
+#endif
+};
+
+#endif /* CONFIG_FS_POSIX_ACL */
+
+xattr_handler_t *zpl_xattr_handlers[] = {
+	&zpl_xattr_security_handler,
+	&zpl_xattr_trusted_handler,
+	&zpl_xattr_user_handler,
+#ifdef CONFIG_FS_POSIX_ACL
+	&zpl_xattr_acl_access_handler,	/* only with POSIX ACL support */
+	&zpl_xattr_acl_default_handler,
+#endif /* CONFIG_FS_POSIX_ACL */
+	NULL	/* end-of-table sentinel */
+};
+
+static const struct xattr_handler *
+zpl_xattr_handler(const char *name)
+{
+	/* Namespace handlers are selected by a prefix match on name. */
+	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+		return (&zpl_xattr_user_handler);
+	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+		return (&zpl_xattr_trusted_handler);
+	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
+		return (&zpl_xattr_security_handler);
+
+#ifdef CONFIG_FS_POSIX_ACL
+	/*
+	 * The ACL names must match exactly: sizeof () counts the
+	 * terminating NUL, so it takes part in the comparison.
+	 */
+	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)))
+		return (&zpl_xattr_acl_access_handler);
+	if (!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)))
+		return (&zpl_xattr_acl_default_handler);
+#endif /* CONFIG_FS_POSIX_ACL */
+
+	/* No handler matches this name. */
+	return (NULL);
+}
+
+#if defined(CONFIG_FS_POSIX_ACL) && \
+	(!defined(HAVE_POSIX_ACL_RELEASE) || \
+		defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
+struct acl_rel_struct {
+	struct acl_rel_struct *next;	/* next node in the release fifo */
+	struct posix_acl *acl;		/* ACL that will be kfree()d */
+	clock_t time;			/* ddi_get_lbolt() when queued */
+};
+
+#define	ACL_REL_GRACE	(60*HZ)	/* minimum age before an ACL is freed */
+#define	ACL_REL_WINDOW	(1*HZ)	/* extra delay added when scheduling */
+#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
+
+/*
+ * Lockless multi-producer single-consumer fifo list.
+ * Nodes are added at the tail and removed from the head. The tail pointer
+ * is our synchronization point: it always points to the next pointer of
+ * the last node, or to the head if the list is empty.
+ */
+static struct acl_rel_struct *acl_rel_head = NULL;
+static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
+
+static void
+zpl_posix_acl_free(void *arg)
+{
+	struct acl_rel_struct *freelist = NULL;
+	struct acl_rel_struct *a;
+	clock_t new_time;
+	boolean_t refire = B_FALSE;
+	/* Consumer: unlink entries older than ACL_REL_GRACE, then free them. */
+	ASSERT3P(acl_rel_head, !=, NULL);
+	while (acl_rel_head) {
+		a = acl_rel_head;
+		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
+			/*
+			 * If a is the last node we need to reset tail, but we
+			 * need to use cmpxchg to make sure it is still the
+			 * last node.
+			 */
+			if (acl_rel_tail == &a->next) {
+				acl_rel_head = NULL;
+				if (cmpxchg(&acl_rel_tail, &a->next,
+				    &acl_rel_head) == &a->next) {
+					ASSERT3P(a->next, ==, NULL);
+					a->next = freelist;
+					freelist = a;
+					break;
+				}
+			}
+			/*
+			 * a is not the last node; wait for the adder to store
+			 * its next pointer, then advance the head.
+			 */
+			while (READ_ONCE(a->next) == NULL)
+				cpu_relax();
+			acl_rel_head = a->next;
+			a->next = freelist;
+			freelist = a;
+		} else {
+			/*
+			 * a is still in its grace period. We must reschedule
+			 * the free task ourselves, since the adder only does
+			 * so when the list was empty.
+			 */
+			new_time = a->time + ACL_REL_SCHED;
+			refire = B_TRUE;
+			break;
+		}
+	}
+
+	if (refire)
+		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+		    NULL, TQ_SLEEP, new_time);
+	/* Entries on freelist are unlinked; release the ACLs and nodes. */
+	while (freelist) {
+		a = freelist;
+		freelist = a->next;
+		kfree(a->acl);
+		kmem_free(a, sizeof (struct acl_rel_struct));
+	}
+}
+
+void
+zpl_posix_acl_release_impl(struct posix_acl *acl)
+{
+	struct acl_rel_struct *a, **prev;
+	/* Producer: queue acl so zpl_posix_acl_free() can kfree() it later. */
+	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
+	a->next = NULL;
+	a->acl = acl;
+	a->time = ddi_get_lbolt();
+	/* atomically point the tail at us and fetch the previous tail */
+	prev = xchg(&acl_rel_tail, &a->next);
+	ASSERT3P(*prev, ==, NULL);
+	*prev = a;
+	/* the list was empty before, so we must schedule the free task */
+	if (prev == &acl_rel_head)
+		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
+}
+#endif

diff --git a/zfs/module/os/linux/zfs/zvol_os.c b/zfs/module/os/linux/zfs/zvol_os.c
new file mode 100644
index 0000000..36f40ba
--- /dev/null
+++ b/zfs/module/os/linux/zfs/zvol_os.c

@@ -0,0 +1,1251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ */
+
+#include <sys/dataset_kstats.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dir.h>
+#include <sys/zap.h>
+#include <sys/zfeature.h>
+#include <sys/zil_impl.h>
+#include <sys/dmu_tx.h>
+#include <sys/zio.h>
+#include <sys/zfs_rlock.h>
+#include <sys/spa_impl.h>
+#include <sys/zvol.h>
+#include <sys/zvol_impl.h>
+
+#include <linux/blkdev_compat.h>
+#include <linux/task_io_accounting_ops.h>
+
+unsigned int zvol_major = ZVOL_MAJOR;	/* major number of zvol disks */
+unsigned int zvol_request_sync = 0;	/* nonzero: service bios inline */
+unsigned int zvol_prefetch_bytes = (128 * 1024);
+unsigned long zvol_max_discard_blocks = 16384;
+unsigned int zvol_threads = 32;		/* max concurrent taskq i/o's */
+unsigned int zvol_open_timeout_ms = 1000; /* retry budget in zvol_open() */
+
+struct zvol_state_os {
+	struct gendisk		*zvo_disk;	/* generic disk */
+	struct request_queue	*zvo_queue;	/* request queue */
+	dev_t			zvo_dev;	/* device id */
+};
+
+taskq_t *zvol_taskq;	/* runs the zvol_*_task callbacks */
+static struct ida zvol_ida;
+
+typedef struct zv_request_stack {
+	zvol_state_t	*zv;	/* volume the bio targets */
+	struct bio	*bio;	/* bio being serviced */
+} zv_request_t;
+
+typedef struct zv_request_task {
+	zv_request_t zvr;	/* request, stored by value */
+	taskq_ent_t	ent;	/* entry handed to taskq_dispatch_ent() */
+} zv_request_task_t;
+
+static zv_request_task_t *
+zv_request_task_create(zv_request_t zvr)
+{
+	/* The request is copied by value into the newly allocated task. */
+	zv_request_task_t *req = kmem_alloc(sizeof (*req), KM_SLEEP);
+	req->zvr = zvr;
+	taskq_init_ent(&req->ent);
+	return (req);
+}
+
+static void
+zv_request_task_free(zv_request_task_t *task)
+{
+	kmem_free(task, sizeof (zv_request_task_t));	/* pairs with _create */
+}
+
+/*
+ * Given a path, report whether it names a ZVOL: the path must resolve to a
+ * block device whose major number equals zvol_major.
+ */
+static boolean_t
+zvol_is_zvol_impl(const char *path)
+{
+	dev_t devid = 0;
+
+	/* A path that does not resolve to a block device is not a zvol. */
+	if (vdev_lookup_bdev(path, &devid) != 0)
+		return (B_FALSE);
+
+	/* Only the major number decides; the minor is not inspected. */
+	return (MAJOR(devid) == zvol_major ? B_TRUE : B_FALSE);
+}
+
+static void
+zvol_write(zv_request_t *zvr)
+{
+	struct bio *bio = zvr->bio;
+	int error = 0;
+	zfs_uio_t uio;
+	/* Service one write bio; drops zv_suspend_lock and ends the bio. */
+	zfs_uio_bvec_init(&uio, bio);
+
+	zvol_state_t *zv = zvr->zv;
+	ASSERT3P(zv, !=, NULL);
+	ASSERT3U(zv->zv_open_count, >, 0);
+	ASSERT3P(zv->zv_zilog, !=, NULL);
+
+	/* A bio marked FLUSH must commit the ZIL before its data is written. */
+	if (bio_is_flush(bio))
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+	/* Some requests are just for flush and nothing else. */
+	if (uio.uio_resid == 0) {
+		rw_exit(&zv->zv_suspend_lock);
+		BIO_END_IO(bio, 0);
+		return;
+	}
+
+	struct request_queue *q = zv->zv_zso->zvo_queue;
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+	ssize_t start_resid = uio.uio_resid;
+	unsigned long start_time;
+
+	boolean_t acct = blk_queue_io_stat(q);
+	if (acct)
+		start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
+
+	boolean_t sync =
+	    bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+
+	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
+	    uio.uio_loffset, uio.uio_resid, RL_WRITER);
+
+	uint64_t volsize = zv->zv_volsize;
+	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
+		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
+		uint64_t off = uio.uio_loffset;
+		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
+
+		if (bytes > volsize - off)	/* don't write past the end */
+			bytes = volsize - off;
+
+		dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes);
+
+		/* This will only fail for ENOSPC */
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error) {
+			dmu_tx_abort(tx);
+			break;
+		}
+		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
+		if (error == 0) {
+			zvol_log_write(zv, tx, off, bytes, sync);
+		}
+		dmu_tx_commit(tx);
+
+		if (error)
+			break;
+	}
+	zfs_rangelock_exit(lr);
+	/* Account only the bytes that were consumed from the uio. */
+	int64_t nwritten = start_resid - uio.uio_resid;
+	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
+	task_io_account_write(nwritten);
+
+	if (sync)
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+	rw_exit(&zv->zv_suspend_lock);
+
+	if (acct)
+		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
+
+	BIO_END_IO(bio, -error);
+}
+
+static void
+zvol_write_task(void *arg)
+{
+	zv_request_task_t *req = (zv_request_task_t *)arg;
+	zvol_write(&req->zvr);		/* ends the bio */
+	zv_request_task_free(req);	/* allocated by the dispatcher */
+}
+
+static void
+zvol_discard(zv_request_t *zvr)
+{
+	struct bio *bio = zvr->bio;
+	zvol_state_t *zv = zvr->zv;
+	uint64_t start = BIO_BI_SECTOR(bio) << 9;
+	uint64_t size = BIO_BI_SIZE(bio);
+	uint64_t end = start + size;
+	boolean_t sync;
+	int error = 0;
+	dmu_tx_t *tx;
+	/* Free the bio's byte range; drops zv_suspend_lock, ends the bio. */
+	ASSERT3P(zv, !=, NULL);
+	ASSERT3U(zv->zv_open_count, >, 0);
+	ASSERT3P(zv->zv_zilog, !=, NULL);
+
+	struct request_queue *q = zv->zv_zso->zvo_queue;
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+	unsigned long start_time;
+
+	boolean_t acct = blk_queue_io_stat(q);
+	if (acct)
+		start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
+
+	sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+
+	if (end > zv->zv_volsize) {
+		error = SET_ERROR(EIO);
+		goto unlock;
+	}
+
+	/*
+	 * Align the request to volume block boundaries when a secure erase is
+	 * not required.  This will prevent dnode_free_range() from zeroing out
+	 * the unaligned parts which is slow (read-modify-write) and useless
+	 * since we are not freeing any space by doing so.
+	 */
+	if (!bio_is_secure_erase(bio)) {
+		start = P2ROUNDUP(start, zv->zv_volblocksize);
+		end = P2ALIGN(end, zv->zv_volblocksize);
+		size = end - start;
+	}
+	/* An empty range (e.g. after alignment) has nothing to free. */
+	if (start >= end)
+		goto unlock;
+
+	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
+	    start, size, RL_WRITER);
+
+	tx = dmu_tx_create(zv->zv_objset);
+	dmu_tx_mark_netfree(tx);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0) {
+		dmu_tx_abort(tx);
+	} else {
+		zvol_log_truncate(zv, tx, start, size, B_TRUE);
+		dmu_tx_commit(tx);
+		error = dmu_free_long_range(zv->zv_objset,
+		    ZVOL_OBJ, start, size);
+	}
+	zfs_rangelock_exit(lr);
+
+	if (error == 0 && sync)
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+unlock:
+	rw_exit(&zv->zv_suspend_lock);
+
+	if (acct)
+		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
+
+	BIO_END_IO(bio, -error);
+}
+
+static void
+zvol_discard_task(void *arg)
+{
+	zv_request_task_t *req = (zv_request_task_t *)arg;
+	zvol_discard(&req->zvr);	/* ends the bio */
+	zv_request_task_free(req);	/* allocated by the dispatcher */
+}
+
+static void
+zvol_read(zv_request_t *zvr)
+{
+	struct bio *bio = zvr->bio;
+	int error = 0;
+	zfs_uio_t uio;
+	/* Service one read bio; drops zv_suspend_lock and ends the bio. */
+	zfs_uio_bvec_init(&uio, bio);
+
+	zvol_state_t *zv = zvr->zv;
+	ASSERT3P(zv, !=, NULL);
+	ASSERT3U(zv->zv_open_count, >, 0);
+
+	struct request_queue *q = zv->zv_zso->zvo_queue;
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+	ssize_t start_resid = uio.uio_resid;
+	unsigned long start_time;
+
+	boolean_t acct = blk_queue_io_stat(q);
+	if (acct)
+		start_time = blk_generic_start_io_acct(q, disk, READ, bio);
+
+	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
+	    uio.uio_loffset, uio.uio_resid, RL_READER);
+
+	uint64_t volsize = zv->zv_volsize;
+	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
+		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
+
+		/* don't read past the end */
+		if (bytes > volsize - uio.uio_loffset)
+			bytes = volsize - uio.uio_loffset;
+
+		error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = SET_ERROR(EIO);
+			break;
+		}
+	}
+	zfs_rangelock_exit(lr);
+	/* Account only the bytes that were consumed from the uio. */
+	int64_t nread = start_resid - uio.uio_resid;
+	dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
+	task_io_account_read(nread);
+
+	rw_exit(&zv->zv_suspend_lock);
+
+	if (acct)
+		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
+
+	BIO_END_IO(bio, -error);
+}
+
+static void
+zvol_read_task(void *arg)
+{
+	zv_request_task_t *req = (zv_request_task_t *)arg;
+	zvol_read(&req->zvr);		/* ends the bio */
+	zv_request_task_free(req);	/* allocated by the dispatcher */
+}
+
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
+static void
+zvol_submit_bio(struct bio *bio)
+#else
+static blk_qc_t
+zvol_submit_bio(struct bio *bio)
+#endif
+#else
+static MAKE_REQUEST_FN_RET
+zvol_request(struct request_queue *q, struct bio *bio)
+#endif
+{
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+#if defined(HAVE_BIO_BDEV_DISK)
+	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+#else
+	struct request_queue *q = bio->bi_disk->queue;
+#endif
+#endif
+	zvol_state_t *zv = q->queuedata;
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
+	uint64_t size = BIO_BI_SIZE(bio);
+	int rw = bio_data_dir(bio);
+	/* Fail data-carrying bios that extend past the end of the volume. */
+	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
+		printk(KERN_INFO
+		    "%s: bad access: offset=%llu, size=%lu\n",
+		    zv->zv_zso->zvo_disk->disk_name,
+		    (long long unsigned)offset,
+		    (long unsigned)size);
+
+		BIO_END_IO(bio, -SET_ERROR(EIO));
+		goto out;
+	}
+
+	zv_request_t zvr = {
+		.zv = zv,
+		.bio = bio,
+	};
+	zv_request_task_t *task;
+
+	if (rw == WRITE) {
+		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
+			BIO_END_IO(bio, -SET_ERROR(EROFS));
+			goto out;
+		}
+
+		/*
+		 * Prevents the zvol from being suspended, or the ZIL being
+		 * concurrently opened.  Will be released after the i/o
+		 * completes.
+		 */
+		rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+		/*
+		 * Open a ZIL if this is the first time we have written to this
+		 * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
+		 * than zv_state_lock so that we don't need to acquire an
+		 * additional lock in this path.
+		 */
+		if (zv->zv_zilog == NULL) {
+			rw_exit(&zv->zv_suspend_lock);
+			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
+			if (zv->zv_zilog == NULL) {
+				zv->zv_zilog = zil_open(zv->zv_objset,
+				    zvol_get_data);
+				zv->zv_flags |= ZVOL_WRITTEN_TO;
+				/* replay / destroy done in zvol_create_minor */
+				VERIFY0((zv->zv_zilog->zl_header->zh_flags &
+				    ZIL_REPLAY_NEEDED));
+			}
+			rw_downgrade(&zv->zv_suspend_lock);
+		}
+
+		/*
+		 * We don't want this thread to be blocked waiting for i/o to
+		 * complete, so we instead wait from a taskq callback. The
+		 * i/o may be a ZIL write (via zil_commit()), or a read of an
+		 * indirect block, or a read of a data block (if this is a
+		 * partial-block write).  We will indicate that the i/o is
+		 * complete by calling BIO_END_IO() from the taskq callback.
+		 *
+		 * This design allows the calling thread to continue and
+		 * initiate more concurrent operations by calling
+		 * zvol_request() again. There are typically only a small
+		 * number of threads available to call zvol_request() (e.g.
+		 * one per iSCSI target), so keeping the latency of
+		 * zvol_request() low is important for performance.
+		 *
+		 * The zvol_request_sync module parameter allows this
+		 * behavior to be altered, for performance evaluation
+		 * purposes.  If the callback blocks, setting
+		 * zvol_request_sync=1 will result in much worse performance.
+		 *
+		 * We can have up to zvol_threads concurrent i/o's being
+		 * processed for all zvols on the system.  This is typically
+		 * a vast improvement over the zvol_request_sync=1 behavior
+		 * of one i/o at a time per zvol.  However, an even better
+		 * design would be for zvol_request() to initiate the zio
+		 * directly, and then be notified by the zio_done callback,
+		 * which would call BIO_END_IO().  Unfortunately, the DMU/ZIL
+		 * interfaces lack this functionality (they block waiting for
+		 * the i/o to complete).
+		 */
+		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
+			if (zvol_request_sync) {
+				zvol_discard(&zvr);
+			} else {
+				task = zv_request_task_create(zvr);
+				taskq_dispatch_ent(zvol_taskq,
+				    zvol_discard_task, task, 0, &task->ent);
+			}
+		} else {
+			if (zvol_request_sync) {
+				zvol_write(&zvr);
+			} else {
+				task = zv_request_task_create(zvr);
+				taskq_dispatch_ent(zvol_taskq,
+				    zvol_write_task, task, 0, &task->ent);
+			}
+		}
+	} else {
+		/*
+		 * The SCST driver, and possibly others, may issue READ I/Os
+		 * with a length of zero bytes.  These empty I/Os contain no
+		 * data and require no additional handling.
+		 */
+		if (size == 0) {
+			BIO_END_IO(bio, 0);
+			goto out;
+		}
+		/* Released by zvol_read() once the i/o completes. */
+		rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+		/* See comment in WRITE case above. */
+		if (zvol_request_sync) {
+			zvol_read(&zvr);
+		} else {
+			task = zv_request_task_create(zvr);
+			taskq_dispatch_ent(zvol_taskq,
+			    zvol_read_task, task, 0, &task->ent);
+		}
+	}
+
+out:
+	spl_fstrans_unmark(cookie);
+#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
+	defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \
+	!defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID)
+	return (BLK_QC_T_NONE);
+#endif
+}
+
+static int
+#ifdef HAVE_BLK_MODE_T
+zvol_open(struct gendisk *disk, blk_mode_t flag)
+#else
+zvol_open(struct block_device *bdev, fmode_t flag)
+#endif
+{
+	zvol_state_t *zv;
+	int error = 0;
+	boolean_t drop_suspend = B_FALSE;
+#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
+	hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
+	hrtime_t start = gethrtime();
+
+retry:
+#endif
+	rw_enter(&zvol_state_lock, RW_READER);
+	/*
+	 * Obtain a copy of private_data under the zvol_state_lock to make
+	 * sure that either the result of zvol free code path setting
+	 * disk->private_data to NULL is observed, or zvol_os_free()
+	 * is not called on this zv because of the positive zv_open_count.
+	 */
+#ifdef HAVE_BLK_MODE_T
+	zv = disk->private_data;
+#else
+	zv = bdev->bd_disk->private_data;
+#endif
+	if (zv == NULL) {
+		rw_exit(&zvol_state_lock);
+		return (SET_ERROR(-ENXIO));
+	}
+
+	mutex_enter(&zv->zv_state_lock);
+	/*
+	 * Make sure zvol is not suspended during first open
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	if (zv->zv_open_count == 0) {
+		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/* check to see if zv_suspend_lock is needed */
+			if (zv->zv_open_count != 0) {
+				rw_exit(&zv->zv_suspend_lock);
+			} else {
+				drop_suspend = B_TRUE;
+			}
+		} else {
+			drop_suspend = B_TRUE;
+		}
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	if (zv->zv_open_count == 0) {
+		boolean_t drop_namespace = B_FALSE;
+
+		ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+
+		/*
+		 * In all other call paths the spa_namespace_lock is taken
+		 * before the bdev->bd_mutex lock.  However, on open(2)
+		 * the __blkdev_get() function calls fops->open() with the
+		 * bdev->bd_mutex lock held.  This can result in a deadlock
+		 * when zvols from one pool are used as vdevs in another.
+		 *
+		 * To prevent a lock inversion deadlock we preemptively
+		 * take the spa_namespace_lock.  Normally the lock will not
+		 * be contended and this is safe because spa_open_common()
+		 * handles the case where the caller already holds the
+		 * spa_namespace_lock.
+		 *
+		 * When the lock cannot be acquired after multiple retries
+		 * this must be the vdev on zvol deadlock case and we have
+		 * no choice but to return an error.  For 5.12 and older
+		 * kernels returning -ERESTARTSYS will result in the
+		 * bdev->bd_mutex being dropped, then reacquired, and
+		 * fops->open() being called again.  This process can be
+		 * repeated safely until both locks are acquired.  For 5.13
+		 * and newer the -ERESTARTSYS retry logic was removed from
+		 * the kernel so the only option is to return the error for
+		 * the caller to handle it.
+		 */
+		if (!mutex_owned(&spa_namespace_lock)) {
+			if (!mutex_tryenter(&spa_namespace_lock)) {
+				mutex_exit(&zv->zv_state_lock);
+				rw_exit(&zv->zv_suspend_lock);
+
+#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
+				schedule();
+				return (SET_ERROR(-ERESTARTSYS));
+#else
+				if ((gethrtime() - start) > timeout)
+					return (SET_ERROR(-ERESTARTSYS));
+
+				schedule_timeout(MSEC_TO_TICK(10));
+				goto retry;
+#endif
+			} else {
+				drop_namespace = B_TRUE;
+			}
+		}
+
+		error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
+
+		if (drop_namespace)
+			mutex_exit(&spa_namespace_lock);
+	}
+	/* Count the open unless a writer opened a read-only volume. */
+	if (error == 0) {
+		if ((blk_mode_is_open_write(flag)) &&
+		    (zv->zv_flags & ZVOL_RDONLY)) {
+			if (zv->zv_open_count == 0)
+				zvol_last_close(zv);
+
+			error = SET_ERROR(-EROFS);
+		} else {
+			zv->zv_open_count++;
+		}
+	}
+
+	mutex_exit(&zv->zv_state_lock);
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+	/* On success, run the media-change check for this device. */
+	if (error == 0)
+#ifdef HAVE_BLK_MODE_T
+		disk_check_media_change(disk);
+#else
+		zfs_check_media_change(bdev);
+#endif
+
+	return (error);
+}
+
+static void
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
+zvol_release(struct gendisk *disk)
+#else
+zvol_release(struct gendisk *disk, fmode_t unused)
+#endif
+{
+#if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
+	(void) unused;
+#endif
+	zvol_state_t *zv;
+	boolean_t drop_suspend = B_TRUE;
+	/* Drop one open reference; the last close calls zvol_last_close(). */
+	rw_enter(&zvol_state_lock, RW_READER);
+	zv = disk->private_data;
+
+	mutex_enter(&zv->zv_state_lock);
+	ASSERT3U(zv->zv_open_count, >, 0);
+	/*
+	 * Make sure the zvol is not suspended during last close
+	 * (hold zv_suspend_lock) and respect proper lock acquisition
+	 * ordering - zv_suspend_lock before zv_state_lock
+	 */
+	if (zv->zv_open_count == 1) {
+		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
+			mutex_exit(&zv->zv_state_lock);
+			rw_enter(&zv->zv_suspend_lock, RW_READER);
+			mutex_enter(&zv->zv_state_lock);
+			/* check to see if zv_suspend_lock is needed */
+			if (zv->zv_open_count != 1) {
+				rw_exit(&zv->zv_suspend_lock);
+				drop_suspend = B_FALSE;
+			}
+		}
+	} else {
+		drop_suspend = B_FALSE;
+	}
+	rw_exit(&zvol_state_lock);
+
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	zv->zv_open_count--;
+	if (zv->zv_open_count == 0) {
+		ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+		zvol_last_close(zv);
+	}
+
+	mutex_exit(&zv->zv_state_lock);
+
+	if (drop_suspend)
+		rw_exit(&zv->zv_suspend_lock);
+}
+
+/* Block-device ioctl entry point; returns 0 or a negative errno. */
+static int
+zvol_ioctl(struct block_device *bdev, fmode_t mode,
+    unsigned int cmd, unsigned long arg)
+{
+	zvol_state_t *zv = bdev->bd_disk->private_data;
+	int error = 0;
+
+	ASSERT3U(zv->zv_open_count, >, 0);
+	switch (cmd) {
+	case BLKFLSBUF:
+		fsync_bdev(bdev);
+		invalidate_bdev(bdev);
+		rw_enter(&zv->zv_suspend_lock, RW_READER);
+		if (!(zv->zv_flags & ZVOL_RDONLY))
+			txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
+		rw_exit(&zv->zv_suspend_lock);
+		break;
+
+	case BLKZNAME:
+		/* copy_to_user() returns the number of bytes NOT copied. */
+		mutex_enter(&zv->zv_state_lock);
+		if (copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN) != 0)
+			error = -EFAULT;
+		mutex_exit(&zv->zv_state_lock);
+		break;
+
+	default:
+		error = -ENOTTY;
+		break;
+	}
+
+	return (SET_ERROR(error));
+}
+
+#ifdef CONFIG_COMPAT
+static int
+zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
+    unsigned cmd, unsigned long arg)
+{
+	return (zvol_ioctl(bdev, mode, cmd, arg));	/* same handling */
+}
+#else /* !CONFIG_COMPAT */
+#define	zvol_compat_ioctl	NULL
+#endif /* CONFIG_COMPAT */
+
+static unsigned int
+zvol_check_events(struct gendisk *disk, unsigned int clearing)
+{
+	unsigned int events = 0;
+	zvol_state_t *zv;
+
+	/* private_data is read under zvol_state_lock; NULL reports nothing. */
+	rw_enter(&zvol_state_lock, RW_READER);
+	zv = disk->private_data;
+	if (zv != NULL) {
+		mutex_enter(&zv->zv_state_lock);
+		if (zv->zv_changed)
+			events = DISK_EVENT_MEDIA_CHANGE;
+		zv->zv_changed = 0;
+		mutex_exit(&zv->zv_state_lock);
+	}
+	rw_exit(&zvol_state_lock);
+	return (events);
+}
+
+static int
+zvol_revalidate_disk(struct gendisk *disk)
+{
+	zvol_state_t *zv;
+
+	/* Publish zv_volsize, converted to sectors, as the disk capacity. */
+	rw_enter(&zvol_state_lock, RW_READER);
+	zv = disk->private_data;
+	if (zv != NULL) {
+		mutex_enter(&zv->zv_state_lock);
+		set_capacity(zv->zv_zso->zvo_disk,
+		    zv->zv_volsize >> SECTOR_BITS);
+		mutex_exit(&zv->zv_state_lock);
+	}
+	rw_exit(&zvol_state_lock);
+	return (0);
+}
+
+static int
+zvol_update_volsize(zvol_state_t *zv, uint64_t volsize)
+{
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+	/* volsize is unused; zvol_revalidate_disk() reads zv->zv_volsize. */
+#if defined(HAVE_REVALIDATE_DISK_SIZE)
+	revalidate_disk_size(disk, zvol_revalidate_disk(disk) == 0);
+#elif defined(HAVE_REVALIDATE_DISK)
+	revalidate_disk(disk);
+#else
+	zvol_revalidate_disk(disk);
+#endif
+	return (0);
+}
+
+static void
+zvol_clear_private(zvol_state_t *zv)
+{
+	/*
+	 * Cleared while holding zvol_state_lock as a writer, so that
+	 * zvol_open() observes NULL and fails instead of opening zv.
+	 */
+	zv->zv_zso->zvo_disk->private_data = NULL;
+}
+
+/*
+ * Provide a simple virtual geometry for legacy compatibility.  Devices of
+ * up to 1 MiB (2048 sectors) use a small head and sector count so that very
+ * tiny devices work.  Devices over 1 MiB use a standard head and sector
+ * count, which keeps the cylinder count reasonable.
+ */
+static int
+zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	zvol_state_t *zv = bdev->bd_disk->private_data;
+	sector_t capacity;
+
+	ASSERT3U(zv->zv_open_count, >, 0);
+
+	capacity = get_capacity(zv->zv_zso->zvo_disk);
+	geo->start = 0;
+
+	if (capacity <= 2048) {
+		/* Tiny device: keep heads * sectors small. */
+		geo->heads = 2;
+		geo->sectors = 4;
+	} else {
+		geo->heads = 16;
+		geo->sectors = 63;
+	}
+	geo->cylinders = capacity / (geo->heads * geo->sectors);
+
+	return (0);
+}
+
+static struct block_device_operations zvol_ops = {
+	.open			= zvol_open,
+	.release		= zvol_release,
+	.ioctl			= zvol_ioctl,
+	.compat_ioctl		= zvol_compat_ioctl,
+	.check_events		= zvol_check_events,
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
+	.revalidate_disk	= zvol_revalidate_disk,
+#endif /* HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK */
+	.getgeo			= zvol_getgeo,
+	.owner			= THIS_MODULE,
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+	.submit_bio		= zvol_submit_bio,
+#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+};
+
+/*
+ * Allocate memory for a new zvol_state_t and setup the required
+ * request queue and generic disk structures for the block device.
+ *
+ * dev:  device number for the new zvol; its minor bits become the disk's
+ *       first_minor and the numeric suffix of the disk name.
+ * name: dataset name, used to look up "volmode" and copied to zv_name.
+ *
+ * Returns the new zvol_state_t, or NULL when the "volmode" property
+ * cannot be read, when the effective volmode is ZFS_VOLMODE_NONE, or when
+ * the queue/disk allocation fails.  The disk is only prepared here; it is
+ * not added (add_disk() is called by zvol_os_create_minor()).
+ */
+static zvol_state_t *
+zvol_alloc(dev_t dev, const char *name)
+{
+	zvol_state_t *zv;
+	struct zvol_state_os *zso;
+	uint64_t volmode;
+
+	if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
+		return (NULL);
+
+	/* A dataset left at "default" follows the zvol_volmode tunable. */
+	if (volmode == ZFS_VOLMODE_DEFAULT)
+		volmode = zvol_volmode;
+
+	if (volmode == ZFS_VOLMODE_NONE)
+		return (NULL);
+
+	zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
+	zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
+	zv->zv_zso = zso;
+	zv->zv_volmode = volmode;
+
+	list_link_init(&zv->zv_next);
+	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/*
+	 * Three allocation variants follow, selected by the HAVE_* macros.
+	 * Each one leaves both zso->zvo_disk and zso->zvo_queue populated
+	 * and linked to each other, or jumps to out_kmem on failure.
+	 */
+#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
+#ifdef HAVE_BLK_ALLOC_DISK
+	/* The disk comes with its own queue; just remember the pointer. */
+	zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (zso->zvo_disk == NULL)
+		goto out_kmem;
+
+	zso->zvo_disk->minors = ZVOL_MINORS;
+	zso->zvo_queue = zso->zvo_disk->queue;
+#else
+	/* Queue and disk are allocated separately and linked by hand. */
+	zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
+	if (zso->zvo_queue == NULL)
+		goto out_kmem;
+
+	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
+	if (zso->zvo_disk == NULL) {
+		blk_cleanup_queue(zso->zvo_queue);
+		goto out_kmem;
+	}
+
+	zso->zvo_disk->queue = zso->zvo_queue;
+#endif /* HAVE_BLK_ALLOC_DISK */
+#else
+	/* The request handler is bound to the queue at allocation time. */
+	zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
+	if (zso->zvo_queue == NULL)
+		goto out_kmem;
+
+	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
+	if (zso->zvo_disk == NULL) {
+		blk_cleanup_queue(zso->zvo_queue);
+		goto out_kmem;
+	}
+
+	zso->zvo_disk->queue = zso->zvo_queue;
+#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+
+	blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE);
+
+	/* Limit read-ahead to a single page to prevent over-prefetching. */
+	blk_queue_set_read_ahead(zso->zvo_queue, 1);
+
+	/* Disable write merging in favor of the ZIO pipeline. */
+	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
+
+	/* Enable /proc/diskstats */
+	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue);
+
+	zso->zvo_queue->queuedata = zv;
+	zso->zvo_dev = dev;
+	zv->zv_open_count = 0;
+	strlcpy(zv->zv_name, name, MAXNAMELEN);
+
+	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
+	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
+
+	zso->zvo_disk->major = zvol_major;
+	zso->zvo_disk->events = DISK_EVENT_MEDIA_CHANGE;
+
+	/*
+	 * Setting ZFS_VOLMODE_DEV disables partitioning on ZVOL devices.
+	 * This is accomplished by limiting the number of minors for the
+	 * device to one and explicitly disabling partition scanning.
+	 */
+	if (volmode == ZFS_VOLMODE_DEV) {
+		zso->zvo_disk->minors = 1;
+		zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT;
+		zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART;
+	}
+
+	zso->zvo_disk->first_minor = (dev & MINORMASK);
+	zso->zvo_disk->fops = &zvol_ops;
+	zso->zvo_disk->private_data = zv;
+	snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d",
+	    ZVOL_DEV_NAME, (dev & MINORMASK));
+
+	return (zv);
+
+out_kmem:
+	/*
+	 * Only reached before the rangelock and suspend lock are set up, so
+	 * the state mutex is the one initialised object left to tear down.
+	 */
+	mutex_destroy(&zv->zv_state_lock);
+	kmem_free(zso, sizeof (struct zvol_state_os));
+	kmem_free(zv, sizeof (zvol_state_t));
+	return (NULL);
+}
+
+/*
+ * Cleanup then free a zvol_state_t which was created by zvol_alloc().
+ * At this time, the structure is not opened by anyone, is taken off
+ * the zvol_state_list, and has its private data set to NULL.
+ * The zvol_state_lock is dropped.
+ *
+ * This function may take many milliseconds to complete (e.g. we've seen
+ * it take over 256ms), due to the calls to "blk_cleanup_queue" and
+ * "del_gendisk". Thus, consumers need to be careful to account for this
+ * latency when calling this function.
+ */
+static void
+zvol_free(zvol_state_t *zv)
+{
+
+	/* The preconditions described above are checked, not established. */
+	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
+	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
+	ASSERT0(zv->zv_open_count);
+	ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL);
+
+	rw_destroy(&zv->zv_suspend_lock);
+	zfs_rangelock_fini(&zv->zv_rangelock);
+
+	del_gendisk(zv->zv_zso->zvo_disk);
+	/*
+	 * Mirror the allocation variants in zvol_alloc(): when the disk came
+	 * from blk_alloc_disk() only the disk is released here, otherwise the
+	 * separately allocated queue is cleaned up before the disk reference
+	 * is dropped.
+	 */
+#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
+	defined(HAVE_BLK_ALLOC_DISK)
+#if defined(HAVE_BLK_CLEANUP_DISK)
+	blk_cleanup_disk(zv->zv_zso->zvo_disk);
+#else
+	put_disk(zv->zv_zso->zvo_disk);
+#endif
+#else
+	blk_cleanup_queue(zv->zv_zso->zvo_queue);
+	put_disk(zv->zv_zso->zvo_disk);
+#endif
+
+	/*
+	 * Give back the ida index; this is the inverse of the
+	 * "idx << ZVOL_MINOR_BITS" done in zvol_os_create_minor().
+	 */
+	ida_simple_remove(&zvol_ida,
+	    MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
+
+	mutex_destroy(&zv->zv_state_lock);
+	dataset_kstats_destroy(&zv->zv_kstat);
+
+	/* Release the OS-specific part first, then the state itself. */
+	kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
+	kmem_free(zv, sizeof (zvol_state_t));
+}
+
+/*
+ * Intentionally empty in this implementation: there is nothing to wait
+ * for here, so the call returns immediately and zv is not touched.
+ */
+void
+zvol_wait_close(zvol_state_t *zv)
+{
+}
+
+/*
+ * Create a block device minor node and setup the linkage between it
+ * and the specified volume.  Once this function returns the block
+ * device is live and ready for use.
+ *
+ * name: dataset name of the volume.
+ *
+ * Returns 0 on success, and also 0 without doing anything when
+ * zvol_inhibit_dev is set.  Otherwise returns a positive errno: the
+ * negated ida allocation error, EEXIST when a zvol with this name is
+ * already known, EAGAIN when zvol_alloc() declines or fails, or whatever
+ * dmu_objset_own(), dmu_object_info() or zap_lookup() reported.  On
+ * kernels with HAVE_ADD_DISK_RET the add_disk() result is returned as is.
+ */
+static int
+zvol_os_create_minor(const char *name)
+{
+	zvol_state_t *zv;
+	objset_t *os;
+	dmu_object_info_t *doi;
+	uint64_t volsize;
+	uint64_t len;
+	unsigned minor = 0;
+	int error = 0;
+	int idx;
+	uint64_t hash = zvol_name_hash(name);
+
+	if (zvol_inhibit_dev)
+		return (0);
+
+	/* Reserve an index; the first minor of the disk is derived from it. */
+	idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
+	if (idx < 0)
+		return (SET_ERROR(-idx));
+	minor = idx << ZVOL_MINOR_BITS;
+
+	/* A hit comes back with zv_state_lock held; drop it and bail out. */
+	zv = zvol_find_by_name_hash(name, hash, RW_NONE);
+	if (zv) {
+		ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+		mutex_exit(&zv->zv_state_lock);
+		ida_simple_remove(&zvol_ida, idx);
+		return (SET_ERROR(EEXIST));
+	}
+
+	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
+
+	/*
+	 * The objset is owned only for the duration of the setup below; it
+	 * is disowned again at out_dmu_objset_disown on success and failure.
+	 */
+	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
+	if (error)
+		goto out_doi;
+
+	error = dmu_object_info(os, ZVOL_OBJ, doi);
+	if (error)
+		goto out_dmu_objset_disown;
+
+	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
+	if (error)
+		goto out_dmu_objset_disown;
+
+	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
+	if (zv == NULL) {
+		error = SET_ERROR(EAGAIN);
+		goto out_dmu_objset_disown;
+	}
+	zv->zv_hash = hash;
+
+	if (dmu_objset_is_snapshot(os))
+		zv->zv_flags |= ZVOL_RDONLY;
+
+	zv->zv_volblocksize = doi->doi_data_block_size;
+	zv->zv_volsize = volsize;
+	zv->zv_objset = os;
+
+	/* Capacity and the queue limits below are in 512-byte sectors. */
+	set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);
+
+	blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
+	    (DMU_MAX_ACCESS / 4) >> 9);
+	blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
+	blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
+	blk_queue_physical_block_size(zv->zv_zso->zvo_queue,
+	    zv->zv_volblocksize);
+	blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize);
+	blk_queue_max_discard_sectors(zv->zv_zso->zvo_queue,
+	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
+	blk_queue_discard_granularity(zv->zv_zso->zvo_queue,
+	    zv->zv_volblocksize);
+	/* Each queue flag is only touched when this kernel defines it. */
+#ifdef QUEUE_FLAG_DISCARD
+	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zv->zv_zso->zvo_queue);
+#endif
+#ifdef QUEUE_FLAG_NONROT
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, zv->zv_zso->zvo_queue);
+#endif
+#ifdef QUEUE_FLAG_ADD_RANDOM
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_zso->zvo_queue);
+#endif
+	/* This flag was introduced in kernel version 4.12. */
+#ifdef QUEUE_FLAG_SCSI_PASSTHROUGH
+	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
+#endif
+
+	/*
+	 * Open the ZIL just long enough to either replay it or, when
+	 * zil_replay_disable is set, destroy it; both only on a writeable
+	 * pool.  The log is closed again before the disk is published.
+	 */
+	ASSERT3P(zv->zv_zilog, ==, NULL);
+	zv->zv_zilog = zil_open(os, zvol_get_data);
+	if (spa_writeable(dmu_objset_spa(os))) {
+		if (zil_replay_disable)
+			zil_destroy(zv->zv_zilog, B_FALSE);
+		else
+			zil_replay(os, zv, zvol_replay_vector);
+	}
+	zil_close(zv->zv_zilog);
+	zv->zv_zilog = NULL;
+	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
+	dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
+
+	/*
+	 * When udev detects the addition of the device it will immediately
+	 * invoke blkid(8) to determine the type of content on the device.
+	 * Prefetching the blocks commonly scanned by blkid(8) will speed
+	 * up this process.
+	 *
+	 * NOTE(review): volsize and len are both uint64_t, so for a volume
+	 * smaller than len the offset "volsize - len" wraps around; confirm
+	 * that dmu_prefetch() tolerates such an offset.
+	 */
+	len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
+	if (len > 0) {
+		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
+		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
+		    ZIO_PRIORITY_SYNC_READ);
+	}
+
+	/* The objset is about to be disowned; do not keep a pointer to it. */
+	zv->zv_objset = NULL;
+out_dmu_objset_disown:
+	dmu_objset_disown(os, B_TRUE, FTAG);
+out_doi:
+	kmem_free(doi, sizeof (dmu_object_info_t));
+
+	/*
+	 * Keep in mind that once add_disk() is called, the zvol is
+	 * announced to the world, and zvol_open()/zvol_release() can
+	 * be called at any time. Incidentally, add_disk() itself calls
+	 * zvol_open()->zvol_first_open() and zvol_release()->zvol_last_close()
+	 * directly as well.
+	 *
+	 * NOTE(review): when add_disk() reports an error the zvol has
+	 * already been inserted and its ida index is not released here;
+	 * confirm that the caller cleans up in that case.
+	 */
+	if (error == 0) {
+		rw_enter(&zvol_state_lock, RW_WRITER);
+		zvol_insert(zv);
+		rw_exit(&zvol_state_lock);
+#ifdef HAVE_ADD_DISK_RET
+		error = add_disk(zv->zv_zso->zvo_disk);
+#else
+		add_disk(zv->zv_zso->zvo_disk);
+#endif
+	} else {
+		/* Nothing was published; return the reserved index. */
+		ida_simple_remove(&zvol_ida, idx);
+	}
+
+	return (error);
+}
+
+/*
+ * Give an existing zvol a new name: store the name, move the entry to
+ * the hash bucket matching it, and nudge udev so the symlinks follow.
+ * Both zvol_state_lock and zv->zv_state_lock must be held by the caller.
+ */
+static void
+zvol_rename_minor(zvol_state_t *zv, const char *newname)
+{
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+	int was_ro = get_disk_ro(disk);
+
+	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+
+	/* Unhook from the old bucket, rehash, and link into the new one. */
+	hlist_del(&zv->zv_hlink);
+	zv->zv_hash = zvol_name_hash(zv->zv_name);
+	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
+
+	/*
+	 * Toggle the disk's read-only state and immediately restore it.
+	 * The brief change makes the kernel issue a KOBJ_CHANGE uevent, so
+	 * udev notices the rename and repairs its symlinks.  ZVOL_RDONLY in
+	 * zv->zv_flags is left alone, hence the effective read-only state
+	 * never changes.  kobject_uevent() would be the direct way to do
+	 * this, but it is a GPL-only symbol, which is why this workaround
+	 * is needed.
+	 */
+	set_disk_ro(disk, !was_ro);
+	set_disk_ro(disk, was_ro);
+}
+
+/* Platform hook: pass the read-only flag on to the zvol's gendisk. */
+static void
+zvol_set_disk_ro_impl(zvol_state_t *zv, int flags)
+{
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+
+	set_disk_ro(disk, flags);
+}
+
+/*
+ * Platform hook: hand the given capacity to set_capacity() for the
+ * zvol's gendisk.  The value is passed through unchanged.
+ */
+static void
+zvol_set_capacity_impl(zvol_state_t *zv, uint64_t capacity)
+{
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
+
+	set_capacity(disk, capacity);
+}
+
+/*
+ * Platform operations table handed to zvol_register_ops() by zvol_init().
+ * The storage-class specifier is written first ("static const"); placing
+ * it after the qualifier is an obsolescent form that compilers warn about.
+ */
+static const zvol_platform_ops_t zvol_linux_ops = {
+	.zv_free = zvol_free,
+	.zv_rename_minor = zvol_rename_minor,
+	.zv_create_minor = zvol_os_create_minor,
+	.zv_update_volsize = zvol_update_volsize,
+	.zv_clear_private = zvol_clear_private,
+	.zv_is_zvol = zvol_is_zvol_impl,
+	.zv_set_disk_ro = zvol_set_disk_ro_impl,
+	.zv_set_capacity = zvol_set_capacity_impl,
+};
+
+/*
+ * Bring up the zvol layer: register the block major, create the I/O
+ * taskq (zvol_threads clamped to the range 1..1024), run the common
+ * initialisation, set up the minor-index allocator and register the
+ * platform operations.
+ *
+ * Returns 0 on success, the register_blkdev() error if registration
+ * fails, or -ENOMEM (after unregistering the major) if the taskq cannot
+ * be created.
+ */
+int
+zvol_init(void)
+{
+	int nthreads = MIN(MAX(zvol_threads, 1), 1024);
+	int rc;
+
+	rc = register_blkdev(zvol_major, ZVOL_DRIVER);
+	if (rc) {
+		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", rc);
+		return (rc);
+	}
+
+	zvol_taskq = taskq_create(ZVOL_DRIVER, nthreads, maxclsyspri,
+	    nthreads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+	if (zvol_taskq == NULL) {
+		unregister_blkdev(zvol_major, ZVOL_DRIVER);
+		return (-ENOMEM);
+	}
+
+	zvol_init_impl();
+	ida_init(&zvol_ida);
+	zvol_register_ops(&zvol_linux_ops);
+
+	return (0);
+}
+
+/*
+ * Tear down what zvol_init() set up: run the common teardown first, then
+ * unregister the block major, destroy the I/O taskq and finally destroy
+ * the minor-index allocator.
+ */
+void
+zvol_fini(void)
+{
+	zvol_fini_impl();
+	unregister_blkdev(zvol_major, ZVOL_DRIVER);
+	taskq_destroy(zvol_taskq);
+	ida_destroy(&zvol_ida);
+}
+
+/* BEGIN CSTYLED */
+/*
+ * Module parameters.  Those registered with mode 0644 can also be
+ * changed by root at runtime; those with mode 0444 are read-only once
+ * the module is loaded.
+ */
+module_param(zvol_inhibit_dev, uint, 0644);
+MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
+
+module_param(zvol_major, uint, 0444);
+MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
+
+/* zvol_init() clamps this value to the range 1..1024. */
+module_param(zvol_threads, uint, 0444);
+MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
+
+module_param(zvol_request_sync, uint, 0644);
+MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
+
+module_param(zvol_max_discard_blocks, ulong, 0444);
+MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
+
+/* zvol_os_create_minor() caps this value at SPA_MAXBLOCKSIZE. */
+module_param(zvol_prefetch_bytes, uint, 0644);
+MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
+
+/* Used by zvol_alloc() when a dataset's volmode is ZFS_VOLMODE_DEFAULT. */
+module_param(zvol_volmode, uint, 0644);
+MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+/* END CSTYLED */

diff --git a/zfs/module/spl/Makefile.in b/zfs/module/spl/Makefile.in
index e16666a..cedbfe9 100644
--- a/zfs/module/spl/Makefile.in
+++ b/zfs/module/spl/Makefile.in

@@ -1,27 +1,13 @@
-src = @abs_top_srcdir@/module/spl
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+mfdir = $(obj)
+else
+mfdir = $(srctree)/$(src)
+endif
 
 MODULE := spl
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
-$(MODULE)-objs += spl-atomic.o
-$(MODULE)-objs += spl-condvar.o
-$(MODULE)-objs += spl-cred.o
-$(MODULE)-objs += spl-err.o
-$(MODULE)-objs += spl-generic.o
-$(MODULE)-objs += spl-kmem.o
-$(MODULE)-objs += spl-kmem-cache.o
-$(MODULE)-objs += spl-kobj.o
-$(MODULE)-objs += spl-kstat.o
-$(MODULE)-objs += spl-proc.o
-$(MODULE)-objs += spl-procfs-list.o
-$(MODULE)-objs += spl-taskq.o
-$(MODULE)-objs += spl-thread.o
-$(MODULE)-objs += spl-tsd.o
-$(MODULE)-objs += spl-vmem.o
-$(MODULE)-objs += spl-vnode.o
-$(MODULE)-objs += spl-xdr.o
-$(MODULE)-objs += spl-zlib.o
+include $(mfdir)/../os/linux/spl/Makefile

diff --git a/zfs/module/spl/README.md b/zfs/module/spl/README.md
deleted file mode 100644
index 57f635a..0000000
--- a/zfs/module/spl/README.md
+++ /dev/null

@@ -1,16 +0,0 @@
-The Solaris Porting Layer, SPL, is a Linux kernel module which provides a
-compatibility layer used by the [ZFS on Linux](http://zfsonlinux.org) project.
-
-# Installation
-
-The latest version of the SPL is maintained as part of this repository.
-Only when building ZFS version 0.7.x or earlier must an external SPL release
-be used.  These releases can be found at:
-
-  * Version 0.7.x: https://github.com/zfsonlinux/spl/tree/spl-0.7-release  
-  * Version 0.6.5.x: https://github.com/zfsonlinux/spl/tree/spl-0.6.5-release  
-
-# Release
-
-The SPL is released under a GPLv2 license.  
-For more details see the NOTICE and THIRDPARTYLICENSE files; `UCRL-CODE-235197`

diff --git a/zfs/module/spl/spl-atomic.c b/zfs/module/spl/spl-atomic.c
deleted file mode 100644
index 47ed188..0000000
--- a/zfs/module/spl/spl-atomic.c
+++ /dev/null

@@ -1,36 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Atomic Implementation.
- */
-
-#include <sys/atomic.h>
-
-#ifdef ATOMIC_SPINLOCK
-/* Global atomic lock declarations */
-DEFINE_SPINLOCK(atomic32_lock);
-DEFINE_SPINLOCK(atomic64_lock);
-
-EXPORT_SYMBOL(atomic32_lock);
-EXPORT_SYMBOL(atomic64_lock);
-#endif /* ATOMIC_SPINLOCK */

diff --git a/zfs/module/spl/spl-condvar.c b/zfs/module/spl/spl-condvar.c
deleted file mode 100644
index 3cc33da..0000000
--- a/zfs/module/spl/spl-condvar.c
+++ /dev/null

@@ -1,461 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Credential Implementation.
- */
-
-#include <sys/condvar.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <linux/hrtimer.h>
-#include <linux/compiler_compat.h>
-#include <linux/mod_compat.h>
-
-#include <linux/sched.h>
-
-#ifdef HAVE_SCHED_SIGNAL_HEADER
-#include <linux/sched/signal.h>
-#endif
-
-#define	MAX_HRTIMEOUT_SLACK_US	1000
-unsigned int spl_schedule_hrtimeout_slack_us = 0;
-
-static int
-param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
-{
-	unsigned long val;
-	int error;
-
-	error = kstrtoul(buf, 0, &val);
-	if (error)
-		return (error);
-
-	if (val > MAX_HRTIMEOUT_SLACK_US)
-		return (-EINVAL);
-
-	error = param_set_uint(buf, kp);
-	if (error < 0)
-		return (error);
-
-	return (0);
-}
-
-module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
-	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
-MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
-	"schedule_hrtimeout_range() delta/slack value in us, default(0)");
-
-void
-__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
-{
-	ASSERT(cvp);
-	ASSERT(name == NULL);
-	ASSERT(type == CV_DEFAULT);
-	ASSERT(arg == NULL);
-
-	cvp->cv_magic = CV_MAGIC;
-	init_waitqueue_head(&cvp->cv_event);
-	init_waitqueue_head(&cvp->cv_destroy);
-	atomic_set(&cvp->cv_waiters, 0);
-	atomic_set(&cvp->cv_refs, 1);
-	cvp->cv_mutex = NULL;
-}
-EXPORT_SYMBOL(__cv_init);
-
-static int
-cv_destroy_wakeup(kcondvar_t *cvp)
-{
-	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
-		ASSERT(cvp->cv_mutex == NULL);
-		ASSERT(!waitqueue_active(&cvp->cv_event));
-		return (1);
-	}
-
-	return (0);
-}
-
-void
-__cv_destroy(kcondvar_t *cvp)
-{
-	ASSERT(cvp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-
-	cvp->cv_magic = CV_DESTROY;
-	atomic_dec(&cvp->cv_refs);
-
-	/* Block until all waiters are woken and references dropped. */
-	while (cv_destroy_wakeup(cvp) == 0)
-		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
-
-	ASSERT3P(cvp->cv_mutex, ==, NULL);
-	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
-	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
-	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
-}
-EXPORT_SYMBOL(__cv_destroy);
-
-static void
-cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
-{
-	DEFINE_WAIT(wait);
-	kmutex_t *m;
-
-	ASSERT(cvp);
-	ASSERT(mp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-	ASSERT(mutex_owned(mp));
-	atomic_inc(&cvp->cv_refs);
-
-	m = READ_ONCE(cvp->cv_mutex);
-	if (!m)
-		m = xchg(&cvp->cv_mutex, mp);
-	/* Ensure the same mutex is used by all callers */
-	ASSERT(m == NULL || m == mp);
-
-	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
-	atomic_inc(&cvp->cv_waiters);
-
-	/*
-	 * Mutex should be dropped after prepare_to_wait() this
-	 * ensures we're linked in to the waiters list and avoids the
-	 * race where 'cvp->cv_waiters > 0' but the list is empty.
-	 */
-	mutex_exit(mp);
-	if (io)
-		io_schedule();
-	else
-		schedule();
-
-	/* No more waiters a different mutex could be used */
-	if (atomic_dec_and_test(&cvp->cv_waiters)) {
-		/*
-		 * This is set without any lock, so it's racy. But this is
-		 * just for debug anyway, so make it best-effort
-		 */
-		cvp->cv_mutex = NULL;
-		wake_up(&cvp->cv_destroy);
-	}
-
-	finish_wait(&cvp->cv_event, &wait);
-	atomic_dec(&cvp->cv_refs);
-
-	/*
-	 * Hold mutex after we release the cvp, otherwise we could dead lock
-	 * with a thread holding the mutex and call cv_destroy.
-	 */
-	mutex_enter(mp);
-}
-
-void
-__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
-{
-	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
-}
-EXPORT_SYMBOL(__cv_wait);
-
-void
-__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
-{
-	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
-}
-EXPORT_SYMBOL(__cv_wait_io);
-
-int
-__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
-{
-	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
-
-	return (signal_pending(current) ? 0 : 1);
-}
-EXPORT_SYMBOL(__cv_wait_io_sig);
-
-int
-__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
-{
-	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
-
-	return (signal_pending(current) ? 0 : 1);
-}
-EXPORT_SYMBOL(__cv_wait_sig);
-
-#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
-#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
-#else
-
-struct spl_task_timer {
-	struct timer_list timer;
-	struct task_struct *task;
-};
-
-static void
-__cv_wakeup(spl_timer_list_t t)
-{
-	struct timer_list *tmr = (struct timer_list *)t;
-	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);
-
-	wake_up_process(task_timer->task);
-}
-
-static long
-spl_io_schedule_timeout(long time_left)
-{
-	long expire_time = jiffies + time_left;
-	struct spl_task_timer task_timer;
-	struct timer_list *timer = &task_timer.timer;
-
-	task_timer.task = current;
-
-	timer_setup(timer, __cv_wakeup, 0);
-
-	timer->expires = expire_time;
-	add_timer(timer);
-
-	io_schedule();
-
-	del_timer_sync(timer);
-
-	time_left = expire_time - jiffies;
-
-	return (time_left < 0 ? 0 : time_left);
-}
-#endif
-
-/*
- * 'expire_time' argument is an absolute wall clock time in jiffies.
- * Return value is time left (expire_time - now) or -1 if timeout occurred.
- */
-static clock_t
-__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
-    int state, int io)
-{
-	DEFINE_WAIT(wait);
-	kmutex_t *m;
-	clock_t time_left;
-
-	ASSERT(cvp);
-	ASSERT(mp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-	ASSERT(mutex_owned(mp));
-
-	/* XXX - Does not handle jiffie wrap properly */
-	time_left = expire_time - jiffies;
-	if (time_left <= 0)
-		return (-1);
-
-	atomic_inc(&cvp->cv_refs);
-	m = READ_ONCE(cvp->cv_mutex);
-	if (!m)
-		m = xchg(&cvp->cv_mutex, mp);
-	/* Ensure the same mutex is used by all callers */
-	ASSERT(m == NULL || m == mp);
-
-	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
-	atomic_inc(&cvp->cv_waiters);
-
-	/*
-	 * Mutex should be dropped after prepare_to_wait() this
-	 * ensures we're linked in to the waiters list and avoids the
-	 * race where 'cvp->cv_waiters > 0' but the list is empty.
-	 */
-	mutex_exit(mp);
-	if (io)
-		time_left = spl_io_schedule_timeout(time_left);
-	else
-		time_left = schedule_timeout(time_left);
-
-	/* No more waiters a different mutex could be used */
-	if (atomic_dec_and_test(&cvp->cv_waiters)) {
-		/*
-		 * This is set without any lock, so it's racy. But this is
-		 * just for debug anyway, so make it best-effort
-		 */
-		cvp->cv_mutex = NULL;
-		wake_up(&cvp->cv_destroy);
-	}
-
-	finish_wait(&cvp->cv_event, &wait);
-	atomic_dec(&cvp->cv_refs);
-
-	/*
-	 * Hold mutex after we release the cvp, otherwise we could dead lock
-	 * with a thread holding the mutex and call cv_destroy.
-	 */
-	mutex_enter(mp);
-	return (time_left > 0 ? time_left : -1);
-}
-
-clock_t
-__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
-	return (__cv_timedwait_common(cvp, mp, exp_time,
-	    TASK_UNINTERRUPTIBLE, 0));
-}
-EXPORT_SYMBOL(__cv_timedwait);
-
-clock_t
-__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
-	return (__cv_timedwait_common(cvp, mp, exp_time,
-	    TASK_UNINTERRUPTIBLE, 1));
-}
-EXPORT_SYMBOL(__cv_timedwait_io);
-
-clock_t
-__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
-{
-	return (__cv_timedwait_common(cvp, mp, exp_time,
-	    TASK_INTERRUPTIBLE, 0));
-}
-EXPORT_SYMBOL(__cv_timedwait_sig);
-
-/*
- * 'expire_time' argument is an absolute clock time in nanoseconds.
- * Return value is time left (expire_time - now) or -1 if timeout occurred.
- */
-static clock_t
-__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
-    hrtime_t res, int state)
-{
-	DEFINE_WAIT(wait);
-	kmutex_t *m;
-	hrtime_t time_left;
-	ktime_t ktime_left;
-	u64 slack = 0;
-
-	ASSERT(cvp);
-	ASSERT(mp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-	ASSERT(mutex_owned(mp));
-
-	time_left = expire_time - gethrtime();
-	if (time_left <= 0)
-		return (-1);
-
-	atomic_inc(&cvp->cv_refs);
-	m = READ_ONCE(cvp->cv_mutex);
-	if (!m)
-		m = xchg(&cvp->cv_mutex, mp);
-	/* Ensure the same mutex is used by all callers */
-	ASSERT(m == NULL || m == mp);
-
-	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
-	atomic_inc(&cvp->cv_waiters);
-
-	/*
-	 * Mutex should be dropped after prepare_to_wait() this
-	 * ensures we're linked in to the waiters list and avoids the
-	 * race where 'cvp->cv_waiters > 0' but the list is empty.
-	 */
-	mutex_exit(mp);
-
-	ktime_left = ktime_set(0, time_left);
-	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
-	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
-	schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
-
-	/* No more waiters a different mutex could be used */
-	if (atomic_dec_and_test(&cvp->cv_waiters)) {
-		/*
-		 * This is set without any lock, so it's racy. But this is
-		 * just for debug anyway, so make it best-effort
-		 */
-		cvp->cv_mutex = NULL;
-		wake_up(&cvp->cv_destroy);
-	}
-
-	finish_wait(&cvp->cv_event, &wait);
-	atomic_dec(&cvp->cv_refs);
-
-	mutex_enter(mp);
-	time_left = expire_time - gethrtime();
-	return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
-}
-
-/*
- * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
- */
-static clock_t
-cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
-    hrtime_t res, int flag, int state)
-{
-	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
-		tim += gethrtime();
-
-	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
-}
-
-clock_t
-cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
-    int flag)
-{
-	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
-	    TASK_UNINTERRUPTIBLE));
-}
-EXPORT_SYMBOL(cv_timedwait_hires);
-
-clock_t
-cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
-    hrtime_t res, int flag)
-{
-	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
-	    TASK_INTERRUPTIBLE));
-}
-EXPORT_SYMBOL(cv_timedwait_sig_hires);
-
-void
-__cv_signal(kcondvar_t *cvp)
-{
-	ASSERT(cvp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-	atomic_inc(&cvp->cv_refs);
-
-	/*
-	 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
-	 * waiter will be set runnable with each call to wake_up().
-	 * Additionally wake_up() holds a spin_lock associated with
-	 * the wait queue to ensure we don't race waking up processes.
-	 */
-	if (atomic_read(&cvp->cv_waiters) > 0)
-		wake_up(&cvp->cv_event);
-
-	atomic_dec(&cvp->cv_refs);
-}
-EXPORT_SYMBOL(__cv_signal);
-
-void
-__cv_broadcast(kcondvar_t *cvp)
-{
-	ASSERT(cvp);
-	ASSERT(cvp->cv_magic == CV_MAGIC);
-	atomic_inc(&cvp->cv_refs);
-
-	/*
-	 * Wake_up_all() will wake up all waiters even those which
-	 * have the WQ_FLAG_EXCLUSIVE flag set.
-	 */
-	if (atomic_read(&cvp->cv_waiters) > 0)
-		wake_up_all(&cvp->cv_event);
-
-	atomic_dec(&cvp->cv_refs);
-}
-EXPORT_SYMBOL(__cv_broadcast);

diff --git a/zfs/module/spl/spl-cred.c b/zfs/module/spl/spl-cred.c
deleted file mode 100644
index ea3e903..0000000
--- a/zfs/module/spl/spl-cred.c
+++ /dev/null

@@ -1,200 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Credential Implementation.
- */
-
-#include <sys/cred.h>
-
-static int
-#ifdef HAVE_KUIDGID_T
-cr_groups_search(const struct group_info *group_info, kgid_t grp)
-#else
-cr_groups_search(const struct group_info *group_info, gid_t grp)
-#endif
-{
-	unsigned int left, right, mid;
-	int cmp;
-
-	if (!group_info)
-		return (0);
-
-	left = 0;
-	right = group_info->ngroups;
-	while (left < right) {
-		mid = (left + right) / 2;
-		cmp = KGID_TO_SGID(grp) -
-		    KGID_TO_SGID(GROUP_AT(group_info, mid));
-
-		if (cmp > 0)
-			left = mid + 1;
-		else if (cmp < 0)
-			right = mid;
-		else
-			return (1);
-	}
-	return (0);
-}
-
-/* Hold a reference on the credential */
-void
-crhold(cred_t *cr)
-{
-	(void) get_cred((const cred_t *)cr);
-}
-
-/* Free a reference on the credential */
-void
-crfree(cred_t *cr)
-{
-	put_cred((const cred_t *)cr);
-}
-
-/* Return the number of supplemental groups */
-int
-crgetngroups(const cred_t *cr)
-{
-	struct group_info *gi;
-	int rc;
-
-	gi = cr->group_info;
-	rc = gi->ngroups;
-#ifndef HAVE_GROUP_INFO_GID
-	/*
-	 * For Linux <= 4.8,
-	 * crgetgroups will only returns gi->blocks[0], which contains only
-	 * the first NGROUPS_PER_BLOCK groups.
-	 */
-	if (rc > NGROUPS_PER_BLOCK) {
-		WARN_ON_ONCE(1);
-		rc = NGROUPS_PER_BLOCK;
-	}
-#endif
-	return (rc);
-}
-
-/*
- * Return an array of supplemental gids.  The returned address is safe
- * to use as long as the caller has taken a reference with crhold().
- *
- * Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d
- * array via ->gid.
- */
-gid_t *
-crgetgroups(const cred_t *cr)
-{
-	struct group_info *gi;
-	gid_t *gids = NULL;
-
-	gi = cr->group_info;
-#ifdef HAVE_GROUP_INFO_GID
-	gids = KGIDP_TO_SGIDP(gi->gid);
-#else
-	if (gi->nblocks > 0)
-		gids = KGIDP_TO_SGIDP(gi->blocks[0]);
-#endif
-	return (gids);
-}
-
-/* Check if the passed gid is available in supplied credential. */
-int
-groupmember(gid_t gid, const cred_t *cr)
-{
-	struct group_info *gi;
-	int rc;
-
-	gi = cr->group_info;
-	rc = cr_groups_search(gi, SGID_TO_KGID(gid));
-
-	return (rc);
-}
-
-/* Return the effective user id */
-uid_t
-crgetuid(const cred_t *cr)
-{
-	return (KUID_TO_SUID(cr->euid));
-}
-
-/* Return the real user id */
-uid_t
-crgetruid(const cred_t *cr)
-{
-	return (KUID_TO_SUID(cr->uid));
-}
-
-/* Return the saved user id */
-uid_t
-crgetsuid(const cred_t *cr)
-{
-	return (KUID_TO_SUID(cr->suid));
-}
-
-/* Return the filesystem user id */
-uid_t
-crgetfsuid(const cred_t *cr)
-{
-	return (KUID_TO_SUID(cr->fsuid));
-}
-
-/* Return the effective group id */
-gid_t
-crgetgid(const cred_t *cr)
-{
-	return (KGID_TO_SGID(cr->egid));
-}
-
-/* Return the real group id */
-gid_t
-crgetrgid(const cred_t *cr)
-{
-	return (KGID_TO_SGID(cr->gid));
-}
-
-/* Return the saved group id */
-gid_t
-crgetsgid(const cred_t *cr)
-{
-	return (KGID_TO_SGID(cr->sgid));
-}
-
-/* Return the filesystem group id */
-gid_t
-crgetfsgid(const cred_t *cr)
-{
-	return (KGID_TO_SGID(cr->fsgid));
-}
-
-EXPORT_SYMBOL(crhold);
-EXPORT_SYMBOL(crfree);
-EXPORT_SYMBOL(crgetuid);
-EXPORT_SYMBOL(crgetruid);
-EXPORT_SYMBOL(crgetsuid);
-EXPORT_SYMBOL(crgetfsuid);
-EXPORT_SYMBOL(crgetgid);
-EXPORT_SYMBOL(crgetrgid);
-EXPORT_SYMBOL(crgetsgid);
-EXPORT_SYMBOL(crgetfsgid);
-EXPORT_SYMBOL(crgetngroups);
-EXPORT_SYMBOL(crgetgroups);
-EXPORT_SYMBOL(groupmember);

diff --git a/zfs/module/spl/spl-err.c b/zfs/module/spl/spl-err.c
deleted file mode 100644
index 3c0bb71..0000000
--- a/zfs/module/spl/spl-err.c
+++ /dev/null

@@ -1,124 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Error Implementation.
- */
-
-#include <sys/sysmacros.h>
-#include <sys/cmn_err.h>
-
-/*
- * It is often useful to actually have the panic crash the node so you
- * can then get notified of the event, get the crashdump for later
- * analysis and other such goodies.
- * But we would still default to the current default of not to do that.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_panic_halt;
-module_param(spl_panic_halt, uint, 0644);
-MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures");
-/* END CSTYLED */
-
-void
-spl_dumpstack(void)
-{
-	printk("Showing stack for process %d\n", current->pid);
-	dump_stack();
-}
-EXPORT_SYMBOL(spl_dumpstack);
-
-int
-spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
-{
-	const char *newfile;
-	char msg[MAXMSGLEN];
-	va_list ap;
-
-	newfile = strrchr(file, '/');
-	if (newfile != NULL)
-		newfile = newfile + 1;
-	else
-		newfile = file;
-
-	va_start(ap, fmt);
-	(void) vsnprintf(msg, sizeof (msg), fmt, ap);
-	va_end(ap);
-
-	printk(KERN_EMERG "%s", msg);
-	printk(KERN_EMERG "PANIC at %s:%d:%s()\n", newfile, line, func);
-	if (spl_panic_halt)
-		panic("%s", msg);
-
-	spl_dumpstack();
-
-	/* Halt the thread to facilitate further debugging */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	while (1)
-		schedule();
-
-	/* Unreachable */
-	return (1);
-}
-EXPORT_SYMBOL(spl_panic);
-
-void
-vcmn_err(int ce, const char *fmt, va_list ap)
-{
-	char msg[MAXMSGLEN];
-
-	vsnprintf(msg, MAXMSGLEN, fmt, ap);
-
-	switch (ce) {
-	case CE_IGNORE:
-		break;
-	case CE_CONT:
-		printk("%s", msg);
-		break;
-	case CE_NOTE:
-		printk(KERN_NOTICE "NOTICE: %s\n", msg);
-		break;
-	case CE_WARN:
-		printk(KERN_WARNING "WARNING: %s\n", msg);
-		break;
-	case CE_PANIC:
-		printk(KERN_EMERG "PANIC: %s\n", msg);
-		spl_dumpstack();
-
-		/* Halt the thread to facilitate further debugging */
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		while (1)
-			schedule();
-	}
-} /* vcmn_err() */
-EXPORT_SYMBOL(vcmn_err);
-
-void
-cmn_err(int ce, const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	vcmn_err(ce, fmt, ap);
-	va_end(ap);
-} /* cmn_err() */
-EXPORT_SYMBOL(cmn_err);

diff --git a/zfs/module/spl/spl-generic.c b/zfs/module/spl/spl-generic.c
deleted file mode 100644
index 43ade75..0000000
--- a/zfs/module/spl/spl-generic.c
+++ /dev/null

@@ -1,791 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Generic Implementation.
- */
-
-#include <sys/sysmacros.h>
-#include <sys/systeminfo.h>
-#include <sys/vmsystm.h>
-#include <sys/kobj.h>
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/vmem.h>
-#include <sys/mutex.h>
-#include <sys/rwlock.h>
-#include <sys/taskq.h>
-#include <sys/tsd.h>
-#include <sys/zmod.h>
-#include <sys/debug.h>
-#include <sys/proc.h>
-#include <sys/kstat.h>
-#include <sys/file.h>
-#include <linux/ctype.h>
-#include <sys/disp.h>
-#include <sys/random.h>
-#include <sys/strings.h>
-#include <linux/kmod.h>
-#include "zfs_gitrev.h"
-
-char spl_gitrev[64] = ZFS_META_GITREV;
-
-/* BEGIN CSTYLED */
-unsigned long spl_hostid = 0;
-EXPORT_SYMBOL(spl_hostid);
-/* BEGIN CSTYLED */
-module_param(spl_hostid, ulong, 0644);
-MODULE_PARM_DESC(spl_hostid, "The system hostid.");
-/* END CSTYLED */
-
-proc_t p0;
-EXPORT_SYMBOL(p0);
-
-/*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
- *
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
- *
- * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
- * is to provide bytes containing random numbers. It is mapped to /dev/urandom
- * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
- * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
- * we can implement it using a fast PRNG that we seed using Linux' actual
- * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
- * with an independent seed so that all calls to random_get_pseudo_bytes() are
- * free of atomic instructions.
- *
- * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
- * a caller finds this behavior undesirable, random_get_bytes() should be used
- * instead.
- *
- * XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
- * see the same numbers. Nothing in the code currently calls this in an
- * interrupt handler, so this is considered to be okay. If that becomes a
- * problem, we could create a set of per-cpu variables for interrupt handlers
- * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
- * true.
- */
-static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
-
-/*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
- *
- * http://xorshift.di.unimi.it/xorshift128plus.c
- */
-
-static inline uint64_t
-spl_rand_next(uint64_t *s)
-{
-	uint64_t s1 = s[0];
-	const uint64_t s0 = s[1];
-	s[0] = s0;
-	s1 ^= s1 << 23; // a
-	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
-	return (s[1] + s0);
-}
-
-static inline void
-spl_rand_jump(uint64_t *s)
-{
-	static const uint64_t JUMP[] =
-	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
-
-	uint64_t s0 = 0;
-	uint64_t s1 = 0;
-	int i, b;
-	for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
-		for (b = 0; b < 64; b++) {
-			if (JUMP[i] & 1ULL << b) {
-				s0 ^= s[0];
-				s1 ^= s[1];
-			}
-			(void) spl_rand_next(s);
-		}
-
-	s[0] = s0;
-	s[1] = s1;
-}
-
-int
-random_get_pseudo_bytes(uint8_t *ptr, size_t len)
-{
-	uint64_t *xp, s[2];
-
-	ASSERT(ptr);
-
-	xp = get_cpu_var(spl_pseudo_entropy);
-
-	s[0] = xp[0];
-	s[1] = xp[1];
-
-	while (len) {
-		union {
-			uint64_t ui64;
-			uint8_t byte[sizeof (uint64_t)];
-		}entropy;
-		int i = MIN(len, sizeof (uint64_t));
-
-		len -= i;
-		entropy.ui64 = spl_rand_next(s);
-
-		while (i--)
-			*ptr++ = entropy.byte[i];
-	}
-
-	xp[0] = s[0];
-	xp[1] = s[1];
-
-	put_cpu_var(spl_pseudo_entropy);
-
-	return (0);
-}
-
-
-EXPORT_SYMBOL(random_get_pseudo_bytes);
-
-#if BITS_PER_LONG == 32
-
-/*
- * Support 64/64 => 64 division on a 32-bit platform.  While the kernel
- * provides a div64_u64() function for this we do not use it because the
- * implementation is flawed.  There are cases which return incorrect
- * results as late as linux-2.6.35.  Until this is fixed upstream the
- * spl must provide its own implementation.
- *
- * This implementation is a slightly modified version of the algorithm
- * proposed by the book 'Hacker's Delight'.  The original source can be
- * found here and is available for use without restriction.
- *
- * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
- */
-
-/*
- * Calculate number of leading of zeros for a 64-bit value.
- */
-static int
-nlz64(uint64_t x)
-{
-	register int n = 0;
-
-	if (x == 0)
-		return (64);
-
-	if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; }
-	if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; }
-	if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n +  8; x = x <<  8; }
-	if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n +  4; x = x <<  4; }
-	if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n +  2; x = x <<  2; }
-	if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n +  1; }
-
-	return (n);
-}
-
-/*
- * Newer kernels have a div_u64() function but we define our own
- * to simplify portability between kernel versions.
- */
-static inline uint64_t
-__div_u64(uint64_t u, uint32_t v)
-{
-	(void) do_div(u, v);
-	return (u);
-}
-
-/*
- * Turn off missing prototypes warning for these functions. They are
- * replacements for libgcc-provided functions and will never be called
- * directly.
- */
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wmissing-prototypes"
-
-/*
- * Implementation of 64-bit unsigned division for 32-bit machines.
- *
- * First the procedure takes care of the case in which the divisor is a
- * 32-bit quantity. There are two subcases: (1) If the left half of the
- * dividend is less than the divisor, one execution of do_div() is all that
- * is required (overflow is not possible). (2) Otherwise it does two
- * divisions, using the grade school method.
- */
-uint64_t
-__udivdi3(uint64_t u, uint64_t v)
-{
-	uint64_t u0, u1, v1, q0, q1, k;
-	int n;
-
-	if (v >> 32 == 0) {			// If v < 2**32:
-		if (u >> 32 < v) {		// If u/v cannot overflow,
-			return (__div_u64(u, v)); // just do one division.
-		} else {			// If u/v would overflow:
-			u1 = u >> 32;		// Break u into two halves.
-			u0 = u & 0xFFFFFFFF;
-			q1 = __div_u64(u1, v);	// First quotient digit.
-			k  = u1 - q1 * v;	// First remainder, < v.
-			u0 += (k << 32);
-			q0 = __div_u64(u0, v);	// Seconds quotient digit.
-			return ((q1 << 32) + q0);
-		}
-	} else {				// If v >= 2**32:
-		n = nlz64(v);			// 0 <= n <= 31.
-		v1 = (v << n) >> 32;		// Normalize divisor, MSB is 1.
-		u1 = u >> 1;			// To ensure no overflow.
-		q1 = __div_u64(u1, v1);		// Get quotient from
-		q0 = (q1 << n) >> 31;		// Undo normalization and
-						// division of u by 2.
-		if (q0 != 0)			// Make q0 correct or
-			q0 = q0 - 1;		// too small by 1.
-		if ((u - q0 * v) >= v)
-			q0 = q0 + 1;		// Now q0 is correct.
-
-		return (q0);
-	}
-}
-EXPORT_SYMBOL(__udivdi3);
-
-/* BEGIN CSTYLED */
-#ifndef abs64
-#define	abs64(x)	({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })
-#endif
-/* END CSTYLED */
-
-/*
- * Implementation of 64-bit signed division for 32-bit machines.
- */
-int64_t
-__divdi3(int64_t u, int64_t v)
-{
-	int64_t q, t;
-	// cppcheck-suppress shiftTooManyBitsSigned
-	q = __udivdi3(abs64(u), abs64(v));
-	// cppcheck-suppress shiftTooManyBitsSigned
-	t = (u ^ v) >> 63;	// If u, v have different
-	return ((q ^ t) - t);	// signs, negate q.
-}
-EXPORT_SYMBOL(__divdi3);
-
-/*
- * Implementation of 64-bit unsigned modulo for 32-bit machines.
- */
-uint64_t
-__umoddi3(uint64_t dividend, uint64_t divisor)
-{
-	return (dividend - (divisor * __udivdi3(dividend, divisor)));
-}
-EXPORT_SYMBOL(__umoddi3);
-
-/* 64-bit signed modulo for 32-bit machines. */
-int64_t
-__moddi3(int64_t n, int64_t d)
-{
-	int64_t q;
-	boolean_t nn = B_FALSE;
-
-	if (n < 0) {
-		nn = B_TRUE;
-		n = -n;
-	}
-	if (d < 0)
-		d = -d;
-
-	q = __umoddi3(n, d);
-
-	return (nn ? -q : q);
-}
-EXPORT_SYMBOL(__moddi3);
-
-/*
- * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
- */
-uint64_t
-__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
-{
-	uint64_t q = __udivdi3(n, d);
-	if (r)
-		*r = n - d * q;
-	return (q);
-}
-EXPORT_SYMBOL(__udivmoddi4);
-
-/*
- * Implementation of 64-bit signed division/modulo for 32-bit machines.
- */
-int64_t
-__divmoddi4(int64_t n, int64_t d, int64_t *r)
-{
-	int64_t q, rr;
-	boolean_t nn = B_FALSE;
-	boolean_t nd = B_FALSE;
-	if (n < 0) {
-		nn = B_TRUE;
-		n = -n;
-	}
-	if (d < 0) {
-		nd = B_TRUE;
-		d = -d;
-	}
-
-	q = __udivmoddi4(n, d, (uint64_t *)&rr);
-
-	if (nn != nd)
-		q = -q;
-	if (nn)
-		rr = -rr;
-	if (r)
-		*r = rr;
-	return (q);
-}
-EXPORT_SYMBOL(__divmoddi4);
-
-#if defined(__arm) || defined(__arm__)
-/*
- * Implementation of 64-bit (un)signed division for 32-bit arm machines.
- *
- * Run-time ABI for the ARM Architecture (page 20).  A pair of (unsigned)
- * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
- * and the remainder in {r2, r3}.  The return type is specifically left
- * set to 'void' to ensure the compiler does not overwrite these registers
- * during the return.  All results are in registers as per ABI
- */
-void
-__aeabi_uldivmod(uint64_t u, uint64_t v)
-{
-	uint64_t res;
-	uint64_t mod;
-
-	res = __udivdi3(u, v);
-	mod = __umoddi3(u, v);
-	{
-		register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
-		register uint32_t r1 asm("r1") = (res >> 32);
-		register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
-		register uint32_t r3 asm("r3") = (mod >> 32);
-
-		/* BEGIN CSTYLED */
-		asm volatile(""
-		    : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
-		    : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
-		/* END CSTYLED */
-
-		return; /* r0; */
-	}
-}
-EXPORT_SYMBOL(__aeabi_uldivmod);
-
-void
-__aeabi_ldivmod(int64_t u, int64_t v)
-{
-	int64_t res;
-	uint64_t mod;
-
-	res =  __divdi3(u, v);
-	mod = __umoddi3(u, v);
-	{
-		register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
-		register uint32_t r1 asm("r1") = (res >> 32);
-		register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
-		register uint32_t r3 asm("r3") = (mod >> 32);
-
-		/* BEGIN CSTYLED */
-		asm volatile(""
-		    : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
-		    : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
-		/* END CSTYLED */
-
-		return; /* r0; */
-	}
-}
-EXPORT_SYMBOL(__aeabi_ldivmod);
-#endif /* __arm || __arm__ */
-
-#pragma GCC diagnostic pop
-
-#endif /* BITS_PER_LONG */
-
-/*
- * NOTE: The strtoxx behavior is solely based on my reading of the Solaris
- * ddi_strtol(9F) man page.  I have not verified the behavior of these
- * functions against their Solaris counterparts.  It is possible that I
- * may have misinterpreted the man page or the man page is incorrect.
- */
-int ddi_strtoul(const char *, char **, int, unsigned long *);
-int ddi_strtol(const char *, char **, int, long *);
-int ddi_strtoull(const char *, char **, int, unsigned long long *);
-int ddi_strtoll(const char *, char **, int, long long *);
-
-#define	define_ddi_strtoux(type, valtype)				\
-int ddi_strtou##type(const char *str, char **endptr,			\
-    int base, valtype *result)						\
-{									\
-	valtype last_value, value = 0;					\
-	char *ptr = (char *)str;					\
-	int flag = 1, digit;						\
-									\
-	if (strlen(ptr) == 0)						\
-		return (EINVAL);					\
-									\
-	/* Auto-detect base based on prefix */				\
-	if (!base) {							\
-		if (str[0] == '0') {					\
-			if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \
-				base = 16; /* hex */			\
-				ptr += 2;				\
-			} else if (str[1] >= '0' && str[1] < 8) {	\
-				base = 8; /* octal */			\
-				ptr += 1;				\
-			} else {					\
-				return (EINVAL);			\
-			}						\
-		} else {						\
-			base = 10; /* decimal */			\
-		}							\
-	}								\
-									\
-	while (1) {							\
-		if (isdigit(*ptr))					\
-			digit = *ptr - '0';				\
-		else if (isalpha(*ptr))					\
-			digit = tolower(*ptr) - 'a' + 10;		\
-		else							\
-			break;						\
-									\
-		if (digit >= base)					\
-			break;						\
-									\
-		last_value = value;					\
-		value = value * base + digit;				\
-		if (last_value > value) /* Overflow */			\
-			return (ERANGE);				\
-									\
-		flag = 1;						\
-		ptr++;							\
-	}								\
-									\
-	if (flag)							\
-		*result = value;					\
-									\
-	if (endptr)							\
-		*endptr = (char *)(flag ? ptr : str);			\
-									\
-	return (0);							\
-}									\
-
-#define	define_ddi_strtox(type, valtype)				\
-int ddi_strto##type(const char *str, char **endptr,			\
-    int base, valtype *result)						\
-{									\
-	int rc;								\
-									\
-	if (*str == '-') {						\
-		rc = ddi_strtou##type(str + 1, endptr, base, result);	\
-		if (!rc) {						\
-			if (*endptr == str + 1)				\
-				*endptr = (char *)str;			\
-			else						\
-				*result = -*result;			\
-		}							\
-	} else {							\
-		rc = ddi_strtou##type(str, endptr, base, result);	\
-	}								\
-									\
-	return (rc);							\
-}
-
-define_ddi_strtoux(l, unsigned long)
-define_ddi_strtox(l, long)
-define_ddi_strtoux(ll, unsigned long long)
-define_ddi_strtox(ll, long long)
-
-EXPORT_SYMBOL(ddi_strtoul);
-EXPORT_SYMBOL(ddi_strtol);
-EXPORT_SYMBOL(ddi_strtoll);
-EXPORT_SYMBOL(ddi_strtoull);
-
-int
-ddi_copyin(const void *from, void *to, size_t len, int flags)
-{
-	/* Fake ioctl() issued by kernel, 'from' is a kernel address */
-	if (flags & FKIOCTL) {
-		memcpy(to, from, len);
-		return (0);
-	}
-
-	return (copyin(from, to, len));
-}
-EXPORT_SYMBOL(ddi_copyin);
-
-int
-ddi_copyout(const void *from, void *to, size_t len, int flags)
-{
-	/* Fake ioctl() issued by kernel, 'from' is a kernel address */
-	if (flags & FKIOCTL) {
-		memcpy(to, from, len);
-		return (0);
-	}
-
-	return (copyout(from, to, len));
-}
-EXPORT_SYMBOL(ddi_copyout);
-
-/*
- * Read the unique system identifier from the /etc/hostid file.
- *
- * The behavior of /usr/bin/hostid on Linux systems with the
- * regular eglibc and coreutils is:
- *
- *   1. Generate the value if the /etc/hostid file does not exist
- *      or if the /etc/hostid file is less than four bytes in size.
- *
- *   2. If the /etc/hostid file is at least 4 bytes, then return
- *      the first four bytes [0..3] in native endian order.
- *
- *   3. Always ignore bytes [4..] if they exist in the file.
- *
- * Only the first four bytes are significant, even on systems that
- * have a 64-bit word size.
- *
- * See:
- *
- *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
- *   coreutils: src/hostid.c
- *
- * Notes:
- *
- * The /etc/hostid file on Solaris is a text file that often reads:
- *
- *   # DO NOT EDIT
- *   "0123456789"
- *
- * Directly copying this file to Linux results in a constant
- * hostid of 4f442023 because the default comment constitutes
- * the first four bytes of the file.
- *
- */
-
-char *spl_hostid_path = HW_HOSTID_PATH;
-module_param(spl_hostid_path, charp, 0444);
-MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
-
-static int
-hostid_read(uint32_t *hostid)
-{
-	uint64_t size;
-	struct _buf *file;
-	uint32_t value = 0;
-	int error;
-
-	file = kobj_open_file(spl_hostid_path);
-	if (file == (struct _buf *)-1)
-		return (ENOENT);
-
-	error = kobj_get_filesize(file, &size);
-	if (error) {
-		kobj_close_file(file);
-		return (error);
-	}
-
-	if (size < sizeof (HW_HOSTID_MASK)) {
-		kobj_close_file(file);
-		return (EINVAL);
-	}
-
-	/*
-	 * Read directly into the variable like eglibc does.
-	 * Short reads are okay; native behavior is preserved.
-	 */
-	error = kobj_read_file(file, (char *)&value, sizeof (value), 0);
-	if (error < 0) {
-		kobj_close_file(file);
-		return (EIO);
-	}
-
-	/* Mask down to 32 bits like coreutils does. */
-	*hostid = (value & HW_HOSTID_MASK);
-	kobj_close_file(file);
-
-	return (0);
-}
-
-/*
- * Return the system hostid.  Preferentially use the spl_hostid module option
- * when set, otherwise use the value in the /etc/hostid file.
- */
-uint32_t
-zone_get_hostid(void *zone)
-{
-	uint32_t hostid;
-
-	ASSERT3P(zone, ==, NULL);
-
-	if (spl_hostid != 0)
-		return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
-
-	if (hostid_read(&hostid) == 0)
-		return (hostid);
-
-	return (0);
-}
-EXPORT_SYMBOL(zone_get_hostid);
-
-static int
-spl_kvmem_init(void)
-{
-	int rc = 0;
-
-	rc = spl_kmem_init();
-	if (rc)
-		return (rc);
-
-	rc = spl_vmem_init();
-	if (rc) {
-		spl_kmem_fini();
-		return (rc);
-	}
-
-	return (rc);
-}
-
-/*
- * We initialize the random number generator with 128 bits of entropy from the
- * system random number generator. In the improbable case that we have a zero
- * seed, we fallback to the system jiffies, unless it is also zero, in which
- * situation we use a preprogrammed seed. We step forward by 2^64 iterations to
- * initialize each of the per-cpu seeds so that the sequences generated on each
- * CPU are guaranteed to never overlap in practice.
- */
-static void __init
-spl_random_init(void)
-{
-	uint64_t s[2];
-	int i = 0;
-
-	get_random_bytes(s, sizeof (s));
-
-	if (s[0] == 0 && s[1] == 0) {
-		if (jiffies != 0) {
-			s[0] = jiffies;
-			s[1] = ~0 - jiffies;
-		} else {
-			(void) memcpy(s, "improbable seed", sizeof (s));
-		}
-		printk("SPL: get_random_bytes() returned 0 "
-		    "when generating random seed. Setting initial seed to "
-		    "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
-		    cpu_to_be64(s[1]));
-	}
-
-	for_each_possible_cpu(i) {
-		uint64_t *wordp = per_cpu(spl_pseudo_entropy, i);
-
-		spl_rand_jump(s);
-
-		wordp[0] = s[0];
-		wordp[1] = s[1];
-	}
-}
-
-static void
-spl_kvmem_fini(void)
-{
-	spl_vmem_fini();
-	spl_kmem_fini();
-}
-
-static int __init
-spl_init(void)
-{
-	int rc = 0;
-
-	bzero(&p0, sizeof (proc_t));
-	spl_random_init();
-
-	if ((rc = spl_kvmem_init()))
-		goto out1;
-
-	if ((rc = spl_tsd_init()))
-		goto out2;
-
-	if ((rc = spl_taskq_init()))
-		goto out3;
-
-	if ((rc = spl_kmem_cache_init()))
-		goto out4;
-
-	if ((rc = spl_vn_init()))
-		goto out5;
-
-	if ((rc = spl_proc_init()))
-		goto out6;
-
-	if ((rc = spl_kstat_init()))
-		goto out7;
-
-	if ((rc = spl_zlib_init()))
-		goto out8;
-
-	return (rc);
-
-out8:
-	spl_kstat_fini();
-out7:
-	spl_proc_fini();
-out6:
-	spl_vn_fini();
-out5:
-	spl_kmem_cache_fini();
-out4:
-	spl_taskq_fini();
-out3:
-	spl_tsd_fini();
-out2:
-	spl_kvmem_fini();
-out1:
-	return (rc);
-}
-
-static void __exit
-spl_fini(void)
-{
-	spl_zlib_fini();
-	spl_kstat_fini();
-	spl_proc_fini();
-	spl_vn_fini();
-	spl_kmem_cache_fini();
-	spl_taskq_fini();
-	spl_tsd_fini();
-	spl_kvmem_fini();
-}
-
-module_init(spl_init);
-module_exit(spl_fini);
-
-MODULE_DESCRIPTION("Solaris Porting Layer");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE("GPL");
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);

diff --git a/zfs/module/spl/spl-kmem-cache.c b/zfs/module/spl/spl-kmem-cache.c
deleted file mode 100644
index 4866b29..0000000
--- a/zfs/module/spl/spl-kmem-cache.c
+++ /dev/null

@@ -1,1798 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
-#include <sys/taskq.h>
-#include <sys/timer.h>
-#include <sys/vmem.h>
-#include <sys/wait.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/prefetch.h>
-
-/*
- * Within the scope of the spl-kmem-cache.c file the kmem_cache_* definitions
- * are removed to allow access to the real Linux slab allocator.
- */
-#undef kmem_cache_destroy
-#undef kmem_cache_create
-#undef kmem_cache_alloc
-#undef kmem_cache_free
-
-
-/*
- * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}()
- * with smp_mb__{before,after}_atomic() because they were redundant. This is
- * only used inside our SLAB allocator, so we implement an internal wrapper
- * here to give us smp_mb__{before,after}_atomic() on older kernels.
- */
-#ifndef smp_mb__before_atomic
-#define	smp_mb__before_atomic(x) smp_mb__before_clear_bit(x)
-#endif
-
-#ifndef smp_mb__after_atomic
-#define	smp_mb__after_atomic(x) smp_mb__after_clear_bit(x)
-#endif
-
-/*
- * Cache expiration was implemented because it was part of the default Solaris
- * kmem_cache behavior.  The idea is that per-cpu objects which haven't been
- * accessed in several seconds should be returned to the cache.  On the other
- * hand Linux slabs never move objects back to the slabs unless there is
- * memory pressure on the system.  By default the Linux method is enabled
- * because it has been shown to improve responsiveness on low memory systems.
- * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
-EXPORT_SYMBOL(spl_kmem_cache_expire);
-module_param(spl_kmem_cache_expire, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
-
-/*
- * Cache magazines are an optimization designed to minimize the cost of
- * allocating memory.  They do this by keeping a per-cpu cache of recently
- * freed objects, which can then be reallocated without taking a lock. This
- * can improve performance on highly contended caches.  However, because
- * objects in magazines will prevent otherwise empty slabs from being
- * immediately released this may not be ideal for low memory machines.
- *
- * For this reason spl_kmem_cache_magazine_size can be used to set a maximum
- * magazine size.  When this value is set to 0 the magazine size will be
- * automatically determined based on the object size.  Otherwise magazines
- * will be limited to 2-256 objects per magazine (i.e per cpu).  Magazines
- * may never be entirely disabled in this implementation.
- */
-unsigned int spl_kmem_cache_magazine_size = 0;
-module_param(spl_kmem_cache_magazine_size, uint, 0444);
-MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
-	"Default magazine size (2-256), set automatically (0)");
-
-/*
- * The default behavior is to report the number of objects remaining in the
- * cache.  This allows the Linux VM to repeatedly reclaim objects from the
- * cache when memory is low to satisfy other memory allocations.  Alternately,
- * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
- * is reclaimed.  This may increase the likelihood of out of memory events.
- */
-unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
-module_param(spl_kmem_cache_reclaim, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
-
-unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
-module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
-
-unsigned int spl_kmem_cache_obj_per_slab_min = SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN;
-module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min,
-	"Minimal number of objects per slab");
-
-unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE;
-module_param(spl_kmem_cache_max_size, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
-
-/*
- * For small objects the Linux slab allocator should be used to make the most
- * efficient use of the memory.  However, large objects are not supported by
- * the Linux slab and therefore the SPL implementation is preferred.  A cutoff
- * of 16K was determined to be optimal for architectures using 4K pages.
- */
-#if PAGE_SIZE == 4096
-unsigned int spl_kmem_cache_slab_limit = 16384;
-#else
-unsigned int spl_kmem_cache_slab_limit = 0;
-#endif
-module_param(spl_kmem_cache_slab_limit, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
-	"Objects less than N bytes use the Linux slab");
-
-/*
- * This value defaults to a threshold designed to avoid allocations which
- * have been deemed costly by the kernel.
- */
-unsigned int spl_kmem_cache_kmem_limit =
-	((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
-	SPL_KMEM_CACHE_OBJ_PER_SLAB;
-module_param(spl_kmem_cache_kmem_limit, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
-	"Objects less than N bytes use the kmalloc");
-
-/*
- * The number of threads available to allocate new slabs for caches.  This
- * should not need to be tuned but it is available for performance analysis.
- */
-unsigned int spl_kmem_cache_kmem_threads = 4;
-module_param(spl_kmem_cache_kmem_threads, uint, 0444);
-MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
-	"Number of spl_kmem_cache threads");
-/* END CSTYLED */
-
-/*
- * Slab allocation interfaces
- *
- * While the Linux slab implementation was inspired by the Solaris
- * implementation I cannot use it to emulate the Solaris APIs.  I
- * require two features which are not provided by the Linux slab.
- *
- * 1) Constructors AND destructors.  Recent versions of the Linux
- *    kernel have removed support for destructors.  This is a deal
- *    breaker for the SPL which contains particularly expensive
- *    initializers for mutex's, condition variables, etc.  We also
- *    require a minimal level of cleanup for these data types unlike
- *    many Linux data types which do not need to be explicitly destroyed.
- *
- * 2) Virtual address space backed slab.  Callers of the Solaris slab
- *    expect it to work well for both small and very large allocations.
- *    Because of memory fragmentation the Linux slab which is backed
- *    by kmalloc'ed memory performs very badly when confronted with
- *    large numbers of large allocations.  Basing the slab on the
- *    virtual address space removes the need for contiguous pages
- *    and greatly improves performance for large allocations.
- *
- * For these reasons, the SPL has its own slab implementation with
- * the needed features.  It is not as highly optimized as either the
- * Solaris or Linux slabs, but it should get me most of what is
- * needed until it can be optimized or obsoleted by another approach.
- *
- * One serious concern I do have about this method is the relatively
- * small virtual address space on 32bit arches.  This will seriously
- * constrain the size of the slab caches and their performance.
- */
-
-struct list_head spl_kmem_cache_list;   /* List of caches */
-struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
-taskq_t *spl_kmem_cache_taskq;		/* Task queue for aging / reclaim */
-
-static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
-
-SPL_SHRINKER_CALLBACK_FWD_DECLARE(spl_kmem_cache_generic_shrinker);
-SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker,
-	spl_kmem_cache_generic_shrinker, KMC_DEFAULT_SEEKS);
-
-/*
- * Allocate 'size' bytes of backing memory for a cache.  KMC_KMEM caches
- * are served by __get_free_pages() and therefore require a power-of-two
- * size; all other caches are served by spl_vmalloc().  Returns the new
- * buffer, which is asserted to be page aligned.
- */
-static void *
-kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
-{
-	gfp_t gfp = kmem_flags_convert(flags);
-	void *mem;
-
-	if (!(skc->skc_flags & KMC_KMEM)) {
-		mem = spl_vmalloc(size, gfp | __GFP_HIGHMEM);
-	} else {
-		ASSERT(ISP2(size));
-		mem = (void *)__get_free_pages(gfp, get_order(size));
-	}
-
-	/* Both allocators are expected to hand back page aligned memory */
-	ASSERT(IS_P2ALIGNED(mem, PAGE_SIZE));
-
-	return (mem);
-}
-
-/*
- * Release memory obtained from kv_alloc().  'size' must be the size that
- * was passed to kv_alloc() for 'ptr'.
- */
-static void
-kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
-{
-	ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
-
-	/*
-	 * Direct reclaim in Linux watches this out of band counter to
-	 * decide whether forward progress is being made.  The Linux slab
-	 * implementations normally bump it from kmem_freepages(); since
-	 * none of that infrastructure is used here the number of pages
-	 * being released is accounted for by hand.
-	 */
-	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-
-	if (!(skc->skc_flags & KMC_KMEM)) {
-		vfree(ptr);
-	} else {
-		ASSERT(ISP2(size));
-		free_pages((unsigned long)ptr, get_order(size));
-	}
-}
-
-/*
- * Size of the spl_kmem_slab_t header once rounded up to the cache's
- * object alignment.
- */
-static inline uint32_t
-spl_sks_size(spl_kmem_cache_t *skc)
-{
-	uint32_t align = skc->skc_obj_align;
-
-	return (P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t), align, uint32_t));
-}
-
-/*
- * Space consumed by one object: the aligned payload plus the aligned
- * spl_kmem_obj_t tracking structure.
- */
-static inline uint32_t
-spl_obj_size(spl_kmem_cache_t *skc)
-{
-	uint32_t payload = P2ROUNDUP_TYPED(skc->skc_obj_size,
-	    skc->skc_obj_align, uint32_t);
-	uint32_t tracking = P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t),
-	    skc->skc_obj_align, uint32_t);
-
-	return (payload + tracking);
-}
-
-/*
- * Given an object, return its spl_kmem_obj_t.  The tracking structure
- * lives directly after the payload, at the payload size rounded up to
- * the cache's object alignment.
- */
-static inline spl_kmem_obj_t *
-spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj)
-{
-	uint32_t offset = P2ROUNDUP_TYPED(skc->skc_obj_size,
-	    skc->skc_obj_align, uint32_t);
-
-	return (obj + offset);
-}
-
-/*
- * Allocation size used for each offslab object.  The value is always a
- * power of two, as kv_alloc() requires for KMC_KMEM caches, derived
- * from the position of the highest set bit in spl_obj_size().
- */
-static inline uint32_t
-spl_offslab_size(spl_kmem_cache_t *skc)
-{
-	int shift = fls64(spl_obj_size(skc)) + 1;
-
-	return (1UL << shift);
-}
-
-/*
- * It's important that we pack the spl_kmem_obj_t structure and the
- * actual objects in to one large address space to minimize the number
- * of calls to the allocator.  It is far better to do a few large
- * allocations and then subdivide it ourselves.  Now which allocator
- * we use requires balancing a few trade offs.
- *
- * For small objects we use kmem_alloc() because as long as you are
- * only requesting a small number of pages (ideally just one) it's cheap.
- * However, when you start requesting multiple pages with kmem_alloc()
- * it gets increasingly expensive since it requires contiguous pages.
- * For this reason we shift to vmem_alloc() for slabs of large objects
- * which removes the need for contiguous pages.  We do not use
- * vmem_alloc() in all cases because there is significant locking
- * overhead in __get_vm_area_node().  This function takes a single
- * global lock when acquiring an available virtual address range which
- * serializes all vmem_alloc()'s for all slab caches.  Using slightly
- * different allocation functions for small and large objects should
- * give us the best of both worlds.
- *
- * KMC_ONSLAB                       KMC_OFFSLAB
- *
- * +------------------------+       +-----------------+
- * | spl_kmem_slab_t --+-+  |       | spl_kmem_slab_t |---+-+
- * | skc_obj_size    <-+ |  |       +-----------------+   | |
- * | spl_kmem_obj_t      |  |                             | |
- * | skc_obj_size    <---+  |       +-----------------+   | |
- * | spl_kmem_obj_t      |  |       | skc_obj_size    | <-+ |
- * | ...                 v  |       | spl_kmem_obj_t  |     |
- * +------------------------+       +-----------------+     v
- */
-/*
- * Allocate and initialize one slab for 'skc'.  The slab header sits at
- * the start of the slab allocation.  Objects are either carved out of
- * the same allocation or, for KMC_OFFSLAB caches, allocated one by one
- * with kv_alloc().  Every object is linked on to the slab's free list.
- *
- * Returns the new slab, or NULL if any allocation failed, in which case
- * everything allocated so far has been released again.
- */
-static spl_kmem_slab_t *
-spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
-{
-	spl_kmem_slab_t *slab;
-	spl_kmem_obj_t *ent, *ent_next = NULL;
-	void *mem, *buf;
-	uint32_t stride, ext_size = 0;
-	int idx;
-
-	mem = kv_alloc(skc, skc->skc_slab_size, flags);
-	if (mem == NULL)
-		return (NULL);
-
-	slab = (spl_kmem_slab_t *)mem;
-	slab->sks_magic = SKS_MAGIC;
-	slab->sks_cache = skc;
-	slab->sks_objs = skc->skc_slab_objs;
-	slab->sks_age = jiffies;
-	slab->sks_ref = 0;
-	INIT_LIST_HEAD(&slab->sks_list);
-	INIT_LIST_HEAD(&slab->sks_free_list);
-
-	stride = spl_obj_size(skc);
-	if (skc->skc_flags & KMC_OFFSLAB)
-		ext_size = spl_offslab_size(skc);
-
-	for (idx = 0; idx < slab->sks_objs; idx++) {
-		if (skc->skc_flags & KMC_OFFSLAB) {
-			buf = kv_alloc(skc, ext_size, flags);
-			if (buf == NULL)
-				goto fail;
-		} else {
-			buf = mem + spl_sks_size(skc) + (idx * stride);
-		}
-
-		ASSERT(IS_P2ALIGNED(buf, skc->skc_obj_align));
-		ent = spl_sko_from_obj(skc, buf);
-		ent->sko_magic = SKO_MAGIC;
-		ent->sko_addr = buf;
-		ent->sko_slab = slab;
-		INIT_LIST_HEAD(&ent->sko_list);
-		list_add_tail(&ent->sko_list, &slab->sks_free_list);
-	}
-
-	return (slab);
-
-fail:
-	/* Hand back the offslab objects already linked on the free list */
-	if (skc->skc_flags & KMC_OFFSLAB) {
-		list_for_each_entry_safe(ent, ent_next,
-		    &slab->sks_free_list, sko_list) {
-			kv_free(skc, ent->sko_addr, ext_size);
-		}
-	}
-
-	kv_free(skc, mem, skc->skc_slab_size);
-
-	return (NULL);
-}
-
-/*
- * Remove a slab from complete or partial list, it must be called with
- * the 'skc->skc_lock' held but the actual free must be performed
- * outside the lock to prevent deadlocking on vmem addresses.
- *
- * No memory is released here.  The slab is moved on to 'sks_list' and
- * its free objects are spliced on to 'sko_list'; the caller frees the
- * contents of both lists later.  The slab must have no referenced
- * objects (sks_ref == 0).
- */
-static void
-spl_slab_free(spl_kmem_slab_t *sks,
-    struct list_head *sks_list, struct list_head *sko_list)
-{
-	spl_kmem_cache_t *skc;
-
-	ASSERT(sks->sks_magic == SKS_MAGIC);
-	ASSERT(sks->sks_ref == 0);
-
-	skc = sks->sks_cache;
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-
-	/*
-	 * Update slab/objects counters in the cache, then remove the
-	 * slab from the skc->skc_partial_list.  Finally add the slab
-	 * and all its objects in to the private work lists where the
-	 * destructors will be called and the memory freed to the system.
-	 */
-	skc->skc_obj_total -= sks->sks_objs;
-	skc->skc_slab_total--;
-	list_del(&sks->sks_list);
-	list_add(&sks->sks_list, sks_list);
-	list_splice_init(&sks->sks_free_list, sko_list);
-}
-
-/*
- * Release every empty slab found at the tail of the cache's partial list
- * back to the system.
- */
-static void
-spl_slab_reclaim(spl_kmem_cache_t *skc)
-{
-	LIST_HEAD(dead_slabs);
-	LIST_HEAD(dead_objs);
-	spl_kmem_slab_t *slab = NULL, *slab_next = NULL;
-	spl_kmem_obj_t *ent = NULL, *ent_next = NULL;
-	uint32_t ext_size = 0;
-
-	/*
-	 * Phase one, under the spin lock: unhook the empty slabs and their
-	 * objects on to private lists.  Empty slabs are kept at the end of
-	 * skc->skc_partial_list, so the reverse walk stops at the first
-	 * slab which still has referenced objects.
-	 */
-	spin_lock(&skc->skc_lock);
-	list_for_each_entry_safe_reverse(slab, slab_next,
-	    &skc->skc_partial_list, sks_list) {
-		if (slab->sks_ref > 0)
-			break;
-
-		spl_slab_free(slab, &dead_slabs, &dead_objs);
-	}
-	spin_unlock(&skc->skc_lock);
-
-	/*
-	 * Phase two, outside skc->skc_lock: free any offslab objects and
-	 * then the slabs themselves.  Working from the private lists means
-	 * the lock is not held while memory is returned to the system.
-	 */
-	if (skc->skc_flags & KMC_OFFSLAB)
-		ext_size = spl_offslab_size(skc);
-
-	list_for_each_entry_safe(ent, ent_next, &dead_objs, sko_list) {
-		ASSERT(ent->sko_magic == SKO_MAGIC);
-
-		if (skc->skc_flags & KMC_OFFSLAB)
-			kv_free(skc, ent->sko_addr, ext_size);
-	}
-
-	list_for_each_entry_safe(slab, slab_next, &dead_slabs, sks_list) {
-		ASSERT(slab->sks_magic == SKS_MAGIC);
-		kv_free(skc, slab, skc->skc_slab_size);
-	}
-}
-
-/*
- * Look up the emergency object whose address equals 'obj' in the red
- * black tree rooted at 'root'.  Returns the matching entry or NULL.
- */
-static spl_kmem_emergency_t *
-spl_emergency_search(struct rb_root *root, void *obj)
-{
-	unsigned long key = (unsigned long)obj;
-	struct rb_node *cur = root->rb_node;
-	spl_kmem_emergency_t *ent;
-
-	while (cur != NULL) {
-		ent = container_of(cur, spl_kmem_emergency_t, ske_node);
-
-		if (key == ent->ske_obj)
-			return (ent);
-
-		/* Smaller addresses live to the left, larger to the right */
-		cur = (key < ent->ske_obj) ? cur->rb_left : cur->rb_right;
-	}
-
-	return (NULL);
-}
-
-/*
- * Link 'ske' in to the red black tree rooted at 'root', keyed by its
- * object address.  Returns 1 when the entry was inserted and 0 when an
- * entry with the same address is already present (nothing is changed).
- */
-static int
-spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
-{
-	struct rb_node **link = &root->rb_node;
-	struct rb_node *parent = NULL;
-	unsigned long key = ske->ske_obj;
-
-	while (*link != NULL) {
-		spl_kmem_emergency_t *cur =
-		    container_of(*link, spl_kmem_emergency_t, ske_node);
-
-		if (key == cur->ske_obj)
-			return (0);
-
-		parent = *link;
-		if (key < cur->ske_obj)
-			link = &parent->rb_left;
-		else
-			link = &parent->rb_right;
-	}
-
-	rb_link_node(&ske->ske_node, parent, link);
-	rb_insert_color(&ske->ske_node, root);
-
-	return (1);
-}
-
-/*
- * Allocate one emergency object directly from the page allocator and
- * record it in the cache's red black tree.
- *
- * Returns 0 and stores the object in '*obj' on success, -EEXIST if a
- * partial slab is available and should be used instead, -ENOMEM if
- * memory could not be allocated, or -EINVAL if the address was already
- * present in the tree.
- */
-static int
-spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
-{
-	gfp_t gfp = kmem_flags_convert(flags);
-	int order = get_order(skc->skc_obj_size);
-	spl_kmem_emergency_t *ske;
-	int no_partial, inserted;
-
-	/* One final check for a partial slab before going to the system */
-	spin_lock(&skc->skc_lock);
-	no_partial = list_empty(&skc->skc_partial_list);
-	spin_unlock(&skc->skc_lock);
-	if (!no_partial)
-		return (-EEXIST);
-
-	ske = kmalloc(sizeof (*ske), gfp);
-	if (ske == NULL)
-		return (-ENOMEM);
-
-	ske->ske_obj = __get_free_pages(gfp, order);
-	if (ske->ske_obj == 0) {
-		kfree(ske);
-		return (-ENOMEM);
-	}
-
-	spin_lock(&skc->skc_lock);
-	inserted = spl_emergency_insert(&skc->skc_emergency_tree, ske);
-	if (likely(inserted)) {
-		skc->skc_obj_total++;
-		skc->skc_obj_emergency++;
-		if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
-			skc->skc_obj_emergency_max = skc->skc_obj_emergency;
-	}
-	spin_unlock(&skc->skc_lock);
-
-	if (unlikely(!inserted)) {
-		free_pages(ske->ske_obj, order);
-		kfree(ske);
-		return (-EINVAL);
-	}
-
-	*obj = (void *)ske->ske_obj;
-
-	return (0);
-}
-
-/*
- * Find 'obj' in the cache's emergency tree, unlink it and release both
- * its pages and its tracking structure.  Returns 0 on success or
- * -ENOENT when 'obj' is not an emergency object of this cache.
- */
-static int
-spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
-{
-	int order = get_order(skc->skc_obj_size);
-	spl_kmem_emergency_t *ske;
-
-	spin_lock(&skc->skc_lock);
-	ske = spl_emergency_search(&skc->skc_emergency_tree, obj);
-	if (ske == NULL) {
-		spin_unlock(&skc->skc_lock);
-		return (-ENOENT);
-	}
-
-	rb_erase(&ske->ske_node, &skc->skc_emergency_tree);
-	skc->skc_obj_emergency--;
-	skc->skc_obj_total--;
-	spin_unlock(&skc->skc_lock);
-
-	/* The memory itself is released outside of the lock */
-	free_pages(ske->ske_obj, order);
-	kfree(ske);
-
-	return (0);
-}
-
-/*
- * Return up to 'flush' objects from the front of the per-cpu magazine to
- * their slabs via spl_cache_shrink(), then slide the remaining entries
- * down to the start of the magazine.  The locked wrapper
- * spl_cache_flush() calls this with skc->skc_lock held.
- */
-static void
-__spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
-{
-	int nr = MIN(flush, skm->skm_avail);
-	int idx;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-
-	for (idx = 0; idx < nr; idx++)
-		spl_cache_shrink(skc, skm->skm_objs[idx]);
-
-	skm->skm_avail -= nr;
-	memmove(skm->skm_objs, &(skm->skm_objs[nr]),
-	    skm->skm_avail * sizeof (void *));
-}
-
-/*
- * Locked wrapper around __spl_cache_flush(): takes skc->skc_lock for the
- * duration of the flush.
- */
-static void
-spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
-{
-	spin_lock(&skc->skc_lock);
-	__spl_cache_flush(skc, skm, flush);
-	spin_unlock(&skc->skc_lock);
-}
-
-/*
- * Age out the magazine belonging to the current cpu.  Invoked on every
- * cpu through on_each_cpu() by spl_cache_age(); 'data' is the cache.
- * At most skm_refill objects are flushed per call, and nothing is done
- * when the magazine is empty, too young, or the cache lock is contended.
- */
-static void
-spl_magazine_age(void *data)
-{
-	spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
-	spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
-
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-	ASSERT(skm->skm_cpu == smp_processor_id());
-	ASSERT(irqs_disabled());
-
-	/* There are no available objects or they are too young to age out */
-	if ((skm->skm_avail == 0) ||
-	    time_before(jiffies, skm->skm_age + skc->skc_delay * HZ))
-		return;
-
-	/*
-	 * Because we're executing in interrupt context we may have
-	 * interrupted the holder of this lock.  To avoid a potential
-	 * deadlock return if the lock is contended.
-	 */
-	if (!spin_trylock(&skc->skc_lock))
-		return;
-
-	__spl_cache_flush(skc, skm, skm->skm_refill);
-	spin_unlock(&skc->skc_lock);
-}
-
-/*
- * Called regularly to keep a downward pressure on the cache.
- *
- * Objects older than skc->skc_delay seconds in the per-cpu magazines will
- * be returned to the caches.  This is done to prevent idle magazines from
- * holding memory which could be better used elsewhere.  The delay is
- * present to prevent thrashing the magazine.
- *
- * The newly released objects may result in empty partial slabs.  Those
- * slabs should be released to the system.  Otherwise moving the objects
- * out of the magazines is just wasted work.
- *
- * The function re-dispatches itself on spl_kmem_cache_taskq unless the
- * cache is being destroyed, and records the resulting task id in
- * skc->skc_taskqid.  skc->skc_ref is held elevated while it runs.
- */
-static void
-spl_cache_age(void *data)
-{
-	spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
-	taskqid_t id = 0;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-
-	/* Dynamically disabled at run time */
-	if (!(spl_kmem_cache_expire & KMC_EXPIRE_AGE))
-		return;
-
-	atomic_inc(&skc->skc_ref);
-
-	if (!(skc->skc_flags & KMC_NOMAGAZINE))
-		on_each_cpu(spl_magazine_age, skc, 1);
-
-	spl_slab_reclaim(skc);
-
-	/* Retry the dispatch until it yields an id or a destroy is seen */
-	while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
-		id = taskq_dispatch_delay(
-		    spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP,
-		    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
-
-		/* If a destroy was issued after the dispatch, cancel it now */
-		if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id)
-			taskq_cancel_id(spl_kmem_cache_taskq, id);
-	}
-
-	spin_lock(&skc->skc_lock);
-	skc->skc_taskqid = id;
-	spin_unlock(&skc->skc_lock);
-
-	atomic_dec(&skc->skc_ref);
-}
-
-/*
- * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
- * When on-slab we want to target spl_kmem_cache_obj_per_slab.  However,
- * for very small objects we may end up with more than this so as not
- * to waste space in the minimal allocation of a single page.  Also for
- * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min,
- * lower than this and we will fail.
- *
- * On success 0 is returned and the number of objects per slab and the
- * slab size in bytes are stored in '*objs' and '*size'.  -ENOSPC is
- * returned when not even one object fits, or when a KMC_KMEM offslab
- * object exceeds SPL_MAX_ORDER_NR_PAGES pages.
- */
-static int
-spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
-{
-	uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs;
-
-	if (skc->skc_flags & KMC_OFFSLAB) {
-		/* Offslab: the slab itself only holds the slab header */
-		tgt_objs = spl_kmem_cache_obj_per_slab;
-		tgt_size = P2ROUNDUP(sizeof (spl_kmem_slab_t), PAGE_SIZE);
-
-		if ((skc->skc_flags & KMC_KMEM) &&
-		    (spl_obj_size(skc) > (SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE)))
-			return (-ENOSPC);
-	} else {
-		sks_size = spl_sks_size(skc);
-		obj_size = spl_obj_size(skc);
-		max_size = (spl_kmem_cache_max_size * 1024 * 1024);
-		tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size);
-
-		/*
-		 * KMC_KMEM slabs are allocated by __get_free_pages() which
-		 * rounds up to the nearest order.  Knowing this the size
-		 * should be rounded up to the next power of two with a hard
-		 * maximum defined by the maximum allowed allocation order.
-		 *
-		 * NOTE(review): the expression below uses
-		 * get_order(tgt_size) - 1 with a floor of order 1, which is
-		 * not a round-up of tgt_size; confirm the intent.
-		 */
-		if (skc->skc_flags & KMC_KMEM) {
-			max_size = SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE;
-			tgt_size = MIN(max_size,
-			    PAGE_SIZE * (1 << MAX(get_order(tgt_size) - 1, 1)));
-		}
-
-		/* Fit as many objects as the chosen (or capped) size allows */
-		if (tgt_size <= max_size) {
-			tgt_objs = (tgt_size - sks_size) / obj_size;
-		} else {
-			tgt_objs = (max_size - sks_size) / obj_size;
-			tgt_size = (tgt_objs * obj_size) + sks_size;
-		}
-	}
-
-	if (tgt_objs == 0)
-		return (-ENOSPC);
-
-	*objs = tgt_objs;
-	*size = tgt_size;
-
-	return (0);
-}
-
-/*
- * Pick the number of objects each per-cpu magazine may hold.  A non-zero
- * spl_kmem_cache_magazine_size tunable wins, clamped to the range 2-256.
- * Otherwise the count shrinks as the aligned object size grows.
- */
-static int
-spl_magazine_size(spl_kmem_cache_t *skc)
-{
-	uint32_t obj_size = spl_obj_size(skc);
-
-	if (spl_kmem_cache_magazine_size > 0)
-		return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
-
-	/* The thresholds below are expressed in multiples of PAGE_SIZE */
-	if (obj_size > (PAGE_SIZE * 256))
-		return (4);
-
-	if (obj_size > (PAGE_SIZE * 32))
-		return (16);
-
-	if (obj_size > (PAGE_SIZE))
-		return (64);
-
-	if (obj_size > (PAGE_SIZE / 4))
-		return (128);
-
-	return (256);
-}
-
-/*
- * Allocate and initialize the magazine for 'cpu', with room for
- * skc->skc_mag_size object pointers, on the memory node of that cpu.
- * Returns the empty magazine or NULL if the allocation failed.
- */
-static spl_kmem_magazine_t *
-spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
-{
-	int bytes = sizeof (spl_kmem_magazine_t) +
-	    sizeof (void *) * skc->skc_mag_size;
-	spl_kmem_magazine_t *mag;
-
-	mag = kmalloc_node(bytes, GFP_KERNEL, cpu_to_node(cpu));
-	if (!mag)
-		return (mag);
-
-	mag->skm_magic = SKM_MAGIC;
-	mag->skm_cache = skc;
-	mag->skm_cpu = cpu;
-	mag->skm_avail = 0;
-	mag->skm_size = skc->skc_mag_size;
-	mag->skm_refill = skc->skc_mag_refill;
-	mag->skm_age = jiffies;
-
-	return (mag);
-}
-
-/*
- * Free a per-cpu magazine associated with a specific core.  The magazine
- * must already be empty (skm_avail == 0); no objects are released here.
- */
-static void
-spl_magazine_free(spl_kmem_magazine_t *skm)
-{
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-	ASSERT(skm->skm_avail == 0);
-	kfree(skm);
-}
-
-/*
- * Create all per-cpu magazines of reasonable sizes.
- *
- * Returns 0 on success, which includes KMC_NOMAGAZINE caches for which
- * nothing is allocated, or -ENOMEM when either the magazine pointer
- * array or one of the magazines cannot be allocated.  On failure every
- * allocation made here has been released and skc->skc_mag is NULL.
- */
-static int
-spl_magazine_create(spl_kmem_cache_t *skc)
-{
-	int i = 0;
-
-	if (skc->skc_flags & KMC_NOMAGAZINE)
-		return (0);
-
-	skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
-	    num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
-	/* The array is indexed below, so a failed allocation must bail out */
-	if (skc->skc_mag == NULL)
-		return (-ENOMEM);
-
-	skc->skc_mag_size = spl_magazine_size(skc);
-	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
-
-	for_each_possible_cpu(i) {
-		skc->skc_mag[i] = spl_magazine_alloc(skc, i);
-		if (!skc->skc_mag[i]) {
-			/*
-			 * Unwind the magazines created so far.  Slots which
-			 * were never populated are still NULL from kzalloc()
-			 * and must be skipped because spl_magazine_free()
-			 * dereferences its argument.
-			 */
-			for (i--; i >= 0; i--) {
-				if (skc->skc_mag[i] != NULL)
-					spl_magazine_free(skc->skc_mag[i]);
-			}
-
-			kfree(skc->skc_mag);
-			skc->skc_mag = NULL;
-			return (-ENOMEM);
-		}
-	}
-
-	return (0);
-}
-
-/*
- * Destroy all per-cpu magazines.  Each magazine is first flushed so its
- * objects go back to their slabs, then freed, and finally the magazine
- * pointer array itself is freed.  Does nothing for KMC_NOMAGAZINE caches.
- */
-static void
-spl_magazine_destroy(spl_kmem_cache_t *skc)
-{
-	spl_kmem_magazine_t *skm;
-	int i = 0;
-
-	if (skc->skc_flags & KMC_NOMAGAZINE)
-		return;
-
-	for_each_possible_cpu(i) {
-		skm = skc->skc_mag[i];
-		spl_cache_flush(skc, skm, skm->skm_avail);
-		spl_magazine_free(skm);
-	}
-
-	kfree(skc->skc_mag);
-}
-
-/*
- * Create an object cache based on the following arguments:
- * name		cache name
- * size		cache object size
- * align	cache object alignment
- * ctor		cache object constructor
- * dtor		cache object destructor
- * reclaim	cache object reclaim
- * priv		cache private data for ctor/dtor/reclaim
- * vmp		unused must be NULL
- * flags
- *	KMC_KMEM	Force SPL kmem backed cache
- *	KMC_VMEM        Force SPL vmem backed cache
- *	KMC_SLAB        Force Linux slab backed cache
- *	KMC_OFFSLAB	Locate objects off the slab
- *	KMC_NOTOUCH	Disable cache object aging (unsupported)
- *	KMC_NODEBUG	Disable debugging (unsupported)
- *	KMC_NOHASH      Disable hashing (unsupported)
- *	KMC_QCACHE	Disable qcache (unsupported)
- *	KMC_NOMAGAZINE	Enabled for kmem/vmem, Disabled for Linux slab
- *
- * Returns the new cache, already linked on spl_kmem_cache_list, or NULL
- * if any allocation or size check fails.  May sleep.
- */
-spl_kmem_cache_t *
-spl_kmem_cache_create(char *name, size_t size, size_t align,
-    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
-    void *priv, void *vmp, int flags)
-{
-	gfp_t lflags = kmem_flags_convert(KM_SLEEP);
-	spl_kmem_cache_t *skc;
-	int rc;
-
-	/*
-	 * Unsupported flags
-	 */
-	ASSERT0(flags & KMC_NOMAGAZINE);
-	ASSERT0(flags & KMC_NOHASH);
-	ASSERT0(flags & KMC_QCACHE);
-	ASSERT(vmp == NULL);
-
-	might_sleep();
-
-	skc = kzalloc(sizeof (*skc), lflags);
-	if (skc == NULL)
-		return (NULL);
-
-	skc->skc_magic = SKC_MAGIC;
-	skc->skc_name_size = strlen(name) + 1;
-	skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
-	if (skc->skc_name == NULL) {
-		kfree(skc);
-		return (NULL);
-	}
-	strncpy(skc->skc_name, name, skc->skc_name_size);
-
-	skc->skc_ctor = ctor;
-	skc->skc_dtor = dtor;
-	skc->skc_reclaim = reclaim;
-	skc->skc_private = priv;
-	skc->skc_vmp = vmp;
-	skc->skc_linux_cache = NULL;
-	skc->skc_flags = flags;
-	skc->skc_obj_size = size;
-	skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
-	skc->skc_delay = SPL_KMEM_CACHE_DELAY;
-	skc->skc_reap = SPL_KMEM_CACHE_REAP;
-	atomic_set(&skc->skc_ref, 0);
-
-	INIT_LIST_HEAD(&skc->skc_list);
-	INIT_LIST_HEAD(&skc->skc_complete_list);
-	INIT_LIST_HEAD(&skc->skc_partial_list);
-	skc->skc_emergency_tree = RB_ROOT;
-	spin_lock_init(&skc->skc_lock);
-	init_waitqueue_head(&skc->skc_waitq);
-	skc->skc_slab_fail = 0;
-	skc->skc_slab_create = 0;
-	skc->skc_slab_destroy = 0;
-	skc->skc_slab_total = 0;
-	skc->skc_slab_alloc = 0;
-	skc->skc_slab_max = 0;
-	skc->skc_obj_total = 0;
-	skc->skc_obj_alloc = 0;
-	skc->skc_obj_max = 0;
-	skc->skc_obj_deadlock = 0;
-	skc->skc_obj_emergency = 0;
-	skc->skc_obj_emergency_max = 0;
-
-	/*
-	 * Verify the requested alignment restriction is sane.
-	 */
-	if (align) {
-		VERIFY(ISP2(align));
-		VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
-		VERIFY3U(align, <=, PAGE_SIZE);
-		skc->skc_obj_align = align;
-	}
-
-	/*
-	 * When no specific type of slab is requested (kmem, vmem, or
-	 * linuxslab) then select a cache type based on the object size
-	 * and default tunables.
-	 */
-	if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {
-
-		if (spl_kmem_cache_slab_limit &&
-		    size <= (size_t)spl_kmem_cache_slab_limit) {
-			/*
-			 * Objects smaller than spl_kmem_cache_slab_limit can
-			 * use the Linux slab for better space-efficiency.
-			 */
-			skc->skc_flags |= KMC_SLAB;
-		} else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit) {
-			/*
-			 * Small objects, less than spl_kmem_cache_kmem_limit
-			 * per object should use kmem because their slabs are
-			 * small.
-			 */
-			skc->skc_flags |= KMC_KMEM;
-		} else {
-			/*
-			 * All other objects are considered large and are
-			 * placed on vmem backed slabs.
-			 */
-			skc->skc_flags |= KMC_VMEM;
-		}
-	}
-
-	/*
-	 * Given the type of slab allocate the required resources.
-	 */
-	if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
-		rc = spl_slab_size(skc,
-		    &skc->skc_slab_objs, &skc->skc_slab_size);
-		if (rc)
-			goto out;
-
-		rc = spl_magazine_create(skc);
-		if (rc)
-			goto out;
-	} else {
-		unsigned long slabflags = 0;
-
-		/* rc is only used to pick the error path, never returned */
-		if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
-			rc = EINVAL;
-			goto out;
-		}
-
-#if defined(SLAB_USERCOPY)
-		/*
-		 * Required for PAX-enabled kernels if the slab is to be
-		 * used for copying between user and kernel space.
-		 */
-		slabflags |= SLAB_USERCOPY;
-#endif
-
-#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
-		/*
-		 * Newer grsec patchset uses kmem_cache_create_usercopy()
-		 * instead of SLAB_USERCOPY flag
-		 */
-		skc->skc_linux_cache = kmem_cache_create_usercopy(
-		    skc->skc_name, size, align, slabflags, 0, size, NULL);
-#else
-		skc->skc_linux_cache = kmem_cache_create(
-		    skc->skc_name, size, align, slabflags, NULL);
-#endif
-		if (skc->skc_linux_cache == NULL) {
-			rc = ENOMEM;
-			goto out;
-		}
-
-#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
-		skc->skc_linux_cache->allocflags |= __GFP_COMP;
-#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
-		skc->skc_linux_cache->gfpflags |= __GFP_COMP;
-#endif
-		/* Linux slab backed caches never use SPL magazines */
-		skc->skc_flags |= KMC_NOMAGAZINE;
-	}
-
-	/* Start the periodic aging task when aging is enabled */
-	if (spl_kmem_cache_expire & KMC_EXPIRE_AGE) {
-		skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
-		    spl_cache_age, skc, TQ_SLEEP,
-		    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
-	}
-
-	down_write(&spl_kmem_cache_sem);
-	list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
-	up_write(&spl_kmem_cache_sem);
-
-	return (skc);
-out:
-	kfree(skc->skc_name);
-	kfree(skc);
-	return (NULL);
-}
-EXPORT_SYMBOL(spl_kmem_cache_create);
-
-/*
- * Register a move callback for cache defragmentation.
- * XXX: Unimplemented but harmless to stub out for now.
- */
-void
-spl_kmem_cache_set_move(spl_kmem_cache_t *skc,
-    kmem_cbrc_t (move)(void *, void *, size_t, void *))
-{
-	ASSERT(move != NULL);
-}
-EXPORT_SYMBOL(spl_kmem_cache_set_move);
-
-/*
- * Destroy a cache and all objects associated with the cache.
- */
-void
-spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
-{
-	DECLARE_WAIT_QUEUE_HEAD(wq);
-	taskqid_t id;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB));
-
-	down_write(&spl_kmem_cache_sem);
-	list_del_init(&skc->skc_list);
-	up_write(&spl_kmem_cache_sem);
-
-	/* Cancel any and wait for any pending delayed tasks */
-	VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
-	spin_lock(&skc->skc_lock);
-	id = skc->skc_taskqid;
-	spin_unlock(&skc->skc_lock);
-
-	taskq_cancel_id(spl_kmem_cache_taskq, id);
-
-	/*
-	 * Wait until all current callers complete, this is mainly
-	 * to catch the case where a low memory situation triggers a
-	 * cache reaping action which races with this destroy.
-	 */
-	wait_event(wq, atomic_read(&skc->skc_ref) == 0);
-
-	if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
-		spl_magazine_destroy(skc);
-		spl_slab_reclaim(skc);
-	} else {
-		ASSERT(skc->skc_flags & KMC_SLAB);
-		kmem_cache_destroy(skc->skc_linux_cache);
-	}
-
-	spin_lock(&skc->skc_lock);
-
-	/*
-	 * Validate there are no objects in use and free all the
-	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
-	 */
-	ASSERT3U(skc->skc_slab_alloc, ==, 0);
-	ASSERT3U(skc->skc_obj_alloc, ==, 0);
-	ASSERT3U(skc->skc_slab_total, ==, 0);
-	ASSERT3U(skc->skc_obj_total, ==, 0);
-	ASSERT3U(skc->skc_obj_emergency, ==, 0);
-	ASSERT(list_empty(&skc->skc_complete_list));
-
-	spin_unlock(&skc->skc_lock);
-
-	kfree(skc->skc_name);
-	kfree(skc);
-}
-EXPORT_SYMBOL(spl_kmem_cache_destroy);
-
-/*
- * Allocate an object from a slab attached to the cache.  This is used to
- * repopulate the per-cpu magazine caches in batches when they run low.
- */
-static void *
-spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
-{
-	spl_kmem_obj_t *sko;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(sks->sks_magic == SKS_MAGIC);
-
-	sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
-	ASSERT(sko->sko_magic == SKO_MAGIC);
-	ASSERT(sko->sko_addr != NULL);
-
-	/* Remove from sks_free_list */
-	list_del_init(&sko->sko_list);
-
-	sks->sks_age = jiffies;
-	sks->sks_ref++;
-	skc->skc_obj_alloc++;
-
-	/* Track max obj usage statistics */
-	if (skc->skc_obj_alloc > skc->skc_obj_max)
-		skc->skc_obj_max = skc->skc_obj_alloc;
-
-	/* Track max slab usage statistics */
-	if (sks->sks_ref == 1) {
-		skc->skc_slab_alloc++;
-
-		if (skc->skc_slab_alloc > skc->skc_slab_max)
-			skc->skc_slab_max = skc->skc_slab_alloc;
-	}
-
-	return (sko->sko_addr);
-}
-
-/*
- * Generic slab allocation function to run by the global work queues.
- * It is responsible for allocating a new slab, linking it in to the list
- * of partial slabs, and then waking any waiters.
- */
-static int
-__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
-{
-	spl_kmem_slab_t *sks;
-
-	fstrans_cookie_t cookie = spl_fstrans_mark();
-	sks = spl_slab_alloc(skc, flags);
-	spl_fstrans_unmark(cookie);
-
-	spin_lock(&skc->skc_lock);
-	if (sks) {
-		skc->skc_slab_total++;
-		skc->skc_obj_total += sks->sks_objs;
-		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
-
-		smp_mb__before_atomic();
-		clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
-		smp_mb__after_atomic();
-	}
-	spin_unlock(&skc->skc_lock);
-
-	return (sks == NULL ? -ENOMEM : 0);
-}
-
-static void
-spl_cache_grow_work(void *data)
-{
-	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
-	spl_kmem_cache_t *skc = ska->ska_cache;
-
-	int error = __spl_cache_grow(skc, ska->ska_flags);
-
-	atomic_dec(&skc->skc_ref);
-	smp_mb__before_atomic();
-	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
-	smp_mb__after_atomic();
-	if (error == 0)
-		wake_up_all(&skc->skc_waitq);
-
-	kfree(ska);
-}
-
-/*
- * Returns non-zero when a new slab should be available.
- */
-static int
-spl_cache_grow_wait(spl_kmem_cache_t *skc)
-{
-	return (!test_bit(KMC_BIT_GROWING, &skc->skc_flags));
-}
-
-/*
- * No available objects on any slabs, create a new slab.  Note that this
- * functionality is disabled for KMC_SLAB caches which are backed by the
- * Linux slab.
- */
-static int
-spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
-{
-	int remaining, rc = 0;
-
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
-	might_sleep();
-	*obj = NULL;
-
-	/*
-	 * Before allocating a new slab wait for any reaping to complete and
-	 * then return so the local magazine can be rechecked for new objects.
-	 */
-	if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
-		rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING,
-		    TASK_UNINTERRUPTIBLE);
-		return (rc ? rc : -EAGAIN);
-	}
-
-	/*
-	 * To reduce the overhead of context switch and improve NUMA locality,
-	 * it tries to allocate a new slab in the current process context with
-	 * KM_NOSLEEP flag. If it fails, it will launch a new taskq to do the
-	 * allocation.
-	 *
-	 * However, this can't be applied to KVM_VMEM due to a bug that
-	 * spl_vmalloc() doesn't honor gfp flags in page table allocation.
-	 */
-	if (!(skc->skc_flags & KMC_VMEM)) {
-		rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
-		if (rc == 0) {
-			wake_up_all(&skc->skc_waitq);
-			return (0);
-		}
-	}
-
-	/*
-	 * This is handled by dispatching a work request to the global work
-	 * queue.  This allows us to asynchronously allocate a new slab while
-	 * retaining the ability to safely fall back to a smaller synchronous
-	 * allocations to ensure forward progress is always maintained.
-	 */
-	if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) {
-		spl_kmem_alloc_t *ska;
-
-		ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags));
-		if (ska == NULL) {
-			clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags);
-			smp_mb__after_atomic();
-			wake_up_all(&skc->skc_waitq);
-			return (-ENOMEM);
-		}
-
-		atomic_inc(&skc->skc_ref);
-		ska->ska_cache = skc;
-		ska->ska_flags = flags;
-		taskq_init_ent(&ska->ska_tqe);
-		taskq_dispatch_ent(spl_kmem_cache_taskq,
-		    spl_cache_grow_work, ska, 0, &ska->ska_tqe);
-	}
-
-	/*
-	 * The goal here is to only detect the rare case where a virtual slab
-	 * allocation has deadlocked.  We must be careful to minimize the use
-	 * of emergency objects which are more expensive to track.  Therefore,
-	 * we set a very long timeout for the asynchronous allocation and if
-	 * the timeout is reached the cache is flagged as deadlocked.  From
-	 * this point only new emergency objects will be allocated until the
-	 * asynchronous allocation completes and clears the deadlocked flag.
-	 */
-	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
-		rc = spl_emergency_alloc(skc, flags, obj);
-	} else {
-		remaining = wait_event_timeout(skc->skc_waitq,
-		    spl_cache_grow_wait(skc), HZ / 10);
-
-		if (!remaining) {
-			spin_lock(&skc->skc_lock);
-			if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
-				set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
-				skc->skc_obj_deadlock++;
-			}
-			spin_unlock(&skc->skc_lock);
-		}
-
-		rc = -ENOMEM;
-	}
-
-	return (rc);
-}
-
-/*
- * Refill a per-cpu magazine with objects from the slabs for this cache.
- * Ideally the magazine can be repopulated using existing objects which have
- * been released, however if we are unable to locate enough free objects new
- * slabs of objects will be created.  On success NULL is returned, otherwise
- * the address of a single emergency object is returned for use by the caller.
- */
-static void *
-spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
-{
-	spl_kmem_slab_t *sks;
-	int count = 0, rc, refill;
-	void *obj = NULL;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-
-	refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
-	spin_lock(&skc->skc_lock);
-
-	while (refill > 0) {
-		/* No slabs available we may need to grow the cache */
-		if (list_empty(&skc->skc_partial_list)) {
-			spin_unlock(&skc->skc_lock);
-
-			local_irq_enable();
-			rc = spl_cache_grow(skc, flags, &obj);
-			local_irq_disable();
-
-			/* Emergency object for immediate use by caller */
-			if (rc == 0 && obj != NULL)
-				return (obj);
-
-			if (rc)
-				goto out;
-
-			/* Rescheduled to different CPU skm is not local */
-			if (skm != skc->skc_mag[smp_processor_id()])
-				goto out;
-
-			/*
-			 * Potentially rescheduled to the same CPU but
-			 * allocations may have occurred from this CPU while
-			 * we were sleeping so recalculate max refill.
-			 */
-			refill = MIN(refill, skm->skm_size - skm->skm_avail);
-
-			spin_lock(&skc->skc_lock);
-			continue;
-		}
-
-		/* Grab the next available slab */
-		sks = list_entry((&skc->skc_partial_list)->next,
-		    spl_kmem_slab_t, sks_list);
-		ASSERT(sks->sks_magic == SKS_MAGIC);
-		ASSERT(sks->sks_ref < sks->sks_objs);
-		ASSERT(!list_empty(&sks->sks_free_list));
-
-		/*
-		 * Consume as many objects as needed to refill the requested
-		 * cache.  We must also be careful not to overfill it.
-		 */
-		while (sks->sks_ref < sks->sks_objs && refill-- > 0 &&
-		    ++count) {
-			ASSERT(skm->skm_avail < skm->skm_size);
-			ASSERT(count < skm->skm_size);
-			skm->skm_objs[skm->skm_avail++] =
-			    spl_cache_obj(skc, sks);
-		}
-
-		/* Move slab to skc_complete_list when full */
-		if (sks->sks_ref == sks->sks_objs) {
-			list_del(&sks->sks_list);
-			list_add(&sks->sks_list, &skc->skc_complete_list);
-		}
-	}
-
-	spin_unlock(&skc->skc_lock);
-out:
-	return (NULL);
-}
-
-/*
- * Release an object back to the slab from which it came.
- */
-static void
-spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
-{
-	spl_kmem_slab_t *sks = NULL;
-	spl_kmem_obj_t *sko = NULL;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-
-	sko = spl_sko_from_obj(skc, obj);
-	ASSERT(sko->sko_magic == SKO_MAGIC);
-	sks = sko->sko_slab;
-	ASSERT(sks->sks_magic == SKS_MAGIC);
-	ASSERT(sks->sks_cache == skc);
-	list_add(&sko->sko_list, &sks->sks_free_list);
-
-	sks->sks_age = jiffies;
-	sks->sks_ref--;
-	skc->skc_obj_alloc--;
-
-	/*
-	 * Move slab to skc_partial_list when no longer full.  Slabs
-	 * are added to the head to keep the partial list is quasi-full
-	 * sorted order.  Fuller at the head, emptier at the tail.
-	 */
-	if (sks->sks_ref == (sks->sks_objs - 1)) {
-		list_del(&sks->sks_list);
-		list_add(&sks->sks_list, &skc->skc_partial_list);
-	}
-
-	/*
-	 * Move empty slabs to the end of the partial list so
-	 * they can be easily found and freed during reclamation.
-	 */
-	if (sks->sks_ref == 0) {
-		list_del(&sks->sks_list);
-		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
-		skc->skc_slab_alloc--;
-	}
-}
-
-/*
- * Allocate an object from the per-cpu magazine, or if the magazine
- * is empty directly allocate from a slab and repopulate the magazine.
- */
-void *
-spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
-{
-	spl_kmem_magazine_t *skm;
-	void *obj = NULL;
-
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
-	/*
-	 * Allocate directly from a Linux slab.  All optimizations are left
-	 * to the underlying cache we only need to guarantee that KM_SLEEP
-	 * callers will never fail.
-	 */
-	if (skc->skc_flags & KMC_SLAB) {
-		struct kmem_cache *slc = skc->skc_linux_cache;
-		do {
-			obj = kmem_cache_alloc(slc, kmem_flags_convert(flags));
-		} while ((obj == NULL) && !(flags & KM_NOSLEEP));
-
-		if (obj != NULL) {
-			/*
-			 * Even though we leave everything up to the
-			 * underlying cache we still keep track of
-			 * how many objects we've allocated in it for
-			 * better debuggability.
-			 */
-			spin_lock(&skc->skc_lock);
-			skc->skc_obj_alloc++;
-			spin_unlock(&skc->skc_lock);
-		}
-		goto ret;
-	}
-
-	local_irq_disable();
-
-restart:
-	/*
-	 * Safe to update per-cpu structure without lock, but
-	 * in the restart case we must be careful to reacquire
-	 * the local magazine since this may have changed
-	 * when we need to grow the cache.
-	 */
-	skm = skc->skc_mag[smp_processor_id()];
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-
-	if (likely(skm->skm_avail)) {
-		/* Object available in CPU cache, use it */
-		obj = skm->skm_objs[--skm->skm_avail];
-		skm->skm_age = jiffies;
-	} else {
-		obj = spl_cache_refill(skc, skm, flags);
-		if ((obj == NULL) && !(flags & KM_NOSLEEP))
-			goto restart;
-
-		local_irq_enable();
-		goto ret;
-	}
-
-	local_irq_enable();
-	ASSERT(obj);
-	ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
-
-ret:
-	/* Pre-emptively migrate object to CPU L1 cache */
-	if (obj) {
-		if (obj && skc->skc_ctor)
-			skc->skc_ctor(obj, skc->skc_private, flags);
-		else
-			prefetchw(obj);
-	}
-
-	return (obj);
-}
-EXPORT_SYMBOL(spl_kmem_cache_alloc);
-
-/*
- * Free an object back to the local per-cpu magazine, there is no
- * guarantee that this is the same magazine the object was originally
- * allocated from.  We may need to flush entire from the magazine
- * back to the slabs to make space.
- */
-void
-spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
-{
-	spl_kmem_magazine_t *skm;
-	unsigned long flags;
-	int do_reclaim = 0;
-	int do_emergency = 0;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
-	/*
-	 * Run the destructor
-	 */
-	if (skc->skc_dtor)
-		skc->skc_dtor(obj, skc->skc_private);
-
-	/*
-	 * Free the object from the Linux underlying Linux slab.
-	 */
-	if (skc->skc_flags & KMC_SLAB) {
-		kmem_cache_free(skc->skc_linux_cache, obj);
-		spin_lock(&skc->skc_lock);
-		skc->skc_obj_alloc--;
-		spin_unlock(&skc->skc_lock);
-		return;
-	}
-
-	/*
-	 * While a cache has outstanding emergency objects all freed objects
-	 * must be checked.  However, since emergency objects will never use
-	 * a virtual address these objects can be safely excluded as an
-	 * optimization.
-	 */
-	if (!is_vmalloc_addr(obj)) {
-		spin_lock(&skc->skc_lock);
-		do_emergency = (skc->skc_obj_emergency > 0);
-		spin_unlock(&skc->skc_lock);
-
-		if (do_emergency && (spl_emergency_free(skc, obj) == 0))
-			return;
-	}
-
-	local_irq_save(flags);
-
-	/*
-	 * Safe to update per-cpu structure without lock, but
-	 * no remote memory allocation tracking is being performed
-	 * it is entirely possible to allocate an object from one
-	 * CPU cache and return it to another.
-	 */
-	skm = skc->skc_mag[smp_processor_id()];
-	ASSERT(skm->skm_magic == SKM_MAGIC);
-
-	/*
-	 * Per-CPU cache full, flush it to make space for this object,
-	 * this may result in an empty slab which can be reclaimed once
-	 * interrupts are re-enabled.
-	 */
-	if (unlikely(skm->skm_avail >= skm->skm_size)) {
-		spl_cache_flush(skc, skm, skm->skm_refill);
-		do_reclaim = 1;
-	}
-
-	/* Available space in cache, use it */
-	skm->skm_objs[skm->skm_avail++] = obj;
-
-	local_irq_restore(flags);
-
-	if (do_reclaim)
-		spl_slab_reclaim(skc);
-}
-EXPORT_SYMBOL(spl_kmem_cache_free);
-
-/*
- * The generic shrinker function for all caches.  Under Linux a shrinker
- * may not be tightly coupled with a slab cache.  In fact Linux always
- * systematically tries calling all registered shrinker callbacks which
- * report that they contain unused objects.  Because of this we only
- * register one shrinker function in the shim layer for all slab caches.
- * We always attempt to shrink all caches when this generic shrinker
- * is called.
- *
- * If sc->nr_to_scan is zero, the caller is requesting a query of the
- * number of objects which can potentially be freed.  If it is nonzero,
- * the request is to free that many objects.
- *
- * Linux kernels >= 3.12 have the count_objects and scan_objects callbacks
- * in struct shrinker and also require the shrinker to return the number
- * of objects freed.
- *
- * Older kernels require the shrinker to return the number of freeable
- * objects following the freeing of nr_to_free.
- *
- * Linux semantics differ from those under Solaris, which are to
- * free all available objects which may (and probably will) be more
- * objects than the requested nr_to_scan.
- */
-static spl_shrinker_t
-__spl_kmem_cache_generic_shrinker(struct shrinker *shrink,
-    struct shrink_control *sc)
-{
-	spl_kmem_cache_t *skc = NULL;
-	int alloc = 0;
-
-	/*
-	 * No shrinking in a transaction context.  Can cause deadlocks.
-	 */
-	if (sc->nr_to_scan && spl_fstrans_check())
-		return (SHRINK_STOP);
-
-	down_read(&spl_kmem_cache_sem);
-	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-		if (sc->nr_to_scan) {
-#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
-			uint64_t oldalloc = skc->skc_obj_alloc;
-			spl_kmem_cache_reap_now(skc,
-			    MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
-			if (oldalloc > skc->skc_obj_alloc)
-				alloc += oldalloc - skc->skc_obj_alloc;
-#else
-			spl_kmem_cache_reap_now(skc,
-			    MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
-			alloc += skc->skc_obj_alloc;
-#endif /* HAVE_SPLIT_SHRINKER_CALLBACK */
-		} else {
-			/* Request to query number of freeable objects */
-			alloc += skc->skc_obj_alloc;
-		}
-	}
-	up_read(&spl_kmem_cache_sem);
-
-	/*
-	 * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
-	 * This functionality only exists to work around a rare issue where
-	 * shrink_slabs() is repeatedly invoked by many cores causing the
-	 * system to thrash.
-	 */
-	if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
-		return (SHRINK_STOP);
-
-	return (MAX(alloc, 0));
-}
-
-SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);
-
-/*
- * Call the registered reclaim function for a cache.  Depending on how
- * many and which objects are released it may simply repopulate the
- * local magazine which will then need to age-out.  Objects which cannot
- * fit in the magazine we will be released back to their slabs which will
- * also need to age out before being release.  This is all just best
- * effort and we do not want to thrash creating and destroying slabs.
- */
-void
-spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
-{
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-
-	atomic_inc(&skc->skc_ref);
-
-	/*
-	 * Execute the registered reclaim callback if it exists.
-	 */
-	if (skc->skc_flags & KMC_SLAB) {
-		if (skc->skc_reclaim)
-			skc->skc_reclaim(skc->skc_private);
-		goto out;
-	}
-
-	/*
-	 * Prevent concurrent cache reaping when contended.
-	 */
-	if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
-		goto out;
-
-	/*
-	 * When a reclaim function is available it may be invoked repeatedly
-	 * until at least a single slab can be freed.  This ensures that we
-	 * do free memory back to the system.  This helps minimize the chance
-	 * of an OOM event when the bulk of memory is used by the slab.
-	 *
-	 * When free slabs are already available the reclaim callback will be
-	 * skipped.  Additionally, if no forward progress is detected despite
-	 * a reclaim function the cache will be skipped to avoid deadlock.
-	 *
-	 * Longer term this would be the correct place to add the code which
-	 * repacks the slabs in order minimize fragmentation.
-	 */
-	if (skc->skc_reclaim) {
-		uint64_t objects = UINT64_MAX;
-		int do_reclaim;
-
-		do {
-			spin_lock(&skc->skc_lock);
-			do_reclaim =
-			    (skc->skc_slab_total > 0) &&
-			    ((skc->skc_slab_total-skc->skc_slab_alloc) == 0) &&
-			    (skc->skc_obj_alloc < objects);
-
-			objects = skc->skc_obj_alloc;
-			spin_unlock(&skc->skc_lock);
-
-			if (do_reclaim)
-				skc->skc_reclaim(skc->skc_private);
-
-		} while (do_reclaim);
-	}
-
-	/* Reclaim from the magazine and free all now empty slabs. */
-	if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) {
-		spl_kmem_magazine_t *skm;
-		unsigned long irq_flags;
-
-		local_irq_save(irq_flags);
-		skm = skc->skc_mag[smp_processor_id()];
-		spl_cache_flush(skc, skm, skm->skm_avail);
-		local_irq_restore(irq_flags);
-	}
-
-	spl_slab_reclaim(skc);
-	clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags);
-	smp_mb__after_atomic();
-	wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
-out:
-	atomic_dec(&skc->skc_ref);
-}
-EXPORT_SYMBOL(spl_kmem_cache_reap_now);
-
-/*
- * This is stubbed out for code consistency with other platforms.  There
- * is existing logic to prevent concurrent reaping so while this is ugly
- * it should do no harm.
- */
-int
-spl_kmem_cache_reap_active()
-{
-	return (0);
-}
-EXPORT_SYMBOL(spl_kmem_cache_reap_active);
-
-/*
- * Reap all free slabs from all registered caches.
- */
-void
-spl_kmem_reap(void)
-{
-	struct shrink_control sc;
-
-	sc.nr_to_scan = KMC_REAP_CHUNK;
-	sc.gfp_mask = GFP_KERNEL;
-
-	(void) __spl_kmem_cache_generic_shrinker(NULL, &sc);
-}
-EXPORT_SYMBOL(spl_kmem_reap);
-
-int
-spl_kmem_cache_init(void)
-{
-	init_rwsem(&spl_kmem_cache_sem);
-	INIT_LIST_HEAD(&spl_kmem_cache_list);
-	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
-	    spl_kmem_cache_kmem_threads, maxclsyspri,
-	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
-	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
-	spl_register_shrinker(&spl_kmem_cache_shrinker);
-
-	return (0);
-}
-
-void
-spl_kmem_cache_fini(void)
-{
-	spl_unregister_shrinker(&spl_kmem_cache_shrinker);
-	taskq_destroy(spl_kmem_cache_taskq);
-}

diff --git a/zfs/module/spl/spl-kmem.c b/zfs/module/spl/spl-kmem.c
deleted file mode 100644
index ca1fc14..0000000
--- a/zfs/module/spl/spl-kmem.c
+++ /dev/null

@@ -1,555 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/debug.h>
-#include <sys/sysmacros.h>
-#include <sys/kmem.h>
-#include <sys/vmem.h>
-#include <linux/mm.h>
-
-/*
- * As a general rule kmem_alloc() allocations should be small, preferably
- * just a few pages since they must by physically contiguous.  Therefore, a
- * rate limited warning will be printed to the console for any kmem_alloc()
- * which exceeds a reasonable threshold.
- *
- * The default warning threshold is set to sixteen pages but capped at 64K to
- * accommodate systems using large pages.  This value was selected to be small
- * enough to ensure the largest allocations are quickly noticed and fixed.
- * But large enough to avoid logging any warnings when a allocation size is
- * larger than optimal but not a serious concern.  Since this value is tunable,
- * developers are encouraged to set it lower when testing so any new largish
- * allocations are quickly caught.  These warnings may be disabled by setting
- * the threshold to zero.
- */
-/* BEGIN CSTYLED */
-unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
-module_param(spl_kmem_alloc_warn, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_alloc_warn,
-	"Warning threshold in bytes for a kmem_alloc()");
-EXPORT_SYMBOL(spl_kmem_alloc_warn);
-
-/*
- * Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
- * Allocations which are marginally smaller than this limit may succeed but
- * should still be avoided due to the expense of locating a contiguous range
- * of free pages.  Therefore, a maximum kmem size with reasonable safely
- * margin of 4x is set.  Kmem_alloc() allocations larger than this maximum
- * will quickly fail.  Vmem_alloc() allocations less than or equal to this
- * value will use kmalloc(), but shift to vmalloc() when exceeding this value.
- */
-unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
-module_param(spl_kmem_alloc_max, uint, 0644);
-MODULE_PARM_DESC(spl_kmem_alloc_max,
-	"Maximum size in bytes for a kmem_alloc()");
-EXPORT_SYMBOL(spl_kmem_alloc_max);
-/* END CSTYLED */
-
-int
-kmem_debugging(void)
-{
-	return (0);
-}
-EXPORT_SYMBOL(kmem_debugging);
-
-char *
-kmem_vasprintf(const char *fmt, va_list ap)
-{
-	va_list aq;
-	char *ptr;
-
-	do {
-		va_copy(aq, ap);
-		ptr = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, aq);
-		va_end(aq);
-	} while (ptr == NULL);
-
-	return (ptr);
-}
-EXPORT_SYMBOL(kmem_vasprintf);
-
-char *
-kmem_asprintf(const char *fmt, ...)
-{
-	va_list ap;
-	char *ptr;
-
-	do {
-		va_start(ap, fmt);
-		ptr = kvasprintf(kmem_flags_convert(KM_SLEEP), fmt, ap);
-		va_end(ap);
-	} while (ptr == NULL);
-
-	return (ptr);
-}
-EXPORT_SYMBOL(kmem_asprintf);
-
-static char *
-__strdup(const char *str, int flags)
-{
-	char *ptr;
-	int n;
-
-	n = strlen(str);
-	ptr = kmalloc(n + 1, kmem_flags_convert(flags));
-	if (ptr)
-		memcpy(ptr, str, n + 1);
-
-	return (ptr);
-}
-
-char *
-strdup(const char *str)
-{
-	return (__strdup(str, KM_SLEEP));
-}
-EXPORT_SYMBOL(strdup);
-
-void
-strfree(char *str)
-{
-	kfree(str);
-}
-EXPORT_SYMBOL(strfree);
-
-/*
- * General purpose unified implementation of kmem_alloc(). It is an
- * amalgamation of Linux and Illumos allocator design. It should never be
- * exported to ensure that code using kmem_alloc()/kmem_zalloc() remains
- * relatively portable.  Consumers may only access this function through
- * wrappers that enforce the common flags to ensure portability.
- */
-inline void *
-spl_kmem_alloc_impl(size_t size, int flags, int node)
-{
-	gfp_t lflags = kmem_flags_convert(flags);
-	int use_vmem = 0;
-	void *ptr;
-
-	/*
-	 * Log abnormally large allocations and rate limit the console output.
-	 * Allocations larger than spl_kmem_alloc_warn should be performed
-	 * through the vmem_alloc()/vmem_zalloc() interfaces.
-	 */
-	if ((spl_kmem_alloc_warn > 0) && (size > spl_kmem_alloc_warn) &&
-	    !(flags & KM_VMEM)) {
-		printk(KERN_WARNING
-		    "Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
-		    "https://github.com/zfsonlinux/zfs/issues/new\n",
-		    (unsigned long)size, flags);
-		dump_stack();
-	}
-
-	/*
-	 * Use a loop because kmalloc_node() can fail when GFP_KERNEL is used
-	 * unlike kmem_alloc() with KM_SLEEP on Illumos.
-	 */
-	do {
-		/*
-		 * Calling kmalloc_node() when the size >= spl_kmem_alloc_max
-		 * is unsafe.  This must fail for all for kmem_alloc() and
-		 * kmem_zalloc() callers.
-		 *
-		 * For vmem_alloc() and vmem_zalloc() callers it is permissible
-		 * to use spl_vmalloc().  However, in general use of
-		 * spl_vmalloc() is strongly discouraged because a global lock
-		 * must be acquired.  Contention on this lock can significantly
-		 * impact performance so frequently manipulating the virtual
-		 * address space is strongly discouraged.
-		 */
-		if ((size > spl_kmem_alloc_max) || use_vmem) {
-			if (flags & KM_VMEM) {
-				ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM);
-			} else {
-				return (NULL);
-			}
-		} else {
-			ptr = kmalloc_node(size, lflags, node);
-		}
-
-		if (likely(ptr) || (flags & KM_NOSLEEP))
-			return (ptr);
-
-		/*
-		 * For vmem_alloc() and vmem_zalloc() callers retry immediately
-		 * using __vmalloc() which is unlikely to fail.
-		 */
-		if ((flags & KM_VMEM) && (use_vmem == 0))  {
-			use_vmem = 1;
-			continue;
-		}
-
-		/*
-		 * Use cond_resched() instead of congestion_wait() to avoid
-		 * deadlocking systems where there are no block devices.
-		 */
-		cond_resched();
-	} while (1);
-
-	return (NULL);
-}
-
-inline void
-spl_kmem_free_impl(const void *buf, size_t size)
-{
-	if (is_vmalloc_addr(buf))
-		vfree(buf);
-	else
-		kfree(buf);
-}
-
-/*
- * Memory allocation and accounting for kmem_* * style allocations.  When
- * DEBUG_KMEM is enabled the total memory allocated will be tracked and
- * any memory leaked will be reported during module unload.
- *
- * ./configure --enable-debug-kmem
- */
-#ifdef DEBUG_KMEM
-
-/* Shim layer memory accounting */
-#ifdef HAVE_ATOMIC64_T
-atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
-unsigned long long kmem_alloc_max = 0;
-#else  /* HAVE_ATOMIC64_T */
-atomic_t kmem_alloc_used = ATOMIC_INIT(0);
-unsigned long long kmem_alloc_max = 0;
-#endif /* HAVE_ATOMIC64_T */
-
-EXPORT_SYMBOL(kmem_alloc_used);
-EXPORT_SYMBOL(kmem_alloc_max);
-
-inline void *
-spl_kmem_alloc_debug(size_t size, int flags, int node)
-{
-	void *ptr;
-
-	ptr = spl_kmem_alloc_impl(size, flags, node);
-	if (ptr) {
-		kmem_alloc_used_add(size);
-		if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
-			kmem_alloc_max = kmem_alloc_used_read();
-	}
-
-	return (ptr);
-}
-
-inline void
-spl_kmem_free_debug(const void *ptr, size_t size)
-{
-	kmem_alloc_used_sub(size);
-	spl_kmem_free_impl(ptr, size);
-}
-
-/*
- * When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
- * but also the location of every alloc and free.  When the SPL module is
- * unloaded a list of all leaked addresses and where they were allocated
- * will be dumped to the console.  Enabling this feature has a significant
- * impact on performance but it makes finding memory leaks straight forward.
- *
- * Not surprisingly with debugging enabled the xmem_locks are very highly
- * contended particularly on xfree().  If we want to run with this detailed
- * debugging enabled for anything other than debugging  we need to minimize
- * the contention by moving to a lock per xmem_table entry model.
- *
- * ./configure --enable-debug-kmem-tracking
- */
-#ifdef DEBUG_KMEM_TRACKING
-
-#include <linux/hash.h>
-#include <linux/ctype.h>
-
-#define	KMEM_HASH_BITS		10
-#define	KMEM_TABLE_SIZE		(1 << KMEM_HASH_BITS)
-
-typedef struct kmem_debug {
-	struct hlist_node kd_hlist;	/* Hash node linkage */
-	struct list_head kd_list;	/* List of all allocations */
-	void *kd_addr;			/* Allocation pointer */
-	size_t kd_size;			/* Allocation size */
-	const char *kd_func;		/* Allocation function */
-	int kd_line;			/* Allocation line */
-} kmem_debug_t;
-
-static spinlock_t kmem_lock;
-static struct hlist_head kmem_table[KMEM_TABLE_SIZE];
-static struct list_head kmem_list;
-
-static kmem_debug_t *
-kmem_del_init(spinlock_t *lock, struct hlist_head *table,
-    int bits, const void *addr)
-{
-	struct hlist_head *head;
-	struct hlist_node *node = NULL;
-	struct kmem_debug *p;
-	unsigned long flags;
-
-	spin_lock_irqsave(lock, flags);
-
-	head = &table[hash_ptr((void *)addr, bits)];
-	hlist_for_each(node, head) {
-		p = list_entry(node, struct kmem_debug, kd_hlist);
-		if (p->kd_addr == addr) {
-			hlist_del_init(&p->kd_hlist);
-			list_del_init(&p->kd_list);
-			spin_unlock_irqrestore(lock, flags);
-			return (p);
-		}
-	}
-
-	spin_unlock_irqrestore(lock, flags);
-
-	return (NULL);
-}
-
-inline void *
-spl_kmem_alloc_track(size_t size, int flags,
-    const char *func, int line, int node)
-{
-	void *ptr = NULL;
-	kmem_debug_t *dptr;
-	unsigned long irq_flags;
-
-	dptr = kmalloc(sizeof (kmem_debug_t), kmem_flags_convert(flags));
-	if (dptr == NULL)
-		return (NULL);
-
-	dptr->kd_func = __strdup(func, flags);
-	if (dptr->kd_func == NULL) {
-		kfree(dptr);
-		return (NULL);
-	}
-
-	ptr = spl_kmem_alloc_debug(size, flags, node);
-	if (ptr == NULL) {
-		kfree(dptr->kd_func);
-		kfree(dptr);
-		return (NULL);
-	}
-
-	INIT_HLIST_NODE(&dptr->kd_hlist);
-	INIT_LIST_HEAD(&dptr->kd_list);
-
-	dptr->kd_addr = ptr;
-	dptr->kd_size = size;
-	dptr->kd_line = line;
-
-	spin_lock_irqsave(&kmem_lock, irq_flags);
-	hlist_add_head(&dptr->kd_hlist,
-	    &kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]);
-	list_add_tail(&dptr->kd_list, &kmem_list);
-	spin_unlock_irqrestore(&kmem_lock, irq_flags);
-
-	return (ptr);
-}
-
-inline void
-spl_kmem_free_track(const void *ptr, size_t size)
-{
-	kmem_debug_t *dptr;
-
-	/* Ignore NULL pointer since we haven't tracked it at all */
-	if (ptr == NULL)
-		return;
-
-	/* Must exist in hash due to kmem_alloc() */
-	dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
-	ASSERT3P(dptr, !=, NULL);
-	ASSERT3S(dptr->kd_size, ==, size);
-
-	kfree(dptr->kd_func);
-	kfree(dptr);
-
-	spl_kmem_free_debug(ptr, size);
-}
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-
-/*
- * Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
- */
-void *
-spl_kmem_alloc(size_t size, int flags, const char *func, int line)
-{
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
-	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_alloc);
-
-void *
-spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
-{
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-
-	flags |= KM_ZERO;
-
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
-	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_zalloc);
-
-void
-spl_kmem_free(const void *buf, size_t size)
-{
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_free_impl(buf, size));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_free_debug(buf, size));
-#else
-	return (spl_kmem_free_track(buf, size));
-#endif
-}
-EXPORT_SYMBOL(spl_kmem_free);
-
-#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
-static char *
-spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
-{
-	int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
-	int i, flag = 1;
-
-	ASSERT(str != NULL && len >= 17);
-	memset(str, 0, len);
-
-	/*
-	 * Check for a fully printable string, and while we are at
-	 * it place the printable characters in the passed buffer.
-	 */
-	for (i = 0; i < size; i++) {
-		str[i] = ((char *)(kd->kd_addr))[i];
-		if (isprint(str[i])) {
-			continue;
-		} else {
-			/*
-			 * Minimum number of printable characters found
-			 * to make it worthwhile to print this as ascii.
-			 */
-			if (i > min)
-				break;
-
-			flag = 0;
-			break;
-		}
-	}
-
-	if (!flag) {
-		sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
-		    *((uint8_t *)kd->kd_addr),
-		    *((uint8_t *)kd->kd_addr + 2),
-		    *((uint8_t *)kd->kd_addr + 4),
-		    *((uint8_t *)kd->kd_addr + 6),
-		    *((uint8_t *)kd->kd_addr + 8),
-		    *((uint8_t *)kd->kd_addr + 10),
-		    *((uint8_t *)kd->kd_addr + 12),
-		    *((uint8_t *)kd->kd_addr + 14));
-	}
-
-	return (str);
-}
-
-static int
-spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
-{
-	int i;
-
-	spin_lock_init(lock);
-	INIT_LIST_HEAD(list);
-
-	for (i = 0; i < size; i++)
-		INIT_HLIST_HEAD(&kmem_table[i]);
-
-	return (0);
-}
-
-static void
-spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
-{
-	unsigned long flags;
-	kmem_debug_t *kd = NULL;
-	char str[17];
-
-	spin_lock_irqsave(lock, flags);
-	if (!list_empty(list))
-		printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
-		    "size", "data", "func", "line");
-
-	list_for_each_entry(kd, list, kd_list) {
-		printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr,
-		    (int)kd->kd_size, spl_sprintf_addr(kd, str, 17, 8),
-		    kd->kd_func, kd->kd_line);
-	}
-
-	spin_unlock_irqrestore(lock, flags);
-}
-#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
-
-int
-spl_kmem_init(void)
-{
-
-#ifdef DEBUG_KMEM
-	kmem_alloc_used_set(0);
-
-
-
-#ifdef DEBUG_KMEM_TRACKING
-	spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-
-	return (0);
-}
-
-void
-spl_kmem_fini(void)
-{
-#ifdef DEBUG_KMEM
-	/*
-	 * Display all unreclaimed memory addresses, including the
-	 * allocation size and the first few bytes of what's located
-	 * at that address to aid in debugging.  Performance is not
-	 * a serious concern here since it is module unload time.
-	 */
-	if (kmem_alloc_used_read() != 0)
-		printk(KERN_WARNING "kmem leaked %ld/%llu bytes\n",
-		    (unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
-
-#ifdef DEBUG_KMEM_TRACKING
-	spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
-#endif /* DEBUG_KMEM_TRACKING */
-#endif /* DEBUG_KMEM */
-}

diff --git a/zfs/module/spl/spl-kobj.c b/zfs/module/spl/spl-kobj.c
deleted file mode 100644
index 7019369..0000000
--- a/zfs/module/spl/spl-kobj.c
+++ /dev/null

@@ -1,86 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Kobj Implementation.
- */
-
-#include <sys/kobj.h>
-
-struct _buf *
-kobj_open_file(const char *name)
-{
-	struct _buf *file;
-	vnode_t *vp;
-	int rc;
-
-	file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP));
-	if (file == NULL)
-		return ((_buf_t *)-1UL);
-
-	if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) {
-		kfree(file);
-		return ((_buf_t *)-1UL);
-	}
-
-	file->vp = vp;
-
-	return (file);
-} /* kobj_open_file() */
-EXPORT_SYMBOL(kobj_open_file);
-
-void
-kobj_close_file(struct _buf *file)
-{
-	VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
-	kfree(file);
-} /* kobj_close_file() */
-EXPORT_SYMBOL(kobj_close_file);
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
-	ssize_t resid;
-
-	if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
-	    UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
-		return (-1);
-
-	return (size - resid);
-} /* kobj_read_file() */
-EXPORT_SYMBOL(kobj_read_file);
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
-	vattr_t vap;
-	int rc;
-
-	rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL);
-	if (rc)
-		return (rc);
-
-	*size = vap.va_size;
-
-	return (rc);
-} /* kobj_get_filesize() */
-EXPORT_SYMBOL(kobj_get_filesize);

diff --git a/zfs/module/spl/spl-kstat.c b/zfs/module/spl/spl-kstat.c
deleted file mode 100644
index c54378a..0000000
--- a/zfs/module/spl/spl-kstat.c
+++ /dev/null

@@ -1,778 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Kstat Implementation.
- */
-
-#include <linux/seq_file.h>
-#include <sys/kstat.h>
-#include <sys/vmem.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-
-static kmutex_t kstat_module_lock;
-static struct list_head kstat_module_list;
-static kid_t kstat_id;
-
-static int
-kstat_resize_raw(kstat_t *ksp)
-{
-	if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
-		return (ENOMEM);
-
-	vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
-	ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
-	ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
-
-	return (0);
-}
-
-void
-kstat_waitq_enter(kstat_io_t *kiop)
-{
-	hrtime_t new, delta;
-	ulong_t wcnt;
-
-	new = gethrtime();
-	delta = new - kiop->wlastupdate;
-	kiop->wlastupdate = new;
-	wcnt = kiop->wcnt++;
-	if (wcnt != 0) {
-		kiop->wlentime += delta * wcnt;
-		kiop->wtime += delta;
-	}
-}
-EXPORT_SYMBOL(kstat_waitq_enter);
-
-void
-kstat_waitq_exit(kstat_io_t *kiop)
-{
-	hrtime_t new, delta;
-	ulong_t wcnt;
-
-	new = gethrtime();
-	delta = new - kiop->wlastupdate;
-	kiop->wlastupdate = new;
-	wcnt = kiop->wcnt--;
-	ASSERT((int)wcnt > 0);
-	kiop->wlentime += delta * wcnt;
-	kiop->wtime += delta;
-}
-EXPORT_SYMBOL(kstat_waitq_exit);
-
-void
-kstat_runq_enter(kstat_io_t *kiop)
-{
-	hrtime_t new, delta;
-	ulong_t rcnt;
-
-	new = gethrtime();
-	delta = new - kiop->rlastupdate;
-	kiop->rlastupdate = new;
-	rcnt = kiop->rcnt++;
-	if (rcnt != 0) {
-		kiop->rlentime += delta * rcnt;
-		kiop->rtime += delta;
-	}
-}
-EXPORT_SYMBOL(kstat_runq_enter);
-
-void
-kstat_runq_exit(kstat_io_t *kiop)
-{
-	hrtime_t new, delta;
-	ulong_t rcnt;
-
-	new = gethrtime();
-	delta = new - kiop->rlastupdate;
-	kiop->rlastupdate = new;
-	rcnt = kiop->rcnt--;
-	ASSERT((int)rcnt > 0);
-	kiop->rlentime += delta * rcnt;
-	kiop->rtime += delta;
-}
-EXPORT_SYMBOL(kstat_runq_exit);
-
-static int
-kstat_seq_show_headers(struct seq_file *f)
-{
-	kstat_t *ksp = (kstat_t *)f->private;
-	int rc = 0;
-
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n",
-	    ksp->ks_kid, ksp->ks_type, ksp->ks_flags,
-	    ksp->ks_ndata, (int)ksp->ks_data_size,
-	    ksp->ks_crtime, ksp->ks_snaptime);
-
-	switch (ksp->ks_type) {
-		case KSTAT_TYPE_RAW:
-restart:
-			if (ksp->ks_raw_ops.headers) {
-				rc = ksp->ks_raw_ops.headers(
-				    ksp->ks_raw_buf, ksp->ks_raw_bufsize);
-				if (rc == ENOMEM && !kstat_resize_raw(ksp))
-					goto restart;
-				if (!rc)
-					seq_puts(f, ksp->ks_raw_buf);
-			} else {
-				seq_printf(f, "raw data\n");
-			}
-			break;
-		case KSTAT_TYPE_NAMED:
-			seq_printf(f, "%-31s %-4s %s\n",
-			    "name", "type", "data");
-			break;
-		case KSTAT_TYPE_INTR:
-			seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n",
-			    "hard", "soft", "watchdog",
-			    "spurious", "multsvc");
-			break;
-		case KSTAT_TYPE_IO:
-			seq_printf(f,
-			    "%-8s %-8s %-8s %-8s %-8s %-8s "
-			    "%-8s %-8s %-8s %-8s %-8s %-8s\n",
-			    "nread", "nwritten", "reads", "writes",
-			    "wtime", "wlentime", "wupdate",
-			    "rtime", "rlentime", "rupdate",
-			    "wcnt", "rcnt");
-			break;
-		case KSTAT_TYPE_TIMER:
-			seq_printf(f,
-			    "%-31s %-8s "
-			    "%-8s %-8s %-8s %-8s %-8s\n",
-			    "name", "events", "elapsed",
-			    "min", "max", "start", "stop");
-			break;
-		default:
-			PANIC("Undefined kstat type %d\n", ksp->ks_type);
-	}
-
-	return (-rc);
-}
-
-static int
-kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l)
-{
-	int i, j;
-
-	for (i = 0; ; i++) {
-		seq_printf(f, "%03x:", i);
-
-		for (j = 0; j < 16; j++) {
-			if (i * 16 + j >= l) {
-				seq_printf(f, "\n");
-				goto out;
-			}
-
-			seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]);
-		}
-		seq_printf(f, "\n");
-	}
-out:
-	return (0);
-}
-
-static int
-kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp)
-{
-	seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type);
-
-	switch (knp->data_type) {
-		case KSTAT_DATA_CHAR:
-			knp->value.c[15] = '\0'; /* NULL terminate */
-			seq_printf(f, "%-16s", knp->value.c);
-			break;
-		/*
-		 * NOTE - We need to be more careful able what tokens are
-		 * used for each arch, for now this is correct for x86_64.
-		 */
-		case KSTAT_DATA_INT32:
-			seq_printf(f, "%d", knp->value.i32);
-			break;
-		case KSTAT_DATA_UINT32:
-			seq_printf(f, "%u", knp->value.ui32);
-			break;
-		case KSTAT_DATA_INT64:
-			seq_printf(f, "%lld", (signed long long)knp->value.i64);
-			break;
-		case KSTAT_DATA_UINT64:
-			seq_printf(f, "%llu",
-			    (unsigned long long)knp->value.ui64);
-			break;
-		case KSTAT_DATA_LONG:
-			seq_printf(f, "%ld", knp->value.l);
-			break;
-		case KSTAT_DATA_ULONG:
-			seq_printf(f, "%lu", knp->value.ul);
-			break;
-		case KSTAT_DATA_STRING:
-			KSTAT_NAMED_STR_PTR(knp)
-				[KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0';
-			seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp));
-			break;
-		default:
-			PANIC("Undefined kstat data type %d\n", knp->data_type);
-	}
-
-	seq_printf(f, "\n");
-
-	return (0);
-}
-
-static int
-kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip)
-{
-	seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n",
-	    kip->intrs[KSTAT_INTR_HARD],
-	    kip->intrs[KSTAT_INTR_SOFT],
-	    kip->intrs[KSTAT_INTR_WATCHDOG],
-	    kip->intrs[KSTAT_INTR_SPURIOUS],
-	    kip->intrs[KSTAT_INTR_MULTSVC]);
-
-	return (0);
-}
-
-static int
-kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip)
-{
-	/* though wlentime & friends are signed, they will never be negative */
-	seq_printf(f,
-	    "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
-	    "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
-	    kip->nread, kip->nwritten,
-	    kip->reads, kip->writes,
-	    kip->wtime, kip->wlentime, kip->wlastupdate,
-	    kip->rtime, kip->rlentime, kip->rlastupdate,
-	    kip->wcnt,  kip->rcnt);
-
-	return (0);
-}
-
-static int
-kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp)
-{
-	seq_printf(f,
-	    "%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n",
-	    ktp->name, ktp->num_events, ktp->elapsed_time,
-	    ktp->min_time, ktp->max_time,
-	    ktp->start_time, ktp->stop_time);
-
-	return (0);
-}
-
-static int
-kstat_seq_show(struct seq_file *f, void *p)
-{
-	kstat_t *ksp = (kstat_t *)f->private;
-	int rc = 0;
-
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	switch (ksp->ks_type) {
-		case KSTAT_TYPE_RAW:
-restart:
-			if (ksp->ks_raw_ops.data) {
-				rc = ksp->ks_raw_ops.data(
-				    ksp->ks_raw_buf, ksp->ks_raw_bufsize, p);
-				if (rc == ENOMEM && !kstat_resize_raw(ksp))
-					goto restart;
-				if (!rc)
-					seq_puts(f, ksp->ks_raw_buf);
-			} else {
-				ASSERT(ksp->ks_ndata == 1);
-				rc = kstat_seq_show_raw(f, ksp->ks_data,
-				    ksp->ks_data_size);
-			}
-			break;
-		case KSTAT_TYPE_NAMED:
-			rc = kstat_seq_show_named(f, (kstat_named_t *)p);
-			break;
-		case KSTAT_TYPE_INTR:
-			rc = kstat_seq_show_intr(f, (kstat_intr_t *)p);
-			break;
-		case KSTAT_TYPE_IO:
-			rc = kstat_seq_show_io(f, (kstat_io_t *)p);
-			break;
-		case KSTAT_TYPE_TIMER:
-			rc = kstat_seq_show_timer(f, (kstat_timer_t *)p);
-			break;
-		default:
-			PANIC("Undefined kstat type %d\n", ksp->ks_type);
-	}
-
-	return (-rc);
-}
-
-static int
-kstat_default_update(kstat_t *ksp, int rw)
-{
-	ASSERT(ksp != NULL);
-
-	if (rw == KSTAT_WRITE)
-		return (EACCES);
-
-	return (0);
-}
-
-static void *
-kstat_seq_data_addr(kstat_t *ksp, loff_t n)
-{
-	void *rc = NULL;
-
-	switch (ksp->ks_type) {
-		case KSTAT_TYPE_RAW:
-			if (ksp->ks_raw_ops.addr)
-				rc = ksp->ks_raw_ops.addr(ksp, n);
-			else
-				rc = ksp->ks_data;
-			break;
-		case KSTAT_TYPE_NAMED:
-			rc = ksp->ks_data + n * sizeof (kstat_named_t);
-			break;
-		case KSTAT_TYPE_INTR:
-			rc = ksp->ks_data + n * sizeof (kstat_intr_t);
-			break;
-		case KSTAT_TYPE_IO:
-			rc = ksp->ks_data + n * sizeof (kstat_io_t);
-			break;
-		case KSTAT_TYPE_TIMER:
-			rc = ksp->ks_data + n * sizeof (kstat_timer_t);
-			break;
-		default:
-			PANIC("Undefined kstat type %d\n", ksp->ks_type);
-	}
-
-	return (rc);
-}
-
-static void *
-kstat_seq_start(struct seq_file *f, loff_t *pos)
-{
-	loff_t n = *pos;
-	kstat_t *ksp = (kstat_t *)f->private;
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	mutex_enter(ksp->ks_lock);
-
-	if (ksp->ks_type == KSTAT_TYPE_RAW) {
-		ksp->ks_raw_bufsize = PAGE_SIZE;
-		ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP);
-	}
-
-	/* Dynamically update kstat, on error existing kstats are used */
-	(void) ksp->ks_update(ksp, KSTAT_READ);
-
-	ksp->ks_snaptime = gethrtime();
-
-	if (!(ksp->ks_flags & KSTAT_FLAG_NO_HEADERS) && !n &&
-	    kstat_seq_show_headers(f))
-		return (NULL);
-
-	if (n >= ksp->ks_ndata)
-		return (NULL);
-
-	return (kstat_seq_data_addr(ksp, n));
-}
-
-static void *
-kstat_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	kstat_t *ksp = (kstat_t *)f->private;
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	++*pos;
-	if (*pos >= ksp->ks_ndata)
-		return (NULL);
-
-	return (kstat_seq_data_addr(ksp, *pos));
-}
-
-static void
-kstat_seq_stop(struct seq_file *f, void *v)
-{
-	kstat_t *ksp = (kstat_t *)f->private;
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	if (ksp->ks_type == KSTAT_TYPE_RAW)
-		vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize);
-
-	mutex_exit(ksp->ks_lock);
-}
-
-static struct seq_operations kstat_seq_ops = {
-	.show  = kstat_seq_show,
-	.start = kstat_seq_start,
-	.next  = kstat_seq_next,
-	.stop  = kstat_seq_stop,
-};
-
-static kstat_module_t *
-kstat_find_module(char *name)
-{
-	kstat_module_t *module = NULL;
-
-	list_for_each_entry(module, &kstat_module_list, ksm_module_list) {
-		if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0)
-			return (module);
-	}
-
-	return (NULL);
-}
-
-static kstat_module_t *
-kstat_create_module(char *name)
-{
-	kstat_module_t *module;
-	struct proc_dir_entry *pde;
-
-	pde = proc_mkdir(name, proc_spl_kstat);
-	if (pde == NULL)
-		return (NULL);
-
-	module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP);
-	module->ksm_proc = pde;
-	strlcpy(module->ksm_name, name, KSTAT_STRLEN+1);
-	INIT_LIST_HEAD(&module->ksm_kstat_list);
-	list_add_tail(&module->ksm_module_list, &kstat_module_list);
-
-	return (module);
-
-}
-
-static void
-kstat_delete_module(kstat_module_t *module)
-{
-	ASSERT(list_empty(&module->ksm_kstat_list));
-	remove_proc_entry(module->ksm_name, proc_spl_kstat);
-	list_del(&module->ksm_module_list);
-	kmem_free(module, sizeof (kstat_module_t));
-}
-
-static int
-proc_kstat_open(struct inode *inode, struct file *filp)
-{
-	struct seq_file *f;
-	int rc;
-
-	rc = seq_open(filp, &kstat_seq_ops);
-	if (rc)
-		return (rc);
-
-	f = filp->private_data;
-	f->private = PDE_DATA(inode);
-
-	return (rc);
-}
-
-static ssize_t
-proc_kstat_write(struct file *filp, const char __user *buf, size_t len,
-    loff_t *ppos)
-{
-	struct seq_file *f = filp->private_data;
-	kstat_t *ksp = f->private;
-	int rc;
-
-	ASSERT(ksp->ks_magic == KS_MAGIC);
-
-	mutex_enter(ksp->ks_lock);
-	rc = ksp->ks_update(ksp, KSTAT_WRITE);
-	mutex_exit(ksp->ks_lock);
-
-	if (rc)
-		return (-rc);
-
-	*ppos += len;
-	return (len);
-}
-
-static const kstat_proc_op_t proc_kstat_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open	= proc_kstat_open,
-	.proc_write	= proc_kstat_write,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-#else
-	.open		= proc_kstat_open,
-	.write		= proc_kstat_write,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-#endif
-};
-
-void
-__kstat_set_raw_ops(kstat_t *ksp,
-    int (*headers)(char *buf, size_t size),
-    int (*data)(char *buf, size_t size, void *data),
-    void *(*addr)(kstat_t *ksp, loff_t index))
-{
-	ksp->ks_raw_ops.headers = headers;
-	ksp->ks_raw_ops.data    = data;
-	ksp->ks_raw_ops.addr    = addr;
-}
-EXPORT_SYMBOL(__kstat_set_raw_ops);
-
-void
-kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
-    const char *name)
-{
-	kpep->kpe_owner = NULL;
-	kpep->kpe_proc = NULL;
-	INIT_LIST_HEAD(&kpep->kpe_list);
-	strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
-	strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
-}
-EXPORT_SYMBOL(kstat_proc_entry_init);
-
-kstat_t *
-__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
-    const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
-    uchar_t ks_flags)
-{
-	kstat_t *ksp;
-
-	ASSERT(ks_module);
-	ASSERT(ks_instance == 0);
-	ASSERT(ks_name);
-
-	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
-		ASSERT(ks_ndata == 1);
-
-	ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP);
-	if (ksp == NULL)
-		return (ksp);
-
-	mutex_enter(&kstat_module_lock);
-	ksp->ks_kid = kstat_id;
-	kstat_id++;
-	mutex_exit(&kstat_module_lock);
-
-	ksp->ks_magic = KS_MAGIC;
-	mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
-	ksp->ks_lock = &ksp->ks_private_lock;
-
-	ksp->ks_crtime = gethrtime();
-	ksp->ks_snaptime = ksp->ks_crtime;
-	ksp->ks_instance = ks_instance;
-	strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
-	ksp->ks_type = ks_type;
-	ksp->ks_flags = ks_flags;
-	ksp->ks_update = kstat_default_update;
-	ksp->ks_private = NULL;
-	ksp->ks_raw_ops.headers = NULL;
-	ksp->ks_raw_ops.data = NULL;
-	ksp->ks_raw_ops.addr = NULL;
-	ksp->ks_raw_buf = NULL;
-	ksp->ks_raw_bufsize = 0;
-	kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
-
-	switch (ksp->ks_type) {
-		case KSTAT_TYPE_RAW:
-			ksp->ks_ndata = 1;
-			ksp->ks_data_size = ks_ndata;
-			break;
-		case KSTAT_TYPE_NAMED:
-			ksp->ks_ndata = ks_ndata;
-			ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
-			break;
-		case KSTAT_TYPE_INTR:
-			ksp->ks_ndata = ks_ndata;
-			ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
-			break;
-		case KSTAT_TYPE_IO:
-			ksp->ks_ndata = ks_ndata;
-			ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
-			break;
-		case KSTAT_TYPE_TIMER:
-			ksp->ks_ndata = ks_ndata;
-			ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
-			break;
-		default:
-			PANIC("Undefined kstat type %d\n", ksp->ks_type);
-	}
-
-	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
-		ksp->ks_data = NULL;
-	} else {
-		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
-		if (ksp->ks_data == NULL) {
-			kmem_free(ksp, sizeof (*ksp));
-			ksp = NULL;
-		}
-	}
-
-	return (ksp);
-}
-EXPORT_SYMBOL(__kstat_create);
-
-static int
-kstat_detect_collision(kstat_proc_entry_t *kpep)
-{
-	kstat_module_t *module;
-	kstat_proc_entry_t *tmp = NULL;
-	char *parent;
-	char *cp;
-
-	parent = kmem_asprintf("%s", kpep->kpe_module);
-
-	if ((cp = strrchr(parent, '/')) == NULL) {
-		strfree(parent);
-		return (0);
-	}
-
-	cp[0] = '\0';
-	if ((module = kstat_find_module(parent)) != NULL) {
-		list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
-			if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
-				strfree(parent);
-				return (EEXIST);
-			}
-		}
-	}
-
-	strfree(parent);
-	return (0);
-}
-
-/*
- * Add a file to the proc filesystem under the kstat namespace (i.e.
- * /proc/spl/kstat/). The file need not necessarily be implemented as a
- * kstat.
- */
-void
-kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode,
-    const kstat_proc_op_t *proc_ops, void *data)
-{
-	kstat_module_t *module;
-	kstat_proc_entry_t *tmp = NULL;
-
-	ASSERT(kpep);
-
-	mutex_enter(&kstat_module_lock);
-
-	module = kstat_find_module(kpep->kpe_module);
-	if (module == NULL) {
-		if (kstat_detect_collision(kpep) != 0) {
-			cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
-			    " collision", kpep->kpe_module, kpep->kpe_name);
-			goto out;
-		}
-		module = kstat_create_module(kpep->kpe_module);
-		if (module == NULL)
-			goto out;
-	}
-
-	/*
-	 * Only one entry by this name per-module, on failure the module
-	 * shouldn't be deleted because we know it has at least one entry.
-	 */
-	list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
-		if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
-			goto out;
-	}
-
-	list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
-
-	kpep->kpe_owner = module;
-	kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode,
-	    module->ksm_proc, proc_ops, data);
-	if (kpep->kpe_proc == NULL) {
-		list_del_init(&kpep->kpe_list);
-		if (list_empty(&module->ksm_kstat_list))
-			kstat_delete_module(module);
-	}
-out:
-	mutex_exit(&kstat_module_lock);
-
-}
-EXPORT_SYMBOL(kstat_proc_entry_install);
-
-void
-__kstat_install(kstat_t *ksp)
-{
-	ASSERT(ksp);
-	mode_t mode;
-	/* Specify permission modes for different kstats */
-	if (strncmp(ksp->ks_proc.kpe_name, "dbufs", KSTAT_STRLEN) == 0) {
-		mode = 0600;
-	} else {
-		mode = 0644;
-	}
-	kstat_proc_entry_install(
-	    &ksp->ks_proc, mode, &proc_kstat_operations, ksp);
-}
-EXPORT_SYMBOL(__kstat_install);
-
-void
-kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
-{
-	kstat_module_t *module = kpep->kpe_owner;
-	if (kpep->kpe_proc)
-		remove_proc_entry(kpep->kpe_name, module->ksm_proc);
-
-	mutex_enter(&kstat_module_lock);
-	list_del_init(&kpep->kpe_list);
-
-	/*
-	 * Remove top level module directory if it wasn't empty before, but now
-	 * is.
-	 */
-	if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
-		kstat_delete_module(module);
-	mutex_exit(&kstat_module_lock);
-
-}
-EXPORT_SYMBOL(kstat_proc_entry_delete);
-
-void
-__kstat_delete(kstat_t *ksp)
-{
-	kstat_proc_entry_delete(&ksp->ks_proc);
-
-	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
-		kmem_free(ksp->ks_data, ksp->ks_data_size);
-
-	ksp->ks_lock = NULL;
-	mutex_destroy(&ksp->ks_private_lock);
-	kmem_free(ksp, sizeof (*ksp));
-}
-EXPORT_SYMBOL(__kstat_delete);
-
-int
-spl_kstat_init(void)
-{
-	mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL);
-	INIT_LIST_HEAD(&kstat_module_list);
-	kstat_id = 0;
-	return (0);
-}
-
-void
-spl_kstat_fini(void)
-{
-	ASSERT(list_empty(&kstat_module_list));
-	mutex_destroy(&kstat_module_lock);
-}

diff --git a/zfs/module/spl/spl-proc.c b/zfs/module/spl/spl-proc.c
deleted file mode 100644
index 40315ed..0000000
--- a/zfs/module/spl/spl-proc.c
+++ /dev/null

@@ -1,819 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Proc Implementation.
- */
-
-#include <sys/systeminfo.h>
-#include <sys/kstat.h>
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/vmem.h>
-#include <sys/taskq.h>
-#include <sys/proc.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/seq_file.h>
-#include <linux/uaccess.h>
-#include <linux/version.h>
-
-#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
-typedef struct ctl_table __no_const spl_ctl_table;
-#else
-typedef struct ctl_table spl_ctl_table;
-#endif
-
-static unsigned long table_min = 0;
-static unsigned long table_max = ~0;
-
-static struct ctl_table_header *spl_header = NULL;
-static struct proc_dir_entry *proc_spl = NULL;
-static struct proc_dir_entry *proc_spl_kmem = NULL;
-static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
-static struct proc_dir_entry *proc_spl_taskq_all = NULL;
-static struct proc_dir_entry *proc_spl_taskq = NULL;
-struct proc_dir_entry *proc_spl_kstat = NULL;
-
-static int
-proc_copyin_string(char *kbuffer, int kbuffer_size, const char *ubuffer,
-    int ubuffer_size)
-{
-	int size;
-
-	if (ubuffer_size > kbuffer_size)
-		return (-EOVERFLOW);
-
-	if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
-		return (-EFAULT);
-
-	/* strip trailing whitespace */
-	size = strnlen(kbuffer, ubuffer_size);
-	while (size-- >= 0)
-		if (!isspace(kbuffer[size]))
-			break;
-
-	/* empty string */
-	if (size < 0)
-		return (-EINVAL);
-
-	/* no space to terminate */
-	if (size == kbuffer_size)
-		return (-EOVERFLOW);
-
-	kbuffer[size + 1] = 0;
-	return (0);
-}
-
-static int
-proc_copyout_string(char *ubuffer, int ubuffer_size, const char *kbuffer,
-    char *append)
-{
-	/*
-	 * NB if 'append' != NULL, it's a single character to append to the
-	 * copied out string - usually "\n", for /proc entries and
-	 * (i.e. a terminating zero byte) for sysctl entries
-	 */
-	int size = MIN(strlen(kbuffer), ubuffer_size);
-
-	if (copy_to_user(ubuffer, kbuffer, size))
-		return (-EFAULT);
-
-	if (append != NULL && size < ubuffer_size) {
-		if (copy_to_user(ubuffer + size, append, 1))
-			return (-EFAULT);
-
-		size++;
-	}
-
-	return (size);
-}
-
-#ifdef DEBUG_KMEM
-static int
-proc_domemused(struct ctl_table *table, int write,
-    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int rc = 0;
-	unsigned long min = 0, max = ~0, val;
-	spl_ctl_table dummy = *table;
-
-	dummy.data = &val;
-	dummy.proc_handler = &proc_dointvec;
-	dummy.extra1 = &min;
-	dummy.extra2 = &max;
-
-	if (write) {
-		*ppos += *lenp;
-	} else {
-#ifdef HAVE_ATOMIC64_T
-		val = atomic64_read((atomic64_t *)table->data);
-#else
-		val = atomic_read((atomic_t *)table->data);
-#endif /* HAVE_ATOMIC64_T */
-		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
-	}
-
-	return (rc);
-}
-#endif /* DEBUG_KMEM */
-
-static int
-proc_doslab(struct ctl_table *table, int write,
-    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int rc = 0;
-	unsigned long min = 0, max = ~0, val = 0, mask;
-	spl_ctl_table dummy = *table;
-	spl_kmem_cache_t *skc = NULL;
-
-	dummy.data = &val;
-	dummy.proc_handler = &proc_dointvec;
-	dummy.extra1 = &min;
-	dummy.extra2 = &max;
-
-	if (write) {
-		*ppos += *lenp;
-	} else {
-		down_read(&spl_kmem_cache_sem);
-		mask = (unsigned long)table->data;
-
-		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-
-			/* Only use slabs of the correct kmem/vmem type */
-			if (!(skc->skc_flags & mask))
-				continue;
-
-			/* Sum the specified field for selected slabs */
-			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
-			case KMC_TOTAL:
-				val += skc->skc_slab_size * skc->skc_slab_total;
-				break;
-			case KMC_ALLOC:
-				val += skc->skc_obj_size * skc->skc_obj_alloc;
-				break;
-			case KMC_MAX:
-				val += skc->skc_obj_size * skc->skc_obj_max;
-				break;
-			}
-		}
-
-		up_read(&spl_kmem_cache_sem);
-		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
-	}
-
-	return (rc);
-}
-
-static int
-proc_dohostid(struct ctl_table *table, int write,
-    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int len, rc = 0;
-	char *end, str[32];
-
-	if (write) {
-		/*
-		 * We can't use proc_doulongvec_minmax() in the write
-		 * case here because hostid while a hex value has no
-		 * leading 0x which confuses the helper function.
-		 */
-		rc = proc_copyin_string(str, sizeof (str), buffer, *lenp);
-		if (rc < 0)
-			return (rc);
-
-		spl_hostid = simple_strtoul(str, &end, 16);
-		if (str == end)
-			return (-EINVAL);
-
-	} else {
-		len = snprintf(str, sizeof (str), "%lx",
-		    (unsigned long) zone_get_hostid(NULL));
-		if (*ppos >= len)
-			rc = 0;
-		else
-			rc = proc_copyout_string(buffer,
-			    *lenp, str + *ppos, "\n");
-
-		if (rc >= 0) {
-			*lenp = rc;
-			*ppos += rc;
-		}
-	}
-
-	return (rc);
-}
-
-static void
-taskq_seq_show_headers(struct seq_file *f)
-{
-	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
-	    "taskq", "act", "nthr", "spwn", "maxt", "pri",
-	    "mina", "maxa", "cura", "flags");
-}
-
-/* indices into the lheads array below */
-#define	LHEAD_PEND	0
-#define	LHEAD_PRIO	1
-#define	LHEAD_DELAY	2
-#define	LHEAD_WAIT	3
-#define	LHEAD_ACTIVE	4
-#define	LHEAD_SIZE	5
-
-/* BEGIN CSTYLED */
-static unsigned int spl_max_show_tasks = 512;
-module_param(spl_max_show_tasks, uint, 0644);
-MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
-/* END CSTYLED */
-
-static int
-taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
-{
-	taskq_t *tq = p;
-	taskq_thread_t *tqt = NULL;
-	spl_wait_queue_entry_t *wq;
-	struct task_struct *tsk;
-	taskq_ent_t *tqe;
-	char name[100];
-	struct list_head *lheads[LHEAD_SIZE], *lh;
-	static char *list_names[LHEAD_SIZE] =
-	    {"pend", "prio", "delay", "wait", "active" };
-	int i, j, have_lheads = 0;
-	unsigned long wflags, flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
-
-	/* get the various lists and check whether they're empty */
-	lheads[LHEAD_PEND] = &tq->tq_pend_list;
-	lheads[LHEAD_PRIO] = &tq->tq_prio_list;
-	lheads[LHEAD_DELAY] = &tq->tq_delay_list;
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
-	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
-#else
-	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
-#endif
-	lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
-
-	for (i = 0; i < LHEAD_SIZE; ++i) {
-		if (list_empty(lheads[i]))
-			lheads[i] = NULL;
-		else
-			++have_lheads;
-	}
-
-	/* early return in non-"all" mode if lists are all empty */
-	if (!allflag && !have_lheads) {
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-		return (0);
-	}
-
-	/* unlock the waitq quickly */
-	if (!lheads[LHEAD_WAIT])
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-
-	/* show the base taskq contents */
-	snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
-	seq_printf(f, "%-25s ", name);
-	seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
-	    tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
-	    tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
-	    tq->tq_nalloc, tq->tq_flags);
-
-	/* show the active list */
-	if (lheads[LHEAD_ACTIVE]) {
-		j = 0;
-		list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
-			if (j == 0)
-				seq_printf(f, "\t%s:",
-				    list_names[LHEAD_ACTIVE]);
-			else if (j == 2) {
-				seq_printf(f, "\n\t       ");
-				j = 0;
-			}
-			seq_printf(f, " [%d]%pf(%ps)",
-			    tqt->tqt_thread->pid,
-			    tqt->tqt_task->tqent_func,
-			    tqt->tqt_task->tqent_arg);
-			++j;
-		}
-		seq_printf(f, "\n");
-	}
-
-	for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
-		if (lheads[i]) {
-			j = 0;
-			list_for_each(lh, lheads[i]) {
-				if (spl_max_show_tasks != 0 &&
-				    j >= spl_max_show_tasks) {
-					seq_printf(f, "\n\t(truncated)");
-					break;
-				}
-				/* show the wait waitq list */
-				if (i == LHEAD_WAIT) {
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
-					wq = list_entry(lh,
-					    spl_wait_queue_entry_t, entry);
-#else
-					wq = list_entry(lh,
-					    spl_wait_queue_entry_t, task_list);
-#endif
-					if (j == 0)
-						seq_printf(f, "\t%s:",
-						    list_names[i]);
-					else if (j % 8 == 0)
-						seq_printf(f, "\n\t     ");
-
-					tsk = wq->private;
-					seq_printf(f, " %d", tsk->pid);
-				/* pend, prio and delay lists */
-				} else {
-					tqe = list_entry(lh, taskq_ent_t,
-					    tqent_list);
-					if (j == 0)
-						seq_printf(f, "\t%s:",
-						    list_names[i]);
-					else if (j % 2 == 0)
-						seq_printf(f, "\n\t     ");
-
-					seq_printf(f, " %pf(%ps)",
-					    tqe->tqent_func,
-					    tqe->tqent_arg);
-				}
-				++j;
-			}
-			seq_printf(f, "\n");
-		}
-	if (lheads[LHEAD_WAIT])
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (0);
-}
-
-static int
-taskq_all_seq_show(struct seq_file *f, void *p)
-{
-	return (taskq_seq_show_impl(f, p, B_TRUE));
-}
-
-static int
-taskq_seq_show(struct seq_file *f, void *p)
-{
-	return (taskq_seq_show_impl(f, p, B_FALSE));
-}
-
-static void *
-taskq_seq_start(struct seq_file *f, loff_t *pos)
-{
-	struct list_head *p;
-	loff_t n = *pos;
-
-	down_read(&tq_list_sem);
-	if (!n)
-		taskq_seq_show_headers(f);
-
-	p = tq_list.next;
-	while (n--) {
-		p = p->next;
-		if (p == &tq_list)
-		return (NULL);
-	}
-
-	return (list_entry(p, taskq_t, tq_taskqs));
-}
-
-static void *
-taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	taskq_t *tq = p;
-
-	++*pos;
-	return ((tq->tq_taskqs.next == &tq_list) ?
-	    NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
-}
-
-static void
-slab_seq_show_headers(struct seq_file *f)
-{
-	seq_printf(f,
-	    "--------------------- cache ----------"
-	    "---------------------------------------------  "
-	    "----- slab ------  "
-	    "---- object -----  "
-	    "--- emergency ---\n");
-	seq_printf(f,
-	    "name                                  "
-	    "  flags      size     alloc slabsize  objsize  "
-	    "total alloc   max  "
-	    "total alloc   max  "
-	    "dlock alloc   max\n");
-}
-
-static int
-slab_seq_show(struct seq_file *f, void *p)
-{
-	spl_kmem_cache_t *skc = p;
-
-	ASSERT(skc->skc_magic == SKC_MAGIC);
-
-	if (skc->skc_flags & KMC_SLAB) {
-		/*
-		 * This cache is backed by a generic Linux kmem cache which
-		 * has its own accounting. For these caches we only track
-		 * the number of active allocated objects that exist within
-		 * the underlying Linux slabs. For the overall statistics of
-		 * the underlying Linux cache please refer to /proc/slabinfo.
-		 */
-		spin_lock(&skc->skc_lock);
-		seq_printf(f, "%-36s  ", skc->skc_name);
-		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
-		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
-		    (long unsigned)skc->skc_flags,
-		    "-",
-		    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
-		    "-",
-		    (unsigned)skc->skc_obj_size,
-		    "-", "-", "-", "-",
-		    (long unsigned)skc->skc_obj_alloc,
-		    "-", "-", "-", "-");
-		spin_unlock(&skc->skc_lock);
-		return (0);
-	}
-
-	spin_lock(&skc->skc_lock);
-	seq_printf(f, "%-36s  ", skc->skc_name);
-	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
-	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
-	    (long unsigned)skc->skc_flags,
-	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
-	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
-	    (unsigned)skc->skc_slab_size,
-	    (unsigned)skc->skc_obj_size,
-	    (long unsigned)skc->skc_slab_total,
-	    (long unsigned)skc->skc_slab_alloc,
-	    (long unsigned)skc->skc_slab_max,
-	    (long unsigned)skc->skc_obj_total,
-	    (long unsigned)skc->skc_obj_alloc,
-	    (long unsigned)skc->skc_obj_max,
-	    (long unsigned)skc->skc_obj_deadlock,
-	    (long unsigned)skc->skc_obj_emergency,
-	    (long unsigned)skc->skc_obj_emergency_max);
-	spin_unlock(&skc->skc_lock);
-	return (0);
-}
-
-static void *
-slab_seq_start(struct seq_file *f, loff_t *pos)
-{
-	struct list_head *p;
-	loff_t n = *pos;
-
-	down_read(&spl_kmem_cache_sem);
-	if (!n)
-		slab_seq_show_headers(f);
-
-	p = spl_kmem_cache_list.next;
-	while (n--) {
-		p = p->next;
-		if (p == &spl_kmem_cache_list)
-			return (NULL);
-	}
-
-	return (list_entry(p, spl_kmem_cache_t, skc_list));
-}
-
-static void *
-slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	spl_kmem_cache_t *skc = p;
-
-	++*pos;
-	return ((skc->skc_list.next == &spl_kmem_cache_list) ?
-	    NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
-}
-
-static void
-slab_seq_stop(struct seq_file *f, void *v)
-{
-	up_read(&spl_kmem_cache_sem);
-}
-
-static struct seq_operations slab_seq_ops = {
-	.show  = slab_seq_show,
-	.start = slab_seq_start,
-	.next  = slab_seq_next,
-	.stop  = slab_seq_stop,
-};
-
-static int
-proc_slab_open(struct inode *inode, struct file *filp)
-{
-	return (seq_open(filp, &slab_seq_ops));
-}
-
-static const kstat_proc_op_t proc_slab_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open	= proc_slab_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-#else
-	.open		= proc_slab_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-#endif
-};
-
-static void
-taskq_seq_stop(struct seq_file *f, void *v)
-{
-	up_read(&tq_list_sem);
-}
-
-static struct seq_operations taskq_all_seq_ops = {
-	.show	= taskq_all_seq_show,
-	.start	= taskq_seq_start,
-	.next	= taskq_seq_next,
-	.stop	= taskq_seq_stop,
-};
-
-static struct seq_operations taskq_seq_ops = {
-	.show	= taskq_seq_show,
-	.start	= taskq_seq_start,
-	.next	= taskq_seq_next,
-	.stop	= taskq_seq_stop,
-};
-
-static int
-proc_taskq_all_open(struct inode *inode, struct file *filp)
-{
-	return (seq_open(filp, &taskq_all_seq_ops));
-}
-
-static int
-proc_taskq_open(struct inode *inode, struct file *filp)
-{
-	return (seq_open(filp, &taskq_seq_ops));
-}
-
-static const kstat_proc_op_t proc_taskq_all_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open	= proc_taskq_all_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-#else
-	.open		= proc_taskq_all_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-#endif
-};
-
-static const kstat_proc_op_t proc_taskq_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open	= proc_taskq_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-#else
-	.open		= proc_taskq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-#endif
-};
-
-static struct ctl_table spl_kmem_table[] = {
-#ifdef DEBUG_KMEM
-	{
-		.procname	= "kmem_used",
-		.data		= &kmem_alloc_used,
-#ifdef HAVE_ATOMIC64_T
-		.maxlen		= sizeof (atomic64_t),
-#else
-		.maxlen		= sizeof (atomic_t),
-#endif /* HAVE_ATOMIC64_T */
-		.mode		= 0444,
-		.proc_handler	= &proc_domemused,
-	},
-	{
-		.procname	= "kmem_max",
-		.data		= &kmem_alloc_max,
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doulongvec_minmax,
-	},
-#endif /* DEBUG_KMEM */
-	{
-		.procname	= "slab_kmem_total",
-		.data		= (void *)(KMC_KMEM | KMC_TOTAL),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler = &proc_doslab,
-	},
-	{
-		.procname	= "slab_kmem_alloc",
-		.data		= (void *)(KMC_KMEM | KMC_ALLOC),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doslab,
-	},
-	{
-		.procname	= "slab_kmem_max",
-		.data		= (void *)(KMC_KMEM | KMC_MAX),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doslab,
-	},
-	{
-		.procname	= "slab_vmem_total",
-		.data		= (void *)(KMC_VMEM | KMC_TOTAL),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doslab,
-	},
-	{
-		.procname	= "slab_vmem_alloc",
-		.data		= (void *)(KMC_VMEM | KMC_ALLOC),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doslab,
-	},
-	{
-		.procname	= "slab_vmem_max",
-		.data		= (void *)(KMC_VMEM | KMC_MAX),
-		.maxlen		= sizeof (unsigned long),
-		.extra1		= &table_min,
-		.extra2		= &table_max,
-		.mode		= 0444,
-		.proc_handler	= &proc_doslab,
-	},
-	{},
-};
-
-static struct ctl_table spl_kstat_table[] = {
-	{},
-};
-
-static struct ctl_table spl_table[] = {
-	/*
-	 * NB No .strategy entries have been provided since
-	 * sysctl(8) prefers to go via /proc for portability.
-	 */
-	{
-		.procname	= "gitrev",
-		.data		= spl_gitrev,
-		.maxlen		= sizeof (spl_gitrev),
-		.mode		= 0444,
-		.proc_handler	= &proc_dostring,
-	},
-	{
-		.procname	= "hostid",
-		.data		= &spl_hostid,
-		.maxlen		= sizeof (unsigned long),
-		.mode		= 0644,
-		.proc_handler	= &proc_dohostid,
-	},
-	{
-		.procname	= "kmem",
-		.mode		= 0555,
-		.child		= spl_kmem_table,
-	},
-	{
-		.procname	= "kstat",
-		.mode		= 0555,
-		.child		= spl_kstat_table,
-	},
-	{},
-};
-
-static struct ctl_table spl_dir[] = {
-	{
-		.procname	= "spl",
-		.mode		= 0555,
-		.child		= spl_table,
-	},
-	{}
-};
-
-static struct ctl_table spl_root[] = {
-	{
-#ifdef HAVE_CTL_NAME
-	.ctl_name = CTL_KERN,
-#endif
-	.procname = "kernel",
-	.mode = 0555,
-	.child = spl_dir,
-	},
-	{}
-};
-
-int
-spl_proc_init(void)
-{
-	int rc = 0;
-
-	spl_header = register_sysctl_table(spl_root);
-	if (spl_header == NULL)
-		return (-EUNATCH);
-
-	proc_spl = proc_mkdir("spl", NULL);
-	if (proc_spl == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
-	    &proc_taskq_all_operations, NULL);
-	if (proc_spl_taskq_all == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
-	    &proc_taskq_operations, NULL);
-	if (proc_spl_taskq == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
-	if (proc_spl_kmem == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
-	    &proc_slab_operations, NULL);
-	if (proc_spl_kmem_slab == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
-	if (proc_spl_kstat == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-out:
-	if (rc) {
-		remove_proc_entry("kstat", proc_spl);
-		remove_proc_entry("slab", proc_spl_kmem);
-		remove_proc_entry("kmem", proc_spl);
-		remove_proc_entry("taskq-all", proc_spl);
-		remove_proc_entry("taskq", proc_spl);
-		remove_proc_entry("spl", NULL);
-		unregister_sysctl_table(spl_header);
-	}
-
-	return (rc);
-}
-
-void
-spl_proc_fini(void)
-{
-	remove_proc_entry("kstat", proc_spl);
-	remove_proc_entry("slab", proc_spl_kmem);
-	remove_proc_entry("kmem", proc_spl);
-	remove_proc_entry("taskq-all", proc_spl);
-	remove_proc_entry("taskq", proc_spl);
-	remove_proc_entry("spl", NULL);
-
-	ASSERT(spl_header != NULL);
-	unregister_sysctl_table(spl_header);
-}

diff --git a/zfs/module/spl/spl-procfs-list.c b/zfs/module/spl/spl-procfs-list.c
deleted file mode 100644
index 000a54b..0000000
--- a/zfs/module/spl/spl-procfs-list.c
+++ /dev/null

@@ -1,276 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2018 by Delphix. All rights reserved.
- */
-
-#include <sys/list.h>
-#include <sys/mutex.h>
-#include <sys/procfs_list.h>
-#include <linux/proc_fs.h>
-
-/*
- * A procfs_list is a wrapper around a linked list which implements the seq_file
- * interface, allowing the contents of the list to be exposed through procfs.
- * The kernel already has some utilities to help implement the seq_file
- * interface for linked lists (seq_list_*), but they aren't appropriate for use
- * with lists that have many entries, because seq_list_start walks the list at
- * the start of each read syscall to find where it left off, so reading a file
- * ends up being quadratic in the number of entries in the list.
- *
- * This implementation avoids this penalty by maintaining a separate cursor into
- * the list per instance of the file that is open. It also maintains some extra
- * information in each node of the list to prevent reads of entries that have
- * been dropped from the list.
- *
- * Callers should only add elements to the list using procfs_list_add, which
- * adds an element to the tail of the list. Other operations can be performed
- * directly on the wrapped list using the normal list manipulation functions,
- * but elements should only be removed from the head of the list.
- */
-
-#define	NODE_ID(procfs_list, obj) \
-		(((procfs_list_node_t *)(((char *)obj) + \
-		(procfs_list)->pl_node_offset))->pln_id)
-
-typedef struct procfs_list_cursor {
-	procfs_list_t	*procfs_list;	/* List into which this cursor points */
-	void		*cached_node;	/* Most recently accessed node */
-	loff_t		cached_pos;	/* Position of cached_node */
-} procfs_list_cursor_t;
-
-static int
-procfs_list_seq_show(struct seq_file *f, void *p)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *procfs_list = cursor->procfs_list;
-
-	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
-	if (p == SEQ_START_TOKEN) {
-		if (procfs_list->pl_show_header != NULL)
-			return (procfs_list->pl_show_header(f));
-		else
-			return (0);
-	}
-	return (procfs_list->pl_show(f, p));
-}
-
-static void *
-procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
-{
-	void *next_node;
-	procfs_list_t *procfs_list = cursor->procfs_list;
-
-	if (cursor->cached_node == SEQ_START_TOKEN)
-		next_node = list_head(&procfs_list->pl_list);
-	else
-		next_node = list_next(&procfs_list->pl_list,
-		    cursor->cached_node);
-
-	if (next_node != NULL) {
-		cursor->cached_node = next_node;
-		cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node);
-		*pos = cursor->cached_pos;
-	} else {
-		/*
-		 * seq_read() expects ->next() to update the position even
-		 * when there are no more entries. Advance the position to
-		 * prevent a warning from being logged.
-		 */
-		cursor->cached_node = NULL;
-		cursor->cached_pos++;
-		*pos = cursor->cached_pos;
-	}
-
-	return (next_node);
-}
-
-static void *
-procfs_list_seq_start(struct seq_file *f, loff_t *pos)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *procfs_list = cursor->procfs_list;
-
-	mutex_enter(&procfs_list->pl_lock);
-
-	if (*pos == 0) {
-		cursor->cached_node = SEQ_START_TOKEN;
-		cursor->cached_pos = 0;
-		return (SEQ_START_TOKEN);
-	} else if (cursor->cached_node == NULL) {
-		return (NULL);
-	}
-
-	/*
-	 * Check if our cached pointer has become stale, which happens if the
-	 * the message where we left off has been dropped from the list since
-	 * the last read syscall completed.
-	 */
-	void *oldest_node = list_head(&procfs_list->pl_list);
-	if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL ||
-	    NODE_ID(procfs_list, oldest_node) > cursor->cached_pos))
-		return (ERR_PTR(-EIO));
-
-	/*
-	 * If it isn't starting from the beginning of the file, the seq_file
-	 * code will either pick up at the same position it visited last or the
-	 * following one.
-	 */
-	if (*pos == cursor->cached_pos) {
-		return (cursor->cached_node);
-	} else {
-		ASSERT3U(*pos, ==, cursor->cached_pos + 1);
-		return (procfs_list_next_node(cursor, pos));
-	}
-}
-
-static void *
-procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
-	return (procfs_list_next_node(cursor, pos));
-}
-
-static void
-procfs_list_seq_stop(struct seq_file *f, void *p)
-{
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *procfs_list = cursor->procfs_list;
-	mutex_exit(&procfs_list->pl_lock);
-}
-
-static struct seq_operations procfs_list_seq_ops = {
-	.show  = procfs_list_seq_show,
-	.start = procfs_list_seq_start,
-	.next  = procfs_list_seq_next,
-	.stop  = procfs_list_seq_stop,
-};
-
-static int
-procfs_list_open(struct inode *inode, struct file *filp)
-{
-	int rc = seq_open_private(filp, &procfs_list_seq_ops,
-	    sizeof (procfs_list_cursor_t));
-	if (rc != 0)
-		return (rc);
-
-	struct seq_file *f = filp->private_data;
-	procfs_list_cursor_t *cursor = f->private;
-	cursor->procfs_list = PDE_DATA(inode);
-	cursor->cached_node = NULL;
-	cursor->cached_pos = 0;
-
-	return (0);
-}
-
-static ssize_t
-procfs_list_write(struct file *filp, const char __user *buf, size_t len,
-    loff_t *ppos)
-{
-	struct seq_file *f = filp->private_data;
-	procfs_list_cursor_t *cursor = f->private;
-	procfs_list_t *procfs_list = cursor->procfs_list;
-	int rc;
-
-	if (procfs_list->pl_clear != NULL &&
-	    (rc = procfs_list->pl_clear(procfs_list)) != 0)
-		return (-rc);
-	return (len);
-}
-
-static const kstat_proc_op_t procfs_list_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open	= procfs_list_open,
-	.proc_write	= procfs_list_write,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release_private,
-#else
-	.open		= procfs_list_open,
-	.write		= procfs_list_write,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-#endif
-};
-
-/*
- * Initialize a procfs_list and create a file for it in the proc filesystem
- * under the kstat namespace.
- */
-void
-procfs_list_install(const char *module,
-    const char *name,
-    mode_t mode,
-    procfs_list_t *procfs_list,
-    int (*show)(struct seq_file *f, void *p),
-    int (*show_header)(struct seq_file *f),
-    int (*clear)(procfs_list_t *procfs_list),
-    size_t procfs_list_node_off)
-{
-	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
-	list_create(&procfs_list->pl_list,
-	    procfs_list_node_off + sizeof (procfs_list_node_t),
-	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
-	procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */
-	procfs_list->pl_show = show;
-	procfs_list->pl_show_header = show_header;
-	procfs_list->pl_clear = clear;
-	procfs_list->pl_node_offset = procfs_list_node_off;
-
-	kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
-	kstat_proc_entry_install(&procfs_list->pl_kstat_entry, mode,
-	    &procfs_list_operations, procfs_list);
-}
-EXPORT_SYMBOL(procfs_list_install);
-
-/* Remove the proc filesystem file corresponding to the given list */
-void
-procfs_list_uninstall(procfs_list_t *procfs_list)
-{
-	kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
-}
-EXPORT_SYMBOL(procfs_list_uninstall);
-
-void
-procfs_list_destroy(procfs_list_t *procfs_list)
-{
-	ASSERT(list_is_empty(&procfs_list->pl_list));
-	list_destroy(&procfs_list->pl_list);
-	mutex_destroy(&procfs_list->pl_lock);
-}
-EXPORT_SYMBOL(procfs_list_destroy);
-
-/*
- * Add a new node to the tail of the list. While the standard list manipulation
- * functions can be use for all other operation, adding elements to the list
- * should only be done using this helper so that the id of the new node is set
- * correctly.
- */
-void
-procfs_list_add(procfs_list_t *procfs_list, void *p)
-{
-	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
-	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
-	list_insert_tail(&procfs_list->pl_list, p);
-}
-EXPORT_SYMBOL(procfs_list_add);

diff --git a/zfs/module/spl/spl-taskq.c b/zfs/module/spl/spl-taskq.c
deleted file mode 100644
index a65c956..0000000
--- a/zfs/module/spl/spl-taskq.c
+++ /dev/null

@@ -1,1290 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Task Queue Implementation.
- */
-
-#include <sys/timer.h>
-#include <sys/taskq.h>
-#include <sys/kmem.h>
-#include <sys/tsd.h>
-
-int spl_taskq_thread_bind = 0;
-module_param(spl_taskq_thread_bind, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
-
-
-int spl_taskq_thread_dynamic = 1;
-module_param(spl_taskq_thread_dynamic, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
-
-int spl_taskq_thread_priority = 1;
-module_param(spl_taskq_thread_priority, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_priority,
-	"Allow non-default priority for taskq threads");
-
-int spl_taskq_thread_sequential = 4;
-module_param(spl_taskq_thread_sequential, int, 0644);
-MODULE_PARM_DESC(spl_taskq_thread_sequential,
-	"Create new taskq threads after N sequential tasks");
-
-/* Global system-wide dynamic task queue available for all consumers */
-taskq_t *system_taskq;
-EXPORT_SYMBOL(system_taskq);
-/* Global dynamic task queue for long delay */
-taskq_t *system_delay_taskq;
-EXPORT_SYMBOL(system_delay_taskq);
-
-/* Private dedicated taskq for creating new taskq threads on demand. */
-static taskq_t *dynamic_taskq;
-static taskq_thread_t *taskq_thread_create(taskq_t *);
-
-/* List of all taskqs */
-LIST_HEAD(tq_list);
-struct rw_semaphore tq_list_sem;
-static uint_t taskq_tsd;
-
-static int
-task_km_flags(uint_t flags)
-{
-	if (flags & TQ_NOSLEEP)
-		return (KM_NOSLEEP);
-
-	if (flags & TQ_PUSHPAGE)
-		return (KM_PUSHPAGE);
-
-	return (KM_SLEEP);
-}
-
-/*
- * taskq_find_by_name - Find the largest instance number of a named taskq.
- */
-static int
-taskq_find_by_name(const char *name)
-{
-	struct list_head *tql = NULL;
-	taskq_t *tq;
-
-	list_for_each_prev(tql, &tq_list) {
-		tq = list_entry(tql, taskq_t, tq_taskqs);
-		if (strcmp(name, tq->tq_name) == 0)
-			return (tq->tq_instance);
-	}
-	return (-1);
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, returns a list_t which
- * is not attached to the free, work, or pending taskq lists.
- */
-static taskq_ent_t *
-task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
-{
-	taskq_ent_t *t;
-	int count = 0;
-
-	ASSERT(tq);
-retry:
-	/* Acquire taskq_ent_t's from free list if available */
-	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
-		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
-
-		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-		ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL));
-		ASSERT(!timer_pending(&t->tqent_timer));
-
-		list_del_init(&t->tqent_list);
-		return (t);
-	}
-
-	/* Free list is empty and memory allocations are prohibited */
-	if (flags & TQ_NOALLOC)
-		return (NULL);
-
-	/* Hit maximum taskq_ent_t pool size */
-	if (tq->tq_nalloc >= tq->tq_maxalloc) {
-		if (flags & TQ_NOSLEEP)
-			return (NULL);
-
-		/*
-		 * Sleep periodically polling the free list for an available
-		 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
-		 * but we cannot block forever waiting for an taskq_ent_t to
-		 * show up in the free list, otherwise a deadlock can happen.
-		 *
-		 * Therefore, we need to allocate a new task even if the number
-		 * of allocated tasks is above tq->tq_maxalloc, but we still
-		 * end up delaying the task allocation by one second, thereby
-		 * throttling the task dispatch rate.
-		 */
-		spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
-		schedule_timeout(HZ / 100);
-		spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
-		    tq->tq_lock_class);
-		if (count < 100) {
-			count++;
-			goto retry;
-		}
-	}
-
-	spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
-	t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
-	spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
-
-	if (t) {
-		taskq_init_ent(t);
-		tq->tq_nalloc++;
-	}
-
-	return (t);
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
- * to already be removed from the free, work, or pending taskq lists.
- */
-static void
-task_free(taskq_t *tq, taskq_ent_t *t)
-{
-	ASSERT(tq);
-	ASSERT(t);
-	ASSERT(list_empty(&t->tqent_list));
-	ASSERT(!timer_pending(&t->tqent_timer));
-
-	kmem_free(t, sizeof (taskq_ent_t));
-	tq->tq_nalloc--;
-}
-
-/*
- * NOTE: Must be called with tq->tq_lock held, either destroys the
- * taskq_ent_t if too many exist or moves it to the free list for later use.
- */
-static void
-task_done(taskq_t *tq, taskq_ent_t *t)
-{
-	ASSERT(tq);
-	ASSERT(t);
-
-	/* Wake tasks blocked in taskq_wait_id() */
-	wake_up_all(&t->tqent_waitq);
-
-	list_del_init(&t->tqent_list);
-
-	if (tq->tq_nalloc <= tq->tq_minalloc) {
-		t->tqent_id = TASKQID_INVALID;
-		t->tqent_func = NULL;
-		t->tqent_arg = NULL;
-		t->tqent_flags = 0;
-
-		list_add_tail(&t->tqent_list, &tq->tq_free_list);
-	} else {
-		task_free(tq, t);
-	}
-}
-
-/*
- * When a delayed task timer expires remove it from the delay list and
- * add it to the priority list in order for immediate processing.
- */
-static void
-task_expire_impl(taskq_ent_t *t)
-{
-	taskq_ent_t *w;
-	taskq_t *tq = t->tqent_taskq;
-	struct list_head *l = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-
-	if (t->tqent_flags & TQENT_FLAG_CANCEL) {
-		ASSERT(list_empty(&t->tqent_list));
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-		return;
-	}
-
-	t->tqent_birth = jiffies;
-	/*
-	 * The priority list must be maintained in strict task id order
-	 * from lowest to highest for lowest_id to be easily calculable.
-	 */
-	list_del(&t->tqent_list);
-	list_for_each_prev(l, &tq->tq_prio_list) {
-		w = list_entry(l, taskq_ent_t, tqent_list);
-		if (w->tqent_id < t->tqent_id) {
-			list_add(&t->tqent_list, l);
-			break;
-		}
-	}
-	if (l == &tq->tq_prio_list)
-		list_add(&t->tqent_list, &tq->tq_prio_list);
-
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	wake_up(&tq->tq_work_waitq);
-}
-
-static void
-task_expire(spl_timer_list_t tl)
-{
-	struct timer_list *tmr = (struct timer_list *)tl;
-	taskq_ent_t *t = from_timer(t, tmr, tqent_timer);
-	task_expire_impl(t);
-}
-
-/*
- * Returns the lowest incomplete taskqid_t.  The taskqid_t may
- * be queued on the pending list, on the priority list, on the
- * delay list, or on the work list currently being handled, but
- * it is not 100% complete yet.
- */
-static taskqid_t
-taskq_lowest_id(taskq_t *tq)
-{
-	taskqid_t lowest_id = tq->tq_next_id;
-	taskq_ent_t *t;
-	taskq_thread_t *tqt;
-
-	ASSERT(tq);
-
-	if (!list_empty(&tq->tq_pend_list)) {
-		t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_prio_list)) {
-		t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_delay_list)) {
-		t = list_entry(tq->tq_delay_list.next, taskq_ent_t, tqent_list);
-		lowest_id = MIN(lowest_id, t->tqent_id);
-	}
-
-	if (!list_empty(&tq->tq_active_list)) {
-		tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
-		    tqt_active_list);
-		ASSERT(tqt->tqt_id != TASKQID_INVALID);
-		lowest_id = MIN(lowest_id, tqt->tqt_id);
-	}
-
-	return (lowest_id);
-}
-
-/*
- * Insert a task into a list keeping the list sorted by increasing taskqid.
- */
-static void
-taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
-{
-	taskq_thread_t *w;
-	struct list_head *l = NULL;
-
-	ASSERT(tq);
-	ASSERT(tqt);
-
-	list_for_each_prev(l, &tq->tq_active_list) {
-		w = list_entry(l, taskq_thread_t, tqt_active_list);
-		if (w->tqt_id < tqt->tqt_id) {
-			list_add(&tqt->tqt_active_list, l);
-			break;
-		}
-	}
-	if (l == &tq->tq_active_list)
-		list_add(&tqt->tqt_active_list, &tq->tq_active_list);
-}
-
-/*
- * Find and return a task from the given list if it exists.  The list
- * must be in lowest to highest task id order.
- */
-static taskq_ent_t *
-taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id)
-{
-	struct list_head *l = NULL;
-	taskq_ent_t *t;
-
-	list_for_each(l, lh) {
-		t = list_entry(l, taskq_ent_t, tqent_list);
-
-		if (t->tqent_id == id)
-			return (t);
-
-		if (t->tqent_id > id)
-			break;
-	}
-
-	return (NULL);
-}
-
-/*
- * Find an already dispatched task given the task id regardless of what
- * state it is in.  If a task is still pending it will be returned.
- * If a task is executing, then -EBUSY will be returned instead.
- * If the task has already been run then NULL is returned.
- */
-static taskq_ent_t *
-taskq_find(taskq_t *tq, taskqid_t id)
-{
-	taskq_thread_t *tqt;
-	struct list_head *l = NULL;
-	taskq_ent_t *t;
-
-	t = taskq_find_list(tq, &tq->tq_delay_list, id);
-	if (t)
-		return (t);
-
-	t = taskq_find_list(tq, &tq->tq_prio_list, id);
-	if (t)
-		return (t);
-
-	t = taskq_find_list(tq, &tq->tq_pend_list, id);
-	if (t)
-		return (t);
-
-	list_for_each(l, &tq->tq_active_list) {
-		tqt = list_entry(l, taskq_thread_t, tqt_active_list);
-		if (tqt->tqt_id == id) {
-			/*
-			 * Instead of returning tqt_task, we just return a non
-			 * NULL value to prevent misuse, since tqt_task only
-			 * has two valid fields.
-			 */
-			return (ERR_PTR(-EBUSY));
-		}
-	}
-
-	return (NULL);
-}
-
-/*
- * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
- * taskq_wait() functions below.
- *
- * Taskq waiting is accomplished by tracking the lowest outstanding task
- * id and the next available task id.  As tasks are dispatched they are
- * added to the tail of the pending, priority, or delay lists.  As worker
- * threads become available the tasks are removed from the heads of these
- * lists and linked to the worker threads.  This ensures the lists are
- * kept sorted by lowest to highest task id.
- *
- * Therefore the lowest outstanding task id can be quickly determined by
- * checking the head item from all of these lists.  This value is stored
- * with the taskq as the lowest id.  It only needs to be recalculated when
- * either the task with the current lowest id completes or is canceled.
- *
- * By blocking until the lowest task id exceeds the passed task id the
- * taskq_wait_outstanding() function can be easily implemented.  Similarly,
- * by blocking until the lowest task id matches the next task id taskq_wait()
- * can be implemented.
- *
- * Callers should be aware that when there are multiple worked threads it
- * is possible for larger task ids to complete before smaller ones.  Also
- * when the taskq contains delay tasks with small task ids callers may
- * block for a considerable length of time waiting for them to expire and
- * execute.
- */
-static int
-taskq_wait_id_check(taskq_t *tq, taskqid_t id)
-{
-	int rc;
-	unsigned long flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	rc = (taskq_find(tq, id) == NULL);
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (rc);
-}
-
-/*
- * The taskq_wait_id() function blocks until the passed task id completes.
- * This does not guarantee that all lower task ids have completed.
- */
-void
-taskq_wait_id(taskq_t *tq, taskqid_t id)
-{
-	wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id));
-}
-EXPORT_SYMBOL(taskq_wait_id);
-
-static int
-taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
-{
-	int rc;
-	unsigned long flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	rc = (id < tq->tq_lowest_id);
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (rc);
-}
-
-/*
- * The taskq_wait_outstanding() function will block until all tasks with a
- * lower taskqid than the passed 'id' have been completed.  Note that all
- * task id's are assigned monotonically at dispatch time.  Zero may be
- * passed for the id to indicate all tasks dispatch up to this point,
- * but not after, should be waited for.
- */
-void
-taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
-{
-	id = id ? id : tq->tq_next_id - 1;
-	wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id));
-}
-EXPORT_SYMBOL(taskq_wait_outstanding);
-
-static int
-taskq_wait_check(taskq_t *tq)
-{
-	int rc;
-	unsigned long flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	rc = (tq->tq_lowest_id == tq->tq_next_id);
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (rc);
-}
-
-/*
- * The taskq_wait() function will block until the taskq is empty.
- * This means that if a taskq re-dispatches work to itself taskq_wait()
- * callers will block indefinitely.
- */
-void
-taskq_wait(taskq_t *tq)
-{
-	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
-}
-EXPORT_SYMBOL(taskq_wait);
-
-int
-taskq_member(taskq_t *tq, kthread_t *t)
-{
-	return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t));
-}
-EXPORT_SYMBOL(taskq_member);
-
-/*
- * Cancel an already dispatched task given the task id.  Still pending tasks
- * will be immediately canceled, and if the task is active the function will
- * block until it completes.  Preallocated tasks which are canceled must be
- * freed by the caller.
- */
-int
-taskq_cancel_id(taskq_t *tq, taskqid_t id)
-{
-	taskq_ent_t *t;
-	int rc = ENOENT;
-	unsigned long flags;
-
-	ASSERT(tq);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	t = taskq_find(tq, id);
-	if (t && t != ERR_PTR(-EBUSY)) {
-		list_del_init(&t->tqent_list);
-		t->tqent_flags |= TQENT_FLAG_CANCEL;
-
-		/*
-		 * When canceling the lowest outstanding task id we
-		 * must recalculate the new lowest outstanding id.
-		 */
-		if (tq->tq_lowest_id == t->tqent_id) {
-			tq->tq_lowest_id = taskq_lowest_id(tq);
-			ASSERT3S(tq->tq_lowest_id, >, t->tqent_id);
-		}
-
-		/*
-		 * The task_expire() function takes the tq->tq_lock so drop
-		 * drop the lock before synchronously cancelling the timer.
-		 */
-		if (timer_pending(&t->tqent_timer)) {
-			spin_unlock_irqrestore(&tq->tq_lock, flags);
-			del_timer_sync(&t->tqent_timer);
-			spin_lock_irqsave_nested(&tq->tq_lock, flags,
-			    tq->tq_lock_class);
-		}
-
-		if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
-			task_done(tq, t);
-
-		rc = 0;
-	}
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	if (t == ERR_PTR(-EBUSY)) {
-		taskq_wait_id(tq, id);
-		rc = EBUSY;
-	}
-
-	return (rc);
-}
-EXPORT_SYMBOL(taskq_cancel_id);
-
-static int taskq_thread_spawn(taskq_t *tq);
-
-taskqid_t
-taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
-{
-	taskq_ent_t *t;
-	taskqid_t rc = TASKQID_INVALID;
-	unsigned long irqflags;
-
-	ASSERT(tq);
-	ASSERT(func);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-
-	/* Taskq being destroyed and all tasks drained */
-	if (!(tq->tq_flags & TASKQ_ACTIVE))
-		goto out;
-
-	/* Do not queue the task unless there is idle thread for it */
-	ASSERT(tq->tq_nactive <= tq->tq_nthreads);
-	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
-		/* Dynamic taskq may be able to spawn another thread */
-		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
-		    taskq_thread_spawn(tq) == 0)
-			goto out;
-	}
-
-	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
-		goto out;
-
-	spin_lock(&t->tqent_lock);
-
-	/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
-	if (flags & TQ_NOQUEUE)
-		list_add(&t->tqent_list, &tq->tq_prio_list);
-	/* Queue to the priority list instead of the pending list */
-	else if (flags & TQ_FRONT)
-		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
-	else
-		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
-
-	t->tqent_id = rc = tq->tq_next_id;
-	tq->tq_next_id++;
-	t->tqent_func = func;
-	t->tqent_arg = arg;
-	t->tqent_taskq = tq;
-	t->tqent_timer.function = NULL;
-	t->tqent_timer.expires = 0;
-	t->tqent_birth = jiffies;
-
-	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
-	spin_unlock(&t->tqent_lock);
-
-	wake_up(&tq->tq_work_waitq);
-out:
-	/* Spawn additional taskq threads if required. */
-	if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
-		(void) taskq_thread_spawn(tq);
-
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-	return (rc);
-}
-EXPORT_SYMBOL(taskq_dispatch);
-
-taskqid_t
-taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
-    uint_t flags, clock_t expire_time)
-{
-	taskqid_t rc = TASKQID_INVALID;
-	taskq_ent_t *t;
-	unsigned long irqflags;
-
-	ASSERT(tq);
-	ASSERT(func);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
-
-	/* Taskq being destroyed and all tasks drained */
-	if (!(tq->tq_flags & TASKQ_ACTIVE))
-		goto out;
-
-	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
-		goto out;
-
-	spin_lock(&t->tqent_lock);
-
-	/* Queue to the delay list for subsequent execution */
-	list_add_tail(&t->tqent_list, &tq->tq_delay_list);
-
-	t->tqent_id = rc = tq->tq_next_id;
-	tq->tq_next_id++;
-	t->tqent_func = func;
-	t->tqent_arg = arg;
-	t->tqent_taskq = tq;
-	t->tqent_timer.function = task_expire;
-	t->tqent_timer.expires = (unsigned long)expire_time;
-	add_timer(&t->tqent_timer);
-
-	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
-	spin_unlock(&t->tqent_lock);
-out:
-	/* Spawn additional taskq threads if required. */
-	if (tq->tq_nactive == tq->tq_nthreads)
-		(void) taskq_thread_spawn(tq);
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-	return (rc);
-}
-EXPORT_SYMBOL(taskq_dispatch_delay);
-
-void
-taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
-    taskq_ent_t *t)
-{
-	unsigned long irqflags;
-	ASSERT(tq);
-	ASSERT(func);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
-	    tq->tq_lock_class);
-
-	/* Taskq being destroyed and all tasks drained */
-	if (!(tq->tq_flags & TASKQ_ACTIVE)) {
-		t->tqent_id = TASKQID_INVALID;
-		goto out;
-	}
-
-	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
-		/* Dynamic taskq may be able to spawn another thread */
-		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
-		    taskq_thread_spawn(tq) == 0)
-			goto out2;
-		flags |= TQ_FRONT;
-	}
-
-	spin_lock(&t->tqent_lock);
-
-	/*
-	 * Make sure the entry is not on some other taskq; it is important to
-	 * ASSERT() under lock
-	 */
-	ASSERT(taskq_empty_ent(t));
-
-	/*
-	 * Mark it as a prealloc'd task.  This is important
-	 * to ensure that we don't free it later.
-	 */
-	t->tqent_flags |= TQENT_FLAG_PREALLOC;
-
-	/* Queue to the priority list instead of the pending list */
-	if (flags & TQ_FRONT)
-		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
-	else
-		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
-
-	t->tqent_id = tq->tq_next_id;
-	tq->tq_next_id++;
-	t->tqent_func = func;
-	t->tqent_arg = arg;
-	t->tqent_taskq = tq;
-	t->tqent_birth = jiffies;
-
-	spin_unlock(&t->tqent_lock);
-
-	wake_up(&tq->tq_work_waitq);
-out:
-	/* Spawn additional taskq threads if required. */
-	if (tq->tq_nactive == tq->tq_nthreads)
-		(void) taskq_thread_spawn(tq);
-out2:
-	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-}
-EXPORT_SYMBOL(taskq_dispatch_ent);
-
-int
-taskq_empty_ent(taskq_ent_t *t)
-{
-	return (list_empty(&t->tqent_list));
-}
-EXPORT_SYMBOL(taskq_empty_ent);
-
-void
-taskq_init_ent(taskq_ent_t *t)
-{
-	spin_lock_init(&t->tqent_lock);
-	init_waitqueue_head(&t->tqent_waitq);
-	timer_setup(&t->tqent_timer, NULL, 0);
-	INIT_LIST_HEAD(&t->tqent_list);
-	t->tqent_id = 0;
-	t->tqent_func = NULL;
-	t->tqent_arg = NULL;
-	t->tqent_flags = 0;
-	t->tqent_taskq = NULL;
-}
-EXPORT_SYMBOL(taskq_init_ent);
-
-/*
- * Return the next pending task, preference is given to tasks on the
- * priority list which were dispatched with TQ_FRONT.
- */
-static taskq_ent_t *
-taskq_next_ent(taskq_t *tq)
-{
-	struct list_head *list;
-
-	if (!list_empty(&tq->tq_prio_list))
-		list = &tq->tq_prio_list;
-	else if (!list_empty(&tq->tq_pend_list))
-		list = &tq->tq_pend_list;
-	else
-		return (NULL);
-
-	return (list_entry(list->next, taskq_ent_t, tqent_list));
-}
-
-/*
- * Spawns a new thread for the specified taskq.
- */
-static void
-taskq_thread_spawn_task(void *arg)
-{
-	taskq_t *tq = (taskq_t *)arg;
-	unsigned long flags;
-
-	if (taskq_thread_create(tq) == NULL) {
-		/* restore spawning count if failed */
-		spin_lock_irqsave_nested(&tq->tq_lock, flags,
-		    tq->tq_lock_class);
-		tq->tq_nspawn--;
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-	}
-}
-
-/*
- * Spawn addition threads for dynamic taskqs (TASKQ_DYNAMIC) the current
- * number of threads is insufficient to handle the pending tasks.  These
- * new threads must be created by the dedicated dynamic_taskq to avoid
- * deadlocks between thread creation and memory reclaim.  The system_taskq
- * which is also a dynamic taskq cannot be safely used for this.
- */
-static int
-taskq_thread_spawn(taskq_t *tq)
-{
-	int spawning = 0;
-
-	if (!(tq->tq_flags & TASKQ_DYNAMIC))
-		return (0);
-
-	if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
-	    (tq->tq_flags & TASKQ_ACTIVE)) {
-		spawning = (++tq->tq_nspawn);
-		taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
-		    tq, TQ_NOSLEEP);
-	}
-
-	return (spawning);
-}
-
-/*
- * Threads in a dynamic taskq should only exit once it has been completely
- * drained and no other threads are actively servicing tasks.  This prevents
- * threads from being created and destroyed more than is required.
- *
- * The first thread is the thread list is treated as the primary thread.
- * There is nothing special about the primary thread but in order to avoid
- * all the taskq pids from changing we opt to make it long running.
- */
-static int
-taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
-{
-	if (!(tq->tq_flags & TASKQ_DYNAMIC))
-		return (0);
-
-	if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
-	    tqt_thread_list) == tqt)
-		return (0);
-
-	return
-	    ((tq->tq_nspawn == 0) &&	/* No threads are being spawned */
-	    (tq->tq_nactive == 0) &&	/* No threads are handling tasks */
-	    (tq->tq_nthreads > 1) &&	/* More than 1 thread is running */
-	    (!taskq_next_ent(tq)) &&	/* There are no pending tasks */
-	    (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
-}
-
-static int
-taskq_thread(void *args)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	sigset_t blocked;
-	taskq_thread_t *tqt = args;
-	taskq_t *tq;
-	taskq_ent_t *t;
-	int seq_tasks = 0;
-	unsigned long flags;
-	taskq_ent_t dup_task = {};
-
-	ASSERT(tqt);
-	ASSERT(tqt->tqt_tq);
-	tq = tqt->tqt_tq;
-	current->flags |= PF_NOFREEZE;
-
-	(void) spl_fstrans_mark();
-
-	sigfillset(&blocked);
-	sigprocmask(SIG_BLOCK, &blocked, NULL);
-	flush_signals(current);
-
-	tsd_set(taskq_tsd, tq);
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	/*
-	 * If we are dynamically spawned, decrease spawning count. Note that
-	 * we could be created during taskq_create, in which case we shouldn't
-	 * do the decrement. But it's fine because taskq_create will reset
-	 * tq_nspawn later.
-	 */
-	if (tq->tq_flags & TASKQ_DYNAMIC)
-		tq->tq_nspawn--;
-
-	/* Immediately exit if more threads than allowed were created. */
-	if (tq->tq_nthreads >= tq->tq_maxthreads)
-		goto error;
-
-	tq->tq_nthreads++;
-	list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
-	wake_up(&tq->tq_wait_waitq);
-	set_current_state(TASK_INTERRUPTIBLE);
-
-	while (!kthread_should_stop()) {
-
-		if (list_empty(&tq->tq_pend_list) &&
-		    list_empty(&tq->tq_prio_list)) {
-
-			if (taskq_thread_should_stop(tq, tqt)) {
-				wake_up_all(&tq->tq_wait_waitq);
-				break;
-			}
-
-			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
-			spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-			schedule();
-			seq_tasks = 0;
-
-			spin_lock_irqsave_nested(&tq->tq_lock, flags,
-			    tq->tq_lock_class);
-			remove_wait_queue(&tq->tq_work_waitq, &wait);
-		} else {
-			__set_current_state(TASK_RUNNING);
-		}
-
-		if ((t = taskq_next_ent(tq)) != NULL) {
-			list_del_init(&t->tqent_list);
-
-			/*
-			 * A TQENT_FLAG_PREALLOC task may be reused or freed
-			 * during the task function call. Store tqent_id and
-			 * tqent_flags here.
-			 *
-			 * Also use an on stack taskq_ent_t for tqt_task
-			 * assignment in this case. We only populate the two
-			 * fields used by the only user in taskq proc file.
-			 */
-			tqt->tqt_id = t->tqent_id;
-			tqt->tqt_flags = t->tqent_flags;
-
-			if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
-				dup_task.tqent_func = t->tqent_func;
-				dup_task.tqent_arg = t->tqent_arg;
-				t = &dup_task;
-			}
-			tqt->tqt_task = t;
-
-			taskq_insert_in_order(tq, tqt);
-			tq->tq_nactive++;
-			spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-			/* Perform the requested task */
-			t->tqent_func(t->tqent_arg);
-
-			spin_lock_irqsave_nested(&tq->tq_lock, flags,
-			    tq->tq_lock_class);
-			tq->tq_nactive--;
-			list_del_init(&tqt->tqt_active_list);
-			tqt->tqt_task = NULL;
-
-			/* For prealloc'd tasks, we don't free anything. */
-			if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
-				task_done(tq, t);
-
-			/*
-			 * When the current lowest outstanding taskqid is
-			 * done calculate the new lowest outstanding id
-			 */
-			if (tq->tq_lowest_id == tqt->tqt_id) {
-				tq->tq_lowest_id = taskq_lowest_id(tq);
-				ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
-			}
-
-			/* Spawn additional taskq threads if required. */
-			if ((++seq_tasks) > spl_taskq_thread_sequential &&
-			    taskq_thread_spawn(tq))
-				seq_tasks = 0;
-
-			tqt->tqt_id = TASKQID_INVALID;
-			tqt->tqt_flags = 0;
-			wake_up_all(&tq->tq_wait_waitq);
-		} else {
-			if (taskq_thread_should_stop(tq, tqt))
-				break;
-		}
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-	}
-
-	__set_current_state(TASK_RUNNING);
-	tq->tq_nthreads--;
-	list_del_init(&tqt->tqt_thread_list);
-error:
-	kmem_free(tqt, sizeof (taskq_thread_t));
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	tsd_set(taskq_tsd, NULL);
-
-	return (0);
-}
-
-static taskq_thread_t *
-taskq_thread_create(taskq_t *tq)
-{
-	static int last_used_cpu = 0;
-	taskq_thread_t *tqt;
-
-	tqt = kmem_alloc(sizeof (*tqt), KM_PUSHPAGE);
-	INIT_LIST_HEAD(&tqt->tqt_thread_list);
-	INIT_LIST_HEAD(&tqt->tqt_active_list);
-	tqt->tqt_tq = tq;
-	tqt->tqt_id = TASKQID_INVALID;
-
-	tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
-	    "%s", tq->tq_name);
-	if (tqt->tqt_thread == NULL) {
-		kmem_free(tqt, sizeof (taskq_thread_t));
-		return (NULL);
-	}
-
-	if (spl_taskq_thread_bind) {
-		last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
-		kthread_bind(tqt->tqt_thread, last_used_cpu);
-	}
-
-	if (spl_taskq_thread_priority)
-		set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(tq->tq_pri));
-
-	wake_up_process(tqt->tqt_thread);
-
-	return (tqt);
-}
-
-taskq_t *
-taskq_create(const char *name, int nthreads, pri_t pri,
-    int minalloc, int maxalloc, uint_t flags)
-{
-	taskq_t *tq;
-	taskq_thread_t *tqt;
-	int count = 0, rc = 0, i;
-	unsigned long irqflags;
-
-	ASSERT(name != NULL);
-	ASSERT(minalloc >= 0);
-	ASSERT(maxalloc <= INT_MAX);
-	ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */
-
-	/* Scale the number of threads using nthreads as a percentage */
-	if (flags & TASKQ_THREADS_CPU_PCT) {
-		ASSERT(nthreads <= 100);
-		ASSERT(nthreads >= 0);
-		nthreads = MIN(nthreads, 100);
-		nthreads = MAX(nthreads, 0);
-		nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
-	}
-
-	tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
-	if (tq == NULL)
-		return (NULL);
-
-	spin_lock_init(&tq->tq_lock);
-	INIT_LIST_HEAD(&tq->tq_thread_list);
-	INIT_LIST_HEAD(&tq->tq_active_list);
-	tq->tq_name = strdup(name);
-	tq->tq_nactive = 0;
-	tq->tq_nthreads = 0;
-	tq->tq_nspawn = 0;
-	tq->tq_maxthreads = nthreads;
-	tq->tq_pri = pri;
-	tq->tq_minalloc = minalloc;
-	tq->tq_maxalloc = maxalloc;
-	tq->tq_nalloc = 0;
-	tq->tq_flags = (flags | TASKQ_ACTIVE);
-	tq->tq_next_id = TASKQID_INITIAL;
-	tq->tq_lowest_id = TASKQID_INITIAL;
-	INIT_LIST_HEAD(&tq->tq_free_list);
-	INIT_LIST_HEAD(&tq->tq_pend_list);
-	INIT_LIST_HEAD(&tq->tq_prio_list);
-	INIT_LIST_HEAD(&tq->tq_delay_list);
-	init_waitqueue_head(&tq->tq_work_waitq);
-	init_waitqueue_head(&tq->tq_wait_waitq);
-	tq->tq_lock_class = TQ_LOCK_GENERAL;
-	INIT_LIST_HEAD(&tq->tq_taskqs);
-
-	if (flags & TASKQ_PREPOPULATE) {
-		spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
-		    tq->tq_lock_class);
-
-		for (i = 0; i < minalloc; i++)
-			task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
-			    &irqflags));
-
-		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
-	}
-
-	if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
-		nthreads = 1;
-
-	for (i = 0; i < nthreads; i++) {
-		tqt = taskq_thread_create(tq);
-		if (tqt == NULL)
-			rc = 1;
-		else
-			count++;
-	}
-
-	/* Wait for all threads to be started before potential destroy */
-	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
-	/*
-	 * taskq_thread might have touched nspawn, but we don't want them to
-	 * because they're not dynamically spawned. So we reset it to 0
-	 */
-	tq->tq_nspawn = 0;
-
-	if (rc) {
-		taskq_destroy(tq);
-		tq = NULL;
-	} else {
-		down_write(&tq_list_sem);
-		tq->tq_instance = taskq_find_by_name(name) + 1;
-		list_add_tail(&tq->tq_taskqs, &tq_list);
-		up_write(&tq_list_sem);
-	}
-
-	return (tq);
-}
-EXPORT_SYMBOL(taskq_create);
-
-void
-taskq_destroy(taskq_t *tq)
-{
-	struct task_struct *thread;
-	taskq_thread_t *tqt;
-	taskq_ent_t *t;
-	unsigned long flags;
-
-	ASSERT(tq);
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	tq->tq_flags &= ~TASKQ_ACTIVE;
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	/*
-	 * When TASKQ_ACTIVE is clear new tasks may not be added nor may
-	 * new worker threads be spawned for dynamic taskq.
-	 */
-	if (dynamic_taskq != NULL)
-		taskq_wait_outstanding(dynamic_taskq, 0);
-
-	taskq_wait(tq);
-
-	/* remove taskq from global list used by the kstats */
-	down_write(&tq_list_sem);
-	list_del(&tq->tq_taskqs);
-	up_write(&tq_list_sem);
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	/* wait for spawning threads to insert themselves to the list */
-	while (tq->tq_nspawn) {
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-		schedule_timeout_interruptible(1);
-		spin_lock_irqsave_nested(&tq->tq_lock, flags,
-		    tq->tq_lock_class);
-	}
-
-	/*
-	 * Signal each thread to exit and block until it does.  Each thread
-	 * is responsible for removing itself from the list and freeing its
-	 * taskq_thread_t.  This allows for idle threads to opt to remove
-	 * themselves from the taskq.  They can be recreated as needed.
-	 */
-	while (!list_empty(&tq->tq_thread_list)) {
-		tqt = list_entry(tq->tq_thread_list.next,
-		    taskq_thread_t, tqt_thread_list);
-		thread = tqt->tqt_thread;
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-		kthread_stop(thread);
-
-		spin_lock_irqsave_nested(&tq->tq_lock, flags,
-		    tq->tq_lock_class);
-	}
-
-	while (!list_empty(&tq->tq_free_list)) {
-		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
-
-		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
-
-		list_del_init(&t->tqent_list);
-		task_free(tq, t);
-	}
-
-	ASSERT0(tq->tq_nthreads);
-	ASSERT0(tq->tq_nalloc);
-	ASSERT0(tq->tq_nspawn);
-	ASSERT(list_empty(&tq->tq_thread_list));
-	ASSERT(list_empty(&tq->tq_active_list));
-	ASSERT(list_empty(&tq->tq_free_list));
-	ASSERT(list_empty(&tq->tq_pend_list));
-	ASSERT(list_empty(&tq->tq_prio_list));
-	ASSERT(list_empty(&tq->tq_delay_list));
-
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	strfree(tq->tq_name);
-	kmem_free(tq, sizeof (taskq_t));
-}
-EXPORT_SYMBOL(taskq_destroy);
-
-
-static unsigned int spl_taskq_kick = 0;
-
-/*
- * 2.6.36 API Change
- * module_param_cb is introduced to take kernel_param_ops and
- * module_param_call is marked as obsolete. Also set and get operations
- * were changed to take a 'const struct kernel_param *'.
- */
-static int
-#ifdef module_param_cb
-param_set_taskq_kick(const char *val, const struct kernel_param *kp)
-#else
-param_set_taskq_kick(const char *val, struct kernel_param *kp)
-#endif
-{
-	int ret;
-	taskq_t *tq = NULL;
-	taskq_ent_t *t;
-	unsigned long flags;
-
-	ret = param_set_uint(val, kp);
-	if (ret < 0 || !spl_taskq_kick)
-		return (ret);
-	/* reset value */
-	spl_taskq_kick = 0;
-
-	down_read(&tq_list_sem);
-	list_for_each_entry(tq, &tq_list, tq_taskqs) {
-		spin_lock_irqsave_nested(&tq->tq_lock, flags,
-		    tq->tq_lock_class);
-		/* Check if the first pending is older than 5 seconds */
-		t = taskq_next_ent(tq);
-		if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
-			(void) taskq_thread_spawn(tq);
-			printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
-			    tq->tq_name, tq->tq_instance);
-		}
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-	}
-	up_read(&tq_list_sem);
-	return (ret);
-}
-
-#ifdef module_param_cb
-static const struct kernel_param_ops param_ops_taskq_kick = {
-	.set = param_set_taskq_kick,
-	.get = param_get_uint,
-};
-module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
-#else
-module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
-	&spl_taskq_kick, 0644);
-#endif
-MODULE_PARM_DESC(spl_taskq_kick,
-	"Write nonzero to kick stuck taskqs to spawn more threads");
-
-int
-spl_taskq_init(void)
-{
-	init_rwsem(&tq_list_sem);
-	tsd_create(&taskq_tsd, NULL);
-
-	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
-	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
-	if (system_taskq == NULL)
-		return (1);
-
-	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
-	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
-	if (system_delay_taskq == NULL) {
-		taskq_destroy(system_taskq);
-		return (1);
-	}
-
-	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
-	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
-	if (dynamic_taskq == NULL) {
-		taskq_destroy(system_taskq);
-		taskq_destroy(system_delay_taskq);
-		return (1);
-	}
-
-	/*
-	 * This is used to annotate tq_lock, so
-	 *   taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
-	 * does not trigger a lockdep warning re: possible recursive locking
-	 */
-	dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
-
-	return (0);
-}
-
-void
-spl_taskq_fini(void)
-{
-	taskq_destroy(dynamic_taskq);
-	dynamic_taskq = NULL;
-
-	taskq_destroy(system_delay_taskq);
-	system_delay_taskq = NULL;
-
-	taskq_destroy(system_taskq);
-	system_taskq = NULL;
-
-	tsd_destroy(&taskq_tsd);
-}

diff --git a/zfs/module/spl/spl-thread.c b/zfs/module/spl/spl-thread.c
deleted file mode 100644
index 0352a31..0000000
--- a/zfs/module/spl/spl-thread.c
+++ /dev/null

@@ -1,161 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Thread Implementation.
- */
-
-#include <sys/thread.h>
-#include <sys/kmem.h>
-#include <sys/tsd.h>
-
-/*
- * Thread interfaces
- */
-typedef struct thread_priv_s {
-	unsigned long tp_magic;		/* Magic */
-	int tp_name_size;		/* Name size */
-	char *tp_name;			/* Name (without _thread suffix) */
-	void (*tp_func)(void *);	/* Registered function */
-	void *tp_args;			/* Args to be passed to function */
-	size_t tp_len;			/* Len to be passed to function */
-	int tp_state;			/* State to start thread at */
-	pri_t tp_pri;			/* Priority to start threat at */
-} thread_priv_t;
-
-static int
-thread_generic_wrapper(void *arg)
-{
-	thread_priv_t *tp = (thread_priv_t *)arg;
-	void (*func)(void *);
-	void *args;
-
-	ASSERT(tp->tp_magic == TP_MAGIC);
-	func = tp->tp_func;
-	args = tp->tp_args;
-	set_current_state(tp->tp_state);
-	set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri));
-	kmem_free(tp->tp_name, tp->tp_name_size);
-	kmem_free(tp, sizeof (thread_priv_t));
-
-	if (func)
-		func(args);
-
-	return (0);
-}
-
-void
-__thread_exit(void)
-{
-	tsd_exit();
-	complete_and_exit(NULL, 0);
-	/* Unreachable */
-}
-EXPORT_SYMBOL(__thread_exit);
-
-/*
- * thread_create() may block forever if it cannot create a thread or
- * allocate memory.  This is preferable to returning a NULL which Solaris
- * style callers likely never check for... since it can't fail.
- */
-kthread_t *
-__thread_create(caddr_t stk, size_t  stksize, thread_func_t func,
-    const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri)
-{
-	thread_priv_t *tp;
-	struct task_struct *tsk;
-	char *p;
-
-	/* Option pp is simply ignored */
-	/* Variable stack size unsupported */
-	ASSERT(stk == NULL);
-
-	tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE);
-	if (tp == NULL)
-		return (NULL);
-
-	tp->tp_magic = TP_MAGIC;
-	tp->tp_name_size = strlen(name) + 1;
-
-	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE);
-	if (tp->tp_name == NULL) {
-		kmem_free(tp, sizeof (thread_priv_t));
-		return (NULL);
-	}
-
-	strncpy(tp->tp_name, name, tp->tp_name_size);
-
-	/*
-	 * Strip trailing "_thread" from passed name which will be the func
-	 * name since the exposed API has no parameter for passing a name.
-	 */
-	p = strstr(tp->tp_name, "_thread");
-	if (p)
-		p[0] = '\0';
-
-	tp->tp_func  = func;
-	tp->tp_args  = args;
-	tp->tp_len   = len;
-	tp->tp_state = state;
-	tp->tp_pri   = pri;
-
-	tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp,
-	    "%s", tp->tp_name);
-	if (IS_ERR(tsk))
-		return (NULL);
-
-	wake_up_process(tsk);
-	return ((kthread_t *)tsk);
-}
-EXPORT_SYMBOL(__thread_create);
-
-/*
- * spl_kthread_create - Wrapper providing pre-3.13 semantics for
- * kthread_create() in which it is not killable and less likely
- * to return -ENOMEM.
- */
-struct task_struct *
-spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...)
-{
-	struct task_struct *tsk;
-	va_list args;
-	char name[TASK_COMM_LEN];
-
-	va_start(args, namefmt);
-	vsnprintf(name, sizeof (name), namefmt, args);
-	va_end(args);
-	do {
-		tsk = kthread_create(func, data, "%s", name);
-		if (IS_ERR(tsk)) {
-			if (signal_pending(current)) {
-				clear_thread_flag(TIF_SIGPENDING);
-				continue;
-			}
-			if (PTR_ERR(tsk) == -ENOMEM)
-				continue;
-			return (NULL);
-		} else {
-			return (tsk);
-		}
-	} while (1);
-}
-EXPORT_SYMBOL(spl_kthread_create);

diff --git a/zfs/module/spl/spl-tsd.c b/zfs/module/spl/spl-tsd.c
deleted file mode 100644
index b955ed6..0000000
--- a/zfs/module/spl/spl-tsd.c
+++ /dev/null

@@ -1,720 +0,0 @@
-/*
- *  Copyright (C) 2010 Lawrence Livermore National Security, LLC.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *
- *  Solaris Porting Layer (SPL) Thread Specific Data Implementation.
- *
- *  Thread specific data has implemented using a hash table, this avoids
- *  the need to add a member to the task structure and allows maximum
- *  portability between kernels.  This implementation has been optimized
- *  to keep the tsd_set() and tsd_get() times as small as possible.
- *
- *  The majority of the entries in the hash table are for specific tsd
- *  entries.  These entries are hashed by the product of their key and
- *  pid because by design the key and pid are guaranteed to be unique.
- *  Their product also has the desirable properly that it will be uniformly
- *  distributed over the hash bins providing neither the pid nor key is zero.
- *  Under linux the zero pid is always the init process and thus won't be
- *  used, and this implementation is careful to never to assign a zero key.
- *  By default the hash table is sized to 512 bins which is expected to
- *  be sufficient for light to moderate usage of thread specific data.
- *
- *  The hash table contains two additional type of entries.  They first
- *  type is entry is called a 'key' entry and it is added to the hash during
- *  tsd_create().  It is used to store the address of the destructor function
- *  and it is used as an anchor point.  All tsd entries which use the same
- *  key will be linked to this entry.  This is used during tsd_destroy() to
- *  quickly call the destructor function for all tsd associated with the key.
- *  The 'key' entry may be looked up with tsd_hash_search() by passing the
- *  key you wish to lookup and DTOR_PID constant as the pid.
- *
- *  The second type of entry is called a 'pid' entry and it is added to the
- *  hash the first time a process set a key.  The 'pid' entry is also used
- *  as an anchor and all tsd for the process will be linked to it.  This
- *  list is using during tsd_exit() to ensure all registered destructors
- *  are run for the process.  The 'pid' entry may be looked up with
- *  tsd_hash_search() by passing the PID_KEY constant as the key, and
- *  the process pid.  Note that tsd_exit() is called by thread_exit()
- *  so if your using the Solaris thread API you should not need to call
- *  tsd_exit() directly.
- *
- */
-
-#include <sys/kmem.h>
-#include <sys/thread.h>
-#include <sys/tsd.h>
-#include <linux/hash.h>
-
-typedef struct tsd_hash_bin {
-	spinlock_t		hb_lock;
-	struct hlist_head	hb_head;
-} tsd_hash_bin_t;
-
-typedef struct tsd_hash_table {
-	spinlock_t		ht_lock;
-	uint_t			ht_bits;
-	uint_t			ht_key;
-	tsd_hash_bin_t		*ht_bins;
-} tsd_hash_table_t;
-
-typedef struct tsd_hash_entry {
-	uint_t			he_key;
-	pid_t			he_pid;
-	dtor_func_t		he_dtor;
-	void			*he_value;
-	struct hlist_node	he_list;
-	struct list_head	he_key_list;
-	struct list_head	he_pid_list;
-} tsd_hash_entry_t;
-
-static tsd_hash_table_t *tsd_hash_table = NULL;
-
-
-/*
- * tsd_hash_search - searches hash table for tsd_hash_entry
- * @table: hash table
- * @key: search key
- * @pid: search pid
- */
-static tsd_hash_entry_t *
-tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
-{
-	struct hlist_node *node = NULL;
-	tsd_hash_entry_t *entry;
-	tsd_hash_bin_t *bin;
-	ulong_t hash;
-
-	hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
-	bin = &table->ht_bins[hash];
-	spin_lock(&bin->hb_lock);
-	hlist_for_each(node, &bin->hb_head) {
-		entry = list_entry(node, tsd_hash_entry_t, he_list);
-		if ((entry->he_key == key) && (entry->he_pid == pid)) {
-			spin_unlock(&bin->hb_lock);
-			return (entry);
-		}
-	}
-
-	spin_unlock(&bin->hb_lock);
-	return (NULL);
-}
-
-/*
- * tsd_hash_dtor - call the destructor and free all entries on the list
- * @work: list of hash entries
- *
- * For a list of entries which have all already been removed from the
- * hash call their registered destructor then free the associated memory.
- */
-static void
-tsd_hash_dtor(struct hlist_head *work)
-{
-	tsd_hash_entry_t *entry;
-
-	while (!hlist_empty(work)) {
-		entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
-		hlist_del(&entry->he_list);
-
-		if (entry->he_dtor && entry->he_pid != DTOR_PID)
-			entry->he_dtor(entry->he_value);
-
-		kmem_free(entry, sizeof (tsd_hash_entry_t));
-	}
-}
-
-/*
- * tsd_hash_add - adds an entry to hash table
- * @table: hash table
- * @key: search key
- * @pid: search pid
- *
- * The caller is responsible for ensuring the unique key/pid do not
- * already exist in the hash table.  This possible because all entries
- * are thread specific thus a concurrent thread will never attempt to
- * add this key/pid.  Because multiple bins must be checked to add
- * links to the dtor and pid entries the entire table is locked.
- */
-static int
-tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
-{
-	tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
-	tsd_hash_bin_t *bin;
-	ulong_t hash;
-	int rc = 0;
-
-	ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
-
-	/* New entry allocate structure, set value, and add to hash */
-	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (entry == NULL)
-		return (ENOMEM);
-
-	entry->he_key = key;
-	entry->he_pid = pid;
-	entry->he_value = value;
-	INIT_HLIST_NODE(&entry->he_list);
-	INIT_LIST_HEAD(&entry->he_key_list);
-	INIT_LIST_HEAD(&entry->he_pid_list);
-
-	spin_lock(&table->ht_lock);
-
-	/* Destructor entry must exist for all valid keys */
-	dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
-	ASSERT3P(dtor_entry, !=, NULL);
-	entry->he_dtor = dtor_entry->he_dtor;
-
-	/* Process entry must exist for all valid processes */
-	pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
-	ASSERT3P(pid_entry, !=, NULL);
-
-	hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
-	bin = &table->ht_bins[hash];
-	spin_lock(&bin->hb_lock);
-
-	/* Add to the hash, key, and pid lists */
-	hlist_add_head(&entry->he_list, &bin->hb_head);
-	list_add(&entry->he_key_list, &dtor_entry->he_key_list);
-	list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
-
-	spin_unlock(&bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	return (rc);
-}
-
-/*
- * tsd_hash_add_key - adds a destructor entry to the hash table
- * @table: hash table
- * @keyp: search key
- * @dtor: key destructor
- *
- * For every unique key there is a single entry in the hash which is used
- * as anchor.  All other thread specific entries for this key are linked
- * to this anchor via the 'he_key_list' list head.  On return they keyp
- * will be set to the next available key for the hash table.
- */
-static int
-tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
-{
-	tsd_hash_entry_t *tmp_entry, *entry;
-	tsd_hash_bin_t *bin;
-	ulong_t hash;
-	int keys_checked = 0;
-
-	ASSERT3P(table, !=, NULL);
-
-	/* Allocate entry to be used as a destructor for this key */
-	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (entry == NULL)
-		return (ENOMEM);
-
-	/* Determine next available key value */
-	spin_lock(&table->ht_lock);
-	do {
-		/* Limited to TSD_KEYS_MAX concurrent unique keys */
-		if (table->ht_key++ > TSD_KEYS_MAX)
-			table->ht_key = 1;
-
-		/* Ensure failure when all TSD_KEYS_MAX keys are in use */
-		if (keys_checked++ >= TSD_KEYS_MAX) {
-			spin_unlock(&table->ht_lock);
-			return (ENOENT);
-		}
-
-		tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
-	} while (tmp_entry);
-
-	/* Add destructor entry in to hash table */
-	entry->he_key = *keyp = table->ht_key;
-	entry->he_pid = DTOR_PID;
-	entry->he_dtor = dtor;
-	entry->he_value = NULL;
-	INIT_HLIST_NODE(&entry->he_list);
-	INIT_LIST_HEAD(&entry->he_key_list);
-	INIT_LIST_HEAD(&entry->he_pid_list);
-
-	hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
-	bin = &table->ht_bins[hash];
-	spin_lock(&bin->hb_lock);
-
-	hlist_add_head(&entry->he_list, &bin->hb_head);
-
-	spin_unlock(&bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	return (0);
-}
-
-/*
- * tsd_hash_add_pid - adds a process entry to the hash table
- * @table: hash table
- * @pid: search pid
- *
- * For every process there is a single entry in the hash which is used
- * as anchor.  All other thread specific entries for this process are
- * linked to this anchor via the 'he_pid_list' list head.
- */
-static int
-tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
-{
-	tsd_hash_entry_t *entry;
-	tsd_hash_bin_t *bin;
-	ulong_t hash;
-
-	/* Allocate entry to be used as the process reference */
-	entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
-	if (entry == NULL)
-		return (ENOMEM);
-
-	spin_lock(&table->ht_lock);
-	entry->he_key = PID_KEY;
-	entry->he_pid = pid;
-	entry->he_dtor = NULL;
-	entry->he_value = NULL;
-	INIT_HLIST_NODE(&entry->he_list);
-	INIT_LIST_HEAD(&entry->he_key_list);
-	INIT_LIST_HEAD(&entry->he_pid_list);
-
-	hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
-	bin = &table->ht_bins[hash];
-	spin_lock(&bin->hb_lock);
-
-	hlist_add_head(&entry->he_list, &bin->hb_head);
-
-	spin_unlock(&bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	return (0);
-}
-
-/*
- * tsd_hash_del - delete an entry from hash table, key, and pid lists
- * @table: hash table
- * @key: search key
- * @pid: search pid
- */
-static void
-tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
-{
-	hlist_del(&entry->he_list);
-	list_del_init(&entry->he_key_list);
-	list_del_init(&entry->he_pid_list);
-}
-
-/*
- * tsd_hash_table_init - allocate a hash table
- * @bits: hash table size
- *
- * A hash table with 2^bits bins will be created, it may not be resized
- * after the fact and must be free'd with tsd_hash_table_fini().
- */
-static tsd_hash_table_t *
-tsd_hash_table_init(uint_t bits)
-{
-	tsd_hash_table_t *table;
-	int hash, size = (1 << bits);
-
-	table = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
-	if (table == NULL)
-		return (NULL);
-
-	table->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * size, KM_SLEEP);
-	if (table->ht_bins == NULL) {
-		kmem_free(table, sizeof (tsd_hash_table_t));
-		return (NULL);
-	}
-
-	for (hash = 0; hash < size; hash++) {
-		spin_lock_init(&table->ht_bins[hash].hb_lock);
-		INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
-	}
-
-	spin_lock_init(&table->ht_lock);
-	table->ht_bits = bits;
-	table->ht_key = 1;
-
-	return (table);
-}
-
-/*
- * tsd_hash_table_fini - free a hash table
- * @table: hash table
- *
- * Free a hash table allocated by tsd_hash_table_init().  If the hash
- * table is not empty this function will call the proper destructor for
- * all remaining entries before freeing the memory used by those entries.
- */
-static void
-tsd_hash_table_fini(tsd_hash_table_t *table)
-{
-	HLIST_HEAD(work);
-	tsd_hash_bin_t *bin;
-	tsd_hash_entry_t *entry;
-	int size, i;
-
-	ASSERT3P(table, !=, NULL);
-	spin_lock(&table->ht_lock);
-	for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
-		bin = &table->ht_bins[i];
-		spin_lock(&bin->hb_lock);
-		while (!hlist_empty(&bin->hb_head)) {
-			entry = hlist_entry(bin->hb_head.first,
-			    tsd_hash_entry_t, he_list);
-			tsd_hash_del(table, entry);
-			hlist_add_head(&entry->he_list, &work);
-		}
-		spin_unlock(&bin->hb_lock);
-	}
-	spin_unlock(&table->ht_lock);
-
-	tsd_hash_dtor(&work);
-	kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t)*(1<<table->ht_bits));
-	kmem_free(table, sizeof (tsd_hash_table_t));
-}
-
-/*
- * tsd_remove_entry - remove a tsd entry for this thread
- * @entry: entry to remove
- *
- * Remove the thread specific data @entry for this thread.
- * If this is the last entry for this thread, also remove the PID entry.
- */
-static void
-tsd_remove_entry(tsd_hash_entry_t *entry)
-{
-	HLIST_HEAD(work);
-	tsd_hash_table_t *table;
-	tsd_hash_entry_t *pid_entry;
-	tsd_hash_bin_t *pid_entry_bin, *entry_bin;
-	ulong_t hash;
-
-	table = tsd_hash_table;
-	ASSERT3P(table, !=, NULL);
-	ASSERT3P(entry, !=, NULL);
-
-	spin_lock(&table->ht_lock);
-
-	hash = hash_long((ulong_t)entry->he_key *
-	    (ulong_t)entry->he_pid, table->ht_bits);
-	entry_bin = &table->ht_bins[hash];
-
-	/* save the possible pid_entry */
-	pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
-	    he_pid_list);
-
-	/* remove entry */
-	spin_lock(&entry_bin->hb_lock);
-	tsd_hash_del(table, entry);
-	hlist_add_head(&entry->he_list, &work);
-	spin_unlock(&entry_bin->hb_lock);
-
-	/* if pid_entry is indeed pid_entry, then remove it if it's empty */
-	if (pid_entry->he_key == PID_KEY &&
-	    list_empty(&pid_entry->he_pid_list)) {
-		hash = hash_long((ulong_t)pid_entry->he_key *
-		    (ulong_t)pid_entry->he_pid, table->ht_bits);
-		pid_entry_bin = &table->ht_bins[hash];
-
-		spin_lock(&pid_entry_bin->hb_lock);
-		tsd_hash_del(table, pid_entry);
-		hlist_add_head(&pid_entry->he_list, &work);
-		spin_unlock(&pid_entry_bin->hb_lock);
-	}
-
-	spin_unlock(&table->ht_lock);
-
-	tsd_hash_dtor(&work);
-}
-
-/*
- * tsd_set - set thread specific data
- * @key: lookup key
- * @value: value to set
- *
- * Caller must prevent racing tsd_create() or tsd_destroy(), protected
- * from racing tsd_get() or tsd_set() because it is thread specific.
- * This function has been optimized to be fast for the update case.
- * When setting the tsd initially it will be slower due to additional
- * required locking and potential memory allocations.
- */
-int
-tsd_set(uint_t key, void *value)
-{
-	tsd_hash_table_t *table;
-	tsd_hash_entry_t *entry;
-	pid_t pid;
-	int rc;
-	/* mark remove if value is NULL */
-	boolean_t remove = (value == NULL);
-
-	table = tsd_hash_table;
-	pid = curthread->pid;
-	ASSERT3P(table, !=, NULL);
-
-	if ((key == 0) || (key > TSD_KEYS_MAX))
-		return (EINVAL);
-
-	/* Entry already exists in hash table update value */
-	entry = tsd_hash_search(table, key, pid);
-	if (entry) {
-		entry->he_value = value;
-		/* remove the entry */
-		if (remove)
-			tsd_remove_entry(entry);
-		return (0);
-	}
-
-	/* don't create entry if value is NULL */
-	if (remove)
-		return (0);
-
-	/* Add a process entry to the hash if not yet exists */
-	entry = tsd_hash_search(table, PID_KEY, pid);
-	if (entry == NULL) {
-		rc = tsd_hash_add_pid(table, pid);
-		if (rc)
-			return (rc);
-	}
-
-	rc = tsd_hash_add(table, key, pid, value);
-	return (rc);
-}
-EXPORT_SYMBOL(tsd_set);
-
-/*
- * tsd_get - get thread specific data
- * @key: lookup key
- *
- * Caller must prevent racing tsd_create() or tsd_destroy().  This
- * implementation is designed to be fast and scalable, it does not
- * lock the entire table only a single hash bin.
- */
-void *
-tsd_get(uint_t key)
-{
-	tsd_hash_entry_t *entry;
-
-	ASSERT3P(tsd_hash_table, !=, NULL);
-
-	if ((key == 0) || (key > TSD_KEYS_MAX))
-		return (NULL);
-
-	entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
-	if (entry == NULL)
-		return (NULL);
-
-	return (entry->he_value);
-}
-EXPORT_SYMBOL(tsd_get);
-
-/*
- * tsd_get_by_thread - get thread specific data for specified thread
- * @key: lookup key
- * @thread: thread to lookup
- *
- * Caller must prevent racing tsd_create() or tsd_destroy().  This
- * implementation is designed to be fast and scalable, it does not
- * lock the entire table only a single hash bin.
- */
-void *
-tsd_get_by_thread(uint_t key, kthread_t *thread)
-{
-	tsd_hash_entry_t *entry;
-
-	ASSERT3P(tsd_hash_table, !=, NULL);
-
-	if ((key == 0) || (key > TSD_KEYS_MAX))
-		return (NULL);
-
-	entry = tsd_hash_search(tsd_hash_table, key, thread->pid);
-	if (entry == NULL)
-		return (NULL);
-
-	return (entry->he_value);
-}
-EXPORT_SYMBOL(tsd_get_by_thread);
-
-/*
- * tsd_create - create thread specific data key
- * @keyp: lookup key address
- * @dtor: destructor called during tsd_destroy() or tsd_exit()
- *
- * Provided key must be set to 0 or it assumed to be already in use.
- * The dtor is allowed to be NULL in which case no additional cleanup
- * for the data is performed during tsd_destroy() or tsd_exit().
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_create(uint_t *keyp, dtor_func_t dtor)
-{
-	ASSERT3P(keyp, !=, NULL);
-	if (*keyp)
-		return;
-
-	(void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
-}
-EXPORT_SYMBOL(tsd_create);
-
-/*
- * tsd_destroy - destroy thread specific data
- * @keyp: lookup key address
- *
- * Destroys the thread specific data on all threads which use this key.
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_destroy(uint_t *keyp)
-{
-	HLIST_HEAD(work);
-	tsd_hash_table_t *table;
-	tsd_hash_entry_t *dtor_entry, *entry;
-	tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
-	ulong_t hash;
-
-	table = tsd_hash_table;
-	ASSERT3P(table, !=, NULL);
-
-	spin_lock(&table->ht_lock);
-	dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
-	if (dtor_entry == NULL) {
-		spin_unlock(&table->ht_lock);
-		return;
-	}
-
-	/*
-	 * All threads which use this key must be linked off of the
-	 * DTOR_PID entry.  They are removed from the hash table and
-	 * linked in to a private working list to be destroyed.
-	 */
-	while (!list_empty(&dtor_entry->he_key_list)) {
-		entry = list_entry(dtor_entry->he_key_list.next,
-		    tsd_hash_entry_t, he_key_list);
-		ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
-		ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
-
-		hash = hash_long((ulong_t)entry->he_key *
-		    (ulong_t)entry->he_pid, table->ht_bits);
-		entry_bin = &table->ht_bins[hash];
-
-		spin_lock(&entry_bin->hb_lock);
-		tsd_hash_del(table, entry);
-		hlist_add_head(&entry->he_list, &work);
-		spin_unlock(&entry_bin->hb_lock);
-	}
-
-	hash = hash_long((ulong_t)dtor_entry->he_key *
-	    (ulong_t)dtor_entry->he_pid, table->ht_bits);
-	dtor_entry_bin = &table->ht_bins[hash];
-
-	spin_lock(&dtor_entry_bin->hb_lock);
-	tsd_hash_del(table, dtor_entry);
-	hlist_add_head(&dtor_entry->he_list, &work);
-	spin_unlock(&dtor_entry_bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	tsd_hash_dtor(&work);
-	*keyp = 0;
-}
-EXPORT_SYMBOL(tsd_destroy);
-
-/*
- * tsd_exit - destroys all thread specific data for this thread
- *
- * Destroys all the thread specific data for this thread.
- *
- * Caller must prevent racing tsd_set() or tsd_get(), this function is
- * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
- */
-void
-tsd_exit(void)
-{
-	HLIST_HEAD(work);
-	tsd_hash_table_t *table;
-	tsd_hash_entry_t *pid_entry, *entry;
-	tsd_hash_bin_t *pid_entry_bin, *entry_bin;
-	ulong_t hash;
-
-	table = tsd_hash_table;
-	ASSERT3P(table, !=, NULL);
-
-	spin_lock(&table->ht_lock);
-	pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
-	if (pid_entry == NULL) {
-		spin_unlock(&table->ht_lock);
-		return;
-	}
-
-	/*
-	 * All keys associated with this pid must be linked off of the
-	 * PID_KEY entry.  They are removed from the hash table and
-	 * linked in to a private working list to be destroyed.
-	 */
-
-	while (!list_empty(&pid_entry->he_pid_list)) {
-		entry = list_entry(pid_entry->he_pid_list.next,
-		    tsd_hash_entry_t, he_pid_list);
-		ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
-
-		hash = hash_long((ulong_t)entry->he_key *
-		    (ulong_t)entry->he_pid, table->ht_bits);
-		entry_bin = &table->ht_bins[hash];
-
-		spin_lock(&entry_bin->hb_lock);
-		tsd_hash_del(table, entry);
-		hlist_add_head(&entry->he_list, &work);
-		spin_unlock(&entry_bin->hb_lock);
-	}
-
-	hash = hash_long((ulong_t)pid_entry->he_key *
-	    (ulong_t)pid_entry->he_pid, table->ht_bits);
-	pid_entry_bin = &table->ht_bins[hash];
-
-	spin_lock(&pid_entry_bin->hb_lock);
-	tsd_hash_del(table, pid_entry);
-	hlist_add_head(&pid_entry->he_list, &work);
-	spin_unlock(&pid_entry_bin->hb_lock);
-	spin_unlock(&table->ht_lock);
-
-	tsd_hash_dtor(&work);
-}
-EXPORT_SYMBOL(tsd_exit);
-
-int
-spl_tsd_init(void)
-{
-	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
-	if (tsd_hash_table == NULL)
-		return (1);
-
-	return (0);
-}
-
-void
-spl_tsd_fini(void)
-{
-	tsd_hash_table_fini(tsd_hash_table);
-	tsd_hash_table = NULL;
-}

diff --git a/zfs/module/spl/spl-vmem.c b/zfs/module/spl/spl-vmem.c
deleted file mode 100644
index a2630ec..0000000
--- a/zfs/module/spl/spl-vmem.c
+++ /dev/null

@@ -1,135 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <sys/debug.h>
-#include <sys/vmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
-#include <linux/module.h>
-
-vmem_t *heap_arena = NULL;
-EXPORT_SYMBOL(heap_arena);
-
-vmem_t *zio_alloc_arena = NULL;
-EXPORT_SYMBOL(zio_alloc_arena);
-
-vmem_t *zio_arena = NULL;
-EXPORT_SYMBOL(zio_arena);
-
-#define	VMEM_FLOOR_SIZE		(4 * 1024 * 1024)	/* 4MB floor */
-
-/*
- * Return approximate virtual memory usage based on these assumptions:
- *
- * 1) The major SPL consumer of virtual memory is the kmem cache.
- * 2) Memory allocated with vmem_alloc() is short lived and can be ignored.
- * 3) Allow a 4MB floor as a generous pad given normal consumption.
- * 4) The spl_kmem_cache_sem only contends with cache create/destroy.
- */
-size_t
-vmem_size(vmem_t *vmp, int typemask)
-{
-	spl_kmem_cache_t *skc = NULL;
-	size_t alloc = VMEM_FLOOR_SIZE;
-
-	if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE))
-		return (VMALLOC_TOTAL);
-
-
-	down_read(&spl_kmem_cache_sem);
-	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
-		if (skc->skc_flags & KMC_VMEM)
-			alloc += skc->skc_slab_size * skc->skc_slab_total;
-	}
-	up_read(&spl_kmem_cache_sem);
-
-	if (typemask & VMEM_ALLOC)
-		return (MIN(alloc, VMALLOC_TOTAL));
-	else if (typemask & VMEM_FREE)
-		return (MAX(VMALLOC_TOTAL - alloc, 0));
-	else
-		return (0);
-}
-EXPORT_SYMBOL(vmem_size);
-
-/*
- * Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
- */
-void *
-spl_vmem_alloc(size_t size, int flags, const char *func, int line)
-{
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-
-	flags |= KM_VMEM;
-
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
-	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_alloc);
-
-void *
-spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
-{
-	ASSERT0(flags & ~KM_PUBLIC_MASK);
-
-	flags |= (KM_VMEM | KM_ZERO);
-
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
-#else
-	return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_zalloc);
-
-void
-spl_vmem_free(const void *buf, size_t size)
-{
-#if !defined(DEBUG_KMEM)
-	return (spl_kmem_free_impl(buf, size));
-#elif !defined(DEBUG_KMEM_TRACKING)
-	return (spl_kmem_free_debug(buf, size));
-#else
-	return (spl_kmem_free_track(buf, size));
-#endif
-}
-EXPORT_SYMBOL(spl_vmem_free);
-
-int
-spl_vmem_init(void)
-{
-	return (0);
-}
-
-void
-spl_vmem_fini(void)
-{
-}

diff --git a/zfs/module/spl/spl-vnode.c b/zfs/module/spl/spl-vnode.c
deleted file mode 100644
index 032bd1a..0000000
--- a/zfs/module/spl/spl-vnode.c
+++ /dev/null

@@ -1,719 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Vnode Implementation.
- */
-
-#include <sys/cred.h>
-#include <sys/vnode.h>
-#include <sys/kmem_cache.h>
-#include <linux/falloc.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#ifdef HAVE_FDTABLE_HEADER
-#include <linux/fdtable.h>
-#endif
-
-vnode_t *rootdir = (vnode_t *)0xabcd1234;
-EXPORT_SYMBOL(rootdir);
-
-static spl_kmem_cache_t *vn_cache;
-static spl_kmem_cache_t *vn_file_cache;
-
-static spinlock_t vn_file_lock;
-static LIST_HEAD(vn_file_list);
-
-static int
-spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
-{
-	int error = -EOPNOTSUPP;
-
-#ifdef HAVE_FILE_FALLOCATE
-	if (fp->f_op->fallocate)
-		error = fp->f_op->fallocate(fp, mode, offset, len);
-#else
-#ifdef HAVE_INODE_FALLOCATE
-	if (fp->f_dentry && fp->f_dentry->d_inode &&
-	    fp->f_dentry->d_inode->i_op->fallocate)
-		error = fp->f_dentry->d_inode->i_op->fallocate(
-		    fp->f_dentry->d_inode, mode, offset, len);
-#endif /* HAVE_INODE_FALLOCATE */
-#endif /* HAVE_FILE_FALLOCATE */
-
-	return (error);
-}
-
-static int
-spl_filp_fsync(struct file *fp, int sync)
-{
-#ifdef HAVE_2ARGS_VFS_FSYNC
-	return (vfs_fsync(fp, sync));
-#else
-	return (vfs_fsync(fp, (fp)->f_dentry, sync));
-#endif /* HAVE_2ARGS_VFS_FSYNC */
-}
-
-static ssize_t
-spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_WRITE_PPOS)
-	return (kernel_write(file, buf, count, pos));
-#else
-	mm_segment_t saved_fs;
-	ssize_t ret;
-
-	saved_fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	ret = vfs_write(file, (__force const char __user *)buf, count, pos);
-
-	set_fs(saved_fs);
-
-	return (ret);
-#endif
-}
-
-static ssize_t
-spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_READ_PPOS)
-	return (kernel_read(file, buf, count, pos));
-#else
-	mm_segment_t saved_fs;
-	ssize_t ret;
-
-	saved_fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	ret = vfs_read(file, (void __user *)buf, count, pos);
-
-	set_fs(saved_fs);
-
-	return (ret);
-#endif
-}
-
-vtype_t
-vn_mode_to_vtype(mode_t mode)
-{
-	if (S_ISREG(mode))
-		return (VREG);
-
-	if (S_ISDIR(mode))
-		return (VDIR);
-
-	if (S_ISCHR(mode))
-		return (VCHR);
-
-	if (S_ISBLK(mode))
-		return (VBLK);
-
-	if (S_ISFIFO(mode))
-		return (VFIFO);
-
-	if (S_ISLNK(mode))
-		return (VLNK);
-
-	if (S_ISSOCK(mode))
-		return (VSOCK);
-
-	return (VNON);
-} /* vn_mode_to_vtype() */
-EXPORT_SYMBOL(vn_mode_to_vtype);
-
-mode_t
-vn_vtype_to_mode(vtype_t vtype)
-{
-	if (vtype == VREG)
-		return (S_IFREG);
-
-	if (vtype == VDIR)
-		return (S_IFDIR);
-
-	if (vtype == VCHR)
-		return (S_IFCHR);
-
-	if (vtype == VBLK)
-		return (S_IFBLK);
-
-	if (vtype == VFIFO)
-		return (S_IFIFO);
-
-	if (vtype == VLNK)
-		return (S_IFLNK);
-
-	if (vtype == VSOCK)
-		return (S_IFSOCK);
-
-	return (VNON);
-} /* vn_vtype_to_mode() */
-EXPORT_SYMBOL(vn_vtype_to_mode);
-
-vnode_t *
-vn_alloc(int flag)
-{
-	vnode_t *vp;
-
-	vp = kmem_cache_alloc(vn_cache, flag);
-	if (vp != NULL) {
-		vp->v_file = NULL;
-		vp->v_type = 0;
-	}
-
-	return (vp);
-} /* vn_alloc() */
-EXPORT_SYMBOL(vn_alloc);
-
-void
-vn_free(vnode_t *vp)
-{
-	kmem_cache_free(vn_cache, vp);
-} /* vn_free() */
-EXPORT_SYMBOL(vn_free);
-
-int
-vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
-    int x1, void *x2)
-{
-	struct file *fp;
-	struct kstat stat;
-	int rc, saved_umask = 0;
-	gfp_t saved_gfp;
-	vnode_t *vp;
-
-	ASSERT(flags & (FWRITE | FREAD));
-	ASSERT(seg == UIO_SYSSPACE);
-	ASSERT(vpp);
-	*vpp = NULL;
-
-	if (!(flags & FCREAT) && (flags & FWRITE))
-		flags |= FEXCL;
-
-	/*
-	 * Note for filp_open() the two low bits must be remapped to mean:
-	 * 01 - read-only  -> 00 read-only
-	 * 10 - write-only -> 01 write-only
-	 * 11 - read-write -> 10 read-write
-	 */
-	flags--;
-
-	if (flags & FCREAT)
-		saved_umask = xchg(&current->fs->umask, 0);
-
-	fp = filp_open(path, flags, mode);
-
-	if (flags & FCREAT)
-		(void) xchg(&current->fs->umask, saved_umask);
-
-	if (IS_ERR(fp))
-		return (-PTR_ERR(fp));
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat);
-#else
-	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
-	if (rc) {
-		filp_close(fp, 0);
-		return (-rc);
-	}
-
-	vp = vn_alloc(KM_SLEEP);
-	if (!vp) {
-		filp_close(fp, 0);
-		return (ENOMEM);
-	}
-
-	saved_gfp = mapping_gfp_mask(fp->f_mapping);
-	mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
-
-	mutex_enter(&vp->v_lock);
-	vp->v_type = vn_mode_to_vtype(stat.mode);
-	vp->v_file = fp;
-	vp->v_gfp_mask = saved_gfp;
-	*vpp = vp;
-	mutex_exit(&vp->v_lock);
-
-	return (0);
-} /* vn_open() */
-EXPORT_SYMBOL(vn_open);
-
-int
-vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
-    vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
-{
-	char *realpath;
-	int len, rc;
-
-	ASSERT(vp == rootdir);
-
-	len = strlen(path) + 2;
-	realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
-	if (!realpath)
-		return (ENOMEM);
-
-	(void) snprintf(realpath, len, "/%s", path);
-	rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
-	kfree(realpath);
-
-	return (rc);
-} /* vn_openat() */
-EXPORT_SYMBOL(vn_openat);
-
-int
-vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
-    uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
-{
-	struct file *fp = vp->v_file;
-	loff_t offset = off;
-	int rc;
-
-	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
-	ASSERT(seg == UIO_SYSSPACE);
-	ASSERT((ioflag & ~FAPPEND) == 0);
-
-	if (ioflag & FAPPEND)
-		offset = fp->f_pos;
-
-	if (uio & UIO_WRITE)
-		rc = spl_kernel_write(fp, addr, len, &offset);
-	else
-		rc = spl_kernel_read(fp, addr, len, &offset);
-
-	fp->f_pos = offset;
-
-	if (rc < 0)
-		return (-rc);
-
-	if (residp) {
-		*residp = len - rc;
-	} else {
-		if (rc != len)
-			return (EIO);
-	}
-
-	return (0);
-} /* vn_rdwr() */
-EXPORT_SYMBOL(vn_rdwr);
-
-int
-vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
-{
-	int rc;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-
-	mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
-	rc = filp_close(vp->v_file, 0);
-	vn_free(vp);
-
-	return (-rc);
-} /* vn_close() */
-EXPORT_SYMBOL(vn_close);
-
-/*
- * vn_seek() does not actually seek it only performs bounds checking on the
- * proposed seek.  We perform minimal checking and allow vn_rdwr() to catch
- * anything more serious.
- */
-int
-vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
-{
-	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
-}
-EXPORT_SYMBOL(vn_seek);
-
-int
-vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
-{
-	struct file *fp;
-	struct kstat stat;
-	int rc;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-	ASSERT(vap);
-
-	fp = vp->v_file;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
-	    AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat);
-#else
-	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
-	if (rc)
-		return (-rc);
-
-	vap->va_type	= vn_mode_to_vtype(stat.mode);
-	vap->va_mode	= stat.mode;
-	vap->va_uid	= KUID_TO_SUID(stat.uid);
-	vap->va_gid	= KGID_TO_SGID(stat.gid);
-	vap->va_fsid	= 0;
-	vap->va_nodeid	= stat.ino;
-	vap->va_nlink	= stat.nlink;
-	vap->va_size	= stat.size;
-	vap->va_blksize	= stat.blksize;
-	vap->va_atime	= stat.atime;
-	vap->va_mtime	= stat.mtime;
-	vap->va_ctime	= stat.ctime;
-	vap->va_rdev	= stat.rdev;
-	vap->va_nblocks	= stat.blocks;
-
-	return (0);
-}
-EXPORT_SYMBOL(vn_getattr);
-
-int
-vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
-{
-	int datasync = 0;
-	int error;
-	int fstrans;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-
-	if (flags & FDSYNC)
-		datasync = 1;
-
-	/*
-	 * May enter XFS which generates a warning when PF_FSTRANS is set.
-	 * To avoid this the flag is cleared over vfs_sync() and then reset.
-	 */
-	fstrans = __spl_pf_fstrans_check();
-	if (fstrans)
-		current->flags &= ~(__SPL_PF_FSTRANS);
-
-	error = -spl_filp_fsync(vp->v_file, datasync);
-	if (fstrans)
-		current->flags |= __SPL_PF_FSTRANS;
-
-	return (error);
-} /* vn_fsync() */
-EXPORT_SYMBOL(vn_fsync);
-
-int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
-    offset_t offset, void *x6, void *x7)
-{
-	int error = EOPNOTSUPP;
-#ifdef FALLOC_FL_PUNCH_HOLE
-	int fstrans;
-#endif
-
-	if (cmd != F_FREESP || bfp->l_whence != SEEK_SET)
-		return (EOPNOTSUPP);
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-	ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
-
-#ifdef FALLOC_FL_PUNCH_HOLE
-	/*
-	 * May enter XFS which generates a warning when PF_FSTRANS is set.
-	 * To avoid this the flag is cleared over vfs_sync() and then reset.
-	 */
-	fstrans = __spl_pf_fstrans_check();
-	if (fstrans)
-		current->flags &= ~(__SPL_PF_FSTRANS);
-
-	/*
-	 * When supported by the underlying file system preferentially
-	 * use the fallocate() callback to preallocate the space.
-	 */
-	error = -spl_filp_fallocate(vp->v_file,
-	    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
-	    bfp->l_start, bfp->l_len);
-
-	if (fstrans)
-		current->flags |= __SPL_PF_FSTRANS;
-
-	if (error == 0)
-		return (0);
-#endif
-
-#ifdef HAVE_INODE_TRUNCATE_RANGE
-	if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
-	    vp->v_file->f_dentry->d_inode->i_op &&
-	    vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
-		off_t end = bfp->l_start + bfp->l_len;
-		/*
-		 * Judging from the code in shmem_truncate_range(),
-		 * it seems the kernel expects the end offset to be
-		 * inclusive and aligned to the end of a page.
-		 */
-		if (end % PAGE_SIZE != 0) {
-			end &= ~(off_t)(PAGE_SIZE - 1);
-			if (end <= bfp->l_start)
-				return (0);
-		}
-		--end;
-
-		vp->v_file->f_dentry->d_inode->i_op->truncate_range(
-		    vp->v_file->f_dentry->d_inode, bfp->l_start, end);
-
-		return (0);
-	}
-#endif
-
-	return (error);
-}
-EXPORT_SYMBOL(vn_space);
-
-/* Function must be called while holding the vn_file_lock */
-static file_t *
-file_find(int fd, struct task_struct *task)
-{
-	file_t *fp = NULL;
-
-	list_for_each_entry(fp, &vn_file_list,  f_list) {
-		if (fd == fp->f_fd && fp->f_task == task) {
-			ASSERT(atomic_read(&fp->f_ref) != 0);
-			return (fp);
-		}
-	}
-
-	return (NULL);
-} /* file_find() */
-
-file_t *
-vn_getf(int fd)
-{
-	struct kstat stat;
-	struct file *lfp;
-	file_t *fp;
-	vnode_t *vp;
-	int rc = 0;
-
-	if (fd < 0)
-		return (NULL);
-
-	/* Already open just take an extra reference */
-	spin_lock(&vn_file_lock);
-
-	fp = file_find(fd, current);
-	if (fp) {
-		lfp = fget(fd);
-		fput(fp->f_file);
-		/*
-		 * areleasef() can cause us to see a stale reference when
-		 * userspace has reused a file descriptor before areleasef()
-		 * has run. fput() the stale reference and replace it. We
-		 * retain the original reference count such that the concurrent
-		 * areleasef() will decrement its reference and terminate.
-		 */
-		if (lfp != fp->f_file) {
-			fp->f_file = lfp;
-			fp->f_vnode->v_file = lfp;
-		}
-		atomic_inc(&fp->f_ref);
-		spin_unlock(&vn_file_lock);
-		return (fp);
-	}
-
-	spin_unlock(&vn_file_lock);
-
-	/* File was not yet opened create the object and setup */
-	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
-	if (fp == NULL)
-		goto out;
-
-	mutex_enter(&fp->f_lock);
-
-	fp->f_fd = fd;
-	fp->f_task = current;
-	fp->f_offset = 0;
-	atomic_inc(&fp->f_ref);
-
-	lfp = fget(fd);
-	if (lfp == NULL)
-		goto out_mutex;
-
-	vp = vn_alloc(KM_SLEEP);
-	if (vp == NULL)
-		goto out_fget;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
-	    AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&lfp->f_path, &stat);
-#else
-	rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
-#endif
-	if (rc)
-		goto out_vnode;
-
-	mutex_enter(&vp->v_lock);
-	vp->v_type = vn_mode_to_vtype(stat.mode);
-	vp->v_file = lfp;
-	mutex_exit(&vp->v_lock);
-
-	fp->f_vnode = vp;
-	fp->f_file = lfp;
-
-	/* Put it on the tracking list */
-	spin_lock(&vn_file_lock);
-	list_add(&fp->f_list, &vn_file_list);
-	spin_unlock(&vn_file_lock);
-
-	mutex_exit(&fp->f_lock);
-	return (fp);
-
-out_vnode:
-	vn_free(vp);
-out_fget:
-	fput(lfp);
-out_mutex:
-	mutex_exit(&fp->f_lock);
-	kmem_cache_free(vn_file_cache, fp);
-out:
-	return (NULL);
-} /* getf() */
-EXPORT_SYMBOL(getf);
-
-static void releasef_locked(file_t *fp)
-{
-	ASSERT(fp->f_file);
-	ASSERT(fp->f_vnode);
-
-	/* Unlinked from list, no refs, safe to free outside mutex */
-	fput(fp->f_file);
-	vn_free(fp->f_vnode);
-
-	kmem_cache_free(vn_file_cache, fp);
-}
-
-void
-vn_releasef(int fd)
-{
-	areleasef(fd, P_FINFO(current));
-}
-EXPORT_SYMBOL(releasef);
-
-void
-vn_areleasef(int fd, uf_info_t *fip)
-{
-	file_t *fp;
-	struct task_struct *task = (struct task_struct *)fip;
-
-	if (fd < 0)
-		return;
-
-	spin_lock(&vn_file_lock);
-	fp = file_find(fd, task);
-	if (fp) {
-		atomic_dec(&fp->f_ref);
-		if (atomic_read(&fp->f_ref) > 0) {
-			spin_unlock(&vn_file_lock);
-			return;
-		}
-
-		list_del(&fp->f_list);
-		releasef_locked(fp);
-	}
-	spin_unlock(&vn_file_lock);
-} /* releasef() */
-EXPORT_SYMBOL(areleasef);
-
-static int
-vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	struct vnode *vp = buf;
-
-	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
-
-	return (0);
-} /* vn_cache_constructor() */
-
-static void
-vn_cache_destructor(void *buf, void *cdrarg)
-{
-	struct vnode *vp = buf;
-
-	mutex_destroy(&vp->v_lock);
-} /* vn_cache_destructor() */
-
-static int
-vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	file_t *fp = buf;
-
-	atomic_set(&fp->f_ref, 0);
-	mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
-	INIT_LIST_HEAD(&fp->f_list);
-
-	return (0);
-} /* vn_file_cache_constructor() */
-
-static void
-vn_file_cache_destructor(void *buf, void *cdrarg)
-{
-	file_t *fp = buf;
-
-	mutex_destroy(&fp->f_lock);
-} /* vn_file_cache_destructor() */
-
-int
-spl_vn_init(void)
-{
-	spin_lock_init(&vn_file_lock);
-
-	vn_cache = kmem_cache_create("spl_vn_cache",
-	    sizeof (struct vnode), 64, vn_cache_constructor,
-	    vn_cache_destructor, NULL, NULL, NULL, 0);
-
-	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
-	    sizeof (file_t), 64, vn_file_cache_constructor,
-	    vn_file_cache_destructor, NULL, NULL, NULL, 0);
-
-	return (0);
-} /* spl_vn_init() */
-
-void
-spl_vn_fini(void)
-{
-	file_t *fp = NULL, *next_fp = NULL;
-	int leaked = 0;
-
-	spin_lock(&vn_file_lock);
-
-	list_for_each_entry_safe(fp, next_fp, &vn_file_list,  f_list) {
-		list_del(&fp->f_list);
-		releasef_locked(fp);
-		leaked++;
-	}
-
-	spin_unlock(&vn_file_lock);
-
-	if (leaked > 0)
-		printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
-
-	kmem_cache_destroy(vn_file_cache);
-	kmem_cache_destroy(vn_cache);
-} /* spl_vn_fini() */

diff --git a/zfs/module/spl/spl-xdr.c b/zfs/module/spl/spl-xdr.c
deleted file mode 100644
index 1dd31ff..0000000
--- a/zfs/module/spl/spl-xdr.c
+++ /dev/null

@@ -1,513 +0,0 @@
-/*
- *  Copyright (c) 2008-2010 Sun Microsystems, Inc.
- *  Written by Ricardo Correia <Ricardo.M.Correia@Sun.COM>
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) XDR Implementation.
- */
-
-#include <linux/string.h>
-#include <sys/kmem.h>
-#include <sys/debug.h>
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <rpc/xdr.h>
-
-/*
- * SPL's XDR mem implementation.
- *
- * This is used by libnvpair to serialize/deserialize the name-value pair data
- * structures into byte arrays in a well-defined and portable manner.
- *
- * These data structures are used by the DMU/ZFS to flexibly manipulate various
- * information in memory and later serialize it/deserialize it to disk.
- * Examples of usages include the pool configuration, lists of pool and dataset
- * properties, etc.
- *
- * Reference documentation for the XDR representation and XDR operations can be
- * found in RFC 1832 and xdr(3), respectively.
- *
- * ===  Implementation shortcomings ===
- *
- * It is assumed that the following C types have the following sizes:
- *
- * char/unsigned char:      1 byte
- * short/unsigned short:    2 bytes
- * int/unsigned int:        4 bytes
- * longlong_t/u_longlong_t: 8 bytes
- *
- * The C standard allows these types to be larger (and in the case of ints,
- * shorter), so if that is the case on some compiler/architecture, the build
- * will fail (on purpose).
- *
- * If someone wants to fix the code to work properly on such environments, then:
- *
- * 1) Preconditions should be added to xdrmem_enc functions to make sure the
- *    caller doesn't pass arguments which exceed the expected range.
- * 2) Functions which take signed integers should be changed to properly do
- *    sign extension.
- * 3) For ints with less than 32 bits, well.. I suspect you'll have bigger
- *    problems than this implementation.
- *
- * It is also assumed that:
- *
- * 1) Chars have 8 bits.
- * 2) We can always do 32-bit-aligned int memory accesses and byte-aligned
- *    memcpy, memset and memcmp.
- * 3) Arrays passed to xdr_array() are packed and the compiler/architecture
- *    supports element-sized-aligned memory accesses.
- * 4) Negative integers are natively stored in two's complement binary
- *    representation.
- *
- * No checks are done for the 4 assumptions above, though.
- *
- * === Caller expectations ===
- *
- * Existing documentation does not describe the semantics of XDR operations very
- * well.  Therefore, some assumptions about failure semantics will be made and
- * will be described below:
- *
- * 1) If any encoding operation fails (e.g., due to lack of buffer space), the
- * the stream should be considered valid only up to the encoding operation
- * previous to the one that first failed. However, the stream size as returned
- * by xdr_control() cannot be considered to be strictly correct (it may be
- * bigger).
- *
- * Putting it another way, if there is an encoding failure it's undefined
- * whether anything is added to the stream in that operation and therefore
- * neither xdr_control() nor future encoding operations on the same stream can
- * be relied upon to produce correct results.
- *
- * 2) If a decoding operation fails, it's undefined whether anything will be
- * decoded into passed buffers/pointers during that operation, or what the
- * values on those buffers will look like.
- *
- * Future decoding operations on the same stream will also have similar
- * undefined behavior.
- *
- * 3) When the first decoding operation fails it is OK to trust the results of
- * previous decoding operations on the same stream, as long as the caller
- * expects a failure to be possible (e.g. due to end-of-stream).
- *
- * However, this is highly discouraged because the caller should know the
- * stream size and should be coded to expect any decoding failure to be data
- * corruption due to hardware, accidental or even malicious causes, which should
- * be handled gracefully in all cases.
- *
- * In very rare situations where there are strong reasons to believe the data
- * can be trusted to be valid and non-tampered with, then the caller may assume
- * a decoding failure to be a bug (e.g. due to mismatched data types) and may
- * fail non-gracefully.
- *
- * 4) Non-zero padding bytes will cause the decoding operation to fail.
- *
- * 5) Zero bytes on string types will also cause the decoding operation to fail.
- *
- * 6) It is assumed that either the pointer to the stream buffer given by the
- * caller is 32-bit aligned or the architecture supports non-32-bit-aligned int
- * memory accesses.
- *
- * 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap.
- *
- * 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user
- * space or MMIO space), the computer may explode.
- */
-
-static struct xdr_ops xdrmem_encode_ops;
-static struct xdr_ops xdrmem_decode_ops;
-
-void
-xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size,
-    const enum xdr_op op)
-{
-	switch (op) {
-		case XDR_ENCODE:
-			xdrs->x_ops = &xdrmem_encode_ops;
-			break;
-		case XDR_DECODE:
-			xdrs->x_ops = &xdrmem_decode_ops;
-			break;
-		default:
-			xdrs->x_ops = NULL; /* Let the caller know we failed */
-			return;
-	}
-
-	xdrs->x_op = op;
-	xdrs->x_addr = addr;
-	xdrs->x_addr_end = addr + size;
-
-	if (xdrs->x_addr_end < xdrs->x_addr) {
-		xdrs->x_ops = NULL;
-	}
-}
-EXPORT_SYMBOL(xdrmem_create);
-
-static bool_t
-xdrmem_control(XDR *xdrs, int req, void *info)
-{
-	struct xdr_bytesrec *rec = (struct xdr_bytesrec *)info;
-
-	if (req != XDR_GET_BYTES_AVAIL)
-		return (FALSE);
-
-	rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */
-	rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr;
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	uint_t size = roundup(cnt, 4);
-	uint_t pad;
-
-	if (size < cnt)
-		return (FALSE); /* Integer overflow */
-
-	if (xdrs->x_addr > xdrs->x_addr_end)
-		return (FALSE);
-
-	if (xdrs->x_addr_end - xdrs->x_addr < size)
-		return (FALSE);
-
-	memcpy(xdrs->x_addr, cp, cnt);
-
-	xdrs->x_addr += cnt;
-
-	pad = size - cnt;
-	if (pad > 0) {
-		memset(xdrs->x_addr, 0, pad);
-		xdrs->x_addr += pad;
-	}
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	static uint32_t zero = 0;
-	uint_t size = roundup(cnt, 4);
-	uint_t pad;
-
-	if (size < cnt)
-		return (FALSE); /* Integer overflow */
-
-	if (xdrs->x_addr > xdrs->x_addr_end)
-		return (FALSE);
-
-	if (xdrs->x_addr_end - xdrs->x_addr < size)
-		return (FALSE);
-
-	memcpy(cp, xdrs->x_addr, cnt);
-	xdrs->x_addr += cnt;
-
-	pad = size - cnt;
-	if (pad > 0) {
-		/* An inverted memchr() would be useful here... */
-		if (memcmp(&zero, xdrs->x_addr, pad) != 0)
-			return (FALSE);
-
-		xdrs->x_addr += pad;
-	}
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_enc_uint32(XDR *xdrs, uint32_t val)
-{
-	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
-		return (FALSE);
-
-	*((uint32_t *)xdrs->x_addr) = cpu_to_be32(val);
-
-	xdrs->x_addr += sizeof (uint32_t);
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_dec_uint32(XDR *xdrs, uint32_t *val)
-{
-	if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end)
-		return (FALSE);
-
-	*val = be32_to_cpu(*((uint32_t *)xdrs->x_addr));
-
-	xdrs->x_addr += sizeof (uint32_t);
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_enc_char(XDR *xdrs, char *cp)
-{
-	uint32_t val;
-
-	BUILD_BUG_ON(sizeof (char) != 1);
-	val = *((unsigned char *) cp);
-
-	return (xdrmem_enc_uint32(xdrs, val));
-}
-
-static bool_t
-xdrmem_dec_char(XDR *xdrs, char *cp)
-{
-	uint32_t val;
-
-	BUILD_BUG_ON(sizeof (char) != 1);
-
-	if (!xdrmem_dec_uint32(xdrs, &val))
-		return (FALSE);
-
-	/*
-	 * If any of the 3 other bytes are non-zero then val will be greater
-	 * than 0xff and we fail because according to the RFC, this block does
-	 * not have a char encoded in it.
-	 */
-	if (val > 0xff)
-		return (FALSE);
-
-	*((unsigned char *) cp) = val;
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp)
-{
-	BUILD_BUG_ON(sizeof (unsigned short) != 2);
-
-	return (xdrmem_enc_uint32(xdrs, *usp));
-}
-
-static bool_t
-xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp)
-{
-	uint32_t val;
-
-	BUILD_BUG_ON(sizeof (unsigned short) != 2);
-
-	if (!xdrmem_dec_uint32(xdrs, &val))
-		return (FALSE);
-
-	/*
-	 * Short ints are not in the RFC, but we assume similar logic as in
-	 * xdrmem_dec_char().
-	 */
-	if (val > 0xffff)
-		return (FALSE);
-
-	*usp = val;
-
-	return (TRUE);
-}
-
-static bool_t
-xdrmem_enc_uint(XDR *xdrs, unsigned *up)
-{
-	BUILD_BUG_ON(sizeof (unsigned) != 4);
-
-	return (xdrmem_enc_uint32(xdrs, *up));
-}
-
-static bool_t
-xdrmem_dec_uint(XDR *xdrs, unsigned *up)
-{
-	BUILD_BUG_ON(sizeof (unsigned) != 4);
-
-	return (xdrmem_dec_uint32(xdrs, (uint32_t *)up));
-}
-
-static bool_t
-xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp)
-{
-	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
-
-	if (!xdrmem_enc_uint32(xdrs, *ullp >> 32))
-		return (FALSE);
-
-	return (xdrmem_enc_uint32(xdrs, *ullp & 0xffffffff));
-}
-
-static bool_t
-xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp)
-{
-	uint32_t low, high;
-
-	BUILD_BUG_ON(sizeof (u_longlong_t) != 8);
-
-	if (!xdrmem_dec_uint32(xdrs, &high))
-		return (FALSE);
-	if (!xdrmem_dec_uint32(xdrs, &low))
-		return (FALSE);
-
-	*ullp = ((u_longlong_t)high << 32) | low;
-
-	return (TRUE);
-}
-
-static bool_t
-xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
-    const uint_t elsize, const xdrproc_t elproc)
-{
-	uint_t i;
-	caddr_t addr = *arrp;
-
-	if (*sizep > maxsize || *sizep > UINT_MAX / elsize)
-		return (FALSE);
-
-	if (!xdrmem_enc_uint(xdrs, sizep))
-		return (FALSE);
-
-	for (i = 0; i < *sizep; i++) {
-		if (!elproc(xdrs, addr))
-			return (FALSE);
-		addr += elsize;
-	}
-
-	return (TRUE);
-}
-
-static bool_t
-xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize,
-    const uint_t elsize, const xdrproc_t elproc)
-{
-	uint_t i, size;
-	bool_t alloc = FALSE;
-	caddr_t addr;
-
-	if (!xdrmem_dec_uint(xdrs, sizep))
-		return (FALSE);
-
-	size = *sizep;
-
-	if (size > maxsize || size > UINT_MAX / elsize)
-		return (FALSE);
-
-	/*
-	 * The Solaris man page says: "If *arrp is NULL when decoding,
-	 * xdr_array() allocates memory and *arrp points to it".
-	 */
-	if (*arrp == NULL) {
-		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
-
-		*arrp = kmem_alloc(size * elsize, KM_NOSLEEP);
-		if (*arrp == NULL)
-			return (FALSE);
-
-		alloc = TRUE;
-	}
-
-	addr = *arrp;
-
-	for (i = 0; i < size; i++) {
-		if (!elproc(xdrs, addr)) {
-			if (alloc)
-				kmem_free(*arrp, size * elsize);
-			return (FALSE);
-		}
-		addr += elsize;
-	}
-
-	return (TRUE);
-}
-
-static bool_t
-xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize)
-{
-	size_t slen = strlen(*sp);
-	uint_t len;
-
-	if (slen > maxsize)
-		return (FALSE);
-
-	len = slen;
-
-	if (!xdrmem_enc_uint(xdrs, &len))
-		return (FALSE);
-
-	return (xdrmem_enc_bytes(xdrs, *sp, len));
-}
-
-static bool_t
-xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize)
-{
-	uint_t size;
-	bool_t alloc = FALSE;
-
-	if (!xdrmem_dec_uint(xdrs, &size))
-		return (FALSE);
-
-	if (size > maxsize || size > UINT_MAX - 1)
-		return (FALSE);
-
-	/*
-	 * Solaris man page: "If *sp is NULL when decoding, xdr_string()
-	 * allocates memory and *sp points to it".
-	 */
-	if (*sp == NULL) {
-		BUILD_BUG_ON(sizeof (uint_t) > sizeof (size_t));
-
-		*sp = kmem_alloc(size + 1, KM_NOSLEEP);
-		if (*sp == NULL)
-			return (FALSE);
-
-		alloc = TRUE;
-	}
-
-	if (!xdrmem_dec_bytes(xdrs, *sp, size))
-		goto fail;
-
-	if (memchr(*sp, 0, size) != NULL)
-		goto fail;
-
-	(*sp)[size] = '\0';
-
-	return (TRUE);
-
-fail:
-	if (alloc)
-		kmem_free(*sp, size + 1);
-
-	return (FALSE);
-}
-
-static struct xdr_ops xdrmem_encode_ops = {
-	.xdr_control		= xdrmem_control,
-	.xdr_char		= xdrmem_enc_char,
-	.xdr_u_short		= xdrmem_enc_ushort,
-	.xdr_u_int		= xdrmem_enc_uint,
-	.xdr_u_longlong_t	= xdrmem_enc_ulonglong,
-	.xdr_opaque		= xdrmem_enc_bytes,
-	.xdr_string		= xdr_enc_string,
-	.xdr_array		= xdr_enc_array
-};
-
-static struct xdr_ops xdrmem_decode_ops = {
-	.xdr_control		= xdrmem_control,
-	.xdr_char		= xdrmem_dec_char,
-	.xdr_u_short		= xdrmem_dec_ushort,
-	.xdr_u_int		= xdrmem_dec_uint,
-	.xdr_u_longlong_t	= xdrmem_dec_ulonglong,
-	.xdr_opaque		= xdrmem_dec_bytes,
-	.xdr_string		= xdr_dec_string,
-	.xdr_array		= xdr_dec_array
-};

diff --git a/zfs/module/spl/spl-zlib.c b/zfs/module/spl/spl-zlib.c
deleted file mode 100644
index 6242334..0000000
--- a/zfs/module/spl/spl-zlib.c
+++ /dev/null

@@ -1,217 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *
- *  z_compress_level/z_uncompress are nearly identical copies of the
- *  compress2/uncompress functions provided by the official zlib package
- *  available at http://zlib.net/.  The only changes made we to slightly
- *  adapt the functions called to match the linux kernel implementation
- *  of zlib.  The full zlib license follows:
- *
- *  zlib.h -- interface of the 'zlib' general purpose compression library
- *  version 1.2.5, April 19th, 2010
- *
- *  Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
- *
- *  This software is provided 'as-is', without any express or implied
- *  warranty.  In no event will the authors be held liable for any damages
- *  arising from the use of this software.
- *
- *  Permission is granted to anyone to use this software for any purpose,
- *  including commercial applications, and to alter it and redistribute it
- *  freely, subject to the following restrictions:
- *
- *  1. The origin of this software must not be misrepresented; you must not
- *     claim that you wrote the original software. If you use this software
- *     in a product, an acknowledgment in the product documentation would be
- *     appreciated but is not required.
- *  2. Altered source versions must be plainly marked as such, and must not be
- *     misrepresented as being the original software.
- *  3. This notice may not be removed or altered from any source distribution.
- *
- *  Jean-loup Gailly
- *  Mark Adler
- */
-
-
-#include <sys/kmem.h>
-#include <sys/kmem_cache.h>
-#include <sys/zmod.h>
-
-static spl_kmem_cache_t *zlib_workspace_cache;
-
-/*
- * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
- * and vfree for every call.  Using a kmem_cache also has the advantage
- * that improves the odds that the memory used will be local to this cpu.
- * To further improve things it might be wise to create a dedicated per-cpu
- * workspace for use.  This would take some additional care because we then
- * must disable preemption around the critical section, and verify that
- * zlib_deflate* and zlib_inflate* never internally call schedule().
- */
-static void *
-zlib_workspace_alloc(int flags)
-{
-	return (kmem_cache_alloc(zlib_workspace_cache, flags & ~(__GFP_FS)));
-}
-
-static void
-zlib_workspace_free(void *workspace)
-{
-	kmem_cache_free(zlib_workspace_cache, workspace);
-}
-
-/*
- * Compresses the source buffer into the destination buffer. The level
- * parameter has the same meaning as in deflateInit.  sourceLen is the byte
- * length of the source buffer. Upon entry, destLen is the total size of the
- * destination buffer, which must be at least 0.1% larger than sourceLen plus
- * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
- *
- * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- * memory, Z_BUF_ERROR if there was not enough room in the output buffer,
- * Z_STREAM_ERROR if the level parameter is invalid.
- */
-int
-z_compress_level(void *dest, size_t *destLen, const void *source,
-    size_t sourceLen, int level)
-{
-	z_stream stream;
-	int err;
-
-	stream.next_in = (Byte *)source;
-	stream.avail_in = (uInt)sourceLen;
-	stream.next_out = dest;
-	stream.avail_out = (uInt)*destLen;
-
-	if ((size_t)stream.avail_out != *destLen)
-		return (Z_BUF_ERROR);
-
-	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
-	if (!stream.workspace)
-		return (Z_MEM_ERROR);
-
-	err = zlib_deflateInit(&stream, level);
-	if (err != Z_OK) {
-		zlib_workspace_free(stream.workspace);
-		return (err);
-	}
-
-	err = zlib_deflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END) {
-		zlib_deflateEnd(&stream);
-		zlib_workspace_free(stream.workspace);
-		return (err == Z_OK ? Z_BUF_ERROR : err);
-	}
-	*destLen = stream.total_out;
-
-	err = zlib_deflateEnd(&stream);
-	zlib_workspace_free(stream.workspace);
-
-	return (err);
-}
-EXPORT_SYMBOL(z_compress_level);
-
-/*
- * Decompresses the source buffer into the destination buffer.  sourceLen is
- * the byte length of the source buffer. Upon entry, destLen is the total
- * size of the destination buffer, which must be large enough to hold the
- * entire uncompressed data. (The size of the uncompressed data must have
- * been saved previously by the compressor and transmitted to the decompressor
- * by some mechanism outside the scope of this compression library.)
- * Upon exit, destLen is the actual size of the compressed buffer.
- * This function can be used to decompress a whole file at once if the
- * input file is mmap'ed.
- *
- * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
- * enough memory, Z_BUF_ERROR if there was not enough room in the output
- * buffer, or Z_DATA_ERROR if the input data was corrupted.
- */
-int
-z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
-{
-	z_stream stream;
-	int err;
-
-	stream.next_in = (Byte *)source;
-	stream.avail_in = (uInt)sourceLen;
-	stream.next_out = dest;
-	stream.avail_out = (uInt)*destLen;
-
-	if ((size_t)stream.avail_out != *destLen)
-		return (Z_BUF_ERROR);
-
-	stream.workspace = zlib_workspace_alloc(KM_SLEEP);
-	if (!stream.workspace)
-		return (Z_MEM_ERROR);
-
-	err = zlib_inflateInit(&stream);
-	if (err != Z_OK) {
-		zlib_workspace_free(stream.workspace);
-		return (err);
-	}
-
-	err = zlib_inflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END) {
-		zlib_inflateEnd(&stream);
-		zlib_workspace_free(stream.workspace);
-
-		if (err == Z_NEED_DICT ||
-		    (err == Z_BUF_ERROR && stream.avail_in == 0))
-			return (Z_DATA_ERROR);
-
-		return (err);
-	}
-	*destLen = stream.total_out;
-
-	err = zlib_inflateEnd(&stream);
-	zlib_workspace_free(stream.workspace);
-
-	return (err);
-}
-EXPORT_SYMBOL(z_uncompress);
-
-int
-spl_zlib_init(void)
-{
-	int size;
-
-	size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
-	    zlib_inflate_workspacesize());
-
-	zlib_workspace_cache = kmem_cache_create(
-	    "spl_zlib_workspace_cache",
-	    size, 0, NULL, NULL, NULL, NULL, NULL,
-	    KMC_VMEM);
-	if (!zlib_workspace_cache)
-		return (1);
-
-	return (0);
-}
-
-void
-spl_zlib_fini(void)
-{
-	kmem_cache_destroy(zlib_workspace_cache);
-	zlib_workspace_cache = NULL;
-}

diff --git a/zfs/module/unicode/Makefile.in b/zfs/module/unicode/Makefile.in
index 82c9037..59c07c4 100644
--- a/zfs/module/unicode/Makefile.in
+++ b/zfs/module/unicode/Makefile.in

@@ -1,11 +1,11 @@
-src = @abs_top_srcdir@/module/unicode
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
+endif
 
 MODULE := zunicode
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
 $(MODULE)-objs += u8_textprep.o
 $(MODULE)-objs += uconv.o

diff --git a/zfs/module/unicode/u8_textprep.c b/zfs/module/unicode/u8_textprep.c
index 4e6105b..1b5b7b0 100644
--- a/zfs/module/unicode/u8_textprep.c
+++ b/zfs/module/unicode/u8_textprep.c

@@ -23,6 +23,9 @@
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright 2022 MNX Cloud, Inc.
+ */
 
 
 
@@ -46,7 +49,7 @@
 #include <sys/byteorder.h>
 #include <sys/errno.h>
 #include <sys/u8_textprep_data.h>
-
+#include <sys/mod.h>
 
 /* The maximum possible number of bytes in a UTF-8 character. */
 #define	U8_MB_CUR_MAX			(4)
@@ -213,10 +216,10 @@
 /*	80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F  */
 	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
 
-/*  	90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F  */
+/*	90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F  */
 	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
 
-/*  	A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF  */
+/*	A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF  */
 	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
 
 /*	B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF  */
@@ -330,7 +333,7 @@
  * specific to UTF-8 and Unicode.
  */
 int
-u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum)
+u8_validate(const char *u8str, size_t n, char **list, int flag, int *errnum)
 {
 	uchar_t *ib;
 	uchar_t *ibtail;
@@ -865,7 +868,9 @@
 		start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
 		end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
 	} else {
+		// cppcheck-suppress arrayIndexOutOfBoundsCond
 		start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
+		// cppcheck-suppress arrayIndexOutOfBoundsCond
 		end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
 	}
 
@@ -884,7 +889,7 @@
 	 *	| B0| B1| ... | Bm|
 	 *	+---+---+-...-+---+
 	 *
-	 *	The first byte, B0, is always less then 0xF5 (U8_DECOMP_BOTH).
+	 *	The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH).
 	 *
 	 * (2) Canonical decomposition mappings:
 	 *
@@ -1012,7 +1017,9 @@
 		start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4];
 		end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
 	} else {
+		// cppcheck-suppress arrayIndexOutOfBoundsCond
 		start_id = u8_composition_b4_tbl[uv][b3_tbl][b4];
+		// cppcheck-suppress arrayIndexOutOfBoundsCond
 		end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1];
 	}
 
@@ -1282,8 +1289,12 @@
 		saved_l = l - disp[last];
 
 		while (p < oslast) {
-			size = u8_number_of_bytes[*p];
-			if (size <= 1 || (p + size) > oslast)
+			int8_t number_of_bytes = u8_number_of_bytes[*p];
+
+			if (number_of_bytes <= 1)
+				break;
+			size = number_of_bytes;
+			if ((p + size) > oslast)
 				break;
 
 			saved_p = p;
@@ -1374,8 +1385,10 @@
  */
 static size_t
 collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
-    boolean_t is_it_toupper, boolean_t is_it_tolower,
-    boolean_t canonical_decomposition, boolean_t compatibility_decomposition,
+    boolean_t is_it_toupper,
+    boolean_t is_it_tolower,
+    boolean_t canonical_decomposition,
+    boolean_t compatibility_decomposition,
     boolean_t canonical_composition,
     int *errnum, u8_normalization_states_t *state)
 {
@@ -1710,7 +1723,7 @@
 }
 
 /*
- * The do_norm_compare() function does string comparion based on Unicode
+ * The do_norm_compare() function does string comparison based on Unicode
  * simple case mappings and Unicode Normalization definitions.
  *
  * It does so by collecting a sequence of character at a time and comparing
@@ -2139,13 +2152,13 @@
 
 module_init(unicode_init);
 module_exit(unicode_fini);
+#endif
 
-MODULE_DESCRIPTION("Unicode implementation");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+ZFS_MODULE_DESCRIPTION("Unicode implementation");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 
 EXPORT_SYMBOL(u8_validate);
 EXPORT_SYMBOL(u8_strcmp);
 EXPORT_SYMBOL(u8_textprep_str);
-#endif

diff --git a/zfs/module/unicode/uconv.c b/zfs/module/unicode/uconv.c
index d812d5f..fe84979 100644
--- a/zfs/module/unicode/uconv.c
+++ b/zfs/module/unicode/uconv.c

@@ -69,7 +69,7 @@
 #define	UCONV_OUT_ENDIAN_MASKS	(UCONV_OUT_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN)
 
 /* Native and reversed endian macros. */
-#ifdef	_BIG_ENDIAN
+#ifdef	_ZFS_BIG_ENDIAN
 #define	UCONV_IN_NAT_ENDIAN	UCONV_IN_BIG_ENDIAN
 #define	UCONV_IN_REV_ENDIAN	UCONV_IN_LITTLE_ENDIAN
 #define	UCONV_OUT_NAT_ENDIAN	UCONV_OUT_BIG_ENDIAN

diff --git a/zfs/module/zcommon/Makefile.in b/zfs/module/zcommon/Makefile.in
index 0ac0d43..614968a 100644
--- a/zfs/module/zcommon/Makefile.in
+++ b/zfs/module/zcommon/Makefile.in

@@ -1,18 +1,16 @@
-src = @abs_top_srcdir@/module/zcommon
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
-target_cpu = @target_cpu@
+endif
 
 MODULE := zcommon
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
 # Suppress unused-value warnings in sparc64 architecture headers
-ifeq ($(target_cpu),sparc64)
-ccflags-y += -Wno-unused-value
-endif
+ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
 
+$(MODULE)-objs += cityhash.o
 $(MODULE)-objs += zfeature_common.o
 $(MODULE)-objs += zfs_comutil.o
 $(MODULE)-objs += zfs_deleg.o
@@ -21,7 +19,6 @@
 $(MODULE)-objs += zfs_fletcher_superscalar4.o
 $(MODULE)-objs += zfs_namecheck.o
 $(MODULE)-objs += zfs_prop.o
-$(MODULE)-objs += zfs_uio.o
 $(MODULE)-objs += zpool_prop.o
 $(MODULE)-objs += zprop_common.o
 
@@ -29,3 +26,7 @@
 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
 $(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
+
+ifeq ($(CONFIG_ARM64),y)
+CFLAGS_REMOVE_zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only
+endif

diff --git a/zfs/module/zcommon/cityhash.c b/zfs/module/zcommon/cityhash.c
new file mode 100644
index 0000000..413a96d
--- /dev/null
+++ b/zfs/module/zcommon/cityhash.c

@@ -0,0 +1,67 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/*
+ * Copyright (c) 2017 by Delphix. All rights reserved.
+ */
+
+#include <cityhash.h>
+
+#define	HASH_K1 0xb492b66fbe98f273ULL
+#define	HASH_K2 0x9ae16a3b2f90404fULL
+
+/*
+ * Rotate a 64-bit value right by the given number of bits.  Compilers
+ * usually turn this into a single rotate instruction.
+ */
+static inline uint64_t
+rotate(uint64_t val, int shift)
+{
+	// A zero shift is passed through so we never shift left by 64.
+	return (shift != 0 ? (val >> shift) | (val << (64 - shift)) : val);
+}
+
+/* Mix u and v: two multiply/xor-shift rounds, then a final multiply. */
+static inline uint64_t
+cityhash_helper(uint64_t u, uint64_t v, uint64_t mul)
+{
+	uint64_t lo = (u ^ v) * mul;
+	lo ^= lo >> 47;
+	uint64_t hi = (v ^ lo) * mul;
+	hi ^= hi >> 47;
+	return (hi * mul);
+}
+
+/* Hash four 64-bit words into a single 64-bit value. */
+uint64_t
+cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
+{
+	const uint64_t mul = HASH_K2 + 64;
+	const uint64_t a = w1 * HASH_K1;
+	const uint64_t c = w4 * mul;
+	const uint64_t u = rotate(a + w2, 43) + rotate(c, 30) + w3 * HASH_K2;
+	const uint64_t v = a + rotate(w2 + HASH_K2, 18) + c;
+
+	return (cityhash_helper(u, v, mul));
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(cityhash4);
+#endif

diff --git a/zfs/module/zcommon/zfeature_common.c b/zfs/module/zcommon/zfeature_common.c
index 9f74f0f..fc0e096 100644
--- a/zfs/module/zcommon/zfeature_common.c
+++ b/zfs/module/zcommon/zfeature_common.c

@@ -25,6 +25,8 @@
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 #ifndef _KERNEL
@@ -36,6 +38,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/inttypes.h>
 #include <sys/types.h>
+#include <sys/param.h>
 #include <sys/zfs_sysfs.h>
 #include "zfeature_common.h"
 
@@ -97,6 +100,8 @@
 
 	for (spa_feature_t i = 0; i < SPA_FEATURES; i++) {
 		zfeature_info_t *feature = &spa_feature_table[i];
+		if (!feature->fi_zfs_mod_supported)
+			continue;
 		if (strcmp(guid, feature->fi_guid) == 0)
 			return (B_TRUE);
 	}
@@ -217,8 +222,17 @@
 	 * libzpool, always supports all the features. libzfs needs to
 	 * query the running module, via sysfs, to determine which
 	 * features are supported.
+	 *
+	 * The equivalent _can_ be done on FreeBSD by way of the sysctl
+	 * tree, but this has not been done yet.  Therefore, we return
+	 * that all features except edonr are supported.
 	 */
-#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD)
+#if defined(__FreeBSD__)
+	if (strcmp(name, "org.illumos:edonr") == 0)
+		return (B_FALSE);
+	else
+		return (B_TRUE);
+#elif defined(_KERNEL) || defined(LIB_ZPOOL_BUILD)
 	return (B_TRUE);
 #else
 	return (zfs_mod_supported(ZFS_SYSFS_POOL_FEATURES, name));
@@ -256,6 +270,19 @@
 	feature->fi_zfs_mod_supported = zfs_mod_supported_feature(guid);
 }
 
+/*
+ * Every feature has a GUID of the form com.example:feature_name.  The
+ * reversed DNS name ensures that the feature's GUID is unique across all ZFS
+ * implementations.  This allows companies to independently develop and
+ * release features.  Examples include org.delphix and org.datto.  Previously,
+ * features developed on one implementation have used that implementation's
+ * domain name (e.g. org.illumos and org.zfsonlinux).  Use of the org.openzfs
+ * domain name is recommended for new features which are developed by the
+ * OpenZFS community and its platforms.  This domain may optionally be used by
+ * companies developing features for initial release through an OpenZFS
+ * implementation.  Use of the org.openzfs domain requires reserving the
+ * feature name in advance with the OpenZFS project.
+ */
 void
 zpool_feature_init(void)
 {
@@ -349,6 +376,31 @@
 	    ZFEATURE_TYPE_BOOLEAN, NULL);
 
 	{
+	static const spa_feature_t livelist_deps[] = {
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_LIVELIST,
+	    "com.delphix:livelist", "livelist",
+	    "Improved clone deletion performance.",
+	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN,
+	    livelist_deps);
+	}
+
+	{
+	static const spa_feature_t log_spacemap_deps[] = {
+		SPA_FEATURE_SPACEMAP_V2,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_LOG_SPACEMAP,
+	    "com.delphix:log_spacemap", "log_spacemap",
+	    "Log metaslab changes on a single spacemap and "
+	    "flush them periodically.",
+	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN,
+	    log_spacemap_deps);
+	}
+
+	{
 	static const spa_feature_t large_blocks_deps[] = {
 		SPA_FEATURE_EXTENSIBLE_DATASET,
 		SPA_FEATURE_NONE
@@ -408,6 +460,47 @@
 	    edonr_deps);
 	}
 
+	{
+	static const spa_feature_t redact_books_deps[] = {
+		SPA_FEATURE_BOOKMARK_V2,
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_BOOKMARKS,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_REDACTION_BOOKMARKS,
+	    "com.delphix:redaction_bookmarks", "redaction_bookmarks",
+	    "Support for bookmarks which store redaction lists for zfs "
+	    "redacted send/recv.", 0, ZFEATURE_TYPE_BOOLEAN,
+	    redact_books_deps);
+	}
+
+	{
+	static const spa_feature_t redact_datasets_deps[] = {
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_REDACTED_DATASETS,
+	    "com.delphix:redacted_datasets", "redacted_datasets", "Support for "
+	    "redacted datasets, produced by receiving a redacted zfs send "
+	    "stream.", ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_UINT64_ARRAY,
+	    redact_datasets_deps);
+	}
+
+	{	/* bookmark_written and the features listed as its deps */
+	static const spa_feature_t bookmark_written_deps[] = {
+		SPA_FEATURE_BOOKMARK_V2,
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_BOOKMARKS,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_BOOKMARK_WRITTEN,
+	    "com.delphix:bookmark_written", "bookmark_written",
+	    "Additional accounting, enabling the written#<bookmark> property "
+	    "(space written since a bookmark), and estimates of send stream "
+	    "sizes for incrementals from bookmarks.",
+	    0, ZFEATURE_TYPE_BOOLEAN, bookmark_written_deps);
+	}
+
 	zfeature_register(SPA_FEATURE_DEVICE_REMOVAL,
 	    "com.delphix:device_removal", "device_removal",
 	    "Top-level vdevs can be removed, reducing logical pool size.",
@@ -476,17 +569,35 @@
 	    ZFEATURE_TYPE_BOOLEAN, project_quota_deps);
 	}
 
-	{
 	zfeature_register(SPA_FEATURE_ALLOCATION_CLASSES,
 	    "org.zfsonlinux:allocation_classes", "allocation_classes",
 	    "Support for separate allocation classes.",
 	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
-	}
 
 	zfeature_register(SPA_FEATURE_RESILVER_DEFER,
 	    "com.datto:resilver_defer", "resilver_defer",
 	    "Support for deferring new resilvers when one is already running.",
 	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
+
+	zfeature_register(SPA_FEATURE_DEVICE_REBUILD,
+	    "org.openzfs:device_rebuild", "device_rebuild",
+	    "Support for sequential mirror/dRAID device rebuilds",
+	    ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
+
+	{
+	static const spa_feature_t zstd_deps[] = {
+		SPA_FEATURE_EXTENSIBLE_DATASET,
+		SPA_FEATURE_NONE
+	};
+	zfeature_register(SPA_FEATURE_ZSTD_COMPRESS,
+	    "org.freebsd:zstd_compress", "zstd_compress",
+	    "zstd compression algorithm support.",
+	    ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN, zstd_deps);
+	}
+
+	zfeature_register(SPA_FEATURE_DRAID,
+	    "org.openzfs:draid", "draid", "Support for distributed spare RAID",
+	    ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL);
 }
 
 #if defined(_KERNEL)

diff --git a/zfs/module/zcommon/zfs_comutil.c b/zfs/module/zcommon/zfs_comutil.c
index a3ff7d8..8861677 100644
--- a/zfs/module/zcommon/zfs_comutil.c
+++ b/zfs/module/zcommon/zfs_comutil.c

@@ -26,7 +26,7 @@
 /*
  * This file is intended for functions that ought to be common between user
  * land (libzfs) and the kernel. When many common routines need to be shared
- * then a separate file should to be created.
+ * then a separate file should be created.
  */
 
 #if !defined(_KERNEL)
@@ -68,7 +68,7 @@
  * Are there special vdevs?
  */
 boolean_t
-zfs_special_devs(nvlist_t *nv)
+zfs_special_devs(nvlist_t *nv, char *type)
 {
 	char *bias;
 	uint_t c;
@@ -84,7 +84,11 @@
 		    &bias) == 0) {
 			if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
 			    strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0) {
-				return (B_TRUE);
+				if (type != NULL && strcmp(bias, type) == 0) {
+					return (B_TRUE);
+				} else if (type == NULL) {
+					return (B_TRUE);
+				}
 			}
 		}
 	}

diff --git a/zfs/module/zcommon/zfs_deleg.c b/zfs/module/zcommon/zfs_deleg.c
index 8d98f72..e1f5a35 100644
--- a/zfs/module/zcommon/zfs_deleg.c
+++ b/zfs/module/zcommon/zfs_deleg.c

@@ -52,7 +52,6 @@
 	{ZFS_DELEG_PERM_MOUNT},
 	{ZFS_DELEG_PERM_PROMOTE},
 	{ZFS_DELEG_PERM_RECEIVE},
-	{ZFS_DELEG_PERM_REMAP},
 	{ZFS_DELEG_PERM_RENAME},
 	{ZFS_DELEG_PERM_ROLLBACK},
 	{ZFS_DELEG_PERM_SNAPSHOT},

diff --git a/zfs/module/zcommon/zfs_fletcher.c b/zfs/module/zcommon/zfs_fletcher.c
index 4c9db44..7a9de4a 100644
--- a/zfs/module/zcommon/zfs_fletcher.c
+++ b/zfs/module/zcommon/zfs_fletcher.c

@@ -137,10 +137,10 @@
 #include <sys/sysmacros.h>
 #include <sys/byteorder.h>
 #include <sys/spa.h>
+#include <sys/simd.h>
 #include <sys/zio_checksum.h>
 #include <sys/zfs_context.h>
 #include <zfs_fletcher.h>
-#include <linux/simd.h>
 
 #define	FLETCHER_MIN_SIMD_SIZE	64
 
@@ -184,7 +184,10 @@
 #if defined(__x86_64) && defined(HAVE_AVX512F)
 	&fletcher_4_avx512f_ops,
 #endif
-#if defined(__aarch64__)
+#if defined(__x86_64) && defined(HAVE_AVX512BW)
+	&fletcher_4_avx512bw_ops,
+#endif
+#if defined(__aarch64__) && !defined(__FreeBSD__)
 	&fletcher_4_aarch64_neon_ops,
 #endif
 };
@@ -657,7 +660,7 @@
 	fletcher_4_fastest_impl.compute_ ## type = src->compute_ ## type; \
 }
 
-#define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(50))		/* 50ms */
+#define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(1))		/* 1ms */
 
 typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
 					zio_cksum_t *);
@@ -882,24 +885,26 @@
 	.acf_iter = abd_fletcher_4_iter
 };
 
-
 #if defined(_KERNEL)
-#include <linux/mod_compat.h>
+
+#define	IMPL_FMT(impl, i)	(((impl) == (i)) ? "[%s] " : "%s ")
+
+#if defined(__linux__)
 
 static int
 fletcher_4_param_get(char *buffer, zfs_kernel_param_t *unused)
 {
 	const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
 	char *fmt;
-	int i, cnt = 0;
+	int cnt = 0;
 
 	/* list fastest */
-	fmt = (impl == IMPL_FASTEST) ? "[%s] " : "%s ";
+	fmt = IMPL_FMT(impl, IMPL_FASTEST);
 	cnt += sprintf(buffer + cnt, fmt, "fastest");
 
 	/* list all supported implementations */
-	for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
-		fmt = (i == impl) ? "[%s] " : "%s ";
+	for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) {
+		fmt = IMPL_FMT(impl, i);
 		cnt += sprintf(buffer + cnt, fmt,
 		    fletcher_4_supp_impls[i]->name);
 	}
@@ -913,14 +918,62 @@
 	return (fletcher_4_impl_set(val));
 }
 
+#else
+
+#include <sys/sbuf.h>
+
+static int
+fletcher_4_param(ZFS_MODULE_PARAM_ARGS)
+{
+	int err;
+
+	if (req->newptr == NULL) {
+		const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
+		const int init_buflen = 64;
+		const char *fmt;
+		struct sbuf *s;
+
+		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+		/* "fastest" comes first, in brackets when it is selected */
+		fmt = IMPL_FMT(impl, IMPL_FASTEST);
+		(void) sbuf_printf(s, fmt, "fastest");
+
+		/* then every supported implementation, same bracket rule */
+		for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) {
+			fmt = IMPL_FMT(impl, i);
+			(void) sbuf_printf(s, fmt,
+			    fletcher_4_supp_impls[i]->name);
+		}
+
+		err = sbuf_finish(s);
+		sbuf_delete(s);
+
+		return (err);
+	}
+
+	/* newptr is set: keep buf initialized for sysctl_handle_string(). */
+	char buf[16] = "";
+	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+	if (err)
+		return (err);
+	return (-fletcher_4_impl_set(buf));
+}
+
+#endif
+
+#undef IMPL_FMT
+
 /*
  * Choose a fletcher 4 implementation in ZFS.
  * Users can choose "cycle" to exercise all implementations, but this is
  * for testing purpose therefore it can only be set in user space.
  */
-module_param_call(zfs_fletcher_4_impl,
-    fletcher_4_param_set, fletcher_4_param_get, NULL, 0644);
-MODULE_PARM_DESC(zfs_fletcher_4_impl, "Select fletcher 4 implementation.");
+/* BEGIN CSTYLED */
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, fletcher_4_impl,
+	fletcher_4_param_set, fletcher_4_param_get, ZMOD_RW,
+	"Select fletcher 4 implementation.");
+/* END CSTYLED */
 
 EXPORT_SYMBOL(fletcher_init);
 EXPORT_SYMBOL(fletcher_2_incremental_native);

diff --git a/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c b/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c
index 3b3c1b5..c95a716 100644
--- a/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c
+++ b/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c

@@ -43,7 +43,7 @@
 
 #if defined(__aarch64__)
 
-#include <linux/simd_aarch64.h>
+#include <sys/simd.h>
 #include <sys/spa_checksum.h>
 #include <sys/strings.h>
 #include <zfs_fletcher.h>

diff --git a/zfs/module/zcommon/zfs_fletcher_avx512.c b/zfs/module/zcommon/zfs_fletcher_avx512.c
index 0d4cff2..963f089 100644
--- a/zfs/module/zcommon/zfs_fletcher_avx512.c
+++ b/zfs/module/zcommon/zfs_fletcher_avx512.c

@@ -24,14 +24,16 @@
 
 #if defined(__x86_64) && defined(HAVE_AVX512F)
 
-#include <linux/simd_x86.h>
 #include <sys/byteorder.h>
 #include <sys/frame.h>
 #include <sys/spa_checksum.h>
 #include <sys/strings.h>
+#include <sys/simd.h>
 #include <zfs_fletcher.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 static void
 fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
@@ -171,4 +173,59 @@
 	.name = "avx512f"
 };
 
+#if defined(HAVE_AVX512BW)
+static void
+fletcher_4_avx512bw_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
+    uint64_t size)
+{
+	static const zfs_fletcher_avx512_t mask = {
+		.v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+		0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+		0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+		0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
+	};
+	const uint32_t *ip = buf;
+	const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
+
+	kfpu_begin();
+
+	FLETCHER_4_AVX512_RESTORE_CTX(ctx);
+	/* Load the shuffle mask into zmm5 once, ahead of the loop. */
+	__asm("vmovdqu64 %0, %%zmm5" :: "m" (mask));
+
+	for (; ip < ipend; ip += 8) {
+		__asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
+		/* Reverse the bytes of each widened 32-bit word. */
+		__asm("vpshufb %zmm5, %zmm4, %zmm4");
+		/* Chain the result through the accumulators zmm0..zmm3. */
+		__asm("vpaddq %zmm4, %zmm0, %zmm0");
+		__asm("vpaddq %zmm0, %zmm1, %zmm1");
+		__asm("vpaddq %zmm1, %zmm2, %zmm2");
+		__asm("vpaddq %zmm2, %zmm3, %zmm3");
+	}
+
+	FLETCHER_4_AVX512_SAVE_CTX(ctx)
+
+	kfpu_end();
+}
+STACK_FRAME_NON_STANDARD(fletcher_4_avx512bw_byteswap);
+/* Valid only if the avx512f variant is valid and AVX512BW is available. */
+static boolean_t
+fletcher_4_avx512bw_valid(void)
+{
+	return (fletcher_4_avx512f_valid() && zfs_avx512bw_available());
+}
+
+const fletcher_4_ops_t fletcher_4_avx512bw_ops = {
+	.init_native = fletcher_4_avx512f_init,
+	.fini_native = fletcher_4_avx512f_fini,
+	.compute_native = fletcher_4_avx512f_native,
+	.init_byteswap = fletcher_4_avx512f_init,
+	.fini_byteswap = fletcher_4_avx512f_fini,
+	.compute_byteswap = fletcher_4_avx512bw_byteswap,
+	.valid = fletcher_4_avx512bw_valid,
+	.name = "avx512bw"
+};
+#endif
+
 #endif /* defined(__x86_64) && defined(HAVE_AVX512F) */

diff --git a/zfs/module/zcommon/zfs_fletcher_intel.c b/zfs/module/zcommon/zfs_fletcher_intel.c
index 7f12efe..5136a01 100644
--- a/zfs/module/zcommon/zfs_fletcher_intel.c
+++ b/zfs/module/zcommon/zfs_fletcher_intel.c

@@ -42,8 +42,8 @@
 
 #if defined(HAVE_AVX) && defined(HAVE_AVX2)
 
-#include <linux/simd_x86.h>
 #include <sys/spa_checksum.h>
+#include <sys/simd.h>
 #include <sys/strings.h>
 #include <zfs_fletcher.h>
 

diff --git a/zfs/module/zcommon/zfs_fletcher_sse.c b/zfs/module/zcommon/zfs_fletcher_sse.c
index e6389d6..15ce9b0 100644
--- a/zfs/module/zcommon/zfs_fletcher_sse.c
+++ b/zfs/module/zcommon/zfs_fletcher_sse.c

@@ -43,7 +43,7 @@
 
 #if defined(HAVE_SSE2)
 
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
 #include <sys/spa_checksum.h>
 #include <sys/byteorder.h>
 #include <sys/strings.h>

diff --git a/zfs/module/zcommon/zfs_fletcher_superscalar.c b/zfs/module/zcommon/zfs_fletcher_superscalar.c
index fbbbf80..153f5c7 100644
--- a/zfs/module/zcommon/zfs_fletcher_superscalar.c
+++ b/zfs/module/zcommon/zfs_fletcher_superscalar.c

@@ -41,6 +41,7 @@
  * SOFTWARE.
  */
 
+#include <sys/param.h>
 #include <sys/byteorder.h>
 #include <sys/spa_checksum.h>
 #include <sys/strings.h>

diff --git a/zfs/module/zcommon/zfs_fletcher_superscalar4.c b/zfs/module/zcommon/zfs_fletcher_superscalar4.c
index 97fdb7b..75e6a3b 100644
--- a/zfs/module/zcommon/zfs_fletcher_superscalar4.c
+++ b/zfs/module/zcommon/zfs_fletcher_superscalar4.c

@@ -41,6 +41,7 @@
  * SOFTWARE.
  */
 
+#include <sys/param.h>
 #include <sys/byteorder.h>
 #include <sys/spa_checksum.h>
 #include <sys/strings.h>

diff --git a/zfs/module/zcommon/zfs_namecheck.c b/zfs/module/zcommon/zfs_namecheck.c
index bf5b779..7ecce45 100644
--- a/zfs/module/zcommon/zfs_namecheck.c
+++ b/zfs/module/zcommon/zfs_namecheck.c

@@ -171,7 +171,7 @@
  * Where each component is made up of alphanumeric characters plus the following
  * characters:
  *
- *	[-_.:%]
+ *	[-_.: %]
  *
  * We allow '%' here as we use that character internally to create unique
  * names for temporary clones (for online recv).
@@ -183,6 +183,8 @@
 {
 	const char *end;
 
+	EQUIV(why == NULL, what == NULL);
+
 	/*
 	 * Make sure the name is not too long.
 	 */
@@ -311,6 +313,44 @@
 }
 
 /*
+ * Assert path is a valid bookmark name (entity_namecheck() plus a '#')
+ */
+int
+bookmark_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	int ret = entity_namecheck(path, why, what);
+
+	if (ret == 0 && strchr(path, '#') == NULL) {
+		if (why != NULL) {
+			*why = NAME_ERR_NO_POUND;
+			*what = '#';
+		}
+		return (-1);
+	}
+
+	return (ret);
+}
+
+/*
+ * Assert path is a valid snapshot name (entity_namecheck() plus a '@')
+ */
+int
+snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	int ret = entity_namecheck(path, why, what);
+
+	if (ret == 0 && strchr(path, '@') == NULL) {
+		if (why != NULL) {
+			*why = NAME_ERR_NO_AT;
+			*what = '@';
+		}
+		return (-1);
+	}
+
+	return (ret);
+}
+
+/*
  * mountpoint names must be of the following form:
  *
  *	/[component][/]*[component][/]
@@ -402,29 +442,26 @@
 		return (-1);
 	}
 
-	if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
+	if (strcmp(pool, "mirror") == 0 ||
+	    strcmp(pool, "raidz") == 0 ||
+	    strcmp(pool, "draid") == 0) {
 		if (why)
 			*why = NAME_ERR_RESERVED;
 		return (-1);
 	}
 
-	if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) {
-		if (why)
-			*why = NAME_ERR_DISKLIKE;
-		return (-1);
-	}
-
 	return (0);
 }
 
-#if defined(_KERNEL)
+EXPORT_SYMBOL(entity_namecheck);
 EXPORT_SYMBOL(pool_namecheck);
 EXPORT_SYMBOL(dataset_namecheck);
+EXPORT_SYMBOL(bookmark_namecheck);
+EXPORT_SYMBOL(snapshot_namecheck);
 EXPORT_SYMBOL(zfs_component_namecheck);
 EXPORT_SYMBOL(dataset_nestcheck);
 EXPORT_SYMBOL(get_dataset_depth);
 EXPORT_SYMBOL(zfs_max_dataset_nesting);
 
-module_param(zfs_max_dataset_nesting, int, 0644);
-MODULE_PARM_DESC(zfs_max_dataset_nesting, "Maximum depth of nested datasets");
-#endif
+ZFS_MODULE_PARAM(zfs, zfs_, max_dataset_nesting, INT, ZMOD_RW,
+	"Limit to the amount of nesting a path can have. Defaults to 50.");

diff --git a/zfs/module/zcommon/zfs_prop.c b/zfs/module/zcommon/zfs_prop.c
index cddf3e8..b4e8fcf 100644
--- a/zfs/module/zcommon/zfs_prop.c
+++ b/zfs/module/zcommon/zfs_prop.c

@@ -20,9 +20,12 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright 2016, Joyent, Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -81,7 +84,10 @@
 		{ "noparity",   ZIO_CHECKSUM_NOPARITY },
 		{ "sha512",	ZIO_CHECKSUM_SHA512 },
 		{ "skein",	ZIO_CHECKSUM_SKEIN },
+#if !defined(__FreeBSD__)
+
 		{ "edonr",	ZIO_CHECKSUM_EDONR },
+#endif
 		{ NULL }
 	};
 
@@ -98,8 +104,11 @@
 		{ "skein",	ZIO_CHECKSUM_SKEIN },
 		{ "skein,verify",
 				ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
+#if !defined(__FreeBSD__)
+
 		{ "edonr,verify",
 				ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
+#endif
 		{ NULL }
 	};
 
@@ -119,6 +128,87 @@
 		{ "gzip-9",	ZIO_COMPRESS_GZIP_9 },
 		{ "zle",	ZIO_COMPRESS_ZLE },
 		{ "lz4",	ZIO_COMPRESS_LZ4 },
+		{ "zstd",	ZIO_COMPRESS_ZSTD },
+		{ "zstd-fast",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_DEFAULT) },
+
+		/*
+		 * ZSTD 1-19 are synthetic. We store the compression level in a
+		 * separate hidden property to avoid wasting a large amount of
+		 * space in the ZIO_COMPRESS enum.
+		 *
+		 * The compression level is also stored within the header of the
+		 * compressed block since we may need it for later recompression
+		 * to avoid checksum errors (L2ARC).
+		 *
+		 * Note that the level here is defined as a bit-shifted mask on
+		 * top of the method.
+		 */
+		{ "zstd-1",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_1) },
+		{ "zstd-2",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_2) },
+		{ "zstd-3",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_3) },
+		{ "zstd-4",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_4) },
+		{ "zstd-5",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_5) },
+		{ "zstd-6",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_6) },
+		{ "zstd-7",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_7) },
+		{ "zstd-8",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_8) },
+		{ "zstd-9",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_9) },
+		{ "zstd-10",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_10) },
+		{ "zstd-11",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_11) },
+		{ "zstd-12",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_12) },
+		{ "zstd-13",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_13) },
+		{ "zstd-14",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_14) },
+		{ "zstd-15",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_15) },
+		{ "zstd-16",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_16) },
+		{ "zstd-17",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_17) },
+		{ "zstd-18",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_18) },
+		{ "zstd-19",	ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_19) },
+
+		/*
+		 * The ZSTD-Fast levels are also synthetic.
+		 */
+		{ "zstd-fast-1",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1) },
+		{ "zstd-fast-2",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_2) },
+		{ "zstd-fast-3",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_3) },
+		{ "zstd-fast-4",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_4) },
+		{ "zstd-fast-5",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_5) },
+		{ "zstd-fast-6",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_6) },
+		{ "zstd-fast-7",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_7) },
+		{ "zstd-fast-8",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_8) },
+		{ "zstd-fast-9",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_9) },
+		{ "zstd-fast-10",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_10) },
+		{ "zstd-fast-20",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_20) },
+		{ "zstd-fast-30",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_30) },
+		{ "zstd-fast-40",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_40) },
+		{ "zstd-fast-50",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_50) },
+		{ "zstd-fast-60",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_60) },
+		{ "zstd-fast-70",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_70) },
+		{ "zstd-fast-80",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_80) },
+		{ "zstd-fast-90",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_90) },
+		{ "zstd-fast-100",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_100) },
+		{ "zstd-fast-500",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_500) },
+		{ "zstd-fast-1000",
+		    ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1000) },
 		{ NULL }
 	};
 
@@ -154,11 +244,21 @@
 		{ NULL }
 	};
 
+	static zprop_index_t acl_mode_table[] = {
+		{ "discard",	ZFS_ACL_DISCARD },
+		{ "groupmask",	ZFS_ACL_GROUPMASK },
+		{ "passthrough", ZFS_ACL_PASSTHROUGH },
+		{ "restricted",	ZFS_ACL_RESTRICTED },
+		{ NULL }
+	};
+
 	static zprop_index_t acltype_table[] = {
 		{ "off",	ZFS_ACLTYPE_OFF },
-		{ "disabled",	ZFS_ACLTYPE_OFF },
-		{ "noacl",	ZFS_ACLTYPE_OFF },
-		{ "posixacl",	ZFS_ACLTYPE_POSIXACL },
+		{ "posix",	ZFS_ACLTYPE_POSIX },
+		{ "nfsv4",	ZFS_ACLTYPE_NFSV4 },
+		{ "disabled",	ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */
+		{ "noacl",	ZFS_ACLTYPE_OFF }, /* bkwrd compatibility */
+		{ "posixacl",	ZFS_ACLTYPE_POSIX }, /* bkwrd compatibility */
 		{ NULL }
 	};
 
@@ -272,6 +372,8 @@
 	static zprop_index_t redundant_metadata_table[] = {
 		{ "all",	ZFS_REDUNDANT_METADATA_ALL },
 		{ "most",	ZFS_REDUNDANT_METADATA_MOST },
+		{ "some",	ZFS_REDUNDANT_METADATA_SOME },
+		{ "none",	ZFS_REDUNDANT_METADATA_NONE },
 		{ NULL }
 	};
 
@@ -288,7 +390,7 @@
 	zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
 	    ZFS_REDUNDANT_METADATA_ALL,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
-	    "all | most", "REDUND_MD",
+	    "all | most | some | none", "REDUND_MD",
 	    redundant_metadata_table);
 	zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
@@ -297,26 +399,48 @@
 	zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
 	    ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
 	    ZFS_TYPE_VOLUME,
-	    "on | off | fletcher2 | fletcher4 | sha256 | sha512 | "
-	    "skein | edonr", "CHECKSUM", checksum_table);
+#if !defined(__FreeBSD__)
+	    "on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein"
+	    " | edonr",
+#else
+	    "on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein",
+#endif
+	    "CHECKSUM", checksum_table);
 	zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
-	    "on | off | verify | sha256[,verify], sha512[,verify], "
-	    "skein[,verify], edonr,verify", "DEDUP", dedup_table);
+	    "on | off | verify | sha256[,verify] | sha512[,verify] | "
+#if !defined(__FreeBSD__)
+	    "skein[,verify] | edonr,verify",
+#else
+	    "skein[,verify]",
+#endif
+	    "DEDUP", dedup_table);
 	zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
 	    ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
-	    "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4", "COMPRESS",
-	    compress_table);
+	    "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4 | "
+	    "zstd | zstd-[1-19] | "
+	    "zstd-fast | zstd-fast-[1-10,20,30,40,50,60,70,80,90,100,500,1000]",
+	    "COMPRESS", compress_table);
 	zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
 	    "hidden | visible", "SNAPDIR", snapdir_table);
 	zprop_register_index(ZFS_PROP_SNAPDEV, "snapdev", ZFS_SNAPDEV_HIDDEN,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
 	    "hidden | visible", "SNAPDEV", snapdev_table);
-	zprop_register_index(ZFS_PROP_ACLTYPE, "acltype", ZFS_ACLTYPE_OFF,
+	zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+	    "discard | groupmask | passthrough | restricted", "ACLMODE",
+	    acl_mode_table);
+	zprop_register_index(ZFS_PROP_ACLTYPE, "acltype",
+#ifdef __linux__
+	    /* Linux doesn't natively support ZFS's NFSv4-style ACLs. */
+	    ZFS_ACLTYPE_OFF,
+#else
+	    ZFS_ACLTYPE_NFSV4,
+#endif
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
-	    "noacl | posixacl", "ACLTYPE", acltype_table);
+	    "off | nfsv4 | posix", "ACLTYPE", acltype_table);
 	zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
 	    ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
 	    "discard | noallow | restricted | passthrough | passthrough-x",
@@ -363,14 +487,19 @@
 	zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY",
 	    boolean_table);
+#ifdef __FreeBSD__
+	zprop_register_index(ZFS_PROP_ZONED, "jailed", 0, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM, "on | off", "JAILED", boolean_table);
+#else
 	zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
+#endif
 	zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table);
 	zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
 	    boolean_table);
-	zprop_register_index(ZFS_PROP_OVERLAY, "overlay", 0, PROP_INHERIT,
+	zprop_register_index(ZFS_PROP_OVERLAY, "overlay", 1, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM, "on | off", "OVERLAY", boolean_table);
 
 	/* default index properties */
@@ -425,14 +554,14 @@
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
 	    "MOUNTPOINT");
 	zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off",
-	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options",
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | NFS share options",
 	    "SHARENFS");
 	zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
 	    ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK,
 	    "filesystem | volume | snapshot | bookmark", "TYPE");
 	zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off",
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
-	    "on | off | sharemgr(1M) options", "SHARESMB");
+	    "on | off | SMB share options", "SHARESMB");
 	zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel",
 	    ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET,
 	    "<sensitivity label>", "MLSLABEL");
@@ -457,7 +586,11 @@
 	    "ENCROOT");
 	zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation",
 	    "none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
-	    "prompt | <file URI>", "KEYLOCATION");
+	    "prompt | <file URI> | <https URL> | <http URL>", "KEYLOCATION");
+	zprop_register_string(ZFS_PROP_REDACT_SNAPS,
+	    "redact_snaps", NULL, PROP_READONLY,
+	    ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<snapshot>[,...]",
+	    "RSNAPS");
 
 	/* readonly number properties */
 	zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
@@ -465,9 +598,10 @@
 	zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
 	zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
-	    PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
+	    PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<size>",
+	    "REFER");
 	zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
-	    PROP_READONLY, ZFS_TYPE_DATASET,
+	    PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK,
 	    "<1.00x or higher if compressed>", "RATIO");
 	zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
 	    PROP_READONLY, ZFS_TYPE_DATASET,
@@ -495,7 +629,8 @@
 	    PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
 	    "LUSED");
 	zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced",
-	    0, PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "LREFER");
+	    0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<size>",
+	    "LREFER");
 	zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count",
 	    UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
 	    "<count>", "FSCOUNT");
@@ -506,8 +641,6 @@
 	    ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID");
 	zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY,
 	    ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG");
-	zprop_register_hidden(ZFS_PROP_REMAPTXG, "remaptxg", PROP_TYPE_NUMBER,
-	    PROP_READONLY, ZFS_TYPE_DATASET, "REMAPTXG");
 	zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters",
 	    0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
 	    "<iters>", "PBKDF2ITERS");
@@ -569,13 +702,16 @@
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT");
 	zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER,
 	    PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID");
+	zprop_register_hidden(ZFS_PROP_REDACTED, "redacted", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "REDACTED");
 
 	/*
-	 * Property to be removed once libbe is integrated
+	 * Properties that are obsolete and not used.  These are retained so
+	 * that we don't have to change the values of the zfs_prop_t enum, or
+	 * have NULL pointers in the zfs_prop_table[].
 	 */
-	zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
-	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
-	    "PRIV_PROP");
+	zprop_register_hidden(ZFS_PROP_REMAPTXG, "remaptxg", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "REMAPTXG");
 
 	/* oddball properties */
 	zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
@@ -586,6 +722,8 @@
 boolean_t
 zfs_prop_delegatable(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	zprop_desc_t *pd = &zfs_prop_table[prop];
 
 	/* The mlslabel property is never delegatable. */
@@ -668,8 +806,10 @@
 boolean_t
 zfs_prop_written(const char *name)
 {
-	static const char *prefix = "written@";
-	return (strncmp(name, prefix, strlen(prefix)) == 0);
+	static const char *prop_prefix = "written@";
+	static const char *book_prefix = "written#";
+	return (strncmp(name, prop_prefix, strlen(prop_prefix)) == 0 ||
+	    strncmp(name, book_prefix, strlen(book_prefix)) == 0);
 }
 
 /*
@@ -706,6 +846,8 @@
 zprop_type_t
 zfs_prop_get_type(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_proptype);
 }
 
@@ -715,6 +857,8 @@
 boolean_t
 zfs_prop_readonly(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
 	    zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
 	    zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
@@ -726,6 +870,8 @@
 boolean_t
 zfs_prop_visible(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_visible &&
 	    zfs_prop_table[prop].pd_zfs_mod_supported);
 }
@@ -736,6 +882,8 @@
 boolean_t
 zfs_prop_setonce(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
 	    zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
 }
@@ -743,12 +891,16 @@
 const char *
 zfs_prop_default_string(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_strdefault);
 }
 
 uint64_t
 zfs_prop_default_numeric(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_numdefault);
 }
 
@@ -759,6 +911,8 @@
 const char *
 zfs_prop_to_name(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_name);
 }
 
@@ -768,6 +922,8 @@
 boolean_t
 zfs_prop_inheritable(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_attr == PROP_INHERIT ||
 	    zfs_prop_table[prop].pd_attr == PROP_ONETIME);
 }
@@ -801,12 +957,17 @@
 		return (B_TRUE);
 	else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0)
 		return (B_TRUE);
+	else if (strlen(str) > 8 && strncmp("https://", str, 8) == 0)
+		return (B_TRUE);
+	else if (strlen(str) > 7 && strncmp("http://", str, 7) == 0)
+		return (B_TRUE);
 
 	return (B_FALSE);
 }
 
 
 #ifndef _KERNEL
+#include <libzfs.h>
 
 /*
  * Returns a string describing the set of acceptable values for the given
@@ -815,6 +976,8 @@
 const char *
 zfs_prop_values(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_values);
 }
 
@@ -826,6 +989,8 @@
 int
 zfs_prop_is_string(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING ||
 	    zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX);
 }
@@ -837,6 +1002,8 @@
 const char *
 zfs_prop_column_name(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_colname);
 }
 
@@ -847,6 +1014,8 @@
 boolean_t
 zfs_prop_align_right(zfs_prop_t prop)
 {
+	ASSERT3S(prop, >=, 0);
+	ASSERT3S(prop, <, ZFS_NUM_PROPS);
 	return (zfs_prop_table[prop].pd_rightalign);
 }
 
@@ -854,12 +1023,12 @@
 
 #if defined(_KERNEL)
 
-#include <linux/simd.h>
+#include <sys/simd.h>
 
-#if defined(HAVE_KERNEL_FPU_INTERNAL)
+#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
 union fpregs_state **zfs_kfpu_fpregs;
 EXPORT_SYMBOL(zfs_kfpu_fpregs);
-#endif /* HAVE_KERNEL_FPU_INTERNAL */
+#endif /* HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL */
 
 static int __init
 zcommon_init(void)
@@ -880,13 +1049,15 @@
 	kfpu_fini();
 }
 
-module_init(zcommon_init);
+module_init_early(zcommon_init);
 module_exit(zcommon_fini);
 
-MODULE_DESCRIPTION("Generic ZFS support");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
+#endif
+
+ZFS_MODULE_DESCRIPTION("Generic ZFS support");
+ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
+ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
+ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 
 /* zfs dataset property functions */
 EXPORT_SYMBOL(zfs_userquota_prop_prefixes);
@@ -912,5 +1083,3 @@
 EXPORT_SYMBOL(zfs_prop_string_to_index);
 EXPORT_SYMBOL(zfs_prop_valid_for_type);
 EXPORT_SYMBOL(zfs_prop_written);
-
-#endif

diff --git a/zfs/module/zcommon/zfs_uio.c b/zfs/module/zcommon/zfs_uio.c
deleted file mode 100644
index d586e0a..0000000
--- a/zfs/module/zcommon/zfs_uio.c
+++ /dev/null

@@ -1,287 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved	*/
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-/*
- * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
- */
-
-/*
- * The uio support from OpenSolaris has been added as a short term
- * work around.  The hope is to adopt native Linux type and drop the
- * use of uio's entirely.  Under Linux they only add overhead and
- * when possible we want to use native APIs for the ZPL layer.
- */
-#ifdef _KERNEL
-
-#include <sys/types.h>
-#include <sys/uio_impl.h>
-#include <sys/sysmacros.h>
-#include <sys/strings.h>
-#include <linux/kmap_compat.h>
-#include <linux/uaccess.h>
-
-/*
- * Move "n" bytes at byte address "p"; "rw" indicates the direction
- * of the move, and the I/O parameters are provided in "uio", which is
- * update to reflect the data which was moved.  Returns 0 on success or
- * a non-zero errno on failure.
- */
-static int
-uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
-{
-	const struct iovec *iov = uio->uio_iov;
-	size_t skip = uio->uio_skip;
-	ulong_t cnt;
-
-	while (n && uio->uio_resid) {
-		cnt = MIN(iov->iov_len - skip, n);
-		switch (uio->uio_segflg) {
-		case UIO_USERSPACE:
-		case UIO_USERISPACE:
-			/*
-			 * p = kernel data pointer
-			 * iov->iov_base = user data pointer
-			 */
-			if (rw == UIO_READ) {
-				if (copy_to_user(iov->iov_base+skip, p, cnt))
-					return (EFAULT);
-			} else {
-				unsigned long b_left = 0;
-				if (uio->uio_fault_disable) {
-					if (!zfs_access_ok(VERIFY_READ,
-					    (iov->iov_base + skip), cnt)) {
-						return (EFAULT);
-					}
-					pagefault_disable();
-					b_left =
-					    __copy_from_user_inatomic(p,
-					    (iov->iov_base + skip), cnt);
-					pagefault_enable();
-				} else {
-					b_left =
-					    copy_from_user(p,
-					    (iov->iov_base + skip), cnt);
-				}
-				if (b_left > 0) {
-					unsigned long c_bytes =
-					    cnt - b_left;
-					uio->uio_skip += c_bytes;
-					ASSERT3U(uio->uio_skip, <,
-					    iov->iov_len);
-					uio->uio_resid -= c_bytes;
-					uio->uio_loffset += c_bytes;
-					return (EFAULT);
-				}
-			}
-			break;
-		case UIO_SYSSPACE:
-			if (rw == UIO_READ)
-				bcopy(p, iov->iov_base + skip, cnt);
-			else
-				bcopy(iov->iov_base + skip, p, cnt);
-			break;
-		default:
-			ASSERT(0);
-		}
-		skip += cnt;
-		if (skip == iov->iov_len) {
-			skip = 0;
-			uio->uio_iov = (++iov);
-			uio->uio_iovcnt--;
-		}
-		uio->uio_skip = skip;
-		uio->uio_resid -= cnt;
-		uio->uio_loffset += cnt;
-		p = (caddr_t)p + cnt;
-		n -= cnt;
-	}
-	return (0);
-}
-
-static int
-uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
-{
-	const struct bio_vec *bv = uio->uio_bvec;
-	size_t skip = uio->uio_skip;
-	ulong_t cnt;
-
-	while (n && uio->uio_resid) {
-		void *paddr;
-		cnt = MIN(bv->bv_len - skip, n);
-
-		paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
-		if (rw == UIO_READ)
-			bcopy(p, paddr + bv->bv_offset + skip, cnt);
-		else
-			bcopy(paddr + bv->bv_offset + skip, p, cnt);
-		zfs_kunmap_atomic(paddr, KM_USER1);
-
-		skip += cnt;
-		if (skip == bv->bv_len) {
-			skip = 0;
-			uio->uio_bvec = (++bv);
-			uio->uio_iovcnt--;
-		}
-		uio->uio_skip = skip;
-		uio->uio_resid -= cnt;
-		uio->uio_loffset += cnt;
-		p = (caddr_t)p + cnt;
-		n -= cnt;
-	}
-	return (0);
-}
-
-int
-uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
-{
-	if (uio->uio_segflg != UIO_BVEC)
-		return (uiomove_iov(p, n, rw, uio));
-	else
-		return (uiomove_bvec(p, n, rw, uio));
-}
-EXPORT_SYMBOL(uiomove);
-
-#define	fuword8(uptr, vptr)	get_user((*vptr), (uptr))
-
-/*
- * Fault in the pages of the first n bytes specified by the uio structure.
- * 1 byte in each page is touched and the uio struct is unmodified. Any
- * error will terminate the process as this is only a best attempt to get
- * the pages resident.
- */
-int
-uio_prefaultpages(ssize_t n, struct uio *uio)
-{
-	const struct iovec *iov;
-	ulong_t cnt, incr;
-	caddr_t p;
-	uint8_t tmp;
-	int iovcnt;
-	size_t skip;
-
-	/* no need to fault in kernel pages */
-	switch (uio->uio_segflg) {
-		case UIO_SYSSPACE:
-		case UIO_BVEC:
-			return (0);
-		case UIO_USERSPACE:
-		case UIO_USERISPACE:
-			break;
-		default:
-			ASSERT(0);
-	}
-
-	iov = uio->uio_iov;
-	iovcnt = uio->uio_iovcnt;
-	skip = uio->uio_skip;
-
-	for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
-		cnt = MIN(iov->iov_len - skip, n);
-		/* empty iov */
-		if (cnt == 0)
-			continue;
-		n -= cnt;
-		/*
-		 * touch each page in this segment.
-		 */
-		p = iov->iov_base + skip;
-		while (cnt) {
-			if (fuword8((uint8_t *)p, &tmp))
-				return (EFAULT);
-			incr = MIN(cnt, PAGESIZE);
-			p += incr;
-			cnt -= incr;
-		}
-		/*
-		 * touch the last byte in case it straddles a page.
-		 */
-		p--;
-		if (fuword8((uint8_t *)p, &tmp))
-			return (EFAULT);
-	}
-
-	return (0);
-}
-EXPORT_SYMBOL(uio_prefaultpages);
-
-/*
- * same as uiomove() but doesn't modify uio structure.
- * return in cbytes how many bytes were copied.
- */
-int
-uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
-{
-	struct uio uio_copy;
-	int ret;
-
-	bcopy(uio, &uio_copy, sizeof (struct uio));
-	ret = uiomove(p, n, rw, &uio_copy);
-	*cbytes = uio->uio_resid - uio_copy.uio_resid;
-	return (ret);
-}
-EXPORT_SYMBOL(uiocopy);
-
-/*
- * Drop the next n chars out of *uiop.
- */
-void
-uioskip(uio_t *uiop, size_t n)
-{
-	if (n > uiop->uio_resid)
-		return;
-
-	uiop->uio_skip += n;
-	if (uiop->uio_segflg != UIO_BVEC) {
-		while (uiop->uio_iovcnt &&
-		    uiop->uio_skip >= uiop->uio_iov->iov_len) {
-			uiop->uio_skip -= uiop->uio_iov->iov_len;
-			uiop->uio_iov++;
-			uiop->uio_iovcnt--;
-		}
-	} else {
-		while (uiop->uio_iovcnt &&
-		    uiop->uio_skip >= uiop->uio_bvec->bv_len) {
-			uiop->uio_skip -= uiop->uio_bvec->bv_len;
-			uiop->uio_bvec++;
-			uiop->uio_iovcnt--;
-		}
-	}
-	uiop->uio_loffset += n;
-	uiop->uio_resid -= n;
-}
-EXPORT_SYMBOL(uioskip);
-#endif /* _KERNEL */

diff --git a/zfs/module/zcommon/zpool_prop.c b/zfs/module/zcommon/zpool_prop.c
index edb4f60..6299d37 100644
--- a/zfs/module/zcommon/zpool_prop.c
+++ b/zfs/module/zcommon/zpool_prop.c

@@ -22,6 +22,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 #include <sys/zio.h>
@@ -71,6 +72,9 @@
 	    PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
 	zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL,
 	    PROP_DEFAULT, ZFS_TYPE_POOL, "<comment-string>", "COMMENT");
+	zprop_register_string(ZPOOL_PROP_COMPATIBILITY, "compatibility",
+	    "off", PROP_DEFAULT, ZFS_TYPE_POOL,
+	    "<file[,file...]> | off | legacy", "COMPATIBILITY");
 
 	/* readonly number properties */
 	zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
@@ -104,8 +108,6 @@
 	/* default number properties */
 	zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
 	    PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
-	zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
-	    PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
 	zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
 	    ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT");
 
@@ -131,7 +133,7 @@
 	    ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
 	    "wait | continue | panic", "FAILMODE", failuremode_table);
 	zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
-	    SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
+	    SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL,
 	    "on | off", "AUTOTRIM", boolean_table);
 
 	/* hidden properties */
@@ -143,6 +145,8 @@
 	    PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
 	zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
 	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
+	zprop_register_hidden(ZPOOL_PROP_DEDUPDITTO, "dedupditto",
+	    PROP_TYPE_NUMBER, PROP_DEFAULT, ZFS_TYPE_POOL, "DEDUPDITTO");
 }
 
 /*
@@ -235,6 +239,7 @@
 }
 
 #ifndef _KERNEL
+#include <libzfs.h>
 
 const char *
 zpool_prop_values(zpool_prop_t prop)

diff --git a/zfs/module/zcommon/zprop_common.c b/zfs/module/zcommon/zprop_common.c
index 8416983..faab9d9 100644
--- a/zfs/module/zcommon/zprop_common.c
+++ b/zfs/module/zcommon/zprop_common.c

@@ -41,11 +41,7 @@
 #include "zfs_prop.h"
 #include "zfs_deleg.h"
 
-#if defined(_KERNEL)
-#include <linux/sort.h>
-#define	qsort(base, num, size, cmp) \
-    sort(base, num, size, cmp, NULL)
-#else
+#if !defined(_KERNEL)
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
@@ -77,8 +73,11 @@
  * The zfs module spa_feature_table[], whether in-kernel or in libzpool,
  * always supports all the properties. libzfs needs to query the running
  * module, via sysfs, to determine which properties are supported.
+ *
+ * The equivalent _can_ be done on FreeBSD by way of the sysctl
+ * tree, but this has not been done yet.
  */
-#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD)
+#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD) || defined(__FreeBSD__)
 	return (B_TRUE);
 #else
 	return (zfs_mod_supported(type == ZFS_TYPE_POOL ?
@@ -144,7 +143,7 @@
     const char *colname, const zprop_index_t *idx_tbl)
 {
 	zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
-	    objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl);
+	    objset_types, values, colname, B_FALSE, B_TRUE, idx_tbl);
 }
 
 void

diff --git a/zfs/module/zfs/Makefile.in b/zfs/module/zfs/Makefile.in
index b2460f0..0e04d7e 100644
--- a/zfs/module/zfs/Makefile.in
+++ b/zfs/module/zfs/Makefile.in

@@ -1,20 +1,17 @@
-src = @abs_top_srcdir@/module/zfs
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
 obj = @abs_builddir@
-target_cpu = @target_cpu@
+mfdir = $(obj)
+else
+mfdir = $(srctree)/$(src)
+endif
 
 MODULE := zfs
 
 obj-$(CONFIG_ZFS) := $(MODULE).o
 
-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
-
 # Suppress unused-value warnings in sparc64 architecture headers
-ifeq ($(target_cpu),sparc64)
-ccflags-y += -Wno-unused-value
-endif
-
-# Suppress unused but set variable warnings often due to ASSERTs
-ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
+ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
 
 $(MODULE)-objs += abd.o
 $(MODULE)-objs += aggsum.o
@@ -22,12 +19,12 @@
 $(MODULE)-objs += blkptr.o
 $(MODULE)-objs += bplist.o
 $(MODULE)-objs += bpobj.o
-$(MODULE)-objs += cityhash.o
-$(MODULE)-objs += dbuf.o
-$(MODULE)-objs += dbuf_stats.o
 $(MODULE)-objs += bptree.o
+$(MODULE)-objs += btree.o
 $(MODULE)-objs += bqueue.o
 $(MODULE)-objs += dataset_kstats.o
+$(MODULE)-objs += dbuf.o
+$(MODULE)-objs += dbuf_stats.o
 $(MODULE)-objs += ddt.o
 $(MODULE)-objs += ddt_zap.o
 $(MODULE)-objs += dmu.o
@@ -35,33 +32,36 @@
 $(MODULE)-objs += dmu_object.o
 $(MODULE)-objs += dmu_objset.o
 $(MODULE)-objs += dmu_recv.o
+$(MODULE)-objs += dmu_redact.o
 $(MODULE)-objs += dmu_send.o
 $(MODULE)-objs += dmu_traverse.o
 $(MODULE)-objs += dmu_tx.o
 $(MODULE)-objs += dmu_zfetch.o
 $(MODULE)-objs += dnode.o
 $(MODULE)-objs += dnode_sync.o
+$(MODULE)-objs += dsl_bookmark.o
+$(MODULE)-objs += dsl_crypt.o
 $(MODULE)-objs += dsl_dataset.o
 $(MODULE)-objs += dsl_deadlist.o
 $(MODULE)-objs += dsl_deleg.o
-$(MODULE)-objs += dsl_bookmark.o
+$(MODULE)-objs += dsl_destroy.o
 $(MODULE)-objs += dsl_dir.o
-$(MODULE)-objs += dsl_crypt.o
 $(MODULE)-objs += dsl_pool.o
 $(MODULE)-objs += dsl_prop.o
 $(MODULE)-objs += dsl_scan.o
 $(MODULE)-objs += dsl_synctask.o
+$(MODULE)-objs += dsl_userhold.o
 $(MODULE)-objs += edonr_zfs.o
 $(MODULE)-objs += fm.o
 $(MODULE)-objs += gzip.o
 $(MODULE)-objs += hkdf.o
-$(MODULE)-objs += lzjb.o
 $(MODULE)-objs += lz4.o
+$(MODULE)-objs += lzjb.o
 $(MODULE)-objs += metaslab.o
 $(MODULE)-objs += mmp.o
 $(MODULE)-objs += multilist.o
+$(MODULE)-objs += objlist.o
 $(MODULE)-objs += pathname.o
-$(MODULE)-objs += policy.o
 $(MODULE)-objs += range_tree.o
 $(MODULE)-objs += refcount.o
 $(MODULE)-objs += rrwlock.o
@@ -74,18 +74,18 @@
 $(MODULE)-objs += spa_config.o
 $(MODULE)-objs += spa_errlog.o
 $(MODULE)-objs += spa_history.o
+$(MODULE)-objs += spa_log_spacemap.o
 $(MODULE)-objs += spa_misc.o
 $(MODULE)-objs += spa_stats.o
 $(MODULE)-objs += space_map.o
 $(MODULE)-objs += space_reftree.o
 $(MODULE)-objs += txg.o
-$(MODULE)-objs += trace.o
 $(MODULE)-objs += uberblock.o
 $(MODULE)-objs += unique.o
 $(MODULE)-objs += vdev.o
 $(MODULE)-objs += vdev_cache.o
-$(MODULE)-objs += vdev_disk.o
-$(MODULE)-objs += vdev_file.o
+$(MODULE)-objs += vdev_draid.o
+$(MODULE)-objs += vdev_draid_rand.o
 $(MODULE)-objs += vdev_indirect.o
 $(MODULE)-objs += vdev_indirect_births.o
 $(MODULE)-objs += vdev_indirect_mapping.o
@@ -97,6 +97,7 @@
 $(MODULE)-objs += vdev_raidz.o
 $(MODULE)-objs += vdev_raidz_math.o
 $(MODULE)-objs += vdev_raidz_math_scalar.o
+$(MODULE)-objs += vdev_rebuild.o
 $(MODULE)-objs += vdev_removal.o
 $(MODULE)-objs += vdev_root.o
 $(MODULE)-objs += vdev_trim.o
@@ -107,47 +108,30 @@
 $(MODULE)-objs += zcp_get.o
 $(MODULE)-objs += zcp_global.o
 $(MODULE)-objs += zcp_iter.o
+$(MODULE)-objs += zcp_set.o
 $(MODULE)-objs += zcp_synctask.o
 $(MODULE)-objs += zfeature.o
-$(MODULE)-objs += zfs_acl.o
 $(MODULE)-objs += zfs_byteswap.o
-$(MODULE)-objs += zfs_ctldir.o
-$(MODULE)-objs += zfs_debug.o
-$(MODULE)-objs += zfs_dir.o
 $(MODULE)-objs += zfs_fm.o
 $(MODULE)-objs += zfs_fuid.o
 $(MODULE)-objs += zfs_ioctl.o
 $(MODULE)-objs += zfs_log.o
 $(MODULE)-objs += zfs_onexit.o
+$(MODULE)-objs += zfs_quota.o
 $(MODULE)-objs += zfs_ratelimit.o
 $(MODULE)-objs += zfs_replay.o
 $(MODULE)-objs += zfs_rlock.o
 $(MODULE)-objs += zfs_sa.o
-$(MODULE)-objs += zfs_sysfs.o
-$(MODULE)-objs += zfs_vfsops.o
 $(MODULE)-objs += zfs_vnops.o
-$(MODULE)-objs += zfs_znode.o
 $(MODULE)-objs += zil.o
 $(MODULE)-objs += zio.o
 $(MODULE)-objs += zio_checksum.o
 $(MODULE)-objs += zio_compress.o
-$(MODULE)-objs += zio_crypt.o
 $(MODULE)-objs += zio_inject.o
 $(MODULE)-objs += zle.o
-$(MODULE)-objs += zpl_ctldir.o
-$(MODULE)-objs += zpl_export.o
-$(MODULE)-objs += zpl_file.o
-$(MODULE)-objs += zpl_inode.o
-$(MODULE)-objs += zpl_super.o
-$(MODULE)-objs += zpl_xattr.o
 $(MODULE)-objs += zrlock.o
 $(MODULE)-objs += zthr.o
 $(MODULE)-objs += zvol.o
-$(MODULE)-objs += dsl_destroy.o
-$(MODULE)-objs += dsl_userhold.o
-$(MODULE)-objs += qat.o
-$(MODULE)-objs += qat_compress.o
-$(MODULE)-objs += qat_crypt.o
 
 # Suppress incorrect warnings from versions of objtool which are not
 # aware of x86 EVEX prefix instructions used for AVX512.
@@ -162,3 +146,17 @@
 
 $(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o
 $(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o
+
+$(MODULE)-$(CONFIG_PPC) += vdev_raidz_math_powerpc_altivec.o
+$(MODULE)-$(CONFIG_PPC64) += vdev_raidz_math_powerpc_altivec.o
+
+ifeq ($(CONFIG_ALTIVEC),y)
+$(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec
+endif
+
+ifeq ($(CONFIG_ARM64),y)
+CFLAGS_REMOVE_vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only
+CFLAGS_REMOVE_vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only
+endif
+
+include $(mfdir)/../os/linux/zfs/Makefile

diff --git a/zfs/module/zfs/abd.c b/zfs/module/zfs/abd.c
index 8b25144..754974a 100644
--- a/zfs/module/zfs/abd.c
+++ b/zfs/module/zfs/abd.c

@@ -59,33 +59,6 @@
  *                                      +----------------->| chunk N-1 |
  *                                                         +-----------+
  *
- * Linear buffers act exactly like normal buffers and are always mapped into the
- * kernel's virtual memory space, while scattered ABD data chunks are allocated
- * as physical pages and then mapped in only while they are actually being
- * accessed through one of the abd_* library functions. Using scattered ABDs
- * provides several benefits:
- *
- *  (1) They avoid use of kmem_*, preventing performance problems where running
- *      kmem_reap on very large memory systems never finishes and causes
- *      constant TLB shootdowns.
- *
- *  (2) Fragmentation is less of an issue since when we are at the limit of
- *      allocatable space, we won't have to search around for a long free
- *      hole in the VA space for large ARC allocations. Each chunk is mapped in
- *      individually, so even if we are using HIGHMEM (see next point) we
- *      wouldn't need to worry about finding a contiguous address range.
- *
- *  (3) If we are not using HIGHMEM, then all physical memory is always
- *      mapped into the kernel's address space, so we also avoid the map /
- *      unmap costs on each ABD access.
- *
- * If we are not using HIGHMEM, scattered buffers which have only one chunk
- * can be treated as linear buffers, because they are contiguous in the
- * kernel's virtual address space.  See abd_alloc_pages() for details.
- *
- * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
- * B_FALSE.
- *
  * In addition to directly allocating a linear or scattered ABD, it is also
  * possible to create an ABD by requesting the "sub-ABD" starting at an offset
  * within an existing ABD. In linear buffers this is simple (set abd_buf of
@@ -114,505 +87,92 @@
  * compare, copy, read, write, and fill with zeroes. If you need a custom
  * function which progressively accesses the whole ABD, use the abd_iterate_*
  * functions.
+ *
+ * As an additional feature, linear and scatter ABDs can be stitched together
+ * by using the gang ABD type (abd_alloc_gang()). This allows for
+ * multiple ABDs to be viewed as a singular ABD.
+ *
+ * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
+ * B_FALSE.
  */
 
-#include <sys/abd.h>
+#include <sys/abd_impl.h>
 #include <sys/param.h>
 #include <sys/zio.h>
 #include <sys/zfs_context.h>
 #include <sys/zfs_znode.h>
-#ifdef _KERNEL
-#include <linux/scatterlist.h>
-#include <linux/kmap_compat.h>
-#else
-#define	MAX_ORDER	1
-#endif
-
-typedef struct abd_stats {
-	kstat_named_t abdstat_struct_size;
-	kstat_named_t abdstat_linear_cnt;
-	kstat_named_t abdstat_linear_data_size;
-	kstat_named_t abdstat_scatter_cnt;
-	kstat_named_t abdstat_scatter_data_size;
-	kstat_named_t abdstat_scatter_chunk_waste;
-	kstat_named_t abdstat_scatter_orders[MAX_ORDER];
-	kstat_named_t abdstat_scatter_page_multi_chunk;
-	kstat_named_t abdstat_scatter_page_multi_zone;
-	kstat_named_t abdstat_scatter_page_alloc_retry;
-	kstat_named_t abdstat_scatter_sg_table_retry;
-} abd_stats_t;
-
-static abd_stats_t abd_stats = {
-	/* Amount of memory occupied by all of the abd_t struct allocations */
-	{ "struct_size",			KSTAT_DATA_UINT64 },
-	/*
-	 * The number of linear ABDs which are currently allocated, excluding
-	 * ABDs which don't own their data (for instance the ones which were
-	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
-	 * ABD takes ownership of its buf then it will become tracked.
-	 */
-	{ "linear_cnt",				KSTAT_DATA_UINT64 },
-	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
-	{ "linear_data_size",			KSTAT_DATA_UINT64 },
-	/*
-	 * The number of scatter ABDs which are currently allocated, excluding
-	 * ABDs which don't own their data (for instance the ones which were
-	 * allocated through abd_get_offset()).
-	 */
-	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
-	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
-	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
-	/*
-	 * The amount of space wasted at the end of the last chunk across all
-	 * scatter ABDs tracked by scatter_cnt.
-	 */
-	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
-	/*
-	 * The number of compound allocations of a given order.  These
-	 * allocations are spread over all currently allocated ABDs, and
-	 * act as a measure of memory fragmentation.
-	 */
-	{ { "scatter_order_N",			KSTAT_DATA_UINT64 } },
-	/*
-	 * The number of scatter ABDs which contain multiple chunks.
-	 * ABDs are preferentially allocated from the minimum number of
-	 * contiguous multi-page chunks, a single chunk is optimal.
-	 */
-	{ "scatter_page_multi_chunk",		KSTAT_DATA_UINT64 },
-	/*
-	 * The number of scatter ABDs which are split across memory zones.
-	 * ABDs are preferentially allocated using pages from a single zone.
-	 */
-	{ "scatter_page_multi_zone",		KSTAT_DATA_UINT64 },
-	/*
-	 *  The total number of retries encountered when attempting to
-	 *  allocate the pages to populate the scatter ABD.
-	 */
-	{ "scatter_page_alloc_retry",		KSTAT_DATA_UINT64 },
-	/*
-	 *  The total number of retries encountered when attempting to
-	 *  allocate the sg table for an ABD.
-	 */
-	{ "scatter_sg_table_retry",		KSTAT_DATA_UINT64 },
-};
-
-#define	ABDSTAT(stat)		(abd_stats.stat.value.ui64)
-#define	ABDSTAT_INCR(stat, val) \
-	atomic_add_64(&abd_stats.stat.value.ui64, (val))
-#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
-#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)
-
-#define	ABD_SCATTER(abd)	(abd->abd_u.abd_scatter)
-#define	ABD_BUF(abd)		(abd->abd_u.abd_linear.abd_buf)
-#define	abd_for_each_sg(abd, sg, n, i)	\
-	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
 
 /* see block comment above for description */
 int zfs_abd_scatter_enabled = B_TRUE;
-unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
-
-/*
- * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
- * ABD's.  Smaller allocations will use linear ABD's which uses
- * zio_[data_]buf_alloc().
- *
- * Scatter ABD's use at least one page each, so sub-page allocations waste
- * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
- * half of each page).  Using linear ABD's for small allocations means that
- * they will be put on slabs which contain many allocations.  This can
- * improve memory efficiency, but it also makes it much harder for ARC
- * evictions to actually free pages, because all the buffers on one slab need
- * to be freed in order for the slab (and underlying pages) to be freed.
- * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
- * possible for them to actually waste more memory than scatter (one page per
- * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
- *
- * Spill blocks are typically 512B and are heavily used on systems running
- * selinux with the default dnode size and the `xattr=sa` property set.
- *
- * By default we use linear allocations for 512B and 1KB, and scatter
- * allocations for larger (1.5KB and up).
- */
-int zfs_abd_scatter_min_size = 512 * 3;
-
-static kmem_cache_t *abd_cache = NULL;
-static kstat_t *abd_ksp;
-
-static inline size_t
-abd_chunkcnt_for_bytes(size_t size)
-{
-	return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
-}
-
-#ifdef _KERNEL
-#ifndef CONFIG_HIGHMEM
-
-#ifndef __GFP_RECLAIM
-#define	__GFP_RECLAIM		__GFP_WAIT
-#endif
-
-/*
- * The goal is to minimize fragmentation by preferentially populating ABDs
- * with higher order compound pages from a single zone.  Allocation size is
- * progressively decreased until it can be satisfied without performing
- * reclaim or compaction.  When necessary this function will degenerate to
- * allocating individual pages and allowing reclaim to satisfy allocations.
- */
-static void
-abd_alloc_pages(abd_t *abd, size_t size)
-{
-	struct list_head pages;
-	struct sg_table table;
-	struct scatterlist *sg;
-	struct page *page, *tmp_page = NULL;
-	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
-	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
-	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
-	int nr_pages = abd_chunkcnt_for_bytes(size);
-	int chunks = 0, zones = 0;
-	size_t remaining_size;
-	int nid = NUMA_NO_NODE;
-	int alloc_pages = 0;
-
-	INIT_LIST_HEAD(&pages);
-
-	while (alloc_pages < nr_pages) {
-		unsigned chunk_pages;
-		int order;
-
-		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
-		chunk_pages = (1U << order);
-
-		page = alloc_pages_node(nid, order ? gfp_comp : gfp, order);
-		if (page == NULL) {
-			if (order == 0) {
-				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
-				schedule_timeout_interruptible(1);
-			} else {
-				max_order = MAX(0, order - 1);
-			}
-			continue;
-		}
-
-		list_add_tail(&page->lru, &pages);
-
-		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
-			zones++;
-
-		nid = page_to_nid(page);
-		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
-		chunks++;
-		alloc_pages += chunk_pages;
-	}
-
-	ASSERT3S(alloc_pages, ==, nr_pages);
-
-	while (sg_alloc_table(&table, chunks, gfp)) {
-		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
-		schedule_timeout_interruptible(1);
-	}
-
-	sg = table.sgl;
-	remaining_size = size;
-	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
-		size_t sg_size = MIN(PAGESIZE << compound_order(page),
-		    remaining_size);
-		sg_set_page(sg, page, sg_size, 0);
-		remaining_size -= sg_size;
-
-		sg = sg_next(sg);
-		list_del(&page->lru);
-	}
-
-	/*
-	 * These conditions ensure that a possible transformation to a linear
-	 * ABD would be valid.
-	 */
-	ASSERT(!PageHighMem(sg_page(table.sgl)));
-	ASSERT0(ABD_SCATTER(abd).abd_offset);
-
-	if (table.nents == 1) {
-		/*
-		 * Since there is only one entry, this ABD can be represented
-		 * as a linear buffer.  All single-page (4K) ABD's can be
-		 * represented this way.  Some multi-page ABD's can also be
-		 * represented this way, if we were able to allocate a single
-		 * "chunk" (higher-order "page" which represents a power-of-2
-		 * series of physically-contiguous pages).  This is often the
-		 * case for 2-page (8K) ABD's.
-		 *
-		 * Representing a single-entry scatter ABD as a linear ABD
-		 * has the performance advantage of avoiding the copy (and
-		 * allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
-		 * A performance increase of around 5% has been observed for
-		 * ARC-cached reads (of small blocks which can take advantage
-		 * of this).
-		 *
-		 * Note that this optimization is only possible because the
-		 * pages are always mapped into the kernel's address space.
-		 * This is not the case for highmem pages, so the
-		 * optimization can not be made there.
-		 */
-		abd->abd_flags |= ABD_FLAG_LINEAR;
-		abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
-		abd->abd_u.abd_linear.abd_sgl = table.sgl;
-		abd->abd_u.abd_linear.abd_buf =
-		    page_address(sg_page(table.sgl));
-	} else if (table.nents > 1) {
-		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
-		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
-
-		if (zones) {
-			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
-			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
-		}
-
-		ABD_SCATTER(abd).abd_sgl = table.sgl;
-		ABD_SCATTER(abd).abd_nents = table.nents;
-	}
-}
-#else
-/*
- * Allocate N individual pages to construct a scatter ABD.  This function
- * makes no attempt to request contiguous pages and requires the minimal
- * number of kernel interfaces.  It's designed for maximum compatibility.
- */
-static void
-abd_alloc_pages(abd_t *abd, size_t size)
-{
-	struct scatterlist *sg = NULL;
-	struct sg_table table;
-	struct page *page;
-	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
-	int nr_pages = abd_chunkcnt_for_bytes(size);
-	int i = 0;
-
-	while (sg_alloc_table(&table, nr_pages, gfp)) {
-		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
-		schedule_timeout_interruptible(1);
-	}
-
-	ASSERT3U(table.nents, ==, nr_pages);
-	ABD_SCATTER(abd).abd_sgl = table.sgl;
-	ABD_SCATTER(abd).abd_nents = nr_pages;
-
-	abd_for_each_sg(abd, sg, nr_pages, i) {
-		while ((page = __page_cache_alloc(gfp)) == NULL) {
-			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
-			schedule_timeout_interruptible(1);
-		}
-
-		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
-		sg_set_page(sg, page, PAGESIZE, 0);
-	}
-
-	if (nr_pages > 1) {
-		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
-		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
-	}
-}
-#endif /* !CONFIG_HIGHMEM */
-
-static void
-abd_free_pages(abd_t *abd)
-{
-	struct scatterlist *sg = NULL;
-	struct sg_table table;
-	struct page *page;
-	int nr_pages = ABD_SCATTER(abd).abd_nents;
-	int order, i = 0;
-
-	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
-		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);
-
-	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
-		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
-
-	abd_for_each_sg(abd, sg, nr_pages, i) {
-		page = sg_page(sg);
-		order = compound_order(page);
-		__free_pages(page, order);
-		ASSERT3U(sg->length, <=, PAGE_SIZE << order);
-		ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
-	}
-
-	table.sgl = ABD_SCATTER(abd).abd_sgl;
-	table.nents = table.orig_nents = nr_pages;
-	sg_free_table(&table);
-}
-
-#else /* _KERNEL */
-
-#ifndef PAGE_SHIFT
-#define	PAGE_SHIFT (highbit64(PAGESIZE)-1)
-#endif
-
-struct page;
-
-#define	zfs_kmap_atomic(chunk, km)	((void *)chunk)
-#define	zfs_kunmap_atomic(addr, km)	do { (void)(addr); } while (0)
-#define	local_irq_save(flags)		do { (void)(flags); } while (0)
-#define	local_irq_restore(flags)	do { (void)(flags); } while (0)
-#define	nth_page(pg, i) \
-	((struct page *)((void *)(pg) + (i) * PAGESIZE))
-
-struct scatterlist {
-	struct page *page;
-	int length;
-	int end;
-};
-
-static void
-sg_init_table(struct scatterlist *sg, int nr)
-{
-	memset(sg, 0, nr * sizeof (struct scatterlist));
-	sg[nr - 1].end = 1;
-}
-
-#define	for_each_sg(sgl, sg, nr, i)	\
-	for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))
-
-static inline void
-sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
-    unsigned int offset)
-{
-	/* currently we don't use offset */
-	ASSERT(offset == 0);
-	sg->page = page;
-	sg->length = len;
-}
-
-static inline struct page *
-sg_page(struct scatterlist *sg)
-{
-	return (sg->page);
-}
-
-static inline struct scatterlist *
-sg_next(struct scatterlist *sg)
-{
-	if (sg->end)
-		return (NULL);
-
-	return (sg + 1);
-}
-
-static void
-abd_alloc_pages(abd_t *abd, size_t size)
-{
-	unsigned nr_pages = abd_chunkcnt_for_bytes(size);
-	struct scatterlist *sg;
-	int i;
-
-	ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages *
-	    sizeof (struct scatterlist), KM_SLEEP);
-	sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages);
-
-	abd_for_each_sg(abd, sg, nr_pages, i) {
-		struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
-		sg_set_page(sg, p, PAGESIZE, 0);
-	}
-	ABD_SCATTER(abd).abd_nents = nr_pages;
-}
-
-static void
-abd_free_pages(abd_t *abd)
-{
-	int i, n = ABD_SCATTER(abd).abd_nents;
-	struct scatterlist *sg;
-
-	abd_for_each_sg(abd, sg, n, i) {
-		for (int j = 0; j < sg->length; j += PAGESIZE) {
-			struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
-			umem_free(p, PAGESIZE);
-		}
-	}
-
-	vmem_free(ABD_SCATTER(abd).abd_sgl, n * sizeof (struct scatterlist));
-}
-
-#endif /* _KERNEL */
 
 void
-abd_init(void)
-{
-	int i;
-
-	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
-	    0, NULL, NULL, NULL, NULL, NULL, 0);
-
-	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
-	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
-	if (abd_ksp != NULL) {
-		abd_ksp->ks_data = &abd_stats;
-		kstat_install(abd_ksp);
-
-		for (i = 0; i < MAX_ORDER; i++) {
-			snprintf(abd_stats.abdstat_scatter_orders[i].name,
-			    KSTAT_STRLEN, "scatter_order_%d", i);
-			abd_stats.abdstat_scatter_orders[i].data_type =
-			    KSTAT_DATA_UINT64;
-		}
-	}
-}
-
-void
-abd_fini(void)
-{
-	if (abd_ksp != NULL) {
-		kstat_delete(abd_ksp);
-		abd_ksp = NULL;
-	}
-
-	if (abd_cache) {
-		kmem_cache_destroy(abd_cache);
-		abd_cache = NULL;
-	}
-}
-
-static inline void
 abd_verify(abd_t *abd)
 {
-	ASSERT3U(abd->abd_size, >, 0);
+#ifdef ZFS_DEBUG
 	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
 	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
-	    ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE));
+	    ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG |
+	    ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS | ABD_FLAG_ALLOCD));
 	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
 	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
 	if (abd_is_linear(abd)) {
-		ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL);
-	} else {
-		size_t n;
-		int i = 0;
-		struct scatterlist *sg = NULL;
-
-		ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
-		ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
-		    ABD_SCATTER(abd).abd_sgl->length);
-		n = ABD_SCATTER(abd).abd_nents;
-		abd_for_each_sg(abd, sg, n, i) {
-			ASSERT3P(sg_page(sg), !=, NULL);
+		ASSERT3U(abd->abd_size, >, 0);
+		ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
+	} else if (abd_is_gang(abd)) {
+		uint_t child_sizes = 0;
+		for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
+		    cabd != NULL;
+		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+			ASSERT(list_link_active(&cabd->abd_gang_link));
+			child_sizes += cabd->abd_size;
+			abd_verify(cabd);
 		}
+		ASSERT3U(abd->abd_size, ==, child_sizes);
+	} else {
+		ASSERT3U(abd->abd_size, >, 0);
+		abd_verify_scatter(abd);
 	}
+#endif
 }
 
-static inline abd_t *
-abd_alloc_struct(void)
+static void
+abd_init_struct(abd_t *abd)
 {
-	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
+	list_link_init(&abd->abd_gang_link);
+	mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
+	abd->abd_flags = 0;
+#ifdef ZFS_DEBUG
+	zfs_refcount_create(&abd->abd_children);
+	abd->abd_parent = NULL;
+#endif
+	abd->abd_size = 0;
+}
 
-	ASSERT3P(abd, !=, NULL);
-	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
+static void
+abd_fini_struct(abd_t *abd)
+{
+	mutex_destroy(&abd->abd_mtx);
+	ASSERT(!list_link_active(&abd->abd_gang_link));
+#ifdef ZFS_DEBUG
+	zfs_refcount_destroy(&abd->abd_children);
+#endif
+}
 
+abd_t *
+abd_alloc_struct(size_t size)
+{
+	abd_t *abd = abd_alloc_struct_impl(size);
+	abd_init_struct(abd);
+	abd->abd_flags |= ABD_FLAG_ALLOCD;
 	return (abd);
 }
 
-static inline void
+void
 abd_free_struct(abd_t *abd)
 {
-	kmem_cache_free(abd_cache, abd);
-	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
+	abd_fini_struct(abd);
+	abd_free_struct_impl(abd);
 }
 
 /*
@@ -622,46 +182,26 @@
 abd_t *
 abd_alloc(size_t size, boolean_t is_metadata)
 {
-	/* see the comment above zfs_abd_scatter_min_size */
-	if (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size)
+	if (abd_size_alloc_linear(size))
 		return (abd_alloc_linear(size, is_metadata));
 
 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
 
-	abd_t *abd = abd_alloc_struct();
-	abd->abd_flags = ABD_FLAG_OWNER;
+	abd_t *abd = abd_alloc_struct(size);
+	abd->abd_flags |= ABD_FLAG_OWNER;
 	abd->abd_u.abd_scatter.abd_offset = 0;
-	abd_alloc_pages(abd, size);
+	abd_alloc_chunks(abd, size);
 
 	if (is_metadata) {
 		abd->abd_flags |= ABD_FLAG_META;
 	}
 	abd->abd_size = size;
-	abd->abd_parent = NULL;
-	zfs_refcount_create(&abd->abd_children);
 
-	ABDSTAT_BUMP(abdstat_scatter_cnt);
-	ABDSTAT_INCR(abdstat_scatter_data_size, size);
-	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
-	    P2ROUNDUP(size, PAGESIZE) - size);
+	abd_update_scatter_stats(abd, ABDSTAT_INCR);
 
 	return (abd);
 }
 
-static void
-abd_free_scatter(abd_t *abd)
-{
-	abd_free_pages(abd);
-
-	zfs_refcount_destroy(&abd->abd_children);
-	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
-	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
-	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
-	    (int)abd->abd_size - (int)P2ROUNDUP(abd->abd_size, PAGESIZE));
-
-	abd_free_struct(abd);
-}
-
 /*
  * Allocate an ABD that must be linear, along with its own underlying data
  * buffer. Only use this when it would be very annoying to write your ABD
@@ -670,26 +210,23 @@
 abd_t *
 abd_alloc_linear(size_t size, boolean_t is_metadata)
 {
-	abd_t *abd = abd_alloc_struct();
+	abd_t *abd = abd_alloc_struct(0);
 
 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
 
-	abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
+	abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
 	if (is_metadata) {
 		abd->abd_flags |= ABD_FLAG_META;
 	}
 	abd->abd_size = size;
-	abd->abd_parent = NULL;
-	zfs_refcount_create(&abd->abd_children);
 
 	if (is_metadata) {
-		abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size);
+		ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
 	} else {
-		abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size);
+		ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size);
 	}
 
-	ABDSTAT_BUMP(abdstat_linear_cnt);
-	ABDSTAT_INCR(abdstat_linear_data_size, size);
+	abd_update_linear_stats(abd, ABDSTAT_INCR);
 
 	return (abd);
 }
@@ -698,43 +235,90 @@
 abd_free_linear(abd_t *abd)
 {
 	if (abd_is_linear_page(abd)) {
-		/* Transform it back into a scatter ABD for freeing */
-		struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl;
-		abd->abd_flags &= ~ABD_FLAG_LINEAR;
-		abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE;
-		ABD_SCATTER(abd).abd_nents = 1;
-		ABD_SCATTER(abd).abd_offset = 0;
-		ABD_SCATTER(abd).abd_sgl = sg;
-		abd_free_scatter(abd);
+		abd_free_linear_page(abd);
 		return;
 	}
 	if (abd->abd_flags & ABD_FLAG_META) {
-		zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
+		zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
 	} else {
-		zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
+		zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
 	}
 
-	zfs_refcount_destroy(&abd->abd_children);
-	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
-	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+	abd_update_linear_stats(abd, ABDSTAT_DECR);
+}
 
-	abd_free_struct(abd);
+static void
+abd_free_gang(abd_t *abd)
+{
+	ASSERT(abd_is_gang(abd));
+	abd_t *cabd;
+
+	while ((cabd = list_head(&ABD_GANG(abd).abd_gang_chain)) != NULL) {
+		/*
+		 * We must acquire the child ABD's mutex to ensure that if it
+		 * is being added to another gang ABD we will set the link
+		 * as inactive when removing it from this gang ABD and before
+		 * adding it to the other gang ABD.
+		 */
+		mutex_enter(&cabd->abd_mtx);
+		ASSERT(list_link_active(&cabd->abd_gang_link));
+		list_remove(&ABD_GANG(abd).abd_gang_chain, cabd);
+		mutex_exit(&cabd->abd_mtx);
+		if (cabd->abd_flags & ABD_FLAG_GANG_FREE)
+			abd_free(cabd);
+	}
+	list_destroy(&ABD_GANG(abd).abd_gang_chain);
+}
+
+static void
+abd_free_scatter(abd_t *abd)
+{
+	abd_free_chunks(abd);
+	abd_update_scatter_stats(abd, ABDSTAT_DECR);
 }
 
 /*
- * Free an ABD. Only use this on ABDs allocated with abd_alloc() or
- * abd_alloc_linear().
+ * Free an ABD.  Use with any kind of abd: those created with abd_alloc_*()
+ * and abd_get_*(), including abd_get_offset_struct().
+ *
+ * If the ABD was created with abd_alloc_*(), the underlying data
+ * (scatterlist or linear buffer) will also be freed.  (Subject to ownership
+ * changes via abd_*_ownership_of_buf().)
+ *
+ * Unless the ABD was created with abd_get_offset_struct(), the abd_t will
+ * also be freed.
  */
 void
 abd_free(abd_t *abd)
 {
+	if (abd == NULL)
+		return;
+
 	abd_verify(abd);
-	ASSERT3P(abd->abd_parent, ==, NULL);
-	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
-	if (abd_is_linear(abd))
-		abd_free_linear(abd);
-	else
-		abd_free_scatter(abd);
+#ifdef ZFS_DEBUG
+	IMPLY(abd->abd_flags & ABD_FLAG_OWNER, abd->abd_parent == NULL);
+#endif
+
+	if (abd_is_gang(abd)) {
+		abd_free_gang(abd);
+	} else if (abd_is_linear(abd)) {
+		if (abd->abd_flags & ABD_FLAG_OWNER)
+			abd_free_linear(abd);
+	} else {
+		if (abd->abd_flags & ABD_FLAG_OWNER)
+			abd_free_scatter(abd);
+	}
+
+#ifdef ZFS_DEBUG
+	if (abd->abd_parent != NULL) {
+		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
+		    abd->abd_size, abd);
+	}
+#endif
+
+	abd_fini_struct(abd);
+	if (abd->abd_flags & ABD_FLAG_ALLOCD)
+		abd_free_struct_impl(abd);
 }
 
 /*
@@ -754,110 +338,277 @@
 }
 
 /*
- * If we're going to use this ABD for doing I/O using the block layer, the
- * consumer of the ABD data doesn't care if it's scattered or not, and we don't
- * plan to store this ABD in memory for a long period of time, we should
- * allocate the ABD type that requires the least data copying to do the I/O.
- *
- * On Illumos this is linear ABDs, however if ldi_strategy() can ever issue I/Os
- * using a scatter/gather list we should switch to that and replace this call
- * with vanilla abd_alloc().
- *
- * On Linux the optimal thing to do would be to use abd_get_offset() and
- * construct a new ABD which shares the original pages thereby eliminating
- * the copy.  But for the moment a new linear ABD is allocated until this
- * performance optimization can be implemented.
+ * Create a gang ABD that will be the head of a list of ABD's. This is used
+ * to "chain" scatter/gather lists together when constructing aggregated
+ * IO's. To free this abd, abd_free() must be called.
  */
 abd_t *
-abd_alloc_for_io(size_t size, boolean_t is_metadata)
+abd_alloc_gang(void)
 {
-	return (abd_alloc(size, is_metadata));
+	abd_t *abd = abd_alloc_struct(0);
+	abd->abd_flags |= ABD_FLAG_GANG | ABD_FLAG_OWNER;
+	list_create(&ABD_GANG(abd).abd_gang_chain,
+	    sizeof (abd_t), offsetof(abd_t, abd_gang_link));
+	return (abd);
 }
 
 /*
- * Allocate a new ABD to point to offset off of sabd. It shares the underlying
- * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
- * any derived ABDs exist.
+ * Add a child gang ABD to a parent gang ABD's chained list.
  */
-static inline abd_t *
-abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
+static void
+abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
 {
-	abd_t *abd;
+	ASSERT(abd_is_gang(pabd));
+	ASSERT(abd_is_gang(cabd));
 
+	if (free_on_free) {
+		/*
+		 * If the parent is responsible for freeing the child gang
+		 * ABD we will just splice the child's children ABD list to
+		 * the parent's list and immediately free the child gang ABD
+		 * struct. The parent gang ABDs children from the child gang
+		 * will retain all the free_on_free settings after being
+		 * added to the parents list.
+		 */
+#ifdef ZFS_DEBUG
+		/*
+		 * If cabd had abd_parent, we have to drop it here.  We can't
+		 * transfer it to pabd, nor can we clear abd_size leaving it.
+		 */
+		if (cabd->abd_parent != NULL) {
+			(void) zfs_refcount_remove_many(
+			    &cabd->abd_parent->abd_children,
+			    cabd->abd_size, cabd);
+			cabd->abd_parent = NULL;
+		}
+#endif
+		pabd->abd_size += cabd->abd_size;
+		cabd->abd_size = 0;
+		list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
+		    &ABD_GANG(cabd).abd_gang_chain);
+		ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
+		abd_verify(pabd);
+		abd_free(cabd);
+	} else {
+		for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain);
+		    child != NULL;
+		    child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) {
+			/*
+			 * We always pass B_FALSE for free_on_free as it is the
+			 * original child gang ABDs responsibility to determine
+			 * if any of its child ABDs should be free'd on the call
+			 * to abd_free().
+			 */
+			abd_gang_add(pabd, child, B_FALSE);
+		}
+		abd_verify(pabd);
+	}
+}
+
+/*
+ * Add a child ABD to a gang ABD's chained list.
+ */
+void
+abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
+{
+	ASSERT(abd_is_gang(pabd));
+	abd_t *child_abd = NULL;
+
+	/*
+	 * If the child being added is a gang ABD, we will add the
+	 * child's ABDs to the parent gang ABD. This allows us to account
+	 * for the offset correctly in the parent gang ABD.
+	 */
+	if (abd_is_gang(cabd)) {
+		ASSERT(!list_link_active(&cabd->abd_gang_link));
+		return (abd_gang_add_gang(pabd, cabd, free_on_free));
+	}
+	ASSERT(!abd_is_gang(cabd));
+
+	/*
+	 * In order to verify that an ABD is not already part of
+	 * another gang ABD, we must lock the child ABD's abd_mtx
+	 * to check its abd_gang_link status. We unlock the abd_mtx
+	 * only after it has been added to a gang ABD, which
+	 * will update the abd_gang_link's status. See comment below
+	 * for how an ABD can be in multiple gang ABD's simultaneously.
+	 */
+	mutex_enter(&cabd->abd_mtx);
+	if (list_link_active(&cabd->abd_gang_link)) {
+		/*
+		 * If the child ABD is already part of another
+		 * gang ABD then we must allocate a new
+		 * ABD to use a separate link. We mark the newly
+		 * allocated ABD with ABD_FLAG_GANG_FREE, before
+		 * adding it to the gang ABD's list, to make the
+		 * gang ABD aware that it is responsible to call
+		 * abd_free(). We use abd_get_offset() in order
+		 * to just allocate a new ABD but avoid copying the
+		 * data over into the newly allocated ABD.
+		 *
+		 * An ABD may become part of multiple gang ABD's. For
+		 * example, when writing ditto blocks, the same ABD
+		 * is used to write 2 or 3 locations with 2 or 3
+		 * zio_t's. Each of the zio's may be aggregated with
+		 * different adjacent zio's. zio aggregation uses gang
+		 * zio's, so the single ABD can become part of multiple
+		 * gang zio's.
+		 *
+		 * The ASSERT below is to make sure that if
+		 * free_on_free is passed as B_TRUE, the ABD can
+		 * not be in multiple gang ABD's. The gang ABD
+		 * can not be responsible for cleaning up the child
+		 * ABD memory allocation if the ABD can be in
+		 * multiple gang ABD's at one time.
+		 */
+		ASSERT3B(free_on_free, ==, B_FALSE);
+		child_abd = abd_get_offset(cabd, 0);
+		child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
+	} else {
+		child_abd = cabd;
+		if (free_on_free)
+			child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
+	}
+	ASSERT3P(child_abd, !=, NULL);
+
+	list_insert_tail(&ABD_GANG(pabd).abd_gang_chain, child_abd);
+	mutex_exit(&cabd->abd_mtx);
+	pabd->abd_size += child_abd->abd_size;
+}
+
+/*
+ * Locate the ABD for the supplied offset in the gang ABD.
+ * Return a new offset relative to the returned ABD.
+ */
+abd_t *
+abd_gang_get_offset(abd_t *abd, size_t *off)
+{
+	abd_t *cabd;
+
+	ASSERT(abd_is_gang(abd));
+	ASSERT3U(*off, <, abd->abd_size);
+	for (cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL;
+	    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+		if (*off >= cabd->abd_size)
+			*off -= cabd->abd_size;
+		else
+			return (cabd);
+	}
+	VERIFY3P(cabd, !=, NULL);
+	return (cabd);
+}
+
+/*
+ * Allocate a new ABD, using the provided struct (if non-NULL, and if
+ * circumstances allow - otherwise allocate the struct).  The returned ABD will
+ * point to offset off of sabd. It shares the underlying buffer data with sabd.
+ * Use abd_free() to free.  sabd must not be freed while any derived ABDs exist.
+ */
+static abd_t *
+abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
+{
 	abd_verify(sabd);
-	ASSERT3U(off, <=, sabd->abd_size);
+	ASSERT3U(off + size, <=, sabd->abd_size);
 
 	if (abd_is_linear(sabd)) {
-		abd = abd_alloc_struct();
-
+		if (abd == NULL)
+			abd = abd_alloc_struct(0);
 		/*
 		 * Even if this buf is filesystem metadata, we only track that
 		 * if we own the underlying data buffer, which is not true in
 		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
 		 */
-		abd->abd_flags = ABD_FLAG_LINEAR;
+		abd->abd_flags |= ABD_FLAG_LINEAR;
 
-		abd->abd_u.abd_linear.abd_buf =
-		    (char *)sabd->abd_u.abd_linear.abd_buf + off;
-	} else {
-		int i = 0;
-		struct scatterlist *sg = NULL;
-		size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off;
-
-		abd = abd_alloc_struct();
-
-		/*
-		 * Even if this buf is filesystem metadata, we only track that
-		 * if we own the underlying data buffer, which is not true in
-		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
-		 */
-		abd->abd_flags = 0;
-
-		abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
-			if (new_offset < sg->length)
-				break;
-			new_offset -= sg->length;
+		ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
+	} else if (abd_is_gang(sabd)) {
+		size_t left = size;
+		if (abd == NULL) {
+			abd = abd_alloc_gang();
+		} else {
+			abd->abd_flags |= ABD_FLAG_GANG;
+			list_create(&ABD_GANG(abd).abd_gang_chain,
+			    sizeof (abd_t), offsetof(abd_t, abd_gang_link));
 		}
 
-		ABD_SCATTER(abd).abd_sgl = sg;
-		ABD_SCATTER(abd).abd_offset = new_offset;
-		ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;
+		abd->abd_flags &= ~ABD_FLAG_OWNER;
+		for (abd_t *cabd = abd_gang_get_offset(sabd, &off);
+		    cabd != NULL && left > 0;
+		    cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) {
+			int csize = MIN(left, cabd->abd_size - off);
+
+			abd_t *nabd = abd_get_offset_size(cabd, off, csize);
+			abd_gang_add(abd, nabd, B_TRUE);
+			left -= csize;
+			off = 0;
+		}
+		ASSERT3U(left, ==, 0);
+	} else {
+		abd = abd_get_offset_scatter(abd, sabd, off, size);
 	}
 
+	ASSERT3P(abd, !=, NULL);
 	abd->abd_size = size;
+#ifdef ZFS_DEBUG
 	abd->abd_parent = sabd;
-	zfs_refcount_create(&abd->abd_children);
 	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
-
+#endif
 	return (abd);
 }
 
+/*
+ * Like abd_get_offset_size(), but memory for the abd_t is provided by the
+ * caller.  Using this routine can improve performance by avoiding the cost
+ * of allocating memory for the abd_t struct, and updating the abd stats.
+ * Usually, the provided abd is returned, but in some circumstances (FreeBSD,
+ * if sabd is scatter and size is more than 2 pages) a new abd_t may need to
+ * be allocated.  Therefore callers should be careful to use the returned
+ * abd_t*.
+ */
+abd_t *
+abd_get_offset_struct(abd_t *abd, abd_t *sabd, size_t off, size_t size)
+{
+	abd_t *result;
+	abd_init_struct(abd);
+	result = abd_get_offset_impl(abd, sabd, off, size);
+	if (result != abd)
+		abd_fini_struct(abd);
+	return (result);
+}
+
 abd_t *
 abd_get_offset(abd_t *sabd, size_t off)
 {
 	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
-
 	VERIFY3U(size, >, 0);
-
-	return (abd_get_offset_impl(sabd, off, size));
+	return (abd_get_offset_impl(NULL, sabd, off, size));
 }
 
 abd_t *
 abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
 {
 	ASSERT3U(off + size, <=, sabd->abd_size);
-
-	return (abd_get_offset_impl(sabd, off, size));
+	return (abd_get_offset_impl(NULL, sabd, off, size));
 }
 
 /*
- * Allocate a linear ABD structure for buf. You must free this with abd_put()
- * since the resulting ABD doesn't own its own buffer.
+ * Return a scatter ABD of the given size containing only zeros.
+ */
+abd_t *
+abd_get_zeros(size_t size)
+{
+	ASSERT3P(abd_zero_scatter, !=, NULL);
+	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
+	return (abd_get_offset_size(abd_zero_scatter, 0, size));
+}
+
+/*
+ * Allocate a linear ABD structure for buf.
  */
 abd_t *
 abd_get_from_buf(void *buf, size_t size)
 {
-	abd_t *abd = abd_alloc_struct();
+	abd_t *abd = abd_alloc_struct(0);
 
 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
 
@@ -866,36 +617,15 @@
 	 * own the underlying data buffer, which is not true in this case.
 	 * Therefore, we don't ever use ABD_FLAG_META here.
 	 */
-	abd->abd_flags = ABD_FLAG_LINEAR;
+	abd->abd_flags |= ABD_FLAG_LINEAR;
 	abd->abd_size = size;
-	abd->abd_parent = NULL;
-	zfs_refcount_create(&abd->abd_children);
 
-	abd->abd_u.abd_linear.abd_buf = buf;
+	ABD_LINEAR_BUF(abd) = buf;
 
 	return (abd);
 }
 
 /*
- * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
- * free the underlying scatterlist or buffer.
- */
-void
-abd_put(abd_t *abd)
-{
-	abd_verify(abd);
-	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
-
-	if (abd->abd_parent != NULL) {
-		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
-		    abd->abd_size, abd);
-	}
-
-	zfs_refcount_destroy(&abd->abd_children);
-	abd_free_struct(abd);
-}
-
-/*
  * Get the raw buffer associated with a linear ABD.
  */
 void *
@@ -903,7 +633,7 @@
 {
 	ASSERT(abd_is_linear(abd));
 	abd_verify(abd);
-	return (abd->abd_u.abd_linear.abd_buf);
+	return (ABD_LINEAR_BUF(abd));
 }
 
 /*
@@ -923,8 +653,9 @@
 	} else {
 		buf = zio_buf_alloc(n);
 	}
+#ifdef ZFS_DEBUG
 	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
-
+#endif
 	return (buf);
 }
 
@@ -955,7 +686,9 @@
 		ASSERT0(abd_cmp_buf(abd, buf, n));
 		zio_buf_free(buf, n);
 	}
+#ifdef ZFS_DEBUG
 	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
+#endif
 }
 
 void
@@ -967,28 +700,6 @@
 	abd_return_buf(abd, buf, n);
 }
 
-/*
- * Give this ABD ownership of the buffer that it's storing. Can only be used on
- * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
- * with abd_alloc_linear() which subsequently released ownership of their buf
- * with abd_release_ownership_of_buf().
- */
-void
-abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
-{
-	ASSERT(abd_is_linear(abd));
-	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
-	abd_verify(abd);
-
-	abd->abd_flags |= ABD_FLAG_OWNER;
-	if (is_metadata) {
-		abd->abd_flags |= ABD_FLAG_META;
-	}
-
-	ABDSTAT_BUMP(abdstat_linear_cnt);
-	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
-}
-
 void
 abd_release_ownership_of_buf(abd_t *abd)
 {
@@ -1010,165 +721,95 @@
 	/* Disable this flag since we no longer own the data buffer */
 	abd->abd_flags &= ~ABD_FLAG_META;
 
-	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
-	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+	abd_update_linear_stats(abd, ABDSTAT_DECR);
 }
 
-#ifndef HAVE_1ARG_KMAP_ATOMIC
-#define	NR_KM_TYPE (6)
-#ifdef _KERNEL
-int km_table[NR_KM_TYPE] = {
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-};
-#endif
-#endif
-
-struct abd_iter {
-	/* public interface */
-	void		*iter_mapaddr;	/* addr corresponding to iter_pos */
-	size_t		iter_mapsize;	/* length of data valid at mapaddr */
-
-	/* private */
-	abd_t		*iter_abd;	/* ABD being iterated through */
-	size_t		iter_pos;
-	size_t		iter_offset;	/* offset in current sg/abd_buf, */
-					/* abd_offset included */
-	struct scatterlist *iter_sg;	/* current sg */
-#ifndef HAVE_1ARG_KMAP_ATOMIC
-	int		iter_km;	/* KM_* for kmap_atomic */
-#endif
-};
 
 /*
- * Initialize the abd_iter.
+ * Give this ABD ownership of the buffer that it's storing. Can only be used on
+ * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
+ * with abd_alloc_linear() which subsequently released ownership of their buf
+ * with abd_release_ownership_of_buf().
  */
-static void
-abd_iter_init(struct abd_iter *aiter, abd_t *abd, int km_type)
+void
+abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
 {
+	ASSERT(abd_is_linear(abd));
+	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
 	abd_verify(abd);
-	aiter->iter_abd = abd;
-	aiter->iter_mapaddr = NULL;
-	aiter->iter_mapsize = 0;
-	aiter->iter_pos = 0;
-	if (abd_is_linear(abd)) {
-		aiter->iter_offset = 0;
-		aiter->iter_sg = NULL;
-	} else {
-		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
-		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
+
+	abd->abd_flags |= ABD_FLAG_OWNER;
+	if (is_metadata) {
+		abd->abd_flags |= ABD_FLAG_META;
 	}
-#ifndef HAVE_1ARG_KMAP_ATOMIC
-	ASSERT3U(km_type, <, NR_KM_TYPE);
-	aiter->iter_km = km_type;
-#endif
+
+	abd_update_linear_stats(abd, ABDSTAT_INCR);
 }
 
 /*
- * Advance the iterator by a certain amount. Cannot be called when a chunk is
- * in use. This can be safely called when the aiter has already exhausted, in
- * which case this does nothing.
+ * Initializes an abd_iter based on whether the abd is a gang ABD
+ * or just a single ABD.
  */
-static void
-abd_iter_advance(struct abd_iter *aiter, size_t amount)
+static inline abd_t *
+abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off)
 {
-	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
-	ASSERT0(aiter->iter_mapsize);
+	abd_t *cabd = NULL;
 
-	/* There's nothing left to advance to, so do nothing */
-	if (aiter->iter_pos == aiter->iter_abd->abd_size)
-		return;
+	if (abd_is_gang(abd)) {
+		cabd = abd_gang_get_offset(abd, &off);
+		if (cabd) {
+			abd_iter_init(aiter, cabd);
+			abd_iter_advance(aiter, off);
+		}
+	} else {
+		abd_iter_init(aiter, abd);
+		abd_iter_advance(aiter, off);
+	}
+	return (cabd);
+}
 
-	aiter->iter_pos += amount;
-	aiter->iter_offset += amount;
-	if (!abd_is_linear(aiter->iter_abd)) {
-		while (aiter->iter_offset >= aiter->iter_sg->length) {
-			aiter->iter_offset -= aiter->iter_sg->length;
-			aiter->iter_sg = sg_next(aiter->iter_sg);
-			if (aiter->iter_sg == NULL) {
-				ASSERT0(aiter->iter_offset);
-				break;
-			}
+/*
+ * Advances an abd_iter. We have to be careful with gang ABD as
+ * advancing could mean that we are at the end of a particular ABD and
+ * must grab the next ABD in the gang ABD's list.
+ */
+static inline abd_t *
+abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter,
+    size_t len)
+{
+	abd_iter_advance(aiter, len);
+	if (abd_is_gang(abd) && abd_iter_at_end(aiter)) {
+		ASSERT3P(cabd, !=, NULL);
+		cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd);
+		if (cabd) {
+			abd_iter_init(aiter, cabd);
+			abd_iter_advance(aiter, 0);
 		}
 	}
-}
-
-/*
- * Map the current chunk into aiter. This can be safely called when the aiter
- * has already exhausted, in which case this does nothing.
- */
-static void
-abd_iter_map(struct abd_iter *aiter)
-{
-	void *paddr;
-	size_t offset = 0;
-
-	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
-	ASSERT0(aiter->iter_mapsize);
-
-	/* There's nothing left to iterate over, so do nothing */
-	if (aiter->iter_pos == aiter->iter_abd->abd_size)
-		return;
-
-	if (abd_is_linear(aiter->iter_abd)) {
-		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
-		offset = aiter->iter_offset;
-		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
-		paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf;
-	} else {
-		offset = aiter->iter_offset;
-		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
-		    aiter->iter_abd->abd_size - aiter->iter_pos);
-
-		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
-		    km_table[aiter->iter_km]);
-	}
-
-	aiter->iter_mapaddr = (char *)paddr + offset;
-}
-
-/*
- * Unmap the current chunk from aiter. This can be safely called when the aiter
- * has already exhausted, in which case this does nothing.
- */
-static void
-abd_iter_unmap(struct abd_iter *aiter)
-{
-	/* There's nothing left to unmap, so do nothing */
-	if (aiter->iter_pos == aiter->iter_abd->abd_size)
-		return;
-
-	if (!abd_is_linear(aiter->iter_abd)) {
-		/* LINTED E_FUNC_SET_NOT_USED */
-		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
-		    km_table[aiter->iter_km]);
-	}
-
-	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
-	ASSERT3U(aiter->iter_mapsize, >, 0);
-
-	aiter->iter_mapaddr = NULL;
-	aiter->iter_mapsize = 0;
+	return (cabd);
 }
 
 int
 abd_iterate_func(abd_t *abd, size_t off, size_t size,
     abd_iter_func_t *func, void *private)
 {
-	int ret = 0;
 	struct abd_iter aiter;
+	int ret = 0;
+
+	if (size == 0)
+		return (0);
 
 	abd_verify(abd);
 	ASSERT3U(off + size, <=, abd->abd_size);
 
-	abd_iter_init(&aiter, abd, 0);
-	abd_iter_advance(&aiter, off);
+	boolean_t gang = abd_is_gang(abd);
+	abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
 
 	while (size > 0) {
+		/* If we are at the end of the gang ABD we are done */
+		if (gang && !c_abd)
+			break;
+
 		abd_iter_map(&aiter);
 
 		size_t len = MIN(aiter.iter_mapsize, size);
@@ -1182,7 +823,7 @@
 			break;
 
 		size -= len;
-		abd_iter_advance(&aiter, len);
+		c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
 	}
 
 	return (ret);
@@ -1261,10 +902,10 @@
 	    &ba_ptr);
 }
 
-/*ARGSUSED*/
 static int
 abd_zero_off_cb(void *buf, size_t size, void *private)
 {
+	(void) private;
 	(void) memset(buf, 0, size);
 	return (0);
 }
@@ -1289,6 +930,11 @@
 {
 	int ret = 0;
 	struct abd_iter daiter, saiter;
+	boolean_t dabd_is_gang_abd, sabd_is_gang_abd;
+	abd_t *c_dabd, *c_sabd;
+
+	if (size == 0)
+		return (0);
 
 	abd_verify(dabd);
 	abd_verify(sabd);
@@ -1296,12 +942,17 @@
 	ASSERT3U(doff + size, <=, dabd->abd_size);
 	ASSERT3U(soff + size, <=, sabd->abd_size);
 
-	abd_iter_init(&daiter, dabd, 0);
-	abd_iter_init(&saiter, sabd, 1);
-	abd_iter_advance(&daiter, doff);
-	abd_iter_advance(&saiter, soff);
+	dabd_is_gang_abd = abd_is_gang(dabd);
+	sabd_is_gang_abd = abd_is_gang(sabd);
+	c_dabd = abd_init_abd_iter(dabd, &daiter, doff);
+	c_sabd = abd_init_abd_iter(sabd, &saiter, soff);
 
 	while (size > 0) {
+		/* if we are at the end of the gang ABD we are done */
+		if ((dabd_is_gang_abd && !c_dabd) ||
+		    (sabd_is_gang_abd && !c_sabd))
+			break;
+
 		abd_iter_map(&daiter);
 		abd_iter_map(&saiter);
 
@@ -1320,17 +971,19 @@
 			break;
 
 		size -= len;
-		abd_iter_advance(&daiter, len);
-		abd_iter_advance(&saiter, len);
+		c_dabd =
+		    abd_advance_abd_iter(dabd, c_dabd, &daiter, len);
+		c_sabd =
+		    abd_advance_abd_iter(sabd, c_sabd, &saiter, len);
 	}
 
 	return (ret);
 }
 
-/*ARGSUSED*/
 static int
 abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
 {
+	(void) private;
 	(void) memcpy(dbuf, sbuf, size);
 	return (0);
 }
@@ -1345,10 +998,10 @@
 	    abd_copy_off_cb, NULL);
 }
 
-/*ARGSUSED*/
 static int
 abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
 {
+	(void) private;
 	return (memcmp(bufa, bufb, size));
 }
 
@@ -1381,37 +1034,55 @@
 	struct abd_iter caiters[3];
 	struct abd_iter daiter = {0};
 	void *caddrs[3];
-	unsigned long flags;
+	unsigned long flags __maybe_unused = 0;
+	abd_t *c_cabds[3];
+	abd_t *c_dabd = NULL;
+	boolean_t cabds_is_gang_abd[3];
+	boolean_t dabd_is_gang_abd = B_FALSE;
 
 	ASSERT3U(parity, <=, 3);
 
-	for (i = 0; i < parity; i++)
-		abd_iter_init(&caiters[i], cabds[i], i);
+	for (i = 0; i < parity; i++) {
+		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
+		c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
+	}
 
-	if (dabd)
-		abd_iter_init(&daiter, dabd, i);
+	if (dabd) {
+		dabd_is_gang_abd = abd_is_gang(dabd);
+		c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
+	}
 
 	ASSERT3S(dsize, >=, 0);
 
-	local_irq_save(flags);
+	abd_enter_critical(flags);
 	while (csize > 0) {
+		/* if we are at the end of the gang ABD we are done */
+		if (dabd_is_gang_abd && !c_dabd)
+			break;
+
+		for (i = 0; i < parity; i++) {
+			/*
+			 * If we are at the end of the gang ABD we are
+			 * done.
+			 */
+			if (cabds_is_gang_abd[i] && !c_cabds[i])
+				break;
+			abd_iter_map(&caiters[i]);
+			caddrs[i] = caiters[i].iter_mapaddr;
+		}
+
 		len = csize;
 
 		if (dabd && dsize > 0)
 			abd_iter_map(&daiter);
 
-		for (i = 0; i < parity; i++) {
-			abd_iter_map(&caiters[i]);
-			caddrs[i] = caiters[i].iter_mapaddr;
-		}
-
 		switch (parity) {
 			case 3:
 				len = MIN(caiters[2].iter_mapsize, len);
-				/* falls through */
+				fallthrough;
 			case 2:
 				len = MIN(caiters[1].iter_mapsize, len);
-				/* falls through */
+				fallthrough;
 			case 1:
 				len = MIN(caiters[0].iter_mapsize, len);
 		}
@@ -1438,12 +1109,16 @@
 
 		for (i = parity-1; i >= 0; i--) {
 			abd_iter_unmap(&caiters[i]);
-			abd_iter_advance(&caiters[i], len);
+			c_cabds[i] =
+			    abd_advance_abd_iter(cabds[i], c_cabds[i],
+			    &caiters[i], len);
 		}
 
 		if (dabd && dsize > 0) {
 			abd_iter_unmap(&daiter);
-			abd_iter_advance(&daiter, dlen);
+			c_dabd =
+			    abd_advance_abd_iter(dabd, c_dabd, &daiter,
+			    dlen);
 			dsize -= dlen;
 		}
 
@@ -1452,7 +1127,7 @@
 		ASSERT3S(dsize, >=, 0);
 		ASSERT3S(csize, >=, 0);
 	}
-	local_irq_restore(flags);
+	abd_exit_critical(flags);
 }
 
 /*
@@ -1477,19 +1152,35 @@
 	struct abd_iter citers[3];
 	struct abd_iter xiters[3];
 	void *caddrs[3], *xaddrs[3];
-	unsigned long flags;
+	unsigned long flags __maybe_unused = 0;
+	boolean_t cabds_is_gang_abd[3];
+	boolean_t tabds_is_gang_abd[3];
+	abd_t *c_cabds[3];
+	abd_t *c_tabds[3];
 
 	ASSERT3U(parity, <=, 3);
 
 	for (i = 0; i < parity; i++) {
-		abd_iter_init(&citers[i], cabds[i], 2*i);
-		abd_iter_init(&xiters[i], tabds[i], 2*i+1);
+		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
+		tabds_is_gang_abd[i] = abd_is_gang(tabds[i]);
+		c_cabds[i] =
+		    abd_init_abd_iter(cabds[i], &citers[i], 0);
+		c_tabds[i] =
+		    abd_init_abd_iter(tabds[i], &xiters[i], 0);
 	}
 
-	local_irq_save(flags);
+	abd_enter_critical(flags);
 	while (tsize > 0) {
 
 		for (i = 0; i < parity; i++) {
+			/*
+			 * If we are at the end of the gang ABD we
+			 * are done.
+			 */
+			if (cabds_is_gang_abd[i] && !c_cabds[i])
+				break;
+			if (tabds_is_gang_abd[i] && !c_tabds[i])
+				break;
 			abd_iter_map(&citers[i]);
 			abd_iter_map(&xiters[i]);
 			caddrs[i] = citers[i].iter_mapaddr;
@@ -1501,11 +1192,11 @@
 			case 3:
 				len = MIN(xiters[2].iter_mapsize, len);
 				len = MIN(citers[2].iter_mapsize, len);
-				/* falls through */
+				fallthrough;
 			case 2:
 				len = MIN(xiters[1].iter_mapsize, len);
 				len = MIN(citers[1].iter_mapsize, len);
-				/* falls through */
+				fallthrough;
 			case 1:
 				len = MIN(xiters[0].iter_mapsize, len);
 				len = MIN(citers[0].iter_mapsize, len);
@@ -1523,87 +1214,16 @@
 		for (i = parity-1; i >= 0; i--) {
 			abd_iter_unmap(&xiters[i]);
 			abd_iter_unmap(&citers[i]);
-			abd_iter_advance(&xiters[i], len);
-			abd_iter_advance(&citers[i], len);
+			c_tabds[i] =
+			    abd_advance_abd_iter(tabds[i], c_tabds[i],
+			    &xiters[i], len);
+			c_cabds[i] =
+			    abd_advance_abd_iter(cabds[i], c_cabds[i],
+			    &citers[i], len);
 		}
 
 		tsize -= len;
 		ASSERT3S(tsize, >=, 0);
 	}
-	local_irq_restore(flags);
+	abd_exit_critical(flags);
 }
-
-#if defined(_KERNEL)
-/*
- * bio_nr_pages for ABD.
- * @off is the offset in @abd
- */
-unsigned long
-abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
-{
-	unsigned long pos;
-
-	if (abd_is_linear(abd))
-		pos = (unsigned long)abd_to_buf(abd) + off;
-	else
-		pos = abd->abd_u.abd_scatter.abd_offset + off;
-
-	return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
-	    (pos >> PAGE_SHIFT);
-}
-
-/*
- * bio_map for scatter ABD.
- * @off is the offset in @abd
- * Remaining IO size is returned
- */
-unsigned int
-abd_scatter_bio_map_off(struct bio *bio, abd_t *abd,
-    unsigned int io_size, size_t off)
-{
-	int i;
-	struct abd_iter aiter;
-
-	ASSERT(!abd_is_linear(abd));
-	ASSERT3U(io_size, <=, abd->abd_size - off);
-
-	abd_iter_init(&aiter, abd, 0);
-	abd_iter_advance(&aiter, off);
-
-	for (i = 0; i < bio->bi_max_vecs; i++) {
-		struct page *pg;
-		size_t len, sgoff, pgoff;
-		struct scatterlist *sg;
-
-		if (io_size <= 0)
-			break;
-
-		sg = aiter.iter_sg;
-		sgoff = aiter.iter_offset;
-		pgoff = sgoff & (PAGESIZE - 1);
-		len = MIN(io_size, PAGESIZE - pgoff);
-		ASSERT(len > 0);
-
-		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
-		if (bio_add_page(bio, pg, len, pgoff) != len)
-			break;
-
-		io_size -= len;
-		abd_iter_advance(&aiter, len);
-	}
-
-	return (io_size);
-}
-
-/* Tunable Parameters */
-module_param(zfs_abd_scatter_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_abd_scatter_enabled,
-	"Toggle whether ABD allocations must be linear.");
-module_param(zfs_abd_scatter_min_size, int, 0644);
-MODULE_PARM_DESC(zfs_abd_scatter_min_size,
-	"Minimum size of scatter allocations.");
-/* CSTYLED */
-module_param(zfs_abd_scatter_max_order, uint, 0644);
-MODULE_PARM_DESC(zfs_abd_scatter_max_order,
-	"Maximum order allocation used for a scatter ABD.");
-#endif

diff --git a/zfs/module/zfs/aggsum.c b/zfs/module/zfs/aggsum.c
index ace3a83..c4ea4f8 100644
--- a/zfs/module/zfs/aggsum.c
+++ b/zfs/module/zfs/aggsum.c

@@ -70,14 +70,19 @@
  * zeroing out the borrowed value (forcing that thread to borrow on its next
  * request, which will also be expensive).  This is what makes aggsums well
  * suited for write-many read-rarely operations.
+ *
+ * Note that the aggsums do not expand if more CPUs are hot-added. In that
+ * case, we will have less fanout than boot_ncpus, but we don't want to always
+ * reserve the RAM necessary to create the extra slots for additional CPUs up
+ * front, and dynamically adding them is a complex task.
  */
 
 /*
- * We will borrow aggsum_borrow_multiplier times the current request, so we will
- * have to get the as_lock approximately every aggsum_borrow_multiplier calls to
- * aggsum_delta().
+ * We will borrow 2^aggsum_borrow_shift times the current request, so we will
+ * have to get the as_lock approximately every 2^aggsum_borrow_shift calls to
+ * aggsum_add().
  */
-static uint_t aggsum_borrow_multiplier = 10;
+static uint_t aggsum_borrow_shift = 4;
 
 void
 aggsum_init(aggsum_t *as, uint64_t value)
@@ -85,9 +90,14 @@
 	bzero(as, sizeof (*as));
 	as->as_lower_bound = as->as_upper_bound = value;
 	mutex_init(&as->as_lock, NULL, MUTEX_DEFAULT, NULL);
-	as->as_numbuckets = boot_ncpus;
-	as->as_buckets = kmem_zalloc(boot_ncpus * sizeof (aggsum_bucket_t),
-	    KM_SLEEP);
+	/*
+	 * Too many buckets may hurt read performance without improving
+	 * write.  From 12 CPUs use one bucket per 2 CPUs, from 48 per 4, etc.
+	 */
+	as->as_bucketshift = highbit64(boot_ncpus / 6) / 2;
+	as->as_numbuckets = ((boot_ncpus - 1) >> as->as_bucketshift) + 1;
+	as->as_buckets = kmem_zalloc(as->as_numbuckets *
+	    sizeof (aggsum_bucket_t), KM_SLEEP);
 	for (int i = 0; i < as->as_numbuckets; i++) {
 		mutex_init(&as->as_buckets[i].asc_lock,
 		    NULL, MUTEX_DEFAULT, NULL);
@@ -106,100 +116,91 @@
 int64_t
 aggsum_lower_bound(aggsum_t *as)
 {
-	return (as->as_lower_bound);
+	return (atomic_load_64((volatile uint64_t *)&as->as_lower_bound));
 }
 
-int64_t
+uint64_t
 aggsum_upper_bound(aggsum_t *as)
 {
-	return (as->as_upper_bound);
-}
-
-static void
-aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb)
-{
-	ASSERT(MUTEX_HELD(&as->as_lock));
-	ASSERT(MUTEX_HELD(&asb->asc_lock));
-
-	/*
-	 * We use atomic instructions for this because we read the upper and
-	 * lower bounds without the lock, so we need stores to be atomic.
-	 */
-	atomic_add_64((volatile uint64_t *)&as->as_lower_bound, asb->asc_delta);
-	atomic_add_64((volatile uint64_t *)&as->as_upper_bound, asb->asc_delta);
-	asb->asc_delta = 0;
-	atomic_add_64((volatile uint64_t *)&as->as_upper_bound,
-	    -asb->asc_borrowed);
-	atomic_add_64((volatile uint64_t *)&as->as_lower_bound,
-	    asb->asc_borrowed);
-	asb->asc_borrowed = 0;
+	return (atomic_load_64(&as->as_upper_bound));
 }
 
 uint64_t
 aggsum_value(aggsum_t *as)
 {
-	int64_t rv;
+	int64_t lb;
+	uint64_t ub;
 
 	mutex_enter(&as->as_lock);
-	if (as->as_lower_bound == as->as_upper_bound) {
-		rv = as->as_lower_bound;
+	lb = as->as_lower_bound;
+	ub = as->as_upper_bound;
+	if (lb == ub) {
 		for (int i = 0; i < as->as_numbuckets; i++) {
 			ASSERT0(as->as_buckets[i].asc_delta);
 			ASSERT0(as->as_buckets[i].asc_borrowed);
 		}
 		mutex_exit(&as->as_lock);
-		return (rv);
+		return (lb);
 	}
 	for (int i = 0; i < as->as_numbuckets; i++) {
 		struct aggsum_bucket *asb = &as->as_buckets[i];
+		if (asb->asc_borrowed == 0)
+			continue;
 		mutex_enter(&asb->asc_lock);
-		aggsum_flush_bucket(as, asb);
+		lb += asb->asc_delta + asb->asc_borrowed;
+		ub += asb->asc_delta - asb->asc_borrowed;
+		asb->asc_delta = 0;
+		asb->asc_borrowed = 0;
 		mutex_exit(&asb->asc_lock);
 	}
-	VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
-	rv = as->as_lower_bound;
+	ASSERT3U(lb, ==, ub);
+	atomic_store_64((volatile uint64_t *)&as->as_lower_bound, lb);
+	atomic_store_64(&as->as_upper_bound, lb);
 	mutex_exit(&as->as_lock);
 
-	return (rv);
-}
-
-static void
-aggsum_borrow(aggsum_t *as, int64_t delta, struct aggsum_bucket *asb)
-{
-	int64_t abs_delta = (delta < 0 ? -delta : delta);
-	mutex_enter(&as->as_lock);
-	mutex_enter(&asb->asc_lock);
-
-	aggsum_flush_bucket(as, asb);
-
-	atomic_add_64((volatile uint64_t *)&as->as_upper_bound, abs_delta);
-	atomic_add_64((volatile uint64_t *)&as->as_lower_bound, -abs_delta);
-	asb->asc_borrowed = abs_delta;
-
-	mutex_exit(&asb->asc_lock);
-	mutex_exit(&as->as_lock);
+	return (lb);
 }
 
 void
 aggsum_add(aggsum_t *as, int64_t delta)
 {
 	struct aggsum_bucket *asb;
+	int64_t borrow;
 
-	kpreempt_disable();
-	asb = &as->as_buckets[CPU_SEQID % as->as_numbuckets];
-	kpreempt_enable();
+	asb = &as->as_buckets[(CPU_SEQID_UNSTABLE >> as->as_bucketshift) %
+	    as->as_numbuckets];
 
-	for (;;) {
-		mutex_enter(&asb->asc_lock);
-		if (asb->asc_delta + delta <= (int64_t)asb->asc_borrowed &&
-		    asb->asc_delta + delta >= -(int64_t)asb->asc_borrowed) {
-			asb->asc_delta += delta;
-			mutex_exit(&asb->asc_lock);
-			return;
-		}
+	/* Try fast path if we already borrowed enough before. */
+	mutex_enter(&asb->asc_lock);
+	if (asb->asc_delta + delta <= (int64_t)asb->asc_borrowed &&
+	    asb->asc_delta + delta >= -(int64_t)asb->asc_borrowed) {
+		asb->asc_delta += delta;
 		mutex_exit(&asb->asc_lock);
-		aggsum_borrow(as, delta * aggsum_borrow_multiplier, asb);
+		return;
 	}
+	mutex_exit(&asb->asc_lock);
+
+	/*
+	 * We haven't borrowed enough.  Take the global lock and borrow
+	 * considering what is requested now and what we borrowed before.
+	 */
+	borrow = (delta < 0 ? -delta : delta);
+	borrow <<= aggsum_borrow_shift + as->as_bucketshift;
+	mutex_enter(&as->as_lock);
+	if (borrow >= asb->asc_borrowed)
+		borrow -= asb->asc_borrowed;
+	else
+		borrow = (borrow - (int64_t)asb->asc_borrowed) / 4;
+	mutex_enter(&asb->asc_lock);
+	delta += asb->asc_delta;
+	asb->asc_delta = 0;
+	asb->asc_borrowed += borrow;
+	mutex_exit(&asb->asc_lock);
+	atomic_store_64((volatile uint64_t *)&as->as_lower_bound,
+	    as->as_lower_bound + delta - borrow);
+	atomic_store_64(&as->as_upper_bound,
+	    as->as_upper_bound + delta + borrow);
+	mutex_exit(&as->as_lock);
 }
 
 /*
@@ -210,27 +211,35 @@
 int
 aggsum_compare(aggsum_t *as, uint64_t target)
 {
-	if (as->as_upper_bound < target)
+	int64_t lb;
+	uint64_t ub;
+	int i;
+
+	if (atomic_load_64(&as->as_upper_bound) < target)
 		return (-1);
-	if (as->as_lower_bound > target)
+	lb = atomic_load_64((volatile uint64_t *)&as->as_lower_bound);
+	if (lb > 0 && (uint64_t)lb > target)
 		return (1);
 	mutex_enter(&as->as_lock);
-	for (int i = 0; i < as->as_numbuckets; i++) {
+	lb = as->as_lower_bound;
+	ub = as->as_upper_bound;
+	for (i = 0; i < as->as_numbuckets; i++) {
 		struct aggsum_bucket *asb = &as->as_buckets[i];
+		if (asb->asc_borrowed == 0)
+			continue;
 		mutex_enter(&asb->asc_lock);
-		aggsum_flush_bucket(as, asb);
+		lb += asb->asc_delta + asb->asc_borrowed;
+		ub += asb->asc_delta - asb->asc_borrowed;
+		asb->asc_delta = 0;
+		asb->asc_borrowed = 0;
 		mutex_exit(&asb->asc_lock);
-		if (as->as_upper_bound < target) {
-			mutex_exit(&as->as_lock);
-			return (-1);
-		}
-		if (as->as_lower_bound > target) {
-			mutex_exit(&as->as_lock);
-			return (1);
-		}
+		if (ub < target || (lb > 0 && (uint64_t)lb > target))
+			break;
 	}
-	VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
-	ASSERT3U(as->as_lower_bound, ==, target);
+	if (i >= as->as_numbuckets)
+		ASSERT3U(lb, ==, ub);
+	atomic_store_64((volatile uint64_t *)&as->as_lower_bound, lb);
+	atomic_store_64(&as->as_upper_bound, ub);
 	mutex_exit(&as->as_lock);
-	return (0);
+	return (ub < target ? -1 : (uint64_t)lb > target ? 1 : 0);
 }

diff --git a/zfs/module/zfs/arc.c b/zfs/module/zfs/arc.c
index dd4b0f3..1180853 100644
--- a/zfs/module/zfs/arc.c
+++ b/zfs/module/zfs/arc.c

@@ -21,9 +21,17 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2018, Joyent, Inc.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
- * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2011, 2020, Delphix. All rights reserved.
+ * Copyright (c) 2014, Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017, Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2020, George Amanakis. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ *     under sponsorship from the FreeBSD Foundation.
  */
 
 /*
@@ -282,30 +290,26 @@
 #include <sys/zio_checksum.h>
 #include <sys/zfs_context.h>
 #include <sys/arc.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/dsl_pool.h>
-#include <sys/zio_checksum.h>
 #include <sys/multilist.h>
 #include <sys/abd.h>
 #include <sys/zil.h>
 #include <sys/fm/fs/zfs.h>
-#ifdef _KERNEL
-#include <sys/shrinker.h>
-#include <sys/vmsystm.h>
-#include <sys/zpl.h>
-#include <linux/page_compat.h>
-#include <linux/mod_compat.h>
-#endif
 #include <sys/callb.h>
 #include <sys/kstat.h>
 #include <sys/zthr.h>
 #include <zfs_fletcher.h>
 #include <sys/arc_impl.h>
-#include <sys/trace_arc.h>
+#include <sys/trace_zfs.h>
 #include <sys/aggsum.h>
-#include <sys/cityhash.h>
+#include <sys/wmsum.h>
+#include <cityhash.h>
+#include <sys/vdev_trim.h>
+#include <sys/zfs_racct.h>
+#include <sys/zstd/zstd.h>
 
 #ifndef _KERNEL
 /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@@ -317,17 +321,40 @@
  * calling arc_kmem_reap_soon() plus arc_reduce_target_size(), which improves
  * arc_available_memory().
  */
-static zthr_t		*arc_reap_zthr;
+static zthr_t *arc_reap_zthr;
 
 /*
  * This thread's job is to keep arc_size under arc_c, by calling
- * arc_adjust(), which improves arc_is_overflowing().
+ * arc_evict(), which improves arc_is_overflowing().
  */
-static zthr_t		*arc_adjust_zthr;
+static zthr_t *arc_evict_zthr;
+static arc_buf_hdr_t **arc_state_evict_markers;
+static int arc_state_evict_marker_count;
 
-static kmutex_t		arc_adjust_lock;
-static kcondvar_t	arc_adjust_waiters_cv;
-static boolean_t	arc_adjust_needed = B_FALSE;
+static kmutex_t arc_evict_lock;
+static boolean_t arc_evict_needed = B_FALSE;
+
+/*
+ * Count of bytes evicted since boot.
+ */
+static uint64_t arc_evict_count;
+
+/*
+ * List of arc_evict_waiter_t's, representing threads waiting for the
+ * arc_evict_count to reach specific values.
+ */
+static list_t arc_evict_waiters;
+
+/*
+ * When arc_is_overflowing(), arc_get_data_impl() waits for this percent of
+ * the requested amount of data to be evicted.  For example, by default for
+ * every 2KB that's evicted, 1KB of it may be "reused" by a new allocation.
+ * Since this is above 100%, it ensures that progress is made towards getting
+ * arc_size under arc_c.  Since this is finite, it ensures that allocations
+ * can still happen, even during the potentially long time that arc_size is
+ * more than arc_c.
+ */
+int zfs_arc_eviction_pct = 200;
 
 /*
  * The number of headers to evict in arc_evict_state_impl() before
@@ -339,7 +366,7 @@
 int zfs_arc_evict_batch_limit = 10;
 
 /* number of seconds before growing cache again */
-static int arc_grow_retry = 5;
+int arc_grow_retry = 5;
 
 /*
  * Minimum time between calls to arc_kmem_reap_soon().
@@ -353,11 +380,11 @@
 int arc_p_min_shift = 4;
 
 /* log2(fraction of arc to reclaim) */
-static int arc_shrink_shift = 7;
+int arc_shrink_shift = 7;
 
 /* percent of pagecache to reclaim arc to */
 #ifdef _KERNEL
-static uint_t zfs_arc_pc_percent = 0;
+uint_t zfs_arc_pc_percent = 0;
 #endif
 
 /*
@@ -385,19 +412,9 @@
 int arc_lotsfree_percent = 10;
 
 /*
- * hdr_recl() uses this to determine if the arc is up and running.
- */
-static boolean_t arc_initialized;
-
-/*
  * The arc has filled available memory and has now warmed up.
  */
-static boolean_t arc_warm;
-
-/*
- * log2 fraction of the zio arena to keep free.
- */
-int arc_zio_arena_free_shift = 2;
+boolean_t arc_warm;
 
 /*
  * These tunables are for performance analysis.
@@ -448,290 +465,20 @@
 int zfs_arc_meta_adjust_restarts = 4096;
 int zfs_arc_lotsfree_percent = 10;
 
+/*
+ * Number of arc_prune threads
+ */
+static int zfs_arc_prune_task_threads = 1;
+
 /* The 6 states: */
-static arc_state_t ARC_anon;
-static arc_state_t ARC_mru;
-static arc_state_t ARC_mru_ghost;
-static arc_state_t ARC_mfu;
-static arc_state_t ARC_mfu_ghost;
-static arc_state_t ARC_l2c_only;
+arc_state_t ARC_anon;
+arc_state_t ARC_mru;
+arc_state_t ARC_mru_ghost;
+arc_state_t ARC_mfu;
+arc_state_t ARC_mfu_ghost;
+arc_state_t ARC_l2c_only;
 
-typedef struct arc_stats {
-	kstat_named_t arcstat_hits;
-	kstat_named_t arcstat_misses;
-	kstat_named_t arcstat_demand_data_hits;
-	kstat_named_t arcstat_demand_data_misses;
-	kstat_named_t arcstat_demand_metadata_hits;
-	kstat_named_t arcstat_demand_metadata_misses;
-	kstat_named_t arcstat_prefetch_data_hits;
-	kstat_named_t arcstat_prefetch_data_misses;
-	kstat_named_t arcstat_prefetch_metadata_hits;
-	kstat_named_t arcstat_prefetch_metadata_misses;
-	kstat_named_t arcstat_mru_hits;
-	kstat_named_t arcstat_mru_ghost_hits;
-	kstat_named_t arcstat_mfu_hits;
-	kstat_named_t arcstat_mfu_ghost_hits;
-	kstat_named_t arcstat_deleted;
-	/*
-	 * Number of buffers that could not be evicted because the hash lock
-	 * was held by another thread.  The lock may not necessarily be held
-	 * by something using the same buffer, since hash locks are shared
-	 * by multiple buffers.
-	 */
-	kstat_named_t arcstat_mutex_miss;
-	/*
-	 * Number of buffers skipped when updating the access state due to the
-	 * header having already been released after acquiring the hash lock.
-	 */
-	kstat_named_t arcstat_access_skip;
-	/*
-	 * Number of buffers skipped because they have I/O in progress, are
-	 * indirect prefetch buffers that have not lived long enough, or are
-	 * not from the spa we're trying to evict from.
-	 */
-	kstat_named_t arcstat_evict_skip;
-	/*
-	 * Number of times arc_evict_state() was unable to evict enough
-	 * buffers to reach its target amount.
-	 */
-	kstat_named_t arcstat_evict_not_enough;
-	kstat_named_t arcstat_evict_l2_cached;
-	kstat_named_t arcstat_evict_l2_eligible;
-	kstat_named_t arcstat_evict_l2_ineligible;
-	kstat_named_t arcstat_evict_l2_skip;
-	kstat_named_t arcstat_hash_elements;
-	kstat_named_t arcstat_hash_elements_max;
-	kstat_named_t arcstat_hash_collisions;
-	kstat_named_t arcstat_hash_chains;
-	kstat_named_t arcstat_hash_chain_max;
-	kstat_named_t arcstat_p;
-	kstat_named_t arcstat_c;
-	kstat_named_t arcstat_c_min;
-	kstat_named_t arcstat_c_max;
-	/* Not updated directly; only synced in arc_kstat_update. */
-	kstat_named_t arcstat_size;
-	/*
-	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
-	 * Note that the compressed bytes may match the uncompressed bytes
-	 * if the block is either not compressed or compressed arc is disabled.
-	 */
-	kstat_named_t arcstat_compressed_size;
-	/*
-	 * Uncompressed size of the data stored in b_pabd. If compressed
-	 * arc is disabled then this value will be identical to the stat
-	 * above.
-	 */
-	kstat_named_t arcstat_uncompressed_size;
-	/*
-	 * Number of bytes stored in all the arc_buf_t's. This is classified
-	 * as "overhead" since this data is typically short-lived and will
-	 * be evicted from the arc when it becomes unreferenced unless the
-	 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
-	 * values have been set (see comment in dbuf.c for more information).
-	 */
-	kstat_named_t arcstat_overhead_size;
-	/*
-	 * Number of bytes consumed by internal ARC structures necessary
-	 * for tracking purposes; these structures are not actually
-	 * backed by ARC buffers. This includes arc_buf_hdr_t structures
-	 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
-	 * caches), and arc_buf_t structures (allocated via arc_buf_t
-	 * cache).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_hdr_size;
-	/*
-	 * Number of bytes consumed by ARC buffers of type equal to
-	 * ARC_BUFC_DATA. This is generally consumed by buffers backing
-	 * on disk user data (e.g. plain file contents).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_data_size;
-	/*
-	 * Number of bytes consumed by ARC buffers of type equal to
-	 * ARC_BUFC_METADATA. This is generally consumed by buffers
-	 * backing on disk data that is used for internal ZFS
-	 * structures (e.g. ZAP, dnode, indirect blocks, etc).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_metadata_size;
-	/*
-	 * Number of bytes consumed by dmu_buf_impl_t objects.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_dbuf_size;
-	/*
-	 * Number of bytes consumed by dnode_t objects.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_dnode_size;
-	/*
-	 * Number of bytes consumed by bonus buffers.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_bonus_size;
-	/*
-	 * Total number of bytes consumed by ARC buffers residing in the
-	 * arc_anon state. This includes *all* buffers in the arc_anon
-	 * state; e.g. data, metadata, evictable, and unevictable buffers
-	 * are all included in this value.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_anon_size;
-	/*
-	 * Number of bytes consumed by ARC buffers that meet the
-	 * following criteria: backing buffers of type ARC_BUFC_DATA,
-	 * residing in the arc_anon state, and are eligible for eviction
-	 * (e.g. have no outstanding holds on the buffer).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_anon_evictable_data;
-	/*
-	 * Number of bytes consumed by ARC buffers that meet the
-	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
-	 * residing in the arc_anon state, and are eligible for eviction
-	 * (e.g. have no outstanding holds on the buffer).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_anon_evictable_metadata;
-	/*
-	 * Total number of bytes consumed by ARC buffers residing in the
-	 * arc_mru state. This includes *all* buffers in the arc_mru
-	 * state; e.g. data, metadata, evictable, and unevictable buffers
-	 * are all included in this value.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_size;
-	/*
-	 * Number of bytes consumed by ARC buffers that meet the
-	 * following criteria: backing buffers of type ARC_BUFC_DATA,
-	 * residing in the arc_mru state, and are eligible for eviction
-	 * (e.g. have no outstanding holds on the buffer).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_evictable_data;
-	/*
-	 * Number of bytes consumed by ARC buffers that meet the
-	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
-	 * residing in the arc_mru state, and are eligible for eviction
-	 * (e.g. have no outstanding holds on the buffer).
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_evictable_metadata;
-	/*
-	 * Total number of bytes that *would have been* consumed by ARC
-	 * buffers in the arc_mru_ghost state. The key thing to note
-	 * here, is the fact that this size doesn't actually indicate
-	 * RAM consumption. The ghost lists only consist of headers and
-	 * don't actually have ARC buffers linked off of these headers.
-	 * Thus, *if* the headers had associated ARC buffers, these
-	 * buffers *would have* consumed this number of bytes.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_ghost_size;
-	/*
-	 * Number of bytes that *would have been* consumed by ARC
-	 * buffers that are eligible for eviction, of type
-	 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_ghost_evictable_data;
-	/*
-	 * Number of bytes that *would have been* consumed by ARC
-	 * buffers that are eligible for eviction, of type
-	 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mru_ghost_evictable_metadata;
-	/*
-	 * Total number of bytes consumed by ARC buffers residing in the
-	 * arc_mfu state. This includes *all* buffers in the arc_mfu
-	 * state; e.g. data, metadata, evictable, and unevictable buffers
-	 * are all included in this value.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_size;
-	/*
-	 * Number of bytes consumed by ARC buffers that are eligible for
-	 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
-	 * state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_evictable_data;
-	/*
-	 * Number of bytes consumed by ARC buffers that are eligible for
-	 * eviction, of type ARC_BUFC_METADATA, and reside in the
-	 * arc_mfu state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_evictable_metadata;
-	/*
-	 * Total number of bytes that *would have been* consumed by ARC
-	 * buffers in the arc_mfu_ghost state. See the comment above
-	 * arcstat_mru_ghost_size for more details.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_ghost_size;
-	/*
-	 * Number of bytes that *would have been* consumed by ARC
-	 * buffers that are eligible for eviction, of type
-	 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_ghost_evictable_data;
-	/*
-	 * Number of bytes that *would have been* consumed by ARC
-	 * buffers that are eligible for eviction, of type
-	 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
-	 * Not updated directly; only synced in arc_kstat_update.
-	 */
-	kstat_named_t arcstat_mfu_ghost_evictable_metadata;
-	kstat_named_t arcstat_l2_hits;
-	kstat_named_t arcstat_l2_misses;
-	kstat_named_t arcstat_l2_feeds;
-	kstat_named_t arcstat_l2_rw_clash;
-	kstat_named_t arcstat_l2_read_bytes;
-	kstat_named_t arcstat_l2_write_bytes;
-	kstat_named_t arcstat_l2_writes_sent;
-	kstat_named_t arcstat_l2_writes_done;
-	kstat_named_t arcstat_l2_writes_error;
-	kstat_named_t arcstat_l2_writes_lock_retry;
-	kstat_named_t arcstat_l2_evict_lock_retry;
-	kstat_named_t arcstat_l2_evict_reading;
-	kstat_named_t arcstat_l2_evict_l1cached;
-	kstat_named_t arcstat_l2_free_on_write;
-	kstat_named_t arcstat_l2_abort_lowmem;
-	kstat_named_t arcstat_l2_cksum_bad;
-	kstat_named_t arcstat_l2_io_error;
-	kstat_named_t arcstat_l2_lsize;
-	kstat_named_t arcstat_l2_psize;
-	/* Not updated directly; only synced in arc_kstat_update. */
-	kstat_named_t arcstat_l2_hdr_size;
-	kstat_named_t arcstat_memory_throttle_count;
-	kstat_named_t arcstat_memory_direct_count;
-	kstat_named_t arcstat_memory_indirect_count;
-	kstat_named_t arcstat_memory_all_bytes;
-	kstat_named_t arcstat_memory_free_bytes;
-	kstat_named_t arcstat_memory_available_bytes;
-	kstat_named_t arcstat_no_grow;
-	kstat_named_t arcstat_tempreserve;
-	kstat_named_t arcstat_loaned_bytes;
-	kstat_named_t arcstat_prune;
-	/* Not updated directly; only synced in arc_kstat_update. */
-	kstat_named_t arcstat_meta_used;
-	kstat_named_t arcstat_meta_limit;
-	kstat_named_t arcstat_dnode_limit;
-	kstat_named_t arcstat_meta_max;
-	kstat_named_t arcstat_meta_min;
-	kstat_named_t arcstat_async_upgrade_sync;
-	kstat_named_t arcstat_demand_hit_predictive_prefetch;
-	kstat_named_t arcstat_demand_hit_prescient_prefetch;
-	kstat_named_t arcstat_need_free;
-	kstat_named_t arcstat_sys_free;
-	kstat_named_t arcstat_raw_size;
-} arc_stats_t;
-
-static arc_stats_t arc_stats = {
+arc_stats_t arc_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
 	{ "misses",			KSTAT_DATA_UINT64 },
 	{ "demand_data_hits",		KSTAT_DATA_UINT64 },
@@ -753,6 +500,8 @@
 	{ "evict_not_enough",		KSTAT_DATA_UINT64 },
 	{ "evict_l2_cached",		KSTAT_DATA_UINT64 },
 	{ "evict_l2_eligible",		KSTAT_DATA_UINT64 },
+	{ "evict_l2_eligible_mfu",	KSTAT_DATA_UINT64 },
+	{ "evict_l2_eligible_mru",	KSTAT_DATA_UINT64 },
 	{ "evict_l2_ineligible",	KSTAT_DATA_UINT64 },
 	{ "evict_l2_skip",		KSTAT_DATA_UINT64 },
 	{ "hash_elements",		KSTAT_DATA_UINT64 },
@@ -774,6 +523,9 @@
 	{ "dbuf_size",			KSTAT_DATA_UINT64 },
 	{ "dnode_size",			KSTAT_DATA_UINT64 },
 	{ "bonus_size",			KSTAT_DATA_UINT64 },
+#if defined(COMPAT_FREEBSD11)
+	{ "other_size",			KSTAT_DATA_UINT64 },
+#endif
 	{ "anon_size",			KSTAT_DATA_UINT64 },
 	{ "anon_evictable_data",	KSTAT_DATA_UINT64 },
 	{ "anon_evictable_metadata",	KSTAT_DATA_UINT64 },
@@ -791,6 +543,11 @@
 	{ "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
 	{ "l2_hits",			KSTAT_DATA_UINT64 },
 	{ "l2_misses",			KSTAT_DATA_UINT64 },
+	{ "l2_prefetch_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_mru_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_mfu_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_bufc_data_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_bufc_metadata_asize",	KSTAT_DATA_UINT64 },
 	{ "l2_feeds",			KSTAT_DATA_UINT64 },
 	{ "l2_rw_clash",		KSTAT_DATA_UINT64 },
 	{ "l2_read_bytes",		KSTAT_DATA_UINT64 },
@@ -809,6 +566,22 @@
 	{ "l2_size",			KSTAT_DATA_UINT64 },
 	{ "l2_asize",			KSTAT_DATA_UINT64 },
 	{ "l2_hdr_size",		KSTAT_DATA_UINT64 },
+	{ "l2_log_blk_writes",		KSTAT_DATA_UINT64 },
+	{ "l2_log_blk_avg_asize",	KSTAT_DATA_UINT64 },
+	{ "l2_log_blk_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_log_blk_count",		KSTAT_DATA_UINT64 },
+	{ "l2_data_to_meta_ratio",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_success",		KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_unsupported",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_io_errors",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_dh_errors",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_cksum_lb_errors",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_lowmem",		KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_size",		KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_bufs",		KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_bufs_precached",	KSTAT_DATA_UINT64 },
+	{ "l2_rebuild_log_blks",	KSTAT_DATA_UINT64 },
 	{ "memory_throttle_count",	KSTAT_DATA_UINT64 },
 	{ "memory_direct_count",	KSTAT_DATA_UINT64 },
 	{ "memory_indirect_count",	KSTAT_DATA_UINT64 },
@@ -829,16 +602,12 @@
 	{ "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
 	{ "arc_need_free",		KSTAT_DATA_UINT64 },
 	{ "arc_sys_free",		KSTAT_DATA_UINT64 },
-	{ "arc_raw_size",		KSTAT_DATA_UINT64 }
+	{ "arc_raw_size",		KSTAT_DATA_UINT64 },
+	{ "cached_only_in_progress",	KSTAT_DATA_UINT64 },
+	{ "abd_chunk_waste_size",	KSTAT_DATA_UINT64 },
 };
 
-#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
-
-#define	ARCSTAT_INCR(stat, val) \
-	atomic_add_64(&arc_stats.stat.value.ui64, (val))
-
-#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
-#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)
+arc_sums_t arc_sums;
 
 #define	ARCSTAT_MAX(stat, val) {					\
 	uint64_t m;							\
@@ -847,9 +616,6 @@
 		continue;						\
 }
 
-#define	ARCSTAT_MAXSTAT(stat) \
-	ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
-
 /*
  * We define a macro to allow ARC hits/misses to be easily broken down by
  * two separate conditions, giving a total of four different subtypes for
@@ -870,13 +636,25 @@
 		}							\
 	}
 
+/*
+ * This macro allows us to use kstats as floating averages. Each time we
+ * update this kstat, we first factor it and the update value by
+ * ARCSTAT_F_AVG_FACTOR to shrink the new value's contribution to the overall
+ * average. This macro assumes that integer loads and stores are atomic, but
+ * is not safe for multiple writers updating the kstat in parallel (only the
+ * last writer's update will remain).
+ */
+#define	ARCSTAT_F_AVG_FACTOR	3
+#define	ARCSTAT_F_AVG(stat, value) \
+	do { \
+		uint64_t x = ARCSTAT(stat); \
+		x = x - x / ARCSTAT_F_AVG_FACTOR + \
+		    (value) / ARCSTAT_F_AVG_FACTOR; \
+		ARCSTAT(stat) = x; \
+		_NOTE(CONSTCOND) \
+	} while (0)
+
 kstat_t			*arc_ksp;
-static arc_state_t	*arc_anon;
-static arc_state_t	*arc_mru;
-static arc_state_t	*arc_mru_ghost;
-static arc_state_t	*arc_mfu;
-static arc_state_t	*arc_mfu_ghost;
-static arc_state_t	*arc_l2c_only;
 
 /*
  * There are several ARC variables that are critical to export as kstats --
@@ -886,51 +664,18 @@
  * the possibility of inconsistency by having shadow copies of the variables,
  * while still allowing the code to be readable.
  */
-#define	arc_p		ARCSTAT(arcstat_p)	/* target size of MRU */
-#define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
-#define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
-#define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
-#define	arc_no_grow	ARCSTAT(arcstat_no_grow) /* do not grow cache size */
 #define	arc_tempreserve	ARCSTAT(arcstat_tempreserve)
 #define	arc_loaned_bytes	ARCSTAT(arcstat_loaned_bytes)
 #define	arc_meta_limit	ARCSTAT(arcstat_meta_limit) /* max size for metadata */
-#define	arc_dnode_limit	ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
+/* max size for dnodes */
+#define	arc_dnode_size_limit	ARCSTAT(arcstat_dnode_limit)
 #define	arc_meta_min	ARCSTAT(arcstat_meta_min) /* min size for metadata */
-#define	arc_meta_max	ARCSTAT(arcstat_meta_max) /* max size of metadata */
-#define	arc_need_free	ARCSTAT(arcstat_need_free) /* bytes to be freed */
-#define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
+#define	arc_need_free	ARCSTAT(arcstat_need_free) /* waiting to be evicted */
 
-/* size of all b_rabd's in entire arc */
-#define	arc_raw_size	ARCSTAT(arcstat_raw_size)
-/* compressed size of entire arc */
-#define	arc_compressed_size	ARCSTAT(arcstat_compressed_size)
-/* uncompressed size of entire arc */
-#define	arc_uncompressed_size	ARCSTAT(arcstat_uncompressed_size)
-/* number of bytes in the arc from arc_buf_t's */
-#define	arc_overhead_size	ARCSTAT(arcstat_overhead_size)
-
-/*
- * There are also some ARC variables that we want to export, but that are
- * updated so often that having the canonical representation be the statistic
- * variable causes a performance bottleneck. We want to use aggsum_t's for these
- * instead, but still be able to export the kstat in the same way as before.
- * The solution is to always use the aggsum version, except in the kstat update
- * callback.
- */
-aggsum_t arc_size;
-aggsum_t arc_meta_used;
-aggsum_t astat_data_size;
-aggsum_t astat_metadata_size;
-aggsum_t astat_dbuf_size;
-aggsum_t astat_dnode_size;
-aggsum_t astat_bonus_size;
-aggsum_t astat_hdr_size;
-aggsum_t astat_l2_hdr_size;
-
-static hrtime_t arc_growtime;
-static list_t arc_prune_list;
-static kmutex_t arc_prune_mtx;
-static taskq_t *arc_prune_taskq;
+hrtime_t arc_growtime;
+list_t arc_prune_list;
+kmutex_t arc_prune_mtx;
+taskq_t *arc_prune_taskq;
 
 #define	GHOST_STATE(state)	\
 	((state) == arc_mru_ghost || (state) == arc_mfu_ghost ||	\
@@ -995,29 +740,18 @@
  * Hash table routines
  */
 
-#define	HT_LOCK_ALIGN	64
-#define	HT_LOCK_PAD	(P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
-
-struct ht_lock {
-	kmutex_t	ht_lock;
-#ifdef _KERNEL
-	unsigned char	pad[HT_LOCK_PAD];
-#endif
-};
-
-#define	BUF_LOCKS 8192
+#define	BUF_LOCKS 2048
 typedef struct buf_hash_table {
 	uint64_t ht_mask;
 	arc_buf_hdr_t **ht_table;
-	struct ht_lock ht_locks[BUF_LOCKS];
+	kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
 } buf_hash_table_t;
 
 static buf_hash_table_t buf_hash_table;
 
 #define	BUF_HASH_INDEX(spa, dva, birth) \
 	(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
-#define	BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
-#define	BUF_HASH_LOCK(idx)	(&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
+#define	BUF_HASH_LOCK(idx)	(&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
 #define	HDR_LOCK(hdr) \
 	(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
 
@@ -1044,9 +778,6 @@
  */
 #define	L2ARC_FEED_TYPES	4
 
-#define	l2arc_writes_sent	ARCSTAT(arcstat_l2_writes_sent)
-#define	l2arc_writes_done	ARCSTAT(arcstat_l2_writes_done)
-
 /* L2ARC Performance Tunables */
 unsigned long l2arc_write_max = L2ARC_WRITE_SIZE;	/* def max write size */
 unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE;	/* extra warmup write */
@@ -1057,6 +788,7 @@
 int l2arc_noprefetch = B_TRUE;			/* don't cache prefetch bufs */
 int l2arc_feed_again = B_TRUE;			/* turbo warmup */
 int l2arc_norw = B_FALSE;			/* no reads during writes */
+int l2arc_meta_percent = 33;			/* limit on headers size */
 
 /*
  * L2ARC Internals
@@ -1094,30 +826,36 @@
 	ARC_FILL_IN_PLACE	= 1 << 4  /* fill in place (special case) */
 } arc_fill_flags_t;
 
+typedef enum arc_ovf_level {
+	ARC_OVF_NONE,			/* ARC within target size. */
+	ARC_OVF_SOME,			/* ARC is slightly overflowed. */
+	ARC_OVF_SEVERE			/* ARC is severely overflowed. */
+} arc_ovf_level_t;
+
 static kmutex_t l2arc_feed_thr_lock;
 static kcondvar_t l2arc_feed_thr_cv;
 static uint8_t l2arc_thread_exit;
 
+static kmutex_t l2arc_rebuild_thr_lock;
+static kcondvar_t l2arc_rebuild_thr_cv;
+
 enum arc_hdr_alloc_flags {
 	ARC_HDR_ALLOC_RDATA = 0x1,
 	ARC_HDR_DO_ADAPT = 0x2,
+	ARC_HDR_USE_RESERVE = 0x4,
 };
 
 
-static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, int);
 static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
-static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, int);
 static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
 static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
 static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
 static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
 static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
 static void arc_access(arc_buf_hdr_t *, kmutex_t *);
-static boolean_t arc_is_overflowing(void);
 static void arc_buf_watch(arc_buf_t *);
-static void arc_tuning_update(void);
-static void arc_prune_async(int64_t);
-static uint64_t arc_all_memory(void);
 
 static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
 static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
@@ -1126,7 +864,101 @@
 
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
+static void l2arc_do_free_on_write(void);
+static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+    boolean_t state_only);
 
+#define	l2arc_hdr_arcstats_increment(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
+#define	l2arc_hdr_arcstats_decrement(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_FALSE)
+#define	l2arc_hdr_arcstats_increment_state(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_TRUE, B_TRUE)
+#define	l2arc_hdr_arcstats_decrement_state(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
+
+/*
+ * l2arc_exclude_special : A zfs module parameter that controls whether buffers
+ * 		present on special vdevs are eligibile for caching in L2ARC. If
+ * 		present on special vdevs are eligible for caching in L2ARC. If
+ * 		L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
+/*
+ * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
+ * 		metadata and data are cached from ARC into L2ARC.
+ */
+int l2arc_mfuonly = 0;
+
+/*
+ * L2ARC TRIM
+ * l2arc_trim_ahead : A ZFS module parameter that controls how much ahead of
+ * 		the current write size (l2arc_write_max) we should TRIM if we
+ * 		have filled the device. It is defined as a percentage of the
+ * 		write size. If set to 100 we trim twice the space required to
+ * 		accommodate upcoming writes. A minimum of 64MB will be trimmed.
+ * 		It also enables TRIM of the whole L2ARC device upon creation or
+ * 		addition to an existing pool or if the header of the device is
+ * 		invalid upon importing a pool or onlining a cache device. The
+ * 		default is 0, which disables TRIM on L2ARC altogether as it can
+ * 		put significant stress on the underlying storage devices. This
+ * 		will vary depending of how well the specific device handles
+ * 		will vary depending on how well the specific device handles
+ */
+unsigned long l2arc_trim_ahead = 0;
+
+/*
+ * Performance tuning of L2ARC persistence:
+ *
+ * l2arc_rebuild_enabled : A ZFS module parameter that controls whether adding
+ * 		an L2ARC device (either at pool import or later) will attempt
+ * 		to rebuild L2ARC buffer contents.
+ * l2arc_rebuild_blocks_min_l2size : A ZFS module parameter that controls
+ * 		whether log blocks are written to the L2ARC device. If the L2ARC
+ * 		device is less than 1GB, the amount of data l2arc_evict()
+ * 		evicts is significant compared to the amount of restored L2ARC
+ * 		data. In this case do not write log blocks in L2ARC in order
+ * 		not to waste space.
+ */
+int l2arc_rebuild_enabled = B_TRUE;
+unsigned long l2arc_rebuild_blocks_min_l2size = 1024 * 1024 * 1024;
+
+/* L2ARC persistence rebuild control routines. */
+void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
+static void l2arc_dev_rebuild_thread(void *arg);
+static int l2arc_rebuild(l2arc_dev_t *dev);
+
+/* L2ARC persistence read I/O routines. */
+static int l2arc_dev_hdr_read(l2arc_dev_t *dev);
+static int l2arc_log_blk_read(l2arc_dev_t *dev,
+    const l2arc_log_blkptr_t *this_lp, const l2arc_log_blkptr_t *next_lp,
+    l2arc_log_blk_phys_t *this_lb, l2arc_log_blk_phys_t *next_lb,
+    zio_t *this_io, zio_t **next_io);
+static zio_t *l2arc_log_blk_fetch(vdev_t *vd,
+    const l2arc_log_blkptr_t *lp, l2arc_log_blk_phys_t *lb);
+static void l2arc_log_blk_fetch_abort(zio_t *zio);
+
+/* L2ARC persistence block restoration routines. */
+static void l2arc_log_blk_restore(l2arc_dev_t *dev,
+    const l2arc_log_blk_phys_t *lb, uint64_t lb_asize);
+static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
+    l2arc_dev_t *dev);
+
+/* L2ARC persistence write I/O routines. */
+static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
+    l2arc_write_callback_t *cb);
+
+/* L2ARC persistence auxiliary routines. */
+boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
+    const l2arc_log_blkptr_t *lbp);
+static boolean_t l2arc_log_blk_insert(l2arc_dev_t *dev,
+    const arc_buf_hdr_t *ab);
+boolean_t l2arc_range_check_overlap(uint64_t bottom,
+    uint64_t top, uint64_t check);
+static void l2arc_blk_fetch_done(zio_t *zio);
+static inline uint64_t
+    l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev);
 
 /*
  * We use Cityhash for this. It's fast, and has good hash properties without
@@ -1224,9 +1056,9 @@
 
 		ARCSTAT_MAX(arcstat_hash_chain_max, i);
 	}
-
-	ARCSTAT_BUMP(arcstat_hash_elements);
-	ARCSTAT_MAXSTAT(arcstat_hash_elements);
+	uint64_t he = atomic_inc_64_nv(
+	    &arc_stats.arcstat_hash_elements.value.ui64);
+	ARCSTAT_MAX(arcstat_hash_elements_max, he);
 
 	return (NULL);
 }
@@ -1250,7 +1082,7 @@
 	arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
 
 	/* collect some hash table performance data */
-	ARCSTAT_BUMPDOWN(arcstat_hash_elements);
+	atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
 
 	if (buf_hash_table.ht_table[idx] &&
 	    buf_hash_table.ht_table[idx]->b_hash_next == NULL)
@@ -1269,8 +1101,6 @@
 static void
 buf_fini(void)
 {
-	int i;
-
 #if defined(_KERNEL)
 	/*
 	 * Large allocations which do not require contiguous pages
@@ -1282,8 +1112,8 @@
 	kmem_free(buf_hash_table.ht_table,
 	    (buf_hash_table.ht_mask + 1) * sizeof (void *));
 #endif
-	for (i = 0; i < BUF_LOCKS; i++)
-		mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
+	for (int i = 0; i < BUF_LOCKS; i++)
+		mutex_destroy(BUF_HASH_LOCK(i));
 	kmem_cache_destroy(hdr_full_cache);
 	kmem_cache_destroy(hdr_full_crypt_cache);
 	kmem_cache_destroy(hdr_l2only_cache);
@@ -1294,10 +1124,10 @@
  * Constructor callback - called when the cache is empty
  * and a new buf is requested.
  */
-/* ARGSUSED */
 static int
 hdr_full_cons(void *vbuf, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	arc_buf_hdr_t *hdr = vbuf;
 
 	bzero(hdr, HDR_FULL_SIZE);
@@ -1313,10 +1143,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
 {
+	(void) unused;
 	arc_buf_hdr_t *hdr = vbuf;
 
 	hdr_full_cons(vbuf, unused, kmflag);
@@ -1326,10 +1156,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	arc_buf_hdr_t *hdr = vbuf;
 
 	bzero(hdr, HDR_L2ONLY_SIZE);
@@ -1338,10 +1168,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 buf_cons(void *vbuf, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	arc_buf_t *buf = vbuf;
 
 	bzero(buf, sizeof (arc_buf_t));
@@ -1355,10 +1185,10 @@
  * Destructor callback - called when a cached buf is
  * no longer required.
  */
-/* ARGSUSED */
 static void
 hdr_full_dest(void *vbuf, void *unused)
 {
+	(void) unused;
 	arc_buf_hdr_t *hdr = vbuf;
 
 	ASSERT(HDR_EMPTY(hdr));
@@ -1369,52 +1199,36 @@
 	arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
 }
 
-/* ARGSUSED */
 static void
 hdr_full_crypt_dest(void *vbuf, void *unused)
 {
+	(void) unused;
 	arc_buf_hdr_t *hdr = vbuf;
 
 	hdr_full_dest(vbuf, unused);
 	arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
 }
 
-/* ARGSUSED */
 static void
 hdr_l2only_dest(void *vbuf, void *unused)
 {
-	ASSERTV(arc_buf_hdr_t *hdr = vbuf);
+	(void) unused;
+	arc_buf_hdr_t *hdr __maybe_unused = vbuf;
 
 	ASSERT(HDR_EMPTY(hdr));
 	arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 }
 
-/* ARGSUSED */
 static void
 buf_dest(void *vbuf, void *unused)
 {
+	(void) unused;
 	arc_buf_t *buf = vbuf;
 
 	mutex_destroy(&buf->b_evict_lock);
 	arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
 }
 
-/*
- * Reclaim callback -- invoked when memory is low.
- */
-/* ARGSUSED */
-static void
-hdr_recl(void *unused)
-{
-	dprintf("hdr_recl called\n");
-	/*
-	 * umem calls the reclaim func when we destroy the buf cache,
-	 * which is after we do arc_fini().
-	 */
-	if (arc_initialized)
-		zthr_wakeup(arc_reap_zthr);
-}
-
 static void
 buf_init(void)
 {
@@ -1450,12 +1264,12 @@
 	}
 
 	hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
-	    0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+	    0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
 	hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
 	    HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
-	    hdr_recl, NULL, NULL, 0);
+	    NULL, NULL, NULL, 0);
 	hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
-	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
+	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
 	    NULL, NULL, 0);
 	buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
 	    0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
@@ -1464,10 +1278,8 @@
 		for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
 			*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
 
-	for (i = 0; i < BUF_LOCKS; i++) {
-		mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
-		    NULL, MUTEX_DEFAULT, NULL);
-	}
+	for (i = 0; i < BUF_LOCKS; i++)
+		mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
 }
 
 #define	ARC_MINTIME	(hz>>4) /* 62 ms */
@@ -1549,6 +1361,12 @@
 	    HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF);
 }
 
+uint8_t
+arc_get_complevel(arc_buf_t *buf)
+{
+	return (buf->b_hdr->b_complevel);
+}
+
 static inline boolean_t
 arc_buf_is_shared(arc_buf_t *buf)
 {
@@ -1700,11 +1518,11 @@
 void
 arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
 {
+	(void) sig, (void) unused;
 	panic("Got SIGSEGV at address: 0x%lx\n", (long)si->si_addr);
 }
 #endif
 
-/* ARGSUSED */
 static void
 arc_buf_unwatch(arc_buf_t *buf)
 {
@@ -1713,10 +1531,11 @@
 		ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
 		    PROT_READ | PROT_WRITE));
 	}
+#else
+	(void) buf;
 #endif
 }
 
-/* ARGSUSED */
 static void
 arc_buf_watch(arc_buf_t *buf)
 {
@@ -1724,6 +1543,8 @@
 	if (arc_watch)
 		ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
 		    PROT_READ));
+#else
+	(void) buf;
 #endif
 }
 
@@ -1886,6 +1707,45 @@
 }
 
 /*
+ * Allocates an ARC buf header that's in an evicted & L2-cached state.
+ * This is used during l2arc reconstruction to make empty ARC buffers
+ * which circumvent the regular disk->arc->l2arc path and instead come
+ * into being in the reverse order, i.e. l2arc->arc.
+ */
+static arc_buf_hdr_t *
+arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
+    dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth,
+    enum zio_compress compress, uint8_t complevel, boolean_t protected,
+    boolean_t prefetch, arc_state_type_t arcs_state)
+{
+	arc_buf_hdr_t	*hdr;
+
+	ASSERT(size != 0);
+	hdr = kmem_cache_alloc(hdr_l2only_cache, KM_SLEEP);
+	hdr->b_birth = birth;
+	hdr->b_type = type;
+	hdr->b_flags = 0;
+	arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L2HDR);
+	HDR_SET_LSIZE(hdr, size);
+	HDR_SET_PSIZE(hdr, psize);
+	arc_hdr_set_compress(hdr, compress);
+	hdr->b_complevel = complevel;
+	if (protected)
+		arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
+	if (prefetch)
+		arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+	hdr->b_spa = spa_load_guid(dev->l2ad_vdev->vdev_spa);
+
+	hdr->b_dva = dva;
+
+	hdr->b_l2hdr.b_dev = dev;
+	hdr->b_l2hdr.b_daddr = daddr;
+	hdr->b_l2hdr.b_arcs_state = arcs_state;
+
+	return (hdr);
+}
+
+/*
  * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
  */
 static uint64_t
@@ -1930,9 +1790,8 @@
 		tmpbuf = zio_buf_alloc(lsize);
 		abd = abd_get_from_buf(tmpbuf, lsize);
 		abd_take_ownership_of_buf(abd, B_TRUE);
-
 		csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
-		    hdr->b_l1hdr.b_pabd, tmpbuf, lsize);
+		    hdr->b_l1hdr.b_pabd, tmpbuf, lsize, hdr->b_complevel);
 		ASSERT3U(csize, <=, psize);
 		abd_zero_off(abd, csize, psize - csize);
 	}
@@ -2013,12 +1872,13 @@
 		 * and then loan a buffer from it, rather than allocating a
 		 * linear buffer and wrapping it in an abd later.
 		 */
-		cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
+		cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
+		    ARC_HDR_DO_ADAPT);
 		tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
 
 		ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
 		    hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
-		    HDR_GET_LSIZE(hdr));
+		    HDR_GET_LSIZE(hdr), &hdr->b_complevel);
 		if (ret != 0) {
 			abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
 			goto error;
@@ -2096,7 +1956,7 @@
  * arc_buf_fill().
  */
 static void
-arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
+arc_buf_untransform_in_place(arc_buf_t *buf)
 {
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 
@@ -2200,7 +2060,7 @@
 
 			if (hash_lock != NULL)
 				mutex_enter(hash_lock);
-			arc_buf_untransform_in_place(buf, hash_lock);
+			arc_buf_untransform_in_place(buf);
 			if (hash_lock != NULL)
 				mutex_exit(hash_lock);
 
@@ -2219,7 +2079,6 @@
 	} else {
 		ASSERT(hdr_compressed);
 		ASSERT(!compressed);
-		ASSERT3U(HDR_GET_LSIZE(hdr), !=, HDR_GET_PSIZE(hdr));
 
 		/*
 		 * If the buf is sharing its data with the hdr, unlink it and
@@ -2265,7 +2124,8 @@
 		} else {
 			error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
 			    hdr->b_l1hdr.b_pabd, buf->b_data,
-			    HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+			    HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr),
+			    &hdr->b_complevel);
 
 			/*
 			 * Absent hardware errors or software bugs, this should
@@ -2324,8 +2184,8 @@
 		 */
 		ret = SET_ERROR(EIO);
 		spa_log_error(spa, zb);
-		zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
-		    spa, NULL, zb, NULL, 0, 0);
+		(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+		    spa, NULL, zb, NULL, 0);
 	}
 
 	return (ret);
@@ -2353,7 +2213,6 @@
 		return;
 	}
 
-	ASSERT(!GHOST_STATE(state));
 	if (hdr->b_l1hdr.b_pabd != NULL) {
 		(void) zfs_refcount_add_many(&state->arcs_esize[type],
 		    arc_hdr_size(hdr), hdr);
@@ -2394,7 +2253,6 @@
 		return;
 	}
 
-	ASSERT(!GHOST_STATE(state));
 	if (hdr->b_l1hdr.b_pabd != NULL) {
 		(void) zfs_refcount_remove_many(&state->arcs_esize[type],
 		    arc_hdr_size(hdr), hdr);
@@ -2437,12 +2295,16 @@
 	    (state != arc_anon)) {
 		/* We don't use the L2-only state list. */
 		if (state != arc_l2c_only) {
-			multilist_remove(state->arcs_list[arc_buf_type(hdr)],
+			multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
 			    hdr);
 			arc_evictable_space_decrement(hdr, state);
 		}
 		/* remove the prefetch flag if we get a reference */
+		if (HDR_HAS_L2HDR(hdr))
+			l2arc_hdr_arcstats_decrement_state(hdr);
 		arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+		if (HDR_HAS_L2HDR(hdr))
+			l2arc_hdr_arcstats_increment_state(hdr);
 	}
 }
 
@@ -2467,7 +2329,7 @@
 	 */
 	if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
 	    (state != arc_anon)) {
-		multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
+		multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
 		ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
 		arc_evictable_space_increment(hdr, state);
 	}
@@ -2484,6 +2346,7 @@
 void
 arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
 {
+	(void) state_index;
 	arc_buf_hdr_t *hdr = ab->b_hdr;
 	l1arc_buf_hdr_t *l1hdr = NULL;
 	l2arc_buf_hdr_t *l2hdr = NULL;
@@ -2570,7 +2433,7 @@
 	if (refcnt == 0) {
 		if (old_state != arc_anon && old_state != arc_l2c_only) {
 			ASSERT(HDR_HAS_L1HDR(hdr));
-			multilist_remove(old_state->arcs_list[buftype], hdr);
+			multilist_remove(&old_state->arcs_list[buftype], hdr);
 
 			if (GHOST_STATE(old_state)) {
 				ASSERT0(bufcnt);
@@ -2587,7 +2450,7 @@
 			 * beforehand.
 			 */
 			ASSERT(HDR_HAS_L1HDR(hdr));
-			multilist_insert(new_state->arcs_list[buftype], hdr);
+			multilist_insert(&new_state->arcs_list[buftype], hdr);
 
 			if (GHOST_STATE(new_state)) {
 				ASSERT0(bufcnt);
@@ -2725,15 +2588,15 @@
 		}
 	}
 
-	if (HDR_HAS_L1HDR(hdr))
+	if (HDR_HAS_L1HDR(hdr)) {
 		hdr->b_l1hdr.b_state = new_state;
 
-	/*
-	 * L2 headers should never be on the L2 state list since they don't
-	 * have L1 headers allocated.
-	 */
-	ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
-	    multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
+		if (HDR_HAS_L2HDR(hdr) && new_state != arc_l2c_only) {
+			l2arc_hdr_arcstats_decrement_state(hdr);
+			hdr->b_l2hdr.b_arcs_state = new_state->arcs_state;
+			l2arc_hdr_arcstats_increment_state(hdr);
+		}
+	}
 }
 
 void
@@ -2745,32 +2608,41 @@
 	default:
 		break;
 	case ARC_SPACE_DATA:
-		aggsum_add(&astat_data_size, space);
+		ARCSTAT_INCR(arcstat_data_size, space);
 		break;
 	case ARC_SPACE_META:
-		aggsum_add(&astat_metadata_size, space);
+		ARCSTAT_INCR(arcstat_metadata_size, space);
 		break;
 	case ARC_SPACE_BONUS:
-		aggsum_add(&astat_bonus_size, space);
+		ARCSTAT_INCR(arcstat_bonus_size, space);
 		break;
 	case ARC_SPACE_DNODE:
-		aggsum_add(&astat_dnode_size, space);
+		aggsum_add(&arc_sums.arcstat_dnode_size, space);
 		break;
 	case ARC_SPACE_DBUF:
-		aggsum_add(&astat_dbuf_size, space);
+		ARCSTAT_INCR(arcstat_dbuf_size, space);
 		break;
 	case ARC_SPACE_HDRS:
-		aggsum_add(&astat_hdr_size, space);
+		ARCSTAT_INCR(arcstat_hdr_size, space);
 		break;
 	case ARC_SPACE_L2HDRS:
-		aggsum_add(&astat_l2_hdr_size, space);
+		aggsum_add(&arc_sums.arcstat_l2_hdr_size, space);
+		break;
+	case ARC_SPACE_ABD_CHUNK_WASTE:
+		/*
+		 * Note: this includes space wasted by all scatter ABD's, not
+		 * just those allocated by the ARC.  But the vast majority of
+		 * scatter ABD's come from the ARC, because other users are
+		 * very short-lived.
+		 */
+		ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space);
 		break;
 	}
 
-	if (type != ARC_SPACE_DATA)
-		aggsum_add(&arc_meta_used, space);
+	if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
+		aggsum_add(&arc_sums.arcstat_meta_used, space);
 
-	aggsum_add(&arc_size, space);
+	aggsum_add(&arc_sums.arcstat_size, space);
 }
 
 void
@@ -2782,42 +2654,41 @@
 	default:
 		break;
 	case ARC_SPACE_DATA:
-		aggsum_add(&astat_data_size, -space);
+		ARCSTAT_INCR(arcstat_data_size, -space);
 		break;
 	case ARC_SPACE_META:
-		aggsum_add(&astat_metadata_size, -space);
+		ARCSTAT_INCR(arcstat_metadata_size, -space);
 		break;
 	case ARC_SPACE_BONUS:
-		aggsum_add(&astat_bonus_size, -space);
+		ARCSTAT_INCR(arcstat_bonus_size, -space);
 		break;
 	case ARC_SPACE_DNODE:
-		aggsum_add(&astat_dnode_size, -space);
+		aggsum_add(&arc_sums.arcstat_dnode_size, -space);
 		break;
 	case ARC_SPACE_DBUF:
-		aggsum_add(&astat_dbuf_size, -space);
+		ARCSTAT_INCR(arcstat_dbuf_size, -space);
 		break;
 	case ARC_SPACE_HDRS:
-		aggsum_add(&astat_hdr_size, -space);
+		ARCSTAT_INCR(arcstat_hdr_size, -space);
 		break;
 	case ARC_SPACE_L2HDRS:
-		aggsum_add(&astat_l2_hdr_size, -space);
+		aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space);
+		break;
+	case ARC_SPACE_ABD_CHUNK_WASTE:
+		ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space);
 		break;
 	}
 
-	if (type != ARC_SPACE_DATA) {
-		ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
-		/*
-		 * We use the upper bound here rather than the precise value
-		 * because the arc_meta_max value doesn't need to be
-		 * precise. It's only consumed by humans via arcstats.
-		 */
-		if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
-			arc_meta_max = aggsum_upper_bound(&arc_meta_used);
-		aggsum_add(&arc_meta_used, -space);
+	if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
+		ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used,
+		    space) >= 0);
+		ARCSTAT_MAX(arcstat_meta_max,
+		    aggsum_upper_bound(&arc_sums.arcstat_meta_used));
+		aggsum_add(&arc_sums.arcstat_meta_used, -space);
 	}
 
-	ASSERT(aggsum_compare(&arc_size, space) >= 0);
-	aggsum_add(&arc_size, -space);
+	ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0);
+	aggsum_add(&arc_sums.arcstat_size, -space);
 }
 
 /*
@@ -2881,12 +2752,6 @@
 	ASSERT3P(*ret, ==, NULL);
 	IMPLY(encrypted, compressed);
 
-	hdr->b_l1hdr.b_mru_hits = 0;
-	hdr->b_l1hdr.b_mru_ghost_hits = 0;
-	hdr->b_l1hdr.b_mfu_hits = 0;
-	hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-	hdr->b_l1hdr.b_l2_hits = 0;
-
 	buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
 	buf->b_hdr = hdr;
 	buf->b_data = NULL;
@@ -3004,10 +2869,10 @@
 
 arc_buf_t *
 arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type)
+    enum zio_compress compression_type, uint8_t complevel)
 {
 	arc_buf_t *buf = arc_alloc_compressed_buf(spa, arc_onloan_tag,
-	    psize, lsize, compression_type);
+	    psize, lsize, compression_type, complevel);
 
 	arc_loaned_bytes_update(arc_buf_size(buf));
 
@@ -3018,10 +2883,11 @@
 arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
     dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type)
+    enum zio_compress compression_type, uint8_t complevel)
 {
 	arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj,
-	    byteorder, salt, iv, mac, ot, psize, lsize, compression_type);
+	    byteorder, salt, iv, mac, ot, psize, lsize, compression_type,
+	    complevel);
 
 	atomic_add_64(&arc_loaned_bytes, psize);
 	return (buf);
@@ -3152,7 +3018,7 @@
 	    arc_hdr_size(hdr), hdr, buf);
 	arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
 	abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
-	abd_put(hdr->b_l1hdr.b_pabd);
+	abd_free(hdr->b_l1hdr.b_pabd);
 	hdr->b_l1hdr.b_pabd = NULL;
 	buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
 
@@ -3322,7 +3188,6 @@
 {
 	uint64_t size;
 	boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
-	boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
 
 	ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
 	ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3333,14 +3198,14 @@
 		size = HDR_GET_PSIZE(hdr);
 		ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
 		hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
-		    do_adapt);
+		    alloc_flags);
 		ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
 		ARCSTAT_INCR(arcstat_raw_size, size);
 	} else {
 		size = arc_hdr_size(hdr);
 		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
 		hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
-		    do_adapt);
+		    alloc_flags);
 		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
 	}
 
@@ -3386,13 +3251,34 @@
 	ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
 }
 
+/*
+ * Allocate empty anonymous ARC header.  The header will get its identity
+ * assigned and buffers attached later as part of read or write operations.
+ *
+ * In case of read arc_read() assigns header its identity (b_dva + b_birth),
+ * inserts it into ARC hash to become globally visible and allocates physical
+ * (b_pabd) or raw (b_rabd) ABD buffer to read into from disk.  On disk read
+ * completion arc_read_done() allocates ARC buffer(s) as needed, potentially
+ * sharing one of them with the physical ABD buffer.
+ *
+ * In case of write arc_alloc_buf() allocates ARC buffer to be filled with
+ * data.  Then after compression and/or encryption arc_write_ready() allocates
+ * and fills (or potentially shares) physical (b_pabd) or raw (b_rabd) ABD
+ * buffer.  On disk write completion arc_write_done() assigns the header its
+ * new identity (b_dva + b_birth) and inserts into ARC hash.
+ *
+ * In case of partial overwrite the old data is read first as described. Then
+ * arc_release() either allocates new anonymous ARC header and moves the ARC
+ * buffer to it, or reuses the old ARC header by discarding its identity and
+ * removing it from ARC hash.  After buffer modification normal write process
+ * follows as described.
+ */
 static arc_buf_hdr_t *
 arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
-    boolean_t protected, enum zio_compress compression_type,
-    arc_buf_contents_t type, boolean_t alloc_rdata)
+    boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
+    arc_buf_contents_t type)
 {
 	arc_buf_hdr_t *hdr;
-	int flags = ARC_HDR_DO_ADAPT;
 
 	VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
 	if (protected) {
@@ -3400,7 +3286,6 @@
 	} else {
 		hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
 	}
-	flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
 
 	ASSERT(HDR_EMPTY(hdr));
 	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3411,20 +3296,19 @@
 	hdr->b_flags = 0;
 	arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
 	arc_hdr_set_compress(hdr, compression_type);
+	hdr->b_complevel = complevel;
 	if (protected)
 		arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
 
 	hdr->b_l1hdr.b_state = arc_anon;
 	hdr->b_l1hdr.b_arc_access = 0;
+	hdr->b_l1hdr.b_mru_hits = 0;
+	hdr->b_l1hdr.b_mru_ghost_hits = 0;
+	hdr->b_l1hdr.b_mfu_hits = 0;
+	hdr->b_l1hdr.b_mfu_ghost_hits = 0;
 	hdr->b_l1hdr.b_bufcnt = 0;
 	hdr->b_l1hdr.b_buf = NULL;
 
-	/*
-	 * Allocate the hdr's buffer. This will contain either
-	 * the compressed or uncompressed data depending on the block
-	 * it references and compressed arc enablement.
-	 */
-	arc_hdr_alloc_abd(hdr, flags);
 	ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 
 	return (hdr);
@@ -3554,7 +3438,6 @@
 	arc_buf_hdr_t *nhdr;
 	arc_buf_t *buf;
 	kmem_cache_t *ncache, *ocache;
-	unsigned nsize, osize;
 
 	/*
 	 * This function requires that hdr is in the arc_anon state.
@@ -3571,14 +3454,10 @@
 
 	if (need_crypt) {
 		ncache = hdr_full_crypt_cache;
-		nsize = sizeof (hdr->b_crypt_hdr);
 		ocache = hdr_full_cache;
-		osize = HDR_FULL_SIZE;
 	} else {
 		ncache = hdr_full_cache;
-		nsize = HDR_FULL_SIZE;
 		ocache = hdr_full_crypt_cache;
-		osize = sizeof (hdr->b_crypt_hdr);
 	}
 
 	nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
@@ -3604,7 +3483,6 @@
 	nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
 	nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
 	nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
-	nhdr->b_l1hdr.b_l2_hits = hdr->b_l1hdr.b_l2_hits;
 	nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
 	nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
 
@@ -3649,7 +3527,6 @@
 	hdr->b_l1hdr.b_mru_ghost_hits = 0;
 	hdr->b_l1hdr.b_mfu_hits = 0;
 	hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-	hdr->b_l1hdr.b_l2_hits = 0;
 	hdr->b_l1hdr.b_acb = NULL;
 	hdr->b_l1hdr.b_pabd = NULL;
 
@@ -3713,7 +3590,7 @@
 arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
 {
 	arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
-	    B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE);
+	    B_FALSE, ZIO_COMPRESS_OFF, 0, type);
 
 	arc_buf_t *buf = NULL;
 	VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
@@ -3729,7 +3606,7 @@
  */
 arc_buf_t *
 arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type)
+    enum zio_compress compression_type, uint8_t complevel)
 {
 	ASSERT3U(lsize, >, 0);
 	ASSERT3U(lsize, >=, psize);
@@ -3737,7 +3614,7 @@
 	ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
 
 	arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-	    B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE);
+	    B_FALSE, compression_type, complevel, ARC_BUFC_DATA);
 
 	arc_buf_t *buf = NULL;
 	VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
@@ -3745,16 +3622,12 @@
 	arc_buf_thaw(buf);
 	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
 
-	if (!arc_buf_is_shared(buf)) {
-		/*
-		 * To ensure that the hdr has the correct data in it if we call
-		 * arc_untransform() on this buf before it's been written to
-		 * disk, it's easiest if we just set up sharing between the
-		 * buf and the hdr.
-		 */
-		arc_hdr_free_abd(hdr, B_FALSE);
-		arc_share_buf(hdr, buf);
-	}
+	/*
+	 * To ensure that the hdr has the correct data in it if we call
+	 * arc_untransform() on this buf before it's been written to disk,
+	 * it's easiest if we just set up sharing between the buf and the hdr.
+	 */
+	arc_share_buf(hdr, buf);
 
 	return (buf);
 }
@@ -3763,7 +3636,7 @@
 arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
     const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
     dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
-    enum zio_compress compression_type)
+    enum zio_compress compression_type, uint8_t complevel)
 {
 	arc_buf_hdr_t *hdr;
 	arc_buf_t *buf;
@@ -3776,7 +3649,7 @@
 	ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
 
 	hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
-	    compression_type, type, B_TRUE);
+	    compression_type, complevel, type);
 
 	hdr->b_crypt_hdr.b_dsobj = dsobj;
 	hdr->b_crypt_hdr.b_ot = ot;
@@ -3801,6 +3674,76 @@
 }
 
 static void
+l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+    boolean_t state_only)
+{
+	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
+	l2arc_dev_t *dev = l2hdr->b_dev;
+	uint64_t lsize = HDR_GET_LSIZE(hdr);
+	uint64_t psize = HDR_GET_PSIZE(hdr);
+	uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
+	arc_buf_contents_t type = hdr->b_type;
+	int64_t lsize_s;
+	int64_t psize_s;
+	int64_t asize_s;
+
+	if (incr) {
+		lsize_s = lsize;
+		psize_s = psize;
+		asize_s = asize;
+	} else {
+		lsize_s = -lsize;
+		psize_s = -psize;
+		asize_s = -asize;
+	}
+
+	/* If the buffer is a prefetch, count it as such. */
+	if (HDR_PREFETCH(hdr)) {
+		ARCSTAT_INCR(arcstat_l2_prefetch_asize, asize_s);
+	} else {
+		/*
+		 * We use the value stored in the L2 header upon initial
+		 * caching in L2ARC. This value will be updated in case
+		 * an MRU/MRU_ghost buffer transitions to MFU but the L2ARC
+		 * metadata (log entry) cannot currently be updated. Having
+		 * the ARC state in the L2 header solves the problem of a
+		 * possibly absent L1 header (apparent in buffers restored
+		 * from persistent L2ARC).
+		 */
+		switch (hdr->b_l2hdr.b_arcs_state) {
+			case ARC_STATE_MRU_GHOST:
+			case ARC_STATE_MRU:
+				ARCSTAT_INCR(arcstat_l2_mru_asize, asize_s);
+				break;
+			case ARC_STATE_MFU_GHOST:
+			case ARC_STATE_MFU:
+				ARCSTAT_INCR(arcstat_l2_mfu_asize, asize_s);
+				break;
+			default:
+				break;
+		}
+	}
+
+	if (state_only)
+		return;
+
+	ARCSTAT_INCR(arcstat_l2_psize, psize_s);
+	ARCSTAT_INCR(arcstat_l2_lsize, lsize_s);
+
+	switch (type) {
+		case ARC_BUFC_DATA:
+			ARCSTAT_INCR(arcstat_l2_bufc_data_asize, asize_s);
+			break;
+		case ARC_BUFC_METADATA:
+			ARCSTAT_INCR(arcstat_l2_bufc_metadata_asize, asize_s);
+			break;
+		default:
+			break;
+	}
+}
+
+
+static void
 arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 {
 	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
@@ -3813,9 +3756,7 @@
 
 	list_remove(&dev->l2ad_buflist, hdr);
 
-	ARCSTAT_INCR(arcstat_l2_psize, -psize);
-	ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
-
+	l2arc_hdr_arcstats_decrement(hdr);
 	vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
 
 	(void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr),
@@ -3933,9 +3874,18 @@
  *    - arc_mru_ghost -> deleted
  *    - arc_mfu_ghost -> arc_l2c_only
  *    - arc_mfu_ghost -> deleted
+ *
+ * Return total size of evicted data buffers for eviction progress tracking.
+ * When evicting from ghost states return logical buffer size to make eviction
+ * progress at the same (or at least comparable) rate as from non-ghost states.
+ *
+ * Store in *real_evicted the actual ARC size reduction, which is used to wake
+ * up threads waiting for it.  For non-ghost states it includes the size of
+ * evicted data buffers (the headers are not freed there).  For ghost states it
+ * includes only the size of the evicted headers.
  */
 static int64_t
-arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
+arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
 {
 	arc_state_t *evicted_state, *state;
 	int64_t bytes_evicted = 0;
@@ -3945,6 +3895,7 @@
 	ASSERT(MUTEX_HELD(hash_lock));
 	ASSERT(HDR_HAS_L1HDR(hdr));
 
+	*real_evicted = 0;
 	state = hdr->b_l1hdr.b_state;
 	if (GHOST_STATE(state)) {
 		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
@@ -3981,9 +3932,11 @@
 			 */
 			hdr = arc_hdr_realloc(hdr, hdr_full_cache,
 			    hdr_l2only_cache);
+			*real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
 		} else {
 			arc_change_state(arc_anon, hdr, hash_lock);
 			arc_hdr_destroy(hdr);
+			*real_evicted += HDR_FULL_SIZE;
 		}
 		return (bytes_evicted);
 	}
@@ -4007,8 +3960,10 @@
 			ARCSTAT_BUMP(arcstat_mutex_miss);
 			break;
 		}
-		if (buf->b_data != NULL)
+		if (buf->b_data != NULL) {
 			bytes_evicted += HDR_GET_LSIZE(hdr);
+			*real_evicted += HDR_GET_LSIZE(hdr);
+		}
 		mutex_exit(&buf->b_evict_lock);
 		arc_buf_destroy_impl(buf);
 	}
@@ -4019,6 +3974,21 @@
 		if (l2arc_write_eligible(hdr->b_spa, hdr)) {
 			ARCSTAT_INCR(arcstat_evict_l2_eligible,
 			    HDR_GET_LSIZE(hdr));
+
+			switch (state->arcs_state) {
+				case ARC_STATE_MRU:
+					ARCSTAT_INCR(
+					    arcstat_evict_l2_eligible_mru,
+					    HDR_GET_LSIZE(hdr));
+					break;
+				case ARC_STATE_MFU:
+					ARCSTAT_INCR(
+					    arcstat_evict_l2_eligible_mfu,
+					    HDR_GET_LSIZE(hdr));
+					break;
+				default:
+					break;
+			}
 		} else {
 			ARCSTAT_INCR(arcstat_evict_l2_ineligible,
 			    HDR_GET_LSIZE(hdr));
@@ -4029,6 +3999,7 @@
 		arc_cksum_free(hdr);
 
 		bytes_evicted += arc_hdr_size(hdr);
+		*real_evicted += arc_hdr_size(hdr);
 
 		/*
 		 * If this hdr is being evicted and has a compressed
@@ -4051,25 +4022,37 @@
 	return (bytes_evicted);
 }
 
+static void
+arc_set_need_free(void)
+{
+	ASSERT(MUTEX_HELD(&arc_evict_lock));
+	int64_t remaining = arc_free_memory() - arc_sys_free / 2;
+	arc_evict_waiter_t *aw = list_tail(&arc_evict_waiters);
+	if (aw == NULL) {
+		arc_need_free = MAX(-remaining, 0);
+	} else {
+		arc_need_free =
+		    MAX(-remaining, (int64_t)(aw->aew_count - arc_evict_count));
+	}
+}
+
 static uint64_t
 arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
-    uint64_t spa, int64_t bytes)
+    uint64_t spa, uint64_t bytes)
 {
 	multilist_sublist_t *mls;
-	uint64_t bytes_evicted = 0;
+	uint64_t bytes_evicted = 0, real_evicted = 0;
 	arc_buf_hdr_t *hdr;
 	kmutex_t *hash_lock;
-	int evict_count = 0;
+	int evict_count = zfs_arc_evict_batch_limit;
 
 	ASSERT3P(marker, !=, NULL);
-	IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
 
 	mls = multilist_sublist_lock(ml, idx);
 
-	for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
+	for (hdr = multilist_sublist_prev(mls, marker); likely(hdr != NULL);
 	    hdr = multilist_sublist_prev(mls, marker)) {
-		if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) ||
-		    (evict_count >= zfs_arc_evict_batch_limit))
+		if ((evict_count <= 0) || (bytes_evicted >= bytes))
 			break;
 
 		/*
@@ -4117,10 +4100,13 @@
 		ASSERT(!MUTEX_HELD(hash_lock));
 
 		if (mutex_tryenter(hash_lock)) {
-			uint64_t evicted = arc_evict_hdr(hdr, hash_lock);
+			uint64_t revicted;
+			uint64_t evicted = arc_evict_hdr(hdr, hash_lock,
+			    &revicted);
 			mutex_exit(hash_lock);
 
 			bytes_evicted += evicted;
+			real_evicted += revicted;
 
 			/*
 			 * If evicted is zero, arc_evict_hdr() must have
@@ -4128,31 +4114,8 @@
 			 * evict_count in this case.
 			 */
 			if (evicted != 0)
-				evict_count++;
+				evict_count--;
 
-			/*
-			 * If arc_size isn't overflowing, signal any
-			 * threads that might happen to be waiting.
-			 *
-			 * For each header evicted, we wake up a single
-			 * thread. If we used cv_broadcast, we could
-			 * wake up "too many" threads causing arc_size
-			 * to significantly overflow arc_c; since
-			 * arc_get_data_impl() doesn't check for overflow
-			 * when it's woken up (it doesn't because it's
-			 * possible for the ARC to be overflowing while
-			 * full of un-evictable buffers, and the
-			 * function should proceed in this case).
-			 *
-			 * If threads are left sleeping, due to not
-			 * using cv_broadcast here, they will be woken
-			 * up via cv_broadcast in arc_adjust_cb() just
-			 * before arc_adjust_zthr sleeps.
-			 */
-			mutex_enter(&arc_adjust_lock);
-			if (!arc_is_overflowing())
-				cv_signal(&arc_adjust_waiters_cv);
-			mutex_exit(&arc_adjust_lock);
 		} else {
 			ARCSTAT_BUMP(arcstat_mutex_miss);
 		}
@@ -4160,10 +4123,77 @@
 
 	multilist_sublist_unlock(mls);
 
+	/*
+	 * Increment the count of evicted bytes, and wake up any threads that
+	 * are waiting for the count to reach this value.  Since the list is
+	 * ordered by ascending aew_count, we pop off the beginning of the
+	 * list until we reach the end, or a waiter that's past the current
+	 * "count".  Doing this outside the loop reduces the number of times
+	 * we need to acquire the global arc_evict_lock.
+	 *
+	 * Only wake when there's sufficient free memory in the system
+	 * (specifically, arc_sys_free/2, which by default is a bit more than
+	 * 1/64th of RAM).  See the comments in arc_wait_for_eviction().
+	 */
+	mutex_enter(&arc_evict_lock);
+	arc_evict_count += real_evicted;
+
+	if (arc_free_memory() > arc_sys_free / 2) {
+		arc_evict_waiter_t *aw;
+		while ((aw = list_head(&arc_evict_waiters)) != NULL &&
+		    aw->aew_count <= arc_evict_count) {
+			list_remove(&arc_evict_waiters, aw);
+			cv_broadcast(&aw->aew_cv);
+		}
+	}
+	arc_set_need_free();
+	mutex_exit(&arc_evict_lock);
+
+	/*
+	 * If the ARC size is reduced from arc_c_max to arc_c_min (especially
+	 * if the average cached block is small), eviction can be on-CPU for
+	 * many seconds.  To ensure that other threads that may be bound to
+	 * this CPU are able to make progress, make a voluntary preemption
+	 * call here.
+	 */
+	cond_resched();
+
 	return (bytes_evicted);
 }
 
 /*
+ * Allocate an array of buffer headers used as placeholders during arc state
+ * eviction.
+ */
+static arc_buf_hdr_t **
+arc_state_alloc_markers(int count)
+{
+	arc_buf_hdr_t **markers;
+
+	markers = kmem_zalloc(sizeof (*markers) * count, KM_SLEEP);
+	for (int i = 0; i < count; i++) {
+		markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
+
+		/*
+		 * A b_spa of 0 is used to indicate that this header is
+		 * a marker. This fact is used in arc_evict_type() and
+		 * arc_evict_state_impl().
+		 */
+		markers[i]->b_spa = 0;
+
+	}
+	return (markers);
+}
+
+static void
+arc_state_free_markers(arc_buf_hdr_t **markers, int count)
+{
+	for (int i = 0; i < count; i++)
+		kmem_cache_free(hdr_full_cache, markers[i]);
+	kmem_free(markers, sizeof (*markers) * count);
+}
+
+/*
  * Evict buffers from the given arc state, until we've removed the
  * specified number of bytes. Move the removed buffers to the
  * appropriate evict state.
@@ -4177,16 +4207,14 @@
  * the given arc state; which is used by arc_flush().
  */
 static uint64_t
-arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
+arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
     arc_buf_contents_t type)
 {
 	uint64_t total_evicted = 0;
-	multilist_t *ml = state->arcs_list[type];
+	multilist_t *ml = &state->arcs_list[type];
 	int num_sublists;
 	arc_buf_hdr_t **markers;
 
-	IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
-
 	num_sublists = multilist_get_num_sublists(ml);
 
 	/*
@@ -4196,19 +4224,15 @@
 	 * pick up where we left off for each individual sublist, rather
 	 * than starting from the tail each time.
 	 */
-	markers = kmem_zalloc(sizeof (*markers) * num_sublists, KM_SLEEP);
+	if (zthr_iscurthread(arc_evict_zthr)) {
+		markers = arc_state_evict_markers;
+		ASSERT3S(num_sublists, <=, arc_state_evict_marker_count);
+	} else {
+		markers = arc_state_alloc_markers(num_sublists);
+	}
 	for (int i = 0; i < num_sublists; i++) {
 		multilist_sublist_t *mls;
 
-		markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
-
-		/*
-		 * A b_spa of 0 is used to indicate that this header is
-		 * a marker. This fact is used in arc_adjust_type() and
-		 * arc_evict_state_impl().
-		 */
-		markers[i]->b_spa = 0;
-
 		mls = multilist_sublist_lock(ml, i);
 		multilist_sublist_insert_tail(mls, markers[i]);
 		multilist_sublist_unlock(mls);
@@ -4218,7 +4242,7 @@
 	 * While we haven't hit our target number of bytes to evict, or
 	 * we're evicting all available buffers.
 	 */
-	while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+	while (total_evicted < bytes) {
 		int sublist_idx = multilist_get_random_index(ml);
 		uint64_t scan_evicted = 0;
 
@@ -4227,10 +4251,11 @@
 		 * Request that 10% of the LRUs be scanned by the superblock
 		 * shrinker.
 		 */
-		if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size,
-		    arc_dnode_limit) > 0) {
-			arc_prune_async((aggsum_upper_bound(&astat_dnode_size) -
-			    arc_dnode_limit) / sizeof (dnode_t) /
+		if (type == ARC_BUFC_DATA && aggsum_compare(
+		    &arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) {
+			arc_prune_async((aggsum_upper_bound(
+			    &arc_sums.arcstat_dnode_size) -
+			    arc_dnode_size_limit) / sizeof (dnode_t) /
 			    zfs_arc_dnode_reduce_percent);
 		}
 
@@ -4245,9 +4270,7 @@
 			uint64_t bytes_remaining;
 			uint64_t bytes_evicted;
 
-			if (bytes == ARC_EVICT_ALL)
-				bytes_remaining = ARC_EVICT_ALL;
-			else if (total_evicted < bytes)
+			if (total_evicted < bytes)
 				bytes_remaining = bytes - total_evicted;
 			else
 				break;
@@ -4291,10 +4314,9 @@
 		multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
 		multilist_sublist_remove(mls, markers[i]);
 		multilist_sublist_unlock(mls);
-
-		kmem_cache_free(hdr_full_cache, markers[i]);
 	}
-	kmem_free(markers, sizeof (*markers) * num_sublists);
+	if (markers != arc_state_evict_markers)
+		arc_state_free_markers(markers, num_sublists);
 
 	return (total_evicted);
 }
@@ -4331,57 +4353,6 @@
 }
 
 /*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *ptr)
-{
-	arc_prune_t *ap = (arc_prune_t *)ptr;
-	arc_prune_func_t *func = ap->p_pfunc;
-
-	if (func != NULL)
-		func(ap->p_adjust, ap->p_private);
-
-	zfs_refcount_remove(&ap->p_refcnt, func);
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference.  This provides a mechanism to ensure the ARC can
- * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers.  This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread().  A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-static void
-arc_prune_async(int64_t adjust)
-{
-	arc_prune_t *ap;
-
-	mutex_enter(&arc_prune_mtx);
-	for (ap = list_head(&arc_prune_list); ap != NULL;
-	    ap = list_next(&arc_prune_list, ap)) {
-
-		if (zfs_refcount_count(&ap->p_refcnt) >= 2)
-			continue;
-
-		zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
-		ap->p_adjust = adjust;
-		if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
-		    ap, TQ_SLEEP) == TASKQID_INVALID) {
-			zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
-			continue;
-		}
-		ARCSTAT_BUMP(arcstat_prune);
-	}
-	mutex_exit(&arc_prune_mtx);
-}
-
-/*
  * Evict the specified number of bytes from the state specified,
  * restricting eviction to the spa and type given. This function
  * prevents us from trying to evict more from a state's list than
@@ -4390,10 +4361,10 @@
  * evict everything it can, when passed a negative value for "bytes".
  */
 static uint64_t
-arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
+arc_evict_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
     arc_buf_contents_t type)
 {
-	int64_t delta;
+	uint64_t delta;
 
 	if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
 		delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
@@ -4422,7 +4393,7 @@
  * available for reclaim.
  */
 static uint64_t
-arc_adjust_meta_balanced(uint64_t meta_used)
+arc_evict_meta_balanced(uint64_t meta_used)
 {
 	int64_t delta, prune = 0, adjustmnt;
 	uint64_t total_evicted = 0;
@@ -4432,7 +4403,7 @@
 restart:
 	/*
 	 * This slightly differs than the way we evict from the mru in
-	 * arc_adjust because we don't have a "target" value (i.e. no
+	 * arc_evict because we don't have a "target" value (i.e. no
 	 * "meta" arc_p). As a result, I think we can completely
 	 * cannibalize the metadata in the MRU before we evict the
 	 * metadata from the MFU. I think we probably need to implement a
@@ -4444,7 +4415,7 @@
 	    zfs_refcount_count(&arc_mru->arcs_esize[type]) > 0) {
 		delta = MIN(zfs_refcount_count(&arc_mru->arcs_esize[type]),
 		    adjustmnt);
-		total_evicted += arc_adjust_impl(arc_mru, 0, delta, type);
+		total_evicted += arc_evict_impl(arc_mru, 0, delta, type);
 		adjustmnt -= delta;
 	}
 
@@ -4462,7 +4433,7 @@
 	    zfs_refcount_count(&arc_mfu->arcs_esize[type]) > 0) {
 		delta = MIN(zfs_refcount_count(&arc_mfu->arcs_esize[type]),
 		    adjustmnt);
-		total_evicted += arc_adjust_impl(arc_mfu, 0, delta, type);
+		total_evicted += arc_evict_impl(arc_mfu, 0, delta, type);
 	}
 
 	adjustmnt = meta_used - arc_meta_limit;
@@ -4471,7 +4442,7 @@
 	    zfs_refcount_count(&arc_mru_ghost->arcs_esize[type]) > 0) {
 		delta = MIN(adjustmnt,
 		    zfs_refcount_count(&arc_mru_ghost->arcs_esize[type]));
-		total_evicted += arc_adjust_impl(arc_mru_ghost, 0, delta, type);
+		total_evicted += arc_evict_impl(arc_mru_ghost, 0, delta, type);
 		adjustmnt -= delta;
 	}
 
@@ -4479,7 +4450,7 @@
 	    zfs_refcount_count(&arc_mfu_ghost->arcs_esize[type]) > 0) {
 		delta = MIN(adjustmnt,
 		    zfs_refcount_count(&arc_mfu_ghost->arcs_esize[type]));
-		total_evicted += arc_adjust_impl(arc_mfu_ghost, 0, delta, type);
+		total_evicted += arc_evict_impl(arc_mfu_ghost, 0, delta, type);
 	}
 
 	/*
@@ -4489,7 +4460,7 @@
 	 * meta buffers.  Requests to the upper layers will be made with
 	 * increasingly large scan sizes until the ARC is below the limit.
 	 */
-	if (meta_used > arc_meta_limit) {
+	if (meta_used > arc_meta_limit || arc_available_memory() < 0) {
 		if (type == ARC_BUFC_DATA) {
 			type = ARC_BUFC_METADATA;
 		} else {
@@ -4510,11 +4481,11 @@
 }
 
 /*
- * Evict metadata buffers from the cache, such that arc_meta_used is
+ * Evict metadata buffers from the cache, such that arcstat_meta_used is
  * capped by the arc_meta_limit tunable.
  */
 static uint64_t
-arc_adjust_meta_only(uint64_t meta_used)
+arc_evict_meta_only(uint64_t meta_used)
 {
 	uint64_t total_evicted = 0;
 	int64_t target;
@@ -4530,7 +4501,7 @@
 	    (int64_t)(zfs_refcount_count(&arc_anon->arcs_size) +
 	    zfs_refcount_count(&arc_mru->arcs_size) - arc_p));
 
-	total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
+	total_evicted += arc_evict_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
 
 	/*
 	 * Similar to the above, we want to evict enough bytes to get us
@@ -4541,18 +4512,18 @@
 	    (int64_t)(zfs_refcount_count(&arc_mfu->arcs_size) -
 	    (arc_c - arc_p)));
 
-	total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+	total_evicted += arc_evict_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 
 	return (total_evicted);
 }
 
 static uint64_t
-arc_adjust_meta(uint64_t meta_used)
+arc_evict_meta(uint64_t meta_used)
 {
 	if (zfs_arc_meta_strategy == ARC_STRATEGY_META_ONLY)
-		return (arc_adjust_meta_only(meta_used));
+		return (arc_evict_meta_only(meta_used));
 	else
-		return (arc_adjust_meta_balanced(meta_used));
+		return (arc_evict_meta_balanced(meta_used));
 }
 
 /*
@@ -4564,10 +4535,10 @@
  * returned.
  */
 static arc_buf_contents_t
-arc_adjust_type(arc_state_t *state)
+arc_evict_type(arc_state_t *state)
 {
-	multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
-	multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
+	multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
+	multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
 	int data_idx = multilist_get_random_index(data_ml);
 	int meta_idx = multilist_get_random_index(meta_ml);
 	multilist_sublist_t *data_mls;
@@ -4631,22 +4602,22 @@
 }
 
 /*
- * Evict buffers from the cache, such that arc_size is capped by arc_c.
+ * Evict buffers from the cache, such that arcstat_size is capped by arc_c.
  */
 static uint64_t
-arc_adjust(void)
+arc_evict(void)
 {
 	uint64_t total_evicted = 0;
 	uint64_t bytes;
 	int64_t target;
-	uint64_t asize = aggsum_value(&arc_size);
-	uint64_t ameta = aggsum_value(&arc_meta_used);
+	uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
+	uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used);
 
 	/*
 	 * If we're over arc_meta_limit, we want to correct that before
 	 * potentially evicting data buffers below.
 	 */
-	total_evicted += arc_adjust_meta(ameta);
+	total_evicted += arc_evict_meta(ameta);
 
 	/*
 	 * Adjust MRU size
@@ -4670,9 +4641,9 @@
 	 * type. If we cannot satisfy the number of bytes from this
 	 * type, spill over into the next type.
 	 */
-	if (arc_adjust_type(arc_mru) == ARC_BUFC_METADATA &&
+	if (arc_evict_type(arc_mru) == ARC_BUFC_METADATA &&
 	    ameta > arc_meta_min) {
-		bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
+		bytes = arc_evict_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
 		total_evicted += bytes;
 
 		/*
@@ -4682,9 +4653,9 @@
 		target -= bytes;
 
 		total_evicted +=
-		    arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA);
+		    arc_evict_impl(arc_mru, 0, target, ARC_BUFC_DATA);
 	} else {
-		bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA);
+		bytes = arc_evict_impl(arc_mru, 0, target, ARC_BUFC_DATA);
 		total_evicted += bytes;
 
 		/*
@@ -4694,14 +4665,14 @@
 		target -= bytes;
 
 		total_evicted +=
-		    arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
+		    arc_evict_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
 	}
 
 	/*
 	 * Re-sum ARC stats after the first round of evictions.
 	 */
-	asize = aggsum_value(&arc_size);
-	ameta = aggsum_value(&arc_meta_used);
+	asize = aggsum_value(&arc_sums.arcstat_size);
+	ameta = aggsum_value(&arc_sums.arcstat_meta_used);
 
 
 	/*
@@ -4713,9 +4684,9 @@
 	 */
 	target = asize - arc_c;
 
-	if (arc_adjust_type(arc_mfu) == ARC_BUFC_METADATA &&
+	if (arc_evict_type(arc_mfu) == ARC_BUFC_METADATA &&
 	    ameta > arc_meta_min) {
-		bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+		bytes = arc_evict_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 		total_evicted += bytes;
 
 		/*
@@ -4725,9 +4696,9 @@
 		target -= bytes;
 
 		total_evicted +=
-		    arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
+		    arc_evict_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
 	} else {
-		bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
+		bytes = arc_evict_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
 		total_evicted += bytes;
 
 		/*
@@ -4737,7 +4708,7 @@
 		target -= bytes;
 
 		total_evicted +=
-		    arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+		    arc_evict_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 	}
 
 	/*
@@ -4754,13 +4725,13 @@
 	target = zfs_refcount_count(&arc_mru->arcs_size) +
 	    zfs_refcount_count(&arc_mru_ghost->arcs_size) - arc_c;
 
-	bytes = arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA);
+	bytes = arc_evict_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA);
 	total_evicted += bytes;
 
 	target -= bytes;
 
 	total_evicted +=
-	    arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_METADATA);
+	    arc_evict_impl(arc_mru_ghost, 0, target, ARC_BUFC_METADATA);
 
 	/*
 	 * We assume the sum of the mru list and mfu list is less than
@@ -4773,13 +4744,13 @@
 	target = zfs_refcount_count(&arc_mru_ghost->arcs_size) +
 	    zfs_refcount_count(&arc_mfu_ghost->arcs_size) - arc_c;
 
-	bytes = arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA);
+	bytes = arc_evict_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA);
 	total_evicted += bytes;
 
 	target -= bytes;
 
 	total_evicted +=
-	    arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA);
+	    arc_evict_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA);
 
 	return (total_evicted);
 }
@@ -4812,11 +4783,20 @@
 	(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
 }
 
-static void
+void
 arc_reduce_target_size(int64_t to_free)
 {
-	uint64_t asize = aggsum_value(&arc_size);
-	uint64_t c = arc_c;
+	uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
+
+	/*
+	 * All callers want the ARC to actually evict (at least) this much
+	 * memory.  Therefore we reduce from the lower of the current size and
+	 * the target size.  This way, even if arc_c is much higher than
+	 * arc_size (as can be the case after many calls to arc_freed()), we
+	 * will immediately have arc_c < arc_size and therefore the
+	 * arc_evict_zthr will evict.
+	 */
+	uint64_t c = MIN(arc_c, asize);
 
 	if (c > to_free && c - to_free > arc_c_min) {
 		arc_c = c - to_free;
@@ -4830,215 +4810,26 @@
 	}
 
 	if (asize > arc_c) {
-		/* See comment in arc_adjust_cb_check() on why lock+flag */
-		mutex_enter(&arc_adjust_lock);
-		arc_adjust_needed = B_TRUE;
-		mutex_exit(&arc_adjust_lock);
-		zthr_wakeup(arc_adjust_zthr);
+		/* See comment in arc_evict_cb_check() on why lock+flag */
+		mutex_enter(&arc_evict_lock);
+		arc_evict_needed = B_TRUE;
+		mutex_exit(&arc_evict_lock);
+		zthr_wakeup(arc_evict_zthr);
 	}
 }
-/*
- * Return maximum amount of memory that we could possibly use.  Reduced
- * to half of all memory in user space which is primarily used for testing.
- */
-static uint64_t
-arc_all_memory(void)
-{
-#ifdef _KERNEL
-#ifdef CONFIG_HIGHMEM
-	return (ptob(zfs_totalram_pages - zfs_totalhigh_pages));
-#else
-	return (ptob(zfs_totalram_pages));
-#endif /* CONFIG_HIGHMEM */
-#else
-	return (ptob(physmem) / 2);
-#endif /* _KERNEL */
-}
-
-/*
- * Return the amount of memory that is considered free.  In user space
- * which is primarily used for testing we pretend that free memory ranges
- * from 0-20% of all memory.
- */
-static uint64_t
-arc_free_memory(void)
-{
-#ifdef _KERNEL
-#ifdef CONFIG_HIGHMEM
-	struct sysinfo si;
-	si_meminfo(&si);
-	return (ptob(si.freeram - si.freehigh));
-#else
-	return (ptob(nr_free_pages() +
-	    nr_inactive_file_pages() +
-	    nr_inactive_anon_pages()));
-
-#endif /* CONFIG_HIGHMEM */
-#else
-	return (spa_get_random(arc_all_memory() * 20 / 100));
-#endif /* _KERNEL */
-}
-
-typedef enum free_memory_reason_t {
-	FMR_UNKNOWN,
-	FMR_NEEDFREE,
-	FMR_LOTSFREE,
-	FMR_SWAPFS_MINFREE,
-	FMR_PAGES_PP_MAXIMUM,
-	FMR_HEAP_ARENA,
-	FMR_ZIO_ARENA,
-} free_memory_reason_t;
-
-int64_t last_free_memory;
-free_memory_reason_t last_free_reason;
-
-#ifdef _KERNEL
-/*
- * Additional reserve of pages for pp_reserve.
- */
-int64_t arc_pages_pp_reserve = 64;
-
-/*
- * Additional reserve of pages for swapfs.
- */
-int64_t arc_swapfs_reserve = 64;
-#endif /* _KERNEL */
-
-/*
- * Return the amount of memory that can be consumed before reclaim will be
- * needed.  Positive if there is sufficient free memory, negative indicates
- * the amount of memory that needs to be freed up.
- */
-static int64_t
-arc_available_memory(void)
-{
-	int64_t lowest = INT64_MAX;
-	free_memory_reason_t r = FMR_UNKNOWN;
-#ifdef _KERNEL
-	int64_t n;
-#ifdef __linux__
-#ifdef freemem
-#undef freemem
-#endif
-	pgcnt_t needfree = btop(arc_need_free);
-	pgcnt_t lotsfree = btop(arc_sys_free);
-	pgcnt_t desfree = 0;
-	pgcnt_t freemem = btop(arc_free_memory());
-#endif
-
-	if (needfree > 0) {
-		n = PAGESIZE * (-needfree);
-		if (n < lowest) {
-			lowest = n;
-			r = FMR_NEEDFREE;
-		}
-	}
-
-	/*
-	 * check that we're out of range of the pageout scanner.  It starts to
-	 * schedule paging if freemem is less than lotsfree and needfree.
-	 * lotsfree is the high-water mark for pageout, and needfree is the
-	 * number of needed free pages.  We add extra pages here to make sure
-	 * the scanner doesn't start up while we're freeing memory.
-	 */
-	n = PAGESIZE * (freemem - lotsfree - needfree - desfree);
-	if (n < lowest) {
-		lowest = n;
-		r = FMR_LOTSFREE;
-	}
-
-#ifndef __linux__
-	/*
-	 * check to make sure that swapfs has enough space so that anon
-	 * reservations can still succeed. anon_resvmem() checks that the
-	 * availrmem is greater than swapfs_minfree, and the number of reserved
-	 * swap pages.  We also add a bit of extra here just to prevent
-	 * circumstances from getting really dire.
-	 */
-	n = PAGESIZE * (availrmem - swapfs_minfree - swapfs_reserve -
-	    desfree - arc_swapfs_reserve);
-	if (n < lowest) {
-		lowest = n;
-		r = FMR_SWAPFS_MINFREE;
-	}
-
-	/*
-	 * Check that we have enough availrmem that memory locking (e.g., via
-	 * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
-	 * stores the number of pages that cannot be locked; when availrmem
-	 * drops below pages_pp_maximum, page locking mechanisms such as
-	 * page_pp_lock() will fail.)
-	 */
-	n = PAGESIZE * (availrmem - pages_pp_maximum -
-	    arc_pages_pp_reserve);
-	if (n < lowest) {
-		lowest = n;
-		r = FMR_PAGES_PP_MAXIMUM;
-	}
-#endif
-
-#if defined(_ILP32)
-	/*
-	 * If we're on a 32-bit platform, it's possible that we'll exhaust the
-	 * kernel heap space before we ever run out of available physical
-	 * memory.  Most checks of the size of the heap_area compare against
-	 * tune.t_minarmem, which is the minimum available real memory that we
-	 * can have in the system.  However, this is generally fixed at 25 pages
-	 * which is so low that it's useless.  In this comparison, we seek to
-	 * calculate the total heap-size, and reclaim if more than 3/4ths of the
-	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
-	 * free)
-	 */
-	n = vmem_size(heap_arena, VMEM_FREE) -
-	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
-	if (n < lowest) {
-		lowest = n;
-		r = FMR_HEAP_ARENA;
-	}
-#endif
-
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this arena remains
-	 * above about 1/4th (1/(2^arc_zio_arena_free_shift)) free.
-	 *
-	 * Note that reducing the arc_zio_arena_free_shift keeps more virtual
-	 * memory (in the zio_arena) free, which can avoid memory
-	 * fragmentation issues.
-	 */
-	if (zio_arena != NULL) {
-		n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
-		    (vmem_size(zio_arena, VMEM_ALLOC) >>
-		    arc_zio_arena_free_shift);
-		if (n < lowest) {
-			lowest = n;
-			r = FMR_ZIO_ARENA;
-		}
-	}
-#else /* _KERNEL */
-	/* Every 100 calls, free a small amount */
-	if (spa_get_random(100) == 0)
-		lowest = -1024;
-#endif /* _KERNEL */
-
-	last_free_memory = lowest;
-	last_free_reason = r;
-
-	return (lowest);
-}
 
 /*
  * Determine if the system is under memory pressure and is asking
  * to reclaim memory. A return value of B_TRUE indicates that the system
  * is under memory pressure and that the arc should adjust accordingly.
  */
-static boolean_t
+boolean_t
 arc_reclaim_needed(void)
 {
 	return (arc_available_memory() < 0);
 }
 
-static void
+void
 arc_kmem_reap_soon(void)
 {
 	size_t			i;
@@ -5046,11 +4837,10 @@
 	kmem_cache_t		*prev_data_cache = NULL;
 	extern kmem_cache_t	*zio_buf_cache[];
 	extern kmem_cache_t	*zio_data_buf_cache[];
-	extern kmem_cache_t	*range_seg_cache;
 
 #ifdef _KERNEL
-	if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) &&
-	    zfs_arc_meta_prune) {
+	if ((aggsum_compare(&arc_sums.arcstat_meta_used,
+	    arc_meta_limit) >= 0) && zfs_arc_meta_prune) {
 		/*
 		 * We are exceeding our meta-data cache limit.
 		 * Prune some entries to release holds on meta-data.
@@ -5083,32 +4873,16 @@
 	kmem_cache_reap_now(buf_cache);
 	kmem_cache_reap_now(hdr_full_cache);
 	kmem_cache_reap_now(hdr_l2only_cache);
-	kmem_cache_reap_now(range_seg_cache);
-
-	if (zio_arena != NULL) {
-		/*
-		 * Ask the vmem arena to reclaim unused memory from its
-		 * quantum caches.
-		 */
-		vmem_qcache_reap(zio_arena);
-	}
+	kmem_cache_reap_now(zfs_btree_leaf_cache);
+	abd_cache_reap_now();
 }
 
-/* ARGSUSED */
 static boolean_t
-arc_adjust_cb_check(void *arg, zthr_t *zthr)
+arc_evict_cb_check(void *arg, zthr_t *zthr)
 {
-	if (!arc_initialized)
-		return (B_FALSE);
+	(void) arg, (void) zthr;
 
-	/*
-	 * This is necessary so that any changes which may have been made to
-	 * many of the zfs_arc_* module parameters will be propagated to
-	 * their actual internal variable counterparts. Without this,
-	 * changing those module params at runtime would have no effect.
-	 */
-	arc_tuning_update();
-
+#ifdef ZFS_DEBUG
 	/*
 	 * This is necessary in order to keep the kstat information
 	 * up to date for tools that display kstat data such as the
@@ -5116,82 +4890,85 @@
 	 * typically do not call kstat's update function, but simply
 	 * dump out stats from the most recent update.  Without
 	 * this call, these commands may show stale stats for the
-	 * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
-	 * with this change, the data might be up to 1 second
-	 * out of date(the arc_adjust_zthr has a maximum sleep
-	 * time of 1 second); but that should suffice.  The
-	 * arc_state_t structures can be queried directly if more
-	 * accurate information is needed.
+	 * anon, mru, mru_ghost, mfu, and mfu_ghost lists.  Even
+	 * with this call, the data might be out of date if the
+	 * evict thread hasn't been woken recently; but that should
+	 * suffice.  The arc_state_t structures can be queried
+	 * directly if more accurate information is needed.
 	 */
 	if (arc_ksp != NULL)
 		arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+#endif
 
 	/*
-	 * We have to rely on arc_get_data_impl() to tell us when to adjust,
-	 * rather than checking if we are overflowing here, so that we are
-	 * sure to not leave arc_get_data_impl() waiting on
-	 * arc_adjust_waiters_cv.  If we have become "not overflowing" since
-	 * arc_get_data_impl() checked, we need to wake it up.  We could
-	 * broadcast the CV here, but arc_get_data_impl() may have not yet
-	 * gone to sleep.  We would need to use a mutex to ensure that this
-	 * function doesn't broadcast until arc_get_data_impl() has gone to
-	 * sleep (e.g. the arc_adjust_lock).  However, the lock ordering of
-	 * such a lock would necessarily be incorrect with respect to the
-	 * zthr_lock, which is held before this function is called, and is
-	 * held by arc_get_data_impl() when it calls zthr_wakeup().
+	 * We have to rely on arc_wait_for_eviction() to tell us when to
+	 * evict, rather than checking if we are overflowing here, so that we
+	 * are sure to not leave arc_wait_for_eviction() waiting on aew_cv.
+	 * If we have become "not overflowing" since arc_wait_for_eviction()
+	 * checked, we need to wake it up.  We could broadcast the CV here,
+	 * but arc_wait_for_eviction() may have not yet gone to sleep.  We
+	 * would need to use a mutex to ensure that this function doesn't
+	 * broadcast until arc_wait_for_eviction() has gone to sleep (e.g.
+	 * the arc_evict_lock).  However, the lock ordering of such a lock
+	 * would necessarily be incorrect with respect to the zthr_lock,
+	 * which is held before this function is called, and is held by
+	 * arc_wait_for_eviction() when it calls zthr_wakeup().
 	 */
-	return (arc_adjust_needed);
+	return (arc_evict_needed);
 }
 
 /*
- * Keep arc_size under arc_c by running arc_adjust which evicts data
+ * Keep arc_size under arc_c by running arc_evict which evicts data
  * from the ARC.
  */
-/* ARGSUSED */
 static void
-arc_adjust_cb(void *arg, zthr_t *zthr)
+arc_evict_cb(void *arg, zthr_t *zthr)
 {
+	(void) arg, (void) zthr;
+
 	uint64_t evicted = 0;
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 
 	/* Evict from cache */
-	evicted = arc_adjust();
+	evicted = arc_evict();
 
 	/*
 	 * If evicted is zero, we couldn't evict anything
-	 * via arc_adjust(). This could be due to hash lock
+	 * via arc_evict(). This could be due to hash lock
 	 * collisions, but more likely due to the majority of
 	 * arc buffers being unevictable. Therefore, even if
 	 * arc_size is above arc_c, another pass is unlikely to
 	 * be helpful and could potentially cause us to enter an
 	 * infinite loop.  Additionally, zthr_iscancelled() is
 	 * checked here so that if the arc is shutting down, the
-	 * broadcast will wake any remaining arc adjust waiters.
+	 * broadcast will wake any remaining arc evict waiters.
 	 */
-	mutex_enter(&arc_adjust_lock);
-	arc_adjust_needed = !zthr_iscancelled(arc_adjust_zthr) &&
-	    evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0;
-	if (!arc_adjust_needed) {
+	mutex_enter(&arc_evict_lock);
+	arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) &&
+	    evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0;
+	if (!arc_evict_needed) {
 		/*
 		 * We're either no longer overflowing, or we
 		 * can't evict anything more, so we should wake
 		 * arc_get_data_impl() sooner.
 		 */
-		cv_broadcast(&arc_adjust_waiters_cv);
-		arc_need_free = 0;
+		arc_evict_waiter_t *aw;
+		while ((aw = list_remove_head(&arc_evict_waiters)) != NULL) {
+			cv_broadcast(&aw->aew_cv);
+		}
+		arc_set_need_free();
 	}
-	mutex_exit(&arc_adjust_lock);
+	mutex_exit(&arc_evict_lock);
 	spl_fstrans_unmark(cookie);
 }
 
-/* ARGSUSED */
 static boolean_t
 arc_reap_cb_check(void *arg, zthr_t *zthr)
 {
-	if (!arc_initialized)
-		return (B_FALSE);
+	(void) arg, (void) zthr;
 
 	int64_t free_memory = arc_available_memory();
+	static int reap_cb_check_counter = 0;
 
 	/*
 	 * If a kmem reap is already active, don't schedule more.  We must
@@ -5216,19 +4993,28 @@
 		arc_no_grow = B_FALSE;
 	}
 
+	/*
+	 * Called unconditionally on every 60th check (nominally every 60
+	 * seconds) to reclaim unused zstd compression and decompression
+	 * contexts.  Doing it here avoids the need for an independent thread.
+	 */
+	if (!((reap_cb_check_counter++) % 60))
+		zfs_zstd_cache_reap_now();
+
 	return (B_FALSE);
 }
 
 /*
  * Keep enough free memory in the system by reaping the ARC's kmem
  * caches.  To cause more slabs to be reapable, we may reduce the
- * target size of the cache (arc_c), causing the arc_adjust_cb()
+ * target size of the cache (arc_c), causing the arc_evict_cb()
  * to free more buffers.
  */
-/* ARGSUSED */
 static void
 arc_reap_cb(void *arg, zthr_t *zthr)
 {
+	(void) arg, (void) zthr;
+
 	int64_t free_memory;
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 
@@ -5253,18 +5039,16 @@
 	 * memory in the system at a fraction of the arc_size (1/128th by
 	 * default).  If oversubscribed (free_memory < 0) then reduce the
 	 * target arc_size by the deficit amount plus the fractional
-	 * amount.  If free memory is positive but less then the fractional
+	 * amount.  If free memory is positive but less than the fractional
 	 * amount, reduce by what is needed to hit the fractional amount.
 	 */
 	free_memory = arc_available_memory();
 
-	int64_t to_free =
-	    (arc_c >> arc_shrink_shift) - free_memory;
-	if (to_free > 0) {
-#ifdef _KERNEL
-		to_free = MAX(to_free, arc_need_free);
-#endif
-		arc_reduce_target_size(to_free);
+	int64_t can_free = arc_c - arc_c_min;
+	if (can_free > 0) {
+		int64_t to_free = (can_free >> arc_shrink_shift) - free_memory;
+		if (to_free > 0)
+			arc_reduce_target_size(to_free);
 	}
 	spl_fstrans_unmark(cookie);
 }
@@ -5315,109 +5099,7 @@
  *         already below arc_c_min, evicting any more would only
  *         increase this negative difference.
  */
-static uint64_t
-arc_evictable_memory(void)
-{
-	int64_t asize = aggsum_value(&arc_size);
-	uint64_t arc_clean =
-	    zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
-	    zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +
-	    zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) +
-	    zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
-	uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0);
 
-	/*
-	 * Scale reported evictable memory in proportion to page cache, cap
-	 * at specified min/max.
-	 */
-	uint64_t min = (ptob(nr_file_pages()) / 100) * zfs_arc_pc_percent;
-	min = MAX(arc_c_min, MIN(arc_c_max, min));
-
-	if (arc_dirty >= min)
-		return (arc_clean);
-
-	return (MAX((int64_t)asize - (int64_t)min, 0));
-}
-
-/*
- * If sc->nr_to_scan is zero, the caller is requesting a query of the
- * number of objects which can potentially be freed.  If it is nonzero,
- * the request is to free that many objects.
- *
- * Linux kernels >= 3.12 have the count_objects and scan_objects callbacks
- * in struct shrinker and also require the shrinker to return the number
- * of objects freed.
- *
- * Older kernels require the shrinker to return the number of freeable
- * objects following the freeing of nr_to_free.
- */
-static spl_shrinker_t
-__arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
-{
-	int64_t pages;
-
-	/* The arc is considered warm once reclaim has occurred */
-	if (unlikely(arc_warm == B_FALSE))
-		arc_warm = B_TRUE;
-
-	/* Return the potential number of reclaimable pages */
-	pages = btop((int64_t)arc_evictable_memory());
-	if (sc->nr_to_scan == 0)
-		return (pages);
-
-	/* Not allowed to perform filesystem reclaim */
-	if (!(sc->gfp_mask & __GFP_FS))
-		return (SHRINK_STOP);
-
-	/* Reclaim in progress */
-	if (mutex_tryenter(&arc_adjust_lock) == 0) {
-		ARCSTAT_INCR(arcstat_need_free, ptob(sc->nr_to_scan));
-		return (0);
-	}
-
-	mutex_exit(&arc_adjust_lock);
-
-	/*
-	 * Evict the requested number of pages by shrinking arc_c the
-	 * requested amount.
-	 */
-	if (pages > 0) {
-		arc_reduce_target_size(ptob(sc->nr_to_scan));
-		if (current_is_kswapd())
-			arc_kmem_reap_soon();
-#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
-		pages = MAX((int64_t)pages -
-		    (int64_t)btop(arc_evictable_memory()), 0);
-#else
-		pages = btop(arc_evictable_memory());
-#endif
-		/*
-		 * We've shrunk what we can, wake up threads.
-		 */
-		cv_broadcast(&arc_adjust_waiters_cv);
-	} else
-		pages = SHRINK_STOP;
-
-	/*
-	 * When direct reclaim is observed it usually indicates a rapid
-	 * increase in memory pressure.  This occurs because the kswapd
-	 * threads were unable to asynchronously keep enough free memory
-	 * available.  In this case set arc_no_grow to briefly pause arc
-	 * growth to avoid compounding the memory pressure.
-	 */
-	if (current_is_kswapd()) {
-		ARCSTAT_BUMP(arcstat_memory_indirect_count);
-	} else {
-		arc_no_grow = B_TRUE;
-		arc_kmem_reap_soon();
-		ARCSTAT_BUMP(arcstat_memory_direct_count);
-	}
-
-	return (pages);
-}
-SPL_SHRINKER_CALLBACK_WRAPPER(arc_shrinker_func);
-
-SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS);
 #endif /* _KERNEL */
 
 /*
@@ -5433,9 +5115,6 @@
 	int64_t mrug_size = zfs_refcount_count(&arc_mru_ghost->arcs_size);
 	int64_t mfug_size = zfs_refcount_count(&arc_mfu_ghost->arcs_size);
 
-	if (state == arc_l2c_only)
-		return;
-
 	ASSERT(bytes > 0);
 	/*
 	 * Adapt the target size of the MRU list:
@@ -5482,12 +5161,12 @@
 	 * cache size, increment the target cache size
 	 */
 	ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
-	if (aggsum_compare(&arc_size, arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) >=
-	    0) {
+	if (aggsum_upper_bound(&arc_sums.arcstat_size) >=
+	    arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
 		atomic_add_64(&arc_c, (int64_t)bytes);
 		if (arc_c > arc_c_max)
 			arc_c = arc_c_max;
-		else if (state == arc_anon)
+		else if (state == arc_anon && arc_p < arc_c >> 1)
 			atomic_add_64(&arc_p, (int64_t)bytes);
 		if (arc_p > arc_c)
 			arc_p = arc_c;
@@ -5499,8 +5178,8 @@
  * Check if arc_size has grown past our upper threshold, determined by
  * zfs_arc_overflow_shift.
  */
-static boolean_t
-arc_is_overflowing(void)
+static arc_ovf_level_t
+arc_is_overflowing(boolean_t use_reserve)
 {
 	/* Always allow at least one block of overflow */
 	int64_t overflow = MAX(SPA_MAXBLOCKSIZE,
@@ -5515,16 +5194,21 @@
 	 * in the ARC. In practice, that's in the tens of MB, which is low
 	 * enough to be safe.
 	 */
-	return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow);
+	int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
+	    arc_c - overflow / 2;
+	if (!use_reserve)
+		overflow /= 2;
+	return (over < 0 ? ARC_OVF_NONE :
+	    over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
 }
 
 static abd_t *
 arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
-    boolean_t do_adapt)
+    int alloc_flags)
 {
 	arc_buf_contents_t type = arc_buf_type(hdr);
 
-	arc_get_data_impl(hdr, size, tag, do_adapt);
+	arc_get_data_impl(hdr, size, tag, alloc_flags);
 	if (type == ARC_BUFC_METADATA) {
 		return (abd_alloc(size, B_TRUE));
 	} else {
@@ -5538,7 +5222,7 @@
 {
 	arc_buf_contents_t type = arc_buf_type(hdr);
 
-	arc_get_data_impl(hdr, size, tag, B_TRUE);
+	arc_get_data_impl(hdr, size, tag, ARC_HDR_DO_ADAPT);
 	if (type == ARC_BUFC_METADATA) {
 		return (zio_buf_alloc(size));
 	} else {
@@ -5548,6 +5232,85 @@
 }
 
 /*
+ * Wait for the specified amount of data (in bytes) to be evicted from the
+ * ARC, and for there to be sufficient free memory in the system.  Waiting for
+ * eviction ensures that the memory used by the ARC decreases.  Waiting for
+ * free memory ensures that the system won't run out of free pages, regardless
+ * of ARC behavior and settings.  See arc_lowmem_init().
+ */
+void
+arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
+{
+	switch (arc_is_overflowing(use_reserve)) {
+	case ARC_OVF_NONE:
+		return;
+	case ARC_OVF_SOME:
+		/*
+		 * This is a bit racy without taking arc_evict_lock, but the
+		 * worst that can happen is an extra zthr_wakeup() call due to
+		 * a race with another thread here, or the flag we set getting
+		 * cleared by arc_evict_cb().  The latter is unlikely given the
+		 * big hysteresis, and unimportant since at this level of
+		 * overflow the eviction is purely advisory.  Meanwhile, taking
+		 * the global lock here every time without waiting for the
+		 * actual eviction would create significant lock contention.
+		 */
+		if (!arc_evict_needed) {
+			arc_evict_needed = B_TRUE;
+			zthr_wakeup(arc_evict_zthr);
+		}
+		return;
+	case ARC_OVF_SEVERE:
+	default:
+	{
+		arc_evict_waiter_t aw;
+		list_link_init(&aw.aew_node);
+		cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
+
+		uint64_t last_count = 0;
+		mutex_enter(&arc_evict_lock);
+		if (!list_is_empty(&arc_evict_waiters)) {
+			arc_evict_waiter_t *last =
+			    list_tail(&arc_evict_waiters);
+			last_count = last->aew_count;
+		} else if (!arc_evict_needed) {
+			arc_evict_needed = B_TRUE;
+			zthr_wakeup(arc_evict_zthr);
+		}
+		/*
+		 * Note, the last waiter's count may be less than
+		 * arc_evict_count if we are low on memory, in which
+		 * case arc_evict_state_impl() may have deferred
+		 * wakeups (but still incremented arc_evict_count).
+		 */
+		aw.aew_count = MAX(last_count, arc_evict_count) + amount;
+
+		list_insert_tail(&arc_evict_waiters, &aw);
+
+		arc_set_need_free();
+
+		DTRACE_PROBE3(arc__wait__for__eviction,
+		    uint64_t, amount,
+		    uint64_t, arc_evict_count,
+		    uint64_t, aw.aew_count);
+
+		/*
+		 * We will be woken up either when arc_evict_count reaches
+		 * aew_count, or when the ARC is no longer overflowing and
+		 * eviction completes.
+		 * In case of "false" wakeup, we will still be on the list.
+		 */
+		do {
+			cv_wait(&aw.aew_cv, &arc_evict_lock);
+		} while (list_link_active(&aw.aew_node));
+		mutex_exit(&arc_evict_lock);
+
+		cv_destroy(&aw.aew_cv);
+	}
+	}
+}
+
+/*
  * Allocate a block and return it to the caller. If we are hitting the
  * hard limit for the cache size, we must sleep, waiting for the eviction
  * thread to catch up. If we're past the target size but below the hard
@@ -5555,50 +5318,29 @@
  */
 static void
 arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
-    boolean_t do_adapt)
+    int alloc_flags)
 {
 	arc_state_t *state = hdr->b_l1hdr.b_state;
 	arc_buf_contents_t type = arc_buf_type(hdr);
 
-	if (do_adapt)
+	if (alloc_flags & ARC_HDR_DO_ADAPT)
 		arc_adapt(size, state);
 
 	/*
-	 * If arc_size is currently overflowing, and has grown past our
-	 * upper limit, we must be adding data faster than the evict
-	 * thread can evict. Thus, to ensure we don't compound the
+	 * If arc_size is currently overflowing, we must be adding data
+	 * faster than we are evicting.  To ensure we don't compound the
 	 * problem by adding more data and forcing arc_size to grow even
-	 * further past it's target size, we halt and wait for the
-	 * eviction thread to catch up.
+	 * further past its target size, we wait for the eviction thread to
+	 * make some progress.  We also wait for there to be sufficient free
+	 * memory in the system, as measured by arc_free_memory().
 	 *
-	 * It's also possible that the reclaim thread is unable to evict
-	 * enough buffers to get arc_size below the overflow limit (e.g.
-	 * due to buffers being un-evictable, or hash lock collisions).
-	 * In this case, we want to proceed regardless if we're
-	 * overflowing; thus we don't use a while loop here.
+	 * Specifically, we wait for zfs_arc_eviction_pct percent of the
+	 * requested size to be evicted.  This should be more than 100%, to
+	 * ensure that progress is also made towards getting arc_size
+	 * under arc_c.  See the comment above zfs_arc_eviction_pct.
 	 */
-	if (arc_is_overflowing()) {
-		mutex_enter(&arc_adjust_lock);
-
-		/*
-		 * Now that we've acquired the lock, we may no longer be
-		 * over the overflow limit, lets check.
-		 *
-		 * We're ignoring the case of spurious wake ups. If that
-		 * were to happen, it'd let this thread consume an ARC
-		 * buffer before it should have (i.e. before we're under
-		 * the overflow limit and were signalled by the reclaim
-		 * thread). As long as that is a rare occurrence, it
-		 * shouldn't cause any harm.
-		 */
-		if (arc_is_overflowing()) {
-			arc_adjust_needed = B_TRUE;
-			zthr_wakeup(arc_adjust_zthr);
-			(void) cv_wait(&arc_adjust_waiters_cv,
-			    &arc_adjust_lock);
-		}
-		mutex_exit(&arc_adjust_lock);
-	}
+	arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
+	    alloc_flags & ARC_HDR_USE_RESERVE);
 
 	VERIFY3U(hdr->b_type, ==, type);
 	if (type == ARC_BUFC_METADATA) {
@@ -5634,10 +5376,11 @@
 		 * If we are growing the cache, and we are adding anonymous
 		 * data, and we have outgrown arc_p, update arc_p
 		 */
-		if (aggsum_upper_bound(&arc_size) < arc_c &&
+		if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
 		    hdr->b_l1hdr.b_state == arc_anon &&
 		    (zfs_refcount_count(&arc_anon->arcs_size) +
-		    zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
+		    zfs_refcount_count(&arc_mru->arcs_size) > arc_p &&
+		    arc_p < arc_c >> 1))
 			arc_p = MIN(arc_c, arc_p + size);
 	}
 }
@@ -5732,11 +5475,15 @@
 				ASSERT(multilist_link_active(
 				    &hdr->b_l1hdr.b_arc_node));
 			} else {
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_decrement_state(hdr);
 				arc_hdr_clear_flags(hdr,
 				    ARC_FLAG_PREFETCH |
 				    ARC_FLAG_PRESCIENT_PREFETCH);
-				atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+				hdr->b_l1hdr.b_mru_hits++;
 				ARCSTAT_BUMP(arcstat_mru_hits);
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_increment_state(hdr);
 			}
 			hdr->b_l1hdr.b_arc_access = now;
 			return;
@@ -5758,7 +5505,7 @@
 			DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
 			arc_change_state(arc_mfu, hdr, hash_lock);
 		}
-		atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
+		hdr->b_l1hdr.b_mru_hits++;
 		ARCSTAT_BUMP(arcstat_mru_hits);
 	} else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
 		arc_state_t	*new_state;
@@ -5767,13 +5514,16 @@
 		 * was evicted from the cache.  Move it to the
 		 * MFU state.
 		 */
-
 		if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
 			new_state = arc_mru;
 			if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) {
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_decrement_state(hdr);
 				arc_hdr_clear_flags(hdr,
 				    ARC_FLAG_PREFETCH |
 				    ARC_FLAG_PRESCIENT_PREFETCH);
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_increment_state(hdr);
 			}
 			DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
 		} else {
@@ -5784,7 +5534,7 @@
 		hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
 		arc_change_state(new_state, hdr, hash_lock);
 
-		atomic_inc_32(&hdr->b_l1hdr.b_mru_ghost_hits);
+		hdr->b_l1hdr.b_mru_ghost_hits++;
 		ARCSTAT_BUMP(arcstat_mru_ghost_hits);
 	} else if (hdr->b_l1hdr.b_state == arc_mfu) {
 		/*
@@ -5797,7 +5547,7 @@
 		 * the head of the list now.
 		 */
 
-		atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
+		hdr->b_l1hdr.b_mfu_hits++;
 		ARCSTAT_BUMP(arcstat_mfu_hits);
 		hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
 	} else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
@@ -5820,7 +5570,7 @@
 		DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
 		arc_change_state(new_state, hdr, hash_lock);
 
-		atomic_inc_32(&hdr->b_l1hdr.b_mfu_ghost_hits);
+		hdr->b_l1hdr.b_mfu_ghost_hits++;
 		ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
 	} else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
 		/*
@@ -5881,11 +5631,12 @@
 }
 
 /* a generic arc_read_done_func_t which you can use */
-/* ARGSUSED */
 void
 arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *arg)
 {
+	(void) zio, (void) zb, (void) bp;
+
 	if (buf == NULL)
 		return;
 
@@ -5894,11 +5645,11 @@
 }
 
 /* a generic arc_read_done_func_t */
-/* ARGSUSED */
 void
 arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *arg)
 {
+	(void) zb, (void) bp;
 	arc_buf_t **bufp = arg;
 
 	if (buf == NULL) {
@@ -5995,6 +5746,9 @@
 		} else {
 			hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
 		}
+		if (!HDR_L2_READING(hdr)) {
+			hdr->b_complevel = zio->io_prop.zp_complevel;
+		}
 	}
 
 	arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
@@ -6023,7 +5777,7 @@
 	 */
 	int callback_cnt = 0;
 	for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
-		if (!acb->acb_done)
+		if (!acb->acb_done || acb->acb_nobuf)
 			continue;
 
 		callback_cnt++;
@@ -6052,8 +5806,9 @@
 			error = SET_ERROR(EIO);
 			if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
 				spa_log_error(zio->io_spa, &acb->acb_zb);
-				zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
-				    zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
+				(void) zfs_ereport_post(
+				    FM_EREPORT_ZFS_AUTHENTICATION,
+				    zio->io_spa, NULL, &acb->acb_zb, zio, 0);
 			}
 		}
 
@@ -6187,10 +5942,14 @@
 	boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) &&
 	    (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
 	boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp);
+	boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
+	arc_buf_t *buf = NULL;
 	int rc = 0;
 
 	ASSERT(!embedded_bp ||
 	    BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
+	ASSERT(!BP_IS_HOLE(bp));
+	ASSERT(!BP_IS_REDACTED(bp));
 
 	/*
 	 * Normally SPL_FSTRANS will already be set since kernel threads which
@@ -6204,6 +5963,19 @@
 	 */
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 top:
+	/*
+	 * Verify the block pointer contents are reasonable.  This should
+	 * always be the case since the blkptr is protected by a checksum.
+	 * However, if there is damage it's desirable to detect this early
+	 * and treat it as a checksum error.  This allows an alternate blkptr
+	 * to be tried when one is available (e.g. ditto blocks).
+	 */
+	if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
+	    BLK_VERIFY_LOG)) {
+		rc = SET_ERROR(ECKSUM);
+		goto done;
+	}
+
 	if (!embedded_bp) {
 		/*
 		 * Embedded BP's have no DVA and require no I/O to "read".
@@ -6221,12 +5993,18 @@
 	 */
 	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) ||
 	    (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) {
-		arc_buf_t *buf = NULL;
 		*arc_flags |= ARC_FLAG_CACHED;
 
 		if (HDR_IO_IN_PROGRESS(hdr)) {
 			zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
 
+			if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
+				mutex_exit(hash_lock);
+				ARCSTAT_BUMP(arcstat_cached_only_in_progress);
+				rc = SET_ERROR(ENOENT);
+				goto done;
+			}
+
 			ASSERT3P(head_zio, !=, NULL);
 			if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
 			    priority == ZIO_PRIORITY_SYNC_READ) {
@@ -6262,6 +6040,7 @@
 				acb->acb_compressed = compressed_read;
 				acb->acb_encrypted = encrypted_read;
 				acb->acb_noauth = noauth_read;
+				acb->acb_nobuf = no_buf;
 				acb->acb_zb = *zb;
 				if (pio != NULL)
 					acb->acb_zio_dummy = zio_null(pio,
@@ -6271,8 +6050,6 @@
 				acb->acb_zio_head = head_zio;
 				acb->acb_next = hdr->b_l1hdr.b_acb;
 				hdr->b_l1hdr.b_acb = acb;
-				mutex_exit(hash_lock);
-				goto out;
 			}
 			mutex_exit(hash_lock);
 			goto out;
@@ -6281,7 +6058,7 @@
 		ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
 		    hdr->b_l1hdr.b_state == arc_mfu);
 
-		if (done) {
+		if (done && !no_buf) {
 			if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
 				/*
 				 * This is a demand read which does not have to
@@ -6319,9 +6096,9 @@
 				rc = SET_ERROR(EIO);
 				if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
 					spa_log_error(spa, zb);
-					zfs_ereport_post(
+					(void) zfs_ereport_post(
 					    FM_EREPORT_ZFS_AUTHENTICATION,
-					    spa, NULL, zb, NULL, 0, 0);
+					    spa, NULL, zb, NULL, 0);
 				}
 			}
 			if (rc != 0) {
@@ -6335,8 +6112,12 @@
 			ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
 			    rc != EACCES);
 		} else if (*arc_flags & ARC_FLAG_PREFETCH &&
-		    zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
+		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_decrement_state(hdr);
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_increment_state(hdr);
 		}
 		DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
 		arc_access(hdr, hash_lock);
@@ -6349,9 +6130,7 @@
 		ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
 		    demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
 		    data, metadata, hits);
-
-		if (done)
-			done(NULL, zb, bp, buf, private);
+		goto done;
 	} else {
 		uint64_t lsize = BP_GET_LSIZE(bp);
 		uint64_t psize = BP_GET_PSIZE(bp);
@@ -6363,13 +6142,11 @@
 		abd_t *hdr_abd;
 		int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
 
-		/*
-		 * Gracefully handle a damaged logical block size as a
-		 * checksum error.
-		 */
-		if (lsize > spa_maxblocksize(spa)) {
-			rc = SET_ERROR(ECKSUM);
-			goto out;
+		if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
+			if (hash_lock != NULL)
+				mutex_exit(hash_lock);
+			rc = SET_ERROR(ENOENT);
+			goto done;
 		}
 
 		if (hdr == NULL) {
@@ -6380,8 +6157,7 @@
 			arc_buf_hdr_t *exists = NULL;
 			arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
 			hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-			    BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type,
-			    encrypted_read);
+			    BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
 
 			if (!embedded_bp) {
 				hdr->b_dva = *BP_IDENTITY(bp);
@@ -6395,6 +6171,7 @@
 				arc_hdr_destroy(hdr);
 				goto top; /* restart the IO request */
 			}
+			alloc_flags |= ARC_HDR_DO_ADAPT;
 		} else {
 			/*
 			 * This block is in the ghost cache or encrypted data
@@ -6442,9 +6219,9 @@
 			 */
 			arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
 			arc_access(hdr, hash_lock);
-			arc_hdr_alloc_abd(hdr, alloc_flags);
 		}
 
+		arc_hdr_alloc_abd(hdr, alloc_flags);
 		if (encrypted_read) {
 			ASSERT(HDR_HAS_RABD(hdr));
 			size = HDR_GET_PSIZE(hdr);
@@ -6471,8 +6248,13 @@
 		}
 
 		if (*arc_flags & ARC_FLAG_PREFETCH &&
-		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
+		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_decrement_state(hdr);
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_increment_state(hdr);
+		}
 		if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
 			arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
 		if (*arc_flags & ARC_FLAG_L2CACHE)
@@ -6539,9 +6321,14 @@
 			ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
 			    demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data,
 			    metadata, misses);
+			zfs_racct_read(size, 1);
 		}
 
-		if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
+		/* Check if the spa even has l2 configured */
+		const boolean_t spa_has_l2 = l2arc_ndev != 0 &&
+		    spa->spa_l2cache.sav_count > 0;
+
+		if (vd != NULL && spa_has_l2 && !(l2arc_norw && devw)) {
 			/*
 			 * Read from the L2ARC if the following are true:
 			 * 1. The L2ARC vdev was previously cached.
@@ -6549,7 +6336,7 @@
 			 * 3. This buffer isn't currently writing to the L2ARC.
 			 * 4. The L2ARC entry wasn't evicted, which may
 			 *    also have invalidated the vdev.
-			 * 5. This isn't prefetch and l2arc_noprefetch is set.
+			 * 5. This isn't prefetch or l2arc_noprefetch is 0.
 			 */
 			if (HDR_HAS_L2HDR(hdr) &&
 			    !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
@@ -6560,7 +6347,7 @@
 
 				DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
 				ARCSTAT_BUMP(arcstat_l2_hits);
-				atomic_inc_32(&hdr->b_l2hdr.b_hits);
+				hdr->b_l2hdr.b_hits++;
 
 				cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
 				    KM_SLEEP);
@@ -6569,6 +6356,17 @@
 				cb->l2rcb_zb = *zb;
 				cb->l2rcb_flags = zio_flags;
 
+				/*
+				 * When Compressed ARC is disabled, but the
+				 * L2ARC block is compressed, arc_hdr_size()
+				 * will have returned LSIZE rather than PSIZE.
+				 */
+				if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+				    !HDR_COMPRESSION_ENABLED(hdr) &&
+				    HDR_GET_PSIZE(hdr) != 0) {
+					size = HDR_GET_PSIZE(hdr);
+				}
+
 				asize = vdev_psize_to_asize(vd, size);
 				if (asize != size) {
 					abd = abd_alloc_for_io(asize,
@@ -6631,15 +6429,24 @@
 		} else {
 			if (vd != NULL)
 				spa_config_exit(spa, SCL_L2ARC, vd);
+
 			/*
-			 * Skip ARC stat bump for block pointers with
-			 * embedded data. The data are read from the blkptr
-			 * itself via decode_embedded_bp_compressed().
+			 * Only a spa with l2 should contribute to l2
+			 * miss stats.  (Including the case of having a
+			 * faulted cache device - that's also a miss.)
 			 */
-			if (l2arc_ndev != 0 && !embedded_bp) {
-				DTRACE_PROBE1(l2arc__miss,
-				    arc_buf_hdr_t *, hdr);
-				ARCSTAT_BUMP(arcstat_l2_misses);
+			if (spa_has_l2) {
+				/*
+				 * Skip ARC stat bump for block pointers with
+				 * embedded data. The data are read from the
+				 * blkptr itself via
+				 * decode_embedded_bp_compressed().
+				 */
+				if (!embedded_bp) {
+					DTRACE_PROBE1(l2arc__miss,
+					    arc_buf_hdr_t *, hdr);
+					ARCSTAT_BUMP(arcstat_l2_misses);
+				}
 			}
 		}
 
@@ -6665,6 +6472,16 @@
 		spa_read_history_add(spa, zb, *arc_flags);
 	spl_fstrans_unmark(cookie);
 	return (rc);
+
+done:
+	if (done)
+		done(NULL, zb, bp, buf, private);
+	if (pio && rc != 0) {
+		zio_t *zio = zio_null(pio, spa, NULL, NULL, NULL, zio_flags);
+		zio->io_error = rc;
+		zio_nowait(zio);
+	}
+	goto out;
 }
 
 arc_prune_t *
@@ -6784,7 +6601,6 @@
 		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
 		ASSERT(!HDR_IN_HASH_TABLE(hdr));
 		ASSERT(!HDR_HAS_L2HDR(hdr));
-		ASSERT(HDR_EMPTY(hdr));
 
 		ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
 		ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
@@ -6936,7 +6752,7 @@
 		 * buffer which will be freed in arc_write().
 		 */
 		nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
-		    compress, type, HDR_HAS_RABD(hdr));
+		    compress, hdr->b_complevel, type);
 		ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
 		ASSERT0(nhdr->b_l1hdr.b_bufcnt);
 		ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -6947,11 +6763,6 @@
 		nhdr->b_l1hdr.b_bufcnt = 1;
 		if (ARC_BUF_ENCRYPTED(buf))
 			nhdr->b_crypt_hdr.b_ebufcnt = 1;
-		nhdr->b_l1hdr.b_mru_hits = 0;
-		nhdr->b_l1hdr.b_mru_ghost_hits = 0;
-		nhdr->b_l1hdr.b_mfu_hits = 0;
-		nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
-		nhdr->b_l1hdr.b_l2_hits = 0;
 		(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
 		buf->b_hdr = nhdr;
 
@@ -6968,7 +6779,6 @@
 		hdr->b_l1hdr.b_mru_ghost_hits = 0;
 		hdr->b_l1hdr.b_mfu_hits = 0;
 		hdr->b_l1hdr.b_mfu_ghost_hits = 0;
-		hdr->b_l1hdr.b_l2_hits = 0;
 		arc_change_state(arc_anon, hdr, hash_lock);
 		hdr->b_l1hdr.b_arc_access = 0;
 
@@ -7100,6 +6910,7 @@
 	}
 	HDR_SET_PSIZE(hdr, psize);
 	arc_hdr_set_compress(hdr, compress);
+	hdr->b_complevel = zio->io_prop.zp_complevel;
 
 	if (zio->io_error != 0 || psize == 0)
 		goto out;
@@ -7121,9 +6932,11 @@
 	if (ARC_BUF_ENCRYPTED(buf)) {
 		ASSERT3U(psize, >, 0);
 		ASSERT(ARC_BUF_COMPRESSED(buf));
-		arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+		arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
+		    ARC_HDR_USE_RESERVE);
 		abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
-	} else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
+	} else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
+	    !arc_can_share(hdr, buf)) {
 		/*
 		 * Ideally, we would always copy the io_abd into b_pabd, but the
 		 * user may have disabled compressed ARC, thus we must check the
@@ -7131,17 +6944,19 @@
 		 */
 		if (BP_IS_ENCRYPTED(bp)) {
 			ASSERT3U(psize, >, 0);
-			arc_hdr_alloc_abd(hdr,
-			    ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+			arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+			    ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE);
 			abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
 		} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
 		    !ARC_BUF_COMPRESSED(buf)) {
 			ASSERT3U(psize, >, 0);
-			arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+			arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+			    ARC_HDR_USE_RESERVE);
 			abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
 		} else {
 			ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
-			arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+			arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+			    ARC_HDR_USE_RESERVE);
 			abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
 			    arc_buf_size(buf));
 		}
@@ -7259,7 +7074,7 @@
 	ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 	callback->awcb_done(zio, buf, callback->awcb_private);
 
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 	kmem_free(callback, sizeof (arc_write_callback_t));
 }
 
@@ -7289,6 +7104,7 @@
 		ASSERT(ARC_BUF_COMPRESSED(buf));
 		localprop.zp_encrypt = B_TRUE;
 		localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+		localprop.zp_complevel = hdr->b_complevel;
 		localprop.zp_byteorder =
 		    (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
 		    ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
@@ -7307,6 +7123,7 @@
 	} else if (ARC_BUF_COMPRESSED(buf)) {
 		ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
 		localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+		localprop.zp_complevel = hdr->b_complevel;
 		zio_flags |= ZIO_FLAG_RAW_COMPRESS;
 	}
 	callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
@@ -7355,49 +7172,6 @@
 	return (zio);
 }
 
-static int
-arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
-{
-#ifdef _KERNEL
-	uint64_t available_memory = arc_free_memory();
-
-#if defined(_ILP32)
-	available_memory =
-	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
-#endif
-
-	if (available_memory > arc_all_memory() * arc_lotsfree_percent / 100)
-		return (0);
-
-	if (txg > spa->spa_lowmem_last_txg) {
-		spa->spa_lowmem_last_txg = txg;
-		spa->spa_lowmem_page_load = 0;
-	}
-	/*
-	 * If we are in pageout, we know that memory is already tight,
-	 * the arc is already going to be evicting, so we just want to
-	 * continue to let page writes occur as quickly as possible.
-	 */
-	if (current_is_kswapd()) {
-		if (spa->spa_lowmem_page_load >
-		    MAX(arc_sys_free / 4, available_memory) / 4) {
-			DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
-			return (SET_ERROR(ERESTART));
-		}
-		/* Note: reserve is inflated, so we deflate */
-		atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
-		return (0);
-	} else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
-		/* memory is low, delay before restarting */
-		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
-		DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
-		return (SET_ERROR(EAGAIN));
-	}
-	spa->spa_lowmem_page_load = 0;
-#endif /* _KERNEL */
-	return (0);
-}
-
 void
 arc_tempreserve_clear(uint64_t reserve)
 {
@@ -7464,9 +7238,9 @@
 	 */
 	uint64_t total_dirty = reserve + arc_tempreserve + anon_size;
 	uint64_t spa_dirty_anon = spa_dirty_data(spa);
-
-	if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
-	    anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
+	uint64_t rarc_c = arc_warm ? arc_c : arc_c_max;
+	if (total_dirty > rarc_c * zfs_arc_dirty_limit_percent / 100 &&
+	    anon_size > rarc_c * zfs_arc_anon_limit_percent / 100 &&
 	    spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) {
 #ifdef ZFS_DEBUG
 		uint64_t meta_esize = zfs_refcount_count(
@@ -7474,9 +7248,12 @@
 		uint64_t data_esize =
 		    zfs_refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
 		dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
-		    "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
-		    arc_tempreserve >> 10, meta_esize >> 10,
-		    data_esize >> 10, reserve >> 10, arc_c >> 10);
+		    "anon_data=%lluK tempreserve=%lluK rarc_c=%lluK\n",
+		    (u_longlong_t)arc_tempreserve >> 10,
+		    (u_longlong_t)meta_esize >> 10,
+		    (u_longlong_t)data_esize >> 10,
+		    (u_longlong_t)reserve >> 10,
+		    (u_longlong_t)rarc_c >> 10);
 #endif
 		DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
 		return (SET_ERROR(ERESTART));
@@ -7501,48 +7278,219 @@
 {
 	arc_stats_t *as = ksp->ks_data;
 
-	if (rw == KSTAT_WRITE) {
+	if (rw == KSTAT_WRITE)
 		return (SET_ERROR(EACCES));
-	} else {
-		arc_kstat_update_state(arc_anon,
-		    &as->arcstat_anon_size,
-		    &as->arcstat_anon_evictable_data,
-		    &as->arcstat_anon_evictable_metadata);
-		arc_kstat_update_state(arc_mru,
-		    &as->arcstat_mru_size,
-		    &as->arcstat_mru_evictable_data,
-		    &as->arcstat_mru_evictable_metadata);
-		arc_kstat_update_state(arc_mru_ghost,
-		    &as->arcstat_mru_ghost_size,
-		    &as->arcstat_mru_ghost_evictable_data,
-		    &as->arcstat_mru_ghost_evictable_metadata);
-		arc_kstat_update_state(arc_mfu,
-		    &as->arcstat_mfu_size,
-		    &as->arcstat_mfu_evictable_data,
-		    &as->arcstat_mfu_evictable_metadata);
-		arc_kstat_update_state(arc_mfu_ghost,
-		    &as->arcstat_mfu_ghost_size,
-		    &as->arcstat_mfu_ghost_evictable_data,
-		    &as->arcstat_mfu_ghost_evictable_metadata);
 
-		ARCSTAT(arcstat_size) = aggsum_value(&arc_size);
-		ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used);
-		ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size);
-		ARCSTAT(arcstat_metadata_size) =
-		    aggsum_value(&astat_metadata_size);
-		ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size);
-		ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
-		ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
-		ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
-		ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
+	as->arcstat_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_hits);
+	as->arcstat_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_misses);
+	as->arcstat_demand_data_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_data_hits);
+	as->arcstat_demand_data_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_data_misses);
+	as->arcstat_demand_metadata_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_metadata_hits);
+	as->arcstat_demand_metadata_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_metadata_misses);
+	as->arcstat_prefetch_data_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_prefetch_data_hits);
+	as->arcstat_prefetch_data_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_prefetch_data_misses);
+	as->arcstat_prefetch_metadata_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits);
+	as->arcstat_prefetch_metadata_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses);
+	as->arcstat_mru_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_mru_hits);
+	as->arcstat_mru_ghost_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_mru_ghost_hits);
+	as->arcstat_mfu_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_mfu_hits);
+	as->arcstat_mfu_ghost_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
+	as->arcstat_deleted.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_deleted);
+	as->arcstat_mutex_miss.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_mutex_miss);
+	as->arcstat_access_skip.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_access_skip);
+	as->arcstat_evict_skip.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_skip);
+	as->arcstat_evict_not_enough.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_not_enough);
+	as->arcstat_evict_l2_cached.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_cached);
+	as->arcstat_evict_l2_eligible.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_eligible);
+	as->arcstat_evict_l2_eligible_mfu.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu);
+	as->arcstat_evict_l2_eligible_mru.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru);
+	as->arcstat_evict_l2_ineligible.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
+	as->arcstat_evict_l2_skip.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_evict_l2_skip);
+	as->arcstat_hash_collisions.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_hash_collisions);
+	as->arcstat_hash_chains.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_hash_chains);
+	as->arcstat_size.value.ui64 =
+	    aggsum_value(&arc_sums.arcstat_size);
+	as->arcstat_compressed_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_compressed_size);
+	as->arcstat_uncompressed_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_uncompressed_size);
+	as->arcstat_overhead_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_overhead_size);
+	as->arcstat_hdr_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_hdr_size);
+	as->arcstat_data_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_data_size);
+	as->arcstat_metadata_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_metadata_size);
+	as->arcstat_dbuf_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_dbuf_size);
+#if defined(COMPAT_FREEBSD11)
+	as->arcstat_other_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_bonus_size) +
+	    aggsum_value(&arc_sums.arcstat_dnode_size) +
+	    wmsum_value(&arc_sums.arcstat_dbuf_size);
+#endif
 
-		as->arcstat_memory_all_bytes.value.ui64 =
-		    arc_all_memory();
-		as->arcstat_memory_free_bytes.value.ui64 =
-		    arc_free_memory();
-		as->arcstat_memory_available_bytes.value.i64 =
-		    arc_available_memory();
-	}
+	arc_kstat_update_state(arc_anon,
+	    &as->arcstat_anon_size,
+	    &as->arcstat_anon_evictable_data,
+	    &as->arcstat_anon_evictable_metadata);
+	arc_kstat_update_state(arc_mru,
+	    &as->arcstat_mru_size,
+	    &as->arcstat_mru_evictable_data,
+	    &as->arcstat_mru_evictable_metadata);
+	arc_kstat_update_state(arc_mru_ghost,
+	    &as->arcstat_mru_ghost_size,
+	    &as->arcstat_mru_ghost_evictable_data,
+	    &as->arcstat_mru_ghost_evictable_metadata);
+	arc_kstat_update_state(arc_mfu,
+	    &as->arcstat_mfu_size,
+	    &as->arcstat_mfu_evictable_data,
+	    &as->arcstat_mfu_evictable_metadata);
+	arc_kstat_update_state(arc_mfu_ghost,
+	    &as->arcstat_mfu_ghost_size,
+	    &as->arcstat_mfu_ghost_evictable_data,
+	    &as->arcstat_mfu_ghost_evictable_metadata);
+
+	as->arcstat_dnode_size.value.ui64 =
+	    aggsum_value(&arc_sums.arcstat_dnode_size);
+	as->arcstat_bonus_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_bonus_size);
+	as->arcstat_l2_hits.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_hits);
+	as->arcstat_l2_misses.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_misses);
+	as->arcstat_l2_prefetch_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_prefetch_asize);
+	as->arcstat_l2_mru_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_mru_asize);
+	as->arcstat_l2_mfu_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_mfu_asize);
+	as->arcstat_l2_bufc_data_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize);
+	as->arcstat_l2_bufc_metadata_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize);
+	as->arcstat_l2_feeds.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_feeds);
+	as->arcstat_l2_rw_clash.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rw_clash);
+	as->arcstat_l2_read_bytes.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_read_bytes);
+	as->arcstat_l2_write_bytes.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_write_bytes);
+	as->arcstat_l2_writes_sent.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_writes_sent);
+	as->arcstat_l2_writes_done.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_writes_done);
+	as->arcstat_l2_writes_error.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_writes_error);
+	as->arcstat_l2_writes_lock_retry.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry);
+	as->arcstat_l2_evict_lock_retry.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry);
+	as->arcstat_l2_evict_reading.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_evict_reading);
+	as->arcstat_l2_evict_l1cached.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_evict_l1cached);
+	as->arcstat_l2_free_on_write.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_free_on_write);
+	as->arcstat_l2_abort_lowmem.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_abort_lowmem);
+	as->arcstat_l2_cksum_bad.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_cksum_bad);
+	as->arcstat_l2_io_error.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_io_error);
+	as->arcstat_l2_lsize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_lsize);
+	as->arcstat_l2_psize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_psize);
+	as->arcstat_l2_hdr_size.value.ui64 =
+	    aggsum_value(&arc_sums.arcstat_l2_hdr_size);
+	as->arcstat_l2_log_blk_writes.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_log_blk_writes);
+	as->arcstat_l2_log_blk_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_log_blk_asize);
+	as->arcstat_l2_log_blk_count.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_log_blk_count);
+	as->arcstat_l2_rebuild_success.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_success);
+	as->arcstat_l2_rebuild_abort_unsupported.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+	as->arcstat_l2_rebuild_abort_io_errors.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+	as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+	as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+	as->arcstat_l2_rebuild_abort_lowmem.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+	as->arcstat_l2_rebuild_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_size);
+	as->arcstat_l2_rebuild_asize.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_asize);
+	as->arcstat_l2_rebuild_bufs.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs);
+	as->arcstat_l2_rebuild_bufs_precached.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+	as->arcstat_l2_rebuild_log_blks.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks);
+	as->arcstat_memory_throttle_count.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_memory_throttle_count);
+	as->arcstat_memory_direct_count.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_memory_direct_count);
+	as->arcstat_memory_indirect_count.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_memory_indirect_count);
+
+	as->arcstat_memory_all_bytes.value.ui64 =
+	    arc_all_memory();
+	as->arcstat_memory_free_bytes.value.ui64 =
+	    arc_free_memory();
+	as->arcstat_memory_available_bytes.value.i64 =
+	    arc_available_memory();
+
+	as->arcstat_prune.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_prune);
+	as->arcstat_meta_used.value.ui64 =
+	    aggsum_value(&arc_sums.arcstat_meta_used);
+	as->arcstat_async_upgrade_sync.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_async_upgrade_sync);
+	as->arcstat_demand_hit_predictive_prefetch.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+	as->arcstat_demand_hit_prescient_prefetch.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+	as->arcstat_raw_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_raw_size);
+	as->arcstat_cached_only_in_progress.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_cached_only_in_progress);
+	as->arcstat_abd_chunk_waste_size.value.ui64 =
+	    wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size);
 
 	return (0);
 }
@@ -7554,7 +7502,7 @@
  * distributed between all sublists and uses this assumption when
  * deciding which sublist to evict from and how much to evict from it.
  */
-unsigned int
+static unsigned int
 arc_state_multilist_index_func(multilist_t *ml, void *obj)
 {
 	arc_buf_hdr_t *hdr = obj;
@@ -7576,12 +7524,27 @@
 	 * Also, the low order bits of the hash value are thought to be
 	 * distributed evenly. Otherwise, in the case that the multilist
 	 * has a power of two number of sublists, each sublists' usage
-	 * would not be evenly distributed.
+	 * would not be evenly distributed. In this context full 64bit
+	 * division would be a waste of time, so limit it to 32 bits.
 	 */
-	return (buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
+	return ((unsigned int)buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
 	    multilist_get_num_sublists(ml));
 }
 
+/*
+ * Sublist index function for the arc_l2c_only multilists.  Headers are
+ * never expected to be inserted into those lists, so any call here is
+ * treated as a fatal error: it panics, reporting the header and the list.
+ */
+static unsigned int
+arc_state_l2c_multilist_index_func(multilist_t *ml, void *obj)
+{
+	panic("Header %p insert into arc_l2c_only %p", obj, ml);
+}
+
+/*
+ * Warn, when do_warn is set, that a non-zero tunable was not applied
+ * because it differs from the value actually in effect.
+ *
+ *	tuning	- the tunable variable; its name is stringified in the message
+ *	value	- the setting that is really being used
+ *	do_warn	- emit the warning only when this is true
+ *
+ * The effective value is cast to u_longlong_t so that the argument always
+ * matches the %llu conversion, whatever integer type the caller passes.
+ */
+#define	WARN_IF_TUNING_IGNORED(tuning, value, do_warn) do {	\
+	if ((do_warn) && (tuning) && ((tuning) != (value))) {	\
+		cmn_err(CE_WARN,				\
+		    "ignoring tunable %s (using %llu instead)",	\
+		    (#tuning), (u_longlong_t)(value));		\
+	}							\
+} while (0)
+
 /*
  * Called during module initialization and periodically thereafter to
  * apply reasonable changes to the exposed performance tunings.  Can also be
@@ -7589,25 +7552,12 @@
  * updated manually.  Non-zero zfs_* values which differ from the currently set
  * values will be applied.
  */
-static void
-arc_tuning_update(void)
+void
+arc_tuning_update(boolean_t verbose)
 {
 	uint64_t allmem = arc_all_memory();
 	unsigned long limit;
 
-	/* Valid range: 64M - <all physical memory> */
-	if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
-	    (zfs_arc_max >= 64 << 20) && (zfs_arc_max < allmem) &&
-	    (zfs_arc_max > arc_c_min)) {
-		arc_c_max = zfs_arc_max;
-		arc_c = arc_c_max;
-		arc_p = (arc_c >> 1);
-		if (arc_meta_limit > arc_c_max)
-			arc_meta_limit = arc_c_max;
-		if (arc_dnode_limit > arc_meta_limit)
-			arc_dnode_limit = arc_meta_limit;
-	}
-
 	/* Valid range: 32M - <arc_c_max> */
 	if ((zfs_arc_min) && (zfs_arc_min != arc_c_min) &&
 	    (zfs_arc_min >= 2ULL << SPA_MAXBLOCKSHIFT) &&
@@ -7615,6 +7565,21 @@
 		arc_c_min = zfs_arc_min;
 		arc_c = MAX(arc_c, arc_c_min);
 	}
+	WARN_IF_TUNING_IGNORED(zfs_arc_min, arc_c_min, verbose);
+
+	/* Valid range: 64M - <all physical memory> */
+	if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
+	    (zfs_arc_max >= MIN_ARC_MAX) && (zfs_arc_max < allmem) &&
+	    (zfs_arc_max > arc_c_min)) {
+		arc_c_max = zfs_arc_max;
+		arc_c = MIN(arc_c, arc_c_max);
+		arc_p = (arc_c >> 1);
+		if (arc_meta_limit > arc_c_max)
+			arc_meta_limit = arc_c_max;
+		if (arc_dnode_size_limit > arc_meta_limit)
+			arc_dnode_size_limit = arc_meta_limit;
+	}
+	WARN_IF_TUNING_IGNORED(zfs_arc_max, arc_c_max, verbose);
 
 	/* Valid range: 16M - <arc_c_max> */
 	if ((zfs_arc_meta_min) && (zfs_arc_meta_min != arc_meta_min) &&
@@ -7623,9 +7588,10 @@
 		arc_meta_min = zfs_arc_meta_min;
 		if (arc_meta_limit < arc_meta_min)
 			arc_meta_limit = arc_meta_min;
-		if (arc_dnode_limit < arc_meta_min)
-			arc_dnode_limit = arc_meta_min;
+		if (arc_dnode_size_limit < arc_meta_min)
+			arc_dnode_size_limit = arc_meta_min;
 	}
+	WARN_IF_TUNING_IGNORED(zfs_arc_meta_min, arc_meta_min, verbose);
 
 	/* Valid range: <arc_meta_min> - <arc_c_max> */
 	limit = zfs_arc_meta_limit ? zfs_arc_meta_limit :
@@ -7634,14 +7600,17 @@
 	    (limit >= arc_meta_min) &&
 	    (limit <= arc_c_max))
 		arc_meta_limit = limit;
+	WARN_IF_TUNING_IGNORED(zfs_arc_meta_limit, arc_meta_limit, verbose);
 
 	/* Valid range: <arc_meta_min> - <arc_meta_limit> */
 	limit = zfs_arc_dnode_limit ? zfs_arc_dnode_limit :
 	    MIN(zfs_arc_dnode_limit_percent, 100) * arc_meta_limit / 100;
-	if ((limit != arc_dnode_limit) &&
+	if ((limit != arc_dnode_size_limit) &&
 	    (limit >= arc_meta_min) &&
 	    (limit <= arc_meta_limit))
-		arc_dnode_limit = limit;
+		arc_dnode_size_limit = limit;
+	WARN_IF_TUNING_IGNORED(zfs_arc_dnode_limit, arc_dnode_size_limit,
+	    verbose);
 
 	/* Valid range: 1 - N */
 	if (zfs_arc_grow_retry)
@@ -7671,63 +7640,61 @@
 	if ((zfs_arc_lotsfree_percent >= 0) &&
 	    (zfs_arc_lotsfree_percent <= 100))
 		arc_lotsfree_percent = zfs_arc_lotsfree_percent;
+	WARN_IF_TUNING_IGNORED(zfs_arc_lotsfree_percent, arc_lotsfree_percent,
+	    verbose);
 
 	/* Valid range: 0 - <all physical memory> */
 	if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
 		arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem);
+	WARN_IF_TUNING_IGNORED(zfs_arc_sys_free, arc_sys_free, verbose);
+}
 
+/*
+ * Create one ARC state multilist keyed by the given index function and
+ * fold its sublist count into the running maximum kept in *maxcountp.
+ */
+static void
+arc_state_multilist_init(multilist_t *ml,
+    multilist_sublist_index_func_t *index_func, int *maxcountp)
+{
+	unsigned int nsublists;
+
+	multilist_create(ml, sizeof (arc_buf_hdr_t),
+	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), index_func);
+
+	/* Raise the caller's maximum if this list has more sublists. */
+	nsublists = multilist_get_num_sublists(ml);
+	if (*maxcountp < nsublists)
+		*maxcountp = nsublists;
 }
 
 static void
 arc_state_init(void)
 {
-	arc_anon = &ARC_anon;
-	arc_mru = &ARC_mru;
-	arc_mru_ghost = &ARC_mru_ghost;
-	arc_mfu = &ARC_mfu;
-	arc_mfu_ghost = &ARC_mfu_ghost;
-	arc_l2c_only = &ARC_l2c_only;
+	int num_sublists = 0;
 
-	arc_mru->arcs_list[ARC_BUFC_METADATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mru->arcs_list[ARC_BUFC_DATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mfu->arcs_list[ARC_BUFC_METADATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mfu->arcs_list[ARC_BUFC_DATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
-	arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
-	    multilist_create(sizeof (arc_buf_hdr_t),
-	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
-	    arc_state_multilist_index_func);
+	arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_METADATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mru->arcs_list[ARC_BUFC_DATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mfu->arcs_list[ARC_BUFC_DATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
+	    arc_state_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
+	    arc_state_multilist_index_func, &num_sublists);
+
+	/*
+	 * L2 headers should never be on the L2 state list since they don't
+	 * have L1 headers allocated.  Special index function asserts that.
+	 */
+	arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
+	    arc_state_l2c_multilist_index_func, &num_sublists);
+	arc_state_multilist_init(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
+	    arc_state_l2c_multilist_index_func, &num_sublists);
+
+	/*
+	 * Keep track of the number of markers needed to reclaim buffers from
+	 * any ARC state.  The markers will be pre-allocated so as to minimize
+	 * the number of memory allocations performed by the eviction thread.
+	 */
+	arc_state_evict_marker_count = num_sublists;
 
 	zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
 	zfs_refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
@@ -7749,15 +7716,93 @@
 	zfs_refcount_create(&arc_mfu_ghost->arcs_size);
 	zfs_refcount_create(&arc_l2c_only->arcs_size);
 
-	aggsum_init(&arc_meta_used, 0);
-	aggsum_init(&arc_size, 0);
-	aggsum_init(&astat_data_size, 0);
-	aggsum_init(&astat_metadata_size, 0);
-	aggsum_init(&astat_hdr_size, 0);
-	aggsum_init(&astat_l2_hdr_size, 0);
-	aggsum_init(&astat_bonus_size, 0);
-	aggsum_init(&astat_dnode_size, 0);
-	aggsum_init(&astat_dbuf_size, 0);
+	wmsum_init(&arc_sums.arcstat_hits, 0);
+	wmsum_init(&arc_sums.arcstat_misses, 0);
+	wmsum_init(&arc_sums.arcstat_demand_data_hits, 0);
+	wmsum_init(&arc_sums.arcstat_demand_data_misses, 0);
+	wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0);
+	wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0);
+	wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0);
+	wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0);
+	wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0);
+	wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0);
+	wmsum_init(&arc_sums.arcstat_mru_hits, 0);
+	wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
+	wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
+	wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
+	wmsum_init(&arc_sums.arcstat_deleted, 0);
+	wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
+	wmsum_init(&arc_sums.arcstat_access_skip, 0);
+	wmsum_init(&arc_sums.arcstat_evict_skip, 0);
+	wmsum_init(&arc_sums.arcstat_evict_not_enough, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
+	wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
+	wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
+	wmsum_init(&arc_sums.arcstat_hash_chains, 0);
+	aggsum_init(&arc_sums.arcstat_size, 0);
+	wmsum_init(&arc_sums.arcstat_compressed_size, 0);
+	wmsum_init(&arc_sums.arcstat_uncompressed_size, 0);
+	wmsum_init(&arc_sums.arcstat_overhead_size, 0);
+	wmsum_init(&arc_sums.arcstat_hdr_size, 0);
+	wmsum_init(&arc_sums.arcstat_data_size, 0);
+	wmsum_init(&arc_sums.arcstat_metadata_size, 0);
+	wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
+	aggsum_init(&arc_sums.arcstat_dnode_size, 0);
+	wmsum_init(&arc_sums.arcstat_bonus_size, 0);
+	wmsum_init(&arc_sums.arcstat_l2_hits, 0);
+	wmsum_init(&arc_sums.arcstat_l2_misses, 0);
+	wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_feeds, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0);
+	wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0);
+	wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0);
+	wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0);
+	wmsum_init(&arc_sums.arcstat_l2_writes_done, 0);
+	wmsum_init(&arc_sums.arcstat_l2_writes_error, 0);
+	wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0);
+	wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0);
+	wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0);
+	wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0);
+	wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0);
+	wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0);
+	wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0);
+	wmsum_init(&arc_sums.arcstat_l2_io_error, 0);
+	wmsum_init(&arc_sums.arcstat_l2_lsize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_psize, 0);
+	aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0);
+	wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0);
+	wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0);
+	wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0);
+	wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0);
+	wmsum_init(&arc_sums.arcstat_memory_direct_count, 0);
+	wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0);
+	wmsum_init(&arc_sums.arcstat_prune, 0);
+	aggsum_init(&arc_sums.arcstat_meta_used, 0);
+	wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0);
+	wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0);
+	wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0);
+	wmsum_init(&arc_sums.arcstat_raw_size, 0);
+	wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0);
+	wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0);
 
 	arc_anon->arcs_state = ARC_STATE_ANON;
 	arc_mru->arcs_state = ARC_STATE_MRU;
@@ -7790,26 +7835,104 @@
 	zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
 	zfs_refcount_destroy(&arc_l2c_only->arcs_size);
 
-	multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
 
-	aggsum_fini(&arc_meta_used);
-	aggsum_fini(&arc_size);
-	aggsum_fini(&astat_data_size);
-	aggsum_fini(&astat_metadata_size);
-	aggsum_fini(&astat_hdr_size);
-	aggsum_fini(&astat_l2_hdr_size);
-	aggsum_fini(&astat_bonus_size);
-	aggsum_fini(&astat_dnode_size);
-	aggsum_fini(&astat_dbuf_size);
+	wmsum_fini(&arc_sums.arcstat_hits);
+	wmsum_fini(&arc_sums.arcstat_misses);
+	wmsum_fini(&arc_sums.arcstat_demand_data_hits);
+	wmsum_fini(&arc_sums.arcstat_demand_data_misses);
+	wmsum_fini(&arc_sums.arcstat_demand_metadata_hits);
+	wmsum_fini(&arc_sums.arcstat_demand_metadata_misses);
+	wmsum_fini(&arc_sums.arcstat_prefetch_data_hits);
+	wmsum_fini(&arc_sums.arcstat_prefetch_data_misses);
+	wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits);
+	wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses);
+	wmsum_fini(&arc_sums.arcstat_mru_hits);
+	wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
+	wmsum_fini(&arc_sums.arcstat_mfu_hits);
+	wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
+	wmsum_fini(&arc_sums.arcstat_deleted);
+	wmsum_fini(&arc_sums.arcstat_mutex_miss);
+	wmsum_fini(&arc_sums.arcstat_access_skip);
+	wmsum_fini(&arc_sums.arcstat_evict_skip);
+	wmsum_fini(&arc_sums.arcstat_evict_not_enough);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_cached);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_eligible);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
+	wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
+	wmsum_fini(&arc_sums.arcstat_hash_collisions);
+	wmsum_fini(&arc_sums.arcstat_hash_chains);
+	aggsum_fini(&arc_sums.arcstat_size);
+	wmsum_fini(&arc_sums.arcstat_compressed_size);
+	wmsum_fini(&arc_sums.arcstat_uncompressed_size);
+	wmsum_fini(&arc_sums.arcstat_overhead_size);
+	wmsum_fini(&arc_sums.arcstat_hdr_size);
+	wmsum_fini(&arc_sums.arcstat_data_size);
+	wmsum_fini(&arc_sums.arcstat_metadata_size);
+	wmsum_fini(&arc_sums.arcstat_dbuf_size);
+	aggsum_fini(&arc_sums.arcstat_dnode_size);
+	wmsum_fini(&arc_sums.arcstat_bonus_size);
+	wmsum_fini(&arc_sums.arcstat_l2_hits);
+	wmsum_fini(&arc_sums.arcstat_l2_misses);
+	wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_mru_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_mfu_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_feeds);
+	wmsum_fini(&arc_sums.arcstat_l2_rw_clash);
+	wmsum_fini(&arc_sums.arcstat_l2_read_bytes);
+	wmsum_fini(&arc_sums.arcstat_l2_write_bytes);
+	wmsum_fini(&arc_sums.arcstat_l2_writes_sent);
+	wmsum_fini(&arc_sums.arcstat_l2_writes_done);
+	wmsum_fini(&arc_sums.arcstat_l2_writes_error);
+	wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry);
+	wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry);
+	wmsum_fini(&arc_sums.arcstat_l2_evict_reading);
+	wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached);
+	wmsum_fini(&arc_sums.arcstat_l2_free_on_write);
+	wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem);
+	wmsum_fini(&arc_sums.arcstat_l2_cksum_bad);
+	wmsum_fini(&arc_sums.arcstat_l2_io_error);
+	wmsum_fini(&arc_sums.arcstat_l2_lsize);
+	wmsum_fini(&arc_sums.arcstat_l2_psize);
+	aggsum_fini(&arc_sums.arcstat_l2_hdr_size);
+	wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes);
+	wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_log_blk_count);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_success);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_size);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached);
+	wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks);
+	wmsum_fini(&arc_sums.arcstat_memory_throttle_count);
+	wmsum_fini(&arc_sums.arcstat_memory_direct_count);
+	wmsum_fini(&arc_sums.arcstat_memory_indirect_count);
+	wmsum_fini(&arc_sums.arcstat_prune);
+	aggsum_fini(&arc_sums.arcstat_meta_used);
+	wmsum_fini(&arc_sums.arcstat_async_upgrade_sync);
+	wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch);
+	wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch);
+	wmsum_fini(&arc_sums.arcstat_raw_size);
+	wmsum_fini(&arc_sums.arcstat_cached_only_in_progress);
+	wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size);
 }
 
 uint64_t
@@ -7819,34 +7942,47 @@
 }
 
 void
+arc_set_limits(uint64_t allmem)
+{
+	/* The minimum is 1/32 of all memory, but never less than 32MB. */
+	uint64_t min_floor = 2ULL << SPA_MAXBLOCKSHIFT;
+	arc_c_min = MAX(min_floor, allmem / 32);
+
+	/* The default maximum is picked by the platform-specific helper. */
+	arc_c_max = arc_default_max(arc_c_min, allmem);
+}
+void
 arc_init(void)
 {
 	uint64_t percent, allmem = arc_all_memory();
-	mutex_init(&arc_adjust_lock, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&arc_adjust_waiters_cv, NULL, CV_DEFAULT, NULL);
+	mutex_init(&arc_evict_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&arc_evict_waiters, sizeof (arc_evict_waiter_t),
+	    offsetof(arc_evict_waiter_t, aew_node));
 
 	arc_min_prefetch_ms = 1000;
 	arc_min_prescient_prefetch_ms = 6000;
 
-#ifdef _KERNEL
-	/*
-	 * Register a shrinker to support synchronous (direct) memory
-	 * reclaim from the arc.  This is done to prevent kswapd from
-	 * swapping out pages when it is preferable to shrink the arc.
-	 */
-	spl_register_shrinker(&arc_shrinker);
-
-	/* Set to 1/64 of all memory or a minimum of 512K */
-	arc_sys_free = MAX(allmem / 64, (512 * 1024));
-	arc_need_free = 0;
+#if defined(_KERNEL)
+	arc_lowmem_init();
 #endif
 
-	/* Set max to 1/2 of all memory */
-	arc_c_max = allmem / 2;
+	arc_set_limits(allmem);
 
-#ifdef	_KERNEL
-	/* Set min cache to 1/32 of all memory, or 32MB, whichever is more */
-	arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
+#ifdef _KERNEL
+	/*
+	 * If zfs_arc_max is non-zero at init, meaning it was set in the kernel
+	 * environment before the module was loaded, don't block setting the
+	 * maximum because it is less than arc_c_min, instead, reset arc_c_min
+	 * to a lower value.
+	 * zfs_arc_min will be handled by arc_tuning_update().
+	 */
+	if (zfs_arc_max != 0 && zfs_arc_max >= MIN_ARC_MAX &&
+	    zfs_arc_max < allmem) {
+		arc_c_max = zfs_arc_max;
+		if (arc_c_min >= arc_c_max) {
+			arc_c_min = MAX(zfs_arc_max / 2,
+			    2ULL << SPA_MAXBLOCKSHIFT);
+		}
+	}
 #else
 	/*
 	 * In userland, there's only the memory pressure that we artificially
@@ -7857,13 +7993,11 @@
 	arc_c_min = MAX(arc_c_max / 2, 2ULL << SPA_MAXBLOCKSHIFT);
 #endif
 
-	arc_c = arc_c_max;
+	arc_c = arc_c_min;
 	arc_p = (arc_c >> 1);
 
 	/* Set min to 1/2 of arc_c_min */
 	arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
-	/* Initialize maximum observed usage to zero */
-	arc_meta_max = 0;
 	/*
 	 * Set arc_meta_limit to a percent of arc_c_max with a floor of
 	 * arc_meta_min, and a ceiling of arc_c_max.
@@ -7871,10 +8005,10 @@
 	percent = MIN(zfs_arc_meta_limit_percent, 100);
 	arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100);
 	percent = MIN(zfs_arc_dnode_limit_percent, 100);
-	arc_dnode_limit = (percent * arc_meta_limit) / 100;
+	arc_dnode_size_limit = (percent * arc_meta_limit) / 100;
 
 	/* Apply user specified tunings */
-	arc_tuning_update();
+	arc_tuning_update(B_TRUE);
 
 	/* if kmem_flags are set, lets try to use less memory */
 	if (kmem_debugging())
@@ -7882,22 +8016,18 @@
 	if (arc_c < arc_c_min)
 		arc_c = arc_c_min;
 
+	arc_register_hotplug();
+
 	arc_state_init();
 
-	/*
-	 * The arc must be "uninitialized", so that hdr_recl() (which is
-	 * registered by buf_init()) will not access arc_reap_zthr before
-	 * it is created.
-	 */
-	ASSERT(!arc_initialized);
 	buf_init();
 
 	list_create(&arc_prune_list, sizeof (arc_prune_t),
 	    offsetof(arc_prune_t, p_node));
 	mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
 
-	arc_prune_taskq = taskq_create("arc_prune", boot_ncpus, defclsyspri,
-	    boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+	arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
+	    defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
 	arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
 	    sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
@@ -7908,12 +8038,13 @@
 		kstat_install(arc_ksp);
 	}
 
-	arc_adjust_zthr = zthr_create(arc_adjust_cb_check,
-	    arc_adjust_cb, NULL);
-	arc_reap_zthr = zthr_create_timer(arc_reap_cb_check,
-	    arc_reap_cb, NULL, SEC2NSEC(1));
+	arc_state_evict_markers =
+	    arc_state_alloc_markers(arc_state_evict_marker_count);
+	arc_evict_zthr = zthr_create("arc_evict",
+	    arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri);
+	arc_reap_zthr = zthr_create_timer("arc_reap",
+	    arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri);
 
-	arc_initialized = B_TRUE;
 	arc_warm = B_FALSE;
 
 	/*
@@ -7924,9 +8055,15 @@
 	 * zfs_dirty_data_max_percent (default 10%) with a cap at
 	 * zfs_dirty_data_max_max (default 4G or 25% of physical memory).
 	 */
+#ifdef __LP64__
 	if (zfs_dirty_data_max_max == 0)
 		zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024,
 		    allmem * zfs_dirty_data_max_max_percent / 100);
+#else
+	if (zfs_dirty_data_max_max == 0)
+		zfs_dirty_data_max_max = MIN(1ULL * 1024 * 1024 * 1024,
+		    allmem * zfs_dirty_data_max_max_percent / 100);
+#endif
 
 	if (zfs_dirty_data_max == 0) {
 		zfs_dirty_data_max = allmem *
@@ -7934,6 +8071,18 @@
 		zfs_dirty_data_max = MIN(zfs_dirty_data_max,
 		    zfs_dirty_data_max_max);
 	}
+
+	if (zfs_wrlog_data_max == 0) {
+
+		/*
+		 * dp_wrlog_total is reduced for each txg at the end of
+		 * spa_sync(). However, dp_dirty_total is reduced every time
+		 * a block is written out. Thus under normal operation,
+		 * dp_wrlog_total could grow 2 times as big as
+		 * zfs_dirty_data_max.
+		 */
+		zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+	}
 }
 
 void
@@ -7942,14 +8091,12 @@
 	arc_prune_t *p;
 
 #ifdef _KERNEL
-	spl_unregister_shrinker(&arc_shrinker);
+	arc_lowmem_fini();
 #endif /* _KERNEL */
 
 	/* Use B_TRUE to ensure *all* buffers are evicted */
 	arc_flush(NULL, B_TRUE);
 
-	arc_initialized = B_FALSE;
-
 	if (arc_ksp != NULL) {
 		kstat_delete(arc_ksp);
 		arc_ksp = NULL;
@@ -7970,11 +8117,20 @@
 	list_destroy(&arc_prune_list);
 	mutex_destroy(&arc_prune_mtx);
 
-	(void) zthr_cancel(arc_adjust_zthr);
+	(void) zthr_cancel(arc_evict_zthr);
 	(void) zthr_cancel(arc_reap_zthr);
+	arc_state_free_markers(arc_state_evict_markers,
+	    arc_state_evict_marker_count);
 
-	mutex_destroy(&arc_adjust_lock);
-	cv_destroy(&arc_adjust_waiters_cv);
+	mutex_destroy(&arc_evict_lock);
+	list_destroy(&arc_evict_waiters);
+
+	/*
+	 * Free any buffers that were tagged for destruction.  This needs
+	 * to occur before arc_state_fini() runs and destroys the aggsum
+	 * values which are updated when freeing scatter ABDs.
+	 */
+	l2arc_do_free_on_write();
 
 	/*
 	 * buf_fini() must proceed arc_state_fini() because buf_fin() may
@@ -7984,12 +8140,14 @@
 	buf_fini();
 	arc_state_fini();
 
+	arc_unregister_hotplug();
+
 	/*
 	 * We destroy the zthrs after all the ARC state has been
 	 * torn down to avoid the case of them receiving any
 	 * wakeup() signals after they are destroyed.
 	 */
-	zthr_destroy(arc_adjust_zthr);
+	zthr_destroy(arc_evict_zthr);
 	zthr_destroy(arc_reap_zthr);
 
 	ASSERT0(arc_loaned_bytes);
@@ -8138,6 +8296,103 @@
  *
  * These three functions determine what to write, how much, and how quickly
  * to send writes.
+ *
+ * L2ARC persistence:
+ *
+ * When writing buffers to L2ARC, we periodically add some metadata to
+ * make sure we can pick them up after reboot, thus dramatically reducing
+ * the impact that any downtime has on the performance of storage systems
+ * with large caches.
+ *
+ * The implementation works fairly simply by integrating the following two
+ * modifications:
+ *
+ * *) When writing to the L2ARC, we occasionally write a "l2arc log block",
+ *    which is an additional piece of metadata which describes what's been
+ *    written. This allows us to rebuild the arc_buf_hdr_t structures of the
+ *    main ARC buffers. There are 2 linked-lists of log blocks headed by
+ *    dh_start_lbps[2]. We alternate which chain we append to, so they are
+ *    time-wise and offset-wise interleaved, but that is an optimization rather
+ *    than for correctness. The log block also includes a pointer to the
+ *    previous block in its chain.
+ *
+ * *) We reserve SPA_MINBLOCKSIZE of space at the start of each L2ARC device
+ *    for our header bookkeeping purposes. This contains a device header,
+ *    which contains our top-level reference structures. We update it each
+ *    time we write a new log block, so that we're able to locate it in the
+ *    L2ARC device. If this write results in an inconsistent device header
+ *    (e.g. due to power failure), we detect this by verifying the header's
+ *    checksum and simply fail to reconstruct the L2ARC after reboot.
+ *
+ * Implementation diagram:
+ *
+ * +=== L2ARC device (not to scale) ======================================+
+ * |       ___two newest log block pointers__.__________                  |
+ * |      /                                   \dh_start_lbps[1]           |
+ * |     /                                     \         \dh_start_lbps[0]|
+ * |.___/__.                                    V         V               |
+ * ||L2 dev|....|lb |bufs |lb |bufs |lb |bufs |lb |bufs |lb |---(empty)---|
+ * ||   hdr|      ^         /^       /^        /         /                |
+ * |+------+  ...--\-------/  \-----/--\------/         /                 |
+ * |                \--------------/    \--------------/                  |
+ * +======================================================================+
+ *
+ * As can be seen on the diagram, rather than using a simple linked list,
+ * we use a pair of linked lists with alternating elements. This is a
+ * performance enhancement due to the fact that we only find out the
+ * address of the next log block access once the current block has been
+ * completely read in. Obviously, this hurts performance, because we'd be
+ * keeping the device's I/O queue only 1 operation deep, thus
+ * incurring a large amount of I/O round-trip latency. Having two lists
+ * allows us to fetch two log blocks ahead of where we are currently
+ * rebuilding L2ARC buffers.
+ *
+ * On-device data structures:
+ *
+ * L2ARC device header:	l2arc_dev_hdr_phys_t
+ * L2ARC log block:	l2arc_log_blk_phys_t
+ *
+ * L2ARC reconstruction:
+ *
+ * When writing data, we simply write in the standard rotary fashion,
+ * evicting buffers as we go and simply writing new data over them (writing
+ * a new log block every now and then). This obviously means that once we
+ * loop around the end of the device, we will start cutting into an already
+ * committed log block (and its referenced data buffers), like so:
+ *
+ *    current write head__       __old tail
+ *                        \     /
+ *                        V    V
+ * <--|bufs |lb |bufs |lb |    |bufs |lb |bufs |lb |-->
+ *                         ^    ^^^^^^^^^___________________________________
+ *                         |                                                \
+ *                   <<nextwrite>> may overwrite this blk and/or its bufs --'
+ *
+ * When importing the pool, we detect this situation and use it to stop
+ * our scanning process (see l2arc_rebuild).
+ *
+ * There is one significant caveat to consider when rebuilding ARC contents
+ * from an L2ARC device: what about invalidated buffers? Given the above
+ * construction, we cannot update blocks which we've already written to amend
+ * them to remove buffers which were invalidated. Thus, during reconstruction,
+ * we might be populating the cache with buffers for data that's not on the
+ * main pool anymore, or may have been overwritten!
+ *
+ * As it turns out, this isn't a problem. Every arc_read request includes
+ * both the DVA and, crucially, the birth TXG of the BP the caller is
+ * looking for. So even if the cache were populated by completely rotten
+ * blocks for data that had been long deleted and/or overwritten, we'll
+ * never actually return bad data from the cache, since the DVA with the
+ * birth TXG uniquely identify a block in space and time - once created,
+ * a block is immutable on disk. The worst thing we have done is wasted
+ * some time and memory at l2arc rebuild to reconstruct outdated ARC
+ * entries that will get dropped from the l2arc as it is being updated
+ * with new blocks.
+ *
+ * L2ARC buffers that have been evicted by l2arc_evict() ahead of the write
+ * hand are not restored. This is done by saving the offset (in bytes)
+ * l2arc_evict() has evicted to in the L2ARC device header and taking it
+ * into account when restoring buffers.
  */
 
 static boolean_t
@@ -8158,7 +8413,7 @@
 }
 
 static uint64_t
-l2arc_write_size(void)
+l2arc_write_size(l2arc_dev_t *dev)
 {
 	uint64_t size;
 
@@ -8177,6 +8432,46 @@
 	if (arc_warm == B_FALSE)
 		size += l2arc_write_boost;
 
+	/* We need to add in the worst case scenario of log block overhead. */
+	size += l2arc_log_blk_overhead(size, dev);
+	if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
+		/*
+		 * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
+		 * times the writesize, whichever is greater.
+		 */
+		size += MAX(64 * 1024 * 1024,
+		    (size * l2arc_trim_ahead) / 100);
+	}
+
+	/*
+	 * Make sure the write size does not exceed the size of the cache
+	 * device. This is important in l2arc_evict(), otherwise infinite
+	 * iteration can occur.
+	 */
+	if (size > dev->l2ad_end - dev->l2ad_start) {
+		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
+		    "plus the overhead of log blocks (persistent L2ARC, "
+		    "%llu bytes) exceeds the size of the cache device "
+		    "(guid %llu), resetting them to the default (%d)",
+		    l2arc_log_blk_overhead(size, dev),
+		    dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
+		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
+
+		if (l2arc_trim_ahead > 1) {
+			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
+			l2arc_trim_ahead = 1;
+		}
+
+		if (arc_warm == B_FALSE)
+			size += l2arc_write_boost;
+
+		size += l2arc_log_blk_overhead(size, dev);
+		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
+			size += MAX(64 * 1024 * 1024,
+			    (size * l2arc_trim_ahead) / 100);
+		}
+	}
+
 	return (size);
 
 }
@@ -8242,10 +8537,12 @@
 		else if (next == first)
 			break;
 
-	} while (vdev_is_dead(next->l2ad_vdev));
+	} while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+	    next->l2ad_trim_all);
 
 	/* if we were unable to find any usable vdevs, return NULL */
-	if (vdev_is_dead(next->l2ad_vdev))
+	if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild ||
+	    next->l2ad_trim_all)
 		next = NULL;
 
 	l2arc_dev_last = next;
@@ -8294,16 +8591,20 @@
 static void
 l2arc_write_done(zio_t *zio)
 {
-	l2arc_write_callback_t *cb;
-	l2arc_dev_t *dev;
-	list_t *buflist;
-	arc_buf_hdr_t *head, *hdr, *hdr_prev;
-	kmutex_t *hash_lock;
-	int64_t bytes_dropped = 0;
+	l2arc_write_callback_t	*cb;
+	l2arc_lb_abd_buf_t	*abd_buf;
+	l2arc_lb_ptr_buf_t	*lb_ptr_buf;
+	l2arc_dev_t		*dev;
+	l2arc_dev_hdr_phys_t	*l2dhdr;
+	list_t			*buflist;
+	arc_buf_hdr_t		*head, *hdr, *hdr_prev;
+	kmutex_t		*hash_lock;
+	int64_t			bytes_dropped = 0;
 
 	cb = zio->io_private;
 	ASSERT3P(cb, !=, NULL);
 	dev = cb->l2wcb_dev;
+	l2dhdr = dev->l2ad_dev_hdr;
 	ASSERT3P(dev, !=, NULL);
 	head = cb->l2wcb_head;
 	ASSERT3P(head, !=, NULL);
@@ -8312,9 +8613,6 @@
 	DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
 	    l2arc_write_callback_t *, cb);
 
-	if (zio->io_error != 0)
-		ARCSTAT_BUMP(arcstat_l2_writes_error);
-
 	/*
 	 * All writes completed, or an error was hit.
 	 */
@@ -8378,8 +8676,7 @@
 			arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
 
 			uint64_t psize = HDR_GET_PSIZE(hdr);
-			ARCSTAT_INCR(arcstat_l2_psize, -psize);
-			ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
+			l2arc_hdr_arcstats_decrement(hdr);
 
 			bytes_dropped +=
 			    vdev_psize_to_asize(dev->l2ad_vdev, psize);
@@ -8396,12 +8693,74 @@
 		mutex_exit(hash_lock);
 	}
 
-	atomic_inc_64(&l2arc_writes_done);
+	/*
+	 * Free the allocated abd buffers for writing the log blocks.
+	 * If the zio failed reclaim the allocated space and remove the
+	 * pointers to these log blocks from the log block pointer list
+	 * of the L2ARC device.
+	 */
+	while ((abd_buf = list_remove_tail(&cb->l2wcb_abd_list)) != NULL) {
+		abd_free(abd_buf->abd);
+		zio_buf_free(abd_buf, sizeof (*abd_buf));
+		if (zio->io_error != 0) {
+			lb_ptr_buf = list_remove_head(&dev->l2ad_lbptr_list);
+			/*
+			 * L2BLK_GET_PSIZE returns aligned size for log
+			 * blocks.
+			 */
+			uint64_t asize =
+			    L2BLK_GET_PSIZE((lb_ptr_buf->lb_ptr)->lbp_prop);
+			bytes_dropped += asize;
+			ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
+			ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
+			zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
+			    lb_ptr_buf);
+			zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf);
+			kmem_free(lb_ptr_buf->lb_ptr,
+			    sizeof (l2arc_log_blkptr_t));
+			kmem_free(lb_ptr_buf, sizeof (l2arc_lb_ptr_buf_t));
+		}
+	}
+	list_destroy(&cb->l2wcb_abd_list);
+
+	if (zio->io_error != 0) {
+		ARCSTAT_BUMP(arcstat_l2_writes_error);
+
+		/*
+		 * Restore the lbps array in the header to its previous state.
+		 * If the list of log block pointers is empty, zero out the
+		 * log block pointers in the device header.
+		 */
+		lb_ptr_buf = list_head(&dev->l2ad_lbptr_list);
+		for (int i = 0; i < 2; i++) {
+			if (lb_ptr_buf == NULL) {
+				/*
+				 * If the list is empty zero out the device
+				 * header. Otherwise zero out the second log
+				 * block pointer in the header.
+				 */
+				if (i == 0) {
+					bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+				} else {
+					bzero(&l2dhdr->dh_start_lbps[i],
+					    sizeof (l2arc_log_blkptr_t));
+				}
+				break;
+			}
+			bcopy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[i],
+			    sizeof (l2arc_log_blkptr_t));
+			lb_ptr_buf = list_next(&dev->l2ad_lbptr_list,
+			    lb_ptr_buf);
+		}
+	}
+
+	ARCSTAT_BUMP(arcstat_l2_writes_done);
 	list_remove(buflist, head);
 	ASSERT(!HDR_HAS_L1HDR(head));
 	kmem_cache_free(hdr_l2only_cache, head);
 	mutex_exit(&dev->l2ad_mtx);
 
+	ASSERT(dev->l2ad_vdev != NULL);
 	vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0);
 
 	l2arc_do_free_on_write();
@@ -8437,7 +8796,7 @@
 	 */
 	if (BP_IS_ENCRYPTED(bp)) {
 		abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
-		    B_TRUE);
+		    ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
 
 		zio_crypt_decode_params_bp(bp, salt, iv);
 		zio_crypt_decode_mac_bp(bp, mac);
@@ -8474,12 +8833,12 @@
 	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
 	    !HDR_COMPRESSION_ENABLED(hdr)) {
 		abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
-		    B_TRUE);
+		    ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
 		void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
 
 		ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
 		    hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
-		    HDR_GET_LSIZE(hdr));
+		    HDR_GET_LSIZE(hdr), &hdr->b_complevel);
 		if (ret != 0) {
 			abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
 			arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
@@ -8578,6 +8937,7 @@
 	    (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
 	zio->io_bp_copy = cb->l2rcb_bp;	/* XXX fix in L2ARC 2.0	*/
 	zio->io_bp = &zio->io_bp_copy;	/* XXX fix in L2ARC 2.0	*/
+	zio->io_prop.zp_complevel = hdr->b_complevel;
 
 	valid_cksum = arc_cksum_is_equal(hdr, zio);
 
@@ -8663,16 +9023,16 @@
 
 	switch (list_num) {
 	case 0:
-		ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
+		ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
 		break;
 	case 1:
-		ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
+		ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
 		break;
 	case 2:
-		ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
+		ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
 		break;
 	case 3:
-		ml = arc_mru->arcs_list[ARC_BUFC_DATA];
+		ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
 		break;
 	default:
 		return (NULL);
@@ -8689,8 +9049,31 @@
 }
 
 /*
+ * Calculates the maximum overhead of L2ARC metadata log blocks for a given
+ * L2ARC write size. l2arc_evict and l2arc_write_size need to include this
+ * overhead in processing to make sure there is enough headroom available
+ * when writing buffers.
+ */
+static inline uint64_t
+l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev)
+{
+	uint64_t per_blk = dev->l2ad_log_entries;
+
+	/* With zero entries per log block there is no overhead to add. */
+	if (per_blk == 0)
+		return (0);
+
+	/* Count one log entry per (1 << SPA_MINBLOCKSHIFT) bytes written. */
+	uint64_t entries = write_sz >> SPA_MINBLOCKSHIFT;
+	/* Round up to whole log blocks and scale by the per-block asize. */
+	uint64_t blks = (entries + per_blk - 1) / per_blk;
+	return (blks * vdev_psize_to_asize(dev->l2ad_vdev,
+	    sizeof (l2arc_log_blk_phys_t)));
+}
+
+/*
  * Evict buffers from the device write hand to the distance specified in
- * bytes.  This distance may span populated buffers, it may span nothing.
+ * bytes. This distance may span populated buffers, it may span nothing.
  * This is clearing a region on the L2ARC device ready for writing.
  * If the 'all' boolean is set, every buffer is evicted.
  */
@@ -8701,22 +9084,24 @@
 	arc_buf_hdr_t *hdr, *hdr_prev;
 	kmutex_t *hash_lock;
 	uint64_t taddr;
+	l2arc_lb_ptr_buf_t *lb_ptr_buf, *lb_ptr_buf_prev;
+	vdev_t *vd = dev->l2ad_vdev;
+	boolean_t rerun;
 
 	buflist = &dev->l2ad_buflist;
 
-	if (!all && dev->l2ad_first) {
+top:
+	rerun = B_FALSE;
+	if (dev->l2ad_hand + distance > dev->l2ad_end) {
 		/*
-		 * This is the first sweep through the device.  There is
-		 * nothing to evict.
+		 * When there is no space to accommodate upcoming writes,
+		 * evict to the end. Then bump the write and evict hands
+		 * to the start and iterate. This iteration does not
+		 * happen indefinitely as we make sure in
+		 * l2arc_write_size() that when the write hand is reset,
+		 * the write size does not exceed the end of the device.
 		 */
-		return;
-	}
-
-	if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) {
-		/*
-		 * When nearing the end of the device, evict to the end
-		 * before the device write hand jumps to the start.
-		 */
+		rerun = B_TRUE;
 		taddr = dev->l2ad_end;
 	} else {
 		taddr = dev->l2ad_hand + distance;
@@ -8724,8 +9109,90 @@
 	DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
 	    uint64_t, taddr, boolean_t, all);
 
-top:
+	if (!all) {
+		/*
+		 * This check has to be placed after deciding whether to
+		 * iterate (rerun).
+		 */
+		if (dev->l2ad_first) {
+			/*
+			 * This is the first sweep through the device. There is
+			 * nothing to evict. We have already trimmed the
+			 * whole device.
+			 */
+			goto out;
+		} else {
+			/*
+			 * Trim the space to be evicted.
+			 */
+			if (vd->vdev_has_trim && dev->l2ad_evict < taddr &&
+			    l2arc_trim_ahead > 0) {
+				/*
+				 * We have to drop the spa_config lock because
+				 * vdev_trim_range() will acquire it.
+				 * l2ad_evict already accounts for the label
+				 * size. To prevent vdev_trim_ranges() from
+				 * adding it again, we subtract it from
+				 * l2ad_evict.
+				 */
+				spa_config_exit(dev->l2ad_spa, SCL_L2ARC, dev);
+				vdev_trim_simple(vd,
+				    dev->l2ad_evict - VDEV_LABEL_START_SIZE,
+				    taddr - dev->l2ad_evict);
+				spa_config_enter(dev->l2ad_spa, SCL_L2ARC, dev,
+				    RW_READER);
+			}
+
+			/*
+			 * When rebuilding L2ARC we retrieve the evict hand
+			 * from the header of the device. Of note, l2arc_evict()
+			 * does not actually delete buffers from the cache
+			 * device, but trimming may do so depending on the
+			 * hardware implementation. Thus keeping track of the
+			 * evict hand is useful.
+			 */
+			dev->l2ad_evict = MAX(dev->l2ad_evict, taddr);
+		}
+	}
+
+retry:
 	mutex_enter(&dev->l2ad_mtx);
+	/*
+	 * We have to account for evicted log blocks. Run vdev_space_update()
+	 * on log blocks whose offset (in bytes) is before the evicted offset
+	 * (in bytes) by searching in the list of pointers to log blocks
+	 * present in the L2ARC device.
+	 */
+	for (lb_ptr_buf = list_tail(&dev->l2ad_lbptr_list); lb_ptr_buf;
+	    lb_ptr_buf = lb_ptr_buf_prev) {
+
+		lb_ptr_buf_prev = list_prev(&dev->l2ad_lbptr_list, lb_ptr_buf);
+
+		/* L2BLK_GET_PSIZE returns aligned size for log blocks */
+		uint64_t asize = L2BLK_GET_PSIZE(
+		    (lb_ptr_buf->lb_ptr)->lbp_prop);
+
+		/*
+		 * We don't worry about log blocks left behind (i.e.
+		 * lbp_payload_start < l2ad_hand) because l2arc_write_buffers()
+		 * will never write more than l2arc_evict() evicts.
+		 */
+		if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) {
+			break;
+		} else {
+			vdev_space_update(vd, -asize, 0, 0);
+			ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
+			ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
+			zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
+			    lb_ptr_buf);
+			zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf);
+			list_remove(&dev->l2ad_lbptr_list, lb_ptr_buf);
+			kmem_free(lb_ptr_buf->lb_ptr,
+			    sizeof (l2arc_log_blkptr_t));
+			kmem_free(lb_ptr_buf, sizeof (l2arc_lb_ptr_buf_t));
+		}
+	}
+
 	for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
 		hdr_prev = list_prev(buflist, hdr);
 
@@ -8745,7 +9212,7 @@
 			mutex_exit(&dev->l2ad_mtx);
 			mutex_enter(hash_lock);
 			mutex_exit(hash_lock);
-			goto top;
+			goto retry;
 		}
 
 		/*
@@ -8757,7 +9224,7 @@
 		ASSERT(!HDR_L2_WRITING(hdr));
 		ASSERT(!HDR_L2_WRITE_HEAD(hdr));
 
-		if (!all && (hdr->b_l2hdr.b_daddr >= taddr ||
+		if (!all && (hdr->b_l2hdr.b_daddr >= dev->l2ad_evict ||
 		    hdr->b_l2hdr.b_daddr < dev->l2ad_hand)) {
 			/*
 			 * We've evicted to the target address,
@@ -8794,6 +9261,33 @@
 		mutex_exit(hash_lock);
 	}
 	mutex_exit(&dev->l2ad_mtx);
+
+out:
+	/*
+	 * We need to check if we evict all buffers, otherwise we may iterate
+	 * unnecessarily.
+	 */
+	if (!all && rerun) {
+		/*
+		 * Bump device hand to the device start if it is approaching the
+		 * end. l2arc_evict() has already evicted ahead for this case.
+		 */
+		dev->l2ad_hand = dev->l2ad_start;
+		dev->l2ad_evict = dev->l2ad_start;
+		dev->l2ad_first = B_FALSE;
+		goto top;
+	}
+
+	if (!all) {
+		/*
+		 * In case of cache device removal (all) the following
+		 * assertions may be violated without functional consequences
+		 * as the device is about to be removed.
+		 */
+		ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
+		if (!dev->l2ad_first)
+			ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
+	}
 }
 
 /*
@@ -8847,18 +9341,41 @@
 	}
 
 	if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
-		cabd = abd_alloc_for_io(asize, ismd);
-		tmp = abd_borrow_buf(cabd, asize);
+		/*
+		 * In some cases, we can wind up with size > asize, so
+		 * we need to opt for the larger allocation option here.
+		 *
+		 * (We also need abd_return_buf_copy in all cases because
+		 * it's an ASSERT() to modify the buffer before returning it
+		 * with abd_return_buf(), and all the compressors
+		 * write things before deciding to fail compression in nearly
+		 * every case.)
+		 */
+		cabd = abd_alloc_for_io(size, ismd);
+		tmp = abd_borrow_buf(cabd, size);
 
-		psize = zio_compress_data(compress, to_write, tmp, size);
+		psize = zio_compress_data(compress, to_write, tmp, size,
+		    hdr->b_complevel);
+
+		if (psize >= asize) {
+			psize = HDR_GET_PSIZE(hdr);
+			abd_return_buf_copy(cabd, tmp, size);
+			HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+			to_write = cabd;
+			abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
+			if (psize != asize)
+				abd_zero_off(to_write, psize, asize - psize);
+			goto encrypt;
+		}
 		ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
 		if (psize < asize)
 			bzero((char *)tmp + psize, asize - psize);
 		psize = HDR_GET_PSIZE(hdr);
-		abd_return_buf_copy(cabd, tmp, asize);
+		abd_return_buf_copy(cabd, tmp, size);
 		to_write = cabd;
 	}
 
+encrypt:
 	if (HDR_ENCRYPTED(hdr)) {
 		eabd = abd_alloc_for_io(asize, ismd);
 
@@ -8913,6 +9430,17 @@
 	return (ret);
 }
 
+static void
+l2arc_blk_fetch_done(zio_t *zio)
+{
+	l2arc_read_callback_t *rcb = zio->io_private;
+
+	/* Free the attached abd, if any, and then the callback itself. */
+	if (rcb->l2rcb_abd != NULL)
+		abd_free(rcb->l2rcb_abd);
+	kmem_free(rcb, sizeof (*rcb));
+}
+
 /*
  * Find and write ARC buffers to the L2ARC device.
  *
@@ -8922,17 +9450,19 @@
  * state between calls to this function.
  *
  * Returns the number of bytes actually written (which may be smaller than
- * the delta by which the device hand has changed due to alignment).
+ * the delta by which the device hand has changed due to alignment and the
+ * writing of log blocks).
  */
 static uint64_t
 l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 {
-	arc_buf_hdr_t *hdr, *hdr_prev, *head;
-	uint64_t write_asize, write_psize, write_lsize, headroom;
-	boolean_t full;
-	l2arc_write_callback_t *cb;
-	zio_t *pio, *wzio;
-	uint64_t guid = spa_load_guid(spa);
+	arc_buf_hdr_t 		*hdr, *hdr_prev, *head;
+	uint64_t 		write_asize, write_psize, write_lsize, headroom;
+	boolean_t		full;
+	l2arc_write_callback_t	*cb = NULL;
+	zio_t 			*pio, *wzio;
+	uint64_t 		guid = spa_load_guid(spa);
+	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
 
 	ASSERT3P(dev->l2ad_vdev, !=, NULL);
 
@@ -8945,8 +9475,17 @@
 	/*
 	 * Copy buffers for L2ARC writing.
 	 */
-	for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
-		multilist_sublist_t *mls = l2arc_sublist_lock(try);
+	for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
+		/*
+		 * If pass == 1 or 3, we cache MRU metadata and data
+		 * respectively.
+		 */
+		if (l2arc_mfuonly) {
+			if (pass == 1 || pass == 3)
+				continue;
+		}
+
+		multilist_sublist_t *mls = l2arc_sublist_lock(pass);
 		uint64_t passed_sz = 0;
 
 		VERIFY3P(mls, !=, NULL);
@@ -8984,7 +9523,7 @@
 			}
 
 			passed_sz += HDR_GET_LSIZE(hdr);
-			if (passed_sz > headroom) {
+			if (l2arc_headroom != 0 && passed_sz > headroom) {
 				/*
 				 * Searched too far.
 				 */
@@ -9013,7 +9552,13 @@
 			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
 			    psize);
 
-			if ((write_asize + asize) > target_sz) {
+			/*
+			 * If the allocated size of this buffer plus the max
+			 * size for the pending log block exceeds the evicted
+			 * target size, terminate writing buffers for this run.
+			 */
+			if (write_asize + asize +
+			    sizeof (l2arc_log_blk_phys_t) > target_sz) {
 				full = B_TRUE;
 				mutex_exit(hash_lock);
 				break;
@@ -9036,7 +9581,7 @@
 			/*
 			 * If this header has b_rabd, we can use this since it
 			 * must always match the data exactly as it exists on
-			 * disk. Otherwise, the L2ARC can  normally use the
+			 * disk. Otherwise, the L2ARC can normally use the
 			 * hdr's data, but if we're sharing data between the
 			 * hdr and one of its bufs, L2ARC needs its own copy of
 			 * the data so that the ZIO below can't race with the
@@ -9084,6 +9629,13 @@
 				    sizeof (l2arc_write_callback_t), KM_SLEEP);
 				cb->l2wcb_dev = dev;
 				cb->l2wcb_head = head;
+				/*
+				 * Create a list to save allocated abd buffers
+				 * for l2arc_log_blk_commit().
+				 */
+				list_create(&cb->l2wcb_abd_list,
+				    sizeof (l2arc_lb_abd_buf_t),
+				    offsetof(l2arc_lb_abd_buf_t, node));
 				pio = zio_root(spa, l2arc_write_done, cb,
 				    ZIO_FLAG_CANFAIL);
 			}
@@ -9092,6 +9644,8 @@
 			hdr->b_l2hdr.b_hits = 0;
 
 			hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
+			hdr->b_l2hdr.b_arcs_state =
+			    hdr->b_l1hdr.b_state->arcs_state;
 			arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR);
 
 			mutex_enter(&dev->l2ad_mtx);
@@ -9114,11 +9668,26 @@
 			write_psize += psize;
 			write_asize += asize;
 			dev->l2ad_hand += asize;
+			l2arc_hdr_arcstats_increment(hdr);
 			vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
 
 			mutex_exit(hash_lock);
 
-			(void) zio_nowait(wzio);
+			/*
+			 * Append buf info to current log and commit if full.
+			 * arcstat_l2_{size,asize} kstats are updated
+			 * internally.
+			 */
+			if (l2arc_log_blk_insert(dev, hdr)) {
+				/*
+				 * l2ad_hand will be adjusted in
+				 * l2arc_log_blk_commit().
+				 */
+				write_asize +=
+				    l2arc_log_blk_commit(dev, pio, cb);
+			}
+
+			zio_nowait(wzio);
 		}
 
 		multilist_sublist_unlock(mls);
@@ -9132,39 +9701,55 @@
 		ASSERT0(write_lsize);
 		ASSERT(!HDR_HAS_L1HDR(head));
 		kmem_cache_free(hdr_l2only_cache, head);
+
+		/*
+		 * Although we did not write any buffers l2ad_evict may
+		 * have advanced.
+		 */
+		if (dev->l2ad_evict != l2dhdr->dh_evict)
+			l2arc_dev_hdr_update(dev);
+
 		return (0);
 	}
 
+	if (!dev->l2ad_first)
+		ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
+
 	ASSERT3U(write_asize, <=, target_sz);
 	ARCSTAT_BUMP(arcstat_l2_writes_sent);
 	ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
-	ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
-	ARCSTAT_INCR(arcstat_l2_psize, write_psize);
-
-	/*
-	 * Bump device hand to the device start if it is approaching the end.
-	 * l2arc_evict() will already have evicted ahead for this case.
-	 */
-	if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
-		dev->l2ad_hand = dev->l2ad_start;
-		dev->l2ad_first = B_FALSE;
-	}
 
 	dev->l2ad_writing = B_TRUE;
 	(void) zio_wait(pio);
 	dev->l2ad_writing = B_FALSE;
 
+	/*
+	 * Update the device header after the zio completes as
+	 * l2arc_write_done() may have updated the memory holding the log block
+	 * pointers in the device header.
+	 */
+	l2arc_dev_hdr_update(dev);
+
 	return (write_asize);
 }
 
+static boolean_t
+l2arc_hdr_limit_reached(void)
+{
+	int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size);
+
+	return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
+	    (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
+}
+
 /*
  * This thread feeds the L2ARC at regular intervals.  This is the beating
  * heart of the L2ARC.
  */
-/* ARGSUSED */
 static void
 l2arc_feed_thread(void *unused)
 {
+	(void) unused;
 	callb_cpr_t cpr;
 	l2arc_dev_t *dev;
 	spa_t *spa;
@@ -9179,7 +9764,7 @@
 	cookie = spl_fstrans_mark();
 	while (l2arc_thread_exit == 0) {
 		CALLB_CPR_SAFE_BEGIN(&cpr);
-		(void) cv_timedwait_sig(&l2arc_feed_thr_cv,
+		(void) cv_timedwait_idle(&l2arc_feed_thr_cv,
 		    &l2arc_feed_thr_lock, next);
 		CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock);
 		next = ddi_get_lbolt() + hz;
@@ -9224,7 +9809,7 @@
 		/*
 		 * Avoid contributing to memory pressure.
 		 */
-		if (arc_reclaim_needed()) {
+		if (l2arc_hdr_limit_reached()) {
 			ARCSTAT_BUMP(arcstat_l2_abort_lowmem);
 			spa_config_exit(spa, SCL_L2ARC, dev);
 			continue;
@@ -9232,7 +9817,7 @@
 
 		ARCSTAT_BUMP(arcstat_l2_feeds);
 
-		size = l2arc_write_size();
+		size = l2arc_write_size(dev);
 
 		/*
 		 * Evict L2ARC buffers that will be overwritten.
@@ -9261,7 +9846,17 @@
 boolean_t
 l2arc_vdev_present(vdev_t *vd)
 {
-	l2arc_dev_t *dev;
+	return (l2arc_vdev_get(vd) != NULL);
+}
+
+/*
+ * Returns the l2arc_dev_t associated with a particular vdev_t or NULL if
+ * the vdev_t isn't an L2ARC device.
+ */
+l2arc_dev_t *
+l2arc_vdev_get(vdev_t *vd)
+{
+	l2arc_dev_t	*dev;
 
 	mutex_enter(&l2arc_dev_mtx);
 	for (dev = list_head(l2arc_dev_list); dev != NULL;
@@ -9271,7 +9866,81 @@
 	}
 	mutex_exit(&l2arc_dev_mtx);
 
-	return (dev != NULL);
+	return (dev);
+}
+
+static void
+l2arc_rebuild_dev(l2arc_dev_t *dev, boolean_t reopen)
+{
+	l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
+	uint64_t l2dhdr_asize = dev->l2ad_dev_hdr_asize;
+	spa_t *spa = dev->l2ad_spa;
+
+	/*
+	 * The L2ARC has to hold at least the payload of one log block for
+	 * them to be restored (persistent L2ARC). The payload of a log block
+	 * depends on the amount of its log entries. We always write log blocks
+	 * with 1022 entries. How many of them are committed or restored depends
+	 * on the size of the L2ARC device. Thus the maximum payload of
+	 * one log block is 1022 * SPA_MAXBLOCKSIZE = 16GB. If the L2ARC device
+	 * is less than that, we reduce the amount of committed and restored
+	 * log entries per block so as to enable persistence.
+	 */
+	if (dev->l2ad_end < l2arc_rebuild_blocks_min_l2size) {
+		dev->l2ad_log_entries = 0;
+	} else {
+		dev->l2ad_log_entries = MIN((dev->l2ad_end -
+		    dev->l2ad_start) >> SPA_MAXBLOCKSHIFT,
+		    L2ARC_LOG_BLK_MAX_ENTRIES);
+	}
+
+	/*
+	 * Read the device header, if an error is returned do not rebuild L2ARC.
+	 */
+	if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
+		/*
+		 * If we are onlining a cache device (vdev_reopen) that was
+		 * still present (l2arc_vdev_present()) and rebuild is enabled,
+		 * we should evict all ARC buffers and pointers to log blocks
+		 * and reclaim their space before restoring its contents to
+		 * L2ARC.
+		 */
+		if (reopen) {
+			if (!l2arc_rebuild_enabled) {
+				return;
+			} else {
+				l2arc_evict(dev, 0, B_TRUE);
+				/* start a new log block */
+				dev->l2ad_log_ent_idx = 0;
+				dev->l2ad_log_blk_payload_asize = 0;
+				dev->l2ad_log_blk_payload_start = 0;
+			}
+		}
+		/*
+		 * Just mark the device as pending for a rebuild. We won't
+		 * be starting a rebuild in line here as it would block pool
+		 * import. Instead spa_load_impl will hand that off to an
+		 * async task which will call l2arc_spa_rebuild_start.
+		 */
+		dev->l2ad_rebuild = B_TRUE;
+	} else if (spa_writeable(spa)) {
+		/*
+		 * In this case TRIM the whole device if l2arc_trim_ahead > 0,
+		 * otherwise create a new header. We zero out the memory holding
+		 * the header to reset dh_start_lbps. If we TRIM the whole
+		 * device the new header will be written by
+		 * vdev_trim_l2arc_thread() at the end of the TRIM to update the
+		 * trim_state in the header too. When reading the header, if
+		 * trim_state is not VDEV_TRIM_COMPLETE and l2arc_trim_ahead > 0
+		 * we opt to TRIM the whole device again.
+		 */
+		if (l2arc_trim_ahead > 0) {
+			dev->l2ad_trim_all = B_TRUE;
+		} else {
+			bzero(l2dhdr, l2dhdr_asize);
+			l2arc_dev_hdr_update(dev);
+		}
+	}
 }
 
 /*
@@ -9281,22 +9950,30 @@
 void
 l2arc_add_vdev(spa_t *spa, vdev_t *vd)
 {
-	l2arc_dev_t *adddev;
+	l2arc_dev_t		*adddev;
+	uint64_t		l2dhdr_asize;
 
 	ASSERT(!l2arc_vdev_present(vd));
 
 	/*
 	 * Create a new l2arc device entry.
 	 */
-	adddev = kmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP);
+	adddev = vmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP);
 	adddev->l2ad_spa = spa;
 	adddev->l2ad_vdev = vd;
-	adddev->l2ad_start = VDEV_LABEL_START_SIZE;
+	/* leave extra size for an l2arc device header */
+	l2dhdr_asize = adddev->l2ad_dev_hdr_asize =
+	    MAX(sizeof (*adddev->l2ad_dev_hdr), 1 << vd->vdev_ashift);
+	adddev->l2ad_start = VDEV_LABEL_START_SIZE + l2dhdr_asize;
 	adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd);
+	ASSERT3U(adddev->l2ad_start, <, adddev->l2ad_end);
 	adddev->l2ad_hand = adddev->l2ad_start;
+	adddev->l2ad_evict = adddev->l2ad_start;
 	adddev->l2ad_first = B_TRUE;
 	adddev->l2ad_writing = B_FALSE;
+	adddev->l2ad_trim_all = B_FALSE;
 	list_link_init(&adddev->l2ad_node);
+	adddev->l2ad_dev_hdr = kmem_zalloc(l2dhdr_asize, KM_SLEEP);
 
 	mutex_init(&adddev->l2ad_mtx, NULL, MUTEX_DEFAULT, NULL);
 	/*
@@ -9306,8 +9983,26 @@
 	list_create(&adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l2hdr.b_l2node));
 
+	/*
+	 * This is a list of pointers to log blocks that are still present
+	 * on the device.
+	 */
+	list_create(&adddev->l2ad_lbptr_list, sizeof (l2arc_lb_ptr_buf_t),
+	    offsetof(l2arc_lb_ptr_buf_t, node));
+
 	vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
 	zfs_refcount_create(&adddev->l2ad_alloc);
+	zfs_refcount_create(&adddev->l2ad_lb_asize);
+	zfs_refcount_create(&adddev->l2ad_lb_count);
+
+	/*
+	 * Decide if dev is eligible for L2ARC rebuild or whole device
+	 * trimming. This has to happen before the device is added in the
+	 * cache device list and l2arc_dev_mtx is released. Otherwise
+	 * l2arc_feed_thread() might already start writing on the
+	 * device.
+	 */
+	l2arc_rebuild_dev(adddev, B_FALSE);
 
 	/*
 	 * Add device to global list
@@ -9319,29 +10014,64 @@
 }
 
 /*
+ * Decide if a vdev is eligible for L2ARC rebuild, called from vdev_reopen()
+ * in case of onlining a cache device.
+ */
+void
+l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
+{
+	l2arc_dev_t		*dev = NULL;
+
+	dev = l2arc_vdev_get(vd);
+	ASSERT3P(dev, !=, NULL);
+
+	/*
+	 * In contrast to l2arc_add_vdev() we do not have to worry about
+	 * l2arc_feed_thread() invalidating previous content when onlining a
+	 * cache device. The device parameters (l2ad*) are not cleared when
+	 * offlining the device and writing new buffers will not invalidate
+	 * all previous content. In worst case only buffers that have not had
+	 * their log block written to the device will be lost.
+	 * When onlining the cache device (ie offline->online without exporting
+	 * the pool in between) this happens:
+	 * vdev_reopen() -> vdev_open() -> l2arc_rebuild_vdev()
+	 * 			|			|
+	 * 		vdev_is_dead() = B_FALSE	l2ad_rebuild = B_TRUE
+	 * During the time where vdev_is_dead = B_FALSE and until l2ad_rebuild
+	 * is set to B_TRUE we might write additional buffers to the device.
+	 */
+	l2arc_rebuild_dev(dev, reopen);
+}
+
+/*
  * Remove a vdev from the L2ARC.
  */
 void
 l2arc_remove_vdev(vdev_t *vd)
 {
-	l2arc_dev_t *dev, *nextdev, *remdev = NULL;
+	l2arc_dev_t *remdev = NULL;
 
 	/*
 	 * Find the device by vdev
 	 */
-	mutex_enter(&l2arc_dev_mtx);
-	for (dev = list_head(l2arc_dev_list); dev; dev = nextdev) {
-		nextdev = list_next(l2arc_dev_list, dev);
-		if (vd == dev->l2ad_vdev) {
-			remdev = dev;
-			break;
-		}
-	}
+	remdev = l2arc_vdev_get(vd);
 	ASSERT3P(remdev, !=, NULL);
 
 	/*
+	 * Cancel any ongoing or scheduled rebuild.
+	 */
+	mutex_enter(&l2arc_rebuild_thr_lock);
+	if (remdev->l2ad_rebuild_began == B_TRUE) {
+		remdev->l2ad_rebuild_cancel = B_TRUE;
+		while (remdev->l2ad_rebuild == B_TRUE)
+			cv_wait(&l2arc_rebuild_thr_cv, &l2arc_rebuild_thr_lock);
+	}
+	mutex_exit(&l2arc_rebuild_thr_lock);
+
+	/*
 	 * Remove device from global list
 	 */
+	mutex_enter(&l2arc_dev_mtx);
 	list_remove(l2arc_dev_list, remdev);
 	l2arc_dev_last = NULL;		/* may have been invalidated */
 	atomic_dec_64(&l2arc_ndev);
@@ -9352,9 +10082,14 @@
 	 */
 	l2arc_evict(remdev, 0, B_TRUE);
 	list_destroy(&remdev->l2ad_buflist);
+	ASSERT(list_is_empty(&remdev->l2ad_lbptr_list));
+	list_destroy(&remdev->l2ad_lbptr_list);
 	mutex_destroy(&remdev->l2ad_mtx);
 	zfs_refcount_destroy(&remdev->l2ad_alloc);
-	kmem_free(remdev, sizeof (l2arc_dev_t));
+	zfs_refcount_destroy(&remdev->l2ad_lb_asize);
+	zfs_refcount_destroy(&remdev->l2ad_lb_count);
+	kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize);
+	vmem_free(remdev, sizeof (l2arc_dev_t));
 }
 
 void
@@ -9362,11 +10097,11 @@
 {
 	l2arc_thread_exit = 0;
 	l2arc_ndev = 0;
-	l2arc_writes_sent = 0;
-	l2arc_writes_done = 0;
 
 	mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
+	mutex_init(&l2arc_rebuild_thr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&l2arc_rebuild_thr_cv, NULL, CV_DEFAULT, NULL);
 	mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
 
@@ -9381,16 +10116,10 @@
 void
 l2arc_fini(void)
 {
-	/*
-	 * This is called from dmu_fini(), which is called from spa_fini();
-	 * Because of this, we can assume that all l2arc devices have
-	 * already been removed when the pools themselves were removed.
-	 */
-
-	l2arc_do_free_on_write();
-
 	mutex_destroy(&l2arc_feed_thr_lock);
 	cv_destroy(&l2arc_feed_thr_cv);
+	mutex_destroy(&l2arc_rebuild_thr_lock);
+	cv_destroy(&l2arc_rebuild_thr_cv);
 	mutex_destroy(&l2arc_dev_mtx);
 	mutex_destroy(&l2arc_free_on_write_mtx);
 
@@ -9401,7 +10130,7 @@
 void
 l2arc_start(void)
 {
-	if (!(spa_mode_global & FWRITE))
+	if (!(spa_mode_global & SPA_MODE_WRITE))
 		return;
 
 	(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
@@ -9411,7 +10140,7 @@
 void
 l2arc_stop(void)
 {
-	if (!(spa_mode_global & FWRITE))
+	if (!(spa_mode_global & SPA_MODE_WRITE))
 		return;
 
 	mutex_enter(&l2arc_feed_thr_lock);
@@ -9422,35 +10151,932 @@
 	mutex_exit(&l2arc_feed_thr_lock);
 }
 
-#if defined(_KERNEL)
-static int
-param_set_arc_long(const char *buf, zfs_kernel_param_t *kp)
+/*
+ * Punches out rebuild threads for the L2ARC devices in a spa. This should
+ * be called after pool import from the spa async thread, since starting
+ * these threads directly from spa_import() will make them part of the
+ * "zpool import" context and delay process exit (and thus pool import).
+ */
+void
+l2arc_spa_rebuild_start(spa_t *spa)
 {
-	int error;
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
-	error = param_set_long(buf, kp);
-	if (error < 0)
-		return (SET_ERROR(error));
+	/*
+	 * Locate the spa's l2arc devices and kick off rebuild threads.
+	 */
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+		l2arc_dev_t *dev =
+		    l2arc_vdev_get(spa->spa_l2cache.sav_vdevs[i]);
+		if (dev == NULL) {
+			/* Don't attempt a rebuild if the vdev is UNAVAIL */
+			continue;
+		}
+		mutex_enter(&l2arc_rebuild_thr_lock);
+		if (dev->l2ad_rebuild && !dev->l2ad_rebuild_cancel) {
+			dev->l2ad_rebuild_began = B_TRUE;
+			(void) thread_create(NULL, 0, l2arc_dev_rebuild_thread,
+			    dev, 0, &p0, TS_RUN, minclsyspri);
+		}
+		mutex_exit(&l2arc_rebuild_thr_lock);
+	}
+}
 
-	arc_tuning_update();
+/*
+ * Main entry point for L2ARC rebuilding.
+ */
+static void
+l2arc_dev_rebuild_thread(void *arg)
+{
+	l2arc_dev_t *dev = arg;
+
+	VERIFY(!dev->l2ad_rebuild_cancel);
+	VERIFY(dev->l2ad_rebuild);
+	(void) l2arc_rebuild(dev);
+	mutex_enter(&l2arc_rebuild_thr_lock);
+	dev->l2ad_rebuild_began = B_FALSE;
+	dev->l2ad_rebuild = B_FALSE;
+	mutex_exit(&l2arc_rebuild_thr_lock);
+
+	thread_exit();
+}
+
+/*
+ * This function implements the actual L2ARC metadata rebuild. It:
+ * starts reading the log block chain and restores each block's contents
+ * to memory (reconstructing arc_buf_hdr_t's).
+ *
+ * Operation stops under any of the following conditions:
+ *
+ * 1) We reach the end of the log block chain.
+ * 2) We encounter *any* error condition (cksum errors, io errors)
+ */
+static int
+l2arc_rebuild(l2arc_dev_t *dev)
+{
+	vdev_t			*vd = dev->l2ad_vdev;
+	spa_t			*spa = vd->vdev_spa;
+	int			err = 0;
+	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
+	l2arc_log_blk_phys_t	*this_lb, *next_lb;
+	zio_t			*this_io = NULL, *next_io = NULL;
+	l2arc_log_blkptr_t	lbps[2];
+	l2arc_lb_ptr_buf_t	*lb_ptr_buf;
+	boolean_t		lock_held;
+
+	this_lb = vmem_zalloc(sizeof (*this_lb), KM_SLEEP);
+	next_lb = vmem_zalloc(sizeof (*next_lb), KM_SLEEP);
+
+	/*
+	 * We prevent device removal while issuing reads to the device,
+	 * then during the rebuilding phases we drop this lock again so
+	 * that a spa_unload or device remove can be initiated - this is
+	 * safe, because the spa will signal us to stop before removing
+	 * our device and wait for us to stop.
+	 */
+	spa_config_enter(spa, SCL_L2ARC, vd, RW_READER);
+	lock_held = B_TRUE;
+
+	/*
+	 * Retrieve the persistent L2ARC device state.
+	 * L2BLK_GET_PSIZE returns aligned size for log blocks.
+	 */
+	dev->l2ad_evict = MAX(l2dhdr->dh_evict, dev->l2ad_start);
+	dev->l2ad_hand = MAX(l2dhdr->dh_start_lbps[0].lbp_daddr +
+	    L2BLK_GET_PSIZE((&l2dhdr->dh_start_lbps[0])->lbp_prop),
+	    dev->l2ad_start);
+	dev->l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
+
+	vd->vdev_trim_action_time = l2dhdr->dh_trim_action_time;
+	vd->vdev_trim_state = l2dhdr->dh_trim_state;
+
+	/*
+	 * In case the zfs module parameter l2arc_rebuild_enabled is false
+	 * we do not start the rebuild process.
+	 */
+	if (!l2arc_rebuild_enabled)
+		goto out;
+
+	/* Prepare the rebuild process */
+	bcopy(l2dhdr->dh_start_lbps, lbps, sizeof (lbps));
+
+	/* Start the rebuild process */
+	for (;;) {
+		if (!l2arc_log_blkptr_valid(dev, &lbps[0]))
+			break;
+
+		if ((err = l2arc_log_blk_read(dev, &lbps[0], &lbps[1],
+		    this_lb, next_lb, this_io, &next_io)) != 0)
+			goto out;
+
+		/*
+		 * Our memory pressure valve. If the system is running low
+		 * on memory, rather than swamping memory with new ARC buf
+		 * hdrs, we opt not to rebuild the L2ARC. At this point,
+		 * however, we have already set up our L2ARC dev to chain in
+		 * new metadata log blocks, so the user may choose to offline/
+		 * online the L2ARC dev at a later time (or re-import the pool)
+		 * to reconstruct it (when there's less memory pressure).
+		 */
+		if (l2arc_hdr_limit_reached()) {
+			ARCSTAT_BUMP(arcstat_l2_rebuild_abort_lowmem);
+			cmn_err(CE_NOTE, "System running low on memory, "
+			    "aborting L2ARC rebuild.");
+			err = SET_ERROR(ENOMEM);
+			goto out;
+		}
+
+		spa_config_exit(spa, SCL_L2ARC, vd);
+		lock_held = B_FALSE;
+
+		/*
+		 * Now that we know that the next_lb checks out alright, we
+		 * can start reconstruction from this log block.
+		 * L2BLK_GET_PSIZE returns aligned size for log blocks.
+		 */
+		uint64_t asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
+		l2arc_log_blk_restore(dev, this_lb, asize);
+
+		/*
+		 * log block restored, include its pointer in the list of
+		 * pointers to log blocks present in the L2ARC device.
+		 */
+		lb_ptr_buf = kmem_zalloc(sizeof (l2arc_lb_ptr_buf_t), KM_SLEEP);
+		lb_ptr_buf->lb_ptr = kmem_zalloc(sizeof (l2arc_log_blkptr_t),
+		    KM_SLEEP);
+		bcopy(&lbps[0], lb_ptr_buf->lb_ptr,
+		    sizeof (l2arc_log_blkptr_t));
+		mutex_enter(&dev->l2ad_mtx);
+		list_insert_tail(&dev->l2ad_lbptr_list, lb_ptr_buf);
+		ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize);
+		ARCSTAT_BUMP(arcstat_l2_log_blk_count);
+		zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf);
+		zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf);
+		mutex_exit(&dev->l2ad_mtx);
+		vdev_space_update(vd, asize, 0, 0);
+
+		/*
+		 * Protection against loops of log blocks:
+		 *
+		 *				       l2ad_hand  l2ad_evict
+		 *                                         V	      V
+		 * l2ad_start |=======================================| l2ad_end
+		 *             -----|||----|||---|||----|||
+		 *                  (3)    (2)   (1)    (0)
+		 *             ---|||---|||----|||---|||
+		 *		  (7)   (6)    (5)   (4)
+		 *
+		 * In this situation the pointer of log block (4) passes
+		 * l2arc_log_blkptr_valid() but the log block should not be
+		 * restored as it is overwritten by the payload of log block
+		 * (0). Only log blocks (0)-(3) should be restored. We check
+		 * whether l2ad_evict lies in between the payload starting
+		 * offset of the next log block (lbps[1].lbp_payload_start)
+		 * and the payload starting offset of the present log block
+		 * (lbps[0].lbp_payload_start). If true and this isn't the
+		 * first pass, we are looping from the beginning and we should
+		 * stop.
+		 */
+		if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
+		    lbps[0].lbp_payload_start, dev->l2ad_evict) &&
+		    !dev->l2ad_first)
+			goto out;
+
+		cond_resched();
+		for (;;) {
+			mutex_enter(&l2arc_rebuild_thr_lock);
+			if (dev->l2ad_rebuild_cancel) {
+				dev->l2ad_rebuild = B_FALSE;
+				cv_signal(&l2arc_rebuild_thr_cv);
+				mutex_exit(&l2arc_rebuild_thr_lock);
+				err = SET_ERROR(ECANCELED);
+				goto out;
+			}
+			mutex_exit(&l2arc_rebuild_thr_lock);
+			if (spa_config_tryenter(spa, SCL_L2ARC, vd,
+			    RW_READER)) {
+				lock_held = B_TRUE;
+				break;
+			}
+			/*
+			 * L2ARC config lock held by somebody as writer,
+			 * possibly due to them trying to remove us. They'll
+			 * likely want us to shut down, so after a little
+			 * delay, we check l2ad_rebuild_cancel and retry
+			 * the lock again.
+			 */
+			delay(1);
+		}
+
+		/*
+		 * Continue with the next log block.
+		 */
+		lbps[0] = lbps[1];
+		lbps[1] = this_lb->lb_prev_lbp;
+		PTR_SWAP(this_lb, next_lb);
+		this_io = next_io;
+		next_io = NULL;
+	}
+
+	if (this_io != NULL)
+		l2arc_log_blk_fetch_abort(this_io);
+out:
+	if (next_io != NULL)
+		l2arc_log_blk_fetch_abort(next_io);
+	vmem_free(this_lb, sizeof (*this_lb));
+	vmem_free(next_lb, sizeof (*next_lb));
+
+	if (!l2arc_rebuild_enabled) {
+		spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+		    "disabled");
+	} else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) > 0) {
+		ARCSTAT_BUMP(arcstat_l2_rebuild_success);
+		spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+		    "successful, restored %llu blocks",
+		    (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
+	} else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) == 0) {
+		/*
+		 * No error but also nothing restored, meaning the lbps array
+		 * in the device header points to invalid/non-present log
+		 * blocks. Reset the header.
+		 */
+		spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+		    "no valid log blocks");
+		bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+		l2arc_dev_hdr_update(dev);
+	} else if (err == ECANCELED) {
+		/*
+		 * In case the rebuild was canceled do not log to spa history
+		 * log as the pool may be in the process of being removed.
+		 */
+		zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
+		    (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
+	} else if (err != 0) {
+		spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+		    "aborted, restored %llu blocks",
+		    (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
+	}
+
+	if (lock_held)
+		spa_config_exit(spa, SCL_L2ARC, vd);
+
+	return (err);
+}
+
+/*
+ * Attempts to read the device header on the provided L2ARC device and writes
+ * it to dev->l2ad_dev_hdr. On success, this function returns 0, otherwise the
+ * appropriate error code is returned.
+ */
+static int
+l2arc_dev_hdr_read(l2arc_dev_t *dev)
+{
+	int			err;
+	uint64_t		guid;
+	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
+	const uint64_t		l2dhdr_asize = dev->l2ad_dev_hdr_asize;
+	abd_t 			*abd;
+
+	guid = spa_guid(dev->l2ad_vdev->vdev_spa);
+
+	abd = abd_get_from_buf(l2dhdr, l2dhdr_asize);
+
+	err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
+	    VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
+	    ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
+	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+	    ZIO_FLAG_SPECULATIVE, B_FALSE));
+
+	abd_free(abd);
+
+	if (err != 0) {
+		ARCSTAT_BUMP(arcstat_l2_rebuild_abort_dh_errors);
+		zfs_dbgmsg("L2ARC IO error (%d) while reading device header, "
+		    "vdev guid: %llu", err,
+		    (u_longlong_t)dev->l2ad_vdev->vdev_guid);
+		return (err);
+	}
+
+	if (l2dhdr->dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC))
+		byteswap_uint64_array(l2dhdr, sizeof (*l2dhdr));
+
+	if (l2dhdr->dh_magic != L2ARC_DEV_HDR_MAGIC ||
+	    l2dhdr->dh_spa_guid != guid ||
+	    l2dhdr->dh_vdev_guid != dev->l2ad_vdev->vdev_guid ||
+	    l2dhdr->dh_version != L2ARC_PERSISTENT_VERSION ||
+	    l2dhdr->dh_log_entries != dev->l2ad_log_entries ||
+	    l2dhdr->dh_end != dev->l2ad_end ||
+	    !l2arc_range_check_overlap(dev->l2ad_start, dev->l2ad_end,
+	    l2dhdr->dh_evict) ||
+	    (l2dhdr->dh_trim_state != VDEV_TRIM_COMPLETE &&
+	    l2arc_trim_ahead > 0)) {
+		/*
+		 * Attempt to rebuild a device containing no actual dev hdr
+		 * or containing a header from some other pool or from another
+		 * version of persistent L2ARC.
+		 */
+		ARCSTAT_BUMP(arcstat_l2_rebuild_abort_unsupported);
+		return (SET_ERROR(ENOTSUP));
+	}
 
 	return (0);
 }
 
+/*
+ * Reads L2ARC log blocks from storage and validates their contents.
+ *
+ * This function implements a simple fetcher to make sure that while
+ * we're processing one buffer the L2ARC is already fetching the next
+ * one in the chain.
+ *
+ * The arguments this_lbp and next_lbp point to the current and next log block
+ * address in the block chain. Similarly, this_lb and next_lb hold the
+ * l2arc_log_blk_phys_t's of the current and next L2ARC blk.
+ *
+ * The `this_io' and `next_io' arguments are used for block fetching.
+ * When issuing the first blk IO during rebuild, you should pass NULL for
+ * `this_io'. This function will then issue a sync IO to read the block and
+ * also issue an async IO to fetch the next block in the block chain. The
+ * fetched IO is returned in `next_io'. On subsequent calls to this
+ * function, pass the value returned in `next_io' from the previous call
+ * as `this_io' and a fresh `next_io' pointer to hold the next fetch IO.
+ * Prior to the call, you should initialize your `next_io' pointer to be
+ * NULL. If no fetch IO was issued, the pointer is left set at NULL.
+ *
+ * On success, this function returns 0, otherwise it returns an appropriate
+ * error code. On error the fetching IO is aborted and cleared before
+ * returning from this function. Therefore, if we return an error, the
+ * caller can assume that we have taken care of cleanup of fetch IOs.
+ */
 static int
-param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
+l2arc_log_blk_read(l2arc_dev_t *dev,
+    const l2arc_log_blkptr_t *this_lbp, const l2arc_log_blkptr_t *next_lbp,
+    l2arc_log_blk_phys_t *this_lb, l2arc_log_blk_phys_t *next_lb,
+    zio_t *this_io, zio_t **next_io)
 {
-	int error;
+	int		err = 0;
+	zio_cksum_t	cksum;
+	abd_t		*abd = NULL;
+	uint64_t	asize;
 
-	error = param_set_int(buf, kp);
-	if (error < 0)
-		return (SET_ERROR(error));
+	ASSERT(this_lbp != NULL && next_lbp != NULL);
+	ASSERT(this_lb != NULL && next_lb != NULL);
+	ASSERT(next_io != NULL && *next_io == NULL);
+	ASSERT(l2arc_log_blkptr_valid(dev, this_lbp));
 
-	arc_tuning_update();
+	/*
+	 * Check to see if we have issued the IO for this log block in a
+	 * previous run. If not, this is the first call, so issue it now.
+	 */
+	if (this_io == NULL) {
+		this_io = l2arc_log_blk_fetch(dev->l2ad_vdev, this_lbp,
+		    this_lb);
+	}
 
-	return (0);
+	/*
+	 * Peek to see if we can start issuing the next IO immediately.
+	 */
+	if (l2arc_log_blkptr_valid(dev, next_lbp)) {
+		/*
+		 * Start issuing IO for the next log block early - this
+		 * should help keep the L2ARC device busy while we
+		 * decompress and restore this log block.
+		 */
+		*next_io = l2arc_log_blk_fetch(dev->l2ad_vdev, next_lbp,
+		    next_lb);
+	}
+
+	/* Wait for the IO to read this log block to complete */
+	if ((err = zio_wait(this_io)) != 0) {
+		ARCSTAT_BUMP(arcstat_l2_rebuild_abort_io_errors);
+		zfs_dbgmsg("L2ARC IO error (%d) while reading log block, "
+		    "offset: %llu, vdev guid: %llu", err,
+		    (u_longlong_t)this_lbp->lbp_daddr,
+		    (u_longlong_t)dev->l2ad_vdev->vdev_guid);
+		goto cleanup;
+	}
+
+	/*
+	 * Make sure the buffer checks out.
+	 * L2BLK_GET_PSIZE returns aligned size for log blocks.
+	 */
+	asize = L2BLK_GET_PSIZE((this_lbp)->lbp_prop);
+	fletcher_4_native(this_lb, asize, NULL, &cksum);
+	if (!ZIO_CHECKSUM_EQUAL(cksum, this_lbp->lbp_cksum)) {
+		ARCSTAT_BUMP(arcstat_l2_rebuild_abort_cksum_lb_errors);
+		zfs_dbgmsg("L2ARC log block cksum failed, offset: %llu, "
+		    "vdev guid: %llu, l2ad_hand: %llu, l2ad_evict: %llu",
+		    (u_longlong_t)this_lbp->lbp_daddr,
+		    (u_longlong_t)dev->l2ad_vdev->vdev_guid,
+		    (u_longlong_t)dev->l2ad_hand,
+		    (u_longlong_t)dev->l2ad_evict);
+		err = SET_ERROR(ECKSUM);
+		goto cleanup;
+	}
+
+	/* Now we can take our time decoding this buffer */
+	switch (L2BLK_GET_COMPRESS((this_lbp)->lbp_prop)) {
+	case ZIO_COMPRESS_OFF:
+		break;
+	case ZIO_COMPRESS_LZ4:
+		abd = abd_alloc_for_io(asize, B_TRUE);
+		abd_copy_from_buf_off(abd, this_lb, 0, asize);
+		if ((err = zio_decompress_data(
+		    L2BLK_GET_COMPRESS((this_lbp)->lbp_prop),
+		    abd, this_lb, asize, sizeof (*this_lb), NULL)) != 0) {
+			err = SET_ERROR(EINVAL);
+			goto cleanup;
+		}
+		break;
+	default:
+		err = SET_ERROR(EINVAL);
+		goto cleanup;
+	}
+	if (this_lb->lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
+		byteswap_uint64_array(this_lb, sizeof (*this_lb));
+	if (this_lb->lb_magic != L2ARC_LOG_BLK_MAGIC) {
+		err = SET_ERROR(EINVAL);
+		goto cleanup;
+	}
+cleanup:
+	/* Abort an in-flight fetch I/O in case of error */
+	if (err != 0 && *next_io != NULL) {
+		l2arc_log_blk_fetch_abort(*next_io);
+		*next_io = NULL;
+	}
+	if (abd != NULL)
+		abd_free(abd);
+	return (err);
 }
 
+/*
+ * Restores the payload of a log block to ARC. This creates empty ARC hdr
+ * entries which only contain an l2arc hdr, essentially restoring the
+ * buffers to their L2ARC evicted state. This function also updates space
+ * usage on the L2ARC vdev to make sure it tracks restored buffers.
+ */
+static void
+l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb,
+    uint64_t lb_asize)
+{
+	uint64_t	size = 0, asize = 0;
+	uint64_t	log_entries = dev->l2ad_log_entries;
+
+	/*
+	 * Usually arc_adapt() is called only for data, not headers, but
+	 * since we may allocate significant amount of memory here, let ARC
+	 * grow its arc_c.
+	 */
+	arc_adapt(log_entries * HDR_L2ONLY_SIZE, arc_l2c_only);
+
+	for (int i = log_entries - 1; i >= 0; i--) {
+		/*
+		 * Restore goes in the reverse temporal direction to preserve
+		 * correct temporal ordering of buffers in the l2ad_buflist.
+		 * l2arc_hdr_restore also does a list_insert_tail instead of
+		 * list_insert_head on the l2ad_buflist:
+		 *
+		 *		LIST	l2ad_buflist		LIST
+		 *		HEAD  <------ (time) ------	TAIL
+		 * direction	+-----+-----+-----+-----+-----+    direction
+		 * of l2arc <== | buf | buf | buf | buf | buf | ===> of rebuild
+		 * fill		+-----+-----+-----+-----+-----+
+		 *		^				^
+		 *		|				|
+		 *		|				|
+		 *	l2arc_feed_thread		l2arc_rebuild
+		 *	will place new bufs here	restores bufs here
+		 *
+		 * During l2arc_rebuild() the device is not used by
+		 * l2arc_feed_thread() as dev->l2ad_rebuild is set to true.
+		 */
+		size += L2BLK_GET_LSIZE((&lb->lb_entries[i])->le_prop);
+		asize += vdev_psize_to_asize(dev->l2ad_vdev,
+		    L2BLK_GET_PSIZE((&lb->lb_entries[i])->le_prop));
+		l2arc_hdr_restore(&lb->lb_entries[i], dev);
+	}
+
+	/*
+	 * Record rebuild stats:
+	 *	size		Logical size of restored buffers in the L2ARC
+	 *	asize		Aligned size of restored buffers in the L2ARC
+	 */
+	ARCSTAT_INCR(arcstat_l2_rebuild_size, size);
+	ARCSTAT_INCR(arcstat_l2_rebuild_asize, asize);
+	ARCSTAT_INCR(arcstat_l2_rebuild_bufs, log_entries);
+	ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, lb_asize);
+	ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, asize / lb_asize);
+	ARCSTAT_BUMP(arcstat_l2_rebuild_log_blks);
+}
+
+/*
+ * Restores a single ARC buf hdr from a log entry. The ARC buffer is put
+ * into a state indicating that it has been evicted to L2ARC.
+ */
+static void
+l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
+{
+	arc_buf_hdr_t		*hdr, *exists;
+	kmutex_t		*hash_lock;
+	arc_buf_contents_t	type = L2BLK_GET_TYPE((le)->le_prop);
+	uint64_t		asize;
+
+	/*
+	 * Do all the allocation before grabbing any locks, this lets us
+	 * sleep if memory is full and we don't have to deal with failed
+	 * allocations.
+	 */
+	hdr = arc_buf_alloc_l2only(L2BLK_GET_LSIZE((le)->le_prop), type,
+	    dev, le->le_dva, le->le_daddr,
+	    L2BLK_GET_PSIZE((le)->le_prop), le->le_birth,
+	    L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel,
+	    L2BLK_GET_PROTECTED((le)->le_prop),
+	    L2BLK_GET_PREFETCH((le)->le_prop),
+	    L2BLK_GET_STATE((le)->le_prop));
+	asize = vdev_psize_to_asize(dev->l2ad_vdev,
+	    L2BLK_GET_PSIZE((le)->le_prop));
+
+	/*
+	 * vdev_space_update() has to be called before arc_hdr_destroy() to
+	 * avoid underflow since the latter also calls vdev_space_update().
+	 */
+	l2arc_hdr_arcstats_increment(hdr);
+	vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
+
+	mutex_enter(&dev->l2ad_mtx);
+	list_insert_tail(&dev->l2ad_buflist, hdr);
+	(void) zfs_refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr);
+	mutex_exit(&dev->l2ad_mtx);
+
+	exists = buf_hash_insert(hdr, &hash_lock);
+	if (exists) {
+		/* Buffer was already cached, no need to restore it. */
+		arc_hdr_destroy(hdr);
+		/*
+		 * If the buffer is already cached, check whether it has
+		 * L2ARC metadata. If not, enter them and update the flag.
+		 * This is important in case of onlining a cache device, since
+		 * we previously evicted all L2ARC metadata from ARC.
+		 */
+		if (!HDR_HAS_L2HDR(exists)) {
+			arc_hdr_set_flags(exists, ARC_FLAG_HAS_L2HDR);
+			exists->b_l2hdr.b_dev = dev;
+			exists->b_l2hdr.b_daddr = le->le_daddr;
+			exists->b_l2hdr.b_arcs_state =
+			    L2BLK_GET_STATE((le)->le_prop);
+			mutex_enter(&dev->l2ad_mtx);
+			list_insert_tail(&dev->l2ad_buflist, exists);
+			(void) zfs_refcount_add_many(&dev->l2ad_alloc,
+			    arc_hdr_size(exists), exists);
+			mutex_exit(&dev->l2ad_mtx);
+			l2arc_hdr_arcstats_increment(exists);
+			vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
+		}
+		ARCSTAT_BUMP(arcstat_l2_rebuild_bufs_precached);
+	}
+
+	mutex_exit(hash_lock);
+}
+
+/*
+ * Starts an asynchronous read IO to read a log block. This is used in log
+ * block reconstruction to start reading the next block before we are done
+ * decoding and reconstructing the current block, to keep the l2arc device
+ * nice and hot with read IO to process.
+ * The returned zio will contain newly allocated memory buffers for the IO
+ * data which should then be freed by the caller once the zio is no longer
+ * needed (i.e. due to it having completed). If you wish to abort this
+ * zio, you should do so using l2arc_log_blk_fetch_abort, which takes
+ * care of disposing of the allocated buffers correctly.
+ */
+static zio_t *
+l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp,
+    l2arc_log_blk_phys_t *lb)
+{
+	uint32_t		asize;
+	zio_t			*pio;
+	l2arc_read_callback_t	*cb;
+
+	/* L2BLK_GET_PSIZE returns aligned size for log blocks */
+	asize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
+	ASSERT(asize <= sizeof (l2arc_log_blk_phys_t));
+
+	cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
+	cb->l2rcb_abd = abd_get_from_buf(lb, asize);
+	pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
+	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
+	    ZIO_FLAG_DONT_RETRY);
+	(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
+	    cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
+	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
+
+	return (pio);
+}
+
+/*
+ * Disposes of a zio returned from l2arc_log_blk_fetch by waiting for it to
+ * finish; presumably l2arc_blk_fetch_done then frees its buffers (confirm).
+ */
+static void
+l2arc_log_blk_fetch_abort(zio_t *zio)
+{
+	(void) zio_wait(zio);
+}
+
+/*
+ * Refreshes the device header from `dev' and writes it out synchronously.
+ */
+void
+l2arc_dev_hdr_update(l2arc_dev_t *dev)
+{
+	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
+	const uint64_t		l2dhdr_asize = dev->l2ad_dev_hdr_asize;
+	abd_t			*abd;
+	int			err;
+
+	VERIFY(spa_config_held(dev->l2ad_spa, SCL_STATE_ALL, RW_READER));
+
+	l2dhdr->dh_magic = L2ARC_DEV_HDR_MAGIC;
+	l2dhdr->dh_version = L2ARC_PERSISTENT_VERSION;
+	l2dhdr->dh_spa_guid = spa_guid(dev->l2ad_vdev->vdev_spa);
+	l2dhdr->dh_vdev_guid = dev->l2ad_vdev->vdev_guid;
+	l2dhdr->dh_log_entries = dev->l2ad_log_entries;
+	l2dhdr->dh_evict = dev->l2ad_evict;
+	l2dhdr->dh_start = dev->l2ad_start;
+	l2dhdr->dh_end = dev->l2ad_end;
+	l2dhdr->dh_lb_asize = zfs_refcount_count(&dev->l2ad_lb_asize);
+	l2dhdr->dh_lb_count = zfs_refcount_count(&dev->l2ad_lb_count);
+	l2dhdr->dh_flags = 0;
+	l2dhdr->dh_trim_action_time = dev->l2ad_vdev->vdev_trim_action_time;
+	l2dhdr->dh_trim_state = dev->l2ad_vdev->vdev_trim_state;
+	if (dev->l2ad_first)
+		l2dhdr->dh_flags |= L2ARC_DEV_HDR_EVICT_FIRST;
+
+	abd = abd_get_from_buf(l2dhdr, l2dhdr_asize);
+
+	err = zio_wait(zio_write_phys(NULL, dev->l2ad_vdev,
+	    VDEV_LABEL_START_SIZE, l2dhdr_asize, abd, ZIO_CHECKSUM_LABEL, NULL,
+	    NULL, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE));
+
+	abd_free(abd);
+
+	if (err != 0) {
+		zfs_dbgmsg("L2ARC IO error (%d) while writing device header, "
+		    "vdev guid: %llu", err,
+		    (u_longlong_t)dev->l2ad_vdev->vdev_guid);
+	}
+}
+
+/*
+ * Commits a log block to the L2ARC device and returns the asize it occupies.
+ * This routine is invoked from l2arc_write_buffers when the log block fills
+ * up. It allocates some memory to temporarily hold the serialized buffer to
+ * be written. This is then released in l2arc_write_done.
+ */
+static uint64_t
+l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
+{
+	l2arc_log_blk_phys_t	*lb = &dev->l2ad_log_blk;
+	l2arc_dev_hdr_phys_t	*l2dhdr = dev->l2ad_dev_hdr;
+	uint64_t		psize, asize;
+	zio_t			*wzio;
+	l2arc_lb_abd_buf_t	*abd_buf;
+	uint8_t			*tmpbuf;
+	l2arc_lb_ptr_buf_t	*lb_ptr_buf;
+
+	VERIFY3S(dev->l2ad_log_ent_idx, ==, dev->l2ad_log_entries);
+
+	tmpbuf = zio_buf_alloc(sizeof (*lb));
+	abd_buf = zio_buf_alloc(sizeof (*abd_buf));
+	abd_buf->abd = abd_get_from_buf(lb, sizeof (*lb));
+	lb_ptr_buf = kmem_zalloc(sizeof (l2arc_lb_ptr_buf_t), KM_SLEEP);
+	lb_ptr_buf->lb_ptr = kmem_zalloc(sizeof (l2arc_log_blkptr_t), KM_SLEEP);
+
+	/* link the buffer into the block chain */
+	lb->lb_prev_lbp = l2dhdr->dh_start_lbps[1];
+	lb->lb_magic = L2ARC_LOG_BLK_MAGIC;
+
+	/*
+	 * l2arc_log_blk_commit() may be called multiple times during a single
+	 * l2arc_write_buffers() call. Save the allocated abd buffers in a list
+	 * so we can free them in l2arc_write_done() later on.
+	 */
+	list_insert_tail(&cb->l2wcb_abd_list, abd_buf);
+
+	/* try to compress the buffer */
+	psize = zio_compress_data(ZIO_COMPRESS_LZ4,
+	    abd_buf->abd, tmpbuf, sizeof (*lb), 0);
+
+	/* a log block is never entirely zero */
+	ASSERT(psize != 0);
+	asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
+	ASSERT(asize <= sizeof (*lb));
+
+	/*
+	 * Update the start log block pointer in the device header to point
+	 * to the log block we're about to write.
+	 */
+	l2dhdr->dh_start_lbps[1] = l2dhdr->dh_start_lbps[0];
+	l2dhdr->dh_start_lbps[0].lbp_daddr = dev->l2ad_hand;
+	l2dhdr->dh_start_lbps[0].lbp_payload_asize =
+	    dev->l2ad_log_blk_payload_asize;
+	l2dhdr->dh_start_lbps[0].lbp_payload_start =
+	    dev->l2ad_log_blk_payload_start;
+	_NOTE(CONSTCOND)
+	L2BLK_SET_LSIZE(
+	    (&l2dhdr->dh_start_lbps[0])->lbp_prop, sizeof (*lb));
+	L2BLK_SET_PSIZE(
+	    (&l2dhdr->dh_start_lbps[0])->lbp_prop, asize);
+	L2BLK_SET_CHECKSUM(
+	    (&l2dhdr->dh_start_lbps[0])->lbp_prop,
+	    ZIO_CHECKSUM_FLETCHER_4);
+	if (asize < sizeof (*lb)) {
+		/* compression succeeded */
+		bzero(tmpbuf + psize, asize - psize);
+		L2BLK_SET_COMPRESS(
+		    (&l2dhdr->dh_start_lbps[0])->lbp_prop,
+		    ZIO_COMPRESS_LZ4);
+	} else {
+		/* compression failed */
+		bcopy(lb, tmpbuf, sizeof (*lb));
+		L2BLK_SET_COMPRESS(
+		    (&l2dhdr->dh_start_lbps[0])->lbp_prop,
+		    ZIO_COMPRESS_OFF);
+	}
+
+	/* checksum what we're about to write */
+	fletcher_4_native(tmpbuf, asize, NULL,
+	    &l2dhdr->dh_start_lbps[0].lbp_cksum);
+
+	abd_free(abd_buf->abd);
+
+	/* perform the write itself */
+	abd_buf->abd = abd_get_from_buf(tmpbuf, sizeof (*lb));
+	abd_take_ownership_of_buf(abd_buf->abd, B_TRUE);
+	wzio = zio_write_phys(pio, dev->l2ad_vdev, dev->l2ad_hand,
+	    asize, abd_buf->abd, ZIO_CHECKSUM_OFF, NULL, NULL,
+	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE);
+	DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, zio_t *, wzio);
+	(void) zio_nowait(wzio);
+
+	dev->l2ad_hand += asize;
+	/*
+	 * Include the committed log block's pointer in the list of pointers
+	 * to log blocks present in the L2ARC device.
+	 */
+	bcopy(&l2dhdr->dh_start_lbps[0], lb_ptr_buf->lb_ptr,
+	    sizeof (l2arc_log_blkptr_t));
+	mutex_enter(&dev->l2ad_mtx);
+	list_insert_head(&dev->l2ad_lbptr_list, lb_ptr_buf);
+	ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize);
+	ARCSTAT_BUMP(arcstat_l2_log_blk_count);
+	zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf);
+	zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf);
+	mutex_exit(&dev->l2ad_mtx);
+	vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
+
+	/* bump the kstats */
+	ARCSTAT_INCR(arcstat_l2_write_bytes, asize);
+	ARCSTAT_BUMP(arcstat_l2_log_blk_writes);
+	ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, asize);
+	ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio,
+	    dev->l2ad_log_blk_payload_asize / asize);
+
+	/* start a new log block */
+	dev->l2ad_log_ent_idx = 0;
+	dev->l2ad_log_blk_payload_asize = 0;
+	dev->l2ad_log_blk_payload_start = 0;
+
+	return (asize);
+}
+
+/*
+ * Validates an L2ARC log block address to make sure that it can be read
+ * from the provided L2ARC device.
+ */
+boolean_t
+l2arc_log_blkptr_valid(l2arc_dev_t *dev, const l2arc_log_blkptr_t *lbp)
+{
+	/* L2BLK_GET_PSIZE returns aligned size for log blocks */
+	uint64_t asize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
+	uint64_t end = lbp->lbp_daddr + asize - 1;
+	uint64_t start = lbp->lbp_payload_start;
+	boolean_t evicted = B_FALSE;
+
+	/*
+	 * A log block is valid if all of the following conditions are true:
+	 * - it fits entirely (including its payload) between l2ad_start and
+	 *   l2ad_end
+	 * - it has a valid size
+	 * - neither the log block itself nor part of its payload was evicted
+	 *   by l2arc_evict() (not enforced while dev->l2ad_first is set):
+	 *
+	 *		l2ad_hand          l2ad_evict
+	 *		|			 |	lbp_daddr
+	 *		|     start		 |	|  end
+	 *		|     |			 |	|  |
+	 *		V     V		         V	V  V
+	 *   l2ad_start ============================================ l2ad_end
+	 *                    --------------------------||||
+	 *				^		 ^
+	 *				|		log block
+	 *				payload
+	 */
+
+	evicted =
+	    l2arc_range_check_overlap(start, end, dev->l2ad_hand) ||
+	    l2arc_range_check_overlap(start, end, dev->l2ad_evict) ||
+	    l2arc_range_check_overlap(dev->l2ad_hand, dev->l2ad_evict, start) ||
+	    l2arc_range_check_overlap(dev->l2ad_hand, dev->l2ad_evict, end);
+
+	return (start >= dev->l2ad_start && end <= dev->l2ad_end &&
+	    asize > 0 && asize <= sizeof (l2arc_log_blk_phys_t) &&
+	    (!evicted || dev->l2ad_first));
+}
+
+/*
+ * Inserts ARC buffer header `hdr' into the current L2ARC log block on
+ * the device. The buffer being inserted must be present in L2ARC. Returns
+ * B_TRUE if the log block is now full and needs to be committed, B_FALSE if
+ * it still has room or if l2ad_log_entries is 0 (nothing is inserted).
+ */
+static boolean_t
+l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
+{
+	l2arc_log_blk_phys_t	*lb = &dev->l2ad_log_blk;
+	l2arc_log_ent_phys_t	*le;
+
+	if (dev->l2ad_log_entries == 0)
+		return (B_FALSE);
+
+	int index = dev->l2ad_log_ent_idx++;
+
+	ASSERT3S(index, <, dev->l2ad_log_entries);
+	ASSERT(HDR_HAS_L2HDR(hdr));
+
+	le = &lb->lb_entries[index];
+	bzero(le, sizeof (*le));
+	le->le_dva = hdr->b_dva;
+	le->le_birth = hdr->b_birth;
+	le->le_daddr = hdr->b_l2hdr.b_daddr;
+	if (index == 0)
+		dev->l2ad_log_blk_payload_start = le->le_daddr;
+	L2BLK_SET_LSIZE((le)->le_prop, HDR_GET_LSIZE(hdr));
+	L2BLK_SET_PSIZE((le)->le_prop, HDR_GET_PSIZE(hdr));
+	L2BLK_SET_COMPRESS((le)->le_prop, HDR_GET_COMPRESS(hdr));
+	le->le_complevel = hdr->b_complevel;
+	L2BLK_SET_TYPE((le)->le_prop, hdr->b_type);
+	L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr)));
+	L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr)));
+	L2BLK_SET_STATE((le)->le_prop, hdr->b_l1hdr.b_state->arcs_state);
+
+	dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev,
+	    HDR_GET_PSIZE(hdr));
+
+	return (dev->l2ad_log_ent_idx == dev->l2ad_log_entries);
+}
+
+/*
+ * Checks whether a given L2ARC device address sits in a time-sequential
+ * range. The trick here is that the L2ARC is a rotary buffer, so we can't
+ * just do a range comparison, we need to handle the situation in which the
+ * range wraps around the end of the L2ARC device. Arguments:
+ *	bottom -- Lower end of the range to check (written to earlier).
+ *	top    -- Upper end of the range to check (written to later).
+ *	check  -- The address for which we want to determine if it sits in
+ *		  between the top and bottom (both bounds inclusive).
+ *
+ * The 3-way conditional below represents the following cases:
+ *
+ *	bottom < top : Sequentially ordered case:
+ *	  <check>--------+-------------------+
+ *	                 |  (overlap here?)  |
+ *	 L2ARC dev       V                   V
+ *	 |---------------<bottom>============<top>--------------|
+ *
+ *	bottom > top: Looped-around case:
+ *	                      <check>--------+------------------+
+ *	                                     |  (overlap here?) |
+ *	 L2ARC dev                           V                  V
+ *	 |===============<top>---------------<bottom>===========|
+ *	 ^               ^
+ *	 |  (or here?)   |
+ *	 +---------------+---------<check>
+ *
+ *	top == bottom : Just a single address comparison.
+ */
+boolean_t
+l2arc_range_check_overlap(uint64_t bottom, uint64_t top, uint64_t check)
+{
+	if (bottom < top)
+		return (bottom <= check && check <= top);
+	else if (bottom > top)
+		return (check <= top || bottom <= check);
+	else
+		return (check == top);
+}
 
 EXPORT_SYMBOL(arc_buf_size);
 EXPORT_SYMBOL(arc_write);
@@ -9461,116 +11087,127 @@
 EXPORT_SYMBOL(arc_remove_prune_callback);
 
 /* BEGIN CSTYLED */
-module_param_call(zfs_arc_min, param_set_arc_long, param_get_long,
-	&zfs_arc_min, 0644);
-MODULE_PARM_DESC(zfs_arc_min, "Min arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_min,
+	param_get_long, ZMOD_RW, "Min arc size");
 
-module_param_call(zfs_arc_max, param_set_arc_long, param_get_long,
-	&zfs_arc_max, 0644);
-MODULE_PARM_DESC(zfs_arc_max, "Max arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_max,
+	param_get_long, ZMOD_RW, "Max arc size");
 
-module_param_call(zfs_arc_meta_limit, param_set_arc_long, param_get_long,
-	&zfs_arc_meta_limit, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long,
+	param_get_long, ZMOD_RW, "Metadata limit for arc size");
 
-module_param_call(zfs_arc_meta_limit_percent, param_set_arc_long,
-	param_get_long, &zfs_arc_meta_limit_percent, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_limit_percent,
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent,
+	param_set_arc_long, param_get_long, ZMOD_RW,
 	"Percent of arc size for arc meta limit");
 
-module_param_call(zfs_arc_meta_min, param_set_arc_long, param_get_long,
-	&zfs_arc_meta_min, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long,
+	param_get_long, ZMOD_RW, "Min arc metadata");
 
-module_param(zfs_arc_meta_prune, int, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_prune, "Meta objects to scan for prune");
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW,
+	"Meta objects to scan for prune");
 
-module_param(zfs_arc_meta_adjust_restarts, int, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_adjust_restarts,
-	"Limit number of restarts in arc_adjust_meta");
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, INT, ZMOD_RW,
+	"Limit number of restarts in arc_evict_meta");
 
-module_param(zfs_arc_meta_strategy, int, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_strategy, "Meta reclaim strategy");
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, INT, ZMOD_RW,
+	"Meta reclaim strategy");
 
-module_param_call(zfs_arc_grow_retry, param_set_arc_int, param_get_int,
-	&zfs_arc_grow_retry, 0644);
-MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
+	param_get_int, ZMOD_RW, "Seconds before growing arc size");
 
-module_param(zfs_arc_p_dampener_disable, int, 0644);
-MODULE_PARM_DESC(zfs_arc_p_dampener_disable, "disable arc_p adapt dampener");
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_dampener_disable, INT, ZMOD_RW,
+	"Disable arc_p adapt dampener");
 
-module_param_call(zfs_arc_shrink_shift, param_set_arc_int, param_get_int,
-	&zfs_arc_shrink_shift, 0644);
-MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
+	param_get_int, ZMOD_RW, "log2(fraction of arc to reclaim)");
 
-module_param(zfs_arc_pc_percent, uint, 0644);
-MODULE_PARM_DESC(zfs_arc_pc_percent,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
 	"Percent of pagecache to reclaim arc to");
 
-module_param_call(zfs_arc_p_min_shift, param_set_arc_int, param_get_int,
-	&zfs_arc_p_min_shift, 0644);
-MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, p_min_shift, param_set_arc_int,
+	param_get_int, ZMOD_RW, "arc_c shift to calc min/max arc_p");
 
-module_param(zfs_arc_average_blocksize, int, 0444);
-MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size");
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD,
+	"Target average block size");
 
-module_param(zfs_compressed_arc_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers");
+ZFS_MODULE_PARAM(zfs, zfs_, compressed_arc_enabled, INT, ZMOD_RW,
+	"Disable compressed arc buffers");
 
-module_param_call(zfs_arc_min_prefetch_ms, param_set_arc_int, param_get_int,
-	&zfs_arc_min_prefetch_ms, 0644);
-MODULE_PARM_DESC(zfs_arc_min_prefetch_ms, "Min life of prefetch block in ms");
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prefetch_ms, param_set_arc_int,
+	param_get_int, ZMOD_RW, "Min life of prefetch block in ms");
 
-module_param(zfs_arc_min_prescient_prefetch_ms, int, 0644);
-MODULE_PARM_DESC(zfs_arc_min_prescient_prefetch_ms,
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms,
+	param_set_arc_int, param_get_int, ZMOD_RW,
 	"Min life of prescient prefetched block in ms");
 
-module_param(l2arc_write_max, ulong, 0644);
-MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW,
+	"Max write bytes per interval");
 
-module_param(l2arc_write_boost, ulong, 0644);
-MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_boost, ULONG, ZMOD_RW,
+	"Extra write bytes during device warmup");
 
-module_param(l2arc_headroom, ulong, 0644);
-MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom, ULONG, ZMOD_RW,
+	"Number of max device writes to precache");
 
-module_param(l2arc_headroom_boost, ulong, 0644);
-MODULE_PARM_DESC(l2arc_headroom_boost, "Compressed l2arc_headroom multiplier");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, headroom_boost, ULONG, ZMOD_RW,
+	"Compressed l2arc_headroom multiplier");
 
-module_param(l2arc_feed_secs, ulong, 0644);
-MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, trim_ahead, ULONG, ZMOD_RW,
+	"TRIM ahead L2ARC write size multiplier");
 
-module_param(l2arc_feed_min_ms, ulong, 0644);
-MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_secs, ULONG, ZMOD_RW,
+	"Seconds between L2ARC writing");
 
-module_param(l2arc_noprefetch, int, 0644);
-MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_min_ms, ULONG, ZMOD_RW,
+	"Min feed interval in milliseconds");
 
-module_param(l2arc_feed_again, int, 0644);
-MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, noprefetch, INT, ZMOD_RW,
+	"Skip caching prefetched buffers");
 
-module_param(l2arc_norw, int, 0644);
-MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_again, INT, ZMOD_RW,
+	"Turbo L2ARC warmup");
 
-module_param_call(zfs_arc_lotsfree_percent, param_set_arc_int, param_get_int,
-	&zfs_arc_lotsfree_percent, 0644);
-MODULE_PARM_DESC(zfs_arc_lotsfree_percent,
-	"System free memory I/O throttle in bytes");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, norw, INT, ZMOD_RW,
+	"No reads during writes");
 
-module_param_call(zfs_arc_sys_free, param_set_arc_long, param_get_long,
-	&zfs_arc_sys_free, 0644);
-MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, meta_percent, INT, ZMOD_RW,
+	"Percent of ARC size allowed for L2ARC-only headers");
 
-module_param_call(zfs_arc_dnode_limit, param_set_arc_long, param_get_long,
-	&zfs_arc_dnode_limit, 0644);
-MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_enabled, INT, ZMOD_RW,
+	"Rebuild the L2ARC when importing a pool");
 
-module_param(zfs_arc_dnode_limit_percent, ulong, 0644);
-MODULE_PARM_DESC(zfs_arc_dnode_limit_percent,
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
+	"Min size in bytes to write rebuild log blocks in L2ARC");
+
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
+	"Cache only MFU data from ARC into L2ARC");
+
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+	"If set to 1 exclude dbufs on special vdevs from being cached to "
+	"L2ARC.");
+
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
+	param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
+
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long,
+	param_get_long, ZMOD_RW, "System free memory target size in bytes");
+
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long,
+	param_get_long, ZMOD_RW, "Minimum bytes of dnodes in arc");
+
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent,
+	param_set_arc_long, param_get_long, ZMOD_RW,
 	"Percent of ARC meta buffers for dnodes");
 
-module_param(zfs_arc_dnode_reduce_percent, ulong, 0644);
-MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent,
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
 	"Percentage of excess dnodes to try to unpin");
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, INT, ZMOD_RW,
+	"When full, ARC allocation waits for eviction of this % of alloc size");
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, INT, ZMOD_RW,
+	"The number of headers to evict per sublist before moving to the next");
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
+	"Number of arc_prune threads");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/blkptr.c b/zfs/module/zfs/blkptr.c
index ee24b1c..aa09ded 100644
--- a/zfs/module/zfs/blkptr.c
+++ b/zfs/module/zfs/blkptr.c

@@ -17,6 +17,7 @@
  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
  */
 
+#include <sys/blkptr.h>
 #include <sys/zfs_context.h>
 #include <sys/zio.h>
 #include <sys/zio_compress.h>
@@ -142,7 +143,7 @@
 		uint8_t dstbuf[BPE_PAYLOAD_SIZE];
 		decode_embedded_bp_compressed(bp, dstbuf);
 		VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
-		    dstbuf, buf, psize, buflen));
+		    dstbuf, buf, psize, buflen, NULL));
 	} else {
 		ASSERT3U(lsize, ==, psize);
 		decode_embedded_bp_compressed(bp, buf);

diff --git a/zfs/module/zfs/bplist.c b/zfs/module/zfs/bplist.c
index c81151e..47ea364 100644
--- a/zfs/module/zfs/bplist.c
+++ b/zfs/module/zfs/bplist.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 #include <sys/bplist.h>
@@ -75,3 +75,17 @@
 	}
 	mutex_exit(&bpl->bpl_lock);
 }
+
+void
+bplist_clear(bplist_t *bpl)
+{
+	bplist_entry_t *bpe;
+
+	mutex_enter(&bpl->bpl_lock);
+	while ((bpe = list_head(&bpl->bpl_list))) {
+		bplist_iterate_last_removed = bpe;
+		list_remove(&bpl->bpl_list, bpe);
+		kmem_free(bpe, sizeof (*bpe));
+	}
+	mutex_exit(&bpl->bpl_lock);
+}

diff --git a/zfs/module/zfs/bpobj.c b/zfs/module/zfs/bpobj.c
index 6338019..a8e9309 100644
--- a/zfs/module/zfs/bpobj.c
+++ b/zfs/module/zfs/bpobj.c

@@ -20,13 +20,13 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2017 Datto Inc.
  */
 
 #include <sys/bpobj.h>
 #include <sys/zfs_context.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 #include <sys/dsl_pool.h>
 #include <sys/zfeature.h>
 #include <sys/zap.h>
@@ -83,6 +83,9 @@
 		size = BPOBJ_SIZE_V0;
 	else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
 		size = BPOBJ_SIZE_V1;
+	else if (!spa_feature_is_active(dmu_objset_spa(os),
+	    SPA_FEATURE_LIVELIST))
+		size = BPOBJ_SIZE_V2;
 	else
 		size = sizeof (bpobj_phys_t);
 
@@ -171,6 +174,7 @@
 	bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
 	bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
 	bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
+	bpo->bpo_havefreed = (doi.doi_bonus_size > BPOBJ_SIZE_V2);
 	bpo->bpo_phys = bpo->bpo_dbuf->db_data;
 	return (0);
 }
@@ -199,11 +203,21 @@
 	mutex_destroy(&bpo->bpo_lock);
 }
 
+static boolean_t
+bpobj_is_empty_impl(bpobj_t *bpo)
+{
+	ASSERT(MUTEX_HELD(&bpo->bpo_lock));
+	return (bpo->bpo_phys->bpo_num_blkptrs == 0 &&
+	    (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_num_subobjs == 0));
+}
+
 boolean_t
 bpobj_is_empty(bpobj_t *bpo)
 {
-	return (bpo->bpo_phys->bpo_num_blkptrs == 0 &&
-	    (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_num_subobjs == 0));
+	mutex_enter(&bpo->bpo_lock);
+	boolean_t is_empty = bpobj_is_empty_impl(bpo);
+	mutex_exit(&bpo->bpo_lock);
+	return (is_empty);
 }
 
 /*
@@ -245,8 +259,8 @@
  * Update bpobj and all of its parents with new space accounting.
  */
 static void
-propagate_space_reduction(bpobj_info_t *bpi, uint64_t freed,
-    uint64_t comp_freed, uint64_t uncomp_freed, dmu_tx_t *tx)
+propagate_space_reduction(bpobj_info_t *bpi, int64_t freed,
+    int64_t comp_freed, int64_t uncomp_freed, dmu_tx_t *tx)
 {
 
 	for (; bpi != NULL; bpi = bpi->bpi_parent) {
@@ -263,22 +277,22 @@
 
 static int
 bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
-    dmu_tx_t *tx, boolean_t free)
+    int64_t start, dmu_tx_t *tx, boolean_t free)
 {
 	int err = 0;
-	uint64_t freed = 0, comp_freed = 0, uncomp_freed = 0;
+	int64_t freed = 0, comp_freed = 0, uncomp_freed = 0;
 	dmu_buf_t *dbuf = NULL;
 	bpobj_t *bpo = bpi->bpi_bpo;
 
-	for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
+	for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
 		uint64_t offset = i * sizeof (blkptr_t);
 		uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
 
 		if (dbuf == NULL || dbuf->db_offset > offset) {
 			if (dbuf)
 				dmu_buf_rele(dbuf, FTAG);
-			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
-			    FTAG, &dbuf, 0);
+			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
+			    offset, FTAG, &dbuf, 0);
 			if (err)
 				break;
 		}
@@ -288,18 +302,26 @@
 
 		blkptr_t *bparray = dbuf->db_data;
 		blkptr_t *bp = &bparray[blkoff];
-		err = func(arg, bp, tx);
+
+		boolean_t bp_freed = BP_GET_FREE(bp);
+		err = func(arg, bp, bp_freed, tx);
 		if (err)
 			break;
 
 		if (free) {
+			int sign = bp_freed ? -1 : +1;
 			spa_t *spa = dmu_objset_spa(bpo->bpo_os);
-			freed += bp_get_dsize_sync(spa, bp);
-			comp_freed += BP_GET_PSIZE(bp);
-			uncomp_freed += BP_GET_UCSIZE(bp);
+			freed += sign * bp_get_dsize_sync(spa, bp);
+			comp_freed += sign * BP_GET_PSIZE(bp);
+			uncomp_freed += sign * BP_GET_UCSIZE(bp);
 			ASSERT(dmu_buf_is_dirty(bpo->bpo_dbuf, tx));
 			bpo->bpo_phys->bpo_num_blkptrs--;
 			ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
+			if (bp_freed) {
+				ASSERT(bpo->bpo_havefreed);
+				bpo->bpo_phys->bpo_num_freed--;
+				ASSERT3S(bpo->bpo_phys->bpo_num_freed, >=, 0);
+			}
 		}
 	}
 	if (free) {
@@ -328,7 +350,7 @@
  */
 static int
 bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
-    dmu_tx_t *tx, boolean_t free)
+    dmu_tx_t *tx, boolean_t free, uint64_t *bpobj_size)
 {
 	list_t stack;
 	bpobj_info_t *bpi;
@@ -341,6 +363,10 @@
 	list_create(&stack, sizeof (bpobj_info_t),
 	    offsetof(bpobj_info_t, bpi_node));
 	mutex_enter(&initial_bpo->bpo_lock);
+
+	if (bpobj_size != NULL)
+		*bpobj_size = initial_bpo->bpo_phys->bpo_num_blkptrs;
+
 	list_insert_head(&stack, bpi_alloc(initial_bpo, NULL, 0));
 
 	while ((bpi = list_head(&stack)) != NULL) {
@@ -354,7 +380,8 @@
 			dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
 
 		if (bpi->bpi_visited == B_FALSE) {
-			err = bpobj_iterate_blkptrs(bpi, func, arg, tx, free);
+			err = bpobj_iterate_blkptrs(bpi, func, arg, 0, tx,
+			    free);
 			bpi->bpi_visited = B_TRUE;
 			if (err != 0)
 				break;
@@ -370,7 +397,7 @@
 			 * If there are no entries, there should
 			 * be no bytes.
 			 */
-			if (bpobj_is_empty(bpo)) {
+			if (bpobj_is_empty_impl(bpo)) {
 				ASSERT0(bpo->bpo_phys->bpo_bytes);
 				ASSERT0(bpo->bpo_phys->bpo_comp);
 				ASSERT0(bpo->bpo_phys->bpo_uncomp);
@@ -433,6 +460,7 @@
 			 * We have unprocessed subobjs. Process the next one.
 			 */
 			ASSERT(bpo->bpo_havecomp);
+			ASSERT3P(bpobj_size, ==, NULL);
 
 			/* Add the last subobj to stack. */
 			int64_t i = bpi->bpi_unprocessed_subobjs - 1;
@@ -489,16 +517,45 @@
 int
 bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
 {
-	return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
+	return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE, NULL));
 }
 
 /*
  * Iterate the entries.  If func returns nonzero, iteration will stop.
+ *
+ * If there are no subobjs:
+ *
+ * *bpobj_size can be used to return the number of block pointers in the
+ * bpobj.  Note that this may be different from the number of block pointers
+ * that are iterated over, if iteration is terminated early (e.g. by the func
+ * returning nonzero).
+ *
+ * If there are concurrent (or subsequent) modifications to the bpobj then the
+ * returned *bpobj_size can be passed as "start" to
+ * livelist_bpobj_iterate_from_nofree() to iterate the newly added entries.
  */
 int
-bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
+bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg,
+    uint64_t *bpobj_size)
 {
-	return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
+	return (bpobj_iterate_impl(bpo, func, arg, NULL, B_FALSE, bpobj_size));
+}
+
+/*
+ * Iterate over the blkptrs in the bpobj from the last one down to index start.
+ * If func returns nonzero, iteration will stop. This is a livelist specific
+ * function since it assumes that there are no subobjs present.
+ */
+int
+livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg,
+    int64_t start)
+{
+	if (bpo->bpo_havesubobj)
+		VERIFY0(bpo->bpo_phys->bpo_subobjs);
+	bpobj_info_t *bpi = bpi_alloc(bpo, NULL, 0);
+	int err = bpobj_iterate_blkptrs(bpi, func, arg, start, NULL, B_FALSE);
+	kmem_free(bpi, sizeof (bpobj_info_t));
+	return (err);
 }
 
 /*
@@ -606,14 +663,13 @@
 	}
 
 	VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
-	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
-
 	if (bpobj_is_empty(&subbpo)) {
 		/* No point in having an empty subobj. */
 		bpobj_close(&subbpo);
 		bpobj_free(bpo->bpo_os, subobj, tx);
 		return;
 	}
+	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
 
 	mutex_enter(&bpo->bpo_lock);
 	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
@@ -723,8 +779,71 @@
 
 }
 
+/*
+ * Prefetch metadata required for bpobj_enqueue_subobj().
+ */
 void
-bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
+bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj)
+{
+	dmu_object_info_t doi;
+	bpobj_t subbpo;
+	uint64_t subsubobjs;
+	boolean_t copy_subsub = B_TRUE;
+	boolean_t copy_bps = B_TRUE;
+
+	ASSERT(bpobj_is_open(bpo));
+	ASSERT(subobj != 0);
+
+	if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj)
+		return;
+
+	if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0)
+		return;
+	if (bpobj_is_empty(&subbpo)) {
+		bpobj_close(&subbpo);
+		return;
+	}
+	subsubobjs = subbpo.bpo_phys->bpo_subobjs;
+	bpobj_close(&subbpo);
+
+	if (subsubobjs != 0) {
+		if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0)
+			return;
+		if (doi.doi_max_offset > doi.doi_data_block_size)
+			copy_subsub = B_FALSE;
+	}
+
+	if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0)
+		return;
+	if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub)
+		copy_bps = B_FALSE;
+
+	if (copy_subsub && subsubobjs != 0) {
+		if (bpo->bpo_phys->bpo_subobjs) {
+			dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+			    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+			    ZIO_PRIORITY_ASYNC_READ);
+		}
+		dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+
+	if (copy_bps) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+		    bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+		dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	} else if (bpo->bpo_phys->bpo_subobjs) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+		    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+}
+
+void
+bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
 	blkptr_t stored_bp = *bp;
 	uint64_t offset;
@@ -755,8 +874,8 @@
 		bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
 	}
 
-	/* We never need the fill count. */
 	stored_bp.blk_fill = 0;
+	BP_SET_FREE(&stored_bp, bp_freed);
 
 	mutex_enter(&bpo->bpo_lock);
 
@@ -779,11 +898,16 @@
 
 	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
 	bpo->bpo_phys->bpo_num_blkptrs++;
-	bpo->bpo_phys->bpo_bytes +=
+	int sign = bp_freed ? -1 : +1;
+	bpo->bpo_phys->bpo_bytes += sign *
 	    bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
 	if (bpo->bpo_havecomp) {
-		bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
-		bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
+		bpo->bpo_phys->bpo_comp += sign * BP_GET_PSIZE(bp);
+		bpo->bpo_phys->bpo_uncomp += sign * BP_GET_UCSIZE(bp);
+	}
+	if (bp_freed) {
+		ASSERT(bpo->bpo_havefreed);
+		bpo->bpo_phys->bpo_num_freed++;
 	}
 	mutex_exit(&bpo->bpo_lock);
 }
@@ -797,10 +921,10 @@
 	uint64_t uncomp;
 };
 
-/* ARGSUSED */
 static int
-space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
 {
+	(void) bp_freed, (void) tx;
 	struct space_range_arg *sra = arg;
 
 	if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
@@ -863,3 +987,18 @@
 	*uncompp = sra.uncomp;
 	return (err);
 }
+
+/*
+ * bpobj_itor_t callback that appends each visited blkptr to the bplist passed
+ * as arg.  The free/allocated designation that a bpobj tracks (bp_freed) is
+ * dropped, because the bplist is handed only the blkptr itself.
+ */
+int
+bplist_append_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	bplist_t *list = arg;
+	(void) tx, (void) bp_freed;
+	bplist_append(list, bp);
+	return (0);
+}

diff --git a/zfs/module/zfs/bptree.c b/zfs/module/zfs/bptree.c
index 8f78e8d..4e9a482 100644
--- a/zfs/module/zfs/bptree.c
+++ b/zfs/module/zfs/bptree.c

@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  */
 
 #include <sys/arc.h>
@@ -33,7 +33,6 @@
 #include <sys/dsl_dir.h>
 #include <sys/dsl_pool.h>
 #include <sys/dnode.h>
-#include <sys/refcount.h>
 #include <sys/spa.h>
 
 /*
@@ -148,15 +147,16 @@
 	dmu_buf_rele(db, FTAG);
 }
 
-/* ARGSUSED */
 static int
 bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
+	(void) zilog, (void) dnp;
 	int err;
 	struct bptree_args *ba = arg;
 
-	if (bp == NULL || BP_IS_HOLE(bp))
+	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
+	    BP_IS_REDACTED(bp))
 		return (0);
 
 	err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);

diff --git a/zfs/module/zfs/bqueue.c b/zfs/module/zfs/bqueue.c
index 3fc7fca..ec5ce43 100644
--- a/zfs/module/zfs/bqueue.c
+++ b/zfs/module/zfs/bqueue.c

@@ -13,7 +13,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2018 by Delphix. All rights reserved.
  */
 
 #include	<sys/bqueue.h>
@@ -27,13 +27,26 @@
 
 /*
  * Initialize a blocking queue  The maximum capacity of the queue is set to
- * size.  Types that want to be stored in a bqueue must contain a bqueue_node_t,
- * and offset should give its offset from the start of the struct.  Return 0 on
- * success, or -1 on failure.
+ * size.  Types that are stored in a bqueue must contain a bqueue_node_t,
+ * and node_offset must be its offset from the start of the struct.
+ * fill_fraction is a performance tuning value; when the queue is full, any
+ * threads attempting to enqueue records will block.  They will block until
+ * they're signaled, which will occur when the queue is at least 1/fill_fraction
+ * empty.  Similar behavior occurs on dequeue; if the queue is empty, threads
+ * block.  They will be signalled when the queue is 1/fill_fraction full, or
+ * when bqueue_flush is called.  As a result, you must call bqueue_flush when
+ * you enqueue your final record on a thread, in case the dequeueing threads are
+ * currently blocked and that enqueue does not cause them to be awoken.
+ * Alternatively, this behavior can be disabled (causing signaling to happen
+ * immediately) by setting fill_fraction to any value larger than size.
+ * Return 0 on success, or -1 on failure.
  */
 int
-bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
+bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset)
 {
+	if (fill_fraction == 0) {
+		return (-1);
+	}
 	list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t),
 	    node_offset + offsetof(bqueue_node_t, bqn_node));
 	cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL);
@@ -42,6 +55,7 @@
 	q->bq_node_offset = node_offset;
 	q->bq_size = 0;
 	q->bq_maxsize = size;
+	q->bq_fill_fraction = fill_fraction;
 	return (0);
 }
 
@@ -53,11 +67,37 @@
 void
 bqueue_destroy(bqueue_t *q)
 {
+	mutex_enter(&q->bq_lock);
 	ASSERT0(q->bq_size);
 	cv_destroy(&q->bq_add_cv);
 	cv_destroy(&q->bq_pop_cv);
-	mutex_destroy(&q->bq_lock);
 	list_destroy(&q->bq_list);
+	mutex_exit(&q->bq_lock);
+	mutex_destroy(&q->bq_lock);
+}
+
+static void
+bqueue_enqueue_impl(bqueue_t *q, void *data, size_t item_size, boolean_t flush)
+{
+	ASSERT3U(item_size, >, 0);
+	ASSERT3U(item_size, <=, q->bq_maxsize);
+	mutex_enter(&q->bq_lock);
+	obj2node(q, data)->bqn_size = item_size;
+	while (q->bq_size && q->bq_size + item_size > q->bq_maxsize) {
+		/*
+		 * Wake up bqueue_dequeue() thread if already sleeping in order
+		 * to prevent the deadlock condition
+		 */
+		cv_signal(&q->bq_pop_cv);
+		cv_wait_sig(&q->bq_add_cv, &q->bq_lock);
+	}
+	q->bq_size += item_size;
+	list_insert_tail(&q->bq_list, data);
+	if (flush)
+		cv_broadcast(&q->bq_pop_cv);
+	else if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
+		cv_signal(&q->bq_pop_cv);
+	mutex_exit(&q->bq_lock);
 }
 
 /*
@@ -66,20 +106,25 @@
  * > 0.
  */
 void
-bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue(bqueue_t *q, void *data, size_t item_size)
 {
-	ASSERT3U(item_size, >, 0);
-	ASSERT3U(item_size, <=, q->bq_maxsize);
-	mutex_enter(&q->bq_lock);
-	obj2node(q, data)->bqn_size = item_size;
-	while (q->bq_size + item_size > q->bq_maxsize) {
-		cv_wait_sig(&q->bq_add_cv, &q->bq_lock);
-	}
-	q->bq_size += item_size;
-	list_insert_tail(&q->bq_list, data);
-	cv_signal(&q->bq_pop_cv);
-	mutex_exit(&q->bq_lock);
+	bqueue_enqueue_impl(q, data, item_size, B_FALSE);
 }
+
+/*
+ * Enqueue an entry, and then flush the queue.  This forces the popping threads
+ * to wake up, even if we're below the fill fraction.  We have this in a single
+ * function, rather than having a separate call, because it prevents a race
+ * between the enqueuing thread and the dequeueing thread, in which the
+ * enqueueing thread wakes up the dequeueing thread and that thread then
+ * destroys the condvar before the enqueuing thread is done.
+ */
+void
+bqueue_enqueue_flush(bqueue_t *q, void *data, size_t item_size)
+{
+	bqueue_enqueue_impl(q, data, item_size, B_TRUE);
+}
+
 /*
  * Take the first element off of q.  If there are no elements on the queue, wait
  * until one is put there.  Return the removed element.
@@ -88,7 +133,7 @@
 bqueue_dequeue(bqueue_t *q)
 {
 	void *ret = NULL;
-	uint64_t item_size;
+	size_t item_size;
 	mutex_enter(&q->bq_lock);
 	while (q->bq_size == 0) {
 		cv_wait_sig(&q->bq_pop_cv, &q->bq_lock);
@@ -97,7 +142,8 @@
 	ASSERT3P(ret, !=, NULL);
 	item_size = obj2node(q, ret)->bqn_size;
 	q->bq_size -= item_size;
-	cv_signal(&q->bq_add_cv);
+	if (q->bq_size <= q->bq_maxsize - (q->bq_maxsize / q->bq_fill_fraction))
+		cv_signal(&q->bq_add_cv);
 	mutex_exit(&q->bq_lock);
 	return (ret);
 }

diff --git a/zfs/module/zfs/btree.c b/zfs/module/zfs/btree.c
new file mode 100644
index 0000000..28ab3fc
--- /dev/null
+++ b/zfs/module/zfs/btree.c

@@ -0,0 +1,2207 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+
+#include	<sys/btree.h>
+#include	<sys/bitops.h>
+#include	<sys/zfs_context.h>
+
+kmem_cache_t *zfs_btree_leaf_cache;
+
+/*
+ * Control the extent of the verification that occurs when zfs_btree_verify is
+ * called. Primarily used for debugging when extending the btree logic and
+ * functionality. As the intensity is increased, new verification steps are
+ * added. These steps are cumulative; intensity = 3 includes the intensity = 1
+ * and intensity = 2 steps as well.
+ *
+ * Intensity 1: Verify that the tree's height is consistent throughout.
+ * Intensity 2: Verify that a core node's children's parent pointers point
+ * to the core node.
+ * Intensity 3: Verify that the total number of elements in the tree matches the
+ * sum of the number of elements in each node. Also verifies that each node's
+ * count obeys the invariants (less than or equal to maximum value, greater than
+ * or equal to half the maximum minus one).
+ * Intensity 4: Verify that each element compares less than the element
+ * immediately after it and greater than the one immediately before it using the
+ * comparator function. For core nodes, also checks that each element is greater
+ * than the last element in the first of the two nodes it separates, and less
+ * than the first element in the second of the two nodes.
+ * Intensity 5: Verifies, if ZFS_DEBUG is defined, that all unused memory inside
+ * of each node is poisoned appropriately. Note that poisoning always occurs if
+ * ZFS_DEBUG is set, so it is safe to set the intensity to 5 during normal
+ * operation.
+ *
+ * Intensity 4 and 5 are particularly expensive to perform; the previous levels
+ * are a few memory operations per node, while these levels require multiple
+ * operations per element. In addition, when creating large btrees, these
+ * operations are called at every step, resulting in extremely slow operation
+ * (while the asymptotic complexity of the other steps is the same, the
+ * importance of the constant factors cannot be denied).
+ */
+uint_t zfs_btree_verify_intensity = 0;
+
+/*
+ * Convenience functions to silence warnings from memcpy/memmove's
+ * return values and change argument order to src, dest.
+ */
+static void
+bcpy(const void *src, void *dest, size_t size)
+{
+	(void) memcpy(dest, src, size);
+}
+
+static void
+bmov(const void *src, void *dest, size_t size)
+{
+	(void) memmove(dest, src, size);
+}
+
+static boolean_t
+zfs_btree_is_core(struct zfs_btree_hdr *hdr)
+{
+	return (hdr->bth_first == -1);
+}
+
+#ifdef _ILP32
+#define	BTREE_POISON 0xabadb10c
+#else
+#define	BTREE_POISON 0xabadb10cdeadbeef
+#endif
+
+static void
+zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+#ifdef ZFS_DEBUG
+	size_t size = tree->bt_elem_size;
+	if (zfs_btree_is_core(hdr)) {
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		for (uint32_t i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS;
+		    i++) {
+			node->btc_children[i] =
+			    (zfs_btree_hdr_t *)BTREE_POISON;
+		}
+		(void) memset(node->btc_elems + hdr->bth_count * size, 0x0f,
+		    (BTREE_CORE_ELEMS - hdr->bth_count) * size);
+	} else {
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		(void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size);
+		(void) memset(leaf->btl_elems +
+		    (hdr->bth_first + hdr->bth_count) * size, 0x0f,
+		    tree->bt_leaf_size - offsetof(zfs_btree_leaf_t, btl_elems) -
+		    (hdr->bth_first + hdr->bth_count) * size);
+	}
+#endif
+}
+
+static inline void
+zfs_btree_poison_node_at(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
+    uint32_t idx, uint32_t count)
+{
+#ifdef ZFS_DEBUG
+	size_t size = tree->bt_elem_size;
+	if (zfs_btree_is_core(hdr)) {
+		ASSERT3U(idx, >=, hdr->bth_count);
+		ASSERT3U(idx, <=, BTREE_CORE_ELEMS);
+		ASSERT3U(idx + count, <=, BTREE_CORE_ELEMS);
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		for (uint32_t i = 1; i <= count; i++) {
+			node->btc_children[idx + i] =
+			    (zfs_btree_hdr_t *)BTREE_POISON;
+		}
+		(void) memset(node->btc_elems + idx * size, 0x0f, count * size);
+	} else {
+		ASSERT3U(idx, <=, tree->bt_leaf_cap);
+		ASSERT3U(idx + count, <=, tree->bt_leaf_cap);
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		(void) memset(leaf->btl_elems +
+		    (hdr->bth_first + idx) * size, 0x0f, count * size);
+	}
+#endif
+}
+
+static inline void
+zfs_btree_verify_poison_at(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
+    uint32_t idx)
+{
+#ifdef ZFS_DEBUG
+	size_t size = tree->bt_elem_size;
+	if (zfs_btree_is_core(hdr)) {
+		ASSERT3U(idx, <, BTREE_CORE_ELEMS);
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		zfs_btree_hdr_t *cval = (zfs_btree_hdr_t *)BTREE_POISON;
+		VERIFY3P(node->btc_children[idx + 1], ==, cval);
+		for (size_t i = 0; i < size; i++)
+			VERIFY3U(node->btc_elems[idx * size + i], ==, 0x0f);
+	} else  {
+		ASSERT3U(idx, <, tree->bt_leaf_cap);
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		if (idx >= tree->bt_leaf_cap - hdr->bth_first)
+			return;
+		for (size_t i = 0; i < size; i++) {
+			VERIFY3U(leaf->btl_elems[(hdr->bth_first + idx)
+			    * size + i], ==, 0x0f);
+		}
+	}
+#endif
+}
+
+void
+zfs_btree_init(void)
+{
+	zfs_btree_leaf_cache = kmem_cache_create("zfs_btree_leaf_cache",
+	    BTREE_LEAF_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+void
+zfs_btree_fini(void)
+{
+	kmem_cache_destroy(zfs_btree_leaf_cache);
+}
+
+/* Default-size leaves use the kmem cache; custom sizes use kmem_alloc. */
+static void *
+zfs_btree_leaf_alloc(zfs_btree_t *tree)
+{
+	if (tree->bt_leaf_size != BTREE_LEAF_SIZE)
+		return (kmem_alloc(tree->bt_leaf_size, KM_SLEEP));
+	return (kmem_cache_alloc(zfs_btree_leaf_cache, KM_SLEEP));
+}
+
+static void
+zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
+{
+	if (tree->bt_leaf_size == BTREE_LEAF_SIZE)
+		kmem_cache_free(zfs_btree_leaf_cache, ptr);
+	else
+		kmem_free(ptr, tree->bt_leaf_size);
+}
+
+void
+zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
+    size_t size)
+{
+	zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
+}
+
+void
+zfs_btree_create_custom(zfs_btree_t *tree,
+    int (*compar) (const void *, const void *),
+    size_t size, size_t lsize)
+{
+	size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
+
+	ASSERT3U(size, <=, esize / 2);
+	memset(tree, 0, sizeof (*tree));
+	tree->bt_compar = compar;
+	tree->bt_elem_size = size;
+	tree->bt_leaf_size = lsize;
+	tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
+	tree->bt_height = -1;
+	tree->bt_bulk = NULL;
+}
+
+/*
+ * Binary-search the nelems sorted elements in buf for value, filling in where.
+ */
+static void *
+zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
+    const void *value, zfs_btree_index_t *where)
+{
+	uint32_t lo = 0;
+	uint32_t hi = nelems;
+	while (lo < hi) {
+		uint32_t mid = (lo + hi) / 2;
+		uint8_t *elem = buf + mid * tree->bt_elem_size;
+		int cmp = tree->bt_compar(elem, value);
+		if (cmp == 0) {
+			where->bti_before = B_FALSE;
+			where->bti_offset = mid;
+			return (elem);
+		} else if (cmp < 0) {
+			lo = mid + 1;
+		} else {
+			hi = mid;
+		}
+	}
+
+	where->bti_before = B_TRUE;
+	where->bti_offset = hi;
+	return (NULL);
+}
+
+/*
+ * Find the given value in the tree. where may be passed as null to use as a
+ * membership test or if the btree is being used as a map.
+ */
+void *
+zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
+{
+	if (tree->bt_height == -1) {
+		if (where != NULL) {
+			where->bti_node = NULL;
+			where->bti_offset = 0;
+		}
+		ASSERT0(tree->bt_num_elems);
+		return (NULL);
+	}
+
+	/*
+	 * If we're in bulk-insert mode, we check the last spot in the tree
+	 * and the last leaf in the tree before doing the normal search,
+	 * because for most workloads the vast majority of finds in
+	 * bulk-insert mode are to insert new elements.
+	 */
+	zfs_btree_index_t idx;
+	size_t size = tree->bt_elem_size;
+	if (tree->bt_bulk != NULL) {
+		zfs_btree_leaf_t *last_leaf = tree->bt_bulk;
+		int comp = tree->bt_compar(last_leaf->btl_elems +
+		    (last_leaf->btl_hdr.bth_first +
+		    last_leaf->btl_hdr.bth_count - 1) * size, value);
+		if (comp < 0) {
+			/*
+			 * If what they're looking for is after the last
+			 * element, it's not in the tree.
+			 */
+			if (where != NULL) {
+				where->bti_node = (zfs_btree_hdr_t *)last_leaf;
+				where->bti_offset =
+				    last_leaf->btl_hdr.bth_count;
+				where->bti_before = B_TRUE;
+			}
+			return (NULL);
+		} else if (comp == 0) {
+			if (where != NULL) {
+				where->bti_node = (zfs_btree_hdr_t *)last_leaf;
+				where->bti_offset =
+				    last_leaf->btl_hdr.bth_count - 1;
+				where->bti_before = B_FALSE;
+			}
+			return (last_leaf->btl_elems +
+			    (last_leaf->btl_hdr.bth_first +
+			    last_leaf->btl_hdr.bth_count - 1) * size);
+		}
+		if (tree->bt_compar(last_leaf->btl_elems +
+		    last_leaf->btl_hdr.bth_first * size, value) <= 0) {
+			/*
+			 * If what they're looking for is after the first
+			 * element in the last leaf, it's in the last leaf or
+			 * it's not in the tree.
+			 */
+			void *d = zfs_btree_find_in_buf(tree,
+			    last_leaf->btl_elems +
+			    last_leaf->btl_hdr.bth_first * size,
+			    last_leaf->btl_hdr.bth_count, value, &idx);
+
+			if (where != NULL) {
+				idx.bti_node = (zfs_btree_hdr_t *)last_leaf;
+				*where = idx;
+			}
+			return (d);
+		}
+	}
+
+	zfs_btree_core_t *node = NULL;
+	uint32_t child = 0;
+	uint32_t depth = 0;
+
+	/*
+	 * Iterate down the tree, finding which child the value should be in
+	 * by comparing with the separators.
+	 */
+	for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
+	    node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
+		ASSERT3P(node, !=, NULL);
+		void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
+		    node->btc_hdr.bth_count, value, &idx);
+		EQUIV(d != NULL, !idx.bti_before);
+		if (d != NULL) {
+			if (where != NULL) {
+				idx.bti_node = (zfs_btree_hdr_t *)node;
+				*where = idx;
+			}
+			return (d);
+		}
+		ASSERT(idx.bti_before);
+		child = idx.bti_offset;
+	}
+
+	/*
+	 * The value is in this leaf, or it would be if it were in the
+	 * tree. Find its proper location and return it.
+	 */
+	zfs_btree_leaf_t *leaf = (depth == 0 ?
+	    (zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
+	void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
+	    leaf->btl_hdr.bth_first * size,
+	    leaf->btl_hdr.bth_count, value, &idx);
+
+	if (where != NULL) {
+		idx.bti_node = (zfs_btree_hdr_t *)leaf;
+		*where = idx;
+	}
+
+	return (d);
+}
+
+/*
+ * To explain the following functions, it is useful to understand the four
+ * kinds of shifts used in btree operation. First, a shift is a movement of
+ * elements within a node. It is used to create gaps for inserting new
+ * elements and children, or cover gaps created when things are removed. A
+ * shift has two fundamental properties, each of which can be one of two
+ * values, making four types of shifts.  There is the direction of the shift
+ * (left or right) and the shape of the shift (parallelogram or isosceles
+ * trapezoid (shortened to trapezoid hereafter)). The shape distinction only
+ * applies to shifts of core nodes.
+ *
+ * The names derive from the following imagining of the layout of a node:
+ *
+ *  Elements:       *   *   *   *   *   *   *   ...   *   *   *
+ *  Children:     *   *   *   *   *   *   *   *   ...   *   *   *
+ *
+ * This layout follows from the fact that the elements act as separators
+ * between pairs of children, and that children root subtrees "below" the
+ * current node. A left and right shift are fairly self-explanatory; a left
+ * shift moves things to the left, while a right shift moves things to the
+ * right. A parallelogram shift is a shift with the same number of elements
+ * and children being moved, while a trapezoid shift is a shift that moves one
+ * more child than elements. An example follows:
+ *
+ * A parallelogram shift could contain the following:
+ *      _______________
+ *      \*   *   *   * \ *   *   *   ...   *   *   *
+ *     * \ *   *   *   *\  *   *   *   ...   *   *   *
+ *        ---------------
+ * A trapezoid shift could contain the following:
+ *          ___________
+ *       * / *   *   * \ *   *   *   ...   *   *   *
+ *     *  / *  *   *   *\  *   *   *   ...   *   *   *
+ *        ---------------
+ *
+ * Note that a parallelogram shift is always shaped like a "left-leaning"
+ * parallelogram, where the starting index of the children being moved is
+ * always one higher than the starting index of the elements being moved. No
+ * "right-leaning" parallelogram shifts are needed (shifts where the starting
+ * element index and starting child index being moved are the same) to achieve
+ * any btree operations, so we ignore them.
+ */
+
+enum bt_shift_shape {
+	BSS_TRAPEZOID,
+	BSS_PARALLELOGRAM
+};
+
+enum bt_shift_direction {
+	BSD_LEFT,
+	BSD_RIGHT
+};
+
+/*
+ * Shift elements and children in the provided core node by off spots.  The
+ * first element moved is idx, and count elements are moved. The shape of the
+ * shift is determined by shape. The direction is determined by dir.
+ */
+static inline void
+bt_shift_core(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+    uint32_t count, uint32_t off, enum bt_shift_shape shape,
+    enum bt_shift_direction dir)
+{
+	size_t size = tree->bt_elem_size;
+	ASSERT(zfs_btree_is_core(&node->btc_hdr));
+
+	uint8_t *e_start = node->btc_elems + idx * size;
+	uint8_t *e_out = (dir == BSD_LEFT ? e_start - off * size :
+	    e_start + off * size);
+	bmov(e_start, e_out, count * size);
+
+	zfs_btree_hdr_t **c_start = node->btc_children + idx +
+	    (shape == BSS_TRAPEZOID ? 0 : 1);
+	zfs_btree_hdr_t **c_out = (dir == BSD_LEFT ? c_start - off :
+	    c_start + off);
+	uint32_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
+	bmov(c_start, c_out, c_count * sizeof (*c_start));
+}
+
+/*
+ * Shift elements and children in the provided core node left by one spot.
+ * The first element moved is idx, and count elements are moved. The
+ * shape of the shift is determined by shape: BSS_TRAPEZOID for a trapezoid
+ * shift, BSS_PARALLELOGRAM for a parallelogram shift.
+ */
+static inline void
+bt_shift_core_left(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+    uint32_t count, enum bt_shift_shape shape)
+{
+	bt_shift_core(tree, node, idx, count, 1, shape, BSD_LEFT);
+}
+
+/*
+ * Shift elements and children in the provided core node right by one spot.
+ * The first element moved is idx; count and shape are as in bt_shift_core().
+ */
+static inline void
+bt_shift_core_right(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+    uint32_t count, enum bt_shift_shape shape)
+{
+	bt_shift_core(tree, node, idx, count, 1, shape, BSD_RIGHT);
+}
+
+/*
+ * Shift elements in the provided leaf node by off spots. The first element
+ * moved is idx (counted from bth_first), and count elements are moved. The
+ * direction is determined by dir.
+ */
+static inline void
+bt_shift_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *node, uint32_t idx,
+    uint32_t count, uint32_t off, enum bt_shift_direction dir)
+{
+	size_t size = tree->bt_elem_size;
+	zfs_btree_hdr_t *hdr = &node->btl_hdr;
+	ASSERT(!zfs_btree_is_core(hdr));
+
+	if (count == 0)
+		return;
+	uint8_t *start = node->btl_elems + (hdr->bth_first + idx) * size;
+	uint8_t *out = (dir == BSD_LEFT ? start - off * size :
+	    start + off * size);
+	bmov(start, out, count * size);
+}
+
+/*
+ * Grow leaf for n new elements before idx.
+ */
+static void
+bt_grow_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint32_t idx,
+    uint32_t n)
+{
+	zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
+	ASSERT(!zfs_btree_is_core(hdr));
+	ASSERT3U(idx, <=, hdr->bth_count);
+	uint32_t capacity = tree->bt_leaf_cap;
+	ASSERT3U(hdr->bth_count + n, <=, capacity);
+	boolean_t cl = (hdr->bth_first >= n);
+	boolean_t cr = (hdr->bth_first + hdr->bth_count + n <= capacity);
+
+	if (cl && (!cr || idx <= hdr->bth_count / 2)) {
+		/* Grow left. */
+		hdr->bth_first -= n;
+		bt_shift_leaf(tree, leaf, n, idx, n, BSD_LEFT);
+	} else if (cr) {
+		/* Grow right. */
+		bt_shift_leaf(tree, leaf, idx, hdr->bth_count - idx, n,
+		    BSD_RIGHT);
+	} else {
+		/* Grow both ways. */
+		uint32_t fn = hdr->bth_first -
+		    (capacity - (hdr->bth_count + n)) / 2;
+		hdr->bth_first -= fn;
+		bt_shift_leaf(tree, leaf, fn, idx, fn, BSD_LEFT);
+		bt_shift_leaf(tree, leaf, fn + idx, hdr->bth_count - idx,
+		    n - fn, BSD_RIGHT);
+	}
+	hdr->bth_count += n;
+}
+
+/*
+ * Shrink leaf by removing the n elements starting at idx.
+ */
+static void
+bt_shrink_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint32_t idx,
+    uint32_t n)
+{
+	zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
+	ASSERT(!zfs_btree_is_core(hdr));
+	ASSERT3U(idx, <=, hdr->bth_count);
+	ASSERT3U(idx + n, <=, hdr->bth_count);
+
+	if (idx <= (hdr->bth_count - n) / 2) {
+		bt_shift_leaf(tree, leaf, 0, idx, n, BSD_RIGHT);
+		zfs_btree_poison_node_at(tree, hdr, 0, n);
+		hdr->bth_first += n;
+	} else {
+		bt_shift_leaf(tree, leaf, idx + n, hdr->bth_count - idx - n, n,
+		    BSD_LEFT);
+		zfs_btree_poison_node_at(tree, hdr, hdr->bth_count - n, n);
+	}
+	hdr->bth_count -= n;
+}
+
+/*
+ * Copy count elements, plus the children selected by shape, from one core
+ * node to another.  shape has the same meaning as in the shift functions.
+ */
+static inline void
+bt_transfer_core(zfs_btree_t *tree, zfs_btree_core_t *source, uint32_t sidx,
+    uint32_t count, zfs_btree_core_t *dest, uint32_t didx,
+    enum bt_shift_shape shape)
+{
+	size_t esize = tree->bt_elem_size;
+	ASSERT(zfs_btree_is_core(&source->btc_hdr));
+	ASSERT(zfs_btree_is_core(&dest->btc_hdr));
+
+	bcpy(source->btc_elems + sidx * esize,
+	    dest->btc_elems + didx * esize, count * esize);
+	uint32_t c_off = (shape == BSS_TRAPEZOID ? 0 : 1);
+	uint32_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
+	bcpy(source->btc_children + sidx + c_off,
+	    dest->btc_children + didx + c_off,
+	    c_count * sizeof (*source->btc_children));
+}
+
+static inline void
+bt_transfer_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *source, uint32_t sidx,
+    uint32_t count, zfs_btree_leaf_t *dest, uint32_t didx)
+{
+	size_t size = tree->bt_elem_size;
+	ASSERT(!zfs_btree_is_core(&source->btl_hdr));
+	ASSERT(!zfs_btree_is_core(&dest->btl_hdr));
+
+	bcpy(source->btl_elems + (source->btl_hdr.bth_first + sidx) * size,
+	    dest->btl_elems + (dest->btl_hdr.bth_first + didx) * size,
+	    count * size);
+}
+
+/*
+ * Descend along the leftmost children from hdr to a leaf and return that
+ * leaf's first element; if where is non-null, record the element's location.
+ */
+static void *
+zfs_btree_first_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
+    zfs_btree_index_t *where)
+{
+	zfs_btree_hdr_t *cur = hdr;
+
+	while (zfs_btree_is_core(cur))
+		cur = ((zfs_btree_core_t *)cur)->btc_children[0];
+
+	ASSERT(!zfs_btree_is_core(cur));
+	zfs_btree_leaf_t *first_leaf = (zfs_btree_leaf_t *)cur;
+	size_t first_off = cur->bth_first * tree->bt_elem_size;
+	if (where != NULL) {
+		where->bti_before = B_FALSE;
+		where->bti_offset = 0;
+		where->bti_node = cur;
+	}
+	return (&first_leaf->btl_elems[first_off]);
+}
+
+/* Insert an element and a child into a core node at the given offset. */
+static void
+zfs_btree_insert_core_impl(zfs_btree_t *tree, zfs_btree_core_t *parent,
+    uint32_t offset, zfs_btree_hdr_t *new_node, void *buf)
+{
+	size_t size = tree->bt_elem_size;
+	zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
+	ASSERT3P(par_hdr, ==, new_node->bth_parent);
+	ASSERT3U(par_hdr->bth_count, <, BTREE_CORE_ELEMS);
+
+	if (zfs_btree_verify_intensity >= 5) {
+		zfs_btree_verify_poison_at(tree, par_hdr,
+		    par_hdr->bth_count);
+	}
+	/* Shift existing elements and children */
+	uint32_t count = par_hdr->bth_count - offset;
+	bt_shift_core_right(tree, parent, offset, count,
+	    BSS_PARALLELOGRAM);
+
+	/* Insert new values */
+	parent->btc_children[offset + 1] = new_node;
+	bcpy(buf, parent->btc_elems + offset * size, size);
+	par_hdr->bth_count++;
+}
+
+/*
+ * Insert new_node into the parent of old_node directly after old_node, with
+ * buf as the dividing element between the two.
+ */
+static void
+zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
+    zfs_btree_hdr_t *new_node, void *buf)
+{
+	ASSERT3P(old_node->bth_parent, ==, new_node->bth_parent);
+	size_t size = tree->bt_elem_size;
+	zfs_btree_core_t *parent = old_node->bth_parent;
+	zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
+
+	/*
+	 * If this is the root node we were splitting, we create a new root
+	 * and increase the height of the tree.
+	 */
+	if (parent == NULL) {
+		ASSERT3P(old_node, ==, tree->bt_root);
+		tree->bt_num_nodes++;
+		zfs_btree_core_t *new_root =
+		    kmem_alloc(sizeof (zfs_btree_core_t) + BTREE_CORE_ELEMS *
+		    size, KM_SLEEP);
+		zfs_btree_hdr_t *new_root_hdr = &new_root->btc_hdr;
+		new_root_hdr->bth_parent = NULL;
+		new_root_hdr->bth_first = -1;
+		new_root_hdr->bth_count = 1;
+
+		old_node->bth_parent = new_node->bth_parent = new_root;
+		new_root->btc_children[0] = old_node;
+		new_root->btc_children[1] = new_node;
+		bcpy(buf, new_root->btc_elems, size);
+
+		tree->bt_height++;
+		tree->bt_root = new_root_hdr;
+		zfs_btree_poison_node(tree, new_root_hdr);
+		return;
+	}
+
+	/*
+	 * Since we have the new separator, binary search for where to put
+	 * new_node.
+	 */
+	zfs_btree_index_t idx;
+	ASSERT(zfs_btree_is_core(par_hdr));
+	VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
+	    par_hdr->bth_count, buf, &idx), ==, NULL);
+	ASSERT(idx.bti_before);
+	uint32_t offset = idx.bti_offset;
+	ASSERT3U(offset, <=, par_hdr->bth_count);
+	ASSERT3P(parent->btc_children[offset], ==, old_node);
+
+	/*
+	 * If the parent isn't full, shift things to accommodate our insertions
+	 * and return.
+	 */
+	if (par_hdr->bth_count != BTREE_CORE_ELEMS) {
+		zfs_btree_insert_core_impl(tree, parent, offset, new_node, buf);
+		return;
+	}
+
+	/*
+	 * We need to split this core node into two. Currently there are
+	 * BTREE_CORE_ELEMS + 1 child nodes, and we are adding one for
+	 * BTREE_CORE_ELEMS + 2. Some of the children will be part of the
+	 * current node, and the others will be moved to the new core node.
+	 * There are BTREE_CORE_ELEMS + 1 elements including the new one. One
+	 * will be used as the new separator in our parent, and the others
+	 * will be split among the two core nodes.
+	 *
+	 * Usually we will split the node in half evenly, with
+	 * BTREE_CORE_ELEMS/2 elements in each node. If we're bulk loading, we
+	 * instead move only about a quarter of the elements (and children) to
+	 * the new node. Since the average state after a long time is a 3/4
+	 * full node, shortcutting directly to that state improves efficiency.
+	 *
+	 * We do this in two stages: first we split into two nodes, and then we
+	 * reuse our existing logic to insert the new element and child.
+	 */
+	uint32_t move_count = MAX((BTREE_CORE_ELEMS / (tree->bt_bulk == NULL ?
+	    2 : 4)) - 1, 2);
+	uint32_t keep_count = BTREE_CORE_ELEMS - move_count - 1;
+	ASSERT3U(BTREE_CORE_ELEMS - move_count, >=, 2);
+	tree->bt_num_nodes++;
+	zfs_btree_core_t *new_parent = kmem_alloc(sizeof (zfs_btree_core_t) +
+	    BTREE_CORE_ELEMS * size, KM_SLEEP);
+	zfs_btree_hdr_t *new_par_hdr = &new_parent->btc_hdr;
+	new_par_hdr->bth_parent = par_hdr->bth_parent;
+	new_par_hdr->bth_first = -1;
+	new_par_hdr->bth_count = move_count;
+	zfs_btree_poison_node(tree, new_par_hdr);
+
+	par_hdr->bth_count = keep_count;
+
+	bt_transfer_core(tree, parent, keep_count + 1, move_count, new_parent,
+	    0, BSS_TRAPEZOID);
+
+	/* Store the new separator in a buffer. */
+	uint8_t *tmp_buf = kmem_alloc(size, KM_SLEEP);
+	bcpy(parent->btc_elems + keep_count * size, tmp_buf,
+	    size);
+	zfs_btree_poison_node(tree, par_hdr);
+
+	if (offset < keep_count) {
+		/* Insert the new node into the left half */
+		zfs_btree_insert_core_impl(tree, parent, offset, new_node,
+		    buf);
+
+		/*
+		 * Move the new separator to the existing buffer.
+		 */
+		bcpy(tmp_buf, buf, size);
+	} else if (offset > keep_count) {
+		/* Insert the new node into the right half */
+		new_node->bth_parent = new_parent;
+		zfs_btree_insert_core_impl(tree, new_parent,
+		    offset - keep_count - 1, new_node, buf);
+
+		/*
+		 * Move the new separator to the existing buffer.
+		 */
+		bcpy(tmp_buf, buf, size);
+	} else {
+		/*
+		 * Move the new separator into the right half, and replace it
+		 * with buf. We also need to shift back the elements in the
+		 * right half to accommodate new_node.
+		 */
+		bt_shift_core_right(tree, new_parent, 0, move_count,
+		    BSS_TRAPEZOID);
+		new_parent->btc_children[0] = new_node;
+		bcpy(tmp_buf, new_parent->btc_elems, size);
+		new_par_hdr->bth_count++;
+	}
+	kmem_free(tmp_buf, size);
+	zfs_btree_poison_node(tree, par_hdr);
+
+	for (uint32_t i = 0; i <= new_parent->btc_hdr.bth_count; i++)
+		new_parent->btc_children[i]->bth_parent = new_parent;
+
+	for (uint32_t i = 0; i <= parent->btc_hdr.bth_count; i++)
+		ASSERT3P(parent->btc_children[i]->bth_parent, ==, parent);
+
+	/*
+	 * Now that the node is split, we need to insert the new node into its
+	 * parent. This may cause further splitting.
+	 */
+	zfs_btree_insert_into_parent(tree, &parent->btc_hdr,
+	    &new_parent->btc_hdr, buf);
+}
+
+/*
+ * Copy value into position idx of leaf. The leaf must still have room for
+ * one more element (asserted below); splitting a full leaf is left to the
+ * caller.
+ */
+static void
+zfs_btree_insert_leaf_impl(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
+    uint32_t idx, const void *value)
+{
+	zfs_btree_hdr_t *lhdr = &leaf->btl_hdr;
+	const size_t elem_size = tree->bt_elem_size;
+
+	ASSERT3U(lhdr->bth_count, <, tree->bt_leaf_cap);
+
+	/* At high verification levels, run the poison check on the next slot. */
+	if (zfs_btree_verify_intensity >= 5)
+		zfs_btree_verify_poison_at(tree, lhdr, lhdr->bth_count);
+
+	/*
+	 * Make room for one element at idx. bth_first is only read after the
+	 * leaf has been grown, since growing presumably may move it.
+	 */
+	bt_grow_leaf(tree, leaf, idx, 1);
+	bcpy(value, leaf->btl_elems + (lhdr->bth_first + idx) * elem_size,
+	    elem_size);
+}
+
+/* Prototype only; the definition lives elsewhere in this translation unit. */
+static void zfs_btree_verify_order_helper(zfs_btree_t *tree,
+    zfs_btree_hdr_t *hdr);
+
+/*
+ * Helper function for inserting a new value into leaf at the given index.
+ *
+ * tree:  the b-tree that owns leaf.
+ * leaf:  the leaf node to insert into; it is split first if it is full.
+ * value: the element to insert; bt_elem_size bytes are copied from it.
+ * idx:   the position within leaf, counted from the leaf's first element,
+ *        at which value should end up.
+ *
+ * When a split happens, the new leaf and the separator between the two
+ * leaves are handed to zfs_btree_insert_into_parent().
+ */
+static void
+zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
+    const void *value, uint32_t idx)
+{
+	size_t size = tree->bt_elem_size;
+	uint32_t capacity = tree->bt_leaf_cap;
+
+	/*
+	 * If the leaf isn't full, shift the elements after idx and insert
+	 * value.
+	 */
+	if (leaf->btl_hdr.bth_count != capacity) {
+		zfs_btree_insert_leaf_impl(tree, leaf, idx, value);
+		return;
+	}
+
+	/*
+	 * Otherwise, we split the leaf node into two nodes. If we're not bulk
+	 * inserting, each is of size (capacity / 2).  If we are bulk
+	 * inserting, we move a quarter of the elements to the new node so
+	 * inserts into the old node don't cause immediate splitting but the
+	 * tree stays relatively dense. Since the average state after a long
+	 * time is a 3/4 full node, shortcutting directly to that state
+	 * improves efficiency.  At the end of the bulk insertion process
+	 * we'll need to go through and fix up any nodes (the last leaf and
+	 * its ancestors, potentially) that are below the minimum.
+	 *
+	 * In either case, we're left with one extra element. The leftover
+	 * element will become the new dividing element between the two nodes.
+	 */
+	uint32_t move_count = MAX(capacity / (tree->bt_bulk ? 4 : 2), 1) - 1;
+	uint32_t keep_count = capacity - move_count - 1;
+	ASSERT3U(keep_count, >=, 1);
+	/* If we insert on the left, move one more to keep leaves balanced. */
+	if (idx < keep_count) {
+		keep_count--;
+		move_count++;
+	}
+	tree->bt_num_nodes++;
+	zfs_btree_leaf_t *new_leaf = zfs_btree_leaf_alloc(tree);
+	zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr;
+	new_hdr->bth_parent = leaf->btl_hdr.bth_parent;
+	new_hdr->bth_first = (tree->bt_bulk ? 0 : capacity / 4) +
+	    (idx >= keep_count && idx <= keep_count + move_count / 2);
+	new_hdr->bth_count = move_count;
+	zfs_btree_poison_node(tree, new_hdr);
+
+	if (tree->bt_bulk != NULL && leaf == tree->bt_bulk)
+		tree->bt_bulk = new_leaf;
+
+	/*
+	 * Copy the back part (the move_count elements that follow the planned
+	 * separator at position keep_count) to the new leaf.
+	 */
+	bt_transfer_leaf(tree, leaf, keep_count + 1, move_count, new_leaf, 0);
+
+	/*
+	 * We store the new separator in a buffer we control for simplicity.
+	 * The buffer is freed at the end of this function.
+	 */
+	uint8_t *buf = kmem_alloc(size, KM_SLEEP);
+	bcpy(leaf->btl_elems + (leaf->btl_hdr.bth_first + keep_count) * size,
+	    buf, size);
+
+	bt_shrink_leaf(tree, leaf, keep_count, 1 + move_count);
+
+	if (idx < keep_count) {
+		/* Insert into the existing leaf. */
+		zfs_btree_insert_leaf_impl(tree, leaf, idx, value);
+	} else if (idx > keep_count) {
+		/* Insert into the new leaf. */
+		zfs_btree_insert_leaf_impl(tree, new_leaf, idx - keep_count -
+		    1, value);
+	} else {
+		/*
+		 * Insert planned separator into the new leaf, and use
+		 * the new value as the new separator.
+		 */
+		zfs_btree_insert_leaf_impl(tree, new_leaf, 0, buf);
+		bcpy(value, buf, size);
+	}
+
+	/*
+	 * Now that the node is split, we need to insert the new node into its
+	 * parent. This may cause further splitting, but only of core nodes.
+	 */
+	zfs_btree_insert_into_parent(tree, &leaf->btl_hdr, &new_leaf->btl_hdr,
+	    buf);
+	kmem_free(buf, size);
+}
+
+/*
+ * Return the slot that hdr occupies in its parent's child array. hdr must
+ * have a parent. The slot is located by searching the parent's separators
+ * for the first element stored in hdr; that element must not be found among
+ * them (verified below).
+ */
+static uint32_t
+zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	zfs_btree_core_t *parent = hdr->bth_parent;
+	zfs_btree_index_t pos;
+	void *first_elem;
+
+	/* Core elements start at slot 0; leaf elements start at bth_first. */
+	if (!zfs_btree_is_core(hdr)) {
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		first_elem = leaf->btl_elems +
+		    hdr->bth_first * tree->bt_elem_size;
+	} else {
+		zfs_btree_core_t *core = (zfs_btree_core_t *)hdr;
+		first_elem = core->btc_elems;
+	}
+
+	VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
+	    parent->btc_hdr.bth_count, first_elem, &pos), ==, NULL);
+	ASSERT(pos.bti_before);
+	ASSERT3U(pos.bti_offset, <=, parent->btc_hdr.bth_count);
+	ASSERT3P(parent->btc_children[pos.bti_offset], ==, hdr);
+	return (pos.bti_offset);
+}
+
+/*
+ * Take the b-tree out of bulk insert mode. During bulk-insert mode, some
+ * nodes may violate the invariant that non-root nodes must be at least half
+ * full. All nodes violating this invariant should be the last node in their
+ * particular level. To correct the invariant, we take values from their left
+ * neighbor until they are half full. They must have a left neighbor at their
+ * level because the last node at a level is not the first node unless it's
+ * the root.
+ *
+ * tree: the b-tree to fix up. It must currently be in bulk insert mode
+ *       (bt_bulk != NULL) and must have a root; both are asserted. On
+ *       return bt_bulk is NULL.
+ *
+ * Elements may move between nodes here, so indexes obtained before the call
+ * should be looked up again afterwards (the callers visible in this file do
+ * exactly that).
+ */
+static void
+zfs_btree_bulk_finish(zfs_btree_t *tree)
+{
+	ASSERT3P(tree->bt_bulk, !=, NULL);
+	ASSERT3P(tree->bt_root, !=, NULL);
+	zfs_btree_leaf_t *leaf = tree->bt_bulk;
+	zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
+	zfs_btree_core_t *parent = hdr->bth_parent;
+	size_t size = tree->bt_elem_size;
+	uint32_t capacity = tree->bt_leaf_cap;
+
+	/*
+	 * The invariant doesn't apply to the root node, if that's the only
+	 * node in the tree we're done.
+	 */
+	if (parent == NULL) {
+		tree->bt_bulk = NULL;
+		return;
+	}
+
+	/* First, take elements to rebalance the leaf node. */
+	if (hdr->bth_count < capacity / 2) {
+		/*
+		 * First, find the left neighbor. The simplest way to do this
+		 * is to call zfs_btree_prev twice; the first time finds some
+		 * ancestor of this node, and the second time finds the left
+		 * neighbor. The ancestor found is the lowest common ancestor
+		 * of leaf and the neighbor.
+		 */
+		zfs_btree_index_t idx = {
+			.bti_node = hdr,
+			.bti_offset = 0
+		};
+		VERIFY3P(zfs_btree_prev(tree, &idx, &idx), !=, NULL);
+		ASSERT(zfs_btree_is_core(idx.bti_node));
+		zfs_btree_core_t *common = (zfs_btree_core_t *)idx.bti_node;
+		uint32_t common_idx = idx.bti_offset;
+
+		VERIFY3P(zfs_btree_prev(tree, &idx, &idx), !=, NULL);
+		ASSERT(!zfs_btree_is_core(idx.bti_node));
+		zfs_btree_leaf_t *l_neighbor = (zfs_btree_leaf_t *)idx.bti_node;
+		zfs_btree_hdr_t *l_hdr = idx.bti_node;
+		uint32_t move_count = (capacity / 2) - hdr->bth_count;
+		ASSERT3U(l_neighbor->btl_hdr.bth_count - move_count, >=,
+		    capacity / 2);
+
+		if (zfs_btree_verify_intensity >= 5) {
+			for (uint32_t i = 0; i < move_count; i++) {
+				zfs_btree_verify_poison_at(tree, hdr,
+				    leaf->btl_hdr.bth_count + i);
+			}
+		}
+
+		/*
+		 * First, shift elements in leaf back, opening move_count
+		 * slots at the front of leaf.
+		 */
+		bt_grow_leaf(tree, leaf, 0, move_count);
+
+		/*
+		 * Next, move the separator from the common ancestor to leaf.
+		 * It lands in the last of the newly opened slots, directly
+		 * before leaf's original first element.
+		 */
+		uint8_t *separator = common->btc_elems + common_idx * size;
+		uint8_t *out = leaf->btl_elems +
+		    (hdr->bth_first + move_count - 1) * size;
+		bcpy(separator, out, size);
+
+		/*
+		 * Now we move elements from the tail of the left neighbor to
+		 * fill the remaining spots in leaf. That is move_count - 1
+		 * elements, since the separator took one of the spots.
+		 */
+		bt_transfer_leaf(tree, l_neighbor, l_hdr->bth_count -
+		    (move_count - 1), move_count - 1, leaf, 0);
+
+		/*
+		 * Finally, move the new last element in the left neighbor to
+		 * the separator.
+		 */
+		bcpy(l_neighbor->btl_elems + (l_hdr->bth_first +
+		    l_hdr->bth_count - move_count) * size, separator, size);
+
+		/* Adjust the node's counts, and we're done. */
+		bt_shrink_leaf(tree, l_neighbor, l_hdr->bth_count - move_count,
+		    move_count);
+
+		ASSERT3U(l_hdr->bth_count, >=, capacity / 2);
+		ASSERT3U(hdr->bth_count, >=, capacity / 2);
+	}
+
+	/*
+	 * Now we have to rebalance any ancestors of leaf that may also
+	 * violate the invariant. From here on capacity refers to core nodes.
+	 * The loop stops once parent is the root, which is exempt.
+	 */
+	capacity = BTREE_CORE_ELEMS;
+	while (parent->btc_hdr.bth_parent != NULL) {
+		zfs_btree_core_t *cur = parent;
+		zfs_btree_hdr_t *hdr = &cur->btc_hdr;
+		parent = hdr->bth_parent;
+		/*
+		 * If the invariant isn't violated, move on to the next
+		 * ancestor. Note that hdr inside this loop shadows the
+		 * function-scope hdr and describes cur, not the bulk leaf.
+		 */
+		if (hdr->bth_count >= capacity / 2)
+			continue;
+
+		/*
+		 * Because the smallest number of nodes we can move when
+		 * splitting is 2, we never need to worry about not having a
+		 * left sibling (a sibling is a neighbor with the same parent).
+		 */
+		uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+		ASSERT3U(parent_idx, >, 0);
+		zfs_btree_core_t *l_neighbor =
+		    (zfs_btree_core_t *)parent->btc_children[parent_idx - 1];
+		uint32_t move_count = (capacity / 2) - hdr->bth_count;
+		ASSERT3U(l_neighbor->btc_hdr.bth_count - move_count, >=,
+		    capacity / 2);
+
+		if (zfs_btree_verify_intensity >= 5) {
+			for (uint32_t i = 0; i < move_count; i++) {
+				zfs_btree_verify_poison_at(tree, hdr,
+				    hdr->bth_count + i);
+			}
+		}
+		/* First, shift things in the right node back. */
+		bt_shift_core(tree, cur, 0, hdr->bth_count, move_count,
+		    BSS_TRAPEZOID, BSD_RIGHT);
+
+		/* Next, move the separator to the right node. */
+		uint8_t *separator = parent->btc_elems + ((parent_idx - 1) *
+		    size);
+		uint8_t *e_out = cur->btc_elems + ((move_count - 1) * size);
+		bcpy(separator, e_out, size);
+
+		/*
+		 * Now, move elements and children from the left node to the
+		 * right.  We move one more child than elements. move_count
+		 * is decremented first because the separator already filled
+		 * one of the element slots.
+		 */
+		move_count--;
+		uint32_t move_idx = l_neighbor->btc_hdr.bth_count - move_count;
+		bt_transfer_core(tree, l_neighbor, move_idx, move_count, cur, 0,
+		    BSS_TRAPEZOID);
+
+		/*
+		 * Finally, move the last element in the left node to the
+		 * separator's position.
+		 */
+		move_idx--;
+		bcpy(l_neighbor->btc_elems + move_idx * size, separator, size);
+
+		l_neighbor->btc_hdr.bth_count -= move_count + 1;
+		hdr->bth_count += move_count + 1;
+
+		ASSERT3U(l_neighbor->btc_hdr.bth_count, >=, capacity / 2);
+		ASSERT3U(hdr->bth_count, >=, capacity / 2);
+
+		zfs_btree_poison_node(tree, &l_neighbor->btc_hdr);
+
+		for (uint32_t i = 0; i <= hdr->bth_count; i++)
+			cur->btc_children[i]->bth_parent = cur;
+	}
+
+	tree->bt_bulk = NULL;
+	zfs_btree_verify(tree);
+}
+
+/*
+ * Insert a copy of value at the position described by where. The index is
+ * normally the one filled in by a zfs_btree_find() call that did not find
+ * value (see zfs_btree_add()).
+ */
+void
+zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
+    const zfs_btree_index_t *where)
+{
+	zfs_btree_index_t refound = {0};
+
+	/*
+	 * Bulk insert mode only survives inserts into the bulk leaf itself.
+	 * For any other target we leave bulk mode first and then look the
+	 * insertion point up again instead of trusting the caller's index.
+	 */
+	if (tree->bt_bulk != NULL &&
+	    where->bti_node != &tree->bt_bulk->btl_hdr) {
+		zfs_btree_bulk_finish(tree);
+		VERIFY3P(zfs_btree_find(tree, value, &refound), ==, NULL);
+		where = &refound;
+	}
+
+	tree->bt_num_elems++;
+
+	zfs_btree_hdr_t *target = where->bti_node;
+	if (target == NULL) {
+		/*
+		 * The tree is empty: allocate a leaf to act as the root, put
+		 * the value in it, and start out in bulk insert mode.
+		 */
+		ASSERT3U(tree->bt_num_elems, ==, 1);
+		ASSERT3S(tree->bt_height, ==, -1);
+		ASSERT3P(tree->bt_root, ==, NULL);
+		ASSERT0(where->bti_offset);
+
+		tree->bt_num_nodes++;
+		zfs_btree_leaf_t *root_leaf = zfs_btree_leaf_alloc(tree);
+		zfs_btree_hdr_t *root_hdr = &root_leaf->btl_hdr;
+		tree->bt_root = root_hdr;
+		tree->bt_height++;
+
+		root_hdr->bth_parent = NULL;
+		root_hdr->bth_first = 0;
+		root_hdr->bth_count = 0;
+		zfs_btree_poison_node(tree, root_hdr);
+
+		zfs_btree_insert_into_leaf(tree, root_leaf, value, 0);
+		tree->bt_bulk = root_leaf;
+	} else if (zfs_btree_is_core(target)) {
+		/*
+		 * Core nodes cannot simply shift their elements without
+		 * breaking the ordering invariants. The new value takes over
+		 * the slot at bti_offset (bti_before makes no difference
+		 * here), and the separator it displaces is pushed down to
+		 * become the first element of the subtree on its right.
+		 */
+		zfs_btree_core_t *core = (zfs_btree_core_t *)target;
+		uint32_t slot = where->bti_offset;
+		size_t elem_size = tree->bt_elem_size;
+		uint8_t *slot_elem = core->btc_elems + slot * elem_size;
+		zfs_btree_hdr_t *right_subtree = core->btc_children[slot + 1];
+
+		/* Save the old separator, then overwrite it with value. */
+		uint8_t *displaced = kmem_alloc(elem_size, KM_SLEEP);
+		bcpy(slot_elem, displaced, elem_size);
+		bcpy(value, slot_elem, elem_size);
+
+		/* Locate the leftmost leaf slot of the right subtree. */
+		zfs_btree_index_t first_pos;
+		VERIFY3P(zfs_btree_first_helper(tree, right_subtree,
+		    &first_pos), !=, NULL);
+		ASSERT0(first_pos.bti_offset);
+		ASSERT(!zfs_btree_is_core(first_pos.bti_node));
+		zfs_btree_insert_into_leaf(tree,
+		    (zfs_btree_leaf_t *)first_pos.bti_node, displaced, 0);
+		kmem_free(displaced, elem_size);
+	} else {
+		/* Leaf target: the leaf helper does all of the work. */
+		zfs_btree_insert_into_leaf(tree, (zfs_btree_leaf_t *)target,
+		    value, where->bti_offset);
+	}
+	zfs_btree_verify(tree);
+}
+
+/*
+ * Look up the first element of the tree. Returns NULL when the tree is
+ * empty; otherwise returns the element and, if where is non-null, stores
+ * its location there.
+ */
+void *
+zfs_btree_first(zfs_btree_t *tree, zfs_btree_index_t *where)
+{
+	if (tree->bt_height != -1)
+		return (zfs_btree_first_helper(tree, tree->bt_root, where));
+
+	/* A height of -1 means there are no nodes, hence no elements. */
+	ASSERT0(tree->bt_num_elems);
+	return (NULL);
+}
+
+/*
+ * Locate the last element of the subtree rooted at hdr. The element is
+ * returned, and its location is written to where when where is non-null.
+ */
+static void *
+zfs_btree_last_helper(zfs_btree_t *btree, zfs_btree_hdr_t *hdr,
+    zfs_btree_index_t *where)
+{
+	zfs_btree_hdr_t *cur = hdr;
+
+	/* Keep following the rightmost child until we arrive at a leaf. */
+	while (zfs_btree_is_core(cur)) {
+		zfs_btree_core_t *core = (zfs_btree_core_t *)cur;
+		cur = core->btc_children[cur->bth_count];
+	}
+
+	if (where != NULL) {
+		where->bti_node = cur;
+		where->bti_offset = cur->bth_count - 1;
+		where->bti_before = B_FALSE;
+	}
+
+	/* Leaf elements are stored starting at slot bth_first. */
+	zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)cur;
+	return (leaf->btl_elems + (cur->bth_first + cur->bth_count - 1) *
+	    btree->bt_elem_size);
+}
+
+/*
+ * Look up the last element of the tree. Returns NULL when the tree is
+ * empty; otherwise returns the element and, if where is non-null, stores
+ * its location there.
+ */
+void *
+zfs_btree_last(zfs_btree_t *tree, zfs_btree_index_t *where)
+{
+	if (tree->bt_height != -1)
+		return (zfs_btree_last_helper(tree, tree->bt_root, where));
+
+	/* A height of -1 means there are no nodes, hence no elements. */
+	ASSERT0(tree->bt_num_elems);
+	return (NULL);
+}
+
+/*
+ * This function contains the logic to find the next node in the tree. A
+ * helper function is used because there are multiple internal consumers of
+ * this logic. The done_func is used by zfs_btree_destroy_nodes to clean up each
+ * node after we've finished with it.
+ *
+ * tree:      the b-tree being walked.
+ * idx:       the position to advance from. If bti_before is set, the
+ *            position is "just before slot bti_offset" and the element in
+ *            that slot is the successor.
+ * out_idx:   receives the location of the returned element. It may be the
+ *            same object as idx. Whenever an element is returned from the
+ *            leaf or core paths handled directly here, all three fields are
+ *            written.
+ * done_func: optional; when non-NULL it is invoked on every node that the
+ *            upward walk from a leaf leaves behind.
+ *
+ * Returns a pointer to the next element, or NULL if there is none.
+ */
+static void *
+zfs_btree_next_helper(zfs_btree_t *tree, const zfs_btree_index_t *idx,
+    zfs_btree_index_t *out_idx,
+    void (*done_func)(zfs_btree_t *, zfs_btree_hdr_t *))
+{
+	if (idx->bti_node == NULL) {
+		ASSERT3S(tree->bt_height, ==, -1);
+		return (NULL);
+	}
+
+	uint32_t offset = idx->bti_offset;
+	if (!zfs_btree_is_core(idx->bti_node)) {
+		/*
+		 * When finding the next element of an element in a leaf,
+		 * there are two cases. If the element isn't the last one in
+		 * the leaf, we just return the next element in the leaf.
+		 * Otherwise, we need to traverse up our parents until we find
+		 * one where our ancestor isn't the last child of its parent.
+		 * Once we do, the next element is the separator after our
+		 * ancestor in its parent.
+		 */
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)idx->bti_node;
+		uint32_t new_off = offset + (idx->bti_before ? 0 : 1);
+		if (leaf->btl_hdr.bth_count > new_off) {
+			out_idx->bti_node = &leaf->btl_hdr;
+			out_idx->bti_offset = new_off;
+			out_idx->bti_before = B_FALSE;
+			return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+			    new_off) * tree->bt_elem_size);
+		}
+
+		zfs_btree_hdr_t *prev = &leaf->btl_hdr;
+		for (zfs_btree_core_t *node = leaf->btl_hdr.bth_parent;
+		    node != NULL; node = node->btc_hdr.bth_parent) {
+			zfs_btree_hdr_t *hdr = &node->btc_hdr;
+			ASSERT(zfs_btree_is_core(hdr));
+			/*
+			 * The parent slot must be looked up before done_func
+			 * runs, because the lookup reads prev's contents and
+			 * done_func may dispose of prev.
+			 */
+			uint32_t i = zfs_btree_find_parent_idx(tree, prev);
+			if (done_func != NULL)
+				done_func(tree, prev);
+			if (i == hdr->bth_count) {
+				prev = hdr;
+				continue;
+			}
+			out_idx->bti_node = hdr;
+			out_idx->bti_offset = i;
+			out_idx->bti_before = B_FALSE;
+			return (node->btc_elems + i * tree->bt_elem_size);
+		}
+		if (done_func != NULL)
+			done_func(tree, prev);
+		/*
+		 * We've traversed all the way up and been at the end of the
+		 * node every time, so this was the last element in the tree.
+		 */
+		return (NULL);
+	}
+
+	/* If we were before an element in a core node, return that element. */
+	ASSERT(zfs_btree_is_core(idx->bti_node));
+	zfs_btree_core_t *node = (zfs_btree_core_t *)idx->bti_node;
+	if (idx->bti_before) {
+		/*
+		 * Fill in the whole index, not just bti_before. When out_idx
+		 * is a different object than idx, its node and offset would
+		 * otherwise keep whatever the caller left in them. When the
+		 * two alias, these stores simply rewrite the same values.
+		 */
+		out_idx->bti_node = &node->btc_hdr;
+		out_idx->bti_offset = offset;
+		out_idx->bti_before = B_FALSE;
+		return (node->btc_elems + offset * tree->bt_elem_size);
+	}
+
+	/*
+	 * The next element from one in a core node is the first element in
+	 * the subtree just to the right of the separator.
+	 */
+	zfs_btree_hdr_t *child = node->btc_children[offset + 1];
+	return (zfs_btree_first_helper(tree, child, out_idx));
+}
+
+/*
+ * Advance from idx to the following element in the tree. The element is
+ * returned and its location is stored in out_idx; NULL is returned when no
+ * further element exists. idx and out_idx are allowed to be the same
+ * object.
+ */
+void *
+zfs_btree_next(zfs_btree_t *tree, const zfs_btree_index_t *idx,
+    zfs_btree_index_t *out_idx)
+{
+	/* No per-node callback is needed for a plain traversal. */
+	void *elem = zfs_btree_next_helper(tree, idx, out_idx, NULL);
+
+	return (elem);
+}
+
+/*
+ * Step back from idx to the preceding element in the tree. The element is
+ * returned and its location is stored in out_idx; NULL is returned when idx
+ * already refers to the first element (or the tree is empty). idx and
+ * out_idx are allowed to be the same object.
+ */
+void *
+zfs_btree_prev(zfs_btree_t *tree, const zfs_btree_index_t *idx,
+    zfs_btree_index_t *out_idx)
+{
+	zfs_btree_hdr_t *start = idx->bti_node;
+	if (start == NULL) {
+		ASSERT3S(tree->bt_height, ==, -1);
+		return (NULL);
+	}
+
+	/* Read everything we need from idx before out_idx is written. */
+	const uint32_t pos = idx->bti_offset;
+	const size_t elem_size = tree->bt_elem_size;
+
+	if (zfs_btree_is_core(start)) {
+		/*
+		 * A separator's predecessor is the last element of the
+		 * subtree hanging off the child slot to its left.
+		 */
+		zfs_btree_core_t *core = (zfs_btree_core_t *)start;
+		return (zfs_btree_last_helper(tree, core->btc_children[pos],
+		    out_idx));
+	}
+
+	/* Leaf, easy case: the predecessor is in the same leaf. */
+	zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)start;
+	if (pos > 0) {
+		out_idx->bti_node = &leaf->btl_hdr;
+		out_idx->bti_offset = pos - 1;
+		out_idx->bti_before = B_FALSE;
+		return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+		    pos - 1) * elem_size);
+	}
+
+	/*
+	 * Leaf, hard case: we are at the front of the leaf. Climb until the
+	 * node we came from is not its parent's first child; the separator
+	 * immediately to the left of that child is the predecessor.
+	 */
+	zfs_btree_hdr_t *child = &leaf->btl_hdr;
+	zfs_btree_core_t *ancestor = leaf->btl_hdr.bth_parent;
+	while (ancestor != NULL) {
+		ASSERT(zfs_btree_is_core(&ancestor->btc_hdr));
+		uint32_t slot = zfs_btree_find_parent_idx(tree, child);
+		if (slot != 0) {
+			out_idx->bti_node = &ancestor->btc_hdr;
+			out_idx->bti_offset = slot - 1;
+			out_idx->bti_before = B_FALSE;
+			return (ancestor->btc_elems + (slot - 1) * elem_size);
+		}
+		child = &ancestor->btc_hdr;
+		ancestor = ancestor->btc_hdr.bth_parent;
+	}
+
+	/*
+	 * We were the first child at every level up to the root, so there is
+	 * nothing before us.
+	 */
+	return (NULL);
+}
+
+/*
+ * Return a pointer to the element that idx refers to. idx must point at an
+ * element, not at a gap (bti_before must be clear).
+ *
+ * The caller may modify the returned element in place, provided the change
+ * does not alter how it orders against any other element that could be in
+ * the tree.
+ */
+void *
+zfs_btree_get(zfs_btree_t *tree, zfs_btree_index_t *idx)
+{
+	ASSERT(!idx->bti_before);
+
+	zfs_btree_hdr_t *hdr = idx->bti_node;
+	size_t elem_size = tree->bt_elem_size;
+
+	if (zfs_btree_is_core(hdr)) {
+		zfs_btree_core_t *core = (zfs_btree_core_t *)hdr;
+		return (core->btc_elems + idx->bti_offset * elem_size);
+	}
+
+	/* Leaf offsets are relative to the leaf's first occupied slot. */
+	zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+	return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+	    idx->bti_offset) * elem_size);
+}
+
+/*
+ * Insert the given value into the tree. The value must not be present
+ * already; this is enforced with VERIFY.
+ */
+void
+zfs_btree_add(zfs_btree_t *tree, const void *node)
+{
+	zfs_btree_index_t slot = {0};
+	void *existing = zfs_btree_find(tree, node, &slot);
+
+	VERIFY3P(existing, ==, NULL);
+	zfs_btree_add_idx(tree, node, &slot);
+}
+
+/*
+ * Release the memory of a single tree node and drop it from the tree's node
+ * count. Core nodes are freed with kmem_free(); leaves are handed to
+ * zfs_btree_leaf_free().
+ */
+static void
+zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node)
+{
+	tree->bt_num_nodes--;
+
+	if (zfs_btree_is_core(node)) {
+		/* The header is followed by room for BTREE_CORE_ELEMS elems. */
+		size_t core_size = sizeof (zfs_btree_core_t) +
+		    BTREE_CORE_ELEMS * tree->bt_elem_size;
+		kmem_free(node, core_size);
+		return;
+	}
+
+	zfs_btree_leaf_free(tree, node);
+}
+
+/*
+ * Remove the rm_hdr and the separator to its left from the parent node. The
+ * buffer that rm_hdr was stored in may already be freed, so its contents
+ * cannot be accessed.
+ *
+ * tree:   the b-tree being modified.
+ * node:   the core node that currently lists rm_hdr among its children.
+ * rm_hdr: the child to unlink. Only its address is compared against the
+ *         child pointers; it is never dereferenced here.
+ *
+ * If node is already at the minimum fill, an element is borrowed from a
+ * sibling, or node is merged with a sibling. A merge recurses into node's
+ * parent, and removing the second-to-last child of the root reduces the
+ * height of the tree.
+ */
+static void
+zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
+    zfs_btree_hdr_t *rm_hdr)
+{
+	size_t size = tree->bt_elem_size;
+	uint32_t min_count = (BTREE_CORE_ELEMS / 2) - 1;
+	zfs_btree_hdr_t *hdr = &node->btc_hdr;
+	/*
+	 * If the node is the root node and rm_hdr is one of two children,
+	 * promote the other child to the root. The old root is destroyed and
+	 * the tree becomes one level shorter.
+	 */
+	if (hdr->bth_parent == NULL && hdr->bth_count <= 1) {
+		ASSERT3U(hdr->bth_count, ==, 1);
+		ASSERT3P(tree->bt_root, ==, node);
+		ASSERT3P(node->btc_children[1], ==, rm_hdr);
+		tree->bt_root = node->btc_children[0];
+		node->btc_children[0]->bth_parent = NULL;
+		zfs_btree_node_destroy(tree, hdr);
+		tree->bt_height--;
+		return;
+	}
+
+	uint32_t idx;
+	for (idx = 0; idx <= hdr->bth_count; idx++) {
+		if (node->btc_children[idx] == rm_hdr)
+			break;
+	}
+	ASSERT3U(idx, <=, hdr->bth_count);
+
+	/*
+	 * If the node is the root or it has more than the minimum number of
+	 * children, just remove the child and separator, and return.
+	 */
+	if (hdr->bth_parent == NULL ||
+	    hdr->bth_count > min_count) {
+		/*
+		 * Shift the element and children to the right of rm_hdr to
+		 * the left by one spot.
+		 */
+		bt_shift_core_left(tree, node, idx, hdr->bth_count - idx,
+		    BSS_PARALLELOGRAM);
+		hdr->bth_count--;
+		zfs_btree_poison_node_at(tree, hdr, hdr->bth_count, 1);
+		return;
+	}
+
+	ASSERT3U(hdr->bth_count, ==, min_count);
+
+	/*
+	 * Now we try to take a node from a neighbor. We check left, then
+	 * right. If the neighbor exists and has more than the minimum number
+	 * of elements, we move the separator between us and them to our
+	 * node, move their closest element (last for left, first for right)
+	 * to the separator, and move their closest child to our node. Along
+	 * the way we need to collapse the gap made by idx, and (for our right
+	 * neighbor) the gap made by removing their first element and child.
+	 *
+	 * Note: this logic currently doesn't support taking from a neighbor
+	 * that isn't a sibling (i.e. a neighbor with a different
+	 * parent). This isn't critical functionality, but may be worth
+	 * implementing in the future for completeness' sake.
+	 */
+	zfs_btree_core_t *parent = hdr->bth_parent;
+	uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+
+	zfs_btree_hdr_t *l_hdr = (parent_idx == 0 ? NULL :
+	    parent->btc_children[parent_idx - 1]);
+	if (l_hdr != NULL && l_hdr->bth_count > min_count) {
+		/* We can take a node from the left neighbor. */
+		ASSERT(zfs_btree_is_core(l_hdr));
+		zfs_btree_core_t *neighbor = (zfs_btree_core_t *)l_hdr;
+
+		/*
+		 * Start by shifting the elements and children in the current
+		 * node to the right by one spot. Only the entries in front
+		 * of rm_hdr take part, which is meant to overwrite rm_hdr and
+		 * the separator to its left while opening up slot 0.
+		 * NOTE(review): the count passed is idx - 1, so this relies
+		 * on idx >= 1, i.e. rm_hdr never being the first child --
+		 * confirm against the callers.
+		 */
+		bt_shift_core_right(tree, node, 0, idx - 1, BSS_TRAPEZOID);
+
+		/*
+		 * Move the separator between node and neighbor to the first
+		 * element slot in the current node.
+		 */
+		uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
+		    size;
+		bcpy(separator, node->btc_elems, size);
+
+		/* Move the last child of neighbor to our first child slot. */
+		node->btc_children[0] =
+		    neighbor->btc_children[l_hdr->bth_count];
+		node->btc_children[0]->bth_parent = node;
+
+		/* Move the last element of neighbor to the separator spot. */
+		uint8_t *take_elem = neighbor->btc_elems +
+		    (l_hdr->bth_count - 1) * size;
+		bcpy(take_elem, separator, size);
+		l_hdr->bth_count--;
+		zfs_btree_poison_node_at(tree, l_hdr, l_hdr->bth_count, 1);
+		return;
+	}
+
+	zfs_btree_hdr_t *r_hdr = (parent_idx == parent->btc_hdr.bth_count ?
+	    NULL : parent->btc_children[parent_idx + 1]);
+	if (r_hdr != NULL && r_hdr->bth_count > min_count) {
+		/* We can take a node from the right neighbor. */
+		ASSERT(zfs_btree_is_core(r_hdr));
+		zfs_btree_core_t *neighbor = (zfs_btree_core_t *)r_hdr;
+
+		/*
+		 * Shift elements in node left by one spot to overwrite rm_hdr
+		 * and the separator before it.
+		 */
+		bt_shift_core_left(tree, node, idx, hdr->bth_count - idx,
+		    BSS_PARALLELOGRAM);
+
+		/*
+		 * Move the separator between node and neighbor to the last
+		 * element spot in node. node's count is left unchanged: one
+		 * element was dropped above and one is gained here.
+		 */
+		uint8_t *separator = parent->btc_elems + parent_idx * size;
+		bcpy(separator, node->btc_elems + (hdr->bth_count - 1) * size,
+		    size);
+
+		/*
+		 * Move the first child of neighbor to the last child spot in
+		 * node.
+		 */
+		node->btc_children[hdr->bth_count] = neighbor->btc_children[0];
+		node->btc_children[hdr->bth_count]->bth_parent = node;
+
+		/* Move the first element of neighbor to the separator spot. */
+		uint8_t *take_elem = neighbor->btc_elems;
+		bcpy(take_elem, separator, size);
+		r_hdr->bth_count--;
+
+		/*
+		 * Shift the elements and children of neighbor to cover the
+		 * stolen elements.
+		 */
+		bt_shift_core_left(tree, neighbor, 1, r_hdr->bth_count,
+		    BSS_TRAPEZOID);
+		zfs_btree_poison_node_at(tree, r_hdr, r_hdr->bth_count, 1);
+		return;
+	}
+
+	/*
+	 * In this case, neither of our neighbors can spare an element, so we
+	 * need to merge with one of them. We prefer the left one,
+	 * arbitrarily. Move the separator into the leftmost merging node
+	 * (which may be us or the left neighbor), and then move the right
+	 * merging node's elements. Once that's done, we go back and delete
+	 * the element we're removing. Finally, go into the parent and delete
+	 * the right merging node and the separator. This may cause further
+	 * merging.
+	 *
+	 * keep_hdr is the left node of the pair and survives; new_rm_hdr is
+	 * the right node and is removed from the parent afterwards. new_idx
+	 * is where rm_hdr will sit among keep's children once the merge is
+	 * done, and parent_idx is adjusted so that parent_idx - 1 always
+	 * names the separator between the two merging nodes.
+	 */
+	zfs_btree_hdr_t *new_rm_hdr, *keep_hdr;
+	uint32_t new_idx = idx;
+	if (l_hdr != NULL) {
+		keep_hdr = l_hdr;
+		new_rm_hdr = hdr;
+		new_idx += keep_hdr->bth_count + 1;
+	} else {
+		ASSERT3P(r_hdr, !=, NULL);
+		keep_hdr = hdr;
+		new_rm_hdr = r_hdr;
+		parent_idx++;
+	}
+
+	ASSERT(zfs_btree_is_core(keep_hdr));
+	ASSERT(zfs_btree_is_core(new_rm_hdr));
+
+	zfs_btree_core_t *keep = (zfs_btree_core_t *)keep_hdr;
+	zfs_btree_core_t *rm = (zfs_btree_core_t *)new_rm_hdr;
+
+	if (zfs_btree_verify_intensity >= 5) {
+		for (uint32_t i = 0; i < new_rm_hdr->bth_count + 1; i++) {
+			zfs_btree_verify_poison_at(tree, keep_hdr,
+			    keep_hdr->bth_count + i);
+		}
+	}
+
+	/* Move the separator into the left node. */
+	uint8_t *e_out = keep->btc_elems + keep_hdr->bth_count * size;
+	uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
+	    size;
+	bcpy(separator, e_out, size);
+	keep_hdr->bth_count++;
+
+	/* Move all our elements and children into the left node. */
+	bt_transfer_core(tree, rm, 0, new_rm_hdr->bth_count, keep,
+	    keep_hdr->bth_count, BSS_TRAPEZOID);
+
+	uint32_t old_count = keep_hdr->bth_count;
+
+	/* Update bookkeeping */
+	keep_hdr->bth_count += new_rm_hdr->bth_count;
+	ASSERT3U(keep_hdr->bth_count, ==, (min_count * 2) + 1);
+
+	/*
+	 * Shift the element and children to the right of rm_hdr to
+	 * the left by one spot.
+	 */
+	ASSERT3P(keep->btc_children[new_idx], ==, rm_hdr);
+	bt_shift_core_left(tree, keep, new_idx, keep_hdr->bth_count - new_idx,
+	    BSS_PARALLELOGRAM);
+	keep_hdr->bth_count--;
+
+	/*
+	 * Reparent all our children to point to the left node. The range
+	 * starts one slot before old_count so that it still covers the moved
+	 * children if the shift above pulled them left by one.
+	 */
+	zfs_btree_hdr_t **new_start = keep->btc_children +
+	    old_count - 1;
+	for (uint32_t i = 0; i < new_rm_hdr->bth_count + 1; i++)
+		new_start[i]->bth_parent = keep;
+	for (uint32_t i = 0; i <= keep_hdr->bth_count; i++) {
+		ASSERT3P(keep->btc_children[i]->bth_parent, ==, keep);
+		ASSERT3P(keep->btc_children[i], !=, rm_hdr);
+	}
+	zfs_btree_poison_node_at(tree, keep_hdr, keep_hdr->bth_count, 1);
+
+	new_rm_hdr->bth_count = 0;
+	zfs_btree_remove_from_node(tree, parent, new_rm_hdr);
+	zfs_btree_node_destroy(tree, new_rm_hdr);
+}
+
+/* Remove the element at the specific location. */
+void
+zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
+{
+	size_t size = tree->bt_elem_size;
+	zfs_btree_hdr_t *hdr = where->bti_node;
+	uint32_t idx = where->bti_offset;
+
+	ASSERT(!where->bti_before);
+	if (tree->bt_bulk != NULL) {
+		/*
+		 * Leave bulk insert mode. Note that our index would be
+		 * invalid after we correct the tree, so we copy the value
+		 * we're planning to remove and find it again after
+		 * bulk_finish.
+		 */
+		uint8_t *value = zfs_btree_get(tree, where);
+		uint8_t *tmp = kmem_alloc(size, KM_SLEEP);
+		bcpy(value, tmp, size);
+		zfs_btree_bulk_finish(tree);
+		VERIFY3P(zfs_btree_find(tree, tmp, where), !=, NULL);
+		kmem_free(tmp, size);
+		hdr = where->bti_node;
+		idx = where->bti_offset;
+	}
+
+	tree->bt_num_elems--;
+	/*
+	 * If the element happens to be in a core node, we move a leaf node's
+	 * element into its place and then remove the leaf node element. This
+	 * makes the rebalance logic not need to be recursive both upwards and
+	 * downwards.
+	 */
+	if (zfs_btree_is_core(hdr)) {
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		zfs_btree_hdr_t *left_subtree = node->btc_children[idx];
+		void *new_value = zfs_btree_last_helper(tree, left_subtree,
+		    where);
+		ASSERT3P(new_value, !=, NULL);
+
+		bcpy(new_value, node->btc_elems + idx * size, size);
+
+		hdr = where->bti_node;
+		idx = where->bti_offset;
+		ASSERT(!where->bti_before);
+	}
+
+	/*
+	 * First, we'll update the leaf's metadata. Then, we shift any
+	 * elements after the idx to the left. After that, we rebalance if
+	 * needed.
+	 */
+	ASSERT(!zfs_btree_is_core(hdr));
+	zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+	ASSERT3U(hdr->bth_count, >, 0);
+
+	uint32_t min_count = (tree->bt_leaf_cap / 2) - 1;
+
+	/*
+	 * If we're over the minimum size or this is the root, just overwrite
+	 * the value and return.
+	 */
+	if (hdr->bth_count > min_count || hdr->bth_parent == NULL) {
+		bt_shrink_leaf(tree, leaf, idx, 1);
+		if (hdr->bth_parent == NULL) {
+			ASSERT0(tree->bt_height);
+			if (hdr->bth_count == 0) {
+				tree->bt_root = NULL;
+				tree->bt_height--;
+				zfs_btree_node_destroy(tree, &leaf->btl_hdr);
+			}
+		}
+		zfs_btree_verify(tree);
+		return;
+	}
+	ASSERT3U(hdr->bth_count, ==, min_count);
+
+	/*
+	 * Now we try to take a node from a sibling. We check left, then
+	 * right. If they exist and have more than the minimum number of
+	 * elements, we move the separator between us and them to our node
+	 * and move their closest element (last for left, first for right) to
+	 * the separator. Along the way we need to collapse the gap made by
+	 * idx, and (for our right neighbor) the gap made by removing their
+	 * first element.
+	 *
+	 * Note: this logic currently doesn't support taking from a neighbor
+	 * that isn't a sibling. This isn't critical functionality, but may be
+	 * worth implementing in the future for completeness' sake.
+	 */
+	zfs_btree_core_t *parent = hdr->bth_parent;
+	uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+
+	zfs_btree_hdr_t *l_hdr = (parent_idx == 0 ? NULL :
+	    parent->btc_children[parent_idx - 1]);
+	if (l_hdr != NULL && l_hdr->bth_count > min_count) {
+		/* We can take a node from the left neighbor. */
+		ASSERT(!zfs_btree_is_core(l_hdr));
+		zfs_btree_leaf_t *neighbor = (zfs_btree_leaf_t *)l_hdr;
+
+		/*
+		 * Move our elements back by one spot to make room for the
+		 * stolen element and overwrite the element being removed.
+		 */
+		bt_shift_leaf(tree, leaf, 0, idx, 1, BSD_RIGHT);
+
+		/* Move the separator to our first spot. */
+		uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
+		    size;
+		bcpy(separator, leaf->btl_elems + hdr->bth_first * size, size);
+
+		/* Move our neighbor's last element to the separator. */
+		uint8_t *take_elem = neighbor->btl_elems +
+		    (l_hdr->bth_first + l_hdr->bth_count - 1) * size;
+		bcpy(take_elem, separator, size);
+
+		/* Delete our neighbor's last element. */
+		bt_shrink_leaf(tree, neighbor, l_hdr->bth_count - 1, 1);
+		zfs_btree_verify(tree);
+		return;
+	}
+
+	zfs_btree_hdr_t *r_hdr = (parent_idx == parent->btc_hdr.bth_count ?
+	    NULL : parent->btc_children[parent_idx + 1]);
+	if (r_hdr != NULL && r_hdr->bth_count > min_count) {
+		/* We can take a node from the right neighbor. */
+		ASSERT(!zfs_btree_is_core(r_hdr));
+		zfs_btree_leaf_t *neighbor = (zfs_btree_leaf_t *)r_hdr;
+
+		/*
+		 * Move our elements after the element being removed forwards
+		 * by one spot to make room for the stolen element and
+		 * overwrite the element being removed.
+		 */
+		bt_shift_leaf(tree, leaf, idx + 1, hdr->bth_count - idx - 1,
+		    1, BSD_LEFT);
+
+		/* Move the separator between us to our last spot. */
+		uint8_t *separator = parent->btc_elems + parent_idx * size;
+		bcpy(separator, leaf->btl_elems + (hdr->bth_first +
+		    hdr->bth_count - 1) * size, size);
+
+		/* Move our neighbor's first element to the separator. */
+		uint8_t *take_elem = neighbor->btl_elems +
+		    r_hdr->bth_first * size;
+		bcpy(take_elem, separator, size);
+
+		/* Delete our neighbor's first element. */
+		bt_shrink_leaf(tree, neighbor, 0, 1);
+		zfs_btree_verify(tree);
+		return;
+	}
+
+	/*
+	 * In this case, neither of our neighbors can spare an element, so we
+	 * need to merge with one of them. We prefer the left one, arbitrarily.
+	 * After remove we move the separator into the leftmost merging node
+	 * (which may be us or the left neighbor), and then move the right
+	 * merging node's elements. Once that's done, we go back and delete
+	 * the element we're removing. Finally, go into the parent and delete
+	 * the right merging node and the separator. This may cause further
+	 * merging.
+	 */
+	zfs_btree_hdr_t *rm_hdr, *k_hdr;
+	if (l_hdr != NULL) {
+		k_hdr = l_hdr;
+		rm_hdr = hdr;
+	} else {
+		ASSERT3P(r_hdr, !=, NULL);
+		k_hdr = hdr;
+		rm_hdr = r_hdr;
+		parent_idx++;
+	}
+	ASSERT(!zfs_btree_is_core(k_hdr));
+	ASSERT(!zfs_btree_is_core(rm_hdr));
+	ASSERT3U(k_hdr->bth_count, ==, min_count);
+	ASSERT3U(rm_hdr->bth_count, ==, min_count);
+	zfs_btree_leaf_t *keep = (zfs_btree_leaf_t *)k_hdr;
+	zfs_btree_leaf_t *rm = (zfs_btree_leaf_t *)rm_hdr;
+
+	if (zfs_btree_verify_intensity >= 5) {
+		for (uint32_t i = 0; i < rm_hdr->bth_count + 1; i++) {
+			zfs_btree_verify_poison_at(tree, k_hdr,
+			    k_hdr->bth_count + i);
+		}
+	}
+
+	/*
+	 * Remove the value from the node.  It will go below the minimum,
+	 * but we'll fix it in no time.
+	 */
+	bt_shrink_leaf(tree, leaf, idx, 1);
+
+	/* Prepare space for elements to be moved from the right. */
+	uint32_t k_count = k_hdr->bth_count;
+	bt_grow_leaf(tree, keep, k_count, 1 + rm_hdr->bth_count);
+	ASSERT3U(k_hdr->bth_count, ==, min_count * 2);
+
+	/* Move the separator into the first open spot. */
+	uint8_t *out = keep->btl_elems + (k_hdr->bth_first + k_count) * size;
+	uint8_t *separator = parent->btc_elems + (parent_idx - 1) * size;
+	bcpy(separator, out, size);
+
+	/* Move our elements to the left neighbor. */
+	bt_transfer_leaf(tree, rm, 0, rm_hdr->bth_count, keep, k_count + 1);
+
+	/* Remove the emptied node from the parent. */
+	zfs_btree_remove_from_node(tree, parent, rm_hdr);
+	zfs_btree_node_destroy(tree, rm_hdr);
+	zfs_btree_verify(tree);
+}
+
+/* Remove the given value from the tree. */
+void
+zfs_btree_remove(zfs_btree_t *tree, const void *value)
+{
+	zfs_btree_index_t where = {0};
+	VERIFY3P(zfs_btree_find(tree, value, &where), !=, NULL);
+	zfs_btree_remove_idx(tree, &where);
+}
+
+/* Return the number of elements in the tree. */
+ulong_t
+zfs_btree_numnodes(zfs_btree_t *tree)
+{
+	return (tree->bt_num_elems);
+}
+
+/*
+ * This function is used to visit all the elements in the tree before
+ * destroying the tree. This allows the calling code to perform any cleanup it
+ * needs to do. This is more efficient than just removing the first element
+ * over and over, because it removes all rebalancing. Once the destroy_nodes()
+ * function has been called, no other btree operations are valid until it
+ * returns NULL, at which point the only valid operation is zfs_btree_destroy().
+ *
+ * example:
+ *
+ *      zfs_btree_index_t *cookie = NULL;
+ *      my_data_t *node;
+ *
+ *      while ((node = zfs_btree_destroy_nodes(tree, &cookie)) != NULL)
+ *              free(node->ptr);
+ *      zfs_btree_destroy(tree);
+ *
+ */
+void *
+zfs_btree_destroy_nodes(zfs_btree_t *tree, zfs_btree_index_t **cookie)
+{
+	if (*cookie == NULL) {
+		if (tree->bt_height == -1)
+			return (NULL);
+		*cookie = kmem_alloc(sizeof (**cookie), KM_SLEEP);
+		return (zfs_btree_first(tree, *cookie));
+	}
+
+	void *rval = zfs_btree_next_helper(tree, *cookie, *cookie,
+	    zfs_btree_node_destroy);
+	if (rval == NULL)   {
+		tree->bt_root = NULL;
+		tree->bt_height = -1;
+		tree->bt_num_elems = 0;
+		kmem_free(*cookie, sizeof (**cookie));
+		tree->bt_bulk = NULL;
+	}
+	return (rval);
+}
+
+static void
+zfs_btree_clear_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	if (zfs_btree_is_core(hdr)) {
+		zfs_btree_core_t *btc = (zfs_btree_core_t *)hdr;
+		for (uint32_t i = 0; i <= hdr->bth_count; i++)
+			zfs_btree_clear_helper(tree, btc->btc_children[i]);
+	}
+
+	zfs_btree_node_destroy(tree, hdr);
+}
+
+void
+zfs_btree_clear(zfs_btree_t *tree)
+{
+	if (tree->bt_root == NULL) {
+		ASSERT0(tree->bt_num_elems);
+		return;
+	}
+
+	zfs_btree_clear_helper(tree, tree->bt_root);
+	tree->bt_num_elems = 0;
+	tree->bt_root = NULL;
+	tree->bt_num_nodes = 0;
+	tree->bt_height = -1;
+	tree->bt_bulk = NULL;
+}
+
+void
+zfs_btree_destroy(zfs_btree_t *tree)
+{
+	ASSERT0(tree->bt_num_elems);
+	ASSERT3P(tree->bt_root, ==, NULL);
+}
+
+/* Verify that every child of this node has the correct parent pointer. */
+static void
+zfs_btree_verify_pointers_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	if (!zfs_btree_is_core(hdr))
+		return;
+
+	zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+	for (uint32_t i = 0; i <= hdr->bth_count; i++) {
+		VERIFY3P(node->btc_children[i]->bth_parent, ==, hdr);
+		zfs_btree_verify_pointers_helper(tree, node->btc_children[i]);
+	}
+}
+
+/* Verify that every node has the correct parent pointer. */
+static void
+zfs_btree_verify_pointers(zfs_btree_t *tree)
+{
+	if (tree->bt_height == -1) {
+		VERIFY3P(tree->bt_root, ==, NULL);
+		return;
+	}
+	VERIFY3P(tree->bt_root->bth_parent, ==, NULL);
+	zfs_btree_verify_pointers_helper(tree, tree->bt_root);
+}
+
+/*
+ * Verify that the current node and all of its children satisfy the count
+ * invariants, and return the total count in the subtree rooted in this node.
+ */
+static uint64_t
+zfs_btree_verify_counts_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	if (!zfs_btree_is_core(hdr)) {
+		if (tree->bt_root != hdr && tree->bt_bulk &&
+		    hdr != &tree->bt_bulk->btl_hdr) {
+			VERIFY3U(hdr->bth_count, >=, tree->bt_leaf_cap / 2 - 1);
+		}
+
+		return (hdr->bth_count);
+	} else {
+		/* Core node: its own elements plus those of every subtree. */
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		uint64_t ret = hdr->bth_count;
+		if (tree->bt_root != hdr && tree->bt_bulk == NULL)
+			VERIFY3U(hdr->bth_count, >=, BTREE_CORE_ELEMS / 2 - 1);
+		for (uint32_t i = 0; i <= hdr->bth_count; i++) {
+			ret += zfs_btree_verify_counts_helper(tree,
+			    node->btc_children[i]);
+		}
+
+		return (ret);
+	}
+}
+
+/*
+ * Verify that all nodes satisfy the count invariants and that the number of
+ * elements found by walking the tree matches tree->bt_num_elems.
+ */
+static void
+zfs_btree_verify_counts(zfs_btree_t *tree)
+{
+	EQUIV(tree->bt_num_elems == 0, tree->bt_height == -1);
+	if (tree->bt_height == -1) {
+		return;
+	}
+	VERIFY3U(zfs_btree_verify_counts_helper(tree, tree->bt_root), ==,
+	    tree->bt_num_elems);
+}
+
+/*
+ * Check that the subtree rooted at this node has a uniform height. Returns
+ * the number of nodes under this node, to help verify bt_num_nodes.
+ */
+static uint64_t
+zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
+    int32_t height)
+{
+	if (!zfs_btree_is_core(hdr)) {
+		VERIFY0(height);
+		return (1);
+	}
+
+	zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+	uint64_t ret = 1;
+	for (uint32_t i = 0; i <= hdr->bth_count; i++) {
+		ret += zfs_btree_verify_height_helper(tree,
+		    node->btc_children[i], height - 1);
+	}
+	return (ret);
+}
+
+/*
+ * Check that the tree rooted at this node has a uniform height, and that the
+ * bt_height in the tree is correct.
+ */
+static void
+zfs_btree_verify_height(zfs_btree_t *tree)
+{
+	EQUIV(tree->bt_height == -1, tree->bt_root == NULL);
+	if (tree->bt_height == -1) {
+		return;
+	}
+
+	VERIFY3U(zfs_btree_verify_height_helper(tree, tree->bt_root,
+	    tree->bt_height), ==, tree->bt_num_nodes);
+}
+
+/*
+ * Check that the elements in this node are sorted, and that if this is a core
+ * node, the separators are properly between the subtrees they separate and
+ * that the children also satisfy this requirement.
+ */
+static void
+zfs_btree_verify_order_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	size_t size = tree->bt_elem_size;
+	if (!zfs_btree_is_core(hdr)) {
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		for (uint32_t i = 1; i < hdr->bth_count; i++) {
+			VERIFY3S(tree->bt_compar(leaf->btl_elems +
+			    (hdr->bth_first + i - 1) * size,
+			    leaf->btl_elems +
+			    (hdr->bth_first + i) * size), ==, -1);
+		}
+		return;
+	}
+
+	zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+	for (uint32_t i = 1; i < hdr->bth_count; i++) {
+		VERIFY3S(tree->bt_compar(node->btc_elems + (i - 1) * size,
+		    node->btc_elems + i * size), ==, -1);
+	}
+	for (uint32_t i = 0; i < hdr->bth_count; i++) {
+		uint8_t *left_child_last = NULL;
+		zfs_btree_hdr_t *left_child_hdr = node->btc_children[i];
+		if (zfs_btree_is_core(left_child_hdr)) {
+			zfs_btree_core_t *left_child =
+			    (zfs_btree_core_t *)left_child_hdr;
+			left_child_last = left_child->btc_elems +
+			    (left_child_hdr->bth_count - 1) * size;
+		} else {
+			zfs_btree_leaf_t *left_child =
+			    (zfs_btree_leaf_t *)left_child_hdr;
+			left_child_last = left_child->btl_elems +
+			    (left_child_hdr->bth_first +
+			    left_child_hdr->bth_count - 1) * size;
+		}
+		int comp = tree->bt_compar(node->btc_elems + i * size,
+		    left_child_last);
+		if (comp <= 0) {
+			panic("btree: compar returned %d (expected 1) at "
+			    "%px %d: compar(%px,  %px)", comp, node, i,
+			    node->btc_elems + i * size, left_child_last);
+		}
+
+		uint8_t *right_child_first = NULL;
+		zfs_btree_hdr_t *right_child_hdr = node->btc_children[i + 1];
+		if (zfs_btree_is_core(right_child_hdr)) {
+			zfs_btree_core_t *right_child =
+			    (zfs_btree_core_t *)right_child_hdr;
+			right_child_first = right_child->btc_elems;
+		} else {
+			zfs_btree_leaf_t *right_child =
+			    (zfs_btree_leaf_t *)right_child_hdr;
+			right_child_first = right_child->btl_elems +
+			    right_child_hdr->bth_first * size;
+		}
+		comp = tree->bt_compar(node->btc_elems + i * size,
+		    right_child_first);
+		if (comp >= 0) {
+			panic("btree: compar returned %d (expected -1) at "
+			    "%px %d: compar(%px,  %px)", comp, node, i,
+			    node->btc_elems + i * size, right_child_first);
+		}
+	}
+	for (uint32_t i = 0; i <= hdr->bth_count; i++)
+		zfs_btree_verify_order_helper(tree, node->btc_children[i]);
+}
+
+/* Check that all elements in the tree are in sorted order. */
+static void
+zfs_btree_verify_order(zfs_btree_t *tree)
+{
+	EQUIV(tree->bt_height == -1, tree->bt_root == NULL);
+	if (tree->bt_height == -1) {
+		return;
+	}
+
+	zfs_btree_verify_order_helper(tree, tree->bt_root);
+}
+
+#ifdef ZFS_DEBUG
+/* Check that all unused memory is poisoned correctly. */
+static void
+zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
+{
+	size_t size = tree->bt_elem_size;
+	if (!zfs_btree_is_core(hdr)) {
+		zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+		for (size_t i = 0; i < hdr->bth_first * size; i++)
+			VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
+		size_t esize = tree->bt_leaf_size -
+		    offsetof(zfs_btree_leaf_t, btl_elems);
+		for (size_t i = (hdr->bth_first + hdr->bth_count) * size;
+		    i < esize; i++)
+			VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
+	} else {
+		zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
+		for (size_t i = hdr->bth_count * size;
+		    i < BTREE_CORE_ELEMS * size; i++)
+			VERIFY3U(node->btc_elems[i], ==, 0x0f);
+
+		for (uint32_t i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS;
+		    i++) {
+			VERIFY3P(node->btc_children[i], ==,
+			    (zfs_btree_hdr_t *)BTREE_POISON);
+		}
+
+		for (uint32_t i = 0; i <= hdr->bth_count; i++) {
+			zfs_btree_verify_poison_helper(tree,
+			    node->btc_children[i]);
+		}
+	}
+}
+#endif
+
+/* Check that unused memory in the tree is still poisoned. */
+static void
+zfs_btree_verify_poison(zfs_btree_t *tree)
+{
+#ifdef ZFS_DEBUG
+	if (tree->bt_height == -1)
+		return;
+	zfs_btree_verify_poison_helper(tree, tree->bt_root);
+#endif
+}
+
+void
+zfs_btree_verify(zfs_btree_t *tree)
+{
+	if (zfs_btree_verify_intensity == 0)
+		return;
+	zfs_btree_verify_height(tree);
+	if (zfs_btree_verify_intensity == 1)
+		return;
+	zfs_btree_verify_pointers(tree);
+	if (zfs_btree_verify_intensity == 2)
+		return;
+	zfs_btree_verify_counts(tree);
+	if (zfs_btree_verify_intensity == 3)
+		return;
+	zfs_btree_verify_order(tree);
+
+	if (zfs_btree_verify_intensity == 4)
+		return;
+	zfs_btree_verify_poison(tree);
+}
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, btree_verify_intensity, UINT, ZMOD_RW,
+	"Enable btree verification. Levels above 4 require ZFS be built "
+	"with debugging");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/cityhash.c b/zfs/module/zfs/cityhash.c
deleted file mode 100644
index 2b62eda..0000000
--- a/zfs/module/zfs/cityhash.c
+++ /dev/null

@@ -1,63 +0,0 @@
-// Copyright (c) 2011 Google, Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-/*
- * Copyright (c) 2017 by Delphix. All rights reserved.
- */
-
-#include <sys/cityhash.h>
-
-#define	HASH_K1 0xb492b66fbe98f273ULL
-#define	HASH_K2 0x9ae16a3b2f90404fULL
-
-/*
- * Bitwise right rotate.  Normally this will compile to a single
- * instruction.
- */
-static inline uint64_t
-rotate(uint64_t val, int shift)
-{
-	// Avoid shifting by 64: doing so yields an undefined result.
-	return (shift == 0 ? val : (val >> shift) | (val << (64 - shift)));
-}
-
-static inline uint64_t
-cityhash_helper(uint64_t u, uint64_t v, uint64_t mul)
-{
-	uint64_t a = (u ^ v) * mul;
-	a ^= (a >> 47);
-	uint64_t b = (v ^ a) * mul;
-	b ^= (b >> 47);
-	b *= mul;
-	return (b);
-}
-
-uint64_t
-cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
-{
-	uint64_t mul = HASH_K2 + 64;
-	uint64_t a = w1 * HASH_K1;
-	uint64_t b = w2;
-	uint64_t c = w4 * mul;
-	uint64_t d = w3 * HASH_K2;
-	return (cityhash_helper(rotate(a + b, 43) + rotate(c, 30) + d,
-	    a + rotate(b + HASH_K2, 18) + c, mul));
-
-}

diff --git a/zfs/module/zfs/dataset_kstats.c b/zfs/module/zfs/dataset_kstats.c
index e46a092..3fbb24d 100644
--- a/zfs/module/zfs/dataset_kstats.c
+++ b/zfs/module/zfs/dataset_kstats.c

@@ -50,17 +50,17 @@
 
 	dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
 	dkv->dkv_writes.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_writes);
+	    wmsum_value(&dk->dk_sums.dss_writes);
 	dkv->dkv_nwritten.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_nwritten);
+	    wmsum_value(&dk->dk_sums.dss_nwritten);
 	dkv->dkv_reads.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_reads);
+	    wmsum_value(&dk->dk_sums.dss_reads);
 	dkv->dkv_nread.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_nread);
+	    wmsum_value(&dk->dk_sums.dss_nread);
 	dkv->dkv_nunlinks.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_nunlinks);
+	    wmsum_value(&dk->dk_sums.dss_nunlinks);
 	dkv->dkv_nunlinked.value.ui64 =
-	    aggsum_value(&dk->dk_aggsums.das_nunlinked);
+	    wmsum_value(&dk->dk_sums.dss_nunlinked);
 
 	return (0);
 }
@@ -140,12 +140,12 @@
 	kstat_install(kstat);
 	dk->dk_kstats = kstat;
 
-	aggsum_init(&dk->dk_aggsums.das_writes, 0);
-	aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
-	aggsum_init(&dk->dk_aggsums.das_reads, 0);
-	aggsum_init(&dk->dk_aggsums.das_nread, 0);
-	aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
-	aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
+	wmsum_init(&dk->dk_sums.dss_writes, 0);
+	wmsum_init(&dk->dk_sums.dss_nwritten, 0);
+	wmsum_init(&dk->dk_sums.dss_reads, 0);
+	wmsum_init(&dk->dk_sums.dss_nread, 0);
+	wmsum_init(&dk->dk_sums.dss_nunlinks, 0);
+	wmsum_init(&dk->dk_sums.dss_nunlinked, 0);
 }
 
 void
@@ -162,12 +162,12 @@
 	kstat_delete(dk->dk_kstats);
 	dk->dk_kstats = NULL;
 
-	aggsum_fini(&dk->dk_aggsums.das_writes);
-	aggsum_fini(&dk->dk_aggsums.das_nwritten);
-	aggsum_fini(&dk->dk_aggsums.das_reads);
-	aggsum_fini(&dk->dk_aggsums.das_nread);
-	aggsum_fini(&dk->dk_aggsums.das_nunlinks);
-	aggsum_fini(&dk->dk_aggsums.das_nunlinked);
+	wmsum_fini(&dk->dk_sums.dss_writes);
+	wmsum_fini(&dk->dk_sums.dss_nwritten);
+	wmsum_fini(&dk->dk_sums.dss_reads);
+	wmsum_fini(&dk->dk_sums.dss_nread);
+	wmsum_fini(&dk->dk_sums.dss_nunlinks);
+	wmsum_fini(&dk->dk_sums.dss_nunlinked);
 }
 
 void
@@ -179,8 +179,8 @@
 	if (dk->dk_kstats == NULL)
 		return;
 
-	aggsum_add(&dk->dk_aggsums.das_writes, 1);
-	aggsum_add(&dk->dk_aggsums.das_nwritten, nwritten);
+	wmsum_add(&dk->dk_sums.dss_writes, 1);
+	wmsum_add(&dk->dk_sums.dss_nwritten, nwritten);
 }
 
 void
@@ -192,8 +192,8 @@
 	if (dk->dk_kstats == NULL)
 		return;
 
-	aggsum_add(&dk->dk_aggsums.das_reads, 1);
-	aggsum_add(&dk->dk_aggsums.das_nread, nread);
+	wmsum_add(&dk->dk_sums.dss_reads, 1);
+	wmsum_add(&dk->dk_sums.dss_nread, nread);
 }
 
 void
@@ -202,7 +202,7 @@
 	if (dk->dk_kstats == NULL)
 		return;
 
-	aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+	wmsum_add(&dk->dk_sums.dss_nunlinks, delta);
 }
 
 void
@@ -211,5 +211,5 @@
 	if (dk->dk_kstats == NULL)
 		return;
 
-	aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+	wmsum_add(&dk->dk_sums.dss_nunlinked, delta);
 }

diff --git a/zfs/module/zfs/dbuf.c b/zfs/module/zfs/dbuf.c
index 0542ba7..a59aa78 100644
--- a/zfs/module/zfs/dbuf.c
+++ b/zfs/module/zfs/dbuf.c

@@ -21,9 +21,11 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 #include <sys/zfs_context.h>
@@ -44,12 +46,14 @@
 #include <sys/zfeature.h>
 #include <sys/blkptr.h>
 #include <sys/range_tree.h>
-#include <sys/trace_dbuf.h>
+#include <sys/trace_zfs.h>
 #include <sys/callb.h>
 #include <sys/abd.h>
 #include <sys/vdev.h>
-#include <sys/cityhash.h>
+#include <cityhash.h>
 #include <sys/spa_impl.h>
+#include <sys/wmsum.h>
+#include <sys/vdev_impl.h>
 
 kstat_t *dbuf_ksp;
 
@@ -133,8 +137,22 @@
 	{ "metadata_cache_overflow",		KSTAT_DATA_UINT64 }
 };
 
+struct {
+	wmsum_t cache_count;
+	wmsum_t cache_total_evicts;
+	wmsum_t cache_levels[DN_MAX_LEVELS];
+	wmsum_t cache_levels_bytes[DN_MAX_LEVELS];
+	wmsum_t hash_hits;
+	wmsum_t hash_misses;
+	wmsum_t hash_collisions;
+	wmsum_t hash_chains;
+	wmsum_t hash_insert_race;
+	wmsum_t metadata_cache_count;
+	wmsum_t metadata_cache_overflow;
+} dbuf_sums;
+
 #define	DBUF_STAT_INCR(stat, val)	\
-	atomic_add_64(&dbuf_stats.stat.value.ui64, (val));
+	wmsum_add(&dbuf_sums.stat, val);
 #define	DBUF_STAT_DECR(stat, val)	\
 	DBUF_STAT_INCR(stat, -(val));
 #define	DBUF_STAT_BUMP(stat)		\
@@ -148,36 +166,10 @@
 		continue;						\
 }
 
-typedef struct dbuf_hold_arg {
-	/* Function arguments */
-	dnode_t *dh_dn;
-	uint8_t dh_level;
-	uint64_t dh_blkid;
-	boolean_t dh_fail_sparse;
-	boolean_t dh_fail_uncached;
-	void *dh_tag;
-	dmu_buf_impl_t **dh_dbp;
-	/* Local variables */
-	dmu_buf_impl_t *dh_db;
-	dmu_buf_impl_t *dh_parent;
-	blkptr_t *dh_bp;
-	int dh_err;
-	dbuf_dirty_record_t *dh_dr;
-} dbuf_hold_arg_t;
-
-static dbuf_hold_arg_t *dbuf_hold_arg_create(dnode_t *dn, uint8_t level,
-	uint64_t blkid, boolean_t fail_sparse, boolean_t fail_uncached,
-	void *tag, dmu_buf_impl_t **dbp);
-static int dbuf_hold_impl_arg(dbuf_hold_arg_t *dh);
-static void dbuf_hold_arg_destroy(dbuf_hold_arg_t *dh);
-
 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
-
-extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
-    dmu_buf_evict_func_t *evict_func_sync,
-    dmu_buf_evict_func_t *evict_func_async,
-    dmu_buf_t **clear_on_evict_dbufp);
+static void dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr);
+static int dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags);
 
 /*
  * Global data structures and functions for the dbuf cache.
@@ -222,18 +214,22 @@
  * by those caches' matching enum values (from dbuf_cached_state_t).
  */
 typedef struct dbuf_cache {
-	multilist_t *cache;
-	zfs_refcount_t size;
+	multilist_t cache;
+	zfs_refcount_t size ____cacheline_aligned;
 } dbuf_cache_t;
 dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
 
 /* Size limits for the caches */
-unsigned long dbuf_cache_max_bytes = 0;
-unsigned long dbuf_metadata_cache_max_bytes = 0;
+unsigned long dbuf_cache_max_bytes = ULONG_MAX;
+unsigned long dbuf_metadata_cache_max_bytes = ULONG_MAX;
+
 /* Set the default sizes of the caches to log2 fraction of arc size */
 int dbuf_cache_shift = 5;
 int dbuf_metadata_cache_shift = 6;
 
+static unsigned long dbuf_cache_target_bytes(void);
+static unsigned long dbuf_metadata_cache_target_bytes(void);
+
 /*
  * The LRU dbuf cache uses a three-stage eviction policy:
  *	- A low water marker designates when the dbuf eviction thread
@@ -279,14 +275,15 @@
 uint_t dbuf_cache_hiwater_pct = 10;
 uint_t dbuf_cache_lowater_pct = 10;
 
-/* ARGSUSED */
 static int
 dbuf_cons(void *vdb, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	dmu_buf_impl_t *db = vdb;
 	bzero(db, sizeof (dmu_buf_impl_t));
 
 	mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
+	rw_init(&db->db_rwlock, NULL, RW_DEFAULT, NULL);
 	cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
 	multilist_link_init(&db->db_cache_link);
 	zfs_refcount_create(&db->db_holds);
@@ -294,12 +291,13 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 dbuf_dest(void *vdb, void *unused)
 {
+	(void) unused;
 	dmu_buf_impl_t *db = vdb;
 	mutex_destroy(&db->db_mtx);
+	rw_destroy(&db->db_rwlock);
 	cv_destroy(&db->db_changed);
 	ASSERT(!multilist_link_active(&db->db_cache_link));
 	zfs_refcount_destroy(&db->db_holds);
@@ -310,8 +308,6 @@
  */
 static dbuf_hash_table_t dbuf_hash_table;
 
-static uint64_t dbuf_hash_count;
-
 /*
  * We use Cityhash for this. It's fast, and has good hash properties without
  * requiring any large static buffers.
@@ -322,6 +318,10 @@
 	return (cityhash4((uintptr_t)os, obj, (uint64_t)lvl, blkid));
 }
 
+#define	DTRACE_SET_STATE(db, why) \
+	DTRACE_PROBE2(dbuf__state_change, dmu_buf_impl_t *, db,	\
+	    const char *, why)
+
 #define	DBUF_EQUAL(dbuf, os, obj, level, blkid)		\
 	((dbuf)->db.db_object == (obj) &&		\
 	(dbuf)->db_objset == (os) &&			\
@@ -418,8 +418,8 @@
 	db->db_hash_next = h->hash_table[idx];
 	h->hash_table[idx] = db;
 	mutex_exit(DBUF_HASH_MUTEX(h, idx));
-	atomic_inc_64(&dbuf_hash_count);
-	DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count);
+	uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
+	DBUF_STAT_MAX(hash_elements_max, he);
 
 	return (NULL);
 }
@@ -447,7 +447,7 @@
 		 */
 		if (zfs_refcount_count(
 		    &dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
-		    dbuf_metadata_cache_max_bytes) {
+		    dbuf_metadata_cache_target_bytes()) {
 			DBUF_STAT_BUMP(metadata_cache_overflow);
 			return (B_FALSE);
 		}
@@ -492,7 +492,7 @@
 	    h->hash_table[idx]->db_hash_next == NULL)
 		DBUF_STAT_BUMPDOWN(hash_chains);
 	mutex_exit(DBUF_HASH_MUTEX(h, idx));
-	atomic_dec_64(&dbuf_hash_count);
+	atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
 }
 
 typedef enum {
@@ -595,6 +595,68 @@
 	}
 }
 
+/*
+ * We want to exclude buffers that are on a special allocation class from
+ * L2ARC.
+ */
+boolean_t
+dbuf_is_l2cacheable(dmu_buf_impl_t *db)
+{
+	if (db->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||
+	    (db->db_objset->os_secondary_cache ==
+	    ZFS_CACHE_METADATA && dbuf_is_metadata(db))) {
+		if (l2arc_exclude_special == 0)
+			return (B_TRUE);
+
+		blkptr_t *bp = db->db_blkptr;
+		if (bp == NULL || BP_IS_HOLE(bp))
+			return (B_FALSE);
+		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+		vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
+		vdev_t *vd = NULL;
+
+		if (vdev < rvd->vdev_children)
+			vd = rvd->vdev_child[vdev];
+
+		if (vd == NULL)
+			return (B_TRUE);
+
+		if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+		    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+static inline boolean_t
+dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
+{
+	if (dn->dn_objset->os_secondary_cache == ZFS_CACHE_ALL ||
+	    (dn->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA &&
+	    (level > 0 ||
+	    DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)))) {
+		if (l2arc_exclude_special == 0)
+			return (B_TRUE);
+
+		if (bp == NULL || BP_IS_HOLE(bp))
+			return (B_FALSE);
+		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+		vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
+		vdev_t *vd = NULL;
+
+		if (vdev < rvd->vdev_children)
+			vd = rvd->vdev_child[vdev];
+
+		if (vd == NULL)
+			return (B_TRUE);
+
+		if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+		    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
 
 /*
  * This function *must* return indices evenly distributed between all
@@ -603,7 +665,7 @@
  * distributed between all sublists and uses this assumption when
  * deciding which sublist to evict from and how much to evict from it.
  */
-unsigned int
+static unsigned int
 dbuf_cache_multilist_index_func(multilist_t *ml, void *obj)
 {
 	dmu_buf_impl_t *db = obj;
@@ -618,18 +680,34 @@
 	 * Also, the low order bits of the hash value are thought to be
 	 * distributed evenly. Otherwise, in the case that the multilist
 	 * has a power of two number of sublists, each sublists' usage
-	 * would not be evenly distributed.
+	 * would not be evenly distributed. In this context full 64bit
+	 * division would be a waste of time, so limit it to 32 bits.
 	 */
-	return (dbuf_hash(db->db_objset, db->db.db_object,
+	return ((unsigned int)dbuf_hash(db->db_objset, db->db.db_object,
 	    db->db_level, db->db_blkid) %
 	    multilist_get_num_sublists(ml));
 }
 
+/*
+ * The target size of the dbuf cache can grow with the ARC target,
+ * unless limited by the tunable dbuf_cache_max_bytes.
+ */
 static inline unsigned long
 dbuf_cache_target_bytes(void)
 {
-	return MIN(dbuf_cache_max_bytes,
-	    arc_target_bytes() >> dbuf_cache_shift);
+	return (MIN(dbuf_cache_max_bytes,
+	    arc_target_bytes() >> dbuf_cache_shift));
+}
+
+/*
+ * The target size of the dbuf metadata cache can grow with the ARC target,
+ * unless limited by the tunable dbuf_metadata_cache_max_bytes.
+ */
+static inline unsigned long
+dbuf_metadata_cache_target_bytes(void)
+{
+	return (MIN(dbuf_metadata_cache_max_bytes,
+	    arc_target_bytes() >> dbuf_metadata_cache_shift));
 }
 
 static inline uint64_t
@@ -649,13 +727,6 @@
 }
 
 static inline boolean_t
-dbuf_cache_above_hiwater(void)
-{
-	return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
-	    dbuf_cache_hiwater_bytes());
-}
-
-static inline boolean_t
 dbuf_cache_above_lowater(void)
 {
 	return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
@@ -668,9 +739,9 @@
 static void
 dbuf_evict_one(void)
 {
-	int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
+	int idx = multilist_get_random_index(&dbuf_caches[DB_DBUF_CACHE].cache);
 	multilist_sublist_t *mls = multilist_sublist_lock(
-	    dbuf_caches[DB_DBUF_CACHE].cache, idx);
+	    &dbuf_caches[DB_DBUF_CACHE].cache, idx);
 
 	ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
@@ -694,8 +765,6 @@
 		ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
 		db->db_caching_status = DB_NO_CACHE;
 		dbuf_destroy(db);
-		DBUF_STAT_MAX(cache_size_bytes_max,
-		    zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
 		DBUF_STAT_BUMP(cache_total_evicts);
 	} else {
 		multilist_sublist_unlock(mls);
@@ -709,10 +778,10 @@
  * of the dbuf cache is at or below the maximum size. Once the dbuf is aged
  * out of the cache it is destroyed and becomes eligible for arc eviction.
  */
-/* ARGSUSED */
 static void
 dbuf_evict_thread(void *unused)
 {
+	(void) unused;
 	callb_cpr_t cpr;
 
 	CALLB_CPR_INIT(&cpr, &dbuf_evict_lock, callb_generic_cpr, FTAG);
@@ -721,7 +790,7 @@
 	while (!dbuf_evict_thread_exit) {
 		while (!dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
 			CALLB_CPR_SAFE_BEGIN(&cpr);
-			(void) cv_timedwait_sig_hires(&dbuf_evict_cv,
+			(void) cv_timedwait_idle_hires(&dbuf_evict_cv,
 			    &dbuf_evict_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
 			CALLB_CPR_SAFE_END(&cpr, &dbuf_evict_lock);
 		}
@@ -751,16 +820,15 @@
  * dbuf cache using the callers context.
  */
 static void
-dbuf_evict_notify(void)
+dbuf_evict_notify(uint64_t size)
 {
 	/*
 	 * We check if we should evict without holding the dbuf_evict_lock,
 	 * because it's OK to occasionally make the wrong decision here,
 	 * and grabbing the lock results in massive lock contention.
 	 */
-	if (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
-	    dbuf_cache_target_bytes()) {
-		if (dbuf_cache_above_hiwater())
+	if (size > dbuf_cache_target_bytes()) {
+		if (size > dbuf_cache_hiwater_bytes())
 			dbuf_evict_one();
 		cv_signal(&dbuf_evict_cv);
 	}
@@ -771,19 +839,40 @@
 {
 	dbuf_stats_t *ds = ksp->ks_data;
 
-	if (rw == KSTAT_WRITE) {
+	if (rw == KSTAT_WRITE)
 		return (SET_ERROR(EACCES));
-	} else {
-		ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
-		    &dbuf_caches[DB_DBUF_METADATA_CACHE].size);
-		ds->cache_size_bytes.value.ui64 =
-		    zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
-		ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
-		ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
-		ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
-		ds->hash_elements.value.ui64 = dbuf_hash_count;
-	}
 
+	ds->cache_count.value.ui64 =
+	    wmsum_value(&dbuf_sums.cache_count);
+	ds->cache_size_bytes.value.ui64 =
+	    zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
+	ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
+	ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
+	ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
+	ds->cache_total_evicts.value.ui64 =
+	    wmsum_value(&dbuf_sums.cache_total_evicts);
+	for (int i = 0; i < DN_MAX_LEVELS; i++) {
+		ds->cache_levels[i].value.ui64 =
+		    wmsum_value(&dbuf_sums.cache_levels[i]);
+		ds->cache_levels_bytes[i].value.ui64 =
+		    wmsum_value(&dbuf_sums.cache_levels_bytes[i]);
+	}
+	ds->hash_hits.value.ui64 =
+	    wmsum_value(&dbuf_sums.hash_hits);
+	ds->hash_misses.value.ui64 =
+	    wmsum_value(&dbuf_sums.hash_misses);
+	ds->hash_collisions.value.ui64 =
+	    wmsum_value(&dbuf_sums.hash_collisions);
+	ds->hash_chains.value.ui64 =
+	    wmsum_value(&dbuf_sums.hash_chains);
+	ds->hash_insert_race.value.ui64 =
+	    wmsum_value(&dbuf_sums.hash_insert_race);
+	ds->metadata_cache_count.value.ui64 =
+	    wmsum_value(&dbuf_sums.metadata_cache_count);
+	ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
+	    &dbuf_caches[DB_DBUF_METADATA_CACHE].size);
+	ds->metadata_cache_overflow.value.ui64 =
+	    wmsum_value(&dbuf_sums.metadata_cache_overflow);
 	return (0);
 }
 
@@ -795,12 +884,12 @@
 	int i;
 
 	/*
-	 * The hash table is big enough to fill all of physical memory
+	 * The hash table is big enough to fill one eighth of physical memory
 	 * with an average block size of zfs_arc_average_blocksize (default 8K).
 	 * By default, the table will take up
 	 * totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers).
 	 */
-	while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE)
+	while (hsize * zfs_arc_average_blocksize < arc_all_memory() / 8)
 		hsize <<= 1;
 
 retry:
@@ -831,31 +920,14 @@
 	dbuf_stats_init(h);
 
 	/*
-	 * Setup the parameters for the dbuf caches. We set the sizes of the
-	 * dbuf cache and the metadata cache to 1/32nd and 1/16th (default)
-	 * of the target size of the ARC. If the values has been specified as
-	 * a module option and they're not greater than the target size of the
-	 * ARC, then we honor that value.
-	 */
-	if (dbuf_cache_max_bytes == 0 ||
-	    dbuf_cache_max_bytes >= arc_target_bytes()) {
-		dbuf_cache_max_bytes = arc_target_bytes() >> dbuf_cache_shift;
-	}
-	if (dbuf_metadata_cache_max_bytes == 0 ||
-	    dbuf_metadata_cache_max_bytes >= arc_target_bytes()) {
-		dbuf_metadata_cache_max_bytes =
-		    arc_target_bytes() >> dbuf_metadata_cache_shift;
-	}
-
-	/*
 	 * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
 	 * configuration is not required.
 	 */
 	dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
 
 	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
-		dbuf_caches[dcs].cache =
-		    multilist_create(sizeof (dmu_buf_impl_t),
+		multilist_create(&dbuf_caches[dcs].cache,
+		    sizeof (dmu_buf_impl_t),
 		    offsetof(dmu_buf_impl_t, db_cache_link),
 		    dbuf_cache_multilist_index_func);
 		zfs_refcount_create(&dbuf_caches[dcs].size);
@@ -867,14 +939,24 @@
 	dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
 	    NULL, 0, &p0, TS_RUN, minclsyspri);
 
+	wmsum_init(&dbuf_sums.cache_count, 0);
+	wmsum_init(&dbuf_sums.cache_total_evicts, 0);
+	for (i = 0; i < DN_MAX_LEVELS; i++) {
+		wmsum_init(&dbuf_sums.cache_levels[i], 0);
+		wmsum_init(&dbuf_sums.cache_levels_bytes[i], 0);
+	}
+	wmsum_init(&dbuf_sums.hash_hits, 0);
+	wmsum_init(&dbuf_sums.hash_misses, 0);
+	wmsum_init(&dbuf_sums.hash_collisions, 0);
+	wmsum_init(&dbuf_sums.hash_chains, 0);
+	wmsum_init(&dbuf_sums.hash_insert_race, 0);
+	wmsum_init(&dbuf_sums.metadata_cache_count, 0);
+	wmsum_init(&dbuf_sums.metadata_cache_overflow, 0);
+
 	dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
 	    KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
 	    KSTAT_FLAG_VIRTUAL);
 	if (dbuf_ksp != NULL) {
-		dbuf_ksp->ks_data = &dbuf_stats;
-		dbuf_ksp->ks_update = dbuf_kstat_update;
-		kstat_install(dbuf_ksp);
-
 		for (i = 0; i < DN_MAX_LEVELS; i++) {
 			snprintf(dbuf_stats.cache_levels[i].name,
 			    KSTAT_STRLEN, "cache_level_%d", i);
@@ -885,6 +967,9 @@
 			dbuf_stats.cache_levels_bytes[i].data_type =
 			    KSTAT_DATA_UINT64;
 		}
+		dbuf_ksp->ks_data = &dbuf_stats;
+		dbuf_ksp->ks_update = dbuf_kstat_update;
+		kstat_install(dbuf_ksp);
 	}
 }
 
@@ -923,13 +1008,27 @@
 
 	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
 		zfs_refcount_destroy(&dbuf_caches[dcs].size);
-		multilist_destroy(dbuf_caches[dcs].cache);
+		multilist_destroy(&dbuf_caches[dcs].cache);
 	}
 
 	if (dbuf_ksp != NULL) {
 		kstat_delete(dbuf_ksp);
 		dbuf_ksp = NULL;
 	}
+
+	wmsum_fini(&dbuf_sums.cache_count);
+	wmsum_fini(&dbuf_sums.cache_total_evicts);
+	for (i = 0; i < DN_MAX_LEVELS; i++) {
+		wmsum_fini(&dbuf_sums.cache_levels[i]);
+		wmsum_fini(&dbuf_sums.cache_levels_bytes[i]);
+	}
+	wmsum_fini(&dbuf_sums.hash_hits);
+	wmsum_fini(&dbuf_sums.hash_misses);
+	wmsum_fini(&dbuf_sums.hash_collisions);
+	wmsum_fini(&dbuf_sums.hash_chains);
+	wmsum_fini(&dbuf_sums.hash_insert_race);
+	wmsum_fini(&dbuf_sums.metadata_cache_count);
+	wmsum_fini(&dbuf_sums.metadata_cache_overflow);
 }
 
 /*
@@ -942,6 +1041,7 @@
 {
 	dnode_t *dn;
 	dbuf_dirty_record_t *dr;
+	uint32_t txg_prev;
 
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 
@@ -973,11 +1073,16 @@
 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
 	}
 
-	for (dr = db->db_data_pending; dr != NULL; dr = dr->dr_next)
+	if ((dr = list_head(&db->db_dirty_records)) != NULL) {
 		ASSERT(dr->dr_dbuf == db);
-
-	for (dr = db->db_last_dirty; dr != NULL; dr = dr->dr_next)
-		ASSERT(dr->dr_dbuf == db);
+		txg_prev = dr->dr_txg;
+		for (dr = list_next(&db->db_dirty_records, dr); dr != NULL;
+		    dr = list_next(&db->db_dirty_records, dr)) {
+			ASSERT(dr->dr_dbuf == db);
+			ASSERT(txg_prev > dr->dr_txg);
+			txg_prev = dr->dr_txg;
+		}
+	}
 
 	/*
 	 * We can't assert that db_size matches dn_datablksz because it
@@ -1007,17 +1112,17 @@
 				    &dn->dn_phys->dn_blkptr[db->db_blkid]);
 		} else {
 			/* db is pointed to by an indirect block */
-			ASSERTV(int epb = db->db_parent->db.db_size >>
-			    SPA_BLKPTRSHIFT);
+			int epb __maybe_unused = db->db_parent->db.db_size >>
+			    SPA_BLKPTRSHIFT;
 			ASSERT3U(db->db_parent->db_level, ==, db->db_level+1);
 			ASSERT3U(db->db_parent->db.db_object, ==,
 			    db->db.db_object);
 			/*
 			 * dnode_grow_indblksz() can make this fail if we don't
-			 * have the struct_rwlock.  XXX indblksz no longer
+			 * have the parent's rwlock.  XXX indblksz no longer
 			 * grows.  safe to do this now?
 			 */
-			if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
+			if (RW_LOCK_HELD(&db->db_parent->db_rwlock)) {
 				ASSERT3P(db->db_blkptr, ==,
 				    ((blkptr_t *)db->db_parent->db.db_data +
 				    db->db_blkid % epb));
@@ -1089,8 +1194,10 @@
 	dbuf_evict_user(db);
 	ASSERT3P(db->db_buf, ==, NULL);
 	db->db.db_data = NULL;
-	if (db->db_state != DB_NOFILL)
+	if (db->db_state != DB_NOFILL) {
 		db->db_state = DB_UNCACHED;
+		DTRACE_SET_STATE(db, "clear data");
+	}
 }
 
 static void
@@ -1104,6 +1211,14 @@
 	db->db.db_data = buf->b_data;
 }
 
+static arc_buf_t *
+dbuf_alloc_arcbuf(dmu_buf_impl_t *db)
+{
+	spa_t *spa = db->db_objset->os_spa;
+
+	return (arc_alloc_buf(spa, db, DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+}
+
 /*
  * Loan out an arc_buf for read.  Return the loaned arc_buf.
  */
@@ -1177,10 +1292,49 @@
 	}
 }
 
+/*
+ * Lock whatever guards db_blkptr: the parent dbuf's db_rwlock if there is a
+ * parent, else the dataset's ds_bp_rwlock if any. Returns the lock type taken.
+ */
+db_lock_type_t
+dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag)
+{
+	enum db_lock_type ret = DLT_NONE;
+	if (db->db_parent != NULL) {
+		rw_enter(&db->db_parent->db_rwlock, rw);
+		ret = DLT_PARENT;
+	} else if (dmu_objset_ds(db->db_objset) != NULL) {
+		rrw_enter(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, rw,
+		    tag);
+		ret = DLT_OBJSET;
+	}
+	/*
+	 * We only return a DLT_NONE lock when it's the top-most indirect block
+	 * of the meta-dnode of the MOS.
+	 */
+	return (ret);
+}
+
+/*
+ * The lock type must be passed in because the block can change, via an
+ * indirection increase, from the top-most indirect block of a dnode (no
+ * parent) to a lower one while the lock is held. Picking the lock to drop
+ * from db_parent at unlock time would then cause a panic.
+ */
+void
+dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag)
+{
+	if (type == DLT_PARENT)
+		rw_exit(&db->db_parent->db_rwlock);
+	else if (type == DLT_OBJSET)
+		rrw_exit(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, tag);
+}
+
 static void
 dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *vdb)
 {
+	(void) zb, (void) bp;
 	dmu_buf_impl_t *db = vdb;
 
 	mutex_enter(&db->db_mtx);
@@ -1197,6 +1351,7 @@
 		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 		ASSERT3P(db->db_buf, ==, NULL);
 		db->db_state = DB_UNCACHED;
+		DTRACE_SET_STATE(db, "i/o error");
 	} else if (db->db_level == 0 && db->db_freed_in_flight) {
 		/* freed in flight */
 		ASSERT(zio == NULL || zio->io_error == 0);
@@ -1206,16 +1361,104 @@
 		db->db_freed_in_flight = FALSE;
 		dbuf_set_data(db, buf);
 		db->db_state = DB_CACHED;
+		DTRACE_SET_STATE(db, "freed in flight");
 	} else {
 		/* success */
 		ASSERT(zio == NULL || zio->io_error == 0);
 		dbuf_set_data(db, buf);
 		db->db_state = DB_CACHED;
+		DTRACE_SET_STATE(db, "successful read");
 	}
 	cv_broadcast(&db->db_changed);
 	dbuf_rele_and_unlock(db, NULL, B_FALSE);
 }
 
+/*
+ * Shortcut for performing reads on bonus dbufs: copies the bonus area out of
+ * the dnode, zero-padding up to the slot maximum. Caller holds db_mtx. Fails
+ * only if the dnode behind a decrypted block cannot be verified; else 0.
+ */
+static int
+dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags)
+{
+	int bonuslen, max_bonuslen, err;
+
+	err = dbuf_read_verify_dnode_crypt(db, flags);
+	if (err)
+		return (err);
+
+	bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
+	max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+	ASSERT(DB_DNODE_HELD(db));
+	ASSERT3U(bonuslen, <=, db->db.db_size);
+	db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
+	arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
+	if (bonuslen < max_bonuslen)
+		bzero(db->db.db_data, max_bonuslen);
+	if (bonuslen)
+		bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
+	db->db_state = DB_CACHED;
+	DTRACE_SET_STATE(db, "bonus buffer filled");
+	return (0);
+}
+
+static void
+dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn)
+{
+	blkptr_t *bps = db->db.db_data;
+	uint32_t indbs = 1ULL << dn->dn_indblkshift;	/* indirect blk bytes */
+	int n_bps = indbs >> SPA_BLKPTRSHIFT;		/* blkptrs per block */
+
+	for (int i = 0; i < n_bps; i++) {
+		blkptr_t *bp = &bps[i];
+
+		ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, indbs);
+		BP_SET_LSIZE(bp, BP_GET_LEVEL(db->db_blkptr) == 1 ?
+		    dn->dn_datablksz : BP_GET_LSIZE(db->db_blkptr));
+		BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
+		BP_SET_LEVEL(bp, BP_GET_LEVEL(db->db_blkptr) - 1);
+		BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
+	}
+}
+
+/*
+ * Satisfy a read of a dbuf whose block is a hole (or, at level 0, has been
+ * freed) by giving it a zero-filled buffer. Requires that the dbuf's mutex
+ * is held. Returns 0 if the read was satisfied, ENOENT if it is not a hole.
+ */
+static int
+dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn)
+{
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+
+	int is_hole = db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr);
+	/*
+	 * For level 0 blocks only, if the above check fails:
+	 * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync()
+	 * processes the delete record and clears the bp while we are waiting
+	 * for the dn_mtx (resulting in a "no" from block_freed).
+	 */
+	if (!is_hole && db->db_level == 0) {
+		is_hole = dnode_block_freed(dn, db->db_blkid) ||
+		    BP_IS_HOLE(db->db_blkptr);
+	}
+
+	if (is_hole) {
+		dbuf_set_data(db, dbuf_alloc_arcbuf(db));
+		bzero(db->db.db_data, db->db.db_size);
+
+		if (db->db_blkptr != NULL && db->db_level > 0 &&
+		    BP_IS_HOLE(db->db_blkptr) &&
+		    db->db_blkptr->blk_birth != 0) {
+			dbuf_handle_indirect_hole(db, dn);
+		}
+		db->db_state = DB_CACHED;
+		DTRACE_SET_STATE(db, "hole read satisfied");
+		return (0);
+	}
+	return (ENOENT);
+}
 
 /*
  * This function ensures that, when doing a decrypting read of a block,
@@ -1240,8 +1483,8 @@
 
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 
-	if (!os->os_encrypted || os->os_raw_receive ||
-	    (flags & DB_RF_NO_DECRYPT) != 0)
+	if ((flags & DB_RF_NO_DECRYPT) != 0 ||
+	    !os->os_encrypted || os->os_raw_receive)
 		return (0);
 
 	DB_DNODE_ENTER(db);
@@ -1273,93 +1516,51 @@
 	return (err);
 }
 
+/*
+ * Drops db_mtx and the parent lock specified by dblt and tag before
+ * returning.
+ */
 static int
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
+    db_lock_type_t dblt, void *tag)
 {
 	dnode_t *dn;
 	zbookmark_phys_t zb;
 	uint32_t aflags = ARC_FLAG_NOWAIT;
-	int err, zio_flags = 0;
+	int err, zio_flags;
 
+	err = zio_flags = 0;
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
 	ASSERT(!zfs_refcount_is_zero(&db->db_holds));
-	/* We need the struct_rwlock to prevent db_blkptr from changing. */
-	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 	ASSERT(db->db_state == DB_UNCACHED);
 	ASSERT(db->db_buf == NULL);
+	ASSERT(db->db_parent == NULL ||
+	    RW_LOCK_HELD(&db->db_parent->db_rwlock));
 
 	if (db->db_blkid == DMU_BONUS_BLKID) {
-		/*
-		 * The bonus length stored in the dnode may be less than
-		 * the maximum available space in the bonus buffer.
-		 */
-		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
-		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
-
-		/* if the underlying dnode block is encrypted, decrypt it */
-		err = dbuf_read_verify_dnode_crypt(db, flags);
-		if (err != 0) {
-			DB_DNODE_EXIT(db);
-			mutex_exit(&db->db_mtx);
-			return (err);
-		}
-
-		ASSERT3U(bonuslen, <=, db->db.db_size);
-		db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
-		arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
-		if (bonuslen < max_bonuslen)
-			bzero(db->db.db_data, max_bonuslen);
-		if (bonuslen)
-			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
-		DB_DNODE_EXIT(db);
-		db->db_state = DB_CACHED;
-		mutex_exit(&db->db_mtx);
-		return (0);
+		err = dbuf_read_bonus(db, dn, flags);
+		goto early_unlock;
 	}
 
+	err = dbuf_read_hole(db, dn);
+	if (err == 0)
+		goto early_unlock;
+
 	/*
-	 * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync()
-	 * processes the delete record and clears the bp while we are waiting
-	 * for the dn_mtx (resulting in a "no" from block_freed).
+	 * Any attempt to read a redacted block should result in an error. This
+	 * will never happen under normal conditions, but can be useful for
+	 * debugging purposes.
 	 */
-	if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) ||
-	    (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) ||
-	    BP_IS_HOLE(db->db_blkptr)))) {
-		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
-		dbuf_set_data(db, arc_alloc_buf(db->db_objset->os_spa, db, type,
-		    db->db.db_size));
-		bzero(db->db.db_data, db->db.db_size);
-
-		if (db->db_blkptr != NULL && db->db_level > 0 &&
-		    BP_IS_HOLE(db->db_blkptr) &&
-		    db->db_blkptr->blk_birth != 0) {
-			blkptr_t *bps = db->db.db_data;
-			for (int i = 0; i < ((1 <<
-			    DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
-			    i++) {
-				blkptr_t *bp = &bps[i];
-				ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
-				    1 << dn->dn_indblkshift);
-				BP_SET_LSIZE(bp,
-				    BP_GET_LEVEL(db->db_blkptr) == 1 ?
-				    dn->dn_datablksz :
-				    BP_GET_LSIZE(db->db_blkptr));
-				BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
-				BP_SET_LEVEL(bp,
-				    BP_GET_LEVEL(db->db_blkptr) - 1);
-				BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
-			}
-		}
-		DB_DNODE_EXIT(db);
-		db->db_state = DB_CACHED;
-		mutex_exit(&db->db_mtx);
-		return (0);
+	if (BP_IS_REDACTED(db->db_blkptr)) {
+		ASSERT(dsl_dataset_feature_is_active(
+		    db->db_objset->os_dsl_dataset,
+		    SPA_FEATURE_REDACTED_DATASETS));
+		err = SET_ERROR(EIO);
+		goto early_unlock;
 	}
 
-
 	SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
 	    db->db.db_object, db->db_level, db->db_blkid);
 
@@ -1371,24 +1572,21 @@
 		spa_log_error(db->db_objset->os_spa, &zb);
 		zfs_panic_recover("unencrypted block in encrypted "
 		    "object set %llu", dmu_objset_id(db->db_objset));
-		DB_DNODE_EXIT(db);
-		mutex_exit(&db->db_mtx);
-		return (SET_ERROR(EIO));
+		err = SET_ERROR(EIO);
+		goto early_unlock;
 	}
 
 	err = dbuf_read_verify_dnode_crypt(db, flags);
-	if (err != 0) {
-		DB_DNODE_EXIT(db);
-		mutex_exit(&db->db_mtx);
-		return (err);
-	}
+	if (err != 0)
+		goto early_unlock;
 
 	DB_DNODE_EXIT(db);
 
 	db->db_state = DB_READ;
+	DTRACE_SET_STATE(db, "read issued");
 	mutex_exit(&db->db_mtx);
 
-	if (DBUF_IS_L2CACHEABLE(db))
+	if (dbuf_is_l2cacheable(db))
 		aflags |= ARC_FLAG_L2CACHE;
 
 	dbuf_add_ref(db, NULL);
@@ -1398,11 +1596,23 @@
 
 	if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
 		zio_flags |= ZIO_FLAG_RAW;
-
-	err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
+	/*
+	 * The zio layer will copy the provided blkptr later, but we need to
+	 * do this now so that we can release the parent's rwlock. We have to
+	 * do that now so that if dbuf_read_done is called synchronously (on
+	 * an l1 cache hit) we don't acquire the db_mtx while holding the
+	 * parent's rwlock, which would be a lock ordering violation.
+	 */
+	blkptr_t bp = *db->db_blkptr;
+	dmu_buf_unlock_parent(db, dblt, tag);
+	(void) arc_read(zio, db->db_objset->os_spa, &bp,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
 	    &aflags, &zb);
-
+	return (err);
+early_unlock:
+	DB_DNODE_EXIT(db);
+	mutex_exit(&db->db_mtx);
+	dmu_buf_unlock_parent(db, dblt, tag);
 	return (err);
 }
 
@@ -1422,7 +1632,7 @@
 static void
 dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
 {
-	dbuf_dirty_record_t *dr = db->db_last_dirty;
+	dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
 
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 	ASSERT(db->db.db_data != NULL);
@@ -1455,6 +1665,7 @@
 		spa_t *spa = db->db_objset->os_spa;
 		enum zio_compress compress_type =
 		    arc_get_compression(db->db_buf);
+		uint8_t complevel = arc_get_complevel(db->db_buf);
 
 		if (arc_is_encrypted(db->db_buf)) {
 			boolean_t byteorder;
@@ -1467,11 +1678,12 @@
 			dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db,
 			    dmu_objset_id(dn->dn_objset), byteorder, salt, iv,
 			    mac, dn->dn_type, size, arc_buf_lsize(db->db_buf),
-			    compress_type);
+			    compress_type, complevel);
 		} else if (compress_type != ZIO_COMPRESS_OFF) {
 			ASSERT3U(type, ==, ARC_BUFC_DATA);
 			dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
-			    size, arc_buf_lsize(db->db_buf), compress_type);
+			    size, arc_buf_lsize(db->db_buf), compress_type,
+			    complevel);
 		} else {
 			dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
 		}
@@ -1500,8 +1712,6 @@
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	if ((flags & DB_RF_HAVESTRUCT) == 0)
-		rw_enter(&dn->dn_struct_rwlock, RW_READER);
 
 	prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
 	    (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL &&
@@ -1509,8 +1719,6 @@
 
 	mutex_enter(&db->db_mtx);
 	if (db->db_state == DB_CACHED) {
-		spa_t *spa = dn->dn_objset->os_spa;
-
 		/*
 		 * Ensure that this block's dnode has been decrypted if
 		 * the caller has requested decrypted data.
@@ -1529,6 +1737,7 @@
 		    (arc_is_encrypted(db->db_buf) ||
 		    arc_is_unauthenticated(db->db_buf) ||
 		    arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
+			spa_t *spa = dn->dn_objset->os_spa;
 			zbookmark_phys_t zb;
 
 			SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
@@ -1538,30 +1747,34 @@
 			dbuf_set_data(db, db->db_buf);
 		}
 		mutex_exit(&db->db_mtx);
-		if (err == 0 && prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
-		if ((flags & DB_RF_HAVESTRUCT) == 0)
-			rw_exit(&dn->dn_struct_rwlock);
+		if (err == 0 && prefetch) {
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+			    B_FALSE, flags & DB_RF_HAVESTRUCT);
+		}
 		DB_DNODE_EXIT(db);
 		DBUF_STAT_BUMP(hash_hits);
 	} else if (db->db_state == DB_UNCACHED) {
-		spa_t *spa = dn->dn_objset->os_spa;
 		boolean_t need_wait = B_FALSE;
 
+		db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
+
 		if (zio == NULL &&
 		    db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
+			spa_t *spa = dn->dn_objset->os_spa;
 			zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
 			need_wait = B_TRUE;
 		}
-		err = dbuf_read_impl(db, zio, flags);
+		err = dbuf_read_impl(db, zio, flags, dblt, FTAG);
+		/*
+		 * dbuf_read_impl has dropped db_mtx and our parent's rwlock
+		 * for us, so db_state below is sampled without the mutex.
+		 */
+		if (!err && prefetch) {
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+			    db->db_state != DB_CACHED,
+			    flags & DB_RF_HAVESTRUCT);
+		}
 
-		/* dbuf_read_impl has dropped db_mtx for us */
-
-		if (!err && prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
-
-		if ((flags & DB_RF_HAVESTRUCT) == 0)
-			rw_exit(&dn->dn_struct_rwlock);
 		DB_DNODE_EXIT(db);
 		DBUF_STAT_BUMP(hash_misses);
 
@@ -1586,16 +1799,16 @@
 		 * occurred and the dbuf went to UNCACHED.
 		 */
 		mutex_exit(&db->db_mtx);
-		if (prefetch)
-			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
-		if ((flags & DB_RF_HAVESTRUCT) == 0)
-			rw_exit(&dn->dn_struct_rwlock);
+		if (prefetch) {
+			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+			    B_TRUE, flags & DB_RF_HAVESTRUCT);
+		}
 		DB_DNODE_EXIT(db);
 		DBUF_STAT_BUMP(hash_misses);
 
 		/* Skip the wait per the caller's request. */
-		mutex_enter(&db->db_mtx);
 		if ((flags & DB_RF_NEVERWAIT) == 0) {
+			mutex_enter(&db->db_mtx);
 			while (db->db_state == DB_READ ||
 			    db->db_state == DB_FILL) {
 				ASSERT(db->db_state == DB_READ ||
@@ -1606,8 +1819,8 @@
 			}
 			if (db->db_state == DB_UNCACHED)
 				err = SET_ERROR(EIO);
+			mutex_exit(&db->db_mtx);
 		}
-		mutex_exit(&db->db_mtx);
 	}
 
 	return (err);
@@ -1622,13 +1835,11 @@
 	while (db->db_state == DB_READ || db->db_state == DB_FILL)
 		cv_wait(&db->db_changed, &db->db_mtx);
 	if (db->db_state == DB_UNCACHED) {
-		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-		spa_t *spa = db->db_objset->os_spa;
-
 		ASSERT(db->db_buf == NULL);
 		ASSERT(db->db.db_data == NULL);
-		dbuf_set_data(db, arc_alloc_buf(spa, db, type, db->db.db_size));
+		dbuf_set_data(db, dbuf_alloc_arcbuf(db));
 		db->db_state = DB_FILL;
+		DTRACE_SET_STATE(db, "assigning filled buffer");
 	} else if (db->db_state == DB_NOFILL) {
 		dbuf_clear_data(db);
 	} else {
@@ -1691,11 +1902,13 @@
 	dmu_buf_impl_t *db, *db_next;
 	uint64_t txg = tx->tx_txg;
 	avl_index_t where;
+	dbuf_dirty_record_t *dr;
 
 	if (end_blkid > dn->dn_maxblkid &&
 	    !(start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID))
 		end_blkid = dn->dn_maxblkid;
-	dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
+	dprintf_dnode(dn, "start=%llu end=%llu\n", (u_longlong_t)start_blkid,
+	    (u_longlong_t)end_blkid);
 
 	db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
 	db_search->db_level = 0;
@@ -1744,9 +1957,8 @@
 		}
 		/* The dbuf is referenced */
 
-		if (db->db_last_dirty != NULL) {
-			dbuf_dirty_record_t *dr = db->db_last_dirty;
-
+		dr = list_head(&db->db_dirty_records);
+		if (dr != NULL) {
 			if (dr->dr_txg == txg) {
 				/*
 				 * This buffer is "in-use", re-adjust the file
@@ -1771,7 +1983,9 @@
 		if (db->db_state == DB_CACHED) {
 			ASSERT(db->db.db_data != NULL);
 			arc_release(db->db_buf, db);
+			rw_enter(&db->db_rwlock, RW_WRITER);
 			bzero(db->db.db_data, db->db.db_size);
+			rw_exit(&db->db_rwlock);
 			arc_buf_freeze(db->db_buf);
 		}
 
@@ -1785,7 +1999,8 @@
 void
 dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
 {
-	arc_buf_t *buf, *obuf;
+	arc_buf_t *buf, *old_buf;
+	dbuf_dirty_record_t *dr;
 	int osize = db->db.db_size;
 	arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 	dnode_t *dn;
@@ -1795,15 +2010,6 @@
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
 
-	/* XXX does *this* func really need the lock? */
-	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
-
-	/*
-	 * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
-	 * is OK, because there can be no other references to the db
-	 * when we are changing its size, so no concurrent DB_FILL can
-	 * be happening.
-	 */
 	/*
 	 * XXX we should be doing a dbuf_read, checking the return
 	 * value and returning that up to our callers
@@ -1814,23 +2020,25 @@
 	buf = arc_alloc_buf(dn->dn_objset->os_spa, db, type, size);
 
 	/* copy old block data to the new block */
-	obuf = db->db_buf;
-	bcopy(obuf->b_data, buf->b_data, MIN(osize, size));
+	old_buf = db->db_buf;
+	bcopy(old_buf->b_data, buf->b_data, MIN(osize, size));
 	/* zero the remainder */
 	if (size > osize)
 		bzero((uint8_t *)buf->b_data + osize, size - osize);
 
 	mutex_enter(&db->db_mtx);
 	dbuf_set_data(db, buf);
-	arc_buf_destroy(obuf, db);
+	arc_buf_destroy(old_buf, db);
 	db->db.db_size = size;
 
-	if (db->db_level == 0) {
-		db->db_last_dirty->dt.dl.dr_data = buf;
-	}
-	ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
-	ASSERT3U(db->db_last_dirty->dr_accounted, ==, osize);
-	db->db_last_dirty->dr_accounted = size;
+	dr = list_head(&db->db_dirty_records);
+	/* dirty record added by dmu_buf_will_dirty() */
+	VERIFY(dr != NULL);
+	if (db->db_level == 0)
+		dr->dt.dl.dr_data = buf;
+	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
+	ASSERT3U(dr->dr_accounted, ==, osize);
+	dr->dr_accounted = size;
 	mutex_exit(&db->db_mtx);
 
 	dmu_objset_willuse_space(dn->dn_objset, size - osize, tx);
@@ -1840,7 +2048,7 @@
 void
 dbuf_release_bp(dmu_buf_impl_t *db)
 {
-	ASSERTV(objset_t *os = db->db_objset);
+	objset_t *os __maybe_unused = db->db_objset;
 
 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
 	ASSERT(arc_released(os->os_phys_buf) ||
@@ -1877,13 +2085,81 @@
 }
 
 dbuf_dirty_record_t *
+dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
+{
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	IMPLY(dn->dn_objset->os_raw_receive, dn->dn_maxblkid >= blkid);
+	dnode_new_blkid(dn, blkid, tx, B_TRUE, B_FALSE);
+	ASSERT(dn->dn_maxblkid >= blkid);
+
+	dbuf_dirty_record_t *dr = kmem_zalloc(sizeof (*dr), KM_SLEEP);
+	list_link_init(&dr->dr_dirty_node);
+	list_link_init(&dr->dr_dbuf_node);
+	dr->dr_dnode = dn;
+	dr->dr_txg = tx->tx_txg;
+	dr->dt.dll.dr_blkid = blkid;
+	dr->dr_accounted = dn->dn_datablksz;
+
+	/*
+	 * There should not be any dbuf for the block that we're dirtying.
+	 * Otherwise the buffer contents could be inconsistent between the
+	 * dbuf and the lightweight dirty record.
+	 */
+	ASSERT3P(NULL, ==, dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid));
+
+	mutex_enter(&dn->dn_mtx);
+	int txgoff = tx->tx_txg & TXG_MASK;
+	if (dn->dn_free_ranges[txgoff] != NULL) {
+		range_tree_clear(dn->dn_free_ranges[txgoff], blkid, 1);
+	}
+
+	if (dn->dn_nlevels == 1) {
+		ASSERT3U(blkid, <, dn->dn_nblkptr);
+		list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
+		mutex_exit(&dn->dn_mtx);
+		rw_exit(&dn->dn_struct_rwlock);
+		dnode_setdirty(dn, tx);
+	} else {
+		mutex_exit(&dn->dn_mtx);
+
+		int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+		dmu_buf_impl_t *parent_db = dbuf_hold_level(dn,
+		    1, blkid >> epbs, FTAG);
+		rw_exit(&dn->dn_struct_rwlock);
+		if (parent_db == NULL) {
+			kmem_free(dr, sizeof (*dr));
+			return (NULL);
+		}
+		int err = dbuf_read(parent_db, NULL,
+		    (DB_RF_NOPREFETCH | DB_RF_CANFAIL));
+		if (err != 0) {
+			dbuf_rele(parent_db, FTAG);
+			kmem_free(dr, sizeof (*dr));
+			return (NULL);
+		}
+
+		dbuf_dirty_record_t *parent_dr = dbuf_dirty(parent_db, tx);
+		dbuf_rele(parent_db, FTAG);
+		mutex_enter(&parent_dr->dt.di.dr_mtx);
+		ASSERT3U(parent_dr->dr_txg, ==, tx->tx_txg);
+		list_insert_tail(&parent_dr->dt.di.dr_children, dr);
+		mutex_exit(&parent_dr->dt.di.dr_mtx);
+		dr->dr_parent = parent_dr;
+	}
+
+	dmu_objset_willuse_space(dn->dn_objset, dr->dr_accounted, tx);
+
+	return (dr);
+}
+
+dbuf_dirty_record_t *
 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	objset_t *os;
-	dbuf_dirty_record_t **drp, *dr;
-	int drop_struct_lock = FALSE;
+	dbuf_dirty_record_t *dr, *dr_next, *dr_head;
 	int txgoff = tx->tx_txg & TXG_MASK;
+	boolean_t drop_struct_rwlock = B_FALSE;
 
 	ASSERT(tx->tx_txg != 0);
 	ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -1896,7 +2172,7 @@
 	 * objects may be dirtied in syncing context, but only if they
 	 * were already pre-dirtied in open context.
 	 */
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	if (dn->dn_objset->os_dsl_dataset != NULL) {
 		rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
 		    RW_READER, FTAG);
@@ -1928,27 +2204,7 @@
 	    db->db_state == DB_NOFILL);
 
 	mutex_enter(&dn->dn_mtx);
-	/*
-	 * Don't set dirtyctx to SYNC if we're just modifying this as we
-	 * initialize the objset.
-	 */
-	if (dn->dn_dirtyctx == DN_UNDIRTIED) {
-		if (dn->dn_objset->os_dsl_dataset != NULL) {
-			rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
-			    RW_READER, FTAG);
-		}
-		if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
-			dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ?
-			    DN_DIRTY_SYNC : DN_DIRTY_OPEN);
-			ASSERT(dn->dn_dirtyctx_firstset == NULL);
-			dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
-		}
-		if (dn->dn_objset->os_dsl_dataset != NULL) {
-			rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
-			    FTAG);
-		}
-	}
-
+	dnode_set_dirtyctx(dn, tx, db);
 	if (tx->tx_txg > dn->dn_dirty_txg)
 		dn->dn_dirty_txg = tx->tx_txg;
 	mutex_exit(&dn->dn_mtx);
@@ -1959,17 +2215,16 @@
 	/*
 	 * If this buffer is already dirty, we're done.
 	 */
-	drp = &db->db_last_dirty;
-	ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
+	dr_head = list_head(&db->db_dirty_records);
+	ASSERT(dr_head == NULL || dr_head->dr_txg <= tx->tx_txg ||
 	    db->db.db_object == DMU_META_DNODE_OBJECT);
-	while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
-		drp = &dr->dr_next;
-	if (dr && dr->dr_txg == tx->tx_txg) {
+	dr_next = dbuf_find_dirty_lte(db, tx->tx_txg);
+	if (dr_next && dr_next->dr_txg == tx->tx_txg) {
 		DB_DNODE_EXIT(db);
 
-		dbuf_redirty(dr);
+		dbuf_redirty(dr_next);
 		mutex_exit(&db->db_mtx);
-		return (dr);
+		return (dr_next);
 	}
 
 	/*
@@ -1990,7 +2245,7 @@
 	 */
 	os = dn->dn_objset;
 	VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(os->os_spa));
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	if (dn->dn_objset->os_dsl_dataset != NULL)
 		rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
 	ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
@@ -2013,6 +2268,8 @@
 	 */
 	dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
 	list_link_init(&dr->dr_dirty_node);
+	list_link_init(&dr->dr_dbuf_node);
+	dr->dr_dnode = dn;
 	if (db->db_level == 0) {
 		void *data_old = db->db_buf;
 
@@ -2047,8 +2304,7 @@
 		dr->dr_accounted = db->db.db_size;
 	dr->dr_dbuf = db;
 	dr->dr_txg = tx->tx_txg;
-	dr->dr_next = *drp;
-	*drp = dr;
+	list_insert_before(&db->db_dirty_records, dr_next, dr);
 
 	/*
 	 * We could have been freed_in_flight between the dbuf_noread
@@ -2086,15 +2342,21 @@
 		return (dr);
 	}
 
-	/*
-	 * The dn_struct_rwlock prevents db_blkptr from changing
-	 * due to a write from syncing context completing
-	 * while we are running, so we want to acquire it before
-	 * looking at db_blkptr.
-	 */
 	if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
-		drop_struct_lock = TRUE;
+		drop_struct_rwlock = B_TRUE;
+	}
+
+	/*
+	 * If we are overwriting a dedup BP, then unless it is snapshotted,
+	 * when we get to syncing context we will need to decrement its
+	 * refcount in the DDT.  Prefetch the relevant DDT block so that
+	 * syncing context won't have to wait for the i/o.
+	 */
+	if (db->db_blkptr != NULL) {
+		db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
+		ddt_prefetch(os->os_spa, db->db_blkptr);
+		dmu_buf_unlock_parent(db, dblt, FTAG);
 	}
 
 	/*
@@ -2107,19 +2369,12 @@
 	    dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
 	    dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
 
-	/*
-	 * If we are overwriting a dedup BP, then unless it is snapshotted,
-	 * when we get to syncing context we will need to decrement its
-	 * refcount in the DDT.  Prefetch the relevant DDT block so that
-	 * syncing context won't have to wait for the i/o.
-	 */
-	ddt_prefetch(os->os_spa, db->db_blkptr);
 
 	if (db->db_level == 0) {
 		ASSERT(!db->db_objset->os_raw_receive ||
 		    dn->dn_maxblkid >= db->db_blkid);
 		dnode_new_blkid(dn, db->db_blkid, tx,
-		    drop_struct_lock, B_FALSE);
+		    drop_struct_rwlock, B_FALSE);
 		ASSERT(dn->dn_maxblkid >= db->db_blkid);
 	}
 
@@ -2130,15 +2385,14 @@
 
 		if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) {
 			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-
-			parent = dbuf_hold_level(dn, db->db_level+1,
+			parent = dbuf_hold_level(dn, db->db_level + 1,
 			    db->db_blkid >> epbs, FTAG);
 			ASSERT(parent != NULL);
 			parent_held = TRUE;
 		}
-		if (drop_struct_lock)
+		if (drop_struct_rwlock)
 			rw_exit(&dn->dn_struct_rwlock);
-		ASSERT3U(db->db_level+1, ==, parent->db_level);
+		ASSERT3U(db->db_level + 1, ==, parent->db_level);
 		di = dbuf_dirty(parent, tx);
 		if (parent_held)
 			dbuf_rele(parent, FTAG);
@@ -2148,7 +2402,7 @@
 		 * Since we've dropped the mutex, it's possible that
 		 * dbuf_undirty() might have changed this out from under us.
 		 */
-		if (db->db_last_dirty == dr ||
+		if (list_head(&db->db_dirty_records) == dr ||
 		    dn->dn_object == DMU_META_DNODE_OBJECT) {
 			mutex_enter(&di->dt.di.dr_mtx);
 			ASSERT3U(di->dr_txg, ==, tx->tx_txg);
@@ -2159,14 +2413,14 @@
 		}
 		mutex_exit(&db->db_mtx);
 	} else {
-		ASSERT(db->db_level+1 == dn->dn_nlevels);
+		ASSERT(db->db_level + 1 == dn->dn_nlevels);
 		ASSERT(db->db_blkid < dn->dn_nblkptr);
 		ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf);
 		mutex_enter(&dn->dn_mtx);
 		ASSERT(!list_link_active(&dr->dr_dirty_node));
 		list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
 		mutex_exit(&dn->dn_mtx);
-		if (drop_struct_lock)
+		if (drop_struct_rwlock)
 			rw_exit(&dn->dn_struct_rwlock);
 	}
 
@@ -2175,6 +2429,30 @@
 	return (dr);
 }
 
+static void	/* unlink dr from its dbuf and free it (by name: bonus dbufs) */
+dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
+{
+	dmu_buf_impl_t *db = dr->dr_dbuf;
+
+	if (dr->dt.dl.dr_data != db->db.db_data) {	/* dr has own buffer */
+		struct dnode *dn = dr->dr_dnode;
+		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+
+		kmem_free(dr->dt.dl.dr_data, max_bonuslen);
+		arc_space_return(max_bonuslen, ARC_SPACE_BONUS);
+	}
+	db->db_data_pending = NULL;
+	ASSERT(list_next(&db->db_dirty_records, dr) == NULL);	/* dr is tail */
+	list_remove(&db->db_dirty_records, dr);
+	if (dr->dr_dbuf->db_level != 0) {	/* level > 0: dt.di was set up */
+		mutex_destroy(&dr->dt.di.dr_mtx);
+		list_destroy(&dr->dt.di.dr_children);
+	}
+	kmem_free(dr, sizeof (dbuf_dirty_record_t));	/* dr invalid from here */
+	ASSERT3U(db->db_dirtycnt, >, 0);
+	db->db_dirtycnt -= 1;
+}
+
 /*
  * Undirty a buffer in the transaction group referenced by the given
  * transaction.  Return whether this evicted the dbuf.
@@ -2182,9 +2460,7 @@
 static boolean_t
 dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
-	dnode_t *dn;
 	uint64_t txg = tx->tx_txg;
-	dbuf_dirty_record_t *dr, **drp;
 
 	ASSERT(txg != 0);
 
@@ -2204,16 +2480,12 @@
 	/*
 	 * If this buffer is not dirty, we're done.
 	 */
-	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
-		if (dr->dr_txg <= txg)
-			break;
-	if (dr == NULL || dr->dr_txg < txg)
+	dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, txg);
+	if (dr == NULL)
 		return (B_FALSE);
-	ASSERT(dr->dr_txg == txg);
 	ASSERT(dr->dr_dbuf == db);
 
-	DB_DNODE_ENTER(db);
-	dn = DB_DNODE(db);
+	dnode_t *dn = dr->dr_dnode;
 
 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
 
@@ -2222,7 +2494,7 @@
 	dsl_pool_undirty_space(dmu_objset_pool(dn->dn_objset),
 	    dr->dr_accounted, txg);
 
-	*drp = dr->dr_next;
+	list_remove(&db->db_dirty_records, dr);
 
 	/*
 	 * Note that there are three places in dbuf_dirty()
@@ -2241,7 +2513,6 @@
 		list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
 		mutex_exit(&dn->dn_mtx);
 	}
-	DB_DNODE_EXIT(db);
 
 	if (db->db_state != DB_NOFILL) {
 		dbuf_unoverride(dr);
@@ -2282,15 +2553,14 @@
 	 */
 	mutex_enter(&db->db_mtx);
 
-	dbuf_dirty_record_t *dr;
-	for (dr = db->db_last_dirty;
-	    dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
+	if (db->db_state == DB_CACHED) {
+		dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
 		/*
 		 * It's possible that it is already dirty but not cached,
 		 * because there are some calls to dbuf_dirty() that don't
 		 * go through dmu_buf_will_dirty().
 		 */
-		if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) {
+		if (dr != NULL) {
 			/* This dbuf is already dirty and cached. */
 			dbuf_redirty(dr);
 			mutex_exit(&db->db_mtx);
@@ -2318,17 +2588,12 @@
 dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+	dbuf_dirty_record_t *dr;
 
 	mutex_enter(&db->db_mtx);
-	for (dbuf_dirty_record_t *dr = db->db_last_dirty;
-	    dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
-		if (dr->dr_txg == tx->tx_txg) {
-			mutex_exit(&db->db_mtx);
-			return (B_TRUE);
-		}
-	}
+	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
 	mutex_exit(&db->db_mtx);
-	return (B_FALSE);
+	return (dr != NULL);
 }
 
 void
@@ -2337,7 +2602,7 @@
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 
 	db->db_state = DB_NOFILL;
-
+	DTRACE_SET_STATE(db, "allocating NOFILL buffer");
 	dmu_buf_will_fill(db_fake, tx);
 }
 
@@ -2383,12 +2648,9 @@
 	dmu_buf_will_dirty_impl(db_fake,
 	    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
 
-	dr = db->db_last_dirty;
-	while (dr != NULL && dr->dr_txg > tx->tx_txg)
-		dr = dr->dr_next;
+	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
 
 	ASSERT3P(dr, !=, NULL);
-	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
 
 	dr->dt.dl.dr_has_raw_params = B_TRUE;
 	dr->dt.dl.dr_byteorder = byteorder;
@@ -2397,23 +2659,43 @@
 	bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN);
 }
 
-#pragma weak dmu_buf_fill_done = dbuf_fill_done
-/* ARGSUSED */
-void
-dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
+static void	/* mark db's head dirty record as overridden by a copy of *bp */
+dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
 {
+	struct dirty_leaf *dl;
+	dbuf_dirty_record_t *dr;
+
+	dr = list_head(&db->db_dirty_records);	/* NOTE(review): no NULL check */
+	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);	/* head must belong to tx */
+	dl = &dr->dt.dl;
+	dl->dr_overridden_by = *bp;	/* struct copy of the caller's bp */
+	dl->dr_override_state = DR_OVERRIDDEN;
+	dl->dr_overridden_by.blk_birth = dr->dr_txg;	/* birth = dr's txg */
+}
+
+void
+dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
+{
+	(void) tx;
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+	dbuf_states_t old_state;
 	mutex_enter(&db->db_mtx);
 	DBUF_VERIFY(db);
 
-	if (db->db_state == DB_FILL) {
+	old_state = db->db_state;
+	db->db_state = DB_CACHED;
+	if (old_state == DB_FILL) {
 		if (db->db_level == 0 && db->db_freed_in_flight) {
 			ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 			/* we were freed while filling */
 			/* XXX dbuf_undirty? */
 			bzero(db->db.db_data, db->db.db_size);
 			db->db_freed_in_flight = FALSE;
+			DTRACE_SET_STATE(db,
+			    "fill done handling freed in flight");
+		} else {
+			DTRACE_SET_STATE(db, "fill done");
 		}
-		db->db_state = DB_CACHED;
 		cv_broadcast(&db->db_changed);
 	}
 	mutex_exit(&db->db_mtx);
@@ -2428,6 +2710,7 @@
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
 	struct dirty_leaf *dl;
 	dmu_object_type_t type;
+	dbuf_dirty_record_t *dr;
 
 	if (etype == BP_EMBEDDED_TYPE_DATA) {
 		ASSERT(spa_feature_is_active(dmu_objset_spa(db->db_objset),
@@ -2443,8 +2726,9 @@
 
 	dmu_buf_will_not_fill(dbuf, tx);
 
-	ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
-	dl = &db->db_last_dirty->dt.dl;
+	dr = list_head(&db->db_dirty_records);
+	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
+	dl = &dr->dt.dl;
 	encode_embedded_bp_compressed(&dl->dr_overridden_by,
 	    data, comp, uncompressed_size, compressed_size);
 	BPE_SET_ETYPE(&dl->dr_overridden_by, etype);
@@ -2453,7 +2737,32 @@
 	BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder);
 
 	dl->dr_override_state = DR_OVERRIDDEN;
-	dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg;
+	dl->dr_overridden_by.blk_birth = dr->dr_txg;
+}
+
+void	/* override dbuf in tx with a synthesized, redacted block pointer */
+dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx)
+{
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+	dmu_object_type_t type;
+	ASSERT(dsl_dataset_feature_is_active(db->db_objset->os_dsl_dataset,
+	    SPA_FEATURE_REDACTED_DATASETS));
+
+	DB_DNODE_ENTER(db);
+	type = DB_DNODE(db)->dn_type;	/* object type read from the dnode */
+	DB_DNODE_EXIT(db);
+
+	ASSERT0(db->db_level);	/* only level-0 dbufs are expected here */
+	dmu_buf_will_not_fill(dbuf, tx);
+
+	blkptr_t bp = { { { {0} } } };	/* start from an all-zero bp */
+	BP_SET_TYPE(&bp, type);
+	BP_SET_LEVEL(&bp, 0);
+	BP_SET_BIRTH(&bp, tx->tx_txg, 0);
+	BP_SET_REDACTED(&bp);
+	BPE_SET_LSIZE(&bp, dbuf->db_size);	/* lsize taken from the dbuf */
+
+	dbuf_override_impl(db, &bp, tx);	/* install bp as the override */
 }
 
 /*
@@ -2494,13 +2803,11 @@
 		(void) dbuf_dirty(db, tx);
 		bcopy(buf->b_data, db->db.db_data, db->db.db_size);
 		arc_buf_destroy(buf, db);
-		xuio_stat_wbuf_copied();
 		return;
 	}
 
-	xuio_stat_wbuf_nocopy();
 	if (db->db_state == DB_CACHED) {
-		dbuf_dirty_record_t *dr = db->db_last_dirty;
+		dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
 
 		ASSERT(db->db_buf != NULL);
 		if (dr != NULL && dr->dr_txg == tx->tx_txg) {
@@ -2522,6 +2829,7 @@
 	ASSERT(db->db_buf == NULL);
 	dbuf_set_data(db, buf);
 	db->db_state = DB_FILL;
+	DTRACE_SET_STATE(db, "filling assigned arcbuf");
 	mutex_exit(&db->db_mtx);
 	(void) dbuf_dirty(db, tx);
 	dmu_buf_fill_done(&db->db, tx);
@@ -2549,6 +2857,7 @@
 			kmem_free(db->db.db_data, bonuslen);
 			arc_space_return(bonuslen, ARC_SPACE_BONUS);
 			db->db_state = DB_UNCACHED;
+			DTRACE_SET_STATE(db, "buffer cleared");
 		}
 	}
 
@@ -2558,7 +2867,7 @@
 		ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
 		    db->db_caching_status == DB_DBUF_METADATA_CACHE);
 
-		multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+		multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
 		(void) zfs_refcount_remove_many(
 		    &dbuf_caches[db->db_caching_status].size,
 		    db->db.db_size, db);
@@ -2576,8 +2885,10 @@
 
 	ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
 	ASSERT(db->db_data_pending == NULL);
+	ASSERT(list_is_empty(&db->db_dirty_records));
 
 	db->db_state = DB_EVICTING;
+	DTRACE_SET_STATE(db, "buffer eviction started");
 	db->db_blkptr = NULL;
 
 	/*
@@ -2596,7 +2907,6 @@
 			mutex_enter_nested(&dn->dn_dbufs_mtx,
 			    NESTED_SINGLE);
 		avl_remove(&dn->dn_dbufs, db);
-		atomic_dec_32(&dn->dn_dbufs_count);
 		membar_producer();
 		DB_DNODE_EXIT(db);
 		if (needlock)
@@ -2630,9 +2940,6 @@
 	ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
 	ASSERT(!multilist_link_active(&db->db_cache_link));
 
-	kmem_cache_free(dbuf_kmem_cache, db);
-	arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
-
 	/*
 	 * If this dbuf is referenced from an indirect dbuf,
 	 * decrement the ref count on the indirect dbuf.
@@ -2641,6 +2948,9 @@
 		mutex_enter(&parent->db_mtx);
 		dbuf_rele_and_unlock(parent, db, B_TRUE);
 	}
+
+	kmem_cache_free(dbuf_kmem_cache, db);
+	arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
 }
 
 /*
@@ -2706,10 +3016,10 @@
 	} else if (level < nlevels-1) {
 		/* this block is referenced from an indirect block */
 		int err;
-		dbuf_hold_arg_t *dh = dbuf_hold_arg_create(dn, level + 1,
+
+		err = dbuf_hold_impl(dn, level + 1,
 		    blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
-		err = dbuf_hold_impl_arg(dh);
-		dbuf_hold_arg_destroy(dh);
+
 		if (err)
 			return (err);
 		err = dbuf_read(*parentp, NULL,
@@ -2719,10 +3029,12 @@
 			*parentp = NULL;
 			return (err);
 		}
+		rw_enter(&(*parentp)->db_rwlock, RW_READER);
 		*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
 		    (blkid & ((1ULL << epbs) - 1));
 		if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
 			ASSERT(BP_IS_HOLE(*bpp));
+		rw_exit(&(*parentp)->db_rwlock);
 		return (0);
 	} else {
 		/* the block is referenced from the dnode */
@@ -2750,11 +3062,13 @@
 
 	db = kmem_cache_alloc(dbuf_kmem_cache, KM_SLEEP);
 
+	list_create(&db->db_dirty_records, sizeof (dbuf_dirty_record_t),
+	    offsetof(dbuf_dirty_record_t, dr_dbuf_node));
+
 	db->db_objset = os;
 	db->db.db_object = dn->dn_object;
 	db->db_level = level;
 	db->db_blkid = blkid;
-	db->db_last_dirty = NULL;
 	db->db_dirtycnt = 0;
 	db->db_dnode_handle = dn->dn_handle;
 	db->db_parent = parent;
@@ -2772,6 +3086,7 @@
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
 		db->db.db_offset = DMU_BONUS_BLKID;
 		db->db_state = DB_UNCACHED;
+		DTRACE_SET_STATE(db, "bonus buffer created");
 		db->db_caching_status = DB_NO_CACHE;
 		/* the bonus dbuf is not placed in the hash table */
 		arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
@@ -2795,17 +3110,18 @@
 	 * dn_dbufs list.
 	 */
 	mutex_enter(&dn->dn_dbufs_mtx);
-	db->db_state = DB_EVICTING;
+	db->db_state = DB_EVICTING; /* not worth logging this state change */
 	if ((odb = dbuf_hash_insert(db)) != NULL) {
 		/* someone else inserted it first */
-		kmem_cache_free(dbuf_kmem_cache, db);
 		mutex_exit(&dn->dn_dbufs_mtx);
+		kmem_cache_free(dbuf_kmem_cache, db);
 		DBUF_STAT_BUMP(hash_insert_race);
 		return (odb);
 	}
 	avl_add(&dn->dn_dbufs, db);
 
 	db->db_state = DB_UNCACHED;
+	DTRACE_SET_STATE(db, "regular buffer created");
 	db->db_caching_status = DB_NO_CACHE;
 	mutex_exit(&dn->dn_dbufs_mtx);
 	arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
@@ -2816,13 +3132,42 @@
 	ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
 	    zfs_refcount_count(&dn->dn_holds) > 0);
 	(void) zfs_refcount_add(&dn->dn_holds, db);
-	atomic_inc_32(&dn->dn_dbufs_count);
 
 	dprintf_dbuf(db, "db=%p\n", db);
 
 	return (db);
 }
 
+/*
+ * Public variant of dbuf_findbp(): copies the block pointer for (level, blkid)
+ * into *bp rather than returning a dbuf.  On success, *datablkszsec and
+ * *indblkshift (each may be NULL) are filled from dn->dn_phys and any dbuf
+ * the lookup handed back is released.  Returns dbuf_findbp()'s error code.
+ * The dnode must be held and dn_struct_rwlock held as (at least) a reader.
+ */
+int
+dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
+    blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift)
+{
+	dmu_buf_impl_t *dbp = NULL;
+	blkptr_t *bp2;
+	int err = 0;
+	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+
+	err = dbuf_findbp(dn, level, blkid, B_FALSE, &dbp, &bp2);
+	if (err == 0) {
+		*bp = *bp2;	/* NOTE(review): assumes bp2 != NULL; confirm */
+		if (dbp != NULL)
+			dbuf_rele(dbp, NULL);	/* drop hold from the lookup */
+		if (datablkszsec != NULL)
+			*datablkszsec = dn->dn_phys->dn_datablkszsec;
+		if (indblkshift != NULL)
+			*indblkshift = dn->dn_phys->dn_indblkshift;
+	}
+
+	return (err);
+}
+
 typedef struct dbuf_prefetch_arg {
 	spa_t *dpa_spa;	/* The spa to issue the prefetch in. */
 	zbookmark_phys_t dpa_zb; /* The target block to prefetch. */
@@ -2832,20 +3177,51 @@
 	zio_priority_t dpa_prio; /* The priority I/Os should be issued at. */
 	zio_t *dpa_zio; /* The parent zio_t for all prefetches. */
 	arc_flags_t dpa_aflags; /* Flags to pass to the final prefetch. */
+	dbuf_prefetch_fn dpa_cb; /* prefetch completion callback */
+	void *dpa_arg; /* prefetch completion arg */
 } dbuf_prefetch_arg_t;
 
+static void	/* invoke the completion callback, if any, then free dpa */
+dbuf_prefetch_fini(dbuf_prefetch_arg_t *dpa, boolean_t io_done)
+{
+	if (dpa->dpa_cb != NULL) {	/* callback is optional */
+		dpa->dpa_cb(dpa->dpa_arg, dpa->dpa_zb.zb_level,
+		    dpa->dpa_zb.zb_blkid, io_done);
+	}
+	kmem_free(dpa, sizeof (*dpa));	/* dpa must not be used after this */
+}
+
+static void	/* done callback passed to arc_read() for the final prefetch */
+dbuf_issue_final_prefetch_done(zio_t *zio, const zbookmark_phys_t *zb,
+    const blkptr_t *iobp, arc_buf_t *abuf, void *private)
+{
+	(void) zio, (void) zb, (void) iobp;	/* unused parameters */
+	dbuf_prefetch_arg_t *dpa = private;
+
+	if (abuf != NULL)
+		arc_buf_destroy(abuf, private);	/* buf itself is not kept */
+
+	dbuf_prefetch_fini(dpa, B_TRUE);	/* reports io_done; frees dpa */
+}
+
 /*
  * Actually issue the prefetch read for the block given.
  */
 static void
 dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
 {
-	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
-		return;
+	ASSERT(!BP_IS_REDACTED(bp) ||
+	    dsl_dataset_feature_is_active(
+	    dpa->dpa_dnode->dn_objset->os_dsl_dataset,
+	    SPA_FEATURE_REDACTED_DATASETS));
+
+	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
+		return (dbuf_prefetch_fini(dpa, B_FALSE));
 
 	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 	arc_flags_t aflags =
-	    dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+	    dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH |
+	    ARC_FLAG_NO_BUF;
 
 	/* dnodes are always read as raw and then converted later */
 	if (BP_GET_TYPE(bp) == DMU_OT_DNODE && BP_IS_PROTECTED(bp) &&
@@ -2855,7 +3231,8 @@
 	ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
 	ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
 	ASSERT(dpa->dpa_zio != NULL);
-	(void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL,
+	(void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp,
+	    dbuf_issue_final_prefetch_done, dpa,
 	    dpa->dpa_prio, zio_flags, &aflags, &dpa->dpa_zb);
 }
 
@@ -2868,6 +3245,7 @@
 dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
     const blkptr_t *iobp, arc_buf_t *abuf, void *private)
 {
+	(void) zb, (void) iobp;
 	dbuf_prefetch_arg_t *dpa = private;
 
 	ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
@@ -2875,8 +3253,7 @@
 
 	if (abuf == NULL) {
 		ASSERT(zio == NULL || zio->io_error != 0);
-		kmem_free(dpa, sizeof (*dpa));
-		return;
+		return (dbuf_prefetch_fini(dpa, B_TRUE));
 	}
 	ASSERT(zio == NULL || zio->io_error == 0);
 
@@ -2908,11 +3285,9 @@
 		dmu_buf_impl_t *db = dbuf_hold_level(dpa->dpa_dnode,
 		    dpa->dpa_curlevel, curblkid, FTAG);
 		if (db == NULL) {
-			kmem_free(dpa, sizeof (*dpa));
 			arc_buf_destroy(abuf, private);
-			return;
+			return (dbuf_prefetch_fini(dpa, B_TRUE));
 		}
-
 		(void) dbuf_read(db, NULL,
 		    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_HAVESTRUCT);
 		dbuf_rele(db, FTAG);
@@ -2924,12 +3299,15 @@
 	blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
 	    P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
 
-	if (BP_IS_HOLE(bp)) {
-		kmem_free(dpa, sizeof (*dpa));
+	ASSERT(!BP_IS_REDACTED(bp) || (dpa->dpa_dnode &&
+	    dsl_dataset_feature_is_active(
+	    dpa->dpa_dnode->dn_objset->os_dsl_dataset,
+	    SPA_FEATURE_REDACTED_DATASETS)));
+	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
+		dbuf_prefetch_fini(dpa, B_TRUE);
 	} else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
 		ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
 		dbuf_issue_final_prefetch(dpa, bp);
-		kmem_free(dpa, sizeof (*dpa));
 	} else {
 		arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
 		zbookmark_phys_t zb;
@@ -2944,7 +3322,8 @@
 		    dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
 
 		(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
-		    bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
+		    bp, dbuf_prefetch_indirect_done, dpa,
+		    ZIO_PRIORITY_SYNC_READ,
 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 		    &iter_aflags, &zb);
 	}
@@ -2959,9 +3338,10 @@
  * complete. Note that the prefetch might fail if the dataset is encrypted and
  * the encryption key is unmapped before the IO completes.
  */
-void
-dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
-    arc_flags_t aflags)
+int
+dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
+    zio_priority_t prio, arc_flags_t aflags, dbuf_prefetch_fn cb,
+    void *arg)
 {
 	blkptr_t bp;
 	int epbs, nlevels, curlevel;
@@ -2971,10 +3351,10 @@
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 
 	if (blkid > dn->dn_maxblkid)
-		return;
+		goto no_issue;
 
-	if (dnode_block_freed(dn, blkid))
-		return;
+	if (level == 0 && dnode_block_freed(dn, blkid))
+		goto no_issue;
 
 	/*
 	 * This dnode hasn't been written to disk yet, so there's nothing to
@@ -2982,11 +3362,11 @@
 	 */
 	nlevels = dn->dn_phys->dn_nlevels;
 	if (level >= nlevels || dn->dn_phys->dn_nblkptr == 0)
-		return;
+		goto no_issue;
 
 	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
 	if (dn->dn_phys->dn_maxblkid < blkid << (epbs * level))
-		return;
+		goto no_issue;
 
 	dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object,
 	    level, blkid);
@@ -2996,7 +3376,7 @@
 		 * This dbuf already exists.  It is either CACHED, or
 		 * (we assume) about to be read or filled.
 		 */
-		return;
+		goto no_issue;
 	}
 
 	/*
@@ -3028,8 +3408,11 @@
 		ASSERT3U(curblkid, <, dn->dn_phys->dn_nblkptr);
 		bp = dn->dn_phys->dn_blkptr[curblkid];
 	}
-	if (BP_IS_HOLE(&bp))
-		return;
+	ASSERT(!BP_IS_REDACTED(&bp) ||
+	    dsl_dataset_feature_is_active(dn->dn_objset->os_dsl_dataset,
+	    SPA_FEATURE_REDACTED_DATASETS));
+	if (BP_IS_HOLE(&bp) || BP_IS_REDACTED(&bp))
+		goto no_issue;
 
 	ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));
 
@@ -3047,9 +3430,11 @@
 	dpa->dpa_dnode = dn;
 	dpa->dpa_epbs = epbs;
 	dpa->dpa_zio = pio;
+	dpa->dpa_cb = cb;
+	dpa->dpa_arg = arg;
 
 	/* flag if L2ARC eligible, l2arc_noprefetch then decides */
-	if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+	if (dnode_level_is_l2cacheable(&bp, dn, level))
 		dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
 
 	/*
@@ -3062,19 +3447,19 @@
 	if (curlevel == level) {
 		ASSERT3U(curblkid, ==, blkid);
 		dbuf_issue_final_prefetch(dpa, &bp);
-		kmem_free(dpa, sizeof (*dpa));
 	} else {
 		arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
 		zbookmark_phys_t zb;
 
 		/* flag if L2ARC eligible, l2arc_noprefetch then decides */
-		if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+		if (dnode_level_is_l2cacheable(&bp, dn, level))
 			iter_aflags |= ARC_FLAG_L2CACHE;
 
 		SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
 		    dn->dn_object, curlevel, curblkid);
 		(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
-		    &bp, dbuf_prefetch_indirect_done, dpa, prio,
+		    &bp, dbuf_prefetch_indirect_done, dpa,
+		    ZIO_PRIORITY_SYNC_READ,
 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 		    &iter_aflags, &zb);
 	}
@@ -3083,27 +3468,36 @@
 	 * dpa may have already been freed.
 	 */
 	zio_nowait(pio);
+	return (1);
+no_issue:
+	if (cb != NULL)
+		cb(arg, level, blkid, B_FALSE);
+	return (0);
 }
 
-#define	DBUF_HOLD_IMPL_MAX_DEPTH	20
+int	/* dbuf_prefetch_impl() with no completion callback; same return value */
+dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
+    arc_flags_t aflags)
+{
+
+	return (dbuf_prefetch_impl(dn, level, blkid, prio, aflags, NULL, NULL));
+}
 
 /*
- * Helper function for dbuf_hold_impl_arg() to copy a buffer. Handles
+ * Helper function for dbuf_hold_impl() to copy a buffer. Handles
  * the case of encrypted, compressed and uncompressed buffers by
  * allocating the new buffer, respectively, with arc_alloc_raw_buf(),
  * arc_alloc_compressed_buf() or arc_alloc_buf().*
  *
- * NOTE: Declared noinline to avoid stack bloat in dbuf_hold_impl_arg().
+ * NOTE: Declared noinline to avoid stack bloat in dbuf_hold_impl().
  */
 noinline static void
-dbuf_hold_copy(struct dbuf_hold_arg *dh)
+dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
 {
-	dnode_t *dn = dh->dh_dn;
-	dmu_buf_impl_t *db = dh->dh_db;
-	dbuf_dirty_record_t *dr = dh->dh_dr;
+	dbuf_dirty_record_t *dr = db->db_data_pending;
 	arc_buf_t *data = dr->dt.dl.dr_data;
-
 	enum zio_compress compress_type = arc_get_compression(data);
+	uint8_t complevel = arc_get_complevel(data);
 
 	if (arc_is_encrypted(data)) {
 		boolean_t byteorder;
@@ -3115,180 +3509,132 @@
 		dbuf_set_data(db, arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
 		    dmu_objset_id(dn->dn_objset), byteorder, salt, iv, mac,
 		    dn->dn_type, arc_buf_size(data), arc_buf_lsize(data),
-		    compress_type));
+		    compress_type, complevel));
 	} else if (compress_type != ZIO_COMPRESS_OFF) {
 		dbuf_set_data(db, arc_alloc_compressed_buf(
 		    dn->dn_objset->os_spa, db, arc_buf_size(data),
-		    arc_buf_lsize(data), compress_type));
+		    arc_buf_lsize(data), compress_type, complevel));
 	} else {
 		dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
 		    DBUF_GET_BUFC_TYPE(db), db->db.db_size));
 	}
 
+	rw_enter(&db->db_rwlock, RW_WRITER);
 	bcopy(data->b_data, db->db.db_data, arc_buf_size(data));
+	rw_exit(&db->db_rwlock);
 }
 
 /*
  * Returns with db_holds incremented, and db_mtx not held.
  * Note: dn_struct_rwlock must be held.
  */
-static int
-dbuf_hold_impl_arg(struct dbuf_hold_arg *dh)
+int
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+    boolean_t fail_sparse, boolean_t fail_uncached,
+    void *tag, dmu_buf_impl_t **dbp)
 {
-	dh->dh_parent = NULL;
+	dmu_buf_impl_t *db, *parent = NULL;
 
-	ASSERT(dh->dh_blkid != DMU_BONUS_BLKID);
-	ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock));
-	ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level);
-
-	*(dh->dh_dbp) = NULL;
-
-	/* dbuf_find() returns with db_mtx held */
-	dh->dh_db = dbuf_find(dh->dh_dn->dn_objset, dh->dh_dn->dn_object,
-	    dh->dh_level, dh->dh_blkid);
-
-	if (dh->dh_db == NULL) {
-		dh->dh_bp = NULL;
-
-		if (dh->dh_fail_uncached)
-			return (SET_ERROR(ENOENT));
-
-		ASSERT3P(dh->dh_parent, ==, NULL);
-		dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid,
-		    dh->dh_fail_sparse, &dh->dh_parent, &dh->dh_bp);
-		if (dh->dh_fail_sparse) {
-			if (dh->dh_err == 0 &&
-			    dh->dh_bp && BP_IS_HOLE(dh->dh_bp))
-				dh->dh_err = SET_ERROR(ENOENT);
-			if (dh->dh_err) {
-				if (dh->dh_parent)
-					dbuf_rele(dh->dh_parent, NULL);
-				return (dh->dh_err);
-			}
-		}
-		if (dh->dh_err && dh->dh_err != ENOENT)
-			return (dh->dh_err);
-		dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid,
-		    dh->dh_parent, dh->dh_bp);
+	/* If the pool has been created, verify the tx_sync_lock is not held */
+	spa_t *spa = dn->dn_objset->os_spa;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	if (dp != NULL) {
+		ASSERT(!MUTEX_HELD(&dp->dp_tx.tx_sync_lock));
 	}
 
-	if (dh->dh_fail_uncached && dh->dh_db->db_state != DB_CACHED) {
-		mutex_exit(&dh->dh_db->db_mtx);
+	ASSERT(blkid != DMU_BONUS_BLKID);
+	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+	ASSERT3U(dn->dn_nlevels, >, level);
+
+	*dbp = NULL;
+
+	/* dbuf_find() returns with db_mtx held */
+	db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid);
+
+	if (db == NULL) {
+		blkptr_t *bp = NULL;
+		int err;
+
+		if (fail_uncached)
+			return (SET_ERROR(ENOENT));
+
+		ASSERT3P(parent, ==, NULL);
+		err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
+		if (fail_sparse) {
+			if (err == 0 && bp && BP_IS_HOLE(bp))
+				err = SET_ERROR(ENOENT);
+			if (err) {
+				if (parent)
+					dbuf_rele(parent, NULL);
+				return (err);
+			}
+		}
+		if (err && err != ENOENT)
+			return (err);
+		db = dbuf_create(dn, level, blkid, parent, bp);
+	}
+
+	if (fail_uncached && db->db_state != DB_CACHED) {
+		mutex_exit(&db->db_mtx);
 		return (SET_ERROR(ENOENT));
 	}
 
-	if (dh->dh_db->db_buf != NULL) {
-		arc_buf_access(dh->dh_db->db_buf);
-		ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
+	if (db->db_buf != NULL) {
+		arc_buf_access(db->db_buf);
+		ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
 	}
 
-	ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
+	ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
 
 	/*
 	 * If this buffer is currently syncing out, and we are
 	 * still referencing it from db_data, we need to make a copy
 	 * of it in case we decide we want to dirty it again in this txg.
 	 */
-	if (dh->dh_db->db_level == 0 &&
-	    dh->dh_db->db_blkid != DMU_BONUS_BLKID &&
-	    dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT &&
-	    dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) {
-		dh->dh_dr = dh->dh_db->db_data_pending;
-		if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf)
-			dbuf_hold_copy(dh);
+	if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
+	    dn->dn_object != DMU_META_DNODE_OBJECT &&
+	    db->db_state == DB_CACHED && db->db_data_pending) {
+		dbuf_dirty_record_t *dr = db->db_data_pending;
+		if (dr->dt.dl.dr_data == db->db_buf)
+			dbuf_hold_copy(dn, db);
 	}
 
-	if (multilist_link_active(&dh->dh_db->db_cache_link)) {
-		ASSERT(zfs_refcount_is_zero(&dh->dh_db->db_holds));
-		ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
-		    dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+	if (multilist_link_active(&db->db_cache_link)) {
+		ASSERT(zfs_refcount_is_zero(&db->db_holds));
+		ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
+		    db->db_caching_status == DB_DBUF_METADATA_CACHE);
 
-		multilist_remove(
-		    dbuf_caches[dh->dh_db->db_caching_status].cache,
-		    dh->dh_db);
+		multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
 		(void) zfs_refcount_remove_many(
-		    &dbuf_caches[dh->dh_db->db_caching_status].size,
-		    dh->dh_db->db.db_size, dh->dh_db);
+		    &dbuf_caches[db->db_caching_status].size,
+		    db->db.db_size, db);
 
-		if (dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+		if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
 			DBUF_STAT_BUMPDOWN(metadata_cache_count);
 		} else {
-			DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+			DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
 			DBUF_STAT_BUMPDOWN(cache_count);
-			DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
-			    dh->dh_db->db.db_size);
+			DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+			    db->db.db_size);
 		}
-		dh->dh_db->db_caching_status = DB_NO_CACHE;
+		db->db_caching_status = DB_NO_CACHE;
 	}
-	(void) zfs_refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
-	DBUF_VERIFY(dh->dh_db);
-	mutex_exit(&dh->dh_db->db_mtx);
+	(void) zfs_refcount_add(&db->db_holds, tag);
+	DBUF_VERIFY(db);
+	mutex_exit(&db->db_mtx);
 
 	/* NOTE: we can't rele the parent until after we drop the db_mtx */
-	if (dh->dh_parent)
-		dbuf_rele(dh->dh_parent, NULL);
+	if (parent)
+		dbuf_rele(parent, NULL);
 
-	ASSERT3P(DB_DNODE(dh->dh_db), ==, dh->dh_dn);
-	ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid);
-	ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level);
-	*(dh->dh_dbp) = dh->dh_db;
+	ASSERT3P(DB_DNODE(db), ==, dn);
+	ASSERT3U(db->db_blkid, ==, blkid);
+	ASSERT3U(db->db_level, ==, level);
+	*dbp = db;
 
 	return (0);
 }
 
-/*
- * dbuf_hold_impl_arg() is called recursively, via dbuf_findbp().  There can
- * be as many recursive calls as there are levels of on-disk indirect blocks,
- * but typically only 0-2 recursive calls.  To minimize the stack frame size,
- * the recursive function's arguments and "local variables" are allocated on
- * the heap as the dbuf_hold_arg_t.
- */
-int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
-    boolean_t fail_sparse, boolean_t fail_uncached,
-    void *tag, dmu_buf_impl_t **dbp)
-{
-	dbuf_hold_arg_t *dh = dbuf_hold_arg_create(dn, level, blkid,
-	    fail_sparse, fail_uncached, tag, dbp);
-
-	int error = dbuf_hold_impl_arg(dh);
-
-	dbuf_hold_arg_destroy(dh);
-
-	return (error);
-}
-
-static dbuf_hold_arg_t *
-dbuf_hold_arg_create(dnode_t *dn, uint8_t level, uint64_t blkid,
-    boolean_t fail_sparse, boolean_t fail_uncached,
-    void *tag, dmu_buf_impl_t **dbp)
-{
-	dbuf_hold_arg_t *dh = kmem_alloc(sizeof (*dh), KM_SLEEP);
-	dh->dh_dn = dn;
-	dh->dh_level = level;
-	dh->dh_blkid = blkid;
-
-	dh->dh_fail_sparse = fail_sparse;
-	dh->dh_fail_uncached = fail_uncached;
-
-	dh->dh_tag = tag;
-	dh->dh_dbp = dbp;
-
-	dh->dh_db = NULL;
-	dh->dh_parent = NULL;
-	dh->dh_bp = NULL;
-	dh->dh_err = 0;
-	dh->dh_dr = NULL;
-
-	return (dh);
-}
-
-static void
-dbuf_hold_arg_destroy(dbuf_hold_arg_t *dh)
-{
-	kmem_free(dh, sizeof (*dh));
-}
-
 dmu_buf_impl_t *
 dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
 {
@@ -3316,7 +3662,6 @@
 dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	dnode_t *dn;
 
 	if (db->db_blkid != DMU_SPILL_BLKID)
 		return (SET_ERROR(ENOTSUP));
@@ -3325,12 +3670,7 @@
 	ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset)));
 	blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE);
 
-	DB_DNODE_ENTER(db);
-	dn = DB_DNODE(db);
-	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	dbuf_new_size(db, blksz, tx);
-	rw_exit(&dn->dn_struct_rwlock);
-	DB_DNODE_EXIT(db);
 
 	return (0);
 }
@@ -3410,6 +3750,7 @@
 dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
 {
 	int64_t holds;
+	uint64_t size;
 
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 	DBUF_VERIFY(db);
@@ -3505,34 +3846,30 @@
 				    DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
 				db->db_caching_status = dcs;
 
-				multilist_insert(dbuf_caches[dcs].cache, db);
-				(void) zfs_refcount_add_many(
-				    &dbuf_caches[dcs].size,
-				    db->db.db_size, db);
+				multilist_insert(&dbuf_caches[dcs].cache, db);
+				uint64_t db_size = db->db.db_size;
+				size = zfs_refcount_add_many(
+				    &dbuf_caches[dcs].size, db_size, db);
+				uint8_t db_level = db->db_level;
+				mutex_exit(&db->db_mtx);
 
 				if (dcs == DB_DBUF_METADATA_CACHE) {
 					DBUF_STAT_BUMP(metadata_cache_count);
 					DBUF_STAT_MAX(
 					    metadata_cache_size_bytes_max,
-					    zfs_refcount_count(
-					    &dbuf_caches[dcs].size));
+					    size);
 				} else {
-					DBUF_STAT_BUMP(
-					    cache_levels[db->db_level]);
 					DBUF_STAT_BUMP(cache_count);
-					DBUF_STAT_INCR(
-					    cache_levels_bytes[db->db_level],
-					    db->db.db_size);
 					DBUF_STAT_MAX(cache_size_bytes_max,
-					    zfs_refcount_count(
-					    &dbuf_caches[dcs].size));
+					    size);
+					DBUF_STAT_BUMP(cache_levels[db_level]);
+					DBUF_STAT_INCR(
+					    cache_levels_bytes[db_level],
+					    db_size);
 				}
-				mutex_exit(&db->db_mtx);
 
-				if (db->db_caching_status == DB_DBUF_CACHE &&
-				    !evicting) {
-					dbuf_evict_notify();
-				}
+				if (dcs == DB_DBUF_CACHE && !evicting)
+					dbuf_evict_notify(size);
 			}
 
 			if (do_arc_evict)
@@ -3614,7 +3951,7 @@
 }
 
 void
-dmu_buf_user_evict_wait()
+dmu_buf_user_evict_wait(void)
 {
 	taskq_wait(dbu_evict_taskq);
 }
@@ -3693,6 +4030,28 @@
 	}
 }
 
+static void
+dbuf_sync_bonus(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
+{
+	dmu_buf_impl_t *db = dr->dr_dbuf;
+	void *data = dr->dt.dl.dr_data;
+
+	ASSERT0(db->db_level);
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+	ASSERT(db->db_blkid == DMU_BONUS_BLKID);
+	ASSERT(data != NULL);
+
+	dnode_t *dn = dr->dr_dnode;
+	ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
+	    DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
+	bcopy(data, DN_BONUS(dn->dn_phys), DN_MAX_BONUS_LEN(dn->dn_phys));
+
+	dbuf_sync_leaf_verify_bonus_dnode(dr);
+
+	dbuf_undirty_bonus(dr);
+	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+}
+
 /*
  * When syncing out a blocks of dnodes, adjust the block to deal with
  * encryption.  Normally, we make sure the block is decrypted before writing
@@ -3744,8 +4103,7 @@
 dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = dr->dr_dbuf;
-	dnode_t *dn;
-	zio_t *zio;
+	dnode_t *dn = dr->dr_dnode;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 
@@ -3765,12 +4123,9 @@
 	ASSERT3U(db->db_state, ==, DB_CACHED);
 	ASSERT(db->db_buf != NULL);
 
-	DB_DNODE_ENTER(db);
-	dn = DB_DNODE(db);
 	/* Indirect block size must match what the dnode thinks it is. */
 	ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
 	dbuf_check_blkptr(dn, db);
-	DB_DNODE_EXIT(db);
 
 	/* Provide the pending dirty record to child dbufs */
 	db->db_data_pending = dr;
@@ -3779,7 +4134,7 @@
 
 	dbuf_write(dr, db->db_buf, tx);
 
-	zio = dr->dr_zio;
+	zio_t *zio = dr->dr_zio;
 	mutex_enter(&dr->dt.di.dr_mtx);
 	dbuf_sync_list(&dr->dt.di.dr_children, db->db_level - 1, tx);
 	ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
@@ -3787,7 +4142,6 @@
 	zio_nowait(zio);
 }
 
-#ifdef ZFS_DEBUG
 /*
  * Verify that the size of the data in our bonus buffer does not exceed
  * its recorded size.
@@ -3804,7 +4158,8 @@
 static void
 dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
 {
-	dnode_t *dn = DB_DNODE(dr->dr_dbuf);
+#ifdef ZFS_DEBUG
+	dnode_t *dn = dr->dr_dnode;
 
 	/*
 	 * Encrypted bonus buffers can have data past their bonuslen.
@@ -3824,8 +4179,155 @@
 	/* ensure that everything is zero after our data */
 	for (; datap_end < datap_max; datap_end++)
 		ASSERT(*datap_end == 0);
-}
 #endif
+}
+
+static blkptr_t *
+dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
+{
+	/* This must be a lightweight dirty record. */
+	ASSERT3P(dr->dr_dbuf, ==, NULL);
+	dnode_t *dn = dr->dr_dnode;
+
+	if (dn->dn_phys->dn_nlevels == 1) {
+		VERIFY3U(dr->dt.dll.dr_blkid, <, dn->dn_phys->dn_nblkptr);
+		return (&dn->dn_phys->dn_blkptr[dr->dt.dll.dr_blkid]);
+	} else {
+		dmu_buf_impl_t *parent_db = dr->dr_parent->dr_dbuf;
+		int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+		VERIFY3U(parent_db->db_level, ==, 1);
+		VERIFY3P(parent_db->db_dnode_handle->dnh_dnode, ==, dn);
+		VERIFY3U(dr->dt.dll.dr_blkid >> epbs, ==, parent_db->db_blkid);
+		blkptr_t *bp = parent_db->db.db_data;
+		return (&bp[dr->dt.dll.dr_blkid & ((1 << epbs) - 1)]);
+	}
+}
+
+static void
+dbuf_lightweight_ready(zio_t *zio)
+{
+	dbuf_dirty_record_t *dr = zio->io_private;
+	blkptr_t *bp = zio->io_bp;
+
+	if (zio->io_error != 0)
+		return;
+
+	dnode_t *dn = dr->dr_dnode;
+
+	blkptr_t *bp_orig = dbuf_lightweight_bp(dr);
+	spa_t *spa = dmu_objset_spa(dn->dn_objset);
+	int64_t delta = bp_get_dsize_sync(spa, bp) -
+	    bp_get_dsize_sync(spa, bp_orig);
+	dnode_diduse_space(dn, delta);
+
+	uint64_t blkid = dr->dt.dll.dr_blkid;
+	mutex_enter(&dn->dn_mtx);
+	if (blkid > dn->dn_phys->dn_maxblkid) {
+		ASSERT0(dn->dn_objset->os_raw_receive);
+		dn->dn_phys->dn_maxblkid = blkid;
+	}
+	mutex_exit(&dn->dn_mtx);
+
+	if (!BP_IS_EMBEDDED(bp)) {
+		uint64_t fill = BP_IS_HOLE(bp) ? 0 : 1;
+		BP_SET_FILL(bp, fill);
+	}
+
+	dmu_buf_impl_t *parent_db;
+	EQUIV(dr->dr_parent == NULL, dn->dn_phys->dn_nlevels == 1);
+	if (dr->dr_parent == NULL) {
+		parent_db = dn->dn_dbuf;
+	} else {
+		parent_db = dr->dr_parent->dr_dbuf;
+	}
+	rw_enter(&parent_db->db_rwlock, RW_WRITER);
+	*bp_orig = *bp;
+	rw_exit(&parent_db->db_rwlock);
+}
+
+static void
+dbuf_lightweight_physdone(zio_t *zio)
+{
+	dbuf_dirty_record_t *dr = zio->io_private;
+	dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
+	ASSERT3U(dr->dr_txg, ==, zio->io_txg);
+
+	/*
+	 * The callback will be called io_phys_children times.  Retire one
+	 * portion of our dirty space each time we are called.  Any rounding
+	 * error will be cleaned up by dbuf_lightweight_done().
+	 */
+	int delta = dr->dr_accounted / zio->io_phys_children;
+	dsl_pool_undirty_space(dp, delta, zio->io_txg);
+}
+
+static void
+dbuf_lightweight_done(zio_t *zio)
+{
+	dbuf_dirty_record_t *dr = zio->io_private;
+
+	VERIFY0(zio->io_error);
+
+	objset_t *os = dr->dr_dnode->dn_objset;
+	dmu_tx_t *tx = os->os_synctx;
+
+	if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
+		ASSERT(BP_EQUAL(zio->io_bp, &zio->io_bp_orig));
+	} else {
+		dsl_dataset_t *ds = os->os_dsl_dataset;
+		(void) dsl_dataset_block_kill(ds, &zio->io_bp_orig, tx, B_TRUE);
+		dsl_dataset_block_born(ds, zio->io_bp, tx);
+	}
+
+	/*
+	 * See comment in dbuf_write_done().
+	 */
+	if (zio->io_phys_children == 0) {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted, zio->io_txg);
+	} else {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+	}
+
+	abd_free(dr->dt.dll.dr_abd);
+	kmem_free(dr, sizeof (*dr));
+}
+
+noinline static void
+dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
+{
+	dnode_t *dn = dr->dr_dnode;
+	zio_t *pio;
+	if (dn->dn_phys->dn_nlevels == 1) {
+		pio = dn->dn_zio;
+	} else {
+		pio = dr->dr_parent->dr_zio;
+	}
+
+	zbookmark_phys_t zb = {
+		.zb_objset = dmu_objset_id(dn->dn_objset),
+		.zb_object = dn->dn_object,
+		.zb_level = 0,
+		.zb_blkid = dr->dt.dll.dr_blkid,
+	};
+
+	/*
+	 * See comment in dbuf_write().  This is so that zio->io_bp_orig
+	 * will have the old BP in dbuf_lightweight_done().
+	 */
+	dr->dr_bp_copy = *dbuf_lightweight_bp(dr);
+
+	dr->dr_zio = zio_write(pio, dmu_objset_spa(dn->dn_objset),
+	    dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd,
+	    dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd),
+	    &dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL,
+	    dbuf_lightweight_physdone, dbuf_lightweight_done, dr,
+	    ZIO_PRIORITY_ASYNC_WRITE,
+	    ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb);
+
+	zio_nowait(dr->dr_zio);
+}
 
 /*
  * dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is
@@ -3837,7 +4339,7 @@
 {
 	arc_buf_t **datap = &dr->dt.dl.dr_data;
 	dmu_buf_impl_t *db = dr->dr_dbuf;
-	dnode_t *dn;
+	dnode_t *dn = dr->dr_dnode;
 	objset_t *os;
 	uint64_t txg = tx->tx_txg;
 
@@ -3861,9 +4363,6 @@
 	}
 	DBUF_VERIFY(db);
 
-	DB_DNODE_ENTER(db);
-	dn = DB_DNODE(db);
-
 	if (db->db_blkid == DMU_SPILL_BLKID) {
 		mutex_enter(&dn->dn_mtx);
 		if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
@@ -3893,41 +4392,8 @@
 	 * be called).
 	 */
 	if (db->db_blkid == DMU_BONUS_BLKID) {
-		dbuf_dirty_record_t **drp;
-
-		ASSERT(*datap != NULL);
-		ASSERT0(db->db_level);
-		ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
-		    DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
-		bcopy(*datap, DN_BONUS(dn->dn_phys),
-		    DN_MAX_BONUS_LEN(dn->dn_phys));
-		DB_DNODE_EXIT(db);
-
-#ifdef ZFS_DEBUG
-		dbuf_sync_leaf_verify_bonus_dnode(dr);
-#endif
-
-		if (*datap != db->db.db_data) {
-			int slots = DB_DNODE(db)->dn_num_slots;
-			int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
-			kmem_free(*datap, bonuslen);
-			arc_space_return(bonuslen, ARC_SPACE_BONUS);
-		}
-		db->db_data_pending = NULL;
-		drp = &db->db_last_dirty;
-		while (*drp != dr)
-			drp = &(*drp)->dr_next;
-		ASSERT(dr->dr_next == NULL);
 		ASSERT(dr->dr_dbuf == db);
-		*drp = dr->dr_next;
-		if (dr->dr_dbuf->db_level != 0) {
-			mutex_destroy(&dr->dt.di.dr_mtx);
-			list_destroy(&dr->dt.di.dr_children);
-		}
-		kmem_free(dr, sizeof (dbuf_dirty_record_t));
-		ASSERT(db->db_dirtycnt > 0);
-		db->db_dirtycnt -= 1;
-		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+		dbuf_sync_bonus(dr, tx);
 		return;
 	}
 
@@ -3978,6 +4444,7 @@
 		int lsize = arc_buf_lsize(*datap);
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 		enum zio_compress compress_type = arc_get_compression(*datap);
+		uint8_t complevel = arc_get_complevel(*datap);
 
 		if (arc_is_encrypted(*datap)) {
 			boolean_t byteorder;
@@ -3988,11 +4455,12 @@
 			arc_get_raw_params(*datap, &byteorder, salt, iv, mac);
 			*datap = arc_alloc_raw_buf(os->os_spa, db,
 			    dmu_objset_id(os), byteorder, salt, iv, mac,
-			    dn->dn_type, psize, lsize, compress_type);
+			    dn->dn_type, psize, lsize, compress_type,
+			    complevel);
 		} else if (compress_type != ZIO_COMPRESS_OFF) {
 			ASSERT3U(type, ==, ARC_BUFC_DATA);
 			*datap = arc_alloc_compressed_buf(os->os_spa, db,
-			    psize, lsize, compress_type);
+			    psize, lsize, compress_type, complevel);
 		} else {
 			*datap = arc_alloc_buf(os->os_spa, db, type, psize);
 		}
@@ -4007,16 +4475,7 @@
 	ASSERT(!list_link_active(&dr->dr_dirty_node));
 	if (dn->dn_object == DMU_META_DNODE_OBJECT) {
 		list_insert_tail(&dn->dn_dirty_records[txg & TXG_MASK], dr);
-		DB_DNODE_EXIT(db);
 	} else {
-		/*
-		 * Although zio_nowait() does not "wait for an IO", it does
-		 * initiate the IO. If this is an empty write it seems plausible
-		 * that the IO could actually be completed before the nowait
-		 * returns. We need to DB_DNODE_EXIT() first in case
-		 * zio_nowait() invalidates the dbuf.
-		 */
-		DB_DNODE_EXIT(db);
 		zio_nowait(dr->dr_zio);
 	}
 }
@@ -4039,22 +4498,26 @@
 			    DMU_META_DNODE_OBJECT);
 			break;
 		}
-		if (dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
-		    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
-			VERIFY3U(dr->dr_dbuf->db_level, ==, level);
-		}
 		list_remove(list, dr);
-		if (dr->dr_dbuf->db_level > 0)
-			dbuf_sync_indirect(dr, tx);
-		else
-			dbuf_sync_leaf(dr, tx);
+		if (dr->dr_dbuf == NULL) {
+			dbuf_sync_lightweight(dr, tx);
+		} else {
+			if (dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
+			    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
+				VERIFY3U(dr->dr_dbuf->db_level, ==, level);
+			}
+			if (dr->dr_dbuf->db_level > 0)
+				dbuf_sync_indirect(dr, tx);
+			else
+				dbuf_sync_leaf(dr, tx);
+		}
 	}
 }
 
-/* ARGSUSED */
 static void
 dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 {
+	(void) buf;
 	dmu_buf_impl_t *db = vdb;
 	dnode_t *dn;
 	blkptr_t *bp = zio->io_bp;
@@ -4137,12 +4600,11 @@
 
 	mutex_exit(&db->db_mtx);
 
-	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+	db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_WRITER, FTAG);
 	*db->db_blkptr = *bp;
-	rw_exit(&dn->dn_struct_rwlock);
+	dmu_buf_unlock_parent(db, dblt, FTAG);
 }
 
-/* ARGSUSED */
 /*
  * This function gets called just prior to running through the compression
  * stage of the zio pipeline. If we're an indirect block comprised of only
@@ -4153,6 +4615,7 @@
 static void
 dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 {
+	(void) zio, (void) buf;
 	dmu_buf_impl_t *db = vdb;
 	dnode_t *dn;
 	blkptr_t *bp;
@@ -4180,9 +4643,9 @@
 		 * anybody from reading the blocks we're about to
 		 * zero out.
 		 */
-		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+		rw_enter(&db->db_rwlock, RW_WRITER);
 		bzero(db->db.db_data, db->db.db_size);
-		rw_exit(&dn->dn_struct_rwlock);
+		rw_exit(&db->db_rwlock);
 	}
 	DB_DNODE_EXIT(db);
 }
@@ -4196,10 +4659,10 @@
  * so this callback allows us to retire dirty space gradually, as the physical
  * i/os complete.
  */
-/* ARGSUSED */
 static void
 dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg)
 {
+	(void) buf;
 	dmu_buf_impl_t *db = arg;
 	objset_t *os = db->db_objset;
 	dsl_pool_t *dp = dmu_objset_pool(os);
@@ -4218,16 +4681,15 @@
 	dsl_pool_undirty_space(dp, delta, zio->io_txg);
 }
 
-/* ARGSUSED */
 static void
 dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 {
+	(void) buf;
 	dmu_buf_impl_t *db = vdb;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
 	blkptr_t *bp = db->db_blkptr;
 	objset_t *os = db->db_objset;
 	dmu_tx_t *tx = os->os_synctx;
-	dbuf_dirty_record_t **drp, *dr;
 
 	ASSERT0(zio->io_error);
 	ASSERT(db->db_blkptr == bp);
@@ -4248,24 +4710,18 @@
 
 	DBUF_VERIFY(db);
 
-	drp = &db->db_last_dirty;
-	while ((dr = *drp) != db->db_data_pending)
-		drp = &dr->dr_next;
+	dbuf_dirty_record_t *dr = db->db_data_pending;
+	dnode_t *dn = dr->dr_dnode;
 	ASSERT(!list_link_active(&dr->dr_dirty_node));
 	ASSERT(dr->dr_dbuf == db);
-	ASSERT(dr->dr_next == NULL);
-	*drp = dr->dr_next;
+	ASSERT(list_next(&db->db_dirty_records, dr) == NULL);
+	list_remove(&db->db_dirty_records, dr);
 
 #ifdef ZFS_DEBUG
 	if (db->db_blkid == DMU_SPILL_BLKID) {
-		dnode_t *dn;
-
-		DB_DNODE_ENTER(db);
-		dn = DB_DNODE(db);
 		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
 		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
 		    db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
-		DB_DNODE_EXIT(db);
 	}
 #endif
 
@@ -4277,21 +4733,16 @@
 				arc_buf_destroy(dr->dt.dl.dr_data, db);
 		}
 	} else {
-		dnode_t *dn;
-
-		DB_DNODE_ENTER(db);
-		dn = DB_DNODE(db);
 		ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
 		ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
 		if (!BP_IS_HOLE(db->db_blkptr)) {
-			ASSERTV(int epbs = dn->dn_phys->dn_indblkshift -
-			    SPA_BLKPTRSHIFT);
+			int epbs __maybe_unused = dn->dn_phys->dn_indblkshift -
+			    SPA_BLKPTRSHIFT;
 			ASSERT3U(db->db_blkid, <=,
 			    dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
 			ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
 			    db->db.db_size);
 		}
-		DB_DNODE_EXIT(db);
 		mutex_destroy(&dr->dt.di.dr_mtx);
 		list_destroy(&dr->dt.di.dr_children);
 	}
@@ -4366,7 +4817,7 @@
 	dbuf_write_done(zio, NULL, db);
 
 	if (zio->io_abd != NULL)
-		abd_put(zio->io_abd);
+		abd_free(zio->io_abd);
 }
 
 typedef struct dbuf_remap_impl_callback_arg {
@@ -4395,7 +4846,7 @@
 }
 
 static void
-dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, dmu_tx_t *tx)
+dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
 {
 	blkptr_t bp_copy = *bp;
 	spa_t *spa = dmu_objset_spa(dn->dn_objset);
@@ -4409,72 +4860,43 @@
 	if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
 	    &drica)) {
 		/*
-		 * The struct_rwlock prevents dbuf_read_impl() from
+		 * If the blkptr being remapped is tracked by a livelist,
+		 * then we need to make sure the livelist reflects the update.
+		 * First, cancel out the old blkptr by appending a 'FREE'
+		 * entry. Next, add an 'ALLOC' to track the new version. This
+		 * way we avoid trying to free an inaccurate blkptr at delete.
+		 * Note that embedded blkptrs are not tracked in livelists.
+		 */
+		if (dn->dn_objset != spa_meta_objset(spa)) {
+			dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset);
+			if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
+			    bp->blk_birth > ds->ds_dir->dd_origin_txg) {
+				ASSERT(!BP_IS_EMBEDDED(bp));
+				ASSERT(dsl_dir_is_clone(ds->ds_dir));
+				ASSERT(spa_feature_is_enabled(spa,
+				    SPA_FEATURE_LIVELIST));
+				bplist_append(&ds->ds_dir->dd_pending_frees,
+				    bp);
+				bplist_append(&ds->ds_dir->dd_pending_allocs,
+				    &bp_copy);
+			}
+		}
+
+		/*
+		 * The db_rwlock prevents dbuf_read_impl() from
 		 * dereferencing the BP while we are changing it.  To
 		 * avoid lock contention, only grab it when we are actually
 		 * changing the BP.
 		 */
-		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+		if (rw != NULL)
+			rw_enter(rw, RW_WRITER);
 		*bp = bp_copy;
-		rw_exit(&dn->dn_struct_rwlock);
+		if (rw != NULL)
+			rw_exit(rw);
 	}
 }
 
 /*
- * Returns true if a dbuf_remap would modify the dbuf. We do this by attempting
- * to remap a copy of every bp in the dbuf.
- */
-boolean_t
-dbuf_can_remap(const dmu_buf_impl_t *db)
-{
-	spa_t *spa = dmu_objset_spa(db->db_objset);
-	blkptr_t *bp = db->db.db_data;
-	boolean_t ret = B_FALSE;
-
-	ASSERT3U(db->db_level, >, 0);
-	ASSERT3S(db->db_state, ==, DB_CACHED);
-
-	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL));
-
-	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	for (int i = 0; i < db->db.db_size >> SPA_BLKPTRSHIFT; i++) {
-		blkptr_t bp_copy = bp[i];
-		if (spa_remap_blkptr(spa, &bp_copy, NULL, NULL)) {
-			ret = B_TRUE;
-			break;
-		}
-	}
-	spa_config_exit(spa, SCL_VDEV, FTAG);
-
-	return (ret);
-}
-
-boolean_t
-dnode_needs_remap(const dnode_t *dn)
-{
-	spa_t *spa = dmu_objset_spa(dn->dn_objset);
-	boolean_t ret = B_FALSE;
-
-	if (dn->dn_phys->dn_nlevels == 0) {
-		return (B_FALSE);
-	}
-
-	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL));
-
-	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	for (int j = 0; j < dn->dn_phys->dn_nblkptr; j++) {
-		blkptr_t bp_copy = dn->dn_phys->dn_blkptr[j];
-		if (spa_remap_blkptr(spa, &bp_copy, NULL, NULL)) {
-			ret = B_TRUE;
-			break;
-		}
-	}
-	spa_config_exit(spa, SCL_VDEV, FTAG);
-
-	return (ret);
-}
-
-/*
  * Remap any existing BP's to concrete vdevs, if possible.
  */
 static void
@@ -4489,7 +4911,7 @@
 	if (db->db_level > 0) {
 		blkptr_t *bp = db->db.db_data;
 		for (int i = 0; i < db->db.db_size >> SPA_BLKPTRSHIFT; i++) {
-			dbuf_remap_impl(dn, &bp[i], tx);
+			dbuf_remap_impl(dn, &bp[i], &db->db_rwlock, tx);
 		}
 	} else if (db->db.db_object == DMU_META_DNODE_OBJECT) {
 		dnode_phys_t *dnp = db->db.db_data;
@@ -4498,7 +4920,10 @@
 		for (int i = 0; i < db->db.db_size >> DNODE_SHIFT;
 		    i += dnp[i].dn_extra_slots + 1) {
 			for (int j = 0; j < dnp[i].dn_nblkptr; j++) {
-				dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], tx);
+				krwlock_t *lock = (dn->dn_dbuf == NULL ? NULL :
+				    &dn->dn_dbuf->db_rwlock);
+				dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], lock,
+				    tx);
 			}
 		}
 	}
@@ -4510,19 +4935,17 @@
 dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = dr->dr_dbuf;
-	dnode_t *dn;
+	dnode_t *dn = dr->dr_dnode;
 	objset_t *os;
 	dmu_buf_impl_t *parent = db->db_parent;
 	uint64_t txg = tx->tx_txg;
 	zbookmark_phys_t zb;
 	zio_prop_t zp;
-	zio_t *zio;
+	zio_t *pio; /* parent I/O */
 	int wp_flag = 0;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 
-	DB_DNODE_ENTER(db);
-	dn = DB_DNODE(db);
 	os = dn->dn_objset;
 
 	if (db->db_state != DB_NOFILL) {
@@ -4553,7 +4976,7 @@
 		 * our block pointer, so the parent must be released.
 		 */
 		ASSERT(arc_released(parent->db_buf));
-		zio = parent->db_data_pending->dr_zio;
+		pio = parent->db_data_pending->dr_zio;
 	} else {
 		/* Our parent is the dnode itself. */
 		ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 &&
@@ -4562,12 +4985,12 @@
 		if (db->db_blkid != DMU_SPILL_BLKID)
 			ASSERT3P(db->db_blkptr, ==,
 			    &dn->dn_phys->dn_blkptr[db->db_blkid]);
-		zio = dn->dn_zio;
+		pio = dn->dn_zio;
 	}
 
 	ASSERT(db->db_level == 0 || data == db->db_buf);
 	ASSERT3U(db->db_blkptr->blk_birth, <=, txg);
-	ASSERT(zio);
+	ASSERT(pio);
 
 	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
 	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
@@ -4578,7 +5001,6 @@
 	wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
 
 	dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
-	DB_DNODE_EXIT(db);
 
 	/*
 	 * We copy the blkptr now (rather than when we instantiate the dirty
@@ -4597,9 +5019,9 @@
 		abd_t *contents = (data != NULL) ?
 		    abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL;
 
-		dr->dr_zio = zio_write(zio, os->os_spa, txg,
-		    &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size,
-		    &zp, dbuf_write_override_ready, NULL, NULL,
+		dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy,
+		    contents, db->db.db_size, db->db.db_size, &zp,
+		    dbuf_write_override_ready, NULL, NULL,
 		    dbuf_write_override_done,
 		    dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 		mutex_enter(&db->db_mtx);
@@ -4610,7 +5032,7 @@
 	} else if (db->db_state == DB_NOFILL) {
 		ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
 		    zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
-		dr->dr_zio = zio_write(zio, os->os_spa, txg,
+		dr->dr_zio = zio_write(pio, os->os_spa, txg,
 		    &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
 		    dbuf_write_nofill_ready, NULL, NULL,
 		    dbuf_write_nofill_done, db,
@@ -4628,8 +5050,8 @@
 		if (db->db_level != 0)
 			children_ready_cb = dbuf_write_children_ready;
 
-		dr->dr_zio = arc_write(zio, os->os_spa, txg,
-		    &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
+		dr->dr_zio = arc_write(pio, os->os_spa, txg,
+		    &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
 		    &zp, dbuf_write_ready,
 		    children_ready_cb, dbuf_write_physdone,
 		    dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
@@ -4637,7 +5059,6 @@
 	}
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(dbuf_find);
 EXPORT_SYMBOL(dbuf_is_metadata);
 EXPORT_SYMBOL(dbuf_destroy);
@@ -4675,31 +5096,24 @@
 EXPORT_SYMBOL(dmu_buf_get_blkptr);
 
 /* BEGIN CSTYLED */
-module_param(dbuf_cache_max_bytes, ulong, 0644);
-MODULE_PARM_DESC(dbuf_cache_max_bytes,
+ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, max_bytes, ULONG, ZMOD_RW,
 	"Maximum size in bytes of the dbuf cache.");
 
-module_param(dbuf_cache_hiwater_pct, uint, 0644);
-MODULE_PARM_DESC(dbuf_cache_hiwater_pct,
+ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, hiwater_pct, UINT, ZMOD_RW,
 	"Percentage over dbuf_cache_max_bytes when dbufs must be evicted "
 	"directly.");
 
-module_param(dbuf_cache_lowater_pct, uint, 0644);
-MODULE_PARM_DESC(dbuf_cache_lowater_pct,
+ZFS_MODULE_PARAM(zfs_dbuf_cache, dbuf_cache_, lowater_pct, UINT, ZMOD_RW,
 	"Percentage below dbuf_cache_max_bytes when the evict thread stops "
 	"evicting dbufs.");
 
-module_param(dbuf_metadata_cache_max_bytes, ulong, 0644);
-MODULE_PARM_DESC(dbuf_metadata_cache_max_bytes,
+ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_max_bytes, ULONG, ZMOD_RW,
 	"Maximum size in bytes of the dbuf metadata cache.");
 
-module_param(dbuf_cache_shift, int, 0644);
-MODULE_PARM_DESC(dbuf_cache_shift,
+ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, cache_shift, INT, ZMOD_RW,
 	"Set the size of the dbuf cache to a log2 fraction of arc size.");
 
-module_param(dbuf_metadata_cache_shift, int, 0644);
-MODULE_PARM_DESC(dbuf_cache_shift,
-	"Set the size of the dbuf metadata cache to a log2 fraction of "
-	"arc size.");
+ZFS_MODULE_PARAM(zfs_dbuf, dbuf_, metadata_cache_shift, INT, ZMOD_RW,
+	"Set the size of the dbuf metadata cache to a log2 fraction of arc "
+	"size.");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/dbuf_stats.c b/zfs/module/zfs/dbuf_stats.c
index afe7c34..12bb568 100644
--- a/zfs/module/zfs/dbuf_stats.c
+++ b/zfs/module/zfs/dbuf_stats.c

@@ -61,7 +61,7 @@
 	return (0);
 }
 
-int
+static int
 __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
 {
 	arc_buf_info_t abi = { 0 };
@@ -134,7 +134,8 @@
 
 	ASSERT3S(dsh->idx, >=, 0);
 	ASSERT3S(dsh->idx, <=, h->hash_table_mask);
-	memset(buf, 0, size);
+	if (size)
+		buf[0] = 0;
 
 	mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
 	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
@@ -225,7 +226,7 @@
 	dbuf_stats_hash_table_destroy();
 }
 
-#if defined(_KERNEL)
-module_param(zfs_dbuf_state_index, int, 0644);
-MODULE_PARM_DESC(zfs_dbuf_state_index, "Calculate arc header index");
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, dbuf_state_index, INT, ZMOD_RW,
+	"Calculate arc header index");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/ddt.c b/zfs/module/zfs/ddt.c
index 9be17e5..35c0f2d 100644
--- a/zfs/module/zfs/ddt.c
+++ b/zfs/module/zfs/ddt.c

@@ -503,7 +503,7 @@
 {
 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 		ddt_t *ddt = spa->spa_ddt[c];
-		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+		for (enum ddt_type type = 0; type < DDT_TYPES && ddt; type++) {
 			for (enum ddt_class class = 0; class < DDT_CLASSES;
 			    class++) {
 				ddt_histogram_add(ddh,
@@ -552,65 +552,6 @@
 	return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
 }
 
-int
-ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
-{
-	spa_t *spa = ddt->ddt_spa;
-	uint64_t total_refcnt = 0;
-	uint64_t ditto = spa->spa_dedup_ditto;
-	int total_copies = 0;
-	int desired_copies = 0;
-	int copies_needed = 0;
-
-	for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
-		ddt_phys_t *ddp = &dde->dde_phys[p];
-		zio_t *zio = dde->dde_lead_zio[p];
-		uint64_t refcnt = ddp->ddp_refcnt;	/* committed refs */
-		if (zio != NULL)
-			refcnt += zio->io_parent_count;	/* pending refs */
-		if (ddp == ddp_willref)
-			refcnt++;			/* caller's ref */
-		if (refcnt != 0) {
-			total_refcnt += refcnt;
-			total_copies += p;
-		}
-	}
-
-	if (ditto == 0 || ditto > UINT32_MAX)
-		ditto = UINT32_MAX;
-
-	if (total_refcnt >= 1)
-		desired_copies++;
-	if (total_refcnt >= ditto)
-		desired_copies++;
-	if (total_refcnt >= ditto * ditto)
-		desired_copies++;
-
-	copies_needed = MAX(desired_copies, total_copies) - total_copies;
-
-	/* encrypted blocks store their IV in DVA[2] */
-	if (DDK_GET_CRYPT(&dde->dde_key))
-		copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
-
-	return (copies_needed);
-}
-
-int
-ddt_ditto_copies_present(ddt_entry_t *dde)
-{
-	ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO];
-	dva_t *dva = ddp->ddp_dva;
-	int copies = 0 - DVA_GET_GANG(dva);
-
-	for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
-		if (DVA_IS_VALID(dva))
-			copies++;
-
-	ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP);
-
-	return (copies);
-}
-
 size_t
 ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
 {
@@ -654,12 +595,6 @@
 }
 
 ddt_t *
-ddt_select_by_checksum(spa_t *spa, enum zio_checksum c)
-{
-	return (spa->spa_ddt[c]);
-}
-
-ddt_t *
 ddt_select(spa_t *spa, const blkptr_t *bp)
 {
 	return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]);
@@ -842,7 +777,7 @@
 			break;
 	}
 
-	return (AVL_ISIGN(cmp));
+	return (TREE_ISIGN(cmp));
 }
 
 static ddt_t *
@@ -1088,8 +1023,11 @@
 			continue;
 		}
 		if (p == DDT_PHYS_DITTO) {
-			if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0)
-				ddt_phys_free(ddt, ddk, ddp, txg);
+			/*
+			 * Note, we no longer create DDT-DITTO blocks, but we
+			 * don't want to leak any written by older software.
+			 */
+			ddt_phys_free(ddt, ddk, ddp, txg);
 			continue;
 		}
 		if (ddp->ddp_refcnt == 0)
@@ -1097,9 +1035,9 @@
 		total_refcnt += ddp->ddp_refcnt;
 	}
 
-	if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0)
-		nclass = DDT_CLASS_DITTO;
-	else if (total_refcnt > 1)
+	/* We do not create new DDT-DITTO blocks. */
+	ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth);
+	if (total_refcnt > 1)
 		nclass = DDT_CLASS_DUPLICATE;
 	else
 		nclass = DDT_CLASS_UNIQUE;
@@ -1243,7 +1181,7 @@
 	return (SET_ERROR(ENOENT));
 }
 
-#if defined(_KERNEL)
-module_param(zfs_dedup_prefetch, int, 0644);
-MODULE_PARM_DESC(zfs_dedup_prefetch, "Enable prefetching dedup-ed blks");
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_dedup, zfs_dedup_, prefetch, INT, ZMOD_RW,
+	"Enable prefetching dedup-ed blks");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/ddt_zap.c b/zfs/module/zfs/ddt_zap.c
index 3489d31..c5c9eda 100644
--- a/zfs/module/zfs/ddt_zap.c
+++ b/zfs/module/zfs/ddt_zap.c

@@ -46,7 +46,7 @@
 	    ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
 	    DMU_OT_NONE, 0, tx);
 
-	return (*objectp == 0 ? ENOTSUP : 0);
+	return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
 }
 
 static int

diff --git a/zfs/module/zfs/dmu.c b/zfs/module/zfs/dmu.c
index 2c1903d..96e98a4 100644
--- a/zfs/module/zfs/dmu.c
+++ b/zfs/module/zfs/dmu.c

@@ -20,12 +20,15 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
 #include <sys/dmu.h>
@@ -49,7 +52,8 @@
 #include <sys/sa.h>
 #include <sys/zfeature.h>
 #include <sys/abd.h>
-#include <sys/trace_dmu.h>
+#include <sys/trace_zfs.h>
+#include <sys/zfs_racct.h>
 #include <sys/zfs_rlock.h>
 #ifdef _KERNEL
 #include <sys/vmsystm.h>
@@ -67,19 +71,16 @@
  * will wait until the next TXG.
  * A value of zero will disable this throttle.
  */
-unsigned long zfs_per_txg_dirty_frees_percent = 5;
+unsigned long zfs_per_txg_dirty_frees_percent = 30;
 
 /*
- * Enable/disable forcing txg sync when dirty in dmu_offset_next.
+ * Enable/disable forcing txg sync when dirty checking for holes with lseek().
+ * By default this is enabled to ensure accurate hole reporting, but it can
+ * result in a significant performance penalty for lseek(SEEK_HOLE) heavy
+ * workloads.  Disabling this option will result in holes never being reported
+ * in dirty files, which is always safe.
  */
-int zfs_dmu_offset_next_sync = 0;
-
-/*
- * This can be used for testing, to ensure that certain actions happen
- * while in the middle of a remap (which might otherwise complete too
- * quickly).  Used by ztest(8).
- */
-int zfs_object_remap_one_indirect_delay_ms = 0;
+int zfs_dmu_offset_next_sync = 1;
 
 /*
  * Limit the amount we can prefetch with one call to this amount.  This
@@ -158,15 +159,15 @@
 	{	zfs_acl_byteswap,	"acl"		}
 };
 
-int
+static int
 dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
     void *tag, dmu_buf_t **dbp)
 {
 	uint64_t blkid;
 	dmu_buf_impl_t *db;
 
-	blkid = dbuf_whichblock(dn, 0, offset);
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	blkid = dbuf_whichblock(dn, 0, offset);
 	db = dbuf_hold(dn, blkid, tag);
 	rw_exit(&dn->dn_struct_rwlock);
 
@@ -190,8 +191,8 @@
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
-	blkid = dbuf_whichblock(dn, 0, offset);
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	blkid = dbuf_whichblock(dn, 0, offset);
 	db = dbuf_hold(dn, blkid, tag);
 	rw_exit(&dn->dn_struct_rwlock);
 	dnode_rele(dn, FTAG);
@@ -496,15 +497,17 @@
  * and can induce severe lock contention when writing to several files
  * whose dnodes are in the same block.
  */
-static int
+int
 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
     boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
+	zstream_t *zs = NULL;
 	uint64_t blkid, nblks, i;
 	uint32_t dbuf_flags;
 	int err;
-	zio_t *zio;
+	zio_t *zio = NULL;
+	boolean_t missed = B_FALSE;
 
 	ASSERT(length <= DMU_MAX_ACCESS);
 
@@ -536,39 +539,64 @@
 	}
 	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
 
-	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
+	if (read)
+		zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
+		    ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, 0, offset);
+	if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
+	    DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
+		/*
+		 * Prepare the zfetch before initiating the demand reads, so
+		 * that if multiple threads block on same indirect block, we
+		 * base predictions on the original less racy request order.
+		 */
+		zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
+		    read && DNODE_IS_CACHEABLE(dn), B_TRUE);
+	}
 	for (i = 0; i < nblks; i++) {
 		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
 		if (db == NULL) {
+			if (zs)
+				dmu_zfetch_run(zs, missed, B_TRUE);
 			rw_exit(&dn->dn_struct_rwlock);
 			dmu_buf_rele_array(dbp, nblks, tag);
-			zio_nowait(zio);
+			if (read)
+				zio_nowait(zio);
 			return (SET_ERROR(EIO));
 		}
 
-		/* initiate async i/o */
-		if (read)
+		/*
+		 * Initiate async demand data read.
+		 * We check the db_state after calling dbuf_read() because
+		 * (1) dbuf_read() may change the state to CACHED due to a
+		 * hit in the ARC, and (2) on a cache miss, a child will
+		 * have been added to "zio" but not yet completed, so the
+		 * state will not yet be CACHED.
+		 */
+		if (read) {
 			(void) dbuf_read(db, zio, dbuf_flags);
+			if (db->db_state != DB_CACHED)
+				missed = B_TRUE;
+		}
 		dbp[i] = &db->db;
 	}
 
-	if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
-	    DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
-		dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
-		    read && DNODE_IS_CACHEABLE(dn));
-	}
+	if (!read)
+		zfs_racct_write(length, nblks);
+
+	if (zs)
+		dmu_zfetch_run(zs, missed, B_TRUE);
 	rw_exit(&dn->dn_struct_rwlock);
 
-	/* wait for async i/o */
-	err = zio_wait(zio);
-	if (err) {
-		dmu_buf_rele_array(dbp, nblks, tag);
-		return (err);
-	}
-
-	/* wait for other io to complete */
 	if (read) {
+		/* wait for async read i/o */
+		err = zio_wait(zio);
+		if (err) {
+			dmu_buf_rele_array(dbp, nblks, tag);
+			return (err);
+		}
+
+		/* wait for other io to complete */
 		for (i = 0; i < nblks; i++) {
 			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
 			mutex_enter(&db->db_mtx);
@@ -688,7 +716,6 @@
 	if (err != 0)
 		return;
 
-	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	/*
 	 * offset + len - 1 is the last byte we want to prefetch for, and offset
 	 * is the first.  Then dbuf_whichblk(dn, level, off + len - 1) is the
@@ -696,6 +723,7 @@
 	 * offset)  is the first.  Then the number we need to prefetch is the
 	 * last - first + 1.
 	 */
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	if (level > 0 || dn->dn_datablkshift != 0) {
 		nblks = dbuf_whichblock(dn, level, offset + len - 1) -
 		    dbuf_whichblock(dn, level, offset) + 1;
@@ -708,7 +736,6 @@
 		for (int i = 0; i < nblks; i++)
 			dbuf_prefetch(dn, level, blkid + i, pri, 0);
 	}
-
 	rw_exit(&dn->dn_struct_rwlock);
 
 	dnode_rele(dn, FTAG);
@@ -790,13 +817,14 @@
  * otherwise return false.
  * Used below in dmu_free_long_range_impl() to enable abort when unmounting
  */
-/*ARGSUSED*/
 static boolean_t
 dmu_objset_zfs_unmounting(objset_t *os)
 {
 #ifdef _KERNEL
 	if (dmu_objset_type(os) == DMU_OST_ZFS)
 		return (zfs_get_vfs_flag_unmounted(os));
+#else
+	(void) os;
 #endif
 	return (B_FALSE);
 }
@@ -1117,137 +1145,6 @@
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
 
-static int
-dmu_object_remap_one_indirect(objset_t *os, dnode_t *dn,
-    uint64_t last_removal_txg, uint64_t offset)
-{
-	uint64_t l1blkid = dbuf_whichblock(dn, 1, offset);
-	dnode_t *dn_tx;
-	int err = 0;
-
-	rw_enter(&dn->dn_struct_rwlock, RW_READER);
-	dmu_buf_impl_t *dbuf = dbuf_hold_level(dn, 1, l1blkid, FTAG);
-	ASSERT3P(dbuf, !=, NULL);
-
-	/*
-	 * If the block hasn't been written yet, this default will ensure
-	 * we don't try to remap it.
-	 */
-	uint64_t birth = UINT64_MAX;
-	ASSERT3U(last_removal_txg, !=, UINT64_MAX);
-	if (dbuf->db_blkptr != NULL)
-		birth = dbuf->db_blkptr->blk_birth;
-	rw_exit(&dn->dn_struct_rwlock);
-
-	/*
-	 * If this L1 was already written after the last removal, then we've
-	 * already tried to remap it.  An additional hold is taken after the
-	 * dmu_tx_assign() to handle the case where the dnode is freed while
-	 * waiting for the next open txg.
-	 */
-	if (birth <= last_removal_txg &&
-	    dbuf_read(dbuf, NULL, DB_RF_MUST_SUCCEED) == 0 &&
-	    dbuf_can_remap(dbuf)) {
-		dmu_tx_t *tx = dmu_tx_create(os);
-		dmu_tx_hold_remap_l1indirect(tx, dn->dn_object);
-		err = dmu_tx_assign(tx, TXG_WAIT);
-		if (err == 0) {
-			err = dnode_hold(os, dn->dn_object, FTAG, &dn_tx);
-			if (err == 0) {
-				(void) dbuf_dirty(dbuf, tx);
-				dnode_rele(dn_tx, FTAG);
-			}
-			dmu_tx_commit(tx);
-		} else {
-			dmu_tx_abort(tx);
-		}
-	}
-
-	dbuf_rele(dbuf, FTAG);
-
-	delay(MSEC_TO_TICK(zfs_object_remap_one_indirect_delay_ms));
-
-	return (err);
-}
-
-/*
- * Remap all blockpointers in the object, if possible, so that they reference
- * only concrete vdevs.
- *
- * To do this, iterate over the L0 blockpointers and remap any that reference
- * an indirect vdev. Note that we only examine L0 blockpointers; since we
- * cannot guarantee that we can remap all blockpointer anyways (due to split
- * blocks), we do not want to make the code unnecessarily complicated to
- * catch the unlikely case that there is an L1 block on an indirect vdev that
- * contains no indirect blockpointers.
- */
-int
-dmu_object_remap_indirects(objset_t *os, uint64_t object,
-    uint64_t last_removal_txg)
-{
-	uint64_t offset, l1span;
-	int err;
-	dnode_t *dn, *dn_tx;
-
-	err = dnode_hold(os, object, FTAG, &dn);
-	if (err != 0) {
-		return (err);
-	}
-
-	if (dn->dn_nlevels <= 1) {
-		if (issig(JUSTLOOKING) && issig(FORREAL)) {
-			err = SET_ERROR(EINTR);
-		}
-
-		/*
-		 * If the dnode has no indirect blocks, we cannot dirty them.
-		 * We still want to remap the blkptr(s) in the dnode if
-		 * appropriate, so mark it as dirty.  An additional hold is
-		 * taken after the dmu_tx_assign() to handle the case where
-		 * the dnode is freed while waiting for the next open txg.
-		 */
-		if (err == 0 && dnode_needs_remap(dn)) {
-			dmu_tx_t *tx = dmu_tx_create(os);
-			dmu_tx_hold_bonus(tx, object);
-			err = dmu_tx_assign(tx, TXG_WAIT);
-			if (err == 0) {
-				err = dnode_hold(os, object, FTAG, &dn_tx);
-				if (err == 0) {
-					dnode_setdirty(dn_tx, tx);
-					dnode_rele(dn_tx, FTAG);
-				}
-				dmu_tx_commit(tx);
-			} else {
-				dmu_tx_abort(tx);
-			}
-		}
-
-		dnode_rele(dn, FTAG);
-		return (err);
-	}
-
-	offset = 0;
-	l1span = 1ULL << (dn->dn_indblkshift - SPA_BLKPTRSHIFT +
-	    dn->dn_datablkshift);
-	/*
-	 * Find the next L1 indirect that is not a hole.
-	 */
-	while (dnode_next_offset(dn, 0, &offset, 2, 1, 0) == 0) {
-		if (issig(JUSTLOOKING) && issig(FORREAL)) {
-			err = SET_ERROR(EINTR);
-			break;
-		}
-		if ((err = dmu_object_remap_one_indirect(os, dn,
-		    last_removal_txg, offset)) != 0) {
-			break;
-		}
-		offset += l1span;
-	}
-
-	dnode_rele(dn, FTAG);
-	return (err);
-}
-
 void
 dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
     dmu_tx_t *tx)
@@ -1288,171 +1185,32 @@
 	dmu_buf_rele(db, FTAG);
 }
 
-/*
- * DMU support for xuio
- */
-kstat_t *xuio_ksp = NULL;
-
-typedef struct xuio_stats {
-	/* loaned yet not returned arc_buf */
-	kstat_named_t xuiostat_onloan_rbuf;
-	kstat_named_t xuiostat_onloan_wbuf;
-	/* whether a copy is made when loaning out a read buffer */
-	kstat_named_t xuiostat_rbuf_copied;
-	kstat_named_t xuiostat_rbuf_nocopy;
-	/* whether a copy is made when assigning a write buffer */
-	kstat_named_t xuiostat_wbuf_copied;
-	kstat_named_t xuiostat_wbuf_nocopy;
-} xuio_stats_t;
-
-static xuio_stats_t xuio_stats = {
-	{ "onloan_read_buf",	KSTAT_DATA_UINT64 },
-	{ "onloan_write_buf",	KSTAT_DATA_UINT64 },
-	{ "read_buf_copied",	KSTAT_DATA_UINT64 },
-	{ "read_buf_nocopy",	KSTAT_DATA_UINT64 },
-	{ "write_buf_copied",	KSTAT_DATA_UINT64 },
-	{ "write_buf_nocopy",	KSTAT_DATA_UINT64 }
-};
-
-#define	XUIOSTAT_INCR(stat, val)        \
-	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
-#define	XUIOSTAT_BUMP(stat)	XUIOSTAT_INCR(stat, 1)
-
-#ifdef HAVE_UIO_ZEROCOPY
-int
-dmu_xuio_init(xuio_t *xuio, int nblk)
-{
-	dmu_xuio_t *priv;
-	uio_t *uio = &xuio->xu_uio;
-
-	uio->uio_iovcnt = nblk;
-	uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP);
-
-	priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
-	priv->cnt = nblk;
-	priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
-	priv->iovp = (iovec_t *)uio->uio_iov;
-	XUIO_XUZC_PRIV(xuio) = priv;
-
-	if (XUIO_XUZC_RW(xuio) == UIO_READ)
-		XUIOSTAT_INCR(xuiostat_onloan_rbuf, nblk);
-	else
-		XUIOSTAT_INCR(xuiostat_onloan_wbuf, nblk);
-
-	return (0);
-}
-
 void
-dmu_xuio_fini(xuio_t *xuio)
+dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+    dmu_tx_t *tx)
 {
-	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-	int nblk = priv->cnt;
+	int numbufs, i;
+	dmu_buf_t **dbp;
 
-	kmem_free(priv->iovp, nblk * sizeof (iovec_t));
-	kmem_free(priv->bufs, nblk * sizeof (arc_buf_t *));
-	kmem_free(priv, sizeof (dmu_xuio_t));
-
-	if (XUIO_XUZC_RW(xuio) == UIO_READ)
-		XUIOSTAT_INCR(xuiostat_onloan_rbuf, -nblk);
-	else
-		XUIOSTAT_INCR(xuiostat_onloan_wbuf, -nblk);
-}
-
-/*
- * Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf }
- * and increase priv->next by 1.
- */
-int
-dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)
-{
-	struct iovec *iov;
-	uio_t *uio = &xuio->xu_uio;
-	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-	int i = priv->next++;
-
-	ASSERT(i < priv->cnt);
-	ASSERT(off + n <= arc_buf_lsize(abuf));
-	iov = (iovec_t *)uio->uio_iov + i;
-	iov->iov_base = (char *)abuf->b_data + off;
-	iov->iov_len = n;
-	priv->bufs[i] = abuf;
-	return (0);
-}
-
-int
-dmu_xuio_cnt(xuio_t *xuio)
-{
-	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-	return (priv->cnt);
-}
-
-arc_buf_t *
-dmu_xuio_arcbuf(xuio_t *xuio, int i)
-{
-	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-
-	ASSERT(i < priv->cnt);
-	return (priv->bufs[i]);
-}
-
-void
-dmu_xuio_clear(xuio_t *xuio, int i)
-{
-	dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio);
-
-	ASSERT(i < priv->cnt);
-	priv->bufs[i] = NULL;
-}
-#endif /* HAVE_UIO_ZEROCOPY */
-
-static void
-xuio_stat_init(void)
-{
-	xuio_ksp = kstat_create("zfs", 0, "xuio_stats", "misc",
-	    KSTAT_TYPE_NAMED, sizeof (xuio_stats) / sizeof (kstat_named_t),
-	    KSTAT_FLAG_VIRTUAL);
-	if (xuio_ksp != NULL) {
-		xuio_ksp->ks_data = &xuio_stats;
-		kstat_install(xuio_ksp);
-	}
-}
-
-static void
-xuio_stat_fini(void)
-{
-	if (xuio_ksp != NULL) {
-		kstat_delete(xuio_ksp);
-		xuio_ksp = NULL;
-	}
-}
-
-void
-xuio_stat_wbuf_copied(void)
-{
-	XUIOSTAT_BUMP(xuiostat_wbuf_copied);
-}
-
-void
-xuio_stat_wbuf_nocopy(void)
-{
-	XUIOSTAT_BUMP(xuiostat_wbuf_nocopy);
+	VERIFY0(dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
+	    &numbufs, &dbp));
+	for (i = 0; i < numbufs; i++)
+		dmu_buf_redact(dbp[i], tx);
+	dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
 
 #ifdef _KERNEL
 int
-dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
+dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
 {
 	dmu_buf_t **dbp;
 	int numbufs, i, err;
-#ifdef HAVE_UIO_ZEROCOPY
-	xuio_t *xuio = NULL;
-#endif
 
 	/*
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
+	err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
 	    TRUE, FTAG, &numbufs, &dbp, 0);
 	if (err)
 		return (err);
@@ -1464,28 +1222,12 @@
 
 		ASSERT(size > 0);
 
-		bufoff = uio->uio_loffset - db->db_offset;
+		bufoff = zfs_uio_offset(uio) - db->db_offset;
 		tocpy = MIN(db->db_size - bufoff, size);
 
-#ifdef HAVE_UIO_ZEROCOPY
-		if (xuio) {
-			dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-			arc_buf_t *dbuf_abuf = dbi->db_buf;
-			arc_buf_t *abuf = dbuf_loan_arcbuf(dbi);
-			err = dmu_xuio_add(xuio, abuf, bufoff, tocpy);
-			if (!err) {
-				uio->uio_resid -= tocpy;
-				uio->uio_loffset += tocpy;
-			}
+		err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy,
+		    UIO_READ, uio);
 
-			if (abuf == dbuf_abuf)
-				XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
-			else
-				XUIOSTAT_BUMP(xuiostat_rbuf_copied);
-		} else
-#endif
-			err = uiomove((char *)db->db_data + bufoff, tocpy,
-			    UIO_READ, uio);
 		if (err)
 			break;
 
@@ -1499,14 +1241,14 @@
 /*
  * Read 'size' bytes into the uio buffer.
  * From object zdb->db_object.
- * Starting at offset uio->uio_loffset.
+ * Starting at zfs_uio_offset(uio).
  *
  * If the caller already has a dbuf in the target object
  * (e.g. its bonus buffer), this routine is faster than dmu_read_uio(),
  * because we don't have to find the dnode_t for the object.
  */
 int
-dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size)
+dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
 	dnode_t *dn;
@@ -1526,10 +1268,10 @@
 /*
  * Read 'size' bytes into the uio buffer.
  * From the specified object
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
  */
 int
-dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
+dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size)
 {
 	dnode_t *dn;
 	int err;
@@ -1549,14 +1291,14 @@
 }
 
 int
-dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
+dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
 {
 	dmu_buf_t **dbp;
 	int numbufs;
 	int err = 0;
 	int i;
 
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
+	err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
 	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
 	if (err)
 		return (err);
@@ -1568,7 +1310,7 @@
 
 		ASSERT(size > 0);
 
-		bufoff = uio->uio_loffset - db->db_offset;
+		bufoff = zfs_uio_offset(uio) - db->db_offset;
 		tocpy = MIN(db->db_size - bufoff, size);
 
 		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
@@ -1579,13 +1321,13 @@
 			dmu_buf_will_dirty(db, tx);
 
 		/*
-		 * XXX uiomove could block forever (eg.nfs-backed
+		 * XXX zfs_uiomove could block forever (e.g. NFS-backed
 		 * pages).  There needs to be a uiolockdown() function
-		 * to lock the pages in memory, so that uiomove won't
+		 * to lock the pages in memory, so that zfs_uiomove won't
 		 * block.
 		 */
-		err = uiomove((char *)db->db_data + bufoff, tocpy,
-		    UIO_WRITE, uio);
+		err = zfs_uio_fault_move((char *)db->db_data + bufoff,
+		    tocpy, UIO_WRITE, uio);
 
 		if (tocpy == db->db_size)
 			dmu_buf_fill_done(db, tx);
@@ -1603,14 +1345,14 @@
 /*
  * Write 'size' bytes from the uio buffer.
  * To object zdb->db_object.
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
  *
  * If the caller already has a dbuf in the target object
  * (e.g. its bonus buffer), this routine is faster than dmu_write_uio(),
  * because we don't have to find the dnode_t for the object.
  */
 int
-dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
+dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
     dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
@@ -1631,10 +1373,10 @@
 /*
  * Write 'size' bytes from the uio buffer.
  * To the specified object.
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
  */
 int
-dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
+dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
     dmu_tx_t *tx)
 {
 	dnode_t *dn;
@@ -1676,54 +1418,30 @@
 	arc_buf_destroy(buf, FTAG);
 }
 
-void
-dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
-    dmu_buf_t *handle, dmu_tx_t *tx)
+/*
+ * A "lightweight" write is faster than a regular write (e.g.
+ * dmu_write_by_dnode() or dmu_assign_arcbuf_by_dnode()), because it avoids the
+ * CPU cost of creating a dmu_buf_impl_t and arc_buf_[hdr_]t.  However, the
+ * data can not be read or overwritten until the transaction's txg has been
+ * synced.  This makes it appropriate for workloads that are known to be
+ * (temporarily) write-only, like "zfs receive".
+ *
+ * A single block is written, starting at the specified offset in bytes.  If
+ * the call is successful, it returns 0 and the provided abd has been
+ * consumed (the caller should not free it).
+ */
+int
+dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
+    const zio_prop_t *zp, enum zio_flag flags, dmu_tx_t *tx)
 {
-	dmu_buf_t *dst_handle;
-	dmu_buf_impl_t *dstdb;
-	dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
-	dmu_object_type_t type;
-	arc_buf_t *abuf;
-	uint64_t datalen;
-	boolean_t byteorder;
-	uint8_t salt[ZIO_DATA_SALT_LEN];
-	uint8_t iv[ZIO_DATA_IV_LEN];
-	uint8_t mac[ZIO_DATA_MAC_LEN];
-
-	ASSERT3P(srcdb->db_buf, !=, NULL);
-
-	/* hold the db that we want to write to */
-	VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle,
-	    DMU_READ_NO_DECRYPT));
-	dstdb = (dmu_buf_impl_t *)dst_handle;
-	datalen = arc_buf_size(srcdb->db_buf);
-
-	DB_DNODE_ENTER(dstdb);
-	type = DB_DNODE(dstdb)->dn_type;
-	DB_DNODE_EXIT(dstdb);
-
-	/* allocated an arc buffer that matches the type of srcdb->db_buf */
-	if (arc_is_encrypted(srcdb->db_buf)) {
-		arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
-		abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
-		    byteorder, salt, iv, mac, type,
-		    datalen, arc_buf_lsize(srcdb->db_buf),
-		    arc_get_compression(srcdb->db_buf));
-	} else {
-		/* we won't get a compressed db back from dmu_buf_hold() */
-		ASSERT3U(arc_get_compression(srcdb->db_buf),
-		    ==, ZIO_COMPRESS_OFF);
-		abuf = arc_loan_buf(os->os_spa,
-		    DMU_OT_IS_METADATA(type), datalen);
-	}
-
-	ASSERT3U(datalen, ==, arc_buf_size(abuf));
-
-	/* copy the data to the new buffer and assign it to the dstdb */
-	bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen);
-	dbuf_assign_arcbuf(dstdb, abuf, tx);
-	dmu_buf_rele(dst_handle, FTAG);
+	dbuf_dirty_record_t *dr =
+	    dbuf_dirty_lightweight(dn, dbuf_whichblock(dn, 0, offset), tx);
+	if (dr == NULL)
+		return (SET_ERROR(EIO));
+	dr->dt.dll.dr_abd = abd;
+	dr->dt.dll.dr_props = *zp;
+	dr->dt.dll.dr_flags = flags;
+	return (0);
 }
 
 /*
@@ -1749,10 +1467,11 @@
 	rw_exit(&dn->dn_struct_rwlock);
 
 	/*
-	 * We can only assign if the offset is aligned, the arc buf is the
-	 * same size as the dbuf, and the dbuf is not metadata.
+	 * We can only assign if the offset is aligned and the arc buf is the
+	 * same size as the dbuf.
 	 */
 	if (offset == db->db.db_offset && blksz == db->db.db_size) {
+		zfs_racct_write(blksz, 1);
 		dbuf_assign_arcbuf(db, buf, tx);
 		dbuf_rele(db, FTAG);
 	} else {
@@ -1763,7 +1482,6 @@
 		dbuf_rele(db, FTAG);
 		dmu_write(os, object, offset, blksz, buf->b_data, tx);
 		dmu_return_arcbuf(buf);
-		XUIOSTAT_BUMP(xuiostat_wbuf_copied);
 	}
 
 	return (0);
@@ -1790,10 +1508,10 @@
 	dmu_tx_t		*dsa_tx;
 } dmu_sync_arg_t;
 
-/* ARGSUSED */
 static void
 dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
 {
+	(void) buf;
 	dmu_sync_arg_t *dsa = varg;
 	dmu_buf_t *db = dsa->dsa_zgd->zgd_db;
 	blkptr_t *bp = zio->io_bp;
@@ -1818,10 +1536,10 @@
 	dmu_sync_ready(zio, NULL, zio->io_private);
 }
 
-/* ARGSUSED */
 static void
 dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
 {
+	(void) buf;
 	dmu_sync_arg_t *dsa = varg;
 	dbuf_dirty_record_t *dr = dsa->dsa_dr;
 	dmu_buf_impl_t *db = dr->dr_dbuf;
@@ -1893,7 +1611,7 @@
 		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
 
 		if (!BP_IS_HOLE(bp)) {
-			ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig);
+			blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig;
 			ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
 			ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
 			ASSERT(zio->io_bp->blk_birth == zio->io_txg);
@@ -1906,7 +1624,7 @@
 
 	dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
 
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 	kmem_free(dsa, sizeof (*dsa));
 }
 
@@ -2001,7 +1719,7 @@
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
 	objset_t *os = db->db_objset;
 	dsl_dataset_t *ds = os->os_dsl_dataset;
-	dbuf_dirty_record_t *dr;
+	dbuf_dirty_record_t *dr, *dr_next;
 	dmu_sync_arg_t *dsa;
 	zbookmark_phys_t zb;
 	zio_prop_t zp;
@@ -2049,9 +1767,7 @@
 		return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
 	}
 
-	dr = db->db_last_dirty;
-	while (dr && dr->dr_txg != txg)
-		dr = dr->dr_next;
+	dr = dbuf_find_dirty_eq(db, txg);
 
 	if (dr == NULL) {
 		/*
@@ -2062,7 +1778,8 @@
 		return (SET_ERROR(ENOENT));
 	}
 
-	ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg);
+	dr_next = list_next(&db->db_dirty_records, dr);
+	ASSERT(dr_next == NULL || dr_next->dr_txg < txg);
 
 	if (db->db_blkptr != NULL) {
 		/*
@@ -2103,7 +1820,7 @@
 	 */
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	if (dr->dr_next != NULL || dnode_block_freed(dn, db->db_blkid))
+	if (dr_next != NULL || dnode_block_freed(dn, db->db_blkid))
 		zp.zp_nopwrite = B_FALSE;
 	DB_DNODE_EXIT(db);
 
@@ -2130,7 +1847,7 @@
 	dsa->dsa_tx = NULL;
 
 	zio_nowait(arc_write(pio, os->os_spa, txg,
-	    zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+	    zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
 	    &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
 
@@ -2236,6 +1953,7 @@
 	    (wp & WP_SPILL));
 	enum zio_checksum checksum = os->os_checksum;
 	enum zio_compress compress = os->os_compress;
+	uint8_t complevel = os->os_complevel;
 	enum zio_checksum dedup_checksum = os->os_dedup_checksum;
 	boolean_t dedup = B_FALSE;
 	boolean_t nopwrite = B_FALSE;
@@ -2271,12 +1989,22 @@
 		    ZCHECKSUM_FLAG_EMBEDDED))
 			checksum = ZIO_CHECKSUM_FLETCHER_4;
 
-		if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
-		    (os->os_redundant_metadata ==
-		    ZFS_REDUNDANT_METADATA_MOST &&
-		    (level >= zfs_redundant_metadata_most_ditto_level ||
-		    DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
+		switch (os->os_redundant_metadata) {
+		case ZFS_REDUNDANT_METADATA_ALL:
 			copies++;
+			break;
+		case ZFS_REDUNDANT_METADATA_MOST:
+			if (level >= zfs_redundant_metadata_most_ditto_level ||
+			    DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))
+				copies++;
+			break;
+		case ZFS_REDUNDANT_METADATA_SOME:
+			if (DMU_OT_IS_CRITICAL(type))
+				copies++;
+			break;
+		case ZFS_REDUNDANT_METADATA_NONE:
+			break;
+		}
 	} else if (wp & WP_NOFILL) {
 		ASSERT(level == 0);
 
@@ -2292,6 +2020,8 @@
 	} else {
 		compress = zio_compress_select(os->os_spa, dn->dn_compress,
 		    compress);
+		complevel = zio_complevel_select(os->os_spa, compress,
+		    complevel, complevel);
 
 		checksum = (dedup_checksum == ZIO_CHECKSUM_OFF) ?
 		    zio_checksum_select(dn->dn_checksum, checksum) :
@@ -2350,6 +2080,7 @@
 	}
 
 	zp->zp_compress = compress;
+	zp->zp_complevel = complevel;
 	zp->zp_checksum = checksum;
 	zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
 	zp->zp_level = level;
@@ -2369,53 +2100,56 @@
 }
 
 /*
- * This function is only called from zfs_holey_common() for zpl_llseek()
- * in order to determine the location of holes.  In order to accurately
- * report holes all dirty data must be synced to disk.  This causes extremely
- * poor performance when seeking for holes in a dirty file.  As a compromise,
- * only provide hole data when the dnode is clean.  When a dnode is dirty
- * report the dnode as having no holes which is always a safe thing to do.
+ * Reports the location of data and holes in an object.  In order to
+ * accurately report holes, all dirty data must be synced to disk.  This
+ * causes extremely poor performance when seeking for holes in a dirty file.
+ * As a compromise, only provide hole data when the dnode is clean.  When
+ * a dnode is dirty, report the dnode as having no holes by returning EBUSY,
+ * which is always safe to do.
  */
 int
 dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
 {
 	dnode_t *dn;
-	int i, err;
-	boolean_t clean = B_TRUE;
+	int restarted = 0, err;
 
+restart:
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
-	/*
-	 * Check if dnode is dirty
-	 */
-	for (i = 0; i < TXG_SIZE; i++) {
-		if (multilist_link_active(&dn->dn_dirty_link[i])) {
-			clean = B_FALSE;
-			break;
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+
+	if (dnode_is_dirty(dn)) {
+		/*
+		 * If the zfs_dmu_offset_next_sync module option is enabled
+		 * then hole reporting has been requested.  Dirty dnodes
+		 * must be synced to disk to accurately report holes.
+		 *
+		 * Provided a RL_READER rangelock spanning 0-UINT64_MAX is
+		 * held by the caller only a single restart will be required.
+		 * We tolerate callers which do not hold the rangelock by
+		 * returning EBUSY and not reporting holes after one restart.
+		 */
+		if (zfs_dmu_offset_next_sync) {
+			rw_exit(&dn->dn_struct_rwlock);
+			dnode_rele(dn, FTAG);
+
+			if (restarted)
+				return (SET_ERROR(EBUSY));
+
+			txg_wait_synced(dmu_objset_pool(os), 0);
+			restarted = 1;
+			goto restart;
 		}
-	}
 
-	/*
-	 * If compatibility option is on, sync any current changes before
-	 * we go trundling through the block pointers.
-	 */
-	if (!clean && zfs_dmu_offset_next_sync) {
-		clean = B_TRUE;
-		dnode_rele(dn, FTAG);
-		txg_wait_synced(dmu_objset_pool(os), 0);
-		err = dnode_hold(os, object, FTAG, &dn);
-		if (err)
-			return (err);
-	}
-
-	if (clean)
-		err = dnode_next_offset(dn,
-		    (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
-	else
 		err = SET_ERROR(EBUSY);
+	} else {
+		err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
+		    (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
+	}
 
+	rw_exit(&dn->dn_struct_rwlock);
 	dnode_rele(dn, FTAG);
 
 	return (err);
@@ -2491,7 +2225,6 @@
 
 /*
  * Faster still when you only care about the size.
- * This is specifically optimized for zfs_getattr().
  */
 void
 dmu_object_size_from_db(dmu_buf_t *db_fake, uint32_t *blksize,
@@ -2561,10 +2294,10 @@
 		buf[i] = BSWAP_16(buf[i]);
 }
 
-/* ARGSUSED */
 void
 byteswap_uint8_array(void *vbuf, size_t size)
 {
+	(void) vbuf, (void) size;
 }
 
 void
@@ -2573,7 +2306,6 @@
 	abd_init();
 	zfs_dbgmsg_init();
 	sa_cache_init();
-	xuio_stat_init();
 	dmu_objset_init();
 	dnode_init();
 	zfetch_init();
@@ -2593,13 +2325,11 @@
 	dbuf_fini();
 	dnode_fini();
 	dmu_objset_fini();
-	xuio_stat_fini();
 	sa_cache_fini();
 	zfs_dbgmsg_fini();
 	abd_fini();
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_bonus_hold);
 EXPORT_SYMBOL(dmu_bonus_hold_by_dnode);
 EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus);
@@ -2634,21 +2364,15 @@
 EXPORT_SYMBOL(dmu_ot);
 
 /* BEGIN CSTYLED */
-module_param(zfs_nopwrite_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_nopwrite_enabled, "Enable NOP writes");
+ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW,
+	"Enable NOP writes");
 
-module_param(zfs_per_txg_dirty_frees_percent, ulong, 0644);
-MODULE_PARM_DESC(zfs_per_txg_dirty_frees_percent,
-	"percentage of dirtied blocks from frees in one TXG");
+ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, ULONG, ZMOD_RW,
+	"Percentage of dirtied blocks from frees in one TXG");
 
-module_param(zfs_dmu_offset_next_sync, int, 0644);
-MODULE_PARM_DESC(zfs_dmu_offset_next_sync,
+ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW,
 	"Enable forcing txg sync to find holes");
 
-module_param(dmu_prefetch_max, int, 0644);
-MODULE_PARM_DESC(dmu_prefetch_max,
+ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, INT, ZMOD_RW,
 	"Limit one prefetch call to this size");
-
 /* END CSTYLED */
-
-#endif

diff --git a/zfs/module/zfs/dmu_diff.c b/zfs/module/zfs/dmu_diff.c
index 6a7cd84..1382da2 100644
--- a/zfs/module/zfs/dmu_diff.c
+++ b/zfs/module/zfs/dmu_diff.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  */
 
@@ -40,33 +40,36 @@
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_file.h>
 
-struct diffarg {
-	struct vnode *da_vp;		/* file to which we are reporting */
+
+typedef struct dmu_diffarg {
+	zfs_file_t *da_fp;		/* file to which we are reporting */
 	offset_t *da_offp;
 	int da_err;			/* error that stopped diff search */
 	dmu_diff_record_t da_ddr;
-};
+} dmu_diffarg_t;
 
 static int
-write_record(struct diffarg *da)
+write_record(dmu_diffarg_t *da)
 {
-	ssize_t resid; /* have to get resid to get detailed errno */
+	zfs_file_t *fp;
+	ssize_t resid;
 
 	if (da->da_ddr.ddr_type == DDR_NONE) {
 		da->da_err = 0;
 		return (0);
 	}
 
-	da->da_err = vn_rdwr(UIO_WRITE, da->da_vp, (caddr_t)&da->da_ddr,
-	    sizeof (da->da_ddr), 0, UIO_SYSSPACE, FAPPEND,
-	    RLIM64_INFINITY, CRED(), &resid);
+	fp = da->da_fp;
+	da->da_err = zfs_file_write(fp, (caddr_t)&da->da_ddr,
+	    sizeof (da->da_ddr), &resid);
 	*da->da_offp += sizeof (da->da_ddr);
 	return (da->da_err);
 }
 
 static int
-report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last)
+report_free_dnode_range(dmu_diffarg_t *da, uint64_t first, uint64_t last)
 {
 	ASSERT(first <= last);
 	if (da->da_ddr.ddr_type != DDR_FREE ||
@@ -83,7 +86,7 @@
 }
 
 static int
-report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp)
+report_dnode(dmu_diffarg_t *da, uint64_t object, dnode_phys_t *dnp)
 {
 	ASSERT(dnp != NULL);
 	if (dnp->dn_type == DMU_OT_NONE)
@@ -105,18 +108,19 @@
 	(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
 	(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
 
-/* ARGSUSED */
 static int
 diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
-	struct diffarg *da = arg;
+	(void) zilog;
+	dmu_diffarg_t *da = arg;
 	int err = 0;
 
 	if (issig(JUSTLOOKING) && issig(FORREAL))
 		return (SET_ERROR(EINTR));
 
-	if (bp == NULL || zb->zb_object != DMU_META_DNODE_OBJECT)
+	if (zb->zb_level == ZB_DNODE_LEVEL ||
+	    zb->zb_object != DMU_META_DNODE_OBJECT)
 		return (0);
 
 	if (BP_IS_HOLE(bp)) {
@@ -161,9 +165,9 @@
 
 int
 dmu_diff(const char *tosnap_name, const char *fromsnap_name,
-    struct vnode *vp, offset_t *offp)
+    zfs_file_t *fp, offset_t *offp)
 {
-	struct diffarg da;
+	dmu_diffarg_t da;
 	dsl_dataset_t *fromsnap;
 	dsl_dataset_t *tosnap;
 	dsl_pool_t *dp;
@@ -204,7 +208,7 @@
 	dsl_dataset_long_hold(tosnap, FTAG);
 	dsl_pool_rele(dp, FTAG);
 
-	da.da_vp = vp;
+	da.da_fp = fp;
 	da.da_offp = offp;
 	da.da_ddr.ddr_type = DDR_NONE;
 	da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0;

diff --git a/zfs/module/zfs/dmu_object.c b/zfs/module/zfs/dmu_object.c
index ec78ebb..12cdbd6 100644
--- a/zfs/module/zfs/dmu_object.c
+++ b/zfs/module/zfs/dmu_object.c

@@ -26,6 +26,7 @@
 
 #include <sys/dbuf.h>
 #include <sys/dmu.h>
+#include <sys/dmu_impl.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_tx.h>
 #include <sys/dnode.h>
@@ -57,10 +58,8 @@
 	int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
 	int error;
 
-	kpreempt_disable();
-	cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
+	cpuobj = &os->os_obj_next_percpu[CPU_SEQID_UNSTABLE %
 	    os->os_obj_next_percpu_len];
-	kpreempt_enable();
 
 	if (dn_slots == 0) {
 		dn_slots = DNODE_MIN_SLOTS;
@@ -504,7 +503,6 @@
 	VERIFY0(dmu_object_free(mos, object, tx));
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_object_alloc);
 EXPORT_SYMBOL(dmu_object_alloc_ibs);
 EXPORT_SYMBOL(dmu_object_alloc_dnsize);
@@ -520,8 +518,6 @@
 EXPORT_SYMBOL(dmu_object_free_zapified);
 
 /* BEGIN CSTYLED */
-module_param(dmu_object_alloc_chunk_shift, int, 0644);
-MODULE_PARM_DESC(dmu_object_alloc_chunk_shift,
+ZFS_MODULE_PARAM(zfs, , dmu_object_alloc_chunk_shift, INT, ZMOD_RW,
 	"CPU-specific allocator grabs 2^N objects at once");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/dmu_objset.c b/zfs/module/zfs/dmu_objset.c
index 4d3c238..adff615 100644
--- a/zfs/module/zfs/dmu_objset.c
+++ b/zfs/module/zfs/dmu_objset.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -30,11 +30,13 @@
  * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
-#include <sys/zfeature.h>
 #include <sys/cred.h>
 #include <sys/zfs_context.h>
 #include <sys/dmu_objset.h>
@@ -62,6 +64,8 @@
 #include <sys/dmu_recv.h>
 #include <sys/zfs_project.h>
 #include "zfs_namecheck.h"
+#include <sys/vdev_impl.h>
+#include <sys/arc.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -193,8 +197,10 @@
 	 */
 	ASSERT(newval != ZIO_COMPRESS_INHERIT);
 
-	os->os_compress = zio_compress_select(os->os_spa, newval,
-	    ZIO_COMPRESS_ON);
+	os->os_compress = zio_compress_select(os->os_spa,
+	    ZIO_COMPRESS_ALGO(newval), ZIO_COMPRESS_ON);
+	os->os_complevel = zio_complevel_select(os->os_spa, os->os_compress,
+	    ZIO_COMPRESS_LEVEL(newval), ZIO_COMPLEVEL_DEFAULT);
 }
 
 static void
@@ -282,7 +288,9 @@
 	 * Inheritance and range checking should have been done by now.
 	 */
 	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
-	    newval == ZFS_REDUNDANT_METADATA_MOST);
+	    newval == ZFS_REDUNDANT_METADATA_MOST ||
+	    newval == ZFS_REDUNDANT_METADATA_SOME ||
+	    newval == ZFS_REDUNDANT_METADATA_NONE);
 
 	os->os_redundant_metadata = newval;
 }
@@ -323,7 +331,7 @@
 	/*
 	 * Inheritance and range checking should have been done by now.
 	 */
-	ASSERT(newval <= SPA_OLD_MAXBLOCKSIZE);
+	ASSERT(newval <= SPA_MAXBLOCKSIZE);
 	ASSERT(ISP2(newval));
 
 	os->os_zpl_special_smallblock = newval;
@@ -392,14 +400,50 @@
 	return (crc);
 }
 
-unsigned int
+static unsigned int
 dnode_multilist_index_func(multilist_t *ml, void *obj)
 {
 	dnode_t *dn = obj;
-	return (dnode_hash(dn->dn_objset, dn->dn_object) %
+
+	/*
+	 * The low order bits of the hash value are thought to be
+	 * distributed evenly. Otherwise, in the case that the multilist
+	 * has a power of two number of sublists, each sublist's usage
+	 * would not be evenly distributed. In this context full 64bit
+	 * division would be a waste of time, so limit it to 32 bits.
+	 */
+	return ((unsigned int)dnode_hash(dn->dn_objset, dn->dn_object) %
 	    multilist_get_num_sublists(ml));
 }
 
+static inline boolean_t
+dmu_os_is_l2cacheable(objset_t *os)
+{
+	if (os->os_secondary_cache == ZFS_CACHE_ALL ||
+	    os->os_secondary_cache == ZFS_CACHE_METADATA) {
+		if (l2arc_exclude_special == 0)
+			return (B_TRUE);
+
+		blkptr_t *bp = os->os_rootbp;
+		if (bp == NULL || BP_IS_HOLE(bp))
+			return (B_FALSE);
+		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+		vdev_t *rvd = os->os_spa->spa_root_vdev;
+		vdev_t *vd = NULL;
+
+		if (vdev < rvd->vdev_children)
+			vd = rvd->vdev_child[vdev];
+
+		if (vd == NULL)
+			return (B_TRUE);
+
+		if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+		    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
 /*
  * Instantiates the objset_t in-memory structure corresponding to the
  * objset_phys_t that's pointed to by the specified blkptr_t.
@@ -412,6 +456,12 @@
 	int i, err;
 
 	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
+	ASSERT(!BP_IS_REDACTED(bp));
+
+	/*
+	 * We need the pool config lock to get properties.
+	 */
+	ASSERT(ds == NULL || dsl_pool_config_held(ds->ds_dir->dd_pool));
 
 	/*
 	 * The $ORIGIN dataset (if it exists) doesn't have an associated
@@ -436,7 +486,7 @@
 		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 
-		if (DMU_OS_IS_L2CACHEABLE(os))
+		if (dmu_os_is_l2cacheable(os))
 			aflags |= ARC_FLAG_L2CACHE;
 
 		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
@@ -502,20 +552,8 @@
 	 * checksum/compression/copies.
 	 */
 	if (ds != NULL) {
-		boolean_t needlock = B_FALSE;
-
 		os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
 
-		/*
-		 * Note: it's valid to open the objset if the dataset is
-		 * long-held, in which case the pool_config lock will not
-		 * be held.
-		 */
-		if (!dsl_pool_config_held(dmu_objset_pool(os))) {
-			needlock = B_TRUE;
-			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
-		}
-
 		err = dsl_prop_register(ds,
 		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 		    primary_cache_changed_cb, os);
@@ -578,8 +616,6 @@
 				    smallblk_changed_cb, os);
 			}
 		}
-		if (needlock)
-			dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 		if (err != 0) {
 			arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
 			kmem_free(os, sizeof (objset_t));
@@ -589,6 +625,7 @@
 		/* It's the meta-objset. */
 		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 		os->os_compress = ZIO_COMPRESS_ON;
+		os->os_complevel = ZIO_COMPLEVEL_DEFAULT;
 		os->os_encrypted = B_FALSE;
 		os->os_copies = spa_max_replication(spa);
 		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
@@ -605,7 +642,7 @@
 	os->os_zil = zil_alloc(os, &os->os_zil_header);
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
+		multilist_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 		    offsetof(dnode_t, dn_dirty_link[i]),
 		    dnode_multilist_index_func);
 	}
@@ -649,11 +686,11 @@
 	int err = 0;
 
 	/*
-	 * We shouldn't be doing anything with dsl_dataset_t's unless the
-	 * pool_config lock is held, or the dataset is long-held.
+	 * We need the pool_config lock to manipulate the dsl_dataset_t.
+	 * Even if the dataset is long-held, we need the pool_config lock
+	 * to open the objset, as it needs to get properties.
 	 */
-	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
-	    dsl_dataset_long_held(ds));
+	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
 
 	mutex_enter(&ds->ds_opening_lock);
 	if (ds->ds_objset == NULL) {
@@ -686,8 +723,9 @@
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_pool_hold(name, tag, &dp);
 	if (err != 0)
 		return (err);
@@ -716,9 +754,9 @@
 dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
     boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
 {
-	int err;
+	(void) tag;
 
-	err = dmu_objset_from_ds(ds, osp);
+	int err = dmu_objset_from_ds(ds, osp);
 	if (err != 0) {
 		return (err);
 	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
@@ -759,8 +797,9 @@
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_pool_hold(name, FTAG, &dp);
 	if (err != 0)
 		return (err);
@@ -782,11 +821,15 @@
 	 * speed up pool import times and to keep this txg reserved
 	 * completely for recovery work.
 	 */
-	if ((dmu_objset_userobjspace_upgradable(*osp) ||
-	    dmu_objset_projectquota_upgradable(*osp)) &&
-	    !readonly && !dp->dp_spa->spa_claiming &&
-	    (ds->ds_dir->dd_crypto_obj == 0 || decrypt))
-		dmu_objset_id_quota_upgrade(*osp);
+	if (!readonly && !dp->dp_spa->spa_claiming &&
+	    (ds->ds_dir->dd_crypto_obj == 0 || decrypt)) {
+		if (dmu_objset_userobjspace_upgradable(*osp) ||
+		    dmu_objset_projectquota_upgradable(*osp)) {
+			dmu_objset_id_quota_upgrade(*osp);
+		} else if (dmu_objset_userused_enabled(*osp)) {
+			dmu_objset_userspace_upgrade(*osp);
+		}
+	}
 
 	dsl_pool_rele(dp, FTAG);
 	return (0);
@@ -798,8 +841,9 @@
 {
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
 	if (err != 0)
 		return (err);
@@ -816,9 +860,10 @@
 void
 dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
 {
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
-
+	ds_hold_flags_t flags;
 	dsl_pool_t *dp = dmu_objset_pool(os);
+
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
 	dsl_pool_rele(dp, tag);
 }
@@ -846,7 +891,9 @@
 {
 	dsl_pool_t *dp;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
+	ds_hold_flags_t flags;
 
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	VERIFY3P(ds, !=, NULL);
 	VERIFY3P(ds->ds_owner, ==, tag);
 	VERIFY(dsl_dataset_long_held(ds));
@@ -854,21 +901,22 @@
 	dsl_dataset_name(ds, name);
 	dp = ds->ds_dir->dd_pool;
 	dsl_pool_config_enter(dp, FTAG);
-	dsl_dataset_disown(ds, decrypt, tag);
-	VERIFY0(dsl_dataset_own(dp, name,
-	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
+	dsl_dataset_disown(ds, flags, tag);
+	VERIFY0(dsl_dataset_own(dp, name, flags, tag, newds));
 	dsl_pool_config_exit(dp, FTAG);
 }
 
 void
 dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
 {
+	ds_hold_flags_t flags;
+
+	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	/*
 	 * Stop upgrading thread
 	 */
 	dmu_objset_upgrade_stop(os);
-	dsl_dataset_disown(os->os_dsl_dataset,
-	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
+	dsl_dataset_disown(os->os_dsl_dataset, flags, tag);
 }
 
 void
@@ -988,9 +1036,8 @@
 	mutex_destroy(&os->os_obj_lock);
 	mutex_destroy(&os->os_user_ptr_lock);
 	mutex_destroy(&os->os_upgrade_lock);
-	for (int i = 0; i < TXG_SIZE; i++) {
-		multilist_destroy(os->os_dirty_dnodes[i]);
-	}
+	for (int i = 0; i < TXG_SIZE; i++)
+		multilist_destroy(&os->os_dirty_dnodes[i]);
 	spa_evicting_os_deregister(os->os_spa, os);
 	kmem_free(os, sizeof (objset_t));
 }
@@ -1104,6 +1151,7 @@
 typedef struct dmu_objset_create_arg {
 	const char *doca_name;
 	cred_t *doca_cred;
+	proc_t *doca_proc;
 	void (*doca_userfunc)(objset_t *os, void *arg,
 	    cred_t *cr, dmu_tx_t *tx);
 	void *doca_userarg;
@@ -1112,7 +1160,6 @@
 	dsl_crypto_params_t *doca_dcp;
 } dmu_objset_create_arg_t;
 
-/*ARGSUSED*/
 static int
 dmu_objset_create_check(void *arg, dmu_tx_t *tx)
 {
@@ -1148,7 +1195,7 @@
 	}
 
 	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
-	    doca->doca_cred);
+	    doca->doca_cred, doca->doca_proc);
 	if (error != 0) {
 		dsl_dir_rele(pdd, FTAG);
 		return (error);
@@ -1234,7 +1281,7 @@
 		}
 		VERIFY0(zio_wait(rzio));
 
-		dmu_objset_do_userquota_updates(os, tx);
+		dmu_objset_sync_done(os, tx);
 		taskq_wait(dp->dp_sync_taskq);
 		if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
 			ASSERT3P(ds->ds_key_mapping, !=, NULL);
@@ -1254,6 +1301,7 @@
 			ASSERT3P(ds->ds_key_mapping, !=, NULL);
 			key_mapping_rele(spa, ds->ds_key_mapping, ds);
 			dsl_dataset_sync_done(ds, tx);
+			dmu_buf_rele(ds->ds_dbuf, ds);
 		}
 
 		mutex_enter(&ds->ds_lock);
@@ -1261,8 +1309,7 @@
 		mutex_exit(&ds->ds_lock);
 	}
 
-	spa_history_log_internal_ds(ds, "create", tx, "");
-	zvol_create_minors(spa, doca->doca_name, B_TRUE);
+	spa_history_log_internal_ds(ds, "create", tx, " ");
 
 	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 	dsl_dir_rele(pdd, FTAG);
@@ -1277,6 +1324,7 @@
 
 	doca.doca_name = name;
 	doca.doca_cred = CRED();
+	doca.doca_proc = curproc;
 	doca.doca_flags = flags;
 	doca.doca_userfunc = func;
 	doca.doca_userarg = arg;
@@ -1292,18 +1340,22 @@
 	 */
 	doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
 
-	return (dsl_sync_task(name,
+	int rv = dsl_sync_task(name,
 	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
-	    6, ZFS_SPACE_CHECK_NORMAL));
+	    6, ZFS_SPACE_CHECK_NORMAL);
+
+	if (rv == 0)
+		zvol_create_minor(name);
+	return (rv);
 }
 
 typedef struct dmu_objset_clone_arg {
 	const char *doca_clone;
 	const char *doca_origin;
 	cred_t *doca_cred;
+	proc_t *doca_proc;
 } dmu_objset_clone_arg_t;
 
-/*ARGSUSED*/
 static int
 dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
 {
@@ -1329,7 +1381,7 @@
 	}
 
 	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
-	    doca->doca_cred);
+	    doca->doca_cred, doca->doca_proc);
 	if (error != 0) {
 		dsl_dir_rele(pdd, FTAG);
 		return (SET_ERROR(EDQUOT));
@@ -1374,8 +1426,7 @@
 	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
 	dsl_dataset_name(origin, namebuf);
 	spa_history_log_internal_ds(ds, "clone", tx,
-	    "origin=%s (%llu)", namebuf, origin->ds_object);
-	zvol_create_minors(dp->dp_spa, doca->doca_clone, B_TRUE);
+	    "origin=%s (%llu)", namebuf, (u_longlong_t)origin->ds_object);
 	dsl_dataset_rele(ds, FTAG);
 	dsl_dataset_rele(origin, FTAG);
 	dsl_dir_rele(pdd, FTAG);
@@ -1389,105 +1440,16 @@
 	doca.doca_clone = clone;
 	doca.doca_origin = origin;
 	doca.doca_cred = CRED();
+	doca.doca_proc = curproc;
 
-	return (dsl_sync_task(clone,
+	int rv = dsl_sync_task(clone,
 	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
-	    6, ZFS_SPACE_CHECK_NORMAL));
-}
+	    6, ZFS_SPACE_CHECK_NORMAL);
 
-static int
-dmu_objset_remap_indirects_impl(objset_t *os, uint64_t last_removed_txg)
-{
-	int error = 0;
-	uint64_t object = 0;
-	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
-		error = dmu_object_remap_indirects(os, object,
-		    last_removed_txg);
-		/*
-		 * If the ZPL removed the object before we managed to dnode_hold
-		 * it, we would get an ENOENT. If the ZPL declares its intent
-		 * to remove the object (dnode_free) before we manage to
-		 * dnode_hold it, we would get an EEXIST. In either case, we
-		 * want to continue remapping the other objects in the objset;
-		 * in all other cases, we want to break early.
-		 */
-		if (error != 0 && error != ENOENT && error != EEXIST) {
-			break;
-		}
-	}
-	if (error == ESRCH) {
-		error = 0;
-	}
-	return (error);
-}
+	if (rv == 0)
+		zvol_create_minor(clone);
 
-int
-dmu_objset_remap_indirects(const char *fsname)
-{
-	int error = 0;
-	objset_t *os = NULL;
-	uint64_t last_removed_txg;
-	uint64_t remap_start_txg;
-	dsl_dir_t *dd;
-
-	error = dmu_objset_hold(fsname, FTAG, &os);
-	if (error != 0) {
-		return (error);
-	}
-	dd = dmu_objset_ds(os)->ds_dir;
-
-	if (!spa_feature_is_enabled(dmu_objset_spa(os),
-	    SPA_FEATURE_OBSOLETE_COUNTS)) {
-		dmu_objset_rele(os, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	if (dsl_dataset_is_snapshot(dmu_objset_ds(os))) {
-		dmu_objset_rele(os, FTAG);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * If there has not been a removal, we're done.
-	 */
-	last_removed_txg = spa_get_last_removal_txg(dmu_objset_spa(os));
-	if (last_removed_txg == -1ULL) {
-		dmu_objset_rele(os, FTAG);
-		return (0);
-	}
-
-	/*
-	 * If we have remapped since the last removal, we're done.
-	 */
-	if (dsl_dir_is_zapified(dd)) {
-		uint64_t last_remap_txg;
-		if (zap_lookup(spa_meta_objset(dmu_objset_spa(os)),
-		    dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
-		    sizeof (last_remap_txg), 1, &last_remap_txg) == 0 &&
-		    last_remap_txg > last_removed_txg) {
-			dmu_objset_rele(os, FTAG);
-			return (0);
-		}
-	}
-
-	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
-	dsl_pool_rele(dmu_objset_pool(os), FTAG);
-
-	remap_start_txg = spa_last_synced_txg(dmu_objset_spa(os));
-	error = dmu_objset_remap_indirects_impl(os, last_removed_txg);
-	if (error == 0) {
-		/*
-		 * We update the last_remap_txg to be the start txg so that
-		 * we can guarantee that every block older than last_remap_txg
-		 * that can be remapped has been remapped.
-		 */
-		error = dsl_dir_update_last_remap_txg(dd, remap_start_txg);
-	}
-
-	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
-	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
-
-	return (error);
+	return (rv);
 }
 
 int
@@ -1498,7 +1460,7 @@
 	nvlist_t *snaps = fnvlist_alloc();
 
 	fnvlist_add_boolean(snaps, longsnap);
-	strfree(longsnap);
+	kmem_strfree(longsnap);
 	err = dsl_dataset_snapshot(snaps, NULL, NULL);
 	fnvlist_free(snaps);
 	return (err);
@@ -1512,10 +1474,15 @@
 	mutex_enter(&os->os_upgrade_lock);
 	os->os_upgrade_status = EINTR;
 	if (!os->os_upgrade_exit) {
+		int status;
+
 		mutex_exit(&os->os_upgrade_lock);
 
-		os->os_upgrade_status = os->os_upgrade_cb(os);
+		status = os->os_upgrade_cb(os);
+
 		mutex_enter(&os->os_upgrade_lock);
+
+		os->os_upgrade_status = status;
 	}
 	os->os_upgrade_exit = B_TRUE;
 	os->os_upgrade_id = 0;
@@ -1543,6 +1510,8 @@
 			dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
 			os->os_upgrade_status = ENOMEM;
 		}
+	} else {
+		dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
 	}
 	mutex_exit(&os->os_upgrade_lock);
 }
@@ -1577,7 +1546,7 @@
 		ASSERT(dn->dn_dbuf->db_data_pending);
 		/*
 		 * Initialize dn_zio outside dnode_sync() because the
-		 * meta-dnode needs to set it ouside dnode_sync().
+		 * meta-dnode needs to set it outside dnode_sync().
 		 */
 		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
 		ASSERT(dn->dn_zio);
@@ -1586,32 +1555,22 @@
 		multilist_sublist_remove(list, dn);
 
 		/*
-		 * If we are not doing useraccounting (os_synced_dnodes == NULL)
-		 * we are done with this dnode for this txg. Unset dn_dirty_txg
-		 * if later txgs aren't dirtying it so that future holders do
-		 * not get a stale value. Otherwise, we will do this in
-		 * userquota_updates_task() when processing has completely
-		 * finished for this txg.
+		 * See the comment above dnode_rele_task() for an explanation
+		 * of why this dnode hold is always needed (even when not
+		 * doing user accounting).
 		 */
-		multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
-		if (newlist != NULL) {
-			(void) dnode_add_ref(dn, newlist);
-			multilist_insert(newlist, dn);
-		} else {
-			mutex_enter(&dn->dn_mtx);
-			if (dn->dn_dirty_txg == tx->tx_txg)
-				dn->dn_dirty_txg = 0;
-			mutex_exit(&dn->dn_mtx);
-		}
+		multilist_t *newlist = &dn->dn_objset->os_synced_dnodes;
+		(void) dnode_add_ref(dn, newlist);
+		multilist_insert(newlist, dn);
 
 		dnode_sync(dn, tx);
 	}
 }
 
-/* ARGSUSED */
 static void
 dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
 {
+	(void) abuf;
 	blkptr_t *bp = zio->io_bp;
 	objset_t *os = arg;
 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
@@ -1639,10 +1598,10 @@
 		rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
 }
 
-/* ARGSUSED */
 static void
 dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
 {
+	(void) abuf;
 	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
 	objset_t *os = arg;
@@ -1697,7 +1656,7 @@
 	blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
 	*blkptr_copy = *os->os_rootbp;
 
-	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
+	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", (u_longlong_t)tx->tx_txg);
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	/* XXX the write_done callback should really give us the tx... */
@@ -1736,7 +1695,7 @@
 	}
 
 	zio = arc_write(pio, os->os_spa, tx->tx_txg,
-	    blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
+	    blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 
@@ -1764,25 +1723,21 @@
 
 	txgoff = tx->tx_txg & TXG_MASK;
 
-	if (dmu_objset_userused_enabled(os) &&
-	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
-		/*
-		 * We must create the list here because it uses the
-		 * dn_dirty_link[] of this txg.  But it may already
-		 * exist because we call dsl_dataset_sync() twice per txg.
-		 */
-		if (os->os_synced_dnodes == NULL) {
-			os->os_synced_dnodes =
-			    multilist_create(sizeof (dnode_t),
-			    offsetof(dnode_t, dn_dirty_link[txgoff]),
-			    dnode_multilist_index_func);
-		} else {
-			ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
-			    offsetof(dnode_t, dn_dirty_link[txgoff]));
-		}
+	/*
+	 * We must create the list here because it uses the
+	 * dn_dirty_link[] of this txg.  But it may already
+	 * exist because we call dsl_dataset_sync() twice per txg.
+	 */
+	if (os->os_synced_dnodes.ml_sublists == NULL) {
+		multilist_create(&os->os_synced_dnodes, sizeof (dnode_t),
+		    offsetof(dnode_t, dn_dirty_link[txgoff]),
+		    dnode_multilist_index_func);
+	} else {
+		ASSERT3U(os->os_synced_dnodes.ml_offset, ==,
+		    offsetof(dnode_t, dn_dirty_link[txgoff]));
 	}
 
-	ml = os->os_dirty_dnodes[txgoff];
+	ml = &os->os_dirty_dnodes[txgoff];
 	num_sublists = multilist_get_num_sublists(ml);
 	for (int i = 0; i < num_sublists; i++) {
 		if (multilist_sublist_is_empty_idx(ml, i))
@@ -1801,8 +1756,7 @@
 	while ((dr = list_head(list)) != NULL) {
 		ASSERT0(dr->dr_dbuf->db_level);
 		list_remove(list, dr);
-		if (dr->dr_zio)
-			zio_nowait(dr->dr_zio);
+		zio_nowait(dr->dr_zio);
 	}
 
 	/* Enable dnode backfill if enough objects have been freed. */
@@ -1822,22 +1776,32 @@
 boolean_t
 dmu_objset_is_dirty(objset_t *os, uint64_t txg)
 {
-	return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
+	return (!multilist_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]));
 }
 
-static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
 
 void
-dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+dmu_objset_register_type(dmu_objset_type_t ost, file_info_cb_t *cb)
 {
-	used_cbs[ost] = cb;
+	file_cbs[ost] = cb;
+}
+
+int
+dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype, const void *data,
+    zfs_file_info_t *zfi)
+{
+	file_info_cb_t *cb = file_cbs[os->os_phys->os_type];
+	if (cb == NULL)
+		return (EINVAL);
+	return (cb(bonustype, data, zfi));
 }
 
 boolean_t
 dmu_objset_userused_enabled(objset_t *os)
 {
 	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
-	    used_cbs[os->os_phys->os_type] != NULL &&
+	    file_cbs[os->os_phys->os_type] != NULL &&
 	    DMU_USERUSED_DNODE(os) != NULL);
 }
 
@@ -1851,7 +1815,7 @@
 boolean_t
 dmu_objset_projectquota_enabled(objset_t *os)
 {
-	return (used_cbs[os->os_phys->os_type] != NULL &&
+	return (file_cbs[os->os_phys->os_type] != NULL &&
 	    DMU_PROJECTUSED_DNODE(os) != NULL &&
 	    spa_feature_is_enabled(os->os_spa, SPA_FEATURE_PROJECT_QUOTA));
 }
@@ -1882,7 +1846,7 @@
 	 */
 	rv = strcmp(luqn->uqn_id, ruqn->uqn_id);
 
-	return (AVL_ISIGN(rv));
+	return (TREE_ISIGN(rv));
 }
 
 static void
@@ -2023,7 +1987,7 @@
 	userquota_cache_t cache = { { 0 } };
 
 	multilist_sublist_t *list =
-	    multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
+	    multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx);
 
 	ASSERT(multilist_sublist_head(list) == NULL ||
 	    dmu_objset_userused_enabled(os));
@@ -2077,25 +2041,54 @@
 				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
 		}
 		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
-		if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
-			dn->dn_dirty_txg = 0;
 		mutex_exit(&dn->dn_mtx);
 
 		multilist_sublist_remove(list, dn);
-		dnode_rele(dn, os->os_synced_dnodes);
+		dnode_rele(dn, &os->os_synced_dnodes);
 	}
 	do_userquota_cacheflush(os, &cache, tx);
 	multilist_sublist_unlock(list);
 	kmem_free(uua, sizeof (*uua));
 }
 
-void
-dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+/*
+ * Release dnode holds from dmu_objset_sync_dnodes().  When the dnode is being
+ * synced (i.e. we have issued the zio's for blocks in the dnode), it can't be
+ * evicted because the block containing the dnode can't be evicted until it is
+ * written out.  However, this hold is necessary to prevent the dnode_t from
+ * being moved (via dnode_move()) while it's still referenced by
+ * dbuf_dirty_record_t:dr_dnode.  And dr_dnode is needed for
+ * dirty_lightweight_leaf-type dirty records.
+ *
+ * If we are doing user-object accounting, the dnode_rele() happens from
+ * userquota_updates_task() instead.
+ */
+static void
+dnode_rele_task(void *arg)
 {
-	int num_sublists;
+	userquota_updates_arg_t *uua = arg;
+	objset_t *os = uua->uua_os;
 
+	multilist_sublist_t *list =
+	    multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx);
+
+	dnode_t *dn;
+	while ((dn = multilist_sublist_head(list)) != NULL) {
+		multilist_sublist_remove(list, dn);
+		dnode_rele(dn, &os->os_synced_dnodes);
+	}
+	multilist_sublist_unlock(list);
+	kmem_free(uua, sizeof (*uua));
+}
+
+/*
+ * Return TRUE if userquota updates are needed.
+ */
+static boolean_t
+dmu_objset_do_userquota_updates_prep(objset_t *os, dmu_tx_t *tx)
+{
 	if (!dmu_objset_userused_enabled(os))
-		return;
+		return (B_FALSE);
 
 	/*
 	 * If this is a raw receive just return and handle accounting
@@ -2105,10 +2098,10 @@
 	 * used for recovery.
 	 */
 	if (os->os_encrypted && dmu_objset_is_receiving(os))
-		return;
+		return (B_FALSE);
 
 	if (tx->tx_txg <= os->os_spa->spa_claim_max_txg)
-		return;
+		return (B_FALSE);
 
 	/* Allocate the user/group/project used objects if necessary. */
 	if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
@@ -2125,23 +2118,39 @@
 		VERIFY0(zap_create_claim(os, DMU_PROJECTUSED_OBJECT,
 		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
 	}
+	return (B_TRUE);
+}
 
-	num_sublists = multilist_get_num_sublists(os->os_synced_dnodes);
+/*
+ * Dispatch taskq tasks to dp_sync_taskq to update the user accounting, and
+ * also release the holds on the dnodes from dmu_objset_sync_dnodes().
+ * The caller must taskq_wait(dp_sync_taskq).
+ */
+void
+dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx)
+{
+	boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx);
+
+	int num_sublists = multilist_get_num_sublists(&os->os_synced_dnodes);
 	for (int i = 0; i < num_sublists; i++) {
-		if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i))
-			continue;
 		userquota_updates_arg_t *uua =
 		    kmem_alloc(sizeof (*uua), KM_SLEEP);
 		uua->uua_os = os;
 		uua->uua_sublist_idx = i;
 		uua->uua_tx = tx;
-		/* note: caller does taskq_wait() */
+
+		/*
+		 * If we don't need to update userquotas, use
+		 * dnode_rele_task() to call dnode_rele().
+		 */
 		(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
-		    userquota_updates_task, uua, 0);
+		    need_userquota ? userquota_updates_task : dnode_rele_task,
+		    uua, 0);
 		/* callback frees uua */
 	}
 }
 
+
 /*
  * Returns a pointer to data to find uid/gid from
  *
@@ -2152,31 +2161,22 @@
 static void *
 dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
-	dbuf_dirty_record_t *dr, **drp;
+	dbuf_dirty_record_t *dr;
 	void *data;
 
 	if (db->db_dirtycnt == 0)
 		return (db->db.db_data);  /* Nothing is changing */
 
-	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
-		if (dr->dr_txg == tx->tx_txg)
-			break;
+	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
 
 	if (dr == NULL) {
 		data = NULL;
 	} else {
-		dnode_t *dn;
-
-		DB_DNODE_ENTER(dr->dr_dbuf);
-		dn = DB_DNODE(dr->dr_dbuf);
-
-		if (dn->dn_bonuslen == 0 &&
+		if (dr->dr_dnode->dn_bonuslen == 0 &&
 		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
 			data = dr->dt.dl.dr_data->b_data;
 		else
 			data = dr->dt.dl.dr_data;
-
-		DB_DNODE_EXIT(dr->dr_dbuf);
 	}
 
 	return (data);
@@ -2188,9 +2188,6 @@
 	objset_t *os = dn->dn_objset;
 	void *data = NULL;
 	dmu_buf_impl_t *db = NULL;
-	uint64_t *user = NULL;
-	uint64_t *group = NULL;
-	uint64_t *project = NULL;
 	int flags = dn->dn_id_flags;
 	int error;
 	boolean_t have_spill = B_FALSE;
@@ -2244,23 +2241,23 @@
 		return;
 	}
 
-	if (before) {
-		ASSERT(data);
-		user = &dn->dn_olduid;
-		group = &dn->dn_oldgid;
-		project = &dn->dn_oldprojid;
-	} else if (data) {
-		user = &dn->dn_newuid;
-		group = &dn->dn_newgid;
-		project = &dn->dn_newprojid;
-	}
-
 	/*
 	 * Must always call the callback in case the object
 	 * type has changed and that type isn't an object type to track
 	 */
-	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
-	    user, group, project);
+	zfs_file_info_t zfi;
+	error = file_cbs[os->os_phys->os_type](dn->dn_bonustype, data, &zfi);
+
+	if (before) {
+		ASSERT(data);
+		dn->dn_olduid = zfi.zfi_user;
+		dn->dn_oldgid = zfi.zfi_group;
+		dn->dn_oldprojid = zfi.zfi_project;
+	} else if (data) {
+		dn->dn_newuid = zfi.zfi_user;
+		dn->dn_newgid = zfi.zfi_group;
+		dn->dn_newprojid = zfi.zfi_project;
+	}
 
 	/*
 	 * Preserve existing uid/gid when the callback can't determine
@@ -2369,8 +2366,8 @@
 	return (0);
 }
 
-int
-dmu_objset_userspace_upgrade(objset_t *os)
+static int
+dmu_objset_userspace_upgrade_cb(objset_t *os)
 {
 	int err = 0;
 
@@ -2390,6 +2387,12 @@
 	return (0);
 }
 
+void
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+	dmu_objset_upgrade(os, dmu_objset_userspace_upgrade_cb);
+}
+
 static int
 dmu_objset_id_quota_upgrade_cb(objset_t *os)
 {
@@ -2400,14 +2403,15 @@
 		return (0);
 	if (dmu_objset_is_snapshot(os))
 		return (SET_ERROR(EINVAL));
-	if (!dmu_objset_userobjused_enabled(os))
+	if (!dmu_objset_userused_enabled(os))
 		return (SET_ERROR(ENOTSUP));
 	if (!dmu_objset_projectquota_enabled(os) &&
 	    dmu_objset_userobjspace_present(os))
 		return (SET_ERROR(ENOTSUP));
 
-	dmu_objset_ds(os)->ds_feature_activation[
-	    SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE;
+	if (dmu_objset_userobjused_enabled(os))
+		dmu_objset_ds(os)->ds_feature_activation[
+		    SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE;
 	if (dmu_objset_projectquota_enabled(os))
 		dmu_objset_ds(os)->ds_feature_activation[
 		    SPA_FEATURE_PROJECT_QUOTA] = (void *)B_TRUE;
@@ -2416,7 +2420,9 @@
 	if (err)
 		return (err);
 
-	os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
+	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+	if (dmu_objset_userobjused_enabled(os))
+		os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
 	if (dmu_objset_projectquota_enabled(os))
 		os->os_flags |= OBJSET_FLAG_PROJECTQUOTA_COMPLETE;
 
@@ -2497,7 +2503,7 @@
 }
 
 int
-dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
+dmu_snapshot_realname(objset_t *os, const char *name, char *real, int maxlen,
     boolean_t *conflict)
 {
 	dsl_dataset_t *ds = os->os_dsl_dataset;
@@ -2890,7 +2896,7 @@
 			err = dmu_objset_find_impl(spa, child,
 			    func, arg, flags);
 			dsl_pool_config_enter(dp, FTAG);
-			strfree(child);
+			kmem_strfree(child);
 			if (err != 0)
 				break;
 		}
@@ -2928,7 +2934,7 @@
 				dsl_pool_config_exit(dp, FTAG);
 				err = func(child, arg);
 				dsl_pool_config_enter(dp, FTAG);
-				strfree(child);
+				kmem_strfree(child);
 				if (err != 0)
 					break;
 			}
@@ -2951,7 +2957,7 @@
  * See comment above dmu_objset_find_impl().
  */
 int
-dmu_objset_find(char *name, int func(const char *, void *), void *arg,
+dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
     int flags)
 {
 	spa_t *spa;
@@ -3061,7 +3067,7 @@
 EXPORT_SYMBOL(dmu_objset_open_impl);
 EXPORT_SYMBOL(dmu_objset_evict);
 EXPORT_SYMBOL(dmu_objset_register_type);
-EXPORT_SYMBOL(dmu_objset_do_userquota_updates);
+EXPORT_SYMBOL(dmu_objset_sync_done);
 EXPORT_SYMBOL(dmu_objset_userquota_get_ids);
 EXPORT_SYMBOL(dmu_objset_userused_enabled);
 EXPORT_SYMBOL(dmu_objset_userspace_upgrade);

diff --git a/zfs/module/zfs/dmu_recv.c b/zfs/module/zfs/dmu_recv.c
index 2324e8e..6eb1009 100644
--- a/zfs/module/zfs/dmu_recv.c
+++ b/zfs/module/zfs/dmu_recv.c

@@ -21,16 +21,18 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
- * Copyright 2016 RackTop Systems.
- * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
+#include <sys/dmu_send.h>
+#include <sys/dmu_recv.h>
 #include <sys/dmu_tx.h>
 #include <sys/dbuf.h>
 #include <sys/dnode.h>
@@ -42,38 +44,310 @@
 #include <sys/dsl_prop.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_synctask.h>
-#include <sys/spa_impl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
+#include <sys/zvol.h>
 #include <sys/zio_checksum.h>
 #include <sys/zfs_znode.h>
 #include <zfs_fletcher.h>
 #include <sys/avl.h>
 #include <sys/ddt.h>
 #include <sys/zfs_onexit.h>
-#include <sys/dmu_recv.h>
 #include <sys/dsl_destroy.h>
 #include <sys/blkptr.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/zfeature.h>
 #include <sys/bqueue.h>
-#include <sys/zvol.h>
-#include <sys/policy.h>
+#include <sys/objlist.h>
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
+#include <sys/zfs_file.h>
 
 int zfs_recv_queue_length = SPA_MAXBLOCKSIZE;
+int zfs_recv_queue_ff = 20;
+int zfs_recv_write_batch_size = 1024 * 1024;
 
 static char *dmu_recv_tag = "dmu_recv_tag";
 const char *recv_clone_name = "%recv";
 
-static void byteswap_record(dmu_replay_record_t *drr);
+typedef enum {		/* tri-state type of receive_writer_arg.or_need_sync */
+	ORNS_NO,
+	ORNS_YES,
+	ORNS_MAYBE	/* NOTE(review): presumably "undetermined" -- confirm */
+} or_need_sync_t;
+
+static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len,
+    void *buf);
+
+struct receive_record_arg {
+	dmu_replay_record_t header;	/* the record's replay-record header */
+	void *payload; /* Pointer to a buffer containing the payload */
+	/*
+	 * If the record is a WRITE or SPILL, pointer to the abd containing the
+	 * payload.
+	 */
+	abd_t *abd;
+	int payload_size;	/* presumably the payload length in bytes; confirm */
+	uint64_t bytes_read; /* bytes read from stream when record created */
+	boolean_t eos_marker; /* Marks the end of the stream */
+	bqueue_node_t node;	/* linkage for a bqueue_t -- TODO confirm which */
+};
+
+struct receive_writer_arg {
+	objset_t *os;
+	boolean_t byteswap;	/* NOTE(review): presumably stream needs swap */
+	bqueue_t q;		/* presumably carries receive_record_arg; confirm */
+
+	/*
+	 * These three members are used to signal to the main thread when
+	 * we're done.
+	 */
+	kmutex_t mutex;
+	kcondvar_t cv;
+	boolean_t done;
+
+	int err;
+	boolean_t resumable;
+	boolean_t raw;   /* DMU_BACKUP_FEATURE_RAW set */
+	boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
+	boolean_t full;  /* this is a full send stream */
+	uint64_t last_object;	/* NOTE(review): likely resume position; confirm */
+	uint64_t last_offset;	/* NOTE(review): likely resume position; confirm */
+	uint64_t max_object; /* highest object ID referenced in stream */
+	uint64_t bytes_read; /* bytes read when current record created */
+
+	list_t write_batch;	/* element type not visible here -- TODO confirm */
+
+	/* Encryption parameters for the last received DRR_OBJECT_RANGE */
+	boolean_t or_crypt_params_present;
+	uint64_t or_firstobj;
+	uint64_t or_numslots;
+	uint8_t or_salt[ZIO_DATA_SALT_LEN];
+	uint8_t or_iv[ZIO_DATA_IV_LEN];
+	uint8_t or_mac[ZIO_DATA_MAC_LEN];
+	boolean_t or_byteorder;
+
+	/* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */
+	or_need_sync_t or_need_sync;
+};
 
 typedef struct dmu_recv_begin_arg {
 	const char *drba_origin;
 	dmu_recv_cookie_t *drba_cookie;
 	cred_t *drba_cred;
+	proc_t *drba_proc;
 	dsl_crypto_params_t *drba_dcp;
 } dmu_recv_begin_arg_t;
 
+static void	/* byte-swaps the header fields of *drr in place, per record type */
+byteswap_record(dmu_replay_record_t *drr)
+{
+#define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
+#define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
+	drr->drr_type = BSWAP_32(drr->drr_type);
+	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
+	switch (drr->drr_type) {	/* drr_type was swapped just above */
+	case DRR_BEGIN:
+		DO64(drr_begin.drr_magic);
+		DO64(drr_begin.drr_versioninfo);
+		DO64(drr_begin.drr_creation_time);
+		DO32(drr_begin.drr_type);
+		DO32(drr_begin.drr_flags);
+		DO64(drr_begin.drr_toguid);
+		DO64(drr_begin.drr_fromguid);
+		break;
+	case DRR_OBJECT:
+		DO64(drr_object.drr_object);
+		DO32(drr_object.drr_type);
+		DO32(drr_object.drr_bonustype);
+		DO32(drr_object.drr_blksz);
+		DO32(drr_object.drr_bonuslen);
+		DO32(drr_object.drr_raw_bonuslen);
+		DO64(drr_object.drr_toguid);
+		DO64(drr_object.drr_maxblkid);
+		break;
+	case DRR_FREEOBJECTS:
+		DO64(drr_freeobjects.drr_firstobj);
+		DO64(drr_freeobjects.drr_numobjs);
+		DO64(drr_freeobjects.drr_toguid);
+		break;
+	case DRR_WRITE:
+		DO64(drr_write.drr_object);
+		DO32(drr_write.drr_type);
+		DO64(drr_write.drr_offset);
+		DO64(drr_write.drr_logical_size);
+		DO64(drr_write.drr_toguid);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
+		DO64(drr_write.drr_key.ddk_prop);
+		DO64(drr_write.drr_compressed_size);
+		break;
+	case DRR_WRITE_EMBEDDED:
+		DO64(drr_write_embedded.drr_object);
+		DO64(drr_write_embedded.drr_offset);
+		DO64(drr_write_embedded.drr_length);
+		DO64(drr_write_embedded.drr_toguid);
+		DO32(drr_write_embedded.drr_lsize);
+		DO32(drr_write_embedded.drr_psize);
+		break;
+	case DRR_FREE:
+		DO64(drr_free.drr_object);
+		DO64(drr_free.drr_offset);
+		DO64(drr_free.drr_length);
+		DO64(drr_free.drr_toguid);
+		break;
+	case DRR_SPILL:
+		DO64(drr_spill.drr_object);
+		DO64(drr_spill.drr_length);
+		DO64(drr_spill.drr_toguid);
+		DO64(drr_spill.drr_compressed_size);
+		DO32(drr_spill.drr_type);
+		break;
+	case DRR_OBJECT_RANGE:
+		DO64(drr_object_range.drr_firstobj);
+		DO64(drr_object_range.drr_numslots);
+		DO64(drr_object_range.drr_toguid);
+		break;
+	case DRR_REDACT:
+		DO64(drr_redact.drr_object);
+		DO64(drr_redact.drr_offset);
+		DO64(drr_redact.drr_length);
+		DO64(drr_redact.drr_toguid);
+		break;
+	case DRR_END:
+		DO64(drr_end.drr_toguid);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
+		break;
+	default:		/* other types: no per-type fields are swapped */
+		break;
+	}
+
+	if (drr->drr_type != DRR_BEGIN) {
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+	}
+
+#undef DO64
+#undef DO32
+}
+
+static boolean_t	/* B_TRUE iff guid is one of snaps[0 .. num_snaps) */
+redact_snaps_contains(uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
+{	/* index is uint64_t, like the count, so the comparison is unsigned */
+	for (uint64_t i = 0; i < num_snaps; i++) {
+		if (snaps[i] == guid)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Return B_TRUE iff every guid in redact_snaps[] is also in origin_snaps[],
+ * i.e. the stream being received is redacted with respect to a subset of the
+ * origin's redaction snapshots.  See the zfs man page on redacted sends.
+ */
+static boolean_t
+compatible_redact_snaps(uint64_t *origin_snaps, uint64_t origin_num_snaps,
+    uint64_t *redact_snaps, uint64_t num_redact_snaps)
+{
+	/*
+	 * Short circuit the comparison; if we are redacted with respect to
+	 * more snapshots than the origin, we can't be redacted with respect
+	 * to a subset.
+	 */
+	if (num_redact_snaps > origin_num_snaps) {
+		return (B_FALSE);
+	}
+
+	for (uint64_t i = 0; i < num_redact_snaps; i++) {
+		if (!redact_snaps_contains(origin_snaps, origin_num_snaps,
+		    redact_snaps[i])) {
+			return (B_FALSE);
+		}
+	}
+	return (B_TRUE);
+}
+
+static boolean_t	/* B_TRUE iff the stream may be received onto origin */
+redact_check(dmu_recv_begin_arg_t *drba, dsl_dataset_t *origin)
+{
+	uint64_t *origin_snaps;
+	uint64_t origin_num_snaps;
+	dmu_recv_cookie_t *drc = drba->drba_cookie;
+	struct drr_begin *drrb = drc->drc_drrb;
+	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	int err = 0;
+	boolean_t ret = B_TRUE;
+	uint64_t *redact_snaps;
+	uint_t numredactsnaps;
+
+	/*
+	 * If this is a full send stream, we're safe no matter what.
+	 */
+	if (drrb->drr_fromguid == 0)
+		return (ret);
+
+	VERIFY(dsl_dataset_get_uint64_array_feature(origin,
+	    SPA_FEATURE_REDACTED_DATASETS, &origin_num_snaps, &origin_snaps));
+
+	if (nvlist_lookup_uint64_array(drc->drc_begin_nvl,
+	    BEGINNV_REDACT_FROM_SNAPS, &redact_snaps, &numredactsnaps) ==
+	    0) {
+		/*
+		 * If the send stream was sent from the redaction bookmark or
+		 * the redacted version of the dataset, then we're safe.  Verify
+		 * that this is from a compatible redaction bookmark or
+		 * redacted dataset.
+		 */
+		if (!compatible_redact_snaps(origin_snaps, origin_num_snaps,
+		    redact_snaps, numredactsnaps)) {
+			err = EINVAL;
+		}
+	} else if (featureflags & DMU_BACKUP_FEATURE_REDACTED) {
+		/*
+		 * If the stream is redacted, it must be redacted with respect
+		 * to a subset of what the origin is redacted with respect to.
+		 * See case number 2 in the zfs man page section on redacted zfs
+		 * send.
+		 */
+		err = nvlist_lookup_uint64_array(drc->drc_begin_nvl,
+		    BEGINNV_REDACT_SNAPS, &redact_snaps, &numredactsnaps);
+
+		if (err != 0 || !compatible_redact_snaps(origin_snaps,
+		    origin_num_snaps, redact_snaps, numredactsnaps)) {
+			err = EINVAL;
+		}
+	} else if (!redact_snaps_contains(origin_snaps, origin_num_snaps,
+	    drrb->drr_toguid)) {
+		/*
+		 * If the stream isn't redacted but the origin is, this must be
+		 * one of the snapshots the origin is redacted with respect to.
+		 * See case number 1 in the zfs man page section on redacted zfs
+		 * send.
+		 */
+		err = EINVAL;
+	}
+
+	if (err != 0)
+		ret = B_FALSE;
+	return (ret);
+}
+
+/*
+ * Reject a stream sent without --large-block when the target dataset already
+ * has the large_blocks feature active (i.e. an earlier receive used it).
+ * Accepting it would force a read-modify-write, or require re-aggregating a
+ * run of WRITE records.  Returns 0 or ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH.
+ */
+static int
+recv_check_large_blocks(dsl_dataset_t *ds, uint64_t featureflags)
+{
+	if (!dsl_dataset_feature_is_active(ds, SPA_FEATURE_LARGE_BLOCKS) ||
+	    (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) != 0)
+		return (0);
+	return (SET_ERROR(ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH));
+}
+
 static int
 recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
     uint64_t fromguid, uint64_t featureflags)
@@ -91,7 +365,7 @@
 	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name,
 	    8, 1, &val);
 	if (error != ENOENT)
-		return (error == 0 ? EBUSY : error);
+		return (error == 0 ? SET_ERROR(EBUSY) : error);
 
 	/* Resume state must not be set. */
 	if (dsl_dataset_has_resume_receive_state(ds))
@@ -102,7 +376,7 @@
 	    dsl_dataset_phys(ds)->ds_snapnames_zapobj,
 	    drba->drba_cookie->drc_tosnap, 8, 1, &val);
 	if (error != ENOENT)
-		return (error == 0 ? EEXIST : error);
+		return (error == 0 ? SET_ERROR(EEXIST) : error);
 
 	/* Must not have children if receiving a ZVOL. */
 	error = zap_count(dp->dp_meta_objset,
@@ -123,7 +397,7 @@
 	 * against that limit.
 	 */
 	error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT,
-	    NULL, drba->drba_cred);
+	    NULL, drba->drba_cred, drba->drba_proc);
 	if (error != 0)
 		return (error);
 
@@ -131,7 +405,7 @@
 		dsl_dataset_t *snap;
 		uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
 
-		/* Can't raw receive on top of an unencrypted dataset */
+		/* Can't perform a raw receive on top of a non-raw receive */
 		if (!encrypted && raw)
 			return (SET_ERROR(EINVAL));
 
@@ -179,6 +453,19 @@
 			    ds->ds_prev->ds_object;
 		}
 
+		if (dsl_dataset_feature_is_active(snap,
+		    SPA_FEATURE_REDACTED_DATASETS) && !redact_check(drba,
+		    snap)) {
+			dsl_dataset_rele(snap, FTAG);
+			return (SET_ERROR(EINVAL));
+		}
+
+		error = recv_check_large_blocks(snap, featureflags);
+		if (error != 0) {
+			dsl_dataset_rele(snap, FTAG);
+			return (error);
+		}
+
 		dsl_dataset_rele(snap, FTAG);
 	} else {
 		/* if full, then must be forced */
@@ -210,12 +497,67 @@
 			if (will_encrypt && embed)
 				return (SET_ERROR(EINVAL));
 		}
-
-		drba->drba_cookie->drc_fromsnapobj = 0;
 	}
 
 	return (0);
+}
 
+/*
+ * Check that any feature flags used in the data stream we're receiving are
+ * supported by the pool we are receiving into.
+ *
+ * Note that some of the features we explicitly check here have additional
+ * (implicit) features they depend on, but those dependencies are enforced
+ * through the zfeature_register() calls declaring the features that we
+ * explicitly check.
+ */
+static int	/* 0, ENOTSUP, or ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE */
+recv_begin_check_feature_flags_impl(uint64_t featureflags, spa_t *spa)
+{
+	/*
+	 * Check if there are any unsupported feature flags.
+	 */
+	if (!DMU_STREAM_SUPPORTED(featureflags)) {
+		return (SET_ERROR(ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE));
+	}
+
+	/* Verify pool version supports SA if SA_SPILL feature set */
+	if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
+	    spa_version(spa) < SPA_VERSION_SA)
+		return (SET_ERROR(ENOTSUP));
+
+	/*
+	 * LZ4 compressed, ZSTD compressed, embedded, mooched, large blocks,
+	 * and large_dnodes in the stream can only be used if those pool
+	 * features are enabled because we don't attempt to decompress /
+	 * un-embed / un-mooch / split up the blocks / dnodes during the
+	 * receive process.
+	 */
+	if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_LZ4_COMPRESS))
+		return (SET_ERROR(ENOTSUP));
+	if ((featureflags & DMU_BACKUP_FEATURE_ZSTD) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_ZSTD_COMPRESS))
+		return (SET_ERROR(ENOTSUP));
+	if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA))
+		return (SET_ERROR(ENOTSUP));
+	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS))
+		return (SET_ERROR(ENOTSUP));
+	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE))
+		return (SET_ERROR(ENOTSUP));
+
+	/*
+	 * Receiving redacted streams requires that redacted datasets are
+	 * enabled.
+	 */
+	if ((featureflags & DMU_BACKUP_FEATURE_REDACTED) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_REDACTED_DATASETS))
+		return (SET_ERROR(ENOTSUP));
+
+	return (0);
 }
 
 static int
@@ -226,9 +568,9 @@
 	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
 	uint64_t fromguid = drrb->drr_fromguid;
 	int flags = drrb->drr_flags;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	int error;
-	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	uint64_t featureflags = drba->drba_cookie->drc_featureflags;
 	dsl_dataset_t *ds;
 	const char *tofs = drba->drba_cookie->drc_tofs;
 
@@ -242,41 +584,15 @@
 	    ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
 		return (SET_ERROR(EINVAL));
 
-	/* Verify pool version supports SA if SA_SPILL feature set */
-	if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
-	    spa_version(dp->dp_spa) < SPA_VERSION_SA)
-		return (SET_ERROR(ENOTSUP));
+	error = recv_begin_check_feature_flags_impl(featureflags, dp->dp_spa);
+	if (error != 0)
+		return (error);
 
+	/* Resumable receives require extensible datasets */
 	if (drba->drba_cookie->drc_resumable &&
 	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET))
 		return (SET_ERROR(ENOTSUP));
 
-	/*
-	 * The receiving code doesn't know how to translate a WRITE_EMBEDDED
-	 * record to a plain WRITE record, so the pool must have the
-	 * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
-	 * records.  Same with WRITE_EMBEDDED records that use LZ4 compression.
-	 */
-	if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
-		return (SET_ERROR(ENOTSUP));
-	if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
-		return (SET_ERROR(ENOTSUP));
-
-	/*
-	 * The receiving code doesn't know how to translate large blocks
-	 * to smaller ones, so the pool must have the LARGE_BLOCKS
-	 * feature enabled if the stream has LARGE_BLOCKS. Same with
-	 * large dnodes.
-	 */
-	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
-		return (SET_ERROR(ENOTSUP));
-	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
-		return (SET_ERROR(ENOTSUP));
-
 	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
 		/* raw receives require the encryption feature */
 		if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION))
@@ -290,7 +606,15 @@
 		if (!(flags & DRR_FLAG_SPILL_BLOCK))
 			return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
 	} else {
-		dsflags |= DS_HOLD_FLAG_DECRYPT;
+		/*
+		 * We support unencrypted datasets below encrypted ones now,
+		 * so add the DS_HOLD_FLAG_DECRYPT flag only if we are dealing
+		 * with a dataset we may encrypt.
+		 */
+		if (drba->drba_dcp == NULL ||
+		    drba->drba_dcp->cp_crypt != ZIO_CRYPT_OFF) {
+			dsflags |= DS_HOLD_FLAG_DECRYPT;
+		}
 	}
 
 	error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
@@ -315,7 +639,7 @@
 		 * If it's a non-clone incremental, we are missing the
 		 * target fs, so fail the recv.
 		 */
-		if (fromguid != 0 && !(flags & DRR_FLAG_CLONE ||
+		if (fromguid != 0 && !((flags & DRR_FLAG_CLONE) ||
 		    drba->drba_origin))
 			return (SET_ERROR(ENOENT));
 
@@ -324,7 +648,7 @@
 		 * contain all the necessary free records and freeobject
 		 * records, reject it.
 		 */
-		if (fromguid == 0 && drba->drba_origin &&
+		if (fromguid == 0 && drba->drba_origin != NULL &&
 		    !(flags & DRR_FLAG_FREERECORDS))
 			return (SET_ERROR(EINVAL));
 
@@ -366,14 +690,16 @@
 		 * filesystems and increment those counts during begin_sync).
 		 */
 		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
-		    ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred);
+		    ZFS_PROP_FILESYSTEM_LIMIT, NULL,
+		    drba->drba_cred, drba->drba_proc);
 		if (error != 0) {
 			dsl_dataset_rele(ds, FTAG);
 			return (error);
 		}
 
 		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
-		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred);
+		    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
+		    drba->drba_cred, drba->drba_proc);
 		if (error != 0) {
 			dsl_dataset_rele(ds, FTAG);
 			return (error);
@@ -392,7 +718,6 @@
 
 		if (drba->drba_origin != NULL) {
 			dsl_dataset_t *origin;
-
 			error = dsl_dataset_hold_flags(dp, drba->drba_origin,
 			    dsflags, FTAG, &origin);
 			if (error != 0) {
@@ -410,14 +735,38 @@
 				dsl_dataset_rele(ds, FTAG);
 				return (SET_ERROR(ENODEV));
 			}
+
 			if (origin->ds_dir->dd_crypto_obj != 0 &&
 			    (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) {
 				dsl_dataset_rele_flags(origin, dsflags, FTAG);
 				dsl_dataset_rele(ds, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
-			dsl_dataset_rele_flags(origin,
-			    dsflags, FTAG);
+
+			/*
+			 * If the origin is redacted we need to verify that this
+			 * send stream can safely be received on top of the
+			 * origin.
+			 */
+			if (dsl_dataset_feature_is_active(origin,
+			    SPA_FEATURE_REDACTED_DATASETS)) {
+				if (!redact_check(drba, origin)) {
+					dsl_dataset_rele_flags(origin, dsflags,
+					    FTAG);
+					dsl_dataset_rele_flags(ds, dsflags,
+					    FTAG);
+					return (SET_ERROR(EINVAL));
+				}
+			}
+
+			error = recv_check_large_blocks(ds, featureflags);
+			if (error != 0) {
+				dsl_dataset_rele_flags(origin, dsflags, FTAG);
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+				return (error);
+			}
+
+			dsl_dataset_rele_flags(origin, dsflags, FTAG);
 		}
 
 		dsl_dataset_rele(ds, FTAG);
@@ -432,13 +781,14 @@
 	dmu_recv_begin_arg_t *drba = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	objset_t *mos = dp->dp_meta_objset;
-	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
-	const char *tofs = drba->drba_cookie->drc_tofs;
-	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	dmu_recv_cookie_t *drc = drba->drba_cookie;
+	struct drr_begin *drrb = drc->drc_drrb;
+	const char *tofs = drc->drc_tofs;
+	uint64_t featureflags = drc->drc_featureflags;
 	dsl_dataset_t *ds, *newds;
 	objset_t *os;
 	uint64_t dsobj;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	int error;
 	uint64_t crflags = 0;
 	dsl_crypto_params_t dummy_dcp = { 0 };
@@ -455,7 +805,7 @@
 	 * the raw cmd set. Raw incremental recvs do not use a dcp
 	 * since the encryption parameters are already set in stone.
 	 */
-	if (dcp == NULL && drba->drba_cookie->drc_fromsnapobj == 0 &&
+	if (dcp == NULL && drrb->drr_fromguid == 0 &&
 	    drba->drba_origin == NULL) {
 		ASSERT3P(dcp, ==, NULL);
 		dcp = &dummy_dcp;
@@ -474,7 +824,6 @@
 			    drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
 			ASSERT3P(dcp, ==, NULL);
 		}
-
 		dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
 		    snap, crflags, drba->drba_cred, dcp, tx);
 		if (drba->drba_cookie->drc_fromsnapobj != 0)
@@ -499,13 +848,24 @@
 		if (origin != NULL)
 			dsl_dataset_rele(origin, FTAG);
 		dsl_dir_rele(dd, FTAG);
-		drba->drba_cookie->drc_newfs = B_TRUE;
+		drc->drc_newfs = B_TRUE;
 	}
-
-	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds));
+	VERIFY0(dsl_dataset_own_obj_force(dp, dsobj, dsflags, dmu_recv_tag,
+	    &newds));
+	if (dsl_dataset_feature_is_active(newds,
+	    SPA_FEATURE_REDACTED_DATASETS)) {
+		/*
+		 * If the origin dataset is redacted, the child will be redacted
+		 * when we create it.  We clear the new dataset's
+		 * redaction info; if it should be redacted, we'll fill
+		 * in its information later.
+		 */
+		dsl_dataset_deactivate_feature(newds,
+		    SPA_FEATURE_REDACTED_DATASETS, tx);
+	}
 	VERIFY0(dmu_objset_from_ds(newds, &os));
 
-	if (drba->drba_cookie->drc_resumable) {
+	if (drc->drc_resumable) {
 		dsl_dataset_zapify(newds, tx);
 		if (drrb->drr_fromguid != 0) {
 			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID,
@@ -539,6 +899,17 @@
 			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK,
 			    8, 1, &one, tx));
 		}
+
+		uint64_t *redact_snaps;
+		uint_t numredactsnaps;
+		if (nvlist_lookup_uint64_array(drc->drc_begin_nvl,
+		    BEGINNV_REDACT_FROM_SNAPS, &redact_snaps,
+		    &numredactsnaps) == 0) {
+			VERIFY0(zap_add(mos, dsobj,
+			    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS,
+			    sizeof (*redact_snaps), numredactsnaps,
+			    redact_snaps, tx));
+		}
 	}
 
 	/*
@@ -551,6 +922,15 @@
 		drba->drba_cookie->drc_raw = B_TRUE;
 	}
 
+	if (featureflags & DMU_BACKUP_FEATURE_REDACTED) {
+		uint64_t *redact_snaps;
+		uint_t numredactsnaps;
+		VERIFY0(nvlist_lookup_uint64_array(drc->drc_begin_nvl,
+		    BEGINNV_REDACT_SNAPS, &redact_snaps, &numredactsnaps));
+		dsl_dataset_activate_redaction(newds, redact_snaps,
+		    numredactsnaps, tx);
+	}
+
 	dmu_buf_will_dirty(newds->ds_dbuf, tx);
 	dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
 
@@ -569,68 +949,48 @@
 	rrw_exit(&newds->ds_bp_rwlock, FTAG);
 
 	drba->drba_cookie->drc_ds = newds;
+	drba->drba_cookie->drc_os = os;
 
-	spa_history_log_internal_ds(newds, "receive", tx, "");
+	spa_history_log_internal_ds(newds, "receive", tx, " ");
 }
 
 static int
 dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 {
 	dmu_recv_begin_arg_t *drba = arg;
+	dmu_recv_cookie_t *drc = drba->drba_cookie;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
-	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+	struct drr_begin *drrb = drc->drc_drrb;
 	int error;
-	ds_hold_flags_t dsflags = 0;
-	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	dsl_dataset_t *ds;
-	const char *tofs = drba->drba_cookie->drc_tofs;
+	const char *tofs = drc->drc_tofs;
 
 	/* already checked */
 	ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
-	ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING);
+	ASSERT(drc->drc_featureflags & DMU_BACKUP_FEATURE_RESUMING);
 
 	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 	    DMU_COMPOUNDSTREAM ||
 	    drrb->drr_type >= DMU_OST_NUMTYPES)
 		return (SET_ERROR(EINVAL));
 
-	/* Verify pool version supports SA if SA_SPILL feature set */
-	if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
-	    spa_version(dp->dp_spa) < SPA_VERSION_SA)
-		return (SET_ERROR(ENOTSUP));
-
 	/*
-	 * The receiving code doesn't know how to translate a WRITE_EMBEDDED
-	 * record to a plain WRITE record, so the pool must have the
-	 * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
-	 * records.  Same with WRITE_EMBEDDED records that use LZ4 compression.
+	 * This is mostly a sanity check since we should have already done these
+	 * checks during a previous attempt to receive the data.
 	 */
-	if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
-		return (SET_ERROR(ENOTSUP));
-	if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
-		return (SET_ERROR(ENOTSUP));
-
-	/*
-	 * The receiving code doesn't know how to translate large blocks
-	 * to smaller ones, so the pool must have the LARGE_BLOCKS
-	 * feature enabled if the stream has LARGE_BLOCKS. Same with
-	 * large dnodes.
-	 */
-	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
-		return (SET_ERROR(ENOTSUP));
-	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
-	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
-		return (SET_ERROR(ENOTSUP));
+	error = recv_begin_check_feature_flags_impl(drc->drc_featureflags,
+	    dp->dp_spa);
+	if (error != 0)
+		return (error);
 
 	/* 6 extra bytes for /%recv */
 	char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+
 	(void) snprintf(recvname, sizeof (recvname), "%s/%s",
 	    tofs, recv_clone_name);
 
-	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+	if (drc->drc_featureflags & DMU_BACKUP_FEATURE_RAW) {
 		/* raw receives require spill block allocation flag */
 		if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
 			return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
@@ -694,6 +1054,50 @@
 		return (SET_ERROR(EINVAL));
 	}
 
+	if (ds->ds_prev != NULL && drrb->drr_fromguid != 0)
+		drc->drc_fromsnapobj = ds->ds_prev->ds_object;
+
+	/*
+	 * If we're resuming, and the send is redacted, then the original send
+	 * must have been redacted, and must have been redacted with respect to
+	 * the same snapshots.
+	 */
+	if (drc->drc_featureflags & DMU_BACKUP_FEATURE_REDACTED) {
+		uint64_t num_ds_redact_snaps;
+		uint64_t *ds_redact_snaps;
+
+		uint_t num_stream_redact_snaps;
+		uint64_t *stream_redact_snaps;
+
+		if (nvlist_lookup_uint64_array(drc->drc_begin_nvl,
+		    BEGINNV_REDACT_SNAPS, &stream_redact_snaps,
+		    &num_stream_redact_snaps) != 0) {
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
+			return (SET_ERROR(EINVAL));
+		}
+
+		if (!dsl_dataset_get_uint64_array_feature(ds,
+		    SPA_FEATURE_REDACTED_DATASETS, &num_ds_redact_snaps,
+		    &ds_redact_snaps)) {
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
+			return (SET_ERROR(EINVAL));
+		}
+
+		for (int i = 0; i < num_ds_redact_snaps; i++) {
+			if (!redact_snaps_contains(ds_redact_snaps,
+			    num_ds_redact_snaps, stream_redact_snaps[i])) {
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+				return (SET_ERROR(EINVAL));
+			}
+		}
+	}
+
+	error = recv_check_large_blocks(ds, drc->drc_featureflags);
+	if (error != 0) {
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
+		return (error);
+	}
+
 	dsl_dataset_rele_flags(ds, dsflags, FTAG);
 	return (0);
 }
@@ -704,17 +1108,14 @@
 	dmu_recv_begin_arg_t *drba = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	const char *tofs = drba->drba_cookie->drc_tofs;
-	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
-	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	uint64_t featureflags = drba->drba_cookie->drc_featureflags;
 	dsl_dataset_t *ds;
-	objset_t *os;
-	ds_hold_flags_t dsflags = 0;
-	uint64_t dsobj;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	/* 6 extra bytes for /%recv */
 	char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
 
-	(void) snprintf(recvname, sizeof (recvname), "%s/%s",
-	    tofs, recv_clone_name);
+	(void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs,
+	    recv_clone_name);
 
 	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
 		drba->drba_cookie->drc_raw = B_TRUE;
@@ -722,33 +1123,25 @@
 		dsflags |= DS_HOLD_FLAG_DECRYPT;
 	}
 
-	if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
+	if (dsl_dataset_own_force(dp, recvname, dsflags, dmu_recv_tag, &ds)
+	    != 0) {
 		/* %recv does not exist; continue in tofs */
-		VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds));
+		VERIFY0(dsl_dataset_own_force(dp, tofs, dsflags, dmu_recv_tag,
+		    &ds));
 		drba->drba_cookie->drc_newfs = B_TRUE;
 	}
 
-	/* clear the inconsistent flag so that we can own it */
 	ASSERT(DS_IS_INCONSISTENT(ds));
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
-	dsobj = ds->ds_object;
-	dsl_dataset_rele_flags(ds, dsflags, FTAG);
-
-	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds));
-	VERIFY0(dmu_objset_from_ds(ds, &os));
-
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
-
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 	ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) ||
 	    drba->drba_cookie->drc_raw);
 	rrw_exit(&ds->ds_bp_rwlock, FTAG);
 
 	drba->drba_cookie->drc_ds = ds;
+	VERIFY0(dmu_objset_from_ds(ds, &drba->drba_cookie->drc_os));
+	drba->drba_cookie->drc_should_save = B_TRUE;
 
-	spa_history_log_internal_ds(ds, "resume receive", tx, "");
+	spa_history_log_internal_ds(ds, "resume receive", tx, " ");
 }
 
 /*
@@ -758,9 +1151,11 @@
 int
 dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
     boolean_t force, boolean_t resumable, nvlist_t *localprops,
-    nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc)
+    nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc,
+    zfs_file_t *fp, offset_t *voffp)
 {
 	dmu_recv_begin_arg_t drba = { 0 };
+	int err;
 
 	bzero(drc, sizeof (dmu_recv_cookie_t));
 	drc->drc_drr_begin = drr_begin;
@@ -770,6 +1165,7 @@
 	drc->drc_force = force;
 	drc->drc_resumable = resumable;
 	drc->drc_cred = CRED();
+	drc->drc_proc = curproc;
 	drc->drc_clone = (origin != NULL);
 
 	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
@@ -784,20 +1180,46 @@
 		return (SET_ERROR(EINVAL));
 	}
 
+	drc->drc_fp = fp;
+	drc->drc_voff = *voffp;
+	drc->drc_featureflags =
+	    DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
+
+	uint32_t payloadlen = drc->drc_drr_begin->drr_payloadlen;
+	void *payload = NULL;
+	if (payloadlen != 0)
+		payload = kmem_alloc(payloadlen, KM_SLEEP);
+
+	err = receive_read_payload_and_next_header(drc, payloadlen,
+	    payload);
+	if (err != 0) {
+		kmem_free(payload, payloadlen);
+		return (err);
+	}
+	if (payloadlen != 0) {
+		err = nvlist_unpack(payload, payloadlen, &drc->drc_begin_nvl,
+		    KM_SLEEP);
+		kmem_free(payload, payloadlen);
+		if (err != 0) {
+			kmem_free(drc->drc_next_rrd,
+			    sizeof (*drc->drc_next_rrd));
+			return (err);
+		}
+	}
+
 	if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
 		drc->drc_spill = B_TRUE;
 
 	drba.drba_origin = origin;
 	drba.drba_cookie = drc;
 	drba.drba_cred = CRED();
+	drba.drba_proc = curproc;
 
-	if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) &
-	    DMU_BACKUP_FEATURE_RESUMING) {
-		return (dsl_sync_task(tofs,
+	if (drc->drc_featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+		err = dsl_sync_task(tofs,
 		    dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync,
-		    &drba, 5, ZFS_SPACE_CHECK_NORMAL));
-	} else  {
-		int err;
+		    &drba, 5, ZFS_SPACE_CHECK_NORMAL);
+	} else {
 
 		/*
 		 * For non-raw, non-incremental, non-resuming receives the
@@ -814,143 +1236,25 @@
 		    origin == NULL && drc->drc_drrb->drr_fromguid == 0) {
 			err = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
 			    localprops, hidden_args, &drba.drba_dcp);
-			if (err != 0)
-				return (err);
 		}
 
-		err = dsl_sync_task(tofs,
-		    dmu_recv_begin_check, dmu_recv_begin_sync,
-		    &drba, 5, ZFS_SPACE_CHECK_NORMAL);
-		dsl_crypto_params_free(drba.drba_dcp, !!err);
-
-		return (err);
-	}
-}
-
-struct receive_record_arg {
-	dmu_replay_record_t header;
-	void *payload; /* Pointer to a buffer containing the payload */
-	/*
-	 * If the record is a write, pointer to the arc_buf_t containing the
-	 * payload.
-	 */
-	arc_buf_t *arc_buf;
-	int payload_size;
-	uint64_t bytes_read; /* bytes read from stream when record created */
-	boolean_t eos_marker; /* Marks the end of the stream */
-	bqueue_node_t node;
-};
-
-struct receive_writer_arg {
-	objset_t *os;
-	boolean_t byteswap;
-	bqueue_t q;
-
-	/*
-	 * These three args are used to signal to the main thread that we're
-	 * done.
-	 */
-	kmutex_t mutex;
-	kcondvar_t cv;
-	boolean_t done;
-
-	int err;
-	/* A map from guid to dataset to help handle dedup'd streams. */
-	avl_tree_t *guid_to_ds_map;
-	boolean_t resumable;
-	boolean_t raw;   /* DMU_BACKUP_FEATURE_RAW set */
-	boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
-	uint64_t last_object;
-	uint64_t last_offset;
-	uint64_t max_object; /* highest object ID referenced in stream */
-	uint64_t bytes_read; /* bytes read when current record created */
-
-	/* Encryption parameters for the last received DRR_OBJECT_RANGE */
-	boolean_t or_crypt_params_present;
-	uint64_t or_firstobj;
-	uint64_t or_numslots;
-	uint8_t or_salt[ZIO_DATA_SALT_LEN];
-	uint8_t or_iv[ZIO_DATA_IV_LEN];
-	uint8_t or_mac[ZIO_DATA_MAC_LEN];
-	boolean_t or_byteorder;
-};
-
-struct objlist {
-	list_t list; /* List of struct receive_objnode. */
-	/*
-	 * Last object looked up. Used to assert that objects are being looked
-	 * up in ascending order.
-	 */
-	uint64_t last_lookup;
-};
-
-struct receive_objnode {
-	list_node_t node;
-	uint64_t object;
-};
-
-struct receive_arg  {
-	objset_t *os;
-	vnode_t *vp; /* The vnode to read the stream from */
-	uint64_t voff; /* The current offset in the stream */
-	uint64_t bytes_read;
-	/*
-	 * A record that has had its payload read in, but hasn't yet been handed
-	 * off to the worker thread.
-	 */
-	struct receive_record_arg *rrd;
-	/* A record that has had its header read in, but not its payload. */
-	struct receive_record_arg *next_rrd;
-	zio_cksum_t cksum;
-	zio_cksum_t prev_cksum;
-	int err;
-	boolean_t byteswap;
-	boolean_t raw;
-	uint64_t featureflags;
-	/* Sorted list of objects not to issue prefetches for. */
-	struct objlist ignore_objlist;
-};
-
-typedef struct guid_map_entry {
-	uint64_t	guid;
-	boolean_t	raw;
-	dsl_dataset_t	*gme_ds;
-	avl_node_t	avlnode;
-} guid_map_entry_t;
-
-static int
-guid_compare(const void *arg1, const void *arg2)
-{
-	const guid_map_entry_t *gmep1 = (const guid_map_entry_t *)arg1;
-	const guid_map_entry_t *gmep2 = (const guid_map_entry_t *)arg2;
-
-	return (AVL_CMP(gmep1->guid, gmep2->guid));
-}
-
-static void
-free_guid_map_onexit(void *arg)
-{
-	avl_tree_t *ca = arg;
-	void *cookie = NULL;
-	guid_map_entry_t *gmep;
-
-	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
-		ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT;
-
-		if (gmep->raw) {
-			gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE;
-			dsflags &= ~DS_HOLD_FLAG_DECRYPT;
+		if (err == 0) {
+			err = dsl_sync_task(tofs,
+			    dmu_recv_begin_check, dmu_recv_begin_sync,
+			    &drba, 5, ZFS_SPACE_CHECK_NORMAL);
+			dsl_crypto_params_free(drba.drba_dcp, !!err);
 		}
-
-		dsl_dataset_disown(gmep->gme_ds, dsflags, gmep);
-		kmem_free(gmep, sizeof (guid_map_entry_t));
 	}
-	avl_destroy(ca);
-	kmem_free(ca, sizeof (avl_tree_t));
+
+	if (err != 0) {
+		kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd));
+		nvlist_free(drc->drc_begin_nvl);
+	}
+	return (err);
 }
 
 static int
-receive_read(struct receive_arg *ra, int len, void *buf)
+receive_read(dmu_recv_cookie_t *drc, int len, void *buf)
 {
 	int done = 0;
 
@@ -959,132 +1263,33 @@
 	 * comment in dump_bytes.
 	 */
 	ASSERT(len % 8 == 0 ||
-	    (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
+	    (drc->drc_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
 
 	while (done < len) {
 		ssize_t resid;
-
-		ra->err = vn_rdwr(UIO_READ, ra->vp,
-		    (char *)buf + done, len - done,
-		    ra->voff, UIO_SYSSPACE, FAPPEND,
-		    RLIM64_INFINITY, CRED(), &resid);
-
+		zfs_file_t *fp = drc->drc_fp;
+		int err = zfs_file_read(fp, (char *)buf + done,
+		    len - done, &resid);
 		if (resid == len - done) {
 			/*
-			 * Note: ECKSUM indicates that the receive
-			 * was interrupted and can potentially be resumed.
+			 * Note: ECKSUM or ZFS_ERR_STREAM_TRUNCATED indicates
+			 * that the receive was interrupted and can
+			 * potentially be resumed.
 			 */
-			ra->err = SET_ERROR(ECKSUM);
+			err = SET_ERROR(ZFS_ERR_STREAM_TRUNCATED);
 		}
-		ra->voff += len - done - resid;
+		drc->drc_voff += len - done - resid;
 		done = len - resid;
-		if (ra->err != 0)
-			return (ra->err);
+		if (err != 0)
+			return (err);
 	}
 
-	ra->bytes_read += len;
+	drc->drc_bytes_read += len;
 
 	ASSERT3U(done, ==, len);
 	return (0);
 }
 
-noinline static void
-byteswap_record(dmu_replay_record_t *drr)
-{
-#define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
-#define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
-	drr->drr_type = BSWAP_32(drr->drr_type);
-	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
-
-	switch (drr->drr_type) {
-	case DRR_BEGIN:
-		DO64(drr_begin.drr_magic);
-		DO64(drr_begin.drr_versioninfo);
-		DO64(drr_begin.drr_creation_time);
-		DO32(drr_begin.drr_type);
-		DO32(drr_begin.drr_flags);
-		DO64(drr_begin.drr_toguid);
-		DO64(drr_begin.drr_fromguid);
-		break;
-	case DRR_OBJECT:
-		DO64(drr_object.drr_object);
-		DO32(drr_object.drr_type);
-		DO32(drr_object.drr_bonustype);
-		DO32(drr_object.drr_blksz);
-		DO32(drr_object.drr_bonuslen);
-		DO32(drr_object.drr_raw_bonuslen);
-		DO64(drr_object.drr_toguid);
-		DO64(drr_object.drr_maxblkid);
-		break;
-	case DRR_FREEOBJECTS:
-		DO64(drr_freeobjects.drr_firstobj);
-		DO64(drr_freeobjects.drr_numobjs);
-		DO64(drr_freeobjects.drr_toguid);
-		break;
-	case DRR_WRITE:
-		DO64(drr_write.drr_object);
-		DO32(drr_write.drr_type);
-		DO64(drr_write.drr_offset);
-		DO64(drr_write.drr_logical_size);
-		DO64(drr_write.drr_toguid);
-		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
-		DO64(drr_write.drr_key.ddk_prop);
-		DO64(drr_write.drr_compressed_size);
-		break;
-	case DRR_WRITE_BYREF:
-		DO64(drr_write_byref.drr_object);
-		DO64(drr_write_byref.drr_offset);
-		DO64(drr_write_byref.drr_length);
-		DO64(drr_write_byref.drr_toguid);
-		DO64(drr_write_byref.drr_refguid);
-		DO64(drr_write_byref.drr_refobject);
-		DO64(drr_write_byref.drr_refoffset);
-		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
-		    drr_key.ddk_cksum);
-		DO64(drr_write_byref.drr_key.ddk_prop);
-		break;
-	case DRR_WRITE_EMBEDDED:
-		DO64(drr_write_embedded.drr_object);
-		DO64(drr_write_embedded.drr_offset);
-		DO64(drr_write_embedded.drr_length);
-		DO64(drr_write_embedded.drr_toguid);
-		DO32(drr_write_embedded.drr_lsize);
-		DO32(drr_write_embedded.drr_psize);
-		break;
-	case DRR_FREE:
-		DO64(drr_free.drr_object);
-		DO64(drr_free.drr_offset);
-		DO64(drr_free.drr_length);
-		DO64(drr_free.drr_toguid);
-		break;
-	case DRR_SPILL:
-		DO64(drr_spill.drr_object);
-		DO64(drr_spill.drr_length);
-		DO64(drr_spill.drr_toguid);
-		DO64(drr_spill.drr_compressed_size);
-		DO32(drr_spill.drr_type);
-		break;
-	case DRR_OBJECT_RANGE:
-		DO64(drr_object_range.drr_firstobj);
-		DO64(drr_object_range.drr_numslots);
-		DO64(drr_object_range.drr_toguid);
-		break;
-	case DRR_END:
-		DO64(drr_end.drr_toguid);
-		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
-		break;
-	default:
-		break;
-	}
-
-	if (drr->drr_type != DRR_BEGIN) {
-		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
-	}
-
-#undef DO64
-#undef DO32
-}
-
 static inline uint8_t
 deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
 {
@@ -1134,14 +1339,253 @@
 	rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
 }
 
+static int
+receive_object_is_same_generation(objset_t *os, uint64_t object,
+    dmu_object_type_t old_bonus_type, dmu_object_type_t new_bonus_type,
+    const void *new_bonus, boolean_t *samegenp)
+{
+	zfs_file_info_t zoi;
+	int err;
+
+	dmu_buf_t *old_bonus_dbuf;
+	err = dmu_bonus_hold(os, object, FTAG, &old_bonus_dbuf);
+	if (err != 0)
+		return (err);
+	err = dmu_get_file_info(os, old_bonus_type, old_bonus_dbuf->db_data,
+	    &zoi);
+	dmu_buf_rele(old_bonus_dbuf, FTAG);
+	if (err != 0)
+		return (err);
+	uint64_t old_gen = zoi.zfi_generation;
+
+	err = dmu_get_file_info(os, new_bonus_type, new_bonus, &zoi);
+	if (err != 0)
+		return (err);
+	uint64_t new_gen = zoi.zfi_generation;
+
+	*samegenp = (old_gen == new_gen);
+	return (0);
+}
+
+static int
+receive_handle_existing_object(const struct receive_writer_arg *rwa,
+    const struct drr_object *drro, const dmu_object_info_t *doi,
+    const void *bonus_data,
+    uint64_t *object_to_hold, uint32_t *new_blksz)
+{
+	uint32_t indblksz = drro->drr_indblkshift ?
+	    1ULL << drro->drr_indblkshift : 0;
+	int nblkptr = deduce_nblkptr(drro->drr_bonustype,
+	    drro->drr_bonuslen);
+	uint8_t dn_slots = drro->drr_dn_slots != 0 ?
+	    drro->drr_dn_slots : DNODE_MIN_SLOTS;
+	boolean_t do_free_range = B_FALSE;
+	int err;
+
+	*object_to_hold = drro->drr_object;
+
+	/* nblkptr should be bounded by the bonus size and type */
+	if (rwa->raw && nblkptr != drro->drr_nblkptr)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * After the previous send stream, the sending system may
+	 * have freed this object, and then happened to re-allocate
+	 * this object number in a later txg. In this case, we are
+	 * receiving a different logical file, and the block size may
+	 * appear to be different.  i.e. we may have a different
+	 * block size for this object than what the send stream says.
+	 * In this case we need to remove the object's contents,
+	 * so that its structure can be changed and then its contents
+	 * entirely replaced by subsequent WRITE records.
+	 *
+	 * If this is a -L (--large-block) incremental stream, and
+	 * the previous stream was not -L, the block size may appear
+	 * to increase.  i.e. we may have a smaller block size for
+	 * this object than what the send stream says.  In this case
+	 * we need to keep the object's contents and block size
+	 * intact, so that we don't lose parts of the object's
+	 * contents that are not changed by this incremental send
+	 * stream.
+	 *
+	 * We can distinguish between the two above cases by using
+	 * the ZPL's generation number (see
+	 * receive_object_is_same_generation()).  However, we only
+	 * want to rely on the generation number when absolutely
+	 * necessary, because with raw receives, the generation is
+	 * encrypted.  We also want to minimize dependence on the
+	 * ZPL, so that other types of datasets can also be received
+	 * (e.g. ZVOLs, although note that ZVOLs currently do not
+	 * reallocate their objects or change their structure).
+	 * Therefore, we check a number of different cases where we
+	 * know it is safe to discard the object's contents, before
+	 * using the ZPL's generation number to make the above
+	 * distinction.
+	 */
+	if (drro->drr_blksz != doi->doi_data_block_size) {
+		if (rwa->raw) {
+			/*
+			 * RAW streams always have large blocks, so
+			 * we are sure that the data is not needed
+			 * due to changing --large-block to be on.
+			 * Which is fortunate since the bonus buffer
+			 * (which contains the ZPL generation) is
+			 * encrypted, and the key might not be
+			 * loaded.
+			 */
+			do_free_range = B_TRUE;
+		} else if (rwa->full) {
+			/*
+			 * This is a full send stream, so it always
+			 * replaces what we have.  Even if the
+			 * generation numbers happen to match, this
+			 * can not actually be the same logical file.
+			 * This is relevant when receiving a full
+			 * send as a clone.
+			 */
+			do_free_range = B_TRUE;
+		} else if (drro->drr_type !=
+		    DMU_OT_PLAIN_FILE_CONTENTS ||
+		    doi->doi_type != DMU_OT_PLAIN_FILE_CONTENTS) {
+			/*
+			 * PLAIN_FILE_CONTENTS are the only type of
+			 * objects that have ever been stored with
+			 * large blocks, so we don't need the special
+			 * logic below.  ZAP blocks can shrink (when
+			 * there's only one block), so we don't want
+			 * to hit the error below about block size
+			 * only increasing.
+			 */
+			do_free_range = B_TRUE;
+		} else if (doi->doi_max_offset <=
+		    doi->doi_data_block_size) {
+			/*
+			 * There is only one block.  We can free it,
+			 * because its contents will be replaced by a
+			 * WRITE record.  This can not be the no-L ->
+			 * -L case, because the no-L case would have
+			 * resulted in multiple blocks.  If we
+			 * supported -L -> no-L, it would not be safe
+			 * to free the file's contents.  Fortunately,
+			 * that is not allowed (see
+			 * recv_check_large_blocks()).
+			 */
+			do_free_range = B_TRUE;
+		} else {
+			boolean_t is_same_gen;
+			err = receive_object_is_same_generation(rwa->os,
+			    drro->drr_object, doi->doi_bonus_type,
+			    drro->drr_bonustype, bonus_data, &is_same_gen);
+			if (err != 0)
+				return (SET_ERROR(EINVAL));
+
+			if (is_same_gen) {
+				/*
+				 * This is the same logical file, and
+				 * the block size must be increasing.
+				 * It could only decrease if
+				 * --large-block was changed to be
+				 * off, which is checked in
+				 * recv_check_large_blocks().
+				 */
+				if (drro->drr_blksz <=
+				    doi->doi_data_block_size)
+					return (SET_ERROR(EINVAL));
+				/*
+				 * We keep the existing blocksize and
+				 * contents.
+				 */
+				*new_blksz =
+				    doi->doi_data_block_size;
+			} else {
+				do_free_range = B_TRUE;
+			}
+		}
+	}
+
+	/* nblkptr can only decrease if the object was reallocated */
+	if (nblkptr < doi->doi_nblkptr)
+		do_free_range = B_TRUE;
+
+	/* number of slots can only change on reallocation */
+	if (dn_slots != doi->doi_dnodesize >> DNODE_SHIFT)
+		do_free_range = B_TRUE;
+
+	/*
+	 * For raw sends we also check a few other fields to
+	 * ensure we are preserving the objset structure exactly
+	 * as it was on the receive side:
+	 *     - A changed indirect block size
+	 *     - A smaller nlevels
+	 */
+	if (rwa->raw) {
+		if (indblksz != doi->doi_metadata_block_size)
+			do_free_range = B_TRUE;
+		if (drro->drr_nlevels < doi->doi_indirection)
+			do_free_range = B_TRUE;
+	}
+
+	if (do_free_range) {
+		err = dmu_free_long_range(rwa->os, drro->drr_object,
+		    0, DMU_OBJECT_END);
+		if (err != 0)
+			return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * The dmu does not currently support decreasing nlevels or changing
+	 * indirect block size if there is already one, same as changing the
+	 * number of dnode slots on an object.  For non-raw sends this
+	 * does not matter and the new object can just use the previous one's
+	 * parameters.  For raw sends, however, the structure of the received
+	 * dnode (including indirects and dnode slots) must match that of the
+	 * send side.  Therefore, instead of using dmu_object_reclaim(), we
+	 * must free the object completely and call dmu_object_claim_dnsize()
+	 * instead.
+	 */
+	if ((rwa->raw && ((doi->doi_indirection > 1 &&
+	    indblksz != doi->doi_metadata_block_size) ||
+	    drro->drr_nlevels < doi->doi_indirection)) ||
+	    dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
+		err = dmu_free_long_object(rwa->os, drro->drr_object);
+		if (err != 0)
+			return (SET_ERROR(EINVAL));
+
+		txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+		*object_to_hold = DMU_NEW_OBJECT;
+	}
+
+	/*
+	 * For raw receives, free everything beyond the new incoming
+	 * maxblkid. Normally this would be done with a DRR_FREE
+	 * record that would come after this DRR_OBJECT record is
+	 * processed. However, for raw receives we manually set the
+	 * maxblkid from the drr_maxblkid and so we must first free
+	 * everything above that blkid to ensure the DMU is always
+	 * consistent with itself. We will never free the first block
+	 * of the object here because a maxblkid of 0 could indicate
+	 * an object with a single block or one with no blocks. This
+	 * free may be skipped when dmu_free_long_range() was called
+	 * above since it covers the entire object's contents.
+	 */
+	if (rwa->raw && *object_to_hold != DMU_NEW_OBJECT && !do_free_range) {
+		err = dmu_free_long_range(rwa->os, drro->drr_object,
+		    (drro->drr_maxblkid + 1) * doi->doi_data_block_size,
+		    DMU_OBJECT_END);
+		if (err != 0)
+			return (SET_ERROR(EINVAL));
+	}
+	return (0);
+}
+
 noinline static int
 receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
     void *data)
 {
 	dmu_object_info_t doi;
 	dmu_tx_t *tx;
-	uint64_t object;
 	int err;
+	uint32_t new_blksz = drro->drr_blksz;
 	uint8_t dn_slots = drro->drr_dn_slots != 0 ?
 	    drro->drr_dn_slots : DNODE_MIN_SLOTS;
 
@@ -1156,7 +1600,7 @@
 	    drro->drr_bonuslen >
 	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) ||
 	    dn_slots >
-	    (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT))  {
+	    (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT)) {
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1191,6 +1635,7 @@
 	}
 
 	err = dmu_object_info(rwa->os, drro->drr_object, &doi);
+
 	if (err != 0 && err != ENOENT && err != EEXIST)
 		return (SET_ERROR(EINVAL));
 
@@ -1204,86 +1649,10 @@
 	 * Raw receives will also check that the indirect structure of the
 	 * dnode hasn't changed.
 	 */
+	uint64_t object_to_hold;
 	if (err == 0) {
-		uint32_t indblksz = drro->drr_indblkshift ?
-		    1ULL << drro->drr_indblkshift : 0;
-		int nblkptr = deduce_nblkptr(drro->drr_bonustype,
-		    drro->drr_bonuslen);
-		boolean_t did_free = B_FALSE;
-
-		object = drro->drr_object;
-
-		/* nblkptr should be bounded by the bonus size and type */
-		if (rwa->raw && nblkptr != drro->drr_nblkptr)
-			return (SET_ERROR(EINVAL));
-
-		/*
-		 * Check for indicators that the object was freed and
-		 * reallocated. For all sends, these indicators are:
-		 *     - A changed block size
-		 *     - A smaller nblkptr
-		 *     - A changed dnode size
-		 * For raw sends we also check a few other fields to
-		 * ensure we are preserving the objset structure exactly
-		 * as it was on the receive side:
-		 *     - A changed indirect block size
-		 *     - A smaller nlevels
-		 */
-		if (drro->drr_blksz != doi.doi_data_block_size ||
-		    nblkptr < doi.doi_nblkptr ||
-		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
-		    (rwa->raw &&
-		    (indblksz != doi.doi_metadata_block_size ||
-		    drro->drr_nlevels < doi.doi_indirection))) {
-			err = dmu_free_long_range(rwa->os,
-			    drro->drr_object, 0, DMU_OBJECT_END);
-			if (err != 0)
-				return (SET_ERROR(EINVAL));
-			else
-				did_free = B_TRUE;
-		}
-
-		/*
-		 * The dmu does not currently support decreasing nlevels
-		 * or changing the number of dnode slots on an object. For
-		 * non-raw sends, this does not matter and the new object
-		 * can just use the previous one's nlevels. For raw sends,
-		 * however, the structure of the received dnode (including
-		 * nlevels and dnode slots) must match that of the send
-		 * side. Therefore, instead of using dmu_object_reclaim(),
-		 * we must free the object completely and call
-		 * dmu_object_claim_dnsize() instead.
-		 */
-		if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
-		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
-			err = dmu_free_long_object(rwa->os, drro->drr_object);
-			if (err != 0)
-				return (SET_ERROR(EINVAL));
-
-			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
-			object = DMU_NEW_OBJECT;
-		}
-
-		/*
-		 * For raw receives, free everything beyond the new incoming
-		 * maxblkid. Normally this would be done with a DRR_FREE
-		 * record that would come after this DRR_OBJECT record is
-		 * processed. However, for raw receives we manually set the
-		 * maxblkid from the drr_maxblkid and so we must first free
-		 * everything above that blkid to ensure the DMU is always
-		 * consistent with itself. We will never free the first block
-		 * of the object here because a maxblkid of 0 could indicate
-		 * an object with a single block or one with no blocks. This
-		 * free may be skipped when dmu_free_long_range() was called
-		 * above since it covers the entire object's contents.
-		 */
-		if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) {
-			err = dmu_free_long_range(rwa->os, drro->drr_object,
-			    (drro->drr_maxblkid + 1) * doi.doi_data_block_size,
-			    DMU_OBJECT_END);
-			if (err != 0)
-				return (SET_ERROR(EINVAL));
-		}
+		err = receive_handle_existing_object(rwa, drro, &doi, data,
+		    &object_to_hold, &new_blksz);
 	} else if (err == EEXIST) {
 		/*
 		 * The object requested is currently an interior slot of a
@@ -1298,12 +1667,24 @@
 			return (SET_ERROR(EINVAL));
 
 		/* object was freed and we are about to allocate a new one */
-		object = DMU_NEW_OBJECT;
+		object_to_hold = DMU_NEW_OBJECT;
 	} else {
+		/*
+		 * If the only record in this range so far was DRR_FREEOBJECTS
+		 * with at least one actually freed object, it's possible that
+		 * the block will now be converted to a hole. We need to wait
+		 * for the txg to sync to prevent races.
+		 */
+		if (rwa->or_need_sync == ORNS_YES)
+			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+
 		/* object is free and we are about to allocate a new one */
-		object = DMU_NEW_OBJECT;
+		object_to_hold = DMU_NEW_OBJECT;
 	}
 
+	/* Only relevant for the first object in the range */
+	rwa->or_need_sync = ORNS_NO;
+
 	/*
 	 * If this is a multi-slot dnode there is a chance that this
 	 * object will expand into a slot that is already used by
@@ -1336,27 +1717,27 @@
 	}
 
 	tx = dmu_tx_create(rwa->os);
-	dmu_tx_hold_bonus(tx, object);
-	dmu_tx_hold_write(tx, object, 0, 0);
+	dmu_tx_hold_bonus(tx, object_to_hold);
+	dmu_tx_hold_write(tx, object_to_hold, 0, 0);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
 
-	if (object == DMU_NEW_OBJECT) {
+	if (object_to_hold == DMU_NEW_OBJECT) {
 		/* Currently free, wants to be allocated */
 		err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
-		    drro->drr_type, drro->drr_blksz,
+		    drro->drr_type, new_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
 		    dn_slots << DNODE_SHIFT, tx);
 	} else if (drro->drr_type != doi.doi_type ||
-	    drro->drr_blksz != doi.doi_data_block_size ||
+	    new_blksz != doi.doi_data_block_size ||
 	    drro->drr_bonustype != doi.doi_bonus_type ||
 	    drro->drr_bonuslen != doi.doi_bonus_size) {
 		/* Currently allocated, but with different properties */
 		err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
-		    drro->drr_type, drro->drr_blksz,
+		    drro->drr_type, new_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
 		    dn_slots << DNODE_SHIFT, rwa->spill ?
 		    DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
@@ -1422,6 +1803,7 @@
 		 * For non-new objects block size and indirect block
 		 * shift cannot change and nlevels can only increase.
 		 */
+		ASSERT3U(new_blksz, ==, drro->drr_blksz);
 		VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
 		    drro->drr_blksz, drro->drr_indblkshift, tx));
 		VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
@@ -1481,7 +1863,8 @@
 		return (SET_ERROR(EINVAL));
 
 	for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
-	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs &&
+	    obj < DN_MAX_OBJECT && next_err == 0;
 	    next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
 		dmu_object_info_t doi;
 		int err;
@@ -1497,21 +1880,198 @@
 		if (err != 0)
 			return (err);
 
-		if (obj > rwa->max_object)
-			rwa->max_object = obj;
+		if (rwa->or_need_sync == ORNS_MAYBE)
+			rwa->or_need_sync = ORNS_YES;
 	}
 	if (next_err != ESRCH)
 		return (next_err);
 	return (0);
 }
 
-noinline static int
-receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
-    arc_buf_t *abuf)
+/*
+ * Note: if this fails, the caller will clean up any records left on the
+ * rwa->write_batch list.
+ */
+static int
+flush_write_batch_impl(struct receive_writer_arg *rwa)
 {
-	int err;
-	dmu_tx_t *tx;
 	dnode_t *dn;
+	int err;
+
+	if (dnode_hold(rwa->os, rwa->last_object, FTAG, &dn) != 0)
+		return (SET_ERROR(EINVAL));
+
+	struct receive_record_arg *last_rrd = list_tail(&rwa->write_batch);
+	struct drr_write *last_drrw = &last_rrd->header.drr_u.drr_write;
+
+	struct receive_record_arg *first_rrd = list_head(&rwa->write_batch);
+	struct drr_write *first_drrw = &first_rrd->header.drr_u.drr_write;
+
+	ASSERT3U(rwa->last_object, ==, last_drrw->drr_object);
+	ASSERT3U(rwa->last_offset, ==, last_drrw->drr_offset);
+
+	dmu_tx_t *tx = dmu_tx_create(rwa->os);
+	dmu_tx_hold_write_by_dnode(tx, dn, first_drrw->drr_offset,
+	    last_drrw->drr_offset - first_drrw->drr_offset +
+	    last_drrw->drr_logical_size);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err != 0) {
+		dmu_tx_abort(tx);
+		dnode_rele(dn, FTAG);
+		return (err);
+	}
+
+	struct receive_record_arg *rrd;
+	while ((rrd = list_head(&rwa->write_batch)) != NULL) {
+		struct drr_write *drrw = &rrd->header.drr_u.drr_write;
+		abd_t *abd = rrd->abd;
+
+		ASSERT3U(drrw->drr_object, ==, rwa->last_object);
+
+		if (drrw->drr_logical_size != dn->dn_datablksz) {
+			/*
+			 * The WRITE record is larger than the object's block
+			 * size.  We must be receiving an incremental
+			 * large-block stream into a dataset that previously did
+			 * a non-large-block receive.  Lightweight writes must
+			 * be exactly one block, so we need to decompress the
+			 * data (if compressed) and do a normal dmu_write().
+			 */
+			ASSERT3U(drrw->drr_logical_size, >, dn->dn_datablksz);
+			if (DRR_WRITE_COMPRESSED(drrw)) {
+				abd_t *decomp_abd =
+				    abd_alloc_linear(drrw->drr_logical_size,
+				    B_FALSE);
+
+				err = zio_decompress_data(
+				    drrw->drr_compressiontype,
+				    abd, abd_to_buf(decomp_abd),
+				    abd_get_size(abd),
+				    abd_get_size(decomp_abd), NULL);
+
+				if (err == 0) {
+					dmu_write_by_dnode(dn,
+					    drrw->drr_offset,
+					    drrw->drr_logical_size,
+					    abd_to_buf(decomp_abd), tx);
+				}
+				abd_free(decomp_abd);
+			} else {
+				dmu_write_by_dnode(dn,
+				    drrw->drr_offset,
+				    drrw->drr_logical_size,
+				    abd_to_buf(abd), tx);
+			}
+			if (err == 0)
+				abd_free(abd);
+		} else {
+			zio_prop_t zp;
+			dmu_write_policy(rwa->os, dn, 0, 0, &zp);
+
+			enum zio_flag zio_flags = 0;
+
+			if (rwa->raw) {
+				zp.zp_encrypt = B_TRUE;
+				zp.zp_compress = drrw->drr_compressiontype;
+				zp.zp_byteorder = ZFS_HOST_BYTEORDER ^
+				    !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
+				    rwa->byteswap;
+				bcopy(drrw->drr_salt, zp.zp_salt,
+				    ZIO_DATA_SALT_LEN);
+				bcopy(drrw->drr_iv, zp.zp_iv,
+				    ZIO_DATA_IV_LEN);
+				bcopy(drrw->drr_mac, zp.zp_mac,
+				    ZIO_DATA_MAC_LEN);
+				if (DMU_OT_IS_ENCRYPTED(zp.zp_type)) {
+					zp.zp_nopwrite = B_FALSE;
+					zp.zp_copies = MIN(zp.zp_copies,
+					    SPA_DVAS_PER_BP - 1);
+				}
+				zio_flags |= ZIO_FLAG_RAW;
+			} else if (DRR_WRITE_COMPRESSED(drrw)) {
+				ASSERT3U(drrw->drr_compressed_size, >, 0);
+				ASSERT3U(drrw->drr_logical_size, >=,
+				    drrw->drr_compressed_size);
+				zp.zp_compress = drrw->drr_compressiontype;
+				zio_flags |= ZIO_FLAG_RAW_COMPRESS;
+			} else if (rwa->byteswap) {
+				/*
+				 * Note: compressed blocks never need to be
+				 * byteswapped, because WRITE records for
+				 * metadata blocks are never compressed. The
+				 * exception is raw streams, which are written
+				 * in the original byteorder, and the byteorder
+				 * bit is preserved in the BP by setting
+				 * zp_byteorder above.
+				 */
+				dmu_object_byteswap_t byteswap =
+				    DMU_OT_BYTESWAP(drrw->drr_type);
+				dmu_ot_byteswap[byteswap].ob_func(
+				    abd_to_buf(abd),
+				    DRR_WRITE_PAYLOAD_SIZE(drrw));
+			}
+
+			/*
+			 * Since this data can't be read until the receive
+			 * completes, we can do a "lightweight" write for
+			 * improved performance.
+			 */
+			err = dmu_lightweight_write_by_dnode(dn,
+			    drrw->drr_offset, abd, &zp, zio_flags, tx);
+		}
+
+		if (err != 0) {
+			/*
+			 * This rrd is left on the list, so the caller will
+			 * free it (and the abd).
+			 */
+			break;
+		}
+
+		/*
+		 * Note: If the receive fails, we want the resume stream to
+		 * start with the same record that we last successfully
+		 * received (as opposed to the next record), so that we can
+		 * verify that we are resuming from the correct location.
+		 */
+		save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
+
+		list_remove(&rwa->write_batch, rrd);
+		kmem_free(rrd, sizeof (*rrd));
+	}
+
+	dmu_tx_commit(tx);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+noinline static int
+flush_write_batch(struct receive_writer_arg *rwa)
+{
+	if (list_is_empty(&rwa->write_batch))
+		return (0);
+	int err = rwa->err;
+	if (err == 0)
+		err = flush_write_batch_impl(rwa);
+	if (err != 0) {
+		struct receive_record_arg *rrd;
+		while ((rrd = list_remove_head(&rwa->write_batch)) != NULL) {
+			abd_free(rrd->abd);
+			kmem_free(rrd, sizeof (*rrd));
+		}
+	}
+	ASSERT(list_is_empty(&rwa->write_batch));
+	return (err);
+}
+
+noinline static int
+receive_process_write_record(struct receive_writer_arg *rwa,
+    struct receive_record_arg *rrd)
+{
+	int err = 0;
+
+	ASSERT3U(rrd->header.drr_type, ==, DRR_WRITE);
+	struct drr_write *drrw = &rrd->header.drr_u.drr_write;
 
 	if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
 	    !DMU_OT_IS_VALID(drrw->drr_type))
@@ -1526,127 +2086,31 @@
 	    drrw->drr_offset < rwa->last_offset)) {
 		return (SET_ERROR(EINVAL));
 	}
+
+	struct receive_record_arg *first_rrd = list_head(&rwa->write_batch);
+	struct drr_write *first_drrw = &first_rrd->header.drr_u.drr_write;
+	uint64_t batch_size =
+	    MIN(zfs_recv_write_batch_size, DMU_MAX_ACCESS / 2);
+	if (first_rrd != NULL &&
+	    (drrw->drr_object != first_drrw->drr_object ||
+	    drrw->drr_offset >= first_drrw->drr_offset + batch_size)) {
+		err = flush_write_batch(rwa);
+		if (err != 0)
+			return (err);
+	}
+
 	rwa->last_object = drrw->drr_object;
 	rwa->last_offset = drrw->drr_offset;
 
 	if (rwa->last_object > rwa->max_object)
 		rwa->max_object = rwa->last_object;
 
-	if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0)
-		return (SET_ERROR(EINVAL));
-
-	tx = dmu_tx_create(rwa->os);
-	dmu_tx_hold_write(tx, drrw->drr_object,
-	    drrw->drr_offset, drrw->drr_logical_size);
-	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err != 0) {
-		dmu_tx_abort(tx);
-		return (err);
-	}
-
-	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
-	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
-		dmu_object_byteswap_t byteswap =
-		    DMU_OT_BYTESWAP(drrw->drr_type);
-		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
-		    DRR_WRITE_PAYLOAD_SIZE(drrw));
-	}
-
-	VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn));
-	err = dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx);
-	if (err != 0) {
-		dnode_rele(dn, FTAG);
-		dmu_tx_commit(tx);
-		return (err);
-	}
-	dnode_rele(dn, FTAG);
-
+	list_insert_tail(&rwa->write_batch, rrd);
 	/*
-	 * Note: If the receive fails, we want the resume stream to start
-	 * with the same record that we last successfully received (as opposed
-	 * to the next record), so that we can verify that we are
-	 * resuming from the correct location.
+	 * Return EAGAIN to indicate that we will use this rrd again,
+	 * so the caller should not free it
 	 */
-	save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
-	dmu_tx_commit(tx);
-
-	return (0);
-}
-
-/*
- * Handle a DRR_WRITE_BYREF record.  This record is used in dedup'ed
- * streams to refer to a copy of the data that is already on the
- * system because it came in earlier in the stream.  This function
- * finds the earlier copy of the data, and uses that copy instead of
- * data from the stream to fulfill this write.
- */
-static int
-receive_write_byref(struct receive_writer_arg *rwa,
-    struct drr_write_byref *drrwbr)
-{
-	dmu_tx_t *tx;
-	int err;
-	guid_map_entry_t gmesrch;
-	guid_map_entry_t *gmep;
-	avl_index_t where;
-	objset_t *ref_os = NULL;
-	int flags = DMU_READ_PREFETCH;
-	dmu_buf_t *dbp;
-
-	if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
-		return (SET_ERROR(EINVAL));
-
-	/*
-	 * If the GUID of the referenced dataset is different from the
-	 * GUID of the target dataset, find the referenced dataset.
-	 */
-	if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
-		gmesrch.guid = drrwbr->drr_refguid;
-		if ((gmep = avl_find(rwa->guid_to_ds_map, &gmesrch,
-		    &where)) == NULL) {
-			return (SET_ERROR(EINVAL));
-		}
-		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
-			return (SET_ERROR(EINVAL));
-	} else {
-		ref_os = rwa->os;
-	}
-
-	if (drrwbr->drr_object > rwa->max_object)
-		rwa->max_object = drrwbr->drr_object;
-
-	if (rwa->raw)
-		flags |= DMU_READ_NO_DECRYPT;
-
-	/* may return either a regular db or an encrypted one */
-	err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
-	    drrwbr->drr_refoffset, FTAG, &dbp, flags);
-	if (err != 0)
-		return (err);
-
-	tx = dmu_tx_create(rwa->os);
-
-	dmu_tx_hold_write(tx, drrwbr->drr_object,
-	    drrwbr->drr_offset, drrwbr->drr_length);
-	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err != 0) {
-		dmu_tx_abort(tx);
-		return (err);
-	}
-
-	if (rwa->raw) {
-		dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
-		    drrwbr->drr_offset, dbp, tx);
-	} else {
-		dmu_write(rwa->os, drrwbr->drr_object,
-		    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
-	}
-	dmu_buf_rele(dbp, FTAG);
-
-	/* See comment in restore_write. */
-	save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx);
-	dmu_tx_commit(tx);
-	return (0);
+	return (EAGAIN);
 }
 
 static int
@@ -1695,12 +2159,10 @@
 
 static int
 receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
-    arc_buf_t *abuf)
+    abd_t *abd)
 {
-	dmu_tx_t *tx;
 	dmu_buf_t *db, *db_spill;
 	int err;
-	uint32_t flags = 0;
 
 	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
 	    drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
@@ -1713,7 +2175,7 @@
 	 * the DRR_FLAG_SPILL_BLOCK flag.
 	 */
 	if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
-		dmu_return_arcbuf(abuf);
+		abd_free(abd);
 		return (0);
 	}
 
@@ -1722,8 +2184,6 @@
 		    drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
 		    drrs->drr_compressed_size == 0)
 			return (SET_ERROR(EINVAL));
-
-		flags |= DMU_READ_NO_DECRYPT;
 	}
 
 	if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0)
@@ -1739,7 +2199,7 @@
 		return (err);
 	}
 
-	tx = dmu_tx_create(rwa->os);
+	dmu_tx_t *tx = dmu_tx_create(rwa->os);
 
 	dmu_tx_hold_spill(tx, db->db_object);
 
@@ -1758,18 +2218,35 @@
 	 */
 	if (db_spill->db_size != drrs->drr_length) {
 		dmu_buf_will_fill(db_spill, tx);
-		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
+		VERIFY0(dbuf_spill_set_blksz(db_spill,
 		    drrs->drr_length, tx));
 	}
 
-	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
-	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
-		dmu_object_byteswap_t byteswap =
-		    DMU_OT_BYTESWAP(drrs->drr_type);
-		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
-		    DRR_SPILL_PAYLOAD_SIZE(drrs));
+	arc_buf_t *abuf;
+	if (rwa->raw) {
+		boolean_t byteorder = ZFS_HOST_BYTEORDER ^
+		    !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
+		    rwa->byteswap;
+
+		abuf = arc_loan_raw_buf(dmu_objset_spa(rwa->os),
+		    drrs->drr_object, byteorder, drrs->drr_salt,
+		    drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
+		    drrs->drr_compressed_size, drrs->drr_length,
+		    drrs->drr_compressiontype, 0);
+	} else {
+		abuf = arc_loan_buf(dmu_objset_spa(rwa->os),
+		    DMU_OT_IS_METADATA(drrs->drr_type),
+		    drrs->drr_length);
+		if (rwa->byteswap) {
+			dmu_object_byteswap_t byteswap =
+			    DMU_OT_BYTESWAP(drrs->drr_type);
+			dmu_ot_byteswap[byteswap].ob_func(abd_to_buf(abd),
+			    DRR_SPILL_PAYLOAD_SIZE(drrs));
+		}
 	}
 
+	bcopy(abd_to_buf(abd), abuf->b_data, DRR_SPILL_PAYLOAD_SIZE(drrs));
+	abd_free(abd);
 	dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
 
 	dmu_buf_rele(db, FTAG);
@@ -1785,7 +2262,7 @@
 {
 	int err;
 
-	if (drrf->drr_length != DMU_OBJECT_END &&
+	if (drrf->drr_length != -1ULL &&
 	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
 		return (SET_ERROR(EINVAL));
 
@@ -1847,16 +2324,35 @@
 	bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
 	rwa->or_byteorder = byteorder;
 
+	rwa->or_need_sync = ORNS_MAYBE;
+
 	return (0);
 }
 
+/*
+ * Until we have the ability to redact large ranges of data efficiently, we
+ * process these records as frees.
+ */
+/* ARGSUSED */
+noinline static int
+receive_redact(struct receive_writer_arg *rwa, struct drr_redact *drrr)
+{
+	struct drr_free drrf = {0};
+	drrf.drr_length = drrr->drr_length;
+	drrf.drr_object = drrr->drr_object;
+	drrf.drr_offset = drrr->drr_offset;
+	drrf.drr_toguid = drrr->drr_toguid;
+	return (receive_free(rwa, &drrf));
+}
+
 /* used to destroy the drc_ds on error */
 static void
 dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
 {
 	dsl_dataset_t *ds = drc->drc_ds;
-	ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	ds_hold_flags_t dsflags;
 
+	dsflags = (drc->drc_raw) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
 	/*
 	 * Wait for the txg sync before cleaning up the receive. For
 	 * resumable receives, this ensures that our resume state has
@@ -1868,7 +2364,8 @@
 	ds->ds_objset->os_raw_receive = B_FALSE;
 
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
-	if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
+	if (drc->drc_resumable && drc->drc_should_save &&
+	    !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
 		rrw_exit(&ds->ds_bp_rwlock, FTAG);
 		dsl_dataset_disown(ds, dsflags, dmu_recv_tag);
 	} else {
@@ -1881,61 +2378,60 @@
 }
 
 static void
-receive_cksum(struct receive_arg *ra, int len, void *buf)
+receive_cksum(dmu_recv_cookie_t *drc, int len, void *buf)
 {
-	if (ra->byteswap) {
-		(void) fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
+	if (drc->drc_byteswap) {
+		(void) fletcher_4_incremental_byteswap(buf, len,
+		    &drc->drc_cksum);
 	} else {
-		(void) fletcher_4_incremental_native(buf, len, &ra->cksum);
+		(void) fletcher_4_incremental_native(buf, len, &drc->drc_cksum);
 	}
 }
 
 /*
  * Read the payload into a buffer of size len, and update the current record's
  * payload field.
- * Allocate ra->next_rrd and read the next record's header into
- * ra->next_rrd->header.
+ * Allocate drc->drc_next_rrd and read the next record's header into
+ * drc->drc_next_rrd->header.
  * Verify checksum of payload and next record.
  */
 static int
-receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
+receive_read_payload_and_next_header(dmu_recv_cookie_t *drc, int len, void *buf)
 {
 	int err;
-	zio_cksum_t cksum_orig;
-	zio_cksum_t *cksump;
 
 	if (len != 0) {
 		ASSERT3U(len, <=, SPA_MAXBLOCKSIZE);
-		err = receive_read(ra, len, buf);
+		err = receive_read(drc, len, buf);
 		if (err != 0)
 			return (err);
-		receive_cksum(ra, len, buf);
+		receive_cksum(drc, len, buf);
 
 		/* note: rrd is NULL when reading the begin record's payload */
-		if (ra->rrd != NULL) {
-			ra->rrd->payload = buf;
-			ra->rrd->payload_size = len;
-			ra->rrd->bytes_read = ra->bytes_read;
+		if (drc->drc_rrd != NULL) {
+			drc->drc_rrd->payload = buf;
+			drc->drc_rrd->payload_size = len;
+			drc->drc_rrd->bytes_read = drc->drc_bytes_read;
 		}
 	} else {
 		ASSERT3P(buf, ==, NULL);
 	}
 
-	ra->prev_cksum = ra->cksum;
+	drc->drc_prev_cksum = drc->drc_cksum;
 
-	ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
-	err = receive_read(ra, sizeof (ra->next_rrd->header),
-	    &ra->next_rrd->header);
-	ra->next_rrd->bytes_read = ra->bytes_read;
+	drc->drc_next_rrd = kmem_zalloc(sizeof (*drc->drc_next_rrd), KM_SLEEP);
+	err = receive_read(drc, sizeof (drc->drc_next_rrd->header),
+	    &drc->drc_next_rrd->header);
+	drc->drc_next_rrd->bytes_read = drc->drc_bytes_read;
 
 	if (err != 0) {
-		kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
-		ra->next_rrd = NULL;
+		kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd));
+		drc->drc_next_rrd = NULL;
 		return (err);
 	}
-	if (ra->next_rrd->header.drr_type == DRR_BEGIN) {
-		kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
-		ra->next_rrd = NULL;
+	if (drc->drc_next_rrd->header.drr_type == DRR_BEGIN) {
+		kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd));
+		drc->drc_next_rrd = NULL;
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1945,90 +2441,30 @@
 	 */
 	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
 	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
-	receive_cksum(ra,
+	receive_cksum(drc,
 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
-	    &ra->next_rrd->header);
+	    &drc->drc_next_rrd->header);
 
-	cksum_orig = ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
-	cksump = &ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
+	zio_cksum_t cksum_orig =
+	    drc->drc_next_rrd->header.drr_u.drr_checksum.drr_checksum;
+	zio_cksum_t *cksump =
+	    &drc->drc_next_rrd->header.drr_u.drr_checksum.drr_checksum;
 
-	if (ra->byteswap)
-		byteswap_record(&ra->next_rrd->header);
+	if (drc->drc_byteswap)
+		byteswap_record(&drc->drc_next_rrd->header);
 
 	if ((!ZIO_CHECKSUM_IS_ZERO(cksump)) &&
-	    !ZIO_CHECKSUM_EQUAL(ra->cksum, *cksump)) {
-		kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
-		ra->next_rrd = NULL;
+	    !ZIO_CHECKSUM_EQUAL(drc->drc_cksum, *cksump)) {
+		kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd));
+		drc->drc_next_rrd = NULL;
 		return (SET_ERROR(ECKSUM));
 	}
 
-	receive_cksum(ra, sizeof (cksum_orig), &cksum_orig);
+	receive_cksum(drc, sizeof (cksum_orig), &cksum_orig);
 
 	return (0);
 }
 
-static void
-objlist_create(struct objlist *list)
-{
-	list_create(&list->list, sizeof (struct receive_objnode),
-	    offsetof(struct receive_objnode, node));
-	list->last_lookup = 0;
-}
-
-static void
-objlist_destroy(struct objlist *list)
-{
-	for (struct receive_objnode *n = list_remove_head(&list->list);
-	    n != NULL; n = list_remove_head(&list->list)) {
-		kmem_free(n, sizeof (*n));
-	}
-	list_destroy(&list->list);
-}
-
-/*
- * This function looks through the objlist to see if the specified object number
- * is contained in the objlist.  In the process, it will remove all object
- * numbers in the list that are smaller than the specified object number.  Thus,
- * any lookup of an object number smaller than a previously looked up object
- * number will always return false; therefore, all lookups should be done in
- * ascending order.
- */
-static boolean_t
-objlist_exists(struct objlist *list, uint64_t object)
-{
-	struct receive_objnode *node = list_head(&list->list);
-	ASSERT3U(object, >=, list->last_lookup);
-	list->last_lookup = object;
-	while (node != NULL && node->object < object) {
-		VERIFY3P(node, ==, list_remove_head(&list->list));
-		kmem_free(node, sizeof (*node));
-		node = list_head(&list->list);
-	}
-	return (node != NULL && node->object == object);
-}
-
-/*
- * The objlist is a list of object numbers stored in ascending order.  However,
- * the insertion of new object numbers does not seek out the correct location to
- * store a new object number; instead, it appends it to the list for simplicity.
- * Thus, any users must take care to only insert new object numbers in ascending
- * order.
- */
-static void
-objlist_insert(struct objlist *list, uint64_t object)
-{
-	struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
-	node->object = object;
-#ifdef ZFS_DEBUG
-	{
-	struct receive_objnode *last_object = list_tail(&list->list);
-	uint64_t last_objnum = (last_object != NULL ? last_object->object : 0);
-	ASSERT3U(node->object, >, last_objnum);
-	}
-#endif
-	list_insert_tail(&list->list, node);
-}
-
 /*
  * Issue the prefetch reads for any necessary indirect blocks.
  *
@@ -2048,11 +2484,11 @@
  */
 /* ARGSUSED */
 static void
-receive_read_prefetch(struct receive_arg *ra,
-    uint64_t object, uint64_t offset, uint64_t length)
+receive_read_prefetch(dmu_recv_cookie_t *drc, uint64_t object, uint64_t offset,
+    uint64_t length)
 {
-	if (!objlist_exists(&ra->ignore_objlist, object)) {
-		dmu_prefetch(ra->os, object, 1, offset, length,
+	if (!objlist_exists(drc->drc_ignore_objlist, object)) {
+		dmu_prefetch(drc->drc_os, object, 1, offset, length,
 		    ZIO_PRIORITY_SYNC_READ);
 	}
 }
@@ -2061,14 +2497,15 @@
  * Read records off the stream, issuing any necessary prefetches.
  */
 static int
-receive_read_record(struct receive_arg *ra)
+receive_read_record(dmu_recv_cookie_t *drc)
 {
 	int err;
 
-	switch (ra->rrd->header.drr_type) {
+	switch (drc->drc_rrd->header.drr_type) {
 	case DRR_OBJECT:
 	{
-		struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
+		struct drr_object *drro =
+		    &drc->drc_rrd->header.drr_u.drr_object;
 		uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);
 		void *buf = NULL;
 		dmu_object_info_t doi;
@@ -2076,153 +2513,106 @@
 		if (size != 0)
 			buf = kmem_zalloc(size, KM_SLEEP);
 
-		err = receive_read_payload_and_next_header(ra, size, buf);
+		err = receive_read_payload_and_next_header(drc, size, buf);
 		if (err != 0) {
 			kmem_free(buf, size);
 			return (err);
 		}
-		err = dmu_object_info(ra->os, drro->drr_object, &doi);
+		err = dmu_object_info(drc->drc_os, drro->drr_object, &doi);
 		/*
 		 * See receive_read_prefetch for an explanation why we're
 		 * storing this object in the ignore_obj_list.
 		 */
 		if (err == ENOENT || err == EEXIST ||
 		    (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
-			objlist_insert(&ra->ignore_objlist, drro->drr_object);
+			objlist_insert(drc->drc_ignore_objlist,
+			    drro->drr_object);
 			err = 0;
 		}
 		return (err);
 	}
 	case DRR_FREEOBJECTS:
 	{
-		err = receive_read_payload_and_next_header(ra, 0, NULL);
+		err = receive_read_payload_and_next_header(drc, 0, NULL);
 		return (err);
 	}
 	case DRR_WRITE:
 	{
-		struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
-		arc_buf_t *abuf;
-		boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
-
-		if (ra->raw) {
-			boolean_t byteorder = ZFS_HOST_BYTEORDER ^
-			    !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
-			    ra->byteswap;
-
-			abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
-			    drrw->drr_object, byteorder, drrw->drr_salt,
-			    drrw->drr_iv, drrw->drr_mac, drrw->drr_type,
-			    drrw->drr_compressed_size, drrw->drr_logical_size,
-			    drrw->drr_compressiontype);
-		} else if (DRR_WRITE_COMPRESSED(drrw)) {
-			ASSERT3U(drrw->drr_compressed_size, >, 0);
-			ASSERT3U(drrw->drr_logical_size, >=,
-			    drrw->drr_compressed_size);
-			ASSERT(!is_meta);
-			abuf = arc_loan_compressed_buf(
-			    dmu_objset_spa(ra->os),
-			    drrw->drr_compressed_size, drrw->drr_logical_size,
-			    drrw->drr_compressiontype);
-		} else {
-			abuf = arc_loan_buf(dmu_objset_spa(ra->os),
-			    is_meta, drrw->drr_logical_size);
-		}
-
-		err = receive_read_payload_and_next_header(ra,
-		    DRR_WRITE_PAYLOAD_SIZE(drrw), abuf->b_data);
+		struct drr_write *drrw = &drc->drc_rrd->header.drr_u.drr_write;
+		int size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+		abd_t *abd = abd_alloc_linear(size, B_FALSE);
+		err = receive_read_payload_and_next_header(drc, size,
+		    abd_to_buf(abd));
 		if (err != 0) {
-			dmu_return_arcbuf(abuf);
+			abd_free(abd);
 			return (err);
 		}
-		ra->rrd->arc_buf = abuf;
-		receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
+		drc->drc_rrd->abd = abd;
+		receive_read_prefetch(drc, drrw->drr_object, drrw->drr_offset,
 		    drrw->drr_logical_size);
 		return (err);
 	}
-	case DRR_WRITE_BYREF:
-	{
-		struct drr_write_byref *drrwb =
-		    &ra->rrd->header.drr_u.drr_write_byref;
-		err = receive_read_payload_and_next_header(ra, 0, NULL);
-		receive_read_prefetch(ra, drrwb->drr_object, drrwb->drr_offset,
-		    drrwb->drr_length);
-		return (err);
-	}
 	case DRR_WRITE_EMBEDDED:
 	{
 		struct drr_write_embedded *drrwe =
-		    &ra->rrd->header.drr_u.drr_write_embedded;
+		    &drc->drc_rrd->header.drr_u.drr_write_embedded;
 		uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);
 		void *buf = kmem_zalloc(size, KM_SLEEP);
 
-		err = receive_read_payload_and_next_header(ra, size, buf);
+		err = receive_read_payload_and_next_header(drc, size, buf);
 		if (err != 0) {
 			kmem_free(buf, size);
 			return (err);
 		}
 
-		receive_read_prefetch(ra, drrwe->drr_object, drrwe->drr_offset,
+		receive_read_prefetch(drc, drrwe->drr_object, drrwe->drr_offset,
 		    drrwe->drr_length);
 		return (err);
 	}
 	case DRR_FREE:
+	case DRR_REDACT:
 	{
 		/*
 		 * It might be beneficial to prefetch indirect blocks here, but
 		 * we don't really have the data to decide for sure.
 		 */
-		err = receive_read_payload_and_next_header(ra, 0, NULL);
+		err = receive_read_payload_and_next_header(drc, 0, NULL);
 		return (err);
 	}
 	case DRR_END:
 	{
-		struct drr_end *drre = &ra->rrd->header.drr_u.drr_end;
-		if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
+		struct drr_end *drre = &drc->drc_rrd->header.drr_u.drr_end;
+		if (!ZIO_CHECKSUM_EQUAL(drc->drc_prev_cksum,
+		    drre->drr_checksum))
 			return (SET_ERROR(ECKSUM));
 		return (0);
 	}
 	case DRR_SPILL:
 	{
-		struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill;
-		arc_buf_t *abuf;
-		int len = DRR_SPILL_PAYLOAD_SIZE(drrs);
-
-		/* DRR_SPILL records are either raw or uncompressed */
-		if (ra->raw) {
-			boolean_t byteorder = ZFS_HOST_BYTEORDER ^
-			    !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
-			    ra->byteswap;
-
-			abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
-			    dmu_objset_id(ra->os), byteorder, drrs->drr_salt,
-			    drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
-			    drrs->drr_compressed_size, drrs->drr_length,
-			    drrs->drr_compressiontype);
-		} else {
-			abuf = arc_loan_buf(dmu_objset_spa(ra->os),
-			    DMU_OT_IS_METADATA(drrs->drr_type),
-			    drrs->drr_length);
-		}
-
-		err = receive_read_payload_and_next_header(ra, len,
-		    abuf->b_data);
-		if (err != 0) {
-			dmu_return_arcbuf(abuf);
-			return (err);
-		}
-		ra->rrd->arc_buf = abuf;
+		struct drr_spill *drrs = &drc->drc_rrd->header.drr_u.drr_spill;
+		int size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+		abd_t *abd = abd_alloc_linear(size, B_FALSE);
+		err = receive_read_payload_and_next_header(drc, size,
+		    abd_to_buf(abd));
+		if (err != 0)
+			abd_free(abd);
+		else
+			drc->drc_rrd->abd = abd;
 		return (err);
 	}
 	case DRR_OBJECT_RANGE:
 	{
-		err = receive_read_payload_and_next_header(ra, 0, NULL);
+		err = receive_read_payload_and_next_header(drc, 0, NULL);
 		return (err);
+
 	}
 	default:
 		return (SET_ERROR(EINVAL));
 	}
 }
 
+
+
 static void
 dprintf_drr(struct receive_record_arg *rrd, int err)
 {
@@ -2234,8 +2624,8 @@
 		dprintf("drr_type = OBJECT obj = %llu type = %u "
 		    "bonustype = %u blksz = %u bonuslen = %u cksumtype = %u "
 		    "compress = %u dn_slots = %u err = %d\n",
-		    drro->drr_object, drro->drr_type,  drro->drr_bonustype,
-		    drro->drr_blksz, drro->drr_bonuslen,
+		    (u_longlong_t)drro->drr_object, drro->drr_type,
+		    drro->drr_bonustype, drro->drr_blksz, drro->drr_bonuslen,
 		    drro->drr_checksumtype, drro->drr_compress,
 		    drro->drr_dn_slots, err);
 		break;
@@ -2246,7 +2636,8 @@
 		    &rrd->header.drr_u.drr_freeobjects;
 		dprintf("drr_type = FREEOBJECTS firstobj = %llu "
 		    "numobjs = %llu err = %d\n",
-		    drrfo->drr_firstobj, drrfo->drr_numobjs, err);
+		    (u_longlong_t)drrfo->drr_firstobj,
+		    (u_longlong_t)drrfo->drr_numobjs, err);
 		break;
 	}
 	case DRR_WRITE:
@@ -2255,10 +2646,12 @@
 		dprintf("drr_type = WRITE obj = %llu type = %u offset = %llu "
 		    "lsize = %llu cksumtype = %u flags = %u "
 		    "compress = %u psize = %llu err = %d\n",
-		    drrw->drr_object, drrw->drr_type, drrw->drr_offset,
-		    drrw->drr_logical_size, drrw->drr_checksumtype,
-		    drrw->drr_flags, drrw->drr_compressiontype,
-		    drrw->drr_compressed_size, err);
+		    (u_longlong_t)drrw->drr_object, drrw->drr_type,
+		    (u_longlong_t)drrw->drr_offset,
+		    (u_longlong_t)drrw->drr_logical_size,
+		    drrw->drr_checksumtype, drrw->drr_flags,
+		    drrw->drr_compressiontype,
+		    (u_longlong_t)drrw->drr_compressed_size, err);
 		break;
 	}
 	case DRR_WRITE_BYREF:
@@ -2269,11 +2662,14 @@
 		    "length = %llu toguid = %llx refguid = %llx "
 		    "refobject = %llu refoffset = %llu cksumtype = %u "
 		    "flags = %u err = %d\n",
-		    drrwbr->drr_object, drrwbr->drr_offset,
-		    drrwbr->drr_length, drrwbr->drr_toguid,
-		    drrwbr->drr_refguid, drrwbr->drr_refobject,
-		    drrwbr->drr_refoffset, drrwbr->drr_checksumtype,
-		    drrwbr->drr_flags, err);
+		    (u_longlong_t)drrwbr->drr_object,
+		    (u_longlong_t)drrwbr->drr_offset,
+		    (u_longlong_t)drrwbr->drr_length,
+		    (u_longlong_t)drrwbr->drr_toguid,
+		    (u_longlong_t)drrwbr->drr_refguid,
+		    (u_longlong_t)drrwbr->drr_refobject,
+		    (u_longlong_t)drrwbr->drr_refoffset,
+		    drrwbr->drr_checksumtype, drrwbr->drr_flags, err);
 		break;
 	}
 	case DRR_WRITE_EMBEDDED:
@@ -2283,7 +2679,9 @@
 		dprintf("drr_type = WRITE_EMBEDDED obj = %llu offset = %llu "
 		    "length = %llu compress = %u etype = %u lsize = %u "
 		    "psize = %u err = %d\n",
-		    drrwe->drr_object, drrwe->drr_offset, drrwe->drr_length,
+		    (u_longlong_t)drrwe->drr_object,
+		    (u_longlong_t)drrwe->drr_offset,
+		    (u_longlong_t)drrwe->drr_length,
 		    drrwe->drr_compression, drrwe->drr_etype,
 		    drrwe->drr_lsize, drrwe->drr_psize, err);
 		break;
@@ -2293,7 +2691,9 @@
 		struct drr_free *drrf = &rrd->header.drr_u.drr_free;
 		dprintf("drr_type = FREE obj = %llu offset = %llu "
 		    "length = %lld err = %d\n",
-		    drrf->drr_object, drrf->drr_offset, drrf->drr_length,
+		    (u_longlong_t)drrf->drr_object,
+		    (u_longlong_t)drrf->drr_offset,
+		    (longlong_t)drrf->drr_length,
 		    err);
 		break;
 	}
@@ -2301,7 +2701,8 @@
 	{
 		struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
 		dprintf("drr_type = SPILL obj = %llu length = %llu "
-		    "err = %d\n", drrs->drr_object, drrs->drr_length, err);
+		    "err = %d\n", (u_longlong_t)drrs->drr_object,
+		    (u_longlong_t)drrs->drr_length, err);
 		break;
 	}
 	case DRR_OBJECT_RANGE:
@@ -2310,7 +2711,8 @@
 		    &rrd->header.drr_u.drr_object_range;
 		dprintf("drr_type = OBJECT_RANGE firstobj = %llu "
 		    "numslots = %llu flags = %u err = %d\n",
-		    drror->drr_firstobj, drror->drr_numslots,
+		    (u_longlong_t)drror->drr_firstobj,
+		    (u_longlong_t)drror->drr_numslots,
 		    drror->drr_flags, err);
 		break;
 	}
@@ -2333,6 +2735,22 @@
 	ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read);
 	rwa->bytes_read = rrd->bytes_read;
 
+	if (rrd->header.drr_type != DRR_WRITE) {
+		err = flush_write_batch(rwa);
+		if (err != 0) {
+			if (rrd->abd != NULL) {
+				abd_free(rrd->abd);
+				rrd->abd = NULL;
+				rrd->payload = NULL;
+			} else if (rrd->payload != NULL) {
+				kmem_free(rrd->payload, rrd->payload_size);
+				rrd->payload = NULL;
+			}
+
+			return (err);
+		}
+	}
+
 	switch (rrd->header.drr_type) {
 	case DRR_OBJECT:
 	{
@@ -2351,20 +2769,17 @@
 	}
 	case DRR_WRITE:
 	{
-		struct drr_write *drrw = &rrd->header.drr_u.drr_write;
-		err = receive_write(rwa, drrw, rrd->arc_buf);
-		/* if receive_write() is successful, it consumes the arc_buf */
-		if (err != 0)
-			dmu_return_arcbuf(rrd->arc_buf);
-		rrd->arc_buf = NULL;
-		rrd->payload = NULL;
-		break;
-	}
-	case DRR_WRITE_BYREF:
-	{
-		struct drr_write_byref *drrwbr =
-		    &rrd->header.drr_u.drr_write_byref;
-		err = receive_write_byref(rwa, drrwbr);
+		err = receive_process_write_record(rwa, rrd);
+		if (err != EAGAIN) {
+			/*
+			 * On success, receive_process_write_record() returns
+			 * EAGAIN to indicate that we do not want to free
+			 * the rrd or abd.
+			 */
+			ASSERT(err != 0);
+			abd_free(rrd->abd);
+			rrd->abd = NULL;
+		}
 		break;
 	}
 	case DRR_WRITE_EMBEDDED:
@@ -2385,11 +2800,10 @@
 	case DRR_SPILL:
 	{
 		struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
-		err = receive_spill(rwa, drrs, rrd->arc_buf);
-		/* if receive_spill() is successful, it consumes the arc_buf */
+		err = receive_spill(rwa, drrs, rrd->abd);
 		if (err != 0)
-			dmu_return_arcbuf(rrd->arc_buf);
-		rrd->arc_buf = NULL;
+			abd_free(rrd->abd);
+		rrd->abd = NULL;
 		rrd->payload = NULL;
 		break;
 	}
@@ -2400,6 +2814,12 @@
 		err = receive_object_range(rwa, drror);
 		break;
 	}
+	case DRR_REDACT:
+	{
+		struct drr_redact *drrr = &rrd->header.drr_u.drr_redact;
+		err = receive_redact(rwa, drrr);
+		break;
+	}
 	default:
 		err = (SET_ERROR(EINVAL));
 	}
@@ -2428,19 +2848,34 @@
 		 * on the queue, but we need to clear everything in it before we
 		 * can exit.
 		 */
+		int err = 0;
 		if (rwa->err == 0) {
-			rwa->err = receive_process_record(rwa, rrd);
-		} else if (rrd->arc_buf != NULL) {
-			dmu_return_arcbuf(rrd->arc_buf);
-			rrd->arc_buf = NULL;
+			err = receive_process_record(rwa, rrd);
+		} else if (rrd->abd != NULL) {
+			abd_free(rrd->abd);
+			rrd->abd = NULL;
 			rrd->payload = NULL;
 		} else if (rrd->payload != NULL) {
 			kmem_free(rrd->payload, rrd->payload_size);
 			rrd->payload = NULL;
 		}
-		kmem_free(rrd, sizeof (*rrd));
+		/*
+		 * EAGAIN indicates that this record has been saved (on
+		 * rwa->write_batch), and will be used again, so we don't
+		 * free it.
+		 */
+		if (err != EAGAIN) {
+			if (rwa->err == 0)
+				rwa->err = err;
+			kmem_free(rrd, sizeof (*rrd));
+		}
 	}
 	kmem_free(rrd, sizeof (*rrd));
+
+	int err = flush_write_batch(rwa);
+	if (rwa->err == 0)
+		rwa->err = err;
+
 	mutex_enter(&rwa->mutex);
 	rwa->done = B_TRUE;
 	cv_signal(&rwa->cv);
@@ -2450,11 +2885,11 @@
 }
 
 static int
-resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
+resume_check(dmu_recv_cookie_t *drc, nvlist_t *begin_nvl)
 {
 	uint64_t val;
-	objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset;
-	uint64_t dsobj = dmu_objset_id(ra->os);
+	objset_t *mos = dmu_objset_pool(drc->drc_os)->dp_meta_objset;
+	uint64_t dsobj = dmu_objset_id(drc->drc_os);
 	uint64_t resume_obj, resume_off;
 
 	if (nvlist_lookup_uint64(begin_nvl,
@@ -2488,113 +2923,39 @@
  * NB: callers *must* call dmu_recv_end() if this succeeds.
  */
 int
-dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
-    int cleanup_fd, uint64_t *action_handlep)
+dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
 {
 	int err = 0;
-	struct receive_arg *ra;
-	struct receive_writer_arg *rwa;
-	int featureflags;
-	uint32_t payloadlen;
-	void *payload;
-	nvlist_t *begin_nvl = NULL;
+	struct receive_writer_arg *rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP);
 
-	ra = kmem_zalloc(sizeof (*ra), KM_SLEEP);
-	rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP);
-
-	ra->byteswap = drc->drc_byteswap;
-	ra->raw = drc->drc_raw;
-	ra->cksum = drc->drc_cksum;
-	ra->vp = vp;
-	ra->voff = *voffp;
-
-	if (dsl_dataset_is_zapified(drc->drc_ds)) {
+	if (dsl_dataset_has_resume_receive_state(drc->drc_ds)) {
+		uint64_t bytes = 0;
 		(void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset,
 		    drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES,
-		    sizeof (ra->bytes_read), 1, &ra->bytes_read);
+		    sizeof (bytes), 1, &bytes);
+		drc->drc_bytes_read += bytes;
 	}
 
-	objlist_create(&ra->ignore_objlist);
+	drc->drc_ignore_objlist = objlist_create();
 
 	/* these were verified in dmu_recv_begin */
 	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
 	    DMU_SUBSTREAM);
 	ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);
 
-	/*
-	 * Open the objset we are modifying.
-	 */
-	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &ra->os));
-
 	ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
-
-	featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
-	ra->featureflags = featureflags;
-
-	ASSERT0(ra->os->os_encrypted &&
-	    (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA));
-
-	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
-	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
-		minor_t minor;
-
-		if (cleanup_fd == -1) {
-			err = SET_ERROR(EBADF);
-			goto out;
-		}
-		err = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (err != 0) {
-			cleanup_fd = -1;
-			goto out;
-		}
-
-		if (*action_handlep == 0) {
-			rwa->guid_to_ds_map =
-			    kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
-			avl_create(rwa->guid_to_ds_map, guid_compare,
-			    sizeof (guid_map_entry_t),
-			    offsetof(guid_map_entry_t, avlnode));
-			err = zfs_onexit_add_cb(minor,
-			    free_guid_map_onexit, rwa->guid_to_ds_map,
-			    action_handlep);
-			if (err != 0)
-				goto out;
-		} else {
-			err = zfs_onexit_cb_data(minor, *action_handlep,
-			    (void **)&rwa->guid_to_ds_map);
-			if (err != 0)
-				goto out;
-		}
-
-		drc->drc_guid_to_ds_map = rwa->guid_to_ds_map;
-	}
-
-	payloadlen = drc->drc_drr_begin->drr_payloadlen;
-	payload = NULL;
-	if (payloadlen != 0)
-		payload = kmem_alloc(payloadlen, KM_SLEEP);
-
-	err = receive_read_payload_and_next_header(ra, payloadlen, payload);
-	if (err != 0) {
-		if (payloadlen != 0)
-			kmem_free(payload, payloadlen);
-		goto out;
-	}
-	if (payloadlen != 0) {
-		err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP);
-		kmem_free(payload, payloadlen);
-		if (err != 0)
-			goto out;
-	}
+	ASSERT0(drc->drc_os->os_encrypted &&
+	    (drc->drc_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA));
 
 	/* handle DSL encryption key payload */
-	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+	if (drc->drc_featureflags & DMU_BACKUP_FEATURE_RAW) {
 		nvlist_t *keynvl = NULL;
 
-		ASSERT(ra->os->os_encrypted);
+		ASSERT(drc->drc_os->os_encrypted);
 		ASSERT(drc->drc_raw);
 
-		err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl);
+		err = nvlist_lookup_nvlist(drc->drc_begin_nvl, "crypt_keydata",
+		    &keynvl);
 		if (err != 0)
 			goto out;
 
@@ -2604,7 +2965,7 @@
 		 * are sure the rest of the receive succeeded so we stash
 		 * the keynvl away until then.
 		 */
-		err = dsl_crypto_recv_raw(spa_name(ra->os->os_spa),
+		err = dsl_crypto_recv_raw(spa_name(drc->drc_os->os_spa),
 		    drc->drc_ds->ds_object, drc->drc_fromsnapobj,
 		    drc->drc_drrb->drr_type, keynvl, drc->drc_newfs);
 		if (err != 0)
@@ -2619,23 +2980,33 @@
 			drc->drc_keynvl = fnvlist_dup(keynvl);
 	}
 
-	if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
-		err = resume_check(ra, begin_nvl);
+	if (drc->drc_featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+		err = resume_check(drc, drc->drc_begin_nvl);
 		if (err != 0)
 			goto out;
 	}
 
-	(void) bqueue_init(&rwa->q,
+	/*
+	 * If we failed before this point we will clean up any new resume
+	 * state that was created. Now that we've gotten past the initial
+	 * checks we are ok to retain that resume state.
+	 */
+	drc->drc_should_save = B_TRUE;
+
+	(void) bqueue_init(&rwa->q, zfs_recv_queue_ff,
 	    MAX(zfs_recv_queue_length, 2 * zfs_max_recordsize),
 	    offsetof(struct receive_record_arg, node));
 	cv_init(&rwa->cv, NULL, CV_DEFAULT, NULL);
 	mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL);
-	rwa->os = ra->os;
+	rwa->os = drc->drc_os;
 	rwa->byteswap = drc->drc_byteswap;
 	rwa->resumable = drc->drc_resumable;
 	rwa->raw = drc->drc_raw;
 	rwa->spill = drc->drc_spill;
+	rwa->full = (drc->drc_drr_begin->drr_u.drr_begin.drr_fromguid == 0);
 	rwa->os->os_raw_receive = drc->drc_raw;
+	list_create(&rwa->write_batch, sizeof (struct receive_record_arg),
+	    offsetof(struct receive_record_arg, node.bqn_node));
 
 	(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
 	    TS_RUN, minclsyspri);
@@ -2649,10 +3020,10 @@
 	 * We can leave this loop in 3 ways:  First, if rwa->err is
 	 * non-zero.  In that case, the writer thread will free the rrd we just
 	 * pushed.  Second, if  we're interrupted; in that case, either it's the
-	 * first loop and ra->rrd was never allocated, or it's later and ra->rrd
-	 * has been handed off to the writer thread who will free it.  Finally,
-	 * if receive_read_record fails or we're at the end of the stream, then
-	 * we free ra->rrd and exit.
+	 * first loop and drc->drc_rrd was never allocated, or it's later, and
+	 * drc->drc_rrd has been handed off to the writer thread who will free
+	 * it.  Finally, if receive_read_record fails or we're at the end of the
+	 * stream, then we free drc->drc_rrd and exit.
 	 */
 	while (rwa->err == 0) {
 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
@@ -2660,30 +3031,36 @@
 			break;
 		}
 
-		ASSERT3P(ra->rrd, ==, NULL);
-		ra->rrd = ra->next_rrd;
-		ra->next_rrd = NULL;
-		/* Allocates and loads header into ra->next_rrd */
-		err = receive_read_record(ra);
+		ASSERT3P(drc->drc_rrd, ==, NULL);
+		drc->drc_rrd = drc->drc_next_rrd;
+		drc->drc_next_rrd = NULL;
+		/* Allocates and loads header into drc->drc_next_rrd */
+		err = receive_read_record(drc);
 
-		if (ra->rrd->header.drr_type == DRR_END || err != 0) {
-			kmem_free(ra->rrd, sizeof (*ra->rrd));
-			ra->rrd = NULL;
+		if (drc->drc_rrd->header.drr_type == DRR_END || err != 0) {
+			kmem_free(drc->drc_rrd, sizeof (*drc->drc_rrd));
+			drc->drc_rrd = NULL;
 			break;
 		}
 
-		bqueue_enqueue(&rwa->q, ra->rrd,
-		    sizeof (struct receive_record_arg) + ra->rrd->payload_size);
-		ra->rrd = NULL;
+		bqueue_enqueue(&rwa->q, drc->drc_rrd,
+		    sizeof (struct receive_record_arg) +
+		    drc->drc_rrd->payload_size);
+		drc->drc_rrd = NULL;
 	}
-	ASSERT3P(ra->rrd, ==, NULL);
-	ra->rrd = kmem_zalloc(sizeof (*ra->rrd), KM_SLEEP);
-	ra->rrd->eos_marker = B_TRUE;
-	bqueue_enqueue(&rwa->q, ra->rrd, 1);
+
+	ASSERT3P(drc->drc_rrd, ==, NULL);
+	drc->drc_rrd = kmem_zalloc(sizeof (*drc->drc_rrd), KM_SLEEP);
+	drc->drc_rrd->eos_marker = B_TRUE;
+	bqueue_enqueue_flush(&rwa->q, drc->drc_rrd, 1);
 
 	mutex_enter(&rwa->mutex);
 	while (!rwa->done) {
-		cv_wait(&rwa->cv, &rwa->mutex);
+		/*
+		 * We need to use cv_wait_sig() so that any process that may
+		 * be sleeping here can still fork.
+		 */
+		(void) cv_wait_sig(&rwa->cv, &rwa->mutex);
 	}
 	mutex_exit(&rwa->mutex);
 
@@ -2716,6 +3093,7 @@
 	cv_destroy(&rwa->cv);
 	mutex_destroy(&rwa->mutex);
 	bqueue_destroy(&rwa->q);
+	list_destroy(&rwa->write_batch);
 	if (err == 0)
 		err = rwa->err;
 
@@ -2725,12 +3103,17 @@
 	 * we need to clean up the next_rrd we create by processing the
 	 * DRR_BEGIN record.
 	 */
-	if (ra->next_rrd != NULL)
-		kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+	if (drc->drc_next_rrd != NULL)
+		kmem_free(drc->drc_next_rrd, sizeof (*drc->drc_next_rrd));
 
-	nvlist_free(begin_nvl);
-	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
-		zfs_onexit_fd_rele(cleanup_fd);
+	/*
+	 * The objset will be invalidated by dmu_recv_end() when we do
+	 * dsl_dataset_clone_swap_sync_impl().
+	 */
+	drc->drc_os = NULL;
+
+	kmem_free(rwa, sizeof (*rwa));
+	nvlist_free(drc->drc_begin_nvl);
 
 	if (err != 0) {
 		/*
@@ -2742,10 +3125,9 @@
 		nvlist_free(drc->drc_keynvl);
 	}
 
-	*voffp = ra->voff;
-	objlist_destroy(&ra->ignore_objlist);
-	kmem_free(ra, sizeof (*ra));
-	kmem_free(rwa, sizeof (*rwa));
+	objlist_destroy(drc->drc_ignore_objlist);
+	drc->drc_ignore_objlist = NULL;
+	*voffp = drc->drc_voff;
 	return (err);
 }
 
@@ -2813,7 +3195,8 @@
 			return (error);
 		}
 		error = dsl_dataset_snapshot_check_impl(origin_head,
-		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
+		    drc->drc_tosnap, tx, B_TRUE, 1,
+		    drc->drc_cred, drc->drc_proc);
 		dsl_dataset_rele(origin_head, FTAG);
 		if (error != 0)
 			return (error);
@@ -2821,7 +3204,8 @@
 		error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
 	} else {
 		error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
-		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
+		    drc->drc_tosnap, tx, B_TRUE, 1,
+		    drc->drc_cred, drc->drc_proc);
 	}
 	return (error);
 }
@@ -2832,6 +3216,7 @@
 	dmu_recv_cookie_t *drc = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0;
+	uint64_t newsnapobj;
 
 	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
 	    tx, "snap=%s", drc->drc_tosnap);
@@ -2870,10 +3255,17 @@
 			drc->drc_keynvl = NULL;
 		}
 
-		VERIFY3P(drc->drc_ds->ds_prev, ==, origin_head->ds_prev);
+		VERIFY3P(drc->drc_ds->ds_prev, ==,
+		    origin_head->ds_prev);
 
 		dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
 		    origin_head, tx);
+		/*
+		 * The objset was evicted by dsl_dataset_clone_swap_sync_impl,
+		 * so drc_os is no longer valid.
+		 */
+		drc->drc_os = NULL;
+
 		dsl_dataset_snapshot_sync_impl(origin_head,
 		    drc->drc_tosnap, tx);
 
@@ -2890,7 +3282,7 @@
 		dsl_dataset_phys(origin_head)->ds_flags &=
 		    ~DS_FLAG_INCONSISTENT;
 
-		drc->drc_newsnapobj =
+		newsnapobj =
 		    dsl_dataset_phys(origin_head)->ds_prev_snap_obj;
 
 		dsl_dataset_rele(origin_head, FTAG);
@@ -2927,8 +3319,10 @@
 			    DS_FIELD_RESUME_TOGUID, tx);
 			(void) zap_remove(dp->dp_meta_objset, ds->ds_object,
 			    DS_FIELD_RESUME_TONAME, tx);
+			(void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+			    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, tx);
 		}
-		drc->drc_newsnapobj =
+		newsnapobj =
 		    dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
 	}
 
@@ -2943,15 +3337,13 @@
 	 * value.
 	 */
 	if (drc->drc_raw && drc->drc_ivset_guid != 0) {
-		dmu_object_zapify(dp->dp_meta_objset, drc->drc_newsnapobj,
+		dmu_object_zapify(dp->dp_meta_objset, newsnapobj,
 		    DMU_OT_DSL_DATASET, tx);
-		VERIFY0(zap_update(dp->dp_meta_objset, drc->drc_newsnapobj,
+		VERIFY0(zap_update(dp->dp_meta_objset, newsnapobj,
 		    DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
 		    &drc->drc_ivset_guid, tx));
 	}
 
-	zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
-
 	/*
 	 * Release the hold from dmu_recv_begin.  This must be done before
 	 * we return to open context, so that when we free the dataset's dnode
@@ -2968,54 +3360,6 @@
 	drc->drc_ds = NULL;
 }
 
-static int
-add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
-    boolean_t raw)
-{
-	dsl_pool_t *dp;
-	dsl_dataset_t *snapds;
-	guid_map_entry_t *gmep;
-	objset_t *os;
-	ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
-	int err;
-
-	ASSERT(guid_map != NULL);
-
-	err = dsl_pool_hold(name, FTAG, &dp);
-	if (err != 0)
-		return (err);
-	gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
-	err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
-	if (err == 0) {
-		/*
-		 * If this is a deduplicated raw send stream, we need
-		 * to make sure that we can still read raw blocks from
-		 * earlier datasets in the stream, so we set the
-		 * os_raw_receive flag now.
-		 */
-		if (raw) {
-			err = dmu_objset_from_ds(snapds, &os);
-			if (err != 0) {
-				dsl_dataset_disown(snapds, dsflags, FTAG);
-				dsl_pool_rele(dp, FTAG);
-				kmem_free(gmep, sizeof (*gmep));
-				return (err);
-			}
-			os->os_raw_receive = B_TRUE;
-		}
-
-		gmep->raw = raw;
-		gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
-		gmep->gme_ds = snapds;
-		avl_add(guid_map, gmep);
-	} else {
-		kmem_free(gmep, sizeof (*gmep));
-	}
-
-	dsl_pool_rele(dp, FTAG);
-	return (err);
-}
-
 static int dmu_recv_end_modified_blocks = 3;
 
 static int
@@ -3059,9 +3403,14 @@
 	if (error != 0) {
 		dmu_recv_cleanup_ds(drc);
 		nvlist_free(drc->drc_keynvl);
-	} else if (drc->drc_guid_to_ds_map != NULL) {
-		(void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map,
-		    drc->drc_newsnapobj, drc->drc_raw);
+	} else {
+		if (drc->drc_newfs) {
+			zvol_create_minor(drc->drc_tofs);
+		}
+		char *snapname = kmem_asprintf("%s@%s",
+		    drc->drc_tofs, drc->drc_tosnap);
+		zvol_create_minor(snapname);
+		kmem_strfree(snapname);
 	}
 	return (error);
 }
@@ -3076,7 +3425,13 @@
 	    os->os_dsl_dataset->ds_owner == dmu_recv_tag);
 }
 
-#if defined(_KERNEL)
-module_param(zfs_recv_queue_length, int, 0644);
-MODULE_PARM_DESC(zfs_recv_queue_length, "Maximum receive queue length");
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, queue_length, INT, ZMOD_RW,
+	"Maximum receive queue length");
+
+ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, queue_ff, INT, ZMOD_RW,
+	"Receive queue fill fraction");
+
+ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, write_batch_size, INT, ZMOD_RW,
+	"Maximum amount of writes to batch into one transaction");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/dmu_redact.c b/zfs/module/zfs/dmu_redact.c
new file mode 100644
index 0000000..5184ef6
--- /dev/null
+++ b/zfs/module/zfs/dmu_redact.c

@@ -0,0 +1,1201 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2017, 2018 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/txg.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dmu_redact.h>
+#include <sys/bqueue.h>
+#include <sys/objlist.h>
+#include <sys/dmu_tx.h>
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#include <sys/zap.h>
+#include <sys/zfs_znode.h>
+#endif
+
+/*
+ * This controls the number of entries in the buffer the redaction_list_update
+ * synctask uses to buffer writes to the redaction list.
+ */
+int redact_sync_bufsize = 1024;
+
+/*
+ * Controls how often to update the redaction list when creating a redaction
+ * list.
+ */
+uint64_t redaction_list_update_interval_ns = 1000 * 1000 * 1000ULL; /* NS */
+
+/*
+ * This tunable controls the length of the queues that zfs redact worker threads
+ * use to communicate.  If the dmu_redact_snap thread is blocking on these
+ * queues, this variable may need to be increased.  If there is a significant
+ * slowdown at the start of a redact operation as these threads consume all the
+ * available IO resources, or the queues are consuming too much memory, this
+ * variable may need to be decreased.
+ */
+int zfs_redact_queue_length = 1024 * 1024;
+
+/*
+ * This tunable controls the fill fraction of the queues used by zfs redact.
+ * The fill fraction controls the frequency with which threads have to be
+ * cv_signaled. If a lot of cpu time is being spent on cv_signal, then it
+ * should be tuned down.  If the queues empty before the signalled thread can
+ * catch up, then it should be tuned up.
+ */
+uint64_t zfs_redact_queue_ff = 20;
+
+struct redact_record {
+	bqueue_node_t		ln;
+	boolean_t		eos_marker; /* Marks the end of the stream */
+	uint64_t		start_object;
+	uint64_t		start_blkid;
+	uint64_t		end_object;
+	uint64_t		end_blkid;
+	uint8_t			indblkshift;
+	uint32_t		datablksz;
+};
+
+struct redact_thread_arg {
+	bqueue_t	q;
+	objset_t	*os;		/* Objset to traverse */
+	dsl_dataset_t	*ds;		/* Dataset to traverse */
+	struct redact_record *current_record;
+	int		error_code;
+	boolean_t	cancel;
+	zbookmark_phys_t resume;
+	objlist_t	*deleted_objs;
+	uint64_t	*num_blocks_visited;
+	uint64_t	ignore_object;	/* ignore further callbacks on this */
+	uint64_t	txg; /* txg to traverse since */
+};
+
+/*
+ * The redaction node is a wrapper around the redaction record that is used
+ * by the redaction merging thread to sort the records and determine overlaps.
+ *
+ * It contains two nodes; one sorts the records by their start_zb, and the other
+ * sorts the records by their end_zb.
+ */
+struct redact_node {
+	avl_node_t			avl_node_start;
+	avl_node_t			avl_node_end;
+	struct redact_record		*record;
+	struct redact_thread_arg	*rt_arg;
+	uint32_t			thread_num;
+};
+
+struct merge_data {
+	list_t				md_redact_block_pending;
+	redact_block_phys_t		md_coalesce_block;
+	uint64_t			md_last_time;
+	redact_block_phys_t		md_furthest[TXG_SIZE];
+	/* Lists of struct redact_block_list_node. */
+	list_t				md_blocks[TXG_SIZE];
+	boolean_t			md_synctask_txg[TXG_SIZE];
+	uint64_t			md_latest_synctask_txg;
+	redaction_list_t		*md_redaction_list;
+};
+
+/*
+ * A wrapper around redact_block_phys_t so it can be stored in a list_t.
+ */
+struct redact_block_list_node {
+	redact_block_phys_t	block;
+	list_node_t		node;
+};
+
+/*
+ * We've found a new redaction candidate.  In order to improve performance, we
+ * coalesce these blocks when they're adjacent to each other.  This function
+ * handles that.  If the new candidate block range is immediately after the
+ * range we're building, coalesce it into the range we're building.  Otherwise,
+ * put the record we're building on the queue, and update the build pointer to
+ * point to the new record.
+ */
+static void
+record_merge_enqueue(bqueue_t *q, struct redact_record **build,
+    struct redact_record *new)
+{
+	if (new->eos_marker) {
+		if (*build != NULL)
+			bqueue_enqueue(q, *build, sizeof (**build));
+		bqueue_enqueue_flush(q, new, sizeof (*new));
+		return;
+	}
+	if (*build == NULL) {
+		*build = new;
+		return;
+	}
+	struct redact_record *curbuild = *build;
+	if ((curbuild->end_object == new->start_object &&
+	    curbuild->end_blkid + 1 == new->start_blkid &&
+	    curbuild->end_blkid != UINT64_MAX) ||
+	    (curbuild->end_object + 1 == new->start_object &&
+	    curbuild->end_blkid == UINT64_MAX && new->start_blkid == 0)) {
+		curbuild->end_object = new->end_object;
+		curbuild->end_blkid = new->end_blkid;
+		kmem_free(new, sizeof (*new));
+	} else {
+		bqueue_enqueue(q, curbuild, sizeof (*curbuild));
+		*build = new;
+	}
+}
+#ifdef _KERNEL
+struct objnode {
+	avl_node_t node;
+	uint64_t obj;
+};
+
+static int
+objnode_compare(const void *o1, const void *o2)
+{
+	const struct objnode *obj1 = o1;
+	const struct objnode *obj2 = o2;
+	if (obj1->obj < obj2->obj)
+		return (-1);
+	if (obj1->obj > obj2->obj)
+		return (1);
+	return (0);
+}
+
+
+static objlist_t *
+zfs_get_deleteq(objset_t *os)
+{
+	objlist_t *deleteq_objlist = objlist_create();
+	uint64_t deleteq_obj;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	dmu_object_info_t doi;
+
+	ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+	VERIFY0(dmu_object_info(os, MASTER_NODE_OBJ, &doi));
+	ASSERT3U(doi.doi_type, ==, DMU_OT_MASTER_NODE);
+
+	VERIFY0(zap_lookup(os, MASTER_NODE_OBJ,
+	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
+
+	/*
+	 * In order to insert objects into the objlist, they must be in sorted
+	 * order. We don't know what order we'll get them out of the ZAP in, so
+	 * we insert them into and remove them from an avl_tree_t to sort them.
+	 */
+	avl_tree_t at;
+	avl_create(&at, objnode_compare, sizeof (struct objnode),
+	    offsetof(struct objnode, node));
+
+	for (zap_cursor_init(&zc, os, deleteq_obj);
+	    zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
+		struct objnode *obj = kmem_zalloc(sizeof (*obj), KM_SLEEP);
+		obj->obj = za.za_first_integer;
+		avl_add(&at, obj);
+	}
+	zap_cursor_fini(&zc);
+
+	struct objnode *next, *found = avl_first(&at);
+	while (found != NULL) {
+		next = AVL_NEXT(&at, found);
+		objlist_insert(deleteq_objlist, found->obj);
+		found = next;
+	}
+
+	void *cookie = NULL;
+	while ((found = avl_destroy_nodes(&at, &cookie)) != NULL)
+		kmem_free(found, sizeof (*found));
+	avl_destroy(&at);
+	return (deleteq_objlist);
+}
+#endif
+
+/*
+ * This is the callback function to traverse_dataset for the redaction threads
+ * for dmu_redact_snap.  This thread is responsible for creating redaction
+ * records for all the data that is modified by the snapshots we're redacting
+ * with respect to.  Redaction records represent ranges of data that have been
+ * modified by one of the redaction snapshots, and are stored in the
+ * redact_record struct. We need to create redaction records for three
+ * cases:
+ *
+ * First, if there's a normal write, we need to create a redaction record for
+ * that block.
+ *
+ * Second, if there's a hole, we need to create a redaction record that covers
+ * the whole range of the hole.  If the hole is in the meta-dnode, it must cover
+ * every block in all of the objects in the hole.
+ *
+ * Third, if there is a deleted object, we need to create a redaction record for
+ * all of the blocks in that object.
+ */
+static int
+redact_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+    const zbookmark_phys_t *zb, const struct dnode_phys *dnp, void *arg)
+{
+	(void) spa, (void) zilog;
+	struct redact_thread_arg *rta = arg;
+	struct redact_record *record;
+
+	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+	    zb->zb_object >= rta->resume.zb_object);
+
+	if (rta->cancel)
+		return (SET_ERROR(EINTR));
+
+	if (rta->ignore_object == zb->zb_object)
+		return (0);
+
+	/*
+	 * If we're visiting a dnode, we need to handle the case where the
+	 * object has been deleted.
+	 */
+	if (zb->zb_level == ZB_DNODE_LEVEL) {
+		ASSERT3U(zb->zb_level, ==, ZB_DNODE_LEVEL);
+
+		if (zb->zb_object == 0)
+			return (0);
+
+		/*
+		 * If the object has been deleted, redact all of the blocks in
+		 * it.
+		 */
+		if (dnp->dn_type == DMU_OT_NONE ||
+		    objlist_exists(rta->deleted_objs, zb->zb_object)) {
+			rta->ignore_object = zb->zb_object;
+			record = kmem_zalloc(sizeof (struct redact_record),
+			    KM_SLEEP);
+
+			record->eos_marker = B_FALSE;
+			record->start_object = record->end_object =
+			    zb->zb_object;
+			record->start_blkid = 0;
+			record->end_blkid = UINT64_MAX;
+			record_merge_enqueue(&rta->q,
+			    &rta->current_record, record);
+		}
+		return (0);
+	} else if (zb->zb_level < 0) {
+		return (0);
+	} else if (zb->zb_level > 0 && !BP_IS_HOLE(bp)) {
+		/*
+		 * If this is an indirect block, but not a hole, it doesn't
+		 * provide any useful information for redaction, so ignore it.
+		 */
+		return (0);
+	}
+
+	/*
+	 * At this point, there are two options left for the type of block we're
+	 * looking at.  Either this is a hole (which could be in the dnode or
+	 * the meta-dnode), or it's a level 0 block of some sort.  If it's a
+	 * hole, we create a redaction record that covers the whole range.  If
+	 * the hole is in a dnode, we need to redact all the blocks in that
+	 * hole.  If the hole is in the meta-dnode, we instead need to redact
+	 * all blocks in every object covered by that hole.  If it's a level 0
+	 * block, we only need to redact that single block.
+	 */
+	record = kmem_zalloc(sizeof (struct redact_record), KM_SLEEP);
+	record->eos_marker = B_FALSE;
+
+	record->start_object = record->end_object = zb->zb_object;
+	if (BP_IS_HOLE(bp)) {
+		record->start_blkid = zb->zb_blkid *
+		    bp_span_in_blocks(dnp->dn_indblkshift, zb->zb_level);
+
+		record->end_blkid = ((zb->zb_blkid + 1) *
+		    bp_span_in_blocks(dnp->dn_indblkshift, zb->zb_level)) - 1;
+
+		if (zb->zb_object == DMU_META_DNODE_OBJECT) {
+			record->start_object = record->start_blkid *
+			    ((SPA_MINBLOCKSIZE * dnp->dn_datablkszsec) /
+			    sizeof (dnode_phys_t));
+			record->start_blkid = 0;
+			record->end_object = ((record->end_blkid +
+			    1) * ((SPA_MINBLOCKSIZE * dnp->dn_datablkszsec) /
+			    sizeof (dnode_phys_t))) - 1;
+			record->end_blkid = UINT64_MAX;
+		}
+	} else if (zb->zb_level != 0 ||
+	    zb->zb_object == DMU_META_DNODE_OBJECT) {
+		kmem_free(record, sizeof (*record));
+		return (0);
+	} else {
+		record->start_blkid = record->end_blkid = zb->zb_blkid;
+	}
+	record->indblkshift = dnp->dn_indblkshift;
+	record->datablksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+	record_merge_enqueue(&rta->q, &rta->current_record, record);
+
+	return (0);
+}
+
+static void
+redact_traverse_thread(void *arg)
+{
+	struct redact_thread_arg *rt_arg = arg;
+	int err;
+	struct redact_record *data;
+#ifdef _KERNEL
+	if (rt_arg->os->os_phys->os_type == DMU_OST_ZFS)
+		rt_arg->deleted_objs = zfs_get_deleteq(rt_arg->os);
+	else
+		rt_arg->deleted_objs = objlist_create();
+#else
+	rt_arg->deleted_objs = objlist_create();
+#endif
+
+	err = traverse_dataset_resume(rt_arg->ds, rt_arg->txg,
+	    &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
+	    redact_cb, rt_arg);
+
+	if (err != EINTR)
+		rt_arg->error_code = err;
+	objlist_destroy(rt_arg->deleted_objs);
+	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
+	data->eos_marker = B_TRUE;
+	record_merge_enqueue(&rt_arg->q, &rt_arg->current_record, data);
+	thread_exit();
+}
+
+static inline void
+create_zbookmark_from_obj_off(zbookmark_phys_t *zb, uint64_t object,
+    uint64_t blkid)
+{
+	zb->zb_object = object;
+	zb->zb_level = 0;
+	zb->zb_blkid = blkid;
+}
+
+/*
+ * This is a utility function that can do the comparison for the start or ends
+ * of the ranges in a redact_record.
+ */
+static int
+redact_range_compare(uint64_t obj1, uint64_t off1, uint32_t dbss1,
+    uint64_t obj2, uint64_t off2, uint32_t dbss2)
+{
+	zbookmark_phys_t z1, z2;
+	create_zbookmark_from_obj_off(&z1, obj1, off1);
+	create_zbookmark_from_obj_off(&z2, obj2, off2);
+
+	return (zbookmark_compare(dbss1 >> SPA_MINBLOCKSHIFT, 0,
+	    dbss2 >> SPA_MINBLOCKSHIFT, 0, &z1, &z2));
+}
+
+/*
+ * Compare two redaction records by their range's start location.  Also makes
+ * eos records always compare last.  We use the thread number in the redact_node
+ * to ensure that records do not compare equal (which is not allowed in our avl
+ * trees).
+ */
+static int
+redact_node_compare_start(const void *arg1, const void *arg2)
+{
+	const struct redact_node *rn1 = arg1;
+	const struct redact_node *rn2 = arg2;
+	const struct redact_record *rr1 = rn1->record;
+	const struct redact_record *rr2 = rn2->record;
+	if (rr1->eos_marker)
+		return (1);
+	if (rr2->eos_marker)
+		return (-1);
+
+	int cmp = redact_range_compare(rr1->start_object, rr1->start_blkid,
+	    rr1->datablksz, rr2->start_object, rr2->start_blkid,
+	    rr2->datablksz);
+	if (cmp == 0)
+		cmp = (rn1->thread_num < rn2->thread_num ? -1 : 1);
+	return (cmp);
+}
+
+/*
+ * Compare two redaction records by their range's end location.  Also makes
+ * eos records always compare last.  We use the thread number in the redact_node
+ * to ensure that records do not compare equal (which is not allowed in our avl
+ * trees).
+ */
+static int
+redact_node_compare_end(const void *arg1, const void *arg2)
+{
+	const struct redact_node *rn1 = arg1;
+	const struct redact_node *rn2 = arg2;
+	const struct redact_record *srr1 = rn1->record;
+	const struct redact_record *srr2 = rn2->record;
+	if (srr1->eos_marker)
+		return (1);
+	if (srr2->eos_marker)
+		return (-1);
+
+	int cmp = redact_range_compare(srr1->end_object, srr1->end_blkid,
+	    srr1->datablksz, srr2->end_object, srr2->end_blkid,
+	    srr2->datablksz);
+	if (cmp == 0)
+		cmp = (rn1->thread_num < rn2->thread_num ? -1 : 1);
+	return (cmp);
+}
+
+/*
+ * Utility function that compares two redaction records to determine if any part
+ * of the "from" record is before any part of the "to" record. Also causes End
+ * of Stream redaction records to compare after all others, so that the
+ * redaction merging logic can stay simple.
+ */
+static boolean_t
+redact_record_before(const struct redact_record *from,
+    const struct redact_record *to)
+{
+	if (from->eos_marker == B_TRUE)
+		return (B_FALSE);
+	else if (to->eos_marker == B_TRUE)
+		return (B_TRUE);
+	return (redact_range_compare(from->start_object, from->start_blkid,
+	    from->datablksz, to->end_object, to->end_blkid,
+	    to->datablksz) <= 0);
+}
+
+/*
+ * Pop a new redaction record off the queue, check that the records are in the
+ * right order, and free the old data.
+ */
+static struct redact_record *
+get_next_redact_record(bqueue_t *bq, struct redact_record *prev)
+{
+	struct redact_record *next = bqueue_dequeue(bq);
+	ASSERT(redact_record_before(prev, next));
+	kmem_free(prev, sizeof (*prev));
+	return (next);
+}
+
+/*
+ * Remove the given redaction node from both trees, pull a new redaction record
+ * off the queue, free the old redaction record, update the redaction node, and
+ * reinsert the node into the trees.
+ */
+static int
+update_avl_trees(avl_tree_t *start_tree, avl_tree_t *end_tree,
+    struct redact_node *redact_node)
+{
+	avl_remove(start_tree, redact_node);
+	avl_remove(end_tree, redact_node);
+	redact_node->record = get_next_redact_record(&redact_node->rt_arg->q,
+	    redact_node->record);
+	avl_add(end_tree, redact_node);
+	avl_add(start_tree, redact_node);
+	return (redact_node->rt_arg->error_code);
+}
+
+/*
+ * Synctask for updating redaction lists.  We first take this txg's list of
+ * redacted blocks and append those to the redaction list.  We then update the
+ * redaction list's bonus buffer.  We store the furthest blocks we visited and
+ * the list of snapshots that we're redacting with respect to.  We need these so
+ * that redacted sends and receives can be correctly resumed.
+ */
+static void
+redaction_list_update_sync(void *arg, dmu_tx_t *tx)
+{
+	struct merge_data *md = arg;
+	uint64_t txg = dmu_tx_get_txg(tx);
+	list_t *list = &md->md_blocks[txg & TXG_MASK];
+	redact_block_phys_t *furthest_visited =
+	    &md->md_furthest[txg & TXG_MASK];
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	redaction_list_t *rl = md->md_redaction_list;
+	int bufsize = redact_sync_bufsize;
+	redact_block_phys_t *buf = kmem_alloc(bufsize * sizeof (*buf),
+	    KM_SLEEP);
+	int index = 0;
+
+	dmu_buf_will_dirty(rl->rl_dbuf, tx);
+
+	for (struct redact_block_list_node *rbln = list_remove_head(list);
+	    rbln != NULL; rbln = list_remove_head(list)) {
+		ASSERT3U(rbln->block.rbp_object, <=,
+		    furthest_visited->rbp_object);
+		ASSERT(rbln->block.rbp_object < furthest_visited->rbp_object ||
+		    rbln->block.rbp_blkid <= furthest_visited->rbp_blkid);
+		buf[index] = rbln->block;
+		index++;
+		if (index == bufsize) {
+			dmu_write(mos, rl->rl_object,
+			    rl->rl_phys->rlp_num_entries * sizeof (*buf),
+			    bufsize * sizeof (*buf), buf, tx);
+			rl->rl_phys->rlp_num_entries += bufsize;
+			index = 0;
+		}
+		kmem_free(rbln, sizeof (*rbln));
+	}
+	if (index > 0) {
+		dmu_write(mos, rl->rl_object, rl->rl_phys->rlp_num_entries *
+		    sizeof (*buf), index * sizeof (*buf), buf, tx);
+		rl->rl_phys->rlp_num_entries += index;
+	}
+	kmem_free(buf, bufsize * sizeof (*buf));
+
+	md->md_synctask_txg[txg & TXG_MASK] = B_FALSE;
+	rl->rl_phys->rlp_last_object = furthest_visited->rbp_object;
+	rl->rl_phys->rlp_last_blkid = furthest_visited->rbp_blkid;
+}
+
+static void
+commit_rl_updates(objset_t *os, struct merge_data *md, uint64_t object,
+    uint64_t blkid)
+{
+	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(os->os_spa)->dp_mos_dir);
+	dmu_tx_hold_space(tx, sizeof (struct redact_block_list_node));
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+	uint64_t txg = dmu_tx_get_txg(tx);
+	if (!md->md_synctask_txg[txg & TXG_MASK]) {
+		dsl_sync_task_nowait(dmu_tx_pool(tx),
+		    redaction_list_update_sync, md, tx);
+		md->md_synctask_txg[txg & TXG_MASK] = B_TRUE;
+		md->md_latest_synctask_txg = txg;
+	}
+	md->md_furthest[txg & TXG_MASK].rbp_object = object;
+	md->md_furthest[txg & TXG_MASK].rbp_blkid = blkid;
+	list_move_tail(&md->md_blocks[txg & TXG_MASK],
+	    &md->md_redact_block_pending);
+	dmu_tx_commit(tx);
+	md->md_last_time = gethrtime();
+}
+
+/*
+ * We want to store the list of blocks that we're redacting in the bookmark's
+ * redaction list.  However, this list is stored in the MOS, which means it can
+ * only be written to in syncing context.  To get around this, we create a
+ * synctask that will write to the MOS for us.  We tell it what to write via
+ * a linked list for each current transaction group; every time we decide to
+ * redact a block, we append it to the transaction group that is currently in
+ * open context.  We also update some progress information that the synctask
+ * will store to enable resumable redacted sends.
+ */
+static void
+update_redaction_list(struct merge_data *md, objset_t *os,
+    uint64_t object, uint64_t blkid, uint64_t endblkid, uint32_t blksz)
+{
+	boolean_t enqueue = B_FALSE;
+	redact_block_phys_t cur = {0};
+	uint64_t count = endblkid - blkid + 1;
+	while (count > REDACT_BLOCK_MAX_COUNT) {
+		update_redaction_list(md, os, object, blkid,
+		    blkid + REDACT_BLOCK_MAX_COUNT - 1, blksz);
+		blkid += REDACT_BLOCK_MAX_COUNT;
+		count -= REDACT_BLOCK_MAX_COUNT;
+	}
+	redact_block_phys_t *coalesce = &md->md_coalesce_block;
+	boolean_t new;
+	if (coalesce->rbp_size_count == 0) {
+		new = B_TRUE;
+		enqueue = B_FALSE;
+	} else  {
+		uint64_t old_count = redact_block_get_count(coalesce);
+		if (coalesce->rbp_object == object &&
+		    coalesce->rbp_blkid + old_count == blkid &&
+		    old_count + count <= REDACT_BLOCK_MAX_COUNT) {
+			ASSERT3U(redact_block_get_size(coalesce), ==, blksz);
+			redact_block_set_count(coalesce, old_count + count);
+			new = B_FALSE;
+			enqueue = B_FALSE;
+		} else {
+			new = B_TRUE;
+			enqueue = B_TRUE;
+		}
+	}
+
+	if (new) {
+		cur = *coalesce;
+		coalesce->rbp_blkid = blkid;
+		coalesce->rbp_object = object;
+
+		redact_block_set_count(coalesce, count);
+		redact_block_set_size(coalesce, blksz);
+	}
+
+	if (enqueue && redact_block_get_size(&cur) != 0) {
+		struct redact_block_list_node *rbln =
+		    kmem_alloc(sizeof (struct redact_block_list_node),
+		    KM_SLEEP);
+		rbln->block = cur;
+		list_insert_tail(&md->md_redact_block_pending, rbln);
+	}
+
+	if (gethrtime() > md->md_last_time +
+	    redaction_list_update_interval_ns) {
+		commit_rl_updates(os, md, object, blkid);
+	}
+}
+
+/*
+ * This thread merges all the redaction records provided by the worker threads,
+ * and determines which blocks are redacted by all the snapshots.  The algorithm
+ * for doing so is similar to performing a merge in mergesort with n sub-lists
+ * instead of 2, with some added complexity due to the fact that the entries are
+ * ranges, not just single blocks.  This algorithm relies on the fact that the
+ * queues are sorted, which is ensured by the fact that traverse_dataset
+ * traverses the dataset in a consistent order.  We pull one entry off the front
+ * of the queues of each secure dataset traversal thread.  Then we repeat the
+ * following: each record represents a range of blocks modified by one of the
+ * redaction snapshots, and each block in that range may need to be redacted in
+ * the send stream.  Find the record with the latest start of its range, and the
+ * record with the earliest end of its range. If the last start is before the
+ * first end, then we know that the blocks in the range [last_start, first_end]
+ * are covered by all of the ranges at the front of the queues, which means
+ * every thread redacts that whole range.  For example, let's say the ranges on
+ * each queue look like this:
+ *
+ * Block Id   1  2  3  4  5  6  7  8  9 10 11
+ * Thread 1 |    [====================]
+ * Thread 2 |       [========]
+ * Thread 3 |             [=================]
+ *
+ * Thread 3 has the last start (5), and Thread 2 has the first end (6).  All
+ * three threads modified the range [5,6], so that data should not be sent over
+ * the wire.  After we've determined whether or not to redact anything, we take
+ * the record with the first end.  We discard that record, and pull a new one
+ * off the front of the queue it came from.  In the above example, we would
+ * discard Thread 2's record, and pull a new one.  Let's say the next record we
+ * pulled from Thread 2 covered range [10,11].  The new layout would look like
+ * this:
+ *
+ * Block Id   1  2  3  4  5  6  7  8  9 10 11
+ * Thread 1 |    [====================]
+ * Thread 2 |                            [==]
+ * Thread 3 |             [=================]
+ *
+ * When we compare the last start (10, from Thread 2) and the first end (9, from
+ * Thread 1), we see that the last start is greater than the first end.
+ * Therefore, we do not redact anything from these records.  We'll iterate by
+ * replacing the record from Thread 1.
+ *
+ * We iterate by replacing the record with the lowest end because we know
+ * that the record with the lowest end has helped us as much as it can.  All the
+ * ranges before it that we will ever redact have been redacted.  In addition,
+ * by replacing the one with the lowest end, we guarantee we catch all ranges
+ * that need to be redacted.  For example, if in the case above we had replaced
+ * the record from Thread 1 instead, we might have ended up with the following:
+ *
+ * Block Id   1  2  3  4  5  6  7  8  9 10 11 12
+ * Thread 1 |                               [==]
+ * Thread 2 |       [========]
+ * Thread 3 |             [=================]
+ *
+ * If the next record from Thread 2 had been [8,10], for example, we should have
+ * redacted part of that range, but because we updated Thread 1's record, we
+ * missed it.
+ *
+ * We implement this algorithm by using two trees.  The first sorts the
+ * redaction records by their start_zb, and the second sorts them by their
+ * end_zb.  We use these to find the record with the last start and the record
+ * with the first end.  We create a record with that start and end, and send it
+ * on.  The overall runtime of this implementation is O(n log m), where n is the
+ * total number of redaction records from all the different redaction snapshots,
+ * and m is the number of redaction snapshots.
+ *
+ * If we redact with respect to zero snapshots, we create a redaction
+ * record with the start object and blkid to 0, and the end object and blkid to
+ * UINT64_MAX.  This will result in us redacting every block.
+ */
+static int
+perform_thread_merge(bqueue_t *q, uint32_t num_threads,
+    struct redact_thread_arg *thread_args, boolean_t *cancel)
+{
+	/*
+	 * Arguments:
+	 *   q            queue that receives the merged redaction records
+	 *   num_threads  number of entries in thread_args
+	 *   thread_args  one entry per traversal thread; each entry's queue
+	 *                is drained and destroyed here
+	 *   cancel       checked on every iteration; when set, the merge
+	 *                stops and EINTR is returned
+	 *
+	 * Returns 0 on success, EINTR if cancelled, or the error reported by
+	 * update_avl_trees().
+	 */
+	struct redact_node *redact_nodes;
+	avl_tree_t start_tree, end_tree;
+	struct redact_record *record;
+	struct redact_record *current_record = NULL;
+	int err = 0;
+
+	/*
+	 * If we're redacting with respect to zero snapshots, then no data is
+	 * permitted to be sent.  We enqueue a record that redacts all blocks;
+	 * the caller is responsible for the eos marker.
+	 */
+	if (num_threads == 0) {
+		record = kmem_zalloc(sizeof (struct redact_record),
+		    KM_SLEEP);
+		// We can't redact object 0, so don't try.
+		record->start_object = 1;
+		record->start_blkid = 0;
+		record->end_object = record->end_blkid = UINT64_MAX;
+		bqueue_enqueue(q, record, sizeof (*record));
+		return (0);
+	}
+
+	/* num_threads is non-zero from here on. */
+	redact_nodes = kmem_zalloc(num_threads * sizeof (*redact_nodes),
+	    KM_SLEEP);
+
+	avl_create(&start_tree, redact_node_compare_start,
+	    sizeof (struct redact_node),
+	    offsetof(struct redact_node, avl_node_start));
+	avl_create(&end_tree, redact_node_compare_end,
+	    sizeof (struct redact_node),
+	    offsetof(struct redact_node, avl_node_end));
+
+	/* Seed both trees with the first record from every thread. */
+	for (uint32_t i = 0; i < num_threads; i++) {
+		struct redact_node *node = &redact_nodes[i];
+		struct redact_thread_arg *targ = &thread_args[i];
+		node->record = bqueue_dequeue(&targ->q);
+		node->rt_arg = targ;
+		node->thread_num = i;
+		avl_add(&start_tree, node);
+		avl_add(&end_tree, node);
+	}
+
+	/*
+	 * Once the first record in the end tree has returned EOS, every record
+	 * must be an EOS record, so we should stop.
+	 */
+	while (err == 0 && !((struct redact_node *)avl_first(&end_tree))->
+	    record->eos_marker) {
+		if (*cancel) {
+			err = EINTR;
+			break;
+		}
+		struct redact_node *last_start = avl_last(&start_tree);
+		struct redact_node *first_end = avl_first(&end_tree);
+
+		/*
+		 * If the last start record is before the first end record,
+		 * then we have blocks that are redacted by all threads.
+		 * Therefore, we should redact them.  Copy the record, and send
+		 * it to the main thread.
+		 */
+		if (redact_record_before(last_start->record,
+		    first_end->record)) {
+			record = kmem_zalloc(sizeof (struct redact_record),
+			    KM_SLEEP);
+			*record = *first_end->record;
+			record->start_object = last_start->record->start_object;
+			record->start_blkid = last_start->record->start_blkid;
+			record_merge_enqueue(q, &current_record,
+			    record);
+		}
+		err = update_avl_trees(&start_tree, &end_tree, first_end);
+	}
+
+	/*
+	 * We're done; if we were cancelled, we need to cancel our workers and
+	 * clear out their queues.  Either way, we need to remove every thread's
+	 * redact_node struct from the avl trees.
+	 */
+	for (uint32_t i = 0; i < num_threads; i++) {
+		if (err != 0) {
+			thread_args[i].cancel = B_TRUE;
+			while (!redact_nodes[i].record->eos_marker) {
+				(void) update_avl_trees(&start_tree, &end_tree,
+				    &redact_nodes[i]);
+			}
+		}
+		avl_remove(&start_tree, &redact_nodes[i]);
+		avl_remove(&end_tree, &redact_nodes[i]);
+		kmem_free(redact_nodes[i].record,
+		    sizeof (struct redact_record));
+		bqueue_destroy(&thread_args[i].q);
+	}
+
+	avl_destroy(&start_tree);
+	avl_destroy(&end_tree);
+	kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
+	/* Flush the record that record_merge_enqueue() was still holding. */
+	if (current_record != NULL)
+		bqueue_enqueue(q, current_record, sizeof (*current_record));
+	return (err);
+}
+
+struct redact_merge_thread_arg {
+	bqueue_t q;		/* merged records; read by perform_redaction() */
+	spa_t *spa;		/* set from os->os_spa by dmu_redact_snap() */
+	int numsnaps;		/* passed as num_threads to the merge */
+	struct redact_thread_arg *thr_args;	/* one per traversal thread */
+	boolean_t cancel;	/* set by perform_redaction(), polled by merge */
+	int error_code;		/* return value of perform_thread_merge() */
+};
+
+/*
+ * Body of the merge thread.  Runs perform_thread_merge() over the worker
+ * queues, stores its result in the argument structure, then terminates the
+ * output queue with an end-of-stream record before exiting.
+ */
+static void
+redact_merge_thread(void *arg)
+{
+	struct redact_merge_thread_arg *merge_arg = arg;
+	struct redact_record *eos;
+
+	merge_arg->error_code = perform_thread_merge(&merge_arg->q,
+	    merge_arg->numsnaps, merge_arg->thr_args, &merge_arg->cancel);
+
+	/* The consumer stops reading when it sees the eos marker. */
+	eos = kmem_zalloc(sizeof (struct redact_record), KM_SLEEP);
+	eos->eos_marker = B_TRUE;
+	bqueue_enqueue_flush(&merge_arg->q, eos, 1);
+
+	thread_exit();
+}
+
+/*
+ * Advance *object to the next allocated, non-metadata object that is at or
+ * after rec->start_object, and hold its dnode in *dn using the given tag.
+ * Any dnode already held in *dn is released first.
+ *
+ * Returns 0 with *dn held on success.  Otherwise returns the error from
+ * dmu_object_next() or dnode_hold().
+ */
+static int
+hold_next_object(objset_t *os, struct redact_record *rec, void *tag,
+    uint64_t *object, dnode_t **dn)
+{
+	int err;
+
+	/* Drop the hold on the previous object, if there is one. */
+	if (*dn != NULL) {
+		dnode_rele(*dn, tag);
+		*dn = NULL;
+	}
+
+	/* Start the search just before the first object of the record. */
+	if (*object < rec->start_object)
+		*object = rec->start_object - 1;
+
+	for (;;) {
+		err = dmu_object_next(os, object, B_FALSE, 0);
+		if (err != 0)
+			return (err);
+
+		err = dnode_hold(os, *object, tag, dn);
+		if (err != 0)
+			return (err);
+
+		/* Accept the object unless it is too early or metadata. */
+		if (*object >= rec->start_object &&
+		    !DMU_OT_IS_METADATA((*dn)->dn_type))
+			return (0);
+
+		dnode_rele(*dn, tag);
+		*dn = NULL;
+	}
+}
+
+/*
+ * Drain the merged redaction records from rmta->q and add the block ranges
+ * they cover to the redaction list rl.  For each record, every non-metadata
+ * object in [start_object, end_object] is visited and the part of the
+ * record's block range that lies within the object (clamped to dn_maxblkid)
+ * is passed to update_redaction_list().
+ *
+ * Before returning, the merge thread is told to cancel, its remaining records
+ * are drained, pending updates are committed, and we wait for the txg of the
+ * latest synctask (if any) to sync.
+ *
+ * Returns 0 on success, EINTR if a signal is pending, or the error returned
+ * by hold_next_object() (ESRCH is treated as success).
+ */
+static int
+perform_redaction(objset_t *os, redaction_list_t *rl,
+    struct redact_merge_thread_arg *rmta)
+{
+	int err = 0;
+	bqueue_t *q = &rmta->q;
+	struct redact_record *rec = NULL;
+	struct merge_data md = { {0} };
+
+	list_create(&md.md_redact_block_pending,
+	    sizeof (struct redact_block_list_node),
+	    offsetof(struct redact_block_list_node, node));
+	md.md_redaction_list = rl;
+
+	for (int i = 0; i < TXG_SIZE; i++) {
+		list_create(&md.md_blocks[i],
+		    sizeof (struct redact_block_list_node),
+		    offsetof(struct redact_block_list_node, node));
+	}
+	dnode_t *dn = NULL;
+	uint64_t prev_obj = 0;
+	for (rec = bqueue_dequeue(q); !rec->eos_marker && err == 0;
+	    rec = get_next_redact_record(q, rec)) {
+		ASSERT3U(rec->start_object, !=, 0);
+		uint64_t object;
+		/*
+		 * Reuse the dnode we already hold when this record starts in
+		 * the object the previous record ended in.
+		 */
+		if (prev_obj != rec->start_object) {
+			object = rec->start_object - 1;
+			err = hold_next_object(os, rec, FTAG, &object, &dn);
+		} else {
+			object = prev_obj;
+		}
+		while (err == 0 && object <= rec->end_object) {
+			if (issig(JUSTLOOKING) && issig(FORREAL)) {
+				/*
+				 * Drop the dnode hold here: the cleanup after
+				 * the loop only releases dn when err == 0, so
+				 * without this the hold would be leaked.
+				 */
+				if (dn != NULL) {
+					dnode_rele(dn, FTAG);
+					dn = NULL;
+				}
+				err = EINTR;
+				break;
+			}
+			/*
+			 * Part of the current object is contained somewhere in
+			 * the range covered by rec.
+			 */
+			uint64_t startblkid;
+			uint64_t endblkid;
+			uint64_t maxblkid = dn->dn_phys->dn_maxblkid;
+
+			if (rec->start_object < object)
+				startblkid = 0;
+			else if (rec->start_blkid > maxblkid)
+				break;
+			else
+				startblkid = rec->start_blkid;
+
+			if (rec->end_object > object || rec->end_blkid >
+			    maxblkid) {
+				endblkid = maxblkid;
+			} else {
+				endblkid = rec->end_blkid;
+			}
+			update_redaction_list(&md, os, object, startblkid,
+			    endblkid, dn->dn_datablksz);
+
+			if (object == rec->end_object)
+				break;
+			err = hold_next_object(os, rec, FTAG, &object, &dn);
+		}
+		/* Running out of objects is not an error. */
+		if (err == ESRCH)
+			err = 0;
+		if (dn != NULL)
+			prev_obj = object;
+	}
+	if (err == 0 && dn != NULL)
+		dnode_rele(dn, FTAG);
+
+	if (err == ESRCH)
+		err = 0;
+	/* Stop the merge thread and consume whatever it still has queued. */
+	rmta->cancel = B_TRUE;
+	while (!rec->eos_marker)
+		rec = get_next_redact_record(q, rec);
+	kmem_free(rec, sizeof (*rec));
+
+	/*
+	 * There may be a block that's being coalesced, sync that out before we
+	 * return.
+	 */
+	if (err == 0 && md.md_coalesce_block.rbp_size_count != 0) {
+		struct redact_block_list_node *rbln =
+		    kmem_alloc(sizeof (struct redact_block_list_node),
+		    KM_SLEEP);
+		rbln->block = md.md_coalesce_block;
+		list_insert_tail(&md.md_redact_block_pending, rbln);
+	}
+	commit_rl_updates(os, &md, UINT64_MAX, UINT64_MAX);
+
+	/*
+	 * Wait for all the redaction info to sync out before we return, so that
+	 * anyone who attempts to resume this redaction will have all the data
+	 * they need.
+	 */
+	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
+	if (md.md_latest_synctask_txg != 0)
+		txg_wait_synced(dp, md.md_latest_synctask_txg);
+	for (int i = 0; i < TXG_SIZE; i++)
+		list_destroy(&md.md_blocks[i]);
+	return (err);
+}
+
+/*
+ * Return B_TRUE if guid appears among the first num_snaps entries of snaps,
+ * and B_FALSE otherwise (including when num_snaps is 0).
+ */
+static boolean_t
+redact_snaps_contains(uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
+{
+	/*
+	 * The index has the same type as num_snaps, so the loop condition is
+	 * not a mixed signed/unsigned comparison and the index cannot overflow
+	 * for large counts.
+	 */
+	for (uint64_t i = 0; i < num_snaps; i++) {
+		if (snaps[i] == guid)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Create, or resume creating, the redaction bookmark "redactbook" on the
+ * snapshot snapname, redacting with respect to the snapshots named by the
+ * pairs of redactnvl.
+ *
+ * One traversal thread is started per redaction snapshot, plus one merge
+ * thread; perform_redaction() then consumes the merged records in the calling
+ * thread.  If a bookmark with the resulting name already exists and carries a
+ * redaction list for the same set of snapshots, the operation resumes from
+ * the object/blkid stored in that list.
+ *
+ * Returns 0 on success or an errno, including:
+ *   EINVAL        snapname is not a snapshot, or a redaction snapshot does
+ *                 not satisfy dsl_dataset_is_before()
+ *   EALREADY      snapname or a redaction snapshot already has the
+ *                 redacted-datasets feature active
+ *   ENAMETOOLONG  the bookmark name does not fit in
+ *                 ZFS_MAX_DATASET_NAME_LEN
+ *   EEXIST        the bookmark exists but has no redaction object, or its
+ *                 redaction list has last object/blkid == UINT64_MAX
+ *   ESRCH         the existing redaction list names a different snapshot set
+ *   EIO           the existing redaction list could not be held
+ */
+int
+dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
+    const char *redactbook)
+{
+	int err = 0;
+	dsl_pool_t *dp = NULL;
+	dsl_dataset_t *ds = NULL;
+	int numsnaps = 0;
+	objset_t *os;
+	struct redact_thread_arg *args = NULL;
+	redaction_list_t *new_rl = NULL;
+	char *newredactbook;
+
+	if ((err = dsl_pool_hold(snapname, FTAG, &dp)) != 0)
+		return (err);
+
+	newredactbook = kmem_zalloc(sizeof (char) * ZFS_MAX_DATASET_NAME_LEN,
+	    KM_SLEEP);
+
+	if ((err = dsl_dataset_hold_flags(dp, snapname, DS_HOLD_FLAG_DECRYPT,
+	    FTAG, &ds)) != 0) {
+		goto out;
+	}
+	dsl_dataset_long_hold(ds, FTAG);
+	if (!ds->ds_is_snapshot || dmu_objset_from_ds(ds, &os) != 0) {
+		err = EINVAL;
+		goto out;
+	}
+	if (dsl_dataset_feature_is_active(ds, SPA_FEATURE_REDACTED_DATASETS)) {
+		err = EALREADY;
+		goto out;
+	}
+
+	numsnaps = fnvlist_num_pairs(redactnvl);
+	if (numsnaps > 0)
+		args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
+
+	nvpair_t *pair = NULL;
+	for (int i = 0; i < numsnaps; i++) {
+		pair = nvlist_next_nvpair(redactnvl, pair);
+		const char *name = nvpair_name(pair);
+		struct redact_thread_arg *rta = &args[i];
+		err = dsl_dataset_hold_flags(dp, name, DS_HOLD_FLAG_DECRYPT,
+		    FTAG, &rta->ds);
+		if (err != 0)
+			break;
+		/*
+		 * We want to do the long hold before we can get any other
+		 * errors, because the cleanup code will release the long
+		 * hold if rta->ds is filled in.
+		 */
+		dsl_dataset_long_hold(rta->ds, FTAG);
+
+		err = dmu_objset_from_ds(rta->ds, &rta->os);
+		if (err != 0)
+			break;
+		if (!dsl_dataset_is_before(rta->ds, ds, 0)) {
+			err = EINVAL;
+			break;
+		}
+		if (dsl_dataset_feature_is_active(rta->ds,
+		    SPA_FEATURE_REDACTED_DATASETS)) {
+			err = EALREADY;
+			break;
+		}
+	}
+	if (err != 0)
+		goto out;
+	VERIFY3P(nvlist_next_nvpair(redactnvl, pair), ==, NULL);
+
+	boolean_t resuming = B_FALSE;
+	zfs_bookmark_phys_t bookmark;
+
+	/* Build "<dataset>#<redactbook>" from "<dataset>@<snap>". */
+	(void) strlcpy(newredactbook, snapname, ZFS_MAX_DATASET_NAME_LEN);
+	char *c = strchr(newredactbook, '@');
+	ASSERT3P(c, !=, NULL);
+	int n = snprintf(c, ZFS_MAX_DATASET_NAME_LEN - (c - newredactbook),
+	    "#%s", redactbook);
+	if (n >= ZFS_MAX_DATASET_NAME_LEN - (c - newredactbook)) {
+		/*
+		 * Go through the common cleanup so that the holds on ds and
+		 * on every rta->ds are released along with the pool hold and
+		 * the allocations; returning directly here would leak them.
+		 */
+		err = ENAMETOOLONG;
+		goto out;
+	}
+	err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
+	if (err == 0) {
+		/* The bookmark exists: validate it and resume. */
+		resuming = B_TRUE;
+		if (bookmark.zbm_redaction_obj == 0) {
+			err = EEXIST;
+			goto out;
+		}
+		err = dsl_redaction_list_hold_obj(dp,
+		    bookmark.zbm_redaction_obj, FTAG, &new_rl);
+		if (err != 0) {
+			err = EIO;
+			goto out;
+		}
+		dsl_redaction_list_long_hold(dp, new_rl, FTAG);
+		if (new_rl->rl_phys->rlp_num_snaps != numsnaps) {
+			err = ESRCH;
+			goto out;
+		}
+		for (int i = 0; i < numsnaps; i++) {
+			struct redact_thread_arg *rta = &args[i];
+			if (!redact_snaps_contains(new_rl->rl_phys->rlp_snaps,
+			    new_rl->rl_phys->rlp_num_snaps,
+			    dsl_dataset_phys(rta->ds)->ds_guid)) {
+				err = ESRCH;
+				goto out;
+			}
+		}
+		if (new_rl->rl_phys->rlp_last_blkid == UINT64_MAX &&
+		    new_rl->rl_phys->rlp_last_object == UINT64_MAX) {
+			err = EEXIST;
+			goto out;
+		}
+		dsl_pool_rele(dp, FTAG);
+		dp = NULL;
+	} else {
+		/* No such bookmark yet: create a new redaction bookmark. */
+		uint64_t *guids = NULL;
+		if (numsnaps > 0) {
+			guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
+			    KM_SLEEP);
+		}
+		for (int i = 0; i < numsnaps; i++) {
+			struct redact_thread_arg *rta = &args[i];
+			guids[i] = dsl_dataset_phys(rta->ds)->ds_guid;
+		}
+
+		dsl_pool_rele(dp, FTAG);
+		dp = NULL;
+		err = dsl_bookmark_create_redacted(newredactbook, snapname,
+		    numsnaps, guids, FTAG, &new_rl);
+		kmem_free(guids, numsnaps * sizeof (uint64_t));
+		if (err != 0) {
+			goto out;
+		}
+	}
+
+	/* Start one traversal thread per redaction snapshot. */
+	for (int i = 0; i < numsnaps; i++) {
+		struct redact_thread_arg *rta = &args[i];
+		(void) bqueue_init(&rta->q, zfs_redact_queue_ff,
+		    zfs_redact_queue_length,
+		    offsetof(struct redact_record, ln));
+		if (resuming) {
+			rta->resume.zb_blkid =
+			    new_rl->rl_phys->rlp_last_blkid;
+			rta->resume.zb_object =
+			    new_rl->rl_phys->rlp_last_object;
+		}
+		rta->txg = dsl_dataset_phys(ds)->ds_creation_txg;
+		(void) thread_create(NULL, 0, redact_traverse_thread, rta,
+		    0, curproc, TS_RUN, minclsyspri);
+	}
+
+	struct redact_merge_thread_arg *rmta;
+	rmta = kmem_zalloc(sizeof (struct redact_merge_thread_arg), KM_SLEEP);
+
+	(void) bqueue_init(&rmta->q, zfs_redact_queue_ff,
+	    zfs_redact_queue_length, offsetof(struct redact_record, ln));
+	rmta->numsnaps = numsnaps;
+	rmta->spa = os->os_spa;
+	rmta->thr_args = args;
+	(void) thread_create(NULL, 0, redact_merge_thread, rmta, 0, curproc,
+	    TS_RUN, minclsyspri);
+	err = perform_redaction(os, new_rl, rmta);
+	bqueue_destroy(&rmta->q);
+	kmem_free(rmta, sizeof (struct redact_merge_thread_arg));
+
+out:
+	kmem_free(newredactbook, sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
+
+	if (new_rl != NULL) {
+		dsl_redaction_list_long_rele(new_rl, FTAG);
+		dsl_redaction_list_rele(new_rl, FTAG);
+	}
+	for (int i = 0; i < numsnaps; i++) {
+		struct redact_thread_arg *rta = &args[i];
+		/*
+		 * rta->ds may be NULL if we got an error while filling
+		 * it in.
+		 */
+		if (rta->ds != NULL) {
+			dsl_dataset_long_rele(rta->ds, FTAG);
+			dsl_dataset_rele_flags(rta->ds,
+			    DS_HOLD_FLAG_DECRYPT, FTAG);
+		}
+	}
+
+	if (args != NULL)
+		kmem_free(args, numsnaps * sizeof (*args));
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+	if (ds != NULL) {
+		dsl_dataset_long_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
+	}
+	return (SET_ERROR(err));
+}

diff --git a/zfs/module/zfs/dmu_send.c b/zfs/module/zfs/dmu_send.c
index 21246ab..10f2b19 100644
--- a/zfs/module/zfs/dmu_send.c
+++ b/zfs/module/zfs/dmu_send.c

@@ -21,11 +21,13 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
  * Copyright 2016 RackTop Systems.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 #include <sys/dmu.h>
@@ -51,6 +53,7 @@
 #include <sys/ddt.h>
 #include <sys/zfs_onexit.h>
 #include <sys/dmu_send.h>
+#include <sys/dmu_recv.h>
 #include <sys/dsl_destroy.h>
 #include <sys/blkptr.h>
 #include <sys/dsl_bookmark.h>
@@ -58,104 +61,197 @@
 #include <sys/bqueue.h>
 #include <sys/zvol.h>
 #include <sys/policy.h>
+#include <sys/objlist.h>
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
+
+#include "zmoddbg.h"
+
 
 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
 int zfs_send_corrupt_data = B_FALSE;
+/*
+ * This tunable controls the amount of data (measured in bytes) that will be
+ * prefetched by zfs send.  If the main thread is blocking on reads that haven't
+ * completed, this variable might need to be increased.  If instead the main
+ * thread is issuing new reads because the prefetches have fallen out of the
+ * cache, this may need to be decreased.
+ */
 int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
-/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
-int zfs_send_set_freerecords_bit = B_TRUE;
-/* Set this tunable to FALSE is disable sending unmodified spill blocks. */
-int zfs_send_unmodified_spill_blocks = B_TRUE;
+/*
+ * This tunable controls the length of the queues that zfs send worker threads
+ * use to communicate.  If the send_main_thread is blocking on these queues,
+ * this variable may need to be increased.  If there is a significant slowdown
+ * at the start of a send as these threads consume all the available IO
+ * resources, this variable may need to be decreased.
+ */
+int zfs_send_no_prefetch_queue_length = 1024 * 1024;
+/*
+ * These tunables control the fill fraction of the queues by zfs send.  The fill
+ * fraction controls the frequency with which threads have to be cv_signaled.
+ * If a lot of cpu time is being spent on cv_signal, then these should be tuned
+ * down.  If the queues empty before the signalled thread can catch up, then
+ * these should be tuned up.
+ */
+int zfs_send_queue_ff = 20;
+int zfs_send_no_prefetch_queue_ff = 20;
 
 /*
  * Use this to override the recordsize calculation for fast zfs send estimates.
  */
-unsigned long zfs_override_estimate_recordsize = 0;
+int zfs_override_estimate_recordsize = 0;
 
-#define	BP_SPAN(datablkszsec, indblkshift, level) \
-	(((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
-	(level) * (indblkshift - SPA_BLKPTRSHIFT)))
+/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
+int zfs_send_set_freerecords_bit = B_TRUE;
+
+/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
+int zfs_send_unmodified_spill_blocks = B_TRUE;
+
+/*
+ * Multiply a by b.  If the product fits in 64 bits it is stored in *c and
+ * B_TRUE is returned; on overflow B_FALSE is returned and *c is untouched.
+ */
+static inline boolean_t
+overflow_multiply(uint64_t a, uint64_t b, uint64_t *c)
+{
+	uint64_t product = a * b;
+
+	/* Dividing back detects wraparound; b == 0 can never overflow. */
+	if (b == 0 || product / b == a) {
+		*c = product;
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
 
 struct send_thread_arg {
 	bqueue_t	q;
-	dsl_dataset_t	*ds;		/* Dataset to traverse */
+	objset_t	*os;		/* Objset to traverse */
 	uint64_t	fromtxg;	/* Traverse from this txg */
 	int		flags;		/* flags to pass to traverse_dataset */
 	int		error_code;
 	boolean_t	cancel;
 	zbookmark_phys_t resume;
+	uint64_t	*num_blocks_visited; /* NOTE(review): presumably a progress counter; confirm */
 };
 
-struct send_block_record {
+struct redact_list_thread_arg {
+	boolean_t		cancel;	/* NOTE(review): presumably a stop request; confirm */
+	bqueue_t		q;	/* NOTE(review): presumably the thread's output queue; confirm */
+	zbookmark_phys_t	resume;
+	redaction_list_t	*rl;	/* NOTE(review): presumably the list this thread walks; confirm */
+	boolean_t		mark_redact;
+	int			error_code;
+	uint64_t		*num_blocks_visited;
+};
+
+struct send_merge_thread_arg {
+	bqueue_t			q;	/* NOTE(review): presumably the merged output queue; confirm */
+	objset_t			*os;
+	struct redact_list_thread_arg	*from_arg;	/* one of three input thread args */
+	struct send_thread_arg		*to_arg;	/* one of three input thread args */
+	struct redact_list_thread_arg	*redact_arg;	/* one of three input thread args */
+	int				error;
+	boolean_t			cancel;
+};
+
+struct send_range {
 	boolean_t		eos_marker; /* Marks the end of the stream */
-	blkptr_t		bp;
-	zbookmark_phys_t	zb;
-	uint8_t			indblkshift;
-	uint16_t		datablkszsec;
+	uint64_t		object;
+	uint64_t		start_blkid;
+	uint64_t		end_blkid;
 	bqueue_node_t		ln;
+	enum type {DATA, HOLE, OBJECT, OBJECT_RANGE, REDACT,
+	    PREVIOUSLY_REDACTED} type; /* range_free() keys off this to pick the sru member */
+	union {
+		struct srd {
+			dmu_object_type_t	obj_type;
+			uint32_t		datablksz; // logical size
+			uint32_t		datasz; // payload size
+			blkptr_t		bp;
+			arc_buf_t		*abuf; /* destroyed by range_free() if set */
+			abd_t			*abd; /* freed by range_free() if set */
+			kmutex_t		lock; /* held around the io_outstanding wait */
+			kcondvar_t		cv; /* range_free() waits on this */
+			boolean_t		io_outstanding; /* range_free() waits until clear */
+			boolean_t		io_compressed;
+			int			io_err;
+		} data;
+		struct srh {
+			uint32_t		datablksz;
+		} hole;
+		struct sro {
+			/*
+			 * This is a pointer because embedding it in the
+			 * struct causes these structures to be massively larger
+			 * for all range types; this makes the code much less
+			 * memory efficient.
+			 */
+			dnode_phys_t		*dnp;
+			blkptr_t		bp;
+		} object;
+		struct srr {
+			uint32_t		datablksz;
+		} redact;
+		struct sror {
+			blkptr_t		bp;
+		} object_range;
+	} sru;
 };
 
-typedef struct dump_bytes_io {
-	dmu_sendarg_t	*dbi_dsp;
-	void		*dbi_buf;
-	int		dbi_len;
-} dump_bytes_io_t;
+/*
+ * The list of data whose inclusion in a send stream can be pending from
+ * one call to backup_cb to another.  Multiple calls to dump_free(),
+ * dump_freeobjects(), and dump_redact() can be aggregated into a single
+ * DRR_FREE, DRR_FREEOBJECTS, or DRR_REDACT replay record.
+ */
+typedef enum {
+	PENDING_NONE,		/* no record is being aggregated */
+	PENDING_FREE,		/* a DRR_FREE record is being aggregated */
+	PENDING_FREEOBJECTS,	/* a DRR_FREEOBJECTS record is being aggregated */
+	PENDING_REDACT		/* a DRR_REDACT record is being aggregated */
+} dmu_pendop_t;
 
-static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
+typedef struct dmu_send_cookie {
+	dmu_replay_record_t *dsc_drr;	/* record buffer emitted by dump_record() */
+	dmu_send_outparams_t *dsc_dso;	/* supplies dso_outfunc and dso_arg */
+	offset_t *dsc_off;		/* advanced by the size of each item dumped */
+	objset_t *dsc_os;		/* passed to dso_outfunc */
+	zio_cksum_t dsc_zc;		/* running fletcher-4 checksum of the stream */
+	uint64_t dsc_toguid;
+	uint64_t dsc_fromtxg;
+	int dsc_err;			/* last dso_outfunc return value */
+	dmu_pendop_t dsc_pending_op;	/* kind of record being aggregated */
+	uint64_t dsc_featureflags;	/* tested for DMU_BACKUP_FEATURE_RAW */
+	uint64_t dsc_last_data_object;	/* used by dump_free()'s ordering assert */
+	uint64_t dsc_last_data_offset;	/* used by dump_free()'s ordering assert */
+	uint64_t dsc_resume_object;
+	uint64_t dsc_resume_offset;
+	boolean_t dsc_sent_begin;	/* set when a DRR_BEGIN is dumped */
+	boolean_t dsc_sent_end;		/* set when a DRR_END is dumped */
+    	int32_t dsc_block_diff;		/* NOTE(review): no reader visible here; confirm */
+} dmu_send_cookie_t;
+
+static int do_dump(dmu_send_cookie_t *dscp, struct send_range *range);
 
 static void
-dump_bytes_cb(void *arg)
+range_free(struct send_range *range)	/* free a range and what it owns */
 {
-	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
-	dmu_sendarg_t *dsp = dbi->dbi_dsp;
-	dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os);
-	ssize_t resid; /* have to get resid to get detailed errno */
+	KTRACE();
+	if (range->type == OBJECT) {
+		size_t size = sizeof (dnode_phys_t) *
+		    (range->sru.object.dnp->dn_extra_slots + 1); /* one per slot */
+		kmem_free(range->sru.object.dnp, size);
+	} else if (range->type == DATA) {
+		mutex_enter(&range->sru.data.lock);
+		while (range->sru.data.io_outstanding) /* wait for the flag to clear */
+			cv_wait(&range->sru.data.cv, &range->sru.data.lock);
+		if (range->sru.data.abd != NULL)
+			abd_free(range->sru.data.abd);
+		if (range->sru.data.abuf != NULL) {
+			arc_buf_destroy(range->sru.data.abuf,
+			    &range->sru.data.abuf);
+		}
+		mutex_exit(&range->sru.data.lock);
 
-	/*
-	 * The code does not rely on len being a multiple of 8.  We keep
-	 * this assertion because of the corresponding assertion in
-	 * receive_read().  Keeping this assertion ensures that we do not
-	 * inadvertently break backwards compatibility (causing the assertion
-	 * in receive_read() to trigger on old software). Newer feature flags
-	 * (such as raw send) may break this assertion since they were
-	 * introduced after the requirement was made obsolete.
-	 */
-
-	ASSERT(dbi->dbi_len % 8 == 0 ||
-	    (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
-
-	dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
-	    (caddr_t)dbi->dbi_buf, dbi->dbi_len,
-	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
-
-	mutex_enter(&ds->ds_sendstream_lock);
-	*dsp->dsa_off += dbi->dbi_len;
-	mutex_exit(&ds->ds_sendstream_lock);
-}
-
-static int
-dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
-{
-	dump_bytes_io_t dbi;
-
-	dbi.dbi_dsp = dsp;
-	dbi.dbi_buf = buf;
-	dbi.dbi_len = len;
-
-#if defined(HAVE_LARGE_STACKS)
-	dump_bytes_cb(&dbi);
-#else
-	/*
-	 * The vn_rdwr() call is performed in a taskq to ensure that there is
-	 * always enough stack space to write safely to the target filesystem.
-	 * The ZIO_TYPE_FREE threads are used because there can be a lot of
-	 * them and they are used in vdev_file.c for a similar purpose.
-	 */
-	spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
-	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
-#endif /* HAVE_LARGE_STACKS */
-
-	return (dsp->dsa_err);
+		cv_destroy(&range->sru.data.cv);
+		mutex_destroy(&range->sru.data.lock);
+	}
+	kmem_free(range, sizeof (*range)); /* finally the range itself */
 }
 
 /*
@@ -164,32 +260,74 @@
  * up to the start of the checksum itself.
  */
 static int
-dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+dump_record(dmu_send_cookie_t *dscp, void *payload, int payload_len)
 {
+	KTRACE();
+	dmu_send_outparams_t *dso = dscp->dsc_dso;
 	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
 	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
-	(void) fletcher_4_incremental_native(dsp->dsa_drr,
+	(void) fletcher_4_incremental_native(dscp->dsc_drr,
 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
-	    &dsp->dsa_zc);
-	if (dsp->dsa_drr->drr_type == DRR_BEGIN) {
-		dsp->dsa_sent_begin = B_TRUE;
+	    &dscp->dsc_zc);
+	if (dscp->dsc_drr->drr_type == DRR_BEGIN) {
+		KDEBUG("DRR_BEGIN");
+		dscp->dsc_sent_begin = B_TRUE;
 	} else {
-		ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+		ASSERT(ZIO_CHECKSUM_IS_ZERO(&dscp->dsc_drr->drr_u.
 		    drr_checksum.drr_checksum));
-		dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+		dscp->dsc_drr->drr_u.drr_checksum.drr_checksum = dscp->dsc_zc;
 	}
-	if (dsp->dsa_drr->drr_type == DRR_END) {
-		dsp->dsa_sent_end = B_TRUE;
+	if (dscp->dsc_drr->drr_type == DRR_END) {
+		KDEBUG("DRR_END");
+		dscp->dsc_sent_end = B_TRUE;
 	}
-	(void) fletcher_4_incremental_native(&dsp->dsa_drr->
+	KDEBUG("fletcher");
+	(void) fletcher_4_incremental_native(&dscp->dsc_drr->
 	    drr_u.drr_checksum.drr_checksum,
-	    sizeof (zio_cksum_t), &dsp->dsa_zc);
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (SET_ERROR(EINTR));
-	if (payload_len != 0 && !dsp->block_diff) {
-		(void) fletcher_4_incremental_native(payload, payload_len,
-		    &dsp->dsa_zc);
-		if (dump_bytes(dsp, payload, payload_len) != 0)
+	    sizeof (zio_cksum_t), &dscp->dsc_zc);
+	/* The stream offset is advanced before the record is handed off. */
+	*dscp->dsc_off += sizeof (dmu_replay_record_t);
+	KDEBUG("dso->dso_outfunc");
+	dscp->dsc_err = dso->dso_outfunc(dscp->dsc_os, dscp->dsc_drr,
+	    sizeof (dmu_replay_record_t), dso->dso_arg);
+	/* sizeof yields a size_t, which is printed with %zu. */
+	KDEBUG("sent %zu bytes sizeof (dmu_replay_record_t)",
+	    sizeof (dmu_replay_record_t));
+
+	/* Any output failure is reported to the caller as EINTR. */
+	if (dscp->dsc_err != 0) {
+		KDEBUG("dscp->dsc_err != 0 (%d)", dscp->dsc_err);
+		return (SET_ERROR(EINTR));
+	}
+	/* payload_len is an int, so it is printed with %d. */
+	KDEBUG("payload_len=%d", payload_len);
+	KDEBUG("dso->block_diff=%d", dso->block_diff);
+
+	if (payload_len != 0 && !dso->block_diff) {
+		KDEBUG("Sending payload");
+		*dscp->dsc_off += payload_len;
+		/*
+		 * payload is null when dso_dryrun == B_TRUE (i.e. when we're
+		 * doing a send size calculation)
+		 */
+		if (payload != NULL) {
+			(void) fletcher_4_incremental_native(
+			    payload, payload_len, &dscp->dsc_zc);
+		}
+
+		/*
+		 * The code does not rely on this (len being a multiple of 8).
+		 * We keep this assertion because of the corresponding assertion
+		 * in receive_read().  Keeping this assertion ensures that we do
+		 * not inadvertently break backwards compatibility (causing the
+		 * assertion in receive_read() to trigger on old software).
+		 *
+		 * Raw sends cannot be received on old software, and so can
+		 * bypass this assertion.
+		 */
+
+		ASSERT((payload_len % 8 == 0) ||
+		    (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW));
+
+		dscp->dsc_err = dso->dso_outfunc(dscp->dsc_os, payload,
+		    payload_len, dso->dso_arg);
+		if (dscp->dsc_err != 0)
 			return (SET_ERROR(EINTR));
 	}
 	return (0);
@@ -204,10 +342,11 @@
  * and freeobject records that were generated on the source.
  */
 static int
-dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
+dump_free(dmu_send_cookie_t *dscp, uint64_t object, uint64_t offset,
     uint64_t length)
 {
-	struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
+	KTRACE();
+	struct drr_free *drrf = &(dscp->dsc_drr->drr_u.drr_free);
 
 	/*
 	 * When we receive a free record, dbuf_free_range() assumes
@@ -222,87 +361,140 @@
 	 * another way to assert that the one-record constraint is still
 	 * satisfied.
 	 */
-	ASSERT(object > dsp->dsa_last_data_object ||
-	    (object == dsp->dsa_last_data_object &&
-	    offset > dsp->dsa_last_data_offset));
+	ASSERT(object > dscp->dsc_last_data_object ||
+	    (object == dscp->dsc_last_data_object &&
+	    offset > dscp->dsc_last_data_offset));
 
 	/*
 	 * If there is a pending op, but it's not PENDING_FREE, push it out,
 	 * since free block aggregation can only be done for blocks of the
 	 * same type (i.e., DRR_FREE records can only be aggregated with
 	 * other DRR_FREE records.  DRR_FREEOBJECTS records can only be
-	 * aggregated with other DRR_FREEOBJECTS records.
+	 * aggregated with other DRR_FREEOBJECTS records).
 	 */
-	if (dsp->dsa_pending_op != PENDING_NONE &&
-	    dsp->dsa_pending_op != PENDING_FREE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE &&
+	    dscp->dsc_pending_op != PENDING_FREE) {
+		KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 
-	if (dsp->dsa_pending_op == PENDING_FREE) {
-		/*
-		 * There should never be a PENDING_FREE if length is
-		 * DMU_OBJECT_END (because dump_dnode is the only place where
-		 * this function is called with a DMU_OBJECT_END, and only after
-		 * flushing any pending record).
-		 */
-		ASSERT(length != DMU_OBJECT_END);
+	if (dscp->dsc_pending_op == PENDING_FREE) {
 		/*
 		 * Check to see whether this free block can be aggregated
 		 * with pending one.
 		 */
 		if (drrf->drr_object == object && drrf->drr_offset +
 		    drrf->drr_length == offset) {
-			if (offset + length < offset)
-				drrf->drr_length = DMU_OBJECT_END;
+			if (offset + length < offset || length == UINT64_MAX)
+				drrf->drr_length = UINT64_MAX;
 			else
 				drrf->drr_length += length;
 			return (0);
 		} else {
 			/* not a continuation.  Push out pending record */
-			if (dump_record(dsp, NULL, 0) != 0)
+			KDEBUG("calling dump_record");
+			if (dump_record(dscp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
-			dsp->dsa_pending_op = PENDING_NONE;
+			dscp->dsc_pending_op = PENDING_NONE;
 		}
 	}
 	/* create a FREE record and make it pending */
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_FREE;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_FREE;
 	drrf->drr_object = object;
 	drrf->drr_offset = offset;
 	if (offset + length < offset)
 		drrf->drr_length = DMU_OBJECT_END;
 	else
 		drrf->drr_length = length;
-	drrf->drr_toguid = dsp->dsa_toguid;
+	drrf->drr_toguid = dscp->dsc_toguid;
 	if (length == DMU_OBJECT_END) {
-		if (dump_record(dsp, NULL, 0) != 0)
+		KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 	} else {
-		dsp->dsa_pending_op = PENDING_FREE;
+		dscp->dsc_pending_op = PENDING_FREE;
 	}
 
 	return (0);
 }
 
+/*
+ * Stage a DRR_REDACT record for [offset, offset + length) of object as the
+ * pending op, or grow the pending DRR_REDACT record when the two are adjacent.
+ */
 static int
-dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object,
-    uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data)
+dump_redact(dmu_send_cookie_t *dscp, uint64_t object, uint64_t offset,
+    uint64_t length)
 {
+	struct drr_redact *drrr = &dscp->dsc_drr->drr_u.drr_redact;
+	KTRACE();
+
+	/*
+	 * If there is a pending op, but it's not PENDING_REDACT, push it out,
+	 * since aggregation can only be done for records of the same type
+	 * (i.e., DRR_REDACT records can only be aggregated with other
+	 * DRR_REDACT records).  A failed flush is reported as EINTR.
+	 */
+	if (dscp->dsc_pending_op != PENDING_NONE &&
+	    dscp->dsc_pending_op != PENDING_REDACT) {
+		KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
+			return (SET_ERROR(EINTR));
+		dscp->dsc_pending_op = PENDING_NONE;
+	}
+
+	if (dscp->dsc_pending_op == PENDING_REDACT) {
+		/*
+		 * Aggregate only when the pending record covers the same object
+		 * and ends exactly where this range begins.
+		 */
+		if (drrr->drr_object == object && drrr->drr_offset +
+		    drrr->drr_length == offset) {
+			drrr->drr_length += length;	/* extend; stays pending */
+			return (0);
+		} else {
+			/* not a continuation.  Push out pending record */
+			KDEBUG("calling dump_record");
+			if (dump_record(dscp, NULL, 0) != 0)
+				return (SET_ERROR(EINTR));
+			dscp->dsc_pending_op = PENDING_NONE;
+		}
+	}
+	/* create a REDACT record and make it pending */
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_REDACT;
+	drrr->drr_object = object;
+	drrr->drr_offset = offset;
+	drrr->drr_length = length;
+	drrr->drr_toguid = dscp->dsc_toguid;
+	dscp->dsc_pending_op = PENDING_REDACT;	/* not written out here */
+
+	return (0);
+}
+
+
+static int
+dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object,
+    uint64_t offset, int lsize, int psize, const blkptr_t *bp,
+    boolean_t io_compressed, void *data)
+{
+	KTRACE();
 	uint64_t payload_size;
-	boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
-	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
+	boolean_t raw = (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW);
+	struct drr_write *drrw = &(dscp->dsc_drr->drr_u.drr_write);
 
 	/*
 	 * We send data in increasing object, offset order.
 	 * See comment in dump_free() for details.
 	 */
-	ASSERT(object > dsp->dsa_last_data_object ||
-	    (object == dsp->dsa_last_data_object &&
-	    offset > dsp->dsa_last_data_offset));
-	dsp->dsa_last_data_object = object;
-	dsp->dsa_last_data_offset = offset + lsize - 1;
+	ASSERT(object > dscp->dsc_last_data_object ||
+	    (object == dscp->dsc_last_data_object &&
+	    offset > dscp->dsc_last_data_offset));
+	dscp->dsc_last_data_object = object;
+	dscp->dsc_last_data_offset = offset + lsize - 1;
 
 	/*
 	 * If there is any kind of pending aggregation (currently either
@@ -310,22 +502,30 @@
 	 * the stream, since aggregation can't be done across operations
 	 * of different types.
 	 */
-	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE) {
+		KDEBUG("pending aggregation");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 	/* write a WRITE record */
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_WRITE;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_WRITE;
 	drrw->drr_object = object;
 	drrw->drr_type = type;
 	drrw->drr_offset = offset;
-	drrw->drr_toguid = dsp->dsa_toguid;
+	drrw->drr_toguid = dscp->dsc_toguid;
 	drrw->drr_logical_size = lsize;
 
 	/* only set the compression fields if the buf is compressed or raw */
-	if (raw || lsize != psize) {
+	boolean_t compressed =
+	    (bp != NULL ? BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
+	    io_compressed : lsize != psize);
+	if (raw || compressed) {
+		ASSERT(bp != NULL);
+		KDEBUG("raw || compressed");
+		ASSERT(raw || dscp->dsc_featureflags &
+		    DMU_BACKUP_FEATURE_COMPRESSED);
 		ASSERT(!BP_IS_EMBEDDED(bp));
 		ASSERT3S(psize, >, 0);
 
@@ -344,8 +544,9 @@
 			    drrw->drr_iv);
 			zio_crypt_decode_mac_bp(bp, drrw->drr_mac);
 		} else {
+			KDEBUG("Not raw or compressed");
 			/* this is a compressed block */
-			ASSERT(dsp->dsa_featureflags &
+			ASSERT(dscp->dsc_featureflags &
 			    DMU_BACKUP_FEATURE_COMPRESSED);
 			ASSERT(!BP_SHOULD_BYTESWAP(bp));
 			ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
@@ -365,7 +566,7 @@
 		/*
 		 * There's no pre-computed checksum for partial-block writes,
 		 * embedded BP's, or encrypted BP's that are being sent as
-		 * plaintext, so (like fletcher4-checkummed blocks) userland
+		 * plaintext, so (like fletcher4-checksummed blocks) userland
 		 * will have to compute a dedup-capable checksum itself.
 		 */
 		drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
@@ -381,33 +582,45 @@
 		drrw->drr_key.ddk_cksum = bp->blk_cksum;
 	}
 
-	if (dump_record(dsp, data, payload_size) != 0)
-		return (SET_ERROR(EINTR));
+	if(! dscp->dsc_block_diff)
+	{
+		KDEBUG("calling dump_record");
+		if (dump_record(dscp, data, payload_size) != 0)
+			return (SET_ERROR(EINTR));
+	}
+	else
+	{
+		KDEBUG("calling dump_record=%ld", payload_size);
+		if (dump_record(dscp, data, payload_size) != 0)
+			return (SET_ERROR(EINTR));
+	}
 	return (0);
 }
 
 static int
-dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
+dump_write_embedded(dmu_send_cookie_t *dscp, uint64_t object, uint64_t offset,
     int blksz, const blkptr_t *bp)
 {
+	KTRACE();
 	char buf[BPE_PAYLOAD_SIZE];
 	struct drr_write_embedded *drrw =
-	    &(dsp->dsa_drr->drr_u.drr_write_embedded);
+	    &(dscp->dsc_drr->drr_u.drr_write_embedded);
 
-	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE) {
+		KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 
 	ASSERT(BP_IS_EMBEDDED(bp));
 
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_WRITE_EMBEDDED;
 	drrw->drr_object = object;
 	drrw->drr_offset = offset;
 	drrw->drr_length = blksz;
-	drrw->drr_toguid = dsp->dsa_toguid;
+	drrw->drr_toguid = dscp->dsc_toguid;
 	drrw->drr_compression = BP_GET_COMPRESS(bp);
 	drrw->drr_etype = BPE_GET_ETYPE(bp);
 	drrw->drr_lsize = BPE_GET_LSIZE(bp);
@@ -415,39 +628,49 @@
 
 	decode_embedded_bp_compressed(bp, buf);
 
-	if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+	uint32_t psize = drrw->drr_psize;
+	uint32_t rsize = P2ROUNDUP(psize, 8);
+
+	if (psize != rsize)
+		memset(buf + psize, 0, rsize - psize);
+
+	KDEBUG("calling dump_record");
+	if (dump_record(dscp, buf, rsize) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
 
 static int
-dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
+dump_spill(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
+    void *data)
 {
-	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
+	KTRACE();
+	struct drr_spill *drrs = &(dscp->dsc_drr->drr_u.drr_spill);
 	uint64_t blksz = BP_GET_LSIZE(bp);
 	uint64_t payload_size = blksz;
 
-	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE) {
+	KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 
 	/* write a SPILL record */
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_SPILL;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_SPILL;
 	drrs->drr_object = object;
 	drrs->drr_length = blksz;
-	drrs->drr_toguid = dsp->dsa_toguid;
+	drrs->drr_toguid = dscp->dsc_toguid;
 
 	/* See comment in dump_dnode() for full details */
 	if (zfs_send_unmodified_spill_blocks &&
-	    (bp->blk_birth <= dsp->dsa_fromtxg)) {
+	    (bp->blk_birth <= dscp->dsc_fromtxg)) {
 		drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
 	}
 
 	/* handle raw send fields */
-	if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+	if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) {
 		ASSERT(BP_IS_PROTECTED(bp));
 
 		if (BP_SHOULD_BYTESWAP(bp))
@@ -459,17 +682,19 @@
 		payload_size = drrs->drr_compressed_size;
 	}
 
-	if (dump_record(dsp, data, payload_size) != 0)
+	KDEBUG("calling dump_record");
+	if (dump_record(dscp, data, payload_size) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
 
 static int
-dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
+dump_freeobjects(dmu_send_cookie_t *dscp, uint64_t firstobj, uint64_t numobjs)
 {
-	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
+	KTRACE();
+	struct drr_freeobjects *drrfo = &(dscp->dsc_drr->drr_u.drr_freeobjects);
 	uint64_t maxobj = DNODES_PER_BLOCK *
-	    (DMU_META_DNODE(dsp->dsa_os)->dn_maxblkid + 1);
+	    (DMU_META_DNODE(dscp->dsc_os)->dn_maxblkid + 1);
 
 	/*
 	 * ZoL < 0.7 does not handle large FREEOBJECTS records correctly,
@@ -478,7 +703,7 @@
 	 * receiving side.
 	 */
 	if (maxobj > 0) {
-		if (maxobj < firstobj)
+		if (maxobj <= firstobj)
 			return (0);
 
 		if (maxobj < firstobj + numobjs)
@@ -490,15 +715,17 @@
 	 * push it out, since free block aggregation can only be done for
 	 * blocks of the same type (i.e., DRR_FREE records can only be
 	 * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
-	 * can only be aggregated with other DRR_FREEOBJECTS records.
+	 * can only be aggregated with other DRR_FREEOBJECTS records).
 	 */
-	if (dsp->dsa_pending_op != PENDING_NONE &&
-	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE &&
+	    dscp->dsc_pending_op != PENDING_FREEOBJECTS) {
+	KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
-	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
+
+	if (dscp->dsc_pending_op == PENDING_FREEOBJECTS) {
 		/*
 		 * See whether this free object array can be aggregated
 		 * with pending one
@@ -507,33 +734,35 @@
 			drrfo->drr_numobjs += numobjs;
 			return (0);
 		} else {
+	KDEBUG("calling dump_record");
 			/* can't be aggregated.  Push out pending record */
-			if (dump_record(dsp, NULL, 0) != 0)
+			if (dump_record(dscp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
-			dsp->dsa_pending_op = PENDING_NONE;
+			dscp->dsc_pending_op = PENDING_NONE;
 		}
 	}
 
 	/* write a FREEOBJECTS record */
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_FREEOBJECTS;
 	drrfo->drr_firstobj = firstobj;
 	drrfo->drr_numobjs = numobjs;
-	drrfo->drr_toguid = dsp->dsa_toguid;
+	drrfo->drr_toguid = dscp->dsc_toguid;
 
-	dsp->dsa_pending_op = PENDING_FREEOBJECTS;
+	dscp->dsc_pending_op = PENDING_FREEOBJECTS;
 
 	return (0);
 }
 
 static int
-dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
+dump_dnode(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
     dnode_phys_t *dnp)
 {
-	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+	KTRACE();
+	struct drr_object *drro = &(dscp->dsc_drr->drr_u.drr_object);
 	int bonuslen;
 
-	if (object < dsp->dsa_resume_object) {
+	if (object < dscp->dsc_resume_object) {
 		/*
 		 * Note: when resuming, we will visit all the dnodes in
 		 * the block of dnodes that we are resuming from.  In
@@ -541,23 +770,24 @@
 		 * the one we are resuming from.  We should be at most one
 		 * block's worth of dnodes behind the resume point.
 		 */
-		ASSERT3U(dsp->dsa_resume_object - object, <,
+		ASSERT3U(dscp->dsc_resume_object - object, <,
 		    1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT));
 		return (0);
 	}
 
 	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
-		return (dump_freeobjects(dsp, object, 1));
+		return (dump_freeobjects(dscp, object, 1));
 
-	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE) {
+	KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 
 	/* write an OBJECT record */
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_OBJECT;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_OBJECT;
 	drro->drr_object = object;
 	drro->drr_type = dnp->dn_type;
 	drro->drr_bonustype = dnp->dn_bonustype;
@@ -566,15 +796,15 @@
 	drro->drr_dn_slots = dnp->dn_extra_slots + 1;
 	drro->drr_checksumtype = dnp->dn_checksum;
 	drro->drr_compress = dnp->dn_compress;
-	drro->drr_toguid = dsp->dsa_toguid;
+	drro->drr_toguid = dscp->dsc_toguid;
 
-	if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
+	if (!(dscp->dsc_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
 	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
 		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
 
 	bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8);
 
-	if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) {
+	if ((dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW)) {
 		ASSERT(BP_IS_ENCRYPTED(bp));
 
 		if (BP_SHOULD_BYTESWAP(bp))
@@ -592,6 +822,8 @@
 		 * to send it.
 		 */
 		if (bonuslen != 0) {
+			if (drro->drr_bonuslen > DN_MAX_BONUS_LEN(dnp))
+				return (SET_ERROR(EINVAL));
 			drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp);
 			bonuslen = drro->drr_raw_bonuslen;
 		}
@@ -599,22 +831,23 @@
 
 	/*
 	 * DRR_OBJECT_SPILL is set for every dnode which references a
-	 * spill block.  This allows the receiving pool to definitively
+	 * spill block.	 This allows the receiving pool to definitively
 	 * determine when a spill block should be kept or freed.
 	 */
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
 		drro->drr_flags |= DRR_OBJECT_SPILL;
 
-	if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
+	KDEBUG("calling dump_record");
+	if (dump_record(dscp, DN_BONUS(dnp), bonuslen) != 0)
 		return (SET_ERROR(EINTR));
 
 	/* Free anything past the end of the file. */
-	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
+	if (dump_free(dscp, object, (dnp->dn_maxblkid + 1) *
 	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
 		return (SET_ERROR(EINTR));
 
 	/*
-	 * Send DRR_SPILL records for unmodified spill blocks.  This is useful
+	 * Send DRR_SPILL records for unmodified spill blocks.	This is useful
 	 * because changing certain attributes of the object (e.g. blocksize)
 	 * can cause old versions of ZFS to incorrectly remove a spill block.
 	 * Including these records in the stream forces an up to date version
@@ -624,63 +857,72 @@
 	 */
 	if (zfs_send_unmodified_spill_blocks &&
 	    (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
-	    (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
-		struct send_block_record record;
+	    (DN_SPILL_BLKPTR(dnp)->blk_birth <= dscp->dsc_fromtxg)) {
+		struct send_range record;
+		blkptr_t *bp = DN_SPILL_BLKPTR(dnp);
 
-		bzero(&record, sizeof (struct send_block_record));
+		bzero(&record, sizeof (struct send_range));
+		record.type = DATA;
+		record.object = object;
 		record.eos_marker = B_FALSE;
-		record.bp = *DN_SPILL_BLKPTR(dnp);
-		SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
-		    object, 0, DMU_SPILL_BLKID);
+		record.start_blkid = DMU_SPILL_BLKID;
+		record.end_blkid = record.start_blkid + 1;
+		record.sru.data.bp = *bp;
+		record.sru.data.obj_type = dnp->dn_type;
+		record.sru.data.datablksz = BP_GET_LSIZE(bp);
 
-		if (do_dump(dsp, &record) != 0)
+		if (do_dump(dscp, &record) != 0)
 			return (SET_ERROR(EINTR));
 	}
 
-	if (dsp->dsa_err != 0)
+	if (dscp->dsc_err != 0)
 		return (SET_ERROR(EINTR));
 
 	return (0);
 }
 
 static int
-dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj,
-    uint64_t numslots)
+dump_object_range(dmu_send_cookie_t *dscp, const blkptr_t *bp,
+    uint64_t firstobj, uint64_t numslots)
 {
 	struct drr_object_range *drror =
-	    &(dsp->dsa_drr->drr_u.drr_object_range);
+	    &(dscp->dsc_drr->drr_u.drr_object_range);
 
+	KTRACE();
 	/* we only use this record type for raw sends */
 	ASSERT(BP_IS_PROTECTED(bp));
-	ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
+	ASSERT(dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW);
 	ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
 	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
 	ASSERT0(BP_GET_LEVEL(bp));
 
-	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_record(dsp, NULL, 0) != 0)
+	if (dscp->dsc_pending_op != PENDING_NONE) {	/* flush pending record */
+	KDEBUG("calling dump_record");
+		if (dump_record(dscp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
-		dsp->dsa_pending_op = PENDING_NONE;
+		dscp->dsc_pending_op = PENDING_NONE;
 	}
 
-	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
-	dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE;
+	bzero(dscp->dsc_drr, sizeof (dmu_replay_record_t));
+	dscp->dsc_drr->drr_type = DRR_OBJECT_RANGE;
 	drror->drr_firstobj = firstobj;
 	drror->drr_numslots = numslots;
-	drror->drr_toguid = dsp->dsa_toguid;
+	drror->drr_toguid = dscp->dsc_toguid;
 	if (BP_SHOULD_BYTESWAP(bp))
 		drror->drr_flags |= DRR_RAW_BYTESWAP;
 	zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv);
 	zio_crypt_decode_mac_bp(bp, drror->drr_mac);
 
-	if (dump_record(dsp, NULL, 0) != 0)
+	KDEBUG("calling dump_record");
+	if (dump_record(dscp, NULL, 0) != 0)	/* header only, no payload */
 		return (SET_ERROR(EINTR));
 	return (0);
 }
 
 static boolean_t
-backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
+send_do_embed(const blkptr_t *bp, uint64_t featureflags)
 {
+	KTRACE();
 	if (!BP_IS_EMBEDDED(bp))
 		return (B_FALSE);
 
@@ -688,7 +930,15 @@
 	 * Compression function must be legacy, or explicitly enabled.
 	 */
 	if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
-	    !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LZ4)))
+	    !(featureflags & DMU_BACKUP_FEATURE_LZ4)))
+		return (B_FALSE);
+
+	/*
+	 * If we have not set the ZSTD feature flag, we can't send ZSTD
+	 * compressed embedded blocks, as the receiver may not support them.
+	 */
+	if ((BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD &&
+	    !(featureflags & DMU_BACKUP_FEATURE_ZSTD)))
 		return (B_FALSE);
 
 	/*
@@ -696,7 +946,7 @@
 	 */
 	switch (BPE_GET_ETYPE(bp)) {
 	case BP_EMBEDDED_TYPE_DATA:
-		if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
+		if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
 			return (B_TRUE);
 		break;
 	default:
@@ -706,335 +956,1554 @@
 }
 
 /*
- * This is the callback function to traverse_dataset that acts as the worker
- * thread for dmu_send_impl.
- */
-/*ARGSUSED*/
-static int
-send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_phys_t *zb, const struct dnode_phys *dnp, void *arg)
-{
-	struct send_thread_arg *sta = arg;
-	struct send_block_record *record;
-	uint64_t record_size;
-	int err = 0;
-
-	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
-	    zb->zb_object >= sta->resume.zb_object);
-	ASSERT3P(sta->ds, !=, NULL);
-
-	if (sta->cancel)
-		return (SET_ERROR(EINTR));
-
-	if (bp == NULL) {
-		ASSERT3U(zb->zb_level, ==, ZB_DNODE_LEVEL);
-		return (0);
-	} else if (zb->zb_level < 0) {
-		return (0);
-	}
-
-	record = kmem_zalloc(sizeof (struct send_block_record), KM_SLEEP);
-	record->eos_marker = B_FALSE;
-	record->bp = *bp;
-	record->zb = *zb;
-	record->indblkshift = dnp->dn_indblkshift;
-	record->datablkszsec = dnp->dn_datablkszsec;
-	record_size = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
-	bqueue_enqueue(&sta->q, record, record_size);
-
-	return (err);
-}
-
-/*
- * This function kicks off the traverse_dataset.  It also handles setting the
- * error code of the thread in case something goes wrong, and pushes the End of
- * Stream record when the traverse_dataset call has finished.  If there is no
- * dataset to traverse, the thread immediately pushes End of Stream marker.
- */
-static void
-send_traverse_thread(void *arg)
-{
-	struct send_thread_arg *st_arg = arg;
-	int err;
-	struct send_block_record *data;
-	fstrans_cookie_t cookie = spl_fstrans_mark();
-
-	if (st_arg->ds != NULL) {
-		err = traverse_dataset_resume(st_arg->ds,
-		    st_arg->fromtxg, &st_arg->resume,
-		    st_arg->flags, send_cb, st_arg);
-
-		if (err != EINTR)
-			st_arg->error_code = err;
-	}
-	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
-	data->eos_marker = B_TRUE;
-	bqueue_enqueue(&st_arg->q, data, 1);
-	spl_fstrans_unmark(cookie);
-	thread_exit();
-}
-
-/*
  * This function actually handles figuring out what kind of record needs to be
- * dumped, reading the data (which has hopefully been prefetched), and calling
- * the appropriate helper function.
+ * dumped, and calling the appropriate helper function.  In most cases,
+ * the data has already been read by send_reader_thread().
  */
 static int
-do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
+do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
 {
-	dsl_dataset_t *ds = dmu_objset_ds(dsa->dsa_os);
-	const blkptr_t *bp = &data->bp;
-	const zbookmark_phys_t *zb = &data->zb;
-	uint8_t indblkshift = data->indblkshift;
-	uint16_t dblkszsec = data->datablkszsec;
-	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
-	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
 	int err = 0;
-
-	ASSERT3U(zb->zb_level, >=, 0);
-
-	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
-	    zb->zb_object >= dsa->dsa_resume_object);
-
-	/*
-	 * All bps of an encrypted os should have the encryption bit set.
-	 * If this is not true it indicates tampering and we report an error.
-	 */
-	if (dsa->dsa_os->os_encrypted &&
-	    !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
-		spa_log_error(spa, zb);
-		zfs_panic_recover("unencrypted block in encrypted "
-		    "object set %llu", ds->ds_object);
-		return (SET_ERROR(EIO));
+	KTRACE();
+	switch (range->type) {
+	case OBJECT:
+		KDEBUG("OBJECT");
+		err = dump_dnode(dscp, &range->sru.object.bp, range->object,
+		    range->sru.object.dnp);
+		return (err);
+	case OBJECT_RANGE: {
+		KDEBUG("OBJECT_RANGE");
+		ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
+		if (!(dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW)) {
+			return (0);
+		}
+		uint64_t epb = BP_GET_LSIZE(&range->sru.object_range.bp) >>
+		    DNODE_SHIFT;
+		uint64_t firstobj = range->start_blkid * epb;
+		err = dump_object_range(dscp, &range->sru.object_range.bp,
+		    firstobj, epb);
+		break;
 	}
+	case REDACT: {
+		KDEBUG("REDACT");
+		struct srr *srrp = &range->sru.redact;
+		err = dump_redact(dscp, range->object, range->start_blkid *
+		    srrp->datablksz, (range->end_blkid - range->start_blkid) *
+		    srrp->datablksz);
+		return (err);
+	}
+	case DATA: {
+		KDEBUG("DATA");
+		struct srd *srdp = &range->sru.data;
+		blkptr_t *bp = &srdp->bp;
+		spa_t *spa =
+		    dmu_objset_spa(dscp->dsc_os);
 
-	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
-	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
-		return (0);
-	} else if (BP_IS_HOLE(bp) &&
-	    zb->zb_object == DMU_META_DNODE_OBJECT) {
-		uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
-		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
-		err = dump_freeobjects(dsa, dnobj, span >> DNODE_SHIFT);
-	} else if (BP_IS_HOLE(bp)) {
-		uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
-		uint64_t offset = zb->zb_blkid * span;
-		/* Don't dump free records for offsets > DMU_OBJECT_END */
-		if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid)
-			err = dump_free(dsa, zb->zb_object, offset, span);
-	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
-		return (0);
-	} else if (type == DMU_OT_DNODE) {
-		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
-		arc_flags_t aflags = ARC_FLAG_WAIT;
-		arc_buf_t *abuf;
-		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+		ASSERT3U(srdp->datablksz, ==, BP_GET_LSIZE(bp));
+		ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
+		if (BP_GET_TYPE(bp) == DMU_OT_SA) {
+			arc_flags_t aflags = ARC_FLAG_WAIT;
+			enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
 
-		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
-			ASSERT(BP_IS_ENCRYPTED(bp));
-			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
-			zioflags |= ZIO_FLAG_RAW;
-		}
-
-		ASSERT0(zb->zb_level);
-
-		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-		    ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
-			return (SET_ERROR(EIO));
-
-		dnode_phys_t *blk = abuf->b_data;
-		uint64_t dnobj = zb->zb_blkid * epb;
-
-		/*
-		 * Raw sends require sending encryption parameters for the
-		 * block of dnodes. Regular sends do not need to send this
-		 * info.
-		 */
-		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
-			ASSERT(arc_is_encrypted(abuf));
-			err = dump_object_range(dsa, bp, dnobj, epb);
-		}
-
-		if (err == 0) {
-			for (int i = 0; i < epb;
-			    i += blk[i].dn_extra_slots + 1) {
-				err = dump_dnode(dsa, bp, dnobj + i, blk + i);
-				if (err != 0)
-					break;
+			if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) {
+				ASSERT(BP_IS_PROTECTED(bp));
+				zioflags |= ZIO_FLAG_RAW;
 			}
-		}
-		arc_buf_destroy(abuf, &abuf);
-	} else if (type == DMU_OT_SA) {
-		arc_flags_t aflags = ARC_FLAG_WAIT;
-		arc_buf_t *abuf;
-		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
 
-		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
-			ASSERT(BP_IS_PROTECTED(bp));
-			zioflags |= ZIO_FLAG_RAW;
-		}
+			zbookmark_phys_t zb;
+			ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID);
+			zb.zb_objset = dmu_objset_id(dscp->dsc_os);
+			zb.zb_object = range->object;
+			zb.zb_level = 0;
+			zb.zb_blkid = range->start_blkid;
 
-		if (dsa->block_diff) {
-			err = dump_spill(dsa, bp, zb->zb_object, NULL);
-		} else {
-			if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-			    ZIO_PRIORITY_ASYNC_READ, zioflags,
-			    &aflags, zb) != 0)
-				return (SET_ERROR(EIO));
-			err = dump_spill (dsa, bp, zb->zb_object, abuf->b_data);
-			arc_buf_destroy(abuf, &abuf);
-		}
+			arc_buf_t *abuf = NULL;
 
-	} else if (backup_do_embed(dsa, bp)) {
-		/* it's an embedded level-0 block of a regular object */
-		int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
-		ASSERT0(zb->zb_level);
-		err = dump_write_embedded(dsa, zb->zb_object,
-		    zb->zb_blkid * blksz, blksz, bp);
-	} else {
-		/* it's a level-0 block of a regular object */
-		arc_flags_t aflags = ARC_FLAG_WAIT;
-		arc_buf_t *abuf;
-		int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
-		uint64_t offset;
+			if( dscp->dsc_block_diff) {
+				err = dump_spill(dscp, bp, zb.zb_object,NULL);
+			}
+			else {
+				if (!dscp->dsc_dso->dso_dryrun && arc_read(NULL, spa,
+			    		bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
+			    		zioflags, &aflags, &zb) != 0)
+					return (SET_ERROR(EIO));
 
-		/*
-		 * If we have large blocks stored on disk but the send flags
-		 * don't allow us to send large blocks, we split the data from
-		 * the arc buf into chunks.
-		 */
-		boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
-		    !(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
-
-		/*
-		 * Raw sends require that we always get raw data as it exists
-		 * on disk, so we assert that we are not splitting blocks here.
-		 */
-		boolean_t request_raw =
-		    (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
-
-		/*
-		 * We should only request compressed data from the ARC if all
-		 * the following are true:
-		 *  - stream compression was requested
-		 *  - we aren't splitting large blocks into smaller chunks
-		 *  - the data won't need to be byteswapped before sending
-		 *  - this isn't an embedded block
-		 *  - this isn't metadata (if receiving on a different endian
-		 *    system it can be byteswapped more easily)
-		 */
-		boolean_t request_compressed =
-		    (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED) &&
-		    !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
-		    !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
-
-		IMPLY(request_raw, !split_large_blocks);
-		IMPLY(request_raw, BP_IS_PROTECTED(bp));
-		ASSERT0(zb->zb_level);
-		ASSERT(zb->zb_object > dsa->dsa_resume_object ||
-		    (zb->zb_object == dsa->dsa_resume_object &&
-		    zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
-
-		ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
-
-		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
-		if (request_raw)
-			zioflags |= ZIO_FLAG_RAW;
-		else if (request_compressed)
-			zioflags |= ZIO_FLAG_RAW_COMPRESS;
-
-		offset = zb->zb_blkid * blksz;
-
-		if (dsa->block_diff) {
-			/* bug-91372 
-			* for blockdiff case, we just need to know there was a write here
-			* actual data read will be skipped inside dump_write->dump_record
-			* thus, psize can be faked, using lsize to indicate "no compression"
-			* otherwise, doing arc_read (which also determines psize) incurs full data
-			* read and breaks the expense expectation of bitmap generation.
-			*/
-			int fake_psize = blksz; 
-			err = dump_write(dsa, type, zb->zb_object, offset,
-					blksz, fake_psize, bp,
-					NULL);
-			ASSERT(err == 0 || err == EINTR);
+				err = dump_spill(dscp, bp, zb.zb_object,
+			    	(abuf == NULL ? NULL : abuf->b_data));
+			}
+			if (abuf != NULL)
+				arc_buf_destroy(abuf, &abuf);
 			return (err);
 		}
+		if (send_do_embed(bp, dscp->dsc_featureflags)) {
+			err = dump_write_embedded(dscp, range->object,
+			    range->start_blkid * srdp->datablksz,
+			    srdp->datablksz, bp);
+			return (err);
+		}
+		ASSERT(range->object > dscp->dsc_resume_object ||
+		    (range->object == dscp->dsc_resume_object &&
+		    range->start_blkid * srdp->datablksz >=
+		    dscp->dsc_resume_offset));
+		/* it's a level-0 block of a regular object */
 
-		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-		    ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
-			if (zfs_send_corrupt_data) {
-				/* Send a block filled with 0x"zfs badd bloc" */
-				abuf = arc_alloc_buf(spa, &abuf, ARC_BUFC_DATA,
-				    blksz);
+		mutex_enter(&srdp->lock);
+		while (srdp->io_outstanding)
+			cv_wait(&srdp->cv, &srdp->lock);
+		err = srdp->io_err;
+		mutex_exit(&srdp->lock);
+
+		if (err != 0) {
+			if (zfs_send_corrupt_data &&
+			    !dscp->dsc_dso->dso_dryrun) {
+				/*
+				 * Send a block filled with 0x"zfs badd bloc"
+				 */
+				srdp->abuf = arc_alloc_buf(spa, &srdp->abuf,
+				    ARC_BUFC_DATA, srdp->datablksz);
 				uint64_t *ptr;
-				for (ptr = abuf->b_data;
-				    (char *)ptr < (char *)abuf->b_data + blksz;
-				    ptr++)
+				for (ptr = srdp->abuf->b_data;
+				    (char *)ptr < (char *)srdp->abuf->b_data +
+				    srdp->datablksz; ptr++)
 					*ptr = 0x2f5baddb10cULL;
 			} else {
 				return (SET_ERROR(EIO));
 			}
 		}
 
-		if (split_large_blocks) {
-			ASSERT0(arc_is_encrypted(abuf));
-			ASSERT3U(arc_get_compression(abuf), ==,
-			    ZIO_COMPRESS_OFF);
-			char *buf = abuf->b_data;
-			while (blksz > 0 && err == 0) {
-				int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
-				err = dump_write(dsa, type, zb->zb_object,
-				    offset, n, n, NULL, buf);
+		ASSERT(dscp->dsc_dso->dso_dryrun ||
+		    srdp->abuf != NULL || srdp->abd != NULL);
+
+		uint64_t offset = range->start_blkid * srdp->datablksz;
+
+		char *data = NULL;
+		if (srdp->abd != NULL) {
+			data = abd_to_buf(srdp->abd);
+			ASSERT3P(srdp->abuf, ==, NULL);
+		} else if (srdp->abuf != NULL) {
+			data = srdp->abuf->b_data;
+		}
+
+		/*
+		 * If we have large blocks stored on disk but the send flags
+		 * don't allow us to send large blocks, we split the data from
+		 * the arc buf into chunks.
+		 */
+		if (srdp->datablksz > SPA_OLD_MAXBLOCKSIZE &&
+		    !(dscp->dsc_featureflags &
+		    DMU_BACKUP_FEATURE_LARGE_BLOCKS)) {
+			while (srdp->datablksz > 0 && err == 0) {
+				int n = MIN(srdp->datablksz,
+				    SPA_OLD_MAXBLOCKSIZE);
+				err = dmu_dump_write(dscp, srdp->obj_type,
+				    range->object, offset, n, n, NULL, B_FALSE,
+				    data);
 				offset += n;
-				buf += n;
-				blksz -= n;
+				/*
+				 * When doing dry run, data==NULL is used as a
+				 * sentinel value by
+				 * dmu_dump_write()->dump_record().
+				 */
+				if (data != NULL)
+					data += n;
+				srdp->datablksz -= n;
 			}
 		} else {
-			err = dump_write(dsa, type, zb->zb_object, offset,
-			    blksz, arc_buf_size(abuf), bp, abuf->b_data);
-		}
-		arc_buf_destroy(abuf, &abuf);
-	}
+			
+			if(dscp->dsc_block_diff)
+			{
+				KDEBUG("doing block diff");
+				int fake_psize = srdp->datablksz;
+				KDEBUG("dmu_dump_write %ld vs %ld", fake_psize, srdp->datasz);
 
-	ASSERT(err == 0 || err == EINTR);
+				err = dmu_dump_write(dscp, srdp->obj_type,
+			    		range->object, offset,
+			    		srdp->datablksz, fake_psize, bp,
+			    		0, NULL);
+			        ASSERT(err == 0 || err == EINTR);
+            			return (err);
+			}
+			else
+			{
+				KDEBUG("not doing block diff");
+				err = dmu_dump_write(dscp, srdp->obj_type,
+			    		range->object, offset,
+			    		srdp->datablksz, srdp->datasz, bp,
+			    		srdp->io_compressed, data);
+			}
+		}
+		return (err);
+	}
+	case HOLE: {
+		struct srh *srhp = &range->sru.hole;
+		if (range->object == DMU_META_DNODE_OBJECT) {
+			uint32_t span = srhp->datablksz >> DNODE_SHIFT;
+			uint64_t first_obj = range->start_blkid * span;
+			uint64_t numobj = range->end_blkid * span - first_obj;
+			return (dump_freeobjects(dscp, first_obj, numobj));
+		}
+		uint64_t offset = 0;
+
+		/*
+		 * If this multiply overflows, we don't need to send this block.
+		 * Even if it has a birth time, it can never not be a hole, so
+		 * we don't need to send records for it.
+		 */
+		if (!overflow_multiply(range->start_blkid, srhp->datablksz,
+		    &offset)) {
+			return (0);
+		}
+		uint64_t len = 0;
+
+		if (!overflow_multiply(range->end_blkid, srhp->datablksz, &len))
+			len = UINT64_MAX;
+		len = len - offset;
+		return (dump_free(dscp, range->object, offset, len));
+	}
+	default:
+		panic("Invalid range type in do_dump: %d", range->type);
+	}
 	return (err);
 }
 
-/*
- * Pop the new data off the queue, and free the old data.
- */
-static struct send_block_record *
-get_next_record(bqueue_t *bq, struct send_block_record *data)
+/*
+ * Allocate a send_range and fill in the fields common to every range type:
+ * the type, the object number, the [start_blkid, end_blkid) block range and
+ * the end-of-stream flag.  For DATA ranges the data-specific state is also
+ * set up (no buffers attached, lock and condvar initialized, no I/O
+ * outstanding, no error, not compressed).  The type-specific members of the
+ * other range types are not initialized here; callers fill them in.
+ */
+static struct send_range *
+range_alloc(enum type type, uint64_t object, uint64_t start_blkid,
+    uint64_t end_blkid, boolean_t eos)
 {
-	struct send_block_record *tmp = bqueue_dequeue(bq);
-	kmem_free(data, sizeof (*data));
-	return (tmp);
+	struct send_range *range = kmem_alloc(sizeof (*range), KM_SLEEP);
+	KTRACE();
+
+	range->type = type;
+	range->object = object;
+	range->start_blkid = start_blkid;
+	range->end_blkid = end_blkid;
+	range->eos_marker = eos;
+	KTRACE();
+	if (type == DATA) {
+		/* Start with no buffer attached and no read in flight. */
+		range->sru.data.abd = NULL;
+		range->sru.data.abuf = NULL;
+		mutex_init(&range->sru.data.lock, NULL, MUTEX_DEFAULT, NULL);
+		cv_init(&range->sru.data.cv, NULL, CV_DEFAULT, NULL);
+		range->sru.data.io_outstanding = 0;
+		range->sru.data.io_err = 0;
+		range->sru.data.io_compressed = B_FALSE;
+	}
+	return (range);
+}
+
+/*
+ * This is the callback function to traverse_dataset that acts as a worker
+ * thread for dmu_send_impl.
+ *
+ * Each visited block pointer is turned into at most one send_range that is
+ * enqueued on sta->q.  Returns 0 to continue the traversal, EINTR when the
+ * thread has been cancelled, and EIO when an unencrypted, non-hole block is
+ * found in an encrypted object set.
+ */
+static int
+send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+    const zbookmark_phys_t *zb, const struct dnode_phys *dnp, void *arg)
+{
+	(void) zilog;
+	struct send_thread_arg *sta = arg;
+	struct send_range *record;
+	KTRACE();
+
+	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+	    zb->zb_object >= sta->resume.zb_object);
+
+	/*
+	 * All bps of an encrypted os should have the encryption bit set.
+	 * If this is not true it indicates tampering and we report an error.
+	 */
+	if (sta->os->os_encrypted &&
+	    !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
+		spa_log_error(spa, zb);
+		zfs_panic_recover("unencrypted block in encrypted "
+		    "object set %llu", dmu_objset_id(sta->os));
+		return (SET_ERROR(EIO));
+	}
+
+	if (sta->cancel)
+		return (SET_ERROR(EINTR));
+	/* Special objects other than the meta-dnode are never sent. */
+	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
+	    DMU_OBJECT_IS_SPECIAL(zb->zb_object))
+		return (0);
+	atomic_inc_64(sta->num_blocks_visited);
+
+	/*
+	 * Dnode-level visit: queue an OBJECT record that carries the bp and
+	 * a private copy of the dnode, including its extra slots.
+	 */
+	if (zb->zb_level == ZB_DNODE_LEVEL) {
+		if (zb->zb_object == DMU_META_DNODE_OBJECT)
+			return (0);
+		record = range_alloc(OBJECT, zb->zb_object, 0, 0, B_FALSE);
+		record->sru.object.bp = *bp;
+		size_t size  = sizeof (*dnp) * (dnp->dn_extra_slots + 1);
+		record->sru.object.dnp = kmem_alloc(size, KM_SLEEP);
+		bcopy(dnp, record->sru.object.dnp, size);
+		bqueue_enqueue(&sta->q, record, sizeof (*record));
+		return (0);
+	}
+	/*
+	 * A non-hole level-0 block of the meta-dnode object becomes an
+	 * OBJECT_RANGE record covering exactly that one block.
+	 */
+	if (zb->zb_level == 0 && zb->zb_object == DMU_META_DNODE_OBJECT &&
+	    !BP_IS_HOLE(bp)) {
+		record = range_alloc(OBJECT_RANGE, 0, zb->zb_blkid,
+		    zb->zb_blkid + 1, B_FALSE);
+		record->sru.object_range.bp = *bp;
+		bqueue_enqueue(&sta->q, record, sizeof (*record));
+		return (0);
+	}
+	/*
+	 * From here on only level-0 blocks and holes (at any non-negative
+	 * level) produce a range; non-hole indirect blocks are skipped.
+	 */
+	if (zb->zb_level < 0 || (zb->zb_level > 0 && !BP_IS_HOLE(bp)))
+		return (0);
+	if (zb->zb_object == DMU_META_DNODE_OBJECT && !BP_IS_HOLE(bp))
+		return (0);
+
+	uint64_t span = bp_span_in_blocks(dnp->dn_indblkshift, zb->zb_level);
+	uint64_t start;
+
+	/*
+	 * If this multiply overflows, we don't need to send this block.
+	 * Even if it has a birth time, it can never not be a hole, so
+	 * we don't need to send records for it.
+	 */
+	if (!overflow_multiply(span, zb->zb_blkid, &start) || (!(zb->zb_blkid ==
+	    DMU_SPILL_BLKID || DMU_OT_IS_METADATA(dnp->dn_type)) &&
+	    span * zb->zb_blkid > dnp->dn_maxblkid)) {
+		ASSERT(BP_IS_HOLE(bp));
+		return (0);
+	}
+
+	if (zb->zb_blkid == DMU_SPILL_BLKID)
+		ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_SA);
+
+	enum type record_type = DATA;
+	if (BP_IS_HOLE(bp))
+		record_type = HOLE;
+	else if (BP_IS_REDACTED(bp))
+		record_type = REDACT;
+	else
+		record_type = DATA;
+
+	/* If start + span wraps around, the end block id is passed as 0. */
+	record = range_alloc(record_type, zb->zb_object, start,
+	    (start + span < start ? 0 : start + span), B_FALSE);
+
+	/*
+	 * Spill blocks use the bp's logical size; every other block uses the
+	 * data block size recorded in the dnode.
+	 */
+	uint64_t datablksz = (zb->zb_blkid == DMU_SPILL_BLKID ?
+	    BP_GET_LSIZE(bp) : dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
+
+	if (BP_IS_HOLE(bp)) {
+		record->sru.hole.datablksz = datablksz;
+	} else if (BP_IS_REDACTED(bp)) {
+		record->sru.redact.datablksz = datablksz;
+	} else {
+		record->sru.data.datablksz = datablksz;
+		record->sru.data.obj_type = dnp->dn_type;
+		record->sru.data.bp = *bp;
+	}
+
+	bqueue_enqueue(&sta->q, record, sizeof (*record));
+	return (0);
+}
+
+/*
+ * Argument bundle passed to redact_list_cb() for each redaction list entry.
+ */
+struct redact_list_cb_arg {
+	/* Counter bumped atomically once per callback invocation. */
+	uint64_t *num_blocks_visited;
+	/* Queue that the generated ranges are enqueued on. */
+	bqueue_t *q;
+	/* When *cancel is set the callback returns -1 without enqueueing. */
+	boolean_t *cancel;
+	/* Emit REDACT ranges if set, PREVIOUSLY_REDACTED ranges otherwise. */
+	boolean_t mark_redact;
+};
+
+/*
+ * Per-entry callback for the redaction list traversal.  Counts the visit,
+ * bails out with -1 if the owning thread was cancelled, and otherwise
+ * converts the redaction entry into a send_range on the output queue.  The
+ * range is tagged REDACT (with its block size) or PREVIOUSLY_REDACTED
+ * depending on mark_redact.
+ */
+static int
+redact_list_cb(redact_block_phys_t *rb, void *arg)
+{
+	struct redact_list_cb_arg *cb_arg = arg;
+	struct send_range *range;
+	KTRACE();
+
+	atomic_inc_64(cb_arg->num_blocks_visited);
+	if (*cb_arg->cancel)
+		return (-1);
+
+	range = range_alloc(REDACT, rb->rbp_object,
+	    rb->rbp_blkid, rb->rbp_blkid + redact_block_get_count(rb), B_FALSE);
+	ASSERT3U(range->end_blkid, >, rb->rbp_blkid);
+	if (!cb_arg->mark_redact) {
+		range->type = PREVIOUSLY_REDACTED;
+	} else {
+		range->type = REDACT;
+		range->sru.redact.datablksz = redact_block_get_size(rb);
+	}
+	bqueue_enqueue(cb_arg->q, range, sizeof (*range));
+
+	return (0);
+}
+
+/*
+ * Thread body that runs traverse_dataset_resume() with send_cb as the
+ * per-block callback.  Any traversal error other than EINTR is recorded in
+ * the thread argument, and an End of Stream range is always pushed (and the
+ * queue flushed) once the traversal returns.
+ */
+static void
+send_traverse_thread(void *arg)
+{
+	KTRACE();
+	struct send_thread_arg *sta = arg;
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+
+	int err = traverse_dataset_resume(sta->os->os_dsl_dataset,
+	    sta->fromtxg, &sta->resume, sta->flags, send_cb, sta);
+	if (err != EINTR)
+		sta->error_code = err;
+
+	struct send_range *eos = range_alloc(DATA, 0, 0, 0, B_TRUE);
+	bqueue_enqueue_flush(&sta->q, eos, sizeof (*eos));
+	spl_fstrans_unmark(cookie);
+	thread_exit();
+}
+
+/*
+ * Utility function that causes End of Stream records to compare after all
+ * others, so that other threads' comparison logic can stay simple.
+ *
+ * Returns 1 if "from" lies entirely after "to", -1 if it lies entirely
+ * before it, and 0 if the two ranges overlap.  Only referenced from an
+ * assertion in this view, hence the "unused" attribute.
+ */
+static int __attribute__((unused))
+send_range_after(const struct send_range *from, const struct send_range *to)
+{
+	KTRACE();
+	if (from->eos_marker == B_TRUE)
+		return (1);
+	if (to->eos_marker == B_TRUE)
+		return (-1);
+
+	uint64_t from_obj = from->object;
+	uint64_t from_end_obj = from->object + 1;
+	uint64_t to_obj = to->object;
+	uint64_t to_end_obj = to->object + 1;
+	/*
+	 * Ranges on object 0 are expressed in blocks of dnodes; convert the
+	 * block range into the range of object numbers it covers.
+	 */
+	if (from_obj == 0) {
+		ASSERT(from->type == HOLE || from->type == OBJECT_RANGE);
+		from_obj = from->start_blkid << DNODES_PER_BLOCK_SHIFT;
+		from_end_obj = from->end_blkid << DNODES_PER_BLOCK_SHIFT;
+	}
+	if (to_obj == 0) {
+		ASSERT(to->type == HOLE || to->type == OBJECT_RANGE);
+		to_obj = to->start_blkid << DNODES_PER_BLOCK_SHIFT;
+		to_end_obj = to->end_blkid << DNODES_PER_BLOCK_SHIFT;
+	}
+
+	if (from_end_obj <= to_obj)
+		return (-1);
+	if (from_obj >= to_end_obj)
+		return (1);
+	/*
+	 * The object ranges overlap: OBJECT_RANGE records sort first, then
+	 * OBJECT records, and only then are the block ranges compared.
+	 */
+	int64_t cmp = TREE_CMP(to->type == OBJECT_RANGE, from->type ==
+	    OBJECT_RANGE);
+	if (unlikely(cmp))
+		return (cmp);
+	cmp = TREE_CMP(to->type == OBJECT, from->type == OBJECT);
+	if (unlikely(cmp))
+		return (cmp);
+	if (from->end_blkid <= to->start_blkid)
+		return (-1);
+	if (from->start_blkid >= to->end_blkid)
+		return (1);
+	return (0);
+}
+
+/*
+ * Dequeue the next range from bq and return it without freeing prev, so
+ * that prev can be handed on to another thread without copying.  The
+ * ordering of prev relative to the new range is verified by assertion.
+ */
+static struct send_range *
+get_next_range_nofree(bqueue_t *bq, struct send_range *prev)
+{
+	struct send_range *dequeued;
+
+	KTRACE();
+	dequeued = bqueue_dequeue(bq);
+	ASSERT3S(send_range_after(prev, dequeued), ==, -1);
+	return (dequeued);
+}
+
+/*
+ * Dequeue the next range from bq (with the same ordering check as
+ * get_next_range_nofree()), release prev, and return the new range.
+ */
+static struct send_range *
+get_next_range(bqueue_t *bq, struct send_range *prev)
+{
+	struct send_range *successor;
+
+	KTRACE();
+	successor = get_next_range_nofree(bq, prev);
+	range_free(prev);
+	return (successor);
+}
+
+/*
+ * Thread body that walks a redaction list (when one was supplied) and feeds
+ * the resulting ranges to the thread's queue via redact_list_cb().  Any
+ * traversal error other than EINTR is stored in the thread argument.  An End
+ * of Stream range is always enqueued and flushed before the thread exits.
+ */
+static void
+redact_list_thread(void *arg)
+{
+	struct redact_list_thread_arg *rlta = arg;
+	KTRACE();
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+
+	if (rlta->rl != NULL) {
+		struct redact_list_cb_arg cb_arg = {
+		    .num_blocks_visited = rlta->num_blocks_visited,
+		    .q = &rlta->q,
+		    .cancel = &rlta->cancel,
+		    .mark_redact = rlta->mark_redact,
+		};
+		int err = dsl_redaction_list_traverse(rlta->rl,
+		    &rlta->resume, redact_list_cb, &cb_arg);
+		if (err != EINTR)
+			rlta->error_code = err;
+	}
+
+	struct send_range *eos = range_alloc(DATA, 0, 0, 0, B_TRUE);
+	bqueue_enqueue_flush(&rlta->q, eos, sizeof (*eos));
+	spl_fstrans_unmark(cookie);
+
+	thread_exit();
+}
+
+/*
+ * Compare the start point of the two provided ranges. End of stream ranges
+ * compare last, objects compare before any data or hole inside that object and
+ * multi-object holes that start at the same object.
+ *
+ * Returns a negative, zero or positive value when r1 starts before, at the
+ * same place as, or after r2 respectively.
+ */
+static int
+send_range_start_compare(struct send_range *r1, struct send_range *r2)
+{
+	uint64_t r1_objequiv = r1->object;
+	uint64_t r1_l0equiv = r1->start_blkid;
+	uint64_t r2_objequiv = r2->object;
+	uint64_t r2_l0equiv = r2->start_blkid;
+	KTRACE();
+	int64_t cmp = TREE_CMP(r1->eos_marker, r2->eos_marker);
+	if (unlikely(cmp))
+		return (cmp);
+	/*
+	 * A range on object 0 is measured in blocks of dnodes: map its start
+	 * block to the first object number it covers and treat it as
+	 * starting at block 0 of that object.
+	 */
+	if (r1->object == 0) {
+		r1_objequiv = r1->start_blkid * DNODES_PER_BLOCK;
+		r1_l0equiv = 0;
+	}
+	if (r2->object == 0) {
+		r2_objequiv = r2->start_blkid * DNODES_PER_BLOCK;
+		r2_l0equiv = 0;
+	}
+
+	cmp = TREE_CMP(r1_objequiv, r2_objequiv);
+	if (likely(cmp))
+		return (cmp);
+	/* Same object: OBJECT_RANGE sorts first, then OBJECT, then by block. */
+	cmp = TREE_CMP(r2->type == OBJECT_RANGE, r1->type == OBJECT_RANGE);
+	if (unlikely(cmp))
+		return (cmp);
+	cmp = TREE_CMP(r2->type == OBJECT, r1->type == OBJECT);
+	if (unlikely(cmp))
+		return (cmp);
+
+	return (TREE_CMP(r1_l0equiv, r2_l0equiv));
+}
+
+/*
+ * Indices into the per-thread arrays of front ranges and queues used by
+ * send_merge_thread() and find_next_range().  NUM_THREADS is the number of
+ * input queues and must stay last.
+ */
+enum q_idx {
+	REDACT_IDX = 0,
+	TO_IDX,
+	FROM_IDX,
+	NUM_THREADS
+};
+
+/*
+ * This function returns the next range the send_merge_thread should operate on.
+ * The inputs are two arrays; the first one stores the range at the front of the
+ * queues stored in the second one.  The ranges are sorted in descending
+ * priority order; the metadata from earlier ranges overrules metadata from
+ * later ranges.  out_mask is used to return which threads the ranges came from;
+ * bit i is set if ranges[i] started at the same place as the returned range.
+ *
+ * This code is not hardcoded to compare a specific number of threads; it could
+ * be used with any number, just by changing the q_idx enum.
+ *
+ * The "next range" is the one with the earliest start; if two starts are equal,
+ * the highest-priority range is the next to operate on.  If a higher-priority
+ * range starts in the middle of the first range, then the first range will be
+ * truncated to end where the higher-priority range starts, and we will operate
+ * on that one next time.   In this way, we make sure that each block covered by
+ * some range gets covered by a returned range, and each block covered is
+ * returned using the metadata of the highest-priority range it appears in.
+ *
+ * For example, if the three ranges at the front of the queues were [2,4),
+ * [3,5), and [1,3), then the ranges returned would be [1,2) with the metadata
+ * from the third range, [2,4) with the metadata from the first range, and then
+ * [4,5) with the metadata from the second.
+ *
+ * The returned range is owned by the caller.  When every front range is an
+ * End of Stream marker, a newly allocated End of Stream range is returned
+ * and *out_mask is set to 0.
+ */
+static struct send_range *
+find_next_range(struct send_range **ranges, bqueue_t **qs, uint64_t *out_mask)
+{
+	int idx = 0; // index of the range with the earliest start
+	int i;
+	uint64_t bmask = 0;
+	KTRACE();
+	/* Strict "<" keeps the lowest index (highest priority) on ties. */
+	for (i = 1; i < NUM_THREADS; i++) {
+		if (send_range_start_compare(ranges[i], ranges[idx]) < 0)
+			idx = i;
+	}
+	if (ranges[idx]->eos_marker) {
+		struct send_range *ret = range_alloc(DATA, 0, 0, 0, B_TRUE);
+		*out_mask = 0;
+		return (ret);
+	}
+	/*
+	 * Find all the ranges that start at that same point.
+	 */
+	for (i = 0; i < NUM_THREADS; i++) {
+		if (send_range_start_compare(ranges[i], ranges[idx]) == 0)
+			bmask |= 1 << i;
+	}
+	*out_mask = bmask;
+	/*
+	 * OBJECT_RANGE records only come from the TO thread, and should always
+	 * be treated as overlapping with nothing and sent on immediately.  They
+	 * are only used in raw sends, and are never redacted.
+	 */
+	if (ranges[idx]->type == OBJECT_RANGE) {
+		ASSERT3U(idx, ==, TO_IDX);
+		ASSERT3U(*out_mask, ==, 1 << TO_IDX);
+		struct send_range *ret = ranges[idx];
+		ranges[idx] = get_next_range_nofree(qs[idx], ranges[idx]);
+		return (ret);
+	}
+	/*
+	 * Find the first start or end point after the start of the first range.
+	 */
+	uint64_t first_change = ranges[idx]->end_blkid;
+	for (i = 0; i < NUM_THREADS; i++) {
+		if (i == idx || ranges[i]->eos_marker ||
+		    ranges[i]->object > ranges[idx]->object ||
+		    ranges[i]->object == DMU_META_DNODE_OBJECT)
+			continue;
+		ASSERT3U(ranges[i]->object, ==, ranges[idx]->object);
+		/*
+		 * A range that starts later contributes its start; a range
+		 * that starts at the same place contributes its end.
+		 */
+		if (first_change > ranges[i]->start_blkid &&
+		    (bmask & (1 << i)) == 0)
+			first_change = ranges[i]->start_blkid;
+		else if (first_change > ranges[i]->end_blkid)
+			first_change = ranges[i]->end_blkid;
+	}
+	/*
+	 * Update all ranges to no longer overlap with the range we're
+	 * returning. All such ranges must start at the same place as the range
+	 * being returned, and end at or after first_change. Thus we update
+	 * their start to first_change. If that makes them size 0, then free
+	 * them and pull a new range from that thread.
+	 */
+	for (i = 0; i < NUM_THREADS; i++) {
+		if (i == idx || (bmask & (1 << i)) == 0)
+			continue;
+		ASSERT3U(first_change, >, ranges[i]->start_blkid);
+		ranges[i]->start_blkid = first_change;
+		ASSERT3U(ranges[i]->start_blkid, <=, ranges[i]->end_blkid);
+		if (ranges[i]->start_blkid == ranges[i]->end_blkid)
+			ranges[i] = get_next_range(qs[i], ranges[i]);
+	}
+	/*
+	 * Short-circuit the simple case; if the range doesn't overlap with
+	 * anything else, or it only overlaps with things that start at the same
+	 * place and are longer, send it on.
+	 */
+	if (first_change == ranges[idx]->end_blkid) {
+		struct send_range *ret = ranges[idx];
+		ranges[idx] = get_next_range_nofree(qs[idx], ranges[idx]);
+		return (ret);
+	}
+
+	/*
+	 * Otherwise, return a truncated copy of ranges[idx] and move the start
+	 * of ranges[idx] back to first_change.
+	 */
+	struct send_range *ret = kmem_alloc(sizeof (*ret), KM_SLEEP);
+	*ret = *ranges[idx];
+	ret->end_blkid = first_change;
+	ranges[idx]->start_blkid = first_change;
+	return (ret);
+}
+
+/* Mask with both the REDACT_IDX and FROM_IDX bits of find_next_range()'s out_mask. */
+#define	FROM_AND_REDACT_BITS ((1 << REDACT_IDX) | (1 << FROM_IDX))
+
+/*
+ * Merge the results from the from thread and the to thread, and then hand the
+ * records off to send_prefetch_thread to prefetch them.  If this is not a
+ * send from a redaction bookmark, the from thread will push an end of stream
+ * record and stop, and we'll just send everything that was changed in the
+ * to_ds since the ancestor's creation txg. If it is, then since
+ * traverse_dataset has a canonical order, we can compare each change as
+ * they're pulled off the queues.  That will give us a stream that is
+ * appropriately sorted, and covers all records.  In addition, we pull the
+ * data from the redact_list_thread and use that to determine which blocks
+ * should be redacted.
+ */
+static void
+send_merge_thread(void *arg)
+{
+	struct send_merge_thread_arg *smt_arg = arg;
+	struct send_range *front_ranges[NUM_THREADS];
+	bqueue_t *queues[NUM_THREADS];
+	int err = 0;
+	KTRACE();
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+
+	if (smt_arg->redact_arg == NULL) {
+		/*
+		 * No redaction thread: stand in a synthetic End of Stream
+		 * range so this slot never contributes a range.
+		 */
+		front_ranges[REDACT_IDX] =
+		    kmem_zalloc(sizeof (struct send_range), KM_SLEEP);
+		front_ranges[REDACT_IDX]->eos_marker = B_TRUE;
+		front_ranges[REDACT_IDX]->type = REDACT;
+		queues[REDACT_IDX] = NULL;
+	} else {
+		front_ranges[REDACT_IDX] =
+		    bqueue_dequeue(&smt_arg->redact_arg->q);
+		queues[REDACT_IDX] = &smt_arg->redact_arg->q;
+	}
+	front_ranges[TO_IDX] = bqueue_dequeue(&smt_arg->to_arg->q);
+	queues[TO_IDX] = &smt_arg->to_arg->q;
+	front_ranges[FROM_IDX] = bqueue_dequeue(&smt_arg->from_arg->q);
+	queues[FROM_IDX] = &smt_arg->from_arg->q;
+	uint64_t mask = 0;
+	struct send_range *range;
+	for (range = find_next_range(front_ranges, queues, &mask);
+	    !range->eos_marker && err == 0 && !smt_arg->cancel;
+	    range = find_next_range(front_ranges, queues, &mask)) {
+		/*
+		 * If the range in question was in both the from redact bookmark
+		 * and the bookmark we're using to redact, then don't send it.
+		 * It's already redacted on the receiving system, so a redaction
+		 * record would be redundant.
+		 */
+		if ((mask & FROM_AND_REDACT_BITS) == FROM_AND_REDACT_BITS) {
+			ASSERT3U(range->type, ==, REDACT);
+			range_free(range);
+			continue;
+		}
+		bqueue_enqueue(&smt_arg->q, range, sizeof (*range));
+
+		/* Pick up the first error reported by any producer thread. */
+		if (smt_arg->to_arg->error_code != 0) {
+			err = smt_arg->to_arg->error_code;
+		} else if (smt_arg->from_arg->error_code != 0) {
+			err = smt_arg->from_arg->error_code;
+		} else if (smt_arg->redact_arg != NULL &&
+		    smt_arg->redact_arg->error_code != 0) {
+			err = smt_arg->redact_arg->error_code;
+		}
+	}
+	if (smt_arg->cancel && err == 0)
+		err = SET_ERROR(EINTR);
+	smt_arg->error = err;
+	/* On failure, ask every producer thread to stop. */
+	if (smt_arg->error != 0) {
+		smt_arg->to_arg->cancel = B_TRUE;
+		smt_arg->from_arg->cancel = B_TRUE;
+		if (smt_arg->redact_arg != NULL)
+			smt_arg->redact_arg->cancel = B_TRUE;
+	}
+	/*
+	 * Drain every input queue up to its End of Stream marker, freeing
+	 * each range along the way, then free the marker itself.
+	 */
+	for (int i = 0; i < NUM_THREADS; i++) {
+		while (!front_ranges[i]->eos_marker) {
+			front_ranges[i] = get_next_range(queues[i],
+			    front_ranges[i]);
+		}
+		range_free(front_ranges[i]);
+	}
+	/*
+	 * The last range returned by find_next_range() is reused as the End
+	 * of Stream marker for the downstream queue.
+	 */
+	if (range == NULL)
+		range = kmem_zalloc(sizeof (*range), KM_SLEEP);
+	range->eos_marker = B_TRUE;
+	bqueue_enqueue_flush(&smt_arg->q, range, 1);
+	spl_fstrans_unmark(cookie);
+	thread_exit();
+}
+
+/*
+ * Arguments and state for send_reader_thread().
+ */
+struct send_reader_thread_arg {
+	/* Merge thread state; its queue is this thread's input. */
+	struct send_merge_thread_arg *smta;
+	/* Output queue that the reader thread enqueues ranges on. */
+	bqueue_t q;
+	/* Checked by the reader loop; when set the loop stops. */
+	boolean_t cancel;
+	/* When false, issue_data_read() does not start any read. */
+	boolean_t issue_reads;
+	/* When set, issue_data_read() returns before reading any data. */
+	boolean_t block_diff;
+	/* DMU_BACKUP_FEATURE_* flags consulted when choosing read flags. */
+	uint64_t featureflags;
+	/* Error recorded by send_reader_thread() before it exits. */
+	int error;
+};
+
+/*
+ * zio completion callback for a DATA range's read; the range is carried in
+ * zio->io_private.  Under the range's lock: on failure the abd is released
+ * and the error recorded, then the I/O is marked as no longer outstanding
+ * and all waiters on the range's condvar are woken.
+ */
+static void
+dmu_send_read_done(zio_t *zio)
+{
+	struct send_range *range = zio->io_private;
+	struct srd *srdp = &range->sru.data;
+	KTRACE();
+
+	mutex_enter(&srdp->lock);
+	if (zio->io_error != 0) {
+		abd_free(srdp->abd);
+		srdp->abd = NULL;
+		srdp->io_err = zio->io_error;
+	}
+
+	ASSERT(srdp->io_outstanding);
+	srdp->io_outstanding = B_FALSE;
+	cv_broadcast(&srdp->cv);
+	mutex_exit(&srdp->lock);
+}
+
+/*
+ * Choose the read flags and payload size for a single-block DATA range and,
+ * unless reading is suppressed, start fetching its data.
+ *
+ * srdp->io_compressed and srdp->datasz are always filled in.  No read is
+ * issued when srta->block_diff is set, when srta->issue_reads is false, when
+ * the bp is redacted, or when send_do_embed() accepts the bp.  Otherwise a
+ * cached-only arc_read() is tried first; if that fails, a linear abd is
+ * allocated and an asynchronous zio_read() is issued whose completion is
+ * reported through dmu_send_read_done().
+ */
+static void
+issue_data_read(struct send_reader_thread_arg *srta, struct send_range *range)
+{
+	struct srd *srdp = &range->sru.data;
+	blkptr_t *bp = &srdp->bp;
+	objset_t *os = srta->smta->os;
+
+	KTRACE();
+	ASSERT3U(range->type, ==, DATA);
+	ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
+	/*
+	 * If we have large blocks stored on disk but
+	 * the send flags don't allow us to send large
+	 * blocks, we split the data from the arc buf
+	 * into chunks.
+	 */
+	boolean_t split_large_blocks =
+	    srdp->datablksz > SPA_OLD_MAXBLOCKSIZE &&
+	    !(srta->featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
+	/*
+	 * We should only request compressed data from the ARC if all
+	 * the following are true:
+	 *  - stream compression was requested
+	 *  - we aren't splitting large blocks into smaller chunks
+	 *  - the data won't need to be byteswapped before sending
+	 *  - this isn't an embedded block
+	 *  - this isn't metadata (if receiving on a different endian
+	 *    system it can be byteswapped more easily)
+	 */
+	boolean_t request_compressed =
+	    (srta->featureflags & DMU_BACKUP_FEATURE_COMPRESSED) &&
+	    !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
+	    !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
+
+	enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+	if (srta->featureflags & DMU_BACKUP_FEATURE_RAW) {
+		zioflags |= ZIO_FLAG_RAW;
+		srdp->io_compressed = B_TRUE;
+	} else if (request_compressed) {
+		zioflags |= ZIO_FLAG_RAW_COMPRESS;
+		srdp->io_compressed = B_TRUE;
+	}
+
+	/* Payload size: physical size for raw/compressed reads, else logical. */
+	srdp->datasz = (zioflags & ZIO_FLAG_RAW_COMPRESS) ?
+	    BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp);
+
+	KDEBUG("srta->block_diff=%d", srta->block_diff);
+	if (srta->block_diff) {
+		/*
+		 * bug-91372
+		 * For the block-diff case we only need to know that there
+		 * was a write here.  The payload is never read: the dump
+		 * path passes a NULL data pointer and a psize faked from the
+		 * logical size ("no compression") to
+		 * dmu_dump_write()->dump_record().  Issuing a read here
+		 * would incur a full data read and break the cost
+		 * expectation of bitmap generation.
+		 *
+		 * NOTE(review): the KDEBUG below prints srdp->datasz with
+		 * %ld; confirm that matches the field's declared type.
+		 */
+		KDEBUG("Not sending data, block diff %ld", srdp->datasz);
+		return;
+	}
+
+	if (!srta->issue_reads)
+		return;
+	if (BP_IS_REDACTED(bp))
+		return;
+	if (send_do_embed(bp, srta->featureflags))
+		return;
+
+	zbookmark_phys_t zb = {
+	    .zb_objset = dmu_objset_id(os),
+	    .zb_object = range->object,
+	    .zb_level = 0,
+	    .zb_blkid = range->start_blkid,
+	};
+
+	arc_flags_t aflags = ARC_FLAG_CACHED_ONLY;
+
+	int arc_err = arc_read(NULL, os->os_spa, bp,
+	    arc_getbuf_func, &srdp->abuf, ZIO_PRIORITY_ASYNC_READ,
+	    zioflags, &aflags, &zb);
+	/*
+	 * If the data is not already cached in the ARC, we read directly
+	 * from zio.  This avoids the performance overhead of adding a new
+	 * entry to the ARC, and we also avoid polluting the ARC cache with
+	 * data that is not likely to be used in the future.
+	 */
+	if (arc_err != 0) {
+		srdp->abd = abd_alloc_linear(srdp->datasz, B_FALSE);
+		/* Cleared again by dmu_send_read_done() when the zio ends. */
+		srdp->io_outstanding = B_TRUE;
+		zio_nowait(zio_read(NULL, os->os_spa, bp, srdp->abd,
+		    srdp->datasz, dmu_send_read_done, range,
+		    ZIO_PRIORITY_ASYNC_READ, zioflags, &zb));
+	}
+}
+
+/*
+ * Build a range of "count" blocks starting at "blkid" in dnode "dn" and put
+ * it on "q".  The range type follows from "bp": a NULL or hole bp yields a
+ * HOLE, a redacted bp a REDACT, anything else a DATA range (for which the
+ * read is kicked off via issue_data_read()).  "datablksz" is stored in the
+ * range and also passed to bqueue_enqueue() as the item's size.
+ */
+static void
+enqueue_range(struct send_reader_thread_arg *srta, bqueue_t *q, dnode_t *dn,
+    uint64_t blkid, uint64_t count, const blkptr_t *bp, uint32_t datablksz)
+{
+	enum type range_type;
+
+	if (bp == NULL || BP_IS_HOLE(bp))
+		range_type = HOLE;
+	else if (BP_IS_REDACTED(bp))
+		range_type = REDACT;
+	else
+		range_type = DATA;
+
+	struct send_range *new_range = range_alloc(range_type, dn->dn_object,
+	    blkid, blkid + count, B_FALSE);
+
+	KTRACE();
+	if (blkid == DMU_SPILL_BLKID) {
+		ASSERT3P(bp, !=, NULL);
+		ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_SA);
+	}
+
+	if (range_type == HOLE) {
+		new_range->sru.hole.datablksz = datablksz;
+	} else if (range_type == REDACT) {
+		new_range->sru.redact.datablksz = datablksz;
+	} else if (range_type == DATA) {
+		/* Data ranges always describe exactly one block. */
+		ASSERT3U(count, ==, 1);
+		new_range->sru.data.datablksz = datablksz;
+		new_range->sru.data.obj_type = dn->dn_type;
+		new_range->sru.data.bp = *bp;
+		issue_data_read(srta, new_range);
+	}
+	bqueue_enqueue(q, new_range, datablksz);
+}
+
+/*
+ * This thread is responsible for two things: First, it retrieves the correct
+ * blkptr in the to ds if we need to send the data because of something from
+ * the from thread.  As a result of this, we're the first ones to discover that
+ * some indirect blocks can be discarded because they're not holes. Second,
+ * it issues prefetches for the data we need to send.
+ *
+ * Ranges are read from the merge thread's queue (smta->q) and written to
+ * srta->q.  The thread always finishes by forwarding an End of Stream range.
+ */
+static void
+send_reader_thread(void *arg)
+{
+	struct send_reader_thread_arg *srta = arg;
+	struct send_merge_thread_arg *smta = srta->smta;
+	bqueue_t *inq = &smta->q;
+	bqueue_t *outq = &srta->q;
+	objset_t *os = smta->os;
+	fstrans_cookie_t cookie = spl_fstrans_mark();
+	struct send_range *range = bqueue_dequeue(inq);
+	int err = 0;
+
+	KTRACE();
+	/*
+	 * If the record we're analyzing is from a redaction bookmark from the
+	 * fromds, then we need to know whether or not it exists in the tods so
+	 * we know whether to create records for it or not. If it does, we need
+	 * the datablksz so we can generate an appropriate record for it.
+	 * Finally, if it isn't redacted, we need the blkptr so that we can send
+	 * a WRITE record containing the actual data.
+	 */
+	uint64_t last_obj = UINT64_MAX;
+	uint64_t last_obj_exists = B_TRUE;
+	while (!range->eos_marker && !srta->cancel && smta->error == 0 &&
+	    err == 0) {
+		switch (range->type) {
+		case DATA:
+			/*
+			 * Start the read (if any) and pass the same range
+			 * object downstream; it is not freed here.
+			 */
+			issue_data_read(srta, range);
+			bqueue_enqueue(outq, range, range->sru.data.datablksz);
+			range = get_next_range_nofree(inq, range);
+			break;
+		case HOLE:
+		case OBJECT:
+		case OBJECT_RANGE:
+		case REDACT: // Redacted blocks must exist
+			bqueue_enqueue(outq, range, sizeof (*range));
+			range = get_next_range_nofree(inq, range);
+			break;
+		case PREVIOUSLY_REDACTED: {
+			/*
+			 * This entry came from the "from bookmark" when
+			 * sending from a bookmark that has a redaction
+			 * list.  We need to check if this object/blkid
+			 * exists in the target ("to") dataset, and if
+			 * not then we drop this entry.  We also need
+			 * to fill in the block pointer so that we know
+			 * what to prefetch.
+			 *
+			 * To accomplish the above, we first cache whether or
+			 * not the last object we examined exists.  If it
+			 * doesn't, we can drop this record. If it does, we hold
+			 * the dnode and use it to call dbuf_dnode_findbp. We do
+			 * this instead of dbuf_bookmark_findbp because we will
+			 * often operate on large ranges, and holding the dnode
+			 * once is more efficient.
+			 */
+			boolean_t object_exists = B_TRUE;
+			/*
+			 * If the data is redacted, we only care if it exists,
+			 * so that we don't send records for objects that have
+			 * been deleted.
+			 */
+			dnode_t *dn;
+			if (range->object == last_obj && !last_obj_exists) {
+				/*
+				 * If we're still examining the same object as
+				 * previously, and it doesn't exist, we don't
+				 * need to call dbuf_bookmark_findbp.
+				 */
+				object_exists = B_FALSE;
+			} else {
+				err = dnode_hold(os, range->object, FTAG, &dn);
+				if (err == ENOENT) {
+					object_exists = B_FALSE;
+					err = 0;
+				}
+				last_obj = range->object;
+				last_obj_exists = object_exists;
+			}
+
+			if (err != 0) {
+				/* Leaves the switch; the loop ends on err. */
+				break;
+			} else if (!object_exists) {
+				/*
+				 * The block was modified, but doesn't
+				 * exist in the to dataset; if it was
+				 * deleted in the to dataset, then we'll
+				 * visit the hole bp for it at some point.
+				 */
+				range = get_next_range(inq, range);
+				continue;
+			}
+			/* Upper bound: the smaller of dn_maxblkid and end_blkid. */
+			uint64_t file_max =
+			    (dn->dn_maxblkid < range->end_blkid ?
+			    dn->dn_maxblkid : range->end_blkid);
+			/*
+			 * The object exists, so we need to try to find the
+			 * blkptr for each block in the range we're processing.
+			 */
+			rw_enter(&dn->dn_struct_rwlock, RW_READER);
+			for (uint64_t blkid = range->start_blkid;
+			    blkid < file_max; blkid++) {
+				blkptr_t bp;
+				uint32_t datablksz =
+				    dn->dn_phys->dn_datablkszsec <<
+				    SPA_MINBLOCKSHIFT;
+				uint64_t offset = blkid * datablksz;
+				/*
+				 * This call finds the next non-hole block in
+				 * the object. This is to prevent a
+				 * performance problem where we're unredacting
+				 * a large hole. Using dnode_next_offset to
+				 * skip over the large hole avoids iterating
+				 * over every block in it.
+				 */
+				err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK,
+				    &offset, 1, 1, 0);
+				if (err == ESRCH) {
+					offset = UINT64_MAX;
+					err = 0;
+				} else if (err != 0) {
+					break;
+				}
+				if (offset != blkid * datablksz) {
+					/*
+					 * if there is a hole from here
+					 * (blkid) to offset
+					 */
+					offset = MIN(offset, file_max *
+					    datablksz);
+					uint64_t nblks = (offset / datablksz) -
+					    blkid;
+					enqueue_range(srta, outq, dn, blkid,
+					    nblks, NULL, datablksz);
+					blkid += nblks;
+				}
+				if (blkid >= file_max)
+					break;
+				err = dbuf_dnode_findbp(dn, 0, blkid, &bp,
+				    NULL, NULL);
+				if (err != 0)
+					break;
+				ASSERT(!BP_IS_HOLE(&bp));
+				enqueue_range(srta, outq, dn, blkid, 1, &bp,
+				    datablksz);
+			}
+			rw_exit(&dn->dn_struct_rwlock);
+			dnode_rele(dn, FTAG);
+			range = get_next_range(inq, range);
+		}
+		}
+	}
+	/*
+	 * On cancellation or a local error, cancel the merge thread as well;
+	 * otherwise inherit any error the merge thread reported.
+	 */
+	if (srta->cancel || err != 0) {
+		smta->cancel = B_TRUE;
+		srta->error = err;
+	} else if (smta->error != 0) {
+		srta->error = smta->error;
+	}
+	/* Drain and free whatever is left until the End of Stream range. */
+	while (!range->eos_marker)
+		range = get_next_range(inq, range);
+
+	bqueue_enqueue_flush(outq, range, 1);
+	spl_fstrans_unmark(cookie);
+	thread_exit();
+}
+
+/*
+ * Value of dmu_send_params.numfromredactsnaps used when the send is not from
+ * a redaction bookmark; dmu_send_impl() only emits the fromredactsnaps array
+ * when the count differs from this sentinel.
+ */
+#define	NUM_SNAPS_NOT_REDACTED UINT64_MAX
+
+/*
+ * Arguments for a single send, bundled so that dmu_send_impl() and the
+ * setup_*() helpers can take one pointer instead of a long parameter list.
+ */
+struct dmu_send_params {
+	/* Pool args */
+	void *tag; // Tag that dp was held with, will be used to release dp.
+	dsl_pool_t *dp;
+	/* To snapshot args */
+	const char *tosnap;
+	/* Dataset being sent; source of the begin record's guid and name. */
+	dsl_dataset_t *to_ds;
+	/* From snapshot args */
+	/*
+	 * Bookmark of the incremental source: zbm_guid becomes drr_fromguid,
+	 * zbm_creation_txg is the traversal's fromtxg, and a non-zero
+	 * zbm_redaction_obj means we are sending from a redaction bookmark.
+	 */
+	zfs_bookmark_phys_t ancestor_zb;
+	/*
+	 * Array of numfromredactsnaps values added to the begin payload as
+	 * BEGINNV_REDACT_FROM_SNAPS; dmu_send_impl() frees it afterwards.
+	 */
+	uint64_t *fromredactsnaps;
+	/* NUM_SNAPS_NOT_REDACTED if not sending from redaction bookmark */
+	uint64_t numfromredactsnaps;
+	/* Stream params */
+	boolean_t is_clone;	/* sets DRR_FLAG_CLONE in the begin record */
+	boolean_t embedok;	/* allow DMU_BACKUP_FEATURE_EMBED_DATA */
+	boolean_t large_block_ok; /* allow DMU_BACKUP_FEATURE_LARGE_BLOCKS */
+	boolean_t compressok;	/* request DMU_BACKUP_FEATURE_COMPRESSED */
+	/* Raw send; also implies large_block_ok, embedok and compressok. */
+	boolean_t rawok;
+	/*
+	 * When set, the begin record uses saved_guid/saved_toname and no
+	 * DRR_END record is written.
+	 */
+	boolean_t savedok;
+	/*
+	 * Copied into the send cookie and the reader thread argument.
+	 * NOTE(review): appears to select a "block diff" output mode; the
+	 * exact semantics are not visible here, confirm against the users.
+	 */
+	int64_t block_diff;
+	/* Resume point; the send is a resume if either value is non-zero. */
+	uint64_t resumeobj;
+	uint64_t resumeoff;
+	uint64_t saved_guid;
+	/* Non-NULL when doing a redacted send. */
+	zfs_bookmark_phys_t *redactbook;
+	/* Stream output params */
+	dmu_send_outparams_t *dso;
+
+	/* Stream progress params */
+	offset_t *off;
+	int outfd;
+	char saved_toname[MAXNAMELEN];
+};
+
+/*
+ * Work out which DMU_BACKUP_FEATURE_* bits the stream described by dspp
+ * needs and OR them into *featureflags.
+ *
+ * Returns 0 on success.  In kernel builds, returns EINVAL without touching
+ * *featureflags if the ZPL version of a DMU_OST_ZFS objset cannot be read.
+ */
+static int
+setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
+    uint64_t *featureflags)
+{
+	dsl_dataset_t *to_ds = dspp->to_ds;
+	dsl_pool_t *dp = dspp->dp;
+	uint64_t flags = *featureflags;
+	KTRACE();
+#ifdef _KERNEL
+	if (dmu_objset_type(os) == DMU_OST_ZFS) {
+		uint64_t zpl_version;
+
+		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zpl_version) != 0)
+			return (SET_ERROR(EINVAL));
+		if (zpl_version >= ZPL_VERSION_SA)
+			flags |= DMU_BACKUP_FEATURE_SA_SPILL;
+	}
+#endif
+
+	/* A raw send is treated as if large_block_ok had been requested. */
+	if ((dspp->rawok || dspp->large_block_ok) &&
+	    dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_BLOCKS))
+		flags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
+
+	/* Encrypted objsets never carry embedded blocks. */
+	if ((dspp->embedok || dspp->rawok) && !os->os_encrypted &&
+	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
+		flags |= DMU_BACKUP_FEATURE_EMBED_DATA;
+
+	/* A raw send is treated as if compressok had been requested. */
+	if (dspp->compressok || dspp->rawok)
+		flags |= DMU_BACKUP_FEATURE_COMPRESSED;
+
+	if (dspp->rawok && os->os_encrypted)
+		flags |= DMU_BACKUP_FEATURE_RAW;
+
+	/* LZ4 is advertised when any of the three flags below is in use. */
+	if ((flags & (DMU_BACKUP_FEATURE_EMBED_DATA |
+	    DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_RAW)) != 0 &&
+	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
+		flags |= DMU_BACKUP_FEATURE_LZ4;
+
+	/*
+	 * DMU_BACKUP_FEATURE_EMBED_DATA is deliberately left out of this test
+	 * so that ZSTD compressed datasets can still be sent to a receiver
+	 * that does not support ZSTD.
+	 */
+	if ((flags &
+	    (DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_RAW)) != 0 &&
+	    dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_ZSTD_COMPRESS))
+		flags |= DMU_BACKUP_FEATURE_ZSTD;
+
+	if (dspp->resumeobj != 0 || dspp->resumeoff != 0)
+		flags |= DMU_BACKUP_FEATURE_RESUMING;
+
+	if (dspp->redactbook != NULL)
+		flags |= DMU_BACKUP_FEATURE_REDACTED;
+
+	if (dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_DNODE))
+		flags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
+
+	*featureflags = flags;
+	return (0);
+}
+
+/*
+ * Allocate a zeroed replay record (KM_SLEEP) and fill it in as the DRR_BEGIN
+ * record for the send described by dspp.  For a saved stream (dspp->savedok)
+ * the guid and name come from dspp->saved_guid / dspp->saved_toname;
+ * otherwise they come from to_ds, with "@--head--" appended to the name when
+ * to_ds is not a snapshot.  Returns the new record.
+ */
+static dmu_replay_record_t *
+create_begin_record(struct dmu_send_params *dspp, objset_t *os,
+    uint64_t featureflags)
+{
+	KTRACE();
+	dsl_dataset_t *to_ds = dspp->to_ds;
+	dmu_replay_record_t *drr = kmem_zalloc(sizeof (*drr), KM_SLEEP);
+	struct drr_begin *drrb = &drr->drr_u.drr_begin;
+
+	drr->drr_type = DRR_BEGIN;
+
+	drrb->drr_magic = DMU_BACKUP_MAGIC;
+	drrb->drr_type = dmu_objset_type(os);
+	drrb->drr_creation_time = dsl_dataset_phys(to_ds)->ds_creation_time;
+	drrb->drr_fromguid = dspp->ancestor_zb.zbm_guid;
+
+	DMU_SET_STREAM_HDRTYPE(drrb->drr_versioninfo, DMU_SUBSTREAM);
+	DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, featureflags);
+
+	/* The spill-block flag is unconditional; the others are per-send. */
+	drrb->drr_flags |= DRR_FLAG_SPILL_BLOCK;
+	if (zfs_send_set_freerecords_bit)
+		drrb->drr_flags |= DRR_FLAG_FREERECORDS;
+	if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
+		drrb->drr_flags |= DRR_FLAG_CI_DATA;
+	if (dspp->is_clone)
+		drrb->drr_flags |= DRR_FLAG_CLONE;
+
+	if (dspp->savedok) {
+		/* Saved stream: identity comes from the saved state. */
+		drrb->drr_toguid = dspp->saved_guid;
+		strlcpy(drrb->drr_toname, dspp->saved_toname,
+		    sizeof (drrb->drr_toname));
+		return (drr);
+	}
+
+	drrb->drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+	dsl_dataset_name(to_ds, drrb->drr_toname);
+	if (!to_ds->ds_is_snapshot) {
+		(void) strlcat(drrb->drr_toname, "@--head--",
+		    sizeof (drrb->drr_toname));
+	}
+	return (drr);
+}
+
+/*
+ * Fill in the traversal thread argument for the "to" objset and start
+ * send_traverse_thread on it.  The traversal always uses TRAVERSE_PRE and
+ * TRAVERSE_PREFETCH_METADATA; TRAVERSE_NO_DECRYPT is added for raw sends and
+ * TRAVERSE_HARD when zfs_send_corrupt_data is set.
+ */
+static void
+setup_to_thread(struct send_thread_arg *to_arg, objset_t *to_os,
+    dmu_sendstatus_t *dssp, uint64_t fromtxg, boolean_t rawok)
+{
+	int traverse_flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA;
+
+	KTRACE();
+	if (rawok)
+		traverse_flags |= TRAVERSE_NO_DECRYPT;
+	if (zfs_send_corrupt_data)
+		traverse_flags |= TRAVERSE_HARD;
+
+	to_arg->os = to_os;
+	to_arg->fromtxg = fromtxg;
+	to_arg->flags = traverse_flags;
+	to_arg->num_blocks_visited = &dssp->dss_blocks;
+	to_arg->cancel = B_FALSE;
+	to_arg->error_code = 0;
+
+	VERIFY0(bqueue_init(&to_arg->q, zfs_send_no_prefetch_queue_ff,
+	    MAX(zfs_send_no_prefetch_queue_length, 2 * zfs_max_recordsize),
+	    offsetof(struct send_range, ln)));
+
+	/* Everything above must be in place before the thread can run. */
+	(void) thread_create(NULL, 0, send_traverse_thread, to_arg, 0,
+	    curproc, TS_RUN, minclsyspri);
+}
+
+/*
+ * Fill in the argument for the thread that walks the "from" redaction list
+ * (from_rl) and start redact_list_thread on it with mark_redact disabled.
+ */
+static void
+setup_from_thread(struct redact_list_thread_arg *from_arg,
+    redaction_list_t *from_rl, dmu_sendstatus_t *dssp)
+{
+	KTRACE();
+
+	from_arg->rl = from_rl;
+	from_arg->mark_redact = B_FALSE;
+	from_arg->num_blocks_visited = &dssp->dss_blocks;
+	from_arg->cancel = B_FALSE;
+	from_arg->error_code = 0;
+
+	VERIFY0(bqueue_init(&from_arg->q, zfs_send_no_prefetch_queue_ff,
+	    MAX(zfs_send_no_prefetch_queue_length, 2 * zfs_max_recordsize),
+	    offsetof(struct send_range, ln)));
+
+	/*
+	 * The thread is started unconditionally, even when from_rl is NULL.
+	 * NOTE(review): presumably redact_list_thread then just returns
+	 * success and enqueues an eos marker -- confirm against that function.
+	 */
+	(void) thread_create(NULL, 0, redact_list_thread, from_arg, 0,
+	    curproc, TS_RUN, minclsyspri);
+}
+
+/*
+ * For a redacted send (dspp->redactbook != NULL), fill in rlt_arg and start
+ * redact_list_thread on redaction list rl with mark_redact enabled.  When
+ * there is no redaction bookmark, rlt_arg is left untouched and no thread is
+ * created.
+ */
+static void
+setup_redact_list_thread(struct redact_list_thread_arg *rlt_arg,
+    struct dmu_send_params *dspp, redaction_list_t *rl, dmu_sendstatus_t *dssp)
+{
+	KTRACE();
+	if (dspp->redactbook != NULL) {
+		rlt_arg->rl = rl;
+		rlt_arg->mark_redact = B_TRUE;
+		rlt_arg->num_blocks_visited = &dssp->dss_blocks;
+		rlt_arg->cancel = B_FALSE;
+		rlt_arg->error_code = 0;
+
+		VERIFY0(bqueue_init(&rlt_arg->q,
+		    zfs_send_no_prefetch_queue_ff,
+		    MAX(zfs_send_no_prefetch_queue_length,
+		    2 * zfs_max_recordsize),
+		    offsetof(struct send_range, ln)));
+
+		(void) thread_create(NULL, 0, redact_list_thread, rlt_arg, 0,
+		    curproc, TS_RUN, minclsyspri);
+	}
+}
+
+/*
+ * Fill in the merge thread argument with its upstream thread arguments and
+ * start send_merge_thread.  redact_arg is only assigned for a redacted send
+ * (dspp->redactbook != NULL); otherwise it keeps the value the caller left
+ * in smt_arg.
+ */
+static void
+setup_merge_thread(struct send_merge_thread_arg *smt_arg,
+    struct dmu_send_params *dspp, struct redact_list_thread_arg *from_arg,
+    struct send_thread_arg *to_arg, struct redact_list_thread_arg *rlt_arg,
+    objset_t *os)
+{
+	KTRACE();
+
+	smt_arg->os = os;
+	smt_arg->to_arg = to_arg;
+	smt_arg->from_arg = from_arg;
+	if (dspp->redactbook != NULL)
+		smt_arg->redact_arg = rlt_arg;
+	smt_arg->error = 0;
+	smt_arg->cancel = B_FALSE;
+
+	VERIFY0(bqueue_init(&smt_arg->q, zfs_send_no_prefetch_queue_ff,
+	    MAX(zfs_send_no_prefetch_queue_length, 2 * zfs_max_recordsize),
+	    offsetof(struct send_range, ln)));
+
+	(void) thread_create(NULL, 0, send_merge_thread, smt_arg, 0, curproc,
+	    TS_RUN, minclsyspri);
+}
+
+/*
+ * Fill in the reader thread argument and start send_reader_thread.
+ *
+ * srt_arg	argument block handed to the new thread
+ * dspp		send parameters; supplies dso->dso_dryrun and block_diff
+ * smt_arg	merge thread argument the reader consumes from
+ * featureflags	stream feature flags computed for this send
+ *
+ * Reads are only issued when this is not a dry run.
+ */
+static void
+setup_reader_thread(struct send_reader_thread_arg *srt_arg,
+    struct dmu_send_params *dspp, struct send_merge_thread_arg *smt_arg,
+    uint64_t featureflags)
+{
+	KTRACE();
+	VERIFY0(bqueue_init(&srt_arg->q, zfs_send_queue_ff,
+	    MAX(zfs_send_queue_length, 2 * zfs_max_recordsize),
+	    offsetof(struct send_range, ln)));
+	srt_arg->smta = smt_arg;
+	srt_arg->issue_reads = !dspp->dso->dso_dryrun;
+	srt_arg->featureflags = featureflags;
+	/*
+	 * block_diff must be stored before the thread is created: the new
+	 * thread may start running immediately, and a store made after
+	 * thread_create() would race with its first read of the field.
+	 */
+	srt_arg->block_diff = dspp->block_diff;
+	(void) thread_create(NULL, 0, send_reader_thread, srt_arg, 0,
+	    curproc, TS_RUN, minclsyspri);
+}
+
+/*
+ * Set the resume bookmarks of the worker thread arguments and, when
+ * resuming, record the resume object/offset in the begin-record nvlist.
+ *
+ * When resuming, the starting object is dspp->resumeobj and the starting
+ * block id is dspp->resumeoff divided by that object's data block size in
+ * the "to" objset; otherwise both are 0.  rlt_arg is only updated when
+ * redact_rl is non-NULL, and from_arg only when nvl already contains
+ * BEGINNV_REDACT_FROM_SNAPS.  smt_arg is unused.
+ *
+ * Returns 0 on success, or the error from dmu_object_info().
+ */
+static int
+setup_resume_points(struct dmu_send_params *dspp,
+    struct send_thread_arg *to_arg, struct redact_list_thread_arg *from_arg,
+    struct redact_list_thread_arg *rlt_arg,
+    struct send_merge_thread_arg *smt_arg, boolean_t resuming, objset_t *os,
+    redaction_list_t *redact_rl, nvlist_t *nvl)
+{
+	(void) smt_arg;
+	dsl_dataset_t *to_ds = dspp->to_ds;
+	uint64_t start_obj = 0;
+	uint64_t start_blkid = 0;
+
+	KTRACE();
+	if (resuming) {
+		dmu_object_info_t to_doi;
+		int err;
+
+		start_obj = dspp->resumeobj;
+		err = dmu_object_info(os, start_obj, &to_doi);
+		if (err != 0)
+			return (err);
+
+		start_blkid = dspp->resumeoff / to_doi.doi_data_block_size;
+	}
+
+	/*
+	 * When resuming a redacted send, the redaction bookmark can be
+	 * binary searched to skip straight to the appropriate point.
+	 */
+	if (redact_rl != NULL) {
+		SET_BOOKMARK(&rlt_arg->resume, to_ds->ds_object, start_obj, 0,
+		    start_blkid);
+	}
+
+	SET_BOOKMARK(&to_arg->resume, to_ds->ds_object, start_obj, 0,
+	    start_blkid);
+
+	if (nvlist_exists(nvl, BEGINNV_REDACT_FROM_SNAPS)) {
+		/*
+		 * Note: if the resume point lies in an object whose
+		 * blocksize differs between the from and to snapshots,
+		 * the division above used the "wrong" blocksize.  In
+		 * that case fromsnap's send_cb() will notice that the
+		 * blocksize changed and ignore this object.
+		 *
+		 * When resuming a send from a redaction bookmark we
+		 * still cannot accidentally suggest blocks behind the
+		 * to_ds.  Moreover, every block of the object in the
+		 * to_ds has to be sent anyway because the size changed,
+		 * so no harm can come from this either.
+		 */
+		uint64_t from_objset = dspp->ancestor_zb.zbm_redaction_obj;
+
+		SET_BOOKMARK(&from_arg->resume, from_objset, start_obj, 0,
+		    start_blkid);
+	}
+
+	if (resuming) {
+		fnvlist_add_uint64(nvl, BEGINNV_RESUME_OBJECT, dspp->resumeobj);
+		fnvlist_add_uint64(nvl, BEGINNV_RESUME_OFFSET, dspp->resumeoff);
+	}
+	return (0);
+}
+
+/*
+ * Allocate a zeroed send status record (KM_SLEEP), fill in the output fd,
+ * offset pointer and current process, and insert it at the head of the "to"
+ * dataset's ds_sendstreams list under ds_sendstream_lock.  Returns the new
+ * record.
+ */
+static dmu_sendstatus_t *
+setup_send_progress(struct dmu_send_params *dspp)
+{
+	dsl_dataset_t *to_ds = dspp->to_ds;
+	dmu_sendstatus_t *dssp;
+
+	KTRACE();
+	dssp = kmem_zalloc(sizeof (dmu_sendstatus_t), KM_SLEEP);
+	dssp->dss_proc = curproc;
+	dssp->dss_off = dspp->off;
+	dssp->dss_outfd = dspp->outfd;
+
+	mutex_enter(&to_ds->ds_sendstream_lock);
+	list_insert_head(&to_ds->ds_sendstreams, dssp);
+	mutex_exit(&to_ds->ds_sendstream_lock);
+
+	return (dssp);
 }
 
 /*
  * Actually do the bulk of the work in a zfs send.
  *
+ * The idea is that we want to do a send from ancestor_zb to to_ds.  We also
+ * want to not send any data that has been modified by all the datasets in
+ * redactsnaparr, and store the list of blocks that are redacted in this way in
+ * a bookmark named redactbook, created on the to_ds.  We do this by creating
+ * several worker threads, whose function is described below.
+ *
+ * There are three cases.
+ * The first case is a redacted zfs send.  In this case there are 5 threads.
+ * The first thread is the to_ds traversal thread: it calls dataset_traverse on
+ * the to_ds and finds all the blocks that have changed since ancestor_zb (if
+ * it's a full send, that's all blocks in the dataset).  It then sends those
+ * blocks on to the send merge thread. The redact list thread takes the data
+ * from the redaction bookmark and sends those blocks on to the send merge
+ * thread.  The send merge thread takes the data from the to_ds traversal
+ * thread, and combines it with the redaction records from the redact list
+ * thread.  If a block appears in both the to_ds's data and the redaction data,
+ * the send merge thread will mark it as redacted and send it on to the prefetch
+ * thread.  Otherwise, the send merge thread will send the block on to the
+ * prefetch thread unchanged. The prefetch thread will issue prefetch reads for
+ * any data that isn't redacted, and then send the data on to the main thread.
+ * The main thread behaves the same as in a normal send case, issuing demand
+ * reads for data blocks and sending out records over the network.
+ *
+ * The graphic below diagrams the flow of data in the case of a redacted zfs
+ * send.  Each box represents a thread, and each line represents the flow of
+ * data.
+ *
+ *             Records from the |
+ *           redaction bookmark |
+ * +--------------------+       |  +---------------------------+
+ * |                    |       v  | Send Merge Thread         |
+ * | Redact List Thread +----------> Apply redaction marks to  |
+ * |                    |          | records as specified by   |
+ * +--------------------+          | redaction ranges          |
+ *                                 +----^---------------+------+
+ *                                      |               | Merged data
+ *                                      |               |
+ *                                      |  +------------v--------+
+ *                                      |  | Prefetch Thread     |
+ * +--------------------+               |  | Issues prefetch     |
+ * | to_ds Traversal    |               |  | reads of data blocks|
+ * | Thread (finds      +---------------+  +------------+--------+
+ * | candidate blocks)  |  Blocks modified              | Prefetched data
+ * +--------------------+  by to_ds since               |
+ *                         ancestor_zb     +------------v----+
+ *                                         | Main Thread     |  File Descriptor
+ *                                         | Sends data over +->(to zfs receive)
+ *                                         | wire            |
+ *                                         +-----------------+
+ *
+ * The second case is an incremental send from a redaction bookmark.  The to_ds
+ * traversal thread and the main thread behave the same as in the redacted
+ * send case.  The new thread is the from bookmark traversal thread.  It
+ * iterates over the redaction list in the redaction bookmark, and enqueues
+ * records for each block that was redacted in the original send.  The send
+ * merge thread now has to merge the data from the two threads.  For details
+ * about that process, see the header comment of send_merge_thread().  Any data
+ * it decides to send on will be prefetched by the prefetch thread.  Note that
+ * you can perform a redacted send from a redaction bookmark; in that case,
+ * the data flow behaves very similarly to the flow in the redacted send case,
+ * except with the addition of the bookmark traversal thread iterating over the
+ * redaction bookmark.  The send_merge_thread also has to take on the
+ * responsibility of merging the redact list thread's records, the bookmark
+ * traversal thread's records, and the to_ds records.
+ *
+ * +---------------------+
+ * |                     |
+ * | Redact List Thread  +--------------+
+ * |                     |              |
+ * +---------------------+              |
+ *        Blocks in redaction list      | Ranges modified by every secure snap
+ *        of from bookmark              | (or EOS if not redacted)
+ *                                      |
+ * +---------------------+   |     +----v----------------------+
+ * | bookmark Traversal  |   v     | Send Merge Thread         |
+ * | Thread (finds       +---------> Merges bookmark, rlt, and |
+ * | candidate blocks)   |         | to_ds send records        |
+ * +---------------------+         +----^---------------+------+
+ *                                      |               | Merged data
+ *                                      |  +------------v--------+
+ *                                      |  | Prefetch Thread     |
+ * +--------------------+               |  | Issues prefetch     |
+ * | to_ds Traversal    |               |  | reads of data blocks|
+ * | Thread (finds      +---------------+  +------------+--------+
+ * | candidate blocks)  |  Blocks modified              | Prefetched data
+ * +--------------------+  by to_ds since  +------------v----+
+ *                         ancestor_zb     | Main Thread     |  File Descriptor
+ *                                         | Sends data over +->(to zfs receive)
+ *                                         | wire            |
+ *                                         +-----------------+
+ *
+ * The final case is a simple zfs full or incremental send.  The to_ds traversal
+ * thread behaves the same as always. The redact list thread is never started.
+ * The send merge thread takes all the blocks that the to_ds traversal thread
+ * sends it, prefetches the data, and sends the blocks on to the main thread.
+ * The main thread sends the data over the wire.
+ *
+ * To keep performance acceptable, we want to prefetch the data in the worker
+ * threads.  While the to_ds thread could simply use the TRAVERSE_PREFETCH
+ * feature built into traverse_dataset, the combining and deletion of records
+ * due to redaction and sends from redaction bookmarks mean that we could
+ * issue many unnecessary prefetches.  As a result, we only prefetch data
+ * after we've determined that the record is not going to be redacted.  To
+ * prevent the prefetching from getting too far ahead of the main thread, the
+ * blocking queues that are used for communication are capped not by the
+ * number of entries in the queue, but by the sum of the size of the
+ * prefetches associated with them.  The limit on the amount of data that the
+ * thread can prefetch beyond what the main thread has reached is controlled
+ * by the global variable zfs_send_queue_length.  In addition, to prevent poor
+ * performance in the beginning of a send, we also limit the distance ahead
+ * that the traversal threads can be.  That distance is controlled by the
+ * zfs_send_no_prefetch_queue_length tunable.
+ *
  * Note: Releases dp using the specified tag.
  */
 static int
-dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
-    zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
-    boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
-    vnode_t *vp, offset_t *off, boolean_t block_diff)
+dmu_send_impl(struct dmu_send_params *dspp)
 {
 	objset_t *os;
 	dmu_replay_record_t *drr;
-	dmu_sendarg_t *dsp;
+	dmu_sendstatus_t *dssp;
+	dmu_send_cookie_t dsc = {0};
+	KTRACE();
 	int err;
-	uint64_t fromtxg = 0;
+	uint64_t fromtxg = dspp->ancestor_zb.zbm_creation_txg;
 	uint64_t featureflags = 0;
-	struct send_thread_arg to_arg;
-	void *payload = NULL;
-	size_t payload_len = 0;
-	struct send_block_record *to_data;
+	struct redact_list_thread_arg *from_arg;
+	struct send_thread_arg *to_arg;
+	struct redact_list_thread_arg *rlt_arg;
+	struct send_merge_thread_arg *smt_arg;
+	struct send_reader_thread_arg *srt_arg;
+	struct send_range *range;
+	redaction_list_t *from_rl = NULL;
+	redaction_list_t *redact_rl = NULL;
+	boolean_t resuming = (dspp->resumeobj != 0 || dspp->resumeoff != 0);
+	boolean_t book_resuming = resuming;
 
+	dsl_dataset_t *to_ds = dspp->to_ds;
+	zfs_bookmark_phys_t *ancestor_zb = &dspp->ancestor_zb;
+	dsl_pool_t *dp = dspp->dp;
+	void *tag = dspp->tag;
+
+	
 	err = dmu_objset_from_ds(to_ds, &os);
 	if (err != 0) {
 		dsl_pool_rele(dp, tag);
@@ -1047,7 +2516,7 @@
 	 * either a snapshot or we have owned the dataset, ensuring that
 	 * it can't be modified.
 	 */
-	if (!rawok && os->os_encrypted &&
+	if (!dspp->rawok && os->os_encrypted &&
 	    arc_is_unauthenticated(os->os_phys_buf)) {
 		zbookmark_phys_t zb;
 
@@ -1063,338 +2532,514 @@
 		ASSERT0(arc_is_unauthenticated(os->os_phys_buf));
 	}
 
-	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
-	drr->drr_type = DRR_BEGIN;
-	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
-	DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
-	    DMU_SUBSTREAM);
+	if ((err = setup_featureflags(dspp, os, &featureflags)) != 0) {
+		dsl_pool_rele(dp, tag);
+		return (err);
+	}
 
-	bzero(&to_arg, sizeof (to_arg));
-
-#ifdef _KERNEL
-	if (dmu_objset_type(os) == DMU_OST_ZFS) {
-		uint64_t version;
-		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
-			kmem_free(drr, sizeof (dmu_replay_record_t));
+	/*
+	 * If we're doing a redacted send, hold the bookmark's redaction list.
+	 */
+	if (dspp->redactbook != NULL) {
+		err = dsl_redaction_list_hold_obj(dp,
+		    dspp->redactbook->zbm_redaction_obj, FTAG,
+		    &redact_rl);
+		if (err != 0) {
 			dsl_pool_rele(dp, tag);
 			return (SET_ERROR(EINVAL));
 		}
-		if (version >= ZPL_VERSION_SA) {
-			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+		dsl_redaction_list_long_hold(dp, redact_rl, FTAG);
+	}
+
+	/*
+	 * If we're sending from a redaction bookmark, hold the redaction list
+	 * so that we can consider sending the redacted blocks.
+	 */
+	if (ancestor_zb->zbm_redaction_obj != 0) {
+		err = dsl_redaction_list_hold_obj(dp,
+		    ancestor_zb->zbm_redaction_obj, FTAG, &from_rl);
+		if (err != 0) {
+			if (redact_rl != NULL) {
+				dsl_redaction_list_long_rele(redact_rl, FTAG);
+				dsl_redaction_list_rele(redact_rl, FTAG);
+			}
+			dsl_pool_rele(dp, tag);
+			return (SET_ERROR(EINVAL));
 		}
+		dsl_redaction_list_long_hold(dp, from_rl, FTAG);
 	}
-#endif
-
-	/* raw sends imply large_block_ok */
-	if ((large_block_ok || rawok) &&
-	    dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_BLOCKS))
-		featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
-	if (dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_LARGE_DNODE))
-		featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
-
-	/* encrypted datasets will not have embedded blocks */
-	if ((embedok || rawok) && !os->os_encrypted &&
-	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
-		featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
-	}
-
-	/* raw send implies compressok */
-	if (compressok || rawok)
-		featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
-
-	if (rawok && os->os_encrypted)
-		featureflags |= DMU_BACKUP_FEATURE_RAW;
-
-	if ((featureflags &
-	    (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED |
-	    DMU_BACKUP_FEATURE_RAW)) != 0 &&
-	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
-		featureflags |= DMU_BACKUP_FEATURE_LZ4;
-	}
-
-	if (resumeobj != 0 || resumeoff != 0) {
-		featureflags |= DMU_BACKUP_FEATURE_RESUMING;
-	}
-
-	DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
-	    featureflags);
-
-	drr->drr_u.drr_begin.drr_creation_time =
-	    dsl_dataset_phys(to_ds)->ds_creation_time;
-	drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
-	if (is_clone)
-		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
-	drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
-	if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
-		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
-	if (zfs_send_set_freerecords_bit)
-		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
-
-	drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
-
-	if (ancestor_zb != NULL) {
-		drr->drr_u.drr_begin.drr_fromguid =
-		    ancestor_zb->zbm_guid;
-		fromtxg = ancestor_zb->zbm_creation_txg;
-	}
-	dsl_dataset_name(to_ds, drr->drr_u.drr_begin.drr_toname);
-	if (!to_ds->ds_is_snapshot) {
-		(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
-		    sizeof (drr->drr_u.drr_begin.drr_toname));
-	}
-
-	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
-
-	dsp->dsa_drr = drr;
-	dsp->dsa_vp = vp;
-	dsp->dsa_outfd = outfd;
-	dsp->dsa_proc = curproc;
-	dsp->dsa_os = os;
-	dsp->dsa_off = off;
-	dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
-	dsp->dsa_fromtxg = fromtxg;
-	dsp->dsa_pending_op = PENDING_NONE;
-	dsp->dsa_featureflags = featureflags;
-	dsp->dsa_resume_object = resumeobj;
-	dsp->dsa_resume_offset = resumeoff;
-	dsp->block_diff = block_diff;
-
-	mutex_enter(&to_ds->ds_sendstream_lock);
-	list_insert_head(&to_ds->ds_sendstreams, dsp);
-	mutex_exit(&to_ds->ds_sendstream_lock);
 
 	dsl_dataset_long_hold(to_ds, FTAG);
+
+	from_arg = kmem_zalloc(sizeof (*from_arg), KM_SLEEP);
+	to_arg = kmem_zalloc(sizeof (*to_arg), KM_SLEEP);
+	rlt_arg = kmem_zalloc(sizeof (*rlt_arg), KM_SLEEP);
+	smt_arg = kmem_zalloc(sizeof (*smt_arg), KM_SLEEP);
+	srt_arg = kmem_zalloc(sizeof (*srt_arg), KM_SLEEP);
+
+	drr = create_begin_record(dspp, os, featureflags);
+	dssp = setup_send_progress(dspp);
+
+	KDEBUG("Creating dsc record");
+
+	dsc.dsc_drr = drr;
+	dsc.dsc_dso = dspp->dso;
+	dsc.dsc_os = os;
+	dsc.dsc_off = dspp->off;
+	dsc.dsc_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+	dsc.dsc_fromtxg = fromtxg;
+	dsc.dsc_pending_op = PENDING_NONE;
+	dsc.dsc_featureflags = featureflags;
+	dsc.dsc_resume_object = dspp->resumeobj;
+	dsc.dsc_resume_offset = dspp->resumeoff;
+    	dsc.dsc_block_diff = dspp->block_diff;
+
 	dsl_pool_rele(dp, tag);
 
-	/* handle features that require a DRR_BEGIN payload */
-	if (featureflags &
-	    (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) {
-		nvlist_t *keynvl = NULL;
-		nvlist_t *nvl = fnvlist_alloc();
+	void *payload = NULL;
+	size_t payload_len = 0;
+	nvlist_t *nvl = fnvlist_alloc();
 
-		if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
-			dmu_object_info_t to_doi;
-			err = dmu_object_info(os, resumeobj, &to_doi);
-			if (err != 0) {
-				fnvlist_free(nvl);
-				goto out;
-			}
-
-			SET_BOOKMARK(&to_arg.resume, to_ds->ds_object,
-			    resumeobj, 0,
-			    resumeoff / to_doi.doi_data_block_size);
-
-			fnvlist_add_uint64(nvl, "resume_object", resumeobj);
-			fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
-		}
-
-		if (featureflags & DMU_BACKUP_FEATURE_RAW) {
-			uint64_t ivset_guid = (ancestor_zb != NULL) ?
-			    ancestor_zb->zbm_ivset_guid : 0;
-
-			ASSERT(os->os_encrypted);
-
-			err = dsl_crypto_populate_key_nvlist(to_ds,
-			    ivset_guid, &keynvl);
-			if (err != 0) {
-				fnvlist_free(nvl);
-				goto out;
-			}
-
-			fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
-		}
-
-		payload = fnvlist_pack(nvl, &payload_len);
-		drr->drr_payloadlen = payload_len;
-		fnvlist_free(keynvl);
-		fnvlist_free(nvl);
+	/*
+	 * If we're doing a redacted send, we include the snapshots we're
+	 * redacted with respect to so that the target system knows what send
+	 * streams can be correctly received on top of this dataset. If we're
+	 * instead sending a redacted dataset, we include the snapshots that the
+	 * dataset was created with respect to.
+	 */
+	if (dspp->redactbook != NULL) {
+		fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_SNAPS,
+		    redact_rl->rl_phys->rlp_snaps,
+		    redact_rl->rl_phys->rlp_num_snaps);
+	} else if (dsl_dataset_feature_is_active(to_ds,
+	    SPA_FEATURE_REDACTED_DATASETS)) {
+		uint64_t *tods_guids;
+		uint64_t length;
+		VERIFY(dsl_dataset_get_uint64_array_feature(to_ds,
+		    SPA_FEATURE_REDACTED_DATASETS, &length, &tods_guids));
+		fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_SNAPS, tods_guids,
+		    length);
 	}
 
-	err = dump_record(dsp, payload, payload_len);
+	/*
+	 * If we're sending from a redaction bookmark, then we should retrieve
+	 * the guids of that bookmark so we can send them over the wire.
+	 */
+	if (from_rl != NULL) {
+		fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_FROM_SNAPS,
+		    from_rl->rl_phys->rlp_snaps,
+		    from_rl->rl_phys->rlp_num_snaps);
+	}
+
+	/*
+	 * If the snapshot we're sending from is redacted, include the redaction
+	 * list in the stream.
+	 */
+	if (dspp->numfromredactsnaps != NUM_SNAPS_NOT_REDACTED) {
+		ASSERT3P(from_rl, ==, NULL);
+		fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_FROM_SNAPS,
+		    dspp->fromredactsnaps, (uint_t)dspp->numfromredactsnaps);
+		if (dspp->numfromredactsnaps > 0) {
+			kmem_free(dspp->fromredactsnaps,
+			    dspp->numfromredactsnaps * sizeof (uint64_t));
+			dspp->fromredactsnaps = NULL;
+		}
+	}
+
+	if (resuming || book_resuming) {
+		err = setup_resume_points(dspp, to_arg, from_arg,
+		    rlt_arg, smt_arg, resuming, os, redact_rl, nvl);
+		if (err != 0)
+			goto out;
+	}
+
+	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+		uint64_t ivset_guid = (ancestor_zb != NULL) ?
+		    ancestor_zb->zbm_ivset_guid : 0;
+		nvlist_t *keynvl = NULL;
+		ASSERT(os->os_encrypted);
+
+		err = dsl_crypto_populate_key_nvlist(os, ivset_guid,
+		    &keynvl);
+		if (err != 0) {
+			fnvlist_free(nvl);
+			goto out;
+		}
+
+		fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
+		fnvlist_free(keynvl);
+	}
+
+	if (!nvlist_empty(nvl)) {
+		payload = fnvlist_pack(nvl, &payload_len);
+		drr->drr_payloadlen = payload_len;
+	}
+
+	fnvlist_free(nvl);
+
+	KTRACE();
+
+    	KDEBUG("dspp->block_diff=%d", dspp->block_diff);
+	KDEBUG("dsc.dsc_block_diff=%d", dsc.dsc_block_diff);
+
+	KDEBUG("dump_record");
+	err = dump_record(&dsc, payload, payload_len);
+	KDEBUG("Finished dump_record");
 	fnvlist_pack_free(payload, payload_len);
 	if (err != 0) {
-		err = dsp->dsa_err;
+		err = dsc.dsc_err;
 		goto out;
 	}
 
-	err = bqueue_init(&to_arg.q,
-	    MAX(zfs_send_queue_length, 2 * zfs_max_recordsize),
-	    offsetof(struct send_block_record, ln));
-	to_arg.error_code = 0;
-	to_arg.cancel = B_FALSE;
-	to_arg.ds = to_ds;
-	to_arg.fromtxg = fromtxg;
-	if (dsp->block_diff) {
-		to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA;
-	}
-	else { 
-		// this flag encompasses metadata and data
-		// if data, then we end up spawning traverse_prefetcher (bad for diff case)
-		to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
-	}
-	if (rawok)
-		to_arg.flags |= TRAVERSE_NO_DECRYPT;
-	(void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc,
-	    TS_RUN, minclsyspri);
+	setup_to_thread(to_arg, os, dssp, fromtxg, dspp->rawok);
+	setup_from_thread(from_arg, from_rl, dssp);
+	setup_redact_list_thread(rlt_arg, dspp, redact_rl, dssp);
+	setup_merge_thread(smt_arg, dspp, from_arg, to_arg, rlt_arg, os);
+	setup_reader_thread(srt_arg, dspp, smt_arg, featureflags);
 
-	to_data = bqueue_dequeue(&to_arg.q);
-
-	while (!to_data->eos_marker && err == 0) {
-		err = do_dump(dsp, to_data);
-		to_data = get_next_record(&to_arg.q, to_data);
+	range = bqueue_dequeue(&srt_arg->q);
+	while (err == 0 && !range->eos_marker) {
+		err = do_dump(&dsc, range);
+		range = get_next_range(&srt_arg->q, range);
 		if (issig(JUSTLOOKING) && issig(FORREAL))
-			err = EINTR;
+			err = SET_ERROR(EINTR);
 	}
 
+	/*
+	 * If we hit an error or are interrupted, cancel our worker threads and
+	 * clear the queue of any pending records.  The threads will pass the
+	 * cancel up the tree of worker threads, and each one will clean up any
+	 * pending records before exiting.
+	 */
 	if (err != 0) {
-		to_arg.cancel = B_TRUE;
-		while (!to_data->eos_marker) {
-			to_data = get_next_record(&to_arg.q, to_data);
+		srt_arg->cancel = B_TRUE;
+		while (!range->eos_marker) {
+			range = get_next_range(&srt_arg->q, range);
 		}
 	}
-	kmem_free(to_data, sizeof (*to_data));
+	range_free(range);
 
-	bqueue_destroy(&to_arg.q);
+	bqueue_destroy(&srt_arg->q);
+	bqueue_destroy(&smt_arg->q);
+	if (dspp->redactbook != NULL)
+		bqueue_destroy(&rlt_arg->q);
+	bqueue_destroy(&to_arg->q);
+	bqueue_destroy(&from_arg->q);
 
-	if (err == 0 && to_arg.error_code != 0)
-		err = to_arg.error_code;
+	if (err == 0 && srt_arg->error != 0)
+		err = srt_arg->error;
 
 	if (err != 0)
 		goto out;
 
-	if (dsp->dsa_pending_op != PENDING_NONE)
-		if (dump_record(dsp, NULL, 0) != 0)
-			err = SET_ERROR(EINTR);
+	if (dsc.dsc_pending_op != PENDING_NONE)
+	{
+		if( ! dsc.dsc_block_diff)
+		{
+			KDEBUG("dump_record");
+			if (dump_record(&dsc, NULL, 0) != 0)
+				err = SET_ERROR(EINTR);
+		}
+	}
 
 	if (err != 0) {
-		if (err == EINTR && dsp->dsa_err != 0)
-			err = dsp->dsa_err;
+		if (err == EINTR && dsc.dsc_err != 0)
+			err = dsc.dsc_err;
 		goto out;
 	}
 
-	bzero(drr, sizeof (dmu_replay_record_t));
-	drr->drr_type = DRR_END;
-	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
-	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
+	/*
+	 * Send the DRR_END record if this is not a saved stream.
+	 * Otherwise, the omitted DRR_END record will signal to
+	 * the receive side that the stream is incomplete.
+	 */
+	if (!dspp->savedok) {
+		bzero(drr, sizeof (dmu_replay_record_t));
+		drr->drr_type = DRR_END;
+		drr->drr_u.drr_end.drr_checksum = dsc.dsc_zc;
+		drr->drr_u.drr_end.drr_toguid = dsc.dsc_toguid;
 
-	if (dump_record(dsp, NULL, 0) != 0)
-		err = dsp->dsa_err;
+			KDEBUG("dump_record");
+		if (dump_record(&dsc, NULL, 0) != 0)
+			err = dsc.dsc_err;
+	}
 out:
+	KDEBUG("out: mutex list remove %s","test" );
 	mutex_enter(&to_ds->ds_sendstream_lock);
-	list_remove(&to_ds->ds_sendstreams, dsp);
+	list_remove(&to_ds->ds_sendstreams, dssp);
 	mutex_exit(&to_ds->ds_sendstream_lock);
 
-	VERIFY(err != 0 || (dsp->dsa_sent_begin && dsp->dsa_sent_end));
+	KDEBUG("out: VERIFY");
+#ifdef __ZMODDBG__
+	cmn_err(CE_WARN, "%s(%d) %s dspp=%llx", __FILE__, __LINE__, __FUNCTION__, dspp);
+	cmn_err(CE_WARN, "%s(%d) %s err=%d", __FILE__, __LINE__, __FUNCTION__, err);
+	cmn_err(CE_WARN, "%s(%d) %s dsc.dsc_sent_begin=%llx", __FILE__, __LINE__, __FUNCTION__, dsc.dsc_sent_begin);
+	cmn_err(CE_WARN, "%s(%d) %s dsc.dsc_sent_end=%llx", __FILE__, __LINE__, __FUNCTION__, dsc.dsc_sent_end);
+	cmn_err(CE_WARN, "%s(%d) %s dspp->savedok=%llx", __FILE__, __LINE__, __FUNCTION__, dspp->savedok);
+#endif
+#if 0
+	if(dspp != NULL)
+	{
+		VERIFY(err != 0 || (dsc.dsc_sent_begin &&
+	    		(dsc.dsc_sent_end || dspp->savedok)));
+	}
+#endif
+
+	KDEBUG("out: about to kmem_free");
 
 	kmem_free(drr, sizeof (dmu_replay_record_t));
-	kmem_free(dsp, sizeof (dmu_sendarg_t));
+	if(dspp)
+		kmem_free(dssp, sizeof (dmu_sendstatus_t));
+	kmem_free(from_arg, sizeof (*from_arg));
+	kmem_free(to_arg, sizeof (*to_arg));
+	kmem_free(rlt_arg, sizeof (*rlt_arg));
+	kmem_free(smt_arg, sizeof (*smt_arg));
+	kmem_free(srt_arg, sizeof (*srt_arg));
 
 	dsl_dataset_long_rele(to_ds, FTAG);
+	if (from_rl != NULL) {
+		KDEBUG("from_rl dsl_redaction_list_long_rele");
+		dsl_redaction_list_long_rele(from_rl, FTAG);
+		dsl_redaction_list_rele(from_rl, FTAG);
+	}
+	if (redact_rl != NULL) {
+		KDEBUG("redact_rl dsl_redaction_list_long_rele");
+		dsl_redaction_list_long_rele(redact_rl, FTAG);
+		dsl_redaction_list_rele(redact_rl, FTAG);
+	}
 
+	KDEBUG("exit");
 	return (err);
 }
 
 int
 dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
     boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    boolean_t rawok, int outfd, vnode_t *vp, offset_t *off, boolean_t block_diff)
+    boolean_t rawok, boolean_t savedok, int outfd, offset_t *off,
+    dmu_send_outparams_t *dsop)
 {
-	dsl_pool_t *dp;
-	dsl_dataset_t *ds;
-	dsl_dataset_t *fromds = NULL;
-	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	KTRACE();
 	int err;
+	dsl_dataset_t *fromds;
+	ds_hold_flags_t dsflags;
+	struct dmu_send_params dspp = {0};
+	dspp.embedok = embedok;
+	dspp.large_block_ok = large_block_ok;
+	dspp.compressok = compressok;
+	dspp.outfd = outfd;
+	dspp.off = off;
+	dspp.dso = dsop;
+	dspp.tag = FTAG;
+	dspp.rawok = rawok;
+	dspp.savedok = savedok;
 
-	err = dsl_pool_hold(pool, FTAG, &dp);
+	dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
+	err = dsl_pool_hold(pool, FTAG, &dspp.dp);
 	if (err != 0)
 		return (err);
 
-	err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds);
+	err = dsl_dataset_hold_obj_flags(dspp.dp, tosnap, dsflags, FTAG,
+	    &dspp.to_ds);
 	if (err != 0) {
-		dsl_pool_rele(dp, FTAG);
+		dsl_pool_rele(dspp.dp, FTAG);
 		return (err);
 	}
 
 	if (fromsnap != 0) {
-		zfs_bookmark_phys_t zb = { 0 };
-		boolean_t is_clone;
-
-		err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
+		err = dsl_dataset_hold_obj_flags(dspp.dp, fromsnap, dsflags,
+		    FTAG, &fromds);
 		if (err != 0) {
-			dsl_dataset_rele_flags(ds, dsflags, FTAG);
-			dsl_pool_rele(dp, FTAG);
+			dsl_dataset_rele_flags(dspp.to_ds, dsflags, FTAG);
+			dsl_pool_rele(dspp.dp, FTAG);
 			return (err);
 		}
-		if (!dsl_dataset_is_before(ds, fromds, 0)) {
-			err = SET_ERROR(EXDEV);
-			dsl_dataset_rele(fromds, FTAG);
-			dsl_dataset_rele_flags(ds, dsflags, FTAG);
-			dsl_pool_rele(dp, FTAG);
-			return (err);
-		}
-
-		zb.zbm_creation_time =
+		dspp.ancestor_zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
+		dspp.ancestor_zb.zbm_creation_txg =
+		    dsl_dataset_phys(fromds)->ds_creation_txg;
+		dspp.ancestor_zb.zbm_creation_time =
 		    dsl_dataset_phys(fromds)->ds_creation_time;
-		zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
-		zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 
 		if (dsl_dataset_is_zapified(fromds)) {
-			(void) zap_lookup(dp->dp_meta_objset,
+			(void) zap_lookup(dspp.dp->dp_meta_objset,
 			    fromds->ds_object, DS_FIELD_IVSET_GUID, 8, 1,
-			    &zb.zbm_ivset_guid);
+			    &dspp.ancestor_zb.zbm_ivset_guid);
 		}
 
-		is_clone = (fromds->ds_dir != ds->ds_dir);
+		/* See dmu_send for the reasons behind this. */
+		uint64_t *fromredact;
+
+		if (!dsl_dataset_get_uint64_array_feature(fromds,
+		    SPA_FEATURE_REDACTED_DATASETS,
+		    &dspp.numfromredactsnaps,
+		    &fromredact)) {
+			dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
+		} else if (dspp.numfromredactsnaps > 0) {
+			uint64_t size = dspp.numfromredactsnaps *
+			    sizeof (uint64_t);
+			dspp.fromredactsnaps = kmem_zalloc(size, KM_SLEEP);
+			bcopy(fromredact, dspp.fromredactsnaps, size);
+		}
+
+		boolean_t is_before =
+		    dsl_dataset_is_before(dspp.to_ds, fromds, 0);
+		dspp.is_clone = (dspp.to_ds->ds_dir !=
+		    fromds->ds_dir);
+		dsl_dataset_rele_flags(fromds, dsflags, FTAG);
-		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
-		    embedok, large_block_ok, compressok, rawok, outfd,
-		    0, 0, vp, off, block_diff);
+		if (!is_before) {
+			dsl_pool_rele(dspp.dp, FTAG);
+			err = SET_ERROR(EXDEV);
+		} else {
+			KDEBUG("Call dmu_send_impl");
+			err = dmu_send_impl(&dspp);
+		}
 	} else {
-		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
-		    embedok, large_block_ok, compressok, rawok, outfd,
-		    0, 0, vp, off, block_diff);
+		dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
+		KDEBUG("Call dmu_send_impl NUM_SNAPS_NOT_REDACTED");
+		err = dmu_send_impl(&dspp);
 	}
-	dsl_dataset_rele_flags(ds, dsflags, FTAG);
+	if (dspp.fromredactsnaps)
+		kmem_free(dspp.fromredactsnaps,
+		    dspp.numfromredactsnaps * sizeof (uint64_t));
+	/* Release with the same flags the hold at the top was taken with. */
+	dsl_dataset_rele_flags(dspp.to_ds, dsflags, FTAG);
 	return (err);
 }
 
 int
 dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
     boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
-    int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp,
-    offset_t *off)
+    boolean_t savedok, boolean_t blockdiff, uint64_t resumeobj, uint64_t resumeoff,
+    const char *redactbook, int outfd, offset_t *off,
+    dmu_send_outparams_t *dsop)
 {
-	dsl_pool_t *dp;
-	dsl_dataset_t *ds;
-	int err;
-	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	int err = 0;
+	KTRACE();
+	ds_hold_flags_t dsflags;
 	boolean_t owned = B_FALSE;
+	dsl_dataset_t *fromds = NULL;
+	zfs_bookmark_phys_t book = {0};
+	struct dmu_send_params dspp = {0};
+
+	dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
+	dspp.tosnap = tosnap;
+	dspp.embedok = embedok;
+	dspp.large_block_ok = large_block_ok;
+	dspp.compressok = compressok;
+	dspp.outfd = outfd;
+	dspp.off = off;
+	dspp.dso = dsop;
+	dspp.tag = FTAG;
+	dspp.resumeobj = resumeobj;
+	dspp.resumeoff = resumeoff;
+	dspp.rawok = rawok;
+	dspp.savedok = savedok;
+	dspp.block_diff = blockdiff;
+
+	KDEBUG("blockdiff=%d", blockdiff);
 
 	if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
 		return (SET_ERROR(EINVAL));
 
-	err = dsl_pool_hold(tosnap, FTAG, &dp);
+	err = dsl_pool_hold(tosnap, FTAG, &dspp.dp);
 	if (err != 0)
 		return (err);
-	if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) {
+
+	if (strchr(tosnap, '@') == NULL && spa_writeable(dspp.dp->dp_spa)) {
 		/*
 		 * We are sending a filesystem or volume.  Ensure
 		 * that it doesn't change by owning the dataset.
 		 */
-		err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds);
-		owned = B_TRUE;
+
+		if (savedok) {
+			/*
+			 * We are looking for the dataset that represents the
+			 * partially received send stream. If this stream was
+			 * received as a new snapshot of an existing dataset,
+			 * this will be saved in a hidden clone named
+			 * "<pool>/<dataset>/%recv". Otherwise, the stream
+			 * will be saved in the live dataset itself. In
+			 * either case we need to use dsl_dataset_own_force()
+			 * because the stream is marked as inconsistent,
+			 * which would normally make it unavailable to be
+			 * owned.
+			 */
+			char *name = kmem_asprintf("%s/%s", tosnap,
+			    recv_clone_name);
+			err = dsl_dataset_own_force(dspp.dp, name, dsflags,
+			    FTAG, &dspp.to_ds);
+			if (err == ENOENT) {
+				err = dsl_dataset_own_force(dspp.dp, tosnap,
+				    dsflags, FTAG, &dspp.to_ds);
+			}
+
+			if (err == 0) {
+				owned = B_TRUE;
+				err = zap_lookup(dspp.dp->dp_meta_objset,
+				    dspp.to_ds->ds_object,
+				    DS_FIELD_RESUME_TOGUID, 8, 1,
+				    &dspp.saved_guid);
+			}
+
+			if (err == 0) {
+				err = zap_lookup(dspp.dp->dp_meta_objset,
+				    dspp.to_ds->ds_object,
+				    DS_FIELD_RESUME_TONAME, 1,
+				    sizeof (dspp.saved_toname),
+				    dspp.saved_toname);
+			}
+			/* Only disown if there was an error in the lookups */
+			if (owned && (err != 0))
+				dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
+
+			kmem_strfree(name);
+		} else {
+			err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
+			    FTAG, &dspp.to_ds);
+			if (err == 0)
+				owned = B_TRUE;
+		}
 	} else {
-		err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds);
+		err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
+		    &dspp.to_ds);
 	}
+
 	if (err != 0) {
-		dsl_pool_rele(dp, FTAG);
+		/* Note: dsl dataset is not owned at this point */
+		dsl_pool_rele(dspp.dp, FTAG);
+		return (err);
+	}
+
+	if (redactbook != NULL) {
+		char path[ZFS_MAX_DATASET_NAME_LEN];
+		(void) strlcpy(path, tosnap, sizeof (path));
+		char *at = strchr(path, '@');
+		if (at == NULL) {
+			err = EINVAL;
+		} else {
+			(void) snprintf(at, sizeof (path) - (at - path), "#%s",
+			    redactbook);
+			err = dsl_bookmark_lookup(dspp.dp, path,
+			    NULL, &book);
+			dspp.redactbook = &book;
+		}
+	}
+
+	if (err != 0) {
+		dsl_pool_rele(dspp.dp, FTAG);
+		if (owned)
+			dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
+		else
+			dsl_dataset_rele_flags(dspp.to_ds, dsflags, FTAG);
 		return (err);
 	}
 
 	if (fromsnap != NULL) {
-		zfs_bookmark_phys_t zb = { 0 };
-		boolean_t is_clone = B_FALSE;
-		int fsnamelen = strchr(tosnap, '@') - tosnap;
+		zfs_bookmark_phys_t *zb = &dspp.ancestor_zb;
+		int fsnamelen;
+		if (strpbrk(tosnap, "@#") != NULL)
+			fsnamelen = strpbrk(tosnap, "@#") - tosnap;
+		else
+			fsnamelen = strlen(tosnap);
 
 		/*
 		 * If the fromsnap is in a different filesystem, then
@@ -1403,55 +3048,91 @@
 		if (strncmp(tosnap, fromsnap, fsnamelen) != 0 ||
 		    (fromsnap[fsnamelen] != '@' &&
 		    fromsnap[fsnamelen] != '#')) {
-			is_clone = B_TRUE;
+			dspp.is_clone = B_TRUE;
 		}
 
-		if (strchr(fromsnap, '@')) {
-			dsl_dataset_t *fromds;
-			err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
-			if (err == 0) {
-				if (!dsl_dataset_is_before(ds, fromds, 0))
-					err = SET_ERROR(EXDEV);
-				zb.zbm_creation_time =
-				    dsl_dataset_phys(fromds)->ds_creation_time;
-				zb.zbm_creation_txg =
-				    dsl_dataset_phys(fromds)->ds_creation_txg;
-				zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
-				is_clone = (ds->ds_dir != fromds->ds_dir);
+		if (strchr(fromsnap, '@') != NULL) {
+			err = dsl_dataset_hold(dspp.dp, fromsnap, FTAG,
+			    &fromds);
 
-				if (dsl_dataset_is_zapified(fromds)) {
-					(void) zap_lookup(dp->dp_meta_objset,
-					    fromds->ds_object,
-					    DS_FIELD_IVSET_GUID, 8, 1,
-					    &zb.zbm_ivset_guid);
+			if (err != 0) {
+				ASSERT3P(fromds, ==, NULL);
+			} else {
+				/*
+				 * We need to make a deep copy of the redact
+				 * snapshots of the from snapshot, because the
+				 * array will be freed when we evict from_ds.
+				 */
+				uint64_t *fromredact;
+				if (!dsl_dataset_get_uint64_array_feature(
+				    fromds, SPA_FEATURE_REDACTED_DATASETS,
+				    &dspp.numfromredactsnaps,
+				    &fromredact)) {
+					dspp.numfromredactsnaps =
+					    NUM_SNAPS_NOT_REDACTED;
+				} else if (dspp.numfromredactsnaps > 0) {
+					uint64_t size =
+					    dspp.numfromredactsnaps *
+					    sizeof (uint64_t);
+					dspp.fromredactsnaps = kmem_zalloc(size,
+					    KM_SLEEP);
+					bcopy(fromredact, dspp.fromredactsnaps,
+					    size);
+				}
+				if (!dsl_dataset_is_before(dspp.to_ds, fromds,
+				    0)) {
+					err = SET_ERROR(EXDEV);
+				} else {
+					zb->zbm_creation_txg =
+					    dsl_dataset_phys(fromds)->
+					    ds_creation_txg;
+					zb->zbm_creation_time =
+					    dsl_dataset_phys(fromds)->
+					    ds_creation_time;
+					zb->zbm_guid =
+					    dsl_dataset_phys(fromds)->ds_guid;
+					zb->zbm_redaction_obj = 0;
+
+					if (dsl_dataset_is_zapified(fromds)) {
+						(void) zap_lookup(
+						    dspp.dp->dp_meta_objset,
+						    fromds->ds_object,
+						    DS_FIELD_IVSET_GUID, 8, 1,
+						    &zb->zbm_ivset_guid);
+					}
 				}
 				dsl_dataset_rele(fromds, FTAG);
 			}
 		} else {
-			err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
+			dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
+			err = dsl_bookmark_lookup(dspp.dp, fromsnap, dspp.to_ds,
+			    zb);
+			if (err == EXDEV && zb->zbm_redaction_obj != 0 &&
+			    zb->zbm_guid ==
+			    dsl_dataset_phys(dspp.to_ds)->ds_guid)
+				err = 0;
 		}
-		if (err != 0) {
-			if (owned)
-				dsl_dataset_disown(ds, dsflags, FTAG);
-			else
-				dsl_dataset_rele_flags(ds, dsflags, FTAG);
 
-			dsl_pool_rele(dp, FTAG);
-			return (err);
+		if (err == 0) {
+			/* dmu_send_impl will call dsl_pool_rele for us. */
+			KDEBUG("call dmu_send_impl");
+			err = dmu_send_impl(&dspp);
+		} else {
+			if (dspp.fromredactsnaps)
+				kmem_free(dspp.fromredactsnaps,
+				    dspp.numfromredactsnaps *
+				    sizeof (uint64_t));
+			dsl_pool_rele(dspp.dp, FTAG);
 		}
-		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
-		    embedok, large_block_ok, compressok, rawok,
-		    outfd, resumeobj, resumeoff, vp, off, B_FALSE);
 	} else {
-		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
-		    embedok, large_block_ok, compressok, rawok,
-		    outfd, resumeobj, resumeoff, vp, off, B_FALSE);
+		KDEBUG("call dmu_send_impl NUM_SNAPS_NOT_REDACTED");
+		dspp.numfromredactsnaps = NUM_SNAPS_NOT_REDACTED;
+		err = dmu_send_impl(&dspp);
 	}
 	if (owned)
-		dsl_dataset_disown(ds, dsflags, FTAG);
+		dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
 	else
-		dsl_dataset_rele_flags(ds, dsflags, FTAG);
-
+		dsl_dataset_rele_flags(dspp.to_ds, dsflags, FTAG);
 	return (err);
 }
 
@@ -1459,6 +3140,7 @@
 dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t uncompressed,
     uint64_t compressed, boolean_t stream_compressed, uint64_t *sizep)
 {
+	KTRACE();
 	int err = 0;
 	uint64_t size;
 	/*
@@ -1512,39 +3194,84 @@
 }
 
 int
-dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds,
-    boolean_t stream_compressed, uint64_t *sizep)
+dmu_send_estimate_fast(dsl_dataset_t *origds, dsl_dataset_t *fromds,
+    zfs_bookmark_phys_t *frombook, boolean_t stream_compressed,
+    boolean_t saved, uint64_t *sizep)
 {
+	KTRACE();
 	int err;
+	dsl_dataset_t *ds = origds;
 	uint64_t uncomp, comp;
 
-	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
-
-	/* tosnap must be a snapshot */
-	if (!ds->ds_is_snapshot)
-		return (SET_ERROR(EINVAL));
-
-	/* fromsnap, if provided, must be a snapshot */
-	if (fromds != NULL && !fromds->ds_is_snapshot)
-		return (SET_ERROR(EINVAL));
+	ASSERT(dsl_pool_config_held(origds->ds_dir->dd_pool));
+	ASSERT(fromds == NULL || frombook == NULL);
 
 	/*
-	 * fromsnap must be an earlier snapshot from the same fs as tosnap,
-	 * or the origin's fs.
+	 * If this is a saved send we may actually be sending
+	 * from the %recv clone used for resuming.
 	 */
-	if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0))
-		return (SET_ERROR(EXDEV));
+	if (saved) {
+		objset_t *mos = origds->ds_dir->dd_pool->dp_meta_objset;
+		uint64_t guid;
+		char dsname[ZFS_MAX_DATASET_NAME_LEN + 6];
 
-	/* Get compressed and uncompressed size estimates of changed data. */
-	if (fromds == NULL) {
+		dsl_dataset_name(origds, dsname);
+		(void) strcat(dsname, "/");
+		(void) strcat(dsname, recv_clone_name);
+
+		err = dsl_dataset_hold(origds->ds_dir->dd_pool,
+		    dsname, FTAG, &ds);
+		if (err != ENOENT && err != 0) {
+			return (err);
+		} else if (err == ENOENT) {
+			ds = origds;
+		}
+
+		/* check that this dataset has partially received data */
+		err = zap_lookup(mos, ds->ds_object,
+		    DS_FIELD_RESUME_TOGUID, 8, 1, &guid);
+		if (err != 0) {
+			err = SET_ERROR(err == ENOENT ? EINVAL : err);
+			goto out;
+		}
+
+		err = zap_lookup(mos, ds->ds_object,
+		    DS_FIELD_RESUME_TONAME, 1, sizeof (dsname), dsname);
+		if (err != 0) {
+			err = SET_ERROR(err == ENOENT ? EINVAL : err);
+			goto out;
+		}
+	}
+
+	/* tosnap must be a snapshot or the target of a saved send */
+	if (!ds->ds_is_snapshot && ds == origds)
+		return (SET_ERROR(EINVAL));
+
+	if (fromds != NULL) {
+		uint64_t used;
+		if (!fromds->ds_is_snapshot) {
+			err = SET_ERROR(EINVAL);
+			goto out;
+		}
+
+		if (!dsl_dataset_is_before(ds, fromds, 0)) {
+			err = SET_ERROR(EXDEV);
+			goto out;
+		}
+
+		err = dsl_dataset_space_written(fromds, ds, &used, &comp,
+		    &uncomp);
+		if (err != 0)
+			goto out;
+	} else if (frombook != NULL) {
+		uint64_t used;
+		err = dsl_dataset_space_written_bookmark(frombook, ds, &used,
+		    &comp, &uncomp);
+		if (err != 0)
+			goto out;
+	} else {
 		uncomp = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
 		comp = dsl_dataset_phys(ds)->ds_compressed_bytes;
-	} else {
-		uint64_t used;
-		err = dsl_dataset_space_written(fromds, ds,
-		    &used, &comp, &uncomp);
-		if (err != 0)
-			return (err);
 	}
 
 	err = dmu_adjust_send_estimate_for_indirects(ds, uncomp, comp,
@@ -1553,84 +3280,32 @@
 	 * Add the size of the BEGIN and END records to the estimate.
 	 */
 	*sizep += 2 * sizeof (dmu_replay_record_t);
+
+out:
+	if (ds != origds)
+		dsl_dataset_rele(ds, FTAG);
 	return (err);
 }
 
-struct calculate_send_arg {
-	uint64_t uncompressed;
-	uint64_t compressed;
-};
-
-/*
- * Simple callback used to traverse the blocks of a snapshot and sum their
- * uncompressed and compressed sizes.
- */
-/* ARGSUSED */
-static int
-dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
-{
-	struct calculate_send_arg *space = arg;
-	if (bp != NULL && !BP_IS_HOLE(bp)) {
-		space->uncompressed += BP_GET_UCSIZE(bp);
-		space->compressed += BP_GET_PSIZE(bp);
-	}
-	return (0);
-}
-
-/*
- * Given a desination snapshot and a TXG, calculate the approximate size of a
- * send stream sent from that TXG. from_txg may be zero, indicating that the
- * whole snapshot will be sent.
- */
-int
-dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
-    boolean_t stream_compressed, uint64_t *sizep)
-{
-	int err;
-	struct calculate_send_arg size = { 0 };
-
-	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
-
-	/* tosnap must be a snapshot */
-	if (!dsl_dataset_is_snapshot(ds))
-		return (SET_ERROR(EINVAL));
-
-	/* verify that from_txg is before the provided snapshot was taken */
-	if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
-		return (SET_ERROR(EXDEV));
-	}
-	/*
-	 * traverse the blocks of the snapshot with birth times after
-	 * from_txg, summing their uncompressed size
-	 */
-	err = traverse_dataset(ds, from_txg,
-	    TRAVERSE_POST | TRAVERSE_NO_DECRYPT,
-	    dmu_calculate_send_traversal, &size);
-
-	if (err)
-		return (err);
-
-	err = dmu_adjust_send_estimate_for_indirects(ds, size.uncompressed,
-	    size.compressed, stream_compressed, sizep);
-	return (err);
-}
-
-
-#if defined(_KERNEL)
 /* BEGIN CSTYLED */
-module_param(zfs_override_estimate_recordsize, ulong, 0644);
-MODULE_PARM_DESC(zfs_override_estimate_recordsize,
-	"Record size calculation override for zfs send estimates");
-/* END CSTYLED */
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, corrupt_data, INT, ZMOD_RW,
+	"Allow sending corrupt data");
 
-module_param(zfs_send_corrupt_data, int, 0644);
-MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data");
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, queue_length, INT, ZMOD_RW,
+	"Maximum send queue length");
 
-module_param(zfs_send_queue_length, int, 0644);
-MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length");
-
-module_param(zfs_send_unmodified_spill_blocks, int, 0644);
-MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks,
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, unmodified_spill_blocks, INT, ZMOD_RW,
 	"Send unmodified spill blocks");
-#endif
+
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, no_prefetch_queue_length, INT, ZMOD_RW,
+	"Maximum send queue length for non-prefetch queues");
+
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, queue_ff, INT, ZMOD_RW,
+	"Send queue fill fraction");
+
+ZFS_MODULE_PARAM(zfs_send, zfs_send_, no_prefetch_queue_ff, INT, ZMOD_RW,
+	"Send queue fill fraction for non-prefetch queues");
+
+ZFS_MODULE_PARAM(zfs_send, zfs_, override_estimate_recordsize, INT, ZMOD_RW,
+	"Override block size estimate with fixed size");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/dmu_traverse.c b/zfs/module/zfs/dmu_traverse.c
index f426520..2f1c297 100644
--- a/zfs/module/zfs/dmu_traverse.c
+++ b/zfs/module/zfs/dmu_traverse.c

@@ -41,6 +41,7 @@
 
 int32_t zfs_pd_bytes_max = 50 * 1024 * 1024;	/* 50MB */
 int32_t send_holes_without_birth_time = 1;
+int32_t zfs_traverse_indirect_prefetch_limit = 32;
 
 typedef struct prefetch_data {
 	kmutex_t pd_mtx;
@@ -67,13 +68,14 @@
 	boolean_t td_realloc_possible;
 } traverse_data_t;
 
-static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
-    uint64_t objset, uint64_t object);
+static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp,
+    const dnode_phys_t *dnp, uint64_t objset, uint64_t object);
 static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
     uint64_t objset, uint64_t object);
 
 static int
-traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
+    uint64_t claim_txg)
 {
 	traverse_data_t *td = arg;
 	zbookmark_phys_t zb;
@@ -93,7 +95,8 @@
 }
 
 static int
-traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
+traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
+    uint64_t claim_txg)
 {
 	traverse_data_t *td = arg;
 
@@ -174,7 +177,10 @@
 	return (RESUME_SKIP_NONE);
 }
 
-static void
+/*
+ * Returns B_TRUE, if prefetch read is issued, otherwise B_FALSE.
+ */
+static boolean_t
 traverse_prefetch_metadata(traverse_data_t *td,
     const blkptr_t *bp, const zbookmark_phys_t *zb)
 {
@@ -182,24 +188,26 @@
 	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 
 	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
-		return;
+		return (B_FALSE);
 	/*
 	 * If we are in the process of resuming, don't prefetch, because
 	 * some children will not be needed (and in fact may have already
 	 * been freed).
 	 */
 	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
-		return;
+		return (B_FALSE);
 	if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
-		return;
+		return (B_FALSE);
 	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
-		return;
+		return (B_FALSE);
+	ASSERT(!BP_IS_REDACTED(bp));
 
 	if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
 		zio_flags |= ZIO_FLAG_RAW;
 
 	(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
 	    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
+	return (B_TRUE);
 }
 
 static boolean_t
@@ -207,7 +215,7 @@
 {
 	ASSERT(pfd->pd_flags & TRAVERSE_PREFETCH_DATA);
 	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
-	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
+	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG || BP_IS_REDACTED(bp))
 		return (B_FALSE);
 	return (B_TRUE);
 }
@@ -274,7 +282,7 @@
 		mutex_exit(&pd->pd_mtx);
 	}
 
-	if (BP_IS_HOLE(bp)) {
+	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
 		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
 		if (err != 0)
 			goto post;
@@ -292,7 +300,8 @@
 
 	if (BP_GET_LEVEL(bp) > 0) {
 		uint32_t flags = ARC_FLAG_WAIT;
-		int32_t i;
+		int32_t i, ptidx, pidx;
+		uint32_t prefetchlimit;
 		int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 		zbookmark_phys_t *czb;
 
@@ -305,16 +314,46 @@
 
 		czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);
 
+		/*
+		 * When performing a traversal it is beneficial to
+		 * asynchronously read-ahead the upcoming indirect
+		 * blocks since they will be needed shortly. However,
+		 * since a 128k indirect (non-L0) block may contain up
+		 * to 1024 128-byte block pointers, it's preferable to not
+		 * prefetch them all at once. Issuing a large number of
+		 * async reads may affect performance, and the earlier
+		 * the indirect blocks are prefetched the less likely
+		 * they are to still be resident in the ARC when needed.
+		 * Therefore, prefetching indirect blocks is limited to
+		 * zfs_traverse_indirect_prefetch_limit=32 blocks by
+		 * default.
+		 *
+		 * pidx: Index for which next prefetch to be issued.
+		 * ptidx: Index at which next prefetch to be triggered.
+		 */
+		ptidx = 0;
+		pidx = 1;
+		prefetchlimit = zfs_traverse_indirect_prefetch_limit;
 		for (i = 0; i < epb; i++) {
-			SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object,
-			    zb->zb_level - 1,
-			    zb->zb_blkid * epb + i);
-			traverse_prefetch_metadata(td,
-			    &((blkptr_t *)buf->b_data)[i], czb);
-		}
+			if (prefetchlimit && i == ptidx) {
+				ASSERT3S(ptidx, <=, pidx);
+				for (uint32_t  prefetched = 0; pidx < epb &&
+				    prefetched < prefetchlimit; pidx++) {
+					SET_BOOKMARK(czb, zb->zb_objset,
+					    zb->zb_object, zb->zb_level - 1,
+					    zb->zb_blkid * epb + pidx);
+					if (traverse_prefetch_metadata(td,
+					    &((blkptr_t *)buf->b_data)[pidx],
+					    czb) == B_TRUE) {
+						prefetched++;
+						if (prefetched ==
+						    MAX(prefetchlimit / 2, 1))
+							ptidx = pidx;
+					}
+				}
+			}
 
-		/* recursively visitbp() blocks below this */
-		for (i = 0; i < epb; i++) {
+			/* recursively visitbp() blocks below this */
 			SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object,
 			    zb->zb_level - 1,
 			    zb->zb_blkid * epb + i);
@@ -354,7 +393,7 @@
 
 		/* recursively visitbp() blocks below this */
 		for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
-			err = traverse_dnode(td, &child_dnp[i],
+			err = traverse_dnode(td, bp, &child_dnp[i],
 			    zb->zb_objset, zb->zb_blkid * epb + i);
 			if (err != 0)
 				break;
@@ -395,19 +434,19 @@
 			    zb->zb_objset, DMU_USERUSED_OBJECT);
 		}
 
-		err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
+		err = traverse_dnode(td, bp, &osp->os_meta_dnode, zb->zb_objset,
 		    DMU_META_DNODE_OBJECT);
 		if (err == 0 && OBJSET_BUF_HAS_USERUSED(buf)) {
 			if (OBJSET_BUF_HAS_PROJECTUSED(buf))
-				err = traverse_dnode(td,
+				err = traverse_dnode(td, bp,
 				    &osp->os_projectused_dnode, zb->zb_objset,
 				    DMU_PROJECTUSED_OBJECT);
 			if (err == 0)
-				err = traverse_dnode(td,
+				err = traverse_dnode(td, bp,
 				    &osp->os_groupused_dnode, zb->zb_objset,
 				    DMU_GROUPUSED_OBJECT);
 			if (err == 0)
-				err = traverse_dnode(td,
+				err = traverse_dnode(td, bp,
 				    &osp->os_userused_dnode, zb->zb_objset,
 				    DMU_USERUSED_OBJECT);
 		}
@@ -475,7 +514,7 @@
 }
 
 static int
-traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
+traverse_dnode(traverse_data_t *td, const blkptr_t *bp, const dnode_phys_t *dnp,
     uint64_t objset, uint64_t object)
 {
 	int j, err = 0;
@@ -488,7 +527,7 @@
 	if (td->td_flags & TRAVERSE_PRE) {
 		SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
 		    ZB_DNODE_BLKID);
-		err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+		err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
 		    td->td_arg);
 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
 			return (0);
@@ -511,7 +550,7 @@
 	if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
 		SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
 		    ZB_DNODE_BLKID);
-		err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+		err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
 		    td->td_arg);
 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
 			return (0);
@@ -521,18 +560,18 @@
 	return (err);
 }
 
-/* ARGSUSED */
 static int
 traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
+	(void) zilog, (void) dnp;
 	prefetch_data_t *pfd = arg;
 	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 	arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH |
 	    ARC_FLAG_PRESCIENT_PREFETCH;
 
 	ASSERT(pfd->pd_bytes_fetched >= 0);
-	if (bp == NULL)
+	if (zb->zb_level == ZB_DNODE_LEVEL)
 		return (0);
 	if (pfd->pd_cancel)
 		return (SET_ERROR(EINTR));
@@ -635,6 +674,7 @@
 		uint32_t flags = ARC_FLAG_WAIT;
 		objset_phys_t *osp;
 		arc_buf_t *buf;
+		ASSERT(!BP_IS_REDACTED(rootbp));
 
 		if ((td->td_flags & TRAVERSE_NO_DECRYPT) &&
 		    BP_IS_PROTECTED(rootbp))
@@ -766,18 +806,22 @@
 	return (err);
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(traverse_dataset);
 EXPORT_SYMBOL(traverse_pool);
 
-module_param(zfs_pd_bytes_max, int, 0644);
-MODULE_PARM_DESC(zfs_pd_bytes_max, "Max number of bytes to prefetch");
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, pd_bytes_max, INT, ZMOD_RW,
+	"Max number of bytes to prefetch");
 
+ZFS_MODULE_PARAM(zfs, zfs_, traverse_indirect_prefetch_limit, INT, ZMOD_RW,
+	"Traverse prefetch number of blocks pointed by indirect block");
+
+#if defined(_KERNEL)
 module_param_named(ignore_hole_birth, send_holes_without_birth_time, int, 0644);
-MODULE_PARM_DESC(ignore_hole_birth, "Alias for send_holes_without_birth_time");
-
-module_param_named(send_holes_without_birth_time,
-	send_holes_without_birth_time, int, 0644);
-MODULE_PARM_DESC(send_holes_without_birth_time,
-	"Ignore hole_birth txg for zfs send");
+MODULE_PARM_DESC(ignore_hole_birth,
+	"Alias for send_holes_without_birth_time");
 #endif
+
+ZFS_MODULE_PARAM(zfs, , send_holes_without_birth_time, INT, ZMOD_RW,
+	"Ignore hole_birth txg for zfs send");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/dmu_tx.c b/zfs/module/zfs/dmu_tx.c
index d6a42f8..063934f 100644
--- a/zfs/module/zfs/dmu_tx.c
+++ b/zfs/module/zfs/dmu_tx.c

@@ -37,7 +37,7 @@
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
 #include <sys/zfs_context.h>
-#include <sys/trace_dmu.h>
+#include <sys/trace_zfs.h>
 
 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
     uint64_t arg1, uint64_t arg2);
@@ -54,6 +54,7 @@
 	{ "dmu_tx_dirty_delay",		KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_over_max",	KSTAT_DATA_UINT64 },
 	{ "dmu_tx_dirty_frees_delay",	KSTAT_DATA_UINT64 },
+	{ "dmu_tx_wrlog_delay",		KSTAT_DATA_UINT64 },
 	{ "dmu_tx_quota",		KSTAT_DATA_UINT64 },
 };
 
@@ -230,9 +231,6 @@
 
 	(void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
 
-	if (zfs_refcount_count(&txh->txh_space_towrite) > 2 * DMU_MAX_ACCESS)
-		err = SET_ERROR(EFBIG);
-
 	if (dn == NULL)
 		return;
 
@@ -293,6 +291,53 @@
 }
 
 static void
+dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+{
+	dnode_t *dn = txh->txh_dnode;
+	int err = 0;
+
+	if (len == 0)
+		return;
+
+	(void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
+
+	if (dn == NULL)
+		return;
+
+	/*
+	 * For i/o error checking, read the block that will be needed to
+	 * perform the append: the first level-0 block, and only if the write
+	 * to it is partial (not block-aligned).  No other blocks are read.
+	 */
+	if (dn->dn_maxblkid == 0) {
+		if (off < dn->dn_datablksz &&
+		    (off > 0 || len < dn->dn_datablksz)) {
+			err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+			if (err != 0) {
+				txh->txh_tx->tx_err = err;
+			}
+		}
+	} else {
+		zio_t *zio = zio_root(dn->dn_objset->os_spa,
+		    NULL, NULL, ZIO_FLAG_CANFAIL);
+
+		/* first level-0 block */
+		uint64_t start = off >> dn->dn_datablkshift;
+		if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
+			err = dmu_tx_check_ioerr(zio, dn, 0, start);
+			if (err != 0) {
+				txh->txh_tx->tx_err = err;
+			}
+		}
+
+		err = zio_wait(zio);
+		if (err != 0) {
+			txh->txh_tx->tx_err = err;
+		}
+	}
+}
+
+static void
 dmu_tx_count_dnode(dmu_tx_hold_t *txh)
 {
 	(void) zfs_refcount_add_many(&txh->txh_space_towrite,
@@ -317,23 +362,6 @@
 }
 
 void
-dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object)
-{
-	dmu_tx_hold_t *txh;
-
-	ASSERT(tx->tx_txg == 0);
-	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
-	    object, THT_WRITE, 0, 0);
-	if (txh == NULL)
-		return;
-
-	dnode_t *dn = txh->txh_dnode;
-	(void) zfs_refcount_add_many(&txh->txh_space_towrite,
-	    1ULL << dn->dn_indblkshift, FTAG);
-	dmu_tx_count_dnode(txh);
-}
-
-void
 dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
 {
 	dmu_tx_hold_t *txh;
@@ -350,6 +378,42 @@
 }
 
 /*
+ * Should be used when appending to an object and the exact offset is unknown.
+ * The write must occur at or beyond the specified offset.  Only the L0 block
+ * at provided offset will be prefetched.
+ */
+void
+dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
+{
+	dmu_tx_hold_t *txh;
+
+	ASSERT0(tx->tx_txg);
+	ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_APPEND, off, DMU_OBJECT_END);
+	if (txh != NULL) {
+		dmu_tx_count_append(txh, off, len);
+		dmu_tx_count_dnode(txh);
+	}
+}
+
+void
+dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
+{
+	dmu_tx_hold_t *txh;
+
+	ASSERT0(tx->tx_txg);
+	ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+	txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
+	if (txh != NULL) {
+		dmu_tx_count_append(txh, off, len);
+		dmu_tx_count_dnode(txh);
+	}
+}
+
+/*
  * This function marks the transaction as being a "net free".  The end
  * result is that refquotas will be disabled for this transaction, and
  * this transaction will be able to use half of the pool space overhead
@@ -633,7 +697,8 @@
 			/* XXX txh_arg2 better not be zero... */
 
 			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
-			    txh->txh_type, beginblk, endblk);
+			    txh->txh_type, (u_longlong_t)beginblk,
+			    (u_longlong_t)endblk);
 
 			switch (txh->txh_type) {
 			case THT_WRITE:
@@ -656,6 +721,26 @@
 				if (blkid == 0)
 					match_offset = TRUE;
 				break;
+			case THT_APPEND:
+				if (blkid >= beginblk && (blkid <= endblk ||
+				    txh->txh_arg2 == DMU_OBJECT_END))
+					match_offset = TRUE;
+
+				/*
+				 * THT_WRITE used for bonus and spill blocks.
+				 */
+				ASSERT(blkid != DMU_BONUS_BLKID &&
+				    blkid != DMU_SPILL_BLKID);
+
+				/*
+				 * They might have to increase nlevels,
+				 * thus dirtying the new TLIBs.  Or they
+				 * might have to change the block size,
+				 * thus dirtying the new lvl=0 blk=0.
+				 */
+				if (blkid == 0)
+					match_offset = TRUE;
+				break;
 			case THT_FREE:
 				/*
 				 * We will dirty all the level 1 blocks in
@@ -799,34 +884,49 @@
 dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
 {
 	dsl_pool_t *dp = tx->tx_pool;
-	uint64_t delay_min_bytes =
-	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
-	hrtime_t wakeup, min_tx_time, now;
+	uint64_t delay_min_bytes, wrlog;
+	hrtime_t wakeup, tx_time = 0, now;
 
-	if (dirty <= delay_min_bytes)
+	/* Calculate minimum transaction time for the dirty data amount. */
+	delay_min_bytes =
+	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
+	if (dirty > delay_min_bytes) {
+		/*
+		 * The caller has already waited until we are under the max.
+		 * We make them pass us the amount of dirty data so we don't
+		 * have to handle the case of it being >= the max, which
+		 * could cause a divide-by-zero if it's == the max.
+		 */
+		ASSERT3U(dirty, <, zfs_dirty_data_max);
+
+		tx_time = zfs_delay_scale * (dirty - delay_min_bytes) /
+		    (zfs_dirty_data_max - dirty);
+	}
+
+	/* Calculate minimum transaction time for the TX_WRITE log size. */
+	wrlog = aggsum_upper_bound(&dp->dp_wrlog_total);
+	delay_min_bytes =
+	    zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;
+	if (wrlog >= zfs_wrlog_data_max) {
+		tx_time = zfs_delay_max_ns;
+	} else if (wrlog > delay_min_bytes) {
+		tx_time = MAX(zfs_delay_scale * (wrlog - delay_min_bytes) /
+		    (zfs_wrlog_data_max - wrlog), tx_time);
+	}
+
+	if (tx_time == 0)
 		return;
 
-	/*
-	 * The caller has already waited until we are under the max.
-	 * We make them pass us the amount of dirty data so we don't
-	 * have to handle the case of it being >= the max, which could
-	 * cause a divide-by-zero if it's == the max.
-	 */
-	ASSERT3U(dirty, <, zfs_dirty_data_max);
-
+	tx_time = MIN(tx_time, zfs_delay_max_ns);
 	now = gethrtime();
-	min_tx_time = zfs_delay_scale *
-	    (dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty);
-	min_tx_time = MIN(min_tx_time, zfs_delay_max_ns);
-	if (now > tx->tx_start + min_tx_time)
+	if (now > tx->tx_start + tx_time)
 		return;
 
 	DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty,
-	    uint64_t, min_tx_time);
+	    uint64_t, tx_time);
 
 	mutex_enter(&dp->dp_lock);
-	wakeup = MAX(tx->tx_start + min_tx_time,
-	    dp->dp_last_wakeup + min_tx_time);
+	wakeup = MAX(tx->tx_start + tx_time, dp->dp_last_wakeup + tx_time);
 	dp->dp_last_wakeup = wakeup;
 	mutex_exit(&dp->dp_lock);
 
@@ -904,6 +1004,13 @@
 	}
 
 	if (!tx->tx_dirty_delayed &&
+	    dsl_pool_need_wrlog_delay(tx->tx_pool)) {
+		tx->tx_wait_dirty = B_TRUE;
+		DMU_TX_STAT_BUMP(dmu_tx_wrlog_delay);
+		return (SET_ERROR(ERESTART));
+	}
+
+	if (!tx->tx_dirty_delayed &&
 	    dsl_pool_need_dirty_delay(tx->tx_pool)) {
 		tx->tx_wait_dirty = B_TRUE;
 		DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
@@ -1032,6 +1139,22 @@
  * details on the throttle). This is used by the VFS operations, after
  * they have already called dmu_tx_wait() (though most likely on a
  * different tx).
+ *
+ * It is guaranteed that subsequent successful calls to dmu_tx_assign()
+ * will assign the tx to monotonically increasing txgs. Of course this is
+ * not strong monotonicity, because the same txg can be returned multiple
+ * times in a row. This guarantee holds both for subsequent calls from
+ * one thread and for multiple threads. For example, it is impossible to
+ * observe the following sequence of events:
+ *
+ *          Thread 1                            Thread 2
+ *
+ *     dmu_tx_assign(T1, ...)
+ *     1 <- dmu_tx_get_txg(T1)
+ *                                       dmu_tx_assign(T2, ...)
+ *                                       2 <- dmu_tx_get_txg(T2)
+ *     dmu_tx_assign(T3, ...)
+ *     1 <- dmu_tx_get_txg(T3)
  */
 int
 dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
@@ -1200,7 +1323,7 @@
 	 * Call any registered callbacks with an error code.
 	 */
 	if (!list_is_empty(&tx->tx_callbacks))
-		dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
+		dmu_tx_do_callbacks(&tx->tx_callbacks, SET_ERROR(ECANCELED));
 
 	dmu_tx_destroy(tx);
 }
@@ -1401,6 +1524,8 @@
 EXPORT_SYMBOL(dmu_tx_create);
 EXPORT_SYMBOL(dmu_tx_hold_write);
 EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
+EXPORT_SYMBOL(dmu_tx_hold_append);
+EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
 EXPORT_SYMBOL(dmu_tx_hold_free);
 EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
 EXPORT_SYMBOL(dmu_tx_hold_zap);

diff --git a/zfs/module/zfs/dmu_zfetch.c b/zfs/module/zfs/dmu_zfetch.c
index 46dc462..d2985d5 100644
--- a/zfs/module/zfs/dmu_zfetch.c
+++ b/zfs/module/zfs/dmu_zfetch.c

@@ -24,16 +24,18 @@
  */
 
 /*
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
+#include <sys/arc_impl.h>
 #include <sys/dnode.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/dmu.h>
 #include <sys/dbuf.h>
 #include <sys/kstat.h>
+#include <sys/wmsum.h>
 
 /*
  * This tunable disables predictive prefetch.  Note that it leaves "prescient"
@@ -47,9 +49,13 @@
 /* max # of streams per zfetch */
 unsigned int	zfetch_max_streams = 8;
 /* min time before stream reclaim */
-unsigned int	zfetch_min_sec_reap = 2;
-/* max bytes to prefetch per stream (default 8MB) */
-unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
+static unsigned int	zfetch_min_sec_reap = 1;
+/* max time before stream delete */
+static unsigned int	zfetch_max_sec_reap = 2;
+/* min bytes to prefetch per stream (default 4MB) */
+static unsigned int	zfetch_min_distance = 4 * 1024 * 1024;
+/* max bytes to prefetch per stream (default 64MB) */
+unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
 /* max bytes to prefetch indirects for per stream (default 64MB) */
 unsigned int	zfetch_max_idistance = 64 * 1024 * 1024;
 /* max number of bytes in an array_read in which we allow prefetching (1MB) */
@@ -59,28 +65,70 @@
 	kstat_named_t zfetchstat_hits;
 	kstat_named_t zfetchstat_misses;
 	kstat_named_t zfetchstat_max_streams;
+	kstat_named_t zfetchstat_io_issued;
+	kstat_named_t zfetchstat_io_active;
 } zfetch_stats_t;
 
 static zfetch_stats_t zfetch_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
 	{ "misses",			KSTAT_DATA_UINT64 },
 	{ "max_streams",		KSTAT_DATA_UINT64 },
+	{ "io_issued",			KSTAT_DATA_UINT64 },
+	{ "io_active",			KSTAT_DATA_UINT64 },
 };
 
-#define	ZFETCHSTAT_BUMP(stat) \
-	atomic_inc_64(&zfetch_stats.stat.value.ui64);
+struct {
+	wmsum_t zfetchstat_hits;
+	wmsum_t zfetchstat_misses;
+	wmsum_t zfetchstat_max_streams;
+	wmsum_t zfetchstat_io_issued;
+	aggsum_t zfetchstat_io_active;
+} zfetch_sums;
+
+#define	ZFETCHSTAT_BUMP(stat)					\
+	wmsum_add(&zfetch_sums.stat, 1)
+#define	ZFETCHSTAT_ADD(stat, val)				\
+	wmsum_add(&zfetch_sums.stat, val)
+
 
 kstat_t		*zfetch_ksp;
 
+static int
+zfetch_kstats_update(kstat_t *ksp, int rw)
+{
+	zfetch_stats_t *zs = ksp->ks_data;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	zs->zfetchstat_hits.value.ui64 =
+	    wmsum_value(&zfetch_sums.zfetchstat_hits);
+	zs->zfetchstat_misses.value.ui64 =
+	    wmsum_value(&zfetch_sums.zfetchstat_misses);
+	zs->zfetchstat_max_streams.value.ui64 =
+	    wmsum_value(&zfetch_sums.zfetchstat_max_streams);
+	zs->zfetchstat_io_issued.value.ui64 =
+	    wmsum_value(&zfetch_sums.zfetchstat_io_issued);
+	zs->zfetchstat_io_active.value.ui64 =
+	    aggsum_value(&zfetch_sums.zfetchstat_io_active);
+	return (0);
+}
+
 void
 zfetch_init(void)
 {
+	wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
+	wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
+	wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
+	wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
+	aggsum_init(&zfetch_sums.zfetchstat_io_active, 0);
+
 	zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
 	    KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
 	    KSTAT_FLAG_VIRTUAL);
 
 	if (zfetch_ksp != NULL) {
 		zfetch_ksp->ks_data = &zfetch_stats;
+		zfetch_ksp->ks_update = zfetch_kstats_update;
 		kstat_install(zfetch_ksp);
 	}
 }
@@ -92,6 +140,13 @@
 		kstat_delete(zfetch_ksp);
 		zfetch_ksp = NULL;
 	}
+
+	wmsum_fini(&zfetch_sums.zfetchstat_hits);
+	wmsum_fini(&zfetch_sums.zfetchstat_misses);
+	wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
+	wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
+	ASSERT0(aggsum_value(&zfetch_sums.zfetchstat_io_active));
+	aggsum_fini(&zfetch_sums.zfetchstat_io_active);
 }
 
 /*
@@ -104,22 +159,33 @@
 {
 	if (zf == NULL)
 		return;
-
 	zf->zf_dnode = dno;
+	zf->zf_numstreams = 0;
 
 	list_create(&zf->zf_stream, sizeof (zstream_t),
 	    offsetof(zstream_t, zs_node));
 
-	rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+static void
+dmu_zfetch_stream_fini(zstream_t *zs)
+{
+	ASSERT(!list_link_active(&zs->zs_node));
+	zfs_refcount_destroy(&zs->zs_callers);
+	zfs_refcount_destroy(&zs->zs_refs);
+	kmem_free(zs, sizeof (*zs));
 }
 
 static void
 dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
 {
-	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+	ASSERT(MUTEX_HELD(&zf->zf_lock));
 	list_remove(&zf->zf_stream, zs);
-	mutex_destroy(&zs->zs_lock);
-	kmem_free(zs, sizeof (*zs));
+	zf->zf_numstreams--;
+	membar_producer();
+	if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+		dmu_zfetch_stream_fini(zs);
 }
 
 /*
@@ -131,93 +197,136 @@
 {
 	zstream_t *zs;
 
-	ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
-
-	rw_enter(&zf->zf_rwlock, RW_WRITER);
+	mutex_enter(&zf->zf_lock);
 	while ((zs = list_head(&zf->zf_stream)) != NULL)
 		dmu_zfetch_stream_remove(zf, zs);
-	rw_exit(&zf->zf_rwlock);
+	mutex_exit(&zf->zf_lock);
 	list_destroy(&zf->zf_stream);
-	rw_destroy(&zf->zf_rwlock);
+	mutex_destroy(&zf->zf_lock);
 
 	zf->zf_dnode = NULL;
 }
 
 /*
- * If there aren't too many streams already, create a new stream.
+ * If there aren't too many active streams already, create one more.
+ * In process delete/reuse all streams without hits for zfetch_max_sec_reap.
+ * If needed, reuse oldest stream without hits for zfetch_min_sec_reap or ever.
  * The "blkid" argument is the next block that we expect this stream to access.
- * While we're here, clean up old streams (which haven't been
- * accessed for at least zfetch_min_sec_reap seconds).
  */
 static void
 dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
 {
-	zstream_t *zs_next;
-	int numstreams = 0;
+	zstream_t *zs, *zs_next, *zs_old = NULL;
+	hrtime_t now = gethrtime(), t;
 
-	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+	ASSERT(MUTEX_HELD(&zf->zf_lock));
 
 	/*
-	 * Clean up old streams.
+	 * Delete too old streams, reusing the first found one.
 	 */
-	for (zstream_t *zs = list_head(&zf->zf_stream);
-	    zs != NULL; zs = zs_next) {
+	t = now - SEC2NSEC(zfetch_max_sec_reap);
+	for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) {
 		zs_next = list_next(&zf->zf_stream, zs);
-		if (((gethrtime() - zs->zs_atime) / NANOSEC) >
-		    zfetch_min_sec_reap)
+		/*
+		 * Skip if still active.  1 -- zf_stream reference.
+		 */
+		if (zfs_refcount_count(&zs->zs_refs) != 1)
+			continue;
+		if (zs->zs_atime > t)
+			continue;
+		if (zs_old)
 			dmu_zfetch_stream_remove(zf, zs);
 		else
-			numstreams++;
+			zs_old = zs;
+	}
+	if (zs_old) {
+		zs = zs_old;
+		goto reuse;
 	}
 
 	/*
 	 * The maximum number of streams is normally zfetch_max_streams,
 	 * but for small files we lower it such that it's at least possible
 	 * for all the streams to be non-overlapping.
-	 *
-	 * If we are already at the maximum number of streams for this file,
-	 * even after removing old streams, then don't create this stream.
 	 */
 	uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
 	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
 	    zfetch_max_distance));
-	if (numstreams >= max_streams) {
+	if (zf->zf_numstreams >= max_streams) {
+		t = now - SEC2NSEC(zfetch_min_sec_reap);
+		for (zs = list_head(&zf->zf_stream); zs != NULL;
+		    zs = list_next(&zf->zf_stream, zs)) {
+			if (zfs_refcount_count(&zs->zs_refs) != 1)
+				continue;
+			if (zs->zs_atime > t)
+				continue;
+			if (zs_old == NULL || zs->zs_atime < zs_old->zs_atime)
+				zs_old = zs;
+		}
+		if (zs_old) {
+			zs = zs_old;
+			goto reuse;
+		}
 		ZFETCHSTAT_BUMP(zfetchstat_max_streams);
 		return;
 	}
 
-	zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
-	zs->zs_blkid = blkid;
-	zs->zs_pf_blkid = blkid;
-	zs->zs_ipf_blkid = blkid;
-	zs->zs_atime = gethrtime();
-	mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
-
+	zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
+	zs->zs_fetch = zf;
+	zfs_refcount_create(&zs->zs_callers);
+	zfs_refcount_create(&zs->zs_refs);
+	/* One reference for zf_stream. */
+	zfs_refcount_add(&zs->zs_refs, NULL);
+	zf->zf_numstreams++;
 	list_insert_head(&zf->zf_stream, zs);
+
+reuse:
+	zs->zs_blkid = blkid;
+	zs->zs_pf_dist = 0;
+	zs->zs_pf_start = blkid;
+	zs->zs_pf_end = blkid;
+	zs->zs_ipf_dist = 0;
+	zs->zs_ipf_start = blkid;
+	zs->zs_ipf_end = blkid;
+	/* Allow immediate stream reuse until first hit. */
+	zs->zs_atime = now - SEC2NSEC(zfetch_min_sec_reap);
+	zs->zs_missed = B_FALSE;
+	zs->zs_more = B_FALSE;
+}
+
+static void
+dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued)
+{
+	zstream_t *zs = arg;
+
+	if (io_issued && level == 0 && blkid < zs->zs_blkid)
+		zs->zs_more = B_TRUE;
+	if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+		dmu_zfetch_stream_fini(zs);
+	aggsum_add(&zfetch_sums.zfetchstat_io_active, -1);
 }
 
 /*
- * This is the predictive prefetch entry point.  It associates dnode access
- * specified with blkid and nblks arguments with prefetch stream, predicts
- * further accesses based on that stats and initiates speculative prefetch.
+ * This is the predictive prefetch entry point.  dmu_zfetch_prepare()
+ * associates dnode access specified with blkid and nblks arguments with
+ * prefetch stream, predicts further accesses based on those stats and returns
+ * the stream pointer on success.  That pointer must later be passed to
+ * dmu_zfetch_run() to initiate the speculative prefetch for the stream and
+ * release it.  dmu_zfetch() is a wrapper for simple cases when window between
+ * prediction and prefetch initiation is not needed.
  * fetch_data argument specifies whether actual data blocks should be fetched:
  *   FALSE -- prefetch only indirect blocks for predicted data blocks;
  *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
  */
-void
-dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
+zstream_t *
+dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
+    boolean_t fetch_data, boolean_t have_lock)
 {
 	zstream_t *zs;
-	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
-	int64_t pf_ahead_blks, max_blks;
-	int epbs, max_dist_blks, pf_nblks, ipf_nblks;
-	uint64_t end_of_access_blkid;
-	end_of_access_blkid = blkid + nblks;
 	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
-	krw_t rw = RW_READER;
 
 	if (zfs_prefetch_disable)
-		return;
+		return (NULL);
 	/*
 	 * If we haven't yet loaded the indirect vdevs' mappings, we
 	 * can only read from blocks that we carefully ensure are on
@@ -226,17 +335,29 @@
 	 * blocks (e.g. of the MOS's dnode object).
 	 */
 	if (!spa_indirect_vdevs_loaded(spa))
-		return;
+		return (NULL);
 
 	/*
 	 * As a fast path for small (single-block) files, ignore access
 	 * to the first block.
 	 */
-	if (blkid == 0)
-		return;
+	if (!have_lock && blkid == 0)
+		return (NULL);
 
-retry:
-	rw_enter(&zf->zf_rwlock, rw);
+	if (!have_lock)
+		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
+
+	/*
+	 * A fast path for small files for which no prefetch will
+	 * happen.
+	 */
+	uint64_t maxblkid = zf->zf_dnode->dn_maxblkid;
+	if (maxblkid < 2) {
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		return (NULL);
+	}
+	mutex_enter(&zf->zf_lock);
 
 	/*
 	 * Find matching prefetch stream.  Depending on whether the accesses
@@ -245,141 +366,224 @@
 	 */
 	for (zs = list_head(&zf->zf_stream); zs != NULL;
 	    zs = list_next(&zf->zf_stream, zs)) {
-		if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
-			mutex_enter(&zs->zs_lock);
-			/*
-			 * zs_blkid could have changed before we
-			 * acquired zs_lock; re-check them here.
-			 */
-			if (blkid == zs->zs_blkid) {
-				break;
-			} else if (blkid + 1 == zs->zs_blkid) {
-				blkid++;
-				nblks--;
-				if (nblks == 0) {
-					/* Already prefetched this before. */
-					mutex_exit(&zs->zs_lock);
-					rw_exit(&zf->zf_rwlock);
-					return;
-				}
-				break;
-			}
-			mutex_exit(&zs->zs_lock);
+		if (blkid == zs->zs_blkid) {
+			break;
+		} else if (blkid + 1 == zs->zs_blkid) {
+			blkid++;
+			nblks--;
+			break;
 		}
 	}
 
+	/*
+	 * If the file is ending, remove the matching stream if found.
+	 * If not found then it is too late to create a new one now.
+	 */
+	uint64_t end_of_access_blkid = blkid + nblks;
+	if (end_of_access_blkid >= maxblkid) {
+		if (zs != NULL)
+			dmu_zfetch_stream_remove(zf, zs);
+		mutex_exit(&zf->zf_lock);
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		return (NULL);
+	}
+
+	/* Exit if we already prefetched this block before. */
+	if (nblks == 0) {
+		mutex_exit(&zf->zf_lock);
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		return (NULL);
+	}
+
 	if (zs == NULL) {
 		/*
 		 * This access is not part of any existing stream.  Create
 		 * a new stream for it.
 		 */
-		ZFETCHSTAT_BUMP(zfetchstat_misses);
-		if (rw == RW_READER && !rw_tryupgrade(&zf->zf_rwlock)) {
-			rw_exit(&zf->zf_rwlock);
-			rw = RW_WRITER;
-			goto retry;
-		}
-
 		dmu_zfetch_stream_create(zf, end_of_access_blkid);
-		rw_exit(&zf->zf_rwlock);
-		return;
+		mutex_exit(&zf->zf_lock);
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		ZFETCHSTAT_BUMP(zfetchstat_misses);
+		return (NULL);
 	}
 
 	/*
 	 * This access was to a block that we issued a prefetch for on
-	 * behalf of this stream. Issue further prefetches for this stream.
+	 * behalf of this stream.  Calculate further prefetch distances.
 	 *
-	 * Normally, we start prefetching where we stopped
-	 * prefetching last (zs_pf_blkid).  But when we get our first
-	 * hit on this stream, zs_pf_blkid == zs_blkid, we don't
-	 * want to prefetch the block we just accessed.  In this case,
-	 * start just after the block we just accessed.
+	 * Start prefetch from the demand access size (nblks).  Double the
+	 * distance every access up to zfetch_min_distance.  After that only
+	 * if needed increase the distance by 1/8 up to zfetch_max_distance.
+	 *
+	 * Don't double the distance beyond single block if we have more
+	 * than ~6% of ARC held by active prefetches.  It should help with
+	 * getting out of RAM on some badly mispredicted read patterns.
 	 */
-	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
-
-	/*
-	 * Double our amount of prefetched data, but don't let the
-	 * prefetch get further ahead than zfetch_max_distance.
-	 */
+	unsigned int dbs = zf->zf_dnode->dn_datablkshift;
+	unsigned int nbytes = nblks << dbs;
+	unsigned int pf_nblks;
 	if (fetch_data) {
-		max_dist_blks =
-		    zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
-		/*
-		 * Previously, we were (zs_pf_blkid - blkid) ahead.  We
-		 * want to now be double that, so read that amount again,
-		 * plus the amount we are catching up by (i.e. the amount
-		 * read just now).
-		 */
-		pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
-		max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
-		pf_nblks = MIN(pf_ahead_blks, max_blks);
+		if (unlikely(zs->zs_pf_dist < nbytes))
+			zs->zs_pf_dist = nbytes;
+		else if (zs->zs_pf_dist < zfetch_min_distance &&
+		    (zs->zs_pf_dist < (1 << dbs) ||
+		    aggsum_compare(&zfetch_sums.zfetchstat_io_active,
+		    arc_c_max >> (4 + dbs)) < 0))
+			zs->zs_pf_dist *= 2;
+		else if (zs->zs_more)
+			zs->zs_pf_dist += zs->zs_pf_dist / 8;
+		zs->zs_more = B_FALSE;
+		if (zs->zs_pf_dist > zfetch_max_distance)
+			zs->zs_pf_dist = zfetch_max_distance;
+		pf_nblks = zs->zs_pf_dist >> dbs;
 	} else {
 		pf_nblks = 0;
 	}
-
-	zs->zs_pf_blkid = pf_start + pf_nblks;
+	if (zs->zs_pf_start < end_of_access_blkid)
+		zs->zs_pf_start = end_of_access_blkid;
+	if (zs->zs_pf_end < end_of_access_blkid + pf_nblks)
+		zs->zs_pf_end = end_of_access_blkid + pf_nblks;
 
 	/*
-	 * Do the same for indirects, starting from where we stopped last,
-	 * or where we will stop reading data blocks (and the indirects
-	 * that point to them).
+	 * Do the same for indirects, starting where we will stop reading
+	 * data blocks (and the indirects that point to them).
 	 */
-	ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid);
-	max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift;
-	/*
-	 * We want to double our distance ahead of the data prefetch
-	 * (or reader, if we are not prefetching data).  Previously, we
-	 * were (zs_ipf_blkid - blkid) ahead.  To double that, we read
-	 * that amount again, plus the amount we are catching up by
-	 * (i.e. the amount read now + the amount of data prefetched now).
-	 */
-	pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
-	max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
-	ipf_nblks = MIN(pf_ahead_blks, max_blks);
-	zs->zs_ipf_blkid = ipf_start + ipf_nblks;
+	if (unlikely(zs->zs_ipf_dist < nbytes))
+		zs->zs_ipf_dist = nbytes;
+	else
+		zs->zs_ipf_dist *= 2;
+	if (zs->zs_ipf_dist > zfetch_max_idistance)
+		zs->zs_ipf_dist = zfetch_max_idistance;
+	pf_nblks = zs->zs_ipf_dist >> dbs;
+	if (zs->zs_ipf_start < zs->zs_pf_end)
+		zs->zs_ipf_start = zs->zs_pf_end;
+	if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
+		zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;
 
-	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
-	ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
-	ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;
-
-	zs->zs_atime = gethrtime();
 	zs->zs_blkid = end_of_access_blkid;
-	mutex_exit(&zs->zs_lock);
-	rw_exit(&zf->zf_rwlock);
+	/* Protect the stream from reclamation. */
+	zs->zs_atime = gethrtime();
+	zfs_refcount_add(&zs->zs_refs, NULL);
+	/* Count concurrent callers. */
+	zfs_refcount_add(&zs->zs_callers, NULL);
+	mutex_exit(&zf->zf_lock);
 
-	/*
-	 * dbuf_prefetch() is asynchronous (even when it needs to read
-	 * indirect blocks), but we still prefer to drop our locks before
-	 * calling it to reduce the time we hold them.
-	 */
+	if (!have_lock)
+		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
 
-	for (int i = 0; i < pf_nblks; i++) {
-		dbuf_prefetch(zf->zf_dnode, 0, pf_start + i,
-		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
-	}
-	for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) {
-		dbuf_prefetch(zf->zf_dnode, 1, iblk,
-		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
-	}
 	ZFETCHSTAT_BUMP(zfetchstat_hits);
+	return (zs);
 }
 
-#if defined(_KERNEL)
+void
+dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
+{
+	zfetch_t *zf = zs->zs_fetch;
+	int64_t pf_start, pf_end, ipf_start, ipf_end;
+	int epbs, issued;
+
+	if (missed)
+		zs->zs_missed = missed;
+
+	/*
+	 * Postpone the prefetch if there are more concurrent callers.
+	 * It happens when multiple requests are waiting for the same
+	 * indirect block.  The last one will run the prefetch for all.
+	 */
+	if (zfs_refcount_remove(&zs->zs_callers, NULL) != 0) {
+		/* Drop reference taken in dmu_zfetch_prepare(). */
+		if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+			dmu_zfetch_stream_fini(zs);
+		return;
+	}
+
+	mutex_enter(&zf->zf_lock);
+	if (zs->zs_missed) {
+		pf_start = zs->zs_pf_start;
+		pf_end = zs->zs_pf_start = zs->zs_pf_end;
+	} else {
+		pf_start = pf_end = 0;
+	}
+	ipf_start = zs->zs_ipf_start;
+	ipf_end = zs->zs_ipf_start = zs->zs_ipf_end;
+	mutex_exit(&zf->zf_lock);
+	ASSERT3S(pf_start, <=, pf_end);
+	ASSERT3S(ipf_start, <=, ipf_end);
+
+	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
+	ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
+	ipf_end = P2ROUNDUP(ipf_end, 1 << epbs) >> epbs;
+	ASSERT3S(ipf_start, <=, ipf_end);
+	issued = pf_end - pf_start + ipf_end - ipf_start;
+	if (issued > 1) {
+		/* Extra refs on top of the one from dmu_zfetch_prepare(). */
+		zfs_refcount_add_many(&zs->zs_refs, issued - 1, NULL);
+	} else if (issued == 0) {
+		/* Some other thread has done our work, so drop the ref. */
+		if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+			dmu_zfetch_stream_fini(zs);
+		return;
+	}
+	aggsum_add(&zfetch_sums.zfetchstat_io_active, issued);
+
+	if (!have_lock)
+		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
+
+	issued = 0;
+	for (int64_t blk = pf_start; blk < pf_end; blk++) {
+		issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
+		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
+		    dmu_zfetch_done, zs);
+	}
+	for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
+		issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
+		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
+		    dmu_zfetch_done, zs);
+	}
+
+	if (!have_lock)
+		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+
+	if (issued)
+		ZFETCHSTAT_ADD(zfetchstat_io_issued, issued);
+}
+
+void
+dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
+    boolean_t missed, boolean_t have_lock)
+{
+	zstream_t *zs;
+
+	zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
+	if (zs)
+		dmu_zfetch_run(zs, missed, have_lock);
+}
+
 /* BEGIN CSTYLED */
-module_param(zfs_prefetch_disable, int, 0644);
-MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching");
+ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
+	"Disable all ZFS prefetching");
 
-module_param(zfetch_max_streams, uint, 0644);
-MODULE_PARM_DESC(zfetch_max_streams, "Max number of streams per zfetch");
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW,
+	"Max number of streams per zfetch");
 
-module_param(zfetch_min_sec_reap, uint, 0644);
-MODULE_PARM_DESC(zfetch_min_sec_reap, "Min time before stream reclaim");
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_sec_reap, UINT, ZMOD_RW,
+	"Min time before stream reclaim");
 
-module_param(zfetch_max_distance, uint, 0644);
-MODULE_PARM_DESC(zfetch_max_distance,
-	"Max bytes to prefetch per stream (default 8MB)");
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_sec_reap, UINT, ZMOD_RW,
+	"Max time before stream delete");
 
-module_param(zfetch_array_rd_sz, ulong, 0644);
-MODULE_PARM_DESC(zfetch_array_rd_sz, "Number of bytes in a array_read");
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_distance, UINT, ZMOD_RW,
+	"Min bytes to prefetch per stream");
+
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
+	"Max bytes to prefetch per stream");
+
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
+	"Max bytes to prefetch indirects for per stream");
+
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW,
+	"Number of bytes in a array_read");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/dnode.c b/zfs/module/zfs/dnode.c
index 7acfc36..efebc44 100644
--- a/zfs/module/zfs/dnode.c
+++ b/zfs/module/zfs/dnode.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
@@ -37,7 +37,7 @@
 #include <sys/zio.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/range_tree.h>
-#include <sys/trace_dnode.h>
+#include <sys/trace_zfs.h>
 #include <sys/zfs_project.h>
 
 dnode_stats_t dnode_stats = {
@@ -71,10 +71,12 @@
 	{ "dnode_move_active",			KSTAT_DATA_UINT64 },
 };
 
+dnode_sums_t dnode_sums;
+
 static kstat_t *dnode_ksp;
 static kmem_cache_t *dnode_cache;
 
-ASSERTV(static dnode_phys_t dnode_phys_zero);
+static dnode_phys_t dnode_phys_zero __maybe_unused;
 
 int zfs_default_bs = SPA_MINBLOCKSHIFT;
 int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
@@ -89,11 +91,11 @@
 	const dmu_buf_impl_t *d1 = x1;
 	const dmu_buf_impl_t *d2 = x2;
 
-	int cmp = AVL_CMP(d1->db_level, d2->db_level);
+	int cmp = TREE_CMP(d1->db_level, d2->db_level);
 	if (likely(cmp))
 		return (cmp);
 
-	cmp = AVL_CMP(d1->db_blkid, d2->db_blkid);
+	cmp = TREE_CMP(d1->db_blkid, d2->db_blkid);
 	if (likely(cmp))
 		return (cmp);
 
@@ -105,20 +107,20 @@
 		return (1);
 	}
 
-	return (AVL_PCMP(d1, d2));
+	return (TREE_PCMP(d1, d2));
 }
 
-/* ARGSUSED */
 static int
 dnode_cons(void *arg, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	dnode_t *dn = arg;
-	int i;
 
 	rw_init(&dn->dn_struct_rwlock, NULL, RW_NOLOCKDEP, NULL);
 	mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
+	cv_init(&dn->dn_nodnholds, NULL, CV_DEFAULT, NULL);
 
 	/*
 	 * Every dbuf has a reference, and dropping a tracked reference is
@@ -128,6 +130,7 @@
 	zfs_refcount_create(&dn->dn_tx_holds);
 	list_link_init(&dn->dn_link);
 
+	bzero(&dn->dn_next_type[0], sizeof (dn->dn_next_type));
 	bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr));
 	bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels));
 	bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift));
@@ -137,7 +140,7 @@
 	bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
 	bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid));
 
-	for (i = 0; i < TXG_SIZE; i++) {
+	for (int i = 0; i < TXG_SIZE; i++) {
 		multilist_link_init(&dn->dn_dirty_link[i]);
 		dn->dn_free_ranges[i] = NULL;
 		list_create(&dn->dn_dirty_records[i],
@@ -172,22 +175,22 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 dnode_dest(void *arg, void *unused)
 {
-	int i;
+	(void) unused;
 	dnode_t *dn = arg;
 
 	rw_destroy(&dn->dn_struct_rwlock);
 	mutex_destroy(&dn->dn_mtx);
 	mutex_destroy(&dn->dn_dbufs_mtx);
 	cv_destroy(&dn->dn_notxholds);
+	cv_destroy(&dn->dn_nodnholds);
 	zfs_refcount_destroy(&dn->dn_holds);
 	zfs_refcount_destroy(&dn->dn_tx_holds);
 	ASSERT(!list_link_active(&dn->dn_link));
 
-	for (i = 0; i < TXG_SIZE; i++) {
+	for (int i = 0; i < TXG_SIZE; i++) {
 		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
 		list_destroy(&dn->dn_dirty_records[i]);
@@ -224,6 +227,72 @@
 	avl_destroy(&dn->dn_dbufs);
 }
 
+/*
+ * kstat update callback for the dnode statistics (installed as ks_update
+ * by dnode_init()).
+ *
+ * Refreshes every named counter in the dnode_stats_t that ksp->ks_data
+ * points to from the matching wmsum counter in dnode_sums.
+ *
+ * Returns EACCES if rw is KSTAT_WRITE, without touching any counter, and
+ * 0 once all counters have been refreshed.
+ */
+static int
+dnode_kstats_update(kstat_t *ksp, int rw)
+{
+	dnode_stats_t *ds = ksp->ks_data;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	/* Copy the current value of each wmsum counter into its kstat slot. */
+	ds->dnode_hold_dbuf_hold.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_dbuf_hold);
+	ds->dnode_hold_dbuf_read.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_dbuf_read);
+	ds->dnode_hold_alloc_hits.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_hits);
+	ds->dnode_hold_alloc_misses.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_misses);
+	ds->dnode_hold_alloc_interior.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_interior);
+	ds->dnode_hold_alloc_lock_retry.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_lock_retry);
+	ds->dnode_hold_alloc_lock_misses.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_lock_misses);
+	ds->dnode_hold_alloc_type_none.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_alloc_type_none);
+	ds->dnode_hold_free_hits.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_hits);
+	ds->dnode_hold_free_misses.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_misses);
+	ds->dnode_hold_free_lock_misses.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_lock_misses);
+	ds->dnode_hold_free_lock_retry.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_lock_retry);
+	ds->dnode_hold_free_refcount.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_refcount);
+	ds->dnode_hold_free_overflow.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_hold_free_overflow);
+	ds->dnode_free_interior_lock_retry.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_free_interior_lock_retry);
+	ds->dnode_allocate.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_allocate);
+	ds->dnode_reallocate.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_reallocate);
+	ds->dnode_buf_evict.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_buf_evict);
+	ds->dnode_alloc_next_chunk.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_alloc_next_chunk);
+	ds->dnode_alloc_race.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_alloc_race);
+	ds->dnode_alloc_next_block.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_alloc_next_block);
+	ds->dnode_move_invalid.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_invalid);
+	ds->dnode_move_recheck1.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_recheck1);
+	ds->dnode_move_recheck2.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_recheck2);
+	ds->dnode_move_special.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_special);
+	ds->dnode_move_handle.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_handle);
+	ds->dnode_move_rwlock.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_rwlock);
+	ds->dnode_move_active.value.ui64 =
+	    wmsum_value(&dnode_sums.dnode_move_active);
+	return (0);
+}
+
 void
 dnode_init(void)
 {
@@ -232,11 +301,41 @@
 	    0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
 	kmem_cache_set_move(dnode_cache, dnode_move);
 
+	wmsum_init(&dnode_sums.dnode_hold_dbuf_hold, 0);
+	wmsum_init(&dnode_sums.dnode_hold_dbuf_read, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_hits, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_misses, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_interior, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_lock_retry, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_lock_misses, 0);
+	wmsum_init(&dnode_sums.dnode_hold_alloc_type_none, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_hits, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_misses, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_lock_misses, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_lock_retry, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_refcount, 0);
+	wmsum_init(&dnode_sums.dnode_hold_free_overflow, 0);
+	wmsum_init(&dnode_sums.dnode_free_interior_lock_retry, 0);
+	wmsum_init(&dnode_sums.dnode_allocate, 0);
+	wmsum_init(&dnode_sums.dnode_reallocate, 0);
+	wmsum_init(&dnode_sums.dnode_buf_evict, 0);
+	wmsum_init(&dnode_sums.dnode_alloc_next_chunk, 0);
+	wmsum_init(&dnode_sums.dnode_alloc_race, 0);
+	wmsum_init(&dnode_sums.dnode_alloc_next_block, 0);
+	wmsum_init(&dnode_sums.dnode_move_invalid, 0);
+	wmsum_init(&dnode_sums.dnode_move_recheck1, 0);
+	wmsum_init(&dnode_sums.dnode_move_recheck2, 0);
+	wmsum_init(&dnode_sums.dnode_move_special, 0);
+	wmsum_init(&dnode_sums.dnode_move_handle, 0);
+	wmsum_init(&dnode_sums.dnode_move_rwlock, 0);
+	wmsum_init(&dnode_sums.dnode_move_active, 0);
+
 	dnode_ksp = kstat_create("zfs", 0, "dnodestats", "misc",
 	    KSTAT_TYPE_NAMED, sizeof (dnode_stats) / sizeof (kstat_named_t),
 	    KSTAT_FLAG_VIRTUAL);
 	if (dnode_ksp != NULL) {
 		dnode_ksp->ks_data = &dnode_stats;
+		dnode_ksp->ks_update = dnode_kstats_update;
 		kstat_install(dnode_ksp);
 	}
 }
@@ -249,6 +348,35 @@
 		dnode_ksp = NULL;
 	}
 
+	wmsum_fini(&dnode_sums.dnode_hold_dbuf_hold);
+	wmsum_fini(&dnode_sums.dnode_hold_dbuf_read);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_hits);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_misses);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_interior);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_lock_retry);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_lock_misses);
+	wmsum_fini(&dnode_sums.dnode_hold_alloc_type_none);
+	wmsum_fini(&dnode_sums.dnode_hold_free_hits);
+	wmsum_fini(&dnode_sums.dnode_hold_free_misses);
+	wmsum_fini(&dnode_sums.dnode_hold_free_lock_misses);
+	wmsum_fini(&dnode_sums.dnode_hold_free_lock_retry);
+	wmsum_fini(&dnode_sums.dnode_hold_free_refcount);
+	wmsum_fini(&dnode_sums.dnode_hold_free_overflow);
+	wmsum_fini(&dnode_sums.dnode_free_interior_lock_retry);
+	wmsum_fini(&dnode_sums.dnode_allocate);
+	wmsum_fini(&dnode_sums.dnode_reallocate);
+	wmsum_fini(&dnode_sums.dnode_buf_evict);
+	wmsum_fini(&dnode_sums.dnode_alloc_next_chunk);
+	wmsum_fini(&dnode_sums.dnode_alloc_race);
+	wmsum_fini(&dnode_sums.dnode_alloc_next_block);
+	wmsum_fini(&dnode_sums.dnode_move_invalid);
+	wmsum_fini(&dnode_sums.dnode_move_recheck1);
+	wmsum_fini(&dnode_sums.dnode_move_recheck2);
+	wmsum_fini(&dnode_sums.dnode_move_special);
+	wmsum_fini(&dnode_sums.dnode_move_handle);
+	wmsum_fini(&dnode_sums.dnode_move_rwlock);
+	wmsum_fini(&dnode_sums.dnode_move_active);
+
 	kmem_cache_destroy(dnode_cache);
 	dnode_cache = NULL;
 }
@@ -446,7 +574,6 @@
 	dnode_t *dn;
 
 	dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
-	ASSERT(!POINTER_IS_VALID(dn->dn_objset));
 	dn->dn_moved = 0;
 
 	/*
@@ -542,10 +669,7 @@
 	dn->dn_dirty_txg = 0;
 
 	dn->dn_dirtyctx = 0;
-	if (dn->dn_dirtyctx_firstset != NULL) {
-		kmem_free(dn->dn_dirtyctx_firstset, 1);
-		dn->dn_dirtyctx_firstset = NULL;
-	}
+	dn->dn_dirtyctx_firstset = NULL;
 	if (dn->dn_bonus != NULL) {
 		mutex_enter(&dn->dn_bonus->db_mtx);
 		dbuf_destroy(dn->dn_bonus);
@@ -594,7 +718,8 @@
 	ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
 
 	dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d dn_slots=%d\n",
-	    dn->dn_objset, dn->dn_object, tx->tx_txg, blocksize, ibs, dn_slots);
+	    dn->dn_objset, (u_longlong_t)dn->dn_object,
+	    (u_longlong_t)tx->tx_txg, blocksize, ibs, dn_slots);
 	DNODE_STAT_BUMP(dnode_allocate);
 
 	ASSERT(dn->dn_type == DMU_OT_NONE);
@@ -611,7 +736,6 @@
 	ASSERT0(dn->dn_maxblkid);
 	ASSERT0(dn->dn_allocated_txg);
 	ASSERT0(dn->dn_assigned_txg);
-	ASSERT0(dn->dn_dirty_txg);
 	ASSERT(zfs_refcount_is_zero(&dn->dn_tx_holds));
 	ASSERT3U(zfs_refcount_count(&dn->dn_holds), <=, 1);
 	ASSERT(avl_is_empty(&dn->dn_dbufs));
@@ -650,10 +774,8 @@
 	dn->dn_dirtyctx = 0;
 
 	dn->dn_free_txg = 0;
-	if (dn->dn_dirtyctx_firstset) {
-		kmem_free(dn->dn_dirtyctx_firstset, 1);
-		dn->dn_dirtyctx_firstset = NULL;
-	}
+	dn->dn_dirtyctx_firstset = NULL;
+	dn->dn_dirty_txg = 0;
 
 	dn->dn_allocated_txg = tx->tx_txg;
 	dn->dn_id_flags = 0;
@@ -759,7 +881,6 @@
 	ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
-	ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
 
 	/* Copy fields. */
 	ndn->dn_objset = odn->dn_objset;
@@ -827,9 +948,7 @@
 	ndn->dn_newgid = odn->dn_newgid;
 	ndn->dn_newprojid = odn->dn_newprojid;
 	ndn->dn_id_flags = odn->dn_id_flags;
-	dmu_zfetch_init(&ndn->dn_zfetch, NULL);
-	list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
-	ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
+	dmu_zfetch_init(&ndn->dn_zfetch, ndn);
 
 	/*
 	 * Update back pointers. Updating the handle fixes the back pointer of
@@ -837,9 +956,6 @@
 	 */
 	ASSERT(ndn->dn_handle->dnh_dnode == odn);
 	ndn->dn_handle->dnh_dnode = ndn;
-	if (ndn->dn_zfetch.zf_dnode == odn) {
-		ndn->dn_zfetch.zf_dnode = ndn;
-	}
 
 	/*
 	 * Invalidate the original dnode by clearing all of its back pointers.
@@ -898,7 +1014,6 @@
 	odn->dn_moved = (uint8_t)-1;
 }
 
-/*ARGSUSED*/
 static kmem_cbrc_t
 dnode_move(void *buf, void *newbuf, size_t size, void *arg)
 {
@@ -1005,7 +1120,7 @@
 	 */
 	refcount = zfs_refcount_count(&odn->dn_holds);
 	ASSERT(refcount >= 0);
-	dbufs = odn->dn_dbufs_count;
+	dbufs = DN_DBUFS_COUNT(odn);
 
 	/* We can't have more dbufs than dnode holds. */
 	ASSERT3U(dbufs, <=, refcount);
@@ -1032,7 +1147,7 @@
 	list_link_replace(&odn->dn_link, &ndn->dn_link);
 	/* If the dnode was safe to move, the refcount cannot have changed. */
 	ASSERT(refcount == zfs_refcount_count(&ndn->dn_holds));
-	ASSERT(dbufs == ndn->dn_dbufs_count);
+	ASSERT(dbufs == DN_DBUFS_COUNT(ndn));
 	zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */
 	mutex_exit(&os->os_lock);
 
@@ -1178,13 +1293,15 @@
 	dnode_t *dn = dnh->dnh_dnode;
 
 	/*
-	 * Wait for final references to the dnode to clear.  This can
-	 * only happen if the arc is asynchronously evicting state that
-	 * has a hold on this dnode while we are trying to evict this
-	 * dnode.
+	 * Ensure dnode_rele_and_unlock() has released dn_mtx, after final
+	 * zfs_refcount_remove()
 	 */
-	while (zfs_refcount_count(&dn->dn_holds) > 0)
-		delay(1);
+	mutex_enter(&dn->dn_mtx);
+	if (zfs_refcount_count(&dn->dn_holds) > 0)
+		cv_wait(&dn->dn_nodnholds, &dn->dn_mtx);
+	mutex_exit(&dn->dn_mtx);
+	ASSERT3U(zfs_refcount_count(&dn->dn_holds), ==, 0);
+
 	ASSERT(dn->dn_dbuf == NULL ||
 	    dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
 	zrl_add(&dnh->dnh_zrlock);
@@ -1200,7 +1317,7 @@
 	dnode_t *dn;
 
 	zrl_init(&dnh->dnh_zrlock);
-	zrl_tryenter(&dnh->dnh_zrlock);
+	VERIFY3U(1, ==, zrl_tryenter(&dnh->dnh_zrlock));
 
 	dn = dnode_create(os, dnp, NULL, object, dnh);
 	DNODE_VERIFY(dn);
@@ -1346,7 +1463,6 @@
 	}
 
 	blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
-
 	db = dbuf_hold(mdn, blk, FTAG);
 	if (drop_struct_lock)
 		rw_exit(&mdn->dn_struct_rwlock);
@@ -1359,7 +1475,8 @@
 	 * We do not need to decrypt to read the dnode so it doesn't matter
 	 * if we get the encrypted or decrypted version.
 	 */
-	err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT);
+	err = dbuf_read(db, NULL, DB_RF_CANFAIL |
+	    DB_RF_NO_DECRYPT | DB_RF_NOPREFETCH);
 	if (err) {
 		DNODE_STAT_BUMP(dnode_hold_dbuf_read);
 		dbuf_rele(db, FTAG);
@@ -1614,7 +1731,10 @@
 	dnode_handle_t *dnh = dn->dn_handle;
 
 	refs = zfs_refcount_remove(&dn->dn_holds, tag);
+	if (refs == 0)
+		cv_broadcast(&dn->dn_nodnholds);
 	mutex_exit(&dn->dn_mtx);
+	/* dnode could get destroyed at this point, so don't use it anymore */
 
 	/*
 	 * It's unsafe to release the last hold on a dnode by dnode_rele() or
@@ -1652,6 +1772,34 @@
 	    slots, NULL, NULL));
 }
 
+/*
+ * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
+ * It is important to check both conditions, as some operations (e.g.
+ * appending to a file) can dirty both as a single logical unit, but they are
+ * not synced out atomically, so checking one and not the other can result in
+ * an object appearing to be clean mid-way through a commit.
+ *
+ * Do not change this lightly! If you get it wrong, dmu_offset_next() can
+ * detect a hole where there is really data, leading to silent corruption.
+ *
+ * Takes dn->dn_mtx for the duration of the scan and drops it before
+ * returning.  Returns B_TRUE if, for any of the TXG_SIZE slots, the dnode's
+ * dirty link is active or its dirty record list is non-empty; B_FALSE
+ * otherwise.
+ */
+boolean_t
+dnode_is_dirty(dnode_t *dn)
+{
+	mutex_enter(&dn->dn_mtx);
+
+	for (int i = 0; i < TXG_SIZE; i++) {
+		if (multilist_link_active(&dn->dn_dirty_link[i]) ||
+		    !list_is_empty(&dn->dn_dirty_records[i])) {
+			mutex_exit(&dn->dn_mtx);
+			return (B_TRUE);
+		}
+	}
+
+	mutex_exit(&dn->dn_mtx);
+
+	return (B_FALSE);
+}
+
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
@@ -1677,7 +1825,7 @@
 	 */
 	dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
 
-	multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK];
+	multilist_t *dirtylist = &os->os_dirty_dnodes[txg & TXG_MASK];
 	multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
 
 	/*
@@ -1696,7 +1844,7 @@
 	ASSERT0(dn->dn_next_bonustype[txg & TXG_MASK]);
 
 	dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
-	    dn->dn_object, txg);
+	    (u_longlong_t)dn->dn_object, (u_longlong_t)txg);
 
 	multilist_sublist_insert_head(mls, dn);
 
@@ -1751,7 +1899,7 @@
 	if (ibs == dn->dn_indblkshift)
 		ibs = 0;
 
-	if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
+	if (size == dn->dn_datablksz && ibs == 0)
 		return (0);
 
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
@@ -1774,23 +1922,25 @@
 	if (ibs && dn->dn_nlevels != 1)
 		goto fail;
 
-	/* resize the old block */
-	err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
-	if (err == 0)
-		dbuf_new_size(db, size, tx);
-	else if (err != ENOENT)
-		goto fail;
-
-	dnode_setdblksz(dn, size);
 	dnode_setdirty(dn, tx);
-	dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size;
+	if (size != dn->dn_datablksz) {
+		/* resize the old block */
+		err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
+		if (err == 0) {
+			dbuf_new_size(db, size, tx);
+		} else if (err != ENOENT) {
+			goto fail;
+		}
+
+		dnode_setdblksz(dn, size);
+		dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = size;
+		if (db)
+			dbuf_rele(db, FTAG);
+	}
 	if (ibs) {
 		dn->dn_indblkshift = ibs;
-		dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs;
+		dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
 	}
-	/* release after we have fixed the blocksize in the dnode */
-	if (db)
-		dbuf_rele(db, FTAG);
 
 	rw_exit(&dn->dn_struct_rwlock);
 	return (0);
@@ -1811,6 +1961,7 @@
 
 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
 
+	ASSERT3U(new_nlevels, >, dn->dn_nlevels);
 	dn->dn_nlevels = new_nlevels;
 
 	ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
@@ -1828,10 +1979,12 @@
 	list = &dn->dn_dirty_records[txgoff];
 	for (dr = list_head(list); dr; dr = dr_next) {
 		dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
-		if (dr->dr_dbuf->db_level != new_nlevels-1 &&
+
+		IMPLY(dr->dr_dbuf == NULL, old_nlevels == 1);
+		if (dr->dr_dbuf == NULL ||
+		    (dr->dr_dbuf->db_level == old_nlevels - 1 &&
 		    dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
-		    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
-			ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
+		    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID)) {
 			list_remove(&dn->dn_dirty_records[txgoff], dr);
 			list_insert_tail(&new->dt.di.dr_children, dr);
 			dr->dr_parent = new;
@@ -1949,18 +2102,20 @@
 dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
     dmu_tx_t *tx)
 {
-	dmu_buf_impl_t db_search;
+	dmu_buf_impl_t *db_search;
 	dmu_buf_impl_t *db;
 	avl_index_t where;
 
+	db_search = kmem_zalloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+
 	mutex_enter(&dn->dn_dbufs_mtx);
 
-	db_search.db_level = 1;
-	db_search.db_blkid = start_blkid + 1;
-	db_search.db_state = DB_SEARCH;
+	db_search->db_level = 1;
+	db_search->db_blkid = start_blkid + 1;
+	db_search->db_state = DB_SEARCH;
 	for (;;) {
 
-		db = avl_find(&dn->dn_dbufs, &db_search, &where);
+		db = avl_find(&dn->dn_dbufs, db_search, &where);
 		if (db == NULL)
 			db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
 
@@ -1972,7 +2127,7 @@
 		/*
 		 * Setup the next blkid we want to search for.
 		 */
-		db_search.db_blkid = db->db_blkid + 1;
+		db_search->db_blkid = db->db_blkid + 1;
 		ASSERT3U(db->db_blkid, >=, start_blkid);
 
 		/*
@@ -1992,10 +2147,10 @@
 	/*
 	 * Walk all the in-core level-1 dbufs and verify they have been dirtied.
 	 */
-	db_search.db_level = 1;
-	db_search.db_blkid = start_blkid + 1;
-	db_search.db_state = DB_SEARCH;
-	db = avl_find(&dn->dn_dbufs, &db_search, &where);
+	db_search->db_level = 1;
+	db_search->db_blkid = start_blkid + 1;
+	db_search->db_state = DB_SEARCH;
+	db = avl_find(&dn->dn_dbufs, db_search, &where);
 	if (db == NULL)
 		db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
 	for (; db != NULL; db = AVL_NEXT(&dn->dn_dbufs, db)) {
@@ -2005,10 +2160,37 @@
 			ASSERT(db->db_dirtycnt > 0);
 	}
 #endif
+	kmem_free(db_search, sizeof (dmu_buf_impl_t));
 	mutex_exit(&dn->dn_dbufs_mtx);
 }
 
+/*
+ * Initialize dn->dn_dirtyctx if it is still DN_UNDIRTIED; otherwise do
+ * nothing.
+ *
+ * When the objset's root block pointer is not a hole, dn_dirtyctx becomes
+ * DN_DIRTY_SYNC if tx is a syncing transaction and DN_DIRTY_OPEN if it is
+ * not, and tag is saved in dn_dirtyctx_firstset.  When the objset has a
+ * dataset, its ds_bp_rwlock is held as reader (with tag as the lock tag)
+ * around the root block pointer check.
+ */
 void
+dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag)
+{
+	/*
+	 * Don't set dirtyctx to SYNC if we're just modifying this as we
+	 * initialize the objset.
+	 */
+	if (dn->dn_dirtyctx == DN_UNDIRTIED) {
+		dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+
+		if (ds != NULL) {
+			rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag);
+		}
+		if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
+			if (dmu_tx_is_syncing(tx))
+				dn->dn_dirtyctx = DN_DIRTY_SYNC;
+			else
+				dn->dn_dirtyctx = DN_DIRTY_OPEN;
+			dn->dn_dirtyctx_firstset = tag;
+		}
+		if (ds != NULL) {
+			rrw_exit(&ds->ds_bp_rwlock, tag);
+		}
+	}
+}
+
+void
 dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db;
@@ -2017,7 +2199,6 @@
 	int trunc = FALSE;
 	int epbs;
 
-	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	blksz = dn->dn_datablksz;
 	blkshift = dn->dn_datablkshift;
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
@@ -2034,7 +2215,7 @@
 		head = P2NPHASE(off, blksz);
 		blkoff = P2PHASE(off, blksz);
 		if ((off >> blkshift) > dn->dn_maxblkid)
-			goto out;
+			return;
 	} else {
 		ASSERT(dn->dn_maxblkid == 0);
 		if (off == 0 && len >= blksz) {
@@ -2043,12 +2224,15 @@
 			 */
 			blkid = 0;
 			nblks = 1;
-			if (dn->dn_nlevels > 1)
+			if (dn->dn_nlevels > 1) {
+				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				dnode_dirty_l1(dn, 0, tx);
+				rw_exit(&dn->dn_struct_rwlock);
+			}
 			goto done;
 		} else if (off >= blksz) {
 			/* Freeing past end-of-data */
-			goto out;
+			return;
 		} else {
 			/* Freeing part of the block. */
 			head = blksz - off;
@@ -2058,19 +2242,26 @@
 	}
 	/* zero out any partial block data at the start of the range */
 	if (head) {
+		int res;
 		ASSERT3U(blkoff + head, ==, blksz);
 		if (len < head)
 			head = len;
-		if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
-		    TRUE, FALSE, FTAG, &db) == 0) {
+		rw_enter(&dn->dn_struct_rwlock, RW_READER);
+		res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
+		    TRUE, FALSE, FTAG, &db);
+		rw_exit(&dn->dn_struct_rwlock);
+		if (res == 0) {
 			caddr_t data;
+			boolean_t dirty;
 
+			db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER,
+			    FTAG);
 			/* don't dirty if it isn't on disk and isn't dirty */
-			if (db->db_last_dirty ||
-			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
-				rw_exit(&dn->dn_struct_rwlock);
+			dirty = !list_is_empty(&db->db_dirty_records) ||
+			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr));
+			dmu_buf_unlock_parent(db, dblt, FTAG);
+			if (dirty) {
 				dmu_buf_will_dirty(&db->db, tx);
-				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				data = db->db.db_data;
 				bzero(data + blkoff, head);
 			}
@@ -2082,11 +2273,11 @@
 
 	/* If the range was less than one block, we're done */
 	if (len == 0)
-		goto out;
+		return;
 
 	/* If the remaining range is past end of file, we're done */
 	if ((off >> blkshift) > dn->dn_maxblkid)
-		goto out;
+		return;
 
 	ASSERT(ISP2(blksz));
 	if (trunc)
@@ -2097,16 +2288,23 @@
 	ASSERT0(P2PHASE(off, blksz));
 	/* zero out any partial block data at the end of the range */
 	if (tail) {
+		int res;
 		if (len < tail)
 			tail = len;
-		if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
-		    TRUE, FALSE, FTAG, &db) == 0) {
+		rw_enter(&dn->dn_struct_rwlock, RW_READER);
+		res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
+		    TRUE, FALSE, FTAG, &db);
+		rw_exit(&dn->dn_struct_rwlock);
+		if (res == 0) {
+			boolean_t dirty;
 			/* don't dirty if not on disk and not dirty */
-			if (db->db_last_dirty ||
-			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
-				rw_exit(&dn->dn_struct_rwlock);
+			db_lock_type_t type = dmu_buf_lock_parent(db, RW_READER,
+			    FTAG);
+			dirty = !list_is_empty(&db->db_dirty_records) ||
+			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr));
+			dmu_buf_unlock_parent(db, type, FTAG);
+			if (dirty) {
 				dmu_buf_will_dirty(&db->db, tx);
-				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				bzero(db->db.db_data, tail);
 			}
 			dbuf_rele(db, FTAG);
@@ -2116,7 +2314,7 @@
 
 	/* If the range did not include a full block, we are done */
 	if (len == 0)
-		goto out;
+		return;
 
 	ASSERT(IS_P2ALIGNED(off, blksz));
 	ASSERT(trunc || IS_P2ALIGNED(len, blksz));
@@ -2146,6 +2344,7 @@
 	 *    amount of space if we copy the freed BPs into deadlists.
 	 */
 	if (dn->dn_nlevels > 1) {
+		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 		uint64_t first, last;
 
 		first = blkid >> epbs;
@@ -2190,6 +2389,7 @@
 
 			dnode_dirty_l1(dn, i, tx);
 		}
+		rw_exit(&dn->dn_struct_rwlock);
 	}
 
 done:
@@ -2199,22 +2399,21 @@
 	 */
 	mutex_enter(&dn->dn_mtx);
 	{
-	int txgoff = tx->tx_txg & TXG_MASK;
-	if (dn->dn_free_ranges[txgoff] == NULL) {
-		dn->dn_free_ranges[txgoff] = range_tree_create(NULL, NULL);
-	}
-	range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
-	range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
+		int txgoff = tx->tx_txg & TXG_MASK;
+		if (dn->dn_free_ranges[txgoff] == NULL) {
+			dn->dn_free_ranges[txgoff] = range_tree_create(NULL,
+			    RANGE_SEG64, NULL, 0, 0);
+		}
+		range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
+		range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
 	}
 	dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
-	    blkid, nblks, tx->tx_txg);
+	    (u_longlong_t)blkid, (u_longlong_t)nblks,
+	    (u_longlong_t)tx->tx_txg);
 	mutex_exit(&dn->dn_mtx);
 
 	dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
 	dnode_setdirty(dn, tx);
-out:
-
-	rw_exit(&dn->dn_struct_rwlock);
 }
 
 static boolean_t
@@ -2235,19 +2434,11 @@
 uint64_t
 dnode_block_freed(dnode_t *dn, uint64_t blkid)
 {
-	void *dp = spa_get_dsl(dn->dn_objset->os_spa);
 	int i;
 
 	if (blkid == DMU_BONUS_BLKID)
 		return (FALSE);
 
-	/*
-	 * If we're in the process of opening the pool, dp will not be
-	 * set yet, but there shouldn't be anything dirty.
-	 */
-	if (dp == NULL)
-		return (FALSE);
-
 	if (dn->dn_free_txg)
 		return (TRUE);
 
@@ -2323,6 +2514,8 @@
 	boolean_t hole;
 	int i, inc, error, span;
 
+	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+
 	hole = ((flags & DNODE_FIND_HOLE) != 0);
 	inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
 	ASSERT(txg == 0 || !hole);
@@ -2349,15 +2542,16 @@
 			return (SET_ERROR(ESRCH));
 		}
 		error = dbuf_read(db, NULL,
-		    DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT);
+		    DB_RF_CANFAIL | DB_RF_HAVESTRUCT |
+		    DB_RF_NO_DECRYPT | DB_RF_NOPREFETCH);
 		if (error) {
 			dbuf_rele(db, FTAG);
 			return (error);
 		}
 		data = db->db.db_data;
+		rw_enter(&db->db_rwlock, RW_READER);
 	}
 
-
 	if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
 	    db->db_blkptr->blk_birth <= txg ||
 	    BP_IS_HOLE(db->db_blkptr))) {
@@ -2430,8 +2624,10 @@
 			error = SET_ERROR(ESRCH);
 	}
 
-	if (db)
+	if (db != NULL) {
+		rw_exit(&db->db_rwlock);
 		dbuf_rele(db, FTAG);
+	}
 
 	return (error);
 }
@@ -2527,3 +2723,8 @@
 EXPORT_SYMBOL(dnode_evict_dbufs);
 EXPORT_SYMBOL(dnode_evict_bonus);
 #endif
+
+ZFS_MODULE_PARAM(zfs, zfs_, default_bs, INT, ZMOD_RW,
+	"Default dnode block shift");
+ZFS_MODULE_PARAM(zfs, zfs_, default_ibs, INT, ZMOD_RW,
+	"Default dnode indirect block shift");

diff --git a/zfs/module/zfs/dnode_sync.c b/zfs/module/zfs/dnode_sync.c
index d3acf1b..12ab4be 100644
--- a/zfs/module/zfs/dnode_sync.c
+++ b/zfs/module/zfs/dnode_sync.c

@@ -21,8 +21,9 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2020 Oxide Computer Company
  */
 
 #include <sys/zfs_context.h>
@@ -51,7 +52,6 @@
 
 	/* this dnode can't be paged out because it's dirty */
 	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
-	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
 	ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);
 
 	db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
@@ -59,10 +59,26 @@
 
 	dn->dn_phys->dn_nlevels = new_level;
 	dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
-	    dn->dn_object, dn->dn_phys->dn_nlevels);
+	    (u_longlong_t)dn->dn_object, dn->dn_phys->dn_nlevels);
+
+	/*
+	 * Lock ordering requires that we hold the children's db_mutexes (by
+	 * calling dbuf_find()) before holding the parent's db_rwlock.  The lock
+	 * order is imposed by dbuf_read's steps of "grab the lock to protect
+	 * db_parent, get db_parent, hold db_parent's db_rwlock".
+	 */
+	dmu_buf_impl_t *children[DN_MAX_NBLKPTR];
+	ASSERT3U(nblkptr, <=, DN_MAX_NBLKPTR);
+	for (i = 0; i < nblkptr; i++) {
+		children[i] =
+		    dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);
+	}
 
 	/* transfer dnode's block pointers to new indirect block */
 	(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
+	if (dn->dn_dbuf != NULL)
+		rw_enter(&dn->dn_dbuf->db_rwlock, RW_WRITER);
+	rw_enter(&db->db_rwlock, RW_WRITER);
 	ASSERT(db->db.db_data);
 	ASSERT(arc_released(db->db_buf));
 	ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
@@ -72,12 +88,11 @@
 
 	/* set dbuf's parent pointers to new indirect buf */
 	for (i = 0; i < nblkptr; i++) {
-		dmu_buf_impl_t *child =
-		    dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);
+		dmu_buf_impl_t *child = children[i];
 
 		if (child == NULL)
 			continue;
-#ifdef	DEBUG
+#ifdef	ZFS_DEBUG
 		DB_DNODE_ENTER(child);
 		ASSERT3P(DB_DNODE(child), ==, dn);
 		DB_DNODE_EXIT(child);
@@ -106,6 +121,10 @@
 
 	bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);
 
+	rw_exit(&db->db_rwlock);
+	if (dn->dn_dbuf != NULL)
+		rw_exit(&dn->dn_dbuf->db_rwlock);
+
 	dbuf_rele(db, FTAG);
 
 	rw_exit(&dn->dn_struct_rwlock);
@@ -117,7 +136,8 @@
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 	uint64_t bytesfreed = 0;
 
-	dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
+	dprintf("ds=%p obj=%llx num=%d\n", ds, (u_longlong_t)dn->dn_object,
+	    num);
 
 	for (int i = 0; i < num; i++, bp++) {
 		if (BP_IS_HOLE(bp))
@@ -182,17 +202,14 @@
 		ASSERT(db->db_level == 1);
 
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
-		err = dbuf_hold_impl(dn, db->db_level-1,
+		err = dbuf_hold_impl(dn, db->db_level - 1,
 		    (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
 		rw_exit(&dn->dn_struct_rwlock);
 		if (err == ENOENT)
 			continue;
 		ASSERT(err == 0);
 		ASSERT(child->db_level == 0);
-		dr = child->db_last_dirty;
-		while (dr && dr->dr_txg > txg)
-			dr = dr->dr_next;
-		ASSERT(dr == NULL || dr->dr_txg == txg);
+		dr = dbuf_find_dirty_eq(child, txg);
 
 		/* data_old better be zeroed */
 		if (dr) {
@@ -213,7 +230,7 @@
 		mutex_enter(&child->db_mtx);
 		buf = child->db.db_data;
 		if (buf != NULL && child->db_state != DB_FILL &&
-		    child->db_last_dirty == NULL) {
+		    list_is_empty(&child->db_dirty_records)) {
 			for (j = 0; j < child->db.db_size >> 3; j++) {
 				if (buf[j] != 0) {
 					panic("freed data not zero: "
@@ -280,7 +297,9 @@
 	 * ancestor of the first or last block to be freed.  The first and
 	 * last L1 indirect blocks are always dirtied by dnode_free_range().
 	 */
+	db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
 	VERIFY(BP_GET_FILL(db->db_blkptr) == 0 || db->db_dirtycnt > 0);
+	dmu_buf_unlock_parent(db, dblt, FTAG);
 
 	dbuf_release_bp(db);
 	bp = db->db.db_data;
@@ -306,7 +325,9 @@
 
 	if (db->db_level == 1) {
 		FREE_VERIFY(db, start, end, tx);
-		free_blocks(dn, bp, end-start+1, tx);
+		rw_enter(&db->db_rwlock, RW_WRITER);
+		free_blocks(dn, bp, end - start + 1, tx);
+		rw_exit(&db->db_rwlock);
 	} else {
 		for (uint64_t id = start; id <= end; id++, bp++) {
 			if (BP_IS_HOLE(bp))
@@ -323,10 +344,12 @@
 	}
 
 	if (free_indirects) {
+		rw_enter(&db->db_rwlock, RW_WRITER);
 		for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++)
 			ASSERT(BP_IS_HOLE(bp));
 		bzero(db->db.db_data, db->db.db_size);
 		free_blocks(dn, db->db_blkptr, 1, tx);
+		rw_exit(&db->db_rwlock);
 	}
 
 	DB_DNODE_EXIT(db);
@@ -378,7 +401,6 @@
 			VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
 			    TRUE, FALSE, FTAG, &db));
 			rw_exit(&dn->dn_struct_rwlock);
-
 			free_children(db, blkid, nblks, free_indirects, tx);
 			dbuf_rele(db, FTAG);
 		}
@@ -399,11 +421,11 @@
 	 * match.
 	 */
 	if (trunc && !dn->dn_objset->os_raw_receive) {
-		ASSERTV(uint64_t off);
+		uint64_t off __maybe_unused;
 		dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
 
-		ASSERTV(off = (dn->dn_phys->dn_maxblkid + 1) *
-		    (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT));
+		off = (dn->dn_phys->dn_maxblkid + 1) *
+		    (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 		ASSERT(off < dn->dn_phys->dn_maxblkid ||
 		    dn->dn_phys->dn_maxblkid == 0 ||
 		    dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
@@ -442,7 +464,7 @@
 	mutex_enter(&dn->dn_dbufs_mtx);
 	for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
 
-#ifdef	DEBUG
+#ifdef	ZFS_DEBUG
 		DB_DNODE_ENTER(db);
 		ASSERT3P(DB_DNODE(db), ==, dn);
 		DB_DNODE_EXIT(db);
@@ -518,8 +540,9 @@
 		mutex_enter(&db->db_mtx);
 		/* XXX - use dbuf_undirty()? */
 		list_remove(list, dr);
-		ASSERT(db->db_last_dirty == dr);
-		db->db_last_dirty = NULL;
+		ASSERT(list_head(&db->db_dirty_records) == dr);
+		list_remove_head(&db->db_dirty_records);
+		ASSERT(list_is_empty(&db->db_dirty_records));
 		db->db_dirtycnt -= 1;
 		if (db->db_level == 0) {
 			ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
@@ -605,7 +628,7 @@
 	dnode_phys_t *dnp = dn->dn_phys;
 	int txgoff = tx->tx_txg & TXG_MASK;
 	list_t *list = &dn->dn_dirty_records[txgoff];
-	ASSERTV(static const dnode_phys_t zerodn = { 0 });
+	static const dnode_phys_t zerodn __maybe_unused = { 0 };
 	boolean_t kill_spill = B_FALSE;
 
 	ASSERT(dmu_tx_is_syncing(tx));
@@ -632,8 +655,13 @@
 			    DNODE_FLAG_USEROBJUSED_ACCOUNTED;
 		mutex_exit(&dn->dn_mtx);
 		dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
-	} else {
-		/* Once we account for it, we should always account for it */
+	} else if (!(os->os_encrypted && dmu_objset_is_receiving(os))) {
+		/*
+		 * Once we account for it, we should always account for it,
+		 * except for the case of a raw receive. We will not be able
+		 * to account for it until the receiving dataset has been
+		 * mounted.
+		 */
 		ASSERT(!(dn->dn_phys->dn_flags &
 		    DNODE_FLAG_USERUSED_ACCOUNTED));
 		ASSERT(!(dn->dn_phys->dn_flags &
@@ -741,13 +769,22 @@
 		dsfra.dsfra_dnode = dn;
 		dsfra.dsfra_tx = tx;
 		dsfra.dsfra_free_indirects = freeing_dnode;
+		mutex_enter(&dn->dn_mtx);
 		if (freeing_dnode) {
 			ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff],
 			    0, dn->dn_maxblkid + 1));
 		}
-		mutex_enter(&dn->dn_mtx);
-		range_tree_vacate(dn->dn_free_ranges[txgoff],
+		/*
+		 * Because dnode_sync_free_range() must drop dn_mtx during its
+		 * processing, using it as a callback to range_tree_vacate() is
+		 * not safe.  No other operations (besides destroy) are allowed
+		 * once range_tree_vacate() has begun, and dropping dn_mtx
+		 * would leave a window open for another thread to observe that
+		 * invalid (and unsafe) state.
+		 */
+		range_tree_walk(dn->dn_free_ranges[txgoff],
 		    dnode_sync_free_range, &dsfra);
+		range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL);
 		range_tree_destroy(dn->dn_free_ranges[txgoff]);
 		dn->dn_free_ranges[txgoff] = NULL;
 		mutex_exit(&dn->dn_mtx);
@@ -817,9 +854,13 @@
 		dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
 	}
 
+	ASSERT3U(dnp->dn_bonuslen, <=, DN_MAX_BONUS_LEN(dnp));
+
 	/*
 	 * Although we have dropped our reference to the dnode, it
 	 * can't be evicted until its written, and we haven't yet
-	 * initiated the IO for the dnode's dbuf.
+	 * initiated the IO for the dnode's dbuf.  Additionally, the caller
+	 * has already added a reference to the dnode because it's on the
+	 * os_synced_dnodes list.
 	 */
 }

diff --git a/zfs/module/zfs/dsl_bookmark.c b/zfs/module/zfs/dsl_bookmark.c
index 01362e0..861dd92 100644
--- a/zfs/module/zfs/dsl_bookmark.c
+++ b/zfs/module/zfs/dsl_bookmark.c

@@ -14,8 +14,9 @@
  */
 
 /*
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2019, 2020 by Christian Schwarz. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -23,6 +24,7 @@
 #include <sys/dsl_dir.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_synctask.h>
+#include <sys/dsl_destroy.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/arc.h>
@@ -31,6 +33,7 @@
 #include <sys/spa.h>
 #include <sys/dsl_bookmark.h>
 #include <zfs_namecheck.h>
+#include <sys/dmu_send.h>
 
 static int
 dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
@@ -53,14 +56,19 @@
 }
 
 /*
+ * When reading BOOKMARK_V1 bookmarks, the BOOKMARK_V2 fields are guaranteed
+ * to be zeroed.
+ *
  * Returns ESRCH if bookmark is not found.
+ * Note, we need to use the ZAP rather than the AVL to look up bookmarks
+ * by name, because only the ZAP honors the casesensitivity setting.
  */
-static int
-dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname,
+int
+dsl_bookmark_lookup_impl(dsl_dataset_t *ds, const char *shortname,
     zfs_bookmark_phys_t *bmark_phys)
 {
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	uint64_t bmark_zapobj = ds->ds_bookmarks;
+	uint64_t bmark_zapobj = ds->ds_bookmarks_obj;
 	matchtype_t mt = 0;
 	int err;
 
@@ -77,15 +85,16 @@
 	bzero(bmark_phys, sizeof (*bmark_phys));
 
 	err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t),
-	    sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt,
-	    NULL, 0, NULL);
+	    sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt, NULL, 0,
+	    NULL);
 
-	return (err == ENOENT ? ESRCH : err);
+	return (err == ENOENT ? SET_ERROR(ESRCH) : err);
 }
 
 /*
  * If later_ds is non-NULL, this will return EXDEV if the specified bookmark
- * does not represents an earlier point in later_ds's timeline.
+ * does not represent an earlier point in later_ds's timeline.  However,
+ * bmp will still be filled in if we return EXDEV.
  *
  * Returns ENOENT if the dataset containing the bookmark does not exist.
  * Returns ESRCH if the dataset exists but the bookmark was not found in it.
@@ -102,7 +111,7 @@
 	if (error != 0)
 		return (error);
 
-	error = dsl_dataset_bmark_lookup(ds, shortname, bmp);
+	error = dsl_bookmark_lookup_impl(ds, shortname, bmp);
 	if (error == 0 && later_ds != NULL) {
 		if (!dsl_dataset_is_before(later_ds, ds, bmp->zbm_creation_txg))
 			error = SET_ERROR(EXDEV);
@@ -111,148 +120,489 @@
 	return (error);
 }
 
-typedef struct dsl_bookmark_create_arg {
-	nvlist_t *dbca_bmarks;
-	nvlist_t *dbca_errors;
-} dsl_bookmark_create_arg_t;
-
+/*
+ * Validates that
+ * - bmark is a full dataset path of a bookmark (bookmark_namecheck)
+ * - source is a full path of a snapshot or bookmark
+ *   ({bookmark,snapshot}_namecheck)
+ *
+ * Returns 0 if valid, -1 otherwise.
+ */
 static int
-dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
-    dmu_tx_t *tx)
+dsl_bookmark_create_nvl_validate_pair(const char *bmark, const char *source)
 {
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	dsl_dataset_t *bmark_fs;
-	char *shortname;
+	if (bookmark_namecheck(bmark, NULL, NULL) != 0)
+		return (-1);
+
+	int is_bmark, is_snap;
+	is_bmark = bookmark_namecheck(source, NULL, NULL) == 0;
+	is_snap = snapshot_namecheck(source, NULL, NULL) == 0;
+	if (!is_bmark && !is_snap)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Check that the given nvlist corresponds to the following schema:
+ *  { newbookmark -> source, ... }
+ * where
+ * - each pair passes dsl_bookmark_create_nvl_validate_pair
+ * - all newbookmarks are in the same pool
+ * - all newbookmarks have unique names
+ *
+ * Note that this function only validates the above schema. Callers must ensure
+ * that the bookmarks can be created, e.g. that sources exist.
+ *
+ * Returns 0 if the nvlist adheres to the above schema.
+ * Returns -1 if it doesn't.
+ */
+int
+dsl_bookmark_create_nvl_validate(nvlist_t *bmarks)
+{
+	char *first;
+	size_t first_len;
+
+	first = NULL;
+	for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) {
+
+		char *bmark = nvpair_name(pair);
+		char *source;
+
+		/* list structure: values must be snapshots XOR bookmarks */
+		if (nvpair_value_string(pair, &source) != 0)
+			return (-1);
+		if (dsl_bookmark_create_nvl_validate_pair(bmark, source) != 0)
+			return (-1);
+
+		/* same pool check */
+		if (first == NULL) {
+			char *cp = strpbrk(bmark, "/#");
+			if (cp == NULL)
+				return (-1);
+			first = bmark;
+			first_len = cp - bmark;
+		}
+		if (strncmp(first, bmark, first_len) != 0)
+			return (-1);
+		switch (*(bmark + first_len)) {
+			case '/': /* fallthrough */
+			case '#':
+				break;
+			default:
+				return (-1);
+		}
+
+		/* unique newbookmark names; todo: O(n^2) */
+		for (nvpair_t *pair2 = nvlist_next_nvpair(bmarks, pair);
+		    pair2 != NULL; pair2 = nvlist_next_nvpair(bmarks, pair2)) {
+			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
+				return (-1);
+		}
+
+	}
+	return (0);
+}
+
+/*
+ * expects that newbm and source have been validated using
+ * dsl_bookmark_create_nvl_validate_pair
+ */
+static int
+dsl_bookmark_create_check_impl(dsl_pool_t *dp,
+    const char *newbm, const char *source)
+{
+	ASSERT0(dsl_bookmark_create_nvl_validate_pair(newbm, source));
+	/* defer source namecheck until we know it's a snapshot or bookmark */
+
 	int error;
+	dsl_dataset_t *newbm_ds;
+	char *newbm_short;
 	zfs_bookmark_phys_t bmark_phys;
 
-	if (!snapds->ds_is_snapshot)
-		return (SET_ERROR(EINVAL));
-
-	error = dsl_bookmark_hold_ds(dp, bookmark_name,
-	    &bmark_fs, FTAG, &shortname);
+	error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short);
 	if (error != 0)
 		return (error);
 
-	if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) {
-		dsl_dataset_rele(bmark_fs, FTAG);
-		return (SET_ERROR(EINVAL));
+	/* Verify that the new bookmark does not already exist */
+	error = dsl_bookmark_lookup_impl(newbm_ds, newbm_short, &bmark_phys);
+	switch (error) {
+	case ESRCH:
+		/* happy path: new bmark doesn't exist, proceed after switch */
+		error = 0;
+		break;
+	case 0:
+		error = SET_ERROR(EEXIST);
+		goto eholdnewbmds;
+	default:
+		/* dsl_bookmark_lookup_impl already did SET_ERROR */
+		goto eholdnewbmds;
 	}
 
-	error = dsl_dataset_bmark_lookup(bmark_fs, shortname,
-	    &bmark_phys);
-	dsl_dataset_rele(bmark_fs, FTAG);
-	if (error == 0)
-		return (SET_ERROR(EEXIST));
-	if (error == ESRCH)
-		return (0);
+	/* error is retval of the following if-cascade */
+	if (strchr(source, '@') != NULL) {
+		dsl_dataset_t *source_snap_ds;
+		ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
+		error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
+		if (error == 0) {
+			VERIFY(source_snap_ds->ds_is_snapshot);
+			/*
+			 * Verify that source snapshot is an earlier point in
+			 * newbm_ds's timeline (source may be newbm_ds's origin)
+			 */
+			if (!dsl_dataset_is_before(newbm_ds, source_snap_ds, 0))
+				error = SET_ERROR(
+				    ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
+			dsl_dataset_rele(source_snap_ds, FTAG);
+		}
+	} else if (strchr(source, '#') != NULL) {
+		zfs_bookmark_phys_t source_phys;
+		ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
+		/*
+		 * Source must exist and be an earlier point in newbm_ds's
+		 * timeline (newbm_ds's origin may be a snap of source's ds)
+		 */
+		error = dsl_bookmark_lookup(dp, source, newbm_ds, &source_phys);
+		switch (error) {
+		case 0:
+			break; /* happy path */
+		case EXDEV:
+			error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
+			break;
+		default:
+			/* dsl_bookmark_lookup already did SET_ERROR */
+			break;
+		}
+	} else {
+		/*
+		 * dsl_bookmark_create_nvl_validate validates that source is
+		 * either snapshot or bookmark
+		 */
+		panic("unreachable code: %s", source);
+	}
+
+eholdnewbmds:
+	dsl_dataset_rele(newbm_ds, FTAG);
 	return (error);
 }
 
-static int
+int
 dsl_bookmark_create_check(void *arg, dmu_tx_t *tx)
 {
 	dsl_bookmark_create_arg_t *dbca = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
 	int rv = 0;
+	int schema_err = 0;
+	ASSERT3P(dbca, !=, NULL);
+	ASSERT3P(dbca->dbca_bmarks, !=, NULL);
+	/* dbca->dbca_errors is allowed to be NULL */
+
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 
 	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
 		return (SET_ERROR(ENOTSUP));
 
+	if (dsl_bookmark_create_nvl_validate(dbca->dbca_bmarks) != 0)
+		rv = schema_err = SET_ERROR(EINVAL);
+
 	for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
 	    pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
-		dsl_dataset_t *snapds;
-		int error;
+		char *new = nvpair_name(pair);
 
-		/* note: validity of nvlist checked by ioctl layer */
-		error = dsl_dataset_hold(dp, fnvpair_value_string(pair),
-		    FTAG, &snapds);
+		int error = schema_err;
 		if (error == 0) {
-			error = dsl_bookmark_create_check_impl(snapds,
-			    nvpair_name(pair), tx);
-			dsl_dataset_rele(snapds, FTAG);
+			char *source = fnvpair_value_string(pair);
+			error = dsl_bookmark_create_check_impl(dp, new, source);
+			if (error != 0)
+				error = SET_ERROR(error);
 		}
+
 		if (error != 0) {
-			fnvlist_add_int32(dbca->dbca_errors,
-			    nvpair_name(pair), error);
 			rv = error;
+			if (dbca->dbca_errors != NULL)
+				fnvlist_add_int32(dbca->dbca_errors,
+				    new, error);
 		}
 	}
 
 	return (rv);
 }
 
-static void
-dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
+static dsl_bookmark_node_t *
+dsl_bookmark_node_alloc(char *shortname)
 {
-	dsl_bookmark_create_arg_t *dbca = arg;
+	dsl_bookmark_node_t *dbn = kmem_alloc(sizeof (*dbn), KM_SLEEP);
+	dbn->dbn_name = spa_strdup(shortname);
+	dbn->dbn_dirty = B_FALSE;
+	mutex_init(&dbn->dbn_lock, NULL, MUTEX_DEFAULT, NULL);
+	return (dbn);
+}
+
+/*
+ * Set the fields in the zfs_bookmark_phys_t based on the specified snapshot.
+ */
+static void
+dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
+{
+	spa_t *spa = dsl_dataset_get_spa(snap);
+	objset_t *mos = spa_get_dsl(spa)->dp_meta_objset;
+	dsl_dataset_phys_t *dsp = dsl_dataset_phys(snap);
+
+	memset(zbm, 0, sizeof (zfs_bookmark_phys_t));
+	zbm->zbm_guid = dsp->ds_guid;
+	zbm->zbm_creation_txg = dsp->ds_creation_txg;
+	zbm->zbm_creation_time = dsp->ds_creation_time;
+	zbm->zbm_redaction_obj = 0;
+
+	/*
+	 * If the dataset is encrypted create a larger bookmark to
+	 * accommodate the IVset guid. The IVset guid was added
+	 * after the encryption feature to prevent a problem with
+	 * raw sends. If we encounter an encrypted dataset without
+	 * an IVset guid we fall back to a normal bookmark.
+	 */
+	if (snap->ds_dir->dd_crypto_obj != 0 &&
+	    spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) {
+		(void) zap_lookup(mos, snap->ds_object,
+		    DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
+		    &zbm->zbm_ivset_guid);
+	}
+
+	if (spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_WRITTEN)) {
+		zbm->zbm_flags = ZBM_FLAG_SNAPSHOT_EXISTS | ZBM_FLAG_HAS_FBN;
+		zbm->zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;
+		zbm->zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;
+		zbm->zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;
+
+		dsl_dataset_t *nextds;
+		VERIFY0(dsl_dataset_hold_obj(snap->ds_dir->dd_pool,
+		    dsp->ds_next_snap_obj, FTAG, &nextds));
+		dsl_deadlist_space(&nextds->ds_deadlist,
+		    &zbm->zbm_referenced_freed_before_next_snap,
+		    &zbm->zbm_compressed_freed_before_next_snap,
+		    &zbm->zbm_uncompressed_freed_before_next_snap);
+		dsl_dataset_rele(nextds, FTAG);
+	}
+}
+
+/*
+ * Add dsl_bookmark_node_t `dbn` to the given dataset and increment appropriate
+ * SPA feature counters.
+ */
+void
+dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
+    dmu_tx_t *tx)
+{
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	objset_t *mos = dp->dp_meta_objset;
 
-	ASSERT(spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS));
+	if (hds->ds_bookmarks_obj == 0) {
+		hds->ds_bookmarks_obj = zap_create_norm(mos,
+		    U8_TEXTPREP_TOUPPER, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0,
+		    tx);
+		spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
+
+		dsl_dataset_zapify(hds, tx);
+		VERIFY0(zap_add(mos, hds->ds_object,
+		    DS_FIELD_BOOKMARK_NAMES,
+		    sizeof (hds->ds_bookmarks_obj), 1,
+		    &hds->ds_bookmarks_obj, tx));
+	}
+
+	avl_add(&hds->ds_bookmarks, dbn);
+
+	/*
+	 * To maintain backwards compatibility with software that doesn't
+	 * understand SPA_FEATURE_BOOKMARK_V2, we need to use the smallest
+	 * possible bookmark size.
+	 */
+	uint64_t bookmark_phys_size = BOOKMARK_PHYS_SIZE_V1;
+	if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2) &&
+	    (dbn->dbn_phys.zbm_ivset_guid != 0 || dbn->dbn_phys.zbm_flags &
+	    ZBM_FLAG_HAS_FBN || dbn->dbn_phys.zbm_redaction_obj != 0)) {
+		bookmark_phys_size = BOOKMARK_PHYS_SIZE_V2;
+		spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2, tx);
+	}
+
+	__attribute__((unused)) zfs_bookmark_phys_t zero_phys = { 0 };
+	ASSERT0(bcmp(((char *)&dbn->dbn_phys) + bookmark_phys_size,
+	    &zero_phys, sizeof (zfs_bookmark_phys_t) - bookmark_phys_size));
+
+	VERIFY0(zap_add(mos, hds->ds_bookmarks_obj, dbn->dbn_name,
+	    sizeof (uint64_t), bookmark_phys_size / sizeof (uint64_t),
+	    &dbn->dbn_phys, tx));
+}
+
+/*
+ * If redaction_list is non-null, we create a redacted bookmark and redaction
+ * list, and store the redaction list's object number in zbm_redaction_obj.
+ */
+static void
+dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
+    dmu_tx_t *tx, uint64_t num_redact_snaps, uint64_t *redact_snaps, void *tag,
+    redaction_list_t **redaction_list)
+{
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	objset_t *mos = dp->dp_meta_objset;
+	dsl_dataset_t *snapds, *bmark_fs;
+	char *shortname;
+	boolean_t bookmark_redacted;
+	uint64_t *dsredactsnaps;
+	uint64_t dsnumsnaps;
+
+	VERIFY0(dsl_dataset_hold(dp, snapshot, FTAG, &snapds));
+	VERIFY0(dsl_bookmark_hold_ds(dp, bookmark, &bmark_fs, FTAG,
+	    &shortname));
+
+	dsl_bookmark_node_t *dbn = dsl_bookmark_node_alloc(shortname);
+	dsl_bookmark_set_phys(&dbn->dbn_phys, snapds);
+
+	bookmark_redacted = dsl_dataset_get_uint64_array_feature(snapds,
+	    SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
+	if (redaction_list != NULL || bookmark_redacted) {
+		redaction_list_t *local_rl;
+		if (bookmark_redacted) {
+			redact_snaps = dsredactsnaps;
+			num_redact_snaps = dsnumsnaps;
+		}
+		dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
+		    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
+		    DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
+		    num_redact_snaps * sizeof (uint64_t), tx);
+		spa_feature_incr(dp->dp_spa,
+		    SPA_FEATURE_REDACTION_BOOKMARKS, tx);
+
+		VERIFY0(dsl_redaction_list_hold_obj(dp,
+		    dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
+		dsl_redaction_list_long_hold(dp, local_rl, tag);
+
+		ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
+		    sizeof (redaction_list_phys_t) + num_redact_snaps *
+		    sizeof (uint64_t));
+		dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
+		bcopy(redact_snaps, local_rl->rl_phys->rlp_snaps,
+		    sizeof (uint64_t) * num_redact_snaps);
+		local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
+		if (bookmark_redacted) {
+			ASSERT3P(redaction_list, ==, NULL);
+			local_rl->rl_phys->rlp_last_blkid = UINT64_MAX;
+			local_rl->rl_phys->rlp_last_object = UINT64_MAX;
+			dsl_redaction_list_long_rele(local_rl, tag);
+			dsl_redaction_list_rele(local_rl, tag);
+		} else {
+			*redaction_list = local_rl;
+		}
+	}
+
+	if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
+		spa_feature_incr(dp->dp_spa,
+		    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
+	}
+
+	dsl_bookmark_node_add(bmark_fs, dbn, tx);
+
+	spa_history_log_internal_ds(bmark_fs, "bookmark", tx,
+	    "name=%s creation_txg=%llu target_snap=%llu redact_obj=%llu",
+	    shortname, (longlong_t)dbn->dbn_phys.zbm_creation_txg,
+	    (longlong_t)snapds->ds_object,
+	    (longlong_t)dbn->dbn_phys.zbm_redaction_obj);
+
+	dsl_dataset_rele(bmark_fs, FTAG);
+	dsl_dataset_rele(snapds, FTAG);
+}
+
+
+static void
+dsl_bookmark_create_sync_impl_book(
+    const char *new_name, const char *source_name, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *bmark_fs_source, *bmark_fs_new;
+	char *source_shortname, *new_shortname;
+	zfs_bookmark_phys_t source_phys;
+
+	VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG,
+	    &source_shortname));
+	VERIFY0(dsl_bookmark_hold_ds(dp, new_name, &bmark_fs_new, FTAG,
+	    &new_shortname));
+
+	/*
+	 * create a copy of the source bookmark by copying most of its members
+	 *
+	 * Caveat: bookmarking a redaction bookmark yields a normal bookmark
+	 * -----------------------------------------------------------------
+	 * Reasoning:
+	 * - The zbm_redaction_obj would be referred to by both source and new
+	 *   bookmark, but would be destroyed once either source or new is
+	 *   destroyed, resulting in use-after-free of the referred object.
+	 * - User expectation when issuing the `zfs bookmark` command is that
+	 *   a normal bookmark of the source is created
+	 *
+	 * Design Alternatives For Full Redaction Bookmark Copying:
+	 * - reference-count the redaction object => would require on-disk
+	 *   format change for existing redaction objects
+	 * - Copy the redaction object => cannot be done in syncing context
+	 *   because the redaction object might be too large
+	 */
+
+	VERIFY0(dsl_bookmark_lookup_impl(bmark_fs_source, source_shortname,
+	    &source_phys));
+	dsl_bookmark_node_t *new_dbn = dsl_bookmark_node_alloc(new_shortname);
+
+	memcpy(&new_dbn->dbn_phys, &source_phys, sizeof (source_phys));
+	new_dbn->dbn_phys.zbm_redaction_obj = 0;
+
+	/* update feature counters */
+	if (new_dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
+		spa_feature_incr(dp->dp_spa,
+		    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
+	}
+	/* no need for redaction bookmark counter; nulled zbm_redaction_obj */
+	/* dsl_bookmark_node_add bumps bookmarks and v2-bookmarks counter */
+
+	/*
+	 * write new bookmark
+	 *
+	 * Note that dsl_bookmark_lookup_impl guarantees that, if source is a
+	 * v1 bookmark, the v2-only fields are zeroed.
+	 * And dsl_bookmark_node_add writes back a v1-sized bookmark if
+	 * v2 bookmarks are disabled and/or v2-only fields are zeroed.
+	 * => bookmark copying works on pre-bookmark-v2 pools
+	 */
+	dsl_bookmark_node_add(bmark_fs_new, new_dbn, tx);
+
+	spa_history_log_internal_ds(bmark_fs_source, "bookmark", tx,
+	    "name=%s creation_txg=%llu source_guid=%llu",
+	    new_shortname, (longlong_t)new_dbn->dbn_phys.zbm_creation_txg,
+	    (longlong_t)source_phys.zbm_guid);
+
+	dsl_dataset_rele(bmark_fs_source, FTAG);
+	dsl_dataset_rele(bmark_fs_new, FTAG);
+}
+
+void
+dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_bookmark_create_arg_t *dbca = arg;
+
+	ASSERT(spa_feature_is_enabled(dmu_tx_pool(tx)->dp_spa,
+	    SPA_FEATURE_BOOKMARKS));
 
 	for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
 	    pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
-		dsl_dataset_t *snapds, *bmark_fs;
-		zfs_bookmark_phys_t bmark_phys = { 0 };
-		char *shortname;
-		uint32_t bmark_len = BOOKMARK_PHYS_SIZE_V1;
 
-		VERIFY0(dsl_dataset_hold(dp, fnvpair_value_string(pair),
-		    FTAG, &snapds));
-		VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
-		    &bmark_fs, FTAG, &shortname));
-		if (bmark_fs->ds_bookmarks == 0) {
-			bmark_fs->ds_bookmarks =
-			    zap_create_norm(mos, U8_TEXTPREP_TOUPPER,
-			    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
-			spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
+		char *new = nvpair_name(pair);
+		char *source = fnvpair_value_string(pair);
 
-			dsl_dataset_zapify(bmark_fs, tx);
-			VERIFY0(zap_add(mos, bmark_fs->ds_object,
-			    DS_FIELD_BOOKMARK_NAMES,
-			    sizeof (bmark_fs->ds_bookmarks), 1,
-			    &bmark_fs->ds_bookmarks, tx));
+		if (strchr(source, '@') != NULL) {
+			dsl_bookmark_create_sync_impl_snap(new, source, tx,
+			    0, NULL, NULL, NULL);
+		} else if (strchr(source, '#') != NULL) {
+			dsl_bookmark_create_sync_impl_book(new, source, tx);
+		} else {
+			panic("unreachable code");
 		}
 
-		bmark_phys.zbm_guid = dsl_dataset_phys(snapds)->ds_guid;
-		bmark_phys.zbm_creation_txg =
-		    dsl_dataset_phys(snapds)->ds_creation_txg;
-		bmark_phys.zbm_creation_time =
-		    dsl_dataset_phys(snapds)->ds_creation_time;
-
-		/*
-		 * If the dataset is encrypted create a larger bookmark to
-		 * accommodate the IVset guid. The IVset guid was added
-		 * after the encryption feature to prevent a problem with
-		 * raw sends. If we encounter an encrypted dataset without
-		 * an IVset guid we fall back to a normal bookmark.
-		 */
-		if (snapds->ds_dir->dd_crypto_obj != 0 &&
-		    spa_feature_is_enabled(dp->dp_spa,
-		    SPA_FEATURE_BOOKMARK_V2)) {
-			int err = zap_lookup(mos, snapds->ds_object,
-			    DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
-			    &bmark_phys.zbm_ivset_guid);
-			if (err == 0) {
-				bmark_len = BOOKMARK_PHYS_SIZE_V2;
-				spa_feature_incr(dp->dp_spa,
-				    SPA_FEATURE_BOOKMARK_V2, tx);
-			}
-		}
-
-		VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks,
-		    shortname, sizeof (uint64_t),
-		    bmark_len / sizeof (uint64_t), &bmark_phys, tx));
-
-		spa_history_log_internal_ds(bmark_fs, "bookmark", tx,
-		    "name=%s creation_txg=%llu target_snap=%llu",
-		    shortname,
-		    (longlong_t)bmark_phys.zbm_creation_txg,
-		    (longlong_t)snapds->ds_object);
-
-		dsl_dataset_rele(bmark_fs, FTAG);
-		dsl_dataset_rele(snapds, FTAG);
 	}
 }
 
@@ -277,58 +627,268 @@
 	    fnvlist_num_pairs(bmarks), ZFS_SPACE_CHECK_NORMAL));
 }
 
+static int
+dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_bookmark_create_redacted_arg_t *dbcra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	int rv = 0;
+
+	if (!spa_feature_is_enabled(dp->dp_spa,
+	    SPA_FEATURE_REDACTION_BOOKMARKS))
+		return (SET_ERROR(ENOTSUP));
+	/*
+	 * If the list of redact snaps will not fit in the bonus buffer with
+	 * the furthest reached object and offset, fail.
+	 */
+	if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
+	    sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
+		return (SET_ERROR(E2BIG));
+
+	if (dsl_bookmark_create_nvl_validate_pair(
+	    dbcra->dbcra_bmark, dbcra->dbcra_snap) != 0)
+		return (SET_ERROR(EINVAL));
+
+	rv = dsl_bookmark_create_check_impl(dp,
+	    dbcra->dbcra_bmark, dbcra->dbcra_snap);
+	return (rv);
+}
+
+static void
+dsl_bookmark_create_redacted_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_bookmark_create_redacted_arg_t *dbcra = arg;
+	dsl_bookmark_create_sync_impl_snap(dbcra->dbcra_bmark,
+	    dbcra->dbcra_snap, tx, dbcra->dbcra_numsnaps, dbcra->dbcra_snaps,
+	    dbcra->dbcra_tag, dbcra->dbcra_rl);
+}
+
+int
+dsl_bookmark_create_redacted(const char *bookmark, const char *snapshot,
+    uint64_t numsnaps, uint64_t *snapguids, void *tag, redaction_list_t **rl)
+{
+	dsl_bookmark_create_redacted_arg_t dbcra;
+
+	dbcra.dbcra_bmark = bookmark;
+	dbcra.dbcra_snap = snapshot;
+	dbcra.dbcra_rl = rl;
+	dbcra.dbcra_numsnaps = numsnaps;
+	dbcra.dbcra_snaps = snapguids;
+	dbcra.dbcra_tag = tag;
+
+	return (dsl_sync_task(bookmark, dsl_bookmark_create_redacted_check,
+	    dsl_bookmark_create_redacted_sync, &dbcra, 5,
+	    ZFS_SPACE_CHECK_NORMAL));
+}
+
+/*
+ * Retrieve the list of properties given in the 'props' nvlist for a bookmark.
+ * If 'props' is NULL, retrieves all properties.
+ */
+static void
+dsl_bookmark_fetch_props(dsl_pool_t *dp, zfs_bookmark_phys_t *bmark_phys,
+    nvlist_t *props, nvlist_t *out_props)
+{
+	ASSERT3P(dp, !=, NULL);
+	ASSERT3P(bmark_phys, !=, NULL);
+	ASSERT3P(out_props, !=, NULL);
+	ASSERT(RRW_LOCK_HELD(&dp->dp_config_rwlock));
+
+	if (props == NULL || nvlist_exists(props,
+	    zfs_prop_to_name(ZFS_PROP_GUID))) {
+		dsl_prop_nvlist_add_uint64(out_props,
+		    ZFS_PROP_GUID, bmark_phys->zbm_guid);
+	}
+	if (props == NULL || nvlist_exists(props,
+	    zfs_prop_to_name(ZFS_PROP_CREATETXG))) {
+		dsl_prop_nvlist_add_uint64(out_props,
+		    ZFS_PROP_CREATETXG, bmark_phys->zbm_creation_txg);
+	}
+	if (props == NULL || nvlist_exists(props,
+	    zfs_prop_to_name(ZFS_PROP_CREATION))) {
+		dsl_prop_nvlist_add_uint64(out_props,
+		    ZFS_PROP_CREATION, bmark_phys->zbm_creation_time);
+	}
+	if (props == NULL || nvlist_exists(props,
+	    zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) {
+		dsl_prop_nvlist_add_uint64(out_props,
+		    ZFS_PROP_IVSET_GUID, bmark_phys->zbm_ivset_guid);
+	}
+	if (bmark_phys->zbm_flags & ZBM_FLAG_HAS_FBN) {
+		if (props == NULL || nvlist_exists(props,
+		    zfs_prop_to_name(ZFS_PROP_REFERENCED))) {
+			dsl_prop_nvlist_add_uint64(out_props,
+			    ZFS_PROP_REFERENCED,
+			    bmark_phys->zbm_referenced_bytes_refd);
+		}
+		if (props == NULL || nvlist_exists(props,
+		    zfs_prop_to_name(ZFS_PROP_LOGICALREFERENCED))) {
+			dsl_prop_nvlist_add_uint64(out_props,
+			    ZFS_PROP_LOGICALREFERENCED,
+			    bmark_phys->zbm_uncompressed_bytes_refd);
+		}
+		if (props == NULL || nvlist_exists(props,
+		    zfs_prop_to_name(ZFS_PROP_REFRATIO))) {
+			uint64_t ratio =
+			    bmark_phys->zbm_compressed_bytes_refd == 0 ? 100 :
+			    bmark_phys->zbm_uncompressed_bytes_refd * 100 /
+			    bmark_phys->zbm_compressed_bytes_refd;
+			dsl_prop_nvlist_add_uint64(out_props,
+			    ZFS_PROP_REFRATIO, ratio);
+		}
+	}
+
+	if ((props == NULL || nvlist_exists(props, "redact_snaps") ||
+	    nvlist_exists(props, "redact_complete")) &&
+	    bmark_phys->zbm_redaction_obj != 0) {
+		redaction_list_t *rl;
+		int err = dsl_redaction_list_hold_obj(dp,
+		    bmark_phys->zbm_redaction_obj, FTAG, &rl);
+		if (err == 0) {
+			if (nvlist_exists(props, "redact_snaps")) {
+				nvlist_t *nvl;
+				nvl = fnvlist_alloc();
+				fnvlist_add_uint64_array(nvl, ZPROP_VALUE,
+				    rl->rl_phys->rlp_snaps,
+				    rl->rl_phys->rlp_num_snaps);
+				fnvlist_add_nvlist(out_props, "redact_snaps",
+				    nvl);
+				nvlist_free(nvl);
+			}
+			if (nvlist_exists(props, "redact_complete")) {
+				nvlist_t *nvl;
+				nvl = fnvlist_alloc();
+				fnvlist_add_boolean_value(nvl, ZPROP_VALUE,
+				    rl->rl_phys->rlp_last_blkid == UINT64_MAX &&
+				    rl->rl_phys->rlp_last_object == UINT64_MAX);
+				fnvlist_add_nvlist(out_props, "redact_complete",
+				    nvl);
+				nvlist_free(nvl);
+			}
+			dsl_redaction_list_rele(rl, FTAG);
+		}
+	}
+}
+
 int
 dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl)
 {
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	if (dsl_dataset_is_snapshot(ds))
+		return (SET_ERROR(EINVAL));
+
+	for (dsl_bookmark_node_t *dbn = avl_first(&ds->ds_bookmarks);
+	    dbn != NULL; dbn = AVL_NEXT(&ds->ds_bookmarks, dbn)) {
+		nvlist_t *out_props = fnvlist_alloc();
+
+		dsl_bookmark_fetch_props(dp, &dbn->dbn_phys, props, out_props);
+
+		fnvlist_add_nvlist(outnvl, dbn->dbn_name, out_props);
+		fnvlist_free(out_props);
+	}
+	return (0);
+}
+
+/*
+ * Comparison func for ds_bookmarks AVL tree.  We sort the bookmarks by
+ * their TXG, then by their FBN-ness.  The "FBN-ness" component ensures
+ * that all bookmarks at the same TXG that HAS_FBN are adjacent, which
+ * dsl_bookmark_destroy_sync_impl() depends on.  Note that there may be
+ * multiple bookmarks at the same TXG (with the same FBN-ness).  In this
+ * case we differentiate them by an arbitrary metric (in this case,
+ * their names).
+ */
+static int
+dsl_bookmark_compare(const void *l, const void *r)
+{
+	const dsl_bookmark_node_t *ldbn = l;
+	const dsl_bookmark_node_t *rdbn = r;
+
+	int64_t cmp = TREE_CMP(ldbn->dbn_phys.zbm_creation_txg,
+	    rdbn->dbn_phys.zbm_creation_txg);
+	if (likely(cmp))
+		return (cmp);
+	cmp = TREE_CMP((ldbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN),
+	    (rdbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN));
+	if (likely(cmp))
+		return (cmp);
+	cmp = strcmp(ldbn->dbn_name, rdbn->dbn_name);
+	return (TREE_ISIGN(cmp));
+}
+
+/*
+ * Cache this (head) dataset's bookmarks in the ds_bookmarks AVL tree.
+ */
+int
+dsl_bookmark_init_ds(dsl_dataset_t *ds)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+
+	ASSERT(!ds->ds_is_snapshot);
+
+	avl_create(&ds->ds_bookmarks, dsl_bookmark_compare,
+	    sizeof (dsl_bookmark_node_t),
+	    offsetof(dsl_bookmark_node_t, dbn_node));
+
+	if (!dsl_dataset_is_zapified(ds))
+		return (0);
+
+	int zaperr = zap_lookup(mos, ds->ds_object, DS_FIELD_BOOKMARK_NAMES,
+	    sizeof (ds->ds_bookmarks_obj), 1, &ds->ds_bookmarks_obj);
+	if (zaperr == ENOENT)
+		return (0);
+	if (zaperr != 0)
+		return (zaperr);
+
+	if (ds->ds_bookmarks_obj == 0)
+		return (0);
+
 	int err = 0;
 	zap_cursor_t zc;
 	zap_attribute_t attr;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 
-	uint64_t bmark_zapobj = ds->ds_bookmarks;
-	if (bmark_zapobj == 0)
-		return (0);
-
-	for (zap_cursor_init(&zc, dp->dp_meta_objset, bmark_zapobj);
-	    zap_cursor_retrieve(&zc, &attr) == 0;
+	for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj);
+	    (err = zap_cursor_retrieve(&zc, &attr)) == 0;
 	    zap_cursor_advance(&zc)) {
-		char *bmark_name = attr.za_name;
-		zfs_bookmark_phys_t bmark_phys = { 0 };
+		dsl_bookmark_node_t *dbn =
+		    dsl_bookmark_node_alloc(attr.za_name);
 
-		err = dsl_dataset_bmark_lookup(ds, bmark_name, &bmark_phys);
+		err = dsl_bookmark_lookup_impl(ds,
+		    dbn->dbn_name, &dbn->dbn_phys);
 		ASSERT3U(err, !=, ENOENT);
-		if (err != 0)
+		if (err != 0) {
+			kmem_free(dbn, sizeof (*dbn));
 			break;
-
-		nvlist_t *out_props = fnvlist_alloc();
-		if (nvlist_exists(props,
-		    zfs_prop_to_name(ZFS_PROP_GUID))) {
-			dsl_prop_nvlist_add_uint64(out_props,
-			    ZFS_PROP_GUID, bmark_phys.zbm_guid);
 		}
-		if (nvlist_exists(props,
-		    zfs_prop_to_name(ZFS_PROP_CREATETXG))) {
-			dsl_prop_nvlist_add_uint64(out_props,
-			    ZFS_PROP_CREATETXG, bmark_phys.zbm_creation_txg);
-		}
-		if (nvlist_exists(props,
-		    zfs_prop_to_name(ZFS_PROP_CREATION))) {
-			dsl_prop_nvlist_add_uint64(out_props,
-			    ZFS_PROP_CREATION, bmark_phys.zbm_creation_time);
-		}
-		if (nvlist_exists(props,
-		    zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) {
-			dsl_prop_nvlist_add_uint64(out_props,
-			    ZFS_PROP_IVSET_GUID, bmark_phys.zbm_ivset_guid);
-		}
-
-		fnvlist_add_nvlist(outnvl, bmark_name, out_props);
-		fnvlist_free(out_props);
+		avl_add(&ds->ds_bookmarks, dbn);
 	}
 	zap_cursor_fini(&zc);
+	if (err == ENOENT)
+		err = 0;
 	return (err);
 }
 
+void
+dsl_bookmark_fini_ds(dsl_dataset_t *ds)
+{
+	void *cookie = NULL;
+	dsl_bookmark_node_t *dbn;
+
+	if (ds->ds_is_snapshot)
+		return;
+
+	while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) != NULL) {
+		spa_strfree(dbn->dbn_name);
+		mutex_destroy(&dbn->dbn_lock);
+		kmem_free(dbn, sizeof (*dbn));
+	}
+	avl_destroy(&ds->ds_bookmarks);
+}
+
 /*
  * Retrieve the bookmarks that exist in the specified dataset, and the
  * requested properties of each bookmark.
@@ -359,27 +919,69 @@
 	return (err);
 }
 
+/*
+ * Look up bookmark "bmname" of dataset "dsname" and add all of its
+ * properties to "props".
+ *
+ * The pool and the dataset are held for the duration of the call and
+ * released before returning.  Returns 0 on success, or the error from
+ * dsl_pool_hold(), dsl_dataset_hold() or dsl_bookmark_lookup_impl();
+ * "props" is only filled in when the lookup succeeds.
+ */
+int
+dsl_get_bookmark_props(const char *dsname, const char *bmname, nvlist_t *props)
+{
+	zfs_bookmark_phys_t phys = { 0 };
+	dsl_dataset_t *ds;
+	dsl_pool_t *dp;
+	int error;
+
+	if ((error = dsl_pool_hold(dsname, FTAG, &dp)) != 0)
+		return (error);
+
+	if ((error = dsl_dataset_hold(dp, dsname, FTAG, &ds)) != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+
+	/* Only fetch the properties when the bookmark was found. */
+	error = dsl_bookmark_lookup_impl(ds, bmname, &phys);
+	if (error == 0)
+		dsl_bookmark_fetch_props(dp, &phys, NULL, props);
+
+	dsl_dataset_rele(ds, FTAG);
+	dsl_pool_rele(dp, FTAG);
+	return (error);
+}
+
 typedef struct dsl_bookmark_destroy_arg {
 	nvlist_t *dbda_bmarks;
 	nvlist_t *dbda_success;
 	nvlist_t *dbda_errors;
 } dsl_bookmark_destroy_arg_t;
 
-static int
-dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
+static void
+dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
+    dmu_tx_t *tx)
 {
-	int err;
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	uint64_t bmark_zapobj = ds->ds_bookmarks;
+	uint64_t bmark_zapobj = ds->ds_bookmarks_obj;
 	matchtype_t mt = 0;
 	uint64_t int_size, num_ints;
+	/*
+	 * 'search' must be zeroed so that dbn_flags (which is used in
+	 * dsl_bookmark_compare()) will be zeroed even if the on-disk
+	 * (in ZAP) bookmark is shorter than offsetof(dbn_flags).
+	 */
+	dsl_bookmark_node_t search = { 0 };
+	char realname[ZFS_MAX_DATASET_NAME_LEN];
+
+	/*
+	 * Find the real name of this bookmark, which may be different
+	 * from the given name if the dataset is case-insensitive.  Then
+	 * use the real name to find the node in the ds_bookmarks AVL tree.
+	 */
 
 	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
 		mt = MT_NORMALIZE;
 
-	err = zap_length(mos, bmark_zapobj, name, &int_size, &num_ints);
-	if (err != 0)
-		return (err);
+	VERIFY0(zap_length(mos, bmark_zapobj, name, &int_size, &num_ints));
 
 	ASSERT3U(int_size, ==, sizeof (uint64_t));
 
@@ -387,8 +989,70 @@
 		spa_feature_decr(dmu_objset_spa(mos),
 		    SPA_FEATURE_BOOKMARK_V2, tx);
 	}
+	VERIFY0(zap_lookup_norm(mos, bmark_zapobj, name, sizeof (uint64_t),
+	    num_ints, &search.dbn_phys, mt, realname, sizeof (realname), NULL));
 
-	return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
+	search.dbn_name = realname;
+	dsl_bookmark_node_t *dbn = avl_find(&ds->ds_bookmarks, &search, NULL);
+	ASSERT(dbn != NULL);
+
+	if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
+		/*
+		 * If this bookmark HAS_FBN, and it is before the most
+		 * recent snapshot, then its TXG is a key in the head's
+		 * deadlist (and all clones' heads' deadlists).  If this is
+		 * the last thing keeping the key (i.e. there are no more
+		 * bookmarks with HAS_FBN at this TXG, and there is no
+		 * snapshot at this TXG), then remove the key.
+		 *
+		 * Note that this algorithm depends on ds_bookmarks being
+		 * sorted such that all bookmarks at the same TXG with
+		 * HAS_FBN are adjacent (with no non-HAS_FBN bookmarks
+		 * at the same TXG in between them).  If this were not
+		 * the case, we would need to examine *all* bookmarks
+		 * at this TXG, rather than just the adjacent ones.
+		 */
+
+		dsl_bookmark_node_t *dbn_prev =
+		    AVL_PREV(&ds->ds_bookmarks, dbn);
+		dsl_bookmark_node_t *dbn_next =
+		    AVL_NEXT(&ds->ds_bookmarks, dbn);
+
+		boolean_t more_bookmarks_at_this_txg =
+		    (dbn_prev != NULL && dbn_prev->dbn_phys.zbm_creation_txg ==
+		    dbn->dbn_phys.zbm_creation_txg &&
+		    (dbn_prev->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) ||
+		    (dbn_next != NULL && dbn_next->dbn_phys.zbm_creation_txg ==
+		    dbn->dbn_phys.zbm_creation_txg &&
+		    (dbn_next->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN));
+
+		if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_SNAPSHOT_EXISTS) &&
+		    !more_bookmarks_at_this_txg &&
+		    dbn->dbn_phys.zbm_creation_txg <
+		    dsl_dataset_phys(ds)->ds_prev_snap_txg) {
+			dsl_dir_remove_clones_key(ds->ds_dir,
+			    dbn->dbn_phys.zbm_creation_txg, tx);
+			dsl_deadlist_remove_key(&ds->ds_deadlist,
+			    dbn->dbn_phys.zbm_creation_txg, tx);
+		}
+
+		spa_feature_decr(dmu_objset_spa(mos),
+		    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
+	}
+
+	if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+		VERIFY0(dmu_object_free(mos,
+		    dbn->dbn_phys.zbm_redaction_obj, tx));
+		spa_feature_decr(dmu_objset_spa(mos),
+		    SPA_FEATURE_REDACTION_BOOKMARKS, tx);
+	}
+
+	avl_remove(&ds->ds_bookmarks, dbn);
+	spa_strfree(dbn->dbn_name);
+	mutex_destroy(&dbn->dbn_lock);
+	kmem_free(dbn, sizeof (*dbn));
+
+	VERIFY0(zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
 }
 
 static int
@@ -419,7 +1083,7 @@
 			continue;
 		}
 		if (error == 0) {
-			error = dsl_dataset_bmark_lookup(ds, shortname, &bm);
+			error = dsl_bookmark_lookup_impl(ds, shortname, &bm);
 			dsl_dataset_rele(ds, FTAG);
 			if (error == ESRCH) {
 				/*
@@ -428,6 +1092,20 @@
 				 */
 				continue;
 			}
+			if (error == 0 && bm.zbm_redaction_obj != 0) {
+				redaction_list_t *rl = NULL;
+				error = dsl_redaction_list_hold_obj(tx->tx_pool,
+				    bm.zbm_redaction_obj, FTAG, &rl);
+				if (error == ENOENT) {
+					error = 0;
+				} else if (error == 0 &&
+				    dsl_redaction_list_long_held(rl)) {
+					error = SET_ERROR(EBUSY);
+				}
+				if (rl != NULL) {
+					dsl_redaction_list_rele(rl, FTAG);
+				}
+			}
 		}
 		if (error == 0) {
 			if (dmu_tx_is_syncing(tx)) {
@@ -457,18 +1135,17 @@
 
 		VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
 		    &ds, FTAG, &shortname));
-		VERIFY0(dsl_dataset_bookmark_remove(ds, shortname, tx));
+		dsl_bookmark_destroy_sync_impl(ds, shortname, tx);
 
 		/*
 		 * If all of this dataset's bookmarks have been destroyed,
 		 * free the zap object and decrement the feature's use count.
 		 */
-		VERIFY0(zap_count(mos, ds->ds_bookmarks,
-		    &zap_cnt));
+		VERIFY0(zap_count(mos, ds->ds_bookmarks_obj, &zap_cnt));
 		if (zap_cnt == 0) {
 			dmu_buf_will_dirty(ds->ds_dbuf, tx);
-			VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
-			ds->ds_bookmarks = 0;
+			VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
+			ds->ds_bookmarks_obj = 0;
 			spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
 			VERIFY0(zap_remove(mos, ds->ds_object,
 			    DS_FIELD_BOOKMARK_NAMES, tx));
@@ -503,3 +1180,553 @@
 	fnvlist_free(dbda.dbda_success);
 	return (rv);
 }
+
+/*
+ * Report whether this redaction list has any long holds, i.e. whether
+ * its rl_longholds refcount is nonzero.
+ */
+boolean_t
+dsl_redaction_list_long_held(redaction_list_t *rl)
+{
+	boolean_t no_holds = zfs_refcount_is_zero(&rl->rl_longholds);
+
+	return (!no_holds);
+}
+
+/*
+ * Take a long hold on the redaction list by adding a reference for "tag"
+ * to rl_longholds.  The pool config is asserted to be held by the caller.
+ * Dropped again with dsl_redaction_list_long_rele().
+ */
+void
+dsl_redaction_list_long_hold(dsl_pool_t *dp, redaction_list_t *rl, void *tag)
+{
+	ASSERT(dsl_pool_config_held(dp));
+	(void) zfs_refcount_add(&rl->rl_longholds, tag);
+}
+
+/*
+ * Drop the long hold on the redaction list that was taken with "tag" by
+ * removing that reference from rl_longholds.
+ */
+void
+dsl_redaction_list_long_rele(redaction_list_t *rl, void *tag)
+{
+	(void) zfs_refcount_remove(&rl->rl_longholds, tag);
+}
+
+/*
+ * Eviction callback registered through dmu_buf_init_user() in
+ * dsl_redaction_list_hold_obj().  "rlu" is the redaction_list_t that was
+ * attached to the dbuf; its long-hold refcount is destroyed and the
+ * structure is freed.
+ */
+static void
+redaction_list_evict_sync(void *rlu)
+{
+	redaction_list_t *rl = rlu;
+	zfs_refcount_destroy(&rl->rl_longholds);
+
+	kmem_free(rl, sizeof (redaction_list_t));
+}
+
+/*
+ * Release a redaction list obtained from dsl_redaction_list_hold_obj() by
+ * releasing the hold that "tag" has on its underlying dbuf.
+ */
+void
+dsl_redaction_list_rele(redaction_list_t *rl, void *tag)
+{
+	dmu_buf_rele(rl->rl_dbuf, tag);
+}
+
+/*
+ * Hold the redaction list stored in MOS object "rlobj" and return its
+ * in-core redaction_list_t through "rlp".
+ *
+ * The bonus buffer of the object is held with "tag"; the caller drops it
+ * again with dsl_redaction_list_rele().  The redaction_list_t is kept as
+ * the dbuf's user data, so it is created on the first hold and reused by
+ * later ones; it is freed by redaction_list_evict_sync().
+ *
+ * The pool config is asserted to be held.  Returns 0 on success, or the
+ * error from dmu_bonus_hold(), in which case *rlp is left untouched.
+ */
+int
+dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, void *tag,
+    redaction_list_t **rlp)
+{
+	objset_t *mos = dp->dp_meta_objset;
+	dmu_buf_t *dbuf;
+	redaction_list_t *rl;
+	int err;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	err = dmu_bonus_hold(mos, rlobj, tag, &dbuf);
+	if (err != 0)
+		return (err);
+
+	rl = dmu_buf_get_user(dbuf);
+	if (rl == NULL) {
+		redaction_list_t *winner = NULL;
+
+		rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
+		rl->rl_dbuf = dbuf;
+		rl->rl_object = rlobj;
+		rl->rl_phys = dbuf->db_data;
+		rl->rl_mos = dp->dp_meta_objset;
+		zfs_refcount_create(&rl->rl_longholds);
+		dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
+		    &rl->rl_dbuf);
+		if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
+			/*
+			 * The dbuf already had a user attached, so ours is
+			 * not needed.  Tear it down the same way
+			 * redaction_list_evict_sync() does (refcount first,
+			 * then the memory) and use the existing one.
+			 */
+			zfs_refcount_destroy(&rl->rl_longholds);
+			kmem_free(rl, sizeof (*rl));
+			rl = winner;
+		}
+	}
+	*rlp = rl;
+	return (0);
+}
+
+/*
+ * Snapshot ds is being destroyed.
+ *
+ * Adjust the "freed_before_next" of any bookmarks between this snap
+ * and the previous snapshot, because their "next snapshot" is changing.
+ *
+ * If there are any bookmarks with HAS_FBN at this snapshot, clear
+ * their SNAPSHOT_EXISTS flag (note: there can be at most one snapshot of
+ * each filesystem at a given txg), and return B_TRUE.  In this case
+ * the caller can not remove the key in the deadlist at this TXG, because
+ * the HAS_FBN bookmarks require the key be there.
+ *
+ * Returns B_FALSE if there are no bookmarks with HAS_FBN at this
+ * snapshot's TXG.  In this case the caller can remove the key in the
+ * deadlist at this TXG.
+ *
+ * Every modified bookmark is written back to the head dataset's bookmark
+ * ZAP in the given tx.
+ */
+boolean_t
+dsl_bookmark_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+	/*
+	 * Hold the head dataset of ds's dsl_dir (whose ds_bookmarks AVL is
+	 * walked below) and the snapshot that follows ds (whose deadlist
+	 * is consulted below).  Both holds are dropped before returning.
+	 */
+	dsl_dataset_t *head, *next;
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &head));
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &next));
+
+	/*
+	 * Find the first bookmark that HAS_FBN at or after the
+	 * previous snapshot.
+	 */
+	dsl_bookmark_node_t search = { 0 };
+	avl_index_t idx;
+	search.dbn_phys.zbm_creation_txg =
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
+	search.dbn_phys.zbm_flags = ZBM_FLAG_HAS_FBN;
+	/*
+	 * The empty-string name can't be in the AVL, and it compares
+	 * before any entries with this TXG.
+	 */
+	search.dbn_name = "";
+	VERIFY3P(avl_find(&head->ds_bookmarks, &search, &idx), ==, NULL);
+	dsl_bookmark_node_t *dbn =
+	    avl_nearest(&head->ds_bookmarks, idx, AVL_AFTER);
+
+	/*
+	 * Iterate over all bookmarks that are at or after the previous
+	 * snapshot, and before this (being deleted) snapshot.  Adjust
+	 * their FBN based on their new next snapshot.
+	 */
+	for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg <
+	    dsl_dataset_phys(ds)->ds_creation_txg;
+	    dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
+		if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN))
+			continue;
+		/*
+		 * Increase our FBN by the amount of space that was live
+		 * (referenced) at the time of this bookmark (i.e.
+		 * birth <= zbm_creation_txg), and killed between this
+		 * (being deleted) snapshot and the next snapshot (i.e.
+		 * on the next snapshot's deadlist).  (Space killed before
+		 * this is already on our FBN.)
+		 */
+		uint64_t referenced, compressed, uncompressed;
+		dsl_deadlist_space_range(&next->ds_deadlist,
+		    0, dbn->dbn_phys.zbm_creation_txg,
+		    &referenced, &compressed, &uncompressed);
+		dbn->dbn_phys.zbm_referenced_freed_before_next_snap +=
+		    referenced;
+		dbn->dbn_phys.zbm_compressed_freed_before_next_snap +=
+		    compressed;
+		dbn->dbn_phys.zbm_uncompressed_freed_before_next_snap +=
+		    uncompressed;
+		/* Persist the updated counters in the bookmark ZAP. */
+		VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
+		    dbn->dbn_name, sizeof (uint64_t),
+		    sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
+		    &dbn->dbn_phys, tx));
+	}
+	dsl_dataset_rele(next, FTAG);
+
+	/*
+	 * There may be several bookmarks at this txg (the TXG of the
+	 * snapshot being deleted).  We need to clear the SNAPSHOT_EXISTS
+	 * flag on all of them, and return TRUE if there is at least 1
+	 * bookmark here with HAS_FBN (thus preventing the deadlist
+	 * key from being removed).
+	 *
+	 * Note that dbn is not reset: this loop continues from the node
+	 * at which the previous loop stopped.
+	 */
+	boolean_t rv = B_FALSE;
+	for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg ==
+	    dsl_dataset_phys(ds)->ds_creation_txg;
+	    dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
+		if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
+			ASSERT(!(dbn->dbn_phys.zbm_flags &
+			    ZBM_FLAG_SNAPSHOT_EXISTS));
+			continue;
+		}
+		ASSERT(dbn->dbn_phys.zbm_flags & ZBM_FLAG_SNAPSHOT_EXISTS);
+		dbn->dbn_phys.zbm_flags &= ~ZBM_FLAG_SNAPSHOT_EXISTS;
+		VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
+		    dbn->dbn_name, sizeof (uint64_t),
+		    sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
+		    &dbn->dbn_phys, tx));
+		rv = B_TRUE;
+	}
+	dsl_dataset_rele(head, FTAG);
+	return (rv);
+}
+
+/*
+ * Called when a snapshot of this (head) dataset is being created.
+ *
+ * Deadlist keys are not kept for the most recent snapshot nor for any
+ * bookmark at or after it, since no block on the deadlist can fall in
+ * that range.  Once the new snapshot exists it is newer than every
+ * bookmark, so those keys have to be added now.  The caller always adds
+ * the key for the previous snapshot itself, hence only bookmarks created
+ * after ds_prev_snap_txg are handled here.
+ *
+ * The tree is walked from the newest bookmark backwards, and one key is
+ * added per distinct creation TXG that has a HAS_FBN bookmark.
+ */
+void
+dsl_bookmark_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_bookmark_node_t *node = avl_last(&ds->ds_bookmarks);
+	uint64_t last_key_added = UINT64_MAX;
+
+	while (node != NULL && node->dbn_phys.zbm_creation_txg >
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg) {
+		uint64_t creation_txg = node->dbn_phys.zbm_creation_txg;
+		boolean_t has_fbn =
+		    (node->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) != 0;
+
+		ASSERT3U(creation_txg, <=, last_key_added);
+		/*
+		 * Several bookmarks may share one TXG.  Because the AVL
+		 * is sorted by TXG they are visited back to back, so
+		 * remembering the last key added is enough to add each
+		 * key only once.
+		 */
+		if (has_fbn && creation_txg != last_key_added) {
+			dsl_deadlist_add_key(&ds->ds_deadlist,
+			    creation_txg, tx);
+			last_key_added = creation_txg;
+		}
+		node = AVL_PREV(&ds->ds_bookmarks, node);
+	}
+}
+
+/*
+ * The next snapshot of the origin dataset has changed, due to
+ * promote or clone swap.  If there are any bookmarks at this dataset,
+ * we need to update their zbm_*_freed_before_next_snap to reflect this.
+ * The head dataset has the relevant bookmarks in ds_bookmarks.
+ *
+ * head   - dataset whose ds_bookmarks AVL and bookmark ZAP are updated
+ * origin - snapshot whose creation TXG selects the bookmarks to reset
+ * tx     - transaction in which the ZAP entries are rewritten
+ */
+void
+dsl_bookmark_next_changed(dsl_dataset_t *head, dsl_dataset_t *origin,
+    dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+
+	/*
+	 * Find the first bookmark that HAS_FBN at the origin snapshot.
+	 */
+	dsl_bookmark_node_t search = { 0 };
+	avl_index_t idx;
+	search.dbn_phys.zbm_creation_txg =
+	    dsl_dataset_phys(origin)->ds_creation_txg;
+	search.dbn_phys.zbm_flags = ZBM_FLAG_HAS_FBN;
+	/*
+	 * The empty-string name can't be in the AVL, and it compares
+	 * before any entries with this TXG.
+	 */
+	search.dbn_name = "";
+	VERIFY3P(avl_find(&head->ds_bookmarks, &search, &idx), ==, NULL);
+	dsl_bookmark_node_t *dbn =
+	    avl_nearest(&head->ds_bookmarks, idx, AVL_AFTER);
+
+	/*
+	 * Iterate over all bookmarks that are at the origin txg.
+	 * Adjust their FBN based on their new next snapshot.
+	 * The walk ends at the first node that is at a different TXG or
+	 * that does not have HAS_FBN set.
+	 */
+	for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg ==
+	    dsl_dataset_phys(origin)->ds_creation_txg &&
+	    (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN);
+	    dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
+
+		/*
+		 * Bookmark is at the origin, therefore its
+		 * "next dataset" is changing, so we need
+		 * to reset its FBN by recomputing it in
+		 * dsl_bookmark_set_phys().
+		 */
+		ASSERT3U(dbn->dbn_phys.zbm_guid, ==,
+		    dsl_dataset_phys(origin)->ds_guid);
+		ASSERT3U(dbn->dbn_phys.zbm_referenced_bytes_refd, ==,
+		    dsl_dataset_phys(origin)->ds_referenced_bytes);
+		ASSERT(dbn->dbn_phys.zbm_flags &
+		    ZBM_FLAG_SNAPSHOT_EXISTS);
+		/*
+		 * Save and restore the zbm_redaction_obj, which
+		 * is zeroed by dsl_bookmark_set_phys().
+		 */
+		uint64_t redaction_obj =
+		    dbn->dbn_phys.zbm_redaction_obj;
+		dsl_bookmark_set_phys(&dbn->dbn_phys, origin);
+		dbn->dbn_phys.zbm_redaction_obj = redaction_obj;
+
+		/* Persist the recomputed bookmark in head's bookmark ZAP. */
+		VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
+		    dbn->dbn_name, sizeof (uint64_t),
+		    sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
+		    &dbn->dbn_phys, tx));
+	}
+}
+
+/*
+ * This block is no longer referenced by this (head) dataset.
+ *
+ * Adjust the FBN of any bookmarks that reference this block, whose "next"
+ * is the head dataset.
+ *
+ * Only the in-core bookmark nodes are changed here; they are flagged
+ * dirty and written to the ZAP later by dsl_bookmark_sync_done().
+ * The tx argument is not used.
+ */
+void
+dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	(void) tx;
+
+	/*
+	 * Iterate over bookmarks whose "next" is the head dataset.
+	 * These are the ones created at or after ds_prev_snap_txg, so
+	 * walk backwards from the newest bookmark until an older one is
+	 * reached.
+	 */
+	for (dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
+	    dbn != NULL && dbn->dbn_phys.zbm_creation_txg >=
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
+	    dbn = AVL_PREV(&ds->ds_bookmarks, dbn)) {
+		/*
+		 * If the block was live (referenced) at the time of this
+		 * bookmark, add its space to the bookmark's FBN.
+		 */
+		if (bp->blk_birth <= dbn->dbn_phys.zbm_creation_txg &&
+		    (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
+			/* The counters and dbn_dirty change under dbn_lock. */
+			mutex_enter(&dbn->dbn_lock);
+			dbn->dbn_phys.zbm_referenced_freed_before_next_snap +=
+			    bp_get_dsize_sync(dsl_dataset_get_spa(ds), bp);
+			dbn->dbn_phys.zbm_compressed_freed_before_next_snap +=
+			    BP_GET_PSIZE(bp);
+			dbn->dbn_phys.zbm_uncompressed_freed_before_next_snap +=
+			    BP_GET_UCSIZE(bp);
+			/*
+			 * Changing the ZAP object here would be too
+			 * expensive.  Also, we may be called from the zio
+			 * interrupt thread, which can't block on i/o.
+			 * Therefore, we mark this bookmark as dirty and
+			 * modify the ZAP once per txg, in
+			 * dsl_bookmark_sync_done().
+			 */
+			dbn->dbn_dirty = B_TRUE;
+			mutex_exit(&dbn->dbn_lock);
+		}
+	}
+}
+
+/*
+ * Write every dirty bookmark of this dataset to its bookmark ZAP in the
+ * given tx and clear the dirty flag.  Bookmarks are marked dirty by
+ * dsl_bookmark_block_killed().  Does nothing for snapshots.
+ */
+void
+dsl_bookmark_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+
+	if (dsl_dataset_is_snapshot(ds))
+		return;
+
+	/*
+	 * We only dirty bookmarks that are at or after the most recent
+	 * snapshot.  We can't create snapshots between
+	 * dsl_bookmark_block_killed() and dsl_bookmark_sync_done(), so we
+	 * don't need to look at any bookmarks before ds_prev_snap_txg.
+	 */
+	for (dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
+	    dbn != NULL && dbn->dbn_phys.zbm_creation_txg >=
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
+	    dbn = AVL_PREV(&ds->ds_bookmarks, dbn)) {
+		if (dbn->dbn_dirty) {
+			/*
+			 * We only dirty nodes with HAS_FBN, therefore
+			 * we can always use the current bookmark struct size.
+			 */
+			ASSERT(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN);
+			VERIFY0(zap_update(dp->dp_meta_objset,
+			    ds->ds_bookmarks_obj,
+			    dbn->dbn_name, sizeof (uint64_t),
+			    sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
+			    &dbn->dbn_phys, tx));
+			dbn->dbn_dirty = B_FALSE;
+		}
+	}
+#ifdef ZFS_DEBUG
+	/* Debug builds: check that no node anywhere in the tree is dirty. */
+	for (dsl_bookmark_node_t *dbn = avl_first(&ds->ds_bookmarks);
+	    dbn != NULL; dbn = AVL_NEXT(&ds->ds_bookmarks, dbn)) {
+		ASSERT(!dbn->dbn_dirty);
+	}
+#endif
+}
+
+/*
+ * Return the creation TXG of the last node in this dataset's ds_bookmarks
+ * AVL tree, i.e. of the most recent bookmark, or 0 when the tree is
+ * empty.  The pool config is asserted to be held.
+ */
+uint64_t
+dsl_bookmark_latest_txg(dsl_dataset_t *ds)
+{
+	dsl_bookmark_node_t *newest;
+
+	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
+	newest = avl_last(&ds->ds_bookmarks);
+	return (newest != NULL ? newest->dbn_phys.zbm_creation_txg : 0);
+}
+
+/*
+ * Position a redact_block_phys_t (a run of blocks within one object)
+ * relative to a bookmark (a single block of one object).
+ *
+ * Returns -1 when the whole run lies before the bookmark, 1 when the
+ * whole run lies after it, and 0 when the bookmark's block falls inside
+ * the run.
+ */
+static int
+redact_block_zb_compare(redact_block_phys_t *first,
+    zbookmark_phys_t *second)
+{
+	/* Different objects: the object numbers alone decide the order. */
+	if (first->rbp_object != second->zb_object)
+		return (first->rbp_object < second->zb_object ? -1 : 1);
+
+	/*
+	 * Same object.  The run is earlier when even its last block is
+	 * strictly before the bookmark's block.
+	 */
+	if (first->rbp_blkid + (redact_block_get_count(first) - 1) <
+	    second->zb_blkid)
+		return (-1);
+
+	/* The run is later when its first block is past the bookmark. */
+	if (first->rbp_blkid > second->zb_blkid)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Traverse the redaction list in the provided object, and call the callback for
+ * each entry we find. Don't call the callback for any records before resume.
+ *
+ * rl     - held redaction list to walk
+ * resume - position to resume from; an all-zero bookmark means "start at
+ *          the beginning"
+ * cb/arg - callback invoked for every entry, and its opaque argument
+ *
+ * Returns 0 on success (including for an empty list), EINVAL if the list
+ * is not complete, EINTR if the callback returned nonzero, or the error
+ * of a failed dmu_read().
+ */
+int
+dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
+    rl_traverse_callback_t cb, void *arg)
+{
+	objset_t *mos = rl->rl_mos;
+	int err = 0;
+
+	if (rl->rl_phys->rlp_last_object != UINT64_MAX ||
+	    rl->rl_phys->rlp_last_blkid != UINT64_MAX) {
+		/*
+		 * When we finish a send, we update the last object and offset
+		 * to UINT64_MAX.  If a send fails partway through, the last
+		 * object and offset will have some other value, indicating how
+		 * far the send got. The redaction list must be complete before
+		 * it can be traversed, so return EINVAL if the last object and
+		 * blkid are not set to UINT64_MAX.
+		 */
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * This allows us to skip the binary search and resume checking logic
+	 * below, if we're not resuming a redacted send.
+	 */
+	if (ZB_IS_ZERO(resume))
+		resume = NULL;
+
+	/*
+	 * An empty list has nothing to report.  Bail out here so that the
+	 * "num_entries - 1" below cannot wrap around to UINT64_MAX and send
+	 * the binary search off reading far past the end of the object.
+	 */
+	if (rl->rl_phys->rlp_num_entries == 0)
+		return (0);
+
+	/*
+	 * Binary search for the point to resume from.
+	 */
+	uint64_t maxidx = rl->rl_phys->rlp_num_entries - 1;
+	uint64_t minidx = 0;
+	while (resume != NULL && maxidx > minidx) {
+		redact_block_phys_t rbp = { 0 };
+		ASSERT3U(maxidx, >, minidx);
+		uint64_t mididx = minidx + ((maxidx - minidx) / 2);
+		err = dmu_read(mos, rl->rl_object, mididx * sizeof (rbp),
+		    sizeof (rbp), &rbp, DMU_READ_NO_PREFETCH);
+		if (err != 0)
+			break;
+
+		int cmp = redact_block_zb_compare(&rbp, resume);
+
+		if (cmp == 0) {
+			minidx = mididx;
+			break;
+		} else if (cmp > 0) {
+			maxidx =
+			    (mididx == minidx ? minidx : mididx - 1);
+		} else {
+			minidx = mididx + 1;
+		}
+	}
+
+	/*
+	 * A read error during the search must be reported.  Without this
+	 * check the error would be overwritten by the dmu_read() below and
+	 * the traversal would start from a half-searched index.
+	 */
+	if (err != 0)
+		return (err);
+
+	unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
+	redact_block_phys_t *buf = zio_data_buf_alloc(bufsize);
+
+	unsigned int entries_per_buf = bufsize / sizeof (redact_block_phys_t);
+	uint64_t start_block = minidx / entries_per_buf;
+	err = dmu_read(mos, rl->rl_object, start_block * bufsize, bufsize, buf,
+	    DMU_READ_PREFETCH);
+
+	for (uint64_t curidx = minidx;
+	    err == 0 && curidx < rl->rl_phys->rlp_num_entries;
+	    curidx++) {
+		/*
+		 * We read in the redaction list one block at a time.  Once we
+		 * finish with all the entries in a given block, we read in a
+		 * new one.  The predictive prefetcher will take care of any
+		 * prefetching, and this code shouldn't be the bottleneck, so we
+		 * don't need to do manual prefetching.
+		 */
+		if (curidx % entries_per_buf == 0) {
+			err = dmu_read(mos, rl->rl_object, curidx *
+			    sizeof (*buf), bufsize, buf,
+			    DMU_READ_PREFETCH);
+			if (err != 0)
+				break;
+		}
+		redact_block_phys_t *rb = &buf[curidx % entries_per_buf];
+		/*
+		 * If resume is non-null, we should either not send the data, or
+		 * null out resume so we don't have to keep doing these
+		 * comparisons.
+		 */
+		if (resume != NULL) {
+			/*
+			 * It is possible that after the binary search we got
+			 * a record before the resume point. There's two cases
+			 * where this can occur. If the record is the last
+			 * redaction record, and the resume point is after the
+			 * end of the redacted data, curidx will be the last
+			 * redaction record. In that case, the loop will end
+			 * after this iteration. The second case is if the
+			 * resume point is between two redaction records, the
+			 * binary search can return either the record before
+			 * or after the resume point. In that case, the next
+			 * iteration will be greater than the resume point.
+			 */
+			if (redact_block_zb_compare(rb, resume) < 0) {
+				ASSERT3U(curidx, ==, minidx);
+				continue;
+			} else {
+				/*
+				 * If the place to resume is in the middle of
+				 * the range described by this
+				 * redact_block_phys, then modify the
+				 * redact_block_phys in memory so we generate
+				 * the right records.
+				 */
+				if (resume->zb_object == rb->rbp_object &&
+				    resume->zb_blkid > rb->rbp_blkid) {
+					uint64_t diff = resume->zb_blkid -
+					    rb->rbp_blkid;
+					rb->rbp_blkid = resume->zb_blkid;
+					redact_block_set_count(rb,
+					    redact_block_get_count(rb) - diff);
+				}
+				resume = NULL;
+			}
+		}
+
+		/* A nonzero return from the callback aborts the traversal. */
+		if (cb(rb, arg) != 0) {
+			err = SET_ERROR(EINTR);
+			break;
+		}
+	}
+
+	zio_data_buf_free(buf, bufsize);
+	return (err);
+}

diff --git a/zfs/module/zfs/dsl_crypt.c b/zfs/module/zfs/dsl_crypt.c
index 581876d..872174f 100644
--- a/zfs/module/zfs/dsl_crypt.c
+++ b/zfs/module/zfs/dsl_crypt.c

@@ -107,24 +107,17 @@
 	kmem_free(wkey, sizeof (dsl_wrapping_key_t));
 }
 
-static int
+static void
 dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat,
     uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out)
 {
-	int ret;
 	dsl_wrapping_key_t *wkey;
 
 	/* allocate the wrapping key */
 	wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP);
-	if (!wkey)
-		return (SET_ERROR(ENOMEM));
 
 	/* allocate and initialize the underlying crypto key */
 	wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP);
-	if (!wkey->wk_key.ck_data) {
-		ret = ENOMEM;
-		goto error;
-	}
 
 	wkey->wk_key.ck_format = CRYPTO_KEY_RAW;
 	wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN);
@@ -137,13 +130,6 @@
 	wkey->wk_iters = iters;
 
 	*wkey_out = wkey;
-	return (0);
-
-error:
-	dsl_wrapping_key_free(wkey);
-
-	*wkey_out = NULL;
-	return (ret);
 }
 
 int
@@ -161,11 +147,6 @@
 	char *keylocation = NULL;
 
 	dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP);
-	if (!dcp) {
-		ret = SET_ERROR(ENOMEM);
-		goto error;
-	}
-
 	dcp->cp_cmd = cmd;
 
 	/* get relevant arguments from the nvlists */
@@ -234,11 +215,8 @@
 	/* create the wrapping key from the raw data */
 	if (wkeydata != NULL) {
 		/* create the wrapping key with the verified parameters */
-		ret = dsl_wrapping_key_create(wkeydata, keyformat, salt,
+		dsl_wrapping_key_create(wkeydata, keyformat, salt,
 		    iters, &wkey);
-		if (ret != 0)
-			goto error;
-
 		dcp->cp_wkey = wkey;
 	}
 
@@ -257,11 +235,7 @@
 	return (0);
 
 error:
-	if (wkey != NULL)
-		dsl_wrapping_key_free(wkey);
-	if (dcp != NULL)
-		kmem_free(dcp, sizeof (dsl_crypto_params_t));
-
+	kmem_free(dcp, sizeof (dsl_crypto_params_t));
 	*dcp_out = NULL;
 	return (ret);
 }
@@ -365,7 +339,7 @@
 	    DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj));
 }
 
-int
+static int
 dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version)
 {
 	*version = 0;
@@ -561,8 +535,6 @@
 
 	/* allocate and initialize the key */
 	dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP);
-	if (!dck)
-		return (SET_ERROR(ENOMEM));
 
 	/* fetch all of the values we need from the ZAP */
 	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
@@ -854,7 +826,7 @@
 	dsl_pool_rele(dp, FTAG);
 
 	/* create any zvols under this ds */
-	zvol_create_minors(dp->dp_spa, dsname, B_TRUE);
+	zvol_create_minors_recursive(dsname);
 
 	return (0);
 
@@ -921,7 +893,7 @@
 	 * Wait for any outstanding txg IO to complete, releasing any
 	 * remaining references on the wkey.
 	 */
-	if (spa_mode(spa) != FREAD)
+	if (spa_mode(spa) != SPA_MODE_READ)
 		txg_wait_synced(spa->spa_dsl_pool, 0);
 
 	spa_close(spa, FTAG);
@@ -2035,14 +2007,6 @@
 	if (ret != 0)
 		return (ret);
 
-	/*
-	 * Useraccounting is not portable and must be done with the keys loaded.
-	 * Therefore, whenever we do any kind of receive the useraccounting
-	 * must not be present.
-	 */
-	ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
-	ASSERT0(os->os_flags & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
-
 	mdn = DMU_META_DNODE(os);
 
 	/*
@@ -2134,6 +2098,7 @@
 	arc_release(os->os_phys_buf, &os->os_phys_buf);
 	bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN);
 	bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN);
+	os->os_flags &= ~OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 	os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
 
 	/* set metadnode compression and checksum */
@@ -2155,9 +2120,6 @@
 		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 		dsl_dataset_sync(ds, zio, tx);
 		VERIFY0(zio_wait(zio));
-
-		/* dsl_dataset_sync_done will drop this reference. */
-		dmu_buf_add_ref(ds->ds_dbuf, ds);
 		dsl_dataset_sync_done(ds, tx);
 	}
 }
@@ -2328,7 +2290,7 @@
 	    iters, tx);
 }
 
-int
+static int
 dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx)
 {
 	int ret;
@@ -2369,7 +2331,7 @@
 	return (ret);
 }
 
-void
+static void
 dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_crypto_recv_key_arg_t *dcrka = arg;
@@ -2406,11 +2368,11 @@
 }
 
 int
-dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, uint64_t from_ivset_guid,
+dsl_crypto_populate_key_nvlist(objset_t *os, uint64_t from_ivset_guid,
     nvlist_t **nvl_out)
 {
 	int ret;
-	objset_t *os;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	dnode_t *mdn;
 	uint64_t rddobj;
 	nvlist_t *nvl = NULL;
@@ -2428,12 +2390,9 @@
 
 	ASSERT(dckobj != 0);
 
-	VERIFY0(dmu_objset_from_ds(ds, &os));
 	mdn = DMU_META_DNODE(os);
 
-	ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
-	if (ret != 0)
-		goto error;
+	nvl = fnvlist_alloc();
 
 	/* lookup values from the DSL Crypto Key */
 	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
@@ -2710,6 +2669,7 @@
 	objset_phys_t *osp = buf;
 	uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
 	uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
+	const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0};
 
 	/* look up the key from the spa's keystore */
 	ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
@@ -2732,10 +2692,24 @@
 		return (0);
 	}
 
-	if (bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 ||
-	    bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
-		abd_return_buf(abd, buf, datalen);
-		return (SET_ERROR(ECKSUM));
+	if (memcmp(portable_mac, osp->os_portable_mac,
+	    ZIO_OBJSET_MAC_LEN) != 0 ||
+	    memcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) {
+		/*
+		 * If the MAC is zeroed out, we failed to decrypt it.
+		 * This should only arise, at least on Linux,
+		 * if we hit edge case handling for useraccounting, since we
+		 * shouldn't get here without bailing out on error earlier
+		 * otherwise.
+		 *
+		 * So if we're in that case, we can just fall through and
+		 * special-casing noticing that it's zero will handle it
+		 * elsewhere, since we can just regenerate it.
+		 */
+		if (memcmp(local_mac, zeroed_mac, ZIO_OBJSET_MAC_LEN) != 0) {
+			abd_return_buf(abd, buf, datalen);
+			return (SET_ERROR(ECKSUM));
+		}
 	}
 
 	abd_return_buf(abd, buf, datalen);
@@ -2895,8 +2869,5 @@
 	return (ret);
 }
 
-#if defined(_KERNEL)
-module_param(zfs_disable_ivset_guid_check, int, 0644);
-MODULE_PARM_DESC(zfs_disable_ivset_guid_check,
+ZFS_MODULE_PARAM(zfs, zfs_, disable_ivset_guid_check, INT, ZMOD_RW,
 	"Set to allow raw receives without IVset guids");
-#endif

diff --git a/zfs/module/zfs/dsl_dataset.c b/zfs/module/zfs/dsl_dataset.c
index 9de51db..979a89b 100644
--- a/zfs/module/zfs/dsl_dataset.c
+++ b/zfs/module/zfs/dsl_dataset.c

@@ -21,13 +21,19 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 RackTop Systems.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2020 The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ *     under sponsorship from the FreeBSD Foundation.
  */
 
 #include <sys/dmu_objset.h>
@@ -57,11 +63,14 @@
 #include <sys/dsl_userhold.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/policy.h>
+#include <sys/dmu_send.h>
 #include <sys/dmu_recv.h>
 #include <sys/zio_compress.h>
 #include <zfs_fletcher.h>
 #include <sys/zio_checksum.h>
 
+#include "zmoddbg.h"
+
 /*
  * The SPA supports block sizes up to 16MB.  However, very large blocks
  * can have an impact on i/o latency (e.g. tying up a spinning disk for
@@ -72,6 +81,7 @@
  * of this setting.
  */
 int zfs_max_recordsize = 1 * 1024 * 1024;
+int zfs_allow_redacted_dataset_mount = 0;
 
 #define	SWITCH64(x, y) \
 	{ \
@@ -82,8 +92,6 @@
 
 #define	DS_REF_MAX	(1ULL << 62)
 
-extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
-
 static void dsl_dataset_set_remap_deadlist_object(dsl_dataset_t *ds,
     uint64_t obj, dmu_tx_t *tx);
 static void dsl_dataset_unset_remap_deadlist_object(dsl_dataset_t *ds,
@@ -120,18 +128,18 @@
 void
 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 {
-	int used, compressed, uncompressed;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	int used = bp_get_dsize_sync(spa, bp);
+	int compressed = BP_GET_PSIZE(bp);
+	int uncompressed = BP_GET_UCSIZE(bp);
 	int64_t delta;
-
-	used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
-	compressed = BP_GET_PSIZE(bp);
-	uncompressed = BP_GET_UCSIZE(bp);
+	spa_feature_t f;
 
 	dprintf_bp(bp, "ds=%p", ds);
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	/* It could have been compressed away to nothing */
-	if (BP_IS_HOLE(bp))
+	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
 		return;
 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
 	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
@@ -155,17 +163,37 @@
 		    (void *)B_TRUE;
 	}
 
-	spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+
+	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
 	if (f != SPA_FEATURE_NONE) {
 		ASSERT3S(spa_feature_table[f].fi_type, ==,
 		    ZFEATURE_TYPE_BOOLEAN);
 		ds->ds_feature_activation[f] = (void *)B_TRUE;
 	}
 
+	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
+	if (f != SPA_FEATURE_NONE) {
+		ASSERT3S(spa_feature_table[f].fi_type, ==,
+		    ZFEATURE_TYPE_BOOLEAN);
+		ds->ds_feature_activation[f] = (void *)B_TRUE;
+	}
+
+	/*
+	 * Track block for livelist, but ignore embedded blocks because
+	 * they do not need to be freed.
+	 */
+	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
+	    bp->blk_birth > ds->ds_dir->dd_origin_txg &&
+	    !(BP_IS_EMBEDDED(bp))) {
+		ASSERT(dsl_dir_is_clone(ds->ds_dir));
+		ASSERT(spa_feature_is_enabled(spa,
+		    SPA_FEATURE_LIVELIST));
+		bplist_append(&ds->ds_dir->dd_pending_allocs, bp);
+	}
+
 	mutex_exit(&ds->ds_lock);
-	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
-	    compressed, uncompressed, tx);
-	dsl_dir_transfer_space(ds->ds_dir, used - delta,
+	dsl_dir_diduse_transfer_space(ds->ds_dir, delta,
+	    compressed, uncompressed, used,
 	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
 }
 
@@ -205,8 +233,8 @@
 		DVA_SET_VDEV(dva, vdev);
 		DVA_SET_OFFSET(dva, offset);
 		DVA_SET_ASIZE(dva, size);
-
-		dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, tx);
+		dsl_deadlist_insert(&ds->ds_remap_deadlist, &fakebp, B_FALSE,
+		    tx);
 	}
 }
 
@@ -220,7 +248,7 @@
 	int compressed = BP_GET_PSIZE(bp);
 	int uncompressed = BP_GET_UCSIZE(bp);
 
-	if (BP_IS_HOLE(bp))
+	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
 		return (0);
 
 	ASSERT(dmu_tx_is_syncing(tx));
@@ -237,10 +265,23 @@
 	ASSERT(!ds->ds_is_snapshot);
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 
+	/*
+	 * Track block for livelist, but ignore embedded blocks because
+	 * they do not need to be freed.
+	 */
+	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
+	    bp->blk_birth > ds->ds_dir->dd_origin_txg &&
+	    !(BP_IS_EMBEDDED(bp))) {
+		ASSERT(dsl_dir_is_clone(ds->ds_dir));
+		ASSERT(spa_feature_is_enabled(spa,
+		    SPA_FEATURE_LIVELIST));
+		bplist_append(&ds->ds_dir->dd_pending_frees, bp);
+	}
+
 	if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
 		int64_t delta;
 
-		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
+		dprintf_bp(bp, "freeing ds=%llu", (u_longlong_t)ds->ds_object);
 		dsl_free(tx->tx_pool, tx->tx_txg, bp);
 
 		mutex_enter(&ds->ds_lock);
@@ -249,9 +290,8 @@
 		delta = parent_delta(ds, -used);
 		dsl_dataset_phys(ds)->ds_unique_bytes -= used;
 		mutex_exit(&ds->ds_lock);
-		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-		    delta, -compressed, -uncompressed, tx);
-		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
+		dsl_dir_diduse_transfer_space(ds->ds_dir,
+		    delta, -compressed, -uncompressed, -used,
 		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
 	} else {
 		dprintf_bp(bp, "putting on dead list: %s", "");
@@ -265,7 +305,7 @@
 			 */
 			bplist_append(&ds->ds_pending_deadlist, bp);
 		} else {
-			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
+			dsl_deadlist_insert(&ds->ds_deadlist, bp, B_FALSE, tx);
 		}
 		ASSERT3U(ds->ds_prev->ds_object, ==,
 		    dsl_dataset_phys(ds)->ds_prev_snap_obj);
@@ -284,6 +324,9 @@
 			    DD_USED_HEAD, DD_USED_SNAP, tx);
 		}
 	}
+
+	dsl_bookmark_block_killed(ds, bp, tx);
+
 	mutex_enter(&ds->ds_lock);
 	ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used);
 	dsl_dataset_phys(ds)->ds_referenced_bytes -= used;
@@ -395,6 +438,8 @@
 		ds->ds_prev = NULL;
 	}
 
+	dsl_bookmark_fini_ds(ds);
+
 	bplist_destroy(&ds->ds_pending_deadlist);
 	if (dsl_deadlist_is_open(&ds->ds_deadlist))
 		dsl_deadlist_close(&ds->ds_deadlist);
@@ -564,8 +609,8 @@
 
 		bplist_create(&ds->ds_pending_deadlist);
 
-		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
-		    offsetof(dmu_sendarg_t, dsa_link));
+		list_create(&ds->ds_sendstreams, sizeof (dmu_sendstatus_t),
+		    offsetof(dmu_sendstatus_t, dss_link));
 
 		list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),
 		    offsetof(dsl_prop_cb_record_t, cbr_ds_node));
@@ -588,14 +633,7 @@
 				    dsl_dataset_phys(ds)->ds_prev_snap_obj,
 				    ds, &ds->ds_prev);
 			}
-			if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
-				int zaperr = zap_lookup(mos, ds->ds_object,
-				    DS_FIELD_BOOKMARK_NAMES,
-				    sizeof (ds->ds_bookmarks), 1,
-				    &ds->ds_bookmarks);
-				if (zaperr != ENOENT)
-					VERIFY0(zaperr);
-			}
+			err = dsl_bookmark_init_ds(ds);
 		} else {
 			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 				err = dsl_dataset_get_snapname(ds);
@@ -647,9 +685,15 @@
 			dsl_deadlist_close(&ds->ds_deadlist);
 			if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
 				dsl_deadlist_close(&ds->ds_remap_deadlist);
+			dsl_bookmark_fini_ds(ds);
 			if (ds->ds_prev)
 				dsl_dataset_rele(ds->ds_prev, ds);
 			dsl_dir_rele(ds->ds_dir, ds);
+			for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+				if (dsl_dataset_feature_is_active(ds, f))
+					unload_zfeature(ds, f);
+			}
+
 			list_destroy(&ds->ds_prop_cbs);
 			list_destroy(&ds->ds_sendstreams);
 			mutex_destroy(&ds->ds_lock);
@@ -675,7 +719,7 @@
 				    dsl_dataset_phys(ds)->ds_fsid_guid,
 				    (long long)ds->ds_fsid_guid,
 				    spa_name(dp->dp_spa),
-				    dsobj);
+				    (u_longlong_t)dsobj);
 			}
 		}
 	}
@@ -784,14 +828,14 @@
 	return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
 }
 
-int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
-    void *tag, dsl_dataset_t **dsp)
+static int
+dsl_dataset_own_obj_impl(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
+    void *tag, boolean_t override, dsl_dataset_t **dsp)
 {
 	int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
 	if (err != 0)
 		return (err);
-	if (!dsl_dataset_tryown(*dsp, tag)) {
+	if (!dsl_dataset_tryown(*dsp, tag, override)) {
 		dsl_dataset_rele_flags(*dsp, flags, tag);
 		*dsp = NULL;
 		return (SET_ERROR(EBUSY));
@@ -799,20 +843,49 @@
 	return (0);
 }
 
+
 int
-dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
     void *tag, dsl_dataset_t **dsp)
 {
+	return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_FALSE, dsp));
+}
+
+int
+dsl_dataset_own_obj_force(dsl_pool_t *dp, uint64_t dsobj,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+{
+	return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_TRUE, dsp));
+}
+
+static int
+dsl_dataset_own_impl(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
+    void *tag, boolean_t override, dsl_dataset_t **dsp)
+{
 	int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
 	if (err != 0)
 		return (err);
-	if (!dsl_dataset_tryown(*dsp, tag)) {
+	if (!dsl_dataset_tryown(*dsp, tag, override)) {
 		dsl_dataset_rele_flags(*dsp, flags, tag);
 		return (SET_ERROR(EBUSY));
 	}
 	return (0);
 }
 
+int
+dsl_dataset_own_force(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
+    void *tag, dsl_dataset_t **dsp)
+{
+	return (dsl_dataset_own_impl(dp, name, flags, tag, B_TRUE, dsp));
+}
+
+int
+dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
+    void *tag, dsl_dataset_t **dsp)
+{
+	return (dsl_dataset_own_impl(dp, name, flags, tag, B_FALSE, dsp));
+}
+
 /*
  * See the comment above dsl_pool_hold() for details.  In summary, a long
  * hold is used to prevent destruction of a dataset while the pool hold
@@ -927,13 +1000,16 @@
 }
 
 boolean_t
-dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
+dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override)
 {
 	boolean_t gotit = FALSE;
 
 	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
 	mutex_enter(&ds->ds_lock);
-	if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
+	if (ds->ds_owner == NULL && (override || !(DS_IS_INCONSISTENT(ds) ||
+	    (dsl_dataset_feature_is_active(ds,
+	    SPA_FEATURE_REDACTED_DATASETS) &&
+	    !zfs_allow_redacted_dataset_mount)))) {
 		ds->ds_owner = tag;
 		dsl_dataset_long_hold(ds, tag);
 		gotit = TRUE;
@@ -957,7 +1033,7 @@
 {
 	switch (spa_feature_table[f].fi_type) {
 	case ZFEATURE_TYPE_BOOLEAN: {
-		boolean_t val = (boolean_t)arg;
+		boolean_t val = (boolean_t)(uintptr_t)arg;
 		ASSERT(val == B_FALSE || val == B_TRUE);
 		return (val);
 	}
@@ -1013,7 +1089,7 @@
 
 	switch (spa_feature_table[f].fi_type) {
 	case ZFEATURE_TYPE_BOOLEAN:
-		ASSERT3S((boolean_t)arg, ==, B_TRUE);
+		ASSERT3S((boolean_t)(uintptr_t)arg, ==, B_TRUE);
 		VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
 		    sizeof (zero), 1, &zero, tx));
 		break;
@@ -1029,7 +1105,7 @@
 	}
 }
 
-void
+static void
 dsl_dataset_deactivate_feature_impl(dsl_dataset_t *ds, spa_feature_t f,
     dmu_tx_t *tx)
 {
@@ -1185,9 +1261,6 @@
 		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 		dsl_dataset_sync(ds, zio, tx);
 		VERIFY0(zio_wait(zio));
-
-		/* dsl_dataset_sync_done will drop this reference. */
-		dmu_buf_add_ref(ds->ds_dbuf, ds);
 		dsl_dataset_sync_done(ds, tx);
 	}
 }
@@ -1203,6 +1276,14 @@
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(lastname[0] != '@');
+	/*
+	 * Filesystems will eventually have their origin set to dp_origin_snap,
+	 * but that's taken care of in dsl_dataset_create_sync_dd. When
+	 * creating a filesystem, this function is called with origin equal to
+	 * NULL.
+	 */
+	if (origin != NULL)
+		ASSERT3P(origin, !=, dp->dp_origin_snap);
 
 	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
 	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
@@ -1213,6 +1294,20 @@
 	dsl_deleg_set_create_perms(dd, tx, cr);
 
 	/*
+	 * If we are creating a clone and the livelist feature is enabled,
+	 * add the entry DD_FIELD_LIVELIST to ZAP.
+	 */
+	if (origin != NULL &&
+	    spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LIVELIST)) {
+		objset_t *mos = dd->dd_pool->dp_meta_objset;
+		dsl_dir_zapify(dd, tx);
+		uint64_t obj = dsl_deadlist_alloc(mos, tx);
+		VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_LIVELIST,
+		    sizeof (uint64_t), 1, &obj, tx));
+		spa_feature_incr(dp->dp_spa, SPA_FEATURE_LIVELIST, tx);
+	}
+
+	/*
 	 * Since we're creating a new node we know it's a leaf, so we can
 	 * initialize the counts if the limit feature is active.
 	 */
@@ -1281,7 +1376,7 @@
     dmu_tx_t *tx)
 {
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	ASSERTV(uint64_t count);
+	uint64_t count __maybe_unused;
 	int err;
 
 	ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2);
@@ -1380,7 +1475,7 @@
 
 int
 dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
-    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
+    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc)
 {
 	int error;
 	uint64_t value;
@@ -1425,7 +1520,7 @@
 	 */
 	if (cnt != 0 && cr != NULL) {
 		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
-		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
+		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr, proc);
 		if (error != 0)
 			return (error);
 	}
@@ -1526,7 +1621,7 @@
 			if (error == 0) {
 				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
 				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
-				    ddsa->ddsa_cr);
+				    ddsa->ddsa_cr, ddsa->ddsa_proc);
 				dsl_dataset_rele(ds, FTAG);
 			}
 
@@ -1564,7 +1659,7 @@
 		if (error == 0) {
 			/* passing 0/NULL skips dsl_fs_ss_limit_check */
 			error = dsl_dataset_snapshot_check_impl(ds,
-			    atp + 1, tx, B_FALSE, 0, NULL);
+			    atp + 1, tx, B_FALSE, 0, NULL, NULL);
 			dsl_dataset_rele(ds, FTAG);
 		}
 
@@ -1589,8 +1684,8 @@
 	dsl_dataset_phys_t *dsphys;
 	uint64_t dsobj, crtxg;
 	objset_t *mos = dp->dp_meta_objset;
-	ASSERTV(static zil_header_t zero_zil);
-	ASSERTV(objset_t *os);
+	static zil_header_t zero_zil __maybe_unused;
+	objset_t *os __maybe_unused;
 
 	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 
@@ -1696,6 +1791,7 @@
 	    dsl_dataset_phys(ds)->ds_deadlist_obj);
 	dsl_deadlist_add_key(&ds->ds_deadlist,
 	    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
+	dsl_bookmark_snapshotted(ds, tx);
 
 	if (dsl_dataset_remap_deadlist_exists(ds)) {
 		uint64_t remap_deadlist_obj =
@@ -1757,7 +1853,7 @@
 
 	dsl_dir_snap_cmtime_update(ds->ds_dir);
 
-	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
+	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
 }
 
 void
@@ -1783,7 +1879,6 @@
 			dsl_props_set_sync_impl(ds->ds_prev,
 			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
 		}
-		zvol_create_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
 		dsl_dataset_rele(ds, FTAG);
 	}
 }
@@ -1842,6 +1937,7 @@
 	ddsa.ddsa_props = props;
 	ddsa.ddsa_errors = errors;
 	ddsa.ddsa_cr = CRED();
+	ddsa.ddsa_proc = curproc;
 
 	if (error == 0) {
 		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
@@ -1858,6 +1954,13 @@
 		fnvlist_free(suspended);
 	}
 
+	if (error == 0) {
+		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(snaps, pair)) {
+			zvol_create_minor(nvpair_name(pair));
+		}
+	}
+
 	return (error);
 }
 
@@ -1882,7 +1985,7 @@
 
 	/* NULL cred means no limit check for tmp snapshot */
 	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
-	    tx, B_FALSE, 0, NULL);
+	    tx, B_FALSE, 0, NULL, NULL);
 	if (error != 0) {
 		dsl_dataset_rele(ds, FTAG);
 		return (error);
@@ -1985,24 +2088,151 @@
 	}
 
 	dmu_objset_sync(ds->ds_objset, zio, tx);
-
-	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
-		if (zfeature_active(f, ds->ds_feature_activation[f])) {
-			if (zfeature_active(f, ds->ds_feature[f]))
-				continue;
-			dsl_dataset_activate_feature(ds->ds_object, f,
-			    ds->ds_feature_activation[f], tx);
-			ds->ds_feature[f] = ds->ds_feature_activation[f];
-		}
-	}
 }
 
-static int
-deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+/*
+ * Check if the percentage of blocks shared between the clone and the
+ * snapshot (as opposed to those that are clone only) is at or below
+ * zfs_livelist_min_percent_shared; B_TRUE means disable the livelist.
+ */
+static boolean_t
+dsl_livelist_should_disable(dsl_dataset_t *ds)
 {
-	dsl_deadlist_t *dl = arg;
-	dsl_deadlist_insert(dl, bp, tx);
-	return (0);
+	uint64_t used, referenced;
+	int percent_shared;
+
+	used = dsl_dir_get_usedds(ds->ds_dir);
+	referenced = dsl_get_referenced(ds);
+	/* Avoid dividing by zero when nothing is referenced. */
+	if (referenced == 0)
+		return (B_FALSE);
+	/* Whatever is referenced but not counted in "used" is shared. */
+	percent_shared = (100 * (referenced - used)) / referenced;
+	if (percent_shared <= zfs_livelist_min_percent_shared)
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * Decide whether two livelist entries can be merged into a single one.
+ * They can when the number of 'live' blkptrs in the pair (ALLOCs with no
+ * matching FREE) is below the maximum sublist size. Every FREE blkptr
+ * cancels one ALLOC blkptr when the livelist is processed, so the live
+ * count is the total number of blkptrs minus twice the number of FREEs.
+ */
+static boolean_t
+dsl_livelist_should_condense(dsl_deadlist_entry_t *first,
+    dsl_deadlist_entry_t *next)
+{
+	uint64_t frees = first->dle_bpobj.bpo_phys->bpo_num_freed;
+	uint64_t blkptrs = first->dle_bpobj.bpo_phys->bpo_num_blkptrs;
+
+	frees += next->dle_bpobj.bpo_phys->bpo_num_freed;
+	blkptrs += next->dle_bpobj.bpo_phys->bpo_num_blkptrs;
+	if ((blkptrs - (2 * frees)) >= zfs_livelist_max_entries)
+		return (B_FALSE);
+	return (B_TRUE);
+}
+
+typedef struct try_condense_arg {
+	spa_t *spa;
+	dsl_dataset_t *ds;
+} try_condense_arg_t;
+
+/*
+ * dsl_deadlist_iterate() callback that looks for a pair of adjacent livelist
+ * entries to condense. A nonzero return means stop, 0 means keep looking.
+ */
+static int
+dsl_livelist_try_condense(void *arg, dsl_deadlist_entry_t *first)
+{
+	try_condense_arg_t *tca = arg;
+	spa_t *spa = tca->spa;
+	dsl_dataset_t *ds = tca->ds;
+	dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist;
+	dsl_deadlist_entry_t *next;
+
+	/* The condense thread has not yet been created at import */
+	if (spa->spa_livelist_condense_zthr == NULL)
+		return (1);
+
+	/* A condense is already in progress */
+	if (spa->spa_to_condense.ds != NULL)
+		return (1);
+
+	next = AVL_NEXT(&ll->dl_tree, &first->dle_node);
+	/* "first" has no successor, so there is no pair to condense */
+	if (next == NULL)
+		return (1);
+
+	/* Next is the newest entry - don't condense it */
+	if (AVL_NEXT(&ll->dl_tree, &next->dle_node) == NULL)
+		return (1);
+
+	/* This pair is not ready to condense but keep looking */
+	if (!dsl_livelist_should_condense(first, next))
+		return (0);
+
+	/*
+	 * Add a ref to prevent the dataset from being evicted while
+	 * the condense zthr or synctask are running. Ref will be
+	 * released at the end of the condense synctask
+	 */
+	dmu_buf_add_ref(ds->ds_dbuf, spa);
+
+	spa->spa_to_condense.ds = ds;
+	spa->spa_to_condense.first = first;
+	spa->spa_to_condense.next = next;
+	spa->spa_to_condense.syncing = B_FALSE;
+	spa->spa_to_condense.cancelled = B_FALSE;
+
+	zthr_wakeup(spa->spa_livelist_condense_zthr);
+	return (1);
+}
+
+/*
+ * Flush dd_pending_allocs and dd_pending_frees into the dir's livelist,
+ * adding a new sublist first if needed, then try to start a condense.
+ */
+static void
+dsl_flush_pending_livelist(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+	dsl_deadlist_entry_t *last = dsl_deadlist_last(&dd->dd_livelist);
+
+	/* Check if we need to add a new sub-livelist */
+	if (last == NULL) {
+		/* The livelist is empty */
+		dsl_deadlist_add_key(&dd->dd_livelist, tx->tx_txg - 1, tx);
+	} else if (spa_sync_pass(spa) == 1) {
+		/*
+		 * Check if the newest entry is full. If it is, make a new one.
+		 * We only do this once per sync because we could overfill a
+		 * sublist in one sync pass and don't want to add another entry
+		 * for a txg that is already represented. This ensures that
+		 * blkptrs born in the same txg are stored in the same sublist.
+		 */
+		const bpobj_t *bpobj = &last->dle_bpobj;
+		uint64_t all = bpobj->bpo_phys->bpo_num_blkptrs;
+		uint64_t freed = bpobj->bpo_phys->bpo_num_freed;
+		uint64_t alloc = all - freed;
+		if (alloc > zfs_livelist_max_entries) {
+			dsl_deadlist_add_key(&dd->dd_livelist,
+			    tx->tx_txg - 1, tx);
+		}
+	}
+
+	/* Insert each entry into the on-disk livelist */
+	bplist_iterate(&dd->dd_pending_allocs,
+	    dsl_deadlist_insert_alloc_cb, &dd->dd_livelist, tx);
+	bplist_iterate(&dd->dd_pending_frees,
+	    dsl_deadlist_insert_free_cb, &dd->dd_livelist, tx);
+
+	/* Attempt to condense every pair of adjacent entries */
+	try_condense_arg_t arg = { .spa = spa, .ds = ds };
+	dsl_deadlist_iterate(&dd->dd_livelist, dsl_livelist_try_condense,
+	    &arg);
 }
 
 void
@@ -2011,21 +2241,36 @@
 	objset_t *os = ds->ds_objset;
 
 	bplist_iterate(&ds->ds_pending_deadlist,
-	    deadlist_enqueue_cb, &ds->ds_deadlist, tx);
+	    dsl_deadlist_insert_alloc_cb, &ds->ds_deadlist, tx);
 
-	if (os->os_synced_dnodes != NULL) {
-		multilist_destroy(os->os_synced_dnodes);
-		os->os_synced_dnodes = NULL;
+	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {
+		dsl_flush_pending_livelist(ds, tx);
+		if (dsl_livelist_should_disable(ds)) {
+			dsl_dir_remove_livelist(ds->ds_dir, tx, B_TRUE);
+		}
 	}
 
+	dsl_bookmark_sync_done(ds, tx);
+
+	multilist_destroy(&os->os_synced_dnodes);
+
 	if (os->os_encrypted)
 		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
 	else
 		ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
 
-	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
+	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+		if (zfeature_active(f,
+		    ds->ds_feature_activation[f])) {
+			if (zfeature_active(f, ds->ds_feature[f]))
+				continue;
+			dsl_dataset_activate_feature(ds->ds_object, f,
+			    ds->ds_feature_activation[f], tx);
+			ds->ds_feature[f] = ds->ds_feature_activation[f];
+		}
+	}
 
-	dmu_buf_rele(ds->ds_dbuf, ds);
+	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 }
 
 int
@@ -2048,7 +2293,7 @@
 		    &count));
 	}
 	if (count != dsl_dataset_phys(ds)->ds_num_children - 1) {
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 	for (zap_cursor_init(&zc, mos,
 	    dsl_dataset_phys(ds)->ds_next_clones_obj);
@@ -2070,18 +2315,7 @@
 get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
 {
 	nvlist_t *propval = fnvlist_alloc();
-	nvlist_t *val;
-
-	/*
-	 * We use nvlist_alloc() instead of fnvlist_alloc() because the
-	 * latter would allocate the list with NV_UNIQUE_NAME flag.
-	 * As a result, every time a clone name is appended to the list
-	 * it would be (linearly) searched for a duplicate name.
-	 * We already know that all clone names must be unique and we
-	 * want avoid the quadratic complexity of double-checking that
-	 * because we can have a large number of clones.
-	 */
-	VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP));
+	nvlist_t *val = fnvlist_alloc();
 
 	if (get_clones_stat_impl(ds, val) == 0) {
 		fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
@@ -2102,6 +2336,8 @@
 {
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 
+	KTRACE();
+
 	if (dsl_dataset_has_resume_receive_state(ds)) {
 		char *str;
 		void *packed;
@@ -2151,6 +2387,34 @@
 		    DS_FIELD_RESUME_RAWOK) == 0) {
 			fnvlist_add_boolean(token_nv, "rawok");
 		}
+		if (dsl_dataset_feature_is_active(ds,
+		    SPA_FEATURE_REDACTED_DATASETS)) {
+			uint64_t num_redact_snaps;
+			uint64_t *redact_snaps;
+			VERIFY(dsl_dataset_get_uint64_array_feature(ds,
+			    SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps,
+			    &redact_snaps));
+			fnvlist_add_uint64_array(token_nv, "redact_snaps",
+			    redact_snaps, num_redact_snaps);
+		}
+		if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+		    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS) == 0) {
+			uint64_t num_redact_snaps, int_size;
+			uint64_t *redact_snaps;
+			VERIFY0(zap_length(dp->dp_meta_objset, ds->ds_object,
+			    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, &int_size,
+			    &num_redact_snaps));
+			ASSERT3U(int_size, ==, sizeof (uint64_t));
+
+			redact_snaps = kmem_alloc(int_size * num_redact_snaps,
+			    KM_SLEEP);
+			VERIFY0(zap_lookup(dp->dp_meta_objset, ds->ds_object,
+			    DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, int_size,
+			    num_redact_snaps, redact_snaps));
+			fnvlist_add_uint64_array(token_nv, "book_redact_snaps",
+			    redact_snaps, num_redact_snaps);
+			kmem_free(redact_snaps, int_size * num_redact_snaps);
+		}
 		packed = fnvlist_pack(token_nv, &packed_size);
 		fnvlist_free(token_nv);
 		compressed = kmem_alloc(packed_size, KM_SLEEP);
@@ -2178,7 +2442,7 @@
 		kmem_free(compressed, packed_size);
 		return (propval);
 	}
-	return (strdup(""));
+	return (kmem_strdup(""));
 }
 
 /*
@@ -2201,7 +2465,7 @@
 		dsl_dataset_rele(recv_ds, FTAG);
 		return (propval);
 	}
-	return (strdup(""));
+	return (kmem_strdup(""));
 }
 
 static void
@@ -2217,9 +2481,9 @@
 			dsl_prop_nvlist_add_string(nv,
 			    ZFS_PROP_RECEIVE_RESUME_TOKEN, childval);
 		}
-		strfree(childval);
+		kmem_strfree(childval);
 	}
-	strfree(propval);
+	kmem_strfree(propval);
 }
 
 uint64_t
@@ -2340,6 +2604,13 @@
 }
 
 uint64_t
+dsl_get_redacted(dsl_dataset_t *ds)
+{
+	return (dsl_dataset_feature_is_active(ds,
+	    SPA_FEATURE_REDACTED_DATASETS));
+}
+
+uint64_t
 dsl_get_available(dsl_dataset_t *ds)
 {
 	uint64_t refdbytes = dsl_get_referenced(ds);
@@ -2390,7 +2661,19 @@
 		dsl_dataset_name(ds->ds_prev, snap);
 		return (0);
 	} else {
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
+	}
+}
+
+void
+dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval)
+{
+	uint64_t nsnaps;
+	uint64_t *snaps;
+	if (dsl_dataset_get_uint64_array_feature(ds,
+	    SPA_FEATURE_REDACTED_DATASETS, &nsnaps, &snaps)) {
+		fnvlist_add_uint64_array(propval, ZPROP_VALUE, snaps,
+		    nsnaps);
 	}
 }
 
@@ -2499,6 +2782,12 @@
 		dsl_dir_stats(ds->ds_dir, nv);
 	}
 
+	nvlist_t *propval = fnvlist_alloc();
+	dsl_get_redact_snaps(ds, propval);
+	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
+	    propval);
+	nvlist_free(propval);
+
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
 	    dsl_get_available(ds));
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
@@ -2561,12 +2850,13 @@
 void
 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
 {
-	ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
+	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;
 	ASSERT(dsl_pool_config_held(dp));
 
 	stat->dds_creation_txg = dsl_get_creationtxg(ds);
 	stat->dds_inconsistent = dsl_get_inconsistent(ds);
 	stat->dds_guid = dsl_get_guid(ds);
+	stat->dds_redacted = dsl_get_redacted(ds);
 	stat->dds_origin[0] = '\0';
 	if (ds->ds_is_snapshot) {
 		stat->dds_is_snapshot = B_TRUE;
@@ -2616,7 +2906,7 @@
 boolean_t
 dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
 {
-	ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
+	dsl_pool_t *dp __maybe_unused = ds->ds_dir->dd_pool;
 	uint64_t birth;
 
 	ASSERT(dsl_pool_config_held(dp));
@@ -2651,11 +2941,11 @@
 	dmu_tx_t *ddrsa_tx;
 } dsl_dataset_rename_snapshot_arg_t;
 
-/* ARGSUSED */
 static int
 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
     dsl_dataset_t *hds, void *arg)
 {
+	(void) dp;
 	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
 	int error;
 	uint64_t val;
@@ -2788,20 +3078,26 @@
 static int
 dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
 {
-	boolean_t held;
+	boolean_t held = B_FALSE;
 
 	if (!dmu_tx_is_syncing(tx))
 		return (0);
 
-	if (owner != NULL) {
-		VERIFY3P(ds->ds_owner, ==, owner);
-		dsl_dataset_long_rele(ds, owner);
-	}
-
-	held = dsl_dataset_long_held(ds);
-
-	if (owner != NULL)
-		dsl_dataset_long_hold(ds, owner);
+	dsl_dir_t *dd = ds->ds_dir;
+	mutex_enter(&dd->dd_activity_lock);
+	uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
+	    (owner != NULL ? 1 : 0);
+	/*
+	 * The value of dd_activity_waiters can change as soon as we drop the
+	 * lock, but we're fine with that; new waiters coming in or old
+	 * waiters leaving doesn't cause problems, since we're going to cancel
+	 * waiters later anyway. The goal of this check is to verify that no
+	 * non-waiters have long-holds, and all new long-holds will be
+	 * prevented because we're holding the pool config as writer.
+	 */
+	if (holds != dd->dd_activity_waiters)
+		held = B_TRUE;
+	mutex_exit(&dd->dd_activity_lock);
 
 	if (held)
 		return (SET_ERROR(EBUSY));
@@ -2894,28 +3190,11 @@
 	}
 
 	/* must not have any bookmarks after the most recent snapshot */
-	nvlist_t *proprequest = fnvlist_alloc();
-	fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
-	nvlist_t *bookmarks = fnvlist_alloc();
-	error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
-	fnvlist_free(proprequest);
-	if (error != 0) {
+	if (dsl_bookmark_latest_txg(ds) >
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg) {
 		dsl_dataset_rele(ds, FTAG);
-		return (error);
+		return (SET_ERROR(EEXIST));
 	}
-	for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
-	    pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
-		nvlist_t *valuenv =
-		    fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair),
-		    zfs_prop_to_name(ZFS_PROP_CREATETXG));
-		uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value");
-		if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
-			fnvlist_free(bookmarks);
-			dsl_dataset_rele(ds, FTAG);
-			return (SET_ERROR(EEXIST));
-		}
-	}
-	fnvlist_free(bookmarks);
 
 	error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
 	if (error != 0) {
@@ -3028,7 +3307,6 @@
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dataset_t *hds;
 	struct promotenode *snap;
-	dsl_dataset_t *origin_ds;
 	int err;
 	uint64_t unused;
 	uint64_t ss_mv_cnt;
@@ -3052,7 +3330,7 @@
 		err = SET_ERROR(ENOENT);
 		goto out;
 	}
-	origin_ds = snap->ds;
+	dsl_dataset_t *const origin_ds = snap->ds;
 
 	/*
 	 * Encrypted clones share a DSL Crypto Key with their origin's dsl dir.
@@ -3145,6 +3423,32 @@
 	}
 
 	/*
+	 * Check that bookmarks that are being transferred don't have
+	 * name conflicts.
+	 */
+	for (dsl_bookmark_node_t *dbn = avl_first(&origin_ds->ds_bookmarks);
+	    dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
+	    dsl_dataset_phys(origin_ds)->ds_creation_txg;
+	    dbn = AVL_NEXT(&origin_ds->ds_bookmarks, dbn)) {
+		if (strlen(dbn->dbn_name) >= max_snap_len) {
+			err = SET_ERROR(ENAMETOOLONG);
+			goto out;
+		}
+		zfs_bookmark_phys_t bm;
+		err = dsl_bookmark_lookup_impl(ddpa->ddpa_clone,
+		    dbn->dbn_name, &bm);
+
+		if (err == 0) {
+			fnvlist_add_boolean(ddpa->err_ds, dbn->dbn_name);
+			conflicting_snaps = B_TRUE;
+		} else if (err == ESRCH) {
+			err = 0;
+		} else if (err != 0) {
+			goto out;
+		}
+	}
+
+	/*
 	 * In order to return the full list of conflicting snapshots, we check
 	 * whether there was a conflict after traversing all of them.
 	 */
@@ -3169,7 +3473,7 @@
 
 	/* Check that there is enough space and limit headroom here */
 	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
-	    0, ss_mv_cnt, ddpa->used, ddpa->cr);
+	    0, ss_mv_cnt, ddpa->used, ddpa->cr, ddpa->proc);
 	if (err != 0)
 		goto out;
 
@@ -3233,6 +3537,8 @@
 	uint64_t oldnext_obj;
 	int64_t delta;
 
+	ASSERT(nvlist_empty(ddpa->err_ds));
+
 	VERIFY0(promote_hold(ddpa, dp, FTAG));
 	hds = ddpa->ddpa_clone;
 
@@ -3301,6 +3607,25 @@
 		    dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx));
 	}
 
+	/*
+	 * Move bookmarks to this dir.
+	 */
+	dsl_bookmark_node_t *dbn_next;
+	for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks);
+	    dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
+	    dsl_dataset_phys(origin_ds)->ds_creation_txg;
+	    dbn = dbn_next) {
+		dbn_next = AVL_NEXT(&origin_head->ds_bookmarks, dbn);
+
+		avl_remove(&origin_head->ds_bookmarks, dbn);
+		VERIFY0(zap_remove(dp->dp_meta_objset,
+		    origin_head->ds_bookmarks_obj, dbn->dbn_name, tx));
+
+		dsl_bookmark_node_add(hds, dbn, tx);
+	}
+
+	dsl_bookmark_next_changed(hds, origin_ds, tx);
+
 	/* move snapshots to this dir */
 	for (snap = list_head(&ddpa->shared_snaps); snap;
 	    snap = list_next(&ddpa->shared_snaps, snap)) {
@@ -3398,8 +3723,17 @@
 
 	dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique;
 
+	/*
+	 * Since livelists are specific to a clone's origin txg, they
+	 * are no longer accurate. Destroy the livelist from the clone being
+	 * promoted. If the origin dataset is a clone, destroy its livelist
+	 * as well.
+	 */
+	dsl_dir_remove_livelist(dd, tx, B_TRUE);
+	dsl_dir_remove_livelist(odd, tx, B_TRUE);
+
 	/* log history record */
-	spa_history_log_internal_ds(hds, "promote", tx, "");
+	spa_history_log_internal_ds(hds, "promote", tx, " ");
 
 	dsl_dir_rele(odd, FTAG);
 	promote_rele(ddpa, FTAG);
@@ -3566,6 +3900,7 @@
 	ddpa.ddpa_clonename = name;
 	ddpa.err_ds = fnvlist_alloc();
 	ddpa.cr = CRED();
+	ddpa.proc = curproc;
 
 	error = dsl_sync_task(name, dsl_dataset_promote_check,
 	    dsl_dataset_promote_sync, &ddpa,
@@ -3708,6 +4043,8 @@
 	    DMU_MAX_ACCESS * spa_asize_inflation);
 	ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
 
+	dsl_dir_cancel_waiters(origin_head->ds_dir);
+
 	/*
 	 * Swap per-dataset feature flags.
 	 */
@@ -3762,9 +4099,9 @@
 	    dsl_dataset_phys(clone)->ds_unique_bytes);
 
 	/*
-	 * Reset origin's unique bytes, if it exists.
+	 * Reset origin's unique bytes.
 	 */
-	if (clone->ds_prev) {
+	{
 		dsl_dataset_t *origin = clone->ds_prev;
 		uint64_t comp, uncomp;
 
@@ -3862,8 +4199,22 @@
 	    dsl_dataset_phys(origin_head)->ds_deadlist_obj);
 	dsl_dataset_swap_remap_deadlists(clone, origin_head, tx);
 
+	/*
+	 * If there is a bookmark at the origin, its "next dataset" is
+	 * changing, so we need to reset its FBN.
+	 */
+	dsl_bookmark_next_changed(origin_head, origin_head->ds_prev, tx);
+
 	dsl_scan_ds_clone_swapped(origin_head, clone, tx);
 
+	/*
+	 * Destroy any livelists associated with the clone or the origin,
+	 * since after the swap the corresponding livelists are no longer
+	 * valid.
+	 */
+	dsl_dir_remove_livelist(clone->ds_dir, tx, B_TRUE);
+	dsl_dir_remove_livelist(origin_head->ds_dir, tx, B_TRUE);
+
 	spa_history_log_internal_ds(clone, "clone swap", tx,
 	    "parent=%s", origin_head->ds_dir->dd_myname);
 }
@@ -3950,7 +4301,6 @@
 } dsl_dataset_set_qr_arg_t;
 
 
-/* ARGSUSED */
 static int
 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
 {
@@ -4151,94 +4501,211 @@
 	    ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
+/*
+ * Argument bundle handed to the check and sync functions of the sync task
+ * issued by dsl_dataset_set_compression().
+ */
+typedef struct dsl_dataset_set_compression_arg {
+	/* name of the dataset; the sync function holds the dataset by it */
+	const char *ddsca_name;
+	/* property source as passed in by the caller */
+	zprop_source_t ddsca_source;
+	/* raw property value; ZIO_COMPRESS_ALGO() extracts the algorithm */
+	uint64_t ddsca_value;
+} dsl_dataset_set_compression_arg_t;
+
+/*
+ * Check function for the set-compression sync task.  Returns EINVAL when the
+ * requested algorithm maps to SPA_FEATURE_NONE, ENOTSUP when the matching
+ * feature is not enabled on the pool, and 0 otherwise.
+ */
+static int
+dsl_dataset_set_compression_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_set_compression_arg_t *args = arg;
+	dsl_pool_t *pool = dmu_tx_pool(tx);
+	spa_feature_t feature =
+	    zio_compress_to_feature(ZIO_COMPRESS_ALGO(args->ddsca_value));
+
+	if (feature == SPA_FEATURE_NONE)
+		return (SET_ERROR(EINVAL));
+	if (spa_feature_is_enabled(pool->dp_spa, feature))
+		return (0);
+
+	return (SET_ERROR(ENOTSUP));
+}
+
+/*
+ * Sync function for the set-compression task.  Holds the named dataset and,
+ * unless the feature matching the requested algorithm is already active on
+ * it, activates that feature for the dataset in this transaction.
+ */
+static void
+dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_set_compression_arg_t *args = arg;
+	dsl_pool_t *pool = dmu_tx_pool(tx);
+	dsl_dataset_t *ds = NULL;
+	spa_feature_t feature =
+	    zio_compress_to_feature(ZIO_COMPRESS_ALGO(args->ddsca_value));
+
+	ASSERT3S(spa_feature_table[feature].fi_type, ==,
+	    ZFEATURE_TYPE_BOOLEAN);
+
+	VERIFY0(dsl_dataset_hold(pool, args->ddsca_name, FTAG, &ds));
+	if (zfeature_active(feature, ds->ds_feature[feature]) == B_TRUE) {
+		/* Already active on this dataset; nothing to change. */
+		dsl_dataset_rele(ds, FTAG);
+		return;
+	}
+
+	ds->ds_feature_activation[feature] = (void *)B_TRUE;
+	dsl_dataset_activate_feature(ds->ds_object, feature,
+	    ds->ds_feature_activation[feature], tx);
+	ds->ds_feature[feature] = ds->ds_feature_activation[feature];
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Entry point used when the compression property is set on "dsname".  Only
+ * zstd needs a sync task (to activate its feature flag the first time the
+ * property is set); any other algorithm returns 0 without doing any work.
+ */
+int
+dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
+    uint64_t compression)
+{
+	if (ZIO_COMPRESS_ALGO(compression) == ZIO_COMPRESS_ZSTD) {
+		dsl_dataset_set_compression_arg_t args;
+
+		args.ddsca_name = dsname;
+		args.ddsca_source = source;
+		args.ddsca_value = compression;
+
+		return (dsl_sync_task(dsname,
+		    dsl_dataset_set_compression_check,
+		    dsl_dataset_set_compression_sync, &args, 0,
+		    ZFS_SPACE_CHECK_EXTRA_RESERVED));
+	}
+
+	return (0);
+}
+
+/*
+ * Return (in *usedp) the amount of space referenced by "new" that was not
+ * referenced at the time the bookmark corresponds to.  "New" may be a
+ * snapshot or a head.  The bookmark must be before new, in
+ * new's filesystem (or its origin) -- caller verifies this.
+ *
+ * The written space is calculated by considering two components:  First, we
+ * ignore any freed space, and calculate the written as new's used space
+ * minus old's used space.  Next, we add in the amount of space that was freed
+ * between the two time points, thus reducing new's used space relative to
+ * old's. Specifically, this is the space that was born before
+ * zbm_creation_txg, and freed before new (ie. on new's deadlist or a
+ * previous deadlist).
+ *
+ * space freed                         [---------------------]
+ * snapshots                       ---O-------O--------O-------O------
+ *                                         bookmark           new
+ *
+ * Note, the bookmark's zbm_*_bytes_refd must be valid, but if the HAS_FBN
+ * flag is not set, we will calculate the freed_before_next based on the
+ * next snapshot's deadlist, rather than using zbm_*_freed_before_next_snap.
+ *
+ * Returns 0 on success.  If holding one of the intermediate snapshots
+ * fails, that error is returned and *usedp, *compp and *uncompp contain only
+ * a partial sum that the caller must not rely on.
+ */
+static int
+dsl_dataset_space_written_impl(zfs_bookmark_phys_t *bmp,
+    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+	dsl_pool_t *dp = new->ds_dir->dd_pool;
+
+	ASSERT(dsl_pool_config_held(dp));
+	if (dsl_dataset_is_snapshot(new)) {
+		ASSERT3U(bmp->zbm_creation_txg, <,
+		    dsl_dataset_phys(new)->ds_creation_txg);
+	}
+
+	*usedp = 0;
+	*usedp += dsl_dataset_phys(new)->ds_referenced_bytes;
+	*usedp -= bmp->zbm_referenced_bytes_refd;
+
+	*compp = 0;
+	*compp += dsl_dataset_phys(new)->ds_compressed_bytes;
+	*compp -= bmp->zbm_compressed_bytes_refd;
+
+	*uncompp = 0;
+	*uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;
+	*uncompp -= bmp->zbm_uncompressed_bytes_refd;
+
+	/*
+	 * Walk backwards from "new" through the previous snapshots.  "new"
+	 * belongs to the caller; every other dataset visited is held here
+	 * with FTAG and released before moving on.
+	 */
+	dsl_dataset_t *snap = new;
+
+	while (dsl_dataset_phys(snap)->ds_prev_snap_txg >
+	    bmp->zbm_creation_txg) {
+		uint64_t used, comp, uncomp;
+
+		dsl_deadlist_space_range(&snap->ds_deadlist,
+		    0, bmp->zbm_creation_txg,
+		    &used, &comp, &uncomp);
+		*usedp += used;
+		*compp += comp;
+		*uncompp += uncomp;
+
+		uint64_t snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
+		if (snap != new)
+			dsl_dataset_rele(snap, FTAG);
+		int err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
+		if (err != 0) {
+			/*
+			 * Our hold on the previous "snap" is already gone
+			 * and the new hold failed, so "snap" no longer
+			 * names a dataset we may inspect or release.
+			 * Return now rather than falling through to the
+			 * code below, which would use and release it again.
+			 */
+			return (err);
+		}
+	}
+
+	/*
+	 * We might not have the FBN if we are calculating written from
+	 * a snapshot (because we didn't know the correct "next" snapshot
+	 * until now).
+	 */
+	if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) {
+		*usedp += bmp->zbm_referenced_freed_before_next_snap;
+		*compp += bmp->zbm_compressed_freed_before_next_snap;
+		*uncompp += bmp->zbm_uncompressed_freed_before_next_snap;
+	} else {
+		ASSERT3U(dsl_dataset_phys(snap)->ds_prev_snap_txg, ==,
+		    bmp->zbm_creation_txg);
+		uint64_t used, comp, uncomp;
+		dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp);
+		*usedp += used;
+		*compp += comp;
+		*uncompp += uncomp;
+	}
+	if (snap != new)
+		dsl_dataset_rele(snap, FTAG);
+	return (0);
+}
+
+/*
+ * Bookmark flavour of the "written" calculation: report (in *usedp, *compp
+ * and *uncompp) the space written in "new" that was not present at the time
+ * the bookmark corresponds to.  "New" may be a snapshot or the head.  The
+ * bookmark must be before new, in new's filesystem (or its origin) -- the
+ * caller verifies this.  Bookmarks without ZBM_FLAG_HAS_FBN are rejected
+ * with ENOTSUP.
+ */
+int
+dsl_dataset_space_written_bookmark(zfs_bookmark_phys_t *bmp,
+    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+	if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) {
+		return (dsl_dataset_space_written_impl(bmp, new,
+		    usedp, compp, uncompp));
+	}
+
+	return (SET_ERROR(ENOTSUP));
+}
+
 /*
  * Return (in *usedp) the amount of space written in new that is not
  * present in oldsnap.  New may be a snapshot or the head.  Old must be
  * a snapshot before new, in new's filesystem (or its origin).  If not then
  * fail and return EINVAL.
- *
- * The written space is calculated by considering two components:  First, we
- * ignore any freed space, and calculate the written as new's used space
- * minus old's used space.  Next, we add in the amount of space that was freed
- * between the two snapshots, thus reducing new's used space relative to old's.
- * Specifically, this is the space that was born before old->ds_creation_txg,
- * and freed before new (ie. on new's deadlist or a previous deadlist).
- *
- * space freed                         [---------------------]
- * snapshots                       ---O-------O--------O-------O------
- *                                         oldsnap            new
  */
 int
 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
 {
-	int err = 0;
-	uint64_t snapobj;
-	dsl_pool_t *dp = new->ds_dir->dd_pool;
+	if (!dsl_dataset_is_before(new, oldsnap, 0))
+		return (SET_ERROR(EINVAL));
 
-	ASSERT(dsl_pool_config_held(dp));
+	zfs_bookmark_phys_t zbm = { 0 };
+	dsl_dataset_phys_t *dsp = dsl_dataset_phys(oldsnap);
+	zbm.zbm_guid = dsp->ds_guid;
+	zbm.zbm_creation_txg = dsp->ds_creation_txg;
+	zbm.zbm_creation_time = dsp->ds_creation_time;
+	zbm.zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;
+	zbm.zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;
+	zbm.zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;
 
-	*usedp = 0;
-	*usedp += dsl_dataset_phys(new)->ds_referenced_bytes;
-	*usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes;
-
-	*compp = 0;
-	*compp += dsl_dataset_phys(new)->ds_compressed_bytes;
-	*compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes;
-
-	*uncompp = 0;
-	*uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;
-	*uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes;
-
-	snapobj = new->ds_object;
-	while (snapobj != oldsnap->ds_object) {
-		dsl_dataset_t *snap;
-		uint64_t used, comp, uncomp;
-
-		if (snapobj == new->ds_object) {
-			snap = new;
-		} else {
-			err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
-			if (err != 0)
-				break;
-		}
-
-		if (dsl_dataset_phys(snap)->ds_prev_snap_txg ==
-		    dsl_dataset_phys(oldsnap)->ds_creation_txg) {
-			/*
-			 * The blocks in the deadlist can not be born after
-			 * ds_prev_snap_txg, so get the whole deadlist space,
-			 * which is more efficient (especially for old-format
-			 * deadlists).  Unfortunately the deadlist code
-			 * doesn't have enough information to make this
-			 * optimization itself.
-			 */
-			dsl_deadlist_space(&snap->ds_deadlist,
-			    &used, &comp, &uncomp);
-		} else {
-			dsl_deadlist_space_range(&snap->ds_deadlist,
-			    0, dsl_dataset_phys(oldsnap)->ds_creation_txg,
-			    &used, &comp, &uncomp);
-		}
-		*usedp += used;
-		*compp += comp;
-		*uncompp += uncomp;
-
-		/*
-		 * If we get to the beginning of the chain of snapshots
-		 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
-		 * was not a snapshot of/before new.
-		 */
-		snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
-		if (snap != new)
-			dsl_dataset_rele(snap, FTAG);
-		if (snapobj == 0) {
-			err = SET_ERROR(EINVAL);
-			break;
-		}
-
-	}
-	return (err);
+	/*
+	 * If oldsnap is the origin (or origin's origin, ...) of new,
+	 * we can't easily calculate the effective FBN.  Therefore,
+	 * we do not set ZBM_FLAG_HAS_FBN, so that the _impl will calculate
+	 * it relative to the correct "next": the next snapshot towards "new",
+	 * rather than the next snapshot in oldsnap's dsl_dir.
+	 */
+	return (dsl_dataset_space_written_impl(&zbm, new,
+	    usedp, compp, uncompp));
 }
 
 /*
@@ -4331,16 +4798,26 @@
 
 	if (later->ds_dir == earlier->ds_dir)
 		return (B_TRUE);
-	if (!dsl_dir_is_clone(later->ds_dir))
+
+	/*
+	 * We check dd_origin_obj explicitly here rather than using
+	 * dsl_dir_is_clone() so that we will return TRUE if "earlier"
+	 * is $ORIGIN@$ORIGIN.  dsl_dataset_space_written() depends on
+	 * this behavior.
+	 */
+	if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == 0)
 		return (B_FALSE);
 
-	if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
-		return (B_TRUE);
 	dsl_dataset_t *origin;
 	error = dsl_dataset_hold_obj(dp,
 	    dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
 	if (error != 0)
 		return (B_FALSE);
+	if (dsl_dataset_phys(origin)->ds_creation_txg == earlier_txg &&
+	    origin->ds_dir == earlier->ds_dir) {
+		dsl_dataset_rele(origin, FTAG);
+		return (B_TRUE);
+	}
 	ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
 	dsl_dataset_rele(origin, FTAG);
 	return (ret);
@@ -4457,15 +4934,38 @@
 	spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
 }
 
-#if defined(_KERNEL)
+/*
+ * Activate SPA_FEATURE_REDACTED_DATASETS on "ds".  The feature argument is a
+ * freshly allocated structure holding a private copy of the
+ * "num_redact_snaps" entries of "redact_snaps"; the same structure is also
+ * stored in ds->ds_feature[].
+ */
+void
+dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
+    uint64_t num_redact_snaps, dmu_tx_t *tx)
+{
+	uint64_t dsobj = ds->ds_object;
+	struct feature_type_uint64_array_arg *snaps_arg;
+
+	snaps_arg = kmem_zalloc(sizeof (*snaps_arg), KM_SLEEP);
+	snaps_arg->length = (int64_t)num_redact_snaps;
+
+	/* With no snapshots the (zeroed) array pointer is left alone. */
+	if (num_redact_snaps > 0) {
+		uint64_t nbytes = num_redact_snaps * sizeof (uint64_t);
+
+		snaps_arg->array = kmem_alloc(nbytes, KM_SLEEP);
+		bcopy(redact_snaps, snaps_arg->array, nbytes);
+	}
+
+	dsl_dataset_activate_feature(dsobj, SPA_FEATURE_REDACTED_DATASETS,
+	    snaps_arg, tx);
+	ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = snaps_arg;
+}
+
+/* BEGIN CSTYLED */
 #if defined(_LP64)
-module_param(zfs_max_recordsize, int, 0644);
-MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");
+#define	RECORDSIZE_PERM ZMOD_RW
 #else
 /* Limited to 1M on 32-bit platforms due to lack of virtual address space */
-module_param(zfs_max_recordsize, int, 0444);
-MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");
+#define	RECORDSIZE_PERM ZMOD_RD
 #endif
+ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, RECORDSIZE_PERM,
+	"Max allowed record size");
+
+ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
+	"Allow mounting of redacted datasets");
+/* END CSTYLED */
 
 EXPORT_SYMBOL(dsl_dataset_hold);
 EXPORT_SYMBOL(dsl_dataset_hold_flags);
@@ -4503,4 +5003,3 @@
 EXPORT_SYMBOL(dsl_dataset_check_quota);
 EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl);
 EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl);
-#endif

diff --git a/zfs/module/zfs/dsl_deadlist.c b/zfs/module/zfs/dsl_deadlist.c
index 10846a3..9827eb1 100644
--- a/zfs/module/zfs/dsl_deadlist.c
+++ b/zfs/module/zfs/dsl_deadlist.c

@@ -20,16 +20,15 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
-#include <sys/dsl_dataset.h>
 #include <sys/dmu.h>
-#include <sys/refcount.h>
 #include <sys/zap.h>
 #include <sys/zfs_context.h>
 #include <sys/dsl_pool.h>
+#include <sys/dsl_dataset.h>
 
 /*
  * Deadlist concurrency:
@@ -51,13 +50,83 @@
  * provides its own locking, and dl_oldfmt is immutable.
  */
 
+/*
+ * Livelist Overview
+ * ================
+ *
+ * Livelists use the same 'deadlist_t' struct as deadlists and are also used
+ * to track blkptrs over the lifetime of a dataset. Livelists however, belong
+ * to clones and track the blkptrs that are clone-specific (were born after
+ * the clone's creation). The exception is embedded block pointers which are
+ * not included in livelists because they do not need to be freed.
+ *
+ * When it comes time to delete the clone, the livelist provides a quick
+ * reference as to what needs to be freed. For this reason, livelists also track
+ * when clone-specific blkptrs are freed before deletion to prevent double
+ * frees. Each blkptr in a livelist is marked as a FREE or an ALLOC and the
+ * deletion algorithm iterates backwards over the livelist, matching
+ * FREE/ALLOC pairs and then freeing those ALLOCs which remain. Livelists
+ * are also updated in the case when blkptrs are remapped: the old version
+ * of the blkptr is cancelled out with a FREE and the new version is tracked
+ * with an ALLOC.
+ *
+ * To bound the amount of memory required for deletion, livelists over a
+ * certain size are spread over multiple entries. Entries are grouped by
+ * birth txg so we can be sure the ALLOC/FREE pair for a given blkptr will
+ * be in the same entry. This allows us to delete livelists incrementally
+ * over multiple syncs, one entry at a time.
+ *
+ * During the lifetime of the clone, livelists can get extremely large.
+ * Their size is managed by periodic condensing (preemptively cancelling out
+ * FREE/ALLOC pairs). Livelists are disabled when a clone is promoted or when
+ * the shared space between the clone and its origin is so small that it
+ * doesn't make sense to use livelists anymore.
+ */
+
+/*
+ * The threshold sublist size at which we create a new sub-livelist for the
+ * next txg. However, since blkptrs of the same transaction group must be in
+ * the same sub-list, the actual sublist size may exceed this. When picking the
+ * size we had to balance the fact that larger sublists mean fewer sublists
+ * (decreasing the cost of insertion) against the consideration that sublists
+ * will be loaded into memory and shouldn't take up an inordinate amount of
+ * space. We settled on ~500000 entries, corresponding to roughly 128M.
+ */
+unsigned long zfs_livelist_max_entries = 500000;
+
+/*
+ * We can approximate how much of a performance gain a livelist will give us
+ * based on the percentage of blocks shared between the clone and its origin.
+ * 0 percent shared means that the clone has completely diverged and that the
+ * old method is maximally effective: every read from the block tree will
+ * result in lots of frees. Livelists give us gains when they track blocks
+ * scattered across the tree, when one read in the old method might only
+ * result in a few frees. Once the clone has been overwritten enough,
+ * writes are no longer sparse and we'll no longer get much of a benefit from
+ * tracking them with a livelist. We chose a lower limit of 75 percent shared
+ * (25 percent overwritten). This means that 1/4 of all block pointers will be
+ * freed (e.g. each read frees 256, out of a max of 1024) so we expect livelists
+ * to make deletion 4x faster. Once the amount of shared space drops below this
+ * threshold, the clone will revert to the old deletion method.
+ */
+int zfs_livelist_min_percent_shared = 75;
+
 static int
 dsl_deadlist_compare(const void *arg1, const void *arg2)
 {
-	const dsl_deadlist_entry_t *dle1 = (const dsl_deadlist_entry_t *)arg1;
-	const dsl_deadlist_entry_t *dle2 = (const dsl_deadlist_entry_t *)arg2;
+	const dsl_deadlist_entry_t *dle1 = arg1;
+	const dsl_deadlist_entry_t *dle2 = arg2;
 
-	return (AVL_CMP(dle1->dle_mintxg, dle2->dle_mintxg));
+	return (TREE_CMP(dle1->dle_mintxg, dle2->dle_mintxg));
+}
+
+/*
+ * Comparator handed to avl_create() for dl_cache: orders cache entries by
+ * their dlce_mintxg.
+ */
+static int
+dsl_deadlist_cache_compare(const void *arg1, const void *arg2)
+{
+	const dsl_deadlist_cache_entry_t *lhs = arg1;
+	const dsl_deadlist_cache_entry_t *rhs = arg2;
+
+	return (TREE_CMP(lhs->dlce_mintxg, rhs->dlce_mintxg));
 }
 
 static void
@@ -65,10 +134,28 @@
 {
 	zap_cursor_t zc;
 	zap_attribute_t za;
+	int error;
 
 	ASSERT(MUTEX_HELD(&dl->dl_lock));
 
 	ASSERT(!dl->dl_oldfmt);
+	if (dl->dl_havecache) {
+		/*
+		 * After loading the tree, the caller may modify the tree,
+		 * e.g. to add or remove nodes, or to make a node no longer
+		 * refer to the empty_bpobj.  These changes would make the
+		 * dl_cache incorrect.  Therefore we discard the cache here,
+		 * so that it can't become incorrect.
+		 */
+		dsl_deadlist_cache_entry_t *dlce;
+		void *cookie = NULL;
+		while ((dlce = avl_destroy_nodes(&dl->dl_cache, &cookie))
+		    != NULL) {
+			kmem_free(dlce, sizeof (*dlce));
+		}
+		avl_destroy(&dl->dl_cache);
+		dl->dl_havecache = B_FALSE;
+	}
 	if (dl->dl_havetree)
 		return;
 
@@ -76,18 +163,138 @@
 	    sizeof (dsl_deadlist_entry_t),
 	    offsetof(dsl_deadlist_entry_t, dle_node));
 	for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
-	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
 	    zap_cursor_advance(&zc)) {
 		dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
 		dle->dle_mintxg = zfs_strtonum(za.za_name, NULL);
-		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
-		    za.za_first_integer));
+
+		/*
+		 * Prefetch all the bpobj's so that we do that i/o
+		 * in parallel.  Then open them all in a second pass.
+		 */
+		dle->dle_bpobj.bpo_object = za.za_first_integer;
+		dmu_prefetch(dl->dl_os, dle->dle_bpobj.bpo_object,
+		    0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+
 		avl_add(&dl->dl_tree, dle);
 	}
+	VERIFY3U(error, ==, ENOENT);
 	zap_cursor_fini(&zc);
+
+	for (dsl_deadlist_entry_t *dle = avl_first(&dl->dl_tree);
+	    dle != NULL; dle = AVL_NEXT(&dl->dl_tree, dle)) {
+		VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os,
+		    dle->dle_bpobj.bpo_object));
+	}
 	dl->dl_havetree = B_TRUE;
 }
 
+/*
+ * Load only the non-empty bpobj's into the dl_cache.  The cache is an analog
+ * of the dl_tree, but contains only non-empty_bpobj nodes from the ZAP. It
+ * is used only for gathering space statistics.  The dl_cache has two
+ * advantages over the dl_tree:
+ *
+ * 1. Loading the dl_cache is ~5x faster than loading the dl_tree (if it's
+ * mostly empty_bpobj's), due to less CPU overhead to open the empty_bpobj
+ * many times and to inquire about its (zero) space stats many times.
+ *
+ * 2. The dl_cache uses less memory than the dl_tree.  We only need to load
+ * the dl_tree of snapshots when deleting a snapshot, after which we free the
+ * dl_tree with dsl_deadlist_discard_tree().
+ *
+ * The caller must hold dl_lock and the deadlist must not be in the old
+ * format.  If the cache is already loaded this is a no-op.
+ */
+static void
+dsl_deadlist_load_cache(dsl_deadlist_t *dl)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int error;
+
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
+
+	ASSERT(!dl->dl_oldfmt);
+	if (dl->dl_havecache)
+		return;
+
+	/* ZAP entries that point at this object are skipped below. */
+	uint64_t empty_bpobj = dmu_objset_pool(dl->dl_os)->dp_empty_bpobj;
+
+	avl_create(&dl->dl_cache, dsl_deadlist_cache_compare,
+	    sizeof (dsl_deadlist_cache_entry_t),
+	    offsetof(dsl_deadlist_cache_entry_t, dlce_node));
+	/* Pass 1: create a cache node for every non-empty ZAP entry. */
+	for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		if (za.za_first_integer == empty_bpobj)
+			continue;
+		dsl_deadlist_cache_entry_t *dlce =
+		    kmem_zalloc(sizeof (*dlce), KM_SLEEP);
+		dlce->dlce_mintxg = zfs_strtonum(za.za_name, NULL);
+
+		/*
+		 * Prefetch all the bpobj's so that we do that i/o
+		 * in parallel.  Then open them all in a second pass.
+		 */
+		dlce->dlce_bpobj = za.za_first_integer;
+		dmu_prefetch(dl->dl_os, dlce->dlce_bpobj,
+		    0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+		avl_add(&dl->dl_cache, dlce);
+	}
+	/* The walk must have stopped with ENOENT; anything else is fatal. */
+	VERIFY3U(error, ==, ENOENT);
+	zap_cursor_fini(&zc);
+
+	/*
+	 * Pass 2: open each bpobj just long enough to record its space
+	 * totals in the cache node, then close it again.
+	 */
+	for (dsl_deadlist_cache_entry_t *dlce = avl_first(&dl->dl_cache);
+	    dlce != NULL; dlce = AVL_NEXT(&dl->dl_cache, dlce)) {
+		bpobj_t bpo;
+		VERIFY0(bpobj_open(&bpo, dl->dl_os, dlce->dlce_bpobj));
+
+		VERIFY0(bpobj_space(&bpo,
+		    &dlce->dlce_bytes, &dlce->dlce_comp, &dlce->dlce_uncomp));
+		bpobj_close(&bpo);
+	}
+	dl->dl_havecache = B_TRUE;
+}
+
+/*
+ * Free the in-core dl_tree (if one is loaded) to save memory: every entry's
+ * bpobj is closed and the entry freed, all under dl_lock.
+ */
+void
+dsl_deadlist_discard_tree(dsl_deadlist_t *dl)
+{
+	mutex_enter(&dl->dl_lock);
+
+	if (dl->dl_havetree) {
+		dsl_deadlist_entry_t *entry;
+		void *cookie = NULL;
+
+		while ((entry = avl_destroy_nodes(&dl->dl_tree, &cookie)) !=
+		    NULL) {
+			bpobj_close(&entry->dle_bpobj);
+			kmem_free(entry, sizeof (*entry));
+		}
+		avl_destroy(&dl->dl_tree);
+
+		dl->dl_havetree = B_FALSE;
+	}
+
+	mutex_exit(&dl->dl_lock);
+}
+
+/*
+ * Call func(args, entry) on each entry of the deadlist's tree in AVL order,
+ * stopping at the first call that returns non-zero.  The tree is loaded
+ * under dl_lock, but the walk itself runs after the lock has been dropped.
+ */
+void
+dsl_deadlist_iterate(dsl_deadlist_t *dl, deadlist_iter_t func, void *args)
+{
+	dsl_deadlist_entry_t *entry;
+
+	ASSERT(dsl_deadlist_is_open(dl));
+
+	mutex_enter(&dl->dl_lock);
+	dsl_deadlist_load_tree(dl);
+	mutex_exit(&dl->dl_lock);
+
+	entry = avl_first(&dl->dl_tree);
+	while (entry != NULL && func(args, entry) == 0)
+		entry = AVL_NEXT(&dl->dl_tree, entry);
+}
+
 void
 dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object)
 {
@@ -98,19 +305,20 @@
 	mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL);
 	dl->dl_os = os;
 	dl->dl_object = object;
-	VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
+	VERIFY0(dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
 	dmu_object_info_from_db(dl->dl_dbuf, &doi);
 	if (doi.doi_type == DMU_OT_BPOBJ) {
 		dmu_buf_rele(dl->dl_dbuf, dl);
 		dl->dl_dbuf = NULL;
 		dl->dl_oldfmt = B_TRUE;
-		VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object));
+		VERIFY0(bpobj_open(&dl->dl_bpobj, os, object));
 		return;
 	}
 
 	dl->dl_oldfmt = B_FALSE;
 	dl->dl_phys = dl->dl_dbuf->db_data;
 	dl->dl_havetree = B_FALSE;
+	dl->dl_havecache = B_FALSE;
 }
 
 boolean_t
@@ -122,9 +330,6 @@
 void
 dsl_deadlist_close(dsl_deadlist_t *dl)
 {
-	void *cookie = NULL;
-	dsl_deadlist_entry_t *dle;
-
 	ASSERT(dsl_deadlist_is_open(dl));
 	mutex_destroy(&dl->dl_lock);
 
@@ -137,6 +342,8 @@
 	}
 
 	if (dl->dl_havetree) {
+		dsl_deadlist_entry_t *dle;
+		void *cookie = NULL;
 		while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie))
 		    != NULL) {
 			bpobj_close(&dle->dle_bpobj);
@@ -144,6 +351,15 @@
 		}
 		avl_destroy(&dl->dl_tree);
 	}
+	if (dl->dl_havecache) {
+		dsl_deadlist_cache_entry_t *dlce;
+		void *cookie = NULL;
+		while ((dlce = avl_destroy_nodes(&dl->dl_cache, &cookie))
+		    != NULL) {
+			kmem_free(dlce, sizeof (*dlce));
+		}
+		avl_destroy(&dl->dl_cache);
+	}
 	dmu_buf_rele(dl->dl_dbuf, dl);
 	dl->dl_dbuf = NULL;
 	dl->dl_phys = NULL;
@@ -166,15 +382,16 @@
 	dmu_object_info_t doi;
 	zap_cursor_t zc;
 	zap_attribute_t za;
+	int error;
 
-	VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi));
+	VERIFY0(dmu_object_info(os, dlobj, &doi));
 	if (doi.doi_type == DMU_OT_BPOBJ) {
 		bpobj_free(os, dlobj, tx);
 		return;
 	}
 
 	for (zap_cursor_init(&zc, os, dlobj);
-	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
 	    zap_cursor_advance(&zc)) {
 		uint64_t obj = za.za_first_integer;
 		if (obj == dmu_objset_pool(os)->dp_empty_bpobj)
@@ -182,13 +399,14 @@
 		else
 			bpobj_free(os, obj, tx);
 	}
+	VERIFY3U(error, ==, ENOENT);
 	zap_cursor_fini(&zc);
-	VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
+	VERIFY0(dmu_object_free(os, dlobj, tx));
 }
 
 static void
 dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
-    const blkptr_t *bp, dmu_tx_t *tx)
+    const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
 {
 	ASSERT(MUTEX_HELD(&dl->dl_lock));
 	if (dle->dle_bpobj.bpo_object ==
@@ -196,11 +414,11 @@
 		uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
 		bpobj_close(&dle->dle_bpobj);
 		bpobj_decr_empty(dl->dl_os, tx);
-		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
-		VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
+		VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
+		VERIFY0(zap_update_int_key(dl->dl_os, dl->dl_object,
 		    dle->dle_mintxg, obj, tx));
 	}
-	bpobj_enqueue(&dle->dle_bpobj, bp, tx);
+	bpobj_enqueue(&dle->dle_bpobj, bp, bp_freed, tx);
 }
 
 static void
@@ -214,21 +432,34 @@
 	} else {
 		bpobj_close(&dle->dle_bpobj);
 		bpobj_decr_empty(dl->dl_os, tx);
-		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
-		VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
+		VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
+		VERIFY0(zap_update_int_key(dl->dl_os, dl->dl_object,
 		    dle->dle_mintxg, obj, tx));
 	}
 }
 
+/*
+ * Prefetch metadata required for dle_enqueue_subobj().  Nothing is
+ * prefetched when the entry still refers to the pool's empty bpobj.
+ */
+static void
+dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
+    uint64_t obj)
+{
+	uint64_t empty_bpobj = dmu_objset_pool(dl->dl_os)->dp_empty_bpobj;
+
+	if (dle->dle_bpobj.bpo_object == empty_bpobj)
+		return;
+
+	bpobj_prefetch_subobj(&dle->dle_bpobj, obj);
+}
+
 void
-dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
+dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
 	dsl_deadlist_entry_t dle_tofind;
 	dsl_deadlist_entry_t *dle;
 	avl_index_t where;
 
 	if (dl->dl_oldfmt) {
-		bpobj_enqueue(&dl->dl_bpobj, bp, tx);
+		bpobj_enqueue(&dl->dl_bpobj, bp, bp_freed, tx);
 		return;
 	}
 
@@ -236,10 +467,12 @@
 	dsl_deadlist_load_tree(dl);
 
 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
+
+	int sign = bp_freed ? -1 : +1;
 	dl->dl_phys->dl_used +=
-	    bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
-	dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
-	dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
+	    sign * bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
+	dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp);
+	dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp);
 
 	dle_tofind.dle_mintxg = bp->blk_birth;
 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -255,10 +488,26 @@
 	}
 
 	ASSERT3P(dle, !=, NULL);
-	dle_enqueue(dl, dle, bp, tx);
+	dle_enqueue(dl, dle, bp, bp_freed, tx);
 	mutex_exit(&dl->dl_lock);
 }
 
+/*
+ * Callback-shaped wrapper around dsl_deadlist_insert(): "arg" is the
+ * deadlist, and bp is inserted with bp_freed == B_FALSE.  Always returns 0.
+ */
+int
+dsl_deadlist_insert_alloc_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	dsl_deadlist_insert((dsl_deadlist_t *)arg, bp, B_FALSE, tx);
+
+	return (0);
+}
+
+/*
+ * Callback-shaped wrapper around dsl_deadlist_insert(): "arg" is the
+ * deadlist, and bp is inserted with bp_freed == B_TRUE.  Always returns 0.
+ */
+int
+dsl_deadlist_insert_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	dsl_deadlist_insert((dsl_deadlist_t *)arg, bp, B_TRUE, tx);
+
+	return (0);
+}
+
 /*
  * Insert new key in deadlist, which must be > all current entries.
  * mintxg is not inclusive.
@@ -279,10 +528,10 @@
 	dsl_deadlist_load_tree(dl);
 
 	obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
-	VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
+	VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
 	avl_add(&dl->dl_tree, dle);
 
-	VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
+	VERIFY0(zap_add_int_key(dl->dl_os, dl->dl_object,
 	    mintxg, obj, tx));
 	mutex_exit(&dl->dl_lock);
 }
@@ -298,12 +547,12 @@
 
 	if (dl->dl_oldfmt)
 		return;
-
 	mutex_enter(&dl->dl_lock);
 	dsl_deadlist_load_tree(dl);
 
 	dle_tofind.dle_mintxg = mintxg;
 	dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
+	ASSERT3P(dle, !=, NULL);
 	dle_prev = AVL_PREV(&dl->dl_tree, dle);
 
 	dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
@@ -312,11 +561,115 @@
 	bpobj_close(&dle->dle_bpobj);
 	kmem_free(dle, sizeof (*dle));
 
-	VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
+	VERIFY0(zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
 	mutex_exit(&dl->dl_lock);
 }
 
 /*
+ * Remove a deadlist entry and all of its contents by removing the entry from
+ * the deadlist's avl tree, freeing the entry's bpobj and adjusting the
+ * deadlist's space accounting accordingly.
+ */
+void
+dsl_deadlist_remove_entry(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
+{
+	objset_t *os = dl->dl_os;
+	dsl_deadlist_entry_t key;
+	dsl_deadlist_entry_t *victim;
+	uint64_t used, comp, uncomp;
+
+	/* Nothing to do for an old-format deadlist. */
+	if (dl->dl_oldfmt)
+		return;
+
+	mutex_enter(&dl->dl_lock);
+	dsl_deadlist_load_tree(dl);
+
+	/* The key must already exist in the tree. */
+	key.dle_mintxg = mintxg;
+	victim = avl_find(&dl->dl_tree, &key, NULL);
+	VERIFY3P(victim, !=, NULL);
+
+	/* Unlink the entry from the in-core tree and from the ZAP. */
+	avl_remove(&dl->dl_tree, victim);
+	VERIFY0(zap_remove_int(os, dl->dl_object, mintxg, tx));
+
+	/* Subtract the entry's space from the deadlist totals. */
+	VERIFY0(bpobj_space(&victim->dle_bpobj, &used, &comp, &uncomp));
+	dmu_buf_will_dirty(dl->dl_dbuf, tx);
+	dl->dl_phys->dl_used -= used;
+	dl->dl_phys->dl_comp -= comp;
+	dl->dl_phys->dl_uncomp -= uncomp;
+
+	/*
+	 * A private bpobj is freed outright; an entry that refers to the
+	 * pool's empty bpobj only gives up its reference to it.
+	 */
+	if (victim->dle_bpobj.bpo_object !=
+	    dmu_objset_pool(os)->dp_empty_bpobj) {
+		bpobj_free(os, victim->dle_bpobj.bpo_object, tx);
+	} else {
+		bpobj_decr_empty(os, tx);
+	}
+
+	bpobj_close(&victim->dle_bpobj);
+	kmem_free(victim, sizeof (*victim));
+	mutex_exit(&dl->dl_lock);
+}
+
+/*
+ * Empty out a deadlist entry in place: subtract its space from the
+ * deadlist's accounting, dispose of its current bpobj, and point the entry
+ * (and its ZAP key) at a newly allocated empty bpobj.
+ */
+void
+dsl_deadlist_clear_entry(dsl_deadlist_entry_t *dle, dsl_deadlist_t *dl,
+    dmu_tx_t *tx)
+{
+	objset_t *os = dl->dl_os;
+	uint64_t used, comp, uncomp;
+	uint64_t empty_obj;
+
+	mutex_enter(&dl->dl_lock);
+
+	/* Drop the old ZAP mapping and the space it accounted for. */
+	VERIFY0(zap_remove_int(os, dl->dl_object, dle->dle_mintxg, tx));
+	VERIFY0(bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp));
+	dmu_buf_will_dirty(dl->dl_dbuf, tx);
+	dl->dl_phys->dl_used -= used;
+	dl->dl_phys->dl_comp -= comp;
+	dl->dl_phys->dl_uncomp -= uncomp;
+
+	/* Free a private bpobj; just unreference the shared empty one. */
+	if (dle->dle_bpobj.bpo_object !=
+	    dmu_objset_pool(os)->dp_empty_bpobj) {
+		bpobj_free(os, dle->dle_bpobj.bpo_object, tx);
+	} else {
+		bpobj_decr_empty(os, tx);
+	}
+	bpobj_close(&dle->dle_bpobj);
+
+	/* Re-point the entry and its ZAP key at a new empty bpobj. */
+	empty_obj = bpobj_alloc_empty(os, SPA_OLD_MAXBLOCKSIZE, tx);
+	VERIFY0(bpobj_open(&dle->dle_bpobj, os, empty_obj));
+	VERIFY0(zap_add_int_key(os, dl->dl_object, dle->dle_mintxg,
+	    empty_obj, tx));
+	ASSERT(bpobj_is_empty(&dle->dle_bpobj));
+
+	mutex_exit(&dl->dl_lock);
+}
+
+/*
+ * Return the first entry in the deadlist's avl tree (NULL if the tree is
+ * empty), loading the tree first if necessary.  dl_lock is held only for
+ * the lookup; the entry is returned after the lock has been dropped.
+ */
+dsl_deadlist_entry_t *
+dsl_deadlist_first(dsl_deadlist_t *dl)
+{
+	dsl_deadlist_entry_t *first;
+
+	mutex_enter(&dl->dl_lock);
+	dsl_deadlist_load_tree(dl);
+	first = avl_first(&dl->dl_tree);
+	mutex_exit(&dl->dl_lock);
+
+	return (first);
+}
+
+/*
+ * Return the last entry in the deadlist's avl tree (NULL if the tree is
+ * empty), loading the tree first if necessary.  dl_lock is held only for
+ * the lookup; the entry is returned after the lock has been dropped.
+ */
+dsl_deadlist_entry_t *
+dsl_deadlist_last(dsl_deadlist_t *dl)
+{
+	dsl_deadlist_entry_t *last;
+
+	mutex_enter(&dl->dl_lock);
+	dsl_deadlist_load_tree(dl);
+	last = avl_last(&dl->dl_tree);
+	mutex_exit(&dl->dl_lock);
+
+	return (last);
+}
+
+/*
  * Walk ds's snapshots to regenerate generate ZAP & AVL.
  */
 static void
@@ -334,7 +687,7 @@
 
 	while (mrs_obj != 0) {
 		dsl_dataset_t *ds;
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
+		VERIFY0(dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
 		dsl_deadlist_add_key(&dl,
 		    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
 		mrs_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
@@ -368,7 +721,7 @@
 			break;
 
 		obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
-		VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
+		VERIFY0(zap_add_int_key(dl->dl_os, newobj,
 		    dle->dle_mintxg, obj, tx));
 	}
 	mutex_exit(&dl->dl_lock);
@@ -381,7 +734,7 @@
 {
 	ASSERT(dsl_deadlist_is_open(dl));
 	if (dl->dl_oldfmt) {
-		VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj,
+		VERIFY0(bpobj_space(&dl->dl_bpobj,
 		    usedp, compp, uncompp));
 		return;
 	}
@@ -397,18 +750,18 @@
  * return space used in the range (mintxg, maxtxg].
  * Includes maxtxg, does not include mintxg.
  * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
- * larger than any bp in the deadlist (eg. UINT64_MAX)).
+ * UINT64_MAX).
  */
 void
 dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
 {
-	dsl_deadlist_entry_t *dle;
-	dsl_deadlist_entry_t dle_tofind;
+	dsl_deadlist_cache_entry_t *dlce;
+	dsl_deadlist_cache_entry_t dlce_tofind;
 	avl_index_t where;
 
 	if (dl->dl_oldfmt) {
-		VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj,
+		VERIFY0(bpobj_space_range(&dl->dl_bpobj,
 		    mintxg, maxtxg, usedp, compp, uncompp));
 		return;
 	}
@@ -416,27 +769,25 @@
 	*usedp = *compp = *uncompp = 0;
 
 	mutex_enter(&dl->dl_lock);
-	dsl_deadlist_load_tree(dl);
-	dle_tofind.dle_mintxg = mintxg;
-	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
+	dsl_deadlist_load_cache(dl);
+	dlce_tofind.dlce_mintxg = mintxg;
+	dlce = avl_find(&dl->dl_cache, &dlce_tofind, &where);
+
 	/*
-	 * If we don't find this mintxg, there shouldn't be anything
-	 * after it either.
+	 * If this mintxg doesn't exist, it may be an empty_bpobj which
+	 * is omitted from the sparse tree.  Start at the next non-empty
+	 * entry.
 	 */
-	ASSERT(dle != NULL ||
-	    avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
+	if (dlce == NULL)
+		dlce = avl_nearest(&dl->dl_cache, where, AVL_AFTER);
 
-	for (; dle && dle->dle_mintxg < maxtxg;
-	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
-		uint64_t used, comp, uncomp;
-
-		VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
-		    &used, &comp, &uncomp));
-
-		*usedp += used;
-		*compp += comp;
-		*uncompp += uncomp;
+	for (; dlce && dlce->dlce_mintxg < maxtxg;
+	    dlce = AVL_NEXT(&dl->dl_cache, dlce)) {
+		*usedp += dlce->dlce_bytes;
+		*compp += dlce->dlce_comp;
+		*uncompp += dlce->dlce_uncomp;
 	}
+
 	mutex_exit(&dl->dl_lock);
 }
 
@@ -452,8 +803,8 @@
 
 	ASSERT(MUTEX_HELD(&dl->dl_lock));
 
-	VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
-	VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
+	VERIFY0(bpobj_open(&bpo, dl->dl_os, obj));
+	VERIFY0(bpobj_space(&bpo, &used, &comp, &uncomp));
 	bpobj_close(&bpo);
 
 	dsl_deadlist_load_tree(dl);
@@ -470,11 +821,33 @@
 	dle_enqueue_subobj(dl, dle, obj, tx);
 }
 
+/*
+ * Prefetch metadata required for dsl_deadlist_insert_bpobj().
+ */
+static void
+dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth)
+{
+	dsl_deadlist_entry_t dle_tofind;
+	dsl_deadlist_entry_t *dle;
+	avl_index_t where;
+
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
+
+	dsl_deadlist_load_tree(dl);
+
+	dle_tofind.dle_mintxg = birth;
+	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
+	if (dle == NULL)
+		dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
+	dle_prefetch_subobj(dl, dle, obj);
+}
+
 static int
-dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
 	dsl_deadlist_t *dl = arg;
-	dsl_deadlist_insert(dl, bp, tx);
+	dsl_deadlist_insert(dl, bp, bp_freed, tx);
 	return (0);
 }
 
@@ -485,50 +858,75 @@
 void
 dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
 {
-	zap_cursor_t zc;
-	zap_attribute_t za;
+	zap_cursor_t zc, pzc;
+	zap_attribute_t *za, *pza;
 	dmu_buf_t *bonus;
 	dsl_deadlist_phys_t *dlp;
 	dmu_object_info_t doi;
+	int error, perror, i;
 
-	VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi));
+	VERIFY0(dmu_object_info(dl->dl_os, obj, &doi));
 	if (doi.doi_type == DMU_OT_BPOBJ) {
 		bpobj_t bpo;
-		VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
-		VERIFY3U(0, ==, bpobj_iterate(&bpo,
-		    dsl_deadlist_insert_cb, dl, tx));
+		VERIFY0(bpobj_open(&bpo, dl->dl_os, obj));
+		VERIFY0(bpobj_iterate(&bpo, dsl_deadlist_insert_cb, dl, tx));
 		bpobj_close(&bpo);
 		return;
 	}
 
-	mutex_enter(&dl->dl_lock);
-	for (zap_cursor_init(&zc, dl->dl_os, obj);
-	    zap_cursor_retrieve(&zc, &za) == 0;
-	    zap_cursor_advance(&zc)) {
-		uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
-		dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
-		VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
-	}
-	zap_cursor_fini(&zc);
+	za = kmem_alloc(sizeof (*za), KM_SLEEP);
+	pza = kmem_alloc(sizeof (*pza), KM_SLEEP);
 
-	VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
+	mutex_enter(&dl->dl_lock);
+	/*
+	 * Prefetch up to 128 deadlists first and then more as we progress.
+	 * The limit is a balance between ARC use and diminishing returns.
+	 */
+	for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
+	    (perror = zap_cursor_retrieve(&pzc, pza)) == 0 && i < 128;
+	    zap_cursor_advance(&pzc), i++) {
+		dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
+		    zfs_strtonum(pza->za_name, NULL));
+	}
+	for (zap_cursor_init(&zc, dl->dl_os, obj);
+	    (error = zap_cursor_retrieve(&zc, za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
+		dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
+		VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+		if (perror == 0) {
+			dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
+			    zfs_strtonum(pza->za_name, NULL));
+			zap_cursor_advance(&pzc);
+			perror = zap_cursor_retrieve(&pzc, pza);
+		}
+	}
+	VERIFY3U(error, ==, ENOENT);
+	zap_cursor_fini(&zc);
+	zap_cursor_fini(&pzc);
+
+	VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
 	dlp = bonus->db_data;
 	dmu_buf_will_dirty(bonus, tx);
 	bzero(dlp, sizeof (*dlp));
 	dmu_buf_rele(bonus, FTAG);
 	mutex_exit(&dl->dl_lock);
+
+	kmem_free(za, sizeof (*za));
+	kmem_free(pza, sizeof (*pza));
 }
 
 /*
- * Remove entries on dl that are >= mintxg, and put them on the bpobj.
+ * Remove entries on dl that are born > mintxg, and put them on the bpobj.
  */
 void
 dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
     dmu_tx_t *tx)
 {
 	dsl_deadlist_entry_t dle_tofind;
-	dsl_deadlist_entry_t *dle;
+	dsl_deadlist_entry_t *dle, *pdle;
 	avl_index_t where;
+	int i;
 
 	ASSERT(!dl->dl_oldfmt);
 
@@ -540,13 +938,25 @@
 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
 	if (dle == NULL)
 		dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
+	/*
+	 * Prefetch up to 128 deadlists first and then more as we progress.
+	 * The limit is a balance between ARC use and diminishing returns.
+	 */
+	for (pdle = dle, i = 0; pdle && i < 128; i++) {
+		bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+		pdle = AVL_NEXT(&dl->dl_tree, pdle);
+	}
 	while (dle) {
 		uint64_t used, comp, uncomp;
 		dsl_deadlist_entry_t *dle_next;
 
 		bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
+		if (pdle) {
+			bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+			pdle = AVL_NEXT(&dl->dl_tree, pdle);
+		}
 
-		VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
+		VERIFY0(bpobj_space(&dle->dle_bpobj,
 		    &used, &comp, &uncomp));
 		ASSERT3U(dl->dl_phys->dl_used, >=, used);
 		ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
@@ -555,7 +965,7 @@
 		dl->dl_phys->dl_comp -= comp;
 		dl->dl_phys->dl_uncomp -= uncomp;
 
-		VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
+		VERIFY0(zap_remove_int(dl->dl_os, dl->dl_object,
 		    dle->dle_mintxg, tx));
 
 		dle_next = AVL_NEXT(&dl->dl_tree, dle);
@@ -566,3 +976,142 @@
 	}
 	mutex_exit(&dl->dl_lock);
 }
+
+typedef struct livelist_entry {
+	blkptr_t le_bp;
+	uint32_t le_refcnt;
+	avl_node_t le_node;
+} livelist_entry_t;
+
+static int
+livelist_compare(const void *larg, const void *rarg)
+{
+	const blkptr_t *l = &((livelist_entry_t *)larg)->le_bp;
+	const blkptr_t *r = &((livelist_entry_t *)rarg)->le_bp;
+
+	/* Sort them according to dva[0] */
+	uint64_t l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
+	uint64_t r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
+
+	if (l_dva0_vdev != r_dva0_vdev)
+		return (TREE_CMP(l_dva0_vdev, r_dva0_vdev));
+
+	/* if vdevs are equal, sort by offsets. */
+	uint64_t l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
+	uint64_t r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
+	if (l_dva0_offset == r_dva0_offset)
+		ASSERT3U(l->blk_birth, ==, r->blk_birth);
+	return (TREE_CMP(l_dva0_offset, r_dva0_offset));
+}
+
+struct livelist_iter_arg {
+	avl_tree_t *avl;
+	bplist_t *to_free;
+	zthr_t *t;
+};
+
+/*
+ * Expects an AVL tree which is incrementally filled with FREE blkptrs
+ * and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a
+ * corresponding FREE are stored in the supplied bplist.
+ *
+ * Note that multiple FREE and ALLOC entries for the same blkptr may
+ * be encountered when dedup is involved. For this reason we keep a
+ * refcount for all the FREE entries of each blkptr and ensure that
+ * each of those FREE entries has a corresponding ALLOC preceding it.
+ */
+static int
+dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	struct livelist_iter_arg *lia = arg;
+	avl_tree_t *avl = lia->avl;
+	bplist_t *to_free = lia->to_free;
+	zthr_t *t = lia->t;
+	ASSERT(tx == NULL);
+
+	if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t)))
+		return (SET_ERROR(EINTR));
+
+	livelist_entry_t node;
+	node.le_bp = *bp;
+	livelist_entry_t *found = avl_find(avl, &node, NULL);
+	if (bp_freed) {
+		if (found == NULL) {
+			/* first free entry for this blkptr */
+			livelist_entry_t *e =
+			    kmem_alloc(sizeof (livelist_entry_t), KM_SLEEP);
+			e->le_bp = *bp;
+			e->le_refcnt = 1;
+			avl_add(avl, e);
+		} else {
+			/* dedup block free */
+			ASSERT(BP_GET_DEDUP(bp));
+			ASSERT3U(BP_GET_CHECKSUM(bp), ==,
+			    BP_GET_CHECKSUM(&found->le_bp));
+			ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt);
+			found->le_refcnt++;
+		}
+	} else {
+		if (found == NULL) {
+			/* block is currently marked as allocated */
+			bplist_append(to_free, bp);
+		} else {
+			/* alloc matches a free entry */
+			ASSERT3U(found->le_refcnt, !=, 0);
+			found->le_refcnt--;
+			if (found->le_refcnt == 0) {
+				/* all tracked free pairs have been matched */
+				avl_remove(avl, found);
+				kmem_free(found, sizeof (livelist_entry_t));
+			} else {
+				/*
+				 * This is definitely a deduped blkptr so
+				 * let's validate it.
+				 */
+				ASSERT(BP_GET_DEDUP(bp));
+				ASSERT3U(BP_GET_CHECKSUM(bp), ==,
+				    BP_GET_CHECKSUM(&found->le_bp));
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Accepts a bpobj and a bplist. Will insert into the bplist the blkptrs
+ * which have an ALLOC entry but no matching FREE
+ */
+int
+dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t,
+    uint64_t *size)
+{
+	avl_tree_t avl;
+	avl_create(&avl, livelist_compare, sizeof (livelist_entry_t),
+	    offsetof(livelist_entry_t, le_node));
+
+	/* process the sublist */
+	struct livelist_iter_arg arg = {
+	    .avl = &avl,
+	    .to_free = to_free,
+	    .t = t
+	};
+	int err = bpobj_iterate_nofree(bpobj, dsl_livelist_iterate, &arg, size);
+	VERIFY(err != 0 || avl_numnodes(&avl) == 0);
+
+	void *cookie = NULL;
+	livelist_entry_t *le = NULL;
+	while ((le = avl_destroy_nodes(&avl, &cookie)) != NULL) {
+		kmem_free(le, sizeof (livelist_entry_t));
+	}
+	avl_destroy(&avl);
+	return (err);
+}
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, max_entries, ULONG, ZMOD_RW,
+	"Size to start the next sub-livelist in a livelist");
+
+ZFS_MODULE_PARAM(zfs_livelist, zfs_livelist_, min_percent_shared, INT, ZMOD_RW,
+	"Threshold at which livelist is disabled");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/dsl_deleg.c b/zfs/module/zfs/dsl_deleg.c
index cef460f..cf8a3c9 100644
--- a/zfs/module/zfs/dsl_deleg.c
+++ b/zfs/module/zfs/dsl_deleg.c

@@ -399,7 +399,7 @@
 
 	val = strcmp(node1->p_setname, node2->p_setname);
 
-	return (AVL_ISIGN(val));
+	return (TREE_ISIGN(val));
 }
 
 /*

diff --git a/zfs/module/zfs/dsl_destroy.c b/zfs/module/zfs/dsl_destroy.c
index ede54d9..b32929b 100644
--- a/zfs/module/zfs/dsl_destroy.c
+++ b/zfs/module/zfs/dsl_destroy.c

@@ -31,6 +31,7 @@
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_synctask.h>
 #include <sys/dsl_destroy.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/dmu_tx.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_dir.h>
@@ -44,6 +45,9 @@
 #include <sys/dmu_impl.h>
 #include <sys/zvol.h>
 #include <sys/zcp.h>
+#include <sys/dsl_deadlist.h>
+#include <sys/zthr.h>
+#include <sys/spa_impl.h>
 
 int
 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
@@ -119,7 +123,7 @@
 };
 
 static int
-process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
 {
 	struct process_old_arg *poa = arg;
 	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
@@ -127,7 +131,7 @@
 	ASSERT(!BP_IS_HOLE(bp));
 
 	if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
-		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
+		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
 		if (poa->ds_prev && !poa->after_branch_point &&
 		    bp->blk_birth >
 		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
@@ -181,70 +185,86 @@
 	    dsl_dataset_phys(ds_next)->ds_deadlist_obj);
 }
 
-struct removeclonesnode {
-	list_node_t link;
-	dsl_dataset_t *ds;
-};
+typedef struct remaining_clones_key {
+	dsl_dataset_t *rck_clone;
+	list_node_t rck_node;
+} remaining_clones_key_t;
+
+static remaining_clones_key_t *
+rck_alloc(dsl_dataset_t *clone)
+{
+	remaining_clones_key_t *rck = kmem_alloc(sizeof (*rck), KM_SLEEP);
+	rck->rck_clone = clone;
+	return (rck);
+}
 
 static void
-dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
+dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx,
+    list_t *stack, void *tag)
 {
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	list_t clones;
-	struct removeclonesnode *rcn;
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
 
-	list_create(&clones, sizeof (struct removeclonesnode),
-	    offsetof(struct removeclonesnode, link));
+	/*
+	 * If it is the old version, dd_clones doesn't exist so we can't
+	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
+	 * doesn't matter.
+	 */
+	if (dsl_dir_phys(dd)->dd_clones == 0)
+		return;
 
-	rcn = kmem_zalloc(sizeof (struct removeclonesnode), KM_SLEEP);
-	rcn->ds = ds;
-	list_insert_head(&clones, rcn);
+	zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
+	zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 
-	for (; rcn != NULL; rcn = list_next(&clones, rcn)) {
-		zap_cursor_t zc;
-		zap_attribute_t za;
-		/*
-		 * If it is the old version, dd_clones doesn't exist so we can't
-		 * find the clones, but dsl_deadlist_remove_key() is a no-op so
-		 * it doesn't matter.
-		 */
-		if (dsl_dir_phys(rcn->ds->ds_dir)->dd_clones == 0)
-			continue;
+	for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
+	    zap_cursor_retrieve(zc, za) == 0;
+	    zap_cursor_advance(zc)) {
+		dsl_dataset_t *clone;
 
-		for (zap_cursor_init(&zc, mos,
-		    dsl_dir_phys(rcn->ds->ds_dir)->dd_clones);
-		    zap_cursor_retrieve(&zc, &za) == 0;
-		    zap_cursor_advance(&zc)) {
-			dsl_dataset_t *clone;
+		VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
+		    za->za_first_integer, tag, &clone));
 
-			VERIFY0(dsl_dataset_hold_obj(rcn->ds->ds_dir->dd_pool,
-			    za.za_first_integer, FTAG, &clone));
-			if (clone->ds_dir->dd_origin_txg > mintxg) {
-				dsl_deadlist_remove_key(&clone->ds_deadlist,
-				    mintxg, tx);
-				if (dsl_dataset_remap_deadlist_exists(clone)) {
-					dsl_deadlist_remove_key(
-					    &clone->ds_remap_deadlist, mintxg,
-					    tx);
-				}
-				rcn = kmem_zalloc(
-				    sizeof (struct removeclonesnode), KM_SLEEP);
-				rcn->ds = clone;
-				list_insert_tail(&clones, rcn);
-			} else {
-				dsl_dataset_rele(clone, FTAG);
+		if (clone->ds_dir->dd_origin_txg > mintxg) {
+			dsl_deadlist_remove_key(&clone->ds_deadlist,
+			    mintxg, tx);
+
+			if (dsl_dataset_remap_deadlist_exists(clone)) {
+				dsl_deadlist_remove_key(
+				    &clone->ds_remap_deadlist, mintxg, tx);
 			}
+
+			list_insert_head(stack, rck_alloc(clone));
+		} else {
+			dsl_dataset_rele(clone, tag);
 		}
-		zap_cursor_fini(&zc);
+	}
+	zap_cursor_fini(zc);
+
+	kmem_free(za, sizeof (zap_attribute_t));
+	kmem_free(zc, sizeof (zap_cursor_t));
+}
+
+void
+dsl_dir_remove_clones_key(dsl_dir_t *top_dd, uint64_t mintxg, dmu_tx_t *tx)
+{
+	list_t stack;
+
+	list_create(&stack, sizeof (remaining_clones_key_t),
+	    offsetof(remaining_clones_key_t, rck_node));
+
+	dsl_dir_remove_clones_key_impl(top_dd, mintxg, tx, &stack, FTAG);
+	for (remaining_clones_key_t *rck = list_remove_head(&stack);
+	    rck != NULL; rck = list_remove_head(&stack)) {
+		dsl_dataset_t *clone = rck->rck_clone;
+		dsl_dir_t *clone_dir = clone->ds_dir;
+
+		kmem_free(rck, sizeof (*rck));
+
+		dsl_dir_remove_clones_key_impl(clone_dir, mintxg, tx,
+		    &stack, FTAG);
+		dsl_dataset_rele(clone, FTAG);
 	}
 
-	rcn = list_remove_head(&clones);
-	kmem_free(rcn, sizeof (struct removeclonesnode));
-	while ((rcn = list_remove_head(&clones)) != NULL) {
-		dsl_dataset_rele(rcn->ds, FTAG);
-		kmem_free(rcn, sizeof (struct removeclonesnode));
-	}
-	list_destroy(&clones);
+	list_destroy(&stack);
 }
 
 static void
@@ -301,19 +321,21 @@
 		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
 		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
-		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
+		spa_history_log_internal_ds(ds, "defer_destroy", tx, " ");
 		return;
 	}
 
 	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 
 	/* We need to log before removing it from the namespace. */
-	spa_history_log_internal_ds(ds, "destroy", tx, "");
+	spa_history_log_internal_ds(ds, "destroy", tx, " ");
 
 	dsl_scan_ds_destroyed(ds, tx);
 
 	obj = ds->ds_object;
 
+	boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx);
+
 	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
 		if (dsl_dataset_feature_is_active(ds, f))
 			dsl_dataset_deactivate_feature(ds, f, tx);
@@ -391,6 +413,13 @@
 		/* Merge our deadlist into next's and free it. */
 		dsl_deadlist_merge(&ds_next->ds_deadlist,
 		    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
+
+		/*
+		 * We are done with the deadlist tree (generated/used
+		 * by dsl_deadlist_move_bpobj() and dsl_deadlist_merge()).
+		 * Discard it to save memory.
+		 */
+		dsl_deadlist_discard_tree(&ds_next->ds_deadlist);
 	}
 
 	dsl_deadlist_close(&ds->ds_deadlist);
@@ -400,9 +429,11 @@
 
 	dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);
 
-	/* Collapse range in clone heads */
-	dsl_dataset_remove_clones_key(ds,
-	    dsl_dataset_phys(ds)->ds_creation_txg, tx);
+	if (!book_exists) {
+		/* Collapse range in clone heads */
+		dsl_dir_remove_clones_key(ds->ds_dir,
+		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
+	}
 
 	if (ds_next->ds_is_snapshot) {
 		dsl_dataset_t *ds_nextnext;
@@ -430,9 +461,13 @@
 		/* Collapse range in this head. */
 		dsl_dataset_t *hds;
 		VERIFY0(dsl_dataset_hold_obj(dp,
-		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
-		dsl_deadlist_remove_key(&hds->ds_deadlist,
-		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
+		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
+		    FTAG, &hds));
+		if (!book_exists) {
+			/* Collapse range in this head. */
+			dsl_deadlist_remove_key(&hds->ds_deadlist,
+			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
+		}
 		if (dsl_dataset_remap_deadlist_exists(hds)) {
 			dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
 			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
@@ -505,7 +540,7 @@
 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
 
 	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
-		ASSERTV(uint64_t count);
+		uint64_t count __maybe_unused;
 		ASSERT0(zap_count(mos,
 		    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
 		    count == 0);
@@ -565,26 +600,21 @@
 	/*
 	 * lzc_destroy_snaps() is documented to take an nvlist whose
 	 * values "don't matter".  We need to convert that nvlist to
-	 * one that we know can be converted to LUA. We also don't
-	 * care about any duplicate entries because the nvlist will
-	 * be converted to a LUA table which should take care of this.
+	 * one that we know can be converted to LUA.
 	 */
-	nvlist_t *snaps_normalized;
-	VERIFY0(nvlist_alloc(&snaps_normalized, 0, KM_SLEEP));
+	nvlist_t *snaps_normalized = fnvlist_alloc();
 	for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
 	    pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
 		fnvlist_add_boolean_value(snaps_normalized,
 		    nvpair_name(pair), B_TRUE);
 	}
 
-	nvlist_t *arg;
-	VERIFY0(nvlist_alloc(&arg, 0, KM_SLEEP));
+	nvlist_t *arg = fnvlist_alloc();
 	fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
 	fnvlist_free(snaps_normalized);
 	fnvlist_add_boolean_value(arg, "defer", defer);
 
-	nvlist_t *wrapper;
-	VERIFY0(nvlist_alloc(&wrapper, 0, KM_SLEEP));
+	nvlist_t *wrapper = fnvlist_alloc();
 	fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
 	fnvlist_free(arg);
 
@@ -619,13 +649,15 @@
 	    B_TRUE,
 	    0,
 	    zfs_lua_max_memlimit,
-	    nvlist_next_nvpair(wrapper, NULL), result);
+	    fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result);
 	if (error != 0) {
 		char *errorstr = NULL;
 		(void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
 		if (errorstr != NULL) {
-			zfs_dbgmsg(errorstr);
+			zfs_dbgmsg("%s", errorstr);
 		}
+		fnvlist_free(wrapper);
+		fnvlist_free(result);
 		return (error);
 	}
 	fnvlist_free(wrapper);
@@ -667,15 +699,16 @@
 	dmu_tx_t *tx;
 };
 
-/* ARGSUSED */
 static int
 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
+	(void) spa, (void) dnp;
 	struct killarg *ka = arg;
 	dmu_tx_t *tx = ka->tx;
 
-	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
+	    BP_IS_EMBEDDED(bp))
 		return (0);
 
 	if (zb->zb_level == ZB_ZIL_LEVEL) {
@@ -700,6 +733,10 @@
 {
 	struct killarg ka;
 
+	spa_history_log_internal_ds(ds, "destroy", tx,
+	    "(synchronous, mintxg=%llu)",
+	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);
+
 	/*
 	 * Free everything that we point to (that's born after
 	 * the previous snapshot, if we are a clone)
@@ -730,6 +767,8 @@
 	if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
 		return (SET_ERROR(EBUSY));
 
+	ASSERT0(ds->ds_dir->dd_activity_waiters);
+
 	mos = ds->ds_dir->dd_pool->dp_meta_objset;
 
 	/*
@@ -826,6 +865,139 @@
 	dmu_object_free_zapified(mos, ddobj, tx);
 }
 
+static void
+dsl_clone_destroy_assert(dsl_dir_t *dd)
+{
+	uint64_t used, comp, uncomp;
+
+	ASSERT(dsl_dir_is_clone(dd));
+	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);
+
+	ASSERT3U(dsl_dir_phys(dd)->dd_used_bytes, ==, used);
+	ASSERT3U(dsl_dir_phys(dd)->dd_compressed_bytes, ==, comp);
+	/*
+	 * Greater than because we do not track embedded block pointers in
+	 * the livelist
+	 */
+	ASSERT3U(dsl_dir_phys(dd)->dd_uncompressed_bytes, >=, uncomp);
+
+	ASSERT(list_is_empty(&dd->dd_pending_allocs.bpl_list));
+	ASSERT(list_is_empty(&dd->dd_pending_frees.bpl_list));
+}
+
+/*
+ * Start the delete process for a clone. Free its zil, verify the space usage
+ * and queue the blkptrs for deletion by adding the livelist to the pool-wide
+ * delete queue.
+ */
+static void
+dsl_async_clone_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	uint64_t zap_obj, to_delete, used, comp, uncomp;
+	objset_t *os;
+	dsl_dir_t *dd = ds->ds_dir;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	objset_t *mos = dp->dp_meta_objset;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+
+	uint64_t mintxg = 0;
+	dsl_deadlist_entry_t *dle = dsl_deadlist_first(&dd->dd_livelist);
+	if (dle != NULL)
+		mintxg = dle->dle_mintxg;
+
+	spa_history_log_internal_ds(ds, "destroy", tx,
+	    "(livelist, mintxg=%llu)", (long long)mintxg);
+
+	/* Check that the clone is in a correct state to be deleted */
+	dsl_clone_destroy_assert(dd);
+
+	/* Destroy the zil */
+	zil_destroy_sync(dmu_objset_zil(os), tx);
+
+	VERIFY0(zap_lookup(mos, dd->dd_object,
+	    DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &to_delete));
+	/* Initialize deleted_clones entry to track livelists to cleanup */
+	int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
+	if (error == ENOENT) {
+		zap_obj = zap_create(mos, DMU_OTN_ZAP_METADATA,
+		    DMU_OT_NONE, 0, tx);
+		VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1,
+		    &(zap_obj), tx));
+		spa->spa_livelists_to_delete = zap_obj;
+	} else if (error != 0) {
+		zfs_panic_recover("zfs: error %d was returned while looking "
+		    "up DMU_POOL_DELETED_CLONES in the zap", error);
+		return;
+	}
+	VERIFY0(zap_add_int(mos, zap_obj, to_delete, tx));
+
+	/* Clone is no longer using space, now tracked by dp_free_dir */
+	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);
+	dsl_dir_diduse_space(dd, DD_USED_HEAD,
+	    -used, -comp, -dsl_dir_phys(dd)->dd_uncompressed_bytes,
+	    tx);
+	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+	    used, comp, uncomp, tx);
+	dsl_dir_remove_livelist(dd, tx, B_FALSE);
+	zthr_wakeup(spa->spa_livelist_delete_zthr);
+}
+
+/*
+ * Move the bptree into the pool's list of trees to clean up, update space
+ * accounting information and destroy the zil.
+ */
+static void
+dsl_async_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	uint64_t used, comp, uncomp;
+	objset_t *os;
+
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	objset_t *mos = dp->dp_meta_objset;
+
+	spa_history_log_internal_ds(ds, "destroy", tx,
+	    "(bptree, mintxg=%llu)",
+	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);
+
+	zil_destroy_sync(dmu_objset_zil(os), tx);
+
+	if (!spa_feature_is_active(dp->dp_spa,
+	    SPA_FEATURE_ASYNC_DESTROY)) {
+		dsl_scan_t *scn = dp->dp_scan;
+		spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
+		    tx);
+		dp->dp_bptree_obj = bptree_alloc(mos, tx);
+		VERIFY0(zap_add(mos,
+		    DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+		    &dp->dp_bptree_obj, tx));
+		ASSERT(!scn->scn_async_destroying);
+		scn->scn_async_destroying = B_TRUE;
+	}
+
+	used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
+	comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
+	uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;
+
+	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+	    dsl_dataset_phys(ds)->ds_unique_bytes == used);
+
+	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+	bptree_add(mos, dp->dp_bptree_obj,
+	    &dsl_dataset_phys(ds)->ds_bp,
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg,
+	    used, comp, uncomp, tx);
+	rrw_exit(&ds->ds_bp_rwlock, FTAG);
+	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+	    -used, -comp, -uncomp, tx);
+	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+	    used, comp, uncomp, tx);
+}
+
 void
 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
 {
@@ -842,8 +1014,7 @@
 	rrw_exit(&ds->ds_bp_rwlock, FTAG);
 	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 
-	/* We need to log before removing it from the namespace. */
-	spa_history_log_internal_ds(ds, "destroy", tx, "");
+	dsl_dir_cancel_waiters(ds->ds_dir);
 
 	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
 	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
@@ -885,7 +1056,7 @@
 	}
 
 	/*
-	 * Destroy the deadlist.  Unless it's a clone, the
+	 * Destroy the deadlist. Unless it's a clone, the
 	 * deadlist should be empty since the dataset has no snapshots.
 	 * (If it's a clone, it's safe to ignore the deadlist contents
 	 * since they are still referenced by the origin snapshot.)
@@ -898,51 +1069,18 @@
 	if (dsl_dataset_remap_deadlist_exists(ds))
 		dsl_dataset_destroy_remap_deadlist(ds, tx);
 
-	objset_t *os;
-	VERIFY0(dmu_objset_from_ds(ds, &os));
-
-	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
-		old_synchronous_dataset_destroy(ds, tx);
+	/*
+	 * Each destroy is responsible for both destroying (enqueuing
+	 * to be destroyed) the blkptrs comprising the dataset as well as
+	 * those belonging to the zil.
+	 */
+	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {
+		dsl_async_clone_destroy(ds, tx);
+	} else if (spa_feature_is_enabled(dp->dp_spa,
+	    SPA_FEATURE_ASYNC_DESTROY)) {
+		dsl_async_dataset_destroy(ds, tx);
 	} else {
-		/*
-		 * Move the bptree into the pool's list of trees to
-		 * clean up and update space accounting information.
-		 */
-		uint64_t used, comp, uncomp;
-
-		zil_destroy_sync(dmu_objset_zil(os), tx);
-
-		if (!spa_feature_is_active(dp->dp_spa,
-		    SPA_FEATURE_ASYNC_DESTROY)) {
-			dsl_scan_t *scn = dp->dp_scan;
-			spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
-			    tx);
-			dp->dp_bptree_obj = bptree_alloc(mos, tx);
-			VERIFY0(zap_add(mos,
-			    DMU_POOL_DIRECTORY_OBJECT,
-			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
-			    &dp->dp_bptree_obj, tx));
-			ASSERT(!scn->scn_async_destroying);
-			scn->scn_async_destroying = B_TRUE;
-		}
-
-		used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
-		comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
-		uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;
-
-		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
-		    dsl_dataset_phys(ds)->ds_unique_bytes == used);
-
-		rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
-		bptree_add(mos, dp->dp_bptree_obj,
-		    &dsl_dataset_phys(ds)->ds_bp,
-		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
-		    used, comp, uncomp, tx);
-		rrw_exit(&ds->ds_bp_rwlock, FTAG);
-		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-		    -used, -comp, -uncomp, tx);
-		dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
-		    used, comp, uncomp, tx);
+		old_synchronous_dataset_destroy(ds, tx);
 	}
 
 	if (ds->ds_prev != NULL) {
@@ -973,8 +1111,28 @@
 	VERIFY0(zap_destroy(mos,
 	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));
 
-	if (ds->ds_bookmarks != 0) {
-		VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
+	if (ds->ds_bookmarks_obj != 0) {
+		void *cookie = NULL;
+		dsl_bookmark_node_t *dbn;
+
+		while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
+		    NULL) {
+			if (dbn->dbn_phys.zbm_redaction_obj != 0) {
+				VERIFY0(dmu_object_free(mos,
+				    dbn->dbn_phys.zbm_redaction_obj, tx));
+				spa_feature_decr(dmu_objset_spa(mos),
+				    SPA_FEATURE_REDACTION_BOOKMARKS, tx);
+			}
+			if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
+				spa_feature_decr(dmu_objset_spa(mos),
+				    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
+			}
+			spa_strfree(dbn->dbn_name);
+			mutex_destroy(&dbn->dbn_lock);
+			kmem_free(dbn, sizeof (*dbn));
+		}
+		avl_destroy(&ds->ds_bookmarks);
+		VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
 		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
 	}
 
@@ -1023,7 +1181,7 @@
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
 
-	spa_history_log_internal_ds(ds, "destroy begin", tx, "");
+	spa_history_log_internal_ds(ds, "destroy begin", tx, " ");
 	dsl_dataset_rele(ds, FTAG);
 }
 
@@ -1088,10 +1246,10 @@
  * inconsistent datasets, even if we encounter an error trying to
  * process one of them.
  */
-/* ARGSUSED */
 int
 dsl_destroy_inconsistent(const char *dsname, void *arg)
 {
+	(void) arg;
 	objset_t *os;
 
 	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {

diff --git a/zfs/module/zfs/dsl_dir.c b/zfs/module/zfs/dsl_dir.c
index 205ac1a..4d9bd5c 100644
--- a/zfs/module/zfs/dsl_dir.c
+++ b/zfs/module/zfs/dsl_dir.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2013 Martin Matuska. All rights reserved.
  * Copyright (c) 2014 Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -46,8 +46,10 @@
 #include <sys/sunddi.h>
 #include <sys/zfeature.h>
 #include <sys/policy.h>
+#include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 #include <sys/zvol.h>
+#include <sys/zthr.h>
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 
@@ -117,17 +119,8 @@
  * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in
  * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by
  * dsl_dir_init_fs_ss_count().
- *
- * There is a special case when we receive a filesystem that already exists. In
- * this case a temporary clone name of %X is created (see dmu_recv_begin). We
- * never update the filesystem counts for temporary clones.
- *
- * Likewise, we do not update the snapshot counts for temporary snapshots,
- * such as those created by zfs diff.
  */
 
-extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
-
 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
 
 typedef struct ddulrt_arg {
@@ -140,7 +133,7 @@
 {
 	dsl_dir_t *dd = dbu;
 	int t;
-	ASSERTV(dsl_pool_t *dp = dd->dd_pool);
+	dsl_pool_t *dp __maybe_unused = dd->dd_pool;
 
 	dd->dd_dbuf = NULL;
 
@@ -155,7 +148,12 @@
 
 	spa_async_close(dd->dd_pool->dp_spa, dd);
 
+	if (dsl_deadlist_is_open(&dd->dd_livelist))
+		dsl_dir_livelist_close(dd);
+
 	dsl_prop_fini(dd);
+	cv_destroy(&dd->dd_activity_cv);
+	mutex_destroy(&dd->dd_activity_lock);
 	mutex_destroy(&dd->dd_lock);
 	kmem_free(dd, sizeof (dsl_dir_t));
 }
@@ -188,23 +186,27 @@
 		dd->dd_dbuf = dbuf;
 		dd->dd_pool = dp;
 
-		if (dsl_dir_is_zapified(dd) &&
-		    zap_contains(dp->dp_meta_objset, ddobj,
-		    DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
-			VERIFY0(zap_lookup(dp->dp_meta_objset,
-			    ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
-			    sizeof (uint64_t), 1, &dd->dd_crypto_obj));
+		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
+		mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
+		cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
+		dsl_prop_init(dd);
 
-			/* check for on-disk format errata */
-			if (dsl_dir_incompatible_encryption_version(dd)) {
-				dp->dp_spa->spa_errata =
-				    ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
+		if (dsl_dir_is_zapified(dd)) {
+			err = zap_lookup(dp->dp_meta_objset,
+			    ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
+			    sizeof (uint64_t), 1, &dd->dd_crypto_obj);
+			if (err == 0) {
+				/* check for on-disk format errata */
+				if (dsl_dir_incompatible_encryption_version(
+				    dd)) {
+					dp->dp_spa->spa_errata =
+					    ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
+				}
+			} else if (err != ENOENT) {
+				goto errout;
 			}
 		}
 
-		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
-		dsl_prop_init(dd);
-
 		dsl_dir_snap_cmtime_update(dd);
 
 		if (dsl_dir_phys(dd)->dd_parent_obj) {
@@ -256,6 +258,16 @@
 			dd->dd_origin_txg =
 			    origin_phys->ds_creation_txg;
 			dmu_buf_rele(origin_bonus, FTAG);
+			if (dsl_dir_is_zapified(dd)) {
+				uint64_t obj;
+				err = zap_lookup(dp->dp_meta_objset,
+				    dd->dd_object, DD_FIELD_LIVELIST,
+				    sizeof (uint64_t), 1, &obj);
+				if (err == 0)
+					dsl_dir_livelist_open(dd, obj);
+				else if (err != ENOENT)
+					goto errout;
+			}
 		}
 
 		dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
@@ -264,7 +276,11 @@
 		if (winner != NULL) {
 			if (dd->dd_parent)
 				dsl_dir_rele(dd->dd_parent, dd);
+			if (dsl_deadlist_is_open(&dd->dd_livelist))
+				dsl_dir_livelist_close(dd);
 			dsl_prop_fini(dd);
+			cv_destroy(&dd->dd_activity_cv);
+			mutex_destroy(&dd->dd_activity_lock);
 			mutex_destroy(&dd->dd_lock);
 			kmem_free(dd, sizeof (dsl_dir_t));
 			dd = winner;
@@ -292,7 +308,11 @@
 errout:
 	if (dd->dd_parent)
 		dsl_dir_rele(dd->dd_parent, dd);
+	if (dsl_deadlist_is_open(&dd->dd_livelist))
+		dsl_dir_livelist_close(dd);
 	dsl_prop_fini(dd);
+	cv_destroy(&dd->dd_activity_cv);
+	mutex_destroy(&dd->dd_activity_lock);
 	mutex_destroy(&dd->dd_lock);
 	kmem_free(dd, sizeof (dsl_dir_t));
 	dmu_buf_rele(dbuf, tag);
@@ -466,7 +486,7 @@
 		if (next[0] == '@')
 			break;
 		dprintf("looking up %s in obj%lld\n",
-		    buf, dsl_dir_phys(dd)->dd_child_dir_zapobj);
+		    buf, (longlong_t)dsl_dir_phys(dd)->dd_child_dir_zapobj);
 
 		err = zap_lookup(dp->dp_meta_objset,
 		    dsl_dir_phys(dd)->dd_child_dir_zapobj,
@@ -562,11 +582,9 @@
 		    &chld_dd));
 
 		/*
-		 * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and
-		 * temporary datasets.
+		 * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets.
 		 */
-		if (chld_dd->dd_myname[0] == '$' ||
-		    chld_dd->dd_myname[0] == '%') {
+		if (chld_dd->dd_myname[0] == '$') {
 			dsl_dir_rele(chld_dd, FTAG);
 			continue;
 		}
@@ -720,12 +738,14 @@
 } enforce_res_t;
 
 static enforce_res_t
-dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr)
+dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop,
+    cred_t *cr, proc_t *proc)
 {
 	enforce_res_t enforce = ENFORCE_ALWAYS;
 	uint64_t obj;
 	dsl_dataset_t *ds;
 	uint64_t zoned;
+	const char *zonedstr;
 
 	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
 	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
@@ -734,8 +754,16 @@
 	if (crgetzoneid(cr) != GLOBAL_ZONEID)
 		return (ENFORCE_ALWAYS);
 
-	if (secpolicy_zfs(cr) == 0)
+	/*
+	 * We are checking the saved credentials of the user process, which is
+	 * not the current process.  Note that we can't use secpolicy_zfs(),
+	 * because it only works if the cred is that of the current process (on
+	 * Linux).
+	 */
+	if (secpolicy_zfs_proc(cr, proc) == 0)
 		return (ENFORCE_NEVER);
+#else
+	(void) proc;
 #endif
 
 	if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0)
@@ -746,7 +774,8 @@
 	if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0)
 		return (ENFORCE_ALWAYS);
 
-	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) {
+	zonedstr = zfs_prop_to_name(ZFS_PROP_ZONED);
+	if (dsl_prop_get_ds(ds, zonedstr, 8, 1, &zoned, NULL) || zoned) {
 		/* Only root can access zoned fs's from the GZ */
 		enforce = ENFORCE_ALWAYS;
 	} else {
@@ -758,35 +787,6 @@
 	return (enforce);
 }
 
-static void
-dsl_dir_update_last_remap_txg_sync(void *varg, dmu_tx_t *tx)
-{
-	ddulrt_arg_t *arg = varg;
-	uint64_t last_remap_txg;
-	dsl_dir_t *dd = arg->ddulrta_dd;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
-
-	dsl_dir_zapify(dd, tx);
-	if (zap_lookup(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
-	    sizeof (last_remap_txg), 1, &last_remap_txg) != 0 ||
-	    last_remap_txg < arg->ddlrta_txg) {
-		VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
-		    sizeof (arg->ddlrta_txg), 1, &arg->ddlrta_txg, tx));
-	}
-}
-
-int
-dsl_dir_update_last_remap_txg(dsl_dir_t *dd, uint64_t txg)
-{
-	ddulrt_arg_t arg;
-	arg.ddulrta_dd = dd;
-	arg.ddlrta_txg = txg;
-
-	return (dsl_sync_task(spa_name(dd->dd_pool->dp_spa),
-	    NULL, dsl_dir_update_last_remap_txg_sync, &arg,
-	    1, ZFS_SPACE_CHECK_RESERVED));
-}
-
 /*
  * Check if adding additional child filesystem(s) would exceed any filesystem
  * limits or adding additional snapshot(s) would exceed any snapshot limits.
@@ -797,7 +797,7 @@
  */
 int
 dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
-    dsl_dir_t *ancestor, cred_t *cr)
+    dsl_dir_t *ancestor, cred_t *cr, proc_t *proc)
 {
 	objset_t *os = dd->dd_pool->dp_meta_objset;
 	uint64_t limit, count;
@@ -809,25 +809,6 @@
 	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
 	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
 
-	/*
-	 * If we're allowed to change the limit, don't enforce the limit
-	 * e.g. this can happen if a snapshot is taken by an administrative
-	 * user in the global zone (i.e. a recursive snapshot by root).
-	 * However, we must handle the case of delegated permissions where we
-	 * are allowed to change the limit on the current dataset, but there
-	 * is another limit in the tree above.
-	 */
-	enforce = dsl_enforce_ds_ss_limits(dd, prop, cr);
-	if (enforce == ENFORCE_NEVER)
-		return (0);
-
-	/*
-	 * e.g. if renaming a dataset with no snapshots, count adjustment
-	 * is 0.
-	 */
-	if (delta == 0)
-		return (0);
-
 	if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
 		/*
 		 * We don't enforce the limit for temporary snapshots. This is
@@ -840,6 +821,24 @@
 	} else {
 		count_prop = DD_FIELD_FILESYSTEM_COUNT;
 	}
+	/*
+	 * If we're allowed to change the limit, don't enforce the limit
+	 * e.g. this can happen if a snapshot is taken by an administrative
+	 * user in the global zone (i.e. a recursive snapshot by root).
+	 * However, we must handle the case of delegated permissions where we
+	 * are allowed to change the limit on the current dataset, but there
+	 * is another limit in the tree above.
+	 */
+	enforce = dsl_enforce_ds_ss_limits(dd, prop, cr, proc);
+	if (enforce == ENFORCE_NEVER)
+		return (0);
+
+	/*
+	 * e.g. if renaming a dataset with no snapshots, count adjustment
+	 * is 0.
+	 */
+	if (delta == 0)
+		return (0);
 
 	/*
 	 * If an ancestor has been provided, stop checking the limit once we
@@ -854,9 +853,14 @@
 	 * stop since we know there is no limit here (or above). The counts are
 	 * not valid on this node and we know we won't touch this node's counts.
 	 */
-	if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object,
-	    count_prop, sizeof (count), 1, &count) == ENOENT)
+	if (!dsl_dir_is_zapified(dd))
 		return (0);
+	err = zap_lookup(os, dd->dd_object,
+	    count_prop, sizeof (count), 1, &count);
+	if (err == ENOENT)
+		return (0);
+	if (err != 0)
+		return (err);
 
 	err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL,
 	    B_FALSE);
@@ -869,7 +873,7 @@
 
 	if (dd->dd_parent != NULL)
 		err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop,
-		    ancestor, cr);
+		    ancestor, cr, proc);
 
 	return (err);
 }
@@ -894,14 +898,12 @@
 	    strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0);
 
 	/*
-	 * When we receive an incremental stream into a filesystem that already
-	 * exists, a temporary clone is created.  We don't count this temporary
-	 * clone, whose name begins with a '%'. We also ignore hidden ($FREE,
-	 * $MOS & $ORIGIN) objsets.
+	 * We don't do accounting for hidden ($FREE, $MOS & $ORIGIN) objsets.
 	 */
-	if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') &&
-	    strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0)
+	if (dd->dd_myname[0] == '$' && strcmp(prop,
+	    DD_FIELD_FILESYSTEM_COUNT) == 0) {
 		return;
+	}
 
 	/*
 	 * e.g. if renaming a dataset with no snapshots, count adjustment is 0
@@ -1068,7 +1070,7 @@
 		return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
 		    sizeof (*count), 1, count));
 	} else {
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 }
 
@@ -1080,23 +1082,10 @@
 		return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
 		    sizeof (*count), 1, count));
 	} else {
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 }
 
-int
-dsl_dir_get_remaptxg(dsl_dir_t *dd, uint64_t *count)
-{
-	if (dsl_dir_is_zapified(dd)) {
-		objset_t *os = dd->dd_pool->dp_meta_objset;
-		return (zap_lookup(os, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
-		    sizeof (*count), 1, count));
-	} else {
-		return (ENOENT);
-	}
-
-}
-
 void
 dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 {
@@ -1128,10 +1117,6 @@
 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT,
 		    count);
 	}
-	if (dsl_dir_get_remaptxg(dd, &count) == 0) {
-		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REMAPTXG,
-		    count);
-	}
 
 	if (dsl_dir_is_clone(dd)) {
 		char buf[ZFS_MAX_DATASET_NAME_LEN];
@@ -1170,8 +1155,8 @@
 
 	mutex_enter(&dd->dd_lock);
 	ASSERT0(dd->dd_tempreserved[tx->tx_txg & TXG_MASK]);
-	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
-	    dd->dd_space_towrite[tx->tx_txg & TXG_MASK] / 1024);
+	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", (u_longlong_t)tx->tx_txg,
+	    (u_longlong_t)dd->dd_space_towrite[tx->tx_txg & TXG_MASK] / 1024);
 	dd->dd_space_towrite[tx->tx_txg & TXG_MASK] = 0;
 	mutex_exit(&dd->dd_lock);
 
@@ -1276,6 +1261,7 @@
 	uint64_t quota;
 	struct tempreserve *tr;
 	int retval;
+	uint64_t ext_quota;
 	uint64_t ref_rsrv;
 
 top_of_function:
@@ -1334,7 +1320,6 @@
 	 * we're very close to full, this will allow a steady trickle of
 	 * removes to get through.
 	 */
-	uint64_t deferred = 0;
 	if (dd->dd_parent == NULL) {
 		uint64_t avail = dsl_pool_unreserved_space(dd->dd_pool,
 		    (netfree) ?
@@ -1342,27 +1327,45 @@
 
 		if (avail < quota) {
 			quota = avail;
-			retval = ENOSPC;
+			retval = SET_ERROR(ENOSPC);
 		}
 	}
 
 	/*
 	 * If they are requesting more space, and our current estimate
 	 * is over quota, they get to try again unless the actual
-	 * on-disk is over quota and there are no pending changes (which
-	 * may free up space for us).
+	 * on-disk is over quota and there are no pending changes
+	 * or deferred frees (which may free up space for us).
 	 */
-	if (used_on_disk + est_inflight >= quota) {
-		if (est_inflight > 0 || used_on_disk < quota ||
-		    (retval == ENOSPC && used_on_disk < quota + deferred))
-			retval = ERESTART;
-		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
-		    "quota=%lluK tr=%lluK err=%d\n",
-		    used_on_disk>>10, est_inflight>>10,
-		    quota>>10, asize>>10, retval);
+	ext_quota = quota >> 5;
+	if (quota == UINT64_MAX)
+		ext_quota = 0;
+
+	if (used_on_disk >= quota) {
+		/* Quota exceeded */
 		mutex_exit(&dd->dd_lock);
 		DMU_TX_STAT_BUMP(dmu_tx_quota);
-		return (SET_ERROR(retval));
+		return (retval);
+	} else if (used_on_disk + est_inflight >= quota + ext_quota) {
+		if (est_inflight > 0 || used_on_disk < quota) {
+			retval = SET_ERROR(ERESTART);
+		} else {
+			ASSERT3U(used_on_disk, >=, quota);
+
+			if (retval == ENOSPC && (used_on_disk - quota) <
+			    dsl_pool_deferred_space(dd->dd_pool)) {
+				retval = SET_ERROR(ERESTART);
+			}
+		}
+
+		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
+		    "quota=%lluK tr=%lluK err=%d\n",
+		    (u_longlong_t)used_on_disk>>10,
+		    (u_longlong_t)est_inflight>>10,
+		    (u_longlong_t)quota>>10, (u_longlong_t)asize>>10, retval);
+		mutex_exit(&dd->dd_lock);
+		DMU_TX_STAT_BUMP(dmu_tx_quota);
+		return (retval);
 	}
 
 	/* We need to up our estimated delta before dropping dd_lock */
@@ -1530,6 +1533,11 @@
 {
 	int64_t accounted_delta;
 
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT(type < DD_USED_NUM);
+
+	dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
 	/*
 	 * dsl_dataset_set_refreservation_sync_impl() calls this with
 	 * dd_lock held, so that it can atomically update
@@ -1538,36 +1546,28 @@
 	 * consistently.
 	 */
 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
-
-	ASSERT(dmu_tx_is_syncing(tx));
-	ASSERT(type < DD_USED_NUM);
-
-	dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
 	if (needlock)
 		mutex_enter(&dd->dd_lock);
-	accounted_delta =
-	    parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used);
-	ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used);
-	ASSERT(compressed >= 0 ||
-	    dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed);
+	dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
+	accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
+	ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
+	ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed);
 	ASSERT(uncompressed >= 0 ||
-	    dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed);
-	dsl_dir_phys(dd)->dd_used_bytes += used;
-	dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed;
-	dsl_dir_phys(dd)->dd_compressed_bytes += compressed;
+	    ddp->dd_uncompressed_bytes >= -uncompressed);
+	ddp->dd_used_bytes += used;
+	ddp->dd_uncompressed_bytes += uncompressed;
+	ddp->dd_compressed_bytes += compressed;
 
-	if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
-		ASSERT(used > 0 ||
-		    dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used);
-		dsl_dir_phys(dd)->dd_used_breakdown[type] += used;
-#ifdef DEBUG
+	if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+		ASSERT(used >= 0 || ddp->dd_used_breakdown[type] >= -used);
+		ddp->dd_used_breakdown[type] += used;
+#ifdef ZFS_DEBUG
 		{
 			dd_used_t t;
 			uint64_t u = 0;
 			for (t = 0; t < DD_USED_NUM; t++)
-				u += dsl_dir_phys(dd)->dd_used_breakdown[t];
-			ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes);
+				u += ddp->dd_used_breakdown[t];
+			ASSERT3U(u, ==, ddp->dd_used_bytes);
 		}
 #endif
 	}
@@ -1575,11 +1575,9 @@
 		mutex_exit(&dd->dd_lock);
 
 	if (dd->dd_parent != NULL) {
-		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-		    accounted_delta, compressed, uncompressed, tx);
-		dsl_dir_transfer_space(dd->dd_parent,
-		    used - accounted_delta,
-		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+		dsl_dir_diduse_transfer_space(dd->dd_parent,
+		    accounted_delta, compressed, uncompressed,
+		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
 	}
 }
 
@@ -1591,21 +1589,72 @@
 	ASSERT(oldtype < DD_USED_NUM);
 	ASSERT(newtype < DD_USED_NUM);
 
+	dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
 	if (delta == 0 ||
-	    !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN))
+	    !(ddp->dd_flags & DD_FLAG_USED_BREAKDOWN))
 		return;
 
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 	mutex_enter(&dd->dd_lock);
 	ASSERT(delta > 0 ?
-	    dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta :
-	    dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta);
-	ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta));
-	dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta;
-	dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta;
+	    ddp->dd_used_breakdown[oldtype] >= delta :
+	    ddp->dd_used_breakdown[newtype] >= -delta);
+	ASSERT(ddp->dd_used_bytes >= ABS(delta));
+	ddp->dd_used_breakdown[oldtype] -= delta;
+	ddp->dd_used_breakdown[newtype] += delta;
 	mutex_exit(&dd->dd_lock);
 }
 
+/*
+ * Charge a space delta to 'dd' and, in the same dd_lock critical section,
+ * shift space between two dd_used_breakdown buckets.
+ *
+ * 'used', 'compressed' and 'uncompressed' are signed deltas added to the
+ * directory's totals.  When DD_FLAG_USED_BREAKDOWN is set, 'newtype' grows
+ * by 'tonew' and 'oldtype' changes by ('used' - 'tonew'), so the breakdown
+ * as a whole changes by exactly 'used' and keeps summing to dd_used_bytes.
+ *
+ * The change is then propagated up the dd_parent chain.  Must be called
+ * with a syncing tx; dd_lock is acquired unconditionally here.
+ */
+void
+dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
+    int64_t compressed, int64_t uncompressed, int64_t tonew,
+    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
+{
+	int64_t accounted_delta;
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT(oldtype < DD_USED_NUM);
+	ASSERT(newtype < DD_USED_NUM);
+
+	dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
+	mutex_enter(&dd->dd_lock);
+	dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
+	/*
+	 * The amount handed to the parent as its 'used' delta is computed
+	 * from dd_used_bytes before that field is updated below.
+	 */
+	accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
+	/* A negative delta must not take any total below zero. */
+	ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
+	ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed);
+	ASSERT(uncompressed >= 0 ||
+	    ddp->dd_uncompressed_bytes >= -uncompressed);
+	ddp->dd_used_bytes += used;
+	ddp->dd_uncompressed_bytes += uncompressed;
+	ddp->dd_compressed_bytes += compressed;
+
+	if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+		/* Neither bucket may be drained below zero. */
+		ASSERT(tonew - used <= 0 ||
+		    ddp->dd_used_breakdown[oldtype] >= tonew - used);
+		ASSERT(tonew >= 0 ||
+		    ddp->dd_used_breakdown[newtype] >= -tonew);
+		ddp->dd_used_breakdown[oldtype] -= tonew - used;
+		ddp->dd_used_breakdown[newtype] += tonew;
+#ifdef ZFS_DEBUG
+		/* Debug builds: the buckets must still sum to the total. */
+		{
+			dd_used_t t;
+			uint64_t u = 0;
+			for (t = 0; t < DD_USED_NUM; t++)
+				u += ddp->dd_used_breakdown[t];
+			ASSERT3U(u, ==, ddp->dd_used_bytes);
+		}
+#endif
+	}
+	mutex_exit(&dd->dd_lock);
+
+	/*
+	 * In the parent, this directory's 'used' is added to DD_USED_CHILD
+	 * while DD_USED_CHILD_RSRV absorbs (accounted_delta - used), so the
+	 * parent's total moves by accounted_delta.
+	 */
+	if (dd->dd_parent != NULL) {
+		dsl_dir_diduse_transfer_space(dd->dd_parent,
+		    accounted_delta, compressed, uncompressed,
+		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+	}
+}
+
 typedef struct dsl_dir_set_qr_arg {
 	const char *ddsqra_name;
 	zprop_source_t ddsqra_source;
@@ -1699,7 +1748,7 @@
 	    ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
-int
+static int
 dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
 {
 	dsl_dir_set_qr_arg_t *ddsqra = arg;
@@ -1855,6 +1904,7 @@
 	const char *ddra_oldname;
 	const char *ddra_newname;
 	cred_t *ddra_cred;
+	proc_t *ddra_proc;
 } dsl_dir_rename_arg_t;
 
 typedef struct dsl_valid_rename_arg {
@@ -1862,10 +1912,10 @@
 	int nest_delta;
 } dsl_valid_rename_arg_t;
 
-/* ARGSUSED */
 static int
 dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
+	(void) dp;
 	dsl_valid_rename_arg_t *dvra = arg;
 	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
 
@@ -2033,7 +2083,8 @@
 		}
 
 		error = dsl_dir_transfer_possible(dd->dd_parent,
-		    newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred);
+		    newparent, fs_cnt, ss_cnt, myspace,
+		    ddra->ddra_cred, ddra->ddra_proc);
 		if (error != 0) {
 			dsl_dir_rele(newparent, FTAG);
 			dsl_dir_rele(dd, FTAG);
@@ -2053,7 +2104,6 @@
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dir_t *dd, *newparent;
 	const char *mynewname;
-	int error;
 	objset_t *mos = dp->dp_meta_objset;
 
 	VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
@@ -2120,10 +2170,9 @@
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 
 	/* remove from old parent zapobj */
-	error = zap_remove(mos,
+	VERIFY0(zap_remove(mos,
 	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
-	    dd->dd_myname, tx);
-	ASSERT0(error);
+	    dd->dd_myname, tx));
 
 	(void) strlcpy(dd->dd_myname, mynewname,
 	    sizeof (dd->dd_myname));
@@ -2136,6 +2185,8 @@
 	VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
 	    dd->dd_myname, 8, 1, &dd->dd_object, tx));
 
+	/* TODO: A rename callback to avoid these layering violations. */
+	zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
 	zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname,
 	    ddra->ddra_newname, B_TRUE);
 
@@ -2153,6 +2204,7 @@
 	ddra.ddra_oldname = oldname;
 	ddra.ddra_newname = newname;
 	ddra.ddra_cred = CRED();
+	ddra.ddra_proc = curproc;
 
 	return (dsl_sync_task(oldname,
 	    dsl_dir_rename_check, dsl_dir_rename_sync, &ddra,
@@ -2161,7 +2213,8 @@
 
 int
 dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
-    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr)
+    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space,
+    cred_t *cr, proc_t *proc)
 {
 	dsl_dir_t *ancestor;
 	int64_t adelta;
@@ -2175,11 +2228,11 @@
 		return (SET_ERROR(ENOSPC));
 
 	err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT,
-	    ancestor, cr);
+	    ancestor, cr, proc);
 	if (err != 0)
 		return (err);
 	err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT,
-	    ancestor, cr);
+	    ancestor, cr, proc);
 	if (err != 0)
 		return (err);
 
@@ -2225,6 +2278,189 @@
 	return (doi.doi_type == DMU_OTN_ZAP_METADATA);
 }
 
+/*
+ * Open the livelist stored in MOS object 'obj' on this dsl_dir and create
+ * the pending-allocs and pending-frees bplists that go with it.  The
+ * livelist feature is expected to be active on the pool.
+ */
+void
+dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj)
+{
+	dsl_pool_t *dp = dd->dd_pool;
+
+	ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LIVELIST));
+
+	dsl_deadlist_open(&dd->dd_livelist, dp->dp_meta_objset, obj);
+	bplist_create(&dd->dd_pending_allocs);
+	bplist_create(&dd->dd_pending_frees);
+}
+
+/*
+ * Counterpart of dsl_dir_livelist_open(): close the dir's livelist and
+ * destroy the pending-allocs and pending-frees bplists.  Callers in this
+ * file check dsl_deadlist_is_open(&dd->dd_livelist) before calling.
+ */
+void
+dsl_dir_livelist_close(dsl_dir_t *dd)
+{
+	dsl_deadlist_close(&dd->dd_livelist);
+	bplist_destroy(&dd->dd_pending_allocs);
+	bplist_destroy(&dd->dd_pending_frees);
+}
+
+/*
+ * Detach the livelist from 'dd'.
+ *
+ * Does nothing if the dir's livelist is not open.  Otherwise any pending
+ * condense of this dir's livelist is cancelled, the in-memory livelist is
+ * closed, and the DD_FIELD_LIVELIST entry is removed from the dir's ZAP.
+ * When 'total' is set, the on-disk livelist object is freed as well and
+ * the SPA_FEATURE_LIVELIST refcount is decremented.
+ */
+void
+dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
+{
+	uint64_t obj;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_t *spa = dp->dp_spa;
+	/* By-value snapshot; later updates go through spa->spa_to_condense. */
+	livelist_condense_entry_t to_condense = spa->spa_to_condense;
+
+	if (!dsl_deadlist_is_open(&dd->dd_livelist))
+		return;
+
+	/*
+	 * If the livelist being removed is set to be condensed, stop the
+	 * condense zthr and indicate the cancellation in the spa_to_condense
+	 * struct in case the condense no-wait synctask has already started.
+	 */
+	zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
+	if (ll_condense_thread != NULL &&
+	    (to_condense.ds != NULL) && (to_condense.ds->ds_dir == dd)) {
+		/*
+		 * We use zthr_wait_cycle_done instead of zthr_cancel
+		 * because we don't want to destroy the zthr, just have
+		 * it skip its current task.
+		 */
+		spa->spa_to_condense.cancelled = B_TRUE;
+		zthr_wait_cycle_done(ll_condense_thread);
+		/*
+		 * If zthr_wait_cycle_done returned without the
+		 * to_condense structure having been cleared, one of two
+		 * things happened.  Either the no-wait synctask has
+		 * already started (indicated by the 'syncing' field of
+		 * to_condense), in which case we can expect it to clear
+		 * to_condense on its own; or we returned before the
+		 * zthr ran, in which case its checkfunc will now fail
+		 * because cancelled == B_TRUE, so we can safely NULL
+		 * out ds, allowing a different dir's livelist to be
+		 * condensed.
+		 *
+		 * We can be sure that the to_condense struct will not
+		 * be repopulated at this stage because both this
+		 * function and dsl_livelist_try_condense execute in
+		 * syncing context.
+		 */
+		if ((spa->spa_to_condense.ds != NULL) &&
+		    !spa->spa_to_condense.syncing) {
+			dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf,
+			    spa);
+			spa->spa_to_condense.ds = NULL;
+		}
+	}
+
+	dsl_dir_livelist_close(dd);
+	/* Fetch the livelist object number before dropping the ZAP entry. */
+	VERIFY0(zap_lookup(dp->dp_meta_objset, dd->dd_object,
+	    DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj));
+	VERIFY0(zap_remove(dp->dp_meta_objset, dd->dd_object,
+	    DD_FIELD_LIVELIST, tx));
+	if (total) {
+		dsl_deadlist_free(dp->dp_meta_objset, obj, tx);
+		spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx);
+	}
+}
+
+/*
+ * Determine whether 'activity' is still in progress on dataset 'ds',
+ * storing the answer in *in_progress.  The caller must hold
+ * dd->dd_activity_lock.
+ *
+ * Returns 0 with *in_progress set, or the error from one of the lookups,
+ * in which case *in_progress is left untouched.  This function takes no
+ * hold on 'ds' and therefore never releases it; the dataset stays the
+ * caller's responsibility on every path.  An unrecognized activity value
+ * panics.
+ */
+static int
+dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
+    zfs_wait_activity_t activity, boolean_t *in_progress)
+{
+	int error = 0;
+
+	ASSERT(MUTEX_HELD(&dd->dd_activity_lock));
+
+	switch (activity) {
+	case ZFS_WAIT_DELETEQ: {
+#ifdef _KERNEL
+		objset_t *os;
+		error = dmu_objset_from_ds(ds, &os);
+		if (error != 0)
+			break;
+
+		mutex_enter(&os->os_user_ptr_lock);
+		void *user = dmu_objset_get_user(os);
+		mutex_exit(&os->os_user_ptr_lock);
+		/*
+		 * Not a ZPL objset, no user pointer registered, or flagged
+		 * as unmounted: report the activity as not in progress.
+		 */
+		if (dmu_objset_type(os) != DMU_OST_ZFS ||
+		    user == NULL || zfs_get_vfs_flag_unmounted(os)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		uint64_t readonly = B_FALSE;
+		error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
+		    NULL);
+
+		if (error != 0)
+			break;
+
+		/* Read-only dataset or pool: not in progress either. */
+		if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		uint64_t count, unlinked_obj;
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+		    &unlinked_obj);
+		/*
+		 * No hold on 'ds' was taken in this function, so there is
+		 * nothing to release on failure; just report the error like
+		 * the other error exits do.
+		 */
+		if (error != 0)
+			break;
+		error = zap_count(os, unlinked_obj, &count);
+
+		/* In progress while the unlinked set still has entries. */
+		if (error == 0)
+			*in_progress = (count != 0);
+		break;
+#else
+		/*
+		 * The delete queue is ZPL specific, and libzpool doesn't have
+		 * it. It doesn't make sense to wait for it.
+		 */
+		(void) ds;
+		*in_progress = B_FALSE;
+		break;
+#endif
+	}
+	default:
+		panic("unrecognized value for activity %d", activity);
+	}
+
+	return (error);
+}
+
+/*
+ * Wait until 'activity' is no longer in progress on 'ds'.
+ *
+ * Each pass re-evaluates the activity under the pool config lock and, if it
+ * is still running, sleeps on dd_activity_cv.  The caller must already hold
+ * dd->dd_activity_lock: it is the mutex handed to cv_wait_sig(), and
+ * dsl_dir_activity_in_progress() asserts that it is held.
+ *
+ * *waited is set to B_TRUE if the activity was found in progress at least
+ * once; it is never cleared here, so the caller is expected to initialize
+ * it.
+ *
+ * Returns 0 once the activity has finished, the error from the progress
+ * check, or EINTR if the sleep was interrupted (cv_wait_sig() returned 0)
+ * or the waiters were cancelled (dd_activity_cancelled is set).
+ */
+int
+dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+    boolean_t *waited)
+{
+	int error = 0;
+	boolean_t in_progress;
+	dsl_pool_t *dp = dd->dd_pool;
+	for (;;) {
+		/* The config lock is held only around the progress check. */
+		dsl_pool_config_enter(dp, FTAG);
+		error = dsl_dir_activity_in_progress(dd, ds, activity,
+		    &in_progress);
+		dsl_pool_config_exit(dp, FTAG);
+		if (error != 0 || !in_progress)
+			break;
+
+		*waited = B_TRUE;
+
+		/* Sleep until woken; a signal or a cancellation ends it. */
+		if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) ==
+		    0 || dd->dd_activity_cancelled) {
+			error = SET_ERROR(EINTR);
+			break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Cancel every thread waiting on an activity of 'dd': set
+ * dd_activity_cancelled, wake all sleepers on dd_activity_cv, then block
+ * until dd_activity_waiters has dropped to zero.
+ *
+ * NOTE(review): dd_activity_waiters is not modified in this function;
+ * presumably each waiter decrements it and signals dd_activity_cv on its
+ * way out -- confirm against the callers of dsl_dir_wait().
+ */
+void
+dsl_dir_cancel_waiters(dsl_dir_t *dd)
+{
+	mutex_enter(&dd->dd_activity_lock);
+	/* Set the flag before the broadcast so woken waiters observe it. */
+	dd->dd_activity_cancelled = B_TRUE;
+	cv_broadcast(&dd->dd_activity_cv);
+	while (dd->dd_activity_waiters > 0)
+		cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock);
+	mutex_exit(&dd->dd_activity_lock);
+}
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(dsl_dir_set_quota);
 EXPORT_SYMBOL(dsl_dir_set_reservation);

diff --git a/zfs/module/zfs/dsl_pool.c b/zfs/module/zfs/dsl_pool.c
index a53a5f3..277560a 100644
--- a/zfs/module/zfs/dsl_pool.c
+++ b/zfs/module/zfs/dsl_pool.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
@@ -42,14 +42,13 @@
 #include <sys/fs/zfs.h>
 #include <sys/zfs_znode.h>
 #include <sys/spa_impl.h>
-#include <sys/dsl_deadlist.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab_impl.h>
 #include <sys/bptree.h>
 #include <sys/zfeature.h>
 #include <sys/zil_impl.h>
 #include <sys/dsl_userhold.h>
-#include <sys/trace_txg.h>
+#include <sys/trace_zfs.h>
 #include <sys/mmp.h>
 
 /*
@@ -106,6 +105,13 @@
 int zfs_dirty_data_max_max_percent = 25;
 
 /*
+ * The upper limit of TX_WRITE log data.  Write operations are throttled
+ * when approaching the limit until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
+/*
  * If there's at least this much dirty data (as a percentage of
  * zfs_dirty_data_max), push out a txg.  This should be less than
  * zfs_vdev_async_write_active_min_dirty_percent.
@@ -221,11 +227,17 @@
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
 
-	dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
-	    boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+	aggsum_init(&dp->dp_wrlog_total, 0);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+	}
+
+	dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
+	    boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
+	    TASKQ_THREADS_CPU_PCT);
 	dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
-	    boot_ncpus, defclsyspri, boot_ncpus, INT_MAX,
-	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+	    100, defclsyspri, boot_ncpus, INT_MAX,
+	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
 
 	return (dp);
 }
@@ -416,12 +428,18 @@
 	rrw_destroy(&dp->dp_config_rwlock);
 	mutex_destroy(&dp->dp_lock);
 	cv_destroy(&dp->dp_spaceavail_cv);
-	taskq_destroy(dp->dp_unlinked_drain_taskq);
-	taskq_destroy(dp->dp_iput_taskq);
-	if (dp->dp_blkstats != NULL) {
-		mutex_destroy(&dp->dp_blkstats->zab_lock);
-		vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
+
+	ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+	aggsum_fini(&dp->dp_wrlog_total);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+		aggsum_fini(&dp->dp_wrlog_pertxg[i]);
 	}
+
+	taskq_destroy(dp->dp_unlinked_drain_taskq);
+	taskq_destroy(dp->dp_zrele_taskq);
+	if (dp->dp_blkstats != NULL)
+		vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
 	kmem_free(dp, sizeof (dsl_pool_t));
 }
 
@@ -455,8 +473,8 @@
 }
 
 dsl_pool_t *
-dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
-    uint64_t txg)
+dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)),
+    dsl_crypto_params_t *dcp, uint64_t txg)
 {
 	int err;
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
@@ -566,6 +584,10 @@
 	zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	dmu_objset_sync(dp->dp_meta_objset, zio, tx);
 	VERIFY0(zio_wait(zio));
+	dmu_objset_sync_done(dp->dp_meta_objset, tx);
+	taskq_wait(dp->dp_sync_taskq);
+	multilist_destroy(&dp->dp_meta_objset->os_synced_dnodes);
+
 	dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
 	spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
 }
@@ -588,6 +610,42 @@
 		cv_signal(&dp->dp_spaceavail_cv);
 }
 
+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+	aggsum_t *pertxg = &dp->dp_wrlog_pertxg[txg & TXG_MASK];
+
+	ASSERT3S(size, >=, 0);
+	aggsum_add(pertxg, size);
+	aggsum_add(&dp->dp_wrlog_total, size);
+	/* Kick at (sync_percent + 10)% of half of zfs_wrlog_data_max. */
+	uint64_t kick_bytes =
+	    zfs_wrlog_data_max * (zfs_dirty_data_sync_percent + 10) / 200;
+	if (aggsum_compare(pertxg, kick_bytes) > 0)
+		txg_kick(dp, txg);
+}
+
+boolean_t
+dsl_pool_need_wrlog_delay(dsl_pool_t *dp)
+{
+	/* True once dp_wrlog_total exceeds the delay share of the limit. */
+	uint64_t threshold;
+	threshold = zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;
+	return (aggsum_compare(&dp->dp_wrlog_total, threshold) > 0);
+}
+
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+	aggsum_t *pertxg = &dp->dp_wrlog_pertxg[txg & TXG_MASK];
+	int64_t drained = -(int64_t)aggsum_value(pertxg);
+	aggsum_add(pertxg, drained);
+	aggsum_add(&dp->dp_wrlog_total, drained);
+	/* Read both sums back to compact the per-CPU sums after the drop. */
+	(void) aggsum_value(pertxg);
+	(void) aggsum_value(&dp->dp_wrlog_total);
+}
+
 #ifdef ZFS_DEBUG
 static boolean_t
 dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
@@ -677,7 +735,7 @@
 	 */
 	for (ds = list_head(&synced_datasets); ds != NULL;
 	    ds = list_next(&synced_datasets, ds)) {
-		dmu_objset_do_userquota_updates(ds->ds_objset, tx);
+		dmu_objset_sync_done(ds->ds_objset, tx);
 	}
 	taskq_wait(dp->dp_sync_taskq);
 
@@ -713,7 +771,8 @@
 	 * Now that the datasets have been completely synced, we can
 	 * clean up our in-memory structures accumulated while syncing:
 	 *
-	 *  - move dead blocks from the pending deadlist to the on-disk deadlist
+	 *  - move dead blocks from the pending deadlist and livelists
+	 *    to the on-disk versions
 	 *  - release hold from dsl_dataset_dirty()
 	 *  - release key mapping hold from dsl_dataset_dirty()
 	 */
@@ -727,6 +786,7 @@
 		}
 
 		dsl_dataset_sync_done(ds, tx);
+		dmu_buf_rele(ds->ds_dbuf, ds);
 	}
 
 	while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) {
@@ -749,7 +809,7 @@
 		dp->dp_mos_uncompressed_delta = 0;
 	}
 
-	if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) {
+	if (dmu_objset_is_dirty(mos, txg)) {
 		dsl_pool_sync_mos(dp, tx);
 	}
 
@@ -811,6 +871,9 @@
 		ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
 		dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
+
+	dsl_pool_wrlog_clear(dp, txg);
+
 	ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
 }
 
@@ -888,23 +951,37 @@
 	return (quota);
 }
 
+uint64_t
+dsl_pool_deferred_space(dsl_pool_t *dp)
+{
+	return (metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)));
+}
+
 boolean_t
 dsl_pool_need_dirty_delay(dsl_pool_t *dp)
 {
 	uint64_t delay_min_bytes =
 	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
-	uint64_t dirty_min_bytes =
-	    zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
-	uint64_t dirty;
 
 	mutex_enter(&dp->dp_lock);
-	dirty = dp->dp_dirty_total;
+	uint64_t dirty = dp->dp_dirty_total;
 	mutex_exit(&dp->dp_lock);
-	if (dirty > dirty_min_bytes)
-		txg_kick(dp);
+
 	return (dirty > delay_min_bytes);
 }
 
+static boolean_t
+dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
+{
+	/* True when this txg's dirty bytes exceed the sync threshold. */
+	ASSERT(MUTEX_HELD(&dp->dp_lock));
+
+	uint64_t sync_bytes =
+	    zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
+
+	return (dp->dp_dirty_pertxg[txg & TXG_MASK] > sync_bytes);
+}
+
 void
 dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
 {
@@ -912,7 +989,12 @@
 		mutex_enter(&dp->dp_lock);
 		dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space;
 		dsl_pool_dirty_delta(dp, space);
+		boolean_t needsync = !dmu_tx_is_syncing(tx) &&
+		    dsl_pool_need_dirty_sync(dp, tx->tx_txg);
 		mutex_exit(&dp->dp_lock);
+
+		if (needsync)
+			txg_kick(dp, tx->tx_txg);
 	}
 }
 
@@ -1102,9 +1184,9 @@
 }
 
 taskq_t *
-dsl_pool_iput_taskq(dsl_pool_t *dp)
+dsl_pool_zrele_taskq(dsl_pool_t *dp)
 {
-	return (dp->dp_iput_taskq);
+	return (dp->dp_zrele_taskq);
 }
 
 taskq_t *
@@ -1158,7 +1240,7 @@
 /*
  * Create the pool-wide zap object for storing temporary snapshot holds.
  */
-void
+static void
 dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
 {
 	objset_t *mos = dp->dp_meta_objset;
@@ -1200,7 +1282,7 @@
 		error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
 	else
 		error = zap_remove(mos, zapobj, name, tx);
-	strfree(name);
+	kmem_strfree(name);
 
 	return (error);
 }
@@ -1264,8 +1346,16 @@
  * (e.g. it could be destroyed).  Therefore you shouldn't do anything to the
  * dataset except release it.
  *
- * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
- * or modifying operations.
+ * Operations generally fall somewhere into the following taxonomy:
+ *
+ *                              Read-Only             Modifying
+ *
+ *    Dataset Layer / MOS        zfs get             zfs destroy
+ *
+ *     Individual Dataset         read()                write()
+ *
+ *
+ * Dataset Layer Operations
  *
  * Modifying operations should generally use dsl_sync_task().  The synctask
  * infrastructure enforces proper locking strategy with respect to the
@@ -1275,6 +1365,25 @@
  * information from the dataset, then release the pool and dataset.
  * dmu_objset_{hold,rele}() are convenience routines that also do the pool
  * hold/rele.
+ *
+ *
+ * Operations On Individual Datasets
+ *
+ * Objects _within_ an objset should only be modified by the current 'owner'
+ * of the objset to prevent incorrect concurrent modification. Thus, use
+ * {dmu_objset,dsl_dataset}_own to mark some entity as the current owner,
+ * and fail with EBUSY if there is already an owner. The owner can then
+ * implement its own locking strategy, independent of the dataset layer's
+ * locking infrastructure.
+ * (E.g., the ZPL has its own set of locks to control concurrency. A regular
+ *  vnop will not reach into the dataset layer).
+ *
+ * Ideally, objects would also only be read by the objset’s owner, so that we
+ * don’t observe state mid-modification.
+ * (E.g. the ZPL is creating a new object and linking it into a directory; if
+ * you don’t coordinate with the ZPL to hold ZPL-level locks, you could see an
+ * intermediate state.  The ioctl level violates this but in pretty benign
+ * ways, e.g. reading the zpl props object.)
  */
 
 int
@@ -1342,53 +1451,46 @@
 	return (RRW_WRITE_HELD(&dp->dp_config_rwlock));
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(dsl_pool_config_enter);
 EXPORT_SYMBOL(dsl_pool_config_exit);
 
 /* BEGIN CSTYLED */
 /* zfs_dirty_data_max_percent only applied at module load in arc_init(). */
-module_param(zfs_dirty_data_max_percent, int, 0444);
-MODULE_PARM_DESC(zfs_dirty_data_max_percent, "percent of ram can be dirty");
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_percent, INT, ZMOD_RD,
+	"Max percent of RAM allowed to be dirty");
 
 /* zfs_dirty_data_max_max_percent only applied at module load in arc_init(). */
-module_param(zfs_dirty_data_max_max_percent, int, 0444);
-MODULE_PARM_DESC(zfs_dirty_data_max_max_percent,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max_percent, INT, ZMOD_RD,
 	"zfs_dirty_data_max upper bound as % of RAM");
 
-module_param(zfs_delay_min_dirty_percent, int, 0644);
-MODULE_PARM_DESC(zfs_delay_min_dirty_percent, "transaction delay threshold");
+ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
+	"Transaction delay threshold");
 
-module_param(zfs_dirty_data_max, ulong, 0644);
-MODULE_PARM_DESC(zfs_dirty_data_max, "determines the dirty space limit");
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
+	"Determines the dirty space limit");
+
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+	"The size limit of write-transaction zil log data");
 
 /* zfs_dirty_data_max_max only applied at module load in arc_init(). */
-module_param(zfs_dirty_data_max_max, ulong, 0444);
-MODULE_PARM_DESC(zfs_dirty_data_max_max,
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
 	"zfs_dirty_data_max upper bound in bytes");
 
-module_param(zfs_dirty_data_sync_percent, int, 0644);
-MODULE_PARM_DESC(zfs_dirty_data_sync_percent,
-	"dirty data txg sync threshold as a percentage of zfs_dirty_data_max");
+ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, INT, ZMOD_RW,
+	"Dirty data txg sync threshold as a percentage of zfs_dirty_data_max");
 
-module_param(zfs_delay_scale, ulong, 0644);
-MODULE_PARM_DESC(zfs_delay_scale, "how quickly delay approaches infinity");
+ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, ULONG, ZMOD_RW,
+	"How quickly delay approaches infinity");
 
-module_param(zfs_sync_taskq_batch_pct, int, 0644);
-MODULE_PARM_DESC(zfs_sync_taskq_batch_pct,
-	"max percent of CPUs that are used to sync dirty data");
+ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW,
+	"Max percent of CPUs that are used to sync dirty data");
 
-module_param(zfs_zil_clean_taskq_nthr_pct, int, 0644);
-MODULE_PARM_DESC(zfs_zil_clean_taskq_nthr_pct,
-	"max percent of CPUs that are used per dp_sync_taskq");
+ZFS_MODULE_PARAM(zfs_zil, zfs_zil_, clean_taskq_nthr_pct, INT, ZMOD_RW,
+	"Max percent of CPUs that are used per dp_sync_taskq");
 
-module_param(zfs_zil_clean_taskq_minalloc, int, 0644);
-MODULE_PARM_DESC(zfs_zil_clean_taskq_minalloc,
-	"number of taskq entries that are pre-populated");
+ZFS_MODULE_PARAM(zfs_zil, zfs_zil_, clean_taskq_minalloc, INT, ZMOD_RW,
+	"Number of taskq entries that are pre-populated");
 
-module_param(zfs_zil_clean_taskq_maxalloc, int, 0644);
-MODULE_PARM_DESC(zfs_zil_clean_taskq_maxalloc,
-	"max number of taskq entries that are cached");
-
+ZFS_MODULE_PARAM(zfs_zil, zfs_zil_, clean_taskq_maxalloc, INT, ZMOD_RW,
+	"Max number of taskq entries that are cached");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/dsl_prop.c b/zfs/module/zfs/dsl_prop.c
index 3dd959e..6cba8bd 100644
--- a/zfs/module/zfs/dsl_prop.c
+++ b/zfs/module/zfs/dsl_prop.c

@@ -22,7 +22,8 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2013 Martin Matuska. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
  */
 
 #include <sys/zfs_context.h>
@@ -41,6 +42,7 @@
 
 #define	ZPROP_INHERIT_SUFFIX "$inherit"
 #define	ZPROP_RECVD_SUFFIX "$recvd"
+#define	ZPROP_IUV_SUFFIX "$iuv"
 
 static int
 dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
@@ -69,11 +71,22 @@
 	return (0);
 }
 
+static int
+dsl_prop_known_index(zfs_prop_t prop, uint64_t value)
+{
+	/* -1: not an index property; otherwise 1 if the lookup returns 0. */
+	const char *name = NULL;
+	if (prop == ZPROP_CONT || prop == ZPROP_INVAL ||
+	    zfs_prop_get_type(prop) != PROP_TYPE_INDEX)
+		return (-1);
+	return (zfs_prop_index_to_string(prop, value, &name) == 0);
+}
+
 int
 dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
     int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot)
 {
-	int err = ENOENT;
+	int err;
 	dsl_dir_t *target = dd;
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
 	zfs_prop_t prop;
@@ -81,6 +94,7 @@
 	boolean_t inheriting = B_FALSE;
 	char *inheritstr;
 	char *recvdstr;
+	char *iuvstr;
 
 	ASSERT(dsl_pool_config_held(dd->dd_pool));
 
@@ -91,6 +105,7 @@
 	inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
 	inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
 	recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+	iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
 
 	/*
 	 * Note: dd may become NULL, therefore we shouldn't dereference it
@@ -98,11 +113,25 @@
 	 */
 	for (; dd != NULL; dd = dd->dd_parent) {
 		if (dd != target || snapshot) {
-			if (!inheritable)
+			if (!inheritable) {
+				err = SET_ERROR(ENOENT);
 				break;
+			}
 			inheriting = B_TRUE;
 		}
 
+		/* Check for an iuv value. */
+		err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
+		    iuvstr, intsz, numints, buf);
+		if (dsl_prop_known_index(zfs_name_to_prop(propname),
+		    *(uint64_t *)buf) != 1)
+			err = ENOENT;
+		if (err != ENOENT) {
+			if (setpoint != NULL && err == 0)
+				dsl_dir_name(dd, setpoint);
+			break;
+		}
+
 		/* Check for a local value. */
 		err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
 		    propname, intsz, numints, buf);
@@ -151,8 +180,9 @@
 	if (err == ENOENT)
 		err = dodefault(prop, intsz, numints, buf);
 
-	strfree(inheritstr);
-	strfree(recvdstr);
+	kmem_strfree(inheritstr);
+	kmem_strfree(recvdstr);
+	kmem_strfree(iuvstr);
 
 	return (err);
 }
@@ -191,7 +221,7 @@
 			char *inheritstr = kmem_asprintf("%s%s", propname,
 			    ZPROP_INHERIT_SUFFIX);
 			err = zap_contains(mos, zapobj, inheritstr);
-			strfree(inheritstr);
+			kmem_strfree(inheritstr);
 			if (err != 0 && err != ENOENT)
 				return (err);
 		}
@@ -202,7 +232,7 @@
 			    ZPROP_RECVD_SUFFIX);
 			err = zap_lookup(mos, zapobj, recvdstr,
 			    intsz, numints, buf);
-			strfree(recvdstr);
+			kmem_strfree(recvdstr);
 			if (err != ENOENT) {
 				if (setpoint != NULL && err == 0)
 					(void) strlcpy(setpoint,
@@ -285,7 +315,7 @@
 	dsl_prop_record_t *pr;
 	dsl_prop_cb_record_t *cbr;
 	int err;
-	ASSERTV(dsl_pool_t *dp = dd->dd_pool);
+	dsl_pool_t *dp __maybe_unused = dd->dd_pool;
 
 	ASSERT(dsl_pool_config_held(dp));
 
@@ -426,7 +456,7 @@
 		panic("unexpected property source: %d", source);
 	}
 
-	strfree(recvdstr);
+	kmem_strfree(recvdstr);
 
 	if (err == ENOENT)
 		return (0);
@@ -502,10 +532,10 @@
 	return (!list_is_empty(&ds->ds_prop_cbs));
 }
 
-/* ARGSUSED */
 static int
 dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
+	(void) arg;
 	dsl_dir_t *dd = ds->ds_dir;
 	dsl_prop_record_t *pr;
 	dsl_prop_cb_record_t *cbr;
@@ -645,6 +675,45 @@
 	dsl_dir_rele(dd, FTAG);
 }
 
+
+/*
+ * Index properties may gain values that older zfs versions do not know.
+ * Such a value is stored under propname$iuv (iuv = Ignore Unknown Values)
+ * while the existing propname key is set to the property default; older
+ * versions ignore the $iuv key and use the default.  For any other value
+ * the propname$iuv key is removed; a missing key is not an error here, so
+ * the zap_remove() result is deliberately discarded.
+ */
+static void
+dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname,
+    int intsz, int numints, const void *value, dmu_tx_t *tx)
+{
+	char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
+	boolean_t iuv = B_FALSE;
+	zfs_prop_t prop = zfs_name_to_prop(propname);
+
+	switch (prop) {
+	case ZFS_PROP_REDUNDANT_METADATA:
+		if (*(const uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME ||
+		    *(const uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE)
+			iuv = B_TRUE;
+		break;
+	default:
+		break;
+	}
+
+	if (iuv) {
+		VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints,
+		    value, tx));
+		uint64_t val = zfs_prop_default_numeric(prop);
+		VERIFY0(zap_update(mos, zapobj, propname, intsz, numints,
+		    &val, tx));
+	} else {
+		(void) zap_remove(mos, zapobj, iuvstr, tx);
+	}
+	kmem_strfree(iuvstr);
+}
+
 void
 dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
     zprop_source_t source, int intsz, int numints, const void *value,
@@ -657,6 +726,7 @@
 	const char *valstr = NULL;
 	char *inheritstr;
 	char *recvdstr;
+	char *iuvstr;
 	char *tbuf = NULL;
 	int err;
 	uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
@@ -690,6 +760,7 @@
 
 	inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
 	recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
+	iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
 
 	switch ((int)source) {
 	case ZPROP_SRC_NONE:
@@ -707,11 +778,14 @@
 		/*
 		 * remove propname$inherit
 		 * set propname -> value
+		 * set propname$iuv -> new property value
 		 */
 		err = zap_remove(mos, zapobj, inheritstr, tx);
 		ASSERT(err == 0 || err == ENOENT);
 		VERIFY0(zap_update(mos, zapobj, propname,
 		    intsz, numints, value, tx));
+		(void) dsl_prop_set_iuv(mos, zapobj, propname, intsz,
+		    numints, value, tx);
 		break;
 	case ZPROP_SRC_INHERITED:
 		/*
@@ -721,6 +795,8 @@
 		 */
 		err = zap_remove(mos, zapobj, propname, tx);
 		ASSERT(err == 0 || err == ENOENT);
+		err = zap_remove(mos, zapobj, iuvstr, tx);
+		ASSERT(err == 0 || err == ENOENT);
 		if (version >= SPA_VERSION_RECVD_PROPS &&
 		    dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
 			dummy = 0;
@@ -747,7 +823,7 @@
 		ASSERT(err == 0 || err == ENOENT);
 		err = zap_remove(mos, zapobj, inheritstr, tx);
 		ASSERT(err == 0 || err == ENOENT);
-		/* FALLTHRU */
+		fallthrough;
 	case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
 		/*
 		 * remove propname$recvd
@@ -759,8 +835,9 @@
 		cmn_err(CE_PANIC, "unexpected property source: %d", source);
 	}
 
-	strfree(inheritstr);
-	strfree(recvdstr);
+	kmem_strfree(inheritstr);
+	kmem_strfree(recvdstr);
+	kmem_strfree(iuvstr);
 
 	/*
 	 * If we are left with an empty snap zap we can destroy it.
@@ -858,13 +935,7 @@
 	return (error);
 }
 
-typedef struct dsl_props_set_arg {
-	const char *dpsa_dsname;
-	zprop_source_t dpsa_source;
-	nvlist_t *dpsa_props;
-} dsl_props_set_arg_t;
-
-static int
+int
 dsl_props_set_check(void *arg, dmu_tx_t *tx)
 {
 	dsl_props_set_arg_t *dpsa = arg;
@@ -942,7 +1013,7 @@
 	}
 }
 
-static void
+void
 dsl_props_set_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_props_set_arg_t *dpsa = arg;
@@ -1016,6 +1087,14 @@
 
 			propname = za.za_name;
 			source = setpoint;
+
+			/* Skip if iuv entries are present. */
+			valstr = kmem_asprintf("%s%s", propname,
+			    ZPROP_IUV_SUFFIX);
+			err = zap_contains(mos, propobj, valstr);
+			kmem_strfree(valstr);
+			if (err == 0)
+				continue;
 		} else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) {
 			/* Skip explicitly inherited entries. */
 			continue;
@@ -1039,7 +1118,7 @@
 				valstr = kmem_asprintf("%s%s", propname,
 				    ZPROP_INHERIT_SUFFIX);
 				err = zap_contains(mos, propobj, valstr);
-				strfree(valstr);
+				kmem_strfree(valstr);
 				if (err == 0)
 					continue;
 				if (err != ENOENT)
@@ -1048,6 +1127,16 @@
 
 			source = ((flags & DSL_PROP_GET_INHERITING) ?
 			    setpoint : ZPROP_SOURCE_VAL_RECVD);
+		} else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) {
+			(void) strlcpy(buf, za.za_name,
+			    MIN(sizeof (buf), suffix - za.za_name + 1));
+			propname = buf;
+			source = setpoint;
+			prop = zfs_name_to_prop(propname);
+
+			if (dsl_prop_known_index(prop,
+			    za.za_first_integer) != 1)
+				continue;
 		} else {
 			/*
 			 * For backward compatibility, skip suffixes we don't

diff --git a/zfs/module/zfs/dsl_scan.c b/zfs/module/zfs/dsl_scan.c
index cf83791..f0a851f 100644
--- a/zfs/module/zfs/dsl_scan.c
+++ b/zfs/module/zfs/dsl_scan.c

@@ -20,9 +20,10 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
  * Copyright 2016 Gary Mills
  * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
  * Copyright 2019 Joyent, Inc.
  */
 
@@ -36,6 +37,7 @@
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
 #include <sys/arc.h>
+#include <sys/arc_impl.h>
 #include <sys/zap.h>
 #include <sys/zio.h>
 #include <sys/zfs_context.h>
@@ -125,9 +127,19 @@
 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
-static uint64_t dsl_scan_count_leaves(vdev_t *vd);
+static uint64_t dsl_scan_count_data_disks(spa_t *spa);
 
 extern int zfs_vdev_async_write_active_min_dirty_percent;
+static int zfs_scan_blkstats = 0;
+
+/*
+ * 'zpool status' uses bytes processed per pass to report throughput and
+ * estimate time remaining.  We define a pass to start when the scanning
+ * phase completes for a sequential resilver.  Optionally, this value
+ * may be used to reset the pass statistics every N txgs to provide an
+ * estimated completion time based on currently observed performance.
+ */
+static uint_t zfs_scan_report_txgs = 0;
 
 /*
  * By default zfs will check to ensure it is not over the hard memory
@@ -145,7 +157,7 @@
  * overload the drives with I/O, since that is protected by
  * zfs_vdev_scrub_max_active.
  */
-unsigned long zfs_scan_vdev_limit = 4 << 20;
+unsigned long zfs_scan_vdev_limit = 16 << 20;
 
 int zfs_scan_issue_strategy = 0;
 int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */
@@ -175,7 +187,9 @@
 int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
 enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
 /* max number of blocks to free in a single TXG */
-unsigned long zfs_async_block_max_blocks = 100000;
+unsigned long zfs_async_block_max_blocks = ULONG_MAX;
+/* max number of dedup blocks to free in a single TXG */
+unsigned long zfs_max_async_dedup_frees = 100000;
 
 int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */
 
@@ -216,9 +230,9 @@
 
 /*
  * This controls what conditions are placed on dsl_scan_sync_state():
- * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0
- * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0.
- * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise
+ * SYNC_OPTIONAL) write out scn_phys iff scn_queues_pending == 0
+ * SYNC_MANDATORY) write out scn_phys always. scn_queues_pending must be 0.
+ * SYNC_CACHED) if scn_queues_pending == 0, write out scn_phys. Otherwise
  *	write out the scn_phys_cached version.
  * See dsl_scan_sync_state for details.
  */
@@ -276,12 +290,14 @@
 struct dsl_scan_io_queue {
 	dsl_scan_t	*q_scn; /* associated dsl_scan_t */
 	vdev_t		*q_vd; /* top-level vdev that this queue represents */
+	zio_t		*q_zio; /* scn_zio_root child for waiting on IO */
 
 	/* trees used for sorting I/Os and extents of I/Os */
 	range_tree_t	*q_exts_by_addr;
-	avl_tree_t	q_exts_by_size;
+	zfs_btree_t	q_exts_by_size;
 	avl_tree_t	q_sios_by_addr;
 	uint64_t	q_sio_memused;
+	uint64_t	q_last_ext_addr;
 
 	/* members for zio rate limiting */
 	uint64_t	q_maxinflight_bytes;
@@ -444,11 +460,12 @@
 
 	/*
 	 * Calculate the max number of in-flight bytes for pool-wide
-	 * scanning operations (minimum 1MB). Limits for the issuing
-	 * phase are done per top-level vdev and are handled separately.
+	 * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+	 * Limits for the issuing phase are done per top-level vdev and
+	 * are handled separately.
 	 */
-	scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
-	    dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20);
+	scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+	    zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
 
 	avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
 	    offsetof(scan_ds_t, sds_node));
@@ -578,6 +595,8 @@
 	}
 
 	spa_scan_stat_init(spa);
+	vdev_scan_stat_init(spa->spa_root_vdev);
+
 	return (0);
 }
 
@@ -635,7 +654,7 @@
  * Because we can be running in the block sorting algorithm, we do not always
  * want to write out the record, only when it is "safe" to do so. This safety
  * condition is achieved by making sure that the sorting queues are empty
- * (scn_bytes_pending == 0). When this condition is not true, the sync'd state
+ * (scn_queues_pending == 0). When this condition is not true, the sync'd state
  * is inconsistent with how much actual scanning progress has been made. The
  * kind of sync to be performed is specified by the sync_type argument. If the
  * sync is optional, we only sync if the queues are empty. If the sync is
@@ -658,8 +677,8 @@
 	int i;
 	spa_t *spa = scn->scn_dp->dp_spa;
 
-	ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0);
-	if (scn->scn_bytes_pending == 0) {
+	ASSERT(sync_type != SYNC_MANDATORY || scn->scn_queues_pending == 0);
+	if (scn->scn_queues_pending == 0) {
 		for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
 			vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
 			dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue;
@@ -669,7 +688,8 @@
 
 			mutex_enter(&vd->vdev_scan_io_queue_lock);
 			ASSERT3P(avl_first(&q->q_sios_by_addr), ==, NULL);
-			ASSERT3P(avl_first(&q->q_exts_by_size), ==, NULL);
+			ASSERT3P(zfs_btree_first(&q->q_exts_by_size, NULL), ==,
+			    NULL);
 			ASSERT3P(range_tree_first(q->q_exts_by_addr), ==, NULL);
 			mutex_exit(&vd->vdev_scan_io_queue_lock);
 		}
@@ -696,19 +716,20 @@
 	}
 }
 
-/* ARGSUSED */
-static int
+int
 dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
+	vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev;
 
-	if (dsl_scan_is_running(scn))
+	if (dsl_scan_is_running(scn) || vdev_rebuild_active(rvd))
 		return (SET_ERROR(EBUSY));
 
 	return (0);
 }
 
-static void
+void
 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
@@ -734,6 +755,7 @@
 	scn->scn_last_checkpoint = 0;
 	scn->scn_checkpointing = B_FALSE;
 	spa_scan_stat_init(spa);
+	vdev_scan_stat_init(spa->spa_root_vdev);
 
 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
 		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
@@ -743,8 +765,12 @@
 
 		if (vdev_resilver_needed(spa->spa_root_vdev,
 		    &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
-			spa_event_notify(spa, NULL, NULL,
+			nvlist_t *aux = fnvlist_alloc();
+			fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE,
+			    "healing");
+			spa_event_notify(spa, NULL, aux,
 			    ESC_ZFS_RESILVER_START);
+			nvlist_free(aux);
 		} else {
 			spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_START);
 		}
@@ -758,17 +784,38 @@
 		if (scn->scn_phys.scn_min_txg > TXG_INITIAL)
 			scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO;
 
+		/*
+		 * When starting a resilver clear any existing rebuild state.
+		 * This is required to prevent stale rebuild status from
+		 * being reported when a rebuild is run, then a resilver and
+		 * finally a scrub.  In which case only the scrub status
+		 * should be reported by 'zpool status'.
+		 */
+		if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) {
+			vdev_t *rvd = spa->spa_root_vdev;
+			for (uint64_t i = 0; i < rvd->vdev_children; i++) {
+				vdev_t *vd = rvd->vdev_child[i];
+				vdev_rebuild_clear_sync(
+				    (void *)(uintptr_t)vd->vdev_id, tx);
+			}
+		}
 	}
 
 	/* back to the generic stuff */
 
-	if (dp->dp_blkstats == NULL) {
-		dp->dp_blkstats =
-		    vmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
-		mutex_init(&dp->dp_blkstats->zab_lock, NULL,
-		    MUTEX_DEFAULT, NULL);
+	if (zfs_scan_blkstats) {
+		if (dp->dp_blkstats == NULL) {
+			dp->dp_blkstats =
+			    vmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
+		}
+		memset(&dp->dp_blkstats->zab_type, 0,
+		    sizeof (dp->dp_blkstats->zab_type));
+	} else {
+		if (dp->dp_blkstats) {
+			vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
+			dp->dp_blkstats = NULL;
+		}
 	}
-	bzero(&dp->dp_blkstats->zab_type, sizeof (dp->dp_blkstats->zab_type));
 
 	if (spa_version(spa) < SPA_VERSION_DSL_SCRUB)
 		ot = DMU_OT_ZAP_OTHER;
@@ -782,7 +829,8 @@
 
 	spa_history_log_internal(spa, "scan setup", tx,
 	    "func=%u mintxg=%llu maxtxg=%llu",
-	    *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
+	    *funcp, (u_longlong_t)scn->scn_phys.scn_min_txg,
+	    (u_longlong_t)scn->scn_phys.scn_max_txg);
 }
 
 /*
@@ -819,7 +867,7 @@
 		    POOL_SCRUB_NORMAL);
 		if (err == 0) {
 			spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_RESUME);
-			return (ECANCELED);
+			return (SET_ERROR(ECANCELED));
 		}
 
 		return (SET_ERROR(err));
@@ -829,7 +877,6 @@
 	    dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
-/* ARGSUSED */
 static void
 dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
 {
@@ -886,15 +933,17 @@
 
 	scn->scn_phys.scn_state = complete ? DSS_FINISHED : DSS_CANCELED;
 
+	spa_notify_waiters(spa);
+
 	if (dsl_scan_restarting(scn, tx))
 		spa_history_log_internal(spa, "scan aborted, restarting", tx,
-		    "errors=%llu", spa_get_errlog_size(spa));
+		    "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
 	else if (!complete)
 		spa_history_log_internal(spa, "scan cancelled", tx,
-		    "errors=%llu", spa_get_errlog_size(spa));
+		    "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
 	else
 		spa_history_log_internal(spa, "scan done", tx,
-		    "errors=%llu", spa_get_errlog_size(spa));
+		    "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
 
 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
 		spa->spa_scrub_active = B_FALSE;
@@ -912,14 +961,22 @@
 		if (complete &&
 		    !spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
 			vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
-			    scn->scn_phys.scn_max_txg, B_TRUE);
+			    scn->scn_phys.scn_max_txg, B_TRUE, B_FALSE);
 
-			spa_event_notify(spa, NULL, NULL,
-			    scn->scn_phys.scn_min_txg ?
-			    ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
+			if (scn->scn_phys.scn_min_txg) {
+				nvlist_t *aux = fnvlist_alloc();
+				fnvlist_add_string(aux, ZFS_EV_RESILVER_TYPE,
+				    "healing");
+				spa_event_notify(spa, NULL, aux,
+				    ESC_ZFS_RESILVER_FINISH);
+				nvlist_free(aux);
+			} else {
+				spa_event_notify(spa, NULL, NULL,
+				    ESC_ZFS_SCRUB_FINISH);
+			}
 		} else {
 			vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
-			    0, B_TRUE);
+			    0, B_TRUE, B_FALSE);
 		}
 		spa_errlog_rotate(spa);
 
@@ -952,6 +1009,10 @@
 			    (u_longlong_t)spa_get_errlog_size(spa));
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
 		}
+
+		/* Clear recent error events (i.e. duplicate events tracking) */
+		if (complete)
+			zfs_ereport_clear(spa, NULL);
 	}
 
 	scn->scn_phys.scn_end_time = gethrestime_sec();
@@ -962,10 +1023,10 @@
 	ASSERT(!dsl_scan_is_running(scn));
 }
 
-/* ARGSUSED */
 static int
 dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 
 	if (!dsl_scan_is_running(scn))
@@ -973,10 +1034,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 
 	dsl_scan_done(scn, B_FALSE, tx);
@@ -1028,6 +1089,7 @@
 		scn->scn_phys_cached.scn_flags |= DSF_SCRUB_PAUSED;
 		dsl_scan_sync_state(scn, tx, SYNC_CACHED);
 		spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_PAUSED);
+		spa_notify_waiters(spa);
 	} else {
 		ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
 		if (dsl_scan_is_paused_scrub(scn)) {
@@ -1158,7 +1220,7 @@
 	dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ?
 	    DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER;
 
-	ASSERT0(scn->scn_bytes_pending);
+	ASSERT0(scn->scn_queues_pending);
 	ASSERT(scn->scn_phys.scn_queue_obj != 0);
 
 	VERIFY0(dmu_object_free(dp->dp_meta_objset,
@@ -1229,9 +1291,13 @@
 		mutex_enter(&tvd->vdev_scan_io_queue_lock);
 		queue = tvd->vdev_scan_io_queue;
 		if (queue != NULL) {
-			/* # extents in exts_by_size = # in exts_by_addr */
-			mused += avl_numnodes(&queue->q_exts_by_size) *
-			    sizeof (range_seg_t) + queue->q_sio_memused;
+			/*
+			 * # of extents in exts_by_addr = # in exts_by_size.
+			 * B-tree efficiency is ~75%, but can be as low as 50%.
+			 */
+			mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
+			    ((sizeof (range_seg_gap_t) + sizeof (uint64_t)) *
+			    3 / 2) + queue->q_sio_memused;
 		}
 		mutex_exit(&tvd->vdev_scan_io_queue_lock);
 	}
@@ -1239,7 +1305,7 @@
 	dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused);
 
 	if (mused == 0)
-		ASSERT0(scn->scn_bytes_pending);
+		ASSERT0(scn->scn_queues_pending);
 
 	/*
 	 * If we are above our hard limit, we need to clear out memory.
@@ -1289,12 +1355,13 @@
 	uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
 	uint64_t sync_time_ns = curr_time_ns -
 	    scn->scn_dp->dp_spa->spa_sync_starttime;
-	int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+	uint64_t dirty_min_bytes = zfs_dirty_data_max *
+	    zfs_vdev_async_write_active_min_dirty_percent / 100;
 	int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
 	    zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
 
 	if ((NSEC2MSEC(scan_time_ns) > mintime &&
-	    (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent ||
+	    (scn->scn_dp->dp_dirty_total >= dirty_min_bytes ||
 	    txg_sync_waiting(scn->scn_dp) ||
 	    NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
 	    spa_shutting_down(scn->scn_dp->dp_spa) ||
@@ -1337,16 +1404,18 @@
 	zil_header_t	*zsa_zh;
 } zil_scan_arg_t;
 
-/* ARGSUSED */
 static int
-dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
+    uint64_t claim_txg)
 {
+	(void) zilog;
 	zil_scan_arg_t *zsa = arg;
 	dsl_pool_t *dp = zsa->zsa_dp;
 	dsl_scan_t *scn = dp->dp_scan;
 	zil_header_t *zh = zsa->zsa_zh;
 	zbookmark_phys_t zb;
 
+	ASSERT(!BP_IS_REDACTED(bp));
 	if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
 		return (0);
 
@@ -1366,19 +1435,21 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
-dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
+dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
+    uint64_t claim_txg)
 {
+	(void) zilog;
 	if (lrc->lrc_txtype == TX_WRITE) {
 		zil_scan_arg_t *zsa = arg;
 		dsl_pool_t *dp = zsa->zsa_dp;
 		dsl_scan_t *scn = dp->dp_scan;
 		zil_header_t *zh = zsa->zsa_zh;
-		lr_write_t *lr = (lr_write_t *)lrc;
-		blkptr_t *bp = &lr->lr_blkptr;
+		const lr_write_t *lr = (const lr_write_t *)lrc;
+		const blkptr_t *bp = &lr->lr_blkptr;
 		zbookmark_phys_t zb;
 
+		ASSERT(!BP_IS_REDACTED(bp));
 		if (BP_IS_HOLE(bp) ||
 		    bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
 			return (0);
@@ -1523,7 +1594,7 @@
 	spa_t *spa = scn->scn_dp->dp_spa;
 	scan_prefetch_issue_ctx_t *spic;
 
-	if (zfs_no_scrub_prefetch)
+	if (zfs_no_scrub_prefetch || BP_IS_REDACTED(bp))
 		return;
 
 	if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg ||
@@ -1589,10 +1660,11 @@
 	scan_prefetch_ctx_rele(spc, FTAG);
 }
 
-void
+static void
 dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *private)
 {
+	(void) zio;
 	scan_prefetch_ctx_t *spc = private;
 	dsl_scan_t *scn = spc->spc_scn;
 	spa_t *spa = scn->scn_dp->dp_spa;
@@ -1652,7 +1724,6 @@
 	scan_prefetch_ctx_rele(spc, scn);
 }
 
-/* ARGSUSED */
 static void
 dsl_scan_prefetch_thread(void *arg)
 {
@@ -1738,12 +1809,11 @@
 
 		/*
 		 * If we found the block we're trying to resume from, or
-		 * we went past it to a different object, zero it out to
-		 * indicate that it's OK to start checking for suspending
-		 * again.
+		 * we went past it, zero it out to indicate that it's OK
+		 * to start checking for suspending again.
 		 */
-		if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
-		    zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) {
+		if (zbookmark_subtree_tbd(dnp, zb,
+		    &scn->scn_phys.scn_bookmark)) {
 			dprintf("resuming at %llx/%llx/%llx/%llx\n",
 			    (longlong_t)zb->zb_objset,
 			    (longlong_t)zb->zb_object,
@@ -1775,6 +1845,21 @@
 	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD;
 	int err;
 
+	ASSERT(!BP_IS_REDACTED(bp));
+
+	/*
+	 * There is an unlikely case of encountering dnodes with contradicting
+	 * dn_bonuslen and DNODE_FLAG_SPILL_BLKPTR flag in files created
+	 * or modified before commit 4254acb was merged. As it is not possible
+	 * to know which of the two is correct, report an error.
+	 */
+	if (dnp != NULL &&
+	    dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) {
+		scn->scn_phys.scn_errors++;
+		spa_log_error(dp->dp_spa, zb);
+		return (SET_ERROR(EINVAL));
+	}
+
 	if (BP_GET_LEVEL(bp) > 0) {
 		arc_flags_t flags = ARC_FLAG_WAIT;
 		int i;
@@ -1928,6 +2013,27 @@
 		return;
 	}
 
+	if (BP_IS_REDACTED(bp)) {
+		ASSERT(dsl_dataset_feature_is_active(ds,
+		    SPA_FEATURE_REDACTED_DATASETS));
+		return;
+	}
+
+	/*
+	 * Check if this block contradicts any filesystem flags.
+	 */
+	spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS;
+	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+	if (f != SPA_FEATURE_NONE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
+	if (f != SPA_FEATURE_NONE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
 	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
 		scn->scn_lt_min_this_txg++;
 		return;
@@ -2271,7 +2377,6 @@
 	dsl_scan_sync_state(scn, tx, SYNC_CACHED);
 }
 
-/* ARGSUSED */
 static int
 enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 {
@@ -2455,10 +2560,10 @@
 	dsl_dataset_rele(ds, FTAG);
 }
 
-/* ARGSUSED */
 static int
 enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 {
+	(void) arg;
 	dsl_dataset_t *ds;
 	int err;
 	dsl_scan_t *scn = dp->dp_scan;
@@ -2494,16 +2599,15 @@
 	return (0);
 }
 
-/* ARGSUSED */
 void
 dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
     ddt_entry_t *dde, dmu_tx_t *tx)
 {
+	(void) tx;
 	const ddt_key_t *ddk = &dde->dde_key;
 	ddt_phys_t *ddp = dde->dde_phys;
 	blkptr_t bp;
 	zbookmark_phys_t zb = { 0 };
-	int p;
 
 	if (!dsl_scan_is_running(scn))
 		return;
@@ -2522,7 +2626,7 @@
 	if (scn->scn_done_txg != 0)
 		return;
 
-	for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
 		if (ddp->ddp_phys_birth == 0 ||
 		    ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
 			continue;
@@ -2707,22 +2811,17 @@
 }
 
 static uint64_t
-dsl_scan_count_leaves(vdev_t *vd)
+dsl_scan_count_data_disks(spa_t *spa)
 {
+	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t i, leaves = 0;
 
-	/* we only count leaves that belong to the main pool and are readable */
-	if (vd->vdev_islog || vd->vdev_isspare ||
-	    vd->vdev_isl2cache || !vdev_readable(vd))
-		return (0);
-
-	if (vd->vdev_ops->vdev_op_leaf)
-		return (1);
-
-	for (i = 0; i < vd->vdev_children; i++) {
-		leaves += dsl_scan_count_leaves(vd->vdev_child[i]);
+	for (i = 0; i < rvd->vdev_children; i++) {
+		vdev_t *vd = rvd->vdev_child[i];
+		if (vd->vdev_islog || vd->vdev_isspare || vd->vdev_isl2cache)
+			continue;
+		leaves += vdev_get_ndisks(vd) - vdev_get_nparity(vd);
 	}
-
 	return (leaves);
 }
 
@@ -2756,12 +2855,13 @@
 	uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
 	uint64_t sync_time_ns = curr_time_ns -
 	    scn->scn_dp->dp_spa->spa_sync_starttime;
-	int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+	uint64_t dirty_min_bytes = zfs_dirty_data_max *
+	    zfs_vdev_async_write_active_min_dirty_percent / 100;
 	int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
 	    zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
 
 	return ((NSEC2MSEC(scan_time_ns) > mintime &&
-	    (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent ||
+	    (scn->scn_dp->dp_dirty_total >= dirty_min_bytes ||
 	    txg_sync_waiting(scn->scn_dp) ||
 	    NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
 	    spa_shutting_down(scn->scn_dp->dp_spa));
@@ -2780,7 +2880,6 @@
 {
 	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio;
-	int64_t bytes_issued = 0;
 	boolean_t suspended = B_FALSE;
 
 	while ((sio = list_head(io_list)) != NULL) {
@@ -2792,16 +2891,12 @@
 		}
 
 		sio2bp(sio, &bp);
-		bytes_issued += SIO_GET_ASIZE(sio);
 		scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
 		    &sio->sio_zb, queue);
 		(void) list_remove_head(io_list);
 		scan_io_queues_update_zio_stats(queue, &bp);
 		sio_free(sio);
 	}
-
-	atomic_add_64(&scn->scn_bytes_pending, -bytes_issued);
-
 	return (suspended);
 }
 
@@ -2825,7 +2920,7 @@
 
 	srch_sio = sio_alloc(1);
 	srch_sio->sio_nr_dvas = 1;
-	SIO_SET_OFFSET(srch_sio, rs->rs_start);
+	SIO_SET_OFFSET(srch_sio, rs_get_start(rs, queue->q_exts_by_addr));
 
 	/*
 	 * The exact start of the extent might not contain any matching zios,
@@ -2837,13 +2932,17 @@
 	if (sio == NULL)
 		sio = avl_nearest(&queue->q_sios_by_addr, idx, AVL_AFTER);
 
-	while (sio != NULL &&
-	    SIO_GET_OFFSET(sio) < rs->rs_end && num_sios <= 32) {
-		ASSERT3U(SIO_GET_OFFSET(sio), >=, rs->rs_start);
-		ASSERT3U(SIO_GET_END_OFFSET(sio), <=, rs->rs_end);
+	while (sio != NULL && SIO_GET_OFFSET(sio) < rs_get_end(rs,
+	    queue->q_exts_by_addr) && num_sios <= 32) {
+		ASSERT3U(SIO_GET_OFFSET(sio), >=, rs_get_start(rs,
+		    queue->q_exts_by_addr));
+		ASSERT3U(SIO_GET_END_OFFSET(sio), <=, rs_get_end(rs,
+		    queue->q_exts_by_addr));
 
 		next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio);
 		avl_remove(&queue->q_sios_by_addr, sio);
+		if (avl_is_empty(&queue->q_sios_by_addr))
+			atomic_add_64(&queue->q_scn->scn_queues_pending, -1);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 
 		bytes_issued += SIO_GET_ASIZE(sio);
@@ -2858,16 +2957,20 @@
 	 * in the segment we update it to reflect the work we were able to
 	 * complete. Otherwise, we remove it from the range tree entirely.
 	 */
-	if (sio != NULL && SIO_GET_OFFSET(sio) < rs->rs_end) {
+	if (sio != NULL && SIO_GET_OFFSET(sio) < rs_get_end(rs,
+	    queue->q_exts_by_addr)) {
 		range_tree_adjust_fill(queue->q_exts_by_addr, rs,
 		    -bytes_issued);
 		range_tree_resize_segment(queue->q_exts_by_addr, rs,
-		    SIO_GET_OFFSET(sio), rs->rs_end - SIO_GET_OFFSET(sio));
-
+		    SIO_GET_OFFSET(sio), rs_get_end(rs,
+		    queue->q_exts_by_addr) - SIO_GET_OFFSET(sio));
+		queue->q_last_ext_addr = SIO_GET_OFFSET(sio);
 		return (B_TRUE);
 	} else {
-		range_tree_remove(queue->q_exts_by_addr, rs->rs_start,
-		    rs->rs_end - rs->rs_start);
+		uint64_t rstart = rs_get_start(rs, queue->q_exts_by_addr);
+		uint64_t rend = rs_get_end(rs, queue->q_exts_by_addr);
+		range_tree_remove(queue->q_exts_by_addr, rstart, rend - rstart);
+		queue->q_last_ext_addr = -1;
 		return (B_FALSE);
 	}
 }
@@ -2887,18 +2990,13 @@
 scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
 {
 	dsl_scan_t *scn = queue->q_scn;
+	range_tree_t *rt = queue->q_exts_by_addr;
 
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
 	ASSERT(scn->scn_is_sorted);
 
-	/* handle tunable overrides */
-	if (scn->scn_checkpointing || scn->scn_clearing) {
-		if (zfs_scan_issue_strategy == 1) {
-			return (range_tree_first(queue->q_exts_by_addr));
-		} else if (zfs_scan_issue_strategy == 2) {
-			return (avl_first(&queue->q_exts_by_size));
-		}
-	}
+	if (!scn->scn_checkpointing && !scn->scn_clearing)
+		return (NULL);
 
 	/*
 	 * During normal clearing, we want to issue our largest segments
@@ -2909,13 +3007,42 @@
 	 * so the way we are sorted now is as good as it will ever get.
 	 * In this case, we instead switch to issuing extents in LBA order.
 	 */
-	if (scn->scn_checkpointing) {
-		return (range_tree_first(queue->q_exts_by_addr));
-	} else if (scn->scn_clearing) {
-		return (avl_first(&queue->q_exts_by_size));
-	} else {
-		return (NULL);
+	if ((zfs_scan_issue_strategy < 1 && scn->scn_checkpointing) ||
+	    zfs_scan_issue_strategy == 1)
+		return (range_tree_first(rt));
+
+	/*
+	 * Try to continue the previous extent if it is not completed yet.
+	 * After being shrunk in scan_io_queue_gather() it may no longer be the
+	 * best, but otherwise we would leave a shorter remnant every txg.
+	 */
+	uint64_t start;
+	uint64_t size = 1 << rt->rt_shift;
+	range_seg_t *addr_rs;
+	if (queue->q_last_ext_addr != -1) {
+		start = queue->q_last_ext_addr;
+		addr_rs = range_tree_find(rt, start, size);
+		if (addr_rs != NULL)
+			return (addr_rs);
 	}
+
+	/*
+	 * Nothing to continue, so find new best extent.
+	 */
+	uint64_t *v = zfs_btree_first(&queue->q_exts_by_size, NULL);
+	if (v == NULL)
+		return (NULL);
+	queue->q_last_ext_addr = start = *v << rt->rt_shift;
+
+	/*
+	 * We need to get the original entry in the by_addr tree so we can
+	 * modify it.
+	 */
+	addr_rs = range_tree_find(rt, start, size);
+	ASSERT3P(addr_rs, !=, NULL);
+	ASSERT3U(rs_get_start(addr_rs, rt), ==, start);
+	ASSERT3U(rs_get_end(addr_rs, rt), >, start);
+	return (addr_rs);
 }
 
 static void
@@ -2924,21 +3051,23 @@
 	dsl_scan_io_queue_t *queue = arg;
 	kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
 	boolean_t suspended = B_FALSE;
-	range_seg_t *rs = NULL;
-	scan_io_t *sio = NULL;
+	range_seg_t *rs;
+	scan_io_t *sio;
+	zio_t *zio;
 	list_t sio_list;
-	uint64_t bytes_per_leaf = zfs_scan_vdev_limit;
-	uint64_t nr_leaves = dsl_scan_count_leaves(queue->q_vd);
 
 	ASSERT(queue->q_scn->scn_is_sorted);
 
 	list_create(&sio_list, sizeof (scan_io_t),
 	    offsetof(scan_io_t, sio_nodes.sio_list_node));
+	zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
+	    NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
 	mutex_enter(q_lock);
+	queue->q_zio = zio;
 
-	/* calculate maximum in-flight bytes for this txg (min 1MB) */
-	queue->q_maxinflight_bytes =
-	    MAX(nr_leaves * bytes_per_leaf, 1ULL << 20);
+	/* Calculate maximum in-flight bytes for this vdev. */
+	queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
+	    (vdev_get_ndisks(queue->q_vd) - vdev_get_nparity(queue->q_vd)));
 
 	/* reset per-queue scan statistics for this txg */
 	queue->q_total_seg_size_this_txg = 0;
@@ -2949,12 +3078,12 @@
 	/* loop until we run out of time or sios */
 	while ((rs = scan_io_queue_fetch_ext(queue)) != NULL) {
 		uint64_t seg_start = 0, seg_end = 0;
-		boolean_t more_left = B_TRUE;
+		boolean_t more_left;
 
 		ASSERT(list_is_empty(&sio_list));
 
 		/* loop while we still have sios left to process in this rs */
-		while (more_left) {
+		do {
 			scan_io_t *first_sio, *last_sio;
 
 			/*
@@ -2983,7 +3112,7 @@
 
 			if (suspended)
 				break;
-		}
+		} while (more_left);
 
 		/* update statistics for debugging purposes */
 		scan_io_queues_update_seg_stats(queue, seg_start, seg_end);
@@ -3001,7 +3130,9 @@
 		scan_io_queue_insert_impl(queue, sio);
 	}
 
+	queue->q_zio = NULL;
 	mutex_exit(q_lock);
+	zio_nowait(zio);
 	list_destroy(&sio_list);
 }
 
@@ -3022,7 +3153,7 @@
 	ASSERT(scn->scn_is_sorted);
 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
 
-	if (scn->scn_bytes_pending == 0)
+	if (scn->scn_queues_pending == 0)
 		return;
 
 	if (scn->scn_taskq == NULL) {
@@ -3072,6 +3203,11 @@
 		return (B_TRUE);
 	}
 
+	if (zfs_max_async_dedup_frees != 0 &&
+	    scn->scn_dedup_frees_this_txg >= zfs_max_async_dedup_frees) {
+		return (B_TRUE);
+	}
+
 	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
 	return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
 	    (NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms &&
@@ -3096,6 +3232,8 @@
 	    -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp),
 	    -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx);
 	scn->scn_visited_this_txg++;
+	if (BP_GET_DEDUP(bp))
+		scn->scn_dedup_frees_this_txg++;
 	return (0);
 }
 
@@ -3135,8 +3273,18 @@
 }
 
 static int
-dsl_scan_obsolete_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+bpobj_dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
+	ASSERT(!bp_freed);
+	return (dsl_scan_free_block_cb(arg, bp, tx));
+}
+
+static int
+dsl_scan_obsolete_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	ASSERT(!bp_freed);
 	dsl_scan_t *scn = arg;
 	const dva_t *dva = &bp->blk_dva[0];
 
@@ -3155,6 +3303,7 @@
 {
 	spa_t *spa = scn->scn_dp->dp_spa;
 	uint64_t used = 0, comp, uncomp;
+	boolean_t clones_left;
 
 	if (spa->spa_load_state != SPA_LOAD_NONE)
 		return (B_FALSE);
@@ -3168,7 +3317,8 @@
 		(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
 		    &used, &comp, &uncomp);
 	}
-	return (used != 0);
+	clones_left = spa_livelist_delete_check(spa);
+	return ((used != 0) || (clones_left));
 }
 
 static boolean_t
@@ -3223,19 +3373,12 @@
 	}
 
 	/*
-	 * Check if the txg falls within the range which must be
-	 * resilvered.  DVAs outside this range can always be skipped.
-	 */
-	if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
-		return (B_FALSE);
-
-	/*
 	 * Check if the top-level vdev must resilver this offset.
 	 * When the offset does not intersect with a dirty leaf DTL
 	 * then it may be possible to skip the resilver IO.  The psize
 	 * is provided instead of asize to simplify the check for RAIDZ.
 	 */
-	if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
+	if (!vdev_dtl_need_resilver(vd, dva, psize, phys_birth))
 		return (B_FALSE);
 
 	/*
@@ -3265,7 +3408,7 @@
 		scn->scn_zio_root = zio_root(spa, NULL,
 		    NULL, ZIO_FLAG_MUSTSUCCEED);
 		err = bpobj_iterate(&dp->dp_free_bpobj,
-		    dsl_scan_free_block_cb, scn, tx);
+		    bpobj_dsl_scan_free_block_cb, scn, tx);
 		VERIFY0(zio_wait(scn->scn_zio_root));
 		scn->scn_zio_root = NULL;
 
@@ -3324,6 +3467,7 @@
 		    NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
 		    (longlong_t)tx->tx_txg, err);
 		scn->scn_visited_this_txg = 0;
+		scn->scn_dedup_frees_this_txg = 0;
 
 		/*
 		 * Write out changes to the DDT that may be required as a
@@ -3362,13 +3506,16 @@
 		    -dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx);
 	}
 
-	if (dp->dp_free_dir != NULL && !scn->scn_async_destroying) {
+	if (dp->dp_free_dir != NULL && !scn->scn_async_destroying &&
+	    !spa_livelist_delete_check(spa)) {
 		/* finished; verify that space accounting went to zero */
 		ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes);
 		ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes);
 		ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes);
 	}
 
+	spa_notify_waiters(spa);
+
 	EQUIV(bpobj_is_open(&dp->dp_obsolete_bpobj),
 	    0 == zap_contains(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_OBSOLETE_BPOBJ));
@@ -3449,6 +3596,7 @@
 
 	/* reset scan statistics */
 	scn->scn_visited_this_txg = 0;
+	scn->scn_dedup_frees_this_txg = 0;
 	scn->scn_holes_this_txg = 0;
 	scn->scn_lt_min_this_txg = 0;
 	scn->scn_gt_max_this_txg = 0;
@@ -3505,6 +3653,16 @@
 	}
 
 	/*
+	 * Disabled by default, set zfs_scan_report_txgs to report
+	 * average performance over the last zfs_scan_report_txgs TXGs.
+	 */
+	if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
+	    tx->tx_txg % zfs_scan_report_txgs == 0) {
+		scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
+		spa_scan_stat_init(spa);
+	}
+
+	/*
 	 * It is possible to switch from unsorted to sorted at any time,
 	 * but afterwards the scan will remain sorted unless reloaded from
 	 * a checkpoint after a reboot.
@@ -3558,16 +3716,15 @@
 		/* Need to scan metadata for more blocks to scrub */
 		dsl_scan_phys_t *scnp = &scn->scn_phys;
 		taskqid_t prefetch_tqid;
-		uint64_t bytes_per_leaf = zfs_scan_vdev_limit;
-		uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev);
 
 		/*
-		 * Recalculate the max number of in-flight bytes for pool-wide
-		 * scanning operations (minimum 1MB). Limits for the issuing
-		 * phase are done per top-level vdev and are handled separately.
+		 * Calculate the max number of in-flight bytes for pool-wide
+		 * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+		 * Limits for the issuing phase are done per top-level vdev and
+		 * are handled separately.
 		 */
-		scn->scn_maxinflight_bytes =
-		    MAX(nr_leaves * bytes_per_leaf, 1ULL << 20);
+		scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+		    zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
 
 		if (scnp->scn_ddt_bookmark.ddb_class <=
 		    scnp->scn_ddt_class_max) {
@@ -3628,11 +3785,14 @@
 			if (scn->scn_is_sorted) {
 				scn->scn_checkpointing = B_TRUE;
 				scn->scn_clearing = B_TRUE;
+				scn->scn_issued_before_pass +=
+				    spa->spa_scan_pass_issued;
+				spa_scan_stat_init(spa);
 			}
 			zfs_dbgmsg("scan complete txg %llu",
 			    (longlong_t)tx->tx_txg);
 		}
-	} else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) {
+	} else if (scn->scn_is_sorted && scn->scn_queues_pending != 0) {
 		ASSERT(scn->scn_clearing);
 
 		/* need to issue scrubbing IOs from per-vdev queues */
@@ -3660,7 +3820,7 @@
 		    (longlong_t)tx->tx_txg);
 		ASSERT3U(scn->scn_done_txg, !=, 0);
 		ASSERT0(spa->spa_scrub_inflight);
-		ASSERT0(scn->scn_bytes_pending);
+		ASSERT0(scn->scn_queues_pending);
 		dsl_scan_done(scn, B_TRUE, tx);
 		sync_type = SYNC_MANDATORY;
 	}
@@ -3669,10 +3829,8 @@
 }
 
 static void
-count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
+count_block_issued(spa_t *spa, const blkptr_t *bp, boolean_t all)
 {
-	int i;
-
 	/*
 	 * Don't count embedded bp's, since we already did the work of
 	 * scanning these when we scanned the containing block.
@@ -3687,18 +3845,13 @@
 	 * zio code will only try the first one unless there is an issue.
 	 * Therefore, we should only count the first DVA for these IOs.
 	 */
-	if (scn->scn_is_sorted) {
-		atomic_add_64(&scn->scn_dp->dp_spa->spa_scan_pass_issued,
-		    DVA_GET_ASIZE(&bp->blk_dva[0]));
-	} else {
-		spa_t *spa = scn->scn_dp->dp_spa;
+	atomic_add_64(&spa->spa_scan_pass_issued,
+	    all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
+}
 
-		for (i = 0; i < BP_GET_NDVAS(bp); i++) {
-			atomic_add_64(&spa->spa_scan_pass_issued,
-			    DVA_GET_ASIZE(&bp->blk_dva[i]));
-		}
-	}
-
+static void
+count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
+{
 	/*
 	 * If we resume after a reboot, zab will be NULL; don't record
 	 * incomplete stats in that case.
@@ -3706,9 +3859,7 @@
 	if (zab == NULL)
 		return;
 
-	mutex_enter(&zab->zab_lock);
-
-	for (i = 0; i < 4; i++) {
+	for (int i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
 		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
 
@@ -3743,28 +3894,27 @@
 			break;
 		}
 	}
-
-	mutex_exit(&zab->zab_lock);
 }
 
 static void
 scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
 {
 	avl_index_t idx;
-	int64_t asize = SIO_GET_ASIZE(sio);
 	dsl_scan_t *scn = queue->q_scn;
 
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
 
+	if (unlikely(avl_is_empty(&queue->q_sios_by_addr)))
+		atomic_add_64(&scn->scn_queues_pending, 1);
 	if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) {
 		/* block is already scheduled for reading */
-		atomic_add_64(&scn->scn_bytes_pending, -asize);
 		sio_free(sio);
 		return;
 	}
 	avl_insert(&queue->q_sios_by_addr, sio, idx);
 	queue->q_sio_memused += SIO_GET_MUSED(sio);
-	range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), asize);
+	range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio),
+	    SIO_GET_ASIZE(sio));
 }
 
 /*
@@ -3777,7 +3927,6 @@
 scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
     int zio_flags, const zbookmark_phys_t *zb)
 {
-	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio = sio_alloc(BP_GET_NDVAS(bp));
 
 	ASSERT0(BP_IS_GANG(bp));
@@ -3787,13 +3936,7 @@
 	sio->sio_flags = zio_flags;
 	sio->sio_zb = *zb;
 
-	/*
-	 * Increment the bytes pending counter now so that we can't
-	 * get an integer underflow in case the worker processes the
-	 * zio before we get to incrementing this counter.
-	 */
-	atomic_add_64(&scn->scn_bytes_pending, SIO_GET_ASIZE(sio));
-
+	queue->q_last_ext_addr = -1;
 	scan_io_queue_insert_impl(queue, sio);
 }
 
@@ -3848,10 +3991,10 @@
 	boolean_t needs_io = B_FALSE;
 	int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
 
-
+	count_block(dp->dp_blkstats, bp);
 	if (phys_birth <= scn->scn_phys.scn_min_txg ||
 	    phys_birth >= scn->scn_phys.scn_max_txg) {
-		count_block(scn, dp->dp_blkstats, bp);
+		count_block_issued(spa, bp, B_TRUE);
 		return (0);
 	}
 
@@ -3877,10 +4020,11 @@
 
 		/*
 		 * Keep track of how much data we've examined so that
-		 * zpool(1M) status can make useful progress reports.
+		 * zpool(8) status can make useful progress reports.
 		 */
-		scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
-		spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
+		uint64_t asize = DVA_GET_ASIZE(dva);
+		scn->scn_phys.scn_examined += asize;
+		spa->spa_scan_pass_exam += asize;
 
 		/* if it's a resilver, this may not be in the target range */
 		if (!needs_io)
@@ -3891,7 +4035,7 @@
 	if (needs_io && !zfs_no_scrub_io) {
 		dsl_scan_enqueue(dp, bp, zio_flags, zb);
 	} else {
-		count_block(scn, dp->dp_blkstats, bp);
+		count_block_issued(spa, bp, B_TRUE);
 	}
 
 	/* do not relocate this block */
@@ -3942,28 +4086,32 @@
 	dsl_scan_t *scn = dp->dp_scan;
 	size_t size = BP_GET_PSIZE(bp);
 	abd_t *data = abd_alloc_for_io(size, B_FALSE);
-
-	ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
+	zio_t *pio;
 
 	if (queue == NULL) {
+		ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
 		mutex_enter(&spa->spa_scrub_lock);
 		while (spa->spa_scrub_inflight >= scn->scn_maxinflight_bytes)
 			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
 		spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
 		mutex_exit(&spa->spa_scrub_lock);
+		pio = scn->scn_zio_root;
 	} else {
 		kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
 
+		ASSERT3U(queue->q_maxinflight_bytes, >, 0);
 		mutex_enter(q_lock);
 		while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
 			cv_wait(&queue->q_zio_cv, q_lock);
 		queue->q_inflight_bytes += BP_GET_PSIZE(bp);
+		pio = queue->q_zio;
 		mutex_exit(q_lock);
 	}
 
-	count_block(scn, dp->dp_blkstats, bp);
-	zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
-	    dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
+	ASSERT(pio != NULL);
+	count_block_issued(spa, bp, queue == NULL);
+	zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
+	    queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
 }
 
 /*
@@ -3997,32 +4145,88 @@
  * extents that are more completely filled (in a 3:2 ratio) vs just larger.
  * Note that as an optimization, we replace multiplication and division by
  * 100 with bitshifting by 7 (which effectively multiplies and divides by 128).
+ *
+ * Since we do not care if one extent is only a few percent better than
+ * another, compress the score into 6 bits via binary logarithm (AKA
+ * highbit64()) and store it in the high bits of the offset, which are
+ * otherwise unused due to ashift.  This reduces q_exts_by_size B-tree
+ * elements to only 64 bits, so they can be compared with a single
+ * operation.  It also makes scrubs more sequential and reduces the chance
+ * that a minor extent change moves it within the B-tree.
 static int
 ext_size_compare(const void *x, const void *y)
 {
-	const range_seg_t *rsa = x, *rsb = y;
-	uint64_t sa = rsa->rs_end - rsa->rs_start,
-	    sb = rsb->rs_end - rsb->rs_start;
-	uint64_t score_a, score_b;
+	const uint64_t *a = x, *b = y;
 
-	score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) *
-	    fill_weight * rsa->rs_fill) >> 7);
-	score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) *
-	    fill_weight * rsb->rs_fill) >> 7);
-
-	if (score_a > score_b)
-		return (-1);
-	if (score_a == score_b) {
-		if (rsa->rs_start < rsb->rs_start)
-			return (-1);
-		if (rsa->rs_start == rsb->rs_start)
-			return (0);
-		return (1);
-	}
-	return (1);
+	return (TREE_CMP(*a, *b));
 }
 
+static void
+ext_size_create(range_tree_t *rt, void *arg)
+{
+	(void) rt;
+	zfs_btree_t *size_tree = arg;
+
+	zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
+}
+
+static void
+ext_size_destroy(range_tree_t *rt, void *arg)
+{
+	(void) rt;
+	zfs_btree_t *size_tree = arg;
+	ASSERT0(zfs_btree_numnodes(size_tree));
+
+	zfs_btree_destroy(size_tree);
+}
+
+static uint64_t
+ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg)
+{
+	(void) rt;
+	uint64_t size = rsg->rs_end - rsg->rs_start;
+	uint64_t score = rsg->rs_fill + ((((rsg->rs_fill << 7) / size) *
+	    fill_weight * rsg->rs_fill) >> 7);
+	ASSERT3U(rt->rt_shift, >=, 8);
+	return (((uint64_t)(64 - highbit64(score)) << 56) | rsg->rs_start);
+}
+
+static void
+ext_size_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+	uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+	zfs_btree_add(size_tree, &v);
+}
+
+static void
+ext_size_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+	uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+	zfs_btree_remove(size_tree, &v);
+}
+
+static void
+ext_size_vacate(range_tree_t *rt, void *arg)
+{
+	zfs_btree_t *size_tree = arg;
+	zfs_btree_clear(size_tree);
+	zfs_btree_destroy(size_tree);
+
+	ext_size_create(rt, arg);
+}
+
+static const range_tree_ops_t ext_size_ops = {
+	.rtop_create = ext_size_create,
+	.rtop_destroy = ext_size_destroy,
+	.rtop_add = ext_size_add,
+	.rtop_remove = ext_size_remove,
+	.rtop_vacate = ext_size_vacate
+};
+
 /*
  * Comparator for the q_sios_by_addr tree. Sorting is simply performed
  * based on LBA-order (from lowest to highest).
@@ -4032,7 +4236,7 @@
 {
 	const scan_io_t *a = x, *b = y;
 
-	return (AVL_CMP(SIO_GET_OFFSET(a), SIO_GET_OFFSET(b)));
+	return (TREE_CMP(SIO_GET_OFFSET(a), SIO_GET_OFFSET(b)));
 }
 
 /* IO queues are created on demand when they are needed. */
@@ -4045,9 +4249,10 @@
 	q->q_scn = scn;
 	q->q_vd = vd;
 	q->q_sio_memused = 0;
+	q->q_last_ext_addr = -1;
 	cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL);
-	q->q_exts_by_addr = range_tree_create_impl(&rt_avl_ops,
-	    &q->q_exts_by_size, ext_size_compare, zfs_scan_max_ext_gap);
+	q->q_exts_by_addr = range_tree_create_gap(&ext_size_ops, RANGE_SEG_GAP,
+	    &q->q_exts_by_size, 0, vd->vdev_ashift, zfs_scan_max_ext_gap);
 	avl_create(&q->q_sios_by_addr, sio_addr_compare,
 	    sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node));
 
@@ -4065,21 +4270,20 @@
 	dsl_scan_t *scn = queue->q_scn;
 	scan_io_t *sio;
 	void *cookie = NULL;
-	int64_t bytes_dequeued = 0;
 
 	ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
 
+	if (!avl_is_empty(&queue->q_sios_by_addr))
+		atomic_add_64(&scn->scn_queues_pending, -1);
 	while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) !=
 	    NULL) {
 		ASSERT(range_tree_contains(queue->q_exts_by_addr,
 		    SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio)));
-		bytes_dequeued += SIO_GET_ASIZE(sio);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 		sio_free(sio);
 	}
 
 	ASSERT0(queue->q_sio_memused);
-	atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued);
 	range_tree_vacate(queue->q_exts_by_addr, NULL, queue);
 	range_tree_destroy(queue->q_exts_by_addr);
 	avl_destroy(&queue->q_sios_by_addr);
@@ -4175,28 +4379,22 @@
 	sio_free(srch_sio);
 
 	if (sio != NULL) {
-		int64_t asize = SIO_GET_ASIZE(sio);
 		blkptr_t tmpbp;
 
 		/* Got it while it was cold in the queue */
 		ASSERT3U(start, ==, SIO_GET_OFFSET(sio));
-		ASSERT3U(size, ==, asize);
+		ASSERT3U(size, ==, SIO_GET_ASIZE(sio));
 		avl_remove(&queue->q_sios_by_addr, sio);
+		if (avl_is_empty(&queue->q_sios_by_addr))
+			atomic_add_64(&scn->scn_queues_pending, -1);
 		queue->q_sio_memused -= SIO_GET_MUSED(sio);
 
 		ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
 		range_tree_remove_fill(queue->q_exts_by_addr, start, size);
 
-		/*
-		 * We only update scn_bytes_pending in the cold path,
-		 * otherwise it will already have been accounted for as
-		 * part of the zio's execution.
-		 */
-		atomic_add_64(&scn->scn_bytes_pending, -asize);
-
 		/* count the block as though we issued it */
 		sio2bp(sio, &tmpbp);
-		count_block(scn, dp->dp_blkstats, &tmpbp);
+		count_block_issued(spa, &tmpbp, B_FALSE);
 
 		sio_free(sio);
 	}
@@ -4254,74 +4452,71 @@
 		spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
 }
 
-#if defined(_KERNEL)
-/* CSTYLED */
-module_param(zfs_scan_vdev_limit, ulong, 0644);
-MODULE_PARM_DESC(zfs_scan_vdev_limit,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, scan_vdev_limit, ULONG, ZMOD_RW,
 	"Max bytes in flight per leaf vdev for scrubs and resilvers");
 
-module_param(zfs_scrub_min_time_ms, int, 0644);
-MODULE_PARM_DESC(zfs_scrub_min_time_ms, "Min millisecs to scrub per txg");
+ZFS_MODULE_PARAM(zfs, zfs_, scrub_min_time_ms, INT, ZMOD_RW,
+	"Min millisecs to scrub per txg");
 
-module_param(zfs_obsolete_min_time_ms, int, 0644);
-MODULE_PARM_DESC(zfs_obsolete_min_time_ms, "Min millisecs to obsolete per txg");
+ZFS_MODULE_PARAM(zfs, zfs_, obsolete_min_time_ms, INT, ZMOD_RW,
+	"Min millisecs to obsolete per txg");
 
-module_param(zfs_free_min_time_ms, int, 0644);
-MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg");
+ZFS_MODULE_PARAM(zfs, zfs_, free_min_time_ms, INT, ZMOD_RW,
+	"Min millisecs to free per txg");
 
-module_param(zfs_resilver_min_time_ms, int, 0644);
-MODULE_PARM_DESC(zfs_resilver_min_time_ms, "Min millisecs to resilver per txg");
+ZFS_MODULE_PARAM(zfs, zfs_, resilver_min_time_ms, INT, ZMOD_RW,
+	"Min millisecs to resilver per txg");
 
-module_param(zfs_scan_suspend_progress, int, 0644);
-MODULE_PARM_DESC(zfs_scan_suspend_progress,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_suspend_progress, INT, ZMOD_RW,
 	"Set to prevent scans from progressing");
 
-module_param(zfs_no_scrub_io, int, 0644);
-MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O");
+ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_io, INT, ZMOD_RW,
+	"Set to disable scrub I/O");
 
-module_param(zfs_no_scrub_prefetch, int, 0644);
-MODULE_PARM_DESC(zfs_no_scrub_prefetch, "Set to disable scrub prefetching");
+ZFS_MODULE_PARAM(zfs, zfs_, no_scrub_prefetch, INT, ZMOD_RW,
+	"Set to disable scrub prefetching");
 
-/* CSTYLED */
-module_param(zfs_async_block_max_blocks, ulong, 0644);
-MODULE_PARM_DESC(zfs_async_block_max_blocks,
+ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, ULONG, ZMOD_RW,
 	"Max number of blocks freed in one txg");
 
-module_param(zfs_free_bpobj_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_free_bpobj_enabled, "Enable processing of the free_bpobj");
+ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, ULONG, ZMOD_RW,
+	"Max number of dedup blocks freed in one txg");
 
-module_param(zfs_scan_mem_lim_fact, int, 0644);
-MODULE_PARM_DESC(zfs_scan_mem_lim_fact, "Fraction of RAM for scan hard limit");
+ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,
+	"Enable processing of the free_bpobj");
 
-module_param(zfs_scan_issue_strategy, int, 0644);
-MODULE_PARM_DESC(zfs_scan_issue_strategy,
-	"IO issuing strategy during scrubbing. 0 = default, 1 = LBA, 2 = size");
+ZFS_MODULE_PARAM(zfs, zfs_, scan_blkstats, INT, ZMOD_RW,
+	"Enable block statistics calculation during scrub");
 
-module_param(zfs_scan_legacy, int, 0644);
-MODULE_PARM_DESC(zfs_scan_legacy, "Scrub using legacy non-sequential method");
+ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_fact, INT, ZMOD_RW,
+	"Fraction of RAM for scan hard limit");
 
-module_param(zfs_scan_checkpoint_intval, int, 0644);
-MODULE_PARM_DESC(zfs_scan_checkpoint_intval,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_issue_strategy, INT, ZMOD_RW,
+	"IO issuing strategy during scrubbing. "
+	"0 = default, 1 = LBA, 2 = size");
+
+ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW,
+	"Scrub using legacy non-sequential method");
+
+ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, INT, ZMOD_RW,
 	"Scan progress on-disk checkpointing interval");
 
-/* CSTYLED */
-module_param(zfs_scan_max_ext_gap, ulong, 0644);
-MODULE_PARM_DESC(zfs_scan_max_ext_gap,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_max_ext_gap, ULONG, ZMOD_RW,
 	"Max gap in bytes between sequential scrub / resilver I/Os");
 
-module_param(zfs_scan_mem_lim_soft_fact, int, 0644);
-MODULE_PARM_DESC(zfs_scan_mem_lim_soft_fact,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_soft_fact, INT, ZMOD_RW,
 	"Fraction of hard limit used as soft limit");
 
-module_param(zfs_scan_strict_mem_lim, int, 0644);
-MODULE_PARM_DESC(zfs_scan_strict_mem_lim,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
 	"Tunable to attempt to reduce lock contention");
 
-module_param(zfs_scan_fill_weight, int, 0644);
-MODULE_PARM_DESC(zfs_scan_fill_weight,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW,
 	"Tunable to adjust bias towards more filled segments during scans");
 
-module_param(zfs_resilver_disable_defer, int, 0644);
-MODULE_PARM_DESC(zfs_resilver_disable_defer,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
+	"Tunable to report resilver performance over the last N txgs");
+
+ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
 	"Process all resilvers immediately");
-#endif
+/* END CSTYLED */

diff --git a/zfs/module/zfs/dsl_synctask.c b/zfs/module/zfs/dsl_synctask.c
index 2d6ca85..9fc9d40 100644
--- a/zfs/module/zfs/dsl_synctask.c
+++ b/zfs/module/zfs/dsl_synctask.c

@@ -32,10 +32,10 @@
 
 #define	DST_AVG_BLKSHIFT 14
 
-/* ARGSUSED */
 static int
 dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
 {
+	(void) arg, (void) tx;
 	return (0);
 }
 
@@ -170,15 +170,13 @@
 
 static void
 dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
-    int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx,
-    boolean_t early)
+    dmu_tx_t *tx, boolean_t early)
 {
 	dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
 
 	dst->dst_pool = dp;
 	dst->dst_txg = dmu_tx_get_txg(tx);
-	dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
-	dst->dst_space_check = space_check;
+	dst->dst_space_check = ZFS_SPACE_CHECK_NONE;
 	dst->dst_checkfunc = dsl_null_checkfunc;
 	dst->dst_syncfunc = syncfunc;
 	dst->dst_arg = arg;
@@ -192,18 +190,16 @@
 
 void
 dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
-    int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
+    dmu_tx_t *tx)
 {
-	dsl_sync_task_nowait_common(dp, syncfunc, arg,
-	    blocks_modified, space_check, tx, B_FALSE);
+	dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_FALSE);
 }
 
 void
 dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
-    int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx)
+    dmu_tx_t *tx)
 {
-	dsl_sync_task_nowait_common(dp, syncfunc, arg,
-	    blocks_modified, space_check, tx, B_TRUE);
+	dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_TRUE);
 }
 
 /*

diff --git a/zfs/module/zfs/dsl_userhold.c b/zfs/module/zfs/dsl_userhold.c
index 1a2e2a1..75d1531 100644
--- a/zfs/module/zfs/dsl_userhold.c
+++ b/zfs/module/zfs/dsl_userhold.c

@@ -197,7 +197,7 @@
 
 	spa_history_log_internal_ds(ds, "hold", tx,
 	    "tag=%s temp=%d refs=%llu",
-	    htag, minor != 0, ds->ds_userrefs);
+	    htag, minor != 0, (u_longlong_t)ds->ds_userrefs);
 }
 
 typedef struct zfs_hold_cleanup_arg {
@@ -406,7 +406,7 @@
 				    snapname, holdname);
 				fnvlist_add_int32(ddura->ddura_errlist, errtag,
 				    ENOENT);
-				strfree(errtag);
+				kmem_strfree(errtag);
 			}
 			continue;
 		}

diff --git a/zfs/module/zfs/edonr_zfs.c b/zfs/module/zfs/edonr_zfs.c
index e92da6d..aa00e1c 100644
--- a/zfs/module/zfs/edonr_zfs.c
+++ b/zfs/module/zfs/edonr_zfs.c

@@ -27,8 +27,8 @@
  */
 #include <sys/zfs_context.h>
 #include <sys/zio.h>
+#include <sys/zio_checksum.h>
 #include <sys/edonr.h>
-#include <sys/zfs_context.h>	/* For CTASSERT() */
 #include <sys/abd.h>
 
 #define	EDONR_MODE		512

diff --git a/zfs/module/zfs/fm.c b/zfs/module/zfs/fm.c
index 98a8448..cfd1372 100644
--- a/zfs/module/zfs/fm.c
+++ b/zfs/module/zfs/fm.c

@@ -66,14 +66,9 @@
 #ifdef _KERNEL
 #include <sys/atomic.h>
 #include <sys/condvar.h>
-#include <sys/console.h>
-#include <sys/kobj.h>
-#include <sys/time.h>
 #include <sys/zfs_ioctl.h>
 
-int zfs_zevent_len_max = 0;
-int zfs_zevent_cols = 80;
-int zfs_zevent_console = 0;
+int zfs_zevent_len_max = 512;
 
 static int zevent_len_cur = 0;
 static int zevent_waiters = 0;
@@ -105,320 +100,21 @@
 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
 	kstat_named_t	payload_set_failed;	/* num payload set failures */
+	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
 };
 
 static struct erpt_kstat erpt_kstat_data = {
 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
-	{ "payload-set-failed", KSTAT_DATA_UINT64 }
+	{ "payload-set-failed", KSTAT_DATA_UINT64 },
+	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
 };
 
 kstat_t *fm_ksp;
 
 #ifdef _KERNEL
 
-/*
- * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
- * output so they aren't split across console lines, and return the end column.
- */
-/*PRINTFLIKE4*/
-static int
-fm_printf(int depth, int c, int cols, const char *format, ...)
-{
-	va_list ap;
-	int width;
-	char c1;
-
-	va_start(ap, format);
-	width = vsnprintf(&c1, sizeof (c1), format, ap);
-	va_end(ap);
-
-	if (c + width >= cols) {
-		console_printf("\n");
-		c = 0;
-		if (format[0] != ' ' && depth > 0) {
-			console_printf(" ");
-			c++;
-		}
-	}
-
-	va_start(ap, format);
-	console_vprintf(format, ap);
-	va_end(ap);
-
-	return ((c + width) % cols);
-}
-
-/*
- * Recursively print an nvlist in the specified column width and return the
- * column we end up in.  This function is called recursively by fm_nvprint(),
- * below.  We generically format the entire nvpair using hexadecimal
- * integers and strings, and elide any integer arrays.  Arrays are basically
- * used for cache dumps right now, so we suppress them so as not to overwhelm
- * the amount of console output we produce at panic time.  This can be further
- * enhanced as FMA technology grows based upon the needs of consumers.  All
- * FMA telemetry is logged using the dump device transport, so the console
- * output serves only as a fallback in case this procedure is unsuccessful.
- */
-static int
-fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
-{
-	nvpair_t *nvp;
-
-	for (nvp = nvlist_next_nvpair(nvl, NULL);
-	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
-
-		data_type_t type = nvpair_type(nvp);
-		const char *name = nvpair_name(nvp);
-
-		boolean_t b;
-		uint8_t i8;
-		uint16_t i16;
-		uint32_t i32;
-		uint64_t i64;
-		char *str;
-		nvlist_t *cnv;
-
-		if (strcmp(name, FM_CLASS) == 0)
-			continue; /* already printed by caller */
-
-		c = fm_printf(d, c, cols, " %s=", name);
-
-		switch (type) {
-		case DATA_TYPE_BOOLEAN:
-			c = fm_printf(d + 1, c, cols, " 1");
-			break;
-
-		case DATA_TYPE_BOOLEAN_VALUE:
-			(void) nvpair_value_boolean_value(nvp, &b);
-			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
-			break;
-
-		case DATA_TYPE_BYTE:
-			(void) nvpair_value_byte(nvp, &i8);
-			c = fm_printf(d + 1, c, cols, "0x%x", i8);
-			break;
-
-		case DATA_TYPE_INT8:
-			(void) nvpair_value_int8(nvp, (void *)&i8);
-			c = fm_printf(d + 1, c, cols, "0x%x", i8);
-			break;
-
-		case DATA_TYPE_UINT8:
-			(void) nvpair_value_uint8(nvp, &i8);
-			c = fm_printf(d + 1, c, cols, "0x%x", i8);
-			break;
-
-		case DATA_TYPE_INT16:
-			(void) nvpair_value_int16(nvp, (void *)&i16);
-			c = fm_printf(d + 1, c, cols, "0x%x", i16);
-			break;
-
-		case DATA_TYPE_UINT16:
-			(void) nvpair_value_uint16(nvp, &i16);
-			c = fm_printf(d + 1, c, cols, "0x%x", i16);
-			break;
-
-		case DATA_TYPE_INT32:
-			(void) nvpair_value_int32(nvp, (void *)&i32);
-			c = fm_printf(d + 1, c, cols, "0x%x", i32);
-			break;
-
-		case DATA_TYPE_UINT32:
-			(void) nvpair_value_uint32(nvp, &i32);
-			c = fm_printf(d + 1, c, cols, "0x%x", i32);
-			break;
-
-		case DATA_TYPE_INT64:
-			(void) nvpair_value_int64(nvp, (void *)&i64);
-			c = fm_printf(d + 1, c, cols, "0x%llx",
-			    (u_longlong_t)i64);
-			break;
-
-		case DATA_TYPE_UINT64:
-			(void) nvpair_value_uint64(nvp, &i64);
-			c = fm_printf(d + 1, c, cols, "0x%llx",
-			    (u_longlong_t)i64);
-			break;
-
-		case DATA_TYPE_HRTIME:
-			(void) nvpair_value_hrtime(nvp, (void *)&i64);
-			c = fm_printf(d + 1, c, cols, "0x%llx",
-			    (u_longlong_t)i64);
-			break;
-
-		case DATA_TYPE_STRING:
-			(void) nvpair_value_string(nvp, &str);
-			c = fm_printf(d + 1, c, cols, "\"%s\"",
-			    str ? str : "<NULL>");
-			break;
-
-		case DATA_TYPE_NVLIST:
-			c = fm_printf(d + 1, c, cols, "[");
-			(void) nvpair_value_nvlist(nvp, &cnv);
-			c = fm_nvprintr(cnv, d + 1, c, cols);
-			c = fm_printf(d + 1, c, cols, " ]");
-			break;
-
-		case DATA_TYPE_NVLIST_ARRAY: {
-			nvlist_t **val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[");
-			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++) {
-				c = fm_nvprintr(val[i], d + 1, c, cols);
-			}
-			c = fm_printf(d + 1, c, cols, " ]");
-			}
-			break;
-
-		case DATA_TYPE_INT8_ARRAY: {
-			int8_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_int8_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_UINT8_ARRAY: {
-			uint8_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_INT16_ARRAY: {
-			int16_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_int16_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_UINT16_ARRAY: {
-			uint16_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_INT32_ARRAY: {
-			int32_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_int32_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-			c = fm_printf(d + 1, c, cols, "0x%llx ",
-			    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_UINT32_ARRAY: {
-			uint32_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_INT64_ARRAY: {
-			int64_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_int64_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_UINT64_ARRAY: {
-			uint64_t *val;
-			uint_t i, nelem;
-
-			c = fm_printf(d + 1, c, cols, "[ ");
-			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
-			for (i = 0; i < nelem; i++)
-				c = fm_printf(d + 1, c, cols, "0x%llx ",
-				    (u_longlong_t)val[i]);
-
-			c = fm_printf(d + 1, c, cols, "]");
-			break;
-			}
-
-		case DATA_TYPE_STRING_ARRAY:
-		case DATA_TYPE_BOOLEAN_ARRAY:
-		case DATA_TYPE_BYTE_ARRAY:
-			c = fm_printf(d + 1, c, cols, "[...]");
-			break;
-
-		case DATA_TYPE_UNKNOWN:
-		case DATA_TYPE_DONTCARE:
-			c = fm_printf(d + 1, c, cols, "<unknown>");
-			break;
-		}
-	}
-
-	return (c);
-}
-
-void
-fm_nvprint(nvlist_t *nvl)
-{
-	char *class;
-	int c = 0;
-
-	console_printf("\n");
-
-	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
-		c = fm_printf(0, c, zfs_zevent_cols, "%s", class);
-
-	if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
-		console_printf("\n");
-
-	console_printf("\n");
-}
-
 static zevent_t *
 zfs_zevent_alloc(void)
 {
@@ -542,9 +238,6 @@
 		goto out;
 	}
 
-	if (zfs_zevent_console)
-		fm_nvprint(nvl);
-
 	ev = zfs_zevent_alloc();
 	if (ev == NULL) {
 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
@@ -569,6 +262,12 @@
 	return (error);
 }
 
+void
+zfs_zevent_track_duplicate(void)
+{
+	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
+}
+
 static int
 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
 {
@@ -579,30 +278,29 @@
 	return (0);
 }
 
-int
+zfs_file_t *
 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
 {
-	file_t *fp;
-	int error;
-
-	fp = getf(fd);
+	zfs_file_t *fp = zfs_file_get(fd);
 	if (fp == NULL)
-		return (SET_ERROR(EBADF));
+		return (NULL);
 
-	error = zfsdev_getminor(fp->f_file, minorp);
+	int error = zfsdev_getminor(fp, minorp);
 	if (error == 0)
 		error = zfs_zevent_minor_to_state(*minorp, ze);
 
-	if (error)
-		zfs_zevent_fd_rele(fd);
+	if (error) {
+		zfs_zevent_fd_rele(fp);
+		fp = NULL;
+	}
 
-	return (error);
+	return (fp);
 }
 
 void
-zfs_zevent_fd_rele(int fd)
+zfs_zevent_fd_rele(zfs_file_t *fp)
 {
-	releasef(fd);
+	zfs_file_put(fp);
 }
 
 /*
@@ -656,8 +354,7 @@
 
 #ifdef _KERNEL
 	/* Include events dropped due to rate limiting */
-	*dropped += ratelimit_dropped;
-	ratelimit_dropped = 0;
+	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
 #endif
 	ze->ze_dropped = 0;
 out:
@@ -786,17 +483,17 @@
 /*
  * Wrappers for FM nvlist allocators
  */
-/* ARGSUSED */
 static void *
 i_fm_alloc(nv_alloc_t *nva, size_t size)
 {
+	(void) nva;
 	return (kmem_zalloc(size, KM_SLEEP));
 }
 
-/* ARGSUSED */
 static void
 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
 {
+	(void) nva;
 	kmem_free(buf, size);
 }
 
@@ -1256,6 +953,7 @@
 			}
 			atomic_inc_64(
 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
+			va_end(ap);
 			return;
 		}
 	}
@@ -1613,18 +1311,13 @@
 {
 	atomic_inc_64(&ratelimit_dropped);
 }
-#endif
 
-#ifdef _KERNEL
 void
 fm_init(void)
 {
 	zevent_len_cur = 0;
 	zevent_flags = 0;
 
-	if (zfs_zevent_len_max == 0)
-		zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
-
 	/* Initialize zevent allocation and generation kstats */
 	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
@@ -1641,6 +1334,8 @@
 	list_create(&zevent_list, sizeof (zevent_t),
 	    offsetof(zevent_t, ev_node));
 	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
+
+	zfs_ereport_init();
 }
 
 void
@@ -1648,6 +1343,8 @@
 {
 	int count;
 
+	zfs_ereport_fini();
+
 	zfs_zevent_drain_all(&count);
 
 	mutex_enter(&zevent_lock);
@@ -1670,14 +1367,7 @@
 		fm_ksp = NULL;
 	}
 }
-
-module_param(zfs_zevent_len_max, int, 0644);
-MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");
-
-module_param(zfs_zevent_cols, int, 0644);
-MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");
-
-module_param(zfs_zevent_console, int, 0644);
-MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");
-
 #endif /* _KERNEL */
+
+ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
+	"Max event queue length");

diff --git a/zfs/module/zfs/gzip.c b/zfs/module/zfs/gzip.c
index 5cac2a7..4819124 100644
--- a/zfs/module/zfs/gzip.c
+++ b/zfs/module/zfs/gzip.c

@@ -29,7 +29,8 @@
 #include <sys/debug.h>
 #include <sys/types.h>
 #include <sys/strings.h>
-#include "qat.h"
+#include <sys/qat.h>
+#include <sys/zio_compress.h>
 
 #ifdef _KERNEL
 
@@ -82,10 +83,10 @@
 	return ((size_t)dstlen);
 }
 
-/*ARGSUSED*/
 int
 gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 {
+	(void) n;
 	zlen_t dstlen = d_len;
 
 	ASSERT(d_len >= s_len);

diff --git a/zfs/module/zfs/lz4.c b/zfs/module/zfs/lz4.c
index c04cfa7..eba8f01 100644
--- a/zfs/module/zfs/lz4.c
+++ b/zfs/module/zfs/lz4.c

@@ -33,6 +33,7 @@
  */
 
 #include <sys/zfs_context.h>
+#include <sys/zio_compress.h>
 
 static int real_LZ4_compress(const char *source, char *dest, int isize,
     int osize);
@@ -45,11 +46,11 @@
 
 static kmem_cache_t *lz4_cache;
 
-/*ARGSUSED*/
 size_t
 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
     size_t d_len, int n)
 {
+	(void) n;
 	uint32_t bufsiz;
 	char *dest = d_start;
 
@@ -73,11 +74,11 @@
 	return (bufsiz + sizeof (bufsiz));
 }
 
-/*ARGSUSED*/
 int
 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
     size_t d_len, int n)
 {
+	(void) n;
 	const char *src = s_start;
 	uint32_t bufsiz = BE_IN32(src);
 
@@ -207,7 +208,7 @@
  * Little Endian or Big Endian?
  * Note: overwrite the below #define if you know your architecture endianness.
  */
-#if defined(_BIG_ENDIAN)
+#if defined(_ZFS_BIG_ENDIAN)
 #define	LZ4_BIG_ENDIAN 1
 #else
 /*
@@ -383,7 +384,7 @@
 LZ4_NbCommonBytes(register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
-#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
 	!defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_clzll(val) >> 3);
 #else
@@ -404,7 +405,7 @@
 	return (r);
 #endif
 #else
-#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
 	!defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_ctzll(val) >> 3);
 #else
@@ -426,7 +427,7 @@
 LZ4_NbCommonBytes(register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
-#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
+#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
 	!defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_clz(val) >> 3);
 #else
@@ -462,7 +463,6 @@
 
 /* Compression functions */
 
-/*ARGSUSED*/
 static int
 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
     int osize)
@@ -653,7 +653,6 @@
 	HASHLOG64K))
 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
 
-/*ARGSUSED*/
 static int
 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
     int osize)

diff --git a/zfs/module/zfs/lzjb.c b/zfs/module/zfs/lzjb.c
index ae18467..1c536b1 100644
--- a/zfs/module/zfs/lzjb.c
+++ b/zfs/module/zfs/lzjb.c

@@ -37,6 +37,7 @@
  */
 
 #include <sys/zfs_context.h>
+#include <sys/zio_compress.h>
 
 #define	MATCH_BITS	6
 #define	MATCH_MIN	3
@@ -44,10 +45,10 @@
 #define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)
 #define	LEMPEL_SIZE	1024
 
-/*ARGSUSED*/
 size_t
 lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 {
+	(void) n;
 	uchar_t *src = s_start;
 	uchar_t *dst = d_start;
 	uchar_t *cpy;
@@ -99,10 +100,10 @@
 	return (dst - (uchar_t *)d_start);
 }
 
-/*ARGSUSED*/
 int
 lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 {
+	(void) s_len, (void) n;
 	uchar_t *src = s_start;
 	uchar_t *dst = d_start;
 	uchar_t *d_end = (uchar_t *)d_start + d_len;

diff --git a/zfs/module/zfs/metaslab.c b/zfs/module/zfs/metaslab.c
index faa175b..7479609 100644
--- a/zfs/module/zfs/metaslab.c
+++ b/zfs/module/zfs/metaslab.c

@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
  */
 
@@ -31,11 +32,13 @@
 #include <sys/space_map.h>
 #include <sys/metaslab_impl.h>
 #include <sys/vdev_impl.h>
+#include <sys/vdev_draid.h>
 #include <sys/zio.h>
 #include <sys/spa_impl.h>
 #include <sys/zfeature.h>
 #include <sys/vdev_indirect_mapping.h>
 #include <sys/zap.h>
+#include <sys/btree.h>
 
 #define	WITH_DF_BLOCK_ALLOCATOR
 
@@ -45,10 +48,10 @@
 /*
  * Metaslab granularity, in bytes. This is roughly similar to what would be
  * referred to as the "stripe size" in traditional RAID arrays. In normal
- * operation, we will try to write this amount of data to a top-level vdev
- * before moving on to the next one.
+ * operation, we will try to write this amount of data to each disk before
+ * moving on to the next top-level vdev.
  */
-unsigned long metaslab_aliquot = 512 << 10;
+static unsigned long metaslab_aliquot = 1024 * 1024;
 
 /*
  * For testing, make some blocks above a certain size be gang blocks.
@@ -56,12 +59,21 @@
 unsigned long metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;
 
 /*
- * Since we can touch multiple metaslabs (and their respective space maps)
- * with each transaction group, we benefit from having a smaller space map
+ * In pools where the log space map feature is not enabled we touch
+ * multiple metaslabs (and their respective space maps) with each
+ * transaction group. Thus, we benefit from having a small space map
  * block size since it allows us to issue more I/O operations scattered
- * around the disk.
+ * around the disk. So a sane default for the space map block size
+ * is 8~16K.
  */
-int zfs_metaslab_sm_blksz = (1 << 12);
+int zfs_metaslab_sm_blksz_no_log = (1 << 14);
+
+/*
+ * When the log space map feature is enabled, we accumulate a lot of
+ * changes per metaslab that are flushed once in a while so we benefit
+ * from a bigger block size like 128K for the metaslab space maps.
+ */
+int zfs_metaslab_sm_blksz_with_log = (1 << 17);
 
 /*
  * The in-core space map representation is more compact than its on-disk form.
@@ -103,7 +115,7 @@
 
 /*
  * Metaslab groups are considered eligible for allocations if their
- * fragmenation metric (measured as a percentage) is less than or
+ * fragmentation metric (measured as a percentage) is less than or
  * equal to zfs_mg_fragmentation_threshold. If a metaslab group
  * exceeds this threshold then it will be skipped unless all metaslab
  * groups within the metaslab class have also crossed this threshold.
@@ -175,6 +187,13 @@
 int metaslab_df_max_search = 16 * 1024 * 1024;
 
 /*
+ * Forces the metaslab_block_picker function to search forward through at
+ * least this many segments before giving up on finding a segment that the
+ * allocation will fit into.
+ */
+uint32_t metaslab_min_search_count = 100;
+
+/*
  * If we are not searching forward (due to metaslab_df_max_search,
  * metaslab_df_free_pct, or metaslab_df_alloc_threshold), this tunable
  * controls what segment is used.  If it is set, we will use the largest free
@@ -189,16 +208,20 @@
 int metaslab_load_pct = 50;
 
 /*
- * Determines how many txgs a metaslab may remain loaded without having any
- * allocations from it. As long as a metaslab continues to be used we will
- * keep it loaded.
+ * These tunables control how long a metaslab will remain loaded after the
+ * last allocation from it.  A metaslab can't be unloaded until at least
+ * metaslab_unload_delay TXGs and metaslab_unload_delay_ms milliseconds
+ * have elapsed.  However, zfs_metaslab_mem_limit may cause it to be
+ * unloaded sooner.  These settings are intended to be generous -- to keep
+ * metaslabs loaded for a long time, reducing the rate of metaslab loading.
  */
-int metaslab_unload_delay = TXG_SIZE * 2;
+int metaslab_unload_delay = 32;
+int metaslab_unload_delay_ms = 10 * 60 * 1000; /* ten minutes */
 
 /*
  * Max number of metaslabs per group to preload.
  */
-int metaslab_preload_limit = SPA_DVAS_PER_BP;
+int metaslab_preload_limit = 10;
 
 /*
  * Enable/disable preloading of metaslab.
@@ -241,9 +264,7 @@
  * Internal switch to enable/disable the metaslab allocation tracing
  * facility.
  */
-#ifdef _METASLAB_TRACING
-boolean_t metaslab_trace_enabled = B_TRUE;
-#endif
+boolean_t metaslab_trace_enabled = B_FALSE;
 
 /*
  * Maximum entries that the metaslab allocation tracing facility will keep
@@ -253,9 +274,7 @@
  * to every exceed this value. In debug mode, the system will panic if this
  * limit is ever reached allowing for further investigation.
  */
-#ifdef _METASLAB_TRACING
 uint64_t metaslab_trace_max_entries = 5000;
-#endif
 
 /*
  * Maximum number of metaslabs per group that can be disabled
@@ -263,16 +282,123 @@
  */
 int max_disabled_ms = 3;
 
-static uint64_t metaslab_weight(metaslab_t *);
-static void metaslab_set_fragmentation(metaslab_t *);
+/*
+ * Time (in seconds) to respect ms_max_size when the metaslab is not loaded.
+ * To avoid 64-bit overflow, don't set above UINT32_MAX.
+ */
+unsigned long zfs_metaslab_max_size_cache_sec = 3600; /* 1 hour */
+
+/*
+ * Maximum percentage of memory to use on storing loaded metaslabs. If loading
+ * a metaslab would take it over this percentage, the oldest selected metaslab
+ * is automatically unloaded.
+ */
+int zfs_metaslab_mem_limit = 25;
+
+/*
+ * Force the per-metaslab range trees to use 64-bit integers to store
+ * segments. Used for debugging purposes.
+ */
+boolean_t zfs_metaslab_force_large_segs = B_FALSE;
+
+/*
+ * By default we only store segments over a certain size in the size-sorted
+ * metaslab trees (ms_allocatable_by_size and
+ * ms_unflushed_frees_by_size). This dramatically reduces memory usage and
+ * improves load and unload times at the cost of causing us to use slightly
+ * larger segments than we would otherwise in some cases.
+ */
+uint32_t metaslab_by_size_min_shift = 14;
+
+/*
+ * If not set, we will first try normal allocation.  If that fails then
+ * we will do a gang allocation.  If that fails then we will do a "try hard"
+ * gang allocation.  If that fails then we will have a multi-layer gang
+ * block.
+ *
+ * If set, we will first try normal allocation.  If that fails then
+ * we will do a "try hard" allocation.  If that fails we will do a gang
+ * allocation.  If that fails we will do a "try hard" gang allocation.  If
+ * that fails then we will have a multi-layer gang block.
+ */
+int zfs_metaslab_try_hard_before_gang = B_FALSE;
+
+/*
+ * When not trying hard, we only consider the best zfs_metaslab_find_max_tries
+ * metaslabs.  This improves performance, especially when there are many
+ * metaslabs per vdev and the allocation can't actually be satisfied (so we
+ * would otherwise iterate all the metaslabs).  If there is a metaslab with a
+ * worse weight but it can actually satisfy the allocation, we won't find it
+ * until trying hard.  This may happen if the worse metaslab is not loaded
+ * (and the true weight is better than we have calculated), or due to weight
+ * bucketization.  E.g. we are looking for a 60K segment, and the best
+ * metaslabs all have free segments in the 32-63K bucket, but the best
+ * zfs_metaslab_find_max_tries metaslabs have ms_max_size <60KB, and a
+ * subsequent metaslab has ms_max_size >60KB (but fewer segments in this
+ * bucket, and therefore a lower weight).
+ */
+int zfs_metaslab_find_max_tries = 100;
+
+static uint64_t metaslab_weight(metaslab_t *, boolean_t);
+static void metaslab_set_fragmentation(metaslab_t *, boolean_t);
 static void metaslab_free_impl(vdev_t *, uint64_t, uint64_t, boolean_t);
 static void metaslab_check_free_impl(vdev_t *, uint64_t, uint64_t);
 
 static void metaslab_passivate(metaslab_t *msp, uint64_t weight);
 static uint64_t metaslab_weight_from_range_tree(metaslab_t *msp);
-#ifdef _METASLAB_TRACING
+static void metaslab_flush_update(metaslab_t *, dmu_tx_t *);
+static unsigned int metaslab_idx_func(multilist_t *, void *);
+static void metaslab_evict(metaslab_t *, uint64_t);
+static void metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg);
 kmem_cache_t *metaslab_alloc_trace_cache;
-#endif
+
+typedef struct metaslab_stats {
+	kstat_named_t metaslabstat_trace_over_limit;
+	kstat_named_t metaslabstat_reload_tree;
+	kstat_named_t metaslabstat_too_many_tries;
+	kstat_named_t metaslabstat_try_hard;
+} metaslab_stats_t;
+
+static metaslab_stats_t metaslab_stats = {
+	{ "trace_over_limit",		KSTAT_DATA_UINT64 },
+	{ "reload_tree",		KSTAT_DATA_UINT64 },
+	{ "too_many_tries",		KSTAT_DATA_UINT64 },
+	{ "try_hard",			KSTAT_DATA_UINT64 },
+};
+
+/* Bump one metaslab_stats counter; the caller supplies the ';'. */
+#define	METASLABSTAT_BUMP(stat) \
+	atomic_inc_64(&metaslab_stats.stat.value.ui64)
+
+kstat_t *metaslab_ksp;
+
+void
+metaslab_stat_init(void)
+{
+	ASSERT(metaslab_alloc_trace_cache == NULL);
+	metaslab_alloc_trace_cache = kmem_cache_create(
+	    "metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t),
+	    0, NULL, NULL, NULL, NULL, NULL, 0);
+	metaslab_ksp = kstat_create("zfs", 0, "metaslab_stats",
+	    "misc", KSTAT_TYPE_NAMED, sizeof (metaslab_stats) /
+	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+	if (metaslab_ksp != NULL) {
+		metaslab_ksp->ks_data = &metaslab_stats;
+		kstat_install(metaslab_ksp);
+	}
+}
+
+void
+metaslab_stat_fini(void)
+{
+	if (metaslab_ksp != NULL) {
+		kstat_delete(metaslab_ksp);
+		metaslab_ksp = NULL;
+	}
+
+	kmem_cache_destroy(metaslab_alloc_trace_cache);
+	metaslab_alloc_trace_cache = NULL;
+}
 
 /*
  * ==========================================================================
@@ -284,18 +410,19 @@
 {
 	metaslab_class_t *mc;
 
-	mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
+	mc = kmem_zalloc(offsetof(metaslab_class_t,
+	    mc_allocator[spa->spa_alloc_count]), KM_SLEEP);
 
 	mc->mc_spa = spa;
-	mc->mc_rotor = NULL;
 	mc->mc_ops = ops;
 	mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL);
-	mc->mc_alloc_slots = kmem_zalloc(spa->spa_alloc_count *
-	    sizeof (zfs_refcount_t), KM_SLEEP);
-	mc->mc_alloc_max_slots = kmem_zalloc(spa->spa_alloc_count *
-	    sizeof (uint64_t), KM_SLEEP);
-	for (int i = 0; i < spa->spa_alloc_count; i++)
-		zfs_refcount_create_tracked(&mc->mc_alloc_slots[i]);
+	multilist_create(&mc->mc_metaslab_txg_list, sizeof (metaslab_t),
+	    offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func);
+	for (int i = 0; i < spa->spa_alloc_count; i++) {
+		metaslab_class_allocator_t *mca = &mc->mc_allocator[i];
+		mca->mca_rotor = NULL;
+		zfs_refcount_create_tracked(&mca->mca_alloc_slots);
+	}
 
 	return (mc);
 }
@@ -303,20 +430,22 @@
 void
 metaslab_class_destroy(metaslab_class_t *mc)
 {
-	ASSERT(mc->mc_rotor == NULL);
+	spa_t *spa = mc->mc_spa;
+
 	ASSERT(mc->mc_alloc == 0);
 	ASSERT(mc->mc_deferred == 0);
 	ASSERT(mc->mc_space == 0);
 	ASSERT(mc->mc_dspace == 0);
 
-	for (int i = 0; i < mc->mc_spa->spa_alloc_count; i++)
-		zfs_refcount_destroy(&mc->mc_alloc_slots[i]);
-	kmem_free(mc->mc_alloc_slots, mc->mc_spa->spa_alloc_count *
-	    sizeof (zfs_refcount_t));
-	kmem_free(mc->mc_alloc_max_slots, mc->mc_spa->spa_alloc_count *
-	    sizeof (uint64_t));
+	for (int i = 0; i < spa->spa_alloc_count; i++) {
+		metaslab_class_allocator_t *mca = &mc->mc_allocator[i];
+		ASSERT(mca->mca_rotor == NULL);
+		zfs_refcount_destroy(&mca->mca_alloc_slots);
+	}
 	mutex_destroy(&mc->mc_lock);
-	kmem_free(mc, sizeof (metaslab_class_t));
+	multilist_destroy(&mc->mc_metaslab_txg_list);
+	kmem_free(mc, offsetof(metaslab_class_t,
+	    mc_allocator[spa->spa_alloc_count]));
 }
 
 int
@@ -331,7 +460,7 @@
 	ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) ||
 	    spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER));
 
-	if ((mg = mc->mc_rotor) == NULL)
+	if ((mg = mc->mc_allocator[0].mca_rotor) == NULL)
 		return (0);
 
 	do {
@@ -340,7 +469,7 @@
 		ASSERT3P(vd->vdev_top, ==, vd);
 		ASSERT3P(mg->mg_class, ==, mc);
 		ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops);
-	} while ((mg = mg->mg_next) != mc->mc_rotor);
+	} while ((mg = mg->mg_next) != mc->mc_allocator[0].mca_rotor);
 
 	return (0);
 }
@@ -393,9 +522,10 @@
 	mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE,
 	    KM_SLEEP);
 
+	mutex_enter(&mc->mc_lock);
 	for (int c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
-		metaslab_group_t *mg = tvd->vdev_mg;
+		metaslab_group_t *mg = vdev_get_mg(tvd, mc);
 
 		/*
 		 * Skip any holes, uninitialized top-levels, or
@@ -406,13 +536,18 @@
 			continue;
 		}
 
+		IMPLY(mg == mg->mg_vd->vdev_log_mg,
+		    mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+
 		for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
 			mc_hist[i] += mg->mg_histogram[i];
 	}
 
-	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
+	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
 		VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]);
+	}
 
+	mutex_exit(&mc->mc_lock);
 	kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE);
 }
 
@@ -501,6 +636,51 @@
 	return (space);
 }
 
+void
+metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg)
+{
+	multilist_t *ml = &mc->mc_metaslab_txg_list;
+	for (int i = 0; i < multilist_get_num_sublists(ml); i++) {
+		multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
+		metaslab_t *msp = multilist_sublist_head(mls);
+		multilist_sublist_unlock(mls);
+		while (msp != NULL) {
+			mutex_enter(&msp->ms_lock);
+
+			/*
+			 * If the metaslab has been removed from the list
+			 * (which could happen if we were at the memory limit
+			 * and it was evicted during this loop), then we can't
+			 * proceed and we should restart the sublist.
+			 */
+			if (!multilist_link_active(&msp->ms_class_txg_node)) {
+				mutex_exit(&msp->ms_lock);
+				i--;	/* offset the loop's i++ */
+				break;
+			}
+			mls = multilist_sublist_lock(ml, i);
+			metaslab_t *next_msp = multilist_sublist_next(mls, msp);
+			multilist_sublist_unlock(mls);
+			if (txg >
+			    msp->ms_selected_txg + metaslab_unload_delay &&
+			    gethrtime() > msp->ms_selected_time +
+			    (uint64_t)MSEC2NSEC(metaslab_unload_delay_ms)) {
+				metaslab_evict(msp, txg);
+			} else {
+				/*
+				 * Once we've hit a metaslab selected too
+				 * recently to evict, we're done evicting for
+				 * now.
+				 */
+				mutex_exit(&msp->ms_lock);
+				break;
+			}
+			mutex_exit(&msp->ms_lock);
+			msp = next_msp;
+		}
+	}
+}
+
 static int
 metaslab_compare(const void *x1, const void *x2)
 {
@@ -531,74 +711,13 @@
 	if (sort1 > sort2)
 		return (1);
 
-	int cmp = AVL_CMP(m2->ms_weight, m1->ms_weight);
+	int cmp = TREE_CMP(m2->ms_weight, m1->ms_weight);
 	if (likely(cmp))
 		return (cmp);
 
-	IMPLY(AVL_CMP(m1->ms_start, m2->ms_start) == 0, m1 == m2);
+	IMPLY(TREE_CMP(m1->ms_start, m2->ms_start) == 0, m1 == m2);
 
-	return (AVL_CMP(m1->ms_start, m2->ms_start));
-}
-
-uint64_t
-metaslab_allocated_space(metaslab_t *msp)
-{
-	return (msp->ms_allocated_space);
-}
-
-/*
- * Verify that the space accounting on disk matches the in-core range_trees.
- */
-static void
-metaslab_verify_space(metaslab_t *msp, uint64_t txg)
-{
-	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
-	uint64_t allocating = 0;
-	uint64_t sm_free_space, msp_free_space;
-
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT(!msp->ms_condensing);
-
-	if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
-		return;
-
-	/*
-	 * We can only verify the metaslab space when we're called
-	 * from syncing context with a loaded metaslab that has an
-	 * allocated space map. Calling this in non-syncing context
-	 * does not provide a consistent view of the metaslab since
-	 * we're performing allocations in the future.
-	 */
-	if (txg != spa_syncing_txg(spa) || msp->ms_sm == NULL ||
-	    !msp->ms_loaded)
-		return;
-
-	/*
-	 * Even though the smp_alloc field can get negative (e.g.
-	 * see vdev_checkpoint_sm), that should never be the case
-	 * when it come's to a metaslab's space map.
-	 */
-	ASSERT3S(space_map_allocated(msp->ms_sm), >=, 0);
-
-	sm_free_space = msp->ms_size - metaslab_allocated_space(msp);
-
-	/*
-	 * Account for future allocations since we would have
-	 * already deducted that space from the ms_allocatable.
-	 */
-	for (int t = 0; t < TXG_CONCURRENT_STATES; t++) {
-		allocating +=
-		    range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]);
-	}
-
-	ASSERT3U(msp->ms_deferspace, ==,
-	    range_tree_space(msp->ms_defer[0]) +
-	    range_tree_space(msp->ms_defer[1]));
-
-	msp_free_space = range_tree_space(msp->ms_allocatable) + allocating +
-	    msp->ms_deferspace + range_tree_space(msp->ms_freed);
-
-	VERIFY3U(sm_free_space, ==, msp_free_space);
+	return (TREE_CMP(m1->ms_start, m2->ms_start));
 }
 
 /*
@@ -689,21 +808,37 @@
 	mutex_exit(&mg->mg_lock);
 }
 
+int
+metaslab_sort_by_flushed(const void *va, const void *vb)
+{
+	const metaslab_t *lhs = va, *rhs = vb;
+	uint64_t lhs_vdev, rhs_vdev;
+	int order;
+
+	/* Primary key: unflushed txg (hinted as the usual decider). */
+	order = TREE_CMP(lhs->ms_unflushed_txg, rhs->ms_unflushed_txg);
+	if (likely(order != 0))
+		return (order);
+
+	/* Ties fall back to the owning group's vdev id, then metaslab id. */
+	lhs_vdev = lhs->ms_group->mg_vd->vdev_id;
+	rhs_vdev = rhs->ms_group->mg_vd->vdev_id;
+	order = TREE_CMP(lhs_vdev, rhs_vdev);
+	return (order != 0 ? order : TREE_CMP(lhs->ms_id, rhs->ms_id));
+}
+
 metaslab_group_t *
 metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
 {
 	metaslab_group_t *mg;
 
-	mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
+	mg = kmem_zalloc(offsetof(metaslab_group_t,
+	    mg_allocator[allocators]), KM_SLEEP);
 	mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&mg->mg_ms_disabled_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&mg->mg_ms_disabled_cv, NULL, CV_DEFAULT, NULL);
-	mg->mg_primaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
-	    KM_SLEEP);
-	mg->mg_secondaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
-	    KM_SLEEP);
 	avl_create(&mg->mg_metaslab_tree, metaslab_compare,
-	    sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
+	    sizeof (metaslab_t), offsetof(metaslab_t, ms_group_node));
 	mg->mg_vd = vd;
 	mg->mg_class = mc;
 	mg->mg_activation_count = 0;
@@ -711,13 +846,9 @@
 	mg->mg_no_free_space = B_TRUE;
 	mg->mg_allocators = allocators;
 
-	mg->mg_alloc_queue_depth = kmem_zalloc(allocators *
-	    sizeof (zfs_refcount_t), KM_SLEEP);
-	mg->mg_cur_max_alloc_queue_depth = kmem_zalloc(allocators *
-	    sizeof (uint64_t), KM_SLEEP);
 	for (int i = 0; i < allocators; i++) {
-		zfs_refcount_create_tracked(&mg->mg_alloc_queue_depth[i]);
-		mg->mg_cur_max_alloc_queue_depth[i] = 0;
+		metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+		zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
 	}
 
 	mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
@@ -740,34 +871,27 @@
 
 	taskq_destroy(mg->mg_taskq);
 	avl_destroy(&mg->mg_metaslab_tree);
-	kmem_free(mg->mg_primaries, mg->mg_allocators * sizeof (metaslab_t *));
-	kmem_free(mg->mg_secondaries, mg->mg_allocators *
-	    sizeof (metaslab_t *));
 	mutex_destroy(&mg->mg_lock);
 	mutex_destroy(&mg->mg_ms_disabled_lock);
 	cv_destroy(&mg->mg_ms_disabled_cv);
 
 	for (int i = 0; i < mg->mg_allocators; i++) {
-		zfs_refcount_destroy(&mg->mg_alloc_queue_depth[i]);
-		mg->mg_cur_max_alloc_queue_depth[i] = 0;
+		metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+		zfs_refcount_destroy(&mga->mga_alloc_queue_depth);
 	}
-	kmem_free(mg->mg_alloc_queue_depth, mg->mg_allocators *
-	    sizeof (zfs_refcount_t));
-	kmem_free(mg->mg_cur_max_alloc_queue_depth, mg->mg_allocators *
-	    sizeof (uint64_t));
-
-	kmem_free(mg, sizeof (metaslab_group_t));
+	kmem_free(mg, offsetof(metaslab_group_t,
+	    mg_allocator[mg->mg_allocators]));
 }
 
 void
 metaslab_group_activate(metaslab_group_t *mg)
 {
 	metaslab_class_t *mc = mg->mg_class;
+	spa_t *spa = mc->mc_spa;
 	metaslab_group_t *mgprev, *mgnext;
 
-	ASSERT3U(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER), !=, 0);
+	ASSERT3U(spa_config_held(spa, SCL_ALLOC, RW_WRITER), !=, 0);
 
-	ASSERT(mc->mc_rotor != mg);
 	ASSERT(mg->mg_prev == NULL);
 	ASSERT(mg->mg_next == NULL);
 	ASSERT(mg->mg_activation_count <= 0);
@@ -775,10 +899,11 @@
 	if (++mg->mg_activation_count <= 0)
 		return;
 
-	mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
+	mg->mg_aliquot = metaslab_aliquot * MAX(1,
+	    vdev_get_ndisks(mg->mg_vd) - vdev_get_nparity(mg->mg_vd));
 	metaslab_group_alloc_update(mg);
 
-	if ((mgprev = mc->mc_rotor) == NULL) {
+	if ((mgprev = mc->mc_allocator[0].mca_rotor) == NULL) {
 		mg->mg_prev = mg;
 		mg->mg_next = mg;
 	} else {
@@ -788,7 +913,10 @@
 		mgprev->mg_next = mg;
 		mgnext->mg_prev = mg;
 	}
-	mc->mc_rotor = mg;
+	for (int i = 0; i < spa->spa_alloc_count; i++) {
+		mc->mc_allocator[i].mca_rotor = mg;
+		mg = mg->mg_next;
+	}
 }
 
 /*
@@ -809,7 +937,8 @@
 	    (SCL_ALLOC | SCL_ZIO));
 
 	if (--mg->mg_activation_count != 0) {
-		ASSERT(mc->mc_rotor != mg);
+		for (int i = 0; i < spa->spa_alloc_count; i++)
+			ASSERT(mc->mc_allocator[i].mca_rotor != mg);
 		ASSERT(mg->mg_prev == NULL);
 		ASSERT(mg->mg_next == NULL);
 		ASSERT(mg->mg_activation_count < 0);
@@ -835,14 +964,15 @@
 	spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
 	metaslab_group_alloc_update(mg);
 	for (int i = 0; i < mg->mg_allocators; i++) {
-		metaslab_t *msp = mg->mg_primaries[i];
+		metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+		metaslab_t *msp = mga->mga_primary;
 		if (msp != NULL) {
 			mutex_enter(&msp->ms_lock);
 			metaslab_passivate(msp,
 			    metaslab_weight_from_range_tree(msp));
 			mutex_exit(&msp->ms_lock);
 		}
-		msp = mg->mg_secondaries[i];
+		msp = mga->mga_secondary;
 		if (msp != NULL) {
 			mutex_enter(&msp->ms_lock);
 			metaslab_passivate(msp,
@@ -855,12 +985,15 @@
 	mgnext = mg->mg_next;
 
 	if (mg == mgnext) {
-		mc->mc_rotor = NULL;
+		mgnext = NULL;
 	} else {
-		mc->mc_rotor = mgnext;
 		mgprev->mg_next = mgnext;
 		mgnext->mg_prev = mgprev;
 	}
+	for (int i = 0; i < spa->spa_alloc_count; i++) {
+		if (mc->mc_allocator[i].mca_rotor == mg)
+			mc->mc_allocator[i].mca_rotor = mgnext;
+	}
 
 	mg->mg_prev = NULL;
 	mg->mg_next = NULL;
@@ -878,16 +1011,22 @@
 uint64_t
 metaslab_group_get_space(metaslab_group_t *mg)
 {
-	return ((1ULL << mg->mg_vd->vdev_ms_shift) * mg->mg_vd->vdev_ms_count);
+	/*
+	 * Note that the number of nodes in mg_metaslab_tree may be one less
+	 * than vdev_ms_count, due to the embedded log metaslab.
+	 */
+	mutex_enter(&mg->mg_lock);
+	uint64_t ms_count = avl_numnodes(&mg->mg_metaslab_tree);
+	mutex_exit(&mg->mg_lock);
+	return ((1ULL << mg->mg_vd->vdev_ms_shift) * ms_count);
 }
 
 void
 metaslab_group_histogram_verify(metaslab_group_t *mg)
 {
 	uint64_t *mg_hist;
-	vdev_t *vd = mg->mg_vd;
-	uint64_t ashift = vd->vdev_ashift;
-	int i;
+	avl_tree_t *t = &mg->mg_metaslab_tree;
+	uint64_t ashift = mg->mg_vd->vdev_ashift;
 
 	if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0)
 		return;
@@ -898,22 +1037,25 @@
 	ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=,
 	    SPACE_MAP_HISTOGRAM_SIZE + ashift);
 
-	for (int m = 0; m < vd->vdev_ms_count; m++) {
-		metaslab_t *msp = vd->vdev_ms[m];
-		ASSERT(msp != NULL);
-
-		/* skip if not active or not a member */
-		if (msp->ms_sm == NULL || msp->ms_group != mg)
+	mutex_enter(&mg->mg_lock);
+	for (metaslab_t *msp = avl_first(t);
+	    msp != NULL; msp = AVL_NEXT(t, msp)) {
+		VERIFY3P(msp->ms_group, ==, mg);
+		/* skip if not active */
+		if (msp->ms_sm == NULL)
 			continue;
 
-		for (i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++)
+		for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
 			mg_hist[i + ashift] +=
 			    msp->ms_sm->sm_phys->smp_histogram[i];
+		}
 	}
 
-	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++)
+	for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++)
 		VERIFY3U(mg_hist[i], ==, mg->mg_histogram[i]);
 
+	mutex_exit(&mg->mg_lock);
+
 	kmem_free(mg_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE);
 }
 
@@ -928,12 +1070,16 @@
 		return;
 
 	mutex_enter(&mg->mg_lock);
+	mutex_enter(&mc->mc_lock);
 	for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
+		IMPLY(mg == mg->mg_vd->vdev_log_mg,
+		    mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
 		mg->mg_histogram[i + ashift] +=
 		    msp->ms_sm->sm_phys->smp_histogram[i];
 		mc->mc_histogram[i + ashift] +=
 		    msp->ms_sm->sm_phys->smp_histogram[i];
 	}
+	mutex_exit(&mc->mc_lock);
 	mutex_exit(&mg->mg_lock);
 }
 
@@ -948,17 +1094,21 @@
 		return;
 
 	mutex_enter(&mg->mg_lock);
+	mutex_enter(&mc->mc_lock);
 	for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
 		ASSERT3U(mg->mg_histogram[i + ashift], >=,
 		    msp->ms_sm->sm_phys->smp_histogram[i]);
 		ASSERT3U(mc->mc_histogram[i + ashift], >=,
 		    msp->ms_sm->sm_phys->smp_histogram[i]);
+		IMPLY(mg == mg->mg_vd->vdev_log_mg,
+		    mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
 
 		mg->mg_histogram[i + ashift] -=
 		    msp->ms_sm->sm_phys->smp_histogram[i];
 		mc->mc_histogram[i + ashift] -=
 		    msp->ms_sm->sm_phys->smp_histogram[i];
 	}
+	mutex_exit(&mc->mc_lock);
 	mutex_exit(&mg->mg_lock);
 }
 
@@ -987,6 +1137,14 @@
 	mutex_enter(&mg->mg_lock);
 	ASSERT(msp->ms_group == mg);
 	avl_remove(&mg->mg_metaslab_tree, msp);
+
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	multilist_sublist_t *mls =
+	    multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
+	if (multilist_link_active(&msp->ms_class_txg_node))
+		multilist_sublist_remove(mls, msp);
+	multilist_sublist_unlock(mls);
+
 	msp->ms_group = NULL;
 	mutex_exit(&mg->mg_lock);
 }
@@ -1087,7 +1245,7 @@
 	 * in metaslab_group_alloc_update() for more information) and
 	 * the allocation throttle is disabled then allow allocations to this
 	 * device. However, if the allocation throttle is enabled then
-	 * check if we have reached our allocation limit (mg_alloc_queue_depth)
+	 * check if we have reached our allocation limit (mga_alloc_queue_depth)
 	 * to determine if we should allow allocations to this metaslab group.
 	 * If all metaslab groups are no longer considered allocatable
 	 * (mc_alloc_groups == 0) or we're trying to allocate the smallest
@@ -1095,9 +1253,9 @@
 	 * regardless of the mg_allocatable or throttle settings.
 	 */
 	if (mg->mg_allocatable) {
-		metaslab_group_t *mgp;
+		metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
 		int64_t qdepth;
-		uint64_t qmax = mg->mg_cur_max_alloc_queue_depth[allocator];
+		uint64_t qmax = mga->mga_cur_max_alloc_queue_depth;
 
 		if (!mc->mc_alloc_throttle_enabled)
 			return (B_TRUE);
@@ -1116,8 +1274,7 @@
 		 */
 		qmax = qmax * (4 + d) / 4;
 
-		qdepth = zfs_refcount_count(
-		    &mg->mg_alloc_queue_depth[allocator]);
+		qdepth = zfs_refcount_count(&mga->mga_alloc_queue_depth);
 
 		/*
 		 * If this metaslab group is below its qmax or it's
@@ -1135,11 +1292,14 @@
 		 * racy since we can't hold the locks for all metaslab
 		 * groups at the same time when we make this check.
 		 */
-		for (mgp = mg->mg_next; mgp != rotor; mgp = mgp->mg_next) {
-			qmax = mgp->mg_cur_max_alloc_queue_depth[allocator];
+		for (metaslab_group_t *mgp = mg->mg_next;
+		    mgp != rotor; mgp = mgp->mg_next) {
+			metaslab_group_allocator_t *mgap =
+			    &mgp->mg_allocator[allocator];
+			qmax = mgap->mga_cur_max_alloc_queue_depth;
 			qmax = qmax * (4 + d) / 4;
-			qdepth = zfs_refcount_count(
-			    &mgp->mg_alloc_queue_depth[allocator]);
+			qdepth =
+			    zfs_refcount_count(&mgap->mga_alloc_queue_depth);
 
 			/*
 			 * If there is another metaslab group that
@@ -1170,25 +1330,164 @@
  */
 
 /*
- * Comparison function for the private size-ordered tree. Tree is sorted
- * by size, larger sizes at the end of the tree.
+ * Comparison function for the private size-ordered tree using 32-bit
+ * ranges. Tree is sorted by size, larger sizes at the end of the tree.
  */
 static int
-metaslab_rangesize_compare(const void *x1, const void *x2)
+metaslab_rangesize32_compare(const void *x1, const void *x2)
 {
-	const range_seg_t *r1 = x1;
-	const range_seg_t *r2 = x2;
+	const range_seg32_t *r1 = x1;
+	const range_seg32_t *r2 = x2;
+
 	uint64_t rs_size1 = r1->rs_end - r1->rs_start;
 	uint64_t rs_size2 = r2->rs_end - r2->rs_start;
 
-	int cmp = AVL_CMP(rs_size1, rs_size2);
+	int cmp = TREE_CMP(rs_size1, rs_size2);
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_CMP(r1->rs_start, r2->rs_start));
+	return (TREE_CMP(r1->rs_start, r2->rs_start));
 }
 
 /*
+ * Comparison function for the private size-ordered tree using 64-bit
+ * ranges. Tree is sorted by size, larger sizes at the end of the tree.
+ */
+static int
+metaslab_rangesize64_compare(const void *x1, const void *x2)
+{
+	const range_seg64_t *r1 = x1;
+	const range_seg64_t *r2 = x2;
+
+	uint64_t rs_size1 = r1->rs_end - r1->rs_start;
+	uint64_t rs_size2 = r2->rs_end - r2->rs_start;
+
+	int cmp = TREE_CMP(rs_size1, rs_size2);
+	if (likely(cmp))
+		return (cmp);
+
+	return (TREE_CMP(r1->rs_start, r2->rs_start));
+}
+typedef struct metaslab_rt_arg {
+	zfs_btree_t *mra_bt;
+	uint32_t mra_floor_shift;
+} metaslab_rt_arg_t;
+
+struct mssa_arg {
+	range_tree_t *rt;
+	metaslab_rt_arg_t *mra;
+};
+
+static void
+metaslab_size_sorted_add(void *arg, uint64_t start, uint64_t size)
+{
+	struct mssa_arg *mssap = arg;
+	range_tree_t *rt = mssap->rt;
+	metaslab_rt_arg_t *mrap = mssap->mra;
+	range_seg_max_t seg = {0};
+	rs_set_start(&seg, rt, start);
+	rs_set_end(&seg, rt, start + size);
+	metaslab_rt_add(rt, &seg, mrap);
+}
+
+static void
+metaslab_size_tree_full_load(range_tree_t *rt)
+{
+	metaslab_rt_arg_t *mrap = rt->rt_arg;
+	METASLABSTAT_BUMP(metaslabstat_reload_tree);
+	ASSERT0(zfs_btree_numnodes(mrap->mra_bt));
+	mrap->mra_floor_shift = 0;	/* floor of 1: keep every segment */
+	struct mssa_arg arg = {0};
+	arg.rt = rt;
+	arg.mra = mrap;
+	range_tree_walk(rt, metaslab_size_sorted_add, &arg);
+}
+
+/*
+ * Create any block allocator specific components. The current allocators
+ * rely on using both a size-ordered range_tree_t and an array of uint64_t's.
+ */
+static void
+metaslab_rt_create(range_tree_t *rt, void *arg)
+{
+	metaslab_rt_arg_t *mrap = arg;
+	zfs_btree_t *size_tree = mrap->mra_bt;
+
+	size_t size;
+	int (*compare) (const void *, const void *);
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		size = sizeof (range_seg32_t);
+		compare = metaslab_rangesize32_compare;
+		break;
+	case RANGE_SEG64:
+		size = sizeof (range_seg64_t);
+		compare = metaslab_rangesize64_compare;
+		break;
+	default:
+		panic("Invalid range seg type %d", rt->rt_type);
+	}
+	zfs_btree_create(size_tree, compare, size);
+	mrap->mra_floor_shift = metaslab_by_size_min_shift;
+}
+
+static void
+metaslab_rt_destroy(range_tree_t *rt, void *arg)
+{
+	(void) rt;
+	metaslab_rt_arg_t *mrap = arg;
+	zfs_btree_t *size_tree = mrap->mra_bt;
+
+	zfs_btree_destroy(size_tree);
+	kmem_free(mrap, sizeof (*mrap));
+}
+
+static void
+metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	metaslab_rt_arg_t *mrap = arg;
+	zfs_btree_t *size_tree = mrap->mra_bt;
+
+	/* Segments under the size floor are not indexed; shift as 64-bit. */
+	if (rs_get_end(rs, rt) - rs_get_start(rs, rt) <
+	    (1ULL << mrap->mra_floor_shift))
+		return;
+	zfs_btree_add(size_tree, rs);
+}
+
+static void
+metaslab_rt_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+	metaslab_rt_arg_t *mrap = arg;
+	zfs_btree_t *size_tree = mrap->mra_bt;
+
+	/* Same floor test as metaslab_rt_add(); 1ULL keeps the shift 64-bit. */
+	if (rs_get_end(rs, rt) - rs_get_start(rs, rt) <
+	    (1ULL << mrap->mra_floor_shift))
+		return;
+	zfs_btree_remove(size_tree, rs);
+}
+
+static void
+metaslab_rt_vacate(range_tree_t *rt, void *arg)
+{
+	metaslab_rt_arg_t *mrap = arg;
+	zfs_btree_t *size_tree = mrap->mra_bt;
+	zfs_btree_clear(size_tree);
+	zfs_btree_destroy(size_tree);
+	/* Rebuild empty; this also resets mra_floor_shift (see rt_create). */
+	metaslab_rt_create(rt, arg);
+}
+
+static range_tree_ops_t metaslab_rt_ops = {
+	.rtop_create = metaslab_rt_create,
+	.rtop_destroy = metaslab_rt_destroy,
+	.rtop_add = metaslab_rt_add,
+	.rtop_remove = metaslab_rt_remove,
+	.rtop_vacate = metaslab_rt_vacate
+};
+
+/*
  * ==========================================================================
  * Common allocator routines
  * ==========================================================================
@@ -1198,29 +1497,103 @@
  * Return the maximum contiguous segment within the metaslab.
  */
 uint64_t
-metaslab_block_maxsize(metaslab_t *msp)
+metaslab_largest_allocatable(metaslab_t *msp)
 {
-	avl_tree_t *t = &msp->ms_allocatable_by_size;
+	zfs_btree_t *t = &msp->ms_allocatable_by_size;
 	range_seg_t *rs;
 
-	if (t == NULL || (rs = avl_last(t)) == NULL)
-		return (0ULL);
+	if (t == NULL)
+		return (0);
+	if (zfs_btree_numnodes(t) == 0)
+		metaslab_size_tree_full_load(msp->ms_allocatable);
 
-	return (rs->rs_end - rs->rs_start);
+	rs = zfs_btree_last(t, NULL);
+	if (rs == NULL)
+		return (0);
+
+	return (rs_get_end(rs, msp->ms_allocatable) - rs_get_start(rs,
+	    msp->ms_allocatable));
+}
+
+/*
+ * Return the maximum contiguous segment within the unflushed frees of this
+ * metaslab.
+ */
+static uint64_t
+metaslab_largest_unflushed_free(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	if (msp->ms_unflushed_frees == NULL)
+		return (0);
+
+	if (zfs_btree_numnodes(&msp->ms_unflushed_frees_by_size) == 0)
+		metaslab_size_tree_full_load(msp->ms_unflushed_frees);
+	range_seg_t *rs = zfs_btree_last(&msp->ms_unflushed_frees_by_size,
+	    NULL);
+	if (rs == NULL)
+		return (0);
+
+	/*
+	 * When a range is freed from the metaslab, that range is added to
+	 * both the unflushed frees and the deferred frees. While the block
+	 * will eventually be usable, if the metaslab were loaded the range
+	 * would not be added to the ms_allocatable tree until TXG_DEFER_SIZE
+	 * txgs had passed.  As a result, when attempting to estimate an upper
+	 * bound for the largest currently-usable free segment in the
+	 * metaslab, we need to not consider any ranges currently in the defer
+	 * trees. This algorithm approximates the largest available chunk in
+	 * the largest range in the unflushed_frees tree by taking the first
+	 * chunk.  While this may be a poor estimate, it should only remain so
+	 * briefly and should eventually self-correct as frees are no longer
+	 * deferred. Similar logic applies to the ms_freed tree. See
+	 * metaslab_load() for more details.
+	 *
+	 * There are two primary sources of inaccuracy in this estimate. Both
+	 * are tolerated for performance reasons. The first source is that we
+	 * only check the largest segment for overlaps. Smaller segments may
+	 * have more favorable overlaps with the other trees, resulting in
+	 * larger usable chunks.  Second, we only look at the first chunk in
+	 * the largest segment; there may be other usable chunks in the
+	 * largest segment, but we ignore them.
+	 */
+	uint64_t rstart = rs_get_start(rs, msp->ms_unflushed_frees);
+	uint64_t rsize = rs_get_end(rs, msp->ms_unflushed_frees) - rstart;
+	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+		uint64_t start = 0;
+		uint64_t size = 0;
+		boolean_t found = range_tree_find_in(msp->ms_defer[t], rstart,
+		    rsize, &start, &size);
+		if (found) {
+			if (rstart == start)
+				return (0);
+			rsize = start - rstart;
+		}
+	}
+
+	uint64_t start = 0;
+	uint64_t size = 0;
+	boolean_t found = range_tree_find_in(msp->ms_freed, rstart,
+	    rsize, &start, &size);
+	if (found)
+		rsize = start - rstart;
+
+	return (rsize);
 }
 
 static range_seg_t *
-metaslab_block_find(avl_tree_t *t, uint64_t start, uint64_t size)
+metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start,
+    uint64_t size, zfs_btree_index_t *where)
 {
-	range_seg_t *rs, rsearch;
-	avl_index_t where;
+	range_seg_t *rs;
+	range_seg_max_t rsearch;
 
-	rsearch.rs_start = start;
-	rsearch.rs_end = start + size;
+	rs_set_start(&rsearch, rt, start);
+	rs_set_end(&rsearch, rt, start + size);
 
-	rs = avl_find(t, &rsearch, &where);
+	rs = zfs_btree_find(t, &rsearch, where);
 	if (rs == NULL) {
-		rs = avl_nearest(t, where, AVL_AFTER);
+		rs = zfs_btree_next(t, where, where);
 	}
 
 	return (rs);
@@ -1228,28 +1601,36 @@
 
 #if defined(WITH_DF_BLOCK_ALLOCATOR) || \
     defined(WITH_CF_BLOCK_ALLOCATOR)
+
 /*
- * This is a helper function that can be used by the allocator to find
- * a suitable block to allocate. This will search the specified AVL
- * tree looking for a block that matches the specified criteria.
+ * This is a helper function that can be used by the allocator to find a
+ * suitable block to allocate. This will search the specified B-tree looking
+ * for a block that matches the specified criteria.
  */
 static uint64_t
-metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
+metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size,
     uint64_t max_search)
 {
-	range_seg_t *rs = metaslab_block_find(t, *cursor, size);
+	if (*cursor == 0)
+		*cursor = rt->rt_start;
+	zfs_btree_t *bt = &rt->rt_root;
+	zfs_btree_index_t where;
+	range_seg_t *rs = metaslab_block_find(bt, rt, *cursor, size, &where);
 	uint64_t first_found;
+	int count_searched = 0;
 
 	if (rs != NULL)
-		first_found = rs->rs_start;
+		first_found = rs_get_start(rs, rt);
 
-	while (rs != NULL && rs->rs_start - first_found <= max_search) {
-		uint64_t offset = rs->rs_start;
-		if (offset + size <= rs->rs_end) {
+	while (rs != NULL && (rs_get_start(rs, rt) - first_found <=
+	    max_search || count_searched < metaslab_min_search_count)) {
+		uint64_t offset = rs_get_start(rs, rt);
+		if (offset + size <= rs_get_end(rs, rt)) {
 			*cursor = offset + size;
 			return (offset);
 		}
-		rs = AVL_NEXT(t, rs);
+		rs = zfs_btree_next(bt, &where, &where);
+		count_searched++;
 	}
 
 	*cursor = 0;
@@ -1295,33 +1676,36 @@
 	uint64_t offset;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT3U(avl_numnodes(&rt->rt_root), ==,
-	    avl_numnodes(&msp->ms_allocatable_by_size));
 
 	/*
 	 * If we're running low on space, find a segment based on size,
 	 * rather than iterating based on offset.
 	 */
-	if (metaslab_block_maxsize(msp) < metaslab_df_alloc_threshold ||
+	if (metaslab_largest_allocatable(msp) < metaslab_df_alloc_threshold ||
 	    free_pct < metaslab_df_free_pct) {
 		offset = -1;
 	} else {
-		offset = metaslab_block_picker(&rt->rt_root,
+		offset = metaslab_block_picker(rt,
 		    cursor, size, metaslab_df_max_search);
 	}
 
 	if (offset == -1) {
 		range_seg_t *rs;
+		if (zfs_btree_numnodes(&msp->ms_allocatable_by_size) == 0)
+			metaslab_size_tree_full_load(msp->ms_allocatable);
+
 		if (metaslab_df_use_largest_segment) {
 			/* use largest free segment */
-			rs = avl_last(&msp->ms_allocatable_by_size);
+			rs = zfs_btree_last(&msp->ms_allocatable_by_size, NULL);
 		} else {
+			zfs_btree_index_t where;
 			/* use segment of this size, or next largest */
 			rs = metaslab_block_find(&msp->ms_allocatable_by_size,
-			    0, size);
+			    rt, msp->ms_start, size, &where);
 		}
-		if (rs != NULL && rs->rs_start + size <= rs->rs_end) {
-			offset = rs->rs_start;
+		if (rs != NULL && rs_get_start(rs, rt) + size <= rs_get_end(rs,
+		    rt)) {
+			offset = rs_get_start(rs, rt);
 			*cursor = offset + size;
 		}
 	}
@@ -1350,25 +1734,27 @@
 metaslab_cf_alloc(metaslab_t *msp, uint64_t size)
 {
 	range_tree_t *rt = msp->ms_allocatable;
-	avl_tree_t *t = &msp->ms_allocatable_by_size;
+	zfs_btree_t *t = &msp->ms_allocatable_by_size;
 	uint64_t *cursor = &msp->ms_lbas[0];
 	uint64_t *cursor_end = &msp->ms_lbas[1];
 	uint64_t offset = 0;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT3U(avl_numnodes(t), ==, avl_numnodes(&rt->rt_root));
 
 	ASSERT3U(*cursor_end, >=, *cursor);
 
 	if ((*cursor + size) > *cursor_end) {
 		range_seg_t *rs;
 
-		rs = avl_last(&msp->ms_allocatable_by_size);
-		if (rs == NULL || (rs->rs_end - rs->rs_start) < size)
+		if (zfs_btree_numnodes(t) == 0)
+			metaslab_size_tree_full_load(msp->ms_allocatable);
+		rs = zfs_btree_last(t, NULL);
+		if (rs == NULL || (rs_get_end(rs, rt) - rs_get_start(rs, rt)) <
+		    size)
 			return (-1ULL);
 
-		*cursor = rs->rs_start;
-		*cursor_end = rs->rs_end;
+		*cursor = rs_get_start(rs, rt);
+		*cursor_end = rs_get_end(rs, rt);
 	}
 
 	offset = *cursor;
@@ -1403,39 +1789,40 @@
 static uint64_t
 metaslab_ndf_alloc(metaslab_t *msp, uint64_t size)
 {
-	avl_tree_t *t = &msp->ms_allocatable->rt_root;
-	avl_index_t where;
-	range_seg_t *rs, rsearch;
+	zfs_btree_t *t = &msp->ms_allocatable->rt_root;
+	range_tree_t *rt = msp->ms_allocatable;
+	zfs_btree_index_t where;
+	range_seg_t *rs;
+	range_seg_max_t rsearch;
 	uint64_t hbit = highbit64(size);
 	uint64_t *cursor = &msp->ms_lbas[hbit - 1];
-	uint64_t max_size = metaslab_block_maxsize(msp);
+	uint64_t max_size = metaslab_largest_allocatable(msp);
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT3U(avl_numnodes(t), ==,
-	    avl_numnodes(&msp->ms_allocatable_by_size));
 
 	if (max_size < size)
 		return (-1ULL);
 
-	rsearch.rs_start = *cursor;
-	rsearch.rs_end = *cursor + size;
+	rs_set_start(&rsearch, rt, *cursor);
+	rs_set_end(&rsearch, rt, *cursor + size);
 
-	rs = avl_find(t, &rsearch, &where);
-	if (rs == NULL || (rs->rs_end - rs->rs_start) < size) {
+	rs = zfs_btree_find(t, &rsearch, &where);
+	if (rs == NULL || (rs_get_end(rs, rt) - rs_get_start(rs, rt)) < size) {
 		t = &msp->ms_allocatable_by_size;
 
-		rsearch.rs_start = 0;
-		rsearch.rs_end = MIN(max_size,
-		    1ULL << (hbit + metaslab_ndf_clump_shift));
-		rs = avl_find(t, &rsearch, &where);
+		rs_set_start(&rsearch, rt, 0);
+		rs_set_end(&rsearch, rt, MIN(max_size, 1ULL << (hbit +
+		    metaslab_ndf_clump_shift)));
+
+		rs = zfs_btree_find(t, &rsearch, &where);
 		if (rs == NULL)
-			rs = avl_nearest(t, where, AVL_AFTER);
+			rs = zfs_btree_next(t, &where, &where);
 		ASSERT(rs != NULL);
 	}
 
-	if ((rs->rs_end - rs->rs_start) >= size) {
-		*cursor = rs->rs_start + size;
-		return (rs->rs_start);
+	if ((rs_get_end(rs, rt) - rs_get_start(rs, rt)) >= size) {
+		*cursor = rs_get_start(rs, rt) + size;
+		return (rs_get_start(rs, rt));
 	}
 	return (-1ULL);
 }
@@ -1454,6 +1841,115 @@
  * ==========================================================================
  */
 
+/*
+ * Sleep on ms_load_cv until ms_loading is clear. Caller must hold ms_lock.
+ */
+static void
+metaslab_load_wait(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	while (msp->ms_loading) {
+		ASSERT(!msp->ms_loaded);	/* loading implies not yet loaded */
+		cv_wait(&msp->ms_load_cv, &msp->ms_lock);
+	}
+}
+
+/*
+ * Sleep on ms_flush_cv until ms_flushing is clear. Caller must hold ms_lock.
+ */
+static void
+metaslab_flush_wait(metaslab_t *msp)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	while (msp->ms_flushing)	/* re-test the flag after every wakeup */
+		cv_wait(&msp->ms_flush_cv, &msp->ms_lock);
+}
+
+static unsigned int
+metaslab_idx_func(multilist_t *ml, void *arg)
+{
+	metaslab_t *msp = arg;	/* arg is the metaslab to be indexed */
+
+	/*
+	 * ms_id values are allocated sequentially, so a full 64-bit
+	 * division would be wasted work; ms_id is truncated to 32 bits.
+	 */
+	return ((unsigned int)msp->ms_id % multilist_get_num_sublists(ml));
+}
+
+uint64_t
+metaslab_allocated_space(metaslab_t *msp)
+{
+	return (msp->ms_allocated_space);	/* plain field accessor */
+}
+
+/*
+ * If ZFS_DEBUG_METASLAB_VERIFY is set, check space map vs. in-core free space.
+ */
+static void
+metaslab_verify_space(metaslab_t *msp, uint64_t txg)
+{
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	uint64_t allocating = 0;	/* total space in ms_allocating[] */
+	uint64_t sm_free_space, msp_free_space;
+
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	ASSERT(!msp->ms_condensing);
+
+	if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
+		return;
+
+	/*
+	 * We can only verify the metaslab space when we're called
+	 * from syncing context with a loaded metaslab that has an
+	 * allocated space map. Calling this in non-syncing context
+	 * does not provide a consistent view of the metaslab since
+	 * we're performing allocations in the future.
+	 */
+	if (txg != spa_syncing_txg(spa) || msp->ms_sm == NULL ||
+	    !msp->ms_loaded)
+		return;
+
+	/*
+	 * Even though the smp_alloc field can get negative,
+	 * when it comes to a metaslab's space map, that should
+	 * never be the case.
+	 */
+	ASSERT3S(space_map_allocated(msp->ms_sm), >=, 0);
+
+	ASSERT3U(space_map_allocated(msp->ms_sm), >=,
+	    range_tree_space(msp->ms_unflushed_frees));
+
+	ASSERT3U(metaslab_allocated_space(msp), ==,
+	    space_map_allocated(msp->ms_sm) +
+	    range_tree_space(msp->ms_unflushed_allocs) -
+	    range_tree_space(msp->ms_unflushed_frees));
+
+	sm_free_space = msp->ms_size - metaslab_allocated_space(msp);
+
+	/*
+	 * Account for future allocations since we would have
+	 * already deducted that space from the ms_allocatable.
+	 */
+	for (int t = 0; t < TXG_CONCURRENT_STATES; t++) {
+		allocating +=
+		    range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]);
+	}
+	ASSERT3U(allocating + msp->ms_allocated_this_txg, ==,
+	    msp->ms_allocating_total);
+
+	ASSERT3U(msp->ms_deferspace, ==,
+	    range_tree_space(msp->ms_defer[0]) +
+	    range_tree_space(msp->ms_defer[1]));
+
+	msp_free_space = range_tree_space(msp->ms_allocatable) + allocating +
+	    msp->ms_deferspace + range_tree_space(msp->ms_freed);
+
+	VERIFY3U(sm_free_space, ==, msp_free_space);
+}
+
 static void
 metaslab_aux_histograms_clear(metaslab_t *msp)
 {
@@ -1577,7 +2073,15 @@
 	if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
 		return;
 
-	/* see comment in metaslab_verify_unflushed_changes() */
+	/*
+	 * We can end up here from vdev_remove_complete(), in which case we
+	 * cannot do these assertions because we hold spa config locks and
+	 * thus we are not allowed to read from the DMU.
+	 *
+	 * We check if the metaslab group has been removed and if that's
+	 * the case we return immediately as that would mean that we are
+	 * here from the aforementioned code path.
+	 */
 	if (msp->ms_group == NULL)
 		return;
 
@@ -1623,16 +2127,21 @@
 
 	msp->ms_weight = 0;
 	msp->ms_fragmentation = 0;
-	msp->ms_max_size = 0;
 
 	/*
-	 * This function is used for verification purposes. Regardless of
-	 * whether metaslab_weight() thinks this metaslab should be active or
-	 * not, we want to ensure that the actual weight (and therefore the
-	 * value of ms_weight) would be the same if it was to be recalculated
-	 * at this point.
+	 * This function is used for verification purposes and thus should
+	 * not introduce any side-effects/mutations on the system's state.
+	 *
+	 * Regardless of whether metaslab_weight() thinks this metaslab
+	 * should be active or not, we want to ensure that the actual weight
+	 * (and therefore the value of ms_weight) would be the same if it
+	 * was to be recalculated at this point.
+	 *
+	 * In addition we set the nodirty flag so metaslab_weight() does
+	 * not dirty the metaslab for future TXGs (e.g. when trying to
+	 * force condensing to upgrade the metaslab spacemaps).
 	 */
-	msp->ms_weight = metaslab_weight(msp) | was_active;
+	msp->ms_weight = metaslab_weight(msp, B_TRUE) | was_active;
 
 	VERIFY3U(max_segsize, ==, msp->ms_max_size);
 
@@ -1652,17 +2161,85 @@
 }
 
 /*
- * Wait for any in-progress metaslab loads to complete.
+ * If we're over the zfs_metaslab_mem_limit, select the loaded metaslab from
+ * this class that was used longest ago, and attempt to unload it.  We don't
+ * want to spend too much time in this loop to prevent performance
+ * degradation, and we expect that most of the time this operation will
+ * succeed. Between that and the normal unloading processing during txg sync,
+ * we expect this to keep the metaslab memory usage under control.
  */
 static void
-metaslab_load_wait(metaslab_t *msp)
+metaslab_potentially_evict(metaslab_class_t *mc)
 {
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
+#ifdef _KERNEL
+	uint64_t allmem = arc_all_memory();
+	uint64_t inuse = spl_kmem_cache_inuse(zfs_btree_leaf_cache);
+	uint64_t size =	spl_kmem_cache_entry_size(zfs_btree_leaf_cache);
+	int tries = 0;	/* capped at 2x the sublist count below */
+	for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size &&
+	    tries < multilist_get_num_sublists(&mc->mc_metaslab_txg_list) * 2;
+	    tries++) {
+		unsigned int idx = multilist_get_random_index(
+		    &mc->mc_metaslab_txg_list);
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock(&mc->mc_metaslab_txg_list, idx);
+		metaslab_t *msp = multilist_sublist_head(mls);
+		multilist_sublist_unlock(mls);	/* re-locked on each pass below */
+		while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 <
+		    inuse * size) {
+			VERIFY3P(mls, ==, multilist_sublist_lock(
+			    &mc->mc_metaslab_txg_list, idx));
+			ASSERT3U(idx, ==,
+			    metaslab_idx_func(&mc->mc_metaslab_txg_list, msp));
 
-	while (msp->ms_loading) {
-		ASSERT(!msp->ms_loaded);
-		cv_wait(&msp->ms_load_cv, &msp->ms_lock);
+			if (!multilist_link_active(&msp->ms_class_txg_node)) {
+				multilist_sublist_unlock(mls);
+				break;	/* msp is no longer linked */
+			}
+			metaslab_t *next_msp = multilist_sublist_next(mls, msp);
+			multilist_sublist_unlock(mls);
+			/*
+			 * If the metaslab is currently loading there are two
+			 * cases. If it's the metaslab we're evicting, we
+			 * can't continue on or we'll panic when we attempt to
+			 * recursively lock the mutex. If it's another
+			 * metaslab that's loading, it can be safely skipped,
+			 * since we know it's very new and therefore not a
+			 * good eviction candidate. We check later once the
+			 * lock is held that the metaslab is fully loaded
+			 * before actually unloading it.
+			 */
+			if (msp->ms_loading) {	/* read without ms_lock held */
+				msp = next_msp;
+				inuse =
+				    spl_kmem_cache_inuse(zfs_btree_leaf_cache);
+				continue;
+			}
+			/*
+			 * We can't unload metaslabs with no spacemap because
+			 * they're not ready to be unloaded yet. We can't
+			 * unload metaslabs with outstanding allocations
+			 * because doing so could cause the metaslab's weight
+			 * to decrease while it's unloaded, which violates an
+			 * invariant that we use to prevent unnecessary
+			 * loading. We also don't unload metaslabs that are
+			 * currently active because they are high-weight
+			 * metaslabs that are likely to be used in the near
+			 * future.
+			 */
+			mutex_enter(&msp->ms_lock);
+			if (msp->ms_allocator == -1 && msp->ms_sm != NULL &&
+			    msp->ms_allocating_total == 0) {
+				metaslab_unload(msp);
+			}
+			mutex_exit(&msp->ms_lock);
+			msp = next_msp;
+			inuse = spl_kmem_cache_inuse(zfs_btree_leaf_cache);
+		}
 	}
+#else
+	(void) mc;	/* no-op when _KERNEL is not defined */
+#endif
 }
 
 static int
@@ -1679,13 +2256,19 @@
 	 * are reading the space map. Therefore, metaslab_sync() and
 	 * metaslab_sync_done() can run at the same time as we do.
 	 *
-	 * metaslab_sync() can append to the space map while we are loading.
-	 * Therefore we load only entries that existed when we started the
-	 * load. Additionally, metaslab_sync_done() has to wait for the load
-	 * to complete because there are potential races like metaslab_load()
-	 * loading parts of the space map that are currently being appended
-	 * by metaslab_sync(). If we didn't, the ms_allocatable would have
-	 * entries that metaslab_sync_done() would try to re-add later.
+	 * If we are using the log space maps, metaslab_sync() can't write to
+	 * the metaslab's space map while we are loading as we only write to
+	 * it when we are flushing the metaslab, and that can't happen while
+	 * we are loading it.
+	 *
+	 * If we are not using log space maps though, metaslab_sync() can
+	 * append to the space map while we are loading. Therefore we load
+	 * only entries that existed when we started the load. Additionally,
+	 * metaslab_sync_done() has to wait for the load to complete because
+	 * there are potential races like metaslab_load() loading parts of the
+	 * space map that are currently being appended by metaslab_sync(). If
+	 * we didn't, the ms_allocatable would have entries that
+	 * metaslab_sync_done() would try to re-add later.
 	 *
 	 * That's why before dropping the lock we remember the synced length
 	 * of the metaslab and read up to that point of the space map,
@@ -1695,29 +2278,73 @@
 	uint64_t length = msp->ms_synced_length;
 	mutex_exit(&msp->ms_lock);
 
+	hrtime_t load_start = gethrtime();
+	metaslab_rt_arg_t *mrap;
+	if (msp->ms_allocatable->rt_arg == NULL) {
+		mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
+	} else {
+		mrap = msp->ms_allocatable->rt_arg;
+		msp->ms_allocatable->rt_ops = NULL;
+		msp->ms_allocatable->rt_arg = NULL;
+	}
+	mrap->mra_bt = &msp->ms_allocatable_by_size;
+	mrap->mra_floor_shift = metaslab_by_size_min_shift;
+
 	if (msp->ms_sm != NULL) {
 		error = space_map_load_length(msp->ms_sm, msp->ms_allocatable,
 		    SM_FREE, length);
+
+		/* Now, populate the size-sorted tree. */
+		metaslab_rt_create(msp->ms_allocatable, mrap);
+		msp->ms_allocatable->rt_ops = &metaslab_rt_ops;
+		msp->ms_allocatable->rt_arg = mrap;
+
+		struct mssa_arg arg = {0};
+		arg.rt = msp->ms_allocatable;
+		arg.mra = mrap;
+		range_tree_walk(msp->ms_allocatable, metaslab_size_sorted_add,
+		    &arg);
 	} else {
 		/*
+		 * Add the size-sorted tree first, since we don't need to load
+		 * the metaslab from the spacemap.
+		 */
+		metaslab_rt_create(msp->ms_allocatable, mrap);
+		msp->ms_allocatable->rt_ops = &metaslab_rt_ops;
+		msp->ms_allocatable->rt_arg = mrap;
+		/*
 		 * The space map has not been allocated yet, so treat
 		 * all the space in the metaslab as free and add it to the
 		 * ms_allocatable tree.
 		 */
 		range_tree_add(msp->ms_allocatable,
 		    msp->ms_start, msp->ms_size);
+
+		if (msp->ms_new) {
+			/*
+			 * If the ms_sm doesn't exist, this means that this
+			 * metaslab hasn't gone through metaslab_sync() and
+			 * thus has never been dirtied. So we shouldn't
+			 * expect any unflushed allocs or frees from previous
+			 * TXGs.
+			 */
+			ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+			ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
+		}
 	}
 
 	/*
 	 * We need to grab the ms_sync_lock to prevent metaslab_sync() from
-	 * changing the ms_sm and the metaslab's range trees while we are
-	 * about to use them and populate the ms_allocatable. The ms_lock
-	 * is insufficient for this because metaslab_sync() doesn't hold
-	 * the ms_lock while writing the ms_checkpointing tree to disk.
+	 * changing the ms_sm (or log_sm) and the metaslab's range trees
+	 * while we are about to use them and populate the ms_allocatable.
+	 * The ms_lock is insufficient for this because metaslab_sync() doesn't
+	 * hold the ms_lock while writing the ms_checkpointing tree to disk.
 	 */
 	mutex_enter(&msp->ms_sync_lock);
 	mutex_enter(&msp->ms_lock);
+
 	ASSERT(!msp->ms_condensing);
+	ASSERT(!msp->ms_flushing);
 
 	if (error != 0) {
 		mutex_exit(&msp->ms_sync_lock);
@@ -1728,10 +2355,58 @@
 	msp->ms_loaded = B_TRUE;
 
 	/*
-	 * The ms_allocatable contains the segments that exist in the
-	 * ms_defer trees [see ms_synced_length]. Thus we need to remove
-	 * them from ms_allocatable as they will be added again in
+	 * Apply all the unflushed changes to ms_allocatable right
+	 * away so any manipulations we do below have a clear view
+	 * of what is allocated and what is free.
+	 */
+	range_tree_walk(msp->ms_unflushed_allocs,
+	    range_tree_remove, msp->ms_allocatable);
+	range_tree_walk(msp->ms_unflushed_frees,
+	    range_tree_add, msp->ms_allocatable);
+
+	ASSERT3P(msp->ms_group, !=, NULL);
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	if (spa_syncing_log_sm(spa) != NULL) {
+		ASSERT(spa_feature_is_enabled(spa,
+		    SPA_FEATURE_LOG_SPACEMAP));
+
+		/*
+		 * If we use a log space map we add all the segments
+		 * that are in ms_unflushed_frees so they are available
+		 * for allocation.
+		 *
+		 * ms_allocatable needs to contain all free segments
+		 * that are ready for allocations (thus not segments
+		 * from ms_freeing, ms_freed, and the ms_defer trees).
+		 * But if we grab the lock in this code path at a sync
+		 * pass later than 1, then it also contains the
+		 * segments of ms_freed (they were added to it earlier
+		 * in this path through ms_unflushed_frees). So we
+		 * need to remove all the segments that exist in
+		 * ms_freed from ms_allocatable as they will be added
+		 * later in metaslab_sync_done().
+		 *
+		 * When there's no log space map, the ms_allocatable
+		 * correctly doesn't contain any segments that exist
+		 * in ms_freed [see ms_synced_length].
+		 */
+		range_tree_walk(msp->ms_freed,
+		    range_tree_remove, msp->ms_allocatable);
+	}
+
+	/*
+	 * If we are not using the log space map, ms_allocatable
+	 * contains the segments that exist in the ms_defer trees
+	 * [see ms_synced_length]. Thus we need to remove them
+	 * from ms_allocatable as they will be added again in
 	 * metaslab_sync_done().
+	 *
+	 * If we are using the log space map, ms_allocatable still
+	 * contains the segments that exist in the ms_defer trees.
+	 * Not because it read them through the ms_sm though. But
+	 * because these segments are part of ms_unflushed_frees
+	 * whose segments we add to ms_allocatable earlier in this
+	 * code path.
 	 */
 	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
 		range_tree_walk(msp->ms_defer[t],
@@ -1751,15 +2426,38 @@
 	 * comment for ms_synchist and ms_deferhist[] for more info]
 	 */
 	uint64_t weight = msp->ms_weight;
+	uint64_t max_size = msp->ms_max_size;
 	metaslab_recalculate_weight_and_sort(msp);
 	if (!WEIGHT_IS_SPACEBASED(weight))
 		ASSERT3U(weight, <=, msp->ms_weight);
-	msp->ms_max_size = metaslab_block_maxsize(msp);
+	msp->ms_max_size = metaslab_largest_allocatable(msp);
+	ASSERT3U(max_size, <=, msp->ms_max_size);
+	hrtime_t load_end = gethrtime();
+	msp->ms_load_time = load_end;
+	zfs_dbgmsg("metaslab_load: txg %llu, spa %s, vdev_id %llu, "
+	    "ms_id %llu, smp_length %llu, "
+	    "unflushed_allocs %llu, unflushed_frees %llu, "
+	    "freed %llu, defer %llu + %llu, unloaded time %llu ms, "
+	    "loading_time %lld ms, ms_max_size %llu, "
+	    "max size error %lld, "
+	    "old_weight %llx, new_weight %llx",
+	    (u_longlong_t)spa_syncing_txg(spa), spa_name(spa),
+	    (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
+	    (u_longlong_t)msp->ms_id,
+	    (u_longlong_t)space_map_length(msp->ms_sm),
+	    (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs),
+	    (u_longlong_t)range_tree_space(msp->ms_unflushed_frees),
+	    (u_longlong_t)range_tree_space(msp->ms_freed),
+	    (u_longlong_t)range_tree_space(msp->ms_defer[0]),
+	    (u_longlong_t)range_tree_space(msp->ms_defer[1]),
+	    (longlong_t)((load_start - msp->ms_unload_time) / 1000000),
+	    (longlong_t)((load_end - load_start) / 1000000),
+	    (u_longlong_t)msp->ms_max_size,
+	    (u_longlong_t)msp->ms_max_size - max_size,
+	    (u_longlong_t)weight, (u_longlong_t)msp->ms_weight);
 
-	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 	metaslab_verify_space(msp, spa_syncing_txg(spa));
 	mutex_exit(&msp->ms_sync_lock);
-
 	return (0);
 }
 
@@ -1778,8 +2476,42 @@
 	VERIFY(!msp->ms_loading);
 	ASSERT(!msp->ms_condensing);
 
+	/*
+	 * We set the loading flag BEFORE potentially dropping the lock to
+	 * wait for an ongoing flush (see ms_flushing below). This way other
+	 * threads know that there is already a thread that is loading this
+	 * metaslab.
+	 */
 	msp->ms_loading = B_TRUE;
+
+	/*
+	 * Wait for any in-progress flushing to finish as we drop the ms_lock
+	 * both here (during space_map_load()) and in metaslab_flush() (when
+	 * we flush our changes to the ms_sm).
+	 */
+	if (msp->ms_flushing)
+		metaslab_flush_wait(msp);
+
+	/*
+	 * In the possibility that we were waiting for the metaslab to be
+	 * flushed (where we temporarily dropped the ms_lock), ensure that
+	 * no one else loaded the metaslab somehow.
+	 */
+	ASSERT(!msp->ms_loaded);
+
+	/*
+	 * If we're loading a metaslab in the normal class, consider evicting
+	 * another one to keep our memory usage under the limit defined by the
+	 * zfs_metaslab_mem_limit tunable.
+	 */
+	if (spa_normal_class(msp->ms_group->mg_class->mc_spa) ==
+	    msp->ms_group->mg_class) {
+		metaslab_potentially_evict(msp->ms_group->mg_class);
+	}
+
 	int error = metaslab_load_impl(msp);
+
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	msp->ms_loading = B_FALSE;
 	cv_broadcast(&msp->ms_load_cv);
 
@@ -1791,14 +2523,46 @@
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
-	metaslab_verify_weight_and_frag(msp);
+	/*
+	 * This can happen if a metaslab is selected for eviction (in
+	 * metaslab_potentially_evict) and then unloaded during spa_sync (via
+	 * metaslab_class_evict_old).
+	 */
+	if (!msp->ms_loaded)
+		return;
 
 	range_tree_vacate(msp->ms_allocatable, NULL, NULL);
 	msp->ms_loaded = B_FALSE;
+	msp->ms_unload_time = gethrtime();
 
 	msp->ms_activation_weight = 0;
 	msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
-	msp->ms_max_size = 0;
+
+	if (msp->ms_group != NULL) {
+		metaslab_class_t *mc = msp->ms_group->mg_class;
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
+		if (multilist_link_active(&msp->ms_class_txg_node))
+			multilist_sublist_remove(mls, msp);
+		multilist_sublist_unlock(mls);
+
+		spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+		zfs_dbgmsg("metaslab_unload: txg %llu, spa %s, vdev_id %llu, "
+		    "ms_id %llu, weight %llx, "
+		    "selected txg %llu (%llu ms ago), alloc_txg %llu, "
+		    "loaded %llu ms ago, max_size %llu",
+		    (u_longlong_t)spa_syncing_txg(spa), spa_name(spa),
+		    (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
+		    (u_longlong_t)msp->ms_id,
+		    (u_longlong_t)msp->ms_weight,
+		    (u_longlong_t)msp->ms_selected_txg,
+		    (u_longlong_t)(msp->ms_unload_time -
+		    msp->ms_selected_time) / 1000 / 1000,
+		    (u_longlong_t)msp->ms_alloc_txg,
+		    (u_longlong_t)(msp->ms_unload_time -
+		    msp->ms_load_time) / 1000 / 1000,
+		    (u_longlong_t)msp->ms_max_size);
+	}
 
 	/*
 	 * We explicitly recalculate the metaslab's weight based on its space
@@ -1806,7 +2570,7 @@
 	 * have their weights calculated from the space map histograms, while
 	 * loaded ones have it calculated from their in-core range tree
 	 * [see metaslab_load()]. This way, the weight reflects the information
-	 * available in-core, whether it is loaded or not
+	 * available in-core, whether it is loaded or not.
 	 *
 	 * If ms_group == NULL means that we came here from metaslab_fini(),
 	 * at which point it doesn't make sense for us to do the recalculation
@@ -1816,7 +2580,45 @@
 		metaslab_recalculate_weight_and_sort(msp);
 }
 
-static void
+/*
+ * We want to optimize the memory use of the per-metaslab range
+ * trees. To do this, we store the segments in the range trees in
+ * units of sectors, zero-indexing from the start of the metaslab. If
+ * vdev_ms_shift - vdev_ashift is less than 32 (and large segments are
+ * not forced), ranges fit in two uint32_ts instead of two uint64_ts.
+ */
+range_seg_type_t
+metaslab_calculate_range_tree_type(vdev_t *vdev, metaslab_t *msp,
+    uint64_t *start, uint64_t *shift)
+{
+	if (vdev->vdev_ms_shift - vdev->vdev_ashift < 32 &&
+	    !zfs_metaslab_force_large_segs) {
+		*shift = vdev->vdev_ashift;	/* sector-sized units */
+		*start = msp->ms_start;	/* offsets relative to this metaslab */
+		return (RANGE_SEG32);
+	} else {
+		*shift = 0;	/* RANGE_SEG64: no shift, no base offset */
+		*start = 0;
+		return (RANGE_SEG64);
+	}
+}
+
+void
+metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg)
+{
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	multilist_sublist_t *mls =
+	    multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
+	if (multilist_link_active(&msp->ms_class_txg_node))
+		multilist_sublist_remove(mls, msp);	/* unlink before re-queueing */
+	msp->ms_selected_txg = txg;
+	msp->ms_selected_time = gethrtime();	/* timestamp of this selection */
+	multilist_sublist_insert_tail(mls, msp);	/* (re)insert at the tail */
+	multilist_sublist_unlock(mls);
+}
+
+void
 metaslab_space_update(vdev_t *vd, metaslab_class_t *mc, int64_t alloc_delta,
     int64_t defer_delta, int64_t space_delta)
 {
@@ -1830,8 +2632,8 @@
 }
 
 int
-metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
-    metaslab_t **msp)
+metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
+    uint64_t txg, metaslab_t **msp)
 {
 	vdev_t *vd = mg->mg_vd;
 	spa_t *spa = vd->vdev_spa;
@@ -1843,6 +2645,8 @@
 	mutex_init(&ms->ms_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&ms->ms_sync_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&ms->ms_flush_cv, NULL, CV_DEFAULT, NULL);
+	multilist_link_init(&ms->ms_class_txg_node);
 
 	ms->ms_id = id;
 	ms->ms_start = id << vd->vdev_ms_shift;
@@ -1850,9 +2654,14 @@
 	ms->ms_allocator = -1;
 	ms->ms_new = B_TRUE;
 
+	vdev_ops_t *ops = vd->vdev_ops;
+	if (ops->vdev_op_metaslab_init != NULL)
+		ops->vdev_op_metaslab_init(vd, &ms->ms_start, &ms->ms_size);
+
 	/*
 	 * We only open space map objects that already exist. All others
-	 * will be opened when we finally allocate an object for it.
+	 * will be opened when we finally allocate an object for it. For
+	 * readonly pools there is no need to open the space map object.
 	 *
 	 * Note:
 	 * When called from vdev_expand(), we can't call into the DMU as
@@ -1861,7 +2670,8 @@
 	 * that case, the object parameter is zero though, so we won't
 	 * call into the DMU.
 	 */
-	if (object != 0) {
+	if (object != 0 && !(spa->spa_mode == SPA_MODE_READ &&
+	    !spa->spa_read_spacemaps)) {
 		error = space_map_open(&ms->ms_sm, mos, object, ms->ms_start,
 		    ms->ms_size, vd->vdev_ashift);
 
@@ -1874,21 +2684,36 @@
 		ms->ms_allocated_space = space_map_allocated(ms->ms_sm);
 	}
 
-	/*
-	 * We create the ms_allocatable here, but we don't create the
-	 * other range trees until metaslab_sync_done().  This serves
-	 * two purposes: it allows metaslab_sync_done() to detect the
-	 * addition of new space; and for debugging, it ensures that
-	 * we'd data fault on any attempt to use this metaslab before
-	 * it's ready.
-	 */
-	ms->ms_allocatable = range_tree_create_impl(&rt_avl_ops,
-	    &ms->ms_allocatable_by_size, metaslab_rangesize_compare, 0);
+	uint64_t shift, start;
+	range_seg_type_t type =
+	    metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
 
-	ms->ms_trim = range_tree_create(NULL, NULL);
+	ms->ms_allocatable = range_tree_create(NULL, type, NULL, start, shift);
+	for (int t = 0; t < TXG_SIZE; t++) {
+		ms->ms_allocating[t] = range_tree_create(NULL, type,
+		    NULL, start, shift);
+	}
+	ms->ms_freeing = range_tree_create(NULL, type, NULL, start, shift);
+	ms->ms_freed = range_tree_create(NULL, type, NULL, start, shift);
+	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+		ms->ms_defer[t] = range_tree_create(NULL, type, NULL,
+		    start, shift);
+	}
+	ms->ms_checkpointing =
+	    range_tree_create(NULL, type, NULL, start, shift);
+	ms->ms_unflushed_allocs =
+	    range_tree_create(NULL, type, NULL, start, shift);
+
+	metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
+	mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
+	mrap->mra_floor_shift = metaslab_by_size_min_shift;
+	ms->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops,
+	    type, mrap, start, shift);
+
+	ms->ms_trim = range_tree_create(NULL, type, NULL, start, shift);
 
 	metaslab_group_add(mg, ms);
-	metaslab_set_fragmentation(ms);
+	metaslab_set_fragmentation(ms, B_FALSE);
 
 	/*
 	 * If we're opening an existing pool (txg == 0) or creating
@@ -1905,17 +2730,6 @@
 		    metaslab_allocated_space(ms), 0, 0);
 	}
 
-	/*
-	 * If metaslab_debug_load is set and we're initializing a metaslab
-	 * that has an allocated space map object then load the space map
-	 * so that we can verify frees.
-	 */
-	if (metaslab_debug_load && ms->ms_sm != NULL) {
-		mutex_enter(&ms->ms_lock);
-		VERIFY0(metaslab_load(ms));
-		mutex_exit(&ms->ms_lock);
-	}
-
 	if (txg != 0) {
 		vdev_dirty(vd, 0, NULL, txg);
 		vdev_dirty(vd, VDD_METASLAB, ms, txg);
@@ -1926,20 +2740,61 @@
 	return (0);
 }
 
+static void
+metaslab_fini_flush_data(metaslab_t *msp)
+{
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+
+	if (metaslab_unflushed_txg(msp) == 0) {	/* then msp must not be in the AVL */
+		ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL),
+		    ==, NULL);
+		return;
+	}
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+
+	mutex_enter(&spa->spa_flushed_ms_lock);	/* held across the AVL removal */
+	avl_remove(&spa->spa_metaslabs_by_flushed, msp);
+	mutex_exit(&spa->spa_flushed_ms_lock);
+
+	spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp));
+	spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp),
+	    metaslab_unflushed_dirty(msp));
+}
+
+uint64_t
+metaslab_unflushed_changes_memused(metaslab_t *ms)
+{
+	return ((range_tree_numsegs(ms->ms_unflushed_allocs) +
+	    range_tree_numsegs(ms->ms_unflushed_frees)) *
+	    ms->ms_unflushed_allocs->rt_root.bt_elem_size);	/* allocs' elem size used for both trees */
+}
+
 void
 metaslab_fini(metaslab_t *msp)
 {
 	metaslab_group_t *mg = msp->ms_group;
 	vdev_t *vd = mg->mg_vd;
+	spa_t *spa = vd->vdev_spa;
+
+	metaslab_fini_flush_data(msp);
 
 	metaslab_group_remove(mg, msp);
 
 	mutex_enter(&msp->ms_lock);
 	VERIFY(msp->ms_group == NULL);
-	metaslab_space_update(vd, mg->mg_class,
-	    -metaslab_allocated_space(msp), 0, -msp->ms_size);
 
+	/*
+	 * If this metaslab hasn't been through metaslab_sync_done() yet its
+	 * space hasn't been accounted for in its vdev and doesn't need to be
+	 * subtracted.
+	 */
+	if (!msp->ms_new) {
+		metaslab_space_update(vd, mg->mg_class,
+		    -metaslab_allocated_space(msp), 0, -msp->ms_size);
+
+	}
 	space_map_close(msp->ms_sm);
+	msp->ms_sm = NULL;
 
 	metaslab_unload(msp);
 
@@ -1947,17 +2802,24 @@
 	range_tree_destroy(msp->ms_freeing);
 	range_tree_destroy(msp->ms_freed);
 
+	ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+	    metaslab_unflushed_changes_memused(msp));
+	spa->spa_unflushed_stats.sus_memused -=
+	    metaslab_unflushed_changes_memused(msp);
+	range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
+	range_tree_destroy(msp->ms_unflushed_allocs);
+	range_tree_destroy(msp->ms_checkpointing);
+	range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
+	range_tree_destroy(msp->ms_unflushed_frees);
+
 	for (int t = 0; t < TXG_SIZE; t++) {
 		range_tree_destroy(msp->ms_allocating[t]);
 	}
-
 	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
 		range_tree_destroy(msp->ms_defer[t]);
 	}
 	ASSERT0(msp->ms_deferspace);
 
-	range_tree_destroy(msp->ms_checkpointing);
-
 	for (int t = 0; t < TXG_SIZE; t++)
 		ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t));
 
@@ -1966,6 +2828,7 @@
 
 	mutex_exit(&msp->ms_lock);
 	cv_destroy(&msp->ms_load_cv);
+	cv_destroy(&msp->ms_flush_cv);
 	mutex_destroy(&msp->ms_lock);
 	mutex_destroy(&msp->ms_sync_lock);
 	ASSERT3U(msp->ms_allocator, ==, -1);
@@ -2020,7 +2883,7 @@
  * value should be in the range [0, 100].
  */
 static void
-metaslab_set_fragmentation(metaslab_t *msp)
+metaslab_set_fragmentation(metaslab_t *msp, boolean_t nodirty)
 {
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 	uint64_t fragmentation = 0;
@@ -2055,14 +2918,17 @@
 		 * be shutting down the pool. We don't want to dirty
 		 * any data past this point so skip setting the condense
 		 * flag. We can retry this action the next time the pool
-		 * is imported.
+		 * is imported. We also skip marking this metaslab for
+		 * condensing if the caller has explicitly set nodirty.
 		 */
-		if (spa_writeable(spa) && txg < spa_final_dirty_txg(spa)) {
+		if (!nodirty &&
+		    spa_writeable(spa) && txg < spa_final_dirty_txg(spa)) {
 			msp->ms_condense_wanted = B_TRUE;
 			vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
 			zfs_dbgmsg("txg %llu, requesting force condense: "
-			    "ms_id %llu, vdev_id %llu", txg, msp->ms_id,
-			    vd->vdev_id);
+			    "ms_id %llu, vdev_id %llu", (u_longlong_t)txg,
+			    (u_longlong_t)msp->ms_id,
+			    (u_longlong_t)vd->vdev_id);
 		}
 		msp->ms_fragmentation = ZFS_FRAG_INVALID;
 		return;
@@ -2105,7 +2971,6 @@
 	uint64_t weight, space;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
-	ASSERT(!vd->vdev_removing);
 
 	/*
 	 * The baseline weight is the metaslab's free space.
@@ -2207,9 +3072,9 @@
 }
 
 /*
- * Calculate the weight based on the on-disk histogram. This should only
- * be called after a sync pass has completely finished since the on-disk
- * information is updated in metaslab_sync().
+ * Calculate the weight based on the on-disk histogram. Should be applied
+ * only to unloaded metaslabs (i.e. no incoming allocations) in order to
+ * give results consistent with the on-disk state.
  */
 static uint64_t
 metaslab_weight_from_spacemap(metaslab_t *msp)
@@ -2283,7 +3148,6 @@
 		}
 		WEIGHT_SET_ACTIVE(weight, 0);
 		ASSERT(!WEIGHT_IS_SPACEBASED(weight));
-
 		return (weight);
 	}
 
@@ -2317,17 +3181,26 @@
 
 /*
  * Determine if we should attempt to allocate from this metaslab. If the
- * metaslab has a maximum size then we can quickly determine if the desired
- * allocation size can be satisfied. Otherwise, if we're using segment-based
- * weighting then we can determine the maximum allocation that this metaslab
- * can accommodate based on the index encoded in the weight. If we're using
- * space-based weights then rely on the entire weight (excluding the weight
- * type bit).
+ * metaslab is loaded, then we can determine if the desired allocation
+ * can be satisfied by looking at the size of the maximum free segment
+ * on that metaslab. Otherwise, we make our decision based on the metaslab's
+ * weight. For segment-based weighting we can determine the maximum
+ * allocation based on the index encoded in its value. For space-based
+ * weights we rely on the entire weight (excluding the weight-type bit).
  */
-boolean_t
-metaslab_should_allocate(metaslab_t *msp, uint64_t asize)
+static boolean_t
+metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
 {
-	if (msp->ms_max_size != 0)
+	/*
+	 * If the metaslab is loaded, ms_max_size is definitive and we can use
+	 * the fast check. If it's not, the ms_max_size is a lower bound (once
+	 * set), and we should use the fast check as long as we're not in
+	 * try_hard and it's been less than zfs_metaslab_max_size_cache_sec
+	 * seconds since the metaslab was unloaded.
+	 */
+	if (msp->ms_loaded ||
+	    (msp->ms_max_size != 0 && !try_hard && gethrtime() <
+	    msp->ms_unload_time + SEC2NSEC(zfs_metaslab_max_size_cache_sec)))
 		return (msp->ms_max_size >= asize);
 
 	boolean_t should_allocate;
@@ -2347,8 +3220,9 @@
 
 	return (should_allocate);
 }
+
 static uint64_t
-metaslab_weight(metaslab_t *msp)
+metaslab_weight(metaslab_t *msp, boolean_t nodirty)
 {
 	vdev_t *vd = msp->ms_group->mg_vd;
 	spa_t *spa = vd->vdev_spa;
@@ -2356,24 +3230,24 @@
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
-	/*
-	 * If this vdev is in the process of being removed, there is nothing
-	 * for us to do here.
-	 */
-	if (vd->vdev_removing)
-		return (0);
-
-	metaslab_set_fragmentation(msp);
+	metaslab_set_fragmentation(msp, nodirty);
 
 	/*
-	 * Update the maximum size if the metaslab is loaded. This will
+	 * Update the maximum size. If the metaslab is loaded, this will
 	 * ensure that we get an accurate maximum size if newly freed space
-	 * has been added back into the free tree.
+	 * has been added back into the free tree. If the metaslab is
+	 * unloaded, we check if there's a larger free segment in the
+	 * unflushed frees. This is a lower bound on the largest allocatable
+	 * segment size. Coalescing of adjacent entries may reveal larger
+	 * allocatable segments, but we aren't aware of those until loading
+	 * the space map into a range tree.
 	 */
-	if (msp->ms_loaded)
-		msp->ms_max_size = metaslab_block_maxsize(msp);
-	else
-		ASSERT0(msp->ms_max_size);
+	if (msp->ms_loaded) {
+		msp->ms_max_size = metaslab_largest_allocatable(msp);
+	} else {
+		msp->ms_max_size = MAX(msp->ms_max_size,
+		    metaslab_largest_unflushed_free(msp));
+	}
 
 	/*
 	 * Segment-based weighting requires space map histogram support.
@@ -2397,35 +3271,46 @@
 	/* note: we preserve the mask (e.g. indication of primary, etc..) */
 	uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
 	metaslab_group_sort(msp->ms_group, msp,
-	    metaslab_weight(msp) | was_active);
+	    metaslab_weight(msp, B_FALSE) | was_active);
 }
 
 static int
 metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
     int allocator, uint64_t activation_weight)
 {
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 
 	/*
 	 * If we're activating for the claim code, we don't want to actually
 	 * set the metaslab up for a specific allocator.
 	 */
-	if (activation_weight == METASLAB_WEIGHT_CLAIM)
+	if (activation_weight == METASLAB_WEIGHT_CLAIM) {
+		ASSERT0(msp->ms_activation_weight);
+		msp->ms_activation_weight = msp->ms_weight;
+		metaslab_group_sort(mg, msp, msp->ms_weight |
+		    activation_weight);
 		return (0);
+	}
 
-	metaslab_t **arr = (activation_weight == METASLAB_WEIGHT_PRIMARY ?
-	    mg->mg_primaries : mg->mg_secondaries);
+	metaslab_t **mspp = (activation_weight == METASLAB_WEIGHT_PRIMARY ?
+	    &mga->mga_primary : &mga->mga_secondary);
 
 	mutex_enter(&mg->mg_lock);
-	if (arr[allocator] != NULL) {
+	if (*mspp != NULL) {
 		mutex_exit(&mg->mg_lock);
 		return (EEXIST);
 	}
 
-	arr[allocator] = msp;
+	*mspp = msp;
 	ASSERT3S(msp->ms_allocator, ==, -1);
 	msp->ms_allocator = allocator;
 	msp->ms_primary = (activation_weight == METASLAB_WEIGHT_PRIMARY);
+
+	ASSERT0(msp->ms_activation_weight);
+	msp->ms_activation_weight = msp->ms_weight;
+	metaslab_group_sort_impl(mg, msp,
+	    msp->ms_weight | activation_weight);
 	mutex_exit(&mg->mg_lock);
 
 	return (0);
@@ -2502,11 +3387,6 @@
 		return (error);
 	}
 
-	ASSERT0(msp->ms_activation_weight);
-	msp->ms_activation_weight = msp->ms_weight;
-	metaslab_group_sort(msp->ms_group, msp,
-	    msp->ms_weight | activation_weight);
-
 	ASSERT(msp->ms_loaded);
 	ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 
@@ -2530,14 +3410,15 @@
 	ASSERT3S(0, <=, msp->ms_allocator);
 	ASSERT3U(msp->ms_allocator, <, mg->mg_allocators);
 
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[msp->ms_allocator];
 	if (msp->ms_primary) {
-		ASSERT3P(mg->mg_primaries[msp->ms_allocator], ==, msp);
+		ASSERT3P(mga->mga_primary, ==, msp);
 		ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
-		mg->mg_primaries[msp->ms_allocator] = NULL;
+		mga->mga_primary = NULL;
 	} else {
-		ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp);
+		ASSERT3P(mga->mga_secondary, ==, msp);
 		ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
-		mg->mg_secondaries[msp->ms_allocator] = NULL;
+		mga->mga_secondary = NULL;
 	}
 	msp->ms_allocator = -1;
 	metaslab_group_sort_impl(mg, msp, weight);
@@ -2547,7 +3428,7 @@
 static void
 metaslab_passivate(metaslab_t *msp, uint64_t weight)
 {
-	ASSERTV(uint64_t size = weight & ~METASLAB_WEIGHT_TYPE);
+	uint64_t size __maybe_unused = weight & ~METASLAB_WEIGHT_TYPE;
 
 	/*
 	 * If size < SPA_MINBLOCKSIZE, then we will not allocate from
@@ -2570,13 +3451,13 @@
  * we either fail an allocation attempt (similar to space-based metaslabs)
  * or have exhausted the free space in zfs_metaslab_switch_threshold
  * buckets since the metaslab was activated. This function checks to see
- * if we've exhaused the zfs_metaslab_switch_threshold buckets in the
+ * if we've exhausted the zfs_metaslab_switch_threshold buckets in the
  * metaslab and passivates it proactively. This will allow us to select a
  * metaslab with a larger contiguous region, if any, remaining within this
  * metaslab group. If we're in sync pass > 1, then we continue using this
  * metaslab so that we don't dirty more block and cause more sync passes.
  */
-void
+static void
 metaslab_segment_may_passivate(metaslab_t *msp)
 {
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
@@ -2601,14 +3482,15 @@
 metaslab_preload(void *arg)
 {
 	metaslab_t *msp = arg;
-	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	metaslab_class_t *mc = msp->ms_group->mg_class;
+	spa_t *spa = mc->mc_spa;
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 
 	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
 
 	mutex_enter(&msp->ms_lock);
 	(void) metaslab_load(msp);
-	msp->ms_selected_txg = spa_syncing_txg(spa);
+	metaslab_set_selected_txg(msp, spa_syncing_txg(spa));
 	mutex_exit(&msp->ms_lock);
 	spl_fstrans_unmark(cookie);
 }
@@ -2651,18 +3533,19 @@
 }
 
 /*
- * Determine if the space map's on-disk footprint is past our tolerance
- * for inefficiency. We would like to use the following criteria to make
- * our decision:
+ * Determine if the space map's on-disk footprint is past our tolerance for
+ * inefficiency. We would like to use the following criteria to make our
+ * decision:
  *
- * 1. The size of the space map object should not dramatically increase as a
- * result of writing out the free space range tree.
+ * 1. Do not condense if the size of the space map object would dramatically
+ *    increase as a result of writing out the free space range tree.
  *
- * 2. The minimal on-disk space map representation is zfs_condense_pct/100
- * times the size than the free space range tree representation
- * (i.e. zfs_condense_pct = 110 and in-core = 1MB, minimal = 1.1MB).
+ * 2. Condense if the on-disk space map representation is at least
+ *    zfs_condense_pct/100 times the size of the optimal representation
+ *    (i.e. zfs_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).
  *
- * 3. The on-disk size of the space map should actually decrease.
+ * 3. Do not condense if the on-disk size of the space map does not actually
+ *    decrease.
  *
  * Unfortunately, we cannot compute the on-disk size of the space map in this
  * context because we cannot accurately compute the effects of compression, etc.
@@ -2676,127 +3559,396 @@
 	space_map_t *sm = msp->ms_sm;
 	vdev_t *vd = msp->ms_group->mg_vd;
 	uint64_t vdev_blocksize = 1 << vd->vdev_ashift;
-	uint64_t current_txg = spa_syncing_txg(vd->vdev_spa);
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	ASSERT(msp->ms_loaded);
-
-	/*
-	 * Allocations and frees in early passes are generally more space
-	 * efficient (in terms of blocks described in space map entries)
-	 * than the ones in later passes (e.g. we don't compress after
-	 * sync pass 5) and condensing a metaslab multiple times in a txg
-	 * could degrade performance.
-	 *
-	 * Thus we prefer condensing each metaslab at most once every txg at
-	 * the earliest sync pass possible. If a metaslab is eligible for
-	 * condensing again after being considered for condensing within the
-	 * same txg, it will hopefully be dirty in the next txg where it will
-	 * be condensed at an earlier pass.
-	 */
-	if (msp->ms_condense_checked_txg == current_txg)
-		return (B_FALSE);
-	msp->ms_condense_checked_txg = current_txg;
+	ASSERT(sm != NULL);
+	ASSERT3U(spa_sync_pass(vd->vdev_spa), ==, 1);
 
 	/*
 	 * We always condense metaslabs that are empty and metaslabs for
 	 * which a condense request has been made.
 	 */
-	if (avl_is_empty(&msp->ms_allocatable_by_size) ||
+	if (range_tree_numsegs(msp->ms_allocatable) == 0 ||
 	    msp->ms_condense_wanted)
 		return (B_TRUE);
 
-	uint64_t object_size = space_map_length(msp->ms_sm);
+	uint64_t record_size = MAX(sm->sm_blksz, vdev_blocksize);
+	uint64_t object_size = space_map_length(sm);
 	uint64_t optimal_size = space_map_estimate_optimal_size(sm,
 	    msp->ms_allocatable, SM_NO_VDEVID);
 
-	dmu_object_info_t doi;
-	dmu_object_info_from_db(sm->sm_dbuf, &doi);
-	uint64_t record_size = MAX(doi.doi_data_block_size, vdev_blocksize);
-
 	return (object_size >= (optimal_size * zfs_condense_pct / 100) &&
 	    object_size > zfs_metaslab_condense_block_threshold * record_size);
 }
 
 /*
  * Condense the on-disk space map representation to its minimized form.
- * The minimized form consists of a small number of allocations followed by
- * the entries of the free range tree.
+ * The minimized form consists of a small number of allocations followed
+ * by the entries of the free range tree (ms_allocatable). The condensed
+ * spacemap contains all the entries of previous TXGs (including those in
+ * the pool-wide log spacemaps; thus this is effectively a superset of
+ * metaslab_flush()), but this TXG's entries still need to be written.
  */
 static void
-metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
+metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
 {
 	range_tree_t *condense_tree;
 	space_map_t *sm = msp->ms_sm;
+	uint64_t txg = dmu_tx_get_txg(tx);
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	ASSERT(msp->ms_loaded);
+	ASSERT(msp->ms_sm != NULL);
 
+	/*
+	 * In order to condense the space map, we need to change it so it
+	 * only describes which segments are currently allocated and free.
+	 *
+	 * All the current free space resides in the ms_allocatable, all
+	 * the ms_defer trees, and all the ms_allocating trees. We ignore
+	 * ms_freed because it is empty because we're in sync pass 1. We
+	 * ignore ms_freeing because these changes are not yet reflected
+	 * in the spacemap (they will be written later this txg).
+	 *
+	 * So to truncate the space map to represent all the entries of
+	 * previous TXGs we do the following:
+	 *
+	 * 1] We create a range tree (condense tree) that is 100% empty.
+	 * 2] We add to it all segments found in the ms_defer trees
+	 *    as those segments are marked as free in the original space
+	 *    map. We do the same with the ms_allocating trees for the same
+	 *    reason. Adding these segments should be a relatively
+	 *    inexpensive operation since we expect these trees to have a
+	 *    small number of nodes.
+	 * 3] We vacate any unflushed allocs, since they are not frees we
+	 *    need to add to the condense tree. Then we vacate any
+	 *    unflushed frees as they should already be part of ms_allocatable.
+	 * 4] At this point, we would ideally like to add all segments
+	 *    in the ms_allocatable tree to the condense tree. This way
+	 *    we would write all the entries of the condense tree as the
+	 *    condensed space map, which would only contain freed
+	 *    segments with everything else assumed to be allocated.
+	 *
+	 *    Doing so can be prohibitively expensive as ms_allocatable can
+	 *    be large, and therefore computationally expensive to add to
+	 *    the condense_tree. Instead we first sync out an entry marking
+	 *    everything as allocated, then the condense_tree and then the
+	 *    ms_allocatable, in the condensed space map. While this is not
+	 *    optimal, it is typically close to optimal and more importantly
+	 *    much cheaper to compute.
+	 *
+	 * 5] Finally, as both of the unflushed trees were written to our
+	 *    new and condensed metaslab space map, we basically flushed
+	 *    all the unflushed changes to disk, thus we call
+	 *    metaslab_flush_update().
+	 */
+	ASSERT3U(spa_sync_pass(spa), ==, 1);
+	ASSERT(range_tree_is_empty(msp->ms_freed)); /* since it is pass 1 */
 
 	zfs_dbgmsg("condensing: txg %llu, msp[%llu] %px, vdev id %llu, "
-	    "spa %s, smp size %llu, segments %lu, forcing condense=%s", txg,
-	    msp->ms_id, msp, msp->ms_group->mg_vd->vdev_id,
-	    msp->ms_group->mg_vd->vdev_spa->spa_name,
-	    space_map_length(msp->ms_sm),
-	    avl_numnodes(&msp->ms_allocatable->rt_root),
+	    "spa %s, smp size %llu, segments %llu, forcing condense=%s",
+	    (u_longlong_t)txg, (u_longlong_t)msp->ms_id, msp,
+	    (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
+	    spa->spa_name, (u_longlong_t)space_map_length(msp->ms_sm),
+	    (u_longlong_t)range_tree_numsegs(msp->ms_allocatable),
 	    msp->ms_condense_wanted ? "TRUE" : "FALSE");
 
 	msp->ms_condense_wanted = B_FALSE;
 
-	/*
-	 * Create an range tree that is 100% allocated. We remove segments
-	 * that have been freed in this txg, any deferred frees that exist,
-	 * and any allocation in the future. Removing segments should be
-	 * a relatively inexpensive operation since we expect these trees to
-	 * have a small number of nodes.
-	 */
-	condense_tree = range_tree_create(NULL, NULL);
-	range_tree_add(condense_tree, msp->ms_start, msp->ms_size);
+	range_seg_type_t type;
+	uint64_t shift, start;
+	type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd, msp,
+	    &start, &shift);
 
-	range_tree_walk(msp->ms_freeing, range_tree_remove, condense_tree);
-	range_tree_walk(msp->ms_freed, range_tree_remove, condense_tree);
+	condense_tree = range_tree_create(NULL, type, NULL, start, shift);
 
 	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
 		range_tree_walk(msp->ms_defer[t],
-		    range_tree_remove, condense_tree);
+		    range_tree_add, condense_tree);
 	}
 
-	for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+	for (int t = 0; t < TXG_CONCURRENT_STATES; t++) {
 		range_tree_walk(msp->ms_allocating[(txg + t) & TXG_MASK],
-		    range_tree_remove, condense_tree);
+		    range_tree_add, condense_tree);
 	}
 
+	ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+	    metaslab_unflushed_changes_memused(msp));
+	spa->spa_unflushed_stats.sus_memused -=
+	    metaslab_unflushed_changes_memused(msp);
+	range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
+	range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
+
 	/*
-	 * We're about to drop the metaslab's lock thus allowing
-	 * other consumers to change it's content. Set the
-	 * metaslab's ms_condensing flag to ensure that
-	 * allocations on this metaslab do not occur while we're
-	 * in the middle of committing it to disk. This is only critical
-	 * for ms_allocatable as all other range trees use per txg
+	 * We're about to drop the metaslab's lock thus allowing other
+	 * consumers to change its content. Set the metaslab's ms_condensing
+	 * flag to ensure that allocations on this metaslab do not occur
+	 * while we're in the middle of committing it to disk. This is only
+	 * critical for ms_allocatable as all other range trees use per TXG
 	 * views of their content.
 	 */
 	msp->ms_condensing = B_TRUE;
 
 	mutex_exit(&msp->ms_lock);
-	space_map_truncate(sm, zfs_metaslab_sm_blksz, tx);
+	uint64_t object = space_map_object(msp->ms_sm);
+	space_map_truncate(sm,
+	    spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP) ?
+	    zfs_metaslab_sm_blksz_with_log : zfs_metaslab_sm_blksz_no_log, tx);
 
 	/*
-	 * While we would ideally like to create a space map representation
-	 * that consists only of allocation records, doing so can be
-	 * prohibitively expensive because the in-core free tree can be
-	 * large, and therefore computationally expensive to subtract
-	 * from the condense_tree. Instead we sync out two trees, a cheap
-	 * allocation only tree followed by the in-core free tree. While not
-	 * optimal, this is typically close to optimal, and much cheaper to
-	 * compute.
+	 * space_map_truncate() may have reallocated the spacemap object.
+	 * If so, update the vdev_ms_array.
 	 */
-	space_map_write(sm, condense_tree, SM_ALLOC, SM_NO_VDEVID, tx);
+	if (space_map_object(msp->ms_sm) != object) {
+		object = space_map_object(msp->ms_sm);
+		dmu_write(spa->spa_meta_objset,
+		    msp->ms_group->mg_vd->vdev_ms_array, sizeof (uint64_t) *
+		    msp->ms_id, sizeof (uint64_t), &object, tx);
+	}
+
+	/*
+	 * Note:
+	 * When the log space map feature is enabled, each space map will
+	 * always have ALLOCS followed by FREES for each sync pass. This is
+	 * typically true even when the log space map feature is disabled,
+	 * except for the case where a metaslab goes through metaslab_sync()
+	 * and gets condensed. In that case the metaslab's space map will have
+	 * ALLOCS followed by FREES (due to condensing) followed by ALLOCS
+	 * followed by FREES (due to space_map_write() in metaslab_sync()) for
+	 * sync pass 1.
+	 */
+	range_tree_t *tmp_tree = range_tree_create(NULL, type, NULL, start,
+	    shift);
+	range_tree_add(tmp_tree, msp->ms_start, msp->ms_size);
+	space_map_write(sm, tmp_tree, SM_ALLOC, SM_NO_VDEVID, tx);
+	space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
+	space_map_write(sm, condense_tree, SM_FREE, SM_NO_VDEVID, tx);
+
 	range_tree_vacate(condense_tree, NULL, NULL);
 	range_tree_destroy(condense_tree);
-
-	space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
+	range_tree_vacate(tmp_tree, NULL, NULL);
+	range_tree_destroy(tmp_tree);
 	mutex_enter(&msp->ms_lock);
+
 	msp->ms_condensing = B_FALSE;
+	metaslab_flush_update(msp, tx);
+}
+
+static void
+metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx)
+{
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	ASSERT(spa_syncing_log_sm(spa) != NULL);
+	ASSERT(msp->ms_sm != NULL);
+	ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+	ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
+
+	mutex_enter(&spa->spa_flushed_ms_lock);
+	metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+	metaslab_set_unflushed_dirty(msp, B_TRUE);
+	avl_add(&spa->spa_metaslabs_by_flushed, msp);
+	mutex_exit(&spa->spa_flushed_ms_lock);
+
+	spa_log_sm_increment_current_mscount(spa);
+	spa_log_summary_add_flushed_metaslab(spa, B_TRUE);
+}
+
+void
+metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty)
+{
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	ASSERT(spa_syncing_log_sm(spa) != NULL);
+	ASSERT(msp->ms_sm != NULL);
+	ASSERT(metaslab_unflushed_txg(msp) != 0);
+	ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp);
+	ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+	ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
+
+	VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa));
+
+	/* update metaslab's position in our flushing tree */
+	uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp);
+	boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp);
+	mutex_enter(&spa->spa_flushed_ms_lock);
+	avl_remove(&spa->spa_metaslabs_by_flushed, msp);
+	metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+	metaslab_set_unflushed_dirty(msp, dirty);
+	avl_add(&spa->spa_metaslabs_by_flushed, msp);
+	mutex_exit(&spa->spa_flushed_ms_lock);
+
+	/* update metaslab counts of spa_log_sm_t nodes */
+	spa_log_sm_decrement_mscount(spa, ms_prev_flushed_txg);
+	spa_log_sm_increment_current_mscount(spa);
+
+	/* update log space map summary */
+	spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg,
+	    ms_prev_flushed_dirty);
+	spa_log_summary_add_flushed_metaslab(spa, dirty);
+
+	/* cleanup obsolete logs if any */
+	spa_cleanup_old_sm_logs(spa, tx);
+}
+
+/*
+ * Called when the metaslab has been flushed (its own spacemap now reflects
+ * all the contents of the pool-wide spacemap log). Updates the metaslab's
+ * metadata and any pool-wide related log space map data (e.g. summary,
+ * obsolete logs, etc..) to reflect that.
+ */
+static void
+metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+{
+	metaslab_group_t *mg = msp->ms_group;
+	spa_t *spa = mg->mg_vd->vdev_spa;
+
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	ASSERT3U(spa_sync_pass(spa), ==, 1);
+
+	/*
+	 * Just because a metaslab got flushed, that doesn't mean that
+	 * it will pass through metaslab_sync_done(). Thus, make sure to
+	 * update ms_synced_length here in case it doesn't.
+	 */
+	msp->ms_synced_length = space_map_length(msp->ms_sm);
+
+	/*
+	 * We may end up here from metaslab_condense() without the
+	 * feature being active. In that case this is a no-op.
+	 */
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) ||
+	    metaslab_unflushed_txg(msp) == 0)
+		return;
+
+	metaslab_unflushed_bump(msp, tx, B_FALSE);
+}
+
+boolean_t
+metaslab_flush(metaslab_t *msp, dmu_tx_t *tx)
+{
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	ASSERT3U(spa_sync_pass(spa), ==, 1);
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+
+	ASSERT(msp->ms_sm != NULL);
+	ASSERT(metaslab_unflushed_txg(msp) != 0);
+	ASSERT(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL) != NULL);
+
+	/*
+	 * There is nothing wrong with flushing the same metaslab twice, as
+	 * this codepath should work on that case. However, the current
+	 * flushing scheme makes sure to avoid this situation as we would be
+	 * making all these calls without having anything meaningful to write
+	 * to disk. We assert this behavior here.
+	 */
+	ASSERT3U(metaslab_unflushed_txg(msp), <, dmu_tx_get_txg(tx));
+
+	/*
+	 * We can not flush while loading, because then we would
+	 * not load the ms_unflushed_{allocs,frees}.
+	 */
+	if (msp->ms_loading)
+		return (B_FALSE);
+
+	metaslab_verify_space(msp, dmu_tx_get_txg(tx));
+	metaslab_verify_weight_and_frag(msp);
+
+	/*
+	 * Metaslab condensing is effectively flushing. Therefore if the
+	 * metaslab can be condensed we can just condense it instead of
+	 * flushing it.
+	 *
+	 * Note that metaslab_condense() does call metaslab_flush_update()
+	 * so we can just return immediately after condensing. We also
+	 * don't need to care about setting ms_flushing or broadcasting
+	 * ms_flush_cv, even if we temporarily drop the ms_lock in
+	 * metaslab_condense(), as the metaslab is already loaded.
+	 */
+	if (msp->ms_loaded && metaslab_should_condense(msp)) {
+		metaslab_group_t *mg = msp->ms_group;
+
+		/*
+		 * For all histogram operations below refer to the
+		 * comments of metaslab_sync() where we follow a
+		 * similar procedure.
+		 */
+		metaslab_group_histogram_verify(mg);
+		metaslab_class_histogram_verify(mg->mg_class);
+		metaslab_group_histogram_remove(mg, msp);
+
+		metaslab_condense(msp, tx);
+
+		space_map_histogram_clear(msp->ms_sm);
+		space_map_histogram_add(msp->ms_sm, msp->ms_allocatable, tx);
+		ASSERT(range_tree_is_empty(msp->ms_freed));
+		for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+			space_map_histogram_add(msp->ms_sm,
+			    msp->ms_defer[t], tx);
+		}
+		metaslab_aux_histograms_update(msp);
+
+		metaslab_group_histogram_add(mg, msp);
+		metaslab_group_histogram_verify(mg);
+		metaslab_class_histogram_verify(mg->mg_class);
+
+		metaslab_verify_space(msp, dmu_tx_get_txg(tx));
+
+		/*
+		 * Since we recreated the histogram (and potentially
+		 * the ms_sm too while condensing) ensure that the
+		 * weight is updated too because we are not guaranteed
+		 * that this metaslab is dirty and will go through
+		 * metaslab_sync_done().
+		 */
+		metaslab_recalculate_weight_and_sort(msp);
+		return (B_TRUE);
+	}
+
+	msp->ms_flushing = B_TRUE;
+	uint64_t sm_len_before = space_map_length(msp->ms_sm);
+
+	mutex_exit(&msp->ms_lock);
+	space_map_write(msp->ms_sm, msp->ms_unflushed_allocs, SM_ALLOC,
+	    SM_NO_VDEVID, tx);
+	space_map_write(msp->ms_sm, msp->ms_unflushed_frees, SM_FREE,
+	    SM_NO_VDEVID, tx);
+	mutex_enter(&msp->ms_lock);
+
+	uint64_t sm_len_after = space_map_length(msp->ms_sm);
+	if (zfs_flags & ZFS_DEBUG_LOG_SPACEMAP) {
+		zfs_dbgmsg("flushing: txg %llu, spa %s, vdev_id %llu, "
+		    "ms_id %llu, unflushed_allocs %llu, unflushed_frees %llu, "
+		    "appended %llu bytes", (u_longlong_t)dmu_tx_get_txg(tx),
+		    spa_name(spa),
+		    (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
+		    (u_longlong_t)msp->ms_id,
+		    (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs),
+		    (u_longlong_t)range_tree_space(msp->ms_unflushed_frees),
+		    (u_longlong_t)(sm_len_after - sm_len_before));
+	}
+
+	ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+	    metaslab_unflushed_changes_memused(msp));
+	spa->spa_unflushed_stats.sus_memused -=
+	    metaslab_unflushed_changes_memused(msp);
+	range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
+	range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
+
+	metaslab_verify_space(msp, dmu_tx_get_txg(tx));
+	metaslab_verify_weight_and_frag(msp);
+
+	metaslab_flush_update(msp, tx);
+
+	metaslab_verify_space(msp, dmu_tx_get_txg(tx));
+	metaslab_verify_weight_and_frag(msp);
+
+	msp->ms_flushing = B_FALSE;
+	cv_broadcast(&msp->ms_flush_cv);
+	return (B_TRUE);
 }
 
 /*
@@ -2811,24 +3963,21 @@
 	objset_t *mos = spa_meta_objset(spa);
 	range_tree_t *alloctree = msp->ms_allocating[txg & TXG_MASK];
 	dmu_tx_t *tx;
-	uint64_t object = space_map_object(msp->ms_sm);
 
 	ASSERT(!vd->vdev_ishole);
 
 	/*
 	 * This metaslab has just been added so there's no work to do now.
 	 */
-	if (msp->ms_freeing == NULL) {
-		ASSERT3P(alloctree, ==, NULL);
+	if (msp->ms_new) {
+		ASSERT0(range_tree_space(alloctree));
+		ASSERT0(range_tree_space(msp->ms_freeing));
+		ASSERT0(range_tree_space(msp->ms_freed));
+		ASSERT0(range_tree_space(msp->ms_checkpointing));
+		ASSERT0(range_tree_space(msp->ms_trim));
 		return;
 	}
 
-	ASSERT3P(alloctree, !=, NULL);
-	ASSERT3P(msp->ms_freeing, !=, NULL);
-	ASSERT3P(msp->ms_freed, !=, NULL);
-	ASSERT3P(msp->ms_checkpointing, !=, NULL);
-	ASSERT3P(msp->ms_trim, !=, NULL);
-
 	/*
 	 * Normally, we don't want to process a metaslab if there are no
 	 * allocations or frees to perform. However, if the metaslab is being
@@ -2848,7 +3997,7 @@
 		return;
 
 
-	VERIFY(txg <= spa_final_dirty_txg(spa));
+	VERIFY3U(txg, <=, spa_final_dirty_txg(spa));
 
 	/*
 	 * The only state that can actually be changing concurrently
@@ -2865,16 +4014,27 @@
 	 */
 	tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
 
-	if (msp->ms_sm == NULL) {
-		uint64_t new_object;
+	/*
+	 * Generate a log space map if one doesn't exist already.
+	 */
+	spa_generate_syncing_log_sm(spa, tx);
 
-		new_object = space_map_alloc(mos, zfs_metaslab_sm_blksz, tx);
+	if (msp->ms_sm == NULL) {
+		uint64_t new_object = space_map_alloc(mos,
+		    spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP) ?
+		    zfs_metaslab_sm_blksz_with_log :
+		    zfs_metaslab_sm_blksz_no_log, tx);
 		VERIFY3U(new_object, !=, 0);
 
+		dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
+		    msp->ms_id, sizeof (uint64_t), &new_object, tx);
+
 		VERIFY0(space_map_open(&msp->ms_sm, mos, new_object,
 		    msp->ms_start, msp->ms_size, vd->vdev_ashift));
-
 		ASSERT(msp->ms_sm != NULL);
+
+		ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+		ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
 		ASSERT0(metaslab_allocated_space(msp));
 	}
 
@@ -2883,7 +4043,7 @@
 		ASSERT(spa_has_checkpoint(spa));
 
 		uint64_t new_object = space_map_alloc(mos,
-		    vdev_standard_sm_blksz, tx);
+		    zfs_vdev_standard_sm_blksz, tx);
 		VERIFY3U(new_object, !=, 0);
 
 		VERIFY0(space_map_open(&vd->vdev_checkpoint_sm,
@@ -2912,10 +4072,43 @@
 	metaslab_class_histogram_verify(mg->mg_class);
 	metaslab_group_histogram_remove(mg, msp);
 
-	if (msp->ms_loaded && metaslab_should_condense(msp)) {
-		metaslab_condense(msp, txg, tx);
+	if (spa->spa_sync_pass == 1 && msp->ms_loaded &&
+	    metaslab_should_condense(msp))
+		metaslab_condense(msp, tx);
+
+	/*
+	 * We'll be going to disk to sync our space accounting, thus we
+	 * drop the ms_lock during that time so allocations coming from
+	 * open-context (ZIL) for future TXGs do not block.
+	 */
+	mutex_exit(&msp->ms_lock);
+	space_map_t *log_sm = spa_syncing_log_sm(spa);
+	if (log_sm != NULL) {
+		ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP));
+		if (metaslab_unflushed_txg(msp) == 0)
+			metaslab_unflushed_add(msp, tx);
+		else if (!metaslab_unflushed_dirty(msp))
+			metaslab_unflushed_bump(msp, tx, B_TRUE);
+
+		space_map_write(log_sm, alloctree, SM_ALLOC,
+		    vd->vdev_id, tx);
+		space_map_write(log_sm, msp->ms_freeing, SM_FREE,
+		    vd->vdev_id, tx);
+		mutex_enter(&msp->ms_lock);
+
+		ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+		    metaslab_unflushed_changes_memused(msp));
+		spa->spa_unflushed_stats.sus_memused -=
+		    metaslab_unflushed_changes_memused(msp);
+		range_tree_remove_xor_add(alloctree,
+		    msp->ms_unflushed_frees, msp->ms_unflushed_allocs);
+		range_tree_remove_xor_add(msp->ms_freeing,
+		    msp->ms_unflushed_allocs, msp->ms_unflushed_frees);
+		spa->spa_unflushed_stats.sus_memused +=
+		    metaslab_unflushed_changes_memused(msp);
 	} else {
-		mutex_exit(&msp->ms_lock);
+		ASSERT(!spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP));
+
 		space_map_write(msp->ms_sm, alloctree, SM_ALLOC,
 		    SM_NO_VDEVID, tx);
 		space_map_write(msp->ms_sm, msp->ms_freeing, SM_FREE,
@@ -2935,7 +4128,8 @@
 		/*
 		 * Since we are doing writes to disk and the ms_checkpointing
 		 * tree won't be changing during that time, we drop the
-		 * ms_lock while writing to the checkpoint space map.
+		 * ms_lock while writing to the checkpoint space map, for the
+		 * same reason mentioned above.
 		 */
 		mutex_exit(&msp->ms_lock);
 		space_map_write(vd->vdev_checkpoint_sm,
@@ -3003,6 +4197,10 @@
 	 * and instead will just swap the pointers for freeing and freed.
 	 * We can safely do this since the freed_tree is guaranteed to be
 	 * empty on the initial pass.
+	 *
+	 * Keep in mind that even if we are currently using a log spacemap
+	 * we want current frees to end up in the ms_allocatable (but not
+	 * get appended to the ms_sm) so their ranges can be reused as usual.
 	 */
 	if (spa_sync_pass(spa) == 1) {
 		range_tree_swap(&msp->ms_freeing, &msp->ms_freed);
@@ -3022,37 +4220,34 @@
 
 	mutex_exit(&msp->ms_lock);
 
-	if (object != space_map_object(msp->ms_sm)) {
-		object = space_map_object(msp->ms_sm);
-		dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
-		    msp->ms_id, sizeof (uint64_t), &object, tx);
-	}
+	/*
+	 * Verify that the space map object ID has been recorded in the
+	 * vdev_ms_array.
+	 */
+	uint64_t object;
+	VERIFY0(dmu_read(mos, vd->vdev_ms_array,
+	    msp->ms_id * sizeof (uint64_t), sizeof (uint64_t), &object, 0));
+	VERIFY3U(object, ==, space_map_object(msp->ms_sm));
+
 	mutex_exit(&msp->ms_sync_lock);
 	dmu_tx_commit(tx);
 }
 
-void
-metaslab_potentially_unload(metaslab_t *msp, uint64_t txg)
+static void
+metaslab_evict(metaslab_t *msp, uint64_t txg)
 {
-	/*
-	 * If the metaslab is loaded and we've not tried to load or allocate
-	 * from it in 'metaslab_unload_delay' txgs, then unload it.
-	 */
-	if (msp->ms_loaded &&
-	    msp->ms_disabled == 0 &&
-	    msp->ms_selected_txg + metaslab_unload_delay < txg) {
-		for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
-			VERIFY0(range_tree_space(
-			    msp->ms_allocating[(txg + t) & TXG_MASK]));
-		}
-		if (msp->ms_allocator != -1) {
-			metaslab_passivate(msp, msp->ms_weight &
-			    ~METASLAB_ACTIVE_MASK);
-		}
+	if (!msp->ms_loaded || msp->ms_disabled != 0)
+		return;
 
-		if (!metaslab_debug_unload)
-			metaslab_unload(msp);
+	for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+		VERIFY0(range_tree_space(
+		    msp->ms_allocating[(txg + t) & TXG_MASK]));
 	}
+	if (msp->ms_allocator != -1)
+		metaslab_passivate(msp, msp->ms_weight & ~METASLAB_ACTIVE_MASK);
+
+	if (!metaslab_debug_unload)
+		metaslab_unload(msp);
 }
 
 /*
@@ -3073,34 +4268,15 @@
 
 	mutex_enter(&msp->ms_lock);
 
-	/*
-	 * If this metaslab is just becoming available, initialize its
-	 * range trees and add its capacity to the vdev.
-	 */
-	if (msp->ms_freed == NULL) {
-		for (int t = 0; t < TXG_SIZE; t++) {
-			ASSERT(msp->ms_allocating[t] == NULL);
-
-			msp->ms_allocating[t] = range_tree_create(NULL, NULL);
-		}
-
-		ASSERT3P(msp->ms_freeing, ==, NULL);
-		msp->ms_freeing = range_tree_create(NULL, NULL);
-
-		ASSERT3P(msp->ms_freed, ==, NULL);
-		msp->ms_freed = range_tree_create(NULL, NULL);
-
-		for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-			ASSERT(msp->ms_defer[t] == NULL);
-
-			msp->ms_defer[t] = range_tree_create(NULL, NULL);
-		}
-
-		ASSERT3P(msp->ms_checkpointing, ==, NULL);
-		msp->ms_checkpointing = range_tree_create(NULL, NULL);
-
+	if (msp->ms_new) {
+		/* this is a new metaslab, add its capacity to the vdev */
 		metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size);
+
+		/* there should be no allocations nor frees at this point */
+		VERIFY0(msp->ms_allocated_this_txg);
+		VERIFY0(range_tree_space(msp->ms_freed));
 	}
+
 	ASSERT0(range_tree_space(msp->ms_freeing));
 	ASSERT0(range_tree_space(msp->ms_checkpointing));
 
@@ -3115,21 +4291,28 @@
 	defer_delta = 0;
 	alloc_delta = msp->ms_allocated_this_txg -
 	    range_tree_space(msp->ms_freed);
+
 	if (defer_allowed) {
 		defer_delta = range_tree_space(msp->ms_freed) -
 		    range_tree_space(*defer_tree);
 	} else {
 		defer_delta -= range_tree_space(*defer_tree);
 	}
-
 	metaslab_space_update(vd, mg->mg_class, alloc_delta + defer_delta,
 	    defer_delta, 0);
 
-	/*
-	 * If there's a metaslab_load() in progress, wait for it to complete
-	 * so that we have a consistent view of the in-core space map.
-	 */
-	metaslab_load_wait(msp);
+	if (spa_syncing_log_sm(spa) == NULL) {
+		/*
+		 * If there's a metaslab_load() in progress and we don't have
+		 * a log space map, it means that we probably wrote to the
+		 * metaslab's space map. If this is the case, we need to
+		 * make sure that we wait for the load to complete so that we
+		 * have a consistent view of the in-core side of the metaslab.
+		 */
+		metaslab_load_wait(msp);
+	} else {
+		ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+	}
 
 	/*
 	 * When auto-trimming is enabled, free ranges which are added to
@@ -3196,7 +4379,7 @@
 	ASSERT0(range_tree_space(msp->ms_freeing));
 	ASSERT0(range_tree_space(msp->ms_freed));
 	ASSERT0(range_tree_space(msp->ms_checkpointing));
-
+	msp->ms_allocating_total -= msp->ms_allocated_this_txg;
 	msp->ms_allocated_this_txg = 0;
 	mutex_exit(&msp->ms_lock);
 }
@@ -3249,37 +4432,6 @@
  * Metaslab allocation tracing facility
  * ==========================================================================
  */
-#ifdef _METASLAB_TRACING
-kstat_t *metaslab_trace_ksp;
-kstat_named_t metaslab_trace_over_limit;
-
-void
-metaslab_alloc_trace_init(void)
-{
-	ASSERT(metaslab_alloc_trace_cache == NULL);
-	metaslab_alloc_trace_cache = kmem_cache_create(
-	    "metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t),
-	    0, NULL, NULL, NULL, NULL, NULL, 0);
-	metaslab_trace_ksp = kstat_create("zfs", 0, "metaslab_trace_stats",
-	    "misc", KSTAT_TYPE_NAMED, 1, KSTAT_FLAG_VIRTUAL);
-	if (metaslab_trace_ksp != NULL) {
-		metaslab_trace_ksp->ks_data = &metaslab_trace_over_limit;
-		kstat_named_init(&metaslab_trace_over_limit,
-		    "metaslab_trace_over_limit", KSTAT_DATA_UINT64);
-		kstat_install(metaslab_trace_ksp);
-	}
-}
-
-void
-metaslab_alloc_trace_fini(void)
-{
-	if (metaslab_trace_ksp != NULL) {
-		kstat_delete(metaslab_trace_ksp);
-		metaslab_trace_ksp = NULL;
-	}
-	kmem_cache_destroy(metaslab_alloc_trace_cache);
-	metaslab_alloc_trace_cache = NULL;
-}
 
 /*
  * Add an allocation trace element to the allocation tracing list.
@@ -3303,10 +4455,10 @@
 	 */
 	if (zal->zal_size == metaslab_trace_max_entries) {
 		metaslab_alloc_trace_t *mat_next;
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 		panic("too many entries in allocation list");
 #endif
-		atomic_inc_64(&metaslab_trace_over_limit.value.ui64);
+		METASLABSTAT_BUMP(metaslabstat_trace_over_limit);
 		zal->zal_size--;
 		mat_next = list_next(&zal->zal_list, list_head(&zal->zal_list));
 		list_remove(&zal->zal_list, mat_next);
@@ -3354,31 +4506,6 @@
 	list_destroy(&zal->zal_list);
 	zal->zal_size = 0;
 }
-#else
-
-#define	metaslab_trace_add(zal, mg, msp, psize, id, off, alloc)
-
-void
-metaslab_alloc_trace_init(void)
-{
-}
-
-void
-metaslab_alloc_trace_fini(void)
-{
-}
-
-void
-metaslab_trace_init(zio_alloc_list_t *zal)
-{
-}
-
-void
-metaslab_trace_fini(zio_alloc_list_t *zal)
-{
-}
-
-#endif /* _METASLAB_TRACING */
 
 /*
  * ==========================================================================
@@ -3398,22 +4525,25 @@
 	if (!mg->mg_class->mc_alloc_throttle_enabled)
 		return;
 
-	(void) zfs_refcount_add(&mg->mg_alloc_queue_depth[allocator], tag);
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+	(void) zfs_refcount_add(&mga->mga_alloc_queue_depth, tag);
 }
 
 static void
 metaslab_group_increment_qdepth(metaslab_group_t *mg, int allocator)
 {
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+	metaslab_class_allocator_t *mca =
+	    &mg->mg_class->mc_allocator[allocator];
 	uint64_t max = mg->mg_max_alloc_queue_depth;
-	uint64_t cur = mg->mg_cur_max_alloc_queue_depth[allocator];
+	uint64_t cur = mga->mga_cur_max_alloc_queue_depth;
 	while (cur < max) {
-		if (atomic_cas_64(&mg->mg_cur_max_alloc_queue_depth[allocator],
+		if (atomic_cas_64(&mga->mga_cur_max_alloc_queue_depth,
 		    cur, cur + 1) == cur) {
-			atomic_inc_64(
-			    &mg->mg_class->mc_alloc_max_slots[allocator]);
+			atomic_inc_64(&mca->mca_alloc_max_slots);
 			return;
 		}
-		cur = mg->mg_cur_max_alloc_queue_depth[allocator];
+		cur = mga->mga_cur_max_alloc_queue_depth;
 	}
 }
 
@@ -3429,7 +4559,8 @@
 	if (!mg->mg_class->mc_alloc_throttle_enabled)
 		return;
 
-	(void) zfs_refcount_remove(&mg->mg_alloc_queue_depth[allocator], tag);
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+	(void) zfs_refcount_remove(&mga->mga_alloc_queue_depth, tag);
 	if (io_complete)
 		metaslab_group_increment_qdepth(mg, allocator);
 }
@@ -3445,8 +4576,8 @@
 	for (int d = 0; d < ndvas; d++) {
 		uint64_t vdev = DVA_GET_VDEV(&dva[d]);
 		metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
-		VERIFY(zfs_refcount_not_held(
-		    &mg->mg_alloc_queue_depth[allocator], tag));
+		metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+		VERIFY(zfs_refcount_not_held(&mga->mga_alloc_queue_depth, tag));
 	}
 #endif
 }
@@ -3458,6 +4589,7 @@
 	range_tree_t *rt = msp->ms_allocatable;
 	metaslab_class_t *mc = msp->ms_group->mg_class;
 
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
 	VERIFY(!msp->ms_condensing);
 	VERIFY0(msp->ms_disabled);
 
@@ -3476,6 +4608,7 @@
 			vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
 
 		range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size);
+		msp->ms_allocating_total += size;
 
 		/* Track the last successful allocation */
 		msp->ms_alloc_txg = txg;
@@ -3486,7 +4619,7 @@
 	 * Now that we've attempted the allocation we need to update the
 	 * metaslab's maximum block size since it may have changed.
 	 */
-	msp->ms_max_size = metaslab_block_maxsize(msp);
+	msp->ms_max_size = metaslab_largest_allocatable(msp);
 	return (start);
 }
 
@@ -3499,12 +4632,13 @@
  * have selected, we may not try the newly-activated metaslab, and instead
  * activate another metaslab.  This is not optimal, but generally does not cause
  * any problems (a possible exception being if every metaslab is completely full
- * except for the the newly-activated metaslab which we fail to examine).
+ * except for the newly-activated metaslab which we fail to examine).
  */
 static metaslab_t *
 find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
     dva_t *dva, int d, boolean_t want_unique, uint64_t asize, int allocator,
-    zio_alloc_list_t *zal, metaslab_t *search, boolean_t *was_active)
+    boolean_t try_hard, zio_alloc_list_t *zal, metaslab_t *search,
+    boolean_t *was_active)
 {
 	avl_index_t idx;
 	avl_tree_t *t = &mg->mg_metaslab_tree;
@@ -3512,9 +4646,17 @@
 	if (msp == NULL)
 		msp = avl_nearest(t, idx, AVL_AFTER);
 
+	int tries = 0;
 	for (; msp != NULL; msp = AVL_NEXT(t, msp)) {
 		int i;
-		if (!metaslab_should_allocate(msp, asize)) {
+
+		if (!try_hard && tries > zfs_metaslab_find_max_tries) {
+			METASLABSTAT_BUMP(metaslabstat_too_many_tries);
+			return (NULL);
+		}
+		tries++;
+
+		if (!metaslab_should_allocate(msp, asize, try_hard)) {
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_TOO_SMALL, allocator);
 			continue;
@@ -3556,7 +4698,7 @@
 	return (msp);
 }
 
-void
+static void
 metaslab_active_mask_verify(metaslab_t *msp)
 {
 	ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -3591,11 +4733,10 @@
 	}
 }
 
-/* ARGSUSED */
 static uint64_t
 metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
-    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva,
-    int d, int allocator)
+    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva, int d,
+    int allocator, boolean_t try_hard)
 {
 	metaslab_t *msp = NULL;
 	uint64_t offset = -1ULL;
@@ -3618,6 +4759,7 @@
 	 */
 	if (mg->mg_ms_ready < mg->mg_allocators * 3)
 		allocator = 0;
+	metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
 
 	ASSERT3U(mg->mg_vd->vdev_ms_count, >=, 2);
 
@@ -3639,13 +4781,13 @@
 		mutex_enter(&mg->mg_lock);
 
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
-		    mg->mg_primaries[allocator] != NULL) {
-			msp = mg->mg_primaries[allocator];
+		    mga->mga_primary != NULL) {
+			msp = mga->mga_primary;
 
 			/*
 			 * Even though we don't hold the ms_lock for the
 			 * primary metaslab, those fields should not
-			 * change while we hold the mg_lock. Thus is is
+			 * change while we hold the mg_lock. Thus it is
 			 * safe to make assertions on them.
 			 */
 			ASSERT(msp->ms_primary);
@@ -3653,9 +4795,10 @@
 			ASSERT(msp->ms_loaded);
 
 			was_active = B_TRUE;
+			ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 		} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
-		    mg->mg_secondaries[allocator] != NULL) {
-			msp = mg->mg_secondaries[allocator];
+		    mga->mga_secondary != NULL) {
+			msp = mga->mga_secondary;
 
 			/*
 			 * See comment above about the similar assertions
@@ -3666,10 +4809,11 @@
 			ASSERT(msp->ms_loaded);
 
 			was_active = B_TRUE;
+			ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
 		} else {
 			msp = find_valid_metaslab(mg, activation_weight, dva, d,
-			    want_unique, asize, allocator, zal, search,
-			    &was_active);
+			    want_unique, asize, allocator, try_hard, zal,
+			    search, &was_active);
 		}
 
 		mutex_exit(&mg->mg_lock);
@@ -3696,7 +4840,7 @@
 		 * capable of handling our request. It's possible that
 		 * another thread may have changed the weight while we
 		 * were blocked on the metaslab lock. We check the
-		 * active status first to see if we need to reselect
+		 * active status first to see if we need to reselect
 		 * a new metaslab.
 		 */
 		if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) {
@@ -3739,7 +4883,7 @@
 			continue;
 		}
 
-		msp->ms_selected_txg = txg;
+		metaslab_set_selected_txg(msp, txg);
 
 		int activation_error =
 		    metaslab_activate(msp, allocator, activation_weight);
@@ -3776,7 +4920,7 @@
 		 * can accurately determine if the allocation attempt should
 		 * proceed.
 		 */
-		if (!metaslab_should_allocate(msp, asize)) {
+		if (!metaslab_should_allocate(msp, asize, try_hard)) {
 			/* Passivate this metaslab and select a new one. */
 			metaslab_trace_add(zal, mg, msp, asize, d,
 			    TRACE_TOO_SMALL, allocator);
@@ -3854,7 +4998,7 @@
 		 */
 		uint64_t weight;
 		if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
-			weight = metaslab_block_maxsize(msp);
+			weight = metaslab_largest_allocatable(msp);
 			WEIGHT_SET_SPACEBASED(weight);
 		} else {
 			weight = metaslab_weight_from_range_tree(msp);
@@ -3886,7 +5030,7 @@
 		 * we may end up in an infinite loop retrying the same
 		 * metaslab.
 		 */
-		ASSERT(!metaslab_should_allocate(msp, asize));
+		ASSERT(!metaslab_should_allocate(msp, asize, try_hard));
 
 		mutex_exit(&msp->ms_lock);
 	}
@@ -3897,14 +5041,14 @@
 
 static uint64_t
 metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal,
-    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva,
-    int d, int allocator)
+    uint64_t asize, uint64_t txg, boolean_t want_unique, dva_t *dva, int d,
+    int allocator, boolean_t try_hard)
 {
 	uint64_t offset;
 	ASSERT(mg->mg_initialized);
 
 	offset = metaslab_group_alloc_normal(mg, zal, asize, txg, want_unique,
-	    dva, d, allocator);
+	    dva, d, allocator, try_hard);
 
 	mutex_enter(&mg->mg_lock);
 	if (offset == -1ULL) {
@@ -3939,6 +5083,7 @@
     dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags,
     zio_alloc_list_t *zal, int allocator)
 {
+	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
 	metaslab_group_t *mg, *fast_mg, *rotor;
 	vdev_t *vd;
 	boolean_t try_hard = B_FALSE;
@@ -3952,7 +5097,7 @@
 	 * damage can result in extremely long reconstruction times.  This
 	 * will also test spilling from special to normal.
 	 */
-	if (psize >= metaslab_force_ganging && (spa_get_random(100) < 3)) {
+	if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) {
 		metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
 		    allocator);
 		return (SET_ERROR(ENOSPC));
@@ -3960,7 +5105,7 @@
 
 	/*
 	 * Start at the rotor and loop through all mgs until we find something.
-	 * Note that there's no locking on mc_rotor or mc_aliquot because
+	 * Note that there's no locking on mca_rotor or mca_aliquot because
 	 * nothing actually breaks if we miss a few updates -- we just won't
 	 * allocate quite as evenly.  It all balances out over time.
 	 *
@@ -3990,29 +5135,29 @@
 		 * all else fails.
 		 */
 		if (vd != NULL && vd->vdev_mg != NULL) {
-			mg = vd->vdev_mg;
+			mg = vdev_get_mg(vd, mc);
 
 			if (flags & METASLAB_HINTBP_AVOID &&
 			    mg->mg_next != NULL)
 				mg = mg->mg_next;
 		} else {
-			mg = mc->mc_rotor;
+			mg = mca->mca_rotor;
 		}
 	} else if (d != 0) {
 		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
 		mg = vd->vdev_mg->mg_next;
 	} else if (flags & METASLAB_FASTWRITE) {
-		mg = fast_mg = mc->mc_rotor;
+		mg = fast_mg = mca->mca_rotor;
 
 		do {
 			if (fast_mg->mg_vd->vdev_pending_fastwrite <
 			    mg->mg_vd->vdev_pending_fastwrite)
 				mg = fast_mg;
-		} while ((fast_mg = fast_mg->mg_next) != mc->mc_rotor);
+		} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
 
 	} else {
-		ASSERT(mc->mc_rotor != NULL);
-		mg = mc->mc_rotor;
+		ASSERT(mca->mca_rotor != NULL);
+		mg = mca->mca_rotor;
 	}
 
 	/*
@@ -4020,7 +5165,7 @@
 	 * metaslab group that has been passivated, just follow the rotor.
 	 */
 	if (mg->mg_class != mc || mg->mg_activation_count <= 0)
-		mg = mc->mc_rotor;
+		mg = mca->mca_rotor;
 
 	rotor = mg;
 top:
@@ -4062,12 +5207,11 @@
 		ASSERT(mg->mg_initialized);
 
 		/*
-		 * Avoid writing single-copy data to a failing,
+		 * Avoid writing single-copy data to an unhealthy,
 		 * non-redundant vdev, unless we've already tried all
 		 * other vdevs.
 		 */
-		if ((vd->vdev_stat.vs_write_errors > 0 ||
-		    vd->vdev_state < VDEV_STATE_HEALTHY) &&
+		if (vd->vdev_state < VDEV_STATE_HEALTHY &&
 		    d == 0 && !try_hard && vd->vdev_children == 0) {
 			metaslab_trace_add(zal, mg, NULL, psize, d,
 			    TRACE_VDEV_ERROR, allocator);
@@ -4081,12 +5225,12 @@
 
 		/*
 		 * If we don't need to try hard, then require that the
-		 * block be on an different metaslab from any other DVAs
+		 * block be on a different metaslab from any other DVAs
 		 * in this BP (unique=true).  If we are trying hard, then
 		 * allow any metaslab to be used (unique=false).
 		 */
 		uint64_t offset = metaslab_group_alloc(mg, zal, asize, txg,
-		    !try_hard, dva, d, allocator);
+		    !try_hard, dva, d, allocator, try_hard);
 
 		if (offset != -1ULL) {
 			/*
@@ -4098,7 +5242,7 @@
 			 * Bias is also used to compensate for unequally
 			 * sized vdevs so that space is allocated fairly.
 			 */
-			if (mc->mc_aliquot == 0 && metaslab_bias_enabled) {
+			if (mca->mca_aliquot == 0 && metaslab_bias_enabled) {
 				vdev_stat_t *vs = &vd->vdev_stat;
 				int64_t vs_free = vs->vs_space - vs->vs_alloc;
 				int64_t mc_free = mc->mc_space - mc->mc_alloc;
@@ -4136,10 +5280,10 @@
 			}
 
 			if ((flags & METASLAB_FASTWRITE) ||
-			    atomic_add_64_nv(&mc->mc_aliquot, asize) >=
+			    atomic_add_64_nv(&mca->mca_aliquot, asize) >=
 			    mg->mg_aliquot + mg->mg_bias) {
-				mc->mc_rotor = mg->mg_next;
-				mc->mc_aliquot = 0;
+				mca->mca_rotor = mg->mg_next;
+				mca->mca_aliquot = 0;
 			}
 
 			DVA_SET_VDEV(&dva[d], vd->vdev_id);
@@ -4156,14 +5300,17 @@
 			return (0);
 		}
 next:
-		mc->mc_rotor = mg->mg_next;
-		mc->mc_aliquot = 0;
+		mca->mca_rotor = mg->mg_next;
+		mca->mca_aliquot = 0;
 	} while ((mg = mg->mg_next) != rotor);
 
 	/*
-	 * If we haven't tried hard, do so now.
+	 * If we haven't tried hard, perhaps do so now.
 	 */
-	if (!try_hard) {
+	if (!try_hard && (zfs_metaslab_try_hard_before_gang ||
+	    GANG_ALLOCATION(flags) || (flags & METASLAB_ZIL) != 0 ||
+	    psize <= 1 << spa->spa_min_ashift)) {
+		METASLABSTAT_BUMP(metaslabstat_try_hard);
 		try_hard = B_TRUE;
 		goto top;
 	}
@@ -4210,11 +5357,11 @@
 	mutex_exit(&msp->ms_lock);
 }
 
-/* ARGSUSED */
 void
 metaslab_free_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
 {
+	(void) inner_offset;
 	boolean_t *checkpoint = arg;
 
 	ASSERT3P(checkpoint, !=, NULL);
@@ -4263,7 +5410,7 @@
 	void *rbca_cb_arg;
 } remap_blkptr_cb_arg_t;
 
-void
+static void
 remap_blkptr_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
 {
@@ -4423,13 +5570,14 @@
 	ASSERT3P(vd->vdev_indirect_mapping, ==, NULL);
 
 	if (DVA_GET_GANG(dva))
-		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+		size = vdev_gang_header_asize(vd);
 
 	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
 
 	mutex_enter(&msp->ms_lock);
 	range_tree_remove(msp->ms_allocating[txg & TXG_MASK],
 	    offset, size);
+	msp->ms_allocating_total -= size;
 
 	VERIFY(!msp->ms_condensing);
 	VERIFY3U(offset, >=, msp->ms_start);
@@ -4457,7 +5605,7 @@
 	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_READER), !=, 0);
 
 	if (DVA_GET_GANG(dva)) {
-		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+		size = vdev_gang_header_asize(vd);
 	}
 
 	metaslab_free_impl(vd, offset, size, checkpoint);
@@ -4474,48 +5622,40 @@
 metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, int allocator,
     zio_t *zio, int flags)
 {
-	uint64_t available_slots = 0;
-	boolean_t slot_reserved = B_FALSE;
-	uint64_t max = mc->mc_alloc_max_slots[allocator];
+	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
+	uint64_t max = mca->mca_alloc_max_slots;
 
 	ASSERT(mc->mc_alloc_throttle_enabled);
-	mutex_enter(&mc->mc_lock);
-
-	uint64_t reserved_slots =
-	    zfs_refcount_count(&mc->mc_alloc_slots[allocator]);
-	if (reserved_slots < max)
-		available_slots = max - reserved_slots;
-
-	if (slots <= available_slots || GANG_ALLOCATION(flags) ||
-	    flags & METASLAB_MUST_RESERVE) {
+	if (GANG_ALLOCATION(flags) || (flags & METASLAB_MUST_RESERVE) ||
+	    zfs_refcount_count(&mca->mca_alloc_slots) + slots <= max) {
 		/*
+		 * The potential race between _count() and _add() is covered
+		 * by the allocator lock in most cases, or irrelevant due to
+		 * GANG_ALLOCATION() or METASLAB_MUST_RESERVE set in others.
+		 * But even if we assume some other non-existing scenario, the
+		 * worst that can happen is that a few more I/Os get to
+		 * allocation earlier, which is not a problem.
+		 *
 		 * We reserve the slots individually so that we can unreserve
 		 * them individually when an I/O completes.
 		 */
-		for (int d = 0; d < slots; d++) {
-			reserved_slots =
-			    zfs_refcount_add(&mc->mc_alloc_slots[allocator],
-			    zio);
-		}
+		for (int d = 0; d < slots; d++)
+			zfs_refcount_add(&mca->mca_alloc_slots, zio);
 		zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
-		slot_reserved = B_TRUE;
+		return (B_TRUE);
 	}
-
-	mutex_exit(&mc->mc_lock);
-	return (slot_reserved);
+	return (B_FALSE);
 }
 
 void
 metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
     int allocator, zio_t *zio)
 {
+	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
+
 	ASSERT(mc->mc_alloc_throttle_enabled);
-	mutex_enter(&mc->mc_lock);
-	for (int d = 0; d < slots; d++) {
-		(void) zfs_refcount_remove(&mc->mc_alloc_slots[allocator],
-		    zio);
-	}
-	mutex_exit(&mc->mc_lock);
+	for (int d = 0; d < slots; d++)
+		zfs_refcount_remove(&mca->mca_alloc_slots, zio);
 }
 
 static int
@@ -4560,11 +5700,21 @@
 	range_tree_remove(msp->ms_allocatable, offset, size);
 	range_tree_clear(msp->ms_trim, offset, size);
 
-	if (spa_writeable(spa)) {	/* don't dirty if we're zdb(1M) */
+	if (spa_writeable(spa)) {	/* don't dirty if we're zdb(8) */
+		metaslab_class_t *mc = msp->ms_group->mg_class;
+		multilist_sublist_t *mls =
+		    multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
+		if (!multilist_link_active(&msp->ms_class_txg_node)) {
+			msp->ms_selected_txg = txg;
+			multilist_sublist_insert_head(mls, msp);
+		}
+		multilist_sublist_unlock(mls);
+
 		if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
 			vdev_dirty(vd, VDD_METASLAB, msp, txg);
 		range_tree_add(msp->ms_allocating[txg & TXG_MASK],
 		    offset, size);
+		msp->ms_allocating_total += size;
 	}
 
 	mutex_exit(&msp->ms_lock);
@@ -4577,11 +5727,11 @@
 	int		mcca_error;
 } metaslab_claim_cb_arg_t;
 
-/* ARGSUSED */
 static void
 metaslab_claim_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
 {
+	(void) inner_offset;
 	metaslab_claim_cb_arg_t *mcca_arg = arg;
 
 	if (mcca_arg->mcca_error == 0) {
@@ -4597,7 +5747,7 @@
 		metaslab_claim_cb_arg_t arg;
 
 		/*
-		 * Only zdb(1M) can claim on indirect vdevs.  This is used
+		 * Only zdb(8) can claim on indirect vdevs.  This is used
 		 * to detect leaks of mapped space (that are not accounted
 		 * for in the obsolete counts, spacemap, or bpobj).
 		 */
@@ -4639,7 +5789,7 @@
 	ASSERT(DVA_IS_VALID(dva));
 
 	if (DVA_GET_GANG(dva))
-		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+		size = vdev_gang_header_asize(vd);
 
 	return (metaslab_claim_impl(vd, offset, size, txg));
 }
@@ -4658,7 +5808,8 @@
 
 	spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
 
-	if (mc->mc_rotor == NULL) {	/* no vdevs in this class */
+	if (mc->mc_allocator[allocator].mca_rotor == NULL) {
+		/* no vdevs in this class */
 		spa_config_exit(spa, SCL_ALLOC, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
@@ -4689,7 +5840,6 @@
 			metaslab_group_alloc_increment(spa,
 			    DVA_GET_VDEV(&dva[d]), zio, flags, allocator);
 		}
-
 	}
 	ASSERT(error == 0);
 	ASSERT(BP_GET_NDVAS(bp) == ndvas);
@@ -4833,11 +5983,12 @@
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 }
 
-/* ARGSUSED */
 static void
 metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
 {
+	(void) inner, (void) arg;
+
 	if (vd->vdev_ops == &vdev_indirect_ops)
 		return;
 
@@ -4848,7 +5999,7 @@
 metaslab_check_free_impl(vdev_t *vd, uint64_t offset, uint64_t size)
 {
 	metaslab_t *msp;
-	ASSERTV(spa_t *spa = vd->vdev_spa);
+	spa_t *spa __maybe_unused = vd->vdev_spa;
 
 	if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0)
 		return;
@@ -4871,12 +6022,23 @@
 		    offset, size);
 	}
 
-	range_tree_verify_not_present(msp->ms_trim, offset, size);
+	/*
+	 * Check all segments that currently exist in the freeing pipeline.
+	 *
+	 * It would intuitively make sense to also check the current allocating
+	 * tree since metaslab_unalloc_dva() exists for extents that are
+	 * allocated and freed in the same sync pass within the same txg.
+	 * Unfortunately there are places (e.g. the ZIL) where we allocate a
+	 * segment but then we free part of it within the same txg
+	 * [see zil_sync()]. Thus, we don't call
+	 * range_tree_verify_not_present() on the current allocating tree.
+	 */
 	range_tree_verify_not_present(msp->ms_freeing, offset, size);
 	range_tree_verify_not_present(msp->ms_checkpointing, offset, size);
 	range_tree_verify_not_present(msp->ms_freed, offset, size);
 	for (int j = 0; j < TXG_DEFER_SIZE; j++)
 		range_tree_verify_not_present(msp->ms_defer[j], offset, size);
+	range_tree_verify_not_present(msp->ms_trim, offset, size);
 	mutex_exit(&msp->ms_lock);
 }
 
@@ -4894,7 +6056,7 @@
 		uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]);
 
 		if (DVA_GET_GANG(&bp->blk_dva[i]))
-			size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+			size = vdev_gang_header_asize(vd);
 
 		ASSERT3P(vd, !=, NULL);
 
@@ -4963,7 +6125,7 @@
 }
 
 void
-metaslab_enable(metaslab_t *msp, boolean_t sync)
+metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
 {
 	metaslab_group_t *mg = msp->ms_group;
 	spa_t *spa = mg->mg_vd->vdev_spa;
@@ -4981,72 +6143,135 @@
 	if (--msp->ms_disabled == 0) {
 		mg->mg_ms_disabled--;
 		cv_broadcast(&mg->mg_ms_disabled_cv);
+		if (unload)
+			metaslab_unload(msp);
 	}
 	mutex_exit(&msp->ms_lock);
 	mutex_exit(&mg->mg_ms_disabled_lock);
 }
 
-#if defined(_KERNEL)
+void
+metaslab_set_unflushed_dirty(metaslab_t *ms, boolean_t dirty)
+{
+	ms->ms_unflushed_dirty = dirty;
+}
+
+static void
+metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
+{
+	vdev_t *vd = ms->ms_group->mg_vd;
+	spa_t *spa = vd->vdev_spa;
+	objset_t *mos = spa_meta_objset(spa);
+
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+
+	metaslab_unflushed_phys_t entry = {
+		.msp_unflushed_txg = metaslab_unflushed_txg(ms),
+	};
+	uint64_t entry_size = sizeof (entry);
+	uint64_t entry_offset = ms->ms_id * entry_size;
+
+	uint64_t object = 0;
+	int err = zap_lookup(mos, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, sizeof (uint64_t), 1,
+	    &object);
+	if (err == ENOENT) {
+		object = dmu_object_alloc(mos, DMU_OTN_UINT64_METADATA,
+		    SPA_OLD_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
+		VERIFY0(zap_add(mos, vd->vdev_top_zap,
+		    VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, sizeof (uint64_t), 1,
+		    &object, tx));
+	} else {
+		VERIFY0(err);
+	}
+
+	dmu_write(spa_meta_objset(spa), object, entry_offset, entry_size,
+	    &entry, tx);
+}
+
+void
+metaslab_set_unflushed_txg(metaslab_t *ms, uint64_t txg, dmu_tx_t *tx)
+{
+	ms->ms_unflushed_txg = txg;
+	metaslab_update_ondisk_flush_data(ms, tx);
+}
+
+boolean_t
+metaslab_unflushed_dirty(metaslab_t *ms)
+{
+	return (ms->ms_unflushed_dirty);
+}
+
+uint64_t
+metaslab_unflushed_txg(metaslab_t *ms)
+{
+	return (ms->ms_unflushed_txg);
+}
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, aliquot, ULONG, ZMOD_RW,
+	"Allocation granularity (a.k.a. stripe size)");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_load, INT, ZMOD_RW,
+	"Load all metaslabs when pool is first opened");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
+	"Prevent metaslabs from being unloaded");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
+	"Preload potential metaslabs during reassessment");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, INT, ZMOD_RW,
+	"Delay in txgs after metaslab was last used before unloading");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay_ms, INT, ZMOD_RW,
+	"Delay in milliseconds after metaslab was last used before unloading");
+
 /* BEGIN CSTYLED */
-module_param(metaslab_aliquot, ulong, 0644);
-MODULE_PARM_DESC(metaslab_aliquot,
-	"allocation granularity (a.k.a. stripe size)");
+ZFS_MODULE_PARAM(zfs_mg, zfs_mg_, noalloc_threshold, INT, ZMOD_RW,
+	"Percentage of metaslab group size that should be free to make it "
+	"eligible for allocation");
 
-module_param(metaslab_debug_load, int, 0644);
-MODULE_PARM_DESC(metaslab_debug_load,
-	"load all metaslabs when pool is first opened");
+ZFS_MODULE_PARAM(zfs_mg, zfs_mg_, fragmentation_threshold, INT, ZMOD_RW,
+	"Percentage of metaslab group size that should be considered eligible "
+	"for allocations unless all metaslab groups within the metaslab class "
+	"have also crossed this threshold");
 
-module_param(metaslab_debug_unload, int, 0644);
-MODULE_PARM_DESC(metaslab_debug_unload,
-	"prevent metaslabs from being unloaded");
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, fragmentation_threshold, INT,
+	 ZMOD_RW, "Fragmentation for metaslab to allow allocation");
 
-module_param(metaslab_preload_enabled, int, 0644);
-MODULE_PARM_DESC(metaslab_preload_enabled,
-	"preload potential metaslabs during reassessment");
-
-module_param(zfs_mg_noalloc_threshold, int, 0644);
-MODULE_PARM_DESC(zfs_mg_noalloc_threshold,
-	"percentage of free space for metaslab group to allow allocation");
-
-module_param(zfs_mg_fragmentation_threshold, int, 0644);
-MODULE_PARM_DESC(zfs_mg_fragmentation_threshold,
-	"fragmentation for metaslab group to allow allocation");
-
-module_param(zfs_metaslab_fragmentation_threshold, int, 0644);
-MODULE_PARM_DESC(zfs_metaslab_fragmentation_threshold,
-	"fragmentation for metaslab to allow allocation");
-
-module_param(metaslab_fragmentation_factor_enabled, int, 0644);
-MODULE_PARM_DESC(metaslab_fragmentation_factor_enabled,
-	"use the fragmentation metric to prefer less fragmented metaslabs");
-
-module_param(metaslab_lba_weighting_enabled, int, 0644);
-MODULE_PARM_DESC(metaslab_lba_weighting_enabled,
-	"prefer metaslabs with lower LBAs");
-
-module_param(metaslab_bias_enabled, int, 0644);
-MODULE_PARM_DESC(metaslab_bias_enabled,
-	"enable metaslab group biasing");
-
-module_param(zfs_metaslab_segment_weight_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_metaslab_segment_weight_enabled,
-	"enable segment-based metaslab selection");
-
-module_param(zfs_metaslab_switch_threshold, int, 0644);
-MODULE_PARM_DESC(zfs_metaslab_switch_threshold,
-	"segment-based metaslab selection maximum buckets before switching");
-
-module_param(metaslab_force_ganging, ulong, 0644);
-MODULE_PARM_DESC(metaslab_force_ganging,
-	"blocks larger than this size are forced to be gang blocks");
-
-module_param(metaslab_df_max_search, int, 0644);
-MODULE_PARM_DESC(metaslab_df_max_search,
-	"max distance (bytes) to search forward before using size tree");
-
-module_param(metaslab_df_use_largest_segment, int, 0644);
-MODULE_PARM_DESC(metaslab_df_use_largest_segment,
-	"when looking in size tree, use largest segment instead of exact fit");
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, fragmentation_factor_enabled, INT, ZMOD_RW,
+	"Use the fragmentation metric to prefer less fragmented metaslabs");
 /* END CSTYLED */
 
-#endif
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, lba_weighting_enabled, INT, ZMOD_RW,
+	"Prefer metaslabs with lower LBAs");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, bias_enabled, INT, ZMOD_RW,
+	"Enable metaslab group biasing");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, segment_weight_enabled, INT,
+	ZMOD_RW, "Enable segment-based metaslab selection");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
+	"Segment-based metaslab selection maximum buckets before switching");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, ULONG, ZMOD_RW,
+	"Blocks larger than this size are forced to be gang blocks");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, INT, ZMOD_RW,
+	"Max distance (bytes) to search forward before using size tree");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,
+	"When looking in size tree, use largest segment instead of exact fit");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, ULONG,
+	ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, mem_limit, INT, ZMOD_RW,
+	"Percentage of memory that can be used to store metaslab range trees");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,
+	ZMOD_RW, "Try hard to allocate before ganging");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, INT, ZMOD_RW,
+	"Normally only consider this many of the best metaslabs in each vdev");

diff --git a/zfs/module/zfs/mmp.c b/zfs/module/zfs/mmp.c
index 3f9941b..139bb0a 100644
--- a/zfs/module/zfs/mmp.c
+++ b/zfs/module/zfs/mmp.c

@@ -307,8 +307,17 @@
 		if (leaf == NULL)
 			leaf = list_head(&spa->spa_leaf_list);
 
-		if (!vdev_writeable(leaf)) {
+		/*
+		 * We skip unwritable, offline, detached, and dRAID spare
+		 * devices as they are either not legal targets or the write
+		 * may fail or not be seen by other hosts.  Skipped dRAID
+		 * spares can never be written so the fail mask is not set.
+		 */
+		if (!vdev_writeable(leaf) || leaf->vdev_offline ||
+		    leaf->vdev_detached) {
 			fail_mask |= MMP_FAIL_NOT_WRITABLE;
+		} else if (leaf->vdev_ops == &vdev_draid_spare_ops) {
+			continue;
 		} else if (leaf->vdev_mmp_pending != 0) {
 			fail_mask |= MMP_FAIL_WRITE_PENDING;
 		} else {
@@ -435,7 +444,7 @@
 	uint64_t offset;
 
 	hrtime_t lock_acquire_time = gethrtime();
-	spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
+	spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER);
 	lock_acquire_time = gethrtime() - lock_acquire_time;
 	if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
 		zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
@@ -476,8 +485,9 @@
 	if (mmp->mmp_skip_error != 0) {
 		mmp->mmp_skip_error = 0;
 		zfs_dbgmsg("MMP write after skipping due to unavailable "
-		    "leaves, pool '%s' gethrtime %llu leaf %#llu",
-		    spa_name(spa), gethrtime(), vd->vdev_guid);
+		    "leaves, pool '%s' gethrtime %llu leaf %llu",
+		    spa_name(spa), (u_longlong_t)gethrtime(),
+		    (u_longlong_t)vd->vdev_guid);
 	}
 
 	if (mmp->mmp_zio_root == NULL)
@@ -514,9 +524,9 @@
 	mutex_exit(&mmp->mmp_io_lock);
 
 	offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) -
-	    MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL));
+	    MMP_BLOCKS_PER_LABEL + random_in_range(MMP_BLOCKS_PER_LABEL));
 
-	label = spa_get_random(VDEV_LABELS);
+	label = random_in_range(VDEV_LABELS);
 	vdev_label_write(zio, vd, label, ub_abd, offset,
 	    VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp,
 	    flags | ZIO_FLAG_DONT_PROPAGATE);
@@ -608,10 +618,11 @@
 			    "mmp_interval %llu last_mmp_fail_intervals %u "
 			    "mmp_fail_intervals %u mmp_fail_ns %llu "
 			    "skip_wait %d leaves %d next_time %llu",
-			    spa_name(spa), gethrtime(), last_mmp_interval,
-			    mmp_interval, last_mmp_fail_intervals,
-			    mmp_fail_intervals, mmp_fail_ns, skip_wait, leaves,
-			    next_time);
+			    spa_name(spa), (u_longlong_t)gethrtime(),
+			    (u_longlong_t)last_mmp_interval,
+			    (u_longlong_t)mmp_interval, last_mmp_fail_intervals,
+			    mmp_fail_intervals, (u_longlong_t)mmp_fail_ns,
+			    skip_wait, leaves, (u_longlong_t)next_time);
 		}
 
 		/*
@@ -624,8 +635,9 @@
 			zfs_dbgmsg("MMP state change pool '%s': gethrtime %llu "
 			    "last_spa_multihost %u multihost %u "
 			    "last_spa_suspended %u suspended %u",
-			    spa_name(spa), last_spa_multihost, multihost,
-			    last_spa_suspended, suspended);
+			    spa_name(spa), (u_longlong_t)gethrtime(),
+			    last_spa_multihost, multihost, last_spa_suspended,
+			    suspended);
 			mutex_enter(&mmp->mmp_io_lock);
 			mmp->mmp_last_write = gethrtime();
 			mmp->mmp_delay = mmp_interval;
@@ -675,15 +687,14 @@
 		}
 
 		CALLB_CPR_SAFE_BEGIN(&cpr);
-		(void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv,
+		(void) cv_timedwait_idle_hires(&mmp->mmp_thread_cv,
 		    &mmp->mmp_thread_lock, next_time, USEC2NSEC(100),
 		    CALLOUT_FLAG_ABSOLUTE);
 		CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
 	}
 
 	/* Outstanding writes are allowed to complete. */
-	if (mmp->mmp_zio_root)
-		zio_wait(mmp->mmp_zio_root);
+	zio_wait(mmp->mmp_zio_root);
 
 	mmp->mmp_zio_root = NULL;
 	mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
@@ -720,36 +731,14 @@
 	mutex_exit(&spa_namespace_lock);
 }
 
-#if defined(_KERNEL)
-#include <linux/mod_compat.h>
-
-static int
-param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
-{
-	int ret;
-
-	ret = param_set_ulong(val, kp);
-	if (ret < 0)
-		return (ret);
-
-	if (spa_mode_global != 0)
-		mmp_signal_all_threads();
-
-	return (ret);
-}
-
 /* BEGIN CSTYLED */
-module_param(zfs_multihost_fail_intervals, uint, 0644);
-MODULE_PARM_DESC(zfs_multihost_fail_intervals,
+ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval,
+	param_set_multihost_interval, param_get_ulong, ZMOD_RW,
+	"Milliseconds between mmp writes to each leaf");
+/* END CSTYLED */
+
+ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, fail_intervals, UINT, ZMOD_RW,
 	"Max allowed period without a successful mmp write");
 
-module_param_call(zfs_multihost_interval, param_set_multihost_interval,
-    param_get_ulong, &zfs_multihost_interval, 0644);
-MODULE_PARM_DESC(zfs_multihost_interval,
-	"Milliseconds between mmp writes to each leaf");
-
-module_param(zfs_multihost_import_intervals, uint, 0644);
-MODULE_PARM_DESC(zfs_multihost_import_intervals,
+ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, import_intervals, UINT, ZMOD_RW,
 	"Number of zfs_multihost_interval periods to wait for activity");
-/* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/multilist.c b/zfs/module/zfs/multilist.c
index b74ee0f..8bbc9b3 100644
--- a/zfs/module/zfs/multilist.c
+++ b/zfs/module/zfs/multilist.c

@@ -18,10 +18,7 @@
 
 #include <sys/zfs_context.h>
 #include <sys/multilist.h>
-#include <sys/trace_multilist.h>
-
-/* needed for spa_get_random() */
-#include <sys/spa.h>
+#include <sys/trace_zfs.h>
 
 /*
  * This overrides the number of sublists in each multilist_t, which defaults
@@ -33,7 +30,7 @@
  * Given the object contained on the list, return a pointer to the
  * object's multilist_node_t structure it contains.
  */
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 static multilist_node_t *
 multilist_d2l(multilist_t *ml, void *obj)
 {
@@ -68,8 +65,8 @@
  *     requirement, but a general rule of thumb in order to garner the
  *     best multi-threaded performance out of the data structure.
  */
-static multilist_t *
-multilist_create_impl(size_t size, size_t offset,
+static void
+multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
     unsigned int num, multilist_sublist_index_func_t *index_func)
 {
 	ASSERT3U(size, >, 0);
@@ -77,7 +74,6 @@
 	ASSERT3U(num, >, 0);
 	ASSERT3P(index_func, !=, NULL);
 
-	multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
 	ml->ml_offset = offset;
 	ml->ml_num_sublists = num;
 	ml->ml_index_func = index_func;
@@ -92,16 +88,18 @@
 		mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
 		list_create(&mls->mls_list, size, offset);
 	}
-	return (ml);
 }
 
 /*
- * Allocate a new multilist, using the default number of sublists
- * (the number of CPUs, or at least 4, or the tunable
- * zfs_multilist_num_sublists).
+ * Allocate a new multilist, using the default number of sublists (the number
+ * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
+ * that the multilists do not expand if more CPUs are hot-added. In that case,
+ * we will have less fanout than boot_ncpus, but we don't want to always
+ * reserve the RAM necessary to create the extra slots for additional CPUs up
+ * front, and dynamically adding them is a complex task.
  */
-multilist_t *
-multilist_create(size_t size, size_t offset,
+void
+multilist_create(multilist_t *ml, size_t size, size_t offset,
     multilist_sublist_index_func_t *index_func)
 {
 	int num_sublists;
@@ -112,7 +110,7 @@
 		num_sublists = MAX(boot_ncpus, 4);
 	}
 
-	return (multilist_create_impl(size, offset, num_sublists, index_func));
+	multilist_create_impl(ml, size, offset, num_sublists, index_func);
 }
 
 /*
@@ -138,7 +136,7 @@
 
 	ml->ml_num_sublists = 0;
 	ml->ml_offset = 0;
-	kmem_free(ml, sizeof (multilist_t));
+	ml->ml_sublists = NULL;
 }
 
 /*
@@ -274,7 +272,7 @@
 unsigned int
 multilist_get_random_index(multilist_t *ml)
 {
-	return (spa_get_random(ml->ml_num_sublists));
+	return (random_in_range(ml->ml_num_sublists));
 }
 
 /* Lock and return the sublist specified at the given index */
@@ -425,13 +423,7 @@
 	return (list_link_active(link));
 }
 
-#if defined(_KERNEL)
-
 /* BEGIN CSTYLED */
-
-module_param(zfs_multilist_num_sublists, int, 0644);
-MODULE_PARM_DESC(zfs_multilist_num_sublists,
+ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW,
 	"Number of sublists used in each multilist");
-
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/objlist.c b/zfs/module/zfs/objlist.c
new file mode 100644
index 0000000..c80bab2
--- /dev/null
+++ b/zfs/module/zfs/objlist.c

@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include	<sys/objlist.h>
+#include	<sys/zfs_context.h>
+
+objlist_t *
+objlist_create(void)
+{
+	objlist_t *list = kmem_alloc(sizeof (*list), KM_SLEEP);
+	list_create(&list->ol_list, sizeof (objlist_node_t),
+	    offsetof(objlist_node_t, on_node));
+	list->ol_last_lookup = 0;
+	return (list);
+}
+
+void
+objlist_destroy(objlist_t *list)
+{
+	for (objlist_node_t *n = list_remove_head(&list->ol_list);
+	    n != NULL; n = list_remove_head(&list->ol_list)) {
+		kmem_free(n, sizeof (*n));
+	}
+	list_destroy(&list->ol_list);
+	kmem_free(list, sizeof (*list));
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist.  In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number.  Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+boolean_t
+objlist_exists(objlist_t *list, uint64_t object)
+{
+	objlist_node_t *node = list_head(&list->ol_list);
+	ASSERT3U(object, >=, list->ol_last_lookup);
+	list->ol_last_lookup = object;
+	while (node != NULL && node->on_object < object) {
+		VERIFY3P(node, ==, list_remove_head(&list->ol_list));
+		kmem_free(node, sizeof (*node));
+		node = list_head(&list->ol_list);
+	}
+	return (node != NULL && node->on_object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order.  However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+void
+objlist_insert(objlist_t *list, uint64_t object)
+{
+	objlist_node_t *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+	node->on_object = object;
+#ifdef ZFS_DEBUG
+	objlist_node_t *last_object = list_tail(&list->ol_list);
+	uint64_t last_objnum = (last_object != NULL ? last_object->on_object :
+	    0);
+	ASSERT3U(node->on_object, >, last_objnum);
+#endif
+	list_insert_tail(&list->ol_list, node);
+}

diff --git a/zfs/module/zfs/pathname.c b/zfs/module/zfs/pathname.c
index 4766762..84ab7b7 100644
--- a/zfs/module/zfs/pathname.c
+++ b/zfs/module/zfs/pathname.c

@@ -73,10 +73,6 @@
 {
 	pnp->pn_buf = kmem_alloc(sz, KM_SLEEP);
 	pnp->pn_bufsize = sz;
-#if 0 /* unused in ZoL */
-	pnp->pn_path = pnp->pn_buf;
-	pnp->pn_pathlen = 0;
-#endif
 }
 
 /*
@@ -89,8 +85,4 @@
 	kmem_free(pnp->pn_buf, pnp->pn_bufsize);
 	pnp->pn_buf = NULL;
 	pnp->pn_bufsize = 0;
-#if 0 /* unused in ZoL */
-	pnp->pn_path = NULL;
-	pnp->pn_pathlen = 0;
-#endif
 }

diff --git a/zfs/module/zfs/policy.c b/zfs/module/zfs/policy.c
deleted file mode 100644
index 7f9456a..0000000
--- a/zfs/module/zfs/policy.c
+++ /dev/null

@@ -1,355 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Joyent, Inc. All rights reserved.
- * Copyright (C) 2016 Lawrence Livermore National Security, LLC.
- *
- * For Linux the vast majority of this enforcement is already handled via
- * the standard Linux VFS permission checks.  However certain administrative
- * commands which bypass the standard mechanisms may need to make use of
- * this functionality.
- */
-
-#include <sys/policy.h>
-#include <linux/security.h>
-#include <linux/vfs_compat.h>
-
-/*
- * The passed credentials cannot be directly verified because Linux only
- * provides and interface to check the *current* process credentials.  In
- * order to handle this the capable() test is only run when the passed
- * credentials match the current process credentials or the kcred.  In
- * all other cases this function must fail and return the passed err.
- */
-static int
-priv_policy_ns(const cred_t *cr, int capability, boolean_t all, int err,
-    struct user_namespace *ns)
-{
-	ASSERT3S(all, ==, B_FALSE);
-
-	if (cr != CRED() && (cr != kcred))
-		return (err);
-
-#if defined(CONFIG_USER_NS) && defined(HAVE_NS_CAPABLE)
-	if (!(ns ? ns_capable(ns, capability) : capable(capability)))
-#else
-	if (!capable(capability))
-#endif
-		return (err);
-
-	return (0);
-}
-
-static int
-priv_policy(const cred_t *cr, int capability, boolean_t all, int err)
-{
-	return (priv_policy_ns(cr, capability, all, err, NULL));
-}
-
-static int
-priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err)
-{
-	/*
-	 * All priv_policy_user checks are preceded by kuid/kgid_has_mapping()
-	 * checks. If we cannot do them, we shouldn't be using ns_capable()
-	 * since we don't know whether the affected files are valid in our
-	 * namespace. Note that kuid_has_mapping() came after cred->user_ns, so
-	 * we shouldn't need to re-check for HAVE_CRED_USER_NS
-	 */
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	return (priv_policy_ns(cr, capability, all, err, cr->user_ns));
-#else
-	return (priv_policy_ns(cr, capability, all, err, NULL));
-#endif
-}
-
-/*
- * Checks for operations that are either client-only or are used by
- * both clients and servers.
- */
-int
-secpolicy_nfs(const cred_t *cr)
-{
-	return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
-}
-
-/*
- * Catch all system configuration.
- */
-int
-secpolicy_sys_config(const cred_t *cr, boolean_t checkonly)
-{
-	return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
-}
-
-/*
- * Like secpolicy_vnode_access() but we get the actual wanted mode and the
- * current mode of the file, not the missing bits.
- *
- * Enforced in the Linux VFS.
- */
-int
-secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner,
-    mode_t curmode, mode_t wantmode)
-{
-	return (0);
-}
-
-/*
- * This is a special routine for ZFS; it is used to determine whether
- * any of the privileges in effect allow any form of access to the
- * file.  There's no reason to audit this or any reason to record
- * this.  More work is needed to do the "KPLD" stuff.
- */
-int
-secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
-{
-	if (crgetfsuid(cr) == owner)
-		return (0);
-
-	if (zpl_inode_owner_or_capable(ip))
-		return (0);
-
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
-		return (EPERM);
-#endif
-
-	if (priv_policy_user(cr, CAP_DAC_OVERRIDE, B_FALSE, EPERM) == 0)
-		return (0);
-
-	if (priv_policy_user(cr, CAP_DAC_READ_SEARCH, B_FALSE, EPERM) == 0)
-		return (0);
-
-	return (EPERM);
-}
-
-/*
- * Determine if subject can chown owner of a file.
- */
-int
-secpolicy_vnode_chown(const cred_t *cr, uid_t owner)
-{
-	if (crgetfsuid(cr) == owner)
-		return (0);
-
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
-		return (EPERM);
-#endif
-
-	return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM));
-}
-
-/*
- * Determine if subject can change group ownership of a file.
- */
-int
-secpolicy_vnode_create_gid(const cred_t *cr)
-{
-	return (priv_policy(cr, CAP_SETGID, B_FALSE, EPERM));
-}
-
-/*
- * Policy determines whether we can remove an entry from a directory,
- * regardless of permission bits.
- */
-int
-secpolicy_vnode_remove(const cred_t *cr)
-{
-	return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM));
-}
-
-/*
- * Determine that subject can modify the mode of a file.  allzone privilege
- * needed when modifying root owned object.
- */
-int
-secpolicy_vnode_setdac(const cred_t *cr, uid_t owner)
-{
-	if (crgetfsuid(cr) == owner)
-		return (0);
-
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
-		return (EPERM);
-#endif
-
-	return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM));
-}
-
-/*
- * Are we allowed to retain the set-uid/set-gid bits when
- * changing ownership or when writing to a file?
- * "issuid" should be true when set-uid; only in that case
- * root ownership is checked (setgid is assumed).
- *
- * Enforced in the Linux VFS.
- */
-int
-secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot)
-{
-	return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
-}
-
-/*
- * Determine that subject can set the file setgid flag.
- */
-int
-secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
-{
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid)))
-		return (EPERM);
-#endif
-	if (crgetfsgid(cr) != gid && !groupmember(gid, cr))
-		return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
-
-	return (0);
-}
-
-/*
- * Determine if the subject can inject faults in the ZFS fault injection
- * framework.  Requires all privileges.
- */
-int
-secpolicy_zinject(const cred_t *cr)
-{
-	return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
-}
-
-/*
- * Determine if the subject has permission to manipulate ZFS datasets
- * (not pools).  Equivalent to the SYS_MOUNT privilege.
- */
-int
-secpolicy_zfs(const cred_t *cr)
-{
-	return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
-}
-
-void
-secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
-{
-	if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 &&
-	    secpolicy_vnode_setid_retain(cr,
-	    (vap->va_mode & S_ISUID) != 0 &&
-	    (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) {
-		vap->va_mask |= AT_MODE;
-		vap->va_mode &= ~(S_ISUID|S_ISGID);
-	}
-}
-
-/*
- * Determine that subject can set the file setid flags.
- */
-static int
-secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
-{
-	if (crgetfsuid(cr) == owner)
-		return (0);
-
-#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
-	if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
-		return (EPERM);
-#endif
-
-	return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
-}
-
-/*
- * Determine that subject can make a file a "sticky".
- *
- * Enforced in the Linux VFS.
- */
-static int
-secpolicy_vnode_stky_modify(const cred_t *cr)
-{
-	return (0);
-}
-
-int
-secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
-    const vattr_t *ovap, cred_t *cr)
-{
-	int error;
-
-	if ((vap->va_mode & S_ISUID) != 0 &&
-	    (error = secpolicy_vnode_setid_modify(cr,
-	    ovap->va_uid)) != 0) {
-		return (error);
-	}
-
-	/*
-	 * Check privilege if attempting to set the
-	 * sticky bit on a non-directory.
-	 */
-	if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0 &&
-	    secpolicy_vnode_stky_modify(cr) != 0) {
-		vap->va_mode &= ~S_ISVTX;
-	}
-
-	/*
-	 * Check for privilege if attempting to set the
-	 * group-id bit.
-	 */
-	if ((vap->va_mode & S_ISGID) != 0 &&
-	    secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) {
-		vap->va_mode &= ~S_ISGID;
-	}
-
-	return (0);
-}
-
-/*
- * Check privileges for setting xvattr attributes
- */
-int
-secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype)
-{
-	return (secpolicy_vnode_chown(cr, owner));
-}
-
-/*
- * Check privileges for setattr attributes.
- *
- * Enforced in the Linux VFS.
- */
-int
-secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap,
-    const struct vattr *ovap, int flags,
-    int unlocked_access(void *, int, cred_t *), void *node)
-{
-	return (0);
-}
-
-/*
- * Check privileges for links.
- *
- * Enforced in the Linux VFS.
- */
-int
-secpolicy_basic_link(const cred_t *cr)
-{
-	return (0);
-}

diff --git a/zfs/module/zfs/qat.c b/zfs/module/zfs/qat.c
deleted file mode 100644
index a6f024c..0000000
--- a/zfs/module/zfs/qat.c
+++ /dev/null

@@ -1,105 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_QAT)
-#include <sys/zfs_context.h>
-#include "qat.h"
-
-qat_stats_t qat_stats = {
-	{ "comp_requests",			KSTAT_DATA_UINT64 },
-	{ "comp_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "comp_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "decomp_requests",			KSTAT_DATA_UINT64 },
-	{ "decomp_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "decomp_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "dc_fails",				KSTAT_DATA_UINT64 },
-	{ "encrypt_requests",			KSTAT_DATA_UINT64 },
-	{ "encrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "encrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "decrypt_requests",			KSTAT_DATA_UINT64 },
-	{ "decrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "decrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "crypt_fails",			KSTAT_DATA_UINT64 },
-	{ "cksum_requests",			KSTAT_DATA_UINT64 },
-	{ "cksum_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "cksum_fails",			KSTAT_DATA_UINT64 },
-};
-
-static kstat_t *qat_ksp = NULL;
-
-CpaStatus
-qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
-{
-	*pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
-	if (*pp_mem_addr == NULL)
-		return (CPA_STATUS_RESOURCE);
-	return (CPA_STATUS_SUCCESS);
-}
-
-void
-qat_mem_free_contig(void **pp_mem_addr)
-{
-	if (*pp_mem_addr != NULL) {
-		kfree(*pp_mem_addr);
-		*pp_mem_addr = NULL;
-	}
-}
-
-int
-qat_init(void)
-{
-	qat_ksp = kstat_create("zfs", 0, "qat", "misc",
-	    KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
-	    KSTAT_FLAG_VIRTUAL);
-	if (qat_ksp != NULL) {
-		qat_ksp->ks_data = &qat_stats;
-		kstat_install(qat_ksp);
-	}
-
-	/*
-	 * Just set the disable flag when qat init failed, qat can be
-	 * turned on again in post-process after zfs module is loaded, e.g.:
-	 * echo 0 > /sys/module/zfs/parameters/zfs_qat_compress_disable
-	 */
-	if (qat_dc_init() != 0)
-		zfs_qat_compress_disable = 1;
-
-	if (qat_cy_init() != 0) {
-		zfs_qat_checksum_disable = 1;
-		zfs_qat_encrypt_disable = 1;
-	}
-
-	return (0);
-}
-
-void
-qat_fini(void)
-{
-	if (qat_ksp != NULL) {
-		kstat_delete(qat_ksp);
-		qat_ksp = NULL;
-	}
-
-	qat_cy_fini();
-	qat_dc_fini();
-}
-
-#endif

diff --git a/zfs/module/zfs/qat_compress.c b/zfs/module/zfs/qat_compress.c
deleted file mode 100644
index 16649d6..0000000
--- a/zfs/module/zfs/qat_compress.c
+++ /dev/null

@@ -1,570 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#if defined(_KERNEL) && defined(HAVE_QAT)
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
-#include <linux/completion.h>
-#include <linux/mod_compat.h>
-#include <sys/zfs_context.h>
-#include <sys/byteorder.h>
-#include <sys/zio.h>
-#include "qat.h"
-
-/*
- * Max instances in a QAT device, each instance is a channel to submit
- * jobs to QAT hardware, this is only for pre-allocating instance and
- * session arrays; the actual number of instances are defined in the
- * QAT driver's configuration file.
- */
-#define	QAT_DC_MAX_INSTANCES	48
-
-/*
- * ZLIB head and foot size
- */
-#define	ZLIB_HEAD_SZ		2
-#define	ZLIB_FOOT_SZ		4
-
-static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
-static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
-static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
-static Cpa16U num_inst = 0;
-static Cpa32U inst_num = 0;
-static boolean_t qat_dc_init_done = B_FALSE;
-int zfs_qat_compress_disable = 0;
-
-boolean_t
-qat_dc_use_accel(size_t s_len)
-{
-	return (!zfs_qat_compress_disable &&
-	    qat_dc_init_done &&
-	    s_len >= QAT_MIN_BUF_SIZE &&
-	    s_len <= QAT_MAX_BUF_SIZE);
-}
-
-static void
-qat_dc_callback(void *p_callback, CpaStatus status)
-{
-	if (p_callback != NULL)
-		complete((struct completion *)p_callback);
-}
-
-static void
-qat_dc_clean(void)
-{
-	Cpa16U buff_num = 0;
-	Cpa16U num_inter_buff_lists = 0;
-
-	for (Cpa16U i = 0; i < num_inst; i++) {
-		cpaDcStopInstance(dc_inst_handles[i]);
-		QAT_PHYS_CONTIG_FREE(session_handles[i]);
-		/* free intermediate buffers  */
-		if (buffer_array[i] != NULL) {
-			cpaDcGetNumIntermediateBuffers(
-			    dc_inst_handles[i], &num_inter_buff_lists);
-			for (buff_num = 0; buff_num < num_inter_buff_lists;
-			    buff_num++) {
-				CpaBufferList *buffer_inter =
-				    buffer_array[i][buff_num];
-				if (buffer_inter->pBuffers) {
-					QAT_PHYS_CONTIG_FREE(
-					    buffer_inter->pBuffers->pData);
-					QAT_PHYS_CONTIG_FREE(
-					    buffer_inter->pBuffers);
-				}
-				QAT_PHYS_CONTIG_FREE(
-				    buffer_inter->pPrivateMetaData);
-				QAT_PHYS_CONTIG_FREE(buffer_inter);
-			}
-		}
-	}
-
-	num_inst = 0;
-	qat_dc_init_done = B_FALSE;
-}
-
-int
-qat_dc_init(void)
-{
-	CpaStatus status = CPA_STATUS_SUCCESS;
-	Cpa32U sess_size = 0;
-	Cpa32U ctx_size = 0;
-	Cpa16U num_inter_buff_lists = 0;
-	Cpa16U buff_num = 0;
-	Cpa32U buff_meta_size = 0;
-	CpaDcSessionSetupData sd = {0};
-
-	if (qat_dc_init_done)
-		return (0);
-
-	status = cpaDcGetNumInstances(&num_inst);
-	if (status != CPA_STATUS_SUCCESS)
-		return (-1);
-
-	/* if the user has configured no QAT compression units just return */
-	if (num_inst == 0)
-		return (0);
-
-	if (num_inst > QAT_DC_MAX_INSTANCES)
-		num_inst = QAT_DC_MAX_INSTANCES;
-
-	status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
-	if (status != CPA_STATUS_SUCCESS)
-		return (-1);
-
-	for (Cpa16U i = 0; i < num_inst; i++) {
-		cpaDcSetAddressTranslation(dc_inst_handles[i],
-		    (void*)virt_to_phys);
-
-		status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
-		    1, &buff_meta_size);
-
-		if (status == CPA_STATUS_SUCCESS)
-			status = cpaDcGetNumIntermediateBuffers(
-			    dc_inst_handles[i], &num_inter_buff_lists);
-
-		if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
-			status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
-			    num_inter_buff_lists *
-			    sizeof (CpaBufferList *));
-
-		for (buff_num = 0; buff_num < num_inter_buff_lists;
-		    buff_num++) {
-			if (status == CPA_STATUS_SUCCESS)
-				status = QAT_PHYS_CONTIG_ALLOC(
-				    &buffer_array[i][buff_num],
-				    sizeof (CpaBufferList));
-
-			if (status == CPA_STATUS_SUCCESS)
-				status = QAT_PHYS_CONTIG_ALLOC(
-				    &buffer_array[i][buff_num]->
-				    pPrivateMetaData,
-				    buff_meta_size);
-
-			if (status == CPA_STATUS_SUCCESS)
-				status = QAT_PHYS_CONTIG_ALLOC(
-				    &buffer_array[i][buff_num]->pBuffers,
-				    sizeof (CpaFlatBuffer));
-
-			if (status == CPA_STATUS_SUCCESS) {
-				/*
-				 *  implementation requires an intermediate
-				 *  buffer approximately twice the size of
-				 *  output buffer, which is 2x max buffer
-				 *  size here.
-				 */
-				status = QAT_PHYS_CONTIG_ALLOC(
-				    &buffer_array[i][buff_num]->pBuffers->
-				    pData, 2 * QAT_MAX_BUF_SIZE);
-				if (status != CPA_STATUS_SUCCESS)
-					goto fail;
-
-				buffer_array[i][buff_num]->numBuffers = 1;
-				buffer_array[i][buff_num]->pBuffers->
-				    dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
-			}
-		}
-
-		status = cpaDcStartInstance(dc_inst_handles[i],
-		    num_inter_buff_lists, buffer_array[i]);
-		if (status != CPA_STATUS_SUCCESS)
-			goto fail;
-
-		sd.compLevel = CPA_DC_L1;
-		sd.compType = CPA_DC_DEFLATE;
-		sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
-		sd.sessDirection = CPA_DC_DIR_COMBINED;
-		sd.sessState = CPA_DC_STATELESS;
-		sd.deflateWindowSize = 7;
-		sd.checksum = CPA_DC_ADLER32;
-		status = cpaDcGetSessionSize(dc_inst_handles[i],
-		    &sd, &sess_size, &ctx_size);
-		if (status != CPA_STATUS_SUCCESS)
-			goto fail;
-
-		QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
-		if (session_handles[i] == NULL)
-			goto fail;
-
-		status = cpaDcInitSession(dc_inst_handles[i],
-		    session_handles[i],
-		    &sd, NULL, qat_dc_callback);
-		if (status != CPA_STATUS_SUCCESS)
-			goto fail;
-	}
-
-	qat_dc_init_done = B_TRUE;
-	return (0);
-fail:
-	qat_dc_clean();
-	return (-1);
-}
-
-void
-qat_dc_fini(void)
-{
-	if (!qat_dc_init_done)
-		return;
-
-	qat_dc_clean();
-}
-
-/*
- * The "add" parameter is an additional buffer which is passed
- * to QAT as a scratch buffer alongside the destination buffer
- * in case the "compressed" data ends up being larger than the
- * original source data. This is necessary to prevent QAT from
- * generating buffer overflow warnings for incompressible data.
- */
-static int
-qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
-    char *dst, int dst_len, char *add, int add_len, size_t *c_len)
-{
-	CpaInstanceHandle dc_inst_handle;
-	CpaDcSessionHandle session_handle;
-	CpaBufferList *buf_list_src = NULL;
-	CpaBufferList *buf_list_dst = NULL;
-	CpaFlatBuffer *flat_buf_src = NULL;
-	CpaFlatBuffer *flat_buf_dst = NULL;
-	Cpa8U *buffer_meta_src = NULL;
-	Cpa8U *buffer_meta_dst = NULL;
-	Cpa32U buffer_meta_size = 0;
-	CpaDcRqResults dc_results;
-	CpaStatus status = CPA_STATUS_FAIL;
-	Cpa32U hdr_sz = 0;
-	Cpa32U compressed_sz;
-	Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2;
-	Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2;
-	Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2;
-	Cpa32U bytes_left;
-	Cpa32U dst_pages = 0;
-	Cpa32U adler32 = 0;
-	char *data;
-	struct page *page;
-	struct page **in_pages = NULL;
-	struct page **out_pages = NULL;
-	struct page **add_pages = NULL;
-	Cpa32U page_off = 0;
-	struct completion complete;
-	Cpa32U page_num = 0;
-	Cpa16U i;
-
-	/*
-	 * We increment num_src_buf and num_dst_buf by 2 to allow
-	 * us to handle non page-aligned buffer addresses and buffers
-	 * whose sizes are not divisible by PAGE_SIZE.
-	 */
-	Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
-	    (num_src_buf * sizeof (CpaFlatBuffer));
-	Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
-	    ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer));
-
-	status = QAT_PHYS_CONTIG_ALLOC(&in_pages,
-	    num_src_buf * sizeof (struct page *));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	status = QAT_PHYS_CONTIG_ALLOC(&out_pages,
-	    num_dst_buf * sizeof (struct page *));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	status = QAT_PHYS_CONTIG_ALLOC(&add_pages,
-	    num_add_buf * sizeof (struct page *));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
-	dc_inst_handle = dc_inst_handles[i];
-	session_handle = session_handles[i];
-
-	cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
-	    &buffer_meta_size);
-	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf,
-	    &buffer_meta_size);
-	status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	/* build source buffer list */
-	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
-
-	buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
-
-	/* build destination buffer list */
-	status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
-
-	buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
-
-	buf_list_src->numBuffers = 0;
-	buf_list_src->pPrivateMetaData = buffer_meta_src;
-	bytes_left = src_len;
-	data = src;
-	page_num = 0;
-	while (bytes_left > 0) {
-		page_off = ((long)data & ~PAGE_MASK);
-		page = qat_mem_to_page(data);
-		in_pages[page_num] = page;
-		flat_buf_src->pData = kmap(page) + page_off;
-		flat_buf_src->dataLenInBytes =
-		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
-
-		bytes_left -= flat_buf_src->dataLenInBytes;
-		data += flat_buf_src->dataLenInBytes;
-		flat_buf_src++;
-		buf_list_src->numBuffers++;
-		page_num++;
-	}
-
-	buf_list_dst->numBuffers = 0;
-	buf_list_dst->pPrivateMetaData = buffer_meta_dst;
-	bytes_left = dst_len;
-	data = dst;
-	page_num = 0;
-	while (bytes_left > 0) {
-		page_off = ((long)data & ~PAGE_MASK);
-		page = qat_mem_to_page(data);
-		flat_buf_dst->pData = kmap(page) + page_off;
-		out_pages[page_num] = page;
-		flat_buf_dst->dataLenInBytes =
-		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
-
-		bytes_left -= flat_buf_dst->dataLenInBytes;
-		data += flat_buf_dst->dataLenInBytes;
-		flat_buf_dst++;
-		buf_list_dst->numBuffers++;
-		page_num++;
-		dst_pages++;
-	}
-
-	/* map additional scratch pages into the destination buffer list */
-	bytes_left = add_len;
-	data = add;
-	page_num = 0;
-	while (bytes_left > 0) {
-		page_off = ((long)data & ~PAGE_MASK);
-		page = qat_mem_to_page(data);
-		flat_buf_dst->pData = kmap(page) + page_off;
-		add_pages[page_num] = page;
-		flat_buf_dst->dataLenInBytes =
-		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
-
-		bytes_left -= flat_buf_dst->dataLenInBytes;
-		data += flat_buf_dst->dataLenInBytes;
-		flat_buf_dst++;
-		buf_list_dst->numBuffers++;
-		page_num++;
-	}
-
-	init_completion(&complete);
-
-	if (dir == QAT_COMPRESS) {
-		QAT_STAT_BUMP(comp_requests);
-		QAT_STAT_INCR(comp_total_in_bytes, src_len);
-
-		cpaDcGenerateHeader(session_handle,
-		    buf_list_dst->pBuffers, &hdr_sz);
-		buf_list_dst->pBuffers->pData += hdr_sz;
-		buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
-		status = cpaDcCompressData(
-		    dc_inst_handle, session_handle,
-		    buf_list_src, buf_list_dst,
-		    &dc_results, CPA_DC_FLUSH_FINAL,
-		    &complete);
-		if (status != CPA_STATUS_SUCCESS) {
-			goto fail;
-		}
-
-		/* we now wait until the completion of the operation. */
-		wait_for_completion(&complete);
-
-		if (dc_results.status != CPA_STATUS_SUCCESS) {
-			status = CPA_STATUS_FAIL;
-			goto fail;
-		}
-
-		compressed_sz = dc_results.produced;
-		if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
-			status = CPA_STATUS_INCOMPRESSIBLE;
-			goto fail;
-		}
-
-		flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
-		/* move to the last page */
-		flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT;
-
-		/* no space for gzip footer in the last page */
-		if (((compressed_sz + hdr_sz) % PAGE_SIZE)
-		    + ZLIB_FOOT_SZ > PAGE_SIZE) {
-			status = CPA_STATUS_INCOMPRESSIBLE;
-			goto fail;
-		}
-
-		/* jump to the end of the buffer and append footer */
-		flat_buf_dst->pData =
-		    (char *)((unsigned long)flat_buf_dst->pData & PAGE_MASK)
-		    + ((compressed_sz + hdr_sz) % PAGE_SIZE);
-		flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ;
-
-		dc_results.produced = 0;
-		status = cpaDcGenerateFooter(session_handle,
-		    flat_buf_dst, &dc_results);
-		if (status != CPA_STATUS_SUCCESS)
-			goto fail;
-
-		*c_len = compressed_sz + dc_results.produced + hdr_sz;
-		QAT_STAT_INCR(comp_total_out_bytes, *c_len);
-	} else {
-		ASSERT3U(dir, ==, QAT_DECOMPRESS);
-		QAT_STAT_BUMP(decomp_requests);
-		QAT_STAT_INCR(decomp_total_in_bytes, src_len);
-
-		buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
-		buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
-		status = cpaDcDecompressData(dc_inst_handle, session_handle,
-		    buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL,
-		    &complete);
-
-		if (CPA_STATUS_SUCCESS != status) {
-			status = CPA_STATUS_FAIL;
-			goto fail;
-		}
-
-		/* we now wait until the completion of the operation. */
-		wait_for_completion(&complete);
-
-		if (dc_results.status != CPA_STATUS_SUCCESS) {
-			status = CPA_STATUS_FAIL;
-			goto fail;
-		}
-
-		/* verify adler checksum */
-		adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ);
-		if (adler32 != BSWAP_32(dc_results.checksum)) {
-			status = CPA_STATUS_FAIL;
-			goto fail;
-		}
-		*c_len = dc_results.produced;
-		QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
-	}
-
-fail:
-	if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE)
-		QAT_STAT_BUMP(dc_fails);
-
-	if (in_pages) {
-		for (page_num = 0;
-		    page_num < buf_list_src->numBuffers;
-		    page_num++) {
-			kunmap(in_pages[page_num]);
-		}
-		QAT_PHYS_CONTIG_FREE(in_pages);
-	}
-
-	if (out_pages) {
-		for (page_num = 0; page_num < dst_pages; page_num++) {
-			kunmap(out_pages[page_num]);
-		}
-		QAT_PHYS_CONTIG_FREE(out_pages);
-	}
-
-	if (add_pages) {
-		for (page_num = 0;
-		    page_num < buf_list_dst->numBuffers - dst_pages;
-		    page_num++) {
-			kunmap(add_pages[page_num]);
-		}
-		QAT_PHYS_CONTIG_FREE(add_pages);
-	}
-
-	QAT_PHYS_CONTIG_FREE(buffer_meta_src);
-	QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
-	QAT_PHYS_CONTIG_FREE(buf_list_src);
-	QAT_PHYS_CONTIG_FREE(buf_list_dst);
-
-	return (status);
-}
-
-/*
- * Entry point for QAT accelerated compression / decompression.
- */
-int
-qat_compress(qat_compress_dir_t dir, char *src, int src_len,
-    char *dst, int dst_len, size_t *c_len)
-{
-	int ret;
-	size_t add_len = 0;
-	void *add = NULL;
-
-	if (dir == QAT_COMPRESS) {
-		add_len = dst_len;
-		add = zio_data_buf_alloc(add_len);
-	}
-
-	ret = qat_compress_impl(dir, src, src_len, dst,
-	    dst_len, add, add_len, c_len);
-
-	if (dir == QAT_COMPRESS)
-		zio_data_buf_free(add, add_len);
-
-	return (ret);
-}
-
-static int
-param_set_qat_compress(const char *val, zfs_kernel_param_t *kp)
-{
-	int ret;
-	int *pvalue = kp->arg;
-	ret = param_set_int(val, kp);
-	if (ret)
-		return (ret);
-	/*
-	 * zfs_qat_compress_disable = 0: enable qat compress
-	 * try to initialize qat instance if it has not been done
-	 */
-	if (*pvalue == 0 && !qat_dc_init_done) {
-		ret = qat_dc_init();
-		if (ret != 0) {
-			zfs_qat_compress_disable = 1;
-			return (ret);
-		}
-	}
-	return (ret);
-}
-
-module_param_call(zfs_qat_compress_disable, param_set_qat_compress,
-    param_get_int, &zfs_qat_compress_disable, 0644);
-MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression");
-
-#endif

diff --git a/zfs/module/zfs/qat_crypt.c b/zfs/module/zfs/qat_crypt.c
deleted file mode 100644
index ec9f085..0000000
--- a/zfs/module/zfs/qat_crypt.c
+++ /dev/null

@@ -1,631 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * This file represents the QAT implementation of checksums and encryption.
- * Internally, QAT shares the same cryptographic instances for both of these
- * operations, so the code has been combined here. QAT data compression uses
- * compression instances, so that code is separated into qat_compress.c
- */
-
-#if defined(_KERNEL) && defined(HAVE_QAT)
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
-#include <linux/completion.h>
-#include <linux/mod_compat.h>
-#include <sys/zfs_context.h>
-#include <sys/zio_crypt.h>
-#include "lac/cpa_cy_im.h"
-#include "lac/cpa_cy_common.h"
-#include "qat.h"
-
-/*
- * Max instances in a QAT device, each instance is a channel to submit
- * jobs to QAT hardware, this is only for pre-allocating instances
- * and session arrays; the actual number of instances are defined in
- * the QAT driver's configure file.
- */
-#define	QAT_CRYPT_MAX_INSTANCES		48
-
-#define	MAX_PAGE_NUM			1024
-
-static Cpa32U inst_num = 0;
-static Cpa16U num_inst = 0;
-static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
-static boolean_t qat_cy_init_done = B_FALSE;
-int zfs_qat_encrypt_disable = 0;
-int zfs_qat_checksum_disable = 0;
-
-typedef struct cy_callback {
-	CpaBoolean verify_result;
-	struct completion complete;
-} cy_callback_t;
-
-static void
-symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
-    void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify)
-{
-	cy_callback_t *cb = p_callback;
-
-	if (cb != NULL) {
-		/* indicate that the function has been called */
-		cb->verify_result = verify;
-		complete(&cb->complete);
-	}
-}
-
-boolean_t
-qat_crypt_use_accel(size_t s_len)
-{
-	return (!zfs_qat_encrypt_disable &&
-	    qat_cy_init_done &&
-	    s_len >= QAT_MIN_BUF_SIZE &&
-	    s_len <= QAT_MAX_BUF_SIZE);
-}
-
-boolean_t
-qat_checksum_use_accel(size_t s_len)
-{
-	return (!zfs_qat_checksum_disable &&
-	    qat_cy_init_done &&
-	    s_len >= QAT_MIN_BUF_SIZE &&
-	    s_len <= QAT_MAX_BUF_SIZE);
-}
-
-void
-qat_cy_clean(void)
-{
-	for (Cpa16U i = 0; i < num_inst; i++)
-		cpaCyStopInstance(cy_inst_handles[i]);
-
-	num_inst = 0;
-	qat_cy_init_done = B_FALSE;
-}
-
-int
-qat_cy_init(void)
-{
-	CpaStatus status = CPA_STATUS_FAIL;
-
-	if (qat_cy_init_done)
-		return (0);
-
-	status = cpaCyGetNumInstances(&num_inst);
-	if (status != CPA_STATUS_SUCCESS)
-		return (-1);
-
-	/* if the user has configured no QAT encryption units just return */
-	if (num_inst == 0)
-		return (0);
-
-	if (num_inst > QAT_CRYPT_MAX_INSTANCES)
-		num_inst = QAT_CRYPT_MAX_INSTANCES;
-
-	status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]);
-	if (status != CPA_STATUS_SUCCESS)
-		return (-1);
-
-	for (Cpa16U i = 0; i < num_inst; i++) {
-		status = cpaCySetAddressTranslation(cy_inst_handles[i],
-		    (void *)virt_to_phys);
-		if (status != CPA_STATUS_SUCCESS)
-			goto error;
-
-		status = cpaCyStartInstance(cy_inst_handles[i]);
-		if (status != CPA_STATUS_SUCCESS)
-			goto error;
-	}
-
-	qat_cy_init_done = B_TRUE;
-	return (0);
-
-error:
-	qat_cy_clean();
-	return (-1);
-}
-
-void
-qat_cy_fini(void)
-{
-	if (!qat_cy_init_done)
-		return;
-
-	qat_cy_clean();
-}
-
-static CpaStatus
-qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
-    CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
-    Cpa64U crypt, Cpa32U aad_len)
-{
-	CpaStatus status = CPA_STATUS_SUCCESS;
-	Cpa32U ctx_size;
-	Cpa32U ciper_algorithm;
-	Cpa32U hash_algorithm;
-	CpaCySymSessionSetupData sd = { 0 };
-
-	if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM) {
-		return (CPA_STATUS_FAIL);
-	} else {
-		ciper_algorithm = CPA_CY_SYM_CIPHER_AES_GCM;
-		hash_algorithm = CPA_CY_SYM_HASH_AES_GCM;
-	}
-
-	sd.cipherSetupData.cipherAlgorithm = ciper_algorithm;
-	sd.cipherSetupData.pCipherKey = key->ck_data;
-	sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8;
-	sd.hashSetupData.hashAlgorithm = hash_algorithm;
-	sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH;
-	sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN;
-	sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len;
-	sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
-	sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
-	sd.digestIsAppended = CPA_FALSE;
-	sd.verifyDigest = CPA_FALSE;
-
-	if (dir == QAT_ENCRYPT) {
-		sd.cipherSetupData.cipherDirection =
-		    CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT;
-		sd.algChainOrder =
-		    CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
-	} else {
-		ASSERT3U(dir, ==, QAT_DECRYPT);
-		sd.cipherSetupData.cipherDirection =
-		    CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT;
-		sd.algChainOrder =
-		    CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
-	}
-
-	status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
-	if (status != CPA_STATUS_SUCCESS)
-		return (status);
-
-	status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
-	if (status != CPA_STATUS_SUCCESS)
-		return (status);
-
-	status = cpaCySymInitSession(inst_handle, symcallback, &sd,
-	    *cy_session_ctx);
-	if (status != CPA_STATUS_SUCCESS) {
-		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
-		return (status);
-	}
-
-	return (CPA_STATUS_SUCCESS);
-}
-
-static CpaStatus
-qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle,
-    CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum)
-{
-	CpaStatus status = CPA_STATUS_SUCCESS;
-	Cpa32U ctx_size;
-	Cpa32U hash_algorithm;
-	CpaCySymSessionSetupData sd = { 0 };
-
-	/*
-	 * ZFS's SHA512 checksum is actually SHA512/256, which uses
-	 * a different IV from standard SHA512. QAT does not support
-	 * SHA512/256, so we can only support SHA256.
-	 */
-	if (cksum == ZIO_CHECKSUM_SHA256)
-		hash_algorithm = CPA_CY_SYM_HASH_SHA256;
-	else
-		return (CPA_STATUS_FAIL);
-
-	sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
-	sd.symOperation = CPA_CY_SYM_OP_HASH;
-	sd.hashSetupData.hashAlgorithm = hash_algorithm;
-	sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN;
-	sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t);
-	sd.digestIsAppended = CPA_FALSE;
-	sd.verifyDigest = CPA_FALSE;
-
-	status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
-	if (status != CPA_STATUS_SUCCESS)
-		return (status);
-
-	status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
-	if (status != CPA_STATUS_SUCCESS)
-		return (status);
-
-	status = cpaCySymInitSession(inst_handle, symcallback, &sd,
-	    *cy_session_ctx);
-	if (status != CPA_STATUS_SUCCESS) {
-		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
-		return (status);
-	}
-
-	return (CPA_STATUS_SUCCESS);
-}
-
-static CpaStatus
-qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
-    CpaBufferList *src, CpaBufferList *dst)
-{
-	CpaStatus status = CPA_STATUS_SUCCESS;
-	Cpa32U meta_size = 0;
-
-	status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size);
-	if (status != CPA_STATUS_SUCCESS)
-		return (status);
-
-	status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size);
-	if (status != CPA_STATUS_SUCCESS)
-		goto error;
-
-	if (src != dst) {
-		status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData,
-		    meta_size);
-		if (status != CPA_STATUS_SUCCESS)
-			goto error;
-	}
-
-	return (CPA_STATUS_SUCCESS);
-
-error:
-	QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData);
-	if (src != dst)
-		QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData);
-
-	return (status);
-}
-
-int
-qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
-    uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
-    crypto_key_t *key, uint64_t crypt, uint32_t enc_len)
-{
-	CpaStatus status = CPA_STATUS_SUCCESS;
-	Cpa16U i;
-	CpaInstanceHandle cy_inst_handle;
-	Cpa16U nr_bufs = (enc_len >> PAGE_SHIFT) + 2;
-	Cpa32U bytes_left = 0;
-	Cpa8S *data = NULL;
-	CpaCySymSessionCtx *cy_session_ctx = NULL;
-	cy_callback_t cb;
-	CpaCySymOpData op_data = { 0 };
-	CpaBufferList src_buffer_list = { 0 };
-	CpaBufferList dst_buffer_list = { 0 };
-	CpaFlatBuffer *flat_src_buf_array = NULL;
-	CpaFlatBuffer *flat_src_buf = NULL;
-	CpaFlatBuffer *flat_dst_buf_array = NULL;
-	CpaFlatBuffer *flat_dst_buf = NULL;
-	struct page *in_pages[MAX_PAGE_NUM];
-	struct page *out_pages[MAX_PAGE_NUM];
-	Cpa32U in_page_num = 0;
-	Cpa32U out_page_num = 0;
-	Cpa32U in_page_off = 0;
-	Cpa32U out_page_off = 0;
-
-	if (dir == QAT_ENCRYPT) {
-		QAT_STAT_BUMP(encrypt_requests);
-		QAT_STAT_INCR(encrypt_total_in_bytes, enc_len);
-	} else {
-		QAT_STAT_BUMP(decrypt_requests);
-		QAT_STAT_INCR(decrypt_total_in_bytes, enc_len);
-	}
-
-	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
-	cy_inst_handle = cy_inst_handles[i];
-
-	status = qat_init_crypt_session_ctx(dir, cy_inst_handle,
-	    &cy_session_ctx, key, crypt, aad_len);
-	if (status != CPA_STATUS_SUCCESS) {
-		/* don't count CCM as a failure since it's not supported */
-		if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM)
-			QAT_STAT_BUMP(crypt_fails);
-		return (status);
-	}
-
-	/*
-	 * We increment nr_bufs by 2 to allow us to handle non
-	 * page-aligned buffer addresses and buffers whose sizes
-	 * are not divisible by PAGE_SIZE.
-	 */
-	status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
-	    &src_buffer_list, &dst_buffer_list);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
-	    nr_bufs * sizeof (CpaFlatBuffer));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-	status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array,
-	    nr_bufs * sizeof (CpaFlatBuffer));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-	status = QAT_PHYS_CONTIG_ALLOC(&op_data.pDigestResult,
-	    ZIO_DATA_MAC_LEN);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-	status = QAT_PHYS_CONTIG_ALLOC(&op_data.pIv,
-	    ZIO_DATA_IV_LEN);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-	if (aad_len > 0) {
-		status = QAT_PHYS_CONTIG_ALLOC(&op_data.pAdditionalAuthData,
-		    aad_len);
-		if (status != CPA_STATUS_SUCCESS)
-			goto fail;
-		bcopy(aad_buf, op_data.pAdditionalAuthData, aad_len);
-	}
-
-	bytes_left = enc_len;
-	data = src_buf;
-	flat_src_buf = flat_src_buf_array;
-	while (bytes_left > 0) {
-		in_page_off = ((long)data & ~PAGE_MASK);
-		in_pages[in_page_num] = qat_mem_to_page(data);
-		flat_src_buf->pData = kmap(in_pages[in_page_num]) + in_page_off;
-		flat_src_buf->dataLenInBytes =
-		    min((long)PAGE_SIZE - in_page_off, (long)bytes_left);
-		data += flat_src_buf->dataLenInBytes;
-		bytes_left -= flat_src_buf->dataLenInBytes;
-		flat_src_buf++;
-		in_page_num++;
-	}
-	src_buffer_list.pBuffers = flat_src_buf_array;
-	src_buffer_list.numBuffers = in_page_num;
-
-	bytes_left = enc_len;
-	data = dst_buf;
-	flat_dst_buf = flat_dst_buf_array;
-	while (bytes_left > 0) {
-		out_page_off = ((long)data & ~PAGE_MASK);
-		out_pages[out_page_num] = qat_mem_to_page(data);
-		flat_dst_buf->pData = kmap(out_pages[out_page_num]) +
-		    out_page_off;
-		flat_dst_buf->dataLenInBytes =
-		    min((long)PAGE_SIZE - out_page_off, (long)bytes_left);
-		data += flat_dst_buf->dataLenInBytes;
-		bytes_left -= flat_dst_buf->dataLenInBytes;
-		flat_dst_buf++;
-		out_page_num++;
-	}
-	dst_buffer_list.pBuffers = flat_dst_buf_array;
-	dst_buffer_list.numBuffers = out_page_num;
-
-	op_data.sessionCtx = cy_session_ctx;
-	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
-	op_data.cryptoStartSrcOffsetInBytes = 0;
-	op_data.messageLenToCipherInBytes = 0;
-	op_data.hashStartSrcOffsetInBytes = 0;
-	op_data.messageLenToHashInBytes = 0;
-	op_data.messageLenToCipherInBytes = enc_len;
-	op_data.ivLenInBytes = ZIO_DATA_IV_LEN;
-	bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN);
-	/* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */
-	if (dir == QAT_DECRYPT)
-		bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN);
-
-	cb.verify_result = CPA_FALSE;
-	init_completion(&cb.complete);
-	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
-	    &src_buffer_list, &dst_buffer_list, NULL);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	/* we now wait until the completion of the operation. */
-	wait_for_completion(&cb.complete);
-
-	if (cb.verify_result == CPA_FALSE) {
-		status = CPA_STATUS_FAIL;
-		goto fail;
-	}
-
-	if (dir == QAT_ENCRYPT) {
-		/* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */
-		bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN);
-		QAT_STAT_INCR(encrypt_total_out_bytes, enc_len);
-	} else {
-		QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
-	}
-
-fail:
-	if (status != CPA_STATUS_SUCCESS)
-		QAT_STAT_BUMP(crypt_fails);
-
-	for (i = 0; i < in_page_num; i++)
-		kunmap(in_pages[i]);
-	for (i = 0; i < out_page_num; i++)
-		kunmap(out_pages[i]);
-
-	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
-	if (aad_len > 0)
-		QAT_PHYS_CONTIG_FREE(op_data.pAdditionalAuthData);
-	QAT_PHYS_CONTIG_FREE(op_data.pIv);
-	QAT_PHYS_CONTIG_FREE(op_data.pDigestResult);
-	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
-	QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData);
-	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
-	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
-	QAT_PHYS_CONTIG_FREE(flat_dst_buf_array);
-
-	return (status);
-}
-
-int
-qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp)
-{
-	CpaStatus status;
-	Cpa16U i;
-	CpaInstanceHandle cy_inst_handle;
-	Cpa16U nr_bufs = (size >> PAGE_SHIFT) + 2;
-	Cpa32U bytes_left = 0;
-	Cpa8S *data = NULL;
-	CpaCySymSessionCtx *cy_session_ctx = NULL;
-	cy_callback_t cb;
-	Cpa8U *digest_buffer = NULL;
-	CpaCySymOpData op_data = { 0 };
-	CpaBufferList src_buffer_list = { 0 };
-	CpaFlatBuffer *flat_src_buf_array = NULL;
-	CpaFlatBuffer *flat_src_buf = NULL;
-	struct page *in_pages[MAX_PAGE_NUM];
-	Cpa32U page_num = 0;
-	Cpa32U page_off = 0;
-
-	QAT_STAT_BUMP(cksum_requests);
-	QAT_STAT_INCR(cksum_total_in_bytes, size);
-
-	i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
-	cy_inst_handle = cy_inst_handles[i];
-
-	status = qat_init_checksum_session_ctx(cy_inst_handle,
-	    &cy_session_ctx, cksum);
-	if (status != CPA_STATUS_SUCCESS) {
-		/* don't count unsupported checksums as a failure */
-		if (cksum == ZIO_CHECKSUM_SHA256 ||
-		    cksum == ZIO_CHECKSUM_SHA512)
-			QAT_STAT_BUMP(cksum_fails);
-		return (status);
-	}
-
-	/*
-	 * We increment nr_bufs by 2 to allow us to handle non
-	 * page-aligned buffer addresses and buffers whose sizes
-	 * are not divisible by PAGE_SIZE.
-	 */
-	status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
-	    &src_buffer_list, &src_buffer_list);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
-	    nr_bufs * sizeof (CpaFlatBuffer));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-	status = QAT_PHYS_CONTIG_ALLOC(&digest_buffer,
-	    sizeof (zio_cksum_t));
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	bytes_left = size;
-	data = buf;
-	flat_src_buf = flat_src_buf_array;
-	while (bytes_left > 0) {
-		page_off = ((long)data & ~PAGE_MASK);
-		in_pages[page_num] = qat_mem_to_page(data);
-		flat_src_buf->pData = kmap(in_pages[page_num]) + page_off;
-		flat_src_buf->dataLenInBytes =
-		    min((long)PAGE_SIZE - page_off, (long)bytes_left);
-		data += flat_src_buf->dataLenInBytes;
-		bytes_left -= flat_src_buf->dataLenInBytes;
-		flat_src_buf++;
-		page_num++;
-	}
-	src_buffer_list.pBuffers = flat_src_buf_array;
-	src_buffer_list.numBuffers = page_num;
-
-	op_data.sessionCtx = cy_session_ctx;
-	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
-	op_data.hashStartSrcOffsetInBytes = 0;
-	op_data.messageLenToHashInBytes = size;
-	op_data.pDigestResult = digest_buffer;
-
-	cb.verify_result = CPA_FALSE;
-	init_completion(&cb.complete);
-	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
-	    &src_buffer_list, &src_buffer_list, NULL);
-	if (status != CPA_STATUS_SUCCESS)
-		goto fail;
-
-	/* we now wait until the completion of the operation. */
-	wait_for_completion(&cb.complete);
-
-	if (cb.verify_result == CPA_FALSE) {
-		status = CPA_STATUS_FAIL;
-		goto fail;
-	}
-
-	bcopy(digest_buffer, zcp, sizeof (zio_cksum_t));
-
-fail:
-	if (status != CPA_STATUS_SUCCESS)
-		QAT_STAT_BUMP(cksum_fails);
-
-	for (i = 0; i < page_num; i++)
-		kunmap(in_pages[i]);
-
-	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
-	QAT_PHYS_CONTIG_FREE(digest_buffer);
-	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
-	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
-	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
-
-	return (status);
-}
-
-static int
-param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp)
-{
-	int ret;
-	int *pvalue = kp->arg;
-	ret = param_set_int(val, kp);
-	if (ret)
-		return (ret);
-	/*
-	 * zfs_qat_encrypt_disable = 0: enable qat encrypt
-	 * try to initialize qat instance if it has not been done
-	 */
-	if (*pvalue == 0 && !qat_cy_init_done) {
-		ret = qat_cy_init();
-		if (ret != 0) {
-			zfs_qat_encrypt_disable = 1;
-			return (ret);
-		}
-	}
-	return (ret);
-}
-
-static int
-param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp)
-{
-	int ret;
-	int *pvalue = kp->arg;
-	ret = param_set_int(val, kp);
-	if (ret)
-		return (ret);
-	/*
-	 * set_checksum_param_ops = 0: enable qat checksum
-	 * try to initialize qat instance if it has not been done
-	 */
-	if (*pvalue == 0 && !qat_cy_init_done) {
-		ret = qat_cy_init();
-		if (ret != 0) {
-			zfs_qat_checksum_disable = 1;
-			return (ret);
-		}
-	}
-	return (ret);
-}
-
-module_param_call(zfs_qat_encrypt_disable, param_set_qat_encrypt,
-    param_get_int, &zfs_qat_encrypt_disable, 0644);
-MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Enable/Disable QAT encryption");
-
-module_param_call(zfs_qat_checksum_disable, param_set_qat_checksum,
-    param_get_int, &zfs_qat_checksum_disable, 0644);
-MODULE_PARM_DESC(zfs_qat_checksum_disable, "Enable/Disable QAT checksumming");
-
-#endif

diff --git a/zfs/module/zfs/range_tree.c b/zfs/module/zfs/range_tree.c
index 391533b..a1a5f79 100644
--- a/zfs/module/zfs/range_tree.c
+++ b/zfs/module/zfs/range_tree.c

@@ -23,7 +23,8 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -74,42 +75,38 @@
  * support removing complete segments.
  */
 
-kmem_cache_t *range_seg_cache;
-
-/* Generic ops for managing an AVL tree alongside a range tree */
-struct range_tree_ops rt_avl_ops = {
-	.rtop_create = rt_avl_create,
-	.rtop_destroy = rt_avl_destroy,
-	.rtop_add = rt_avl_add,
-	.rtop_remove = rt_avl_remove,
-	.rtop_vacate = rt_avl_vacate,
-};
-
-void
-range_tree_init(void)
+static inline void
+rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt)
 {
-	ASSERT(range_seg_cache == NULL);
-	range_seg_cache = kmem_cache_create("range_seg_cache",
-	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
-}
-
-void
-range_tree_fini(void)
-{
-	kmem_cache_destroy(range_seg_cache);
-	range_seg_cache = NULL;
+	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
+	size_t size = 0;
+	switch (rt->rt_type) {
+	case RANGE_SEG32:
+		size = sizeof (range_seg32_t);
+		break;
+	case RANGE_SEG64:
+		size = sizeof (range_seg64_t);
+		break;
+	case RANGE_SEG_GAP:
+		size = sizeof (range_seg_gap_t);
+		break;
+	default:
+		VERIFY(0);
+	}
+	bcopy(src, dest, size);
 }
 
 void
 range_tree_stat_verify(range_tree_t *rt)
 {
 	range_seg_t *rs;
+	zfs_btree_index_t where;
 	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
 	int i;
 
-	for (rs = avl_first(&rt->rt_root); rs != NULL;
-	    rs = AVL_NEXT(&rt->rt_root, rs)) {
-		uint64_t size = rs->rs_end - rs->rs_start;
+	for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
+	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+		uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
 		int idx	= highbit64(size) - 1;
 
 		hist[idx]++;
@@ -119,7 +116,8 @@
 	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
 		if (hist[i] != rt->rt_histogram[i]) {
 			zfs_dbgmsg("i=%d, hist=%px, hist=%llu, rt_hist=%llu",
-			    i, hist, hist[i], rt->rt_histogram[i]);
+			    i, hist, (u_longlong_t)hist[i],
+			    (u_longlong_t)rt->rt_histogram[i]);
 		}
 		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
 	}
@@ -128,7 +126,7 @@
 static void
 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
 {
-	uint64_t size = rs->rs_end - rs->rs_start;
+	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
 	int idx = highbit64(size) - 1;
 
 	ASSERT(size != 0);
@@ -142,7 +140,7 @@
 static void
 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
 {
-	uint64_t size = rs->rs_end - rs->rs_start;
+	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
 	int idx = highbit64(size) - 1;
 
 	ASSERT(size != 0);
@@ -153,14 +151,35 @@
 	rt->rt_histogram[idx]--;
 }
 
-/*
- * NOTE: caller is responsible for all locking.
- */
 static int
-range_tree_seg_compare(const void *x1, const void *x2)
+range_tree_seg32_compare(const void *x1, const void *x2)
 {
-	const range_seg_t *r1 = (const range_seg_t *)x1;
-	const range_seg_t *r2 = (const range_seg_t *)x2;
+	const range_seg32_t *r1 = x1;
+	const range_seg32_t *r2 = x2;
+
+	ASSERT3U(r1->rs_start, <=, r1->rs_end);
+	ASSERT3U(r2->rs_start, <=, r2->rs_end);
+
+	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
+}
+
+static int
+range_tree_seg64_compare(const void *x1, const void *x2)
+{
+	const range_seg64_t *r1 = x1;
+	const range_seg64_t *r2 = x2;
+
+	ASSERT3U(r1->rs_start, <=, r1->rs_end);
+	ASSERT3U(r2->rs_start, <=, r2->rs_end);
+
+	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
+}
+
+static int
+range_tree_seg_gap_compare(const void *x1, const void *x2)
+{
+	const range_seg_gap_t *r1 = x1;
+	const range_seg_gap_t *r2 = x2;
 
 	ASSERT3U(r1->rs_start, <=, r1->rs_end);
 	ASSERT3U(r2->rs_start, <=, r2->rs_end);
@@ -169,18 +188,39 @@
 }
 
 range_tree_t *
-range_tree_create_impl(range_tree_ops_t *ops, void *arg,
-    int (*avl_compare) (const void *, const void *), uint64_t gap)
+range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
+    void *arg, uint64_t start, uint64_t shift, uint64_t gap)
 {
 	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
 
-	avl_create(&rt->rt_root, range_tree_seg_compare,
-	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
+	ASSERT3U(shift, <, 64);
+	ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
+	size_t size;
+	int (*compare) (const void *, const void *);
+	switch (type) {
+	case RANGE_SEG32:
+		size = sizeof (range_seg32_t);
+		compare = range_tree_seg32_compare;
+		break;
+	case RANGE_SEG64:
+		size = sizeof (range_seg64_t);
+		compare = range_tree_seg64_compare;
+		break;
+	case RANGE_SEG_GAP:
+		size = sizeof (range_seg_gap_t);
+		compare = range_tree_seg_gap_compare;
+		break;
+	default:
+		panic("Invalid range seg type %d", type);
+	}
+	zfs_btree_create(&rt->rt_root, compare, size);
 
 	rt->rt_ops = ops;
 	rt->rt_gap = gap;
 	rt->rt_arg = arg;
-	rt->rt_avl_compare = avl_compare;
+	rt->rt_type = type;
+	rt->rt_start = start;
+	rt->rt_shift = shift;
 
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
 		rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -189,9 +229,10 @@
 }
 
 range_tree_t *
-range_tree_create(range_tree_ops_t *ops, void *arg)
+range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type,
+    void *arg, uint64_t start, uint64_t shift)
 {
-	return (range_tree_create_impl(ops, arg, NULL, 0));
+	return (range_tree_create_gap(ops, type, arg, start, shift, 0));
 }
 
 void
@@ -202,19 +243,30 @@
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
 		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
 
-	avl_destroy(&rt->rt_root);
+	zfs_btree_destroy(&rt->rt_root);
 	kmem_free(rt, sizeof (*rt));
 }
 
 void
 range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
 {
-	ASSERT3U(rs->rs_fill + delta, !=, 0);
-	ASSERT3U(rs->rs_fill + delta, <=, rs->rs_end - rs->rs_start);
+	if (delta < 0 && delta * -1 >= rs_get_fill(rs, rt)) {
+		zfs_panic_recover("zfs: attempting to decrease fill to or "
+		    "below 0; probable double remove in segment [%llx:%llx]",
+		    (longlong_t)rs_get_start(rs, rt),
+		    (longlong_t)rs_get_end(rs, rt));
+	}
+	if (rs_get_fill(rs, rt) + delta > rs_get_end(rs, rt) -
+	    rs_get_start(rs, rt)) {
+		zfs_panic_recover("zfs: attempting to increase fill beyond "
+		    "max; probable double add in segment [%llx:%llx]",
+		    (longlong_t)rs_get_start(rs, rt),
+		    (longlong_t)rs_get_end(rs, rt));
+	}
 
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
-	rs->rs_fill += delta;
+	rs_set_fill(rs, rt, rs_get_fill(rs, rt) + delta);
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
 }
@@ -223,28 +275,20 @@
 range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
 {
 	range_tree_t *rt = arg;
-	avl_index_t where;
-	range_seg_t rsearch, *rs_before, *rs_after, *rs;
+	zfs_btree_index_t where;
+	range_seg_t *rs_before, *rs_after, *rs;
+	range_seg_max_t tmp, rsearch;
 	uint64_t end = start + size, gap = rt->rt_gap;
 	uint64_t bridge_size = 0;
 	boolean_t merge_before, merge_after;
 
 	ASSERT3U(size, !=, 0);
 	ASSERT3U(fill, <=, size);
+	ASSERT3U(start + size, >, start);
 
-	rsearch.rs_start = start;
-	rsearch.rs_end = end;
-	rs = avl_find(&rt->rt_root, &rsearch, &where);
-
-	if (gap == 0 && rs != NULL &&
-	    rs->rs_start <= start && rs->rs_end >= end) {
-		zfs_panic_recover("zfs: allocating allocated segment"
-		    "(offset=%llu size=%llu) of (offset=%llu size=%llu)\n",
-		    (longlong_t)start, (longlong_t)size,
-		    (longlong_t)rs->rs_start,
-		    (longlong_t)rs->rs_end - rs->rs_start);
-		return;
-	}
+	rs_set_start(&rsearch, rt, start);
+	rs_set_end(&rsearch, rt, end);
+	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
 
 	/*
 	 * If this is a gap-supporting range tree, it is possible that we
@@ -255,27 +299,32 @@
 	 * the normal code paths.
 	 */
 	if (rs != NULL) {
-		ASSERT3U(gap, !=, 0);
-		if (rs->rs_start <= start && rs->rs_end >= end) {
+		if (gap == 0) {
+			zfs_panic_recover("zfs: adding existent segment to "
+			    "range tree (offset=%llx size=%llx)",
+			    (longlong_t)start, (longlong_t)size);
+			return;
+		}
+		uint64_t rstart = rs_get_start(rs, rt);
+		uint64_t rend = rs_get_end(rs, rt);
+		if (rstart <= start && rend >= end) {
 			range_tree_adjust_fill(rt, rs, fill);
 			return;
 		}
 
-		avl_remove(&rt->rt_root, rs);
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
 			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
 
 		range_tree_stat_decr(rt, rs);
-		rt->rt_space -= rs->rs_end - rs->rs_start;
+		rt->rt_space -= rend - rstart;
 
-		fill += rs->rs_fill;
-		start = MIN(start, rs->rs_start);
-		end = MAX(end, rs->rs_end);
+		fill += rs_get_fill(rs, rt);
+		start = MIN(start, rstart);
+		end = MAX(end, rend);
 		size = end - start;
 
+		zfs_btree_remove(&rt->rt_root, rs);
 		range_tree_add_impl(rt, start, size, fill);
-
-		kmem_cache_free(range_seg_cache, rs);
 		return;
 	}
 
@@ -286,19 +335,21 @@
 	 * If gap != 0, we might need to merge with our neighbors even if we
 	 * aren't directly touching.
 	 */
-	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
-	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
+	zfs_btree_index_t where_before, where_after;
+	rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
+	rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);
 
-	merge_before = (rs_before != NULL && rs_before->rs_end >= start - gap);
-	merge_after = (rs_after != NULL && rs_after->rs_start <= end + gap);
+	merge_before = (rs_before != NULL && rs_get_end(rs_before, rt) >=
+	    start - gap);
+	merge_after = (rs_after != NULL && rs_get_start(rs_after, rt) <= end +
+	    gap);
 
 	if (merge_before && gap != 0)
-		bridge_size += start - rs_before->rs_end;
+		bridge_size += start - rs_get_end(rs_before, rt);
 	if (merge_after && gap != 0)
-		bridge_size += rs_after->rs_start - end;
+		bridge_size += rs_get_start(rs_after, rt) - end;
 
 	if (merge_before && merge_after) {
-		avl_remove(&rt->rt_root, rs_before);
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
 			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
 			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
@@ -307,9 +358,19 @@
 		range_tree_stat_decr(rt, rs_before);
 		range_tree_stat_decr(rt, rs_after);
 
-		rs_after->rs_fill += rs_before->rs_fill + fill;
-		rs_after->rs_start = rs_before->rs_start;
-		kmem_cache_free(range_seg_cache, rs_before);
+		rs_copy(rs_after, &tmp, rt);
+		uint64_t before_start = rs_get_start_raw(rs_before, rt);
+		uint64_t before_fill = rs_get_fill(rs_before, rt);
+		uint64_t after_fill = rs_get_fill(rs_after, rt);
+		zfs_btree_remove_idx(&rt->rt_root, &where_before);
+
+		/*
+		 * We have to re-find the node because our old reference is
+		 * invalid as soon as we do any mutating btree operations.
+		 */
+		rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
+		rs_set_start_raw(rs_after, rt, before_start);
+		rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
 		rs = rs_after;
 	} else if (merge_before) {
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
@@ -317,8 +378,9 @@
 
 		range_tree_stat_decr(rt, rs_before);
 
-		rs_before->rs_fill += fill;
-		rs_before->rs_end = end;
+		uint64_t before_fill = rs_get_fill(rs_before, rt);
+		rs_set_end(rs_before, rt, end);
+		rs_set_fill(rs_before, rt, before_fill + fill);
 		rs = rs_before;
 	} else if (merge_after) {
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
@@ -326,22 +388,26 @@
 
 		range_tree_stat_decr(rt, rs_after);
 
-		rs_after->rs_fill += fill;
-		rs_after->rs_start = start;
+		uint64_t after_fill = rs_get_fill(rs_after, rt);
+		rs_set_start(rs_after, rt, start);
+		rs_set_fill(rs_after, rt, after_fill + fill);
 		rs = rs_after;
 	} else {
-		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
+		rs = &tmp;
 
-		rs->rs_fill = fill;
-		rs->rs_start = start;
-		rs->rs_end = end;
-		avl_insert(&rt->rt_root, rs, where);
+		rs_set_start(rs, rt, start);
+		rs_set_end(rs, rt, end);
+		rs_set_fill(rs, rt, fill);
+		zfs_btree_add_idx(&rt->rt_root, rs, &where);
 	}
 
-	if (gap != 0)
-		ASSERT3U(rs->rs_fill, <=, rs->rs_end - rs->rs_start);
-	else
-		ASSERT3U(rs->rs_fill, ==, rs->rs_end - rs->rs_start);
+	if (gap != 0) {
+		ASSERT3U(rs_get_fill(rs, rt), <=, rs_get_end(rs, rt) -
+		    rs_get_start(rs, rt));
+	} else {
+		ASSERT3U(rs_get_fill(rs, rt), ==, rs_get_end(rs, rt) -
+		    rs_get_start(rs, rt));
+	}
 
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
@@ -360,22 +426,25 @@
 range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
     boolean_t do_fill)
 {
-	avl_index_t where;
-	range_seg_t rsearch, *rs, *newseg;
+	zfs_btree_index_t where;
+	range_seg_t *rs;
+	range_seg_max_t rsearch, rs_tmp;
 	uint64_t end = start + size;
 	boolean_t left_over, right_over;
 
 	VERIFY3U(size, !=, 0);
 	VERIFY3U(size, <=, rt->rt_space);
+	if (rt->rt_type == RANGE_SEG64)
+		ASSERT3U(start + size, >, start);
 
-	rsearch.rs_start = start;
-	rsearch.rs_end = end;
-	rs = avl_find(&rt->rt_root, &rsearch, &where);
+	rs_set_start(&rsearch, rt, start);
+	rs_set_end(&rsearch, rt, end);
+	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
 
 	/* Make sure we completely overlap with someone */
 	if (rs == NULL) {
-		zfs_panic_recover("zfs: freeing free segment "
-		    "(offset=%llu size=%llu)",
+		zfs_panic_recover("zfs: removing nonexistent segment from "
+		    "range tree (offset=%llx size=%llx)",
 		    (longlong_t)start, (longlong_t)size);
 		return;
 	}
@@ -388,30 +457,32 @@
 	 */
 	if (rt->rt_gap != 0) {
 		if (do_fill) {
-			if (rs->rs_fill == size) {
-				start = rs->rs_start;
-				end = rs->rs_end;
+			if (rs_get_fill(rs, rt) == size) {
+				start = rs_get_start(rs, rt);
+				end = rs_get_end(rs, rt);
 				size = end - start;
 			} else {
 				range_tree_adjust_fill(rt, rs, -size);
 				return;
 			}
-		} else if (rs->rs_start != start || rs->rs_end != end) {
+		} else if (rs_get_start(rs, rt) != start ||
+		    rs_get_end(rs, rt) != end) {
 			zfs_panic_recover("zfs: freeing partial segment of "
-			    "gap tree (offset=%llu size=%llu) of "
-			    "(offset=%llu size=%llu)",
+			    "gap tree (offset=%llx size=%llx) of "
+			    "(offset=%llx size=%llx)",
 			    (longlong_t)start, (longlong_t)size,
-			    (longlong_t)rs->rs_start,
-			    (longlong_t)rs->rs_end - rs->rs_start);
+			    (longlong_t)rs_get_start(rs, rt),
+			    (longlong_t)rs_get_end(rs, rt) - rs_get_start(rs,
+			    rt));
 			return;
 		}
 	}
 
-	VERIFY3U(rs->rs_start, <=, start);
-	VERIFY3U(rs->rs_end, >=, end);
+	VERIFY3U(rs_get_start(rs, rt), <=, start);
+	VERIFY3U(rs_get_end(rs, rt), >=, end);
 
-	left_over = (rs->rs_start != start);
-	right_over = (rs->rs_end != end);
+	left_over = (rs_get_start(rs, rt) != start);
+	right_over = (rs_get_end(rs, rt) != end);
 
 	range_tree_stat_decr(rt, rs);
 
@@ -419,24 +490,33 @@
 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
 
 	if (left_over && right_over) {
-		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
-		newseg->rs_start = end;
-		newseg->rs_end = rs->rs_end;
-		newseg->rs_fill = newseg->rs_end - newseg->rs_start;
-		range_tree_stat_incr(rt, newseg);
+		range_seg_max_t newseg;
+		rs_set_start(&newseg, rt, end);
+		rs_set_end_raw(&newseg, rt, rs_get_end_raw(rs, rt));
+		rs_set_fill(&newseg, rt, rs_get_end(rs, rt) - end);
+		range_tree_stat_incr(rt, &newseg);
 
-		rs->rs_end = start;
+		// This modifies the buffer already inside the range tree
+		rs_set_end(rs, rt, start);
 
-		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
+		rs_copy(rs, &rs_tmp, rt);
+		if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
+			zfs_btree_add_idx(&rt->rt_root, &newseg, &where);
+		else
+			zfs_btree_add(&rt->rt_root, &newseg);
+
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
-			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
+			rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
 	} else if (left_over) {
-		rs->rs_end = start;
+		// This modifies the buffer already inside the range tree
+		rs_set_end(rs, rt, start);
+		rs_copy(rs, &rs_tmp, rt);
 	} else if (right_over) {
-		rs->rs_start = end;
+		// This modifies the buffer already inside the range tree
+		rs_set_start(rs, rt, end);
+		rs_copy(rs, &rs_tmp, rt);
 	} else {
-		avl_remove(&rt->rt_root, rs);
-		kmem_cache_free(range_seg_cache, rs);
+		zfs_btree_remove_idx(&rt->rt_root, &where);
 		rs = NULL;
 	}
 
@@ -446,11 +526,12 @@
 		 * the size, since we do not support removing partial segments
 		 * of range trees with gaps.
 		 */
-		rs->rs_fill = rs->rs_end - rs->rs_start;
-		range_tree_stat_incr(rt, rs);
+		rs_set_fill_raw(rs, rt, rs_get_end_raw(rs, rt) -
+		    rs_get_start_raw(rs, rt));
+		range_tree_stat_incr(rt, &rs_tmp);
 
 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
-			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
+			rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
 	}
 
 	rt->rt_space -= size;
@@ -472,14 +553,14 @@
 range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
     uint64_t newstart, uint64_t newsize)
 {
-	int64_t delta = newsize - (rs->rs_end - rs->rs_start);
+	int64_t delta = newsize - (rs_get_end(rs, rt) - rs_get_start(rs, rt));
 
 	range_tree_stat_decr(rt, rs);
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
 
-	rs->rs_start = newstart;
-	rs->rs_end = newstart + newsize;
+	rs_set_start(rs, rt, newstart);
+	rs_set_end(rs, rt, newstart + newsize);
 
 	range_tree_stat_incr(rt, rs);
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
@@ -491,22 +572,27 @@
 static range_seg_t *
 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
 {
-	range_seg_t rsearch;
+	range_seg_max_t rsearch;
 	uint64_t end = start + size;
 
 	VERIFY(size != 0);
 
-	rsearch.rs_start = start;
-	rsearch.rs_end = end;
-	return (avl_find(&rt->rt_root, &rsearch, NULL));
+	rs_set_start(&rsearch, rt, start);
+	rs_set_end(&rsearch, rt, end);
+	return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
 }
 
 range_seg_t *
 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
 {
+	if (rt->rt_type == RANGE_SEG64)
+		ASSERT3U(start + size, >, start);
+
 	range_seg_t *rs = range_tree_find_impl(rt, start, size);
-	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
+	if (rs != NULL && rs_get_start(rs, rt) <= start &&
+	    rs_get_end(rs, rt) >= start + size) {
 		return (rs);
+	}
 	return (NULL);
 }
 
@@ -525,6 +611,40 @@
 }
 
 /*
+ * Returns the first subset of the given range which overlaps with the range
+ * tree. Returns true if there is a segment in the range, and false if there
+ * isn't.
+ */
+boolean_t
+range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
+    uint64_t *ostart, uint64_t *osize)
+{
+	if (rt->rt_type == RANGE_SEG64)
+		ASSERT3U(start + size, >, start);
+
+	range_seg_max_t rsearch;
+	rs_set_start(&rsearch, rt, start);
+	rs_set_end_raw(&rsearch, rt, rs_get_start_raw(&rsearch, rt) + 1);
+
+	zfs_btree_index_t where;
+	range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
+	if (rs != NULL) {
+		*ostart = start;
+		*osize = MIN(size, rs_get_end(rs, rt) - start);
+		return (B_TRUE);
+	}
+
+	rs = zfs_btree_next(&rt->rt_root, &where, &where);
+	if (rs == NULL || rs_get_start(rs, rt) > start + size)
+		return (B_FALSE);
+
+	*ostart = rs_get_start(rs, rt);
+	*osize = MIN(start + size, rs_get_end(rs, rt)) -
+	    rs_get_start(rs, rt);
+	return (B_TRUE);
+}
+
+/*
  * Ensure that this range is not in the tree, regardless of whether
  * it is currently in the tree.
  */
@@ -536,9 +656,12 @@
 	if (size == 0)
 		return;
 
+	if (rt->rt_type == RANGE_SEG64)
+		ASSERT3U(start + size, >, start);
+
 	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
-		uint64_t free_start = MAX(rs->rs_start, start);
-		uint64_t free_end = MIN(rs->rs_end, start + size);
+		uint64_t free_start = MAX(rs_get_start(rs, rt), start);
+		uint64_t free_end = MIN(rs_get_end(rs, rt), start + size);
 		range_tree_remove(rt, free_start, free_end - free_start);
 	}
 }
@@ -549,7 +672,7 @@
 	range_tree_t *rt;
 
 	ASSERT0(range_tree_space(*rtdst));
-	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
+	ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));
 
 	rt = *rtsrc;
 	*rtsrc = *rtdst;
@@ -559,16 +682,20 @@
 void
 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
 {
-	range_seg_t *rs;
-	void *cookie = NULL;
-
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
 		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
 
-	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
-		if (func != NULL)
-			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
-		kmem_cache_free(range_seg_cache, rs);
+	if (func != NULL) {
+		range_seg_t *rs;
+		zfs_btree_index_t *cookie = NULL;
+
+		while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
+		    NULL) {
+			func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
+			    rs_get_start(rs, rt));
+		}
+	} else {
+		zfs_btree_clear(&rt->rt_root);
 	}
 
 	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
@@ -578,16 +705,18 @@
 void
 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
 {
-	range_seg_t *rs;
-
-	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
-		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
+	zfs_btree_index_t where;
+	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
+	    rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+		func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
+		    rs_get_start(rs, rt));
+	}
 }
 
 range_seg_t *
 range_tree_first(range_tree_t *rt)
 {
-	return (avl_first(&rt->rt_root));
+	return (zfs_btree_first(&rt->rt_root, NULL));
 }
 
 uint64_t
@@ -596,6 +725,12 @@
 	return (rt->rt_space);
 }
 
+uint64_t
+range_tree_numsegs(range_tree_t *rt)
+{
+	return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
+}
+
 boolean_t
 range_tree_is_empty(range_tree_t *rt)
 {
@@ -603,63 +738,109 @@
 	return (range_tree_space(rt) == 0);
 }
 
-/* Generic range tree functions for maintaining segments in an AVL tree. */
+/*
+ * Remove any overlapping ranges between the given segment [start, end)
+ * from removefrom. Add non-overlapping leftovers to addto.
+ */
 void
-rt_avl_create(range_tree_t *rt, void *arg)
+range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
+    range_tree_t *removefrom, range_tree_t *addto)
 {
-	avl_tree_t *tree = arg;
+	zfs_btree_index_t where;
+	range_seg_max_t starting_rs;
+	rs_set_start(&starting_rs, removefrom, start);
+	rs_set_end_raw(&starting_rs, removefrom, rs_get_start_raw(&starting_rs,
+	    removefrom) + 1);
 
-	avl_create(tree, rt->rt_avl_compare, sizeof (range_seg_t),
-	    offsetof(range_seg_t, rs_pp_node));
+	range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
+	    &starting_rs, &where);
+
+	if (curr == NULL)
+		curr = zfs_btree_next(&removefrom->rt_root, &where, &where);
+
+	range_seg_t *next;
+	for (; curr != NULL; curr = next) {
+		if (start == end)
+			return;
+		VERIFY3U(start, <, end);
+
+		/* there is no overlap */
+		if (end <= rs_get_start(curr, removefrom)) {
+			range_tree_add(addto, start, end - start);
+			return;
+		}
+
+		uint64_t overlap_start = MAX(rs_get_start(curr, removefrom),
+		    start);
+		uint64_t overlap_end = MIN(rs_get_end(curr, removefrom),
+		    end);
+		uint64_t overlap_size = overlap_end - overlap_start;
+		ASSERT3S(overlap_size, >, 0);
+		range_seg_max_t rs;
+		rs_copy(curr, &rs, removefrom);
+
+		range_tree_remove(removefrom, overlap_start, overlap_size);
+
+		if (start < overlap_start)
+			range_tree_add(addto, start, overlap_start - start);
+
+		start = overlap_end;
+		next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
+		/*
+		 * If we find something here, we only removed part of the
+		 * curr segment. Either there's some left at the end
+		 * because we've reached the end of the range we're removing,
+		 * or there's some left at the start because we started
+		 * partway through the range.  Either way, we continue with
+		 * the loop. If it's the former, we'll return at the start of
+		 * the loop, and if it's the latter we'll see if there is more
+		 * area to process.
+		 */
+		if (next != NULL) {
+			ASSERT(start == end || start == rs_get_end(&rs,
+			    removefrom));
+		}
+
+		next = zfs_btree_next(&removefrom->rt_root, &where, &where);
+	}
+	VERIFY3P(curr, ==, NULL);
+
+	if (start != end) {
+		VERIFY3U(start, <, end);
+		range_tree_add(addto, start, end - start);
+	} else {
+		VERIFY3U(start, ==, end);
+	}
 }
 
+/*
+ * For each entry in rt, if it exists in removefrom, remove it
+ * from removefrom. Otherwise, add it to addto.
+ */
 void
-rt_avl_destroy(range_tree_t *rt, void *arg)
+range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
+    range_tree_t *addto)
 {
-	avl_tree_t *tree = arg;
-
-	ASSERT0(avl_numnodes(tree));
-	avl_destroy(tree);
-}
-
-void
-rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
-	avl_tree_t *tree = arg;
-	avl_add(tree, rs);
-}
-
-void
-rt_avl_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
-	avl_tree_t *tree = arg;
-	avl_remove(tree, rs);
-}
-
-void
-rt_avl_vacate(range_tree_t *rt, void *arg)
-{
-	/*
-	 * Normally one would walk the tree freeing nodes along the way.
-	 * Since the nodes are shared with the range trees we can avoid
-	 * walking all nodes and just reinitialize the avl tree. The nodes
-	 * will be freed by the range tree, so we don't want to free them here.
-	 */
-	rt_avl_create(rt, arg);
+	zfs_btree_index_t where;
+	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
+	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+		range_tree_remove_xor_add_segment(rs_get_start(rs, rt),
+		    rs_get_end(rs, rt), removefrom, addto);
+	}
 }
 
 uint64_t
 range_tree_min(range_tree_t *rt)
 {
-	range_seg_t *rs = avl_first(&rt->rt_root);
-	return (rs != NULL ? rs->rs_start : 0);
+	range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
+	return (rs != NULL ? rs_get_start(rs, rt) : 0);
 }
 
 uint64_t
 range_tree_max(range_tree_t *rt)
 {
-	range_seg_t *rs = avl_last(&rt->rt_root);
-	return (rs != NULL ? rs->rs_end : 0);
+	range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
+	return (rs != NULL ? rs_get_end(rs, rt) : 0);
 }
 
 uint64_t

diff --git a/zfs/module/zfs/refcount.c b/zfs/module/zfs/refcount.c
index a7e46d3..35a379d 100644
--- a/zfs/module/zfs/refcount.c
+++ b/zfs/module/zfs/refcount.c

@@ -20,17 +20,18 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2021 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
-#include <sys/refcount.h>
+#include <sys/zfs_refcount.h>
 
-#ifdef _KERNEL
-int reference_tracking_enable = FALSE; /* runs out of memory too easily */
-#else
-int reference_tracking_enable = TRUE;
-#endif
+/*
+ * Reference count tracking is disabled by default.  Its memory requirements
+ * are reasonable, however as implemented it consumes a significant amount of
+ * cpu time.  Until its performance is improved it should be manually enabled.
+ */
+int reference_tracking_enable = FALSE;
 int reference_history = 3; /* tunable */
 
 #ifdef	ZFS_DEBUG
@@ -86,7 +87,7 @@
 {
 	reference_t *ref;
 
-	ASSERT(rc->rc_count == number);
+	ASSERT3U(rc->rc_count, ==, number);
 	while ((ref = list_head(&rc->rc_list))) {
 		list_remove(&rc->rc_list, ref);
 		kmem_cache_free(reference_cache, ref);
@@ -111,13 +112,13 @@
 int
 zfs_refcount_is_zero(zfs_refcount_t *rc)
 {
-	return (rc->rc_count == 0);
+	return (zfs_refcount_count(rc) == 0);
 }
 
 int64_t
 zfs_refcount_count(zfs_refcount_t *rc)
 {
-	return (rc->rc_count);
+	return (atomic_load_64(&rc->rc_count));
 }
 
 int64_t
@@ -126,15 +127,18 @@
 	reference_t *ref = NULL;
 	int64_t count;
 
-	if (rc->rc_tracked) {
-		ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
-		ref->ref_holder = holder;
-		ref->ref_number = number;
+	if (!rc->rc_tracked) {
+		count = atomic_add_64_nv(&(rc)->rc_count, number);
+		ASSERT3U(count, >=, number);
+		return (count);
 	}
+
+	ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
+	ref->ref_holder = holder;
+	ref->ref_number = number;
 	mutex_enter(&rc->rc_mtx);
-	ASSERT(rc->rc_count >= 0);
-	if (rc->rc_tracked)
-		list_insert_head(&rc->rc_list, ref);
+	ASSERT3U(rc->rc_count, >=, 0);
+	list_insert_head(&rc->rc_list, ref);
 	rc->rc_count += number;
 	count = rc->rc_count;
 	mutex_exit(&rc->rc_mtx);
@@ -155,16 +159,14 @@
 	reference_t *ref;
 	int64_t count;
 
-	mutex_enter(&rc->rc_mtx);
-	ASSERT(rc->rc_count >= number);
-
 	if (!rc->rc_tracked) {
-		rc->rc_count -= number;
-		count = rc->rc_count;
-		mutex_exit(&rc->rc_mtx);
+		count = atomic_add_64_nv(&(rc)->rc_count, -number);
+		ASSERT3S(count, >=, 0);
 		return (count);
 	}
 
+	mutex_enter(&rc->rc_mtx);
+	ASSERT3U(rc->rc_count, >=, number);
 	for (ref = list_head(&rc->rc_list); ref;
 	    ref = list_next(&rc->rc_list, ref)) {
 		if (ref->ref_holder == holder && ref->ref_number == number) {
@@ -241,12 +243,10 @@
 	reference_t *ref;
 	boolean_t found = B_FALSE;
 
-	mutex_enter(&rc->rc_mtx);
-	if (!rc->rc_tracked) {
-		mutex_exit(&rc->rc_mtx);
+	if (!rc->rc_tracked)
 		return;
-	}
 
+	mutex_enter(&rc->rc_mtx);
 	for (ref = list_head(&rc->rc_list); ref;
 	    ref = list_next(&rc->rc_list, ref)) {
 		if (ref->ref_holder == current_holder &&
@@ -278,13 +278,10 @@
 {
 	reference_t *ref;
 
+	if (!rc->rc_tracked)
+		return (zfs_refcount_count(rc) > 0);
+
 	mutex_enter(&rc->rc_mtx);
-
-	if (!rc->rc_tracked) {
-		mutex_exit(&rc->rc_mtx);
-		return (rc->rc_count > 0);
-	}
-
 	for (ref = list_head(&rc->rc_list); ref;
 	    ref = list_next(&rc->rc_list, ref)) {
 		if (ref->ref_holder == holder) {
@@ -306,13 +303,10 @@
 {
 	reference_t *ref;
 
-	mutex_enter(&rc->rc_mtx);
-
-	if (!rc->rc_tracked) {
-		mutex_exit(&rc->rc_mtx);
+	if (!rc->rc_tracked)
 		return (B_TRUE);
-	}
 
+	mutex_enter(&rc->rc_mtx);
 	for (ref = list_head(&rc->rc_list); ref;
 	    ref = list_next(&rc->rc_list, ref)) {
 		if (ref->ref_holder == holder) {
@@ -323,4 +317,20 @@
 	mutex_exit(&rc->rc_mtx);
 	return (B_TRUE);
 }
+
+EXPORT_SYMBOL(zfs_refcount_create);
+EXPORT_SYMBOL(zfs_refcount_destroy);
+EXPORT_SYMBOL(zfs_refcount_is_zero);
+EXPORT_SYMBOL(zfs_refcount_count);
+EXPORT_SYMBOL(zfs_refcount_add);
+EXPORT_SYMBOL(zfs_refcount_remove);
+EXPORT_SYMBOL(zfs_refcount_held);
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, ,reference_tracking_enable, INT, ZMOD_RW,
+	"Track reference holders to refcount_t objects");
+
+ZFS_MODULE_PARAM(zfs, ,reference_history, INT, ZMOD_RW,
+	"Maximum reference holders being tracked");
+/* END CSTYLED */
 #endif	/* ZFS_DEBUG */

diff --git a/zfs/module/zfs/rrwlock.c b/zfs/module/zfs/rrwlock.c
index 582b40a..d23fc3a 100644
--- a/zfs/module/zfs/rrwlock.c
+++ b/zfs/module/zfs/rrwlock.c

@@ -26,8 +26,8 @@
  * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
-#include <sys/refcount.h>
 #include <sys/rrwlock.h>
+#include <sys/trace_zfs.h>
 
 /*
  * This file contains the implementation of a re-entrant read
@@ -163,7 +163,7 @@
 rrw_enter_read_impl(rrwlock_t *rrl, boolean_t prio, void *tag)
 {
 	mutex_enter(&rrl->rr_lock);
-#if !defined(DEBUG) && defined(_KERNEL)
+#if !defined(ZFS_DEBUG) && defined(_KERNEL)
 	if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
 	    !rrl->rr_track_all) {
 		rrl->rr_anon_rcount.rc_count++;
@@ -240,7 +240,7 @@
 rrw_exit(rrwlock_t *rrl, void *tag)
 {
 	mutex_enter(&rrl->rr_lock);
-#if !defined(DEBUG) && defined(_KERNEL)
+#if !defined(ZFS_DEBUG) && defined(_KERNEL)
 	if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) {
 		rrl->rr_anon_rcount.rc_count--;
 		if (rrl->rr_anon_rcount.rc_count == 0)

diff --git a/zfs/module/zfs/sa.c b/zfs/module/zfs/sa.c
index 6218383..b69b0c6 100644
--- a/zfs/module/zfs/sa.c
+++ b/zfs/module/zfs/sa.c

@@ -39,7 +39,6 @@
 #include <sys/sa.h>
 #include <sys/sunddi.h>
 #include <sys/sa_impl.h>
-#include <sys/dnode.h>
 #include <sys/errno.h>
 #include <sys/zfs_context.h>
 
@@ -213,20 +212,20 @@
 static int sa_legacy_attr_count = ARRAY_SIZE(sa_legacy_attrs);
 static kmem_cache_t *sa_cache = NULL;
 
-/*ARGSUSED*/
 static int
 sa_cache_constructor(void *buf, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	sa_handle_t *hdl = buf;
 
 	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
 	return (0);
 }
 
-/*ARGSUSED*/
 static void
 sa_cache_destructor(void *buf, void *unused)
 {
+	(void) unused;
 	sa_handle_t *hdl = buf;
 	mutex_destroy(&hdl->sa_lock);
 }
@@ -252,7 +251,7 @@
 	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
 	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
 
-	return (AVL_CMP(node1->lot_num, node2->lot_num));
+	return (TREE_CMP(node1->lot_num, node2->lot_num));
 }
 
 static int
@@ -261,14 +260,14 @@
 	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
 	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
 
-	int cmp = AVL_CMP(node1->lot_hash, node2->lot_hash);
+	int cmp = TREE_CMP(node1->lot_hash, node2->lot_hash);
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_CMP(node1->lot_instance, node2->lot_instance));
+	return (TREE_CMP(node1->lot_instance, node2->lot_instance));
 }
 
-boolean_t
+static boolean_t
 sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
 {
 	int i;
@@ -318,7 +317,7 @@
  *
  * Operates on bulk array, first failure will abort further processing
  */
-int
+static int
 sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
     sa_data_op_t data_op, dmu_tx_t *tx)
 {
@@ -1156,7 +1155,7 @@
 	os->os_sa = NULL;
 }
 
-void
+static void
 sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
     uint16_t length, int length_idx, boolean_t var_length, void *userp)
 {
@@ -1219,25 +1218,25 @@
 	}
 }
 
-/*ARGSUSED*/
-void
+static void
 sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
     uint16_t length, int length_idx, boolean_t variable_length, void *userp)
 {
+	(void) hdr, (void) length_idx, (void) variable_length;
 	sa_handle_t *hdl = userp;
 	sa_os_t *sa = hdl->sa_os->os_sa;
 
 	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
 }
 
-void
+static void
 sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
 {
 	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
 	dmu_buf_impl_t *db;
 	int num_lengths = 1;
 	int i;
-	ASSERTV(sa_os_t *sa = hdl->sa_os->os_sa);
+	sa_os_t *sa __maybe_unused = hdl->sa_os->os_sa;
 
 	ASSERT(MUTEX_HELD(&sa->sa_lock));
 	if (sa_hdr_phys->sa_magic == SA_MAGIC)
@@ -1293,7 +1292,7 @@
 			mutex_exit(&sa->sa_lock);
 			zfs_dbgmsg("Buffer Header: %x != SA_MAGIC:%x "
 			    "object=%#llx\n", sa_hdr_phys->sa_magic, SA_MAGIC,
-			    db->db.db_object);
+			    (u_longlong_t)db->db.db_object);
 			return (SET_ERROR(EIO));
 		}
 		sa_byteswap(hdl, buftype);
@@ -1310,10 +1309,10 @@
 	return (0);
 }
 
-/*ARGSUSED*/
 static void
 sa_evict_sync(void *dbu)
 {
+	(void) dbu;
 	panic("evicting sa dbuf\n");
 }
 
@@ -1344,7 +1343,7 @@
 static void
 sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
 {
-	ASSERTV(sa_os_t *sa = os->os_sa);
+	sa_os_t *sa __maybe_unused = os->os_sa;
 
 	ASSERT(MUTEX_HELD(&sa->sa_lock));
 	(void) zfs_refcount_add(&idx_tab->sa_refcount, NULL);
@@ -1462,7 +1461,7 @@
 	dmu_buf_rele(db, tag);
 }
 
-int
+static int
 sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
 {
 	ASSERT(hdl);
@@ -1503,7 +1502,7 @@
 
 #ifdef _KERNEL
 int
-sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
+sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, zfs_uio_t *uio)
 {
 	int error;
 	sa_bulk_attr_t bulk;
@@ -1516,8 +1515,8 @@
 
 	mutex_enter(&hdl->sa_lock);
 	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) {
-		error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
-		    uio->uio_resid), UIO_READ, uio);
+		error = zfs_uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
+		    zfs_uio_resid(uio)), UIO_READ, uio);
 	}
 	mutex_exit(&hdl->sa_lock);
 	return (error);
@@ -1586,7 +1585,7 @@
 		    &ctime, 16);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL,
 		    &crtime, 16);
-		if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
+		if (Z_ISBLK(ZTOTYPE(zp)) || Z_ISCHR(ZTOTYPE(zp)))
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
 			    &rdev, 8);
 	} else {
@@ -1625,7 +1624,7 @@
 
 	zp->z_projid = projid;
 	zp->z_pflags |= ZFS_PROJID;
-	links = ZTOI(zp)->i_nlink;
+	links = ZTONLNK(zp);
 	count = 0;
 	err = 0;
 
@@ -1646,7 +1645,7 @@
 	SA_ADD_BULK_ATTR(attrs, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
 	SA_ADD_BULK_ATTR(attrs, count, SA_ZPL_PROJID(zfsvfs), NULL, &projid, 8);
 
-	if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
+	if (Z_ISBLK(ZTOTYPE(zp)) || Z_ISCHR(ZTOTYPE(zp)))
 		SA_ADD_BULK_ATTR(attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
 		    &rdev, 8);
 

diff --git a/zfs/module/zfs/sha256.c b/zfs/module/zfs/sha256.c
index 2adadf5..c5b033c 100644
--- a/zfs/module/zfs/sha256.c
+++ b/zfs/module/zfs/sha256.c

@@ -28,9 +28,10 @@
  */
 #include <sys/zfs_context.h>
 #include <sys/zio.h>
+#include <sys/zio_checksum.h>
 #include <sys/sha2.h>
 #include <sys/abd.h>
-#include "qat.h"
+#include <sys/qat.h>
 
 static int
 sha_incremental(void *buf, size_t size, void *arg)
@@ -40,11 +41,11 @@
 	return (0);
 }
 
-/*ARGSUSED*/
 void
 abd_checksum_SHA256(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	int ret;
 	SHA2_CTX ctx;
 	zio_cksum_t tmp;
@@ -77,11 +78,11 @@
 	zcp->zc_word[3] = BE_64(tmp.zc_word[3]);
 }
 
-/*ARGSUSED*/
 void
 abd_checksum_SHA512_native(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	SHA2_CTX	ctx;
 
 	SHA2Init(SHA512_256, &ctx);
@@ -89,7 +90,6 @@
 	SHA2Final(zcp, &ctx);
 }
 
-/*ARGSUSED*/
 void
 abd_checksum_SHA512_byteswap(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)

diff --git a/zfs/module/zfs/skein_zfs.c b/zfs/module/zfs/skein_zfs.c
index 8deb84b..11b9940 100644
--- a/zfs/module/zfs/skein_zfs.c
+++ b/zfs/module/zfs/skein_zfs.c

@@ -24,6 +24,7 @@
  */
 #include <sys/zfs_context.h>
 #include <sys/zio.h>
+#include <sys/zio_checksum.h>
 #include <sys/skein.h>
 
 #include <sys/abd.h>

diff --git a/zfs/module/zfs/spa.c b/zfs/module/zfs/spa.c
index 4f38d28..5f238e6 100644
--- a/zfs/module/zfs/spa.c
+++ b/zfs/module/zfs/spa.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2018, Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -32,6 +32,8 @@
  * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
  * Copyright 2017 Joyent, Inc.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
+ * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
  */
 
 /*
@@ -57,8 +59,10 @@
 #include <sys/vdev_indirect_mapping.h>
 #include <sys/vdev_indirect_births.h>
 #include <sys/vdev_initialize.h>
+#include <sys/vdev_rebuild.h>
 #include <sys/vdev_trim.h>
 #include <sys/vdev_disk.h>
+#include <sys/vdev_draid.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
 #include <sys/mmp.h>
@@ -105,6 +109,7 @@
 typedef enum zti_modes {
 	ZTI_MODE_FIXED,			/* value is # of threads (min 1) */
 	ZTI_MODE_BATCH,			/* cpu-intensive; value is ignored */
+	ZTI_MODE_SCALE,			/* Taskqs scale with CPUs. */
 	ZTI_MODE_NULL,			/* don't create a taskq */
 	ZTI_NMODES
 } zti_modes_t;
@@ -112,6 +117,7 @@
 #define	ZTI_P(n, q)	{ ZTI_MODE_FIXED, (n), (q) }
 #define	ZTI_PCT(n)	{ ZTI_MODE_ONLINE_PERCENT, (n), 1 }
 #define	ZTI_BATCH	{ ZTI_MODE_BATCH, 0, 1 }
+#define	ZTI_SCALE	{ ZTI_MODE_SCALE, 0, 1 }
 #define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }
 
 #define	ZTI_N(n)	ZTI_P(n, 1)
@@ -138,7 +144,8 @@
  * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
  * additional degree of parallelism specified by the number of threads per-
  * taskq and the number of taskqs; when dispatching an event in this case, the
- * particular taskq is chosen at random.
+ * particular taskq is chosen at random. ZTI_SCALE is similar to ZTI_BATCH,
+ * but with the number of taskqs also scaling with the number of CPUs.
  *
  * The different taskq priorities are to handle the different contexts (issue
  * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
@@ -147,9 +154,9 @@
 const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
 	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
-	{ ZTI_N(8),	ZTI_NULL,	ZTI_P(12, 8),	ZTI_NULL }, /* READ */
-	{ ZTI_BATCH,	ZTI_N(5),	ZTI_P(12, 8),	ZTI_N(5) }, /* WRITE */
-	{ ZTI_P(12, 8),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
+	{ ZTI_N(8),	ZTI_NULL,	ZTI_SCALE,	ZTI_NULL }, /* READ */
+	{ ZTI_BATCH,	ZTI_N(5),	ZTI_SCALE,	ZTI_N(5) }, /* WRITE */
+	{ ZTI_SCALE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* CLAIM */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* IOCTL */
 	{ ZTI_N(4),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* TRIM */
@@ -161,7 +168,8 @@
 static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport);
 static void spa_vdev_resilver_done(spa_t *spa);
 
-uint_t		zio_taskq_batch_pct = 75;	/* 1 thread per cpu in pset */
+uint_t		zio_taskq_batch_pct = 80;	/* 1 thread per cpu in pset */
+uint_t		zio_taskq_batch_tpq;		/* threads per taskq */
 boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
 uint_t		zio_taskq_basedc = 80;		/* base duty cycle */
 
@@ -174,6 +182,12 @@
 boolean_t	spa_load_verify_dryrun = B_FALSE;
 
 /*
+ * Allow reading spacemaps on a readonly import (spa_mode == SPA_MODE_READ).
+ * This is used by zdb for spacemap verification.
+ */
+boolean_t	spa_mode_readable_spacemaps = B_FALSE;
+
+/*
  * This (illegal) pool name is used when temporarily importing a spa_t in order
  * to get the vdev stats associated with the imported devices.
  */
@@ -234,6 +248,27 @@
 boolean_t	zfs_pause_spa_sync = B_FALSE;
 
 /*
+ * Variables to indicate the livelist condense zthr func should wait at certain
+ * points for the livelist to be removed - used to test condense/destroy races
+ */
+int zfs_livelist_condense_zthr_pause = 0;
+int zfs_livelist_condense_sync_pause = 0;
+
+/*
+ * Variables to track whether or not condense cancellation has been
+ * triggered in testing.
+ */
+int zfs_livelist_condense_sync_cancel = 0;
+int zfs_livelist_condense_zthr_cancel = 0;
+
+/*
+ * Variable to track whether or not extra ALLOC blkptrs were added to a
+ * livelist entry while it was being condensed (caused by the way we track
+ * remapped blkptrs in dbuf_remap_impl)
+ */
+int zfs_livelist_condense_new_alloc = 0;
+
+/*
  * ==========================================================================
  * SPA properties routines
  * ==========================================================================
@@ -249,15 +284,15 @@
 	const char *propname = zpool_prop_to_name(prop);
 	nvlist_t *propval;
 
-	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
+	propval = fnvlist_alloc();
+	fnvlist_add_uint64(propval, ZPROP_SOURCE, src);
 
 	if (strval != NULL)
-		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
+		fnvlist_add_string(propval, ZPROP_VALUE, strval);
 	else
-		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
+		fnvlist_add_uint64(propval, ZPROP_VALUE, intval);
 
-	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
+	fnvlist_add_nvlist(nvl, propname, propval);
 	nvlist_free(propval);
 }
 
@@ -280,10 +315,12 @@
 		alloc = metaslab_class_get_alloc(mc);
 		alloc += metaslab_class_get_alloc(spa_special_class(spa));
 		alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
+		alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
 
 		size = metaslab_class_get_space(mc);
 		size += metaslab_class_get_space(spa_special_class(spa));
 		size += metaslab_class_get_space(spa_dedup_class(spa));
+		size += metaslab_class_get_space(spa_embedded_log_class(spa));
 
 		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
 		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
@@ -298,7 +335,7 @@
 		spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL,
 		    metaslab_class_expandable_space(mc), src);
 		spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
-		    (spa_mode(spa) == FREAD), src);
+		    (spa_mode(spa) == SPA_MODE_READ), src);
 
 		cap = (size == 0) ? 0 : (alloc * 100 / size);
 		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
@@ -352,6 +389,11 @@
 		    0, ZPROP_SRC_LOCAL);
 	}
 
+	if (spa->spa_compatibility != NULL) {
+		spa_prop_add_list(*nvp, ZPOOL_PROP_COMPATIBILITY,
+		    spa->spa_compatibility, 0, ZPROP_SRC_LOCAL);
+	}
+
 	if (spa->spa_root != NULL)
 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
 		    0, ZPROP_SRC_LOCAL);
@@ -392,12 +434,15 @@
 	objset_t *mos = spa->spa_meta_objset;
 	zap_cursor_t zc;
 	zap_attribute_t za;
+	dsl_pool_t *dp;
 	int err;
 
 	err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
 	if (err)
 		return (err);
 
+	dp = spa_get_dsl(spa);
+	dsl_pool_config_enter(dp, FTAG);
 	mutex_enter(&spa->spa_props_lock);
 
 	/*
@@ -406,10 +451,8 @@
 	spa_prop_get_config(spa, nvp);
 
 	/* If no pool property object, no more prop to get. */
-	if (mos == NULL || spa->spa_pool_props_object == 0) {
-		mutex_exit(&spa->spa_props_lock);
+	if (mos == NULL || spa->spa_pool_props_object == 0)
 		goto out;
-	}
 
 	/*
 	 * Get properties from the MOS pool property object.
@@ -433,23 +476,17 @@
 				src = ZPROP_SRC_LOCAL;
 
 			if (prop == ZPOOL_PROP_BOOTFS) {
-				dsl_pool_t *dp;
 				dsl_dataset_t *ds = NULL;
 
-				dp = spa_get_dsl(spa);
-				dsl_pool_config_enter(dp, FTAG);
 				err = dsl_dataset_hold_obj(dp,
 				    za.za_first_integer, FTAG, &ds);
-				if (err != 0) {
-					dsl_pool_config_exit(dp, FTAG);
+				if (err != 0)
 					break;
-				}
 
 				strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
 				    KM_SLEEP);
 				dsl_dataset_name(ds, strval);
 				dsl_dataset_rele(ds, FTAG);
-				dsl_pool_config_exit(dp, FTAG);
 			} else {
 				strval = NULL;
 				intval = za.za_first_integer;
@@ -480,8 +517,9 @@
 		}
 	}
 	zap_cursor_fini(&zc);
-	mutex_exit(&spa->spa_props_lock);
 out:
+	mutex_exit(&spa->spa_props_lock);
+	dsl_pool_config_exit(dp, FTAG);
 	if (err && err != ENOENT) {
 		nvlist_free(*nvp);
 		*nvp = NULL;
@@ -603,7 +641,6 @@
 
 			if (!error) {
 				objset_t *os;
-				uint64_t propval;
 
 				if (strval == NULL || strval[0] == '\0') {
 					objnum = zpool_prop_default_numeric(
@@ -615,27 +652,9 @@
 				if (error != 0)
 					break;
 
-				/*
-				 * Must be ZPL, and its property settings
-				 * must be supported by GRUB (compression
-				 * is not gzip, and large dnodes are not
-				 * used).
-				 */
-
+				/* Must be ZPL. */
 				if (dmu_objset_type(os) != DMU_OST_ZFS) {
 					error = SET_ERROR(ENOTSUP);
-				} else if ((error =
-				    dsl_prop_get_int_ds(dmu_objset_ds(os),
-				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
-				    &propval)) == 0 &&
-				    !BOOTFS_COMPRESS_VALID(propval)) {
-					error = SET_ERROR(ENOTSUP);
-				} else if ((error =
-				    dsl_prop_get_int_ds(dmu_objset_ds(os),
-				    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
-				    &propval)) == 0 &&
-				    propval != ZFS_DNSIZE_LEGACY) {
-					error = SET_ERROR(ENOTSUP);
 				} else {
 					objnum = dmu_objset_id(os);
 				}
@@ -700,16 +719,6 @@
 				error = SET_ERROR(E2BIG);
 			break;
 
-		case ZPOOL_PROP_DEDUPDITTO:
-			if (spa_version(spa) < SPA_VERSION_DEDUP)
-				error = SET_ERROR(ENOTSUP);
-			else
-				error = nvpair_value_uint64(elem, &intval);
-			if (error == 0 &&
-			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
-				error = SET_ERROR(EINVAL);
-			break;
-
 		default:
 			break;
 		}
@@ -718,6 +727,9 @@
 			break;
 	}
 
+	(void) nvlist_remove_all(props,
+	    zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO));
+
 	if (!error && reset_bootfs) {
 		error = nvlist_remove(props,
 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
@@ -829,11 +841,10 @@
 	}
 }
 
-/*ARGSUSED*/
 static int
 spa_change_guid_check(void *arg, dmu_tx_t *tx)
 {
-	ASSERTV(uint64_t *newguid = arg);
+	uint64_t *newguid __maybe_unused = arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t vdev_state;
@@ -873,7 +884,7 @@
 	spa_config_exit(spa, SCL_STATE, FTAG);
 
 	spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu",
-	    oldguid, *newguid);
+	    (u_longlong_t)oldguid, (u_longlong_t)*newguid);
 }
 
 /*
@@ -899,7 +910,16 @@
 	    spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);
 
 	if (error == 0) {
-		spa_write_cachefile(spa, B_FALSE, B_TRUE);
+		/*
+		 * Clear the kobj flag from all the vdevs to allow
+		 * vdev_cache_process_kobj_evt() to post events to all the
+		 * vdevs since GUID is updated.
+		 */
+		vdev_clear_kobj_evt(spa->spa_root_vdev);
+		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
+			vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]);
+
+		spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
 		spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
 	}
 
@@ -925,7 +945,7 @@
 	ret = memcmp(&sa->se_bookmark, &sb->se_bookmark,
 	    sizeof (zbookmark_phys_t));
 
-	return (AVL_ISIGN(ret));
+	return (TREE_ISIGN(ret));
 }
 
 /*
@@ -956,25 +976,12 @@
 	uint_t value = ztip->zti_value;
 	uint_t count = ztip->zti_count;
 	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
-	uint_t flags = 0;
+	uint_t cpus, flags = TASKQ_DYNAMIC;
 	boolean_t batch = B_FALSE;
 
-	if (mode == ZTI_MODE_NULL) {
-		tqs->stqs_count = 0;
-		tqs->stqs_taskq = NULL;
-		return;
-	}
-
-	ASSERT3U(count, >, 0);
-
-	tqs->stqs_count = count;
-	tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
-
 	switch (mode) {
 	case ZTI_MODE_FIXED:
-		ASSERT3U(value, >=, 1);
-		value = MAX(value, 1);
-		flags |= TASKQ_DYNAMIC;
+		ASSERT3U(value, >, 0);
 		break;
 
 	case ZTI_MODE_BATCH:
@@ -983,6 +990,48 @@
 		value = MIN(zio_taskq_batch_pct, 100);
 		break;
 
+	case ZTI_MODE_SCALE:
+		flags |= TASKQ_THREADS_CPU_PCT;
+		/*
+		 * We want more taskqs to reduce lock contention, but we want
+		 * fewer for better request ordering and CPU utilization.
+		 */
+		cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
+		if (zio_taskq_batch_tpq > 0) {
+			count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) /
+			    zio_taskq_batch_tpq);
+		} else {
+			/*
+			 * Prefer 6 threads per taskq, but no more taskqs
+			 * than threads in them on large systems. For 80%:
+			 *
+			 *                 taskq   taskq   total
+			 * cpus    taskqs  percent threads threads
+			 * ------- ------- ------- ------- -------
+			 * 1       1       80%     1       1
+			 * 2       1       80%     1       1
+			 * 4       1       80%     3       3
+			 * 8       2       40%     3       6
+			 * 16      3       27%     4       12
+			 * 32      5       16%     5       25
+			 * 64      7       11%     7       49
+			 * 128     10      8%      10      100
+			 * 256     14      6%      15      210
+			 */
+			count = 1 + cpus / 6;
+			while (count * count > cpus)
+				count--;
+		}
+		/* Limit each taskq within 100% to not trigger assertion. */
+		count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
+		value = (zio_taskq_batch_pct + count / 2) / count;
+		break;
+
+	case ZTI_MODE_NULL:
+		tqs->stqs_count = 0;
+		tqs->stqs_taskq = NULL;
+		return;
+
 	default:
 		panic("unrecognized mode for %s_%s taskq (%u:%u) in "
 		    "spa_activate()",
@@ -990,12 +1039,20 @@
 		break;
 	}
 
+	ASSERT3U(count, >, 0);
+	tqs->stqs_count = count;
+	tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
+
 	for (uint_t i = 0; i < count; i++) {
 		taskq_t *tq;
 		char name[32];
 
-		(void) snprintf(name, sizeof (name), "%s_%s",
-		    zio_type_name[t], zio_taskq_types[q]);
+		if (count > 1)
+			(void) snprintf(name, sizeof (name), "%s_%s_%u",
+			    zio_type_name[t], zio_taskq_types[q], i);
+		else
+			(void) snprintf(name, sizeof (name), "%s_%s",
+			    zio_type_name[t], zio_taskq_types[q]);
 
 		if (zio_taskq_sysdc && spa->spa_proc != &p0) {
 			if (batch)
@@ -1008,13 +1065,25 @@
 			/*
 			 * The write issue taskq can be extremely CPU
 			 * intensive.  Run it at slightly less important
-			 * priority than the other taskqs.  Under Linux this
-			 * means incrementing the priority value on platforms
-			 * like illumos it should be decremented.
+			 * priority than the other taskqs.
+			 *
+			 * Under Linux and FreeBSD this means incrementing
+			 * the priority value as opposed to platforms like
+			 * illumos where it should be decremented.
+			 *
+			 * On FreeBSD, if priorities divided by four (RQ_PPQ)
+			 * are equal then a difference between them is
+			 * insignificant.
 			 */
-			if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
+			if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) {
+#if defined(__linux__)
 				pri++;
-
+#elif defined(__FreeBSD__)
+				pri += 4;
+#else
+#error "unknown OS"
+#endif
+			}
 			tq = taskq_create_proc(name, value, pri, 50,
 			    INT_MAX, spa->spa_proc, flags);
 		}
@@ -1182,15 +1251,18 @@
  * Activate an uninitialized pool.
  */
 static void
-spa_activate(spa_t *spa, int mode)
+spa_activate(spa_t *spa, spa_mode_t mode)
 {
 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
 
 	spa->spa_state = POOL_STATE_ACTIVE;
 	spa->spa_mode = mode;
+	spa->spa_read_spacemaps = spa_mode_readable_spacemaps;
 
 	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
 	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
+	spa->spa_embedded_log_class =
+	    metaslab_class_create(spa, zfs_metaslab_ops);
 	spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops);
 	spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops);
 
@@ -1256,7 +1328,7 @@
 	/*
 	 * This taskq is used to perform zvol-minor-related tasks
 	 * asynchronously. This has several advantages, including easy
-	 * resolution of various deadlocks (zfsonlinux bug #3681).
+	 * resolution of various deadlocks.
 	 *
 	 * The taskq must be single threaded to ensure tasks are always
 	 * processed in the order in which they were dispatched.
@@ -1276,15 +1348,15 @@
 	 * pool traverse code from monopolizing the global (and limited)
 	 * system_taskq by inappropriately scheduling long running tasks on it.
 	 */
-	spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus,
-	    defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
+	spa->spa_prefetch_taskq = taskq_create("z_prefetch", 100,
+	    defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
 
 	/*
 	 * The taskq to upgrade datasets in this pool. Currently used by
 	 * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA.
 	 */
-	spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus,
-	    defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
+	spa->spa_upgrade_taskq = taskq_create("z_upgrade", 100,
+	    defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
 }
 
 /*
@@ -1342,6 +1414,9 @@
 	metaslab_class_destroy(spa->spa_log_class);
 	spa->spa_log_class = NULL;
 
+	metaslab_class_destroy(spa->spa_embedded_log_class);
+	spa->spa_embedded_log_class = NULL;
+
 	metaslab_class_destroy(spa->spa_special_class);
 	spa->spa_special_class = NULL;
 
@@ -1392,7 +1467,7 @@
  * in the CLOSED state.  This will prep the pool before open/creation/import.
  * All vdev validation is done by the vdev_alloc() routine.
  */
-static int
+int
 spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
     uint_t id, int atype)
 {
@@ -1433,29 +1508,130 @@
 	return (0);
 }
 
+/*
+ * Report whether spa_unload() should first flush all metaslabs via
+ * spa_unload_log_sm_flush_all().  The answer is B_TRUE only when the log
+ * spacemap feature is active, spa_writeable() holds, spa_sync_on is set,
+ * spa_state() is POOL_STATE_EXPORTED, and
+ * zfs_keep_log_spacemaps_at_export is zero.
+ */
+static boolean_t
+spa_should_flush_logs_on_unload(spa_t *spa)
+{
+	boolean_t flush;
+
+	flush = spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) &&
+	    spa_writeable(spa) &&
+	    spa->spa_sync_on &&
+	    spa_state(spa) == POOL_STATE_EXPORTED &&
+	    !zfs_keep_log_spacemaps_at_export;
+
+	return (flush);
+}
+
+/*
+ * Record a freshly assigned txg in spa_log_flushall_txg, which instructs
+ * spa_sync to try flushing all metaslabs in that txg, then wait for it.
+ */
+static void
+spa_unload_log_sm_flush_all(spa_t *spa)
+{
+	dsl_pool_t *dp = spa_get_dsl(spa);
+	dmu_tx_t *tx = dmu_tx_create_dd(dp->dp_mos_dir);
+
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+	ASSERT3U(spa->spa_log_flushall_txg, ==, 0);
+	spa->spa_log_flushall_txg = dmu_tx_get_txg(tx);
+	dmu_tx_commit(tx);
+	txg_wait_synced(dp, spa->spa_log_flushall_txg);
+}
+
+/* Free all spa_sm_logs_by_txg and spa_log_summary entries; zero the stats. */
+static void
+spa_unload_log_sm_metadata(spa_t *spa)
+{
+	log_summary_entry_t *lse;
+	spa_log_sm_t *node;
+	void *walk = NULL;
+
+	while ((node = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
+	    &walk)) != NULL) {
+		VERIFY0(node->sls_mscount);
+		kmem_free(node, sizeof (*node));
+	}
+	while ((lse = list_head(&spa->spa_log_summary)) != NULL) {
+		VERIFY0(lse->lse_mscount);
+		list_remove(&spa->spa_log_summary, lse);
+		kmem_free(lse, sizeof (*lse));
+	}
+	spa->spa_unflushed_stats.sus_blocklimit = 0;
+	spa->spa_unflushed_stats.sus_memused = 0;
+	spa->spa_unflushed_stats.sus_nblocks = 0;
+}
+
+/*
+ * Destroy each auxiliary zthr that exists and clear its spa_t pointer.
+ */
+static void
+spa_destroy_aux_threads(spa_t *spa)
+{
+	zthr_t **slots[] = {
+		&spa->spa_condense_zthr,
+		&spa->spa_checkpoint_discard_zthr,
+		&spa->spa_livelist_delete_zthr,
+		&spa->spa_livelist_condense_zthr,
+	};
+
+	for (size_t i = 0; i < sizeof (slots) / sizeof (slots[0]); i++) {
+		if (*slots[i] == NULL)
+			continue;
+		zthr_destroy(*slots[i]);
+		*slots[i] = NULL;
+	}
+}
+
 /*
  * Opposite of spa_load().
  */
 static void
 spa_unload(spa_t *spa)
 {
-	int i;
-
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
 
 	spa_import_progress_remove(spa_guid(spa));
 	spa_load_note(spa, "UNLOADING");
 
-	/*
-	 * Stop async tasks.
-	 */
-	spa_async_suspend(spa);
+	spa_wake_waiters(spa);
 
-	if (spa->spa_root_vdev) {
-		vdev_t *root_vdev = spa->spa_root_vdev;
-		vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE);
-		vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
-		vdev_autotrim_stop_all(spa);
+	/*
+	 * If we have set the spa_final_txg, we have already performed the
+	 * tasks below in spa_export_common(). We should not redo it here since
+	 * we delay the final TXGs beyond what spa_final_txg is set at.
+	 */
+	if (spa->spa_final_txg == UINT64_MAX) {
+		/*
+		 * If the log space map feature is enabled and the pool is
+		 * getting exported (but not destroyed), we want to spend some
+		 * time flushing as many metaslabs as we can in an attempt to
+		 * destroy log space maps and save import time.
+		 */
+		if (spa_should_flush_logs_on_unload(spa))
+			spa_unload_log_sm_flush_all(spa);
+
+		/*
+		 * Stop async tasks.
+		 */
+		spa_async_suspend(spa);
+
+		if (spa->spa_root_vdev) {
+			vdev_t *root_vdev = spa->spa_root_vdev;
+			vdev_initialize_stop_all(root_vdev,
+			    VDEV_INITIALIZE_ACTIVE);
+			vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
+			vdev_autotrim_stop_all(spa);
+			vdev_rebuild_stop_all(spa);
+		}
 	}
 
 	/*
@@ -1467,16 +1643,15 @@
 	}
 
 	/*
-	 * Even though vdev_free() also calls vdev_metaslab_fini, we need
-	 * to call it earlier, before we wait for async i/o to complete.
-	 * This ensures that there is no async metaslab prefetching, by
-	 * calling taskq_wait(mg_taskq).
+	 * This ensures that there is no async metaslab prefetching
+	 * while we attempt to unload the spa.
 	 */
 	if (spa->spa_root_vdev != NULL) {
-		spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
-		for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++)
-			vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
-		spa_config_exit(spa, SCL_ALL, spa);
+		for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
+			vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
+			if (vc->vdev_mg != NULL)
+				taskq_wait(vc->vdev_mg->mg_taskq);
+		}
 	}
 
 	if (spa->spa_mmp.mmp_thread)
@@ -1497,15 +1672,7 @@
 		spa->spa_vdev_removal = NULL;
 	}
 
-	if (spa->spa_condense_zthr != NULL) {
-		zthr_destroy(spa->spa_condense_zthr);
-		spa->spa_condense_zthr = NULL;
-	}
-
-	if (spa->spa_checkpoint_discard_zthr != NULL) {
-		zthr_destroy(spa->spa_checkpoint_discard_zthr);
-		spa->spa_checkpoint_discard_zthr = NULL;
-	}
+	spa_destroy_aux_threads(spa);
 
 	spa_condense_fini(spa);
 
@@ -1530,13 +1697,14 @@
 	}
 
 	ddt_unload(spa);
+	spa_unload_log_sm_metadata(spa);
 
 	/*
 	 * Drop and purge level 2 cache
 	 */
 	spa_l2cache_drop(spa);
 
-	for (i = 0; i < spa->spa_spares.sav_count; i++)
+	for (int i = 0; i < spa->spa_spares.sav_count; i++)
 		vdev_free(spa->spa_spares.sav_vdevs[i]);
 	if (spa->spa_spares.sav_vdevs) {
 		kmem_free(spa->spa_spares.sav_vdevs,
@@ -1549,7 +1717,7 @@
 	}
 	spa->spa_spares.sav_count = 0;
 
-	for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
 		vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
 	}
@@ -1572,6 +1740,10 @@
 		spa_strfree(spa->spa_comment);
 		spa->spa_comment = NULL;
 	}
+	if (spa->spa_compatibility != NULL) {
+		spa_strfree(spa->spa_compatibility);
+		spa->spa_compatibility = NULL;
+	}
 
 	spa_config_exit(spa, SCL_ALL, spa);
 }
@@ -1625,8 +1797,8 @@
 	if (spa->spa_spares.sav_config == NULL)
 		nspares = 0;
 	else
-		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
-		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+		VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+		    ZPOOL_CONFIG_SPARES, &spares, &nspares));
 
 	spa->spa_spares.sav_count = (int)nspares;
 	spa->spa_spares.sav_vdevs = NULL;
@@ -1688,16 +1860,15 @@
 	 * Recompute the stashed list of spares, with status information
 	 * this time.
 	 */
-	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
-	    DATA_TYPE_NVLIST_ARRAY) == 0);
+	fnvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES);
 
 	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
 	    KM_SLEEP);
 	for (i = 0; i < spa->spa_spares.sav_count; i++)
 		spares[i] = vdev_config_generate(spa,
 		    spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
-	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
+	fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count);
 	for (i = 0; i < spa->spa_spares.sav_count; i++)
 		nvlist_free(spares[i]);
 	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
@@ -1747,16 +1918,15 @@
 		goto out;
 	}
 
-	VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
-	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+	VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config,
+	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
 	newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
 
 	/*
 	 * Process new nvlist of vdevs.
 	 */
 	for (i = 0; i < nl2cache; i++) {
-		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
-		    &guid) == 0);
+		guid = fnvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID);
 
 		newvdevs[i] = NULL;
 		for (j = 0; j < oldnvdevs; j++) {
@@ -1798,6 +1968,15 @@
 
 			if (!vdev_is_dead(vd))
 				l2arc_add_vdev(spa, vd);
+
+			/*
+			 * Upon cache device addition to a pool or pool
+			 * creation with a cache device or if the header
+			 * of the device is invalid we issue an async
+			 * TRIM command for the whole device which will
+			 * execute if l2arc_trim_ahead > 0.
+			 */
+			spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
 		}
 	}
 
@@ -1808,8 +1987,7 @@
 	 * Recompute the stashed list of l2cache devices, with status
 	 * information this time.
 	 */
-	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
-	    DATA_TYPE_NVLIST_ARRAY) == 0);
+	fnvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE);
 
 	if (sav->sav_count > 0)
 		l2cache = kmem_alloc(sav->sav_count * sizeof (void *),
@@ -1817,8 +1995,8 @@
 	for (i = 0; i < sav->sav_count; i++)
 		l2cache[i] = vdev_config_generate(spa,
 		    sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
-	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
-	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
+	fnvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, l2cache,
+	    sav->sav_count);
 
 out:
 	/*
@@ -1928,7 +2106,7 @@
 
 		child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *),
 		    KM_SLEEP);
-		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		nv = fnvlist_alloc();
 
 		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *tvd = rvd->vdev_child[c];
@@ -2004,6 +2182,9 @@
 	return (rv);
 }
 
+/*
+ * Passivate any log vdevs (note, does not apply to embedded log metaslabs).
+ */
 static boolean_t
 spa_passivate_log(spa_t *spa)
 {
@@ -2012,15 +2193,12 @@
 
 	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
 
-	if (!spa_has_slogs(spa))
-		return (B_FALSE);
-
 	for (int c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
-		metaslab_group_t *mg = tvd->vdev_mg;
 
 		if (tvd->vdev_islog) {
-			metaslab_group_passivate(mg);
+			ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+			metaslab_group_passivate(tvd->vdev_mg);
 			slog_found = B_TRUE;
 		}
 	}
@@ -2028,6 +2206,9 @@
 	return (slog_found);
 }
 
+/*
+ * Activate any log vdevs (note, does not apply to embedded log metaslabs).
+ */
 static void
 spa_activate_log(spa_t *spa)
 {
@@ -2037,10 +2218,11 @@
 
 	for (int c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
-		metaslab_group_t *mg = tvd->vdev_mg;
 
-		if (tvd->vdev_islog)
-			metaslab_group_activate(mg);
+		if (tvd->vdev_islog) {
+			ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+			metaslab_group_activate(tvd->vdev_mg);
+		}
 	}
 }
 
@@ -2084,6 +2266,7 @@
 }
 
 typedef struct spa_load_error {
+	boolean_t	sle_verify_data;
 	uint64_t	sle_meta_count;
 	uint64_t	sle_data_count;
 } spa_load_error_t;
@@ -2120,12 +2303,17 @@
 int spa_load_verify_metadata = B_TRUE;
 int spa_load_verify_data = B_TRUE;
 
-/*ARGSUSED*/
 static int
 spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
-	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+	zio_t *rio = arg;
+	spa_load_error_t *sle = rio->io_private;
+
+	(void) zilog, (void) dnp;
+
+	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
+	    BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
 		return (0);
 	/*
 	 * Note: normally this routine will not be called if
@@ -2134,12 +2322,12 @@
 	 */
 	if (!spa_load_verify_metadata)
 		return (0);
-	if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
+	if (!BP_IS_METADATA(bp) &&
+	    (!spa_load_verify_data || !sle->sle_verify_data))
 		return (0);
 
 	uint64_t maxinflight_bytes =
 	    arc_target_bytes() >> spa_load_verify_shift;
-	zio_t *rio = arg;
 	size_t size = BP_GET_PSIZE(bp);
 
 	mutex_enter(&spa->spa_scrub_lock);
@@ -2155,10 +2343,11 @@
 	return (0);
 }
 
-/* ARGSUSED */
-int
+static int
 verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
+	(void) dp, (void) arg;
+
 	if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN)
 		return (SET_ERROR(ENAMETOOLONG));
 
@@ -2176,7 +2365,8 @@
 
 	zpool_get_load_policy(spa->spa_config, &policy);
 
-	if (policy.zlp_rewind & ZPOOL_NEVER_REWIND)
+	if (policy.zlp_rewind & ZPOOL_NEVER_REWIND ||
+	    policy.zlp_maxmeta == UINT64_MAX)
 		return (0);
 
 	dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
@@ -2187,6 +2377,13 @@
 	if (error != 0)
 		return (error);
 
+	/*
+	 * Verify data only if we are rewinding or an error limit was set.
+	 * Otherwise nothing but dbgmsg cares about it, so don't waste time.
+	 */
+	sle.sle_verify_data = (policy.zlp_rewind & ZPOOL_REWIND_MASK) ||
+	    (policy.zlp_maxdata < UINT64_MAX);
+
 	rio = zio_root(spa, NULL, &sle,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
 
@@ -2226,12 +2423,14 @@
 		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
 
 		loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
-		VERIFY(nvlist_add_uint64(spa->spa_load_info,
-		    ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
-		VERIFY(nvlist_add_int64(spa->spa_load_info,
-		    ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
-		VERIFY(nvlist_add_uint64(spa->spa_load_info,
-		    ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
+		fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME,
+		    spa->spa_load_txg_ts);
+		fnvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME,
+		    loss);
+		fnvlist_add_uint64(spa->spa_load_info,
+		    ZPOOL_CONFIG_LOAD_META_ERRORS, sle.sle_meta_count);
+		fnvlist_add_uint64(spa->spa_load_info,
+		    ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count);
 	} else {
 		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
 	}
@@ -2282,6 +2481,385 @@
 	return (SET_ERROR(err));
 }
 
+boolean_t
+spa_livelist_delete_check(spa_t *spa)
+{
+	return (spa->spa_livelists_to_delete != 0); /* 0 means none queued */
+}
+
+static boolean_t	/* registered with zthr_create() for z_livelist_destroy */
+spa_livelist_delete_cb_check(void *arg, zthr_t *z)
+{
+	(void) z;		/* unused */
+	spa_t *spa = arg;	/* the spa handed to zthr_create() */
+	return (spa_livelist_delete_check(spa));
+}
+
+static int	/* bplist_iterate() callback: frees bp, debits dp_free_dir */
+delete_blkptr_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	spa_t *spa = arg;
+	zio_free(spa, tx->tx_txg, bp);
+	dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD,
+	    -bp_get_dsize_sync(spa, bp),	/* all three sizes negated */
+	    -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx);
+	return (0);
+}
+
+static int	/* read the entry at zap_obj's initial cursor position */
+dsl_get_next_livelist_obj(objset_t *os, uint64_t zap_obj, uint64_t *llp)
+{
+	int err;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	zap_cursor_init(&zc, os, zap_obj);
+	err = zap_cursor_retrieve(&zc, &za);	/* one retrieve, no advance */
+	zap_cursor_fini(&zc);
+	if (err == 0)
+		*llp = za.za_first_integer;	/* untouched on error */
+	return (err);
+}
+
+/*
+ * Components of livelist deletion that must be performed in syncing
+ * context: freeing block pointers and updating the pool-wide data
+ * structures to indicate how much work is left to do.
+ */
+typedef struct sublist_delete_arg {
+	spa_t *spa;
+	dsl_deadlist_t *ll;	/* livelist the sublist entry is removed from */
+	uint64_t key;		/* entry key; caller passes dle_mintxg */
+	bplist_t *to_free;	/* block pointers to free */
+} sublist_delete_arg_t;
+
+static void	/* sync task: free the sublist's blocks, then drop its entry */
+sublist_delete_sync(void *arg, dmu_tx_t *tx)
+{
+	sublist_delete_arg_t *sda = arg;
+	spa_t *spa = sda->spa;
+	dsl_deadlist_t *ll = sda->ll;
+	uint64_t key = sda->key;
+	bplist_t *to_free = sda->to_free;
+
+	bplist_iterate(to_free, delete_blkptr_cb, spa, tx); /* free each bp */
+	dsl_deadlist_remove_entry(ll, key, tx);
+}
+
+typedef struct livelist_delete_arg {
+	spa_t *spa;
+	uint64_t ll_obj;	/* livelist object to free */
+	uint64_t zap_obj;	/* ZAP that ll_obj is removed from */
+} livelist_delete_arg_t;
+
+static void	/* sync task: free one livelist, update pool-wide state */
+livelist_delete_sync(void *arg, dmu_tx_t *tx)
+{
+	livelist_delete_arg_t *lda = arg;
+	spa_t *spa = lda->spa;
+	uint64_t ll_obj = lda->ll_obj;
+	uint64_t zap_obj = lda->zap_obj;
+	objset_t *mos = spa->spa_meta_objset;
+	uint64_t count;
+
+	/* free the livelist and decrement the feature count */
+	VERIFY0(zap_remove_int(mos, zap_obj, ll_obj, tx));
+	dsl_deadlist_free(mos, ll_obj, tx);
+	spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx);
+	VERIFY0(zap_count(mos, zap_obj, &count)); /* livelists still queued */
+	if (count == 0) {
+		/* no more livelists to delete */
+		VERIFY0(zap_remove(mos, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_DELETED_CLONES, tx));
+		VERIFY0(zap_destroy(mos, zap_obj, tx));
+		spa->spa_livelists_to_delete = 0;
+		spa_notify_waiters(spa);
+	}
+}
+
+/*
+ * Load in the value for the livelist to be removed and open it. Then,
+ * load its first sublist and determine which block pointers should actually
+ * be freed. Then, call a synctask which performs the actual frees and updates
+ * the pool-wide livelist data.
+ */
+static void
+spa_livelist_delete_cb(void *arg, zthr_t *z)
+{
+	spa_t *spa = arg;
+	uint64_t ll_obj = 0, count;
+	objset_t *mos = spa->spa_meta_objset;
+	uint64_t zap_obj = spa->spa_livelists_to_delete;
+	/*
+	 * Determine the next livelist to delete. This function should only
+	 * be called if there is at least one deleted clone.
+	 */
+	VERIFY0(dsl_get_next_livelist_obj(mos, zap_obj, &ll_obj));
+	VERIFY0(zap_count(mos, ll_obj, &count)); /* sublists left in ll_obj */
+	if (count > 0) {
+		dsl_deadlist_t *ll;
+		dsl_deadlist_entry_t *dle;
+		bplist_t to_free;
+		ll = kmem_zalloc(sizeof (dsl_deadlist_t), KM_SLEEP);
+		dsl_deadlist_open(ll, mos, ll_obj);
+		dle = dsl_deadlist_first(ll);
+		ASSERT3P(dle, !=, NULL);	/* count > 0, so one exists */
+		bplist_create(&to_free);
+		int err = dsl_process_sub_livelist(&dle->dle_bpobj, &to_free,
+		    z, NULL);	/* no size out-param requested here */
+		if (err == 0) {
+			sublist_delete_arg_t sync_arg = {
+			    .spa = spa,
+			    .ll = ll,
+			    .key = dle->dle_mintxg,
+			    .to_free = &to_free
+			};
+			zfs_dbgmsg("deleting sublist (id %llu) from"
+			    " livelist %llu, %lld remaining",
+			    (u_longlong_t)dle->dle_bpobj.bpo_object,
+			    (u_longlong_t)ll_obj, (longlong_t)count - 1);
+			VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+			    sublist_delete_sync, &sync_arg, 0,
+			    ZFS_SPACE_CHECK_DESTROY));
+		} else {
+			VERIFY3U(err, ==, EINTR); /* only tolerated failure */
+		}
+		bplist_clear(&to_free);
+		bplist_destroy(&to_free);
+		dsl_deadlist_close(ll);
+		kmem_free(ll, sizeof (dsl_deadlist_t));
+	} else { /* no sublists left: free the livelist itself */
+		livelist_delete_arg_t sync_arg = {
+		    .spa = spa,
+		    .ll_obj = ll_obj,
+		    .zap_obj = zap_obj
+		};
+		zfs_dbgmsg("deletion of livelist %llu completed",
+		    (u_longlong_t)ll_obj);
+		VERIFY0(dsl_sync_task(spa_name(spa), NULL, livelist_delete_sync,
+		    &sync_arg, 0, ZFS_SPACE_CHECK_DESTROY));
+	}
+}
+
+static void	/* create the z_livelist_destroy zthr for this spa */
+spa_start_livelist_destroy_thread(spa_t *spa)
+{
+	ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL); /* only once */
+	spa->spa_livelist_delete_zthr =
+	    zthr_create("z_livelist_destroy",
+	    spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa,
+	    minclsyspri);
+}
+
+typedef struct livelist_new_arg {
+	bplist_t *allocs;	/* receives bps seen with bp_freed == B_FALSE */
+	bplist_t *frees;	/* receives bps seen with bp_freed == B_TRUE */
+} livelist_new_arg_t;
+
+static int	/* sort each visited bp into the frees or allocs bplist */
+livelist_track_new_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	ASSERT(tx == NULL);	/* callers here iterate without a tx */
+	livelist_new_arg_t *lna = arg;
+	if (bp_freed) {
+		bplist_append(lna->frees, bp);
+	} else {
+		bplist_append(lna->allocs, bp);
+		zfs_livelist_condense_new_alloc++;	/* allocs only */
+	}
+	return (0);
+}
+
+typedef struct livelist_condense_arg {
+	spa_t *spa;
+	bplist_t to_keep;	/* bps re-inserted as allocs on condense */
+	uint64_t first_size;	/* size of 'first' saved in open context */
+	uint64_t next_size;	/* size of 'next' saved in open context */
+} livelist_condense_arg_t;
+
+static void	/* sync task: merge the two chosen sublists into 'first' */
+spa_livelist_condense_sync(void *arg, dmu_tx_t *tx)
+{
+	livelist_condense_arg_t *lca = arg;
+	spa_t *spa = lca->spa;
+	bplist_t new_frees;
+	dsl_dataset_t *ds = spa->spa_to_condense.ds;
+
+	/* Have we been cancelled? */
+	if (spa->spa_to_condense.cancelled) {
+		zfs_livelist_condense_sync_cancel++;
+		goto out;
+	}
+
+	dsl_deadlist_entry_t *first = spa->spa_to_condense.first;
+	dsl_deadlist_entry_t *next = spa->spa_to_condense.next;
+	dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist;
+
+	/*
+	 * It's possible that the livelist was changed while the zthr was
+	 * running. Therefore, we need to check for new blkptrs in the two
+	 * entries being condensed and continue to track them in the livelist.
+	 * Because of the way we handle remapped blkptrs (see dbuf_remap_impl),
+	 * it's possible that the newly added blkptrs are FREEs or ALLOCs so
+	 * we need to sort them into two different bplists.
+	 */
+	uint64_t first_obj = first->dle_bpobj.bpo_object;
+	uint64_t next_obj = next->dle_bpobj.bpo_object;
+	uint64_t cur_first_size = first->dle_bpobj.bpo_phys->bpo_num_blkptrs;
+	uint64_t cur_next_size = next->dle_bpobj.bpo_phys->bpo_num_blkptrs;
+
+	bplist_create(&new_frees);
+	livelist_new_arg_t new_bps = {
+	    .allocs = &lca->to_keep,	/* new allocs join the kept set */
+	    .frees = &new_frees,
+	};
+
+	if (cur_first_size > lca->first_size) {	/* grew since open context */
+		VERIFY0(livelist_bpobj_iterate_from_nofree(&first->dle_bpobj,
+		    livelist_track_new_cb, &new_bps, lca->first_size));
+	}
+	if (cur_next_size > lca->next_size) {	/* grew since open context */
+		VERIFY0(livelist_bpobj_iterate_from_nofree(&next->dle_bpobj,
+		    livelist_track_new_cb, &new_bps, lca->next_size));
+	}
+
+	dsl_deadlist_clear_entry(first, ll, tx);
+	ASSERT(bpobj_is_empty(&first->dle_bpobj));
+	dsl_deadlist_remove_entry(ll, next->dle_mintxg, tx);
+
+	bplist_iterate(&lca->to_keep, dsl_deadlist_insert_alloc_cb, ll, tx);
+	bplist_iterate(&new_frees, dsl_deadlist_insert_free_cb, ll, tx);
+	bplist_destroy(&new_frees);
+
+	char dsname[ZFS_MAX_DATASET_NAME_LEN];
+	dsl_dataset_name(ds, dsname);
+	zfs_dbgmsg("txg %llu condensing livelist of %s (id %llu), bpobj %llu "
+	    "(%llu blkptrs) and bpobj %llu (%llu blkptrs) -> bpobj %llu "
+	    "(%llu blkptrs)", (u_longlong_t)tx->tx_txg, dsname,
+	    (u_longlong_t)ds->ds_object, (u_longlong_t)first_obj,
+	    (u_longlong_t)cur_first_size, (u_longlong_t)next_obj,
+	    (u_longlong_t)cur_next_size,
+	    (u_longlong_t)first->dle_bpobj.bpo_object,
+	    (u_longlong_t)first->dle_bpobj.bpo_phys->bpo_num_blkptrs);
+out:	/* common teardown, also reached when cancelled */
+	dmu_buf_rele(ds->ds_dbuf, spa);
+	spa->spa_to_condense.ds = NULL;
+	bplist_clear(&lca->to_keep);
+	bplist_destroy(&lca->to_keep);
+	kmem_free(lca, sizeof (livelist_condense_arg_t)); /* from the zthr */
+	spa->spa_to_condense.syncing = B_FALSE;	/* zthr check may pass again */
+}
+
+static void	/* zthr body: pre-process two sublists, then queue the sync */
+spa_livelist_condense_cb(void *arg, zthr_t *t)
+{
+	while (zfs_livelist_condense_zthr_pause &&
+	    !(zthr_has_waiters(t) || zthr_iscancelled(t)))
+		delay(1);
+
+	spa_t *spa = arg;
+	dsl_deadlist_entry_t *first = spa->spa_to_condense.first;
+	dsl_deadlist_entry_t *next = spa->spa_to_condense.next;
+	uint64_t first_size, next_size;
+
+	livelist_condense_arg_t *lca =
+	    kmem_alloc(sizeof (livelist_condense_arg_t), KM_SLEEP);
+	bplist_create(&lca->to_keep);
+
+	/*
+	 * Process the livelists (matching FREEs and ALLOCs) in open context
+	 * so we have minimal work in syncing context to condense.
+	 *
+	 * We save bpobj sizes (first_size and next_size) to use later in
+	 * syncing context to determine if entries were added to these sublists
+	 * while in open context. This is possible because the clone is still
+	 * active and open for normal writes and we want to make sure the new,
+	 * unprocessed blockpointers are inserted into the livelist normally.
+	 *
+	 * Note that dsl_process_sub_livelist() both stores the number of
+	 * blockpointers and iterates over them while the bpobj's lock is held,
+	 * so the sizes returned to us are consistent with what was actually
+	 * processed.
+	 */
+	int err = dsl_process_sub_livelist(&first->dle_bpobj, &lca->to_keep, t,
+	    &first_size);
+	if (err == 0)
+		err = dsl_process_sub_livelist(&next->dle_bpobj, &lca->to_keep,
+		    t, &next_size);
+
+	if (err == 0) {
+		while (zfs_livelist_condense_sync_pause &&
+		    !(zthr_has_waiters(t) || zthr_iscancelled(t)))
+			delay(1);
+
+		dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+		dmu_tx_mark_netfree(tx);
+		dmu_tx_hold_space(tx, 1);
+		err = dmu_tx_assign(tx, TXG_NOWAIT | TXG_NOTHROTTLE);
+		if (err == 0) {
+			/*
+			 * Prevent the condense zthr restarting before
+			 * the synctask completes.
+			 */
+			spa->spa_to_condense.syncing = B_TRUE;
+			lca->spa = spa;
+			lca->first_size = first_size;
+			lca->next_size = next_size;
+			dsl_sync_task_nowait(spa_get_dsl(spa),
+			    spa_livelist_condense_sync, lca, tx);
+			dmu_tx_commit(tx);
+			return;	/* the synctask frees lca and releases ds */
+		}
+	}
+	/*
+	 * Condensing can not continue: either it was externally stopped or
+	 * we were unable to assign to a tx because the pool has run out of
+	 * space. In the second case, we'll just end up trying to condense
+	 * again in a later txg.
+	 */
+	ASSERT(err != 0);
+	bplist_clear(&lca->to_keep);
+	bplist_destroy(&lca->to_keep);
+	kmem_free(lca, sizeof (livelist_condense_arg_t));
+	dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf, spa);
+	spa->spa_to_condense.ds = NULL;
+	if (err == EINTR)
+		zfs_livelist_condense_zthr_cancel++;
+}
+
+/*
+ * Check that there is something to condense but that a condense is not
+ * already in progress and that condensing has not been cancelled.
+ */
+static boolean_t
+spa_livelist_condense_cb_check(void *arg, zthr_t *z)
+{
+	(void) z;	/* unused */
+	spa_t *spa = arg;
+	if ((spa->spa_to_condense.ds != NULL) &&
+	    (spa->spa_to_condense.syncing == B_FALSE) &&
+	    (spa->spa_to_condense.cancelled == B_FALSE)) {
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+static void	/* reset condense state, create the z_livelist_condense zthr */
+spa_start_livelist_condensing_thread(spa_t *spa)
+{
+	spa->spa_to_condense.ds = NULL;	/* NULL ds fails the zthr check */
+	spa->spa_to_condense.first = NULL;
+	spa->spa_to_condense.next = NULL;
+	spa->spa_to_condense.syncing = B_FALSE;
+	spa->spa_to_condense.cancelled = B_FALSE;
+
+	ASSERT3P(spa->spa_livelist_condense_zthr, ==, NULL); /* only once */
+	spa->spa_livelist_condense_zthr =
+	    zthr_create("z_livelist_condense",
+	    spa_livelist_condense_cb_check,
+	    spa_livelist_condense_cb, spa, minclsyspri);
+}
+
 static void
 spa_spawn_aux_threads(spa_t *spa)
 {
@@ -2290,11 +2868,14 @@
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	spa_start_indirect_condensing_thread(spa);
+	spa_start_livelist_destroy_thread(spa);
+	spa_start_livelist_condensing_thread(spa);
 
 	ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL);
 	spa->spa_checkpoint_discard_zthr =
-	    zthr_create(spa_checkpoint_discard_thread_check,
-	    spa_checkpoint_discard_thread, spa);
+	    zthr_create("z_checkpoint_discard",
+	    spa_checkpoint_discard_thread_check,
+	    spa_checkpoint_discard_thread, spa, minclsyspri);
 }
 
 /*
@@ -2404,7 +2985,8 @@
 			spa->spa_loaded_ts.tv_nsec = 0;
 		}
 		if (error != EBADF) {
-			zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0);
+			(void) zfs_ereport_post(ereport, spa,
+			    NULL, NULL, NULL, 0);
 		}
 	}
 	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
@@ -2550,8 +3132,10 @@
 
 		zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp "
 		    "mmp_fails=%llu ub_mmp mmp_interval=%llu "
-		    "import_intervals=%u", import_delay, MMP_FAIL_INT(ub),
-		    MMP_INTERVAL(ub), import_intervals);
+		    "import_intervals=%llu", (u_longlong_t)import_delay,
+		    (u_longlong_t)MMP_FAIL_INT(ub),
+		    (u_longlong_t)MMP_INTERVAL(ub),
+		    (u_longlong_t)import_intervals);
 
 	} else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) &&
 	    MMP_FAIL_INT(ub) == 0) {
@@ -2562,8 +3146,10 @@
 
 		zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp "
 		    "mmp_interval=%llu ub_mmp_delay=%llu "
-		    "import_intervals=%u", import_delay, MMP_INTERVAL(ub),
-		    ub->ub_mmp_delay, import_intervals);
+		    "import_intervals=%llu", (u_longlong_t)import_delay,
+		    (u_longlong_t)MMP_INTERVAL(ub),
+		    (u_longlong_t)ub->ub_mmp_delay,
+		    (u_longlong_t)import_intervals);
 
 	} else if (MMP_VALID(ub)) {
 		/*
@@ -2574,15 +3160,18 @@
 		    ub->ub_mmp_delay) * import_intervals);
 
 		zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu "
-		    "import_intervals=%u leaves=%u", import_delay,
-		    ub->ub_mmp_delay, import_intervals,
+		    "import_intervals=%llu leaves=%u",
+		    (u_longlong_t)import_delay,
+		    (u_longlong_t)ub->ub_mmp_delay,
+		    (u_longlong_t)import_intervals,
 		    vdev_count_leaves(spa));
 	} else {
 		/* Using local tunings is the only reasonable option */
 		zfs_dbgmsg("pool last imported on non-MMP aware "
 		    "host using import_delay=%llu multihost_interval=%llu "
-		    "import_intervals=%u", import_delay, multihost_interval,
-		    import_intervals);
+		    "import_intervals=%llu", (u_longlong_t)import_delay,
+		    (u_longlong_t)multihost_interval,
+		    (u_longlong_t)import_intervals);
 	}
 
 	return (import_delay);
@@ -2634,7 +3223,7 @@
 	import_delay = spa_activity_check_duration(spa, ub);
 
 	/* Add a small random factor in case of simultaneous imports (0-25%) */
-	import_delay += import_delay * spa_get_random(250) / 1000;
+	import_delay += import_delay * random_in_range(250) / 1000;
 
 	import_expire = gethrtime() + import_delay;
 
@@ -2650,8 +3239,11 @@
 			    "txg %llu ub_txg  %llu "
 			    "timestamp %llu ub_timestamp  %llu "
 			    "mmp_config %#llx ub_mmp_config %#llx",
-			    txg, ub->ub_txg, timestamp, ub->ub_timestamp,
-			    mmp_config, ub->ub_mmp_config);
+			    (u_longlong_t)txg, (u_longlong_t)ub->ub_txg,
+			    (u_longlong_t)timestamp,
+			    (u_longlong_t)ub->ub_timestamp,
+			    (u_longlong_t)mmp_config,
+			    (u_longlong_t)ub->ub_mmp_config);
 
 			error = SET_ERROR(EREMOTEIO);
 			break;
@@ -2737,7 +3329,8 @@
 			cmn_err(CE_WARN, "pool '%s' could not be "
 			    "loaded as it was last accessed by "
 			    "another system (host: %s hostid: 0x%llx). "
-			    "See: http://illumos.org/msg/ZFS-8000-EY",
+			    "See: https://openzfs.github.io/openzfs-docs/msg/"
+			    "ZFS-8000-EY",
 			    spa_name(spa), hostname, (u_longlong_t)hostid);
 			spa_load_failed(spa, "hostid verification failed: pool "
 			    "last accessed by host: %s (hostid: 0x%llx)",
@@ -2758,6 +3351,7 @@
 	vdev_t *rvd;
 	uint64_t pool_guid;
 	char *comment;
+	char *compatibility;
 
 	/*
 	 * Versioning wasn't explicitly added to the label until later, so if
@@ -2806,6 +3400,11 @@
 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
 		spa->spa_comment = spa_strdup(comment);
 
+	ASSERT(spa->spa_compatibility == NULL);
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMPATIBILITY,
+	    &compatibility) == 0)
+		spa->spa_compatibility = spa_strdup(compatibility);
+
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
 	    &spa->spa_config_txg);
 
@@ -2890,7 +3489,7 @@
 	if (spa->spa_missing_tvds != 0) {
 		spa_load_note(spa, "vdev tree has %lld missing top-level "
 		    "vdevs.", (u_longlong_t)spa->spa_missing_tvds);
-		if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) {
+		if (spa->spa_trust_config && (spa->spa_mode & SPA_MODE_WRITE)) {
 			/*
 			 * Although theoretically we could allow users to open
 			 * incomplete pools in RW mode, we'd need to add a lot
@@ -3084,7 +3683,7 @@
 		 * from the label.
 		 */
 		nvlist_free(spa->spa_label_features);
-		VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+		spa->spa_label_features = fnvlist_dup(features);
 	}
 
 	nvlist_free(label);
@@ -3097,21 +3696,20 @@
 	if (ub->ub_version >= SPA_VERSION_FEATURES) {
 		nvlist_t *unsup_feat;
 
-		VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
-		    0);
+		unsup_feat = fnvlist_alloc();
 
 		for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
 		    NULL); nvp != NULL;
 		    nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
 			if (!zfeature_is_supported(nvpair_name(nvp))) {
-				VERIFY(nvlist_add_string(unsup_feat,
-				    nvpair_name(nvp), "") == 0);
+				fnvlist_add_string(unsup_feat,
+				    nvpair_name(nvp), "");
 			}
 		}
 
 		if (!nvlist_empty(unsup_feat)) {
-			VERIFY(nvlist_add_nvlist(spa->spa_load_info,
-			    ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+			fnvlist_add_nvlist(spa->spa_load_info,
+			    ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat);
 			nvlist_free(unsup_feat);
 			spa_load_failed(spa, "some features are unsupported");
 			return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
@@ -3202,7 +3800,14 @@
 	/*
 	 * Build a new vdev tree from the trusted config
 	 */
-	VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
+	error = spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD);
+	if (error != 0) {
+		nvlist_free(mos_config);
+		spa_config_exit(spa, SCL_ALL, FTAG);
+		spa_load_failed(spa, "spa_config_parse failed [error=%d]",
+		    error);
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+	}
 
 	/*
 	 * Vdev paths in the MOS may be obsolete. If the untrusted config was
@@ -3551,6 +4156,15 @@
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
 	/*
+	 * Load the livelist deletion field. If a livelist is queued for
+	 * deletion, indicate that in the spa.
+	 */
+	error = spa_dir_prop(spa, DMU_POOL_DELETED_CLONES,
+	    &spa->spa_livelists_to_delete, B_FALSE);
+	if (error != 0 && error != ENOENT)
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+	/*
 	 * Load the history object.  If we have an older pool, this
 	 * will not be present.
 	 */
@@ -3605,7 +4219,7 @@
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
 	if (error == 0) {
-		uint64_t autoreplace;
+		uint64_t autoreplace = 0;
 
 		spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
 		spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
@@ -3613,8 +4227,6 @@
 		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
 		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
 		spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
-		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
-		    &spa->spa_dedup_ditto);
 		spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim);
 		spa->spa_autoreplace = (autoreplace != 0);
 	}
@@ -3741,11 +4353,18 @@
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
 	}
 
+	error = spa_ld_log_spacemaps(spa);
+	if (error != 0) {
+		spa_load_failed(spa, "spa_ld_log_spacemaps failed [error=%d]",
+		    error);
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+	}
+
 	/*
 	 * Propagate the leaf DTLs we just loaded all the way up the vdev tree.
 	 */
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
+	vdev_dtl_reassess(rvd, 0, 0, B_FALSE, B_FALSE);
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	return (0);
@@ -3872,7 +4491,7 @@
 static void
 spa_ld_prepare_for_reload(spa_t *spa)
 {
-	int mode = spa->spa_mode;
+	spa_mode_t mode = spa->spa_mode;
 	int async_suspended = spa->spa_async_suspended;
 
 	spa_unload(spa);
@@ -4050,7 +4669,7 @@
 		vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL };
 		int svdcount = 0;
 		int children = rvd->vdev_children;
-		int c0 = spa_get_random(children);
+		int c0 = random_in_range(children);
 
 		for (int c = 0; c < children; c++) {
 			vdev_t *vd = rvd->vdev_child[(c0 + c) % children];
@@ -4334,11 +4953,16 @@
 		    update_config_cache);
 
 		/*
-		 * Check all DTLs to see if anything needs resilvering.
+		 * Check if a rebuild was in progress and if so resume it.
+		 * Then check all DTLs to see if anything needs resilvering.
+		 * The resilver will be deferred if a rebuild was started.
 		 */
-		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
-		    vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
+		if (vdev_rebuild_active(spa->spa_root_vdev)) {
+			vdev_rebuild_restart(spa);
+		} else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
+		    vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
+		}
 
 		/*
 		 * Log the fact that we booted up (so that we can detect if
@@ -4374,6 +4998,8 @@
 	}
 
 	spa_import_progress_remove(spa_guid(spa));
+	spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
+
 	spa_load_note(spa, "LOADED");
 
 	return (0);
@@ -4382,7 +5008,7 @@
 static int
 spa_load_retry(spa_t *spa, spa_load_state_t state)
 {
-	int mode = spa->spa_mode;
+	spa_mode_t mode = spa->spa_mode;
 
 	spa_unload(spa);
 	spa_deactivate(spa);
@@ -4576,7 +5202,7 @@
 			 */
 			spa_unload(spa);
 			spa_deactivate(spa);
-			spa_write_cachefile(spa, B_TRUE, B_TRUE);
+			spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
 			spa_remove(spa);
 			if (locked)
 				mutex_exit(&spa_namespace_lock);
@@ -4590,11 +5216,10 @@
 			 * attempted vdev_open().  Return this to the user.
 			 */
 			if (config != NULL && spa->spa_config) {
-				VERIFY(nvlist_dup(spa->spa_config, config,
-				    KM_SLEEP) == 0);
-				VERIFY(nvlist_add_nvlist(*config,
+				*config = fnvlist_dup(spa->spa_config);
+				fnvlist_add_nvlist(*config,
 				    ZPOOL_CONFIG_LOAD_INFO,
-				    spa->spa_load_info) == 0);
+				    spa->spa_load_info);
 			}
 			spa_unload(spa);
 			spa_deactivate(spa);
@@ -4615,9 +5240,9 @@
 	 * If we've recovered the pool, pass back any information we
 	 * gathered while doing the load.
 	 */
-	if (state == SPA_LOAD_RECOVER) {
-		VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
-		    spa->spa_load_info) == 0);
+	if (state == SPA_LOAD_RECOVER && config != NULL) {
+		fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
+		    spa->spa_load_info);
 	}
 
 	if (locked) {
@@ -4628,7 +5253,7 @@
 	}
 
 	if (firstopen)
-		zvol_create_minors(spa, spa_name(spa), B_TRUE);
+		zvol_create_minors_recursive(spa_name(spa));
 
 	*spapp = spa;
 
@@ -4695,15 +5320,14 @@
 	if (spa->spa_spares.sav_count == 0)
 		return;
 
-	VERIFY(nvlist_lookup_nvlist(config,
-	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
-	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+	nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+	VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+	    ZPOOL_CONFIG_SPARES, &spares, &nspares));
 	if (nspares != 0) {
-		VERIFY(nvlist_add_nvlist_array(nvroot,
-		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
-		VERIFY(nvlist_lookup_nvlist_array(nvroot,
-		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+		fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares,
+		    nspares);
+		VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares));
 
 		/*
 		 * Go through and find any spares which have since been
@@ -4711,15 +5335,17 @@
 		 * their status appropriately.
 		 */
 		for (i = 0; i < nspares; i++) {
-			VERIFY(nvlist_lookup_uint64(spares[i],
-			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			guid = fnvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID);
+			VERIFY0(nvlist_lookup_uint64_array(spares[i],
+			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
 			if (spa_spare_exists(guid, &pool, NULL) &&
 			    pool != 0ULL) {
-				VERIFY(nvlist_lookup_uint64_array(
-				    spares[i], ZPOOL_CONFIG_VDEV_STATS,
-				    (uint64_t **)&vs, &vsc) == 0);
 				vs->vs_state = VDEV_STATE_CANT_OPEN;
 				vs->vs_aux = VDEV_AUX_SPARED;
+			} else {
+				vs->vs_state =
+				    spa->spa_spares.sav_vdevs[i]->vdev_state;
 			}
 		}
 	}
@@ -4744,23 +5370,22 @@
 	if (spa->spa_l2cache.sav_count == 0)
 		return;
 
-	VERIFY(nvlist_lookup_nvlist(config,
-	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
-	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+	nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+	VERIFY0(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
+	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
 	if (nl2cache != 0) {
-		VERIFY(nvlist_add_nvlist_array(nvroot,
-		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
-		VERIFY(nvlist_lookup_nvlist_array(nvroot,
-		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+		fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache,
+		    nl2cache);
+		VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache));
 
 		/*
 		 * Update level 2 cache device stats.
 		 */
 
 		for (i = 0; i < nl2cache; i++) {
-			VERIFY(nvlist_lookup_uint64(l2cache[i],
-			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			guid = fnvlist_lookup_uint64(l2cache[i],
+			    ZPOOL_CONFIG_GUID);
 
 			vd = NULL;
 			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
@@ -4772,9 +5397,8 @@
 			}
 			ASSERT(vd != NULL);
 
-			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
-			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
-			    == 0);
+			VERIFY0(nvlist_lookup_uint64_array(l2cache[i],
+			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
 			vdev_get_stats(vd, vs);
 			vdev_config_generate_stats(vd, l2cache[i]);
 
@@ -4889,20 +5513,20 @@
 
 			loadtimes[0] = spa->spa_loaded_ts.tv_sec;
 			loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
-			VERIFY(nvlist_add_uint64_array(*config,
-			    ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0);
+			fnvlist_add_uint64_array(*config,
+			    ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2);
 
-			VERIFY(nvlist_add_uint64(*config,
+			fnvlist_add_uint64(*config,
 			    ZPOOL_CONFIG_ERRCOUNT,
-			    spa_get_errlog_size(spa)) == 0);
+			    spa_get_errlog_size(spa));
 
 			if (spa_suspended(spa)) {
-				VERIFY(nvlist_add_uint64(*config,
+				fnvlist_add_uint64(*config,
 				    ZPOOL_CONFIG_SUSPENDED,
-				    spa->spa_failmode) == 0);
-				VERIFY(nvlist_add_uint64(*config,
+				    spa->spa_failmode);
+				fnvlist_add_uint64(*config,
 				    ZPOOL_CONFIG_SUSPENDED_REASON,
-				    spa->spa_suspended) == 0);
+				    spa->spa_suspended);
 			}
 
 			spa_add_spares(spa, *config);
@@ -4994,8 +5618,8 @@
 
 		if ((error = vdev_open(vd)) == 0 &&
 		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
-			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
-			    vd->vdev_guid) == 0);
+			fnvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
+			    vd->vdev_guid);
 		}
 
 		vdev_free(vd);
@@ -5046,23 +5670,20 @@
 		 * Generate new dev list by concatenating with the
 		 * current dev list.
 		 */
-		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
-		    &olddevs, &oldndevs) == 0);
+		VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, config,
+		    &olddevs, &oldndevs));
 
 		newdevs = kmem_alloc(sizeof (void *) *
 		    (ndevs + oldndevs), KM_SLEEP);
 		for (i = 0; i < oldndevs; i++)
-			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
-			    KM_SLEEP) == 0);
+			newdevs[i] = fnvlist_dup(olddevs[i]);
 		for (i = 0; i < ndevs; i++)
-			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
-			    KM_SLEEP) == 0);
+			newdevs[i + oldndevs] = fnvlist_dup(devs[i]);
 
-		VERIFY(nvlist_remove(sav->sav_config, config,
-		    DATA_TYPE_NVLIST_ARRAY) == 0);
+		fnvlist_remove(sav->sav_config, config);
 
-		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
-		    config, newdevs, ndevs + oldndevs) == 0);
+		fnvlist_add_nvlist_array(sav->sav_config, config, newdevs,
+		    ndevs + oldndevs);
 		for (i = 0; i < oldndevs + ndevs; i++)
 			nvlist_free(newdevs[i]);
 		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
@@ -5070,10 +5691,8 @@
 		/*
 		 * Generate a new dev list.
 		 */
-		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
-		    KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
-		    devs, ndevs) == 0);
+		sav->sav_config = fnvlist_alloc();
+		fnvlist_add_nvlist_array(sav->sav_config, config, devs, ndevs);
 	}
 }
 
@@ -5131,7 +5750,7 @@
 	uint64_t txg = TXG_INITIAL;
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
-	uint64_t version, obj;
+	uint64_t version, obj, ndraid = 0;
 	boolean_t has_features;
 	boolean_t has_encryption;
 	boolean_t has_allocclass;
@@ -5204,7 +5823,7 @@
 			return (error);
 		}
 	}
-	if (!has_allocclass && zfs_special_devs(nvroot)) {
+	if (!has_allocclass && zfs_special_devs(nvroot, NULL)) {
 		spa_deactivate(spa);
 		spa_remove(spa);
 		mutex_exit(&spa_namespace_lock);
@@ -5253,8 +5872,8 @@
 
 	if (error == 0 &&
 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
-	    (error = spa_validate_aux(spa, nvroot, txg,
-	    VDEV_ALLOC_ADD)) == 0) {
+	    (error = vdev_draid_spare_create(nvroot, rvd, &ndraid, 0)) == 0 &&
+	    (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) {
 		/*
 		 * instantiate the metaslab groups (this will dirty the vdevs)
 		 * we can no longer error exit past this point
@@ -5282,10 +5901,9 @@
 	 */
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
-		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
-		    KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+		spa->spa_spares.sav_config = fnvlist_alloc();
+		fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+		    ZPOOL_CONFIG_SPARES, spares, nspares);
 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 		spa_load_spares(spa);
 		spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5297,10 +5915,9 @@
 	 */
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
 	    &l2cache, &nl2cache) == 0) {
-		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
-		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
-		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+		spa->spa_l2cache.sav_config = fnvlist_alloc();
+		fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache);
 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 		spa_load_l2cache(spa);
 		spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5395,6 +6012,9 @@
 		spa_sync_props(props, tx);
 	}
 
+	for (int i = 0; i < ndraid; i++)
+		spa_feature_incr(spa, SPA_FEATURE_DRAID, tx);
+
 	dmu_tx_commit(tx);
 
 	spa->spa_sync_on = B_TRUE;
@@ -5404,7 +6024,7 @@
 
 	spa_spawn_aux_threads(spa);
 
-	spa_write_cachefile(spa, B_FALSE, B_TRUE);
+	spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
 
 	/*
 	 * Don't count references from objsets that are already closed
@@ -5429,7 +6049,7 @@
 	char *altroot = NULL;
 	spa_load_state_t state = SPA_LOAD_IMPORT;
 	zpool_load_policy_t policy;
-	uint64_t mode = spa_mode_global;
+	spa_mode_t mode = spa_mode_global;
 	uint64_t readonly = B_FALSE;
 	int error;
 	nvlist_t *nvroot;
@@ -5453,7 +6073,7 @@
 	(void) nvlist_lookup_uint64(props,
 	    zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
 	if (readonly)
-		mode = FREAD;
+		mode = SPA_MODE_READ;
 	spa = spa_add(pool, config, altroot);
 	spa->spa_import_flags = flags;
 
@@ -5465,7 +6085,7 @@
 		if (props != NULL)
 			spa_configfile_set(spa, props, B_FALSE);
 
-		spa_write_cachefile(spa, B_FALSE, B_TRUE);
+		spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
 		spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
 		zfs_dbgmsg("spa_import: verbatim import of %s", pool);
 		mutex_exit(&spa_namespace_lock);
@@ -5498,8 +6118,7 @@
 	 * Propagate anything learned while loading the pool and pass it
 	 * back to caller (i.e. rewind info, missing devices, etc).
 	 */
-	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
-	    spa->spa_load_info) == 0);
+	fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info);
 
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 	/*
@@ -5517,8 +6136,7 @@
 		spa_load_l2cache(spa);
 	}
 
-	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
+	nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	if (props != NULL)
@@ -5542,13 +6160,12 @@
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
 		if (spa->spa_spares.sav_config)
-			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
-			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+			fnvlist_remove(spa->spa_spares.sav_config,
+			    ZPOOL_CONFIG_SPARES);
 		else
-			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
-		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+			spa->spa_spares.sav_config = fnvlist_alloc();
+		fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+		    ZPOOL_CONFIG_SPARES, spares, nspares);
 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 		spa_load_spares(spa);
 		spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5557,13 +6174,12 @@
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
 	    &l2cache, &nl2cache) == 0) {
 		if (spa->spa_l2cache.sav_config)
-			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
-			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
+			fnvlist_remove(spa->spa_l2cache.sav_config,
+			    ZPOOL_CONFIG_L2CACHE);
 		else
-			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
-		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+			spa->spa_l2cache.sav_config = fnvlist_alloc();
+		fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache);
 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 		spa_load_l2cache(spa);
 		spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5595,10 +6211,10 @@
 
 	spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
 
-	zvol_create_minors(spa, pool, B_TRUE);
-
 	mutex_exit(&spa_namespace_lock);
 
+	zvol_create_minors_recursive(pool);
+
 	return (0);
 }
 
@@ -5623,7 +6239,7 @@
 	 */
 	mutex_enter(&spa_namespace_lock);
 	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
-	spa_activate(spa, FREAD);
+	spa_activate(spa, SPA_MODE_READ);
 
 	/*
 	 * Rewind pool if a max txg was provided.
@@ -5646,6 +6262,16 @@
 		spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
 	}
 
+	/*
+	 * spa_import() relies on a pool config fetched by spa_tryimport()
+	 * for spare/cache devices. Import flags are not passed to
+	 * spa_tryimport(), which makes it return early due to a missing log
+	 * device, so it never retrieves the cache and spare devices.
+	 * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch
+	 * the correct configuration regardless of the missing log device.
+	 */
+	spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
+
 	error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING);
 
 	/*
@@ -5653,16 +6279,14 @@
 	 */
 	if (spa->spa_root_vdev != NULL) {
 		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
-		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
-		    poolname) == 0);
-		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-		    state) == 0);
-		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
-		    spa->spa_uberblock.ub_timestamp) == 0);
-		VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
-		    spa->spa_load_info) == 0);
-		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
-		    spa->spa_errata) == 0);
+		fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname);
+		fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state);
+		fnvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
+		    spa->spa_uberblock.ub_timestamp);
+		fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+		    spa->spa_load_info);
+		fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
+		    spa->spa_errata);
 
 		/*
 		 * If the bootfs property exists on this pool then we
@@ -5691,8 +6315,8 @@
 					(void) snprintf(dsname, MAXPATHLEN,
 					    "%s/%s", poolname, ++cp);
 				}
-				VERIFY(nvlist_add_string(config,
-				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
+				fnvlist_add_string(config, ZPOOL_CONFIG_BOOTFS,
+				    dsname);
 				kmem_free(dsname, MAXPATHLEN);
 			}
 			kmem_free(tmpname, MAXPATHLEN);
@@ -5725,15 +6349,16 @@
  * we don't sync the labels or remove the configuration cache.
  */
 static int
-spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
+spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
     boolean_t force, boolean_t hardforce)
 {
+	int error;
 	spa_t *spa;
 
 	if (oldconfig)
 		*oldconfig = NULL;
 
-	if (!(spa_mode_global & FWRITE))
+	if (!(spa_mode_global & SPA_MODE_WRITE))
 		return (SET_ERROR(EROFS));
 
 	mutex_enter(&spa_namespace_lock);
@@ -5780,13 +6405,9 @@
 	 * references.  If we are resetting a pool, allow references by
 	 * fault injection handlers.
 	 */
-	if (!spa_refcount_zero(spa) ||
-	    (spa->spa_inject_ref != 0 &&
-	    new_state != POOL_STATE_UNINITIALIZED)) {
-		spa_async_resume(spa);
-		spa->spa_is_exporting = B_FALSE;
-		mutex_exit(&spa_namespace_lock);
-		return (SET_ERROR(EBUSY));
+	if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0)) {
+		error = SET_ERROR(EBUSY);
+		goto fail;
 	}
 
 	if (spa->spa_sync_on) {
@@ -5798,10 +6419,8 @@
 		 */
 		if (!force && new_state == POOL_STATE_EXPORTED &&
 		    spa_has_active_shared_spare(spa)) {
-			spa_async_resume(spa);
-			spa->spa_is_exporting = B_FALSE;
-			mutex_exit(&spa_namespace_lock);
-			return (SET_ERROR(EXDEV));
+			error = SET_ERROR(EXDEV);
+			goto fail;
 		}
 
 		/*
@@ -5816,6 +6435,7 @@
 			vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE);
 			vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
 			vdev_autotrim_stop_all(spa);
+			vdev_rebuild_stop_all(spa);
 		}
 
 		/*
@@ -5826,9 +6446,27 @@
 		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
 			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 			spa->spa_state = new_state;
+			vdev_config_dirty(spa->spa_root_vdev);
+			spa_config_exit(spa, SCL_ALL, FTAG);
+		}
+
+		/*
+		 * If the log space map feature is enabled and the pool is
+		 * getting exported (but not destroyed), we want to spend some
+		 * time flushing as many metaslabs as we can in an attempt to
+		 * destroy log space maps and save import time. This has to be
+		 * done before we set the spa_final_txg, otherwise
+		 * spa_sync() -> spa_flush_metaslabs() may dirty the final TXGs.
+		 * spa_should_flush_logs_on_unload() should be called after
+		 * spa_state has been set to the new_state.
+		 */
+		if (spa_should_flush_logs_on_unload(spa))
+			spa_unload_log_sm_flush_all(spa);
+
+		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
+			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 			spa->spa_final_txg = spa_last_synced_txg(spa) +
 			    TXG_DEFER_SIZE + 1;
-			vdev_config_dirty(spa->spa_root_vdev);
 			spa_config_exit(spa, SCL_ALL, FTAG);
 		}
 	}
@@ -5845,11 +6483,11 @@
 	}
 
 	if (oldconfig && spa->spa_config)
-		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
+		*oldconfig = fnvlist_dup(spa->spa_config);
 
 	if (new_state != POOL_STATE_UNINITIALIZED) {
 		if (!hardforce)
-			spa_write_cachefile(spa, B_TRUE, B_TRUE);
+			spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
 		spa_remove(spa);
 	} else {
 		/*
@@ -5862,13 +6500,19 @@
 
 	mutex_exit(&spa_namespace_lock);
 	return (0);
+
+fail:
+	spa->spa_is_exporting = B_FALSE;
+	spa_async_resume(spa);
+	mutex_exit(&spa_namespace_lock);
+	return (error);
 }
 
 /*
  * Destroy a storage pool.
  */
 int
-spa_destroy(char *pool)
+spa_destroy(const char *pool)
 {
 	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
 	    B_FALSE, B_FALSE));
@@ -5878,7 +6522,7 @@
  * Export a storage pool.
  */
 int
-spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
+spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force,
     boolean_t hardforce)
 {
 	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
@@ -5890,7 +6534,7 @@
  * from the namespace in any way.
  */
 int
-spa_reset(char *pool)
+spa_reset(const char *pool)
 {
 	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
 	    B_FALSE, B_FALSE));
@@ -5903,12 +6547,25 @@
  */
 
 /*
+ * This is called as a synctask to increment the draid feature flag
+ */
+static void
+spa_draid_feature_incr(void *arg, dmu_tx_t *tx)
+{
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	int draid = (int)(uintptr_t)arg;
+
+	for (int c = 0; c < draid; c++)
+		spa_feature_incr(spa, SPA_FEATURE_DRAID, tx);
+}
+
+/*
  * Add a device to a storage pool.
  */
 int
 spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 {
-	uint64_t txg, id;
+	uint64_t txg, ndraid = 0;
 	int error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
@@ -5937,8 +6594,23 @@
 		return (spa_vdev_exit(spa, vd, txg, EINVAL));
 
 	if (vd->vdev_children != 0 &&
-	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
+	    (error = vdev_create(vd, txg, B_FALSE)) != 0) {
 		return (spa_vdev_exit(spa, vd, txg, error));
+	}
+
+	/*
+	 * The virtual dRAID spares must be added after vdev tree is created
+	 * and the vdev guids are generated.  The guid of their associated
+	 * dRAID is stored in the config and used when opening the spare.
+	 */
+	if ((error = vdev_draid_spare_create(nvroot, vd, &ndraid,
+	    rvd->vdev_children)) == 0) {
+		if (ndraid > 0 && nvlist_lookup_nvlist_array(nvroot,
+		    ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0)
+			nspares = 0;
+	} else {
+		return (spa_vdev_exit(spa, vd, txg, error));
+	}
 
 	/*
 	 * We must validate the spares and l2cache devices after checking the
@@ -5951,7 +6623,7 @@
 	 * If we are in the middle of a device removal, we can only add
 	 * devices which match the existing devices in the pool.
 	 * If we are in the middle of a removal, or have some indirect
-	 * vdevs, we can not add raidz toplevels.
+	 * vdevs, we can not add raidz or dRAID top levels.
 	 */
 	if (spa->spa_vdev_removal != NULL ||
 	    spa->spa_removing_phys.sr_prev_indirect_vdev != -1) {
@@ -5961,10 +6633,10 @@
 			    tvd->vdev_ashift != spa->spa_max_ashift) {
 				return (spa_vdev_exit(spa, vd, txg, EINVAL));
 			}
-			/* Fail if top level vdev is raidz */
-			if (tvd->vdev_ops == &vdev_raidz_ops) {
+			/* Fail if top level vdev is raidz or a dRAID */
+			if (vdev_get_nparity(tvd) != 0)
 				return (spa_vdev_exit(spa, vd, txg, EINVAL));
-			}
+
 			/*
 			 * Need the top level mirror to be
 			 * a mirror of leaf vdevs only
@@ -5983,19 +6655,9 @@
 	}
 
 	for (int c = 0; c < vd->vdev_children; c++) {
-
-		/*
-		 * Set the vdev id to the first hole, if one exists.
-		 */
-		for (id = 0; id < rvd->vdev_children; id++) {
-			if (rvd->vdev_child[id]->vdev_ishole) {
-				vdev_free(rvd->vdev_child[id]);
-				break;
-			}
-		}
 		tvd = vd->vdev_child[c];
 		vdev_remove_child(vd, tvd);
-		tvd->vdev_id = id;
+		tvd->vdev_id = rvd->vdev_children;
 		vdev_add_child(rvd, tvd);
 		vdev_config_dirty(tvd);
 	}
@@ -6015,6 +6677,19 @@
 	}
 
 	/*
+	 * We can't increment a feature while holding spa_vdev so we
+	 * have to do it in a synctask.
+	 */
+	if (ndraid != 0) {
+		dmu_tx_t *tx;
+
+		tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
+		dsl_sync_task_nowait(spa->spa_dsl_pool, spa_draid_feature_incr,
+		    (void *)(uintptr_t)ndraid, tx);
+		dmu_tx_commit(tx);
+	}
+
+	/*
 	 * We have to be careful when adding new vdevs to an existing pool.
 	 * If other threads start allocating from these vdevs before we
 	 * sync the config cache, and we lose power, then upon reboot we may
@@ -6049,12 +6724,17 @@
  * extra rules: you can't attach to it after it's been created, and upon
  * completion of resilvering, the first disk (the one being replaced)
  * is automatically detached.
+ *
+ * If 'rebuild' is specified, then sequential reconstruction (a.k.a. rebuild)
+ * should be performed instead of traditional healing reconstruction.  From
+ * an administrator's perspective these are both resilver operations.
  */
 int
-spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
+spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
+    int rebuild)
 {
 	uint64_t txg, dtl_max_txg;
-	ASSERTV(vdev_t *rvd = spa->spa_root_vdev);
+	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
 	vdev_ops_t *pvops;
 	char *oldvdpath, *newvdpath;
@@ -6074,6 +6754,21 @@
 		return (spa_vdev_exit(spa, NULL, txg, error));
 	}
 
+	if (rebuild) {
+		if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
+			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
+		if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
+		    dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
+			return (spa_vdev_exit(spa, NULL, txg,
+			    ZFS_ERR_RESILVER_IN_PROGRESS));
+		}
+	} else {
+		if (vdev_rebuild_active(rvd))
+			return (spa_vdev_exit(spa, NULL, txg,
+			    ZFS_ERR_REBUILD_IN_PROGRESS));
+	}
+
 	if (spa->spa_vdev_removal != NULL)
 		return (spa_vdev_exit(spa, NULL, txg, EBUSY));
 
@@ -6101,10 +6796,37 @@
 		return (spa_vdev_exit(spa, newrootvd, txg, error));
 
 	/*
-	 * Spares can't replace logs
+	 * log, dedup and special vdevs should not be replaced by spares.
 	 */
-	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
+	if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
+	    oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+	}
+
+	/*
+	 * A dRAID spare can only replace a child of its parent dRAID vdev.
+	 */
+	if (newvd->vdev_ops == &vdev_draid_spare_ops &&
+	    oldvd->vdev_top != vdev_draid_spare_get_parent(newvd)) {
+		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+	}
+
+	if (rebuild) {
+		/*
+		 * For rebuilds, the top vdev must support reconstruction
+		 * using only space maps.  This means the only allowable
+		 * vdevs types are the root vdev, a mirror, or dRAID.
+		 */
+		tvd = pvd;
+		if (pvd->vdev_top != NULL)
+			tvd = pvd->vdev_top;
+
+		if (tvd->vdev_ops != &vdev_mirror_ops &&
+		    tvd->vdev_ops != &vdev_root_ops &&
+		    tvd->vdev_ops != &vdev_draid_ops) {
+			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+		}
+	}
 
 	if (!replacing) {
 		/*
@@ -6159,7 +6881,7 @@
 	 * than the top-level vdev.
 	 */
 	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
-		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
+		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 
 	/*
 	 * If this is an in-place replacement, update oldvd's path and devid
@@ -6177,9 +6899,6 @@
 		}
 	}
 
-	/* mark the device being resilvered */
-	newvd->vdev_resilver_txg = txg;
-
 	/*
 	 * If the parent is not a mirror, or if we're replacing, insert the new
 	 * mirror/replacing/spare vdev above oldvd.
@@ -6217,8 +6936,8 @@
 	 */
 	dtl_max_txg = txg + TXG_CONCURRENT_STATES;
 
-	vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
-	    dtl_max_txg - TXG_INITIAL);
+	vdev_dtl_dirty(newvd, DTL_MISSING,
+	    TXG_INITIAL, dtl_max_txg - TXG_INITIAL);
 
 	if (newvd->vdev_isspare) {
 		spa_spare_activate(newvd);
@@ -6235,16 +6954,25 @@
 	vdev_dirty(tvd, VDD_DTL, newvd, txg);
 
 	/*
-	 * Schedule the resilver to restart in the future. We do this to
-	 * ensure that dmu_sync-ed blocks have been stitched into the
-	 * respective datasets. We do not do this if resilvers have been
-	 * deferred.
+	 * Schedule the resilver or rebuild to restart in the future. We do
+	 * this to ensure that dmu_sync-ed blocks have been stitched into the
+	 * respective datasets.
 	 */
-	if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
-	    spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
-		vdev_defer_resilver(newvd);
-	else
-		dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
+	if (rebuild) {
+		newvd->vdev_rebuild_txg = txg;
+
+		vdev_rebuild(tvd);
+	} else {
+		newvd->vdev_resilver_txg = txg;
+
+		if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+		    spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
+			vdev_defer_resilver(newvd);
+		} else {
+			dsl_scan_restart_resilver(spa->spa_dsl_pool,
+			    dtl_max_txg);
+		}
+	}
 
 	if (spa->spa_bootfs)
 		spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -6272,14 +7000,14 @@
  * Detach a device from a mirror or replacing vdev.
  *
  * If 'replace_done' is specified, only detach if the parent
- * is a replacing vdev.
+ * is a replacing or a spare vdev.
  */
 int
 spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
 {
 	uint64_t txg;
 	int error;
-	ASSERTV(vdev_t *rvd = spa->spa_root_vdev);
+	vdev_t *rvd __maybe_unused = spa->spa_root_vdev;
 	vdev_t *vd, *pvd, *cvd, *tvd;
 	boolean_t unspare = B_FALSE;
 	uint64_t unspare_guid = 0;
@@ -6287,7 +7015,7 @@
 
 	ASSERT(spa_writeable(spa));
 
-	txg = spa_vdev_enter(spa);
+	txg = spa_vdev_detach_enter(spa, guid);
 
 	vd = spa_lookup_by_guid(spa, guid, B_FALSE);
 
@@ -6388,14 +7116,20 @@
 	}
 
 	/*
-	 * If we are detaching the original disk from a spare, then it implies
-	 * that the spare should become a real disk, and be removed from the
-	 * active spare list for the pool.
+	 * If we are detaching the original disk from a normal spare, then it
+	 * implies that the spare should become a real disk, and be removed
+	 * from the active spare list for the pool.  dRAID spares on the
+	 * other hand are coupled to the pool and thus should never be removed
+	 * from the spares list.
 	 */
-	if (pvd->vdev_ops == &vdev_spare_ops &&
-	    vd->vdev_id == 0 &&
-	    pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare)
-		unspare = B_TRUE;
+	if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0) {
+		vdev_t *last_cvd = pvd->vdev_child[pvd->vdev_children - 1];
+
+		if (last_cvd->vdev_isspare &&
+		    last_cvd->vdev_ops != &vdev_draid_spare_ops) {
+			unspare = B_TRUE;
+		}
+	}
 
 	/*
 	 * Erase the disk labels so the disk can be used for other things.
@@ -6484,6 +7218,7 @@
 	vdev_dirty(tvd, VDD_DTL, vd, txg);
 
 	spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE);
+	spa_notify_waiters(spa);
 
 	/* hang on to the spa before we release the lock */
 	spa_open_ref(spa, FTAG);
@@ -6572,6 +7307,10 @@
 	    vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
 		mutex_exit(&vd->vdev_initialize_lock);
 		return (SET_ERROR(ESRCH));
+	} else if (cmd_type == POOL_INITIALIZE_UNINIT &&
+	    vd->vdev_initialize_thread != NULL) {
+		mutex_exit(&vd->vdev_initialize_lock);
+		return (SET_ERROR(EBUSY));
 	}
 
 	switch (cmd_type) {
@@ -6584,6 +7323,9 @@
 	case POOL_INITIALIZE_SUSPEND:
 		vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list);
 		break;
+	case POOL_INITIALIZE_UNINIT:
+		vdev_uninitialize(vd);
+		break;
 	default:
 		panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
 	}
@@ -6818,7 +7560,8 @@
 		vdev_t *vd = rvd->vdev_child[c];
 
 		/* don't count the holes & logs as children */
-		if (vd->vdev_islog || !vdev_is_concrete(vd)) {
+		if (vd->vdev_islog || (vd->vdev_ops != &vdev_indirect_ops &&
+		    !vdev_is_concrete(vd))) {
 			if (lastlog == 0)
 				lastlog = c;
 			continue;
@@ -6854,6 +7597,11 @@
 			}
 		}
 
+		/* deal with indirect vdevs */
+		if (spa->spa_root_vdev->vdev_child[c]->vdev_ops ==
+		    &vdev_indirect_ops)
+			continue;
+
 		/* which disk is going to be split? */
 		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
 		    &glist[c]) != 0) {
@@ -6889,14 +7637,14 @@
 		}
 
 		/* we need certain info from the top level */
-		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
-		    vml[c]->vdev_top->vdev_ms_array) == 0);
-		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
-		    vml[c]->vdev_top->vdev_ms_shift) == 0);
-		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
-		    vml[c]->vdev_top->vdev_asize) == 0);
-		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
-		    vml[c]->vdev_top->vdev_ashift) == 0);
+		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
+		    vml[c]->vdev_top->vdev_ms_array);
+		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
+		    vml[c]->vdev_top->vdev_ms_shift);
+		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
+		    vml[c]->vdev_top->vdev_asize);
+		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
+		    vml[c]->vdev_top->vdev_ashift);
 
 		/* transfer per-vdev ZAPs */
 		ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0);
@@ -6926,28 +7674,24 @@
 	 * Temporarily record the splitting vdevs in the spa config.  This
 	 * will disappear once the config is regenerated.
 	 */
-	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
-	    glist, children) == 0);
+	nvl = fnvlist_alloc();
+	fnvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children);
 	kmem_free(glist, children * sizeof (uint64_t));
 
 	mutex_enter(&spa->spa_props_lock);
-	VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
-	    nvl) == 0);
+	fnvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl);
 	mutex_exit(&spa->spa_props_lock);
 	spa->spa_config_splitting = nvl;
 	vdev_config_dirty(spa->spa_root_vdev);
 
 	/* configure and create the new pool */
-	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
-	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-	    exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
-	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
-	    spa_version(spa)) == 0);
-	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
-	    spa->spa_config_txg) == 0);
-	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
-	    spa_generate_guid(NULL)) == 0);
+	fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname);
+	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE);
+	fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa));
+	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg);
+	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    spa_generate_guid(NULL));
 	VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS));
 	(void) nvlist_lookup_string(props,
 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
@@ -6981,7 +7725,7 @@
 	    offsetof(vdev_t, vdev_trim_node));
 
 	for (c = 0; c < children; c++) {
-		if (vml[c] != NULL) {
+		if (vml[c] != NULL && vml[c]->vdev_ops != &vdev_indirect_ops) {
 			mutex_enter(&vml[c]->vdev_initialize_lock);
 			vdev_initialize_stop(vml[c],
 			    VDEV_INITIALIZE_ACTIVE, &vd_initialize_list);
@@ -7000,6 +7744,7 @@
 	list_destroy(&vd_trim_list);
 
 	newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
+	newspa->spa_is_splitting = B_TRUE;
 
 	/* create the new pool from the disks of the original pool */
 	error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE);
@@ -7008,10 +7753,9 @@
 
 	/* if that worked, generate a real config for the new pool */
 	if (newspa->spa_root_vdev != NULL) {
-		VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
-		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
-		    ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
+		newspa->spa_config_splitting = fnvlist_alloc();
+		fnvlist_add_uint64(newspa->spa_config_splitting,
+		    ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa));
 		spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
 		    B_TRUE));
 	}
@@ -7041,7 +7785,7 @@
 	if (error != 0)
 		dmu_tx_abort(tx);
 	for (c = 0; c < children; c++) {
-		if (vml[c] != NULL) {
+		if (vml[c] != NULL && vml[c]->vdev_ops != &vdev_indirect_ops) {
 			vdev_t *tvd = vml[c]->vdev_top;
 
 			/*
@@ -7077,6 +7821,7 @@
 	spa_history_log_internal(newspa, "split", NULL,
 	    "from pool %s", spa_name(spa));
 
+	newspa->spa_is_splitting = B_FALSE;
 	kmem_free(vml, children * sizeof (vdev_t *));
 
 	/* if we're not going to mount the filesystems in userland, export */
@@ -7232,12 +7977,18 @@
 	}
 
 	spa_config_exit(spa, SCL_ALL, FTAG);
+
+	/*
+	 * If a detach was not performed above, replace waiters will not have
+	 * been notified, in which case we must do so now.
+	 */
+	spa_notify_waiters(spa);
 }
 
 /*
  * Update the stored path or FRU for this vdev.
  */
-int
+static int
 spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
     boolean_t ispath)
 {
@@ -7361,6 +8112,9 @@
 		vd->vdev_stat.vs_checksum_errors = 0;
 
 		vdev_state_dirty(vd->vdev_top);
+
+		/* Tell userspace that the vdev is gone. */
+		zfs_post_remove(spa, vd);
 	}
 
 	for (int c = 0; c < vd->vdev_children; c++)
@@ -7420,12 +8174,16 @@
 		old_space = metaslab_class_get_space(spa_normal_class(spa));
 		old_space += metaslab_class_get_space(spa_special_class(spa));
 		old_space += metaslab_class_get_space(spa_dedup_class(spa));
+		old_space += metaslab_class_get_space(
+		    spa_embedded_log_class(spa));
 
 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
 
 		new_space = metaslab_class_get_space(spa_normal_class(spa));
 		new_space += metaslab_class_get_space(spa_special_class(spa));
 		new_space += metaslab_class_get_space(spa_dedup_class(spa));
+		new_space += metaslab_class_get_space(
+		    spa_embedded_log_class(spa));
 		mutex_exit(&spa_namespace_lock);
 
 		/*
@@ -7435,7 +8193,8 @@
 		if (new_space != old_space) {
 			spa_history_log_internal(spa, "vdev online", NULL,
 			    "pool '%s' size: %llu(+%llu)",
-			    spa_name(spa), new_space, new_space - old_space);
+			    spa_name(spa), (u_longlong_t)new_space,
+			    (u_longlong_t)(new_space - old_space));
 		}
 	}
 
@@ -7470,13 +8229,17 @@
 	/*
 	 * If any devices are done replacing, detach them.
 	 */
-	if (tasks & SPA_ASYNC_RESILVER_DONE)
+	if (tasks & SPA_ASYNC_RESILVER_DONE ||
+	    tasks & SPA_ASYNC_REBUILD_DONE ||
+	    tasks & SPA_ASYNC_DETACH_SPARE) {
 		spa_vdev_resilver_done(spa);
+	}
 
 	/*
 	 * Kick off a resilver.
 	 */
 	if (tasks & SPA_ASYNC_RESILVER &&
+	    !vdev_rebuild_active(spa->spa_root_vdev) &&
 	    (!dsl_scan_resilvering(dp) ||
 	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
 		dsl_scan_restart_resilver(dp, 0);
@@ -7506,6 +8269,28 @@
 	}
 
 	/*
+	 * Kick off L2 cache whole device TRIM.
+	 */
+	if (tasks & SPA_ASYNC_L2CACHE_TRIM) {
+		mutex_enter(&spa_namespace_lock);
+		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+		vdev_trim_l2arc(spa);
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
+		mutex_exit(&spa_namespace_lock);
+	}
+
+	/*
+	 * Kick off L2 cache rebuilding.
+	 */
+	if (tasks & SPA_ASYNC_L2CACHE_REBUILD) {
+		mutex_enter(&spa_namespace_lock);
+		spa_config_enter(spa, SCL_L2ARC, FTAG, RW_READER);
+		l2arc_spa_rebuild_start(spa);
+		spa_config_exit(spa, SCL_L2ARC, FTAG);
+		mutex_exit(&spa_namespace_lock);
+	}
+
+	/*
 	 * Let the world know that we're done.
 	 */
 	mutex_enter(&spa->spa_async_lock);
@@ -7533,6 +8318,14 @@
 	zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
 	if (discard_thread != NULL)
 		zthr_cancel(discard_thread);
+
+	zthr_t *ll_delete_thread = spa->spa_livelist_delete_zthr;
+	if (ll_delete_thread != NULL)
+		zthr_cancel(ll_delete_thread);
+
+	zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
+	if (ll_condense_thread != NULL)
+		zthr_cancel(ll_condense_thread);
 }
 
 void
@@ -7551,6 +8344,14 @@
 	zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
 	if (discard_thread != NULL)
 		zthr_resume(discard_thread);
+
+	zthr_t *ll_delete_thread = spa->spa_livelist_delete_zthr;
+	if (ll_delete_thread != NULL)
+		zthr_resume(ll_delete_thread);
+
+	zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
+	if (ll_condense_thread != NULL)
+		zthr_resume(ll_condense_thread);
 }
 
 static boolean_t
@@ -7579,8 +8380,7 @@
 	mutex_enter(&spa->spa_async_lock);
 	if (spa_async_tasks_pending(spa) &&
 	    !spa->spa_async_suspended &&
-	    spa->spa_async_thread == NULL &&
-	    rootdir != NULL)
+	    spa->spa_async_thread == NULL)
 		spa->spa_async_thread = thread_create(NULL, 0,
 		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
 	mutex_exit(&spa->spa_async_lock);
@@ -7607,24 +8407,46 @@
  * ==========================================================================
  */
 
+
 static int
-bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+bpobj_enqueue_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
 {
 	bpobj_t *bpo = arg;
-	bpobj_enqueue(bpo, bp, tx);
+	bpobj_enqueue(bpo, bp, bp_freed, tx);
 	return (0);
 }
 
+int
+bpobj_enqueue_alloc_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	return (bpobj_enqueue_cb(arg, bp, B_FALSE, tx));
+}
+
+int
+bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	return (bpobj_enqueue_cb(arg, bp, B_TRUE, tx));
+}
+
 static int
 spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
-	zio_t *zio = arg;
+	zio_t *pio = arg;
 
-	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
-	    zio->io_flags));
+	zio_nowait(zio_free_sync(pio, pio->io_spa, dmu_tx_get_txg(tx), bp,
+	    pio->io_flags));
 	return (0);
 }
 
+static int
+bpobj_spa_free_sync_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
+    dmu_tx_t *tx)
+{
+	ASSERT(!bp_freed);
+	return (spa_free_sync_cb(arg, bp, tx));
+}
+
 /*
  * Note: this simple function is not inlined to make it easier to dtrace the
  * amount of time spent syncing frees.
@@ -7647,9 +8469,21 @@
 	if (spa_sync_pass(spa) != 1)
 		return;
 
+	/*
+	 * Note:
+	 * If the log space map feature is active, we stop deferring
+	 * frees to the next TXG and therefore running this function
+	 * would be considered a no-op as spa_deferred_bpobj should
+	 * not have any entries.
+	 *
+	 * That said we run this function anyway (instead of returning
+	 * immediately) for the edge-case scenario where we just
+	 * activated the log space map feature in this TXG but we have
+	 * deferred frees from the previous TXG.
+	 */
 	zio_t *zio = zio_root(spa, NULL, NULL, 0);
 	VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
-	    spa_free_sync_cb, zio, tx), ==, 0);
+	    bpobj_spa_free_sync_cb, zio, tx), ==, 0);
 	VERIFY0(zio_wait(zio));
 }
 
@@ -7710,16 +8544,15 @@
 		    &sav->sav_object, tx) == 0);
 	}
 
-	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	nvroot = fnvlist_alloc();
 	if (sav->sav_count == 0) {
-		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
+		fnvlist_add_nvlist_array(nvroot, config, NULL, 0);
 	} else {
 		list = kmem_alloc(sav->sav_count*sizeof (void *), KM_SLEEP);
 		for (i = 0; i < sav->sav_count; i++)
 			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
 			    B_FALSE, VDEV_CONFIG_L2CACHE);
-		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
-		    sav->sav_count) == 0);
+		fnvlist_add_nvlist_array(nvroot, config, list, sav->sav_count);
 		for (i = 0; i < sav->sav_count; i++)
 			nvlist_free(list[i]);
 		kmem_free(list, sav->sav_count * sizeof (void *));
@@ -7881,7 +8714,8 @@
 
 	spa->spa_uberblock.ub_version = version;
 	vdev_config_dirty(spa->spa_root_vdev);
-	spa_history_log_internal(spa, "set", tx, "version=%lld", version);
+	spa_history_log_internal(spa, "set", tx, "version=%lld",
+	    (longlong_t)version);
 }
 
 /*
@@ -7951,15 +8785,36 @@
 			spa->spa_comment = spa_strdup(strval);
 			/*
 			 * We need to dirty the configuration on all the vdevs
-			 * so that their labels get updated.  It's unnecessary
-			 * to do this for pool creation since the vdev's
-			 * configuration has already been dirtied.
+			 * so that their labels get updated.  We also need to
+			 * update the cache file to keep it in sync with the
+			 * MOS version. It's unnecessary to do this for pool
+			 * creation since the vdev's configuration has already
+			 * been dirtied.
 			 */
-			if (tx->tx_txg != TXG_INITIAL)
+			if (tx->tx_txg != TXG_INITIAL) {
 				vdev_config_dirty(spa->spa_root_vdev);
+				spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+			}
 			spa_history_log_internal(spa, "set", tx,
 			    "%s=%s", nvpair_name(elem), strval);
 			break;
+		case ZPOOL_PROP_COMPATIBILITY:
+			strval = fnvpair_value_string(elem);
+			if (spa->spa_compatibility != NULL)
+				spa_strfree(spa->spa_compatibility);
+			spa->spa_compatibility = spa_strdup(strval);
+			/*
+			 * Dirty the configuration on vdevs as above.
+			 */
+			if (tx->tx_txg != TXG_INITIAL) {
+				vdev_config_dirty(spa->spa_root_vdev);
+				spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+			}
+
+			spa_history_log_internal(spa, "set", tx,
+			    "%s=%s", nvpair_name(elem), strval);
+			break;
+
 		default:
 			/*
 			 * Set pool property values in the poolprops mos object.
@@ -7995,7 +8850,8 @@
 				    spa->spa_pool_props_object, propname,
 				    8, 1, &intval, tx));
 				spa_history_log_internal(spa, "set", tx,
-				    "%s=%lld", nvpair_name(elem), intval);
+				    "%s=%lld", nvpair_name(elem),
+				    (longlong_t)intval);
 			} else {
 				ASSERT(0); /* not allowed */
 			}
@@ -8024,9 +8880,6 @@
 			case ZPOOL_PROP_MULTIHOST:
 				spa->spa_multihost = intval;
 				break;
-			case ZPOOL_PROP_DEDUPDITTO:
-				spa->spa_dedup_ditto = intval;
-				break;
 			default:
 				break;
 			}
@@ -8114,8 +8967,8 @@
 static void
 vdev_indirect_state_sync_verify(vdev_t *vd)
 {
-	ASSERTV(vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping);
-	ASSERTV(vdev_indirect_births_t *vib = vd->vdev_indirect_births);
+	vdev_indirect_mapping_t *vim __maybe_unused = vd->vdev_indirect_mapping;
+	vdev_indirect_births_t *vib __maybe_unused = vd->vdev_indirect_births;
 
 	if (vd->vdev_ops == &vdev_indirect_ops) {
 		ASSERT(vim != NULL);
@@ -8179,25 +9032,32 @@
 		 * allocations look at mg_max_alloc_queue_depth, and async
 		 * allocations all happen from spa_sync().
 		 */
-		for (int i = 0; i < spa->spa_alloc_count; i++)
+		for (int i = 0; i < mg->mg_allocators; i++) {
 			ASSERT0(zfs_refcount_count(
-			    &(mg->mg_alloc_queue_depth[i])));
+			    &(mg->mg_allocator[i].mga_alloc_queue_depth)));
+		}
 		mg->mg_max_alloc_queue_depth = max_queue_depth;
 
-		for (int i = 0; i < spa->spa_alloc_count; i++) {
-			mg->mg_cur_max_alloc_queue_depth[i] =
+		for (int i = 0; i < mg->mg_allocators; i++) {
+			mg->mg_allocator[i].mga_cur_max_alloc_queue_depth =
 			    zfs_vdev_def_queue_depth;
 		}
 		slots_per_allocator += zfs_vdev_def_queue_depth;
 	}
 
 	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		ASSERT0(zfs_refcount_count(&normal->mc_alloc_slots[i]));
-		ASSERT0(zfs_refcount_count(&special->mc_alloc_slots[i]));
-		ASSERT0(zfs_refcount_count(&dedup->mc_alloc_slots[i]));
-		normal->mc_alloc_max_slots[i] = slots_per_allocator;
-		special->mc_alloc_max_slots[i] = slots_per_allocator;
-		dedup->mc_alloc_max_slots[i] = slots_per_allocator;
+		ASSERT0(zfs_refcount_count(&normal->mc_allocator[i].
+		    mca_alloc_slots));
+		ASSERT0(zfs_refcount_count(&special->mc_allocator[i].
+		    mca_alloc_slots));
+		ASSERT0(zfs_refcount_count(&dedup->mc_allocator[i].
+		    mca_alloc_slots));
+		normal->mc_allocator[i].mca_alloc_max_slots =
+		    slots_per_allocator;
+		special->mc_allocator[i].mca_alloc_max_slots =
+		    slots_per_allocator;
+		dedup->mc_allocator[i].mca_alloc_max_slots =
+		    slots_per_allocator;
 	}
 	normal->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
 	special->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
@@ -8240,7 +9100,14 @@
 		spa_errlog_sync(spa, txg);
 		dsl_pool_sync(dp, txg);
 
-		if (pass < zfs_sync_pass_deferred_free) {
+		if (pass < zfs_sync_pass_deferred_free ||
+		    spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
+			/*
+			 * If the log space map feature is active we don't
+			 * care about deferred frees and the deferred bpobj
+			 * as the log space map should effectively have the
+			 * same results (i.e. appending only to one object).
+			 */
 			spa_sync_frees(spa, free_bpl, tx);
 		} else {
 			/*
@@ -8248,7 +9115,7 @@
 			 * we sync the deferred frees later in pass 1.
 			 */
 			ASSERT3U(pass, >, 1);
-			bplist_iterate(free_bpl, bpobj_enqueue_cb,
+			bplist_iterate(free_bpl, bpobj_enqueue_alloc_cb,
 			    &spa->spa_deferred_bpobj, tx);
 		}
 
@@ -8257,6 +9124,8 @@
 		svr_sync(spa, tx);
 		spa_sync_upgrades(spa, tx);
 
+		spa_flush_metaslabs(spa, tx);
+
 		vdev_t *vd = NULL;
 		while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
 		    != NULL)
@@ -8318,7 +9187,7 @@
 			vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL };
 			int svdcount = 0;
 			int children = rvd->vdev_children;
-			int c0 = spa_get_random(children);
+			int c0 = random_in_range(children);
 
 			for (int c = 0; c < children; c++) {
 				vdev_t *vd =
@@ -8383,9 +9252,9 @@
 	spa->spa_sync_pass = 0;
 
 	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		mutex_enter(&spa->spa_alloc_locks[i]);
-		VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
-		mutex_exit(&spa->spa_alloc_locks[i]);
+		mutex_enter(&spa->spa_allocs[i].spaa_lock);
+		VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+		mutex_exit(&spa->spa_allocs[i].spaa_lock);
 	}
 
 	/*
@@ -8495,9 +9364,9 @@
 	dsl_pool_sync_done(dp, txg);
 
 	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		mutex_enter(&spa->spa_alloc_locks[i]);
-		VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
-		mutex_exit(&spa->spa_alloc_locks[i]);
+		mutex_enter(&spa->spa_allocs[i].spaa_lock);
+		VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+		mutex_exit(&spa->spa_allocs[i].spaa_lock);
 	}
 
 	/*
@@ -8507,6 +9376,11 @@
 	    != NULL)
 		vdev_sync_done(vd, txg);
 
+	metaslab_class_evict_old(spa->spa_normal_class, txg);
+	metaslab_class_evict_old(spa->spa_log_class, txg);
+
+	spa_sync_close_syncing_log_sm(spa);
+
 	spa_update_dspace(spa);
 
 	/*
@@ -8654,6 +9528,7 @@
 boolean_t
 spa_has_spare(spa_t *spa, uint64_t guid)
 {
+	(void) spa;
 	int i;
 	uint64_t spareguid;
 	spa_aux_vdev_t *sav = &spa->spa_spares;
@@ -8692,6 +9567,308 @@
 	return (B_FALSE);
 }
 
+uint64_t
+spa_total_metaslabs(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+
+	uint64_t m = 0;
+	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *vd = rvd->vdev_child[c];
+		if (!vdev_is_concrete(vd))
+			continue;
+		m += vd->vdev_ms_count;
+	}
+	return (m);
+}
+
+/*
+ * Notify any waiting threads that some activity has switched from being in-
+ * progress to not-in-progress so that the thread can wake up and determine
+ * whether it is finished waiting.
+ */
+void
+spa_notify_waiters(spa_t *spa)
+{
+	/*
+	 * Acquiring spa_activities_lock here prevents the cv_broadcast from
+	 * happening between the waiting thread's check and cv_wait.
+	 */
+	mutex_enter(&spa->spa_activities_lock);
+	cv_broadcast(&spa->spa_activities_cv);
+	mutex_exit(&spa->spa_activities_lock);
+}
+
+/*
+ * Notify any waiting threads that the pool is exporting, and then block until
+ * they are finished using the spa_t.
+ */
+void
+spa_wake_waiters(spa_t *spa)
+{
+	mutex_enter(&spa->spa_activities_lock);
+	spa->spa_waiters_cancel = B_TRUE;
+	cv_broadcast(&spa->spa_activities_cv);
+	while (spa->spa_waiters != 0)
+		cv_wait(&spa->spa_waiters_cv, &spa->spa_activities_lock);
+	spa->spa_waiters_cancel = B_FALSE;
+	mutex_exit(&spa->spa_activities_lock);
+}
+
+/* Whether the vdev or any of its descendants are being initialized/trimmed. */
+static boolean_t
+spa_vdev_activity_in_progress_impl(vdev_t *vd, zpool_wait_activity_t activity)
+{
+	spa_t *spa = vd->vdev_spa;
+
+	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER));
+	ASSERT(MUTEX_HELD(&spa->spa_activities_lock));
+	ASSERT(activity == ZPOOL_WAIT_INITIALIZE ||
+	    activity == ZPOOL_WAIT_TRIM);
+
+	kmutex_t *lock = activity == ZPOOL_WAIT_INITIALIZE ?
+	    &vd->vdev_initialize_lock : &vd->vdev_trim_lock;
+
+	mutex_exit(&spa->spa_activities_lock);
+	mutex_enter(lock);
+	mutex_enter(&spa->spa_activities_lock);
+
+	boolean_t in_progress = (activity == ZPOOL_WAIT_INITIALIZE) ?
+	    (vd->vdev_initialize_state == VDEV_INITIALIZE_ACTIVE) :
+	    (vd->vdev_trim_state == VDEV_TRIM_ACTIVE);
+	mutex_exit(lock);
+
+	if (in_progress)
+		return (B_TRUE);
+
+	for (int i = 0; i < vd->vdev_children; i++) {
+		if (spa_vdev_activity_in_progress_impl(vd->vdev_child[i],
+		    activity))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * If use_guid is true, this checks whether the vdev specified by guid is
+ * being initialized/trimmed. Otherwise, it checks whether any vdev in the pool
+ * is being initialized/trimmed. The caller must hold spa_activities_lock; it
+ * is dropped briefly while this function takes the config lock itself.
+ */
+static int
+spa_vdev_activity_in_progress(spa_t *spa, boolean_t use_guid, uint64_t guid,
+    zpool_wait_activity_t activity, boolean_t *in_progress)
+{
+	mutex_exit(&spa->spa_activities_lock);
+	spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
+	mutex_enter(&spa->spa_activities_lock);
+
+	vdev_t *vd;
+	if (use_guid) {
+		vd = spa_lookup_by_guid(spa, guid, B_FALSE);
+		if (vd == NULL || !vd->vdev_ops->vdev_op_leaf) {
+			spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+			return (EINVAL);
+		}
+	} else {
+		vd = spa->spa_root_vdev;
+	}
+
+	*in_progress = spa_vdev_activity_in_progress_impl(vd, activity);
+
+	spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+	return (0);
+}
+
+/*
+ * Locking for waiting threads
+ * ---------------------------
+ *
+ * Waiting threads need a way to check whether a given activity is in progress,
+ * and then, if it is, wait for it to complete. Each activity will have some
+ * in-memory representation of the relevant on-disk state which can be used to
+ * determine whether or not the activity is in progress. The in-memory state and
+ * the locking used to protect it will be different for each activity, and may
+ * not be suitable for use with a cvar (e.g., some state is protected by the
+ * config lock). To allow waiting threads to wait without any races, another
+ * lock, spa_activities_lock, is used.
+ *
+ * When the state is checked, both the activity-specific lock (if there is one)
+ * and spa_activities_lock are held. In some cases, the activity-specific lock
+ * is acquired explicitly (e.g. the config lock). In others, the locking is
+ * internal to some check (e.g. bpobj_is_empty). After checking, the waiting
+ * thread releases the activity-specific lock and, if the activity is in
+ * progress, then cv_waits using spa_activities_lock.
+ *
+ * The waiting thread is woken when another thread, one completing some
+ * activity, updates the state of the activity and then calls
+ * spa_notify_waiters, which will cv_broadcast. This 'completing' thread only
+ * needs to hold its activity-specific lock when updating the state, and this
+ * lock can (but doesn't have to) be dropped before calling spa_notify_waiters.
+ *
+ * Because spa_notify_waiters acquires spa_activities_lock before broadcasting,
+ * and because it is held when the waiting thread checks the state of the
+ * activity, it can never be the case that the completing thread both updates
+ * the activity state and cv_broadcasts in between the waiting thread's check
+ * and cv_wait. Thus, a waiting thread can never miss a wakeup.
+ *
+ * In order to prevent deadlock, when the waiting thread does its check, in some
+ * cases it will temporarily drop spa_activities_lock in order to acquire the
+ * activity-specific lock. The order in which spa_activities_lock and the
+ * activity specific lock are acquired in the waiting thread is determined by
+ * the order in which they are acquired in the completing thread; if the
+ * completing thread calls spa_notify_waiters with the activity-specific lock
+ * held, then the waiting thread must also acquire the activity-specific lock
+ * first.
+ */
+
+static int
+spa_activity_in_progress(spa_t *spa, zpool_wait_activity_t activity,
+    boolean_t use_tag, uint64_t tag, boolean_t *in_progress)
+{
+	int error = 0;
+
+	ASSERT(MUTEX_HELD(&spa->spa_activities_lock));
+
+	switch (activity) {
+	case ZPOOL_WAIT_CKPT_DISCARD:
+		*in_progress =
+		    (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT) &&
+		    zap_contains(spa_meta_objset(spa),
+		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT) ==
+		    ENOENT);
+		break;
+	case ZPOOL_WAIT_FREE:
+		*in_progress = ((spa_version(spa) >= SPA_VERSION_DEADLISTS &&
+		    !bpobj_is_empty(&spa->spa_dsl_pool->dp_free_bpobj)) ||
+		    spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY) ||
+		    spa_livelist_delete_check(spa));
+		break;
+	case ZPOOL_WAIT_INITIALIZE:
+	case ZPOOL_WAIT_TRIM:
+		error = spa_vdev_activity_in_progress(spa, use_tag, tag,
+		    activity, in_progress);
+		break;
+	case ZPOOL_WAIT_REPLACE:
+		mutex_exit(&spa->spa_activities_lock);
+		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
+		mutex_enter(&spa->spa_activities_lock);
+
+		*in_progress = vdev_replace_in_progress(spa->spa_root_vdev);
+		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+		break;
+	case ZPOOL_WAIT_REMOVE:
+		*in_progress = (spa->spa_removing_phys.sr_state ==
+		    DSS_SCANNING);
+		break;
+	case ZPOOL_WAIT_RESILVER:
+		if ((*in_progress = vdev_rebuild_active(spa->spa_root_vdev)))
+			break;
+		fallthrough;
+	case ZPOOL_WAIT_SCRUB:
+	{
+		boolean_t scanning, paused, is_scrub;
+		dsl_scan_t *scn =  spa->spa_dsl_pool->dp_scan;
+
+		is_scrub = (scn->scn_phys.scn_func == POOL_SCAN_SCRUB);
+		scanning = (scn->scn_phys.scn_state == DSS_SCANNING);
+		paused = dsl_scan_is_paused_scrub(scn);
+		*in_progress = (scanning && !paused &&
+		    is_scrub == (activity == ZPOOL_WAIT_SCRUB));
+		break;
+	}
+	default:
+		panic("unrecognized value for activity %d", activity);
+	}
+
+	return (error);
+}
+
+static int
+spa_wait_common(const char *pool, zpool_wait_activity_t activity,
+    boolean_t use_tag, uint64_t tag, boolean_t *waited)
+{
+	/*
+	 * The tag is used to distinguish between instances of an activity.
+	 * 'initialize' and 'trim' are the only activities that we use this for.
+	 * The other activities can only have a single instance in progress in a
+	 * pool at one time, making the tag unnecessary.
+	 *
+	 * There can be multiple devices being replaced at once, but since they
+	 * all finish once resilvering finishes, we don't bother keeping track
+	 * of them individually, we just wait for them all to finish.
+	 */
+	if (use_tag && activity != ZPOOL_WAIT_INITIALIZE &&
+	    activity != ZPOOL_WAIT_TRIM)
+		return (EINVAL);
+
+	if (activity < 0 || activity >= ZPOOL_WAIT_NUM_ACTIVITIES)
+		return (EINVAL);
+
+	spa_t *spa;
+	int error = spa_open(pool, &spa, FTAG);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Increment the spa's waiter count so that we can call spa_close and
+	 * still ensure that the spa_t doesn't get freed before this thread is
+	 * finished with it when the pool is exported. We want to call spa_close
+	 * before we start waiting because otherwise the additional ref would
+	 * prevent the pool from being exported or destroyed throughout the
+	 * potentially long wait.
+	 */
+	mutex_enter(&spa->spa_activities_lock);
+	spa->spa_waiters++;
+	spa_close(spa, FTAG);
+
+	*waited = B_FALSE;
+	for (;;) {
+		boolean_t in_progress;
+		error = spa_activity_in_progress(spa, activity, use_tag, tag,
+		    &in_progress);
+
+		if (error || !in_progress || spa->spa_waiters_cancel)
+			break;
+
+		*waited = B_TRUE;
+
+		if (cv_wait_sig(&spa->spa_activities_cv,
+		    &spa->spa_activities_lock) == 0) {
+			error = EINTR;
+			break;
+		}
+	}
+
+	spa->spa_waiters--;
+	cv_signal(&spa->spa_waiters_cv);
+	mutex_exit(&spa->spa_activities_lock);
+
+	return (error);
+}
+
+/*
+ * Wait for a particular instance of the specified activity to complete, where
+ * the instance is identified by 'tag'
+ */
+int
+spa_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
+    boolean_t *waited)
+{
+	return (spa_wait_common(pool, activity, B_TRUE, tag, waited));
+}
+
+/*
+ * Wait for all instances of the specified activity to complete.
+ */
+int
+spa_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
+{
+
+	return (spa_wait_common(pool, activity, B_FALSE, 0, waited));
+}
+
 sysevent_t *
 spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
 {
@@ -8704,6 +9881,8 @@
 		ev = kmem_alloc(sizeof (sysevent_t), KM_SLEEP);
 		ev->resource = resource;
 	}
+#else
+	(void) spa, (void) vd, (void) hist_nvl, (void) name;
 #endif
 	return (ev);
 }
@@ -8716,6 +9895,8 @@
 		zfs_zevent_post(ev->resource, NULL, zfs_zevent_post_cb);
 		kmem_free(ev, sizeof (*ev));
 	}
+#else
+	(void) ev;
 #endif
 }
 
@@ -8732,7 +9913,6 @@
 	spa_event_post(spa_event_create(spa, vd, hist_nvl, name));
 }
 
-#if defined(_KERNEL)
 /* state manipulation functions */
 EXPORT_SYMBOL(spa_open);
 EXPORT_SYMBOL(spa_open_rewind);
@@ -8787,37 +9967,44 @@
 
 /* asynchronous event notification */
 EXPORT_SYMBOL(spa_event_notify);
-#endif
 
-#if defined(_KERNEL)
 /* BEGIN CSTYLED */
-module_param(spa_load_verify_shift, int, 0644);
-MODULE_PARM_DESC(spa_load_verify_shift, "log2(fraction of arc that can "
-	"be used by inflight I/Os when verifying pool during import");
-/* END CSTYLED */
+ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, INT, ZMOD_RW,
+	"log2 fraction of arc that can be used by inflight I/Os when "
+	"verifying pool during import");
 
-module_param(spa_load_verify_metadata, int, 0644);
-MODULE_PARM_DESC(spa_load_verify_metadata,
+ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_metadata, INT, ZMOD_RW,
 	"Set to traverse metadata on pool import");
 
-module_param(spa_load_verify_data, int, 0644);
-MODULE_PARM_DESC(spa_load_verify_data,
+ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_data, INT, ZMOD_RW,
 	"Set to traverse data on pool import");
 
-module_param(spa_load_print_vdev_tree, int, 0644);
-MODULE_PARM_DESC(spa_load_print_vdev_tree,
+ZFS_MODULE_PARAM(zfs_spa, spa_, load_print_vdev_tree, INT, ZMOD_RW,
 	"Print vdev tree to zfs_dbgmsg during pool import");
 
-/* CSTYLED */
-module_param(zio_taskq_batch_pct, uint, 0444);
-MODULE_PARM_DESC(zio_taskq_batch_pct,
+ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_pct, UINT, ZMOD_RD,
 	"Percentage of CPUs to run an IO worker thread");
 
-/* BEGIN CSTYLED */
-module_param(zfs_max_missing_tvds, ulong, 0644);
-MODULE_PARM_DESC(zfs_max_missing_tvds,
-	"Allow importing pool with up to this number of missing top-level vdevs"
-	" (in read-only mode)");
-/* END CSTYLED */
+ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RD,
+	"Number of threads per IO worker taskqueue");
 
-#endif
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, ULONG, ZMOD_RW,
+	"Allow importing pool with up to this number of missing top-level "
+	"vdevs (in read-only mode)");
+
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT, ZMOD_RW,
+	"Set the livelist condense zthr to pause");
+
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_pause, INT, ZMOD_RW,
+	"Set the livelist condense synctask to pause");
+
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_cancel, INT, ZMOD_RW,
+	"Whether livelist condensing was canceled in the synctask");
+
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_cancel, INT, ZMOD_RW,
+	"Whether livelist condensing was canceled in the zthr function");
+
+ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, ZMOD_RW,
+	"Whether extra ALLOC blkptrs were added to a livelist entry while it "
+	"was being condensed");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/spa_boot.c b/zfs/module/zfs/spa_boot.c
index be79542..6743946 100644
--- a/zfs/module/zfs/spa_boot.c
+++ b/zfs/module/zfs/spa_boot.c

@@ -27,7 +27,7 @@
 #ifdef _KERNEL
 
 #include <sys/zio.h>
-#include <sys/spa.h>
+#include <sys/spa_boot.h>
 #include <sys/sunddi.h>
 
 char *

diff --git a/zfs/module/zfs/spa_checkpoint.c b/zfs/module/zfs/spa_checkpoint.c
index 44711ac..ddcdb68 100644
--- a/zfs/module/zfs/spa_checkpoint.c
+++ b/zfs/module/zfs/spa_checkpoint.c

@@ -191,6 +191,7 @@
 	spa->spa_checkpoint_info.sci_timestamp = 0;
 
 	spa_feature_decr(spa, SPA_FEATURE_POOL_CHECKPOINT, tx);
+	spa_notify_waiters(spa);
 
 	spa_history_log_internal(spa, "spa discard checkpoint", tx,
 	    "finished discarding checkpointed state from the pool");
@@ -211,7 +212,7 @@
 	uint64_t end = sme->sme_offset + sme->sme_run;
 
 	if (sdc->sdc_entry_limit == 0)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	/*
 	 * Since the space map is not condensed, we know that
@@ -336,17 +337,18 @@
 	spa_checkpoint_accounting_verify(vd->vdev_spa);
 #endif
 
-	zfs_dbgmsg("discarding checkpoint: txg %llu, vdev id %d, "
+	zfs_dbgmsg("discarding checkpoint: txg %llu, vdev id %lld, "
 	    "deleted %llu words - %llu words are left",
-	    tx->tx_txg, vd->vdev_id, (words_before - words_after),
-	    words_after);
+	    (u_longlong_t)tx->tx_txg, (longlong_t)vd->vdev_id,
+	    (u_longlong_t)(words_before - words_after),
+	    (u_longlong_t)words_after);
 
 	if (error != EINTR) {
 		if (error != 0) {
-			zfs_panic_recover("zfs: error %d was returned "
+			zfs_panic_recover("zfs: error %lld was returned "
 			    "while incrementally destroying the checkpoint "
-			    "space map of vdev %llu\n",
-			    error, vd->vdev_id);
+			    "space map of vdev %llu\n",
+			    (longlong_t)error, (u_longlong_t)vd->vdev_id);
 		}
 		ASSERT0(words_after);
 		ASSERT0(space_map_allocated(vd->vdev_checkpoint_sm));
@@ -378,10 +380,10 @@
 	return (B_TRUE);
 }
 
-/* ARGSUSED */
 boolean_t
 spa_checkpoint_discard_thread_check(void *arg, zthr_t *zthr)
 {
+	(void) zthr;
 	spa_t *spa = arg;
 
 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
@@ -448,10 +450,10 @@
 }
 
 
-/* ARGSUSED */
 static int
 spa_checkpoint_check(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_POOL_CHECKPOINT))
@@ -472,10 +474,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 spa_checkpoint_sync(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	spa_t *spa = dp->dp_spa;
 	uberblock_t checkpoint = spa->spa_ubsync;
@@ -524,7 +526,7 @@
 	spa_feature_incr(spa, SPA_FEATURE_POOL_CHECKPOINT, tx);
 
 	spa_history_log_internal(spa, "spa checkpoint", tx,
-	    "checkpointed uberblock txg=%llu", checkpoint.ub_txg);
+	    "checkpointed uberblock txg=%llu", (u_longlong_t)checkpoint.ub_txg);
 }
 
 /*
@@ -569,10 +571,10 @@
 	return (error);
 }
 
-/* ARGSUSED */
 static int
 spa_checkpoint_discard_check(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
@@ -587,10 +589,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 spa_checkpoint_discard_sync(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	VERIFY0(zap_remove(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
@@ -624,15 +626,12 @@
 	    ZFS_SPACE_CHECK_DISCARD_CHECKPOINT));
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(spa_checkpoint_get_stats);
 EXPORT_SYMBOL(spa_checkpoint_discard_thread);
 EXPORT_SYMBOL(spa_checkpoint_discard_thread_check);
 
 /* BEGIN CSTYLED */
-module_param(zfs_spa_discard_memory_limit, ulong, 0644);
-MODULE_PARM_DESC(zfs_spa_discard_memory_limit,
-    "Maximum memory for prefetching checkpoint space "
-    "map per top-level vdev while discarding checkpoint");
+ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, ULONG, ZMOD_RW,
+	"Limit for memory used in prefetching the checkpoint space map done "
+	"on each vdev while discarding the checkpoint");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/spa_config.c b/zfs/module/zfs/spa_config.c
index 8c7c149..c4282b0 100644
--- a/zfs/module/zfs/spa_config.c
+++ b/zfs/module/zfs/spa_config.c

@@ -22,23 +22,25 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 #include <sys/spa.h>
+#include <sys/file.h>
 #include <sys/fm/fs/zfs.h>
 #include <sys/spa_impl.h>
 #include <sys/nvpair.h>
-#include <sys/uio.h>
 #include <sys/fs/zfs.h>
 #include <sys/vdev_impl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/systeminfo.h>
 #include <sys/sunddi.h>
 #include <sys/zfeature.h>
+#include <sys/zfs_file.h>
+#include <sys/zfs_context.h>
 #ifdef _KERNEL
-#include <sys/kobj.h>
 #include <sys/zone.h>
 #endif
 
@@ -80,8 +82,10 @@
 	nvlist_t *nvlist, *child;
 	nvpair_t *nvpair;
 	char *pathname;
-	struct _buf *file;
+	zfs_file_t *fp;
+	zfs_file_attr_t zfa;
 	uint64_t fsize;
+	int err;
 
 #ifdef _KERNEL
 	if (zfs_autoimport_disable)
@@ -95,22 +99,27 @@
 
 	(void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path);
 
-	file = kobj_open_file(pathname);
+	err = zfs_file_open(pathname, O_RDONLY, 0, &fp);
 
+#ifdef __FreeBSD__
+	if (err)
+		err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp);
+#endif
 	kmem_free(pathname, MAXPATHLEN);
 
-	if (file == (struct _buf *)-1)
+	if (err)
 		return;
 
-	if (kobj_get_filesize(file, &fsize) != 0)
+	if (zfs_file_getattr(fp, &zfa))
 		goto out;
 
+	fsize = zfa.zfa_size;
 	buf = kmem_alloc(fsize, KM_SLEEP);
 
 	/*
 	 * Read the nvlist from the file.
 	 */
-	if (kobj_read_file(file, buf, fsize, 0) < 0)
+	if (zfs_file_read(fp, buf, fsize, NULL) != 0)
 		goto out;
 
 	/*
@@ -143,27 +152,32 @@
 	if (buf != NULL)
 		kmem_free(buf, fsize);
 
-	kobj_close_file(file);
+	zfs_file_close(fp);
 }
 
 static int
 spa_config_remove(spa_config_dirent_t *dp)
 {
-#if defined(__linux__) && defined(_KERNEL)
-	int error, flags = FWRITE | FTRUNC;
-	uio_seg_t seg = UIO_SYSSPACE;
-	vnode_t *vp;
+	int error = 0;
 
-	error = vn_open(dp->scd_path, seg, flags, 0644, &vp, 0, 0);
-	if (error == 0) {
-		(void) VOP_FSYNC(vp, FSYNC, kcred, NULL);
-		(void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
+	/*
+	 * Remove the cache file.  If zfs_file_unlink() is not supported by the
+	 * platform, fall back to truncating the file, which is functionally
+	 * equivalent.
+	 */
+	error = zfs_file_unlink(dp->scd_path);
+	if (error == EOPNOTSUPP) {
+		int flags = O_RDWR | O_TRUNC;
+		zfs_file_t *fp;
+
+		error = zfs_file_open(dp->scd_path, flags, 0644, &fp);
+		if (error == 0) {
+			(void) zfs_file_fsync(fp, O_SYNC);
+			(void) zfs_file_close(fp);
+		}
 	}
 
 	return (error);
-#else
-	return (vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE));
-#endif
 }
 
 static int
@@ -171,10 +185,10 @@
 {
 	size_t buflen;
 	char *buf;
-	vnode_t *vp;
-	int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
+	int oflags = O_RDWR | O_TRUNC | O_CREAT | O_LARGEFILE;
 	char *temp;
 	int err;
+	zfs_file_t *fp;
 
 	/*
 	 * If the nvlist is empty (NULL), then remove the old cachefile.
@@ -193,46 +207,22 @@
 	buf = fnvlist_pack(nvl, &buflen);
 	temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 
-#if defined(__linux__) && defined(_KERNEL)
 	/*
 	 * Write the configuration to disk.  Due to the complexity involved
 	 * in performing a rename and remove from within the kernel the file
 	 * is instead truncated and overwritten in place.  This way we always
 	 * have a consistent view of the data or a zero length file.
 	 */
-	err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
+	err = zfs_file_open(dp->scd_path, oflags, 0644, &fp);
 	if (err == 0) {
-		err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0,
-		    UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL);
+		err = zfs_file_write(fp, buf, buflen, NULL);
 		if (err == 0)
-			err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+			err = zfs_file_fsync(fp, O_SYNC);
 
-		(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
+		zfs_file_close(fp);
 		if (err)
 			(void) spa_config_remove(dp);
 	}
-#else
-	/*
-	 * Write the configuration to disk.  We need to do the traditional
-	 * 'write to temporary file, sync, move over original' to make sure we
-	 * always have a consistent view of the data.
-	 */
-	(void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
-
-	err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
-	if (err == 0) {
-		err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
-		    0, RLIM64_INFINITY, kcred, NULL);
-		if (err == 0)
-			err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
-		if (err == 0)
-			err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
-		(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
-	}
-
-	(void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
-#endif
-
 	fnvlist_pack_free(buf, buflen);
 	kmem_free(temp, MAXPATHLEN);
 	return (err);
@@ -248,7 +238,8 @@
  * would be required.
  */
 void
-spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
+spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
+    boolean_t postblkidevent)
 {
 	spa_config_dirent_t *dp, *tdp;
 	nvlist_t *nvl;
@@ -258,7 +249,7 @@
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
-	if (rootdir == NULL || !(spa_mode_global & FWRITE))
+	if (!(spa_mode_global & SPA_MODE_WRITE))
 		return;
 
 	/*
@@ -325,8 +316,9 @@
 		 * resource issues are resolved.
 		 */
 		if (target->spa_ccw_fail_time == 0) {
-			zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
-			    target, NULL, NULL, NULL, 0, 0);
+			(void) zfs_ereport_post(
+			    FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+			    target, NULL, NULL, NULL, 0);
 		}
 		target->spa_ccw_fail_time = gethrtime();
 		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
@@ -353,6 +345,18 @@
 
 	if (postsysevent)
 		spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
+
+	/*
+	 * Post udev event to sync blkid information if the pool is created
+	 * or a new vdev is added to the pool.
+	 */
+	if ((target->spa_root_vdev) && postblkidevent) {
+		vdev_post_kobj_evt(target->spa_root_vdev);
+		for (int i = 0; i < target->spa_l2cache.sav_count; i++)
+			vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]);
+		for (int i = 0; i < target->spa_spares.sav_count; i++)
+			vdev_post_kobj_evt(target->spa_spares.sav_vdevs[i]);
+	}
 }
 
 /*
@@ -456,6 +460,9 @@
 	if (spa->spa_comment != NULL)
 		fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT,
 		    spa->spa_comment);
+	if (spa->spa_compatibility != NULL)
+		fnvlist_add_string(config, ZPOOL_CONFIG_COMPATIBILITY,
+		    spa->spa_compatibility);
 
 	hostid = spa_get_hostid(spa);
 	if (hostid != 0)
@@ -604,6 +611,7 @@
 	 */
 	if (!spa->spa_is_root) {
 		spa_write_cachefile(spa, B_FALSE,
+		    what != SPA_CONFIG_UPDATE_POOL,
 		    what != SPA_CONFIG_UPDATE_POOL);
 	}
 
@@ -611,17 +619,19 @@
 		spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(spa_config_load);
 EXPORT_SYMBOL(spa_all_configs);
 EXPORT_SYMBOL(spa_config_set);
 EXPORT_SYMBOL(spa_config_generate);
 EXPORT_SYMBOL(spa_config_update);
 
-module_param(spa_config_path, charp, 0444);
-MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)");
-
-module_param(zfs_autoimport_disable, int, 0644);
-MODULE_PARM_DESC(zfs_autoimport_disable, "Disable pool import at module load");
-
+/* BEGIN CSTYLED */
+#ifdef __linux__
+/* string sysctls require a char array on FreeBSD */
+ZFS_MODULE_PARAM(zfs_spa, spa_, config_path, STRING, ZMOD_RD,
+	"SPA config file (/etc/zfs/zpool.cache)");
 #endif
+
+ZFS_MODULE_PARAM(zfs, zfs_, autoimport_disable, INT, ZMOD_RW,
+	"Disable pool import at module load");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/spa_errlog.c b/zfs/module/zfs/spa_errlog.c
index fa5120e..c6b28ea 100644
--- a/zfs/module/zfs/spa_errlog.c
+++ b/zfs/module/zfs/spa_errlog.c

@@ -252,6 +252,8 @@
 	mutex_exit(&spa->spa_errlist_lock);
 
 	mutex_exit(&spa->spa_errlog_lock);
+#else
+	(void) spa, (void) uaddr, (void) count;
 #endif
 
 	return (ret);

diff --git a/zfs/module/zfs/spa_history.c b/zfs/module/zfs/spa_history.c
index fa95d31..dae06e4 100644
--- a/zfs/module/zfs/spa_history.c
+++ b/zfs/module/zfs/spa_history.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
  */
@@ -180,16 +180,6 @@
 	return (0);
 }
 
-static char *
-spa_history_zone(void)
-{
-#ifdef _KERNEL
-	return ("linux");
-#else
-	return (NULL);
-#endif
-}
-
 /*
  * Post a history sysevent.
  *
@@ -298,7 +288,6 @@
 	}
 #endif
 
-	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
 	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename);
 
 	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
@@ -307,14 +296,17 @@
 	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
 		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
 			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
-			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
+			    (longlong_t)fnvlist_lookup_uint64(nvl,
+			    ZPOOL_HIST_TXG),
 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
-			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
+			    (u_longlong_t)fnvlist_lookup_uint64(nvl,
+			    ZPOOL_HIST_DSID),
 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
 		} else {
 			zfs_dbgmsg("txg %lld %s %s",
-			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
+			    (longlong_t)fnvlist_lookup_uint64(nvl,
+			    ZPOOL_HIST_TXG),
 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
 		}
@@ -331,7 +323,7 @@
 		 * posted as a result of the ZPOOL_HIST_CMD key being present
 		 * it would result in only one sysevent being posted with the
 		 * full command line arguments, requiring the consumer to know
-		 * how to parse and understand zfs(1M) command invocations.
+		 * how to parse and understand zfs(8) command invocations.
 		 */
 		spa_history_log_notify(spa, nvl);
 	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
@@ -406,14 +398,18 @@
 	}
 	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
 
+	/*
+	 * Since the history is recorded asynchronously, the effective time is
+	 * now, which may be considerably before the change is made on disk.
+	 */
+	fnvlist_add_uint64(nvarg, ZPOOL_HIST_TIME, gethrestime_sec());
+
 	/* Kick this off asynchronously; errors are ignored. */
-	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
-	    nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
+	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx);
 	dmu_tx_commit(tx);
 
 	/* spa_history_log_sync will free nvl */
 	return (err);
-
 }
 
 /*
@@ -534,16 +530,17 @@
 
 	msg = kmem_vasprintf(fmt, adx);
 	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
-	strfree(msg);
+	kmem_strfree(msg);
 
 	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
 	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
+	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
 
 	if (dmu_tx_is_syncing(tx)) {
 		spa_history_log_sync(nvl, tx);
 	} else {
 		dsl_sync_task_nowait(spa_get_dsl(spa),
-		    spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
+		    spa_history_log_sync, nvl, tx);
 	}
 	/* spa_history_log_sync() will free nvl */
 }
@@ -623,6 +620,14 @@
 	    u->nodename, u->release, u->version, u->machine);
 }
 
+#ifndef _KERNEL
+/*
+ * Userland (non-_KERNEL) definition of spa_history_zone(): it always
+ * returns NULL.
+ * NOTE(review): presumably kernel builds get a platform-specific
+ * definition elsewhere -- confirm.
+ */
+const char *
+spa_history_zone(void)
+{
+	return (NULL);
+}
+#endif
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(spa_history_create_obj);
 EXPORT_SYMBOL(spa_history_get);

diff --git a/zfs/module/zfs/spa_log_spacemap.c b/zfs/module/zfs/spa_log_spacemap.c
new file mode 100644
index 0000000..6a27f57
--- /dev/null
+++ b/zfs/module/zfs/spa_log_spacemap.c

@@ -0,0 +1,1400 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+ */
+
+#include <sys/dmu_objset.h>
+#include <sys/metaslab.h>
+#include <sys/metaslab_impl.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/spa_log_spacemap.h>
+#include <sys/vdev_impl.h>
+#include <sys/zap.h>
+
+/*
+ * Log Space Maps
+ *
+ * Log space maps are an optimization in ZFS metadata allocations for pools
+ * whose workloads are primarily random-writes. Random-write workloads are also
+ * typically random-free, meaning that they are freeing from locations scattered
+ * throughout the pool. This means that each TXG we will have to append some
+ * FREE records to almost every metaslab. With log space maps, we hold their
+ * changes in memory and log them altogether in one pool-wide space map on-disk
+ * for persistence. As more blocks are accumulated in the log space maps and
+ * more unflushed changes are accounted in memory, we flush a selected group
+ * of metaslabs every TXG to relieve memory pressure and potential overheads
+ * when loading the pool. Flushing a metaslab to disk relieves memory as we
+ * flush any unflushed changes from memory to disk (i.e. the metaslab's space
+ * map) and saves import time by making old log space maps obsolete and
+ * eventually destroying them. [A log space map is said to be obsolete when all
+ * its entries have made it to their corresponding metaslab space maps].
+ *
+ * == On disk data structures used ==
+ *
+ * - The pool has a new feature flag and a new entry in the MOS. The feature
+ *   is activated when we create the first log space map and remains active
+ *   for the lifetime of the pool. The new entry in the MOS Directory [refer
+ *   to DMU_POOL_LOG_SPACEMAP_ZAP] is populated with a ZAP whose key-value
+ *   pairs are of the form <key: txg, value: log space map object for that txg>.
+ *   This entry is our on-disk reference of the log space maps that exist in
+ *   the pool for each TXG and it is used during import to load all the
+ *   metaslab unflushed changes in memory. To see how this structure is first
+ *   created and later populated refer to spa_generate_syncing_log_sm(). To see
+ *   how it is used during import time refer to spa_ld_log_sm_metadata().
+ *
+ * - Each vdev has a new entry in its vdev_top_zap (see field
+ *   VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS) which holds the msp_unflushed_txg of
+ *   each metaslab in this vdev. This field is the on-disk counterpart of the
+ *   in-memory field ms_unflushed_txg which tells us from which TXG and onwards
+ *   the metaslab hasn't had its changes flushed. During import, we use this
+ *   to ignore any entries in the space map log that are for this metaslab but
+ *   from a TXG before msp_unflushed_txg. At that point, we also populate its
+ *   in-memory counterpart and from there both fields are updated every time
+ *   we flush that metaslab.
+ *
+ * - A space map is created every TXG and, during that TXG, it is used to log
+ *   all incoming changes (the log space map). When created, the log space map
+ *   is referenced in memory by spa_syncing_log_sm and its object ID is inserted
+ *   to the space map ZAP mentioned above. The log space map is closed at the
+ *   end of the TXG and will be destroyed when it becomes fully obsolete. We
+ *   know when a log space map has become obsolete by looking at the oldest
+ *   (and smallest) ms_unflushed_txg in the pool. If the value of that is bigger
+ *   than the log space map's TXG, then it means that there is no metaslab who
+ *   doesn't have the changes from that log and we can therefore destroy it.
+ *   [see spa_cleanup_old_sm_logs()].
+ *
+ * == Important in-memory structures ==
+ *
+ * - The per-spa field spa_metaslabs_by_flushed sorts all the metaslabs in
+ *   the pool by their ms_unflushed_txg field. It is primarily used for three
+ *   reasons. First of all, it is used during flushing where we try to flush
+ *   metaslabs in-order from the oldest-flushed to the most recently flushed
+ *   every TXG. Secondly, it helps us to lookup the ms_unflushed_txg of the
+ *   oldest flushed metaslab to distinguish which log space maps have become
+ *   obsolete and which ones are still relevant. Finally it tells us which
+ *   metaslabs have unflushed changes in a pool where this feature was just
+ *   enabled, as we don't immediately add all of the pool's metaslabs but we
+ *   add them over time as they go through metaslab_sync(). The reason that
+ *   we do that is to ease these pools into the behavior of the flushing
+ *   algorithm (described later on).
+ *
+ * - The per-spa field spa_sm_logs_by_txg can be thought of as the in-memory
+ *   counterpart of the space map ZAP mentioned above. It's an AVL tree whose
+ *   nodes represent the log space maps in the pool. This in-memory
+ *   representation of log space maps in the pool sorts the log space maps by
+ *   the TXG that they were created (which is also the TXG of their unflushed
+ *   changes). It also contains the following extra information for each
+ *   space map:
+ *   [1] The number of metaslabs that were last flushed on that TXG. This is
+ *       important because if that counter is zero and this is the oldest
+ *       log then it means that it is also obsolete.
+ *   [2] The number of blocks of that space map. This field is used by the
+ *       block heuristic of our flushing algorithm (described later on).
+ *       It represents how many blocks of metadata changes ZFS had to write
+ *       to disk for that TXG.
+ *
+ * - The per-spa field spa_log_summary is a list of entries that summarizes
+ *   the metaslab and block counts of all the nodes of the spa_sm_logs_by_txg
+ *   AVL tree mentioned above. The reason this exists is that our flushing
+ *   algorithm (described later) tries to estimate how many metaslabs to flush
+ *   in each TXG by iterating over all the log space maps and looking at their
+ *   block counts. Summarizing that information means that we don't have to
+ *   iterate through each space map, minimizing the runtime overhead of the
+ *   flushing algorithm which would be induced in syncing context. In terms of
+ *   implementation the log summary is used as a queue:
+ *   * we modify or pop entries from its head when we flush metaslabs
+ *   * we modify or append entries to its tail when we sync changes.
+ *
+ * - Each metaslab has two new range trees that hold its unflushed changes,
+ *   ms_unflushed_allocs and ms_unflushed_frees. These are always disjoint.
+ *
+ * == Flushing algorithm ==
+ *
+ * The decision of how many metaslabs to flush on a given TXG is guided by
+ * two heuristics:
+ *
+ * [1] The memory heuristic -
+ * We keep track of the memory used by the unflushed trees from all the
+ * metaslabs [see sus_memused of spa_unflushed_stats] and we ensure that it
+ * stays below a certain threshold which is determined by an arbitrary hard
+ * limit and an arbitrary percentage of the system's memory [see
+ * spa_log_exceeds_memlimit()]. When we see that the memory usage of the
+ * unflushed changes are passing that threshold, we flush metaslabs, which
+ * empties their unflushed range trees, reducing the memory used.
+ *
+ * [2] The block heuristic -
+ * We try to keep the total number of blocks in the log space maps in check
+ * so the log doesn't grow indefinitely and we don't induce a lot of overhead
+ * when loading the pool. At the same time we don't want to flush a lot of
+ * metaslabs too often as this would defeat the purpose of the log space map.
+ * As a result we set a limit in the amount of blocks that we think it's
+ * acceptable for the log space maps to have and try not to cross it.
+ * [see sus_blocklimit from spa_unflushed_stats].
+ *
+ * In order to stay below the block limit every TXG we have to estimate how
+ * many metaslabs we need to flush based on the current rate of incoming blocks
+ * and our history of log space map blocks. The main idea here is to answer
+ * the question of how many metaslabs do we need to flush in order to get rid
+ * of at least an X amount of log space map blocks. We can answer this question
+ * by iterating backwards from the oldest log space map to the newest one
+ * and looking at their metaslab and block counts. At this point the log summary
+ * mentioned above comes handy as it reduces the amount of things that we have
+ * to iterate (even though it may reduce the preciseness of our estimates due
+ * to its aggregation of data). So with that in mind, we project the incoming
+ * rate of the current TXG into the future and attempt to approximate how many
+ * metaslabs would we need to flush from now in order to avoid exceeding our
+ * block limit in different points in the future (granted that we would keep
+ * flushing the same number of metaslabs for every TXG). Then we take the
+ * maximum number from all these estimates to be on the safe side. For the
+ * exact implementation details of the algorithm refer to
+ * spa_estimate_metaslabs_to_flush.
+ */
+
+/*
+ * This is used as the block size for the space maps used for the
+ * log space map feature. These space maps benefit from a bigger
+ * block size as we expect to be writing a lot of data to them at
+ * once.
+ */
+unsigned long zfs_log_sm_blksz = 1ULL << 17;
+
+/*
+ * Percentage of the overall system's memory that ZFS allows to be
+ * used for unflushed changes (e.g. the sum of size of all the nodes
+ * in the unflushed trees).
+ *
+ * Note that this value is calculated over 1000000 for finer granularity
+ * (thus the _ppm suffix; reads as "parts per million"). As an example,
+ * the default of 1000 allows 0.1% of memory to be used.
+ */
+unsigned long zfs_unflushed_max_mem_ppm = 1000;
+
+/*
+ * Specific hard-limit in memory that ZFS allows to be used for
+ * unflushed changes.
+ */
+unsigned long zfs_unflushed_max_mem_amt = 1ULL << 30;
+
+/*
+ * The following tunable determines the number of blocks that can be used for
+ * the log space maps. It is expressed as a percentage of the total number of
+ * metaslabs in the pool (i.e. the default of 400 means that the number of log
+ * blocks is capped at 4 times the number of metaslabs).
+ *
+ * This value exists to tune our flushing algorithm, with higher values
+ * flushing metaslabs less often (doing less I/Os) per TXG versus lower values
+ * flushing metaslabs more aggressively with the upside of saving overheads
+ * when loading the pool. Another factor in this tradeoff is that flushing
+ * less often can potentially lead to better utilization of the metaslab space
+ * map's block size as we accumulate more changes per flush.
+ *
+ * This tunable indirectly controls the flush rate (metaslabs flushed per
+ * txg), which is why expressing it as a percentage of the number of
+ * metaslabs in the pool makes sense here.
+ *
+ * As a rule of thumb we default this tunable to 400% based on the following:
+ *
+ * 1] Assuming a constant flush rate and a constant incoming rate of log blocks
+ *    it is reasonable to expect that the amount of obsolete entries changes
+ *    linearly from txg to txg (e.g. the oldest log should have the most
+ *    obsolete entries, and the most recent one the least). With this we could
+ *    say that, at any given time, about half of the entries in the whole space
+ *    map log are obsolete. Thus for every two entries for a metaslab in the
+ *    log space map, only one of them is valid and actually makes it to the
+ *    metaslab's space map.
+ *    [factor of 2]
+ * 2] Each entry in the log space map is guaranteed to be two words while
+ *    entries in metaslab space maps are generally single-word.
+ *    [an extra factor of 2 - 400% overall]
+ * 3] Even if [1] and [2] are slightly less than 2 each, we haven't taken into
+ *    account any consolidation of segments from the log space map to the
+ *    unflushed range trees nor their history (e.g. a segment being allocated,
+ *    then freed, then allocated again means 3 log space map entries but 0
+ *    metaslab space map entries). Depending on the workload, we've seen ~1.8
+ *    non-obsolete log space map entries per metaslab entry, for a total of
+ *    ~600%. Since most of these estimates though are workload dependent, we
+ *    default on 400% to be conservative.
+ *
+ *    Thus we could say that even in the worst
+ *    case of [1] and [2], the factor should end up being 4.
+ *
+ * That said, regardless of the number of metaslabs in the pool we need to
+ * provide upper and lower bounds for the log block limit.
+ * [see zfs_unflushed_log_block_{min,max}]
+ */
+unsigned long zfs_unflushed_log_block_pct = 400;
+
+/*
+ * If the number of metaslabs is small and our incoming rate is high, we could
+ * get into a situation that we are flushing all our metaslabs every TXG. Thus
+ * we always allow at least this many log blocks.
+ */
+unsigned long zfs_unflushed_log_block_min = 1000;
+
+/*
+ * If the log becomes too big, the import time of the pool can take a hit in
+ * terms of performance. Thus we have a hard limit in the size of the log in
+ * terms of blocks.
+ */
+static unsigned long zfs_unflushed_log_block_max = (1ULL << 17);
+
+/*
+ * Also we have a hard limit in the size of the log in terms of dirty TXGs.
+ */
+static unsigned long zfs_unflushed_log_txg_max = 1000;
+
+/*
+ * Max # of rows allowed for the log_summary. The tradeoff here is accuracy and
+ * stability of the flushing algorithm (longer summary) vs its runtime overhead
+ * (smaller summary is faster to traverse).
+ */
+unsigned long zfs_max_logsm_summary_length = 10;
+
+/*
+ * Tunable that sets the lower bound on the metaslabs to flush every TXG.
+ *
+ * Setting this to 0 has no effect since if the pool is idle we won't even be
+ * creating log space maps and therefore we won't be flushing. On the other
+ * hand if the pool has any incoming workload our block heuristic will start
+ * flushing metaslabs anyway.
+ *
+ * The point of this tunable is to be used in extreme cases where we really
+ * want to flush more metaslabs than our adaptable heuristic plans to flush.
+ */
+unsigned long zfs_min_metaslabs_to_flush = 1;
+
+/*
+ * Tunable that specifies how far in the past do we want to look when trying to
+ * estimate the incoming log blocks for the current TXG.
+ *
+ * Setting this too high may not only increase runtime but also minimize the
+ * effect of the incoming rates from the most recent TXGs as we take the
+ * average over all the blocks that we walk
+ * [see spa_estimate_incoming_log_blocks].
+ */
+unsigned long zfs_max_log_walking = 5;
+
+/*
+ * This tunable exists solely for testing purposes. It ensures that the log
+ * spacemaps are not flushed and destroyed during export in order for the
+ * relevant log spacemap import code paths to be tested (effectively simulating
+ * a crash).
+ */
+int zfs_keep_log_spacemaps_at_export = 0;
+
+/*
+ * Estimate the number of log space map blocks coming in per TXG.
+ *
+ * Walks spa_sm_logs_by_txg backwards from its last node and averages
+ * sls_nblocks over at most zfs_max_log_walking logs. The log that belongs
+ * to the currently syncing TXG is skipped and does not count towards that
+ * limit.
+ *
+ * Returns the average rounded up, or 0 if no log was sampled. Expected to
+ * be called only during the first sync pass.
+ */
+static uint64_t
+spa_estimate_incoming_log_blocks(spa_t *spa)
+{
+	uint64_t nsampled = 0, total_blocks = 0;
+
+	ASSERT3U(spa_sync_pass(spa), ==, 1);
+
+	spa_log_sm_t *sls = avl_last(&spa->spa_sm_logs_by_txg);
+	while (sls != NULL && nsampled < zfs_max_log_walking) {
+		/*
+		 * Leave out the log created in this TXG; counting it
+		 * would make the estimate inaccurate.
+		 */
+		if (sls->sls_txg != spa_syncing_txg(spa)) {
+			total_blocks += sls->sls_nblocks;
+			nsampled++;
+		}
+		sls = AVL_PREV(&spa->spa_sm_logs_by_txg, sls);
+	}
+
+	if (nsampled == 0)
+		return (0);
+	return (DIV_ROUND_UP(total_blocks, nsampled));
+}
+
+/*
+ * Return the pool's current log block limit, as stored in
+ * spa_unflushed_stats.sus_blocklimit.
+ */
+uint64_t
+spa_log_sm_blocklimit(spa_t *spa)
+{
+	uint64_t blocklimit = spa->spa_unflushed_stats.sus_blocklimit;
+
+	return (blocklimit);
+}
+
+/*
+ * Recompute the pool's log block limit (sus_blocklimit).
+ *
+ * When the log space map feature is not active the limit is expected to
+ * be 0 and is left untouched. Otherwise the limit is
+ * zfs_unflushed_log_block_pct percent of the sum of lse_msdcount over all
+ * log summary entries, clamped to the range
+ * [zfs_unflushed_log_block_min, zfs_unflushed_log_block_max].
+ */
+void
+spa_log_sm_set_blocklimit(spa_t *spa)
+{
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
+		ASSERT0(spa_log_sm_blocklimit(spa));
+		return;
+	}
+
+	/* Add up lse_msdcount across the whole summary. */
+	uint64_t dirty_total = 0;
+	log_summary_entry_t *entry = list_head(&spa->spa_log_summary);
+	while (entry != NULL) {
+		dirty_total += entry->lse_msdcount;
+		entry = list_next(&spa->spa_log_summary, entry);
+	}
+
+	/* Apply the lower bound first so the upper bound always wins. */
+	uint64_t limit = dirty_total * zfs_unflushed_log_block_pct / 100;
+	if (limit < zfs_unflushed_log_block_min)
+		limit = zfs_unflushed_log_block_min;
+	if (limit > zfs_unflushed_log_block_max)
+		limit = zfs_unflushed_log_block_max;
+
+	spa->spa_unflushed_stats.sus_blocklimit = limit;
+}
+
+/*
+ * Return the number of log space map blocks currently accounted for the
+ * pool, as stored in spa_unflushed_stats.sus_nblocks.
+ */
+uint64_t
+spa_log_sm_nblocks(spa_t *spa)
+{
+	uint64_t nblocks = spa->spa_unflushed_stats.sus_nblocks;
+
+	return (nblocks);
+}
+
+/*
+ * Consistency check between the in-memory log space map structures and
+ * the log summary: both must report the same metaslab and block totals.
+ * The metaslab total must also match the number of nodes in
+ * spa_metaslabs_by_flushed, and the block total must match
+ * spa_log_sm_nblocks().
+ *
+ * The check only runs when ZFS_DEBUG_LOG_SPACEMAP is set in zfs_flags.
+ */
+static void
+spa_log_summary_verify_counts(spa_t *spa)
+{
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+
+	if (!(zfs_flags & ZFS_DEBUG_LOG_SPACEMAP))
+		return;
+
+	/* Totals according to the per-TXG log space map tree. */
+	uint64_t ms_in_logs = 0, blk_in_logs = 0;
+	spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+	while (sls != NULL) {
+		ms_in_logs += sls->sls_mscount;
+		blk_in_logs += sls->sls_nblocks;
+		sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls);
+	}
+
+	/* Totals according to the log summary. */
+	uint64_t ms_in_summary = 0, blk_in_summary = 0;
+	log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	while (e != NULL) {
+		ms_in_summary += e->lse_mscount;
+		blk_in_summary += e->lse_blkcount;
+		e = list_next(&spa->spa_log_summary, e);
+	}
+
+	uint64_t ms_in_avl = avl_numnodes(&spa->spa_metaslabs_by_flushed);
+
+	VERIFY3U(ms_in_logs, ==, ms_in_summary);
+	VERIFY3U(ms_in_logs, ==, ms_in_avl);
+	VERIFY3U(blk_in_logs, ==, blk_in_summary);
+	VERIFY3U(blk_in_logs, ==, spa_log_sm_nblocks(spa));
+}
+
+/*
+ * Decide whether summary entry `e' can no longer take data for `txg', in
+ * which case the caller has to start a new entry.
+ *
+ * An entry is never full for the TXG it already ends on. Otherwise it is
+ * full once it covers its share of zfs_unflushed_log_txg_max TXGs, or once
+ * it holds its share of the pool's log block limit. A "share" is the
+ * respective limit divided (rounding up) by zfs_max_logsm_summary_length.
+ *
+ * Returns B_TRUE if the entry is full, B_FALSE otherwise.
+ */
+static boolean_t
+summary_entry_is_full(spa_t *spa, log_summary_entry_t *e, uint64_t txg)
+{
+	/* Data for the TXG the entry already ends on always fits. */
+	if (e->lse_end == txg)
+		return (B_FALSE);
+
+	/* TXG budget of a single summary row. */
+	if (e->lse_txgcount >= DIV_ROUND_UP(zfs_unflushed_log_txg_max,
+	    zfs_max_logsm_summary_length))
+		return (B_TRUE);
+
+	/* Block budget of a single summary row; never below one block. */
+	uint64_t blocks_per_row = MAX(1,
+	    DIV_ROUND_UP(spa_log_sm_blocklimit(spa),
+	    zfs_max_logsm_summary_length));
+	return (blocks_per_row <= e->lse_blkcount ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Update the log summary information to reflect the fact that a metaslab
+ * was flushed or destroyed (e.g due to device removal or pool export/destroy).
+ *
+ * We typically flush the oldest flushed metaslab so the first (and oldest)
+ * entry of the summary is updated. However if that metaslab is getting loaded
+ * we may flush the second oldest one which may be part of an entry later in
+ * the summary. Moreover, if we call into this function from metaslab_fini()
+ * the metaslabs probably won't be ordered by ms_unflushed_txg. Thus we ask
+ * for a txg as an argument so we can locate the appropriate summary entry for
+ * the metaslab.
+ */
+void
+spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg, boolean_t dirty)
+{
+	/*
+	 * Read-only pools carry no summary data, yet metaslab_fini() may
+	 * still call in here. There is nothing to update in that case.
+	 */
+	if (!spa_writeable(spa))
+		return;
+
+	/*
+	 * Pick the last summary entry whose starting TXG is not past `txg'.
+	 * The walk stops at the first entry that starts after `txg'.
+	 */
+	log_summary_entry_t *target = NULL;
+	log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	while (e != NULL && e->lse_start <= txg) {
+		target = e;
+		e = list_next(&spa->spa_log_summary, e);
+	}
+
+	if (target != NULL && target->lse_mscount != 0) {
+		target->lse_mscount--;
+		if (dirty)
+			target->lse_msdcount--;
+		return;
+	}
+
+	/*
+	 * No summary entry accounts for this metaslab. The only way that
+	 * can happen is the teardown of a spa_load() attempt that failed
+	 * while reading the log space maps.
+	 */
+	VERIFY3S(spa_load_state(spa), ==, SPA_LOAD_ERROR);
+}
+
+/*
+ * Update the log summary information to reflect the fact that we destroyed
+ * old log space maps. Since we can only destroy the oldest log space maps,
+ * we decrement the block count of the oldest summary entry and potentially
+ * destroy it when that count hits 0.
+ *
+ * This function is called after a metaslab is flushed and typically that
+ * metaslab is the oldest flushed, which means that this function will
+ * typically decrement the block count of the first entry of the summary and
+ * potentially free it if the block count gets to zero (its metaslab count
+ * should be zero too at that point).
+ *
+ * There are certain scenarios though that don't work exactly like that so we
+ * need to account for them:
+ *
+ * Scenario [1]: It is possible that after we flushed the oldest flushed
+ * metaslab and we destroyed the oldest log space map, more recent logs had 0
+ * metaslabs pointing to them so we got rid of them too. This can happen due
+ * to metaslabs being destroyed through device removal, or because the oldest
+ * flushed metaslab was loading but we kept flushing more recently flushed
+ * metaslabs due to the memory pressure of unflushed changes. Because of that,
+ * we always iterate from the beginning of the summary and if blocks_gone is
+ * bigger than the block_count of the current entry we free that entry (we
+ * expect its metaslab count to be zero), we decrement blocks_gone and move on
+ * to the next entry repeating this procedure until blocks_gone gets decremented
+ * to 0. Doing this also works for the typical case mentioned above.
+ *
+ * Scenario [2]: The oldest flushed metaslab isn't necessarily accounted by
+ * the first (and oldest) entry in the summary. If the first few entries of
+ * the summary were only accounting metaslabs from a device that was just
+ * removed, then the current oldest flushed metaslab could be accounted by an
+ * entry somewhere in the middle of the summary. Moreover flushing that
+ * metaslab will destroy all the log space maps older than its ms_unflushed_txg
+ * because they became obsolete after the removal. Thus, iterating as we did
+ * for scenario [1] works out for this case too.
+ *
+ * Scenario [3]: At times we decide to flush all the metaslabs in the pool
+ * in one TXG (either because we are exporting the pool or because our flushing
+ * heuristics decided to do so). When that happens all the log space maps get
+ * destroyed except the one created for the current TXG which doesn't have
+ * any log blocks yet. As log space maps get destroyed with every metaslab that
+ * we flush, entries in the summary are also destroyed. This brings a weird
+ * corner-case when we flush the last metaslab and the log space map of the
+ * current TXG is in the same summary entry with other log space maps that
+ * are older. When that happens we are eventually left with this one last
+ * summary entry whose blocks are gone (blocks_gone equals the entry's block
+ * count) but its metaslab count is non-zero (because it accounts all the
+ * metaslabs in the pool as they all got flushed). Under this scenario we can't
+ * free this last summary entry as it's referencing all the metaslabs in the
+ * pool and its block count will get incremented at the end of this sync (when
+ * we close the syncing log space map). Thus we just decrement its current
+ * block count and leave it alone. In the case that the pool gets exported,
+ * its metaslab count will be decremented over time as we call metaslab_fini()
+ * for all the metaslabs in the pool and the entry will be freed at
+ * spa_unload_log_sm_metadata().
+ */
+void
+spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone)
+{
+	log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+
+	/*
+	 * Age one TXG out of the oldest entry's TXG count. Check for an
+	 * empty summary first: the loop below already treats a NULL head
+	 * as "nothing left to do", so this access must not assume that an
+	 * entry exists either.
+	 */
+	if (e != NULL && e->lse_txgcount > 0)
+		e->lse_txgcount--;
+
+	/*
+	 * Consume blocks_gone from the head of the summary. Each pass
+	 * re-reads the head, because a fully consumed entry is removed.
+	 */
+	for (; e != NULL; e = list_head(&spa->spa_log_summary)) {
+		if (e->lse_blkcount > blocks_gone) {
+			/* The entry outlives the removal; shrink it. */
+			e->lse_blkcount -= blocks_gone;
+			blocks_gone = 0;
+			break;
+		} else if (e->lse_mscount == 0) {
+			/* remove obsolete entry */
+			blocks_gone -= e->lse_blkcount;
+			list_remove(&spa->spa_log_summary, e);
+			kmem_free(e, sizeof (log_summary_entry_t));
+		} else {
+			/* Verify that this is scenario [3] mentioned above. */
+			VERIFY3U(blocks_gone, ==, e->lse_blkcount);
+
+			/*
+			 * Assert that this is scenario [3] further by ensuring
+			 * that this is the only entry in the summary.
+			 */
+			VERIFY3P(e, ==, list_tail(&spa->spa_log_summary));
+			ASSERT3P(e, ==, list_head(&spa->spa_log_summary));
+
+			/* Keep the entry; only its blocks are gone. */
+			blocks_gone = e->lse_blkcount = 0;
+			break;
+		}
+	}
+
+	/*
+	 * Ensure that there is no way we are trying to remove more blocks
+	 * than the # of blocks in the summary.
+	 */
+	ASSERT0(blocks_gone);
+}
+
+/*
+ * Drop one metaslab from the count (sls_mscount) of the log space map
+ * that belongs to `txg'.
+ *
+ * If spa_sm_logs_by_txg holds no log for `txg', the pool must be in the
+ * SPA_LOAD_ERROR state; this is verified and nothing is changed.
+ */
+void
+spa_log_sm_decrement_mscount(spa_t *spa, uint64_t txg)
+{
+	/* Search key: only sls_txg is set, everything else is zeroed. */
+	spa_log_sm_t search = { .sls_txg = txg };
+	spa_log_sm_t *sls;
+
+	sls = avl_find(&spa->spa_sm_logs_by_txg, &search, NULL);
+	if (sls != NULL) {
+		ASSERT(sls->sls_mscount > 0);
+		sls->sls_mscount--;
+		return;
+	}
+
+	/*
+	 * No such log: we must be at the teardown of a spa_load() attempt
+	 * that got an error while reading the log space maps.
+	 */
+	VERIFY3S(spa_load_state(spa), ==, SPA_LOAD_ERROR);
+}
+
+/*
+ * Count one more metaslab (sls_mscount) against the last log space map in
+ * spa_sm_logs_by_txg, which is asserted to be the log of the currently
+ * syncing TXG.
+ */
+void
+spa_log_sm_increment_current_mscount(spa_t *spa)
+{
+	spa_log_sm_t *last_sls;
+
+	last_sls = avl_last(&spa->spa_sm_logs_by_txg);
+	ASSERT3U(last_sls->sls_txg, ==, spa_syncing_txg(spa));
+	last_sls->sls_mscount += 1;
+}
+
+/*
+ * Add metaslab and block counts for `txg' to the tail of the log summary.
+ *
+ * The tail entry is reused unless the summary is empty or
+ * summary_entry_is_full() reports the tail as full for `txg'. In those
+ * cases a new entry that starts and ends on `txg' is appended first.
+ *
+ *   metaslabs_flushed	added to lse_mscount
+ *   metaslabs_dirty	added to lse_msdcount
+ *   nblocks		added to lse_blkcount
+ */
+static void
+summary_add_data(spa_t *spa, uint64_t txg, uint64_t metaslabs_flushed,
+    uint64_t metaslabs_dirty, uint64_t nblocks)
+{
+	log_summary_entry_t *e = list_tail(&spa->spa_log_summary);
+
+	if (e != NULL && !summary_entry_is_full(spa, e, txg)) {
+		/* Reuse the tail, extending it to cover `txg' if needed. */
+		ASSERT3U(e->lse_start, <=, txg);
+		if (e->lse_end < txg) {
+			e->lse_end = txg;
+			e->lse_txgcount++;
+		}
+	} else {
+		/* Start a fresh entry that covers exactly this TXG. */
+		e = kmem_zalloc(sizeof (log_summary_entry_t), KM_SLEEP);
+		e->lse_start = txg;
+		e->lse_end = txg;
+		e->lse_txgcount = 1;
+		list_insert_tail(&spa->spa_log_summary, e);
+	}
+
+	e->lse_blkcount += nblocks;
+	e->lse_msdcount += metaslabs_dirty;
+	e->lse_mscount += metaslabs_flushed;
+}
+
+/*
+ * Record `nblocks' additional log blocks in the summary for the currently
+ * syncing TXG. The metaslab counts are left unchanged.
+ */
+static void
+spa_log_summary_add_incoming_blocks(spa_t *spa, uint64_t nblocks)
+{
+	uint64_t txg = spa_syncing_txg(spa);
+
+	summary_add_data(spa, txg, 0, 0, nblocks);
+}
+
+/*
+ * Record one flushed metaslab in the summary for the currently syncing
+ * TXG. If `dirty' is set, the metaslab is counted as dirty as well. The
+ * block count is left unchanged.
+ */
+void
+spa_log_summary_add_flushed_metaslab(spa_t *spa, boolean_t dirty)
+{
+	uint64_t dirty_count = 0;
+
+	if (dirty)
+		dirty_count = 1;
+	summary_add_data(spa, spa_syncing_txg(spa), 1, dirty_count, 0);
+}
+
+/*
+ * Increment the dirty metaslab count (lse_msdcount) of the summary entry
+ * that covers `txg', i.e. the last entry whose starting TXG is not past
+ * `txg'. Such an entry must exist and must have a non-zero metaslab count.
+ */
+void
+spa_log_summary_dirty_flushed_metaslab(spa_t *spa, uint64_t txg)
+{
+	log_summary_entry_t *target = NULL;
+	log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+
+	/* Stop at the first entry that starts after `txg'. */
+	while (e != NULL && e->lse_start <= txg) {
+		target = e;
+		e = list_next(&spa->spa_log_summary, e);
+	}
+
+	ASSERT3P(target, !=, NULL);
+	ASSERT3U(target->lse_mscount, !=, 0);
+	target->lse_msdcount++;
+}
+
+/*
+ * This function attempts to estimate how many metaslabs we should
+ * flush to satisfy our block heuristic for the log spacemap
+ * for the upcoming TXGs.
+ *
+ * Specifically, it first tries to estimate the number of incoming
+ * blocks in this TXG. Then by projecting that incoming rate to
+ * future TXGs and using the log summary, it figures out how many
+ * flushes we would need to do for future TXGs individually to
+ * stay below our block limit and returns the maximum number of
+ * flushes from those estimates.
+ *
+ * The returned value is never smaller than zfs_min_metaslabs_to_flush,
+ * which seeds the running maximum. The function asserts that it is
+ * called in sync pass 1 with the log spacemap feature active and a
+ * non-zero block limit.
+ */
+static uint64_t
+spa_estimate_metaslabs_to_flush(spa_t *spa)
+{
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+	ASSERT3U(spa_sync_pass(spa), ==, 1);
+	ASSERT(spa_log_sm_blocklimit(spa) != 0);
+
+	/*
+	 * This variable contains the incoming rate that will be projected
+	 * and used for our flushing estimates in the future.
+	 */
+	uint64_t incoming = spa_estimate_incoming_log_blocks(spa);
+
+	/*
+	 * At any point in time this variable tells us how many
+	 * TXGs in the future we are so we can make our estimations.
+	 */
+	uint64_t txgs_in_future = 1;
+
+	/*
+	 * This variable tells us how much room we have until we hit
+	 * our limit. When it goes negative, it means that we've exceeded
+	 * our limit and we need to flush.
+	 *
+	 * Note that since we start at the first TXG in the future (i.e.
+	 * txgs_in_future starts from 1) we already decrement this
+	 * variable by the incoming rate.
+	 */
+	int64_t available_blocks =
+	    spa_log_sm_blocklimit(spa) - spa_log_sm_nblocks(spa) - incoming;
+
+	/*
+	 * Remaining TXG budget: zfs_unflushed_log_txg_max minus the TXGs
+	 * already accounted for by every row of the summary.
+	 */
+	int64_t available_txgs = zfs_unflushed_log_txg_max;
+	for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	    e; e = list_next(&spa->spa_log_summary, e))
+		available_txgs -= e->lse_txgcount;
+
+	/*
+	 * This variable tells us the total number of flushes needed to
+	 * keep the log size within the limit when we reach txgs_in_future.
+	 */
+	uint64_t total_flushes = 0;
+
+	/* Holds the current maximum of our estimates so far. */
+	uint64_t max_flushes_pertxg = zfs_min_metaslabs_to_flush;
+
+	/*
+	 * For our estimations we only look as far in the future
+	 * as the summary allows us.
+	 */
+	for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	    e; e = list_next(&spa->spa_log_summary, e)) {
+
+		/*
+		 * If there is still room before we exceed our limit
+		 * then keep skipping TXGs accumulating more blocks
+		 * based on the incoming rate until we exceed it.
+		 *
+		 * With a zero incoming rate only the TXG budget can be
+		 * exhausted, so we skip straight past it; this also
+		 * avoids dividing by zero below.
+		 */
+		if (available_blocks >= 0 && available_txgs >= 0) {
+			uint64_t skip_txgs = (incoming == 0) ?
+			    available_txgs + 1 : MIN(available_txgs + 1,
+			    (available_blocks / incoming) + 1);
+			available_blocks -= (skip_txgs * incoming);
+			available_txgs -= skip_txgs;
+			txgs_in_future += skip_txgs;
+			ASSERT3S(available_blocks, >=, -incoming);
+			ASSERT3S(available_txgs, >=, -1);
+		}
+
+		/*
+		 * At this point we're far enough into the future where
+		 * the limit was just exceeded and we flush metaslabs
+		 * based on the current entry in the summary, updating
+		 * our available_blocks.
+		 */
+		ASSERT(available_blocks < 0 || available_txgs < 0);
+		available_blocks += e->lse_blkcount;
+		available_txgs += e->lse_txgcount;
+		total_flushes += e->lse_msdcount;
+
+		/*
+		 * Keep the running maximum of the total_flushes that
+		 * we've done so far over the number of TXGs in the
+		 * future that we are. The idea here is to estimate
+		 * the average number of flushes that we should do
+		 * every TXG so that when we are that many TXGs in the
+		 * future we stay under the limit.
+		 */
+		max_flushes_pertxg = MAX(max_flushes_pertxg,
+		    DIV_ROUND_UP(total_flushes, txgs_in_future));
+	}
+	return (max_flushes_pertxg);
+}
+
+/* Return the sus_memused counter kept in spa_unflushed_stats. */
+uint64_t
+spa_log_sm_memused(spa_t *spa)
+{
+	uint64_t memused = spa->spa_unflushed_stats.sus_memused;
+
+	return (memused);
+}
+
+/*
+ * Return B_TRUE when the memory used by unflushed changes is above either
+ * the absolute cap (zfs_unflushed_max_mem_amt) or the cap derived from
+ * system memory (zfs_unflushed_max_mem_ppm parts per million of
+ * physmem * PAGESIZE); B_FALSE otherwise.
+ */
+static boolean_t
+spa_log_exceeds_memlimit(spa_t *spa)
+{
+	uint64_t memused = spa_log_sm_memused(spa);
+	uint64_t system_mem_allowed = ((physmem * PAGESIZE) *
+	    zfs_unflushed_max_mem_ppm) / 1000000;
+
+	if (memused > zfs_unflushed_max_mem_amt ||
+	    memused > system_mem_allowed)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/* A non-zero spa_log_flushall_txg means a "flush all" request is pending. */
+boolean_t
+spa_flush_all_logs_requested(spa_t *spa)
+{
+	if (spa->spa_log_flushall_txg == 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Flush metaslabs with unflushed changes, walking spa_metaslabs_by_flushed
+ * from its first node. Only acts in sync pass 1 and when the log spacemap
+ * feature is active. The number of metaslabs flushed comes from
+ * spa_estimate_metaslabs_to_flush(), or is unbounded when a "flush all"
+ * was requested; flushing continues past that count for as long as
+ * spa_log_exceeds_memlimit() holds. Metaslabs that are not dirty are only
+ * bumped via metaslab_unflushed_bump().
+ */
+void
+spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
+{
+	uint64_t txg = dmu_tx_get_txg(tx);
+
+	if (spa_sync_pass(spa) != 1)
+		return;
+
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return;
+
+	/*
+	 * If we don't have any metaslabs with unflushed changes
+	 * return immediately.
+	 */
+	if (avl_numnodes(&spa->spa_metaslabs_by_flushed) == 0)
+		return;
+
+	/*
+	 * During SPA export we leave a few empty TXGs to go by [see
+	 * spa_final_dirty_txg() to understand why]. For this specific
+	 * case, it is important to not flush any metaslabs as that
+	 * would dirty this TXG.
+	 *
+	 * That said, during one of these dirty TXGs that is less or
+	 * equal to spa_final_dirty_txg(), spa_unload() will request that
+	 * we try to flush all the metaslabs for that TXG before
+	 * exporting the pool, thus we ensure that we didn't get a
+	 * request of flushing everything before we attempt to return
+	 * immediately.
+	 */
+	if (spa->spa_uberblock.ub_rootbp.blk_birth < txg &&
+	    !dmu_objset_is_dirty(spa_meta_objset(spa), txg) &&
+	    !spa_flush_all_logs_requested(spa))
+		return;
+
+	/*
+	 * We need to generate a log space map before flushing because this
+	 * will set up the in-memory data (i.e. node in spa_sm_logs_by_txg)
+	 * for this TXG's flushed metaslab count (aka sls_mscount which is
+	 * manipulated in many ways down the metaslab_flush() codepath).
+	 *
+	 * That is not to say that we may generate a log space map when we
+	 * don't need it. If we are flushing metaslabs, that means that we
+	 * were going to write changes to disk anyway, so even if we were
+	 * not flushing, a log space map would have been created anyway in
+	 * metaslab_sync().
+	 */
+	spa_generate_syncing_log_sm(spa, tx);
+
+	/*
+	 * This variable tells us how many metaslabs we want to flush based
+	 * on the block-heuristic of our flushing algorithm (see block comment
+	 * of log space map feature). We also decrement this as we flush
+	 * metaslabs and attempt to destroy old log space maps.
+	 */
+	uint64_t want_to_flush;
+	if (spa_flush_all_logs_requested(spa)) {
+		ASSERT3S(spa_state(spa), ==, POOL_STATE_EXPORTED);
+		want_to_flush = UINT64_MAX;
+	} else {
+		want_to_flush = spa_estimate_metaslabs_to_flush(spa);
+	}
+
+	/* Used purely for verification purposes */
+	uint64_t visited = 0;
+
+	/*
+	 * Ideally we would only iterate through spa_metaslabs_by_flushed
+	 * using only one variable (curr). We can't do that because
+	 * metaslab_flush() mutates position of curr in the AVL when
+	 * it flushes that metaslab by moving it to the end of the tree.
+	 * Thus we always keep track of the original next node of the
+	 * current node (curr) in another variable (next).
+	 */
+	metaslab_t *next = NULL;
+	for (metaslab_t *curr = avl_first(&spa->spa_metaslabs_by_flushed);
+	    curr != NULL; curr = next) {
+		next = AVL_NEXT(&spa->spa_metaslabs_by_flushed, curr);
+
+		/*
+		 * If this metaslab has been flushed this txg then we've done
+		 * a full circle over the metaslabs.
+		 */
+		if (metaslab_unflushed_txg(curr) == txg)
+			break;
+
+		/*
+		 * If we are done flushing for the block heuristic and the
+		 * unflushed changes don't exceed the memory limit just stop.
+		 */
+		if (want_to_flush == 0 && !spa_log_exceeds_memlimit(spa))
+			break;
+
+		if (metaslab_unflushed_dirty(curr)) {
+			/* ms_sync_lock is taken before ms_lock. */
+			mutex_enter(&curr->ms_sync_lock);
+			mutex_enter(&curr->ms_lock);
+			metaslab_flush(curr, tx);
+			mutex_exit(&curr->ms_lock);
+			mutex_exit(&curr->ms_sync_lock);
+			/*
+			 * want_to_flush may already be 0 when we are only
+			 * flushing because of the memory limit.
+			 */
+			if (want_to_flush > 0)
+				want_to_flush--;
+		} else
+			metaslab_unflushed_bump(curr, tx, B_FALSE);
+
+		visited++;
+	}
+	ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=, visited);
+
+	spa_log_sm_set_blocklimit(spa);
+}
+
+/*
+ * Finish the log space map of the syncing TXG: record its block count in
+ * the newest spa_sm_logs_by_txg node, in spa_unflushed_stats and in the
+ * log summary, then close the space map. Does nothing when no log space
+ * map is open for this TXG.
+ */
+void
+spa_sync_close_syncing_log_sm(spa_t *spa)
+{
+	if (spa_syncing_log_sm(spa) == NULL)
+		return;
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP));
+
+	spa_log_sm_t *newest = avl_last(&spa->spa_sm_logs_by_txg);
+	ASSERT3U(newest->sls_txg, ==, spa_syncing_txg(spa));
+
+	newest->sls_nblocks = space_map_nblocks(spa_syncing_log_sm(spa));
+	spa->spa_unflushed_stats.sus_nblocks += newest->sls_nblocks;
+
+	/*
+	 * sls_mscount may legitimately be 0 here: the first metaslab in
+	 * spa_metaslabs_by_flushed can be loading, in which case we were
+	 * not able to flush any metaslabs this TXG. Hence only the block
+	 * count is asserted.
+	 */
+	ASSERT(newest->sls_nblocks != 0);
+
+	spa_log_summary_add_incoming_blocks(spa, newest->sls_nblocks);
+	spa_log_summary_verify_counts(spa);
+
+	space_map_close(spa->spa_syncing_log_sm);
+	spa->spa_syncing_log_sm = NULL;
+
+	if (!spa_flush_all_logs_requested(spa))
+		return;
+
+	/*
+	 * The pool is getting exported and we tried to flush as many
+	 * metaslabs as we could. Reset the "flush all" request so the
+	 * last few TXGs before closing the pool can be empty (i.e. not
+	 * dirty).
+	 */
+	ASSERT3S(spa_state(spa), ==, POOL_STATE_EXPORTED);
+	spa->spa_log_flushall_txg = 0;
+}
+
+/*
+ * Destroy every log space map whose TXG is older than the unflushed TXG
+ * of the first metaslab in spa_metaslabs_by_flushed, and subtract its
+ * blocks from the summary and from spa_unflushed_stats.
+ */
+void
+spa_cleanup_old_sm_logs(spa_t *spa, dmu_tx_t *tx)
+{
+	objset_t *mos = spa_meta_objset(spa);
+	uint64_t spacemap_zap;
+
+	int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_LOG_SPACEMAP_ZAP, sizeof (spacemap_zap), 1, &spacemap_zap);
+	if (error == ENOENT) {
+		/* Without the log space map ZAP there must be no logs. */
+		ASSERT(avl_is_empty(&spa->spa_sm_logs_by_txg));
+		return;
+	}
+	VERIFY0(error);
+
+	metaslab_t *first_ms = avl_first(&spa->spa_metaslabs_by_flushed);
+	uint64_t oldest_flushed_txg = metaslab_unflushed_txg(first_ms);
+
+	/* Keep freeing the oldest log while it predates oldest_flushed_txg. */
+	spa_log_sm_t *stale;
+	while ((stale = avl_first(&spa->spa_sm_logs_by_txg)) != NULL &&
+	    stale->sls_txg < oldest_flushed_txg) {
+		ASSERT0(stale->sls_mscount);
+		avl_remove(&spa->spa_sm_logs_by_txg, stale);
+		space_map_free_obj(mos, stale->sls_sm_obj, tx);
+		VERIFY0(zap_remove_int(mos, spacemap_zap, stale->sls_txg, tx));
+		spa_log_summary_decrement_blkcount(spa, stale->sls_nblocks);
+		spa->spa_unflushed_stats.sus_nblocks -= stale->sls_nblocks;
+		kmem_free(stale, sizeof (spa_log_sm_t));
+	}
+}
+
+/*
+ * Allocate a zeroed spa_log_sm_t and fill in its space map object number
+ * and TXG; all other fields stay zero.
+ */
+static spa_log_sm_t *
+spa_log_sm_alloc(uint64_t sm_obj, uint64_t txg)
+{
+	spa_log_sm_t *node = kmem_zalloc(sizeof (spa_log_sm_t), KM_SLEEP);
+
+	node->sls_txg = txg;
+	node->sls_sm_obj = sm_obj;
+
+	return (node);
+}
+
+/*
+ * Create and open the log space map for the TXG of tx, unless one is
+ * already open or the log spacemap feature is not enabled. The log
+ * space map ZAP is created (and the feature refcount incremented) the
+ * first time it is found missing.
+ */
+void
+spa_generate_syncing_log_sm(spa_t *spa, dmu_tx_t *tx)
+{
+	objset_t *mos = spa_meta_objset(spa);
+	uint64_t txg = dmu_tx_get_txg(tx);
+
+	if (spa_syncing_log_sm(spa) != NULL ||
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return;
+
+	uint64_t spacemap_zap;
+	int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_LOG_SPACEMAP_ZAP, sizeof (spacemap_zap), 1, &spacemap_zap);
+	if (error == ENOENT) {
+		ASSERT(avl_is_empty(&spa->spa_sm_logs_by_txg));
+
+		spacemap_zap = zap_create(mos,
+		    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+		VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_LOG_SPACEMAP_ZAP, sizeof (spacemap_zap), 1,
+		    &spacemap_zap, tx));
+		spa_feature_incr(spa, SPA_FEATURE_LOG_SPACEMAP, tx);
+	} else {
+		VERIFY0(error);
+	}
+
+	/* There must not be a log registered for this TXG yet. */
+	uint64_t sm_obj;
+	ASSERT3U(zap_lookup_int_key(mos, spacemap_zap, txg, &sm_obj),
+	    ==, ENOENT);
+	sm_obj = space_map_alloc(mos, zfs_log_sm_blksz, tx);
+	VERIFY0(zap_add_int_key(mos, spacemap_zap, txg, sm_obj, tx));
+	avl_add(&spa->spa_sm_logs_by_txg, spa_log_sm_alloc(sm_obj, txg));
+
+	/*
+	 * The space map is opened with UINT64_MAX as its representation
+	 * size and SPA_MINBLOCKSHIFT as its shift so that it accepts any
+	 * sorts of segments; there's no real advantage to being more
+	 * restrictive (given that we're already going to be using
+	 * 2-word entries).
+	 */
+	VERIFY0(space_map_open(&spa->spa_syncing_log_sm, mos, sm_obj,
+	    0, UINT64_MAX, SPA_MINBLOCKSHIFT));
+
+	spa_log_sm_set_blocklimit(spa);
+}
+
+/*
+ * Find all the log space maps stored in the space map ZAP and sort
+ * them by their TXG in spa_sm_logs_by_txg. Then, for every metaslab in
+ * spa_metaslabs_by_flushed, bump sls_mscount of the log whose TXG equals
+ * the metaslab's unflushed TXG.
+ *
+ * Returns 0 on success (including when the ZAP doesn't exist yet), the
+ * error from the ZAP lookup/cursor, or ENOENT when a metaslab refers to
+ * a TXG for which no log space map was found.
+ */
+static int
+spa_ld_log_sm_metadata(spa_t *spa)
+{
+	int error;
+	uint64_t spacemap_zap;
+
+	ASSERT(avl_is_empty(&spa->spa_sm_logs_by_txg));
+
+	error = zap_lookup(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_LOG_SPACEMAP_ZAP, sizeof (spacemap_zap), 1, &spacemap_zap);
+	if (error == ENOENT) {
+		/* the space map ZAP doesn't exist yet */
+		return (0);
+	} else if (error != 0) {
+		spa_load_failed(spa, "spa_ld_log_sm_metadata(): failed at "
+		    "zap_lookup(DMU_POOL_DIRECTORY_OBJECT) [error %d]",
+		    error);
+		return (error);
+	}
+
+	/*
+	 * Each ZAP entry is named after the log's TXG and its first
+	 * integer is passed to spa_log_sm_alloc() as the space map object.
+	 */
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	for (zap_cursor_init(&zc, spa_meta_objset(spa), spacemap_zap);
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		uint64_t log_txg = zfs_strtonum(za.za_name, NULL);
+		spa_log_sm_t *sls =
+		    spa_log_sm_alloc(za.za_first_integer, log_txg);
+		avl_add(&spa->spa_sm_logs_by_txg, sls);
+	}
+	zap_cursor_fini(&zc);
+	/* The cursor loop is expected to end with ENOENT. */
+	if (error != ENOENT) {
+		spa_load_failed(spa, "spa_ld_log_sm_metadata(): failed at "
+		    "zap_cursor_retrieve(spacemap_zap) [error %d]",
+		    error);
+		return (error);
+	}
+
+	for (metaslab_t *m = avl_first(&spa->spa_metaslabs_by_flushed);
+	    m; m = AVL_NEXT(&spa->spa_metaslabs_by_flushed, m)) {
+		spa_log_sm_t target = { .sls_txg = metaslab_unflushed_txg(m) };
+		spa_log_sm_t *sls = avl_find(&spa->spa_sm_logs_by_txg,
+		    &target, NULL);
+
+		/*
+		 * At this point if sls is NULL it means that a bug occurred
+		 * in ZFS the last time the pool was open or earlier in the
+		 * import code path. In general, we would have placed a
+		 * VERIFY() here or in this case just let the kernel panic
+		 * with NULL pointer dereference when incrementing sls_mscount,
+		 * but since this is the import code path we can be a bit more
+		 * lenient. Thus, for DEBUG bits we always cause a panic, while
+		 * in production we log the error and just fail the import.
+		 *
+		 * The TXG is a uint64_t, so it is printed with %llu through
+		 * a u_longlong_t cast, like the other messages in this file.
+		 */
+		ASSERT(sls != NULL);
+		if (sls == NULL) {
+			spa_load_failed(spa, "spa_ld_log_sm_metadata(): bug "
+			    "encountered: could not find log spacemap for "
+			    "TXG %llu [error %d]",
+			    (u_longlong_t)metaslab_unflushed_txg(m), ENOENT);
+			return (ENOENT);
+		}
+		sls->sls_mscount++;
+	}
+
+	return (0);
+}
+
+/* Argument handed to spa_ld_log_sm_cb() while iterating one log space map. */
+typedef struct spa_ld_log_sm_arg {
+	spa_t *slls_spa;	/* pool whose log space maps are being read */
+	uint64_t slls_txg;	/* TXG of the log space map being iterated */
+} spa_ld_log_sm_arg_t;
+
+/*
+ * space_map_iterate() callback: apply one log space map entry to the
+ * unflushed alloc/free range trees of the metaslab it falls into, and
+ * mark that metaslab as unflushed-dirty the first time this happens.
+ * Always returns 0.
+ */
+static int
+spa_ld_log_sm_cb(space_map_entry_t *sme, void *arg)
+{
+	uint64_t start = sme->sme_offset;
+	uint64_t run = sme->sme_run;
+	uint32_t vdev_id = sme->sme_vdev;
+
+	spa_ld_log_sm_arg_t *ctx = arg;
+	spa_t *spa = ctx->slls_spa;
+
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+
+	/*
+	 * Entries of a removed vdev (i.e. one that is indirect or a
+	 * hole) are skipped; its contents have already moved elsewhere.
+	 */
+	if (!vdev_is_concrete(vd))
+		return (0);
+
+	metaslab_t *ms = vd->vdev_ms[start >> vd->vdev_ms_shift];
+	ASSERT(!ms->ms_loaded);
+
+	/*
+	 * Ignore entries of TXGs that were already flushed to this
+	 * metaslab's space map. Flushing happens before any
+	 * allocations/frees of that TXG are processed, so the
+	 * metaslab's space map only has entries from *before* the
+	 * unflushed TXG.
+	 */
+	if (metaslab_unflushed_txg(ms) > ctx->slls_txg)
+		return (0);
+
+	switch (sme->sme_type) {
+	case SM_FREE:
+		range_tree_remove_xor_add_segment(start, start + run,
+		    ms->ms_unflushed_allocs, ms->ms_unflushed_frees);
+		break;
+	case SM_ALLOC:
+		range_tree_remove_xor_add_segment(start, start + run,
+		    ms->ms_unflushed_frees, ms->ms_unflushed_allocs);
+		break;
+	default:
+		panic("invalid maptype_t");
+		break;
+	}
+
+	if (metaslab_unflushed_dirty(ms))
+		return (0);
+
+	metaslab_set_unflushed_dirty(ms, B_TRUE);
+	spa_log_summary_dirty_flushed_metaslab(spa,
+	    metaslab_unflushed_txg(ms));
+	return (0);
+}
+
+/*
+ * Read every log space map listed in spa_sm_logs_by_txg and replay its
+ * entries into the metaslabs' unflushed alloc/free range trees (through
+ * spa_ld_log_sm_cb()), opening and prefetching upcoming logs while the
+ * current one is processed. Afterwards the per-metaslab allocated space,
+ * weights and the unflushed memory usage are recomputed, even on failure.
+ *
+ * Returns 0 on success (or right away when the pool is not writeable),
+ * otherwise the error from space_map_open() or space_map_iterate().
+ */
+static int
+spa_ld_log_sm_data(spa_t *spa)
+{
+	spa_log_sm_t *sls, *psls;
+	int error = 0;
+
+	/*
+	 * If we are not going to do any writes there is no need
+	 * to read the log space maps.
+	 */
+	if (!spa_writeable(spa))
+		return (0);
+
+	ASSERT0(spa->spa_unflushed_stats.sus_nblocks);
+	ASSERT0(spa->spa_unflushed_stats.sus_memused);
+
+	hrtime_t read_logs_starttime = gethrtime();
+
+	/* Prefetch log spacemaps dnodes. */
+	for (sls = avl_first(&spa->spa_sm_logs_by_txg); sls;
+	    sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+		dmu_prefetch(spa_meta_objset(spa), sls->sls_sm_obj,
+		    0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+	}
+
+	/*
+	 * sls is the log being loaded and psls the next log to open and
+	 * prefetch; pn and ps count the logs and bytes that are currently
+	 * opened ahead of sls.
+	 */
+	uint_t pn = 0;
+	uint64_t ps = 0;
+	psls = sls = avl_first(&spa->spa_sm_logs_by_txg);
+	while (sls != NULL) {
+		/* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */
+		if (psls != NULL && pn < 16 &&
+		    (pn < 2 || ps < 2 * dmu_prefetch_max)) {
+			error = space_map_open(&psls->sls_sm,
+			    spa_meta_objset(spa), psls->sls_sm_obj, 0,
+			    UINT64_MAX, SPA_MINBLOCKSHIFT);
+			if (error != 0) {
+				/* Report the object we failed to open. */
+				spa_load_failed(spa, "spa_ld_log_sm_data(): "
+				    "failed at space_map_open(obj=%llu) "
+				    "[error %d]",
+				    (u_longlong_t)psls->sls_sm_obj, error);
+				goto out;
+			}
+			dmu_prefetch(spa_meta_objset(spa), psls->sls_sm_obj,
+			    0, 0, space_map_length(psls->sls_sm),
+			    ZIO_PRIORITY_ASYNC_READ);
+			pn++;
+			ps += space_map_length(psls->sls_sm);
+			psls = AVL_NEXT(&spa->spa_sm_logs_by_txg, psls);
+			continue;
+		}
+
+		/* Load TXG log spacemap into ms_unflushed_allocs/frees. */
+		cond_resched();
+		ASSERT0(sls->sls_nblocks);
+		sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
+		spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
+		summary_add_data(spa, sls->sls_txg,
+		    sls->sls_mscount, 0, sls->sls_nblocks);
+
+		struct spa_ld_log_sm_arg vla = {
+			.slls_spa = spa,
+			.slls_txg = sls->sls_txg
+		};
+		error = space_map_iterate(sls->sls_sm,
+		    space_map_length(sls->sls_sm), spa_ld_log_sm_cb, &vla);
+		if (error != 0) {
+			spa_load_failed(spa, "spa_ld_log_sm_data(): failed "
+			    "at space_map_iterate(obj=%llu) [error %d]",
+			    (u_longlong_t)sls->sls_sm_obj, error);
+			goto out;
+		}
+
+		pn--;
+		ps -= space_map_length(sls->sls_sm);
+		space_map_close(sls->sls_sm);
+		sls->sls_sm = NULL;
+		sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls);
+
+		/* Update log block limits considering just loaded. */
+		spa_log_sm_set_blocklimit(spa);
+	}
+
+	hrtime_t read_logs_endtime = gethrtime();
+	spa_load_note(spa,
+	    "read %llu log space maps (%llu total blocks - blksz = %llu bytes) "
+	    "in %lld ms", (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg),
+	    (u_longlong_t)spa_log_sm_nblocks(spa),
+	    (u_longlong_t)zfs_log_sm_blksz,
+	    (longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000));
+
+out:
+	if (error != 0) {
+		/* Close whatever space maps are still open after a failure. */
+		for (sls = avl_first(&spa->spa_sm_logs_by_txg);
+		    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+			if (sls->sls_sm) {
+				space_map_close(sls->sls_sm);
+				sls->sls_sm = NULL;
+			}
+		}
+	} else {
+		ASSERT0(pn);
+		ASSERT0(ps);
+	}
+	/*
+	 * Now that the metaslabs contain their unflushed changes:
+	 * [1] recalculate their actual allocated space
+	 * [2] recalculate their weights
+	 * [3] sum up the memory usage of their unflushed range trees
+	 * [4] optionally load them, if debug_load is set
+	 *
+	 * Note that even in the case where we get here because of an
+	 * error (e.g. error != 0), we still want to update the fields
+	 * below in order to have a proper teardown in spa_unload().
+	 */
+	for (metaslab_t *m = avl_first(&spa->spa_metaslabs_by_flushed);
+	    m != NULL; m = AVL_NEXT(&spa->spa_metaslabs_by_flushed, m)) {
+		mutex_enter(&m->ms_lock);
+		m->ms_allocated_space = space_map_allocated(m->ms_sm) +
+		    range_tree_space(m->ms_unflushed_allocs) -
+		    range_tree_space(m->ms_unflushed_frees);
+
+		vdev_t *vd = m->ms_group->mg_vd;
+		metaslab_space_update(vd, m->ms_group->mg_class,
+		    range_tree_space(m->ms_unflushed_allocs), 0, 0);
+		metaslab_space_update(vd, m->ms_group->mg_class,
+		    -range_tree_space(m->ms_unflushed_frees), 0, 0);
+
+		ASSERT0(m->ms_weight & METASLAB_ACTIVE_MASK);
+		metaslab_recalculate_weight_and_sort(m);
+
+		spa->spa_unflushed_stats.sus_memused +=
+		    metaslab_unflushed_changes_memused(m);
+
+		if (metaslab_debug_load && m->ms_sm != NULL) {
+			VERIFY0(metaslab_load(m));
+			metaslab_set_selected_txg(m, 0);
+		}
+		mutex_exit(&m->ms_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * Load the unflushed TXG of every metaslab of top-level vdev vd from the
+ * object referenced by VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, and add the
+ * metaslabs with a non-zero unflushed TXG to spa_metaslabs_by_flushed.
+ *
+ * Returns 0 on success, or when the vdev has no top ZAP or no such ZAP
+ * entry; otherwise the error from zap_lookup() or dmu_read().
+ */
+static int
+spa_ld_unflushed_txgs(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+	objset_t *mos = spa_meta_objset(spa);
+	uint64_t object = 0;
+	int error;
+
+	if (vd->vdev_top_zap == 0)
+		return (0);
+
+	error = zap_lookup(mos, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
+	    sizeof (uint64_t), 1, &object);
+	if (error == ENOENT)
+		return (0);
+	if (error != 0) {
+		spa_load_failed(spa, "spa_ld_unflushed_txgs(): failed at "
+		    "zap_lookup(vdev_top_zap=%llu) [error %d]",
+		    (u_longlong_t)vd->vdev_top_zap, error);
+		return (error);
+	}
+
+	for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+		metaslab_t *ms = vd->vdev_ms[m];
+		metaslab_unflushed_phys_t entry;
+		uint64_t entry_size = sizeof (entry);
+
+		ASSERT(ms != NULL);
+
+		/* Entries are laid out back to back, indexed by ms_id. */
+		uint64_t entry_offset = ms->ms_id * entry_size;
+
+		error = dmu_read(mos, object,
+		    entry_offset, entry_size, &entry, 0);
+		if (error != 0) {
+			spa_load_failed(spa, "spa_ld_unflushed_txgs(): "
+			    "failed at dmu_read(obj=%llu) [error %d]",
+			    (u_longlong_t)object, error);
+			return (error);
+		}
+
+		ms->ms_unflushed_txg = entry.msp_unflushed_txg;
+		ms->ms_unflushed_dirty = B_FALSE;
+		ASSERT(range_tree_is_empty(ms->ms_unflushed_allocs));
+		ASSERT(range_tree_is_empty(ms->ms_unflushed_frees));
+
+		if (ms->ms_unflushed_txg == 0)
+			continue;
+
+		mutex_enter(&spa->spa_flushed_ms_lock);
+		avl_add(&spa->spa_metaslabs_by_flushed, ms);
+		mutex_exit(&spa->spa_flushed_ms_lock);
+	}
+	return (0);
+}
+
+/*
+ * Entry point for loading the log space maps of a pool: load the
+ * unflushed TXGs of every top-level vdev, build spa_sm_logs_by_txg from
+ * the on-disk metadata, and finally read all the log entries into their
+ * respective metaslab unflushed trees. This also sets up the metadata
+ * for the memory and the block heuristics.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int
+spa_ld_log_spacemaps(spa_t *spa)
+{
+	int error;
+
+	spa_log_sm_set_blocklimit(spa);
+
+	vdev_t *rvd = spa->spa_root_vdev;
+	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+		error = spa_ld_unflushed_txgs(rvd->vdev_child[c]);
+		if (error != 0)
+			return (error);
+	}
+
+	if ((error = spa_ld_log_sm_metadata(spa)) != 0)
+		return (error);
+
+	/*
+	 * Nothing is actually expected to change at this point; the
+	 * config lock is held only so that vdev_lookup_top() doesn't
+	 * fail any assertions.
+	 */
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+	error = spa_ld_log_sm_data(spa);
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+	return (error);
+}
+
+/*
+ * Module parameter declarations for the log space map tunables. Each
+ * entry is declared ZMOD_RW; the quoted text is the parameter's
+ * description string.
+ */
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, ULONG, ZMOD_RW,
+    "Specific hard-limit in memory that ZFS allows to be used for "
+    "unflushed changes");
+
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_ppm, ULONG, ZMOD_RW,
+    "Percentage of the overall system memory that ZFS allows to be "
+    "used for unflushed changes (value is calculated over 1000000 for "
+    "finer granularity)");
+
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_max, ULONG, ZMOD_RW,
+    "Hard limit (upper-bound) in the size of the space map log "
+    "in terms of blocks.");
+
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW,
+    "Lower-bound limit for the maximum amount of blocks allowed in "
+    "log spacemap (see zfs_unflushed_log_block_max)");
+
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW,
+    "Hard limit (upper-bound) in the size of the space map log "
+    "in terms of dirty TXGs.");
+
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW,
+    "Tunable used to determine the number of blocks that can be used for "
+    "the spacemap log, expressed as a percentage of the total number of "
+    "metaslabs in the pool (e.g. 400 means the number of log blocks is "
+    "capped at 4 times the number of metaslabs)");
+
+ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, ULONG, ZMOD_RW,
+    "The number of past TXGs that the flushing algorithm of the log "
+    "spacemap feature uses to estimate incoming log blocks");
+
+ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, ULONG, ZMOD_RW,
+    "Maximum number of rows allowed in the summary of the spacemap log");
+
+ZFS_MODULE_PARAM(zfs, zfs_, min_metaslabs_to_flush, ULONG, ZMOD_RW,
+    "Minimum number of metaslabs to flush per dirty TXG");
+
+ZFS_MODULE_PARAM(zfs, zfs_, keep_log_spacemaps_at_export, INT, ZMOD_RW,
+    "Prevent the log spacemaps from being flushed and destroyed "
+    "during pool export/destroy");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/spa_misc.c b/zfs/module/zfs/spa_misc.c
index ecdb3c6..1139430 100644
--- a/zfs/module/zfs/spa_misc.c
+++ b/zfs/module/zfs/spa_misc.c

@@ -20,12 +20,13 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2017 Datto Inc.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -58,13 +59,15 @@
 #include <sys/ddt.h>
 #include <sys/kstat.h>
 #include "zfs_prop.h"
+#include <sys/btree.h>
 #include <sys/zfeature.h>
-#include "qat.h"
+#include <sys/qat.h>
+#include <sys/zstd/zstd.h>
 
 /*
  * SPA locking
  *
- * There are four basic locks for managing spa_t structures:
+ * There are three basic locks for managing spa_t structures:
  *
  * spa_namespace_lock (global mutex)
  *
@@ -240,7 +243,7 @@
 static avl_tree_t spa_l2cache_avl;
 
 kmem_cache_t *spa_buffer_pool;
-int spa_mode_global;
+spa_mode_t spa_mode_global = SPA_MODE_UNINIT;
 
 #ifdef ZFS_DEBUG
 /*
@@ -301,20 +304,20 @@
  * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
  * in one of three behaviors controlled by zfs_deadman_failmode.
  */
-unsigned long zfs_deadman_synctime_ms = 600000ULL;
+unsigned long zfs_deadman_synctime_ms = 600000UL;
 
 /*
  * This value controls the maximum amount of time zio_wait() will block for an
  * outstanding IO.  By default this is 300 seconds at which point the "hung"
  * behavior will be applied as described for zfs_deadman_synctime_ms.
  */
-unsigned long zfs_deadman_ziotime_ms = 300000ULL;
+unsigned long zfs_deadman_ziotime_ms = 300000UL;
 
 /*
  * Check time in milliseconds. This defines the frequency at which we check
  * for hung I/O.
  */
-unsigned long zfs_deadman_checktime_ms = 60000ULL;
+unsigned long zfs_deadman_checktime_ms = 60000UL;
 
 /*
  * By default the deadman is enabled.
@@ -344,11 +347,14 @@
 
 /*
  * Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in
- * the pool to be consumed.  This ensures that we don't run the pool
- * completely out of space, due to unaccounted changes (e.g. to the MOS).
- * It also limits the worst-case time to allocate space.  If we have
- * less than this amount of free space, most ZPL operations (e.g. write,
- * create) will return ENOSPC.
+ * the pool to be consumed (bounded by spa_max_slop).  This ensures that we
+ * don't run the pool completely out of space, due to unaccounted changes (e.g.
+ * to the MOS).  It also limits the worst-case time to allocate space.  If we
+ * have less than this amount of free space, most ZPL operations (e.g.  write,
+ * create) will return ENOSPC.  The ZIL metaslabs (spa_embedded_log_class) are
+ * also part of this 3.2% of space which can't be consumed by normal writes;
+ * the slop space "proper" (spa_get_slop_space()) is decreased by the embedded
+ * log space.
  *
  * Certain operations (e.g. file removal, most administrative actions) can
  * use half the slop space.  They will only return ENOSPC if less than half
@@ -371,10 +377,15 @@
  * 3.2%, in an effort to have it be at least spa_min_slop (128MB),
  * but we never allow it to be more than half the pool size.
  *
+ * Further, on very large pools, the slop space will be smaller than
+ * 3.2%, to avoid reserving much more space than we actually need; bounded
+ * by spa_max_slop (128GB).
+ *
  * See also the comments in zfs_space_check_t.
  */
 int spa_slop_shift = 5;
-uint64_t spa_min_slop = 128 * 1024 * 1024;
+uint64_t spa_min_slop = 128ULL * 1024 * 1024;
+uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
 int spa_allocators = 4;
 
 
@@ -433,9 +444,9 @@
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
 		cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
-		zfs_refcount_create_untracked(&scl->scl_count);
 		scl->scl_writer = NULL;
 		scl->scl_write_wanted = 0;
+		scl->scl_count = 0;
 	}
 }
 
@@ -446,9 +457,9 @@
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		mutex_destroy(&scl->scl_lock);
 		cv_destroy(&scl->scl_cv);
-		zfs_refcount_destroy(&scl->scl_count);
 		ASSERT(scl->scl_writer == NULL);
 		ASSERT(scl->scl_write_wanted == 0);
+		ASSERT(scl->scl_count == 0);
 	}
 }
 
@@ -469,7 +480,7 @@
 			}
 		} else {
 			ASSERT(scl->scl_writer != curthread);
-			if (!zfs_refcount_is_zero(&scl->scl_count)) {
+			if (scl->scl_count != 0) {
 				mutex_exit(&scl->scl_lock);
 				spa_config_exit(spa, locks & ((1 << i) - 1),
 				    tag);
@@ -477,15 +488,17 @@
 			}
 			scl->scl_writer = curthread;
 		}
-		(void) zfs_refcount_add(&scl->scl_count, tag);
+		scl->scl_count++;
 		mutex_exit(&scl->scl_lock);
 	}
 	return (1);
 }
 
-void
-spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+static void
+spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
+    int mmp_flag)
 {
+	(void) tag;
 	int wlocks_held = 0;
 
 	ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
@@ -498,34 +511,57 @@
 			continue;
 		mutex_enter(&scl->scl_lock);
 		if (rw == RW_READER) {
-			while (scl->scl_writer || scl->scl_write_wanted) {
+			while (scl->scl_writer ||
+			    (!mmp_flag && scl->scl_write_wanted)) {
 				cv_wait(&scl->scl_cv, &scl->scl_lock);
 			}
 		} else {
 			ASSERT(scl->scl_writer != curthread);
-			while (!zfs_refcount_is_zero(&scl->scl_count)) {
+			while (scl->scl_count != 0) {
 				scl->scl_write_wanted++;
 				cv_wait(&scl->scl_cv, &scl->scl_lock);
 				scl->scl_write_wanted--;
 			}
 			scl->scl_writer = curthread;
 		}
-		(void) zfs_refcount_add(&scl->scl_count, tag);
+		scl->scl_count++;
 		mutex_exit(&scl->scl_lock);
 	}
 	ASSERT3U(wlocks_held, <=, locks);
 }
 
 void
+spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+{
+	spa_config_enter_impl(spa, locks, tag, rw, 0);
+}
+
+/*
+ * spa_config_enter_mmp() allows the mmp thread to cut in front of
+ * outstanding write lock requests. This is needed since the mmp updates are
+ * time sensitive and failure to service them promptly will result in a
+ * suspended pool. This pool suspension has been seen in practice when there
+ * is a single disk in a pool that is responding slowly and presumably about
+ * to fail.
+ */
+
+void
+spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
+{
+	spa_config_enter_impl(spa, locks, tag, rw, 1);
+}
+
+void
 spa_config_exit(spa_t *spa, int locks, const void *tag)
 {
+	(void) tag;
 	for (int i = SCL_LOCKS - 1; i >= 0; i--) {
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		if (!(locks & (1 << i)))
 			continue;
 		mutex_enter(&scl->scl_lock);
-		ASSERT(!zfs_refcount_is_zero(&scl->scl_count));
-		if (zfs_refcount_remove(&scl->scl_count, tag) == 0) {
+		ASSERT(scl->scl_count > 0);
+		if (--scl->scl_count == 0) {
 			ASSERT(scl->scl_writer == NULL ||
 			    scl->scl_writer == curthread);
 			scl->scl_writer = NULL;	/* OK in either case */
@@ -544,8 +580,7 @@
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		if (!(locks & (1 << i)))
 			continue;
-		if ((rw == RW_READER &&
-		    !zfs_refcount_is_zero(&scl->scl_count)) ||
+		if ((rw == RW_READER && scl->scl_count != 0) ||
 		    (rw == RW_WRITER && scl->scl_writer == curthread))
 			locks_held |= 1 << i;
 	}
@@ -604,7 +639,7 @@
 
 	zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
 	    (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
-	    ++spa->spa_deadman_calls);
+	    (u_longlong_t)++spa->spa_deadman_calls);
 	if (zfs_deadman_enabled)
 		vdev_deadman(spa->spa_root_vdev, FTAG);
 
@@ -613,6 +648,15 @@
 	    MSEC_TO_TICK(zfs_deadman_checktime_ms));
 }
 
+static int
+spa_log_sm_sort_by_txg(const void *va, const void *vb)
+{
+	const spa_log_sm_t *a = va;
+	const spa_log_sm_t *b = vb;
+
+	return (TREE_CMP(a->sls_txg, b->sls_txg));
+}
+
 /*
  * Create an uninitialized spa_t with the given name.  Requires
  * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
@@ -640,12 +684,16 @@
 	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&spa->spa_activities_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&spa->spa_waiters_cv, NULL, CV_DEFAULT, NULL);
 
 	for (int t = 0; t < TXG_SIZE; t++)
 		bplist_create(&spa->spa_free_bplist[t]);
@@ -677,15 +725,20 @@
 		spa->spa_root = spa_strdup(altroot);
 
 	spa->spa_alloc_count = spa_allocators;
-	spa->spa_alloc_locks = kmem_zalloc(spa->spa_alloc_count *
-	    sizeof (kmutex_t), KM_SLEEP);
-	spa->spa_alloc_trees = kmem_zalloc(spa->spa_alloc_count *
-	    sizeof (avl_tree_t), KM_SLEEP);
+	spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count *
+	    sizeof (spa_alloc_t), KM_SLEEP);
 	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		mutex_init(&spa->spa_alloc_locks[i], NULL, MUTEX_DEFAULT, NULL);
-		avl_create(&spa->spa_alloc_trees[i], zio_bookmark_compare,
+		mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT,
+		    NULL);
+		avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
 		    sizeof (zio_t), offsetof(zio_t, io_alloc_node));
 	}
+	avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
+	    sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
+	avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg,
+	    sizeof (spa_log_sm_t), offsetof(spa_log_sm_t, sls_node));
+	list_create(&spa->spa_log_summary, sizeof (log_summary_entry_t),
+	    offsetof(log_summary_entry_t, lse_node));
 
 	/*
 	 * Every pool starts with the default cachefile
@@ -719,6 +772,7 @@
 
 	spa->spa_min_ashift = INT_MAX;
 	spa->spa_max_ashift = 0;
+	spa->spa_min_alloc = INT_MAX;
 
 	/* Reset cached value */
 	spa->spa_dedup_dspace = ~0ULL;
@@ -749,8 +803,9 @@
 	spa_config_dirent_t *dp;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
+	ASSERT(spa_state(spa) == POOL_STATE_UNINITIALIZED);
 	ASSERT3U(zfs_refcount_count(&spa->spa_refcount), ==, 0);
+	ASSERT0(spa->spa_waiters);
 
 	nvlist_free(spa->spa_config_splitting);
 
@@ -768,14 +823,15 @@
 	}
 
 	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		avl_destroy(&spa->spa_alloc_trees[i]);
-		mutex_destroy(&spa->spa_alloc_locks[i]);
+		avl_destroy(&spa->spa_allocs[i].spaa_tree);
+		mutex_destroy(&spa->spa_allocs[i].spaa_lock);
 	}
-	kmem_free(spa->spa_alloc_locks, spa->spa_alloc_count *
-	    sizeof (kmutex_t));
-	kmem_free(spa->spa_alloc_trees, spa->spa_alloc_count *
-	    sizeof (avl_tree_t));
+	kmem_free(spa->spa_allocs, spa->spa_alloc_count *
+	    sizeof (spa_alloc_t));
 
+	avl_destroy(&spa->spa_metaslabs_by_flushed);
+	avl_destroy(&spa->spa_sm_logs_by_txg);
+	list_destroy(&spa->spa_log_summary);
 	list_destroy(&spa->spa_config_list);
 	list_destroy(&spa->spa_leaf_list);
 
@@ -799,7 +855,10 @@
 	cv_destroy(&spa->spa_proc_cv);
 	cv_destroy(&spa->spa_scrub_io_cv);
 	cv_destroy(&spa->spa_suspend_cv);
+	cv_destroy(&spa->spa_activities_cv);
+	cv_destroy(&spa->spa_waiters_cv);
 
+	mutex_destroy(&spa->spa_flushed_ms_lock);
 	mutex_destroy(&spa->spa_async_lock);
 	mutex_destroy(&spa->spa_errlist_lock);
 	mutex_destroy(&spa->spa_errlog_lock);
@@ -812,6 +871,7 @@
 	mutex_destroy(&spa->spa_suspend_lock);
 	mutex_destroy(&spa->spa_vdev_top_lock);
 	mutex_destroy(&spa->spa_feat_stats_lock);
+	mutex_destroy(&spa->spa_activities_lock);
 
 	kmem_free(spa, sizeof (spa_t));
 }
@@ -912,10 +972,10 @@
 	const spa_aux_t *sa = (const spa_aux_t *)a;
 	const spa_aux_t *sb = (const spa_aux_t *)b;
 
-	return (AVL_CMP(sa->aux_guid, sb->aux_guid));
+	return (TREE_CMP(sa->aux_guid, sb->aux_guid));
 }
 
-void
+static void
 spa_aux_add(vdev_t *vd, avl_tree_t *avl)
 {
 	avl_index_t where;
@@ -933,7 +993,7 @@
 	}
 }
 
-void
+static void
 spa_aux_remove(vdev_t *vd, avl_tree_t *avl)
 {
 	spa_aux_t search;
@@ -953,7 +1013,7 @@
 	}
 }
 
-boolean_t
+static boolean_t
 spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl)
 {
 	spa_aux_t search, *found;
@@ -978,7 +1038,7 @@
 	return (found != NULL);
 }
 
-void
+static void
 spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
 {
 	spa_aux_t search, *found;
@@ -995,10 +1055,10 @@
 /*
  * Spares are tracked globally due to the following constraints:
  *
- * 	- A spare may be part of multiple pools.
- * 	- A spare may be added to a pool even if it's actively in use within
+ *	- A spare may be part of multiple pools.
+ *	- A spare may be added to a pool even if it's actively in use within
  *	  another pool.
- * 	- A spare in use in any pool can only be the source of a replacement if
+ *	- A spare in use in any pool can only be the source of a replacement if
  *	  the target is a spare in the same pool.
  *
  * We keep track of all spares on the system through the use of a reference
@@ -1137,6 +1197,30 @@
 }
 
 /*
+ * The same as spa_vdev_enter() above but additionally takes the guid of
+ * the vdev being detached.  If that guid resolves to a vdev, any rebuild on
+ * its top-level vdev is stopped here; spa_vdev_exit() restarts rebuilds.
+ * The rebuild is canceled if only a single child remains after the detach.
+ */
+uint64_t
+spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
+{
+	mutex_enter(&spa->spa_vdev_top_lock);
+	mutex_enter(&spa_namespace_lock);
+
+	vdev_autotrim_stop_all(spa);
+
+	if (guid != 0) {
+		vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
+		if (vd) {
+			vdev_rebuild_stop_wait(vd->vdev_top);
+		}
+	}
+
+	return (spa_vdev_config_enter(spa));
+}
+
+/*
  * Internal implementation for spa_vdev_enter().  Used when a vdev
  * operation requires multiple syncs (i.e. removing a device) while
  * keeping the spa_namespace_lock held.
@@ -1169,7 +1253,7 @@
 	/*
 	 * Reassess the DTLs.
 	 */
-	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
+	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE, B_FALSE);
 
 	if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
 		config_changed = B_TRUE;
@@ -1181,6 +1265,7 @@
 	 */
 	ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
 	ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
+	ASSERT(metaslab_class_validate(spa_embedded_log_class(spa)) == 0);
 	ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
 	ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
 
@@ -1220,16 +1305,16 @@
 		 */
 		vdev_autotrim_stop_wait(vd);
 
-		spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
+		spa_config_enter(spa, SCL_STATE_ALL, spa, RW_WRITER);
 		vdev_free(vd);
-		spa_config_exit(spa, SCL_ALL, spa);
+		spa_config_exit(spa, SCL_STATE_ALL, spa);
 	}
 
 	/*
 	 * If the config changed, update the config cache.
 	 */
 	if (config_changed)
-		spa_write_cachefile(spa, B_FALSE, B_TRUE);
+		spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
 }
 
 /*
@@ -1242,6 +1327,7 @@
 spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
 {
 	vdev_autotrim_restart(spa);
+	vdev_rebuild_restart(spa);
 
 	spa_vdev_config_exit(spa, vd, txg, error, FTAG);
 	mutex_exit(&spa_namespace_lock);
@@ -1293,7 +1379,7 @@
 	}
 
 	if (vd != NULL || error == 0)
-		vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE);
+		vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE, B_FALSE);
 
 	if (vd != NULL) {
 		if (vd != spa->spa_root_vdev)
@@ -1311,7 +1397,7 @@
 
 	/*
 	 * If anything changed, wait for it to sync.  This ensures that,
-	 * from the system administrator's perspective, zpool(1M) commands
+	 * from the system administrator's perspective, zpool(8) commands
 	 * are synchronous.  This is important for things like zpool offline:
 	 * when the command completes, you expect no further I/O from ZFS.
 	 */
@@ -1323,7 +1409,7 @@
 	 */
 	if (config_changed) {
 		mutex_enter(&spa_namespace_lock);
-		spa_write_cachefile(spa, B_FALSE, B_TRUE);
+		spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
 		mutex_exit(&spa_namespace_lock);
 	}
 
@@ -1430,31 +1516,20 @@
 }
 
 uint64_t
-spa_get_random(uint64_t range)
-{
-	uint64_t r;
-
-	ASSERT(range != 0);
-
-	if (range == 1)
-		return (0);
-
-	(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
-
-	return (r % range);
-}
-
-uint64_t
 spa_generate_guid(spa_t *spa)
 {
-	uint64_t guid = spa_get_random(-1ULL);
+	uint64_t guid;
 
 	if (spa != NULL) {
-		while (guid == 0 || spa_guid_exists(spa_guid(spa), guid))
-			guid = spa_get_random(-1ULL);
+		do {
+			(void) random_get_pseudo_bytes((void *)&guid,
+			    sizeof (guid));
+		} while (guid == 0 || spa_guid_exists(spa_guid(spa), guid));
 	} else {
-		while (guid == 0 || spa_guid_exists(guid, 0))
-			guid = spa_get_random(-1ULL);
+		do {
+			(void) random_get_pseudo_bytes((void *)&guid,
+			    sizeof (guid));
+		} while (guid == 0 || spa_guid_exists(guid, 0));
 	}
 
 	return (guid);
@@ -1720,17 +1795,52 @@
 }
 
 /*
- * Return the amount of slop space in bytes.  It is 1/32 of the pool (3.2%),
- * or at least 128MB, unless that would cause it to be more than half the
- * pool size.
+ * Return the amount of slop space in bytes.  It is typically 1/32 of the pool
+ * (3.2%), minus the embedded log space.  On very small pools, it may be
+ * slightly larger than this.  On very large pools, it will be capped to
+ * the value of spa_max_slop.  The embedded log space is not included in
+ * spa_dspace.  By subtracting it, the usable space (per "zfs list") is a
+ * constant 97% of the total space, regardless of metaslab size (assuming the
+ * default spa_slop_shift=5 and a non-tiny pool).
  *
- * See the comment above spa_slop_shift for details.
+ * See the comment above spa_slop_shift for more details.
  */
 uint64_t
 spa_get_slop_space(spa_t *spa)
 {
-	uint64_t space = spa_get_dspace(spa);
-	return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
+	uint64_t space = 0;
+	uint64_t slop = 0;
+
+	/*
+	 * Make sure spa_dedup_dspace has been set.
+	 */
+	if (spa->spa_dedup_dspace == ~0ULL)
+		spa_update_dspace(spa);
+
+	/*
+	 * spa_get_dspace() includes the space only logically "used" by
+	 * deduplicated data, so since it's not useful to reserve more
+	 * space with more deduplicated data, we subtract that out here.
+	 */
+	space = spa_get_dspace(spa) - spa->spa_dedup_dspace;
+	slop = MIN(space >> spa_slop_shift, spa_max_slop);
+
+	/*
+	 * Subtract the embedded log space, but no more than half the (3.2%)
+	 * unusable space.  Note, the "no more than half" is only relevant if
+	 * zfs_embedded_slog_min_ms >> spa_slop_shift < 2, which is not true by
+	 * default.
+	 */
+	uint64_t embedded_log =
+	    metaslab_class_get_dspace(spa_embedded_log_class(spa));
+	slop -= MIN(embedded_log, slop >> 1);
+
+	/*
+	 * Slop space should be at least spa_min_slop, but no more than half
+	 * the entire pool.
+	 */
+	slop = MAX(slop, MIN(space >> 1, spa_min_slop));
+	return (slop);
 }
 
 uint64_t
@@ -1752,10 +1862,11 @@
 	    ddt_get_dedup_dspace(spa);
 	if (spa->spa_vdev_removal != NULL) {
 		/*
-		 * We can't allocate from the removing device, so
-		 * subtract its size.  This prevents the DMU/DSL from
-		 * filling up the (now smaller) pool while we are in the
-		 * middle of removing the device.
+		 * We can't allocate from the removing device, so subtract
+		 * its size if it was included in dspace (i.e. if this is a
+		 * normal-class vdev, not special/dedup).  This prevents the
+		 * DMU/DSL from filling up the (now smaller) pool while we
+		 * are in the middle of removing the device.
 		 *
 		 * Note that the DMU/DSL doesn't actually know or care
 		 * how much space is allocated (it does its own tracking
@@ -1767,8 +1878,17 @@
 		spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 		vdev_t *vd =
 		    vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
-		spa->spa_dspace -= spa_deflate(spa) ?
-		    vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
+		/*
+		 * If the stars align, we can wind up here after
+		 * vdev_remove_complete() has cleared vd->vdev_mg but before
+		 * spa->spa_vdev_removal gets cleared, so we must check before
+		 * we dereference.
+		 */
+		if (vd->vdev_mg &&
+		    vd->vdev_mg->mg_class == spa_normal_class(spa)) {
+			spa->spa_dspace -= spa_deflate(spa) ?
+			    vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
+		}
 		spa_config_exit(spa, SCL_VDEV, FTAG);
 	}
 }
@@ -1814,6 +1934,12 @@
 }
 
 metaslab_class_t *
+spa_embedded_log_class(spa_t *spa)
+{
+	return (spa->spa_embedded_log_class);
+}
+
+metaslab_class_t *
 spa_special_class(spa_t *spa)
 {
 	return (spa->spa_special_class);
@@ -1832,12 +1958,10 @@
 spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype,
     uint_t level, uint_t special_smallblk)
 {
-	if (DMU_OT_IS_ZIL(objtype)) {
-		if (spa->spa_log_class->mc_groups != 0)
-			return (spa_log_class(spa));
-		else
-			return (spa_normal_class(spa));
-	}
+	/*
+	 * ZIL allocations determine their class in zio_alloc_zil().
+	 */
+	ASSERT(objtype != DMU_OT_INTENT_LOG);
 
 	boolean_t has_special_class = spa->spa_special_class->mc_groups != 0;
 
@@ -1970,6 +2094,32 @@
 		spa->spa_deadman_failmode = ZIO_FAILURE_MODE_WAIT;
 }
 
+void
+spa_set_deadman_ziotime(hrtime_t ns)
+{
+	spa_t *spa = NULL;
+
+	if (spa_mode_global != SPA_MODE_UNINIT) {
+		mutex_enter(&spa_namespace_lock);
+		while ((spa = spa_next(spa)) != NULL)
+			spa->spa_deadman_ziotime = ns;
+		mutex_exit(&spa_namespace_lock);
+	}
+}
+
+void
+spa_set_deadman_synctime(hrtime_t ns)
+{
+	spa_t *spa = NULL;
+
+	if (spa_mode_global != SPA_MODE_UNINIT) {
+		mutex_enter(&spa_namespace_lock);
+		while ((spa = spa_next(spa)) != NULL)
+			spa->spa_deadman_synctime = ns;
+		mutex_exit(&spa_namespace_lock);
+	}
+}
+
 uint64_t
 dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
 {
@@ -2088,6 +2238,7 @@
 	    spa_import_progress_list;
 
 	procfs_list_install("zfs",
+	    NULL,
 	    "import_progress",
 	    0644,
 	    &spa_import_progress_list->procfs_list,
@@ -2243,7 +2394,7 @@
 
 	s = strcmp(s1->spa_name, s2->spa_name);
 
-	return (AVL_ISIGN(s));
+	return (TREE_ISIGN(s));
 }
 
 void
@@ -2253,7 +2404,7 @@
 }
 
 void
-spa_init(int mode)
+spa_init(spa_mode_t mode)
 {
 	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -2272,7 +2423,7 @@
 	spa_mode_global = mode;
 
 #ifndef _KERNEL
-	if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+	if (spa_mode_global != SPA_MODE_READ && dprintf_find_string("watch")) {
 		struct sigaction sa;
 
 		sa.sa_flags = SA_SIGINFO;
@@ -2291,8 +2442,8 @@
 	fm_init();
 	zfs_refcount_init();
 	unique_init();
-	range_tree_init();
-	metaslab_alloc_trace_init();
+	zfs_btree_init();
+	metaslab_stat_init();
 	ddt_init();
 	zio_init();
 	dmu_init();
@@ -2326,8 +2477,8 @@
 	dmu_fini();
 	zio_fini();
 	ddt_fini();
-	metaslab_alloc_trace_fini();
-	range_tree_fini();
+	metaslab_stat_fini();
+	zfs_btree_fini();
 	unique_fini();
 	zfs_refcount_fini();
 	fm_fini();
@@ -2346,14 +2497,14 @@
 }
 
 /*
- * Return whether this pool has slogs. No locking needed.
+ * Return whether this pool has a dedicated slog device. No locking needed.
  * It's not a problem if the wrong answer is returned as it's only for
- * performance and not correctness
+ * performance and not correctness.
  */
 boolean_t
 spa_has_slogs(spa_t *spa)
 {
-	return (spa->spa_log_class->mc_rotor != NULL);
+	return (spa->spa_log_class->mc_groups != 0);
 }
 
 spa_log_state_t
@@ -2377,7 +2528,7 @@
 boolean_t
 spa_writeable(spa_t *spa)
 {
-	return (!!(spa->spa_mode & FWRITE) && spa->spa_trust_config);
+	return (!!(spa->spa_mode & SPA_MODE_WRITE) && spa->spa_trust_config);
 }
 
 /*
@@ -2391,7 +2542,7 @@
 	    !txg_all_lists_empty(&spa->spa_dsl_pool->dp_early_sync_tasks));
 }
 
-int
+spa_mode_t
 spa_mode(spa_t *spa)
 {
 	return (spa->spa_mode);
@@ -2436,7 +2587,6 @@
 	spa->spa_scan_pass_scrub_spent_paused = 0;
 	spa->spa_scan_pass_exam = 0;
 	spa->spa_scan_pass_issued = 0;
-	vdev_scan_stat_init(spa->spa_root_vdev);
 }
 
 /*
@@ -2559,6 +2709,12 @@
 	return (spa->spa_missing_tvds_allowed);
 }
 
+space_map_t *
+spa_syncing_log_sm(spa_t *spa)
+{
+	return (spa->spa_syncing_log_sm);
+}
+
 void
 spa_set_missing_tvds(spa_t *spa, uint64_t missing)
 {
@@ -2635,7 +2791,7 @@
 spa_importing_readonly_checkpoint(spa_t *spa)
 {
 	return ((spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT) &&
-	    spa->spa_mode == FREAD);
+	    spa->spa_mode == SPA_MODE_READ);
 }
 
 uint64_t
@@ -2673,95 +2829,32 @@
 
 #if defined(_KERNEL)
 
-#include <linux/mod_compat.h>
-
-static int
-param_set_deadman_failmode(const char *val, zfs_kernel_param_t *kp)
+int
+param_set_deadman_failmode_common(const char *val)
 {
 	spa_t *spa = NULL;
 	char *p;
 
 	if (val == NULL)
-		return (SET_ERROR(-EINVAL));
+		return (SET_ERROR(EINVAL));
 
 	if ((p = strchr(val, '\n')) != NULL)
 		*p = '\0';
 
 	if (strcmp(val, "wait") != 0 && strcmp(val, "continue") != 0 &&
 	    strcmp(val, "panic"))
-		return (SET_ERROR(-EINVAL));
+		return (SET_ERROR(EINVAL));
 
-	if (spa_mode_global != 0) {
+	if (spa_mode_global != SPA_MODE_UNINIT) {
 		mutex_enter(&spa_namespace_lock);
 		while ((spa = spa_next(spa)) != NULL)
 			spa_set_deadman_failmode(spa, val);
 		mutex_exit(&spa_namespace_lock);
 	}
 
-	return (param_set_charp(val, kp));
-}
-
-static int
-param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp)
-{
-	spa_t *spa = NULL;
-	int error;
-
-	error = param_set_ulong(val, kp);
-	if (error < 0)
-		return (SET_ERROR(error));
-
-	if (spa_mode_global != 0) {
-		mutex_enter(&spa_namespace_lock);
-		while ((spa = spa_next(spa)) != NULL)
-			spa->spa_deadman_ziotime =
-			    MSEC2NSEC(zfs_deadman_ziotime_ms);
-		mutex_exit(&spa_namespace_lock);
-	}
-
 	return (0);
 }
-
-static int
-param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp)
-{
-	spa_t *spa = NULL;
-	int error;
-
-	error = param_set_ulong(val, kp);
-	if (error < 0)
-		return (SET_ERROR(error));
-
-	if (spa_mode_global != 0) {
-		mutex_enter(&spa_namespace_lock);
-		while ((spa = spa_next(spa)) != NULL)
-			spa->spa_deadman_synctime =
-			    MSEC2NSEC(zfs_deadman_synctime_ms);
-		mutex_exit(&spa_namespace_lock);
-	}
-
-	return (0);
-}
-
-static int
-param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp)
-{
-	unsigned long val;
-	int error;
-
-	error = kstrtoul(buf, 0, &val);
-	if (error)
-		return (SET_ERROR(error));
-
-	if (val < 1 || val > 31)
-		return (SET_ERROR(-EINVAL));
-
-	error = param_set_int(buf, kp);
-	if (error < 0)
-		return (SET_ERROR(error));
-
-	return (0);
-}
+#endif
 
 /* Namespace manipulation */
 EXPORT_SYMBOL(spa_lookup);
@@ -2825,7 +2918,6 @@
 EXPORT_SYMBOL(spa_guid_exists);
 EXPORT_SYMBOL(spa_strdup);
 EXPORT_SYMBOL(spa_strfree);
-EXPORT_SYMBOL(spa_get_random);
 EXPORT_SYMBOL(spa_generate_guid);
 EXPORT_SYMBOL(snprintf_blkptr);
 EXPORT_SYMBOL(spa_freeze);
@@ -2851,57 +2943,47 @@
 EXPORT_SYMBOL(spa_has_checkpoint);
 EXPORT_SYMBOL(spa_top_vdevs_spacemap_addressable);
 
-/* BEGIN CSTYLED */
-module_param(zfs_flags, uint, 0644);
-MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
+ZFS_MODULE_PARAM(zfs, zfs_, flags, UINT, ZMOD_RW,
+	"Set additional debugging flags");
 
-module_param(zfs_recover, int, 0644);
-MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
+ZFS_MODULE_PARAM(zfs, zfs_, recover, INT, ZMOD_RW,
+	"Set to attempt to recover from fatal errors");
 
-module_param(zfs_free_leak_on_eio, int, 0644);
-MODULE_PARM_DESC(zfs_free_leak_on_eio,
+ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW,
 	"Set to ignore IO errors during free and permanently leak the space");
 
-module_param_call(zfs_deadman_synctime_ms, param_set_deadman_synctime,
-    param_get_ulong, &zfs_deadman_synctime_ms, 0644);
-MODULE_PARM_DESC(zfs_deadman_synctime_ms,
-	"Pool sync expiration time in milliseconds");
-
-module_param_call(zfs_deadman_ziotime_ms, param_set_deadman_ziotime,
-    param_get_ulong, &zfs_deadman_ziotime_ms, 0644);
-MODULE_PARM_DESC(zfs_deadman_ziotime_ms,
-	"IO expiration time in milliseconds");
-
-module_param(zfs_deadman_checktime_ms, ulong, 0644);
-MODULE_PARM_DESC(zfs_deadman_checktime_ms,
+ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, ULONG, ZMOD_RW,
 	"Dead I/O check interval in milliseconds");
 
-module_param(zfs_deadman_enabled, int, 0644);
-MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");
+ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW,
+	"Enable deadman timer");
 
-module_param_call(zfs_deadman_failmode, param_set_deadman_failmode,
-    param_get_charp, &zfs_deadman_failmode, 0644);
-MODULE_PARM_DESC(zfs_deadman_failmode, "Failmode for deadman timer");
-
-module_param(spa_asize_inflation, int, 0644);
-MODULE_PARM_DESC(spa_asize_inflation,
+ZFS_MODULE_PARAM(zfs_spa, spa_, asize_inflation, INT, ZMOD_RW,
 	"SPA size estimate multiplication factor");
 
-module_param_call(spa_slop_shift, param_set_slop_shift, param_get_int,
-    &spa_slop_shift, 0644);
-MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool");
-
-module_param(zfs_ddt_data_is_special, int, 0644);
-MODULE_PARM_DESC(zfs_ddt_data_is_special,
+ZFS_MODULE_PARAM(zfs, zfs_, ddt_data_is_special, INT, ZMOD_RW,
 	"Place DDT data into the special class");
 
-module_param(zfs_user_indirect_is_special, int, 0644);
-MODULE_PARM_DESC(zfs_user_indirect_is_special,
+ZFS_MODULE_PARAM(zfs, zfs_, user_indirect_is_special, INT, ZMOD_RW,
 	"Place user data indirect blocks into the special class");
 
-module_param(zfs_special_class_metadata_reserve_pct, int, 0644);
-MODULE_PARM_DESC(zfs_special_class_metadata_reserve_pct,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode,
+	param_set_deadman_failmode, param_get_charp, ZMOD_RW,
+	"Failmode for deadman timer");
+
+ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms,
+	param_set_deadman_synctime, param_get_ulong, ZMOD_RW,
+	"Pool sync expiration time in milliseconds");
+
+ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms,
+	param_set_deadman_ziotime, param_get_ulong, ZMOD_RW,
+	"IO expiration time in milliseconds");
+
+ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, INT, ZMOD_RW,
 	"Small file blocks in special vdevs depends on this much "
 	"free space available");
 /* END CSTYLED */
-#endif
+
+ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift,
+	param_get_int, ZMOD_RW, "Reserved free space in pool");

diff --git a/zfs/module/zfs/spa_stats.c b/zfs/module/zfs/spa_stats.c
index 6895428..534ac72 100644
--- a/zfs/module/zfs/spa_stats.c
+++ b/zfs/module/zfs/spa_stats.c

@@ -122,14 +122,11 @@
 spa_read_history_init(spa_t *spa)
 {
 	spa_history_list_t *shl = &spa->spa_stats.read_history;
-	char *module;
 
 	shl->size = 0;
-
-	module = kmem_asprintf("zfs/%s", spa_name(spa));
-
 	shl->procfs_list.pl_private = shl;
-	procfs_list_install(module,
+	procfs_list_install("zfs",
+	    spa_name(spa),
 	    "reads",
 	    0600,
 	    &shl->procfs_list,
@@ -137,8 +134,6 @@
 	    spa_read_history_show_header,
 	    spa_read_history_clear,
 	    offsetof(spa_read_history_t, srh_node));
-
-	strfree(module);
 }
 
 static void
@@ -293,14 +288,11 @@
 spa_txg_history_init(spa_t *spa)
 {
 	spa_history_list_t *shl = &spa->spa_stats.txg_history;
-	char *module;
 
 	shl->size = 0;
-
-	module = kmem_asprintf("zfs/%s", spa_name(spa));
-
 	shl->procfs_list.pl_private = shl;
-	procfs_list_install(module,
+	procfs_list_install("zfs",
+	    spa_name(spa),
 	    "txgs",
 	    0644,
 	    &shl->procfs_list,
@@ -308,8 +300,6 @@
 	    spa_txg_history_show_header,
 	    spa_txg_history_clear,
 	    offsetof(spa_txg_history_t, sth_node));
-
-	strfree(module);
 }
 
 static void
@@ -478,11 +468,11 @@
 
 	if (rw == KSTAT_WRITE) {
 		for (i = 0; i < shk->count; i++)
-			((kstat_named_t *)shk->private)[i].value.ui64 = 0;
+			((kstat_named_t *)shk->priv)[i].value.ui64 = 0;
 	}
 
 	for (i = shk->count; i > 0; i--)
-		if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0)
+		if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0)
 			break;
 
 	ksp->ks_ndata = i;
@@ -504,12 +494,12 @@
 
 	shk->count = 42; /* power of two buckets for 1ns to 2,199s */
 	shk->size = shk->count * sizeof (kstat_named_t);
-	shk->private = kmem_alloc(shk->size, KM_SLEEP);
+	shk->priv = kmem_alloc(shk->size, KM_SLEEP);
 
 	name = kmem_asprintf("zfs/%s", spa_name(spa));
 
 	for (i = 0; i < shk->count; i++) {
-		ks = &((kstat_named_t *)shk->private)[i];
+		ks = &((kstat_named_t *)shk->priv)[i];
 		ks->data_type = KSTAT_DATA_UINT64;
 		ks->value.ui64 = 0;
 		(void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
@@ -522,14 +512,14 @@
 
 	if (ksp) {
 		ksp->ks_lock = &shk->lock;
-		ksp->ks_data = shk->private;
+		ksp->ks_data = shk->priv;
 		ksp->ks_ndata = shk->count;
 		ksp->ks_data_size = shk->size;
 		ksp->ks_private = spa;
 		ksp->ks_update = spa_tx_assign_update;
 		kstat_install(ksp);
 	}
-	strfree(name);
+	kmem_strfree(name);
 }
 
 static void
@@ -542,7 +532,7 @@
 	if (ksp)
 		kstat_delete(ksp);
 
-	kmem_free(shk->private, shk->size);
+	kmem_free(shk->priv, shk->size);
 	mutex_destroy(&shk->lock);
 }
 
@@ -555,55 +545,7 @@
 	while (((1ULL << idx) < nsecs) && (idx < shk->size - 1))
 		idx++;
 
-	atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64);
-}
-
-/*
- * ==========================================================================
- * SPA IO History Routines
- * ==========================================================================
- */
-static int
-spa_io_history_update(kstat_t *ksp, int rw)
-{
-	if (rw == KSTAT_WRITE)
-		memset(ksp->ks_data, 0, ksp->ks_data_size);
-
-	return (0);
-}
-
-static void
-spa_io_history_init(spa_t *spa)
-{
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-	char *name;
-	kstat_t *ksp;
-
-	mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
-
-	name = kmem_asprintf("zfs/%s", spa_name(spa));
-
-	ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
-	shk->kstat = ksp;
-
-	if (ksp) {
-		ksp->ks_lock = &shk->lock;
-		ksp->ks_private = spa;
-		ksp->ks_update = spa_io_history_update;
-		kstat_install(ksp);
-	}
-	strfree(name);
-}
-
-static void
-spa_io_history_destroy(spa_t *spa)
-{
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-
-	if (shk->kstat)
-		kstat_delete(shk->kstat);
-
-	mutex_destroy(&shk->lock);
+	atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64);
 }
 
 /*
@@ -675,7 +617,7 @@
 	while (shl->size > size) {
 		smh = list_remove_head(&shl->procfs_list.pl_list);
 		if (smh->vdev_path)
-			strfree(smh->vdev_path);
+			kmem_strfree(smh->vdev_path);
 		kmem_free(smh, sizeof (spa_mmp_history_t));
 		shl->size--;
 	}
@@ -699,14 +641,12 @@
 spa_mmp_history_init(spa_t *spa)
 {
 	spa_history_list_t *shl = &spa->spa_stats.mmp_history;
-	char *module;
 
 	shl->size = 0;
 
-	module = kmem_asprintf("zfs/%s", spa_name(spa));
-
 	shl->procfs_list.pl_private = shl;
-	procfs_list_install(module,
+	procfs_list_install("zfs",
+	    spa_name(spa),
 	    "multihost",
 	    0644,
 	    &shl->procfs_list,
@@ -714,8 +654,6 @@
 	    spa_mmp_history_show_header,
 	    spa_mmp_history_clear,
 	    offsetof(spa_mmp_history_t, smh_node));
-
-	strfree(module);
 }
 
 static void
@@ -814,7 +752,7 @@
 	if (vd) {
 		smh->vdev_guid = vd->vdev_guid;
 		if (vd->vdev_path)
-			smh->vdev_path = strdup(vd->vdev_path);
+			smh->vdev_path = kmem_strdup(vd->vdev_path);
 	}
 	smh->vdev_label = label;
 	smh->mmp_node_id = mmp_node_id;
@@ -835,7 +773,9 @@
 static void *
 spa_state_addr(kstat_t *ksp, loff_t n)
 {
-	return (ksp->ks_private);	/* return the spa_t */
+	if (n == 0)
+		return (ksp->ks_private);	/* return the spa_t */
+	return (NULL);
 }
 
 static int
@@ -876,7 +816,7 @@
 		kstat_install(ksp);
 	}
 
-	strfree(name);
+	kmem_strfree(name);
 }
 
 static void
@@ -903,6 +843,12 @@
 	{ "autotrim_bytes_skipped",		KSTAT_DATA_UINT64 },
 	{ "autotrim_extents_failed",		KSTAT_DATA_UINT64 },
 	{ "autotrim_bytes_failed",		KSTAT_DATA_UINT64 },
+	{ "simple_trim_extents_written",	KSTAT_DATA_UINT64 },
+	{ "simple_trim_bytes_written",		KSTAT_DATA_UINT64 },
+	{ "simple_trim_extents_skipped",	KSTAT_DATA_UINT64 },
+	{ "simple_trim_bytes_skipped",		KSTAT_DATA_UINT64 },
+	{ "simple_trim_extents_failed",		KSTAT_DATA_UINT64 },
+	{ "simple_trim_bytes_failed",		KSTAT_DATA_UINT64 },
 };
 
 #define	SPA_IOSTATS_ADD(stat, val) \
@@ -929,17 +875,24 @@
 		SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
 		SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
 		SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
-	} else {
+	} else if (type == TRIM_TYPE_AUTO) {
 		SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
 		SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
 		SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
 		SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
 		SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
 		SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
+	} else {
+		SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
+		SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
+		SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
+		SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
+		SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
+		SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
 	}
 }
 
-int
+static int
 spa_iostats_update(kstat_t *ksp, int rw)
 {
 	if (rw == KSTAT_WRITE) {
@@ -973,7 +926,7 @@
 		kstat_install(ksp);
 	}
 
-	strfree(name);
+	kmem_strfree(name);
 }
 
 static void
@@ -995,7 +948,6 @@
 	spa_read_history_init(spa);
 	spa_txg_history_init(spa);
 	spa_tx_assign_init(spa);
-	spa_io_history_init(spa);
 	spa_mmp_history_init(spa);
 	spa_state_init(spa);
 	spa_iostats_init(spa);
@@ -1009,26 +961,19 @@
 	spa_tx_assign_destroy(spa);
 	spa_txg_history_destroy(spa);
 	spa_read_history_destroy(spa);
-	spa_io_history_destroy(spa);
 	spa_mmp_history_destroy(spa);
 }
 
-#if defined(_KERNEL)
-/* CSTYLED */
-module_param(zfs_read_history, int, 0644);
-MODULE_PARM_DESC(zfs_read_history,
-	"Historical statistics for the last N reads");
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, read_history, INT, ZMOD_RW,
+    "Historical statistics for the last N reads");
 
-module_param(zfs_read_history_hits, int, 0644);
-MODULE_PARM_DESC(zfs_read_history_hits,
-	"Include cache hits in read history");
+ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW,
+    "Include cache hits in read history");
 
-module_param(zfs_txg_history, int, 0644);
-MODULE_PARM_DESC(zfs_txg_history,
-	"Historical statistics for the last N txgs");
+ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, INT, ZMOD_RW,
+    "Historical statistics for the last N txgs");
 
-module_param(zfs_multihost_history, int, 0644);
-MODULE_PARM_DESC(zfs_multihost_history,
-	"Historical statistics for last N multihost writes");
+ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, INT, ZMOD_RW,
+    "Historical statistics for last N multihost writes");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/space_map.c b/zfs/module/zfs/space_map.c
index d9cd876..11d4798 100644
--- a/zfs/module/zfs/space_map.c
+++ b/zfs/module/zfs/space_map.c

@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -34,7 +34,6 @@
 #include <sys/dsl_pool.h>
 #include <sys/zio.h>
 #include <sys/space_map.h>
-#include <sys/refcount.h>
 #include <sys/zfeature.h>
 
 /*
@@ -96,6 +95,7 @@
 	    ZIO_PRIORITY_SYNC_READ);
 
 	int error = 0;
+	uint64_t txg = 0, sync_pass = 0;
 	for (uint64_t block_base = 0; block_base < end && error == 0;
 	    block_base += blksz) {
 		dmu_buf_t *db;
@@ -117,8 +117,29 @@
 		    block_cursor < block_end && error == 0; block_cursor++) {
 			uint64_t e = *block_cursor;
 
-			if (sm_entry_is_debug(e)) /* Skip debug entries */
+			if (sm_entry_is_debug(e)) {
+				/*
+				 * Debug entries are only needed to record the
+				 * current TXG and sync pass if available.
+				 *
+				 * Note though that sometimes there can be
+				 * debug entries that are used as padding
+				 * at the end of space map blocks in order
+				 * to not split a double-word entry in the
+				 * middle between two blocks. These entries
+				 * have their TXG field set to 0 and we
+				 * skip them without recording the TXG.
+				 * [see comment in space_map_write_seg()]
+				 */
+				uint64_t e_txg = SM_DEBUG_TXG_DECODE(e);
+				if (e_txg != 0) {
+					txg = e_txg;
+					sync_pass = SM_DEBUG_SYNCPASS_DECODE(e);
+				} else {
+					ASSERT0(SM_DEBUG_SYNCPASS_DECODE(e));
+				}
 				continue;
+			}
 
 			uint64_t raw_offset, raw_run, vdev_id;
 			maptype_t type;
@@ -158,7 +179,9 @@
 			    .sme_type = type,
 			    .sme_vdev = vdev_id,
 			    .sme_offset = entry_offset,
-			    .sme_run = entry_run
+			    .sme_run = entry_run,
+			    .sme_txg = txg,
+			    .sme_sync_pass = sync_pass
 			};
 			error = callback(&sme, arg);
 		}
@@ -523,8 +546,9 @@
  * dbuf must be dirty for the changes in sm_phys to take effect.
  */
 static void
-space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
-    uint64_t vdev_id, uint8_t words, dmu_buf_t **dbp, void *tag, dmu_tx_t *tx)
+space_map_write_seg(space_map_t *sm, uint64_t rstart, uint64_t rend,
+    maptype_t maptype, uint64_t vdev_id, uint8_t words, dmu_buf_t **dbp,
+    void *tag, dmu_tx_t *tx)
 {
 	ASSERT3U(words, !=, 0);
 	ASSERT3U(words, <=, 2);
@@ -548,14 +572,14 @@
 
 	ASSERT3P(block_cursor, <=, block_end);
 
-	uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
-	uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
+	uint64_t size = (rend - rstart) >> sm->sm_shift;
+	uint64_t start = (rstart - sm->sm_start) >> sm->sm_shift;
 	uint64_t run_max = (words == 2) ? SM2_RUN_MAX : SM_RUN_MAX;
 
-	ASSERT3U(rs->rs_start, >=, sm->sm_start);
-	ASSERT3U(rs->rs_start, <, sm->sm_start + sm->sm_size);
-	ASSERT3U(rs->rs_end - rs->rs_start, <=, sm->sm_size);
-	ASSERT3U(rs->rs_end, <=, sm->sm_start + sm->sm_size);
+	ASSERT3U(rstart, >=, sm->sm_start);
+	ASSERT3U(rstart, <, sm->sm_start + sm->sm_size);
+	ASSERT3U(rend - rstart, <=, sm->sm_size);
+	ASSERT3U(rend, <=, sm->sm_start + sm->sm_size);
 
 	while (size != 0) {
 		ASSERT3P(block_cursor, <=, block_end);
@@ -650,7 +674,7 @@
 
 	space_map_write_intro_debug(sm, maptype, tx);
 
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	/*
 	 * We do this right after we write the intro debug entry
 	 * because the estimate does not take it into account.
@@ -673,10 +697,14 @@
 
 	dmu_buf_will_dirty(db, tx);
 
-	avl_tree_t *t = &rt->rt_root;
-	for (range_seg_t *rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
-		uint64_t offset = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
-		uint64_t length = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
+	zfs_btree_t *t = &rt->rt_root;
+	zfs_btree_index_t where;
+	for (range_seg_t *rs = zfs_btree_first(t, &where); rs != NULL;
+	    rs = zfs_btree_next(t, &where, &where)) {
+		uint64_t offset = (rs_get_start(rs, rt) - sm->sm_start) >>
+		    sm->sm_shift;
+		uint64_t length = (rs_get_end(rs, rt) - rs_get_start(rs, rt)) >>
+		    sm->sm_shift;
 		uint8_t words = 1;
 
 		/*
@@ -698,16 +726,16 @@
 		    length > SM_RUN_MAX ||
 		    vdev_id != SM_NO_VDEVID ||
 		    (zfs_force_some_double_word_sm_entries &&
-		    spa_get_random(100) == 0)))
+		    random_in_range(100) == 0)))
 			words = 2;
 
-		space_map_write_seg(sm, rs, maptype, vdev_id, words,
-		    &db, FTAG, tx);
+		space_map_write_seg(sm, rs_get_start(rs, rt), rs_get_end(rs,
+		    rt), maptype, vdev_id, words, &db, FTAG, tx);
 	}
 
 	dmu_buf_rele(db, FTAG);
 
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 	/*
 	 * We expect our estimation to be based on the worst case
 	 * scenario [see comment in space_map_estimate_optimal_size()].
@@ -749,7 +777,7 @@
 	else
 		sm->sm_phys->smp_alloc -= range_tree_space(rt);
 
-	uint64_t nodes = avl_numnodes(&rt->rt_root);
+	uint64_t nodes = zfs_btree_numnodes(&rt->rt_root);
 	uint64_t rt_space = range_tree_space(rt);
 
 	space_map_write_impl(sm, rt, maptype, vdev_id, tx);
@@ -758,7 +786,7 @@
 	 * Ensure that the space_map's accounting wasn't changed
 	 * while we were in the middle of writing it out.
 	 */
-	VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
+	VERIFY3U(nodes, ==, zfs_btree_numnodes(&rt->rt_root));
 	VERIFY3U(range_tree_space(rt), ==, rt_space);
 }
 
@@ -849,9 +877,11 @@
 	    doi.doi_data_block_size != blocksize ||
 	    doi.doi_metadata_block_size != 1 << space_map_ibs) {
 		zfs_dbgmsg("txg %llu, spa %s, sm %px, reallocating "
-		    "object[%llu]: old bonus %u, old blocksz %u",
-		    dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
-		    doi.doi_bonus_size, doi.doi_data_block_size);
+		    "object[%llu]: old bonus %llu, old blocksz %u",
+		    (u_longlong_t)dmu_tx_get_txg(tx), spa_name(spa), sm,
+		    (u_longlong_t)sm->sm_object,
+		    (u_longlong_t)doi.doi_bonus_size,
+		    doi.doi_data_block_size);
 
 		space_map_free(sm, tx);
 		dmu_buf_rele(sm->sm_dbuf, sm);
@@ -1067,3 +1097,11 @@
 {
 	return (sm != NULL ? sm->sm_phys->smp_length : 0);
 }
+
+uint64_t
+space_map_nblocks(space_map_t *sm)
+{
+	/* No space map means no blocks; else length in sm_blksz units. */
+	return (sm == NULL ? 0 :
+	    DIV_ROUND_UP(space_map_length(sm), sm->sm_blksz));
+}

diff --git a/zfs/module/zfs/space_reftree.c b/zfs/module/zfs/space_reftree.c
index aa289ba..080fc66 100644
--- a/zfs/module/zfs/space_reftree.c
+++ b/zfs/module/zfs/space_reftree.c

@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -57,11 +57,11 @@
 	const space_ref_t *sr1 = (const space_ref_t *)x1;
 	const space_ref_t *sr2 = (const space_ref_t *)x2;
 
-	int cmp = AVL_CMP(sr1->sr_offset, sr2->sr_offset);
+	int cmp = TREE_CMP(sr1->sr_offset, sr2->sr_offset);
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_PCMP(sr1, sr2));
+	return (TREE_PCMP(sr1, sr2));
 }
 
 void
@@ -109,10 +109,13 @@
 void
 space_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt)
 {
-	range_seg_t *rs;
+	zfs_btree_index_t where;
 
-	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
-		space_reftree_add_seg(t, rs->rs_start, rs->rs_end, refcnt);
+	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; rs =
+	    zfs_btree_next(&rt->rt_root, &where, &where)) {
+		space_reftree_add_seg(t, rs_get_start(rs, rt), rs_get_end(rs,
+		    rt),  refcnt);
+	}
 }
 
 /*

diff --git a/zfs/module/zfs/trace.c b/zfs/module/zfs/trace.c
deleted file mode 100644
index eb6efe8..0000000
--- a/zfs/module/zfs/trace.c
+++ /dev/null

@@ -1,52 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one
- * (and only one) C file, so this dummy file exists for that purpose.
- */
-
-#include <sys/multilist.h>
-#include <sys/arc_impl.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/dbuf.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dmu_tx.h>
-#include <sys/dnode.h>
-#include <sys/multilist.h>
-#include <sys/zfs_znode.h>
-#include <sys/zil_impl.h>
-#include <sys/zrlock.h>
-
-#define	CREATE_TRACE_POINTS
-#include <sys/trace.h>
-#include <sys/trace_acl.h>
-#include <sys/trace_arc.h>
-#include <sys/trace_dbuf.h>
-#include <sys/trace_dmu.h>
-#include <sys/trace_dnode.h>
-#include <sys/trace_multilist.h>
-#include <sys/trace_txg.h>
-#include <sys/trace_vdev.h>
-#include <sys/trace_zil.h>
-#include <sys/trace_zio.h>
-#include <sys/trace_zrlock.h>

diff --git a/zfs/module/zfs/txg.c b/zfs/module/zfs/txg.c
index bf6e7d7..c9eb84b 100644
--- a/zfs/module/zfs/txg.c
+++ b/zfs/module/zfs/txg.c

@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 Martin Matuska
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -33,7 +33,7 @@
 #include <sys/dsl_scan.h>
 #include <sys/zil.h>
 #include <sys/callb.h>
-#include <sys/trace_txg.h>
+#include <sys/trace_zfs.h>
 
 /*
  * ZFS Transaction Groups
@@ -242,16 +242,11 @@
 {
 	CALLB_CPR_SAFE_BEGIN(cpr);
 
-	/*
-	 * cv_wait_sig() is used instead of cv_wait() in order to prevent
-	 * this process from incorrectly contributing to the system load
-	 * average when idle.
-	 */
 	if (time) {
-		(void) cv_timedwait_sig(cv, &tx->tx_sync_lock,
+		(void) cv_timedwait_idle(cv, &tx->tx_sync_lock,
 		    ddi_get_lbolt() + time);
 	} else {
-		cv_wait_sig(cv, &tx->tx_sync_lock);
+		cv_wait_idle(cv, &tx->tx_sync_lock);
 	}
 
 	CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
@@ -272,7 +267,7 @@
 	ASSERT3U(tx->tx_threads, ==, 2);
 
 	/*
-	 * We need to ensure that we've vacated the deferred space_maps.
+	 * We need to ensure that we've vacated the deferred metaslab trees.
 	 */
 	txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE);
 
@@ -297,6 +292,27 @@
 	mutex_exit(&tx->tx_sync_lock);
 }
 
+/*
+ * Get a handle on the currently open txg and keep it open.
+ *
+ * The txg is guaranteed to stay open until txg_rele_to_quiesce() is called for
+ * the handle. Once txg_rele_to_quiesce() has been called, the txg stays
+ * in quiescing state until txg_rele_to_sync() is called for the handle.
+ *
+ * It is guaranteed that subsequent calls return monotonically increasing
+ * txgs for the same dsl_pool_t. Of course this is not strict monotonicity,
+ * because the same txg can be returned multiple times in a row. This
+ * guarantee holds both for subsequent calls from one thread and for multiple
+ * threads. For example, it is impossible to observe the following sequence
+ * of events:
+ *
+ *           Thread 1                            Thread 2
+ *
+ *   1 <- txg_hold_open(P, ...)
+ *                                       2 <- txg_hold_open(P, ...)
+ *   1 <- txg_hold_open(P, ...)
+ *
+ */
 uint64_t
 txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
 {
@@ -310,9 +326,7 @@
 	 * significance to the chosen tx_cpu. Because.. Why not use
 	 * the current cpu to index into the array?
 	 */
-	kpreempt_disable();
-	tc = &tx->tx_cpu[CPU_SEQID];
-	kpreempt_enable();
+	tc = &tx->tx_cpu[CPU_SEQID_UNSTABLE];
 
 	mutex_enter(&tc->tc_open_lock);
 	txg = tx->tx_open_txg;
@@ -400,7 +414,8 @@
 	spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);
 
 	/*
-	 * Quiesce the transaction group by waiting for everyone to txg_exit().
+	 * Quiesce the transaction group by waiting for everyone to
+	 * call txg_rele_to_sync() for their open transaction handles.
 	 */
 	for (c = 0; c < max_ncpus; c++) {
 		tx_cpu_t *tc = &tx->tx_cpu[c];
@@ -453,8 +468,9 @@
 			 * Commit callback taskq hasn't been created yet.
 			 */
 			tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
-			    boot_ncpus, defclsyspri, boot_ncpus, boot_ncpus * 2,
-			    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+			    100, defclsyspri, boot_ncpus, boot_ncpus * 2,
+			    TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
+			    TASKQ_THREADS_CPU_PCT);
 		}
 
 		cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
@@ -483,14 +499,6 @@
 }
 
 static boolean_t
-txg_is_syncing(dsl_pool_t *dp)
-{
-	tx_state_t *tx = &dp->dp_tx;
-	ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
-	return (tx->tx_syncing_txg != 0);
-}
-
-static boolean_t
 txg_is_quiescing(dsl_pool_t *dp)
 {
 	tx_state_t *tx = &dp->dp_tx;
@@ -523,8 +531,6 @@
 		clock_t timeout = zfs_txg_timeout * hz;
 		clock_t timer;
 		uint64_t txg;
-		uint64_t dirty_min_bytes =
-		    zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
 
 		/*
 		 * We sync when we're scanning, there's someone waiting
@@ -535,10 +541,10 @@
 		while (!dsl_scan_active(dp->dp_scan) &&
 		    !tx->tx_exiting && timer > 0 &&
 		    tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
-		    !txg_has_quiesced_to_sync(dp) &&
-		    dp->dp_dirty_total < dirty_min_bytes) {
+		    !txg_has_quiesced_to_sync(dp)) {
 			dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
-			    tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
+			    (u_longlong_t)tx->tx_synced_txg,
+			    (u_longlong_t)tx->tx_sync_txg_waiting, dp);
 			txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
 			delta = ddi_get_lbolt() - start;
 			timer = (delta > timeout ? 0 : timeout - delta);
@@ -549,6 +555,11 @@
 		 * prompting it to do so if necessary.
 		 */
 		while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
+			if (txg_is_quiescing(dp)) {
+				txg_thread_wait(tx, &cpr,
+				    &tx->tx_quiesce_done_cv, 0);
+				continue;
+			}
 			if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
 				tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
 			cv_broadcast(&tx->tx_quiesce_more_cv);
@@ -571,7 +582,8 @@
 		cv_broadcast(&tx->tx_quiesce_more_cv);
 
 		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
-		    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
+		    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
+		    (u_longlong_t)tx->tx_sync_txg_waiting);
 		mutex_exit(&tx->tx_sync_lock);
 
 		txg_stat_t *ts = spa_txg_history_init_io(spa, txg, dp);
@@ -622,8 +634,9 @@
 
 		txg = tx->tx_open_txg;
 		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
-		    txg, tx->tx_quiesce_txg_waiting,
-		    tx->tx_sync_txg_waiting);
+		    (u_longlong_t)txg,
+		    (u_longlong_t)tx->tx_quiesce_txg_waiting,
+		    (u_longlong_t)tx->tx_sync_txg_waiting);
 		tx->tx_quiescing_txg = txg;
 
 		mutex_exit(&tx->tx_sync_lock);
@@ -633,7 +646,8 @@
 		/*
 		 * Hand this txg off to the sync thread.
 		 */
-		dprintf("quiesce done, handing off txg %llu\n", txg);
+		dprintf("quiesce done, handing off txg %llu\n",
+		    (u_longlong_t)txg);
 		tx->tx_quiescing_txg = 0;
 		tx->tx_quiesced_txg = txg;
 		DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg);
@@ -689,11 +703,13 @@
 	if (tx->tx_sync_txg_waiting < txg)
 		tx->tx_sync_txg_waiting = txg;
 	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
-	    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
+	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
+	    (u_longlong_t)tx->tx_sync_txg_waiting);
 	while (tx->tx_synced_txg < txg) {
 		dprintf("broadcasting sync more "
-		    "tx_synced=%llu waiting=%llu dp=%p\n",
-		    tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
+		    "tx_synced=%llu waiting=%llu dp=%px\n",
+		    (u_longlong_t)tx->tx_synced_txg,
+		    (u_longlong_t)tx->tx_sync_txg_waiting, dp);
 		cv_broadcast(&tx->tx_sync_more_cv);
 		if (wait_sig) {
 			/*
@@ -748,7 +764,8 @@
 	if (tx->tx_quiesce_txg_waiting < txg && should_quiesce)
 		tx->tx_quiesce_txg_waiting = txg;
 	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
-	    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
+	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
+	    (u_longlong_t)tx->tx_sync_txg_waiting);
 	while (tx->tx_open_txg < txg) {
 		cv_broadcast(&tx->tx_quiesce_more_cv);
 		/*
@@ -760,31 +777,30 @@
 		if (should_quiesce == B_TRUE) {
 			cv_wait_io(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
 		} else {
-			cv_wait_sig(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
+			cv_wait_idle(&tx->tx_quiesce_done_cv,
+			    &tx->tx_sync_lock);
 		}
 	}
 	mutex_exit(&tx->tx_sync_lock);
 }
 
 /*
- * If there isn't a txg syncing or in the pipeline, push another txg through
- * the pipeline by quiescing the open txg.
+ * Pass in the txg number that should be synced.
  */
 void
-txg_kick(dsl_pool_t *dp)
+txg_kick(dsl_pool_t *dp, uint64_t txg)
 {
 	tx_state_t *tx = &dp->dp_tx;
 
 	ASSERT(!dsl_pool_config_held(dp));
 
+	if (tx->tx_sync_txg_waiting >= txg)
+		return;
+
 	mutex_enter(&tx->tx_sync_lock);
-	if (!txg_is_syncing(dp) &&
-	    !txg_is_quiescing(dp) &&
-	    tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
-	    tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
-	    tx->tx_quiesced_txg <= tx->tx_synced_txg) {
-		tx->tx_quiesce_txg_waiting = tx->tx_open_txg + 1;
-		cv_broadcast(&tx->tx_quiesce_more_cv);
+	if (tx->tx_sync_txg_waiting < txg) {
+		tx->tx_sync_txg_waiting = txg;
+		cv_broadcast(&tx->tx_sync_more_cv);
 	}
 	mutex_exit(&tx->tx_sync_lock);
 }
@@ -813,7 +829,7 @@
 void
 txg_verify(spa_t *spa, uint64_t txg)
 {
-	ASSERTV(dsl_pool_t *dp = spa_get_dsl(spa));
+	dsl_pool_t *dp __maybe_unused = spa_get_dsl(spa);
 	if (txg <= TXG_INITIAL || txg == ZILTEST_TXG)
 		return;
 	ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
@@ -1038,7 +1054,6 @@
 	return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(txg_init);
 EXPORT_SYMBOL(txg_fini);
 EXPORT_SYMBOL(txg_sync_start);
@@ -1054,6 +1069,7 @@
 EXPORT_SYMBOL(txg_stalled);
 EXPORT_SYMBOL(txg_sync_waiting);
 
-module_param(zfs_txg_timeout, int, 0644);
-MODULE_PARM_DESC(zfs_txg_timeout, "Max seconds worth of delta per txg");
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, timeout, INT, ZMOD_RW,
+	"Max seconds worth of delta per txg");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/unique.c b/zfs/module/zfs/unique.c
index 5cdd025..0e07679 100644
--- a/zfs/module/zfs/unique.c
+++ b/zfs/module/zfs/unique.c

@@ -45,7 +45,7 @@
 	const unique_t *una = (const unique_t *)a;
 	const unique_t *unb = (const unique_t *)b;
 
-	return (AVL_CMP(una->un_value, unb->un_value));
+	return (TREE_CMP(una->un_value, unb->un_value));
 }
 
 void

diff --git a/zfs/module/zfs/vdev.c b/zfs/module/zfs/vdev.c
index f16e630..57259b8 100644
--- a/zfs/module/zfs/vdev.c
+++ b/zfs/module/zfs/vdev.c

@@ -21,13 +21,14 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome@me.com>
  * Copyright 2017 Joyent, Inc.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, Datto Inc. All rights reserved.
+ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
  */
 
 #include <sys/zfs_context.h>
@@ -39,6 +40,8 @@
 #include <sys/dmu_tx.h>
 #include <sys/dsl_dir.h>
 #include <sys/vdev_impl.h>
+#include <sys/vdev_rebuild.h>
+#include <sys/vdev_draid.h>
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@@ -50,12 +53,34 @@
 #include <sys/arc.h>
 #include <sys/zil.h>
 #include <sys/dsl_scan.h>
+#include <sys/vdev_raidz.h>
 #include <sys/abd.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
 #include <sys/zvol.h>
 #include <sys/zfs_ratelimit.h>
 
+/*
+ * One metaslab from each (normal-class) vdev is used by the ZIL.  These are
+ * called "embedded slog metaslabs", are referenced by vdev_log_mg, and are
+ * part of the spa_embedded_log_class.  The metaslab with the most free space
+ * in each vdev is selected for this purpose when the pool is opened (or a
+ * vdev is added).  See vdev_metaslab_init().
+ *
+ * Log blocks can be allocated from the following locations.  Each one is tried
+ * in order until the allocation succeeds:
+ * 1. dedicated log vdevs, aka "slog" (spa_log_class)
+ * 2. embedded slog metaslabs (spa_embedded_log_class)
+ * 3. other metaslabs in normal vdevs (spa_normal_class)
+ *
+ * zfs_embedded_slog_min_ms disables the embedded slog if there are fewer
+ * than this number of metaslabs in the vdev.  This ensures that we don't set
+ * aside an unreasonable amount of space for the ZIL.  If set to less than
+ * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
+ * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
+ */
+int zfs_embedded_slog_min_ms = 64;
+
 /* default target for number of metaslabs per top-level vdev */
 int zfs_vdev_default_ms_count = 200;
 
@@ -77,7 +102,7 @@
  * Since the DTL space map of a vdev is not expected to have a lot of
  * entries, we default its block size to 4K.
  */
-int vdev_dtl_sm_blksz = (1 << 12);
+int zfs_vdev_dtl_sm_blksz = (1 << 12);
 
 /*
  * Rate limit slow IO (delay) events to this many per second.
@@ -100,7 +125,7 @@
  * the end of each transaction can benefit from a higher I/O bandwidth
  * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
  */
-int vdev_standard_sm_blksz = (1 << 17);
+int zfs_vdev_standard_sm_blksz = (1 << 17);
 
 /*
  * Tunable parameter for debugging or performance analysis. Setting this
@@ -109,6 +134,17 @@
  */
 int zfs_nocacheflush = 0;
 
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * vdev's physical ashift (disk's physical sector size).  Although ASHIFT_MAX
+ * is higher than the maximum set here, the automatic range is intentionally
+ * limited to not excessively impact pool space efficiency.  Higher ashift
+ * values may still be forced by vdev logical ashift or by user via ashift
+ * property, but won't be set automatically as a performance optimization.
+ */
+uint64_t zfs_vdev_max_auto_ashift = 14;
+uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+
 /*PRINTFLIKE2*/
 void
 vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -137,7 +173,8 @@
 	char state[20];
 
 	if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) {
-		zfs_dbgmsg("%*svdev %u: %s", indent, "", vd->vdev_id,
+		zfs_dbgmsg("%*svdev %llu: %s", indent, "",
+		    (u_longlong_t)vd->vdev_id,
 		    vd->vdev_ops->vdev_op_type);
 		return;
 	}
@@ -189,6 +226,8 @@
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
 	&vdev_raidz_ops,
+	&vdev_draid_ops,
+	&vdev_draid_spare_ops,
 	&vdev_mirror_ops,
 	&vdev_replacing_ops,
 	&vdev_spare_ops,
@@ -215,17 +254,35 @@
 	return (ops);
 }
 
-/* ARGSUSED */
-void
-vdev_default_xlate(vdev_t *vd, const range_seg_t *in, range_seg_t *res)
+/*
+ * Given a vdev and a metaslab class, find which metaslab group we're
+ * interested in. All vdevs may belong to two different metaslab classes.
+ * Dedicated slog devices use only the primary metaslab group, rather than a
+ * separate log group. For embedded slogs, the vdev_log_mg will be non-NULL.
+ */
+metaslab_group_t *
+vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
 {
-	res->rs_start = in->rs_start;
-	res->rs_end = in->rs_end;
+	if (mc == spa_embedded_log_class(vd->vdev_spa) &&
+	    vd->vdev_log_mg != NULL)
+		return (vd->vdev_log_mg);
+	else
+		return (vd->vdev_mg);
+}
+
+void
+vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
+{
+	(void) vd, (void) remain_rs;
+
+	physical_rs->rs_start = logical_rs->rs_start;
+	physical_rs->rs_end = logical_rs->rs_end;
 }
 
 /*
  * Derive the enumerated allocation bias from string input.
- * String origin is either the per-vdev zap or zpool(1M).
+ * String origin is either the per-vdev zap or zpool(8).
  */
 static vdev_alloc_bias_t
 vdev_derive_alloc_bias(const char *bias)
@@ -260,6 +317,12 @@
 	return (asize);
 }
 
+uint64_t
+vdev_default_min_asize(vdev_t *vd)
+{
+	return (vd->vdev_min_asize);	/* default: value passed through as-is */
+}
+
 /*
  * Get the minimum allocatable size. We define the allocatable size as
  * the vdev's asize rounded to the nearest metaslab. This allows us to
@@ -285,15 +348,7 @@
 	if (vd == vd->vdev_top)
 		return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));
 
-	/*
-	 * The allocatable space for a raidz vdev is N * sizeof(smallest child),
-	 * so each child must provide at least 1/Nth of its asize.
-	 */
-	if (pvd->vdev_ops == &vdev_raidz_ops)
-		return ((pvd->vdev_min_asize + pvd->vdev_children - 1) /
-		    pvd->vdev_children);
-
-	return (pvd->vdev_min_asize);
+	return (pvd->vdev_ops->vdev_op_min_asize(pvd));
 }
 
 void
@@ -305,6 +360,48 @@
 		vdev_set_min_asize(vd->vdev_child[c]);
 }
 
+/*
+ * Minimal allocation size for a top-level vdev.  A vdev type may supply
+ * its own value through the vdev_op_min_alloc callback; types without
+ * the callback fall back to 1 << vdev_ashift.
+ */
+uint64_t
+vdev_get_min_alloc(vdev_t *vd)
+{
+	if (vd->vdev_ops->vdev_op_min_alloc == NULL)
+		return (1ULL << vd->vdev_ashift);
+
+	return (vd->vdev_ops->vdev_op_min_alloc(vd));
+}
+
+/*
+ * Parity level for a top-level vdev.  The value comes from the vdev
+ * type's vdev_op_nparity callback; types that do not provide the
+ * callback are reported as having zero parity.
+ */
+uint64_t
+vdev_get_nparity(vdev_t *vd)
+{
+	if (vd->vdev_ops->vdev_op_nparity == NULL)
+		return (0);
+
+	return (vd->vdev_ops->vdev_op_nparity(vd));
+}
+
+/*
+ * Number of data disks for a top-level vdev.  The value comes from the
+ * vdev type's vdev_op_ndisks callback; types that do not provide the
+ * callback are reported as having a single data disk.
+ */
+uint64_t
+vdev_get_ndisks(vdev_t *vd)
+{
+	if (vd->vdev_ops->vdev_op_ndisks == NULL)
+		return (1);
+
+	return (vd->vdev_ops->vdev_op_ndisks(vd));
+}
+
 vdev_t *
 vdev_lookup_top(spa_t *spa, uint64_t vdev)
 {
@@ -529,7 +626,8 @@
 
 	rw_init(&vd->vdev_indirect_rwlock, NULL, RW_DEFAULT, NULL);
 	mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
-	vd->vdev_obsolete_segments = range_tree_create(NULL, NULL);
+	vd->vdev_obsolete_segments = range_tree_create(NULL, RANGE_SEG64, NULL,
+	    0, 0);
 
 	/*
 	 * Initialize rate limit structs for events.  We rate limit ZIO delay
@@ -538,6 +636,8 @@
 	 */
 	zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
 	    1);
+	zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second,
+	    1);
 	zfs_ratelimit_init(&vd->vdev_checksum_rl,
 	    &zfs_checksum_events_per_second, 1);
 
@@ -546,14 +646,17 @@
 	list_link_init(&vd->vdev_initialize_node);
 	list_link_init(&vd->vdev_leaf_node);
 	list_link_init(&vd->vdev_trim_node);
+
 	mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
 	mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_scan_io_queue_lock, NULL, MUTEX_DEFAULT, NULL);
+
 	mutex_init(&vd->vdev_initialize_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_initialize_io_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&vd->vdev_initialize_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&vd->vdev_initialize_io_cv, NULL, CV_DEFAULT, NULL);
+
 	mutex_init(&vd->vdev_trim_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_autotrim_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_trim_io_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -561,9 +664,14 @@
 	cv_init(&vd->vdev_autotrim_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&vd->vdev_trim_io_cv, NULL, CV_DEFAULT, NULL);
 
+	mutex_init(&vd->vdev_rebuild_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
+
 	for (int t = 0; t < DTL_TYPES; t++) {
-		vd->vdev_dtl[t] = range_tree_create(NULL, NULL);
+		vd->vdev_dtl[t] = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
+		    0);
 	}
+
 	txg_list_create(&vd->vdev_ms_list, spa,
 	    offsetof(struct metaslab, ms_txg_node));
 	txg_list_create(&vd->vdev_dtl_list, spa,
@@ -586,7 +694,7 @@
 {
 	vdev_ops_t *ops;
 	char *type;
-	uint64_t guid = 0, islog, nparity;
+	uint64_t guid = 0, islog;
 	vdev_t *vd;
 	vdev_indirect_config_t *vic;
 	char *tmp = NULL;
@@ -643,48 +751,13 @@
 	if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
 		return (SET_ERROR(ENOTSUP));
 
-	/*
-	 * Set the nparity property for RAID-Z vdevs.
-	 */
-	nparity = -1ULL;
-	if (ops == &vdev_raidz_ops) {
-		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
-		    &nparity) == 0) {
-			if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
-				return (SET_ERROR(EINVAL));
-			/*
-			 * Previous versions could only support 1 or 2 parity
-			 * device.
-			 */
-			if (nparity > 1 &&
-			    spa_version(spa) < SPA_VERSION_RAIDZ2)
-				return (SET_ERROR(ENOTSUP));
-			if (nparity > 2 &&
-			    spa_version(spa) < SPA_VERSION_RAIDZ3)
-				return (SET_ERROR(ENOTSUP));
-		} else {
-			/*
-			 * We require the parity to be specified for SPAs that
-			 * support multiple parity levels.
-			 */
-			if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
-				return (SET_ERROR(EINVAL));
-			/*
-			 * Otherwise, we default to 1 parity device for RAID-Z.
-			 */
-			nparity = 1;
-		}
-	} else {
-		nparity = 0;
-	}
-	ASSERT(nparity != -1ULL);
-
-	/*
-	 * If creating a top-level vdev, check for allocation classes input
-	 */
 	if (top_level && alloctype == VDEV_ALLOC_ADD) {
 		char *bias;
 
+		/*
+		 * If creating a top-level vdev, check for allocation
+		 * classes input.
+		 */
 		if (nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS,
 		    &bias) == 0) {
 			alloc_bias = vdev_derive_alloc_bias(bias);
@@ -696,13 +769,32 @@
 				return (SET_ERROR(ENOTSUP));
 			}
 		}
+
+		/* spa_vdev_add() expects feature to be enabled */
+		if (ops == &vdev_draid_ops &&
+		    spa->spa_load_state != SPA_LOAD_CREATE &&
+		    !spa_feature_is_enabled(spa, SPA_FEATURE_DRAID)) {
+			return (SET_ERROR(ENOTSUP));
+		}
+	}
+
+	/*
+	 * Initialize the vdev specific data.  This is done before calling
+	 * vdev_alloc_common() since it may fail and this simplifies the
+	 * error reporting and cleanup code paths.
+	 */
+	void *tsd = NULL;
+	if (ops->vdev_op_init != NULL) {
+		rc = ops->vdev_op_init(spa, nv, &tsd);
+		if (rc != 0) {
+			return (rc);
+		}
 	}
 
 	vd = vdev_alloc_common(spa, id, guid, ops);
-	vic = &vd->vdev_indirect_config;
-
+	vd->vdev_tsd = tsd;
 	vd->vdev_islog = islog;
-	vd->vdev_nparity = nparity;
+
 	if (top_level && alloc_bias != VDEV_BIAS_NONE)
 		vd->vdev_alloc_bias = alloc_bias;
 
@@ -742,6 +834,8 @@
 	    &vd->vdev_wholedisk) != 0)
 		vd->vdev_wholedisk = -1ULL;
 
+	vic = &vd->vdev_indirect_config;
+
 	ASSERT0(vic->vic_mapping_object);
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_INDIRECT_OBJECT,
 	    &vic->vic_mapping_object);
@@ -833,6 +927,9 @@
 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
 		    &vd->vdev_resilver_txg);
 
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REBUILD_TXG,
+		    &vd->vdev_rebuild_txg);
+
 		if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
 			vdev_defer_resilver(vd);
 
@@ -888,6 +985,7 @@
 	ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
 	ASSERT3P(vd->vdev_trim_thread, ==, NULL);
 	ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
+	ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
 
 	/*
 	 * Scan queues are normally destroyed at the end of a scan. If the
@@ -919,12 +1017,21 @@
 	ASSERT(vd->vdev_child == NULL);
 	ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
 
+	if (vd->vdev_ops->vdev_op_fini != NULL)
+		vd->vdev_ops->vdev_op_fini(vd);
+
 	/*
 	 * Discard allocation state.
 	 */
 	if (vd->vdev_mg != NULL) {
 		vdev_metaslab_fini(vd);
 		metaslab_group_destroy(vd->vdev_mg);
+		vd->vdev_mg = NULL;
+	}
+	if (vd->vdev_log_mg != NULL) {
+		ASSERT0(vd->vdev_ms_count);
+		metaslab_group_destroy(vd->vdev_log_mg);
+		vd->vdev_log_mg = NULL;
 	}
 
 	ASSERT0(vd->vdev_stat.vs_space);
@@ -995,10 +1102,12 @@
 	mutex_destroy(&vd->vdev_stat_lock);
 	mutex_destroy(&vd->vdev_probe_lock);
 	mutex_destroy(&vd->vdev_scan_io_queue_lock);
+
 	mutex_destroy(&vd->vdev_initialize_lock);
 	mutex_destroy(&vd->vdev_initialize_io_lock);
 	cv_destroy(&vd->vdev_initialize_io_cv);
 	cv_destroy(&vd->vdev_initialize_cv);
+
 	mutex_destroy(&vd->vdev_trim_lock);
 	mutex_destroy(&vd->vdev_autotrim_lock);
 	mutex_destroy(&vd->vdev_trim_io_lock);
@@ -1006,7 +1115,11 @@
 	cv_destroy(&vd->vdev_autotrim_cv);
 	cv_destroy(&vd->vdev_trim_io_cv);
 
+	mutex_destroy(&vd->vdev_rebuild_lock);
+	cv_destroy(&vd->vdev_rebuild_cv);
+
 	zfs_ratelimit_fini(&vd->vdev_delay_rl);
+	zfs_ratelimit_fini(&vd->vdev_deadman_rl);
 	zfs_ratelimit_fini(&vd->vdev_checksum_rl);
 
 	if (vd == spa->spa_root_vdev)
@@ -1041,14 +1154,20 @@
 
 	if (tvd->vdev_mg)
 		ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg);
+	if (tvd->vdev_log_mg)
+		ASSERT3P(tvd->vdev_log_mg, ==, svd->vdev_log_mg);
 	tvd->vdev_mg = svd->vdev_mg;
+	tvd->vdev_log_mg = svd->vdev_log_mg;
 	tvd->vdev_ms = svd->vdev_ms;
 
 	svd->vdev_mg = NULL;
+	svd->vdev_log_mg = NULL;
 	svd->vdev_ms = NULL;
 
 	if (tvd->vdev_mg != NULL)
 		tvd->vdev_mg->mg_vd = tvd;
+	if (tvd->vdev_log_mg != NULL)
+		tvd->vdev_log_mg->mg_vd = tvd;
 
 	tvd->vdev_checkpoint_sm = svd->vdev_checkpoint_sm;
 	svd->vdev_checkpoint_sm = NULL;
@@ -1075,7 +1194,10 @@
 	ASSERT3P(tvd->vdev_indirect_births, ==, NULL);
 	ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL);
 	ASSERT0(tvd->vdev_removing);
+	ASSERT0(tvd->vdev_rebuilding);
 	tvd->vdev_removing = svd->vdev_removing;
+	tvd->vdev_rebuilding = svd->vdev_rebuilding;
+	tvd->vdev_rebuild_config = svd->vdev_rebuild_config;
 	tvd->vdev_indirect_config = svd->vdev_indirect_config;
 	tvd->vdev_indirect_mapping = svd->vdev_indirect_mapping;
 	tvd->vdev_indirect_births = svd->vdev_indirect_births;
@@ -1089,6 +1211,7 @@
 	svd->vdev_indirect_births = NULL;
 	svd->vdev_obsolete_sm = NULL;
 	svd->vdev_removing = 0;
+	svd->vdev_rebuilding = 0;
 
 	for (t = 0; t < TXG_SIZE; t++) {
 		while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
@@ -1131,7 +1254,8 @@
 }
 
 /*
- * Add a mirror/replacing vdev above an existing vdev.
+ * Add a mirror/replacing vdev above an existing vdev.  There is no need to
+ * call .vdev_op_init() since mirror/replacing vdevs do not have private state.
  */
 vdev_t *
 vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
@@ -1149,6 +1273,8 @@
 	mvd->vdev_max_asize = cvd->vdev_max_asize;
 	mvd->vdev_psize = cvd->vdev_psize;
 	mvd->vdev_ashift = cvd->vdev_ashift;
+	mvd->vdev_logical_ashift = cvd->vdev_logical_ashift;
+	mvd->vdev_physical_ashift = cvd->vdev_physical_ashift;
 	mvd->vdev_state = cvd->vdev_state;
 	mvd->vdev_crtxg = cvd->vdev_crtxg;
 
@@ -1180,7 +1306,8 @@
 	    mvd->vdev_ops == &vdev_replacing_ops ||
 	    mvd->vdev_ops == &vdev_spare_ops);
 	cvd->vdev_ashift = mvd->vdev_ashift;
-
+	cvd->vdev_logical_ashift = mvd->vdev_logical_ashift;
+	cvd->vdev_physical_ashift = mvd->vdev_physical_ashift;
 	vdev_remove_child(mvd, cvd);
 	vdev_remove_child(pvd, mvd);
 
@@ -1218,7 +1345,7 @@
 	vdev_free(mvd);
 }
 
-static void
+void
 vdev_metaslab_group_create(vdev_t *vd)
 {
 	spa_t *spa = vd->vdev_spa;
@@ -1252,10 +1379,15 @@
 		vd->vdev_mg = metaslab_group_create(mc, vd,
 		    spa->spa_alloc_count);
 
+		if (!vd->vdev_islog) {
+			vd->vdev_log_mg = metaslab_group_create(
+			    spa_embedded_log_class(spa), vd, 1);
+		}
+
 		/*
-		 * The spa ashift values currently only reflect the
-		 * general vdev classes. Class destination is late
-		 * binding so ashift checking had to wait until now
+		 * The spa ashift min/max only apply for the normal metaslab
+		 * class. Class destination is late binding so ashift boundary
+		 * setting had to wait until now.
 		 */
 		if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
 		    mc == spa_normal_class(spa) && vd->vdev_aux == NULL) {
@@ -1263,6 +1395,10 @@
 				spa->spa_max_ashift = vd->vdev_ashift;
 			if (vd->vdev_ashift < spa->spa_min_ashift)
 				spa->spa_min_ashift = vd->vdev_ashift;
+
+			uint64_t min_alloc = vdev_get_min_alloc(vd);
+			if (min_alloc < spa->spa_min_alloc)
+				spa->spa_min_alloc = min_alloc;
 		}
 	}
 }
@@ -1271,8 +1407,6 @@
 vdev_metaslab_init(vdev_t *vd, uint64_t txg)
 {
 	spa_t *spa = vd->vdev_spa;
-	objset_t *mos = spa->spa_meta_objset;
-	uint64_t m;
 	uint64_t oldc = vd->vdev_ms_count;
 	uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
 	metaslab_t **mspp;
@@ -1300,16 +1434,17 @@
 
 	vd->vdev_ms = mspp;
 	vd->vdev_ms_count = newc;
-	for (m = oldc; m < newc; m++) {
-		uint64_t object = 0;
 
+	for (uint64_t m = oldc; m < newc; m++) {
+		uint64_t object = 0;
 		/*
 		 * vdev_ms_array may be 0 if we are creating the "fake"
 		 * metaslabs for an indirect vdev for zdb's leak detection.
 		 * See zdb_leak_init().
 		 */
 		if (txg == 0 && vd->vdev_ms_array != 0) {
-			error = dmu_read(mos, vd->vdev_ms_array,
+			error = dmu_read(spa->spa_meta_objset,
+			    vd->vdev_ms_array,
 			    m * sizeof (uint64_t), sizeof (uint64_t), &object,
 			    DMU_READ_PREFETCH);
 			if (error != 0) {
@@ -1319,17 +1454,6 @@
 			}
 		}
 
-#ifndef _KERNEL
-		/*
-		 * To accommodate zdb_leak_init() fake indirect
-		 * metaslabs, we allocate a metaslab group for
-		 * indirect vdevs which normally don't have one.
-		 */
-		if (vd->vdev_mg == NULL) {
-			ASSERT0(vdev_is_concrete(vd));
-			vdev_metaslab_group_create(vd);
-		}
-#endif
 		error = metaslab_init(vd->vdev_mg, m, object, txg,
 		    &(vd->vdev_ms[m]));
 		if (error != 0) {
@@ -1339,6 +1463,47 @@
 		}
 	}
 
+	/*
+	 * Find the emptiest metaslab on the vdev and mark it for use for
+	 * embedded slog by moving it from the regular to the log metaslab
+	 * group.
+	 */
+	if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
+	    vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
+	    avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
+		uint64_t slog_msid = 0;
+		uint64_t smallest = UINT64_MAX;
+
+		/*
+		 * Note, we only search the new metaslabs, because the old
+		 * (pre-existing) ones may be active (e.g. have non-empty
+		 * range_tree's), and we don't move them to the new
+		 * metaslab_t.
+		 */
+		for (uint64_t m = oldc; m < newc; m++) {
+			uint64_t alloc =
+			    space_map_allocated(vd->vdev_ms[m]->ms_sm);
+			if (alloc < smallest) {
+				slog_msid = m;
+				smallest = alloc;
+			}
+		}
+		metaslab_t *slog_ms = vd->vdev_ms[slog_msid];
+		/*
+		 * The metaslab was marked as dirty at the end of
+		 * metaslab_init(). Remove it from the dirty list so that we
+		 * can uninitialize and reinitialize it to the new class.
+		 */
+		if (txg != 0) {
+			(void) txg_list_remove_this(&vd->vdev_ms_list,
+			    slog_ms, txg);
+		}
+		uint64_t sm_obj = space_map_object(slog_ms->ms_sm);
+		metaslab_fini(slog_ms);
+		VERIFY0(metaslab_init(vd->vdev_log_mg, slog_msid, sm_obj, txg,
+		    &vd->vdev_ms[slog_msid]));
+	}
+
 	if (txg == 0)
 		spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
 
@@ -1349,6 +1514,8 @@
 	 */
 	if (!expanding && !vd->vdev_removing) {
 		metaslab_group_activate(vd->vdev_mg);
+		if (vd->vdev_log_mg != NULL)
+			metaslab_group_activate(vd->vdev_log_mg);
 	}
 
 	if (txg == 0)
@@ -1377,7 +1544,12 @@
 
 	if (vd->vdev_ms != NULL) {
 		metaslab_group_t *mg = vd->vdev_mg;
+
 		metaslab_group_passivate(mg);
+		if (vd->vdev_log_mg != NULL) {
+			ASSERT(!vd->vdev_islog);
+			metaslab_group_passivate(vd->vdev_log_mg);
+		}
 
 		uint64_t count = vd->vdev_ms_count;
 		for (uint64_t m = 0; m < count; m++) {
@@ -1387,11 +1559,13 @@
 		}
 		vmem_free(vd->vdev_ms, count * sizeof (metaslab_t *));
 		vd->vdev_ms = NULL;
-
 		vd->vdev_ms_count = 0;
 
-		for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
+		for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
 			ASSERT0(mg->mg_histogram[i]);
+			if (vd->vdev_log_mg != NULL)
+				ASSERT0(vd->vdev_log_mg->mg_histogram[i]);
+		}
 	}
 	ASSERT0(vd->vdev_ms_count);
 	ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
@@ -1440,8 +1614,8 @@
 		} else {
 			ASSERT(zio->io_error != 0);
 			vdev_dbgmsg(vd, "failed probe");
-			zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
-			    spa, vd, NULL, NULL, 0, 0);
+			(void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
+			    spa, vd, NULL, NULL, 0);
 			zio->io_error = SET_ERROR(ENXIO);
 		}
 
@@ -1544,7 +1718,7 @@
 	for (int l = 1; l < VDEV_LABELS; l++) {
 		zio_nowait(zio_read_phys(pio, vd,
 		    vdev_label_offset(vd->vdev_psize, l,
-		    offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE,
+		    offsetof(vdev_label_t, vl_be)), VDEV_PAD_SIZE,
 		    abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE),
 		    ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
 		    ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE));
@@ -1558,6 +1732,14 @@
 }
 
 static void
+vdev_load_child(void *arg)
+{
+	vdev_t *cvd = (vdev_t *)arg;
+	/* No return value here: stash the load status on the vdev itself. */
+	cvd->vdev_load_error = vdev_load(cvd);
+}
+
+static void
 vdev_open_child(void *arg)
 {
 	vdev_t *vd = arg;
@@ -1582,39 +1764,68 @@
 	return (B_FALSE);
 }
 
+/*
+ * Default child filter: returns B_TRUE (open it) for any child passed in.
+ */
+static boolean_t
+vdev_default_open_children_func(vdev_t *vd)
+{
+	(void) vd;
+	return (B_TRUE);
+}
+
+/*
+ * Open the children of vd selected by open_func.  The opens run in this
+ * thread if any leaf under vd uses a ZFS volume (avoids a deadlock while
+ * holding the spa_namespace_lock) or no taskq exists; else via the taskq.
+ */
+static void
+vdev_open_children_impl(vdev_t *vd, vdev_open_children_func_t *open_func)
+{
+	int children = vd->vdev_children;
+	taskq_t *tq = taskq_create("vdev_open", children, minclsyspri,
+	    children, children, TASKQ_PREPOPULATE);
+
+	for (int c = 0; c < children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+		if (open_func(cvd) == B_FALSE)
+			continue;
+		if (tq == NULL || vdev_uses_zvols(vd)) {
+			cvd->vdev_open_error = vdev_open(cvd);
+		} else {
+			VERIFY(taskq_dispatch(tq, vdev_open_child,
+			    cvd, TQ_SLEEP) != TASKQID_INVALID);
+		}
+	}
+	if (tq != NULL) {
+		taskq_wait(tq);
+		taskq_destroy(tq);
+	}
+
+	/* Aggregate after the wait: a dispatched open may not have run yet. */
+	vd->vdev_nonrot = B_TRUE;
+	for (int c = 0; c < children; c++) {
+		if (open_func(vd->vdev_child[c]) != B_FALSE)
+			vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+	}
+}
+
+/*
+ * Open all child vdevs.
+ */
 void
 vdev_open_children(vdev_t *vd)
 {
-	taskq_t *tq;
-	int children = vd->vdev_children;
+	vdev_open_children_impl(vd, vdev_default_open_children_func);
+}
 
-	/*
-	 * in order to handle pools on top of zvols, do the opens
-	 * in a single thread so that the same thread holds the
-	 * spa_namespace_lock
-	 */
-	if (vdev_uses_zvols(vd)) {
-retry_sync:
-		for (int c = 0; c < children; c++)
-			vd->vdev_child[c]->vdev_open_error =
-			    vdev_open(vd->vdev_child[c]);
-	} else {
-		tq = taskq_create("vdev_open", children, minclsyspri,
-		    children, children, TASKQ_PREPOPULATE);
-		if (tq == NULL)
-			goto retry_sync;
-
-		for (int c = 0; c < children; c++)
-			VERIFY(taskq_dispatch(tq, vdev_open_child,
-			    vd->vdev_child[c], TQ_SLEEP) != TASKQID_INVALID);
-
-		taskq_destroy(tq);
-	}
-
-	vd->vdev_nonrot = B_TRUE;
-
-	for (int c = 0; c < children; c++)
-		vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+/*
+ * Conditionally open a subset of child vdevs.
+ */
+void
+vdev_open_children_subset(vdev_t *vd, vdev_open_children_func_t *open_func)
+{
+	vdev_open_children_impl(vd, open_func);
 }
 
 /*
@@ -1633,6 +1844,57 @@
 }
 
 /*
+ * Choose the best of two ashifts, preferring one between logical ashift
+ * (absolute minimum) and administrator defined maximum, otherwise take
+ * the biggest of the two.
+ */
+uint64_t
+vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b)
+{
+	/* "Preferred" means inside (logical, zfs_vdev_max_auto_ashift]. */
+	boolean_t a_pref = (a > logical && a <= zfs_vdev_max_auto_ashift);
+	boolean_t b_pref = (b > logical && b <= zfs_vdev_max_auto_ashift);
+	/* Exactly one candidate is preferred: it wins outright. */
+	if (a_pref != b_pref)
+		return (a_pref ? a : b);
+	/* Both or neither preferred: take the larger. */
+	return (MAX(a, b));
+}
+
+/*
+ * Tune the configured ashift of a top-level vdev for performance: raise it
+ * toward the physical ashift when that is larger and within the
+ * administrator defined maximum, and never leave it below the administrator
+ * defined minimum.  The ashift is only ever raised, never lowered.
+ */
+static void
+vdev_ashift_optimize(vdev_t *vd)
+{
+	ASSERT(vd == vd->vdev_top);
+
+	if (vd->vdev_ashift >= vd->vdev_physical_ashift ||
+	    vd->vdev_physical_ashift > zfs_vdev_max_auto_ashift) {
+		/*
+		 * Either the physical ashift is not larger than the current
+		 * ashift, or it exceeds the allowed maximum.  Do not cap at
+		 * the max (that would cause failures when logical > physical
+		 * ashift); only raise the ashift to the min if needed.
+		 */
+		vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
+		    vd->vdev_ashift);
+		return;
+	}
+
+	/*
+	 * Physical ashift is larger and permitted: adopt it, bounded below
+	 * by the min auto ashift and above by the max auto ashift.
+	 */
+	vd->vdev_ashift = MIN(
+	    MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
+	    MAX(zfs_vdev_min_auto_ashift, vd->vdev_physical_ashift));
+}
+
+/*
  * Prepare a virtual device for access.
  */
 int
@@ -1643,7 +1905,8 @@
 	uint64_t osize = 0;
 	uint64_t max_osize = 0;
 	uint64_t asize, max_asize, psize;
-	uint64_t ashift = 0;
+	uint64_t logical_ashift = 0;
+	uint64_t physical_ashift = 0;
 
 	ASSERT(vd->vdev_open_thread == curthread ||
 	    spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -1673,7 +1936,15 @@
 		return (SET_ERROR(ENXIO));
 	}
 
-	error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
+	error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
+	    &logical_ashift, &physical_ashift);
+
+	/* Keep the device in removed state if unplugged */
+	if (error == ENOENT && vd->vdev_removed) {
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
+		    VDEV_AUX_NONE);
+		return (error);
+	}
 
 	/*
 	 * Physical volume size should never be larger than its max size, unless
@@ -1692,7 +1963,7 @@
 	 */
 	vd->vdev_reopening = B_FALSE;
 	if (zio_injection_enabled && error == 0)
-		error = zio_handle_device_injection(vd, NULL, ENXIO);
+		error = zio_handle_device_injection(vd, NULL, SET_ERROR(ENXIO));
 
 	if (error) {
 		if (vd->vdev_removed &&
@@ -1789,6 +2060,18 @@
 		return (SET_ERROR(EINVAL));
 	}
 
+	/*
+	 * We can always set the logical/physical ashift members since
+	 * their values are only used to calculate the vdev_ashift when
+	 * the device is first added to the config. These values should
+	 * not be used for anything else since they may change whenever
+	 * the device is reopened and we don't store them in the label.
+	 */
+	vd->vdev_physical_ashift =
+	    MAX(physical_ashift, vd->vdev_physical_ashift);
+	vd->vdev_logical_ashift = MAX(logical_ashift,
+	    vd->vdev_logical_ashift);
+
 	if (vd->vdev_asize == 0) {
 		/*
 		 * This is the first-ever open, so use the computed values.
@@ -1796,8 +2079,23 @@
 		 */
 		vd->vdev_asize = asize;
 		vd->vdev_max_asize = max_asize;
+
+		/*
+		 * If the vdev_ashift was not overridden at creation time,
+		 * then set it to the logical ashift and optimize the ashift.
+		 */
 		if (vd->vdev_ashift == 0) {
-			vd->vdev_ashift = ashift; /* use detected value */
+			vd->vdev_ashift = vd->vdev_logical_ashift;
+
+			if (vd->vdev_logical_ashift > ASHIFT_MAX) {
+				vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+				    VDEV_AUX_ASHIFT_TOO_BIG);
+				return (SET_ERROR(EDOM));
+			}
+
+			if (vd->vdev_top == vd) {
+				vdev_ashift_optimize(vd);
+			}
 		}
 		if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
 		    vd->vdev_ashift > ASHIFT_MAX)) {
@@ -1807,16 +2105,17 @@
 		}
 	} else {
 		/*
-		 * Detect if the alignment requirement has increased.
-		 * We don't want to make the pool unavailable, just
-		 * post an event instead.
+		 * Make sure the alignment required hasn't increased.
 		 */
-		if (ashift > vd->vdev_top->vdev_ashift &&
+		if (vd->vdev_ashift > vd->vdev_top->vdev_ashift &&
 		    vd->vdev_ops->vdev_op_leaf) {
-			zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
-			    spa, vd, NULL, NULL, 0, 0);
+			(void) zfs_ereport_post(
+			    FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
+			    spa, vd, NULL, NULL, 0);
+			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_BAD_LABEL);
+			return (SET_ERROR(EDOM));
 		}
-
 		vd->vdev_max_asize = max_asize;
 	}
 
@@ -1852,15 +2151,13 @@
 	}
 
 	/*
-	 * Track the min and max ashift values for normal data devices.
+	 * Track the minimum allocation size.
 	 */
 	if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
-	    vd->vdev_alloc_bias == VDEV_BIAS_NONE &&
 	    vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
-		if (vd->vdev_ashift > spa->spa_max_ashift)
-			spa->spa_max_ashift = vd->vdev_ashift;
-		if (vd->vdev_ashift < spa->spa_min_ashift)
-			spa->spa_min_ashift = vd->vdev_ashift;
+		uint64_t min_alloc = vdev_get_min_alloc(vd);
+		if (min_alloc < spa->spa_min_alloc)
+			spa->spa_min_alloc = min_alloc;
 	}
 
 	/*
@@ -1874,6 +2171,16 @@
 	return (0);
 }
 
+static void
+vdev_validate_child(void *arg)
+{
+	vdev_t *cvd = (vdev_t *)arg;
+	/* Mark this thread as the validator while vdev_validate() runs. */
+	cvd->vdev_validate_thread = curthread;
+	cvd->vdev_validate_error = vdev_validate(cvd);
+	cvd->vdev_validate_thread = NULL;
+}
+
 /*
  * Called once the vdevs are all opened, this routine validates the label
  * contents. This needs to be done before vdev_load() so that we don't
@@ -1888,18 +2195,43 @@
 vdev_validate(vdev_t *vd)
 {
 	spa_t *spa = vd->vdev_spa;
+	taskq_t *tq = NULL;
 	nvlist_t *label;
 	uint64_t guid = 0, aux_guid = 0, top_guid;
 	uint64_t state;
 	nvlist_t *nvl;
 	uint64_t txg;
+	int children = vd->vdev_children;
 
 	if (vdev_validate_skip)
 		return (0);
 
-	for (uint64_t c = 0; c < vd->vdev_children; c++)
-		if (vdev_validate(vd->vdev_child[c]) != 0)
+	if (children > 0) {
+		tq = taskq_create("vdev_validate", children, minclsyspri,
+		    children, children, TASKQ_PREPOPULATE);
+	}
+
+	for (uint64_t c = 0; c < children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (tq == NULL || vdev_uses_zvols(cvd)) {
+			vdev_validate_child(cvd);
+		} else {
+			VERIFY(taskq_dispatch(tq, vdev_validate_child, cvd,
+			    TQ_SLEEP) != TASKQID_INVALID);
+		}
+	}
+	if (tq != NULL) {
+		taskq_wait(tq);
+		taskq_destroy(tq);
+	}
+	for (int c = 0; c < children; c++) {
+		int error = vd->vdev_child[c]->vdev_validate_error;
+
+		if (error != 0)
 			return (SET_ERROR(EBADF));
+	}
+
 
 	/*
 	 * If the device has already failed, or was marked offline, don't do
@@ -1922,7 +2254,7 @@
 		txg = spa_last_synced_txg(spa);
 
 	if ((label = vdev_label_read_config(vd, txg)) == NULL) {
-		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_BAD_LABEL);
 		vdev_dbgmsg(vd, "vdev_validate: failed reading config for "
 		    "txg %llu", (u_longlong_t)txg);
@@ -2072,6 +2404,7 @@
 static void
 vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd)
 {
+	char *old, *new;
 	if (svd->vdev_path != NULL && dvd->vdev_path != NULL) {
 		if (strcmp(svd->vdev_path, dvd->vdev_path) != 0) {
 			zfs_dbgmsg("vdev_copy_path: vdev %llu: path changed "
@@ -2085,6 +2418,29 @@
 		zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'",
 		    (u_longlong_t)dvd->vdev_guid, dvd->vdev_path);
 	}
+
+	/*
+	 * Our enclosure sysfs path may have changed between imports
+	 */
+	old = dvd->vdev_enc_sysfs_path;
+	new = svd->vdev_enc_sysfs_path;
+	if ((old != NULL && new == NULL) ||
+	    (old == NULL && new != NULL) ||
+	    ((old != NULL && new != NULL) && strcmp(new, old) != 0)) {
+		zfs_dbgmsg("vdev_copy_path: vdev %llu: vdev_enc_sysfs_path "
+		    "changed from '%s' to '%s'", (u_longlong_t)dvd->vdev_guid,
+		    old, new);
+
+		if (dvd->vdev_enc_sysfs_path)
+			spa_strfree(dvd->vdev_enc_sysfs_path);
+
+		if (svd->vdev_enc_sysfs_path) {
+			dvd->vdev_enc_sysfs_path = spa_strdup(
+			    svd->vdev_enc_sysfs_path);
+		} else {
+			dvd->vdev_enc_sysfs_path = NULL;
+		}
+	}
 }
 
 /*
@@ -2187,9 +2543,11 @@
 vdev_close(vdev_t *vd)
 {
 	vdev_t *pvd = vd->vdev_parent;
-	ASSERTV(spa_t *spa = vd->vdev_spa);
+	spa_t *spa __maybe_unused = vd->vdev_spa;
 
-	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+	ASSERT(vd != NULL);
+	ASSERT(vd->vdev_open_thread == curthread ||
+	    spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
 	/*
 	 * If our parent is reopening, then we are as well, unless we are
@@ -2228,7 +2586,7 @@
 	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_hold(vd->vdev_child[c]);
 
-	if (vd->vdev_ops->vdev_op_leaf)
+	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_ops->vdev_op_hold != NULL)
 		vd->vdev_ops->vdev_op_hold(vd);
 }
 
@@ -2239,7 +2597,7 @@
 	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_rele(vd->vdev_child[c]);
 
-	if (vd->vdev_ops->vdev_op_leaf)
+	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_ops->vdev_op_rele != NULL)
 		vd->vdev_ops->vdev_op_rele(vd);
 }
 
@@ -2269,14 +2627,36 @@
 	if (vd->vdev_aux) {
 		(void) vdev_validate_aux(vd);
 		if (vdev_readable(vd) && vdev_writeable(vd) &&
-		    vd->vdev_aux == &spa->spa_l2cache &&
-		    !l2arc_vdev_present(vd))
-			l2arc_add_vdev(spa, vd);
+		    vd->vdev_aux == &spa->spa_l2cache) {
+			/*
+			 * In case the vdev is present we should evict all ARC
+			 * buffers and pointers to log blocks and reclaim their
+			 * space before restoring its contents to L2ARC.
+			 */
+			if (l2arc_vdev_present(vd)) {
+				l2arc_rebuild_vdev(vd, B_TRUE);
+			} else {
+				l2arc_add_vdev(spa, vd);
+			}
+			spa_async_request(spa, SPA_ASYNC_L2CACHE_REBUILD);
+			spa_async_request(spa, SPA_ASYNC_L2CACHE_TRIM);
+		}
 	} else {
 		(void) vdev_validate(vd);
 	}
 
 	/*
+	 * Recheck if resilver is still needed and cancel any
+	 * scheduled resilver if resilver is unneeded.
+	 */
+	if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
+	    spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
+		mutex_enter(&spa->spa_async_lock);
+		spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
+		mutex_exit(&spa->spa_async_lock);
+	}
+
+	/*
 	 * Reassess parent vdev's health.
 	 */
 	vdev_propagate_state(vd);
@@ -2296,7 +2676,7 @@
 
 	if (error || vd->vdev_state != VDEV_STATE_HEALTHY) {
 		vdev_close(vd);
-		return (error ? error : ENXIO);
+		return (error ? error : SET_ERROR(ENXIO));
 	}
 
 	/*
@@ -2475,15 +2855,12 @@
 
 	/*
 	 * While we are loading the pool, the DTLs have not been loaded yet.
-	 * Ignore the DTLs and try all devices.  This avoids a recursive
-	 * mutex enter on the vdev_dtl_lock, and also makes us try hard
-	 * when loading the pool (relying on the checksum to ensure that
-	 * we get the right data -- note that we while loading, we are
-	 * only reading the MOS, which is always checksummed).
+	 * This isn't a problem but it can result in devices being tried
+	 * which are known to not have the data.  In which case, the import
+	 * is relying on the checksum to ensure that we get the right data.
+	 * Note that while importing we are only reading the MOS, which is
+	 * always checksummed.
 	 */
-	if (vd->vdev_spa->spa_load_state != SPA_LOAD_NONE)
-		return (B_FALSE);
-
 	mutex_enter(&vd->vdev_dtl_lock);
 	if (!range_tree_is_empty(rt))
 		dirty = range_tree_contains(rt, txg, size);
@@ -2506,10 +2883,28 @@
 }
 
 /*
- * Returns B_TRUE if vdev determines offset needs to be resilvered.
+ * Check if the txg falls within the range which must be
+ * resilvered.  DVAs outside this range can always be skipped.
  */
 boolean_t
-vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+vdev_default_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
+{
+	(void) dva, (void) psize;
+
+	/* Set by sequential resilver. */
+	if (phys_birth == TXG_UNKNOWN)
+		return (B_TRUE);
+
+	return (vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1));
+}
+
+/*
+ * Returns B_TRUE if the vdev determines the DVA needs to be resilvered.
+ */
+boolean_t
+vdev_dtl_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
 {
 	ASSERT(vd != vd->vdev_spa->spa_root_vdev);
 
@@ -2517,7 +2912,8 @@
 	    vd->vdev_ops->vdev_op_leaf)
 		return (B_TRUE);
 
-	return (vd->vdev_ops->vdev_op_need_resilver(vd, offset, psize));
+	return (vd->vdev_ops->vdev_op_need_resilver(vd, dva, psize,
+	    phys_birth));
 }
 
 /*
@@ -2526,14 +2922,11 @@
 static uint64_t
 vdev_dtl_min(vdev_t *vd)
 {
-	range_seg_t *rs;
-
 	ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
 	ASSERT3U(range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0);
 	ASSERT0(vd->vdev_children);
 
-	rs = avl_first(&vd->vdev_dtl[DTL_MISSING]->rt_root);
-	return (rs->rs_start - 1);
+	return (range_tree_min(vd->vdev_dtl[DTL_MISSING]) - 1);
 }
 
 /*
@@ -2542,14 +2935,11 @@
 static uint64_t
 vdev_dtl_max(vdev_t *vd)
 {
-	range_seg_t *rs;
-
 	ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
 	ASSERT3U(range_tree_space(vd->vdev_dtl[DTL_MISSING]), !=, 0);
 	ASSERT0(vd->vdev_children);
 
-	rs = avl_last(&vd->vdev_dtl[DTL_MISSING]->rt_root);
-	return (rs->rs_end);
+	return (range_tree_max(vd->vdev_dtl[DTL_MISSING]));
 }
 
 /*
@@ -2561,11 +2951,8 @@
  * excise the DTLs.
  */
 static boolean_t
-vdev_dtl_should_excise(vdev_t *vd)
+vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
 {
-	spa_t *spa = vd->vdev_spa;
-	dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
-
 	ASSERT0(vd->vdev_children);
 
 	if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2574,23 +2961,52 @@
 	if (vd->vdev_resilver_deferred)
 		return (B_FALSE);
 
-	if (vd->vdev_resilver_txg == 0 ||
-	    range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
+	if (range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
 		return (B_TRUE);
 
-	/*
-	 * When a resilver is initiated the scan will assign the scn_max_txg
-	 * value to the highest txg value that exists in all DTLs. If this
-	 * device's max DTL is not part of this scan (i.e. it is not in
-	 * the range (scn_min_txg, scn_max_txg] then it is not eligible
-	 * for excision.
-	 */
-	if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
-		ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd));
-		ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg);
-		ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg);
-		return (B_TRUE);
+	if (rebuild_done) {
+		vdev_rebuild_t *vr = &vd->vdev_top->vdev_rebuild_config;
+		vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+
+		/* Rebuild not initiated by attach */
+		if (vd->vdev_rebuild_txg == 0)
+			return (B_TRUE);
+
+		/*
+		 * When a rebuild completes without error then all missing data
+		 * up to the rebuild max txg has been reconstructed and the DTL
+		 * is eligible for excision.
+		 */
+		if (vrp->vrp_rebuild_state == VDEV_REBUILD_COMPLETE &&
+		    vdev_dtl_max(vd) <= vrp->vrp_max_txg) {
+			ASSERT3U(vrp->vrp_min_txg, <=, vdev_dtl_min(vd));
+			ASSERT3U(vrp->vrp_min_txg, <, vd->vdev_rebuild_txg);
+			ASSERT3U(vd->vdev_rebuild_txg, <=, vrp->vrp_max_txg);
+			return (B_TRUE);
+		}
+	} else {
+		dsl_scan_t *scn = vd->vdev_spa->spa_dsl_pool->dp_scan;
+		dsl_scan_phys_t *scnp __maybe_unused = &scn->scn_phys;
+
+		/* Resilver not initiated by attach */
+		if (vd->vdev_resilver_txg == 0)
+			return (B_TRUE);
+
+		/*
+		 * When a resilver is initiated the scan will assign the
+		 * scn_max_txg value to the highest txg value that exists
+		 * in all DTLs. If this device's max DTL is not part of this
+		 * scan (i.e. it is not in the range (scn_min_txg, scn_max_txg]
+		 * then it is not eligible for excision.
+		 */
+		if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
+			ASSERT3U(scnp->scn_min_txg, <=, vdev_dtl_min(vd));
+			ASSERT3U(scnp->scn_min_txg, <, vd->vdev_resilver_txg);
+			ASSERT3U(vd->vdev_resilver_txg, <=, scnp->scn_max_txg);
+			return (B_TRUE);
+		}
 	}
+
 	return (B_FALSE);
 }
 
@@ -2599,7 +3015,8 @@
  * write operations will be issued to the pool.
  */
 void
-vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
+vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
+    boolean_t scrub_done, boolean_t rebuild_done)
 {
 	spa_t *spa = vd->vdev_spa;
 	avl_tree_t reftree;
@@ -2609,22 +3026,28 @@
 
 	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_dtl_reassess(vd->vdev_child[c], txg,
-		    scrub_txg, scrub_done);
+		    scrub_txg, scrub_done, rebuild_done);
 
 	if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
 		return;
 
 	if (vd->vdev_ops->vdev_op_leaf) {
 		dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+		vdev_rebuild_t *vr = &vd->vdev_top->vdev_rebuild_config;
+		boolean_t check_excise = B_FALSE;
 		boolean_t wasempty = B_TRUE;
 
 		mutex_enter(&vd->vdev_dtl_lock);
 
 		/*
-		 * If requested, pretend the scan completed cleanly.
+		 * If requested, pretend the scan or rebuild completed cleanly.
 		 */
-		if (zfs_scan_ignore_errors && scn)
-			scn->scn_phys.scn_errors = 0;
+		if (zfs_scan_ignore_errors) {
+			if (scn != NULL)
+				scn->scn_phys.scn_errors = 0;
+			if (vr != NULL)
+				vr->vr_rebuild_phys.vrp_errors = 0;
+		}
 
 		if (scrub_txg != 0 &&
 		    !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
@@ -2639,21 +3062,29 @@
 		}
 
 		/*
-		 * If we've completed a scan cleanly then determine
-		 * if this vdev should remove any DTLs. We only want to
-		 * excise regions on vdevs that were available during
-		 * the entire duration of this scan.
+		 * If we've completed a scrub/resilver or a rebuild cleanly
+		 * then determine if this vdev should remove any DTLs. We
+		 * only want to excise regions on vdevs that were available
+		 * during the entire duration of this scan.
 		 */
-		if (scrub_txg != 0 &&
-		    (spa->spa_scrub_started ||
-		    (scn != NULL && scn->scn_phys.scn_errors == 0)) &&
-		    vdev_dtl_should_excise(vd)) {
+		if (rebuild_done &&
+		    vr != NULL && vr->vr_rebuild_phys.vrp_errors == 0) {
+			check_excise = B_TRUE;
+		} else {
+			if (spa->spa_scrub_started ||
+			    (scn != NULL && scn->scn_phys.scn_errors == 0)) {
+				check_excise = B_TRUE;
+			}
+		}
+
+		if (scrub_txg && check_excise &&
+		    vdev_dtl_should_excise(vd, rebuild_done)) {
 			/*
-			 * We completed a scrub up to scrub_txg.  If we
-			 * did it without rebooting, then the scrub dtl
-			 * will be valid, so excise the old region and
-			 * fold in the scrub dtl.  Otherwise, leave the
-			 * dtl as-is if there was an error.
+			 * We completed a scrub, resilver or rebuild up to
+			 * scrub_txg.  If we did it without rebooting, then
+			 * the scrub dtl will be valid, so excise the old
+			 * region and fold in the scrub dtl.  Otherwise,
+			 * leave the dtl as-is if there was an error.
 			 *
 			 * There's little trick here: to excise the beginning
 			 * of the DTL_MISSING map, we put it into a reference
@@ -2696,15 +3127,20 @@
 			    range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
 
 		/*
-		 * If the vdev was resilvering and no longer has any
-		 * DTLs then reset its resilvering flag and dirty
+		 * If the vdev was resilvering or rebuilding and no longer
+		 * has any DTLs then reset the appropriate flag and dirty
 		 * the top level so that we persist the change.
 		 */
-		if (txg != 0 && vd->vdev_resilver_txg != 0 &&
+		if (txg != 0 &&
 		    range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]) &&
 		    range_tree_is_empty(vd->vdev_dtl[DTL_OUTAGE])) {
-			vd->vdev_resilver_txg = 0;
-			vdev_config_dirty(vd->vdev_top);
+			if (vd->vdev_rebuild_txg != 0) {
+				vd->vdev_rebuild_txg = 0;
+				vdev_config_dirty(vd->vdev_top);
+			} else if (vd->vdev_resilver_txg != 0) {
+				vd->vdev_resilver_txg = 0;
+				vdev_config_dirty(vd->vdev_top);
+			}
 		}
 
 		mutex_exit(&vd->vdev_dtl_lock);
@@ -2722,8 +3158,8 @@
 			continue;			/* leaf vdevs only */
 		if (t == DTL_PARTIAL)
 			minref = 1;			/* i.e. non-zero */
-		else if (vd->vdev_nparity != 0)
-			minref = vd->vdev_nparity + 1;	/* RAID-Z */
+		else if (vdev_get_nparity(vd) != 0)
+			minref = vdev_get_nparity(vd) + 1; /* RAID-Z, dRAID */
 		else
 			minref = vd->vdev_children;	/* any kind of mirror */
 		space_reftree_create(&reftree);
@@ -2739,26 +3175,68 @@
 	mutex_exit(&vd->vdev_dtl_lock);
 }
 
+/*
+ * Iterate over all the vdevs except spare, and post kobj events
+ */
+void
+vdev_post_kobj_evt(vdev_t *vd)
+{
+	if (vd->vdev_ops->vdev_op_kobj_evt_post &&
+	    vd->vdev_kobj_flag == B_FALSE) {
+		vd->vdev_kobj_flag = B_TRUE;
+		vd->vdev_ops->vdev_op_kobj_evt_post(vd);
+	}
+
+	for (int c = 0; c < vd->vdev_children; c++)
+		vdev_post_kobj_evt(vd->vdev_child[c]);
+}
+
+/*
+ * Iterate over all the vdevs except spare, and clear the kobj event flag
+ */
+void
+vdev_clear_kobj_evt(vdev_t *vd)
+{
+	vd->vdev_kobj_flag = B_FALSE;
+
+	for (int c = 0; c < vd->vdev_children; c++)
+		vdev_clear_kobj_evt(vd->vdev_child[c]);
+}
+
 int
 vdev_dtl_load(vdev_t *vd)
 {
 	spa_t *spa = vd->vdev_spa;
 	objset_t *mos = spa->spa_meta_objset;
+	range_tree_t *rt;
 	int error = 0;
 
 	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_dtl_object != 0) {
 		ASSERT(vdev_is_concrete(vd));
 
+		/*
+		 * If the dtl cannot be sync'd there is no need to open it.
+		 */
+		if (spa->spa_mode == SPA_MODE_READ && !spa->spa_read_spacemaps)
+			return (0);
+
 		error = space_map_open(&vd->vdev_dtl_sm, mos,
 		    vd->vdev_dtl_object, 0, -1ULL, 0);
 		if (error)
 			return (error);
 		ASSERT(vd->vdev_dtl_sm != NULL);
 
-		mutex_enter(&vd->vdev_dtl_lock);
-		error = space_map_load(vd->vdev_dtl_sm,
-		    vd->vdev_dtl[DTL_MISSING], SM_ALLOC);
-		mutex_exit(&vd->vdev_dtl_lock);
+		rt = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+		error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC);
+		if (error == 0) {
+			mutex_enter(&vd->vdev_dtl_lock);
+			range_tree_walk(rt, range_tree_add,
+			    vd->vdev_dtl[DTL_MISSING]);
+			mutex_exit(&vd->vdev_dtl_lock);
+		}
+
+		range_tree_vacate(rt, NULL, NULL);
+		range_tree_destroy(rt);
 
 		return (error);
 	}
@@ -2842,7 +3320,7 @@
 	}
 }
 
-void
+static void
 vdev_dtl_sync(vdev_t *vd, uint64_t txg)
 {
 	spa_t *spa = vd->vdev_spa;
@@ -2882,7 +3360,7 @@
 	if (vd->vdev_dtl_sm == NULL) {
 		uint64_t new_object;
 
-		new_object = space_map_alloc(mos, vdev_dtl_sm_blksz, tx);
+		new_object = space_map_alloc(mos, zfs_vdev_dtl_sm_blksz, tx);
 		VERIFY3U(new_object, !=, 0);
 
 		VERIFY0(space_map_open(&vd->vdev_dtl_sm, mos, new_object,
@@ -2890,13 +3368,13 @@
 		ASSERT(vd->vdev_dtl_sm != NULL);
 	}
 
-	rtsync = range_tree_create(NULL, NULL);
+	rtsync = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 
 	mutex_enter(&vd->vdev_dtl_lock);
 	range_tree_walk(rt, range_tree_add, rtsync);
 	mutex_exit(&vd->vdev_dtl_lock);
 
-	space_map_truncate(vd->vdev_dtl_sm, vdev_dtl_sm_blksz, tx);
+	space_map_truncate(vd->vdev_dtl_sm, zfs_vdev_dtl_sm_blksz, tx);
 	space_map_write(vd->vdev_dtl_sm, rtsync, SM_ALLOC, SM_NO_VDEVID, tx);
 	range_tree_vacate(rtsync, NULL, NULL);
 
@@ -2940,13 +3418,15 @@
 	 * If not, we can safely offline/detach/remove the device.
 	 */
 	vd->vdev_cant_read = B_TRUE;
-	vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
+	vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
 	required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
 	vd->vdev_cant_read = cant_read;
-	vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
+	vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
 
-	if (!required && zio_injection_enabled)
-		required = !!zio_handle_device_injection(vd, NULL, ECHILD);
+	if (!required && zio_injection_enabled) {
+		required = !!zio_handle_device_injection(vd, NULL,
+		    SET_ERROR(ECHILD));
+	}
 
 	return (required);
 }
@@ -3019,18 +3499,46 @@
 int
 vdev_load(vdev_t *vd)
 {
+	int children = vd->vdev_children;
 	int error = 0;
+	taskq_t *tq = NULL;
+
+	/*
+	 * It's only worthwhile to use the taskq for the root vdev, because the
+	 * slow part is metaslab_init, and that only happens for top-level
+	 * vdevs.
+	 */
+	if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) {
+		tq = taskq_create("vdev_load", children, minclsyspri,
+		    children, children, TASKQ_PREPOPULATE);
+	}
 
 	/*
 	 * Recursively load all children.
 	 */
 	for (int c = 0; c < vd->vdev_children; c++) {
-		error = vdev_load(vd->vdev_child[c]);
-		if (error != 0) {
-			return (error);
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (tq == NULL || vdev_uses_zvols(cvd)) {
+			cvd->vdev_load_error = vdev_load(cvd);
+		} else {
+			VERIFY(taskq_dispatch(tq, vdev_load_child,
+			    cvd, TQ_SLEEP) != TASKQID_INVALID);
 		}
 	}
 
+	if (tq != NULL) {
+		taskq_wait(tq);
+		taskq_destroy(tq);
+	}
+
+	for (int c = 0; c < vd->vdev_children; c++) {
+		int error = vd->vdev_child[c]->vdev_load_error;
+
+		if (error != 0)
+			return (error);
+	}
+
 	vdev_set_deflate_ratio(vd);
 
 	/*
@@ -3040,11 +3548,32 @@
 		spa_t *spa = vd->vdev_spa;
 		char bias_str[64];
 
-		if (zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+		error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
 		    VDEV_TOP_ZAP_ALLOCATION_BIAS, 1, sizeof (bias_str),
-		    bias_str) == 0) {
+		    bias_str);
+		if (error == 0) {
 			ASSERT(vd->vdev_alloc_bias == VDEV_BIAS_NONE);
 			vd->vdev_alloc_bias = vdev_derive_alloc_bias(bias_str);
+		} else if (error != ENOENT) {
+			vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_CORRUPT_DATA);
+			vdev_dbgmsg(vd, "vdev_load: zap_lookup(top_zap=%llu) "
+			    "failed [error=%d]", vd->vdev_top_zap, error);
+			return (error);
+		}
+	}
+
+	/*
+	 * Load any rebuild state from the top-level vdev zap.
+	 */
+	if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+		error = vdev_rebuild_load(vd);
+		if (error && error != ENOTSUP) {
+			vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_CORRUPT_DATA);
+			vdev_dbgmsg(vd, "vdev_load: vdev_rebuild_load "
+			    "failed [error=%d]", error);
+			return (error);
 		}
 	}
 
@@ -3187,6 +3716,26 @@
 	return (0);
 }
 
+static void
+vdev_destroy_ms_flush_data(vdev_t *vd, dmu_tx_t *tx)
+{
+	objset_t *mos = spa_meta_objset(vd->vdev_spa);
+
+	if (vd->vdev_top_zap == 0)
+		return;
+
+	uint64_t object = 0;
+	int err = zap_lookup(mos, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, sizeof (uint64_t), 1, &object);
+	if (err == ENOENT)
+		return;
+	VERIFY0(err);
+
+	VERIFY0(dmu_object_free(mos, object, tx));
+	VERIFY0(zap_remove(mos, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, tx));
+}
+
 /*
  * Free the objects used to store this vdev's spacemaps, and the array
  * that points to them.
@@ -3214,6 +3763,7 @@
 
 	kmem_free(smobj_array, array_bytes);
 	VERIFY0(dmu_object_free(mos, vd->vdev_ms_array, tx));
+	vdev_destroy_ms_flush_data(vd, tx);
 	vd->vdev_ms_array = 0;
 }
 
@@ -3249,22 +3799,11 @@
 	    != NULL)
 		metaslab_sync_done(msp, txg);
 
-	/*
-	 * Because this function is only called on dirty vdevs, it's possible
-	 * we won't consider all metaslabs for unloading on every
-	 * txg. However, unless the system is largely idle it is likely that
-	 * we will dirty all vdevs within a few txgs.
-	 */
-	for (int i = 0; i < vd->vdev_ms_count; i++) {
-		msp = vd->vdev_ms[i];
-		mutex_enter(&msp->ms_lock);
-		if (msp->ms_sm != NULL)
-			metaslab_potentially_unload(msp, txg);
-		mutex_exit(&msp->ms_lock);
-	}
-
-	if (reassess)
+	if (reassess) {
 		metaslab_sync_reassess(vd->vdev_mg);
+		if (vd->vdev_log_mg != NULL)
+			metaslab_sync_reassess(vd->vdev_log_mg);
+	}
 }
 
 void
@@ -3343,10 +3882,10 @@
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
-		return (spa_vdev_state_exit(spa, NULL, ENODEV));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
 
 	if (!vd->vdev_ops->vdev_op_leaf)
-		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
 
 	tvd = vd->vdev_top;
 
@@ -3425,10 +3964,10 @@
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
-		return (spa_vdev_state_exit(spa, NULL, ENODEV));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
 
 	if (!vd->vdev_ops->vdev_op_leaf)
-		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
 
 	/*
 	 * If the vdev is already faulted, then don't do anything.
@@ -3444,6 +3983,36 @@
 	return (spa_vdev_state_exit(spa, vd, 0));
 }
 
+int
+vdev_remove_wanted(spa_t *spa, uint64_t guid)
+{
+	vdev_t *vd;
+
+	spa_vdev_state_enter(spa, SCL_NONE);
+
+	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
+
+	/*
+	 * If the vdev is already removed, or is expanding (which can trigger
+	 * repartition add/remove events), then don't do anything.
+	 */
+	if (vd->vdev_removed || vd->vdev_expanding)
+		return (spa_vdev_state_exit(spa, NULL, 0));
+
+	/*
+	 * Confirm the vdev has been removed, otherwise don't do anything.
+	 */
+	if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL)))
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
+
+	vd->vdev_remove_wanted = B_TRUE;
+	spa_async_request(spa, SPA_ASYNC_REMOVE);
+
+	return (spa_vdev_state_exit(spa, vd, 0));
+}
+
+
 /*
  * Online the given vdev.
  *
@@ -3462,10 +4031,10 @@
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
-		return (spa_vdev_state_exit(spa, NULL, ENODEV));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
 
 	if (!vd->vdev_ops->vdev_op_leaf)
-		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
 
 	wasoffline = (vd->vdev_offline || vd->vdev_tmpoffline);
 	oldstate = vd->vdev_state;
@@ -3517,9 +4086,14 @@
 	}
 	mutex_exit(&vd->vdev_initialize_lock);
 
-	/* Restart trimming if necessary */
+	/*
+	 * Restart trimming if necessary. We do not restart trimming for cache
+	 * devices here. This is triggered by l2arc_rebuild_vdev()
+	 * asynchronously for the whole device or in l2arc_evict() as it evicts
+	 * space for upcoming writes.
+	 */
 	mutex_enter(&vd->vdev_trim_lock);
-	if (vdev_writeable(vd) &&
+	if (vdev_writeable(vd) && !vd->vdev_isl2cache &&
 	    vd->vdev_trim_thread == NULL &&
 	    vd->vdev_trim_state == VDEV_TRIM_ACTIVE) {
 		(void) vdev_trim(vd, vd->vdev_trim_rate, vd->vdev_trim_partial,
@@ -3529,9 +4103,19 @@
 
 	if (wasoffline ||
 	    (oldstate < VDEV_STATE_DEGRADED &&
-	    vd->vdev_state >= VDEV_STATE_DEGRADED))
+	    vd->vdev_state >= VDEV_STATE_DEGRADED)) {
 		spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
 
+		/*
+		 * Asynchronously detach spare vdev if resilver or
+		 * rebuild is not required
+		 */
+		if (vd->vdev_unspare &&
+		    !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+		    !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
+		    !vdev_rebuild_active(tvd))
+			spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
+	}
 	return (spa_vdev_state_exit(spa, vd, 0));
 }
 
@@ -3547,9 +4131,12 @@
 	spa_vdev_state_enter(spa, SCL_ALLOC);
 
 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
-		return (spa_vdev_state_exit(spa, NULL, ENODEV));
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
 
 	if (!vd->vdev_ops->vdev_op_leaf)
+		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENOTSUP)));
+
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
 
 	tvd = vd->vdev_top;
@@ -3567,7 +4154,8 @@
 		 */
 		if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
 		    vdev_dtl_required(vd))
-			return (spa_vdev_state_exit(spa, NULL, EBUSY));
+			return (spa_vdev_state_exit(spa, NULL,
+			    SET_ERROR(EBUSY)));
 
 		/*
 		 * If the top-level is a slog and it has had allocations
@@ -3579,6 +4167,7 @@
 			/*
 			 * Prevent any future allocations.
 			 */
+			ASSERT3P(tvd->vdev_log_mg, ==, NULL);
 			metaslab_group_passivate(mg);
 			(void) spa_vdev_state_exit(spa, vd, 0);
 
@@ -3624,7 +4213,8 @@
 		    vdev_is_dead(tvd)) {
 			vd->vdev_offline = B_FALSE;
 			vdev_reopen(tvd);
-			return (spa_vdev_state_exit(spa, NULL, EBUSY));
+			return (spa_vdev_state_exit(spa, NULL,
+			    SET_ERROR(EBUSY)));
 		}
 
 		/*
@@ -3676,9 +4266,9 @@
 		vdev_clear(spa, vd->vdev_child[c]);
 
 	/*
-	 * It makes no sense to "clear" an indirect vdev.
+	 * It makes no sense to "clear" an indirect or removed vdev.
 	 */
-	if (!vdev_is_concrete(vd))
+	if (!vdev_is_concrete(vd) || vd->vdev_removed)
 		return;
 
 	/*
@@ -3726,6 +4316,9 @@
 	    vd->vdev_parent->vdev_ops == &vdev_spare_ops &&
 	    vd->vdev_parent->vdev_child[0] == vd)
 		vd->vdev_unspare = B_TRUE;
+
+	/* Clear recent error events cache (i.e. duplicate events tracking) */
+	zfs_ereport_clear(spa, vd);
 }
 
 boolean_t
@@ -3794,6 +4387,13 @@
 static void
 vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
 {
+	/*
+	 * Exclude the dRAID spare when aggregating to avoid double counting
+	 * the ops and bytes.  These IOs are counted by the physical leaves.
+	 */
+	if (cvd->vdev_ops == &vdev_draid_spare_ops)
+		return;
+
 	for (int t = 0; t < VS_ZIO_TYPES; t++) {
 		vs->vs_ops[t] += cvs->vs_ops[t];
 		vs->vs_bytes[t] += cvs->vs_bytes[t];
@@ -3808,6 +4408,8 @@
 static void
 vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx)
 {
+	(void) cvd;
+
 	int t, b;
 	for (t = 0; t < ZIO_TYPES; t++) {
 		for (b = 0; b < ARRAY_SIZE(vsx->vsx_disk_histo[0]); b++)
@@ -3886,7 +4488,6 @@
 				vdev_get_child_stat(cvd, vs, cvs);
 			if (vsx)
 				vdev_get_child_stat_ex(cvd, vsx, cvsx);
-
 		}
 	} else {
 		/*
@@ -3917,7 +4518,9 @@
 		vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
 		vs->vs_state = vd->vdev_state;
 		vs->vs_rsize = vdev_get_min_asize(vd);
+
 		if (vd->vdev_ops->vdev_op_leaf) {
+			vs->vs_pspace = vd->vdev_psize;
 			vs->vs_rsize += VDEV_LABEL_START_SIZE +
 			    VDEV_LABEL_END_SIZE;
 			/*
@@ -3943,7 +4546,11 @@
 			vs->vs_trim_bytes_est = vd->vdev_trim_bytes_est;
 			vs->vs_trim_state = vd->vdev_trim_state;
 			vs->vs_trim_action_time = vd->vdev_trim_action_time;
+
+			/* Set when there is a deferred resilver. */
+			vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
 		}
+
 		/*
 		 * Report expandable space on top-level, non-auxiliary devices
 		 * only. The expandable space is reported in terms of metaslab
@@ -3955,13 +4562,30 @@
 			    vd->vdev_max_asize - vd->vdev_asize,
 			    1ULL << tvd->vdev_ms_shift);
 		}
+
+		vs->vs_configured_ashift = vd->vdev_top != NULL
+		    ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
+		vs->vs_logical_ashift = vd->vdev_logical_ashift;
+		if (vd->vdev_physical_ashift <= ASHIFT_MAX)
+			vs->vs_physical_ashift = vd->vdev_physical_ashift;
+		else
+			vs->vs_physical_ashift = 0;
+
+		/*
+		 * Report fragmentation and rebuild progress for top-level,
+		 * non-auxiliary, concrete devices.
+		 */
 		if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
 		    vdev_is_concrete(vd)) {
+			/*
+			 * The vdev fragmentation rating doesn't take into
+			 * account the embedded slog metaslab (vdev_log_mg).
+			 * Since it's only one metaslab, it would have a tiny
+			 * impact on the overall fragmentation.
+			 */
 			vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
 			    vd->vdev_mg->mg_fragmentation : 0;
 		}
-		if (vd->vdev_ops->vdev_op_leaf)
-			vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
 	}
 
 	vdev_get_stats_ex_impl(vd, vs, vsx);
@@ -4042,17 +4666,39 @@
 		mutex_enter(&vd->vdev_stat_lock);
 
 		if (flags & ZIO_FLAG_IO_REPAIR) {
+			/*
+			 * Repair is the result of a resilver issued by the
+			 * scan thread (spa_sync).
+			 */
 			if (flags & ZIO_FLAG_SCAN_THREAD) {
-				dsl_scan_phys_t *scn_phys =
-				    &spa->spa_dsl_pool->dp_scan->scn_phys;
+				dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+				dsl_scan_phys_t *scn_phys = &scn->scn_phys;
 				uint64_t *processed = &scn_phys->scn_processed;
 
-				/* XXX cleanup? */
 				if (vd->vdev_ops->vdev_op_leaf)
 					atomic_add_64(processed, psize);
 				vs->vs_scan_processed += psize;
 			}
 
+			/*
+			 * Repair is the result of a rebuild issued by the
+			 * rebuild thread (vdev_rebuild_thread).  To avoid
+			 * double counting repaired bytes the virtual dRAID
+			 * spare vdev is excluded from the processed bytes.
+			 */
+			if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
+				vdev_t *tvd = vd->vdev_top;
+				vdev_rebuild_t *vr = &tvd->vdev_rebuild_config;
+				vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+				uint64_t *rebuilt = &vrp->vrp_bytes_rebuilt;
+
+				if (vd->vdev_ops->vdev_op_leaf &&
+				    vd->vdev_ops != &vdev_draid_spare_ops) {
+					atomic_add_64(rebuilt, psize);
+				}
+				vs->vs_rebuild_processed += psize;
+			}
+
 			if (flags & ZIO_FLAG_SELF_HEAL)
 				vs->vs_self_healed += psize;
 		}
@@ -4064,6 +4710,7 @@
 		if (vd->vdev_ops->vdev_op_leaf &&
 		    (zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE)) {
 			zio_type_t vs_type = type;
+			zio_priority_t priority = zio->io_priority;
 
 			/*
 			 * TRIM ops and bytes are reported to user space as
@@ -4073,19 +4720,44 @@
 			if (type == ZIO_TYPE_TRIM)
 				vs_type = ZIO_TYPE_IOCTL;
 
+			/*
+			 * Solely for the purposes of 'zpool iostat -lqrw'
+			 * reporting use the priority to categorize the IO.
+			 * Only the following are reported to user space:
+			 *
+			 *   ZIO_PRIORITY_SYNC_READ,
+			 *   ZIO_PRIORITY_SYNC_WRITE,
+			 *   ZIO_PRIORITY_ASYNC_READ,
+			 *   ZIO_PRIORITY_ASYNC_WRITE,
+			 *   ZIO_PRIORITY_SCRUB,
+			 *   ZIO_PRIORITY_TRIM.
+			 */
+			if (priority == ZIO_PRIORITY_REBUILD) {
+				priority = ((type == ZIO_TYPE_WRITE) ?
+				    ZIO_PRIORITY_ASYNC_WRITE :
+				    ZIO_PRIORITY_SCRUB);
+			} else if (priority == ZIO_PRIORITY_INITIALIZING) {
+				ASSERT3U(type, ==, ZIO_TYPE_WRITE);
+				priority = ZIO_PRIORITY_ASYNC_WRITE;
+			} else if (priority == ZIO_PRIORITY_REMOVAL) {
+				priority = ((type == ZIO_TYPE_WRITE) ?
+				    ZIO_PRIORITY_ASYNC_WRITE :
+				    ZIO_PRIORITY_ASYNC_READ);
+			}
+
 			vs->vs_ops[vs_type]++;
 			vs->vs_bytes[vs_type] += psize;
 
 			if (flags & ZIO_FLAG_DELEGATED) {
-				vsx->vsx_agg_histo[zio->io_priority]
+				vsx->vsx_agg_histo[priority]
 				    [RQ_HISTO(zio->io_size)]++;
 			} else {
-				vsx->vsx_ind_histo[zio->io_priority]
+				vsx->vsx_ind_histo[priority]
 				    [RQ_HISTO(zio->io_size)]++;
 			}
 
 			if (zio->io_delta && zio->io_delay) {
-				vsx->vsx_queue_histo[zio->io_priority]
+				vsx->vsx_queue_histo[priority]
 				    [L_HISTO(zio->io_delta - zio->io_delay)]++;
 				vsx->vsx_disk_histo[type]
 				    [L_HISTO(zio->io_delay)]++;
@@ -4119,8 +4791,7 @@
 	if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
 		return;
 
-	if (spa->spa_load_state == SPA_LOAD_NONE &&
-	    type == ZIO_TYPE_WRITE && txg != 0 &&
+	if (type == ZIO_TYPE_WRITE && txg != 0 &&
 	    (!(flags & ZIO_FLAG_IO_REPAIR) ||
 	    (flags & ZIO_FLAG_SCAN_THREAD) ||
 	    spa->spa_claiming)) {
@@ -4179,6 +4850,7 @@
 vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta,
     int64_t space_delta)
 {
+	(void) defer_delta;
 	int64_t dspace_delta;
 	spa_t *spa = vd->vdev_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
@@ -4533,8 +5205,8 @@
 				class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
 			}
 
-			zfs_ereport_post(class, spa, vd, NULL, NULL,
-			    save_state, 0);
+			(void) zfs_ereport_post(class, spa, vd, NULL, NULL,
+			    save_state);
 		}
 
 		/* Erase any notion of persistent removed state */
@@ -4586,10 +5258,8 @@
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		const char *vdev_type = vd->vdev_ops->vdev_op_type;
 
-		if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0 ||
-		    strcmp(vdev_type, VDEV_TYPE_INDIRECT) == 0) {
+		if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0)
 			return (B_FALSE);
-		}
 	}
 
 	for (int c = 0; c < vd->vdev_children; c++) {
@@ -4687,7 +5357,7 @@
 			zio_t *fio;
 			uint64_t delta;
 
-			zfs_dbgmsg("slow vdev: %s has %d active IOs",
+			zfs_dbgmsg("slow vdev: %s has %lu active IOs",
 			    vd->vdev_path, avl_numnodes(&vq->vq_active_tree));
 
 			/*
@@ -4747,30 +5417,42 @@
 	    vdev_resilver_needed(vd, NULL, NULL));
 }
 
+boolean_t
+vdev_xlate_is_empty(range_seg64_t *rs)
+{
+	return (rs->rs_start == rs->rs_end);
+}
+
 /*
- * Translate a logical range to the physical range for the specified vdev_t.
- * This function is initially called with a leaf vdev and will walk each
- * parent vdev until it reaches a top-level vdev. Once the top-level is
- * reached the physical range is initialized and the recursive function
- * begins to unwind. As it unwinds it calls the parent's vdev specific
- * translation function to do the real conversion.
+ * Translate a logical range to the first contiguous physical range for the
+ * specified vdev_t.  This function is initially called with a leaf vdev and
+ * will walk each parent vdev until it reaches a top-level vdev. Once the
+ * top-level is reached the physical range is initialized and the recursive
+ * function begins to unwind. As it unwinds it calls the parent's vdev
+ * specific translation function to do the real conversion.
  */
 void
-vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs, range_seg_t *physical_rs)
+vdev_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
 	/*
 	 * Walk up the vdev tree
 	 */
 	if (vd != vd->vdev_top) {
-		vdev_xlate(vd->vdev_parent, logical_rs, physical_rs);
+		vdev_xlate(vd->vdev_parent, logical_rs, physical_rs,
+		    remain_rs);
 	} else {
 		/*
-		 * We've reached the top-level vdev, initialize the
-		 * physical range to the logical range and start to
-		 * unwind.
+		 * We've reached the top-level vdev, initialize the physical
+		 * range to the logical range and set an empty remaining
+		 * range then start to unwind.
 		 */
 		physical_rs->rs_start = logical_rs->rs_start;
 		physical_rs->rs_end = logical_rs->rs_end;
+
+		remain_rs->rs_start = logical_rs->rs_start;
+		remain_rs->rs_end = logical_rs->rs_start;
+
 		return;
 	}
 
@@ -4780,17 +5462,69 @@
 
 	/*
 	 * As this recursive function unwinds, translate the logical
-	 * range into its physical components by calling the
-	 * vdev specific translate function.
+	 * range into its physical and any remaining components by calling
+	 * the vdev specific translate function.
 	 */
-	range_seg_t intermediate = { { { 0, 0 } } };
-	pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate);
+	range_seg64_t intermediate = { 0 };
+	pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate, remain_rs);
 
 	physical_rs->rs_start = intermediate.rs_start;
 	physical_rs->rs_end = intermediate.rs_end;
 }
 
-#if defined(_KERNEL)
+void
+vdev_xlate_walk(vdev_t *vd, const range_seg64_t *logical_rs,
+    vdev_xlate_func_t *func, void *arg)
+{
+	range_seg64_t iter_rs = *logical_rs;
+	range_seg64_t physical_rs;
+	range_seg64_t remain_rs;
+
+	while (!vdev_xlate_is_empty(&iter_rs)) {
+
+		vdev_xlate(vd, &iter_rs, &physical_rs, &remain_rs);
+
+		/*
+		 * With raidz and dRAID, it's possible that the logical range
+		 * does not live on this leaf vdev. Only when there is a non-
+		 * zero physical size call the provided function.
+		 */
+		if (!vdev_xlate_is_empty(&physical_rs))
+			func(arg, &physical_rs);
+
+		iter_rs = remain_rs;
+	}
+}
+
+/*
+ * Look at the vdev tree and determine whether any devices are currently being
+ * replaced.
+ */
+boolean_t
+vdev_replace_in_progress(vdev_t *vdev)
+{
+	ASSERT(spa_config_held(vdev->vdev_spa, SCL_ALL, RW_READER) != 0);
+
+	if (vdev->vdev_ops == &vdev_replacing_ops)
+		return (B_TRUE);
+
+	/*
+	 * A 'spare' vdev indicates that we have a replace in progress, unless
+	 * it has exactly two children, and the second, the hot spare, has
+	 * finished being resilvered.
+	 */
+	if (vdev->vdev_ops == &vdev_spare_ops && (vdev->vdev_children > 2 ||
+	    !vdev_dtl_empty(vdev->vdev_child[1], DTL_MISSING)))
+		return (B_TRUE);
+
+	for (int i = 0; i < vdev->vdev_children; i++) {
+		if (vdev_replace_in_progress(vdev->vdev_child[i]))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
 EXPORT_SYMBOL(vdev_fault);
 EXPORT_SYMBOL(vdev_degrade);
 EXPORT_SYMBOL(vdev_online);
@@ -4798,36 +5532,43 @@
 EXPORT_SYMBOL(vdev_clear);
 
 /* BEGIN CSTYLED */
-module_param(zfs_vdev_default_ms_count, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_default_ms_count,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_count, INT, ZMOD_RW,
 	"Target number of metaslabs per top-level vdev");
 
-module_param(zfs_vdev_min_ms_count, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_min_ms_count,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, default_ms_shift, INT, ZMOD_RW,
+	"Default limit for metaslab size");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, min_ms_count, INT, ZMOD_RW,
 	"Minimum number of metaslabs per top-level vdev");
 
-module_param(zfs_vdev_ms_count_limit, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_ms_count_limit,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, ms_count_limit, INT, ZMOD_RW,
 	"Practical upper limit of total metaslabs per top-level vdev");
 
-module_param(zfs_slow_io_events_per_second, uint, 0644);
-MODULE_PARM_DESC(zfs_slow_io_events_per_second,
+ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW,
 	"Rate limit slow IO (delay) events to this many per second");
 
-module_param(zfs_checksum_events_per_second, uint, 0644);
-MODULE_PARM_DESC(zfs_checksum_events_per_second, "Rate limit checksum events "
-	"to this many checksum errors per second (do not set below zed"
-	"threshold).");
+ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW,
+	"Rate limit checksum events to this many checksum errors per second "
+	"(do not set below zed threshold).");
 
-module_param(zfs_scan_ignore_errors, int, 0644);
-MODULE_PARM_DESC(zfs_scan_ignore_errors,
+ZFS_MODULE_PARAM(zfs, zfs_, scan_ignore_errors, INT, ZMOD_RW,
 	"Ignore errors during resilver/scrub");
 
-module_param(vdev_validate_skip, int, 0644);
-MODULE_PARM_DESC(vdev_validate_skip,
+ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW,
 	"Bypass vdev_validate()");
 
-module_param(zfs_nocacheflush, int, 0644);
-MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes");
+ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW,
+	"Disable cache flushes");
+
+ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, INT, ZMOD_RW,
+	"Minimum number of metaslabs required to dedicate one for log blocks");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
+	param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+	"Minimum ashift used when creating new top-level vdevs");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
+	param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+	"Maximum ashift used when optimizing for logical -> physical sector "
+	"size on new top-level vdevs");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/vdev_cache.c b/zfs/module/zfs/vdev_cache.c
index b63b9f9..6e82184 100644
--- a/zfs/module/zfs/vdev_cache.c
+++ b/zfs/module/zfs/vdev_cache.c

@@ -111,7 +111,7 @@
 	const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
 	const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
 
-	return (AVL_CMP(ve1->ve_offset, ve2->ve_offset));
+	return (TREE_CMP(ve1->ve_offset, ve2->ve_offset));
 }
 
 static int
@@ -120,7 +120,7 @@
 	const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
 	const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
 
-	int cmp = AVL_CMP(ve1->ve_lastused, ve2->ve_lastused);
+	int cmp = TREE_CMP(ve1->ve_lastused, ve2->ve_lastused);
 	if (likely(cmp))
 		return (cmp);
 
@@ -254,7 +254,7 @@
 	vdev_cache_entry_t *ve, *ve_search;
 	uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
 	zio_t *fio;
-	ASSERTV(uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS));
+	uint64_t cache_phase __maybe_unused = P2PHASE(zio->io_offset, VCBS);
 
 	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
 
@@ -425,13 +425,13 @@
 	}
 }
 
-#if defined(_KERNEL)
-module_param(zfs_vdev_cache_max, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_cache_max, "Inflate reads small than max");
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_max, INT, ZMOD_RW,
+	"Inflate reads small than max");
 
-module_param(zfs_vdev_cache_size, int, 0444);
-MODULE_PARM_DESC(zfs_vdev_cache_size, "Total size of the per-disk cache");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_size, INT, ZMOD_RD,
+	"Total size of the per-disk cache");
 
-module_param(zfs_vdev_cache_bshift, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_cache_bshift, "Shift size to inflate reads too");
-#endif
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_bshift, INT, ZMOD_RW,
+	"Shift size to inflate reads too");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/vdev_disk.c b/zfs/module/zfs/vdev_disk.c
deleted file mode 100644
index 17b4927..0000000
--- a/zfs/module/zfs/vdev_disk.c
+++ /dev/null

@@ -1,925 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
- * LLNL-CODE-403049.
- * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev_disk.h>
-#include <sys/vdev_impl.h>
-#include <sys/vdev_trim.h>
-#include <sys/abd.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio.h>
-#include <linux/mod_compat.h>
-#include <linux/msdos_fs.h>
-#include <linux/vfs_compat.h>
-
-/*
- * Unique identifier for the exclusive vdev holder.
- */
-static void *zfs_vdev_holder = VDEV_HOLDER;
-
-/*
- * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the
- * device is missing. The missing path may be transient since the links
- * can be briefly removed and recreated in response to udev events.
- */
-static unsigned zfs_vdev_open_timeout_ms = 1000;
-
-/*
- * Size of the "reserved" partition, in blocks.
- */
-#define	EFI_MIN_RESV_SIZE	(16 * 1024)
-
-/*
- * Virtual device vector for disks.
- */
-typedef struct dio_request {
-	zio_t			*dr_zio;	/* Parent ZIO */
-	atomic_t		dr_ref;		/* References */
-	int			dr_error;	/* Bio error */
-	int			dr_bio_count;	/* Count of bio's */
-	struct bio		*dr_bio[0];	/* Attached bio's */
-} dio_request_t;
-
-
-#if defined(HAVE_OPEN_BDEV_EXCLUSIVE) || defined(HAVE_BLKDEV_GET_BY_PATH)
-static fmode_t
-vdev_bdev_mode(int smode)
-{
-	fmode_t mode = 0;
-
-	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
-
-	if (smode & FREAD)
-		mode |= FMODE_READ;
-
-	if (smode & FWRITE)
-		mode |= FMODE_WRITE;
-
-	return (mode);
-}
-#else
-static int
-vdev_bdev_mode(int smode)
-{
-	int mode = 0;
-
-	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
-
-	if ((smode & FREAD) && !(smode & FWRITE))
-		mode = SB_RDONLY;
-
-	return (mode);
-}
-#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
-
-/*
- * Returns the usable capacity (in bytes) for the partition or disk.
- */
-static uint64_t
-bdev_capacity(struct block_device *bdev)
-{
-	return (i_size_read(bdev->bd_inode));
-}
-
-/*
- * Returns the maximum expansion capacity of the block device (in bytes).
- *
- * It is possible to expand a vdev when it has been created as a wholedisk
- * and the containing block device has increased in capacity.  Or when the
- * partition containing the pool has been manually increased in size.
- *
- * This function is only responsible for calculating the potential expansion
- * size so it can be reported by 'zpool list'.  The efi_use_whole_disk() is
- * responsible for verifying the expected partition layout in the wholedisk
- * case, and updating the partition table if appropriate.  Once the partition
- * size has been increased the additional capacity will be visible using
- * bdev_capacity().
- *
- * The returned maximum expansion capacity is always expected to be larger, or
- * at the very least equal, to its usable capacity to prevent overestimating
- * the pool expandsize.
- */
-static uint64_t
-bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
-{
-	uint64_t psize;
-	int64_t available;
-
-	if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) {
-		/*
-		 * When reporting maximum expansion capacity for a wholedisk
-		 * deduct any capacity which is expected to be lost due to
-		 * alignment restrictions.  Over reporting this value isn't
-		 * harmful and would only result in slightly less capacity
-		 * than expected post expansion.
-		 * The estimated available space may be slightly smaller than
-		 * bdev_capacity() for devices where the number of sectors is
-		 * not a multiple of the alignment size and the partition layout
-		 * is keeping less than PARTITION_END_ALIGNMENT bytes after the
-		 * "reserved" EFI partition: in such cases return the device
-		 * usable capacity.
-		 */
-		available = i_size_read(bdev->bd_contains->bd_inode) -
-		    ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
-		    PARTITION_END_ALIGNMENT) << SECTOR_BITS);
-		psize = MAX(available, bdev_capacity(bdev));
-	} else {
-		psize = bdev_capacity(bdev);
-	}
-
-	return (psize);
-}
-
-static void
-vdev_disk_error(zio_t *zio)
-{
-	/*
-	 * This function can be called in interrupt context, for instance while
-	 * handling IRQs coming from a misbehaving disk device; use printk()
-	 * which is safe from any context.
-	 */
-	printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
-	    "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
-	    zio->io_vd->vdev_path, zio->io_error, zio->io_type,
-	    (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
-	    zio->io_flags);
-}
-
-static int
-vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
-{
-	struct block_device *bdev;
-	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
-	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
-	vdev_disk_t *vd;
-
-	/* Must have a pathname and it must be absolute. */
-	if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
-		v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		vdev_dbgmsg(v, "invalid vdev_path");
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Reopen the device if it is currently open.  When expanding a
-	 * partition force re-scanning the partition table while closed
-	 * in order to get an accurate updated block device size.  Then
-	 * since udev may need to recreate the device links increase the
-	 * open retry timeout before reporting the device as unavailable.
-	 */
-	vd = v->vdev_tsd;
-	if (vd) {
-		char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
-		boolean_t reread_part = B_FALSE;
-
-		rw_enter(&vd->vd_lock, RW_WRITER);
-		bdev = vd->vd_bdev;
-		vd->vd_bdev = NULL;
-
-		if (bdev) {
-			if (v->vdev_expanding && bdev != bdev->bd_contains) {
-				bdevname(bdev->bd_contains, disk_name + 5);
-				reread_part = B_TRUE;
-			}
-
-			vdev_bdev_close(bdev, mode);
-		}
-
-		if (reread_part) {
-			bdev = vdev_bdev_open(disk_name, mode, zfs_vdev_holder);
-			if (!IS_ERR(bdev)) {
-				int error = vdev_bdev_reread_part(bdev);
-				vdev_bdev_close(bdev, mode);
-				if (error == 0) {
-					timeout = MSEC2NSEC(
-					    zfs_vdev_open_timeout_ms * 2);
-				}
-			}
-		}
-	} else {
-		vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
-
-		rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
-		rw_enter(&vd->vd_lock, RW_WRITER);
-	}
-
-	/*
-	 * Devices are always opened by the path provided at configuration
-	 * time.  This means that if the provided path is a udev by-id path
-	 * then drives may be re-cabled without an issue.  If the provided
-	 * path is a udev by-path path, then the physical location information
-	 * will be preserved.  This can be critical for more complicated
-	 * configurations where drives are located in specific physical
-	 * locations to maximize the systems tolerance to component failure.
-	 *
-	 * Alternatively, you can provide your own udev rule to flexibly map
-	 * the drives as you see fit.  It is not advised that you use the
-	 * /dev/[hd]d devices which may be reordered due to probing order.
-	 * Devices in the wrong locations will be detected by the higher
-	 * level vdev validation.
-	 *
-	 * The specified paths may be briefly removed and recreated in
-	 * response to udev events.  This should be exceptionally unlikely
-	 * because the zpool command makes every effort to verify these paths
-	 * have already settled prior to reaching this point.  Therefore,
-	 * a ENOENT failure at this point is highly likely to be transient
-	 * and it is reasonable to sleep and retry before giving up.  In
-	 * practice delays have been observed to be on the order of 100ms.
-	 */
-	hrtime_t start = gethrtime();
-	bdev = ERR_PTR(-ENXIO);
-	while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
-		bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
-		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
-			schedule_timeout(MSEC_TO_TICK(10));
-		} else if (IS_ERR(bdev)) {
-			break;
-		}
-	}
-
-	if (IS_ERR(bdev)) {
-		int error = -PTR_ERR(bdev);
-		vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error,
-		    (u_longlong_t)(gethrtime() - start),
-		    (u_longlong_t)timeout);
-		vd->vd_bdev = NULL;
-		v->vdev_tsd = vd;
-		rw_exit(&vd->vd_lock);
-		return (SET_ERROR(error));
-	} else {
-		vd->vd_bdev = bdev;
-		v->vdev_tsd = vd;
-		rw_exit(&vd->vd_lock);
-	}
-
-	struct request_queue *q = bdev_get_queue(vd->vd_bdev);
-
-	/*  Determine the physical block size */
-	int block_size = vdev_bdev_block_size(vd->vd_bdev);
-
-	/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
-	v->vdev_nowritecache = B_FALSE;
-
-	/* Set when device reports it supports TRIM. */
-	v->vdev_has_trim = !!blk_queue_discard(q);
-
-	/* Set when device reports it supports secure TRIM. */
-	v->vdev_has_securetrim = !!blk_queue_discard_secure(q);
-
-	/* Inform the ZIO pipeline that we are non-rotational */
-	v->vdev_nonrot = blk_queue_nonrot(q);
-
-	/* Physical volume size in bytes for the partition */
-	*psize = bdev_capacity(vd->vd_bdev);
-
-	/* Physical volume size in bytes including possible expansion space */
-	*max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
-
-	/* Based on the minimum sector size set the block size */
-	*ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
-
-	return (0);
-}
-
-static void
-vdev_disk_close(vdev_t *v)
-{
-	vdev_disk_t *vd = v->vdev_tsd;
-
-	if (v->vdev_reopening || vd == NULL)
-		return;
-
-	if (vd->vd_bdev != NULL) {
-		vdev_bdev_close(vd->vd_bdev,
-		    vdev_bdev_mode(spa_mode(v->vdev_spa)));
-	}
-
-	rw_destroy(&vd->vd_lock);
-	kmem_free(vd, sizeof (vdev_disk_t));
-	v->vdev_tsd = NULL;
-}
-
-static dio_request_t *
-vdev_disk_dio_alloc(int bio_count)
-{
-	dio_request_t *dr;
-	int i;
-
-	dr = kmem_zalloc(sizeof (dio_request_t) +
-	    sizeof (struct bio *) * bio_count, KM_SLEEP);
-	if (dr) {
-		atomic_set(&dr->dr_ref, 0);
-		dr->dr_bio_count = bio_count;
-		dr->dr_error = 0;
-
-		for (i = 0; i < dr->dr_bio_count; i++)
-			dr->dr_bio[i] = NULL;
-	}
-
-	return (dr);
-}
-
-static void
-vdev_disk_dio_free(dio_request_t *dr)
-{
-	int i;
-
-	for (i = 0; i < dr->dr_bio_count; i++)
-		if (dr->dr_bio[i])
-			bio_put(dr->dr_bio[i]);
-
-	kmem_free(dr, sizeof (dio_request_t) +
-	    sizeof (struct bio *) * dr->dr_bio_count);
-}
-
-static void
-vdev_disk_dio_get(dio_request_t *dr)
-{
-	atomic_inc(&dr->dr_ref);
-}
-
-static int
-vdev_disk_dio_put(dio_request_t *dr)
-{
-	int rc = atomic_dec_return(&dr->dr_ref);
-
-	/*
-	 * Free the dio_request when the last reference is dropped and
-	 * ensure zio_interpret is called only once with the correct zio
-	 */
-	if (rc == 0) {
-		zio_t *zio = dr->dr_zio;
-		int error = dr->dr_error;
-
-		vdev_disk_dio_free(dr);
-
-		if (zio) {
-			zio->io_error = error;
-			ASSERT3S(zio->io_error, >=, 0);
-			if (zio->io_error)
-				vdev_disk_error(zio);
-
-			zio_delay_interrupt(zio);
-		}
-	}
-
-	return (rc);
-}
-
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
-{
-	dio_request_t *dr = bio->bi_private;
-	int rc;
-
-	if (dr->dr_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
-		dr->dr_error = BIO_END_IO_ERROR(bio);
-#else
-		if (error)
-			dr->dr_error = -(error);
-		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-			dr->dr_error = EIO;
-#endif
-	}
-
-	/* Drop reference acquired by __vdev_disk_physio */
-	rc = vdev_disk_dio_put(dr);
-}
-
-static unsigned int
-bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
-{
-	unsigned int offset, size, i;
-	struct page *page;
-
-	offset = offset_in_page(bio_ptr);
-	for (i = 0; i < bio->bi_max_vecs; i++) {
-		size = PAGE_SIZE - offset;
-
-		if (bio_size <= 0)
-			break;
-
-		if (size > bio_size)
-			size = bio_size;
-
-		if (is_vmalloc_addr(bio_ptr))
-			page = vmalloc_to_page(bio_ptr);
-		else
-			page = virt_to_page(bio_ptr);
-
-		/*
-		 * Some network related block device uses tcp_sendpage, which
-		 * doesn't behave well when using 0-count page, this is a
-		 * safety net to catch them.
-		 */
-		ASSERT3S(page_count(page), >, 0);
-
-		if (bio_add_page(bio, page, size, offset) != size)
-			break;
-
-		bio_ptr  += size;
-		bio_size -= size;
-		offset = 0;
-	}
-
-	return (bio_size);
-}
-
-static unsigned int
-bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off)
-{
-	if (abd_is_linear(abd))
-		return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size));
-
-	return (abd_scatter_bio_map_off(bio, abd, size, off));
-}
-
-static inline void
-vdev_submit_bio_impl(struct bio *bio)
-{
-#ifdef HAVE_1ARG_SUBMIT_BIO
-	submit_bio(bio);
-#else
-	submit_bio(0, bio);
-#endif
-}
-
-/*
- * preempt_schedule_notrace is GPL-only which breaks the ZFS build, so
- * replace it with preempt_schedule under the following condition:
- */
-#if defined(CONFIG_ARM64) && \
-    defined(CONFIG_PREEMPTION) && \
-    defined(CONFIG_BLK_CGROUP)
-#define	preempt_schedule_notrace(x) preempt_schedule(x)
-#endif
-
-#ifdef HAVE_BIO_SET_DEV
-#if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY)
-/*
- * The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by
- * blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched().
- * As a side effect the function was converted to GPL-only.  Define our
- * own version when needed which uses rcu_read_lock_sched().
- */
-#if defined(HAVE_BLKG_TRYGET_GPL_ONLY)
-static inline bool
-vdev_blkg_tryget(struct blkcg_gq *blkg)
-{
-	struct percpu_ref *ref = &blkg->refcnt;
-	unsigned long __percpu *count;
-	bool rc;
-
-	rcu_read_lock_sched();
-
-	if (__ref_is_percpu(ref, &count)) {
-		this_cpu_inc(*count);
-		rc = true;
-	} else {
-		rc = atomic_long_inc_not_zero(&ref->count);
-	}
-
-	rcu_read_unlock_sched();
-
-	return (rc);
-}
-#elif defined(HAVE_BLKG_TRYGET)
-#define	vdev_blkg_tryget(bg)	blkg_tryget(bg)
-#endif
-/*
- * The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the
- * GPL-only bio_associate_blkg() symbol thus inadvertently converting
- * the entire macro.  Provide a minimal version which always assigns the
- * request queue's root_blkg to the bio.
- */
-static inline void
-vdev_bio_associate_blkg(struct bio *bio)
-{
-	struct request_queue *q = bio->bi_disk->queue;
-
-	ASSERT3P(q, !=, NULL);
-	ASSERT3P(bio->bi_blkg, ==, NULL);
-
-	if (q->root_blkg && vdev_blkg_tryget(q->root_blkg))
-		bio->bi_blkg = q->root_blkg;
-}
-#define	bio_associate_blkg vdev_bio_associate_blkg
-#endif
-#else
-/*
- * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels.
- */
-static inline void
-bio_set_dev(struct bio *bio, struct block_device *bdev)
-{
-	bio->bi_bdev = bdev;
-}
-#endif /* HAVE_BIO_SET_DEV */
-
-static inline void
-vdev_submit_bio(struct bio *bio)
-{
-#ifdef HAVE_CURRENT_BIO_TAIL
-	struct bio **bio_tail = current->bio_tail;
-	current->bio_tail = NULL;
-	vdev_submit_bio_impl(bio);
-	current->bio_tail = bio_tail;
-#else
-	struct bio_list *bio_list = current->bio_list;
-	current->bio_list = NULL;
-	vdev_submit_bio_impl(bio);
-	current->bio_list = bio_list;
-#endif
-}
-
-static int
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
-    size_t io_size, uint64_t io_offset, int rw, int flags)
-{
-	dio_request_t *dr;
-	uint64_t abd_offset;
-	uint64_t bio_offset;
-	int bio_size, bio_count = 16;
-	int i = 0, error = 0;
-#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
-	struct blk_plug plug;
-#endif
-	/*
-	 * Accessing outside the block device is never allowed.
-	 */
-	if (io_offset + io_size > bdev->bd_inode->i_size) {
-		vdev_dbgmsg(zio->io_vd,
-		    "Illegal access %llu size %llu, device size %llu",
-		    io_offset, io_size, i_size_read(bdev->bd_inode));
-		return (SET_ERROR(EIO));
-	}
-
-retry:
-	dr = vdev_disk_dio_alloc(bio_count);
-	if (dr == NULL)
-		return (SET_ERROR(ENOMEM));
-
-	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
-		bio_set_flags_failfast(bdev, &flags);
-
-	dr->dr_zio = zio;
-
-	/*
-	 * When the IO size exceeds the maximum bio size for the request
-	 * queue we are forced to break the IO in multiple bio's and wait
-	 * for them all to complete.  Ideally, all pool users will set
-	 * their volume block size to match the maximum request size and
-	 * the common case will be one bio per vdev IO request.
-	 */
-
-	abd_offset = 0;
-	bio_offset = io_offset;
-	bio_size   = io_size;
-	for (i = 0; i <= dr->dr_bio_count; i++) {
-
-		/* Finished constructing bio's for given buffer */
-		if (bio_size <= 0)
-			break;
-
-		/*
-		 * By default only 'bio_count' bio's per dio are allowed.
-		 * However, if we find ourselves in a situation where more
-		 * are needed we allocate a larger dio and warn the user.
-		 */
-		if (dr->dr_bio_count == i) {
-			vdev_disk_dio_free(dr);
-			bio_count *= 2;
-			goto retry;
-		}
-
-		/* bio_alloc() with __GFP_WAIT never returns NULL */
-		dr->dr_bio[i] = bio_alloc(GFP_NOIO,
-		    MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
-		    BIO_MAX_PAGES));
-		if (unlikely(dr->dr_bio[i] == NULL)) {
-			vdev_disk_dio_free(dr);
-			return (SET_ERROR(ENOMEM));
-		}
-
-		/* Matching put called by vdev_disk_physio_completion */
-		vdev_disk_dio_get(dr);
-
-		bio_set_dev(dr->dr_bio[i], bdev);
-		BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
-		dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
-		dr->dr_bio[i]->bi_private = dr;
-		bio_set_op_attrs(dr->dr_bio[i], rw, flags);
-
-		/* Remaining size is returned to become the new size */
-		bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd,
-		    bio_size, abd_offset);
-
-		/* Advance in buffer and construct another bio if needed */
-		abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
-		bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
-	}
-
-	/* Extra reference to protect dio_request during vdev_submit_bio */
-	vdev_disk_dio_get(dr);
-
-#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
-	if (dr->dr_bio_count > 1)
-		blk_start_plug(&plug);
-#endif
-
-	/* Submit all bio's associated with this dio */
-	for (i = 0; i < dr->dr_bio_count; i++)
-		if (dr->dr_bio[i])
-			vdev_submit_bio(dr->dr_bio[i]);
-
-#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
-	if (dr->dr_bio_count > 1)
-		blk_finish_plug(&plug);
-#endif
-
-	(void) vdev_disk_dio_put(dr);
-
-	return (error);
-}
-
-BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
-{
-	zio_t *zio = bio->bi_private;
-#ifdef HAVE_1ARG_BIO_END_IO_T
-	zio->io_error = BIO_END_IO_ERROR(bio);
-#else
-	zio->io_error = -error;
-#endif
-
-	if (zio->io_error && (zio->io_error == EOPNOTSUPP))
-		zio->io_vd->vdev_nowritecache = B_TRUE;
-
-	bio_put(bio);
-	ASSERT3S(zio->io_error, >=, 0);
-	if (zio->io_error)
-		vdev_disk_error(zio);
-	zio_interrupt(zio);
-}
-
-static int
-vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
-{
-	struct request_queue *q;
-	struct bio *bio;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return (SET_ERROR(ENXIO));
-
-	bio = bio_alloc(GFP_NOIO, 0);
-	/* bio_alloc() with __GFP_WAIT never returns NULL */
-	if (unlikely(bio == NULL))
-		return (SET_ERROR(ENOMEM));
-
-	bio->bi_end_io = vdev_disk_io_flush_completion;
-	bio->bi_private = zio;
-	bio_set_dev(bio, bdev);
-	bio_set_flush(bio);
-	vdev_submit_bio(bio);
-	invalidate_bdev(bdev);
-
-	return (0);
-}
-
-static void
-vdev_disk_io_start(zio_t *zio)
-{
-	vdev_t *v = zio->io_vd;
-	vdev_disk_t *vd = v->vdev_tsd;
-	unsigned long trim_flags = 0;
-	int rw, flags, error;
-
-	/*
-	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
-	 * Nothing to be done here but return failure.
-	 */
-	if (vd == NULL) {
-		zio->io_error = ENXIO;
-		zio_interrupt(zio);
-		return;
-	}
-
-	rw_enter(&vd->vd_lock, RW_READER);
-
-	/*
-	 * If the vdev is closed, it's likely due to a failed reopen and is
-	 * in the UNAVAIL state.  Nothing to be done here but return failure.
-	 */
-	if (vd->vd_bdev == NULL) {
-		rw_exit(&vd->vd_lock);
-		zio->io_error = ENXIO;
-		zio_interrupt(zio);
-		return;
-	}
-
-	switch (zio->io_type) {
-	case ZIO_TYPE_IOCTL:
-
-		if (!vdev_readable(v)) {
-			rw_exit(&vd->vd_lock);
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return;
-		}
-
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
-
-			if (v->vdev_nowritecache) {
-				zio->io_error = SET_ERROR(ENOTSUP);
-				break;
-			}
-
-			error = vdev_disk_io_flush(vd->vd_bdev, zio);
-			if (error == 0) {
-				rw_exit(&vd->vd_lock);
-				return;
-			}
-
-			zio->io_error = error;
-
-			break;
-
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
-		}
-
-		rw_exit(&vd->vd_lock);
-		zio_execute(zio);
-		return;
-	case ZIO_TYPE_WRITE:
-		rw = WRITE;
-#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
-		flags = (1 << BIO_RW_UNPLUG);
-#elif defined(REQ_UNPLUG)
-		flags = REQ_UNPLUG;
-#else
-		flags = 0;
-#endif
-		break;
-
-	case ZIO_TYPE_READ:
-		rw = READ;
-#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
-		flags = (1 << BIO_RW_UNPLUG);
-#elif defined(REQ_UNPLUG)
-		flags = REQ_UNPLUG;
-#else
-		flags = 0;
-#endif
-		break;
-
-	case ZIO_TYPE_TRIM:
-#if defined(BLKDEV_DISCARD_SECURE)
-		if (zio->io_trim_flags & ZIO_TRIM_SECURE)
-			trim_flags |= BLKDEV_DISCARD_SECURE;
-#endif
-		zio->io_error = -blkdev_issue_discard(vd->vd_bdev,
-		    zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS,
-		    trim_flags);
-
-		rw_exit(&vd->vd_lock);
-		zio_interrupt(zio);
-		return;
-
-	default:
-		rw_exit(&vd->vd_lock);
-		zio->io_error = SET_ERROR(ENOTSUP);
-		zio_interrupt(zio);
-		return;
-	}
-
-	zio->io_target_timestamp = zio_handle_io_delay(zio);
-	error = __vdev_disk_physio(vd->vd_bdev, zio,
-	    zio->io_size, zio->io_offset, rw, flags);
-	rw_exit(&vd->vd_lock);
-
-	if (error) {
-		zio->io_error = error;
-		zio_interrupt(zio);
-		return;
-	}
-}
-
-static void
-vdev_disk_io_done(zio_t *zio)
-{
-	/*
-	 * If the device returned EIO, we revalidate the media.  If it is
-	 * determined the media has changed this triggers the asynchronous
-	 * removal of the device from the configuration.
-	 */
-	if (zio->io_error == EIO) {
-		vdev_t *v = zio->io_vd;
-		vdev_disk_t *vd = v->vdev_tsd;
-
-		if (zfs_check_media_change(vd->vd_bdev)) {
-			vdev_bdev_invalidate(vd->vd_bdev);
-			v->vdev_remove_wanted = B_TRUE;
-			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
-		}
-	}
-}
-
-static void
-vdev_disk_hold(vdev_t *vd)
-{
-	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
-
-	/* We must have a pathname, and it must be absolute. */
-	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
-		return;
-
-	/*
-	 * Only prefetch path and devid info if the device has
-	 * never been opened.
-	 */
-	if (vd->vdev_tsd != NULL)
-		return;
-
-	/* XXX: Implement me as a vnode lookup for the device */
-	vd->vdev_name_vp = NULL;
-	vd->vdev_devid_vp = NULL;
-}
-
-static void
-vdev_disk_rele(vdev_t *vd)
-{
-	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
-
-	/* XXX: Implement me as a vnode rele for the device */
-}
-
-vdev_ops_t vdev_disk_ops = {
-	.vdev_op_open = vdev_disk_open,
-	.vdev_op_close = vdev_disk_close,
-	.vdev_op_asize = vdev_default_asize,
-	.vdev_op_io_start = vdev_disk_io_start,
-	.vdev_op_io_done = vdev_disk_io_done,
-	.vdev_op_state_change = NULL,
-	.vdev_op_need_resilver = NULL,
-	.vdev_op_hold = vdev_disk_hold,
-	.vdev_op_rele = vdev_disk_rele,
-	.vdev_op_remap = NULL,
-	.vdev_op_xlate = vdev_default_xlate,
-	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
-	.vdev_op_leaf = B_TRUE			/* leaf vdev */
-};
-
-/*
- * The zfs_vdev_scheduler module option has been deprecated. Setting this
- * value no longer has any effect.  It has not yet been entirely removed
- * to allow the module to be loaded if this option is specified in the
- * /etc/modprobe.d/zfs.conf file.  The following warning will be logged.
- */
-static int
-param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
-{
-	int error = param_set_charp(val, kp);
-	if (error == 0) {
-		printk(KERN_INFO "The 'zfs_vdev_scheduler' module option "
-		    "is not supported.\n");
-	}
-
-	return (error);
-}
-
-char *zfs_vdev_scheduler = "unused";
-module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
-    param_get_charp, &zfs_vdev_scheduler, 0644);
-MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");

diff --git a/zfs/module/zfs/vdev_draid.c b/zfs/module/zfs/vdev_draid.c
new file mode 100644
index 0000000..10d0951
--- /dev/null
+++ b/zfs/module/zfs/vdev_draid.c

@@ -0,0 +1,2837 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 Intel Corporation.
+ * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_draid.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_rebuild.h>
+#include <sys/abd.h>
+#include <sys/zio.h>
+#include <sys/nvpair.h>
+#include <sys/zio_checksum.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/fs/zfs.h>
+#include <zfs_fletcher.h>
+
+#ifdef ZFS_DEBUG
+#include <sys/vdev.h>	/* For vdev_xlate() in vdev_draid_io_verify() */
+#endif
+
+/*
+ * dRAID is a distributed spare implementation for ZFS. A dRAID vdev is
+ * comprised of multiple raidz redundancy groups which are spread over the
+ * dRAID children. To ensure an even distribution, and avoid hot spots, a
+ * permutation mapping is applied to the order of the dRAID children.
+ * This mixing effectively distributes the parity columns evenly over all
+ * of the disks in the dRAID.
+ *
+ * This is beneficial because it means when resilvering all of the disks
+ * can participate thereby increasing the available IOPs and bandwidth.
+ * Furthermore, by reserving a small fraction of each child's total capacity
+ * virtual distributed spare disks can be created. These spares similarly
+ * benefit from the performance gains of spanning all of the children. As a
+ * consequence, resilvering to a distributed spare can substantially reduce
+ * the time required to restore full parity to a pool with a failed disk.
+ *
+ * === dRAID group layout ===
+ *
+ * First, let's define a "row" in the configuration to be a 16M chunk from
+ * each physical drive at the same offset. This is the minimum allowable
+ * size since it must be possible to store a full 16M block when there is
+ * only a single data column. Next, we define a "group" to be a set of
+ * sequential disks containing both the parity and data columns. We allow
+ * groups to span multiple rows in order to align any group size to any
+ * number of physical drives. Finally, a "slice" is comprised of the rows
+ * which contain the target number of groups. The permutation mappings
+ * are applied in a round robin fashion to each slice.
+ *
+ * Given D+P drives in a group (including parity drives) and C-S physical
+ * drives (not including the spare drives), we can distribute the groups
+ * across R rows without remainder by sizing a slice to the least common
+ * multiple of D+P and C-S columns; i.e. ngroups = LCM(D+P, C-S) / (D+P)
+ * and R = LCM(D+P, C-S) / (C-S).
+ *
+ * In the example below, there are C=14 physical drives in the configuration
+ * with S=2 drives worth of spare capacity. Each group has a width of 9
+ * which includes D=8 data and P=1 parity drive. There are 4 groups and
+ * 3 rows per slice.  Each group has a size of 144M (16M * 9) and the slice
+ * size is 576M (144M * 4). When allocating from a dRAID each group is
+ * filled before moving on to the next as shown in slice 0 below.
+ *
+ *             data disks (8 data + 1 parity)          spares (2)
+ *     +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *  ^  | 2 | 6 | 1 | 11| 4 | 0 | 7 | 10| 8 | 9 | 13| 5 | 12| 3 | device map 0
+ *  |  +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *  |  |              group 0              |  group 1..|       |
+ *  |  +-----------------------------------+-----------+-------|
+ *  |  | 0   1   2   3   4   5   6   7   8 | 36  37  38|       |  r
+ *  |  | 9   10  11  12  13  14  15  16  17| 45  46  47|       |  o
+ *  |  | 18  19  20  21  22  23  24  25  26| 54  55  56|       |  w
+ *     | 27  28  29  30  31  32  33  34  35| 63  64  65|       |  0
+ *  s  +-----------------------+-----------------------+-------+
+ *  l  |       ..group 1       |        group 2..      |       |
+ *  i  +-----------------------+-----------------------+-------+
+ *  c  | 39  40  41  42  43  44| 72  73  74  75  76  77|       |  r
+ *  e  | 48  49  50  51  52  53| 81  82  83  84  85  86|       |  o
+ *  0  | 57  58  59  60  61  62| 90  91  92  93  94  95|       |  w
+ *     | 66  67  68  69  70  71| 99 100 101 102 103 104|       |  1
+ *  |  +-----------+-----------+-----------------------+-------+
+ *  |  |..group 2  |            group 3                |       |
+ *  |  +-----------+-----------+-----------------------+-------+
+ *  |  | 78  79  80|108 109 110 111 112 113 114 115 116|       |  r
+ *  |  | 87  88  89|117 118 119 120 121 122 123 124 125|       |  o
+ *  |  | 96  97  98|126 127 128 129 130 131 132 133 134|       |  w
+ *  v  |105 106 107|135 136 137 138 139 140 141 142 143|       |  2
+ *     +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *     | 9 | 11| 12| 2 | 4 | 1 | 3 | 0 | 10| 13| 8 | 5 | 6 | 7 | device map 1
+ *  s  +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *  l  |              group 4              |  group 5..|       | row 3
+ *  i  +-----------------------+-----------+-----------+-------|
+ *  c  |       ..group 5       |        group 6..      |       | row 4
+ *  e  +-----------+-----------+-----------------------+-------+
+ *  1  |..group 6  |            group 7                |       | row 5
+ *     +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *     | 3 | 5 | 10| 8 | 6 | 11| 12| 0 | 2 | 4 | 7 | 1 | 9 | 13| device map 2
+ *  s  +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ *  l  |              group 8              |  group 9..|       | row 6
+ *  i  +-----------------------------------------------+-------|
+ *  c  |       ..group 9       |        group 10..     |       | row 7
+ *  e  +-----------------------+-----------------------+-------+
+ *  2  |..group 10 |            group 11               |       | row 8
+ *     +-----------+-----------------------------------+-------+
+ *
+ * This layout has several advantages over requiring that each row contain
+ * a whole number of groups.
+ *
+ * 1. The group count is not a relevant parameter when defining a dRAID
+ *    layout. Only the group width is needed, and *all* groups will have
+ *    the desired size.
+ *
+ * 2. All possible group widths (<= physical disk count) can be supported.
+ *
+ * 3. The logic within vdev_draid.c is simplified when the group width is
+ *    the same for all groups (although some of the logic around computing
+ *    permutation numbers and drive offsets is more complicated).
+ *
+ * N.B. The following array describes all valid dRAID permutation maps.
+ * Each row is used to generate a permutation map for a different number
+ * of children from a unique seed. The seeds were generated and carefully
+ * evaluated by the 'draid' utility in order to provide balanced mappings.
+ * In addition to the seed a checksum of the in-memory mapping is stored
+ * for verification.
+ *
+ * The imbalance ratio of a given failure (e.g. 5 disks wide, child 3 failed,
+ * with a given permutation map) is the ratio of the amounts of I/O that will
+ * be sent to the least and most busy disks when resilvering. The average
+ * imbalance ratio (of a given number of disks and permutation map) is the
+ * average of the ratios of all possible single and double disk failures.
+ *
+ * In order to achieve a low imbalance ratio the number of permutations in
+ * the mapping must be significantly larger than the number of children.
+ * For dRAID the number of permutations has been limited to 512 to minimize
+ * the map size. This does result in a gradually increasing imbalance ratio
+ * as seen in the table below. Increasing the number of permutations for
+ * larger child counts would reduce the imbalance ratio. However, in practice
+ * when there are a large number of children each child is responsible for
+ * fewer total IOs so it's less of a concern.
+ *
+ * Note these values are hard coded and must never be changed.  Existing
+ * pools depend on the same mapping always being generated in order to
+ * read and write from the correct locations.  Any change would make
+ * existing pools completely inaccessible.
+ */
+static const draid_map_t draid_maps[VDEV_DRAID_MAX_MAPS] = {
+	{   2, 256, 0x89ef3dabbcc7de37, 0x00000000433d433d },	/* 1.000 */
+	{   3, 256, 0x89a57f3de98121b4, 0x00000000bcd8b7b5 },	/* 1.000 */
+	{   4, 256, 0xc9ea9ec82340c885, 0x00000001819d7c69 },	/* 1.000 */
+	{   5, 256, 0xf46733b7f4d47dfd, 0x00000002a1648d74 },	/* 1.010 */
+	{   6, 256, 0x88c3c62d8585b362, 0x00000003d3b0c2c4 },	/* 1.031 */
+	{   7, 256, 0x3a65d809b4d1b9d5, 0x000000055c4183ee },	/* 1.043 */
+	{   8, 256, 0xe98930e3c5d2e90a, 0x00000006edfb0329 },	/* 1.059 */
+	{   9, 256, 0x5a5430036b982ccb, 0x00000008ceaf6934 },	/* 1.056 */
+	{  10, 256, 0x92bf389e9eadac74, 0x0000000b26668c09 },	/* 1.072 */
+	{  11, 256, 0x74ccebf1dcf3ae80, 0x0000000dd691358c },	/* 1.083 */
+	{  12, 256, 0x8847e41a1a9f5671, 0x00000010a0c63c8e },	/* 1.097 */
+	{  13, 256, 0x7481b56debf0e637, 0x0000001424121fe4 },	/* 1.100 */
+	{  14, 256, 0x559b8c44065f8967, 0x00000016ab2ff079 },	/* 1.121 */
+	{  15, 256, 0x34c49545a2ee7f01, 0x0000001a6028efd6 },	/* 1.103 */
+	{  16, 256, 0xb85f4fa81a7698f7, 0x0000001e95ff5e66 },	/* 1.111 */
+	{  17, 256, 0x6353e47b7e47aba0, 0x00000021a81fa0fe },	/* 1.133 */
+	{  18, 256, 0xaa549746b1cbb81c, 0x00000026f02494c9 },	/* 1.131 */
+	{  19, 256, 0x892e343f2f31d690, 0x00000029eb392835 },	/* 1.130 */
+	{  20, 256, 0x76914824db98cc3f, 0x0000003004f31a7c },	/* 1.141 */
+	{  21, 256, 0x4b3cbabf9cfb1d0f, 0x00000036363a2408 },	/* 1.139 */
+	{  22, 256, 0xf45c77abb4f035d4, 0x00000038dd0f3e84 },	/* 1.150 */
+	{  23, 256, 0x5e18bd7f3fd4baf4, 0x0000003f0660391f },	/* 1.174 */
+	{  24, 256, 0xa7b3a4d285d6503b, 0x000000443dfc9ff6 },	/* 1.168 */
+	{  25, 256, 0x56ac7dd967521f5a, 0x0000004b03a87eb7 },	/* 1.180 */
+	{  26, 256, 0x3a42dfda4eb880f7, 0x000000522c719bba },	/* 1.226 */
+	{  27, 256, 0xd200d2fc6b54bf60, 0x0000005760b4fdf5 },	/* 1.228 */
+	{  28, 256, 0xc52605bbd486c546, 0x0000005e00d8f74c },	/* 1.217 */
+	{  29, 256, 0xc761779e63cd762f, 0x00000067be3cd85c },	/* 1.239 */
+	{  30, 256, 0xca577b1e07f85ca5, 0x0000006f5517f3e4 },	/* 1.238 */
+	{  31, 256, 0xfd50a593c518b3d4, 0x0000007370e7778f },	/* 1.273 */
+	{  32, 512, 0xc6c87ba5b042650b, 0x000000f7eb08a156 },	/* 1.191 */
+	{  33, 512, 0xc3880d0c9d458304, 0x0000010734b5d160 },	/* 1.199 */
+	{  34, 512, 0xe920927e4d8b2c97, 0x00000118c1edbce0 },	/* 1.195 */
+	{  35, 512, 0x8da7fcda87bde316, 0x0000012a3e9f9110 },	/* 1.201 */
+	{  36, 512, 0xcf09937491514a29, 0x0000013bd6a24bef },	/* 1.194 */
+	{  37, 512, 0x9b5abbf345cbd7cc, 0x0000014b9d90fac3 },	/* 1.237 */
+	{  38, 512, 0x506312a44668d6a9, 0x0000015e1b5f6148 },	/* 1.242 */
+	{  39, 512, 0x71659ede62b4755f, 0x00000173ef029bcd },	/* 1.231 */
+	{  40, 512, 0xa7fde73fb74cf2d7, 0x000001866fb72748 },	/* 1.233 */
+	{  41, 512, 0x19e8b461a1dea1d3, 0x000001a046f76b23 },	/* 1.271 */
+	{  42, 512, 0x031c9b868cc3e976, 0x000001afa64c49d3 },	/* 1.263 */
+	{  43, 512, 0xbaa5125faa781854, 0x000001c76789e278 },	/* 1.270 */
+	{  44, 512, 0x4ed55052550d721b, 0x000001d800ccd8eb },	/* 1.281 */
+	{  45, 512, 0x0fd63ddbdff90677, 0x000001f08ad59ed2 },	/* 1.282 */
+	{  46, 512, 0x36d66546de7fdd6f, 0x000002016f09574b },	/* 1.286 */
+	{  47, 512, 0x99f997e7eafb69d7, 0x0000021e42e47cb6 },	/* 1.329 */
+	{  48, 512, 0xbecd9c2571312c5d, 0x000002320fe2872b },	/* 1.286 */
+	{  49, 512, 0xd97371329e488a32, 0x0000024cd73f2ca7 },	/* 1.322 */
+	{  50, 512, 0x30e9b136670749ee, 0x000002681c83b0e0 },	/* 1.335 */
+	{  51, 512, 0x11ad6bc8f47aaeb4, 0x0000027e9261b5d5 },	/* 1.305 */
+	{  52, 512, 0x68e445300af432c1, 0x0000029aa0eb7dbf },	/* 1.330 */
+	{  53, 512, 0x910fb561657ea98c, 0x000002b3dca04853 },	/* 1.365 */
+	{  54, 512, 0xd619693d8ce5e7a5, 0x000002cc280e9c97 },	/* 1.334 */
+	{  55, 512, 0x24e281f564dbb60a, 0x000002e9fa842713 },	/* 1.364 */
+	{  56, 512, 0x947a7d3bdaab44c5, 0x000003046680f72e },	/* 1.374 */
+	{  57, 512, 0x2d44fec9c093e0de, 0x00000324198ba810 },	/* 1.363 */
+	{  58, 512, 0x87743c272d29bb4c, 0x0000033ec48c9ac9 },	/* 1.401 */
+	{  59, 512, 0x96aa3b6f67f5d923, 0x0000034faead902c },	/* 1.392 */
+	{  60, 512, 0x94a4f1faf520b0d3, 0x0000037d713ab005 },	/* 1.360 */
+	{  61, 512, 0xb13ed3a272f711a2, 0x00000397368f3cbd },	/* 1.396 */
+	{  62, 512, 0x3b1b11805fa4a64a, 0x000003b8a5e2840c },	/* 1.453 */
+	{  63, 512, 0x4c74caad9172ba71, 0x000003d4be280290 },	/* 1.437 */
+	{  64, 512, 0x035ff643923dd29e, 0x000003fad6c355e1 },	/* 1.402 */
+	{  65, 512, 0x768e9171b11abd3c, 0x0000040eb07fed20 },	/* 1.459 */
+	{  66, 512, 0x75880e6f78a13ddd, 0x000004433d6acf14 },	/* 1.423 */
+	{  67, 512, 0x910b9714f698a877, 0x00000451ea65d5db },	/* 1.447 */
+	{  68, 512, 0x87f5db6f9fdcf5c7, 0x000004732169e3f7 },	/* 1.450 */
+	{  69, 512, 0x836d4968fbaa3706, 0x000004954068a380 },	/* 1.455 */
+	{  70, 512, 0xc567d73a036421ab, 0x000004bd7cb7bd3d },	/* 1.463 */
+	{  71, 512, 0x619df40f240b8fed, 0x000004e376c2e972 },	/* 1.463 */
+	{  72, 512, 0x42763a680d5bed8e, 0x000005084275c680 },	/* 1.452 */
+	{  73, 512, 0x5866f064b3230431, 0x0000052906f2c9ab },	/* 1.498 */
+	{  74, 512, 0x9fa08548b1621a44, 0x0000054708019247 },	/* 1.526 */
+	{  75, 512, 0xb6053078ce0fc303, 0x00000572cc5c72b0 },	/* 1.491 */
+	{  76, 512, 0x4a7aad7bf3890923, 0x0000058e987bc8e9 },	/* 1.470 */
+	{  77, 512, 0xe165613fd75b5a53, 0x000005c20473a211 },	/* 1.527 */
+	{  78, 512, 0x3ff154ac878163a6, 0x000005d659194bf3 },	/* 1.509 */
+	{  79, 512, 0x24b93ade0aa8a532, 0x0000060a201c4f8e },	/* 1.569 */
+	{  80, 512, 0xc18e2d14cd9bb554, 0x0000062c55cfe48c },	/* 1.555 */
+	{  81, 512, 0x98cc78302feb58b6, 0x0000066656a07194 },	/* 1.509 */
+	{  82, 512, 0xc6c5fd5a2abc0543, 0x0000067cff94fbf8 },	/* 1.596 */
+	{  83, 512, 0xa7962f514acbba21, 0x000006ab7b5afa2e },	/* 1.568 */
+	{  84, 512, 0xba02545069ddc6dc, 0x000006d19861364f },	/* 1.541 */
+	{  85, 512, 0x447c73192c35073e, 0x000006fce315ce35 },	/* 1.623 */
+	{  86, 512, 0x48beef9e2d42b0c2, 0x00000720a8e38b6b },	/* 1.620 */
+	{  87, 512, 0x4874cf98541a35e0, 0x00000758382a2273 },	/* 1.597 */
+	{  88, 512, 0xad4cf8333a31127a, 0x00000781e1651b1b },	/* 1.575 */
+	{  89, 512, 0x47ae4859d57888c1, 0x000007b27edbe5bc },	/* 1.627 */
+	{  90, 512, 0x06f7723cfe5d1891, 0x000007dc2a96d8eb },	/* 1.596 */
+	{  91, 512, 0xd4e44218d660576d, 0x0000080ac46f02d5 },	/* 1.622 */
+	{  92, 512, 0x7066702b0d5be1f2, 0x00000832c96d154e },	/* 1.695 */
+	{  93, 512, 0x011209b4f9e11fb9, 0x0000085eefda104c },	/* 1.605 */
+	{  94, 512, 0x47ffba30a0b35708, 0x00000899badc32dc },	/* 1.625 */
+	{  95, 512, 0x1a95a6ac4538aaa8, 0x000008b6b69a42b2 },	/* 1.687 */
+	{  96, 512, 0xbda2b239bb2008eb, 0x000008f22d2de38a },	/* 1.621 */
+	{  97, 512, 0x7ffa0bea90355c6c, 0x0000092e5b23b816 },	/* 1.699 */
+	{  98, 512, 0x1d56ba34be426795, 0x0000094f482e5d1b },	/* 1.688 */
+	{  99, 512, 0x0aa89d45c502e93d, 0x00000977d94a98ce },	/* 1.642 */
+	{ 100, 512, 0x54369449f6857774, 0x000009c06c9b34cc },	/* 1.683 */
+	{ 101, 512, 0xf7d4dd8445b46765, 0x000009e5dc542259 },	/* 1.755 */
+	{ 102, 512, 0xfa8866312f169469, 0x00000a16b54eae93 },	/* 1.692 */
+	{ 103, 512, 0xd8a5aea08aef3ff9, 0x00000a381d2cbfe7 },	/* 1.747 */
+	{ 104, 512, 0x66bcd2c3d5f9ef0e, 0x00000a8191817be7 },	/* 1.751 */
+	{ 105, 512, 0x3fb13a47a012ec81, 0x00000ab562b9a254 },	/* 1.751 */
+	{ 106, 512, 0x43100f01c9e5e3ca, 0x00000aeee84c185f },	/* 1.726 */
+	{ 107, 512, 0xca09c50ccee2d054, 0x00000b1c359c047d },	/* 1.788 */
+	{ 108, 512, 0xd7176732ac503f9b, 0x00000b578bc52a73 },	/* 1.740 */
+	{ 109, 512, 0xed206e51f8d9422d, 0x00000b8083e0d960 },	/* 1.780 */
+	{ 110, 512, 0x17ead5dc6ba0dcd6, 0x00000bcfb1a32ca8 },	/* 1.836 */
+	{ 111, 512, 0x5f1dc21e38a969eb, 0x00000c0171becdd6 },	/* 1.778 */
+	{ 112, 512, 0xddaa973de33ec528, 0x00000c3edaba4b95 },	/* 1.831 */
+	{ 113, 512, 0x2a5eccd7735a3630, 0x00000c630664e7df },	/* 1.825 */
+	{ 114, 512, 0xafcccee5c0b71446, 0x00000cb65392f6e4 },	/* 1.826 */
+	{ 115, 512, 0x8fa30c5e7b147e27, 0x00000cd4db391e55 },	/* 1.843 */
+	{ 116, 512, 0x5afe0711fdfafd82, 0x00000d08cb4ec35d },	/* 1.826 */
+	{ 117, 512, 0x533a6090238afd4c, 0x00000d336f115d1b },	/* 1.803 */
+	{ 118, 512, 0x90cf11b595e39a84, 0x00000d8e041c2048 },	/* 1.857 */
+	{ 119, 512, 0x0d61a3b809444009, 0x00000dcb798afe35 },	/* 1.877 */
+	{ 120, 512, 0x7f34da0f54b0d114, 0x00000df3922664e1 },	/* 1.849 */
+	{ 121, 512, 0xa52258d5b72f6551, 0x00000e4d37a9872d },	/* 1.867 */
+	{ 122, 512, 0xc1de54d7672878db, 0x00000e6583a94cf6 },	/* 1.978 */
+	{ 123, 512, 0x1d03354316a414ab, 0x00000ebffc50308d },	/* 1.947 */
+	{ 124, 512, 0xcebdcc377665412c, 0x00000edee1997cea },	/* 1.865 */
+	{ 125, 512, 0x4ddd4c04b1a12344, 0x00000f21d64b373f },	/* 1.881 */
+	{ 126, 512, 0x64fc8f94e3973658, 0x00000f8f87a8896b },	/* 1.882 */
+	{ 127, 512, 0x68765f78034a334e, 0x00000fb8fe62197e },	/* 1.867 */
+	{ 128, 512, 0xaf36b871a303e816, 0x00000fec6f3afb1e },	/* 1.972 */
+	{ 129, 512, 0x2a4cbf73866c3a28, 0x00001027febfe4e5 },	/* 1.896 */
+	{ 130, 512, 0x9cb128aacdcd3b2f, 0x0000106aa8ac569d },	/* 1.965 */
+	{ 131, 512, 0x5511d41c55869124, 0x000010bbd755ddf1 },	/* 1.963 */
+	{ 132, 512, 0x42f92461937f284a, 0x000010fb8bceb3b5 },	/* 1.925 */
+	{ 133, 512, 0xe2d89a1cf6f1f287, 0x0000114cf5331e34 },	/* 1.862 */
+	{ 134, 512, 0xdc631a038956200e, 0x0000116428d2adc5 },	/* 2.042 */
+	{ 135, 512, 0xb2e5ac222cd236be, 0x000011ca88e4d4d2 },	/* 1.935 */
+	{ 136, 512, 0xbc7d8236655d88e7, 0x000011e39cb94e66 },	/* 2.005 */
+	{ 137, 512, 0x073e02d88d2d8e75, 0x0000123136c7933c },	/* 2.041 */
+	{ 138, 512, 0x3ddb9c3873166be0, 0x00001280e4ec6d52 },	/* 1.997 */
+	{ 139, 512, 0x7d3b1a845420e1b5, 0x000012c2e7cd6a44 },	/* 1.996 */
+	{ 140, 512, 0x60102308aa7b2a6c, 0x000012fc490e6c7d },	/* 2.053 */
+	{ 141, 512, 0xdb22bb2f9eb894aa, 0x00001343f5a85a1a },	/* 1.971 */
+	{ 142, 512, 0xd853f879a13b1606, 0x000013bb7d5f9048 },	/* 2.018 */
+	{ 143, 512, 0x001620a03f804b1d, 0x000013e74cc794fd },	/* 1.961 */
+	{ 144, 512, 0xfdb52dda76fbf667, 0x00001442d2f22480 },	/* 2.046 */
+	{ 145, 512, 0xa9160110f66e24ff, 0x0000144b899f9dbb },	/* 1.968 */
+	{ 146, 512, 0x77306a30379ae03b, 0x000014cb98eb1f81 },	/* 2.143 */
+	{ 147, 512, 0x14f5985d2752319d, 0x000014feab821fc9 },	/* 2.064 */
+	{ 148, 512, 0xa4b8ff11de7863f8, 0x0000154a0e60b9c9 },	/* 2.023 */
+	{ 149, 512, 0x44b345426455c1b3, 0x000015999c3c569c },	/* 2.136 */
+	{ 150, 512, 0x272677826049b46c, 0x000015c9697f4b92 },	/* 2.063 */
+	{ 151, 512, 0x2f9216e2cd74fe40, 0x0000162b1f7bbd39 },	/* 1.974 */
+	{ 152, 512, 0x706ae3e763ad8771, 0x00001661371c55e1 },	/* 2.210 */
+	{ 153, 512, 0xf7fd345307c2480e, 0x000016e251f28b6a },	/* 2.006 */
+	{ 154, 512, 0x6e94e3d26b3139eb, 0x000016f2429bb8c6 },	/* 2.193 */
+	{ 155, 512, 0x5458bbfbb781fcba, 0x0000173efdeca1b9 },	/* 2.163 */
+	{ 156, 512, 0xa80e2afeccd93b33, 0x000017bfdcb78adc },	/* 2.046 */
+	{ 157, 512, 0x1e4ccbb22796cf9d, 0x00001826fdcc39c9 },	/* 2.084 */
+	{ 158, 512, 0x8fba4b676aaa3663, 0x00001841a1379480 },	/* 2.264 */
+	{ 159, 512, 0xf82b843814b315fa, 0x000018886e19b8a3 },	/* 2.074 */
+	{ 160, 512, 0x7f21e920ecf753a3, 0x0000191812ca0ea7 },	/* 2.282 */
+	{ 161, 512, 0x48bb8ea2c4caa620, 0x0000192f310faccf },	/* 2.148 */
+	{ 162, 512, 0x5cdb652b4952c91b, 0x0000199e1d7437c7 },	/* 2.355 */
+	{ 163, 512, 0x6ac1ba6f78c06cd4, 0x000019cd11f82c70 },	/* 2.164 */
+	{ 164, 512, 0x9faf5f9ca2669a56, 0x00001a18d5431f6a },	/* 2.393 */
+	{ 165, 512, 0xaa57e9383eb01194, 0x00001a9e7d253d85 },	/* 2.178 */
+	{ 166, 512, 0x896967bf495c34d2, 0x00001afb8319b9fc },	/* 2.334 */
+	{ 167, 512, 0xdfad5f05de225f1b, 0x00001b3a59c3093b },	/* 2.266 */
+	{ 168, 512, 0xfd299a99f9f2abdd, 0x00001bb6f1a10799 },	/* 2.304 */
+	{ 169, 512, 0xdda239e798fe9fd4, 0x00001bfae0c9692d },	/* 2.218 */
+	{ 170, 512, 0x5fca670414a32c3e, 0x00001c22129dbcff },	/* 2.377 */
+	{ 171, 512, 0x1bb8934314b087de, 0x00001c955db36cd0 },	/* 2.155 */
+	{ 172, 512, 0xd96394b4b082200d, 0x00001cfc8619b7e6 },	/* 2.404 */
+	{ 173, 512, 0xb612a7735b1c8cbc, 0x00001d303acdd585 },	/* 2.205 */
+	{ 174, 512, 0x28e7430fe5875fe1, 0x00001d7ed5b3697d },	/* 2.359 */
+	{ 175, 512, 0x5038e89efdd981b9, 0x00001dc40ec35c59 },	/* 2.158 */
+	{ 176, 512, 0x075fd78f1d14db7c, 0x00001e31c83b4a2b },	/* 2.614 */
+	{ 177, 512, 0xc50fafdb5021be15, 0x00001e7cdac82fbc },	/* 2.239 */
+	{ 178, 512, 0xe6dc7572ce7b91c7, 0x00001edd8bb454fc },	/* 2.493 */
+	{ 179, 512, 0x21f7843e7beda537, 0x00001f3a8e019d6c },	/* 2.327 */
+	{ 180, 512, 0xc83385e20b43ec82, 0x00001f70735ec137 },	/* 2.231 */
+	{ 181, 512, 0xca818217dddb21fd, 0x0000201ca44c5a3c },	/* 2.237 */
+	{ 182, 512, 0xe6035defea48f933, 0x00002038e3346658 },	/* 2.691 */
+	{ 183, 512, 0x47262a4f953dac5a, 0x000020c2e554314e },	/* 2.170 */
+	{ 184, 512, 0xe24c7246260873ea, 0x000021197e618d64 },	/* 2.600 */
+	{ 185, 512, 0xeef6b57c9b58e9e1, 0x0000217ea48ecddc },	/* 2.391 */
+	{ 186, 512, 0x2becd3346e386142, 0x000021c496d4a5f9 },	/* 2.677 */
+	{ 187, 512, 0x63c6207bdf3b40a3, 0x0000220e0f2eec0c },	/* 2.410 */
+	{ 188, 512, 0x3056ce8989767d4b, 0x0000228eb76cd137 },	/* 2.776 */
+	{ 189, 512, 0x91af61c307cee780, 0x000022e17e2ea501 },	/* 2.266 */
+	{ 190, 512, 0xda359da225f6d54f, 0x00002358a2debc19 },	/* 2.717 */
+	{ 191, 512, 0x0a5f7a2a55607ba0, 0x0000238a79dac18c },	/* 2.474 */
+	{ 192, 512, 0x27bb75bf5224638a, 0x00002403a58e2351 },	/* 2.673 */
+	{ 193, 512, 0x1ebfdb94630f5d0f, 0x00002492a10cb339 },	/* 2.420 */
+	{ 194, 512, 0x6eae5e51d9c5f6fb, 0x000024ce4bf98715 },	/* 2.898 */
+	{ 195, 512, 0x08d903b4daedc2e0, 0x0000250d1e15886c },	/* 2.363 */
+	{ 196, 512, 0xc722a2f7fa7cd686, 0x0000258a99ed0c9e },	/* 2.747 */
+	{ 197, 512, 0x8f71faf0e54e361d, 0x000025dee11976f5 },	/* 2.531 */
+	{ 198, 512, 0x87f64695c91a54e7, 0x0000264e00a43da0 },	/* 2.707 */
+	{ 199, 512, 0xc719cbac2c336b92, 0x000026d327277ac1 },	/* 2.315 */
+	{ 200, 512, 0xe7e647afaf771ade, 0x000027523a5c44bf },	/* 3.012 */
+	{ 201, 512, 0x12d4b5c38ce8c946, 0x0000273898432545 },	/* 2.378 */
+	{ 202, 512, 0xf2e0cd4067bdc94a, 0x000027e47bb2c935 },	/* 2.969 */
+	{ 203, 512, 0x21b79f14d6d947d3, 0x0000281e64977f0d },	/* 2.594 */
+	{ 204, 512, 0x515093f952f18cd6, 0x0000289691a473fd },	/* 2.763 */
+	{ 205, 512, 0xd47b160a1b1022c8, 0x00002903e8b52411 },	/* 2.457 */
+	{ 206, 512, 0xc02fc96684715a16, 0x0000297515608601 },	/* 3.057 */
+	{ 207, 512, 0xef51e68efba72ed0, 0x000029ef73604804 },	/* 2.590 */
+	{ 208, 512, 0x9e3be6e5448b4f33, 0x00002a2846ed074b },	/* 3.047 */
+	{ 209, 512, 0x81d446c6d5fec063, 0x00002a92ca693455 },	/* 2.676 */
+	{ 210, 512, 0xff215de8224e57d5, 0x00002b2271fe3729 },	/* 2.993 */
+	{ 211, 512, 0xe2524d9ba8f69796, 0x00002b64b99c3ba2 },	/* 2.457 */
+	{ 212, 512, 0xf6b28e26097b7e4b, 0x00002bd768b6e068 },	/* 3.182 */
+	{ 213, 512, 0x893a487f30ce1644, 0x00002c67f722b4b2 },	/* 2.563 */
+	{ 214, 512, 0x386566c3fc9871df, 0x00002cc1cf8b4037 },	/* 3.025 */
+	{ 215, 512, 0x1e0ed78edf1f558a, 0x00002d3948d36c7f },	/* 2.730 */
+	{ 216, 512, 0xe3bc20c31e61f113, 0x00002d6d6b12e025 },	/* 3.036 */
+	{ 217, 512, 0xd6c3ad2e23021882, 0x00002deff7572241 },	/* 2.722 */
+	{ 218, 512, 0xb4a9f95cf0f69c5a, 0x00002e67d537aa36 },	/* 3.356 */
+	{ 219, 512, 0x6e98ed6f6c38e82f, 0x00002e9720626789 },	/* 2.697 */
+	{ 220, 512, 0x2e01edba33fddac7, 0x00002f407c6b0198 },	/* 2.979 */
+	{ 221, 512, 0x559d02e1f5f57ccc, 0x00002fb6a5ab4f24 },	/* 2.858 */
+	{ 222, 512, 0xac18f5a916adcd8e, 0x0000304ae1c5c57e },	/* 3.258 */
+	{ 223, 512, 0x15789fbaddb86f4b, 0x0000306f6e019c78 },	/* 2.693 */
+	{ 224, 512, 0xf4a9c36d5bc4c408, 0x000030da40434213 },	/* 3.259 */
+	{ 225, 512, 0xf640f90fd2727f44, 0x00003189ed37b90c },	/* 2.733 */
+	{ 226, 512, 0xb5313d390d61884a, 0x000031e152616b37 },	/* 3.235 */
+	{ 227, 512, 0x4bae6b3ce9160939, 0x0000321f40aeac42 },	/* 2.983 */
+	{ 228, 512, 0x838c34480f1a66a1, 0x000032f389c0f78e },	/* 3.308 */
+	{ 229, 512, 0xb1c4a52c8e3d6060, 0x0000330062a40284 },	/* 2.715 */
+	{ 230, 512, 0xe0f1110c6d0ed822, 0x0000338be435644f },	/* 3.540 */
+	{ 231, 512, 0x9f1a8ccdcea68d4b, 0x000034045a4e97e1 },	/* 2.779 */
+	{ 232, 512, 0x3261ed62223f3099, 0x000034702cfc401c },	/* 3.084 */
+	{ 233, 512, 0xf2191e2311022d65, 0x00003509dd19c9fc },	/* 2.987 */
+	{ 234, 512, 0xf102a395c2033abc, 0x000035654dc96fae },	/* 3.341 */
+	{ 235, 512, 0x11fe378f027906b6, 0x000035b5193b0264 },	/* 2.793 */
+	{ 236, 512, 0xf777f2c026b337aa, 0x000036704f5d9297 },	/* 3.518 */
+	{ 237, 512, 0x1b04e9c2ee143f32, 0x000036dfbb7af218 },	/* 2.962 */
+	{ 238, 512, 0x2fcec95266f9352c, 0x00003785c8df24a9 },	/* 3.196 */
+	{ 239, 512, 0xfe2b0e47e427dd85, 0x000037cbdf5da729 },	/* 2.914 */
+	{ 240, 512, 0x72b49bf2225f6c6d, 0x0000382227c15855 },	/* 3.408 */
+	{ 241, 512, 0x50486b43df7df9c7, 0x0000389b88be6453 },	/* 2.903 */
+	{ 242, 512, 0x5192a3e53181c8ab, 0x000038ddf3d67263 },	/* 3.778 */
+	{ 243, 512, 0xe9f5d8365296fd5e, 0x0000399f1c6c9e9c },	/* 3.026 */
+	{ 244, 512, 0xc740263f0301efa8, 0x00003a147146512d },	/* 3.347 */
+	{ 245, 512, 0x23cd0f2b5671e67d, 0x00003ab10bcc0d9d },	/* 3.212 */
+	{ 246, 512, 0x002ccc7e5cd41390, 0x00003ad6cd14a6c0 },	/* 3.482 */
+	{ 247, 512, 0x9aafb3c02544b31b, 0x00003b8cb8779fb0 },	/* 3.146 */
+	{ 248, 512, 0x72ba07a78b121999, 0x00003c24142a5a3f },	/* 3.626 */
+	{ 249, 512, 0x3d784aa58edfc7b4, 0x00003cd084817d99 },	/* 2.952 */
+	{ 250, 512, 0xaab750424d8004af, 0x00003d506a8e098e },	/* 3.463 */
+	{ 251, 512, 0x84403fcf8e6b5ca2, 0x00003d4c54c2aec4 },	/* 3.131 */
+	{ 252, 512, 0x71eb7455ec98e207, 0x00003e655715cf2c },	/* 3.538 */
+	{ 253, 512, 0xd752b4f19301595b, 0x00003ecd7b2ca5ac },	/* 2.974 */
+	{ 254, 512, 0xc4674129750499de, 0x00003e99e86d3e95 },	/* 3.843 */
+	{ 255, 512, 0x9772baff5cd12ef5, 0x00003f895c019841 },	/* 3.088 */
+};
+
+/*
+ * Sanity check a generated permutation array.  Row i is accepted only if
+ * every entry is a valid child index that has been seen exactly i times
+ * in the rows before it, which means each child appears once per row.
+ * When a non-zero checksum is supplied, the first word of the fletcher-4
+ * checksum of the whole array must equal it as well.
+ *
+ * Returns 0 when the array is valid, EINVAL for a malformed row, or
+ * ECKSUM when the checksum does not match.
+ */
+static int
+verify_perms(uint8_t *perms, uint64_t children, uint64_t nperms,
+    uint64_t checksum)
+{
+	int seensz = sizeof (uint16_t) * children;
+	uint16_t *seen = kmem_zalloc(seensz, KM_SLEEP);
+	int error = 0;
+
+	for (int row = 0; error == 0 && row < nperms; row++) {
+		const uint8_t *cur = &perms[row * children];
+
+		for (int col = 0; col < children; col++) {
+			uint8_t id = cur[col];
+
+			/* A good entry bumps its tally; move to the next. */
+			if (id < children && seen[id] == row) {
+				seen[id]++;
+				continue;
+			}
+
+			error = EINVAL;
+			break;
+		}
+	}
+
+	/* The checksum is only consulted once every row has passed. */
+	if (error == 0 && checksum != 0) {
+		int permssz = sizeof (uint8_t) * children * nperms;
+		zio_cksum_t cksum;
+
+		fletcher_4_native_varsize(perms, permssz, &cksum);
+
+		if (cksum.zc_word[0] != checksum)
+			error = ECKSUM;
+	}
+
+	kmem_free(seen, seensz);
+
+	return (error);
+}
+
+/*
+ * Generate the permutation array for the draid_map_t.  These maps control
+ * the placement of all data in a dRAID.  Therefore it's critical that the
+ * seed always generates the same mapping.  We provide our own pseudo-random
+ * number generator for this purpose.
+ *
+ * The map's child count, seed, and permutation count are checked with
+ * VERIFY, and map->dm_perms must be NULL on entry.  On success 0 is
+ * returned and *permsp points to a vmem_alloc()'d array holding
+ * dm_nperms rows of dm_children bytes each.  If verify_perms() rejects
+ * the generated array it is freed here, *permsp is left untouched, and
+ * the verify_perms() error is returned.
+ */
+int
+vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
+{
+	VERIFY3U(map->dm_children, >=, VDEV_DRAID_MIN_CHILDREN);
+	VERIFY3U(map->dm_children, <=, VDEV_DRAID_MAX_CHILDREN);
+	VERIFY3U(map->dm_seed, !=, 0);
+	VERIFY3U(map->dm_nperms, !=, 0);
+	VERIFY3P(map->dm_perms, ==, NULL);
+
+#ifdef _KERNEL
+	/*
+	 * The kernel code always provides both a map_seed and checksum.
+	 * Only the tests/zfs-tests/cmd/draid/draid.c utility will provide
+	 * a zero checksum when generating new candidate maps.
+	 */
+	VERIFY3U(map->dm_checksum, !=, 0);
+#endif
+	uint64_t children = map->dm_children;
+	uint64_t nperms = map->dm_nperms;
+	int rowsz = sizeof (uint8_t) * children;
+	int permssz = rowsz * nperms;
+	uint8_t *perms;
+
+	/* Allocate the permutation array */
+	perms = vmem_alloc(permssz, KM_SLEEP);
+
+	/* Setup an initial row with a known pattern */
+	uint8_t *initial_row = kmem_alloc(rowsz, KM_SLEEP);
+	for (int i = 0; i < children; i++)
+		initial_row[i] = i;
+
+	uint64_t draid_seed[2] = { VDEV_DRAID_SEED, map->dm_seed };
+	uint8_t *current_row, *previous_row = initial_row;
+
+	/*
+	 * Perform a Fisher-Yates shuffle of each row using the previous
+	 * row as the starting point.  An initial_row with known pattern
+	 * is used as the input for the first row.
+	 *
+	 * Each row starts as a copy of the row before it, so the rows must
+	 * be generated in order.  Within a row, position j is swapped with
+	 * a position k drawn from [0, j].  draid_seed is handed to
+	 * vdev_draid_rand() on every draw (presumably it is updated in place
+	 * as the generator state -- confirm against vdev_draid_rand()).
+	 */
+	for (int i = 0; i < nperms; i++) {
+		current_row = &perms[i * children];
+		memcpy(current_row, previous_row, rowsz);
+
+		for (int j = children - 1; j > 0; j--) {
+			uint64_t k = vdev_draid_rand(draid_seed) % (j + 1);
+			uint8_t val = current_row[j];
+			current_row[j] = current_row[k];
+			current_row[k] = val;
+		}
+
+		previous_row = current_row;
+	}
+
+	kmem_free(initial_row, rowsz);
+
+	int error = verify_perms(perms, children, nperms, map->dm_checksum);
+	if (error) {
+		vmem_free(perms, permssz);
+		return (error);
+	}
+
+	*permsp = perms;
+
+	return (0);
+}
+
+/*
+ * Find the built-in draid_map_t whose dm_children equals the requested
+ * child count.  Returns 0 with *mapp set on a match, otherwise ENOENT.
+ */
+int
+vdev_draid_lookup_map(uint64_t children, const draid_map_t **mapp)
+{
+	const draid_map_t *end = &draid_maps[VDEV_DRAID_MAX_MAPS];
+
+	for (const draid_map_t *dm = draid_maps; dm != end; dm++) {
+		if (dm->dm_children != children)
+			continue;
+
+		*mapp = dm;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Translate a permutation index into a row of the permutation array and
+ * an iteration id.  The index wraps at vdc_nperms * vdc_children; on
+ * return *base points at the selected row of vdc_perms and *iter holds
+ * the index's position within that row's span of vdc_children entries.
+ */
+static void
+vdev_draid_get_perm(vdev_draid_config_t *vdc, uint64_t pindex,
+    uint8_t **base, uint64_t *iter)
+{
+	uint64_t width = vdc->vdc_children;
+	uint64_t wrapped = pindex % (vdc->vdc_nperms * width);
+	uint64_t row = wrapped / width;
+
+	*iter = wrapped % width;
+	*base = &vdc->vdc_perms[row * width];
+}
+
+static inline uint64_t
+vdev_draid_permute_id(vdev_draid_config_t *vdc,
+    uint8_t *base, uint64_t iter, uint64_t index)
+{
+	/* Offset the mapped child id by iter, wrapping at the child count. */
+	uint64_t shifted = base[index] + iter;
+
+	return (shifted % vdc->vdc_children);
+}
+
+/*
+ * Return the asize for a psize, i.e. vdev_draid_psize_to_asize().  The
+ * psize is rounded up to a whole number of (vdc_ndata << ashift) byte
+ * stripes and each stripe is charged at the full group width.
+ */
+static uint64_t
+vdev_draid_asize(vdev_t *vd, uint64_t psize)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t shift = vd->vdev_ashift;
+	uint64_t stripe_bytes = vdc->vdc_ndata << shift;
+
+	/* Round up: the stripe holding the last byte of psize counts too. */
+	uint64_t nstripes = 1 + ((psize - 1) / stripe_bytes);
+	uint64_t asize = (nstripes * vdc->vdc_groupwidth) << shift;
+
+	ASSERT3U(asize, !=, 0);
+	ASSERT3U(asize % (vdc->vdc_groupwidth), ==, 0);
+
+	return (asize);
+}
+
+/*
+ * Convert an asize back to a psize by dropping the parity share: every
+ * vdc_groupwidth units of asize contribute vdc_ndata units of psize.
+ * The asize is expected to be a multiple of the group width.
+ */
+uint64_t
+vdev_draid_asize_to_psize(vdev_t *vd, uint64_t asize)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	ASSERT0(asize % vdc->vdc_groupwidth);
+
+	uint64_t nstripes = asize / vdc->vdc_groupwidth;
+
+	return (nstripes * vdc->vdc_ndata);
+}
+
+/*
+ * Return the number of the group containing the given logical offset,
+ * i.e. the offset divided by the group size.
+ */
+static uint64_t
+vdev_draid_offset_to_group(vdev_t *vd, uint64_t offset)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	const vdev_draid_config_t *cfg = vd->vdev_tsd;
+	uint64_t group = offset / cfg->vdc_groupsz;
+
+	return (group);
+}
+
+/*
+ * Return the logical offset at which the given group begins, i.e. the
+ * group number multiplied by the group size.
+ */
+static uint64_t
+vdev_draid_group_to_offset(vdev_t *vd, uint64_t group)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	const vdev_draid_config_t *cfg = vd->vdev_tsd;
+	uint64_t start = group * cfg->vdc_groupsz;
+
+	return (start);
+}
+
+/*
+ * Full stripe writes.  When writing, all columns (D+P) are required.  Parity
+ * is calculated over all the columns, including empty zero filled sectors,
+ * and each is written to disk.  While only the data columns are needed for
+ * a normal read, all of the columns are required for reconstruction when
+ * performing a sequential resilver.
+ *
+ * For "big columns" it's sufficient to map the correct range of the zio ABD.
+ * Partial columns require allocating a gang ABD in order to zero fill the
+ * empty sectors.  When the column is empty a zero filled sector must be
+ * mapped.  In all cases the data ABDs must be the same size as the parity
+ * ABDs (e.g. rc->rc_size == parity_size).
+ *
+ * zio        - the write zio; column data is mapped from zio->io_abd
+ * abd_offset - offset into zio->io_abd of the first data column's data
+ * rr         - row to populate; rr_col[0].rc_size supplies the parity size
+ *
+ * Only the data columns, rr_firstdatacol through rr_cols - 1, are touched.
+ * On return each of them has rc_abd set and rc_size raised to the parity
+ * size; the running ABD offset advances by each column's original rc_size.
+ */
+static void
+vdev_draid_map_alloc_write(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
+{
+	uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
+	uint64_t parity_size = rr->rr_col[0].rc_size;
+	uint64_t abd_off = abd_offset;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
+	ASSERT3U(parity_size, ==, abd_get_size(rr->rr_col[0].rc_abd));
+
+	for (uint64_t c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		if (rc->rc_size == 0) {
+			/* empty data column (small write), add a skip sector */
+			ASSERT3U(skip_size, ==, parity_size);
+			rc->rc_abd = abd_get_zeros(skip_size);
+		} else if (rc->rc_size == parity_size) {
+			/* this is a "big column" */
+			rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+			    zio->io_abd, abd_off, rc->rc_size);
+		} else {
+			/* short data column, add a skip sector */
+			ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
+			rc->rc_abd = abd_alloc_gang();
+			abd_gang_add(rc->rc_abd, abd_get_offset_size(
+			    zio->io_abd, abd_off, rc->rc_size), B_TRUE);
+			abd_gang_add(rc->rc_abd, abd_get_zeros(skip_size),
+			    B_TRUE);
+		}
+
+		ASSERT3U(abd_get_size(rc->rc_abd), ==, parity_size);
+
+		abd_off += rc->rc_size;
+		rc->rc_size = parity_size;
+	}
+
+	IMPLY(abd_offset != 0, abd_off == zio->io_size);
+}
+
+/*
+ * Scrub/resilver reads.  In order to store the contents of the skip sectors
+ * an additional ABD is allocated.  The columns are handled in the same way
+ * as a full stripe write except instead of using the zero ABD the newly
+ * allocated skip ABD is used to back the skip sectors.  In all cases the
+ * data ABD must be the same size as the parity ABDs.
+ *
+ * zio        - the read being mapped
+ * abd_offset - offset into zio->io_abd of this row's first data byte
+ * rr         - the row to populate; rr_abd_empty must still be NULL
+ */
+static void
+vdev_draid_map_alloc_scrub(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
+{
+	uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
+	uint64_t parity_size = rr->rr_col[0].rc_size;
+	uint64_t abd_off = abd_offset;
+	uint64_t skip_off = 0;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+	ASSERT3P(rr->rr_abd_empty, ==, NULL);
+
+	/* One linear buffer backs all of the row's skip sectors. */
+	if (rr->rr_nempty > 0) {
+		rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
+		    B_FALSE);
+	}
+
+	for (uint64_t c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		if (rc->rc_size == 0) {
+			/* empty data column (small read), add a skip sector */
+			ASSERT3U(skip_size, ==, parity_size);
+			ASSERT3U(rr->rr_nempty, !=, 0);
+			rc->rc_abd = abd_get_offset_size(rr->rr_abd_empty,
+			    skip_off, skip_size);
+			skip_off += skip_size;
+		} else if (rc->rc_size == parity_size) {
+			/* this is a "big column" */
+			rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+			    zio->io_abd, abd_off, rc->rc_size);
+		} else {
+			/* short data column, add a skip sector */
+			ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
+			ASSERT3U(rr->rr_nempty, !=, 0);
+			rc->rc_abd = abd_alloc_gang();
+			abd_gang_add(rc->rc_abd, abd_get_offset_size(
+			    zio->io_abd, abd_off, rc->rc_size), B_TRUE);
+			abd_gang_add(rc->rc_abd, abd_get_offset_size(
+			    rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
+			skip_off += skip_size;
+		}
+
+		uint64_t abd_size = abd_get_size(rc->rc_abd);
+		ASSERT3U(abd_size, ==, abd_get_size(rr->rr_col[0].rc_abd));
+
+		/*
+		 * Increase rc_size so the skip ABD is included in subsequent
+		 * parity calculations.  The zio ABD offset is advanced by the
+		 * original (unpadded) size first.
+		 */
+		abd_off += rc->rc_size;
+		rc->rc_size = abd_size;
+	}
+
+	IMPLY(abd_offset != 0, abd_off == zio->io_size);
+	/* Every skip sector in the empty ABD must have been handed out. */
+	ASSERT3U(skip_off, ==, rr->rr_nempty * skip_size);
+}
+
+/*
+ * Normal reads.  In this common case only the columns containing data
+ * are read in to the zio ABDs.  Neither the parity columns nor the empty
+ * skip sectors are read unless the checksum fails verification, in which
+ * case vdev_raidz_read_all() will call vdev_draid_map_alloc_empty() to
+ * expand the raid map in order to allow reconstruction using the parity
+ * data and skip sectors.
+ */
+static void
+vdev_draid_map_alloc_read(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
+{
+	uint64_t abd_off = abd_offset;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+
+	for (uint64_t c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		/* Columns without data are left unmapped. */
+		if (rc->rc_size == 0)
+			continue;
+
+		/* Map this column's slice of the zio ABD and move past it. */
+		rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+		    zio->io_abd, abd_off, rc->rc_size);
+		abd_off += rc->rc_size;
+	}
+
+	IMPLY(abd_offset != 0, abd_off == zio->io_size);
+}
+
+/*
+ * Converts a normal "read" raidz_row_t to a "scrub" raidz_row_t. The key
+ * difference is that an ABD is allocated to back skip sectors so they may
+ * be read in to memory, verified, and repaired if needed.
+ *
+ * The row must not already own a skip ABD (rr_abd_empty == NULL).  On
+ * return every data column has rc_size == parity_size.
+ */
+void
+vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
+{
+	uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
+	uint64_t parity_size = rr->rr_col[0].rc_size;
+	uint64_t skip_off = 0;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+	ASSERT3P(rr->rr_abd_empty, ==, NULL);
+
+	/* One linear buffer backs all of the row's skip sectors. */
+	if (rr->rr_nempty > 0) {
+		rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
+		    B_FALSE);
+	}
+
+	for (uint64_t c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		if (rc->rc_size == 0) {
+			/* empty data column (small read), add a skip sector */
+			ASSERT3U(skip_size, ==, parity_size);
+			ASSERT3U(rr->rr_nempty, !=, 0);
+			ASSERT3P(rc->rc_abd, ==, NULL);
+			rc->rc_abd = abd_get_offset_size(rr->rr_abd_empty,
+			    skip_off, skip_size);
+			skip_off += skip_size;
+		} else if (rc->rc_size == parity_size) {
+			/* this is a "big column", nothing to add */
+			ASSERT3P(rc->rc_abd, !=, NULL);
+		} else {
+			/*
+			 * short data column, add a skip sector and clear
+			 * rc_tried to force the entire column to be re-read
+			 * thereby including the missing skip sector data
+			 * which is needed for reconstruction.
+			 */
+			ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
+			ASSERT3U(rr->rr_nempty, !=, 0);
+			ASSERT3P(rc->rc_abd, !=, NULL);
+			ASSERT(!abd_is_gang(rc->rc_abd));
+			/*
+			 * The ABD already mapped for the normal read becomes
+			 * the first member of the new gang ABD.
+			 */
+			abd_t *read_abd = rc->rc_abd;
+			rc->rc_abd = abd_alloc_gang();
+			abd_gang_add(rc->rc_abd, read_abd, B_TRUE);
+			abd_gang_add(rc->rc_abd, abd_get_offset_size(
+			    rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
+			skip_off += skip_size;
+			rc->rc_tried = 0;
+		}
+
+		/*
+		 * Increase rc_size so the empty ABD is included in subsequent
+		 * parity calculations.
+		 */
+		rc->rc_size = parity_size;
+	}
+
+	/* Every skip sector in the empty ABD must have been handed out. */
+	ASSERT3U(skip_off, ==, rr->rr_nempty * skip_size);
+}
+
+/*
+ * Verify that all empty sectors are zero filled before using them to
+ * calculate parity.  Otherwise, silent corruption in an empty sector will
+ * result in bad parity being generated.  That bad parity will then be
+ * considered authoritative and overwrite the good parity on disk.  This
+ * is possible because the checksum is only calculated over the data,
+ * thus it cannot be used to detect damage in empty sectors.
+ *
+ * Any skip sector found to be non-zero is reported as a checksum error,
+ * zeroed in memory and its column marked with ECKSUM.  Returns the number
+ * of columns which were found to be damaged.
+ */
+int
+vdev_draid_map_verify_empty(zio_t *zio, raidz_row_t *rr)
+{
+	uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
+	uint64_t parity_size = rr->rr_col[0].rc_size;
+	/* The skip sector is the last sector of each padded column. */
+	uint64_t skip_off = parity_size - skip_size;
+	uint64_t empty_off = 0;
+	int damaged = 0;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+	ASSERT3P(rr->rr_abd_empty, !=, NULL);
+	ASSERT3U(rr->rr_bigcols, >, 0);
+
+	/* Reference sector of zeros to compare each skip sector against. */
+	void *zero_buf = kmem_zalloc(skip_size, KM_SLEEP);
+
+	for (int c = rr->rr_bigcols; c < rr->rr_cols;
+	    c++, empty_off += skip_size) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		ASSERT3P(rc->rc_abd, !=, NULL);
+		ASSERT3U(rc->rc_size, ==, parity_size);
+
+		if (abd_cmp_buf_off(rc->rc_abd, zero_buf, skip_off,
+		    skip_size) == 0)
+			continue;
+
+		/* Report the damage, then repair the in-memory copy. */
+		vdev_raidz_checksum_error(zio, rc, rc->rc_abd);
+		abd_zero_off(rc->rc_abd, skip_off, skip_size);
+		rc->rc_error = SET_ERROR(ECKSUM);
+		damaged++;
+	}
+
+	ASSERT3U(empty_off, ==, abd_get_size(rr->rr_abd_empty));
+
+	kmem_free(zero_buf, skip_size);
+
+	return (damaged);
+}
+
+/*
+ * Given a logical address within a dRAID configuration, return the physical
+ * address on the first drive in the group that this address maps to
+ * (at position 'start' in permutation number 'perm').
+ *
+ * vd             - dRAID vdev; vd->vdev_tsd supplies the dRAID configuration
+ * logical_offset - byte offset into the dRAID's logical address space
+ * perm           - out: permutation number, i.e. group / ngroups
+ * start          - out: index in the permutation array at which the group
+ *                  begins, i.e. (group * groupwidth) % ndisks
+ */
+static uint64_t
+vdev_draid_logical_to_physical(vdev_t *vd, uint64_t logical_offset,
+    uint64_t *perm, uint64_t *start)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	/* b_offset is the dRAID (parent) sector offset. */
+	uint64_t ashift = vd->vdev_top->vdev_ashift;
+	uint64_t b_offset = logical_offset >> ashift;
+
+	/*
+	 * The height of a row in units of the vdev's minimum sector size.
+	 * This is the amount of data written to each disk of each group
+	 * in a given permutation.
+	 */
+	uint64_t rowheight_sectors = VDEV_DRAID_ROWHEIGHT >> ashift;
+
+	/*
+	 * We cycle through a disk permutation every groupsz * ngroups chunk
+	 * of address space. Note that ngroups * groupsz must be a multiple
+	 * of the number of data drives (ndisks) in order to guarantee
+	 * alignment. So, for example, if our row height is 16MB, our group
+	 * size is 10, and there are 13 data drives in the draid, then ngroups
+	 * will be 13, we will change permutation every 2.08GB and each
+	 * disk will have 160MB of data per chunk.
+	 */
+	uint64_t groupwidth = vdc->vdc_groupwidth;
+	uint64_t ngroups = vdc->vdc_ngroups;
+	uint64_t ndisks = vdc->vdc_ndisks;
+
+	/*
+	 * groupstart is where the group this IO will land in "starts" in
+	 * the permutation array.
+	 */
+	uint64_t group = logical_offset / vdc->vdc_groupsz;
+	uint64_t groupstart = (group * groupwidth) % ndisks;
+	ASSERT3U(groupstart + groupwidth, <=, ndisks + groupstart);
+	*start = groupstart;
+
+	/* b_offset is the sector offset within a group chunk */
+	b_offset = b_offset % (rowheight_sectors * groupwidth);
+	ASSERT0(b_offset % groupwidth);
+
+	/*
+	 * Find the starting byte offset on each child vdev:
+	 * - within a permutation there are ngroups groups spread over the
+	 *   rows, where each row covers a slice portion of the disk
+	 * - each permutation has (groupwidth * ngroups) / ndisks rows
+	 * - so each permutation covers rows * slice portion of the disk
+	 * - so we need to find the row where this IO group target begins
+	 */
+	*perm = group / ngroups;
+	uint64_t row = (*perm * ((groupwidth * ngroups) / ndisks)) +
+	    (((group % ngroups) * groupwidth) / ndisks);
+
+	/*
+	 * Sectors in all preceding rows plus the sector depth within this
+	 * row, converted back to a byte offset.
+	 */
+	return (((rowheight_sectors * row) +
+	    (b_offset / groupwidth)) << ashift);
+}
+
+/*
+ * Allocate and populate a single raidz_row_t describing the part of the
+ * zio which lands in the redundancy group containing io_offset.
+ *
+ * zio        - the I/O being mapped
+ * rrp        - out: the newly allocated row
+ * io_offset  - logical offset at which this row starts
+ * abd_offset - offset into zio->io_abd of this row's first data byte
+ * abd_size   - number of bytes of the zio which remain to be mapped
+ *
+ * Returns the number of bytes of the zio covered by this row.  That is
+ * abd_size unless the I/O would run past the end of the group, in which
+ * case it is limited to the space remaining in the group.
+ */
+static uint64_t
+vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
+    uint64_t abd_offset, uint64_t abd_size)
+{
+	vdev_t *vd = zio->io_vd;
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t ashift = vd->vdev_top->vdev_ashift;
+	uint64_t io_size = abd_size;
+	uint64_t io_asize = vdev_draid_asize(vd, io_size);
+	uint64_t group = vdev_draid_offset_to_group(vd, io_offset);
+	/* Logical offset at which the following group begins. */
+	uint64_t start_offset = vdev_draid_group_to_offset(vd, group + 1);
+
+	/*
+	 * Limit the io_size to the space remaining in the group.  A second
+	 * row in the raidz_map_t is created for the remainder.
+	 */
+	if (io_offset + io_asize > start_offset) {
+		io_size = vdev_draid_asize_to_psize(vd,
+		    start_offset - io_offset);
+	}
+
+	/*
+	 * At most a block may span the logical end of one group and the start
+	 * of the next group. Therefore, at the end of a group the io_size must
+	 * span the group width evenly and the remainder must be aligned to the
+	 * start of the next group.
+	 */
+	IMPLY(abd_offset == 0 && io_size < zio->io_size,
+	    (io_asize >> ashift) % vdc->vdc_groupwidth == 0);
+	IMPLY(abd_offset != 0,
+	    vdev_draid_group_to_offset(vd, group) == io_offset);
+
+	/* Lookup starting byte offset on each child vdev */
+	uint64_t groupstart, perm;
+	uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
+	    io_offset, &perm, &groupstart);
+
+	/*
+	 * If there are fewer than groupwidth drives available after the
+	 * group start, the group is going to wrap onto the next row. 'wrap'
+	 * is the group disk number that starts on the next row.
+	 */
+	uint64_t ndisks = vdc->vdc_ndisks;
+	uint64_t groupwidth = vdc->vdc_groupwidth;
+	uint64_t wrap = groupwidth;
+
+	if (groupstart + groupwidth > ndisks)
+		wrap = ndisks - groupstart;
+
+	/* The io size in units of the vdev's minimum sector size. */
+	const uint64_t psize = io_size >> ashift;
+
+	/*
+	 * "Quotient": The number of data sectors for this stripe on all but
+	 * the "big column" child vdevs that also contain "remainder" data.
+	 */
+	uint64_t q = psize / vdc->vdc_ndata;
+
+	/*
+	 * "Remainder": The number of partial stripe data sectors in this I/O.
+	 * This will add a sector to some, but not all, child vdevs.
+	 */
+	uint64_t r = psize - q * vdc->vdc_ndata;
+
+	/* The number of "big columns" - those which contain remainder data. */
+	uint64_t bc = (r == 0 ? 0 : r + vdc->vdc_nparity);
+	ASSERT3U(bc, <, groupwidth);
+
+	/* The total number of data and parity sectors for this I/O. */
+	uint64_t tot = psize + (vdc->vdc_nparity * (q + (r == 0 ? 0 : 1)));
+
+	/* The row is allocated with room for groupwidth columns. */
+	raidz_row_t *rr;
+	rr = kmem_alloc(offsetof(raidz_row_t, rr_col[groupwidth]), KM_SLEEP);
+	rr->rr_cols = groupwidth;
+	rr->rr_scols = groupwidth;
+	rr->rr_bigcols = bc;
+	rr->rr_missingdata = 0;
+	rr->rr_missingparity = 0;
+	rr->rr_firstdatacol = vdc->vdc_nparity;
+	rr->rr_abd_empty = NULL;
+#ifdef ZFS_DEBUG
+	rr->rr_offset = io_offset;
+	rr->rr_size = io_size;
+#endif
+	*rrp = rr;
+
+	uint8_t *base;
+	uint64_t iter, asize = 0;
+	vdev_draid_get_perm(vdc, perm, &base, &iter);
+	for (uint64_t i = 0; i < groupwidth; i++) {
+		raidz_col_t *rc = &rr->rr_col[i];
+		/* Position of this column in the permutation array. */
+		uint64_t c = (groupstart + i) % ndisks;
+
+		/* increment the offset if we wrap to the next row */
+		if (i == wrap)
+			physical_offset += VDEV_DRAID_ROWHEIGHT;
+
+		rc->rc_devidx = vdev_draid_permute_id(vdc, base, iter, c);
+		rc->rc_offset = physical_offset;
+		rc->rc_abd = NULL;
+		rc->rc_orig_data = NULL;
+		rc->rc_error = 0;
+		rc->rc_tried = 0;
+		rc->rc_skipped = 0;
+		rc->rc_force_repair = 0;
+		rc->rc_allow_repair = 1;
+		rc->rc_need_orig_restore = B_FALSE;
+
+		/*
+		 * The first bc columns hold q + 1 sectors, the others hold
+		 * q sectors (none at all when q is zero).
+		 */
+		if (q == 0 && i >= bc)
+			rc->rc_size = 0;
+		else if (i < bc)
+			rc->rc_size = (q + 1) << ashift;
+		else
+			rc->rc_size = q << ashift;
+
+		asize += rc->rc_size;
+	}
+
+	ASSERT3U(asize, ==, tot << ashift);
+	/* Skip sectors needed to pad the row to a multiple of groupwidth. */
+	rr->rr_nempty = roundup(tot, groupwidth) - tot;
+	IMPLY(bc > 0, rr->rr_nempty == groupwidth - bc);
+
+	/* Allocate buffers for the parity columns */
+	for (uint64_t c = 0; c < rr->rr_firstdatacol; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
+	}
+
+	/*
+	 * Map buffers for data columns and allocate/map buffers for skip
+	 * sectors.  There are three distinct cases for dRAID which are
+	 * required to support sequential rebuild.
+	 */
+	if (zio->io_type == ZIO_TYPE_WRITE) {
+		vdev_draid_map_alloc_write(zio, abd_offset, rr);
+	} else if ((rr->rr_nempty > 0) &&
+	    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
+		vdev_draid_map_alloc_scrub(zio, abd_offset, rr);
+	} else {
+		ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+		vdev_draid_map_alloc_read(zio, abd_offset, rr);
+	}
+
+	return (io_size);
+}
+
+/*
+ * Allocate the raidz mapping to be applied to the dRAID I/O.  The parity
+ * calculations for dRAID are identical to raidz however there are a few
+ * differences in the layout.
+ *
+ * - dRAID always allocates a full stripe width. Any extra sectors due to
+ *   this padding are zero filled and written to disk. They will be read
+ *   back during a scrub or repair operation since they are included in
+ *   the parity calculation. This property enables sequential resilvering.
+ *
+ * - When the block at the logical offset spans redundancy groups then two
+ *   rows are allocated in the raidz_map_t. One row resides at the end of
+ *   the first group and the other at the start of the following group.
+ */
+static raidz_map_t *
+vdev_draid_map_alloc(zio_t *zio)
+{
+	raidz_row_t *rr[2];
+	uint64_t io_offset = zio->io_offset;
+	uint64_t abd_size = zio->io_size;
+	uint64_t abd_offset = 0;
+	uint64_t size;
+	int nrows;
+
+	/* The first row covers as much of the I/O as fits in its group. */
+	size = vdev_draid_map_alloc_row(zio, &rr[0], io_offset,
+	    abd_offset, abd_size);
+	nrows = 1;
+
+	if (size < abd_size) {
+		vdev_t *vd = zio->io_vd;
+
+		/* Step over what the first row mapped and map the rest. */
+		nrows = 2;
+		io_offset += vdev_draid_asize(vd, size);
+		abd_offset += size;
+		abd_size -= size;
+
+		ASSERT3U(io_offset, ==, vdev_draid_group_to_offset(
+		    vd, vdev_draid_offset_to_group(vd, io_offset)));
+		ASSERT3U(abd_offset, <, zio->io_size);
+		ASSERT3U(abd_size, !=, 0);
+
+		size = vdev_draid_map_alloc_row(zio, &rr[1],
+		    io_offset, abd_offset, abd_size);
+		VERIFY3U(size, ==, abd_size);
+	}
+
+	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[nrows]),
+	    KM_SLEEP);
+	rm->rm_ops = vdev_raidz_math_get_ops();
+	rm->rm_nrows = nrows;
+	for (int i = 0; i < nrows; i++)
+		rm->rm_row[i] = rr[i];
+
+	return (rm);
+}
+
+/*
+ * Given an offset into a dRAID return the next group width aligned offset
+ * which can be used to start an allocation.  An offset which is already
+ * aligned is returned unchanged.
+ */
+static uint64_t
+vdev_draid_get_astart(vdev_t *vd, const uint64_t start)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	/* One sector on every column of the group. */
+	uint64_t align = vdc->vdc_groupwidth << vd->vdev_ashift;
+
+	return (roundup(start, align));
+}
+
+/*
+ * Allocatable space for dRAID is (children - nspares) * sizeof(smallest child)
+ * rounded down to the last full slice.  So each child must provide at least
+ * 1 / (children - nspares) of its asize, rounded up, in addition to the
+ * space reserved by VDEV_DRAID_REFLOW_RESERVE.
+ */
+static uint64_t
+vdev_draid_min_asize(vdev_t *vd)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	/* Ceiling division of the vdev's minimum asize across the disks. */
+	uint64_t ndisks = vdc->vdc_ndisks;
+	uint64_t per_child = (vd->vdev_min_asize + ndisks - 1) / ndisks;
+
+	return (VDEV_DRAID_REFLOW_RESERVE + per_child);
+}
+
+/*
+ * When using dRAID the minimum allocation size is determined by the number
+ * of data disks in the redundancy group, since full stripes are always
+ * used: one sector on each data disk.
+ */
+static uint64_t
+vdev_draid_min_alloc(vdev_t *vd)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t min_alloc;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	min_alloc = vdc->vdc_ndata << vd->vdev_ashift;
+
+	return (min_alloc);
+}
+
+/*
+ * Returns true if the txg range does not exist on any leaf vdev.
+ *
+ * A dRAID spare does not fit into the DTL model. While it has child vdevs
+ * there is no redundancy among them, and the effective child vdev is
+ * determined by offset. Essentially we do a vdev_dtl_reassess() on the
+ * fly by replacing a dRAID spare with the child vdev under the offset.
+ * Note that it is a recursive process because the child vdev can be
+ * another dRAID spare and so on.
+ */
+boolean_t
+vdev_draid_missing(vdev_t *vd, uint64_t physical_offset, uint64_t txg,
+    uint64_t size)
+{
+	if (vd->vdev_ops == &vdev_draid_spare_ops) {
+		/*
+		 * When sequentially resilvering we don't have a proper
+		 * txg range so instead we must presume all txgs are
+		 * missing on this vdev until the resilver completes.
+		 */
+		if (vd->vdev_rebuild_txg != 0)
+			return (B_TRUE);
+
+		/*
+		 * DTL_MISSING is set for all prior txgs when a resilver
+		 * is started in spa_vdev_attach().
+		 */
+		if (vdev_dtl_contains(vd, DTL_MISSING, txg, size))
+			return (B_TRUE);
+
+		/*
+		 * Consult the DTL on the relevant vdev. Either a vdev
+		 * leaf or spare/replace mirror child may be returned so
+		 * we must recursively call vdev_draid_missing().
+		 */
+		vdev_t *cvd = vdev_draid_spare_get_child(vd, physical_offset);
+		if (cvd == NULL)
+			return (B_TRUE);
+
+		return (vdev_draid_missing(cvd, physical_offset,
+		    txg, size));
+	}
+
+	/* Anything other than a spare/replacing vdev just consults its DTL. */
+	if (vd->vdev_ops != &vdev_spare_ops &&
+	    vd->vdev_ops != &vdev_replacing_ops)
+		return (vdev_dtl_contains(vd, DTL_MISSING, txg, size));
+
+	/*
+	 * Check all of the readable children, if any child contains
+	 * the txg range then the data is not missing.
+	 */
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (vdev_readable(cvd) &&
+		    !vdev_draid_missing(cvd, physical_offset, txg, size))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Returns true if the txg is only partially replicated on the leaf vdevs.
+ */
+static boolean_t
+vdev_draid_partial(vdev_t *vd, uint64_t physical_offset, uint64_t txg,
+    uint64_t size)
+{
+	if (vd->vdev_ops == &vdev_draid_spare_ops) {
+		/*
+		 * When sequentially resilvering we don't have a proper
+		 * txg range so instead we must presume all txgs are
+		 * missing on this vdev until the resilver completes.
+		 */
+		if (vd->vdev_rebuild_txg != 0)
+			return (B_TRUE);
+
+		/*
+		 * DTL_MISSING is set for all prior txgs when a resilver
+		 * is started in spa_vdev_attach().
+		 */
+		if (vdev_dtl_contains(vd, DTL_MISSING, txg, size))
+			return (B_TRUE);
+
+		/*
+		 * Consult the DTL on the relevant vdev. Either a vdev
+		 * leaf or spare/replace mirror child may be returned so
+		 * we must recursively call vdev_draid_partial().
+		 */
+		vdev_t *cvd = vdev_draid_spare_get_child(vd, physical_offset);
+		if (cvd == NULL)
+			return (B_TRUE);
+
+		return (vdev_draid_partial(cvd, physical_offset, txg, size));
+	}
+
+	/* Anything other than a spare/replacing vdev just consults its DTL. */
+	if (vd->vdev_ops != &vdev_spare_ops &&
+	    vd->vdev_ops != &vdev_replacing_ops)
+		return (vdev_dtl_contains(vd, DTL_MISSING, txg, size));
+
+	/*
+	 * Check all of the readable children, if any child is
+	 * missing the txg range then it is partially replicated.
+	 */
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (vdev_readable(cvd) &&
+		    vdev_draid_partial(cvd, physical_offset, txg, size))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Determine if the vdev is readable at the given offset.  A distributed
+ * spare is first resolved to the child vdev backing that offset, and a
+ * spare/replacing vdev is readable when any of its readable children is.
+ */
+boolean_t
+vdev_draid_readable(vdev_t *vd, uint64_t physical_offset)
+{
+	if (vd->vdev_ops == &vdev_draid_spare_ops) {
+		vd = vdev_draid_spare_get_child(vd, physical_offset);
+		if (vd == NULL)
+			return (B_FALSE);
+	}
+
+	/* Plain vdevs are answered directly. */
+	if (vd->vdev_ops != &vdev_spare_ops &&
+	    vd->vdev_ops != &vdev_replacing_ops)
+		return (vdev_readable(vd));
+
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (vdev_readable(cvd) &&
+		    vdev_draid_readable(cvd, physical_offset))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Returns the first distributed spare found under the provided vdev tree
+ * (searched depth first, the vdev itself included), or NULL if there is
+ * none.
+ */
+static vdev_t *
+vdev_draid_find_spare(vdev_t *vd)
+{
+	vdev_t *svd = NULL;
+
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return (vd);
+
+	/* Stop descending as soon as one child subtree yields a spare. */
+	for (int c = 0; svd == NULL && c < vd->vdev_children; c++)
+		svd = vdev_draid_find_spare(vd->vdev_child[c]);
+
+	return (svd);
+}
+
+/*
+ * Returns B_TRUE if the passed in vdev is currently "faulted".
+ * Faulted, in this context, means that the vdev represents a
+ * replacing or sparing vdev tree.
+ */
+static boolean_t
+vdev_draid_faulted(vdev_t *vd, uint64_t physical_offset)
+{
+	vdev_t *tvd = vd;
+
+	if (vd->vdev_ops == &vdev_draid_spare_ops) {
+		vdev_t *cvd = vdev_draid_spare_get_child(vd, physical_offset);
+		if (cvd == NULL)
+			return (B_FALSE);
+
+		/*
+		 * After resolving the distributed spare to a leaf vdev
+		 * check the parent to determine if it's "faulted".
+		 */
+		tvd = cvd->vdev_parent;
+	}
+
+	return (tvd->vdev_ops == &vdev_spare_ops ||
+	    tvd->vdev_ops == &vdev_replacing_ops);
+}
+
+/*
+ * Determine if the dRAID block at the logical offset is degraded.
+ * Used by sequential resilver.  The offset must be group width aligned.
+ */
+static boolean_t
+vdev_draid_group_degraded(vdev_t *vd, uint64_t offset)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t groupstart, perm, iter;
+	uint8_t *base;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+	ASSERT3U(vdev_draid_get_astart(vd, offset), ==, offset);
+
+	uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
+	    offset, &perm, &groupstart);
+
+	vdev_draid_get_perm(vdc, perm, &base, &iter);
+
+	/* Visit every child vdev which is a member of this group. */
+	for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
+		uint64_t slot = (groupstart + i) % vdc->vdc_ndisks;
+		uint64_t cid = vdev_draid_permute_id(vdc, base, iter, slot);
+		vdev_t *cvd = vd->vdev_child[cid];
+
+		/*
+		 * The group is degraded when it contains a faulted vdev.
+		 * Groups with active distributed spares are always checked
+		 * because any vdev failure in the pool will affect them.
+		 */
+		if (vdev_draid_faulted(cvd, physical_offset) ||
+		    vdev_draid_find_spare(cvd) != NULL)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Determine if the txg is missing.  Used by healing resilver.
+ * The offset must be group width aligned.
+ */
+static boolean_t
+vdev_draid_group_missing(vdev_t *vd, uint64_t offset, uint64_t txg,
+    uint64_t size)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t groupstart, perm, iter;
+	uint8_t *base;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+	ASSERT3U(vdev_draid_get_astart(vd, offset), ==, offset);
+
+	uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
+	    offset, &perm, &groupstart);
+
+	vdev_draid_get_perm(vdc, perm, &base, &iter);
+
+	/* Visit every child vdev which is a member of this group. */
+	for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
+		uint64_t slot = (groupstart + i) % vdc->vdc_ndisks;
+		uint64_t cid = vdev_draid_permute_id(vdc, base, iter, slot);
+		vdev_t *cvd = vd->vdev_child[cid];
+
+		/*
+		 * The txg is missing when it is known to be partially
+		 * replicated.  Groups with active distributed spares are
+		 * always checked because any vdev failure in the pool will
+		 * affect them.
+		 */
+		if (vdev_draid_partial(cvd, physical_offset, txg, size) ||
+		    vdev_draid_find_spare(cvd) != NULL)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Find the smallest child asize and largest sector size to calculate the
+ * available capacity.  Distributed spares are ignored since their capacity
+ * is also based on the minimum child size in the top-level dRAID.
+ *
+ * asizep           - out: smallest vdev_asize of the non-spare children
+ * max_asizep       - out: smallest vdev_max_asize of the non-spare children
+ * logical_ashiftp  - out: largest vdev_ashift of the non-spare children
+ * physical_ashiftp - out: result of folding each non-spare child's
+ *                    vdev_physical_ashift through vdev_best_ashift()
+ */
+static void
+vdev_draid_calculate_asize(vdev_t *vd, uint64_t *asizep, uint64_t *max_asizep,
+    uint64_t *logical_ashiftp, uint64_t *physical_ashiftp)
+{
+	uint64_t logical_ashift = 0, physical_ashift = 0;
+	uint64_t asize = 0, max_asize = 0;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_ops == &vdev_draid_spare_ops)
+			continue;
+
+		/*
+		 * The "- 1 ... + 1" form makes a value of zero wrap to the
+		 * largest uint64_t inside MIN().  The running minimum starts
+		 * at zero, so the first child's size always replaces it.
+		 */
+		asize = MIN(asize - 1, cvd->vdev_asize - 1) + 1;
+		max_asize = MIN(max_asize - 1, cvd->vdev_max_asize - 1) + 1;
+		logical_ashift = MAX(logical_ashift, cvd->vdev_ashift);
+	}
+	/*
+	 * The physical ashift is computed in a second pass because it
+	 * depends on the final logical ashift.
+	 */
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_ops == &vdev_draid_spare_ops)
+			continue;
+		physical_ashift = vdev_best_ashift(logical_ashift,
+		    physical_ashift, cvd->vdev_physical_ashift);
+	}
+
+	*asizep = asize;
+	*max_asizep = max_asize;
+	*logical_ashiftp = logical_ashift;
+	*physical_ashiftp = physical_ashift;
+}
+
+/*
+ * Child selection predicate used when opening spare vdevs: returns B_TRUE
+ * for distributed spares as well as replacing and spare vdevs.
+ */
+static boolean_t
+vdev_draid_open_spares(vdev_t *vd)
+{
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return (B_TRUE);
+
+	return (vd->vdev_ops == &vdev_replacing_ops ||
+	    vd->vdev_ops == &vdev_spare_ops);
+}
+
+/*
+ * Child selection predicate used when opening all children, excluding
+ * spares: the exact complement of vdev_draid_open_spares().
+ */
+static boolean_t
+vdev_draid_open_children(vdev_t *vd)
+{
+	boolean_t is_spare = vdev_draid_open_spares(vd);
+
+	return (!is_spare);
+}
+
+/*
+ * Open a top-level dRAID vdev.
+ *
+ * On success 0 is returned and the allocatable size, maximum allocatable
+ * size and ashifts are passed back through the out parameters.  EINVAL is
+ * returned for an invalid configuration and ENXIO when too many children
+ * failed to open or the children are too small.
+ */
+static int
+vdev_draid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	uint64_t nparity = vdc->vdc_nparity;
+	int open_errors = 0;
+
+	if (nparity > VDEV_DRAID_MAXPARITY ||
+	    vd->vdev_children < nparity + 1) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * First open the normal children then the distributed spares.  This
+	 * ordering is important to ensure the distributed spares calculate
+	 * the correct psize in the event that the dRAID vdevs were expanded.
+	 */
+	vdev_open_children_subset(vd, vdev_draid_open_children);
+	vdev_open_children_subset(vd, vdev_draid_open_spares);
+
+	/*
+	 * Verify enough of the children are available to continue; more
+	 * than nparity failed opens cannot be tolerated.
+	 */
+	for (int c = 0; c < vd->vdev_children; c++) {
+		if (vd->vdev_child[c]->vdev_open_error == 0)
+			continue;
+
+		open_errors++;
+		if (open_errors > nparity) {
+			vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
+			return (SET_ERROR(ENXIO));
+		}
+	}
+
+	/*
+	 * Allocatable capacity is the sum of the space on all children less
+	 * the number of distributed spares rounded down to last full row
+	 * and then to the last full group. An additional 32MB of scratch
+	 * space is reserved at the end of each child for use by the dRAID
+	 * expansion feature.
+	 */
+	uint64_t child_asize, child_max_asize;
+	vdev_draid_calculate_asize(vd, &child_asize, &child_max_asize,
+	    logical_ashift, physical_ashift);
+
+	/*
+	 * Should be unreachable since the minimum child size is 64MB, but
+	 * we want to make sure an underflow absolutely cannot occur here.
+	 */
+	if (child_asize < VDEV_DRAID_REFLOW_RESERVE ||
+	    child_max_asize < VDEV_DRAID_REFLOW_RESERVE) {
+		return (SET_ERROR(ENXIO));
+	}
+
+	/* Drop the reserve, then round down to whole rows. */
+	child_asize -= VDEV_DRAID_REFLOW_RESERVE;
+	child_asize = (child_asize / VDEV_DRAID_ROWHEIGHT) *
+	    VDEV_DRAID_ROWHEIGHT;
+	child_max_asize -= VDEV_DRAID_REFLOW_RESERVE;
+	child_max_asize = (child_max_asize / VDEV_DRAID_ROWHEIGHT) *
+	    VDEV_DRAID_ROWHEIGHT;
+
+	/* Scale by the number of disks and round down to whole groups. */
+	uint64_t total = child_asize * vdc->vdc_ndisks;
+	uint64_t max_total = child_max_asize * vdc->vdc_ndisks;
+
+	*asize = (total / vdc->vdc_groupsz) * vdc->vdc_groupsz;
+	*max_asize = (max_total / vdc->vdc_groupsz) * vdc->vdc_groupsz;
+
+	return (0);
+}
+
+/*
+ * Close a top-level dRAID vdev by closing each child which is present.
+ */
+static void
+vdev_draid_close(vdev_t *vd)
+{
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd == NULL)
+			continue;
+
+		vdev_close(cvd);
+	}
+}
+
+/*
+ * Return the maximum asize for a rebuild zio in the provided range
+ * given the following constraints.  A dRAID chunk may not:
+ *
+ * - Exceed the maximum allowed block size (SPA_MAXBLOCKSIZE), or
+ * - Span dRAID redundancy groups.
+ *
+ * vd          - the dRAID vdev
+ * start       - group width aligned logical offset where the chunk begins
+ * asize       - size of the range, a multiple of the group width in bytes
+ * max_segment - scaled by ndata to give the upper bound on the chunk's
+ *               psize
+ */
+static uint64_t
+vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
+    uint64_t max_segment)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	uint64_t ashift = vd->vdev_ashift;
+	uint64_t ndata = vdc->vdc_ndata;
+	/* Sector aligned psize limit, capped at the maximum block size. */
+	uint64_t psize = MIN(P2ROUNDUP(max_segment * ndata, 1 << ashift),
+	    SPA_MAXBLOCKSIZE);
+
+	ASSERT3U(vdev_draid_get_astart(vd, start), ==, start);
+	ASSERT3U(asize % (vdc->vdc_groupwidth << ashift), ==, 0);
+
+	/* Chunks must evenly span all data columns in the group. */
+	psize = (((psize >> ashift) / ndata) * ndata) << ashift;
+	uint64_t chunk_size = MIN(asize, vdev_psize_to_asize(vd, psize));
+
+	/* Reduce the chunk size to the group space remaining. */
+	uint64_t group = vdev_draid_offset_to_group(vd, start);
+	uint64_t left = vdev_draid_group_to_offset(vd, group + 1) - start;
+	chunk_size = MIN(chunk_size, left);
+
+	/* The chunk is a whole number of stripes within a single group. */
+	ASSERT3U(chunk_size % (vdc->vdc_groupwidth << ashift), ==, 0);
+	ASSERT3U(vdev_draid_offset_to_group(vd, start), ==,
+	    vdev_draid_offset_to_group(vd, start + chunk_size - 1));
+
+	return (chunk_size);
+}
+
+/*
+ * Align the start of the metaslab to the group width and slightly reduce
+ * its size to a multiple of the group width.  Since full stripe writes are
+ * required by dRAID the trimmed space is unallocable anyway.  Furthermore,
+ * aligning the metaslab start is important for vdev initialize and TRIM
+ * which both operate on metaslab boundaries which vdev_xlate() expects to
+ * be aligned.
+ *
+ * Both ms_start and ms_size are updated in place.
+ */
+static void
+vdev_draid_metaslab_init(vdev_t *vd, uint64_t *ms_start, uint64_t *ms_size)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+
+	/* One sector on every column of the group. */
+	uint64_t sz = vdc->vdc_groupwidth << vd->vdev_ashift;
+
+	/* Space given up at the front by moving the start forward. */
+	uint64_t aligned_start = vdev_draid_get_astart(vd, *ms_start);
+	uint64_t shift = aligned_start - *ms_start;
+
+	/* What remains is rounded down to whole group widths. */
+	uint64_t aligned_size = ((*ms_size - shift) / sz) * sz;
+
+	*ms_start = aligned_start;
+	*ms_size = aligned_size;
+
+	ASSERT0(*ms_start % sz);
+	ASSERT0(*ms_size % sz);
+}
+
+/*
+ * Add virtual dRAID spares to the list of valid spares. In order to accomplish
+ * this the existing array must be freed and reallocated with the additional
+ * entries.
+ *
+ * nvroot	config nvlist whose ZPOOL_CONFIG_SPARES array is rebuilt
+ * vd		vdev whose direct children are scanned for dRAID vdevs
+ * ndraidp	out: number of dRAID children found under vd
+ * next_vdev_id	added to each child's index to form the id that is embedded
+ *		in the generated spare path
+ *
+ * Each generated spare is given the path
+ * "<VDEV_TYPE_DRAID><nparity>-<next_vdev_id + child index>-<spare id>".
+ * When no dRAID child has any distributed spares nvroot is left untouched.
+ * The temporary array and the nvlists placed in it are released before
+ * returning.  Always returns 0.
+ */
+int
+vdev_draid_spare_create(nvlist_t *nvroot, vdev_t *vd, uint64_t *ndraidp,
+    uint64_t next_vdev_id)
+{
+	uint64_t draid_nspares = 0;
+	uint64_t ndraid = 0;
+	int error;
+
+	for (uint64_t i = 0; i < vd->vdev_children; i++) {
+		vdev_t *cvd = vd->vdev_child[i];
+
+		if (cvd->vdev_ops == &vdev_draid_ops) {
+			vdev_draid_config_t *vdc = cvd->vdev_tsd;
+			draid_nspares += vdc->vdc_nspares;
+			ndraid++;
+		}
+	}
+
+	if (draid_nspares == 0) {
+		*ndraidp = ndraid;
+		return (0);
+	}
+
+	nvlist_t **old_spares, **new_spares;
+	uint_t old_nspares;
+	error = nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &old_spares, &old_nspares);
+	if (error)
+		old_nspares = 0;
+
+	/*
+	 * Allocate memory and copy the existing spares.  When the lookup
+	 * above failed old_nspares is zero, so old_spares is never read.
+	 */
+	new_spares = kmem_alloc(sizeof (nvlist_t *) *
+	    (draid_nspares + old_nspares), KM_SLEEP);
+	for (uint_t i = 0; i < old_nspares; i++)
+		new_spares[i] = fnvlist_dup(old_spares[i]);
+
+	/*
+	 * Add new distributed spares to ZPOOL_CONFIG_SPARES.  They are
+	 * appended after the copied entries, one per spare of every dRAID
+	 * child, so n ends up equal to draid_nspares + old_nspares.
+	 */
+	uint64_t n = old_nspares;
+	for (uint64_t vdev_id = 0; vdev_id < vd->vdev_children; vdev_id++) {
+		vdev_t *cvd = vd->vdev_child[vdev_id];
+		char path[64];
+
+		if (cvd->vdev_ops != &vdev_draid_ops)
+			continue;
+
+		vdev_draid_config_t *vdc = cvd->vdev_tsd;
+		uint64_t nspares = vdc->vdc_nspares;
+		uint64_t nparity = vdc->vdc_nparity;
+
+		for (uint64_t spare_id = 0; spare_id < nspares; spare_id++) {
+			bzero(path, sizeof (path));
+			(void) snprintf(path, sizeof (path) - 1,
+			    "%s%llu-%llu-%llu", VDEV_TYPE_DRAID,
+			    (u_longlong_t)nparity,
+			    (u_longlong_t)next_vdev_id + vdev_id,
+			    (u_longlong_t)spare_id);
+
+			nvlist_t *spare = fnvlist_alloc();
+			fnvlist_add_string(spare, ZPOOL_CONFIG_PATH, path);
+			fnvlist_add_string(spare, ZPOOL_CONFIG_TYPE,
+			    VDEV_TYPE_DRAID_SPARE);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_TOP_GUID,
+			    cvd->vdev_guid);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_SPARE_ID,
+			    spare_id);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_IS_LOG, 0);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_IS_SPARE, 1);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_WHOLE_DISK, 1);
+			fnvlist_add_uint64(spare, ZPOOL_CONFIG_ASHIFT,
+			    cvd->vdev_ashift);
+
+			new_spares[n] = spare;
+			n++;
+		}
+	}
+
+	if (n > 0) {
+		(void) nvlist_remove_all(nvroot, ZPOOL_CONFIG_SPARES);
+		fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    new_spares, n);
+	}
+
+	for (int i = 0; i < n; i++)
+		nvlist_free(new_spares[i]);
+
+	kmem_free(new_spares, sizeof (*new_spares) * n);
+	*ndraidp = ndraid;
+
+	return (0);
+}
+
+/*
+ * Determine if any portion of the provided block resides on a child vdev
+ * with a dirty DTL and therefore needs to be resilvered.
+ *
+ * The block is located by the offset stored in dva and sized by passing
+ * psize through vdev_draid_asize().  A phys_birth of TXG_UNKNOWN selects the
+ * sequential resilver path; any other value selects the healing path.
+ * Returns B_TRUE when the block must be resilvered, B_FALSE otherwise.
+ */
+static boolean_t
+vdev_draid_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
+{
+	uint64_t offset = DVA_GET_OFFSET(dva);
+	uint64_t asize = vdev_draid_asize(vd, psize);
+
+	if (phys_birth == TXG_UNKNOWN) {
+		/*
+		 * Sequential resilver.  There is no meaningful phys_birth
+		 * for this block, we can only determine if block resides
+		 * in a degraded group in which case it must be resilvered.
+		 */
+		ASSERT3U(vdev_draid_offset_to_group(vd, offset), ==,
+		    vdev_draid_offset_to_group(vd, offset + asize - 1));
+
+		return (vdev_draid_group_degraded(vd, offset));
+	} else {
+		/*
+		 * Healing resilver.  TXGs not in DTL_PARTIAL are intact,
+		 * as are blocks in non-degraded groups.
+		 */
+		if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
+			return (B_FALSE);
+
+		if (vdev_draid_group_missing(vd, offset, phys_birth, 1))
+			return (B_TRUE);
+
+		/*
+		 * The block may span groups in which case check both.
+		 * NOTE(review): the span test looks at the last byte of the
+		 * block (offset + asize - 1) while the lookup below is given
+		 * offset + asize, the first byte past it -- confirm this is
+		 * the intended offset for the second group.
+		 */
+		if (vdev_draid_offset_to_group(vd, offset) !=
+		    vdev_draid_offset_to_group(vd, offset + asize - 1)) {
+			if (vdev_draid_group_missing(vd,
+			    offset + asize, phys_birth, 1))
+				return (B_TRUE);
+		}
+
+		return (B_FALSE);
+	}
+}
+
+static boolean_t
+vdev_draid_rebuilding(vdev_t *vd)
+{
+	/* A leaf counts as rebuilding once it has a rebuild txg recorded. */
+	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_rebuild_txg)
+		return (B_TRUE);
+
+	/* Otherwise the answer is whether any child subtree is rebuilding. */
+	boolean_t found = B_FALSE;
+	for (int c = 0; c < vd->vdev_children && !found; c++)
+		found = vdev_draid_rebuilding(vd->vdev_child[c]);
+
+	return (found);
+}
+
+static void
+vdev_draid_io_verify(vdev_t *vd, raidz_row_t *rr, int col)
+{
+#ifdef ZFS_DEBUG
+	/*
+	 * Debug-only cross check: translate the row's logical range for the
+	 * column's child and confirm it matches the column's offset and size.
+	 */
+	raidz_col_t *rc = &rr->rr_col[col];
+	vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+	range_seg64_t logical_rs, physical_rs, remain_rs;
+
+	logical_rs.rs_start = rr->rr_offset;
+	logical_rs.rs_end = rr->rr_offset + vdev_draid_asize(vd, rr->rr_size);
+
+	vdev_xlate(cvd, &logical_rs, &physical_rs, &remain_rs);
+	ASSERT(vdev_xlate_is_empty(&remain_rs));
+	ASSERT3U(rc->rc_offset, ==, physical_rs.rs_start);
+	ASSERT3U(rc->rc_offset, <, physical_rs.rs_end);
+	ASSERT3U(rc->rc_offset + rc->rc_size, ==, physical_rs.rs_end);
+#endif
+}
+
+/*
+ * For write operations:
+ * 1. Generate the parity data
+ * 2. Create child zio write operations to each column's vdev, for both
+ *    data and parity.  A gang ABD is allocated by vdev_draid_map_alloc()
+ *    if a skip sector needs to be added to a column.
+ */
+static void
+vdev_draid_io_start_write(zio_t *zio, raidz_row_t *rr)
+{
+	vdev_t *vd = zio->io_vd;
+	raidz_map_t *rm = zio->io_vsd;
+
+	/* Parity is generated for the row before any column is issued. */
+	vdev_raidz_generate_parity_row(rm, rr);
+
+	for (int col = 0; col < rr->rr_cols; col++) {
+		raidz_col_t *rc = &rr->rr_col[col];
+		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+		zio_t *cio;
+
+		/*
+		 * Every column is written, zero filled empty ones included,
+		 * because they took part in the parity calculation.
+		 */
+		ASSERT3U(rc->rc_size, !=, 0);
+
+		/* Check the physical to logical translation (debug only). */
+		vdev_draid_io_verify(vd, rr, col);
+
+		cio = zio_vdev_child_io(zio, NULL, cvd, rc->rc_offset,
+		    rc->rc_abd, rc->rc_size, zio->io_type, zio->io_priority,
+		    0, vdev_raidz_child_done, rc);
+		zio_nowait(cio);
+	}
+}
+
+/*
+ * For read operations:
+ * 1. The vdev_draid_map_alloc() function will create a minimal raidz
+ *    mapping for the read based on the zio->io_flags.  There are two
+ *    possible mappings either 1) a normal read, or 2) a scrub/resilver.
+ * 2. Create the zio read operations.  This will include all parity
+ *    columns and skip sectors for a scrub/resilver.
+ *
+ * The row is walked twice.  The first pass classifies every column: columns
+ * that vdev_draid_readable() or vdev_draid_missing() reject, and columns on
+ * a child that is being rebuilt, are given an error and counted in
+ * rr_missingdata or rr_missingparity; for resilver IO the repair flags are
+ * set as well.  The second pass issues a child read for each column that has
+ * no error and a non-zero size.  Parity columns are only read when data is
+ * missing or the zio is a scrub/resilver.
+ */
+static void
+vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
+{
+	vdev_t *vd = zio->io_vd;
+
+	/* Sequential rebuild must do IO at redundancy group boundary. */
+	IMPLY(zio->io_priority == ZIO_PRIORITY_REBUILD, rr->rr_nempty == 0);
+
+	/*
+	 * Iterate over the columns in reverse order so that we hit the parity
+	 * last.  Any errors along the way will force us to read the parity.
+	 * For scrub/resilver IOs which verify skip sectors, a gang ABD will
+	 * have been allocated to store them and rc->rc_size is increased.
+	 */
+	for (int c = rr->rr_cols - 1; c >= 0; c--) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+
+		if (!vdev_draid_readable(cvd, rc->rc_offset)) {
+			if (c >= rr->rr_firstdatacol)
+				rr->rr_missingdata++;
+			else
+				rr->rr_missingparity++;
+			rc->rc_error = SET_ERROR(ENXIO);
+			rc->rc_tried = 1;
+			rc->rc_skipped = 1;
+			continue;
+		}
+
+		if (vdev_draid_missing(cvd, rc->rc_offset, zio->io_txg, 1)) {
+			if (c >= rr->rr_firstdatacol)
+				rr->rr_missingdata++;
+			else
+				rr->rr_missingparity++;
+			rc->rc_error = SET_ERROR(ESTALE);
+			rc->rc_skipped = 1;
+			continue;
+		}
+
+		/*
+		 * Empty columns may be read during vdev_draid_io_done().
+		 * Only skip them after the readable and missing checks
+		 * verify they are available.
+		 */
+		if (rc->rc_size == 0) {
+			rc->rc_skipped = 1;
+			continue;
+		}
+
+		if (zio->io_flags & ZIO_FLAG_RESILVER) {
+			vdev_t *svd;
+
+			/*
+			 * Sequential rebuilds need to always consider the data
+			 * on the child being rebuilt to be stale.  This is
+			 * important when all columns are available to aid
+			 * known reconstruction in identifying which columns
+			 * contain incorrect data.
+			 *
+			 * Furthermore, all repairs need to be constrained to
+			 * the devices being rebuilt because without a checksum
+			 * we cannot verify the data is actually correct and
+			 * performing an incorrect repair could result in
+			 * locking in damage and making the data unrecoverable.
+			 */
+			if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
+				if (vdev_draid_rebuilding(cvd)) {
+					if (c >= rr->rr_firstdatacol)
+						rr->rr_missingdata++;
+					else
+						rr->rr_missingparity++;
+					rc->rc_error = SET_ERROR(ESTALE);
+					rc->rc_skipped = 1;
+					rc->rc_allow_repair = 1;
+					continue;
+				} else {
+					rc->rc_allow_repair = 0;
+				}
+			} else {
+				rc->rc_allow_repair = 1;
+			}
+
+			/*
+			 * If this child is a distributed spare then the
+			 * offset might reside on the vdev being replaced.
+			 * In which case this data must be written to the
+			 * new device.  Failure to do so would result in
+			 * checksum errors when the old device is detached
+			 * and the pool is scrubbed.
+			 */
+			if ((svd = vdev_draid_find_spare(cvd)) != NULL) {
+				svd = vdev_draid_spare_get_child(svd,
+				    rc->rc_offset);
+				if (svd && (svd->vdev_ops == &vdev_spare_ops ||
+				    svd->vdev_ops == &vdev_replacing_ops)) {
+					rc->rc_force_repair = 1;
+
+					if (vdev_draid_rebuilding(svd))
+						rc->rc_allow_repair = 1;
+				}
+			}
+
+			/*
+			 * Always issue a repair IO to this child when its
+			 * a spare or replacing vdev with an active rebuild.
+			 */
+			if ((cvd->vdev_ops == &vdev_spare_ops ||
+			    cvd->vdev_ops == &vdev_replacing_ops) &&
+			    vdev_draid_rebuilding(cvd)) {
+				rc->rc_force_repair = 1;
+				rc->rc_allow_repair = 1;
+			}
+		}
+	}
+
+	/*
+	 * Either a parity or data column is missing this means a repair
+	 * may be attempted by vdev_draid_io_done().  Expand the raid map
+	 * to read in empty columns which are needed along with the parity
+	 * during reconstruction.
+	 */
+	if ((rr->rr_missingdata > 0 || rr->rr_missingparity > 0) &&
+	    rr->rr_nempty > 0 && rr->rr_abd_empty == NULL) {
+		vdev_draid_map_alloc_empty(zio, rr);
+	}
+
+	for (int c = rr->rr_cols - 1; c >= 0; c--) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+
+		if (rc->rc_error || rc->rc_size == 0)
+			continue;
+
+		if (c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 ||
+		    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
+			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+			    rc->rc_offset, rc->rc_abd, rc->rc_size,
+			    zio->io_type, zio->io_priority, 0,
+			    vdev_raidz_child_done, rc));
+		}
+	}
+}
+
+/*
+ * Start an IO operation to a dRAID vdev.
+ */
+static void
+vdev_draid_io_start(zio_t *zio)
+{
+	vdev_t *vd __maybe_unused = zio->io_vd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+	ASSERT3U(zio->io_offset, ==, vdev_draid_get_astart(vd, zio->io_offset));
+
+	/* Build the raidz map and attach it to the zio. */
+	raidz_map_t *rm = vdev_draid_map_alloc(zio);
+	zio->io_vsd = rm;
+	zio->io_vsd_ops = &vdev_raidz_vsd_ops;
+
+	/* Only writes and reads are expected here. */
+	const boolean_t writing = (zio->io_type == ZIO_TYPE_WRITE);
+	if (!writing) {
+		ASSERT(zio->io_type == ZIO_TYPE_READ);
+	}
+
+	/* Start the child IO for each row of the map in turn. */
+	for (int row = 0; row < rm->rm_nrows; row++) {
+		if (writing)
+			vdev_draid_io_start_write(zio, rm->rm_row[row]);
+		else
+			vdev_draid_io_start_read(zio, rm->rm_row[row]);
+	}
+
+	zio_execute(zio);
+}
+
+/*
+ * Complete an IO operation on a dRAID vdev.  The raidz logic can be applied
+ * to dRAID since the layout is fully described by the raidz_map_t.
+ *
+ * This is a plain forwarder: the zio is handed to vdev_raidz_io_done()
+ * unchanged and nothing dRAID specific is done here.
+ */
+static void
+vdev_draid_io_done(zio_t *zio)
+{
+	vdev_raidz_io_done(zio);
+}
+
+static void
+vdev_draid_state_change(vdev_t *vd, int faulted, int degraded)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+	ASSERT(vd->vdev_ops == &vdev_draid_ops);
+
+	/* More faulted children than parity: cannot open, no replicas. */
+	if (faulted > vdc->vdc_nparity) {
+		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_NO_REPLICAS);
+		return;
+	}
+
+	/* Any faulted or degraded child at all leaves the vdev degraded. */
+	if (degraded + faulted != 0) {
+		vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
+		return;
+	}
+
+	vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
+}
+
+/*
+ * Translate a logical range on the parent dRAID vdev into the physical range
+ * it occupies on the child cvd.  At most one redundancy group is translated
+ * per call.  physical_rs receives the child's range, which is zero-length
+ * when the child does not belong to the group or when the logical start is
+ * not aligned.  remain_rs receives the part of logical_rs that this call did
+ * not cover.
+ */
+static void
+vdev_draid_xlate(vdev_t *cvd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
+{
+	vdev_t *raidvd = cvd->vdev_parent;
+	ASSERT(raidvd->vdev_ops == &vdev_draid_ops);
+
+	vdev_draid_config_t *vdc = raidvd->vdev_tsd;
+	uint64_t ashift = raidvd->vdev_top->vdev_ashift;
+
+	/* Make sure the offsets are block-aligned */
+	ASSERT0(logical_rs->rs_start % (1 << ashift));
+	ASSERT0(logical_rs->rs_end % (1 << ashift));
+
+	uint64_t logical_start = logical_rs->rs_start;
+	uint64_t logical_end = logical_rs->rs_end;
+
+	/*
+	 * Unaligned ranges must be skipped. All metaslabs are correctly
+	 * aligned so this should not happen, but this case is handled in
+	 * case it's needed by future callers.
+	 */
+	uint64_t astart = vdev_draid_get_astart(raidvd, logical_start);
+	if (astart != logical_start) {
+		physical_rs->rs_start = logical_start;
+		physical_rs->rs_end = logical_start;
+		remain_rs->rs_start = MIN(astart, logical_end);
+		remain_rs->rs_end = logical_end;
+		return;
+	}
+
+	/*
+	 * Unlike with mirrors and raidz a dRAID logical range can map
+	 * to multiple non-contiguous physical ranges. This is handled by
+	 * limiting the size of the logical range to a single group and
+	 * setting the remain argument such that it describes the remaining
+	 * unmapped logical range. This is stricter than absolutely
+	 * necessary but helps simplify the logic below.
+	 */
+	uint64_t group = vdev_draid_offset_to_group(raidvd, logical_start);
+	uint64_t nextstart = vdev_draid_group_to_offset(raidvd, group + 1);
+	if (logical_end > nextstart)
+		logical_end = nextstart;
+
+	/* Find the starting offset for each vdev in the group */
+	uint64_t perm, groupstart;
+	uint64_t start = vdev_draid_logical_to_physical(raidvd,
+	    logical_start, &perm, &groupstart);
+	uint64_t end = start;
+
+	uint8_t *base;
+	uint64_t iter, id;
+	vdev_draid_get_perm(vdc, perm, &base, &iter);
+
+	/*
+	 * Check if the passed child falls within the group.  If it does
+	 * update the start and end to reflect the physical range.
+	 * Otherwise, leave them unmodified which will result in an empty
+	 * (zero-length) physical range being returned.
+	 */
+	for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
+		uint64_t c = (groupstart + i) % vdc->vdc_ndisks;
+
+		if (c == 0 && i != 0) {
+			/* the group wrapped, increment the start */
+			start += VDEV_DRAID_ROWHEIGHT;
+			end = start;
+		}
+
+		id = vdev_draid_permute_id(vdc, base, iter, c);
+		if (id == cvd->vdev_id) {
+			uint64_t b_size = (logical_end >> ashift) -
+			    (logical_start >> ashift);
+			ASSERT3U(b_size, >, 0);
+			end = start + ((((b_size - 1) /
+			    vdc->vdc_groupwidth) + 1) << ashift);
+			break;
+		}
+	}
+	physical_rs->rs_start = start;
+	physical_rs->rs_end = end;
+
+	/*
+	 * Only top-level vdevs are allowed to set remain_rs because
+	 * when .vdev_op_xlate() is called for their children the full
+	 * logical range is not provided by vdev_xlate().
+	 */
+	remain_rs->rs_start = logical_end;
+	remain_rs->rs_end = logical_rs->rs_end;
+
+	ASSERT3U(physical_rs->rs_start, <=, logical_start);
+	ASSERT3U(physical_rs->rs_end - physical_rs->rs_start, <=,
+	    logical_end - logical_start);
+}
+
+/*
+ * Add dRAID specific fields to the config nvlist.
+ */
+static void
+vdev_draid_config_generate(vdev_t *vd, nvlist_t *nv)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	/* Config key / value pairs, listed in the order they are added. */
+	const struct {
+		const char *key;
+		uint64_t value;
+	} fields[] = {
+		{ ZPOOL_CONFIG_NPARITY,		vdc->vdc_nparity },
+		{ ZPOOL_CONFIG_DRAID_NDATA,	vdc->vdc_ndata },
+		{ ZPOOL_CONFIG_DRAID_NSPARES,	vdc->vdc_nspares },
+		{ ZPOOL_CONFIG_DRAID_NGROUPS,	vdc->vdc_ngroups },
+	};
+
+	for (size_t i = 0; i < sizeof (fields) / sizeof (fields[0]); i++)
+		fnvlist_add_uint64(nv, fields[i].key, fields[i].value);
+}
+
+/*
+ * Initialize private dRAID specific fields from the nvlist.
+ *
+ * nv	vdev config; must carry ZPOOL_CONFIG_DRAID_NDATA, ZPOOL_CONFIG_NPARITY,
+ *	ZPOOL_CONFIG_CHILDREN, ZPOOL_CONFIG_DRAID_NSPARES and
+ *	ZPOOL_CONFIG_DRAID_NGROUPS
+ * tsd	out: newly allocated vdev_draid_config_t, released again by
+ *	vdev_draid_fini()
+ *
+ * Returns 0 on success.  Returns EINVAL, leaving *tsd untouched, when a
+ * required value is absent or out of range, when no permutation map exists
+ * for the child count, or when the permutations cannot be generated.
+ */
+static int
+vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
+{
+	(void) spa;
+	uint64_t ndata, nparity, nspares, ngroups;
+	int error;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, &ndata))
+		return (SET_ERROR(EINVAL));
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) ||
+	    nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	uint_t children;
+	nvlist_t **child;
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0 || children == 0 ||
+	    children > VDEV_DRAID_MAX_CHILDREN) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * ndata is taken from the config as is, so bound it before it is
+	 * used in unsigned arithmetic.  A zero value would later be used as
+	 * a divisor, and a huge value would let the sums below wrap around
+	 * and slip past the child count checks.  Testing ndata on its own
+	 * first guarantees that ndata + nparity cannot wrap.
+	 */
+	if (ndata == 0 || ndata > children || (ndata + nparity) > children)
+		return (SET_ERROR(EINVAL));
+
+	/* children - (ndata + nparity) cannot underflow after the check. */
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, &nspares) ||
+	    nspares > 100 || nspares > (children - (ndata + nparity))) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, &ngroups) ||
+	    ngroups == 0 || ngroups > VDEV_DRAID_MAX_CHILDREN) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Validate the minimum number of children exist per group for the
+	 * specified parity level (draid1 >= 2, draid2 >= 3, draid3 >= 4).
+	 */
+	if (children < (ndata + nparity + nspares))
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Create the dRAID configuration using the pool nvlist configuration
+	 * and the fixed mapping for the correct number of children.
+	 */
+	vdev_draid_config_t *vdc;
+	const draid_map_t *map;
+
+	error = vdev_draid_lookup_map(children, &map);
+	if (error)
+		return (SET_ERROR(EINVAL));
+
+	vdc = kmem_zalloc(sizeof (*vdc), KM_SLEEP);
+	vdc->vdc_ndata = ndata;
+	vdc->vdc_nparity = nparity;
+	vdc->vdc_nspares = nspares;
+	vdc->vdc_children = children;
+	vdc->vdc_ngroups = ngroups;
+	vdc->vdc_nperms = map->dm_nperms;
+
+	/* On failure nothing but vdc itself has to be released. */
+	error = vdev_draid_generate_perms(map, &vdc->vdc_perms);
+	if (error) {
+		kmem_free(vdc, sizeof (*vdc));
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
+	 * Derived constants.
+	 */
+	vdc->vdc_groupwidth = vdc->vdc_ndata + vdc->vdc_nparity;
+	vdc->vdc_ndisks = vdc->vdc_children - vdc->vdc_nspares;
+	vdc->vdc_groupsz = vdc->vdc_groupwidth * VDEV_DRAID_ROWHEIGHT;
+	vdc->vdc_devslicesz = (vdc->vdc_groupsz * vdc->vdc_ngroups) /
+	    vdc->vdc_ndisks;
+
+	ASSERT3U(vdc->vdc_groupwidth, >=, 2);
+	ASSERT3U(vdc->vdc_groupwidth, <=, vdc->vdc_ndisks);
+	ASSERT3U(vdc->vdc_groupsz, >=, 2 * VDEV_DRAID_ROWHEIGHT);
+	ASSERT3U(vdc->vdc_devslicesz, >=, VDEV_DRAID_ROWHEIGHT);
+	ASSERT3U(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT, ==, 0);
+	ASSERT3U((vdc->vdc_groupwidth * vdc->vdc_ngroups) %
+	    vdc->vdc_ndisks, ==, 0);
+
+	*tsd = vdc;
+
+	return (0);
+}
+
+static void
+vdev_draid_fini(vdev_t *vd)
+{
+	vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+	/* One byte per child for each permutation in the table. */
+	size_t perms_size = sizeof (uint8_t) *
+	    vdc->vdc_children * vdc->vdc_nperms;
+
+	vmem_free(vdc->vdc_perms, perms_size);
+	kmem_free(vdc, sizeof (*vdc));
+}
+
+static uint64_t
+vdev_draid_nparity(vdev_t *vd)
+{
+	/* Parity count recorded in the dRAID configuration. */
+	return (((vdev_draid_config_t *)vd->vdev_tsd)->vdc_nparity);
+}
+
+static uint64_t
+vdev_draid_ndisks(vdev_t *vd)
+{
+	/* Disk count recorded in the dRAID configuration. */
+	return (((vdev_draid_config_t *)vd->vdev_tsd)->vdc_ndisks);
+}
+
+/*
+ * Operations vector for dRAID vdevs (type VDEV_TYPE_DRAID).  A dRAID vdev
+ * is not a leaf.  The hold, rele and remap operations are not provided.
+ */
+vdev_ops_t vdev_draid_ops = {
+	.vdev_op_init = vdev_draid_init,
+	.vdev_op_fini = vdev_draid_fini,
+	.vdev_op_open = vdev_draid_open,
+	.vdev_op_close = vdev_draid_close,
+	.vdev_op_asize = vdev_draid_asize,
+	.vdev_op_min_asize = vdev_draid_min_asize,
+	.vdev_op_min_alloc = vdev_draid_min_alloc,
+	.vdev_op_io_start = vdev_draid_io_start,
+	.vdev_op_io_done = vdev_draid_io_done,
+	.vdev_op_state_change = vdev_draid_state_change,
+	.vdev_op_need_resilver = vdev_draid_need_resilver,
+	.vdev_op_hold = NULL,
+	.vdev_op_rele = NULL,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_draid_xlate,
+	.vdev_op_rebuild_asize = vdev_draid_rebuild_asize,
+	.vdev_op_metaslab_init = vdev_draid_metaslab_init,
+	.vdev_op_config_generate = vdev_draid_config_generate,
+	.vdev_op_nparity = vdev_draid_nparity,
+	.vdev_op_ndisks = vdev_draid_ndisks,
+	.vdev_op_type = VDEV_TYPE_DRAID,
+	.vdev_op_leaf = B_FALSE,
+};
+
+
+/*
+ * A dRAID distributed spare is a virtual leaf vdev which is included in the
+ * parent dRAID configuration.  The last N columns of the dRAID permutation
+ * table are used to determine on which dRAID children a specific offset
+ * should be written.  These spare leaf vdevs can only be used to replace
+ * faulted children in the same dRAID configuration.
+ */
+
+/*
+ * Distributed spare state.  vds_top_guid and vds_spare_id are filled in by
+ * vdev_draid_spare_init() and are not changed afterwards.  vds_draid_vdev is
+ * NULL until vdev_draid_spare_open() locates the parent dRAID vdev and is
+ * reset to NULL by vdev_draid_spare_close().
+ */
+typedef struct {
+	vdev_t *vds_draid_vdev;		/* top-level parent dRAID vdev */
+	uint64_t vds_top_guid;		/* top-level parent dRAID guid */
+	uint64_t vds_spare_id;		/* spare id (0 - vdc->vdc_nspares-1) */
+} vdev_draid_spare_t;
+
+/*
+ * Returns the parent dRAID vdev to which the distributed spare belongs.
+ * This may be safely called even when the vdev is not open.
+ */
+vdev_t *
+vdev_draid_spare_get_parent(vdev_t *vd)
+{
+	vdev_draid_spare_t *vds = vd->vdev_tsd;
+
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_spare_ops);
+
+	/* Use the pointer cached at open time, else search by guid. */
+	vdev_t *parent = vds->vds_draid_vdev;
+	if (parent == NULL) {
+		parent = vdev_lookup_by_guid(vd->vdev_spa->spa_root_vdev,
+		    vds->vds_top_guid);
+	}
+
+	return (parent);
+}
+
+/*
+ * A dRAID spare is active when it's the child of a vdev using the
+ * vdev_spare_ops, vdev_replacing_ops or vdev_draid_ops.
+ */
+static boolean_t
+vdev_draid_spare_is_active(vdev_t *vd)
+{
+	vdev_t *pvd = vd->vdev_parent;
+
+	/* Without a parent the spare cannot be in use. */
+	if (pvd == NULL)
+		return (B_FALSE);
+
+	/* In use only beneath a spare, replacing or dRAID vdev. */
+	if (pvd->vdev_ops == &vdev_spare_ops ||
+	    pvd->vdev_ops == &vdev_replacing_ops ||
+	    pvd->vdev_ops == &vdev_draid_ops)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Given a dRAID distributed spare vdev, returns the physical child vdev
+ * on which the provided offset resides.  This may involve recursing through
+ * multiple layers of distributed spares.  Note that offset is relative to
+ * this vdev.
+ */
+vdev_t *
+vdev_draid_spare_get_child(vdev_t *vd, uint64_t physical_offset)
+{
+	/*
+	 * A spare may resolve to another distributed spare, so keep
+	 * resolving until a vdev of some other type is reached.
+	 */
+	for (;;) {
+		vdev_draid_spare_t *vds = vd->vdev_tsd;
+
+		ASSERT3P(vd->vdev_ops, ==, &vdev_draid_spare_ops);
+
+		/* The vdev is closed */
+		vdev_t *tvd = vds->vds_draid_vdev;
+		if (tvd == NULL)
+			return (NULL);
+
+		vdev_draid_config_t *vdc = tvd->vdev_tsd;
+
+		ASSERT3P(tvd->vdev_ops, ==, &vdev_draid_ops);
+		ASSERT3U(vds->vds_spare_id, <, vdc->vdc_nspares);
+
+		/* Pick the permutation for the slice holding the offset. */
+		uint8_t *base;
+		uint64_t iter;
+		vdev_draid_get_perm(vdc, physical_offset / vdc->vdc_devslicesz,
+		    &base, &iter);
+
+		/* Spare columns are counted back from the last child. */
+		uint64_t column = (tvd->vdev_children - 1) - vds->vds_spare_id;
+		uint64_t cid = vdev_draid_permute_id(vdc, base, iter, column);
+		vdev_t *cvd = tvd->vdev_child[cid];
+
+		if (cvd->vdev_ops != &vdev_draid_spare_ops)
+			return (cvd);
+
+		vd = cvd;
+	}
+}
+
+static void
+vdev_draid_spare_close(vdev_t *vd)
+{
+	/* Forget the parent dRAID vdev; a NULL pointer marks it closed. */
+	((vdev_draid_spare_t *)vd->vdev_tsd)->vds_draid_vdev = NULL;
+}
+
+/*
+ * Opening a dRAID spare device is done by looking up the associated dRAID
+ * top-level vdev guid from the spare configuration.
+ */
+static int
+vdev_draid_spare_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
+{
+	const uint64_t labels = VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
+	vdev_draid_spare_t *vds = vd->vdev_tsd;
+	vdev_t *tvd;
+
+	tvd = vdev_lookup_by_guid(vd->vdev_spa->spa_root_vdev,
+	    vds->vds_top_guid);
+	if (tvd == NULL) {
+		if (vd->vdev_parent != NULL)
+			return (SET_ERROR(EINVAL));
+
+		/*
+		 * While spa_vdev_add() labels new spares the dRAID they
+		 * belong to is not yet attached to the root vdev and the
+		 * spare has no parent.  Pretend to be a valid device so
+		 * the label can be initialized and the distributed spare
+		 * added to the configuration.
+		 */
+		*psize = *max_psize = SPA_MINDEVSIZE;
+		*logical_ashift = *physical_ashift = ASHIFT_MIN;
+		return (0);
+	}
+
+	vdev_draid_config_t *vdc = tvd->vdev_tsd;
+	if (tvd->vdev_ops != &vdev_draid_ops || vdc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	if (vds->vds_spare_id >= vdc->vdc_nspares)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * tvd->vdev_asize and tvd->vdev_max_asize cannot be trusted here.
+	 * The caller may be vdev_draid_open(), in which case vdev_open()
+	 * has not refreshed them yet, so always recompute both sizes.
+	 */
+	uint64_t asize, max_asize;
+	vdev_draid_calculate_asize(tvd, &asize, &max_asize,
+	    logical_ashift, physical_ashift);
+
+	vds->vds_draid_vdev = tvd;
+	*psize = asize + labels;
+	*max_psize = max_asize + labels;
+
+	return (0);
+}
+
+/*
+ * Completed distributed spare IO.  Store the result in the parent zio
+ * as if it had performed the operation itself.  Only the first error is
+ * preserved if there are multiple errors.
+ */
+static void
+vdev_draid_spare_child_done(zio_t *zio)
+{
+	zio_t *parent = zio->io_private;
+
+	/*
+	 * Non-writable vdevs are still sent IO so that their DTLs stay
+	 * accurate, but their errors must not leak into the distributed
+	 * spare's DTL; vdev_draid_need_resilver() consults the relevant
+	 * DTL during a resilver instead.  For writable vdevs only the
+	 * first error reaches the parent.
+	 */
+	if (vdev_writeable(zio->io_vd) && parent->io_error == 0)
+		parent->io_error = zio->io_error;
+}
+
+/*
+ * Returns a valid label nvlist for the distributed spare vdev.  This is
+ * used to bypass the IO pipeline to avoid the complexity of constructing
+ * a complete label with valid checksum to return when read.
+ */
+nvlist_t *
+vdev_draid_read_config_spare(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+	spa_aux_vdev_t *sav = &spa->spa_spares;
+	uint64_t pool_state, guid;
+
+	if (vdev_draid_spare_is_active(vd))
+		pool_state = POOL_STATE_ACTIVE;
+	else
+		pool_state = POOL_STATE_SPARE;
+
+	nvlist_t *label = fnvlist_alloc();
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_IS_SPARE, 1);
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, spa_version(spa));
+	fnvlist_add_string(label, ZPOOL_CONFIG_POOL_NAME, spa_name(spa));
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa));
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg);
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_TOP_GUID,
+	    vd->vdev_top->vdev_guid);
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, pool_state);
+
+	/*
+	 * Report the guid of the distributed spare with the same path in
+	 * the pool's spare list, falling back to this vdev's own guid.
+	 */
+	guid = vd->vdev_guid;
+	for (int i = 0; i < sav->sav_count; i++) {
+		vdev_t *svd = sav->sav_vdevs[i];
+
+		if (svd->vdev_ops != &vdev_draid_spare_ops)
+			continue;
+		if (strcmp(svd->vdev_path, vd->vdev_path) != 0)
+			continue;
+
+		guid = svd->vdev_guid;
+		break;
+	}
+
+	fnvlist_add_uint64(label, ZPOOL_CONFIG_GUID, guid);
+
+	return (label);
+}
+
+/*
+ * Handle any ioctl requested of the distributed spare.  Only flushes
+ * are supported in which case all children must be flushed.
+ */
+static int
+vdev_draid_spare_ioctl(zio_t *zio)
+{
+	vdev_t *vd = zio->io_vd;
+
+	/* Cache flushes are the only ioctl that is handled. */
+	if (zio->io_cmd != DKIOCFLUSHWRITECACHE)
+		return (SET_ERROR(ENOTSUP));
+
+	/* Pass the flush on to every child. */
+	for (int c = 0; c < vd->vdev_children; c++) {
+		zio_t *cio = zio_vdev_child_io(zio, NULL, vd->vdev_child[c],
+		    zio->io_offset, zio->io_abd, zio->io_size, zio->io_type,
+		    zio->io_priority, 0, vdev_draid_spare_child_done, zio);
+		zio_nowait(cio);
+	}
+
+	return (0);
+}
+
+/*
+ * Initiate an IO to the distributed spare.  For normal IOs this entails using
+ * the zio->io_offset and permutation table to calculate which child dRAID vdev
+ * is responsible for the data.  Then passing along the zio to that child to
+ * perform the actual IO.  The label ranges are not stored on disk and require
+ * some special handling which is described below.
+ */
+static void
+vdev_draid_spare_io_start(zio_t *zio)
+{
+	vdev_t *cvd = NULL, *vd = zio->io_vd;
+	vdev_draid_spare_t *vds = vd->vdev_tsd;
+	/* Offset on the child, i.e. with the leading label area removed. */
+	uint64_t offset = zio->io_offset - VDEV_LABEL_START_SIZE;
+
+	/*
+	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
+	 * Nothing to be done here but return failure.  The error goes
+	 * through SET_ERROR() like every other failure in this function.
+	 */
+	if (vds == NULL) {
+		zio->io_error = SET_ERROR(ENXIO);
+		zio_interrupt(zio);
+		return;
+	}
+
+	switch (zio->io_type) {
+	case ZIO_TYPE_IOCTL:
+		zio->io_error = vdev_draid_spare_ioctl(zio);
+		break;
+
+	case ZIO_TYPE_WRITE:
+		if (VDEV_OFFSET_IS_LABEL(vd, zio->io_offset)) {
+			/*
+			 * Accept probe IOs and config writers to simulate the
+			 * existence of an on disk label.  vdev_label_sync(),
+			 * vdev_uberblock_sync() and vdev_copy_uberblocks()
+			 * skip the distributed spares.  This only leaves
+			 * vdev_label_init() which is allowed to succeed to
+			 * avoid adding special cases to the function.
+			 */
+			if (zio->io_flags & ZIO_FLAG_PROBE ||
+			    zio->io_flags & ZIO_FLAG_CONFIG_WRITER) {
+				zio->io_error = 0;
+			} else {
+				zio->io_error = SET_ERROR(EIO);
+			}
+		} else {
+			cvd = vdev_draid_spare_get_child(vd, offset);
+
+			/* NULL means the spare has been closed. */
+			if (cvd == NULL) {
+				zio->io_error = SET_ERROR(ENXIO);
+			} else {
+				zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+				    offset, zio->io_abd, zio->io_size,
+				    zio->io_type, zio->io_priority, 0,
+				    vdev_draid_spare_child_done, zio));
+			}
+		}
+		break;
+
+	case ZIO_TYPE_READ:
+		if (VDEV_OFFSET_IS_LABEL(vd, zio->io_offset)) {
+			/*
+			 * Accept probe IOs to simulate the existence of a
+			 * label.  vdev_label_read_config() bypasses the
+			 * pipeline to read the label configuration and
+			 * vdev_uberblock_load() skips distributed spares
+			 * when attempting to locate the best uberblock.
+			 */
+			if (zio->io_flags & ZIO_FLAG_PROBE) {
+				zio->io_error = 0;
+			} else {
+				zio->io_error = SET_ERROR(EIO);
+			}
+		} else {
+			cvd = vdev_draid_spare_get_child(vd, offset);
+
+			if (cvd == NULL || !vdev_readable(cvd)) {
+				zio->io_error = SET_ERROR(ENXIO);
+			} else {
+				zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+				    offset, zio->io_abd, zio->io_size,
+				    zio->io_type, zio->io_priority, 0,
+				    vdev_draid_spare_child_done, zio));
+			}
+		}
+		break;
+
+	case ZIO_TYPE_TRIM:
+		/* The vdev label ranges are never trimmed */
+		ASSERT0(VDEV_OFFSET_IS_LABEL(vd, zio->io_offset));
+
+		cvd = vdev_draid_spare_get_child(vd, offset);
+
+		if (cvd == NULL || !cvd->vdev_has_trim) {
+			zio->io_error = SET_ERROR(ENXIO);
+		} else {
+			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+			    offset, zio->io_abd, zio->io_size,
+			    zio->io_type, zio->io_priority, 0,
+			    vdev_draid_spare_child_done, zio));
+		}
+		break;
+
+	default:
+		zio->io_error = SET_ERROR(ENOTSUP);
+		break;
+	}
+
+	zio_execute(zio);
+}
+
+/*
+ * IO completion hook for a distributed spare.  Nothing is done here and the
+ * zio argument is deliberately unused.
+ */
+static void
+vdev_draid_spare_io_done(zio_t *zio)
+{
+	(void) zio;
+}
+
+/*
+ * Lookup the full spare config in spa->spa_spares.sav_config and
+ * return the top_guid and spare_id for the named spare.
+ */
+static int
+vdev_draid_spare_lookup(spa_t *spa, nvlist_t *nv, uint64_t *top_guidp,
+    uint64_t *spare_idp)
+{
+	nvlist_t *sav_config = spa->spa_spares.sav_config;
+	nvlist_t **spares;
+	uint_t nspares;
+	char *spare_name;
+
+	/* There is nothing to search without a populated spare list. */
+	if (sav_config == NULL)
+		return (SET_ERROR(ENOENT));
+
+	if (nvlist_lookup_nvlist_array(sav_config, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) != 0)
+		return (SET_ERROR(ENOENT));
+
+	/* The spare being searched for is identified by its path. */
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &spare_name) != 0)
+		return (SET_ERROR(EINVAL));
+
+	for (int i = 0; i < nspares; i++) {
+		nvlist_t *candidate = spares[i];
+		uint64_t top_guid, spare_id;
+		char *type, *path;
+
+		/* Ignore entries that are not distributed spares. */
+		if (nvlist_lookup_string(candidate, ZPOOL_CONFIG_TYPE,
+		    &type) != 0 || strcmp(type, VDEV_TYPE_DRAID_SPARE) != 0)
+			continue;
+
+		/* Ignore entries whose path is different. */
+		if (nvlist_lookup_string(candidate, ZPOOL_CONFIG_PATH,
+		    &path) != 0 || strcmp(path, spare_name) != 0)
+			continue;
+
+		/* This is the one; it has to carry both values. */
+		if (nvlist_lookup_uint64(candidate, ZPOOL_CONFIG_TOP_GUID,
+		    &top_guid) != 0 ||
+		    nvlist_lookup_uint64(candidate, ZPOOL_CONFIG_SPARE_ID,
+		    &spare_id) != 0)
+			return (SET_ERROR(EINVAL));
+
+		*top_guidp = top_guid;
+		*spare_idp = spare_id;
+		return (0);
+	}
+
+	return (SET_ERROR(ENOENT));
+}
+
+/*
+ * Initialize private dRAID spare specific fields from the nvlist.
+ */
+static int
+vdev_draid_spare_init(spa_t *spa, nvlist_t *nv, void **tsd)
+{
+	uint64_t top_guid = 0;
+	uint64_t spare_id;
+
+	/*
+	 * Normally the top_guid and spare_id come from the list of spares
+	 * kept in the spa.  While a pool is being created or vdevs are
+	 * being added that list is not populated yet, and both values are
+	 * read from the passed config instead.
+	 */
+	if (vdev_draid_spare_lookup(spa, nv, &top_guid, &spare_id) != 0) {
+		int err = nvlist_lookup_uint64(nv, ZPOOL_CONFIG_TOP_GUID,
+		    &top_guid);
+		if (err == 0) {
+			err = nvlist_lookup_uint64(nv, ZPOOL_CONFIG_SPARE_ID,
+			    &spare_id);
+		}
+		if (err != 0)
+			return (SET_ERROR(EINVAL));
+	}
+
+	/* The parent pointer stays NULL until the spare is opened. */
+	vdev_draid_spare_t *vds = kmem_alloc(sizeof (*vds), KM_SLEEP);
+	vds->vds_draid_vdev = NULL;
+	vds->vds_top_guid = top_guid;
+	vds->vds_spare_id = spare_id;
+
+	*tsd = vds;
+
+	return (0);
+}
+
+static void
+vdev_draid_spare_fini(vdev_t *vd)
+{
+	vdev_draid_spare_t *vds = vd->vdev_tsd;
+
+	/* Release the state allocated by vdev_draid_spare_init(). */
+	kmem_free(vds, sizeof (vdev_draid_spare_t));
+}
+
+static void
+vdev_draid_spare_config_generate(vdev_t *vd, nvlist_t *nv)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_spare_ops);
+
+	/* Record which dRAID vdev and which of its spares this is. */
+	const vdev_draid_spare_t *spare = vd->vdev_tsd;
+	uint64_t top_guid = spare->vds_top_guid;
+	uint64_t spare_id = spare->vds_spare_id;
+
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID, top_guid);
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_SPARE_ID, spare_id);
+}
+
+/*
+ * Operations vector for distributed spares (type VDEV_TYPE_DRAID_SPARE).
+ * A distributed spare is a leaf vdev.  It uses the default asize, min_asize
+ * and xlate operations and leaves the remaining optional ones unset.
+ */
+vdev_ops_t vdev_draid_spare_ops = {
+	.vdev_op_init = vdev_draid_spare_init,
+	.vdev_op_fini = vdev_draid_spare_fini,
+	.vdev_op_open = vdev_draid_spare_open,
+	.vdev_op_close = vdev_draid_spare_close,
+	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
+	.vdev_op_io_start = vdev_draid_spare_io_start,
+	.vdev_op_io_done = vdev_draid_spare_io_done,
+	.vdev_op_state_change = NULL,
+	.vdev_op_need_resilver = NULL,
+	.vdev_op_hold = NULL,
+	.vdev_op_rele = NULL,
+	.vdev_op_remap = NULL,
+	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = vdev_draid_spare_config_generate,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
+	.vdev_op_type = VDEV_TYPE_DRAID_SPARE,
+	.vdev_op_leaf = B_TRUE,
+};

diff --git a/zfs/module/zfs/vdev_draid_rand.c b/zfs/module/zfs/vdev_draid_rand.c
new file mode 100644
index 0000000..fe1a75c
--- /dev/null
+++ b/zfs/module/zfs/vdev_draid_rand.c

@@ -0,0 +1,40 @@
+/*
+ * Xorshift Pseudo Random Number Generator based on work by David Blackman
+ * and Sebastiano Vigna (vigna@acm.org).
+ *
+ *   "Further scramblings of Marsaglia's xorshift generators"
+ *   http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ *   http://prng.di.unimi.it/xoroshiro128plusplus.c
+ *
+ * To the extent possible under law, the author has dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+ *
+ * See <http://creativecommons.org/publicdomain/zero/1.0/>.
+ *
+ * This is xoroshiro128++ 1.0, one of our all-purpose, rock-solid,
+ * small-state generators. It is extremely (sub-ns) fast and it passes all
+ * tests we are aware of, but its state space is large enough only for
+ * mild parallelism.
+ */
+
+#include <sys/vdev_draid.h>
+
+/*
+ * Rotate the 64-bit value x left by k bits.
+ *
+ * Both shift counts are reduced modulo 64 so that k == 0 returns x unchanged
+ * instead of evaluating x >> 64, which is undefined behavior in C.  For
+ * 0 < k < 64 the result is identical to the plain two-shift form.
+ */
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+	return ((x << (k & 63)) | (x >> ((64 - k) & 63)));
+}
+
+/*
+ * Produce the next xoroshiro128++ output and advance the generator.
+ *
+ * s points to the two 64-bit state words; both are rewritten in place.
+ * The returned value is computed from the state as it was on entry.
+ */
+uint64_t
+vdev_draid_rand(uint64_t *s)
+{
+	const uint64_t lo = s[0];
+	const uint64_t mix = lo ^ s[1];
+	const uint64_t out = rotl(lo + s[1], 17) + lo;
+
+	s[0] = rotl(lo, 49) ^ mix ^ (mix << 21);	/* a, b */
+	s[1] = rotl(mix, 28);				/* c */
+
+	return (out);
+}

diff --git a/zfs/module/zfs/vdev_file.c b/zfs/module/zfs/vdev_file.c
deleted file mode 100644
index b79017f..0000000
--- a/zfs/module/zfs/vdev_file.c
+++ /dev/null

@@ -1,331 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev_file.h>
-#include <sys/vdev_impl.h>
-#include <sys/vdev_trim.h>
-#include <sys/zio.h>
-#include <sys/fs/zfs.h>
-#include <sys/fm/fs/zfs.h>
-#include <sys/abd.h>
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-
-/*
- * Virtual device vector for files.
- */
-
-static taskq_t *vdev_file_taskq;
-
-static void
-vdev_file_hold(vdev_t *vd)
-{
-	ASSERT(vd->vdev_path != NULL);
-}
-
-static void
-vdev_file_rele(vdev_t *vd)
-{
-	ASSERT(vd->vdev_path != NULL);
-}
-
-static int
-vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
-{
-	vdev_file_t *vf;
-	vnode_t *vp;
-	vattr_t vattr;
-	int error;
-
-	/*
-	 * Rotational optimizations only make sense on block devices.
-	 */
-	vd->vdev_nonrot = B_TRUE;
-
-	/*
-	 * Allow TRIM on file based vdevs.  This may not always be supported,
-	 * since it depends on your kernel version and underlying filesystem
-	 * type but it is always safe to attempt.
-	 */
-	vd->vdev_has_trim = B_TRUE;
-
-	/*
-	 * Disable secure TRIM on file based vdevs.  There is no way to
-	 * request this behavior from the underlying filesystem.
-	 */
-	vd->vdev_has_securetrim = B_FALSE;
-
-	/*
-	 * We must have a pathname, and it must be absolute.
-	 */
-	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
-		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Reopen the device if it's not currently open.  Otherwise,
-	 * just update the physical size of the device.
-	 */
-	if (vd->vdev_tsd != NULL) {
-		ASSERT(vd->vdev_reopening);
-		vf = vd->vdev_tsd;
-		goto skip_open;
-	}
-
-	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
-
-	/*
-	 * We always open the files from the root of the global zone, even if
-	 * we're in a local zone.  If the user has gotten to this point, the
-	 * administrator has already decided that the pool should be available
-	 * to local zone users, so the underlying devices should be as well.
-	 */
-	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
-	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
-	    spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
-
-	if (error) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
-		return (error);
-	}
-
-	vf->vf_vnode = vp;
-
-#ifdef _KERNEL
-	/*
-	 * Make sure it's a regular file.
-	 */
-	if (vp->v_type != VREG) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
-		return (SET_ERROR(ENODEV));
-	}
-#endif
-
-skip_open:
-	/*
-	 * Determine the physical size of the file.
-	 */
-	vattr.va_mask = AT_SIZE;
-	error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
-	if (error) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
-		return (error);
-	}
-
-	*max_psize = *psize = vattr.va_size;
-	*ashift = SPA_MINBLOCKSHIFT;
-
-	return (0);
-}
-
-static void
-vdev_file_close(vdev_t *vd)
-{
-	vdev_file_t *vf = vd->vdev_tsd;
-
-	if (vd->vdev_reopening || vf == NULL)
-		return;
-
-	if (vf->vf_vnode != NULL) {
-		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
-		(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
-		    kcred, NULL);
-	}
-
-	vd->vdev_delayed_close = B_FALSE;
-	kmem_free(vf, sizeof (vdev_file_t));
-	vd->vdev_tsd = NULL;
-}
-
-static void
-vdev_file_io_strategy(void *arg)
-{
-	zio_t *zio = (zio_t *)arg;
-	vdev_t *vd = zio->io_vd;
-	vdev_file_t *vf = vd->vdev_tsd;
-	ssize_t resid;
-	void *buf;
-
-	if (zio->io_type == ZIO_TYPE_READ)
-		buf = abd_borrow_buf(zio->io_abd, zio->io_size);
-	else
-		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
-
-	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
-	    UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size,
-	    zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
-
-	if (zio->io_type == ZIO_TYPE_READ)
-		abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
-	else
-		abd_return_buf(zio->io_abd, buf, zio->io_size);
-
-	if (resid != 0 && zio->io_error == 0)
-		zio->io_error = SET_ERROR(ENOSPC);
-
-	zio_delay_interrupt(zio);
-}
-
-static void
-vdev_file_io_fsync(void *arg)
-{
-	zio_t *zio = (zio_t *)arg;
-	vdev_file_t *vf = zio->io_vd->vdev_tsd;
-
-	zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
-
-	zio_interrupt(zio);
-}
-
-static void
-vdev_file_io_start(zio_t *zio)
-{
-	vdev_t *vd = zio->io_vd;
-	vdev_file_t *vf = vd->vdev_tsd;
-
-	if (zio->io_type == ZIO_TYPE_IOCTL) {
-		/* XXPOLICY */
-		if (!vdev_readable(vd)) {
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return;
-		}
-
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
-
-			if (zfs_nocacheflush)
-				break;
-
-			/*
-			 * We cannot safely call vfs_fsync() when PF_FSTRANS
-			 * is set in the current context.  Filesystems like
-			 * XFS include sanity checks to verify it is not
-			 * already set, see xfs_vm_writepage().  Therefore
-			 * the sync must be dispatched to a different context.
-			 */
-			if (__spl_pf_fstrans_check()) {
-				VERIFY3U(taskq_dispatch(vdev_file_taskq,
-				    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
-				    TASKQID_INVALID);
-				return;
-			}
-
-			zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
-			    kcred, NULL);
-			break;
-		default:
-			zio->io_error = SET_ERROR(ENOTSUP);
-		}
-
-		zio_execute(zio);
-		return;
-	} else if (zio->io_type == ZIO_TYPE_TRIM) {
-		struct flock flck;
-
-		ASSERT3U(zio->io_size, !=, 0);
-		bzero(&flck, sizeof (flck));
-		flck.l_type = F_FREESP;
-		flck.l_start = zio->io_offset;
-		flck.l_len = zio->io_size;
-		flck.l_whence = SEEK_SET;
-
-		zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck,
-		    0, 0, kcred, NULL);
-
-		zio_execute(zio);
-		return;
-	}
-
-	zio->io_target_timestamp = zio_handle_io_delay(zio);
-
-	VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
-	    TQ_SLEEP), !=, TASKQID_INVALID);
-}
-
-/* ARGSUSED */
-static void
-vdev_file_io_done(zio_t *zio)
-{
-}
-
-vdev_ops_t vdev_file_ops = {
-	.vdev_op_open = vdev_file_open,
-	.vdev_op_close = vdev_file_close,
-	.vdev_op_asize = vdev_default_asize,
-	.vdev_op_io_start = vdev_file_io_start,
-	.vdev_op_io_done = vdev_file_io_done,
-	.vdev_op_state_change = NULL,
-	.vdev_op_need_resilver = NULL,
-	.vdev_op_hold = vdev_file_hold,
-	.vdev_op_rele = vdev_file_rele,
-	.vdev_op_remap = NULL,
-	.vdev_op_xlate = vdev_default_xlate,
-	.vdev_op_type = VDEV_TYPE_FILE,		/* name of this vdev type */
-	.vdev_op_leaf = B_TRUE			/* leaf vdev */
-};
-
-void
-vdev_file_init(void)
-{
-	vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16),
-	    minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC);
-
-	VERIFY(vdev_file_taskq);
-}
-
-void
-vdev_file_fini(void)
-{
-	taskq_destroy(vdev_file_taskq);
-}
-
-/*
- * From userland we access disks just like files.
- */
-#ifndef _KERNEL
-
-vdev_ops_t vdev_disk_ops = {
-	.vdev_op_open = vdev_file_open,
-	.vdev_op_close = vdev_file_close,
-	.vdev_op_asize = vdev_default_asize,
-	.vdev_op_io_start = vdev_file_io_start,
-	.vdev_op_io_done = vdev_file_io_done,
-	.vdev_op_state_change = NULL,
-	.vdev_op_need_resilver = NULL,
-	.vdev_op_hold = vdev_file_hold,
-	.vdev_op_rele = vdev_file_rele,
-	.vdev_op_remap = NULL,
-	.vdev_op_xlate = vdev_default_xlate,
-	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
-	.vdev_op_leaf = B_TRUE			/* leaf vdev */
-};
-
-#endif

diff --git a/zfs/module/zfs/vdev_indirect.c b/zfs/module/zfs/vdev_indirect.c
index 4539fa6..9e4c115 100644
--- a/zfs/module/zfs/vdev_indirect.c
+++ b/zfs/module/zfs/vdev_indirect.c

@@ -16,6 +16,7 @@
 /*
  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2014, 2020 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -26,7 +27,6 @@
 #include <sys/zio.h>
 #include <sys/zio_checksum.h>
 #include <sys/metaslab.h>
-#include <sys/refcount.h>
 #include <sys/dmu.h>
 #include <sys/vdev_indirect_mapping.h>
 #include <sys/dmu_tx.h>
@@ -181,7 +181,7 @@
  * condenses.  Higher values will condense less often (causing less
  * i/o); lower values will reduce the mapping size more quickly.
  */
-int zfs_indirect_condense_obsolete_pct = 25;
+int zfs_condense_indirect_obsolete_pct = 25;
 
 /*
  * Condense if the obsolete space map takes up more than this amount of
@@ -239,6 +239,7 @@
 	 */
 	struct indirect_child *ic_duplicate;
 	list_node_t ic_node; /* node on is_unique_child */
+	int ic_error; /* set when a child does not contain the data */
 } indirect_child_t;
 
 /*
@@ -269,7 +270,7 @@
 	 */
 	indirect_child_t *is_good_child;
 
-	indirect_child_t is_child[1]; /* variable-length */
+	indirect_child_t is_child[];
 } indirect_split_t;
 
 /*
@@ -314,7 +315,6 @@
 
 static const zio_vsd_ops_t vdev_indirect_vsd_ops = {
 	.vsd_free = vdev_indirect_map_free,
-	.vsd_cksum_report = zio_vsd_default_cksum_report
 };
 
 /*
@@ -420,7 +420,7 @@
 	 * If nothing new has been marked obsolete, there is no
 	 * point in condensing.
 	 */
-	ASSERTV(uint64_t obsolete_sm_obj);
+	uint64_t obsolete_sm_obj __maybe_unused;
 	ASSERT0(vdev_obsolete_sm_object(vd, &obsolete_sm_obj));
 	if (vd->vdev_obsolete_sm == NULL) {
 		ASSERT0(obsolete_sm_obj);
@@ -445,7 +445,7 @@
 	 * by the mapping.
 	 */
 	if (bytes_obsolete * 100 / bytes_mapped >=
-	    zfs_indirect_condense_obsolete_pct &&
+	    zfs_condense_indirect_obsolete_pct &&
 	    mapping_size > zfs_condense_min_mapping_bytes) {
 		zfs_dbgmsg("should condense vdev %llu because obsolete "
 		    "spacemap covers %d%% of %lluMB mapping",
@@ -529,8 +529,9 @@
 	zfs_dbgmsg("finished condense of vdev %llu in txg %llu: "
 	    "new mapping object %llu has %llu entries "
 	    "(was %llu entries)",
-	    vd->vdev_id, dmu_tx_get_txg(tx), vic->vic_mapping_object,
-	    new_count, old_count);
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)dmu_tx_get_txg(tx),
+	    (u_longlong_t)vic->vic_mapping_object,
+	    (u_longlong_t)new_count, (u_longlong_t)old_count);
 
 	vdev_config_dirty(spa->spa_root_vdev);
 }
@@ -543,7 +544,7 @@
 {
 	spa_condensing_indirect_t *sci = arg;
 	uint64_t txg = dmu_tx_get_txg(tx);
-	ASSERTV(spa_t *spa = dmu_tx_pool(tx)->dp_spa);
+	spa_t *spa __maybe_unused = dmu_tx_pool(tx)->dp_spa;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT3P(sci, ==, spa->spa_condensing_indirect);
@@ -576,8 +577,7 @@
 	 */
 	if (list_is_empty(&sci->sci_new_mapping_entries[txgoff])) {
 		dsl_sync_task_nowait(dmu_tx_pool(tx),
-		    spa_condense_indirect_commit_sync, sci,
-		    0, ZFS_SPACE_CHECK_NONE, tx);
+		    spa_condense_indirect_commit_sync, sci, tx);
 	}
 
 	vdev_indirect_mapping_entry_t *vime =
@@ -637,16 +637,15 @@
 	}
 }
 
-/* ARGSUSED */
 static boolean_t
 spa_condense_indirect_thread_check(void *arg, zthr_t *zthr)
 {
+	(void) zthr;
 	spa_t *spa = arg;
 
 	return (spa->spa_condensing_indirect != NULL);
 }
 
-/* ARGSUSED */
 static void
 spa_condense_indirect_thread(void *arg, zthr_t *zthr)
 {
@@ -797,7 +796,7 @@
 
 	zfs_dbgmsg("starting condense of vdev %llu in txg %llu: "
 	    "posm=%llu nm=%llu",
-	    vd->vdev_id, dmu_tx_get_txg(tx),
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)dmu_tx_get_txg(tx),
 	    (u_longlong_t)scip->scip_prev_obsolete_sm_object,
 	    (u_longlong_t)scip->scip_next_mapping_object);
 
@@ -814,7 +813,7 @@
 vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
 {
 	spa_t *spa = vd->vdev_spa;
-	ASSERTV(vdev_indirect_config_t *vic = &vd->vdev_indirect_config);
+	vdev_indirect_config_t *vic __maybe_unused = &vd->vdev_indirect_config;
 
 	ASSERT3U(vic->vic_mapping_object, !=, 0);
 	ASSERT(range_tree_space(vd->vdev_obsolete_segments) > 0);
@@ -825,7 +824,7 @@
 	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
 	if (obsolete_sm_object == 0) {
 		obsolete_sm_object = space_map_alloc(spa->spa_meta_objset,
-		    vdev_standard_sm_blksz, tx);
+		    zfs_vdev_standard_sm_blksz, tx);
 
 		ASSERT(vd->vdev_top_zap != 0);
 		VERIFY0(zap_add(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
@@ -883,8 +882,9 @@
 spa_start_indirect_condensing_thread(spa_t *spa)
 {
 	ASSERT3P(spa->spa_condense_zthr, ==, NULL);
-	spa->spa_condense_zthr = zthr_create(spa_condense_indirect_thread_check,
-	    spa_condense_indirect_thread, spa);
+	spa->spa_condense_zthr = zthr_create("z_indirect_condense",
+	    spa_condense_indirect_thread_check,
+	    spa_condense_indirect_thread, spa, minclsyspri);
 }
 
 /*
@@ -940,20 +940,20 @@
 	return (error);
 }
 
-/* ARGSUSED */
 static void
 vdev_indirect_close(vdev_t *vd)
 {
+	(void) vd;
 }
 
-/* ARGSUSED */
 static int
 vdev_indirect_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	*psize = *max_psize = vd->vdev_asize +
 	    VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
-	*ashift = vd->vdev_ashift;
+	*logical_ashift = vd->vdev_ashift;
+	*physical_ashift = vd->vdev_physical_ashift;
 	return (0);
 }
 
@@ -965,7 +965,7 @@
 	list_node_t rs_node;
 } remap_segment_t;
 
-remap_segment_t *
+static remap_segment_t *
 rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset)
 {
 	remap_segment_t *rs = kmem_alloc(sizeof (remap_segment_t), KM_SLEEP);
@@ -989,7 +989,7 @@
  * Finally, since we are doing an allocation, it is up to the caller to
  * free the array allocated in this function.
  */
-vdev_indirect_mapping_entry_phys_t *
+static vdev_indirect_mapping_entry_phys_t *
 vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset,
     uint64_t asize, uint64_t *copied_entries)
 {
@@ -1185,7 +1185,7 @@
 	pio->io_error = zio_worst_error(pio->io_error, zio->io_error);
 	mutex_exit(&pio->io_lock);
 
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 }
 
 /*
@@ -1271,15 +1271,14 @@
 				continue;
 
 			/*
-			 * Note, we may read from a child whose DTL
-			 * indicates that the data may not be present here.
-			 * While this might result in a few i/os that will
-			 * likely return incorrect data, it simplifies the
-			 * code since we can treat scrub and resilver
-			 * identically.  (The incorrect data will be
-			 * detected and ignored when we verify the
-			 * checksum.)
+			 * If a child is missing the data, set ic_error. Used
+			 * in vdev_indirect_repair(). We perform the read
+			 * nevertheless which provides the opportunity to
+			 * reconstruct the split block if at all possible.
 			 */
+			if (vdev_dtl_contains(ic->ic_vdev, DTL_MISSING,
+			    zio->io_txg, 1))
+				ic->ic_error = SET_ERROR(ESTALE);
 
 			ic->ic_data = abd_alloc_sametype(zio->io_abd,
 			    is->is_size);
@@ -1297,7 +1296,7 @@
 static void
 vdev_indirect_io_start(zio_t *zio)
 {
-	ASSERTV(spa_t *spa = zio->io_spa);
+	spa_t *spa __maybe_unused = zio->io_spa;
 	indirect_vsd_t *iv = kmem_zalloc(sizeof (*iv), KM_SLEEP);
 	list_create(&iv->iv_splits,
 	    sizeof (indirect_split_t), offsetof(indirect_split_t, is_node));
@@ -1401,7 +1400,7 @@
 	zio_bad_cksum_t zbc = {{{ 0 }}};
 	abd_t *bad_abd = ic->ic_data;
 	abd_t *good_abd = is->is_good_child->ic_data;
-	zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
+	(void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
 	    is->is_target_offset, is->is_size, good_abd, bad_abd, &zbc);
 }
 
@@ -1409,7 +1408,11 @@
  * Issue repair i/os for any incorrect copies.  We do this by comparing
  * each split segment's correct data (is_good_child's ic_data) with each
  * other copy of the data.  If they differ, then we overwrite the bad data
- * with the good copy.  Note that we do this without regard for the DTL's,
+ * with the good copy.  The DTL is checked in vdev_indirect_read_all() and
+ * if a vdev is missing a copy of the data we set ic_error and the read is
+ * performed. This provides the opportunity to reconstruct the split block
+ * if at all possible. ic_error is checked here and if set it suppresses
+ * incrementing the checksum counter. Aside from this DTLs are not checked,
  * which simplifies this code and also issues the optimal number of writes
  * (based on which copies actually read bad data, as opposed to which we
  * think might be wrong).  For the same reason, we always use
@@ -1420,11 +1423,6 @@
 {
 	indirect_vsd_t *iv = zio->io_vsd;
 
-	enum zio_flag flags = ZIO_FLAG_IO_REPAIR;
-
-	if (!(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))
-		flags |= ZIO_FLAG_SELF_HEAL;
-
 	if (!spa_writeable(zio->io_spa))
 		return;
 
@@ -1446,6 +1444,14 @@
 			    ZIO_FLAG_IO_REPAIR | ZIO_FLAG_SELF_HEAL,
 			    NULL, NULL));
 
+			/*
+			 * If ic_error is set the current child does not have
+			 * a copy of the data, so suppress incrementing the
+			 * checksum counter.
+			 */
+			if (ic->ic_error == ESTALE)
+				continue;
+
 			vdev_indirect_checksum_error(zio, is, ic);
 		}
 	}
@@ -1472,13 +1478,12 @@
 
 			vdev_t *vd = ic->ic_vdev;
 
+			(void) zfs_ereport_post_checksum(zio->io_spa, vd,
+			    NULL, zio, is->is_target_offset, is->is_size,
+			    NULL, NULL, NULL);
 			mutex_enter(&vd->vdev_stat_lock);
 			vd->vdev_stat.vs_checksum_errors++;
 			mutex_exit(&vd->vdev_stat_lock);
-
-			zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
-			    is->is_target_offset, is->is_size,
-			    NULL, NULL, NULL);
 		}
 	}
 }
@@ -1566,7 +1571,7 @@
 			indirect_child_t *ic = list_head(&is->is_unique_child);
 			int children = is->is_unique_children;
 
-			for (int i = spa_get_random(children); i > 0; i--)
+			for (int i = random_in_range(children); i > 0; i--)
 				ic = list_next(&is->is_unique_child, ic);
 
 			ASSERT3P(ic, !=, NULL);
@@ -1637,7 +1642,7 @@
 			if (ic->ic_data == NULL)
 				continue;
 
-			abd_zero(ic->ic_data, ic->ic_data->abd_size);
+			abd_zero(ic->ic_data, abd_get_size(ic->ic_data));
 		}
 
 		iv->iv_attempts_max *= 2;
@@ -1730,7 +1735,7 @@
 	 * Known_good will be TRUE when reconstruction is known to be possible.
 	 */
 	if (zfs_reconstruct_indirect_damage_fraction != 0 &&
-	    spa_get_random(zfs_reconstruct_indirect_damage_fraction) == 0)
+	    random_in_range(zfs_reconstruct_indirect_damage_fraction) == 0)
 		known_good = (vdev_indirect_splits_damage(iv, zio) == 0);
 
 	/*
@@ -1842,9 +1847,13 @@
 }
 
 vdev_ops_t vdev_indirect_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_indirect_open,
 	.vdev_op_close = vdev_indirect_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_indirect_io_start,
 	.vdev_op_io_done = vdev_indirect_io_done,
 	.vdev_op_state_change = NULL,
@@ -1853,12 +1862,15 @@
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = vdev_indirect_remap,
 	.vdev_op_xlate = NULL,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_INDIRECT,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* leaf vdev */
 };
 
-#if defined(_KERNEL)
-EXPORT_SYMBOL(rs_alloc);
 EXPORT_SYMBOL(spa_condense_fini);
 EXPORT_SYMBOL(spa_start_indirect_condensing_thread);
 EXPORT_SYMBOL(spa_condense_indirect_start_sync);
@@ -1870,25 +1882,24 @@
 EXPORT_SYMBOL(vdev_obsolete_counts_are_precise);
 EXPORT_SYMBOL(vdev_obsolete_sm_object);
 
-module_param(zfs_condense_indirect_vdevs_enable, int, 0644);
-MODULE_PARM_DESC(zfs_condense_indirect_vdevs_enable,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_vdevs_enable, INT, ZMOD_RW,
 	"Whether to attempt condensing indirect vdev mappings");
 
-/* CSTYLED */
-module_param(zfs_condense_min_mapping_bytes, ulong, 0644);
-MODULE_PARM_DESC(zfs_condense_min_mapping_bytes,
-	"Minimum size of vdev mapping to condense");
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_obsolete_pct, INT, ZMOD_RW,
+	"Minimum obsolete percent of bytes in the mapping to attempt condensing");
 
-/* CSTYLED */
-module_param(zfs_condense_max_obsolete_bytes, ulong, 0644);
-MODULE_PARM_DESC(zfs_condense_max_obsolete_bytes,
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, min_mapping_bytes, ULONG, ZMOD_RW,
+	"Don't bother condensing if the mapping uses less than this amount of "
+	"memory");
+
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, max_obsolete_bytes, ULONG, ZMOD_RW,
 	"Minimum size obsolete spacemap to attempt condensing");
 
-module_param(zfs_condense_indirect_commit_entry_delay_ms, int, 0644);
-MODULE_PARM_DESC(zfs_condense_indirect_commit_entry_delay_ms,
-	"Delay while condensing vdev mapping");
+ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_commit_entry_delay_ms, INT, ZMOD_RW,
+	"Used by tests to ensure certain actions happen in the middle of a "
+	"condense. A maximum value of 1 should be sufficient.");
 
-module_param(zfs_reconstruct_indirect_combinations_max, int, 0644);
-MODULE_PARM_DESC(zfs_reconstruct_indirect_combinations_max,
+ZFS_MODULE_PARAM(zfs_reconstruct, zfs_reconstruct_, indirect_combinations_max, INT, ZMOD_RW,
 	"Maximum number of combinations when reconstructing split segments");
-#endif
+/* END CSTYLED */

diff --git a/zfs/module/zfs/vdev_indirect_mapping.c b/zfs/module/zfs/vdev_indirect_mapping.c
index e4d998f..bb484a4 100644
--- a/zfs/module/zfs/vdev_indirect_mapping.c
+++ b/zfs/module/zfs/vdev_indirect_mapping.c

@@ -39,11 +39,12 @@
 	EQUIV(vim->vim_phys->vimp_num_entries > 0,
 	    vim->vim_entries != NULL);
 	if (vim->vim_phys->vimp_num_entries > 0) {
-		ASSERTV(vdev_indirect_mapping_entry_phys_t *last_entry =
-		    &vim->vim_entries[vim->vim_phys->vimp_num_entries - 1]);
-		ASSERTV(uint64_t offset =
-		    DVA_MAPPING_GET_SRC_OFFSET(last_entry));
-		ASSERTV(uint64_t size = DVA_GET_ASIZE(&last_entry->vimep_dst));
+		vdev_indirect_mapping_entry_phys_t *last_entry __maybe_unused =
+		    &vim->vim_entries[vim->vim_phys->vimp_num_entries - 1];
+		uint64_t offset __maybe_unused =
+		    DVA_MAPPING_GET_SRC_OFFSET(last_entry);
+		uint64_t size __maybe_unused =
+		    DVA_GET_ASIZE(&last_entry->vimep_dst);
 
 		ASSERT3U(vim->vim_phys->vimp_max_offset, >=, offset + size);
 	}

diff --git a/zfs/module/zfs/vdev_initialize.c b/zfs/module/zfs/vdev_initialize.c
index 8a36359..5d90fd6 100644
--- a/zfs/module/zfs/vdev_initialize.c
+++ b/zfs/module/zfs/vdev_initialize.c

@@ -20,14 +20,13 @@
  */
 
 /*
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/txg.h>
 #include <sys/vdev_impl.h>
-#include <sys/refcount.h>
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_synctask.h>
 #include <sys/zap.h>
@@ -47,7 +46,7 @@
 int zfs_initialize_limit = 1;
 
 /* size of initializing writes; default 1MiB, see zfs_remove_max_segment */
-uint64_t zfs_initialize_chunk_size = 1024 * 1024;
+unsigned long zfs_initialize_chunk_size = 1024 * 1024;
 
 static boolean_t
 vdev_initialize_should_stop(vdev_t *vd)
@@ -102,6 +101,39 @@
 }
 
+/*
+ * Sync task that clears a leaf vdev's initialize bookkeeping.
+ *
+ * arg points to a kmem-allocated uint64_t holding the vdev guid; the
+ * allocation is freed here.  The in-core last offset and action time are
+ * reset to zero and the corresponding leaf ZAP entries (last offset, state,
+ * action time) are removed in the given transaction.
+ */
 static void
+vdev_initialize_zap_remove_sync(void *arg, dmu_tx_t *tx)
+{
+	uint64_t guid = *(uint64_t *)arg;
+
+	/* The guid has been copied out, so the argument can be released. */
+	kmem_free(arg, sizeof (uint64_t));
+
+	/*
+	 * Nothing to do if the vdev can no longer be found, its top-level
+	 * vdev is being removed, or it is not a concrete vdev.
+	 */
+	vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE);
+	if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd))
+		return;
+
+	ASSERT3S(vd->vdev_initialize_state, ==, VDEV_INITIALIZE_NONE);
+	ASSERT3U(vd->vdev_leaf_zap, !=, 0);
+
+	vd->vdev_initialize_last_offset = 0;
+	vd->vdev_initialize_action_time = 0;
+
+	objset_t *mos = vd->vdev_spa->spa_meta_objset;
+	int error;
+
+	/*
+	 * Remove each entry from the leaf ZAP.  ENOENT is tolerated
+	 * (presumably the entry was never written); any other failure
+	 * trips the VERIFY.
+	 */
+	error = zap_remove(mos, vd->vdev_leaf_zap,
+	    VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET, tx);
+	VERIFY(error == 0 || error == ENOENT);
+
+	error = zap_remove(mos, vd->vdev_leaf_zap,
+	    VDEV_LEAF_ZAP_INITIALIZE_STATE, tx);
+	VERIFY(error == 0 || error == ENOENT);
+
+	error = zap_remove(mos, vd->vdev_leaf_zap,
+	    VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, tx);
+	VERIFY(error == 0 || error == ENOENT);
+}
+
+static void
 vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
 {
 	ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
@@ -122,12 +154,20 @@
 	if (vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED) {
 		vd->vdev_initialize_action_time = gethrestime_sec();
 	}
+
+	vdev_initializing_state_t old_state = vd->vdev_initialize_state;
 	vd->vdev_initialize_state = new_state;
 
 	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
-	dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
-	    guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+
+	if (new_state != VDEV_INITIALIZE_NONE) {
+		dsl_sync_task_nowait(spa_get_dsl(spa),
+		    vdev_initialize_zap_update_sync, guid, tx);
+	} else {
+		dsl_sync_task_nowait(spa_get_dsl(spa),
+		    vdev_initialize_zap_remove_sync, guid, tx);
+	}
 
 	switch (new_state) {
 	case VDEV_INITIALIZE_ACTIVE:
@@ -139,18 +179,27 @@
 		    "vdev=%s suspended", vd->vdev_path);
 		break;
 	case VDEV_INITIALIZE_CANCELED:
-		spa_history_log_internal(spa, "initialize", tx,
-		    "vdev=%s canceled", vd->vdev_path);
+		if (old_state == VDEV_INITIALIZE_ACTIVE ||
+		    old_state == VDEV_INITIALIZE_SUSPENDED)
+			spa_history_log_internal(spa, "initialize", tx,
+			    "vdev=%s canceled", vd->vdev_path);
 		break;
 	case VDEV_INITIALIZE_COMPLETE:
 		spa_history_log_internal(spa, "initialize", tx,
 		    "vdev=%s complete", vd->vdev_path);
 		break;
+	case VDEV_INITIALIZE_NONE:
+		spa_history_log_internal(spa, "uninitialize", tx,
+		    "vdev=%s", vd->vdev_path);
+		break;
 	default:
 		panic("invalid state %llu", (unsigned long long)new_state);
 	}
 
 	dmu_tx_commit(tx);
+
+	if (new_state != VDEV_INITIALIZE_ACTIVE)
+		spa_notify_waiters(spa);
 }
 
 static void
@@ -214,8 +263,7 @@
 
 		/* This is the first write of this txg. */
 		dsl_sync_task_nowait(spa_get_dsl(spa),
-		    vdev_initialize_zap_update_sync, guid, 2,
-		    ZFS_SPACE_CHECK_RESERVED, tx);
+		    vdev_initialize_zap_update_sync, guid, tx);
 	}
 
 	/*
@@ -250,10 +298,11 @@
  * divisible by sizeof (uint64_t), and buf must be 8-byte aligned. The ABD
  * allocation will guarantee these for us.
  */
-/* ARGSUSED */
 static int
 vdev_initialize_block_fill(void *buf, size_t len, void *unused)
 {
+	(void) unused;
+
 	ASSERT0(len % sizeof (uint64_t));
 #ifdef _ILP32
 	for (uint64_t i = 0; i < len; i += sizeof (uint32_t)) {
@@ -289,11 +338,13 @@
 static int
 vdev_initialize_ranges(vdev_t *vd, abd_t *data)
 {
-	avl_tree_t *rt = &vd->vdev_initialize_tree->rt_root;
+	range_tree_t *rt = vd->vdev_initialize_tree;
+	zfs_btree_t *bt = &rt->rt_root;
+	zfs_btree_index_t where;
 
-	for (range_seg_t *rs = avl_first(rt); rs != NULL;
-	    rs = AVL_NEXT(rt, rs)) {
-		uint64_t size = rs->rs_end - rs->rs_start;
+	for (range_seg_t *rs = zfs_btree_first(bt, &where); rs != NULL;
+	    rs = zfs_btree_next(bt, &where, &where)) {
+		uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
 
 		/* Split range into legally-sized physical chunks */
 		uint64_t writes_required =
@@ -303,7 +354,7 @@
 			int error;
 
 			error = vdev_initialize_write(vd,
-			    VDEV_LABEL_START_SIZE + rs->rs_start +
+			    VDEV_LABEL_START_SIZE + rs_get_start(rs, rt) +
 			    (w * zfs_initialize_chunk_size),
 			    MIN(size - (w * zfs_initialize_chunk_size),
 			    zfs_initialize_chunk_size), data);
@@ -315,6 +366,32 @@
 }
 
 static void
+vdev_initialize_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
+{
+	uint64_t *last_rs_end = (uint64_t *)arg;
+
+	if (physical_rs->rs_end > *last_rs_end)
+		*last_rs_end = physical_rs->rs_end;
+}
+
+/*
+ * vdev_xlate_walk() callback that accumulates initialize progress for one
+ * physical segment.  arg is the vdev_t being initialized.
+ *
+ * The segment's size is always added to vdev_initialize_bytes_est.  The
+ * portion of the segment that lies below vdev_initialize_last_offset is
+ * added to vdev_initialize_bytes_done.
+ */
+static void
+vdev_initialize_xlate_progress(void *arg, range_seg64_t *physical_rs)
+{
+	vdev_t *vd = (vdev_t *)arg;
+
+	uint64_t size = physical_rs->rs_end - physical_rs->rs_start;
+	vd->vdev_initialize_bytes_est += size;
+
+	/*
+	 * A segment that ends exactly at the last offset is fully done, so
+	 * the comparison is inclusive.  This matches
+	 * vdev_initialize_xlate_range_add(), which skips any segment with
+	 * rs_end <= vdev_initialize_last_offset as already visited.
+	 */
+	if (vd->vdev_initialize_last_offset >= physical_rs->rs_end) {
+		vd->vdev_initialize_bytes_done += size;
+	} else if (vd->vdev_initialize_last_offset > physical_rs->rs_start) {
+		/* The last offset falls strictly inside this segment. */
+		vd->vdev_initialize_bytes_done +=
+		    vd->vdev_initialize_last_offset - physical_rs->rs_start;
+	}
+}
+
+static void
 vdev_initialize_calculate_progress(vdev_t *vd)
 {
 	ASSERT(spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_READER) ||
@@ -328,28 +405,35 @@
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
 		mutex_enter(&msp->ms_lock);
 
-		uint64_t ms_free = msp->ms_size -
-		    metaslab_allocated_space(msp);
-
-		if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
-			ms_free /= vd->vdev_top->vdev_children;
+		uint64_t ms_free = (msp->ms_size -
+		    metaslab_allocated_space(msp)) /
+		    vdev_get_ndisks(vd->vdev_top);
 
 		/*
 		 * Convert the metaslab range to a physical range
 		 * on our vdev. We use this to determine if we are
 		 * in the middle of this metaslab range.
 		 */
-		range_seg_t logical_rs, physical_rs;
+		range_seg64_t logical_rs, physical_rs, remain_rs;
 		logical_rs.rs_start = msp->ms_start;
 		logical_rs.rs_end = msp->ms_start + msp->ms_size;
-		vdev_xlate(vd, &logical_rs, &physical_rs);
 
+		/* Metaslab space after this offset has not been initialized */
+		vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
 		if (vd->vdev_initialize_last_offset <= physical_rs.rs_start) {
 			vd->vdev_initialize_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
 			continue;
-		} else if (vd->vdev_initialize_last_offset >
-		    physical_rs.rs_end) {
+		}
+
+		/* Metaslab space before this offset has been initialized */
+		uint64_t last_rs_end = physical_rs.rs_end;
+		if (!vdev_xlate_is_empty(&remain_rs)) {
+			vdev_xlate_walk(vd, &remain_rs,
+			    vdev_initialize_xlate_last_rs_end, &last_rs_end);
+		}
+
+		if (vd->vdev_initialize_last_offset > last_rs_end) {
 			vd->vdev_initialize_bytes_done += ms_free;
 			vd->vdev_initialize_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
@@ -363,26 +447,17 @@
 		 */
 		VERIFY0(metaslab_load(msp));
 
-		for (range_seg_t *rs = avl_first(&msp->ms_allocatable->rt_root);
-		    rs; rs = AVL_NEXT(&msp->ms_allocatable->rt_root, rs)) {
-			logical_rs.rs_start = rs->rs_start;
-			logical_rs.rs_end = rs->rs_end;
-			vdev_xlate(vd, &logical_rs, &physical_rs);
+		zfs_btree_index_t where;
+		range_tree_t *rt = msp->ms_allocatable;
+		for (range_seg_t *rs =
+		    zfs_btree_first(&rt->rt_root, &where); rs;
+		    rs = zfs_btree_next(&rt->rt_root, &where,
+		    &where)) {
+			logical_rs.rs_start = rs_get_start(rs, rt);
+			logical_rs.rs_end = rs_get_end(rs, rt);
 
-			uint64_t size = physical_rs.rs_end -
-			    physical_rs.rs_start;
-			vd->vdev_initialize_bytes_est += size;
-			if (vd->vdev_initialize_last_offset >
-			    physical_rs.rs_end) {
-				vd->vdev_initialize_bytes_done += size;
-			} else if (vd->vdev_initialize_last_offset >
-			    physical_rs.rs_start &&
-			    vd->vdev_initialize_last_offset <
-			    physical_rs.rs_end) {
-				vd->vdev_initialize_bytes_done +=
-				    vd->vdev_initialize_last_offset -
-				    physical_rs.rs_start;
-			}
+			vdev_xlate_walk(vd, &logical_rs,
+			    vdev_initialize_xlate_progress, vd);
 		}
 		mutex_exit(&msp->ms_lock);
 	}
@@ -412,6 +487,34 @@
 	return (err);
 }
 
+static void
+vdev_initialize_xlate_range_add(void *arg, range_seg64_t *physical_rs)
+{
+	vdev_t *vd = arg;
+
+	/* Only add segments that we have not visited yet */
+	if (physical_rs->rs_end <= vd->vdev_initialize_last_offset)
+		return;
+
+	/* Pick up where we left off mid-range. */
+	if (vd->vdev_initialize_last_offset > physical_rs->rs_start) {
+		zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
+		    "(%llu, %llu)", vd->vdev_path,
+		    (u_longlong_t)physical_rs->rs_start,
+		    (u_longlong_t)physical_rs->rs_end,
+		    (u_longlong_t)vd->vdev_initialize_last_offset,
+		    (u_longlong_t)physical_rs->rs_end);
+		ASSERT3U(physical_rs->rs_end, >,
+		    vd->vdev_initialize_last_offset);
+		physical_rs->rs_start = vd->vdev_initialize_last_offset;
+	}
+
+	ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
+
+	range_tree_add(vd->vdev_initialize_tree, physical_rs->rs_start,
+	    physical_rs->rs_end - physical_rs->rs_start);
+}
+
 /*
  * Convert the logical range into a physical range and add it to our
  * avl tree.
@@ -420,47 +523,12 @@
 vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size)
 {
 	vdev_t *vd = arg;
-	range_seg_t logical_rs, physical_rs;
+	range_seg64_t logical_rs;
 	logical_rs.rs_start = start;
 	logical_rs.rs_end = start + size;
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
-	vdev_xlate(vd, &logical_rs, &physical_rs);
-
-	IMPLY(vd->vdev_top == vd,
-	    logical_rs.rs_start == physical_rs.rs_start);
-	IMPLY(vd->vdev_top == vd,
-	    logical_rs.rs_end == physical_rs.rs_end);
-
-	/* Only add segments that we have not visited yet */
-	if (physical_rs.rs_end <= vd->vdev_initialize_last_offset)
-		return;
-
-	/* Pick up where we left off mid-range. */
-	if (vd->vdev_initialize_last_offset > physical_rs.rs_start) {
-		zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
-		    "(%llu, %llu)", vd->vdev_path,
-		    (u_longlong_t)physical_rs.rs_start,
-		    (u_longlong_t)physical_rs.rs_end,
-		    (u_longlong_t)vd->vdev_initialize_last_offset,
-		    (u_longlong_t)physical_rs.rs_end);
-		ASSERT3U(physical_rs.rs_end, >,
-		    vd->vdev_initialize_last_offset);
-		physical_rs.rs_start = vd->vdev_initialize_last_offset;
-	}
-	ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
-
-	/*
-	 * With raidz, it's possible that the logical range does not live on
-	 * this leaf vdev. We only add the physical range to this vdev's if it
-	 * has a length greater than 0.
-	 */
-	if (physical_rs.rs_end > physical_rs.rs_start) {
-		range_tree_add(vd->vdev_initialize_tree, physical_rs.rs_start,
-		    physical_rs.rs_end - physical_rs.rs_start);
-	} else {
-		ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
-	}
+	vdev_xlate_walk(vd, &logical_rs, vdev_initialize_xlate_range_add, arg);
 }
 
 static void
@@ -479,11 +547,13 @@
 
 	abd_t *deadbeef = vdev_initialize_block_alloc();
 
-	vd->vdev_initialize_tree = range_tree_create(NULL, NULL);
+	vd->vdev_initialize_tree = range_tree_create(NULL, RANGE_SEG64, NULL,
+	    0, 0);
 
 	for (uint64_t i = 0; !vd->vdev_detached &&
 	    i < vd->vdev_top->vdev_ms_count; i++) {
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
+		boolean_t unload_when_done = B_FALSE;
 
 		/*
 		 * If we've expanded the top-level vdev or it's our
@@ -497,6 +567,8 @@
 		spa_config_exit(spa, SCL_CONFIG, FTAG);
 		metaslab_disable(msp);
 		mutex_enter(&msp->ms_lock);
+		if (!msp->ms_loaded && !msp->ms_loading)
+			unload_when_done = B_TRUE;
 		VERIFY0(metaslab_load(msp));
 
 		range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add,
@@ -504,7 +576,7 @@
 		mutex_exit(&msp->ms_lock);
 
 		error = vdev_initialize_ranges(vd, deadbeef);
-		metaslab_enable(msp, B_TRUE);
+		metaslab_enable(msp, B_TRUE, unload_when_done);
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 		range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL);
@@ -525,8 +597,14 @@
 	vd->vdev_initialize_tree = NULL;
 
 	mutex_enter(&vd->vdev_initialize_lock);
-	if (!vd->vdev_initialize_exit_wanted && vdev_writeable(vd)) {
-		vdev_initialize_change_state(vd, VDEV_INITIALIZE_COMPLETE);
+	if (!vd->vdev_initialize_exit_wanted) {
+		if (vdev_writeable(vd)) {
+			vdev_initialize_change_state(vd,
+			    VDEV_INITIALIZE_COMPLETE);
+		} else if (vd->vdev_faulted) {
+			vdev_initialize_change_state(vd,
+			    VDEV_INITIALIZE_CANCELED);
+		}
 	}
 	ASSERT(vd->vdev_initialize_thread != NULL ||
 	    vd->vdev_initialize_inflight == 0);
@@ -545,6 +623,8 @@
 	vd->vdev_initialize_thread = NULL;
 	cv_broadcast(&vd->vdev_initialize_cv);
 	mutex_exit(&vd->vdev_initialize_lock);
+
+	thread_exit();
 }
 
 /*
@@ -568,6 +648,24 @@
 }
 
 /*
+ * Uninitializes a device. Caller must hold vdev_initialize_lock.
+ * Device must be a leaf and not already be initializing.
+ */
+void
+vdev_uninitialize(vdev_t *vd)
+{
+	ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+	ASSERT(vdev_is_concrete(vd));
+	ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+	ASSERT(!vd->vdev_detached);
+	ASSERT(!vd->vdev_initialize_exit_wanted);
+	ASSERT(!vd->vdev_top->vdev_removing);
+
+	vdev_initialize_change_state(vd, VDEV_INITIALIZE_NONE);
+}
+
+/*
  * Wait for the initialize thread to be terminated (cancelled or stopped).
  */
 static void
@@ -588,6 +686,7 @@
 void
 vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
 {
+	(void) spa;
 	vdev_t *vd;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -721,15 +820,17 @@
 	}
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(vdev_initialize);
+EXPORT_SYMBOL(vdev_uninitialize);
 EXPORT_SYMBOL(vdev_initialize_stop);
 EXPORT_SYMBOL(vdev_initialize_stop_all);
 EXPORT_SYMBOL(vdev_initialize_stop_wait);
 EXPORT_SYMBOL(vdev_initialize_restart);
 
-/* CSTYLED */
-module_param(zfs_initialize_value, ulong, 0644);
-MODULE_PARM_DESC(zfs_initialize_value,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_value, ULONG, ZMOD_RW,
 	"Value written during zpool initialize");
-#endif
+
+ZFS_MODULE_PARAM(zfs, zfs_, initialize_chunk_size, ULONG, ZMOD_RW,
+	"Size in bytes of writes by zpool initialize");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/vdev_label.c b/zfs/module/zfs/vdev_label.c
index 6320732..faf8904 100644
--- a/zfs/module/zfs/vdev_label.c
+++ b/zfs/module/zfs/vdev_label.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
  */
 
@@ -142,6 +142,7 @@
 #include <sys/zap.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
+#include <sys/vdev_draid.h>
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@@ -149,6 +150,8 @@
 #include <sys/dsl_scan.h>
 #include <sys/abd.h>
 #include <sys/fs/zfs.h>
+#include <sys/byteorder.h>
+#include <sys/zfs_bootenv.h>
 
 /*
  * Basic routines to read and write from a vdev label.
@@ -404,6 +407,19 @@
 	}
 }
 
+static void
+top_vdev_actions_getprogress(vdev_t *vd, nvlist_t *nvl)
+{
+	if (vd == vd->vdev_top) {
+		vdev_rebuild_stat_t vrs;
+		if (vdev_rebuild_get_stats(vd, &vrs) == 0) {
+			fnvlist_add_uint64_array(nvl,
+			    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t *)&vrs,
+			    sizeof (vrs) / sizeof (uint64_t));
+		}
+	}
+}
+
 /*
  * Generate the nvlist representing this vdev's config.
  */
@@ -438,31 +454,13 @@
 	if (vd->vdev_fru != NULL)
 		fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
 
-	if (vd->vdev_nparity != 0) {
-		ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
-		    VDEV_TYPE_RAIDZ) == 0);
+	if (vd->vdev_ops->vdev_op_config_generate != NULL)
+		vd->vdev_ops->vdev_op_config_generate(vd, nv);
 
-		/*
-		 * Make sure someone hasn't managed to sneak a fancy new vdev
-		 * into a crufty old storage pool.
-		 */
-		ASSERT(vd->vdev_nparity == 1 ||
-		    (vd->vdev_nparity <= 2 &&
-		    spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
-		    (vd->vdev_nparity <= 3 &&
-		    spa_version(spa) >= SPA_VERSION_RAIDZ3));
-
-		/*
-		 * Note that we'll add the nparity tag even on storage pools
-		 * that only support a single parity device -- older software
-		 * will just ignore it.
-		 */
-		fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
-	}
-
-	if (vd->vdev_wholedisk != -1ULL)
+	if (vd->vdev_wholedisk != -1ULL) {
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    vd->vdev_wholedisk);
+	}
 
 	if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
@@ -470,6 +468,9 @@
 	if (vd->vdev_isspare)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
 
+	if (flags & VDEV_CONFIG_L2CACHE)
+		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
+
 	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
 	    vd == vd->vdev_top) {
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
@@ -559,6 +560,7 @@
 		vdev_config_generate_stats(vd, nv);
 
 		root_vdev_actions_getprogress(vd, nv);
+		top_vdev_actions_getprogress(vd, nv);
 
 		/*
 		 * Note: this can be called from open context
@@ -597,7 +599,8 @@
 			 * as a single mapping.
 			 */
 			for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
-				if (1ULL << (i + 1) < vdev_removal_max_span) {
+				if (i + 1 < highbit64(vdev_removal_max_span)
+				    - 1) {
 					to_alloc +=
 					    vd->vdev_mg->mg_histogram[i] <<
 					    (i + 1);
@@ -663,6 +666,9 @@
 		if (vd->vdev_resilver_txg != 0)
 			fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
 			    vd->vdev_resilver_txg);
+		if (vd->vdev_rebuild_txg != 0)
+			fnvlist_add_uint64(nv, ZPOOL_CONFIG_REBUILD_TXG,
+			    vd->vdev_rebuild_txg);
 		if (vd->vdev_faulted)
 			fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, B_TRUE);
 		if (vd->vdev_degraded)
@@ -751,35 +757,47 @@
 {
 	spa_t *spa = vd->vdev_spa;
 	nvlist_t *config = NULL;
-	vdev_phys_t *vp;
-	abd_t *vp_abd;
-	zio_t *zio;
+	vdev_phys_t *vp[VDEV_LABELS];
+	abd_t *vp_abd[VDEV_LABELS];
+	zio_t *zio[VDEV_LABELS];
 	uint64_t best_txg = 0;
 	uint64_t label_txg = 0;
 	int error = 0;
 	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_SPECULATIVE;
 
-	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+	ASSERT(vd->vdev_validate_thread == curthread ||
+	    spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
 	if (!vdev_readable(vd))
 		return (NULL);
 
-	vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
-	vp = abd_to_buf(vp_abd);
+	/*
+	 * The label for a dRAID distributed spare is not stored on disk.
+	 * Instead it is generated when needed which allows us to bypass
+	 * the pipeline when reading the config from the label.
+	 */
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return (vdev_draid_read_config_spare(vd));
+
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		vp_abd[l] = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
+		vp[l] = abd_to_buf(vp_abd[l]);
+	}
 
 retry:
 	for (int l = 0; l < VDEV_LABELS; l++) {
+		zio[l] = zio_root(spa, NULL, NULL, flags);
+
+		vdev_label_read(zio[l], vd, l, vp_abd[l],
+		    offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t),
+		    NULL, NULL, flags);
+	}
+	for (int l = 0; l < VDEV_LABELS; l++) {
 		nvlist_t *label = NULL;
 
-		zio = zio_root(spa, NULL, NULL, flags);
-
-		vdev_label_read(zio, vd, l, vp_abd,
-		    offsetof(vdev_label_t, vl_vdev_phys),
-		    sizeof (vdev_phys_t), NULL, NULL, flags);
-
-		if (zio_wait(zio) == 0 &&
-		    nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
+		if (zio_wait(zio[l]) == 0 &&
+		    nvlist_unpack(vp[l]->vp_nvlist, sizeof (vp[l]->vp_nvlist),
 		    &label, 0) == 0) {
 			/*
 			 * Auxiliary vdevs won't have txg values in their
@@ -792,6 +810,8 @@
 			    ZPOOL_CONFIG_POOL_TXG, &label_txg);
 			if ((error || label_txg == 0) && !config) {
 				config = label;
+				for (l++; l < VDEV_LABELS; l++)
+					zio_wait(zio[l]);
 				break;
 			} else if (label_txg <= txg && label_txg > best_txg) {
 				best_txg = label_txg;
@@ -820,7 +840,9 @@
 		    (u_longlong_t)txg);
 	}
 
-	abd_free(vp_abd);
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		abd_free(vp_abd[l]);
+	}
 
 	return (config);
 }
@@ -932,7 +954,7 @@
 	 */
 	if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 	    (spa = spa_by_guid(pool_guid, device_guid)) != NULL &&
-	    spa_mode(spa) == FREAD)
+	    spa_mode(spa) == SPA_MODE_READ)
 		state = POOL_STATE_ACTIVE;
 
 	/*
@@ -957,7 +979,7 @@
 	nvlist_t *label;
 	vdev_phys_t *vp;
 	abd_t *vp_abd;
-	abd_t *pad2;
+	abd_t *bootenv;
 	uberblock_t *ub;
 	abd_t *ub_abd;
 	zio_t *zio;
@@ -1118,8 +1140,8 @@
 	ub->ub_txg = 0;
 
 	/* Initialize the 2nd padding area. */
-	pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
-	abd_zero(pad2, VDEV_PAD_SIZE);
+	bootenv = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
+	abd_zero(bootenv, VDEV_PAD_SIZE);
 
 	/*
 	 * Write everything in parallel.
@@ -1138,8 +1160,8 @@
 		 * Zero out the 2nd padding area where it might have
 		 * left over data from previous filesystem format.
 		 */
-		vdev_label_write(zio, vd, l, pad2,
-		    offsetof(vdev_label_t, vl_pad2),
+		vdev_label_write(zio, vd, l, bootenv,
+		    offsetof(vdev_label_t, vl_be),
 		    VDEV_PAD_SIZE, NULL, NULL, flags);
 
 		vdev_label_write(zio, vd, l, ub_abd,
@@ -1155,7 +1177,7 @@
 	}
 
 	nvlist_free(label);
-	abd_free(pad2);
+	abd_free(bootenv);
 	abd_free(ub_abd);
 	abd_free(vp_abd);
 
@@ -1179,6 +1201,212 @@
 }
 
 /*
+ * Done callback for vdev_label_read_bootenv_impl. If this is the first
+ * callback to finish, store our abd in the callback pointer. Otherwise, we
+ * just free our abd and return.
+ */
+static void
+vdev_label_read_bootenv_done(zio_t *zio)
+{
+	zio_t *rio = zio->io_private;
+	abd_t **cbp = rio->io_private;
+
+	ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE);
+
+	if (zio->io_error == 0) {
+		mutex_enter(&rio->io_lock);
+		if (*cbp == NULL) {
+			/* Will free this buffer in vdev_label_read_bootenv. */
+			*cbp = zio->io_abd;
+		} else {
+			abd_free(zio->io_abd);
+		}
+		mutex_exit(&rio->io_lock);
+	} else {
+		abd_free(zio->io_abd);
+	}
+}
+
+static void
+vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags)
+{
+	for (int c = 0; c < vd->vdev_children; c++)
+		vdev_label_read_bootenv_impl(zio, vd->vdev_child[c], flags);
+
+	/*
+	 * We just use the first label that has a correct checksum; the
+	 * bootloader should have rewritten them all to be the same on boot,
+	 * and any changes we made since boot have been the same across all
+	 * labels.
+	 */
+	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+		for (int l = 0; l < VDEV_LABELS; l++) {
+			vdev_label_read(zio, vd, l,
+			    abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE),
+			    offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE,
+			    vdev_label_read_bootenv_done, zio, flags);
+		}
+	}
+}
+
+int
+vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv)
+{
+	nvlist_t *config;
+	spa_t *spa = rvd->vdev_spa;
+	abd_t *abd = NULL;
+	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
+
+	ASSERT(bootenv);
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+
+	zio_t *zio = zio_root(spa, NULL, &abd, flags);
+	vdev_label_read_bootenv_impl(zio, rvd, flags);
+	int err = zio_wait(zio);
+
+	if (abd != NULL) {
+		char *buf;
+		vdev_boot_envblock_t *vbe = abd_to_buf(abd);
+
+		vbe->vbe_version = ntohll(vbe->vbe_version);
+		switch (vbe->vbe_version) {
+		case VB_RAW:
+			/*
+			 * if we have textual data in vbe_bootenv, create nvlist
+			 * with key "envmap".
+			 */
+			fnvlist_add_uint64(bootenv, BOOTENV_VERSION, VB_RAW);
+			vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
+			fnvlist_add_string(bootenv, GRUB_ENVMAP,
+			    vbe->vbe_bootenv);
+			break;
+
+		case VB_NVLIST:
+			err = nvlist_unpack(vbe->vbe_bootenv,
+			    sizeof (vbe->vbe_bootenv), &config, 0);
+			if (err == 0) {
+				fnvlist_merge(bootenv, config);
+				nvlist_free(config);
+				break;
+			}
+			fallthrough;
+		default:
+			/* Check for FreeBSD zfs bootonce command string */
+			buf = abd_to_buf(abd);
+			if (*buf == '\0') {
+				fnvlist_add_uint64(bootenv, BOOTENV_VERSION,
+				    VB_NVLIST);
+				break;
+			}
+			fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf);
+		}
+
+		/*
+		 * abd was allocated in vdev_label_read_bootenv_impl()
+		 */
+		abd_free(abd);
+		/*
+		 * If we managed to read any successfully,
+		 * return success.
+		 */
+		return (0);
+	}
+	return (err);
+}
+
+int
+vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env)
+{
+	zio_t *zio;
+	spa_t *spa = vd->vdev_spa;
+	vdev_boot_envblock_t *bootenv;
+	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+	int error;
+	size_t nvsize;
+	char *nvbuf;
+
+	error = nvlist_size(env, &nvsize, NV_ENCODE_XDR);
+	if (error != 0)
+		return (SET_ERROR(error));
+
+	if (nvsize >= sizeof (bootenv->vbe_bootenv)) {
+		return (SET_ERROR(E2BIG));
+	}
+
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+
+	error = ENXIO;
+	for (int c = 0; c < vd->vdev_children; c++) {
+		int child_err;
+
+		child_err = vdev_label_write_bootenv(vd->vdev_child[c], env);
+		/*
+		 * As long as any of the disks managed to write all of their
+		 * labels successfully, return success.
+		 */
+		if (child_err == 0)
+			error = child_err;
+	}
+
+	if (!vd->vdev_ops->vdev_op_leaf || vdev_is_dead(vd) ||
+	    !vdev_writeable(vd)) {
+		return (error);
+	}
+	ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE);
+	abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
+	abd_zero(abd, VDEV_PAD_SIZE);
+
+	bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE);
+	nvbuf = bootenv->vbe_bootenv;
+	nvsize = sizeof (bootenv->vbe_bootenv);
+
+	bootenv->vbe_version = fnvlist_lookup_uint64(env, BOOTENV_VERSION);
+	switch (bootenv->vbe_version) {
+	case VB_RAW:
+		if (nvlist_lookup_string(env, GRUB_ENVMAP, &nvbuf) == 0) {
+			(void) strlcpy(bootenv->vbe_bootenv, nvbuf, nvsize);
+		}
+		error = 0;
+		break;
+
+	case VB_NVLIST:
+		error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR,
+		    KM_SLEEP);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	if (error == 0) {
+		bootenv->vbe_version = htonll(bootenv->vbe_version);
+		abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE);
+	} else {
+		abd_free(abd);
+		return (SET_ERROR(error));
+	}
+
+retry:
+	zio = zio_root(spa, NULL, NULL, flags);
+	for (int l = 0; l < VDEV_LABELS; l++) {
+		vdev_label_write(zio, vd, l, abd,
+		    offsetof(vdev_label_t, vl_be),
+		    VDEV_PAD_SIZE, NULL, NULL, flags);
+	}
+
+	error = zio_wait(zio);
+	if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
+		flags |= ZIO_FLAG_TRYHARD;
+		goto retry;
+	}
+
+	abd_free(abd);
+	return (error);
+}
+
+/*
  * ==========================================================================
  * uberblock load/sync
  * ==========================================================================
@@ -1197,18 +1425,18 @@
 static int
 vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
 {
-	int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
+	int cmp = TREE_CMP(ub1->ub_txg, ub2->ub_txg);
 
 	if (likely(cmp))
 		return (cmp);
 
-	cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
+	cmp = TREE_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
 	if (likely(cmp))
 		return (cmp);
 
 	/*
 	 * If MMP_VALID(ub) && MMP_SEQ_VALID(ub) then the host has an MMP-aware
-	 * ZFS, e.g. zfsonlinux >= 0.7.
+	 * ZFS, e.g. OpenZFS >= 0.7.
 	 *
 	 * If one ub has MMP and the other does not, they were written by
 	 * different hosts, which matters for MMP.  So we treat no MMP/no SEQ as
@@ -1226,7 +1454,7 @@
 	if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2))
 		seq2 = MMP_SEQ(ub2);
 
-	return (AVL_CMP(seq1, seq2));
+	return (TREE_CMP(seq1, seq2));
 }
 
 struct ubl_cbdata {
@@ -1271,7 +1499,8 @@
 	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
 
-	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd) &&
+	    vd->vdev_ops != &vdev_draid_spare_ops) {
 		for (int l = 0; l < VDEV_LABELS; l++) {
 			for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
 				vdev_label_read(zio, vd, l,
@@ -1360,6 +1589,13 @@
 	    SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
+	/*
+	 * No uberblocks are stored on distributed spares, they may be
+	 * safely skipped when expanding a leaf vdev.
+	 */
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return;
+
 	spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
 
 	ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
@@ -1421,6 +1657,15 @@
 	if (!vdev_writeable(vd))
 		return;
 
+	/*
+	 * There's no need to write uberblocks to a distributed spare, they
+	 * are already stored on all the leaves of the parent dRAID.  For
+	 * this same reason vdev_uberblock_load_impl() skips distributed
+	 * spares when reading uberblocks.
+	 */
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return;
+
 	/* If the vdev was expanded, need to copy uberblock rings. */
 	if (vd->vdev_state == VDEV_STATE_HEALTHY &&
 	    vd->vdev_copy_uberblocks == B_TRUE) {
@@ -1446,7 +1691,7 @@
 }
 
 /* Sync the uberblocks to all vdevs in svd[] */
-int
+static int
 vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
 {
 	spa_t *spa = svd[0]->vdev_spa;
@@ -1538,6 +1783,14 @@
 		return;
 
 	/*
+	 * The top-level config never needs to be written to a distributed
+	 * spare.  When read, vdev_draid_read_config_spare() will generate
+	 * the config for vdev_label_read_config().
+	 */
+	if (vd->vdev_ops == &vdev_draid_spare_ops)
+		return;
+
+	/*
 	 * Generate a label describing the top-level config to which we belong.
 	 */
 	label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE);
@@ -1563,7 +1816,7 @@
 	nvlist_free(label);
 }
 
-int
+static int
 vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
 {
 	list_t *dl = &spa->spa_config_dirty_list;

diff --git a/zfs/module/zfs/vdev_mirror.c b/zfs/module/zfs/vdev_mirror.c
index cf8402d..d80a767 100644
--- a/zfs/module/zfs/vdev_mirror.c
+++ b/zfs/module/zfs/vdev_mirror.c

@@ -33,7 +33,9 @@
 #include <sys/dsl_pool.h>
 #include <sys/dsl_scan.h>
 #include <sys/vdev_impl.h>
+#include <sys/vdev_draid.h>
 #include <sys/zio.h>
+#include <sys/zio_checksum.h>
 #include <sys/abd.h>
 #include <sys/fs/zfs.h>
 
@@ -99,15 +101,16 @@
 /*
  * Virtual device vector for mirroring.
  */
-
 typedef struct mirror_child {
 	vdev_t		*mc_vd;
+	abd_t		*mc_abd;
 	uint64_t	mc_offset;
 	int		mc_error;
 	int		mc_load;
 	uint8_t		mc_tried;
 	uint8_t		mc_skipped;
 	uint8_t		mc_speculative;
+	uint8_t		mc_rebuilding;
 } mirror_child_t;
 
 typedef struct mirror_map {
@@ -115,6 +118,7 @@
 	int		mm_preferred_cnt;
 	int		mm_children;
 	boolean_t	mm_resilvering;
+	boolean_t	mm_rebuilding;
 	boolean_t	mm_root;
 	mirror_child_t	mm_child[];
 } mirror_map_t;
@@ -172,7 +176,6 @@
 
 static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
 	.vsd_free = vdev_mirror_map_free,
-	.vsd_cksum_report = zio_vsd_default_cksum_report
 };
 
 static int
@@ -239,6 +242,21 @@
 	return (load + zfs_vdev_mirror_rotating_seek_inc);
 }
 
+static boolean_t
+vdev_mirror_rebuilding(vdev_t *vd)
+{
+	if (vd->vdev_ops->vdev_op_leaf && vd->vdev_rebuild_txg)
+		return (B_TRUE);
+
+	for (int i = 0; i < vd->vdev_children; i++) {
+		if (vdev_mirror_rebuilding(vd->vdev_child[i])) {
+			return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
 /*
  * Avoid inlining the function to keep vdev_mirror_io_start(), which
  * is this functions only caller, as small as possible on the stack.
@@ -356,17 +374,18 @@
 			mc = &mm->mm_child[c];
 			mc->mc_vd = vd->vdev_child[c];
 			mc->mc_offset = zio->io_offset;
+
+			if (vdev_mirror_rebuilding(mc->mc_vd))
+				mm->mm_rebuilding = mc->mc_rebuilding = B_TRUE;
 		}
 	}
 
-	zio->io_vsd = mm;
-	zio->io_vsd_ops = &vdev_mirror_vsd_ops;
 	return (mm);
 }
 
 static int
 vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	int numerrors = 0;
 	int lasterror = 0;
@@ -389,7 +408,15 @@
 
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-		*ashift = MAX(*ashift, cvd->vdev_ashift);
+		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+	}
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error)
+			continue;
+		*physical_ashift = vdev_best_ashift(*logical_ashift,
+		    *physical_ashift, cvd->vdev_physical_ashift);
 	}
 
 	if (numerrors == vd->vdev_children) {
@@ -420,32 +447,6 @@
 	mc->mc_skipped = 0;
 }
 
-static void
-vdev_mirror_scrub_done(zio_t *zio)
-{
-	mirror_child_t *mc = zio->io_private;
-
-	if (zio->io_error == 0) {
-		zio_t *pio;
-		zio_link_t *zl = NULL;
-
-		mutex_enter(&zio->io_lock);
-		while ((pio = zio_walk_parents(zio, &zl)) != NULL) {
-			mutex_enter(&pio->io_lock);
-			ASSERT3U(zio->io_size, >=, pio->io_size);
-			abd_copy(pio->io_abd, zio->io_abd, pio->io_size);
-			mutex_exit(&pio->io_lock);
-		}
-		mutex_exit(&zio->io_lock);
-	}
-
-	abd_free(zio->io_abd);
-
-	mc->mc_error = zio->io_error;
-	mc->mc_tried = 1;
-	mc->mc_skipped = 0;
-}
-
 /*
  * Check the other, lower-index DVAs to see if they're on the same
  * vdev as the child we picked.  If they are, use them since they
@@ -477,7 +478,7 @@
 	int p;
 
 	if (mm->mm_root) {
-		p = spa_get_random(mm->mm_preferred_cnt);
+		p = random_in_range(mm->mm_preferred_cnt);
 		return (vdev_mirror_dva_select(zio, p));
 	}
 
@@ -491,12 +492,37 @@
 	return (mm->mm_preferred[p]);
 }
 
+static boolean_t
+vdev_mirror_child_readable(mirror_child_t *mc)
+{
+	vdev_t *vd = mc->mc_vd;
+
+	if (vd->vdev_top != NULL && vd->vdev_top->vdev_ops == &vdev_draid_ops)
+		return (vdev_draid_readable(vd, mc->mc_offset));
+	else
+		return (vdev_readable(vd));
+}
+
+static boolean_t
+vdev_mirror_child_missing(mirror_child_t *mc, uint64_t txg, uint64_t size)
+{
+	vdev_t *vd = mc->mc_vd;
+
+	if (vd->vdev_top != NULL && vd->vdev_top->vdev_ops == &vdev_draid_ops)
+		return (vdev_draid_missing(vd, mc->mc_offset, txg, size));
+	else
+		return (vdev_dtl_contains(vd, DTL_MISSING, txg, size));
+}
+
 /*
  * Try to find a vdev whose DTL doesn't contain the block we want to read
- * preferring vdevs based on determined load.
+ * preferring vdevs based on determined load. If we can't, try the read on
+ * any vdev we haven't already tried.
  *
- * Try to find a child whose DTL doesn't contain the block we want to read.
- * If we can't, try the read on any vdev we haven't already tried.
+ * Distributed spares are an exception to the above load rule. They are
+ * always preferred in order to detect gaps in the distributed spare which
+ * are created when another disk in the dRAID fails. In order to restore
+ * redundancy those gaps must be read to trigger the required repair IO.
  */
 static int
 vdev_mirror_child_select(zio_t *zio)
@@ -516,20 +542,27 @@
 		if (mc->mc_tried || mc->mc_skipped)
 			continue;
 
-		if (mc->mc_vd == NULL || !vdev_readable(mc->mc_vd)) {
+		if (mc->mc_vd == NULL ||
+		    !vdev_mirror_child_readable(mc)) {
 			mc->mc_error = SET_ERROR(ENXIO);
 			mc->mc_tried = 1;	/* don't even try */
 			mc->mc_skipped = 1;
 			continue;
 		}
 
-		if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
+		if (vdev_mirror_child_missing(mc, txg, 1)) {
 			mc->mc_error = SET_ERROR(ESTALE);
 			mc->mc_skipped = 1;
 			mc->mc_speculative = 1;
 			continue;
 		}
 
+		if (mc->mc_vd->vdev_ops == &vdev_draid_spare_ops) {
+			mm->mm_preferred[0] = c;
+			mm->mm_preferred_cnt = 1;
+			break;
+		}
+
 		mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
 		if (mc->mc_load > lowest_load)
 			continue;
@@ -575,6 +608,8 @@
 	int c, children;
 
 	mm = vdev_mirror_map_init(zio);
+	zio->io_vsd = mm;
+	zio->io_vsd_ops = &vdev_mirror_vsd_ops;
 
 	if (mm == NULL) {
 		ASSERT(!spa_trust_config(zio->io_spa));
@@ -584,24 +619,35 @@
 	}
 
 	if (zio->io_type == ZIO_TYPE_READ) {
-		if (zio->io_bp != NULL &&
-		    (zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
+		if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
 			/*
-			 * For scrubbing reads (if we can verify the
-			 * checksum here, as indicated by io_bp being
-			 * non-NULL) we need to allocate a read buffer for
-			 * each child and issue reads to all children.  If
-			 * any child succeeds, it will copy its data into
-			 * zio->io_data in vdev_mirror_scrub_done.
+			 * For scrubbing reads we need to issue reads to all
+			 * children.  One child can reuse parent buffer, but
+			 * for others we have to allocate separate ones to
+			 * verify checksums if io_bp is non-NULL, or compare
+			 * them in vdev_mirror_io_done() otherwise.
 			 */
+			boolean_t first = B_TRUE;
 			for (c = 0; c < mm->mm_children; c++) {
 				mc = &mm->mm_child[c];
-				zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
-				    mc->mc_vd, mc->mc_offset,
+
+				/* Don't issue ZIOs to offline children */
+				if (!vdev_mirror_child_readable(mc)) {
+					mc->mc_error = SET_ERROR(ENXIO);
+					mc->mc_tried = 1;
+					mc->mc_skipped = 1;
+					continue;
+				}
+
+				mc->mc_abd = first ? zio->io_abd :
 				    abd_alloc_sametype(zio->io_abd,
-				    zio->io_size), zio->io_size,
-				    zio->io_type, zio->io_priority, 0,
-				    vdev_mirror_scrub_done, mc));
+				    zio->io_size);
+				zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
+				    mc->mc_vd, mc->mc_offset, mc->mc_abd,
+				    zio->io_size, zio->io_type,
+				    zio->io_priority, 0,
+				    vdev_mirror_child_done, mc));
+				first = B_FALSE;
 			}
 			zio_execute(zio);
 			return;
@@ -623,11 +669,25 @@
 
 	while (children--) {
 		mc = &mm->mm_child[c];
+		c++;
+
+		/*
+		 * When sequentially resilvering only issue write repair
+		 * IOs to the vdev which is being rebuilt since performance
+		 * is limited by the slowest child.  This is an issue for
+		 * faster replacement devices such as distributed spares.
+		 */
+		if ((zio->io_priority == ZIO_PRIORITY_REBUILD) &&
+		    (zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
+		    !(zio->io_flags & ZIO_FLAG_SCRUB) &&
+		    mm->mm_rebuilding && !mc->mc_rebuilding) {
+			continue;
+		}
+
 		zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
 		    mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
 		    zio->io_type, zio->io_priority, 0,
 		    vdev_mirror_child_done, mc));
-		c++;
 	}
 
 	zio_execute(zio);
@@ -655,6 +715,7 @@
 	int c;
 	int good_copies = 0;
 	int unexpected_errors = 0;
+	int last_good_copy = -1;
 
 	if (mm == NULL)
 		return;
@@ -666,6 +727,7 @@
 			if (!mc->mc_skipped)
 				unexpected_errors++;
 		} else if (mc->mc_tried) {
+			last_good_copy = c;
 			good_copies++;
 		}
 	}
@@ -679,7 +741,6 @@
 		 * no non-degraded top-level vdevs left, and not update DTLs
 		 * if we intend to reallocate.
 		 */
-		/* XXPOLICY */
 		if (good_copies != mm->mm_children) {
 			/*
 			 * Always require at least one good copy.
@@ -706,7 +767,6 @@
 	/*
 	 * If we don't have a good copy yet, keep trying other children.
 	 */
-	/* XXPOLICY */
 	if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
 		ASSERT(c >= 0 && c < mm->mm_children);
 		mc = &mm->mm_child[c];
@@ -718,7 +778,80 @@
 		return;
 	}
 
-	/* XXPOLICY */
+	if (zio->io_flags & ZIO_FLAG_SCRUB && !mm->mm_resilvering) {
+		abd_t *best_abd = NULL;
+		if (last_good_copy >= 0)
+			best_abd = mm->mm_child[last_good_copy].mc_abd;
+
+		/*
+		 * If we're scrubbing but don't have a BP available (because
+		 * this vdev is under a raidz or draid vdev) then the best we
+		 * can do is compare all of the copies read.  If they're not
+		 * identical then return a checksum error and the most likely
+		 * correct data.  The raidz code will issue a repair I/O if
+		 * possible.
+		 */
+		if (zio->io_bp == NULL) {
+			ASSERT(zio->io_vd->vdev_ops == &vdev_replacing_ops ||
+			    zio->io_vd->vdev_ops == &vdev_spare_ops);
+
+			abd_t *pref_abd = NULL;
+			for (c = 0; c < last_good_copy; c++) {
+				mc = &mm->mm_child[c];
+				if (mc->mc_error || !mc->mc_tried)
+					continue;
+
+				if (abd_cmp(mc->mc_abd, best_abd) != 0)
+					zio->io_error = SET_ERROR(ECKSUM);
+
+				/*
+				 * The distributed spare is always preferred
+				 * by vdev_mirror_child_select() so it's
+				 * considered to be the best candidate.
+				 */
+				if (pref_abd == NULL &&
+				    mc->mc_vd->vdev_ops ==
+				    &vdev_draid_spare_ops)
+					pref_abd = mc->mc_abd;
+
+				/*
+				 * In the absence of a preferred copy, use
+				 * the parent pointer to avoid a memory copy.
+				 */
+				if (mc->mc_abd == zio->io_abd)
+					best_abd = mc->mc_abd;
+			}
+			if (pref_abd)
+				best_abd = pref_abd;
+		} else {
+
+			/*
+			 * If we have a BP available, then checksums are
+			 * already verified and we just need a buffer
+			 * with valid data, preferring parent one to
+			 * avoid a memory copy.
+			 */
+			for (c = 0; c < last_good_copy; c++) {
+				mc = &mm->mm_child[c];
+				if (mc->mc_error || !mc->mc_tried)
+					continue;
+				if (mc->mc_abd == zio->io_abd) {
+					best_abd = mc->mc_abd;
+					break;
+				}
+			}
+		}
+
+		if (best_abd && best_abd != zio->io_abd)
+			abd_copy(zio->io_abd, best_abd, zio->io_size);
+		for (c = 0; c < mm->mm_children; c++) {
+			mc = &mm->mm_child[c];
+			if (mc->mc_abd != zio->io_abd)
+				abd_free(mc->mc_abd);
+			mc->mc_abd = NULL;
+		}
+	}
+
 	if (good_copies == 0) {
 		zio->io_error = vdev_mirror_worst_error(mm);
 		ASSERT(zio->io_error != 0);
@@ -742,6 +875,8 @@
 			mc = &mm->mm_child[c];
 
 			if (mc->mc_error == 0) {
+				vdev_ops_t *ops = mc->mc_vd->vdev_ops;
+
 				if (mc->mc_tried)
 					continue;
 				/*
@@ -750,15 +885,16 @@
 				 * 1. it's a scrub (in which case we have
 				 * tried everything that was healthy)
 				 *  - or -
-				 * 2. it's an indirect vdev (in which case
-				 * it could point to any other vdev, which
-				 * might have a bad DTL)
+				 * 2. it's an indirect or distributed spare
+				 * vdev (in which case it could point to any
+				 * other vdev, which might have a bad DTL)
 				 *  - or -
 				 * 3. the DTL indicates that this data is
 				 * missing from this vdev
 				 */
 				if (!(zio->io_flags & ZIO_FLAG_SCRUB) &&
-				    mc->mc_vd->vdev_ops != &vdev_indirect_ops &&
+				    ops != &vdev_indirect_ops &&
+				    ops != &vdev_draid_spare_ops &&
 				    !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL,
 				    zio->io_txg, 1))
 					continue;
@@ -767,8 +903,9 @@
 
 			zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
 			    mc->mc_vd, mc->mc_offset,
-			    zio->io_abd, zio->io_size,
-			    ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
+			    zio->io_abd, zio->io_size, ZIO_TYPE_WRITE,
+			    zio->io_priority == ZIO_PRIORITY_REBUILD ?
+			    ZIO_PRIORITY_REBUILD : ZIO_PRIORITY_ASYNC_WRITE,
 			    ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
 			    ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
 		}
@@ -793,76 +930,110 @@
 	}
 }
 
+/*
+ * Return the maximum asize for a rebuild zio in the provided range.
+ */
+static uint64_t
+vdev_mirror_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
+    uint64_t max_segment)
+{
+	(void) start;
+
+	uint64_t psize = MIN(P2ROUNDUP(max_segment, 1 << vd->vdev_ashift),
+	    SPA_MAXBLOCKSIZE);
+
+	return (MIN(asize, vdev_psize_to_asize(vd, psize)));
+}
+
 vdev_ops_t vdev_mirror_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_MIRROR,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
 
 vdev_ops_t vdev_replacing_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_REPLACING,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
 
 vdev_ops_t vdev_spare_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_mirror_open,
 	.vdev_op_close = vdev_mirror_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_mirror_io_start,
 	.vdev_op_io_done = vdev_mirror_io_done,
 	.vdev_op_state_change = vdev_mirror_state_change,
-	.vdev_op_need_resilver = NULL,
+	.vdev_op_need_resilver = vdev_default_need_resilver,
 	.vdev_op_hold = NULL,
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
+	.vdev_op_rebuild_asize = vdev_mirror_rebuild_asize,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_SPARE,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };
 
-#if defined(_KERNEL)
 /* BEGIN CSTYLED */
-module_param(zfs_vdev_mirror_rotating_inc, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_mirror_rotating_inc,
+ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_inc, INT, ZMOD_RW,
 	"Rotating media load increment for non-seeking I/O's");
 
-module_param(zfs_vdev_mirror_rotating_seek_inc, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_inc,
+ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_seek_inc, INT, ZMOD_RW,
 	"Rotating media load increment for seeking I/O's");
 
-module_param(zfs_vdev_mirror_rotating_seek_offset, int, 0644);
+ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_seek_offset, INT, ZMOD_RW,
+	"Offset in bytes from the last I/O which triggers "
+	"a reduced rotating media seek increment");
 
-MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_offset,
-	"Offset in bytes from the last I/O which "
-	"triggers a reduced rotating media seek increment");
-
-module_param(zfs_vdev_mirror_non_rotating_inc, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_inc,
+ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, non_rotating_inc, INT, ZMOD_RW,
 	"Non-rotating media load increment for non-seeking I/O's");
 
-module_param(zfs_vdev_mirror_non_rotating_seek_inc, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_seek_inc,
+ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, non_rotating_seek_inc, INT, ZMOD_RW,
 	"Non-rotating media load increment for seeking I/O's");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/vdev_missing.c b/zfs/module/zfs/vdev_missing.c
index 205b23e..505df23 100644
--- a/zfs/module/zfs/vdev_missing.c
+++ b/zfs/module/zfs/vdev_missing.c

@@ -42,10 +42,9 @@
 #include <sys/fs/zfs.h>
 #include <sys/zio.h>
 
-/* ARGSUSED */
 static int
 vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *ashift, uint64_t *pshift)
 {
 	/*
 	 * Really this should just fail.  But then the root vdev will be in the
@@ -53,19 +52,20 @@
 	 * VDEV_AUX_BAD_GUID_SUM.  So we pretend to succeed, knowing that we
 	 * will fail the GUID sum check before ever trying to open the pool.
 	 */
+	(void) vd;
 	*psize = 0;
 	*max_psize = 0;
 	*ashift = 0;
+	*pshift = 0;
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 vdev_missing_close(vdev_t *vd)
 {
+	(void) vd;
 }
 
-/* ARGSUSED */
 static void
 vdev_missing_io_start(zio_t *zio)
 {
@@ -73,16 +73,20 @@
 	zio_execute(zio);
 }
 
-/* ARGSUSED */
 static void
 vdev_missing_io_done(zio_t *zio)
 {
+	(void) zio;
 }
 
 vdev_ops_t vdev_missing_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_missing_open,
 	.vdev_op_close = vdev_missing_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_missing_io_start,
 	.vdev_op_io_done = vdev_missing_io_done,
 	.vdev_op_state_change = NULL,
@@ -91,14 +95,23 @@
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_MISSING,	/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
 
 vdev_ops_t vdev_hole_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_missing_open,
 	.vdev_op_close = vdev_missing_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_missing_io_start,
 	.vdev_op_io_done = vdev_missing_io_done,
 	.vdev_op_state_change = NULL,
@@ -107,6 +120,11 @@
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_HOLE,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };

diff --git a/zfs/module/zfs/vdev_queue.c b/zfs/module/zfs/vdev_queue.c
index d3d9a6b..cc5b15b 100644
--- a/zfs/module/zfs/vdev_queue.c
+++ b/zfs/module/zfs/vdev_queue.c

@@ -35,8 +35,6 @@
 #include <sys/dsl_pool.h>
 #include <sys/metaslab_impl.h>
 #include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/kstat.h>
 #include <sys/abd.h>
 
 /*
@@ -121,16 +119,17 @@
 
 /*
  * The maximum number of i/os active to each device.  Ideally, this will be >=
- * the sum of each queue's max_active.  It must be at least the sum of each
- * queue's min_active.
+ * the sum of each queue's max_active.
  */
 uint32_t zfs_vdev_max_active = 1000;
 
 /*
  * Per-queue limits on the number of i/os active to each device.  If the
  * number of active i/os is < zfs_vdev_max_active, then the min_active comes
- * into play. We will send min_active from each queue, and then select from
- * queues in the order defined by zio_priority_t.
+ * into play.  We will send min_active from each queue round-robin, and then
+ * send from queues in the order defined by zio_priority_t up to max_active.
+ * Some queues have further mechanisms to limit the number of active I/Os
+ * beyond min_active and max_active, see below.
  *
  * In general, smaller max_active's will lead to lower latency of synchronous
  * operations.  Larger max_active's may lead to higher overall throughput,
@@ -151,13 +150,15 @@
 uint32_t zfs_vdev_async_write_min_active = 2;
 uint32_t zfs_vdev_async_write_max_active = 10;
 uint32_t zfs_vdev_scrub_min_active = 1;
-uint32_t zfs_vdev_scrub_max_active = 2;
+uint32_t zfs_vdev_scrub_max_active = 3;
 uint32_t zfs_vdev_removal_min_active = 1;
 uint32_t zfs_vdev_removal_max_active = 2;
 uint32_t zfs_vdev_initializing_min_active = 1;
 uint32_t zfs_vdev_initializing_max_active = 1;
 uint32_t zfs_vdev_trim_min_active = 1;
 uint32_t zfs_vdev_trim_max_active = 2;
+uint32_t zfs_vdev_rebuild_min_active = 1;
+uint32_t zfs_vdev_rebuild_max_active = 3;
 
 /*
  * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -170,6 +171,28 @@
 int zfs_vdev_async_write_active_max_dirty_percent = 60;
 
 /*
+ * For non-interactive I/O (scrub, resilver, removal, initialize and rebuild),
+ * the number of concurrently-active I/O's is limited to *_min_active, unless
+ * the vdev is "idle".  When there are no interactive I/Os active (sync or
+ * async), and zfs_vdev_nia_delay I/Os have completed since the last
+ * interactive I/O, then the vdev is considered to be "idle", and the number
+ * of concurrently-active non-interactive I/O's is increased to *_max_active.
+ */
+uint_t zfs_vdev_nia_delay = 5;
+
+/*
+ * Some HDDs tend to prioritize sequential I/O so high that concurrent
+ * random I/O latency reaches several seconds.  On some HDDs it happens
+ * even if sequential I/Os are submitted one at a time, and so setting
+ * *_max_active to 1 does not help.  To prevent non-interactive I/Os, like
+ * scrub, from monopolizing the device no more than zfs_vdev_nia_credit
+ * I/Os can be sent while there are outstanding incomplete interactive
+ * I/Os.  This enforced wait ensures the HDD services the interactive I/O
+ * within a reasonable amount of time.
+ */
+uint_t zfs_vdev_nia_credit = 5;
+
+/*
  * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O.
  * For read I/Os, we also aggregate across small adjacency gaps; for writes
  * we include spans of optional I/Os to aid aggregation at the disk even when
@@ -212,18 +235,18 @@
  */
 int zfs_vdev_aggregate_trim = 0;
 
-int
+static int
 vdev_queue_offset_compare(const void *x1, const void *x2)
 {
 	const zio_t *z1 = (const zio_t *)x1;
 	const zio_t *z2 = (const zio_t *)x2;
 
-	int cmp = AVL_CMP(z1->io_offset, z2->io_offset);
+	int cmp = TREE_CMP(z1->io_offset, z2->io_offset);
 
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_PCMP(z1, z2));
+	return (TREE_PCMP(z1, z2));
 }
 
 static inline avl_tree_t *
@@ -244,22 +267,22 @@
 		return (&vq->vq_trim_offset_tree);
 }
 
-int
+static int
 vdev_queue_timestamp_compare(const void *x1, const void *x2)
 {
 	const zio_t *z1 = (const zio_t *)x1;
 	const zio_t *z2 = (const zio_t *)x2;
 
-	int cmp = AVL_CMP(z1->io_timestamp, z2->io_timestamp);
+	int cmp = TREE_CMP(z1->io_timestamp, z2->io_timestamp);
 
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_PCMP(z1, z2));
+	return (TREE_PCMP(z1, z2));
 }
 
 static int
-vdev_queue_class_min_active(zio_priority_t p)
+vdev_queue_class_min_active(vdev_queue_t *vq, zio_priority_t p)
 {
 	switch (p) {
 	case ZIO_PRIORITY_SYNC_READ:
@@ -271,13 +294,19 @@
 	case ZIO_PRIORITY_ASYNC_WRITE:
 		return (zfs_vdev_async_write_min_active);
 	case ZIO_PRIORITY_SCRUB:
-		return (zfs_vdev_scrub_min_active);
+		return (vq->vq_ia_active == 0 ? zfs_vdev_scrub_min_active :
+		    MIN(vq->vq_nia_credit, zfs_vdev_scrub_min_active));
 	case ZIO_PRIORITY_REMOVAL:
-		return (zfs_vdev_removal_min_active);
+		return (vq->vq_ia_active == 0 ? zfs_vdev_removal_min_active :
+		    MIN(vq->vq_nia_credit, zfs_vdev_removal_min_active));
 	case ZIO_PRIORITY_INITIALIZING:
-		return (zfs_vdev_initializing_min_active);
+		return (vq->vq_ia_active == 0 ?zfs_vdev_initializing_min_active:
+		    MIN(vq->vq_nia_credit, zfs_vdev_initializing_min_active));
 	case ZIO_PRIORITY_TRIM:
 		return (zfs_vdev_trim_min_active);
+	case ZIO_PRIORITY_REBUILD:
+		return (vq->vq_ia_active == 0 ? zfs_vdev_rebuild_min_active :
+		    MIN(vq->vq_nia_credit, zfs_vdev_rebuild_min_active));
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -307,14 +336,12 @@
 	 * Sync tasks correspond to interactive user actions. To reduce the
 	 * execution time of those actions we push data out as fast as possible.
 	 */
-	if (spa_has_pending_synctask(spa))
+	dirty = dp->dp_dirty_total;
+	if (dirty > max_bytes || spa_has_pending_synctask(spa))
 		return (zfs_vdev_async_write_max_active);
 
-	dirty = dp->dp_dirty_total;
 	if (dirty < min_bytes)
 		return (zfs_vdev_async_write_min_active);
-	if (dirty > max_bytes)
-		return (zfs_vdev_async_write_max_active);
 
 	/*
 	 * linear interpolation:
@@ -333,7 +360,7 @@
 }
 
 static int
-vdev_queue_class_max_active(spa_t *spa, zio_priority_t p)
+vdev_queue_class_max_active(spa_t *spa, vdev_queue_t *vq, zio_priority_t p)
 {
 	switch (p) {
 	case ZIO_PRIORITY_SYNC_READ:
@@ -345,13 +372,35 @@
 	case ZIO_PRIORITY_ASYNC_WRITE:
 		return (vdev_queue_max_async_writes(spa));
 	case ZIO_PRIORITY_SCRUB:
+		if (vq->vq_ia_active > 0) {
+			return (MIN(vq->vq_nia_credit,
+			    zfs_vdev_scrub_min_active));
+		} else if (vq->vq_nia_credit < zfs_vdev_nia_delay)
+			return (MAX(1, zfs_vdev_scrub_min_active));
 		return (zfs_vdev_scrub_max_active);
 	case ZIO_PRIORITY_REMOVAL:
+		if (vq->vq_ia_active > 0) {
+			return (MIN(vq->vq_nia_credit,
+			    zfs_vdev_removal_min_active));
+		} else if (vq->vq_nia_credit < zfs_vdev_nia_delay)
+			return (MAX(1, zfs_vdev_removal_min_active));
 		return (zfs_vdev_removal_max_active);
 	case ZIO_PRIORITY_INITIALIZING:
+		if (vq->vq_ia_active > 0) {
+			return (MIN(vq->vq_nia_credit,
+			    zfs_vdev_initializing_min_active));
+		} else if (vq->vq_nia_credit < zfs_vdev_nia_delay)
+			return (MAX(1, zfs_vdev_initializing_min_active));
 		return (zfs_vdev_initializing_max_active);
 	case ZIO_PRIORITY_TRIM:
 		return (zfs_vdev_trim_max_active);
+	case ZIO_PRIORITY_REBUILD:
+		if (vq->vq_ia_active > 0) {
+			return (MIN(vq->vq_nia_credit,
+			    zfs_vdev_rebuild_min_active));
+		} else if (vq->vq_nia_credit < zfs_vdev_nia_delay)
+			return (MAX(1, zfs_vdev_rebuild_min_active));
+		return (zfs_vdev_rebuild_max_active);
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -366,17 +415,24 @@
 vdev_queue_class_to_issue(vdev_queue_t *vq)
 {
 	spa_t *spa = vq->vq_vdev->vdev_spa;
-	zio_priority_t p;
+	zio_priority_t p, n;
 
 	if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
 		return (ZIO_PRIORITY_NUM_QUEUEABLE);
 
-	/* find a queue that has not reached its minimum # outstanding i/os */
-	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+	/*
+	 * Find a queue that has not reached its minimum # outstanding i/os.
+	 * Do round-robin to reduce starvation due to zfs_vdev_max_active
+	 * and vq_nia_credit limits.
+	 */
+	for (n = 0; n < ZIO_PRIORITY_NUM_QUEUEABLE; n++) {
+		p = (vq->vq_last_prio + n + 1) % ZIO_PRIORITY_NUM_QUEUEABLE;
 		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
-		    vdev_queue_class_min_active(p))
+		    vdev_queue_class_min_active(vq, p)) {
+			vq->vq_last_prio = p;
 			return (p);
+		}
 	}
 
 	/*
@@ -386,8 +442,10 @@
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
 		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
-		    vdev_queue_class_max_active(spa, p))
+		    vdev_queue_class_max_active(spa, vq, p)) {
+			vq->vq_last_prio = p;
 			return (p);
+		}
 	}
 
 	/* No eligible queued i/os */
@@ -456,94 +514,67 @@
 static void
 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 {
-	spa_t *spa = zio->io_spa;
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
 	avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
-
-	if (shk->kstat != NULL) {
-		mutex_enter(&shk->lock);
-		kstat_waitq_enter(shk->kstat->ks_data);
-		mutex_exit(&shk->lock);
-	}
 }
 
 static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
-	spa_t *spa = zio->io_spa;
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
 	avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
+}
 
-	if (shk->kstat != NULL) {
-		mutex_enter(&shk->lock);
-		kstat_waitq_exit(shk->kstat->ks_data);
-		mutex_exit(&shk->lock);
+static boolean_t
+vdev_queue_is_interactive(zio_priority_t p)
+{
+	switch (p) {
+	case ZIO_PRIORITY_SCRUB:
+	case ZIO_PRIORITY_REMOVAL:
+	case ZIO_PRIORITY_INITIALIZING:
+	case ZIO_PRIORITY_REBUILD:
+		return (B_FALSE);
+	default:
+		return (B_TRUE);
 	}
 }
 
 static void
 vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
 {
-	spa_t *spa = zio->io_spa;
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	vq->vq_class[zio->io_priority].vqc_active++;
-	avl_add(&vq->vq_active_tree, zio);
-
-	if (shk->kstat != NULL) {
-		mutex_enter(&shk->lock);
-		kstat_runq_enter(shk->kstat->ks_data);
-		mutex_exit(&shk->lock);
+	if (vdev_queue_is_interactive(zio->io_priority)) {
+		if (++vq->vq_ia_active == 1)
+			vq->vq_nia_credit = 1;
+	} else if (vq->vq_ia_active > 0) {
+		vq->vq_nia_credit--;
 	}
+	avl_add(&vq->vq_active_tree, zio);
 }
 
 static void
 vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
 {
-	spa_t *spa = zio->io_spa;
-	spa_history_kstat_t *shk = &spa->spa_stats.io_history;
-
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	vq->vq_class[zio->io_priority].vqc_active--;
+	if (vdev_queue_is_interactive(zio->io_priority)) {
+		if (--vq->vq_ia_active == 0)
+			vq->vq_nia_credit = 0;
+		else
+			vq->vq_nia_credit = zfs_vdev_nia_credit;
+	} else if (vq->vq_ia_active == 0)
+		vq->vq_nia_credit++;
 	avl_remove(&vq->vq_active_tree, zio);
-
-	if (shk->kstat != NULL) {
-		kstat_io_t *ksio = shk->kstat->ks_data;
-
-		mutex_enter(&shk->lock);
-		kstat_runq_exit(ksio);
-		if (zio->io_type == ZIO_TYPE_READ) {
-			ksio->reads++;
-			ksio->nread += zio->io_size;
-		} else if (zio->io_type == ZIO_TYPE_WRITE) {
-			ksio->writes++;
-			ksio->nwritten += zio->io_size;
-		}
-		mutex_exit(&shk->lock);
-	}
 }
 
 static void
 vdev_queue_agg_io_done(zio_t *aio)
 {
-	if (aio->io_type == ZIO_TYPE_READ) {
-		zio_t *pio;
-		zio_link_t *zl = NULL;
-		while ((pio = zio_walk_parents(aio, &zl)) != NULL) {
-			abd_copy_off(pio->io_abd, aio->io_abd,
-			    0, pio->io_offset - aio->io_offset, pio->io_size);
-		}
-	}
-
 	abd_free(aio->io_abd);
 }
 
@@ -556,11 +587,18 @@
 #define	IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
 #define	IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
 
+/*
+ * ZIOs with sufficiently adjacent io_offsets are aggregated.  We do this
+ * by creating a gang ABD from the io_abd's of the adjacent ZIOs.  Using
+ * a gang ABD avoids memory copies between the parent and child ZIOs.
+ * Gaps between adjacent io_offsets are also covered: the zero ABD is
+ * used for write gaps, a newly allocated ABD is used for read gaps, and
+ * either one is placed in the gang ABD as well.
+ */
 static zio_t *
 vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
 {
 	zio_t *first, *last, *aio, *dio, *mandatory, *nio;
-	zio_link_t *zl = NULL;
 	uint64_t maxgap = 0;
 	uint64_t size;
 	uint64_t limit;
@@ -568,6 +606,7 @@
 	boolean_t stretch = B_FALSE;
 	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
 	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+	uint64_t next_offset;
 	abd_t *abd;
 
 	maxblocksize = spa_maxblocksize(vq->vq_vdev->vdev_spa);
@@ -587,6 +626,13 @@
 	if (zio->io_type == ZIO_TYPE_TRIM && !zfs_vdev_aggregate_trim)
 		return (NULL);
 
+	/*
+	 * I/Os to distributed spares are directly dispatched to the dRAID
+	 * leaf vdevs for aggregation.  See the comment at the end of the
+	 * zio_vdev_io_start() function.
+	 */
+	ASSERT(vq->vq_vdev->vdev_ops != &vdev_draid_spare_ops);
+
 	first = last = zio;
 
 	if (zio->io_type == ZIO_TYPE_READ)
@@ -695,7 +741,7 @@
 	size = IO_SPAN(first, last);
 	ASSERT3U(size, <=, maxblocksize);
 
-	abd = abd_alloc_for_io(size, B_TRUE);
+	abd = abd_alloc_gang();
 	if (abd == NULL)
 		return (NULL);
 
@@ -706,37 +752,56 @@
 	aio->io_timestamp = first->io_timestamp;
 
 	nio = first;
+	next_offset = first->io_offset;
 	do {
 		dio = nio;
 		nio = AVL_NEXT(t, dio);
 		zio_add_child(dio, aio);
 		vdev_queue_io_remove(vq, dio);
+
+		if (dio->io_offset != next_offset) {
+			/* allocate a buffer for a read gap */
+			ASSERT3U(dio->io_type, ==, ZIO_TYPE_READ);
+			ASSERT3U(dio->io_offset, >, next_offset);
+			abd = abd_alloc_for_io(
+			    dio->io_offset - next_offset, B_TRUE);
+			abd_gang_add(aio->io_abd, abd, B_TRUE);
+		}
+		if (dio->io_abd &&
+		    (dio->io_size != abd_get_size(dio->io_abd))) {
+			/* abd size not the same as IO size */
+			ASSERT3U(abd_get_size(dio->io_abd), >, dio->io_size);
+			abd = abd_get_offset_size(dio->io_abd, 0, dio->io_size);
+			abd_gang_add(aio->io_abd, abd, B_TRUE);
+		} else {
+			if (dio->io_flags & ZIO_FLAG_NODATA) {
+				/* allocate a buffer for a write gap */
+				ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
+				ASSERT3P(dio->io_abd, ==, NULL);
+				abd_gang_add(aio->io_abd,
+				    abd_get_zeros(dio->io_size), B_TRUE);
+			} else {
+				/*
+				 * We pass B_FALSE to abd_gang_add()
+				 * because we did not allocate a new
+				 * ABD, so it is assumed the caller
+				 * will free this ABD.
+				 */
+				abd_gang_add(aio->io_abd, dio->io_abd,
+				    B_FALSE);
+			}
+		}
+		next_offset = dio->io_offset + dio->io_size;
 	} while (dio != last);
+	ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size);
 
 	/*
-	 * We need to drop the vdev queue's lock during zio_execute() to
-	 * avoid a deadlock that we could encounter due to lock order
-	 * reversal between vq_lock and io_lock in zio_change_priority().
-	 * Use the dropped lock to do memory copy without congestion.
+	 * Callers must call zio_vdev_io_bypass() and zio_execute() on the
+	 * aggregated (parent) I/Os themselves, after releasing vq_lock.
+	 * Doing it here would require dropping the queue's lock to avoid
+	 * the deadlock caused by the lock order reversal between vq_lock
+	 * and io_lock in zio_change_priority().
 	 */
-	mutex_exit(&vq->vq_lock);
-	while ((dio = zio_walk_parents(aio, &zl)) != NULL) {
-		ASSERT3U(dio->io_type, ==, aio->io_type);
-
-		if (dio->io_flags & ZIO_FLAG_NODATA) {
-			ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
-			abd_zero_off(aio->io_abd,
-			    dio->io_offset - aio->io_offset, dio->io_size);
-		} else if (dio->io_type == ZIO_TYPE_WRITE) {
-			abd_copy_off(aio->io_abd, dio->io_abd,
-			    dio->io_offset - aio->io_offset, 0, dio->io_size);
-		}
-
-		zio_vdev_io_bypass(dio);
-		zio_execute(dio);
-	}
-	mutex_enter(&vq->vq_lock);
-
 	return (aio);
 }
 
@@ -774,23 +839,24 @@
 	ASSERT3U(zio->io_priority, ==, p);
 
 	aio = vdev_queue_aggregate(vq, zio);
-	if (aio != NULL)
+	if (aio != NULL) {
 		zio = aio;
-	else
+	} else {
 		vdev_queue_io_remove(vq, zio);
 
-	/*
-	 * If the I/O is or was optional and therefore has no data, we need to
-	 * simply discard it. We need to drop the vdev queue's lock to avoid a
-	 * deadlock that we could encounter since this I/O will complete
-	 * immediately.
-	 */
-	if (zio->io_flags & ZIO_FLAG_NODATA) {
-		mutex_exit(&vq->vq_lock);
-		zio_vdev_io_bypass(zio);
-		zio_execute(zio);
-		mutex_enter(&vq->vq_lock);
-		goto again;
+		/*
+		 * If the I/O is or was optional and therefore has no data, we
+		 * need to simply discard it. We need to drop the vdev queue's
+		 * lock to avoid a deadlock that we could encounter since this
+		 * I/O will complete immediately.
+		 */
+		if (zio->io_flags & ZIO_FLAG_NODATA) {
+			mutex_exit(&vq->vq_lock);
+			zio_vdev_io_bypass(zio);
+			zio_execute(zio);
+			mutex_enter(&vq->vq_lock);
+			goto again;
+		}
 	}
 
 	vdev_queue_pending_add(vq, zio);
@@ -803,7 +869,8 @@
 vdev_queue_io(zio_t *zio)
 {
 	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
-	zio_t *nio;
+	zio_t *dio, *nio;
+	zio_link_t *zl = NULL;
 
 	if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
 		return (zio);
@@ -819,7 +886,8 @@
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
 		    zio->io_priority != ZIO_PRIORITY_SCRUB &&
 		    zio->io_priority != ZIO_PRIORITY_REMOVAL &&
-		    zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
+		    zio->io_priority != ZIO_PRIORITY_INITIALIZING &&
+		    zio->io_priority != ZIO_PRIORITY_REBUILD) {
 			zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
 		}
 	} else if (zio->io_type == ZIO_TYPE_WRITE) {
@@ -828,7 +896,8 @@
 		if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE &&
 		    zio->io_priority != ZIO_PRIORITY_REMOVAL &&
-		    zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
+		    zio->io_priority != ZIO_PRIORITY_INITIALIZING &&
+		    zio->io_priority != ZIO_PRIORITY_REBUILD) {
 			zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
 		}
 	} else {
@@ -837,9 +906,9 @@
 	}
 
 	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
+	zio->io_timestamp = gethrtime();
 
 	mutex_enter(&vq->vq_lock);
-	zio->io_timestamp = gethrtime();
 	vdev_queue_io_add(vq, zio);
 	nio = vdev_queue_io_to_issue(vq);
 	mutex_exit(&vq->vq_lock);
@@ -848,6 +917,11 @@
 		return (NULL);
 
 	if (nio->io_done == vdev_queue_agg_io_done) {
+		while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+			ASSERT3U(dio->io_type, ==, nio->io_type);
+			zio_vdev_io_bypass(dio);
+			zio_execute(dio);
+		}
 		zio_nowait(nio);
 		return (NULL);
 	}
@@ -859,19 +933,24 @@
 vdev_queue_io_done(zio_t *zio)
 {
 	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
-	zio_t *nio;
+	zio_t *dio, *nio;
+	zio_link_t *zl = NULL;
+
+	hrtime_t now = gethrtime();
+	vq->vq_io_complete_ts = now;
+	vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
 
 	mutex_enter(&vq->vq_lock);
-
 	vdev_queue_pending_remove(vq, zio);
 
-	zio->io_delta = gethrtime() - zio->io_timestamp;
-	vq->vq_io_complete_ts = gethrtime();
-	vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
-
 	while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
 		mutex_exit(&vq->vq_lock);
 		if (nio->io_done == vdev_queue_agg_io_done) {
+			while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
+				ASSERT3U(dio->io_type, ==, nio->io_type);
+				zio_vdev_io_bypass(dio);
+				zio_execute(dio);
+			}
 			zio_nowait(nio);
 		} else {
 			zio_vdev_io_reissue(nio);
@@ -952,99 +1031,91 @@
 	return (vd->vdev_queue.vq_last_offset);
 }
 
-#if defined(_KERNEL)
-module_param(zfs_vdev_aggregation_limit, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_aggregation_limit, "Max vdev I/O aggregation size");
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit, INT, ZMOD_RW,
+	"Max vdev I/O aggregation size");
 
-module_param(zfs_vdev_aggregation_limit_non_rotating, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_aggregation_limit_non_rotating,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit_non_rotating, INT, ZMOD_RW,
 	"Max vdev I/O aggregation size for non-rotating media");
 
-module_param(zfs_vdev_aggregate_trim, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_aggregate_trim, "Allow TRIM I/O to be aggregated");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregate_trim, INT, ZMOD_RW,
+	"Allow TRIM I/O to be aggregated");
 
-module_param(zfs_vdev_read_gap_limit, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_read_gap_limit, "Aggregate read I/O over gap");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, read_gap_limit, INT, ZMOD_RW,
+	"Aggregate read I/O over gap");
 
-module_param(zfs_vdev_write_gap_limit, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_write_gap_limit, "Aggregate write I/O over gap");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, write_gap_limit, INT, ZMOD_RW,
+	"Aggregate write I/O over gap");
 
-module_param(zfs_vdev_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_max_active, "Maximum number of active I/Os per vdev");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, max_active, INT, ZMOD_RW,
+	"Maximum number of active I/Os per vdev");
 
-module_param(zfs_vdev_async_write_active_max_dirty_percent, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_write_active_max_dirty_percent,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_write_active_max_dirty_percent, INT, ZMOD_RW,
 	"Async write concurrency max threshold");
 
-module_param(zfs_vdev_async_write_active_min_dirty_percent, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_write_active_min_dirty_percent,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_write_active_min_dirty_percent, INT, ZMOD_RW,
 	"Async write concurrency min threshold");
 
-module_param(zfs_vdev_async_read_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_read_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_read_max_active, INT, ZMOD_RW,
 	"Max active async read I/Os per vdev");
 
-module_param(zfs_vdev_async_read_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_read_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_read_min_active, INT, ZMOD_RW,
 	"Min active async read I/Os per vdev");
 
-module_param(zfs_vdev_async_write_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_write_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_write_max_active, INT, ZMOD_RW,
 	"Max active async write I/Os per vdev");
 
-module_param(zfs_vdev_async_write_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_async_write_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, async_write_min_active, INT, ZMOD_RW,
 	"Min active async write I/Os per vdev");
 
-module_param(zfs_vdev_initializing_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_initializing_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, initializing_max_active, INT, ZMOD_RW,
 	"Max active initializing I/Os per vdev");
 
-module_param(zfs_vdev_initializing_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_initializing_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, initializing_min_active, INT, ZMOD_RW,
 	"Min active initializing I/Os per vdev");
 
-module_param(zfs_vdev_removal_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_removal_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, removal_max_active, INT, ZMOD_RW,
 	"Max active removal I/Os per vdev");
 
-module_param(zfs_vdev_removal_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_removal_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, removal_min_active, INT, ZMOD_RW,
 	"Min active removal I/Os per vdev");
 
-module_param(zfs_vdev_scrub_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_scrub_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, scrub_max_active, INT, ZMOD_RW,
 	"Max active scrub I/Os per vdev");
 
-module_param(zfs_vdev_scrub_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_scrub_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, scrub_min_active, INT, ZMOD_RW,
 	"Min active scrub I/Os per vdev");
 
-module_param(zfs_vdev_sync_read_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_sync_read_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, sync_read_max_active, INT, ZMOD_RW,
 	"Max active sync read I/Os per vdev");
 
-module_param(zfs_vdev_sync_read_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_sync_read_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, sync_read_min_active, INT, ZMOD_RW,
 	"Min active sync read I/Os per vdev");
 
-module_param(zfs_vdev_sync_write_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_sync_write_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, sync_write_max_active, INT, ZMOD_RW,
 	"Max active sync write I/Os per vdev");
 
-module_param(zfs_vdev_sync_write_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_sync_write_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, sync_write_min_active, INT, ZMOD_RW,
 	"Min active sync write I/Os per vdev");
 
-module_param(zfs_vdev_trim_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_trim_max_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, trim_max_active, INT, ZMOD_RW,
 	"Max active trim/discard I/Os per vdev");
 
-module_param(zfs_vdev_trim_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_trim_min_active,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, trim_min_active, INT, ZMOD_RW,
 	"Min active trim/discard I/Os per vdev");
 
-module_param(zfs_vdev_queue_depth_pct, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_queue_depth_pct,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, rebuild_max_active, INT, ZMOD_RW,
+	"Max active rebuild I/Os per vdev");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, rebuild_min_active, INT, ZMOD_RW,
+	"Min active rebuild I/Os per vdev");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, nia_credit, INT, ZMOD_RW,
+	"Number of non-interactive I/Os to allow in sequence");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, nia_delay, INT, ZMOD_RW,
+	"Number of non-interactive I/Os before _max_active");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, queue_depth_pct, INT, ZMOD_RW,
 	"Queue depth percentage for each top-level vdev");
-#endif
+/* END CSTYLED */

diff --git a/zfs/module/zfs/vdev_raidz.c b/zfs/module/zfs/vdev_raidz.c
index f63ccaa..5c25007 100644
--- a/zfs/module/zfs/vdev_raidz.c
+++ b/zfs/module/zfs/vdev_raidz.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2016 Gvozden Nešković. All rights reserved.
  */
 
@@ -35,6 +35,7 @@
 #include <sys/fm/fs/zfs.h>
 #include <sys/vdev_raidz.h>
 #include <sys/vdev_raidz_impl.h>
+#include <sys/vdev_draid.h>
 
 #ifdef ZFS_DEBUG
 #include <sys/vdev.h>	/* For vdev_xlate() in vdev_raidz_io_verify() */
@@ -134,25 +135,31 @@
 	VDEV_RAIDZ_64MUL_2((x), mask); \
 }
 
+static void
+vdev_raidz_row_free(raidz_row_t *rr)
+{
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		if (rc->rc_size != 0)
+			abd_free(rc->rc_abd);
+		if (rc->rc_orig_data != NULL)
+			abd_free(rc->rc_orig_data);
+	}
+
+	if (rr->rr_abd_empty != NULL)
+		abd_free(rr->rr_abd_empty);
+
+	kmem_free(rr, offsetof(raidz_row_t, rr_col[rr->rr_scols]));
+}
+
 void
 vdev_raidz_map_free(raidz_map_t *rm)
 {
-	int c;
+	for (int i = 0; i < rm->rm_nrows; i++)
+		vdev_raidz_row_free(rm->rm_row[i]);
 
-	for (c = 0; c < rm->rm_firstdatacol; c++) {
-		abd_free(rm->rm_col[c].rc_abd);
-
-		if (rm->rm_col[c].rc_gdata != NULL)
-			abd_free(rm->rm_col[c].rc_gdata);
-	}
-
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
-		abd_put(rm->rm_col[c].rc_abd);
-
-	if (rm->rm_abd_copy != NULL)
-		abd_free(rm->rm_abd_copy);
-
-	kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols]));
+	kmem_free(rm, offsetof(raidz_map_t, rm_row[rm->rm_nrows]));
 }
 
 static void
@@ -160,170 +167,11 @@
 {
 	raidz_map_t *rm = zio->io_vsd;
 
-	ASSERT0(rm->rm_freed);
-	rm->rm_freed = 1;
-
-	if (rm->rm_reports == 0)
-		vdev_raidz_map_free(rm);
+	vdev_raidz_map_free(rm);
 }
 
-/*ARGSUSED*/
-static void
-vdev_raidz_cksum_free(void *arg, size_t ignored)
-{
-	raidz_map_t *rm = arg;
-
-	ASSERT3U(rm->rm_reports, >, 0);
-
-	if (--rm->rm_reports == 0 && rm->rm_freed != 0)
-		vdev_raidz_map_free(rm);
-}
-
-static void
-vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
-{
-	raidz_map_t *rm = zcr->zcr_cbdata;
-	const size_t c = zcr->zcr_cbinfo;
-	size_t x, offset;
-
-	const abd_t *good = NULL;
-	const abd_t *bad = rm->rm_col[c].rc_abd;
-
-	if (good_data == NULL) {
-		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
-		return;
-	}
-
-	if (c < rm->rm_firstdatacol) {
-		/*
-		 * The first time through, calculate the parity blocks for
-		 * the good data (this relies on the fact that the good
-		 * data never changes for a given logical ZIO)
-		 */
-		if (rm->rm_col[0].rc_gdata == NULL) {
-			abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
-
-			/*
-			 * Set up the rm_col[]s to generate the parity for
-			 * good_data, first saving the parity bufs and
-			 * replacing them with buffers to hold the result.
-			 */
-			for (x = 0; x < rm->rm_firstdatacol; x++) {
-				bad_parity[x] = rm->rm_col[x].rc_abd;
-				rm->rm_col[x].rc_abd =
-				    rm->rm_col[x].rc_gdata =
-				    abd_alloc_sametype(rm->rm_col[x].rc_abd,
-				    rm->rm_col[x].rc_size);
-			}
-
-			/* fill in the data columns from good_data */
-			offset = 0;
-			for (; x < rm->rm_cols; x++) {
-				abd_put(rm->rm_col[x].rc_abd);
-
-				rm->rm_col[x].rc_abd =
-				    abd_get_offset_size((abd_t *)good_data,
-				    offset, rm->rm_col[x].rc_size);
-				offset += rm->rm_col[x].rc_size;
-			}
-
-			/*
-			 * Construct the parity from the good data.
-			 */
-			vdev_raidz_generate_parity(rm);
-
-			/* restore everything back to its original state */
-			for (x = 0; x < rm->rm_firstdatacol; x++)
-				rm->rm_col[x].rc_abd = bad_parity[x];
-
-			offset = 0;
-			for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
-				abd_put(rm->rm_col[x].rc_abd);
-				rm->rm_col[x].rc_abd = abd_get_offset_size(
-				    rm->rm_abd_copy, offset,
-				    rm->rm_col[x].rc_size);
-				offset += rm->rm_col[x].rc_size;
-			}
-		}
-
-		ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
-		good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0,
-		    rm->rm_col[c].rc_size);
-	} else {
-		/* adjust good_data to point at the start of our column */
-		offset = 0;
-		for (x = rm->rm_firstdatacol; x < c; x++)
-			offset += rm->rm_col[x].rc_size;
-
-		good = abd_get_offset_size((abd_t *)good_data, offset,
-		    rm->rm_col[c].rc_size);
-	}
-
-	/* we drop the ereport if it ends up that the data was good */
-	zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
-	abd_put((abd_t *)good);
-}
-
-/*
- * Invoked indirectly by zfs_ereport_start_checksum(), called
- * below when our read operation fails completely.  The main point
- * is to keep a copy of everything we read from disk, so that at
- * vdev_raidz_cksum_finish() time we can compare it with the good data.
- */
-static void
-vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
-{
-	size_t c = (size_t)(uintptr_t)arg;
-	size_t offset;
-
-	raidz_map_t *rm = zio->io_vsd;
-	size_t size;
-
-	/* set up the report and bump the refcount  */
-	zcr->zcr_cbdata = rm;
-	zcr->zcr_cbinfo = c;
-	zcr->zcr_finish = vdev_raidz_cksum_finish;
-	zcr->zcr_free = vdev_raidz_cksum_free;
-
-	rm->rm_reports++;
-	ASSERT3U(rm->rm_reports, >, 0);
-
-	if (rm->rm_abd_copy != NULL)
-		return;
-
-	/*
-	 * It's the first time we're called for this raidz_map_t, so we need
-	 * to copy the data aside; there's no guarantee that our zio's buffer
-	 * won't be re-used for something else.
-	 *
-	 * Our parity data is already in separate buffers, so there's no need
-	 * to copy them.
-	 */
-
-	size = 0;
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
-		size += rm->rm_col[c].rc_size;
-
-	rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);
-
-	for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		raidz_col_t *col = &rm->rm_col[c];
-		abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset,
-		    col->rc_size);
-
-		abd_copy(tmp, col->rc_abd, col->rc_size);
-
-		abd_put(col->rc_abd);
-		col->rc_abd = tmp;
-
-		offset += col->rc_size;
-	}
-	ASSERT3U(offset, ==, size);
-}
-
-static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
+const zio_vsd_ops_t vdev_raidz_vsd_ops = {
 	.vsd_free = vdev_raidz_map_free_vsd,
-	.vsd_cksum_report = vdev_raidz_cksum_report
 };
 
 /*
@@ -337,7 +185,7 @@
 vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
     uint64_t nparity)
 {
-	raidz_map_t *rm;
+	raidz_row_t *rr;
 	/* The starting RAIDZ (parent) vdev sector of the block. */
 	uint64_t b = zio->io_offset >> ashift;
 	/* The zio's size in units of the vdev's minimum sector size. */
@@ -347,7 +195,10 @@
 	/* The starting byte offset on each child vdev. */
 	uint64_t o = (b / dcols) << ashift;
 	uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
-	uint64_t off = 0;
+
+	raidz_map_t *rm =
+	    kmem_zalloc(offsetof(raidz_map_t, rm_row[1]), KM_SLEEP);
+	rm->rm_nrows = 1;
 
 	/*
 	 * "Quotient": The number of data sectors for this stripe on all but
@@ -370,8 +221,10 @@
 	 */
 	tot = s + nparity * (q + (r == 0 ? 0 : 1));
 
-	/* acols: The columns that will be accessed. */
-	/* scols: The columns that will be accessed or skipped. */
+	/*
+	 * acols: The columns that will be accessed.
+	 * scols: The columns that will be accessed or skipped.
+	 */
 	if (q == 0) {
 		/* Our I/O request doesn't span all child vdevs. */
 		acols = bc;
@@ -383,65 +236,66 @@
 
 	ASSERT3U(acols, <=, scols);
 
-	rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_SLEEP);
+	rr = kmem_alloc(offsetof(raidz_row_t, rr_col[scols]), KM_SLEEP);
+	rm->rm_row[0] = rr;
 
-	rm->rm_cols = acols;
-	rm->rm_scols = scols;
-	rm->rm_bigcols = bc;
-	rm->rm_skipstart = bc;
-	rm->rm_missingdata = 0;
-	rm->rm_missingparity = 0;
-	rm->rm_firstdatacol = nparity;
-	rm->rm_abd_copy = NULL;
-	rm->rm_reports = 0;
-	rm->rm_freed = 0;
-	rm->rm_ecksuminjected = 0;
+	rr->rr_cols = acols;
+	rr->rr_scols = scols;
+	rr->rr_bigcols = bc;
+	rr->rr_missingdata = 0;
+	rr->rr_missingparity = 0;
+	rr->rr_firstdatacol = nparity;
+	rr->rr_abd_empty = NULL;
+	rr->rr_nempty = 0;
+#ifdef ZFS_DEBUG
+	rr->rr_offset = zio->io_offset;
+	rr->rr_size = zio->io_size;
+#endif
 
 	asize = 0;
 
 	for (c = 0; c < scols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
 		col = f + c;
 		coff = o;
 		if (col >= dcols) {
 			col -= dcols;
 			coff += 1ULL << ashift;
 		}
-		rm->rm_col[c].rc_devidx = col;
-		rm->rm_col[c].rc_offset = coff;
-		rm->rm_col[c].rc_abd = NULL;
-		rm->rm_col[c].rc_gdata = NULL;
-		rm->rm_col[c].rc_error = 0;
-		rm->rm_col[c].rc_tried = 0;
-		rm->rm_col[c].rc_skipped = 0;
+		rc->rc_devidx = col;
+		rc->rc_offset = coff;
+		rc->rc_abd = NULL;
+		rc->rc_orig_data = NULL;
+		rc->rc_error = 0;
+		rc->rc_tried = 0;
+		rc->rc_skipped = 0;
+		rc->rc_force_repair = 0;
+		rc->rc_allow_repair = 1;
+		rc->rc_need_orig_restore = B_FALSE;
 
 		if (c >= acols)
-			rm->rm_col[c].rc_size = 0;
+			rc->rc_size = 0;
 		else if (c < bc)
-			rm->rm_col[c].rc_size = (q + 1) << ashift;
+			rc->rc_size = (q + 1) << ashift;
 		else
-			rm->rm_col[c].rc_size = q << ashift;
+			rc->rc_size = q << ashift;
 
-		asize += rm->rm_col[c].rc_size;
+		asize += rc->rc_size;
 	}
 
 	ASSERT3U(asize, ==, tot << ashift);
-	rm->rm_asize = roundup(asize, (nparity + 1) << ashift);
 	rm->rm_nskip = roundup(tot, nparity + 1) - tot;
-	ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << ashift);
-	ASSERT3U(rm->rm_nskip, <=, nparity);
+	rm->rm_skipstart = bc;
 
-	for (c = 0; c < rm->rm_firstdatacol; c++)
-		rm->rm_col[c].rc_abd =
-		    abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
+	for (c = 0; c < rr->rr_firstdatacol; c++)
+		rr->rr_col[c].rc_abd =
+		    abd_alloc_linear(rr->rr_col[c].rc_size, B_FALSE);
 
-	rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0,
-	    rm->rm_col[c].rc_size);
-	off = rm->rm_col[c].rc_size;
-
-	for (c = c + 1; c < acols; c++) {
-		rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off,
-		    rm->rm_col[c].rc_size);
-		off += rm->rm_col[c].rc_size;
+	for (uint64_t off = 0; c < acols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+		    zio->io_abd, off, rc->rc_size);
+		off += rc->rc_size;
 	}
 
 	/*
@@ -464,24 +318,21 @@
 	 * skip the first column since at least one data and one parity
 	 * column must appear in each row.
 	 */
-	ASSERT(rm->rm_cols >= 2);
-	ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
+	ASSERT(rr->rr_cols >= 2);
+	ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
 
-	if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
-		devidx = rm->rm_col[0].rc_devidx;
-		o = rm->rm_col[0].rc_offset;
-		rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
-		rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset;
-		rm->rm_col[1].rc_devidx = devidx;
-		rm->rm_col[1].rc_offset = o;
+	if (rr->rr_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
+		devidx = rr->rr_col[0].rc_devidx;
+		o = rr->rr_col[0].rc_offset;
+		rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
+		rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
+		rr->rr_col[1].rc_devidx = devidx;
+		rr->rr_col[1].rc_offset = o;
 
 		if (rm->rm_skipstart == 0)
 			rm->rm_skipstart = 1;
 	}
 
-	zio->io_vsd = rm;
-	zio->io_vsd_ops = &vdev_raidz_vsd_ops;
-
 	/* init RAIDZ parity ops */
 	rm->rm_ops = vdev_raidz_math_get_ops();
 
@@ -550,50 +401,43 @@
 }
 
 static void
-vdev_raidz_generate_parity_p(raidz_map_t *rm)
+vdev_raidz_generate_parity_p(raidz_row_t *rr)
 {
-	uint64_t *p;
-	int c;
-	abd_t *src;
+	uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
 
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		src = rm->rm_col[c].rc_abd;
-		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
+	for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		abd_t *src = rr->rr_col[c].rc_abd;
 
-		if (c == rm->rm_firstdatacol) {
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+		if (c == rr->rr_firstdatacol) {
+			abd_copy_to_buf(p, src, rr->rr_col[c].rc_size);
 		} else {
 			struct pqr_struct pqr = { p, NULL, NULL };
-			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+			(void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size,
 			    vdev_raidz_p_func, &pqr);
 		}
 	}
 }
 
 static void
-vdev_raidz_generate_parity_pq(raidz_map_t *rm)
+vdev_raidz_generate_parity_pq(raidz_row_t *rr)
 {
-	uint64_t *p, *q, pcnt, ccnt, mask, i;
-	int c;
-	abd_t *src;
+	uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
+	uint64_t *q = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd);
+	uint64_t pcnt = rr->rr_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
+	ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size ==
+	    rr->rr_col[VDEV_RAIDZ_Q].rc_size);
 
-	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
-	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
-	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+	for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		abd_t *src = rr->rr_col[c].rc_abd;
 
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		src = rm->rm_col[c].rc_abd;
-		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
-		q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
+		uint64_t ccnt = rr->rr_col[c].rc_size / sizeof (p[0]);
 
-		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
-
-		if (c == rm->rm_firstdatacol) {
+		if (c == rr->rr_firstdatacol) {
 			ASSERT(ccnt == pcnt || ccnt == 0);
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
-			(void) memcpy(q, p, rm->rm_col[c].rc_size);
+			abd_copy_to_buf(p, src, rr->rr_col[c].rc_size);
+			(void) memcpy(q, p, rr->rr_col[c].rc_size);
 
-			for (i = ccnt; i < pcnt; i++) {
+			for (uint64_t i = ccnt; i < pcnt; i++) {
 				p[i] = 0;
 				q[i] = 0;
 			}
@@ -601,14 +445,15 @@
 			struct pqr_struct pqr = { p, q, NULL };
 
 			ASSERT(ccnt <= pcnt);
-			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+			(void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size,
 			    vdev_raidz_pq_func, &pqr);
 
 			/*
 			 * Treat short columns as though they are full of 0s.
 			 * Note that there's therefore nothing needed for P.
 			 */
-			for (i = ccnt; i < pcnt; i++) {
+			uint64_t mask;
+			for (uint64_t i = ccnt; i < pcnt; i++) {
 				VDEV_RAIDZ_64MUL_2(q[i], mask);
 			}
 		}
@@ -616,33 +461,29 @@
 }
 
 static void
-vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
+vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
 {
-	uint64_t *p, *q, *r, pcnt, ccnt, mask, i;
-	int c;
-	abd_t *src;
+	uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
+	uint64_t *q = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd);
+	uint64_t *r = abd_to_buf(rr->rr_col[VDEV_RAIDZ_R].rc_abd);
+	uint64_t pcnt = rr->rr_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
+	ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size ==
+	    rr->rr_col[VDEV_RAIDZ_Q].rc_size);
+	ASSERT(rr->rr_col[VDEV_RAIDZ_P].rc_size ==
+	    rr->rr_col[VDEV_RAIDZ_R].rc_size);
 
-	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
-	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
-	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
-	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
-	    rm->rm_col[VDEV_RAIDZ_R].rc_size);
+	for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		abd_t *src = rr->rr_col[c].rc_abd;
 
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		src = rm->rm_col[c].rc_abd;
-		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
-		q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
-		r = abd_to_buf(rm->rm_col[VDEV_RAIDZ_R].rc_abd);
+		uint64_t ccnt = rr->rr_col[c].rc_size / sizeof (p[0]);
 
-		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
-
-		if (c == rm->rm_firstdatacol) {
+		if (c == rr->rr_firstdatacol) {
 			ASSERT(ccnt == pcnt || ccnt == 0);
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
-			(void) memcpy(q, p, rm->rm_col[c].rc_size);
-			(void) memcpy(r, p, rm->rm_col[c].rc_size);
+			abd_copy_to_buf(p, src, rr->rr_col[c].rc_size);
+			(void) memcpy(q, p, rr->rr_col[c].rc_size);
+			(void) memcpy(r, p, rr->rr_col[c].rc_size);
 
-			for (i = ccnt; i < pcnt; i++) {
+			for (uint64_t i = ccnt; i < pcnt; i++) {
 				p[i] = 0;
 				q[i] = 0;
 				r[i] = 0;
@@ -651,14 +492,15 @@
 			struct pqr_struct pqr = { p, q, r };
 
 			ASSERT(ccnt <= pcnt);
-			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+			(void) abd_iterate_func(src, 0, rr->rr_col[c].rc_size,
 			    vdev_raidz_pqr_func, &pqr);
 
 			/*
 			 * Treat short columns as though they are full of 0s.
 			 * Note that there's therefore nothing needed for P.
 			 */
-			for (i = ccnt; i < pcnt; i++) {
+			uint64_t mask;
+			for (uint64_t i = ccnt; i < pcnt; i++) {
 				VDEV_RAIDZ_64MUL_2(q[i], mask);
 				VDEV_RAIDZ_64MUL_4(r[i], mask);
 			}
@@ -671,31 +513,42 @@
  * parity columns available.
  */
 void
-vdev_raidz_generate_parity(raidz_map_t *rm)
+vdev_raidz_generate_parity_row(raidz_map_t *rm, raidz_row_t *rr)
 {
+	ASSERT3U(rr->rr_cols, !=, 0);
+
 	/* Generate using the new math implementation */
-	if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
+	if (vdev_raidz_math_generate(rm, rr) != RAIDZ_ORIGINAL_IMPL)
 		return;
 
-	switch (rm->rm_firstdatacol) {
+	switch (rr->rr_firstdatacol) {
 	case 1:
-		vdev_raidz_generate_parity_p(rm);
+		vdev_raidz_generate_parity_p(rr);
 		break;
 	case 2:
-		vdev_raidz_generate_parity_pq(rm);
+		vdev_raidz_generate_parity_pq(rr);
 		break;
 	case 3:
-		vdev_raidz_generate_parity_pqr(rm);
+		vdev_raidz_generate_parity_pqr(rr);
 		break;
 	default:
 		cmn_err(CE_PANIC, "invalid RAID-Z configuration");
 	}
 }
 
-/* ARGSUSED */
+void
+vdev_raidz_generate_parity(raidz_map_t *rm)
+{
+	for (int i = 0; i < rm->rm_nrows; i++) {
+		raidz_row_t *rr = rm->rm_row[i];
+		vdev_raidz_generate_parity_row(rm, rr);
+	}
+}
+
 static int
 vdev_raidz_reconst_p_func(void *dbuf, void *sbuf, size_t size, void *private)
 {
+	(void) private;
 	uint64_t *dst = dbuf;
 	uint64_t *src = sbuf;
 	int cnt = size / sizeof (src[0]);
@@ -707,11 +560,11 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 vdev_raidz_reconst_q_pre_func(void *dbuf, void *sbuf, size_t size,
     void *private)
 {
+	(void) private;
 	uint64_t *dst = dbuf;
 	uint64_t *src = sbuf;
 	uint64_t mask;
@@ -725,10 +578,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
 vdev_raidz_reconst_q_pre_tail_func(void *buf, size_t size, void *private)
 {
+	(void) private;
 	uint64_t *dst = buf;
 	uint64_t mask;
 	int cnt = size / sizeof (dst[0]);
@@ -808,31 +661,28 @@
 	return (0);
 }
 
-static int
-vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts)
+static void
+vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int x = tgts[0];
-	int c;
 	abd_t *dst, *src;
 
-	ASSERT(ntgts == 1);
-	ASSERT(x >= rm->rm_firstdatacol);
-	ASSERT(x < rm->rm_cols);
+	ASSERT3U(ntgts, ==, 1);
+	ASSERT3U(x, >=, rr->rr_firstdatacol);
+	ASSERT3U(x, <, rr->rr_cols);
 
-	ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_P].rc_size);
-	ASSERT(rm->rm_col[x].rc_size > 0);
+	ASSERT3U(rr->rr_col[x].rc_size, <=, rr->rr_col[VDEV_RAIDZ_P].rc_size);
 
-	src = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
-	dst = rm->rm_col[x].rc_abd;
+	src = rr->rr_col[VDEV_RAIDZ_P].rc_abd;
+	dst = rr->rr_col[x].rc_abd;
 
-	abd_copy_from_buf(dst, abd_to_buf(src), rm->rm_col[x].rc_size);
+	abd_copy_from_buf(dst, abd_to_buf(src), rr->rr_col[x].rc_size);
 
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		uint64_t size = MIN(rm->rm_col[x].rc_size,
-		    rm->rm_col[c].rc_size);
+	for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		uint64_t size = MIN(rr->rr_col[x].rc_size,
+		    rr->rr_col[c].rc_size);
 
-		src = rm->rm_col[c].rc_abd;
-		dst = rm->rm_col[x].rc_abd;
+		src = rr->rr_col[c].rc_abd;
 
 		if (c == x)
 			continue;
@@ -840,12 +690,10 @@
 		(void) abd_iterate_func2(dst, src, 0, 0, size,
 		    vdev_raidz_reconst_p_func, NULL);
 	}
-
-	return (1 << VDEV_RAIDZ_P);
 }
 
-static int
-vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts)
+static void
+vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int x = tgts[0];
 	int c, exp;
@@ -853,44 +701,42 @@
 
 	ASSERT(ntgts == 1);
 
-	ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+	ASSERT(rr->rr_col[x].rc_size <= rr->rr_col[VDEV_RAIDZ_Q].rc_size);
 
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-		uint64_t size = (c == x) ? 0 : MIN(rm->rm_col[x].rc_size,
-		    rm->rm_col[c].rc_size);
+	for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+		uint64_t size = (c == x) ? 0 : MIN(rr->rr_col[x].rc_size,
+		    rr->rr_col[c].rc_size);
 
-		src = rm->rm_col[c].rc_abd;
-		dst = rm->rm_col[x].rc_abd;
+		src = rr->rr_col[c].rc_abd;
+		dst = rr->rr_col[x].rc_abd;
 
-		if (c == rm->rm_firstdatacol) {
+		if (c == rr->rr_firstdatacol) {
 			abd_copy(dst, src, size);
-			if (rm->rm_col[x].rc_size > size)
+			if (rr->rr_col[x].rc_size > size) {
 				abd_zero_off(dst, size,
-				    rm->rm_col[x].rc_size - size);
-
+				    rr->rr_col[x].rc_size - size);
+			}
 		} else {
-			ASSERT3U(size, <=, rm->rm_col[x].rc_size);
+			ASSERT3U(size, <=, rr->rr_col[x].rc_size);
 			(void) abd_iterate_func2(dst, src, 0, 0, size,
 			    vdev_raidz_reconst_q_pre_func, NULL);
 			(void) abd_iterate_func(dst,
-			    size, rm->rm_col[x].rc_size - size,
+			    size, rr->rr_col[x].rc_size - size,
 			    vdev_raidz_reconst_q_pre_tail_func, NULL);
 		}
 	}
 
-	src = rm->rm_col[VDEV_RAIDZ_Q].rc_abd;
-	dst = rm->rm_col[x].rc_abd;
-	exp = 255 - (rm->rm_cols - 1 - x);
+	src = rr->rr_col[VDEV_RAIDZ_Q].rc_abd;
+	dst = rr->rr_col[x].rc_abd;
+	exp = 255 - (rr->rr_cols - 1 - x);
 
 	struct reconst_q_struct rq = { abd_to_buf(src), exp };
-	(void) abd_iterate_func(dst, 0, rm->rm_col[x].rc_size,
+	(void) abd_iterate_func(dst, 0, rr->rr_col[x].rc_size,
 	    vdev_raidz_reconst_q_post_func, &rq);
-
-	return (1 << VDEV_RAIDZ_Q);
 }
 
-static int
-vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
+static void
+vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp;
 	abd_t *pdata, *qdata;
@@ -901,10 +747,10 @@
 
 	ASSERT(ntgts == 2);
 	ASSERT(x < y);
-	ASSERT(x >= rm->rm_firstdatacol);
-	ASSERT(y < rm->rm_cols);
+	ASSERT(x >= rr->rr_firstdatacol);
+	ASSERT(y < rr->rr_cols);
 
-	ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size);
+	ASSERT(rr->rr_col[x].rc_size >= rr->rr_col[y].rc_size);
 
 	/*
 	 * Move the parity data aside -- we're going to compute parity as
@@ -913,29 +759,29 @@
 	 * parity so we make those columns appear to be full of zeros by
 	 * setting their lengths to zero.
 	 */
-	pdata = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
-	qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_abd;
-	xsize = rm->rm_col[x].rc_size;
-	ysize = rm->rm_col[y].rc_size;
+	pdata = rr->rr_col[VDEV_RAIDZ_P].rc_abd;
+	qdata = rr->rr_col[VDEV_RAIDZ_Q].rc_abd;
+	xsize = rr->rr_col[x].rc_size;
+	ysize = rr->rr_col[y].rc_size;
 
-	rm->rm_col[VDEV_RAIDZ_P].rc_abd =
-	    abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_P].rc_size, B_TRUE);
-	rm->rm_col[VDEV_RAIDZ_Q].rc_abd =
-	    abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_Q].rc_size, B_TRUE);
-	rm->rm_col[x].rc_size = 0;
-	rm->rm_col[y].rc_size = 0;
+	rr->rr_col[VDEV_RAIDZ_P].rc_abd =
+	    abd_alloc_linear(rr->rr_col[VDEV_RAIDZ_P].rc_size, B_TRUE);
+	rr->rr_col[VDEV_RAIDZ_Q].rc_abd =
+	    abd_alloc_linear(rr->rr_col[VDEV_RAIDZ_Q].rc_size, B_TRUE);
+	rr->rr_col[x].rc_size = 0;
+	rr->rr_col[y].rc_size = 0;
 
-	vdev_raidz_generate_parity_pq(rm);
+	vdev_raidz_generate_parity_pq(rr);
 
-	rm->rm_col[x].rc_size = xsize;
-	rm->rm_col[y].rc_size = ysize;
+	rr->rr_col[x].rc_size = xsize;
+	rr->rr_col[y].rc_size = ysize;
 
 	p = abd_to_buf(pdata);
 	q = abd_to_buf(qdata);
-	pxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
-	qxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
-	xd = rm->rm_col[x].rc_abd;
-	yd = rm->rm_col[y].rc_abd;
+	pxy = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
+	qxy = abd_to_buf(rr->rr_col[VDEV_RAIDZ_Q].rc_abd);
+	xd = rr->rr_col[x].rc_abd;
+	yd = rr->rr_col[y].rc_abd;
 
 	/*
 	 * We now have:
@@ -953,7 +799,7 @@
 	 */
 
 	a = vdev_raidz_pow2[255 + x - y];
-	b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)];
+	b = vdev_raidz_pow2[255 - (rr->rr_cols - 1 - x)];
 	tmp = 255 - vdev_raidz_log2[a ^ 1];
 
 	aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)];
@@ -967,22 +813,20 @@
 	(void) abd_iterate_func(xd, ysize, xsize - ysize,
 	    vdev_raidz_reconst_pq_tail_func, &rpq);
 
-	abd_free(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
-	abd_free(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
+	abd_free(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
+	abd_free(rr->rr_col[VDEV_RAIDZ_Q].rc_abd);
 
 	/*
 	 * Restore the saved parity data.
 	 */
-	rm->rm_col[VDEV_RAIDZ_P].rc_abd = pdata;
-	rm->rm_col[VDEV_RAIDZ_Q].rc_abd = qdata;
-
-	return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q));
+	rr->rr_col[VDEV_RAIDZ_P].rc_abd = pdata;
+	rr->rr_col[VDEV_RAIDZ_Q].rc_abd = qdata;
 }
 
 /* BEGIN CSTYLED */
 /*
  * In the general case of reconstruction, we must solve the system of linear
- * equations defined by the coeffecients used to generate parity as well as
+ * equations defined by the coefficients used to generate parity as well as
  * the contents of the data and parity disks. This can be expressed with
  * vectors for the original data (D) and the actual data (d) and parity (p)
  * and a matrix composed of the identity matrix (I) and a dispersal matrix (V):
@@ -996,7 +840,7 @@
  *            ~~   ~~                     ~~     ~~
  *
  * I is simply a square identity matrix of size n, and V is a vandermonde
- * matrix defined by the coeffecients we chose for the various parity columns
+ * matrix defined by the coefficients we chose for the various parity columns
  * (1, 2, 4). Note that these values were chosen both for simplicity, speedy
  * computation as well as linear separability.
  *
@@ -1134,13 +978,13 @@
 /* END CSTYLED */
 
 static void
-vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map,
+vdev_raidz_matrix_init(raidz_row_t *rr, int n, int nmap, int *map,
     uint8_t **rows)
 {
 	int i, j;
 	int pow;
 
-	ASSERT(n == rm->rm_cols - rm->rm_firstdatacol);
+	ASSERT(n == rr->rr_cols - rr->rr_firstdatacol);
 
 	/*
 	 * Fill in the missing rows of interest.
@@ -1164,7 +1008,7 @@
 }
 
 static void
-vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing,
+vdev_raidz_matrix_invert(raidz_row_t *rr, int n, int nmissing, int *missing,
     uint8_t **rows, uint8_t **invrows, const uint8_t *used)
 {
 	int i, j, ii, jj;
@@ -1176,10 +1020,10 @@
 	 * correspond to data columns.
 	 */
 	for (i = 0; i < nmissing; i++) {
-		ASSERT3S(used[i], <, rm->rm_firstdatacol);
+		ASSERT3S(used[i], <, rr->rr_firstdatacol);
 	}
 	for (; i < n; i++) {
-		ASSERT3S(used[i], >=, rm->rm_firstdatacol);
+		ASSERT3S(used[i], >=, rr->rr_firstdatacol);
 	}
 
 	/*
@@ -1196,8 +1040,8 @@
 	 */
 	for (i = 0; i < nmissing; i++) {
 		for (j = nmissing; j < n; j++) {
-			ASSERT3U(used[j], >=, rm->rm_firstdatacol);
-			jj = used[j] - rm->rm_firstdatacol;
+			ASSERT3U(used[j], >=, rr->rr_firstdatacol);
+			jj = used[j] - rr->rr_firstdatacol;
 			ASSERT3S(jj, <, n);
 			invrows[i][j] = rows[i][jj];
 			rows[i][jj] = 0;
@@ -1258,7 +1102,7 @@
 }
 
 static void
-vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
+vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing,
     int *missing, uint8_t **invrows, const uint8_t *used)
 {
 	int i, j, x, cc, c;
@@ -1290,22 +1134,24 @@
 
 	for (i = 0; i < n; i++) {
 		c = used[i];
-		ASSERT3U(c, <, rm->rm_cols);
+		ASSERT3U(c, <, rr->rr_cols);
 
-		src = abd_to_buf(rm->rm_col[c].rc_abd);
-		ccount = rm->rm_col[c].rc_size;
+		ccount = rr->rr_col[c].rc_size;
+		ASSERT(ccount >= rr->rr_col[missing[0]].rc_size || i > 0);
+		if (ccount == 0)
+			continue;
+		src = abd_to_buf(rr->rr_col[c].rc_abd);
 		for (j = 0; j < nmissing; j++) {
-			cc = missing[j] + rm->rm_firstdatacol;
-			ASSERT3U(cc, >=, rm->rm_firstdatacol);
-			ASSERT3U(cc, <, rm->rm_cols);
+			cc = missing[j] + rr->rr_firstdatacol;
+			ASSERT3U(cc, >=, rr->rr_firstdatacol);
+			ASSERT3U(cc, <, rr->rr_cols);
 			ASSERT3U(cc, !=, c);
 
-			dst[j] = abd_to_buf(rm->rm_col[cc].rc_abd);
-			dcount[j] = rm->rm_col[cc].rc_size;
+			dcount[j] = rr->rr_col[cc].rc_size;
+			if (dcount[j] != 0)
+				dst[j] = abd_to_buf(rr->rr_col[cc].rc_abd);
 		}
 
-		ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0);
-
 		for (x = 0; x < ccount; x++, src++) {
 			if (*src != 0)
 				log = vdev_raidz_log2[*src];
@@ -1333,51 +1179,56 @@
 	kmem_free(p, psize);
 }
 
-static int
-vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
+static void
+vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int n, i, c, t, tt;
 	int nmissing_rows;
 	int missing_rows[VDEV_RAIDZ_MAXPARITY];
 	int parity_map[VDEV_RAIDZ_MAXPARITY];
-
 	uint8_t *p, *pp;
 	size_t psize;
-
 	uint8_t *rows[VDEV_RAIDZ_MAXPARITY];
 	uint8_t *invrows[VDEV_RAIDZ_MAXPARITY];
 	uint8_t *used;
 
 	abd_t **bufs = NULL;
 
-	int code = 0;
-
 	/*
 	 * Matrix reconstruction can't use scatter ABDs yet, so we allocate
-	 * temporary linear ABDs.
+	 * temporary linear ABDs if any non-linear ABDs are found.
 	 */
-	if (!abd_is_linear(rm->rm_col[rm->rm_firstdatacol].rc_abd)) {
-		bufs = kmem_alloc(rm->rm_cols * sizeof (abd_t *), KM_PUSHPAGE);
+	for (i = rr->rr_firstdatacol; i < rr->rr_cols; i++) {
+		if (!abd_is_linear(rr->rr_col[i].rc_abd)) {
+			bufs = kmem_alloc(rr->rr_cols * sizeof (abd_t *),
+			    KM_PUSHPAGE);
 
-		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-			raidz_col_t *col = &rm->rm_col[c];
+			for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+				raidz_col_t *col = &rr->rr_col[c];
 
-			bufs[c] = col->rc_abd;
-			col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE);
-			abd_copy(col->rc_abd, bufs[c], col->rc_size);
+				bufs[c] = col->rc_abd;
+				if (bufs[c] != NULL) {
+					col->rc_abd = abd_alloc_linear(
+					    col->rc_size, B_TRUE);
+					abd_copy(col->rc_abd, bufs[c],
+					    col->rc_size);
+				}
+			}
+
+			break;
 		}
 	}
 
-	n = rm->rm_cols - rm->rm_firstdatacol;
+	n = rr->rr_cols - rr->rr_firstdatacol;
 
 	/*
 	 * Figure out which data columns are missing.
 	 */
 	nmissing_rows = 0;
 	for (t = 0; t < ntgts; t++) {
-		if (tgts[t] >= rm->rm_firstdatacol) {
+		if (tgts[t] >= rr->rr_firstdatacol) {
 			missing_rows[nmissing_rows++] =
-			    tgts[t] - rm->rm_firstdatacol;
+			    tgts[t] - rr->rr_firstdatacol;
 		}
 	}
 
@@ -1387,7 +1238,7 @@
 	 */
 	for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) {
 		ASSERT(tt < ntgts);
-		ASSERT(c < rm->rm_firstdatacol);
+		ASSERT(c < rr->rr_firstdatacol);
 
 		/*
 		 * Skip any targeted parity columns.
@@ -1397,15 +1248,10 @@
 			continue;
 		}
 
-		code |= 1 << c;
-
 		parity_map[i] = c;
 		i++;
 	}
 
-	ASSERT(code != 0);
-	ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY);
-
 	psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) *
 	    nmissing_rows * n + sizeof (used[0]) * n;
 	p = kmem_alloc(psize, KM_SLEEP);
@@ -1422,9 +1268,9 @@
 		used[i] = parity_map[i];
 	}
 
-	for (tt = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+	for (tt = 0, c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
 		if (tt < nmissing_rows &&
-		    c == missing_rows[tt] + rm->rm_firstdatacol) {
+		    c == missing_rows[tt] + rr->rr_firstdatacol) {
 			tt++;
 			continue;
 		}
@@ -1437,18 +1283,18 @@
 	/*
 	 * Initialize the interesting rows of the matrix.
 	 */
-	vdev_raidz_matrix_init(rm, n, nmissing_rows, parity_map, rows);
+	vdev_raidz_matrix_init(rr, n, nmissing_rows, parity_map, rows);
 
 	/*
 	 * Invert the matrix.
 	 */
-	vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows,
+	vdev_raidz_matrix_invert(rr, n, nmissing_rows, missing_rows, rows,
 	    invrows, used);
 
 	/*
 	 * Reconstruct the missing data using the generated matrix.
 	 */
-	vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows,
+	vdev_raidz_matrix_reconstruct(rr, n, nmissing_rows, missing_rows,
 	    invrows, used);
 
 	kmem_free(p, psize);
@@ -1457,49 +1303,42 @@
 	 * copy back from temporary linear abds and free them
 	 */
 	if (bufs) {
-		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-			raidz_col_t *col = &rm->rm_col[c];
+		for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+			raidz_col_t *col = &rr->rr_col[c];
 
-			abd_copy(bufs[c], col->rc_abd, col->rc_size);
-			abd_free(col->rc_abd);
+			if (bufs[c] != NULL) {
+				abd_copy(bufs[c], col->rc_abd, col->rc_size);
+				abd_free(col->rc_abd);
+			}
 			col->rc_abd = bufs[c];
 		}
-		kmem_free(bufs, rm->rm_cols * sizeof (abd_t *));
+		kmem_free(bufs, rr->rr_cols * sizeof (abd_t *));
 	}
-
-	return (code);
 }
 
-int
-vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
+static void
+vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
+    const int *t, int nt)
 {
 	int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
 	int ntgts;
 	int i, c, ret;
-	int code;
 	int nbadparity, nbaddata;
 	int parity_valid[VDEV_RAIDZ_MAXPARITY];
 
-	/*
-	 * The tgts list must already be sorted.
-	 */
-	for (i = 1; i < nt; i++) {
-		ASSERT(t[i] > t[i - 1]);
-	}
-
-	nbadparity = rm->rm_firstdatacol;
-	nbaddata = rm->rm_cols - nbadparity;
+	nbadparity = rr->rr_firstdatacol;
+	nbaddata = rr->rr_cols - nbadparity;
 	ntgts = 0;
-	for (i = 0, c = 0; c < rm->rm_cols; c++) {
-		if (c < rm->rm_firstdatacol)
+	for (i = 0, c = 0; c < rr->rr_cols; c++) {
+		if (c < rr->rr_firstdatacol)
 			parity_valid[c] = B_FALSE;
 
 		if (i < nt && c == t[i]) {
 			tgts[ntgts++] = c;
 			i++;
-		} else if (rm->rm_col[c].rc_error != 0) {
+		} else if (rr->rr_col[c].rc_error != 0) {
 			tgts[ntgts++] = c;
-		} else if (c >= rm->rm_firstdatacol) {
+		} else if (c >= rr->rr_firstdatacol) {
 			nbaddata--;
 		} else {
 			parity_valid[c] = B_TRUE;
@@ -1514,50 +1353,53 @@
 	dt = &tgts[nbadparity];
 
 	/* Reconstruct using the new math implementation */
-	ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
+	ret = vdev_raidz_math_reconstruct(rm, rr, parity_valid, dt, nbaddata);
 	if (ret != RAIDZ_ORIGINAL_IMPL)
-		return (ret);
+		return;
 
 	/*
 	 * See if we can use any of our optimized reconstruction routines.
 	 */
 	switch (nbaddata) {
 	case 1:
-		if (parity_valid[VDEV_RAIDZ_P])
-			return (vdev_raidz_reconstruct_p(rm, dt, 1));
+		if (parity_valid[VDEV_RAIDZ_P]) {
+			vdev_raidz_reconstruct_p(rr, dt, 1);
+			return;
+		}
 
-		ASSERT(rm->rm_firstdatacol > 1);
+		ASSERT(rr->rr_firstdatacol > 1);
 
-		if (parity_valid[VDEV_RAIDZ_Q])
-			return (vdev_raidz_reconstruct_q(rm, dt, 1));
+		if (parity_valid[VDEV_RAIDZ_Q]) {
+			vdev_raidz_reconstruct_q(rr, dt, 1);
+			return;
+		}
 
-		ASSERT(rm->rm_firstdatacol > 2);
+		ASSERT(rr->rr_firstdatacol > 2);
 		break;
 
 	case 2:
-		ASSERT(rm->rm_firstdatacol > 1);
+		ASSERT(rr->rr_firstdatacol > 1);
 
 		if (parity_valid[VDEV_RAIDZ_P] &&
-		    parity_valid[VDEV_RAIDZ_Q])
-			return (vdev_raidz_reconstruct_pq(rm, dt, 2));
+		    parity_valid[VDEV_RAIDZ_Q]) {
+			vdev_raidz_reconstruct_pq(rr, dt, 2);
+			return;
+		}
 
-		ASSERT(rm->rm_firstdatacol > 2);
+		ASSERT(rr->rr_firstdatacol > 2);
 
 		break;
 	}
 
-	code = vdev_raidz_reconstruct_general(rm, tgts, ntgts);
-	ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY));
-	ASSERT(code > 0);
-	return (code);
+	vdev_raidz_reconstruct_general(rr, tgts, ntgts);
 }
 
 static int
 vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
-	vdev_t *cvd;
-	uint64_t nparity = vd->vdev_nparity;
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
+	uint64_t nparity = vdrz->vd_nparity;
 	int c;
 	int lasterror = 0;
 	int numerrors = 0;
@@ -1573,7 +1415,7 @@
 	vdev_open_children(vd);
 
 	for (c = 0; c < vd->vdev_children; c++) {
-		cvd = vd->vdev_child[c];
+		vdev_t *cvd = vd->vdev_child[c];
 
 		if (cvd->vdev_open_error != 0) {
 			lasterror = cvd->vdev_open_error;
@@ -1583,7 +1425,15 @@
 
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-		*ashift = MAX(*ashift, cvd->vdev_ashift);
+		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+	}
+	for (c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error != 0)
+			continue;
+		*physical_ashift = vdev_best_ashift(*logical_ashift,
+		    *physical_ashift, cvd->vdev_physical_ashift);
 	}
 
 	*asize *= vd->vdev_children;
@@ -1600,19 +1450,20 @@
 static void
 vdev_raidz_close(vdev_t *vd)
 {
-	int c;
-
-	for (c = 0; c < vd->vdev_children; c++)
-		vdev_close(vd->vdev_child[c]);
+	for (int c = 0; c < vd->vdev_children; c++) {
+		if (vd->vdev_child[c] != NULL)
+			vdev_close(vd->vdev_child[c]);
+	}
 }
 
 static uint64_t
 vdev_raidz_asize(vdev_t *vd, uint64_t psize)
 {
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
 	uint64_t asize;
 	uint64_t ashift = vd->vdev_top->vdev_ashift;
-	uint64_t cols = vd->vdev_children;
-	uint64_t nparity = vd->vdev_nparity;
+	uint64_t cols = vdrz->vd_logical_width;
+	uint64_t nparity = vdrz->vd_nparity;
 
 	asize = ((psize - 1) >> ashift) + 1;
 	asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
@@ -1621,7 +1472,18 @@
 	return (asize);
 }
 
-static void
+/*
+ * The allocatable space for a raidz vdev is N * sizeof(smallest child)
+ * so each child must provide at least 1/Nth of its asize.
+ */
+static uint64_t
+vdev_raidz_min_asize(vdev_t *vd)
+{
+	return ((vd->vdev_min_asize + vd->vdev_children - 1) /
+	    vd->vdev_children);
+}
+
+void
 vdev_raidz_child_done(zio_t *zio)
 {
 	raidz_col_t *rc = zio->io_private;
@@ -1632,21 +1494,21 @@
 }
 
 static void
-vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, int col)
+vdev_raidz_io_verify(vdev_t *vd, raidz_row_t *rr, int col)
 {
 #ifdef ZFS_DEBUG
-	vdev_t *vd = zio->io_vd;
 	vdev_t *tvd = vd->vdev_top;
 
-	range_seg_t logical_rs, physical_rs;
-	logical_rs.rs_start = zio->io_offset;
+	range_seg64_t logical_rs, physical_rs, remain_rs;
+	logical_rs.rs_start = rr->rr_offset;
 	logical_rs.rs_end = logical_rs.rs_start +
-	    vdev_raidz_asize(zio->io_vd, zio->io_size);
+	    vdev_raidz_asize(vd, rr->rr_size);
 
-	raidz_col_t *rc = &rm->rm_col[col];
+	raidz_col_t *rc = &rr->rr_col[col];
 	vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
 
-	vdev_xlate(cvd, &logical_rs, &physical_rs);
+	vdev_xlate(cvd, &logical_rs, &physical_rs, &remain_rs);
+	ASSERT(vdev_xlate_is_empty(&remain_rs));
 	ASSERT3U(rc->rc_offset, ==, physical_rs.rs_start);
 	ASSERT3U(rc->rc_offset, <, physical_rs.rs_end);
 	/*
@@ -1664,6 +1526,91 @@
 #endif
 }
 
+static void
+vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr, uint64_t ashift)
+{
+	vdev_t *vd = zio->io_vd;
+	raidz_map_t *rm = zio->io_vsd;
+	int c, i;
+
+	vdev_raidz_generate_parity_row(rm, rr);
+
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		if (rc->rc_size == 0)
+			continue;
+
+		/* Verify physical to logical translation */
+		vdev_raidz_io_verify(vd, rr, c);
+
+		zio_nowait(zio_vdev_child_io(zio, NULL,
+		    vd->vdev_child[rc->rc_devidx], rc->rc_offset,
+		    rc->rc_abd, rc->rc_size, zio->io_type, zio->io_priority,
+		    0, vdev_raidz_child_done, rc));
+	}
+
+	/*
+	 * Generate optional I/Os for skip sectors to improve aggregation
+	 * contiguity.
+	 */
+	for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) {
+		ASSERT(c <= rr->rr_scols);
+		if (c == rr->rr_scols)
+			c = 0;
+
+		raidz_col_t *rc = &rr->rr_col[c];
+		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+
+		zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+		    rc->rc_offset + rc->rc_size, NULL, 1ULL << ashift,
+		    zio->io_type, zio->io_priority,
+		    ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
+	}
+}
+
+static void
+vdev_raidz_io_start_read(zio_t *zio, raidz_row_t *rr)
+{
+	vdev_t *vd = zio->io_vd;
+
+	/*
+	 * Iterate over the columns in reverse order so that we hit the parity
+	 * last -- any errors along the way will force us to read the parity.
+	 */
+	for (int c = rr->rr_cols - 1; c >= 0; c--) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		if (rc->rc_size == 0)
+			continue;
+		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
+		if (!vdev_readable(cvd)) {
+			if (c >= rr->rr_firstdatacol)
+				rr->rr_missingdata++;
+			else
+				rr->rr_missingparity++;
+			rc->rc_error = SET_ERROR(ENXIO);
+			rc->rc_tried = 1;	/* don't even try */
+			rc->rc_skipped = 1;
+			continue;
+		}
+		if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) {
+			if (c >= rr->rr_firstdatacol)
+				rr->rr_missingdata++;
+			else
+				rr->rr_missingparity++;
+			rc->rc_error = SET_ERROR(ESTALE);
+			rc->rc_skipped = 1;
+			continue;
+		}
+		if (c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 ||
+		    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
+			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
+			    rc->rc_offset, rc->rc_abd, rc->rc_size,
+			    zio->io_type, zio->io_priority, 0,
+			    vdev_raidz_child_done, rc));
+		}
+	}
+}
+
 /*
  * Start an IO operation on a RAIDZ VDev
  *
@@ -1686,118 +1633,52 @@
 {
 	vdev_t *vd = zio->io_vd;
 	vdev_t *tvd = vd->vdev_top;
-	vdev_t *cvd;
-	raidz_map_t *rm;
-	raidz_col_t *rc;
-	int c, i;
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
 
-	rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
-	    vd->vdev_nparity);
-
-	ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size));
-
-	if (zio->io_type == ZIO_TYPE_WRITE) {
-		vdev_raidz_generate_parity(rm);
-
-		for (c = 0; c < rm->rm_cols; c++) {
-			rc = &rm->rm_col[c];
-			cvd = vd->vdev_child[rc->rc_devidx];
-
-			/*
-			 * Verify physical to logical translation.
-			 */
-			vdev_raidz_io_verify(zio, rm, c);
-
-			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
-			    rc->rc_offset, rc->rc_abd, rc->rc_size,
-			    zio->io_type, zio->io_priority, 0,
-			    vdev_raidz_child_done, rc));
-		}
-
-		/*
-		 * Generate optional I/Os for any skipped sectors to improve
-		 * aggregation contiguity.
-		 */
-		for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) {
-			ASSERT(c <= rm->rm_scols);
-			if (c == rm->rm_scols)
-				c = 0;
-			rc = &rm->rm_col[c];
-			cvd = vd->vdev_child[rc->rc_devidx];
-			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
-			    rc->rc_offset + rc->rc_size, NULL,
-			    1 << tvd->vdev_ashift,
-			    zio->io_type, zio->io_priority,
-			    ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
-		}
-
-		zio_execute(zio);
-		return;
-	}
-
-	ASSERT(zio->io_type == ZIO_TYPE_READ);
+	raidz_map_t *rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift,
+	    vdrz->vd_logical_width, vdrz->vd_nparity);
+	zio->io_vsd = rm;
+	zio->io_vsd_ops = &vdev_raidz_vsd_ops;
 
 	/*
-	 * Iterate over the columns in reverse order so that we hit the parity
-	 * last -- any errors along the way will force us to read the parity.
+	 * Until raidz expansion is implemented all maps for a raidz vdev
+	 * contain a single row.
 	 */
-	for (c = rm->rm_cols - 1; c >= 0; c--) {
-		rc = &rm->rm_col[c];
-		cvd = vd->vdev_child[rc->rc_devidx];
-		if (!vdev_readable(cvd)) {
-			if (c >= rm->rm_firstdatacol)
-				rm->rm_missingdata++;
-			else
-				rm->rm_missingparity++;
-			rc->rc_error = SET_ERROR(ENXIO);
-			rc->rc_tried = 1;	/* don't even try */
-			rc->rc_skipped = 1;
-			continue;
-		}
-		if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) {
-			if (c >= rm->rm_firstdatacol)
-				rm->rm_missingdata++;
-			else
-				rm->rm_missingparity++;
-			rc->rc_error = SET_ERROR(ESTALE);
-			rc->rc_skipped = 1;
-			continue;
-		}
-		if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
-		    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
-			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
-			    rc->rc_offset, rc->rc_abd, rc->rc_size,
-			    zio->io_type, zio->io_priority, 0,
-			    vdev_raidz_child_done, rc));
-		}
+	ASSERT3U(rm->rm_nrows, ==, 1);
+	raidz_row_t *rr = rm->rm_row[0];
+
+	if (zio->io_type == ZIO_TYPE_WRITE) {
+		vdev_raidz_io_start_write(zio, rr, tvd->vdev_ashift);
+	} else {
+		ASSERT(zio->io_type == ZIO_TYPE_READ);
+		vdev_raidz_io_start_read(zio, rr);
 	}
 
 	zio_execute(zio);
 }
 
-
 /*
  * Report a checksum error for a child of a RAID-Z device.
  */
-static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
+void
+vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
 {
 	vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
 
-	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE) &&
+	    zio->io_priority != ZIO_PRIORITY_REBUILD) {
 		zio_bad_cksum_t zbc;
 		raidz_map_t *rm = zio->io_vsd;
 
-		mutex_enter(&vd->vdev_stat_lock);
-		vd->vdev_stat.vs_checksum_errors++;
-		mutex_exit(&vd->vdev_stat_lock);
-
 		zbc.zbc_has_cksum = 0;
 		zbc.zbc_injected = rm->rm_ecksuminjected;
 
-		zfs_ereport_post_checksum(zio->io_spa, vd,
+		(void) zfs_ereport_post_checksum(zio->io_spa, vd,
 		    &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
 		    rc->rc_abd, bad_data, &zbc);
+		mutex_enter(&vd->vdev_stat_lock);
+		vd->vdev_stat.vs_checksum_errors++;
+		mutex_exit(&vd->vdev_stat_lock);
 	}
 }
 
@@ -1824,13 +1705,14 @@
  * Generate the parity from the data columns. If we tried and were able to
  * read the parity without error, verify that the generated parity matches the
  * data we read. If it doesn't, we fire off a checksum error. Return the
- * number such failures.
+ * number of such failures.
  */
 static int
-raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
+raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
 {
 	abd_t *orig[VDEV_RAIDZ_MAXPARITY];
 	int c, ret = 0;
+	raidz_map_t *rm = zio->io_vsd;
 	raidz_col_t *rc;
 
 	blkptr_t *bp = zio->io_bp;
@@ -1840,23 +1722,38 @@
 	if (checksum == ZIO_CHECKSUM_NOPARITY)
 		return (ret);
 
-	for (c = 0; c < rm->rm_firstdatacol; c++) {
-		rc = &rm->rm_col[c];
+	for (c = 0; c < rr->rr_firstdatacol; c++) {
+		rc = &rr->rr_col[c];
 		if (!rc->rc_tried || rc->rc_error != 0)
 			continue;
 
-		orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
-		abd_copy(orig[c], rc->rc_abd, rc->rc_size);
+		orig[c] = rc->rc_abd;
+		ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
+		rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
 	}
 
-	vdev_raidz_generate_parity(rm);
+	/*
+	 * Verify any empty sectors are zero filled to ensure the parity
+	 * is calculated correctly even if these non-data sectors are damaged.
+	 */
+	if (rr->rr_nempty && rr->rr_abd_empty != NULL)
+		ret += vdev_draid_map_verify_empty(zio, rr);
 
-	for (c = 0; c < rm->rm_firstdatacol; c++) {
-		rc = &rm->rm_col[c];
+	/*
+	 * Regenerates parity even for !tried||rc_error!=0 columns.  This
+	 * isn't harmful but it does have the side effect of fixing stuff
+	 * we didn't realize was necessary (i.e. even if we return 0).
+	 */
+	vdev_raidz_generate_parity_row(rm, rr);
+
+	for (c = 0; c < rr->rr_firstdatacol; c++) {
+		rc = &rr->rr_col[c];
+
 		if (!rc->rc_tried || rc->rc_error != 0)
 			continue;
+
 		if (abd_cmp(orig[c], rc->rc_abd) != 0) {
-			raidz_checksum_error(zio, rc, orig[c]);
+			vdev_raidz_checksum_error(zio, rc, orig[c]);
 			rc->rc_error = SET_ERROR(ECKSUM);
 			ret++;
 		}
@@ -1867,454 +1764,82 @@
 }
 
 static int
-vdev_raidz_worst_error(raidz_map_t *rm)
+vdev_raidz_worst_error(raidz_row_t *rr)
 {
 	int error = 0;
 
-	for (int c = 0; c < rm->rm_cols; c++)
-		error = zio_worst_error(error, rm->rm_col[c].rc_error);
+	for (int c = 0; c < rr->rr_cols; c++)
+		error = zio_worst_error(error, rr->rr_col[c].rc_error);
 
 	return (error);
 }
 
-/*
- * Iterate over all combinations of bad data and attempt a reconstruction.
- * Note that the algorithm below is non-optimal because it doesn't take into
- * account how reconstruction is actually performed. For example, with
- * triple-parity RAID-Z the reconstruction procedure is the same if column 4
- * is targeted as invalid as if columns 1 and 4 are targeted since in both
- * cases we'd only use parity information in column 0.
- */
-static int
-vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
-{
-	raidz_map_t *rm = zio->io_vsd;
-	raidz_col_t *rc;
-	abd_t *orig[VDEV_RAIDZ_MAXPARITY];
-	int tstore[VDEV_RAIDZ_MAXPARITY + 2];
-	int *tgts = &tstore[1];
-	int curr, next, i, c, n;
-	int code, ret = 0;
-
-	ASSERT(total_errors < rm->rm_firstdatacol);
-
-	/*
-	 * This simplifies one edge condition.
-	 */
-	tgts[-1] = -1;
-
-	for (n = 1; n <= rm->rm_firstdatacol - total_errors; n++) {
-		/*
-		 * Initialize the targets array by finding the first n columns
-		 * that contain no error.
-		 *
-		 * If there were no data errors, we need to ensure that we're
-		 * always explicitly attempting to reconstruct at least one
-		 * data column. To do this, we simply push the highest target
-		 * up into the data columns.
-		 */
-		for (c = 0, i = 0; i < n; i++) {
-			if (i == n - 1 && data_errors == 0 &&
-			    c < rm->rm_firstdatacol) {
-				c = rm->rm_firstdatacol;
-			}
-
-			while (rm->rm_col[c].rc_error != 0) {
-				c++;
-				ASSERT3S(c, <, rm->rm_cols);
-			}
-
-			tgts[i] = c++;
-		}
-
-		/*
-		 * Setting tgts[n] simplifies the other edge condition.
-		 */
-		tgts[n] = rm->rm_cols;
-
-		/*
-		 * These buffers were allocated in previous iterations.
-		 */
-		for (i = 0; i < n - 1; i++) {
-			ASSERT(orig[i] != NULL);
-		}
-
-		orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd,
-		    rm->rm_col[0].rc_size);
-
-		curr = 0;
-		next = tgts[curr];
-
-		while (curr != n) {
-			tgts[curr] = next;
-			curr = 0;
-
-			/*
-			 * Save off the original data that we're going to
-			 * attempt to reconstruct.
-			 */
-			for (i = 0; i < n; i++) {
-				ASSERT(orig[i] != NULL);
-				c = tgts[i];
-				ASSERT3S(c, >=, 0);
-				ASSERT3S(c, <, rm->rm_cols);
-				rc = &rm->rm_col[c];
-				abd_copy(orig[i], rc->rc_abd, rc->rc_size);
-			}
-
-			/*
-			 * Attempt a reconstruction and exit the outer loop on
-			 * success.
-			 */
-			code = vdev_raidz_reconstruct(rm, tgts, n);
-			if (raidz_checksum_verify(zio) == 0) {
-
-				for (i = 0; i < n; i++) {
-					c = tgts[i];
-					rc = &rm->rm_col[c];
-					ASSERT(rc->rc_error == 0);
-					if (rc->rc_tried)
-						raidz_checksum_error(zio, rc,
-						    orig[i]);
-					rc->rc_error = SET_ERROR(ECKSUM);
-				}
-
-				ret = code;
-				goto done;
-			}
-
-			/*
-			 * Restore the original data.
-			 */
-			for (i = 0; i < n; i++) {
-				c = tgts[i];
-				rc = &rm->rm_col[c];
-				abd_copy(rc->rc_abd, orig[i], rc->rc_size);
-			}
-
-			do {
-				/*
-				 * Find the next valid column after the curr
-				 * position..
-				 */
-				for (next = tgts[curr] + 1;
-				    next < rm->rm_cols &&
-				    rm->rm_col[next].rc_error != 0; next++)
-					continue;
-
-				ASSERT(next <= tgts[curr + 1]);
-
-				/*
-				 * If that spot is available, we're done here.
-				 */
-				if (next != tgts[curr + 1])
-					break;
-
-				/*
-				 * Otherwise, find the next valid column after
-				 * the previous position.
-				 */
-				for (c = tgts[curr - 1] + 1;
-				    rm->rm_col[c].rc_error != 0; c++)
-					continue;
-
-				tgts[curr] = c;
-				curr++;
-
-			} while (curr != n);
-		}
-	}
-	n--;
-done:
-	for (i = 0; i < n; i++)
-		abd_free(orig[i]);
-
-	return (ret);
-}
-
-/*
- * Complete an IO operation on a RAIDZ VDev
- *
- * Outline:
- * - For write operations:
- *   1. Check for errors on the child IOs.
- *   2. Return, setting an error code if too few child VDevs were written
- *      to reconstruct the data later.  Note that partial writes are
- *      considered successful if they can be reconstructed at all.
- * - For read operations:
- *   1. Check for errors on the child IOs.
- *   2. If data errors occurred:
- *      a. Try to reassemble the data from the parity available.
- *      b. If we haven't yet read the parity drives, read them now.
- *      c. If all parity drives have been read but the data still doesn't
- *         reassemble with a correct checksum, then try combinatorial
- *         reconstruction.
- *      d. If that doesn't work, return an error.
- *   3. If there were unexpected errors or this is a resilver operation,
- *      rewrite the vdevs that had errors.
- */
 static void
-vdev_raidz_io_done(zio_t *zio)
+vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
 {
-	vdev_t *vd = zio->io_vd;
-	vdev_t *cvd;
-	raidz_map_t *rm = zio->io_vsd;
-	raidz_col_t *rc = NULL;
 	int unexpected_errors = 0;
 	int parity_errors = 0;
 	int parity_untried = 0;
 	int data_errors = 0;
-	int total_errors = 0;
-	int n, c;
-	int tgts[VDEV_RAIDZ_MAXPARITY];
-	int code;
 
-	ASSERT(zio->io_bp != NULL);  /* XXX need to add code to enforce this */
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
 
-	ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol);
-	ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol);
-
-	for (c = 0; c < rm->rm_cols; c++) {
-		rc = &rm->rm_col[c];
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
 
 		if (rc->rc_error) {
-			ASSERT(rc->rc_error != ECKSUM);	/* child has no bp */
-
-			if (c < rm->rm_firstdatacol)
+			if (c < rr->rr_firstdatacol)
 				parity_errors++;
 			else
 				data_errors++;
 
 			if (!rc->rc_skipped)
 				unexpected_errors++;
-
-			total_errors++;
-		} else if (c < rm->rm_firstdatacol && !rc->rc_tried) {
+		} else if (c < rr->rr_firstdatacol && !rc->rc_tried) {
 			parity_untried++;
 		}
+
+		if (rc->rc_force_repair)
+			unexpected_errors++;
 	}
 
-	if (zio->io_type == ZIO_TYPE_WRITE) {
-		/*
-		 * XXX -- for now, treat partial writes as a success.
-		 * (If we couldn't write enough columns to reconstruct
-		 * the data, the I/O failed.  Otherwise, good enough.)
-		 *
-		 * Now that we support write reallocation, it would be better
-		 * to treat partial failure as real failure unless there are
-		 * no non-degraded top-level vdevs left, and not update DTLs
-		 * if we intend to reallocate.
-		 */
-		/* XXPOLICY */
-		if (total_errors > rm->rm_firstdatacol)
-			zio->io_error = vdev_raidz_worst_error(rm);
-
-		return;
-	}
-
-	ASSERT(zio->io_type == ZIO_TYPE_READ);
 	/*
-	 * There are three potential phases for a read:
-	 *	1. produce valid data from the columns read
-	 *	2. read all disks and try again
-	 *	3. perform combinatorial reconstruction
+	 * If we read more parity disks than were used for
+	 * reconstruction, confirm that the other parity disks produced
+	 * correct data.
 	 *
-	 * Each phase is progressively both more expensive and less likely to
-	 * occur. If we encounter more errors than we can repair or all phases
-	 * fail, we have no choice but to return an error.
+	 * Note that we also regenerate parity when resilvering so we
+	 * can write it out to failed devices later.
 	 */
-
-	/*
-	 * If the number of errors we saw was correctable -- less than or equal
-	 * to the number of parity disks read -- attempt to produce data that
-	 * has a valid checksum. Naturally, this case applies in the absence of
-	 * any errors.
-	 */
-	if (total_errors <= rm->rm_firstdatacol - parity_untried) {
-		if (data_errors == 0) {
-			if (raidz_checksum_verify(zio) == 0) {
-				/*
-				 * If we read parity information (unnecessarily
-				 * as it happens since no reconstruction was
-				 * needed) regenerate and verify the parity.
-				 * We also regenerate parity when resilvering
-				 * so we can write it out to the failed device
-				 * later.
-				 */
-				if (parity_errors + parity_untried <
-				    rm->rm_firstdatacol ||
-				    (zio->io_flags & ZIO_FLAG_RESILVER)) {
-					n = raidz_parity_verify(zio, rm);
-					unexpected_errors += n;
-					ASSERT(parity_errors + n <=
-					    rm->rm_firstdatacol);
-				}
-				goto done;
-			}
-		} else {
-			/*
-			 * We either attempt to read all the parity columns or
-			 * none of them. If we didn't try to read parity, we
-			 * wouldn't be here in the correctable case. There must
-			 * also have been fewer parity errors than parity
-			 * columns or, again, we wouldn't be in this code path.
-			 */
-			ASSERT(parity_untried == 0);
-			ASSERT(parity_errors < rm->rm_firstdatacol);
-
-			/*
-			 * Identify the data columns that reported an error.
-			 */
-			n = 0;
-			for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
-				rc = &rm->rm_col[c];
-				if (rc->rc_error != 0) {
-					ASSERT(n < VDEV_RAIDZ_MAXPARITY);
-					tgts[n++] = c;
-				}
-			}
-
-			ASSERT(rm->rm_firstdatacol >= n);
-
-			code = vdev_raidz_reconstruct(rm, tgts, n);
-
-			if (raidz_checksum_verify(zio) == 0) {
-				/*
-				 * If we read more parity disks than were used
-				 * for reconstruction, confirm that the other
-				 * parity disks produced correct data. This
-				 * routine is suboptimal in that it regenerates
-				 * the parity that we already used in addition
-				 * to the parity that we're attempting to
-				 * verify, but this should be a relatively
-				 * uncommon case, and can be optimized if it
-				 * becomes a problem. Note that we regenerate
-				 * parity when resilvering so we can write it
-				 * out to failed devices later.
-				 */
-				if (parity_errors < rm->rm_firstdatacol - n ||
-				    (zio->io_flags & ZIO_FLAG_RESILVER)) {
-					n = raidz_parity_verify(zio, rm);
-					unexpected_errors += n;
-					ASSERT(parity_errors + n <=
-					    rm->rm_firstdatacol);
-				}
-
-				goto done;
-			}
-		}
+	if (parity_errors + parity_untried <
+	    rr->rr_firstdatacol - data_errors ||
+	    (zio->io_flags & ZIO_FLAG_RESILVER)) {
+		int n = raidz_parity_verify(zio, rr);
+		unexpected_errors += n;
 	}
 
-	/*
-	 * This isn't a typical situation -- either we got a read error or
-	 * a child silently returned bad data. Read every block so we can
-	 * try again with as much data and parity as we can track down. If
-	 * we've already been through once before, all children will be marked
-	 * as tried so we'll proceed to combinatorial reconstruction.
-	 */
-	unexpected_errors = 1;
-	rm->rm_missingdata = 0;
-	rm->rm_missingparity = 0;
-
-	for (c = 0; c < rm->rm_cols; c++) {
-		if (rm->rm_col[c].rc_tried)
-			continue;
-
-		zio_vdev_io_redone(zio);
-		do {
-			rc = &rm->rm_col[c];
-			if (rc->rc_tried)
-				continue;
-			zio_nowait(zio_vdev_child_io(zio, NULL,
-			    vd->vdev_child[rc->rc_devidx],
-			    rc->rc_offset, rc->rc_abd, rc->rc_size,
-			    zio->io_type, zio->io_priority, 0,
-			    vdev_raidz_child_done, rc));
-		} while (++c < rm->rm_cols);
-
-		return;
-	}
-
-	/*
-	 * At this point we've attempted to reconstruct the data given the
-	 * errors we detected, and we've attempted to read all columns. There
-	 * must, therefore, be one or more additional problems -- silent errors
-	 * resulting in invalid data rather than explicit I/O errors resulting
-	 * in absent data. We check if there is enough additional data to
-	 * possibly reconstruct the data and then perform combinatorial
-	 * reconstruction over all possible combinations. If that fails,
-	 * we're cooked.
-	 */
-	if (total_errors > rm->rm_firstdatacol) {
-		zio->io_error = vdev_raidz_worst_error(rm);
-
-	} else if (total_errors < rm->rm_firstdatacol &&
-	    (code = vdev_raidz_combrec(zio, total_errors, data_errors)) != 0) {
-		/*
-		 * If we didn't use all the available parity for the
-		 * combinatorial reconstruction, verify that the remaining
-		 * parity is correct.
-		 */
-		if (code != (1 << rm->rm_firstdatacol) - 1)
-			(void) raidz_parity_verify(zio, rm);
-	} else {
-		/*
-		 * We're here because either:
-		 *
-		 *	total_errors == rm_first_datacol, or
-		 *	vdev_raidz_combrec() failed
-		 *
-		 * In either case, there is enough bad data to prevent
-		 * reconstruction.
-		 *
-		 * Start checksum ereports for all children which haven't
-		 * failed, and the IO wasn't speculative.
-		 */
-		zio->io_error = SET_ERROR(ECKSUM);
-
-		if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
-			for (c = 0; c < rm->rm_cols; c++) {
-				vdev_t *cvd;
-				rc = &rm->rm_col[c];
-				cvd = vd->vdev_child[rc->rc_devidx];
-				if (rc->rc_error == 0) {
-					zio_bad_cksum_t zbc;
-					zbc.zbc_has_cksum = 0;
-					zbc.zbc_injected =
-					    rm->rm_ecksuminjected;
-
-					mutex_enter(&cvd->vdev_stat_lock);
-					cvd->vdev_stat.vs_checksum_errors++;
-					mutex_exit(&cvd->vdev_stat_lock);
-
-					zfs_ereport_start_checksum(
-					    zio->io_spa, cvd,
-					    &zio->io_bookmark, zio,
-					    rc->rc_offset, rc->rc_size,
-					    (void *)(uintptr_t)c, &zbc);
-				}
-			}
-		}
-	}
-
-done:
-	zio_checksum_verified(zio);
-
 	if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
-	    (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) {
+	    (unexpected_errors > 0 || (zio->io_flags & ZIO_FLAG_RESILVER))) {
 		/*
 		 * Use the good data we have in hand to repair damaged children.
 		 */
-		for (c = 0; c < rm->rm_cols; c++) {
-			rc = &rm->rm_col[c];
-			cvd = vd->vdev_child[rc->rc_devidx];
+		for (int c = 0; c < rr->rr_cols; c++) {
+			raidz_col_t *rc = &rr->rr_col[c];
+			vdev_t *vd = zio->io_vd;
+			vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
 
-			if (rc->rc_error == 0)
+			if (!rc->rc_allow_repair) {
 				continue;
+			} else if (!rc->rc_force_repair &&
+			    (rc->rc_error == 0 || rc->rc_size == 0)) {
+				continue;
+			}
 
 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
 			    rc->rc_offset, rc->rc_abd, rc->rc_size,
-			    ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
+			    ZIO_TYPE_WRITE,
+			    zio->io_priority == ZIO_PRIORITY_REBUILD ?
+			    ZIO_PRIORITY_REBUILD : ZIO_PRIORITY_ASYNC_WRITE,
 			    ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
 			    ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
 		}
@@ -2322,9 +1847,526 @@
 }
 
 static void
+raidz_restore_orig_data(raidz_map_t *rm)
+{
+	for (int i = 0; i < rm->rm_nrows; i++) {
+		raidz_row_t *rr = rm->rm_row[i];
+		for (int c = 0; c < rr->rr_cols; c++) {
+			raidz_col_t *rc = &rr->rr_col[c];
+			if (rc->rc_need_orig_restore) {
+				abd_copy(rc->rc_abd,
+				    rc->rc_orig_data, rc->rc_size);
+				rc->rc_need_orig_restore = B_FALSE;
+			}
+		}
+	}
+}
+
+/*
+ * returns EINVAL if reconstruction of the block will not be possible
+ * returns ECKSUM if this specific reconstruction failed
+ * returns 0 on successful reconstruction
+ */
+static int
+raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
+{
+	raidz_map_t *rm = zio->io_vsd;
+
+	/* Reconstruct each row */
+	for (int r = 0; r < rm->rm_nrows; r++) {
+		raidz_row_t *rr = rm->rm_row[r];
+		int my_tgts[VDEV_RAIDZ_MAXPARITY]; /* value is child id */
+		int t = 0;
+		int dead = 0;
+		int dead_data = 0;
+
+		for (int c = 0; c < rr->rr_cols; c++) {
+			raidz_col_t *rc = &rr->rr_col[c];
+			ASSERT0(rc->rc_need_orig_restore);
+			if (rc->rc_error != 0) {
+				dead++;
+				if (c >= nparity)
+					dead_data++;
+				continue;
+			}
+			if (rc->rc_size == 0)
+				continue;
+			for (int lt = 0; lt < ntgts; lt++) {
+				if (rc->rc_devidx == ltgts[lt]) {
+					if (rc->rc_orig_data == NULL) {
+						rc->rc_orig_data =
+						    abd_alloc_linear(
+						    rc->rc_size, B_TRUE);
+						abd_copy(rc->rc_orig_data,
+						    rc->rc_abd, rc->rc_size);
+					}
+					rc->rc_need_orig_restore = B_TRUE;
+
+					dead++;
+					if (c >= nparity)
+						dead_data++;
+					my_tgts[t++] = c;
+					break;
+				}
+			}
+		}
+		if (dead > nparity) {
+			/* reconstruction not possible */
+			raidz_restore_orig_data(rm);
+			return (EINVAL);
+		}
+		if (dead_data > 0)
+			vdev_raidz_reconstruct_row(rm, rr, my_tgts, t);
+	}
+
+	/* Check for success */
+	if (raidz_checksum_verify(zio) == 0) {
+
+		/* Reconstruction succeeded - report errors */
+		for (int i = 0; i < rm->rm_nrows; i++) {
+			raidz_row_t *rr = rm->rm_row[i];
+
+			for (int c = 0; c < rr->rr_cols; c++) {
+				raidz_col_t *rc = &rr->rr_col[c];
+				if (rc->rc_need_orig_restore) {
+					/*
+					 * Note: if this is a parity column,
+					 * we don't really know if it's wrong.
+					 * We need to let
+					 * vdev_raidz_io_done_verified() check
+					 * it, and if we set rc_error, it will
+					 * think that it is a "known" error
+					 * that doesn't need to be checked
+					 * or corrected.
+					 */
+					if (rc->rc_error == 0 &&
+					    c >= rr->rr_firstdatacol) {
+						vdev_raidz_checksum_error(zio,
+						    rc, rc->rc_orig_data);
+						rc->rc_error =
+						    SET_ERROR(ECKSUM);
+					}
+					rc->rc_need_orig_restore = B_FALSE;
+				}
+			}
+
+			vdev_raidz_io_done_verified(zio, rr);
+		}
+
+		zio_checksum_verified(zio);
+
+		return (0);
+	}
+
+	/* Reconstruction failed - restore original data */
+	raidz_restore_orig_data(rm);
+	return (ECKSUM);
+}
+
+/*
+ * Iterate over all combinations of N bad vdevs and attempt a reconstruction.
+ * Note that the algorithm below is non-optimal because it doesn't take into
+ * account how reconstruction is actually performed. For example, with
+ * triple-parity RAID-Z the reconstruction procedure is the same if column 4
+ * is targeted as invalid as if columns 1 and 4 are targeted since in both
+ * cases we'd only use parity information in column 0.
+ *
+ * The order that we find the various possible combinations of failed
+ * disks is dictated by these rules:
+ * - Examine each "slot" (the "i" in tgts[i])
+ *   - Try to increment this slot (tgts[i] = tgts[i] + 1)
+ *   - if we can't increment because it runs into the next slot,
+ *     reset our slot to the minimum, and examine the next slot
+ *
+ *  For example, with a 6-wide RAIDZ3, and no known errors (so we have to choose
+ *  3 columns to reconstruct), we will generate the following sequence:
+ *
+ *  STATE        ACTION
+ *  0 1 2        special case: skip since these are all parity
+ *  0 1   3      first slot: reset to 0; middle slot: increment to 2
+ *  0   2 3      first slot: increment to 1
+ *    1 2 3      first: reset to 0; middle: reset to 1; last: increment to 4
+ *  0 1     4    first: reset to 0; middle: increment to 2
+ *  0   2   4    first: increment to 1
+ *    1 2   4    first: reset to 0; middle: increment to 3
+ *  0     3 4    first: increment to 1
+ *    1   3 4    first: increment to 2
+ *      2 3 4    first: reset to 0; middle: reset to 1; last: increment to 5
+ *  0 1       5  first: reset to 0; middle: increment to 2
+ *  0   2     5  first: increment to 1
+ *    1 2     5  first: reset to 0; middle: increment to 3
+ *  0     3   5  first: increment to 1
+ *    1   3   5  first: increment to 2
+ *      2 3   5  first: reset to 0; middle: increment to 4
+ *  0       4 5  first: increment to 1
+ *    1     4 5  first: increment to 2
+ *      2   4 5  first: increment to 3
+ *        3 4 5  done
+ *
+ * This strategy works for dRAID but is less efficient when there are a large
+ * number of child vdevs and therefore permutations to check. Furthermore,
+ * since the raidz_map_t rows likely do not overlap, reconstruction would be
+ * possible as long as there are no more than nparity data errors per row.
+ * These additional permutations are not currently checked but could be as
+ * a future improvement.
+ */
+static int
+vdev_raidz_combrec(zio_t *zio)
+{
+	int nparity = vdev_get_nparity(zio->io_vd);
+	raidz_map_t *rm = zio->io_vsd;
+
+	/* Check if there's enough data to attempt reconstruction. */
+	for (int i = 0; i < rm->rm_nrows; i++) {
+		raidz_row_t *rr = rm->rm_row[i];
+		int total_errors = 0;
+
+		for (int c = 0; c < rr->rr_cols; c++) {
+			if (rr->rr_col[c].rc_error)
+				total_errors++;
+		}
+
+		if (total_errors > nparity)
+			return (vdev_raidz_worst_error(rr));
+	}
+
+	for (int num_failures = 1; num_failures <= nparity; num_failures++) {
+		int tstore[VDEV_RAIDZ_MAXPARITY + 2];
+		int *ltgts = &tstore[1]; /* value is logical child ID */
+
+		/* Determine number of logical children, n */
+		int n = zio->io_vd->vdev_children;
+
+		ASSERT3U(num_failures, <=, nparity);
+		ASSERT3U(num_failures, <=, VDEV_RAIDZ_MAXPARITY);
+
+		/* Handle corner cases in combrec logic */
+		ltgts[-1] = -1;
+		for (int i = 0; i < num_failures; i++) {
+			ltgts[i] = i;
+		}
+		ltgts[num_failures] = n;
+
+		for (;;) {
+			int err = raidz_reconstruct(zio, ltgts, num_failures,
+			    nparity);
+			if (err == EINVAL) {
+				/*
+				 * Reconstruction not possible with this #
+				 * failures; try more failures.
+				 */
+				break;
+			} else if (err == 0)
+				return (0);
+
+			/* Compute next targets to try */
+			for (int t = 0; ; t++) {
+				ASSERT3U(t, <, num_failures);
+				ltgts[t]++;
+				if (ltgts[t] == n) {
+					/* try more failures */
+					ASSERT3U(t, ==, num_failures - 1);
+					break;
+				}
+
+				ASSERT3U(ltgts[t], <, n);
+				ASSERT3U(ltgts[t], <=, ltgts[t + 1]);
+
+				/*
+				 * If that spot is available, we're done here.
+				 * Try the next combination.
+				 */
+				if (ltgts[t] != ltgts[t + 1])
+					break;
+
+				/*
+				 * Otherwise, reset this tgt to the minimum,
+				 * and move on to the next tgt.
+				 */
+				ltgts[t] = ltgts[t - 1] + 1;
+				ASSERT3U(ltgts[t], ==, t);
+			}
+
+			/* Increase the number of failures and keep trying. */
+			if (ltgts[num_failures - 1] == n)
+				break;
+		}
+	}
+
+	return (ECKSUM);
+}
+
+void
+vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
+{
+	for (uint64_t row = 0; row < rm->rm_nrows; row++) {
+		raidz_row_t *rr = rm->rm_row[row];
+		vdev_raidz_reconstruct_row(rm, rr, t, nt);
+	}
+}
+
+/*
+ * Complete a write IO operation on a RAIDZ VDev
+ *
+ * Outline:
+ *   1. Check for errors on the child IOs.
+ *   2. Return, setting an error code if too few child VDevs were written
+ *      to reconstruct the data later.  Note that partial writes are
+ *      considered successful if they can be reconstructed at all.
+ */
+static void
+vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr)
+{
+	int total_errors = 0;
+
+	ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol);
+	ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol);
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
+
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		if (rc->rc_error) {
+			ASSERT(rc->rc_error != ECKSUM);	/* child has no bp */
+
+			total_errors++;
+		}
+	}
+
+	/*
+	 * Treat partial writes as a success. If we couldn't write enough
+	 * columns to reconstruct the data, the I/O failed.  Otherwise,
+	 * good enough.
+	 *
+	 * Now that we support write reallocation, it would be better
+	 * to treat partial failure as real failure unless there are
+	 * no non-degraded top-level vdevs left, and not update DTLs
+	 * if we intend to reallocate.
+	 */
+	if (total_errors > rr->rr_firstdatacol) {
+		zio->io_error = zio_worst_error(zio->io_error,
+		    vdev_raidz_worst_error(rr));
+	}
+}
+
+static void
+vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
+    raidz_row_t *rr)
+{
+	int parity_errors = 0;
+	int parity_untried = 0;
+	int data_errors = 0;
+	int total_errors = 0;
+
+	ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol);
+	ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol);
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+
+		/*
+		 * If scrubbing and a replacing/sparing child vdev determined
+		 * that not all of its children have an identical copy of the
+		 * data, then clear the error so the column is treated like
+		 * any other read and force a repair to correct the damage.
+		 */
+		if (rc->rc_error == ECKSUM) {
+			ASSERT(zio->io_flags & ZIO_FLAG_SCRUB);
+			vdev_raidz_checksum_error(zio, rc, rc->rc_abd);
+			rc->rc_force_repair = 1;
+			rc->rc_error = 0;
+		}
+
+		if (rc->rc_error) {
+			if (c < rr->rr_firstdatacol)
+				parity_errors++;
+			else
+				data_errors++;
+
+			total_errors++;
+		} else if (c < rr->rr_firstdatacol && !rc->rc_tried) {
+			parity_untried++;
+		}
+	}
+
+	/*
+	 * If there were data errors and the number of errors we saw was
+	 * correctable -- less than or equal to the number of parity disks read
+	 * -- reconstruct based on the missing data.
+	 */
+	if (data_errors != 0 &&
+	    total_errors <= rr->rr_firstdatacol - parity_untried) {
+		/*
+		 * We either attempt to read all the parity columns or
+		 * none of them. If we didn't try to read parity, we
+		 * wouldn't be here in the correctable case. There must
+		 * also have been fewer parity errors than parity
+		 * columns or, again, we wouldn't be in this code path.
+		 */
+		ASSERT(parity_untried == 0);
+		ASSERT(parity_errors < rr->rr_firstdatacol);
+
+		/*
+		 * Identify the data columns that reported an error.
+		 */
+		int n = 0;
+		int tgts[VDEV_RAIDZ_MAXPARITY];
+		for (int c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
+			raidz_col_t *rc = &rr->rr_col[c];
+			if (rc->rc_error != 0) {
+				ASSERT(n < VDEV_RAIDZ_MAXPARITY);
+				tgts[n++] = c;
+			}
+		}
+
+		ASSERT(rr->rr_firstdatacol >= n);
+
+		vdev_raidz_reconstruct_row(rm, rr, tgts, n);
+	}
+}
+
+/*
+ * Return the number of reads issued.
+ */
+static int
+vdev_raidz_read_all(zio_t *zio, raidz_row_t *rr)
+{
+	vdev_t *vd = zio->io_vd;
+	int nread = 0;
+
+	rr->rr_missingdata = 0;
+	rr->rr_missingparity = 0;
+
+	/*
+	 * If this row contains empty sectors which are not required
+	 * for a normal read then allocate an ABD for them now so they
+	 * may be read, verified, and any needed repairs performed.
+	 */
+	if (rr->rr_nempty && rr->rr_abd_empty == NULL)
+		vdev_draid_map_alloc_empty(zio, rr);
+
+	for (int c = 0; c < rr->rr_cols; c++) {
+		raidz_col_t *rc = &rr->rr_col[c];
+		if (rc->rc_tried || rc->rc_size == 0)
+			continue;
+
+		zio_nowait(zio_vdev_child_io(zio, NULL,
+		    vd->vdev_child[rc->rc_devidx],
+		    rc->rc_offset, rc->rc_abd, rc->rc_size,
+		    zio->io_type, zio->io_priority, 0,
+		    vdev_raidz_child_done, rc));
+		nread++;
+	}
+	return (nread);
+}
+
+/*
+ * We're here because either there were too many errors to even attempt
+ * reconstruction (total_errors == rm_first_datacol), or vdev_*_combrec()
+ * failed. In either case, there is enough bad data to prevent reconstruction.
+ * Start checksum ereports for all children which haven't failed.
+ */
+static void
+vdev_raidz_io_done_unrecoverable(zio_t *zio)
+{
+	raidz_map_t *rm = zio->io_vsd;
+
+	for (int i = 0; i < rm->rm_nrows; i++) {
+		raidz_row_t *rr = rm->rm_row[i];
+
+		for (int c = 0; c < rr->rr_cols; c++) {
+			raidz_col_t *rc = &rr->rr_col[c];
+			vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx];
+
+			if (rc->rc_error != 0)
+				continue;
+
+			zio_bad_cksum_t zbc;
+			zbc.zbc_has_cksum = 0;
+			zbc.zbc_injected = rm->rm_ecksuminjected;
+
+			(void) zfs_ereport_start_checksum(zio->io_spa,
+			    cvd, &zio->io_bookmark, zio, rc->rc_offset,
+			    rc->rc_size, &zbc);
+			mutex_enter(&cvd->vdev_stat_lock);
+			cvd->vdev_stat.vs_checksum_errors++;
+			mutex_exit(&cvd->vdev_stat_lock);
+		}
+	}
+}
+
+void
+vdev_raidz_io_done(zio_t *zio)
+{
+	raidz_map_t *rm = zio->io_vsd;
+
+	if (zio->io_type == ZIO_TYPE_WRITE) {
+		for (int i = 0; i < rm->rm_nrows; i++) {
+			vdev_raidz_io_done_write_impl(zio, rm->rm_row[i]);
+		}
+	} else {
+		for (int i = 0; i < rm->rm_nrows; i++) {
+			raidz_row_t *rr = rm->rm_row[i];
+			vdev_raidz_io_done_reconstruct_known_missing(zio,
+			    rm, rr);
+		}
+
+		if (raidz_checksum_verify(zio) == 0) {
+			for (int i = 0; i < rm->rm_nrows; i++) {
+				raidz_row_t *rr = rm->rm_row[i];
+				vdev_raidz_io_done_verified(zio, rr);
+			}
+			zio_checksum_verified(zio);
+		} else {
+			/*
+			 * A sequential resilver has no checksum which makes
+			 * combinatorial reconstruction impossible. This code
+			 * path is unreachable since raidz_checksum_verify()
+			 * has no checksum to verify and must succeed.
+			 */
+			ASSERT3U(zio->io_priority, !=, ZIO_PRIORITY_REBUILD);
+
+			/*
+			 * This isn't a typical situation -- either we got a
+			 * read error or a child silently returned bad data.
+			 * Read every block so we can try again with as much
+			 * data and parity as we can track down. If we've
+			 * already been through once before, all children will
+			 * be marked as tried so we'll proceed to combinatorial
+			 * reconstruction.
+			 */
+			int nread = 0;
+			for (int i = 0; i < rm->rm_nrows; i++) {
+				nread += vdev_raidz_read_all(zio,
+				    rm->rm_row[i]);
+			}
+			if (nread != 0) {
+				/*
+				 * Normally our stage is VDEV_IO_DONE, but if
+				 * we've already called redone(), it will have
+				 * changed to VDEV_IO_START, in which case we
+				 * don't want to call redone() again.
+				 */
+				if (zio->io_stage != ZIO_STAGE_VDEV_IO_START)
+					zio_vdev_io_redone(zio);
+				return;
+			}
+
+			zio->io_error = vdev_raidz_combrec(zio);
+			if (zio->io_error == ECKSUM &&
+			    !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+				vdev_raidz_io_done_unrecoverable(zio);
+			}
+		}
+	}
+}
+
+static void
 vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
 {
-	if (faulted > vd->vdev_nparity)
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
+	if (faulted > vdrz->vd_nparity)
 		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_NO_REPLICAS);
 	else if (degraded + faulted != 0)
@@ -2340,18 +2382,26 @@
  * width blocks must be resilvered.
  */
 static boolean_t
-vdev_raidz_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+vdev_raidz_need_resilver(vdev_t *vd, const dva_t *dva, size_t psize,
+    uint64_t phys_birth)
 {
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
 	uint64_t dcols = vd->vdev_children;
-	uint64_t nparity = vd->vdev_nparity;
+	uint64_t nparity = vdrz->vd_nparity;
 	uint64_t ashift = vd->vdev_top->vdev_ashift;
 	/* The starting RAIDZ (parent) vdev sector of the block. */
-	uint64_t b = offset >> ashift;
+	uint64_t b = DVA_GET_OFFSET(dva) >> ashift;
 	/* The zio's size in units of the vdev's minimum sector size. */
 	uint64_t s = ((psize - 1) >> ashift) + 1;
 	/* The first column for this stripe. */
 	uint64_t f = b % dcols;
 
+	/* Unreachable by sequential resilver. */
+	ASSERT3U(phys_birth, !=, TXG_UNKNOWN);
+
+	if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
+		return (B_FALSE);
+
 	if (s + nparity >= dcols)
 		return (B_TRUE);
 
@@ -2372,8 +2422,11 @@
 }
 
 static void
-vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res)
+vdev_raidz_xlate(vdev_t *cvd, const range_seg64_t *logical_rs,
+    range_seg64_t *physical_rs, range_seg64_t *remain_rs)
 {
+	(void) remain_rs;
+
 	vdev_t *raidvd = cvd->vdev_parent;
 	ASSERT(raidvd->vdev_ops == &vdev_raidz_ops);
 
@@ -2382,10 +2435,10 @@
 	uint64_t ashift = raidvd->vdev_top->vdev_ashift;
 
 	/* make sure the offsets are block-aligned */
-	ASSERT0(in->rs_start % (1 << ashift));
-	ASSERT0(in->rs_end % (1 << ashift));
-	uint64_t b_start = in->rs_start >> ashift;
-	uint64_t b_end = in->rs_end >> ashift;
+	ASSERT0(logical_rs->rs_start % (1 << ashift));
+	ASSERT0(logical_rs->rs_end % (1 << ashift));
+	uint64_t b_start = logical_rs->rs_start >> ashift;
+	uint64_t b_end = logical_rs->rs_end >> ashift;
 
 	uint64_t start_row = 0;
 	if (b_start > tgt_col) /* avoid underflow */
@@ -2395,17 +2448,119 @@
 	if (b_end > tgt_col)
 		end_row = ((b_end - tgt_col - 1) / width) + 1;
 
-	res->rs_start = start_row << ashift;
-	res->rs_end = end_row << ashift;
+	physical_rs->rs_start = start_row << ashift;
+	physical_rs->rs_end = end_row << ashift;
 
-	ASSERT3U(res->rs_start, <=, in->rs_start);
-	ASSERT3U(res->rs_end - res->rs_start, <=, in->rs_end - in->rs_start);
+	ASSERT3U(physical_rs->rs_start, <=, logical_rs->rs_start);
+	ASSERT3U(physical_rs->rs_end - physical_rs->rs_start, <=,
+	    logical_rs->rs_end - logical_rs->rs_start);
+}
+
+/*
+ * Initialize private RAIDZ specific fields from the nvlist.
+ */
+static int
+vdev_raidz_init(spa_t *spa, nvlist_t *nv, void **tsd)
+{
+	vdev_raidz_t *vdrz;
+	uint64_t nparity;
+
+	uint_t children;
+	nvlist_t **child;
+	int error = nvlist_lookup_nvlist_array(nv,
+	    ZPOOL_CONFIG_CHILDREN, &child, &children);
+	if (error != 0)
+		return (SET_ERROR(EINVAL));
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) == 0) {
+		if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
+			return (SET_ERROR(EINVAL));
+
+		/*
+		 * Previous versions could only support 1 or 2 parity
+		 * devices.
+		 */
+		if (nparity > 1 && spa_version(spa) < SPA_VERSION_RAIDZ2)
+			return (SET_ERROR(EINVAL));
+		else if (nparity > 2 && spa_version(spa) < SPA_VERSION_RAIDZ3)
+			return (SET_ERROR(EINVAL));
+	} else {
+		/*
+		 * We require the parity to be specified for SPAs that
+		 * support multiple parity levels.
+		 */
+		if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
+			return (SET_ERROR(EINVAL));
+
+		/*
+		 * Otherwise, we default to 1 parity device for RAID-Z.
+		 */
+		nparity = 1;
+	}
+
+	vdrz = kmem_zalloc(sizeof (*vdrz), KM_SLEEP);
+	vdrz->vd_logical_width = children;
+	vdrz->vd_nparity = nparity;
+
+	*tsd = vdrz;
+
+	return (0);
+}
+
+static void
+vdev_raidz_fini(vdev_t *vd)
+{
+	kmem_free(vd->vdev_tsd, sizeof (vdev_raidz_t));
+}
+
+/*
+ * Add RAIDZ specific fields to the config nvlist.
+ */
+static void
+vdev_raidz_config_generate(vdev_t *vd, nvlist_t *nv)
+{
+	ASSERT3P(vd->vdev_ops, ==, &vdev_raidz_ops);
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
+
+	/*
+	 * Make sure someone hasn't managed to sneak a fancy new vdev
+	 * into a crufty old storage pool.
+	 */
+	ASSERT(vdrz->vd_nparity == 1 ||
+	    (vdrz->vd_nparity <= 2 &&
+	    spa_version(vd->vdev_spa) >= SPA_VERSION_RAIDZ2) ||
+	    (vdrz->vd_nparity <= 3 &&
+	    spa_version(vd->vdev_spa) >= SPA_VERSION_RAIDZ3));
+
+	/*
+	 * Note that we'll add these even on storage pools where they
+	 * aren't strictly required -- older software will just ignore
+	 * it.
+	 */
+	fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vdrz->vd_nparity);
+}
+
+static uint64_t
+vdev_raidz_nparity(vdev_t *vd)
+{
+	vdev_raidz_t *vdrz = vd->vdev_tsd;
+	return (vdrz->vd_nparity);
+}
+
+static uint64_t
+vdev_raidz_ndisks(vdev_t *vd)
+{
+	return (vd->vdev_children);
 }
 
 vdev_ops_t vdev_raidz_ops = {
+	.vdev_op_init = vdev_raidz_init,
+	.vdev_op_fini = vdev_raidz_fini,
 	.vdev_op_open = vdev_raidz_open,
 	.vdev_op_close = vdev_raidz_close,
 	.vdev_op_asize = vdev_raidz_asize,
+	.vdev_op_min_asize = vdev_raidz_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_raidz_io_start,
 	.vdev_op_io_done = vdev_raidz_io_done,
 	.vdev_op_state_change = vdev_raidz_state_change,
@@ -2414,6 +2569,11 @@
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_raidz_xlate,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = vdev_raidz_config_generate,
+	.vdev_op_nparity = vdev_raidz_nparity,
+	.vdev_op_ndisks = vdev_raidz_ndisks,
 	.vdev_op_type = VDEV_TYPE_RAIDZ,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE			/* not a leaf vdev */
 };

diff --git a/zfs/module/zfs/vdev_raidz_math.c b/zfs/module/zfs/vdev_raidz_math.c
index 576d33b..2ce0dc5 100644
--- a/zfs/module/zfs/vdev_raidz_math.c
+++ b/zfs/module/zfs/vdev_raidz_math.c

@@ -29,9 +29,7 @@
 #include <sys/zfs_debug.h>
 #include <sys/vdev_raidz.h>
 #include <sys/vdev_raidz_impl.h>
-#include <linux/simd.h>
-
-extern boolean_t raidz_will_scalar_work(void);
+#include <sys/simd.h>
 
 /* Opaque implementation with NULL methods to represent original methods */
 static const raidz_impl_ops_t vdev_raidz_original_impl = {
@@ -63,10 +61,13 @@
 #if defined(__x86_64) && defined(HAVE_AVX512BW)	/* only x86_64 for now */
 	&vdev_raidz_avx512bw_impl,
 #endif
-#if defined(__aarch64__)
+#if defined(__aarch64__) && !defined(__FreeBSD__)
 	&vdev_raidz_aarch64_neon_impl,
 	&vdev_raidz_aarch64_neonx2_impl,
 #endif
+#if defined(__powerpc__) && defined(__altivec__)
+	&vdev_raidz_powerpc_altivec_impl,
+#endif
 };
 
 /* Indicate that benchmark has been completed */
@@ -148,7 +149,7 @@
  * Select parity generation method for raidz_map
  */
 int
-vdev_raidz_math_generate(raidz_map_t *rm)
+vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr)
 {
 	raidz_gen_f gen_parity = NULL;
 
@@ -173,7 +174,7 @@
 	if (gen_parity == NULL)
 		return (RAIDZ_ORIGINAL_IMPL);
 
-	gen_parity(rm);
+	gen_parity(rr);
 
 	return (0);
 }
@@ -240,8 +241,8 @@
  * @nbaddata     - Number of failed data columns
  */
 int
-vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
-    const int *dt, const int nbaddata)
+vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr,
+    const int *parity_valid, const int *dt, const int nbaddata)
 {
 	raidz_rec_f rec_fn = NULL;
 
@@ -264,7 +265,7 @@
 	if (rec_fn == NULL)
 		return (RAIDZ_ORIGINAL_IMPL);
 	else
-		return (rec_fn(rm, dt));
+		return (rec_fn(rr, dt));
 }
 
 const char *raidz_gen_name[] = {
@@ -359,7 +360,7 @@
 #define	BENCH_D_COLS	(8ULL)
 #define	BENCH_COLS	(BENCH_D_COLS + PARITY_PQR)
 #define	BENCH_ZIO_SIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)	/* 128 kiB */
-#define	BENCH_NS	MSEC2NSEC(25)			/* 25ms */
+#define	BENCH_NS	MSEC2NSEC(1)			/* 1ms */
 
 typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
 
@@ -409,7 +410,7 @@
 		t_start = gethrtime();
 
 		do {
-			for (i = 0; i < 25; i++, run_cnt++)
+			for (i = 0; i < 5; i++, run_cnt++)
 				bench_fn(bench_rm, fn);
 
 			t_diff = gethrtime() - t_start;
@@ -464,6 +465,7 @@
 	raidz_supp_impl_cnt = c;	/* number of supported impl */
 
 #if defined(_KERNEL)
+	abd_t *pabd;
 	zio_t *bench_zio = NULL;
 	raidz_map_t *bench_rm = NULL;
 	uint64_t bench_parity;
@@ -491,6 +493,12 @@
 	bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
 	    BENCH_COLS, PARITY_PQR);
 
+	/* Ensure that fake parity blocks are initialized */
+	for (c = 0; c < bench_rm->rm_row[0]->rr_firstdatacol; c++) {
+		pabd = bench_rm->rm_row[0]->rr_col[c].rc_abd;
+		memset(abd_to_buf(pabd), 0xAA, abd_get_size(pabd));
+	}
+
 	for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
 		benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
 
@@ -627,8 +635,7 @@
 	return (err);
 }
 
-#if defined(_KERNEL)
-#include <linux/mod_compat.h>
+#if defined(_KERNEL) && defined(__linux__)
 
 static int
 zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)

diff --git a/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
index 0ea2ad6..e46b253 100644
--- a/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
+++ b/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h

@@ -23,9 +23,11 @@
  */
 
 #include <sys/types.h>
-#include <linux/simd_aarch64.h>
+#include <sys/simd.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
@@ -479,10 +481,8 @@
 		/* upper part */					\
 		"and v14.16b," VR0(r) ".16b,v15.16b\n"			\
 		"and v13.16b," VR1(r) ".16b,v15.16b\n"			\
-		"sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n"			\
-		"sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n"			\
-		"and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n"		\
-		"and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n"		\
+		"ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n"		\
+		"ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n"		\
 									\
 		"tbl v12.16b,{v10.16b}," VR0(r) ".16b\n"		\
 		"tbl v10.16b,{v10.16b}," VR1(r) ".16b\n"		\

diff --git a/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c
index e072f51..c024aaf 100644
--- a/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c
+++ b/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c

@@ -210,9 +210,13 @@
  * If compiled with -O0, gcc doesn't do any stack frame coalescing
  * and -Wframe-larger-than=1024 is triggered in debug mode.
  */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
 DEFINE_REC_METHODS(aarch64_neonx2);
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
 
 static boolean_t
 raidz_will_aarch64_neonx2_work(void)

diff --git a/zfs/module/zfs/vdev_raidz_math_avx2.c b/zfs/module/zfs/vdev_raidz_math_avx2.c
index a12eb67..65e4beb 100644
--- a/zfs/module/zfs/vdev_raidz_math_avx2.c
+++ b/zfs/module/zfs/vdev_raidz_math_avx2.c

@@ -26,9 +26,11 @@
 #if defined(__x86_64) && defined(HAVE_AVX2)
 
 #include <sys/types.h>
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

diff --git a/zfs/module/zfs/vdev_raidz_math_avx512bw.c b/zfs/module/zfs/vdev_raidz_math_avx512bw.c
index 2f545c9..f06b469 100644
--- a/zfs/module/zfs/vdev_raidz_math_avx512bw.c
+++ b/zfs/module/zfs/vdev_raidz_math_avx512bw.c

@@ -27,10 +27,14 @@
 
 #if defined(__x86_64) && defined(HAVE_AVX512BW)
 
+#include <sys/param.h>
 #include <sys/types.h>
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
 
+
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

diff --git a/zfs/module/zfs/vdev_raidz_math_avx512f.c b/zfs/module/zfs/vdev_raidz_math_avx512f.c
index 75af7a8..aab653b 100644
--- a/zfs/module/zfs/vdev_raidz_math_avx512f.c
+++ b/zfs/module/zfs/vdev_raidz_math_avx512f.c

@@ -28,9 +28,12 @@
 #if defined(__x86_64) && defined(HAVE_AVX512F)
 
 #include <sys/types.h>
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
+#include <sys/debug.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
@@ -194,6 +197,8 @@
 		    "vpternlogd $0x6c,%zmm29, %zmm26, %" VR0(r) "\n"	\
 		    "vpternlogd $0x6c,%zmm29, %zmm25, %" VR1(r));	\
 		break;							\
+	default:							\
+		VERIFY(0);						\
 	}								\
 }
 
@@ -370,6 +375,9 @@
 		COPY(R_23(r), _mul_x2_in);				\
 		gf_x2_mul_fns[c]();					\
 		COPY(_mul_x2_acc, R_23(r));				\
+		break;							\
+	default:							\
+		VERIFY(0);						\
 	}								\
 }
 

diff --git a/zfs/module/zfs/vdev_raidz_math_impl.h b/zfs/module/zfs/vdev_raidz_math_impl.h
index ea592c0..35e016f 100644
--- a/zfs/module/zfs/vdev_raidz_math_impl.h
+++ b/zfs/module/zfs/vdev_raidz_math_impl.h

@@ -26,6 +26,7 @@
 #define	_VDEV_RAIDZ_MATH_IMPL_H
 
 #include <sys/types.h>
+#include <sys/vdev_raidz_impl.h>
 
 #define	raidz_inline inline __attribute__((always_inline))
 #ifndef noinline
@@ -36,33 +37,33 @@
  * Functions calculate multiplication constants for data reconstruction.
  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
  * used parity columns for reconstruction.
- * @rm			RAIDZ map
+ * @rr			RAIDZ row
  * @tgtidx		array of missing data indexes
  * @coeff		output array of coefficients. Array must be provided by
  *         		user and must hold minimum MUL_CNT values.
  */
 static noinline void
-raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 
 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
 }
 
 static noinline void
-raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 
 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
 }
 
 static noinline void
-raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 	gf_t a, b, e;
@@ -76,9 +77,9 @@
 }
 
 static noinline void
-raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 
@@ -93,9 +94,9 @@
 }
 
 static noinline void
-raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 
@@ -114,9 +115,9 @@
 }
 
 static noinline void
-raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
 {
-	const unsigned ncols = raidz_ncols(rm);
+	const unsigned ncols = rr->rr_cols;
 	const unsigned x = tgtidx[TARGET_X];
 	const unsigned y = tgtidx[TARGET_Y];
 	const unsigned z = tgtidx[TARGET_Z];
@@ -347,26 +348,26 @@
 /*
  * Generate P parity (RAIDZ1)
  *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
  */
 static raidz_inline void
-raidz_generate_p_impl(raidz_map_t * const rm)
+raidz_generate_p_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
-	const size_t psize = rm->rm_col[CODE_P].rc_size;
-	abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+	const size_t ncols = rr->rr_cols;
+	const size_t psize = rr->rr_col[CODE_P].rc_size;
+	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
 	size_t size;
 	abd_t *dabd;
 
 	raidz_math_begin();
 
 	/* start with first data column */
-	raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
+	raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
 
 	for (c = 2; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
-		size = rm->rm_col[c].rc_size;
+		dabd = rr->rr_col[c].rc_abd;
+		size = rr->rr_col[c].rc_size;
 
 		/* add data column */
 		raidz_add(pabd, dabd, size);
@@ -391,7 +392,7 @@
 {
 	v_t *p = (v_t *)c[0];
 	v_t *q = (v_t *)c[1];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const qend = q + (csize / sizeof (v_t));
 
@@ -414,29 +415,29 @@
 /*
  * Generate PQ parity (RAIDZ2)
  *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
  */
 static raidz_inline void
-raidz_generate_pq_impl(raidz_map_t * const rm)
+raidz_generate_pq_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
-	const size_t csize = rm->rm_col[CODE_P].rc_size;
+	const size_t ncols = rr->rr_cols;
+	const size_t csize = rr->rr_col[CODE_P].rc_size;
 	size_t dsize;
 	abd_t *dabd;
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd
+		rr->rr_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd
 	};
 
 	raidz_math_begin();
 
-	raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
-	raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
+	raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
+	raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
 
 	for (c = 3; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
-		dsize = rm->rm_col[c].rc_size;
+		dabd = rr->rr_col[c].rc_abd;
+		dsize = rr->rr_col[c].rc_size;
 
 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
 		    raidz_gen_pq_add);
@@ -462,7 +463,7 @@
 	v_t *p = (v_t *)c[0];
 	v_t *q = (v_t *)c[1];
 	v_t *r = (v_t *)c[CODE_R];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const qend = q + (csize / sizeof (v_t));
 
@@ -487,31 +488,31 @@
 /*
  * Generate PQR parity (RAIDZ2)
  *
- * @rm	RAIDZ map
+ * @rr	RAIDZ row
  */
 static raidz_inline void
-raidz_generate_pqr_impl(raidz_map_t * const rm)
+raidz_generate_pqr_impl(raidz_row_t * const rr)
 {
 	size_t c;
-	const size_t ncols = raidz_ncols(rm);
-	const size_t csize = rm->rm_col[CODE_P].rc_size;
+	const size_t ncols = rr->rr_cols;
+	const size_t csize = rr->rr_col[CODE_P].rc_size;
 	size_t dsize;
 	abd_t *dabd;
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_R].rc_abd
 	};
 
 	raidz_math_begin();
 
-	raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
-	raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
-	raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
+	raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
 
 	for (c = 4; c < ncols; c++) {
-		dabd = rm->rm_col[c].rc_abd;
-		dsize = rm->rm_col[c].rc_size;
+		dabd = rr->rr_col[c].rc_abd;
+		dsize = rr->rr_col[c].rc_size;
 
 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
 		    raidz_gen_pqr_add);
@@ -579,33 +580,36 @@
  * @syn_method	raidz_add_abd()
  * @rec_method	not applicable
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
 	size_t size;
 	abd_t *dabd;
 
+	if (xabd == NULL)
+		return (1 << CODE_P);
+
 	raidz_math_begin();
 
 	/* copy P into target */
-	raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+	raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
 
 	/* generate p_syndrome */
 	for (c = firstdc; c < ncols; c++) {
 		if (c == x)
 			continue;
 
-		dabd = rm->rm_col[c].rc_abd;
-		size = MIN(rm->rm_col[c].rc_size, xsize);
+		dabd = rr->rr_col[c].rc_abd;
+		size = MIN(rr->rr_col[c].rc_size, xsize);
 
 		raidz_add(xabd, dabd, size);
 	}
@@ -629,7 +633,7 @@
     const size_t dsize)
 {
 	v_t *x = (v_t *)xc[TARGET_X];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const xend = x + (xsize / sizeof (v_t));
 
@@ -653,30 +657,33 @@
  * @syn_method	raidz_add_abd()
  * @rec_method	raidz_mul_abd_cb()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	abd_t *xabd = rm->rm_col[x].rc_abd;
-	const size_t xsize = rm->rm_col[x].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
 	abd_t *tabds[] = { xabd };
 
+	if (xabd == NULL)
+		return (1 << CODE_Q);
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_q_coeff(rm, tgtidx, coeff);
+	raidz_rec_q_coeff(rr, tgtidx, coeff);
 
 	raidz_math_begin();
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 	}
@@ -687,8 +694,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
@@ -696,7 +703,7 @@
 	}
 
 	/* add Q to the syndrome */
-	raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
 
 	/* transform the syndrome */
 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
@@ -720,7 +727,7 @@
     const size_t dsize)
 {
 	v_t *x = (v_t *)xc[TARGET_X];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const xend = x + (tsize / sizeof (v_t));
 
@@ -744,30 +751,33 @@
  * @syn_method	raidz_add_abd()
  * @rec_method	raidz_mul_abd_cb()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
 	abd_t *tabds[] = { xabd };
 
+	if (xabd == NULL)
+		return (1 << CODE_R);
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_r_coeff(rm, tgtidx, coeff);
+	raidz_rec_r_coeff(rr, tgtidx, coeff);
 
 	raidz_math_begin();
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 	}
@@ -779,8 +789,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
@@ -788,7 +798,7 @@
 	}
 
 	/* add R to the syndrome */
-	raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
 
 	/* transform the syndrome */
 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
@@ -813,7 +823,7 @@
 {
 	v_t *x = (v_t *)tc[TARGET_X];
 	v_t *y = (v_t *)tc[TARGET_Y];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const yend = y + (tsize / sizeof (v_t));
 
@@ -881,31 +891,34 @@
  * @syn_method	raidz_syn_pq_abd()
  * @rec_method	raidz_rec_pq_abd()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd
+		rr->rr_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd
 	};
 
+	if (xabd == NULL)
+		return ((1 << CODE_P) | (1 << CODE_Q));
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pq_coeff(rm, tgtidx, coeff);
+	raidz_rec_pq_coeff(rr, tgtidx, coeff);
 
 	/*
 	 * Check if some of targets is shorter then others
@@ -921,8 +934,8 @@
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@@ -934,8 +947,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@@ -946,7 +959,7 @@
 
 	/* Copy shorter targets back to the original abd buffer */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 
 	raidz_math_end();
 
@@ -971,7 +984,7 @@
 {
 	v_t *x = (v_t *)c[TARGET_X];
 	v_t *y = (v_t *)c[TARGET_Y];
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 	const v_t * const yend = y + (tsize / sizeof (v_t));
 
@@ -1038,30 +1051,34 @@
  * @syn_method	raidz_syn_pr_abd()
  * @rec_method	raidz_rec_pr_abd()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[0];
 	const size_t y = tgtidx[1];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_R].rc_abd
 	};
+
+	if (xabd == NULL)
+		return ((1 << CODE_P) | (1 << CODE_R));
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pr_coeff(rm, tgtidx, coeff);
+	raidz_rec_pr_coeff(rr, tgtidx, coeff);
 
 	/*
 	 * Check if some of targets are shorter then others.
@@ -1077,8 +1094,8 @@
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@@ -1090,8 +1107,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@@ -1104,14 +1121,14 @@
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 
 	raidz_math_end();
 
 	if (ysize < xsize)
 		abd_free(yabd);
 
-	return ((1 << CODE_P) | (1 << CODE_Q));
+	return ((1 << CODE_P) | (1 << CODE_R));
 }
 
 
@@ -1130,7 +1147,7 @@
 	v_t *x = (v_t *)c[TARGET_X];
 	v_t *y = (v_t *)c[TARGET_Y];
 	const v_t * const xend = x + (tsize / sizeof (v_t));
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 
 	SYN_QR_DEFINE();
@@ -1201,30 +1218,34 @@
  * @syn_method	raidz_syn_qr_abd()
  * @rec_method	raidz_rec_qr_abd()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
 	abd_t *tabds[2] = { xabd, yabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_R].rc_abd
 	};
+
+	if (xabd == NULL)
+		return ((1 << CODE_Q) | (1 << CODE_R));
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_qr_coeff(rm, tgtidx, coeff);
+	raidz_rec_qr_coeff(rr, tgtidx, coeff);
 
 	/*
 	 * Check if some of targets is shorter then others
@@ -1240,8 +1261,8 @@
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@@ -1253,8 +1274,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
@@ -1267,7 +1288,7 @@
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 
 	raidz_math_end();
 
@@ -1295,7 +1316,7 @@
 	v_t *y = (v_t *)c[TARGET_Y];
 	v_t *z = (v_t *)c[TARGET_Z];
 	const v_t * const yend = y + (tsize / sizeof (v_t));
-	const v_t *d = (v_t *)dc;
+	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
 
 	SYN_PQR_DEFINE();
@@ -1384,34 +1405,38 @@
  * @syn_method	raidz_syn_pqr_abd()
  * @rec_method	raidz_rec_pqr_abd()
  *
- * @rm		RAIDZ map
+ * @rr		RAIDZ row
  * @tgtidx	array of missing data indexes
  */
 static raidz_inline int
-raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
 {
 	size_t c;
 	size_t dsize;
 	abd_t *dabd;
-	const size_t firstdc = raidz_parity(rm);
-	const size_t ncols = raidz_ncols(rm);
+	const size_t firstdc = rr->rr_firstdatacol;
+	const size_t ncols = rr->rr_cols;
 	const size_t x = tgtidx[TARGET_X];
 	const size_t y = tgtidx[TARGET_Y];
 	const size_t z = tgtidx[TARGET_Z];
-	const size_t xsize = rm->rm_col[x].rc_size;
-	const size_t ysize = rm->rm_col[y].rc_size;
-	const size_t zsize = rm->rm_col[z].rc_size;
-	abd_t *xabd = rm->rm_col[x].rc_abd;
-	abd_t *yabd = rm->rm_col[y].rc_abd;
-	abd_t *zabd = rm->rm_col[z].rc_abd;
+	const size_t xsize = rr->rr_col[x].rc_size;
+	const size_t ysize = rr->rr_col[y].rc_size;
+	const size_t zsize = rr->rr_col[z].rc_size;
+	abd_t *xabd = rr->rr_col[x].rc_abd;
+	abd_t *yabd = rr->rr_col[y].rc_abd;
+	abd_t *zabd = rr->rr_col[z].rc_abd;
 	abd_t *tabds[] = { xabd, yabd, zabd };
 	abd_t *cabds[] = {
-		rm->rm_col[CODE_P].rc_abd,
-		rm->rm_col[CODE_Q].rc_abd,
-		rm->rm_col[CODE_R].rc_abd
+		rr->rr_col[CODE_P].rc_abd,
+		rr->rr_col[CODE_Q].rc_abd,
+		rr->rr_col[CODE_R].rc_abd
 	};
+
+	if (xabd == NULL)
+		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
+
 	unsigned coeff[MUL_CNT];
-	raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
 
 	/*
 	 * Check if some of targets is shorter then others
@@ -1431,9 +1456,9 @@
 
 	/* Start with first data column if present */
 	if (firstdc != x) {
-		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
-		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
-		raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
+		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
 	} else {
 		raidz_zero(xabd, xsize);
 		raidz_zero(yabd, xsize);
@@ -1446,8 +1471,8 @@
 			dabd = NULL;
 			dsize = 0;
 		} else {
-			dabd = rm->rm_col[c].rc_abd;
-			dsize = rm->rm_col[c].rc_size;
+			dabd = rr->rr_col[c].rc_abd;
+			dsize = rr->rr_col[c].rc_size;
 		}
 
 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
@@ -1460,9 +1485,9 @@
 	 * Copy shorter targets back to the original abd buffer
 	 */
 	if (ysize < xsize)
-		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
 	if (zsize < xsize)
-		raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
+		raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
 
 	raidz_math_end();
 

diff --git a/zfs/module/zfs/vdev_raidz_math_powerpc_altivec.c b/zfs/module/zfs/vdev_raidz_math_powerpc_altivec.c
new file mode 100644
index 0000000..1db2c4c
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_powerpc_altivec.c

@@ -0,0 +1,4337 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
+ *           <romain.dolbeau@european-processor-initiative.eu>
+ */
+
+#include <sys/isa_defs.h>
+#include <sys/types.h>
+
+#if defined(__powerpc__)
+#pragma GCC target("altivec")
+
+#include "vdev_raidz_math_powerpc_altivec_common.h"
+
+#define	SYN_STRIDE		4
+
+#define	ZERO_STRIDE		4
+#define	ZERO_DEFINE()	\
+	GEN_X_DEFINE_0_3() \
+	GEN_X_DEFINE_33_36()
+#define	ZERO_D			0, 1, 2, 3
+
+#define	COPY_STRIDE		4
+#define	COPY_DEFINE()	\
+	GEN_X_DEFINE_0_3() \
+	GEN_X_DEFINE_33_36()
+#define	COPY_D			0, 1, 2, 3
+
+#define	ADD_STRIDE		4
+#define	ADD_DEFINE()	\
+	GEN_X_DEFINE_0_3() \
+	GEN_X_DEFINE_33_36()
+#define	ADD_D			0, 1, 2, 3
+
+#define	MUL_STRIDE		4
+#define	MUL_DEFINE()	\
+	GEN_X_DEFINE_0_3() \
+	GEN_X_DEFINE_33_36()
+#define	MUL_D			0, 1, 2, 3
+
+#define	GEN_P_DEFINE() \
+	GEN_X_DEFINE_0_3() \
+	GEN_X_DEFINE_33_36()
+#define	GEN_P_STRIDE		4
+#define	GEN_P_P			0, 1, 2, 3
+
+#define	GEN_PQ_DEFINE()	\
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	GEN_PQ_STRIDE		4
+#define	GEN_PQ_D		0, 1, 2, 3
+#define	GEN_PQ_C		4, 5, 6, 7
+
+#define	GEN_PQR_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	GEN_PQR_STRIDE		4
+#define	GEN_PQR_D		0, 1, 2, 3
+#define	GEN_PQR_C		4, 5, 6, 7
+
+#define	SYN_Q_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_Q_STRIDE		4
+#define	SYN_Q_D			0, 1, 2, 3
+#define	SYN_Q_X			4, 5, 6, 7
+
+#define	SYN_R_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_R_STRIDE		4
+#define	SYN_R_D			0, 1, 2, 3
+#define	SYN_R_X			4, 5, 6, 7
+
+#define	SYN_PQ_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_PQ_STRIDE		4
+#define	SYN_PQ_D		0, 1, 2, 3
+#define	SYN_PQ_X		4, 5, 6, 7
+
+#define	REC_PQ_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36()
+#define	REC_PQ_STRIDE		2
+#define	REC_PQ_X		0, 1
+#define	REC_PQ_Y		2, 3
+#define	REC_PQ_T		4, 5
+
+#define	SYN_PR_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_PR_STRIDE		4
+#define	SYN_PR_D		0, 1, 2, 3
+#define	SYN_PR_X		4, 5, 6, 7
+
+#define	REC_PR_DEFINE()	\
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36()
+#define	REC_PR_STRIDE		2
+#define	REC_PR_X		0, 1
+#define	REC_PR_Y		2, 3
+#define	REC_PR_T		4, 5
+
+#define	SYN_QR_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_QR_STRIDE		4
+#define	SYN_QR_D		0, 1, 2, 3
+#define	SYN_QR_X		4, 5, 6, 7
+
+#define	REC_QR_DEFINE()	\
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36()
+#define	REC_QR_STRIDE		2
+#define	REC_QR_X		0, 1
+#define	REC_QR_Y		2, 3
+#define	REC_QR_T		4, 5
+
+#define	SYN_PQR_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_33_36()
+#define	SYN_PQR_STRIDE		 4
+#define	SYN_PQR_D		 0, 1, 2, 3
+#define	SYN_PQR_X		 4, 5, 6, 7
+
+#define	REC_PQR_DEFINE() \
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_8_9()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36()
+#define	REC_PQR_STRIDE		2
+#define	REC_PQR_X		0, 1
+#define	REC_PQR_Y		2, 3
+#define	REC_PQR_Z		4, 5
+#define	REC_PQR_XS		6, 7
+#define	REC_PQR_YS		8, 9
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(powerpc_altivec);
+DEFINE_REC_METHODS(powerpc_altivec);
+
+static boolean_t
+raidz_will_powerpc_altivec_work(void)
+{
+	return (kfpu_allowed() && zfs_altivec_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl = {
+	.init = NULL,
+	.fini = NULL,
+	.gen = RAIDZ_GEN_METHODS(powerpc_altivec),
+	.rec = RAIDZ_REC_METHODS(powerpc_altivec),
+	.is_supported = &raidz_will_powerpc_altivec_work,
+	.name = "powerpc_altivec"
+};
+
+#endif /* defined(__powerpc__) */
+
+
+#if defined(__powerpc__)
+#if defined(_ZFS_LITTLE_ENDIAN) && _LITTLE_ENDIAN
+/* BEGIN CSTYLED */
+const uint8_t
+__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+		0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10,
+		0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x11, 0x12, 0x17, 0x14, 0x1d, 0x1e, 0x1b, 0x18,
+		0x09, 0x0a, 0x0f, 0x0c, 0x05, 0x06, 0x03, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3c, 0x38, 0x34, 0x30, 0x2c, 0x28, 0x24, 0x20,
+		0x1c, 0x18, 0x14, 0x10, 0x0c, 0x08, 0x04, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x33, 0x36, 0x39, 0x3c, 0x27, 0x22, 0x2d, 0x28,
+		0x1b, 0x1e, 0x11, 0x14, 0x0f, 0x0a, 0x05, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x22, 0x24, 0x2e, 0x28, 0x3a, 0x3c, 0x36, 0x30,
+		0x12, 0x14, 0x1e, 0x18, 0x0a, 0x0c, 0x06, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x2d, 0x2a, 0x23, 0x24, 0x31, 0x36, 0x3f, 0x38,
+		0x15, 0x12, 0x1b, 0x1c, 0x09, 0x0e, 0x07, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
+		0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x77, 0x7e, 0x65, 0x6c, 0x53, 0x5a, 0x41, 0x48,
+		0x3f, 0x36, 0x2d, 0x24, 0x1b, 0x12, 0x09, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x66, 0x6c, 0x72, 0x78, 0x4e, 0x44, 0x5a, 0x50,
+		0x36, 0x3c, 0x22, 0x28, 0x1e, 0x14, 0x0a, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x69, 0x62, 0x7f, 0x74, 0x45, 0x4e, 0x53, 0x58,
+		0x31, 0x3a, 0x27, 0x2c, 0x1d, 0x16, 0x0b, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x44, 0x48, 0x5c, 0x50, 0x74, 0x78, 0x6c, 0x60,
+		0x24, 0x28, 0x3c, 0x30, 0x14, 0x18, 0x0c, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x4b, 0x46, 0x51, 0x5c, 0x7f, 0x72, 0x65, 0x68,
+		0x23, 0x2e, 0x39, 0x34, 0x17, 0x1a, 0x0d, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x5a, 0x54, 0x46, 0x48, 0x62, 0x6c, 0x7e, 0x70,
+		0x2a, 0x24, 0x36, 0x38, 0x12, 0x1c, 0x0e, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x55, 0x5a, 0x4b, 0x44, 0x69, 0x66, 0x77, 0x78,
+		0x2d, 0x22, 0x33, 0x3c, 0x11, 0x1e, 0x0f, 0x00  },
+	{	0xbb, 0xa6, 0x81, 0x9c, 0xcf, 0xd2, 0xf5, 0xe8,
+		0x53, 0x4e, 0x69, 0x74, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0xbb, 0xa6, 0x81, 0x9c, 0xcf, 0xd2, 0xf5, 0xe8,
+		0x53, 0x4e, 0x69, 0x74, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88,
+		0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00  },
+	{	0xa6, 0xbb, 0x9c, 0x81, 0xd2, 0xcf, 0xe8, 0xf5,
+		0x53, 0x4e, 0x69, 0x74, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xee, 0xfc, 0xca, 0xd8, 0xa6, 0xb4, 0x82, 0x90,
+		0x7e, 0x6c, 0x5a, 0x48, 0x36, 0x24, 0x12, 0x00  },
+	{	0xa6, 0xbb, 0x9c, 0x81, 0xd2, 0xcf, 0xe8, 0xf5,
+		0x53, 0x4e, 0x69, 0x74, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xe1, 0xf2, 0xc7, 0xd4, 0xad, 0xbe, 0x8b, 0x98,
+		0x79, 0x6a, 0x5f, 0x4c, 0x35, 0x26, 0x13, 0x00  },
+	{	0x9c, 0x81, 0xa6, 0xbb, 0xf5, 0xe8, 0xcf, 0xd2,
+		0x4e, 0x53, 0x74, 0x69, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xcc, 0xd8, 0xe4, 0xf0, 0x9c, 0x88, 0xb4, 0xa0,
+		0x6c, 0x78, 0x44, 0x50, 0x3c, 0x28, 0x14, 0x00  },
+	{	0x9c, 0x81, 0xa6, 0xbb, 0xf5, 0xe8, 0xcf, 0xd2,
+		0x4e, 0x53, 0x74, 0x69, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xc3, 0xd6, 0xe9, 0xfc, 0x97, 0x82, 0xbd, 0xa8,
+		0x6b, 0x7e, 0x41, 0x54, 0x3f, 0x2a, 0x15, 0x00  },
+	{	0x81, 0x9c, 0xbb, 0xa6, 0xe8, 0xf5, 0xd2, 0xcf,
+		0x4e, 0x53, 0x74, 0x69, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xd2, 0xc4, 0xfe, 0xe8, 0x8a, 0x9c, 0xa6, 0xb0,
+		0x62, 0x74, 0x4e, 0x58, 0x3a, 0x2c, 0x16, 0x00  },
+	{	0x81, 0x9c, 0xbb, 0xa6, 0xe8, 0xf5, 0xd2, 0xcf,
+		0x4e, 0x53, 0x74, 0x69, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xdd, 0xca, 0xf3, 0xe4, 0x81, 0x96, 0xaf, 0xb8,
+		0x65, 0x72, 0x4b, 0x5c, 0x39, 0x2e, 0x17, 0x00  },
+	{	0xe8, 0xf5, 0xcf, 0xd2, 0xa6, 0xbb, 0x81, 0x9c,
+		0x74, 0x69, 0x53, 0x4e, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x88, 0x90, 0xb8, 0xa0, 0xe8, 0xf0, 0xd8, 0xc0,
+		0x48, 0x50, 0x78, 0x60, 0x28, 0x30, 0x18, 0x00  },
+	{	0xe8, 0xf5, 0xcf, 0xd2, 0xa6, 0xbb, 0x81, 0x9c,
+		0x74, 0x69, 0x53, 0x4e, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x87, 0x9e, 0xb5, 0xac, 0xe3, 0xfa, 0xd1, 0xc8,
+		0x4f, 0x56, 0x7d, 0x64, 0x2b, 0x32, 0x19, 0x00  },
+	{	0xf5, 0xe8, 0xd2, 0xcf, 0xbb, 0xa6, 0x9c, 0x81,
+		0x74, 0x69, 0x53, 0x4e, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x96, 0x8c, 0xa2, 0xb8, 0xfe, 0xe4, 0xca, 0xd0,
+		0x46, 0x5c, 0x72, 0x68, 0x2e, 0x34, 0x1a, 0x00  },
+	{	0xf5, 0xe8, 0xd2, 0xcf, 0xbb, 0xa6, 0x9c, 0x81,
+		0x74, 0x69, 0x53, 0x4e, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x99, 0x82, 0xaf, 0xb4, 0xf5, 0xee, 0xc3, 0xd8,
+		0x41, 0x5a, 0x77, 0x6c, 0x2d, 0x36, 0x1b, 0x00  },
+	{	0xcf, 0xd2, 0xe8, 0xf5, 0x9c, 0x81, 0xbb, 0xa6,
+		0x69, 0x74, 0x4e, 0x53, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb4, 0xa8, 0x8c, 0x90, 0xc4, 0xd8, 0xfc, 0xe0,
+		0x54, 0x48, 0x6c, 0x70, 0x24, 0x38, 0x1c, 0x00  },
+	{	0xcf, 0xd2, 0xe8, 0xf5, 0x9c, 0x81, 0xbb, 0xa6,
+		0x69, 0x74, 0x4e, 0x53, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xbb, 0xa6, 0x81, 0x9c, 0xcf, 0xd2, 0xf5, 0xe8,
+		0x53, 0x4e, 0x69, 0x74, 0x27, 0x3a, 0x1d, 0x00  },
+	{	0xd2, 0xcf, 0xf5, 0xe8, 0x81, 0x9c, 0xa6, 0xbb,
+		0x69, 0x74, 0x4e, 0x53, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xaa, 0xb4, 0x96, 0x88, 0xd2, 0xcc, 0xee, 0xf0,
+		0x5a, 0x44, 0x66, 0x78, 0x22, 0x3c, 0x1e, 0x00  },
+	{	0xd2, 0xcf, 0xf5, 0xe8, 0x81, 0x9c, 0xa6, 0xbb,
+		0x69, 0x74, 0x4e, 0x53, 0x3a, 0x27, 0x1d, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa5, 0xba, 0x9b, 0x84, 0xd9, 0xc6, 0xe7, 0xf8,
+		0x5d, 0x42, 0x63, 0x7c, 0x21, 0x3e, 0x1f, 0x00  },
+	{	0x6b, 0x51, 0x1f, 0x25, 0x83, 0xb9, 0xf7, 0xcd,
+		0xa6, 0x9c, 0xd2, 0xe8, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x6b, 0x51, 0x1f, 0x25, 0x83, 0xb9, 0xf7, 0xcd,
+		0xa6, 0x9c, 0xd2, 0xe8, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xef, 0xce, 0xad, 0x8c, 0x6b, 0x4a, 0x29, 0x08,
+		0xe7, 0xc6, 0xa5, 0x84, 0x63, 0x42, 0x21, 0x00  },
+	{	0x76, 0x4c, 0x02, 0x38, 0x9e, 0xa4, 0xea, 0xd0,
+		0xa6, 0x9c, 0xd2, 0xe8, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+		0xee, 0xcc, 0xaa, 0x88, 0x66, 0x44, 0x22, 0x00  },
+	{	0x76, 0x4c, 0x02, 0x38, 0x9e, 0xa4, 0xea, 0xd0,
+		0xa6, 0x9c, 0xd2, 0xe8, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xf1, 0xd2, 0xb7, 0x94, 0x7d, 0x5e, 0x3b, 0x18,
+		0xe9, 0xca, 0xaf, 0x8c, 0x65, 0x46, 0x23, 0x00  },
+	{	0x4c, 0x76, 0x38, 0x02, 0xb9, 0x83, 0xcd, 0xf7,
+		0xbb, 0x81, 0xcf, 0xf5, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xdc, 0xf8, 0x94, 0xb0, 0x4c, 0x68, 0x04, 0x20,
+		0xfc, 0xd8, 0xb4, 0x90, 0x6c, 0x48, 0x24, 0x00  },
+	{	0x4c, 0x76, 0x38, 0x02, 0xb9, 0x83, 0xcd, 0xf7,
+		0xbb, 0x81, 0xcf, 0xf5, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xd3, 0xf6, 0x99, 0xbc, 0x47, 0x62, 0x0d, 0x28,
+		0xfb, 0xde, 0xb1, 0x94, 0x6f, 0x4a, 0x25, 0x00  },
+	{	0x51, 0x6b, 0x25, 0x1f, 0xa4, 0x9e, 0xd0, 0xea,
+		0xbb, 0x81, 0xcf, 0xf5, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xc2, 0xe4, 0x8e, 0xa8, 0x5a, 0x7c, 0x16, 0x30,
+		0xf2, 0xd4, 0xbe, 0x98, 0x6a, 0x4c, 0x26, 0x00  },
+	{	0x51, 0x6b, 0x25, 0x1f, 0xa4, 0x9e, 0xd0, 0xea,
+		0xbb, 0x81, 0xcf, 0xf5, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xcd, 0xea, 0x83, 0xa4, 0x51, 0x76, 0x1f, 0x38,
+		0xf5, 0xd2, 0xbb, 0x9c, 0x69, 0x4e, 0x27, 0x00  },
+	{	0x38, 0x02, 0x51, 0x6b, 0xea, 0xd0, 0x83, 0xb9,
+		0x81, 0xbb, 0xe8, 0xd2, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x98, 0xb0, 0xc8, 0xe0, 0x38, 0x10, 0x68, 0x40,
+		0xd8, 0xf0, 0x88, 0xa0, 0x78, 0x50, 0x28, 0x00  },
+	{	0x38, 0x02, 0x51, 0x6b, 0xea, 0xd0, 0x83, 0xb9,
+		0x81, 0xbb, 0xe8, 0xd2, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x97, 0xbe, 0xc5, 0xec, 0x33, 0x1a, 0x61, 0x48,
+		0xdf, 0xf6, 0x8d, 0xa4, 0x7b, 0x52, 0x29, 0x00  },
+	{	0x25, 0x1f, 0x4c, 0x76, 0xf7, 0xcd, 0x9e, 0xa4,
+		0x81, 0xbb, 0xe8, 0xd2, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x86, 0xac, 0xd2, 0xf8, 0x2e, 0x04, 0x7a, 0x50,
+		0xd6, 0xfc, 0x82, 0xa8, 0x7e, 0x54, 0x2a, 0x00  },
+	{	0x25, 0x1f, 0x4c, 0x76, 0xf7, 0xcd, 0x9e, 0xa4,
+		0x81, 0xbb, 0xe8, 0xd2, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x89, 0xa2, 0xdf, 0xf4, 0x25, 0x0e, 0x73, 0x58,
+		0xd1, 0xfa, 0x87, 0xac, 0x7d, 0x56, 0x2b, 0x00  },
+	{	0x1f, 0x25, 0x76, 0x4c, 0xd0, 0xea, 0xb9, 0x83,
+		0x9c, 0xa6, 0xf5, 0xcf, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa4, 0x88, 0xfc, 0xd0, 0x14, 0x38, 0x4c, 0x60,
+		0xc4, 0xe8, 0x9c, 0xb0, 0x74, 0x58, 0x2c, 0x00  },
+	{	0x1f, 0x25, 0x76, 0x4c, 0xd0, 0xea, 0xb9, 0x83,
+		0x9c, 0xa6, 0xf5, 0xcf, 0x53, 0x69, 0x3a, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xab, 0x86, 0xf1, 0xdc, 0x1f, 0x32, 0x45, 0x68,
+		0xc3, 0xee, 0x99, 0xb4, 0x77, 0x5a, 0x2d, 0x00  },
+	{	0x02, 0x38, 0x6b, 0x51, 0xcd, 0xf7, 0xa4, 0x9e,
+		0x9c, 0xa6, 0xf5, 0xcf, 0x53, 0x69, 0x3a, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xba, 0x94, 0xe6, 0xc8, 0x02, 0x2c, 0x5e, 0x70,
+		0xca, 0xe4, 0x96, 0xb8, 0x72, 0x5c, 0x2e, 0x00  },
+	{	0x02, 0x38, 0x6b, 0x51, 0xcd, 0xf7, 0xa4, 0x9e,
+		0x9c, 0xa6, 0xf5, 0xcf, 0x53, 0x69, 0x3a, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb5, 0x9a, 0xeb, 0xc4, 0x09, 0x26, 0x57, 0x78,
+		0xcd, 0xe2, 0x93, 0xbc, 0x71, 0x5e, 0x2f, 0x00  },
+	{	0xd0, 0xf7, 0x9e, 0xb9, 0x4c, 0x6b, 0x02, 0x25,
+		0xf5, 0xd2, 0xbb, 0x9c, 0x69, 0x4e, 0x27, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0xd0, 0xf7, 0x9e, 0xb9, 0x4c, 0x6b, 0x02, 0x25,
+		0xf5, 0xd2, 0xbb, 0x9c, 0x69, 0x4e, 0x27, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1f, 0x2e, 0x7d, 0x4c, 0xdb, 0xea, 0xb9, 0x88,
+		0x97, 0xa6, 0xf5, 0xc4, 0x53, 0x62, 0x31, 0x00  },
+	{	0xcd, 0xea, 0x83, 0xa4, 0x51, 0x76, 0x1f, 0x38,
+		0xf5, 0xd2, 0xbb, 0x9c, 0x69, 0x4e, 0x27, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90,
+		0x9e, 0xac, 0xfa, 0xc8, 0x56, 0x64, 0x32, 0x00  },
+	{	0xcd, 0xea, 0x83, 0xa4, 0x51, 0x76, 0x1f, 0x38,
+		0xf5, 0xd2, 0xbb, 0x9c, 0x69, 0x4e, 0x27, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x01, 0x32, 0x67, 0x54, 0xcd, 0xfe, 0xab, 0x98,
+		0x99, 0xaa, 0xff, 0xcc, 0x55, 0x66, 0x33, 0x00  },
+	{	0xf7, 0xd0, 0xb9, 0x9e, 0x76, 0x51, 0x38, 0x1f,
+		0xe8, 0xcf, 0xa6, 0x81, 0x69, 0x4e, 0x27, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x2c, 0x18, 0x44, 0x70, 0xfc, 0xc8, 0x94, 0xa0,
+		0x8c, 0xb8, 0xe4, 0xd0, 0x5c, 0x68, 0x34, 0x00  },
+	{	0xf7, 0xd0, 0xb9, 0x9e, 0x76, 0x51, 0x38, 0x1f,
+		0xe8, 0xcf, 0xa6, 0x81, 0x69, 0x4e, 0x27, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x23, 0x16, 0x49, 0x7c, 0xf7, 0xc2, 0x9d, 0xa8,
+		0x8b, 0xbe, 0xe1, 0xd4, 0x5f, 0x6a, 0x35, 0x00  },
+	{	0xea, 0xcd, 0xa4, 0x83, 0x6b, 0x4c, 0x25, 0x02,
+		0xe8, 0xcf, 0xa6, 0x81, 0x69, 0x4e, 0x27, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x32, 0x04, 0x5e, 0x68, 0xea, 0xdc, 0x86, 0xb0,
+		0x82, 0xb4, 0xee, 0xd8, 0x5a, 0x6c, 0x36, 0x00  },
+	{	0xea, 0xcd, 0xa4, 0x83, 0x6b, 0x4c, 0x25, 0x02,
+		0xe8, 0xcf, 0xa6, 0x81, 0x69, 0x4e, 0x27, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3d, 0x0a, 0x53, 0x64, 0xe1, 0xd6, 0x8f, 0xb8,
+		0x85, 0xb2, 0xeb, 0xdc, 0x59, 0x6e, 0x37, 0x00  },
+	{	0x83, 0xa4, 0xd0, 0xf7, 0x25, 0x02, 0x76, 0x51,
+		0xd2, 0xf5, 0x81, 0xa6, 0x74, 0x53, 0x27, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x68, 0x50, 0x18, 0x20, 0x88, 0xb0, 0xf8, 0xc0,
+		0xa8, 0x90, 0xd8, 0xe0, 0x48, 0x70, 0x38, 0x00  },
+	{	0x83, 0xa4, 0xd0, 0xf7, 0x25, 0x02, 0x76, 0x51,
+		0xd2, 0xf5, 0x81, 0xa6, 0x74, 0x53, 0x27, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x67, 0x5e, 0x15, 0x2c, 0x83, 0xba, 0xf1, 0xc8,
+		0xaf, 0x96, 0xdd, 0xe4, 0x4b, 0x72, 0x39, 0x00  },
+	{	0x9e, 0xb9, 0xcd, 0xea, 0x38, 0x1f, 0x6b, 0x4c,
+		0xd2, 0xf5, 0x81, 0xa6, 0x74, 0x53, 0x27, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x76, 0x4c, 0x02, 0x38, 0x9e, 0xa4, 0xea, 0xd0,
+		0xa6, 0x9c, 0xd2, 0xe8, 0x4e, 0x74, 0x3a, 0x00  },
+	{	0x9e, 0xb9, 0xcd, 0xea, 0x38, 0x1f, 0x6b, 0x4c,
+		0xd2, 0xf5, 0x81, 0xa6, 0x74, 0x53, 0x27, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x79, 0x42, 0x0f, 0x34, 0x95, 0xae, 0xe3, 0xd8,
+		0xa1, 0x9a, 0xd7, 0xec, 0x4d, 0x76, 0x3b, 0x00  },
+	{	0xa4, 0x83, 0xf7, 0xd0, 0x1f, 0x38, 0x4c, 0x6b,
+		0xcf, 0xe8, 0x9c, 0xbb, 0x74, 0x53, 0x27, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x54, 0x68, 0x2c, 0x10, 0xa4, 0x98, 0xdc, 0xe0,
+		0xb4, 0x88, 0xcc, 0xf0, 0x44, 0x78, 0x3c, 0x00  },
+	{	0xa4, 0x83, 0xf7, 0xd0, 0x1f, 0x38, 0x4c, 0x6b,
+		0xcf, 0xe8, 0x9c, 0xbb, 0x74, 0x53, 0x27, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x5b, 0x66, 0x21, 0x1c, 0xaf, 0x92, 0xd5, 0xe8,
+		0xb3, 0x8e, 0xc9, 0xf4, 0x47, 0x7a, 0x3d, 0x00  },
+	{	0xb9, 0x9e, 0xea, 0xcd, 0x02, 0x25, 0x51, 0x76,
+		0xcf, 0xe8, 0x9c, 0xbb, 0x74, 0x53, 0x27, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x4a, 0x74, 0x36, 0x08, 0xb2, 0x8c, 0xce, 0xf0,
+		0xba, 0x84, 0xc6, 0xf8, 0x42, 0x7c, 0x3e, 0x00  },
+	{	0xb9, 0x9e, 0xea, 0xcd, 0x02, 0x25, 0x51, 0x76,
+		0xcf, 0xe8, 0x9c, 0xbb, 0x74, 0x53, 0x27, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x45, 0x7a, 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8,
+		0xbd, 0x82, 0xc3, 0xfc, 0x41, 0x7e, 0x3f, 0x00  },
+	{	0xd6, 0xa2, 0x3e, 0x4a, 0x1b, 0x6f, 0xf3, 0x87,
+		0x51, 0x25, 0xb9, 0xcd, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0xd6, 0xa2, 0x3e, 0x4a, 0x1b, 0x6f, 0xf3, 0x87,
+		0x51, 0x25, 0xb9, 0xcd, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xcf, 0x8e, 0x4d, 0x0c, 0xcb, 0x8a, 0x49, 0x08,
+		0xc7, 0x86, 0x45, 0x04, 0xc3, 0x82, 0x41, 0x00  },
+	{	0xcb, 0xbf, 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a,
+		0x51, 0x25, 0xb9, 0xcd, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xde, 0x9c, 0x5a, 0x18, 0xd6, 0x94, 0x52, 0x10,
+		0xce, 0x8c, 0x4a, 0x08, 0xc6, 0x84, 0x42, 0x00  },
+	{	0xcb, 0xbf, 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a,
+		0x51, 0x25, 0xb9, 0xcd, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xd1, 0x92, 0x57, 0x14, 0xdd, 0x9e, 0x5b, 0x18,
+		0xc9, 0x8a, 0x4f, 0x0c, 0xc5, 0x86, 0x43, 0x00  },
+	{	0xf1, 0x85, 0x19, 0x6d, 0x21, 0x55, 0xc9, 0xbd,
+		0x4c, 0x38, 0xa4, 0xd0, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xfc, 0xb8, 0x74, 0x30, 0xec, 0xa8, 0x64, 0x20,
+		0xdc, 0x98, 0x54, 0x10, 0xcc, 0x88, 0x44, 0x00  },
+	{	0xf1, 0x85, 0x19, 0x6d, 0x21, 0x55, 0xc9, 0xbd,
+		0x4c, 0x38, 0xa4, 0xd0, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xf3, 0xb6, 0x79, 0x3c, 0xe7, 0xa2, 0x6d, 0x28,
+		0xdb, 0x9e, 0x51, 0x14, 0xcf, 0x8a, 0x45, 0x00  },
+	{	0xec, 0x98, 0x04, 0x70, 0x3c, 0x48, 0xd4, 0xa0,
+		0x4c, 0x38, 0xa4, 0xd0, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xe2, 0xa4, 0x6e, 0x28, 0xfa, 0xbc, 0x76, 0x30,
+		0xd2, 0x94, 0x5e, 0x18, 0xca, 0x8c, 0x46, 0x00  },
+	{	0xec, 0x98, 0x04, 0x70, 0x3c, 0x48, 0xd4, 0xa0,
+		0x4c, 0x38, 0xa4, 0xd0, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xed, 0xaa, 0x63, 0x24, 0xf1, 0xb6, 0x7f, 0x38,
+		0xd5, 0x92, 0x5b, 0x1c, 0xc9, 0x8e, 0x47, 0x00  },
+	{	0x85, 0xf1, 0x70, 0x04, 0x72, 0x06, 0x87, 0xf3,
+		0x76, 0x02, 0x83, 0xf7, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb8, 0xf0, 0x28, 0x60, 0x98, 0xd0, 0x08, 0x40,
+		0xf8, 0xb0, 0x68, 0x20, 0xd8, 0x90, 0x48, 0x00  },
+	{	0x85, 0xf1, 0x70, 0x04, 0x72, 0x06, 0x87, 0xf3,
+		0x76, 0x02, 0x83, 0xf7, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb7, 0xfe, 0x25, 0x6c, 0x93, 0xda, 0x01, 0x48,
+		0xff, 0xb6, 0x6d, 0x24, 0xdb, 0x92, 0x49, 0x00  },
+	{	0x98, 0xec, 0x6d, 0x19, 0x6f, 0x1b, 0x9a, 0xee,
+		0x76, 0x02, 0x83, 0xf7, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa6, 0xec, 0x32, 0x78, 0x8e, 0xc4, 0x1a, 0x50,
+		0xf6, 0xbc, 0x62, 0x28, 0xde, 0x94, 0x4a, 0x00  },
+	{	0x98, 0xec, 0x6d, 0x19, 0x6f, 0x1b, 0x9a, 0xee,
+		0x76, 0x02, 0x83, 0xf7, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa9, 0xe2, 0x3f, 0x74, 0x85, 0xce, 0x13, 0x58,
+		0xf1, 0xba, 0x67, 0x2c, 0xdd, 0x96, 0x4b, 0x00  },
+	{	0xa2, 0xd6, 0x57, 0x23, 0x48, 0x3c, 0xbd, 0xc9,
+		0x6b, 0x1f, 0x9e, 0xea, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x84, 0xc8, 0x1c, 0x50, 0xb4, 0xf8, 0x2c, 0x60,
+		0xe4, 0xa8, 0x7c, 0x30, 0xd4, 0x98, 0x4c, 0x00  },
+	{	0xa2, 0xd6, 0x57, 0x23, 0x48, 0x3c, 0xbd, 0xc9,
+		0x6b, 0x1f, 0x9e, 0xea, 0x81, 0xf5, 0x74, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x8b, 0xc6, 0x11, 0x5c, 0xbf, 0xf2, 0x25, 0x68,
+		0xe3, 0xae, 0x79, 0x34, 0xd7, 0x9a, 0x4d, 0x00  },
+	{	0xbf, 0xcb, 0x4a, 0x3e, 0x55, 0x21, 0xa0, 0xd4,
+		0x6b, 0x1f, 0x9e, 0xea, 0x81, 0xf5, 0x74, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x9a, 0xd4, 0x06, 0x48, 0xa2, 0xec, 0x3e, 0x70,
+		0xea, 0xa4, 0x76, 0x38, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0xbf, 0xcb, 0x4a, 0x3e, 0x55, 0x21, 0xa0, 0xd4,
+		0x6b, 0x1f, 0x9e, 0xea, 0x81, 0xf5, 0x74, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x95, 0xda, 0x0b, 0x44, 0xa9, 0xe6, 0x37, 0x78,
+		0xed, 0xa2, 0x73, 0x3c, 0xd1, 0x9e, 0x4f, 0x00  },
+	{	0x6d, 0x04, 0xbf, 0xd6, 0xd4, 0xbd, 0x06, 0x6f,
+		0x02, 0x6b, 0xd0, 0xb9, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x6d, 0x04, 0xbf, 0xd6, 0xd4, 0xbd, 0x06, 0x6f,
+		0x02, 0x6b, 0xd0, 0xb9, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3f, 0x6e, 0x9d, 0xcc, 0x7b, 0x2a, 0xd9, 0x88,
+		0xb7, 0xe6, 0x15, 0x44, 0xf3, 0xa2, 0x51, 0x00  },
+	{	0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72,
+		0x02, 0x6b, 0xd0, 0xb9, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x2e, 0x7c, 0x8a, 0xd8, 0x66, 0x34, 0xc2, 0x90,
+		0xbe, 0xec, 0x1a, 0x48, 0xf6, 0xa4, 0x52, 0x00  },
+	{	0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72,
+		0x02, 0x6b, 0xd0, 0xb9, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x21, 0x72, 0x87, 0xd4, 0x6d, 0x3e, 0xcb, 0x98,
+		0xb9, 0xea, 0x1f, 0x4c, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0x4a, 0x23, 0x98, 0xf1, 0xee, 0x87, 0x3c, 0x55,
+		0x1f, 0x76, 0xcd, 0xa4, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x0c, 0x58, 0xa4, 0xf0, 0x5c, 0x08, 0xf4, 0xa0,
+		0xac, 0xf8, 0x04, 0x50, 0xfc, 0xa8, 0x54, 0x00  },
+	{	0x4a, 0x23, 0x98, 0xf1, 0xee, 0x87, 0x3c, 0x55,
+		0x1f, 0x76, 0xcd, 0xa4, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x03, 0x56, 0xa9, 0xfc, 0x57, 0x02, 0xfd, 0xa8,
+		0xab, 0xfe, 0x01, 0x54, 0xff, 0xaa, 0x55, 0x00  },
+	{	0x57, 0x3e, 0x85, 0xec, 0xf3, 0x9a, 0x21, 0x48,
+		0x1f, 0x76, 0xcd, 0xa4, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x12, 0x44, 0xbe, 0xe8, 0x4a, 0x1c, 0xe6, 0xb0,
+		0xa2, 0xf4, 0x0e, 0x58, 0xfa, 0xac, 0x56, 0x00  },
+	{	0x57, 0x3e, 0x85, 0xec, 0xf3, 0x9a, 0x21, 0x48,
+		0x1f, 0x76, 0xcd, 0xa4, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1d, 0x4a, 0xb3, 0xe4, 0x41, 0x16, 0xef, 0xb8,
+		0xa5, 0xf2, 0x0b, 0x5c, 0xf9, 0xae, 0x57, 0x00  },
+	{	0x3e, 0x57, 0xf1, 0x98, 0xbd, 0xd4, 0x72, 0x1b,
+		0x25, 0x4c, 0xea, 0x83, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x48, 0x10, 0xf8, 0xa0, 0x28, 0x70, 0x98, 0xc0,
+		0x88, 0xd0, 0x38, 0x60, 0xe8, 0xb0, 0x58, 0x00  },
+	{	0x3e, 0x57, 0xf1, 0x98, 0xbd, 0xd4, 0x72, 0x1b,
+		0x25, 0x4c, 0xea, 0x83, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x47, 0x1e, 0xf5, 0xac, 0x23, 0x7a, 0x91, 0xc8,
+		0x8f, 0xd6, 0x3d, 0x64, 0xeb, 0xb2, 0x59, 0x00  },
+	{	0x23, 0x4a, 0xec, 0x85, 0xa0, 0xc9, 0x6f, 0x06,
+		0x25, 0x4c, 0xea, 0x83, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x56, 0x0c, 0xe2, 0xb8, 0x3e, 0x64, 0x8a, 0xd0,
+		0x86, 0xdc, 0x32, 0x68, 0xee, 0xb4, 0x5a, 0x00  },
+	{	0x23, 0x4a, 0xec, 0x85, 0xa0, 0xc9, 0x6f, 0x06,
+		0x25, 0x4c, 0xea, 0x83, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x59, 0x02, 0xef, 0xb4, 0x35, 0x6e, 0x83, 0xd8,
+		0x81, 0xda, 0x37, 0x6c, 0xed, 0xb6, 0x5b, 0x00  },
+	{	0x19, 0x70, 0xd6, 0xbf, 0x87, 0xee, 0x48, 0x21,
+		0x38, 0x51, 0xf7, 0x9e, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x74, 0x28, 0xcc, 0x90, 0x04, 0x58, 0xbc, 0xe0,
+		0x94, 0xc8, 0x2c, 0x70, 0xe4, 0xb8, 0x5c, 0x00  },
+	{	0x19, 0x70, 0xd6, 0xbf, 0x87, 0xee, 0x48, 0x21,
+		0x38, 0x51, 0xf7, 0x9e, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x7b, 0x26, 0xc1, 0x9c, 0x0f, 0x52, 0xb5, 0xe8,
+		0x93, 0xce, 0x29, 0x74, 0xe7, 0xba, 0x5d, 0x00  },
+	{	0x04, 0x6d, 0xcb, 0xa2, 0x9a, 0xf3, 0x55, 0x3c,
+		0x38, 0x51, 0xf7, 0x9e, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x6a, 0x34, 0xd6, 0x88, 0x12, 0x4c, 0xae, 0xf0,
+		0x9a, 0xc4, 0x26, 0x78, 0xe2, 0xbc, 0x5e, 0x00  },
+	{	0x04, 0x6d, 0xcb, 0xa2, 0x9a, 0xf3, 0x55, 0x3c,
+		0x38, 0x51, 0xf7, 0x9e, 0xa6, 0xcf, 0x69, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x65, 0x3a, 0xdb, 0x84, 0x19, 0x46, 0xa7, 0xf8,
+		0x9d, 0xc2, 0x23, 0x7c, 0xe1, 0xbe, 0x5f, 0x00  },
+	{	0xbd, 0xf3, 0x21, 0x6f, 0x98, 0xd6, 0x04, 0x4a,
+		0xf7, 0xb9, 0x6b, 0x25, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0xbd, 0xf3, 0x21, 0x6f, 0x98, 0xd6, 0x04, 0x4a,
+		0xf7, 0xb9, 0x6b, 0x25, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x2f, 0x4e, 0xed, 0x8c, 0xab, 0xca, 0x69, 0x08,
+		0x27, 0x46, 0xe5, 0x84, 0xa3, 0xc2, 0x61, 0x00  },
+	{	0xa0, 0xee, 0x3c, 0x72, 0x85, 0xcb, 0x19, 0x57,
+		0xf7, 0xb9, 0x6b, 0x25, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3e, 0x5c, 0xfa, 0x98, 0xb6, 0xd4, 0x72, 0x10,
+		0x2e, 0x4c, 0xea, 0x88, 0xa6, 0xc4, 0x62, 0x00  },
+	{	0xa0, 0xee, 0x3c, 0x72, 0x85, 0xcb, 0x19, 0x57,
+		0xf7, 0xb9, 0x6b, 0x25, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x31, 0x52, 0xf7, 0x94, 0xbd, 0xde, 0x7b, 0x18,
+		0x29, 0x4a, 0xef, 0x8c, 0xa5, 0xc6, 0x63, 0x00  },
+	{	0x9a, 0xd4, 0x06, 0x48, 0xa2, 0xec, 0x3e, 0x70,
+		0xea, 0xa4, 0x76, 0x38, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x1c, 0x78, 0xd4, 0xb0, 0x8c, 0xe8, 0x44, 0x20,
+		0x3c, 0x58, 0xf4, 0x90, 0xac, 0xc8, 0x64, 0x00  },
+	{	0x9a, 0xd4, 0x06, 0x48, 0xa2, 0xec, 0x3e, 0x70,
+		0xea, 0xa4, 0x76, 0x38, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x13, 0x76, 0xd9, 0xbc, 0x87, 0xe2, 0x4d, 0x28,
+		0x3b, 0x5e, 0xf1, 0x94, 0xaf, 0xca, 0x65, 0x00  },
+	{	0x87, 0xc9, 0x1b, 0x55, 0xbf, 0xf1, 0x23, 0x6d,
+		0xea, 0xa4, 0x76, 0x38, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x02, 0x64, 0xce, 0xa8, 0x9a, 0xfc, 0x56, 0x30,
+		0x32, 0x54, 0xfe, 0x98, 0xaa, 0xcc, 0x66, 0x00  },
+	{	0x87, 0xc9, 0x1b, 0x55, 0xbf, 0xf1, 0x23, 0x6d,
+		0xea, 0xa4, 0x76, 0x38, 0xd2, 0x9c, 0x4e, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x0d, 0x6a, 0xc3, 0xa4, 0x91, 0xf6, 0x5f, 0x38,
+		0x35, 0x52, 0xfb, 0x9c, 0xa9, 0xce, 0x67, 0x00  },
+	{	0xee, 0xa0, 0x6f, 0x21, 0xf1, 0xbf, 0x70, 0x3e,
+		0xd0, 0x9e, 0x51, 0x1f, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x58, 0x30, 0x88, 0xe0, 0xf8, 0x90, 0x28, 0x40,
+		0x18, 0x70, 0xc8, 0xa0, 0xb8, 0xd0, 0x68, 0x00  },
+	{	0xee, 0xa0, 0x6f, 0x21, 0xf1, 0xbf, 0x70, 0x3e,
+		0xd0, 0x9e, 0x51, 0x1f, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x57, 0x3e, 0x85, 0xec, 0xf3, 0x9a, 0x21, 0x48,
+		0x1f, 0x76, 0xcd, 0xa4, 0xbb, 0xd2, 0x69, 0x00  },
+	{	0xf3, 0xbd, 0x72, 0x3c, 0xec, 0xa2, 0x6d, 0x23,
+		0xd0, 0x9e, 0x51, 0x1f, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x46, 0x2c, 0x92, 0xf8, 0xee, 0x84, 0x3a, 0x50,
+		0x16, 0x7c, 0xc2, 0xa8, 0xbe, 0xd4, 0x6a, 0x00  },
+	{	0xf3, 0xbd, 0x72, 0x3c, 0xec, 0xa2, 0x6d, 0x23,
+		0xd0, 0x9e, 0x51, 0x1f, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x49, 0x22, 0x9f, 0xf4, 0xe5, 0x8e, 0x33, 0x58,
+		0x11, 0x7a, 0xc7, 0xac, 0xbd, 0xd6, 0x6b, 0x00  },
+	{	0xc9, 0x87, 0x48, 0x06, 0xcb, 0x85, 0x4a, 0x04,
+		0xcd, 0x83, 0x4c, 0x02, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x64, 0x08, 0xbc, 0xd0, 0xd4, 0xb8, 0x0c, 0x60,
+		0x04, 0x68, 0xdc, 0xb0, 0xb4, 0xd8, 0x6c, 0x00  },
+	{	0xc9, 0x87, 0x48, 0x06, 0xcb, 0x85, 0x4a, 0x04,
+		0xcd, 0x83, 0x4c, 0x02, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x6b, 0x06, 0xb1, 0xdc, 0xdf, 0xb2, 0x05, 0x68,
+		0x03, 0x6e, 0xd9, 0xb4, 0xb7, 0xda, 0x6d, 0x00  },
+	{	0xd4, 0x9a, 0x55, 0x1b, 0xd6, 0x98, 0x57, 0x19,
+		0xcd, 0x83, 0x4c, 0x02, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x7a, 0x14, 0xa6, 0xc8, 0xc2, 0xac, 0x1e, 0x70,
+		0x0a, 0x64, 0xd6, 0xb8, 0xb2, 0xdc, 0x6e, 0x00  },
+	{	0xd4, 0x9a, 0x55, 0x1b, 0xd6, 0x98, 0x57, 0x19,
+		0xcd, 0x83, 0x4c, 0x02, 0xcf, 0x81, 0x4e, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x75, 0x1a, 0xab, 0xc4, 0xc9, 0xa6, 0x17, 0x78,
+		0x0d, 0x62, 0xd3, 0xbc, 0xb1, 0xde, 0x6f, 0x00  },
+	{	0x06, 0x55, 0xa0, 0xf3, 0x57, 0x04, 0xf1, 0xa2,
+		0xa4, 0xf7, 0x02, 0x51, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x06, 0x55, 0xa0, 0xf3, 0x57, 0x04, 0xf1, 0xa2,
+		0xa4, 0xf7, 0x02, 0x51, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xdf, 0xae, 0x3d, 0x4c, 0x1b, 0x6a, 0xf9, 0x88,
+		0x57, 0x26, 0xb5, 0xc4, 0x93, 0xe2, 0x71, 0x00  },
+	{	0x1b, 0x48, 0xbd, 0xee, 0x4a, 0x19, 0xec, 0xbf,
+		0xa4, 0xf7, 0x02, 0x51, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xce, 0xbc, 0x2a, 0x58, 0x06, 0x74, 0xe2, 0x90,
+		0x5e, 0x2c, 0xba, 0xc8, 0x96, 0xe4, 0x72, 0x00  },
+	{	0x1b, 0x48, 0xbd, 0xee, 0x4a, 0x19, 0xec, 0xbf,
+		0xa4, 0xf7, 0x02, 0x51, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xc1, 0xb2, 0x27, 0x54, 0x0d, 0x7e, 0xeb, 0x98,
+		0x59, 0x2a, 0xbf, 0xcc, 0x95, 0xe6, 0x73, 0x00  },
+	{	0x21, 0x72, 0x87, 0xd4, 0x6d, 0x3e, 0xcb, 0x98,
+		0xb9, 0xea, 0x1f, 0x4c, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xec, 0x98, 0x04, 0x70, 0x3c, 0x48, 0xd4, 0xa0,
+		0x4c, 0x38, 0xa4, 0xd0, 0x9c, 0xe8, 0x74, 0x00  },
+	{	0x21, 0x72, 0x87, 0xd4, 0x6d, 0x3e, 0xcb, 0x98,
+		0xb9, 0xea, 0x1f, 0x4c, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xe3, 0x96, 0x09, 0x7c, 0x37, 0x42, 0xdd, 0xa8,
+		0x4b, 0x3e, 0xa1, 0xd4, 0x9f, 0xea, 0x75, 0x00  },
+	{	0x3c, 0x6f, 0x9a, 0xc9, 0x70, 0x23, 0xd6, 0x85,
+		0xb9, 0xea, 0x1f, 0x4c, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xf2, 0x84, 0x1e, 0x68, 0x2a, 0x5c, 0xc6, 0xb0,
+		0x42, 0x34, 0xae, 0xd8, 0x9a, 0xec, 0x76, 0x00  },
+	{	0x3c, 0x6f, 0x9a, 0xc9, 0x70, 0x23, 0xd6, 0x85,
+		0xb9, 0xea, 0x1f, 0x4c, 0xf5, 0xa6, 0x53, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xfd, 0x8a, 0x13, 0x64, 0x21, 0x56, 0xcf, 0xb8,
+		0x45, 0x32, 0xab, 0xdc, 0x99, 0xee, 0x77, 0x00  },
+	{	0x55, 0x06, 0xee, 0xbd, 0x3e, 0x6d, 0x85, 0xd6,
+		0x83, 0xd0, 0x38, 0x6b, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa8, 0xd0, 0x58, 0x20, 0x48, 0x30, 0xb8, 0xc0,
+		0x68, 0x10, 0x98, 0xe0, 0x88, 0xf0, 0x78, 0x00  },
+	{	0x55, 0x06, 0xee, 0xbd, 0x3e, 0x6d, 0x85, 0xd6,
+		0x83, 0xd0, 0x38, 0x6b, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xa7, 0xde, 0x55, 0x2c, 0x43, 0x3a, 0xb1, 0xc8,
+		0x6f, 0x16, 0x9d, 0xe4, 0x8b, 0xf2, 0x79, 0x00  },
+	{	0x48, 0x1b, 0xf3, 0xa0, 0x23, 0x70, 0x98, 0xcb,
+		0x83, 0xd0, 0x38, 0x6b, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb6, 0xcc, 0x42, 0x38, 0x5e, 0x24, 0xaa, 0xd0,
+		0x66, 0x1c, 0x92, 0xe8, 0x8e, 0xf4, 0x7a, 0x00  },
+	{	0x48, 0x1b, 0xf3, 0xa0, 0x23, 0x70, 0x98, 0xcb,
+		0x83, 0xd0, 0x38, 0x6b, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0xb9, 0xc2, 0x4f, 0x34, 0x55, 0x2e, 0xa3, 0xd8,
+		0x61, 0x1a, 0x97, 0xec, 0x8d, 0xf6, 0x7b, 0x00  },
+	{	0x72, 0x21, 0xc9, 0x9a, 0x04, 0x57, 0xbf, 0xec,
+		0x9e, 0xcd, 0x25, 0x76, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x94, 0xe8, 0x6c, 0x10, 0x64, 0x18, 0x9c, 0xe0,
+		0x74, 0x08, 0x8c, 0xf0, 0x84, 0xf8, 0x7c, 0x00  },
+	{	0x72, 0x21, 0xc9, 0x9a, 0x04, 0x57, 0xbf, 0xec,
+		0x9e, 0xcd, 0x25, 0x76, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x9b, 0xe6, 0x61, 0x1c, 0x6f, 0x12, 0x95, 0xe8,
+		0x73, 0x0e, 0x89, 0xf4, 0x87, 0xfa, 0x7d, 0x00  },
+	{	0x6f, 0x3c, 0xd4, 0x87, 0x19, 0x4a, 0xa2, 0xf1,
+		0x9e, 0xcd, 0x25, 0x76, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x8a, 0xf4, 0x76, 0x08, 0x72, 0x0c, 0x8e, 0xf0,
+		0x7a, 0x04, 0x86, 0xf8, 0x82, 0xfc, 0x7e, 0x00  },
+	{	0x6f, 0x3c, 0xd4, 0x87, 0x19, 0x4a, 0xa2, 0xf1,
+		0x9e, 0xcd, 0x25, 0x76, 0xe8, 0xbb, 0x53, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x00, 0x00, 0x00, 0x00  },
+	{	0x85, 0xfa, 0x7b, 0x04, 0x79, 0x06, 0x87, 0xf8,
+		0x7d, 0x02, 0x83, 0xfc, 0x81, 0xfe, 0x7f, 0x00  },
+	{	0xb1, 0x59, 0x7c, 0x94, 0x36, 0xde, 0xfb, 0x13,
+		0xa2, 0x4a, 0x6f, 0x87, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0xb1, 0x59, 0x7c, 0x94, 0x36, 0xde, 0xfb, 0x13,
+		0xa2, 0x4a, 0x6f, 0x87, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x8f, 0x0e, 0x8d, 0x0c, 0x8b, 0x0a, 0x89, 0x08,
+		0x87, 0x06, 0x85, 0x04, 0x83, 0x02, 0x81, 0x00  },
+	{	0xac, 0x44, 0x61, 0x89, 0x2b, 0xc3, 0xe6, 0x0e,
+		0xa2, 0x4a, 0x6f, 0x87, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x9e, 0x1c, 0x9a, 0x18, 0x96, 0x14, 0x92, 0x10,
+		0x8e, 0x0c, 0x8a, 0x08, 0x86, 0x04, 0x82, 0x00  },
+	{	0xac, 0x44, 0x61, 0x89, 0x2b, 0xc3, 0xe6, 0x0e,
+		0xa2, 0x4a, 0x6f, 0x87, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x91, 0x12, 0x97, 0x14, 0x9d, 0x1e, 0x9b, 0x18,
+		0x89, 0x0a, 0x8f, 0x0c, 0x85, 0x06, 0x83, 0x00  },
+	{	0x96, 0x7e, 0x5b, 0xb3, 0x0c, 0xe4, 0xc1, 0x29,
+		0xbf, 0x57, 0x72, 0x9a, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xbc, 0x38, 0xb4, 0x30, 0xac, 0x28, 0xa4, 0x20,
+		0x9c, 0x18, 0x94, 0x10, 0x8c, 0x08, 0x84, 0x00  },
+	{	0x96, 0x7e, 0x5b, 0xb3, 0x0c, 0xe4, 0xc1, 0x29,
+		0xbf, 0x57, 0x72, 0x9a, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xb3, 0x36, 0xb9, 0x3c, 0xa7, 0x22, 0xad, 0x28,
+		0x9b, 0x1e, 0x91, 0x14, 0x8f, 0x0a, 0x85, 0x00  },
+	{	0x8b, 0x63, 0x46, 0xae, 0x11, 0xf9, 0xdc, 0x34,
+		0xbf, 0x57, 0x72, 0x9a, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xa2, 0x24, 0xae, 0x28, 0xba, 0x3c, 0xb6, 0x30,
+		0x92, 0x14, 0x9e, 0x18, 0x8a, 0x0c, 0x86, 0x00  },
+	{	0x8b, 0x63, 0x46, 0xae, 0x11, 0xf9, 0xdc, 0x34,
+		0xbf, 0x57, 0x72, 0x9a, 0x25, 0xcd, 0xe8, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xad, 0x2a, 0xa3, 0x24, 0xb1, 0x36, 0xbf, 0x38,
+		0x95, 0x12, 0x9b, 0x1c, 0x89, 0x0e, 0x87, 0x00  },
+	{	0xe2, 0x0a, 0x32, 0xda, 0x5f, 0xb7, 0x8f, 0x67,
+		0x85, 0x6d, 0x55, 0xbd, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf8, 0x70, 0xe8, 0x60, 0xd8, 0x50, 0xc8, 0x40,
+		0xb8, 0x30, 0xa8, 0x20, 0x98, 0x10, 0x88, 0x00  },
+	{	0xe2, 0x0a, 0x32, 0xda, 0x5f, 0xb7, 0x8f, 0x67,
+		0x85, 0x6d, 0x55, 0xbd, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf7, 0x7e, 0xe5, 0x6c, 0xd3, 0x5a, 0xc1, 0x48,
+		0xbf, 0x36, 0xad, 0x24, 0x9b, 0x12, 0x89, 0x00  },
+	{	0xff, 0x17, 0x2f, 0xc7, 0x42, 0xaa, 0x92, 0x7a,
+		0x85, 0x6d, 0x55, 0xbd, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe6, 0x6c, 0xf2, 0x78, 0xce, 0x44, 0xda, 0x50,
+		0xb6, 0x3c, 0xa2, 0x28, 0x9e, 0x14, 0x8a, 0x00  },
+	{	0xff, 0x17, 0x2f, 0xc7, 0x42, 0xaa, 0x92, 0x7a,
+		0x85, 0x6d, 0x55, 0xbd, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe9, 0x62, 0xff, 0x74, 0xc5, 0x4e, 0xd3, 0x58,
+		0xb1, 0x3a, 0xa7, 0x2c, 0x9d, 0x16, 0x8b, 0x00  },
+	{	0xc5, 0x2d, 0x15, 0xfd, 0x65, 0x8d, 0xb5, 0x5d,
+		0x98, 0x70, 0x48, 0xa0, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc4, 0x48, 0xdc, 0x50, 0xf4, 0x78, 0xec, 0x60,
+		0xa4, 0x28, 0xbc, 0x30, 0x94, 0x18, 0x8c, 0x00  },
+	{	0xc5, 0x2d, 0x15, 0xfd, 0x65, 0x8d, 0xb5, 0x5d,
+		0x98, 0x70, 0x48, 0xa0, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xcb, 0x46, 0xd1, 0x5c, 0xff, 0x72, 0xe5, 0x68,
+		0xa3, 0x2e, 0xb9, 0x34, 0x97, 0x1a, 0x8d, 0x00  },
+	{	0xd8, 0x30, 0x08, 0xe0, 0x78, 0x90, 0xa8, 0x40,
+		0x98, 0x70, 0x48, 0xa0, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xda, 0x54, 0xc6, 0x48, 0xe2, 0x6c, 0xfe, 0x70,
+		0xaa, 0x24, 0xb6, 0x38, 0x92, 0x1c, 0x8e, 0x00  },
+	{	0xd8, 0x30, 0x08, 0xe0, 0x78, 0x90, 0xa8, 0x40,
+		0x98, 0x70, 0x48, 0xa0, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd5, 0x5a, 0xcb, 0x44, 0xe9, 0x66, 0xf7, 0x78,
+		0xad, 0x22, 0xb3, 0x3c, 0x91, 0x1e, 0x8f, 0x00  },
+	{	0x0a, 0xff, 0xfd, 0x08, 0xf9, 0x0c, 0x0e, 0xfb,
+		0xf1, 0x04, 0x06, 0xf3, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x0a, 0xff, 0xfd, 0x08, 0xf9, 0x0c, 0x0e, 0xfb,
+		0xf1, 0x04, 0x06, 0xf3, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x7f, 0xee, 0x5d, 0xcc, 0x3b, 0xaa, 0x19, 0x88,
+		0xf7, 0x66, 0xd5, 0x44, 0xb3, 0x22, 0x91, 0x00  },
+	{	0x17, 0xe2, 0xe0, 0x15, 0xe4, 0x11, 0x13, 0xe6,
+		0xf1, 0x04, 0x06, 0xf3, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x6e, 0xfc, 0x4a, 0xd8, 0x26, 0xb4, 0x02, 0x90,
+		0xfe, 0x6c, 0xda, 0x48, 0xb6, 0x24, 0x92, 0x00  },
+	{	0x17, 0xe2, 0xe0, 0x15, 0xe4, 0x11, 0x13, 0xe6,
+		0xf1, 0x04, 0x06, 0xf3, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x61, 0xf2, 0x47, 0xd4, 0x2d, 0xbe, 0x0b, 0x98,
+		0xf9, 0x6a, 0xdf, 0x4c, 0xb5, 0x26, 0x93, 0x00  },
+	{	0x2d, 0xd8, 0xda, 0x2f, 0xc3, 0x36, 0x34, 0xc1,
+		0xec, 0x19, 0x1b, 0xee, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x4c, 0xd8, 0x64, 0xf0, 0x1c, 0x88, 0x34, 0xa0,
+		0xec, 0x78, 0xc4, 0x50, 0xbc, 0x28, 0x94, 0x00  },
+	{	0x2d, 0xd8, 0xda, 0x2f, 0xc3, 0x36, 0x34, 0xc1,
+		0xec, 0x19, 0x1b, 0xee, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x43, 0xd6, 0x69, 0xfc, 0x17, 0x82, 0x3d, 0xa8,
+		0xeb, 0x7e, 0xc1, 0x54, 0xbf, 0x2a, 0x95, 0x00  },
+	{	0x30, 0xc5, 0xc7, 0x32, 0xde, 0x2b, 0x29, 0xdc,
+		0xec, 0x19, 0x1b, 0xee, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x52, 0xc4, 0x7e, 0xe8, 0x0a, 0x9c, 0x26, 0xb0,
+		0xe2, 0x74, 0xce, 0x58, 0xba, 0x2c, 0x96, 0x00  },
+	{	0x30, 0xc5, 0xc7, 0x32, 0xde, 0x2b, 0x29, 0xdc,
+		0xec, 0x19, 0x1b, 0xee, 0x02, 0xf7, 0xf5, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x5d, 0xca, 0x73, 0xe4, 0x01, 0x96, 0x2f, 0xb8,
+		0xe5, 0x72, 0xcb, 0x5c, 0xb9, 0x2e, 0x97, 0x00  },
+	{	0x59, 0xac, 0xb3, 0x46, 0x90, 0x65, 0x7a, 0x8f,
+		0xd6, 0x23, 0x3c, 0xc9, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x08, 0x90, 0x38, 0xa0, 0x68, 0xf0, 0x58, 0xc0,
+		0xc8, 0x50, 0xf8, 0x60, 0xa8, 0x30, 0x98, 0x00  },
+	{	0x59, 0xac, 0xb3, 0x46, 0x90, 0x65, 0x7a, 0x8f,
+		0xd6, 0x23, 0x3c, 0xc9, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x07, 0x9e, 0x35, 0xac, 0x63, 0xfa, 0x51, 0xc8,
+		0xcf, 0x56, 0xfd, 0x64, 0xab, 0x32, 0x99, 0x00  },
+	{	0x44, 0xb1, 0xae, 0x5b, 0x8d, 0x78, 0x67, 0x92,
+		0xd6, 0x23, 0x3c, 0xc9, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x16, 0x8c, 0x22, 0xb8, 0x7e, 0xe4, 0x4a, 0xd0,
+		0xc6, 0x5c, 0xf2, 0x68, 0xae, 0x34, 0x9a, 0x00  },
+	{	0x44, 0xb1, 0xae, 0x5b, 0x8d, 0x78, 0x67, 0x92,
+		0xd6, 0x23, 0x3c, 0xc9, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x19, 0x82, 0x2f, 0xb4, 0x75, 0xee, 0x43, 0xd8,
+		0xc1, 0x5a, 0xf7, 0x6c, 0xad, 0x36, 0x9b, 0x00  },
+	{	0x7e, 0x8b, 0x94, 0x61, 0xaa, 0x5f, 0x40, 0xb5,
+		0xcb, 0x3e, 0x21, 0xd4, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x34, 0xa8, 0x0c, 0x90, 0x44, 0xd8, 0x7c, 0xe0,
+		0xd4, 0x48, 0xec, 0x70, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x7e, 0x8b, 0x94, 0x61, 0xaa, 0x5f, 0x40, 0xb5,
+		0xcb, 0x3e, 0x21, 0xd4, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x3b, 0xa6, 0x01, 0x9c, 0x4f, 0xd2, 0x75, 0xe8,
+		0xd3, 0x4e, 0xe9, 0x74, 0xa7, 0x3a, 0x9d, 0x00  },
+	{	0x63, 0x96, 0x89, 0x7c, 0xb7, 0x42, 0x5d, 0xa8,
+		0xcb, 0x3e, 0x21, 0xd4, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x2a, 0xb4, 0x16, 0x88, 0x52, 0xcc, 0x6e, 0xf0,
+		0xda, 0x44, 0xe6, 0x78, 0xa2, 0x3c, 0x9e, 0x00  },
+	{	0x63, 0x96, 0x89, 0x7c, 0xb7, 0x42, 0x5d, 0xa8,
+		0xcb, 0x3e, 0x21, 0xd4, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x53, 0x53, 0x4e, 0x4e, 0x69, 0x69, 0x74, 0x74,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x25, 0xba, 0x1b, 0x84, 0x59, 0xc6, 0x67, 0xf8,
+		0xdd, 0x42, 0xe3, 0x7c, 0xa1, 0x3e, 0x9f, 0x00  },
+	{	0xda, 0x08, 0x63, 0xb1, 0xb5, 0x67, 0x0c, 0xde,
+		0x04, 0xd6, 0xbd, 0x6f, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0xda, 0x08, 0x63, 0xb1, 0xb5, 0x67, 0x0c, 0xde,
+		0x04, 0xd6, 0xbd, 0x6f, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x6f, 0xce, 0x2d, 0x8c, 0xeb, 0x4a, 0xa9, 0x08,
+		0x67, 0xc6, 0x25, 0x84, 0xe3, 0x42, 0xa1, 0x00  },
+	{	0xc7, 0x15, 0x7e, 0xac, 0xa8, 0x7a, 0x11, 0xc3,
+		0x04, 0xd6, 0xbd, 0x6f, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x7e, 0xdc, 0x3a, 0x98, 0xf6, 0x54, 0xb2, 0x10,
+		0x6e, 0xcc, 0x2a, 0x88, 0xe6, 0x44, 0xa2, 0x00  },
+	{	0xc7, 0x15, 0x7e, 0xac, 0xa8, 0x7a, 0x11, 0xc3,
+		0x04, 0xd6, 0xbd, 0x6f, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x71, 0xd2, 0x37, 0x94, 0xfd, 0x5e, 0xbb, 0x18,
+		0x69, 0xca, 0x2f, 0x8c, 0xe5, 0x46, 0xa3, 0x00  },
+	{	0xfd, 0x2f, 0x44, 0x96, 0x8f, 0x5d, 0x36, 0xe4,
+		0x19, 0xcb, 0xa0, 0x72, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x5c, 0xf8, 0x14, 0xb0, 0xcc, 0x68, 0x84, 0x20,
+		0x7c, 0xd8, 0x34, 0x90, 0xec, 0x48, 0xa4, 0x00  },
+	{	0xfd, 0x2f, 0x44, 0x96, 0x8f, 0x5d, 0x36, 0xe4,
+		0x19, 0xcb, 0xa0, 0x72, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x53, 0xf6, 0x19, 0xbc, 0xc7, 0x62, 0x8d, 0x28,
+		0x7b, 0xde, 0x31, 0x94, 0xef, 0x4a, 0xa5, 0x00  },
+	{	0xe0, 0x32, 0x59, 0x8b, 0x92, 0x40, 0x2b, 0xf9,
+		0x19, 0xcb, 0xa0, 0x72, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x42, 0xe4, 0x0e, 0xa8, 0xda, 0x7c, 0x96, 0x30,
+		0x72, 0xd4, 0x3e, 0x98, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0xe0, 0x32, 0x59, 0x8b, 0x92, 0x40, 0x2b, 0xf9,
+		0x19, 0xcb, 0xa0, 0x72, 0x6b, 0xb9, 0xd2, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x4d, 0xea, 0x03, 0xa4, 0xd1, 0x76, 0x9f, 0x38,
+		0x75, 0xd2, 0x3b, 0x9c, 0xe9, 0x4e, 0xa7, 0x00  },
+	{	0x89, 0x5b, 0x2d, 0xff, 0xdc, 0x0e, 0x78, 0xaa,
+		0x23, 0xf1, 0x87, 0x55, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x18, 0xb0, 0x48, 0xe0, 0xb8, 0x10, 0xe8, 0x40,
+		0x58, 0xf0, 0x08, 0xa0, 0xf8, 0x50, 0xa8, 0x00  },
+	{	0x89, 0x5b, 0x2d, 0xff, 0xdc, 0x0e, 0x78, 0xaa,
+		0x23, 0xf1, 0x87, 0x55, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x17, 0xbe, 0x45, 0xec, 0xb3, 0x1a, 0xe1, 0x48,
+		0x5f, 0xf6, 0x0d, 0xa4, 0xfb, 0x52, 0xa9, 0x00  },
+	{	0x94, 0x46, 0x30, 0xe2, 0xc1, 0x13, 0x65, 0xb7,
+		0x23, 0xf1, 0x87, 0x55, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x06, 0xac, 0x52, 0xf8, 0xae, 0x04, 0xfa, 0x50,
+		0x56, 0xfc, 0x02, 0xa8, 0xfe, 0x54, 0xaa, 0x00  },
+	{	0x94, 0x46, 0x30, 0xe2, 0xc1, 0x13, 0x65, 0xb7,
+		0x23, 0xf1, 0x87, 0x55, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x09, 0xa2, 0x5f, 0xf4, 0xa5, 0x0e, 0xf3, 0x58,
+		0x51, 0xfa, 0x07, 0xac, 0xfd, 0x56, 0xab, 0x00  },
+	{	0xae, 0x7c, 0x0a, 0xd8, 0xe6, 0x34, 0x42, 0x90,
+		0x3e, 0xec, 0x9a, 0x48, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x24, 0x88, 0x7c, 0xd0, 0x94, 0x38, 0xcc, 0x60,
+		0x44, 0xe8, 0x1c, 0xb0, 0xf4, 0x58, 0xac, 0x00  },
+	{	0xae, 0x7c, 0x0a, 0xd8, 0xe6, 0x34, 0x42, 0x90,
+		0x3e, 0xec, 0x9a, 0x48, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x2b, 0x86, 0x71, 0xdc, 0x9f, 0x32, 0xc5, 0x68,
+		0x43, 0xee, 0x19, 0xb4, 0xf7, 0x5a, 0xad, 0x00  },
+	{	0xb3, 0x61, 0x17, 0xc5, 0xfb, 0x29, 0x5f, 0x8d,
+		0x3e, 0xec, 0x9a, 0x48, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x3a, 0x94, 0x66, 0xc8, 0x82, 0x2c, 0xde, 0x70,
+		0x4a, 0xe4, 0x16, 0xb8, 0xf2, 0x5c, 0xae, 0x00  },
+	{	0xb3, 0x61, 0x17, 0xc5, 0xfb, 0x29, 0x5f, 0x8d,
+		0x3e, 0xec, 0x9a, 0x48, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x35, 0x9a, 0x6b, 0xc4, 0x89, 0x26, 0xd7, 0x78,
+		0x4d, 0xe2, 0x13, 0xbc, 0xf1, 0x5e, 0xaf, 0x00  },
+	{	0x61, 0xae, 0xe2, 0x2d, 0x7a, 0xb5, 0xf9, 0x36,
+		0x57, 0x98, 0xd4, 0x1b, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x61, 0xae, 0xe2, 0x2d, 0x7a, 0xb5, 0xf9, 0x36,
+		0x57, 0x98, 0xd4, 0x1b, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x9f, 0x2e, 0xfd, 0x4c, 0x5b, 0xea, 0x39, 0x88,
+		0x17, 0xa6, 0x75, 0xc4, 0xd3, 0x62, 0xb1, 0x00  },
+	{	0x7c, 0xb3, 0xff, 0x30, 0x67, 0xa8, 0xe4, 0x2b,
+		0x57, 0x98, 0xd4, 0x1b, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x8e, 0x3c, 0xea, 0x58, 0x46, 0xf4, 0x22, 0x90,
+		0x1e, 0xac, 0x7a, 0xc8, 0xd6, 0x64, 0xb2, 0x00  },
+	{	0x7c, 0xb3, 0xff, 0x30, 0x67, 0xa8, 0xe4, 0x2b,
+		0x57, 0x98, 0xd4, 0x1b, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x81, 0x32, 0xe7, 0x54, 0x4d, 0xfe, 0x2b, 0x98,
+		0x19, 0xaa, 0x7f, 0xcc, 0xd5, 0x66, 0xb3, 0x00  },
+	{	0x46, 0x89, 0xc5, 0x0a, 0x40, 0x8f, 0xc3, 0x0c,
+		0x4a, 0x85, 0xc9, 0x06, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xac, 0x18, 0xc4, 0x70, 0x7c, 0xc8, 0x14, 0xa0,
+		0x0c, 0xb8, 0x64, 0xd0, 0xdc, 0x68, 0xb4, 0x00  },
+	{	0x46, 0x89, 0xc5, 0x0a, 0x40, 0x8f, 0xc3, 0x0c,
+		0x4a, 0x85, 0xc9, 0x06, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xa3, 0x16, 0xc9, 0x7c, 0x77, 0xc2, 0x1d, 0xa8,
+		0x0b, 0xbe, 0x61, 0xd4, 0xdf, 0x6a, 0xb5, 0x00  },
+	{	0x5b, 0x94, 0xd8, 0x17, 0x5d, 0x92, 0xde, 0x11,
+		0x4a, 0x85, 0xc9, 0x06, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xb2, 0x04, 0xde, 0x68, 0x6a, 0xdc, 0x06, 0xb0,
+		0x02, 0xb4, 0x6e, 0xd8, 0xda, 0x6c, 0xb6, 0x00  },
+	{	0x5b, 0x94, 0xd8, 0x17, 0x5d, 0x92, 0xde, 0x11,
+		0x4a, 0x85, 0xc9, 0x06, 0x4c, 0x83, 0xcf, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xbd, 0x0a, 0xd3, 0x64, 0x61, 0xd6, 0x0f, 0xb8,
+		0x05, 0xb2, 0x6b, 0xdc, 0xd9, 0x6e, 0xb7, 0x00  },
+	{	0x32, 0xfd, 0xac, 0x63, 0x13, 0xdc, 0x8d, 0x42,
+		0x70, 0xbf, 0xee, 0x21, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe8, 0x50, 0x98, 0x20, 0x08, 0xb0, 0x78, 0xc0,
+		0x28, 0x90, 0x58, 0xe0, 0xc8, 0x70, 0xb8, 0x00  },
+	{	0x32, 0xfd, 0xac, 0x63, 0x13, 0xdc, 0x8d, 0x42,
+		0x70, 0xbf, 0xee, 0x21, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe7, 0x5e, 0x95, 0x2c, 0x03, 0xba, 0x71, 0xc8,
+		0x2f, 0x96, 0x5d, 0xe4, 0xcb, 0x72, 0xb9, 0x00  },
+	{	0x2f, 0xe0, 0xb1, 0x7e, 0x0e, 0xc1, 0x90, 0x5f,
+		0x70, 0xbf, 0xee, 0x21, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf6, 0x4c, 0x82, 0x38, 0x1e, 0xa4, 0x6a, 0xd0,
+		0x26, 0x9c, 0x52, 0xe8, 0xce, 0x74, 0xba, 0x00  },
+	{	0x2f, 0xe0, 0xb1, 0x7e, 0x0e, 0xc1, 0x90, 0x5f,
+		0x70, 0xbf, 0xee, 0x21, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf9, 0x42, 0x8f, 0x34, 0x15, 0xae, 0x63, 0xd8,
+		0x21, 0x9a, 0x57, 0xec, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x15, 0xda, 0x8b, 0x44, 0x29, 0xe6, 0xb7, 0x78,
+		0x6d, 0xa2, 0xf3, 0x3c, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd4, 0x68, 0xac, 0x10, 0x24, 0x98, 0x5c, 0xe0,
+		0x34, 0x88, 0x4c, 0xf0, 0xc4, 0x78, 0xbc, 0x00  },
+	{	0x15, 0xda, 0x8b, 0x44, 0x29, 0xe6, 0xb7, 0x78,
+		0x6d, 0xa2, 0xf3, 0x3c, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xdb, 0x66, 0xa1, 0x1c, 0x2f, 0x92, 0x55, 0xe8,
+		0x33, 0x8e, 0x49, 0xf4, 0xc7, 0x7a, 0xbd, 0x00  },
+	{	0x08, 0xc7, 0x96, 0x59, 0x34, 0xfb, 0xaa, 0x65,
+		0x6d, 0xa2, 0xf3, 0x3c, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xca, 0x74, 0xb6, 0x08, 0x32, 0x8c, 0x4e, 0xf0,
+		0x3a, 0x84, 0x46, 0xf8, 0xc2, 0x7c, 0xbe, 0x00  },
+	{	0x08, 0xc7, 0x96, 0x59, 0x34, 0xfb, 0xaa, 0x65,
+		0x6d, 0xa2, 0xf3, 0x3c, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x4e, 0x4e, 0x53, 0x53, 0x74, 0x74, 0x69, 0x69,
+		0x27, 0x27, 0x3a, 0x3a, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc5, 0x7a, 0xbb, 0x04, 0x39, 0x86, 0x47, 0xf8,
+		0x3d, 0x82, 0x43, 0xfc, 0xc1, 0x7e, 0xbf, 0x00  },
+	{	0x67, 0xfb, 0x42, 0xde, 0x2d, 0xb1, 0x08, 0x94,
+		0xf3, 0x6f, 0xd6, 0x4a, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x67, 0xfb, 0x42, 0xde, 0x2d, 0xb1, 0x08, 0x94,
+		0xf3, 0x6f, 0xd6, 0x4a, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x4f, 0x8e, 0xcd, 0x0c, 0x4b, 0x8a, 0xc9, 0x08,
+		0x47, 0x86, 0xc5, 0x04, 0x43, 0x82, 0xc1, 0x00  },
+	{	0x7a, 0xe6, 0x5f, 0xc3, 0x30, 0xac, 0x15, 0x89,
+		0xf3, 0x6f, 0xd6, 0x4a, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x5e, 0x9c, 0xda, 0x18, 0x56, 0x94, 0xd2, 0x10,
+		0x4e, 0x8c, 0xca, 0x08, 0x46, 0x84, 0xc2, 0x00  },
+	{	0x7a, 0xe6, 0x5f, 0xc3, 0x30, 0xac, 0x15, 0x89,
+		0xf3, 0x6f, 0xd6, 0x4a, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x51, 0x92, 0xd7, 0x14, 0x5d, 0x9e, 0xdb, 0x18,
+		0x49, 0x8a, 0xcf, 0x0c, 0x45, 0x86, 0xc3, 0x00  },
+	{	0x40, 0xdc, 0x65, 0xf9, 0x17, 0x8b, 0x32, 0xae,
+		0xee, 0x72, 0xcb, 0x57, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x7c, 0xb8, 0xf4, 0x30, 0x6c, 0xa8, 0xe4, 0x20,
+		0x5c, 0x98, 0xd4, 0x10, 0x4c, 0x88, 0xc4, 0x00  },
+	{	0x40, 0xdc, 0x65, 0xf9, 0x17, 0x8b, 0x32, 0xae,
+		0xee, 0x72, 0xcb, 0x57, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x73, 0xb6, 0xf9, 0x3c, 0x67, 0xa2, 0xed, 0x28,
+		0x5b, 0x9e, 0xd1, 0x14, 0x4f, 0x8a, 0xc5, 0x00  },
+	{	0x5d, 0xc1, 0x78, 0xe4, 0x0a, 0x96, 0x2f, 0xb3,
+		0xee, 0x72, 0xcb, 0x57, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x62, 0xa4, 0xee, 0x28, 0x7a, 0xbc, 0xf6, 0x30,
+		0x52, 0x94, 0xde, 0x18, 0x4a, 0x8c, 0xc6, 0x00  },
+	{	0x5d, 0xc1, 0x78, 0xe4, 0x0a, 0x96, 0x2f, 0xb3,
+		0xee, 0x72, 0xcb, 0x57, 0xb9, 0x25, 0x9c, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x6d, 0xaa, 0xe3, 0x24, 0x71, 0xb6, 0xff, 0x38,
+		0x55, 0x92, 0xdb, 0x1c, 0x49, 0x8e, 0xc7, 0x00  },
+	{	0x34, 0xa8, 0x0c, 0x90, 0x44, 0xd8, 0x7c, 0xe0,
+		0xd4, 0x48, 0xec, 0x70, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x38, 0xf0, 0xa8, 0x60, 0x18, 0xd0, 0x88, 0x40,
+		0x78, 0xb0, 0xe8, 0x20, 0x58, 0x90, 0xc8, 0x00  },
+	{	0x34, 0xa8, 0x0c, 0x90, 0x44, 0xd8, 0x7c, 0xe0,
+		0xd4, 0x48, 0xec, 0x70, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x37, 0xfe, 0xa5, 0x6c, 0x13, 0xda, 0x81, 0x48,
+		0x7f, 0xb6, 0xed, 0x24, 0x5b, 0x92, 0xc9, 0x00  },
+	{	0x29, 0xb5, 0x11, 0x8d, 0x59, 0xc5, 0x61, 0xfd,
+		0xd4, 0x48, 0xec, 0x70, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x26, 0xec, 0xb2, 0x78, 0x0e, 0xc4, 0x9a, 0x50,
+		0x76, 0xbc, 0xe2, 0x28, 0x5e, 0x94, 0xca, 0x00  },
+	{	0x29, 0xb5, 0x11, 0x8d, 0x59, 0xc5, 0x61, 0xfd,
+		0xd4, 0x48, 0xec, 0x70, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x29, 0xe2, 0xbf, 0x74, 0x05, 0xce, 0x93, 0x58,
+		0x71, 0xba, 0xe7, 0x2c, 0x5d, 0x96, 0xcb, 0x00  },
+	{	0x13, 0x8f, 0x2b, 0xb7, 0x7e, 0xe2, 0x46, 0xda,
+		0xc9, 0x55, 0xf1, 0x6d, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x04, 0xc8, 0x9c, 0x50, 0x34, 0xf8, 0xac, 0x60,
+		0x64, 0xa8, 0xfc, 0x30, 0x54, 0x98, 0xcc, 0x00  },
+	{	0x13, 0x8f, 0x2b, 0xb7, 0x7e, 0xe2, 0x46, 0xda,
+		0xc9, 0x55, 0xf1, 0x6d, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x0b, 0xc6, 0x91, 0x5c, 0x3f, 0xf2, 0xa5, 0x68,
+		0x63, 0xae, 0xf9, 0x34, 0x57, 0x9a, 0xcd, 0x00  },
+	{	0x0e, 0x92, 0x36, 0xaa, 0x63, 0xff, 0x5b, 0xc7,
+		0xc9, 0x55, 0xf1, 0x6d, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x1a, 0xd4, 0x86, 0x48, 0x22, 0xec, 0xbe, 0x70,
+		0x6a, 0xa4, 0xf6, 0x38, 0x52, 0x9c, 0xce, 0x00  },
+	{	0x0e, 0x92, 0x36, 0xaa, 0x63, 0xff, 0x5b, 0xc7,
+		0xc9, 0x55, 0xf1, 0x6d, 0xa4, 0x38, 0x9c, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x15, 0xda, 0x8b, 0x44, 0x29, 0xe6, 0xb7, 0x78,
+		0x6d, 0xa2, 0xf3, 0x3c, 0x51, 0x9e, 0xcf, 0x00  },
+	{	0xdc, 0x5d, 0xc3, 0x42, 0xe2, 0x63, 0xfd, 0x7c,
+		0xa0, 0x21, 0xbf, 0x3e, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0xdc, 0x5d, 0xc3, 0x42, 0xe2, 0x63, 0xfd, 0x7c,
+		0xa0, 0x21, 0xbf, 0x3e, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xbf, 0x6e, 0x1d, 0xcc, 0xfb, 0x2a, 0x59, 0x88,
+		0x37, 0xe6, 0x95, 0x44, 0x73, 0xa2, 0xd1, 0x00  },
+	{	0xc1, 0x40, 0xde, 0x5f, 0xff, 0x7e, 0xe0, 0x61,
+		0xa0, 0x21, 0xbf, 0x3e, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xae, 0x7c, 0x0a, 0xd8, 0xe6, 0x34, 0x42, 0x90,
+		0x3e, 0xec, 0x9a, 0x48, 0x76, 0xa4, 0xd2, 0x00  },
+	{	0xc1, 0x40, 0xde, 0x5f, 0xff, 0x7e, 0xe0, 0x61,
+		0xa0, 0x21, 0xbf, 0x3e, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xa1, 0x72, 0x07, 0xd4, 0xed, 0x3e, 0x4b, 0x98,
+		0x39, 0xea, 0x9f, 0x4c, 0x75, 0xa6, 0xd3, 0x00  },
+	{	0xfb, 0x7a, 0xe4, 0x65, 0xd8, 0x59, 0xc7, 0x46,
+		0xbd, 0x3c, 0xa2, 0x23, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x8c, 0x58, 0x24, 0xf0, 0xdc, 0x08, 0x74, 0xa0,
+		0x2c, 0xf8, 0x84, 0x50, 0x7c, 0xa8, 0xd4, 0x00  },
+	{	0xfb, 0x7a, 0xe4, 0x65, 0xd8, 0x59, 0xc7, 0x46,
+		0xbd, 0x3c, 0xa2, 0x23, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x83, 0x56, 0x29, 0xfc, 0xd7, 0x02, 0x7d, 0xa8,
+		0x2b, 0xfe, 0x81, 0x54, 0x7f, 0xaa, 0xd5, 0x00  },
+	{	0xe6, 0x67, 0xf9, 0x78, 0xc5, 0x44, 0xda, 0x5b,
+		0xbd, 0x3c, 0xa2, 0x23, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x92, 0x44, 0x3e, 0xe8, 0xca, 0x1c, 0x66, 0xb0,
+		0x22, 0xf4, 0x8e, 0x58, 0x7a, 0xac, 0xd6, 0x00  },
+	{	0xe6, 0x67, 0xf9, 0x78, 0xc5, 0x44, 0xda, 0x5b,
+		0xbd, 0x3c, 0xa2, 0x23, 0x9e, 0x1f, 0x81, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x9d, 0x4a, 0x33, 0xe4, 0xc1, 0x16, 0x6f, 0xb8,
+		0x25, 0xf2, 0x8b, 0x5c, 0x79, 0xae, 0xd7, 0x00  },
+	{	0x8f, 0x0e, 0x8d, 0x0c, 0x8b, 0x0a, 0x89, 0x08,
+		0x87, 0x06, 0x85, 0x04, 0x83, 0x02, 0x81, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc8, 0x10, 0x78, 0xa0, 0xa8, 0x70, 0x18, 0xc0,
+		0x08, 0xd0, 0xb8, 0x60, 0x68, 0xb0, 0xd8, 0x00  },
+	{	0x8f, 0x0e, 0x8d, 0x0c, 0x8b, 0x0a, 0x89, 0x08,
+		0x87, 0x06, 0x85, 0x04, 0x83, 0x02, 0x81, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc7, 0x1e, 0x75, 0xac, 0xa3, 0x7a, 0x11, 0xc8,
+		0x0f, 0xd6, 0xbd, 0x64, 0x6b, 0xb2, 0xd9, 0x00  },
+	{	0x92, 0x13, 0x90, 0x11, 0x96, 0x17, 0x94, 0x15,
+		0x87, 0x06, 0x85, 0x04, 0x83, 0x02, 0x81, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd6, 0x0c, 0x62, 0xb8, 0xbe, 0x64, 0x0a, 0xd0,
+		0x06, 0xdc, 0xb2, 0x68, 0x6e, 0xb4, 0xda, 0x00  },
+	{	0x92, 0x13, 0x90, 0x11, 0x96, 0x17, 0x94, 0x15,
+		0x87, 0x06, 0x85, 0x04, 0x83, 0x02, 0x81, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd9, 0x02, 0x6f, 0xb4, 0xb5, 0x6e, 0x03, 0xd8,
+		0x01, 0xda, 0xb7, 0x6c, 0x6d, 0xb6, 0xdb, 0x00  },
+	{	0xa8, 0x29, 0xaa, 0x2b, 0xb1, 0x30, 0xb3, 0x32,
+		0x9a, 0x1b, 0x98, 0x19, 0x83, 0x02, 0x81, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf4, 0x28, 0x4c, 0x90, 0x84, 0x58, 0x3c, 0xe0,
+		0x14, 0xc8, 0xac, 0x70, 0x64, 0xb8, 0xdc, 0x00  },
+	{	0xa8, 0x29, 0xaa, 0x2b, 0xb1, 0x30, 0xb3, 0x32,
+		0x9a, 0x1b, 0x98, 0x19, 0x83, 0x02, 0x81, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xfb, 0x26, 0x41, 0x9c, 0x8f, 0x52, 0x35, 0xe8,
+		0x13, 0xce, 0xa9, 0x74, 0x67, 0xba, 0xdd, 0x00  },
+	{	0xb5, 0x34, 0xb7, 0x36, 0xac, 0x2d, 0xae, 0x2f,
+		0x9a, 0x1b, 0x98, 0x19, 0x83, 0x02, 0x81, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xea, 0x34, 0x56, 0x88, 0x92, 0x4c, 0x2e, 0xf0,
+		0x1a, 0xc4, 0xa6, 0x78, 0x62, 0xbc, 0xde, 0x00  },
+	{	0xb5, 0x34, 0xb7, 0x36, 0xac, 0x2d, 0xae, 0x2f,
+		0x9a, 0x1b, 0x98, 0x19, 0x83, 0x02, 0x81, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x74, 0x74, 0x69, 0x69, 0x53, 0x53, 0x4e, 0x4e,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe5, 0x3a, 0x5b, 0x84, 0x99, 0x46, 0x27, 0xf8,
+		0x1d, 0xc2, 0xa3, 0x7c, 0x61, 0xbe, 0xdf, 0x00  },
+	{	0x0c, 0xaa, 0x5d, 0xfb, 0xae, 0x08, 0xff, 0x59,
+		0x55, 0xf3, 0x04, 0xa2, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x0c, 0xaa, 0x5d, 0xfb, 0xae, 0x08, 0xff, 0x59,
+		0x55, 0xf3, 0x04, 0xa2, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xaf, 0x4e, 0x6d, 0x8c, 0x2b, 0xca, 0xe9, 0x08,
+		0xa7, 0x46, 0x65, 0x84, 0x23, 0xc2, 0xe1, 0x00  },
+	{	0x11, 0xb7, 0x40, 0xe6, 0xb3, 0x15, 0xe2, 0x44,
+		0x55, 0xf3, 0x04, 0xa2, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xbe, 0x5c, 0x7a, 0x98, 0x36, 0xd4, 0xf2, 0x10,
+		0xae, 0x4c, 0x6a, 0x88, 0x26, 0xc4, 0xe2, 0x00  },
+	{	0x11, 0xb7, 0x40, 0xe6, 0xb3, 0x15, 0xe2, 0x44,
+		0x55, 0xf3, 0x04, 0xa2, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xb1, 0x52, 0x77, 0x94, 0x3d, 0xde, 0xfb, 0x18,
+		0xa9, 0x4a, 0x6f, 0x8c, 0x25, 0xc6, 0xe3, 0x00  },
+	{	0x2b, 0x8d, 0x7a, 0xdc, 0x94, 0x32, 0xc5, 0x63,
+		0x48, 0xee, 0x19, 0xbf, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x9c, 0x78, 0x54, 0xb0, 0x0c, 0xe8, 0xc4, 0x20,
+		0xbc, 0x58, 0x74, 0x90, 0x2c, 0xc8, 0xe4, 0x00  },
+	{	0x2b, 0x8d, 0x7a, 0xdc, 0x94, 0x32, 0xc5, 0x63,
+		0x48, 0xee, 0x19, 0xbf, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x93, 0x76, 0x59, 0xbc, 0x07, 0xe2, 0xcd, 0x28,
+		0xbb, 0x5e, 0x71, 0x94, 0x2f, 0xca, 0xe5, 0x00  },
+	{	0x36, 0x90, 0x67, 0xc1, 0x89, 0x2f, 0xd8, 0x7e,
+		0x48, 0xee, 0x19, 0xbf, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x82, 0x64, 0x4e, 0xa8, 0x1a, 0xfc, 0xd6, 0x30,
+		0xb2, 0x54, 0x7e, 0x98, 0x2a, 0xcc, 0xe6, 0x00  },
+	{	0x36, 0x90, 0x67, 0xc1, 0x89, 0x2f, 0xd8, 0x7e,
+		0x48, 0xee, 0x19, 0xbf, 0xf7, 0x51, 0xa6, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x8d, 0x6a, 0x43, 0xa4, 0x11, 0xf6, 0xdf, 0x38,
+		0xb5, 0x52, 0x7b, 0x9c, 0x29, 0xce, 0xe7, 0x00  },
+	{	0x5f, 0xf9, 0x13, 0xb5, 0xc7, 0x61, 0x8b, 0x2d,
+		0x72, 0xd4, 0x3e, 0x98, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd8, 0x30, 0x08, 0xe0, 0x78, 0x90, 0xa8, 0x40,
+		0x98, 0x70, 0x48, 0xa0, 0x38, 0xd0, 0xe8, 0x00  },
+	{	0x5f, 0xf9, 0x13, 0xb5, 0xc7, 0x61, 0x8b, 0x2d,
+		0x72, 0xd4, 0x3e, 0x98, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xd7, 0x3e, 0x05, 0xec, 0x73, 0x9a, 0xa1, 0x48,
+		0x9f, 0x76, 0x4d, 0xa4, 0x3b, 0xd2, 0xe9, 0x00  },
+	{	0x42, 0xe4, 0x0e, 0xa8, 0xda, 0x7c, 0x96, 0x30,
+		0x72, 0xd4, 0x3e, 0x98, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc6, 0x2c, 0x12, 0xf8, 0x6e, 0x84, 0xba, 0x50,
+		0x96, 0x7c, 0x42, 0xa8, 0x3e, 0xd4, 0xea, 0x00  },
+	{	0x42, 0xe4, 0x0e, 0xa8, 0xda, 0x7c, 0x96, 0x30,
+		0x72, 0xd4, 0x3e, 0x98, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xc9, 0x22, 0x1f, 0xf4, 0x65, 0x8e, 0xb3, 0x58,
+		0x91, 0x7a, 0x47, 0xac, 0x3d, 0xd6, 0xeb, 0x00  },
+	{	0x78, 0xde, 0x34, 0x92, 0xfd, 0x5b, 0xb1, 0x17,
+		0x6f, 0xc9, 0x23, 0x85, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xe4, 0x08, 0x3c, 0xd0, 0x54, 0xb8, 0x8c, 0x60,
+		0x84, 0x68, 0x5c, 0xb0, 0x34, 0xd8, 0xec, 0x00  },
+	{	0x78, 0xde, 0x34, 0x92, 0xfd, 0x5b, 0xb1, 0x17,
+		0x6f, 0xc9, 0x23, 0x85, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xeb, 0x06, 0x31, 0xdc, 0x5f, 0xb2, 0x85, 0x68,
+		0x83, 0x6e, 0x59, 0xb4, 0x37, 0xda, 0xed, 0x00  },
+	{	0x65, 0xc3, 0x29, 0x8f, 0xe0, 0x46, 0xac, 0x0a,
+		0x6f, 0xc9, 0x23, 0x85, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xfa, 0x14, 0x26, 0xc8, 0x42, 0xac, 0x9e, 0x70,
+		0x8a, 0x64, 0x56, 0xb8, 0x32, 0xdc, 0xee, 0x00  },
+	{	0x65, 0xc3, 0x29, 0x8f, 0xe0, 0x46, 0xac, 0x0a,
+		0x6f, 0xc9, 0x23, 0x85, 0xea, 0x4c, 0xa6, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0xf5, 0x1a, 0x2b, 0xc4, 0x49, 0xa6, 0x97, 0x78,
+		0x8d, 0x62, 0x53, 0xbc, 0x31, 0xde, 0xef, 0x00  },
+	{	0xb7, 0x0c, 0xdc, 0x67, 0x61, 0xda, 0x0a, 0xb1,
+		0x06, 0xbd, 0x6d, 0xd6, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0xb7, 0x0c, 0xdc, 0x67, 0x61, 0xda, 0x0a, 0xb1,
+		0x06, 0xbd, 0x6d, 0xd6, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0xf0, 0xe0, 0xd0, 0xc0, 0xb0, 0xa0, 0x90, 0x80,
+		0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x5f, 0xae, 0xbd, 0x4c, 0x9b, 0x6a, 0x79, 0x88,
+		0xd7, 0x26, 0x35, 0xc4, 0x13, 0xe2, 0xf1, 0x00  },
+	{	0xaa, 0x11, 0xc1, 0x7a, 0x7c, 0xc7, 0x17, 0xac,
+		0x06, 0xbd, 0x6d, 0xd6, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00,
+		0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x4e, 0xbc, 0xaa, 0x58, 0x86, 0x74, 0x62, 0x90,
+		0xde, 0x2c, 0x3a, 0xc8, 0x16, 0xe4, 0xf2, 0x00  },
+	{	0xaa, 0x11, 0xc1, 0x7a, 0x7c, 0xc7, 0x17, 0xac,
+		0x06, 0xbd, 0x6d, 0xd6, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0x10, 0x20, 0x70, 0x40, 0xd0, 0xe0, 0xb0, 0x80,
+		0x90, 0xa0, 0xf0, 0xc0, 0x50, 0x60, 0x30, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x41, 0xb2, 0xa7, 0x54, 0x8d, 0x7e, 0x6b, 0x98,
+		0xd9, 0x2a, 0x3f, 0xcc, 0x15, 0xe6, 0xf3, 0x00  },
+	{	0x90, 0x2b, 0xfb, 0x40, 0x5b, 0xe0, 0x30, 0x8b,
+		0x1b, 0xa0, 0x70, 0xcb, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00,
+		0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x6c, 0x98, 0x84, 0x70, 0xbc, 0x48, 0x54, 0xa0,
+		0xcc, 0x38, 0x24, 0xd0, 0x1c, 0xe8, 0xf4, 0x00  },
+	{	0x90, 0x2b, 0xfb, 0x40, 0x5b, 0xe0, 0x30, 0x8b,
+		0x1b, 0xa0, 0x70, 0xcb, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0x30, 0x60, 0x90, 0xc0, 0x70, 0x20, 0xd0, 0x80,
+		0xb0, 0xe0, 0x10, 0x40, 0xf0, 0xa0, 0x50, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x63, 0x96, 0x89, 0x7c, 0xb7, 0x42, 0x5d, 0xa8,
+		0xcb, 0x3e, 0x21, 0xd4, 0x1f, 0xea, 0xf5, 0x00  },
+	{	0x8d, 0x36, 0xe6, 0x5d, 0x46, 0xfd, 0x2d, 0x96,
+		0x1b, 0xa0, 0x70, 0xcb, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00,
+		0x20, 0x40, 0xe0, 0x80, 0xa0, 0xc0, 0x60, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x72, 0x84, 0x9e, 0x68, 0xaa, 0x5c, 0x46, 0xb0,
+		0xc2, 0x34, 0x2e, 0xd8, 0x1a, 0xec, 0xf6, 0x00  },
+	{	0x8d, 0x36, 0xe6, 0x5d, 0x46, 0xfd, 0x2d, 0x96,
+		0x1b, 0xa0, 0x70, 0xcb, 0xd0, 0x6b, 0xbb, 0x00  },
+	{	0xd0, 0xa0, 0x30, 0x40, 0x10, 0x60, 0xf0, 0x80,
+		0x50, 0x20, 0xb0, 0xc0, 0x90, 0xe0, 0x70, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x7d, 0x8a, 0x93, 0x64, 0xa1, 0x56, 0x4f, 0xb8,
+		0xc5, 0x32, 0x2b, 0xdc, 0x19, 0xee, 0xf7, 0x00  },
+	{	0xe4, 0x5f, 0x92, 0x29, 0x08, 0xb3, 0x7e, 0xc5,
+		0x21, 0x9a, 0x57, 0xec, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+		0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x28, 0xd0, 0xd8, 0x20, 0xc8, 0x30, 0x38, 0xc0,
+		0xe8, 0x10, 0x18, 0xe0, 0x08, 0xf0, 0xf8, 0x00  },
+	{	0xe4, 0x5f, 0x92, 0x29, 0x08, 0xb3, 0x7e, 0xc5,
+		0x21, 0x9a, 0x57, 0xec, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x70, 0xe0, 0x50, 0xc0, 0x30, 0xa0, 0x10, 0x80,
+		0xf0, 0x60, 0xd0, 0x40, 0xb0, 0x20, 0x90, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x27, 0xde, 0xd5, 0x2c, 0xc3, 0x3a, 0x31, 0xc8,
+		0xef, 0x16, 0x1d, 0xe4, 0x0b, 0xf2, 0xf9, 0x00  },
+	{	0xf9, 0x42, 0x8f, 0x34, 0x15, 0xae, 0x63, 0xd8,
+		0x21, 0x9a, 0x57, 0xec, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00,
+		0x60, 0xc0, 0x20, 0x80, 0xe0, 0x40, 0xa0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x36, 0xcc, 0xc2, 0x38, 0xde, 0x24, 0x2a, 0xd0,
+		0xe6, 0x1c, 0x12, 0xe8, 0x0e, 0xf4, 0xfa, 0x00  },
+	{	0xf9, 0x42, 0x8f, 0x34, 0x15, 0xae, 0x63, 0xd8,
+		0x21, 0x9a, 0x57, 0xec, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x90, 0x20, 0xf0, 0x40, 0x50, 0xe0, 0x30, 0x80,
+		0x10, 0xa0, 0x70, 0xc0, 0xd0, 0x60, 0xb0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x39, 0xc2, 0xcf, 0x34, 0xd5, 0x2e, 0x23, 0xd8,
+		0xe1, 0x1a, 0x17, 0xec, 0x0d, 0xf6, 0xfb, 0x00  },
+	{	0xc3, 0x78, 0xb5, 0x0e, 0x32, 0x89, 0x44, 0xff,
+		0x3c, 0x87, 0x4a, 0xf1, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00,
+		0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x14, 0xe8, 0xec, 0x10, 0xe4, 0x18, 0x1c, 0xe0,
+		0xf4, 0x08, 0x0c, 0xf0, 0x04, 0xf8, 0xfc, 0x00  },
+	{	0xc3, 0x78, 0xb5, 0x0e, 0x32, 0x89, 0x44, 0xff,
+		0x3c, 0x87, 0x4a, 0xf1, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0xb0, 0x60, 0x10, 0xc0, 0xf0, 0x20, 0x50, 0x80,
+		0x30, 0xe0, 0x90, 0x40, 0x70, 0xa0, 0xd0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x1b, 0xe6, 0xe1, 0x1c, 0xef, 0x12, 0x15, 0xe8,
+		0xf3, 0x0e, 0x09, 0xf4, 0x07, 0xfa, 0xfd, 0x00  },
+	{	0xde, 0x65, 0xa8, 0x13, 0x2f, 0x94, 0x59, 0xe2,
+		0x3c, 0x87, 0x4a, 0xf1, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00,
+		0xa0, 0x40, 0x60, 0x80, 0x20, 0xc0, 0xe0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x0a, 0xf4, 0xf6, 0x08, 0xf2, 0x0c, 0x0e, 0xf0,
+		0xfa, 0x04, 0x06, 0xf8, 0x02, 0xfc, 0xfe, 0x00  },
+	{	0xde, 0x65, 0xa8, 0x13, 0x2f, 0x94, 0x59, 0xe2,
+		0x3c, 0x87, 0x4a, 0xf1, 0xcd, 0x76, 0xbb, 0x00  },
+	{	0x50, 0xa0, 0xb0, 0x40, 0x90, 0x60, 0x70, 0x80,
+		0xd0, 0x20, 0x30, 0xc0, 0x10, 0xe0, 0xf0, 0x00  },
+	{	0x69, 0x69, 0x74, 0x74, 0x4e, 0x4e, 0x53, 0x53,
+		0x3a, 0x3a, 0x27, 0x27, 0x1d, 0x1d, 0x00, 0x00  },
+	{	0x05, 0xfa, 0xfb, 0x04, 0xf9, 0x06, 0x07, 0xf8,
+		0xfd, 0x02, 0x03, 0xfc, 0x01, 0xfe, 0xff, 0x00  }
+};
+/* END CSTYLED */
+#else
+/* BEGIN CSTYLED */
+const uint8_t
+__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+		0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+		0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
+		0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b,
+		0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12,
+		0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+		0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
+		0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+		0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36,
+		0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31,
+		0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+		0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
+		0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a,
+		0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d,
+		0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+		0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+		0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+		0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+		0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+		0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+		0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79,
+		0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+		0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c,
+		0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+		0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b,
+		0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+		0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+		0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+		0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65,
+		0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+		0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48,
+		0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+		0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f,
+		0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+		0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46,
+		0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+		0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+		0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+		0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54,
+		0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+		0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+		0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+		0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a,
+		0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa  },
+	{	0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+		0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d,
+		0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+		0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+		0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7,
+		0x08, 0x29, 0x4a, 0x6b, 0x8c, 0xad, 0xce, 0xef  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+		0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee,
+		0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+		0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9,
+		0x18, 0x3b, 0x5e, 0x7d, 0x94, 0xb7, 0xd2, 0xf1  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+		0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc,
+		0x20, 0x04, 0x68, 0x4c, 0xb0, 0x94, 0xf8, 0xdc  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+		0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+		0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+		0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2,
+		0x30, 0x16, 0x7c, 0x5a, 0xa8, 0x8e, 0xe4, 0xc2  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+		0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+		0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+		0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8,
+		0x40, 0x68, 0x10, 0x38, 0xe0, 0xc8, 0xb0, 0x98  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+		0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf,
+		0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+		0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+		0x50, 0x7a, 0x04, 0x2e, 0xf8, 0xd2, 0xac, 0x86  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+		0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1,
+		0x58, 0x73, 0x0e, 0x25, 0xf4, 0xdf, 0xa2, 0x89  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+		0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4,
+		0x60, 0x4c, 0x38, 0x14, 0xd0, 0xfc, 0x88, 0xa4  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+		0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3,
+		0x68, 0x45, 0x32, 0x1f, 0xdc, 0xf1, 0x86, 0xab  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+		0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca,
+		0x70, 0x5e, 0x2c, 0x02, 0xc8, 0xe6, 0x94, 0xba  },
+	{	0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+		0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+		0x78, 0x57, 0x26, 0x09, 0xc4, 0xeb, 0x9a, 0xb5  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+		0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+		0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97,
+		0x88, 0xb9, 0xea, 0xdb, 0x4c, 0x7d, 0x2e, 0x1f  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+		0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e,
+		0x90, 0xa2, 0xf4, 0xc6, 0x58, 0x6a, 0x3c, 0x0e  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+		0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99,
+		0x98, 0xab, 0xfe, 0xcd, 0x54, 0x67, 0x32, 0x01  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+		0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+		0xa0, 0x94, 0xc8, 0xfc, 0x70, 0x44, 0x18, 0x2c  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+		0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b,
+		0xa8, 0x9d, 0xc2, 0xf7, 0x7c, 0x49, 0x16, 0x23  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+		0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82,
+		0xb0, 0x86, 0xdc, 0xea, 0x68, 0x5e, 0x04, 0x32  },
+	{	0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+		0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85,
+		0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d  },
+	{	0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+		0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8,
+		0xc0, 0xf8, 0xb0, 0x88, 0x20, 0x18, 0x50, 0x68  },
+	{	0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+		0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+		0xc8, 0xf1, 0xba, 0x83, 0x2c, 0x15, 0x5e, 0x67  },
+	{	0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+		0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+		0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76  },
+	{	0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+		0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1,
+		0xd8, 0xe3, 0xae, 0x95, 0x34, 0x0f, 0x42, 0x79  },
+	{	0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+		0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4,
+		0xe0, 0xdc, 0x98, 0xa4, 0x10, 0x2c, 0x68, 0x54  },
+	{	0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+		0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3,
+		0xe8, 0xd5, 0x92, 0xaf, 0x1c, 0x21, 0x66, 0x5b  },
+	{	0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+		0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+		0xf0, 0xce, 0x8c, 0xb2, 0x08, 0x36, 0x74, 0x4a  },
+	{	0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+		0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d  },
+	{	0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd,
+		0xf8, 0xc7, 0x86, 0xb9, 0x04, 0x3b, 0x7a, 0x45  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+		0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+		0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x41, 0x82, 0xc3, 0x04, 0x45, 0x86, 0xc7,
+		0x08, 0x49, 0x8a, 0xcb, 0x0c, 0x4d, 0x8e, 0xcf  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+		0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x42, 0x84, 0xc6, 0x08, 0x4a, 0x8c, 0xce,
+		0x10, 0x52, 0x94, 0xd6, 0x18, 0x5a, 0x9c, 0xde  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+		0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x43, 0x86, 0xc5, 0x0c, 0x4f, 0x8a, 0xc9,
+		0x18, 0x5b, 0x9e, 0xdd, 0x14, 0x57, 0x92, 0xd1  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+		0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x44, 0x88, 0xcc, 0x10, 0x54, 0x98, 0xdc,
+		0x20, 0x64, 0xa8, 0xec, 0x30, 0x74, 0xb8, 0xfc  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+		0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x45, 0x8a, 0xcf, 0x14, 0x51, 0x9e, 0xdb,
+		0x28, 0x6d, 0xa2, 0xe7, 0x3c, 0x79, 0xb6, 0xf3  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+		0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x46, 0x8c, 0xca, 0x18, 0x5e, 0x94, 0xd2,
+		0x30, 0x76, 0xbc, 0xfa, 0x28, 0x6e, 0xa4, 0xe2  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+		0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x47, 0x8e, 0xc9, 0x1c, 0x5b, 0x92, 0xd5,
+		0x38, 0x7f, 0xb6, 0xf1, 0x24, 0x63, 0xaa, 0xed  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+		0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x48, 0x90, 0xd8, 0x20, 0x68, 0xb0, 0xf8,
+		0x40, 0x08, 0xd0, 0x98, 0x60, 0x28, 0xf0, 0xb8  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+		0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x49, 0x92, 0xdb, 0x24, 0x6d, 0xb6, 0xff,
+		0x48, 0x01, 0xda, 0x93, 0x6c, 0x25, 0xfe, 0xb7  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+		0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4a, 0x94, 0xde, 0x28, 0x62, 0xbc, 0xf6,
+		0x50, 0x1a, 0xc4, 0x8e, 0x78, 0x32, 0xec, 0xa6  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+		0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4b, 0x96, 0xdd, 0x2c, 0x67, 0xba, 0xf1,
+		0x58, 0x13, 0xce, 0x85, 0x74, 0x3f, 0xe2, 0xa9  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+		0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4c, 0x98, 0xd4, 0x30, 0x7c, 0xa8, 0xe4,
+		0x60, 0x2c, 0xf8, 0xb4, 0x50, 0x1c, 0xc8, 0x84  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+		0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4d, 0x9a, 0xd7, 0x34, 0x79, 0xae, 0xe3,
+		0x68, 0x25, 0xf2, 0xbf, 0x5c, 0x11, 0xc6, 0x8b  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+		0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+		0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a  },
+	{	0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+		0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x4f, 0x9e, 0xd1, 0x3c, 0x73, 0xa2, 0xed,
+		0x78, 0x37, 0xe6, 0xa9, 0x44, 0x0b, 0xda, 0x95  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+		0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+		0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x51, 0xa2, 0xf3, 0x44, 0x15, 0xe6, 0xb7,
+		0x88, 0xd9, 0x2a, 0x7b, 0xcc, 0x9d, 0x6e, 0x3f  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+		0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x52, 0xa4, 0xf6, 0x48, 0x1a, 0xec, 0xbe,
+		0x90, 0xc2, 0x34, 0x66, 0xd8, 0x8a, 0x7c, 0x2e  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+		0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+		0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+		0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x54, 0xa8, 0xfc, 0x50, 0x04, 0xf8, 0xac,
+		0xa0, 0xf4, 0x08, 0x5c, 0xf0, 0xa4, 0x58, 0x0c  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+		0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x55, 0xaa, 0xff, 0x54, 0x01, 0xfe, 0xab,
+		0xa8, 0xfd, 0x02, 0x57, 0xfc, 0xa9, 0x56, 0x03  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+		0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x56, 0xac, 0xfa, 0x58, 0x0e, 0xf4, 0xa2,
+		0xb0, 0xe6, 0x1c, 0x4a, 0xe8, 0xbe, 0x44, 0x12  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+		0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x57, 0xae, 0xf9, 0x5c, 0x0b, 0xf2, 0xa5,
+		0xb8, 0xef, 0x16, 0x41, 0xe4, 0xb3, 0x4a, 0x1d  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+		0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x58, 0xb0, 0xe8, 0x60, 0x38, 0xd0, 0x88,
+		0xc0, 0x98, 0x70, 0x28, 0xa0, 0xf8, 0x10, 0x48  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+		0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x59, 0xb2, 0xeb, 0x64, 0x3d, 0xd6, 0x8f,
+		0xc8, 0x91, 0x7a, 0x23, 0xac, 0xf5, 0x1e, 0x47  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+		0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5a, 0xb4, 0xee, 0x68, 0x32, 0xdc, 0x86,
+		0xd0, 0x8a, 0x64, 0x3e, 0xb8, 0xe2, 0x0c, 0x56  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+		0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5b, 0xb6, 0xed, 0x6c, 0x37, 0xda, 0x81,
+		0xd8, 0x83, 0x6e, 0x35, 0xb4, 0xef, 0x02, 0x59  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+		0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5c, 0xb8, 0xe4, 0x70, 0x2c, 0xc8, 0x94,
+		0xe0, 0xbc, 0x58, 0x04, 0x90, 0xcc, 0x28, 0x74  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+		0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5d, 0xba, 0xe7, 0x74, 0x29, 0xce, 0x93,
+		0xe8, 0xb5, 0x52, 0x0f, 0x9c, 0xc1, 0x26, 0x7b  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+		0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5e, 0xbc, 0xe2, 0x78, 0x26, 0xc4, 0x9a,
+		0xf0, 0xae, 0x4c, 0x12, 0x88, 0xd6, 0x34, 0x6a  },
+	{	0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+		0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27  },
+	{	0x00, 0x5f, 0xbe, 0xe1, 0x7c, 0x23, 0xc2, 0x9d,
+		0xf8, 0xa7, 0x46, 0x19, 0x84, 0xdb, 0x3a, 0x65  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+		0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+		0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x61, 0xc2, 0xa3, 0x84, 0xe5, 0x46, 0x27,
+		0x08, 0x69, 0xca, 0xab, 0x8c, 0xed, 0x4e, 0x2f  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+		0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x62, 0xc4, 0xa6, 0x88, 0xea, 0x4c, 0x2e,
+		0x10, 0x72, 0xd4, 0xb6, 0x98, 0xfa, 0x5c, 0x3e  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+		0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x63, 0xc6, 0xa5, 0x8c, 0xef, 0x4a, 0x29,
+		0x18, 0x7b, 0xde, 0xbd, 0x94, 0xf7, 0x52, 0x31  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+		0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x64, 0xc8, 0xac, 0x90, 0xf4, 0x58, 0x3c,
+		0x20, 0x44, 0xe8, 0x8c, 0xb0, 0xd4, 0x78, 0x1c  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+		0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x65, 0xca, 0xaf, 0x94, 0xf1, 0x5e, 0x3b,
+		0x28, 0x4d, 0xe2, 0x87, 0xbc, 0xd9, 0x76, 0x13  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+		0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x66, 0xcc, 0xaa, 0x98, 0xfe, 0x54, 0x32,
+		0x30, 0x56, 0xfc, 0x9a, 0xa8, 0xce, 0x64, 0x02  },
+	{	0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+		0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x67, 0xce, 0xa9, 0x9c, 0xfb, 0x52, 0x35,
+		0x38, 0x5f, 0xf6, 0x91, 0xa4, 0xc3, 0x6a, 0x0d  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+		0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x68, 0xd0, 0xb8, 0xa0, 0xc8, 0x70, 0x18,
+		0x40, 0x28, 0x90, 0xf8, 0xe0, 0x88, 0x30, 0x58  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+		0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+		0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+		0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6a, 0xd4, 0xbe, 0xa8, 0xc2, 0x7c, 0x16,
+		0x50, 0x3a, 0x84, 0xee, 0xf8, 0x92, 0x2c, 0x46  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+		0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6b, 0xd6, 0xbd, 0xac, 0xc7, 0x7a, 0x11,
+		0x58, 0x33, 0x8e, 0xe5, 0xf4, 0x9f, 0x22, 0x49  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+		0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6c, 0xd8, 0xb4, 0xb0, 0xdc, 0x68, 0x04,
+		0x60, 0x0c, 0xb8, 0xd4, 0xd0, 0xbc, 0x08, 0x64  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+		0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6d, 0xda, 0xb7, 0xb4, 0xd9, 0x6e, 0x03,
+		0x68, 0x05, 0xb2, 0xdf, 0xdc, 0xb1, 0x06, 0x6b  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+		0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6e, 0xdc, 0xb2, 0xb8, 0xd6, 0x64, 0x0a,
+		0x70, 0x1e, 0xac, 0xc2, 0xc8, 0xa6, 0x14, 0x7a  },
+	{	0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+		0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x6f, 0xde, 0xb1, 0xbc, 0xd3, 0x62, 0x0d,
+		0x78, 0x17, 0xa6, 0xc9, 0xc4, 0xab, 0x1a, 0x75  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+		0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+		0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x71, 0xe2, 0x93, 0xc4, 0xb5, 0x26, 0x57,
+		0x88, 0xf9, 0x6a, 0x1b, 0x4c, 0x3d, 0xae, 0xdf  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+		0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x72, 0xe4, 0x96, 0xc8, 0xba, 0x2c, 0x5e,
+		0x90, 0xe2, 0x74, 0x06, 0x58, 0x2a, 0xbc, 0xce  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+		0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x73, 0xe6, 0x95, 0xcc, 0xbf, 0x2a, 0x59,
+		0x98, 0xeb, 0x7e, 0x0d, 0x54, 0x27, 0xb2, 0xc1  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+		0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+		0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+		0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x75, 0xea, 0x9f, 0xd4, 0xa1, 0x3e, 0x4b,
+		0xa8, 0xdd, 0x42, 0x37, 0x7c, 0x09, 0x96, 0xe3  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+		0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x76, 0xec, 0x9a, 0xd8, 0xae, 0x34, 0x42,
+		0xb0, 0xc6, 0x5c, 0x2a, 0x68, 0x1e, 0x84, 0xf2  },
+	{	0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+		0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x77, 0xee, 0x99, 0xdc, 0xab, 0x32, 0x45,
+		0xb8, 0xcf, 0x56, 0x21, 0x64, 0x13, 0x8a, 0xfd  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+		0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x78, 0xf0, 0x88, 0xe0, 0x98, 0x10, 0x68,
+		0xc0, 0xb8, 0x30, 0x48, 0x20, 0x58, 0xd0, 0xa8  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+		0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x79, 0xf2, 0x8b, 0xe4, 0x9d, 0x16, 0x6f,
+		0xc8, 0xb1, 0x3a, 0x43, 0x2c, 0x55, 0xde, 0xa7  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+		0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7a, 0xf4, 0x8e, 0xe8, 0x92, 0x1c, 0x66,
+		0xd0, 0xaa, 0x24, 0x5e, 0x38, 0x42, 0xcc, 0xb6  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+		0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7b, 0xf6, 0x8d, 0xec, 0x97, 0x1a, 0x61,
+		0xd8, 0xa3, 0x2e, 0x55, 0x34, 0x4f, 0xc2, 0xb9  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+		0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7c, 0xf8, 0x84, 0xf0, 0x8c, 0x08, 0x74,
+		0xe0, 0x9c, 0x18, 0x64, 0x10, 0x6c, 0xe8, 0x94  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+		0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7d, 0xfa, 0x87, 0xf4, 0x89, 0x0e, 0x73,
+		0xe8, 0x95, 0x12, 0x6f, 0x1c, 0x61, 0xe6, 0x9b  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+		0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7e, 0xfc, 0x82, 0xf8, 0x86, 0x04, 0x7a,
+		0xf0, 0x8e, 0x0c, 0x72, 0x08, 0x76, 0xf4, 0x8a  },
+	{	0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+		0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+		0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a  },
+	{	0x00, 0x7f, 0xfe, 0x81, 0xfc, 0x83, 0x02, 0x7d,
+		0xf8, 0x87, 0x06, 0x79, 0x04, 0x7b, 0xfa, 0x85  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+		0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+		0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+		0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+		0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x82, 0x04, 0x86, 0x08, 0x8a, 0x0c, 0x8e,
+		0x10, 0x92, 0x14, 0x96, 0x18, 0x9a, 0x1c, 0x9e  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+		0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x83, 0x06, 0x85, 0x0c, 0x8f, 0x0a, 0x89,
+		0x18, 0x9b, 0x1e, 0x9d, 0x14, 0x97, 0x12, 0x91  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+		0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x84, 0x08, 0x8c, 0x10, 0x94, 0x18, 0x9c,
+		0x20, 0xa4, 0x28, 0xac, 0x30, 0xb4, 0x38, 0xbc  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+		0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x85, 0x0a, 0x8f, 0x14, 0x91, 0x1e, 0x9b,
+		0x28, 0xad, 0x22, 0xa7, 0x3c, 0xb9, 0x36, 0xb3  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+		0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x86, 0x0c, 0x8a, 0x18, 0x9e, 0x14, 0x92,
+		0x30, 0xb6, 0x3c, 0xba, 0x28, 0xae, 0x24, 0xa2  },
+	{	0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+		0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x87, 0x0e, 0x89, 0x1c, 0x9b, 0x12, 0x95,
+		0x38, 0xbf, 0x36, 0xb1, 0x24, 0xa3, 0x2a, 0xad  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+		0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x88, 0x10, 0x98, 0x20, 0xa8, 0x30, 0xb8,
+		0x40, 0xc8, 0x50, 0xd8, 0x60, 0xe8, 0x70, 0xf8  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+		0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x89, 0x12, 0x9b, 0x24, 0xad, 0x36, 0xbf,
+		0x48, 0xc1, 0x5a, 0xd3, 0x6c, 0xe5, 0x7e, 0xf7  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+		0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8a, 0x14, 0x9e, 0x28, 0xa2, 0x3c, 0xb6,
+		0x50, 0xda, 0x44, 0xce, 0x78, 0xf2, 0x6c, 0xe6  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+		0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8b, 0x16, 0x9d, 0x2c, 0xa7, 0x3a, 0xb1,
+		0x58, 0xd3, 0x4e, 0xc5, 0x74, 0xff, 0x62, 0xe9  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+		0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8c, 0x18, 0x94, 0x30, 0xbc, 0x28, 0xa4,
+		0x60, 0xec, 0x78, 0xf4, 0x50, 0xdc, 0x48, 0xc4  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+		0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8d, 0x1a, 0x97, 0x34, 0xb9, 0x2e, 0xa3,
+		0x68, 0xe5, 0x72, 0xff, 0x5c, 0xd1, 0x46, 0xcb  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+		0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8e, 0x1c, 0x92, 0x38, 0xb6, 0x24, 0xaa,
+		0x70, 0xfe, 0x6c, 0xe2, 0x48, 0xc6, 0x54, 0xda  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+		0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x8f, 0x1e, 0x91, 0x3c, 0xb3, 0x22, 0xad,
+		0x78, 0xf7, 0x66, 0xe9, 0x44, 0xcb, 0x5a, 0xd5  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+		0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+		0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x91, 0x22, 0xb3, 0x44, 0xd5, 0x66, 0xf7,
+		0x88, 0x19, 0xaa, 0x3b, 0xcc, 0x5d, 0xee, 0x7f  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+		0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x92, 0x24, 0xb6, 0x48, 0xda, 0x6c, 0xfe,
+		0x90, 0x02, 0xb4, 0x26, 0xd8, 0x4a, 0xfc, 0x6e  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+		0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x93, 0x26, 0xb5, 0x4c, 0xdf, 0x6a, 0xf9,
+		0x98, 0x0b, 0xbe, 0x2d, 0xd4, 0x47, 0xf2, 0x61  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+		0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x94, 0x28, 0xbc, 0x50, 0xc4, 0x78, 0xec,
+		0xa0, 0x34, 0x88, 0x1c, 0xf0, 0x64, 0xd8, 0x4c  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+		0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x95, 0x2a, 0xbf, 0x54, 0xc1, 0x7e, 0xeb,
+		0xa8, 0x3d, 0x82, 0x17, 0xfc, 0x69, 0xd6, 0x43  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+		0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x96, 0x2c, 0xba, 0x58, 0xce, 0x74, 0xe2,
+		0xb0, 0x26, 0x9c, 0x0a, 0xe8, 0x7e, 0xc4, 0x52  },
+	{	0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+		0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x97, 0x2e, 0xb9, 0x5c, 0xcb, 0x72, 0xe5,
+		0xb8, 0x2f, 0x96, 0x01, 0xe4, 0x73, 0xca, 0x5d  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+		0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x98, 0x30, 0xa8, 0x60, 0xf8, 0x50, 0xc8,
+		0xc0, 0x58, 0xf0, 0x68, 0xa0, 0x38, 0x90, 0x08  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+		0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x99, 0x32, 0xab, 0x64, 0xfd, 0x56, 0xcf,
+		0xc8, 0x51, 0xfa, 0x63, 0xac, 0x35, 0x9e, 0x07  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+		0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9a, 0x34, 0xae, 0x68, 0xf2, 0x5c, 0xc6,
+		0xd0, 0x4a, 0xe4, 0x7e, 0xb8, 0x22, 0x8c, 0x16  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+		0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9b, 0x36, 0xad, 0x6c, 0xf7, 0x5a, 0xc1,
+		0xd8, 0x43, 0xee, 0x75, 0xb4, 0x2f, 0x82, 0x19  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+		0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+		0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+		0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9d, 0x3a, 0xa7, 0x74, 0xe9, 0x4e, 0xd3,
+		0xe8, 0x75, 0xd2, 0x4f, 0x9c, 0x01, 0xa6, 0x3b  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+		0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9e, 0x3c, 0xa2, 0x78, 0xe6, 0x44, 0xda,
+		0xf0, 0x6e, 0xcc, 0x52, 0x88, 0x16, 0xb4, 0x2a  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+		0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53  },
+	{	0x00, 0x9f, 0x3e, 0xa1, 0x7c, 0xe3, 0x42, 0xdd,
+		0xf8, 0x67, 0xc6, 0x59, 0x84, 0x1b, 0xba, 0x25  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+		0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+		0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa1, 0x42, 0xe3, 0x84, 0x25, 0xc6, 0x67,
+		0x08, 0xa9, 0x4a, 0xeb, 0x8c, 0x2d, 0xce, 0x6f  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+		0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa2, 0x44, 0xe6, 0x88, 0x2a, 0xcc, 0x6e,
+		0x10, 0xb2, 0x54, 0xf6, 0x98, 0x3a, 0xdc, 0x7e  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+		0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa3, 0x46, 0xe5, 0x8c, 0x2f, 0xca, 0x69,
+		0x18, 0xbb, 0x5e, 0xfd, 0x94, 0x37, 0xd2, 0x71  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+		0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa4, 0x48, 0xec, 0x90, 0x34, 0xd8, 0x7c,
+		0x20, 0x84, 0x68, 0xcc, 0xb0, 0x14, 0xf8, 0x5c  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+		0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa5, 0x4a, 0xef, 0x94, 0x31, 0xde, 0x7b,
+		0x28, 0x8d, 0x62, 0xc7, 0xbc, 0x19, 0xf6, 0x53  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+		0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+		0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42  },
+	{	0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+		0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa7, 0x4e, 0xe9, 0x9c, 0x3b, 0xd2, 0x75,
+		0x38, 0x9f, 0x76, 0xd1, 0xa4, 0x03, 0xea, 0x4d  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+		0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa8, 0x50, 0xf8, 0xa0, 0x08, 0xf0, 0x58,
+		0x40, 0xe8, 0x10, 0xb8, 0xe0, 0x48, 0xb0, 0x18  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+		0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xa9, 0x52, 0xfb, 0xa4, 0x0d, 0xf6, 0x5f,
+		0x48, 0xe1, 0x1a, 0xb3, 0xec, 0x45, 0xbe, 0x17  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+		0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xaa, 0x54, 0xfe, 0xa8, 0x02, 0xfc, 0x56,
+		0x50, 0xfa, 0x04, 0xae, 0xf8, 0x52, 0xac, 0x06  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+		0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xab, 0x56, 0xfd, 0xac, 0x07, 0xfa, 0x51,
+		0x58, 0xf3, 0x0e, 0xa5, 0xf4, 0x5f, 0xa2, 0x09  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+		0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xac, 0x58, 0xf4, 0xb0, 0x1c, 0xe8, 0x44,
+		0x60, 0xcc, 0x38, 0x94, 0xd0, 0x7c, 0x88, 0x24  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+		0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xad, 0x5a, 0xf7, 0xb4, 0x19, 0xee, 0x43,
+		0x68, 0xc5, 0x32, 0x9f, 0xdc, 0x71, 0x86, 0x2b  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+		0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xae, 0x5c, 0xf2, 0xb8, 0x16, 0xe4, 0x4a,
+		0x70, 0xde, 0x2c, 0x82, 0xc8, 0x66, 0x94, 0x3a  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+		0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xaf, 0x5e, 0xf1, 0xbc, 0x13, 0xe2, 0x4d,
+		0x78, 0xd7, 0x26, 0x89, 0xc4, 0x6b, 0x9a, 0x35  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+		0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+		0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb1, 0x62, 0xd3, 0xc4, 0x75, 0xa6, 0x17,
+		0x88, 0x39, 0xea, 0x5b, 0x4c, 0xfd, 0x2e, 0x9f  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+		0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb2, 0x64, 0xd6, 0xc8, 0x7a, 0xac, 0x1e,
+		0x90, 0x22, 0xf4, 0x46, 0x58, 0xea, 0x3c, 0x8e  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+		0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb3, 0x66, 0xd5, 0xcc, 0x7f, 0xaa, 0x19,
+		0x98, 0x2b, 0xfe, 0x4d, 0x54, 0xe7, 0x32, 0x81  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+		0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb4, 0x68, 0xdc, 0xd0, 0x64, 0xb8, 0x0c,
+		0xa0, 0x14, 0xc8, 0x7c, 0x70, 0xc4, 0x18, 0xac  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+		0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb5, 0x6a, 0xdf, 0xd4, 0x61, 0xbe, 0x0b,
+		0xa8, 0x1d, 0xc2, 0x77, 0x7c, 0xc9, 0x16, 0xa3  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+		0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb6, 0x6c, 0xda, 0xd8, 0x6e, 0xb4, 0x02,
+		0xb0, 0x06, 0xdc, 0x6a, 0x68, 0xde, 0x04, 0xb2  },
+	{	0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+		0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb7, 0x6e, 0xd9, 0xdc, 0x6b, 0xb2, 0x05,
+		0xb8, 0x0f, 0xd6, 0x61, 0x64, 0xd3, 0x0a, 0xbd  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+		0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb8, 0x70, 0xc8, 0xe0, 0x58, 0x90, 0x28,
+		0xc0, 0x78, 0xb0, 0x08, 0x20, 0x98, 0x50, 0xe8  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+		0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xb9, 0x72, 0xcb, 0xe4, 0x5d, 0x96, 0x2f,
+		0xc8, 0x71, 0xba, 0x03, 0x2c, 0x95, 0x5e, 0xe7  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+		0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xba, 0x74, 0xce, 0xe8, 0x52, 0x9c, 0x26,
+		0xd0, 0x6a, 0xa4, 0x1e, 0x38, 0x82, 0x4c, 0xf6  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+		0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+		0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+		0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xbc, 0x78, 0xc4, 0xf0, 0x4c, 0x88, 0x34,
+		0xe0, 0x5c, 0x98, 0x24, 0x10, 0xac, 0x68, 0xd4  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+		0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xbd, 0x7a, 0xc7, 0xf4, 0x49, 0x8e, 0x33,
+		0xe8, 0x55, 0x92, 0x2f, 0x1c, 0xa1, 0x66, 0xdb  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+		0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xbe, 0x7c, 0xc2, 0xf8, 0x46, 0x84, 0x3a,
+		0xf0, 0x4e, 0x8c, 0x32, 0x08, 0xb6, 0x74, 0xca  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+		0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+		0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e  },
+	{	0x00, 0xbf, 0x7e, 0xc1, 0xfc, 0x43, 0x82, 0x3d,
+		0xf8, 0x47, 0x86, 0x39, 0x04, 0xbb, 0x7a, 0xc5  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+		0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+		0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc1, 0x82, 0x43, 0x04, 0xc5, 0x86, 0x47,
+		0x08, 0xc9, 0x8a, 0x4b, 0x0c, 0xcd, 0x8e, 0x4f  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+		0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc2, 0x84, 0x46, 0x08, 0xca, 0x8c, 0x4e,
+		0x10, 0xd2, 0x94, 0x56, 0x18, 0xda, 0x9c, 0x5e  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+		0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc3, 0x86, 0x45, 0x0c, 0xcf, 0x8a, 0x49,
+		0x18, 0xdb, 0x9e, 0x5d, 0x14, 0xd7, 0x92, 0x51  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+		0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc4, 0x88, 0x4c, 0x10, 0xd4, 0x98, 0x5c,
+		0x20, 0xe4, 0xa8, 0x6c, 0x30, 0xf4, 0xb8, 0x7c  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+		0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc5, 0x8a, 0x4f, 0x14, 0xd1, 0x9e, 0x5b,
+		0x28, 0xed, 0xa2, 0x67, 0x3c, 0xf9, 0xb6, 0x73  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+		0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc6, 0x8c, 0x4a, 0x18, 0xde, 0x94, 0x52,
+		0x30, 0xf6, 0xbc, 0x7a, 0x28, 0xee, 0xa4, 0x62  },
+	{	0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+		0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc7, 0x8e, 0x49, 0x1c, 0xdb, 0x92, 0x55,
+		0x38, 0xff, 0xb6, 0x71, 0x24, 0xe3, 0xaa, 0x6d  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+		0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc8, 0x90, 0x58, 0x20, 0xe8, 0xb0, 0x78,
+		0x40, 0x88, 0xd0, 0x18, 0x60, 0xa8, 0xf0, 0x38  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+		0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xc9, 0x92, 0x5b, 0x24, 0xed, 0xb6, 0x7f,
+		0x48, 0x81, 0xda, 0x13, 0x6c, 0xa5, 0xfe, 0x37  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+		0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xca, 0x94, 0x5e, 0x28, 0xe2, 0xbc, 0x76,
+		0x50, 0x9a, 0xc4, 0x0e, 0x78, 0xb2, 0xec, 0x26  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+		0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xcb, 0x96, 0x5d, 0x2c, 0xe7, 0xba, 0x71,
+		0x58, 0x93, 0xce, 0x05, 0x74, 0xbf, 0xe2, 0x29  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+		0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xcc, 0x98, 0x54, 0x30, 0xfc, 0xa8, 0x64,
+		0x60, 0xac, 0xf8, 0x34, 0x50, 0x9c, 0xc8, 0x04  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+		0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xcd, 0x9a, 0x57, 0x34, 0xf9, 0xae, 0x63,
+		0x68, 0xa5, 0xf2, 0x3f, 0x5c, 0x91, 0xc6, 0x0b  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+		0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xce, 0x9c, 0x52, 0x38, 0xf6, 0xa4, 0x6a,
+		0x70, 0xbe, 0xec, 0x22, 0x48, 0x86, 0xd4, 0x1a  },
+	{	0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+		0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+		0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+		0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+		0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd1, 0xa2, 0x73, 0x44, 0x95, 0xe6, 0x37,
+		0x88, 0x59, 0x2a, 0xfb, 0xcc, 0x1d, 0x6e, 0xbf  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+		0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+		0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+		0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd3, 0xa6, 0x75, 0x4c, 0x9f, 0xea, 0x39,
+		0x98, 0x4b, 0x3e, 0xed, 0xd4, 0x07, 0x72, 0xa1  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+		0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd4, 0xa8, 0x7c, 0x50, 0x84, 0xf8, 0x2c,
+		0xa0, 0x74, 0x08, 0xdc, 0xf0, 0x24, 0x58, 0x8c  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+		0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd5, 0xaa, 0x7f, 0x54, 0x81, 0xfe, 0x2b,
+		0xa8, 0x7d, 0x02, 0xd7, 0xfc, 0x29, 0x56, 0x83  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+		0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd6, 0xac, 0x7a, 0x58, 0x8e, 0xf4, 0x22,
+		0xb0, 0x66, 0x1c, 0xca, 0xe8, 0x3e, 0x44, 0x92  },
+	{	0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+		0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd7, 0xae, 0x79, 0x5c, 0x8b, 0xf2, 0x25,
+		0xb8, 0x6f, 0x16, 0xc1, 0xe4, 0x33, 0x4a, 0x9d  },
+	{	0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+		0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd8, 0xb0, 0x68, 0x60, 0xb8, 0xd0, 0x08,
+		0xc0, 0x18, 0x70, 0xa8, 0xa0, 0x78, 0x10, 0xc8  },
+	{	0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+		0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xd9, 0xb2, 0x6b, 0x64, 0xbd, 0xd6, 0x0f,
+		0xc8, 0x11, 0x7a, 0xa3, 0xac, 0x75, 0x1e, 0xc7  },
+	{	0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+		0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xda, 0xb4, 0x6e, 0x68, 0xb2, 0xdc, 0x06,
+		0xd0, 0x0a, 0x64, 0xbe, 0xb8, 0x62, 0x0c, 0xd6  },
+	{	0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+		0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xdb, 0xb6, 0x6d, 0x6c, 0xb7, 0xda, 0x01,
+		0xd8, 0x03, 0x6e, 0xb5, 0xb4, 0x6f, 0x02, 0xd9  },
+	{	0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+		0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xdc, 0xb8, 0x64, 0x70, 0xac, 0xc8, 0x14,
+		0xe0, 0x3c, 0x58, 0x84, 0x90, 0x4c, 0x28, 0xf4  },
+	{	0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+		0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xdd, 0xba, 0x67, 0x74, 0xa9, 0xce, 0x13,
+		0xe8, 0x35, 0x52, 0x8f, 0x9c, 0x41, 0x26, 0xfb  },
+	{	0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+		0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xde, 0xbc, 0x62, 0x78, 0xa6, 0xc4, 0x1a,
+		0xf0, 0x2e, 0x4c, 0x92, 0x88, 0x56, 0x34, 0xea  },
+	{	0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+		0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74  },
+	{	0x00, 0xdf, 0xbe, 0x61, 0x7c, 0xa3, 0xc2, 0x1d,
+		0xf8, 0x27, 0x46, 0x99, 0x84, 0x5b, 0x3a, 0xe5  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+		0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+		0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe1, 0xc2, 0x23, 0x84, 0x65, 0x46, 0xa7,
+		0x08, 0xe9, 0xca, 0x2b, 0x8c, 0x6d, 0x4e, 0xaf  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+		0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe2, 0xc4, 0x26, 0x88, 0x6a, 0x4c, 0xae,
+		0x10, 0xf2, 0xd4, 0x36, 0x98, 0x7a, 0x5c, 0xbe  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+		0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe3, 0xc6, 0x25, 0x8c, 0x6f, 0x4a, 0xa9,
+		0x18, 0xfb, 0xde, 0x3d, 0x94, 0x77, 0x52, 0xb1  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+		0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe4, 0xc8, 0x2c, 0x90, 0x74, 0x58, 0xbc,
+		0x20, 0xc4, 0xe8, 0x0c, 0xb0, 0x54, 0x78, 0x9c  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+		0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe5, 0xca, 0x2f, 0x94, 0x71, 0x5e, 0xbb,
+		0x28, 0xcd, 0xe2, 0x07, 0xbc, 0x59, 0x76, 0x93  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+		0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe6, 0xcc, 0x2a, 0x98, 0x7e, 0x54, 0xb2,
+		0x30, 0xd6, 0xfc, 0x1a, 0xa8, 0x4e, 0x64, 0x82  },
+	{	0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+		0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe7, 0xce, 0x29, 0x9c, 0x7b, 0x52, 0xb5,
+		0x38, 0xdf, 0xf6, 0x11, 0xa4, 0x43, 0x6a, 0x8d  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+		0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+		0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+		0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xe9, 0xd2, 0x3b, 0xa4, 0x4d, 0x76, 0x9f,
+		0x48, 0xa1, 0x9a, 0x73, 0xec, 0x05, 0x3e, 0xd7  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+		0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xea, 0xd4, 0x3e, 0xa8, 0x42, 0x7c, 0x96,
+		0x50, 0xba, 0x84, 0x6e, 0xf8, 0x12, 0x2c, 0xc6  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+		0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xeb, 0xd6, 0x3d, 0xac, 0x47, 0x7a, 0x91,
+		0x58, 0xb3, 0x8e, 0x65, 0xf4, 0x1f, 0x22, 0xc9  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+		0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xec, 0xd8, 0x34, 0xb0, 0x5c, 0x68, 0x84,
+		0x60, 0x8c, 0xb8, 0x54, 0xd0, 0x3c, 0x08, 0xe4  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+		0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xed, 0xda, 0x37, 0xb4, 0x59, 0x6e, 0x83,
+		0x68, 0x85, 0xb2, 0x5f, 0xdc, 0x31, 0x06, 0xeb  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+		0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xee, 0xdc, 0x32, 0xb8, 0x56, 0x64, 0x8a,
+		0x70, 0x9e, 0xac, 0x42, 0xc8, 0x26, 0x14, 0xfa  },
+	{	0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+		0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xef, 0xde, 0x31, 0xbc, 0x53, 0x62, 0x8d,
+		0x78, 0x97, 0xa6, 0x49, 0xc4, 0x2b, 0x1a, 0xf5  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+		0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7  },
+	{	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+		0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7  },
+	{	0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+		0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf1, 0xe2, 0x13, 0xc4, 0x35, 0x26, 0xd7,
+		0x88, 0x79, 0x6a, 0x9b, 0x4c, 0xbd, 0xae, 0x5f  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+		0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa  },
+	{	0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+		0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf2, 0xe4, 0x16, 0xc8, 0x3a, 0x2c, 0xde,
+		0x90, 0x62, 0x74, 0x86, 0x58, 0xaa, 0xbc, 0x4e  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+		0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa  },
+	{	0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+		0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf3, 0xe6, 0x15, 0xcc, 0x3f, 0x2a, 0xd9,
+		0x98, 0x6b, 0x7e, 0x8d, 0x54, 0xa7, 0xb2, 0x41  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+		0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90  },
+	{	0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+		0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf4, 0xe8, 0x1c, 0xd0, 0x24, 0x38, 0xcc,
+		0xa0, 0x54, 0x48, 0xbc, 0x70, 0x84, 0x98, 0x6c  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+		0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90  },
+	{	0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+		0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+		0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+		0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d  },
+	{	0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+		0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf6, 0xec, 0x1a, 0xd8, 0x2e, 0x34, 0xc2,
+		0xb0, 0x46, 0x5c, 0xaa, 0x68, 0x9e, 0x84, 0x72  },
+	{	0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+		0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d  },
+	{	0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+		0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf7, 0xee, 0x19, 0xdc, 0x2b, 0x32, 0xc5,
+		0xb8, 0x4f, 0x56, 0xa1, 0x64, 0x93, 0x8a, 0x7d  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+		0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4  },
+	{	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf8, 0xf0, 0x08, 0xe0, 0x18, 0x10, 0xe8,
+		0xc0, 0x38, 0x30, 0xc8, 0x20, 0xd8, 0xd0, 0x28  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+		0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4  },
+	{	0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+		0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xf9, 0xf2, 0x0b, 0xe4, 0x1d, 0x16, 0xef,
+		0xc8, 0x31, 0x3a, 0xc3, 0x2c, 0xd5, 0xde, 0x27  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+		0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9  },
+	{	0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+		0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xfa, 0xf4, 0x0e, 0xe8, 0x12, 0x1c, 0xe6,
+		0xd0, 0x2a, 0x24, 0xde, 0x38, 0xc2, 0xcc, 0x36  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+		0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9  },
+	{	0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+		0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xfb, 0xf6, 0x0d, 0xec, 0x17, 0x1a, 0xe1,
+		0xd8, 0x23, 0x2e, 0xd5, 0x34, 0xcf, 0xc2, 0x39  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+		0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3  },
+	{	0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+		0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xfc, 0xf8, 0x04, 0xf0, 0x0c, 0x08, 0xf4,
+		0xe0, 0x1c, 0x18, 0xe4, 0x10, 0xec, 0xe8, 0x14  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+		0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3  },
+	{	0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+		0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xfd, 0xfa, 0x07, 0xf4, 0x09, 0x0e, 0xf3,
+		0xe8, 0x15, 0x12, 0xef, 0x1c, 0xe1, 0xe6, 0x1b  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+		0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde  },
+	{	0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+		0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xfe, 0xfc, 0x02, 0xf8, 0x06, 0x04, 0xfa,
+		0xf0, 0x0e, 0x0c, 0xf2, 0x08, 0xf6, 0xf4, 0x0a  },
+	{	0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+		0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde  },
+	{	0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+		0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50  },
+	{	0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+		0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69  },
+	{	0x00, 0xff, 0xfe, 0x01, 0xfc, 0x03, 0x02, 0xfd,
+		0xf8, 0x07, 0x06, 0xf9, 0x04, 0xfb, 0xfa, 0x05  }
+};
+/* END CSTYLED */
+#endif // ENDIANNESS
+#endif /* defined(__powerpc__) */

diff --git a/zfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h b/zfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h
new file mode 100644
index 0000000..baf7e7a
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h

@@ -0,0 +1,686 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
+ *           <romain.dolbeau@european-processor-initiative.eu>
+ */
+
+#include <sys/types.h>
+#include <sys/simd.h>
+
+#define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N	/* 9th arg */
+#define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1) /* argc, 1..8 */
+
+#define	VR0_(REG, ...) "%[w"#REG"]"	/* VRn_: "%[w<n-th arg>]" */
+#define	VR1_(_1, REG, ...) "%[w"#REG"]"
+#define	VR2_(_1, _2, REG, ...) "%[w"#REG"]"
+#define	VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
+#define	VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
+#define	VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
+#define	VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
+#define	VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
+
+/*
+ * The trailing numbers (36, 35, ...) are placeholder register names
+ * that are not used otherwise. They only show up in the asm blocks
+ * written for more registers than the caller passed, i.e. in switch
+ * cases that are never executed. GCC still has to check the operands
+ * of those blocks, and duplicate operand names are illegal, so every
+ * placeholder is distinct. The "register" number doubles as the name.
+ */
+
+#define	VR0(r...) VR0_(r)	/* VRn(r): operand text of n-th reg in r */
+#define	VR1(r...) VR1_(r)
+#define	VR2(r...) VR2_(r, 36)	/* padded so a short r still expands */
+#define	VR3(r...) VR3_(r, 36, 35)
+#define	VR4(r...) VR4_(r, 36, 35, 34, 33)
+#define	VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
+#define	VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
+#define	VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define	VR(X) "%[w"#X"]"	/* operand text for an explicit number */
+
+#define	RVR0_(REG, ...) [w##REG] "v" (w##REG)	/* RVRn_: read-only operand */
+#define	RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
+#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)
+
+#define	RVR0(r...) RVR0_(r)	/* RVRn(r): n-th reg of r as asm input */
+#define	RVR1(r...) RVR1_(r)
+#define	RVR2(r...) RVR2_(r, 36)
+#define	RVR3(r...) RVR3_(r, 36, 35)
+#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
+#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
+#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
+#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define	RVR(X) [w##X] "v" (w##X)	/* input for an explicit number */
+
+#define	WVR0_(REG, ...) [w##REG] "=v" (w##REG)	/* WVRn_: write-only operand */
+#define	WVR1_(_1, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR2_(_1, _2, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=v" (w##REG)
+#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=v" (w##REG)
+
+#define	WVR0(r...) WVR0_(r)	/* WVRn(r): n-th reg of r as asm output */
+#define	WVR1(r...) WVR1_(r)
+#define	WVR2(r...) WVR2_(r, 36)
+#define	WVR3(r...) WVR3_(r, 36, 35)
+#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
+#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
+#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
+#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define	WVR(X) [w##X] "=v" (w##X)	/* output for an explicit number */
+
+#define	UVR0_(REG, ...) [w##REG] "+&v" (w##REG)	/* UVRn_: read-write operand */
+#define	UVR1_(_1, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&v" (w##REG)
+#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&v" (w##REG)
+
+#define	UVR0(r...) UVR0_(r)	/* UVRn(r): n-th reg of r, updated in place */
+#define	UVR1(r...) UVR1_(r)
+#define	UVR2(r...) UVR2_(r, 36)
+#define	UVR3(r...) UVR3_(r, 36, 35)
+#define	UVR4(r...) UVR4_(r, 36, 35, 34, 33)
+#define	UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
+#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
+#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define	UVR(X) [w##X] "+&v" (w##X)	/* "+&": read-write, early-clobber */
+
+#define	R_01(REG1, REG2, ...) REG1, REG2	/* first two arguments */
+#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define	R_23(REG...) _R_23(REG, 1, 2, 3)	/* 3rd and 4th arguments */
+
+#define	ZFS_ASM_BUG()	ASSERT(0)	/* unsupported register count */
+
+/* Byte address val bytes past ptr; val is parenthesized for safe expansion. */
+#define	OFFSET(ptr, val)	(((unsigned char *)(ptr))+(val))
+
+extern const uint8_t gf_clmul_mod_lt[4*256][16];	/* 16-byte rows */
+
+#define	ELEM_SIZE 16	/* bytes in one v_t element */
+
+typedef struct v {	/* one ELEM_SIZE-byte block, ELEM_SIZE-aligned */
+	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+#define	XOR_ACC(src, r...)	/* r[i] ^= 16 bytes at src + 16*i */	\
+{								\
+	switch (REG_CNT(r)) {	/* NOTE(review): no "memory" clobber */	\
+	case 8:	/* two batches of four via scratch v18-v21 */	\
+		__asm__ __volatile__(				\
+		"lvx 21,0,%[SRC0]\n"				\
+		"lvx 20,0,%[SRC1]\n"				\
+		"lvx 19,0,%[SRC2]\n"				\
+		"lvx 18,0,%[SRC3]\n"				\
+		"vxor " VR0(r) "," VR0(r) ",21\n"		\
+		"vxor " VR1(r) "," VR1(r) ",20\n"		\
+		"vxor " VR2(r) "," VR2(r) ",19\n"		\
+		"vxor " VR3(r) "," VR3(r) ",18\n"		\
+		"lvx 21,0,%[SRC4]\n"				\
+		"lvx 20,0,%[SRC5]\n"				\
+		"lvx 19,0,%[SRC6]\n"				\
+		"lvx 18,0,%[SRC7]\n"				\
+		"vxor " VR4(r) "," VR4(r) ",21\n"		\
+		"vxor " VR5(r) "," VR5(r) ",20\n"		\
+		"vxor " VR6(r) "," VR6(r) ",19\n"		\
+		"vxor " VR7(r) "," VR7(r) ",18\n"		\
+		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r),	\
+			UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16))),			\
+		[SRC2] "r" ((OFFSET(src, 32))),			\
+		[SRC3] "r" ((OFFSET(src, 48))),			\
+		[SRC4] "r" ((OFFSET(src, 64))),			\
+		[SRC5] "r" ((OFFSET(src, 80))),			\
+		[SRC6] "r" ((OFFSET(src, 96))),			\
+		[SRC7] "r" ((OFFSET(src, 112)))			\
+		:	"v18", "v19", "v20", "v21");		\
+		break;						\
+	case 4:							\
+		__asm__ __volatile__(				\
+		"lvx 21,0,%[SRC0]\n"				\
+		"lvx 20,0,%[SRC1]\n"				\
+		"lvx 19,0,%[SRC2]\n"				\
+		"lvx 18,0,%[SRC3]\n"				\
+		"vxor " VR0(r) "," VR0(r) ",21\n"		\
+		"vxor " VR1(r) "," VR1(r) ",20\n"		\
+		"vxor " VR2(r) "," VR2(r) ",19\n"		\
+		"vxor " VR3(r) "," VR3(r) ",18\n"		\
+		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16))),			\
+		[SRC2] "r" ((OFFSET(src, 32))),			\
+		[SRC3] "r" ((OFFSET(src, 48)))			\
+		:	"v18", "v19", "v20", "v21");		\
+		break;						\
+	case 2:	/* only v20/v21 are needed as scratch */	\
+		__asm__ __volatile__(				\
+		"lvx 21,0,%[SRC0]\n"				\
+		"lvx 20,0,%[SRC1]\n"				\
+		"vxor " VR0(r) "," VR0(r) ",21\n"		\
+		"vxor " VR1(r) "," VR1(r) ",20\n"		\
+		:	UVR0(r), UVR1(r)			\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16)))			\
+		:	"v20", "v21");				\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+#define	XOR(r...)	/* 2nd half of r ^= 1st half of r */	\
+{								\
+	switch (REG_CNT(r)) {					\
+	case 8:	/* r[4..7] ^= r[0..3] */			\
+		__asm__ __volatile__(				\
+		"vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"	\
+		"vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"	\
+		"vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"	\
+		"vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"	\
+		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
+		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
+		break;						\
+	case 4:	/* r[2..3] ^= r[0..1] */			\
+		__asm__ __volatile__(				\
+		"vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"	\
+		"vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"	\
+		:	UVR2(r), UVR3(r)			\
+		:	RVR0(r), RVR1(r));			\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+#define	ZERO(r...)	/* r[i] = 0, computed as r[i] ^ r[i] */	\
+{								\
+	switch (REG_CNT(r)) {					\
+	case 8:							\
+		__asm__ __volatile__(				\
+		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
+		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
+		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
+		"vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"	\
+		"vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"	\
+		"vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"	\
+		"vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"	\
+		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
+			WVR4(r), WVR5(r), WVR6(r), WVR7(r));	\
+		break;						\
+	case 4:							\
+		__asm__ __volatile__(				\
+		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
+		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
+		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
+		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r));	\
+		break;						\
+	case 2:							\
+		__asm__ __volatile__(				\
+		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
+		:	WVR0(r), WVR1(r));			\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+#define	COPY(r...)	/* 2nd half of r = 1st half of r */	\
+{								\
+	switch (REG_CNT(r)) {	/* "vor d,s,s" copies s into d */	\
+	case 8:	/* r[4..7] = r[0..3] */				\
+		__asm__ __volatile__(				\
+		"vor " VR4(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vor " VR5(r) "," VR1(r) "," VR1(r) "\n"	\
+		"vor " VR6(r) "," VR2(r) "," VR2(r) "\n"	\
+		"vor " VR7(r) "," VR3(r) "," VR3(r) "\n"	\
+		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
+		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
+		break;						\
+	case 4:	/* r[2..3] = r[0..1] */				\
+		__asm__ __volatile__(				\
+		"vor " VR2(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vor " VR3(r) "," VR1(r) "," VR1(r) "\n"	\
+		:	WVR2(r), WVR3(r)			\
+		:	RVR0(r), RVR1(r));			\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+#define	LOAD(src, r...)	/* r[i] = 16 bytes at src + 16*i */	\
+{								\
+	switch (REG_CNT(r)) {	/* NOTE(review): no "memory" clobber */	\
+	case 8:							\
+		__asm__ __volatile__(				\
+		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
+		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
+		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
+		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
+		"lvx " VR4(r) " ,0,%[SRC4]\n"			\
+		"lvx " VR5(r) " ,0,%[SRC5]\n"			\
+		"lvx " VR6(r) " ,0,%[SRC6]\n"			\
+		"lvx " VR7(r) " ,0,%[SRC7]\n"			\
+		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
+			WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16))),			\
+		[SRC2] "r" ((OFFSET(src, 32))),			\
+		[SRC3] "r" ((OFFSET(src, 48))),			\
+		[SRC4] "r" ((OFFSET(src, 64))),			\
+		[SRC5] "r" ((OFFSET(src, 80))),			\
+		[SRC6] "r" ((OFFSET(src, 96))),			\
+		[SRC7] "r" ((OFFSET(src, 112))));		\
+		break;						\
+	case 4:							\
+		__asm__ __volatile__(				\
+		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
+		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
+		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
+		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
+		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)	\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16))),			\
+		[SRC2] "r" ((OFFSET(src, 32))),			\
+		[SRC3] "r" ((OFFSET(src, 48))));		\
+		break;						\
+	case 2:							\
+		__asm__ __volatile__(				\
+		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
+		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
+		:	WVR0(r), WVR1(r)			\
+		:	[SRC0] "r" ((OFFSET(src, 0))),		\
+		[SRC1] "r" ((OFFSET(src, 16))));		\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+#define	STORE(dst, r...)	/* 16 bytes at dst + 16*i = r[i] */	\
+{								\
+	switch (REG_CNT(r)) {	/* "memory" clobber: asm writes dst */	\
+	case 8:							\
+		__asm__ __volatile__(				\
+		"stvx " VR0(r) " ,0,%[DST0]\n"			\
+		"stvx " VR1(r) " ,0,%[DST1]\n"			\
+		"stvx " VR2(r) " ,0,%[DST2]\n"			\
+		"stvx " VR3(r) " ,0,%[DST3]\n"			\
+		"stvx " VR4(r) " ,0,%[DST4]\n"			\
+		"stvx " VR5(r) " ,0,%[DST5]\n"			\
+		"stvx " VR6(r) " ,0,%[DST6]\n"			\
+		"stvx " VR7(r) " ,0,%[DST7]\n"			\
+		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
+		[DST1] "r" ((OFFSET(dst, 16))),			\
+		[DST2] "r" ((OFFSET(dst, 32))),			\
+		[DST3] "r" ((OFFSET(dst, 48))),			\
+		[DST4] "r" ((OFFSET(dst, 64))),			\
+		[DST5] "r" ((OFFSET(dst, 80))),			\
+		[DST6] "r" ((OFFSET(dst, 96))),			\
+		[DST7] "r" ((OFFSET(dst, 112))),		\
+		RVR0(r), RVR1(r), RVR2(r), RVR3(r),		\
+		RVR4(r), RVR5(r), RVR6(r), RVR7(r)		\
+		:	"memory");				\
+		break;						\
+	case 4:							\
+		__asm__ __volatile__(				\
+		"stvx " VR0(r) " ,0,%[DST0]\n"			\
+		"stvx " VR1(r) " ,0,%[DST1]\n"			\
+		"stvx " VR2(r) " ,0,%[DST2]\n"			\
+		"stvx " VR3(r) " ,0,%[DST3]\n"			\
+		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
+		[DST1] "r" ((OFFSET(dst, 16))),			\
+		[DST2] "r" ((OFFSET(dst, 32))),			\
+		[DST3] "r" ((OFFSET(dst, 48))),			\
+		RVR0(r), RVR1(r), RVR2(r), RVR3(r)		\
+		: "memory");					\
+		break;						\
+	case 2:							\
+		__asm__ __volatile__(				\
+		"stvx " VR0(r) " ,0,%[DST0]\n"			\
+		"stvx " VR1(r) " ,0,%[DST1]\n"			\
+		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
+		[DST1] "r" ((OFFSET(dst, 16))),			\
+		RVR0(r), RVR1(r) : "memory");			\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+/*
+ * These names cannot be passed to VR()/RVR()/WVR(): the # and ##
+ * operators there would use the macro name, not its value.
+ * They are kept only as a reference to what each numbered
+ * variable holds in MUL2_SETUP() and MUL2().
+ */
+#define	_00	"17"	/* all-zero vector */
+#define	_1d	"16"	/* 0x1d in every byte */
+#define	_temp0	"19"	/* scratch register in MUL2 */
+#define	_temp1	"18"	/* scratch register in MUL2 */
+
+#define	MUL2_SETUP()	/* w16 = 0x1d per byte, w17 = zero */	\
+{								\
+	__asm__ __volatile__(					\
+		"vspltisb " VR(16) ",14\n"	/* 14 ... */		\
+		"vspltisb " VR(17) ",15\n"	/* ... + 15 = 0x1d */	\
+		"vaddubm " VR(16) "," VR(17) "," VR(16) "\n"	\
+		"vxor " VR(17) "," VR(17) "," VR(17) "\n"	\
+		:	WVR(16), WVR(17));			\
+}
+
+#define	MUL2(r...)	/* per byte: r[i] = 2 * r[i] in GF(2^8) */	\
+{	/* i.e. (r << 1) ^ ((r & 0x80) ? 0x1d : 0) */		\
+	switch (REG_CNT(r)) {	/* needs w16/w17 from MUL2_SETUP */	\
+	case 4:	/* v18-v21 hold the per-byte 0x1d masks */	\
+		__asm__ __volatile__(				\
+		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
+		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
+		"vcmpgtsb 21," VR(17) "," VR2(r) "\n"		\
+		"vcmpgtsb 20," VR(17) "," VR3(r) "\n"		\
+		"vand 19,19," VR(16) "\n"			\
+		"vand 18,18," VR(16) "\n"			\
+		"vand 21,21," VR(16) "\n"			\
+		"vand 20,20," VR(16) "\n"			\
+		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
+		"vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
+		"vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
+		"vxor " VR0(r) ",19," VR0(r) "\n"		\
+		"vxor " VR1(r) ",18," VR1(r) "\n"		\
+		"vxor " VR2(r) ",21," VR2(r) "\n"		\
+		"vxor " VR3(r) ",20," VR3(r) "\n"		\
+		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
+		:	RVR(17), RVR(16)			\
+		:	"v18", "v19", "v20", "v21");		\
+		break;						\
+	case 2:	/* same steps, masks in v18/v19 only */		\
+		__asm__ __volatile__(				\
+		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
+		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
+		"vand 19,19," VR(16) "\n"			\
+		"vand 18,18," VR(16) "\n"			\
+		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
+		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
+		"vxor " VR0(r) ",19," VR0(r) "\n"		\
+		"vxor " VR1(r) ",18," VR1(r) "\n"		\
+		:	UVR0(r), UVR1(r)			\
+		:	RVR(17), RVR(16)			\
+		:	"v18", "v19");				\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+/* Apply MUL2() twice to the same registers, i.e. double them twice. */
+#define	MUL4(r...)						\
+{								\
+	MUL2(r);						\
+	MUL2(r);						\
+}
+
+/*
+ * Unfortunately cannot use the macro, because GCC
+ * will try to use the macro name and not value
+ * later on...
+ * Kept as a reference to what a register is
+ * (here we're using actual registers for the
+ * clobbered ones)
+ */
+#define	_0f		"15"	/* same register number as _lt_clmul_b */
+#define	_a_save		"14"	/* _MULx2(): first register ANDed with v15 */
+#define	_b_save		"13"	/* _MULx2(): second register ANDed with v15 */
+#define	_lt_mod_a	"12"
+#define	_lt_clmul_a	"11"
+#define	_lt_mod_b	"10"
+#define	_lt_clmul_b	"15"	/* same register number as _0f */
+
+/*
+ * Multiply the bytes of two vector registers by the constant c using the
+ * four lookup rows gf_clmul_mod_lt[4 * (c) + 0 .. 3].
+ *
+ * Steps, as laid out in the asm below:
+ *   - v15 is filled with 15 (0x0f) and rows 0/1 are loaded into v10/v11.
+ *   - The low nibbles of each input are saved in v14/v13 (vand with v15);
+ *     v15 is then refilled with 4 and each input is shifted right by it
+ *     (vsrab).
+ *   - vperm selects bytes from rows 0/1 using the shifted inputs and the
+ *     two selections per input are XORed into the input register.
+ *   - Rows 2/3 are loaded, selected with the saved low nibbles, and XORed
+ *     into the result as well.
+ *
+ * Only the two-register form is implemented; any other register count
+ * ends up in ZFS_ASM_BUG().  v10-v15 are scratch and declared as
+ * clobbers.
+ */
+#define	_MULx2(c, r...)						\
+{								\
+	switch (REG_CNT(r)) {					\
+	case 2:							\
+		__asm__ __volatile__(				\
+		/* lts for upper part */			\
+		"vspltisb 15,15\n"				\
+		"lvx 10,0,%[lt0]\n"				\
+		"lvx 11,0,%[lt1]\n"				\
+		/* upper part */				\
+		"vand 14," VR0(r) ",15\n"			\
+		"vand 13," VR1(r) ",15\n"			\
+		"vspltisb 15,4\n"				\
+		"vsrab " VR0(r) "," VR0(r) ",15\n"		\
+		"vsrab " VR1(r) "," VR1(r) ",15\n"		\
+								\
+		"vperm 12,10,10," VR0(r) "\n"			\
+		"vperm 10,10,10," VR1(r) "\n"			\
+		"vperm 15,11,11," VR0(r) "\n"			\
+		"vperm 11,11,11," VR1(r) "\n"			\
+								\
+		"vxor " VR0(r) ",15,12\n"			\
+		"vxor " VR1(r) ",11,10\n"			\
+		/* lts for lower part */			\
+		"lvx 10,0,%[lt2]\n"				\
+		"lvx 15,0,%[lt3]\n"				\
+		/* lower part */				\
+		"vperm 12,10,10,14\n"				\
+		"vperm 10,10,10,13\n"				\
+		"vperm 11,15,15,14\n"				\
+		"vperm 15,15,15,13\n"				\
+								\
+		"vxor " VR0(r) "," VR0(r) ",12\n"		\
+		"vxor " VR1(r) "," VR1(r) ",10\n"		\
+		"vxor " VR0(r) "," VR0(r) ",11\n"		\
+		"vxor " VR1(r) "," VR1(r) ",15\n"		\
+		: UVR0(r), UVR1(r)				\
+		: [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),	\
+		[lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),	\
+		[lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),	\
+		[lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))	\
+		: "v10", "v11", "v12", "v13", "v14", "v15");	\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+/*
+ * Multiply 2 or 4 registers by the constant c, two registers at a time
+ * through _MULx2().  With four registers the R_23() pair is handled
+ * before the R_01() pair (presumably registers 2/3 and 0/1 of the list --
+ * confirm against the R_* helper definitions).  Any other register count
+ * ends up in ZFS_ASM_BUG().
+ */
+#define	MUL(c, r...)						\
+{								\
+	switch (REG_CNT(r)) {					\
+	case 4:							\
+		_MULx2(c, R_23(r));				\
+		_MULx2(c, R_01(r));				\
+		break;						\
+	case 2:							\
+		_MULx2(c, R_01(r));				\
+		break;						\
+	default:						\
+		ZFS_ASM_BUG();					\
+	}							\
+}
+
+/* Entry/exit hooks around the vector code; they map to kfpu_begin/end. */
+#define	raidz_math_begin()	kfpu_begin()
+#define	raidz_math_end()	kfpu_end()
+
+/* Overkill... */
+#if 0 // defined(_KERNEL)
+/*
+ * Disabled variant (this branch sits under "#if 0"): every wN variable is
+ * a 16-byte vector bound to an explicitly numbered register via asm("N").
+ * Each GEN_X_DEFINE_*() macro declares the variables whose numbers appear
+ * in its name; GEN_X_DEFINE_ALL() expands all of them.
+ */
+#define	GEN_X_DEFINE_0_3()	\
+register unsigned char w0 asm("0") __attribute__((vector_size(16)));	\
+register unsigned char w1 asm("1") __attribute__((vector_size(16)));	\
+register unsigned char w2 asm("2") __attribute__((vector_size(16)));	\
+register unsigned char w3 asm("3") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_4_5()	\
+register unsigned char w4 asm("4") __attribute__((vector_size(16)));	\
+register unsigned char w5 asm("5") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_6_7()	\
+register unsigned char w6 asm("6") __attribute__((vector_size(16)));	\
+register unsigned char w7 asm("7") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_8_9()	\
+register unsigned char w8 asm("8") __attribute__((vector_size(16)));	\
+register unsigned char w9 asm("9") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_10_11()	\
+register unsigned char w10 asm("10") __attribute__((vector_size(16)));	\
+register unsigned char w11 asm("11") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_12_15()	\
+register unsigned char w12 asm("12") __attribute__((vector_size(16)));	\
+register unsigned char w13 asm("13") __attribute__((vector_size(16)));	\
+register unsigned char w14 asm("14") __attribute__((vector_size(16)));	\
+register unsigned char w15 asm("15") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_16()	\
+register unsigned char w16 asm("16") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_17()	\
+register unsigned char w17 asm("17") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_18_21()	\
+register unsigned char w18 asm("18") __attribute__((vector_size(16)));	\
+register unsigned char w19 asm("19") __attribute__((vector_size(16)));	\
+register unsigned char w20 asm("20") __attribute__((vector_size(16)));	\
+register unsigned char w21 asm("21") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_22_23()	\
+register unsigned char w22 asm("22") __attribute__((vector_size(16)));	\
+register unsigned char w23 asm("23") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_24_27()	\
+register unsigned char w24 asm("24") __attribute__((vector_size(16)));	\
+register unsigned char w25 asm("25") __attribute__((vector_size(16)));	\
+register unsigned char w26 asm("26") __attribute__((vector_size(16)));	\
+register unsigned char w27 asm("27") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_28_30()	\
+register unsigned char w28 asm("28") __attribute__((vector_size(16)));	\
+register unsigned char w29 asm("29") __attribute__((vector_size(16)));	\
+register unsigned char w30 asm("30") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_31()	\
+register unsigned char w31 asm("31") __attribute__((vector_size(16)));
+/*
+ * NOTE(review): w32..w38 below all name register "31".  This branch is
+ * compiled out so it has no effect today; confirm the intent before ever
+ * enabling it.
+ */
+#define	GEN_X_DEFINE_32()	\
+register unsigned char w32 asm("31") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_33_36()	\
+register unsigned char w33 asm("31") __attribute__((vector_size(16)));	\
+register unsigned char w34 asm("31") __attribute__((vector_size(16)));	\
+register unsigned char w35 asm("31") __attribute__((vector_size(16)));	\
+register unsigned char w36 asm("31") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_37_38()	\
+register unsigned char w37 asm("31") __attribute__((vector_size(16)));	\
+register unsigned char w38 asm("31") __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_ALL()	\
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_8_9()	\
+	GEN_X_DEFINE_10_11()	\
+	GEN_X_DEFINE_12_15()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_18_21()	\
+	GEN_X_DEFINE_22_23()	\
+	GEN_X_DEFINE_24_27()	\
+	GEN_X_DEFINE_28_30()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36() 	\
+	GEN_X_DEFINE_37_38()
+#else
+/*
+ * Active variant: plain 16-byte vector variables w0..w38 with no explicit
+ * register binding.  Each GEN_X_DEFINE_*() macro declares the variables
+ * whose numbers appear in its name; GEN_X_DEFINE_ALL() declares all of
+ * them.
+ */
+#define	GEN_X_DEFINE_0_3()	\
+	unsigned char w0 __attribute__((vector_size(16)));	\
+	unsigned char w1 __attribute__((vector_size(16)));	\
+	unsigned char w2 __attribute__((vector_size(16)));	\
+	unsigned char w3 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_4_5()	\
+	unsigned char w4 __attribute__((vector_size(16)));	\
+	unsigned char w5 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_6_7()	\
+	unsigned char w6 __attribute__((vector_size(16)));	\
+	unsigned char w7 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_8_9()	\
+	unsigned char w8 __attribute__((vector_size(16)));	\
+	unsigned char w9 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_10_11()	\
+	unsigned char w10 __attribute__((vector_size(16)));	\
+	unsigned char w11 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_12_15()	\
+	unsigned char w12 __attribute__((vector_size(16)));	\
+	unsigned char w13 __attribute__((vector_size(16)));	\
+	unsigned char w14 __attribute__((vector_size(16)));	\
+	unsigned char w15 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_16()	\
+	unsigned char w16 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_17()	\
+	unsigned char w17 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_18_21()	\
+	unsigned char w18 __attribute__((vector_size(16)));	\
+	unsigned char w19 __attribute__((vector_size(16)));	\
+	unsigned char w20 __attribute__((vector_size(16)));	\
+	unsigned char w21 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_22_23()	\
+	unsigned char w22 __attribute__((vector_size(16)));	\
+	unsigned char w23 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_24_27()	\
+	unsigned char w24 __attribute__((vector_size(16)));	\
+	unsigned char w25 __attribute__((vector_size(16)));	\
+	unsigned char w26 __attribute__((vector_size(16)));	\
+	unsigned char w27 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_28_30()	\
+	unsigned char w28 __attribute__((vector_size(16)));	\
+	unsigned char w29 __attribute__((vector_size(16)));	\
+	unsigned char w30 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_31()	\
+	unsigned char w31 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_32()	\
+	unsigned char w32 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_33_36()	\
+	unsigned char w33 __attribute__((vector_size(16)));	\
+	unsigned char w34 __attribute__((vector_size(16)));	\
+	unsigned char w35 __attribute__((vector_size(16)));	\
+	unsigned char w36 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_37_38()	\
+	unsigned char w37 __attribute__((vector_size(16)));	\
+	unsigned char w38 __attribute__((vector_size(16)));
+#define	GEN_X_DEFINE_ALL()	\
+	GEN_X_DEFINE_0_3()	\
+	GEN_X_DEFINE_4_5()	\
+	GEN_X_DEFINE_6_7()	\
+	GEN_X_DEFINE_8_9()	\
+	GEN_X_DEFINE_10_11()	\
+	GEN_X_DEFINE_12_15()	\
+	GEN_X_DEFINE_16()	\
+	GEN_X_DEFINE_17()	\
+	GEN_X_DEFINE_18_21()	\
+	GEN_X_DEFINE_22_23()	\
+	GEN_X_DEFINE_24_27()	\
+	GEN_X_DEFINE_28_30()	\
+	GEN_X_DEFINE_31()	\
+	GEN_X_DEFINE_32()	\
+	GEN_X_DEFINE_33_36()	\
+	GEN_X_DEFINE_37_38()
+#endif

diff --git a/zfs/module/zfs/vdev_raidz_math_scalar.c b/zfs/module/zfs/vdev_raidz_math_scalar.c
index cd742e1..9e9c15f 100644
--- a/zfs/module/zfs/vdev_raidz_math_scalar.c
+++ b/zfs/module/zfs/vdev_raidz_math_scalar.c

@@ -142,7 +142,7 @@
 		a.b[6] = mul_lt[a.b[6]];				\
 		a.b[5] = mul_lt[a.b[5]];				\
 		a.b[4] = mul_lt[a.b[4]];				\
-		/* falls through */					\
+		fallthrough;						\
 	case 4:								\
 		a.b[3] = mul_lt[a.b[3]];				\
 		a.b[2] = mul_lt[a.b[2]];				\

diff --git a/zfs/module/zfs/vdev_raidz_math_sse2.c b/zfs/module/zfs/vdev_raidz_math_sse2.c
index 5b3a938..56a0b12 100644
--- a/zfs/module/zfs/vdev_raidz_math_sse2.c
+++ b/zfs/module/zfs/vdev_raidz_math_sse2.c

@@ -27,9 +27,12 @@
 #if defined(__x86_64) && defined(HAVE_SSE2)
 
 #include <sys/types.h>
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
+#include <sys/debug.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
@@ -125,6 +128,8 @@
 		__asm(							\
 		    "movdqa %" VR0(r) ", %" VR1(r));			\
 		break;							\
+	default:							\
+		VERIFY(0);						\
 	}								\
 }
 
@@ -175,6 +180,8 @@
 		    "movdqa %%" VR0(r)", 0x00(%[DST])\n"		\
 		    : : [DST] "r" (dst));				\
 		break;							\
+	default:							\
+		VERIFY(0);						\
 	}								\
 }
 
@@ -508,6 +515,8 @@
 		gf_x1_mul_fns[c]();					\
 		COPY(_mul_x1_acc, r);					\
 		break;							\
+	default:							\
+		VERIFY(0);						\
 	}								\
 }
 

diff --git a/zfs/module/zfs/vdev_raidz_math_ssse3.c b/zfs/module/zfs/vdev_raidz_math_ssse3.c
index 62247cf..5ddc079 100644
--- a/zfs/module/zfs/vdev_raidz_math_ssse3.c
+++ b/zfs/module/zfs/vdev_raidz_math_ssse3.c

@@ -27,9 +27,11 @@
 #if defined(__x86_64) && defined(HAVE_SSSE3)
 
 #include <sys/types.h>
-#include <linux/simd_x86.h>
+#include <sys/simd.h>
 
+#ifdef __linux__
 #define	__asm __asm__ __volatile__
+#endif
 
 #define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
 #define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

diff --git a/zfs/module/zfs/vdev_rebuild.c b/zfs/module/zfs/vdev_rebuild.c
new file mode 100644
index 0000000..b180fa1
--- /dev/null
+++ b/zfs/module/zfs/vdev_rebuild.c

@@ -0,0 +1,1171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
+ */
+
+#include <sys/vdev_impl.h>
+#include <sys/vdev_draid.h>
+#include <sys/dsl_scan.h>
+#include <sys/spa_impl.h>
+#include <sys/metaslab_impl.h>
+#include <sys/vdev_rebuild.h>
+#include <sys/zio.h>
+#include <sys/dmu_tx.h>
+#include <sys/arc.h>
+#include <sys/arc_impl.h>
+#include <sys/zap.h>
+
+/*
+ * This file contains the sequential reconstruction implementation for
+ * resilvering.  This form of resilvering is internally referred to as device
+ * rebuild to avoid conflating it with the traditional healing reconstruction
+ * performed by the dsl scan code.
+ *
+ * When replacing a device, or scrubbing the pool, ZFS has historically used
+ * a process called resilvering which is a form of healing reconstruction.
+ * This approach has the advantage that as blocks are read from disk their
+ * checksums can be immediately verified and the data repaired.  Unfortunately,
+ * it also results in a random IO pattern to the disk even when extra care
+ * is taken to sequentialize the IO as much as possible.  This substantially
+ * increases the time required to resilver the pool and restore redundancy.
+ *
+ * For mirrored devices it's possible to implement an alternate sequential
+ * reconstruction strategy when resilvering.  Sequential reconstruction
+ * behaves like a traditional RAID rebuild and reconstructs a device in LBA
+ * order without verifying the checksum.  After this phase completes a second
+ * scrub phase is started to verify all of the checksums.  This two phase
+ * process will take longer than the healing reconstruction described above.
+ * However, it has the advantage that after the first reconstruction phase
+ * completes redundancy has been restored.  At this point the pool can incur
+ * another device failure without risking data loss.
+ *
+ * There are a few noteworthy limitations and other advantages of resilvering
+ * using sequential reconstruction vs healing reconstruction.
+ *
+ * Limitations:
+ *
+ *   - Sequential reconstruction is not possible on RAIDZ due to its
+ *     variable stripe width.  Note dRAID uses a fixed stripe width which
+ *     avoids this issue, but comes at the expense of some usable capacity.
+ *
+ *   - Block checksums are not verified during sequential reconstruction.
+ *     Similar to traditional RAID the parity/mirror data is reconstructed
+ *     but cannot be immediately double checked.  For this reason when the
+ *     last active resilver completes the pool is automatically scrubbed
+ *     by default.
+ *
+ *   - Deferred resilvers using sequential reconstruction are not currently
+ *     supported.  When adding another vdev to an active top-level resilver
+ *     it must be restarted.
+ *
+ * Advantages:
+ *
+ *   - Sequential reconstruction is performed in LBA order which may be faster
+ *     than healing reconstruction particularly when using HDDs (or
+ *     especially with SMR devices).  Only allocated capacity is resilvered.
+ *
+ *   - Sequential reconstruction is not constrained by ZFS block boundaries.
+ *     This allows it to issue larger IOs to disk which span multiple blocks
+ *     allowing all of these logical blocks to be repaired with a single IO.
+ *
+ *   - Unlike a healing resilver or scrub which are pool wide operations,
+ *     sequential reconstruction is handled by the top-level vdevs.  This
+ *     allows for it to be started or canceled on a top-level vdev without
+ *     impacting any other top-level vdevs in the pool.
+ *
+ *   - Data only referenced by a pool checkpoint will be repaired because
+ *     that space is reflected in the space maps.  This differs for a
+ *     healing resilver or scrub which will not repair that data.
+ */
+
+
+/*
+ * Size of rebuild reads; defaults to 1MiB per data disk and is capped at
+ * SPA_MAXBLOCKSIZE.
+ */
+unsigned long zfs_rebuild_max_segment = 1024 * 1024;
+
+/*
+ * Maximum number of parallelly executed bytes per leaf vdev caused by a
+ * sequential resilver.  We attempt to strike a balance here between keeping
+ * the vdev queues full of I/Os at all times and not overflowing the queues
+ * to cause long latency, which would cause long txg sync times.
+ *
+ * A large default value can be safely used here because the default target
+ * segment size is also large (zfs_rebuild_max_segment=1M).  This helps keep
+ * the queue depth short.
+ *
+ * 64MB was observed to deliver the best performance and set as the default.
+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
+ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
+ * Smaller values were unable to fully saturate the available pool I/O.
+ */
+unsigned long zfs_rebuild_vdev_limit = 64 << 20;
+
+/*
+ * Automatically start a pool scrub when the last active sequential resilver
+ * completes in order to verify the checksums of all blocks which have been
+ * resilvered. This option is enabled by default and is strongly recommended.
+ */
+int zfs_rebuild_scrub_enabled = 1;
+
+/*
+ * For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
+ */
+static void vdev_rebuild_thread(void *arg);
+static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);
+
+/*
+ * Clear the per-vdev rebuild bytes value for a vdev tree.
+ */
+static void
+clear_rebuild_bytes(vdev_t *vd)
+{
+	uint64_t c = 0;
+
+	/* Depth first: reset every child subtree before this vdev itself. */
+	while (c < vd->vdev_children) {
+		clear_rebuild_bytes(vd->vdev_child[c]);
+		c++;
+	}
+
+	/* The counter is only written while holding vdev_stat_lock. */
+	mutex_enter(&vd->vdev_stat_lock);
+	vd->vdev_stat.vs_rebuild_processed = 0;
+	mutex_exit(&vd->vdev_stat_lock);
+}
+
+/*
+ * Determines whether a vdev_rebuild_thread() should be stopped.
+ */
+static boolean_t
+vdev_rebuild_should_stop(vdev_t *vd)
+{
+	/* The vdev is no longer writeable, or is being removed. */
+	if (!vdev_writeable(vd) || vd->vdev_removing)
+		return (B_TRUE);
+
+	/* An exit or cancel of the rebuild has been requested. */
+	if (vd->vdev_rebuild_exit_wanted || vd->vdev_rebuild_cancel_wanted)
+		return (B_TRUE);
+
+	/* Otherwise stop only if a reset has been requested. */
+	return (vd->vdev_rebuild_reset_wanted ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Determine if the rebuild should be canceled.  This may happen when all
+ * vdevs with MISSING DTLs are detached.
+ */
+static boolean_t
+vdev_rebuild_should_cancel(vdev_t *vd)
+{
+	vdev_rebuild_phys_t *phys = &vd->vdev_rebuild_config.vr_rebuild_phys;
+
+	/*
+	 * vdev_resilver_needed() is handed the stored txg bounds; cancel
+	 * exactly when it reports that no resilver is needed.
+	 */
+	return (vdev_resilver_needed(vd, &phys->vrp_min_txg,
+	    &phys->vrp_max_txg) ? B_FALSE : B_TRUE);
+}
+
+/*
+ * The sync task for updating the on-disk state of a rebuild.  This is
+ * scheduled by vdev_rebuild_range().
+ */
+static void
+vdev_rebuild_update_sync(void *arg, dmu_tx_t *tx)
+{
+	int vdev_id = (uintptr_t)arg;
+	vdev_t *vd = vdev_lookup_top(dmu_tx_pool(tx)->dp_spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *phys = &vr->vr_rebuild_phys;
+	uint64_t *slot = &vr->vr_scan_offset[dmu_tx_get_txg(tx) & TXG_MASK];
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	/* Move the offset recorded for this txg into the on-disk record. */
+	if (*slot > 0) {
+		phys->vrp_last_offset = *slot;
+		*slot = 0;
+	}
+
+	/* Previously accumulated scan time plus the time spent this pass. */
+	phys->vrp_scan_time_ms = vr->vr_prev_scan_time_ms +
+	    NSEC2MSEC(gethrtime() - vr->vr_pass_start_time);
+
+	/* Write the record out as REBUILD_PHYS_ENTRIES 64-bit words. */
+	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, phys, tx));
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * Initialize the on-disk state for a new rebuild, start the rebuild thread.
+ */
+static void
+vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx)
+{
+	/* arg carries the top-level vdev id, not a pointer. */
+	int vdev_id = (uintptr_t)arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+
+	/* Set by vdev_rebuild_initiate() before this task was queued. */
+	ASSERT(vd->vdev_rebuilding);
+
+	/* Paired with spa_feature_decr() in the complete/cancel sync tasks. */
+	spa_feature_incr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	/* Zero the REBUILD_PHYS_ENTRIES 64-bit words that get persisted. */
+	bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
+	vrp->vrp_rebuild_state = VDEV_REBUILD_ACTIVE;
+	vrp->vrp_min_txg = 0;
+	vrp->vrp_max_txg = dmu_tx_get_txg(tx);
+	vrp->vrp_start_time = gethrestime_sec();
+	vrp->vrp_scan_time_ms = 0;
+	vr->vr_prev_scan_time_ms = 0;
+
+	/*
+	 * Rebuilds are currently only used when replacing a device, in which
+	 * case there must be DTL_MISSING entries.  In the future, we could
+	 * allow rebuilds to be used in a way similar to a scrub.  This would
+	 * be useful because it would allow us to rebuild the space used by
+	 * pool checkpoints.
+	 */
+	VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg));
+
+	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, vrp, tx));
+
+	spa_history_log_internal(spa, "rebuild", tx,
+	    "vdev_id=%llu vdev_guid=%llu started",
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
+
+	/* No worker may exist yet; it is created with the lock still held. */
+	ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+	vd->vdev_rebuild_thread = thread_create(NULL, 0,
+	    vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+static void
+vdev_rebuild_log_notify(spa_t *spa, vdev_t *vd, char *name)
+{
+	nvlist_t *payload;
+
+	/* Attach the resilver type ("sequential") to the event. */
+	payload = fnvlist_alloc();
+	fnvlist_add_string(payload, ZFS_EV_RESILVER_TYPE, "sequential");
+
+	/* Post the event named by the caller, then release the list. */
+	spa_event_notify(spa, vd, payload, name);
+	nvlist_free(payload);
+}
+
+/*
+ * Called to request that a new rebuild be started.  The feature will remain
+ * active for the duration of the rebuild, then revert to the enabled state.
+ */
+static void
+vdev_rebuild_initiate(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+
+	/*
+	 * Preconditions: vd is a top-level vdev, the caller holds
+	 * vdev_rebuild_lock, and no rebuild is marked as running yet.
+	 */
+	ASSERT(vd->vdev_top == vd);
+	ASSERT(MUTEX_HELD(&vd->vdev_rebuild_lock));
+	ASSERT(!vd->vdev_rebuilding);
+
+	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+	/* Must be set first; vdev_rebuild_initiate_sync() asserts it. */
+	vd->vdev_rebuilding = B_TRUE;
+
+	/* The sync task receives the vdev id and looks the vdev up again. */
+	dsl_sync_task_nowait(spa_get_dsl(spa), vdev_rebuild_initiate_sync,
+	    (void *)(uintptr_t)vd->vdev_id, tx);
+	dmu_tx_commit(tx);
+
+	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_START);
+}
+
+/*
+ * Update the on-disk state to completed when a rebuild finishes.
+ */
+static void
+vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
+{
+	/* arg carries the top-level vdev id, not a pointer. */
+	int vdev_id = (uintptr_t)arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	/*
+	 * Handle a second device failure if it occurs after all rebuild I/O
+	 * has completed but before this sync task has been executed.
+	 * The lock is dropped first because vdev_rebuild_reset_sync() takes
+	 * vdev_rebuild_lock itself.
+	 */
+	if (vd->vdev_rebuild_reset_wanted) {
+		mutex_exit(&vd->vdev_rebuild_lock);
+		vdev_rebuild_reset_sync(arg, tx);
+		return;
+	}
+
+	vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
+	vrp->vrp_end_time = gethrestime_sec();
+
+	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, vrp, tx));
+
+	/*
+	 * NOTE(review): the meaning of the two B_TRUE flags is not visible
+	 * from here -- confirm against vdev_dtl_reassess().
+	 */
+	vdev_dtl_reassess(vd, tx->tx_txg, vrp->vrp_max_txg, B_TRUE, B_TRUE);
+	/* Paired with spa_feature_incr() in vdev_rebuild_initiate_sync(). */
+	spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);
+
+	spa_history_log_internal(spa, "rebuild",  tx,
+	    "vdev_id=%llu vdev_guid=%llu complete",
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
+	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH);
+
+	/* Handles detaching of spares */
+	spa_async_request(spa, SPA_ASYNC_REBUILD_DONE);
+	vd->vdev_rebuilding = B_FALSE;
+	mutex_exit(&vd->vdev_rebuild_lock);
+
+	/*
+	 * While we're in syncing context take the opportunity to
+	 * setup the scrub when there are no more active rebuilds.
+	 * The scrub is only set up if dsl_scan_setup_check() succeeds and
+	 * the zfs_rebuild_scrub_enabled tunable is non-zero.
+	 */
+	pool_scan_func_t func = POOL_SCAN_SCRUB;
+	if (dsl_scan_setup_check(&func, tx) == 0 &&
+	    zfs_rebuild_scrub_enabled) {
+		dsl_scan_setup_sync(&func, tx);
+	}
+
+	/* Wake anything sleeping on vdev_rebuild_cv. */
+	cv_broadcast(&vd->vdev_rebuild_cv);
+
+	/* Clear recent error events (i.e. duplicate events tracking) */
+	zfs_ereport_clear(spa, NULL);
+}
+
+/*
+ * Update the on-disk state to canceled when a rebuild is canceled.
+ */
+static void
+vdev_rebuild_cancel_sync(void *arg, dmu_tx_t *tx)
+{
+	/* arg carries the top-level vdev id, not a pointer. */
+	int vdev_id = (uintptr_t)arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	/* Record the canceled state and end time, then persist them. */
+	vrp->vrp_rebuild_state = VDEV_REBUILD_CANCELED;
+	vrp->vrp_end_time = gethrestime_sec();
+
+	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, vrp, tx));
+
+	/* Paired with spa_feature_incr() in vdev_rebuild_initiate_sync(). */
+	spa_feature_decr(vd->vdev_spa, SPA_FEATURE_DEVICE_REBUILD, tx);
+
+	spa_history_log_internal(spa, "rebuild",  tx,
+	    "vdev_id=%llu vdev_guid=%llu canceled",
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
+	vdev_rebuild_log_notify(spa, vd, ESC_ZFS_RESILVER_FINISH);
+
+	/* The cancel request has been honored; no rebuild is running. */
+	vd->vdev_rebuild_cancel_wanted = B_FALSE;
+	vd->vdev_rebuilding = B_FALSE;
+	mutex_exit(&vd->vdev_rebuild_lock);
+
+	/* Wake pool-level waiters and anything on vdev_rebuild_cv. */
+	spa_notify_waiters(spa);
+	cv_broadcast(&vd->vdev_rebuild_cv);
+}
+
+/*
+ * Resets the progress of a running rebuild.  This will occur when a new
+ * vdev is added to rebuild.
+ */
+static void
+vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx)
+{
+	/* arg carries the top-level vdev id, not a pointer. */
+	int vdev_id = (uintptr_t)arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	/* Only an active rebuild whose worker has gone away can be reset. */
+	ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
+	ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+
+	/* Discard all recorded progress; the state stays ACTIVE. */
+	vrp->vrp_last_offset = 0;
+	vrp->vrp_min_txg = 0;
+	vrp->vrp_max_txg = dmu_tx_get_txg(tx);
+	vrp->vrp_bytes_scanned = 0;
+	vrp->vrp_bytes_issued = 0;
+	vrp->vrp_bytes_rebuilt = 0;
+	vrp->vrp_bytes_est = 0;
+	vrp->vrp_scan_time_ms = 0;
+	vr->vr_prev_scan_time_ms = 0;
+
+	/* See vdev_rebuild_initiate_sync comment */
+	VERIFY(vdev_resilver_needed(vd, &vrp->vrp_min_txg, &vrp->vrp_max_txg));
+
+	VERIFY0(zap_update(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, vrp, tx));
+
+	spa_history_log_internal(spa, "rebuild",  tx,
+	    "vdev_id=%llu vdev_guid=%llu reset",
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
+
+	vd->vdev_rebuild_reset_wanted = B_FALSE;
+	ASSERT(vd->vdev_rebuilding);
+
+	/* Start a fresh worker while still holding vdev_rebuild_lock. */
+	vd->vdev_rebuild_thread = thread_create(NULL, 0,
+	    vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * Clear the last rebuild status.
+ */
+void
+vdev_rebuild_clear_sync(void *arg, dmu_tx_t *tx)
+{
+	/* arg carries the top-level vdev id, not a pointer. */
+	int vdev_id = (uintptr_t)arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+	objset_t *mos = spa_meta_objset(spa);
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	/*
+	 * Nothing to do when the feature is not enabled, and the state of
+	 * an active rebuild is never cleared.
+	 */
+	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD) ||
+	    vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE) {
+		mutex_exit(&vd->vdev_rebuild_lock);
+		return;
+	}
+
+	/* Reset the per-vdev counters and the in-core rebuild record. */
+	clear_rebuild_bytes(vd);
+	bzero(vrp, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
+
+	/* Only rewrite the on-disk entry if one already exists. */
+	if (vd->vdev_top_zap != 0 && zap_contains(mos, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS) == 0) {
+		VERIFY0(zap_update(mos, vd->vdev_top_zap,
+		    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+		    REBUILD_PHYS_ENTRIES, vrp, tx));
+	}
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * The zio_done_func_t callback for each rebuild I/O issued.  It's responsible
+ * for updating the rebuild stats and limiting the number of in flight I/Os.
+ */
+static void
+vdev_rebuild_cb(zio_t *zio)
+{
+	/* io_private is the vdev_rebuild_t passed to zio_read(). */
+	vdev_rebuild_t *vr = zio->io_private;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+	vdev_t *vd = vr->vr_top_vdev;
+
+	mutex_enter(&vr->vr_io_lock);
+	if (zio->io_error == ENXIO && !vdev_writeable(vd)) {
+		/*
+		 * The I/O failed because the top-level vdev was unavailable.
+		 * Attempt to roll back to the last completed offset, in order
+		 * to resume from the correct location if the pool is resumed.
+		 * (This works because spa_sync waits on spa_txg_zio before
+		 * it runs sync tasks.)
+		 */
+		uint64_t *off = &vr->vr_scan_offset[zio->io_txg & TXG_MASK];
+		*off = MIN(*off, zio->io_offset);
+	} else if (zio->io_error) {
+		/* Any other failure is only counted. */
+		vrp->vrp_errors++;
+	}
+
+	/* Free the buffer allocated when the read was issued. */
+	abd_free(zio->io_abd);
+
+	/* Return this I/O's bytes to the budget and wake the issuer. */
+	ASSERT3U(vr->vr_bytes_inflight, >, 0);
+	vr->vr_bytes_inflight -= zio->io_size;
+	cv_broadcast(&vr->vr_io_cv);
+	mutex_exit(&vr->vr_io_lock);
+
+	/* Drop the SCL_STATE_ALL hold taken (tag vd) before the read. */
+	spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+}
+
+/*
+ * Initialize a block pointer that can be used to read the given segment
+ * for sequential rebuild.
+ */
+static void
+vdev_rebuild_blkptr_init(blkptr_t *bp, vdev_t *vd, uint64_t start,
+    uint64_t asize)
+{
+	uint64_t psize = asize;
+
+	ASSERT(vd->vdev_ops == &vdev_draid_ops ||
+	    vd->vdev_ops == &vdev_mirror_ops ||
+	    vd->vdev_ops == &vdev_replacing_ops ||
+	    vd->vdev_ops == &vdev_spare_ops);
+
+	/* dRAID converts the allocated size; other types use it as-is. */
+	if (vd->vdev_ops == &vdev_draid_ops)
+		psize = vdev_draid_asize_to_psize(vd, asize);
+
+	BP_ZERO(bp);
+
+	/* One non-gang DVA on this vdev at the requested offset. */
+	DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
+	DVA_SET_OFFSET(&bp->blk_dva[0], start);
+	DVA_SET_GANG(&bp->blk_dva[0], 0);
+	DVA_SET_ASIZE(&bp->blk_dva[0], asize);
+
+	/* Logical and physical size match; no compression or checksum. */
+	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
+	BP_SET_LSIZE(bp, psize);
+	BP_SET_PSIZE(bp, psize);
+	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
+	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
+	BP_SET_TYPE(bp, DMU_OT_NONE);
+	BP_SET_LEVEL(bp, 0);
+	BP_SET_DEDUP(bp, 0);
+	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
+}
+
+/*
+ * Issues a rebuild I/O and takes care of rate limiting the number of queued
+ * rebuild I/Os.  The provided start and size must be properly aligned for the
+ * top-level vdev type being rebuilt.
+ */
+static int
+vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
+{
+	uint64_t ms_id __maybe_unused = vr->vr_scan_msp->ms_id;
+	vdev_t *vd = vr->vr_top_vdev;
+	spa_t *spa = vd->vdev_spa;
+	blkptr_t blk;
+
+	/* The whole range must lie inside the metaslab being scanned. */
+	ASSERT3U(ms_id, ==, start >> vd->vdev_ms_shift);
+	ASSERT3U(ms_id, ==, (start + size - 1) >> vd->vdev_ms_shift);
+
+	/* Counted as scanned even if no I/O ends up being issued. */
+	vr->vr_pass_bytes_scanned += size;
+	vr->vr_rebuild_phys.vrp_bytes_scanned += size;
+
+	/*
+	 * Rebuild the data in this range by constructing a special block
+	 * pointer.  It has no relation to any existing blocks in the pool.
+	 * However, by disabling checksum verification and issuing a scrub IO
+	 * we can reconstruct and repair any children with missing data.
+	 */
+	vdev_rebuild_blkptr_init(&blk, vd, start, size);
+	uint64_t psize = BP_GET_PSIZE(&blk);
+
+	/* Nothing to issue when this range does not need a resilver. */
+	if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
+		return (0);
+
+	mutex_enter(&vr->vr_io_lock);
+
+	/* Limit in flight rebuild I/Os */
+	while (vr->vr_bytes_inflight >= vr->vr_bytes_inflight_max)
+		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);
+
+	vr->vr_bytes_inflight += psize;
+	mutex_exit(&vr->vr_io_lock);
+
+	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+	uint64_t txg = dmu_tx_get_txg(tx);
+
+	/* On the I/O path this hold is released by vdev_rebuild_cb(). */
+	spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	/* This is the first I/O for this txg. */
+	if (vr->vr_scan_offset[txg & TXG_MASK] == 0) {
+		vr->vr_scan_offset[txg & TXG_MASK] = start;
+		dsl_sync_task_nowait(spa_get_dsl(spa),
+		    vdev_rebuild_update_sync,
+		    (void *)(uintptr_t)vd->vdev_id, tx);
+	}
+
+	/* When exiting write out our progress. */
+	if (vdev_rebuild_should_stop(vd)) {
+		/* No I/O will be issued: undo the accounting and the hold. */
+		mutex_enter(&vr->vr_io_lock);
+		vr->vr_bytes_inflight -= psize;
+		mutex_exit(&vr->vr_io_lock);
+		spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+		mutex_exit(&vd->vdev_rebuild_lock);
+		dmu_tx_commit(tx);
+		return (SET_ERROR(EINTR));
+	}
+	mutex_exit(&vd->vdev_rebuild_lock);
+	dmu_tx_commit(tx);
+
+	/* Advance this txg's offset past the range about to be issued. */
+	vr->vr_scan_offset[txg & TXG_MASK] = start + size;
+	vr->vr_pass_bytes_issued += size;
+	vr->vr_rebuild_phys.vrp_bytes_issued += size;
+
+	/* Issued as a child of this txg's root zio; the cb frees the abd. */
+	zio_nowait(zio_read(spa->spa_txg_zio[txg & TXG_MASK], spa, &blk,
+	    abd_alloc(psize, B_FALSE), psize, vdev_rebuild_cb, vr,
+	    ZIO_PRIORITY_REBUILD, ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_RESILVER, NULL));
+
+	return (0);
+}
+
+/*
+ * Walk every segment in vr->vr_scan_tree and issue rebuild I/Os covering it.
+ * Returns 0 once all segments have been issued, or the first non-zero error
+ * returned by vdev_rebuild_range().
+ */
+static int
+vdev_rebuild_ranges(vdev_rebuild_t *vr)
+{
+	vdev_t *tvd = vr->vr_top_vdev;
+	range_tree_t *rt = vr->vr_scan_tree;
+	zfs_btree_t *segs = &rt->rt_root;
+	zfs_btree_index_t where;
+	range_seg_t *seg = zfs_btree_first(segs, &where);
+
+	while (seg != NULL) {
+		uint64_t offset = rs_get_start(seg, rt);
+		uint64_t remaining = rs_get_end(seg, rt) - offset;
+
+		/*
+		 * Testing hook: while zfs_scan_suspend_progress is set, hold
+		 * off issuing I/O unless the rebuild has been asked to stop.
+		 * See comment in dsl_scan_sync().
+		 */
+		while (zfs_scan_suspend_progress) {
+			if (vdev_rebuild_should_stop(tvd))
+				break;
+			delay(hz);
+		}
+
+		/*
+		 * Carve the segment into chunks whose size is legal for the
+		 * top-level vdev being rebuilt (dRAID or mirror) and issue
+		 * them one at a time.
+		 */
+		for (; remaining > 0; ) {
+			ASSERT3P(tvd->vdev_ops, !=, NULL);
+			uint64_t chunk = tvd->vdev_ops->vdev_op_rebuild_asize(
+			    tvd, offset, remaining, zfs_rebuild_max_segment);
+
+			int err = vdev_rebuild_range(vr, offset, chunk);
+			if (err != 0)
+				return (err);
+
+			remaining -= chunk;
+			offset += chunk;
+		}
+
+		seg = zfs_btree_next(segs, &where, &where);
+	}
+
+	return (0);
+}
+
+/*
+ * Refresh vrp_bytes_est, the estimate of the total capacity to be scanned.
+ * The estimate is the bytes already scanned plus the allocated space of
+ * every metaslab from ms_id through the last one.  Nothing is updated when
+ * vrp_last_offset lies below the start of metaslab ms_id.
+ */
+static void
+vdev_rebuild_update_bytes_est(vdev_t *vd, uint64_t ms_id)
+{
+	vdev_rebuild_phys_t *phys = &vd->vdev_rebuild_config.vr_rebuild_phys;
+	uint64_t total = phys->vrp_bytes_scanned;
+
+	if (phys->vrp_last_offset < vd->vdev_ms[ms_id]->ms_start)
+		return;
+
+	uint64_t idx = ms_id;
+	while (idx < vd->vdev_ms_count) {
+		metaslab_t *ms = vd->vdev_ms[idx];
+
+		/* The allocated space is sampled under the metaslab lock. */
+		mutex_enter(&ms->ms_lock);
+		total += metaslab_allocated_space(ms);
+		mutex_exit(&ms->ms_lock);
+
+		idx++;
+	}
+
+	phys->vrp_bytes_est = total;
+}
+
+/*
+ * Read the on-disk rebuild state of a top-level vdev into its in-core
+ * vdev_rebuild_phys_t.  Returns ENOTSUP when the device_rebuild feature is
+ * not enabled, the zap_lookup() error for unexpected failures, and 0
+ * otherwise.
+ */
+int
+vdev_rebuild_load(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+	vdev_rebuild_t *cfg = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *phys = &cfg->vr_rebuild_phys;
+	int rc = 0;
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	vd->vdev_rebuilding = B_FALSE;
+
+	/* Without the feature there is no state to load; zero it out. */
+	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD)) {
+		bzero(phys, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
+		mutex_exit(&vd->vdev_rebuild_lock);
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	ASSERT(vd->vdev_top == vd);
+
+	rc = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+	    VDEV_TOP_ZAP_VDEV_REBUILD_PHYS, sizeof (uint64_t),
+	    REBUILD_PHYS_ENTRIES, phys);
+
+	/*
+	 * An absent or corrupt VDEV_TOP_ZAP_VDEV_REBUILD_PHYS entry must not
+	 * block pool import.  Those errors are tolerated and the rebuild
+	 * state is cleared so a fresh resilver/rebuild can be started; any
+	 * other error is handed back to the caller.
+	 */
+	boolean_t tolerated =
+	    (rc == ENOENT || rc == EOVERFLOW || rc == ECKSUM);
+
+	if (!tolerated && rc != 0) {
+		mutex_exit(&vd->vdev_rebuild_lock);
+		return (rc);
+	}
+
+	if (tolerated)
+		bzero(phys, sizeof (uint64_t) * REBUILD_PHYS_ENTRIES);
+
+	cfg->vr_prev_scan_time_ms = phys->vrp_scan_time_ms;
+	cfg->vr_top_vdev = vd;
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+
+	return (0);
+}
+
+/*
+ * Each scan thread is responsible for rebuilding a top-level vdev.  The
+ * rebuild progress is tracked on-disk in VDEV_TOP_ZAP_VDEV_REBUILD_PHYS.
+ *
+ * arg is the top-level vdev_t to rebuild.  The thread initializes the
+ * per-rebuild state, walks every metaslab issuing rebuild I/O for its
+ * allocated ranges, waits for outstanding I/O to drain, and finally
+ * schedules the sync task matching the outcome (complete, cancel, reset)
+ * or leaves the rebuild suspended.  It never returns; it ends in
+ * thread_exit().
+ */
+static void
+vdev_rebuild_thread(void *arg)
+{
+	vdev_t *vd = arg;
+	spa_t *spa = vd->vdev_spa;
+	vdev_t *rvd = spa->spa_root_vdev;
+	int error = 0;
+
+	/*
+	 * If there's a scrub in process request that it be stopped.  This
+	 * is not required for a correct rebuild, but we do want rebuilds to
+	 * emulate the resilver behavior as much as possible.
+	 */
+	dsl_pool_t *dsl = spa_get_dsl(spa);
+	if (dsl_scan_scrubbing(dsl))
+		dsl_scan_cancel(dsl);
+
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	ASSERT3P(vd->vdev_top, ==, vd);
+	ASSERT3P(vd->vdev_rebuild_thread, !=, NULL);
+	ASSERT(vd->vdev_rebuilding);
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
+	ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
+
+	/*
+	 * Set up the per-rebuild state.  The scan tree, I/O lock and I/O cv
+	 * created here are torn down again before this thread exits.
+	 */
+	vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
+	vr->vr_top_vdev = vd;
+	vr->vr_scan_msp = NULL;
+	vr->vr_scan_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+	mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);
+
+	/* Per-pass statistics start from zero for this thread's pass. */
+	vr->vr_pass_start_time = gethrtime();
+	vr->vr_pass_bytes_scanned = 0;
+	vr->vr_pass_bytes_issued = 0;
+
+	uint64_t update_est_time = gethrtime();
+	vdev_rebuild_update_bytes_est(vd, 0);
+
+	clear_rebuild_bytes(vr->vr_top_vdev);
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+
+	/*
+	 * Systematically walk the metaslabs and issue rebuild I/Os for
+	 * all ranges in the allocated space map.
+	 *
+	 * The SCL_CONFIG read hold is held at the top of every iteration and
+	 * whenever the loop is left, so the single spa_config_exit() after
+	 * the loop balances it on every path.
+	 */
+	for (uint64_t i = 0; i < vd->vdev_ms_count; i++) {
+		metaslab_t *msp = vd->vdev_ms[i];
+		vr->vr_scan_msp = msp;
+
+		/*
+		 * Calculate the max number of in-flight bytes for top-level
+		 * vdev scanning operations (minimum 1MB, maximum 1/4 of
+		 * arc_c_max shared by all top-level vdevs).  Limits for the
+		 * issuing phase are done per top-level vdev and are handled
+		 * separately.
+		 */
+		uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
+		vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
+		    zfs_rebuild_vdev_limit * vd->vdev_children));
+
+		/*
+		 * Removal of vdevs from the vdev tree may eliminate the need
+		 * for the rebuild, in which case it should be canceled.  The
+		 * vdev_rebuild_cancel_wanted flag is set until the sync task
+		 * completes.  This may be after the rebuild thread exits.
+		 */
+		if (vdev_rebuild_should_cancel(vd)) {
+			vd->vdev_rebuild_cancel_wanted = B_TRUE;
+			error = EINTR;
+			break;
+		}
+
+		ASSERT0(range_tree_space(vr->vr_scan_tree));
+
+		/*
+		 * Disable any new allocations to this metaslab.  The
+		 * SCL_CONFIG hold is dropped first and is not re-taken until
+		 * this metaslab's ranges have been issued.
+		 */
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
+		metaslab_disable(msp);
+
+		mutex_enter(&msp->ms_sync_lock);
+		mutex_enter(&msp->ms_lock);
+
+		/*
+		 * If there are outstanding allocations wait for them to be
+		 * synced.  This is needed to ensure all allocated ranges are
+		 * on disk and therefore will be rebuilt.
+		 */
+		for (int j = 0; j < TXG_SIZE; j++) {
+			if (range_tree_space(msp->ms_allocating[j])) {
+				/* Both locks are dropped across the wait. */
+				mutex_exit(&msp->ms_lock);
+				mutex_exit(&msp->ms_sync_lock);
+				txg_wait_synced(dsl, 0);
+				mutex_enter(&msp->ms_sync_lock);
+				mutex_enter(&msp->ms_lock);
+				break;
+			}
+		}
+
+		/*
+		 * When a metaslab has been allocated from read its allocated
+		 * ranges from the space map object into the vr_scan_tree.
+		 * Then add inflight / unflushed ranges and remove inflight /
+		 * unflushed frees.  This is the minimum range to be rebuilt.
+		 */
+		if (msp->ms_sm != NULL) {
+			VERIFY0(space_map_load(msp->ms_sm,
+			    vr->vr_scan_tree, SM_ALLOC));
+
+			/*
+			 * Uses its own index 'j' so the metaslab index 'i' of
+			 * the enclosing loop is not shadowed.
+			 */
+			for (int j = 0; j < TXG_SIZE; j++) {
+				ASSERT0(range_tree_space(
+				    msp->ms_allocating[j]));
+			}
+
+			range_tree_walk(msp->ms_unflushed_allocs,
+			    range_tree_add, vr->vr_scan_tree);
+			range_tree_walk(msp->ms_unflushed_frees,
+			    range_tree_remove, vr->vr_scan_tree);
+
+			/*
+			 * Remove ranges which have already been rebuilt based
+			 * on the last offset.  This can happen when restarting
+			 * a scan after exporting and re-importing the pool.
+			 */
+			range_tree_clear(vr->vr_scan_tree, 0,
+			    vrp->vrp_last_offset);
+		}
+
+		mutex_exit(&msp->ms_lock);
+		mutex_exit(&msp->ms_sync_lock);
+
+		/*
+		 * To provide an accurate estimate re-calculate the estimated
+		 * size every 5 minutes to account for recent allocations and
+		 * frees made to space maps which have not yet been rebuilt.
+		 */
+		if (gethrtime() > update_est_time + SEC2NSEC(300)) {
+			update_est_time = gethrtime();
+			vdev_rebuild_update_bytes_est(vd, i);
+		}
+
+		/*
+		 * Walk the allocated space map and issue the rebuild I/O.
+		 * The scan tree is emptied afterwards whether or not an
+		 * error occurred, so it is empty for the next metaslab and
+		 * for range_tree_destroy() below.
+		 */
+		error = vdev_rebuild_ranges(vr);
+		range_tree_vacate(vr->vr_scan_tree, NULL, NULL);
+
+		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+		metaslab_enable(msp, B_FALSE, B_FALSE);
+
+		if (error != 0)
+			break;
+	}
+
+	range_tree_destroy(vr->vr_scan_tree);
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+	/* Wait for any remaining rebuild I/O to complete */
+	mutex_enter(&vr->vr_io_lock);
+	while (vr->vr_bytes_inflight > 0)
+		cv_wait(&vr->vr_io_cv, &vr->vr_io_lock);
+
+	mutex_exit(&vr->vr_io_lock);
+
+	mutex_destroy(&vr->vr_io_lock);
+	cv_destroy(&vr->vr_io_cv);
+
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+	/* Any sync task scheduled below is attached to this transaction. */
+	dsl_pool_t *dp = spa_get_dsl(spa);
+	dmu_tx_t *tx = dmu_tx_create_dd(dp->dp_mos_dir);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	if (error == 0) {
+		/*
+		 * After a successful rebuild clear the DTLs of all ranges
+		 * which were missing when the rebuild was started.  These
+		 * ranges must have been rebuilt as a consequence of rebuilding
+		 * all allocated space.  Note that unlike a scrub or resilver
+		 * the rebuild operation will reconstruct data only referenced
+		 * by a pool checkpoint.  See the dsl_scan_done() comments.
+		 */
+		dsl_sync_task_nowait(dp, vdev_rebuild_complete_sync,
+		    (void *)(uintptr_t)vd->vdev_id, tx);
+	} else if (vd->vdev_rebuild_cancel_wanted) {
+		/*
+		 * The rebuild operation was canceled.  This will occur when
+		 * a device participating in the rebuild is detached.
+		 */
+		dsl_sync_task_nowait(dp, vdev_rebuild_cancel_sync,
+		    (void *)(uintptr_t)vd->vdev_id, tx);
+	} else if (vd->vdev_rebuild_reset_wanted) {
+		/*
+		 * Reset the running rebuild without canceling and restarting
+		 * it.  This will occur when a new device is attached and must
+		 * participate in the rebuild.
+		 */
+		dsl_sync_task_nowait(dp, vdev_rebuild_reset_sync,
+		    (void *)(uintptr_t)vd->vdev_id, tx);
+	} else {
+		/*
+		 * The rebuild operation should be suspended.  This may occur
+		 * when detaching a child vdev or when exporting the pool.  The
+		 * rebuild is left in the active state so it will be resumed.
+		 *
+		 * This is the only branch that clears vdev_rebuilding here.
+		 * NOTE(review): in the other branches the scheduled sync task
+		 * is presumably what clears it -- confirm.
+		 */
+		ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
+		vd->vdev_rebuilding = B_FALSE;
+	}
+
+	dmu_tx_commit(tx);
+
+	vd->vdev_rebuild_thread = NULL;
+	mutex_exit(&vd->vdev_rebuild_lock);
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+	/* Wake anyone blocked on vdev_rebuild_cv waiting for this thread. */
+	cv_broadcast(&vd->vdev_rebuild_cv);
+
+	thread_exit();
+}
+
+/*
+ * Reports whether a rebuild is active.  Given the root vdev, returns the
+ * first non-false answer among its children; given any other vdev with a
+ * top-level ZAP, returns B_TRUE when its on-disk rebuild state is
+ * VDEV_REBUILD_ACTIVE.  Otherwise returns B_FALSE.
+ */
+boolean_t
+vdev_rebuild_active(vdev_t *vd)
+{
+	if (vd == vd->vdev_spa->spa_root_vdev) {
+		uint64_t c = 0;
+
+		while (c < vd->vdev_children) {
+			boolean_t child_active =
+			    vdev_rebuild_active(vd->vdev_child[c]);
+
+			if (child_active)
+				return (child_active);
+			c++;
+		}
+
+		return (B_FALSE);
+	}
+
+	if (vd->vdev_top_zap == 0)
+		return (B_FALSE);
+
+	vdev_rebuild_phys_t *phys = &vd->vdev_rebuild_config.vr_rebuild_phys;
+	boolean_t active;
+
+	/* The rebuild state is sampled under the rebuild lock. */
+	mutex_enter(&vd->vdev_rebuild_lock);
+	active = (phys->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
+	mutex_exit(&vd->vdev_rebuild_lock);
+
+	return (active);
+}
+
+/*
+ * Begin rebuilding the given top-level vdev.  If a rebuild is already
+ * running on it, ask that rebuild to reset instead of initiating a second
+ * one.
+ */
+void
+vdev_rebuild(vdev_t *vd)
+{
+	vdev_rebuild_phys_t *vrp __maybe_unused =
+	    &vd->vdev_rebuild_config.vr_rebuild_phys;
+
+	ASSERT(vd->vdev_top == vd);
+	ASSERT(vdev_is_concrete(vd));
+	ASSERT(!vd->vdev_removing);
+	ASSERT(spa_feature_is_enabled(vd->vdev_spa,
+	    SPA_FEATURE_DEVICE_REBUILD));
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	if (!vd->vdev_rebuilding) {
+		vdev_rebuild_initiate(vd);
+	} else {
+		ASSERT3U(vrp->vrp_rebuild_state, ==, VDEV_REBUILD_ACTIVE);
+
+		/*
+		 * A new device was attached while a rebuild is running, so
+		 * tell the running rebuild to start over from the beginning.
+		 * vdev_rebuild_reset_wanted stays set until the sync task
+		 * completes, which may be after the rebuild thread exits.
+		 */
+		if (!vd->vdev_rebuild_reset_wanted)
+			vd->vdev_rebuild_reset_wanted = B_TRUE;
+	}
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * Recursive worker for vdev_rebuild_restart().  For the root vdev, visits
+ * each child.  For any other vdev with a top-level ZAP, spawns a rebuild
+ * thread when the on-disk state is active, the vdev is writeable and no
+ * rebuild is currently marked as running.
+ */
+static void
+vdev_rebuild_restart_impl(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+
+	if (vd == spa->spa_root_vdev) {
+		uint64_t c = 0;
+
+		while (c < vd->vdev_children) {
+			vdev_rebuild_restart_impl(vd->vdev_child[c]);
+			c++;
+		}
+		return;
+	}
+
+	if (vd->vdev_top_zap == 0)
+		return;
+
+	vdev_rebuild_phys_t *phys = &vd->vdev_rebuild_config.vr_rebuild_phys;
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+
+	boolean_t resume = phys->vrp_rebuild_state == VDEV_REBUILD_ACTIVE &&
+	    vdev_writeable(vd) && !vd->vdev_rebuilding;
+
+	if (resume) {
+		ASSERT(spa_feature_is_active(spa,
+		    SPA_FEATURE_DEVICE_REBUILD));
+		vd->vdev_rebuilding = B_TRUE;
+		vd->vdev_rebuild_thread = thread_create(NULL, 0,
+		    vdev_rebuild_thread, vd, 0, &p0, TS_RUN,
+		    maxclsyspri);
+	}
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * Conditionally restart the vdev_rebuild_thread of every top-level vdev in
+ * the pool.  Only rebuilds already in the active state with the feature
+ * flag active are restarted; this never starts a new rebuild.  The caller
+ * must hold spa_namespace_lock.
+ */
+void
+vdev_rebuild_restart(spa_t *spa)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+	vdev_t *root = spa->spa_root_vdev;
+
+	vdev_rebuild_restart_impl(root);
+}
+
+/*
+ * Request that every vdev_rebuild_thread under the provided vdev tree exit,
+ * and block until each one is no longer rebuilding (canceled or stopped).
+ * The caller must hold spa_namespace_lock.
+ */
+void
+vdev_rebuild_stop_wait(vdev_t *vd)
+{
+	spa_t *spa = vd->vdev_spa;
+
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+	if (vd == spa->spa_root_vdev) {
+		uint64_t c = 0;
+
+		while (c < vd->vdev_children) {
+			vdev_rebuild_stop_wait(vd->vdev_child[c]);
+			c++;
+		}
+		return;
+	}
+
+	if (vd->vdev_top_zap == 0)
+		return;
+
+	ASSERT(vd == vd->vdev_top);
+
+	mutex_enter(&vd->vdev_rebuild_lock);
+	if (vd->vdev_rebuild_thread == NULL) {
+		/* No rebuild thread to stop. */
+		mutex_exit(&vd->vdev_rebuild_lock);
+		return;
+	}
+
+	/* Raise the exit request, then sleep until rebuilding has ended. */
+	vd->vdev_rebuild_exit_wanted = B_TRUE;
+	while (vd->vdev_rebuilding)
+		cv_wait(&vd->vdev_rebuild_cv, &vd->vdev_rebuild_lock);
+	vd->vdev_rebuild_exit_wanted = B_FALSE;
+
+	mutex_exit(&vd->vdev_rebuild_lock);
+}
+
+/*
+ * Stop every rebuild in the pool, starting from the root vdev.  The rebuilds
+ * are left in the active state so they will be resumed when the pool is
+ * imported.
+ */
+void
+vdev_rebuild_stop_all(spa_t *spa)
+{
+	vdev_t *root = spa->spa_root_vdev;
+
+	vdev_rebuild_stop_wait(root);
+}
+
+/*
+ * Fill in vrs with the rebuild statistics of top-level vdev tvd.
+ *
+ * Returns ENOTSUP when the device_rebuild feature is not enabled, EINVAL
+ * when tvd is not a top-level vdev with a top-level ZAP, any unexpected
+ * zap_contains() error, and 0 on success.  When no rebuild state has been
+ * stored, vrs is zeroed and its state set to VDEV_REBUILD_NONE.
+ */
+int
+vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
+{
+	spa_t *spa = tvd->vdev_spa;
+
+	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
+		return (SET_ERROR(ENOTSUP));
+
+	if (!(tvd == tvd->vdev_top && tvd->vdev_top_zap != 0))
+		return (SET_ERROR(EINVAL));
+
+	int rc = zap_contains(spa_meta_objset(spa),
+	    tvd->vdev_top_zap, VDEV_TOP_ZAP_VDEV_REBUILD_PHYS);
+
+	/* No stored rebuild state: report an all-zero "none" status. */
+	if (rc == ENOENT) {
+		bzero(vrs, sizeof (vdev_rebuild_stat_t));
+		vrs->vrs_state = VDEV_REBUILD_NONE;
+		return (0);
+	}
+
+	if (rc != 0)
+		return (rc);
+
+	vdev_rebuild_t *cfg = &tvd->vdev_rebuild_config;
+	vdev_rebuild_phys_t *phys = &cfg->vr_rebuild_phys;
+
+	/* Copy everything under the rebuild lock. */
+	mutex_enter(&tvd->vdev_rebuild_lock);
+
+	/* Persistent counters from the on-disk rebuild state. */
+	vrs->vrs_state = phys->vrp_rebuild_state;
+	vrs->vrs_start_time = phys->vrp_start_time;
+	vrs->vrs_end_time = phys->vrp_end_time;
+	vrs->vrs_scan_time_ms = phys->vrp_scan_time_ms;
+	vrs->vrs_bytes_scanned = phys->vrp_bytes_scanned;
+	vrs->vrs_bytes_issued = phys->vrp_bytes_issued;
+	vrs->vrs_bytes_rebuilt = phys->vrp_bytes_rebuilt;
+	vrs->vrs_bytes_est = phys->vrp_bytes_est;
+	vrs->vrs_errors = phys->vrp_errors;
+
+	/* In-core figures for the current pass. */
+	vrs->vrs_pass_time_ms = NSEC2MSEC(gethrtime() -
+	    cfg->vr_pass_start_time);
+	vrs->vrs_pass_bytes_scanned = cfg->vr_pass_bytes_scanned;
+	vrs->vrs_pass_bytes_issued = cfg->vr_pass_bytes_issued;
+
+	mutex_exit(&tvd->vdev_rebuild_lock);
+
+	return (0);
+}
+
+/*
+ * Module parameters for the rebuild code.  Each is registered through
+ * ZFS_MODULE_PARAM with the ZMOD_RW flag; the quoted string is the
+ * parameter's description.
+ */
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_max_segment, ULONG, ZMOD_RW,
+	"Max segment size in bytes of rebuild reads");
+
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_vdev_limit, ULONG, ZMOD_RW,
+	"Max bytes in flight per leaf vdev for sequential resilvers");
+
+ZFS_MODULE_PARAM(zfs, zfs_, rebuild_scrub_enabled, INT, ZMOD_RW,
+	"Automatically scrub after sequential resilver completes");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/vdev_removal.c b/zfs/module/zfs/vdev_removal.c
index 340de25..12cc654 100644
--- a/zfs/module/zfs/vdev_removal.c
+++ b/zfs/module/zfs/vdev_removal.c

@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  */
 
@@ -47,7 +47,7 @@
 #include <sys/abd.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
-#include <sys/trace_vdev.h>
+#include <sys/trace_zfs.h>
 
 /*
  * This file contains the necessary logic to remove vdevs from a
@@ -198,11 +198,12 @@
 	spa_vdev_removal_t *svr = kmem_zalloc(sizeof (*svr), KM_SLEEP);
 	mutex_init(&svr->svr_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&svr->svr_cv, NULL, CV_DEFAULT, NULL);
-	svr->svr_allocd_segs = range_tree_create(NULL, NULL);
+	svr->svr_allocd_segs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 	svr->svr_vdev_id = vd->vdev_id;
 
 	for (int i = 0; i < TXG_SIZE; i++) {
-		svr->svr_frees[i] = range_tree_create(NULL, NULL);
+		svr->svr_frees[i] = range_tree_create(NULL, RANGE_SEG64, NULL,
+		    0, 0);
 		list_create(&svr->svr_new_segments[i],
 		    sizeof (vdev_indirect_mapping_entry_t),
 		    offsetof(vdev_indirect_mapping_entry_t, vime_node));
@@ -247,9 +248,9 @@
 	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 	objset_t *mos = spa->spa_dsl_pool->dp_meta_objset;
 	spa_vdev_removal_t *svr = NULL;
-	ASSERTV(uint64_t txg = dmu_tx_get_txg(tx));
+	uint64_t txg __maybe_unused = dmu_tx_get_txg(tx);
 
-	ASSERT3P(vd->vdev_ops, !=, &vdev_raidz_ops);
+	ASSERT0(vdev_get_nparity(vd));
 	svr = spa_vdev_removal_create(vd);
 
 	ASSERT(vd->vdev_removing);
@@ -267,7 +268,7 @@
 		VERIFY0(zap_add(spa->spa_meta_objset, vd->vdev_top_zap,
 		    VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE, sizeof (one), 1,
 		    &one, tx));
-		ASSERTV(boolean_t are_precise);
+		boolean_t are_precise __maybe_unused;
 		ASSERT0(vdev_obsolete_counts_are_precise(vd, &are_precise));
 		ASSERT3B(are_precise, ==, B_TRUE);
 	}
@@ -344,11 +345,12 @@
 	vdev_config_dirty(vd);
 
 	zfs_dbgmsg("starting removal thread for vdev %llu (%px) in txg %llu "
-	    "im_obj=%llu", vd->vdev_id, vd, dmu_tx_get_txg(tx),
-	    vic->vic_mapping_object);
+	    "im_obj=%llu", (u_longlong_t)vd->vdev_id, vd,
+	    (u_longlong_t)dmu_tx_get_txg(tx),
+	    (u_longlong_t)vic->vic_mapping_object);
 
 	spa_history_log_internal(spa, "vdev remove started", tx,
-	    "%s vdev %llu %s", spa_name(spa), vd->vdev_id,
+	    "%s vdev %llu %s", spa_name(spa), (u_longlong_t)vd->vdev_id,
 	    (vd->vdev_path != NULL) ? vd->vdev_path : "-");
 	/*
 	 * Setting spa_vdev_removal causes subsequent frees to call
@@ -473,7 +475,8 @@
 	if (!spa_writeable(spa))
 		return;
 
-	zfs_dbgmsg("restarting removal of %llu", svr->svr_vdev_id);
+	zfs_dbgmsg("restarting removal of %llu",
+	    (u_longlong_t)svr->svr_vdev_id);
 	svr->svr_thread = thread_create(NULL, 0, spa_vdev_remove_thread, spa,
 	    0, &p0, TS_RUN, minclsyspri);
 }
@@ -698,6 +701,7 @@
 	spa_vdev_removal_destroy(svr);
 
 	spa_sync_removing_state(spa, tx);
+	spa_notify_waiters(spa);
 
 	vdev_config_dirty(spa->spa_root_vdev);
 }
@@ -723,7 +727,7 @@
 	spa_vdev_removal_t *svr = arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
-	ASSERTV(vdev_indirect_config_t *vic = &vd->vdev_indirect_config);
+	vdev_indirect_config_t *vic __maybe_unused = &vd->vdev_indirect_config;
 	uint64_t txg = dmu_tx_get_txg(tx);
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 
@@ -966,18 +970,15 @@
 		 * the allocation at the end of a segment, thus avoiding
 		 * additional split blocks.
 		 */
-		range_seg_t search;
-		avl_index_t where;
-		search.rs_start = start + maxalloc;
-		search.rs_end = search.rs_start;
-		range_seg_t *rs = avl_find(&segs->rt_root, &search, &where);
-		if (rs == NULL) {
-			rs = avl_nearest(&segs->rt_root, where, AVL_BEFORE);
-		} else {
-			rs = AVL_PREV(&segs->rt_root, rs);
-		}
+		range_seg_max_t search;
+		zfs_btree_index_t where;
+		rs_set_start(&search, segs, start + maxalloc);
+		rs_set_end(&search, segs, start + maxalloc);
+		(void) zfs_btree_find(&segs->rt_root, &search, &where);
+		range_seg_t *rs = zfs_btree_prev(&segs->rt_root, &where,
+		    &where);
 		if (rs != NULL) {
-			size = rs->rs_end - start;
+			size = rs_get_end(rs, segs) - start;
 		} else {
 			/*
 			 * There are no segments that end before maxalloc.
@@ -994,7 +995,7 @@
 	 * An allocation class might not have any remaining vdevs or space
 	 */
 	metaslab_class_t *mc = mg->mg_class;
-	if (mc != spa_normal_class(spa) && mc->mc_groups <= 1)
+	if (mc->mc_groups == 0)
 		mc = spa_normal_class(spa);
 	int error = metaslab_alloc_dva(spa, mc, size, &dst, 0, NULL, txg, 0,
 	    zal, 0);
@@ -1010,20 +1011,22 @@
 	 * relative to the start of the range to be copied (i.e. relative to the
 	 * local variable "start").
 	 */
-	range_tree_t *obsolete_segs = range_tree_create(NULL, NULL);
+	range_tree_t *obsolete_segs = range_tree_create(NULL, RANGE_SEG64, NULL,
+	    0, 0);
 
-	range_seg_t *rs = avl_first(&segs->rt_root);
-	ASSERT3U(rs->rs_start, ==, start);
-	uint64_t prev_seg_end = rs->rs_end;
-	while ((rs = AVL_NEXT(&segs->rt_root, rs)) != NULL) {
-		if (rs->rs_start >= start + size) {
+	zfs_btree_index_t where;
+	range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where);
+	ASSERT3U(rs_get_start(rs, segs), ==, start);
+	uint64_t prev_seg_end = rs_get_end(rs, segs);
+	while ((rs = zfs_btree_next(&segs->rt_root, &where, &where)) != NULL) {
+		if (rs_get_start(rs, segs) >= start + size) {
 			break;
 		} else {
 			range_tree_add(obsolete_segs,
 			    prev_seg_end - start,
-			    rs->rs_start - prev_seg_end);
+			    rs_get_start(rs, segs) - prev_seg_end);
 		}
-		prev_seg_end = rs->rs_end;
+		prev_seg_end = rs_get_end(rs, segs);
 	}
 	/* We don't end in the middle of an obsolete range */
 	ASSERT3U(start + size, <=, prev_seg_end);
@@ -1112,14 +1115,14 @@
 	spa_finish_removal(dmu_tx_pool(tx)->dp_spa, DSS_FINISHED, tx);
 	/* vd->vdev_path is not available here */
 	spa_history_log_internal(spa, "vdev remove completed",  tx,
-	    "%s vdev %llu", spa_name(spa), vd->vdev_id);
+	    "%s vdev %llu", spa_name(spa), (u_longlong_t)vd->vdev_id);
 }
 
 static void
 vdev_remove_enlist_zaps(vdev_t *vd, nvlist_t *zlist)
 {
 	ASSERT3P(zlist, !=, NULL);
-	ASSERT3P(vd->vdev_ops, !=, &vdev_raidz_ops);
+	ASSERT0(vdev_get_nparity(vd));
 
 	if (vd->vdev_leaf_zap != 0) {
 		char zkey[32];
@@ -1166,8 +1169,8 @@
 
 	/* After this, we can not use svr. */
 	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
-	dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_remove_complete_sync, svr,
-	    0, ZFS_SPACE_CHECK_NONE, tx);
+	dsl_sync_task_nowait(spa->spa_dsl_pool,
+	    vdev_remove_complete_sync, svr, tx);
 	dmu_tx_commit(tx);
 }
 
@@ -1195,7 +1198,7 @@
 	    ESC_ZFS_VDEV_REMOVE_DEV);
 
 	zfs_dbgmsg("finishing device removal for vdev %llu in txg %llu",
-	    vd->vdev_id, txg);
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)txg);
 
 	/*
 	 * Discard allocation state.
@@ -1205,6 +1208,11 @@
 		metaslab_group_destroy(vd->vdev_mg);
 		vd->vdev_mg = NULL;
 	}
+	if (vd->vdev_log_mg != NULL) {
+		ASSERT0(vd->vdev_ms_count);
+		metaslab_group_destroy(vd->vdev_log_mg);
+		vd->vdev_log_mg = NULL;
+	}
 	ASSERT0(vd->vdev_stat.vs_space);
 	ASSERT0(vd->vdev_stat.vs_dspace);
 
@@ -1266,9 +1274,10 @@
 	 * allocated segments that we are copying.  We may also be copying
 	 * free segments (of up to vdev_removal_max_span bytes).
 	 */
-	range_tree_t *segs = range_tree_create(NULL, NULL);
+	range_tree_t *segs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 	for (;;) {
-		range_seg_t *rs = range_tree_first(svr->svr_allocd_segs);
+		range_tree_t *rt = svr->svr_allocd_segs;
+		range_seg_t *rs = range_tree_first(rt);
 
 		if (rs == NULL)
 			break;
@@ -1277,17 +1286,17 @@
 
 		if (range_tree_is_empty(segs)) {
 			/* need to truncate the first seg based on max_alloc */
-			seg_length =
-			    MIN(rs->rs_end - rs->rs_start, *max_alloc);
+			seg_length = MIN(rs_get_end(rs, rt) - rs_get_start(rs,
+			    rt), *max_alloc);
 		} else {
-			if (rs->rs_start - range_tree_max(segs) >
+			if (rs_get_start(rs, rt) - range_tree_max(segs) >
 			    vdev_removal_max_span) {
 				/*
 				 * Including this segment would cause us to
 				 * copy a larger unneeded chunk than is allowed.
 				 */
 				break;
-			} else if (rs->rs_end - range_tree_min(segs) >
+			} else if (rs_get_end(rs, rt) - range_tree_min(segs) >
 			    *max_alloc) {
 				/*
 				 * This additional segment would extend past
@@ -1296,13 +1305,14 @@
 				 */
 				break;
 			} else {
-				seg_length = rs->rs_end - rs->rs_start;
+				seg_length = rs_get_end(rs, rt) -
+				    rs_get_start(rs, rt);
 			}
 		}
 
-		range_tree_add(segs, rs->rs_start, seg_length);
+		range_tree_add(segs, rs_get_start(rs, rt), seg_length);
 		range_tree_remove(svr->svr_allocd_segs,
-		    rs->rs_start, seg_length);
+		    rs_get_start(rs, rt), seg_length);
 	}
 
 	if (range_tree_is_empty(segs)) {
@@ -1313,7 +1323,7 @@
 
 	if (svr->svr_max_offset_to_sync[txg & TXG_MASK] == 0) {
 		dsl_sync_task_nowait(dmu_tx_pool(tx), vdev_mapping_sync,
-		    svr, 0, ZFS_SPACE_CHECK_NONE, tx);
+		    svr, tx);
 	}
 
 	svr->svr_max_offset_to_sync[txg & TXG_MASK] = range_tree_max(segs);
@@ -1462,6 +1472,10 @@
 			VERIFY0(space_map_load(msp->ms_sm,
 			    svr->svr_allocd_segs, SM_ALLOC));
 
+			range_tree_walk(msp->ms_unflushed_allocs,
+			    range_tree_add, svr->svr_allocd_segs);
+			range_tree_walk(msp->ms_unflushed_frees,
+			    range_tree_remove, svr->svr_allocd_segs);
 			range_tree_walk(msp->ms_freeing,
 			    range_tree_remove, svr->svr_allocd_segs);
 
@@ -1477,8 +1491,9 @@
 
 		vca.vca_msp = msp;
 		zfs_dbgmsg("copying %llu segments for metaslab %llu",
-		    avl_numnodes(&svr->svr_allocd_segs->rt_root),
-		    msp->ms_id);
+		    (u_longlong_t)zfs_btree_numnodes(
+		    &svr->svr_allocd_segs->rt_root),
+		    (u_longlong_t)msp->ms_id);
 
 		while (!svr->svr_thread_exit &&
 		    !range_tree_is_empty(svr->svr_allocd_segs)) {
@@ -1501,10 +1516,6 @@
 			 * specified by zfs_removal_suspend_progress. We do this
 			 * solely from the test suite or during debugging.
 			 */
-			uint64_t bytes_copied =
-			    spa->spa_removing_phys.sr_copied;
-			for (int i = 0; i < TXG_SIZE; i++)
-				bytes_copied += svr->svr_bytes_done[i];
 			while (zfs_removal_suspend_progress &&
 			    !svr->svr_thread_exit)
 				delay(hz);
@@ -1579,14 +1590,16 @@
 		    vca.vca_write_error_bytes > 0)) {
 			zfs_dbgmsg("canceling removal due to IO errors: "
 			    "[read_error_bytes=%llu] [write_error_bytes=%llu]",
-			    vca.vca_read_error_bytes,
-			    vca.vca_write_error_bytes);
+			    (u_longlong_t)vca.vca_read_error_bytes,
+			    (u_longlong_t)vca.vca_write_error_bytes);
 			spa_vdev_remove_cancel_impl(spa);
 		}
 	} else {
 		ASSERT0(range_tree_space(svr->svr_allocd_segs));
 		vdev_remove_complete(spa);
 	}
+
+	thread_exit();
 }
 
 void
@@ -1605,10 +1618,10 @@
 	mutex_exit(&svr->svr_lock);
 }
 
-/* ARGSUSED */
 static int
 spa_vdev_remove_cancel_check(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (spa->spa_vdev_removal == NULL)
@@ -1620,10 +1633,10 @@
  * Cancel a removal by freeing all entries from the partial mapping
  * and marking the vdev as no longer being removing.
  */
-/* ARGSUSED */
 static void
 spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
 {
+	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	spa_vdev_removal_t *svr = spa->spa_vdev_removal;
 	vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
@@ -1686,6 +1699,11 @@
 			mutex_enter(&svr->svr_lock);
 			VERIFY0(space_map_load(msp->ms_sm,
 			    svr->svr_allocd_segs, SM_ALLOC));
+
+			range_tree_walk(msp->ms_unflushed_allocs,
+			    range_tree_add, svr->svr_allocd_segs);
+			range_tree_walk(msp->ms_unflushed_frees,
+			    range_tree_remove, svr->svr_allocd_segs);
 			range_tree_walk(msp->ms_freeing,
 			    range_tree_remove, svr->svr_allocd_segs);
 
@@ -1745,10 +1763,11 @@
 	vdev_config_dirty(vd);
 
 	zfs_dbgmsg("canceled device removal for vdev %llu in %llu",
-	    vd->vdev_id, dmu_tx_get_txg(tx));
+	    (u_longlong_t)vd->vdev_id, (u_longlong_t)dmu_tx_get_txg(tx));
 	spa_history_log_internal(spa, "vdev remove canceled", tx,
 	    "%s vdev %llu %s", spa_name(spa),
-	    vd->vdev_id, (vd->vdev_path != NULL) ? vd->vdev_path : "-");
+	    (u_longlong_t)vd->vdev_id,
+	    (vd->vdev_path != NULL) ? vd->vdev_path : "-");
 }
 
 static int
@@ -1764,6 +1783,8 @@
 		spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER);
 		vdev_t *vd = vdev_lookup_top(spa, vdid);
 		metaslab_group_activate(vd->vdev_mg);
+		ASSERT(!vd->vdev_islog);
+		metaslab_group_activate(vd->vdev_log_mg);
 		spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG);
 	}
 
@@ -1814,19 +1835,14 @@
 	uint64_t id = vd->vdev_id;
 	spa_t *spa = vd->vdev_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
-	boolean_t last_vdev = (id == (rvd->vdev_children - 1));
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 
 	vdev_free(vd);
 
-	if (last_vdev) {
-		vdev_compact_children(rvd);
-	} else {
-		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
-		vdev_add_child(rvd, vd);
-	}
+	vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
+	vdev_add_child(rvd, vd);
 	vdev_config_dirty(rvd);
 
 	/*
@@ -1847,6 +1863,7 @@
 
 	ASSERT(vd->vdev_islog);
 	ASSERT(vd == vd->vdev_top);
+	ASSERT3P(vd->vdev_log_mg, ==, NULL);
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	/*
@@ -1882,6 +1899,7 @@
 
 	if (error != 0) {
 		metaslab_group_activate(mg);
+		ASSERT3P(vd->vdev_log_mg, ==, NULL);
 		return (error);
 	}
 	ASSERT0(vd->vdev_stat.vs_alloc);
@@ -1895,6 +1913,26 @@
 	vdev_dirty_leaves(vd, VDD_DTL, *txg);
 	vdev_config_dirty(vd);
 
+	/*
+	 * When the log space map feature is enabled we look at
+	 * the vdev's top_zap to find the on-disk flush data of
+	 * the metaslab we just flushed. Thus, while removing a
+	 * log vdev we make sure to call vdev_metaslab_fini()
+	 * first, which removes all metaslabs of this vdev from
+	 * spa_metaslabs_by_flushed before vdev_remove_empty()
+	 * destroys the top_zap of this log vdev.
+	 *
+	 * This avoids the scenario where we flush a metaslab
+	 * from the log vdev being removed that doesn't have a
+	 * top_zap and end up failing to lookup its on-disk flush
+	 * data.
+	 *
+	 * We don't call metaslab_group_destroy() right away
+	 * though (it will be called in vdev_free() later) as
+	 * during metaslab_sync() of metaslabs from other vdevs
+	 * we may touch the metaslab group of this vdev through
+	 * metaslab_class_histogram_verify()
+	 */
 	vdev_metaslab_fini(vd);
 
 	spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
@@ -1944,32 +1982,38 @@
 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REMOVAL))
 		return (SET_ERROR(ENOTSUP));
 
-	/* available space in the pool's normal class */
-	uint64_t available = dsl_dir_space_available(
-	    spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE);
 
 	metaslab_class_t *mc = vd->vdev_mg->mg_class;
-
-	/*
-	 * When removing a vdev from an allocation class that has
-	 * remaining vdevs, include available space from the class.
-	 */
-	if (mc != spa_normal_class(spa) && mc->mc_groups > 1) {
-		uint64_t class_avail = metaslab_class_get_space(mc) -
-		    metaslab_class_get_alloc(mc);
-
-		/* add class space, adjusted for overhead */
-		available += (class_avail * 94) / 100;
-	}
-
-	/*
-	 * There has to be enough free space to remove the
-	 * device and leave double the "slop" space (i.e. we
-	 * must leave at least 3% of the pool free, in addition to
-	 * the normal slop space).
-	 */
-	if (available < vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) {
-		return (SET_ERROR(ENOSPC));
+	metaslab_class_t *normal = spa_normal_class(spa);
+	if (mc != normal) {
+		/*
+		 * Space allocated from the special (or dedup) class is
+		 * included in the DMU's space usage, but it's not included
+		 * in spa_dspace (or dsl_pool_adjustedsize()).  Therefore
+		 * there is always at least as much free space in the normal
+		 * class, as is allocated from the special (and dedup) class.
+		 * As a backup check, we will return ENOSPC if this is
+		 * violated. See also spa_update_dspace().
+		 */
+		uint64_t available = metaslab_class_get_space(normal) -
+		    metaslab_class_get_alloc(normal);
+		ASSERT3U(available, >=, vd->vdev_stat.vs_alloc);
+		if (available < vd->vdev_stat.vs_alloc)
+			return (SET_ERROR(ENOSPC));
+	} else {
+		/* available space in the pool's normal class */
+		uint64_t available = dsl_dir_space_available(
+		    spa->spa_dsl_pool->dp_root_dir, NULL, 0, B_TRUE);
+		if (available <
+		    vd->vdev_stat.vs_dspace + spa_get_slop_space(spa)) {
+			/*
+			 * This is a normal device. There has to be enough free
+			 * space to remove the device and leave double the
+			 * "slop" space (i.e. we must leave at least 3% of the
+			 * pool free, in addition to the normal slop space).
+			 */
+			return (SET_ERROR(ENOSPC));
+		}
 	}
 
 	/*
@@ -1999,20 +2043,37 @@
 	}
 
 	/*
+	 * A removed special/dedup vdev must have same ashift as normal class.
+	 */
+	ASSERT(!vd->vdev_islog);
+	if (vd->vdev_alloc_bias != VDEV_BIAS_NONE &&
+	    vd->vdev_ashift != spa->spa_max_ashift) {
+		return (SET_ERROR(EINVAL));
+	}
+
+	/*
 	 * All vdevs in normal class must have the same ashift
-	 * and not be raidz.
+	 * and not be raidz or draid.
 	 */
 	vdev_t *rvd = spa->spa_root_vdev;
-	int num_indirect = 0;
 	for (uint64_t id = 0; id < rvd->vdev_children; id++) {
 		vdev_t *cvd = rvd->vdev_child[id];
-		if (cvd->vdev_ashift != 0 && !cvd->vdev_islog)
+
+		/*
+		 * A removed special/dedup vdev must have the same ashift
+		 * across all vdevs in its class.
+		 */
+		if (vd->vdev_alloc_bias != VDEV_BIAS_NONE &&
+		    cvd->vdev_alloc_bias == vd->vdev_alloc_bias &&
+		    cvd->vdev_ashift != vd->vdev_ashift) {
+			return (SET_ERROR(EINVAL));
+		}
+		if (cvd->vdev_ashift != 0 &&
+		    cvd->vdev_alloc_bias == VDEV_BIAS_NONE)
 			ASSERT3U(cvd->vdev_ashift, ==, spa->spa_max_ashift);
-		if (cvd->vdev_ops == &vdev_indirect_ops)
-			num_indirect++;
 		if (!vdev_is_concrete(cvd))
 			continue;
-		if (cvd->vdev_ops == &vdev_raidz_ops)
+		if (vdev_get_nparity(cvd) != 0)
 			return (SET_ERROR(EINVAL));
 		/*
 		 * Need the mirror to be mirror of leaf vdevs only
@@ -2063,6 +2124,8 @@
 	 */
 	metaslab_group_t *mg = vd->vdev_mg;
 	metaslab_group_passivate(mg);
+	ASSERT(!vd->vdev_islog);
+	metaslab_group_passivate(vd->vdev_log_mg);
 
 	/*
 	 * Wait for the youngest allocations and frees to sync,
@@ -2099,6 +2162,8 @@
 
 	if (error != 0) {
 		metaslab_group_activate(mg);
+		ASSERT(!vd->vdev_islog);
+		metaslab_group_activate(vd->vdev_log_mg);
 		spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
 		spa_async_request(spa, SPA_ASYNC_TRIM_RESTART);
 		spa_async_request(spa, SPA_ASYNC_AUTOTRIM_RESTART);
@@ -2111,8 +2176,7 @@
 	vdev_config_dirty(vd);
 	dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, *txg);
 	dsl_sync_task_nowait(spa->spa_dsl_pool,
-	    vdev_remove_initiate_sync,
-	    (void *)(uintptr_t)vd->vdev_id, 0, ZFS_SPACE_CHECK_NONE, tx);
+	    vdev_remove_initiate_sync, (void *)(uintptr_t)vd->vdev_id, tx);
 	dmu_tx_commit(tx);
 
 	return (0);
@@ -2166,18 +2230,30 @@
 		 * in this pool.
 		 */
 		if (vd == NULL || unspare) {
-			if (vd == NULL)
-				vd = spa_lookup_by_guid(spa, guid, B_TRUE);
-			ev = spa_event_create(spa, vd, NULL,
-			    ESC_ZFS_VDEV_REMOVE_AUX);
+			char *type;
+			boolean_t draid_spare = B_FALSE;
 
-			vd_type = VDEV_TYPE_SPARE;
-			vd_path = spa_strdup(fnvlist_lookup_string(
-			    nv, ZPOOL_CONFIG_PATH));
-			spa_vdev_remove_aux(spa->spa_spares.sav_config,
-			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
-			spa_load_spares(spa);
-			spa->spa_spares.sav_sync = B_TRUE;
+			if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type)
+			    == 0 && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
+				draid_spare = B_TRUE;
+
+			if (vd == NULL && draid_spare) {
+				error = SET_ERROR(ENOTSUP);
+			} else {
+				if (vd == NULL)
+					vd = spa_lookup_by_guid(spa,
+					    guid, B_TRUE);
+				ev = spa_event_create(spa, vd, NULL,
+				    ESC_ZFS_VDEV_REMOVE_AUX);
+
+				vd_type = VDEV_TYPE_SPARE;
+				vd_path = spa_strdup(fnvlist_lookup_string(
+				    nv, ZPOOL_CONFIG_PATH));
+				spa_vdev_remove_aux(spa->spa_spares.sav_config,
+				    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
+				spa_load_spares(spa);
+				spa->spa_spares.sav_sync = B_TRUE;
+			}
 		} else {
 			error = SET_ERROR(EBUSY);
 		}
@@ -2192,6 +2268,20 @@
 		 * Cache devices can always be removed.
 		 */
 		vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+
+		/*
+		 * Stop trimming the cache device. We need to release the
+		 * config lock to allow the syncing of TRIM transactions
+		 * without releasing the spa_namespace_lock. The same
+		 * strategy is employed in spa_vdev_remove_top().
+		 */
+		spa_vdev_config_exit(spa, NULL,
+		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
+		mutex_enter(&vd->vdev_trim_lock);
+		vdev_trim_stop(vd, VDEV_TRIM_CANCELED, NULL);
+		mutex_exit(&vd->vdev_trim_lock);
+		txg = spa_vdev_config_enter(spa);
+
 		ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_AUX);
 		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
@@ -2268,22 +2358,17 @@
 	return (0);
 }
 
-#if defined(_KERNEL)
-module_param(zfs_removal_ignore_errors, int, 0644);
-MODULE_PARM_DESC(zfs_removal_ignore_errors,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_vdev, zfs_, removal_ignore_errors, INT, ZMOD_RW,
 	"Ignore hard IO errors when removing device");
 
-module_param(zfs_remove_max_segment, int, 0644);
-MODULE_PARM_DESC(zfs_remove_max_segment,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_, remove_max_segment, INT, ZMOD_RW,
 	"Largest contiguous segment to allocate when removing device");
 
-module_param(vdev_removal_max_span, int, 0644);
-MODULE_PARM_DESC(vdev_removal_max_span,
+ZFS_MODULE_PARAM(zfs_vdev, vdev_, removal_max_span, INT, ZMOD_RW,
 	"Largest span of free chunks a remap segment can span");
 
-/* BEGIN CSTYLED */
-module_param(zfs_removal_suspend_progress, int, 0644);
-MODULE_PARM_DESC(zfs_removal_suspend_progress,
+ZFS_MODULE_PARAM(zfs_vdev, zfs_, removal_suspend_progress, INT, ZMOD_RW,
 	"Pause device removal after this many bytes are copied "
 	"(debug use only - causes removal to hang)");
 /* END CSTYLED */
@@ -2297,4 +2382,3 @@
 EXPORT_SYMBOL(spa_vdev_remove_cancel);
 EXPORT_SYMBOL(spa_vdev_remove_suspend);
 EXPORT_SYMBOL(svr_sync);
-#endif

diff --git a/zfs/module/zfs/vdev_root.c b/zfs/module/zfs/vdev_root.c
index 7170f70..45ddc2f 100644
--- a/zfs/module/zfs/vdev_root.c
+++ b/zfs/module/zfs/vdev_root.c

@@ -82,7 +82,7 @@
 
 static int
 vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *ashift, uint64_t *pshift)
 {
 	spa_t *spa = vd->vdev_spa;
 	int lasterror = 0;
@@ -98,7 +98,8 @@
 	for (int c = 0; c < vd->vdev_children; c++) {
 		vdev_t *cvd = vd->vdev_child[c];
 
-		if (cvd->vdev_open_error && !cvd->vdev_islog) {
+		if (cvd->vdev_open_error && !cvd->vdev_islog &&
+		    cvd->vdev_ops != &vdev_indirect_ops) {
 			lasterror = cvd->vdev_open_error;
 			numerrors++;
 		}
@@ -115,6 +116,7 @@
 	*asize = 0;
 	*max_asize = 0;
 	*ashift = 0;
+	*pshift = 0;
 
 	return (0);
 }
@@ -140,9 +142,13 @@
 }
 
 vdev_ops_t vdev_root_ops = {
+	.vdev_op_init = NULL,
+	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_root_open,
 	.vdev_op_close = vdev_root_close,
 	.vdev_op_asize = vdev_default_asize,
+	.vdev_op_min_asize = vdev_default_min_asize,
+	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = NULL,	/* not applicable to the root */
 	.vdev_op_io_done = NULL,	/* not applicable to the root */
 	.vdev_op_state_change = vdev_root_state_change,
@@ -151,6 +157,11 @@
 	.vdev_op_rele = NULL,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = NULL,
+	.vdev_op_rebuild_asize = NULL,
+	.vdev_op_metaslab_init = NULL,
+	.vdev_op_config_generate = NULL,
+	.vdev_op_nparity = NULL,
+	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_ROOT,	/* name of this vdev type */
 	.vdev_op_leaf = B_FALSE		/* not a leaf vdev */
 };

diff --git a/zfs/module/zfs/vdev_trim.c b/zfs/module/zfs/vdev_trim.c
index b7548fc..c0ce2ac 100644
--- a/zfs/module/zfs/vdev_trim.c
+++ b/zfs/module/zfs/vdev_trim.c

@@ -22,6 +22,8 @@
 /*
  * Copyright (c) 2016 by Delphix. All rights reserved.
  * Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
+ * Copyright 2023 RackTop Systems, Inc.
  */
 
 #include <sys/spa.h>
@@ -29,11 +31,11 @@
 #include <sys/txg.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_trim.h>
-#include <sys/refcount.h>
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_synctask.h>
 #include <sys/zap.h>
 #include <sys/dmu_tx.h>
+#include <sys/arc_impl.h>
 
 /*
  * TRIM is a feature which is used to notify a SSD that some previously
@@ -311,13 +313,14 @@
 			vd->vdev_trim_secure = secure;
 	}
 
-	boolean_t resumed = !!(vd->vdev_trim_state == VDEV_TRIM_SUSPENDED);
+	vdev_trim_state_t old_state = vd->vdev_trim_state;
+	boolean_t resumed = (old_state == VDEV_TRIM_SUSPENDED);
 	vd->vdev_trim_state = new_state;
 
 	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
 	dsl_sync_task_nowait(spa_get_dsl(spa), vdev_trim_zap_update_sync,
-	    guid, 2, ZFS_SPACE_CHECK_NONE, tx);
+	    guid, tx);
 
 	switch (new_state) {
 	case VDEV_TRIM_ACTIVE:
@@ -332,9 +335,12 @@
 		    "vdev=%s suspended", vd->vdev_path);
 		break;
 	case VDEV_TRIM_CANCELED:
-		spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
-		spa_history_log_internal(spa, "trim", tx,
-		    "vdev=%s canceled", vd->vdev_path);
+		if (old_state == VDEV_TRIM_ACTIVE ||
+		    old_state == VDEV_TRIM_SUSPENDED) {
+			spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_CANCEL);
+			spa_history_log_internal(spa, "trim", tx,
+			    "vdev=%s canceled", vd->vdev_path);
+		}
 		break;
 	case VDEV_TRIM_COMPLETE:
 		spa_event_notify(spa, vd, NULL, ESC_ZFS_TRIM_FINISH);
@@ -346,6 +352,9 @@
 	}
 
 	dmu_tx_commit(tx);
+
+	if (new_state != VDEV_TRIM_ACTIVE)
+		spa_notify_waiters(spa);
 }
 
 /*
@@ -420,6 +429,35 @@
 }
 
 /*
+ * The zio_done_func_t done callback for each TRIM issued via
+ * vdev_trim_simple(). It is responsible for updating the TRIM stats and
+ * limiting the number of in flight TRIM I/Os.  Simple TRIM I/Os are best
+ * effort and are never reissued on failure.
+ */
+static void
+vdev_trim_simple_cb(zio_t *zio)
+{
+	vdev_t *vd = zio->io_vd;
+	spa_t *spa = vd->vdev_spa;
+
+	mutex_enter(&vd->vdev_trim_io_lock);
+
+	/* Record the outcome of this I/O in the TRIM statistics. */
+	if (zio->io_error == 0) {
+		spa_iostats_trim_add(spa, TRIM_TYPE_SIMPLE,
+		    1, zio->io_orig_size, 0, 0, 0, 0);
+	} else {
+		vd->vdev_stat.vs_trim_errors++;
+		spa_iostats_trim_add(spa, TRIM_TYPE_SIMPLE,
+		    0, 0, 0, 0, 1, zio->io_orig_size);
+	}
+
+	/* Retire this I/O and wake anyone waiting on the in-flight count. */
+	ASSERT3U(vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE], >, 0);
+	vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE]--;
+	cv_broadcast(&vd->vdev_trim_io_cv);
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	/* Release SCL_STATE_ALL, using the vdev itself as the tag. */
+	spa_config_exit(spa, SCL_STATE_ALL, vd);
+}
+
+/*
  * Returns the average trim rate in bytes/sec for the ta->trim_vdev.
  */
 static uint64_t
@@ -438,6 +476,7 @@
 {
 	vdev_t *vd = ta->trim_vdev;
 	spa_t *spa = vd->vdev_spa;
+	void *cb;
 
 	mutex_enter(&vd->vdev_trim_io_lock);
 
@@ -448,7 +487,7 @@
 	if (ta->trim_type == TRIM_TYPE_MANUAL) {
 		while (vd->vdev_trim_rate != 0 && !vdev_trim_should_stop(vd) &&
 		    vdev_trim_calculate_rate(ta) > vd->vdev_trim_rate) {
-			cv_timedwait_sig(&vd->vdev_trim_io_cv,
+			cv_timedwait_idle(&vd->vdev_trim_io_cv,
 			    &vd->vdev_trim_io_lock, ddi_get_lbolt() +
 			    MSEC_TO_TICK(10));
 		}
@@ -456,8 +495,8 @@
 	ta->trim_bytes_done += size;
 
 	/* Limit in flight trimming I/Os */
-	while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] >=
-	    zfs_trim_queue_limit) {
+	while (vd->vdev_trim_inflight[0] + vd->vdev_trim_inflight[1] +
+	    vd->vdev_trim_inflight[2] >= zfs_trim_queue_limit) {
 		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
 	}
 	vd->vdev_trim_inflight[ta->trim_type]++;
@@ -477,8 +516,7 @@
 
 		/* This is the first write of this txg. */
 		dsl_sync_task_nowait(spa_get_dsl(spa),
-		    vdev_trim_zap_update_sync, guid, 2,
-		    ZFS_SPACE_CHECK_RESERVED, tx);
+		    vdev_trim_zap_update_sync, guid, tx);
 	}
 
 	/*
@@ -502,10 +540,17 @@
 	if (ta->trim_type == TRIM_TYPE_MANUAL)
 		vd->vdev_trim_offset[txg & TXG_MASK] = start + size;
 
+	if (ta->trim_type == TRIM_TYPE_MANUAL) {
+		cb = vdev_trim_cb;
+	} else if (ta->trim_type == TRIM_TYPE_AUTO) {
+		cb = vdev_autotrim_cb;
+	} else {
+		cb = vdev_trim_simple_cb;
+	}
+
 	zio_nowait(zio_trim(spa->spa_txg_zio[txg & TXG_MASK], vd,
-	    start, size, ta->trim_type == TRIM_TYPE_MANUAL ?
-	    vdev_trim_cb : vdev_autotrim_cb, NULL,
-	    ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL, ta->trim_flags));
+	    start, size, cb, NULL, ZIO_PRIORITY_TRIM, ZIO_FLAG_CANFAIL,
+	    ta->trim_flags));
-	/* vdev_trim_cb and vdev_autotrim_cb release SCL_STATE_ALL */
+	/* The completion callback (cb) releases SCL_STATE_ALL */
 
 	dmu_tx_commit(tx);
@@ -523,17 +568,20 @@
 vdev_trim_ranges(trim_args_t *ta)
 {
 	vdev_t *vd = ta->trim_vdev;
-	avl_tree_t *rt = &ta->trim_tree->rt_root;
+	zfs_btree_t *t = &ta->trim_tree->rt_root;
+	zfs_btree_index_t idx;
 	uint64_t extent_bytes_max = ta->trim_extent_bytes_max;
 	uint64_t extent_bytes_min = ta->trim_extent_bytes_min;
 	spa_t *spa = vd->vdev_spa;
+	int error = 0;
 
 	ta->trim_start_time = gethrtime();
 	ta->trim_bytes_done = 0;
 
-	for (range_seg_t *rs = avl_first(rt); rs != NULL;
-	    rs = AVL_NEXT(rt, rs)) {
-		uint64_t size = rs->rs_end - rs->rs_start;
+	for (range_seg_t *rs = zfs_btree_first(t, &idx); rs != NULL;
+	    rs = zfs_btree_next(t, &idx, &idx)) {
+		uint64_t size = rs_get_end(rs, ta->trim_tree) - rs_get_start(rs,
+		    ta->trim_tree);
 
 		if (extent_bytes_min && size < extent_bytes_min) {
 			spa_iostats_trim_add(spa, ta->trim_type,
@@ -545,19 +593,58 @@
 		uint64_t writes_required = ((size - 1) / extent_bytes_max) + 1;
 
 		for (uint64_t w = 0; w < writes_required; w++) {
-			int error;
-
 			error = vdev_trim_range(ta, VDEV_LABEL_START_SIZE +
-			    rs->rs_start + (w * extent_bytes_max),
-			    MIN(size - (w * extent_bytes_max),
-			    extent_bytes_max));
+			    rs_get_start(rs, ta->trim_tree) +
+			    (w *extent_bytes_max), MIN(size -
+			    (w * extent_bytes_max), extent_bytes_max));
 			if (error != 0) {
-				return (error);
+				goto done;
 			}
 		}
 	}
 
-	return (0);
+done:
+	/*
+	 * Make sure all TRIMs for this metaslab have completed before
+	 * returning. TRIM zios have lower priority over regular or syncing
+	 * zios, so all TRIM zios for this metaslab must complete before the
+	 * metaslab is re-enabled. Otherwise it's possible write zios to
+	 * this metaslab could cut ahead of still queued TRIM zios for this
+	 * metaslab causing corruption if the ranges overlap.
+	 */
+	mutex_enter(&vd->vdev_trim_io_lock);
+	while (vd->vdev_trim_inflight[0] > 0) {
+		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+	}
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	return (error);
+}
+
+/*
+ * vdev_xlate_walk() callback; arg points to a uint64_t holding a running
+ * maximum. Raises it to physical_rs->rs_end when that end offset is
+ * larger.
+ */
+static void
+vdev_trim_xlate_last_rs_end(void *arg, range_seg64_t *physical_rs)
+{
+	uint64_t *max_end = arg;
+
+	*max_end = MAX(*max_end, physical_rs->rs_end);
+}
+
+/*
+ * vdev_xlate_walk() callback; arg is the vdev whose TRIM progress is being
+ * tallied. Adds the segment's length to vdev_trim_bytes_est and adds to
+ * vdev_trim_bytes_done the part of the segment that lies below
+ * vdev_trim_last_offset.
+ */
+static void
+vdev_trim_xlate_progress(void *arg, range_seg64_t *physical_rs)
+{
+	vdev_t *vd = arg;
+	uint64_t seg_start = physical_rs->rs_start;
+	uint64_t seg_end = physical_rs->rs_end;
+	uint64_t seg_len = seg_end - seg_start;
+	uint64_t last = vd->vdev_trim_last_offset;
+
+	vd->vdev_trim_bytes_est += seg_len;
+
+	if (last >= seg_end) {
+		/* The whole segment is below the last trimmed offset. */
+		vd->vdev_trim_bytes_done += seg_len;
+	} else if (last > seg_start) {
+		/* The offset falls inside the segment: count the prefix. */
+		vd->vdev_trim_bytes_done += last - seg_start;
+	}
+}
 
 /*
@@ -577,27 +664,35 @@
 		metaslab_t *msp = vd->vdev_top->vdev_ms[i];
 		mutex_enter(&msp->ms_lock);
 
-		uint64_t ms_free = msp->ms_size -
-		    metaslab_allocated_space(msp);
-
-		if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
-			ms_free /= vd->vdev_top->vdev_children;
+		uint64_t ms_free = (msp->ms_size -
+		    metaslab_allocated_space(msp)) /
+		    vdev_get_ndisks(vd->vdev_top);
 
 		/*
 		 * Convert the metaslab range to a physical range
 		 * on our vdev. We use this to determine if we are
 		 * in the middle of this metaslab range.
 		 */
-		range_seg_t logical_rs, physical_rs;
+		range_seg64_t logical_rs, physical_rs, remain_rs;
 		logical_rs.rs_start = msp->ms_start;
 		logical_rs.rs_end = msp->ms_start + msp->ms_size;
-		vdev_xlate(vd, &logical_rs, &physical_rs);
 
+		/* Metaslab space after this offset has not been trimmed. */
+		vdev_xlate(vd, &logical_rs, &physical_rs, &remain_rs);
 		if (vd->vdev_trim_last_offset <= physical_rs.rs_start) {
 			vd->vdev_trim_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
 			continue;
-		} else if (vd->vdev_trim_last_offset > physical_rs.rs_end) {
+		}
+
+		/* Metaslab space before this offset has been trimmed */
+		uint64_t last_rs_end = physical_rs.rs_end;
+		if (!vdev_xlate_is_empty(&remain_rs)) {
+			vdev_xlate_walk(vd, &remain_rs,
+			    vdev_trim_xlate_last_rs_end, &last_rs_end);
+		}
+
+		if (vd->vdev_trim_last_offset > last_rs_end) {
 			vd->vdev_trim_bytes_done += ms_free;
 			vd->vdev_trim_bytes_est += ms_free;
 			mutex_exit(&msp->ms_lock);
@@ -611,25 +706,16 @@
 		 */
 		VERIFY0(metaslab_load(msp));
 
-		for (range_seg_t *rs = avl_first(&msp->ms_allocatable->rt_root);
-		    rs; rs = AVL_NEXT(&msp->ms_allocatable->rt_root, rs)) {
-			logical_rs.rs_start = rs->rs_start;
-			logical_rs.rs_end = rs->rs_end;
-			vdev_xlate(vd, &logical_rs, &physical_rs);
+		range_tree_t *rt = msp->ms_allocatable;
+		zfs_btree_t *bt = &rt->rt_root;
+		zfs_btree_index_t idx;
+		for (range_seg_t *rs = zfs_btree_first(bt, &idx);
+		    rs != NULL; rs = zfs_btree_next(bt, &idx, &idx)) {
+			logical_rs.rs_start = rs_get_start(rs, rt);
+			logical_rs.rs_end = rs_get_end(rs, rt);
 
-			uint64_t size = physical_rs.rs_end -
-			    physical_rs.rs_start;
-			vd->vdev_trim_bytes_est += size;
-			if (vd->vdev_trim_last_offset >= physical_rs.rs_end) {
-				vd->vdev_trim_bytes_done += size;
-			} else if (vd->vdev_trim_last_offset >
-			    physical_rs.rs_start &&
-			    vd->vdev_trim_last_offset <=
-			    physical_rs.rs_end) {
-				vd->vdev_trim_bytes_done +=
-				    vd->vdev_trim_last_offset -
-				    physical_rs.rs_start;
-			}
+			vdev_xlate_walk(vd, &logical_rs,
+			    vdev_trim_xlate_progress, vd);
 		}
 		mutex_exit(&msp->ms_lock);
 	}
@@ -697,8 +783,38 @@
 	return (err);
 }
 
+/*
+ * vdev_xlate_walk() callback; arg is the trim_args_t. Adds the physical
+ * segment to ta->trim_tree. For a manual TRIM, segments ending at or
+ * below vdev_trim_last_offset are dropped and a segment straddling it is
+ * clipped in place (physical_rs->rs_start is overwritten).
+ */
+static void
+vdev_trim_xlate_range_add(void *arg, range_seg64_t *physical_rs)
+{
+	trim_args_t *ta = arg;
+
+	/*
+	 * Only a manual TRIM traverses the vdev sequentially, so only it
+	 * can skip what lies below the last offset. Every other TRIM type
+	 * adds each valid segment unchanged.
+	 */
+	if (ta->trim_type == TRIM_TYPE_MANUAL) {
+		uint64_t last = ta->trim_vdev->vdev_trim_last_offset;
+
+		/* Already visited in full: nothing to add. */
+		if (last >= physical_rs->rs_end)
+			return;
+
+		/* Partially visited: resume from the last offset. */
+		if (last > physical_rs->rs_start) {
+			ASSERT3U(physical_rs->rs_end, >, last);
+			physical_rs->rs_start = last;
+		}
+	}
+
+	ASSERT3U(physical_rs->rs_end, >, physical_rs->rs_start);
+
+	range_tree_add(ta->trim_tree, physical_rs->rs_start,
+	    physical_rs->rs_end - physical_rs->rs_start);
+}
+
 /*
- * Convert the logical range into a physical range and add it to the
+ * Convert the logical range into physical ranges and add them to the
  * range tree passed in the trim_args_t.
  */
 static void
@@ -706,7 +822,7 @@
 {
 	trim_args_t *ta = arg;
 	vdev_t *vd = ta->trim_vdev;
-	range_seg_t logical_rs, physical_rs;
+	range_seg64_t logical_rs;
 	logical_rs.rs_start = start;
 	logical_rs.rs_end = start + size;
 
@@ -719,48 +835,11 @@
 		metaslab_t *msp = ta->trim_msp;
 		VERIFY0(metaslab_load(msp));
 		VERIFY3B(msp->ms_loaded, ==, B_TRUE);
-		VERIFY(range_tree_find(msp->ms_allocatable, start, size));
+		VERIFY(range_tree_contains(msp->ms_allocatable, start, size));
 	}
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
-	vdev_xlate(vd, &logical_rs, &physical_rs);
-
-	IMPLY(vd->vdev_top == vd,
-	    logical_rs.rs_start == physical_rs.rs_start);
-	IMPLY(vd->vdev_top == vd,
-	    logical_rs.rs_end == physical_rs.rs_end);
-
-	/*
-	 * Only a manual trim will be traversing the vdev sequentially.
-	 * For an auto trim all valid ranges should be added.
-	 */
-	if (ta->trim_type == TRIM_TYPE_MANUAL) {
-
-		/* Only add segments that we have not visited yet */
-		if (physical_rs.rs_end <= vd->vdev_trim_last_offset)
-			return;
-
-		/* Pick up where we left off mid-range. */
-		if (vd->vdev_trim_last_offset > physical_rs.rs_start) {
-			ASSERT3U(physical_rs.rs_end, >,
-			    vd->vdev_trim_last_offset);
-			physical_rs.rs_start = vd->vdev_trim_last_offset;
-		}
-	}
-
-	ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
-
-	/*
-	 * With raidz, it's possible that the logical range does not live on
-	 * this leaf vdev. We only add the physical range to this vdev's if it
-	 * has a length greater than 0.
-	 */
-	if (physical_rs.rs_end > physical_rs.rs_start) {
-		range_tree_add(ta->trim_tree, physical_rs.rs_start,
-		    physical_rs.rs_end - physical_rs.rs_start);
-	} else {
-		ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
-	}
+	vdev_xlate_walk(vd, &logical_rs, vdev_trim_xlate_range_add, arg);
 }
 
 /*
@@ -798,7 +877,7 @@
 	ta.trim_vdev = vd;
 	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
 	ta.trim_extent_bytes_min = zfs_trim_extent_bytes_min;
-	ta.trim_tree = range_tree_create(NULL, NULL);
+	ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 	ta.trim_type = TRIM_TYPE_MANUAL;
 	ta.trim_flags = 0;
 
@@ -837,7 +916,7 @@
 		 */
 		if (msp->ms_sm == NULL && vd->vdev_trim_partial) {
 			mutex_exit(&msp->ms_lock);
-			metaslab_enable(msp, B_FALSE);
+			metaslab_enable(msp, B_FALSE, B_FALSE);
 			spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 			vdev_trim_calculate_progress(vd);
 			continue;
@@ -849,7 +928,7 @@
 		mutex_exit(&msp->ms_lock);
 
 		error = vdev_trim_ranges(&ta);
-		metaslab_enable(msp, B_TRUE);
+		metaslab_enable(msp, B_TRUE, B_FALSE);
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 		range_tree_vacate(ta.trim_tree, NULL, NULL);
@@ -858,19 +937,20 @@
 	}
 
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
-	mutex_enter(&vd->vdev_trim_io_lock);
-	while (vd->vdev_trim_inflight[0] > 0) {
-		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
-	}
-	mutex_exit(&vd->vdev_trim_io_lock);
 
 	range_tree_destroy(ta.trim_tree);
 
 	mutex_enter(&vd->vdev_trim_lock);
-	if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
-		vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
-		    vd->vdev_trim_rate, vd->vdev_trim_partial,
-		    vd->vdev_trim_secure);
+	if (!vd->vdev_trim_exit_wanted) {
+		if (vdev_writeable(vd)) {
+			vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+			    vd->vdev_trim_rate, vd->vdev_trim_partial,
+			    vd->vdev_trim_secure);
+		} else if (vd->vdev_faulted) {
+			vdev_trim_change_state(vd, VDEV_TRIM_CANCELED,
+			    vd->vdev_trim_rate, vd->vdev_trim_partial,
+			    vd->vdev_trim_secure);
+		}
 	}
 	ASSERT(vd->vdev_trim_thread != NULL || vd->vdev_trim_inflight[0] == 0);
 
@@ -888,6 +968,8 @@
 	vd->vdev_trim_thread = NULL;
 	cv_broadcast(&vd->vdev_trim_cv);
 	mutex_exit(&vd->vdev_trim_lock);
+
+	thread_exit();
 }
 
 /*
@@ -931,6 +1013,7 @@
 void
 vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
 {
+	(void) spa;
 	vdev_t *vd;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -1006,6 +1089,7 @@
 {
 	spa_t *spa = vd->vdev_spa;
 	list_t vd_list;
+	vdev_t *vd_l2cache;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -1013,6 +1097,17 @@
 	    offsetof(vdev_t, vdev_trim_node));
 
 	vdev_trim_stop_all_impl(vd, tgt_state, &vd_list);
+
+	/*
+	 * Iterate over cache devices and request stop trimming the
+	 * whole device in case we export the pool or remove the cache
+	 * device prematurely.
+	 */
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+		vd_l2cache = spa->spa_l2cache.sav_vdevs[i];
+		vdev_trim_stop_all_impl(vd_l2cache, tgt_state, &vd_list);
+	}
+
 	vdev_trim_stop_wait(spa, &vd_list);
 
 	if (vd->vdev_spa->spa_sync_on) {
@@ -1080,7 +1175,7 @@
 
 	VERIFY3B(msp->ms_loaded, ==, B_TRUE);
 	VERIFY3U(msp->ms_disabled, >, 0);
-	VERIFY(range_tree_find(msp->ms_allocatable, start, size) != NULL);
+	VERIFY(range_tree_contains(msp->ms_allocatable, start, size));
 }
 
 /*
@@ -1103,12 +1198,11 @@
 	mutex_exit(&vd->vdev_autotrim_lock);
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
-	uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
-	uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
-
 	while (!vdev_autotrim_should_stop(vd)) {
 		int txgs_per_trim = MAX(zfs_trim_txg_batch, 1);
 		boolean_t issued_trim = B_FALSE;
+		uint64_t extent_bytes_max = zfs_trim_extent_bytes_max;
+		uint64_t extent_bytes_min = zfs_trim_extent_bytes_min;
 
 		/*
 		 * All of the metaslabs are divided in to groups of size
@@ -1154,7 +1248,7 @@
 			if (msp->ms_sm == NULL ||
 			    range_tree_is_empty(msp->ms_trim)) {
 				mutex_exit(&msp->ms_lock);
-				metaslab_enable(msp, B_FALSE);
+				metaslab_enable(msp, B_FALSE, B_FALSE);
 				continue;
 			}
 
@@ -1170,7 +1264,7 @@
 			 */
 			if (msp->ms_disabled > 1) {
 				mutex_exit(&msp->ms_lock);
-				metaslab_enable(msp, B_FALSE);
+				metaslab_enable(msp, B_FALSE, B_FALSE);
 				continue;
 			}
 
@@ -1178,7 +1272,8 @@
 			 * Allocate an empty range tree which is swapped in
 			 * for the existing ms_trim tree while it is processed.
 			 */
-			trim_tree = range_tree_create(NULL, NULL);
+			trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL,
+			    0, 0);
 			range_tree_swap(&msp->ms_trim, &trim_tree);
 			ASSERT(range_tree_is_empty(msp->ms_trim));
 
@@ -1232,7 +1327,8 @@
 				if (!cvd->vdev_ops->vdev_op_leaf)
 					continue;
 
-				ta->trim_tree = range_tree_create(NULL, NULL);
+				ta->trim_tree = range_tree_create(NULL,
+				    RANGE_SEG64, NULL, 0, 0);
 				range_tree_walk(trim_tree,
 				    vdev_trim_range_add, ta);
 			}
@@ -1288,7 +1384,7 @@
 			range_tree_vacate(trim_tree, NULL, NULL);
 			range_tree_destroy(trim_tree);
 
-			metaslab_enable(msp, issued_trim);
+			metaslab_enable(msp, issued_trim, B_FALSE);
 			spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 			for (uint64_t c = 0; c < children; c++) {
@@ -1350,6 +1446,8 @@
 	vd->vdev_autotrim_thread = NULL;
 	cv_broadcast(&vd->vdev_autotrim_cv);
 	mutex_exit(&vd->vdev_autotrim_lock);
+
+	thread_exit();
 }
 
 /*
@@ -1425,7 +1523,189 @@
 		vdev_autotrim(spa);
 }
 
-#if defined(_KERNEL)
+/*
+ * Thread body that TRIMs a whole cache (L2ARC) device; arg is the cache
+ * vdev. It resets the vdev's TRIM bookkeeping, issues a TRIM_TYPE_MANUAL
+ * TRIM over [0, vdev_get_min_asize(vd)), waits for in-flight manual TRIM
+ * I/Os to drain, marks the TRIM complete when no exit was requested and
+ * the vdev is writeable, then zeroes the L2ARC device header and calls
+ * l2arc_dev_hdr_update() before clearing vdev_trim_thread and exiting.
+ *
+ * NOTE(review): dev is dereferenced below without a NULL check. The only
+ * visible creator of this thread, vdev_trim_l2arc(), skips vdevs for
+ * which l2arc_vdev_get() returned NULL - confirm no other caller exists.
+ */
+static void
+vdev_trim_l2arc_thread(void *arg)
+{
+	vdev_t		*vd = arg;
+	spa_t		*spa = vd->vdev_spa;
+	l2arc_dev_t	*dev = l2arc_vdev_get(vd);
+	trim_args_t	ta;
+	range_seg64_t 	physical_rs;
+
+	ASSERT(vdev_is_concrete(vd));
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+	/* Start from a clean slate: no resume offset, no rate/partial/secure */
+	vd->vdev_trim_last_offset = 0;
+	vd->vdev_trim_rate = 0;
+	vd->vdev_trim_partial = 0;
+	vd->vdev_trim_secure = 0;
+
+	bzero(&ta, sizeof (ta));
+	ta.trim_vdev = vd;
+	ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+	ta.trim_type = TRIM_TYPE_MANUAL;
+	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+	ta.trim_flags = 0;
+
+	/* The single range to TRIM is [0, vdev_get_min_asize(vd)). */
+	physical_rs.rs_start = vd->vdev_trim_bytes_done = 0;
+	physical_rs.rs_end = vd->vdev_trim_bytes_est =
+	    vdev_get_min_asize(vd);
+
+	range_tree_add(ta.trim_tree, physical_rs.rs_start,
+	    physical_rs.rs_end - physical_rs.rs_start);
+
+	mutex_enter(&vd->vdev_trim_lock);
+	vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+	mutex_exit(&vd->vdev_trim_lock);
+
+	/* The result of vdev_trim_ranges() is deliberately ignored here. */
+	(void) vdev_trim_ranges(&ta);
+
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
+	/* Wait until no TRIM_TYPE_MANUAL I/O is in flight on this vdev. */
+	mutex_enter(&vd->vdev_trim_io_lock);
+	while (vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] > 0) {
+		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+	}
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	range_tree_vacate(ta.trim_tree, NULL, NULL);
+	range_tree_destroy(ta.trim_tree);
+
+	mutex_enter(&vd->vdev_trim_lock);
+	if (!vd->vdev_trim_exit_wanted && vdev_writeable(vd)) {
+		vdev_trim_change_state(vd, VDEV_TRIM_COMPLETE,
+		    vd->vdev_trim_rate, vd->vdev_trim_partial,
+		    vd->vdev_trim_secure);
+	}
+	ASSERT(vd->vdev_trim_thread != NULL ||
+	    vd->vdev_trim_inflight[TRIM_TYPE_MANUAL] == 0);
+
+	/*
+	 * Drop the vdev_trim_lock while we sync out the txg since it's
+	 * possible that a device might be trying to come online and
+	 * must check to see if it needs to restart a trim. That thread
+	 * will be holding the spa_config_lock which would prevent the
+	 * txg_wait_synced from completing. Same strategy as in
+	 * vdev_trim_thread().
+	 */
+	mutex_exit(&vd->vdev_trim_lock);
+	txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+	mutex_enter(&vd->vdev_trim_lock);
+
+	/*
+	 * Update the header of the cache device here, before
+	 * broadcasting vdev_trim_cv which may lead to the removal
+	 * of the device. The same applies for setting l2ad_trim_all to
+	 * false.
+	 */
+	spa_config_enter(vd->vdev_spa, SCL_L2ARC, vd,
+	    RW_READER);
+	bzero(dev->l2ad_dev_hdr, dev->l2ad_dev_hdr_asize);
+	l2arc_dev_hdr_update(dev);
+	spa_config_exit(vd->vdev_spa, SCL_L2ARC, vd);
+
+	vd->vdev_trim_thread = NULL;
+	/* Only a TRIM that reached COMPLETE clears the trim-all request. */
+	if (vd->vdev_trim_state == VDEV_TRIM_COMPLETE)
+		dev->l2ad_trim_all = B_FALSE;
+
+	cv_broadcast(&vd->vdev_trim_cv);
+	mutex_exit(&vd->vdev_trim_lock);
+
+	thread_exit();
+}
+
+/*
+ * Punches out TRIM threads for the L2ARC devices in a spa and assigns them
+ * to vd->vdev_trim_thread variable. This facilitates the management of
+ * trimming the whole cache device using TRIM_TYPE_MANUAL upon addition
+ * to a pool or pool creation or when the header of the device is invalid.
+ */
+void
+vdev_trim_l2arc(spa_t *spa)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+	/* Visit each of the spa's cache devices and start a TRIM thread. */
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+		vdev_t *vd = spa->spa_l2cache.sav_vdevs[i];
+		l2arc_dev_t *dev = l2arc_vdev_get(vd);
+
+		/*
+		 * Skip devices we should not TRIM: the vdev is UNAVAIL, or
+		 * the cache device was not marked for whole device TRIM
+		 * (i.e. l2arc_trim_ahead = 0, or the L2ARC device header
+		 * is valid with trim_state = VDEV_TRIM_COMPLETE and
+		 * l2ad_log_entries > 0).
+		 */
+		if (dev == NULL)
+			continue;
+		if (!dev->l2ad_trim_all)
+			continue;
+
+		mutex_enter(&vd->vdev_trim_lock);
+
+		/* The vdev must be an idle, attached, concrete leaf. */
+		ASSERT(vd->vdev_ops->vdev_op_leaf);
+		ASSERT(vdev_is_concrete(vd));
+		ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+		ASSERT(!vd->vdev_detached);
+		ASSERT(!vd->vdev_trim_exit_wanted);
+		ASSERT(!vd->vdev_top->vdev_removing);
+
+		/* Mark the TRIM active, then hand the vdev to its thread. */
+		vdev_trim_change_state(vd, VDEV_TRIM_ACTIVE, 0, 0, 0);
+		vd->vdev_trim_thread = thread_create(NULL, 0,
+		    vdev_trim_l2arc_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+
+		mutex_exit(&vd->vdev_trim_lock);
+	}
+}
+
+/*
+ * Issue a best-effort TRIM_TYPE_SIMPLE TRIM for the range
+ * [start, start + size) on the leaf vdev vd, and wait for the resulting
+ * I/Os to finish. The range is handed to vdev_trim_ranges(), which adds
+ * VDEV_LABEL_START_SIZE to the offsets it issues. A zero-length range
+ * adds nothing to the tree. Returns the error from vdev_trim_ranges().
+ */
+int
+vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
+{
+	trim_args_t ta;
+	range_seg64_t physical_rs;
+	int error;
+
+	physical_rs.rs_start = start;
+	physical_rs.rs_end = start + size;
+
+	ASSERT(vdev_is_concrete(vd));
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+	ASSERT(!vd->vdev_detached);
+	ASSERT(!vd->vdev_top->vdev_removing);
+
+	/* Describe a one-off simple TRIM with no extra flags. */
+	bzero(&ta, sizeof (ta));
+	ta.trim_vdev = vd;
+	ta.trim_tree = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
+	ta.trim_type = TRIM_TYPE_SIMPLE;
+	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
+	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
+	ta.trim_flags = 0;
+
+	/* Only a non-empty range is queued; an empty one is a no-op. */
+	ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
+	if (physical_rs.rs_end > physical_rs.rs_start) {
+		range_tree_add(ta.trim_tree, physical_rs.rs_start,
+		    physical_rs.rs_end - physical_rs.rs_start);
+	}
+
+	error = vdev_trim_ranges(&ta);
+
+	/* Block until every simple TRIM I/O on this vdev has completed. */
+	mutex_enter(&vd->vdev_trim_io_lock);
+	while (vd->vdev_trim_inflight[TRIM_TYPE_SIMPLE] > 0)
+		cv_wait(&vd->vdev_trim_io_cv, &vd->vdev_trim_io_lock);
+	mutex_exit(&vd->vdev_trim_io_lock);
+
+	range_tree_vacate(ta.trim_tree, NULL, NULL);
+	range_tree_destroy(ta.trim_tree);
+
+	return (error);
+}
+
+
 EXPORT_SYMBOL(vdev_trim);
 EXPORT_SYMBOL(vdev_trim_stop);
 EXPORT_SYMBOL(vdev_trim_stop_all);
@@ -1435,26 +1715,22 @@
 EXPORT_SYMBOL(vdev_autotrim_stop_all);
 EXPORT_SYMBOL(vdev_autotrim_stop_wait);
 EXPORT_SYMBOL(vdev_autotrim_restart);
+EXPORT_SYMBOL(vdev_trim_l2arc);
+EXPORT_SYMBOL(vdev_trim_simple);
 
 /* BEGIN CSTYLED */
-module_param(zfs_trim_extent_bytes_max, uint, 0644);
-MODULE_PARM_DESC(zfs_trim_extent_bytes_max,
+ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_max, UINT, ZMOD_RW,
     "Max size of TRIM commands, larger will be split");
 
-module_param(zfs_trim_extent_bytes_min, uint, 0644);
-MODULE_PARM_DESC(zfs_trim_extent_bytes_min,
+ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, extent_bytes_min, UINT, ZMOD_RW,
     "Min size of TRIM commands, smaller will be skipped");
 
-module_param(zfs_trim_metaslab_skip, uint, 0644);
-MODULE_PARM_DESC(zfs_trim_metaslab_skip,
+ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, metaslab_skip, UINT, ZMOD_RW,
     "Skip metaslabs which have never been initialized");
 
-module_param(zfs_trim_txg_batch, uint, 0644);
-MODULE_PARM_DESC(zfs_trim_txg_batch,
+ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, txg_batch, UINT, ZMOD_RW,
     "Min number of txgs to aggregate frees before issuing TRIM");
 
-module_param(zfs_trim_queue_limit, uint, 0644);
-MODULE_PARM_DESC(zfs_trim_queue_limit,
+ZFS_MODULE_PARAM(zfs_trim, zfs_trim_, queue_limit, UINT, ZMOD_RW,
     "Max queued TRIMs outstanding per leaf vdev");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/zap.c b/zfs/module/zfs/zap.c
index 0d20046..4cd1428 100644
--- a/zfs/module/zfs/zap.c
+++ b/zfs/module/zfs/zap.c

@@ -45,7 +45,6 @@
 #include <sys/zfs_znode.h>
 #include <sys/fs/zfs.h>
 #include <sys/zap.h>
-#include <sys/refcount.h>
 #include <sys/zap_impl.h>
 #include <sys/zap_leaf.h>
 
@@ -81,8 +80,6 @@
 
 int fzap_default_block_shift = 14; /* 16k blocksize */
 
-extern inline zap_phys_t *zap_f_phys(zap_t *zap);
-
 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
 
 void
@@ -222,7 +219,8 @@
 	tbl->zt_blks_copied++;
 
 	dprintf("copied block %llu of %llu\n",
-	    tbl->zt_blks_copied, tbl->zt_numblks);
+	    (u_longlong_t)tbl->zt_blks_copied,
+	    (u_longlong_t)tbl->zt_numblks);
 
 	if (tbl->zt_blks_copied == tbl->zt_numblks) {
 		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
@@ -235,7 +233,7 @@
 		tbl->zt_blks_copied = 0;
 
 		dprintf("finished; numblocks now %llu (%uk entries)\n",
-		    tbl->zt_numblks, 1<<(tbl->zt_shift-10));
+		    (u_longlong_t)tbl->zt_numblks, 1<<(tbl->zt_shift-10));
 	}
 
 	return (0);
@@ -250,7 +248,8 @@
 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
 	ASSERT(tbl->zt_blk != 0);
 
-	dprintf("storing %llx at index %llx\n", val, idx);
+	dprintf("storing %llx at index %llx\n", (u_longlong_t)val,
+	    (u_longlong_t)idx);
 
 	uint64_t blk = idx >> (bs-3);
 	uint64_t off = idx & ((1<<(bs-3))-1);
@@ -1379,11 +1378,7 @@
 	}
 }
 
-#if defined(_KERNEL)
 /* BEGIN CSTYLED */
-module_param(zap_iterate_prefetch, int, 0644);
-MODULE_PARM_DESC(zap_iterate_prefetch,
+ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW,
 	"When iterating ZAP object, prefetch it");
-
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/zap_leaf.c b/zfs/module/zfs/zap_leaf.c
index b421dd5..fc25344 100644
--- a/zfs/module/zfs/zap_leaf.c
+++ b/zfs/module/zfs/zap_leaf.c

@@ -52,8 +52,6 @@
 
 #define	LEAF_HASH_ENTPTR(l, h)	(&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
 
-extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
-
 static void
 zap_memset(void *a, int c, size_t n)
 {
@@ -467,7 +465,7 @@
 		}
 	}
 
-	return (bestcd == -1U ? ENOENT : 0);
+	return (bestcd == -1U ? SET_ERROR(ENOENT) : 0);
 }
 
 int
@@ -647,7 +645,7 @@
  * form of the name.  But all callers have one of these on hand anyway,
  * so might as well take advantage.  A cleaner but slower interface
  * would accept neither argument, and compute the normalized name as
- * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
+ * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
  */
 boolean_t
 zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
@@ -668,7 +666,7 @@
 			continue;
 
 		if (zn == NULL) {
-			zn = zap_name_alloc(zap, name, MT_NORMALIZE);
+			zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
 			allocdzn = B_TRUE;
 		}
 		if (zap_leaf_array_match(zeh->zeh_leaf, zn,

diff --git a/zfs/module/zfs/zap_micro.c b/zfs/module/zfs/zap_micro.c
index 079c234..e3dadf1 100644
--- a/zfs/module/zfs/zap_micro.c
+++ b/zfs/module/zfs/zap_micro.c

@@ -31,10 +31,9 @@
 #include <sys/dmu.h>
 #include <sys/zfs_context.h>
 #include <sys/zap.h>
-#include <sys/refcount.h>
 #include <sys/zap_impl.h>
 #include <sys/zap_leaf.h>
-#include <sys/avl.h>
+#include <sys/btree.h>
 #include <sys/arc.h>
 #include <sys/dmu_objset.h>
 
@@ -42,8 +41,6 @@
 #include <sys/sunddi.h>
 #endif
 
-extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
-
 static int mzap_upgrade(zap_t **zapp,
     void *tag, dmu_tx_t *tx, zap_flags_t flags);
 
@@ -95,7 +92,7 @@
 			    wp++, i++) {
 				uint64_t word = *wp;
 
-				for (int j = 0; j < zn->zn_key_intlen; j++) {
+				for (int j = 0; j < 8; j++) {
 					h = (h >> 8) ^
 					    zfs_crc64_table[(h ^ word) & 0xFF];
 					word >>= NBBY;
@@ -165,18 +162,25 @@
 	}
 }
 
+static zap_name_t *
+zap_name_alloc(zap_t *zap)
+{
+	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+	zn->zn_zap = zap;
+	return (zn);
+}
+
 void
 zap_name_free(zap_name_t *zn)
 {
 	kmem_free(zn, sizeof (zap_name_t));
 }
 
-zap_name_t *
-zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
+static int
+zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
 {
-	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+	zap_t *zap = zn->zn_zap;
 
-	zn->zn_zap = zap;
 	zn->zn_key_intlen = sizeof (*key);
 	zn->zn_key_orig = key;
 	zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
@@ -197,17 +201,13 @@
 		 * what the hash is computed from.
 		 */
 		if (zap_normalize(zap, key, zn->zn_normbuf,
-		    zap->zap_normflags) != 0) {
-			zap_name_free(zn);
-			return (NULL);
-		}
+		    zap->zap_normflags) != 0)
+			return (SET_ERROR(ENOTSUP));
 		zn->zn_key_norm = zn->zn_normbuf;
 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
 	} else {
-		if (mt != 0) {
-			zap_name_free(zn);
-			return (NULL);
-		}
+		if (mt != 0)
+			return (SET_ERROR(ENOTSUP));
 		zn->zn_key_norm = zn->zn_key_orig;
 		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
 	}
@@ -220,17 +220,26 @@
 		 * what the matching is based on.  (Not the hash!)
 		 */
 		if (zap_normalize(zap, key, zn->zn_normbuf,
-		    zn->zn_normflags) != 0) {
-			zap_name_free(zn);
-			return (NULL);
-		}
+		    zn->zn_normflags) != 0)
+			return (SET_ERROR(ENOTSUP));
 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
 	}
 
-	return (zn);
+	return (0);
 }
 
 zap_name_t *
+zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
+{
+	zap_name_t *zn = zap_name_alloc(zap);
+	if (zap_name_init_str(zn, key, mt) != 0) {
+		zap_name_free(zn);
+		return (NULL);
+	}
+	return (zn);
+}
+
+static zap_name_t *
 zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
 {
 	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
@@ -280,45 +289,46 @@
 	const mzap_ent_t *mze1 = arg1;
 	const mzap_ent_t *mze2 = arg2;
 
-	int cmp = AVL_CMP(mze1->mze_hash, mze2->mze_hash);
-	if (likely(cmp))
-		return (cmp);
-
-	return (AVL_CMP(mze1->mze_cd, mze2->mze_cd));
+	return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
+	    (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
 }
 
 static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash)
+mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
 {
+	mzap_ent_t mze;
+
 	ASSERT(zap->zap_ismicro);
 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 
-	mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
-	mze->mze_chunkid = chunkid;
-	mze->mze_hash = hash;
-	mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
-	ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
-	avl_add(&zap->zap_m.zap_avl, mze);
+	mze.mze_chunkid = chunkid;
+	ASSERT0(hash & 0xffffffff);
+	mze.mze_hash = hash >> 32;
+	ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
+	mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
+	ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
+	zfs_btree_add(&zap->zap_m.zap_tree, &mze);
 }
 
 static mzap_ent_t *
-mze_find(zap_name_t *zn)
+mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
 {
 	mzap_ent_t mze_tofind;
 	mzap_ent_t *mze;
-	avl_index_t idx;
-	avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
+	zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;
 
 	ASSERT(zn->zn_zap->zap_ismicro);
 	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
 
-	mze_tofind.mze_hash = zn->zn_hash;
+	ASSERT0(zn->zn_hash & 0xffffffff);
+	mze_tofind.mze_hash = zn->zn_hash >> 32;
 	mze_tofind.mze_cd = 0;
 
-	mze = avl_find(avl, &mze_tofind, &idx);
+	mze = zfs_btree_find(tree, &mze_tofind, idx);
 	if (mze == NULL)
-		mze = avl_nearest(avl, idx, AVL_AFTER);
-	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
+		mze = zfs_btree_next(tree, idx, idx);
+	for (; mze && mze->mze_hash == mze_tofind.mze_hash;
+	    mze = zfs_btree_next(tree, idx, idx)) {
 		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
 		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
 			return (mze);
@@ -331,18 +341,21 @@
 mze_find_unused_cd(zap_t *zap, uint64_t hash)
 {
 	mzap_ent_t mze_tofind;
-	avl_index_t idx;
-	avl_tree_t *avl = &zap->zap_m.zap_avl;
+	zfs_btree_index_t idx;
+	zfs_btree_t *tree = &zap->zap_m.zap_tree;
 
 	ASSERT(zap->zap_ismicro);
 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
 
+	ASSERT0(hash & 0xffffffff);
+	hash >>= 32;
 	mze_tofind.mze_hash = hash;
 	mze_tofind.mze_cd = 0;
 
 	uint32_t cd = 0;
-	for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx);
-	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+	    mze && mze->mze_hash == hash;
+	    mze = zfs_btree_next(tree, &idx, &idx)) {
 		if (mze->mze_cd != cd)
 			break;
 		cd++;
@@ -367,16 +380,18 @@
 {
 	zap_t *zap = zn->zn_zap;
 	mzap_ent_t mze_tofind;
-	mzap_ent_t *mze;
-	avl_index_t idx;
-	avl_tree_t *avl = &zap->zap_m.zap_avl;
+	zfs_btree_index_t idx;
+	zfs_btree_t *tree = &zap->zap_m.zap_tree;
 	uint32_t mzap_ents = 0;
 
+	ASSERT0(hash & 0xffffffff);
+	hash >>= 32;
 	mze_tofind.mze_hash = hash;
 	mze_tofind.mze_cd = 0;
 
-	for (mze = avl_find(avl, &mze_tofind, &idx);
-	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+	    mze && mze->mze_hash == hash;
+	    mze = zfs_btree_next(tree, &idx, &idx)) {
 		mzap_ents++;
 	}
 
@@ -387,24 +402,10 @@
 }
 
 static void
-mze_remove(zap_t *zap, mzap_ent_t *mze)
-{
-	ASSERT(zap->zap_ismicro);
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	avl_remove(&zap->zap_m.zap_avl, mze);
-	kmem_free(mze, sizeof (mzap_ent_t));
-}
-
-static void
 mze_destroy(zap_t *zap)
 {
-	mzap_ent_t *mze;
-	void *avlcookie = NULL;
-
-	while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)))
-		kmem_free(mze, sizeof (mzap_ent_t));
-	avl_destroy(&zap->zap_m.zap_avl);
+	zfs_btree_clear(&zap->zap_m.zap_tree);
+	zfs_btree_destroy(&zap->zap_m.zap_tree);
 }
 
 static zap_t *
@@ -451,21 +452,26 @@
 		zap->zap_salt = zap_m_phys(zap)->mz_salt;
 		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
 		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
-		avl_create(&zap->zap_m.zap_avl, mze_compare,
-		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 
-		for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
+		/*
+		 * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
+		 * overhead on massive inserts below.  It still allows storing
+		 * 62 entries before we have to add a 2KB B-tree core node.
+		 */
+		zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
+		    sizeof (mzap_ent_t), 512);
+
+		zap_name_t *zn = zap_name_alloc(zap);
+		for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 			mzap_ent_phys_t *mze =
 			    &zap_m_phys(zap)->mz_chunk[i];
 			if (mze->mze_name[0]) {
-				zap_name_t *zn;
-
 				zap->zap_m.zap_num_entries++;
-				zn = zap_name_alloc(zap, mze->mze_name, 0);
+				zap_name_init_str(zn, mze->mze_name, 0);
 				mze_insert(zap, i, zn->zn_hash);
-				zap_name_free(zn);
 			}
 		}
+		zap_name_free(zn);
 	} else {
 		zap->zap_salt = zap_f_phys(zap)->zap_salt;
 		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
@@ -564,7 +570,7 @@
 		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
 		if (newsz > MZAP_MAX_BLKSZ) {
 			dprintf("upgrading obj %llu: num_entries=%u\n",
-			    obj, zap->zap_m.zap_num_entries);
+			    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
 			*zapp = zap;
 			int err = mzap_upgrade(zapp, tag, tx, 0);
 			if (err != 0)
@@ -657,25 +663,26 @@
 	}
 
 	dprintf("upgrading obj=%llu with %u chunks\n",
-	    zap->zap_object, nchunks);
-	/* XXX destroy the avl later, so we can use the stored hash value */
+	    (u_longlong_t)zap->zap_object, nchunks);
+	/* XXX destroy the tree later, so we can use the stored hash value */
 	mze_destroy(zap);
 
 	fzap_upgrade(zap, tx, flags);
 
+	zap_name_t *zn = zap_name_alloc(zap);
 	for (int i = 0; i < nchunks; i++) {
 		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
 		if (mze->mze_name[0] == 0)
 			continue;
 		dprintf("adding %s=%llu\n",
-		    mze->mze_name, mze->mze_value);
-		zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0);
+		    mze->mze_name, (u_longlong_t)mze->mze_value);
+		zap_name_init_str(zn, mze->mze_name, 0);
 		/* If we fail here, we would end up losing entries */
 		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
 		    tag, tx));
 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
-		zap_name_free(zn);
 	}
+	zap_name_free(zn);
 	vmem_free(mzp, sz);
 	*zapp = zap;
 	return (0);
@@ -917,22 +924,23 @@
  * See also the comment above zap_entry_normalization_conflict().
  */
 static boolean_t
-mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
+mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
+    zfs_btree_index_t *idx)
 {
-	int direction = AVL_BEFORE;
 	boolean_t allocdzn = B_FALSE;
+	mzap_ent_t *other;
+	zfs_btree_index_t oidx;
 
 	if (zap->zap_normflags == 0)
 		return (B_FALSE);
 
-again:
-	for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
+	for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
 	    other && other->mze_hash == mze->mze_hash;
-	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
+	    other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
 
 		if (zn == NULL) {
-			zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
-			    MT_NORMALIZE);
+			zn = zap_name_alloc_str(zap,
+			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
 			allocdzn = B_TRUE;
 		}
 		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
@@ -942,9 +950,20 @@
 		}
 	}
 
-	if (direction == AVL_BEFORE) {
-		direction = AVL_AFTER;
-		goto again;
+	for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
+	    other && other->mze_hash == mze->mze_hash;
+	    other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
+
+		if (zn == NULL) {
+			zn = zap_name_alloc_str(zap,
+			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
+			allocdzn = B_TRUE;
+		}
+		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
+			if (allocdzn)
+				zap_name_free(zn);
+			return (B_TRUE);
+		}
 	}
 
 	if (allocdzn)
@@ -972,7 +991,7 @@
 {
 	int err = 0;
 
-	zap_name_t *zn = zap_name_alloc(zap, name, mt);
+	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
 	if (zn == NULL)
 		return (SET_ERROR(ENOTSUP));
 
@@ -980,7 +999,8 @@
 		err = fzap_lookup(zn, integer_size, num_integers, buf,
 		    realname, rn_len, ncp);
 	} else {
-		mzap_ent_t *mze = mze_find(zn);
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
 		if (mze == NULL) {
 			err = SET_ERROR(ENOENT);
 		} else {
@@ -991,11 +1011,13 @@
 			} else {
 				*(uint64_t *)buf =
 				    MZE_PHYS(zap, mze)->mze_value;
-				(void) strlcpy(realname,
-				    MZE_PHYS(zap, mze)->mze_name, rn_len);
+				if (realname != NULL)
+					(void) strlcpy(realname,
+					    MZE_PHYS(zap, mze)->mze_name,
+					    rn_len);
 				if (ncp) {
 					*ncp = mzap_normalization_conflict(zap,
-					    zn, mze);
+					    zn, mze, &idx);
 				}
 			}
 		}
@@ -1032,7 +1054,7 @@
 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
 	if (err)
 		return (err);
-	zn = zap_name_alloc(zap, name, 0);
+	zn = zap_name_alloc_str(zap, name, 0);
 	if (zn == NULL) {
 		zap_unlockdir(zap, FTAG);
 		return (SET_ERROR(ENOTSUP));
@@ -1135,7 +1157,7 @@
 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
 	if (err != 0)
 		return (err);
-	zap_name_t *zn = zap_name_alloc(zap, name, 0);
+	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
 	if (zn == NULL) {
 		zap_unlockdir(zap, FTAG);
 		return (SET_ERROR(ENOTSUP));
@@ -1143,7 +1165,8 @@
 	if (!zap->zap_ismicro) {
 		err = fzap_length(zn, integer_size, num_integers);
 	} else {
-		mzap_ent_t *mze = mze_find(zn);
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
 		if (mze == NULL) {
 			err = SET_ERROR(ENOENT);
 		} else {
@@ -1183,7 +1206,7 @@
 mzap_addent(zap_name_t *zn, uint64_t value)
 {
 	zap_t *zap = zn->zn_zap;
-	int start = zap->zap_m.zap_alloc_next;
+	uint16_t start = zap->zap_m.zap_alloc_next;
 
 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 
@@ -1199,7 +1222,7 @@
 	ASSERT(cd < zap_maxcd(zap));
 
 again:
-	for (int i = start; i < zap->zap_m.zap_num_chunks; i++) {
+	for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
 		if (mze->mze_name[0] == 0) {
 			mze->mze_value = value;
@@ -1230,7 +1253,7 @@
 	const uint64_t *intval = val;
 	int err = 0;
 
-	zap_name_t *zn = zap_name_alloc(zap, key, 0);
+	zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
 	if (zn == NULL) {
 		zap_unlockdir(zap, tag);
 		return (SET_ERROR(ENOTSUP));
@@ -1248,7 +1271,8 @@
 		}
 		zap = zn->zn_zap;	/* fzap_add() may change zap */
 	} else {
-		if (mze_find(zn) != NULL) {
+		zfs_btree_index_t idx;
+		if (mze_find(zn, &idx) != NULL) {
 			err = SET_ERROR(EEXIST);
 		} else {
 			mzap_addent(zn, *intval);
@@ -1328,7 +1352,7 @@
 	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
 	if (err != 0)
 		return (err);
-	zap_name_t *zn = zap_name_alloc(zap, name, 0);
+	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
 	if (zn == NULL) {
 		zap_unlockdir(zap, FTAG);
 		return (SET_ERROR(ENOTSUP));
@@ -1340,7 +1364,8 @@
 	} else if (integer_size != 8 || num_integers != 1 ||
 	    strlen(name) >= MZAP_NAME_LEN) {
 		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
-		    zapobj, integer_size, num_integers, name);
+		    (u_longlong_t)zapobj, integer_size,
+		    (u_longlong_t)num_integers, name);
 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
 		if (err == 0) {
 			err = fzap_update(zn, integer_size, num_integers,
@@ -1348,7 +1373,8 @@
 		}
 		zap = zn->zn_zap;	/* fzap_update() may change zap */
 	} else {
-		mzap_ent_t *mze = mze_find(zn);
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
 		if (mze != NULL) {
 			MZE_PHYS(zap, mze)->mze_value = *intval;
 		} else {
@@ -1398,20 +1424,20 @@
 {
 	int err = 0;
 
-	zap_name_t *zn = zap_name_alloc(zap, name, mt);
+	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
 	if (zn == NULL)
 		return (SET_ERROR(ENOTSUP));
 	if (!zap->zap_ismicro) {
 		err = fzap_remove(zn, tx);
 	} else {
-		mzap_ent_t *mze = mze_find(zn);
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
 		if (mze == NULL) {
 			err = SET_ERROR(ENOENT);
 		} else {
 			zap->zap_m.zap_num_entries--;
-			bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid],
-			    sizeof (mzap_ent_phys_t));
-			mze_remove(zap, mze);
+			memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
+			zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
 		}
 	}
 	zap_name_free(zn);
@@ -1582,29 +1608,30 @@
 	if (!zc->zc_zap->zap_ismicro) {
 		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
 	} else {
-		avl_index_t idx;
+		zfs_btree_index_t idx;
 		mzap_ent_t mze_tofind;
 
-		mze_tofind.mze_hash = zc->zc_hash;
+		mze_tofind.mze_hash = zc->zc_hash >> 32;
 		mze_tofind.mze_cd = zc->zc_cd;
 
-		mzap_ent_t *mze =
-		    avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
+		mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
+		    &mze_tofind, &idx);
 		if (mze == NULL) {
-			mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
-			    idx, AVL_AFTER);
+			mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
+			    &idx, &idx);
 		}
 		if (mze) {
 			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
 			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
 			za->za_normalization_conflict =
-			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
+			    mzap_normalization_conflict(zc->zc_zap, NULL,
+			    mze, &idx);
 			za->za_integer_length = 8;
 			za->za_num_integers = 1;
 			za->za_first_integer = mzep->mze_value;
 			(void) strlcpy(za->za_name, mzep->mze_name,
 			    sizeof (za->za_name));
-			zc->zc_hash = mze->mze_hash;
+			zc->zc_hash = (uint64_t)mze->mze_hash << 32;
 			zc->zc_cd = mze->mze_cd;
 			err = 0;
 		} else {

diff --git a/zfs/module/zfs/zcp.c b/zfs/module/zfs/zcp.c
index 44e4d23..3f11445 100644
--- a/zfs/module/zfs/zcp.c
+++ b/zfs/module/zfs/zcp.c

@@ -100,6 +100,7 @@
 #include <sys/zcp_iter.h>
 #include <sys/zcp_prop.h>
 #include <sys/zcp_global.h>
+#include <sys/zvol.h>
 
 #ifndef KM_NORMALPRI
 #define	KM_NORMALPRI	0
@@ -276,9 +277,9 @@
 			}
 			break;
 		case LUA_TNUMBER:
-			VERIFY3U(sizeof (buf), >,
-			    snprintf(buf, sizeof (buf), "%lld",
-			    (longlong_t)lua_tonumber(state, -2)));
+			(void) snprintf(buf, sizeof (buf), "%lld",
+			    (longlong_t)lua_tonumber(state, -2));
+
 			key = buf;
 			if (saw_str_could_collide) {
 				key_could_collide = B_TRUE;
@@ -395,7 +396,7 @@
 	case LUA_TTABLE: {
 		nvlist_t *value_nvl = zcp_table_to_nvlist(state, index, depth);
 		if (value_nvl == NULL)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		fnvlist_add_nvlist(nvl, key, value_nvl);
 		fnvlist_free(value_nvl);
@@ -405,7 +406,7 @@
 		(void) lua_pushfstring(state,
 		    "Invalid value type '%s' for key '%s'",
 		    lua_typename(state, lua_type(state, index)), key);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	return (0);
@@ -584,7 +585,7 @@
 			    "Unhandled nvpair type %d for key '%s'",
 			    nvpair_type(pair), nvpair_name(pair));
 		}
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 	}
 	return (err);
@@ -653,7 +654,8 @@
 
 	dbgstring = lua_tostring(state, 1);
 
-	zfs_dbgmsg("txg %lld ZCP: %s", ri->zri_tx->tx_txg, dbgstring);
+	zfs_dbgmsg("txg %lld ZCP: %s", (longlong_t)ri->zri_tx->tx_txg,
+	    dbgstring);
 
 	return (0);
 }
@@ -721,8 +723,6 @@
 zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
 {
 	zcp_alloc_arg_t *allocargs = ud;
-	int flags = (allocargs->aa_must_succeed) ?
-	    KM_SLEEP : (KM_NOSLEEP | KM_NORMALPRI);
 
 	if (nsize == 0) {
 		if (ptr != NULL) {
@@ -745,10 +745,7 @@
 			return (NULL);
 		}
 
-		allocbuf = vmem_alloc(allocsize, flags);
-		if (allocbuf == NULL) {
-			return (NULL);
-		}
+		allocbuf = vmem_alloc(allocsize, KM_SLEEP);
 		allocargs->aa_alloc_remaining -= allocsize;
 
 		*allocbuf = allocsize;
@@ -772,10 +769,10 @@
 	}
 }
 
-/* ARGSUSED */
 static void
 zcp_lua_counthook(lua_State *state, lua_Debug *ar)
 {
+	(void) ar;
 	lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY);
 	zcp_run_info_t *ri = lua_touserdata(state, -1);
 
@@ -977,10 +974,10 @@
  * The txg_wait_synced_sig will continue to wait for the txg to complete
  * after calling this callback.
  */
-/* ARGSUSED */
 static void
 zcp_eval_sig(void *arg, dmu_tx_t *tx)
 {
+	(void) tx;
 	zcp_run_info_t *ri = arg;
 
 	ri->zri_canceled = B_TRUE;
@@ -1149,12 +1146,14 @@
 	runinfo.zri_outnvl = outnvl;
 	runinfo.zri_result = 0;
 	runinfo.zri_cred = CRED();
+	runinfo.zri_proc = curproc;
 	runinfo.zri_timed_out = B_FALSE;
 	runinfo.zri_canceled = B_FALSE;
 	runinfo.zri_sync = sync;
 	runinfo.zri_space_used = 0;
 	runinfo.zri_curinstrs = 0;
 	runinfo.zri_maxinstrs = instrlimit;
+	runinfo.zri_new_zvols = fnvlist_alloc();
 
 	if (sync) {
 		err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync,
@@ -1166,6 +1165,16 @@
 	}
 	lua_close(state);
 
+	/*
+	 * Create device minor nodes for any new zvols.
+	 */
+	for (nvpair_t *pair = nvlist_next_nvpair(runinfo.zri_new_zvols, NULL);
+	    pair != NULL;
+	    pair = nvlist_next_nvpair(runinfo.zri_new_zvols, pair)) {
+		zvol_create_minor(nvpair_name(pair));
+	}
+	fnvlist_free(runinfo.zri_new_zvols);
+
 	return (runinfo.zri_result);
 }
 
@@ -1434,14 +1443,10 @@
 	}
 }
 
-#if defined(_KERNEL)
 /* BEGIN CSTYLED */
-module_param(zfs_lua_max_instrlimit, ulong, 0644);
-MODULE_PARM_DESC(zfs_lua_max_instrlimit,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_instrlimit, ULONG, ZMOD_RW,
 	"Max instruction limit that can be specified for a channel program");
 
-module_param(zfs_lua_max_memlimit, ulong, 0644);
-MODULE_PARM_DESC(zfs_lua_max_memlimit,
+ZFS_MODULE_PARAM(zfs_lua, zfs_lua_, max_memlimit, ULONG, ZMOD_RW,
 	"Max memory limit that can be specified for a channel program");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/zcp_get.c b/zfs/module/zfs/zcp_get.c
index 35996b8..7256e4d 100644
--- a/zfs/module/zfs/zcp_get.c
+++ b/zfs/module/zfs/zcp_get.c

@@ -34,11 +34,13 @@
 #include <sys/zcp.h>
 #include <sys/zcp_iter.h>
 #include <sys/zcp_global.h>
+#include <sys/zcp_prop.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_znode.h>
 #include <sys/zvol.h>
 
 #ifdef _KERNEL
+#include <sys/zfs_quota.h>
 #include <sys/zfs_vfsops.h>
 #endif
 
@@ -206,92 +208,13 @@
 		break;
 	default:
 		mutex_exit(&dd->dd_lock);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 	mutex_exit(&dd->dd_lock);
 	return (0);
 }
 
 /*
- * Takes a dataset, a property, a value and that value's setpoint as
- * found in the ZAP. Checks if the property has been changed in the vfs.
- * If so, val and setpoint will be overwritten with updated content.
- * Otherwise, they are left unchanged.
- */
-static int
-get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
-    char *setpoint)
-{
-#if	!defined(_KERNEL)
-	return (0);
-#else
-	int error;
-	zfsvfs_t *zfvp;
-	vfs_t *vfsp;
-	objset_t *os;
-	uint64_t tmp = *val;
-
-	error = dmu_objset_from_ds(ds, &os);
-	if (error != 0)
-		return (error);
-
-	if (dmu_objset_type(os) != DMU_OST_ZFS)
-		return (EINVAL);
-
-	mutex_enter(&os->os_user_ptr_lock);
-	zfvp = dmu_objset_get_user(os);
-	mutex_exit(&os->os_user_ptr_lock);
-	if (zfvp == NULL)
-		return (ESRCH);
-
-	vfsp = zfvp->z_vfs;
-
-	switch (zfs_prop) {
-	case ZFS_PROP_ATIME:
-		if (vfsp->vfs_do_atime)
-			tmp = vfsp->vfs_atime;
-		break;
-	case ZFS_PROP_RELATIME:
-		if (vfsp->vfs_do_relatime)
-			tmp = vfsp->vfs_relatime;
-		break;
-	case ZFS_PROP_DEVICES:
-		if (vfsp->vfs_do_devices)
-			tmp = vfsp->vfs_devices;
-		break;
-	case ZFS_PROP_EXEC:
-		if (vfsp->vfs_do_exec)
-			tmp = vfsp->vfs_exec;
-		break;
-	case ZFS_PROP_SETUID:
-		if (vfsp->vfs_do_setuid)
-			tmp = vfsp->vfs_setuid;
-		break;
-	case ZFS_PROP_READONLY:
-		if (vfsp->vfs_do_readonly)
-			tmp = vfsp->vfs_readonly;
-		break;
-	case ZFS_PROP_XATTR:
-		if (vfsp->vfs_do_xattr)
-			tmp = vfsp->vfs_xattr;
-		break;
-	case ZFS_PROP_NBMAND:
-		if (vfsp->vfs_do_nbmand)
-			tmp = vfsp->vfs_nbmand;
-		break;
-	default:
-		return (ENOENT);
-	}
-
-	if (tmp != *val) {
-		(void) strcpy(setpoint, "temporary");
-		*val = tmp;
-	}
-	return (0);
-#endif
-}
-
-/*
  * Check if the property we're looking for is stored at the dsl_dataset or
  * dsl_dir level. If so, push the property value and source onto the lua stack
  * and return 0. If it is not present or a failure occurs in lookup, return a
@@ -431,9 +354,9 @@
 			if (strcmp(strval, "") == 0)
 				error = ENOENT;
 
-			strfree(childval);
+			kmem_strfree(childval);
 		}
-		strfree(token);
+		kmem_strfree(token);
 		break;
 	}
 	case ZFS_PROP_VOLSIZE:
@@ -548,9 +471,14 @@
 		error = dsl_prop_get_ds(ds, prop_name, sizeof (numval),
 		    1, &numval, setpoint);
 
+#ifdef _KERNEL
 		/* Fill in temporary value for prop, if applicable */
-		(void) get_temporary_prop(ds, zfs_prop, &numval, setpoint);
-
+		(void) zfs_get_temporary_prop(ds, zfs_prop, &numval, setpoint);
+#else
+		return (luaL_error(state,
+		    "temporary properties only supported in kernel mode",
+		    prop_name));
+#endif
 		/* Push value to lua stack */
 		if (prop_type == PROP_TYPE_INDEX) {
 			const char *propval;
@@ -662,7 +590,7 @@
  * prop type as well as the numeric group/user ids based on the string
  * following the '@' in the property name. On success, returns 0. On failure,
  * returns a non-zero error.
- * 'domain' must be free'd by caller using strfree()
+ * 'domain' must be free'd by caller using kmem_strfree()
  */
 static int
 parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type,
@@ -689,7 +617,7 @@
 
 		(void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid);
 		if (*end != '\0') {
-			strfree(domain_val);
+			kmem_strfree(domain_val);
 			return (EINVAL);
 		}
 	} else {
@@ -737,13 +665,13 @@
 			}
 		}
 		if (domain != NULL)
-			strfree(domain);
+			kmem_strfree(domain);
 	}
 	dsl_dataset_rele(ds, FTAG);
 
 	if ((value == 0) && ((type == ZFS_PROP_USERQUOTA) ||
 	    (type == ZFS_PROP_GROUPQUOTA)))
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 	if (error != 0) {
 		return (zcp_handle_error(state, dataset_name,
 		    prop_name, error));

diff --git a/zfs/module/zfs/zcp_iter.c b/zfs/module/zfs/zcp_iter.c
index d6e0b54..f727c56 100644
--- a/zfs/module/zfs/zcp_iter.c
+++ b/zfs/module/zfs/zcp_iter.c

@@ -14,7 +14,7 @@
  */
 
 /*
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2018 by Delphix. All rights reserved.
  */
 
 #include <sys/lua/lua.h>
@@ -23,6 +23,7 @@
 #include <sys/dmu.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_synctask.h>
+#include <sys/dsl_bookmark.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_pool.h>
 #include <sys/dmu_tx.h>
@@ -124,8 +125,6 @@
 {
 	const char *snapname = lua_tostring(state, 1);
 	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
-	boolean_t issnap;
-	uint64_t dsobj, cursor;
 
 	/*
 	 * zcp_dataset_hold will either successfully return the requested
@@ -135,9 +134,9 @@
 	dsl_dataset_t *ds = zcp_dataset_hold(state, dp, snapname, FTAG);
 	if (ds == NULL)
 		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
-	cursor = 0;
-	issnap = ds->ds_is_snapshot;
-	dsobj = ds->ds_object;
+	boolean_t issnap = ds->ds_is_snapshot;
+	uint64_t cursor = 0;
+	uint64_t dsobj = ds->ds_object;
 	dsl_dataset_rele(ds, FTAG);
 
 	if (!issnap) {
@@ -323,7 +322,7 @@
 }
 
 static int
-zcp_props_list_gc(lua_State *state)
+zcp_user_props_list_gc(lua_State *state)
 {
 	nvlist_t **props = lua_touserdata(state, 1);
 	if (*props != NULL)
@@ -332,7 +331,7 @@
 }
 
 static int
-zcp_props_iter(lua_State *state)
+zcp_user_props_iter(lua_State *state)
 {
 	char *source, *val;
 	nvlist_t *nvprop;
@@ -361,11 +360,33 @@
 	return (3);
 }
 
-static int zcp_props_list(lua_State *);
+static int zcp_user_props_list(lua_State *);
+static zcp_list_info_t zcp_user_props_list_info = {
+	.name = "user_properties",
+	.func = zcp_user_props_list,
+	.gc = zcp_user_props_list_gc,
+	.pargs = {
+	    { .za_name = "filesystem | snapshot | volume",
+	    .za_lua_type = LUA_TSTRING},
+	    {NULL, 0}
+	},
+	.kwargs = {
+	    {NULL, 0}
+	}
+};
+
+/*
+ * 'properties' was the initial name for 'user_properties' seen
+ * above. 'user_properties' is a better name as it distinguishes
+ * these properties from 'system_properties' which are different.
+ * In order to avoid breaking compatibility between different
+ * versions of ZFS, we declare 'properties' as an alias for
+ * 'user_properties'.
+ */
 static zcp_list_info_t zcp_props_list_info = {
 	.name = "properties",
-	.func = zcp_props_list,
-	.gc = zcp_props_list_gc,
+	.func = zcp_user_props_list,
+	.gc = zcp_user_props_list_gc,
 	.pargs = {
 	    { .za_name = "filesystem | snapshot | volume",
 	    .za_lua_type = LUA_TSTRING},
@@ -377,7 +398,7 @@
 };
 
 static int
-zcp_props_list(lua_State *state)
+zcp_user_props_list(lua_State *state)
 {
 	const char *dsname = lua_tostring(state, 1);
 	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
@@ -392,23 +413,24 @@
 	dsl_dataset_rele(ds, FTAG);
 
 	/*
-	 * Set the metatable for the properties list to free it on completion.
+	 * Set the metatable for the properties list to free it on
+	 * completion.
 	 */
-	luaL_getmetatable(state, zcp_props_list_info.name);
+	luaL_getmetatable(state, zcp_user_props_list_info.name);
 	(void) lua_setmetatable(state, -2);
 
 	lua_pushlightuserdata(state, NULL);
-	lua_pushcclosure(state, &zcp_props_iter, 2);
+	lua_pushcclosure(state, &zcp_user_props_iter, 2);
 	return (1);
 }
 
 
 /*
- * Populate nv with all valid properties and their values for the given
+ * Populate nv with all valid system properties and their values for the given
  * dataset.
  */
 static void
-zcp_dataset_props(dsl_dataset_t *ds, nvlist_t *nv)
+zcp_dataset_system_props(dsl_dataset_t *ds, nvlist_t *nv)
 {
 	for (int prop = ZFS_PROP_TYPE; prop < ZFS_NUM_PROPS; prop++) {
 		/* Do not display hidden props */
@@ -435,8 +457,8 @@
 };
 
 /*
- * Get a list of all visible properties and their values for a given dataset.
- * Returned on the stack as a Lua table.
+ * Get a list of all visible system properties and their values for a given
+ * dataset. Returned on the stack as a Lua table.
  */
 static int
 zcp_system_props_list(lua_State *state)
@@ -454,8 +476,8 @@
 	if (ds == NULL)
 		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
 
-	/* Get the names of all valid properties for this dataset */
-	zcp_dataset_props(ds, nv);
+	/* Get the names of all valid system properties for this dataset */
+	zcp_dataset_system_props(ds, nv);
 	dsl_dataset_rele(ds, FTAG);
 
 	/* push list as lua table */
@@ -469,6 +491,213 @@
 }
 
+/*
+ * Iterator closure created by zcp_bookmarks_list(); each call yields at
+ * most one bookmark.
+ *
+ * Upvalue 1 holds the dataset's object number and upvalue 2 the serialized
+ * ZAP cursor, which is advanced and written back on every successful call.
+ *
+ * Returns one Lua value, the full "dataset#bookmark" name, or no values
+ * when the dataset cannot be found, is not zapified, has no bookmarks
+ * object, or the cursor has no further entry.  Any other failure is raised
+ * as a Lua error.
+ */
 static int
+zcp_bookmarks_iter(lua_State *state)
+{
+	char ds_name[ZFS_MAX_DATASET_NAME_LEN];
+	char bookmark_name[ZFS_MAX_DATASET_NAME_LEN];
+	uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1));
+	uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2));
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *ds;
+	zap_attribute_t za;
+	zap_cursor_t zc;
+
+	/* The dataset is re-held by object number on every iteration. */
+	int err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	if (err == ENOENT) {
+		return (0);
+	} else if (err != 0) {
+		return (luaL_error(state,
+		    "unexpected error %d from dsl_dataset_hold_obj(dsobj)",
+		    err));
+	}
+
+	if (!dsl_dataset_is_zapified(ds)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (0);
+	}
+
+	/*
+	 * Look up the bookmarks object.  ENOENT is tolerated here; in that
+	 * case ds_bookmarks_obj is left as it was and the zero check below
+	 * decides whether there is anything to iterate.
+	 */
+	err = zap_lookup(dp->dp_meta_objset, ds->ds_object,
+	    DS_FIELD_BOOKMARK_NAMES, sizeof (ds->ds_bookmarks_obj), 1,
+	    &ds->ds_bookmarks_obj);
+	if (err != 0 && err != ENOENT) {
+		dsl_dataset_rele(ds, FTAG);
+		return (luaL_error(state,
+		    "unexpected error %d from zap_lookup()", err));
+	}
+	if (ds->ds_bookmarks_obj == 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (0);
+	}
+
+	/* Store the dataset's name so we can append the bookmark's name */
+	dsl_dataset_name(ds, ds_name);
+
+	/*
+	 * The cursor is set up while the hold is still in place; the hold
+	 * is released before the entry is actually retrieved.
+	 */
+	zap_cursor_init_serialized(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
+	    ds->ds_bookmarks_obj, cursor);
+	dsl_dataset_rele(ds, FTAG);
+
+	err = zap_cursor_retrieve(&zc, &za);
+	if (err != 0) {
+		zap_cursor_fini(&zc);
+		if (err != ENOENT) {
+			return (luaL_error(state,
+			    "unexpected error %d from zap_cursor_retrieve()",
+			    err));
+		}
+		/* ENOENT from the cursor: return no values. */
+		return (0);
+	}
+	/* Step past the retrieved entry and remember where to resume. */
+	zap_cursor_advance(&zc);
+	cursor = zap_cursor_serialize(&zc);
+	zap_cursor_fini(&zc);
+
+	/* Create the full "pool/fs#bookmark" string to return */
+	int n = snprintf(bookmark_name, ZFS_MAX_DATASET_NAME_LEN, "%s#%s",
+	    ds_name, za.za_name);
+	/* n >= buffer size means the combined name did not fit. */
+	if (n >= ZFS_MAX_DATASET_NAME_LEN) {
+		return (luaL_error(state,
+		    "unexpected error %d from snprintf()", ENAMETOOLONG));
+	}
+
+	/* Write the advanced cursor back into upvalue 2 for the next call. */
+	lua_pushnumber(state, cursor);
+	lua_replace(state, lua_upvalueindex(2));
+
+	(void) lua_pushstring(state, bookmark_name);
+	return (1);
+}
+
+static int zcp_bookmarks_list(lua_State *);
+
+/*
+ * List-function descriptor published under the name "bookmarks".  It takes
+ * one positional string argument, no keyword arguments, and has no gc
+ * callback.
+ */
+static zcp_list_info_t zcp_bookmarks_list_info = {
+	.name = "bookmarks",
+	.func = zcp_bookmarks_list,
+	.gc = NULL,
+	.pargs = {
+		{ .za_name = "dataset", .za_lua_type = LUA_TSTRING },
+		{ NULL, 0 }
+	},
+	.kwargs = {
+		{ NULL, 0 }
+	}
+};
+
+/*
+ * Entry point for the "bookmarks" list function.  Rejects snapshots with
+ * an argument error; otherwise returns an iterator closure whose upvalues
+ * are the dataset's object number and a cursor starting at zero.
+ */
+static int
+zcp_bookmarks_list(lua_State *state)
+{
+	const char *dsname = lua_tostring(state, 1);
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *held = zcp_dataset_hold(state, dp, dsname, FTAG);
+
+	if (held == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Capture what the iterator needs, then drop the hold. */
+	const uint64_t objnum = held->ds_object;
+	const boolean_t is_snapshot = held->ds_is_snapshot;
+	dsl_dataset_rele(held, FTAG);
+
+	if (is_snapshot)
+		return (zcp_argerror(state, 1, "%s is a snapshot", dsname));
+
+	/* Upvalues: (1) dataset object number, (2) cursor starting at 0. */
+	const uint64_t start_cursor = 0;
+	lua_pushnumber(state, objnum);
+	lua_pushnumber(state, start_cursor);
+	lua_pushcclosure(state, &zcp_bookmarks_iter, 2);
+	return (1);
+}
+
+/*
+ * Iterator closure created by zcp_holds_list(); each call yields at most
+ * one entry from the dataset's ds_userrefs_obj ZAP.
+ *
+ * Upvalue 1 holds the dataset's object number and upvalue 2 the serialized
+ * ZAP cursor, which is advanced and written back on every successful call.
+ *
+ * Returns two Lua values, the entry's name and its first integer, or no
+ * values when the dataset cannot be found, has no ds_userrefs_obj, or the
+ * cursor has no further entry.  Any other failure is raised as a Lua
+ * error.
+ */
+static int
+zcp_holds_iter(lua_State *state)
+{
+	uint64_t dsobj = lua_tonumber(state, lua_upvalueindex(1));
+	uint64_t cursor = lua_tonumber(state, lua_upvalueindex(2));
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *ds;
+	zap_attribute_t za;
+	zap_cursor_t zc;
+
+	/* The dataset is re-held by object number on every iteration. */
+	int err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	if (err == ENOENT) {
+		return (0);
+	} else if (err != 0) {
+		return (luaL_error(state,
+		    "unexpected error %d from dsl_dataset_hold_obj(dsobj)",
+		    err));
+	}
+
+	/* A zero ds_userrefs_obj means there is nothing to iterate. */
+	if (dsl_dataset_phys(ds)->ds_userrefs_obj == 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (0);
+	}
+
+	/*
+	 * The cursor is set up while the hold is still in place; the hold
+	 * is released before the entry is actually retrieved.
+	 */
+	zap_cursor_init_serialized(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
+	    dsl_dataset_phys(ds)->ds_userrefs_obj, cursor);
+	dsl_dataset_rele(ds, FTAG);
+
+	err = zap_cursor_retrieve(&zc, &za);
+	if (err != 0) {
+		zap_cursor_fini(&zc);
+		if (err != ENOENT) {
+			return (luaL_error(state,
+			    "unexpected error %d from zap_cursor_retrieve()",
+			    err));
+		}
+		/* ENOENT from the cursor: return no values. */
+		return (0);
+	}
+	/* Step past the retrieved entry and remember where to resume. */
+	zap_cursor_advance(&zc);
+	cursor = zap_cursor_serialize(&zc);
+	zap_cursor_fini(&zc);
+
+	/* Write the advanced cursor back into upvalue 2 for the next call. */
+	lua_pushnumber(state, cursor);
+	lua_replace(state, lua_upvalueindex(2));
+
+	(void) lua_pushstring(state, za.za_name);
+	(void) lua_pushnumber(state, za.za_first_integer);
+	return (2);
+}
+
+static int zcp_holds_list(lua_State *);
+
+/*
+ * List-function descriptor published under the name "holds".  It takes one
+ * positional string argument, no keyword arguments, and has no gc callback.
+ */
+static zcp_list_info_t zcp_holds_list_info = {
+	.name = "holds",
+	.func = zcp_holds_list,
+	.gc = NULL,
+	.pargs = {
+		{ .za_name = "snapshot", .za_lua_type = LUA_TSTRING },
+		{ NULL, 0 }
+	},
+	.kwargs = {
+		{ NULL, 0 }
+	}
+};
+
+/*
+ * Entry point for the "holds" list function.  Verifies that the named
+ * dataset is a snapshot and returns an iterator closure; each iteration
+ * returns a hold's tag and its timestamp as an integer.
+ */
+static int
+zcp_holds_list(lua_State *state)
+{
+	const char *snapname = lua_tostring(state, 1);
+	dsl_pool_t *dp = zcp_run_info(state)->zri_pool;
+	dsl_dataset_t *held = zcp_dataset_hold(state, dp, snapname, FTAG);
+
+	if (held == NULL)
+		return (1); /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Capture what the iterator needs, then drop the hold. */
+	const uint64_t objnum = held->ds_object;
+	const boolean_t is_snapshot = held->ds_is_snapshot;
+	dsl_dataset_rele(held, FTAG);
+
+	if (!is_snapshot) {
+		return (zcp_argerror(state, 1, "%s is not a snapshot",
+		    snapname));
+	}
+
+	/* Upvalues: (1) dataset object number, (2) cursor starting at 0. */
+	const uint64_t start_cursor = 0;
+	lua_pushnumber(state, objnum);
+	lua_pushnumber(state, start_cursor);
+	lua_pushcclosure(state, &zcp_holds_iter, 2);
+	return (1);
+}
+
+static int
 zcp_list_func(lua_State *state)
 {
 	zcp_list_info_t *info = lua_touserdata(state, lua_upvalueindex(1));
@@ -485,9 +714,12 @@
 	zcp_list_info_t *zcp_list_funcs[] = {
 		&zcp_children_list_info,
 		&zcp_snapshots_list_info,
+		&zcp_user_props_list_info,
 		&zcp_props_list_info,
 		&zcp_clones_list_info,
 		&zcp_system_props_list_info,
+		&zcp_bookmarks_list_info,
+		&zcp_holds_list_info,
 		NULL
 	};
 

diff --git a/zfs/module/zfs/zcp_set.c b/zfs/module/zfs/zcp_set.c
new file mode 100644
index 0000000..cebb56a
--- /dev/null
+++ b/zfs/module/zfs/zcp_set.c

@@ -0,0 +1,100 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <sys/lua/lua.h>
+#include <sys/lua/lualib.h>
+#include <sys/lua/lauxlib.h>
+
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dsl_dataset.h>
+#include <sys/zcp.h>
+#include <sys/zcp_set.h>
+#include <sys/zcp_iter.h>
+#include <sys/zcp_global.h>
+#include <sys/zvol.h>
+
+#include <zfs_prop.h>
+
+/*
+ * Set one user property on dsname with source ZPROP_SRC_LOCAL.
+ *
+ * The dataset is held for the duration of the update and the property is
+ * handed to dsl_props_set_sync_impl() as a one-entry nvlist.
+ */
+static void
+zcp_set_user_prop(lua_State *state, dsl_pool_t *dp, const char *dsname,
+    const char *prop_name, const char *prop_val, dmu_tx_t *tx)
+{
+	dsl_dataset_t *target = zcp_dataset_hold(state, dp, dsname, FTAG);
+
+	if (target == NULL)
+		return; /* not reached; zcp_dataset_hold() longjmp'd */
+
+	/* Allocate only after the hold succeeded. */
+	nvlist_t *props = fnvlist_alloc();
+
+	fnvlist_add_string(props, prop_name, prop_val);
+	dsl_props_set_sync_impl(target, ZPROP_SRC_LOCAL, props, tx);
+	fnvlist_free(props);
+
+	dsl_dataset_rele(target, FTAG);
+}
+
+/*
+ * Check function paired with zcp_set_prop_sync().
+ *
+ * arg is a zcp_set_prop_arg_t naming the dataset, property and value.
+ * Returns EINVAL when the property is not a user property; otherwise
+ * returns the result of dsl_props_set_check() for a one-entry property
+ * list built from the request.
+ */
+int
+zcp_set_prop_check(void *arg, dmu_tx_t *tx)
+{
+	zcp_set_prop_arg_t *args = arg;
+	dsl_props_set_arg_t dpsa = {
+		.dpsa_dsname = args->dsname,
+		.dpsa_source = ZPROP_SRC_LOCAL,
+	};
+
+	/*
+	 * Only user properties are currently supported. When non-user
+	 * properties are supported, we will want to use
+	 * zfs_valid_proplist() to verify the properties.
+	 */
+	if (!zfs_prop_user(args->prop))
+		return (EINVAL);
+
+	nvlist_t *props = fnvlist_alloc();
+	fnvlist_add_string(props, args->prop, args->val);
+	dpsa.dpsa_props = props;
+
+	int result = dsl_props_set_check(&dpsa, tx);
+
+	/* Allocated with fnvlist_alloc(), so release with fnvlist_free(). */
+	fnvlist_free(props);
+
+	return (result);
+}
+
+/*
+ * Sync function paired with zcp_set_prop_check().
+ *
+ * arg is a zcp_set_prop_arg_t.  User properties are applied through
+ * zcp_set_user_prop(); any other property name is silently ignored.
+ */
+void
+zcp_set_prop_sync(void *arg, dmu_tx_t *tx)
+{
+	zcp_set_prop_arg_t *args = arg;
+	dsl_pool_t *dp = zcp_run_info(args->state)->zri_pool;
+
+	if (!zfs_prop_user(args->prop))
+		return;
+
+	zcp_set_user_prop(args->state, dp, args->dsname, args->prop,
+	    args->val, tx);
+}

diff --git a/zfs/module/zfs/zcp_synctask.c b/zfs/module/zfs/zcp_synctask.c
index e089666..bfcdbcf 100644
--- a/zfs/module/zfs/zcp_synctask.c
+++ b/zfs/module/zfs/zcp_synctask.c

@@ -15,12 +15,15 @@
 
 /*
  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
  */
 
 #include <sys/lua/lua.h>
 #include <sys/lua/lauxlib.h>
 
 #include <sys/zcp.h>
+#include <sys/zcp_set.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_prop.h>
@@ -35,6 +38,12 @@
 
 #define	DST_AVG_BLKSHIFT 14
 
+/*
+ * Argument bundle shared by the check and sync callbacks of the
+ * "inherit" sync task.
+ */
+typedef struct zcp_inherit_prop_arg {
+	lua_State		*zipa_state;	/* calling Lua state */
+	const char		*zipa_prop;	/* name of property to inherit */
+	dsl_props_set_arg_t	zipa_dpsa;	/* dsl_props_set check/sync arg */
+} zcp_inherit_prop_arg_t;
+
 typedef int (zcp_synctask_func_t)(lua_State *, boolean_t, nvlist_t *);
 typedef struct zcp_synctask_info {
 	const char *name;
@@ -45,6 +54,12 @@
 	int blocks_modified;
 } zcp_synctask_info_t;
 
+/*
+ * Cleanup callback handed to zcp_register_cleanup(): frees the nvlist that
+ * was registered as the handler's argument.
+ */
+static void
+zcp_synctask_cleanup(void *arg)
+{
+	nvlist_t *nvl = arg;
+
+	fnvlist_free(nvl);
+}
+
 /*
  * Generic synctask interface for channel program syncfuncs.
  *
@@ -114,10 +129,10 @@
 	.blocks_modified = 0
 };
 
-/* ARGSUSED */
 static int
 zcp_synctask_destroy(lua_State *state, boolean_t sync, nvlist_t *err_details)
 {
+	(void) err_details;
 	int err;
 	const char *dsname = lua_tostring(state, 1);
 
@@ -177,6 +192,7 @@
 	ddpa.ddpa_clonename = dsname;
 	ddpa.err_ds = err_details;
 	ddpa.cr = ri->zri_cred;
+	ddpa.proc = ri->zri_proc;
 
 	/*
 	 * If there was a snapshot name conflict, then err_ds will be filled
@@ -235,10 +251,10 @@
 	.blocks_modified = 3
 };
 
-/* ARGSUSED */
 static int
 zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details)
 {
+	(void) err_details;
 	int err;
 	dsl_dataset_snapshot_arg_t ddsa = { 0 };
 	const char *dsname = lua_tostring(state, 1);
@@ -250,7 +266,7 @@
 	 * context.
 	 */
 	if (spa_version(ri->zri_pool->dp_spa) < SPA_VERSION_FAST_SNAP) {
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	/*
@@ -260,21 +276,193 @@
 	ddsa.ddsa_errors = NULL;
 	ddsa.ddsa_props = NULL;
 	ddsa.ddsa_cr = ri->zri_cred;
+	ddsa.ddsa_proc = ri->zri_proc;
 	ddsa.ddsa_snaps = fnvlist_alloc();
 	fnvlist_add_boolean(ddsa.ddsa_snaps, dsname);
 
 	zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
-	    (zcp_cleanup_t *)&fnvlist_free, ddsa.ddsa_snaps);
+	    zcp_synctask_cleanup, ddsa.ddsa_snaps);
 
 	err = zcp_sync_task(state, dsl_dataset_snapshot_check,
 	    dsl_dataset_snapshot_sync, &ddsa, sync, dsname);
 
+	if (err == 0) {
+		/*
+		 * We may need to create a new device minor node for this
+		 * dataset (if it is a zvol and the "snapdev" property is set).
+		 * Save it in the nvlist so that it can be processed in open
+		 * context.
+		 */
+		fnvlist_add_boolean(ri->zri_new_zvols, dsname);
+	}
+
 	zcp_deregister_cleanup(state, zch);
 	fnvlist_free(ddsa.ddsa_snaps);
 
 	return (err);
 }
 
+static int zcp_synctask_inherit_prop(lua_State *, boolean_t,
+    nvlist_t *err_details);
+
+/*
+ * Sync-task descriptor published under the name "inherit".  It takes two
+ * positional string arguments (dataset, property) and no keyword
+ * arguments.
+ */
+static zcp_synctask_info_t zcp_synctask_inherit_prop_info = {
+	.name = "inherit",
+	.func = zcp_synctask_inherit_prop,
+	.pargs = {
+		{ .za_name = "dataset", .za_lua_type = LUA_TSTRING },
+		{ .za_name = "property", .za_lua_type = LUA_TSTRING },
+		{ NULL, 0 }
+	},
+	.kwargs = {
+		{ NULL, 0 }
+	},
+	.space_check = ZFS_SPACE_CHECK_RESERVED,
+	.blocks_modified = 2, /* 2 * numprops */
+};
+
+/*
+ * Check function for the "inherit" sync task.
+ *
+ * arg is a zcp_inherit_prop_arg_t.  A name that does not map to a native
+ * property is accepted only if zfs_prop_user() recognizes it, in which
+ * case success is returned directly.  Native properties must be neither
+ * read-only nor non-inheritable, and are then validated by
+ * dsl_props_set_check().
+ *
+ * Returns 0 on success, EINVAL for a property that cannot be inherited,
+ * or the error from dsl_props_set_check().  Errors generated here go
+ * through SET_ERROR() like the other error returns in this file.
+ */
+static int
+zcp_synctask_inherit_prop_check(void *arg, dmu_tx_t *tx)
+{
+	zcp_inherit_prop_arg_t *args = arg;
+	zfs_prop_t prop = zfs_name_to_prop(args->zipa_prop);
+
+	if (prop == ZPROP_INVAL) {
+		/*
+		 * NOTE(review): user properties succeed here without going
+		 * through dsl_props_set_check(); confirm the dataset name
+		 * is validated elsewhere before the sync function runs.
+		 */
+		if (zfs_prop_user(args->zipa_prop))
+			return (0);
+
+		return (SET_ERROR(EINVAL));
+	}
+
+	if (zfs_prop_readonly(prop) || !zfs_prop_inheritable(prop))
+		return (SET_ERROR(EINVAL));
+
+	return (dsl_props_set_check(&args->zipa_dpsa, tx));
+}
+
+/*
+ * Sync function for the "inherit" sync task: forwards the embedded
+ * dsl_props_set_arg_t to dsl_props_set_sync().
+ */
+static void
+zcp_synctask_inherit_prop_sync(void *arg, dmu_tx_t *tx)
+{
+	zcp_inherit_prop_arg_t *args = arg;
+
+	dsl_props_set_sync(&args->zipa_dpsa, tx);
+}
+
+/*
+ * Implementation of the "inherit" sync task.  Lua argument 1 is the
+ * dataset name and argument 2 the property name; err_details is unused.
+ * Returns the result of zcp_sync_task().
+ */
+static int
+zcp_synctask_inherit_prop(lua_State *state, boolean_t sync,
+    nvlist_t *err_details)
+{
+	(void) err_details;
+	const char *dsname = lua_tostring(state, 1);
+	const char *prop = lua_tostring(state, 2);
+
+	/* One-entry list naming the property to inherit. */
+	nvlist_t *props = fnvlist_alloc();
+	fnvlist_add_boolean(props, prop);
+
+	zcp_inherit_prop_arg_t zipa = {
+		.zipa_state = state,
+		.zipa_prop = prop,
+		.zipa_dpsa = {
+			.dpsa_dsname = dsname,
+			.dpsa_source = ZPROP_SRC_INHERITED,
+			.dpsa_props = props,
+		},
+	};
+
+	/* Registered so the list has a cleanup handler during the task. */
+	zcp_cleanup_handler_t *handler = zcp_register_cleanup(state,
+	    zcp_synctask_cleanup, props);
+
+	const int err = zcp_sync_task(state, zcp_synctask_inherit_prop_check,
+	    zcp_synctask_inherit_prop_sync, &zipa, sync, dsname);
+
+	zcp_deregister_cleanup(state, handler);
+	fnvlist_free(props);
+
+	return (err);
+}
+
+static int zcp_synctask_bookmark(lua_State *, boolean_t, nvlist_t *);
+
+/*
+ * Sync-task descriptor published under the name "bookmark".  It takes two
+ * positional string arguments (source, new bookmark) and no keyword
+ * arguments.
+ */
+static zcp_synctask_info_t zcp_synctask_bookmark_info = {
+	.name = "bookmark",
+	.func = zcp_synctask_bookmark,
+	.space_check = ZFS_SPACE_CHECK_NORMAL,
+	.blocks_modified = 1,
+	.pargs = {
+		{
+			.za_name = "snapshot | bookmark",
+			.za_lua_type = LUA_TSTRING
+		},
+		{ .za_name = "bookmark", .za_lua_type = LUA_TSTRING },
+		{ NULL, 0 }
+	},
+	.kwargs = {
+		{ NULL, 0 }
+	},
+};
+
+/*
+ * Implementation of the "bookmark" sync task.  Lua argument 1 is the
+ * source name and argument 2 the name of the bookmark to create;
+ * err_details is unused.  Returns the result of zcp_sync_task().
+ */
+static int
+zcp_synctask_bookmark(lua_State *state, boolean_t sync, nvlist_t *err_details)
+{
+	(void) err_details;
+	const char *source = lua_tostring(state, 1);
+	const char *bmark = lua_tostring(state, 2);
+
+	/* One-entry request: new bookmark name -> source name. */
+	nvlist_t *request = fnvlist_alloc();
+	fnvlist_add_string(request, bmark, source);
+
+	/* Registered so the list has a cleanup handler during the task. */
+	zcp_cleanup_handler_t *handler = zcp_register_cleanup(state,
+	    zcp_synctask_cleanup, request);
+
+	dsl_bookmark_create_arg_t dbca = {
+		.dbca_bmarks = request,
+		.dbca_errors = NULL,
+	};
+	const int err = zcp_sync_task(state, dsl_bookmark_create_check,
+	    dsl_bookmark_create_sync, &dbca, sync, source);
+
+	zcp_deregister_cleanup(state, handler);
+	fnvlist_free(request);
+
+	return (err);
+}
+
+static int zcp_synctask_set_prop(lua_State *, boolean_t, nvlist_t *err_details);
+
+/*
+ * Sync-task descriptor published under the name "set_prop".  It takes
+ * three positional string arguments (dataset, property, value) and no
+ * keyword arguments.
+ */
+static zcp_synctask_info_t zcp_synctask_set_prop_info = {
+	.name = "set_prop",
+	.func = zcp_synctask_set_prop,
+	.pargs = {
+		{ .za_name = "dataset", .za_lua_type = LUA_TSTRING },
+		{ .za_name = "property", .za_lua_type = LUA_TSTRING },
+		{ .za_name = "value", .za_lua_type = LUA_TSTRING },
+		{ NULL, 0 }
+	},
+	.kwargs = {
+		{ NULL, 0 }
+	},
+	.space_check = ZFS_SPACE_CHECK_RESERVED,
+	.blocks_modified = 2
+};
+
+/*
+ * Implementation of the "set_prop" sync task.  Lua arguments 1-3 are the
+ * dataset name, property name and value; err_details is unused.  Returns
+ * the result of zcp_sync_task().
+ */
+static int
+zcp_synctask_set_prop(lua_State *state, boolean_t sync, nvlist_t *err_details)
+{
+	(void) err_details;
+	const char *dsname = lua_tostring(state, 1);
+	const char *prop = lua_tostring(state, 2);
+	const char *val = lua_tostring(state, 3);
+
+	zcp_set_prop_arg_t request = {
+		.state = state,
+		.dsname = dsname,
+		.prop = prop,
+		.val = val,
+	};
+
+	return (zcp_sync_task(state, zcp_set_prop_check, zcp_set_prop_sync,
+	    &request, sync, dsname));
+}
+
 static int
 zcp_synctask_wrapper(lua_State *state)
 {
@@ -287,8 +475,7 @@
 	 * Make sure err_details is properly freed, even if a fatal error is
 	 * thrown during the synctask.
 	 */
-	zch = zcp_register_cleanup(state,
-	    (zcp_cleanup_t *)&fnvlist_free, err_details);
+	zch = zcp_register_cleanup(state, zcp_synctask_cleanup, err_details);
 
 	zcp_synctask_info_t *info = lua_touserdata(state, lua_upvalueindex(1));
 	boolean_t sync = lua_toboolean(state, lua_upvalueindex(2));
@@ -343,6 +530,9 @@
 		&zcp_synctask_promote_info,
 		&zcp_synctask_rollback_info,
 		&zcp_synctask_snapshot_info,
+		&zcp_synctask_inherit_prop_info,
+		&zcp_synctask_bookmark_info,
+		&zcp_synctask_set_prop_info,
 		NULL
 	};
 

diff --git a/zfs/module/zfs/zfeature.c b/zfs/module/zfs/zfeature.c
index ed6ebcf..9d16fff 100644
--- a/zfs/module/zfs/zfeature.c
+++ b/zfs/module/zfs/zfeature.c

@@ -203,7 +203,7 @@
 			supported = B_FALSE;
 
 			if (NULL != unsup_feat) {
-				char *desc = "";
+				const char *desc = "";
 
 				if (zap_lookup(os, spa->spa_feat_desc_obj,
 				    za->za_name, 1, MAXPATHLEN, buf) == 0)
@@ -279,7 +279,7 @@
 static int
 feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
 {
-	ASSERTV(uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj);
+	uint64_t enabled_txg_obj __maybe_unused = spa->spa_feat_enabled_txg_obj;
 
 	ASSERT(zfeature_depends_on(feature->fi_feature,
 	    SPA_FEATURE_ENABLED_TXG));
@@ -397,9 +397,9 @@
 {
 	uint64_t refcount = 0;
 	zfeature_info_t *feature = &spa_feature_table[fid];
-	ASSERTV(uint64_t zapobj =
+	uint64_t zapobj __maybe_unused =
 	    (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
-	    spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj);
+	    spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
 
 	ASSERT(VALID_FEATURE_FID(fid));
 	ASSERT(0 != zapobj);

diff --git a/zfs/module/zfs/zfs_acl.c b/zfs/module/zfs/zfs_acl.c
deleted file mode 100644
index 26af91e..0000000
--- a/zfs/module/zfs/zfs_acl.c
+++ /dev/null

@@ -1,2816 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
- */
-
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/sid.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/sdt.h>
-#include <sys/fs/zfs.h>
-#include <sys/mode.h>
-#include <sys/policy.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_fuid.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/dmu.h>
-#include <sys/dnode.h>
-#include <sys/zap.h>
-#include <sys/sa.h>
-#include <sys/trace_acl.h>
-#include <sys/zpl.h>
-
-#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
-#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
-#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
-#define	MIN_ACE_TYPE	ALLOW
-
-#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
-#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
-    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
-#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
-    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
-#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
-    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
-
-#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
-    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
-    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
-    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
-
-#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
-#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
-    ACE_DELETE|ACE_DELETE_CHILD)
-#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
-
-#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
-    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
-
-#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
-    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
-
-#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
-    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
-
-#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)
-
-#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
-    ZFS_ACL_PROTECTED)
-
-#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
-    ZFS_ACL_OBJ_ACE)
-
-#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
-
-#define	IDMAP_WK_CREATOR_OWNER_UID	2147483648U
-
-static uint16_t
-zfs_ace_v0_get_type(void *acep)
-{
-	return (((zfs_oldace_t *)acep)->z_type);
-}
-
-static uint16_t
-zfs_ace_v0_get_flags(void *acep)
-{
-	return (((zfs_oldace_t *)acep)->z_flags);
-}
-
-static uint32_t
-zfs_ace_v0_get_mask(void *acep)
-{
-	return (((zfs_oldace_t *)acep)->z_access_mask);
-}
-
-static uint64_t
-zfs_ace_v0_get_who(void *acep)
-{
-	return (((zfs_oldace_t *)acep)->z_fuid);
-}
-
-static void
-zfs_ace_v0_set_type(void *acep, uint16_t type)
-{
-	((zfs_oldace_t *)acep)->z_type = type;
-}
-
-static void
-zfs_ace_v0_set_flags(void *acep, uint16_t flags)
-{
-	((zfs_oldace_t *)acep)->z_flags = flags;
-}
-
-static void
-zfs_ace_v0_set_mask(void *acep, uint32_t mask)
-{
-	((zfs_oldace_t *)acep)->z_access_mask = mask;
-}
-
-static void
-zfs_ace_v0_set_who(void *acep, uint64_t who)
-{
-	((zfs_oldace_t *)acep)->z_fuid = who;
-}
-
-/*ARGSUSED*/
-static size_t
-zfs_ace_v0_size(void *acep)
-{
-	return (sizeof (zfs_oldace_t));
-}
-
-static size_t
-zfs_ace_v0_abstract_size(void)
-{
-	return (sizeof (zfs_oldace_t));
-}
-
-static int
-zfs_ace_v0_mask_off(void)
-{
-	return (offsetof(zfs_oldace_t, z_access_mask));
-}
-
-/*ARGSUSED*/
-static int
-zfs_ace_v0_data(void *acep, void **datap)
-{
-	*datap = NULL;
-	return (0);
-}
-
-static acl_ops_t zfs_acl_v0_ops = {
-	.ace_mask_get = zfs_ace_v0_get_mask,
-	.ace_mask_set = zfs_ace_v0_set_mask,
-	.ace_flags_get = zfs_ace_v0_get_flags,
-	.ace_flags_set = zfs_ace_v0_set_flags,
-	.ace_type_get = zfs_ace_v0_get_type,
-	.ace_type_set = zfs_ace_v0_set_type,
-	.ace_who_get = zfs_ace_v0_get_who,
-	.ace_who_set = zfs_ace_v0_set_who,
-	.ace_size = zfs_ace_v0_size,
-	.ace_abstract_size = zfs_ace_v0_abstract_size,
-	.ace_mask_off = zfs_ace_v0_mask_off,
-	.ace_data = zfs_ace_v0_data
-};
-
-static uint16_t
-zfs_ace_fuid_get_type(void *acep)
-{
-	return (((zfs_ace_hdr_t *)acep)->z_type);
-}
-
-static uint16_t
-zfs_ace_fuid_get_flags(void *acep)
-{
-	return (((zfs_ace_hdr_t *)acep)->z_flags);
-}
-
-static uint32_t
-zfs_ace_fuid_get_mask(void *acep)
-{
-	return (((zfs_ace_hdr_t *)acep)->z_access_mask);
-}
-
-static uint64_t
-zfs_ace_fuid_get_who(void *args)
-{
-	uint16_t entry_type;
-	zfs_ace_t *acep = args;
-
-	entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
-
-	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
-	    entry_type == ACE_EVERYONE)
-		return (-1);
-	return (((zfs_ace_t *)acep)->z_fuid);
-}
-
-static void
-zfs_ace_fuid_set_type(void *acep, uint16_t type)
-{
-	((zfs_ace_hdr_t *)acep)->z_type = type;
-}
-
-static void
-zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
-{
-	((zfs_ace_hdr_t *)acep)->z_flags = flags;
-}
-
-static void
-zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
-{
-	((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
-}
-
-static void
-zfs_ace_fuid_set_who(void *arg, uint64_t who)
-{
-	zfs_ace_t *acep = arg;
-
-	uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
-
-	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
-	    entry_type == ACE_EVERYONE)
-		return;
-	acep->z_fuid = who;
-}
-
-static size_t
-zfs_ace_fuid_size(void *acep)
-{
-	zfs_ace_hdr_t *zacep = acep;
-	uint16_t entry_type;
-
-	switch (zacep->z_type) {
-	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-		return (sizeof (zfs_object_ace_t));
-	case ALLOW:
-	case DENY:
-		entry_type =
-		    (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
-		if (entry_type == ACE_OWNER ||
-		    entry_type == OWNING_GROUP ||
-		    entry_type == ACE_EVERYONE)
-			return (sizeof (zfs_ace_hdr_t));
-		/*FALLTHROUGH*/
-	default:
-		return (sizeof (zfs_ace_t));
-	}
-}
-
-static size_t
-zfs_ace_fuid_abstract_size(void)
-{
-	return (sizeof (zfs_ace_hdr_t));
-}
-
-static int
-zfs_ace_fuid_mask_off(void)
-{
-	return (offsetof(zfs_ace_hdr_t, z_access_mask));
-}
-
-static int
-zfs_ace_fuid_data(void *acep, void **datap)
-{
-	zfs_ace_t *zacep = acep;
-	zfs_object_ace_t *zobjp;
-
-	switch (zacep->z_hdr.z_type) {
-	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-		zobjp = acep;
-		*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
-		return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
-	default:
-		*datap = NULL;
-		return (0);
-	}
-}
-
-static acl_ops_t zfs_acl_fuid_ops = {
-	.ace_mask_get = zfs_ace_fuid_get_mask,
-	.ace_mask_set = zfs_ace_fuid_set_mask,
-	.ace_flags_get = zfs_ace_fuid_get_flags,
-	.ace_flags_set = zfs_ace_fuid_set_flags,
-	.ace_type_get = zfs_ace_fuid_get_type,
-	.ace_type_set = zfs_ace_fuid_set_type,
-	.ace_who_get = zfs_ace_fuid_get_who,
-	.ace_who_set = zfs_ace_fuid_set_who,
-	.ace_size = zfs_ace_fuid_size,
-	.ace_abstract_size = zfs_ace_fuid_abstract_size,
-	.ace_mask_off = zfs_ace_fuid_mask_off,
-	.ace_data = zfs_ace_fuid_data
-};
-
-/*
- * The following three functions are provided for compatibility with
- * older ZPL version in order to determine if the file use to have
- * an external ACL and what version of ACL previously existed on the
- * file.  Would really be nice to not need this, sigh.
- */
-uint64_t
-zfs_external_acl(znode_t *zp)
-{
-	zfs_acl_phys_t acl_phys;
-	int error;
-
-	if (zp->z_is_sa)
-		return (0);
-
-	/*
-	 * Need to deal with a potential
-	 * race where zfs_sa_upgrade could cause
-	 * z_isa_sa to change.
-	 *
-	 * If the lookup fails then the state of z_is_sa should have
-	 * changed.
-	 */
-
-	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),
-	    &acl_phys, sizeof (acl_phys))) == 0)
-		return (acl_phys.z_acl_extern_obj);
-	else {
-		/*
-		 * after upgrade the SA_ZPL_ZNODE_ACL should have been
-		 * removed
-		 */
-		VERIFY(zp->z_is_sa && error == ENOENT);
-		return (0);
-	}
-}
-
-/*
- * Determine size of ACL in bytes
- *
- * This is more complicated than it should be since we have to deal
- * with old external ACLs.
- */
-static int
-zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
-    zfs_acl_phys_t *aclphys)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	uint64_t acl_count;
-	int size;
-	int error;
-
-	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
-	if (zp->z_is_sa) {
-		if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
-		    &size)) != 0)
-			return (error);
-		*aclsize = size;
-		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
-		    &acl_count, sizeof (acl_count))) != 0)
-			return (error);
-		*aclcount = acl_count;
-	} else {
-		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
-		    aclphys, sizeof (*aclphys))) != 0)
-			return (error);
-
-		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
-			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
-			*aclcount = aclphys->z_acl_size;
-		} else {
-			*aclsize = aclphys->z_acl_size;
-			*aclcount = aclphys->z_acl_count;
-		}
-	}
-	return (0);
-}
-
-int
-zfs_znode_acl_version(znode_t *zp)
-{
-	zfs_acl_phys_t acl_phys;
-
-	if (zp->z_is_sa)
-		return (ZFS_ACL_VERSION_FUID);
-	else {
-		int error;
-
-		/*
-		 * Need to deal with a potential
-		 * race where zfs_sa_upgrade could cause
-		 * z_isa_sa to change.
-		 *
-		 * If the lookup fails then the state of z_is_sa should have
-		 * changed.
-		 */
-		if ((error = sa_lookup(zp->z_sa_hdl,
-		    SA_ZPL_ZNODE_ACL(ZTOZSB(zp)),
-		    &acl_phys, sizeof (acl_phys))) == 0)
-			return (acl_phys.z_acl_version);
-		else {
-			/*
-			 * After upgrade SA_ZPL_ZNODE_ACL should have
-			 * been removed.
-			 */
-			VERIFY(zp->z_is_sa && error == ENOENT);
-			return (ZFS_ACL_VERSION_FUID);
-		}
-	}
-}
-
-static int
-zfs_acl_version(int version)
-{
-	if (version < ZPL_VERSION_FUID)
-		return (ZFS_ACL_VERSION_INITIAL);
-	else
-		return (ZFS_ACL_VERSION_FUID);
-}
-
-static int
-zfs_acl_version_zp(znode_t *zp)
-{
-	return (zfs_acl_version(ZTOZSB(zp)->z_version));
-}
-
-zfs_acl_t *
-zfs_acl_alloc(int vers)
-{
-	zfs_acl_t *aclp;
-
-	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
-	list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
-	    offsetof(zfs_acl_node_t, z_next));
-	aclp->z_version = vers;
-	if (vers == ZFS_ACL_VERSION_FUID)
-		aclp->z_ops = &zfs_acl_fuid_ops;
-	else
-		aclp->z_ops = &zfs_acl_v0_ops;
-	return (aclp);
-}
-
-zfs_acl_node_t *
-zfs_acl_node_alloc(size_t bytes)
-{
-	zfs_acl_node_t *aclnode;
-
-	aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
-	if (bytes) {
-		aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
-		aclnode->z_allocdata = aclnode->z_acldata;
-		aclnode->z_allocsize = bytes;
-		aclnode->z_size = bytes;
-	}
-
-	return (aclnode);
-}
-
-static void
-zfs_acl_node_free(zfs_acl_node_t *aclnode)
-{
-	if (aclnode->z_allocsize)
-		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
-	kmem_free(aclnode, sizeof (zfs_acl_node_t));
-}
-
-/*
- * Unlink and free every node on the ACL's list, then reset the ACL's
- * entry and byte counters to zero.  The zfs_acl_t itself stays allocated.
- */
-static void
-zfs_acl_release_nodes(zfs_acl_t *aclp)
-{
-	zfs_acl_node_t *node;
-
-	for (node = list_head(&aclp->z_acl); node != NULL;
-	    node = list_head(&aclp->z_acl)) {
-		list_remove(&aclp->z_acl, node);
-		zfs_acl_node_free(node);
-	}
-
-	aclp->z_acl_bytes = 0;
-	aclp->z_acl_count = 0;
-}
-
-/*
- * Destroy an ACL: free all of its nodes, destroy the then-empty node
- * list, and finally free the zfs_acl_t itself.  aclp must not be used
- * after this call.
- */
-void
-zfs_acl_free(zfs_acl_t *aclp)
-{
-	zfs_acl_release_nodes(aclp);
-	list_destroy(&aclp->z_acl);
-	kmem_free(aclp, sizeof (zfs_acl_t));
-}
-
-/*
- * Decide whether an ACE's type/flags combination is acceptable.
- *
- * For ALLOW, DENY, audit and alarm ACEs the "who" class encoded in
- * flags (flags & ACE_TYPE_FLAGS) must be one of owner, owning group,
- * everyone, identifier group, or 0.  Any other type is accepted purely
- * on being within [MIN_ACE_TYPE, MAX_ACE_TYPE].
- */
-static boolean_t
-zfs_acl_valid_ace_type(uint_t type, uint_t flags)
-{
-	uint16_t who_class;
-
-	if (type != ALLOW && type != DENY &&
-	    type != ACE_SYSTEM_AUDIT_ACE_TYPE &&
-	    type != ACE_SYSTEM_ALARM_ACE_TYPE) {
-		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
-			return (B_TRUE);
-		return (B_FALSE);
-	}
-
-	who_class = flags & ACE_TYPE_FLAGS;
-	return (who_class == ACE_OWNER ||
-	    who_class == OWNING_GROUP ||
-	    who_class == ACE_EVERYONE || who_class == 0 ||
-	    who_class == ACE_IDENTIFIER_GROUP);
-}
-
-/*
- * Validate a single ACE (type + flags) for an object of mode obj_mode.
- *
- * Besides returning B_TRUE/B_FALSE this records hints on aclp:
- * ZFS_ACL_OBJ_ACE when an object-type ACE is accepted, and
- * ZFS_INHERIT_ACE when a directory carries a file/directory inherit
- * flag.  Hints already recorded are kept even if a later check rejects
- * the ACE.
- */
-static boolean_t
-zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
-{
-	const uint16_t inherit_bits =
-	    ACE_FILE_INHERIT_ACE | ACE_DIRECTORY_INHERIT_ACE;
-	const uint16_t modifier_bits =
-	    ACE_INHERIT_ONLY_ACE | ACE_NO_PROPAGATE_INHERIT_ACE;
-
-	/* Step 1: the entry type itself must be sane. */
-	if (!zfs_acl_valid_ace_type(type, iflags))
-		return (B_FALSE);
-
-	/* Object ACEs are only representable from the FUID version on. */
-	if (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
-	    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
-	    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
-	    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE) {
-		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
-			return (B_FALSE);
-		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
-	}
-
-	/* Step 2: inheritance flags. */
-	if (S_ISDIR(obj_mode) && (iflags & inherit_bits) != 0)
-		aclp->z_hints |= ZFS_INHERIT_ACE;
-
-	/*
-	 * inherit-only / no-propagate only make sense together with
-	 * file-inherit or directory-inherit.
-	 */
-	if ((iflags & modifier_bits) != 0 && (iflags & inherit_bits) == 0)
-		return (B_FALSE);
-
-	return (B_TRUE);
-}
-
-/*
- * Step through the ACEs of aclp, one ACE per call.
- *
- * Passing start == NULL (re)starts the walk at the first node of the
- * ACL's node list.  Any non-NULL start continues from the position saved
- * in aclp->z_curr_node / aclp->z_next_ace; the value of start itself is
- * not examined beyond the NULL test.
- *
- * On success the ACE's who, access mask, flags and type are stored
- * through the out parameters and a pointer to the ACE is returned.
- * NULL is returned when the ACL has no nodes, when the walk has run past
- * the last ACE, or when the next ACE would extend beyond the end of its
- * node's data (in which case the out parameters are left untouched).
- */
-static void *
-zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
-    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
-{
-	zfs_acl_node_t *aclnode;
-
-	ASSERT(aclp);
-
-	if (start == NULL) {
-		/* Fresh walk: rewind the cursor to the first node. */
-		aclnode = list_head(&aclp->z_acl);
-		if (aclnode == NULL)
-			return (NULL);
-
-		aclp->z_next_ace = aclnode->z_acldata;
-		aclp->z_curr_node = aclnode;
-		aclnode->z_ace_idx = 0;
-	}
-
-	aclnode = aclp->z_curr_node;
-
-	if (aclnode == NULL)
-		return (NULL);
-
-	/* Current node exhausted: advance to the next node, if any. */
-	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
-		aclnode = list_next(&aclp->z_acl, aclnode);
-		if (aclnode == NULL)
-			return (NULL);
-		else {
-			aclp->z_curr_node = aclnode;
-			aclnode->z_ace_idx = 0;
-			aclp->z_next_ace = aclnode->z_acldata;
-		}
-	}
-
-	/* Re-checked because the node just advanced to may hold no ACEs. */
-	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
-		void *acep = aclp->z_next_ace;
-		size_t ace_size;
-
-		/*
-		 * Make sure we don't overstep our bounds
-		 */
-		ace_size = aclp->z_ops->ace_size(acep);
-
-		if (((caddr_t)acep + ace_size) >
-		    ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
-			return (NULL);
-		}
-
-		*iflags = aclp->z_ops->ace_flags_get(acep);
-		*type = aclp->z_ops->ace_type_get(acep);
-		*access_mask = aclp->z_ops->ace_mask_get(acep);
-		*who = aclp->z_ops->ace_who_get(acep);
-		/* Move the cursor past this ACE for the next call. */
-		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
-		aclnode->z_ace_idx++;
-
-		return ((void *)acep);
-	}
-	return (NULL);
-}
-
-/*
- * Walker callback for ace_trivial_common(): the cookie is the pointer
- * returned by the previous zfs_acl_next_ace() call (0 to start), carried
- * as a uint64_t.  The ACE's "who" is fetched but discarded; aclcnt is
- * unused.
- */
-/*ARGSUSED*/
-static uint64_t
-zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
-    uint16_t *flags, uint16_t *type, uint32_t *mask)
-{
-	uint64_t who_ignored;
-	void *prev_ace = (void *)(uintptr_t)cookie;
-	void *next_ace;
-
-	next_ace = zfs_acl_next_ace((zfs_acl_t *)datap, prev_ace,
-	    &who_ignored, mask, flags, type);
-
-	return ((uint64_t)(uintptr_t)next_ace);
-}
-
-/*
- * Copy ACE to internal ZFS format.
- * While processing the ACL each ACE will be validated for correctness.
- * ACE FUIDs will be created later.
- *
- * datap is read as a packed sequence of aclcnt ace_t / ace_object_t
- * records and z_acl receives the converted entries.  On success *size is
- * set to the number of bytes written to z_acl and 0 is returned.  If any
- * ACE fails zfs_ace_valid() the function returns EINVAL immediately and
- * *size is not written.  zfs_ace_valid() may update aclp->z_hints as a
- * side effect.
- */
-int
-zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp,
-    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
-    zfs_fuid_info_t **fuidp, cred_t *cr)
-{
-	/* NOTE(review): i is int while aclcnt is uint64_t. */
-	int i;
-	uint16_t entry_type;
-	zfs_ace_t *aceptr = z_acl;
-	ace_t *acep = datap;
-	zfs_object_ace_t *zobjacep;
-	ace_object_t *aceobjp;
-
-	for (i = 0; i != aclcnt; i++) {
-		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
-		aceptr->z_hdr.z_flags = acep->a_flags;
-		aceptr->z_hdr.z_type = acep->a_type;
-		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
-		/*
-		 * owner@, group@ and everyone@ entries carry no id; every
-		 * other entry gets a FUID, created as a user id when no
-		 * type flag is set and as a group id otherwise.
-		 */
-		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
-		    entry_type != ACE_EVERYONE) {
-			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
-			    cr, (entry_type == 0) ?
-			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
-		}
-
-		/*
-		 * Make sure ACE is valid
-		 */
-		if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type,
-		    aceptr->z_hdr.z_flags) != B_TRUE)
-			return (SET_ERROR(EINVAL));
-
-		/*
-		 * Advance the source pointer by the size of the record just
-		 * consumed; object ACEs additionally carry two type fields
-		 * that are copied across.
-		 */
-		switch (acep->a_type) {
-		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-			zobjacep = (zfs_object_ace_t *)aceptr;
-			aceobjp = (ace_object_t *)acep;
-
-			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
-			    sizeof (aceobjp->a_obj_type));
-			bcopy(aceobjp->a_inherit_obj_type,
-			    zobjacep->z_inherit_type,
-			    sizeof (aceobjp->a_inherit_obj_type));
-			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
-			break;
-		default:
-			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
-		}
-
-		/* Destination entries are variable-sized; ask the ops vector. */
-		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
-		    aclp->z_ops->ace_size(aceptr));
-	}
-
-	*size = (caddr_t)aceptr - (caddr_t)z_acl;
-
-	return (0);
-}
-
-/*
- * Copy ZFS ACEs to fixed size ace_t layout
- *
- * Walks every ACE of aclp and writes the converted records back to back
- * into datap.  Object ACEs are written as ace_object_t, everything else
- * as ace_t.  A non-zero filter drops object ACEs from the output.  The
- * function does not check how much room datap has; the caller must size
- * it.
- */
-static void
-zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
-    void *datap, int filter)
-{
-	uint64_t who;
-	uint32_t access_mask;
-	uint16_t iflags, type;
-	zfs_ace_hdr_t *zacep = NULL;
-	ace_t *acep = datap;
-	ace_object_t *objacep;
-	zfs_object_ace_t *zobjacep;
-	size_t ace_size;
-	uint16_t entry_type;
-
-	while ((zacep = zfs_acl_next_ace(aclp, zacep,
-	    &who, &access_mask, &iflags, &type))) {
-
-		switch (type) {
-		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-			if (filter) {
-				/* continues the enclosing while loop */
-				continue;
-			}
-			zobjacep = (zfs_object_ace_t *)zacep;
-			objacep = (ace_object_t *)acep;
-			bcopy(zobjacep->z_object_type,
-			    objacep->a_obj_type,
-			    sizeof (zobjacep->z_object_type));
-			bcopy(zobjacep->z_inherit_type,
-			    objacep->a_inherit_obj_type,
-			    sizeof (zobjacep->z_inherit_type));
-			ace_size = sizeof (ace_object_t);
-			break;
-		default:
-			ace_size = sizeof (ace_t);
-			break;
-		}
-
-		/*
-		 * Entries naming a specific user or group have their FUID
-		 * mapped back to an id; owner@/group@/everyone@ entries
-		 * just pass the stored "who" value through.
-		 */
-		entry_type = (iflags & ACE_TYPE_FLAGS);
-		if ((entry_type != ACE_OWNER &&
-		    entry_type != OWNING_GROUP &&
-		    entry_type != ACE_EVERYONE)) {
-			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
-			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
-			    ZFS_ACE_GROUP : ZFS_ACE_USER);
-		} else {
-			acep->a_who = (uid_t)(int64_t)who;
-		}
-		acep->a_access_mask = access_mask;
-		acep->a_flags = iflags;
-		acep->a_type = type;
-		/* Step to where the next output record starts. */
-		acep = (ace_t *)((caddr_t)acep + ace_size);
-	}
-}
-
-/*
- * Convert aclcnt ace_t entries into the old (zfs_oldace_t) layout,
- * validating each one after it has been copied.
- *
- * Returns 0 and stores the number of bytes written in *size, or EINVAL
- * as soon as an entry fails zfs_ace_valid() (*size is then left unset).
- */
-static int
-zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep,
-    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
-{
-	zfs_oldace_t *dst = z_acl;
-	ace_t *src = acep;
-	int n;
-
-	for (n = 0; n != aclcnt; n++) {
-		dst->z_access_mask = src->a_access_mask;
-		dst->z_type = src->a_type;
-		dst->z_flags = src->a_flags;
-		dst->z_fuid = src->a_who;
-
-		/* Reject the whole ACL on the first invalid entry. */
-		if (zfs_ace_valid(obj_mode, aclp, dst->z_type,
-		    dst->z_flags) != B_TRUE) {
-			return (SET_ERROR(EINVAL));
-		}
-
-		src++;
-		dst++;
-	}
-
-	*size = (caddr_t)dst - (caddr_t)z_acl;
-	return (0);
-}
-
-/*
- * convert old ACL format to new
- *
- * aclp must be at ZFS_ACL_VERSION_INITIAL.  Its ACEs are gathered into a
- * temporary contiguous array, re-encoded into a single new node using
- * the FUID ops, and the ACL's previous nodes are then replaced by that
- * node.  On return aclp uses zfs_acl_fuid_ops and has z_version set to
- * ZFS_ACL_VERSION.  A conversion failure trips VERIFY.
- */
-void
-zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
-{
-	zfs_oldace_t *oldaclp;
-	int i;
-	uint16_t type, iflags;
-	uint32_t access_mask;
-	uint64_t who;
-	void *cookie = NULL;
-	zfs_acl_node_t *newaclnode;
-
-	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
-	/*
-	 * First create the ACE in a contiguous piece of memory
-	 * for zfs_copy_ace_2_fuid().
-	 *
-	 * We only convert an ACL once, so this won't happen
-	 * every time.
-	 */
-	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
-	    KM_SLEEP);
-	i = 0;
-	while ((cookie = zfs_acl_next_ace(aclp, cookie, &who,
-	    &access_mask, &iflags, &type))) {
-		oldaclp[i].z_flags = iflags;
-		oldaclp[i].z_type = type;
-		oldaclp[i].z_fuid = who;
-		oldaclp[i++].z_access_mask = access_mask;
-	}
-
-	/*
-	 * Size the new node for the worst case of every entry being an
-	 * object ACE; z_size is overwritten below with the bytes actually
-	 * used.
-	 */
-	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
-	    sizeof (zfs_object_ace_t));
-	/* Switch ops first: the copy below sizes entries via aclp->z_ops. */
-	aclp->z_ops = &zfs_acl_fuid_ops;
-	/*
-	 * NOTE(review): oldaclp (zfs_oldace_t[]) is handed over as the
-	 * ace_t source of zfs_copy_ace_2_fuid(); this presumably relies on
-	 * the two structures sharing a layout -- confirm.
-	 */
-	VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode,
-	    aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
-	    &newaclnode->z_size, NULL, cr) == 0);
-	newaclnode->z_ace_count = aclp->z_acl_count;
-	aclp->z_version = ZFS_ACL_VERSION;
-	/* Must precede zfs_acl_release_nodes(), which zeroes z_acl_count. */
-	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
-
-	/*
-	 * Release all previous ACL nodes
-	 */
-
-	zfs_acl_release_nodes(aclp);
-
-	list_insert_head(&aclp->z_acl, newaclnode);
-
-	/* Restore the counters that zfs_acl_release_nodes() just cleared. */
-	aclp->z_acl_bytes = newaclnode->z_size;
-	aclp->z_acl_count = newaclnode->z_ace_count;
-
-}
-
-/*
- * Convert unix access mask to v4 access mask
- *
- * Only the "other" permission bits (S_IROTH, S_IWOTH, S_IXOTH) of
- * access_mask are examined; they map to ACE_READ_DATA, ACE_WRITE_DATA
- * and ACE_EXECUTE respectively.
- */
-static uint32_t
-zfs_unix_to_v4(uint32_t access_mask)
-{
-	uint32_t v4_mask = 0;
-
-	v4_mask |= (access_mask & S_IROTH) ? ACE_READ_DATA : 0;
-	v4_mask |= (access_mask & S_IWOTH) ? ACE_WRITE_DATA : 0;
-	v4_mask |= (access_mask & S_IXOTH) ? ACE_EXECUTE : 0;
-
-	return (v4_mask);
-}
-
-/*
- * Fill in one ACE through the ACL's ops vector: access mask, access
- * type and flags are always written.  The "who" field is written only
- * when the entry is not an owner@, group@ or everyone@ ACE, so fuid is
- * ignored for those three classes.
- */
-static void
-zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
-    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
-{
-	const uint16_t who_class = entry_type & ACE_TYPE_FLAGS;
-
-	aclp->z_ops->ace_mask_set(acep, access_mask);
-	aclp->z_ops->ace_type_set(acep, access_type);
-	aclp->z_ops->ace_flags_set(acep, entry_type);
-
-	if (who_class == ACE_OWNER || who_class == OWNING_GROUP ||
-	    who_class == ACE_EVERYONE)
-		return;
-
-	aclp->z_ops->ace_who_set(acep, fuid);
-}
-
-/*
- * Fold the read/write/execute bits of one ACE into the mode under
- * construction.
- *
- * rbits, wbits and xbits are the mode bits governed by ACE_READ_DATA,
- * ACE_WRITE_DATA and ACE_EXECUTE for the class of user the ACE applies
- * to.  A mode bit is decided by the first ACE that mentions it: it is
- * recorded in *seen and, only if that ACE is an ALLOW entry, also set in
- * *mode.  Bits already present in *seen are left untouched.
- */
-static void
-zfs_mode_apply_ace(uint32_t access_mask, uint16_t type, mode_t rbits,
-    mode_t wbits, mode_t xbits, mode_t *seen, mode_t *mode)
-{
-	mode_t fresh = 0;
-
-	if (access_mask & ACE_READ_DATA)
-		fresh |= rbits;
-	if (access_mask & ACE_WRITE_DATA)
-		fresh |= wbits;
-	if (access_mask & ACE_EXECUTE)
-		fresh |= xbits;
-
-	/* Keep only the bits no earlier ACE has already decided. */
-	fresh &= ~*seen;
-	*seen |= fresh;
-	if (type == ALLOW)
-		*mode |= fresh;
-}
-
-/*
- * Determine mode of file based on ACL.
- *
- * fmode supplies the file-type and setuid/setgid/sticky bits, which are
- * carried over unchanged; the permission bits are derived from the ACEs
- * of aclp.  fuid and fgid are the ids that plain user ACEs and
- * identifier-group ACEs are compared against to decide whether they
- * describe the file's owner or group.
- *
- * ZFS_NO_EXECS_DENIED is set in *pflags when no other user/group ACE
- * denies execute and every bit of ALL_MODE_EXECS was both mentioned and
- * allowed; otherwise it is cleared.
- *
- * Returns the computed mode.
- */
-uint64_t
-zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
-    uint64_t *pflags, uint64_t fuid, uint64_t fgid)
-{
-	int		entry_type;
-	mode_t		mode;
-	mode_t		seen = 0;
-	zfs_ace_hdr_t	*acep = NULL;
-	uint64_t	who;
-	uint16_t	iflags, type;
-	uint32_t	access_mask;
-	boolean_t	an_exec_denied = B_FALSE;
-
-	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
-
-	while ((acep = zfs_acl_next_ace(aclp, acep, &who,
-	    &access_mask, &iflags, &type))) {
-
-		if (!zfs_acl_valid_ace_type(type, iflags))
-			continue;
-
-		entry_type = (iflags & ACE_TYPE_FLAGS);
-
-		/*
-		 * Skip over owner@, group@ or everyone@ inherit only ACEs
-		 */
-		if ((iflags & ACE_INHERIT_ONLY_ACE) &&
-		    (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
-		    entry_type == OWNING_GROUP))
-			continue;
-
-		if (entry_type == ACE_OWNER || (entry_type == 0 &&
-		    who == fuid)) {
-			/* owner@, or a user ACE naming the file's owner */
-			zfs_mode_apply_ace(access_mask, type,
-			    S_IRUSR, S_IWUSR, S_IXUSR, &seen, &mode);
-		} else if (entry_type == OWNING_GROUP ||
-		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
-			/* group@, or a group ACE naming the file's group */
-			zfs_mode_apply_ace(access_mask, type,
-			    S_IRGRP, S_IWGRP, S_IXGRP, &seen, &mode);
-		} else if (entry_type == ACE_EVERYONE) {
-			/* everyone@ affects all three permission classes */
-			zfs_mode_apply_ace(access_mask, type,
-			    S_IRUSR | S_IRGRP | S_IROTH,
-			    S_IWUSR | S_IWGRP | S_IWOTH,
-			    S_IXUSR | S_IXGRP | S_IXOTH, &seen, &mode);
-		} else {
-			/*
-			 * Only care if this IDENTIFIER_GROUP or
-			 * USER ACE denies execute access to someone,
-			 * mode is not affected
-			 */
-			if ((access_mask & ACE_EXECUTE) && type == DENY)
-				an_exec_denied = B_TRUE;
-		}
-	}
-
-	/*
-	 * Failure to allow is effectively a deny, so execute permission
-	 * is denied if it was never mentioned or if we explicitly
-	 * weren't allowed it.
-	 */
-	if (!an_exec_denied &&
-	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
-	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
-		an_exec_denied = B_TRUE;
-
-	if (an_exec_denied)
-		*pflags &= ~ZFS_NO_EXECS_DENIED;
-	else
-		*pflags |= ZFS_NO_EXECS_DENIED;
-
-	return (mode);
-}
-
-/*
- * Read a znode's ACL into an in-memory zfs_acl_t.  If the intent is to
- * modify, always create a new acl and leave any cached acl in place.
- *
- * The ACE data is taken from one of three places: the external ACL
- * object, the ACE space embedded in the znode's zfs_acl_phys_t, or the
- * DACL_ACES system attribute when the znode is an SA znode.
- *
- * The caller must hold zp->z_acl_lock.  have_lock tells whether the
- * caller already holds zp->z_lock; if not, and the znode is not an SA
- * znode, z_lock is taken for the duration of the read.
- *
- * When will_modify is false the result is served from, and stored in,
- * zp->z_acl_cached.  When it is true a private copy is returned that the
- * cache does not reference.
- *
- * Returns 0 with *aclpp set, or an error (ECKSUM is reported as EIO), in
- * which case *aclpp is not written.
- */
-int
-zfs_acl_node_read(struct znode *zp, boolean_t have_lock, zfs_acl_t **aclpp,
-    boolean_t will_modify)
-{
-	zfs_acl_t	*aclp;
-	int		aclsize = 0;
-	int		acl_count = 0;
-	zfs_acl_node_t	*aclnode;
-	zfs_acl_phys_t	znode_acl;
-	int		version;
-	int		error;
-	boolean_t	drop_lock = B_FALSE;
-
-	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
-
-	/* Fast path: a read-only caller can share the cached ACL. */
-	if (zp->z_acl_cached && !will_modify) {
-		*aclpp = zp->z_acl_cached;
-		return (0);
-	}
-
-	/*
-	 * close race where znode could be upgrade while trying to
-	 * read the znode attributes.
-	 *
-	 * But this could only happen if the file isn't already an SA
-	 * znode
-	 */
-	if (!zp->z_is_sa && !have_lock) {
-		mutex_enter(&zp->z_lock);
-		drop_lock = B_TRUE;
-	}
-	version = zfs_znode_acl_version(zp);
-
-	if ((error = zfs_acl_znode_info(zp, &aclsize,
-	    &acl_count, &znode_acl)) != 0) {
-		goto done;
-	}
-
-	aclp = zfs_acl_alloc(version);
-
-	aclp->z_acl_count = acl_count;
-	aclp->z_acl_bytes = aclsize;
-
-	/* One node holds the whole ACL as read from disk. */
-	aclnode = zfs_acl_node_alloc(aclsize);
-	aclnode->z_ace_count = aclp->z_acl_count;
-	aclnode->z_size = aclsize;
-
-	if (!zp->z_is_sa) {
-		if (znode_acl.z_acl_extern_obj) {
-			error = dmu_read(ZTOZSB(zp)->z_os,
-			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
-			    aclnode->z_acldata, DMU_READ_PREFETCH);
-		} else {
-			/* Embedded ACEs; error is still 0 from above. */
-			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
-			    aclnode->z_size);
-		}
-	} else {
-		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)),
-		    aclnode->z_acldata, aclnode->z_size);
-	}
-
-	if (error != 0) {
-		/*
-		 * aclnode was never linked into aclp's list, so the two
-		 * have to be freed separately.
-		 */
-		zfs_acl_free(aclp);
-		zfs_acl_node_free(aclnode);
-		/* convert checksum errors into IO errors */
-		if (error == ECKSUM)
-			error = SET_ERROR(EIO);
-		goto done;
-	}
-
-	list_insert_head(&aclp->z_acl, aclnode);
-
-	*aclpp = aclp;
-	if (!will_modify)
-		zp->z_acl_cached = aclp;
-done:
-	if (drop_lock)
-		mutex_exit(&zp->z_lock);
-	return (error);
-}
-
-/*
- * Data-locator callback: hands out the data buffer of one ACL node per
- * call.  userdata is a zfs_acl_locator_cb_t; start selects the first
- * node of cb_aclp's list, otherwise the node after the one returned by
- * the previous call is used.  The chosen node is remembered in
- * cb_acl_node.  buflen is unused.
- *
- * NOTE(review): the node is dereferenced without a NULL check, so the
- * caller must not ask for more chunks than the ACL has nodes.
- */
-/*ARGSUSED*/
-void
-zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
-    boolean_t start, void *userdata)
-{
-	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
-
-	if (!start) {
-		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
-		    cb->cb_acl_node);
-	} else {
-		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
-	}
-
-	*length = cb->cb_acl_node->z_size;
-	*dataptr = cb->cb_acl_node->z_acldata;
-}
-
-/*
- * Recompute the znode's mode (and z_pflags) from its ACL using the
- * inode's current uid/gid.  Does nothing on filesystems whose ACL type
- * is ZFS_ACLTYPE_POSIXACL.
- *
- * The caller must hold both zp->z_lock and zp->z_acl_lock.
- * Returns 0 on success or when the znode has no ACL attribute (ENOENT);
- * any other error from zfs_acl_node_read() is passed through.
- */
-int
-zfs_acl_chown_setattr(znode_t *zp)
-{
-	zfs_acl_t *acl;
-	int err;
-
-	if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	ASSERT(MUTEX_HELD(&zp->z_lock));
-	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
-
-	err = zfs_acl_node_read(zp, B_TRUE, &acl, B_FALSE);
-
-	/*
-	 * Some ZFS implementations (ZEVO) create neither a ZNODE_ACL
-	 * nor a DACL_ACES SA in which case ENOENT is returned from
-	 * zfs_acl_node_read() when the SA can't be located.
-	 * Allow chown/chgrp to succeed in these cases rather than
-	 * returning an error that makes no sense in the context of
-	 * the caller.
-	 */
-	if (err == ENOENT)
-		return (0);
-
-	/* An empty ACL leaves the existing mode alone. */
-	if (err == 0 && acl->z_acl_count > 0) {
-		zp->z_mode = ZTOI(zp)->i_mode =
-		    zfs_mode_compute(zp->z_mode, acl,
-		    &zp->z_pflags, KUID_TO_SUID(ZTOI(zp)->i_uid),
-		    KGID_TO_SGID(ZTOI(zp)->i_gid));
-	}
-
-	return (err);
-}
-
-/*
- * Derive the six access masks of a trivial ACL from a unix mode.
- *
- * allow0/deny1 are extra owner masks and deny2 an extra group mask,
- * needed only when a narrower class lacks a permission that a broader
- * class has.  owner/group/everyone are the masks of the three closing
- * entries: a fixed base set of attribute/ACL rights plus read, write
- * (with append) and execute according to the matching mode bits.
- */
-static void
-acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
-    uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone)
-{
-	const uint32_t base_rights = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
-	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
-	const uint32_t write_rights = ACE_WRITE_DATA | ACE_APPEND_DATA;
-	const int usr_r = (mode & S_IRUSR) != 0;
-	const int usr_w = (mode & S_IWUSR) != 0;
-	const int usr_x = (mode & S_IXUSR) != 0;
-	const int grp_r = (mode & S_IRGRP) != 0;
-	const int grp_w = (mode & S_IWGRP) != 0;
-	const int grp_x = (mode & S_IXGRP) != 0;
-	const int oth_r = (mode & S_IROTH) != 0;
-	const int oth_w = (mode & S_IWOTH) != 0;
-	const int oth_x = (mode & S_IXOTH) != 0;
-
-	*allow0 = *deny1 = *deny2 = *group = 0;
-
-	/* read */
-	if (!usr_r && (grp_r || oth_r))
-		*deny1 |= ACE_READ_DATA;
-	if (!grp_r && oth_r) {
-		*deny2 |= ACE_READ_DATA;
-		if (usr_r)
-			*allow0 |= ACE_READ_DATA;
-	}
-
-	/* write */
-	if (!usr_w && (grp_w || oth_w))
-		*deny1 |= ACE_WRITE_DATA;
-	if (!grp_w && oth_w) {
-		*deny2 |= ACE_WRITE_DATA;
-		if (usr_w)
-			*allow0 |= ACE_WRITE_DATA;
-	}
-
-	/* execute */
-	if (!usr_x && (grp_x || oth_x))
-		*deny1 |= ACE_EXECUTE;
-	if (!grp_x && oth_x) {
-		*deny2 |= ACE_EXECUTE;
-		if (usr_x)
-			*allow0 |= ACE_EXECUTE;
-	}
-
-	/* owner@: base rights plus the owner-only write-style rights */
-	*owner = base_rights | ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER |
-	    ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS;
-	if (usr_r)
-		*owner |= ACE_READ_DATA;
-	if (usr_w)
-		*owner |= write_rights;
-	if (usr_x)
-		*owner |= ACE_EXECUTE;
-
-	/* group@ */
-	*group = base_rights;
-	if (grp_r)
-		*group |= ACE_READ_DATA;
-	if (grp_w)
-		*group |= write_rights;
-	if (grp_x)
-		*group |= ACE_EXECUTE;
-
-	/* everyone@ */
-	*everyone = base_rights;
-	if (oth_r)
-		*everyone |= ACE_READ_DATA;
-	if (oth_w)
-		*everyone |= write_rights;
-	if (oth_x)
-		*everyone |= ACE_EXECUTE;
-}
-
-/*
- * ace_trivial_common:
- * determine whether an ACL is trivial, visiting its entries through the
- * supplied walk callback (cookie 0 starts the walk, a 0 return ends it).
- *
- * An ACL is trivial when it consists solely of owner@, group@ and
- * everyone@ entries, none of which is inheritable, none of which denies
- * read_acl/read_attributes, none of which mentions delete or
- * delete_child, and where write_owner/write_acl/write_attributes/
- * write_xattr are allowed to owner@ only.
- *
- * Returns 0 if the ACL is trivial and 1 otherwise.
- */
-static int
-ace_trivial_common(void *acep, int aclcnt,
-    uint64_t (*walk)(void *, uint64_t, int aclcnt,
-    uint16_t *, uint16_t *, uint32_t *))
-{
-	const uint16_t inherit_any = ACE_FILE_INHERIT_ACE |
-	    ACE_DIRECTORY_INHERIT_ACE | ACE_NO_PROPAGATE_INHERIT_ACE |
-	    ACE_INHERIT_ONLY_ACE;
-	const uint32_t owner_only = ACE_WRITE_OWNER | ACE_WRITE_ACL |
-	    ACE_WRITE_ATTRIBUTES | ACE_WRITE_NAMED_ATTRS;
-	uint64_t cookie = 0;
-	uint32_t mask;
-	uint16_t flags;
-	uint16_t type;
-	uint16_t who_class;
-
-	while ((cookie = walk(acep, cookie, aclcnt,
-	    &flags, &type, &mask)) != 0) {
-		/* Only the three abstract "who" classes are trivial. */
-		who_class = flags & ACE_TYPE_FLAGS;
-		if (who_class != ACE_OWNER &&
-		    who_class != (ACE_GROUP|ACE_IDENTIFIER_GROUP) &&
-		    who_class != ACE_EVERYONE)
-			return (1);
-
-		/* Any inheritance flag makes the ACL non-trivial. */
-		if ((flags & inherit_any) != 0)
-			return (1);
-
-		/*
-		 * Don't allow anybody to deny reading basic
-		 * attributes or a files ACL.
-		 */
-		if (type == ACE_ACCESS_DENIED_ACE_TYPE &&
-		    (mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) != 0)
-			return (1);
-
-		/*
-		 * Delete permissions are never set by default
-		 */
-		if ((mask & (ACE_DELETE|ACE_DELETE_CHILD)) != 0)
-			return (1);
-
-		/*
-		 * only allow owner@ to have
-		 * write_acl/write_owner/write_attributes/write_xattr/
-		 */
-		if (type == ACE_ACCESS_ALLOWED_ACE_TYPE &&
-		    (flags & ACE_OWNER) == 0 && (mask & owner_only) != 0)
-			return (1);
-	}
-
-	return (0);
-}
-
-/*
- * common code for setting ACLs.
- *
- * Recomputes the znode's mode from aclp, writes the ACL in whichever
- * on-disk form the znode uses (SA attributes, or the legacy
- * zfs_acl_phys_t with embedded or external ACE storage), refreshes the
- * ACL-wide bits in z_pflags, and commits everything with a single
- * sa_bulk_update() in transaction tx.
- *
- * Any cached ACL on the znode is freed.  aclp may be converted in place
- * to the FUID format by zfs_acl_xform().
- *
- * Returns 0 on success or the error of the failing lookup, object free
- * or bulk update.
- *
- * NOTE(review): earlier wording of this comment mentioned an inherit
- * pointer (ihp) passed by zfs_setacl; this function takes no such
- * parameter.
- */
-int
-zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
-{
-	int			error;
-	zfsvfs_t		*zfsvfs = ZTOZSB(zp);
-	dmu_object_type_t	otype;
-	zfs_acl_locator_cb_t	locate = { 0 };
-	uint64_t		mode;
-	/* At most 5 attributes: mode, flags, ctime + 2 (SA) or 1 (legacy). */
-	sa_bulk_attr_t		bulk[5];
-	uint64_t		ctime[2];
-	int			count = 0;
-	zfs_acl_phys_t		acl_phys;
-
-	mode = zp->z_mode;
-
-	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
-	    KUID_TO_SUID(ZTOI(zp)->i_uid), KGID_TO_SGID(ZTOI(zp)->i_gid));
-
-	zp->z_mode = ZTOI(zp)->i_mode = mode;
-	/*
-	 * The bulk entries reference mode, z_pflags and ctime by address;
-	 * z_pflags and ctime are still updated further down, before the
-	 * bulk update at the end is issued.
-	 */
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
-	    &mode, sizeof (mode));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, sizeof (zp->z_pflags));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
-	    &ctime, sizeof (ctime));
-
-	/* The cached copy is about to become stale. */
-	if (zp->z_acl_cached) {
-		zfs_acl_free(zp->z_acl_cached);
-		zp->z_acl_cached = NULL;
-	}
-
-	/*
-	 * Upgrade needed?
-	 */
-	if (!zfsvfs->z_use_fuids) {
-		otype = DMU_OT_OLDACL;
-	} else {
-		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
-		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
-			zfs_acl_xform(zp, aclp, cr);
-		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
-		otype = DMU_OT_ACL;
-	}
-
-	/*
-	 * Arrgh, we have to handle old on disk format
-	 * as well as newer (preferred) SA format.
-	 */
-
-	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
-		/* ACE data is gathered node by node via the locator. */
-		locate.cb_aclp = aclp;
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
-		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
-		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
-	} else { /* Painful legacy way */
-		zfs_acl_node_t *aclnode;
-		uint64_t off = 0;
-		uint64_t aoid;
-
-		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
-		    &acl_phys, sizeof (acl_phys))) != 0)
-			return (error);
-
-		aoid = acl_phys.z_acl_extern_obj;
-
-		/* Too big to embed: the ACEs go into an external object. */
-		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-			/*
-			 * If ACL was previously external and we are now
-			 * converting to new ACL format then release old
-			 * ACL object and create a new one.
-			 */
-			if (aoid &&
-			    aclp->z_version != acl_phys.z_acl_version) {
-				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
-				if (error)
-					return (error);
-				aoid = 0;
-			}
-			if (aoid == 0) {
-				aoid = dmu_object_alloc(zfsvfs->z_os,
-				    otype, aclp->z_acl_bytes,
-				    otype == DMU_OT_ACL ?
-				    DMU_OT_SYSACL : DMU_OT_NONE,
-				    otype == DMU_OT_ACL ?
-				    DN_OLD_MAX_BONUSLEN : 0, tx);
-			} else {
-				/* Result deliberately ignored by the cast. */
-				(void) dmu_object_set_blocksize(zfsvfs->z_os,
-				    aoid, aclp->z_acl_bytes, 0, tx);
-			}
-			acl_phys.z_acl_extern_obj = aoid;
-			/* Write the nodes' data back to back at offset off. */
-			for (aclnode = list_head(&aclp->z_acl); aclnode;
-			    aclnode = list_next(&aclp->z_acl, aclnode)) {
-				if (aclnode->z_ace_count == 0)
-					continue;
-				dmu_write(zfsvfs->z_os, aoid, off,
-				    aclnode->z_size, aclnode->z_acldata, tx);
-				off += aclnode->z_size;
-			}
-		} else {
-			void *start = acl_phys.z_ace_data;
-			/*
-			 * Migrating back embedded?
-			 */
-			if (acl_phys.z_acl_extern_obj) {
-				error = dmu_object_free(zfsvfs->z_os,
-				    acl_phys.z_acl_extern_obj, tx);
-				if (error)
-					return (error);
-				acl_phys.z_acl_extern_obj = 0;
-			}
-
-			/* Pack the nodes' data into the embedded ACE space. */
-			for (aclnode = list_head(&aclp->z_acl); aclnode;
-			    aclnode = list_next(&aclp->z_acl, aclnode)) {
-				if (aclnode->z_ace_count == 0)
-					continue;
-				bcopy(aclnode->z_acldata, start,
-				    aclnode->z_size);
-				start = (caddr_t)start + aclnode->z_size;
-			}
-		}
-		/*
-		 * If Old version then swap count/bytes to match old
-		 * layout of znode_acl_phys_t.
-		 */
-		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
-			acl_phys.z_acl_size = aclp->z_acl_count;
-			acl_phys.z_acl_count = aclp->z_acl_bytes;
-		} else {
-			acl_phys.z_acl_size = aclp->z_acl_bytes;
-			acl_phys.z_acl_count = aclp->z_acl_count;
-		}
-		acl_phys.z_acl_version = aclp->z_version;
-
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
-		    &acl_phys, sizeof (acl_phys));
-	}
-
-	/*
-	 * Replace ACL wide bits, but first clear them.
-	 */
-	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
-
-	zp->z_pflags |= aclp->z_hints;
-
-	/* ace_trivial_common() returns 0 for a trivial ACL. */
-	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
-		zp->z_pflags |= ZFS_ACL_TRIVIAL;
-
-	/* Fills in ctime[], which the bulk array already points at. */
-	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
-	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
-}
-
-/*
- * Rewrite aclp in place so that it reflects the permission bits of mode.
- *
- * The new ACL is built in one freshly allocated node laid out as:
- *   1. up to three leading owner@/group@ entries (allow0, deny1, deny2)
- *      computed by acl_trivial_access_masks();
- *   2. every existing ACE that is not a non-inherit-only owner@, group@
- *      or everyone@ entry, with its access mask trimmed when
- *      z_acl_inherit is ZFS_ACL_RESTRICTED;
- *   3. three closing owner@, group@ and everyone@ entries.
- * The ACL's previous nodes are released and replaced by the new node.
- */
-static void
-zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
-{
-	void		*acep = NULL;
-	uint64_t	who;
-	int		new_count, new_bytes;
-	int		ace_size;
-	int		entry_type;
-	uint16_t	iflags, type;
-	uint32_t	access_mask;
-	zfs_acl_node_t	*newnode;
-	size_t		abstract_size = aclp->z_ops->ace_abstract_size();
-	void		*zacep;
-	uint32_t	owner, group, everyone;
-	uint32_t	deny1, deny2, allow0;
-
-	new_count = new_bytes = 0;
-
-	acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2,
-	    &owner, &group, &everyone);
-
-	/*
-	 * Room for the worst case: 3 leading + 3 closing abstract entries
-	 * plus every byte of the existing ACL.
-	 */
-	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
-
-	/*
-	 * Leading entries are emitted only when their mask is non-empty.
-	 * The -1 fuid is never stored: zfs_set_ace() skips the who field
-	 * for owner@/group@/everyone@ entries.
-	 */
-	zacep = newnode->z_acldata;
-	if (allow0) {
-		zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER);
-		zacep = (void *)((uintptr_t)zacep + abstract_size);
-		new_count++;
-		new_bytes += abstract_size;
-	}
-	if (deny1) {
-		zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER);
-		zacep = (void *)((uintptr_t)zacep + abstract_size);
-		new_count++;
-		new_bytes += abstract_size;
-	}
-	if (deny2) {
-		zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP);
-		zacep = (void *)((uintptr_t)zacep + abstract_size);
-		new_count++;
-		new_bytes += abstract_size;
-	}
-
-	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
-	    &iflags, &type))) {
-		uint16_t inherit_flags;
-
-		entry_type = (iflags & ACE_TYPE_FLAGS);
-		inherit_flags = (iflags & ALL_INHERIT);
-
-		/*
-		 * Drop effective owner@/group@/everyone@ entries; they are
-		 * regenerated from mode at the end.
-		 */
-		if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
-		    (entry_type == OWNING_GROUP)) &&
-		    ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) {
-			continue;
-		}
-
-		if ((type != ALLOW && type != DENY) ||
-		    (inherit_flags & ACE_INHERIT_ONLY_ACE)) {
-			/* Copied unchanged; only the hints are recorded. */
-			if (inherit_flags)
-				aclp->z_hints |= ZFS_INHERIT_ACE;
-			switch (type) {
-			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
-				break;
-			}
-		} else {
-
-			/*
-			 * Limit permissions to be no greater than
-			 * group permissions
-			 */
-			if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) {
-				if (!(mode & S_IRGRP))
-					access_mask &= ~ACE_READ_DATA;
-				if (!(mode & S_IWGRP))
-					access_mask &=
-					    ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
-				if (!(mode & S_IXGRP))
-					access_mask &= ~ACE_EXECUTE;
-				access_mask &=
-				    ~(ACE_WRITE_OWNER|ACE_WRITE_ACL|
-				    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS);
-			}
-		}
-		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
-		/* The copy occupies as many bytes as the source ACE. */
-		ace_size = aclp->z_ops->ace_size(acep);
-		zacep = (void *)((uintptr_t)zacep + ace_size);
-		new_count++;
-		new_bytes += ace_size;
-	}
-	/*
-	 * Closing owner@, group@ and everyone@ entries.
-	 * NOTE(review): the access type is passed as a literal 0 here,
-	 * presumably equal to ALLOW -- confirm against the ACE type values.
-	 */
-	zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER);
-	zacep = (void *)((uintptr_t)zacep + abstract_size);
-	zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP);
-	zacep = (void *)((uintptr_t)zacep + abstract_size);
-	zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE);
-
-	new_count += 3;
-	new_bytes += abstract_size * 3;
-	/* Frees the old nodes and zeroes the counters, which are reset next. */
-	zfs_acl_release_nodes(aclp);
-	aclp->z_acl_count = new_count;
-	aclp->z_acl_bytes = new_bytes;
-	newnode->z_ace_count = new_count;
-	/* Shrink z_size from the worst-case allocation to the bytes used. */
-	newnode->z_size = new_bytes;
-	list_insert_tail(&aclp->z_acl, newnode);
-}
-
-/*
- * Build a new ACL for zp that expresses mode and hand it back through
- * *aclp.  The ACL starts out empty (allocated at the version implied by
- * the filesystem), inherits the V4_ACL_WIDE_FLAGS bits of z_pflags as
- * hints, and is then populated by zfs_acl_chmod().
- *
- * Takes z_acl_lock and then z_lock for the duration of the call.
- */
-void
-zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
-{
-	zfs_acl_t *fresh;
-
-	mutex_enter(&zp->z_acl_lock);
-	mutex_enter(&zp->z_lock);
-
-	fresh = zfs_acl_alloc(zfs_acl_version_zp(zp));
-	*aclp = fresh;
-	fresh->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
-	zfs_acl_chmod(ZTOZSB(zp), mode, fresh);
-
-	mutex_exit(&zp->z_lock);
-	mutex_exit(&zp->z_acl_lock);
-
-	ASSERT(*aclp);
-}
-
-/*
- * When the filesystem's ACL inheritance mode is ZFS_ACL_RESTRICTED,
- * clear the RESTRICTED_CLEAR bits from the access mask of an ALLOW ACE.
- * ACEs of any other type, and any other inheritance mode, are left
- * untouched.
- */
-static void
-zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
-{
-	uint32_t cur_mask = aclp->z_ops->ace_mask_get(acep);
-
-	if (zfsvfs->z_acl_inherit != ZFS_ACL_RESTRICTED)
-		return;
-	if (aclp->z_ops->ace_type_get(acep) != ALLOW)
-		return;
-
-	cur_mask &= ~RESTRICTED_CLEAR;
-	aclp->z_ops->ace_mask_set(acep, cur_mask);
-}
-
-/*
- * Should ACE be inherited?
- *
- * Looks only at the low four bits of acep_flags.  Returns 1 when the new
- * object is a directory and the ACE is directory-inheritable, or when
- * the ACE is file-inheritable -- except for a directory whose ACE also
- * has no-propagate set.  Returns 0 otherwise.
- */
-static int
-zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags)
-{
-	const int inherit = (acep_flags & 0xf);
-	const int is_dir = S_ISDIR(obj_mode) ? 1 : 0;
-
-	if (is_dir && (inherit & ACE_DIRECTORY_INHERIT_ACE))
-		return (1);
-
-	if (!(inherit & ACE_FILE_INHERIT_ACE))
-		return (0);
-
-	/* File-inherit onto a directory is blocked by no-propagate. */
-	if (is_dir && (inherit & ACE_NO_PROPAGATE_INHERIT_ACE))
-		return (0);
-	return (1);
-}
-
-/*
- * inherit inheritable ACEs from parent
- *
- * Builds and returns a new ACL (same version as paclp) for an object of
- * mode obj_mode, containing one node per ACE inherited from the parent
- * ACL paclp.  The filesystem's z_acl_inherit setting controls the result:
- * ZFS_ACL_DISCARD (and symlinks) yield an empty ACL, ZFS_ACL_NOALLOW
- * skips ALLOW entries, ZFS_ACL_PASSTHROUGH_X strips execute from
- * non-directories whose mode carries no execute bit, and
- * ZFS_ACL_RESTRICTED trims masks via zfs_restricted_update().
- *
- * *need_chmod is set to B_TRUE on entry and cleared only in passthrough
- * modes when an inherited owner@/group@/everyone@ ACE is found, telling
- * the caller whether zfs_acl_chmod() is still required.
- */
-static zfs_acl_t *
-zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *paclp,
-    uint64_t mode, boolean_t *need_chmod)
-{
-	void		*pacep;
-	void		*acep;
-	zfs_acl_node_t  *aclnode;
-	zfs_acl_t	*aclp = NULL;
-	uint64_t	who;
-	uint32_t	access_mask;
-	uint16_t	iflags, newflags, type;
-	size_t		ace_size;
-	void		*data1, *data2;
-	size_t		data1sz, data2sz;
-	boolean_t	vdir = S_ISDIR(obj_mode);
-	boolean_t	vreg = S_ISREG(obj_mode);
-	boolean_t	passthrough, passthrough_x, noallow;
-
-	/* passthrough-x is treated as a superset of passthrough. */
-	passthrough_x =
-	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
-	passthrough = passthrough_x ||
-	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
-	noallow =
-	    zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW;
-
-	*need_chmod = B_TRUE;
-	pacep = NULL;
-	aclp = zfs_acl_alloc(paclp->z_version);
-	/* Nothing is inherited in discard mode or by symlinks. */
-	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode))
-		return (aclp);
-	while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
-	    &access_mask, &iflags, &type))) {
-
-		/*
-		 * don't inherit bogus ACEs
-		 */
-		if (!zfs_acl_valid_ace_type(type, iflags))
-			continue;
-
-		if (noallow && type == ALLOW)
-			continue;
-
-		/*
-		 * aclp was allocated with the parent's version, so its ops
-		 * vector is used here to size the parent's ACE.
-		 */
-		ace_size = aclp->z_ops->ace_size(pacep);
-
-		if (!zfs_ace_can_use(obj_mode, iflags))
-			continue;
-
-		/*
-		 * If owner@, group@, or everyone@ inheritable
-		 * then zfs_acl_chmod() isn't needed.
-		 */
-		if (passthrough &&
-		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
-		    ((iflags & OWNING_GROUP) ==
-		    OWNING_GROUP)) && (vreg || (vdir && (iflags &
-		    ACE_DIRECTORY_INHERIT_ACE)))) {
-			*need_chmod = B_FALSE;
-		}
-
-		if (!vdir && passthrough_x &&
-		    ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
-			access_mask &= ~ACE_EXECUTE;
-		}
-
-		/* Each inherited ACE gets a node of its own. */
-		aclnode = zfs_acl_node_alloc(ace_size);
-		list_insert_tail(&aclp->z_acl, aclnode);
-		acep = aclnode->z_acldata;
-
-		zfs_set_ace(aclp, acep, access_mask, type,
-		    who, iflags|ACE_INHERITED_ACE);
-
-		/*
-		 * Copy special opaque data if any
-		 */
-		if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) {
-			VERIFY((data2sz = aclp->z_ops->ace_data(acep,
-			    &data2)) == data1sz);
-			bcopy(data1, data2, data2sz);
-		}
-
-		aclp->z_acl_count++;
-		aclnode->z_ace_count++;
-		aclp->z_acl_bytes += aclnode->z_size;
-		newflags = aclp->z_ops->ace_flags_get(acep);
-
-		if (vdir)
-			aclp->z_hints |= ZFS_INHERIT_ACE;
-
-		/*
-		 * Non-directories, and directories inheriting a no-propagate
-		 * ACE, end the inheritance chain: strip all inherit flags.
-		 */
-		if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) {
-			newflags &= ~ALL_INHERIT;
-			aclp->z_ops->ace_flags_set(acep,
-			    newflags|ACE_INHERITED_ACE);
-			zfs_restricted_update(zfsvfs, aclp, acep);
-			continue;
-		}
-
-		ASSERT(vdir);
-
-		/*
-		 * If only FILE_INHERIT is set then turn on
-		 * inherit_only
-		 */
-		if ((iflags & (ACE_FILE_INHERIT_ACE |
-		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
-			newflags |= ACE_INHERIT_ONLY_ACE;
-			aclp->z_ops->ace_flags_set(acep,
-			    newflags|ACE_INHERITED_ACE);
-		} else {
-			newflags &= ~ACE_INHERIT_ONLY_ACE;
-			aclp->z_ops->ace_flags_set(acep,
-			    newflags|ACE_INHERITED_ACE);
-		}
-	}
-	return (aclp);
-}
-
-/*
- * Create file system object initial permissions
- * including inheritable ACEs.
- */
-int
-zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
-    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
-{
-	int		error;
-	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
-	zfs_acl_t	*paclp;
-	gid_t		gid = vap->va_gid;
-	boolean_t	need_chmod = B_TRUE;
-	boolean_t	inherited = B_FALSE;
-
-	bzero(acl_ids, sizeof (zfs_acl_ids_t));
-	acl_ids->z_mode = vap->va_mode;
-
-	if (vsecp)
-		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_mode, vsecp,
-		    cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
-			return (error);
-
-	acl_ids->z_fuid = vap->va_uid;
-	acl_ids->z_fgid = vap->va_gid;
-#ifdef HAVE_KSID
-	/*
-	 * Determine uid and gid.
-	 */
-	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
-	    ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) {
-		acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid,
-		    cr, ZFS_OWNER, &acl_ids->z_fuidp);
-		acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
-		    cr, ZFS_GROUP, &acl_ids->z_fuidp);
-		gid = vap->va_gid;
-	} else {
-		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
-		    cr, &acl_ids->z_fuidp);
-		acl_ids->z_fgid = 0;
-		if (vap->va_mask & AT_GID)  {
-			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
-			    (uint64_t)vap->va_gid,
-			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
-			gid = vap->va_gid;
-			if (acl_ids->z_fgid != KGID_TO_SGID(ZTOI(dzp)->i_gid) &&
-			    !groupmember(vap->va_gid, cr) &&
-			    secpolicy_vnode_create_gid(cr) != 0)
-				acl_ids->z_fgid = 0;
-		}
-		if (acl_ids->z_fgid == 0) {
-			if (dzp->z_mode & S_ISGID) {
-				char		*domain;
-				uint32_t	rid;
-
-				acl_ids->z_fgid = KGID_TO_SGID(
-				    ZTOI(dzp)->i_gid);
-				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
-				    cr, ZFS_GROUP);
-
-				if (zfsvfs->z_use_fuids &&
-				    IS_EPHEMERAL(acl_ids->z_fgid)) {
-					domain = zfs_fuid_idx_domain(
-					    &zfsvfs->z_fuid_idx,
-					    FUID_INDEX(acl_ids->z_fgid));
-					rid = FUID_RID(acl_ids->z_fgid);
-					zfs_fuid_node_add(&acl_ids->z_fuidp,
-					    domain, rid,
-					    FUID_INDEX(acl_ids->z_fgid),
-					    acl_ids->z_fgid, ZFS_GROUP);
-				}
-			} else {
-				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
-				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
-				gid = crgetgid(cr);
-			}
-		}
-	}
-#endif /* HAVE_KSID */
-
-	/*
-	 * If we're creating a directory, and the parent directory has the
-	 * set-GID bit set, set in on the new directory.
-	 * Otherwise, if the user is neither privileged nor a member of the
-	 * file's new group, clear the file's set-GID bit.
-	 */
-
-	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
-	    (S_ISDIR(vap->va_mode))) {
-		acl_ids->z_mode |= S_ISGID;
-	} else {
-		if ((acl_ids->z_mode & S_ISGID) &&
-		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
-			acl_ids->z_mode &= ~S_ISGID;
-	}
-
-	if (acl_ids->z_aclp == NULL) {
-		mutex_enter(&dzp->z_acl_lock);
-		mutex_enter(&dzp->z_lock);
-		if (!(flag & IS_ROOT_NODE) && (S_ISDIR(ZTOI(dzp)->i_mode) &&
-		    (dzp->z_pflags & ZFS_INHERIT_ACE)) &&
-		    !(dzp->z_pflags & ZFS_XATTR)) {
-			VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
-			    &paclp, B_FALSE));
-			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
-			    vap->va_mode, paclp, acl_ids->z_mode, &need_chmod);
-			inherited = B_TRUE;
-		} else {
-			acl_ids->z_aclp =
-			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
-			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
-		}
-		mutex_exit(&dzp->z_lock);
-		mutex_exit(&dzp->z_acl_lock);
-		if (need_chmod) {
-			acl_ids->z_aclp->z_hints |= S_ISDIR(vap->va_mode) ?
-			    ZFS_ACL_AUTO_INHERIT : 0;
-			zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp);
-		}
-	}
-
-	if (inherited || vsecp) {
-		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
-		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
-		    acl_ids->z_fuid, acl_ids->z_fgid);
-		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
-			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
-	}
-
-	return (0);
-}
-
-/*
- * Free ACL and fuid_infop, but not the acl_ids structure
- */
-void
-zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
-{
-	if (acl_ids->z_aclp)
-		zfs_acl_free(acl_ids->z_aclp);
-	if (acl_ids->z_fuidp)
-		zfs_fuid_info_free(acl_ids->z_fuidp);
-	acl_ids->z_aclp = NULL;
-	acl_ids->z_fuidp = NULL;
-}
-
-boolean_t
-zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
-{
-	return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) ||
-	    zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) ||
-	    (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID &&
-	    zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid)));
-}
-
-/*
- * Retrieve a file's ACL
- */
-int
-zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
-{
-	zfs_acl_t	*aclp;
-	ulong_t		mask;
-	int		error;
-	int 		count = 0;
-	int		largeace = 0;
-
-	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
-	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
-
-	if (mask == 0)
-		return (SET_ERROR(ENOSYS));
-
-	if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)))
-		return (error);
-
-	mutex_enter(&zp->z_acl_lock);
-
-	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
-	if (error != 0) {
-		mutex_exit(&zp->z_acl_lock);
-		return (error);
-	}
-
-	/*
-	 * Scan ACL to determine number of ACEs
-	 */
-	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
-		void *zacep = NULL;
-		uint64_t who;
-		uint32_t access_mask;
-		uint16_t type, iflags;
-
-		while ((zacep = zfs_acl_next_ace(aclp, zacep,
-		    &who, &access_mask, &iflags, &type))) {
-			switch (type) {
-			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
-			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
-			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
-			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
-				largeace++;
-				continue;
-			default:
-				count++;
-			}
-		}
-		vsecp->vsa_aclcnt = count;
-	} else
-		count = (int)aclp->z_acl_count;
-
-	if (mask & VSA_ACECNT) {
-		vsecp->vsa_aclcnt = count;
-	}
-
-	if (mask & VSA_ACE) {
-		size_t aclsz;
-
-		aclsz = count * sizeof (ace_t) +
-		    sizeof (ace_object_t) * largeace;
-
-		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
-		vsecp->vsa_aclentsz = aclsz;
-
-		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
-			zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr,
-			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
-		else {
-			zfs_acl_node_t *aclnode;
-			void *start = vsecp->vsa_aclentp;
-
-			for (aclnode = list_head(&aclp->z_acl); aclnode;
-			    aclnode = list_next(&aclp->z_acl, aclnode)) {
-				bcopy(aclnode->z_acldata, start,
-				    aclnode->z_size);
-				start = (caddr_t)start + aclnode->z_size;
-			}
-			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
-			    aclp->z_acl_bytes);
-		}
-	}
-	if (mask & VSA_ACE_ACLFLAGS) {
-		vsecp->vsa_aclflags = 0;
-		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
-			vsecp->vsa_aclflags |= ACL_DEFAULTED;
-		if (zp->z_pflags & ZFS_ACL_PROTECTED)
-			vsecp->vsa_aclflags |= ACL_PROTECTED;
-		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
-			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
-	}
-
-	mutex_exit(&zp->z_acl_lock);
-
-	return (0);
-}
-
-int
-zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode,
-    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
-{
-	zfs_acl_t *aclp;
-	zfs_acl_node_t *aclnode;
-	int aclcnt = vsecp->vsa_aclcnt;
-	int error;
-
-	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
-		return (SET_ERROR(EINVAL));
-
-	aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
-
-	aclp->z_hints = 0;
-	aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
-	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
-		if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp,
-		    (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
-		    aclcnt, &aclnode->z_size)) != 0) {
-			zfs_acl_free(aclp);
-			zfs_acl_node_free(aclnode);
-			return (error);
-		}
-	} else {
-		if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, aclp,
-		    vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
-		    &aclnode->z_size, fuidp, cr)) != 0) {
-			zfs_acl_free(aclp);
-			zfs_acl_node_free(aclnode);
-			return (error);
-		}
-	}
-	aclp->z_acl_bytes = aclnode->z_size;
-	aclnode->z_ace_count = aclcnt;
-	aclp->z_acl_count = aclcnt;
-	list_insert_head(&aclp->z_acl, aclnode);
-
-	/*
-	 * If flags are being set then add them to z_hints
-	 */
-	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
-		if (vsecp->vsa_aclflags & ACL_PROTECTED)
-			aclp->z_hints |= ZFS_ACL_PROTECTED;
-		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
-			aclp->z_hints |= ZFS_ACL_DEFAULTED;
-		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
-			aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
-	}
-
-	*zaclp = aclp;
-
-	return (0);
-}
-
-/*
- * Set a file's ACL
- */
-int
-zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
-{
-	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
-	zilog_t		*zilog = zfsvfs->z_log;
-	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
-	dmu_tx_t	*tx;
-	int		error;
-	zfs_acl_t	*aclp;
-	zfs_fuid_info_t	*fuidp = NULL;
-	boolean_t	fuid_dirtied;
-	uint64_t	acl_obj;
-
-	if (mask == 0)
-		return (SET_ERROR(ENOSYS));
-
-	if (zp->z_pflags & ZFS_IMMUTABLE)
-		return (SET_ERROR(EPERM));
-
-	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
-		return (error);
-
-	error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
-	    &aclp);
-	if (error)
-		return (error);
-
-	/*
-	 * If ACL wide flags aren't being set then preserve any
-	 * existing flags.
-	 */
-	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
-		aclp->z_hints |=
-		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
-	}
-top:
-	mutex_enter(&zp->z_acl_lock);
-	mutex_enter(&zp->z_lock);
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-
-	/*
-	 * If old version and ACL won't fit in bonus and we aren't
-	 * upgrading then take out necessary DMU holds
-	 */
-
-	if ((acl_obj = zfs_external_acl(zp)) != 0) {
-		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
-		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
-			dmu_tx_hold_free(tx, acl_obj, 0,
-			    DMU_OBJECT_END);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-			    aclp->z_acl_bytes);
-		} else {
-			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
-		}
-	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
-	}
-
-	zfs_sa_upgrade_txholds(tx, zp);
-	error = dmu_tx_assign(tx, TXG_NOWAIT);
-	if (error) {
-		mutex_exit(&zp->z_acl_lock);
-		mutex_exit(&zp->z_lock);
-
-		if (error == ERESTART) {
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			goto top;
-		}
-		dmu_tx_abort(tx);
-		zfs_acl_free(aclp);
-		return (error);
-	}
-
-	error = zfs_aclset_common(zp, aclp, cr, tx);
-	ASSERT(error == 0);
-	ASSERT(zp->z_acl_cached == NULL);
-	zp->z_acl_cached = aclp;
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
-
-	if (fuidp)
-		zfs_fuid_info_free(fuidp);
-	dmu_tx_commit(tx);
-
-	mutex_exit(&zp->z_lock);
-	mutex_exit(&zp->z_acl_lock);
-
-	return (error);
-}
-
-/*
- * Check accesses of interest (AoI) against attributes of the dataset
- * such as read-only.  Returns zero if no AoI conflict with dataset
- * attributes, otherwise an appropriate errno is returned.
- */
-static int
-zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
-{
-	if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) &&
-	    (!S_ISDEV(ZTOI(zp)->i_mode) ||
-	    (S_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) {
-		return (SET_ERROR(EROFS));
-	}
-
-	/*
-	 * Only check for READONLY on non-directories.
-	 */
-	if ((v4_mode & WRITE_MASK_DATA) &&
-	    ((!S_ISDIR(ZTOI(zp)->i_mode) &&
-	    (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
-	    (S_ISDIR(ZTOI(zp)->i_mode) &&
-	    (zp->z_pflags & ZFS_IMMUTABLE)))) {
-		return (SET_ERROR(EPERM));
-	}
-
-	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
-	    (zp->z_pflags & ZFS_NOUNLINK)) {
-		return (SET_ERROR(EPERM));
-	}
-
-	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
-	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
-		return (SET_ERROR(EACCES));
-	}
-
-	return (0);
-}
-
-/*
- * The primary usage of this function is to loop through all of the
- * ACEs in the znode, determining what accesses of interest (AoI) to
- * the caller are allowed or denied.  The AoI are expressed as bits in
- * the working_mode parameter.  As each ACE is processed, bits covered
- * by that ACE are removed from the working_mode.  This removal
- * facilitates two things.  The first is that when the working mode is
- * empty (= 0), we know we've looked at all the AoI. The second is
- * that the ACE interpretation rules don't allow a later ACE to undo
- * something granted or denied by an earlier ACE.  Removing the
- * discovered access or denial enforces this rule.  At the end of
- * processing the ACEs, all AoI that were found to be denied are
- * placed into the working_mode, giving the caller a mask of denied
- * accesses.  Returns:
- *	0		if all AoI granted
- *	EACCES 		if the denied mask is non-zero
- *	other error	if abnormal failure (e.g., IO error)
- *
- * A secondary usage of the function is to determine if any of the
- * AoI are granted.  If an ACE grants any access in
- * the working_mode, we immediately short circuit out of the function.
- * This mode is chosen by setting anyaccess to B_TRUE.  The
- * working_mode is not a denied access mask upon exit if the function
- * is used in this manner.
- */
-static int
-zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
-    boolean_t anyaccess, cred_t *cr)
-{
-	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
-	zfs_acl_t	*aclp;
-	int		error;
-	uid_t		uid = crgetuid(cr);
-	uint64_t	who;
-	uint16_t	type, iflags;
-	uint16_t	entry_type;
-	uint32_t	access_mask;
-	uint32_t	deny_mask = 0;
-	zfs_ace_hdr_t	*acep = NULL;
-	boolean_t	checkit;
-	uid_t		gowner;
-	uid_t		fowner;
-
-	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
-
-	mutex_enter(&zp->z_acl_lock);
-
-	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
-	if (error != 0) {
-		mutex_exit(&zp->z_acl_lock);
-		return (error);
-	}
-
-	ASSERT(zp->z_acl_cached);
-
-	while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
-	    &iflags, &type))) {
-		uint32_t mask_matched;
-
-		if (!zfs_acl_valid_ace_type(type, iflags))
-			continue;
-
-		if (S_ISDIR(ZTOI(zp)->i_mode) &&
-		    (iflags & ACE_INHERIT_ONLY_ACE))
-			continue;
-
-		/* Skip ACE if it does not affect any AoI */
-		mask_matched = (access_mask & *working_mode);
-		if (!mask_matched)
-			continue;
-
-		entry_type = (iflags & ACE_TYPE_FLAGS);
-
-		checkit = B_FALSE;
-
-		switch (entry_type) {
-		case ACE_OWNER:
-			if (uid == fowner)
-				checkit = B_TRUE;
-			break;
-		case OWNING_GROUP:
-			who = gowner;
-			/*FALLTHROUGH*/
-		case ACE_IDENTIFIER_GROUP:
-			checkit = zfs_groupmember(zfsvfs, who, cr);
-			break;
-		case ACE_EVERYONE:
-			checkit = B_TRUE;
-			break;
-
-		/* USER Entry */
-		default:
-			if (entry_type == 0) {
-				uid_t newid;
-
-				newid = zfs_fuid_map_id(zfsvfs, who, cr,
-				    ZFS_ACE_USER);
-				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
-				    uid == newid)
-					checkit = B_TRUE;
-				break;
-			} else {
-				mutex_exit(&zp->z_acl_lock);
-				return (SET_ERROR(EIO));
-			}
-		}
-
-		if (checkit) {
-			if (type == DENY) {
-				DTRACE_PROBE3(zfs__ace__denies,
-				    znode_t *, zp,
-				    zfs_ace_hdr_t *, acep,
-				    uint32_t, mask_matched);
-				deny_mask |= mask_matched;
-			} else {
-				DTRACE_PROBE3(zfs__ace__allows,
-				    znode_t *, zp,
-				    zfs_ace_hdr_t *, acep,
-				    uint32_t, mask_matched);
-				if (anyaccess) {
-					mutex_exit(&zp->z_acl_lock);
-					return (0);
-				}
-			}
-			*working_mode &= ~mask_matched;
-		}
-
-		/* Are we done? */
-		if (*working_mode == 0)
-			break;
-	}
-
-	mutex_exit(&zp->z_acl_lock);
-
-	/* Put the found 'denies' back on the working mode */
-	if (deny_mask) {
-		*working_mode |= deny_mask;
-		return (SET_ERROR(EACCES));
-	} else if (*working_mode) {
-		return (-1);
-	}
-
-	return (0);
-}
-
-/*
- * Return true if any access whatsoever granted, we don't actually
- * care what access is granted.
- */
-boolean_t
-zfs_has_access(znode_t *zp, cred_t *cr)
-{
-	uint32_t have = ACE_ALL_PERMS;
-
-	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
-		uid_t owner;
-
-		owner = zfs_fuid_map_id(ZTOZSB(zp),
-		    KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER);
-		return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0);
-	}
-	return (B_TRUE);
-}
-
-static int
-zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
-    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	int err;
-
-	*working_mode = v4_mode;
-	*check_privs = B_TRUE;
-
-	/*
-	 * Short circuit empty requests
-	 */
-	if (v4_mode == 0 || zfsvfs->z_replay) {
-		*working_mode = 0;
-		return (0);
-	}
-
-	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
-		*check_privs = B_FALSE;
-		return (err);
-	}
-
-	/*
-	 * The caller requested that the ACL check be skipped.  This
-	 * would only happen if the caller checked VOP_ACCESS() with a
-	 * 32 bit ACE mask and already had the appropriate permissions.
-	 */
-	if (skipaclchk) {
-		*working_mode = 0;
-		return (0);
-	}
-
-	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
-}
-
-static int
-zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
-    cred_t *cr)
-{
-	if (*working_mode != ACE_WRITE_DATA)
-		return (SET_ERROR(EACCES));
-
-	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
-	    check_privs, B_FALSE, cr));
-}
-
-int
-zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
-{
-	boolean_t owner = B_FALSE;
-	boolean_t groupmbr = B_FALSE;
-	boolean_t is_attr;
-	uid_t uid = crgetuid(cr);
-	int error;
-
-	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
-		return (SET_ERROR(EACCES));
-
-	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
-	    (S_ISDIR(ZTOI(zdp)->i_mode)));
-	if (is_attr)
-		goto slow;
-
-
-	mutex_enter(&zdp->z_acl_lock);
-
-	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
-		mutex_exit(&zdp->z_acl_lock);
-		return (0);
-	}
-
-	if (KUID_TO_SUID(ZTOI(zdp)->i_uid) != 0 ||
-	    KGID_TO_SGID(ZTOI(zdp)->i_gid) != 0) {
-		mutex_exit(&zdp->z_acl_lock);
-		goto slow;
-	}
-
-	if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) {
-		owner = B_TRUE;
-		if (zdp->z_mode & S_IXUSR) {
-			mutex_exit(&zdp->z_acl_lock);
-			return (0);
-		} else {
-			mutex_exit(&zdp->z_acl_lock);
-			goto slow;
-		}
-	}
-	if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) {
-		groupmbr = B_TRUE;
-		if (zdp->z_mode & S_IXGRP) {
-			mutex_exit(&zdp->z_acl_lock);
-			return (0);
-		} else {
-			mutex_exit(&zdp->z_acl_lock);
-			goto slow;
-		}
-	}
-	if (!owner && !groupmbr) {
-		if (zdp->z_mode & S_IXOTH) {
-			mutex_exit(&zdp->z_acl_lock);
-			return (0);
-		}
-	}
-
-	mutex_exit(&zdp->z_acl_lock);
-
-slow:
-	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
-	ZFS_ENTER(ZTOZSB(zdp));
-	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
-	ZFS_EXIT(ZTOZSB(zdp));
-	return (error);
-}
-
-/*
- * Determine whether Access should be granted/denied.
- *
- * The least priv subsystem is always consulted as a basic privilege
- * can define any form of access.
- */
-int
-zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
-{
-	uint32_t	working_mode;
-	int		error;
-	int		is_attr;
-	boolean_t 	check_privs;
-	znode_t		*xzp;
-	znode_t 	*check_zp = zp;
-	mode_t		needed_bits;
-	uid_t		owner;
-
-	is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode));
-
-	/*
-	 * If attribute then validate against base file
-	 */
-	if (is_attr) {
-		if ((error = zfs_zget(ZTOZSB(zp),
-		    zp->z_xattr_parent, &xzp)) != 0) {
-			return (error);
-		}
-
-		check_zp = xzp;
-
-		/*
-		 * fixup mode to map to xattr perms
-		 */
-
-		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
-			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
-			mode |= ACE_WRITE_NAMED_ATTRS;
-		}
-
-		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
-			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
-			mode |= ACE_READ_NAMED_ATTRS;
-		}
-	}
-
-	owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
-	    cr, ZFS_OWNER);
-	/*
-	 * Map the bits required to the standard inode flags
-	 * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits.  Map the bits
-	 * mapped by working_mode (currently missing) in missing_bits.
-	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
-	 * needed_bits.
-	 */
-	needed_bits = 0;
-
-	working_mode = mode;
-	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
-	    owner == crgetuid(cr))
-		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
-
-	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
-	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
-		needed_bits |= S_IRUSR;
-	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
-	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
-		needed_bits |= S_IWUSR;
-	if (working_mode & ACE_EXECUTE)
-		needed_bits |= S_IXUSR;
-
-	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
-	    &check_privs, skipaclchk, cr)) == 0) {
-		if (is_attr)
-			iput(ZTOI(xzp));
-		return (secpolicy_vnode_access2(cr, ZTOI(zp), owner,
-		    needed_bits, needed_bits));
-	}
-
-	if (error && !check_privs) {
-		if (is_attr)
-			iput(ZTOI(xzp));
-		return (error);
-	}
-
-	if (error && (flags & V_APPEND)) {
-		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
-	}
-
-	if (error && check_privs) {
-		mode_t		checkmode = 0;
-
-		/*
-		 * First check for implicit owner permission on
-		 * read_acl/read_attributes
-		 */
-
-		error = 0;
-		ASSERT(working_mode != 0);
-
-		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
-		    owner == crgetuid(cr)))
-			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
-
-		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
-		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
-			checkmode |= S_IRUSR;
-		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
-		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
-			checkmode |= S_IWUSR;
-		if (working_mode & ACE_EXECUTE)
-			checkmode |= S_IXUSR;
-
-		error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner,
-		    needed_bits & ~checkmode, needed_bits);
-
-		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
-			error = secpolicy_vnode_chown(cr, owner);
-		if (error == 0 && (working_mode & ACE_WRITE_ACL))
-			error = secpolicy_vnode_setdac(cr, owner);
-
-		if (error == 0 && (working_mode &
-		    (ACE_DELETE|ACE_DELETE_CHILD)))
-			error = secpolicy_vnode_remove(cr);
-
-		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
-			error = secpolicy_vnode_chown(cr, owner);
-		}
-		if (error == 0) {
-			/*
-			 * See if any bits other than those already checked
-			 * for are still present.  If so then return EACCES
-			 */
-			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
-				error = SET_ERROR(EACCES);
-			}
-		}
-	} else if (error == 0) {
-		error = secpolicy_vnode_access2(cr, ZTOI(zp), owner,
-		    needed_bits, needed_bits);
-	}
-
-	if (is_attr)
-		iput(ZTOI(xzp));
-
-	return (error);
-}
-
-/*
- * Translate traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into
- * native ACL format and call zfs_zaccess()
- */
-int
-zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
-{
-	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
-}
-
-/*
- * Access function for secpolicy_vnode_setattr
- */
-int
-zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
-{
-	int v4_mode = zfs_unix_to_v4(mode >> 6);
-
-	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
-}
-
-static int
-zfs_delete_final_check(znode_t *zp, znode_t *dzp,
-    mode_t available_perms, cred_t *cr)
-{
-	int error;
-	uid_t downer;
-
-	downer = zfs_fuid_map_id(ZTOZSB(dzp), KUID_TO_SUID(ZTOI(dzp)->i_uid),
-	    cr, ZFS_OWNER);
-
-	error = secpolicy_vnode_access2(cr, ZTOI(dzp),
-	    downer, available_perms, S_IWUSR|S_IXUSR);
-
-	if (error == 0)
-		error = zfs_sticky_remove_access(dzp, zp, cr);
-
-	return (error);
-}
-
-/*
- * Determine whether Access should be granted/deny, without
- * consulting least priv subsystem.
- *
- * The following chart is the recommended NFSv4 enforcement for
- * ability to delete an object.
- *
- *      -------------------------------------------------------
- *      |   Parent Dir  |           Target Object Permissions |
- *      |  permissions  |                                     |
- *      -------------------------------------------------------
- *      |               | ACL Allows | ACL Denies| Delete     |
- *      |               |  Delete    |  Delete   | unspecified|
- *      -------------------------------------------------------
- *      |  ACL Allows   | Permit     | Permit    | Permit     |
- *      |  DELETE_CHILD |                                     |
- *      -------------------------------------------------------
- *      |  ACL Denies   | Permit     | Deny      | Deny       |
- *      |  DELETE_CHILD |            |           |            |
- *      -------------------------------------------------------
- *      | ACL specifies |            |           |            |
- *      | only allow    | Permit     | Permit    | Permit     |
- *      | write and     |            |           |            |
- *      | execute       |            |           |            |
- *      -------------------------------------------------------
- *      | ACL denies    |            |           |            |
- *      | write and     | Permit     | Deny      | Deny       |
- *      | execute       |            |           |            |
- *      -------------------------------------------------------
- *         ^
- *         |
- *         No search privilege, can't even look up file?
- *
- */
-int
-zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
-{
-	uint32_t dzp_working_mode = 0;
-	uint32_t zp_working_mode = 0;
-	int dzp_error, zp_error;
-	mode_t available_perms;
-	boolean_t dzpcheck_privs = B_TRUE;
-	boolean_t zpcheck_privs = B_TRUE;
-
-	/*
-	 * We want specific DELETE permissions to
-	 * take precedence over WRITE/EXECUTE.  We don't
-	 * want an ACL such as this to mess us up.
-	 * user:joe:write_data:deny,user:joe:delete:allow
-	 *
-	 * However, deny permissions may ultimately be overridden
-	 * by secpolicy_vnode_access().
-	 *
-	 * We will ask for all of the necessary permissions and then
-	 * look at the working modes from the directory and target object
-	 * to determine what was found.
-	 */
-
-	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
-		return (SET_ERROR(EPERM));
-
-	/*
-	 * First row
-	 * If the directory permissions allow the delete, we are done.
-	 */
-	if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
-	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
-		return (0);
-
-	/*
-	 * If target object has delete permission then we are done
-	 */
-	if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
-	    &zpcheck_privs, B_FALSE, cr)) == 0)
-		return (0);
-
-	ASSERT(dzp_error && zp_error);
-
-	if (!dzpcheck_privs)
-		return (dzp_error);
-	if (!zpcheck_privs)
-		return (zp_error);
-
-	/*
-	 * Second row
-	 *
-	 * If directory returns EACCES then delete_child was denied
-	 * due to deny delete_child.  In this case send the request through
-	 * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
-	 * since that *could* allow the delete based on write/execute permission
-	 * and we want delete permissions to override write/execute.
-	 */
-
-	if (dzp_error == EACCES)
-		return (secpolicy_vnode_remove(cr));
-
-	/*
-	 * Third Row
-	 * only need to see if we have write/execute on directory.
-	 */
-
-	dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
-	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
-
-	if (dzp_error != 0 && !dzpcheck_privs)
-		return (dzp_error);
-
-	/*
-	 * Fourth row
-	 */
-
-	available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : S_IWUSR;
-	available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : S_IXUSR;
-
-	return (zfs_delete_final_check(zp, dzp, available_perms, cr));
-
-}
-
-int
-zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
-    znode_t *tzp, cred_t *cr)
-{
-	int add_perm;
-	int error;
-
-	if (szp->z_pflags & ZFS_AV_QUARANTINED)
-		return (SET_ERROR(EACCES));
-
-	add_perm = S_ISDIR(ZTOI(szp)->i_mode) ?
-	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
-
-	/*
-	 * Rename permissions are combination of delete permission +
-	 * add file/subdir permission.
-	 */
-
-	/*
-	 * first make sure we do the delete portion.
-	 *
-	 * If that succeeds then check for add_file/add_subdir permissions
-	 */
-
-	if ((error = zfs_zaccess_delete(sdzp, szp, cr)))
-		return (error);
-
-	/*
-	 * If we have a tzp, see if we can delete it?
-	 */
-	if (tzp) {
-		if ((error = zfs_zaccess_delete(tdzp, tzp, cr)))
-			return (error);
-	}
-
-	/*
-	 * Now check for add permissions
-	 */
-	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
-
-	return (error);
-}

diff --git a/zfs/module/zfs/zfs_byteswap.c b/zfs/module/zfs/zfs_byteswap.c
index 1b8bb82..cd35849 100644
--- a/zfs/module/zfs/zfs_byteswap.c
+++ b/zfs/module/zfs/zfs_byteswap.c

@@ -30,6 +30,9 @@
 #include <sys/zfs_sa.h>
 #include <sys/zfs_acl.h>
 
+#ifndef _KERNEL
+static
+#endif
 void
 zfs_oldace_byteswap(ace_t *ace, int ace_cnt)
 {
@@ -46,6 +49,9 @@
 /*
  * swap ace_t and ace_object_t
  */
+#ifndef _KERNEL
+static
+#endif
 void
 zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout)
 {

diff --git a/zfs/module/zfs/zfs_ctldir.c b/zfs/module/zfs/zfs_ctldir.c
deleted file mode 100644
index fe21dbf..0000000
--- a/zfs/module/zfs/zfs_ctldir.c
+++ /dev/null

@@ -1,1260 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- *
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * LLNL-CODE-403049.
- * Rewritten for Linux by:
- *   Rohan Puri <rohan.puri15@gmail.com>
- *   Brian Behlendorf <behlendorf1@llnl.gov>
- * Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright (c) 2018 George Melikov. All Rights Reserved.
- * Copyright (c) 2019 Datto, Inc. All rights reserved.
- * Copyright (c) 2020 The MathWorks, Inc. All rights reserved.
- */
-
-/*
- * ZFS control directory (a.k.a. ".zfs")
- *
- * This directory provides a common location for all ZFS meta-objects.
- * Currently, this is only the 'snapshot' and 'shares' directory, but this may
- * expand in the future.  The elements are built dynamically, as the hierarchy
- * does not actually exist on disk.
- *
- * For 'snapshot', we don't want to have all snapshots always mounted, because
- * this would take up a huge amount of space in /etc/mnttab.  We have three
- * types of objects:
- *
- *	ctldir ------> snapshotdir -------> snapshot
- *                                             |
- *                                             |
- *                                             V
- *                                         mounted fs
- *
- * The 'snapshot' node contains just enough information to lookup '..' and act
- * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
- * perform an automount of the underlying filesystem and return the
- * corresponding inode.
- *
- * All mounts are handled automatically by an user mode helper which invokes
- * the mount procedure.  Unmounts are handled by allowing the mount
- * point to expire so the kernel may automatically unmount it.
- *
- * The '.zfs', '.zfs/snapshot', and all directories created under
- * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
- * zfsvfs_t as the head filesystem (what '.zfs' lives under).
- *
- * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
- * (ie: snapshots) are complete ZFS filesystems and have their own unique
- * zfsvfs_t.  However, the fsid reported by these mounts will be the same
- * as that used by the parent zfsvfs_t to make NFS happy.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <sys/pathname.h>
-#include <sys/vfs.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/stat.h>
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_destroy.h>
-#include <sys/dsl_deleg.h>
-#include <sys/zpl.h>
-#include <sys/mntent.h>
-#include "zfs_namecheck.h"
-
-/*
- * Two AVL trees are maintained which contain all currently automounted
- * snapshots.  Every automounted snapshots maps to a single zfs_snapentry_t
- * entry which MUST:
- *
- *   - be attached to both trees, and
- *   - be unique, no duplicate entries are allowed.
- *
- * The zfs_snapshots_by_name tree is indexed by the full dataset name
- * while the zfs_snapshots_by_objsetid tree is indexed by the unique
- * objsetid.  This allows for fast lookups either by name or objsetid.
- */
-static avl_tree_t zfs_snapshots_by_name;
-static avl_tree_t zfs_snapshots_by_objsetid;
-static krwlock_t zfs_snapshot_lock;
-
-/*
- * Control Directory Tunables (.zfs)
- */
-int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
-int zfs_admin_snapshot = 0;
-
-typedef struct {
-	char		*se_name;	/* full snapshot name */
-	char		*se_path;	/* full mount path */
-	spa_t		*se_spa;	/* pool spa */
-	uint64_t	se_objsetid;	/* snapshot objset id */
-	struct dentry   *se_root_dentry; /* snapshot root dentry */
-	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
-	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
-	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
-	zfs_refcount_t	se_refcount;	/* reference count */
-} zfs_snapentry_t;
-
-static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
-
-/*
- * Allocate a new zfs_snapentry_t being careful to make a copy of the
- * the snapshot name and provided mount point.  No reference is taken.
- */
-static zfs_snapentry_t *
-zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa,
-    uint64_t objsetid, struct dentry *root_dentry)
-{
-	zfs_snapentry_t *se;
-
-	se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
-
-	se->se_name = strdup(full_name);
-	se->se_path = strdup(full_path);
-	se->se_spa = spa;
-	se->se_objsetid = objsetid;
-	se->se_root_dentry = root_dentry;
-	se->se_taskqid = TASKQID_INVALID;
-
-	zfs_refcount_create(&se->se_refcount);
-
-	return (se);
-}
-
-/*
- * Free a zfs_snapentry_t the caller must ensure there are no active
- * references.
- */
-static void
-zfsctl_snapshot_free(zfs_snapentry_t *se)
-{
-	zfs_refcount_destroy(&se->se_refcount);
-	strfree(se->se_name);
-	strfree(se->se_path);
-
-	kmem_free(se, sizeof (zfs_snapentry_t));
-}
-
-/*
- * Hold a reference on the zfs_snapentry_t.
- */
-static void
-zfsctl_snapshot_hold(zfs_snapentry_t *se)
-{
-	zfs_refcount_add(&se->se_refcount, NULL);
-}
-
-/*
- * Release a reference on the zfs_snapentry_t.  When the number of
- * references drops to zero the structure will be freed.
- */
-static void
-zfsctl_snapshot_rele(zfs_snapentry_t *se)
-{
-	if (zfs_refcount_remove(&se->se_refcount, NULL) == 0)
-		zfsctl_snapshot_free(se);
-}
-
-/*
- * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
- * zfs_snapshots_by_objsetid trees.  While the zfs_snapentry_t is part
- * of the trees a reference is held.
- */
-static void
-zfsctl_snapshot_add(zfs_snapentry_t *se)
-{
-	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
-	zfsctl_snapshot_hold(se);
-	avl_add(&zfs_snapshots_by_name, se);
-	avl_add(&zfs_snapshots_by_objsetid, se);
-}
-
-/*
- * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
- * zfs_snapshots_by_objsetid trees.  Upon removal a reference is dropped,
- * this can result in the structure being freed if that was the last
- * remaining reference.
- */
-static void
-zfsctl_snapshot_remove(zfs_snapentry_t *se)
-{
-	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
-	avl_remove(&zfs_snapshots_by_name, se);
-	avl_remove(&zfs_snapshots_by_objsetid, se);
-	zfsctl_snapshot_rele(se);
-}
-
-/*
- * Snapshot name comparison function for the zfs_snapshots_by_name.
- */
-static int
-snapentry_compare_by_name(const void *a, const void *b)
-{
-	const zfs_snapentry_t *se_a = a;
-	const zfs_snapentry_t *se_b = b;
-	int ret;
-
-	ret = strcmp(se_a->se_name, se_b->se_name);
-
-	if (ret < 0)
-		return (-1);
-	else if (ret > 0)
-		return (1);
-	else
-		return (0);
-}
-
-/*
- * Snapshot name comparison function for the zfs_snapshots_by_objsetid.
- */
-static int
-snapentry_compare_by_objsetid(const void *a, const void *b)
-{
-	const zfs_snapentry_t *se_a = a;
-	const zfs_snapentry_t *se_b = b;
-
-	if (se_a->se_spa != se_b->se_spa)
-		return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
-
-	if (se_a->se_objsetid < se_b->se_objsetid)
-		return (-1);
-	else if (se_a->se_objsetid > se_b->se_objsetid)
-		return (1);
-	else
-		return (0);
-}
-
-/*
- * Find a zfs_snapentry_t in zfs_snapshots_by_name.  If the snapname
- * is found a pointer to the zfs_snapentry_t is returned and a reference
- * taken on the structure.  The caller is responsible for dropping the
- * reference with zfsctl_snapshot_rele().  If the snapname is not found
- * NULL will be returned.
- */
-static zfs_snapentry_t *
-zfsctl_snapshot_find_by_name(char *snapname)
-{
-	zfs_snapentry_t *se, search;
-
-	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
-
-	search.se_name = snapname;
-	se = avl_find(&zfs_snapshots_by_name, &search, NULL);
-	if (se)
-		zfsctl_snapshot_hold(se);
-
-	return (se);
-}
-
-/*
- * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id
- * rather than the snapname.  In all other respects it behaves the same
- * as zfsctl_snapshot_find_by_name().
- */
-static zfs_snapentry_t *
-zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
-{
-	zfs_snapentry_t *se, search;
-
-	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
-
-	search.se_spa = spa;
-	search.se_objsetid = objsetid;
-	se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL);
-	if (se)
-		zfsctl_snapshot_hold(se);
-
-	return (se);
-}
-
-/*
- * Rename a zfs_snapentry_t in the zfs_snapshots_by_name.  The structure is
- * removed, renamed, and added back to the new correct location in the tree.
- */
-static int
-zfsctl_snapshot_rename(char *old_snapname, char *new_snapname)
-{
-	zfs_snapentry_t *se;
-
-	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
-
-	se = zfsctl_snapshot_find_by_name(old_snapname);
-	if (se == NULL)
-		return (SET_ERROR(ENOENT));
-
-	zfsctl_snapshot_remove(se);
-	strfree(se->se_name);
-	se->se_name = strdup(new_snapname);
-	zfsctl_snapshot_add(se);
-	zfsctl_snapshot_rele(se);
-
-	return (0);
-}
-
-/*
- * Delayed task responsible for unmounting an expired automounted snapshot.
- */
-static void
-snapentry_expire(void *data)
-{
-	zfs_snapentry_t *se = (zfs_snapentry_t *)data;
-	spa_t *spa = se->se_spa;
-	uint64_t objsetid = se->se_objsetid;
-
-	if (zfs_expire_snapshot <= 0) {
-		zfsctl_snapshot_rele(se);
-		return;
-	}
-
-	se->se_taskqid = TASKQID_INVALID;
-	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
-	zfsctl_snapshot_rele(se);
-
-	/*
-	 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
-	 * This can occur when the snapshot is busy.
-	 */
-	rw_enter(&zfs_snapshot_lock, RW_READER);
-	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
-		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
-		zfsctl_snapshot_rele(se);
-	}
-	rw_exit(&zfs_snapshot_lock);
-}
-
-/*
- * Cancel an automatic unmount of a snapname.  This callback is responsible
- * for dropping the reference on the zfs_snapentry_t which was taken when
- * during dispatch.
- */
-static void
-zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
-{
-	if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
-		se->se_taskqid = TASKQID_INVALID;
-		zfsctl_snapshot_rele(se);
-	}
-}
-
-/*
- * Dispatch the unmount task for delayed handling with a hold protecting it.
- */
-static void
-zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
-{
-	ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);
-
-	if (delay <= 0)
-		return;
-
-	zfsctl_snapshot_hold(se);
-	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
-	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
-}
-
-/*
- * Schedule an automatic unmount of objset id to occur in delay seconds from
- * now.  Any previous delayed unmount will be cancelled in favor of the
- * updated deadline.  A reference is taken by zfsctl_snapshot_find_by_name()
- * and held until the outstanding task is handled or cancelled.
- */
-int
-zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
-{
-	zfs_snapentry_t *se;
-	int error = ENOENT;
-
-	rw_enter(&zfs_snapshot_lock, RW_READER);
-	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
-		zfsctl_snapshot_unmount_cancel(se);
-		zfsctl_snapshot_unmount_delay_impl(se, delay);
-		zfsctl_snapshot_rele(se);
-		error = 0;
-	}
-	rw_exit(&zfs_snapshot_lock);
-
-	return (error);
-}
-
-/*
- * Check if snapname is currently mounted.  Returned non-zero when mounted
- * and zero when unmounted.
- */
-static boolean_t
-zfsctl_snapshot_ismounted(char *snapname)
-{
-	zfs_snapentry_t *se;
-	boolean_t ismounted = B_FALSE;
-
-	rw_enter(&zfs_snapshot_lock, RW_READER);
-	if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
-		zfsctl_snapshot_rele(se);
-		ismounted = B_TRUE;
-	}
-	rw_exit(&zfs_snapshot_lock);
-
-	return (ismounted);
-}
-
-/*
- * Check if the given inode is a part of the virtual .zfs directory.
- */
-boolean_t
-zfsctl_is_node(struct inode *ip)
-{
-	return (ITOZ(ip)->z_is_ctldir);
-}
-
-/*
- * Check if the given inode is a .zfs/snapshots/snapname directory.
- */
-boolean_t
-zfsctl_is_snapdir(struct inode *ip)
-{
-	return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
-}
-
-/*
- * Allocate a new inode with the passed id and ops.
- */
-static struct inode *
-zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
-    const struct file_operations *fops, const struct inode_operations *ops)
-{
-	inode_timespec_t now;
-	struct inode *ip;
-	znode_t *zp;
-
-	ip = new_inode(zfsvfs->z_sb);
-	if (ip == NULL)
-		return (NULL);
-
-	now = current_time(ip);
-	zp = ITOZ(ip);
-	ASSERT3P(zp->z_dirlocks, ==, NULL);
-	ASSERT3P(zp->z_acl_cached, ==, NULL);
-	ASSERT3P(zp->z_xattr_cached, ==, NULL);
-	zp->z_id = id;
-	zp->z_unlinked = B_FALSE;
-	zp->z_atime_dirty = B_FALSE;
-	zp->z_zn_prefetch = B_FALSE;
-	zp->z_moved = B_FALSE;
-	zp->z_is_sa = B_FALSE;
-	zp->z_is_mapped = B_FALSE;
-	zp->z_is_ctldir = B_TRUE;
-	zp->z_is_stale = B_FALSE;
-	zp->z_sa_hdl = NULL;
-	zp->z_blksz = 0;
-	zp->z_seq = 0;
-	zp->z_mapcnt = 0;
-	zp->z_size = 0;
-	zp->z_pflags = 0;
-	zp->z_mode = 0;
-	zp->z_sync_cnt = 0;
-	ip->i_generation = 0;
-	ip->i_ino = id;
-	ip->i_mode = (S_IFDIR | S_IRWXUGO);
-	ip->i_uid = SUID_TO_KUID(0);
-	ip->i_gid = SGID_TO_KGID(0);
-	ip->i_blkbits = SPA_MINBLOCKSHIFT;
-	ip->i_atime = now;
-	ip->i_mtime = now;
-	ip->i_ctime = now;
-	ip->i_fop = fops;
-	ip->i_op = ops;
-#if defined(IOP_XATTR)
-	ip->i_opflags &= ~IOP_XATTR;
-#endif
-
-	if (insert_inode_locked(ip)) {
-		unlock_new_inode(ip);
-		iput(ip);
-		return (NULL);
-	}
-
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
-	membar_producer();
-	mutex_exit(&zfsvfs->z_znodes_lock);
-
-	unlock_new_inode(ip);
-
-	return (ip);
-}
-
-/*
- * Lookup the inode with given id, it will be allocated if needed.
- */
-static struct inode *
-zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
-    const struct file_operations *fops, const struct inode_operations *ops)
-{
-	struct inode *ip = NULL;
-
-	while (ip == NULL) {
-		ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
-		if (ip)
-			break;
-
-		/* May fail due to concurrent zfsctl_inode_alloc() */
-		ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
-	}
-
-	return (ip);
-}
-
-/*
- * Create the '.zfs' directory.  This directory is cached as part of the VFS
- * structure.  This results in a hold on the zfsvfs_t.  The code in zfs_umount()
- * therefore checks against a vfs_count of 2 instead of 1.  This reference
- * is removed when the ctldir is destroyed in the unmount.  All other entities
- * under the '.zfs' directory are created dynamically as needed.
- *
- * Because the dynamically created '.zfs' directory entries assume the use
- * of 64-bit inode numbers this support must be disabled on 32-bit systems.
- */
-int
-zfsctl_create(zfsvfs_t *zfsvfs)
-{
-	ASSERT(zfsvfs->z_ctldir == NULL);
-
-	zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
-	    &zpl_fops_root, &zpl_ops_root);
-	if (zfsvfs->z_ctldir == NULL)
-		return (SET_ERROR(ENOENT));
-
-	return (0);
-}
-
-/*
- * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name.
- * Only called when the filesystem is unmounted.
- */
-void
-zfsctl_destroy(zfsvfs_t *zfsvfs)
-{
-	if (zfsvfs->z_issnap) {
-		zfs_snapentry_t *se;
-		spa_t *spa = zfsvfs->z_os->os_spa;
-		uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
-
-		rw_enter(&zfs_snapshot_lock, RW_WRITER);
-		se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
-		if (se != NULL)
-			zfsctl_snapshot_remove(se);
-		rw_exit(&zfs_snapshot_lock);
-		if (se != NULL) {
-			zfsctl_snapshot_unmount_cancel(se);
-			zfsctl_snapshot_rele(se);
-		}
-	} else if (zfsvfs->z_ctldir) {
-		iput(zfsvfs->z_ctldir);
-		zfsvfs->z_ctldir = NULL;
-	}
-}
-
-/*
- * Given a root znode, retrieve the associated .zfs directory.
- * Add a hold to the vnode and return it.
- */
-struct inode *
-zfsctl_root(znode_t *zp)
-{
-	ASSERT(zfs_has_ctldir(zp));
-	igrab(ZTOZSB(zp)->z_ctldir);
-	return (ZTOZSB(zp)->z_ctldir);
-}
-
-/*
- * Generate a long fid to indicate a snapdir. We encode whether snapdir is
- * already mounted in gen field. We do this because nfsd lookup will not
- * trigger automount. Next time the nfsd does fh_to_dentry, we will notice
- * this and do automount and return ESTALE to force nfsd revalidate and follow
- * mount.
- */
-static int
-zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
-{
-	zfid_short_t *zfid = (zfid_short_t *)fidp;
-	zfid_long_t *zlfid = (zfid_long_t *)fidp;
-	uint32_t gen = 0;
-	uint64_t object;
-	uint64_t objsetid;
-	int i;
-	struct dentry *dentry;
-
-	if (fidp->fid_len < LONG_FID_LEN) {
-		fidp->fid_len = LONG_FID_LEN;
-		return (SET_ERROR(ENOSPC));
-	}
-
-	object = ip->i_ino;
-	objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
-	zfid->zf_len = LONG_FID_LEN;
-
-	dentry = d_obtain_alias(igrab(ip));
-	if (!IS_ERR(dentry)) {
-		gen = !!d_mountpoint(dentry);
-		dput(dentry);
-	}
-
-	for (i = 0; i < sizeof (zfid->zf_object); i++)
-		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
-
-	for (i = 0; i < sizeof (zfid->zf_gen); i++)
-		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
-
-	for (i = 0; i < sizeof (zlfid->zf_setid); i++)
-		zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
-
-	for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
-		zlfid->zf_setgen[i] = 0;
-
-	return (0);
-}
-
-/*
- * Generate an appropriate fid for an entry in the .zfs directory.
- */
-int
-zfsctl_fid(struct inode *ip, fid_t *fidp)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	uint64_t	object = zp->z_id;
-	zfid_short_t	*zfid;
-	int		i;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (zfsctl_is_snapdir(ip)) {
-		ZFS_EXIT(zfsvfs);
-		return (zfsctl_snapdir_fid(ip, fidp));
-	}
-
-	if (fidp->fid_len < SHORT_FID_LEN) {
-		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOSPC));
-	}
-
-	zfid = (zfid_short_t *)fidp;
-
-	zfid->zf_len = SHORT_FID_LEN;
-
-	for (i = 0; i < sizeof (zfid->zf_object); i++)
-		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
-
-	/* .zfs znodes always have a generation number of 0 */
-	for (i = 0; i < sizeof (zfid->zf_gen); i++)
-		zfid->zf_gen[i] = 0;
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*
- * Construct a full dataset name in full_name: "pool/dataset@snap_name"
- */
-static int
-zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
-    char *full_name)
-{
-	objset_t *os = zfsvfs->z_os;
-
-	if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
-		return (SET_ERROR(EILSEQ));
-
-	dmu_objset_name(os, full_name);
-	if ((strlen(full_name) + 1 + strlen(snap_name)) >= len)
-		return (SET_ERROR(ENAMETOOLONG));
-
-	(void) strcat(full_name, "@");
-	(void) strcat(full_name, snap_name);
-
-	return (0);
-}
-
-/*
- * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
- */
-static int
-zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
-    int path_len, char *full_path)
-{
-	objset_t *os = zfsvfs->z_os;
-	fstrans_cookie_t cookie;
-	char *snapname;
-	boolean_t case_conflict;
-	uint64_t id, pos = 0;
-	int error = 0;
-
-	if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
-		return (SET_ERROR(ENOENT));
-
-	cookie = spl_fstrans_mark();
-	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-
-	while (error == 0) {
-		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
-		error = dmu_snapshot_list_next(zfsvfs->z_os,
-		    ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
-		    &case_conflict);
-		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
-		if (error)
-			goto out;
-
-		if (id == objsetid)
-			break;
-	}
-
-	snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
-	    zfsvfs->z_vfs->vfs_mntpoint, snapname);
-out:
-	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
-	spl_fstrans_unmark(cookie);
-
-	return (error);
-}
-
-/*
- * Special case the handling of "..".
- */
-int
-zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
-    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	int error = 0;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (strcmp(name, "..") == 0) {
-		*ipp = dip->i_sb->s_root->d_inode;
-	} else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
-		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
-		    &zpl_fops_snapdir, &zpl_ops_snapdir);
-	} else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
-		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
-		    &zpl_fops_shares, &zpl_ops_shares);
-	} else {
-		*ipp = NULL;
-	}
-
-	if (*ipp == NULL)
-		error = SET_ERROR(ENOENT);
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Lookup entry point for the 'snapshot' directory.  Try to open the
- * snapshot if it exist, creating the pseudo filesystem inode as necessary.
- */
-int
-zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
-    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	uint64_t id;
-	int error;
-
-	ZFS_ENTER(zfsvfs);
-
-	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
-	if (error) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id,
-	    &simple_dir_operations, &simple_dir_inode_operations);
-	if (*ipp == NULL)
-		error = SET_ERROR(ENOENT);
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Renaming a directory under '.zfs/snapshot' will automatically trigger
- * a rename of the snapshot to the new given name.  The rename is confined
- * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere.
- */
-int
-zfsctl_snapdir_rename(struct inode *sdip, char *snm,
-    struct inode *tdip, char *tnm, cred_t *cr, int flags)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(sdip);
-	char *to, *from, *real, *fsname;
-	int error;
-
-	if (!zfs_admin_snapshot)
-		return (SET_ERROR(EACCES));
-
-	ZFS_ENTER(zfsvfs);
-
-	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-	fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
-		error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
-		    ZFS_MAX_DATASET_NAME_LEN, NULL);
-		if (error == 0) {
-			snm = real;
-		} else if (error != ENOTSUP) {
-			goto out;
-		}
-	}
-
-	dmu_objset_name(zfsvfs->z_os, fsname);
-
-	error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
-	    ZFS_MAX_DATASET_NAME_LEN, from);
-	if (error == 0)
-		error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
-		    ZFS_MAX_DATASET_NAME_LEN, to);
-	if (error == 0)
-		error = zfs_secpolicy_rename_perms(from, to, cr);
-	if (error != 0)
-		goto out;
-
-	/*
-	 * Cannot move snapshots out of the snapdir.
-	 */
-	if (sdip != tdip) {
-		error = SET_ERROR(EINVAL);
-		goto out;
-	}
-
-	/*
-	 * No-op when names are identical.
-	 */
-	if (strcmp(snm, tnm) == 0) {
-		error = 0;
-		goto out;
-	}
-
-	rw_enter(&zfs_snapshot_lock, RW_WRITER);
-
-	error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
-	if (error == 0)
-		(void) zfsctl_snapshot_rename(snm, tnm);
-
-	rw_exit(&zfs_snapshot_lock);
-out:
-	kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
-	kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
-	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
-	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Removing a directory under '.zfs/snapshot' will automatically trigger
- * the removal of the snapshot with the given name.
- */
-int
-zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	char *snapname, *real;
-	int error;
-
-	if (!zfs_admin_snapshot)
-		return (SET_ERROR(EACCES));
-
-	ZFS_ENTER(zfsvfs);
-
-	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
-		error = dmu_snapshot_realname(zfsvfs->z_os, name, real,
-		    ZFS_MAX_DATASET_NAME_LEN, NULL);
-		if (error == 0) {
-			name = real;
-		} else if (error != ENOTSUP) {
-			goto out;
-		}
-	}
-
-	error = zfsctl_snapshot_name(ITOZSB(dip), name,
-	    ZFS_MAX_DATASET_NAME_LEN, snapname);
-	if (error == 0)
-		error = zfs_secpolicy_destroy_perms(snapname, cr);
-	if (error != 0)
-		goto out;
-
-	error = zfsctl_snapshot_unmount(snapname, MNT_FORCE);
-	if ((error == 0) || (error == ENOENT))
-		error = dsl_destroy_snapshot(snapname, B_FALSE);
-out:
-	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
-	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Creating a directory under '.zfs/snapshot' will automatically trigger
- * the creation of a new snapshot with the given name.
- */
-int
-zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
-    struct inode **ipp, cred_t *cr, int flags)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	char *dsname;
-	int error;
-
-	if (!zfs_admin_snapshot)
-		return (SET_ERROR(EACCES));
-
-	dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-
-	if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
-		error = SET_ERROR(EILSEQ);
-		goto out;
-	}
-
-	dmu_objset_name(zfsvfs->z_os, dsname);
-
-	error = zfs_secpolicy_snapshot_perms(dsname, cr);
-	if (error != 0)
-		goto out;
-
-	if (error == 0) {
-		error = dmu_objset_snapshot_one(dsname, dirname);
-		if (error != 0)
-			goto out;
-
-		error = zfsctl_snapdir_lookup(dip, dirname, ipp,
-		    0, cr, NULL, NULL);
-	}
-out:
-	kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
-
-	return (error);
-}
-
-/*
- * Flush everything out of the kernel's export table and such.
- * This is needed as once the snapshot is used over NFS, its
- * entries in svc_export and svc_expkey caches hold reference
- * to the snapshot mount point. There is no known way of flushing
- * only the entries related to the snapshot.
- */
-static void
-exportfs_flush(void)
-{
-	char *argv[] = { "/usr/sbin/exportfs", "-f", NULL };
-	char *envp[] = { NULL };
-
-	(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
-}
-
-/*
- * Attempt to unmount a snapshot by making a call to user space.
- * There is no assurance that this can or will succeed, is just a
- * best effort.  In the case where it does fail, perhaps because
- * it's in use, the unmount will fail harmlessly.
- */
-int
-zfsctl_snapshot_unmount(char *snapname, int flags)
-{
-	char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL,
-	    NULL };
-	char *envp[] = { NULL };
-	zfs_snapentry_t *se;
-	int error;
-
-	rw_enter(&zfs_snapshot_lock, RW_READER);
-	if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) {
-		rw_exit(&zfs_snapshot_lock);
-		return (SET_ERROR(ENOENT));
-	}
-	rw_exit(&zfs_snapshot_lock);
-
-	exportfs_flush();
-
-	if (flags & MNT_FORCE)
-		argv[4] = "-fn";
-	argv[5] = se->se_path;
-	dprintf("unmount; path=%s\n", se->se_path);
-	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
-	zfsctl_snapshot_rele(se);
-
-
-	/*
-	 * The umount system utility will return 256 on error.  We must
-	 * assume this error is because the file system is busy so it is
-	 * converted to the more sensible EBUSY.
-	 */
-	if (error)
-		error = SET_ERROR(EBUSY);
-
-	return (error);
-}
-
-int
-zfsctl_snapshot_mount(struct path *path, int flags)
-{
-	struct dentry *dentry = path->dentry;
-	struct inode *ip = dentry->d_inode;
-	zfsvfs_t *zfsvfs;
-	zfsvfs_t *snap_zfsvfs;
-	zfs_snapentry_t *se;
-	char *full_name, *full_path;
-	char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL,
-	    NULL };
-	char *envp[] = { NULL };
-	int error;
-	struct path spath;
-
-	if (ip == NULL)
-		return (SET_ERROR(EISDIR));
-
-	zfsvfs = ITOZSB(ip);
-	ZFS_ENTER(zfsvfs);
-
-	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
-	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-
-	error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
-	    ZFS_MAX_DATASET_NAME_LEN, full_name);
-	if (error)
-		goto error;
-
-	/*
-	 * Construct a mount point path from sb of the ctldir inode and dirent
-	 * name, instead of from d_path(), so that chroot'd process doesn't fail
-	 * on mount.zfs(8).
-	 */
-	snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
-	    zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
-	    dname(dentry));
-
-	/*
-	 * Multiple concurrent automounts of a snapshot are never allowed.
-	 * The snapshot may be manually mounted as many times as desired.
-	 */
-	if (zfsctl_snapshot_ismounted(full_name)) {
-		error = 0;
-		goto error;
-	}
-
-	/*
-	 * Attempt to mount the snapshot from user space.  Normally this
-	 * would be done using the vfs_kern_mount() function, however that
-	 * function is marked GPL-only and cannot be used.  On error we
-	 * careful to log the real error to the console and return EISDIR
-	 * to safely abort the automount.  This should be very rare.
-	 *
-	 * If the user mode helper happens to return EBUSY, a concurrent
-	 * mount is already in progress in which case the error is ignored.
-	 * Take note that if the program was executed successfully the return
-	 * value from call_usermodehelper() will be (exitcode << 8 + signal).
-	 */
-	dprintf("mount; name=%s path=%s\n", full_name, full_path);
-	argv[5] = full_name;
-	argv[6] = full_path;
-	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
-	if (error) {
-		if (!(error & MOUNT_BUSY << 8)) {
-			zfs_dbgmsg("Unable to automount %s error=%d",
-			    full_path, error);
-			error = SET_ERROR(EISDIR);
-		} else {
-			/*
-			 * EBUSY, this could mean a concurrent mount, or the
-			 * snapshot has already been mounted at completely
-			 * different place. We return 0 so VFS will retry. For
-			 * the latter case the VFS will retry several times
-			 * and return ELOOP, which is probably not a very good
-			 * behavior.
-			 */
-			error = 0;
-		}
-		goto error;
-	}
-
-	/*
-	 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
-	 * to identify this as an automounted filesystem.
-	 */
-	spath = *path;
-	path_get(&spath);
-	if (zpl_follow_down_one(&spath)) {
-		snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
-		snap_zfsvfs->z_parent = zfsvfs;
-		dentry = spath.dentry;
-		spath.mnt->mnt_flags |= MNT_SHRINKABLE;
-
-		rw_enter(&zfs_snapshot_lock, RW_WRITER);
-		se = zfsctl_snapshot_alloc(full_name, full_path,
-		    snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
-		    dentry);
-		zfsctl_snapshot_add(se);
-		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
-		rw_exit(&zfs_snapshot_lock);
-	}
-	path_put(&spath);
-error:
-	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
-	kmem_free(full_path, MAXPATHLEN);
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Get the snapdir inode from fid
- */
-int
-zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
-    struct inode **ipp)
-{
-	int error;
-	struct path path;
-	char *mnt;
-	struct dentry *dentry;
-
-	mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
-	error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
-	    MAXPATHLEN, mnt);
-	if (error)
-		goto out;
-
-	/* Trigger automount */
-	error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
-	if (error)
-		goto out;
-
-	path_put(&path);
-	/*
-	 * Get the snapdir inode. Note, we don't want to use the above
-	 * path because it contains the root of the snapshot rather
-	 * than the snapdir.
-	 */
-	*ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
-	if (*ipp == NULL) {
-		error = SET_ERROR(ENOENT);
-		goto out;
-	}
-
-	/* check gen, see zfsctl_snapdir_fid */
-	dentry = d_obtain_alias(igrab(*ipp));
-	if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
-		iput(*ipp);
-		*ipp = NULL;
-		error = SET_ERROR(ENOENT);
-	}
-	if (!IS_ERR(dentry))
-		dput(dentry);
-out:
-	kmem_free(mnt, MAXPATHLEN);
-	return (error);
-}
-
-int
-zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
-    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	struct inode *ip;
-	znode_t *dzp;
-	int error;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (zfsvfs->z_shares_dir == 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
-		error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
-		iput(ZTOI(dzp));
-	}
-
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*
- * Initialize the various pieces we'll need to create and manipulate .zfs
- * directories.  Currently this is unused but available.
- */
-void
-zfsctl_init(void)
-{
-	avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name,
-	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
-	    se_node_name));
-	avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid,
-	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
-	    se_node_objsetid));
-	rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
-}
-
-/*
- * Cleanup the various pieces we needed for .zfs directories.  In particular
- * ensure the expiry timer is canceled safely.
- */
-void
-zfsctl_fini(void)
-{
-	avl_destroy(&zfs_snapshots_by_name);
-	avl_destroy(&zfs_snapshots_by_objsetid);
-	rw_destroy(&zfs_snapshot_lock);
-}
-
-module_param(zfs_admin_snapshot, int, 0644);
-MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");
-
-module_param(zfs_expire_snapshot, int, 0644);
-MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");

diff --git a/zfs/module/zfs/zfs_debug.c b/zfs/module/zfs/zfs_debug.c
deleted file mode 100644
index cf8bbb3..0000000
--- a/zfs/module/zfs/zfs_debug.c
+++ /dev/null

@@ -1,253 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-
-typedef struct zfs_dbgmsg {
-	procfs_list_node_t	zdm_node;
-	uint64_t		zdm_timestamp;
-	int			zdm_size;
-	char			zdm_msg[1]; /* variable length allocation */
-} zfs_dbgmsg_t;
-
-procfs_list_t zfs_dbgmsgs;
-int zfs_dbgmsg_size = 0;
-int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
-
-/*
- * Internal ZFS debug messages are enabled by default.
- *
- * # Print debug messages
- * cat /proc/spl/kstat/zfs/dbgmsg
- *
- * # Disable the kernel debug message log.
- * echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
- *
- * # Clear the kernel debug message log.
- * echo 0 >/proc/spl/kstat/zfs/dbgmsg
- */
-int zfs_dbgmsg_enable = 1;
-
-static int
-zfs_dbgmsg_show_header(struct seq_file *f)
-{
-	seq_printf(f, "%-12s %-8s\n", "timestamp", "message");
-	return (0);
-}
-
-static int
-zfs_dbgmsg_show(struct seq_file *f, void *p)
-{
-	zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p;
-	seq_printf(f, "%-12llu %-s\n",
-	    (u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
-	return (0);
-}
-
-static void
-zfs_dbgmsg_purge(int max_size)
-{
-	while (zfs_dbgmsg_size > max_size) {
-		zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
-		if (zdm == NULL)
-			return;
-
-		int size = zdm->zdm_size;
-		kmem_free(zdm, size);
-		zfs_dbgmsg_size -= size;
-	}
-}
-
-static int
-zfs_dbgmsg_clear(procfs_list_t *procfs_list)
-{
-	mutex_enter(&zfs_dbgmsgs.pl_lock);
-	zfs_dbgmsg_purge(0);
-	mutex_exit(&zfs_dbgmsgs.pl_lock);
-	return (0);
-}
-
-void
-zfs_dbgmsg_init(void)
-{
-	procfs_list_install("zfs",
-	    "dbgmsg",
-	    0600,
-	    &zfs_dbgmsgs,
-	    zfs_dbgmsg_show,
-	    zfs_dbgmsg_show_header,
-	    zfs_dbgmsg_clear,
-	    offsetof(zfs_dbgmsg_t, zdm_node));
-}
-
-void
-zfs_dbgmsg_fini(void)
-{
-	procfs_list_uninstall(&zfs_dbgmsgs);
-	zfs_dbgmsg_purge(0);
-
-	/*
-	 * TODO - decide how to make this permanent
-	 */
-#ifdef _KERNEL
-	procfs_list_destroy(&zfs_dbgmsgs);
-#endif
-}
-
-void
-__set_error(const char *file, const char *func, int line, int err)
-{
-	/*
-	 * To enable this:
-	 *
-	 * $ echo 512 >/sys/module/zfs/parameters/zfs_flags
-	 */
-	if (zfs_flags & ZFS_DEBUG_SET_ERROR)
-		__dprintf(B_FALSE, file, func, line, "error %lu", err);
-}
-
-void
-__zfs_dbgmsg(char *buf)
-{
-	int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
-	zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
-	zdm->zdm_size = size;
-	zdm->zdm_timestamp = gethrestime_sec();
-	strcpy(zdm->zdm_msg, buf);
-
-	mutex_enter(&zfs_dbgmsgs.pl_lock);
-	procfs_list_add(&zfs_dbgmsgs, zdm);
-	zfs_dbgmsg_size += size;
-	zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
-	mutex_exit(&zfs_dbgmsgs.pl_lock);
-}
-
-#ifdef _KERNEL
-
-void
-__dprintf(boolean_t dprint, const char *file, const char *func,
-    int line, const char *fmt, ...)
-{
-	const char *newfile;
-	va_list adx;
-	size_t size;
-	char *buf;
-	char *nl;
-	int i;
-	char *prefix = (dprint) ? "dprintf: " : "";
-
-	size = 1024;
-	buf = kmem_alloc(size, KM_SLEEP);
-
-	/*
-	 * Get rid of annoying prefix to filename.
-	 */
-	newfile = strrchr(file, '/');
-	if (newfile != NULL) {
-		newfile = newfile + 1; /* Get rid of leading / */
-	} else {
-		newfile = file;
-	}
-
-	i = snprintf(buf, size, "%s%s:%d:%s(): ", prefix, newfile, line, func);
-
-	if (i < size) {
-		va_start(adx, fmt);
-		(void) vsnprintf(buf + i, size - i, fmt, adx);
-		va_end(adx);
-	}
-
-	/*
-	 * Get rid of trailing newline for dprintf logs.
-	 */
-	if (dprint && buf[0] != '\0') {
-		nl = &buf[strlen(buf) - 1];
-		if (*nl == '\n')
-			*nl = '\0';
-	}
-
-	/*
-	 * To get this data enable the zfs__dprintf trace point as shown:
-	 *
-	 * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
-	 * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
-	 * $ echo 0 > /sys/kernel/debug/tracing/trace
-	 *
-	 * # Dump the ring buffer.
-	 * $ cat /sys/kernel/debug/tracing/trace
-	 */
-	DTRACE_PROBE1(zfs__dprintf, char *, buf);
-
-	/*
-	 * To get this data:
-	 *
-	 * $ cat /proc/spl/kstat/zfs/dbgmsg
-	 *
-	 * To clear the buffer:
-	 * $ echo 0 > /proc/spl/kstat/zfs/dbgmsg
-	 */
-	__zfs_dbgmsg(buf);
-
-	kmem_free(buf, size);
-}
-
-#else
-
-void
-zfs_dbgmsg_print(const char *tag)
-{
-	ssize_t ret __attribute__((unused));
-
-	/*
-	 * We use write() in this function instead of printf()
-	 * so it is safe to call from a signal handler.
-	 */
-	ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
-	ret = write(STDOUT_FILENO, tag, strlen(tag));
-	ret = write(STDOUT_FILENO, ") START:\n", 9);
-
-	mutex_enter(&zfs_dbgmsgs.pl_lock);
-	for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL;
-	    zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) {
-		ret = write(STDOUT_FILENO, zdm->zdm_msg,
-		    strlen(zdm->zdm_msg));
-		ret = write(STDOUT_FILENO, "\n", 1);
-	}
-
-	ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
-	ret = write(STDOUT_FILENO, tag, strlen(tag));
-	ret = write(STDOUT_FILENO, ") END\n", 6);
-
-	mutex_exit(&zfs_dbgmsgs.pl_lock);
-}
-#endif /* _KERNEL */
-
-#ifdef _KERNEL
-module_param(zfs_dbgmsg_enable, int, 0644);
-MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
-
-module_param(zfs_dbgmsg_maxsize, int, 0644);
-MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
-#endif

diff --git a/zfs/module/zfs/zfs_dir.c b/zfs/module/zfs/zfs_dir.c
deleted file mode 100644
index 6bdad73..0000000
--- a/zfs/module/zfs/zfs_dir.c
+++ /dev/null

@@ -1,1205 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/mode.h>
-#include <sys/kmem.h>
-#include <sys/uio.h>
-#include <sys/pathname.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/stat.h>
-#include <sys/sunddi.h>
-#include <sys/random.h>
-#include <sys/policy.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_vnops.h>
-#include <sys/fs/zfs.h>
-#include <sys/zap.h>
-#include <sys/dmu.h>
-#include <sys/atomic.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_fuid.h>
-#include <sys/sa.h>
-#include <sys/zfs_sa.h>
-
-/*
- * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
- * of names after deciding which is the appropriate lookup interface.
- */
-static int
-zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, matchtype_t mt,
-    boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
-{
-	boolean_t conflict = B_FALSE;
-	int error;
-
-	if (zfsvfs->z_norm) {
-		size_t bufsz = 0;
-		char *buf = NULL;
-
-		if (rpnp) {
-			buf = rpnp->pn_buf;
-			bufsz = rpnp->pn_bufsize;
-		}
-
-		/*
-		 * In the non-mixed case we only expect there would ever
-		 * be one match, but we need to use the normalizing lookup.
-		 */
-		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
-		    zoid, mt, buf, bufsz, &conflict);
-	} else {
-		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
-	}
-
-	/*
-	 * Allow multiple entries provided the first entry is
-	 * the object id.  Non-zpl consumers may safely make
-	 * use of the additional space.
-	 *
-	 * XXX: This should be a feature flag for compatibility
-	 */
-	if (error == EOVERFLOW)
-		error = 0;
-
-	if (zfsvfs->z_norm && !error && deflags)
-		*deflags = conflict ? ED_CASE_CONFLICT : 0;
-
-	*zoid = ZFS_DIRENT_OBJ(*zoid);
-
-	return (error);
-}
-
-/*
- * Lock a directory entry.  A dirlock on <dzp, name> protects that name
- * in dzp's directory zap object.  As long as you hold a dirlock, you can
- * assume two things: (1) dzp cannot be reaped, and (2) no other thread
- * can change the zap entry for (i.e. link or unlink) this name.
- *
- * Input arguments:
- *	dzp	- znode for directory
- *	name	- name of entry to lock
- *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
- *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
- *		  ZSHARED: allow concurrent access with other ZSHARED callers.
- *		  ZXATTR: we want dzp's xattr directory
- *		  ZCILOOK: On a mixed sensitivity file system,
- *			   this lookup should be case-insensitive.
- *		  ZCIEXACT: On a purely case-insensitive file system,
- *			    this lookup should be case-sensitive.
- *		  ZRENAMING: we are locking for renaming, force narrow locks
- *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
- *			     current thread already holds it.
- *
- * Output arguments:
- *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
- *	dlpp	- pointer to the dirlock for this entry (NULL on error)
- *      direntflags - (case-insensitive lookup only)
- *		flags if multiple case-sensitive matches exist in directory
- *      realpnp     - (case-insensitive lookup only)
- *		actual name matched within the directory
- *
- * Return value: 0 on success or errno on failure.
- *
- * NOTE: Always checks for, and rejects, '.' and '..'.
- * NOTE: For case-insensitive file systems we take wide locks (see below),
- *	 but return znode pointers to a single match.
- */
-int
-zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
-    int flag, int *direntflags, pathname_t *realpnp)
-{
-	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
-	zfs_dirlock_t	*dl;
-	boolean_t	update;
-	matchtype_t	mt = 0;
-	uint64_t	zoid;
-	int		error = 0;
-	int		cmpflags;
-
-	*zpp = NULL;
-	*dlpp = NULL;
-
-	/*
-	 * Verify that we are not trying to lock '.', '..', or '.zfs'
-	 */
-	if ((name[0] == '.' &&
-	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
-	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
-		return (SET_ERROR(EEXIST));
-
-	/*
-	 * Case sensitivity and normalization preferences are set when
-	 * the file system is created.  These are stored in the
-	 * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
-	 * affect what vnodes can be cached in the DNLC, how we
-	 * perform zap lookups, and the "width" of our dirlocks.
-	 *
-	 * A normal dirlock locks a single name.  Note that with
-	 * normalization a name can be composed multiple ways, but
-	 * when normalized, these names all compare equal.  A wide
-	 * dirlock locks multiple names.  We need these when the file
-	 * system is supporting mixed-mode access.  It is sometimes
-	 * necessary to lock all case permutations of file name at
-	 * once so that simultaneous case-insensitive/case-sensitive
-	 * behaves as rationally as possible.
-	 */
-
-	/*
-	 * When matching we may need to normalize & change case according to
-	 * FS settings.
-	 *
-	 * Note that a normalized match is necessary for a case insensitive
-	 * filesystem when the lookup request is not exact because normalization
-	 * can fold case independent of normalizing code point sequences.
-	 *
-	 * See the table above zfs_dropname().
-	 */
-	if (zfsvfs->z_norm != 0) {
-		mt = MT_NORMALIZE;
-
-		/*
-		 * Determine if the match needs to honor the case specified in
-		 * lookup, and if so keep track of that so that during
-		 * normalization we don't fold case.
-		 */
-		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE &&
-		    (flag & ZCIEXACT)) ||
-		    (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) {
-			mt |= MT_MATCH_CASE;
-		}
-	}
-
-	/*
-	 * Only look in or update the DNLC if we are looking for the
-	 * name on a file system that does not require normalization
-	 * or case folding.  We can also look there if we happen to be
-	 * on a non-normalizing, mixed sensitivity file system IF we
-	 * are looking for the exact name.
-	 *
-	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
-	 * case for performance improvement?
-	 */
-	update = !zfsvfs->z_norm ||
-	    (zfsvfs->z_case == ZFS_CASE_MIXED &&
-	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
-
-	/*
-	 * ZRENAMING indicates we are in a situation where we should
-	 * take narrow locks regardless of the file system's
-	 * preferences for normalizing and case folding.  This will
-	 * prevent us deadlocking trying to grab the same wide lock
-	 * twice if the two names happen to be case-insensitive
-	 * matches.
-	 */
-	if (flag & ZRENAMING)
-		cmpflags = 0;
-	else
-		cmpflags = zfsvfs->z_norm;
-
-	/*
-	 * Wait until there are no locks on this name.
-	 *
-	 * Don't grab the lock if it is already held. However, cannot
-	 * have both ZSHARED and ZHAVELOCK together.
-	 */
-	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
-	if (!(flag & ZHAVELOCK))
-		rw_enter(&dzp->z_name_lock, RW_READER);
-
-	mutex_enter(&dzp->z_lock);
-	for (;;) {
-		if (dzp->z_unlinked && !(flag & ZXATTR)) {
-			mutex_exit(&dzp->z_lock);
-			if (!(flag & ZHAVELOCK))
-				rw_exit(&dzp->z_name_lock);
-			return (SET_ERROR(ENOENT));
-		}
-		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
-			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
-			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
-				break;
-		}
-		if (error != 0) {
-			mutex_exit(&dzp->z_lock);
-			if (!(flag & ZHAVELOCK))
-				rw_exit(&dzp->z_name_lock);
-			return (SET_ERROR(ENOENT));
-		}
-		if (dl == NULL)	{
-			/*
-			 * Allocate a new dirlock and add it to the list.
-			 */
-			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
-			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
-			dl->dl_name = name;
-			dl->dl_sharecnt = 0;
-			dl->dl_namelock = 0;
-			dl->dl_namesize = 0;
-			dl->dl_dzp = dzp;
-			dl->dl_next = dzp->z_dirlocks;
-			dzp->z_dirlocks = dl;
-			break;
-		}
-		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
-			break;
-		cv_wait(&dl->dl_cv, &dzp->z_lock);
-	}
-
-	/*
-	 * If the z_name_lock was NOT held for this dirlock record it.
-	 */
-	if (flag & ZHAVELOCK)
-		dl->dl_namelock = 1;
-
-	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
-		/*
-		 * We're the second shared reference to dl.  Make a copy of
-		 * dl_name in case the first thread goes away before we do.
-		 * Note that we initialize the new name before storing its
-		 * pointer into dl_name, because the first thread may load
-		 * dl->dl_name at any time.  It'll either see the old value,
-		 * which belongs to it, or the new shared copy; either is OK.
-		 */
-		dl->dl_namesize = strlen(dl->dl_name) + 1;
-		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
-		bcopy(dl->dl_name, name, dl->dl_namesize);
-		dl->dl_name = name;
-	}
-
-	mutex_exit(&dzp->z_lock);
-
-	/*
-	 * We have a dirlock on the name.  (Note that it is the dirlock,
-	 * not the dzp's z_lock, that protects the name in the zap object.)
-	 * See if there's an object by this name; if so, put a hold on it.
-	 */
-	if (flag & ZXATTR) {
-		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
-		    sizeof (zoid));
-		if (error == 0)
-			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
-	} else {
-		error = zfs_match_find(zfsvfs, dzp, name, mt,
-		    update, direntflags, realpnp, &zoid);
-	}
-	if (error) {
-		if (error != ENOENT || (flag & ZEXISTS)) {
-			zfs_dirent_unlock(dl);
-			return (error);
-		}
-	} else {
-		if (flag & ZNEW) {
-			zfs_dirent_unlock(dl);
-			return (SET_ERROR(EEXIST));
-		}
-		error = zfs_zget(zfsvfs, zoid, zpp);
-		if (error) {
-			zfs_dirent_unlock(dl);
-			return (error);
-		}
-	}
-
-	*dlpp = dl;
-
-	return (0);
-}
-
-/*
- * Unlock this directory entry and wake anyone who was waiting for it.
- */
-void
-zfs_dirent_unlock(zfs_dirlock_t *dl)
-{
-	znode_t *dzp = dl->dl_dzp;
-	zfs_dirlock_t **prev_dl, *cur_dl;
-
-	mutex_enter(&dzp->z_lock);
-
-	if (!dl->dl_namelock)
-		rw_exit(&dzp->z_name_lock);
-
-	if (dl->dl_sharecnt > 1) {
-		dl->dl_sharecnt--;
-		mutex_exit(&dzp->z_lock);
-		return;
-	}
-	prev_dl = &dzp->z_dirlocks;
-	while ((cur_dl = *prev_dl) != dl)
-		prev_dl = &cur_dl->dl_next;
-	*prev_dl = dl->dl_next;
-	cv_broadcast(&dl->dl_cv);
-	mutex_exit(&dzp->z_lock);
-
-	if (dl->dl_namesize != 0)
-		kmem_free(dl->dl_name, dl->dl_namesize);
-	cv_destroy(&dl->dl_cv);
-	kmem_free(dl, sizeof (*dl));
-}
-
-/*
- * Look up an entry in a directory.
- *
- * NOTE: '.' and '..' are handled as special cases because
- *	no directory entries are actually stored for them.  If this is
- *	the root of a filesystem, then '.zfs' is also treated as a
- *	special pseudo-directory.
- */
-int
-zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
-    int *deflg, pathname_t *rpnp)
-{
-	zfs_dirlock_t *dl;
-	znode_t *zp;
-	int error = 0;
-	uint64_t parent;
-
-	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
-		*ipp = ZTOI(dzp);
-		igrab(*ipp);
-	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
-		zfsvfs_t *zfsvfs = ZTOZSB(dzp);
-
-		/*
-		 * If we are a snapshot mounted under .zfs, return
-		 * the inode pointer for the snapshot directory.
-		 */
-		if ((error = sa_lookup(dzp->z_sa_hdl,
-		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
-			return (error);
-
-		if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
-			error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
-			    "snapshot", ipp, 0, kcred, NULL, NULL);
-			return (error);
-		}
-		rw_enter(&dzp->z_parent_lock, RW_READER);
-		error = zfs_zget(zfsvfs, parent, &zp);
-		if (error == 0)
-			*ipp = ZTOI(zp);
-		rw_exit(&dzp->z_parent_lock);
-	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
-		*ipp = zfsctl_root(dzp);
-	} else {
-		int zf;
-
-		zf = ZEXISTS | ZSHARED;
-		if (flags & FIGNORECASE)
-			zf |= ZCILOOK;
-
-		error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
-		if (error == 0) {
-			*ipp = ZTOI(zp);
-			zfs_dirent_unlock(dl);
-			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
-		}
-		rpnp = NULL;
-	}
-
-	if ((flags & FIGNORECASE) && rpnp && !error)
-		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
-
-	return (error);
-}
-
-/*
- * unlinked Set (formerly known as the "delete queue") Error Handling
- *
- * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
- * don't specify the name of the entry that we will be manipulating.  We
- * also fib and say that we won't be adding any new entries to the
- * unlinked set, even though we might (this is to lower the minimum file
- * size that can be deleted in a full filesystem).  So on the small
- * chance that the nlink list is using a fat zap (ie. has more than
- * 2000 entries), we *may* not pre-read a block that's needed.
- * Therefore it is remotely possible for some of the assertions
- * regarding the unlinked set below to fail due to i/o error.  On a
- * nondebug system, this will result in the space being leaked.
- */
-void
-zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-
-	ASSERT(zp->z_unlinked);
-	ASSERT(ZTOI(zp)->i_nlink == 0);
-
-	VERIFY3U(0, ==,
-	    zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
-
-	dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
-}
-
-/*
- * Clean up any znodes that had no links when we either crashed or
- * (force) umounted the file system.
- */
-static void
-zfs_unlinked_drain_task(void *arg)
-{
-	zfsvfs_t *zfsvfs = arg;
-	zap_cursor_t	zc;
-	zap_attribute_t zap;
-	dmu_object_info_t doi;
-	znode_t		*zp;
-	int		error;
-
-	ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
-
-	/*
-	 * Iterate over the contents of the unlinked set.
-	 */
-	for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
-	    zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
-	    zap_cursor_advance(&zc)) {
-
-		/*
-		 * See what kind of object we have in list
-		 */
-
-		error = dmu_object_info(zfsvfs->z_os,
-		    zap.za_first_integer, &doi);
-		if (error != 0)
-			continue;
-
-		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
-		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
-		/*
-		 * We need to re-mark these list entries for deletion,
-		 * so we pull them back into core and set zp->z_unlinked.
-		 */
-		error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
-
-		/*
-		 * We may pick up znodes that are already marked for deletion.
-		 * This could happen during the purge of an extended attribute
-		 * directory.  All we need to do is skip over them, since they
-		 * are already in the system marked z_unlinked.
-		 */
-		if (error != 0)
-			continue;
-
-		zp->z_unlinked = B_TRUE;
-
-		/*
-		 * iput() is Linux's equivalent to illumos' VN_RELE(). It will
-		 * decrement the inode's ref count and may cause the inode to be
-		 * synchronously freed. We interrupt freeing of this inode, by
-		 * checking the return value of dmu_objset_zfs_unmounting() in
-		 * dmu_free_long_range(), when an unmount is requested.
-		 */
-		iput(ZTOI(zp));
-		ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
-	}
-	zap_cursor_fini(&zc);
-
-	zfsvfs->z_draining = B_FALSE;
-	zfsvfs->z_drain_task = TASKQID_INVALID;
-}
-
-/*
- * Sets z_draining then tries to dispatch async unlinked drain.
- * If that fails executes synchronous unlinked drain.
- */
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
-{
-	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
-	ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
-
-	zfsvfs->z_draining = B_TRUE;
-	zfsvfs->z_drain_cancel = B_FALSE;
-
-	zfsvfs->z_drain_task = taskq_dispatch(
-	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
-	    zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
-	if (zfsvfs->z_drain_task == TASKQID_INVALID) {
-		zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
-		zfs_unlinked_drain_task(zfsvfs);
-	}
-}
-
-/*
- * Wait for the unlinked drain taskq task to stop. This will interrupt the
- * unlinked set processing if it is in progress.
- */
-void
-zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
-{
-	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
-
-	if (zfsvfs->z_draining) {
-		zfsvfs->z_drain_cancel = B_TRUE;
-		taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
-		    dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
-		zfsvfs->z_drain_task = TASKQID_INVALID;
-		zfsvfs->z_draining = B_FALSE;
-	}
-}
-
-/*
- * Delete the entire contents of a directory.  Return a count
- * of the number of entries that could not be deleted. If we encounter
- * an error, return a count of at least one so that the directory stays
- * in the unlinked set.
- *
- * NOTE: this function assumes that the directory is inactive,
- *	so there is no need to lock its entries before deletion.
- *	Also, it assumes the directory contents is *only* regular
- *	files.
- */
-static int
-zfs_purgedir(znode_t *dzp)
-{
-	zap_cursor_t	zc;
-	zap_attribute_t	zap;
-	znode_t		*xzp;
-	dmu_tx_t	*tx;
-	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
-	zfs_dirlock_t	dl;
-	int skipped = 0;
-	int error;
-
-	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
-	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
-	    zap_cursor_advance(&zc)) {
-		error = zfs_zget(zfsvfs,
-		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
-		if (error) {
-			skipped += 1;
-			continue;
-		}
-
-		ASSERT(S_ISREG(ZTOI(xzp)->i_mode) ||
-		    S_ISLNK(ZTOI(xzp)->i_mode));
-
-		tx = dmu_tx_create(zfsvfs->z_os);
-		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
-		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
-		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
-		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-		/* Is this really needed ? */
-		zfs_sa_upgrade_txholds(tx, xzp);
-		dmu_tx_mark_netfree(tx);
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			dmu_tx_abort(tx);
-			zfs_iput_async(ZTOI(xzp));
-			skipped += 1;
-			continue;
-		}
-		bzero(&dl, sizeof (dl));
-		dl.dl_dzp = dzp;
-		dl.dl_name = zap.za_name;
-
-		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
-		if (error)
-			skipped += 1;
-		dmu_tx_commit(tx);
-
-		zfs_iput_async(ZTOI(xzp));
-	}
-	zap_cursor_fini(&zc);
-	if (error != ENOENT)
-		skipped += 1;
-	return (skipped);
-}
-
-void
-zfs_rmnode(znode_t *zp)
-{
-	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
-	objset_t	*os = zfsvfs->z_os;
-	znode_t		*xzp = NULL;
-	dmu_tx_t	*tx;
-	uint64_t	acl_obj;
-	uint64_t	xattr_obj;
-	uint64_t	links;
-	int		error;
-
-	ASSERT(ZTOI(zp)->i_nlink == 0);
-	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
-
-	/*
-	 * If this is an attribute directory, purge its contents.
-	 */
-	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
-		if (zfs_purgedir(zp) != 0) {
-			/*
-			 * Not enough space to delete some xattrs.
-			 * Leave it in the unlinked set.
-			 */
-			zfs_znode_dmu_fini(zp);
-
-			return;
-		}
-	}
-
-	/*
-	 * Free up all the data in the file.  We don't do this for directories
-	 * because we need truncate and remove to be in the same tx, like in
-	 * zfs_znode_delete(). Otherwise, if we crash here we'll end up with
-	 * an inconsistent truncated zap object in the delete queue.  Note a
-	 * truncated file is harmless since it only contains user data.
-	 */
-	if (S_ISREG(ZTOI(zp)->i_mode)) {
-		error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
-		if (error) {
-			/*
-			 * Not enough space or we were interrupted by unmount.
-			 * Leave the file in the unlinked set.
-			 */
-			zfs_znode_dmu_fini(zp);
-			return;
-		}
-	}
-
-	/*
-	 * If the file has extended attributes, we're going to unlink
-	 * the xattr dir.
-	 */
-	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
-	    &xattr_obj, sizeof (xattr_obj));
-	if (error == 0 && xattr_obj) {
-		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
-		ASSERT(error == 0);
-	}
-
-	acl_obj = zfs_external_acl(zp);
-
-	/*
-	 * Set up the final transaction.
-	 */
-	tx = dmu_tx_create(os);
-	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
-	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-	if (xzp) {
-		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
-		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
-	}
-	if (acl_obj)
-		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
-
-	zfs_sa_upgrade_txholds(tx, zp);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		/*
-		 * Not enough space to delete the file.  Leave it in the
-		 * unlinked set, leaking it until the fs is remounted (at
-		 * which point we'll call zfs_unlinked_drain() to process it).
-		 */
-		dmu_tx_abort(tx);
-		zfs_znode_dmu_fini(zp);
-		goto out;
-	}
-
-	if (xzp) {
-		ASSERT(error == 0);
-		mutex_enter(&xzp->z_lock);
-		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
-		clear_nlink(ZTOI(xzp));		/* no more links to it */
-		links = 0;
-		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
-		    &links, sizeof (links), tx));
-		mutex_exit(&xzp->z_lock);
-		zfs_unlinked_add(xzp, tx);
-	}
-
-	/* Remove this znode from the unlinked set */
-	VERIFY3U(0, ==,
-	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
-
-	dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
-
-	zfs_znode_delete(zp, tx);
-
-	dmu_tx_commit(tx);
-out:
-	if (xzp)
-		zfs_iput_async(ZTOI(xzp));
-}
-
-static uint64_t
-zfs_dirent(znode_t *zp, uint64_t mode)
-{
-	uint64_t de = zp->z_id;
-
-	if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE)
-		de |= IFTODT(mode) << 60;
-	return (de);
-}
-
-/*
- * Link zp into dl.  Can fail in the following cases :
- * - if zp has been unlinked.
- * - if the number of entries with the same hash (aka. colliding entries)
- *    exceed the capacity of a leaf-block of fatzap and splitting of the
- *    leaf-block does not help.
- */
-int
-zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
-{
-	znode_t *dzp = dl->dl_dzp;
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	uint64_t value;
-	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
-	sa_bulk_attr_t bulk[5];
-	uint64_t mtime[2], ctime[2];
-	uint64_t links;
-	int count = 0;
-	int error;
-
-	mutex_enter(&zp->z_lock);
-
-	if (!(flag & ZRENAMING)) {
-		if (zp->z_unlinked) {	/* no new links to unlinked zp */
-			ASSERT(!(flag & (ZNEW | ZEXISTS)));
-			mutex_exit(&zp->z_lock);
-			return (SET_ERROR(ENOENT));
-		}
-		if (!(flag & ZNEW)) {
-			/*
-			 * ZNEW nodes come from zfs_mknode() where the link
-			 * count has already been initialised
-			 */
-			inc_nlink(ZTOI(zp));
-			links = ZTOI(zp)->i_nlink;
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-			    NULL, &links, sizeof (links));
-		}
-	}
-
-	value = zfs_dirent(zp, zp->z_mode);
-	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
-	    &value, tx);
-
-	/*
-	 * zap_add could fail to add the entry if it exceeds the capacity of the
-	 * leaf-block and zap_leaf_split() failed to help.
-	 * The caller of this routine is responsible for failing the transaction
-	 * which will rollback the SA updates done above.
-	 */
-	if (error != 0) {
-		if (!(flag & ZRENAMING) && !(flag & ZNEW))
-			drop_nlink(ZTOI(zp));
-		mutex_exit(&zp->z_lock);
-		return (error);
-	}
-
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
-	    &dzp->z_id, sizeof (dzp->z_id));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, sizeof (zp->z_pflags));
-
-	if (!(flag & ZNEW)) {
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
-		    ctime, sizeof (ctime));
-		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
-		    ctime);
-	}
-	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
-
-	mutex_exit(&zp->z_lock);
-
-	mutex_enter(&dzp->z_lock);
-	dzp->z_size++;
-	if (zp_is_dir)
-		inc_nlink(ZTOI(dzp));
-	links = ZTOI(dzp)->i_nlink;
-	count = 0;
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
-	    &dzp->z_size, sizeof (dzp->z_size));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
-	    &links, sizeof (links));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
-	    mtime, sizeof (mtime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
-	    ctime, sizeof (ctime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &dzp->z_pflags, sizeof (dzp->z_pflags));
-	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
-	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
-	mutex_exit(&dzp->z_lock);
-
-	return (0);
-}
-
-/*
- * The match type in the code for this function should conform to:
- *
- * ------------------------------------------------------------------------
- * fs type  | z_norm      | lookup type | match type
- * ---------|-------------|-------------|----------------------------------
- * CS !norm | 0           |           0 | 0 (exact)
- * CS  norm | formX       |           0 | MT_NORMALIZE
- * CI !norm | upper       |   !ZCIEXACT | MT_NORMALIZE
- * CI !norm | upper       |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
- * CI  norm | upper|formX |   !ZCIEXACT | MT_NORMALIZE
- * CI  norm | upper|formX |    ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
- * CM !norm | upper       |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
- * CM !norm | upper       |     ZCILOOK | MT_NORMALIZE
- * CM  norm | upper|formX |    !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
- * CM  norm | upper|formX |     ZCILOOK | MT_NORMALIZE
- *
- * Abbreviations:
- *    CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
- *    upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
- *    formX = unicode normalization form set on fs creation
- */
-static int
-zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
-    int flag)
-{
-	int error;
-
-	if (ZTOZSB(zp)->z_norm) {
-		matchtype_t mt = MT_NORMALIZE;
-
-		if ((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE &&
-		    (flag & ZCIEXACT)) ||
-		    (ZTOZSB(zp)->z_case == ZFS_CASE_MIXED &&
-		    !(flag & ZCILOOK))) {
-			mt |= MT_MATCH_CASE;
-		}
-
-		error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id,
-		    dl->dl_name, mt, tx);
-	} else {
-		error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
-		    tx);
-	}
-
-	return (error);
-}
-
-/*
- * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
- * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
- * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
- * If it's non-NULL, we use it to indicate whether the znode needs deletion,
- * and it's the caller's job to do it.
- */
-int
-zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
-    boolean_t *unlinkedp)
-{
-	znode_t *dzp = dl->dl_dzp;
-	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
-	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
-	boolean_t unlinked = B_FALSE;
-	sa_bulk_attr_t bulk[5];
-	uint64_t mtime[2], ctime[2];
-	uint64_t links;
-	int count = 0;
-	int error;
-
-	if (!(flag & ZRENAMING)) {
-		mutex_enter(&zp->z_lock);
-
-		if (zp_is_dir && !zfs_dirempty(zp)) {
-			mutex_exit(&zp->z_lock);
-			return (SET_ERROR(ENOTEMPTY));
-		}
-
-		/*
-		 * If we get here, we are going to try to remove the object.
-		 * First try removing the name from the directory; if that
-		 * fails, return the error.
-		 */
-		error = zfs_dropname(dl, zp, dzp, tx, flag);
-		if (error != 0) {
-			mutex_exit(&zp->z_lock);
-			return (error);
-		}
-
-		if (ZTOI(zp)->i_nlink <= zp_is_dir) {
-			zfs_panic_recover("zfs: link count on %lu is %u, "
-			    "should be at least %u", zp->z_id,
-			    (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
-			set_nlink(ZTOI(zp), zp_is_dir + 1);
-		}
-		drop_nlink(ZTOI(zp));
-		if (ZTOI(zp)->i_nlink == zp_is_dir) {
-			zp->z_unlinked = B_TRUE;
-			clear_nlink(ZTOI(zp));
-			unlinked = B_TRUE;
-		} else {
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
-			    NULL, &ctime, sizeof (ctime));
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
-			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
-			    ctime);
-		}
-		links = ZTOI(zp)->i_nlink;
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-		    NULL, &links, sizeof (links));
-		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-		count = 0;
-		ASSERT(error == 0);
-		mutex_exit(&zp->z_lock);
-	} else {
-		error = zfs_dropname(dl, zp, dzp, tx, flag);
-		if (error != 0)
-			return (error);
-	}
-
-	mutex_enter(&dzp->z_lock);
-	dzp->z_size--;		/* one dirent removed */
-	if (zp_is_dir)
-		drop_nlink(ZTOI(dzp));	/* ".." link from zp */
-	links = ZTOI(dzp)->i_nlink;
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-	    NULL, &links, sizeof (links));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
-	    NULL, &dzp->z_size, sizeof (dzp->z_size));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
-	    NULL, ctime, sizeof (ctime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
-	    NULL, mtime, sizeof (mtime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
-	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
-	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
-	mutex_exit(&dzp->z_lock);
-
-	if (unlinkedp != NULL)
-		*unlinkedp = unlinked;
-	else if (unlinked)
-		zfs_unlinked_add(zp, tx);
-
-	return (0);
-}
-
-/*
- * Indicate whether the directory is empty.  Works with or without z_lock
- * held, but can only be consider a hint in the latter case.  Returns true
- * if only "." and ".." remain and there's no work in progress.
- *
- * The internal ZAP size, rather than zp->z_size, needs to be checked since
- * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE.
- */
-boolean_t
-zfs_dirempty(znode_t *dzp)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
-	uint64_t count;
-	int error;
-
-	if (dzp->z_dirlocks != NULL)
-		return (B_FALSE);
-
-	error = zap_count(zfsvfs->z_os, dzp->z_id, &count);
-	if (error != 0 || count != 0)
-		return (B_FALSE);
-
-	return (B_TRUE);
-}
-
-int
-zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	znode_t *xzp;
-	dmu_tx_t *tx;
-	int error;
-	zfs_acl_ids_t acl_ids;
-	boolean_t fuid_dirtied;
-#ifdef DEBUG
-	uint64_t parent;
-#endif
-
-	*xipp = NULL;
-
-	if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
-		return (error);
-
-	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
-	    &acl_ids)) != 0)
-		return (error);
-	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) {
-		zfs_acl_ids_free(&acl_ids);
-		return (SET_ERROR(EDQUOT));
-	}
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
-	    ZFS_SA_BASE_ATTR_SIZE);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		zfs_acl_ids_free(&acl_ids);
-		dmu_tx_abort(tx);
-		return (error);
-	}
-	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-#ifdef DEBUG
-	error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
-	    &parent, sizeof (parent));
-	ASSERT(error == 0 && parent == zp->z_id);
-#endif
-
-	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
-	    sizeof (xzp->z_id), tx));
-
-	if (!zp->z_unlinked)
-		(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
-		    xzp, "", NULL, acl_ids.z_fuidp, vap);
-
-	zfs_acl_ids_free(&acl_ids);
-	dmu_tx_commit(tx);
-
-	*xipp = ZTOI(xzp);
-
-	return (0);
-}
-
-/*
- * Return a znode for the extended attribute directory for zp.
- * ** If the directory does not already exist, it is created **
- *
- *	IN:	zp	- znode to obtain attribute directory from
- *		cr	- credentials of caller
- *		flags	- flags from the VOP_LOOKUP call
- *
- *	OUT:	xipp	- pointer to extended attribute znode
- *
- *	RETURN:	0 on success
- *		error number on failure
- */
-int
-zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
-{
-	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
-	znode_t		*xzp;
-	zfs_dirlock_t	*dl;
-	vattr_t		va;
-	int		error;
-top:
-	error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
-	if (error)
-		return (error);
-
-	if (xzp != NULL) {
-		*xipp = ZTOI(xzp);
-		zfs_dirent_unlock(dl);
-		return (0);
-	}
-
-	if (!(flags & CREATE_XATTR_DIR)) {
-		zfs_dirent_unlock(dl);
-		return (SET_ERROR(ENOENT));
-	}
-
-	if (zfs_is_readonly(zfsvfs)) {
-		zfs_dirent_unlock(dl);
-		return (SET_ERROR(EROFS));
-	}
-
-	/*
-	 * The ability to 'create' files in an attribute
-	 * directory comes from the write_xattr permission on the base file.
-	 *
-	 * The ability to 'search' an attribute directory requires
-	 * read_xattr permission on the base file.
-	 *
-	 * Once in a directory the ability to read/write attributes
-	 * is controlled by the permissions on the attribute file.
-	 */
-	va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
-	va.va_mode = S_IFDIR | S_ISVTX | 0777;
-	zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
-
-	va.va_dentry = NULL;
-	error = zfs_make_xattrdir(zp, &va, xipp, cr);
-	zfs_dirent_unlock(dl);
-
-	if (error == ERESTART) {
-		/* NB: we already did dmu_tx_wait() if necessary */
-		goto top;
-	}
-
-	return (error);
-}
-
-/*
- * Decide whether it is okay to remove within a sticky directory.
- *
- * In sticky directories, write access is not sufficient;
- * you can remove entries from a directory only if:
- *
- *	you own the directory,
- *	you own the entry,
- *	you have write access to the entry,
- *	or you are privileged (checked in secpolicy...).
- *
- * The function returns 0 if remove access is granted.
- */
-int
-zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
-{
-	uid_t		uid;
-	uid_t		downer;
-	uid_t		fowner;
-	zfsvfs_t	*zfsvfs = ZTOZSB(zdp);
-
-	if (zfsvfs->z_replay)
-		return (0);
-
-	if ((zdp->z_mode & S_ISVTX) == 0)
-		return (0);
-
-	downer = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zdp)->i_uid),
-	    cr, ZFS_OWNER);
-	fowner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zp)->i_uid),
-	    cr, ZFS_OWNER);
-
-	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
-	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
-		return (0);
-	else
-		return (secpolicy_vnode_remove(cr));
-}

diff --git a/zfs/module/zfs/zfs_fm.c b/zfs/module/zfs/zfs_fm.c
index 579aa03..b1c3f7a 100644
--- a/zfs/module/zfs/zfs_fm.c
+++ b/zfs/module/zfs/zfs_fm.c

@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012,2021 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -59,7 +59,7 @@
  * read I/Os, there  are basically three 'types' of I/O, which form a roughly
  * layered diagram:
  *
- *      +---------------+
+ * 	+---------------+
  * 	| Aggregate I/O |	No associated logical data or device
  * 	+---------------+
  *              |
@@ -101,7 +101,286 @@
  * good and bad versions of the buffer (if available), and we annotate the
  * ereport with information about the differences.
  */
+
 #ifdef _KERNEL
+/*
+ * Duplicate ereport Detection
+ *
+ * Some ereports are retained momentarily for detecting duplicates.  These
+ * are kept in a recent_events_node_t in both a time-ordered list and an AVL
+ * tree of recent unique ereports.
+ *
+ * The lifespan of these recent ereports is bounded (15 mins) and a cleaner
+ * task is used to purge stale entries.
+ */
+static list_t recent_events_list;
+static avl_tree_t recent_events_tree;
+static kmutex_t recent_events_lock;
+static taskqid_t recent_events_cleaner_tqid;
+
+/*
+ * Each node is about 128 bytes so 2,000 would consume 1/4 MiB.
+ *
+ * This setting can be changed dynamically and setting it to zero
+ * disables duplicate detection.
+ */
+unsigned int zfs_zevent_retain_max = 2000;
+
+/*
+ * The lifespan for a recent ereport entry. The default of 15 minutes is
+ * intended to outlive the zfs diagnosis engine's threshold of 10 errors
+ * over a period of 10 minutes.
+ */
+unsigned int zfs_zevent_retain_expire_secs = 900;
+
+typedef enum zfs_subclass {
+	ZSC_IO,
+	ZSC_DATA,
+	ZSC_CHECKSUM
+} zfs_subclass_t;
+
+typedef struct {
+	/* common criteria */
+	uint64_t	re_pool_guid;
+	uint64_t	re_vdev_guid;
+	int		re_io_error;
+	uint64_t	re_io_size;
+	uint64_t	re_io_offset;
+	zfs_subclass_t	re_subclass;
+	zio_priority_t	re_io_priority;
+
+	/* logical zio criteria (optional) */
+	zbookmark_phys_t re_io_bookmark;
+
+	/* internal state */
+	avl_node_t	re_tree_link;
+	list_node_t	re_list_link;
+	uint64_t	re_timestamp;
+} recent_events_node_t;
+
+static int
+recent_events_compare(const void *a, const void *b)
+{
+	const recent_events_node_t *node1 = a;
+	const recent_events_node_t *node2 = b;
+	int cmp;
+
+	/*
+	 * The comparison order here is somewhat arbitrary.
+	 * What's important is that if every criterion matches, then it
+	 * is a duplicate (i.e. compare returns 0)
+	 */
+	if ((cmp = TREE_CMP(node1->re_subclass, node2->re_subclass)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_pool_guid, node2->re_pool_guid)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_vdev_guid, node2->re_vdev_guid)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_io_error, node2->re_io_error)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_io_priority, node2->re_io_priority)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_io_size, node2->re_io_size)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(node1->re_io_offset, node2->re_io_offset)) != 0)
+		return (cmp);
+
+	const zbookmark_phys_t *zb1 = &node1->re_io_bookmark;
+	const zbookmark_phys_t *zb2 = &node2->re_io_bookmark;
+
+	if ((cmp = TREE_CMP(zb1->zb_objset, zb2->zb_objset)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(zb1->zb_object, zb2->zb_object)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(zb1->zb_level, zb2->zb_level)) != 0)
+		return (cmp);
+	if ((cmp = TREE_CMP(zb1->zb_blkid, zb2->zb_blkid)) != 0)
+		return (cmp);
+
+	return (0);
+}
+
+static void zfs_ereport_schedule_cleaner(void);
+
+/*
+ * background task to clean stale recent event nodes.
+ */
+static void
+zfs_ereport_cleaner(void *arg)
+{
+	recent_events_node_t *entry;
+	uint64_t now = gethrtime();
+
+	/*
+	 * purge expired entries
+	 */
+	mutex_enter(&recent_events_lock);
+	while ((entry = list_tail(&recent_events_list)) != NULL) {
+		uint64_t age = NSEC2SEC(now - entry->re_timestamp);
+		if (age <= zfs_zevent_retain_expire_secs)
+			break;
+
+		/* remove expired node */
+		avl_remove(&recent_events_tree, entry);
+		list_remove(&recent_events_list, entry);
+		kmem_free(entry, sizeof (*entry));
+	}
+
+	/* Restart the cleaner if more entries remain */
+	recent_events_cleaner_tqid = 0;
+	if (!list_is_empty(&recent_events_list))
+		zfs_ereport_schedule_cleaner();
+
+	mutex_exit(&recent_events_lock);
+}
+
+static void
+zfs_ereport_schedule_cleaner(void)
+{
+	ASSERT(MUTEX_HELD(&recent_events_lock));
+
+	uint64_t timeout = SEC2NSEC(zfs_zevent_retain_expire_secs + 1);
+
+	recent_events_cleaner_tqid = taskq_dispatch_delay(
+	    system_delay_taskq, zfs_ereport_cleaner, NULL, TQ_SLEEP,
+	    ddi_get_lbolt() + NSEC_TO_TICK(timeout));
+}
+
+/*
+ * Clear entries for a given vdev or all vdevs in a pool when vdev == NULL
+ */
+void
+zfs_ereport_clear(spa_t *spa, vdev_t *vd)
+{
+	uint64_t vdev_guid, pool_guid;
+
+	ASSERT(vd != NULL || spa != NULL);
+	if (vd == NULL) {
+		vdev_guid = 0;
+		pool_guid = spa_guid(spa);
+	} else {
+		vdev_guid = vd->vdev_guid;
+		pool_guid = 0;
+	}
+
+	mutex_enter(&recent_events_lock);
+
+	recent_events_node_t *next = list_head(&recent_events_list);
+	while (next != NULL) {
+		recent_events_node_t *entry = next;
+
+		next = list_next(&recent_events_list, next);
+
+		if (entry->re_vdev_guid == vdev_guid ||
+		    entry->re_pool_guid == pool_guid) {
+			avl_remove(&recent_events_tree, entry);
+			list_remove(&recent_events_list, entry);
+			kmem_free(entry, sizeof (*entry));
+		}
+	}
+
+	mutex_exit(&recent_events_lock);
+}
+
+/*
+ * Check if an ereport would be a duplicate of one recently posted.
+ *
+ * An ereport is considered a duplicate if the set of criteria in
+ * recent_events_node_t all match.
+ *
+ * Only FM_EREPORT_ZFS_IO, FM_EREPORT_ZFS_DATA, and FM_EREPORT_ZFS_CHECKSUM
+ * are candidates for duplicate checking.
+ */
+static boolean_t
+zfs_ereport_is_duplicate(const char *subclass, spa_t *spa, vdev_t *vd,
+    const zbookmark_phys_t *zb, zio_t *zio, uint64_t offset, uint64_t size)
+{
+	recent_events_node_t search = {0}, *entry;
+
+	if (vd == NULL || zio == NULL)
+		return (B_FALSE);
+
+	if (zfs_zevent_retain_max == 0)
+		return (B_FALSE);
+
+	if (strcmp(subclass, FM_EREPORT_ZFS_IO) == 0)
+		search.re_subclass = ZSC_IO;
+	else if (strcmp(subclass, FM_EREPORT_ZFS_DATA) == 0)
+		search.re_subclass = ZSC_DATA;
+	else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0)
+		search.re_subclass = ZSC_CHECKSUM;
+	else
+		return (B_FALSE);
+
+	search.re_pool_guid = spa_guid(spa);
+	search.re_vdev_guid = vd->vdev_guid;
+	search.re_io_error = zio->io_error;
+	search.re_io_priority = zio->io_priority;
+	/* if size is supplied use it over what's in zio */
+	if (size) {
+		search.re_io_size = size;
+		search.re_io_offset = offset;
+	} else {
+		search.re_io_size = zio->io_size;
+		search.re_io_offset = zio->io_offset;
+	}
+
+	/* grab optional logical zio criteria */
+	if (zb != NULL) {
+		search.re_io_bookmark.zb_objset = zb->zb_objset;
+		search.re_io_bookmark.zb_object = zb->zb_object;
+		search.re_io_bookmark.zb_level = zb->zb_level;
+		search.re_io_bookmark.zb_blkid = zb->zb_blkid;
+	}
+
+	uint64_t now = gethrtime();
+
+	mutex_enter(&recent_events_lock);
+
+	/* check if we have seen this one recently */
+	entry = avl_find(&recent_events_tree, &search, NULL);
+	if (entry != NULL) {
+		uint64_t age = NSEC2SEC(now - entry->re_timestamp);
+
+		/*
+		 * There is still an active cleaner (since we're here).
+		 * Reset the last seen time for this duplicate entry
+		 * so that its lifespan gets extended.
+		 */
+		list_remove(&recent_events_list, entry);
+		list_insert_head(&recent_events_list, entry);
+		entry->re_timestamp = now;
+
+		zfs_zevent_track_duplicate();
+		mutex_exit(&recent_events_lock);
+
+		return (age <= zfs_zevent_retain_expire_secs);
+	}
+
+	if (avl_numnodes(&recent_events_tree) >= zfs_zevent_retain_max) {
+		/* recycle oldest node */
+		entry = list_tail(&recent_events_list);
+		ASSERT(entry != NULL);
+		list_remove(&recent_events_list, entry);
+		avl_remove(&recent_events_tree, entry);
+	} else {
+		entry = kmem_alloc(sizeof (recent_events_node_t), KM_SLEEP);
+	}
+
+	/* record this as a recent ereport */
+	*entry = search;
+	avl_add(&recent_events_tree, entry);
+	list_insert_head(&recent_events_list, entry);
+	entry->re_timestamp = now;
+
+	/* Start a cleaner if not already scheduled */
+	if (recent_events_cleaner_tqid == 0)
+		zfs_ereport_schedule_cleaner();
+
+	mutex_exit(&recent_events_lock);
+	return (B_FALSE);
+}
+
 void
 zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
 {
@@ -113,8 +392,8 @@
 }
 
 /*
- * We want to rate limit ZIO delay and checksum events so as to not
- * flood ZED when a disk is acting up.
+ * We want to rate limit ZIO delay, deadman, and checksum events so as to not
+ * flood zevent consumers when a disk is acting up.
  *
  * Returns 1 if we're ratelimiting, 0 if not.
  */
@@ -123,11 +402,13 @@
 {
 	int rc = 0;
 	/*
-	 * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
+	 * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
 	 * are.  Invert it to get our return value.
 	 */
 	if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_delay_rl);
+	} else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
+		rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
 	} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
 	}
@@ -153,9 +434,6 @@
 	uint64_t ena;
 	char class[64];
 
-	if (!zfs_ereport_is_valid(subclass, spa, vd, zio))
-		return (B_FALSE);
-
 	if ((ereport = fm_nvlist_create(NULL)) == NULL)
 		return (B_FALSE);
 
@@ -336,6 +614,8 @@
 		    DATA_TYPE_UINT64, zio->io_timestamp, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA,
 		    DATA_TYPE_UINT64, zio->io_delta, NULL);
+		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
+		    DATA_TYPE_UINT32, zio->io_priority, NULL);
 
 		/*
 		 * If the 'size' parameter is non-zero, it indicates this is a
@@ -543,9 +823,6 @@
 	const uint64_t *good;
 	const uint64_t *bad;
 
-	uint64_t allset = 0;
-	uint64_t allcleared = 0;
-
 	size_t nui64s = size / sizeof (uint64_t);
 
 	size_t inline_size;
@@ -647,9 +924,6 @@
 			// bits set in good, but not in bad
 			cleared = (good[idx] & (~bad[idx]));
 
-			allset |= set;
-			allcleared |= cleared;
-
 			if (!no_inline) {
 				ASSERT3U(offset, <, inline_size);
 				eip->zei_bits_set[offset] = set;
@@ -708,6 +982,12 @@
 	}
 	return (eip);
 }
+#else
+void
+zfs_ereport_clear(spa_t *spa, vdev_t *vd)
+{
+	(void) spa, (void) vd;
+}
 #endif
 
 /*
@@ -783,29 +1063,41 @@
 	    (zio != NULL) && (!zio->io_timestamp)) {
 		return (B_FALSE);
 	}
+#else
+	(void) subclass, (void) spa, (void) vd, (void) zio;
 #endif
 	return (B_TRUE);
 }
 
 /*
- * Return 0 if event was posted, EINVAL if there was a problem posting it or
- * EBUSY if the event was rate limited.
+ * Post an ereport for the given subclass
+ *
+ * Returns
+ * - 0 if an event was posted
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
  */
 int
 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
-    const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
-    uint64_t size)
+    const zbookmark_phys_t *zb, zio_t *zio, uint64_t state)
 {
 	int rc = 0;
 #ifdef _KERNEL
 	nvlist_t *ereport = NULL;
 	nvlist_t *detector = NULL;
 
+	if (!zfs_ereport_is_valid(subclass, spa, vd, zio))
+		return (EINVAL);
+
+	if (zfs_ereport_is_duplicate(subclass, spa, vd, zb, zio, 0, 0))
+		return (SET_ERROR(EALREADY));
+
 	if (zfs_is_ratelimiting_event(subclass, vd))
 		return (SET_ERROR(EBUSY));
 
 	if (!zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
-	    zb, zio, stateoroffset, size))
+	    zb, zio, state, 0))
 		return (SET_ERROR(EINVAL));	/* couldn't post event */
 
 	if (ereport == NULL)
@@ -813,28 +1105,45 @@
 
 	/* Cleanup is handled by the callback function */
 	rc = zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
+#else
+	(void) subclass, (void) spa, (void) vd, (void) zb, (void) zio,
+	    (void) state;
 #endif
 	return (rc);
 }
 
-void
+/*
+ * Prepare a checksum ereport
+ *
+ * Returns
+ * - 0 if the report was queued, or if no ereport was generated
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
+ */
+int
 zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
-    struct zio *zio, uint64_t offset, uint64_t length, void *arg,
-    zio_bad_cksum_t *info)
+    struct zio *zio, uint64_t offset, uint64_t length, zio_bad_cksum_t *info)
 {
 	zio_cksum_report_t *report;
 
 #ifdef _KERNEL
+	if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
+		return (SET_ERROR(EINVAL));
+
+	if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
+	    offset, length))
+		return (SET_ERROR(EALREADY));
+
 	if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
-		return;
+		return (SET_ERROR(EBUSY));
+#else
+	(void) zb, (void) offset;
 #endif
 
 	report = kmem_zalloc(sizeof (*report), KM_SLEEP);
 
-	if (zio->io_vsd != NULL)
-		zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
-	else
-		zio_vsd_default_cksum_report(zio, report, arg);
+	zio_vsd_default_cksum_report(zio, report);
 
 	/* copy the checksum failure information if it was provided */
 	if (info != NULL) {
@@ -842,16 +1151,18 @@
 		bcopy(info, report->zcr_ckinfo, sizeof (*info));
 	}
 
-	report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift;
+	report->zcr_sector = 1ULL << vd->vdev_top->vdev_ashift;
+	report->zcr_align =
+	    vdev_psize_to_asize(vd->vdev_top, report->zcr_sector);
 	report->zcr_length = length;
 
 #ifdef _KERNEL
-	zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
+	(void) zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
 	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
 
 	if (report->zcr_ereport == NULL) {
 		zfs_ereport_free_checksum(report);
-		return;
+		return (0);
 	}
 #endif
 
@@ -859,6 +1170,7 @@
 	report->zcr_next = zio->io_logical->io_cksum_report;
 	zio->io_logical->io_cksum_report = report;
 	mutex_exit(&spa->spa_errlist_lock);
+	return (0);
 }
 
 void
@@ -879,6 +1191,9 @@
 	report->zcr_ereport = report->zcr_detector = NULL;
 	if (info != NULL)
 		kmem_free(info, sizeof (*info));
+#else
+	(void) report, (void) good_data, (void) bad_data,
+	    (void) drop_if_identical;
 #endif
 }
 
@@ -901,7 +1216,15 @@
 	kmem_free(rpt, sizeof (*rpt));
 }
 
-
+/*
+ * Post a checksum ereport
+ *
+ * Returns
+ * - 0 if an event was posted
+ * - EINVAL if there was a problem posting event
+ * - EBUSY if the event was rate limited
+ * - EALREADY if the event was already posted (duplicate)
+ */
 int
 zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     struct zio *zio, uint64_t offset, uint64_t length,
@@ -913,8 +1236,15 @@
 	nvlist_t *detector = NULL;
 	zfs_ecksum_info_t *info;
 
+	if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
+		return (SET_ERROR(EINVAL));
+
+	if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
+	    offset, length))
+		return (SET_ERROR(EALREADY));
+
 	if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 
 	if (!zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
 	    spa, vd, zb, zio, offset, length) || (ereport == NULL)) {
@@ -928,6 +1258,9 @@
 		rc = zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
 		kmem_free(info, sizeof (*info));
 	}
+#else
+	(void) spa, (void) vd, (void) zb, (void) zio, (void) offset,
+	    (void) length, (void) good_data, (void) bad_data, (void) zbc;
 #endif
 	return (rc);
 }
@@ -992,7 +1325,8 @@
 		while ((elem = nvlist_next_nvpair(aux, elem)) != NULL)
 			(void) nvlist_add_nvpair(resource, elem);
 	}
-
+#else
+	(void) spa, (void) vd, (void) type, (void) name, (void) aux;
 #endif
 	return (resource);
 }
@@ -1007,6 +1341,8 @@
 	resource = zfs_event_create(spa, vd, type, name, aux);
 	if (resource)
 		zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
+#else
+	(void) spa, (void) vd, (void) type, (void) name, (void) aux;
 #endif
 }
 
@@ -1070,14 +1406,114 @@
 
 	if (aux)
 		fm_nvlist_destroy(aux, FM_NVA_FREE);
+#else
+	(void) spa, (void) vd, (void) laststate;
 #endif
 }
 
-#if defined(_KERNEL)
+#ifdef _KERNEL
+void
+zfs_ereport_init(void)
+{
+	mutex_init(&recent_events_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&recent_events_list, sizeof (recent_events_node_t),
+	    offsetof(recent_events_node_t, re_list_link));
+	avl_create(&recent_events_tree,  recent_events_compare,
+	    sizeof (recent_events_node_t), offsetof(recent_events_node_t,
+	    re_tree_link));
+}
+
+/*
+ * This 'early' fini needs to run before zfs_fini() which on Linux waits
+ * for the system_delay_taskq to drain.
+ */
+void
+zfs_ereport_taskq_fini(void)
+{
+	mutex_enter(&recent_events_lock);
+	if (recent_events_cleaner_tqid != 0) {
+		taskq_cancel_id(system_delay_taskq, recent_events_cleaner_tqid);
+		recent_events_cleaner_tqid = 0;
+	}
+	mutex_exit(&recent_events_lock);
+}
+
+void
+zfs_ereport_fini(void)
+{
+	recent_events_node_t *entry;
+
+	while ((entry = list_head(&recent_events_list)) != NULL) {
+		avl_remove(&recent_events_tree, entry);
+		list_remove(&recent_events_list, entry);
+		kmem_free(entry, sizeof (*entry));
+	}
+	avl_destroy(&recent_events_tree);
+	list_destroy(&recent_events_list);
+	mutex_destroy(&recent_events_lock);
+}
+
+void
+zfs_ereport_snapshot_post(const char *subclass, spa_t *spa, const char *name)
+{
+	nvlist_t *aux;
+
+	aux = fm_nvlist_create(NULL);
+	nvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME, name);
+
+	zfs_post_common(spa, NULL, FM_RSRC_CLASS, subclass, aux);
+	fm_nvlist_destroy(aux, FM_NVA_FREE);
+}
+
+/*
+ * Post an event when a zvol is created or removed
+ *
+ * This is currently only used by macOS, since it uses the event to create
+ * symlinks between the volume name (mypool/myvol) and the actual /dev
+ * device (/dev/disk3).  For example:
+ *
+ * /var/run/zfs/dsk/mypool/myvol -> /dev/disk3
+ *
+ * name: The full name of the zvol ("mypool/myvol")
+ * dev_name: The full /dev name for the zvol ("/dev/disk3")
+ * raw_name: The raw  /dev name for the zvol ("/dev/rdisk3")
+ */
+void
+zfs_ereport_zvol_post(const char *subclass, const char *name,
+    const char *dev_name, const char *raw_name)
+{
+	nvlist_t *aux;
+	char *r;
+
+	boolean_t locked = mutex_owned(&spa_namespace_lock);
+	if (!locked) mutex_enter(&spa_namespace_lock);
+	spa_t *spa = spa_lookup(name);
+	if (!locked) mutex_exit(&spa_namespace_lock);
+
+	if (spa == NULL)
+		return;
+
+	aux = fm_nvlist_create(NULL);
+	nvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME, dev_name);
+	nvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME,
+	    raw_name);
+	r = strchr(name, '/');
+	if (r && r[1])
+		nvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_VOLUME, &r[1]);
+
+	zfs_post_common(spa, NULL, FM_RSRC_CLASS, subclass, aux);
+	fm_nvlist_destroy(aux, FM_NVA_FREE);
+}
+
 EXPORT_SYMBOL(zfs_ereport_post);
 EXPORT_SYMBOL(zfs_ereport_is_valid);
 EXPORT_SYMBOL(zfs_ereport_post_checksum);
 EXPORT_SYMBOL(zfs_post_remove);
 EXPORT_SYMBOL(zfs_post_autoreplace);
 EXPORT_SYMBOL(zfs_post_state_change);
+
+ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_max, UINT, ZMOD_RW,
+	"Maximum recent zevents records to retain for duplicate checking");
+ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_expire_secs, UINT, ZMOD_RW,
+	"Expiration time for recent zevents records");
 #endif /* _KERNEL */

diff --git a/zfs/module/zfs/zfs_fuid.c b/zfs/module/zfs/zfs_fuid.c
index e577535..a90bf5f 100644
--- a/zfs/module/zfs/zfs_fuid.c
+++ b/zfs/module/zfs/zfs_fuid.c

@@ -26,7 +26,6 @@
 #include <sys/dmu.h>
 #include <sys/avl.h>
 #include <sys/zap.h>
-#include <sys/refcount.h>
 #include <sys/nvpair.h>
 #ifdef _KERNEL
 #include <sys/sid.h>
@@ -73,7 +72,7 @@
 	const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
 	const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
 
-	return (AVL_CMP(node1->f_idx, node2->f_idx));
+	return (TREE_CMP(node1->f_idx, node2->f_idx));
 }
 
 /*
@@ -88,7 +87,7 @@
 
 	val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
 
-	return (AVL_ISIGN(val));
+	return (TREE_ISIGN(val));
 }
 
 void
@@ -382,17 +381,40 @@
 void
 zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
 {
-	*uidp = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
+	*uidp = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOUID(zp)),
 	    cr, ZFS_OWNER);
-	*gidp = zfs_fuid_map_id(ZTOZSB(zp), KGID_TO_SGID(ZTOI(zp)->i_gid),
+	*gidp = zfs_fuid_map_id(ZTOZSB(zp), KGID_TO_SGID(ZTOGID(zp)),
 	    cr, ZFS_GROUP);
 }
 
+#ifdef __FreeBSD__
 uid_t
 zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
     cred_t *cr, zfs_fuid_type_t type)
 {
-#ifdef HAVE_KSID
+	uint32_t index = FUID_INDEX(fuid);
+
+	if (index == 0)
+		return (fuid);
+
+	return (UID_NOBODY);
+}
+#elif defined(__linux__)
+uid_t
+zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
+    cred_t *cr, zfs_fuid_type_t type)
+{
+	/*
+	 * The Linux port only supports POSIX IDs, use the passed id.
+	 */
+	return (fuid);
+}
+
+#else
+uid_t
+zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
+    cred_t *cr, zfs_fuid_type_t type)
+{
 	uint32_t index = FUID_INDEX(fuid);
 	const char *domain;
 	uid_t id;
@@ -411,13 +433,8 @@
 		    FUID_RID(fuid), &id);
 	}
 	return (id);
-#else
-	/*
-	 * The Linux port only supports POSIX IDs, use the passed id.
-	 */
-	return (fuid);
-#endif /* HAVE_KSID */
 }
+#endif
 
 /*
  * Add a FUID node to the list of fuid's being created for this
@@ -560,9 +577,9 @@
 	const char *domain;
 	char *kdomain;
 	uint32_t fuid_idx = FUID_INDEX(id);
-	uint32_t rid;
+	uint32_t rid = 0;
 	idmap_stat status;
-	uint64_t idx = 0;
+	uint64_t idx = UID_NOBODY;
 	zfs_fuid_t *zfuid = NULL;
 	zfs_fuid_info_t *fuidp = NULL;
 
@@ -711,10 +728,11 @@
 boolean_t
 zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
 {
-#ifdef HAVE_KSID
+	uid_t		gid;
+
+#ifdef illumos
 	ksid_t		*ksid = crgetsid(cr, KSID_GROUP);
 	ksidlist_t	*ksidlist = crgetsidlist(cr);
-	uid_t		gid;
 
 	if (ksid && ksidlist) {
 		int		i;
@@ -747,15 +765,13 @@
 			}
 		}
 	}
+#endif /* illumos */
 
 	/*
 	 * Not found in ksidlist, check posix groups
 	 */
 	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
 	return (groupmember(gid, cr));
-#else
-	return (B_TRUE);
-#endif
 }
 
 void
@@ -772,4 +788,24 @@
 		    FUID_SIZE_ESTIMATE(zfsvfs));
 	}
 }
+
+/*
+ * buf must be big enough (eg, 32 bytes)
+ */
+int
+zfs_id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
+    char *buf, size_t len, boolean_t addok)
+{
+	uint64_t fuid;
+	int domainid = 0;
+
+	if (domain && domain[0]) {
+		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
+		if (domainid == -1)
+			return (SET_ERROR(ENOENT));
+	}
+	fuid = FUID_ENCODE(domainid, rid);
+	(void) snprintf(buf, len, "%llx", (longlong_t)fuid);
+	return (0);
+}
 #endif

diff --git a/zfs/module/zfs/zfs_ioctl.c b/zfs/module/zfs/zfs_ioctl.c
index 7e31fcf..3f9cbc1 100644
--- a/zfs/module/zfs/zfs_ioctl.c
+++ b/zfs/module/zfs/zfs_ioctl.c

@@ -27,7 +27,7 @@
  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -37,6 +37,9 @@
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 /*
@@ -156,12 +159,13 @@
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/errno.h>
-#include <sys/uio.h>
+#include <sys/uio_impl.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/stat.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/zfs_quota.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 #include <sys/zap.h>
@@ -176,13 +180,13 @@
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_impl.h>
+#include <sys/dmu_redact.h>
 #include <sys/dmu_tx.h>
 #include <sys/sunddi.h>
 #include <sys/policy.h>
 #include <sys/zone.h>
 #include <sys/nvpair.h>
 #include <sys/pathname.h>
-#include <sys/sdt.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
@@ -191,9 +195,12 @@
 #include <sys/dsl_scan.h>
 #include <sys/fm/util.h>
 #include <sys/dsl_crypt.h>
+#include <sys/rrwlock.h>
+#include <sys/zfs_file.h>
 
 #include <sys/dmu_recv.h>
 #include <sys/dmu_send.h>
+#include <sys/dmu_recv.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
@@ -201,14 +208,10 @@
 #include <sys/zcp.h>
 #include <sys/zio_checksum.h>
 #include <sys/vdev_removal.h>
-#include <sys/zfs_sysfs.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
 
-#include <linux/miscdevice.h>
-#include <linux/slab.h>
-
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "zfs_deleg.h"
@@ -216,70 +219,34 @@
 
 #include <sys/lua/lua.h>
 #include <sys/lua/lauxlib.h>
+#include <sys/zfs_ioctl_impl.h>
 
-/*
- * Limit maximum nvlist size.  We don't want users passing in insane values
- * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
- */
-#define	MAX_NVLIST_SRC_SIZE	KMALLOC_MAX_SIZE
+#define __ZMODDBG__
+#include "zmoddbg.h"
 
 kmutex_t zfsdev_state_lock;
 zfsdev_state_t *zfsdev_state_list;
 
-extern void zfs_init(void);
-extern void zfs_fini(void);
-
-uint_t zfs_fsyncer_key;
-extern uint_t rrw_tsd_key;
-static uint_t zfs_allow_log_key;
-
-typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
-typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
-typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
+/*
+ * Limit maximum nvlist size.  We don't want users passing in insane values
+ * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ * Defaults to 0=auto which is handled by platform code.
+ */
+unsigned long zfs_max_nvlist_src_size = 0;
 
 /*
- * IOC Keys are used to document and validate user->kernel interface inputs.
- * See zfs_keys_recv_new for an example declaration. Any key name that is not
- * listed will be rejected as input.
- *
- * The keyname 'optional' is always allowed, and must be an nvlist if present.
- * Arguments which older kernels can safely ignore can be placed under the
- * "optional" key.
- *
- * When adding new keys to an existing ioc for new functionality, consider:
- * 	- adding an entry into zfs_sysfs.c zfs_features[] list
- * 	- updating the libzfs_input_check.c test utility
- *
- * Note: in the ZK_WILDCARDLIST case, the name serves as documentation
- * for the expected name (bookmark, snapshot, property, etc) but there
- * is no validation in the preflight zfs_check_input_nvpairs() check.
+ * When logging the output nvlist of an ioctl in the on-disk history, limit
+ * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
+ * This applies primarily to zfs_ioc_channel_program().
  */
-typedef enum {
-	ZK_OPTIONAL = 1 << 0,		/* pair is optional */
-	ZK_WILDCARDLIST = 1 << 1,	/* one or more unspecified key names */
-} ioc_key_flag_t;
+unsigned long zfs_history_output_max = 1024 * 1024;
+
+uint_t zfs_fsyncer_key;
+uint_t zfs_allow_log_key;
 
 /* DATA_TYPE_ANY is used when zkey_type can vary. */
 #define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
 
-typedef struct zfs_ioc_key {
-	const char	*zkey_name;
-	data_type_t	zkey_type;
-	ioc_key_flag_t	zkey_flags;
-} zfs_ioc_key_t;
-
-typedef enum {
-	NO_NAME,
-	POOL_NAME,
-	DATASET_NAME
-} zfs_ioc_namecheck_t;
-
-typedef enum {
-	POOL_CHECK_NONE		= 1 << 0,
-	POOL_CHECK_SUSPENDED	= 1 << 1,
-	POOL_CHECK_READONLY	= 1 << 2,
-} zfs_ioc_poolcheck_t;
-
 typedef struct zfs_ioc_vec {
 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
 	zfs_ioc_func_t		*zvec_func;
@@ -313,7 +280,7 @@
 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
 static int zfs_check_settable(const char *name, nvpair_t *property,
     cred_t *cr);
-static int zfs_check_clearable(char *dataset, nvlist_t *props,
+static int zfs_check_clearable(const char *dataset, nvlist_t *props,
     nvlist_t **errors);
 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
     boolean_t *);
@@ -347,23 +314,6 @@
 }
 
 /*
- * Check to see if the named dataset is currently defined as bootable
- */
-static boolean_t
-zfs_is_bootfs(const char *name)
-{
-	objset_t *os;
-
-	if (dmu_objset_hold(name, FTAG, &os) == 0) {
-		boolean_t ret;
-		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
-		dmu_objset_rele(os, FTAG);
-		return (ret);
-	}
-	return (B_FALSE);
-}
-
-/*
  * Return non-zero if the spa version is less than requested version.
  */
 static int
@@ -487,7 +437,8 @@
 {
 	uint64_t zoned;
 
-	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
+	if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
+	    &zoned, NULL))
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
@@ -498,7 +449,7 @@
 {
 	uint64_t zoned;
 
-	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
+	if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
@@ -557,7 +508,7 @@
  * Returns 0 for success, non-zero for access and other errors.
  */
 static int
-zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
+zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
 {
 #ifdef HAVE_MLSLABEL
 	char		ds_hexsl[MAXNAMELEN];
@@ -612,7 +563,7 @@
 	 */
 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 		objset_t *os;
-		static char *setsl_tag = "setsl_tag";
+		static const char *setsl_tag = "setsl_tag";
 
 		/*
 		 * Try to own the dataset; abort if there is any error,
@@ -683,8 +634,8 @@
 			 * limit on things *under* (ie. contained by)
 			 * the thing they own.
 			 */
-			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
-			    setpoint))
+			if (dsl_prop_get_integer(dsname,
+			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
 				return (SET_ERROR(EPERM));
 			if (!zoned || strlen(dsname) <= strlen(setpoint))
 				return (SET_ERROR(EPERM));
@@ -739,7 +690,7 @@
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
-	char *cp;
+	const char *cp;
 	int error;
 
 	/*
@@ -777,13 +728,13 @@
 	    ZFS_DELEG_PERM_SEND, cr));
 }
 
-int
+static int
 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (SET_ERROR(ENOTSUP));
 }
 
-int
+static int
 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (SET_ERROR(ENOTSUP));
@@ -1044,14 +995,6 @@
 
 /* ARGSUSED */
 static int
-zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
-{
-	return (zfs_secpolicy_write_perms(zc->zc_name,
-	    ZFS_DELEG_PERM_REMAP, cr));
-}
-
-/* ARGSUSED */
-static int
 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair, *nextpair;
@@ -1133,7 +1076,7 @@
  * SYS_CONFIG privilege, which is not available in a local zone.
  */
 /* ARGSUSED */
-static int
+int
 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
@@ -1440,10 +1383,7 @@
 	mutex_enter(&os->os_user_ptr_lock);
 	*zfvp = dmu_objset_get_user(os);
 	/* bump s_active only when non-zero to prevent umount race */
-	if (*zfvp == NULL || (*zfvp)->z_sb == NULL ||
-	    !atomic_inc_not_zero(&((*zfvp)->z_sb->s_active))) {
-		error = SET_ERROR(ESRCH);
-	}
+	error = zfs_vfs_ref(zfvp);
 	mutex_exit(&os->os_user_ptr_lock);
 	return (error);
 }
@@ -1477,15 +1417,17 @@
 	if (getzfsvfs(name, zfvp) != 0)
 		error = zfsvfs_create(name, B_FALSE, zfvp);
 	if (error == 0) {
-		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
-		    RW_READER, tag);
+		if (writer)
+			ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
+		else
+			ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
 		if ((*zfvp)->z_unmounted) {
 			/*
 			 * XXX we could probably try again, since the unmounting
 			 * thread should be just about to disassociate the
 			 * objset from the zfsvfs.
 			 */
-			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
+			ZFS_TEARDOWN_EXIT(*zfvp, tag);
 			return (SET_ERROR(EBUSY));
 		}
 	}
@@ -1495,10 +1437,10 @@
 static void
 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 {
-	rrm_exit(&zfsvfs->z_teardown_lock, tag);
+	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
 
-	if (zfsvfs->z_sb) {
-		deactivate_super(zfsvfs->z_sb);
+	if (zfs_vfs_held(zfsvfs)) {
+		zfs_vfs_rele(zfsvfs);
 	} else {
 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 		zfsvfs_free(zfsvfs);
@@ -1513,7 +1455,7 @@
 	nvlist_t *rootprops = NULL;
 	nvlist_t *zplprops = NULL;
 	dsl_crypto_params_t *dcp = NULL;
-	char *spa_name = zc->zc_name;
+	const char *spa_name = zc->zc_name;
 	boolean_t unload_wkey = B_TRUE;
 
 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
@@ -1982,6 +1924,10 @@
 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
 		break;
 
+	case VDEV_STATE_REMOVED:
+		error = vdev_remove_wanted(spa, zc->zc_guid);
+		break;
+
 	default:
 		error = SET_ERROR(EINVAL);
 	}
@@ -1994,8 +1940,9 @@
 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
 {
 	spa_t *spa;
-	int replacing = zc->zc_cookie;
 	nvlist_t *config;
+	int replacing = zc->zc_cookie;
+	int rebuild = zc->zc_simple;
 	int error;
 
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
@@ -2003,7 +1950,8 @@
 
 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config)) == 0) {
-		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
+		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
+		    rebuild);
 		nvlist_free(config);
 	}
 
@@ -2065,7 +2013,7 @@
 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
 {
 	spa_t *spa;
-	char *path = zc->zc_value;
+	const char *path = zc->zc_value;
 	uint64_t guid = zc->zc_guid;
 	int error;
 
@@ -2082,7 +2030,7 @@
 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
 {
 	spa_t *spa;
-	char *fru = zc->zc_value;
+	const char *fru = zc->zc_value;
 	uint64_t guid = zc->zc_guid;
 	int error;
 
@@ -2345,7 +2293,7 @@
 
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0) {
-		return (error == ENOENT ? ESRCH : error);
+		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
 	}
 
 	/*
@@ -2419,8 +2367,7 @@
 	const char *propname = nvpair_name(pair);
 	uint64_t *valary;
 	unsigned int vallen;
-	const char *domain;
-	char *dash;
+	const char *dash, *domain;
 	zfs_userquota_prop_t type;
 	uint64_t rid;
 	uint64_t quota;
@@ -2473,7 +2420,7 @@
 	const char *propname = nvpair_name(pair);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	uint64_t intval = 0;
-	char *strval = NULL;
+	const char *strval = NULL;
 	int err = -1;
 
 	if (prop == ZPROP_INVAL) {
@@ -2534,6 +2481,15 @@
 	case ZFS_PROP_REFRESERVATION:
 		err = dsl_dataset_set_refreservation(dsname, source, intval);
 		break;
+	case ZFS_PROP_COMPRESSION:
+		err = dsl_dataset_set_compression(dsname, source, intval);
+		/*
+		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
+		 * default path to set the value in the nvlist.
+		 */
+		if (err == 0)
+			err = -1;
+		break;
 	case ZFS_PROP_VOLSIZE:
 		err = zvol_set_volsize(dsname, intval);
 		break;
@@ -2572,6 +2528,26 @@
 	return (err);
 }
 
+static boolean_t
+zfs_is_namespace_prop(zfs_prop_t prop)
+{
+	switch (prop) {
+
+	case ZFS_PROP_ATIME:
+	case ZFS_PROP_RELATIME:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_EXEC:
+	case ZFS_PROP_SETUID:
+	case ZFS_PROP_READONLY:
+	case ZFS_PROP_XATTR:
+	case ZFS_PROP_NBMAND:
+		return (B_TRUE);
+
+	default:
+		return (B_FALSE);
+	}
+}
+
 /*
  * This function is best effort. If it fails to set any of the given properties,
  * it continues to set as many as it can and returns the last error
@@ -2590,7 +2566,8 @@
 	nvpair_t *propval;
 	int rv = 0;
 	uint64_t intval;
-	char *strval;
+	const char *strval;
+	boolean_t should_update_mount_cache = B_FALSE;
 
 	nvlist_t *genericnvl = fnvlist_alloc();
 	nvlist_t *retrynvl = fnvlist_alloc();
@@ -2645,7 +2622,8 @@
 				case PROP_TYPE_INDEX:
 					if (zfs_prop_index_to_string(prop,
 					    intval, &unused) != 0)
-						err = SET_ERROR(EINVAL);
+						err =
+						    SET_ERROR(ZFS_ERR_BADPROP);
 					break;
 				default:
 					cmn_err(CE_PANIC,
@@ -2687,6 +2665,9 @@
 				fnvlist_add_int32(errlist, propname, err);
 			rv = err;
 		}
+
+		if (zfs_is_namespace_prop(prop))
+			should_update_mount_cache = B_TRUE;
 	}
 
 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
@@ -2735,6 +2716,9 @@
 			}
 		}
 	}
+	if (should_update_mount_cache)
+		zfs_ioctl_update_mount_cache(dsname);
+
 	nvlist_free(genericnvl);
 	nvlist_free(retrynvl);
 
@@ -2951,7 +2935,7 @@
 		mutex_enter(&spa_namespace_lock);
 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
 			spa_configfile_set(spa, props, B_FALSE);
-			spa_write_cachefile(spa, B_FALSE, B_TRUE);
+			spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
 		}
 		mutex_exit(&spa_namespace_lock);
 		if (spa != NULL) {
@@ -3367,8 +3351,9 @@
 
 			/*
 			 * Volumes will return EBUSY and cannot be destroyed
-			 * until all asynchronous minor handling has completed.
-			 * Wait for the spa_zvol_taskq to drain then retry.
+			 * until all asynchronous minor handling (e.g. from
+			 * setting the volmode property) has completed. Wait for
+			 * the spa_zvol_taskq to drain then retry.
 			 */
 			error2 = dsl_destroy_head(fsname);
 			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
@@ -3406,7 +3391,7 @@
 {
 	int error = 0;
 	nvlist_t *nvprops = NULL;
-	char *origin_name;
+	const char *origin_name;
 
 	origin_name = fnvlist_lookup_string(innvl, "origin");
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
@@ -3440,11 +3425,8 @@
 static int
 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	if (strchr(fsname, '@') ||
-	    strchr(fsname, '%'))
-		return (SET_ERROR(EINVAL));
-
-	return (dmu_objset_remap_indirects(fsname));
+	/* This IOCTL is no longer supported. */
+	return (0);
 }
 
 /*
@@ -3535,10 +3517,10 @@
 static int
 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	char *message;
+	const char *message;
+	char *poolname;
 	spa_t *spa;
 	int error;
-	char *poolname;
 
 	/*
 	 * The poolname in the ioctl is not set, we get it from the TSD,
@@ -3552,7 +3534,7 @@
 		return (SET_ERROR(EINVAL));
 	(void) tsd_set(zfs_allow_log_key, NULL);
 	error = spa_open(poolname, &spa, FTAG);
-	strfree(poolname);
+	kmem_strfree(poolname);
 	if (error != 0)
 		return (error);
 
@@ -3569,6 +3551,56 @@
 }
 
 /*
+ * This ioctl is used to set the bootenv configuration on the current
+ * pool. This configuration is stored in the second padding area of the label,
+ * and it is used by the bootloader(s) to store the bootloader and/or system
+ * specific data.
+ * The data is stored as nvlist data stream, and is protected by
+ * an embedded checksum.
+ * The version can have two possible values:
+ * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
+ * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
+ */
+static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
+	{"version",	DATA_TYPE_UINT64,	0},
+	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
+};
+
+static int
+zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	spa_t *spa;
+	int err = spa_open(name, &spa, FTAG);
+
+	if (err != 0)
+		return (err);
+	spa_vdev_state_enter(spa, SCL_ALL);
+	err = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
+	(void) spa_vdev_state_exit(spa, NULL, 0);
+	spa_close(spa, FTAG);	/* pairs with spa_open() above */
+	return (err);
+}
+
+static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
+	/* no nvl keys */
+};
+
+static int
+zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	spa_t *spa;
+	int err = spa_open(name, &spa, FTAG);
+
+	if (err != 0)
+		return (err);
+	spa_vdev_state_enter(spa, SCL_ALL);
+	err = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
+	(void) spa_vdev_state_exit(spa, NULL, 0);
+	spa_close(spa, FTAG);	/* pairs with spa_open() above */
+	return (err);
+}
+
+/*
  * The dp_config_rwlock must not be held when calling this, because the
  * unmount may need to write out data.
  *
@@ -3584,7 +3616,7 @@
 	if (strchr(snapname, '@') == NULL)
 		return;
 
-	(void) zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
+	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
 }
 
 /* ARGSUSED */
@@ -3631,34 +3663,53 @@
  */
 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
 	{"snaps",	DATA_TYPE_NVLIST,	0},
-	{"defer", 	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
+	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 };
 
 /* ARGSUSED */
 static int
 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
+	int poollen;
 	nvlist_t *snaps;
 	nvpair_t *pair;
 	boolean_t defer;
+	spa_t *spa;
 
 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 	defer = nvlist_exists(innvl, "defer");
 
+	poollen = strlen(poolname);
 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 	    pair = nvlist_next_nvpair(snaps, pair)) {
+		const char *name = nvpair_name(pair);
+
+		/*
+		 * The snap must be in the specified pool to prevent the
+		 * invalid removal of zvol minors below.
+		 */
+		if (strncmp(name, poolname, poollen) != 0 ||
+		    (name[poollen] != '/' && name[poollen] != '@'))
+			return (SET_ERROR(EXDEV));
+
 		zfs_unmount_snap(nvpair_name(pair));
+		if (spa_open(name, &spa, FTAG) == 0) {
+			zvol_remove_minors(spa, name, B_TRUE);
+			spa_close(spa, FTAG);
+		}
 	}
 
 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
 }
 
 /*
- * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
- * All bookmarks must be in the same pool.
+ * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks and snapshots must be in the same pool.
+ * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
  *
  * innvl: {
- *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ *     new_bookmark1 -> existing_snapshot,
+ *     new_bookmark2 -> existing_bookmark,
  * }
  *
  * outnvl: bookmark -> error code (int32)
@@ -3672,25 +3723,6 @@
 static int
 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
-	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
-		char *snap_name;
-
-		/*
-		 * Verify the snapshot argument.
-		 */
-		if (nvpair_value_string(pair, &snap_name) != 0)
-			return (SET_ERROR(EINVAL));
-
-
-		/* Verify that the keys (bookmarks) are unique */
-		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
-		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
-			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
-				return (SET_ERROR(EINVAL));
-		}
-	}
-
 	return (dsl_bookmark_create(innvl, outnvl));
 }
 
@@ -3716,6 +3748,37 @@
 }
 
 /*
+ * innvl is not used.
+ *
+ * outnvl: {
+ *     property 1, property 2, ...
+ * }
+ *
+ */
+static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
+	/* no nvl keys */
+};
+
+/* ARGSUSED */
+static int
+zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
+    nvlist_t *outnvl)
+{
+	char fsname[ZFS_MAX_DATASET_NAME_LEN];
+	char *bmname = strchr(bookmark, '#');
+
+	if (bmname == NULL)
+		return (SET_ERROR(EINVAL));
+	/* The filesystem part before '#' must fit in fsname (with NUL). */
+	if ((size_t)(bmname - bookmark) >= sizeof (fsname))
+		return (SET_ERROR(ENAMETOOLONG));
+
+	(void) strlcpy(fsname, bookmark, (bmname - bookmark) + 1);
+
+	return (dsl_get_bookmark_props(fsname, bmname + 1, outnvl));
+}
+
+/*
  * innvl: {
  *     bookmark name 1, bookmark name 2
  * }
@@ -3789,9 +3852,9 @@
 	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
 
 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
 	    nvarg, outnvl));
@@ -3925,7 +3988,8 @@
 
 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
 	    cmd_type == POOL_INITIALIZE_START ||
-	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
+	    cmd_type == POOL_INITIALIZE_SUSPEND ||
+	    cmd_type == POOL_INITIALIZE_UNINIT)) {
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3959,7 +4023,7 @@
 	fnvlist_free(vdev_errlist);
 
 	spa_close(spa, FTAG);
-	return (total_errors > 0 ? EINVAL : 0);
+	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
 }
 
 /*
@@ -4044,7 +4108,134 @@
 	fnvlist_free(vdev_errlist);
 
 	spa_close(spa, FTAG);
-	return (total_errors > 0 ? EINVAL : 0);
+	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
+}
+
+/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, and no
+ * tag is passed in, the ioctl blocks until all activity of that type is
+ * complete, and then returns with "waited" set to true.
+ *
+ * If a tag is provided, it identifies a particular instance of an activity to
+ * wait for. Currently, this is only valid for use with 'initialize', because
+ * that is the only activity for which there can be multiple instances running
+ * concurrently. In the case of 'initialize', the tag corresponds to the guid of
+ * the vdev on which to wait.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ *     "wait_activity" -> int32_t
+ *     (optional) "wait_tag" -> uint64_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
+	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
+	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
+};
+
+static int
+zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int32_t activity;
+	uint64_t tag;
+	boolean_t waited;
+	int error;
+
+	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
+		return (SET_ERROR(EINVAL));
+	/* The tag is optional; without it, wait on all such activity. */
+	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
+		error = spa_wait_tag(name, activity, tag, &waited);
+	else
+		error = spa_wait(name, activity, &waited);
+
+	if (error == 0)
+		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
+
+	return (error);
+}
+
+/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, the
+ * ioctl blocks until all activity of that type is complete, and then returns
+ * with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ *     "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
+	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
+};
+
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int32_t activity;
+	boolean_t waited = B_FALSE;
+	int error;
+	dsl_pool_t *dp;
+	dsl_dir_t *dd;
+	dsl_dataset_t *ds;
+
+	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
+		return (SET_ERROR(EINVAL));
+
+	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
+		return (SET_ERROR(EINVAL));
+
+	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
+		return (error);
+
+	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+
+	dd = ds->ds_dir;
+	mutex_enter(&dd->dd_activity_lock);
+	dd->dd_activity_waiters++;	/* undone after dsl_dir_wait() below */
+
+	/*
+	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
+	 * aren't evicted while we're waiting. Normally this is prevented by
+	 * holding the pool, but we can't do that while we're waiting since
+	 * that would prevent TXGs from syncing out. Some of the functionality
+	 * of long-holds (e.g. preventing deletion) is unnecessary for this
+	 * case, since we would cancel the waiters before proceeding with a
+	 * deletion. An alternative mechanism for keeping the dataset around
+	 * could be developed but this is simpler.
+	 */
+	dsl_dataset_long_hold(ds, FTAG);
+	dsl_pool_rele(dp, FTAG);
+
+	error = dsl_dir_wait(dd, ds, activity, &waited);
+
+	dsl_dataset_long_rele(ds, FTAG);
+	dd->dd_activity_waiters--;
+	if (dd->dd_activity_waiters == 0)
+		cv_signal(&dd->dd_activity_cv);	/* last waiter is leaving */
+	mutex_exit(&dd->dd_activity_lock);
+
+	dsl_dataset_rele(ds, FTAG);	/* pairs with dsl_dataset_hold() */
+
+	if (error == 0)
+		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
+
+	return (error);
+}
 
 /*
@@ -4064,7 +4255,7 @@
 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	zfsvfs_t *zfsvfs;
-	zvol_state_t *zv;
+	zvol_state_handle_t *zv;
 	char *target = NULL;
 	int error;
 
@@ -4094,7 +4285,7 @@
 			resume_err = zfs_resume_fs(zfsvfs, ds);
 			error = error ? error : resume_err;
 		}
-		deactivate_super(zfsvfs->z_sb);
+		zfs_vfs_rele(zfsvfs);
 	} else if ((zv = zvol_suspend(fsname)) != NULL) {
 		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
 		    outnvl);
@@ -4113,12 +4304,46 @@
 
 	fullname = kmem_asprintf("%s@%s", fsname, snapname);
 	zfs_unmount_snap(fullname);
-	strfree(fullname);
+	kmem_strfree(fullname);
 
 	return (0);
 }
 
 /*
+ *
+ * Redact a snapshot with respect to a set of other snapshots.
+ * innvl: {
+ *     "bookname" -> (string)
+ *         shortname of the redaction bookmark to generate
+ *     "snapnv" -> (nvlist, values ignored)
+ *         snapshots to redact snapname with respect to
+ * }
+ *
+ * outnvl is unused
+ */
+
+/* ARGSUSED */
+static const zfs_ioc_key_t zfs_keys_redact[] = {
+	{"bookname",		DATA_TYPE_STRING,	0},
+	{"snapnv",		DATA_TYPE_NVLIST,	0},
+};
+static int
+zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	nvlist_t *redactnvl = NULL;
+	char *redactbook = NULL;
+
+	if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
+		return (SET_ERROR(EINVAL));
+	if (fnvlist_num_pairs(redactnvl) == 0)
+		return (SET_ERROR(ENXIO));
+	if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
+		return (SET_ERROR(EINVAL));
+
+	return (dmu_redact_snap(snapname, redactnvl, redactbook));
+}
+
+/*
  * inputs:
  * zc_name	old name of dataset
  * zc_value	new name of dataset
@@ -4132,6 +4357,7 @@
 	objset_t *os;
 	dmu_objset_type_t ost;
 	boolean_t recursive = zc->zc_cookie & 1;
+	boolean_t nounmount = !!(zc->zc_cookie & 2);
 	char *at;
 	int err;
 
@@ -4157,7 +4383,7 @@
 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
 			return (SET_ERROR(EXDEV));
 		*at = '\0';
-		if (ost == DMU_OST_ZFS) {
+		if (ost == DMU_OST_ZFS && !nounmount) {
 			error = dmu_objset_find(zc->zc_name,
 			    recursive_unmount, at + 1,
 			    recursive ? DS_FIND_CHILDREN : 0);
@@ -4182,7 +4408,7 @@
 	const char *propname = nvpair_name(pair);
 	boolean_t issnap = (strchr(dsname, '@') != NULL);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
-	uint64_t intval;
+	uint64_t intval, compval;
 	int err;
 
 	if (prop == ZPROP_INVAL) {
@@ -4264,19 +4490,20 @@
 		 * we'll catch them later.
 		 */
 		if (nvpair_value_uint64(pair, &intval) == 0) {
-			if (intval >= ZIO_COMPRESS_GZIP_1 &&
-			    intval <= ZIO_COMPRESS_GZIP_9 &&
+			compval = ZIO_COMPRESS_ALGO(intval);
+			if (compval >= ZIO_COMPRESS_GZIP_1 &&
+			    compval <= ZIO_COMPRESS_GZIP_9 &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_GZIP_COMPRESSION)) {
 				return (SET_ERROR(ENOTSUP));
 			}
 
-			if (intval == ZIO_COMPRESS_ZLE &&
+			if (compval == ZIO_COMPRESS_ZLE &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_ZLE_COMPRESSION))
 				return (SET_ERROR(ENOTSUP));
 
-			if (intval == ZIO_COMPRESS_LZ4) {
+			if (compval == ZIO_COMPRESS_LZ4) {
 				spa_t *spa;
 
 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
@@ -4290,16 +4517,18 @@
 				spa_close(spa, FTAG);
 			}
 
-			/*
-			 * If this is a bootable dataset then
-			 * verify that the compression algorithm
-			 * is supported for booting. We must return
-			 * something other than ENOTSUP since it
-			 * implies a downrev pool version.
-			 */
-			if (zfs_is_bootfs(dsname) &&
-			    !BOOTFS_COMPRESS_VALID(intval)) {
-				return (SET_ERROR(ERANGE));
+			if (compval == ZIO_COMPRESS_ZSTD) {
+				spa_t *spa;
+
+				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+					return (err);
+
+				if (!spa_feature_is_enabled(spa,
+				    SPA_FEATURE_ZSTD_COMPRESS)) {
+					spa_close(spa, FTAG);
+					return (SET_ERROR(ENOTSUP));
+				}
+				spa_close(spa, FTAG);
 			}
 		}
 		break;
@@ -4342,16 +4571,6 @@
 		    intval != ZFS_DNSIZE_LEGACY) {
 			spa_t *spa;
 
-			/*
-			 * If this is a bootable dataset then
-			 * we don't allow large (>512B) dnodes,
-			 * because GRUB doesn't support them.
-			 */
-			if (zfs_is_bootfs(dsname) &&
-			    intval != ZFS_DNSIZE_LEGACY) {
-				return (SET_ERROR(EDOM));
-			}
-
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
@@ -4441,7 +4660,7 @@
  * pointed at by errlist is NULL.
  */
 static int
-zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
+zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
 {
 	zfs_cmd_t *zc;
 	nvpair_t *pair, *next_pair;
@@ -4576,6 +4795,11 @@
 	static const zfs_prop_t delayable[] = {
 		ZFS_PROP_REFQUOTA,
 		ZFS_PROP_KEYLOCATION,
+		/*
+		 * Setting ZFS_PROP_SHARESMB requires the objset type to be
+		 * known, which is not possible prior to receipt of raw sends.
+		 */
+		ZFS_PROP_SHARESMB,
 		0
 	};
 	int i;
@@ -4609,7 +4833,16 @@
 	return (delayprops);
 }
 
-#ifdef	DEBUG
+static void
+zfs_allow_log_destroy(void *arg)
+{
+	/* Nothing to release when no pool name string was handed in. */
+	if (arg == NULL)
+		return;
+	kmem_strfree((char *)arg);
+}
+
+#ifdef	ZFS_DEBUG
 static boolean_t zfs_ioc_recv_inject_err;
 #endif
 
@@ -4620,33 +4853,38 @@
 static int
 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
     nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
-    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
-    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
-    uint64_t *action_handle, nvlist_t **errors)
+    boolean_t resumable, int input_fd,
+    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
+    uint64_t *errflags, nvlist_t **errors)
 {
 	dmu_recv_cookie_t drc;
 	int error = 0;
 	int props_error = 0;
-	offset_t off;
+	offset_t off, noff;
 	nvlist_t *local_delayprops = NULL;
 	nvlist_t *recv_delayprops = NULL;
+	nvlist_t *inherited_delayprops = NULL;
 	nvlist_t *origprops = NULL; /* existing properties */
 	nvlist_t *origrecvd = NULL; /* existing received properties */
 	boolean_t first_recvd_props = B_FALSE;
-	file_t *input_fp;
+	boolean_t tofs_was_redacted;
+	zfs_file_t *input_fp;
 
 	*read_bytes = 0;
 	*errflags = 0;
 	*errors = fnvlist_alloc();
+	off = 0;
 
-	input_fp = getf(input_fd);
-	if (input_fp == NULL)
+	if ((input_fp = zfs_file_get(input_fd)) == NULL)
 		return (SET_ERROR(EBADF));
 
+	noff = off = zfs_file_off(input_fp);
 	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
-	    resumable, localprops, hidden_args, origin, &drc);
+	    resumable, localprops, hidden_args, origin, &drc, input_fp,
+	    &off);
 	if (error != 0)
 		goto out;
+	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
 
 	/*
 	 * Set properties before we receive the stream so that they are applied
@@ -4740,6 +4978,7 @@
 		local_delayprops = extract_delay_props(oprops);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 		    oprops, *errors);
+		inherited_delayprops = extract_delay_props(xprops);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
 		    xprops, *errors);
 
@@ -4747,18 +4986,19 @@
 		nvlist_free(xprops);
 	}
 
-	off = input_fp->f_offset;
-	error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
-	    action_handle);
+	error = dmu_recv_stream(&drc, &off);
 
 	if (error == 0) {
 		zfsvfs_t *zfsvfs = NULL;
-		zvol_state_t *zv = NULL;
+		zvol_state_handle_t *zv = NULL;
 
 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
 			/* online recv */
 			dsl_dataset_t *ds;
 			int end_err;
+			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
+			    begin_record->drr_u.drr_begin.
+			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
 
 			ds = dmu_objset_ds(zfsvfs->z_os);
 			error = zfs_suspend_fs(zfsvfs);
@@ -4767,10 +5007,19 @@
 			 * likely also fail, and clean up after itself.
 			 */
 			end_err = dmu_recv_end(&drc, zfsvfs);
-			if (error == 0)
+			/*
+			 * If the dataset was not redacted, but we received a
+			 * redacted stream onto it, we need to unmount the
+			 * dataset.  Otherwise, resume the filesystem.
+			 */
+			if (error == 0 && !drc.drc_newfs &&
+			    stream_is_redacted && !tofs_was_redacted) {
+				error = zfs_end_fs(zfsvfs, ds);
+			} else if (error == 0) {
 				error = zfs_resume_fs(zfsvfs, ds);
+			}
 			error = error ? error : end_err;
-			deactivate_super(zfsvfs->z_sb);
+			zfs_vfs_rele(zfsvfs);
 		} else if ((zv = zvol_suspend(tofs)) != NULL) {
 			error = dmu_recv_end(&drc, zvol_tag(zv));
 			zvol_resume(zv);
@@ -4787,6 +5036,10 @@
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 			    local_delayprops, *errors);
 		}
+		if (inherited_delayprops != NULL && error == 0) {
+			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
+			    inherited_delayprops, *errors);
+		}
 	}
 
 	/*
@@ -4806,12 +5059,13 @@
 		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
 		nvlist_free(local_delayprops);
 	}
+	if (inherited_delayprops != NULL) {
+		ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
+		nvlist_free(inherited_delayprops);
+	}
+	*read_bytes = off - noff;
 
-	*read_bytes = off - input_fp->f_offset;
-	if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
-		input_fp->f_offset = off;
-
-#ifdef	DEBUG
+#ifdef	ZFS_DEBUG
 	if (zfs_ioc_recv_inject_err) {
 		zfs_ioc_recv_inject_err = B_FALSE;
 		error = 1;
@@ -4911,7 +5165,7 @@
 		nvlist_free(inheritprops);
 	}
 out:
-	releasef(input_fd);
+	zfs_file_put(input_fp);
 	nvlist_free(origrecvd);
 	nvlist_free(origprops);
 
@@ -4932,13 +5186,10 @@
  * zc_cookie		file descriptor to recv from
  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
  * zc_guid		force flag
- * zc_cleanup_fd	cleanup-on-exit file descriptor
- * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
  *
  * outputs:
  * zc_cookie		number of bytes read
  * zc_obj		zprop_errflags_t
- * zc_action_handle	handle for this guid/ds mapping
  * zc_nvlist_dst{_size} error for each unapplied received property
  */
 static int
@@ -4981,8 +5232,7 @@
 
 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
 	    NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
-	    zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
-	    &zc->zc_action_handle, &errors);
+	    &zc->zc_cookie, &zc->zc_obj, &errors);
 	nvlist_free(recvdprops);
 	nvlist_free(localprops);
 
@@ -5015,15 +5265,14 @@
  *     "input_fd" -> file descriptor to read stream from (int32)
  *     (optional) "force" -> force flag (value ignored)
  *     (optional) "resumable" -> resumable flag (value ignored)
- *     (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
- *     (optional) "action_handle" -> handle for this guid/ds mapping
+ *     (optional) "cleanup_fd" -> unused
+ *     (optional) "action_handle" -> unused
  *     (optional) "hidden_args" -> { "wkeydata" -> value }
  * }
  *
  * outnvl: {
  *     "read_bytes" -> number of bytes read
  *     "error_flags" -> zprop_errflags_t
- *     "action_handle" -> handle for this guid/ds mapping
  *     "errors" -> error for each unapplied received property (nvlist)
  * }
  */
@@ -5056,11 +5305,9 @@
 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
 	boolean_t force;
 	boolean_t resumable;
-	uint64_t action_handle = 0;
 	uint64_t read_bytes = 0;
 	uint64_t errflags = 0;
 	int input_fd = -1;
-	int cleanup_fd = -1;
 	int error;
 
 	snapname = fnvlist_lookup_string(innvl, "snapname");
@@ -5088,14 +5335,6 @@
 	force = nvlist_exists(innvl, "force");
 	resumable = nvlist_exists(innvl, "resumable");
 
-	error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
-	if (error && error != ENOENT)
-		return (error);
-
-	error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
-	if (error && error != ENOENT)
-		return (error);
-
 	/* we still use "props" here for backwards compatibility */
 	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
 	if (error && error != ENOENT)
@@ -5110,12 +5349,11 @@
 		return (error);
 
 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
-	    hidden_args, force, resumable, input_fd, begin_record, cleanup_fd,
-	    &read_bytes, &errflags, &action_handle, &errors);
+	    hidden_args, force, resumable, input_fd, begin_record,
+	    &read_bytes, &errflags, &errors);
 
 	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
 	fnvlist_add_uint64(outnvl, "error_flags", errflags);
-	fnvlist_add_uint64(outnvl, "action_handle", action_handle);
 	fnvlist_add_nvlist(outnvl, "errors", errors);
 
 	nvlist_free(errors);
@@ -5125,6 +5363,51 @@
 	return (error);
 }
 
+typedef struct dump_bytes_io {
+	zfs_file_t	*dbi_fp;	/* file passed to zfs_file_write() */
+	caddr_t		dbi_buf;	/* data to write */
+	int		dbi_len;	/* number of bytes of dbi_buf to write */
+	int		dbi_err;	/* zfs_file_write() return value */
+} dump_bytes_io_t;
+
+static void
+dump_bytes_cb(void *arg)
+{
+	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;	/* write request */
+	zfs_file_t *fp;
+	caddr_t buf;
+
+	fp = dbi->dbi_fp;
+	buf = dbi->dbi_buf;
+	/* Do the write; the status is handed back through dbi_err. */
+	dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
+}
+
+static int
+dump_bytes(objset_t *os, void *buf, int len, void *arg)
+{
+	dump_bytes_io_t dbi;
+
+	dbi.dbi_fp = arg;	/* arg is used as the output zfs_file_t */
+	dbi.dbi_buf = buf;
+	dbi.dbi_len = len;
+
+#if defined(HAVE_LARGE_STACKS)
+	dump_bytes_cb(&dbi);	/* write directly on the current stack */
+#else
+	/*
+	 * The write in dump_bytes_cb() runs in a taskq to ensure that there is
+	 * always enough stack space to write safely to the target filesystem.
+	 * The ZIO_TYPE_FREE threads are used because there can be a lot of
+	 * them and they are used in vdev_file.c for a similar purpose.
+	 */
+	spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
+	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
+#endif /* HAVE_LARGE_STACKS */
+
+	return (dbi.dbi_err);	/* status recorded by dump_bytes_cb() */
+}
+
 /*
  * inputs:
  * zc_name	name of snapshot to send
@@ -5152,7 +5435,10 @@
 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
 	boolean_t compressok = (zc->zc_flags & 0x4);
 	boolean_t rawok = (zc->zc_flags & 0x8);
-	uint8_t block_diff = zc->zc_block_diff;
+	boolean_t savedok = (zc->zc_flags & 0x10);
+	boolean_t block_diff = zc->zc_block_diff;
+
+	KTRACE();
 
 	if ((block_diff) && (block_diff != BLOCK_DIFF_MAGIC))
 		return (SET_ERROR(EINVAL));
@@ -5204,44 +5490,50 @@
 			}
 		}
 
-		error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
-		    &zc->zc_objset_type);
+		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
+		    compressok || rawok, savedok, &zc->zc_objset_type);
 
 		if (fromsnap != NULL)
 			dsl_dataset_rele(fromsnap, FTAG);
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 	} else {
-		file_t *fp = getf(zc->zc_cookie);
-		if (fp == NULL)
+		zfs_file_t *fp;
+		dmu_send_outparams_t out = {0};
+
+		if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
 			return (SET_ERROR(EBADF));
 
-		off = fp->f_offset;
+		off = zfs_file_off(fp);
+		out.dso_outfunc = dump_bytes;
+		out.dso_arg = fp;
+		out.dso_dryrun = B_FALSE;
+		// zc->zc_cookie.zc_block_diff;
+		out.block_diff = zc->zc_block_diff;
 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-		    zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
-		    zc->zc_cookie, fp->f_vnode, &off, block_diff);
+		    zc->zc_fromobj, embedok, large_block_ok, compressok,
+		    rawok, savedok, zc->zc_cookie, &off, &out);
 
-		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-			fp->f_offset = off;
-		releasef(zc->zc_cookie);
+		zfs_file_put(fp);
 	}
 	return (error);
 }
 
 /*
  * inputs:
- * zc_name	name of snapshot on which to report progress
- * zc_cookie	file descriptor of send stream
+ * zc_name		name of snapshot on which to report progress
+ * zc_cookie		file descriptor of send stream
  *
  * outputs:
- * zc_cookie	number of bytes written in send stream thus far
+ * zc_cookie		number of bytes written in send stream thus far
+ * zc_objset_type	logical size of data traversed by send thus far
  */
 static int
 zfs_ioc_send_progress(zfs_cmd_t *zc)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
-	dmu_sendarg_t *dsp = NULL;
+	dmu_sendstatus_t *dsp = NULL;
 	int error;
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
@@ -5265,15 +5557,19 @@
 
 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
-		if (dsp->dsa_outfd == zc->zc_cookie &&
-		    dsp->dsa_proc->group_leader == curproc->group_leader)
+		if (dsp->dss_outfd == zc->zc_cookie &&
+		    zfs_proc_is_caller(dsp->dss_proc))
 			break;
 	}
 
-	if (dsp != NULL)
-		zc->zc_cookie = *(dsp->dsa_off);
-	else
+	if (dsp != NULL) {
+		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
+		    0, 0);
+		/* This is the closest thing we have to atomic_read_64. */
+		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
+	} else {
 		error = SET_ERROR(ENOENT);
+	}
 
 	mutex_exit(&ds->ds_sendstream_lock);
 	dsl_dataset_rele(ds, FTAG);
@@ -5401,9 +5697,10 @@
 	} else {
 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
 		if (vd == NULL) {
-			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
+			error = SET_ERROR(ENODEV);
+			(void) spa_vdev_state_exit(spa, NULL, error);
 			spa_close(spa, FTAG);
-			return (SET_ERROR(ENODEV));
+			return (error);
 		}
 	}
 
@@ -5434,7 +5731,7 @@
  * outnvl is unused
  */
 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
-	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	0},
+	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
 };
 
 /* ARGSUSED */
@@ -5443,11 +5740,13 @@
 {
 	spa_t *spa;
 	int error;
-	boolean_t scrub_restart = B_TRUE;
+	boolean_t rc, scrub_restart = B_TRUE;
 
 	if (innvl) {
-		scrub_restart = fnvlist_lookup_boolean_value(innvl,
-		    "scrub_restart");
+		error = nvlist_lookup_boolean_value(innvl,
+		    "scrub_restart", &rc);
+		if (error == 0)
+			scrub_restart = rc;
 	}
 
 	error = spa_open(pool, &spa, FTAG);
@@ -5617,7 +5916,6 @@
 static int
 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
 {
-	objset_t *os;
 	int error = 0;
 	zfsvfs_t *zfsvfs;
 
@@ -5638,19 +5936,54 @@
 				error = zfs_resume_fs(zfsvfs, newds);
 			}
 		}
-		if (error == 0)
-			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
-		deactivate_super(zfsvfs->z_sb);
+		if (error == 0) {
+			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
+			if (zfsvfs->z_os->os_upgrade_id == 0) {
+				/* clear potential error code and retry */
+				zfsvfs->z_os->os_upgrade_status = 0;
+				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
+
+				dsl_pool_config_enter(
+				    dmu_objset_pool(zfsvfs->z_os), FTAG);
+				dmu_objset_userspace_upgrade(zfsvfs->z_os);
+				dsl_pool_config_exit(
+				    dmu_objset_pool(zfsvfs->z_os), FTAG);
+			} else {
+				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
+			}
+
+			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
+			    zfsvfs->z_os->os_upgrade_id);
+			error = zfsvfs->z_os->os_upgrade_status;
+		}
+		zfs_vfs_rele(zfsvfs);
 	} else {
+		objset_t *os;
+
 		/* XXX kind of reading contents without owning */
 		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 		if (error != 0)
 			return (error);
 
-		error = dmu_objset_userspace_upgrade(os);
-		dmu_objset_rele_flags(os, B_TRUE, FTAG);
-	}
+		mutex_enter(&os->os_upgrade_lock);
+		if (os->os_upgrade_id == 0) {
+			/* clear potential error code and retry */
+			os->os_upgrade_status = 0;
+			mutex_exit(&os->os_upgrade_lock);
 
+			dmu_objset_userspace_upgrade(os);
+		} else {
+			mutex_exit(&os->os_upgrade_lock);
+		}
+
+		dsl_pool_rele(dmu_objset_pool(os), FTAG);
+
+		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
+		error = os->os_upgrade_status;
+
+		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
+		    FTAG);
+	}
 	return (error);
 }
 
@@ -5745,25 +6078,24 @@
 {
 	char *snap_name;
 	char *hold_name;
-	int error;
 	minor_t minor;
 
-	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
 	    (u_longlong_t)ddi_get_lbolt64());
 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
 
-	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+	int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
 	    hold_name);
 	if (error == 0)
 		(void) strlcpy(zc->zc_value, snap_name,
 		    sizeof (zc->zc_value));
-	strfree(snap_name);
-	strfree(hold_name);
-	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+	kmem_strfree(snap_name);
+	kmem_strfree(hold_name);
+	zfs_onexit_fd_rele(fp);
 	return (error);
 }
 
@@ -5779,21 +6111,17 @@
 static int
 zfs_ioc_diff(zfs_cmd_t *zc)
 {
-	file_t *fp;
+	zfs_file_t *fp;
 	offset_t off;
 	int error;
 
-	fp = getf(zc->zc_cookie);
-	if (fp == NULL)
+	if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
 		return (SET_ERROR(EBADF));
 
-	off = fp->f_offset;
+	off = zfs_file_off(fp);	/* offset handed to dmu_diff() */
+	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
 
-	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
-
-	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-		fp->f_offset = off;
-	releasef(zc->zc_cookie);
+	zfs_file_put(fp);	/* pairs with zfs_file_get() above */
 
 	return (error);
 }
@@ -5829,6 +6157,7 @@
 	int cleanup_fd = -1;
 	int error;
 	minor_t minor = 0;
+	zfs_file_t *fp = NULL;
 
 	holds = fnvlist_lookup_nvlist(args, "holds");
 
@@ -5846,15 +6175,17 @@
 	}
 
 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
-		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (error != 0)
-			return (error);
+		fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (fp == NULL)
+			return (SET_ERROR(EBADF));
 	}
 
 	error = dsl_dataset_user_hold(holds, minor, errlist);
-	if (minor != 0)
-		zfs_onexit_fd_rele(cleanup_fd);
-	return (error);
+	if (fp != NULL) {
+		ASSERT3U(minor, !=, 0);
+		zfs_onexit_fd_rele(fp);
+	}
+	return (SET_ERROR(error));
 }
 
 /*
@@ -5916,9 +6247,9 @@
 	uint64_t dropped = 0;
 	int error;
 
-	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	do {
 		error = zfs_zevent_next(ze, &event,
@@ -5940,7 +6271,7 @@
 			break;
 	} while (1);
 
-	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+	zfs_zevent_fd_rele(fp);
 
 	return (error);
 }
@@ -5972,20 +6303,20 @@
 	minor_t minor;
 	int error;
 
-	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	error = zfs_zevent_seek(ze, zc->zc_guid);
-	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+	zfs_zevent_fd_rele(fp);
 
 	return (error);
 }
 
 /*
  * inputs:
- * zc_name		name of new filesystem or snapshot
- * zc_value		full name of old snapshot
+ * zc_name		name of later filesystem or snapshot
+ * zc_value		full name of old snapshot or bookmark
  *
  * outputs:
  * zc_cookie		space in bytes
@@ -5997,7 +6328,7 @@
 {
 	int error;
 	dsl_pool_t *dp;
-	dsl_dataset_t *new, *old;
+	dsl_dataset_t *new;
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
@@ -6007,16 +6338,26 @@
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
-	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
-	if (error != 0) {
-		dsl_dataset_rele(new, FTAG);
-		dsl_pool_rele(dp, FTAG);
-		return (error);
-	}
+	if (strchr(zc->zc_value, '#') != NULL) {
+		zfs_bookmark_phys_t bmp;
+		error = dsl_bookmark_lookup(dp, zc->zc_value,
+		    new, &bmp);
+		if (error == 0) {
+			error = dsl_dataset_space_written_bookmark(&bmp, new,
+			    &zc->zc_cookie,
+			    &zc->zc_objset_type, &zc->zc_perm_action);
+		}
+	} else {
+		dsl_dataset_t *old;
+		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
 
-	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
-	    &zc->zc_objset_type, &zc->zc_perm_action);
-	dsl_dataset_rele(old, FTAG);
+		if (error == 0) {
+			error = dsl_dataset_space_written(old, new,
+			    &zc->zc_cookie,
+			    &zc->zc_objset_type, &zc->zc_perm_action);
+			dsl_dataset_rele(old, FTAG);
+		}
+	}
 	dsl_dataset_rele(new, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
@@ -6094,8 +6435,15 @@
  *         presence indicates compressed DRR_WRITE records are permitted
  *     (optional) "rawok" -> (value ignored)
  *         presence indicates raw encrypted records should be used.
+ *     (optional) "savedok" -> (value ignored)
+ *         presence indicates we should send a partially received snapshot
  *     (optional) "resume_object" and "resume_offset" -> (uint64)
  *         if present, resume send stream from specified object and offset.
+ *     (optional) "redactbook" -> (string)
+ *         if present, use this bookmark's redaction list to generate a redacted
+ *         send stream
+ *     (optional) "blockdiff" -> (value ignored)
+ *         presence indicates only block IDs are sent, without data
  * }
  *
  * outnvl is unused
@@ -6107,8 +6455,11 @@
 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
+	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
+	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
+	{"blockdiff",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 };
 
 /* ARGSUSED */
@@ -6119,13 +6470,17 @@
 	offset_t off;
 	char *fromname = NULL;
 	int fd;
-	file_t *fp;
+	zfs_file_t *fp;
 	boolean_t largeblockok;
 	boolean_t embedok;
 	boolean_t compressok;
 	boolean_t rawok;
+	boolean_t savedok;
 	uint64_t resumeobj = 0;
 	uint64_t resumeoff = 0;
+	char *redactbook = NULL;
+
+	KTRACE();
 
 	fd = fnvlist_lookup_int32(innvl, "fd");
 
@@ -6135,24 +6490,47 @@
 	embedok = nvlist_exists(innvl, "embedok");
 	compressok = nvlist_exists(innvl, "compressok");
 	rawok = nvlist_exists(innvl, "rawok");
+	savedok = nvlist_exists(innvl, "savedok");
+	boolean_t blockdiff =  nvlist_exists(innvl, "blockdiff");
 
+#ifndef NO_ZMODDBG
+	if(blockdiff)
+		KDEBUG("Found blockdiff in innvl");
+	else
+		KDEBUG("Did not find blockdiff in innvl");
+#endif
 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
 
-	if ((fp = getf(fd)) == NULL)
+	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
+
+	if ((fp = zfs_file_get(fd)) == NULL)
 		return (SET_ERROR(EBADF));
 
-	off = fp->f_offset;
-	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
-	    rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
+	off = zfs_file_off(fp);
 
-	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
-		fp->f_offset = off;
+	dmu_send_outparams_t out = {0};
+	out.dso_outfunc = dump_bytes;
+	out.dso_arg = fp;
+	out.dso_dryrun = B_FALSE;
+	out.block_diff = blockdiff;
+	error = dmu_send(snapname, fromname, embedok, largeblockok,
+	    compressok, rawok, savedok, blockdiff, resumeobj, resumeoff,
+	    redactbook, fd, &off, &out);
 
-	releasef(fd);
+	zfs_file_put(fp);
 	return (error);
 }
 
+/* ARGSUSED */
+static int
+send_space_sum(objset_t *os, void *buf, int len, void *arg)
+{
+	uint64_t *size = arg;	/* arg points at the running byte total */
+	*size += len;		/* count the bytes; buf itself is not read */
+	return (0);		/* always reports success */
+}
+
 /*
  * Determine approximately how large a zfs send stream will be -- the number
  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
@@ -6166,8 +6544,12 @@
  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  *     (optional) "compressok" -> (value ignored)
  *         presence indicates compressed DRR_WRITE records are permitted
- *	(optional) "rawok" -> (value ignored)
+ *     (optional) "rawok" -> (value ignored)
  *         presence indicates raw encrypted records should be used.
+ *     (optional) "resume_object" and "resume_offset" -> (uint64)
+ *         if present, resume send stream from specified object and offset.
+ *     (optional) "fd" -> file descriptor to use as a cookie for progress
+ *         tracking (int32)
  * }
  *
  * outnvl: {
@@ -6181,6 +6563,11 @@
 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
+	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
+	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
+	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
+	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
+	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
 };
 
 static int
@@ -6188,12 +6575,25 @@
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *tosnap;
+	dsl_dataset_t *fromsnap = NULL;
 	int error;
-	char *fromname;
+	char *fromname = NULL;
+	char *redactlist_book = NULL;
+	boolean_t largeblockok;
+	boolean_t embedok;
 	boolean_t compressok;
 	boolean_t rawok;
-	uint64_t space;
+	boolean_t savedok;
+	boolean_t blockdiff;
+	uint64_t space = 0;
+	boolean_t full_estimate = B_FALSE;
+	uint64_t resumeobj = 0;
+	uint64_t resumeoff = 0;
+	uint64_t resume_bytes = 0;
+	int32_t fd = -1;
+	zfs_bookmark_phys_t zbm = {0};
 
+	KTRACE();
 	error = dsl_pool_hold(snapname, FTAG, &dp);
 	if (error != 0)
 		return (error);
@@ -6203,61 +6603,103 @@
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
+	(void) nvlist_lookup_int32(innvl, "fd", &fd);
 
+	largeblockok = nvlist_exists(innvl, "largeblockok");
+	embedok = nvlist_exists(innvl, "embedok");
 	compressok = nvlist_exists(innvl, "compressok");
 	rawok = nvlist_exists(innvl, "rawok");
+	savedok = nvlist_exists(innvl, "savedok");
+	blockdiff = nvlist_exists(innvl, "savedok"); /* NOTE(review): reads key "savedok", not "blockdiff" -- possible copy/paste slip, confirm */
+	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
+	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
+	    &redactlist_book) == 0);
 
-	error = nvlist_lookup_string(innvl, "from", &fromname);
-	if (error == 0) {
-		if (strchr(fromname, '@') != NULL) {
+	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
+	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
+
+	if (altbook) {
+		full_estimate = B_TRUE;
+	} else if (from) {
+		if (strchr(fromname, '#')) {
+			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
+
 			/*
-			 * If from is a snapshot, hold it and use the more
-			 * efficient dmu_send_estimate to estimate send space
-			 * size using deadlists.
+			 * dsl_bookmark_lookup() will fail with EXDEV if
+			 * the from-bookmark and tosnap are at the same txg.
+			 * However, it's valid to do a send (and therefore,
+			 * a send estimate) from and to the same time point,
+			 * if the bookmark is redacted (the incremental send
+			 * can change what's redacted on the target).  In
+			 * this case, dsl_bookmark_lookup() fills in zbm
+			 * but returns EXDEV.  Ignore this error.
 			 */
-			dsl_dataset_t *fromsnap;
+			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
+			    zbm.zbm_guid ==
+			    dsl_dataset_phys(tosnap)->ds_guid)
+				error = 0;
+
+			if (error != 0) {
+				dsl_dataset_rele(tosnap, FTAG);
+				dsl_pool_rele(dp, FTAG);
+				return (error);
+			}
+			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
+			    ZBM_FLAG_HAS_FBN)) {
+				full_estimate = B_TRUE;
+			}
+		} else if (strchr(fromname, '@')) {
 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
-			if (error != 0)
-				goto out;
-			error = dmu_send_estimate(tosnap, fromsnap,
-			    compressok || rawok, &space);
-			dsl_dataset_rele(fromsnap, FTAG);
-		} else if (strchr(fromname, '#') != NULL) {
-			/*
-			 * If from is a bookmark, fetch the creation TXG of the
-			 * snapshot it was created from and use that to find
-			 * blocks that were born after it.
-			 */
-			zfs_bookmark_phys_t frombm;
+			if (error != 0) {
+				dsl_dataset_rele(tosnap, FTAG);
+				dsl_pool_rele(dp, FTAG);
+				return (error);
+			}
 
-			error = dsl_bookmark_lookup(dp, fromname, tosnap,
-			    &frombm);
-			if (error != 0)
-				goto out;
-			error = dmu_send_estimate_from_txg(tosnap,
-			    frombm.zbm_creation_txg, compressok || rawok,
-			    &space);
+			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
+				full_estimate = B_TRUE;
+				dsl_dataset_rele(fromsnap, FTAG);
+			}
 		} else {
 			/*
 			 * from is not properly formatted as a snapshot or
 			 * bookmark
 			 */
-			error = SET_ERROR(EINVAL);
-			goto out;
+			dsl_dataset_rele(tosnap, FTAG);
+			dsl_pool_rele(dp, FTAG);
+			return (SET_ERROR(EINVAL));
 		}
-	} else {
+	}
+
+	if (full_estimate) {
+		dmu_send_outparams_t out = {0};
+		offset_t off = 0;
+		out.dso_outfunc = send_space_sum;
+		out.dso_arg = &space;
+		out.dso_dryrun = B_TRUE;
 		/*
-		 * If estimating the size of a full send, use dmu_send_estimate.
+		 * We have to release these holds so dmu_send can take them.  It
+		 * will do all the error checking we need.
 		 */
-		error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
-		    &space);
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
+		error = dmu_send(snapname, fromname, embedok, largeblockok,
+		    compressok, rawok, savedok, blockdiff, resumeobj, resumeoff,
+		    redactlist_book, fd, &off, &out);
+	} else {
+		error = dmu_send_estimate_fast(tosnap, fromsnap,
+		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
+		    compressok || rawok, savedok, &space);
+		space -= resume_bytes;
+		if (fromsnap != NULL)
+			dsl_dataset_rele(fromsnap, FTAG);
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
 	}
 
 	fnvlist_add_uint64(outnvl, "space", space);
 
-out:
-	dsl_dataset_rele(tosnap, FTAG);
-	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
@@ -6284,14 +6726,18 @@
 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
 {
 	int err;
-	boolean_t force = B_FALSE;
+	boolean_t rc, force = B_FALSE;
 	spa_t *spa;
 
+	KTRACE();
 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
 		return (err);
 
-	if (innvl)
-		force = fnvlist_lookup_boolean_value(innvl, "force");
+	if (innvl) {
+		err = nvlist_lookup_boolean_value(innvl, "force", &rc);
+		if (err == 0)
+			force = rc;
+	}
 
 	if (force) {
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
@@ -6302,7 +6748,7 @@
 
 	spa_close(spa, FTAG);
 
-	return (err);
+	return (0);
 }
 
 /*
@@ -6328,6 +6774,7 @@
 	nvlist_t *hidden_args;
 	boolean_t noop = nvlist_exists(innvl, "noop");
 
+	KTRACE();
 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 		ret = SET_ERROR(EINVAL);
 		goto error;
@@ -6459,7 +6906,7 @@
  * See the block comment at the beginning of this file for details on
  * each argument to this function.
  */
-static void
+void
 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
@@ -6495,7 +6942,7 @@
 	    POOL_NAME, log_history, pool_check);
 }
 
-static void
+void
 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
 {
@@ -6544,6 +6991,8 @@
 static void
 zfs_ioctl_init(void)
 {
+	KTRACE();
+
 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@@ -6580,7 +7029,7 @@
 	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
 
 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
-	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
+	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
 
@@ -6618,6 +7067,11 @@
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
 
+	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
+	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
+	    ARRAY_SIZE(zfs_keys_get_bookmark_props));
+
 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
 	    POOL_NAME,
@@ -6657,6 +7111,11 @@
 	    B_TRUE, zfs_keys_channel_program,
 	    ARRAY_SIZE(zfs_keys_channel_program));
 
+	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
+	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
+
 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@@ -6679,6 +7138,26 @@
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
 
+	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
+	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
+
+	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
+	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
+	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
+	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
+
+	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
+	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
+	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
+
 	/* IOCTLS that use the legacy function signature */
 
 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -6810,15 +7289,14 @@
 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 
-	/*
-	 * ZoL functions
-	 */
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
+
+	zfs_ioctl_init_os();
 }
 
 /*
@@ -6836,6 +7314,7 @@
 	const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
 	boolean_t required_keys_found = B_FALSE;
 
+	KTRACE();
 	/*
 	 * examine each input pair
 	 */
@@ -6844,6 +7323,7 @@
 		char *name = nvpair_name(pair);
 		data_type_t type = nvpair_type(pair);
 		boolean_t identified = B_FALSE;
+		KDEBUG("nvpair_name=%s", name ? name : "null");
 
 		/*
 		 * check pair against the documented names and type
@@ -6895,14 +7375,15 @@
 	return (0);
 }
 
-int
+static int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
     zfs_ioc_poolcheck_t check)
 {
 	spa_t *spa;
 	int error;
 
-	ASSERT(type == POOL_NAME || type == DATASET_NAME);
+	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
+	    type == ENTITY_NAME);
 
 	if (check & POOL_CHECK_NONE)
 		return (0);
@@ -6918,6 +7399,36 @@
 	return (error);
 }
 
+int
+zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
+{
+	zfsdev_state_t *zs, *fpd;
+
+	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
+
+	fpd = zfs_file_private(fp);
+	if (fpd == NULL)
+		return (SET_ERROR(EBADF));
+
+	mutex_enter(&zfsdev_state_lock);
+
+	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
+
+		if (zs->zs_minor == -1)
+			continue;
+
+		if (fpd == zs) {
+			*minorp = fpd->zs_minor;
+			mutex_exit(&zfsdev_state_lock);
+			return (0);
+		}
+	}
+
+	mutex_exit(&zfsdev_state_lock);
+
+	return (SET_ERROR(EBADF));
+}
+
 static void *
 zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
 {
@@ -6925,7 +7436,7 @@
 
 	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 		if (zs->zs_minor == minor) {
-			smp_rmb();
+			membar_consumer();
 			switch (which) {
 			case ZST_ONEXIT:
 				return (zs->zs_onexit);
@@ -6950,37 +7461,6 @@
 	return (ptr);
 }
 
-int
-zfsdev_getminor(struct file *filp, minor_t *minorp)
-{
-	zfsdev_state_t *zs, *fpd;
-
-	ASSERT(filp != NULL);
-	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
-
-	fpd = filp->private_data;
-	if (fpd == NULL)
-		return (SET_ERROR(EBADF));
-
-	mutex_enter(&zfsdev_state_lock);
-
-	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-
-		if (zs->zs_minor == -1)
-			continue;
-
-		if (fpd == zs) {
-			*minorp = fpd->zs_minor;
-			mutex_exit(&zfsdev_state_lock);
-			return (0);
-		}
-	}
-
-	mutex_exit(&zfsdev_state_lock);
-
-	return (SET_ERROR(EBADF));
-}
-
 /*
  * Find a free minor number.  The zfsdev_state_list is expected to
  * be short since it is only a list of currently open file handles.
@@ -7005,111 +7485,24 @@
 	return (0);
 }
 
-static int
-zfsdev_state_init(struct file *filp)
+long
+zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
 {
-	zfsdev_state_t *zs, *zsprev = NULL;
-	minor_t minor;
-	boolean_t newzs = B_FALSE;
-
-	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-
-	minor = zfsdev_minor_alloc();
-	if (minor == 0)
-		return (SET_ERROR(ENXIO));
-
-	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-		if (zs->zs_minor == -1)
-			break;
-		zsprev = zs;
-	}
-
-	if (!zs) {
-		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
-		newzs = B_TRUE;
-	}
-
-	zs->zs_file = filp;
-	filp->private_data = zs;
-
-	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
-	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
-
-
-	/*
-	 * In order to provide for lock-free concurrent read access
-	 * to the minor list in zfsdev_get_state_impl(), new entries
-	 * must be completely written before linking them into the
-	 * list whereas existing entries are already linked; the last
-	 * operation must be updating zs_minor (from -1 to the new
-	 * value).
-	 */
-	if (newzs) {
-		zs->zs_minor = minor;
-		smp_wmb();
-		zsprev->zs_next = zs;
-	} else {
-		smp_wmb();
-		zs->zs_minor = minor;
-	}
-
-	return (0);
-}
-
-static int
-zfsdev_state_destroy(struct file *filp)
-{
-	zfsdev_state_t *zs;
-
-	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
-	ASSERT(filp->private_data != NULL);
-
-	zs = filp->private_data;
-	zs->zs_minor = -1;
-	zfs_onexit_destroy(zs->zs_onexit);
-	zfs_zevent_destroy(zs->zs_zevent);
-
-	return (0);
-}
-
-static int
-zfsdev_open(struct inode *ino, struct file *filp)
-{
-	int error;
-
-	mutex_enter(&zfsdev_state_lock);
-	error = zfsdev_state_init(filp);
-	mutex_exit(&zfsdev_state_lock);
-
-	return (-error);
-}
-
-static int
-zfsdev_release(struct inode *ino, struct file *filp)
-{
-	int error;
-
-	mutex_enter(&zfsdev_state_lock);
-	error = zfsdev_state_destroy(filp);
-	mutex_exit(&zfsdev_state_lock);
-
-	return (-error);
-}
-
-static long
-zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
-{
-	zfs_cmd_t *zc;
-	uint_t vecnum;
-	int error, rc, flag = 0;
+	int error, cmd;
 	const zfs_ioc_vec_t *vec;
 	char *saved_poolname = NULL;
+	uint64_t max_nvlist_src_size;
+	size_t saved_poolname_len = 0;
 	nvlist_t *innvl = NULL;
 	fstrans_cookie_t cookie;
+	hrtime_t start_time = gethrtime();
 
-	vecnum = cmd - ZFS_IOC_FIRST;
+	KTRACE();
+	cmd = vecnum;
+	error = 0;
 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
-		return (-SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
+		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
+
 	vec = &zfs_ioc_vec[vecnum];
 
 	/*
@@ -7117,19 +7510,11 @@
 	 * a normal or legacy handler are registered.
 	 */
 	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
-		return (-SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
-
-	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
-
-	error = ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t),
-	    flag);
-	if (error != 0) {
-		error = SET_ERROR(EFAULT);
-		goto out;
-	}
+		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
 
 	zc->zc_iflags = flag & FKIOCTL;
-	if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
+	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
 		/*
 		 * Make sure the user doesn't pass in an insane value for
 		 * zc_nvlist_src_size.  We have to check, since we will end
@@ -7174,10 +7559,18 @@
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		break;
 
+	case ENTITY_NAME:
+		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
+			error = SET_ERROR(EINVAL);
+		} else {
+			error = pool_status_check(zc->zc_name,
+			    vec->zvec_namecheck, vec->zvec_pool_check);
+		}
+		break;
+
 	case NO_NAME:
 		break;
 	}
-
 	/*
 	 * Ensure that all input pairs are valid before we pass them down
 	 * to the lower layers.
@@ -7202,13 +7595,15 @@
 		goto out;
 
 	/* legacy ioctls can modify zc_name */
-	saved_poolname = strdup(zc->zc_name);
-	if (saved_poolname == NULL) {
-		error = SET_ERROR(ENOMEM);
-		goto out;
-	} else {
-		saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
-	}
+	/*
+	 * Can't use kmem_strdup() as we might truncate the string and
+	 * kmem_strfree() would then free with incorrect size.
+	 */
+	saved_poolname_len = strlen(zc->zc_name) + 1;
+	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
+
+	strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
+	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
 
 	if (vec->zvec_func != NULL) {
 		nvlist_t *outnvl;
@@ -7247,13 +7642,21 @@
 		    vec->zvec_allow_log &&
 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
 			if (!nvlist_empty(outnvl)) {
-				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
-				    outnvl);
+				size_t out_size = fnvlist_size(outnvl);
+				if (out_size > zfs_history_output_max) {
+					fnvlist_add_int64(lognv,
+					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
+				} else {
+					fnvlist_add_nvlist(lognv,
+					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
+				}
 			}
 			if (error != 0) {
 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
 				    error);
 			}
+			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
+			    gethrtime() - start_time);
 			(void) spa_history_log_nvl(spa, lognv);
 			spa_close(spa, FTAG);
 		}
@@ -7281,160 +7684,71 @@
 
 out:
 	nvlist_free(innvl);
-	rc = ddi_copyout(zc, (void *)(uintptr_t)arg, sizeof (zfs_cmd_t), flag);
-	if (error == 0 && rc != 0)
-		error = SET_ERROR(EFAULT);
 	if (error == 0 && vec->zvec_allow_log) {
 		char *s = tsd_get(zfs_allow_log_key);
 		if (s != NULL)
-			strfree(s);
-		(void) tsd_set(zfs_allow_log_key, saved_poolname);
-	} else {
-		if (saved_poolname != NULL)
-			strfree(saved_poolname);
+			kmem_strfree(s);
+		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
 	}
+	if (saved_poolname != NULL)
+		kmem_free(saved_poolname, saved_poolname_len);
 
-	kmem_free(zc, sizeof (zfs_cmd_t));
-	return (-error);
+	return (error);
 }
 
-#ifdef CONFIG_COMPAT
-static long
-zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
-{
-	return (zfsdev_ioctl(filp, cmd, arg));
-}
-#else
-#define	zfsdev_compat_ioctl	NULL
-#endif
-
-static const struct file_operations zfsdev_fops = {
-	.open		= zfsdev_open,
-	.release	= zfsdev_release,
-	.unlocked_ioctl	= zfsdev_ioctl,
-	.compat_ioctl	= zfsdev_compat_ioctl,
-	.owner		= THIS_MODULE,
-};
-
-static struct miscdevice zfs_misc = {
-	.minor		= ZFS_DEVICE_MINOR,
-	.name		= ZFS_DRIVER,
-	.fops		= &zfsdev_fops,
-};
-
-MODULE_ALIAS_MISCDEV(ZFS_DEVICE_MINOR);
-MODULE_ALIAS("devname:zfs");
-
-static int
-zfs_attach(void)
+int
+zfs_kmod_init(void)
 {
 	int error;
+	KTRACE();
+
+	if ((error = zvol_init()) != 0)
+		return (error);
+
+	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
+	zfs_init();
+
+	zfs_ioctl_init();
 
 	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
 	zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
 	zfsdev_state_list->zs_minor = -1;
 
-	error = misc_register(&zfs_misc);
-	if (error == -EBUSY) {
-		/*
-		 * Fallback to dynamic minor allocation in the event of a
-		 * collision with a reserved minor in linux/miscdevice.h.
-		 * In this case the kernel modules must be manually loaded.
-		 */
-		printk(KERN_INFO "ZFS: misc_register() with static minor %d "
-		    "failed %d, retrying with MISC_DYNAMIC_MINOR\n",
-		    ZFS_DEVICE_MINOR, error);
-
-		zfs_misc.minor = MISC_DYNAMIC_MINOR;
-		error = misc_register(&zfs_misc);
-	}
-
-	if (error)
-		printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
-
-	return (error);
-}
-
-static void
-zfs_detach(void)
-{
-	zfsdev_state_t *zs, *zsprev = NULL;
-
-	misc_deregister(&zfs_misc);
-	mutex_destroy(&zfsdev_state_lock);
-
-	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
-		if (zsprev)
-			kmem_free(zsprev, sizeof (zfsdev_state_t));
-		zsprev = zs;
-	}
-	if (zsprev)
-		kmem_free(zsprev, sizeof (zfsdev_state_t));
-}
-
-static void
-zfs_allow_log_destroy(void *arg)
-{
-	char *poolname = arg;
-
-	if (poolname != NULL)
-		strfree(poolname);
-}
-
-#ifdef DEBUG
-#define	ZFS_DEBUG_STR	" (DEBUG mode)"
-#else
-#define	ZFS_DEBUG_STR	""
-#endif
-
-static int __init
-_init(void)
-{
-	int error;
-
-	if ((error = -zvol_init()) != 0)
-		return (error);
-
-	spa_init(FREAD | FWRITE);
-	zfs_init();
-
-	zfs_ioctl_init();
-	zfs_sysfs_init();
-
-	if ((error = zfs_attach()) != 0)
+	if ((error = zfsdev_attach()) != 0)
 		goto out;
 
 	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
-	printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
-	    "ZFS pool version %s, ZFS filesystem version %s\n",
-	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
-	    SPA_VERSION_STRING, ZPL_VERSION_STRING);
-#ifndef CONFIG_FS_POSIX_ACL
-	printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
-#endif /* CONFIG_FS_POSIX_ACL */
-
 	return (0);
-
 out:
-	zfs_sysfs_fini();
 	zfs_fini();
 	spa_fini();
-	(void) zvol_fini();
-	printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
-	    ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
-	    ZFS_DEBUG_STR, error);
+	zvol_fini();
 
 	return (error);
 }
 
-static void __exit
-_fini(void)
+void
+zfs_kmod_fini(void)
 {
-	zfs_detach();
-	zfs_sysfs_fini();
+	zfsdev_state_t *zs, *zsnext = NULL;
+
+	zfsdev_detach();
+
+	mutex_destroy(&zfsdev_state_lock);
+
+	for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) {
+		zsnext = zs->zs_next;
+		if (zs->zs_onexit)
+			zfs_onexit_destroy(zs->zs_onexit);
+		if (zs->zs_zevent)
+			zfs_zevent_destroy(zs->zs_zevent);
+		kmem_free(zs, sizeof (zfsdev_state_t));
+	}
+
+	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
 	zfs_fini();
 	spa_fini();
 	zvol_fini();
@@ -7442,17 +7756,12 @@
 	tsd_destroy(&zfs_fsyncer_key);
 	tsd_destroy(&rrw_tsd_key);
 	tsd_destroy(&zfs_allow_log_key);
-
-	printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
-	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
 }
 
-#if defined(_KERNEL)
-module_init(_init);
-module_exit(_fini);
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
+    "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
 
-MODULE_DESCRIPTION("ZFS");
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
-#endif
+ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
+    "Maximum size in bytes of ZFS ioctl output that will be logged");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/zfs_log.c b/zfs/module/zfs/zfs_log.c
index 41b663b..9e52bed 100644
--- a/zfs/module/zfs/zfs_log.c
+++ b/zfs/module/zfs/zfs_log.c

@@ -39,7 +39,6 @@
 #include <sys/byteorder.h>
 #include <sys/policy.h>
 #include <sys/stat.h>
-#include <sys/mode.h>
 #include <sys/acl.h>
 #include <sys/dmu.h>
 #include <sys/dbuf.h>
@@ -109,84 +108,81 @@
 static void
 zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
 {
-	uint32_t	*bitmap;
-	uint64_t	*attrs;
-	uint64_t	*crtime;
-	xoptattr_t	*xoap;
-	void		*scanstamp;
-	int		i;
+	xoptattr_t *xoap;
 
 	xoap = xva_getxoptattr(xvap);
 	ASSERT(xoap);
 
 	lrattr->lr_attr_masksize = xvap->xva_mapsize;
-	bitmap = &lrattr->lr_attr_bitmap;
-	for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
+	uint32_t *bitmap = &lrattr->lr_attr_bitmap;
+	for (int i = 0; i != xvap->xva_mapsize; i++, bitmap++)
 		*bitmap = xvap->xva_reqattrmap[i];
-	}
 
-	/* Now pack the attributes up in a single uint64_t */
-	attrs = (uint64_t *)bitmap;
-	crtime = attrs + 1;
-	scanstamp = (caddr_t)(crtime + 2);
-	*attrs = 0;
+	lr_attr_end_t *end = (lr_attr_end_t *)bitmap;
+	end->lr_attr_attrs = 0;
+	end->lr_attr_crtime[0] = 0;
+	end->lr_attr_crtime[1] = 0;
+	memset(end->lr_attr_scanstamp, 0, AV_SCANSTAMP_SZ);
+
 	if (XVA_ISSET_REQ(xvap, XAT_READONLY))
-		*attrs |= (xoap->xoa_readonly == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_readonly == 0) ? 0 :
 		    XAT0_READONLY;
 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
-		*attrs |= (xoap->xoa_hidden == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_hidden == 0) ? 0 :
 		    XAT0_HIDDEN;
 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
-		*attrs |= (xoap->xoa_system == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_system == 0) ? 0 :
 		    XAT0_SYSTEM;
 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
-		*attrs |= (xoap->xoa_archive == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_archive == 0) ? 0 :
 		    XAT0_ARCHIVE;
 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
-		*attrs |= (xoap->xoa_immutable == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_immutable == 0) ? 0 :
 		    XAT0_IMMUTABLE;
 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
-		*attrs |= (xoap->xoa_nounlink == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_nounlink == 0) ? 0 :
 		    XAT0_NOUNLINK;
 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
-		*attrs |= (xoap->xoa_appendonly == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_appendonly == 0) ? 0 :
 		    XAT0_APPENDONLY;
 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
-		*attrs |= (xoap->xoa_opaque == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_opaque == 0) ? 0 :
 		    XAT0_APPENDONLY;
 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
-		*attrs |= (xoap->xoa_nodump == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_nodump == 0) ? 0 :
 		    XAT0_NODUMP;
 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
-		*attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
 		    XAT0_AV_QUARANTINED;
 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
-		*attrs |= (xoap->xoa_av_modified == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_av_modified == 0) ? 0 :
 		    XAT0_AV_MODIFIED;
 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
-		ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, end->lr_attr_crtime);
 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
 		ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
 
-		bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+		memcpy(end->lr_attr_scanstamp, xoap->xoa_av_scanstamp,
+		    AV_SCANSTAMP_SZ);
 	} else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 		/*
 		 * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
 		 * at the same time, so we can share the same space.
 		 */
-		bcopy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t));
+		memcpy(end->lr_attr_scanstamp, &xoap->xoa_projid,
+		    sizeof (uint64_t));
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
-		*attrs |= (xoap->xoa_reparse == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_reparse == 0) ? 0 :
 		    XAT0_REPARSE;
 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
-		*attrs |= (xoap->xoa_offline == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_offline == 0) ? 0 :
 		    XAT0_OFFLINE;
 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
-		*attrs |= (xoap->xoa_sparse == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_sparse == 0) ? 0 :
 		    XAT0_SPARSE;
 	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
-		*attrs |= (xoap->xoa_projinherit == 0) ? 0 :
+		end->lr_attr_attrs |= (xoap->xoa_projinherit == 0) ? 0 :
 		    XAT0_PROJINHERIT;
 }
 
@@ -232,7 +228,33 @@
 {
 	int unlinked = 0;
 	znode_t *dzp;
-	igrab(ZTOI(zp));
+#ifdef __FreeBSD__
+	znode_t *tzp = zp;
+
+	/*
+	 * zrele drops the vnode lock which violates the VOP locking contract
+	 * on FreeBSD. See comment at the top of zfs_replay.c for more detail.
+	 */
+	/*
+	 * if zp is XATTR node, keep walking up via z_xattr_parent until we
+	 * get the owner
+	 */
+	while (tzp->z_pflags & ZFS_XATTR) {
+		ASSERT3U(zp->z_xattr_parent, !=, 0);
+		if (zfs_zget(ZTOZSB(tzp), tzp->z_xattr_parent, &dzp) != 0) {
+			unlinked = 1;
+			break;
+		}
+
+		if (tzp != zp)
+			zrele(tzp);
+		tzp = dzp;
+		unlinked = tzp->z_unlinked;
+	}
+	if (tzp != zp)
+		zrele(tzp);
+#else
+	zhold(zp);
 	/*
 	 * if zp is XATTR node, keep walking up via z_xattr_parent until we
 	 * get the owner
@@ -243,11 +265,13 @@
 			unlinked = 1;
 			break;
 		}
-		iput(ZTOI(zp));
+
+		zrele(zp);
 		zp = dzp;
 		unlinked = zp->z_unlinked;
 	}
-	iput(ZTOI(zp));
+	zrele(zp);
+#endif
 	return (unlinked);
 }
 
@@ -272,7 +296,7 @@
  */
 void
 zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
+    znode_t *dzp, znode_t *zp, const char *name, vsecattr_t *vsecp,
     zfs_fuid_info_t *fuidp, vattr_t *vap)
 {
 	itx_t *itx;
@@ -322,13 +346,13 @@
 	/* Store dnode slot count in 8 bits above object id. */
 	LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
 	lr->lr_mode = zp->z_mode;
-	if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOI(zp)->i_uid))) {
-		lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOI(zp)->i_uid);
+	if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOUID(zp)))) {
+		lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOUID(zp));
 	} else {
 		lr->lr_uid = fuidp->z_fuid_owner;
 	}
-	if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOI(zp)->i_gid))) {
-		lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOI(zp)->i_gid);
+	if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOGID(zp)))) {
+		lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOGID(zp));
 	} else {
 		lr->lr_gid = fuidp->z_fuid_group;
 	}
@@ -381,14 +405,12 @@
 	zil_itx_assign(zilog, itx, tx);
 }
 
-void zil_remove_async(zilog_t *zilog, uint64_t oid);
-
 /*
  * Handles both TX_REMOVE and TX_RMDIR transactions.
  */
 void
 zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked)
+    znode_t *dzp, const char *name, uint64_t foid, boolean_t unlinked)
 {
 	itx_t *itx;
 	lr_remove_t *lr;
@@ -423,7 +445,7 @@
  */
 void
 zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name)
+    znode_t *dzp, znode_t *zp, const char *name)
 {
 	itx_t *itx;
 	lr_link_t *lr;
@@ -446,7 +468,7 @@
  */
 void
 zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *dzp, znode_t *zp, char *name, char *link)
+    znode_t *dzp, znode_t *zp, const char *name, const char *link)
 {
 	itx_t *itx;
 	lr_create_t *lr;
@@ -460,8 +482,8 @@
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
-	lr->lr_uid = KUID_TO_SUID(ZTOI(zp)->i_uid);
-	lr->lr_gid = KGID_TO_SGID(ZTOI(zp)->i_gid);
+	lr->lr_uid = KUID_TO_SUID(ZTOUID(zp));
+	lr->lr_gid = KGID_TO_SGID(ZTOGID(zp));
 	lr->lr_mode = zp->z_mode;
 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
 	    sizeof (uint64_t));
@@ -477,8 +499,8 @@
  * Handles TX_RENAME transactions.
  */
 void
-zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
-    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
+zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp,
+    const char *sname, znode_t *tdzp, const char *dname, znode_t *szp)
 {
 	itx_t *itx;
 	lr_rename_t *lr;
@@ -515,6 +537,8 @@
 	uint32_t blocksize = zp->z_blksz;
 	itx_wr_state_t write_state;
 	uintptr_t fsync_cnt;
+	uint64_t gen = 0;
+	ssize_t size = resid;
 
 	if (zil_replaying(zilog, tx) || zp->z_unlinked ||
 	    zfs_xattr_owner_unlinked(zp)) {
@@ -528,7 +552,7 @@
 	else if (!spa_has_slogs(zilog->zl_spa) &&
 	    resid >= zfs_immediate_write_sz)
 		write_state = WR_INDIRECT;
-	else if (ioflag & (FSYNC | FDSYNC))
+	else if (ioflag & (O_SYNC | O_DSYNC))
 		write_state = WR_COPIED;
 	else
 		write_state = WR_NEED_COPY;
@@ -537,6 +561,9 @@
 		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
 	}
 
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
+	    sizeof (gen));
+
 	while (resid) {
 		itx_t *itx;
 		lr_write_t *lr;
@@ -559,15 +586,22 @@
 		    (wr_state == WR_COPIED ? len : 0));
 		lr = (lr_write_t *)&itx->itx_lr;
 
-		DB_DNODE_ENTER(db);
-		if (wr_state == WR_COPIED && dmu_read_by_dnode(DB_DNODE(db),
-		    off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
-			zil_itx_destroy(itx);
-			itx = zil_itx_create(txtype, sizeof (*lr));
-			lr = (lr_write_t *)&itx->itx_lr;
-			wr_state = WR_NEED_COPY;
+		/*
+		 * For WR_COPIED records, copy the data into the lr_write_t.
+		 */
+		if (wr_state == WR_COPIED) {
+			int err;
+			DB_DNODE_ENTER(db);
+			err = dmu_read_by_dnode(DB_DNODE(db), off, len, lr + 1,
+			    DMU_READ_NO_PREFETCH);
+			if (err != 0) {
+				zil_itx_destroy(itx);
+				itx = zil_itx_create(txtype, sizeof (*lr));
+				lr = (lr_write_t *)&itx->itx_lr;
+				wr_state = WR_NEED_COPY;
+			}
+			DB_DNODE_EXIT(db);
 		}
-		DB_DNODE_EXIT(db);
 
 		itx->itx_wr_state = wr_state;
 		lr->lr_foid = zp->z_id;
@@ -577,8 +611,9 @@
 		BP_ZERO(&lr->lr_blkptr);
 
 		itx->itx_private = ZTOZSB(zp);
+		itx->itx_gen = gen;
 
-		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
+		if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
 		    (fsync_cnt == 0))
 			itx->itx_sync = B_FALSE;
 
@@ -589,6 +624,10 @@
 		off += len;
 		resid -= len;
 	}
+
+	if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+		dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+	}
 }
 
 /*
@@ -743,7 +782,7 @@
 	zil_itx_assign(zilog, itx, tx);
 }
 
-#if defined(_KERNEL)
-module_param(zfs_immediate_write_sz, long, 0644);
-MODULE_PARM_DESC(zfs_immediate_write_sz, "Largest data block to write to zil");
-#endif
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, LONG, ZMOD_RW,
+	"Largest data block to write to zil");
+/* END CSTYLED */

diff --git a/zfs/module/zfs/zfs_onexit.c b/zfs/module/zfs/zfs_onexit.c
index 31f77ce..7c56dd9 100644
--- a/zfs/module/zfs/zfs_onexit.c
+++ b/zfs/module/zfs/zfs_onexit.c

@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2020 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -101,6 +101,41 @@
 	kmem_free(zo, sizeof (zfs_onexit_t));
 }
 
+/*
+ * Consumers might need to operate by minor number instead of fd, since
+ * they might be running in another thread (e.g. txg_sync_thread). Callers
+ * of this function must call zfs_onexit_fd_rele() when they're finished
+ * using the minor number.
+ */
+zfs_file_t *
+zfs_onexit_fd_hold(int fd, minor_t *minorp)
+{
+	zfs_onexit_t *zo = NULL;
+
+	zfs_file_t *fp = zfs_file_get(fd);
+	if (fp == NULL)
+		return (NULL);
+
+	int error = zfsdev_getminor(fp, minorp);
+	if (error) {
+		zfs_onexit_fd_rele(fp);
+		return (NULL);
+	}
+
+	zo = zfsdev_get_state(*minorp, ZST_ONEXIT);
+	if (zo == NULL) {
+		zfs_onexit_fd_rele(fp);
+		return (NULL);
+	}
+	return (fp);
+}
+
+void
+zfs_onexit_fd_rele(zfs_file_t *fp)
+{
+	zfs_file_put(fp);
+}
+
 static int
 zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
 {
@@ -112,39 +147,6 @@
 }
 
 /*
- * Consumers might need to operate by minor number instead of fd, since
- * they might be running in another thread (e.g. txg_sync_thread). Callers
- * of this function must call zfs_onexit_fd_rele() when they're finished
- * using the minor number.
- */
-int
-zfs_onexit_fd_hold(int fd, minor_t *minorp)
-{
-	file_t *fp;
-	zfs_onexit_t *zo;
-	int error;
-
-	fp = getf(fd);
-	if (fp == NULL)
-		return (SET_ERROR(EBADF));
-
-	error = zfsdev_getminor(fp->f_file, minorp);
-	if (error == 0)
-		error = zfs_onexit_minor_to_state(*minorp, &zo);
-
-	if (error)
-		zfs_onexit_fd_rele(fd);
-
-	return (error);
-}
-
-void
-zfs_onexit_fd_rele(int fd)
-{
-	releasef(fd);
-}
-
-/*
  * Add a callback to be invoked when the calling process exits.
  */
 int
@@ -172,80 +174,3 @@
 
 	return (0);
 }
-
-static zfs_onexit_action_node_t *
-zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle)
-{
-	zfs_onexit_action_node_t *match;
-	zfs_onexit_action_node_t *ap;
-	list_t *l;
-
-	ASSERT(MUTEX_HELD(&zo->zo_lock));
-
-	match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle;
-	l = &zo->zo_actions;
-	for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) {
-		if (match == ap)
-			break;
-	}
-	return (ap);
-}
-
-/*
- * Delete the callback, triggering it first if 'fire' is set.
- */
-int
-zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
-{
-	zfs_onexit_t *zo;
-	zfs_onexit_action_node_t *ap;
-	int error;
-
-	error = zfs_onexit_minor_to_state(minor, &zo);
-	if (error)
-		return (error);
-
-	mutex_enter(&zo->zo_lock);
-	ap = zfs_onexit_find_cb(zo, action_handle);
-	if (ap != NULL) {
-		list_remove(&zo->zo_actions, ap);
-		mutex_exit(&zo->zo_lock);
-		if (fire)
-			ap->za_func(ap->za_data);
-		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
-	} else {
-		mutex_exit(&zo->zo_lock);
-		error = SET_ERROR(ENOENT);
-	}
-
-	return (error);
-}
-
-/*
- * Return the data associated with this callback.  This allows consumers
- * of the cleanup-on-exit interfaces to stash kernel data across system
- * calls, knowing that it will be cleaned up if the calling process exits.
- */
-int
-zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
-{
-	zfs_onexit_t *zo;
-	zfs_onexit_action_node_t *ap;
-	int error;
-
-	*data = NULL;
-
-	error = zfs_onexit_minor_to_state(minor, &zo);
-	if (error)
-		return (error);
-
-	mutex_enter(&zo->zo_lock);
-	ap = zfs_onexit_find_cb(zo, action_handle);
-	if (ap != NULL)
-		*data = ap->za_data;
-	else
-		error = SET_ERROR(ENOENT);
-	mutex_exit(&zo->zo_lock);
-
-	return (error);
-}

diff --git a/zfs/module/zfs/zfs_quota.c b/zfs/module/zfs/zfs_quota.c
new file mode 100644
index 0000000..e61db5c
--- /dev/null
+++ b/zfs/module/zfs/zfs_quota.c

@@ -0,0 +1,476 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * All rights reserved.
+ * Copyright (c) 2012, 2015, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/* Portions Copyright 2010 Robert Milkowski */
+
+#include <sys/avl.h>
+#include <sys/dmu_objset.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
+#include <sys/zap.h>
+#include <sys/zfs_project.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_znode.h>
+
+/*
+ * Extract the owner ids and generation of a file from its bonus buffer.
+ *
+ *	bonustype - type of the bonus data; only DMU_OT_ZNODE and DMU_OT_SA
+ *		    are handled.
+ *	data	  - bonus buffer contents (a znode_phys_t, or an SA header
+ *		    followed by the attributes), or NULL.
+ *	zoi	  - filled in with the user, group, project and generation.
+ *
+ * Returns ENOENT for any other bonus type, EEXIST when data is NULL and
+ * 0 once zoi has been filled in.  zfi_project stays ZFS_DEFAULT_PROJID
+ * unless the ZFS_PROJID bit is set in the SA flags.
+ */
+int
+zpl_get_file_info(dmu_object_type_t bonustype, const void *data,
+    zfs_file_info_t *zoi)
+{
+	/*
+	 * Is it a valid type of object to track?
+	 */
+	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
+		return (SET_ERROR(ENOENT));
+
+	zoi->zfi_project = ZFS_DEFAULT_PROJID;
+
+	/*
+	 * If we have a NULL data pointer
+	 * then assume the ids aren't changing and
+	 * return EEXIST to the dmu to let it know to
+	 * use the same ids
+	 */
+	if (data == NULL)
+		return (SET_ERROR(EEXIST));
+
+	if (bonustype == DMU_OT_ZNODE) {
+		const znode_phys_t *znp = data;
+		zoi->zfi_user = znp->zp_uid;
+		zoi->zfi_group = znp->zp_gid;
+		zoi->zfi_generation = znp->zp_gen;
+		return (0);
+	}
+
+	const sa_hdr_phys_t *sap = data;
+	if (sap->sa_magic == 0) {
+		/*
+		 * This should only happen for newly created files
+		 * that haven't had the znode data filled in yet.
+		 */
+		zoi->zfi_user = 0;
+		zoi->zfi_group = 0;
+		zoi->zfi_generation = 0;
+		return (0);
+	}
+
+	/*
+	 * Work on a local copy of the header so that a byte-swapped header
+	 * can be normalized without writing to the caller's (const) buffer.
+	 */
+	sa_hdr_phys_t sa = *sap;
+	boolean_t swap = B_FALSE;
+	if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
+		sa.sa_magic = SA_MAGIC;
+		sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
+		swap = B_TRUE;
+	}
+	VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
+
+	int hdrsize = sa_hdrsize(&sa);
+	VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
+
+	/* The attributes are read at fixed offsets past the SA header. */
+	uintptr_t data_after_hdr = (uintptr_t)data + hdrsize;
+	zoi->zfi_user = *((uint64_t *)(data_after_hdr + SA_UID_OFFSET));
+	zoi->zfi_group = *((uint64_t *)(data_after_hdr + SA_GID_OFFSET));
+	zoi->zfi_generation = *((uint64_t *)(data_after_hdr + SA_GEN_OFFSET));
+	uint64_t flags = *((uint64_t *)(data_after_hdr + SA_FLAGS_OFFSET));
+	/* The flags must be swapped before the ZFS_PROJID bit is tested. */
+	if (swap)
+		flags = BSWAP_64(flags);
+
+	if (flags & ZFS_PROJID) {
+		zoi->zfi_project =
+		    *((uint64_t *)(data_after_hdr + SA_PROJID_OFFSET));
+	}
+
+	/* Values copied from the buffer are still in the buffer's order. */
+	if (swap) {
+		zoi->zfi_user = BSWAP_64(zoi->zfi_user);
+		zoi->zfi_group = BSWAP_64(zoi->zfi_group);
+		zoi->zfi_project = BSWAP_64(zoi->zfi_project);
+		zoi->zfi_generation = BSWAP_64(zoi->zfi_generation);
+	}
+	return (0);
+}
+
+/*
+ * Split the FUID encoded in the string 'fuidstr' into a domain and a RID.
+ *
+ * The domain looked up for the FUID's index is copied into domainbuf
+ * (at most buflen bytes); when no domain is found domainbuf is set to the
+ * empty string.  *ridp receives the RID part of the FUID.
+ */
+static void
+fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
+    char *domainbuf, int buflen, uid_t *ridp)
+{
+	uint64_t fuid = zfs_strtonum(fuidstr, NULL);
+	const char *domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
+
+	if (domain == NULL)
+		domainbuf[0] = '\0';
+	else
+		(void) strlcpy(domainbuf, domain, buflen);
+
+	*ridp = FUID_RID(fuid);
+}
+
+/*
+ * Map a userquota property type to the object that stores its entries.
+ *
+ * The "used" types map to the fixed DMU_*USED_OBJECT ids, the quota types
+ * to the object numbers kept in the zfsvfs.  Any other type yields
+ * ZFS_NO_OBJECT.
+ */
+static uint64_t
+zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
+{
+	uint64_t obj = ZFS_NO_OBJECT;
+
+	switch (type) {
+	case ZFS_PROP_USERQUOTA:
+		obj = zfsvfs->z_userquota_obj;
+		break;
+	case ZFS_PROP_GROUPQUOTA:
+		obj = zfsvfs->z_groupquota_obj;
+		break;
+	case ZFS_PROP_PROJECTQUOTA:
+		obj = zfsvfs->z_projectquota_obj;
+		break;
+	case ZFS_PROP_USEROBJQUOTA:
+		obj = zfsvfs->z_userobjquota_obj;
+		break;
+	case ZFS_PROP_GROUPOBJQUOTA:
+		obj = zfsvfs->z_groupobjquota_obj;
+		break;
+	case ZFS_PROP_PROJECTOBJQUOTA:
+		obj = zfsvfs->z_projectobjquota_obj;
+		break;
+	case ZFS_PROP_USERUSED:
+	case ZFS_PROP_USEROBJUSED:
+		obj = DMU_USERUSED_OBJECT;
+		break;
+	case ZFS_PROP_GROUPUSED:
+	case ZFS_PROP_GROUPOBJUSED:
+		obj = DMU_GROUPUSED_OBJECT;
+		break;
+	case ZFS_PROP_PROJECTUSED:
+	case ZFS_PROP_PROJECTOBJUSED:
+		obj = DMU_PROJECTUSED_OBJECT;
+		break;
+	default:
+		break;
+	}
+
+	return (obj);
+}
+
+/*
+ * Copy a batch of per-id accounting or quota entries into a caller buffer.
+ *
+ * Walks the ZAP object selected by 'type', starting at the serialized
+ * cursor position in *cookiep, and stores one zfs_useracct_t per entry in
+ * vbuf until the entries run out or another record would not fit in
+ * *bufsizep bytes.  After the walk *bufsizep holds the number of bytes
+ * written and *cookiep the cursor position to resume from.
+ *
+ * Returns ENOTSUP when the accounting needed for 'type' is not present on
+ * the objset, 0 (with *bufsizep set to 0) when there is no object for
+ * 'type', and otherwise 0 or the error from zap_cursor_retrieve().
+ */
+int
+zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
+{
+	int error;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	zfs_useracct_t *buf = vbuf;
+	uint64_t obj;
+	int offset = 0;
+
+	if (!dmu_objset_userspace_present(zfsvfs->z_os))
+		return (SET_ERROR(ENOTSUP));
+
+	if ((type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED ||
+	    type == ZFS_PROP_PROJECTOBJQUOTA ||
+	    type == ZFS_PROP_PROJECTOBJUSED) &&
+	    !dmu_objset_projectquota_present(zfsvfs->z_os))
+		return (SET_ERROR(ENOTSUP));
+
+	if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+	    type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA ||
+	    type == ZFS_PROP_PROJECTOBJUSED ||
+	    type == ZFS_PROP_PROJECTOBJQUOTA) &&
+	    !dmu_objset_userobjspace_present(zfsvfs->z_os))
+		return (SET_ERROR(ENOTSUP));
+
+	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+	if (obj == ZFS_NO_OBJECT) {
+		*bufsizep = 0;
+		return (0);
+	}
+
+	/*
+	 * Object-count usage entries carry the DMU_OBJACCT_PREFIX in their
+	 * ZAP name; 'offset' is how much of the name to skip to reach the id.
+	 */
+	if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+	    type == ZFS_PROP_PROJECTOBJUSED)
+		offset = DMU_OBJACCT_PREFIX_LEN;
+
+	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		/* Stop as soon as one more record would overflow vbuf. */
+		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
+		    *bufsizep)
+			break;
+
+		/*
+		 * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX)
+		 * when dealing with block quota and vice versa.
+		 */
+		if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX,
+		    DMU_OBJACCT_PREFIX_LEN) == 0))
+			continue;
+
+		fuidstr_to_sid(zfsvfs, za.za_name + offset,
+		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
+
+		buf->zu_space = za.za_first_integer;
+		buf++;
+	}
+	/* ENOENT from the cursor only means the entries are exhausted. */
+	if (error == ENOENT)
+		error = 0;
+
+	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
+	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
+	*cookiep = zap_cursor_serialize(&zc);
+	zap_cursor_fini(&zc);
+	return (error);
+}
+
+/*
+ * Look up a single accounting or quota value.
+ *
+ * *valp is set to the value stored for (domain, rid) in the object
+ * selected by 'type'; it is left at 0 when there is no such object or no
+ * entry for the id.  Returns ENOTSUP when the accounting needed for 'type'
+ * is not present on the objset, EINVAL for an invalid project id, or the
+ * error from zfs_id_to_fuidstr() / zap_lookup().
+ */
+int
+zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t *valp)
+{
+	char name[20 + DMU_OBJACCT_PREFIX_LEN];
+	int prefixlen = 0;
+	int error;
+	uint64_t obj;
+	boolean_t objused, objtype, projtype;
+
+	*valp = 0;
+
+	if (!dmu_objset_userspace_present(zfsvfs->z_os))
+		return (SET_ERROR(ENOTSUP));
+
+	/* Classify the property once; the checks below reuse the result. */
+	objused = (type == ZFS_PROP_USEROBJUSED ||
+	    type == ZFS_PROP_GROUPOBJUSED ||
+	    type == ZFS_PROP_PROJECTOBJUSED);
+	objtype = (objused || type == ZFS_PROP_USEROBJQUOTA ||
+	    type == ZFS_PROP_GROUPOBJQUOTA ||
+	    type == ZFS_PROP_PROJECTOBJQUOTA);
+	projtype = (type == ZFS_PROP_PROJECTQUOTA ||
+	    type == ZFS_PROP_PROJECTUSED ||
+	    type == ZFS_PROP_PROJECTOBJQUOTA ||
+	    type == ZFS_PROP_PROJECTOBJUSED);
+
+	if (objtype && !dmu_objset_userobjspace_present(zfsvfs->z_os))
+		return (SET_ERROR(ENOTSUP));
+
+	if (projtype) {
+		if (!dmu_objset_projectquota_present(zfsvfs->z_os))
+			return (SET_ERROR(ENOTSUP));
+		if (!zpl_is_valid_projid(rid))
+			return (SET_ERROR(EINVAL));
+	}
+
+	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+	if (obj == ZFS_NO_OBJECT)
+		return (0);
+
+	/* Object-count usage entries are named with DMU_OBJACCT_PREFIX. */
+	if (objused) {
+		strlcpy(name, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
+		prefixlen = DMU_OBJACCT_PREFIX_LEN;
+	}
+
+	error = zfs_id_to_fuidstr(zfsvfs, domain, rid, name + prefixlen,
+	    sizeof (name) - prefixlen, B_FALSE);
+	if (error != 0)
+		return (error);
+
+	/* A missing entry is reported as success with *valp still 0. */
+	error = zap_lookup(zfsvfs->z_os, obj, name, 8, 1, valp);
+	return (error == ENOENT ? 0 : error);
+}
+
+/*
+ * Set, update or remove the quota of one id.
+ *
+ * The entry for (domain, rid) in the quota ZAP object belonging to 'type'
+ * is set to 'quota'; a quota of 0 removes the entry.  The quota ZAP object
+ * is created, and recorded in MASTER_NODE_OBJ under
+ * zfs_userquota_prop_prefixes[type], the first time it is needed.
+ *
+ * Returns ENOTSUP when the filesystem version predates
+ * ZPL_VERSION_USERSPACE or project quota is not enabled for a project
+ * type, EINVAL for an invalid project id or a 'type' that is not a quota
+ * property, or the error from zfs_id_to_fuidstr(), dmu_tx_assign() or the
+ * ZAP update.
+ */
+int
+zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t quota)
+{
+	char buf[32];
+	int err;
+	dmu_tx_t *tx;
+	uint64_t *objp;
+	boolean_t fuid_dirtied;
+
+	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
+		return (SET_ERROR(ENOTSUP));
+
+	/* Pick the zfsvfs field that holds the quota object for 'type'. */
+	switch (type) {
+	case ZFS_PROP_USERQUOTA:
+		objp = &zfsvfs->z_userquota_obj;
+		break;
+	case ZFS_PROP_GROUPQUOTA:
+		objp = &zfsvfs->z_groupquota_obj;
+		break;
+	case ZFS_PROP_USEROBJQUOTA:
+		objp = &zfsvfs->z_userobjquota_obj;
+		break;
+	case ZFS_PROP_GROUPOBJQUOTA:
+		objp = &zfsvfs->z_groupobjquota_obj;
+		break;
+	case ZFS_PROP_PROJECTQUOTA:
+		if (!dmu_objset_projectquota_enabled(zfsvfs->z_os))
+			return (SET_ERROR(ENOTSUP));
+		if (!zpl_is_valid_projid(rid))
+			return (SET_ERROR(EINVAL));
+
+		objp = &zfsvfs->z_projectquota_obj;
+		break;
+	case ZFS_PROP_PROJECTOBJQUOTA:
+		if (!dmu_objset_projectquota_enabled(zfsvfs->z_os))
+			return (SET_ERROR(ENOTSUP));
+		if (!zpl_is_valid_projid(rid))
+			return (SET_ERROR(EINVAL));
+
+		objp = &zfsvfs->z_projectobjquota_obj;
+		break;
+	default:
+		return (SET_ERROR(EINVAL));
+	}
+
+	err = zfs_id_to_fuidstr(zfsvfs, domain, rid, buf, sizeof (buf), B_TRUE);
+	if (err)
+		return (err);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+
+	/*
+	 * Hold the quota ZAP, or a new object plus the MASTER_NODE_OBJ entry
+	 * that will point at it when the quota ZAP does not exist yet.
+	 */
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
+	if (*objp == 0) {
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+		    zfs_userquota_prop_prefixes[type]);
+	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_tx_abort(tx);
+		return (err);
+	}
+
+	/* *objp is re-checked under z_lock before the object is created. */
+	mutex_enter(&zfsvfs->z_lock);
+	if (*objp == 0) {
+		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
+		    DMU_OT_NONE, 0, tx);
+		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
+		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
+	}
+	mutex_exit(&zfsvfs->z_lock);
+
+	/* A quota of 0 removes the entry; a missing entry is not an error. */
+	if (quota == 0) {
+		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
+		if (err == ENOENT)
+			err = 0;
+	} else {
+		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
+	}
+	ASSERT(err == 0);
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+	dmu_tx_commit(tx);
+	return (err);
+}
+
+/*
+ * Report whether 'id' has reached its object-count quota.
+ *
+ * Returns B_TRUE only when both the quota entry and the object usage entry
+ * for 'id' are found and the usage is at least the quota.  B_FALSE is
+ * returned when the required accounting is not present (an upgrade is
+ * requested first if the objset is upgradable), when 'usedobj' is not one
+ * of the DMU_*USED_OBJECT ids, when no quota object exists, during replay,
+ * or when either lookup fails.
+ */
+boolean_t
+zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
+{
+	char name[20 + DMU_OBJACCT_PREFIX_LEN];
+	uint64_t quotaobj, limit, inuse;
+
+	if (!dmu_objset_userobjspace_present(zfsvfs->z_os)) {
+		if (!dmu_objset_userobjspace_upgradable(zfsvfs->z_os))
+			return (B_FALSE);
+
+		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		dmu_objset_id_quota_upgrade(zfsvfs->z_os);
+		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
+		return (B_FALSE);
+	}
+
+	if (usedobj == DMU_USERUSED_OBJECT) {
+		quotaobj = zfsvfs->z_userobjquota_obj;
+	} else if (usedobj == DMU_GROUPUSED_OBJECT) {
+		quotaobj = zfsvfs->z_groupobjquota_obj;
+	} else if (usedobj != DMU_PROJECTUSED_OBJECT) {
+		return (B_FALSE);
+	} else if (dmu_objset_projectquota_present(zfsvfs->z_os)) {
+		quotaobj = zfsvfs->z_projectobjquota_obj;
+	} else {
+		/* Project accounting is missing; request it if possible. */
+		if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
+			dsl_pool_config_enter(
+			    dmu_objset_pool(zfsvfs->z_os), FTAG);
+			dmu_objset_id_quota_upgrade(zfsvfs->z_os);
+			dsl_pool_config_exit(
+			    dmu_objset_pool(zfsvfs->z_os), FTAG);
+		}
+		return (B_FALSE);
+	}
+
+	if (quotaobj == 0)
+		return (B_FALSE);
+	if (zfsvfs->z_replay)
+		return (B_FALSE);
+
+	/* The quota entry is keyed by the bare hexadecimal id ... */
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)id);
+	if (zap_lookup(zfsvfs->z_os, quotaobj, name, 8, 1, &limit) != 0)
+		return (B_FALSE);
+
+	/* ... the object usage entry by the id with DMU_OBJACCT_PREFIX. */
+	(void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
+	    (longlong_t)id);
+	if (zap_lookup(zfsvfs->z_os, usedobj, name, 8, 1, &inuse) != 0)
+		return (B_FALSE);
+
+	return (inuse >= limit);
+}
+
+/*
+ * Report whether 'id' has reached its space quota.
+ *
+ * Returns B_TRUE only when both the quota entry and the usage entry for
+ * 'id' are found and the usage is at least the quota.  B_FALSE is returned
+ * when project accounting is required but not present (an upgrade is
+ * requested first if the objset is upgradable), when 'usedobj' is not one
+ * of the DMU_*USED_OBJECT ids, when no quota object exists, during replay,
+ * or when either lookup fails.
+ */
+boolean_t
+zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
+{
+	char name[20];
+	uint64_t quotaobj, limit, inuse;
+
+	if (usedobj == DMU_USERUSED_OBJECT) {
+		quotaobj = zfsvfs->z_userquota_obj;
+	} else if (usedobj == DMU_GROUPUSED_OBJECT) {
+		quotaobj = zfsvfs->z_groupquota_obj;
+	} else if (usedobj != DMU_PROJECTUSED_OBJECT) {
+		return (B_FALSE);
+	} else if (dmu_objset_projectquota_present(zfsvfs->z_os)) {
+		quotaobj = zfsvfs->z_projectquota_obj;
+	} else {
+		/* Project accounting is missing; request it if possible. */
+		if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
+			dsl_pool_config_enter(
+			    dmu_objset_pool(zfsvfs->z_os), FTAG);
+			dmu_objset_id_quota_upgrade(zfsvfs->z_os);
+			dsl_pool_config_exit(
+			    dmu_objset_pool(zfsvfs->z_os), FTAG);
+		}
+		return (B_FALSE);
+	}
+
+	if (quotaobj == 0)
+		return (B_FALSE);
+	if (zfsvfs->z_replay)
+		return (B_FALSE);
+
+	/* Quota and usage entries share the same hexadecimal id as key. */
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)id);
+	if (zap_lookup(zfsvfs->z_os, quotaobj, name, 8, 1, &limit) != 0)
+		return (B_FALSE);
+	if (zap_lookup(zfsvfs->z_os, usedobj, name, 8, 1, &inuse) != 0)
+		return (B_FALSE);
+
+	return (inuse >= limit);
+}
+
+/*
+ * Report whether 'id' is over either of its quotas.  The space quota is
+ * checked first; the object quota is only consulted when that check
+ * returns B_FALSE.
+ */
+boolean_t
+zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
+{
+	if (zfs_id_overblockquota(zfsvfs, usedobj, id))
+		return (B_TRUE);
+	if (zfs_id_overobjquota(zfsvfs, usedobj, id))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/* Export the quota and accounting entry points defined in this file. */
+EXPORT_SYMBOL(zpl_get_file_info);
+EXPORT_SYMBOL(zfs_userspace_one);
+EXPORT_SYMBOL(zfs_userspace_many);
+EXPORT_SYMBOL(zfs_set_userquota);
+EXPORT_SYMBOL(zfs_id_overblockquota);
+EXPORT_SYMBOL(zfs_id_overobjquota);
+EXPORT_SYMBOL(zfs_id_overquota);

diff --git a/zfs/module/zfs/zfs_replay.c b/zfs/module/zfs/zfs_replay.c
index 7dea85b..f3d209f 100644
--- a/zfs/module/zfs/zfs_replay.c
+++ b/zfs/module/zfs/zfs_replay.c

@@ -43,13 +43,22 @@
 #include <sys/zil.h>
 #include <sys/byteorder.h>
 #include <sys/stat.h>
-#include <sys/mode.h>
 #include <sys/acl.h>
 #include <sys/atomic.h>
 #include <sys/cred.h>
 #include <sys/zpl.h>
 
 /*
+ * NB: FreeBSD expects to be able to do vnode locking in lookup and
+ * hold the locks across all subsequent VOPs until vput is called.
+ * This means that its zfs vnops routines can't do any internal locking.
+ * In order to have the same contract as the Linux vnops there would
+ * need to be duplicate locked vnops. If the vnops were used more widely
+ * in common code this would likely be preferable. However, currently
+ * this is the only file where this is the case.
+ */
+
+/*
  * Functions to replay ZFS intent log (ZIL) records
  * The functions are called through a function vector (zfs_replay_vector)
  * which is indexed by the transaction type.
@@ -61,11 +70,13 @@
 {
 	bzero(vap, sizeof (*vap));
 	vap->va_mask = (uint_t)mask;
-	vap->va_type = IFTOVT(mode);
 	vap->va_mode = mode;
+#ifdef __FreeBSD__
+	vap->va_type = IFTOVT(mode);
+#endif
 	vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid;
 	vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid;
-	vap->va_rdev = rdev;
+	vap->va_rdev = zfs_cmpldev(rdev);
 	vap->va_nodeid = nodeid;
 }
 
@@ -282,7 +293,7 @@
 	char *name = NULL;		/* location determined later */
 	lr_create_t *lr = (lr_create_t *)lracl;
 	znode_t *dzp;
-	struct inode *ip = NULL;
+	znode_t *zp;
 	xvattr_t xva;
 	int vflg = 0;
 	vsecattr_t vsec = { 0 };
@@ -351,7 +362,7 @@
 		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
 		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
 		    lr->lr_uid, lr->lr_gid);
-		/*FALLTHROUGH*/
+		fallthrough;
 	case TX_CREATE_ACL_ATTR:
 		if (name == NULL) {
 			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
@@ -373,8 +384,8 @@
 			    lr->lr_uid, lr->lr_gid);
 		}
 
-		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
-		    0, 0, &ip, kcred, vflg, &vsec);
+		error = zfs_create(dzp, name, &xva.xva_vattr,
+		    0, 0, &zp, kcred, vflg, &vsec);
 		break;
 	case TX_MKDIR_ACL:
 		aclstart = (caddr_t)(lracl + 1);
@@ -383,7 +394,7 @@
 		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
 		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
 		    lr->lr_uid, lr->lr_gid);
-		/*FALLTHROUGH*/
+		fallthrough;
 	case TX_MKDIR_ACL_ATTR:
 		if (name == NULL) {
 			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
@@ -403,18 +414,21 @@
 			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
 			    lr->lr_uid, lr->lr_gid);
 		}
-		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
-		    &ip, kcred, vflg, &vsec);
+		error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+		    &zp, kcred, vflg, &vsec);
 		break;
 	default:
 		error = SET_ERROR(ENOTSUP);
 	}
 
 bail:
-	if (error == 0 && ip != NULL)
-		iput(ip);
-
-	iput(ZTOI(dzp));
+	if (error == 0 && zp != NULL) {
+#ifdef __FreeBSD__
+		VOP_UNLOCK1(ZTOV(zp));
+#endif
+		zrele(zp);
+	}
+	zrele(dzp);
 
 	if (zfsvfs->z_fuid_replay)
 		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
@@ -431,7 +445,7 @@
 	char *name = NULL;		/* location determined later */
 	char *link;			/* symlink content follows name */
 	znode_t *dzp;
-	struct inode *ip = NULL;
+	znode_t *zp = NULL;
 	xvattr_t xva;
 	int vflg = 0;
 	size_t lrsize = sizeof (lr_create_t);
@@ -505,14 +519,13 @@
 		    zfs_replay_fuid_domain(start, &start,
 		    lr->lr_uid, lr->lr_gid);
 		name = (char *)start;
-
-		/*FALLTHROUGH*/
+		fallthrough;
 	case TX_CREATE:
 		if (name == NULL)
 			name = (char *)start;
 
-		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
-		    0, 0, &ip, kcred, vflg, NULL);
+		error = zfs_create(dzp, name, &xva.xva_vattr,
+		    0, 0, &zp, kcred, vflg, NULL);
 		break;
 	case TX_MKDIR_ATTR:
 		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
@@ -523,33 +536,35 @@
 		    zfs_replay_fuid_domain(start, &start,
 		    lr->lr_uid, lr->lr_gid);
 		name = (char *)start;
-
-		/*FALLTHROUGH*/
+		fallthrough;
 	case TX_MKDIR:
 		if (name == NULL)
 			name = (char *)(lr + 1);
 
-		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
-		    &ip, kcred, vflg, NULL);
+		error = zfs_mkdir(dzp, name, &xva.xva_vattr,
+		    &zp, kcred, vflg, NULL);
 		break;
 	case TX_MKXATTR:
-		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred);
+		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &zp, kcred);
 		break;
 	case TX_SYMLINK:
 		name = (char *)(lr + 1);
 		link = name + strlen(name) + 1;
-		error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr,
-		    link, &ip, kcred, vflg);
+		error = zfs_symlink(dzp, name, &xva.xva_vattr,
+		    link, &zp, kcred, vflg);
 		break;
 	default:
 		error = SET_ERROR(ENOTSUP);
 	}
 
 out:
-	if (error == 0 && ip != NULL)
-		iput(ip);
-
-	iput(ZTOI(dzp));
+	if (error == 0 && zp != NULL) {
+#ifdef __FreeBSD__
+		VOP_UNLOCK1(ZTOV(zp));
+#endif
+		zrele(zp);
+	}
+	zrele(dzp);
 
 	if (zfsvfs->z_fuid_replay)
 		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
@@ -578,16 +593,16 @@
 
 	switch ((int)lr->lr_common.lrc_txtype) {
 	case TX_REMOVE:
-		error = zfs_remove(ZTOI(dzp), name, kcred, vflg);
+		error = zfs_remove(dzp, name, kcred, vflg);
 		break;
 	case TX_RMDIR:
-		error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg);
+		error = zfs_rmdir(dzp, name, NULL, kcred, vflg);
 		break;
 	default:
 		error = SET_ERROR(ENOTSUP);
 	}
 
-	iput(ZTOI(dzp));
+	zrele(dzp);
 
 	return (error);
 }
@@ -609,17 +624,16 @@
 		return (error);
 
 	if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
-		iput(ZTOI(dzp));
+		zrele(dzp);
 		return (error);
 	}
 
 	if (lr->lr_common.lrc_txtype & TX_CI)
 		vflg |= FIGNORECASE;
 
-	error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred, vflg);
-
-	iput(ZTOI(zp));
-	iput(ZTOI(dzp));
+	error = zfs_link(dzp, zp, name, kcred, vflg);
+	zrele(zp);
+	zrele(dzp);
 
 	return (error);
 }
@@ -642,18 +656,17 @@
 		return (error);
 
 	if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
-		iput(ZTOI(sdzp));
+		zrele(sdzp);
 		return (error);
 	}
 
 	if (lr->lr_common.lrc_txtype & TX_CI)
 		vflg |= FIGNORECASE;
 
-	error = zfs_rename(ZTOI(sdzp), sname, ZTOI(tdzp), tname, kcred, vflg);
+	error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg);
 
-	iput(ZTOI(tdzp));
-	iput(ZTOI(sdzp));
-
+	zrele(tdzp);
+	zrele(sdzp);
 	return (error);
 }
 
@@ -664,7 +677,7 @@
 	lr_write_t *lr = arg2;
 	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
 	znode_t	*zp;
-	int error, written;
+	int error;
 	uint64_t eod, offset, length;
 
 	if (byteswap)
@@ -708,15 +721,8 @@
 		if (zp->z_size < eod)
 			zfsvfs->z_replay_eof = eod;
 	}
-
-	written = zpl_write_common(ZTOI(zp), data, length, &offset,
-	    UIO_SYSSPACE, 0, kcred);
-	if (written < 0)
-		error = -written;
-	else if (written < length)
-		error = SET_ERROR(EIO); /* short write */
-
-	iput(ZTOI(zp));
+	error = zfs_write_simple(zp, data, length, offset, NULL);
+	zrele(zp);
 	zfsvfs->z_replay_eof = 0;	/* safety */
 
 	return (error);
@@ -752,7 +758,7 @@
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
-			iput(ZTOI(zp));
+			zrele(zp);
 			if (error == ERESTART) {
 				dmu_tx_wait(tx);
 				dmu_tx_abort(tx);
@@ -770,7 +776,7 @@
 		dmu_tx_commit(tx);
 	}
 
-	iput(ZTOI(zp));
+	zrele(zp);
 
 	return (error);
 }
@@ -796,10 +802,10 @@
 	fl.l_start = lr->lr_offset;
 	fl.l_len = lr->lr_length;
 
-	error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
+	error = zfs_space(zp, F_FREESP, &fl, O_RDWR | O_LARGEFILE,
 	    lr->lr_offset, kcred);
 
-	iput(ZTOI(zp));
+	zrele(zp);
 
 	return (error);
 }
@@ -851,11 +857,11 @@
 	zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
 	    lr->lr_uid, lr->lr_gid);
 
-	error = zfs_setattr(ZTOI(zp), vap, 0, kcred);
+	error = zfs_setattr(zp, vap, 0, kcred);
 
 	zfs_fuid_info_free(zfsvfs->z_fuid_replay);
 	zfsvfs->z_fuid_replay = NULL;
-	iput(ZTOI(zp));
+	zrele(zp);
 
 	return (error);
 }
@@ -885,9 +891,9 @@
 	vsa.vsa_aclflags = 0;
 	vsa.vsa_aclentp = ace;
 
-	error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
+	error = zfs_setsecattr(zp, &vsa, 0, kcred);
 
-	iput(ZTOI(zp));
+	zrele(zp);
 
 	return (error);
 }
@@ -945,13 +951,13 @@
 		    lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
 	}
 
-	error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
+	error = zfs_setsecattr(zp, &vsa, 0, kcred);
 
 	if (zfsvfs->z_fuid_replay)
 		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
 
 	zfsvfs->z_fuid_replay = NULL;
-	iput(ZTOI(zp));
+	zrele(zp);
 
 	return (error);
 }

diff --git a/zfs/module/zfs/zfs_rlock.c b/zfs/module/zfs/zfs_rlock.c
index 454a02a..06a5e03 100644
--- a/zfs/module/zfs/zfs_rlock.c
+++ b/zfs/module/zfs/zfs_rlock.c

@@ -38,6 +38,20 @@
  *	rangelock_reduce(lr, off, len); // optional
  *	rangelock_exit(lr);
  *
+ * Range locking rules
+ * --------------------
+ * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
+ *    file range needs to be locked as RL_WRITER. Only then can the pages be
+ *    freed etc and zp_size reset. zp_size must be set within range lock.
+ * 2. For writes and punching holes (zfs_write & zfs_space) just the range
+ *    being written or freed needs to be locked as RL_WRITER.
+ *    Multiple writes at the end of the file must coordinate zp_size updates
+ *    to ensure data isn't lost. A compare and swap loop is currently used
+ *    to ensure the file size is at least the offset last written.
+ * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
+ *    read needs to be locked as RL_READER. A check against zp_size can then
+ *    be made for reading beyond end of file.
+ *
  * AVL tree
  * --------
  * An AVL tree is used to maintain the state of the existing ranges
@@ -99,6 +113,7 @@
 #include <sys/zfs_context.h>
 #include <sys/zfs_rlock.h>
 
+
 /*
  * AVL comparison function used to order range locks
  * Locks are ordered on the start offset of the range.
@@ -109,7 +124,7 @@
 	const zfs_locked_range_t *rl1 = (const zfs_locked_range_t *)arg1;
 	const zfs_locked_range_t *rl2 = (const zfs_locked_range_t *)arg2;
 
-	return (AVL_CMP(rl1->lr_offset, rl2->lr_offset));
+	return (TREE_CMP(rl1->lr_offset, rl2->lr_offset));
 }
 
 /*
@@ -135,10 +150,12 @@
 }
 
 /*
- * Check if a write lock can be grabbed, or wait and recheck until available.
+ * Check if a write lock can be grabbed.  If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-zfs_rangelock_enter_writer(zfs_rangelock_t *rl, zfs_locked_range_t *new)
+static boolean_t
+zfs_rangelock_enter_writer(zfs_rangelock_t *rl, zfs_locked_range_t *new,
+    boolean_t nonblock)
 {
 	avl_tree_t *tree = &rl->rl_tree;
 	zfs_locked_range_t *lr;
@@ -168,7 +185,7 @@
 		 */
 		if (avl_numnodes(tree) == 0) {
 			avl_add(tree, new);
-			return;
+			return (B_TRUE);
 		}
 
 		/*
@@ -189,8 +206,10 @@
 			goto wait;
 
 		avl_insert(tree, new, where);
-		return;
+		return (B_TRUE);
 wait:
+		if (nonblock)
+			return (B_FALSE);
 		if (!lr->lr_write_wanted) {
 			cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
 			lr->lr_write_wanted = B_TRUE;
@@ -376,10 +395,12 @@
 }
 
 /*
- * Check if a reader lock can be grabbed, or wait and recheck until available.
+ * Check if a reader lock can be grabbed.  If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new)
+static boolean_t
+zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new,
+    boolean_t nonblock)
 {
 	avl_tree_t *tree = &rl->rl_tree;
 	zfs_locked_range_t *prev, *next;
@@ -400,6 +421,8 @@
 	 */
 	if (prev && (off < prev->lr_offset + prev->lr_length)) {
 		if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
+			if (nonblock)
+				return (B_FALSE);
 			if (!prev->lr_read_wanted) {
 				cv_init(&prev->lr_read_cv,
 				    NULL, CV_DEFAULT, NULL);
@@ -424,6 +447,8 @@
 		if (off + len <= next->lr_offset)
 			goto got_lock;
 		if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
+			if (nonblock)
+				return (B_FALSE);
 			if (!next->lr_read_wanted) {
 				cv_init(&next->lr_read_cv,
 				    NULL, CV_DEFAULT, NULL);
@@ -442,6 +467,7 @@
 	 * locks and bumping ref counts (r_count).
 	 */
 	zfs_rangelock_add_reader(tree, new, prev, where);
+	return (B_TRUE);
 }
 
 /*
@@ -449,11 +475,12 @@
  * (RL_WRITER or RL_APPEND).  If RL_APPEND is specified, rl_cb() will convert
  * it to a RL_WRITER lock (with the offset at the end of the file).  Returns
  * the range lock structure for later unlocking (or reduce range if the
- * entire file is locked as RL_WRITER).
+ * entire file is locked as RL_WRITER), or NULL if nonblock is true and the
+ * lock could not be acquired immediately.
  */
-zfs_locked_range_t *
-zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
-    zfs_rangelock_type_t type)
+static zfs_locked_range_t *
+zfs_rangelock_enter_impl(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
+    zfs_rangelock_type_t type, boolean_t nonblock)
 {
 	zfs_locked_range_t *new;
 
@@ -476,18 +503,34 @@
 		/*
 		 * First check for the usual case of no locks
 		 */
-		if (avl_numnodes(&rl->rl_tree) == 0)
+		if (avl_numnodes(&rl->rl_tree) == 0) {
 			avl_add(&rl->rl_tree, new);
-		else
-			zfs_rangelock_enter_reader(rl, new);
-	} else {
-		/* RL_WRITER or RL_APPEND */
-		zfs_rangelock_enter_writer(rl, new);
+		} else if (!zfs_rangelock_enter_reader(rl, new, nonblock)) {
+			kmem_free(new, sizeof (*new));
+			new = NULL;
+		}
+	} else if (!zfs_rangelock_enter_writer(rl, new, nonblock)) {
+		kmem_free(new, sizeof (*new));
+		new = NULL;
 	}
 	mutex_exit(&rl->rl_lock);
 	return (new);
 }
 
+/*
+ * Lock a range; calls zfs_rangelock_enter_impl() with nonblock set to
+ * B_FALSE and returns its result.
+ */
+zfs_locked_range_t *
+zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
+    zfs_rangelock_type_t type)
+{
+	zfs_locked_range_t *lr;
+
+	lr = zfs_rangelock_enter_impl(rl, off, len, type, B_FALSE);
+	return (lr);
+}
+
+/*
+ * Try to lock a range; calls zfs_rangelock_enter_impl() with nonblock set
+ * to B_TRUE and returns its result.
+ */
+zfs_locked_range_t *
+zfs_rangelock_tryenter(zfs_rangelock_t *rl, uint64_t off, uint64_t len,
+    zfs_rangelock_type_t type)
+{
+	zfs_locked_range_t *lr;
+
+	lr = zfs_rangelock_enter_impl(rl, off, len, type, B_TRUE);
+	return (lr);
+}
+
 /*
  * Safely free the zfs_locked_range_t.
  */
@@ -642,6 +685,7 @@
 EXPORT_SYMBOL(zfs_rangelock_init);
 EXPORT_SYMBOL(zfs_rangelock_fini);
 EXPORT_SYMBOL(zfs_rangelock_enter);
+EXPORT_SYMBOL(zfs_rangelock_tryenter);
 EXPORT_SYMBOL(zfs_rangelock_exit);
 EXPORT_SYMBOL(zfs_rangelock_reduce);
 #endif

diff --git a/zfs/module/zfs/zfs_sa.c b/zfs/module/zfs/zfs_sa.c
index bd21ba8..67be131 100644
--- a/zfs/module/zfs/zfs_sa.c
+++ b/zfs/module/zfs/zfs_sa.c

@@ -71,7 +71,7 @@
 
 #ifdef _KERNEL
 int
-zfs_sa_readlink(znode_t *zp, uio_t *uio)
+zfs_sa_readlink(znode_t *zp, zfs_uio_t *uio)
 {
 	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
 	size_t bufsz;
@@ -79,15 +79,16 @@
 
 	bufsz = zp->z_size;
 	if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) {
-		error = uiomove((caddr_t)db->db_data +
+		error = zfs_uiomove((caddr_t)db->db_data +
 		    ZFS_OLD_ZNODE_PHYS_SIZE,
-		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
+		    MIN((size_t)bufsz, zfs_uio_resid(uio)), UIO_READ, uio);
 	} else {
 		dmu_buf_t *dbp;
 		if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id,
 		    0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
-			error = uiomove(dbp->db_data,
-			    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
+			error = zfs_uiomove(dbp->db_data,
+			    MIN((size_t)bufsz, zfs_uio_resid(uio)), UIO_READ,
+			    uio);
 			dmu_buf_rele(dbp, FTAG);
 		}
 	}
@@ -300,7 +301,7 @@
 	 * and ready the ACL would require special "locked"
 	 * interfaces that would be messy
 	 */
-	if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode))
+	if (zp->z_acl_cached == NULL || Z_ISLNK(ZTOTYPE(zp)))
 		return;
 
 	/*
@@ -369,13 +370,13 @@
 	    &ctime, 16);
 	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
 	    &crtime, 16);
-	links = ZTOI(zp)->i_nlink;
+	links = ZTONLNK(zp);
 	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
 	    &links, 8);
 	if (dmu_objset_projectquota_enabled(hdl->sa_os))
 		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PROJID(zfsvfs), NULL,
 		    &zp->z_projid, 8);
-	if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
+	if (Z_ISBLK(ZTOTYPE(zp)) || Z_ISCHR(ZTOTYPE(zp)))
 		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
 		    &rdev, 8);
 	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,

diff --git a/zfs/module/zfs/zfs_sysfs.c b/zfs/module/zfs/zfs_sysfs.c
deleted file mode 100644
index bb7f3b6..0000000
--- a/zfs/module/zfs/zfs_sysfs.c
+++ /dev/null

@@ -1,661 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/zfeature.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_sysfs.h>
-#include <sys/kmem.h>
-#include <sys/fs/zfs.h>
-#include <linux/kobject.h>
-
-#include "zfs_prop.h"
-
-#if !defined(_KERNEL)
-#error kernel builds only
-#endif
-
-/*
- * ZFS Module sysfs support
- *
- * This extends our sysfs '/sys/module/zfs' entry to include feature
- * and property attributes. The primary consumer of this information
- * is user processes, like the zfs CLI, that need to know what the
- * current loaded ZFS module supports. The libzfs binary will consult
- * this information when instantiating the zfs|zpool property tables
- * and the pool features table.
- *
- * The added top-level directories are:
- * /sys/module/zfs
- *		├── features.kernel
- *		├── features.pool
- *		├── properties.dataset
- *		└── properties.pool
- *
- * The local interface for the zfs kobjects includes:
- *	zfs_kobj_init()
- *	zfs_kobj_add()
- *	zfs_kobj_release()
- *	zfs_kobj_add_attr()
- *	zfs_kobj_fini()
- */
-
-/*
- * A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs'
- */
-struct zfs_mod_kobj;
-typedef struct zfs_mod_kobj zfs_mod_kobj_t;
-
-struct zfs_mod_kobj {
-	struct kobject		zko_kobj;
-	struct kobj_type	zko_kobj_type;
-	struct sysfs_ops	zko_sysfs_ops;
-	size_t			zko_attr_count;
-	struct attribute	*zko_attr_list;		/* allocated */
-	struct attribute	**zko_default_attrs;	/* allocated */
-	size_t			zko_child_count;
-	zfs_mod_kobj_t		*zko_children;		/* allocated */
-};
-
-#define	ATTR_TABLE_SIZE(cnt)	(sizeof (struct attribute) * (cnt))
-/* Note +1 for NULL terminator slot */
-#define	DEFAULT_ATTR_SIZE(cnt)	(sizeof (struct attribute *) * (cnt + 1))
-#define	CHILD_TABLE_SIZE(cnt)	(sizeof (zfs_mod_kobj_t) * (cnt))
-
-/*
- * These are the top-level kobjects under '/sys/module/zfs/'
- */
-static zfs_mod_kobj_t kernel_features_kobj;
-static zfs_mod_kobj_t pool_features_kobj;
-static zfs_mod_kobj_t dataset_props_kobj;
-static zfs_mod_kobj_t pool_props_kobj;
-
-/*
- * The show function is used to provide the content
- * of an attribute into a PAGE_SIZE buffer.
- */
-typedef ssize_t	(*sysfs_show_func)(struct kobject *, struct attribute *,
-    char *);
-
-static void
-zfs_kobj_fini(zfs_mod_kobj_t *zkobj)
-{
-	/* finalize any child kobjects */
-	if (zkobj->zko_child_count != 0) {
-		ASSERT(zkobj->zko_children);
-		for (int i = 0; i < zkobj->zko_child_count; i++)
-			zfs_kobj_fini(&zkobj->zko_children[i]);
-	}
-
-	/* kobject_put() will call zfs_kobj_release() to release memory */
-	kobject_del(&zkobj->zko_kobj);
-	kobject_put(&zkobj->zko_kobj);
-}
-
-static void
-zfs_kobj_release(struct kobject *kobj)
-{
-	zfs_mod_kobj_t *zkobj = container_of(kobj, zfs_mod_kobj_t, zko_kobj);
-
-	if (zkobj->zko_attr_list != NULL) {
-		ASSERT3S(zkobj->zko_attr_count, !=, 0);
-		kmem_free(zkobj->zko_attr_list,
-		    ATTR_TABLE_SIZE(zkobj->zko_attr_count));
-		zkobj->zko_attr_list = NULL;
-	}
-
-	if (zkobj->zko_default_attrs != NULL) {
-		kmem_free(zkobj->zko_default_attrs,
-		    DEFAULT_ATTR_SIZE(zkobj->zko_attr_count));
-		zkobj->zko_default_attrs = NULL;
-	}
-
-	if (zkobj->zko_child_count != 0) {
-		ASSERT(zkobj->zko_children);
-
-		kmem_free(zkobj->zko_children,
-		    CHILD_TABLE_SIZE(zkobj->zko_child_count));
-		zkobj->zko_child_count = 0;
-		zkobj->zko_children = NULL;
-	}
-
-	zkobj->zko_attr_count = 0;
-}
-
-#ifndef sysfs_attr_init
-#define	sysfs_attr_init(attr) do {} while (0)
-#endif
-
-static void
-zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name)
-{
-	VERIFY3U(attr_num, <, zkobj->zko_attr_count);
-	ASSERT(zkobj->zko_attr_list);
-	ASSERT(zkobj->zko_default_attrs);
-
-	zkobj->zko_attr_list[attr_num].name = attr_name;
-	zkobj->zko_attr_list[attr_num].mode = 0444;
-	zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num];
-	sysfs_attr_init(&zkobj->zko_attr_list[attr_num]);
-}
-
-static int
-zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
-    sysfs_show_func show_func)
-{
-	/*
-	 * Initialize object's attributes. Count can be zero.
-	 */
-	if (attr_cnt > 0) {
-		zkobj->zko_attr_list = kmem_zalloc(ATTR_TABLE_SIZE(attr_cnt),
-		    KM_SLEEP);
-		if (zkobj->zko_attr_list == NULL)
-			return (ENOMEM);
-	}
-	/* this will always have at least one slot for NULL termination */
-	zkobj->zko_default_attrs = kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt),
-	    KM_SLEEP);
-	if (zkobj->zko_default_attrs == NULL) {
-		if (zkobj->zko_attr_list != NULL) {
-			kmem_free(zkobj->zko_attr_list,
-			    ATTR_TABLE_SIZE(attr_cnt));
-		}
-		return (ENOMEM);
-	}
-	zkobj->zko_attr_count = attr_cnt;
-	zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_attrs;
-
-	if (child_cnt > 0) {
-		zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt),
-		    KM_SLEEP);
-		if (zkobj->zko_children == NULL) {
-			if (zkobj->zko_default_attrs != NULL) {
-				kmem_free(zkobj->zko_default_attrs,
-				    DEFAULT_ATTR_SIZE(attr_cnt));
-			}
-			if (zkobj->zko_attr_list != NULL) {
-				kmem_free(zkobj->zko_attr_list,
-				    ATTR_TABLE_SIZE(attr_cnt));
-			}
-			return (ENOMEM);
-		}
-		zkobj->zko_child_count = child_cnt;
-	}
-
-	zkobj->zko_sysfs_ops.show = show_func;
-	zkobj->zko_kobj_type.sysfs_ops = &zkobj->zko_sysfs_ops;
-	zkobj->zko_kobj_type.release = zfs_kobj_release;
-
-	return (0);
-}
-
-static int
-zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
-{
-	/* zko_default_attrs must be NULL terminated */
-	ASSERT(zkobj->zko_default_attrs != NULL);
-	ASSERT(zkobj->zko_default_attrs[zkobj->zko_attr_count] == NULL);
-
-	kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type);
-	return (kobject_add(&zkobj->zko_kobj, parent, name));
-}
-
-/*
- * Each zfs property has these common attributes
- */
-static const char *zprop_attrs[]  = {
-	"type",
-	"readonly",
-	"setonce",
-	"visible",
-	"values",
-	"default",
-	"datasets"	/* zfs properties only */
-};
-
-#define	ZFS_PROP_ATTR_COUNT	ARRAY_SIZE(zprop_attrs)
-#define	ZPOOL_PROP_ATTR_COUNT	(ZFS_PROP_ATTR_COUNT - 1)
-
-static const char *zprop_types[]  = {
-	"number",
-	"string",
-	"index",
-};
-
-typedef struct zfs_type_map {
-	zfs_type_t	ztm_type;
-	const char	*ztm_name;
-} zfs_type_map_t;
-
-static zfs_type_map_t type_map[] = {
-	{ZFS_TYPE_FILESYSTEM,	"filesystem"},
-	{ZFS_TYPE_SNAPSHOT,	"snapshot"},
-	{ZFS_TYPE_VOLUME,	"volume"},
-	{ZFS_TYPE_BOOKMARK,	"bookmark"}
-};
-
-/*
- * Show the content for a zfs property attribute
- */
-static ssize_t
-zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property,
-    char *buf, size_t buflen)
-{
-	const char *show_str;
-	char number[32];
-
-	/* For dataset properties list the dataset types that apply */
-	if (strcmp(attr_name, "datasets") == 0 &&
-	    property->pd_types != ZFS_TYPE_POOL) {
-		int len = 0;
-
-		for (int i = 0; i < ARRAY_SIZE(type_map); i++) {
-			if (type_map[i].ztm_type & property->pd_types)  {
-				len += snprintf(buf + len, buflen - len, "%s ",
-				    type_map[i].ztm_name);
-			}
-		}
-		len += snprintf(buf + len, buflen - len, "\n");
-		return (len);
-	}
-
-	if (strcmp(attr_name, "type") == 0) {
-		show_str = zprop_types[property->pd_proptype];
-	} else if (strcmp(attr_name, "readonly") == 0) {
-		show_str = property->pd_attr == PROP_READONLY ? "1" : "0";
-	} else if (strcmp(attr_name, "setonce") == 0) {
-		show_str = property->pd_attr == PROP_ONETIME ? "1" : "0";
-	} else if (strcmp(attr_name, "visible") == 0) {
-		show_str = property->pd_visible ? "1" : "0";
-	} else if (strcmp(attr_name, "values") == 0) {
-		show_str = property->pd_values ? property->pd_values : "";
-	} else if (strcmp(attr_name, "default") == 0) {
-		switch (property->pd_proptype) {
-		case PROP_TYPE_NUMBER:
-			(void) snprintf(number, sizeof (number), "%llu",
-			    (u_longlong_t)property->pd_numdefault);
-			show_str = number;
-			break;
-		case PROP_TYPE_STRING:
-			show_str = property->pd_strdefault ?
-			    property->pd_strdefault : "";
-			break;
-		case PROP_TYPE_INDEX:
-			if (zprop_index_to_string(property->pd_propnum,
-			    property->pd_numdefault, &show_str,
-			    property->pd_types) != 0) {
-				show_str = "";
-			}
-			break;
-		default:
-			return (0);
-		}
-	} else {
-		return (0);
-	}
-
-	return (snprintf(buf, buflen, "%s\n", show_str));
-}
-
-static ssize_t
-dataset_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	zfs_prop_t prop = zfs_name_to_prop(kobject_name(kobj));
-	zprop_desc_t *prop_tbl = zfs_prop_get_table();
-	ssize_t len;
-
-	ASSERT3U(prop, <, ZFS_NUM_PROPS);
-
-	len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
-
-	return (len);
-}
-
-static ssize_t
-pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	zpool_prop_t prop = zpool_name_to_prop(kobject_name(kobj));
-	zprop_desc_t *prop_tbl = zpool_prop_get_table();
-	ssize_t len;
-
-	ASSERT3U(prop, <, ZPOOL_NUM_PROPS);
-
-	len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
-
-	return (len);
-}
-
-/*
- * ZFS kernel feature attributes for '/sys/module/zfs/features.kernel'
- *
- * This list is intended for kernel features that don't have a pool feature
- * association or that extend existing user kernel interfaces.
- *
- * A user processes can easily check if the running zfs kernel module
- * supports the new feature.
- */
-static const char *zfs_kernel_features[] = {
-	/* --> Add new kernel features here */
-	"com.delphix:vdev_initialize",
-	"org.zfsonlinux:vdev_trim",
-};
-
-#define	KERNEL_FEATURE_COUNT	ARRAY_SIZE(zfs_kernel_features)
-
-static ssize_t
-kernel_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	if (strcmp(attr->name, "supported") == 0)
-		return (snprintf(buf, PAGE_SIZE, "yes\n"));
-	return (0);
-}
-
-static void
-kernel_feature_to_kobj(zfs_mod_kobj_t *parent, int slot, const char *name)
-{
-	zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[slot];
-
-	ASSERT3U(slot, <, KERNEL_FEATURE_COUNT);
-	ASSERT(name);
-
-	int err = zfs_kobj_init(zfs_kobj, 1, 0, kernel_feature_show);
-	if (err)
-		return;
-
-	zfs_kobj_add_attr(zfs_kobj, 0, "supported");
-
-	err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
-	if (err)
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-}
-
-static int
-zfs_kernel_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
-{
-	/*
-	 * Create a parent kobject to host kernel features.
-	 *
-	 * '/sys/module/zfs/features.kernel'
-	 */
-	int err = zfs_kobj_init(zfs_kobj, 0, KERNEL_FEATURE_COUNT,
-	    kernel_feature_show);
-	if (err)
-		return (err);
-	err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_KERNEL_FEATURES);
-	if (err) {
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-		return (err);
-	}
-
-	/*
-	 * Now create a kobject for each feature.
-	 *
-	 * '/sys/module/zfs/features.kernel/<feature>'
-	 */
-	for (int f = 0; f < KERNEL_FEATURE_COUNT; f++)
-		kernel_feature_to_kobj(zfs_kobj, f, zfs_kernel_features[f]);
-
-	return (0);
-}
-
-/*
- * Each pool feature has these common attributes
- */
-static const char *pool_feature_attrs[]  = {
-	"description",
-	"guid",
-	"uname",
-	"readonly_compatible",
-	"required_for_mos",
-	"activate_on_enable",
-	"per_dataset"
-};
-
-#define	ZPOOL_FEATURE_ATTR_COUNT	ARRAY_SIZE(pool_feature_attrs)
-
-/*
- * Show the content for the given zfs pool feature attribute
- */
-static ssize_t
-pool_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	spa_feature_t fid;
-
-	if (zfeature_lookup_guid(kobject_name(kobj), &fid) != 0)
-		return (0);
-
-	ASSERT3U(fid, <, SPA_FEATURES);
-
-	zfeature_flags_t flags = spa_feature_table[fid].fi_flags;
-	const char *show_str = NULL;
-
-	if (strcmp(attr->name, "description") == 0) {
-		show_str = spa_feature_table[fid].fi_desc;
-	} else if (strcmp(attr->name, "guid") == 0) {
-		show_str = spa_feature_table[fid].fi_guid;
-	} else if (strcmp(attr->name, "uname") == 0) {
-		show_str = spa_feature_table[fid].fi_uname;
-	} else if (strcmp(attr->name, "readonly_compatible") == 0) {
-		show_str = flags & ZFEATURE_FLAG_READONLY_COMPAT ? "1" : "0";
-	} else if (strcmp(attr->name, "required_for_mos") == 0) {
-		show_str = flags & ZFEATURE_FLAG_MOS ? "1" : "0";
-	} else if (strcmp(attr->name, "activate_on_enable") == 0) {
-		show_str = flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE ? "1" : "0";
-	} else if (strcmp(attr->name, "per_dataset") == 0) {
-		show_str = flags & ZFEATURE_FLAG_PER_DATASET ? "1" : "0";
-	}
-	if (show_str == NULL)
-		return (0);
-
-	return (snprintf(buf, PAGE_SIZE, "%s\n", show_str));
-}
-
-static void
-pool_feature_to_kobj(zfs_mod_kobj_t *parent, spa_feature_t fid,
-    const char *name)
-{
-	zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[fid];
-
-	ASSERT3U(fid, <, SPA_FEATURES);
-	ASSERT(name);
-
-	int err = zfs_kobj_init(zfs_kobj, ZPOOL_FEATURE_ATTR_COUNT, 0,
-	    pool_feature_show);
-	if (err)
-		return;
-
-	for (int i = 0; i < ZPOOL_FEATURE_ATTR_COUNT; i++)
-		zfs_kobj_add_attr(zfs_kobj, i, pool_feature_attrs[i]);
-
-	err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
-	if (err)
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-}
-
-static int
-zfs_pool_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
-{
-	/*
-	 * Create a parent kobject to host pool features.
-	 *
-	 * '/sys/module/zfs/features.pool'
-	 */
-	int err = zfs_kobj_init(zfs_kobj, 0, SPA_FEATURES, pool_feature_show);
-	if (err)
-		return (err);
-	err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_POOL_FEATURES);
-	if (err) {
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-		return (err);
-	}
-
-	/*
-	 * Now create a kobject for each feature.
-	 *
-	 * '/sys/module/zfs/features.pool/<feature>'
-	 */
-	for (spa_feature_t i = 0; i < SPA_FEATURES; i++)
-		pool_feature_to_kobj(zfs_kobj, i, spa_feature_table[i].fi_guid);
-
-	return (0);
-}
-
-typedef struct prop_to_kobj_arg {
-	zprop_desc_t	*p2k_table;
-	zfs_mod_kobj_t	*p2k_parent;
-	sysfs_show_func	p2k_show_func;
-	int		p2k_attr_count;
-} prop_to_kobj_arg_t;
-
-static int
-zprop_to_kobj(int prop, void *args)
-{
-	prop_to_kobj_arg_t *data = args;
-	zfs_mod_kobj_t *parent = data->p2k_parent;
-	zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[prop];
-	const char *name = data->p2k_table[prop].pd_name;
-	int err;
-
-	ASSERT(name);
-
-	err = zfs_kobj_init(zfs_kobj, data->p2k_attr_count, 0,
-	    data->p2k_show_func);
-	if (err)
-		return (ZPROP_CONT);
-
-	for (int i = 0; i < data->p2k_attr_count; i++)
-		zfs_kobj_add_attr(zfs_kobj, i, zprop_attrs[i]);
-
-	err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
-	if (err)
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-
-	return (ZPROP_CONT);
-}
-
-static int
-zfs_sysfs_properties_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent,
-    zfs_type_t type)
-{
-	prop_to_kobj_arg_t context;
-	const char *name;
-	int err;
-
-	/*
-	 * Create a parent kobject to host properties.
-	 *
-	 * '/sys/module/zfs/properties.<type>'
-	 */
-	if (type == ZFS_TYPE_POOL) {
-		name = ZFS_SYSFS_POOL_PROPERTIES;
-		context.p2k_table = zpool_prop_get_table();
-		context.p2k_attr_count = ZPOOL_PROP_ATTR_COUNT;
-		context.p2k_parent = zfs_kobj;
-		context.p2k_show_func = pool_property_show;
-		err = zfs_kobj_init(zfs_kobj, 0, ZPOOL_NUM_PROPS,
-		    pool_property_show);
-	} else {
-		name = ZFS_SYSFS_DATASET_PROPERTIES;
-		context.p2k_table = zfs_prop_get_table();
-		context.p2k_attr_count = ZFS_PROP_ATTR_COUNT;
-		context.p2k_parent = zfs_kobj;
-		context.p2k_show_func = dataset_property_show;
-		err = zfs_kobj_init(zfs_kobj, 0, ZFS_NUM_PROPS,
-		    dataset_property_show);
-	}
-
-	if (err)
-		return (err);
-
-	err = zfs_kobj_add(zfs_kobj, parent, name);
-	if (err) {
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-		return (err);
-	}
-
-	/*
-	 * Create a kobject for each property.
-	 *
-	 * '/sys/module/zfs/properties.<type>/<property>'
-	 */
-	(void) zprop_iter_common(zprop_to_kobj, &context, B_TRUE,
-	    B_FALSE, type);
-
-	return (err);
-}
-
-void
-zfs_sysfs_init(void)
-{
-	struct kobject *parent;
-#if defined(CONFIG_ZFS) && !defined(CONFIG_ZFS_MODULE)
-	parent = kobject_create_and_add("zfs", fs_kobj);
-#else
-	parent = &(((struct module *)(THIS_MODULE))->mkobj).kobj;
-#endif
-	int err;
-
-	if (parent == NULL)
-		return;
-
-	err = zfs_kernel_features_init(&kernel_features_kobj, parent);
-	if (err)
-		return;
-
-	err = zfs_pool_features_init(&pool_features_kobj, parent);
-	if (err) {
-		zfs_kobj_fini(&kernel_features_kobj);
-		return;
-	}
-
-	err = zfs_sysfs_properties_init(&pool_props_kobj, parent,
-	    ZFS_TYPE_POOL);
-	if (err) {
-		zfs_kobj_fini(&kernel_features_kobj);
-		zfs_kobj_fini(&pool_features_kobj);
-		return;
-	}
-
-	err = zfs_sysfs_properties_init(&dataset_props_kobj, parent,
-	    ZFS_TYPE_FILESYSTEM);
-	if (err) {
-		zfs_kobj_fini(&kernel_features_kobj);
-		zfs_kobj_fini(&pool_features_kobj);
-		zfs_kobj_fini(&pool_props_kobj);
-		return;
-	}
-}
-
-void
-zfs_sysfs_fini(void)
-{
-	/*
-	 * Remove top-level kobjects; each will remove any children kobjects
-	 */
-	zfs_kobj_fini(&kernel_features_kobj);
-	zfs_kobj_fini(&pool_features_kobj);
-	zfs_kobj_fini(&dataset_props_kobj);
-	zfs_kobj_fini(&pool_props_kobj);
-}

diff --git a/zfs/module/zfs/zfs_vfsops.c b/zfs/module/zfs/zfs_vfsops.c
deleted file mode 100644
index 0e14cad..0000000
--- a/zfs/module/zfs/zfs_vfsops.c
+++ /dev/null

@@ -1,2462 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
- */
-
-/* Portions Copyright 2010 Robert Milkowski */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/sysmacros.h>
-#include <sys/kmem.h>
-#include <sys/pathname.h>
-#include <sys/vnode.h>
-#include <sys/vfs.h>
-#include <sys/mntent.h>
-#include <sys/cmn_err.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_dir.h>
-#include <sys/zil.h>
-#include <sys/fs/zfs.h>
-#include <sys/dmu.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_deleg.h>
-#include <sys/spa.h>
-#include <sys/zap.h>
-#include <sys/sa.h>
-#include <sys/sa_impl.h>
-#include <sys/policy.h>
-#include <sys/atomic.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_fuid.h>
-#include <sys/sunddi.h>
-#include <sys/dmu_objset.h>
-#include <sys/spa_boot.h>
-#include <sys/zpl.h>
-#include <linux/vfs_compat.h>
-#include "zfs_comutil.h"
-
-enum {
-	TOKEN_RO,
-	TOKEN_RW,
-	TOKEN_SETUID,
-	TOKEN_NOSETUID,
-	TOKEN_EXEC,
-	TOKEN_NOEXEC,
-	TOKEN_DEVICES,
-	TOKEN_NODEVICES,
-	TOKEN_DIRXATTR,
-	TOKEN_SAXATTR,
-	TOKEN_XATTR,
-	TOKEN_NOXATTR,
-	TOKEN_ATIME,
-	TOKEN_NOATIME,
-	TOKEN_RELATIME,
-	TOKEN_NORELATIME,
-	TOKEN_NBMAND,
-	TOKEN_NONBMAND,
-	TOKEN_MNTPOINT,
-	TOKEN_LAST,
-};
-
-static const match_table_t zpl_tokens = {
-	{ TOKEN_RO,		MNTOPT_RO },
-	{ TOKEN_RW,		MNTOPT_RW },
-	{ TOKEN_SETUID,		MNTOPT_SETUID },
-	{ TOKEN_NOSETUID,	MNTOPT_NOSETUID },
-	{ TOKEN_EXEC,		MNTOPT_EXEC },
-	{ TOKEN_NOEXEC,		MNTOPT_NOEXEC },
-	{ TOKEN_DEVICES,	MNTOPT_DEVICES },
-	{ TOKEN_NODEVICES,	MNTOPT_NODEVICES },
-	{ TOKEN_DIRXATTR,	MNTOPT_DIRXATTR },
-	{ TOKEN_SAXATTR,	MNTOPT_SAXATTR },
-	{ TOKEN_XATTR,		MNTOPT_XATTR },
-	{ TOKEN_NOXATTR,	MNTOPT_NOXATTR },
-	{ TOKEN_ATIME,		MNTOPT_ATIME },
-	{ TOKEN_NOATIME,	MNTOPT_NOATIME },
-	{ TOKEN_RELATIME,	MNTOPT_RELATIME },
-	{ TOKEN_NORELATIME,	MNTOPT_NORELATIME },
-	{ TOKEN_NBMAND,		MNTOPT_NBMAND },
-	{ TOKEN_NONBMAND,	MNTOPT_NONBMAND },
-	{ TOKEN_MNTPOINT,	MNTOPT_MNTPOINT "=%s" },
-	{ TOKEN_LAST,		NULL },
-};
-
-static void
-zfsvfs_vfs_free(vfs_t *vfsp)
-{
-	if (vfsp != NULL) {
-		if (vfsp->vfs_mntpoint != NULL)
-			strfree(vfsp->vfs_mntpoint);
-
-		kmem_free(vfsp, sizeof (vfs_t));
-	}
-}
-
-static int
-zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
-{
-	switch (token) {
-	case TOKEN_RO:
-		vfsp->vfs_readonly = B_TRUE;
-		vfsp->vfs_do_readonly = B_TRUE;
-		break;
-	case TOKEN_RW:
-		vfsp->vfs_readonly = B_FALSE;
-		vfsp->vfs_do_readonly = B_TRUE;
-		break;
-	case TOKEN_SETUID:
-		vfsp->vfs_setuid = B_TRUE;
-		vfsp->vfs_do_setuid = B_TRUE;
-		break;
-	case TOKEN_NOSETUID:
-		vfsp->vfs_setuid = B_FALSE;
-		vfsp->vfs_do_setuid = B_TRUE;
-		break;
-	case TOKEN_EXEC:
-		vfsp->vfs_exec = B_TRUE;
-		vfsp->vfs_do_exec = B_TRUE;
-		break;
-	case TOKEN_NOEXEC:
-		vfsp->vfs_exec = B_FALSE;
-		vfsp->vfs_do_exec = B_TRUE;
-		break;
-	case TOKEN_DEVICES:
-		vfsp->vfs_devices = B_TRUE;
-		vfsp->vfs_do_devices = B_TRUE;
-		break;
-	case TOKEN_NODEVICES:
-		vfsp->vfs_devices = B_FALSE;
-		vfsp->vfs_do_devices = B_TRUE;
-		break;
-	case TOKEN_DIRXATTR:
-		vfsp->vfs_xattr = ZFS_XATTR_DIR;
-		vfsp->vfs_do_xattr = B_TRUE;
-		break;
-	case TOKEN_SAXATTR:
-		vfsp->vfs_xattr = ZFS_XATTR_SA;
-		vfsp->vfs_do_xattr = B_TRUE;
-		break;
-	case TOKEN_XATTR:
-		vfsp->vfs_xattr = ZFS_XATTR_DIR;
-		vfsp->vfs_do_xattr = B_TRUE;
-		break;
-	case TOKEN_NOXATTR:
-		vfsp->vfs_xattr = ZFS_XATTR_OFF;
-		vfsp->vfs_do_xattr = B_TRUE;
-		break;
-	case TOKEN_ATIME:
-		vfsp->vfs_atime = B_TRUE;
-		vfsp->vfs_do_atime = B_TRUE;
-		break;
-	case TOKEN_NOATIME:
-		vfsp->vfs_atime = B_FALSE;
-		vfsp->vfs_do_atime = B_TRUE;
-		break;
-	case TOKEN_RELATIME:
-		vfsp->vfs_relatime = B_TRUE;
-		vfsp->vfs_do_relatime = B_TRUE;
-		break;
-	case TOKEN_NORELATIME:
-		vfsp->vfs_relatime = B_FALSE;
-		vfsp->vfs_do_relatime = B_TRUE;
-		break;
-	case TOKEN_NBMAND:
-		vfsp->vfs_nbmand = B_TRUE;
-		vfsp->vfs_do_nbmand = B_TRUE;
-		break;
-	case TOKEN_NONBMAND:
-		vfsp->vfs_nbmand = B_FALSE;
-		vfsp->vfs_do_nbmand = B_TRUE;
-		break;
-	case TOKEN_MNTPOINT:
-		vfsp->vfs_mntpoint = match_strdup(&args[0]);
-		if (vfsp->vfs_mntpoint == NULL)
-			return (SET_ERROR(ENOMEM));
-
-		break;
-	default:
-		break;
-	}
-
-	return (0);
-}
-
-/*
- * Parse the raw mntopts and return a vfs_t describing the options.
- */
-static int
-zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
-{
-	vfs_t *tmp_vfsp;
-	int error;
-
-	tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
-
-	if (mntopts != NULL) {
-		substring_t args[MAX_OPT_ARGS];
-		char *tmp_mntopts, *p, *t;
-		int token;
-
-		tmp_mntopts = t = strdup(mntopts);
-		if (tmp_mntopts == NULL)
-			return (SET_ERROR(ENOMEM));
-
-		while ((p = strsep(&t, ",")) != NULL) {
-			if (!*p)
-				continue;
-
-			args[0].to = args[0].from = NULL;
-			token = match_token(p, zpl_tokens, args);
-			error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
-			if (error) {
-				strfree(tmp_mntopts);
-				zfsvfs_vfs_free(tmp_vfsp);
-				return (error);
-			}
-		}
-
-		strfree(tmp_mntopts);
-	}
-
-	*vfsp = tmp_vfsp;
-
-	return (0);
-}
-
-boolean_t
-zfs_is_readonly(zfsvfs_t *zfsvfs)
-{
-	return (!!(zfsvfs->z_sb->s_flags & SB_RDONLY));
-}
-
-/*ARGSUSED*/
-int
-zfs_sync(struct super_block *sb, int wait, cred_t *cr)
-{
-	zfsvfs_t *zfsvfs = sb->s_fs_info;
-
-	/*
-	 * Semantically, the only requirement is that the sync be initiated.
-	 * The DMU syncs out txgs frequently, so there's nothing to do.
-	 */
-	if (!wait)
-		return (0);
-
-	if (zfsvfs != NULL) {
-		/*
-		 * Sync a specific filesystem.
-		 */
-		dsl_pool_t *dp;
-
-		ZFS_ENTER(zfsvfs);
-		dp = dmu_objset_pool(zfsvfs->z_os);
-
-		/*
-		 * If the system is shutting down, then skip any
-		 * filesystems which may exist on a suspended pool.
-		 */
-		if (spa_suspended(dp->dp_spa)) {
-			ZFS_EXIT(zfsvfs);
-			return (0);
-		}
-
-		if (zfsvfs->z_log != NULL)
-			zil_commit(zfsvfs->z_log, 0);
-
-		ZFS_EXIT(zfsvfs);
-	} else {
-		/*
-		 * Sync all ZFS filesystems.  This is what happens when you
-		 * run sync(1M).  Unlike other filesystems, ZFS honors the
-		 * request by waiting for all pools to commit all dirty data.
-		 */
-		spa_sync_allpools();
-	}
-
-	return (0);
-}
-
-static void
-atime_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-	struct super_block *sb = zfsvfs->z_sb;
-
-	if (sb == NULL)
-		return;
-	/*
-	 * Update SB_NOATIME bit in VFS super block.  Since atime update is
-	 * determined by atime_needs_update(), atime_needs_update() needs to
-	 * return false if atime is turned off, and not unconditionally return
-	 * false if atime is turned on.
-	 */
-	if (newval)
-		sb->s_flags &= ~SB_NOATIME;
-	else
-		sb->s_flags |= SB_NOATIME;
-}
-
-static void
-relatime_changed_cb(void *arg, uint64_t newval)
-{
-	((zfsvfs_t *)arg)->z_relatime = newval;
-}
-
-static void
-xattr_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-
-	if (newval == ZFS_XATTR_OFF) {
-		zfsvfs->z_flags &= ~ZSB_XATTR;
-	} else {
-		zfsvfs->z_flags |= ZSB_XATTR;
-
-		if (newval == ZFS_XATTR_SA)
-			zfsvfs->z_xattr_sa = B_TRUE;
-		else
-			zfsvfs->z_xattr_sa = B_FALSE;
-	}
-}
-
-static void
-acltype_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-
-	switch (newval) {
-	case ZFS_ACLTYPE_OFF:
-		zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
-		zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
-		break;
-	case ZFS_ACLTYPE_POSIXACL:
-#ifdef CONFIG_FS_POSIX_ACL
-		zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIXACL;
-		zfsvfs->z_sb->s_flags |= SB_POSIXACL;
-#else
-		zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
-		zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
-#endif /* CONFIG_FS_POSIX_ACL */
-		break;
-	default:
-		break;
-	}
-}
-
-static void
-blksz_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
-	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
-	ASSERT(ISP2(newval));
-
-	zfsvfs->z_max_blksz = newval;
-}
-
-static void
-readonly_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-	struct super_block *sb = zfsvfs->z_sb;
-
-	if (sb == NULL)
-		return;
-
-	if (newval)
-		sb->s_flags |= SB_RDONLY;
-	else
-		sb->s_flags &= ~SB_RDONLY;
-}
-
-static void
-devices_changed_cb(void *arg, uint64_t newval)
-{
-}
-
-static void
-setuid_changed_cb(void *arg, uint64_t newval)
-{
-}
-
-static void
-exec_changed_cb(void *arg, uint64_t newval)
-{
-}
-
-static void
-nbmand_changed_cb(void *arg, uint64_t newval)
-{
-	zfsvfs_t *zfsvfs = arg;
-	struct super_block *sb = zfsvfs->z_sb;
-
-	if (sb == NULL)
-		return;
-
-	if (newval == TRUE)
-		sb->s_flags |= SB_MANDLOCK;
-	else
-		sb->s_flags &= ~SB_MANDLOCK;
-}
-
-static void
-snapdir_changed_cb(void *arg, uint64_t newval)
-{
-	((zfsvfs_t *)arg)->z_show_ctldir = newval;
-}
-
-static void
-vscan_changed_cb(void *arg, uint64_t newval)
-{
-	((zfsvfs_t *)arg)->z_vscan = newval;
-}
-
-static void
-acl_inherit_changed_cb(void *arg, uint64_t newval)
-{
-	((zfsvfs_t *)arg)->z_acl_inherit = newval;
-}
-
-static int
-zfs_register_callbacks(vfs_t *vfsp)
-{
-	struct dsl_dataset *ds = NULL;
-	objset_t *os = NULL;
-	zfsvfs_t *zfsvfs = NULL;
-	int error = 0;
-
-	ASSERT(vfsp);
-	zfsvfs = vfsp->vfs_data;
-	ASSERT(zfsvfs);
-	os = zfsvfs->z_os;
-
-	/*
-	 * The act of registering our callbacks will destroy any mount
-	 * options we may have.  In order to enable temporary overrides
-	 * of mount options, we stash away the current values and
-	 * restore them after we register the callbacks.
-	 */
-	if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) {
-		vfsp->vfs_do_readonly = B_TRUE;
-		vfsp->vfs_readonly = B_TRUE;
-	}
-
-	/*
-	 * Register property callbacks.
-	 *
-	 * It would probably be fine to just check for i/o error from
-	 * the first prop_register(), but I guess I like to go
-	 * overboard...
-	 */
-	ds = dmu_objset_ds(os);
-	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
-	error = dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
-	    zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
-	error = error ? error : dsl_prop_register(ds,
-	    zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);
-	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
-	if (error)
-		goto unregister;
-
-	/*
-	 * Invoke our callbacks to restore temporary mount options.
-	 */
-	if (vfsp->vfs_do_readonly)
-		readonly_changed_cb(zfsvfs, vfsp->vfs_readonly);
-	if (vfsp->vfs_do_setuid)
-		setuid_changed_cb(zfsvfs, vfsp->vfs_setuid);
-	if (vfsp->vfs_do_exec)
-		exec_changed_cb(zfsvfs, vfsp->vfs_exec);
-	if (vfsp->vfs_do_devices)
-		devices_changed_cb(zfsvfs, vfsp->vfs_devices);
-	if (vfsp->vfs_do_xattr)
-		xattr_changed_cb(zfsvfs, vfsp->vfs_xattr);
-	if (vfsp->vfs_do_atime)
-		atime_changed_cb(zfsvfs, vfsp->vfs_atime);
-	if (vfsp->vfs_do_relatime)
-		relatime_changed_cb(zfsvfs, vfsp->vfs_relatime);
-	if (vfsp->vfs_do_nbmand)
-		nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand);
-
-	return (0);
-
-unregister:
-	dsl_prop_unregister_all(ds, zfsvfs);
-	return (error);
-}
-
-/*
- * Report the user, group and project ids recorded in an object's bonus
- * buffer.
- *
- *	bonustype	type of the bonus buffer; only DMU_OT_ZNODE and
- *			DMU_OT_SA are handled
- *	data		bonus buffer contents, may be NULL
- *	userp, groupp, projectp
- *			filled in with the ids on success
- *
- * Returns 0 on success, ENOENT for any other bonus type and EEXIST when
- * data is NULL.
- */
-static int
-zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
-    uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
-{
-	sa_hdr_phys_t sa;
-	sa_hdr_phys_t *sap = data;
-	uint64_t flags;
-	int hdrsize;
-	boolean_t swap = B_FALSE;
-
-	/*
-	 * Is it a valid type of object to track?
-	 */
-	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
-		return (SET_ERROR(ENOENT));
-
-	/*
-	 * If we have a NULL data pointer
-	 * then assume the id's aren't changing and
-	 * return EEXIST to the dmu to let it know to
-	 * use the same ids
-	 */
-	if (data == NULL)
-		return (SET_ERROR(EEXIST));
-
-	/* Old-style znode: the ids are plain fields, no project id stored. */
-	if (bonustype == DMU_OT_ZNODE) {
-		znode_phys_t *znp = data;
-		*userp = znp->zp_uid;
-		*groupp = znp->zp_gid;
-		*projectp = ZFS_DEFAULT_PROJID;
-		return (0);
-	}
-
-	if (sap->sa_magic == 0) {
-		/*
-		 * This should only happen for newly created files
-		 * that haven't had the znode data filled in yet.
-		 */
-		*userp = 0;
-		*groupp = 0;
-		*projectp = ZFS_DEFAULT_PROJID;
-		return (0);
-	}
-
-	/*
-	 * Work on a local copy of the header so that fixing up its byte
-	 * order below does not modify the caller's buffer.
-	 */
-	sa = *sap;
-	if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
-		/* Magic matches only after swapping: remember to swap ids. */
-		sa.sa_magic = SA_MAGIC;
-		sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
-		swap = B_TRUE;
-	} else {
-		VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
-	}
-
-	hdrsize = sa_hdrsize(&sa);
-	VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
-
-	/* The attributes are read at fixed offsets just past the header. */
-	*userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET));
-	*groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET));
-	flags = *((uint64_t *)((uintptr_t)data + hdrsize + SA_FLAGS_OFFSET));
-	/* flags must be in native order before ZFS_PROJID can be tested. */
-	if (swap)
-		flags = BSWAP_64(flags);
-
-	if (flags & ZFS_PROJID)
-		*projectp = *((uint64_t *)((uintptr_t)data + hdrsize +
-		    SA_PROJID_OFFSET));
-	else
-		*projectp = ZFS_DEFAULT_PROJID;
-
-	/*
-	 * The ids were read raw above; convert them now.  Note that this
-	 * also swaps *projectp when it was set to ZFS_DEFAULT_PROJID.
-	 */
-	if (swap) {
-		*userp = BSWAP_64(*userp);
-		*groupp = BSWAP_64(*groupp);
-		*projectp = BSWAP_64(*projectp);
-	}
-	return (0);
-}
-
-/*
- * Split the FUID encoded in 'fuidstr' into its domain name and RID.
- * The domain name is copied into 'domainbuf' (at most 'buflen' bytes), or
- * 'domainbuf' is set to the empty string when no domain is found for the
- * FUID's index.  The RID is stored in *ridp.
- */
-static void
-fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
-    char *domainbuf, int buflen, uid_t *ridp)
-{
-	uint64_t parsed = zfs_strtonum(fuidstr, NULL);
-	const char *name = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(parsed));
-
-	if (!name)
-		domainbuf[0] = '\0';
-	else
-		(void) strlcpy(domainbuf, name, buflen);
-
-	*ridp = FUID_RID(parsed);
-}
-
-/*
- * Map a user/group/project quota property to the object that holds its
- * values: one of the DMU "used" accounting objects for the *USED
- * properties, or the per-filesystem quota object cached in the zfsvfs for
- * the *QUOTA properties.  Unknown properties yield ZFS_NO_OBJECT.
- */
-static uint64_t
-zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
-{
-	uint64_t obj = ZFS_NO_OBJECT;
-
-	switch (type) {
-	case ZFS_PROP_USERUSED:
-	case ZFS_PROP_USEROBJUSED:
-		obj = DMU_USERUSED_OBJECT;
-		break;
-	case ZFS_PROP_GROUPUSED:
-	case ZFS_PROP_GROUPOBJUSED:
-		obj = DMU_GROUPUSED_OBJECT;
-		break;
-	case ZFS_PROP_PROJECTUSED:
-	case ZFS_PROP_PROJECTOBJUSED:
-		obj = DMU_PROJECTUSED_OBJECT;
-		break;
-	case ZFS_PROP_USERQUOTA:
-		obj = zfsvfs->z_userquota_obj;
-		break;
-	case ZFS_PROP_GROUPQUOTA:
-		obj = zfsvfs->z_groupquota_obj;
-		break;
-	case ZFS_PROP_USEROBJQUOTA:
-		obj = zfsvfs->z_userobjquota_obj;
-		break;
-	case ZFS_PROP_GROUPOBJQUOTA:
-		obj = zfsvfs->z_groupobjquota_obj;
-		break;
-	case ZFS_PROP_PROJECTQUOTA:
-		obj = zfsvfs->z_projectquota_obj;
-		break;
-	case ZFS_PROP_PROJECTOBJQUOTA:
-		obj = zfsvfs->z_projectobjquota_obj;
-		break;
-	default:
-		break;
-	}
-
-	return (obj);
-}
-
-/*
- * Copy as many accounting/quota entries of the given property type as fit
- * into 'vbuf', starting at the serialized ZAP cursor *cookiep.
- *
- * On return *bufsizep holds the number of bytes written and *cookiep the
- * cursor position to resume from.  Returns 0 on success (including when
- * the end of the ZAP is reached), ENOTSUP when the required accounting is
- * not present on the objset, or the error from zap_cursor_retrieve().
- */
-int
-zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
-{
-	zfs_useracct_t *next = vbuf;
-	zap_cursor_t zc;
-	zap_attribute_t za;
-	uint64_t obj;
-	int error;
-	int offset = 0;
-	boolean_t is_project = (type == ZFS_PROP_PROJECTQUOTA ||
-	    type == ZFS_PROP_PROJECTUSED ||
-	    type == ZFS_PROP_PROJECTOBJQUOTA ||
-	    type == ZFS_PROP_PROJECTOBJUSED);
-	boolean_t is_objcount = (type == ZFS_PROP_USEROBJUSED ||
-	    type == ZFS_PROP_GROUPOBJUSED ||
-	    type == ZFS_PROP_USEROBJQUOTA ||
-	    type == ZFS_PROP_GROUPOBJQUOTA ||
-	    type == ZFS_PROP_PROJECTOBJUSED ||
-	    type == ZFS_PROP_PROJECTOBJQUOTA);
-	boolean_t is_objused = (type == ZFS_PROP_USEROBJUSED ||
-	    type == ZFS_PROP_GROUPOBJUSED ||
-	    type == ZFS_PROP_PROJECTOBJUSED);
-
-	if (!dmu_objset_userspace_present(zfsvfs->z_os))
-		return (SET_ERROR(ENOTSUP));
-
-	if (is_project && !dmu_objset_projectquota_present(zfsvfs->z_os))
-		return (SET_ERROR(ENOTSUP));
-
-	if (is_objcount && !dmu_objset_userobjspace_present(zfsvfs->z_os))
-		return (SET_ERROR(ENOTSUP));
-
-	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
-	if (obj == ZFS_NO_OBJECT) {
-		*bufsizep = 0;
-		return (0);
-	}
-
-	/* Object-count entries carry a name prefix that must be skipped. */
-	if (is_objused)
-		offset = DMU_OBJACCT_PREFIX_LEN;
-
-	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
-	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
-	    zap_cursor_advance(&zc)) {
-		uintptr_t filled = (uintptr_t)next - (uintptr_t)vbuf;
-		boolean_t has_prefix;
-
-		/* Stop once another record would overflow the buffer. */
-		if (filled + sizeof (zfs_useracct_t) > *bufsizep)
-			break;
-
-		/*
-		 * Only emit entries of the kind that was asked for:
-		 * prefixed names when reporting object counts, unprefixed
-		 * names when reporting block usage.
-		 */
-		has_prefix = (strncmp(za.za_name, DMU_OBJACCT_PREFIX,
-		    DMU_OBJACCT_PREFIX_LEN) == 0);
-		if ((offset > 0) == has_prefix) {
-			fuidstr_to_sid(zfsvfs, za.za_name + offset,
-			    next->zu_domain, sizeof (next->zu_domain),
-			    &next->zu_rid);
-			next->zu_space = za.za_first_integer;
-			next++;
-		}
-	}
-	/* Running off the end of the ZAP is the normal way to finish. */
-	if (error == ENOENT)
-		error = 0;
-
-	ASSERT3U((uintptr_t)next - (uintptr_t)vbuf, <=, *bufsizep);
-	*bufsizep = (uintptr_t)next - (uintptr_t)vbuf;
-	*cookiep = zap_cursor_serialize(&zc);
-	zap_cursor_fini(&zc);
-	return (error);
-}
-
-/*
- * Format the FUID for (domain, rid) as a hexadecimal string in 'buf'.
- * An empty or NULL domain is encoded with domain index 0.  Returns ENOENT
- * when zfs_fuid_find_by_domain() reports -1 for the domain.
- *
- * buf must be big enough (eg, 32 bytes)
- */
-static int
-id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
-    char *buf, boolean_t addok)
-{
-	int domainid = 0;
-	uint64_t encoded;
-
-	if (domain != NULL && domain[0] != '\0') {
-		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
-		if (domainid == -1)
-			return (SET_ERROR(ENOENT));
-	}
-
-	encoded = FUID_ENCODE(domainid, rid);
-	(void) sprintf(buf, "%llx", (longlong_t)encoded);
-	return (0);
-}
-
-/*
- * Look up a single accounting/quota value for (domain, rid) and store it
- * in *valp.  *valp is 0 when there is no backing object or no entry.
- *
- * Returns 0 on success, ENOTSUP when the required accounting is not
- * present on the objset, EINVAL for an invalid project id, or an error
- * from id_to_fuidstr()/zap_lookup().
- */
-int
-zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    const char *domain, uint64_t rid, uint64_t *valp)
-{
-	char name[20 + DMU_OBJACCT_PREFIX_LEN];
-	uint64_t obj;
-	int prefixlen = 0;
-	int err;
-	boolean_t is_objcount = (type == ZFS_PROP_USEROBJUSED ||
-	    type == ZFS_PROP_GROUPOBJUSED ||
-	    type == ZFS_PROP_USEROBJQUOTA ||
-	    type == ZFS_PROP_GROUPOBJQUOTA ||
-	    type == ZFS_PROP_PROJECTOBJUSED ||
-	    type == ZFS_PROP_PROJECTOBJQUOTA);
-	boolean_t is_project = (type == ZFS_PROP_PROJECTQUOTA ||
-	    type == ZFS_PROP_PROJECTUSED ||
-	    type == ZFS_PROP_PROJECTOBJQUOTA ||
-	    type == ZFS_PROP_PROJECTOBJUSED);
-	boolean_t is_objused = (type == ZFS_PROP_USEROBJUSED ||
-	    type == ZFS_PROP_GROUPOBJUSED ||
-	    type == ZFS_PROP_PROJECTOBJUSED);
-
-	*valp = 0;
-
-	if (!dmu_objset_userspace_present(zfsvfs->z_os))
-		return (SET_ERROR(ENOTSUP));
-
-	if (is_objcount && !dmu_objset_userobjspace_present(zfsvfs->z_os))
-		return (SET_ERROR(ENOTSUP));
-
-	if (is_project) {
-		if (!dmu_objset_projectquota_present(zfsvfs->z_os))
-			return (SET_ERROR(ENOTSUP));
-		if (!zpl_is_valid_projid(rid))
-			return (SET_ERROR(EINVAL));
-	}
-
-	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
-	if (obj == ZFS_NO_OBJECT)
-		return (0);
-
-	/* Object-count usage entries are stored under a prefixed name. */
-	if (is_objused) {
-		strlcpy(name, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
-		prefixlen = DMU_OBJACCT_PREFIX_LEN;
-	}
-
-	err = id_to_fuidstr(zfsvfs, domain, rid, name + prefixlen, B_FALSE);
-	if (err)
-		return (err);
-
-	/* A missing entry simply leaves *valp at zero. */
-	err = zap_lookup(zfsvfs->z_os, obj, name, 8, 1, valp);
-	return (err == ENOENT ? 0 : err);
-}
-
-/*
- * Set (or, when 'quota' is 0, remove) the quota of the given type for
- * (domain, rid).  The quota ZAP object is created and recorded in the
- * master node on first use.
- *
- * Returns 0 on success, ENOTSUP when the filesystem version or objset does
- * not support the request, EINVAL for an unsupported type or invalid
- * project id, or an error from id_to_fuidstr(), dmu_tx_assign() or the ZAP
- * update.
- */
-int
-zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
-    const char *domain, uint64_t rid, uint64_t quota)
-{
-	char name[32];
-	uint64_t *objp;
-	dmu_tx_t *tx;
-	boolean_t fuid_dirtied;
-	int err;
-
-	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
-		return (SET_ERROR(ENOTSUP));
-
-	switch (type) {
-	case ZFS_PROP_USERQUOTA:
-		objp = &zfsvfs->z_userquota_obj;
-		break;
-	case ZFS_PROP_GROUPQUOTA:
-		objp = &zfsvfs->z_groupquota_obj;
-		break;
-	case ZFS_PROP_USEROBJQUOTA:
-		objp = &zfsvfs->z_userobjquota_obj;
-		break;
-	case ZFS_PROP_GROUPOBJQUOTA:
-		objp = &zfsvfs->z_groupobjquota_obj;
-		break;
-	case ZFS_PROP_PROJECTQUOTA:
-	case ZFS_PROP_PROJECTOBJQUOTA:
-		/* Both project quota kinds share the same preconditions. */
-		if (!dmu_objset_projectquota_enabled(zfsvfs->z_os))
-			return (SET_ERROR(ENOTSUP));
-		if (!zpl_is_valid_projid(rid))
-			return (SET_ERROR(EINVAL));
-
-		if (type == ZFS_PROP_PROJECTQUOTA)
-			objp = &zfsvfs->z_projectquota_obj;
-		else
-			objp = &zfsvfs->z_projectobjquota_obj;
-		break;
-	default:
-		return (SET_ERROR(EINVAL));
-	}
-
-	err = id_to_fuidstr(zfsvfs, domain, rid, name, B_TRUE);
-	if (err)
-		return (err);
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	if (*objp != 0) {
-		dmu_tx_hold_zap(tx, *objp, B_TRUE, NULL);
-	} else {
-		/* A new quota object also needs a master node entry. */
-		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
-		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
-		    zfs_userquota_prop_prefixes[type]);
-	}
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
-		dmu_tx_abort(tx);
-		return (err);
-	}
-
-	/* Create the quota object under z_lock if it is still missing. */
-	mutex_enter(&zfsvfs->z_lock);
-	if (*objp == 0) {
-		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
-		    DMU_OT_NONE, 0, tx);
-		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
-		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
-	}
-	mutex_exit(&zfsvfs->z_lock);
-
-	if (quota != 0) {
-		err = zap_update(zfsvfs->z_os, *objp, name, 8, 1, &quota, tx);
-	} else {
-		/* Removing a quota that was never set is not an error. */
-		err = zap_remove(zfsvfs->z_os, *objp, name, tx);
-		if (err == ENOENT)
-			err = 0;
-	}
-	ASSERT(err == 0);
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-	dmu_tx_commit(tx);
-	return (err);
-}
-
-/*
- * Report whether 'id' has reached its object-count quota.
- *
- *	usedobj	DMU_USERUSED_OBJECT, DMU_GROUPUSED_OBJECT or
- *		DMU_PROJECTUSED_OBJECT; selects which quota is checked
- *	id	the id whose usage is compared against its quota
- *
- * Returns B_TRUE only when both the quota and the usage entry are found
- * and usage >= quota.  If the required accounting is not yet present on
- * the objset, an upgrade is requested when possible and B_FALSE is
- * returned.  B_FALSE is also returned during replay, when no quota object
- * exists, or when either lookup fails.
- */
-boolean_t
-zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
-{
-	char buf[20 + DMU_OBJACCT_PREFIX_LEN];
-	uint64_t used, quota, quotaobj;
-	int err;
-
-	if (!dmu_objset_userobjspace_present(zfsvfs->z_os)) {
-		if (dmu_objset_userobjspace_upgradable(zfsvfs->z_os)) {
-			dsl_pool_config_enter(
-			    dmu_objset_pool(zfsvfs->z_os), FTAG);
-			dmu_objset_id_quota_upgrade(zfsvfs->z_os);
-			dsl_pool_config_exit(
-			    dmu_objset_pool(zfsvfs->z_os), FTAG);
-		}
-		return (B_FALSE);
-	}
-
-	if (usedobj == DMU_PROJECTUSED_OBJECT) {
-		if (!dmu_objset_projectquota_present(zfsvfs->z_os)) {
-			if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
-				dsl_pool_config_enter(
-				    dmu_objset_pool(zfsvfs->z_os), FTAG);
-				dmu_objset_id_quota_upgrade(zfsvfs->z_os);
-				dsl_pool_config_exit(
-				    dmu_objset_pool(zfsvfs->z_os), FTAG);
-			}
-			return (B_FALSE);
-		}
-		quotaobj = zfsvfs->z_projectobjquota_obj;
-	} else if (usedobj == DMU_USERUSED_OBJECT) {
-		quotaobj = zfsvfs->z_userobjquota_obj;
-	} else if (usedobj == DMU_GROUPUSED_OBJECT) {
-		quotaobj = zfsvfs->z_groupobjquota_obj;
-	} else {
-		return (B_FALSE);
-	}
-	if (quotaobj == 0 || zfsvfs->z_replay)
-		return (B_FALSE);
-
-	/*
-	 * Use bounded formatting so the stack buffer can never be overrun,
-	 * whatever the size of the prefix or the id.
-	 */
-	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);
-	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
-	if (err != 0)
-		return (B_FALSE);
-
-	/* Object-count usage is stored under the prefixed name. */
-	(void) snprintf(buf, sizeof (buf), DMU_OBJACCT_PREFIX "%llx",
-	    (longlong_t)id);
-	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
-	if (err != 0)
-		return (B_FALSE);
-	return (used >= quota);
-}
-
-/*
- * Report whether 'id' has reached its block (space) quota.
- *
- *	usedobj	DMU_USERUSED_OBJECT, DMU_GROUPUSED_OBJECT or
- *		DMU_PROJECTUSED_OBJECT; selects which quota is checked
- *	id	the id whose usage is compared against its quota
- *
- * Returns B_TRUE only when both the quota and the usage entry are found
- * and usage >= quota.  For project quotas, if project accounting is not
- * yet present an upgrade is requested when possible and B_FALSE is
- * returned.  B_FALSE is also returned during replay, when no quota object
- * exists, or when either lookup fails.
- */
-boolean_t
-zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
-{
-	char buf[20];
-	uint64_t used, quota, quotaobj;
-	int err;
-
-	if (usedobj == DMU_PROJECTUSED_OBJECT) {
-		if (!dmu_objset_projectquota_present(zfsvfs->z_os)) {
-			if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
-				dsl_pool_config_enter(
-				    dmu_objset_pool(zfsvfs->z_os), FTAG);
-				dmu_objset_id_quota_upgrade(zfsvfs->z_os);
-				dsl_pool_config_exit(
-				    dmu_objset_pool(zfsvfs->z_os), FTAG);
-			}
-			return (B_FALSE);
-		}
-		quotaobj = zfsvfs->z_projectquota_obj;
-	} else if (usedobj == DMU_USERUSED_OBJECT) {
-		quotaobj = zfsvfs->z_userquota_obj;
-	} else if (usedobj == DMU_GROUPUSED_OBJECT) {
-		quotaobj = zfsvfs->z_groupquota_obj;
-	} else {
-		return (B_FALSE);
-	}
-	if (quotaobj == 0 || zfsvfs->z_replay)
-		return (B_FALSE);
-
-	/*
-	 * Bounded formatting: the same name is used for both the quota
-	 * and the usage lookup.
-	 */
-	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);
-	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
-	if (err != 0)
-		return (B_FALSE);
-
-	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
-	if (err != 0)
-		return (B_FALSE);
-	return (used >= quota);
-}
-
-/*
- * Report whether 'id' is over either its block quota or its object quota.
- * The object quota is only consulted when the block quota is not exceeded.
- */
-boolean_t
-zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
-{
-	if (zfs_id_overblockquota(zfsvfs, usedobj, id))
-		return (B_TRUE);
-
-	if (zfs_id_overobjquota(zfsvfs, usedobj, id))
-		return (B_TRUE);
-
-	return (B_FALSE);
-}
-
-/*
- * Look up an optional object number stored under 'name' in the master
- * node.  A missing entry is not an error: *objp is set to 0 and 0 is
- * returned.  Any other zap_lookup() failure is returned unchanged.
- */
-static int
-zfsvfs_lookup_optional_obj(objset_t *os, const char *name, uint64_t *objp)
-{
-	int error = zap_lookup(os, MASTER_NODE_OBJ, name, 8, 1, objp);
-
-	if (error == ENOENT) {
-		*objp = 0;
-		error = 0;
-	}
-	return (error);
-}
-
-/*
- * Associate this zfsvfs with the given objset, which must be owned.
- * This will cache a bunch of on-disk state from the objset in the
- * zfsvfs.
- *
- * Returns 0 on success, ENOTSUP when the filesystem version is newer than
- * the pool supports, or the first error from a property/ZAP lookup or
- * sa_setup().
- */
-static int
-zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
-{
-	int error;
-	uint64_t val;
-
-	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
-	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
-	zfsvfs->z_os = os;
-
-	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
-	if (error != 0)
-		return (error);
-	if (zfsvfs->z_version >
-	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
-		(void) printk("Can't mount a version %lld file system "
-		    "on a version %lld pool\n. Pool must be upgraded to mount "
-		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
-		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
-		return (SET_ERROR(ENOTSUP));
-	}
-	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
-	if (error != 0)
-		return (error);
-	zfsvfs->z_norm = (int)val;
-
-	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
-	if (error != 0)
-		return (error);
-	zfsvfs->z_utf8 = (val != 0);
-
-	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
-	if (error != 0)
-		return (error);
-	zfsvfs->z_case = (uint_t)val;
-
-	if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0)
-		return (error);
-	zfsvfs->z_acl_type = (uint_t)val;
-
-	/*
-	 * Fold case on file systems that are always or sometimes case
-	 * insensitive.
-	 */
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
-	    zfsvfs->z_case == ZFS_CASE_MIXED)
-		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
-
-	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
-	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
-
-	uint64_t sa_obj = 0;
-	if (zfsvfs->z_use_sa) {
-		/* should either have both of these objects or none */
-		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
-		    &sa_obj);
-		if (error != 0)
-			return (error);
-
-		/* A failed xattr lookup just leaves z_xattr_sa untouched. */
-		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
-		if ((error == 0) && (val == ZFS_XATTR_SA))
-			zfsvfs->z_xattr_sa = B_TRUE;
-	}
-
-	/* The root directory and unlinked set entries are mandatory. */
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
-	    &zfsvfs->z_root);
-	if (error != 0)
-		return (error);
-	ASSERT(zfsvfs->z_root != 0);
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
-	    &zfsvfs->z_unlinkedobj);
-	if (error != 0)
-		return (error);
-
-	/*
-	 * The remaining master node entries are optional; a missing entry
-	 * is recorded as object 0.
-	 */
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
-	    &zfsvfs->z_userquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
-	    &zfsvfs->z_groupquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
-	    &zfsvfs->z_projectquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
-	    &zfsvfs->z_userobjquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
-	    &zfsvfs->z_groupobjquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os,
-	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
-	    &zfsvfs->z_projectobjquota_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os, ZFS_FUID_TABLES,
-	    &zfsvfs->z_fuid_obj);
-	if (error != 0)
-		return (error);
-
-	error = zfsvfs_lookup_optional_obj(os, ZFS_SHARES_DIR,
-	    &zfsvfs->z_shares_dir);
-	if (error != 0)
-		return (error);
-
-	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
-	    &zfsvfs->z_attr_table);
-	if (error != 0)
-		return (error);
-
-	if (zfsvfs->z_version >= ZPL_VERSION_SA)
-		sa_register_update_callback(os, zfs_sa_upgrade);
-
-	return (0);
-}
-
-/*
- * Allocate a zfsvfs for the named objset, take ownership of the objset and
- * initialise the zfsvfs from it.  Names containing '@' are always owned
- * read-only.  On success *zfvp is set by zfsvfs_create_impl(); on failure
- * the objset is disowned and the error is returned.
- */
-int
-zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
-{
-	boolean_t ro = B_FALSE;
-	zfsvfs_t *zfsvfs;
-	objset_t *os;
-	int error;
-
-	if (readonly || strchr(osname, '@') != NULL)
-		ro = B_TRUE;
-
-	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
-
-	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os);
-	if (error != 0) {
-		kmem_free(zfsvfs, sizeof (zfsvfs_t));
-		return (error);
-	}
-
-	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
-	if (error == 0)
-		return (0);
-
-	/* zfsvfs is only passed as the ownership tag here. */
-	dmu_objset_disown(os, B_TRUE, zfsvfs);
-	return (error);
-}
-
-
-/*
- * Initialise the locks, lists and znode hold tables of 'zfsvfs' and then
- * load its on-disk state from 'os' via zfsvfs_init().
- *
- * Note: zfsvfs is assumed to be malloc'd, and will be freed by this function
- * on a failure.  Do not pass in a statically allocated zfsvfs.
- *
- * On success *zfvp points at the zfsvfs; on failure it is set to NULL.
- */
-int
-zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
-{
-	int error;
-
-	zfsvfs->z_vfs = NULL;
-	zfsvfs->z_sb = NULL;
-	zfsvfs->z_parent = zfsvfs;
-
-	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
-	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
-	    offsetof(znode_t, z_link_node));
-	rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
-	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
-	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
-
-	/* Number of hold buckets: capped at ZFS_OBJ_MTX_MAX. */
-	int nholds = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
-	    ZFS_OBJ_MTX_MAX);
-	zfsvfs->z_hold_size = nholds;
-	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * nholds,
-	    KM_SLEEP);
-	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * nholds,
-	    KM_SLEEP);
-
-	int idx = 0;
-	while (idx != nholds) {
-		avl_create(&zfsvfs->z_hold_trees[idx], zfs_znode_hold_compare,
-		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
-		mutex_init(&zfsvfs->z_hold_locks[idx], NULL, MUTEX_DEFAULT,
-		    NULL);
-		idx++;
-	}
-
-	error = zfsvfs_init(zfsvfs, os);
-	if (error == 0) {
-		zfsvfs->z_drain_task = TASKQID_INVALID;
-		zfsvfs->z_draining = B_FALSE;
-		zfsvfs->z_drain_cancel = B_TRUE;
-
-		*zfvp = zfsvfs;
-		return (0);
-	}
-
-	/* Initialisation failed: release everything set up above. */
-	*zfvp = NULL;
-	zfsvfs_free(zfsvfs);
-	return (error);
-}
-
-/*
- * Finish bringing a zfsvfs online: register the property callbacks and
- * open the ZIL.  When 'mounting' is set, also create the dataset kstats,
- * drain the unlinked set (unless the filesystem is read-only) and replay
- * or destroy the intent log.  Finally the objset's user pointer is set to
- * this zfsvfs.
- *
- * Returns 0 on success or the error from zfs_register_callbacks().
- */
-static int
-zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
-{
-	int error;
-	boolean_t readonly = zfs_is_readonly(zfsvfs);
-
-	error = zfs_register_callbacks(zfsvfs->z_vfs);
-	if (error)
-		return (error);
-
-	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
-
-	/*
-	 * If we are not mounting (ie: online recv), then we don't
-	 * have to worry about replaying the log as we blocked all
-	 * operations out since we closed the ZIL.
-	 */
-	if (mounting) {
-		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
-		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
-
-		/*
-		 * During replay we remove the read only flag to
-		 * allow replays to succeed.
-		 */
-		if (readonly != 0) {
-			readonly_changed_cb(zfsvfs, B_FALSE);
-		} else {
-			zap_stats_t zs;
-
-			/*
-			 * Only report the entry count when the stats were
-			 * actually retrieved; 'zs' is uninitialised if
-			 * zap_get_stats() fails.
-			 */
-			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
-			    &zs) == 0) {
-				dataset_kstats_update_nunlinks_kstat(
-				    &zfsvfs->z_kstat, zs.zs_num_entries);
-				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
-				    "num_entries in unlinked set: %llu",
-				    zs.zs_num_entries);
-			}
-			zfs_unlinked_drain(zfsvfs);
-		}
-
-		/*
-		 * Parse and replay the intent log.
-		 *
-		 * Because of ziltest, this must be done after
-		 * zfs_unlinked_drain().  (Further note: ziltest
-		 * doesn't use readonly mounts, where
-		 * zfs_unlinked_drain() isn't called.)  This is because
-		 * ziltest causes spa_sync() to think it's committed,
-		 * but actually it is not, so the intent log contains
-		 * many txg's worth of changes.
-		 *
-		 * In particular, if object N is in the unlinked set in
-		 * the last txg to actually sync, then it could be
-		 * actually freed in a later txg and then reallocated
-		 * in a yet later txg.  This would write a "create
-		 * object N" record to the intent log.  Normally, this
-		 * would be fine because the spa_sync() would have
-		 * written out the fact that object N is free, before
-		 * we could write the "create object N" intent log
-		 * record.
-		 *
-		 * But when we are in ziltest mode, we advance the "open
-		 * txg" without actually spa_sync()-ing the changes to
-		 * disk.  So we would see that object N is still
-		 * allocated and in the unlinked set, and there is an
-		 * intent log record saying to allocate it.
-		 */
-		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
-			if (zil_replay_disable) {
-				zil_destroy(zfsvfs->z_log, B_FALSE);
-			} else {
-				/* z_replay is set only while replaying. */
-				zfsvfs->z_replay = B_TRUE;
-				zil_replay(zfsvfs->z_os, zfsvfs,
-				    zfs_replay_vector);
-				zfsvfs->z_replay = B_FALSE;
-			}
-		}
-
-		/* restore readonly bit */
-		if (readonly != 0)
-			readonly_changed_cb(zfsvfs, B_TRUE);
-	}
-
-	/*
-	 * Set the objset user_ptr to track its zfsvfs.
-	 */
-	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
-	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
-
-	return (0);
-}
-
-/*
- * Destroy the locks, lists and hold tables of a zfsvfs, release its vfs
- * options and kstats, and free the zfsvfs itself.
- */
-void
-zfsvfs_free(zfsvfs_t *zfsvfs)
-{
-	int nholds = zfsvfs->z_hold_size;
-	int idx;
-
-	zfs_fuid_destroy(zfsvfs);
-
-	mutex_destroy(&zfsvfs->z_znodes_lock);
-	mutex_destroy(&zfsvfs->z_lock);
-	list_destroy(&zfsvfs->z_all_znodes);
-	rrm_destroy(&zfsvfs->z_teardown_lock);
-	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
-	rw_destroy(&zfsvfs->z_fuid_lock);
-
-	/* Tear down every hold bucket before releasing the arrays. */
-	idx = 0;
-	while (idx != nholds) {
-		avl_destroy(&zfsvfs->z_hold_trees[idx]);
-		mutex_destroy(&zfsvfs->z_hold_locks[idx]);
-		idx++;
-	}
-	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * nholds);
-	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * nholds);
-
-	zfsvfs_vfs_free(zfsvfs->z_vfs);
-	dataset_kstats_destroy(&zfsvfs->z_kstat);
-	kmem_free(zfsvfs, sizeof (zfsvfs_t));
-}
-
-/*
- * Recompute the cached z_use_fuids and z_use_sa flags from the filesystem
- * version and the objset.
- */
-static void
-zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
-{
-	boolean_t fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
-	boolean_t sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
-
-	zfsvfs->z_use_fuids = fuids;
-	zfsvfs->z_use_sa = sa;
-}
-
-/*
- * Unregister every property callback registered for this zfsvfs.
- * Nothing is done for snapshots.
- */
-void
-zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
-{
-	objset_t *os = zfsvfs->z_os;
-
-	if (dmu_objset_is_snapshot(os))
-		return;
-
-	dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
-}
-
-#ifdef HAVE_MLSLABEL
-/*
- * Check that the hex label string is appropriate for the dataset being
- * mounted into the global_zone proper.
- *
- * Return an error if the hex label string is not default or
- * admin_low/admin_high.  For admin_low labels, the corresponding
- * dataset must be readonly.
- *
- * Returns 0 when the label is acceptable and EACCES otherwise; every
- * error return goes through SET_ERROR().
- */
-int
-zfs_check_global_label(const char *dsname, const char *hexsl)
-{
-	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
-		return (0);
-	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
-		return (0);
-	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
-		/* must be readonly */
-		uint64_t rdonly;
-
-		/* Failing to read the property is treated as a denial. */
-		if (dsl_prop_get_integer(dsname,
-		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
-			return (SET_ERROR(EACCES));
-		if (!rdonly)
-			return (SET_ERROR(EACCES));
-		return (0);
-	}
-	return (SET_ERROR(EACCES));
-}
-#endif /* HAVE_MLSLABEL */
-
-/*
- * Override the block and file counts in 'statp' with the project quota
- * limits that apply to zp's project id, when such quotas are set.
- *
- * The block figures are replaced only if a project block quota entry
- * exists; the file figures only if a project object quota entry exists.
- * 'bshift' is the shift used to convert bytes to blocks.
- *
- * Returns 0 on success (including when no quota applies) or an error from
- * id_to_fuidstr()/zap_lookup().
- */
-static int
-zfs_statfs_project(zfsvfs_t *zfsvfs, znode_t *zp, struct kstatfs *statp,
-    uint32_t bshift)
-{
-	char buf[20 + DMU_OBJACCT_PREFIX_LEN];
-	uint64_t offset = DMU_OBJACCT_PREFIX_LEN;
-	uint64_t quota;
-	uint64_t used;
-	int err;
-
-	/*
-	 * Build the prefixed name once: 'buf' is the object-count key and
-	 * 'buf + offset' (the part after the prefix) is the block key.
-	 */
-	strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
-	err = id_to_fuidstr(zfsvfs, NULL, zp->z_projid, buf + offset, B_FALSE);
-	if (err)
-		return (err);
-
-	/* No block quota object: go straight to the object quota. */
-	if (zfsvfs->z_projectquota_obj == 0)
-		goto objs;
-
-	err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectquota_obj,
-	    buf + offset, 8, 1, &quota);
-	if (err == ENOENT)
-		goto objs;
-	else if (err)
-		return (err);
-
-	err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
-	    buf + offset, 8, 1, &used);
-	if (unlikely(err == ENOENT)) {
-		uint32_t blksize;
-		u_longlong_t nblocks;
-
-		/*
-		 * Quota accounting is async, so it is possible race case.
-		 * There is at least one object with the given project ID.
-		 */
-		sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
-		if (unlikely(zp->z_blksz == 0))
-			blksize = zfsvfs->z_max_blksz;
-
-		/* Fall back to this znode's own size as the usage figure. */
-		used = blksize * nblocks;
-	} else if (err) {
-		return (err);
-	}
-
-	/* Report the quota as the size and what is left of it as free. */
-	statp->f_blocks = quota >> bshift;
-	statp->f_bfree = (quota > used) ? ((quota - used) >> bshift) : 0;
-	statp->f_bavail = statp->f_bfree;
-
-objs:
-	if (zfsvfs->z_projectobjquota_obj == 0)
-		return (0);
-
-	err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectobjquota_obj,
-	    buf + offset, 8, 1, &quota);
-	if (err == ENOENT)
-		return (0);
-	else if (err)
-		return (err);
-
-	/* Object usage is looked up under the prefixed name. */
-	err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
-	    buf, 8, 1, &used);
-	if (unlikely(err == ENOENT)) {
-		/*
-		 * Quota accounting is async, so it is possible race case.
-		 * There is at least one object with the given project ID.
-		 */
-		used = 1;
-	} else if (err) {
-		return (err);
-	}
-
-	statp->f_files = quota;
-	statp->f_ffree = (quota > used) ? (quota - used) : 0;
-
-	return (0);
-}
-
-/*
- * Fill in 'statp' with the space and object statistics of the filesystem
- * that 'dentry' belongs to.  When project quotas are enabled and present,
- * and the inode inherits a valid non-zero project id, the figures are
- * further adjusted by zfs_statfs_project().
- *
- * Returns 0, or the error from zfs_statfs_project().
- */
-int
-zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
-{
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-	uint64_t refdbytes, availbytes, usedobjs, availobjs;
-	uint64_t fsid;
-	uint32_t bshift;
-	int err = 0;
-
-	ZFS_ENTER(zfsvfs);
-
-	dmu_objset_space(zfsvfs->z_os,
-	    &refdbytes, &availbytes, &usedobjs, &availobjs);
-	fsid = dmu_objset_fsid_guid(zfsvfs->z_os);
-
-	/*
-	 * A pool stores data in several block sizes.  Solaris reports the
-	 * smallest supported size as frsize and the filesystem's maximum
-	 * as bsize, but Linux consumers tend to treat the two as the same
-	 * thing, so both are set to the filesystem's maximum block size.
-	 */
-	statp->f_frsize = zfsvfs->z_max_blksz;
-	statp->f_bsize = zfsvfs->z_max_blksz;
-	bshift = fls(statp->f_bsize) - 1;
-
-	/*
-	 * Block totals, expressed in units of f_bsize.  The referenced
-	 * bytes are rounded up first so that a filesystem never appears
-	 * to use zero blocks.
-	 */
-	refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize);
-	statp->f_blocks = (refdbytes + availbytes) >> bshift;
-	statp->f_bfree = availbytes >> bshift;
-	/* There is no root reservation, so bavail equals bfree. */
-	statp->f_bavail = statp->f_bfree;
-
-	/*
-	 * ZFS does not preallocate file metadata, so the file counts are
-	 * estimates: the free count is the smaller of the available
-	 * objects and the available bytes shifted by DNODE_SHIFT, and the
-	 * total is that plus the objects already in use.
-	 */
-	statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT);
-	statp->f_files = statp->f_ffree + usedobjs;
-
-	/* The 64-bit fsid guid is split across the two 32-bit words. */
-	statp->f_fsid.val[0] = (uint32_t)fsid;
-	statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
-	statp->f_type = ZFS_SUPER_MAGIC;
-	statp->f_namelen = MAXNAMELEN - 1;
-
-	/* Nothing useful is reported in the spare area; clear it. */
-	bzero(statp->f_spare, sizeof (statp->f_spare));
-
-	if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
-	    dmu_objset_projectquota_present(zfsvfs->z_os)) {
-		znode_t *zp = ITOZ(dentry->d_inode);
-
-		if ((zp->z_pflags & ZFS_PROJINHERIT) && zp->z_projid &&
-		    zpl_is_valid_projid(zp->z_projid)) {
-			err = zfs_statfs_project(zfsvfs, zp, statp, bshift);
-		}
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (err);
-}
-
-/*
- * Look up the root znode of the filesystem and return its inode in *ipp.
- * *ipp is left untouched when zfs_zget() fails; the zfs_zget() result is
- * returned.
- */
-int
-zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
-{
-	znode_t *root = NULL;
-	int error;
-
-	ZFS_ENTER(zfsvfs);
-
-	error = zfs_zget(zfsvfs, zfsvfs->z_root, &root);
-	if (error != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	*ipp = ZTOI(root);
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-#ifdef HAVE_D_PRUNE_ALIASES
-/*
- * Linux kernels older than 3.1 do not support a per-filesystem shrinker.
- * To accommodate this we must improvise and manually walk the list of znodes
- * attempting to prune dentries in order to be able to drop the inodes.
- *
- * To avoid scanning the same znodes multiple times they are always rotated
- * to the end of the z_all_znodes list.  New znodes are inserted at the
- * end of the list so we're always scanning the oldest znodes first.
- *
- * Returns the number of pruned inodes whose reference count had dropped
- * to the single reference taken here.
- */
-static int
-zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
-{
-	znode_t **zp_array, *zp;
-	/* Cap the candidate array at eight pages worth of pointers. */
-	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
-	int objects = 0;
-	int i = 0, j = 0;
-
-	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
-
-	/* Phase 1: collect candidates while holding the znode list lock. */
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
-
-		/*
-		 * NOTE(review): because of the post-increment compare this
-		 * examines up to nr_to_scan + 1 znodes.
-		 */
-		if ((i++ > nr_to_scan) || (j >= max_array))
-			break;
-
-		/* Rotate the znode to the tail so it is not rescanned. */
-		ASSERT(list_link_active(&zp->z_link_node));
-		list_remove(&zfsvfs->z_all_znodes, zp);
-		list_insert_tail(&zfsvfs->z_all_znodes, zp);
-
-		/* Skip active znodes and .zfs entries */
-		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
-			continue;
-
-		/* Take a reference; skip inodes that cannot be grabbed. */
-		if (igrab(ZTOI(zp)) == NULL)
-			continue;
-
-		zp_array[j] = zp;
-		j++;
-	}
-	mutex_exit(&zfsvfs->z_znodes_lock);
-
-	/* Phase 2: prune the collected inodes with the lock dropped. */
-	for (i = 0; i < j; i++) {
-		zp = zp_array[i];
-
-		ASSERT3P(zp, !=, NULL);
-		d_prune_aliases(ZTOI(zp));
-
-		/* Only our igrab() reference is left on this inode. */
-		if (atomic_read(&ZTOI(zp)->i_count) == 1)
-			objects++;
-
-		iput(ZTOI(zp));
-	}
-
-	kmem_free(zp_array, max_array * sizeof (znode_t *));
-
-	return (objects);
-}
-#endif /* HAVE_D_PRUNE_ALIASES */
-
-/*
- * The ARC has requested that the filesystem drop entries from the dentry
- * and inode caches.  This can occur when the ARC needs to free meta data
- * blocks but can't because they are all pinned by entries in these caches.
- *
- *	sb		super block whose caches should be pruned
- *	nr_to_scan	number of entries to scan
- *	objects		set to the count reported by the pruning mechanism
- *
- * Which mechanism is used is selected at compile time by the HAVE_* macros
- * below.  As written, no statement in this function assigns 'error' after
- * its initialisation to 0.
- */
-int
-zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
-{
-	zfsvfs_t *zfsvfs = sb->s_fs_info;
-	int error = 0;
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-	struct shrinker *shrinker = &sb->s_shrink;
-	struct shrink_control sc = {
-		.nr_to_scan = nr_to_scan,
-		.gfp_mask = GFP_KERNEL,
-	};
-#endif
-
-	ZFS_ENTER(zfsvfs);
-
-#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
-	defined(SHRINK_CONTROL_HAS_NID) && \
-	defined(SHRINKER_NUMA_AWARE)
-	/* NUMA-aware shrinker: scan once per online node and sum up. */
-	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
-		*objects = 0;
-		for_each_online_node(sc.nid) {
-			*objects += (*shrinker->scan_objects)(shrinker, &sc);
-		}
-	} else {
-			*objects = (*shrinker->scan_objects)(shrinker, &sc);
-	}
-
-#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-	*objects = (*shrinker->scan_objects)(shrinker, &sc);
-#elif defined(HAVE_SHRINK)
-	*objects = (*shrinker->shrink)(shrinker, &sc);
-#elif defined(HAVE_D_PRUNE_ALIASES)
-/* Marks that zfs_prune_aliases() was already used as the primary path. */
-#define	D_PRUNE_ALIASES_IS_DEFAULT
-	*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
-#else
-#error "No available dentry and inode cache pruning mechanism."
-#endif
-
-#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
-/*
- * NOTE(review): this #undef sits in the branch where the macro is not
- * defined, so it has no effect here; when the macro was defined above it
- * stays defined for the rest of the file.
- */
-#undef	D_PRUNE_ALIASES_IS_DEFAULT
-	/*
-	 * Fall back to zfs_prune_aliases if the kernel's per-superblock
-	 * shrinker couldn't free anything, possibly due to the inodes being
-	 * allocated in a different memcg.
-	 */
-	if (*objects == 0)
-		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
-#endif
-
-	ZFS_EXIT(zfsvfs);
-
-	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
-	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
-	    nr_to_scan, *objects, error);
-
-	return (error);
-}
-
-/*
- * Teardown the zfsvfs_t.
- *
- * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
- * and 'z_teardown_inactive_lock' held.
- */
-static int
-zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
-{
-	znode_t	*zp;
-
-	zfs_unlinked_drain_stop_wait(zfsvfs);
-
-	/*
-	 * If someone has not already unmounted this file system,
-	 * drain the iput_taskq to ensure all active references to the
-	 * zfsvfs_t have been handled only then can it be safely destroyed.
-	 */
-	if (zfsvfs->z_os) {
-		/*
-		 * If we're unmounting we have to wait for the list to
-		 * drain completely.
-		 *
-		 * If we're not unmounting there's no guarantee the list
-		 * will drain completely, but iputs run from the taskq
-		 * may add the parents of dir-based xattrs to the taskq
-		 * so we want to wait for these.
-		 *
-		 * We can safely read z_nr_znodes without locking because the
-		 * VFS has already blocked operations which add to the
-		 * z_all_znodes list and thus increment z_nr_znodes.
-		 */
-		int round = 0;
-		while (zfsvfs->z_nr_znodes > 0) {
-			taskq_wait_outstanding(dsl_pool_iput_taskq(
-			    dmu_objset_pool(zfsvfs->z_os)), 0);
-			if (++round > 1 && !unmounting)
-				break;
-		}
-	}
-
-	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
-
-	if (!unmounting) {
-		/*
-		 * We purge the parent filesystem's super block as the
-		 * parent filesystem and all of its snapshots have their
-		 * inode's super block set to the parent's filesystem's
-		 * super block.  Note,  'z_parent' is self referential
-		 * for non-snapshots.
-		 */
-		shrink_dcache_sb(zfsvfs->z_parent->z_sb);
-	}
-
-	/*
-	 * Close the zil. NB: Can't close the zil while zfs_inactive
-	 * threads are blocked as zil_close can call zfs_inactive.
-	 */
-	if (zfsvfs->z_log) {
-		zil_close(zfsvfs->z_log);
-		zfsvfs->z_log = NULL;
-	}
-
-	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
-
-	/*
-	 * If we are not unmounting (ie: online recv) and someone already
-	 * unmounted this file system while we were doing the switcheroo,
-	 * or a reopen of z_os failed then just bail out now.
-	 */
-	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
-		rw_exit(&zfsvfs->z_teardown_inactive_lock);
-		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
-		return (SET_ERROR(EIO));
-	}
-
-	/*
-	 * At this point there are no VFS ops active, and any new VFS ops
-	 * will fail with EIO since we have z_teardown_lock for writer (only
-	 * relevant for forced unmount).
-	 *
-	 * Release all holds on dbufs. We also grab an extra reference to all
-	 * the remaining inodes so that the kernel does not attempt to free
-	 * any inodes of a suspended fs. This can cause deadlocks since the
-	 * zfs_resume_fs() process may involve starting threads, which might
-	 * attempt to free unreferenced inodes to free up memory for the new
-	 * thread.
-	 */
-	if (!unmounting) {
-		mutex_enter(&zfsvfs->z_znodes_lock);
-		for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
-		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
-			if (zp->z_sa_hdl)
-				zfs_znode_dmu_fini(zp);
-			if (igrab(ZTOI(zp)) != NULL)
-				zp->z_suspended = B_TRUE;
-
-		}
-		mutex_exit(&zfsvfs->z_znodes_lock);
-	}
-
-	/*
-	 * If we are unmounting, set the unmounted flag and let new VFS ops
-	 * unblock.  zfs_inactive will have the unmounted behavior, and all
-	 * other VFS ops will fail with EIO.
-	 */
-	if (unmounting) {
-		zfsvfs->z_unmounted = B_TRUE;
-		rw_exit(&zfsvfs->z_teardown_inactive_lock);
-		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
-	}
-
-	/*
-	 * z_os will be NULL if there was an error in attempting to reopen
-	 * zfsvfs, so just return as the properties had already been
-	 *
-	 * unregistered and cached data had been evicted before.
-	 */
-	if (zfsvfs->z_os == NULL)
-		return (0);
-
-	/*
-	 * Unregister properties.
-	 */
-	zfs_unregister_callbacks(zfsvfs);
-
-	/*
-	 * Evict cached data. We must write out any dirty data before
-	 * disowning the dataset.
-	 */
-	objset_t *os = zfsvfs->z_os;
-	boolean_t os_dirty = B_FALSE;
-	for (int t = 0; t < TXG_SIZE; t++) {
-		if (dmu_objset_is_dirty(os, t)) {
-			os_dirty = B_TRUE;
-			break;
-		}
-	}
-	if (!zfs_is_readonly(zfsvfs) && os_dirty) {
-		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
-	}
-	dmu_objset_evict_dbufs(zfsvfs->z_os);
-
-	return (0);
-}
-
-#if !defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) && \
-	!defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER)
-atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
-#endif
-
-int
-zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
-{
-	const char *osname = zm->mnt_osname;
-	struct inode *root_inode;
-	uint64_t recordsize;
-	int error = 0;
-	zfsvfs_t *zfsvfs = NULL;
-	vfs_t *vfs = NULL;
-
-	ASSERT(zm);
-	ASSERT(osname);
-
-	error = zfsvfs_parse_options(zm->mnt_data, &vfs);
-	if (error)
-		return (error);
-
-	error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
-	if (error) {
-		zfsvfs_vfs_free(vfs);
-		goto out;
-	}
-
-	if ((error = dsl_prop_get_integer(osname, "recordsize",
-	    &recordsize, NULL))) {
-		zfsvfs_vfs_free(vfs);
-		goto out;
-	}
-
-	vfs->vfs_data = zfsvfs;
-	zfsvfs->z_vfs = vfs;
-	zfsvfs->z_sb = sb;
-	sb->s_fs_info = zfsvfs;
-	sb->s_magic = ZFS_SUPER_MAGIC;
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_time_gran = 1;
-	sb->s_blocksize = recordsize;
-	sb->s_blocksize_bits = ilog2(recordsize);
-
-	error = -zpl_bdi_setup(sb, "zfs");
-	if (error)
-		goto out;
-
-	sb->s_bdi->ra_pages = 0;
-
-	/* Set callback operations for the file system. */
-	sb->s_op = &zpl_super_operations;
-	sb->s_xattr = zpl_xattr_handlers;
-	sb->s_export_op = &zpl_export_operations;
-#ifdef HAVE_S_D_OP
-	sb->s_d_op = &zpl_dentry_operations;
-#endif /* HAVE_S_D_OP */
-
-	/* Set features for file system. */
-	zfs_set_fuid_feature(zfsvfs);
-
-	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
-		uint64_t pval;
-
-		atime_changed_cb(zfsvfs, B_FALSE);
-		readonly_changed_cb(zfsvfs, B_TRUE);
-		if ((error = dsl_prop_get_integer(osname,
-		    "xattr", &pval, NULL)))
-			goto out;
-		xattr_changed_cb(zfsvfs, pval);
-		if ((error = dsl_prop_get_integer(osname,
-		    "acltype", &pval, NULL)))
-			goto out;
-		acltype_changed_cb(zfsvfs, pval);
-		zfsvfs->z_issnap = B_TRUE;
-		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
-		zfsvfs->z_snap_defer_time = jiffies;
-
-		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
-		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
-	} else {
-		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
-			goto out;
-	}
-
-	/* Allocate a root inode for the filesystem. */
-	error = zfs_root(zfsvfs, &root_inode);
-	if (error) {
-		(void) zfs_umount(sb);
-		goto out;
-	}
-
-	/* Allocate a root dentry for the filesystem */
-	sb->s_root = d_make_root(root_inode);
-	if (sb->s_root == NULL) {
-		(void) zfs_umount(sb);
-		error = SET_ERROR(ENOMEM);
-		goto out;
-	}
-
-	if (!zfsvfs->z_issnap)
-		zfsctl_create(zfsvfs);
-
-	zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
-out:
-	if (error) {
-		if (zfsvfs != NULL) {
-			dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
-			zfsvfs_free(zfsvfs);
-		}
-		/*
-		 * make sure we don't have dangling sb->s_fs_info which
-		 * zfs_preumount will use.
-		 */
-		sb->s_fs_info = NULL;
-	}
-
-	return (error);
-}
-
-/*
- * Called when an unmount is requested and certain sanity checks have
- * already passed.  At this point no dentries or inodes have been reclaimed
- * from their respective caches.  We drop the extra reference on the .zfs
- * control directory to allow everything to be reclaimed.  All snapshots
- * must already have been unmounted to reach this point.
- */
-void
-zfs_preumount(struct super_block *sb)
-{
-	zfsvfs_t *zfsvfs = sb->s_fs_info;
-
-	/* zfsvfs is NULL when zfs_domount fails during mount */
-	if (zfsvfs) {
-		zfs_unlinked_drain_stop_wait(zfsvfs);
-		zfsctl_destroy(sb->s_fs_info);
-		/*
-		 * Wait for iput_async before entering evict_inodes in
-		 * generic_shutdown_super. The reason we must finish before
-		 * evict_inodes is when lazytime is on, or when zfs_purgedir
-		 * calls zfs_zget, iput would bump i_count from 0 to 1. This
-		 * would race with the i_count check in evict_inodes. This means
-		 * it could destroy the inode while we are still using it.
-		 *
-		 * We wait for two passes. xattr directories in the first pass
-		 * may add xattr entries in zfs_purgedir, so in the second pass
-		 * we wait for them. We don't use taskq_wait here because it is
-		 * a pool wide taskq. Other mounted filesystems can constantly
-		 * do iput_async and there's no guarantee when taskq will be
-		 * empty.
-		 */
-		taskq_wait_outstanding(dsl_pool_iput_taskq(
-		    dmu_objset_pool(zfsvfs->z_os)), 0);
-		taskq_wait_outstanding(dsl_pool_iput_taskq(
-		    dmu_objset_pool(zfsvfs->z_os)), 0);
-	}
-}
-
-/*
- * Called once all other unmount released tear down has occurred.
- * It is our responsibility to release any remaining infrastructure.
- */
-/*ARGSUSED*/
-int
-zfs_umount(struct super_block *sb)
-{
-	zfsvfs_t *zfsvfs = sb->s_fs_info;
-	objset_t *os;
-
-	if (zfsvfs->z_arc_prune != NULL)
-		arc_remove_prune_callback(zfsvfs->z_arc_prune);
-	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
-	os = zfsvfs->z_os;
-	zpl_bdi_destroy(sb);
-
-	/*
-	 * z_os will be NULL if there was an error in
-	 * attempting to reopen zfsvfs.
-	 */
-	if (os != NULL) {
-		/*
-		 * Unset the objset user_ptr.
-		 */
-		mutex_enter(&os->os_user_ptr_lock);
-		dmu_objset_set_user(os, NULL);
-		mutex_exit(&os->os_user_ptr_lock);
-
-		/*
-		 * Finally release the objset
-		 */
-		dmu_objset_disown(os, B_TRUE, zfsvfs);
-	}
-
-	zfsvfs_free(zfsvfs);
-	return (0);
-}
-
-int
-zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
-{
-	zfsvfs_t *zfsvfs = sb->s_fs_info;
-	vfs_t *vfsp;
-	boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);
-	int error;
-
-	if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&
-	    !(*flags & SB_RDONLY)) {
-		*flags |= SB_RDONLY;
-		return (EROFS);
-	}
-
-	error = zfsvfs_parse_options(zm->mnt_data, &vfsp);
-	if (error)
-		return (error);
-
-	if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY))
-		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
-
-	zfs_unregister_callbacks(zfsvfs);
-	zfsvfs_vfs_free(zfsvfs->z_vfs);
-
-	vfsp->vfs_data = zfsvfs;
-	zfsvfs->z_vfs = vfsp;
-	if (!issnap)
-		(void) zfs_register_callbacks(vfsp);
-
-	return (error);
-}
-
-int
-zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
-{
-	zfsvfs_t	*zfsvfs = sb->s_fs_info;
-	znode_t		*zp;
-	uint64_t	object = 0;
-	uint64_t	fid_gen = 0;
-	uint64_t	gen_mask;
-	uint64_t	zp_gen;
-	int		i, err;
-
-	*ipp = NULL;
-
-	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
-		zfid_short_t	*zfid = (zfid_short_t *)fidp;
-
-		for (i = 0; i < sizeof (zfid->zf_object); i++)
-			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
-
-		for (i = 0; i < sizeof (zfid->zf_gen); i++)
-			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
-	} else {
-		return (SET_ERROR(EINVAL));
-	}
-
-	/* LONG_FID_LEN means snapdirs */
-	if (fidp->fid_len == LONG_FID_LEN) {
-		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
-		uint64_t	objsetid = 0;
-		uint64_t	setgen = 0;
-
-		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
-			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
-
-		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
-			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
-
-		if (objsetid != ZFSCTL_INO_SNAPDIRS - object) {
-			dprintf("snapdir fid: objsetid (%llu) != "
-			    "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",
-			    objsetid, ZFSCTL_INO_SNAPDIRS, object);
-
-			return (SET_ERROR(EINVAL));
-		}
-
-		if (fid_gen > 1 || setgen != 0) {
-			dprintf("snapdir fid: fid_gen (%llu) and setgen "
-			    "(%llu)\n", fid_gen, setgen);
-			return (SET_ERROR(EINVAL));
-		}
-
-		return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
-	}
-
-	ZFS_ENTER(zfsvfs);
-	/* A zero fid_gen means we are in the .zfs control directories */
-	if (fid_gen == 0 &&
-	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
-		*ipp = zfsvfs->z_ctldir;
-		ASSERT(*ipp != NULL);
-		if (object == ZFSCTL_INO_SNAPDIR) {
-			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
-			    0, kcred, NULL, NULL) == 0);
-		} else {
-			igrab(*ipp);
-		}
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	gen_mask = -1ULL >> (64 - 8 * i);
-
-	dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
-	if ((err = zfs_zget(zfsvfs, object, &zp))) {
-		ZFS_EXIT(zfsvfs);
-		return (err);
-	}
-
-	/* Don't export xattr stuff */
-	if (zp->z_pflags & ZFS_XATTR) {
-		iput(ZTOI(zp));
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOENT));
-	}
-
-	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
-	    sizeof (uint64_t));
-	zp_gen = zp_gen & gen_mask;
-	if (zp_gen == 0)
-		zp_gen = 1;
-	if ((fid_gen == 0) && (zfsvfs->z_root == object))
-		fid_gen = zp_gen;
-	if (zp->z_unlinked || zp_gen != fid_gen) {
-		dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
-		    fid_gen);
-		iput(ZTOI(zp));
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOENT));
-	}
-
-	*ipp = ZTOI(zp);
-	if (*ipp)
-		zfs_inode_update(ITOZ(*ipp));
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*
- * Block out VFS ops and close zfsvfs_t
- *
- * Note, if successful, then we return with the 'z_teardown_lock' and
- * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
- * dataset and objset intact so that they can be atomically handed off during
- * a subsequent rollback or recv operation and the resume thereafter.
- */
-int
-zfs_suspend_fs(zfsvfs_t *zfsvfs)
-{
-	int error;
-
-	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
-		return (error);
-
-	return (0);
-}
-
-/*
- * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
- * is an invariant across any of the operations that can be performed while the
- * filesystem was suspended.  Whether it succeeded or failed, the preconditions
- * are the same: the relevant objset and associated dataset are owned by
- * zfsvfs, held, and long held on entry.
- */
-int
-zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
-{
-	int err, err2;
-	znode_t *zp;
-
-	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
-	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
-
-	/*
-	 * We already own this, so just update the objset_t, as the one we
-	 * had before may have been evicted.
-	 */
-	objset_t *os;
-	VERIFY3P(ds->ds_owner, ==, zfsvfs);
-	VERIFY(dsl_dataset_long_held(ds));
-	VERIFY0(dmu_objset_from_ds(ds, &os));
-
-	err = zfsvfs_init(zfsvfs, os);
-	if (err != 0)
-		goto bail;
-
-	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
-
-	zfs_set_fuid_feature(zfsvfs);
-	zfsvfs->z_rollback_time = jiffies;
-
-	/*
-	 * Attempt to re-establish all the active inodes with their
-	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
-	 * and mark it stale.  This prevents a collision if a new
-	 * inode/object is created which must use the same inode
-	 * number.  The stale inode will be be released when the
-	 * VFS prunes the dentry holding the remaining references
-	 * on the stale inode.
-	 */
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
-	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
-		err2 = zfs_rezget(zp);
-		if (err2) {
-			remove_inode_hash(ZTOI(zp));
-			zp->z_is_stale = B_TRUE;
-		}
-
-		/* see comment in zfs_suspend_fs() */
-		if (zp->z_suspended) {
-			zfs_iput_async(ZTOI(zp));
-			zp->z_suspended = B_FALSE;
-		}
-	}
-	mutex_exit(&zfsvfs->z_znodes_lock);
-
-	if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
-		/*
-		 * zfs_suspend_fs() could have interrupted freeing
-		 * of dnodes. We need to restart this freeing so
-		 * that we don't "leak" the space.
-		 */
-		zfs_unlinked_drain(zfsvfs);
-	}
-
-bail:
-	/* release the VFS ops */
-	rw_exit(&zfsvfs->z_teardown_inactive_lock);
-	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
-
-	if (err) {
-		/*
-		 * Since we couldn't setup the sa framework, try to force
-		 * unmount this file system.
-		 */
-		if (zfsvfs->z_os)
-			(void) zfs_umount(zfsvfs->z_sb);
-	}
-	return (err);
-}
-
-int
-zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
-{
-	int error;
-	objset_t *os = zfsvfs->z_os;
-	dmu_tx_t *tx;
-
-	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
-		return (SET_ERROR(EINVAL));
-
-	if (newvers < zfsvfs->z_version)
-		return (SET_ERROR(EINVAL));
-
-	if (zfs_spa_version_map(newvers) >
-	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
-		return (SET_ERROR(ENOTSUP));
-
-	tx = dmu_tx_create(os);
-	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
-	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
-		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
-		    ZFS_SA_ATTRS);
-		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
-	}
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		dmu_tx_abort(tx);
-		return (error);
-	}
-
-	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
-	    8, 1, &newvers, tx);
-
-	if (error) {
-		dmu_tx_commit(tx);
-		return (error);
-	}
-
-	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
-		uint64_t sa_obj;
-
-		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
-		    SPA_VERSION_SA);
-		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
-		    DMU_OT_NONE, 0, tx);
-
-		error = zap_add(os, MASTER_NODE_OBJ,
-		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
-		ASSERT0(error);
-
-		VERIFY(0 == sa_set_sa_object(os, sa_obj));
-		sa_register_update_callback(os, zfs_sa_upgrade);
-	}
-
-	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
-	    "from %llu to %llu", zfsvfs->z_version, newvers);
-
-	dmu_tx_commit(tx);
-
-	zfsvfs->z_version = newvers;
-	os->os_version = newvers;
-
-	zfs_set_fuid_feature(zfsvfs);
-
-	return (0);
-}
-
-/*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
-	uint64_t *cached_copy = NULL;
-
-	/*
-	 * Figure out where in the objset_t the cached copy would live, if it
-	 * is available for the requested property.
-	 */
-	if (os != NULL) {
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			cached_copy = &os->os_version;
-			break;
-		case ZFS_PROP_NORMALIZE:
-			cached_copy = &os->os_normalization;
-			break;
-		case ZFS_PROP_UTF8ONLY:
-			cached_copy = &os->os_utf8only;
-			break;
-		case ZFS_PROP_CASE:
-			cached_copy = &os->os_casesensitivity;
-			break;
-		default:
-			break;
-		}
-	}
-	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
-		*value = *cached_copy;
-		return (0);
-	}
-
-	/*
-	 * If the property wasn't cached, look up the file system's value for
-	 * the property. For the version property, we look up a slightly
-	 * different string.
-	 */
-	const char *pname;
-	int error = ENOENT;
-	if (prop == ZFS_PROP_VERSION)
-		pname = ZPL_VERSION_STR;
-	else
-		pname = zfs_prop_to_name(prop);
-
-	if (os != NULL) {
-		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
-		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
-	}
-
-	if (error == ENOENT) {
-		/* No value set, use the default value */
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			*value = ZPL_VERSION;
-			break;
-		case ZFS_PROP_NORMALIZE:
-		case ZFS_PROP_UTF8ONLY:
-			*value = 0;
-			break;
-		case ZFS_PROP_CASE:
-			*value = ZFS_CASE_SENSITIVE;
-			break;
-		case ZFS_PROP_ACLTYPE:
-			*value = ZFS_ACLTYPE_OFF;
-			break;
-		default:
-			return (error);
-		}
-		error = 0;
-	}
-
-	/*
-	 * If one of the methods for getting the property value above worked,
-	 * copy it into the objset_t's cache.
-	 */
-	if (error == 0 && cached_copy != NULL) {
-		*cached_copy = *value;
-	}
-
-	return (error);
-}
-
-/*
- * Return true if the corresponding vfs's unmounted flag is set.
- * Otherwise return false.
- * If this function returns true we know VFS unmount has been initiated.
- */
-boolean_t
-zfs_get_vfs_flag_unmounted(objset_t *os)
-{
-	zfsvfs_t *zfvp;
-	boolean_t unmounted = B_FALSE;
-
-	ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
-
-	mutex_enter(&os->os_user_ptr_lock);
-	zfvp = dmu_objset_get_user(os);
-	if (zfvp != NULL && zfvp->z_unmounted)
-		unmounted = B_TRUE;
-	mutex_exit(&os->os_user_ptr_lock);
-
-	return (unmounted);
-}
-
-void
-zfs_init(void)
-{
-	zfsctl_init();
-	zfs_znode_init();
-	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
-	register_filesystem(&zpl_fs_type);
-}
-
-void
-zfs_fini(void)
-{
-	/*
-	 * we don't use outstanding because zpl_posix_acl_free might add more.
-	 */
-	taskq_wait(system_delay_taskq);
-	taskq_wait(system_taskq);
-	unregister_filesystem(&zpl_fs_type);
-	zfs_znode_fini();
-	zfsctl_fini();
-}
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(zfs_suspend_fs);
-EXPORT_SYMBOL(zfs_resume_fs);
-EXPORT_SYMBOL(zfs_userspace_one);
-EXPORT_SYMBOL(zfs_userspace_many);
-EXPORT_SYMBOL(zfs_set_userquota);
-EXPORT_SYMBOL(zfs_id_overblockquota);
-EXPORT_SYMBOL(zfs_id_overobjquota);
-EXPORT_SYMBOL(zfs_id_overquota);
-EXPORT_SYMBOL(zfs_set_version);
-EXPORT_SYMBOL(zfsvfs_create);
-EXPORT_SYMBOL(zfsvfs_free);
-EXPORT_SYMBOL(zfs_is_readonly);
-EXPORT_SYMBOL(zfs_domount);
-EXPORT_SYMBOL(zfs_preumount);
-EXPORT_SYMBOL(zfs_umount);
-EXPORT_SYMBOL(zfs_remount);
-EXPORT_SYMBOL(zfs_statvfs);
-EXPORT_SYMBOL(zfs_vget);
-EXPORT_SYMBOL(zfs_prune);
-#endif

diff --git a/zfs/module/zfs/zfs_vnops.c b/zfs/module/zfs/zfs_vnops.c
index af45d10..0987fd0 100644
--- a/zfs/module/zfs/zfs_vnops.c
+++ b/zfs/module/zfs/zfs_vnops.c

@@ -29,20 +29,15 @@
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2010 Robert Milkowski */
 
-
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/vfs.h>
+#include <sys/uio_impl.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
-#include <sys/taskq.h>
-#include <sys/uio.h>
-#include <sys/vmsystm.h>
-#include <sys/atomic.h>
-#include <sys/pathname.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/zfs_dir.h>
@@ -54,191 +49,35 @@
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
-#include <sys/zap.h>
-#include <sys/sa.h>
 #include <sys/policy.h>
-#include <sys/sunddi.h>
-#include <sys/sid.h>
-#include <sys/mode.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_fuid.h>
-#include <sys/zfs_sa.h>
 #include <sys/zfs_vnops.h>
-#include <sys/zfs_rlock.h>
-#include <sys/cred.h>
-#include <sys/zpl.h>
-#include <sys/zil.h>
-#include <sys/sa_impl.h>
+#include <sys/zfs_quota.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_znode.h>
 
-/*
- * Programming rules.
- *
- * Each vnode op performs some logical unit of work.  To do this, the ZPL must
- * properly lock its in-core state, create a DMU transaction, do the work,
- * record this work in the intent log (ZIL), commit the DMU transaction,
- * and wait for the intent log to commit if it is a synchronous operation.
- * Moreover, the vnode ops must work in both normal and log replay context.
- * The ordering of events is important to avoid deadlocks and references
- * to freed memory.  The example below illustrates the following Big Rules:
- *
- *  (1) A check must be made in each zfs thread for a mounted file system.
- *	This is done avoiding races using ZFS_ENTER(zfsvfs).
- *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
- *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
- *      can return EIO from the calling function.
- *
- *  (2)	iput() should always be the last thing except for zil_commit()
- *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
- *	First, if it's the last reference, the vnode/znode
- *	can be freed, so the zp may point to freed memory.  Second, the last
- *	reference will call zfs_zinactive(), which may induce a lot of work --
- *	pushing cached pages (which acquires range locks) and syncing out
- *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
- *	which could deadlock the system if you were already holding one.
- *	If you must call iput() within a tx then use zfs_iput_async().
- *
- *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
- *	as they can span dmu_tx_assign() calls.
- *
- *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
- *      dmu_tx_assign().  This is critical because we don't want to block
- *      while holding locks.
- *
- *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
- *	reduces lock contention and CPU usage when we must wait (note that if
- *	throughput is constrained by the storage, nearly every transaction
- *	must wait).
- *
- *      Note, in particular, that if a lock is sometimes acquired before
- *      the tx assigns, and sometimes after (e.g. z_lock), then failing
- *      to use a non-blocking assign can deadlock the system.  The scenario:
- *
- *	Thread A has grabbed a lock before calling dmu_tx_assign().
- *	Thread B is in an already-assigned tx, and blocks for this lock.
- *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
- *	forever, because the previous txg can't quiesce until B's tx commits.
- *
- *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
- *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
- *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
- *	to indicate that this operation has already called dmu_tx_wait().
- *	This will ensure that we don't retry forever, waiting a short bit
- *	each time.
- *
- *  (5)	If the operation succeeded, generate the intent log entry for it
- *	before dropping locks.  This ensures that the ordering of events
- *	in the intent log matches the order in which they actually occurred.
- *	During ZIL replay the zfs_log_* functions will update the sequence
- *	number to indicate the zil transaction has replayed.
- *
- *  (6)	At the end of each vnode op, the DMU tx must always commit,
- *	regardless of whether there were any errors.
- *
- *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
- *	to ensure that synchronous semantics are provided when necessary.
- *
- * In general, this is how things should be ordered in each vnode op:
- *
- *	ZFS_ENTER(zfsvfs);		// exit if unmounted
- * top:
- *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
- *	rw_enter(...);			// grab any other locks you need
- *	tx = dmu_tx_create(...);	// get DMU tx
- *	dmu_tx_hold_*();		// hold each object you might modify
- *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
- *	if (error) {
- *		rw_exit(...);		// drop locks
- *		zfs_dirent_unlock(dl);	// unlock directory entry
- *		iput(...);		// release held vnodes
- *		if (error == ERESTART) {
- *			waited = B_TRUE;
- *			dmu_tx_wait(tx);
- *			dmu_tx_abort(tx);
- *			goto top;
- *		}
- *		dmu_tx_abort(tx);	// abort DMU tx
- *		ZFS_EXIT(zfsvfs);	// finished in zfs
- *		return (error);		// really out of space
- *	}
- *	error = do_real_work();		// do whatever this VOP does
- *	if (error == 0)
- *		zfs_log_*(...);		// on success, make ZIL entry
- *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
- *	rw_exit(...);			// drop locks
- *	zfs_dirent_unlock(dl);		// unlock directory entry
- *	iput(...);			// release held vnodes
- *	zil_commit(zilog, foid);	// synchronous when necessary
- *	ZFS_EXIT(zfsvfs);		// finished in zfs
- *	return (error);			// done, report error
- */
 
-/*
- * Virus scanning is unsupported.  It would be possible to add a hook
- * here to performance the required virus scan.  This could be done
- * entirely in the kernel or potentially as an update to invoke a
- * scanning utility.
- */
-static int
-zfs_vscan(struct inode *ip, cred_t *cr, int async)
-{
-	return (0);
-}
+static ulong_t zfs_fsync_sync_cnt = 4;
 
-/* ARGSUSED */
 int
-zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
+zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
-	znode_t	*zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
 
-	/* Honor ZFS_APPENDONLY file attribute */
-	if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
-	    ((flag & O_APPEND) == 0)) {
+	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
+		atomic_inc_32(&zp->z_sync_writes_cnt);
+		zil_commit(zfsvfs->z_log, zp->z_id);
+		atomic_dec_32(&zp->z_sync_writes_cnt);
 		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EPERM));
 	}
+	tsd_set(zfs_fsyncer_key, NULL);
 
-	/* Virus scan eligible files on open */
-	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
-	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
-		if (zfs_vscan(ip, cr, 0) != 0) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EACCES));
-		}
-	}
-
-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_inc_32(&zp->z_sync_cnt);
-
-	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
-/* ARGSUSED */
-int
-zfs_close(struct inode *ip, int flag, cred_t *cr)
-{
-	znode_t	*zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	/* Decrement the synchronous opens in the znode */
-	if (flag & O_SYNC)
-		atomic_dec_32(&zp->z_sync_cnt);
-
-	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
-	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
-		VERIFY(zfs_vscan(ip, cr, 1) == 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
 
 #if defined(SEEK_HOLE) && defined(SEEK_DATA)
 /*
@@ -246,9 +85,9 @@
  * data (cmd == SEEK_DATA). "off" is an in/out parameter.
  */
 static int
-zfs_holey_common(struct inode *ip, int cmd, loff_t *off)
+zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 {
-	znode_t	*zp = ITOZ(ip);
+	zfs_locked_range_t *lr;
 	uint64_t noff = (uint64_t)*off; /* new offset */
 	uint64_t file_sz;
 	int error;
@@ -259,17 +98,23 @@
 		return (SET_ERROR(ENXIO));
 	}
 
-	if (cmd == SEEK_HOLE)
+	if (cmd == F_SEEK_HOLE)
 		hole = B_TRUE;
 	else
 		hole = B_FALSE;
 
+	/* Flush any mmap()'d data to disk */
+	if (zn_has_cached_data(zp, 0, file_sz - 1))
+		zn_flush_cached_data(zp, B_FALSE);
+
+	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
+	zfs_rangelock_exit(lr);
 
 	if (error == ESRCH)
 		return (SET_ERROR(ENXIO));
 
-	/* file was dirty, so fall back to using generic logic */
+	/* File was dirty, so fall back to using generic logic */
 	if (error == EBUSY) {
 		if (hole)
 			*off = file_sz;
@@ -296,134 +141,49 @@
 }
 
 int
-zfs_holey(struct inode *ip, int cmd, loff_t *off)
+zfs_holey(znode_t *zp, ulong_t cmd, loff_t *off)
 {
-	znode_t	*zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	error = zfs_holey_common(ip, cmd, off);
+	error = zfs_holey_common(zp, cmd, off);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 #endif /* SEEK_HOLE && SEEK_DATA */
 
-#if defined(_KERNEL)
-/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Write:	If we find a memory mapped page, we write to *both*
- *		the page and the dmu buffer.
- */
-static void
-update_pages(struct inode *ip, int64_t start, int len,
-    objset_t *os, uint64_t oid)
+/*ARGSUSED*/
+int
+zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
 {
-	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	uint64_t nbytes;
-	int64_t	off;
-	void *pb;
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int error;
 
-	off = start & (PAGE_SIZE-1);
-	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		nbytes = MIN(PAGE_SIZE - off, len);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
-		if (pp) {
-			if (mapping_writably_mapped(mp))
-				flush_dcache_page(pp);
+	if (flag & V_ACE_MASK)
+		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
+	else
+		error = zfs_zaccess_rwx(zp, mode, flag, cr);
 
-			pb = kmap(pp);
-			(void) dmu_read(os, oid, start+off, nbytes, pb+off,
-			    DMU_READ_PREFETCH);
-			kunmap(pp);
-
-			if (mapping_writably_mapped(mp))
-				flush_dcache_page(pp);
-
-			mark_page_accessed(pp);
-			SetPageUptodate(pp);
-			ClearPageError(pp);
-			unlock_page(pp);
-			put_page(pp);
-		}
-
-		len -= nbytes;
-		off = 0;
-	}
-}
-
-/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Read:	We "read" preferentially from memory mapped pages,
- *		else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- *	 the file is memory mapped.
- */
-static int
-mappedread(struct inode *ip, int nbytes, uio_t *uio)
-{
-	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	znode_t *zp = ITOZ(ip);
-	int64_t	start, off;
-	uint64_t bytes;
-	int len = nbytes;
-	int error = 0;
-	void *pb;
-
-	start = uio->uio_loffset;
-	off = start & (PAGE_SIZE-1);
-	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		bytes = MIN(PAGE_SIZE - off, len);
-
-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
-		if (pp) {
-			ASSERT(PageUptodate(pp));
-			unlock_page(pp);
-
-			pb = kmap(pp);
-			error = uiomove(pb + off, bytes, UIO_READ, uio);
-			kunmap(pp);
-
-			if (mapping_writably_mapped(mp))
-				flush_dcache_page(pp);
-
-			mark_page_accessed(pp);
-			put_page(pp);
-		} else {
-			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
-			    uio, bytes);
-		}
-
-		len -= bytes;
-		off = 0;
-		if (error)
-			break;
-	}
+	ZFS_EXIT(zfsvfs);
 	return (error);
 }
-#endif /* _KERNEL */
 
-unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
-unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
+static unsigned long zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
 
 /*
  * Read bytes from specified file into supplied buffer.
  *
- *	IN:	ip	- inode of file to be read from.
+ *	IN:	zp	- inode of file to be read from.
  *		uio	- structure supplying read location, range info,
  *			  and return buffer.
- *		ioflag	- FSYNC flags; used to provide FRSYNC semantics.
+ *		ioflag	- O_SYNC flags; used to provide FRSYNC semantics.
  *			  O_DIRECT flag; used to bypass page cache.
  *		cr	- credentials of caller.
  *
@@ -436,13 +196,12 @@
  */
 /* ARGSUSED */
 int
-zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
+zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 {
 	int error = 0;
 	boolean_t frsync = B_FALSE;
 
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
@@ -451,10 +210,16 @@
 		return (SET_ERROR(EACCES));
 	}
 
+	/* We don't copy out anything useful for directories. */
+	if (Z_ISDIR(ZTOTYPE(zp))) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EISDIR));
+	}
+
 	/*
 	 * Validate file offset
 	 */
-	if (uio->uio_loffset < (offset_t)0) {
+	if (zfs_uio_offset(uio) < (offset_t)0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
@@ -462,7 +227,7 @@
 	/*
 	 * Fasttrack empty reads
 	 */
-	if (uio->uio_resid == 0) {
+	if (zfs_uio_resid(uio) == 0) {
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
@@ -473,7 +238,7 @@
 	 * Only do this for non-snapshots.
 	 *
 	 * Some platforms do not support FRSYNC and instead map it
-	 * to FSYNC, which results in unnecessary calls to zil_commit. We
+	 * to O_SYNC, which results in unnecessary calls to zil_commit. We
 	 * only honor FRSYNC requests on platforms which support it.
 	 */
 	frsync = !!(ioflag & FRSYNC);
@@ -486,59 +251,35 @@
 	 * Lock the range against changes.
 	 */
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
-	    uio->uio_loffset, uio->uio_resid, RL_READER);
+	    zfs_uio_offset(uio), zfs_uio_resid(uio), RL_READER);
 
 	/*
 	 * If we are reading past end-of-file we can skip
 	 * to the end; but we might still need to set atime.
 	 */
-	if (uio->uio_loffset >= zp->z_size) {
+	if (zfs_uio_offset(uio) >= zp->z_size) {
 		error = 0;
 		goto out;
 	}
 
-	ASSERT(uio->uio_loffset < zp->z_size);
-	ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
+	ASSERT(zfs_uio_offset(uio) < zp->z_size);
+#if defined(__linux__)
+	ssize_t start_offset = zfs_uio_offset(uio);
+#endif
+	ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
 	ssize_t start_resid = n;
 
-#ifdef HAVE_UIO_ZEROCOPY
-	xuio_t *xuio = NULL;
-	if ((uio->uio_extflg == UIO_XUIO) &&
-	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
-		int nblk;
-		int blksz = zp->z_blksz;
-		uint64_t offset = uio->uio_loffset;
-
-		xuio = (xuio_t *)uio;
-		if ((ISP2(blksz))) {
-			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
-			    blksz)) / blksz;
-		} else {
-			ASSERT(offset + n <= blksz);
-			nblk = 1;
-		}
-		(void) dmu_xuio_init(xuio, nblk);
-
-		if (vn_has_cached_data(ip)) {
-			/*
-			 * For simplicity, we always allocate a full buffer
-			 * even if we only expect to read a portion of a block.
-			 */
-			while (--nblk >= 0) {
-				(void) dmu_xuio_add(xuio,
-				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-				    blksz), 0, blksz);
-			}
-		}
-	}
-#endif /* HAVE_UIO_ZEROCOPY */
-
 	while (n > 0) {
-		ssize_t nbytes = MIN(n, zfs_read_chunk_size -
-		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
-
-		if (zp->z_is_mapped && !(ioflag & O_DIRECT)) {
-			error = mappedread(ip, nbytes, uio);
+		ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size -
+		    P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size));
+#ifdef UIO_NOCOPY
+		if (zfs_uio_segflg(uio) == UIO_NOCOPY)
+			error = mappedread_sf(zp, nbytes, uio);
+		else
+#endif
+		if (zn_has_cached_data(zp, zfs_uio_offset(uio),
+		    zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) {
+			error = mappedread(zp, nbytes, uio);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes);
@@ -548,6 +289,18 @@
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
 				error = SET_ERROR(EIO);
+
+#if defined(__linux__)
+			/*
+			 * if we actually read some bytes, bubbling EFAULT
+			 * up to become EAGAIN isn't what we want here...
+			 *
+			 * ...on Linux, at least. On FBSD, doing this breaks.
+			 */
+			if (error == EFAULT &&
+			    (zfs_uio_offset(uio) - start_offset) != 0)
+				error = 0;
+#endif
 			break;
 		}
 
@@ -560,17 +313,74 @@
 out:
 	zfs_rangelock_exit(lr);
 
+	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
+static void
+zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr,
+    uint64_t *clear_setid_bits_txgp, dmu_tx_t *tx)
+{
+	zilog_t *zilog = zfsvfs->z_log;
+	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
+
+	ASSERT(clear_setid_bits_txgp != NULL);
+	ASSERT(tx != NULL);
+
+	/*
+	 * Clear Set-UID/Set-GID bits on successful write if not
+	 * privileged and at least one of the execute bits is set.
+	 *
+	 * It would be nice to do this after all writes have
+	 * been done, but that would still expose the ISUID/ISGID
+	 * to another app after the partial write is committed.
+	 *
+	 * Note: we don't call zfs_fuid_map_id() here because
+	 * user 0 is not an ephemeral uid.
+	 */
+	mutex_enter(&zp->z_acl_lock);
+	if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 &&
+	    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
+	    secpolicy_vnode_setid_retain(zp, cr,
+	    ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
+		uint64_t newmode;
+
+		zp->z_mode &= ~(S_ISUID | S_ISGID);
+		newmode = zp->z_mode;
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
+		    (void *)&newmode, sizeof (uint64_t), tx);
+
+		mutex_exit(&zp->z_acl_lock);
+
+		/*
+		 * Make sure SUID/SGID bits will be removed when we replay the
+		 * log. If the setid bits keep coming back, don't log more
+		 * than one TX_SETATTR per transaction group.
+		 */
+		if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) {
+			vattr_t va;
+
+			bzero(&va, sizeof (va));
+			va.va_mask = AT_MODE;
+			va.va_nodeid = zp->z_id;
+			va.va_mode = newmode;
+			zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va, AT_MODE,
+			    NULL);
+			*clear_setid_bits_txgp = dmu_tx_get_txg(tx);
+		}
+	} else {
+		mutex_exit(&zp->z_acl_lock);
+	}
+}
+
 /*
  * Write the bytes to a file.
  *
- *	IN:	ip	- inode of file to be written to.
+ *	IN:	zp	- znode of file to be written to.
  *		uio	- structure supplying write location, range info,
  *			  and data buffer.
- *		ioflag	- FAPPEND flag set if in append mode.
+ *		ioflag	- O_APPEND flag set if in append mode.
  *			  O_DIRECT flag; used to bypass page cache.
  *		cr	- credentials of caller.
  *
@@ -585,10 +395,11 @@
 
 /* ARGSUSED */
 int
-zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
+zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 {
-	int error = 0;
-	ssize_t start_resid = uio->uio_resid;
+	int error = 0, error1;
+	ssize_t start_resid = zfs_uio_resid(uio);
+	uint64_t clear_setid_bits_txg = 0;
 
 	/*
 	 * Fasttrack empty write
@@ -597,11 +408,6 @@
 	if (n == 0)
 		return (0);
 
-	rlim64_t limit = uio->uio_limit;
-	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
-		limit = MAXOFFSET_T;
-
-	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
@@ -626,11 +432,13 @@
 	}
 
 	/*
-	 * If immutable or not appending then return EPERM
+	 * If immutable or not appending then return EPERM.
+	 * Intentionally allow ZFS_READONLY through here.
+	 * See zfs_zaccess_common()
 	 */
-	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
-	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
-	    (uio->uio_loffset < zp->z_size))) {
+	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
+	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
+	    (zfs_uio_offset(uio) < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
@@ -638,36 +446,29 @@
 	/*
 	 * Validate file offset
 	 */
-	offset_t woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
+	offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
-	int max_blksz = zfsvfs->z_max_blksz;
-	xuio_t *xuio = NULL;
+	const uint64_t max_blksz = zfsvfs->z_max_blksz;
 
 	/*
 	 * Pre-fault the pages to ensure slow (eg NFS) pages
 	 * don't hold up txg.
 	 * Skip this if uio contains loaned arc_buf.
 	 */
-#ifdef HAVE_UIO_ZEROCOPY
-	if ((uio->uio_extflg == UIO_XUIO) &&
-	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
-		xuio = (xuio_t *)uio;
-	else
-#endif
-		if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EFAULT));
-		}
+	if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EFAULT));
+	}
 
 	/*
 	 * If in append mode, set the io offset pointer to eof.
 	 */
 	zfs_locked_range_t *lr;
-	if (ioflag & FAPPEND) {
+	if (ioflag & O_APPEND) {
 		/*
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
@@ -682,7 +483,7 @@
 			 */
 			woff = zp->z_size;
 		}
-		uio->uio_loffset = woff;
+		zfs_uio_setoffset(uio, woff);
 	} else {
 		/*
 		 * Note that if the file block size will change as a result of
@@ -692,26 +493,29 @@
 		lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
 	}
 
+	if (zn_rlimit_fsize(zp, uio)) {
+		zfs_rangelock_exit(lr);
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(EFBIG));
+	}
+
+	const rlim64_t limit = MAXOFFSET_T;
+
 	if (woff >= limit) {
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EFBIG));
 	}
 
-	if ((woff + n) > limit || woff > (limit - n))
+	if (n > limit - woff)
 		n = limit - woff;
 
-	/* Will this write extend the file length? */
-	int write_eof = (woff + n > zp->z_size);
-
 	uint64_t end_size = MAX(zp->z_size, woff + n);
 	zilog_t *zilog = zfsvfs->z_log;
-#ifdef HAVE_UIO_ZEROCOPY
-	int		i_iov = 0;
-	const iovec_t	*iovp = uio->uio_iov;
-	ASSERTV(int	iovcnt = uio->uio_iovcnt);
-#endif
 
+	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
+	const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
+	const uint64_t projid = zp->z_projid;
 
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
@@ -719,34 +523,19 @@
 	 * and allows us to do more fine-grained space accounting.
 	 */
 	while (n > 0) {
-		woff = uio->uio_loffset;
+		woff = zfs_uio_offset(uio);
 
-		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
-		    KUID_TO_SUID(ip->i_uid)) ||
-		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
-		    KGID_TO_SGID(ip->i_gid)) ||
-		    (zp->z_projid != ZFS_DEFAULT_PROJID &&
+		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, uid) ||
+		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, gid) ||
+		    (projid != ZFS_DEFAULT_PROJID &&
 		    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
-		    zp->z_projid))) {
+		    projid))) {
 			error = SET_ERROR(EDQUOT);
 			break;
 		}
 
 		arc_buf_t *abuf = NULL;
-		const iovec_t *aiov = NULL;
-		if (xuio) {
-#ifdef HAVE_UIO_ZEROCOPY
-			ASSERT(i_iov < iovcnt);
-			ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
-			aiov = &iovp[i_iov];
-			abuf = dmu_xuio_arcbuf(xuio, i_iov);
-			dmu_xuio_clear(xuio, i_iov);
-			ASSERT((aiov->iov_base == abuf->b_data) ||
-			    ((char *)aiov->iov_base - (char *)abuf->b_data +
-			    aiov->iov_len == arc_buf_size(abuf)));
-			i_iov++;
-#endif
-		} else if (n >= max_blksz && woff >= zp->z_size &&
+		if (n >= max_blksz && woff >= zp->z_size &&
 		    P2PHASE(woff, max_blksz) == 0 &&
 		    zp->z_blksz == max_blksz) {
 			/*
@@ -762,12 +551,12 @@
 			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
-			if ((error = uiocopy(abuf->b_data, max_blksz,
+			if ((error = zfs_uiocopy(abuf->b_data, max_blksz,
 			    UIO_WRITE, uio, &cbytes))) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
-			ASSERT(cbytes == max_blksz);
+			ASSERT3S(cbytes, ==, max_blksz);
 		}
 
 		/*
@@ -790,6 +579,11 @@
 		}
 
 		/*
+		 * NB: We must call zfs_clear_setid_bits_if_necessary before
+		 * committing the transaction!
+		 */
+
+		/*
 		 * If rangelock_enter() over-locked we grow the blocksize
 		 * and then reduce the lock range.  This will only happen
 		 * on the first iteration since rangelock_reduce() will
@@ -818,68 +612,87 @@
 		 * XXX - should we really limit each write to z_max_blksz?
 		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
 		 */
-		ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
+		const ssize_t nbytes =
+		    MIN(n, max_blksz - P2PHASE(woff, max_blksz));
 
 		ssize_t tx_bytes;
 		if (abuf == NULL) {
-			tx_bytes = uio->uio_resid;
-			uio->uio_fault_disable = B_TRUE;
+			tx_bytes = zfs_uio_resid(uio);
+			zfs_uio_fault_disable(uio, B_TRUE);
 			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes, tx);
-			uio->uio_fault_disable = B_FALSE;
+			zfs_uio_fault_disable(uio, B_FALSE);
+#ifdef __linux__
 			if (error == EFAULT) {
+				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
+				    cr, &clear_setid_bits_txg, tx);
 				dmu_tx_commit(tx);
 				/*
 				 * Account for partial writes before
 				 * continuing the loop.
 				 * Update needs to occur before the next
-				 * uio_prefaultpages, or prefaultpages may
+				 * zfs_uio_prefaultpages, or prefaultpages may
 				 * error, and we may break the loop early.
 				 */
-				if (tx_bytes != uio->uio_resid)
-					n -= tx_bytes - uio->uio_resid;
-				if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+				if (tx_bytes != zfs_uio_resid(uio))
+					n -= tx_bytes - zfs_uio_resid(uio);
+				if (zfs_uio_prefaultpages(MIN(n, max_blksz),
+				    uio)) {
 					break;
 				}
 				continue;
-			} else if (error != 0) {
+			}
+#endif
+			/*
+			 * On FreeBSD, EFAULT should be propagated back to the
+			 * VFS, which will handle faulting and will retry.
+			 */
+			if (error != 0 && error != EFAULT) {
+				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
+				    cr, &clear_setid_bits_txg, tx);
 				dmu_tx_commit(tx);
 				break;
 			}
-			tx_bytes -= uio->uio_resid;
+			tx_bytes -= zfs_uio_resid(uio);
 		} else {
-			tx_bytes = nbytes;
-			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
+			/* Implied by abuf != NULL: */
+			ASSERT3S(n, >=, max_blksz);
+			ASSERT0(P2PHASE(woff, max_blksz));
 			/*
-			 * If this is not a full block write, but we are
-			 * extending the file past EOF and this data starts
-			 * block-aligned, use assign_arcbuf().  Otherwise,
-			 * write via dmu_write().
+			 * We can simplify nbytes to MIN(n, max_blksz) since
+			 * P2PHASE(woff, max_blksz) is 0, and knowing
+			 * n >= max_blksz lets us simplify further:
 			 */
-			if (tx_bytes < max_blksz && (!write_eof ||
-			    aiov->iov_base != abuf->b_data)) {
-				ASSERT(xuio);
-				dmu_write(zfsvfs->z_os, zp->z_id, woff,
-				    /* cppcheck-suppress nullPointer */
-				    aiov->iov_len, aiov->iov_base, tx);
+			ASSERT3S(nbytes, ==, max_blksz);
+			/*
+			 * Thus, we're writing a full block at a block-aligned
+			 * offset and extending the file past EOF.
+			 *
+			 * dmu_assign_arcbuf_by_dbuf() will directly assign the
+			 * arc buffer to a dbuf.
+			 */
+			error = dmu_assign_arcbuf_by_dbuf(
+			    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
+			if (error != 0) {
+				/*
+				 * XXX This might not be necessary if
+				 * dmu_assign_arcbuf_by_dbuf is guaranteed
+				 * to be atomic.
+				 */
+				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
+				    cr, &clear_setid_bits_txg, tx);
 				dmu_return_arcbuf(abuf);
-				xuio_stat_wbuf_copied();
-			} else {
-				ASSERT(xuio || tx_bytes == max_blksz);
-				error = dmu_assign_arcbuf_by_dbuf(
-				    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
-				if (error != 0) {
-					dmu_return_arcbuf(abuf);
-					dmu_tx_commit(tx);
-					break;
-				}
+				dmu_tx_commit(tx);
+				break;
 			}
-			ASSERT(tx_bytes <= uio->uio_resid);
-			uioskip(uio, tx_bytes);
+			ASSERT3S(nbytes, <=, zfs_uio_resid(uio));
+			zfs_uioskip(uio, nbytes);
+			tx_bytes = nbytes;
 		}
-		if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) {
-			update_pages(ip, woff,
-			    tx_bytes, zfsvfs->z_os, zp->z_id);
+		if (tx_bytes &&
+		    zn_has_cached_data(zp, woff, woff + tx_bytes - 1) &&
+		    !(ioflag & O_DIRECT)) {
+			update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
 		}
 
 		/*
@@ -894,31 +707,8 @@
 			break;
 		}
 
-		/*
-		 * Clear Set-UID/Set-GID bits on successful write if not
-		 * privileged and at least one of the execute bits is set.
-		 *
-		 * It would be nice to do this after all writes have
-		 * been done, but that would still expose the ISUID/ISGID
-		 * to another app after the partial write is committed.
-		 *
-		 * Note: we don't call zfs_fuid_map_id() here because
-		 * user 0 is not an ephemeral uid.
-		 */
-		mutex_enter(&zp->z_acl_lock);
-		uint32_t uid = KUID_TO_SUID(ip->i_uid);
-		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
-		    (S_IXUSR >> 6))) != 0 &&
-		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
-		    secpolicy_vnode_setid_retain(cr,
-		    ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
-			uint64_t newmode;
-			zp->z_mode &= ~(S_ISUID | S_ISGID);
-			ip->i_mode = newmode = zp->z_mode;
-			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
-			    (void *)&newmode, sizeof (uint64_t), tx);
-		}
-		mutex_exit(&zp->z_acl_lock);
+		zfs_clear_setid_bits_if_necessary(zfsvfs, zp, cr,
+		    &clear_setid_bits_txg, tx);
 
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 
@@ -926,10 +716,10 @@
 		 * Update the file size (zp_size) if it has changed;
 		 * account for possible concurrent updates.
 		 */
-		while ((end_size = zp->z_size) < uio->uio_loffset) {
+		while ((end_size = zp->z_size) < zfs_uio_offset(uio)) {
 			(void) atomic_cas_64(&zp->z_size, end_size,
-			    uio->uio_loffset);
-			ASSERT(error == 0);
+			    zfs_uio_offset(uio));
+			ASSERT(error == 0 || error == EFAULT);
 		}
 		/*
 		 * If we are replaying and eof is non zero then force
@@ -939,42 +729,53 @@
 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 			zp->z_size = zfsvfs->z_replay_eof;
 
-		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		error1 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		if (error1 != 0)
+			/* Avoid clobbering EFAULT. */
+			error = error1;
 
+		/*
+		 * NB: During replay, the TX_SETATTR record logged by
+		 * zfs_clear_setid_bits_if_necessary must precede any of
+		 * the TX_WRITE records logged here.
+		 */
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
 		    NULL, NULL);
+
 		dmu_tx_commit(tx);
 
 		if (error != 0)
 			break;
-		ASSERT(tx_bytes == nbytes);
+		ASSERT3S(tx_bytes, ==, nbytes);
 		n -= nbytes;
 
-		if (!xuio && n > 0) {
-			if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
-				error = EFAULT;
+		if (n > 0) {
+			if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
+				error = SET_ERROR(EFAULT);
 				break;
 			}
 		}
 	}
 
-	zfs_inode_update(zp);
+	zfs_znode_update_vfs(zp);
 	zfs_rangelock_exit(lr);
 
 	/*
-	 * If we're in replay mode, or we made no progress, return error.
-	 * Otherwise, it's at least a partial write, so it's successful.
+	 * If we're in replay mode, or we made no progress, or the
+	 * uio data is inaccessible, return an error.  Otherwise, it's
+	 * at least a partial write, so it's successful.
 	 */
-	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
+	if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
+	    error == EFAULT) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
-	if (ioflag & (FSYNC | FDSYNC) ||
+	if (ioflag & (O_SYNC | O_DSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);
 
-	int64_t nwritten = start_resid - uio->uio_resid;
+	const int64_t nwritten = start_resid - zfs_uio_resid(uio);
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
 	task_io_account_write(nwritten);
 
@@ -982,56 +783,55 @@
 	return (0);
 }
 
-/*
- * Drop a reference on the passed inode asynchronously. This ensures
- * that the caller will never drop the last reference on an inode in
- * the current context. Doing so while holding open a tx could result
- * in a deadlock if iput_final() re-enters the filesystem code.
- */
-void
-zfs_iput_async(struct inode *ip)
+/*ARGSUSED*/
+int
+zfs_getsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
-	objset_t *os = ITOZSB(ip)->z_os;
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int error;
+	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
-	ASSERT(atomic_read(&ip->i_count) > 0);
-	ASSERT(os != NULL);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
+	ZFS_EXIT(zfsvfs);
 
-	if (atomic_read(&ip->i_count) == 1)
-		VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)),
-		    (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
-	else
-		iput(ip);
+	return (error);
 }
 
-/* ARGSUSED */
-void
-zfs_get_done(zgd_t *zgd, int error)
+/*ARGSUSED*/
+int
+zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
-	znode_t *zp = zgd->zgd_private;
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int error;
+	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	zilog_t	*zilog = zfsvfs->z_log;
 
-	if (zgd->zgd_db)
-		dmu_buf_rele(zgd->zgd_db, zgd);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
-	zfs_rangelock_exit(zgd->zgd_lr);
+	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 
-	/*
-	 * Release the vnode asynchronously as we currently have the
-	 * txg stopped from syncing.
-	 */
-	zfs_iput_async(ZTOI(zp));
+	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+		zil_commit(zilog, 0);
 
-	kmem_free(zgd, sizeof (zgd_t));
+	ZFS_EXIT(zfsvfs);
+	return (error);
 }
 
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 static int zil_fault_io = 0;
 #endif
 
+static void zfs_get_done(zgd_t *zgd, int error);
+
 /*
  * Get data to generate a TX_WRITE intent log record.
  */
 int
-zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
+zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	zfsvfs_t *zfsvfs = arg;
 	objset_t *os = zfsvfs->z_os;
@@ -1042,6 +842,7 @@
 	dmu_buf_t *db;
 	zgd_t *zgd;
 	int error = 0;
+	uint64_t zp_gen;
 
 	ASSERT3P(lwb, !=, NULL);
 	ASSERT3P(zio, !=, NULL);
@@ -1057,11 +858,21 @@
 		 * Release the vnode asynchronously as we currently have the
 		 * txg stopped from syncing.
 		 */
-		zfs_iput_async(ZTOI(zp));
+		zfs_zrele_async(zp);
+		return (SET_ERROR(ENOENT));
+	}
+	/* check if generation number matches */
+	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+	    sizeof (zp_gen)) != 0) {
+		zfs_zrele_async(zp);
+		return (SET_ERROR(EIO));
+	}
+	if (zp_gen != gen) {
+		zfs_zrele_async(zp);
 		return (SET_ERROR(ENOENT));
 	}
 
-	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
+	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_lwb = lwb;
 	zgd->zgd_private = zp;
 
@@ -1105,7 +916,7 @@
 		/* test for truncation needs to be done while range locked */
 		if (lr->lr_offset >= zp->z_size)
 			error = SET_ERROR(ENOENT);
-#ifdef DEBUG
+#ifdef ZFS_DEBUG
 		if (zil_fault_io) {
 			error = SET_ERROR(EIO);
 			zil_fault_io = 0;
@@ -1157,4128 +968,34 @@
 	return (error);
 }
 
-/*ARGSUSED*/
-int
-zfs_access(struct inode *ip, int mode, int flag, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int error;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	if (flag & V_ACE_MASK)
-		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
-	else
-		error = zfs_zaccess_rwx(zp, mode, flag, cr);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Lookup an entry in a directory, or an extended attribute directory.
- * If it exists, return a held inode reference for it.
- *
- *	IN:	dip	- inode of directory to search.
- *		nm	- name of entry to lookup.
- *		flags	- LOOKUP_XATTR set if looking for an attribute.
- *		cr	- credentials of caller.
- *		direntflags - directory lookup flags
- *		realpnp - returned pathname.
- *
- *	OUT:	ipp	- inode of located entry, NULL if not found.
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	NA
- */
-/* ARGSUSED */
-int
-zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
-    cred_t *cr, int *direntflags, pathname_t *realpnp)
-{
-	znode_t *zdp = ITOZ(dip);
-	zfsvfs_t *zfsvfs = ITOZSB(dip);
-	int error = 0;
-
-	/*
-	 * Fast path lookup, however we must skip DNLC lookup
-	 * for case folding or normalizing lookups because the
-	 * DNLC code only stores the passed in name.  This means
-	 * creating 'a' and removing 'A' on a case insensitive
-	 * file system would work, but DNLC still thinks 'a'
-	 * exists and won't let you create it again on the next
-	 * pass through fast path.
-	 */
-	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
-
-		if (!S_ISDIR(dip->i_mode)) {
-			return (SET_ERROR(ENOTDIR));
-		} else if (zdp->z_sa_hdl == NULL) {
-			return (SET_ERROR(EIO));
-		}
-
-		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
-			error = zfs_fastaccesschk_execute(zdp, cr);
-			if (!error) {
-				*ipp = dip;
-				igrab(*ipp);
-				return (0);
-			}
-			return (error);
-		}
-	}
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zdp);
-
-	*ipp = NULL;
-
-	if (flags & LOOKUP_XATTR) {
-		/*
-		 * We don't allow recursive attributes..
-		 * Maybe someday we will.
-		 */
-		if (zdp->z_pflags & ZFS_XATTR) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EINVAL));
-		}
-
-		if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-
-		/*
-		 * Do we have permission to get into attribute directory?
-		 */
-
-		if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0,
-		    B_FALSE, cr))) {
-			iput(*ipp);
-			*ipp = NULL;
-		}
-
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (!S_ISDIR(dip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOTDIR));
-	}
-
-	/*
-	 * Check accessibility of directory.
-	 */
-
-	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
-	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-
-	error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp);
-	if ((error == 0) && (*ipp))
-		zfs_inode_update(ITOZ(*ipp));
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Attempt to create a new entry in a directory.  If the entry
- * already exists, truncate the file if permissible, else return
- * an error.  Return the ip of the created or trunc'd file.
- *
- *	IN:	dip	- inode of directory to put new file entry in.
- *		name	- name of new file entry.
- *		vap	- attributes of new file.
- *		excl	- flag indicating exclusive or non-exclusive mode.
- *		mode	- mode to open file with.
- *		cr	- credentials of caller.
- *		flag	- file flag.
- *		vsecp	- ACL to be set
- *
- *	OUT:	ipp	- inode of created or trunc'd entry.
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	dip - ctime|mtime updated if new entry created
- *	 ip - ctime|mtime always, atime if new
- */
 
 /* ARGSUSED */
-int
-zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
-{
-	znode_t		*zp, *dzp = ITOZ(dip);
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	zilog_t		*zilog;
-	objset_t	*os;
-	zfs_dirlock_t	*dl;
-	dmu_tx_t	*tx;
-	int		error;
-	uid_t		uid;
-	gid_t		gid;
-	zfs_acl_ids_t   acl_ids;
-	boolean_t	fuid_dirtied;
-	boolean_t	have_acl = B_FALSE;
-	boolean_t	waited = B_FALSE;
-
-	/*
-	 * If we have an ephemeral id, ACL, or XVATTR then
-	 * make sure file system is at proper version
-	 */
-
-	gid = crgetgid(cr);
-	uid = crgetuid(cr);
-
-	if (zfsvfs->z_use_fuids == B_FALSE &&
-	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
-		return (SET_ERROR(EINVAL));
-
-	if (name == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	os = zfsvfs->z_os;
-	zilog = zfsvfs->z_log;
-
-	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
-	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-
-	if (vap->va_mask & ATTR_XVATTR) {
-		if ((error = secpolicy_xvattr((xvattr_t *)vap,
-		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-	}
-
-top:
-	*ipp = NULL;
-	if (*name == '\0') {
-		/*
-		 * Null component name refers to the directory itself.
-		 */
-		igrab(dip);
-		zp = dzp;
-		dl = NULL;
-		error = 0;
-	} else {
-		/* possible igrab(zp) */
-		int zflg = 0;
-
-		if (flag & FIGNORECASE)
-			zflg |= ZCILOOK;
-
-		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
-		    NULL, NULL);
-		if (error) {
-			if (have_acl)
-				zfs_acl_ids_free(&acl_ids);
-			if (strcmp(name, "..") == 0)
-				error = SET_ERROR(EISDIR);
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-	}
-
-	if (zp == NULL) {
-		uint64_t txtype;
-		uint64_t projid = ZFS_DEFAULT_PROJID;
-
-		/*
-		 * Create a new file object and update the directory
-		 * to reference it.
-		 */
-		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-			if (have_acl)
-				zfs_acl_ids_free(&acl_ids);
-			goto out;
-		}
-
-		/*
-		 * We only support the creation of regular files in
-		 * extended attribute directories.
-		 */
-
-		if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) {
-			if (have_acl)
-				zfs_acl_ids_free(&acl_ids);
-			error = SET_ERROR(EINVAL);
-			goto out;
-		}
-
-		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
-		    cr, vsecp, &acl_ids)) != 0)
-			goto out;
-		have_acl = B_TRUE;
-
-		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
-			projid = zfs_inherit_projid(dzp);
-		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
-			zfs_acl_ids_free(&acl_ids);
-			error = SET_ERROR(EDQUOT);
-			goto out;
-		}
-
-		tx = dmu_tx_create(os);
-
-		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
-		    ZFS_SA_BASE_ATTR_SIZE);
-
-		fuid_dirtied = zfsvfs->z_fuid_dirty;
-		if (fuid_dirtied)
-			zfs_fuid_txhold(zfsvfs, tx);
-		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
-		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
-		if (!zfsvfs->z_use_sa &&
-		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-			    0, acl_ids.z_aclp->z_acl_bytes);
-		}
-
-		error = dmu_tx_assign(tx,
-		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-		if (error) {
-			zfs_dirent_unlock(dl);
-			if (error == ERESTART) {
-				waited = B_TRUE;
-				dmu_tx_wait(tx);
-				dmu_tx_abort(tx);
-				goto top;
-			}
-			zfs_acl_ids_free(&acl_ids);
-			dmu_tx_abort(tx);
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
-
-		error = zfs_link_create(dl, zp, tx, ZNEW);
-		if (error != 0) {
-			/*
-			 * Since, we failed to add the directory entry for it,
-			 * delete the newly created dnode.
-			 */
-			zfs_znode_delete(zp, tx);
-			remove_inode_hash(ZTOI(zp));
-			zfs_acl_ids_free(&acl_ids);
-			dmu_tx_commit(tx);
-			goto out;
-		}
-
-		if (fuid_dirtied)
-			zfs_fuid_sync(zfsvfs, tx);
-
-		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
-		if (flag & FIGNORECASE)
-			txtype |= TX_CI;
-		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
-		    vsecp, acl_ids.z_fuidp, vap);
-		zfs_acl_ids_free(&acl_ids);
-		dmu_tx_commit(tx);
-	} else {
-		int aflags = (flag & FAPPEND) ? V_APPEND : 0;
-
-		if (have_acl)
-			zfs_acl_ids_free(&acl_ids);
-		have_acl = B_FALSE;
-
-		/*
-		 * A directory entry already exists for this name.
-		 */
-		/*
-		 * Can't truncate an existing file if in exclusive mode.
-		 */
-		if (excl) {
-			error = SET_ERROR(EEXIST);
-			goto out;
-		}
-		/*
-		 * Can't open a directory for writing.
-		 */
-		if (S_ISDIR(ZTOI(zp)->i_mode)) {
-			error = SET_ERROR(EISDIR);
-			goto out;
-		}
-		/*
-		 * Verify requested access to file.
-		 */
-		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
-			goto out;
-		}
-
-		mutex_enter(&dzp->z_lock);
-		dzp->z_seq++;
-		mutex_exit(&dzp->z_lock);
-
-		/*
-		 * Truncate regular files if requested.
-		 */
-		if (S_ISREG(ZTOI(zp)->i_mode) &&
-		    (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
-			/* we can't hold any locks when calling zfs_freesp() */
-			if (dl) {
-				zfs_dirent_unlock(dl);
-				dl = NULL;
-			}
-			error = zfs_freesp(zp, 0, 0, mode, TRUE);
-		}
-	}
-out:
-
-	if (dl)
-		zfs_dirent_unlock(dl);
-
-	if (error) {
-		if (zp)
-			iput(ZTOI(zp));
-	} else {
-		zfs_inode_update(dzp);
-		zfs_inode_update(zp);
-		*ipp = ZTOI(zp);
-	}
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Create a file object that has no directory entry.
- *
- * The new znode is created with IS_TMPFILE, marked z_unlinked and placed
- * on the unlinked set inside the same transaction that creates it.
- *
- *	IN:	dip	- inode of the directory the object is created under.
- *		vap	- attributes of the new object.
- *		cr	- credentials of caller.
- *		vsecp	- ACL to be set
- *		excl, mode, flag - not referenced by this function.
- *
- *	OUT:	ipp	- inode of the created object on success.
- *
- *	RETURN:	0 if success
- *		error code if failure
- */
-/* ARGSUSED */
-int
-zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
-{
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	znode_t		*dzp = ITOZ(dip);
-	znode_t		*zp = NULL;
-	objset_t	*os;
-	dmu_tx_t	*tx;
-	zfs_acl_ids_t	acl_ids;
-	uint64_t	projid = ZFS_DEFAULT_PROJID;
-	gid_t		gid = crgetgid(cr);
-	uid_t		uid = crgetuid(cr);
-	boolean_t	fuid_dirtied;
-	boolean_t	have_acl = B_FALSE;
-	boolean_t	waited = B_FALSE;
-	int		error;
-
-	/*
-	 * An ephemeral id or an ACL can only be stored when the file
-	 * system uses FUIDs.
-	 */
-	if (!zfsvfs->z_use_fuids &&
-	    (vsecp != NULL || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	os = zfsvfs->z_os;
-
-	if (vap->va_mask & ATTR_XVATTR) {
-		error = secpolicy_xvattr((xvattr_t *)vap, crgetuid(cr), cr,
-		    vap->va_mode);
-		if (error != 0) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-	}
-
-top:
-	*ipp = NULL;
-
-	/*
-	 * The caller needs permission to add a file to the directory.
-	 */
-	error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr);
-	if (error != 0) {
-		if (have_acl)
-			zfs_acl_ids_free(&acl_ids);
-		goto out;
-	}
-
-	/*
-	 * The ACL ids are built on the first pass only and are reused
-	 * when the transaction is retried from "top".
-	 */
-	if (!have_acl) {
-		error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids);
-		if (error != 0)
-			goto out;
-	}
-	have_acl = B_TRUE;
-
-	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
-		projid = zfs_inherit_projid(dzp);
-	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
-		zfs_acl_ids_free(&acl_ids);
-		error = SET_ERROR(EDQUOT);
-		goto out;
-	}
-
-	tx = dmu_tx_create(os);
-
-	dmu_tx_hold_sa_create(tx,
-	    acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE);
-	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-	if (!zfsvfs->z_use_sa &&
-	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-		    0, acl_ids.z_aclp->z_acl_bytes);
-	}
-
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error == ERESTART) {
-		/* Wait for the next txg, then start over without throttle. */
-		waited = B_TRUE;
-		dmu_tx_wait(tx);
-		dmu_tx_abort(tx);
-		goto top;
-	}
-	if (error != 0) {
-		zfs_acl_ids_free(&acl_ids);
-		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-	/* Add to unlinked set */
-	zp->z_unlinked = B_TRUE;
-	zfs_unlinked_add(zp, tx);
-	zfs_acl_ids_free(&acl_ids);
-	dmu_tx_commit(tx);
-out:
-
-	if (error == 0) {
-		zfs_inode_update(dzp);
-		zfs_inode_update(zp);
-		*ipp = ZTOI(zp);
-	} else if (zp != NULL) {
-		iput(ZTOI(zp));
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Remove an entry from a directory.
- *
- * Directories are refused with EPERM.  When the transaction cannot be
- * assigned without waiting (ERESTART) the dirent lock and the inode holds
- * taken so far are dropped and the whole operation restarts at "top".
- *
- *	IN:	dip	- inode of directory to remove entry from.
- *		name	- name of entry to remove.
- *		cr	- credentials of caller.
- *		flags	- case flags.
- *
- *	RETURN:	0 if success
- *		EINVAL if name is NULL
- *		error code if failure
- *
- * Timestamps:
- *	dip - ctime|mtime
- *	 ip - ctime (if nlink > 0)
- */
-
-uint64_t null_xattr = 0;	/* XATTR value written for non-SA znodes */
-
-/*ARGSUSED*/
-int
-zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags)
-{
-	znode_t		*zp, *dzp = ITOZ(dip);
-	znode_t		*xzp;
-	struct inode	*ip;
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	zilog_t		*zilog;
-	uint64_t	acl_obj, xattr_obj;
-	uint64_t	xattr_obj_unlinked = 0;
-	uint64_t	obj = 0;
-	uint64_t	links;
-	zfs_dirlock_t	*dl;
-	dmu_tx_t	*tx;
-	boolean_t	may_delete_now, delete_now = FALSE;
-	boolean_t	unlinked, toobig = FALSE;
-	uint64_t	txtype;
-	pathname_t	*realnmp = NULL;
-	pathname_t	realnm;
-	int		error;
-	int		zflg = ZEXISTS;
-	boolean_t	waited = B_FALSE;
-
-	if (name == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	zilog = zfsvfs->z_log;
-
-	if (flags & FIGNORECASE) {
-		zflg |= ZCILOOK;
-		pn_alloc(&realnm);
-		realnmp = &realnm;
-	}
-
-top:
-	xattr_obj = 0;
-	xzp = NULL;
-	/*
-	 * Attempt to lock directory; fail if entry doesn't exist.
-	 */
-	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
-	    NULL, realnmp))) {
-		if (realnmp)
-			pn_free(realnmp);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	ip = ZTOI(zp);
-
-	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
-		goto out;
-	}
-
-	/*
-	 * Need to use rmdir for removing directories.
-	 */
-	if (S_ISDIR(ip->i_mode)) {
-		error = SET_ERROR(EPERM);
-		goto out;
-	}
-
-	mutex_enter(&zp->z_lock);
-	may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped);
-	mutex_exit(&zp->z_lock);
-
-	/*
-	 * We may delete the znode now, or we may put it in the unlinked set;
-	 * it depends on whether we're the last link, and on whether there are
-	 * other holds on the inode.  So we dmu_tx_hold() the right things to
-	 * allow for either case.
-	 */
-	obj = zp->z_id;
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-	zfs_sa_upgrade_txholds(tx, dzp);
-	if (may_delete_now) {
-		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
-		/* if the file is too big, only hold_free a token amount */
-		dmu_tx_hold_free(tx, zp->z_id, 0,
-		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
-	}
-
-	/*
-	 * are there any extended attributes?
-	 *
-	 * A failed lookup is treated the same as "no xattr object".
-	 * NOTE(review): a zfs_zget() failure is only caught by ASSERT0();
-	 * if that assertion is compiled out, xzp stays NULL and is
-	 * dereferenced two lines later -- confirm this cannot happen.
-	 */
-	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
-	    &xattr_obj, sizeof (xattr_obj));
-	if (error == 0 && xattr_obj) {
-		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
-		ASSERT0(error);
-		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
-	}
-
-	mutex_enter(&zp->z_lock);
-	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
-		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
-	mutex_exit(&zp->z_lock);
-
-	/* charge as an update -- would be nice not to charge at all */
-	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-
-	/*
-	 * Mark this transaction as typically resulting in a net free of space
-	 */
-	dmu_tx_mark_netfree(tx);
-
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		zfs_dirent_unlock(dl);
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			iput(ip);
-			if (xzp)
-				iput(ZTOI(xzp));
-			goto top;
-		}
-		if (realnmp)
-			pn_free(realnmp);
-		dmu_tx_abort(tx);
-		iput(ip);
-		if (xzp)
-			iput(ZTOI(xzp));
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	/*
-	 * Remove the directory entry.
-	 */
-	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
-
-	if (error) {
-		dmu_tx_commit(tx);
-		goto out;
-	}
-
-	if (unlinked) {
-		/*
-		 * Hold z_lock so that we can make sure that the ACL obj
-		 * hasn't changed.  Could have been deleted due to
-		 * zfs_sa_upgrade().
-		 *
-		 * The lock taken here is released in whichever of the two
-		 * branches below runs.
-		 */
-		mutex_enter(&zp->z_lock);
-		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
-		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
-		delete_now = may_delete_now && !toobig &&
-		    atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) &&
-		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
-		    acl_obj;
-	}
-
-	if (delete_now) {
-		if (xattr_obj_unlinked) {
-			ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
-			mutex_enter(&xzp->z_lock);
-			xzp->z_unlinked = B_TRUE;
-			clear_nlink(ZTOI(xzp));
-			links = 0;
-			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
-			    &links, sizeof (links), tx);
-			ASSERT3U(error,  ==,  0);
-			mutex_exit(&xzp->z_lock);
-			zfs_unlinked_add(xzp, tx);
-
-			if (zp->z_is_sa)
-				error = sa_remove(zp->z_sa_hdl,
-				    SA_ZPL_XATTR(zfsvfs), tx);
-			else
-				error = sa_update(zp->z_sa_hdl,
-				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
-				    sizeof (uint64_t), tx);
-			ASSERT0(error);
-		}
-		/*
-		 * Add to the unlinked set because a new reference could be
-		 * taken concurrently resulting in a deferred destruction.
-		 */
-		zfs_unlinked_add(zp, tx);
-		mutex_exit(&zp->z_lock);
-	} else if (unlinked) {
-		mutex_exit(&zp->z_lock);
-		zfs_unlinked_add(zp, tx);
-	}
-
-	txtype = TX_REMOVE;
-	if (flags & FIGNORECASE)
-		txtype |= TX_CI;
-	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
-
-	dmu_tx_commit(tx);
-out:
-	if (realnmp)
-		pn_free(realnmp);
-
-	zfs_dirent_unlock(dl);
-	zfs_inode_update(dzp);
-	zfs_inode_update(zp);
-
-	if (delete_now)
-		iput(ip);
-	else
-		zfs_iput_async(ip);
-
-	if (xzp) {
-		zfs_inode_update(xzp);
-		zfs_iput_async(ZTOI(xzp));
-	}
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Create a new directory and insert it into dip using the name
- * provided.  Return a pointer to the inserted directory.
- *
- * The ACL ids are created once, before "top", and are reused when the
- * transaction has to be retried after ERESTART.
- *
- *	IN:	dip	- inode of directory to add subdir to.
- *		dirname	- name of new directory.
- *		vap	- attributes of new directory.
- *		cr	- credentials of caller.
- *		flags	- case flags.
- *		vsecp	- ACL to be set
- *
- *	OUT:	ipp	- inode of created directory.
- *
- *	RETURN:	0 if success
- *		EINVAL if dirname is NULL, if dip is an xattr directory, or
- *		    if an ACL/ephemeral id is used without FUID support
- *		EILSEQ if dirname fails UTF-8 validation
- *		EDQUOT if the new directory would exceed a quota
- *		error code if failure
- *
- * Timestamps:
- *	dip - ctime|mtime updated
- *	ipp - ctime|mtime|atime updated
- */
-/*ARGSUSED*/
-int
-zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
-    cred_t *cr, int flags, vsecattr_t *vsecp)
-{
-	znode_t		*zp, *dzp = ITOZ(dip);
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	zilog_t		*zilog;
-	zfs_dirlock_t	*dl;
-	uint64_t	txtype;
-	dmu_tx_t	*tx;
-	int		error;
-	int		zf = ZNEW;
-	uid_t		uid;
-	gid_t		gid = crgetgid(cr);
-	zfs_acl_ids_t   acl_ids;
-	boolean_t	fuid_dirtied;
-	boolean_t	waited = B_FALSE;
-
-	ASSERT(S_ISDIR(vap->va_mode));
-
-	/*
-	 * If we have an ephemeral id, ACL, or XVATTR then
-	 * make sure file system is at proper version
-	 */
-
-	uid = crgetuid(cr);
-	if (zfsvfs->z_use_fuids == B_FALSE &&
-	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
-		return (SET_ERROR(EINVAL));
-
-	if (dirname == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	zilog = zfsvfs->z_log;
-
-	if (dzp->z_pflags & ZFS_XATTR) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	if (zfsvfs->z_utf8 && u8_validate(dirname,
-	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-	if (flags & FIGNORECASE)
-		zf |= ZCILOOK;
-
-	if (vap->va_mask & ATTR_XVATTR) {
-		if ((error = secpolicy_xvattr((xvattr_t *)vap,
-		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-	}
-
-	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
-	    vsecp, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-	/*
-	 * First make sure the new directory doesn't exist.
-	 *
-	 * Existence is checked first to make sure we don't return
-	 * EACCES instead of EEXIST which can cause some applications
-	 * to fail.
-	 */
-top:
-	*ipp = NULL;
-
-	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
-	    NULL, NULL))) {
-		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
-		zfs_acl_ids_free(&acl_ids);
-		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
-		zfs_acl_ids_free(&acl_ids);
-		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EDQUOT));
-	}
-
-	/*
-	 * Add a new entry to the directory.
-	 */
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
-	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-		    acl_ids.z_aclp->z_acl_bytes);
-	}
-
-	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
-	    ZFS_SA_BASE_ATTR_SIZE);
-
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		zfs_dirent_unlock(dl);
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			goto top;	/* acl_ids is kept for the retry */
-		}
-		zfs_acl_ids_free(&acl_ids);
-		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	/*
-	 * Create new node.
-	 */
-	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
-
-	/*
-	 * Now put new name in parent dir.
-	 */
-	error = zfs_link_create(dl, zp, tx, ZNEW);
-	if (error != 0) {
-		zfs_znode_delete(zp, tx);	/* no entry was added for it */
-		remove_inode_hash(ZTOI(zp));
-		goto out;
-	}
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-	*ipp = ZTOI(zp);
-
-	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
-	if (flags & FIGNORECASE)
-		txtype |= TX_CI;
-	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
-	    acl_ids.z_fuidp, vap);
-
-out:
-	zfs_acl_ids_free(&acl_ids);
-
-	dmu_tx_commit(tx);	/* also reached when the link step failed */
-
-	zfs_dirent_unlock(dl);
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	if (error != 0) {
-		iput(ZTOI(zp));
-	} else {
-		zfs_inode_update(dzp);
-		zfs_inode_update(zp);
-	}
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Remove a directory subdir entry.  If the current working
- * directory is the same as the subdir to be removed, the
- * remove will fail.
- *
- * The target's z_name_lock and z_parent_lock are both held as writer
- * from before the transaction is created until after it is committed
- * (or until the assignment fails).
- *
- *	IN:	dip	- inode of directory to remove from.
- *		name	- name of directory to be removed.
- *		cwd	- inode of current working directory.
- *		cr	- credentials of caller.
- *		flags	- case flags
- *
- *	RETURN:	0 on success, error code on failure.
- *		EINVAL if name is NULL or the target is cwd,
- *		ENOTDIR if the target is not a directory.
- *
- * Timestamps:
- *	dip - ctime|mtime updated
- */
-/*ARGSUSED*/
-int
-zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
-    int flags)
-{
-	znode_t		*dzp = ITOZ(dip);
-	znode_t		*zp;
-	struct inode	*ip;
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	zilog_t		*zilog;
-	zfs_dirlock_t	*dl;
-	dmu_tx_t	*tx;
-	int		error;
-	int		zflg = ZEXISTS;
-	boolean_t	waited = B_FALSE;
-
-	if (name == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	zilog = zfsvfs->z_log;
-
-	if (flags & FIGNORECASE)
-		zflg |= ZCILOOK;
-top:
-	zp = NULL;
-
-	/*
-	 * Attempt to lock directory; fail if entry doesn't exist.
-	 */
-	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
-	    NULL, NULL))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	ip = ZTOI(zp);
-
-	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
-		goto out;
-	}
-
-	if (!S_ISDIR(ip->i_mode)) {
-		error = SET_ERROR(ENOTDIR);
-		goto out;
-	}
-
-	if (ip == cwd) {
-		error = SET_ERROR(EINVAL);
-		goto out;
-	}
-
-	/*
-	 * Grab a lock on the directory to make sure that no one is
-	 * trying to add (or lookup) entries while we are removing it.
-	 */
-	rw_enter(&zp->z_name_lock, RW_WRITER);
-
-	/*
-	 * Grab a lock on the parent pointer to make sure we play well
-	 * with the treewalk and directory rename code.
-	 */
-	rw_enter(&zp->z_parent_lock, RW_WRITER);
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-	zfs_sa_upgrade_txholds(tx, zp);
-	zfs_sa_upgrade_txholds(tx, dzp);
-	dmu_tx_mark_netfree(tx);
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		rw_exit(&zp->z_parent_lock);	/* reverse of acquisition */
-		rw_exit(&zp->z_name_lock);
-		zfs_dirent_unlock(dl);
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			iput(ip);
-			goto top;
-		}
-		dmu_tx_abort(tx);
-		iput(ip);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
-
-	if (error == 0) {
-		uint64_t txtype = TX_RMDIR;
-		if (flags & FIGNORECASE)
-			txtype |= TX_CI;
-		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
-		    B_FALSE);
-	}
-
-	dmu_tx_commit(tx);	/* committed whether or not the unlink worked */
-
-	rw_exit(&zp->z_parent_lock);
-	rw_exit(&zp->z_name_lock);
-out:
-	zfs_dirent_unlock(dl);
-
-	zfs_inode_update(dzp);
-	zfs_inode_update(zp);
-	iput(ip);
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Read directory entries from the given directory cursor position and emit
- * name and position for each entry.
- *
- *	IN:	ip	- inode of directory to read.
- *		ctx	- directory entry context.
- *		cr	- credentials of caller.
- *
- *	RETURN:	0 if success
- *		error code if failure
- *
- * Timestamps:
- *	ip - atime updated
- *
- * Note that the low 4 bits of the cookie returned by zap is always zero.
- * This allows us to use the low range for "special" directory entries:
- * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
- * we use the offset 2 for the '.zfs' directory.
- *
- * A ctx->pos of 3 or less starts the zap cursor at the beginning of the
- * directory; any larger value is taken to be a serialized zap cursor.
- * Iteration ends when zpl_dir_emit() returns 0 or when the cursor reports
- * ENOENT; that ENOENT is not passed back to the caller.  A zap entry
- * whose integer length is not 8 or that holds no integers ends the read
- * with ENXIO.  An unlinked directory yields no entries and returns 0.
- */
-/* ARGSUSED */
-int
-zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	objset_t	*os;
-	zap_cursor_t	zc;
-	zap_attribute_t	zap;
-	int		error;
-	uint8_t		prefetch;
-	uint8_t		type;
-	int		done = 0;
-	uint64_t	parent;
-	uint64_t	offset; /* must be unsigned; checks for < 1 */
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
-	    &parent, sizeof (parent))) != 0)
-		goto out;
-
-	/*
-	 * Quit if directory has been removed (posix)
-	 */
-	if (zp->z_unlinked)
-		goto out;	/* error is 0 at this point */
-
-	error = 0;
-	os = zfsvfs->z_os;
-	offset = ctx->pos;
-	prefetch = zp->z_zn_prefetch;
-
-	/*
-	 * Initialize the iterator cursor.
-	 */
-	if (offset <= 3) {
-		/*
-		 * Start iteration from the beginning of the directory.
-		 */
-		zap_cursor_init(&zc, os, zp->z_id);
-	} else {
-		/*
-		 * The offset is a serialized cursor.
-		 */
-		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
-	}
-
-	/*
-	 * Transform to file-system independent format
-	 */
-	while (!done) {
-		uint64_t objnum;
-		/*
-		 * Special case `.', `..', and `.zfs'.
-		 */
-		if (offset == 0) {
-			(void) strcpy(zap.za_name, ".");
-			zap.za_normalization_conflict = 0;
-			objnum = zp->z_id;
-			type = DT_DIR;
-		} else if (offset == 1) {
-			(void) strcpy(zap.za_name, "..");
-			zap.za_normalization_conflict = 0;
-			objnum = parent;
-			type = DT_DIR;
-		} else if (offset == 2 && zfs_show_ctldir(zp)) {
-			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
-			zap.za_normalization_conflict = 0;
-			objnum = ZFSCTL_INO_ROOT;
-			type = DT_DIR;
-		} else {
-			/*
-			 * Grab next entry.
-			 */
-			if ((error = zap_cursor_retrieve(&zc, &zap))) {
-				if (error == ENOENT)
-					break;
-				else
-					goto update;
-			}
-
-			/*
-			 * Allow multiple entries provided the first entry is
-			 * the object id.  Non-zpl consumers may safely make
-			 * use of the additional space.
-			 *
-			 * XXX: This should be a feature flag for compatibility
-			 */
-			if (zap.za_integer_length != 8 ||
-			    zap.za_num_integers == 0) {
-				cmn_err(CE_WARN, "zap_readdir: bad directory "
-				    "entry, obj = %lld, offset = %lld, "
-				    "length = %d, num = %lld\n",
-				    (u_longlong_t)zp->z_id,
-				    (u_longlong_t)offset,
-				    zap.za_integer_length,
-				    (u_longlong_t)zap.za_num_integers);
-				error = SET_ERROR(ENXIO);
-				goto update;
-			}
-
-			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
-			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
-		}
-
-		done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name),
-		    objnum, type);
-		if (done)
-			break;	/* ctx->pos keeps the position of this entry */
-
-		/* Prefetch znode */
-		if (prefetch) {
-			dmu_prefetch(os, objnum, 0, 0, 0,
-			    ZIO_PRIORITY_SYNC_READ);
-		}
-
-		/*
-		 * Move to the next entry, fill in the previous offset.
-		 */
-		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
-			zap_cursor_advance(&zc);
-			offset = zap_cursor_serialize(&zc);
-		} else {
-			offset += 1;
-		}
-		ctx->pos = offset;
-	}
-	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
-
-update:
-	zap_cursor_fini(&zc);
-	if (error == ENOENT)
-		error = 0;
-out:
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/* Value stored under zfs_fsyncer_key while zfs_fsync() is running. */
-ulong_t zfs_fsync_sync_cnt = 4;
-
-/*
- * Commit the intent log for one file.
- *
- * zfs_fsyncer_key is set to zfs_fsync_sync_cnt on entry and cleared again
- * before the normal return.  The log commit itself is skipped when the
- * objset has sync disabled.  syncflag and cr are not referenced.
- *
- *	IN:	ip	- inode of file to be committed.
- *
- *	RETURN:	0 on the path visible here.
- */
-int
-zfs_fsync(struct inode *ip, int syncflag, cred_t *cr)
-{
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	znode_t		*zp = ITOZ(ip);
-
-	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_DISABLED)
-		goto done;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-	zil_commit(zfsvfs->z_log, zp->z_id);
-	ZFS_EXIT(zfsvfs);
-
-done:
-	(void) tsd_set(zfs_fsyncer_key, NULL);
-
-	return (0);
-}
-
-
-/*
- * Get the requested file attributes and place them in the provided
- * vattr structure.
- *
- *	IN:	ip	- inode of file.
- *		vap	- va_mask identifies requested attributes.
- *			  If ATTR_XVATTR set, then optional attrs are requested
- *		flags	- ATTR_NOACLCHECK (CIFS server context)
- *		cr	- credentials of caller.
- *
- *	OUT:	vap	- attribute values.
- *
- *	RETURN:	0 if success
- *		error code if the atime/mtime/ctime lookup fails or if the
- *		ACE_READ_ATTRIBUTES access check denies the caller
- */
-/* ARGSUSED */
-int
-zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int	error = 0;
-	uint64_t links;
-	uint64_t atime[2], mtime[2], ctime[2];
-	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
-	xoptattr_t *xoap = NULL;
-	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
-	sa_bulk_attr_t bulk[3];	/* one slot each for atime, mtime, ctime */
-	int count = 0;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
-
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
-
-	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	/*
-	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
-	 * Also, if we are the owner don't bother, since owner should
-	 * always be allowed to read basic attributes of file.
-	 */
-	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
-	    (vap->va_uid != crgetuid(cr))) {
-		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
-		    skipaclchk, cr))) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-	}
-
-	/*
-	 * Return all attributes.  It's cheaper to provide the answer
-	 * than to determine whether we were asked the question.
-	 */
-
-	mutex_enter(&zp->z_lock);
-	vap->va_type = vn_mode_to_vtype(zp->z_mode);
-	vap->va_mode = zp->z_mode;
-	vap->va_fsid = ZTOI(zp)->i_sb->s_dev;
-	vap->va_nodeid = zp->z_id;
-	if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
-		links = ZTOI(zp)->i_nlink + 1;	/* count the '.zfs' entry */
-	else
-		links = ZTOI(zp)->i_nlink;
-	vap->va_nlink = MIN(links, ZFS_LINK_MAX);
-	vap->va_size = i_size_read(ip);
-	vap->va_rdev = ip->i_rdev;
-	vap->va_seq = ip->i_generation;
-
-	/*
-	 * Add in any requested optional attributes and the create time.
-	 * Also set the corresponding bits in the returned attribute bitmap.
-	 */
-	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
-		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
-			xoap->xoa_archive =
-			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
-			XVA_SET_RTN(xvap, XAT_ARCHIVE);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
-			xoap->xoa_readonly =
-			    ((zp->z_pflags & ZFS_READONLY) != 0);
-			XVA_SET_RTN(xvap, XAT_READONLY);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
-			xoap->xoa_system =
-			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
-			XVA_SET_RTN(xvap, XAT_SYSTEM);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
-			xoap->xoa_hidden =
-			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
-			XVA_SET_RTN(xvap, XAT_HIDDEN);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
-			xoap->xoa_nounlink =
-			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
-			XVA_SET_RTN(xvap, XAT_NOUNLINK);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
-			xoap->xoa_immutable =
-			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
-			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
-			xoap->xoa_appendonly =
-			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
-			XVA_SET_RTN(xvap, XAT_APPENDONLY);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
-			xoap->xoa_nodump =
-			    ((zp->z_pflags & ZFS_NODUMP) != 0);
-			XVA_SET_RTN(xvap, XAT_NODUMP);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
-			xoap->xoa_opaque =
-			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
-			XVA_SET_RTN(xvap, XAT_OPAQUE);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
-			xoap->xoa_av_quarantined =
-			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
-			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
-			xoap->xoa_av_modified =
-			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
-			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
-		    S_ISREG(ip->i_mode)) {
-			zfs_sa_get_scanstamp(zp, xvap);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
-			uint64_t times[2];
-
-			(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
-			    times, sizeof (times));
-			ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
-			XVA_SET_RTN(xvap, XAT_CREATETIME);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
-			XVA_SET_RTN(xvap, XAT_REPARSE);
-		}
-		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
-			xoap->xoa_generation = ip->i_generation;
-			XVA_SET_RTN(xvap, XAT_GEN);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
-			xoap->xoa_offline =
-			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
-			XVA_SET_RTN(xvap, XAT_OFFLINE);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
-			xoap->xoa_sparse =
-			    ((zp->z_pflags & ZFS_SPARSE) != 0);
-			XVA_SET_RTN(xvap, XAT_SPARSE);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
-			xoap->xoa_projinherit =
-			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
-			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
-			xoap->xoa_projid = zp->z_projid;
-			XVA_SET_RTN(xvap, XAT_PROJID);
-		}
-	}
-
-	ZFS_TIME_DECODE(&vap->va_atime, atime);
-	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
-	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
-
-	mutex_exit(&zp->z_lock);
-
-	sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);
-
-	if (zp->z_blksz == 0) {
-		/*
-		 * Block size hasn't been set; suggest maximal I/O transfers.
-		 */
-		vap->va_blksize = zfsvfs->z_max_blksz;
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*
- * Fill a kstat structure with the basic attributes of a file.
- *
- * generic_fillattr() supplies the values from the inode.  The link count,
- * block size and block count are then adjusted from the znode, and the
- * inode number is replaced for the root of a snapshot.
- *
- *	IN:	ip	- inode of file.
- *
- *	OUT:	sp	- kstat values.
- *
- *	RETURN:	0 on the path visible here.
- */
-/* ARGSUSED */
-int
-zfs_getattr_fast(struct inode *ip, struct kstat *sp)
-{
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	znode_t		*zp = ITOZ(ip);
-	u_longlong_t	nblocks;
-	uint32_t	blksize;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	mutex_enter(&zp->z_lock);
-
-	generic_fillattr(ip, sp);
-
-	/*
-	 * A visible '.zfs' directory counts as one more link on the root
-	 * inode, unless the count already sits at ZFS_LINK_MAX.
-	 */
-	if (zp->z_id == zfsvfs->z_root && zfs_show_ctldir(zp) &&
-	    sp->nlink < ZFS_LINK_MAX) {
-		sp->nlink++;
-	}
-
-	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
-	sp->blocks = nblocks;
-	if (unlikely(zp->z_blksz == 0)) {
-		/*
-		 * No block size has been set yet, so report the largest
-		 * block size of the file system instead.
-		 */
-		sp->blksize = zfsvfs->z_max_blksz;
-	} else {
-		sp->blksize = blksize;
-	}
-
-	mutex_exit(&zp->z_lock);
-
-	/*
-	 * Required to prevent NFS client from detecting different inode
-	 * numbers of snapshot root dentry before and after snapshot mount.
-	 */
-	if (zfsvfs->z_issnap && ip->i_sb->s_root->d_inode == ip) {
-		sp->ino = ZFSCTL_INO_SNAPDIRS -
-		    dmu_objset_id(zfsvfs->z_os);
-	}
-
-	ZFS_EXIT(zfsvfs);
-
-	return (0);
-}
-
-/*
- * For the operation of changing file's user/group/project, we need to
- * handle not only the main object that is assigned to the file directly,
- * but also the ones that are used by the file via hidden xattr directory.
- *
- * Because the xattr directory may contain many EA entries, it may be
- * impossible to change all of them within the transaction that changes the
- * main object's user/group/project attributes. So each entry is changed in
- * its own independent transaction, one by one. It may not be a good
- * solution, but we have no better idea yet.
- *
- *	IN:	dzp	- znode of the directory whose entries are updated;
- *			  its uid, gid and projid are copied to every entry
- *			  that differs.
- *
- *	RETURN:	0 if success (running off the end of the directory, or an
- *		entry that has disappeared, is not an error)
- *		ENXIO if a directory entry is malformed
- *		error code if failure
- */
-static int
-zfs_setattr_dir(znode_t *dzp)
-{
-	struct inode	*dxip = ZTOI(dzp);
-	struct inode	*xip = NULL;
-	zfsvfs_t	*zfsvfs = ITOZSB(dxip);
-	objset_t	*os = zfsvfs->z_os;
-	zap_cursor_t	zc;
-	zap_attribute_t	zap;
-	zfs_dirlock_t	*dl;
-	znode_t		*zp;
-	dmu_tx_t	*tx = NULL;
-	uint64_t	uid, gid;
-	sa_bulk_attr_t	bulk[4];	/* uid, gid, flags, projid */
-	int		count;
-	int		err;
-
-	zap_cursor_init(&zc, os, dzp->z_id);
-	while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) {
-		count = 0;
-		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
-			/* Originate the error the same way zfs_readdir does. */
-			err = SET_ERROR(ENXIO);
-			break;
-		}
-
-		err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp,
-		    ZEXISTS, NULL, NULL);
-		if (err == ENOENT)
-			goto next;	/* entry vanished; xip is NULL here */
-		if (err)
-			break;
-
-		xip = ZTOI(zp);
-		/* Nothing to do when the entry already matches the directory. */
-		if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) &&
-		    KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) &&
-		    zp->z_projid == dzp->z_projid)
-			goto next;
-
-		tx = dmu_tx_create(os);
-		if (!(zp->z_pflags & ZFS_PROJID))
-			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-		else
-			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-
-		err = dmu_tx_assign(tx, TXG_WAIT);
-		if (err)
-			break;	/* tx, xip and dl are released after the loop */
-
-		mutex_enter(&dzp->z_lock);
-
-		if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) {
-			xip->i_uid = dxip->i_uid;
-			uid = zfs_uid_read(dxip);
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
-			    &uid, sizeof (uid));
-		}
-
-		if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) {
-			xip->i_gid = dxip->i_gid;
-			gid = zfs_gid_read(dxip);
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
-			    &gid, sizeof (gid));
-		}
-
-		if (zp->z_projid != dzp->z_projid) {
-			if (!(zp->z_pflags & ZFS_PROJID)) {
-				zp->z_pflags |= ZFS_PROJID;
-				SA_ADD_BULK_ATTR(bulk, count,
-				    SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags,
-				    sizeof (zp->z_pflags));
-			}
-
-			zp->z_projid = dzp->z_projid;
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs),
-			    NULL, &zp->z_projid, sizeof (zp->z_projid));
-		}
-
-		mutex_exit(&dzp->z_lock);
-
-		if (likely(count > 0)) {
-			err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-			dmu_tx_commit(tx);
-		} else {
-			dmu_tx_abort(tx);
-		}
-		tx = NULL;
-		if (err != 0 && err != ENOENT)
-			break;
-
-next:
-		/* xip doubles as the "dirent lock is held" marker. */
-		if (xip) {
-			iput(xip);
-			xip = NULL;
-			zfs_dirent_unlock(dl);
-		}
-		zap_cursor_advance(&zc);
-	}
-
-	/* Release whatever an early break left behind. */
-	if (tx)
-		dmu_tx_abort(tx);
-	if (xip) {
-		iput(xip);
-		zfs_dirent_unlock(dl);
-	}
-	zap_cursor_fini(&zc);
-
-	return (err == ENOENT ? 0 : err);
-}
-
-/*
- * Set the file attributes to the values contained in the
- * vattr structure.
- *
- *	IN:	ip	- inode of file to be modified.
- *		vap	- new attribute values.
- *			  If ATTR_XVATTR set, then optional attrs are being set
- *		flags	- ATTR_UTIME set if non-default time values provided.
- *			- ATTR_NOACLCHECK (CIFS context only).
- *		cr	- credentials of caller.
- *
- *	RETURN:	0 if success
- *		error code if failure
- *
- * Timestamps:
- *	ip - ctime updated, mtime updated if size changed.
- */
-/* ARGSUSED */
-int
-zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	objset_t	*os = zfsvfs->z_os;
-	zilog_t		*zilog;
-	dmu_tx_t	*tx;
-	vattr_t		oldva;
-	xvattr_t	*tmpxvattr;
-	uint_t		mask = vap->va_mask;
-	uint_t		saved_mask = 0;
-	int		trim_mask = 0;
-	uint64_t	new_mode;
-	uint64_t	new_kuid = 0, new_kgid = 0, new_uid, new_gid;
-	uint64_t	xattr_obj;
-	uint64_t	mtime[2], ctime[2], atime[2];
-	uint64_t	projid = ZFS_INVALID_PROJID;
-	znode_t		*attrzp;
-	int		need_policy = FALSE;
-	int		err, err2 = 0;
-	zfs_fuid_info_t *fuidp = NULL;
-	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
-	xoptattr_t	*xoap;
-	zfs_acl_t	*aclp;
-	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
-	boolean_t	fuid_dirtied = B_FALSE;
-	boolean_t	handle_eadir = B_FALSE;
-	sa_bulk_attr_t	*bulk, *xattr_bulk;
-	int		count = 0, xattr_count = 0, bulks = 8;
-
-	if (mask == 0)
-		return (0);
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	/*
-	 * If this is a xvattr_t, then get a pointer to the structure of
-	 * optional attributes.  If this is NULL, then we have a vattr_t.
-	 */
-	xoap = xva_getxoptattr(xvap);
-	if (xoap != NULL && (mask & ATTR_XVATTR)) {
-		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
-			if (!dmu_objset_projectquota_enabled(os) ||
-			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
-				ZFS_EXIT(zfsvfs);
-				return (SET_ERROR(ENOTSUP));
-			}
-
-			projid = xoap->xoa_projid;
-			if (unlikely(projid == ZFS_INVALID_PROJID)) {
-				ZFS_EXIT(zfsvfs);
-				return (SET_ERROR(EINVAL));
-			}
-
-			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
-				projid = ZFS_INVALID_PROJID;
-			else
-				need_policy = TRUE;
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
-		    (xoap->xoa_projinherit !=
-		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
-		    (!dmu_objset_projectquota_enabled(os) ||
-		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(ENOTSUP));
-		}
-	}
-
-	zilog = zfsvfs->z_log;
-
-	/*
-	 * Make sure that if we have ephemeral uid/gid or xvattr specified
-	 * that file system is at proper version level
-	 */
-
-	if (zfsvfs->z_use_fuids == B_FALSE &&
-	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
-	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
-	    (mask & ATTR_XVATTR))) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EISDIR));
-	}
-
-	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP);
-	xva_init(tmpxvattr);
-
-	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
-	xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
-
-	/*
-	 * Immutable files can only alter immutable bit and atime
-	 */
-	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
-	    ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) ||
-	    ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
-		err = SET_ERROR(EPERM);
-		goto out3;
-	}
-
-	if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
-		err = SET_ERROR(EPERM);
-		goto out3;
-	}
-
-	/*
-	 * Verify timestamps doesn't overflow 32 bits.
-	 * ZFS can handle large timestamps, but 32bit syscalls can't
-	 * handle times greater than 2039.  This check should be removed
-	 * once large timestamps are fully supported.
-	 */
-	if (mask & (ATTR_ATIME | ATTR_MTIME)) {
-		if (((mask & ATTR_ATIME) &&
-		    TIMESPEC_OVERFLOW(&vap->va_atime)) ||
-		    ((mask & ATTR_MTIME) &&
-		    TIMESPEC_OVERFLOW(&vap->va_mtime))) {
-			err = SET_ERROR(EOVERFLOW);
-			goto out3;
-		}
-	}
-
-top:
-	attrzp = NULL;
-	aclp = NULL;
-
-	/* Can this be moved to before the top label? */
-	if (zfs_is_readonly(zfsvfs)) {
-		err = SET_ERROR(EROFS);
-		goto out3;
-	}
-
-	/*
-	 * First validate permissions
-	 */
-
-	if (mask & ATTR_SIZE) {
-		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
-		if (err)
-			goto out3;
-
-		/*
-		 * XXX - Note, we are not providing any open
-		 * mode flags here (like FNDELAY), so we may
-		 * block if there are locks present... this
-		 * should be addressed in openat().
-		 */
-		/* XXX - would it be OK to generate a log record here? */
-		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
-		if (err)
-			goto out3;
-	}
-
-	if (mask & (ATTR_ATIME|ATTR_MTIME) ||
-	    ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
-	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
-	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
-	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
-	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
-	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
-	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
-		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
-		    skipaclchk, cr);
-	}
-
-	if (mask & (ATTR_UID|ATTR_GID)) {
-		int	idmask = (mask & (ATTR_UID|ATTR_GID));
-		int	take_owner;
-		int	take_group;
-
-		/*
-		 * NOTE: even if a new mode is being set,
-		 * we may clear S_ISUID/S_ISGID bits.
-		 */
-
-		if (!(mask & ATTR_MODE))
-			vap->va_mode = zp->z_mode;
-
-		/*
-		 * Take ownership or chgrp to group we are a member of
-		 */
-
-		take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
-		take_group = (mask & ATTR_GID) &&
-		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
-
-		/*
-		 * If both ATTR_UID and ATTR_GID are set then take_owner and
-		 * take_group must both be set in order to allow taking
-		 * ownership.
-		 *
-		 * Otherwise, send the check through secpolicy_vnode_setattr()
-		 *
-		 */
-
-		if (((idmask == (ATTR_UID|ATTR_GID)) &&
-		    take_owner && take_group) ||
-		    ((idmask == ATTR_UID) && take_owner) ||
-		    ((idmask == ATTR_GID) && take_group)) {
-			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
-			    skipaclchk, cr) == 0) {
-				/*
-				 * Remove setuid/setgid for non-privileged users
-				 */
-				(void) secpolicy_setid_clear(vap, cr);
-				trim_mask = (mask & (ATTR_UID|ATTR_GID));
-			} else {
-				need_policy =  TRUE;
-			}
-		} else {
-			need_policy =  TRUE;
-		}
-	}
-
-	mutex_enter(&zp->z_lock);
-	oldva.va_mode = zp->z_mode;
-	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
-	if (mask & ATTR_XVATTR) {
-		/*
-		 * Update xvattr mask to include only those attributes
-		 * that are actually changing.
-		 *
-		 * the bits will be restored prior to actually setting
-		 * the attributes so the caller thinks they were set.
-		 */
-		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
-			if (xoap->xoa_appendonly !=
-			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
-				XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
-			if (xoap->xoa_projinherit !=
-			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
-				XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
-			if (xoap->xoa_nounlink !=
-			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
-				XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
-			if (xoap->xoa_immutable !=
-			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
-				XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
-			if (xoap->xoa_nodump !=
-			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_NODUMP);
-				XVA_SET_REQ(tmpxvattr, XAT_NODUMP);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
-			if (xoap->xoa_av_modified !=
-			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
-				XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
-			if ((!S_ISREG(ip->i_mode) &&
-			    xoap->xoa_av_quarantined) ||
-			    xoap->xoa_av_quarantined !=
-			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
-				need_policy = TRUE;
-			} else {
-				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
-				XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED);
-			}
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-			mutex_exit(&zp->z_lock);
-			err = SET_ERROR(EPERM);
-			goto out3;
-		}
-
-		if (need_policy == FALSE &&
-		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
-		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
-			need_policy = TRUE;
-		}
-	}
-
-	mutex_exit(&zp->z_lock);
-
-	if (mask & ATTR_MODE) {
-		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
-			err = secpolicy_setid_setsticky_clear(ip, vap,
-			    &oldva, cr);
-			if (err)
-				goto out3;
-
-			trim_mask |= ATTR_MODE;
-		} else {
-			need_policy = TRUE;
-		}
-	}
-
-	if (need_policy) {
-		/*
-		 * If trim_mask is set then take ownership
-		 * has been granted or write_acl is present and user
-		 * has the ability to modify mode.  In that case remove
-		 * UID|GID and or MODE from mask so that
-		 * secpolicy_vnode_setattr() doesn't revoke it.
-		 */
-
-		if (trim_mask) {
-			saved_mask = vap->va_mask;
-			vap->va_mask &= ~trim_mask;
-		}
-		err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
-		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
-		if (err)
-			goto out3;
-
-		if (trim_mask)
-			vap->va_mask |= saved_mask;
-	}
-
-	/*
-	 * secpolicy_vnode_setattr, or take ownership may have
-	 * changed va_mask
-	 */
-	mask = vap->va_mask;
-
-	if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) {
-		handle_eadir = B_TRUE;
-		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
-		    &xattr_obj, sizeof (xattr_obj));
-
-		if (err == 0 && xattr_obj) {
-			err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp);
-			if (err)
-				goto out2;
-		}
-		if (mask & ATTR_UID) {
-			new_kuid = zfs_fuid_create(zfsvfs,
-			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
-			if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
-			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
-			    new_kuid)) {
-				if (attrzp)
-					iput(ZTOI(attrzp));
-				err = SET_ERROR(EDQUOT);
-				goto out2;
-			}
-		}
-
-		if (mask & ATTR_GID) {
-			new_kgid = zfs_fuid_create(zfsvfs,
-			    (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
-			if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
-			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
-			    new_kgid)) {
-				if (attrzp)
-					iput(ZTOI(attrzp));
-				err = SET_ERROR(EDQUOT);
-				goto out2;
-			}
-		}
-
-		if (projid != ZFS_INVALID_PROJID &&
-		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
-			if (attrzp)
-				iput(ZTOI(attrzp));
-			err = EDQUOT;
-			goto out2;
-		}
-	}
-	tx = dmu_tx_create(os);
-
-	if (mask & ATTR_MODE) {
-		uint64_t pmode = zp->z_mode;
-		uint64_t acl_obj;
-		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
-
-		zfs_acl_chmod_setattr(zp, &aclp, new_mode);
-
-		mutex_enter(&zp->z_lock);
-		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
-			/*
-			 * Are we upgrading ACL from old V0 format
-			 * to V1 format?
-			 */
-			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
-			    zfs_znode_acl_version(zp) ==
-			    ZFS_ACL_VERSION_INITIAL) {
-				dmu_tx_hold_free(tx, acl_obj, 0,
-				    DMU_OBJECT_END);
-				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-				    0, aclp->z_acl_bytes);
-			} else {
-				dmu_tx_hold_write(tx, acl_obj, 0,
-				    aclp->z_acl_bytes);
-			}
-		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-			    0, aclp->z_acl_bytes);
-		}
-		mutex_exit(&zp->z_lock);
-		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-	} else {
-		if (((mask & ATTR_XVATTR) &&
-		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
-		    (projid != ZFS_INVALID_PROJID &&
-		    !(zp->z_pflags & ZFS_PROJID)))
-			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
-		else
-			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	}
-
-	if (attrzp) {
-		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
-	}
-
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-
-	zfs_sa_upgrade_txholds(tx, zp);
-
-	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err)
-		goto out;
-
-	count = 0;
-	/*
-	 * Set each attribute requested.
-	 * We group settings according to the locks they need to acquire.
-	 *
-	 * Note: you cannot set ctime directly, although it will be
-	 * updated as a side-effect of calling this function.
-	 */
-
-	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
-		/*
-		 * For the existed object that is upgraded from old system,
-		 * its on-disk layout has no slot for the project ID attribute.
-		 * But quota accounting logic needs to access related slots by
-		 * offset directly. So we need to adjust old objects' layout
-		 * to make the project ID to some unified and fixed offset.
-		 */
-		if (attrzp)
-			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
-		if (err == 0)
-			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
-
-		if (unlikely(err == EEXIST))
-			err = 0;
-		else if (err != 0)
-			goto out;
-		else
-			projid = ZFS_INVALID_PROJID;
-	}
-
-	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
-		mutex_enter(&zp->z_acl_lock);
-	mutex_enter(&zp->z_lock);
-
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, sizeof (zp->z_pflags));
-
-	if (attrzp) {
-		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
-			mutex_enter(&attrzp->z_acl_lock);
-		mutex_enter(&attrzp->z_lock);
-		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
-		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
-		    sizeof (attrzp->z_pflags));
-		if (projid != ZFS_INVALID_PROJID) {
-			attrzp->z_projid = projid;
-			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
-			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
-			    sizeof (attrzp->z_projid));
-		}
-	}
-
-	if (mask & (ATTR_UID|ATTR_GID)) {
-
-		if (mask & ATTR_UID) {
-			ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
-			new_uid = zfs_uid_read(ZTOI(zp));
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
-			    &new_uid, sizeof (new_uid));
-			if (attrzp) {
-				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
-				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
-				    sizeof (new_uid));
-				ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
-			}
-		}
-
-		if (mask & ATTR_GID) {
-			ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
-			new_gid = zfs_gid_read(ZTOI(zp));
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
-			    NULL, &new_gid, sizeof (new_gid));
-			if (attrzp) {
-				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
-				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
-				    sizeof (new_gid));
-				ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
-			}
-		}
-		if (!(mask & ATTR_MODE)) {
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
-			    NULL, &new_mode, sizeof (new_mode));
-			new_mode = zp->z_mode;
-		}
-		err = zfs_acl_chown_setattr(zp);
-		ASSERT(err == 0);
-		if (attrzp) {
-			err = zfs_acl_chown_setattr(attrzp);
-			ASSERT(err == 0);
-		}
-	}
-
-	if (mask & ATTR_MODE) {
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
-		    &new_mode, sizeof (new_mode));
-		zp->z_mode = ZTOI(zp)->i_mode = new_mode;
-		ASSERT3P(aclp, !=, NULL);
-		err = zfs_aclset_common(zp, aclp, cr, tx);
-		ASSERT0(err);
-		if (zp->z_acl_cached)
-			zfs_acl_free(zp->z_acl_cached);
-		zp->z_acl_cached = aclp;
-		aclp = NULL;
-	}
-
-	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
-		zp->z_atime_dirty = B_FALSE;
-		ZFS_TIME_ENCODE(&ip->i_atime, atime);
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
-		    &atime, sizeof (atime));
-	}
-
-	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
-		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-		ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
-		    vap->va_mtime, ZTOI(zp));
-
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
-		    mtime, sizeof (mtime));
-	}
-
-	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
-		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
-		ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
-		    ZTOI(zp));
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
-		    ctime, sizeof (ctime));
-	}
-
-	if (projid != ZFS_INVALID_PROJID) {
-		zp->z_projid = projid;
-		SA_ADD_BULK_ATTR(bulk, count,
-		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
-		    sizeof (zp->z_projid));
-	}
-
-	if (attrzp && mask) {
-		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
-		    SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
-		    sizeof (ctime));
-	}
-
-	/*
-	 * Do this after setting timestamps to prevent timestamp
-	 * update from toggling bit
-	 */
-
-	if (xoap && (mask & ATTR_XVATTR)) {
-
-		/*
-		 * restore trimmed off masks
-		 * so that return masks can be set for caller.
-		 */
-
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) {
-			XVA_SET_REQ(xvap, XAT_APPENDONLY);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) {
-			XVA_SET_REQ(xvap, XAT_NOUNLINK);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) {
-			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) {
-			XVA_SET_REQ(xvap, XAT_NODUMP);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) {
-			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) {
-			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
-		}
-		if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) {
-			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
-			ASSERT(S_ISREG(ip->i_mode));
-
-		zfs_xvattr_set(zp, xvap, tx);
-	}
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-	if (mask != 0)
-		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
-
-	mutex_exit(&zp->z_lock);
-	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
-		mutex_exit(&zp->z_acl_lock);
-
-	if (attrzp) {
-		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
-			mutex_exit(&attrzp->z_acl_lock);
-		mutex_exit(&attrzp->z_lock);
-	}
-out:
-	if (err == 0 && xattr_count > 0) {
-		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
-		    xattr_count, tx);
-		ASSERT(err2 == 0);
-	}
-
-	if (aclp)
-		zfs_acl_free(aclp);
-
-	if (fuidp) {
-		zfs_fuid_info_free(fuidp);
-		fuidp = NULL;
-	}
-
-	if (err) {
-		dmu_tx_abort(tx);
-		if (attrzp)
-			iput(ZTOI(attrzp));
-		if (err == ERESTART)
-			goto top;
-	} else {
-		if (count > 0)
-			err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-		dmu_tx_commit(tx);
-		if (attrzp) {
-			if (err2 == 0 && handle_eadir)
-				err2 = zfs_setattr_dir(attrzp);
-			iput(ZTOI(attrzp));
-		}
-		zfs_inode_update(zp);
-	}
-
-out2:
-	if (os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-out3:
-	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
-	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
-	kmem_free(tmpxvattr, sizeof (xvattr_t));
-	ZFS_EXIT(zfsvfs);
-	return (err);
-}
-
-typedef struct zfs_zlock {
-	krwlock_t	*zl_rwlock;	/* lock we acquired */
-	znode_t		*zl_znode;	/* znode we held */
-	struct zfs_zlock *zl_next;	/* next in list */
-} zfs_zlock_t;
-
-/*
- * Drop locks and release vnodes that were held by zfs_rename_lock().
- */
 static void
-zfs_rename_unlock(zfs_zlock_t **zlpp)
+zfs_get_done(zgd_t *zgd, int error)
 {
-	zfs_zlock_t *zl;
+	znode_t *zp = zgd->zgd_private;
 
-	while ((zl = *zlpp) != NULL) {
-		if (zl->zl_znode != NULL)
-			zfs_iput_async(ZTOI(zl->zl_znode));
-		rw_exit(zl->zl_rwlock);
-		*zlpp = zl->zl_next;
-		kmem_free(zl, sizeof (*zl));
-	}
+	if (zgd->zgd_db)
+		dmu_buf_rele(zgd->zgd_db, zgd);
+
+	zfs_rangelock_exit(zgd->zgd_lr);
+
+	/*
+	 * Release the vnode asynchronously as we currently have the
+	 * txg stopped from syncing.
+	 */
+	zfs_zrele_async(zp);
+
+	kmem_free(zgd, sizeof (zgd_t));
 }
 
-/*
- * Search back through the directory tree, using the ".." entries.
- * Lock each directory in the chain to prevent concurrent renames.
- * Fail any attempt to move a directory into one of its own descendants.
- * XXX - z_parent_lock can overlap with map or grow locks
- */
-static int
-zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
-{
-	zfs_zlock_t	*zl;
-	znode_t		*zp = tdzp;
-	uint64_t	rootid = ZTOZSB(zp)->z_root;
-	uint64_t	oidp = zp->z_id;
-	krwlock_t	*rwlp = &szp->z_parent_lock;
-	krw_t		rw = RW_WRITER;
-
-	/*
-	 * First pass write-locks szp and compares to zp->z_id.
-	 * Later passes read-lock zp and compare to zp->z_parent.
-	 */
-	do {
-		if (!rw_tryenter(rwlp, rw)) {
-			/*
-			 * Another thread is renaming in this path.
-			 * Note that if we are a WRITER, we don't have any
-			 * parent_locks held yet.
-			 */
-			if (rw == RW_READER && zp->z_id > szp->z_id) {
-				/*
-				 * Drop our locks and restart
-				 */
-				zfs_rename_unlock(&zl);
-				*zlpp = NULL;
-				zp = tdzp;
-				oidp = zp->z_id;
-				rwlp = &szp->z_parent_lock;
-				rw = RW_WRITER;
-				continue;
-			} else {
-				/*
-				 * Wait for other thread to drop its locks
-				 */
-				rw_enter(rwlp, rw);
-			}
-		}
-
-		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
-		zl->zl_rwlock = rwlp;
-		zl->zl_znode = NULL;
-		zl->zl_next = *zlpp;
-		*zlpp = zl;
-
-		if (oidp == szp->z_id)		/* We're a descendant of szp */
-			return (SET_ERROR(EINVAL));
-
-		if (oidp == rootid)		/* We've hit the top */
-			return (0);
-
-		if (rw == RW_READER) {		/* i.e. not the first pass */
-			int error = zfs_zget(ZTOZSB(zp), oidp, &zp);
-			if (error)
-				return (error);
-			zl->zl_znode = zp;
-		}
-		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)),
-		    &oidp, sizeof (oidp));
-		rwlp = &zp->z_parent_lock;
-		rw = RW_READER;
-
-	} while (zp->z_id != sdzp->z_id);
-
-	return (0);
-}
-
-/*
- * Move an entry from the provided source directory to the target
- * directory.  Change the entry name as indicated.
- *
- *	IN:	sdip	- Source directory containing the "old entry".
- *		snm	- Old entry name.
- *		tdip	- Target directory to contain the "new entry".
- *		tnm	- New entry name.
- *		cr	- credentials of caller.
- *		flags	- case flags
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	sdip,tdip - ctime|mtime updated
- */
-/*ARGSUSED*/
-int
-zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
-    cred_t *cr, int flags)
-{
-	znode_t		*tdzp, *szp, *tzp;
-	znode_t		*sdzp = ITOZ(sdip);
-	zfsvfs_t	*zfsvfs = ITOZSB(sdip);
-	zilog_t		*zilog;
-	zfs_dirlock_t	*sdl, *tdl;
-	dmu_tx_t	*tx;
-	zfs_zlock_t	*zl;
-	int		cmp, serr, terr;
-	int		error = 0;
-	int		zflg = 0;
-	boolean_t	waited = B_FALSE;
-
-	if (snm == NULL || tnm == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(sdzp);
-	zilog = zfsvfs->z_log;
-
-	tdzp = ITOZ(tdip);
-	ZFS_VERIFY_ZP(tdzp);
-
-	/*
-	 * We check i_sb because snapshots and the ctldir must have different
-	 * super blocks.
-	 */
-	if (tdip->i_sb != sdip->i_sb || zfsctl_is_node(tdip)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EXDEV));
-	}
-
-	if (zfsvfs->z_utf8 && u8_validate(tnm,
-	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-
-	if (flags & FIGNORECASE)
-		zflg |= ZCILOOK;
-
-top:
-	szp = NULL;
-	tzp = NULL;
-	zl = NULL;
-
-	/*
-	 * This is to prevent the creation of links into attribute space
-	 * by renaming a linked file into/outof an attribute directory.
-	 * See the comment in zfs_link() for why this is considered bad.
-	 */
-	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Lock source and target directory entries.  To prevent deadlock,
-	 * a lock ordering must be defined.  We lock the directory with
-	 * the smallest object id first, or if it's a tie, the one with
-	 * the lexically first name.
-	 */
-	if (sdzp->z_id < tdzp->z_id) {
-		cmp = -1;
-	} else if (sdzp->z_id > tdzp->z_id) {
-		cmp = 1;
-	} else {
-		/*
-		 * First compare the two name arguments without
-		 * considering any case folding.
-		 */
-		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
-
-		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
-		ASSERT(error == 0 || !zfsvfs->z_utf8);
-		if (cmp == 0) {
-			/*
-			 * POSIX: "If the old argument and the new argument
-			 * both refer to links to the same existing file,
-			 * the rename() function shall return successfully
-			 * and perform no other action."
-			 */
-			ZFS_EXIT(zfsvfs);
-			return (0);
-		}
-		/*
-		 * If the file system is case-folding, then we may
-		 * have some more checking to do.  A case-folding file
-		 * system is either supporting mixed case sensitivity
-		 * access or is completely case-insensitive.  Note
-		 * that the file system is always case preserving.
-		 *
-		 * In mixed sensitivity mode case sensitive behavior
-		 * is the default.  FIGNORECASE must be used to
-		 * explicitly request case insensitive behavior.
-		 *
-		 * If the source and target names provided differ only
-		 * by case (e.g., a request to rename 'tim' to 'Tim'),
-		 * we will treat this as a special case in the
-		 * case-insensitive mode: as long as the source name
-		 * is an exact match, we will allow this to proceed as
-		 * a name-change request.
-		 */
-		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
-		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
-		    flags & FIGNORECASE)) &&
-		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
-		    &error) == 0) {
-			/*
-			 * case preserving rename request, require exact
-			 * name matches
-			 */
-			zflg |= ZCIEXACT;
-			zflg &= ~ZCILOOK;
-		}
-	}
-
-	/*
-	 * If the source and destination directories are the same, we should
-	 * grab the z_name_lock of that directory only once.
-	 */
-	if (sdzp == tdzp) {
-		zflg |= ZHAVELOCK;
-		rw_enter(&sdzp->z_name_lock, RW_READER);
-	}
-
-	if (cmp < 0) {
-		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
-		    ZEXISTS | zflg, NULL, NULL);
-		terr = zfs_dirent_lock(&tdl,
-		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
-	} else {
-		terr = zfs_dirent_lock(&tdl,
-		    tdzp, tnm, &tzp, zflg, NULL, NULL);
-		serr = zfs_dirent_lock(&sdl,
-		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
-		    NULL, NULL);
-	}
-
-	if (serr) {
-		/*
-		 * Source entry invalid or not there.
-		 */
-		if (!terr) {
-			zfs_dirent_unlock(tdl);
-			if (tzp)
-				iput(ZTOI(tzp));
-		}
-
-		if (sdzp == tdzp)
-			rw_exit(&sdzp->z_name_lock);
-
-		if (strcmp(snm, "..") == 0)
-			serr = EINVAL;
-		ZFS_EXIT(zfsvfs);
-		return (serr);
-	}
-	if (terr) {
-		zfs_dirent_unlock(sdl);
-		iput(ZTOI(szp));
-
-		if (sdzp == tdzp)
-			rw_exit(&sdzp->z_name_lock);
-
-		if (strcmp(tnm, "..") == 0)
-			terr = EINVAL;
-		ZFS_EXIT(zfsvfs);
-		return (terr);
-	}
-
-	/*
-	 * If we are using project inheritance, means if the directory has
-	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
-	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
-	 * such case, we only allow renames into our tree when the project
-	 * IDs are the same.
-	 */
-	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
-	    tdzp->z_projid != szp->z_projid) {
-		error = SET_ERROR(EXDEV);
-		goto out;
-	}
-
-	/*
-	 * Must have write access at the source to remove the old entry
-	 * and write access at the target to create the new entry.
-	 * Note that if target and source are the same, this can be
-	 * done in a single check.
-	 */
-
-	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
-		goto out;
-
-	if (S_ISDIR(ZTOI(szp)->i_mode)) {
-		/*
-		 * Check to make sure rename is valid.
-		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
-		 */
-		if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl)))
-			goto out;
-	}
-
-	/*
-	 * Does target exist?
-	 */
-	if (tzp) {
-		/*
-		 * Source and target must be the same type.
-		 */
-		if (S_ISDIR(ZTOI(szp)->i_mode)) {
-			if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
-				error = SET_ERROR(ENOTDIR);
-				goto out;
-			}
-		} else {
-			if (S_ISDIR(ZTOI(tzp)->i_mode)) {
-				error = SET_ERROR(EISDIR);
-				goto out;
-			}
-		}
-		/*
-		 * POSIX dictates that when the source and target
-		 * entries refer to the same file object, rename
-		 * must do nothing and exit without error.
-		 */
-		if (szp->z_id == tzp->z_id) {
-			error = 0;
-			goto out;
-		}
-	}
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
-	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
-	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
-	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
-	if (sdzp != tdzp) {
-		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
-		zfs_sa_upgrade_txholds(tx, tdzp);
-	}
-	if (tzp) {
-		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
-		zfs_sa_upgrade_txholds(tx, tzp);
-	}
-
-	zfs_sa_upgrade_txholds(tx, szp);
-	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		if (zl != NULL)
-			zfs_rename_unlock(&zl);
-		zfs_dirent_unlock(sdl);
-		zfs_dirent_unlock(tdl);
-
-		if (sdzp == tdzp)
-			rw_exit(&sdzp->z_name_lock);
-
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			iput(ZTOI(szp));
-			if (tzp)
-				iput(ZTOI(tzp));
-			goto top;
-		}
-		dmu_tx_abort(tx);
-		iput(ZTOI(szp));
-		if (tzp)
-			iput(ZTOI(tzp));
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (tzp)	/* Attempt to remove the existing target */
-		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
-
-	if (error == 0) {
-		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
-		if (error == 0) {
-			szp->z_pflags |= ZFS_AV_MODIFIED;
-			if (tdzp->z_pflags & ZFS_PROJINHERIT)
-				szp->z_pflags |= ZFS_PROJINHERIT;
-
-			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
-			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
-			ASSERT0(error);
-
-			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
-			if (error == 0) {
-				zfs_log_rename(zilog, tx, TX_RENAME |
-				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
-				    sdl->dl_name, tdzp, tdl->dl_name, szp);
-			} else {
-				/*
-				 * At this point, we have successfully created
-				 * the target name, but have failed to remove
-				 * the source name.  Since the create was done
-				 * with the ZRENAMING flag, there are
-				 * complications; for one, the link count is
-				 * wrong.  The easiest way to deal with this
-				 * is to remove the newly created target, and
-				 * return the original error.  This must
-				 * succeed; fortunately, it is very unlikely to
-				 * fail, since we just created it.
-				 */
-				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
-				    ZRENAMING, NULL), ==, 0);
-			}
-		} else {
-			/*
-			 * If we had removed the existing target, subsequent
-			 * call to zfs_link_create() to add back the same entry
-			 * but, the new dnode (szp) should not fail.
-			 */
-			ASSERT(tzp == NULL);
-		}
-	}
-
-	dmu_tx_commit(tx);
-out:
-	if (zl != NULL)
-		zfs_rename_unlock(&zl);
-
-	zfs_dirent_unlock(sdl);
-	zfs_dirent_unlock(tdl);
-
-	zfs_inode_update(sdzp);
-	if (sdzp == tdzp)
-		rw_exit(&sdzp->z_name_lock);
-
-	if (sdzp != tdzp)
-		zfs_inode_update(tdzp);
-
-	zfs_inode_update(szp);
-	iput(ZTOI(szp));
-	if (tzp) {
-		zfs_inode_update(tzp);
-		iput(ZTOI(tzp));
-	}
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Insert the indicated symbolic reference entry into the directory.
- *
- *	IN:	dip	- Directory to contain new symbolic link.
- *		name	- Name of directory entry in dip.
- *		vap	- Attributes of new entry.
- *		link	- Name for new symlink entry.
- *		cr	- credentials of caller.
- *		flags	- case flags
- *
- *	OUT:	ipp	- Inode for new symbolic link.
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	dip - ctime|mtime updated
- */
-/*ARGSUSED*/
-int
-zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
-    struct inode **ipp, cred_t *cr, int flags)
-{
-	znode_t		*zp, *dzp = ITOZ(dip);
-	zfs_dirlock_t	*dl;
-	dmu_tx_t	*tx;
-	zfsvfs_t	*zfsvfs = ITOZSB(dip);
-	zilog_t		*zilog;
-	uint64_t	len = strlen(link);
-	int		error;
-	int		zflg = ZNEW;
-	zfs_acl_ids_t	acl_ids;
-	boolean_t	fuid_dirtied;
-	uint64_t	txtype = TX_SYMLINK;
-	boolean_t	waited = B_FALSE;
-
-	ASSERT(S_ISLNK(vap->va_mode));
-
-	if (name == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	zilog = zfsvfs->z_log;
-
-	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
-	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-	if (flags & FIGNORECASE)
-		zflg |= ZCILOOK;
-
-	if (len > MAXPATHLEN) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENAMETOOLONG));
-	}
-
-	if ((error = zfs_acl_ids_create(dzp, 0,
-	    vap, cr, NULL, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-top:
-	*ipp = NULL;
-
-	/*
-	 * Attempt to lock directory; fail if entry already exists.
-	 */
-	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
-	if (error) {
-		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-		zfs_acl_ids_free(&acl_ids);
-		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
-		zfs_acl_ids_free(&acl_ids);
-		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EDQUOT));
-	}
-	tx = dmu_tx_create(zfsvfs->z_os);
-	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
-	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
-	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
-	    ZFS_SA_BASE_ATTR_SIZE + len);
-	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
-	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-		    acl_ids.z_aclp->z_acl_bytes);
-	}
-	if (fuid_dirtied)
-		zfs_fuid_txhold(zfsvfs, tx);
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		zfs_dirent_unlock(dl);
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			goto top;
-		}
-		zfs_acl_ids_free(&acl_ids);
-		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	/*
-	 * Create a new object for the symlink.
-	 * for version 4 ZPL datsets the symlink will be an SA attribute
-	 */
-	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
-
-	if (fuid_dirtied)
-		zfs_fuid_sync(zfsvfs, tx);
-
-	mutex_enter(&zp->z_lock);
-	if (zp->z_is_sa)
-		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
-		    link, len, tx);
-	else
-		zfs_sa_symlink(zp, link, len, tx);
-	mutex_exit(&zp->z_lock);
-
-	zp->z_size = len;
-	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
-	    &zp->z_size, sizeof (zp->z_size), tx);
-	/*
-	 * Insert the new object into the directory.
-	 */
-	error = zfs_link_create(dl, zp, tx, ZNEW);
-	if (error != 0) {
-		zfs_znode_delete(zp, tx);
-		remove_inode_hash(ZTOI(zp));
-	} else {
-		if (flags & FIGNORECASE)
-			txtype |= TX_CI;
-		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
-
-		zfs_inode_update(dzp);
-		zfs_inode_update(zp);
-	}
-
-	zfs_acl_ids_free(&acl_ids);
-
-	dmu_tx_commit(tx);
-
-	zfs_dirent_unlock(dl);
-
-	if (error == 0) {
-		*ipp = ZTOI(zp);
-
-		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-			zil_commit(zilog, 0);
-	} else {
-		iput(ZTOI(zp));
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Return, in the buffer contained in the provided uio structure,
- * the symbolic path referred to by ip.
- *
- *	IN:	ip	- inode of symbolic link
- *		uio	- structure to contain the link path.
- *		cr	- credentials of caller.
- *
- *	RETURN:	0 if success
- *		error code if failure
- *
- * Timestamps:
- *	ip - atime updated
- */
-/* ARGSUSED */
-int
-zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	int		error;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	mutex_enter(&zp->z_lock);
-	if (zp->z_is_sa)
-		error = sa_lookup_uio(zp->z_sa_hdl,
-		    SA_ZPL_SYMLINK(zfsvfs), uio);
-	else
-		error = zfs_sa_readlink(zp, uio);
-	mutex_exit(&zp->z_lock);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*
- * Insert a new entry into directory tdip referencing sip.
- *
- *	IN:	tdip	- Directory to contain new entry.
- *		sip	- inode of new entry.
- *		name	- name of new entry.
- *		cr	- credentials of caller.
- *		flags	- case flags.
- *
- *	RETURN:	0 if success
- *		error code if failure
- *
- * Timestamps:
- *	tdip - ctime|mtime updated
- *	 sip - ctime updated
- */
-/* ARGSUSED */
-int
-zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr,
-    int flags)
-{
-	znode_t		*dzp = ITOZ(tdip);
-	znode_t		*tzp, *szp;
-	zfsvfs_t	*zfsvfs = ITOZSB(tdip);
-	zilog_t		*zilog;
-	zfs_dirlock_t	*dl;
-	dmu_tx_t	*tx;
-	int		error;
-	int		zf = ZNEW;
-	uint64_t	parent;
-	uid_t		owner;
-	boolean_t	waited = B_FALSE;
-	boolean_t	is_tmpfile = 0;
-	uint64_t	txg;
-#ifdef HAVE_TMPFILE
-	is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
-#endif
-	ASSERT(S_ISDIR(tdip->i_mode));
-
-	if (name == NULL)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	zilog = zfsvfs->z_log;
-
-	/*
-	 * POSIX dictates that we return EPERM here.
-	 * Better choices include ENOTSUP or EISDIR.
-	 */
-	if (S_ISDIR(sip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EPERM));
-	}
-
-	szp = ITOZ(sip);
-	ZFS_VERIFY_ZP(szp);
-
-	/*
-	 * If we are using project inheritance, means if the directory has
-	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
-	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
-	 * such case, we only allow hard link creation in our tree when the
-	 * project IDs are the same.
-	 */
-	if (dzp->z_pflags & ZFS_PROJINHERIT && dzp->z_projid != szp->z_projid) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EXDEV));
-	}
-
-	/*
-	 * We check i_sb because snapshots and the ctldir must have different
-	 * super blocks.
-	 */
-	if (sip->i_sb != tdip->i_sb || zfsctl_is_node(sip)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EXDEV));
-	}
-
-	/* Prevent links to .zfs/shares files */
-
-	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
-	    &parent, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-	if (parent == zfsvfs->z_shares_dir) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EPERM));
-	}
-
-	if (zfsvfs->z_utf8 && u8_validate(name,
-	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EILSEQ));
-	}
-	if (flags & FIGNORECASE)
-		zf |= ZCILOOK;
-
-	/*
-	 * We do not support links between attributes and non-attributes
-	 * because of the potential security risk of creating links
-	 * into "normal" file space in order to circumvent restrictions
-	 * imposed in attribute space.
-	 */
-	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
-	    cr, ZFS_OWNER);
-	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EPERM));
-	}
-
-	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-top:
-	/*
-	 * Attempt to lock directory; fail if entry already exists.
-	 */
-	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
-	if (error) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
-	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
-	if (is_tmpfile)
-		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-
-	zfs_sa_upgrade_txholds(tx, szp);
-	zfs_sa_upgrade_txholds(tx, dzp);
-	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
-	if (error) {
-		zfs_dirent_unlock(dl);
-		if (error == ERESTART) {
-			waited = B_TRUE;
-			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			goto top;
-		}
-		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-	/* unmark z_unlinked so zfs_link_create will not reject */
-	if (is_tmpfile)
-		szp->z_unlinked = B_FALSE;
-	error = zfs_link_create(dl, szp, tx, 0);
-
-	if (error == 0) {
-		uint64_t txtype = TX_LINK;
-		/*
-		 * tmpfile is created to be in z_unlinkedobj, so remove it.
-		 * Also, we don't log in ZIL, because all previous file
-		 * operation on the tmpfile are ignored by ZIL. Instead we
-		 * always wait for txg to sync to make sure all previous
-		 * operation are sync safe.
-		 */
-		if (is_tmpfile) {
-			VERIFY(zap_remove_int(zfsvfs->z_os,
-			    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
-		} else {
-			if (flags & FIGNORECASE)
-				txtype |= TX_CI;
-			zfs_log_link(zilog, tx, txtype, dzp, szp, name);
-		}
-	} else if (is_tmpfile) {
-		/* restore z_unlinked since when linking failed */
-		szp->z_unlinked = B_TRUE;
-	}
-	txg = dmu_tx_get_txg(tx);
-	dmu_tx_commit(tx);
-
-	zfs_dirent_unlock(dl);
-
-	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	if (is_tmpfile)
-		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
-
-	zfs_inode_update(dzp);
-	zfs_inode_update(szp);
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-static void
-zfs_putpage_commit_cb(void *arg)
-{
-	struct page *pp = arg;
-
-	ClearPageError(pp);
-	end_page_writeback(pp);
-}
-
-/*
- * Push a page out to disk, once the page is on stable storage the
- * registered commit callback will be run as notification of completion.
- *
- *	IN:	ip	- page mapped for inode.
- *		pp	- page to push (page is locked)
- *		wbc	- writeback control data
- *
- *	RETURN:	0 if success
- *		error code if failure
- *
- * Timestamps:
- *	ip - ctime|mtime updated
- */
-/* ARGSUSED */
-int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	loff_t		offset;
-	loff_t		pgoff;
-	unsigned int	pglen;
-	dmu_tx_t	*tx;
-	caddr_t		va;
-	int		err = 0;
-	uint64_t	mtime[2], ctime[2];
-	sa_bulk_attr_t	bulk[3];
-	int		cnt = 0;
-	struct address_space *mapping;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	ASSERT(PageLocked(pp));
-
-	pgoff = page_offset(pp);	/* Page byte-offset in file */
-	offset = i_size_read(ip);	/* File length in bytes */
-	pglen = MIN(PAGE_SIZE,		/* Page length in bytes */
-	    P2ROUNDUP(offset, PAGE_SIZE)-pgoff);
-
-	/* Page is beyond end of file */
-	if (pgoff >= offset) {
-		unlock_page(pp);
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	/* Truncate page length to end of file */
-	if (pgoff + pglen > offset)
-		pglen = offset - pgoff;
-
-#if 0
-	/*
-	 * FIXME: Allow mmap writes past its quota.  The correct fix
-	 * is to register a page_mkwrite() handler to count the page
-	 * against its quota when it is about to be dirtied.
-	 */
-	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
-	    KUID_TO_SUID(ip->i_uid)) ||
-	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
-	    KGID_TO_SGID(ip->i_gid)) ||
-	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
-	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
-	    zp->z_projid))) {
-		err = EDQUOT;
-	}
-#endif
-
-	/*
-	 * The ordering here is critical and must adhere to the following
-	 * rules in order to avoid deadlocking in either zfs_read() or
-	 * zfs_free_range() due to a lock inversion.
-	 *
-	 * 1) The page must be unlocked prior to acquiring the range lock.
-	 *    This is critical because zfs_read() calls find_lock_page()
-	 *    which may block on the page lock while holding the range lock.
-	 *
-	 * 2) Before setting or clearing write back on a page the range lock
-	 *    must be held in order to prevent a lock inversion with the
-	 *    zfs_free_range() function.
-	 *
-	 * This presents a problem because upon entering this function the
-	 * page lock is already held.  To safely acquire the range lock the
-	 * page lock must be dropped.  This creates a window where another
-	 * process could truncate, invalidate, dirty, or write out the page.
-	 *
-	 * Therefore, after successfully reacquiring the range and page locks
-	 * the current page state is checked.  In the common case everything
-	 * will be as is expected and it can be written out.  However, if
-	 * the page state has changed it must be handled accordingly.
-	 */
-	mapping = pp->mapping;
-	redirty_page_for_writepage(wbc, pp);
-	unlock_page(pp);
-
-	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
-	    pgoff, pglen, RL_WRITER);
-	lock_page(pp);
-
-	/* Page mapping changed or it was no longer dirty, we're done */
-	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
-		unlock_page(pp);
-		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	/* Another process started write block if required */
-	if (PageWriteback(pp)) {
-		unlock_page(pp);
-		zfs_rangelock_exit(lr);
-
-		if (wbc->sync_mode != WB_SYNC_NONE) {
-			if (PageWriteback(pp))
-				wait_on_page_bit(pp, PG_writeback);
-		}
-
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	/* Clear the dirty flag the required locks are held */
-	if (!clear_page_dirty_for_io(pp)) {
-		unlock_page(pp);
-		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	/*
-	 * Counterpart for redirty_page_for_writepage() above.  This page
-	 * was in fact not skipped and should not be counted as if it were.
-	 */
-	wbc->pages_skipped--;
-	set_page_writeback(pp);
-	unlock_page(pp);
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-
-	err = dmu_tx_assign(tx, TXG_NOWAIT);
-	if (err != 0) {
-		if (err == ERESTART)
-			dmu_tx_wait(tx);
-
-		dmu_tx_abort(tx);
-		__set_page_dirty_nobuffers(pp);
-		ClearPageError(pp);
-		end_page_writeback(pp);
-		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
-		return (err);
-	}
-
-	va = kmap(pp);
-	ASSERT3U(pglen, <=, PAGE_SIZE);
-	dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
-	kunmap(pp);
-
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, 8);
-
-	/* Preserve the mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
-	zp->z_atime_dirty = B_FALSE;
-	zp->z_seq++;
-
-	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
-
-	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    zfs_putpage_commit_cb, pp);
-	dmu_tx_commit(tx);
-
-	zfs_rangelock_exit(lr);
-
-	if (wbc->sync_mode != WB_SYNC_NONE) {
-		/*
-		 * Note that this is rarely called under writepages(), because
-		 * writepages() normally handles the entire commit for
-		 * performance reasons.
-		 */
-		zil_commit(zfsvfs->z_log, zp->z_id);
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (err);
-}
-
-/*
- * Update the system attributes when the inode has been dirtied.  For the
- * moment we only update the mode, atime, mtime, and ctime.
- */
-int
-zfs_dirty_inode(struct inode *ip, int flags)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	dmu_tx_t	*tx;
-	uint64_t	mode, atime[2], mtime[2], ctime[2];
-	sa_bulk_attr_t	bulk[4];
-	int		error = 0;
-	int		cnt = 0;
-
-	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
-		return (0);
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-#ifdef I_DIRTY_TIME
-	/*
-	 * This is the lazytime semantic introduced in Linux 4.0
-	 * This flag will only be called from update_time when lazytime is set.
-	 * (Note, I_DIRTY_SYNC will also set if not lazytime)
-	 * Fortunately mtime and ctime are managed within ZFS itself, so we
-	 * only need to dirty atime.
-	 */
-	if (flags == I_DIRTY_TIME) {
-		zp->z_atime_dirty = B_TRUE;
-		goto out;
-	}
-#endif
-
-	tx = dmu_tx_create(zfsvfs->z_os);
-
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		dmu_tx_abort(tx);
-		goto out;
-	}
-
-	mutex_enter(&zp->z_lock);
-	zp->z_atime_dirty = B_FALSE;
-
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
-
-	/* Preserve the mode, mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_atime, atime);
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
-	mode = ip->i_mode;
-
-	zp->z_mode = mode;
-
-	error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
-	mutex_exit(&zp->z_lock);
-
-	dmu_tx_commit(tx);
-out:
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*ARGSUSED*/
-void
-zfs_inactive(struct inode *ip)
-{
-	znode_t	*zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	uint64_t atime[2];
-	int error;
-	int need_unlock = 0;
-
-	/* Only read lock if we haven't already write locked, e.g. rollback */
-	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
-		need_unlock = 1;
-		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
-	}
-	if (zp->z_sa_hdl == NULL) {
-		if (need_unlock)
-			rw_exit(&zfsvfs->z_teardown_inactive_lock);
-		return;
-	}
-
-	if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) {
-		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
-
-		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-		zfs_sa_upgrade_txholds(tx, zp);
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			dmu_tx_abort(tx);
-		} else {
-			ZFS_TIME_ENCODE(&ip->i_atime, atime);
-			mutex_enter(&zp->z_lock);
-			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
-			    (void *)&atime, sizeof (atime), tx);
-			zp->z_atime_dirty = B_FALSE;
-			mutex_exit(&zp->z_lock);
-			dmu_tx_commit(tx);
-		}
-	}
-
-	zfs_zinactive(zp);
-	if (need_unlock)
-		rw_exit(&zfsvfs->z_teardown_inactive_lock);
-}
-
-/*
- * Bounds-check the seek operation.
- *
- *	IN:	ip	- inode seeking within
- *		ooff	- old file offset
- *		noffp	- pointer to new file offset
- *
- *	RETURN:	0 if success
- *		EINVAL if new offset invalid
- */
-/* ARGSUSED */
-int
-zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp)
-{
-	if (S_ISDIR(ip->i_mode))
-		return (0);
-	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
-}
-
-/*
- * Fill pages with data from the disk.
- */
-static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	objset_t *os;
-	struct page *cur_pp;
-	u_offset_t io_off, total;
-	size_t io_len;
-	loff_t i_size;
-	unsigned page_idx;
-	int err;
-
-	os = zfsvfs->z_os;
-	io_len = nr_pages << PAGE_SHIFT;
-	i_size = i_size_read(ip);
-	io_off = page_offset(pl[0]);
-
-	if (io_off + io_len > i_size)
-		io_len = i_size - io_off;
-
-	/*
-	 * Iterate over list of pages and read each page individually.
-	 */
-	page_idx = 0;
-	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
-		caddr_t va;
-
-		cur_pp = pl[page_idx++];
-		va = kmap(cur_pp);
-		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
-		    DMU_READ_PREFETCH);
-		kunmap(cur_pp);
-		if (err) {
-			/* convert checksum errors into IO errors */
-			if (err == ECKSUM)
-				err = SET_ERROR(EIO);
-			return (err);
-		}
-	}
-
-	return (0);
-}
-
-/*
- * Uses zfs_fillpage to read data from the file and fill the pages.
- *
- *	IN:	ip	 - inode of file to get data from.
- *		pl	 - list of pages to read
- *		nr_pages - number of pages to read
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	vp - atime updated
- */
-/* ARGSUSED */
-int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
-{
-	znode_t	 *zp  = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int	 err;
-
-	if (pl == NULL)
-		return (0);
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	err = zfs_fillpage(ip, pl, nr_pages);
-
-	ZFS_EXIT(zfsvfs);
-	return (err);
-}
-
-/*
- * Check ZFS specific permissions to memory map a section of a file.
- *
- *	IN:	ip	- inode of the file to mmap
- *		off	- file offset
- *		addrp	- start address in memory region
- *		len	- length of memory region
- *		vm_flags- address flags
- *
- *	RETURN:	0 if success
- *		error code if failure
- */
-/*ARGSUSED*/
-int
-zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
-    unsigned long vm_flags)
-{
-	znode_t  *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
-	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EPERM));
-	}
-
-	if ((vm_flags & (VM_READ | VM_EXEC)) &&
-	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EACCES));
-	}
-
-	if (off < 0 || len > MAXOFFSET_T - off) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENXIO));
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*
- * convoff - converts the given data (start, whence) to the
- * given whence.
- */
-int
-convoff(struct inode *ip, flock64_t *lckdat, int  whence, offset_t offset)
-{
-	vattr_t vap;
-	int error;
-
-	if ((lckdat->l_whence == SEEK_END) || (whence == SEEK_END)) {
-		if ((error = zfs_getattr(ip, &vap, 0, CRED())))
-			return (error);
-	}
-
-	switch (lckdat->l_whence) {
-	case SEEK_CUR:
-		lckdat->l_start += offset;
-		break;
-	case SEEK_END:
-		lckdat->l_start += vap.va_size;
-		/* FALLTHRU */
-	case SEEK_SET:
-		break;
-	default:
-		return (SET_ERROR(EINVAL));
-	}
-
-	if (lckdat->l_start < 0)
-		return (SET_ERROR(EINVAL));
-
-	switch (whence) {
-	case SEEK_CUR:
-		lckdat->l_start -= offset;
-		break;
-	case SEEK_END:
-		lckdat->l_start -= vap.va_size;
-		/* FALLTHRU */
-	case SEEK_SET:
-		break;
-	default:
-		return (SET_ERROR(EINVAL));
-	}
-
-	lckdat->l_whence = (short)whence;
-	return (0);
-}
-
-/*
- * Free or allocate space in a file.  Currently, this function only
- * supports the `F_FREESP' command.  However, this command is somewhat
- * misnamed, as its functionality includes the ability to allocate as
- * well as free space.
- *
- *	IN:	ip	- inode of file to free data in.
- *		cmd	- action to take (only F_FREESP supported).
- *		bfp	- section of file to free/alloc.
- *		flag	- current file open mode flags.
- *		offset	- current file offset.
- *		cr	- credentials of caller.
- *
- *	RETURN:	0 on success, error code on failure.
- *
- * Timestamps:
- *	ip - ctime|mtime updated
- */
-/* ARGSUSED */
-int
-zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
-    offset_t offset, cred_t *cr)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	uint64_t	off, len;
-	int		error;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	if (cmd != F_FREESP) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Callers might not be able to detect properly that we are read-only,
-	 * so check it explicitly here.
-	 */
-	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EROFS));
-	}
-
-	if ((error = convoff(ip, bfp, SEEK_SET, offset))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	if (bfp->l_len < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Permissions aren't checked on Solaris because on this OS
-	 * zfs_space() can only be called with an opened file handle.
-	 * On Linux we can get here through truncate_range() which
-	 * operates directly on inodes, so we need to check access rights.
-	 */
-	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	off = bfp->l_start;
-	len = bfp->l_len; /* 0 means from off to end of file */
-
-	error = zfs_freesp(zp, off, len, flag, TRUE);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/*ARGSUSED*/
-int
-zfs_fid(struct inode *ip, fid_t *fidp)
-{
-	znode_t		*zp = ITOZ(ip);
-	zfsvfs_t	*zfsvfs = ITOZSB(ip);
-	uint32_t	gen;
-	uint64_t	gen64;
-	uint64_t	object = zp->z_id;
-	zfid_short_t	*zfid;
-	int		size, i, error;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
-	    &gen64, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	gen = (uint32_t)gen64;
-
-	size = SHORT_FID_LEN;
-
-	zfid = (zfid_short_t *)fidp;
-
-	zfid->zf_len = size;
-
-	for (i = 0; i < sizeof (zfid->zf_object); i++)
-		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
-
-	/* Must have a non-zero generation number to distinguish from .zfs */
-	if (gen == 0)
-		gen = 1;
-	for (i = 0; i < sizeof (zfid->zf_gen); i++)
-		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*ARGSUSED*/
-int
-zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int error;
-	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-/*ARGSUSED*/
-int
-zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int error;
-	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
-	zilog_t	*zilog = zfsvfs->z_log;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		zil_commit(zilog, 0);
-
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-#ifdef HAVE_UIO_ZEROCOPY
-/*
- * The smallest read we may consider to loan out an arcbuf.
- * This must be a power of 2.
- */
-int zcr_blksz_min = (1 << 10);	/* 1K */
-/*
- * If set to less than the file block size, allow loaning out of an
- * arcbuf for a partial block read.  This must be a power of 2.
- */
-int zcr_blksz_max = (1 << 17);	/* 128K */
-
-/*ARGSUSED*/
-static int
-zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
-{
-	znode_t	*zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int max_blksz = zfsvfs->z_max_blksz;
-	uio_t *uio = &xuio->xu_uio;
-	ssize_t size = uio->uio_resid;
-	offset_t offset = uio->uio_loffset;
-	int blksz;
-	int fullblk, i;
-	arc_buf_t *abuf;
-	ssize_t maxsize;
-	int preamble, postamble;
-
-	if (xuio->xu_type != UIOTYPE_ZEROCOPY)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-	switch (ioflag) {
-	case UIO_WRITE:
-		/*
-		 * Loan out an arc_buf for write if write size is bigger than
-		 * max_blksz, and the file's block size is also max_blksz.
-		 */
-		blksz = max_blksz;
-		if (size < blksz || zp->z_blksz != blksz) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EINVAL));
-		}
-		/*
-		 * Caller requests buffers for write before knowing where the
-		 * write offset might be (e.g. NFS TCP write).
-		 */
-		if (offset == -1) {
-			preamble = 0;
-		} else {
-			preamble = P2PHASE(offset, blksz);
-			if (preamble) {
-				preamble = blksz - preamble;
-				size -= preamble;
-			}
-		}
-
-		postamble = P2PHASE(size, blksz);
-		size -= postamble;
-
-		fullblk = size / blksz;
-		(void) dmu_xuio_init(xuio,
-		    (preamble != 0) + fullblk + (postamble != 0));
-
-		/*
-		 * Have to fix iov base/len for partial buffers.  They
-		 * currently represent full arc_buf's.
-		 */
-		if (preamble) {
-			/* data begins in the middle of the arc_buf */
-			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-			    blksz);
-			ASSERT(abuf);
-			(void) dmu_xuio_add(xuio, abuf,
-			    blksz - preamble, preamble);
-		}
-
-		for (i = 0; i < fullblk; i++) {
-			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-			    blksz);
-			ASSERT(abuf);
-			(void) dmu_xuio_add(xuio, abuf, 0, blksz);
-		}
-
-		if (postamble) {
-			/* data ends in the middle of the arc_buf */
-			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-			    blksz);
-			ASSERT(abuf);
-			(void) dmu_xuio_add(xuio, abuf, 0, postamble);
-		}
-		break;
-	case UIO_READ:
-		/*
-		 * Loan out an arc_buf for read if the read size is larger than
-		 * the current file block size.  Block alignment is not
-		 * considered.  Partial arc_buf will be loaned out for read.
-		 */
-		blksz = zp->z_blksz;
-		if (blksz < zcr_blksz_min)
-			blksz = zcr_blksz_min;
-		if (blksz > zcr_blksz_max)
-			blksz = zcr_blksz_max;
-		/* avoid potential complexity of dealing with it */
-		if (blksz > max_blksz) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EINVAL));
-		}
-
-		maxsize = zp->z_size - uio->uio_loffset;
-		if (size > maxsize)
-			size = maxsize;
-
-		if (size < blksz) {
-			ZFS_EXIT(zfsvfs);
-			return (SET_ERROR(EINVAL));
-		}
-		break;
-	default:
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	uio->uio_extflg = UIO_XUIO;
-	XUIO_XUZC_RW(xuio) = ioflag;
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*ARGSUSED*/
-static int
-zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
-{
-	int i;
-	arc_buf_t *abuf;
-	int ioflag = XUIO_XUZC_RW(xuio);
-
-	ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY);
-
-	i = dmu_xuio_cnt(xuio);
-	while (i-- > 0) {
-		abuf = dmu_xuio_arcbuf(xuio, i);
-		/*
-		 * if abuf == NULL, it must be a write buffer
-		 * that has been returned in zfs_write().
-		 */
-		if (abuf)
-			dmu_return_arcbuf(abuf);
-		ASSERT(abuf || ioflag == UIO_WRITE);
-	}
-
-	dmu_xuio_fini(xuio);
-	return (0);
-}
-#endif /* HAVE_UIO_ZEROCOPY */
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(zfs_open);
-EXPORT_SYMBOL(zfs_close);
+EXPORT_SYMBOL(zfs_access);
+EXPORT_SYMBOL(zfs_fsync);
+EXPORT_SYMBOL(zfs_holey);
 EXPORT_SYMBOL(zfs_read);
 EXPORT_SYMBOL(zfs_write);
-EXPORT_SYMBOL(zfs_access);
-EXPORT_SYMBOL(zfs_lookup);
-EXPORT_SYMBOL(zfs_create);
-EXPORT_SYMBOL(zfs_tmpfile);
-EXPORT_SYMBOL(zfs_remove);
-EXPORT_SYMBOL(zfs_mkdir);
-EXPORT_SYMBOL(zfs_rmdir);
-EXPORT_SYMBOL(zfs_readdir);
-EXPORT_SYMBOL(zfs_fsync);
-EXPORT_SYMBOL(zfs_getattr);
-EXPORT_SYMBOL(zfs_getattr_fast);
-EXPORT_SYMBOL(zfs_setattr);
-EXPORT_SYMBOL(zfs_rename);
-EXPORT_SYMBOL(zfs_symlink);
-EXPORT_SYMBOL(zfs_readlink);
-EXPORT_SYMBOL(zfs_link);
-EXPORT_SYMBOL(zfs_inactive);
-EXPORT_SYMBOL(zfs_space);
-EXPORT_SYMBOL(zfs_fid);
 EXPORT_SYMBOL(zfs_getsecattr);
 EXPORT_SYMBOL(zfs_setsecattr);
-EXPORT_SYMBOL(zfs_getpage);
-EXPORT_SYMBOL(zfs_putpage);
-EXPORT_SYMBOL(zfs_dirty_inode);
-EXPORT_SYMBOL(zfs_map);
 
-/* BEGIN CSTYLED */
-module_param(zfs_delete_blocks, ulong, 0644);
-MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-module_param(zfs_read_chunk_size, ulong, 0644);
-MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
-/* END CSTYLED */
-
-#endif
+ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, ULONG, ZMOD_RW,
+	"Bytes to read per chunk");

diff --git a/zfs/module/zfs/zfs_znode.c b/zfs/module/zfs/zfs_znode.c
deleted file mode 100644
index 7770e0f..0000000
--- a/zfs/module/zfs/zfs_znode.c
+++ /dev/null

@@ -1,2243 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
- */
-
-/* Portions Copyright 2007 Jeremy Teo */
-
-#ifdef _KERNEL
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <sys/mntent.h>
-#include <sys/u8_textprep.h>
-#include <sys/dsl_dataset.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/kmem.h>
-#include <sys/errno.h>
-#include <sys/mode.h>
-#include <sys/atomic.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_rlock.h>
-#include <sys/zfs_fuid.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/dnode.h>
-#include <sys/fs/zfs.h>
-#include <sys/zpl.h>
-#endif /* _KERNEL */
-
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_tx.h>
-#include <sys/refcount.h>
-#include <sys/stat.h>
-#include <sys/zap.h>
-#include <sys/zfs_znode.h>
-#include <sys/sa.h>
-#include <sys/zfs_sa.h>
-#include <sys/zfs_stat.h>
-
-#include "zfs_prop.h"
-#include "zfs_comutil.h"
-
-/*
- * Functions needed for userland (ie: libzpool) are not put under
- * #ifdef_KERNEL; the rest of the functions have dependencies
- * (such as VFS logic) that will not compile easily in userland.
- */
-#ifdef _KERNEL
-
-static kmem_cache_t *znode_cache = NULL;
-static kmem_cache_t *znode_hold_cache = NULL;
-unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
-
-/*
- * This is used by the test suite so that it can delay znodes from being
- * freed in order to inspect the unlinked set.
- */
-int zfs_unlink_suspend_progress = 0;
-
-/*
- * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
- * z_rangelock. It will modify the offset and length of the lock to reflect
- * znode-specific information, and convert RL_APPEND to RL_WRITER.  This is
- * called with the rangelock_t's rl_lock held, which avoids races.
- */
-static void
-zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
-{
-	znode_t *zp = arg;
-
-	/*
-	 * If in append mode, convert to writer and lock starting at the
-	 * current end of file.
-	 */
-	if (new->lr_type == RL_APPEND) {
-		new->lr_offset = zp->z_size;
-		new->lr_type = RL_WRITER;
-	}
-
-	/*
-	 * If we need to grow the block size then lock the whole file range.
-	 */
-	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
-	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
-	    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
-		new->lr_offset = 0;
-		new->lr_length = UINT64_MAX;
-	}
-}
-
-/*ARGSUSED*/
-static int
-zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
-{
-	znode_t *zp = buf;
-
-	inode_init_once(ZTOI(zp));
-	list_link_init(&zp->z_link_node);
-
-	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
-	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
-	rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
-	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
-	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
-
-	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
-
-	zp->z_dirlocks = NULL;
-	zp->z_acl_cached = NULL;
-	zp->z_xattr_cached = NULL;
-	zp->z_xattr_parent = 0;
-	zp->z_moved = B_FALSE;
-	return (0);
-}
-
-/*ARGSUSED*/
-static void
-zfs_znode_cache_destructor(void *buf, void *arg)
-{
-	znode_t *zp = buf;
-
-	ASSERT(!list_link_active(&zp->z_link_node));
-	mutex_destroy(&zp->z_lock);
-	rw_destroy(&zp->z_parent_lock);
-	rw_destroy(&zp->z_name_lock);
-	mutex_destroy(&zp->z_acl_lock);
-	rw_destroy(&zp->z_xattr_lock);
-	zfs_rangelock_fini(&zp->z_rangelock);
-
-	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT(zp->z_acl_cached == NULL);
-	ASSERT(zp->z_xattr_cached == NULL);
-}
-
-static int
-zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
-{
-	znode_hold_t *zh = buf;
-
-	mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
-	zfs_refcount_create(&zh->zh_refcount);
-	zh->zh_obj = ZFS_NO_OBJECT;
-
-	return (0);
-}
-
-static void
-zfs_znode_hold_cache_destructor(void *buf, void *arg)
-{
-	znode_hold_t *zh = buf;
-
-	mutex_destroy(&zh->zh_lock);
-	zfs_refcount_destroy(&zh->zh_refcount);
-}
-
-void
-zfs_znode_init(void)
-{
-	/*
-	 * Initialize zcache.  The KMC_SLAB hint is used in order that it be
-	 * backed by kmalloc() when on the Linux slab in order that any
-	 * wait_on_bit() operations on the related inode operate properly.
-	 */
-	ASSERT(znode_cache == NULL);
-	znode_cache = kmem_cache_create("zfs_znode_cache",
-	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
-	    zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
-
-	ASSERT(znode_hold_cache == NULL);
-	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
-	    sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
-	    zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
-}
-
-void
-zfs_znode_fini(void)
-{
-	/*
-	 * Cleanup zcache
-	 */
-	if (znode_cache)
-		kmem_cache_destroy(znode_cache);
-	znode_cache = NULL;
-
-	if (znode_hold_cache)
-		kmem_cache_destroy(znode_hold_cache);
-	znode_hold_cache = NULL;
-}
-
-/*
- * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
- * serialize access to a znode and its SA buffer while the object is being
- * created or destroyed.  This kind of locking would normally reside in the
- * znode itself but in this case that's impossible because the znode and SA
- * buffer may not yet exist.  Therefore the locking is handled externally
- * with an array of mutexs and AVLs trees which contain per-object locks.
- *
- * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
- * in to the correct AVL tree and finally the per-object lock is held.  In
- * zfs_znode_hold_exit() the process is reversed.  The per-object lock is
- * released, removed from the AVL tree and destroyed if there are no waiters.
- *
- * This scheme has two important properties:
- *
- * 1) No memory allocations are performed while holding one of the z_hold_locks.
- *    This ensures evict(), which can be called from direct memory reclaim, will
- *    never block waiting on a z_hold_locks which just happens to have hashed
- *    to the same index.
- *
- * 2) All locks used to serialize access to an object are per-object and never
- *    shared.  This minimizes lock contention without creating a large number
- *    of dedicated locks.
- *
- * On the downside it does require znode_lock_t structures to be frequently
- * allocated and freed.  However, because these are backed by a kmem cache
- * and very short lived this cost is minimal.
- */
-int
-zfs_znode_hold_compare(const void *a, const void *b)
-{
-	const znode_hold_t *zh_a = (const znode_hold_t *)a;
-	const znode_hold_t *zh_b = (const znode_hold_t *)b;
-
-	return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
-}
-
-boolean_t
-zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
-{
-	znode_hold_t *zh, search;
-	int i = ZFS_OBJ_HASH(zfsvfs, obj);
-	boolean_t held;
-
-	search.zh_obj = obj;
-
-	mutex_enter(&zfsvfs->z_hold_locks[i]);
-	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
-	held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
-	mutex_exit(&zfsvfs->z_hold_locks[i]);
-
-	return (held);
-}
-
-static znode_hold_t *
-zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
-{
-	znode_hold_t *zh, *zh_new, search;
-	int i = ZFS_OBJ_HASH(zfsvfs, obj);
-	boolean_t found = B_FALSE;
-
-	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
-	zh_new->zh_obj = obj;
-	search.zh_obj = obj;
-
-	mutex_enter(&zfsvfs->z_hold_locks[i]);
-	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
-	if (likely(zh == NULL)) {
-		zh = zh_new;
-		avl_add(&zfsvfs->z_hold_trees[i], zh);
-	} else {
-		ASSERT3U(zh->zh_obj, ==, obj);
-		found = B_TRUE;
-	}
-	zfs_refcount_add(&zh->zh_refcount, NULL);
-	mutex_exit(&zfsvfs->z_hold_locks[i]);
-
-	if (found == B_TRUE)
-		kmem_cache_free(znode_hold_cache, zh_new);
-
-	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
-	ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0);
-	mutex_enter(&zh->zh_lock);
-
-	return (zh);
-}
-
-static void
-zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
-{
-	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
-	boolean_t remove = B_FALSE;
-
-	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
-	ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0);
-	mutex_exit(&zh->zh_lock);
-
-	mutex_enter(&zfsvfs->z_hold_locks[i]);
-	if (zfs_refcount_remove(&zh->zh_refcount, NULL) == 0) {
-		avl_remove(&zfsvfs->z_hold_trees[i], zh);
-		remove = B_TRUE;
-	}
-	mutex_exit(&zfsvfs->z_hold_locks[i]);
-
-	if (remove == B_TRUE)
-		kmem_cache_free(znode_hold_cache, zh);
-}
-
-static void
-zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
-    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
-{
-	ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
-
-	mutex_enter(&zp->z_lock);
-
-	ASSERT(zp->z_sa_hdl == NULL);
-	ASSERT(zp->z_acl_cached == NULL);
-	if (sa_hdl == NULL) {
-		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
-		    SA_HDL_SHARED, &zp->z_sa_hdl));
-	} else {
-		zp->z_sa_hdl = sa_hdl;
-		sa_set_userp(sa_hdl, zp);
-	}
-
-	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
-
-	mutex_exit(&zp->z_lock);
-}
-
-void
-zfs_znode_dmu_fini(znode_t *zp)
-{
-	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
-	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
-
-	sa_handle_destroy(zp->z_sa_hdl);
-	zp->z_sa_hdl = NULL;
-}
-
-/*
- * Called by new_inode() to allocate a new inode.
- */
-int
-zfs_inode_alloc(struct super_block *sb, struct inode **ip)
-{
-	znode_t *zp;
-
-	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
-	*ip = ZTOI(zp);
-
-	return (0);
-}
-
-/*
- * Called in multiple places when an inode should be destroyed.
- */
-void
-zfs_inode_destroy(struct inode *ip)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	if (list_link_active(&zp->z_link_node)) {
-		list_remove(&zfsvfs->z_all_znodes, zp);
-		zfsvfs->z_nr_znodes--;
-	}
-	mutex_exit(&zfsvfs->z_znodes_lock);
-
-	if (zp->z_acl_cached) {
-		zfs_acl_free(zp->z_acl_cached);
-		zp->z_acl_cached = NULL;
-	}
-
-	if (zp->z_xattr_cached) {
-		nvlist_free(zp->z_xattr_cached);
-		zp->z_xattr_cached = NULL;
-	}
-
-	kmem_cache_free(znode_cache, zp);
-}
-
-static void
-zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
-{
-	uint64_t rdev = 0;
-
-	switch (ip->i_mode & S_IFMT) {
-	case S_IFREG:
-		ip->i_op = &zpl_inode_operations;
-		ip->i_fop = &zpl_file_operations;
-		ip->i_mapping->a_ops = &zpl_address_space_operations;
-		break;
-
-	case S_IFDIR:
-		ip->i_op = &zpl_dir_inode_operations;
-		ip->i_fop = &zpl_dir_file_operations;
-		ITOZ(ip)->z_zn_prefetch = B_TRUE;
-		break;
-
-	case S_IFLNK:
-		ip->i_op = &zpl_symlink_inode_operations;
-		break;
-
-	/*
-	 * rdev is only stored in a SA only for device files.
-	 */
-	case S_IFCHR:
-	case S_IFBLK:
-		(void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
-		    sizeof (rdev));
-		/*FALLTHROUGH*/
-	case S_IFIFO:
-	case S_IFSOCK:
-		init_special_inode(ip, ip->i_mode, rdev);
-		ip->i_op = &zpl_special_inode_operations;
-		break;
-
-	default:
-		zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
-		    (u_longlong_t)ip->i_ino, ip->i_mode);
-
-		/* Assume the inode is a file and attempt to continue */
-		ip->i_mode = S_IFREG | 0644;
-		ip->i_op = &zpl_inode_operations;
-		ip->i_fop = &zpl_file_operations;
-		ip->i_mapping->a_ops = &zpl_address_space_operations;
-		break;
-	}
-}
-
-void
-zfs_set_inode_flags(znode_t *zp, struct inode *ip)
-{
-	/*
-	 * Linux and Solaris have different sets of file attributes, so we
-	 * restrict this conversion to the intersection of the two.
-	 */
-#ifdef HAVE_INODE_SET_FLAGS
-	unsigned int flags = 0;
-	if (zp->z_pflags & ZFS_IMMUTABLE)
-		flags |= S_IMMUTABLE;
-	if (zp->z_pflags & ZFS_APPENDONLY)
-		flags |= S_APPEND;
-
-	inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
-#else
-	if (zp->z_pflags & ZFS_IMMUTABLE)
-		ip->i_flags |= S_IMMUTABLE;
-	else
-		ip->i_flags &= ~S_IMMUTABLE;
-
-	if (zp->z_pflags & ZFS_APPENDONLY)
-		ip->i_flags |= S_APPEND;
-	else
-		ip->i_flags &= ~S_APPEND;
-#endif
-}
-
-/*
- * Update the embedded inode given the znode.  We should work toward
- * eliminating this function as soon as possible by removing values
- * which are duplicated between the znode and inode.  If the generic
- * inode has the correct field it should be used, and the ZFS code
- * updated to access the inode.  This can be done incrementally.
- */
-void
-zfs_inode_update(znode_t *zp)
-{
-	zfsvfs_t	*zfsvfs;
-	struct inode	*ip;
-	uint32_t	blksize;
-	u_longlong_t	i_blocks;
-
-	ASSERT(zp != NULL);
-	zfsvfs = ZTOZSB(zp);
-	ip = ZTOI(zp);
-
-	/* Skip .zfs control nodes which do not exist on disk. */
-	if (zfsctl_is_node(ip))
-		return;
-
-	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
-
-	spin_lock(&ip->i_lock);
-	ip->i_blocks = i_blocks;
-	i_size_write(ip, zp->z_size);
-	spin_unlock(&ip->i_lock);
-}
-
-
-/*
- * Construct a znode+inode and initialize.
- *
- * This does not do a call to dmu_set_user() that is
- * up to the caller to do, in case you don't want to
- * return the znode
- */
-static znode_t *
-zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
-    dmu_object_type_t obj_type, sa_handle_t *hdl)
-{
-	znode_t	*zp;
-	struct inode *ip;
-	uint64_t mode;
-	uint64_t parent;
-	uint64_t tmp_gen;
-	uint64_t links;
-	uint64_t z_uid, z_gid;
-	uint64_t atime[2], mtime[2], ctime[2];
-	uint64_t projid = ZFS_DEFAULT_PROJID;
-	sa_bulk_attr_t bulk[11];
-	int count = 0;
-
-	ASSERT(zfsvfs != NULL);
-
-	ip = new_inode(zfsvfs->z_sb);
-	if (ip == NULL)
-		return (NULL);
-
-	zp = ITOZ(ip);
-	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT3P(zp->z_acl_cached, ==, NULL);
-	ASSERT3P(zp->z_xattr_cached, ==, NULL);
-	zp->z_unlinked = B_FALSE;
-	zp->z_atime_dirty = B_FALSE;
-	zp->z_moved = B_FALSE;
-	zp->z_is_mapped = B_FALSE;
-	zp->z_is_ctldir = B_FALSE;
-	zp->z_is_stale = B_FALSE;
-	zp->z_sa_hdl = NULL;
-	zp->z_mapcnt = 0;
-	zp->z_id = db->db_object;
-	zp->z_blksz = blksz;
-	zp->z_seq = 0x7A4653;
-	zp->z_sync_cnt = 0;
-
-	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
-
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
-	    &zp->z_size, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
-	    &parent, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
-
-	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
-	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
-	    (zp->z_pflags & ZFS_PROJID) &&
-	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
-		if (hdl == NULL)
-			sa_handle_destroy(zp->z_sa_hdl);
-		zp->z_sa_hdl = NULL;
-		goto error;
-	}
-
-	zp->z_projid = projid;
-	zp->z_mode = ip->i_mode = mode;
-	ip->i_generation = (uint32_t)tmp_gen;
-	ip->i_blkbits = SPA_MINBLOCKSHIFT;
-	set_nlink(ip, (uint32_t)links);
-	zfs_uid_write(ip, z_uid);
-	zfs_gid_write(ip, z_gid);
-	zfs_set_inode_flags(zp, ip);
-
-	/* Cache the xattr parent id */
-	if (zp->z_pflags & ZFS_XATTR)
-		zp->z_xattr_parent = parent;
-
-	ZFS_TIME_DECODE(&ip->i_atime, atime);
-	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
-	ZFS_TIME_DECODE(&ip->i_ctime, ctime);
-
-	ip->i_ino = zp->z_id;
-	zfs_inode_update(zp);
-	zfs_inode_set_ops(zfsvfs, ip);
-
-	/*
-	 * The only way insert_inode_locked() can fail is if the ip->i_ino
-	 * number is already hashed for this super block.  This can never
-	 * happen because the inode numbers map 1:1 with the object numbers.
-	 *
-	 * The one exception is rolling back a mounted file system, but in
-	 * this case all the active inode are unhashed during the rollback.
-	 */
-	VERIFY3S(insert_inode_locked(ip), ==, 0);
-
-	mutex_enter(&zfsvfs->z_znodes_lock);
-	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
-	membar_producer();
-	mutex_exit(&zfsvfs->z_znodes_lock);
-
-	unlock_new_inode(ip);
-	return (zp);
-
-error:
-	iput(ip);
-	return (NULL);
-}
-
-/*
- * Safely mark an inode dirty.  Inodes which are part of a read-only
- * file system or snapshot may not be dirtied.
- */
-void
-zfs_mark_inode_dirty(struct inode *ip)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-
-	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
-		return;
-
-	mark_inode_dirty(ip);
-}
-
-static uint64_t empty_xattr;
-static uint64_t pad[4];
-static zfs_acl_phys_t acl_phys;
-/*
- * Create a new DMU object to hold a zfs znode.
- *
- *	IN:	dzp	- parent directory for new znode
- *		vap	- file attributes for new znode
- *		tx	- dmu transaction id for zap operations
- *		cr	- credentials of caller
- *		flag	- flags:
- *			  IS_ROOT_NODE	- new object will be root
- *			  IS_TMPFILE	- new object is of O_TMPFILE
- *			  IS_XATTR	- new object is an attribute
- *		acl_ids	- ACL related attributes
- *
- *	OUT:	zpp	- allocated znode (set to dzp if IS_ROOT_NODE)
- *
- */
-void
-zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
-    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
-{
-	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
-	uint64_t	mode, size, links, parent, pflags;
-	uint64_t	projid = ZFS_DEFAULT_PROJID;
-	uint64_t	rdev = 0;
-	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
-	dmu_buf_t	*db;
-	inode_timespec_t now;
-	uint64_t	gen, obj;
-	int		bonuslen;
-	int		dnodesize;
-	sa_handle_t	*sa_hdl;
-	dmu_object_type_t obj_type;
-	sa_bulk_attr_t	*sa_attrs;
-	int		cnt = 0;
-	zfs_acl_locator_cb_t locate = { 0 };
-	znode_hold_t	*zh;
-
-	if (zfsvfs->z_replay) {
-		obj = vap->va_nodeid;
-		now = vap->va_ctime;		/* see zfs_replay_create() */
-		gen = vap->va_nblocks;		/* ditto */
-		dnodesize = vap->va_fsid;	/* ditto */
-	} else {
-		obj = 0;
-		gethrestime(&now);
-		gen = dmu_tx_get_txg(tx);
-		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
-	}
-
-	if (dnodesize == 0)
-		dnodesize = DNODE_MIN_SIZE;
-
-	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
-
-	bonuslen = (obj_type == DMU_OT_SA) ?
-	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
-
-	/*
-	 * Create a new DMU object.
-	 */
-	/*
-	 * There's currently no mechanism for pre-reading the blocks that will
-	 * be needed to allocate a new object, so we accept the small chance
-	 * that there will be an i/o error and we will fail one of the
-	 * assertions below.
-	 */
-	if (S_ISDIR(vap->va_mode)) {
-		if (zfsvfs->z_replay) {
-			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
-			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    obj_type, bonuslen, dnodesize, tx));
-		} else {
-			obj = zap_create_norm_dnsize(zfsvfs->z_os,
-			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    obj_type, bonuslen, dnodesize, tx);
-		}
-	} else {
-		if (zfsvfs->z_replay) {
-			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
-			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    obj_type, bonuslen, dnodesize, tx));
-		} else {
-			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
-			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    obj_type, bonuslen, dnodesize, tx);
-		}
-	}
-
-	zh = zfs_znode_hold_enter(zfsvfs, obj);
-	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
-
-	/*
-	 * If this is the root, fix up the half-initialized parent pointer
-	 * to reference the just-allocated physical data area.
-	 */
-	if (flag & IS_ROOT_NODE) {
-		dzp->z_id = obj;
-	}
-
-	/*
-	 * If parent is an xattr, so am I.
-	 */
-	if (dzp->z_pflags & ZFS_XATTR) {
-		flag |= IS_XATTR;
-	}
-
-	if (zfsvfs->z_use_fuids)
-		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
-	else
-		pflags = 0;
-
-	if (S_ISDIR(vap->va_mode)) {
-		size = 2;		/* contents ("." and "..") */
-		links = 2;
-	} else {
-		size = 0;
-		links = (flag & IS_TMPFILE) ? 0 : 1;
-	}
-
-	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
-		rdev = vap->va_rdev;
-
-	parent = dzp->z_id;
-	mode = acl_ids->z_mode;
-	if (flag & IS_XATTR)
-		pflags |= ZFS_XATTR;
-
-	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) {
-		/*
-		 * With ZFS_PROJID flag, we can easily know whether there is
-		 * project ID stored on disk or not. See zfs_space_delta_cb().
-		 */
-		if (obj_type != DMU_OT_ZNODE &&
-		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
-			pflags |= ZFS_PROJID;
-
-		/*
-		 * Inherit project ID from parent if required.
-		 */
-		projid = zfs_inherit_projid(dzp);
-		if (dzp->z_pflags & ZFS_PROJINHERIT)
-			pflags |= ZFS_PROJINHERIT;
-	}
-
-	/*
-	 * No execs denied will be determined when zfs_mode_compute() is called.
-	 */
-	pflags |= acl_ids->z_aclp->z_hints &
-	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
-	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
-
-	ZFS_TIME_ENCODE(&now, crtime);
-	ZFS_TIME_ENCODE(&now, ctime);
-
-	if (vap->va_mask & ATTR_ATIME) {
-		ZFS_TIME_ENCODE(&vap->va_atime, atime);
-	} else {
-		ZFS_TIME_ENCODE(&now, atime);
-	}
-
-	if (vap->va_mask & ATTR_MTIME) {
-		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-	} else {
-		ZFS_TIME_ENCODE(&now, mtime);
-	}
-
-	/* Now add in all of the "SA" attributes */
-	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
-	    &sa_hdl));
-
-	/*
-	 * Setup the array of attributes to be replaced/set on the new file
-	 *
-	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
-	 * in the old znode_phys_t format.  Don't change this ordering
-	 */
-	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
-
-	if (obj_type == DMU_OT_ZNODE) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
-		    NULL, &atime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
-		    NULL, &mtime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
-		    NULL, &ctime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
-		    NULL, &crtime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
-		    NULL, &gen, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
-		    NULL, &mode, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
-		    NULL, &size, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
-		    NULL, &parent, 8);
-	} else {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
-		    NULL, &mode, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
-		    NULL, &size, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
-		    NULL, &gen, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
-		    NULL, &acl_ids->z_fuid, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
-		    NULL, &acl_ids->z_fgid, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
-		    NULL, &parent, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
-		    NULL, &pflags, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
-		    NULL, &atime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
-		    NULL, &mtime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
-		    NULL, &ctime, 16);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
-		    NULL, &crtime, 16);
-	}
-
-	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
-
-	if (obj_type == DMU_OT_ZNODE) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
-		    &empty_xattr, 8);
-	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
-	    pflags & ZFS_PROJID) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
-		    NULL, &projid, 8);
-	}
-	if (obj_type == DMU_OT_ZNODE ||
-	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
-		    NULL, &rdev, 8);
-	}
-	if (obj_type == DMU_OT_ZNODE) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
-		    NULL, &pflags, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
-		    &acl_ids->z_fuid, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
-		    &acl_ids->z_fgid, 8);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
-		    sizeof (uint64_t) * 4);
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
-		    &acl_phys, sizeof (zfs_acl_phys_t));
-	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
-		    &acl_ids->z_aclp->z_acl_count, 8);
-		locate.cb_aclp = acl_ids->z_aclp;
-		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
-		    zfs_acl_data_locator, &locate,
-		    acl_ids->z_aclp->z_acl_bytes);
-		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
-		    acl_ids->z_fuid, acl_ids->z_fgid);
-	}
-
-	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
-
-	if (!(flag & IS_ROOT_NODE)) {
-		/*
-		 * The call to zfs_znode_alloc() may fail if memory is low
-		 * via the call path: alloc_inode() -> inode_init_always() ->
-		 * security_inode_alloc() -> inode_alloc_security().  Since
-		 * the existing code is written such that zfs_mknode() can
-		 * not fail retry until sufficient memory has been reclaimed.
-		 */
-		do {
-			*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
-		} while (*zpp == NULL);
-
-		VERIFY(*zpp != NULL);
-		VERIFY(dzp != NULL);
-	} else {
-		/*
-		 * If we are creating the root node, the "parent" we
-		 * passed in is the znode for the root.
-		 */
-		*zpp = dzp;
-
-		(*zpp)->z_sa_hdl = sa_hdl;
-	}
-
-	(*zpp)->z_pflags = pflags;
-	(*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
-	(*zpp)->z_dnodesize = dnodesize;
-	(*zpp)->z_projid = projid;
-
-	if (obj_type == DMU_OT_ZNODE ||
-	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
-		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
-	}
-	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
-	zfs_znode_hold_exit(zfsvfs, zh);
-}
-
-/*
- * Update in-core attributes.  It is assumed the caller will be doing an
- * sa_bulk_update to push the changes out.
- */
-void
-zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
-{
-	xoptattr_t *xoap;
-	boolean_t update_inode = B_FALSE;
-
-	xoap = xva_getxoptattr(xvap);
-	ASSERT(xoap);
-
-	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
-		uint64_t times[2];
-		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
-		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
-		    &times, sizeof (times), tx);
-		XVA_SET_RTN(xvap, XAT_CREATETIME);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
-		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_READONLY);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
-		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_HIDDEN);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
-		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_SYSTEM);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
-		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_ARCHIVE);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
-		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
-
-		update_inode = B_TRUE;
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
-		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_NOUNLINK);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
-		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_APPENDONLY);
-
-		update_inode = B_TRUE;
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
-		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_NODUMP);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
-		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_OPAQUE);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
-		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
-		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
-		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
-		zfs_sa_set_scanstamp(zp, xvap, tx);
-		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_REPARSE);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
-		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_OFFLINE);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
-		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_SPARSE);
-	}
-	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
-		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
-		    zp->z_pflags, tx);
-		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
-	}
-
-	if (update_inode)
-		zfs_set_inode_flags(zp, ZTOI(zp));
-}
-
-int
-zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
-{
-	dmu_object_info_t doi;
-	dmu_buf_t	*db;
-	znode_t		*zp;
-	znode_hold_t	*zh;
-	int err;
-	sa_handle_t	*hdl;
-
-	*zpp = NULL;
-
-again:
-	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
-
-	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
-	if (err) {
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (err);
-	}
-
-	dmu_object_info_from_db(db, &doi);
-	if (doi.doi_bonus_type != DMU_OT_SA &&
-	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
-	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
-	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
-		sa_buf_rele(db, NULL);
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (SET_ERROR(EINVAL));
-	}
-
-	hdl = dmu_buf_get_user(db);
-	if (hdl != NULL) {
-		zp = sa_get_userdata(hdl);
-
-
-		/*
-		 * Since "SA" does immediate eviction we
-		 * should never find a sa handle that doesn't
-		 * know about the znode.
-		 */
-
-		ASSERT3P(zp, !=, NULL);
-
-		mutex_enter(&zp->z_lock);
-		ASSERT3U(zp->z_id, ==, obj_num);
-		/*
-		 * If zp->z_unlinked is set, the znode is already marked
-		 * for deletion and should not be discovered. Check this
-		 * after checking igrab() due to fsetxattr() & O_TMPFILE.
-		 *
-		 * If igrab() returns NULL the VFS has independently
-		 * determined the inode should be evicted and has
-		 * called iput_final() to start the eviction process.
-		 * The SA handle is still valid but because the VFS
-		 * requires that the eviction succeed we must drop
-		 * our locks and references to allow the eviction to
-		 * complete.  The zfs_zget() may then be retried.
-		 *
-		 * This unlikely case could be optimized by registering
-		 * a sops->drop_inode() callback.  The callback would
-		 * need to detect the active SA hold thereby informing
-		 * the VFS that this inode should not be evicted.
-		 */
-		if (igrab(ZTOI(zp)) == NULL) {
-			if (zp->z_unlinked)
-				err = SET_ERROR(ENOENT);
-			else
-				err = SET_ERROR(EAGAIN);
-		} else {
-			*zpp = zp;
-			err = 0;
-		}
-
-		mutex_exit(&zp->z_lock);
-		sa_buf_rele(db, NULL);
-		zfs_znode_hold_exit(zfsvfs, zh);
-
-		if (err == EAGAIN) {
-			/* inode might need this to finish evict */
-			cond_resched();
-			goto again;
-		}
-		return (err);
-	}
-
-	/*
-	 * Not found create new znode/vnode but only if file exists.
-	 *
-	 * There is a small window where zfs_vget() could
-	 * find this object while a file create is still in
-	 * progress.  This is checked for in zfs_znode_alloc()
-	 *
-	 * if zfs_znode_alloc() fails it will drop the hold on the
-	 * bonus buffer.
-	 */
-	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
-	    doi.doi_bonus_type, NULL);
-	if (zp == NULL) {
-		err = SET_ERROR(ENOENT);
-	} else {
-		*zpp = zp;
-	}
-	zfs_znode_hold_exit(zfsvfs, zh);
-	return (err);
-}
-
-int
-zfs_rezget(znode_t *zp)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	dmu_object_info_t doi;
-	dmu_buf_t *db;
-	uint64_t obj_num = zp->z_id;
-	uint64_t mode;
-	uint64_t links;
-	sa_bulk_attr_t bulk[10];
-	int err;
-	int count = 0;
-	uint64_t gen;
-	uint64_t z_uid, z_gid;
-	uint64_t atime[2], mtime[2], ctime[2];
-	uint64_t projid = ZFS_DEFAULT_PROJID;
-	znode_hold_t *zh;
-
-	/*
-	 * skip ctldir, otherwise they will always get invalidated. This will
-	 * cause funny behaviour for the mounted snapdirs. Especially for
-	 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent
-	 * anyone automount it again as long as someone is still using the
-	 * detached mount.
-	 */
-	if (zp->z_is_ctldir)
-		return (0);
-
-	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
-
-	mutex_enter(&zp->z_acl_lock);
-	if (zp->z_acl_cached) {
-		zfs_acl_free(zp->z_acl_cached);
-		zp->z_acl_cached = NULL;
-	}
-	mutex_exit(&zp->z_acl_lock);
-
-	rw_enter(&zp->z_xattr_lock, RW_WRITER);
-	if (zp->z_xattr_cached) {
-		nvlist_free(zp->z_xattr_cached);
-		zp->z_xattr_cached = NULL;
-	}
-	rw_exit(&zp->z_xattr_lock);
-
-	ASSERT(zp->z_sa_hdl == NULL);
-	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
-	if (err) {
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (err);
-	}
-
-	dmu_object_info_from_db(db, &doi);
-	if (doi.doi_bonus_type != DMU_OT_SA &&
-	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
-	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
-	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
-		sa_buf_rele(db, NULL);
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (SET_ERROR(EINVAL));
-	}
-
-	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
-
-	/* reload cached values */
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
-	    &gen, sizeof (gen));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
-	    &zp->z_size, sizeof (zp->z_size));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
-	    &links, sizeof (links));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
-	    &zp->z_pflags, sizeof (zp->z_pflags));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
-	    &z_uid, sizeof (z_uid));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
-	    &z_gid, sizeof (z_gid));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
-	    &mode, sizeof (mode));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
-	    &atime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
-	    &mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
-	    &ctime, 16);
-
-	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
-		zfs_znode_dmu_fini(zp);
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (SET_ERROR(EIO));
-	}
-
-	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
-		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
-		    &projid, 8);
-		if (err != 0 && err != ENOENT) {
-			zfs_znode_dmu_fini(zp);
-			zfs_znode_hold_exit(zfsvfs, zh);
-			return (SET_ERROR(err));
-		}
-	}
-
-	zp->z_projid = projid;
-	zp->z_mode = ZTOI(zp)->i_mode = mode;
-	zfs_uid_write(ZTOI(zp), z_uid);
-	zfs_gid_write(ZTOI(zp), z_gid);
-
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
-
-	if ((uint32_t)gen != ZTOI(zp)->i_generation) {
-		zfs_znode_dmu_fini(zp);
-		zfs_znode_hold_exit(zfsvfs, zh);
-		return (SET_ERROR(EIO));
-	}
-
-	set_nlink(ZTOI(zp), (uint32_t)links);
-	zfs_set_inode_flags(zp, ZTOI(zp));
-
-	zp->z_blksz = doi.doi_data_block_size;
-	zp->z_atime_dirty = B_FALSE;
-	zfs_inode_update(zp);
-
-	/*
-	 * If the file has zero links, then it has been unlinked on the send
-	 * side and it must be in the received unlinked set.
-	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
-	 * stale data and to prevent automatic removal of the file in
-	 * zfs_zinactive().  The file will be removed either when it is removed
-	 * on the send side and the next incremental stream is received or
-	 * when the unlinked set gets processed.
-	 */
-	zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
-	if (zp->z_unlinked)
-		zfs_znode_dmu_fini(zp);
-
-	zfs_znode_hold_exit(zfsvfs, zh);
-
-	return (0);
-}
-
-void
-zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	objset_t *os = zfsvfs->z_os;
-	uint64_t obj = zp->z_id;
-	uint64_t acl_obj = zfs_external_acl(zp);
-	znode_hold_t *zh;
-
-	zh = zfs_znode_hold_enter(zfsvfs, obj);
-	if (acl_obj) {
-		VERIFY(!zp->z_is_sa);
-		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
-	}
-	VERIFY(0 == dmu_object_free(os, obj, tx));
-	zfs_znode_dmu_fini(zp);
-	zfs_znode_hold_exit(zfsvfs, zh);
-}
-
-void
-zfs_zinactive(znode_t *zp)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	uint64_t z_id = zp->z_id;
-	znode_hold_t *zh;
-
-	ASSERT(zp->z_sa_hdl);
-
-	/*
-	 * Don't allow a zfs_zget() while were trying to release this znode.
-	 */
-	zh = zfs_znode_hold_enter(zfsvfs, z_id);
-
-	mutex_enter(&zp->z_lock);
-
-	/*
-	 * If this was the last reference to a file with no links, remove
-	 * the file from the file system unless the file system is mounted
-	 * read-only.  That can happen, for example, if the file system was
-	 * originally read-write, the file was opened, then unlinked and
-	 * the file system was made read-only before the file was finally
-	 * closed.  The file will remain in the unlinked set.
-	 */
-	if (zp->z_unlinked) {
-		ASSERT(!zfsvfs->z_issnap);
-		if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
-			mutex_exit(&zp->z_lock);
-			zfs_znode_hold_exit(zfsvfs, zh);
-			zfs_rmnode(zp);
-			return;
-		}
-	}
-
-	mutex_exit(&zp->z_lock);
-	zfs_znode_dmu_fini(zp);
-
-	zfs_znode_hold_exit(zfsvfs, zh);
-}
-
-#if defined(HAVE_INODE_TIMESPEC64_TIMES)
-#define	zfs_compare_timespec timespec64_compare
-#else
-#define	zfs_compare_timespec timespec_compare
-#endif
-
-/*
- * Determine whether the znode's atime must be updated.  The logic mostly
- * duplicates the Linux kernel's relatime_need_update() functionality.
- * This function is only called if the underlying filesystem actually has
- * atime updates enabled.
- */
-boolean_t
-zfs_relatime_need_update(const struct inode *ip)
-{
-	inode_timespec_t now;
-
-	gethrestime(&now);
-	/*
-	 * In relatime mode, only update the atime if the previous atime
-	 * is earlier than either the ctime or mtime or if at least a day
-	 * has passed since the last update of atime.
-	 */
-	if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
-		return (B_TRUE);
-
-	if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
-		return (B_TRUE);
-
-	if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
-		return (B_TRUE);
-
-	return (B_FALSE);
-}
-
-/*
- * Prepare to update znode time stamps.
- *
- *	IN:	zp	- znode requiring timestamp update
- *		flag	- ATTR_MTIME, ATTR_CTIME flags
- *
- *	OUT:	zp	- z_seq
- *		mtime	- new mtime
- *		ctime	- new ctime
- *
- *	Note: We don't update atime here, because we rely on Linux VFS to do
- *	atime updating.
- */
-void
-zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
-    uint64_t ctime[2])
-{
-	inode_timespec_t now;
-
-	gethrestime(&now);
-
-	zp->z_seq++;
-
-	if (flag & ATTR_MTIME) {
-		ZFS_TIME_ENCODE(&now, mtime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
-		if (ZTOZSB(zp)->z_use_fuids) {
-			zp->z_pflags |= (ZFS_ARCHIVE |
-			    ZFS_AV_MODIFIED);
-		}
-	}
-
-	if (flag & ATTR_CTIME) {
-		ZFS_TIME_ENCODE(&now, ctime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
-		if (ZTOZSB(zp)->z_use_fuids)
-			zp->z_pflags |= ZFS_ARCHIVE;
-	}
-}
-
-/*
- * Grow the block size for a file.
- *
- *	IN:	zp	- znode of file to free data in.
- *		size	- requested block size
- *		tx	- open transaction.
- *
- * NOTE: this function assumes that the znode is write locked.
- */
-void
-zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
-{
-	int		error;
-	u_longlong_t	dummy;
-
-	if (size <= zp->z_blksz)
-		return;
-	/*
-	 * If the file size is already greater than the current blocksize,
-	 * we will not grow.  If there is more than one block in a file,
-	 * the blocksize cannot change.
-	 */
-	if (zp->z_blksz && zp->z_size > zp->z_blksz)
-		return;
-
-	error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
-	    size, 0, tx);
-
-	if (error == ENOTSUP)
-		return;
-	ASSERT0(error);
-
-	/* What blocksize did we actually get? */
-	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
-}
-
-/*
- * Increase the file length
- *
- *	IN:	zp	- znode of file to free data in.
- *		end	- new end-of-file
- *
- *	RETURN:	0 on success, error code on failure
- */
-static int
-zfs_extend(znode_t *zp, uint64_t end)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	dmu_tx_t *tx;
-	zfs_locked_range_t *lr;
-	uint64_t newblksz;
-	int error;
-
-	/*
-	 * We will change zp_size, lock the whole file.
-	 */
-	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
-
-	/*
-	 * Nothing to do if file already at desired length.
-	 */
-	if (end <= zp->z_size) {
-		zfs_rangelock_exit(lr);
-		return (0);
-	}
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-	if (end > zp->z_blksz &&
-	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
-		/*
-		 * We are growing the file past the current block size.
-		 */
-		if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
-			/*
-			 * File's blocksize is already larger than the
-			 * "recordsize" property.  Only let it grow to
-			 * the next power of 2.
-			 */
-			ASSERT(!ISP2(zp->z_blksz));
-			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
-		} else {
-			newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
-		}
-		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
-	} else {
-		newblksz = 0;
-	}
-
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		dmu_tx_abort(tx);
-		zfs_rangelock_exit(lr);
-		return (error);
-	}
-
-	if (newblksz)
-		zfs_grow_blocksize(zp, newblksz, tx);
-
-	zp->z_size = end;
-
-	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
-	    &zp->z_size, sizeof (zp->z_size), tx));
-
-	zfs_rangelock_exit(lr);
-
-	dmu_tx_commit(tx);
-
-	return (0);
-}
-
-/*
- * zfs_zero_partial_page - Modeled after update_pages() but
- * with different arguments and semantics for use by zfs_freesp().
- *
- * Zeroes a piece of a single page cache entry for zp at offset
- * start and length len.
- *
- * Caller must acquire a range lock on the file for the region
- * being zeroed in order that the ARC and page cache stay in sync.
- */
-static void
-zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
-{
-	struct address_space *mp = ZTOI(zp)->i_mapping;
-	struct page *pp;
-	int64_t	off;
-	void *pb;
-
-	ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
-
-	off = start & (PAGE_SIZE - 1);
-	start &= PAGE_MASK;
-
-	pp = find_lock_page(mp, start >> PAGE_SHIFT);
-	if (pp) {
-		if (mapping_writably_mapped(mp))
-			flush_dcache_page(pp);
-
-		pb = kmap(pp);
-		bzero(pb + off, len);
-		kunmap(pp);
-
-		if (mapping_writably_mapped(mp))
-			flush_dcache_page(pp);
-
-		mark_page_accessed(pp);
-		SetPageUptodate(pp);
-		ClearPageError(pp);
-		unlock_page(pp);
-		put_page(pp);
-	}
-}
-
-/*
- * Free space in a file.
- *
- *	IN:	zp	- znode of file to free data in.
- *		off	- start of section to free.
- *		len	- length of section to free.
- *
- *	RETURN:	0 on success, error code on failure
- */
-static int
-zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	zfs_locked_range_t *lr;
-	int error;
-
-	/*
-	 * Lock the range being freed.
-	 */
-	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
-
-	/*
-	 * Nothing to do if file already at desired length.
-	 */
-	if (off >= zp->z_size) {
-		zfs_rangelock_exit(lr);
-		return (0);
-	}
-
-	if (off + len > zp->z_size)
-		len = zp->z_size - off;
-
-	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
-
-	/*
-	 * Zero partial page cache entries.  This must be done under a
-	 * range lock in order to keep the ARC and page cache in sync.
-	 */
-	if (zp->z_is_mapped) {
-		loff_t first_page, last_page, page_len;
-		loff_t first_page_offset, last_page_offset;
-
-		/* first possible full page in hole */
-		first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		/* last page of hole */
-		last_page = (off + len) >> PAGE_SHIFT;
-
-		/* offset of first_page */
-		first_page_offset = first_page << PAGE_SHIFT;
-		/* offset of last_page */
-		last_page_offset = last_page << PAGE_SHIFT;
-
-		/* truncate whole pages */
-		if (last_page_offset > first_page_offset) {
-			truncate_inode_pages_range(ZTOI(zp)->i_mapping,
-			    first_page_offset, last_page_offset - 1);
-		}
-
-		/* truncate sub-page ranges */
-		if (first_page > last_page) {
-			/* entire punched area within a single page */
-			zfs_zero_partial_page(zp, off, len);
-		} else {
-			/* beginning of punched area at the end of a page */
-			page_len  = first_page_offset - off;
-			if (page_len > 0)
-				zfs_zero_partial_page(zp, off, page_len);
-
-			/* end of punched area at the beginning of a page */
-			page_len = off + len - last_page_offset;
-			if (page_len > 0)
-				zfs_zero_partial_page(zp, last_page_offset,
-				    page_len);
-		}
-	}
-	zfs_rangelock_exit(lr);
-
-	return (error);
-}
-
-/*
- * Truncate a file
- *
- *	IN:	zp	- znode of file to free data in.
- *		end	- new end-of-file.
- *
- *	RETURN:	0 on success, error code on failure
- */
-static int
-zfs_trunc(znode_t *zp, uint64_t end)
-{
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	dmu_tx_t *tx;
-	zfs_locked_range_t *lr;
-	int error;
-	sa_bulk_attr_t bulk[2];
-	int count = 0;
-
-	/*
-	 * We will change zp_size, lock the whole file.
-	 */
-	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
-
-	/*
-	 * Nothing to do if file already at desired length.
-	 */
-	if (end >= zp->z_size) {
-		zfs_rangelock_exit(lr);
-		return (0);
-	}
-
-	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
-	    DMU_OBJECT_END);
-	if (error) {
-		zfs_rangelock_exit(lr);
-		return (error);
-	}
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-	dmu_tx_mark_netfree(tx);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		dmu_tx_abort(tx);
-		zfs_rangelock_exit(lr);
-		return (error);
-	}
-
-	zp->z_size = end;
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
-	    NULL, &zp->z_size, sizeof (zp->z_size));
-
-	if (end == 0) {
-		zp->z_pflags &= ~ZFS_SPARSE;
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-		    NULL, &zp->z_pflags, 8);
-	}
-	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
-
-	dmu_tx_commit(tx);
-	zfs_rangelock_exit(lr);
-
-	return (0);
-}
-
-/*
- * Free space in a file
- *
- *	IN:	zp	- znode of file to free data in.
- *		off	- start of range
- *		len	- end of range (0 => EOF)
- *		flag	- current file open mode flags.
- *		log	- TRUE if this action should be logged
- *
- *	RETURN:	0 on success, error code on failure
- */
-int
-zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
-{
-	dmu_tx_t *tx;
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	zilog_t *zilog = zfsvfs->z_log;
-	uint64_t mode;
-	uint64_t mtime[2], ctime[2];
-	sa_bulk_attr_t bulk[3];
-	int count = 0;
-	int error;
-
-	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
-	    sizeof (mode))) != 0)
-		return (error);
-
-	if (off > zp->z_size) {
-		error =  zfs_extend(zp, off+len);
-		if (error == 0 && log)
-			goto log;
-		goto out;
-	}
-
-	if (len == 0) {
-		error = zfs_trunc(zp, off);
-	} else {
-		if ((error = zfs_free_range(zp, off, len)) == 0 &&
-		    off + len > zp->z_size)
-			error = zfs_extend(zp, off+len);
-	}
-	if (error || !log)
-		goto out;
-log:
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
-	zfs_sa_upgrade_txholds(tx, zp);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		dmu_tx_abort(tx);
-		goto out;
-	}
-
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-	    NULL, &zp->z_pflags, 8);
-	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
-	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
-
-	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
-
-	dmu_tx_commit(tx);
-
-	zfs_inode_update(zp);
-	error = 0;
-
-out:
-	/*
-	 * Truncate the page cache - for file truncate operations, use
-	 * the purpose-built API for truncations.  For punching operations,
-	 * the truncation is handled under a range lock in zfs_free_range.
-	 */
-	if (len == 0)
-		truncate_setsize(ZTOI(zp), off);
-	return (error);
-}
-
-void
-zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
-{
-	struct super_block *sb;
-	zfsvfs_t	*zfsvfs;
-	uint64_t	moid, obj, sa_obj, version;
-	uint64_t	sense = ZFS_CASE_SENSITIVE;
-	uint64_t	norm = 0;
-	nvpair_t	*elem;
-	int		size;
-	int		error;
-	int		i;
-	znode_t		*rootzp = NULL;
-	vattr_t		vattr;
-	znode_t		*zp;
-	zfs_acl_ids_t	acl_ids;
-
-	/*
-	 * First attempt to create master node.
-	 */
-	/*
-	 * In an empty objset, there are no blocks to read and thus
-	 * there can be no i/o errors (which we assert below).
-	 */
-	moid = MASTER_NODE_OBJ;
-	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
-	    DMU_OT_NONE, 0, tx);
-	ASSERT(error == 0);
-
-	/*
-	 * Set starting attributes.
-	 */
-	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
-	elem = NULL;
-	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
-		/* For the moment we expect all zpl props to be uint64_ts */
-		uint64_t val;
-		char *name;
-
-		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
-		VERIFY(nvpair_value_uint64(elem, &val) == 0);
-		name = nvpair_name(elem);
-		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
-			if (val < version)
-				version = val;
-		} else {
-			error = zap_update(os, moid, name, 8, 1, &val, tx);
-		}
-		ASSERT(error == 0);
-		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
-			norm = val;
-		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
-			sense = val;
-	}
-	ASSERT(version != 0);
-	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
-
-	/*
-	 * Create zap object used for SA attribute registration
-	 */
-
-	if (version >= ZPL_VERSION_SA) {
-		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
-		    DMU_OT_NONE, 0, tx);
-		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
-		ASSERT(error == 0);
-	} else {
-		sa_obj = 0;
-	}
-	/*
-	 * Create a delete queue.
-	 */
-	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
-
-	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
-	ASSERT(error == 0);
-
-	/*
-	 * Create root znode.  Create minimal znode/inode/zfsvfs/sb
-	 * to allow zfs_mknode to work.
-	 */
-	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
-	vattr.va_mode = S_IFDIR|0755;
-	vattr.va_uid = crgetuid(cr);
-	vattr.va_gid = crgetgid(cr);
-
-	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
-	rootzp->z_unlinked = B_FALSE;
-	rootzp->z_atime_dirty = B_FALSE;
-	rootzp->z_moved = B_FALSE;
-	rootzp->z_is_sa = USE_SA(version, os);
-	rootzp->z_pflags = 0;
-
-	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
-	zfsvfs->z_os = os;
-	zfsvfs->z_parent = zfsvfs;
-	zfsvfs->z_version = version;
-	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
-	zfsvfs->z_use_sa = USE_SA(version, os);
-	zfsvfs->z_norm = norm;
-
-	sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
-	sb->s_fs_info = zfsvfs;
-
-	ZTOI(rootzp)->i_sb = sb;
-
-	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
-	    &zfsvfs->z_attr_table);
-
-	ASSERT(error == 0);
-
-	/*
-	 * Fold case on file systems that are always or sometimes case
-	 * insensitive.
-	 */
-	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
-		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
-
-	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
-	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
-	    offsetof(znode_t, z_link_node));
-
-	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
-	zfsvfs->z_hold_size = size;
-	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
-	    KM_SLEEP);
-	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
-	for (i = 0; i != size; i++) {
-		avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
-		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
-		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
-	}
-
-	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
-	    cr, NULL, &acl_ids));
-	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
-	ASSERT3P(zp, ==, rootzp);
-	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
-	ASSERT(error == 0);
-	zfs_acl_ids_free(&acl_ids);
-
-	atomic_set(&ZTOI(rootzp)->i_count, 0);
-	sa_handle_destroy(rootzp->z_sa_hdl);
-	kmem_cache_free(znode_cache, rootzp);
-
-	for (i = 0; i != size; i++) {
-		avl_destroy(&zfsvfs->z_hold_trees[i]);
-		mutex_destroy(&zfsvfs->z_hold_locks[i]);
-	}
-
-	mutex_destroy(&zfsvfs->z_znodes_lock);
-
-	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
-	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
-	kmem_free(sb, sizeof (struct super_block));
-	kmem_free(zfsvfs, sizeof (zfsvfs_t));
-}
-#endif /* _KERNEL */
-
-static int
-zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
-{
-	uint64_t sa_obj = 0;
-	int error;
-
-	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
-	if (error != 0 && error != ENOENT)
-		return (error);
-
-	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
-	return (error);
-}
-
-static int
-zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
-    dmu_buf_t **db, void *tag)
-{
-	dmu_object_info_t doi;
-	int error;
-
-	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
-		return (error);
-
-	dmu_object_info_from_db(*db, &doi);
-	if ((doi.doi_bonus_type != DMU_OT_SA &&
-	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
-	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
-	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
-		sa_buf_rele(*db, tag);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
-	if (error != 0) {
-		sa_buf_rele(*db, tag);
-		return (error);
-	}
-
-	return (0);
-}
-
-void
-zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
-{
-	sa_handle_destroy(hdl);
-	sa_buf_rele(db, tag);
-}
-
-/*
- * Given an object number, return its parent object number and whether
- * or not the object is an extended attribute directory.
- */
-static int
-zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
-    uint64_t *pobjp, int *is_xattrdir)
-{
-	uint64_t parent;
-	uint64_t pflags;
-	uint64_t mode;
-	uint64_t parent_mode;
-	sa_bulk_attr_t bulk[3];
-	sa_handle_t *sa_hdl;
-	dmu_buf_t *sa_db;
-	int count = 0;
-	int error;
-
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
-	    &parent, sizeof (parent));
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
-	    &pflags, sizeof (pflags));
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
-	    &mode, sizeof (mode));
-
-	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
-		return (error);
-
-	/*
-	 * When a link is removed its parent pointer is not changed and will
-	 * be invalid.  There are two cases where a link is removed but the
-	 * file stays around, when it goes to the delete queue and when there
-	 * are additional links.
-	 */
-	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
-	if (error != 0)
-		return (error);
-
-	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
-	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
-	if (error != 0)
-		return (error);
-
-	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
-
-	/*
-	 * Extended attributes can be applied to files, directories, etc.
-	 * Otherwise the parent must be a directory.
-	 */
-	if (!*is_xattrdir && !S_ISDIR(parent_mode))
-		return (SET_ERROR(EINVAL));
-
-	*pobjp = parent;
-
-	return (0);
-}
-
-/*
- * Given an object number, return some zpl level statistics
- */
-static int
-zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
-    zfs_stat_t *sb)
-{
-	sa_bulk_attr_t bulk[4];
-	int count = 0;
-
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
-	    &sb->zs_mode, sizeof (sb->zs_mode));
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
-	    &sb->zs_gen, sizeof (sb->zs_gen));
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
-	    &sb->zs_links, sizeof (sb->zs_links));
-	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
-	    &sb->zs_ctime, sizeof (sb->zs_ctime));
-
-	return (sa_bulk_lookup(hdl, bulk, count));
-}
-
-static int
-zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
-    sa_attr_type_t *sa_table, char *buf, int len)
-{
-	sa_handle_t *sa_hdl;
-	sa_handle_t *prevhdl = NULL;
-	dmu_buf_t *prevdb = NULL;
-	dmu_buf_t *sa_db = NULL;
-	char *path = buf + len - 1;
-	int error;
-
-	*path = '\0';
-	sa_hdl = hdl;
-
-	uint64_t deleteq_obj;
-	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
-	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
-	error = zap_lookup_int(osp, deleteq_obj, obj);
-	if (error == 0) {
-		return (ESTALE);
-	} else if (error != ENOENT) {
-		return (error);
-	}
-	error = 0;
-
-	for (;;) {
-		uint64_t pobj = 0;
-		char component[MAXNAMELEN + 2];
-		size_t complen;
-		int is_xattrdir = 0;
-
-		if (prevdb)
-			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
-
-		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
-		    &is_xattrdir)) != 0)
-			break;
-
-		if (pobj == obj) {
-			if (path[0] != '/')
-				*--path = '/';
-			break;
-		}
-
-		component[0] = '/';
-		if (is_xattrdir) {
-			(void) sprintf(component + 1, "<xattrdir>");
-		} else {
-			error = zap_value_search(osp, pobj, obj,
-			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
-			if (error != 0)
-				break;
-		}
-
-		complen = strlen(component);
-		path -= complen;
-		ASSERT(path >= buf);
-		bcopy(component, path, complen);
-		obj = pobj;
-
-		if (sa_hdl != hdl) {
-			prevhdl = sa_hdl;
-			prevdb = sa_db;
-		}
-		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
-		if (error != 0) {
-			sa_hdl = prevhdl;
-			sa_db = prevdb;
-			break;
-		}
-	}
-
-	if (sa_hdl != NULL && sa_hdl != hdl) {
-		ASSERT(sa_db != NULL);
-		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
-	}
-
-	if (error == 0)
-		(void) memmove(buf, path, buf + len - path);
-
-	return (error);
-}
-
-int
-zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
-{
-	sa_attr_type_t *sa_table;
-	sa_handle_t *hdl;
-	dmu_buf_t *db;
-	int error;
-
-	error = zfs_sa_setup(osp, &sa_table);
-	if (error != 0)
-		return (error);
-
-	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
-	if (error != 0)
-		return (error);
-
-	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
-
-	zfs_release_sa_handle(hdl, db, FTAG);
-	return (error);
-}
-
-int
-zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
-    char *buf, int len)
-{
-	char *path = buf + len - 1;
-	sa_attr_type_t *sa_table;
-	sa_handle_t *hdl;
-	dmu_buf_t *db;
-	int error;
-
-	*path = '\0';
-
-	error = zfs_sa_setup(osp, &sa_table);
-	if (error != 0)
-		return (error);
-
-	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
-	if (error != 0)
-		return (error);
-
-	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
-	if (error != 0) {
-		zfs_release_sa_handle(hdl, db, FTAG);
-		return (error);
-	}
-
-	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
-
-	zfs_release_sa_handle(hdl, db, FTAG);
-	return (error);
-}
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(zfs_create_fs);
-EXPORT_SYMBOL(zfs_obj_to_path);
-
-/* CSTYLED */
-module_param(zfs_object_mutex_size, uint, 0644);
-MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
-module_param(zfs_unlink_suspend_progress, int, 0644);
-MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
-"(debug - leaks space into the unlinked set)");
-#endif

diff --git a/zfs/module/zfs/zil.c b/zfs/module/zfs/zil.c
index c4d7d6e..a4f7c00 100644
--- a/zfs/module/zfs/zil.c
+++ b/zfs/module/zfs/zil.c

@@ -41,7 +41,7 @@
 #include <sys/dmu_tx.h>
 #include <sys/dsl_pool.h>
 #include <sys/metaslab.h>
-#include <sys/trace_zil.h>
+#include <sys/trace_zfs.h>
 #include <sys/abd.h>
 
 /*
@@ -92,6 +92,14 @@
 int zfs_commit_timeout_pct = 5;
 
 /*
+ * Minimal time we care to delay commit waiting for more ZIL records.
+ * At least FreeBSD kernel can't sleep for less than 2us at its best.
+ * So requests to sleep for less then 5us is a waste of CPU time with
+ * a risk of significant log latency increase due to oversleep.
+ */
+static unsigned long zil_min_commit_timeout = 5000;
+
+/*
  * See zil.h for more information about these fields.
  */
 zil_stats_t zil_stats = {
@@ -135,8 +143,6 @@
 static kmem_cache_t *zil_lwb_cache;
 static kmem_cache_t *zil_zcw_cache;
 
-static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
-
 #define	LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \
     sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused))
 
@@ -146,11 +152,11 @@
 	const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva;
 	const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva;
 
-	int cmp = AVL_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
+	int cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
 	if (likely(cmp))
 		return (cmp);
 
-	return (AVL_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)));
+	return (TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)));
 }
 
 static void
@@ -207,8 +213,10 @@
 {
 	zio_cksum_t *zc = &bp->blk_cksum;
 
-	zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL);
-	zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL);
+	(void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_0],
+	    sizeof (zc->zc_word[ZIL_ZC_GUID_0]));
+	(void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_1],
+	    sizeof (zc->zc_word[ZIL_ZC_GUID_1]));
 	zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os);
 	zc->zc_word[ZIL_ZC_SEQ] = 1ULL;
 }
@@ -218,11 +226,10 @@
  */
 static int
 zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
-    blkptr_t *nbp, void *dst, char **end)
+    blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
 {
 	enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
 	arc_flags_t aflags = ARC_FLAG_WAIT;
-	arc_buf_t *abuf = NULL;
 	zbookmark_phys_t zb;
 	int error;
 
@@ -239,7 +246,7 @@
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
 	error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
-	    &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+	    abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
 
 	if (error == 0) {
 		zio_cksum_t cksum = bp->blk_cksum;
@@ -254,23 +261,23 @@
 		 */
 		cksum.zc_word[ZIL_ZC_SEQ]++;
 
+		uint64_t size = BP_GET_LSIZE(bp);
 		if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
-			zil_chain_t *zilc = abuf->b_data;
+			zil_chain_t *zilc = (*abuf)->b_data;
 			char *lr = (char *)(zilc + 1);
-			uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
 
 			if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
-			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
+			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
+			    zilc->zc_nused < sizeof (*zilc) ||
+			    zilc->zc_nused > size) {
 				error = SET_ERROR(ECKSUM);
 			} else {
-				ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
-				bcopy(lr, dst, len);
-				*end = (char *)dst + len;
+				*begin = lr;
+				*end = lr + zilc->zc_nused - sizeof (*zilc);
 				*nbp = zilc->zc_next_blk;
 			}
 		} else {
-			char *lr = abuf->b_data;
-			uint64_t size = BP_GET_LSIZE(bp);
+			char *lr = (*abuf)->b_data;
 			zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
 
 			if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
@@ -278,15 +285,11 @@
 			    (zilc->zc_nused > (size - sizeof (*zilc)))) {
 				error = SET_ERROR(ECKSUM);
 			} else {
-				ASSERT3U(zilc->zc_nused, <=,
-				    SPA_OLD_MAXBLOCKSIZE);
-				bcopy(lr, dst, zilc->zc_nused);
-				*end = (char *)dst + zilc->zc_nused;
+				*begin = lr;
+				*end = lr + zilc->zc_nused;
 				*nbp = zilc->zc_next_blk;
 			}
 		}
-
-		arc_buf_destroy(abuf, &abuf);
 	}
 
 	return (error);
@@ -354,7 +357,6 @@
 	uint64_t blk_count = 0;
 	uint64_t lr_count = 0;
 	blkptr_t blk, next_blk;
-	char *lrbuf, *lrp;
 	int error = 0;
 
 	bzero(&next_blk, sizeof (blkptr_t));
@@ -374,13 +376,13 @@
 	 * If the log has been claimed, stop if we encounter a sequence
 	 * number greater than the highest claimed sequence number.
 	 */
-	lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
 	zil_bp_tree_init(zilog);
 
 	for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
 		uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
 		int reclen;
-		char *end = NULL;
+		char *lrp, *end;
+		arc_buf_t *abuf = NULL;
 
 		if (blk_seq > claim_blk_seq)
 			break;
@@ -396,24 +398,41 @@
 			break;
 
 		error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
-		    lrbuf, &end);
-		if (error != 0)
-			break;
+		    &lrp, &end, &abuf);
+		if (error != 0) {
+			if (abuf)
+				arc_buf_destroy(abuf, &abuf);
+			if (claimed) {
+				char name[ZFS_MAX_DATASET_NAME_LEN];
 
-		for (lrp = lrbuf; lrp < end; lrp += reclen) {
+				dmu_objset_name(zilog->zl_os, name);
+
+				cmn_err(CE_WARN, "ZFS read log block error %d, "
+				    "dataset %s, seq 0x%llx\n", error, name,
+				    (u_longlong_t)blk_seq);
+			}
+			break;
+		}
+
+		for (; lrp < end; lrp += reclen) {
 			lr_t *lr = (lr_t *)lrp;
 			reclen = lr->lrc_reclen;
 			ASSERT3U(reclen, >=, sizeof (lr_t));
-			if (lr->lrc_seq > claim_lr_seq)
+			if (lr->lrc_seq > claim_lr_seq) {
+				arc_buf_destroy(abuf, &abuf);
 				goto done;
+			}
 
 			error = parse_lr_func(zilog, lr, arg, txg);
-			if (error != 0)
+			if (error != 0) {
+				arc_buf_destroy(abuf, &abuf);
 				goto done;
+			}
 			ASSERT3U(max_lr_seq, <, lr->lrc_seq);
 			max_lr_seq = lr->lrc_seq;
 			lr_count++;
 		}
+		arc_buf_destroy(abuf, &abuf);
 	}
 done:
 	zilog->zl_parse_error = error;
@@ -422,20 +441,16 @@
 	zilog->zl_parse_blk_count = blk_count;
 	zilog->zl_parse_lr_count = lr_count;
 
-	ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
-	    (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
-	    (decrypt && error == EIO));
-
 	zil_bp_tree_fini(zilog);
-	zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
 
 	return (error);
 }
 
-/* ARGSUSED */
 static int
-zil_clear_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
+zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
+    uint64_t first_txg)
 {
+	(void) tx;
 	ASSERT(!BP_IS_HOLE(bp));
 
 	/*
@@ -454,15 +469,17 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
-zil_noop_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
+zil_noop_log_record(zilog_t *zilog, const lr_t *lrc, void *tx,
+    uint64_t first_txg)
 {
+	(void) zilog, (void) lrc, (void) tx, (void) first_txg;
 	return (0);
 }
 
 static int
-zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
+zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
+    uint64_t first_txg)
 {
 	/*
 	 * Claim log block if not already committed and not already claimed.
@@ -478,7 +495,8 @@
 }
 
 static int
-zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
+zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx,
+    uint64_t first_txg)
 {
 	lr_write_t *lr = (lr_write_t *)lrc;
 	int error;
@@ -503,17 +521,20 @@
 	return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg));
 }
 
-/* ARGSUSED */
 static int
-zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg)
+zil_free_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
+    uint64_t claim_txg)
 {
+	(void) claim_txg;
+
 	zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
 
 	return (0);
 }
 
 static int
-zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
+zil_free_log_record(zilog_t *zilog, const lr_t *lrc, void *tx,
+    uint64_t claim_txg)
 {
 	lr_write_t *lr = (lr_write_t *)lrc;
 	blkptr_t *bp = &lr->lr_blkptr;
@@ -535,7 +556,7 @@
 	const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
 	const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
 
-	return (AVL_CMP(v1, v2));
+	return (TREE_CMP(v1, v2));
 }
 
 static lwb_t *
@@ -604,7 +625,7 @@
  * Called when we create in-memory log transactions so that we know
  * to cleanup the itxs at the end of spa_sync().
  */
-void
+static void
 zilog_dirty(zilog_t *zilog, uint64_t txg)
 {
 	dsl_pool_t *dp = zilog->zl_dmu_pool;
@@ -630,7 +651,7 @@
  * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current
  * state.
  */
-boolean_t
+static boolean_t __maybe_unused
 zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg)
 {
 	dsl_pool_t *dp = zilog->zl_dmu_pool;
@@ -644,7 +665,7 @@
  * Determine if the zil is dirty. The zil is considered dirty if it has
  * any pending itx records that have not been cleaned by zil_clean().
  */
-boolean_t
+static boolean_t
 zilog_is_dirty(zilog_t *zilog)
 {
 	dsl_pool_t *dp = zilog->zl_dmu_pool;
@@ -905,10 +926,10 @@
  * Checksum errors are ok as they indicate the end of the chain.
  * Any other error (no device or read failure) returns an error.
  */
-/* ARGSUSED */
 int
 zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
 {
+	(void) dp;
 	zilog_t *zilog;
 	objset_t *os;
 	blkptr_t *bp;
@@ -1142,7 +1163,8 @@
 	lwb->lwb_tx = NULL;
 
 	ASSERT3U(lwb->lwb_issued_timestamp, >, 0);
-	zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp;
+	zilog->zl_last_lwb_latency = (zilog->zl_last_lwb_latency * 3 +
+	    gethrtime() - lwb->lwb_issued_timestamp) / 4;
 
 	lwb->lwb_root_zio = NULL;
 
@@ -1172,6 +1194,20 @@
 
 		ASSERT3P(zcw->zcw_lwb, ==, lwb);
 		zcw->zcw_lwb = NULL;
+		/*
+		 * We expect any ZIO errors from child ZIOs to have been
+		 * propagated "up" to this specific LWB's root ZIO, in
+		 * order for this error handling to work correctly. This
+		 * includes ZIO errors from either this LWB's write or
+		 * flush, as well as any errors from other dependent LWBs
+		 * (e.g. a root LWB ZIO that might be a child of this LWB).
+		 *
+		 * With that said, it's important to note that LWB flush
+		 * errors are not propagated up to the LWB root ZIO.
+		 * This is incorrect behavior, and results in VDEV flush
+		 * errors not being handled correctly here. See the
+		 * comment above the call to "zio_flush" for details.
+		 */
 
 		zcw->zcw_zio_error = zio->io_error;
 
@@ -1226,7 +1262,7 @@
 	ASSERT(!BP_IS_HOLE(zio->io_bp));
 	ASSERT(BP_GET_FILL(zio->io_bp) == 0);
 
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 
 	mutex_enter(&zilog->zl_lock);
 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
@@ -1245,6 +1281,12 @@
 	 * nodes. We avoid calling zio_flush() since there isn't any
 	 * good reason for doing so, after the lwb block failed to be
 	 * written out.
+	 *
+	 * Additionally, we don't perform any further error handling at
+	 * this point (e.g. setting "zcw_zio_error" appropriately), as
+	 * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus,
+	 * we expect any error seen here to have been propagated to
+	 * that function).
 	 */
 	if (zio->io_error != 0) {
 		while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
@@ -1275,8 +1317,17 @@
 
 	while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
 		vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
-		if (vd != NULL)
+		if (vd != NULL) {
+			/*
+			 * The "ZIO_FLAG_DONT_PROPAGATE" flag is currently
+			 * always used within "zio_flush". This means
+			 * that any errors when flushing the vdev(s) will
+			 * (unfortunately) not be handled correctly,
+			 * since these "zio_flush" errors will not be
+			 * propagated up to "zil_lwb_flush_vdevs_done".
+			 */
 			zio_flush(lwb->lwb_root_zio, vd);
+		}
 		kmem_free(zv, sizeof (*zv));
 	}
 }
@@ -1393,8 +1444,7 @@
 		lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
 		    zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
 		    BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
-		    prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
-		    ZIO_FLAG_FASTWRITE, &zb);
+		    prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
 		ASSERT3P(lwb->lwb_write_zio, !=, NULL);
 
 		lwb->lwb_state = LWB_STATE_OPENED;
@@ -1543,6 +1593,7 @@
 		wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
 		ASSERT3U(wsz, <=, lwb->lwb_sz);
 		zio_shrink(lwb->lwb_write_zio, wsz);
+		wsz = lwb->lwb_write_zio->io_size;
 
 	} else {
 		wsz = lwb->lwb_sz;
@@ -1613,7 +1664,7 @@
 	lr_t *lrcb, *lrc;
 	lr_write_t *lrwb, *lrw;
 	char *lr_buf;
-	uint64_t dlen, dnow, lwb_sp, reclen, txg, max_log_data;
+	uint64_t dlen, dnow, dpad, lwb_sp, reclen, txg, max_log_data;
 
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
 	ASSERT3P(lwb, !=, NULL);
@@ -1647,8 +1698,9 @@
 	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
 		dlen = P2ROUNDUP_TYPED(
 		    lrw->lr_length, sizeof (uint64_t), uint64_t);
+		dpad = dlen - lrw->lr_length;
 	} else {
-		dlen = 0;
+		dlen = dpad = 0;
 	}
 	reclen = lrc->lrc_reclen;
 	zilog->zl_cur_used += (reclen + dlen);
@@ -1740,7 +1792,11 @@
 			 * completed after "lwb_write_zio" completed.
 			 */
 			error = zilog->zl_get_data(itx->itx_private,
-			    lrwb, dbuf, lwb, lwb->lwb_write_zio);
+			    itx->itx_gen, lrwb, dbuf, lwb,
+			    lwb->lwb_write_zio);
+			if (dbuf != NULL && error == 0 && dnow == dlen)
+				/* Zero any padding bytes in the last block. */
+				bzero((char *)dbuf + lrwb->lr_length, dpad);
 
 			if (error == EIO) {
 				txg_wait_synced(zilog->zl_dmu_pool, txg);
@@ -1778,18 +1834,19 @@
 }
 
 itx_t *
-zil_itx_create(uint64_t txtype, size_t lrsize)
+zil_itx_create(uint64_t txtype, size_t olrsize)
 {
-	size_t itxsize;
+	size_t itxsize, lrsize;
 	itx_t *itx;
 
-	lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t);
+	lrsize = P2ROUNDUP_TYPED(olrsize, sizeof (uint64_t), size_t);
 	itxsize = offsetof(itx_t, itx_lr) + lrsize;
 
 	itx = zio_data_buf_alloc(itxsize);
 	itx->itx_lr.lrc_txtype = txtype;
 	itx->itx_lr.lrc_reclen = lrsize;
 	itx->itx_lr.lrc_seq = 0;	/* defensive */
+	bzero((char *)&itx->itx_lr + olrsize, lrsize - olrsize);
 	itx->itx_sync = B_TRUE;		/* default is synchronous */
 	itx->itx_callback = NULL;
 	itx->itx_callback_data = NULL;
@@ -1815,12 +1872,13 @@
  * so no locks are needed.
  */
 static void
-zil_itxg_clean(itxs_t *itxs)
+zil_itxg_clean(void *arg)
 {
 	itx_t *itx;
 	list_t *list;
 	avl_tree_t *t;
 	void *cookie;
+	itxs_t *itxs = arg;
 	itx_async_node_t *ian;
 
 	list = &itxs->i_sync_list;
@@ -1875,7 +1933,7 @@
 	const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid;
 	const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid;
 
-	return (AVL_CMP(o1, o2));
+	return (TREE_CMP(o1, o2));
 }
 
 /*
@@ -1955,7 +2013,7 @@
 			 * This should be rare.
 			 */
 			zfs_dbgmsg("zil_itx_assign: missed itx cleanup for "
-			    "txg %llu", itxg->itxg_txg);
+			    "txg %llu", (u_longlong_t)itxg->itxg_txg);
 			clean = itxg->itxg_itxs;
 		}
 		itxg->itxg_txg = txg;
@@ -2040,7 +2098,7 @@
 	ASSERT3P(zilog->zl_dmu_pool, !=, NULL);
 	ASSERT3P(zilog->zl_dmu_pool->dp_zil_clean_taskq, !=, NULL);
 	taskqid_t id = taskq_dispatch(zilog->zl_dmu_pool->dp_zil_clean_taskq,
-	    (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP);
+	    zil_itxg_clean, clean_me, TQ_NOSLEEP);
 	if (id == TASKQID_INVALID)
 		zil_itxg_clean(clean_me);
 }
@@ -2095,7 +2153,7 @@
 /*
  * Move the async itxs for a specified object to commit into sync lists.
  */
-static void
+void
 zil_async_to_sync(zilog_t *zilog, uint64_t foid)
 {
 	uint64_t otxg, txg;
@@ -2235,8 +2293,9 @@
 	spa_t *spa = zilog->zl_spa;
 	list_t nolwb_itxs;
 	list_t nolwb_waiters;
-	lwb_t *lwb;
+	lwb_t *lwb, *plwb;
 	itx_t *itx;
+	boolean_t first = B_TRUE;
 
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
 
@@ -2258,6 +2317,9 @@
 		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
 		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
 		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
+		first = (lwb->lwb_state != LWB_STATE_OPENED) &&
+		    ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL ||
+		    plwb->lwb_state == LWB_STATE_FLUSH_DONE);
 	}
 
 	while ((itx = list_head(&zilog->zl_itx_commit_list)) != NULL) {
@@ -2428,7 +2490,23 @@
 		 * try and pack as many itxs into as few lwbs as
 		 * possible, without significantly impacting the latency
 		 * of each individual itx.
+		 *
+		 * If there were no already running or open LWBs, the
+		 * workload may be single-threaded.  In that case, if the
+		 * ZIL write latency is very small or the LWB is almost
+		 * full, it may be cheaper to bypass the delay.
 		 */
+		if (lwb->lwb_state == LWB_STATE_OPENED && first) {
+			hrtime_t sleep = zilog->zl_last_lwb_latency *
+			    zfs_commit_timeout_pct / 100;
+			if (sleep < zil_min_commit_timeout ||
+			    lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
+				lwb = zil_lwb_write_issue(zilog, lwb);
+				zilog->zl_cur_used = 0;
+				if (lwb == NULL)
+					zil_commit_writer_stall(zilog);
+			}
+		}
 	}
 }
 
@@ -2689,11 +2767,11 @@
 			 * timeout is reached; responsibility (2) from
 			 * the comment above this function.
 			 */
-			clock_t timeleft = cv_timedwait_hires(&zcw->zcw_cv,
+			int rc = cv_timedwait_hires(&zcw->zcw_cv,
 			    &zcw->zcw_lock, wakeup, USEC2NSEC(1),
 			    CALLOUT_FLAG_ABSOLUTE);
 
-			if (timeleft >= 0 || zcw->zcw_done)
+			if (rc != -1 || zcw->zcw_done)
 				continue;
 
 			timedout = B_TRUE;
@@ -2771,7 +2849,14 @@
 zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
 {
 	dmu_tx_t *tx = dmu_tx_create(zilog->zl_os);
-	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+	/*
+	 * Since we are not going to create any new dirty data, and we
+	 * can even help with clearing the existing dirty data, we
+	 * should not be subject to the dirty data based delays. We
+	 * use TXG_NOTHROTTLE to bypass the delay mechanism.
+	 */
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
 
 	itx_t *itx = zil_itx_create(TX_COMMIT, sizeof (lr_t));
 	itx->itx_sync = B_TRUE;
@@ -3086,10 +3171,10 @@
 	mutex_exit(&zilog->zl_lock);
 }
 
-/* ARGSUSED */
 static int
 zil_lwb_cons(void *vbuf, void *unused, int kmflag)
 {
+	(void) unused, (void) kmflag;
 	lwb_t *lwb = vbuf;
 	list_create(&lwb->lwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
 	list_create(&lwb->lwb_waiters, sizeof (zil_commit_waiter_t),
@@ -3100,10 +3185,10 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
 zil_lwb_dest(void *vbuf, void *unused)
 {
+	(void) unused;
 	lwb_t *lwb = vbuf;
 	mutex_destroy(&lwb->lwb_vdev_lock);
 	avl_destroy(&lwb->lwb_vdev_tree);
@@ -3280,7 +3365,8 @@
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
 
 	if (zilog_is_dirty(zilog))
-		zfs_dbgmsg("zil (%px) is dirty, txg %llu", zilog, txg);
+		zfs_dbgmsg("zil (%px) is dirty, txg %llu", zilog,
+		    (u_longlong_t)txg);
 	if (txg < spa_freeze_txg(zilog->zl_spa))
 		VERIFY(!zilog_is_dirty(zilog));
 
@@ -3473,7 +3559,7 @@
 } zil_replay_arg_t;
 
 static int
-zil_replay_error(zilog_t *zilog, lr_t *lr, int error)
+zil_replay_error(zilog_t *zilog, const lr_t *lr, int error)
 {
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 
@@ -3491,7 +3577,8 @@
 }
 
 static int
-zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
+zil_replay_log_record(zilog_t *zilog, const lr_t *lr, void *zra,
+    uint64_t claim_txg)
 {
 	zil_replay_arg_t *zr = zra;
 	const zil_header_t *zh = zilog->zl_header;
@@ -3572,10 +3659,11 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static int
-zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+zil_incr_blks(zilog_t *zilog, const blkptr_t *bp, void *arg, uint64_t claim_txg)
 {
+	(void) bp, (void) arg, (void) claim_txg;
+
 	zilog->zl_replay_blks++;
 
 	return (0);
@@ -3634,13 +3722,12 @@
 	return (B_FALSE);
 }
 
-/* ARGSUSED */
 int
 zil_reset(const char *osname, void *arg)
 {
-	int error;
+	(void) arg;
 
-	error = zil_suspend(osname, NULL);
+	int error = zil_suspend(osname, NULL);
 	/* EACCES means crypto key not loaded */
 	if ((error == EACCES) || (error == EBUSY))
 		return (SET_ERROR(error));
@@ -3649,7 +3736,6 @@
 	return (0);
 }
 
-#if defined(_KERNEL)
 EXPORT_SYMBOL(zil_alloc);
 EXPORT_SYMBOL(zil_free);
 EXPORT_SYMBOL(zil_open);
@@ -3674,19 +3760,21 @@
 EXPORT_SYMBOL(zil_set_logbias);
 
 /* BEGIN CSTYLED */
-module_param(zfs_commit_timeout_pct, int, 0644);
-MODULE_PARM_DESC(zfs_commit_timeout_pct, "ZIL block open timeout percentage");
+ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, INT, ZMOD_RW,
+	"ZIL block open timeout percentage");
 
-module_param(zil_replay_disable, int, 0644);
-MODULE_PARM_DESC(zil_replay_disable, "Disable intent logging replay");
+ZFS_MODULE_PARAM(zfs_zil, zil_, min_commit_timeout, ULONG, ZMOD_RW,
+	"Minimum delay we care for ZIL block commit");
 
-module_param(zil_nocacheflush, int, 0644);
-MODULE_PARM_DESC(zil_nocacheflush, "Disable ZIL cache flushes");
+ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW,
+	"Disable intent logging replay");
 
-module_param(zil_slog_bulk, ulong, 0644);
-MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit");
+ZFS_MODULE_PARAM(zfs_zil, zil_, nocacheflush, INT, ZMOD_RW,
+	"Disable ZIL cache flushes");
 
-module_param(zil_maxblocksize, int, 0644);
-MODULE_PARM_DESC(zil_maxblocksize, "Limit in bytes of ZIL log block size");
+ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, ULONG, ZMOD_RW,
+	"Limit in bytes slog sync writes per commit");
+
+ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, INT, ZMOD_RW,
+	"Limit in bytes of ZIL log block size");
 /* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/zio.c b/zfs/module/zfs/zio.c
index 411a42e..c367ef7 100644
--- a/zfs/module/zfs/zio.c
+++ b/zfs/module/zfs/zio.c

@@ -20,9 +20,12 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2021, Datto, Inc.
  */
 
 #include <sys/sysmacros.h>
@@ -44,10 +47,10 @@
 #include <sys/dsl_scan.h>
 #include <sys/metaslab_impl.h>
 #include <sys/time.h>
-#include <sys/trace_zio.h>
+#include <sys/trace_zfs.h>
 #include <sys/abd.h>
 #include <sys/dsl_crypt.h>
-#include <sys/cityhash.h>
+#include <cityhash.h>
 
 /*
  * ==========================================================================
@@ -142,7 +145,6 @@
 zio_init(void)
 {
 	size_t c;
-	vmem_t *data_alloc_arena = NULL;
 
 	zio_cache = kmem_cache_create("zio_cache",
 	    sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
@@ -203,6 +205,19 @@
 
 		if (align != 0) {
 			char name[36];
+			if (cflags == data_cflags) {
+				/*
+				 * Resulting kmem caches would be identical.
+				 * Save memory by creating only one.
+				 */
+				(void) snprintf(name, sizeof (name),
+				    "zio_buf_comb_%lu", (ulong_t)size);
+				zio_buf_cache[c] = kmem_cache_create(name,
+				    size, align, NULL, NULL, NULL, NULL, NULL,
+				    cflags);
+				zio_data_buf_cache[c] = zio_buf_cache[c];
+				continue;
+			}
 			(void) snprintf(name, sizeof (name), "zio_buf_%lu",
 			    (ulong_t)size);
 			zio_buf_cache[c] = kmem_cache_create(name, size,
@@ -211,8 +226,7 @@
 			(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
 			    (ulong_t)size);
 			zio_data_buf_cache[c] = kmem_cache_create(name, size,
-			    align, NULL, NULL, NULL, NULL,
-			    data_alloc_arena, data_cflags);
+			    align, NULL, NULL, NULL, NULL, NULL, data_cflags);
 		}
 	}
 
@@ -234,37 +248,50 @@
 void
 zio_fini(void)
 {
-	size_t c;
-	kmem_cache_t *last_cache = NULL;
-	kmem_cache_t *last_data_cache = NULL;
+	size_t n = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT;
 
-	for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
-#ifdef _ILP32
-		/*
-		 * Cache size limited to 1M on 32-bit platforms until ARC
-		 * buffers no longer require virtual address space.
-		 */
-		if (((c + 1) << SPA_MINBLOCKSHIFT) > zfs_max_recordsize)
-			break;
-#endif
 #if defined(ZFS_DEBUG) && !defined(_KERNEL)
-		if (zio_buf_cache_allocs[c] != zio_buf_cache_frees[c])
+	for (size_t i = 0; i < n; i++) {
+		if (zio_buf_cache_allocs[i] != zio_buf_cache_frees[i])
 			(void) printf("zio_fini: [%d] %llu != %llu\n",
-			    (int)((c + 1) << SPA_MINBLOCKSHIFT),
-			    (long long unsigned)zio_buf_cache_allocs[c],
-			    (long long unsigned)zio_buf_cache_frees[c]);
+			    (int)((i + 1) << SPA_MINBLOCKSHIFT),
+			    (long long unsigned)zio_buf_cache_allocs[i],
+			    (long long unsigned)zio_buf_cache_frees[i]);
+	}
 #endif
-		if (zio_buf_cache[c] != last_cache) {
-			last_cache = zio_buf_cache[c];
-			kmem_cache_destroy(zio_buf_cache[c]);
-		}
-		zio_buf_cache[c] = NULL;
 
-		if (zio_data_buf_cache[c] != last_data_cache) {
-			last_data_cache = zio_data_buf_cache[c];
-			kmem_cache_destroy(zio_data_buf_cache[c]);
+	/*
+	 * The same kmem cache can show up multiple times in both zio_buf_cache
+	 * and zio_data_buf_cache. Do a wasteful but trivially correct scan to
+	 * sort it out.
+	 */
+	for (size_t i = 0; i < n; i++) {
+		kmem_cache_t *cache = zio_buf_cache[i];
+		if (cache == NULL)
+			continue;
+		for (size_t j = i; j < n; j++) {
+			if (cache == zio_buf_cache[j])
+				zio_buf_cache[j] = NULL;
+			if (cache == zio_data_buf_cache[j])
+				zio_data_buf_cache[j] = NULL;
 		}
-		zio_data_buf_cache[c] = NULL;
+		kmem_cache_destroy(cache);
+	}
+
+	for (size_t i = 0; i < n; i++) {
+		kmem_cache_t *cache = zio_data_buf_cache[i];
+		if (cache == NULL)
+			continue;
+		for (size_t j = i; j < n; j++) {
+			if (cache == zio_data_buf_cache[j])
+				zio_data_buf_cache[j] = NULL;
+		}
+		kmem_cache_destroy(cache);
+	}
+
+	for (size_t i = 0; i < n; i++) {
+		VERIFY3P(zio_buf_cache[i], ==, NULL);
+		VERIFY3P(zio_data_buf_cache[i], ==, NULL);
 	}
 
 	kmem_cache_destroy(zio_link_cache);
@@ -342,6 +369,7 @@
 static void
 zio_abd_free(void *abd, size_t size)
 {
+	(void) size;
 	abd_free((abd_t *)abd);
 }
 
@@ -409,7 +437,8 @@
 	if (zio->io_error == 0) {
 		void *tmp = abd_borrow_buf(data, size);
 		int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
-		    zio->io_abd, tmp, zio->io_size, size);
+		    zio->io_abd, tmp, zio->io_size, size,
+		    &zio->io_prop.zp_complevel);
 		abd_return_buf_copy(data, tmp, size);
 
 		if (zio_injection_enabled && ret == 0)
@@ -459,7 +488,8 @@
 			 */
 			tmp = zio_buf_alloc(lsize);
 			ret = zio_decompress_data(BP_GET_COMPRESS(bp),
-			    zio->io_abd, tmp, zio->io_size, lsize);
+			    zio->io_abd, tmp, zio->io_size, lsize,
+			    &zio->io_prop.zp_complevel);
 			if (ret != 0) {
 				ret = SET_ERROR(EIO);
 				goto error;
@@ -542,8 +572,8 @@
 		zio->io_error = SET_ERROR(EIO);
 		if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
 			spa_log_error(spa, &zio->io_bookmark);
-			zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
-			    spa, NULL, &zio->io_bookmark, zio, 0, 0);
+			(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+			    spa, NULL, &zio->io_bookmark, zio, 0);
 		}
 	} else {
 		zio->io_error = ret;
@@ -848,8 +878,7 @@
 		zio->io_bookmark = *zb;
 
 	if (pio != NULL) {
-		if (zio->io_metaslab_class == NULL)
-			zio->io_metaslab_class = pio->io_metaslab_class;
+		zio->io_metaslab_class = pio->io_metaslab_class;
 		if (zio->io_logical == NULL)
 			zio->io_logical = pio->io_logical;
 		if (zio->io_child_type == ZIO_CHILD_GANG)
@@ -892,35 +921,83 @@
 	return (zio_null(NULL, spa, NULL, done, private, flags));
 }
 
-static void
-zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held)
+static int
+zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
+    enum blk_verify_flag blk_verify, const char *fmt, ...)
 {
+	va_list adx;
+	char buf[256];
+
+	va_start(adx, fmt);
+	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
+	va_end(adx);
+
+	switch (blk_verify) {
+	case BLK_VERIFY_HALT:
+		dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp);
+		zfs_panic_recover("%s: %s", spa_name(spa), buf);
+		break;
+	case BLK_VERIFY_LOG:
+		zfs_dbgmsg("%s: %s", spa_name(spa), buf);
+		break;
+	case BLK_VERIFY_ONLY:
+		break;
+	}
+
+	return (1);
+}
+
+/*
+ * Verify the block pointer fields contain reasonable values.  This means
+ * it only contains known object types, checksum/compression identifiers,
+ * block sizes within the maximum allowed limits, valid DVAs, etc.
+ *
+ * If everything checks out, B_TRUE is returned.  The blk_verify
+ * argument controls the behavior when an invalid field is detected.
+ *
+ * Modes for blk_verify:
+ *   1) BLK_VERIFY_ONLY (evaluate the block)
+ *   2) BLK_VERIFY_LOG (evaluate the block and log problems)
+ *   3) BLK_VERIFY_HALT (call zfs_panic_recover on error)
+ */
+boolean_t
+zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held,
+    enum blk_verify_flag blk_verify)
+{
+	int errors = 0;
+
 	if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
-		zfs_panic_recover("blkptr at %p has invalid TYPE %llu",
+		errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+		    "blkptr at %p has invalid TYPE %llu",
 		    bp, (longlong_t)BP_GET_TYPE(bp));
 	}
 	if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS ||
 	    BP_GET_CHECKSUM(bp) <= ZIO_CHECKSUM_ON) {
-		zfs_panic_recover("blkptr at %p has invalid CHECKSUM %llu",
+		errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+		    "blkptr at %p has invalid CHECKSUM %llu",
 		    bp, (longlong_t)BP_GET_CHECKSUM(bp));
 	}
 	if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS ||
 	    BP_GET_COMPRESS(bp) <= ZIO_COMPRESS_ON) {
-		zfs_panic_recover("blkptr at %p has invalid COMPRESS %llu",
+		errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+		    "blkptr at %p has invalid COMPRESS %llu",
 		    bp, (longlong_t)BP_GET_COMPRESS(bp));
 	}
 	if (BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE) {
-		zfs_panic_recover("blkptr at %p has invalid LSIZE %llu",
+		errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+		    "blkptr at %p has invalid LSIZE %llu",
 		    bp, (longlong_t)BP_GET_LSIZE(bp));
 	}
 	if (BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE) {
-		zfs_panic_recover("blkptr at %p has invalid PSIZE %llu",
+		errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+		    "blkptr at %p has invalid PSIZE %llu",
 		    bp, (longlong_t)BP_GET_PSIZE(bp));
 	}
 
 	if (BP_IS_EMBEDDED(bp)) {
 		if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) {
-			zfs_panic_recover("blkptr at %p has invalid ETYPE %llu",
+			errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+			    "blkptr at %p has invalid ETYPE %llu",
 			    bp, (longlong_t)BPE_GET_ETYPE(bp));
 		}
 	}
@@ -930,7 +1007,7 @@
 	 * will be done once the zio is executed in vdev_mirror_map_alloc.
 	 */
 	if (!spa->spa_trust_config)
-		return;
+		return (errors == 0);
 
 	if (!config_held)
 		spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
@@ -945,24 +1022,25 @@
 	 * that are in the log) to be arbitrarily large.
 	 */
 	for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
-		uint64_t vdevid = DVA_GET_VDEV(&bp->blk_dva[i]);
+		const dva_t *dva = &bp->blk_dva[i];
+		uint64_t vdevid = DVA_GET_VDEV(dva);
 
 		if (vdevid >= spa->spa_root_vdev->vdev_children) {
-			zfs_panic_recover("blkptr at %p DVA %u has invalid "
-			    "VDEV %llu",
+			errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+			    "blkptr at %p DVA %u has invalid VDEV %llu",
 			    bp, i, (longlong_t)vdevid);
 			continue;
 		}
 		vdev_t *vd = spa->spa_root_vdev->vdev_child[vdevid];
 		if (vd == NULL) {
-			zfs_panic_recover("blkptr at %p DVA %u has invalid "
-			    "VDEV %llu",
+			errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+			    "blkptr at %p DVA %u has invalid VDEV %llu",
 			    bp, i, (longlong_t)vdevid);
 			continue;
 		}
 		if (vd->vdev_ops == &vdev_hole_ops) {
-			zfs_panic_recover("blkptr at %p DVA %u has hole "
-			    "VDEV %llu",
+			errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+			    "blkptr at %p DVA %u has hole VDEV %llu",
 			    bp, i, (longlong_t)vdevid);
 			continue;
 		}
@@ -974,23 +1052,28 @@
 			 */
 			continue;
 		}
-		uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
-		uint64_t asize = DVA_GET_ASIZE(&bp->blk_dva[i]);
-		if (BP_IS_GANG(bp))
-			asize = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+		uint64_t offset = DVA_GET_OFFSET(dva);
+		uint64_t asize = DVA_GET_ASIZE(dva);
+		if (DVA_GET_GANG(dva))
+			asize = vdev_gang_header_asize(vd);
 		if (offset + asize > vd->vdev_asize) {
-			zfs_panic_recover("blkptr at %p DVA %u has invalid "
-			    "OFFSET %llu",
+			errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
+			    "blkptr at %p DVA %u has invalid OFFSET %llu",
 			    bp, i, (longlong_t)offset);
 		}
 	}
+	if (errors > 0)
+		dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp);
 	if (!config_held)
 		spa_config_exit(spa, SCL_VDEV, bp);
+
+	return (errors == 0);
 }
 
 boolean_t
 zfs_dva_valid(spa_t *spa, const dva_t *dva, const blkptr_t *bp)
 {
+	(void) bp;
 	uint64_t vdevid = DVA_GET_VDEV(dva);
 
 	if (vdevid >= spa->spa_root_vdev->vdev_children)
@@ -1010,8 +1093,8 @@
 	uint64_t offset = DVA_GET_OFFSET(dva);
 	uint64_t asize = DVA_GET_ASIZE(dva);
 
-	if (BP_IS_GANG(bp))
-		asize = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
+	if (DVA_GET_GANG(dva))
+		asize = vdev_gang_header_asize(vd);
 	if (offset + asize > vd->vdev_asize)
 		return (B_FALSE);
 
@@ -1025,8 +1108,6 @@
 {
 	zio_t *zio;
 
-	zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
-
 	zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
 	    data, size, size, done, private,
 	    ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
@@ -1118,7 +1199,7 @@
 zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
 {
 
-	zfs_blkptr_verify(spa, bp, B_FALSE);
+	(void) zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_HALT);
 
 	/*
 	 * The check for EMBEDDED is a performance optimization.  We
@@ -1134,47 +1215,58 @@
 	 * deferred, and which will not need to do a read (i.e. not GANG or
 	 * DEDUP), can be processed immediately.  Otherwise, put them on the
 	 * in-memory list for later processing.
+	 *
+	 * Note that we only defer frees after zfs_sync_pass_deferred_free
+	 * when the log space map feature is disabled. [see relevant comment
+	 * in spa_sync_iterate_to_convergence()]
 	 */
-	if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp) ||
+	if (BP_IS_GANG(bp) ||
+	    BP_GET_DEDUP(bp) ||
 	    txg != spa->spa_syncing_txg ||
-	    spa_sync_pass(spa) >= zfs_sync_pass_deferred_free) {
+	    (spa_sync_pass(spa) >= zfs_sync_pass_deferred_free &&
+	    !spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))) {
 		bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
 	} else {
-		VERIFY0(zio_wait(zio_free_sync(NULL, spa, txg, bp, 0)));
+		VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
 	}
 }
 
+/*
+ * To improve performance, this function may return NULL if we were able
+ * to do the free immediately.  This avoids the cost of creating a zio
+ * (and linking it to the parent, etc).
+ */
 zio_t *
 zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
     enum zio_flag flags)
 {
-	zio_t *zio;
-	enum zio_stage stage = ZIO_FREE_PIPELINE;
-
 	ASSERT(!BP_IS_HOLE(bp));
 	ASSERT(spa_syncing_txg(spa) == txg);
-	ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
 
 	if (BP_IS_EMBEDDED(bp))
-		return (zio_null(pio, spa, NULL, NULL, NULL, 0));
+		return (NULL);
 
 	metaslab_check_free(spa, bp);
 	arc_freed(spa, bp);
 	dsl_scan_freed(spa, bp);
 
-	/*
-	 * GANG and DEDUP blocks can induce a read (for the gang block header,
-	 * or the DDT), so issue them asynchronously so that this thread is
-	 * not tied up.
-	 */
-	if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp))
-		stage |= ZIO_STAGE_ISSUE_ASYNC;
+	if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp)) {
+		/*
+		 * GANG and DEDUP blocks can induce a read (for the gang block
+		 * header, or the DDT), so issue them asynchronously so that
+		 * this thread is not tied up.
+		 */
+		enum zio_stage stage =
+		    ZIO_FREE_PIPELINE | ZIO_STAGE_ISSUE_ASYNC;
 
-	zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
-	    BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
-	    flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
-
-	return (zio);
+		return (zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
+		    BP_GET_PSIZE(bp), NULL, NULL,
+		    ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
+		    flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage));
+	} else {
+		metaslab_free(spa, bp, txg, B_FALSE);
+		return (NULL);
+	}
 }
 
 zio_t *
@@ -1183,7 +1275,8 @@
 {
 	zio_t *zio;
 
-	zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
+	(void) zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER,
+	    BLK_VERIFY_HALT);
 
 	if (BP_IS_EMBEDDED(bp))
 		return (zio_null(pio, spa, NULL, NULL, NULL, 0));
@@ -1203,7 +1296,7 @@
 	ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <,
 	    spa_min_claim_txg(spa));
 	ASSERT(txg == spa_min_claim_txg(spa) || txg == 0);
-	ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa));	/* zdb(1M) */
+	ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa));	/* zdb(8) */
 
 	zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 	    BP_GET_PSIZE(bp), done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW,
@@ -1613,8 +1706,9 @@
 	if (compress != ZIO_COMPRESS_OFF &&
 	    !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
 		void *cbuf = zio_buf_alloc(lsize);
-		psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
-		if (psize == 0 || psize == lsize) {
+		psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize,
+		    zp->zp_complevel);
+		if (psize == 0 || psize >= lsize) {
 			compress = ZIO_COMPRESS_OFF;
 			zio_buf_free(cbuf, lsize);
 		} else if (!zp->zp_dedup && !zp->zp_encrypt &&
@@ -1634,16 +1728,16 @@
 			return (zio);
 		} else {
 			/*
-			 * Round up compressed size up to the ashift
-			 * of the smallest-ashift device, and zero the tail.
-			 * This ensures that the compressed size of the BP
-			 * (and thus compressratio property) are correct,
+			 * Round compressed size up to the minimum allocation
+			 * size of the smallest-ashift device, and zero the
+			 * tail. This ensures that the compressed size of the
+			 * BP (and thus compressratio property) are correct,
 			 * in that we charge for the padding used to fill out
 			 * the last sector.
 			 */
-			ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT);
-			size_t rounded = (size_t)P2ROUNDUP(psize,
-			    1ULL << spa->spa_min_ashift);
+			ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
+			size_t rounded = (size_t)roundup(psize,
+			    spa->spa_min_alloc);
 			if (rounded >= lsize) {
 				compress = ZIO_COMPRESS_OFF;
 				zio_buf_free(cbuf, lsize);
@@ -1676,9 +1770,27 @@
 		 * to a hole.
 		 */
 		psize = zio_compress_data(ZIO_COMPRESS_EMPTY,
-		    zio->io_abd, NULL, lsize);
-		if (psize == 0)
+		    zio->io_abd, NULL, lsize, zp->zp_complevel);
+		if (psize == 0 || psize >= lsize)
 			compress = ZIO_COMPRESS_OFF;
+	} else if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS &&
+	    !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) {
+		/*
+		 * If we are raw receiving an encrypted dataset we should not
+		 * take this codepath because it will change the on-disk block
+		 * and decryption will fail.
+		 */
+		size_t rounded = MIN((size_t)roundup(psize,
+		    spa->spa_min_alloc), lsize);
+
+		if (rounded != psize) {
+			abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);
+			abd_zero_off(cdata, psize, rounded - psize);
+			abd_copy_off(cdata, zio->io_abd, 0, 0, psize);
+			psize = rounded;
+			zio_push_transform(zio, cdata,
+			    psize, rounded, NULL);
+		}
 	} else {
 		ASSERT3U(psize, !=, 0);
 	}
@@ -1798,21 +1910,22 @@
 	 * to dispatch the zio to another taskq at the same time.
 	 */
 	ASSERT(taskq_empty_ent(&zio->io_tqent));
-	spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio,
-	    flags, &zio->io_tqent);
+	spa_taskq_dispatch_ent(spa, t, q, zio_execute, zio, flags,
+	    &zio->io_tqent);
 }
 
 static boolean_t
 zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
 {
-	kthread_t *executor = zio->io_executor;
 	spa_t *spa = zio->io_spa;
 
+	taskq_t *tq = taskq_of_curthread();
+
 	for (zio_type_t t = 0; t < ZIO_TYPES; t++) {
 		spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
 		uint_t i;
 		for (i = 0; i < tqs->stqs_count; i++) {
-			if (taskq_member(tqs->stqs_taskq[i], executor))
+			if (tqs->stqs_taskq[i] == tq)
 				return (B_TRUE);
 		}
 	}
@@ -1829,7 +1942,7 @@
 }
 
 void
-zio_interrupt(zio_t *zio)
+zio_interrupt(void *zio)
 {
 	zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE);
 }
@@ -1887,8 +2000,8 @@
 				 * OpenZFS's timeout_generic().
 				 */
 				tid = taskq_dispatch_delay(system_taskq,
-				    (task_func_t *)zio_interrupt,
-				    zio, TQ_NOSLEEP, expire_at_tick);
+				    zio_interrupt, zio, TQ_NOSLEEP,
+				    expire_at_tick);
 				if (tid == TASKQID_INVALID) {
 					/*
 					 * Couldn't allocate a task.  Just
@@ -1920,20 +2033,26 @@
 
 		zfs_dbgmsg("slow zio[%d]: zio=%px timestamp=%llu "
 		    "delta=%llu queued=%llu io=%llu "
-		    "path=%s last=%llu "
-		    "type=%d priority=%d flags=0x%x "
-		    "stage=0x%x pipeline=0x%x pipeline-trace=0x%x "
-		    "objset=%llu object=%llu level=%llu blkid=%llu "
-		    "offset=%llu size=%llu error=%d",
+		    "path=%s "
+		    "last=%llu type=%d "
+		    "priority=%d flags=0x%x stage=0x%x "
+		    "pipeline=0x%x pipeline-trace=0x%x "
+		    "objset=%llu object=%llu "
+		    "level=%llu blkid=%llu "
+		    "offset=%llu size=%llu "
+		    "error=%d",
 		    ziodepth, pio, pio->io_timestamp,
-		    delta, pio->io_delta, pio->io_delay,
-		    vd ? vd->vdev_path : "NULL", vq ? vq->vq_io_complete_ts : 0,
-		    pio->io_type, pio->io_priority, pio->io_flags,
-		    pio->io_stage, pio->io_pipeline, pio->io_pipeline_trace,
-		    zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid,
-		    pio->io_offset, pio->io_size, pio->io_error);
-		zfs_ereport_post(FM_EREPORT_ZFS_DEADMAN,
-		    pio->io_spa, vd, zb, pio, 0, 0);
+		    (u_longlong_t)delta, pio->io_delta, pio->io_delay,
+		    vd ? vd->vdev_path : "NULL",
+		    vq ? vq->vq_io_complete_ts : 0, pio->io_type,
+		    pio->io_priority, pio->io_flags, pio->io_stage,
+		    pio->io_pipeline, pio->io_pipeline_trace,
+		    (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object,
+		    (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid,
+		    (u_longlong_t)pio->io_offset, (u_longlong_t)pio->io_size,
+		    pio->io_error);
+		(void) zfs_ereport_post(FM_EREPORT_ZFS_DEADMAN,
+		    pio->io_spa, vd, zb, pio, 0);
 
 		if (failmode == ZIO_FAILURE_MODE_CONTINUE &&
 		    taskq_empty_ent(&pio->io_tqent)) {
@@ -2003,7 +2122,7 @@
  * it is externally visible.
  */
 void
-zio_execute(zio_t *zio)
+zio_execute(void *zio)
 {
 	fstrans_cookie_t cookie;
 
@@ -2017,7 +2136,7 @@
  * enough to allow zio_execute() to be called recursively.  A minimum
  * stack size of 16K is required to avoid needing to re-dispatch the zio.
  */
-boolean_t
+static boolean_t
 zio_execute_stack_check(zio_t *zio)
 {
 #if !defined(HAVE_LARGE_STACKS)
@@ -2032,6 +2151,8 @@
 	    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
 	    !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))
 		return (B_TRUE);
+#else
+	(void) zio;
 #endif /* HAVE_LARGE_STACKS */
 
 	return (B_FALSE);
@@ -2111,6 +2232,15 @@
 int
 zio_wait(zio_t *zio)
 {
+	/*
+	 * Some routines, like zio_free_sync(), may return a NULL zio
+	 * to avoid the performance overhead of creating and then destroying
+	 * an unneeded zio.  For the callers' simplicity, we accept a NULL
+	 * zio and ignore it.
+	 */
+	if (zio == NULL)
+		return (0);
+
 	long timeout = MSEC_TO_TICK(zfs_deadman_ziotime_ms);
 	int error;
 
@@ -2148,10 +2278,16 @@
 void
 zio_nowait(zio_t *zio)
 {
+	/*
+	 * See comment in zio_wait().
+	 */
+	if (zio == NULL)
+		return;
+
 	ASSERT3P(zio->io_executor, ==, NULL);
 
 	if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
-	    zio_unique_parent(zio) == NULL) {
+	    list_is_empty(&zio->io_parent_list)) {
 		zio_t *pio;
 
 		/*
@@ -2160,9 +2296,7 @@
 		 * will ensure they complete prior to unloading the pool.
 		 */
 		spa_t *spa = zio->io_spa;
-		kpreempt_disable();
-		pio = spa->spa_async_zio_root[CPU_SEQID];
-		kpreempt_enable();
+		pio = spa->spa_async_zio_root[CPU_SEQID_UNSTABLE];
 
 		zio_add_child(pio, zio);
 	}
@@ -2179,8 +2313,9 @@
  */
 
 static void
-zio_reexecute(zio_t *pio)
+zio_reexecute(void *arg)
 {
+	zio_t *pio = arg;
 	zio_t *cio, *cio_next;
 
 	ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL);
@@ -2244,8 +2379,8 @@
 	cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
 	    "failure and has been suspended.\n", spa_name(spa));
 
-	zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
-	    NULL, NULL, 0, 0);
+	(void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
+	    NULL, NULL, 0);
 
 	mutex_enter(&spa->spa_suspend_lock);
 
@@ -2368,7 +2503,7 @@
 static void
 zio_gang_issue_func_done(zio_t *zio)
 {
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 }
 
 static zio_t *
@@ -2412,7 +2547,7 @@
 			zio_checksum_compute(zio, BP_GET_CHECKSUM(bp),
 			    buf, BP_GET_PSIZE(bp));
 
-			abd_put(buf);
+			abd_free(buf);
 		}
 		/*
 		 * If we are here to damage data for testing purposes,
@@ -2430,20 +2565,26 @@
 	return (zio);
 }
 
-/* ARGSUSED */
 static zio_t *
 zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
     uint64_t offset)
 {
-	return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp,
-	    ZIO_GANG_CHILD_FLAGS(pio)));
+	(void) gn, (void) data, (void) offset;
+
+	zio_t *zio = zio_free_sync(pio, pio->io_spa, pio->io_txg, bp,
+	    ZIO_GANG_CHILD_FLAGS(pio));
+	if (zio == NULL) {
+		zio = zio_null(pio, pio->io_spa,
+		    NULL, NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio));
+	}
+	return (zio);
 }
 
-/* ARGSUSED */
 static zio_t *
 zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
     uint64_t offset)
 {
+	(void) gn, (void) data, (void) offset;
 	return (zio_claim(pio, pio->io_spa, pio->io_txg, bp,
 	    NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio)));
 }
@@ -2535,7 +2676,7 @@
 	ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
 	ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
 
-	abd_put(zio->io_abd);
+	abd_free(zio->io_abd);
 
 	for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
 		blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
@@ -2627,7 +2768,7 @@
 	dva_t *cdva = zio->io_bp->blk_dva;
 	dva_t *pdva = pio->io_bp->blk_dva;
 	uint64_t asize;
-	ASSERTV(zio_t *gio = zio->io_gang_leader);
+	zio_t *gio __maybe_unused = zio->io_gang_leader;
 
 	if (BP_IS_HOLE(zio->io_bp))
 		return;
@@ -2659,14 +2800,13 @@
 	 * check for it here as it is cleared in zio_ready.
 	 */
 	if (zio->io_abd != NULL)
-		abd_put(zio->io_abd);
+		abd_free(zio->io_abd);
 }
 
 static zio_t *
-zio_write_gang_block(zio_t *pio)
+zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 {
 	spa_t *spa = pio->io_spa;
-	metaslab_class_t *mc = spa_normal_class(spa);
 	blkptr_t *bp = pio->io_bp;
 	zio_t *gio = pio->io_gang_leader;
 	zio_t *zio;
@@ -2696,8 +2836,8 @@
 		ASSERT(has_data);
 
 		flags |= METASLAB_ASYNC_ALLOC;
-		VERIFY(zfs_refcount_held(&mc->mc_alloc_slots[pio->io_allocator],
-		    pio));
+		VERIFY(zfs_refcount_held(&mc->mc_allocator[pio->io_allocator].
+		    mca_alloc_slots, pio));
 
 		/*
 		 * The logical zio has already placed a reservation for
@@ -2763,6 +2903,7 @@
 
 		zp.zp_checksum = gio->io_prop.zp_checksum;
 		zp.zp_compress = ZIO_COMPRESS_OFF;
+		zp.zp_complevel = gio->io_prop.zp_complevel;
 		zp.zp_type = DMU_OT_NONE;
 		zp.zp_level = 0;
 		zp.zp_copies = gio->io_prop.zp_copies;
@@ -3149,35 +3290,6 @@
 	ddt_exit(ddt);
 }
 
-static void
-zio_ddt_ditto_write_done(zio_t *zio)
-{
-	int p = DDT_PHYS_DITTO;
-	ASSERTV(zio_prop_t *zp = &zio->io_prop);
-	blkptr_t *bp = zio->io_bp;
-	ddt_t *ddt = ddt_select(zio->io_spa, bp);
-	ddt_entry_t *dde = zio->io_private;
-	ddt_phys_t *ddp = &dde->dde_phys[p];
-	ddt_key_t *ddk = &dde->dde_key;
-
-	ddt_enter(ddt);
-
-	ASSERT(ddp->ddp_refcnt == 0);
-	ASSERT(dde->dde_lead_zio[p] == zio);
-	dde->dde_lead_zio[p] = NULL;
-
-	if (zio->io_error == 0) {
-		ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum));
-		ASSERT(zp->zp_copies < SPA_DVAS_PER_BP);
-		ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp));
-		if (ddp->ddp_phys_birth != 0)
-			ddt_phys_free(ddt, ddk, ddp, zio->io_txg);
-		ddt_phys_fill(ddp, bp);
-	}
-
-	ddt_exit(ddt);
-}
-
 static zio_t *
 zio_ddt_write(zio_t *zio)
 {
@@ -3186,9 +3298,7 @@
 	uint64_t txg = zio->io_txg;
 	zio_prop_t *zp = &zio->io_prop;
 	int p = zp->zp_copies;
-	int ditto_copies;
 	zio_t *cio = NULL;
-	zio_t *dio = NULL;
 	ddt_t *ddt = ddt_select(spa, bp);
 	ddt_entry_t *dde;
 	ddt_phys_t *ddp;
@@ -3225,41 +3335,6 @@
 		return (zio);
 	}
 
-	ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp);
-	ASSERT(ditto_copies < SPA_DVAS_PER_BP);
-
-	if (ditto_copies > ddt_ditto_copies_present(dde) &&
-	    dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) {
-		zio_prop_t czp = *zp;
-
-		czp.zp_copies = ditto_copies;
-
-		/*
-		 * If we arrived here with an override bp, we won't have run
-		 * the transform stack, so we won't have the data we need to
-		 * generate a child i/o.  So, toss the override bp and restart.
-		 * This is safe, because using the override bp is just an
-		 * optimization; and it's rare, so the cost doesn't matter.
-		 */
-		if (zio->io_bp_override) {
-			zio_pop_transforms(zio);
-			zio->io_stage = ZIO_STAGE_OPEN;
-			zio->io_pipeline = ZIO_WRITE_PIPELINE;
-			zio->io_bp_override = NULL;
-			BP_ZERO(bp);
-			ddt_exit(ddt);
-			return (zio);
-		}
-
-		dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
-		    zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL,
-		    NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
-		    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
-
-		zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL);
-		dde->dde_lead_zio[DDT_PHYS_DITTO] = dio;
-	}
-
 	if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) {
 		if (ddp->ddp_phys_birth != 0)
 			ddt_bp_fill(ddp, bp, txg);
@@ -3285,10 +3360,7 @@
 
 	ddt_exit(ddt);
 
-	if (cio)
-		zio_nowait(cio);
-	if (dio)
-		zio_nowait(dio);
+	zio_nowait(cio);
 
 	return (zio);
 }
@@ -3330,9 +3402,9 @@
 {
 	zio_t *zio;
 
-	ASSERT(MUTEX_HELD(&spa->spa_alloc_locks[allocator]));
+	ASSERT(MUTEX_HELD(&spa->spa_allocs[allocator].spaa_lock));
 
-	zio = avl_first(&spa->spa_alloc_trees[allocator]);
+	zio = avl_first(&spa->spa_allocs[allocator].spaa_tree);
 	if (zio == NULL)
 		return (NULL);
 
@@ -3344,11 +3416,11 @@
 	 */
 	ASSERT3U(zio->io_allocator, ==, allocator);
 	if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
-	    zio->io_prop.zp_copies, zio->io_allocator, zio, 0)) {
+	    zio->io_prop.zp_copies, allocator, zio, 0)) {
 		return (NULL);
 	}
 
-	avl_remove(&spa->spa_alloc_trees[allocator], zio);
+	avl_remove(&spa->spa_allocs[allocator].spaa_tree, zio);
 	ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
 
 	return (zio);
@@ -3372,8 +3444,8 @@
 		return (zio);
 	}
 
+	ASSERT(zio->io_type == ZIO_TYPE_WRITE);
 	ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
-
 	ASSERT3U(zio->io_queued_timestamp, >, 0);
 	ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
 
@@ -3385,14 +3457,14 @@
 	 * into 2^20 block regions, and then hash based on the objset, object,
 	 * level, and region to accomplish both of these goals.
 	 */
-	zio->io_allocator = cityhash4(bm->zb_objset, bm->zb_object,
+	int allocator = (uint_t)cityhash4(bm->zb_objset, bm->zb_object,
 	    bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
-	mutex_enter(&spa->spa_alloc_locks[zio->io_allocator]);
-	ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+	zio->io_allocator = allocator;
 	zio->io_metaslab_class = mc;
-	avl_add(&spa->spa_alloc_trees[zio->io_allocator], zio);
-	nio = zio_io_to_allocate(spa, zio->io_allocator);
-	mutex_exit(&spa->spa_alloc_locks[zio->io_allocator]);
+	mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
+	avl_add(&spa->spa_allocs[allocator].spaa_tree, zio);
+	nio = zio_io_to_allocate(spa, allocator);
+	mutex_exit(&spa->spa_allocs[allocator].spaa_lock);
 	return (nio);
 }
 
@@ -3401,9 +3473,9 @@
 {
 	zio_t *zio;
 
-	mutex_enter(&spa->spa_alloc_locks[allocator]);
+	mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
 	zio = zio_io_to_allocate(spa, allocator);
-	mutex_exit(&spa->spa_alloc_locks[allocator]);
+	mutex_exit(&spa->spa_allocs[allocator].spaa_lock);
 	if (zio == NULL)
 		return;
 
@@ -3451,6 +3523,17 @@
 		zio->io_metaslab_class = mc;
 	}
 
+	/*
+	 * Try allocating the block in the usual metaslab class.
+	 * If that's full, allocate it in the normal class.
+	 * If that's full, allocate as a gang block,
+	 * and if all are full, the allocation fails (which shouldn't happen).
+	 *
+	 * Note that we do not fall back on embedded slog (ZIL) space, to
+	 * preserve unfragmented slog space, which is critical for decent
+	 * sync write performance.  If a log allocation fails, we will fall
+	 * back to spa_sync() which is abysmal for performance.
+	 */
 	error = metaslab_alloc(spa, mc, zio->io_size, bp,
 	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
 	    &zio->io_alloc_list, zio, zio->io_allocator);
@@ -3470,26 +3553,41 @@
 			    zio->io_prop.zp_copies, zio->io_allocator, zio);
 			zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
 
-			mc = spa_normal_class(spa);
-			VERIFY(metaslab_class_throttle_reserve(mc,
+			VERIFY(metaslab_class_throttle_reserve(
+			    spa_normal_class(spa),
 			    zio->io_prop.zp_copies, zio->io_allocator, zio,
 			    flags | METASLAB_MUST_RESERVE));
-		} else {
-			mc = spa_normal_class(spa);
 		}
-		zio->io_metaslab_class = mc;
+		zio->io_metaslab_class = mc = spa_normal_class(spa);
+		if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
+			zfs_dbgmsg("%s: metaslab allocation failure, "
+			    "trying normal class: zio %px, size %llu, error %d",
+			    spa_name(spa), zio, (u_longlong_t)zio->io_size,
+			    error);
+		}
 
 		error = metaslab_alloc(spa, mc, zio->io_size, bp,
 		    zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
 		    &zio->io_alloc_list, zio, zio->io_allocator);
 	}
 
+	if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) {
+		if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
+			zfs_dbgmsg("%s: metaslab allocation failure, "
+			    "trying ganging: zio %px, size %llu, error %d",
+			    spa_name(spa), zio, (u_longlong_t)zio->io_size,
+			    error);
+		}
+		return (zio_write_gang_block(zio, mc));
+	}
 	if (error != 0) {
-		zfs_dbgmsg("%s: metaslab allocation failure: zio %px, "
-		    "size %llu, error %d", spa_name(spa), zio, zio->io_size,
-		    error);
-		if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
-			return (zio_write_gang_block(zio));
+		if (error != ENOSPC ||
+		    (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC)) {
+			zfs_dbgmsg("%s: metaslab allocation failure: zio %px, "
+			    "size %llu, error %d",
+			    spa_name(spa), zio, (u_longlong_t)zio->io_size,
+			    error);
+		}
 		zio->io_error = error;
 	}
 
@@ -3566,19 +3664,21 @@
 	 * of, so we just hash the objset ID to pick the allocator to get
 	 * some parallelism.
 	 */
+	int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
+	int allocator = (uint_t)cityhash4(0, 0, 0,
+	    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
 	error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
-	    txg, NULL, METASLAB_FASTWRITE, &io_alloc_list, NULL,
-	    cityhash4(0, 0, 0, os->os_dsl_dataset->ds_object) %
-	    spa->spa_alloc_count);
-	if (error == 0) {
-		*slog = TRUE;
-	} else {
+	    txg, NULL, flags, &io_alloc_list, NULL, allocator);
+	*slog = (error == 0);
+	if (error != 0) {
+		error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
+		    new_bp, 1, txg, NULL, flags,
+		    &io_alloc_list, NULL, allocator);
+	}
+	if (error != 0) {
 		error = metaslab_alloc(spa, spa_normal_class(spa), size,
-		    new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
-		    &io_alloc_list, NULL, cityhash4(0, 0, 0,
-		    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count);
-		if (error == 0)
-			*slog = FALSE;
+		    new_bp, 1, txg, NULL, flags,
+		    &io_alloc_list, NULL, allocator);
 	}
 	metaslab_trace_fini(&io_alloc_list);
 
@@ -3612,7 +3712,8 @@
 		}
 	} else {
 		zfs_dbgmsg("%s: zil block allocation failure: "
-		    "size %llu, error %d", spa_name(spa), size, error);
+		    "size %llu, error %d", spa_name(spa), (u_longlong_t)size,
+		    error);
 	}
 
 	return (error);
@@ -3735,19 +3836,37 @@
 	 * However, indirect vdevs point off to other vdevs which may have
 	 * DTL's, so we never bypass them.  The child i/os on concrete vdevs
 	 * will be properly bypassed instead.
+	 *
+	 * Leaf DTL_PARTIAL can be empty when a legitimate write comes from
+	 * a dRAID spare vdev. For example, when a dRAID spare is first
+	 * used, its spare blocks need to be written to, but the leaf vdevs
+	 * of such blocks can have an empty DTL_PARTIAL.
+	 *
+	 * There seems to be no clean way to allow such writes while
+	 * bypassing spurious ones. At this point, just avoid all bypassing
+	 * for dRAID for correctness.
 	 */
 	if ((zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
 	    !(zio->io_flags & ZIO_FLAG_SELF_HEAL) &&
 	    zio->io_txg != 0 &&	/* not a delegated i/o */
 	    vd->vdev_ops != &vdev_indirect_ops &&
+	    vd->vdev_top->vdev_ops != &vdev_draid_ops &&
 	    !vdev_dtl_contains(vd, DTL_PARTIAL, zio->io_txg, 1)) {
 		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
 		zio_vdev_io_bypass(zio);
 		return (zio);
 	}
 
-	if (vd->vdev_ops->vdev_op_leaf && (zio->io_type == ZIO_TYPE_READ ||
-	    zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_TRIM)) {
+	/*
+	 * Select the next best leaf I/O to process.  Distributed spares are
+	 * excluded since they dispatch the I/O directly to a leaf vdev after
+	 * applying the dRAID mapping.
+	 */
+	if (vd->vdev_ops->vdev_op_leaf &&
+	    vd->vdev_ops != &vdev_draid_spare_ops &&
+	    (zio->io_type == ZIO_TYPE_READ ||
+	    zio->io_type == ZIO_TYPE_WRITE ||
+	    zio->io_type == ZIO_TYPE_TRIM)) {
 
 		if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
 			return (zio);
@@ -3784,8 +3903,8 @@
 	if (zio->io_delay)
 		zio->io_delay = gethrtime() - zio->io_delay;
 
-	if (vd != NULL && vd->vdev_ops->vdev_op_leaf) {
-
+	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
+	    vd->vdev_ops != &vdev_draid_spare_ops) {
 		vdev_queue_io_done(zio);
 
 		if (zio->io_type == ZIO_TYPE_WRITE)
@@ -3809,7 +3928,7 @@
 
 	ops->vdev_op_io_done(zio);
 
-	if (unexpected_error)
+	if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
 		VERIFY(vdev_probe(vd, zio) == NULL);
 
 	return (zio);
@@ -3856,9 +3975,8 @@
 	zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE);
 }
 
-/*ARGSUSED*/
 void
-zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
+zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr)
 {
 	void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
 
@@ -3923,6 +4041,9 @@
 	 */
 	if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE &&
 	    vd != NULL && !vd->vdev_ops->vdev_op_leaf) {
+		vdev_dbgmsg(vd, "zio_vdev_io_assess(zio=%px) setting "
+		    "cant_write=TRUE due to write failure with ENXIO",
+		    zio);
 		vd->vdev_cant_write = B_TRUE;
 	}
 
@@ -4187,20 +4308,19 @@
 		if (zio->io_prop.zp_checksum == ZIO_CHECKSUM_OFF)
 			return (zio);
 
-		ASSERT(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL);
+		ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
 	}
 
 	if ((error = zio_checksum_error(zio, &info)) != 0) {
 		zio->io_error = error;
 		if (error == ECKSUM &&
 		    !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+			(void) zfs_ereport_start_checksum(zio->io_spa,
+			    zio->io_vd, &zio->io_bookmark, zio,
+			    zio->io_offset, zio->io_size, &info);
 			mutex_enter(&zio->io_vd->vdev_stat_lock);
 			zio->io_vd->vdev_stat.vs_checksum_errors++;
 			mutex_exit(&zio->io_vd->vdev_stat_lock);
-
-			zfs_ereport_start_checksum(zio->io_spa,
-			    zio->io_vd, &zio->io_bookmark, zio,
-			    zio->io_offset, zio->io_size, NULL, &info);
 		}
 	}
 
@@ -4329,7 +4449,7 @@
 static void
 zio_dva_throttle_done(zio_t *zio)
 {
-	ASSERTV(zio_t *lio = zio->io_logical);
+	zio_t *lio __maybe_unused = zio->io_logical;
 	zio_t *pio = zio_unique_parent(zio);
 	vdev_t *vd = zio->io_vd;
 	int flags = METASLAB_ASYNC_ALLOC;
@@ -4429,9 +4549,8 @@
 
 		metaslab_group_alloc_verify(zio->io_spa, zio->io_bp, zio,
 		    zio->io_allocator);
-		VERIFY(zfs_refcount_not_held(
-		    &zio->io_metaslab_class->mc_alloc_slots[zio->io_allocator],
-		    zio));
+		VERIFY(zfs_refcount_not_held(&zio->io_metaslab_class->
+		    mc_allocator[zio->io_allocator].mca_alloc_slots, zio));
 	}
 
 
@@ -4476,7 +4595,7 @@
 			uint64_t asize = P2ROUNDUP(psize, align);
 			abd_t *adata = zio->io_abd;
 
-			if (asize != psize) {
+			if (adata != NULL && asize != psize) {
 				adata = abd_alloc(asize, B_TRUE);
 				abd_copy(adata, zio->io_abd, psize);
 				abd_zero_off(adata, psize, asize - psize);
@@ -4487,7 +4606,7 @@
 			zcr->zcr_finish(zcr, adata);
 			zfs_ereport_free_checksum(zcr);
 
-			if (asize != psize)
+			if (adata != NULL && asize != psize)
 				abd_free(adata);
 		}
 	}
@@ -4518,9 +4637,9 @@
 				zio->io_vd->vdev_stat.vs_slow_ios++;
 				mutex_exit(&zio->io_vd->vdev_stat_lock);
 
-				zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
+				(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
 				    zio->io_spa, zio->io_vd, &zio->io_bookmark,
-				    zio, 0, 0);
+				    zio, 0);
 			}
 		}
 	}
@@ -4534,16 +4653,16 @@
 		 */
 		if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
 		    !vdev_is_dead(zio->io_vd)) {
-			mutex_enter(&zio->io_vd->vdev_stat_lock);
-			if (zio->io_type == ZIO_TYPE_READ) {
-				zio->io_vd->vdev_stat.vs_read_errors++;
-			} else if (zio->io_type == ZIO_TYPE_WRITE) {
-				zio->io_vd->vdev_stat.vs_write_errors++;
+			int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO,
+			    zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
+			if (ret != EALREADY) {
+				mutex_enter(&zio->io_vd->vdev_stat_lock);
+				if (zio->io_type == ZIO_TYPE_READ)
+					zio->io_vd->vdev_stat.vs_read_errors++;
+				else if (zio->io_type == ZIO_TYPE_WRITE)
+					zio->io_vd->vdev_stat.vs_write_errors++;
+				mutex_exit(&zio->io_vd->vdev_stat_lock);
 			}
-			mutex_exit(&zio->io_vd->vdev_stat_lock);
-
-			zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
-			    zio->io_vd, &zio->io_bookmark, zio, 0, 0);
 		}
 
 		if ((zio->io_error == EIO || !(zio->io_flags &
@@ -4554,8 +4673,8 @@
 			 * error and generate a logical data ereport.
 			 */
 			spa_log_error(zio->io_spa, &zio->io_bookmark);
-			zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa,
-			    NULL, &zio->io_bookmark, zio, 0, 0);
+			(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
+			    zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
 		}
 	}
 
@@ -4691,8 +4810,7 @@
 			ASSERT(taskq_empty_ent(&zio->io_tqent));
 			spa_taskq_dispatch_ent(zio->io_spa,
 			    ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
-			    (task_func_t *)zio_reexecute, zio, 0,
-			    &zio->io_tqent);
+			    zio_reexecute, zio, 0, &zio->io_tqent);
 		}
 		return (NULL);
 	}
@@ -4832,6 +4950,9 @@
 	    zb1->zb_blkid == zb2->zb_blkid)
 		return (0);
 
+	IMPLY(zb1->zb_level > 0, ibs1 >= SPA_MINBLOCKSHIFT);
+	IMPLY(zb2->zb_level > 0, ibs2 >= SPA_MINBLOCKSHIFT);
+
 	/*
 	 * BP_SPANB calculates the span in blocks.
 	 */
@@ -4887,7 +5008,7 @@
 {
 	zbookmark_phys_t mod_zb = *subtree_root;
 	mod_zb.zb_blkid++;
-	ASSERT(last_block->zb_level == 0);
+	ASSERT0(last_block->zb_level);
 
 	/* The objset_phys_t isn't before anything. */
 	if (dnp == NULL)
@@ -4913,37 +5034,47 @@
 	    last_block) <= 0);
 }
 
-#if defined(_KERNEL)
+/*
+ * This function is similar to zbookmark_subtree_completed(), but returns true
+ * if subtree_root is equal to or ahead of last_block, i.e. still to be done.
+ */
+boolean_t
+zbookmark_subtree_tbd(const dnode_phys_t *dnp,
+    const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block)
+{
+	ASSERT0(last_block->zb_level);
+	if (dnp == NULL)
+		return (B_FALSE);
+	return (zbookmark_compare(dnp->dn_datablkszsec, dnp->dn_indblkshift,
+	    1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT), 0, subtree_root,
+	    last_block) >= 0);
+}
+
 EXPORT_SYMBOL(zio_type_name);
 EXPORT_SYMBOL(zio_buf_alloc);
 EXPORT_SYMBOL(zio_data_buf_alloc);
 EXPORT_SYMBOL(zio_buf_free);
 EXPORT_SYMBOL(zio_data_buf_free);
 
-module_param(zio_slow_io_ms, int, 0644);
-MODULE_PARM_DESC(zio_slow_io_ms,
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs_zio, zio_, slow_io_ms, INT, ZMOD_RW,
 	"Max I/O completion time (milliseconds) before marking it as slow");
 
-module_param(zio_requeue_io_start_cut_in_line, int, 0644);
-MODULE_PARM_DESC(zio_requeue_io_start_cut_in_line, "Prioritize requeued I/O");
+ZFS_MODULE_PARAM(zfs_zio, zio_, requeue_io_start_cut_in_line, INT, ZMOD_RW,
+	"Prioritize requeued I/O");
 
-module_param(zfs_sync_pass_deferred_free, int, 0644);
-MODULE_PARM_DESC(zfs_sync_pass_deferred_free,
+ZFS_MODULE_PARAM(zfs, zfs_, sync_pass_deferred_free,  INT, ZMOD_RW,
 	"Defer frees starting in this pass");
 
-module_param(zfs_sync_pass_dont_compress, int, 0644);
-MODULE_PARM_DESC(zfs_sync_pass_dont_compress,
+ZFS_MODULE_PARAM(zfs, zfs_, sync_pass_dont_compress, INT, ZMOD_RW,
 	"Don't compress starting in this pass");
 
-module_param(zfs_sync_pass_rewrite, int, 0644);
-MODULE_PARM_DESC(zfs_sync_pass_rewrite,
+ZFS_MODULE_PARAM(zfs, zfs_, sync_pass_rewrite, INT, ZMOD_RW,
 	"Rewrite new bps starting in this pass");
 
-module_param(zio_dva_throttle_enabled, int, 0644);
-MODULE_PARM_DESC(zio_dva_throttle_enabled,
+ZFS_MODULE_PARAM(zfs_zio, zio_, dva_throttle_enabled, INT, ZMOD_RW,
 	"Throttle block allocations in the ZIO pipeline");
 
-module_param(zio_deadman_log_all, int, 0644);
-MODULE_PARM_DESC(zio_deadman_log_all,
+ZFS_MODULE_PARAM(zfs_zio, zio_, deadman_log_all, INT, ZMOD_RW,
 	"Log all slow ZIOs, not just those with vdevs");
-#endif
+/* END CSTYLED */

diff --git a/zfs/module/zfs/zio_checksum.c b/zfs/module/zfs/zio_checksum.c
index 179fab5..00837f0 100644
--- a/zfs/module/zfs/zio_checksum.c
+++ b/zfs/module/zfs/zio_checksum.c

@@ -91,29 +91,29 @@
  * invocation and passed to the checksum function.
  */
 
-/*ARGSUSED*/
 static void
 abd_checksum_off(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) abd, (void) size, (void) ctx_template;
 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
 }
 
-/*ARGSUSED*/
-void
+static void
 abd_fletcher_2_native(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	fletcher_init(zcp);
 	(void) abd_iterate_func(abd, 0, size,
 	    fletcher_2_incremental_native, zcp);
 }
 
-/*ARGSUSED*/
-void
+static void
 abd_fletcher_2_byteswap(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	fletcher_init(zcp);
 	(void) abd_iterate_func(abd, 0, size,
 	    fletcher_2_incremental_byteswap, zcp);
@@ -127,11 +127,11 @@
 	fletcher_4_abd_ops.acf_fini(acdp);
 }
 
-/*ARGSUSED*/
 void
 abd_fletcher_4_native(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	fletcher_4_ctx_t ctx;
 
 	zio_abd_checksum_data_t acd = {
@@ -144,11 +144,11 @@
 
 }
 
-/*ARGSUSED*/
 void
 abd_fletcher_4_byteswap(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+	(void) ctx_template;
 	fletcher_4_ctx_t ctx;
 
 	zio_abd_checksum_data_t acd = {
@@ -191,10 +191,12 @@
 	    abd_checksum_skein_tmpl_init, abd_checksum_skein_tmpl_free,
 	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
 	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
+#if !defined(__FreeBSD__)
 	{{abd_checksum_edonr_native,	abd_checksum_edonr_byteswap},
 	    abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free,
 	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
 	    ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
+#endif
 };
 
 /*
@@ -211,8 +213,10 @@
 		return (SPA_FEATURE_SHA512);
 	case ZIO_CHECKSUM_SKEIN:
 		return (SPA_FEATURE_SKEIN);
+#if !defined(__FreeBSD__)
 	case ZIO_CHECKSUM_EDONR:
 		return (SPA_FEATURE_EDONR);
+#endif
 	default:
 		return (SPA_FEATURE_NONE);
 	}

diff --git a/zfs/module/zfs/zio_compress.c b/zfs/module/zfs/zio_compress.c
index 01c5134..cded11f 100644
--- a/zfs/module/zfs/zio_compress.c
+++ b/zfs/module/zfs/zio_compress.c

@@ -29,6 +29,8 @@
 
 /*
  * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
  */
 
 #include <sys/zfs_context.h>
@@ -36,6 +38,7 @@
 #include <sys/zfeature.h>
 #include <sys/zio.h>
 #include <sys/zio_compress.h>
+#include <sys/zstd/zstd.h>
 
 /*
  * If nonzero, every 1/X decompression attempts will fail, simulating
@@ -47,24 +50,43 @@
  * Compression vectors.
  */
 zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
-	{"inherit",		0,	NULL,		NULL},
-	{"on",			0,	NULL,		NULL},
-	{"uncompressed",	0,	NULL,		NULL},
-	{"lzjb",		0,	lzjb_compress,	lzjb_decompress},
-	{"empty",		0,	NULL,		NULL},
-	{"gzip-1",		1,	gzip_compress,	gzip_decompress},
-	{"gzip-2",		2,	gzip_compress,	gzip_decompress},
-	{"gzip-3",		3,	gzip_compress,	gzip_decompress},
-	{"gzip-4",		4,	gzip_compress,	gzip_decompress},
-	{"gzip-5",		5,	gzip_compress,	gzip_decompress},
-	{"gzip-6",		6,	gzip_compress,	gzip_decompress},
-	{"gzip-7",		7,	gzip_compress,	gzip_decompress},
-	{"gzip-8",		8,	gzip_compress,	gzip_decompress},
-	{"gzip-9",		9,	gzip_compress,	gzip_decompress},
-	{"zle",			64,	zle_compress,	zle_decompress},
-	{"lz4",			0,	lz4_compress_zfs, lz4_decompress_zfs}
+	{"inherit",	0,	NULL,		NULL, NULL},
+	{"on",		0,	NULL,		NULL, NULL},
+	{"uncompressed", 0,	NULL,		NULL, NULL},
+	{"lzjb",	0,	lzjb_compress,	lzjb_decompress, NULL},
+	{"empty",	0,	NULL,		NULL, NULL},
+	{"gzip-1",	1,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-2",	2,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-3",	3,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-4",	4,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-5",	5,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-6",	6,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-7",	7,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-8",	8,	gzip_compress,	gzip_decompress, NULL},
+	{"gzip-9",	9,	gzip_compress,	gzip_decompress, NULL},
+	{"zle",		64,	zle_compress,	zle_decompress, NULL},
+	{"lz4",		0,	lz4_compress_zfs, lz4_decompress_zfs, NULL},
+	{"zstd",	ZIO_ZSTD_LEVEL_DEFAULT,	zfs_zstd_compress,
+	    zfs_zstd_decompress, zfs_zstd_decompress_level},
 };
 
+uint8_t
+zio_complevel_select(spa_t *spa, enum zio_compress compress, uint8_t child,
+    uint8_t parent)
+{
+	(void) spa;
+	uint8_t result;
+
+	if (!ZIO_COMPRESS_HASLEVEL(compress))
+		return (0);
+
+	result = child;
+	if (result == ZIO_COMPLEVEL_INHERIT)
+		result = parent;
+
+	return (result);
+}
+
 enum zio_compress
 zio_compress_select(spa_t *spa, enum zio_compress child,
     enum zio_compress parent)
@@ -89,10 +111,11 @@
 	return (result);
 }
 
-/*ARGSUSED*/
 static int
 zio_compress_zeroed_cb(void *data, size_t len, void *private)
 {
+	(void) private;
+
 	uint64_t *end = (uint64_t *)((char *)data + len);
 	for (uint64_t *word = (uint64_t *)data; word < end; word++)
 		if (*word != 0)
@@ -102,9 +125,11 @@
 }
 
 size_t
-zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
+zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len,
+    uint8_t level)
 {
 	size_t c_len, d_len;
+	uint8_t complevel;
 	zio_compress_info_t *ci = &zio_compress_table[c];
 
 	ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
@@ -123,9 +148,24 @@
 	/* Compress at least 12.5% */
 	d_len = s_len - (s_len >> 3);
 
+	complevel = ci->ci_level;
+
+	if (c == ZIO_COMPRESS_ZSTD) {
+		/* If we don't know the level, we can't compress it */
+		if (level == ZIO_COMPLEVEL_INHERIT)
+			return (s_len);
+
+		if (level == ZIO_COMPLEVEL_DEFAULT)
+			complevel = ZIO_ZSTD_LEVEL_DEFAULT;
+		else
+			complevel = level;
+
+		ASSERT3U(complevel, !=, ZIO_COMPLEVEL_INHERIT);
+	}
+
 	/* No compression algorithms can read from ABDs directly */
 	void *tmp = abd_borrow_buf_copy(src, s_len);
-	c_len = ci->ci_compress(tmp, dst, s_len, d_len, ci->ci_level);
+	c_len = ci->ci_compress(tmp, dst, s_len, d_len, complevel);
 	abd_return_buf(src, tmp, s_len);
 
 	if (c_len > d_len)
@@ -137,21 +177,24 @@
 
 int
 zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
-    size_t s_len, size_t d_len)
+    size_t s_len, size_t d_len, uint8_t *level)
 {
 	zio_compress_info_t *ci = &zio_compress_table[c];
 	if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL)
 		return (SET_ERROR(EINVAL));
 
+	if (ci->ci_decompress_level != NULL && level != NULL)
+		return (ci->ci_decompress_level(src, dst, s_len, d_len, level));
+
 	return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
 }
 
 int
 zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
-    size_t s_len, size_t d_len)
+    size_t s_len, size_t d_len, uint8_t *level)
 {
 	void *tmp = abd_borrow_buf_copy(src, s_len);
-	int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len);
+	int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len, level);
 	abd_return_buf(src, tmp, s_len);
 
 	/*
@@ -160,8 +203,20 @@
 	 * in non-ECC RAM), we handle this error (and test it).
 	 */
 	if (zio_decompress_fail_fraction != 0 &&
-	    spa_get_random(zio_decompress_fail_fraction) == 0)
+	    random_in_range(zio_decompress_fail_fraction) == 0)
 		ret = SET_ERROR(EINVAL);
 
 	return (ret);
 }
+
+int
+zio_compress_to_feature(enum zio_compress comp)
+{
+	switch (comp) {
+	case ZIO_COMPRESS_ZSTD:
+		return (SPA_FEATURE_ZSTD_COMPRESS);
+	default:
+		break;
+	}
+	return (SPA_FEATURE_NONE);
+}

diff --git a/zfs/module/zfs/zio_crypt.c b/zfs/module/zfs/zio_crypt.c
deleted file mode 100644
index 7ce2b1b..0000000
--- a/zfs/module/zfs/zio_crypt.c
+++ /dev/null

@@ -1,2036 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source.  A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2017, Datto, Inc. All rights reserved.
- */
-
-#include <sys/zio_crypt.h>
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dnode.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio.h>
-#include <sys/zil.h>
-#include <sys/sha2.h>
-#include <sys/hkdf.h>
-#include "qat.h"
-
-/*
- * This file is responsible for handling all of the details of generating
- * encryption parameters and performing encryption and authentication.
- *
- * BLOCK ENCRYPTION PARAMETERS:
- * Encryption /Authentication Algorithm Suite (crypt):
- * The encryption algorithm, mode, and key length we are going to use. We
- * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
- * keys. All authentication is currently done with SHA512-HMAC.
- *
- * Plaintext:
- * The unencrypted data that we want to encrypt.
- *
- * Initialization Vector (IV):
- * An initialization vector for the encryption algorithms. This is used to
- * "tweak" the encryption algorithms so that two blocks of the same data are
- * encrypted into different ciphertext outputs, thus obfuscating block patterns.
- * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
- * never reused with the same encryption key. This value is stored unencrypted
- * and must simply be provided to the decryption function. We use a 96 bit IV
- * (as recommended by NIST) for all block encryption. For non-dedup blocks we
- * derive the IV randomly. The first 64 bits of the IV are stored in the second
- * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
- * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
- * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
- * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
- * level 0 blocks is the number of allocated dnodes in that block. The on-disk
- * format supports at most 2^15 slots per L0 dnode block, because the maximum
- * block size is 16MB (2^24). In either case, for level 0 blocks this number
- * will still be smaller than UINT32_MAX so it is safe to store the IV in the
- * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
- * for the dnode code.
- *
- * Master key:
- * This is the most important secret data of an encrypted dataset. It is used
- * along with the salt to generate that actual encryption keys via HKDF. We
- * do not use the master key to directly encrypt any data because there are
- * theoretical limits on how much data can actually be safely encrypted with
- * any encryption mode. The master key is stored encrypted on disk with the
- * user's wrapping key. Its length is determined by the encryption algorithm.
- * For details on how this is stored see the block comment in dsl_crypt.c
- *
- * Salt:
- * Used as an input to the HKDF function, along with the master key. We use a
- * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
- * can be used for encrypting many blocks, so we cache the current salt and the
- * associated derived key in zio_crypt_t so we do not need to derive it again
- * needlessly.
- *
- * Encryption Key:
- * A secret binary key, generated from an HKDF function used to encrypt and
- * decrypt data.
- *
- * Message Authentication Code (MAC)
- * The MAC is an output of authenticated encryption modes such as AES-GCM and
- * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
- * data on disk and return garbage to the application. Effectively, it is a
- * checksum that can not be reproduced by an attacker. We store the MAC in the
- * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
- * regular checksum of the ciphertext which can be used for scrubbing.
- *
- * OBJECT AUTHENTICATION:
- * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
- * they contain some info that always needs to be readable. To prevent this
- * data from being altered, we authenticate this data using SHA512-HMAC. This
- * will produce a MAC (similar to the one produced via encryption) which can
- * be used to verify the object was not modified. HMACs do not require key
- * rotation or IVs, so we can keep up to the full 3 copies of authenticated
- * data.
- *
- * ZIL ENCRYPTION:
- * ZIL blocks have their bp written to disk ahead of the associated data, so we
- * cannot store the MAC there as we normally do. For these blocks the MAC is
- * stored in the embedded checksum within the zil_chain_t header. The salt and
- * IV are generated for the block on bp allocation instead of at encryption
- * time. In addition, ZIL blocks have some pieces that must be left in plaintext
- * for claiming even though all of the sensitive user data still needs to be
- * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
- * pieces of the block need to be encrypted. All data that is not encrypted is
- * authenticated using the AAD mechanisms that the supported encryption modes
- * provide for. In order to preserve the semantics of the ZIL for encrypted
- * datasets, the ZIL is not protected at the objset level as described below.
- *
- * DNODE ENCRYPTION:
- * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
- * in plaintext for scrubbing and claiming, but the bonus buffers might contain
- * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
- * which which pieces of the block need to be encrypted. For more details about
- * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
- *
- * OBJECT SET AUTHENTICATION:
- * Up to this point, everything we have encrypted and authenticated has been
- * at level 0 (or -2 for the ZIL). If we did not do any further work the
- * on-disk format would be susceptible to attacks that deleted or rearranged
- * the order of level 0 blocks. Ideally, the cleanest solution would be to
- * maintain a tree of authentication MACs going up the bp tree. However, this
- * presents a problem for raw sends. Send files do not send information about
- * indirect blocks so there would be no convenient way to transfer the MACs and
- * they cannot be recalculated on the receive side without the master key which
- * would defeat one of the purposes of raw sends in the first place. Instead,
- * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
- * from the level below. We also include some portable fields from blk_prop such
- * as the lsize and compression algorithm to prevent the data from being
- * misinterpreted.
- *
- * At the objset level, we maintain 2 separate 256 bit MACs in the
- * objset_phys_t. The first one is "portable" and is the logical root of the
- * MAC tree maintained in the metadnode's bps. The second, is "local" and is
- * used as the root MAC for the user accounting objects, which are also not
- * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
- * of the send file. The useraccounting code ensures that the useraccounting
- * info is not present upon a receive, so the local MAC can simply be cleared
- * out at that time. For more info about objset_phys_t authentication, see
- * zio_crypt_do_objset_hmacs().
- *
- * CONSIDERATIONS FOR DEDUP:
- * In order for dedup to work, blocks that we want to dedup with one another
- * need to use the same IV and encryption key, so that they will have the same
- * ciphertext. Normally, one should never reuse an IV with the same encryption
- * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
- * blocks. In this case, however, since we are using the same plaintext as
- * well all that we end up with is a duplicate of the original ciphertext we
- * already had. As a result, an attacker with read access to the raw disk will
- * be able to tell which blocks are the same but this information is given away
- * by dedup anyway. In order to get the same IVs and encryption keys for
- * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC
- * here so that a reproducible checksum of the plaintext is never available to
- * the attacker. The HMAC key is kept alongside the master key, encrypted on
- * disk. The first 64 bits of the HMAC are used in place of the random salt, and
- * the next 96 bits are used as the IV. As a result of this mechanism, dedup
- * will only work within a clone family since encrypted dedup requires use of
- * the same master and HMAC keys.
- */
-
-/*
- * After encrypting many blocks with the same key we may start to run up
- * against the theoretical limits of how much data can securely be encrypted
- * with a single key using the supported encryption modes. The most obvious
- * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
- * the more IVs we generate (which both GCM and CCM modes strictly forbid).
- * This risk actually grows surprisingly quickly over time according to the
- * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
- * generated n IVs with a cryptographically secure RNG, the approximate
- * probability p(n) of a collision is given as:
- *
- * p(n) ~= e^(-n*(n-1)/(2*(2^96)))
- *
- * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
- *
- * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
- * we must not write more than 398,065,730 blocks with the same encryption key.
- * Therefore, we rotate our keys after 400,000,000 blocks have been written by
- * generating a new random 64 bit salt for our HKDF encryption key generation
- * function.
- */
-#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000
-#define	ZFS_CURRENT_MAX_SALT_USES	\
-	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
-unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
-
-typedef struct blkptr_auth_buf {
-	uint64_t bab_prop;			/* blk_prop - portable mask */
-	uint8_t bab_mac[ZIO_DATA_MAC_LEN];	/* MAC from blk_cksum */

-	uint64_t bab_pad;			/* reserved for future use */
-} blkptr_auth_buf_t;
-
-zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
-	{"",			ZC_TYPE_NONE,	0,	"inherit"},
-	{"",			ZC_TYPE_NONE,	0,	"on"},
-	{"",			ZC_TYPE_NONE,	0,	"off"},
-	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
-	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
-	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
-	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
-	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
-	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
-};
-
-void
-zio_crypt_key_destroy(zio_crypt_key_t *key)
-{
-	rw_destroy(&key->zk_salt_lock);
-
-	/* free crypto templates */
-	crypto_destroy_ctx_template(key->zk_current_tmpl);
-	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
-
-	/* zero out sensitive data */
-	bzero(key, sizeof (zio_crypt_key_t));
-}
-
-int
-zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
-{
-	int ret;
-	crypto_mechanism_t mech;
-	uint_t keydata_len;
-
-	ASSERT(key != NULL);
-	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
-
-	keydata_len = zio_crypt_table[crypt].ci_keylen;
-	bzero(key, sizeof (zio_crypt_key_t));
-
-	/* fill keydata buffers and salt with random data */
-	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
-	if (ret != 0)
-		goto error;
-
-	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
-	if (ret != 0)
-		goto error;
-
-	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
-	if (ret != 0)
-		goto error;
-
-	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
-	if (ret != 0)
-		goto error;
-
-	/* derive the current key from the master key */
-	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
-	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
-	    keydata_len);
-	if (ret != 0)
-		goto error;
-
-	/* initialize keys for the ICP */
-	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
-	key->zk_current_key.ck_data = key->zk_current_keydata;
-	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
-
-	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
-	key->zk_hmac_key.ck_data = &key->zk_hmac_key;
-	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
-
-	/*
-	 * Initialize the crypto templates. It's ok if this fails because
-	 * this is just an optimization.
-	 */
-	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
-	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
-	    &key->zk_current_tmpl, KM_SLEEP);
-	if (ret != CRYPTO_SUCCESS)
-		key->zk_current_tmpl = NULL;
-
-	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
-	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
-	    &key->zk_hmac_tmpl, KM_SLEEP);
-	if (ret != CRYPTO_SUCCESS)
-		key->zk_hmac_tmpl = NULL;
-
-	key->zk_crypt = crypt;
-	key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
-	key->zk_salt_count = 0;
-	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
-
-	return (0);
-
-error:
-	zio_crypt_key_destroy(key);
-	return (ret);
-}
-
-static int
-zio_crypt_key_change_salt(zio_crypt_key_t *key)
-{
-	int ret = 0;
-	uint8_t salt[ZIO_DATA_SALT_LEN];
-	crypto_mechanism_t mech;
-	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
-
-	/* generate a new salt */
-	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
-	if (ret != 0)
-		goto error;
-
-	rw_enter(&key->zk_salt_lock, RW_WRITER);
-
-	/* someone beat us to the salt rotation, just unlock and return */
-	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
-		goto out_unlock;
-
-	/* derive the current key from the master key and the new salt */
-	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
-	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
-	if (ret != 0)
-		goto out_unlock;
-
-	/* assign the salt and reset the usage count */
-	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
-	key->zk_salt_count = 0;
-
-	/* destroy the old context template and create the new one */
-	crypto_destroy_ctx_template(key->zk_current_tmpl);
-	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
-	    &key->zk_current_tmpl, KM_SLEEP);
-	if (ret != CRYPTO_SUCCESS)
-		key->zk_current_tmpl = NULL;
-
-	rw_exit(&key->zk_salt_lock);
-
-	return (0);
-
-out_unlock:
-	rw_exit(&key->zk_salt_lock);
-error:
-	return (ret);
-}
-
-/* See comment above zfs_key_max_salt_uses definition for details */
-int
-zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
-{
-	int ret;
-	boolean_t salt_change;
-
-	rw_enter(&key->zk_salt_lock, RW_READER);
-
-	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
-	salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >=
-	    ZFS_CURRENT_MAX_SALT_USES);
-
-	rw_exit(&key->zk_salt_lock);
-
-	if (salt_change) {
-		ret = zio_crypt_key_change_salt(key);
-		if (ret != 0)
-			goto error;
-	}
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-/*
- * This function handles all encryption and decryption in zfs. When
- * encrypting it expects puio to reference the plaintext and cuio to
- * reference the ciphertext. cuio must have enough space for the
- * ciphertext + room for a MAC. datalen should be the length of the
- * plaintext / ciphertext alone.
- */
-static int
-zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
-    crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
-    uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
-{
-	int ret;
-	crypto_data_t plaindata, cipherdata;
-	CK_AES_CCM_PARAMS ccmp;
-	CK_AES_GCM_PARAMS gcmp;
-	crypto_mechanism_t mech;
-	zio_crypt_info_t crypt_info;
-	uint_t plain_full_len, maclen;
-
-	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
-	ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW);
-
-	/* lookup the encryption info */
-	crypt_info = zio_crypt_table[crypt];
-
-	/* the mac will always be the last iovec_t in the cipher uio */
-	maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len;
-
-	ASSERT(maclen <= ZIO_DATA_MAC_LEN);
-
-	/* setup encryption mechanism (same as crypt) */
-	mech.cm_type = crypto_mech2id(crypt_info.ci_mechname);
-
-	/*
-	 * Strangely, the ICP requires that plain_full_len must include
-	 * the MAC length when decrypting, even though the UIO does not
-	 * need to have the extra space allocated.
-	 */
-	if (encrypt) {
-		plain_full_len = datalen;
-	} else {
-		plain_full_len = datalen + maclen;
-	}
-
-	/*
-	 * setup encryption params (currently only AES CCM and AES GCM
-	 * are supported)
-	 */
-	if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) {
-		ccmp.ulNonceSize = ZIO_DATA_IV_LEN;
-		ccmp.ulAuthDataSize = auth_len;
-		ccmp.authData = authbuf;
-		ccmp.ulMACSize = maclen;
-		ccmp.nonce = ivbuf;
-		ccmp.ulDataSize = plain_full_len;
-
-		mech.cm_param = (char *)(&ccmp);
-		mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
-	} else {
-		gcmp.ulIvLen = ZIO_DATA_IV_LEN;
-		gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN);
-		gcmp.ulAADLen = auth_len;
-		gcmp.pAAD = authbuf;
-		gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen);
-		gcmp.pIv = ivbuf;
-
-		mech.cm_param = (char *)(&gcmp);
-		mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
-	}
-
-	/* populate the cipher and plain data structs. */
-	plaindata.cd_format = CRYPTO_DATA_UIO;
-	plaindata.cd_offset = 0;
-	plaindata.cd_uio = puio;
-	plaindata.cd_miscdata = NULL;
-	plaindata.cd_length = plain_full_len;
-
-	cipherdata.cd_format = CRYPTO_DATA_UIO;
-	cipherdata.cd_offset = 0;
-	cipherdata.cd_uio = cuio;
-	cipherdata.cd_miscdata = NULL;
-	cipherdata.cd_length = datalen + maclen;
-
-	/* perform the actual encryption */
-	if (encrypt) {
-		ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata,
-		    NULL);
-		if (ret != CRYPTO_SUCCESS) {
-			ret = SET_ERROR(EIO);
-			goto error;
-		}
-	} else {
-		ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata,
-		    NULL);
-		if (ret != CRYPTO_SUCCESS) {
-			ASSERT3U(ret, ==, CRYPTO_INVALID_MAC);
-			ret = SET_ERROR(ECKSUM);
-			goto error;
-		}
-	}
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-int
-zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
-    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
-{
-	int ret;
-	uio_t puio, cuio;
-	uint64_t aad[3];
-	iovec_t plain_iovecs[2], cipher_iovecs[3];
-	uint64_t crypt = key->zk_crypt;
-	uint_t enc_len, keydata_len, aad_len;
-
-	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
-	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
-
-	keydata_len = zio_crypt_table[crypt].ci_keylen;
-
-	/* generate iv for wrapping the master and hmac key */
-	ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
-	if (ret != 0)
-		goto error;
-
-	/* initialize uio_ts */
-	plain_iovecs[0].iov_base = key->zk_master_keydata;
-	plain_iovecs[0].iov_len = keydata_len;
-	plain_iovecs[1].iov_base = key->zk_hmac_keydata;
-	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
-
-	cipher_iovecs[0].iov_base = keydata_out;
-	cipher_iovecs[0].iov_len = keydata_len;
-	cipher_iovecs[1].iov_base = hmac_keydata_out;
-	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
-	cipher_iovecs[2].iov_base = mac;
-	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
-
-	/*
-	 * Although we don't support writing to the old format, we do
-	 * support rewrapping the key so that the user can move and
-	 * quarantine datasets on the old format.
-	 */
-	if (key->zk_version == 0) {
-		aad_len = sizeof (uint64_t);
-		aad[0] = LE_64(key->zk_guid);
-	} else {
-		ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
-		aad_len = sizeof (uint64_t) * 3;
-		aad[0] = LE_64(key->zk_guid);
-		aad[1] = LE_64(crypt);
-		aad[2] = LE_64(key->zk_version);
-	}
-
-	enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
-	puio.uio_iov = plain_iovecs;
-	puio.uio_iovcnt = 2;
-	puio.uio_segflg = UIO_SYSSPACE;
-	cuio.uio_iov = cipher_iovecs;
-	cuio.uio_iovcnt = 3;
-	cuio.uio_segflg = UIO_SYSSPACE;
-
-	/* encrypt the keys and store the resulting ciphertext and mac */
-	ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len,
-	    &puio, &cuio, (uint8_t *)aad, aad_len);
-	if (ret != 0)
-		goto error;
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-int
-zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
-    uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
-    uint8_t *mac, zio_crypt_key_t *key)
-{
-	crypto_mechanism_t mech;
-	uio_t puio, cuio;
-	uint64_t aad[3];
-	iovec_t plain_iovecs[2], cipher_iovecs[3];
-	uint_t enc_len, keydata_len, aad_len;
-	int ret;
-
-	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
-	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
-
-	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
-
-	keydata_len = zio_crypt_table[crypt].ci_keylen;
-
-	/* initialize uio_ts */
-	plain_iovecs[0].iov_base = key->zk_master_keydata;
-	plain_iovecs[0].iov_len = keydata_len;
-	plain_iovecs[1].iov_base = key->zk_hmac_keydata;
-	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
-
-	cipher_iovecs[0].iov_base = keydata;
-	cipher_iovecs[0].iov_len = keydata_len;
-	cipher_iovecs[1].iov_base = hmac_keydata;
-	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
-	cipher_iovecs[2].iov_base = mac;
-	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
-
-	if (version == 0) {
-		aad_len = sizeof (uint64_t);
-		aad[0] = LE_64(guid);
-	} else {
-		ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
-		aad_len = sizeof (uint64_t) * 3;
-		aad[0] = LE_64(guid);
-		aad[1] = LE_64(crypt);
-		aad[2] = LE_64(version);
-	}
-
-	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
-	puio.uio_iov = plain_iovecs;
-	puio.uio_segflg = UIO_SYSSPACE;
-	puio.uio_iovcnt = 2;
-	cuio.uio_iov = cipher_iovecs;
-	cuio.uio_iovcnt = 3;
-	cuio.uio_segflg = UIO_SYSSPACE;
-
-	/* decrypt the keys and store the result in the output buffers */
-	ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len,
-	    &puio, &cuio, (uint8_t *)aad, aad_len);
-	if (ret != 0)
-		goto error;
-
-	/* generate a fresh salt */
-	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
-	if (ret != 0)
-		goto error;
-
-	/* derive the current key from the master key */
-	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
-	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
-	    keydata_len);
-	if (ret != 0)
-		goto error;
-
-	/* initialize keys for ICP */
-	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
-	key->zk_current_key.ck_data = key->zk_current_keydata;
-	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
-
-	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
-	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
-	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
-
-	/*
-	 * Initialize the crypto templates. It's ok if this fails because
-	 * this is just an optimization.
-	 */
-	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
-	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
-	    &key->zk_current_tmpl, KM_SLEEP);
-	if (ret != CRYPTO_SUCCESS)
-		key->zk_current_tmpl = NULL;
-
-	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
-	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
-	    &key->zk_hmac_tmpl, KM_SLEEP);
-	if (ret != CRYPTO_SUCCESS)
-		key->zk_hmac_tmpl = NULL;
-
-	key->zk_crypt = crypt;
-	key->zk_version = version;
-	key->zk_guid = guid;
-	key->zk_salt_count = 0;
-
-	return (0);
-
-error:
-	zio_crypt_key_destroy(key);
-	return (ret);
-}
-
-int
-zio_crypt_generate_iv(uint8_t *ivbuf)
-{
-	int ret;
-
-	/* randomly generate the IV */
-	ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
-	if (ret != 0)
-		goto error;
-
-	return (0);
-
-error:
-	bzero(ivbuf, ZIO_DATA_IV_LEN);
-	return (ret);
-}
-
-int
-zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
-    uint8_t *digestbuf, uint_t digestlen)
-{
-	int ret;
-	crypto_mechanism_t mech;
-	crypto_data_t in_data, digest_data;
-	uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH];
-
-	ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH);
-
-	/* initialize sha512-hmac mechanism and crypto data */
-	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
-	mech.cm_param = NULL;
-	mech.cm_param_len = 0;
-
-	/* initialize the crypto data */
-	in_data.cd_format = CRYPTO_DATA_RAW;
-	in_data.cd_offset = 0;
-	in_data.cd_length = datalen;
-	in_data.cd_raw.iov_base = (char *)data;
-	in_data.cd_raw.iov_len = in_data.cd_length;
-
-	digest_data.cd_format = CRYPTO_DATA_RAW;
-	digest_data.cd_offset = 0;
-	digest_data.cd_length = SHA512_DIGEST_LENGTH;
-	digest_data.cd_raw.iov_base = (char *)raw_digestbuf;
-	digest_data.cd_raw.iov_len = digest_data.cd_length;
-
-	/* generate the hmac */
-	ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl,
-	    &digest_data, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	bcopy(raw_digestbuf, digestbuf, digestlen);
-
-	return (0);
-
-error:
-	bzero(digestbuf, digestlen);
-	return (ret);
-}
-
-int
-zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
-    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
-{
-	int ret;
-	uint8_t digestbuf[SHA512_DIGEST_LENGTH];
-
-	ret = zio_crypt_do_hmac(key, data, datalen,
-	    digestbuf, SHA512_DIGEST_LENGTH);
-	if (ret != 0)
-		return (ret);
-
-	bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN);
-	bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN);
-
-	return (0);
-}
-
-/*
- * The following functions are used to encode and decode encryption parameters
- * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
- * byte strings, which normally means that these strings would not need to deal
- * with byteswapping at all. However, both blkptr_t and zil_header_t may be
- * byteswapped by lower layers and so we must "undo" that byteswap here upon
- * decoding and encoding in a non-native byteorder. These functions require
- * that the byteorder bit is correct before being called.
- */
-void
-zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
-{
-	uint64_t val64;
-	uint32_t val32;
-
-	ASSERT(BP_IS_ENCRYPTED(bp));
-
-	if (!BP_SHOULD_BYTESWAP(bp)) {
-		bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t));
-		bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t));
-		bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t));
-		BP_SET_IV2(bp, val32);
-	} else {
-		bcopy(salt, &val64, sizeof (uint64_t));
-		bp->blk_dva[2].dva_word[0] = BSWAP_64(val64);
-
-		bcopy(iv, &val64, sizeof (uint64_t));
-		bp->blk_dva[2].dva_word[1] = BSWAP_64(val64);
-
-		bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t));
-		BP_SET_IV2(bp, BSWAP_32(val32));
-	}
-}
-
-void
-zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
-{
-	uint64_t val64;
-	uint32_t val32;
-
-	ASSERT(BP_IS_PROTECTED(bp));
-
-	/* for convenience, so callers don't need to check */
-	if (BP_IS_AUTHENTICATED(bp)) {
-		bzero(salt, ZIO_DATA_SALT_LEN);
-		bzero(iv, ZIO_DATA_IV_LEN);
-		return;
-	}
-
-	if (!BP_SHOULD_BYTESWAP(bp)) {
-		bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t));
-		bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t));
-
-		val32 = (uint32_t)BP_GET_IV2(bp);
-		bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t));
-	} else {
-		val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]);
-		bcopy(&val64, salt, sizeof (uint64_t));
-
-		val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]);
-		bcopy(&val64, iv, sizeof (uint64_t));
-
-		val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp));
-		bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t));
-	}
-}
-
-void
-zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
-{
-	uint64_t val64;
-
-	ASSERT(BP_USES_CRYPT(bp));
-	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
-
-	if (!BP_SHOULD_BYTESWAP(bp)) {
-		bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t));
-		bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3],
-		    sizeof (uint64_t));
-	} else {
-		bcopy(mac, &val64, sizeof (uint64_t));
-		bp->blk_cksum.zc_word[2] = BSWAP_64(val64);
-
-		bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t));
-		bp->blk_cksum.zc_word[3] = BSWAP_64(val64);
-	}
-}
-
-void
-zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
-{
-	uint64_t val64;
-
-	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
-
-	/* for convenience, so callers don't need to check */
-	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
-		bzero(mac, ZIO_DATA_MAC_LEN);
-		return;
-	}
-
-	if (!BP_SHOULD_BYTESWAP(bp)) {
-		bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t));
-		bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t),
-		    sizeof (uint64_t));
-	} else {
-		val64 = BSWAP_64(bp->blk_cksum.zc_word[2]);
-		bcopy(&val64, mac, sizeof (uint64_t));
-
-		val64 = BSWAP_64(bp->blk_cksum.zc_word[3]);
-		bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t));
-	}
-}
-
-void
-zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
-{
-	zil_chain_t *zilc = data;
-
-	bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t));
-	bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3],
-	    sizeof (uint64_t));
-}
-
-void
-zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
-{
-	/*
-	 * The ZIL MAC is embedded in the block it protects, which will
-	 * not have been byteswapped by the time this function has been called.
-	 * As a result, we don't need to worry about byteswapping the MAC.
-	 */
-	const zil_chain_t *zilc = data;
-
-	bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t));
-	bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t),
-	    sizeof (uint64_t));
-}
-
-/*
- * This routine takes a block of dnodes (src_abd) and copies only the bonus
- * buffers to the same offsets in the dst buffer. datalen should be the size
- * of both the src_abd and the dst buffer (not just the length of the bonus
- * buffers).
- */
-void
-zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
-{
-	uint_t i, max_dnp = datalen >> DNODE_SHIFT;
-	uint8_t *src;
-	dnode_phys_t *dnp, *sdnp, *ddnp;
-
-	src = abd_borrow_buf_copy(src_abd, datalen);
-
-	sdnp = (dnode_phys_t *)src;
-	ddnp = (dnode_phys_t *)dst;
-
-	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
-		dnp = &sdnp[i];
-		if (dnp->dn_type != DMU_OT_NONE &&
-		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
-		    dnp->dn_bonuslen != 0) {
-			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]),
-			    DN_MAX_BONUS_LEN(dnp));
-		}
-	}
-
-	abd_return_buf(src_abd, src, datalen);
-}
-
-/*
- * This function decides what fields from blk_prop are included in
- * the on-disk various MAC algorithms.
- */
-static void
-zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version)
-{
-	/*
-	 * Version 0 did not properly zero out all non-portable fields
-	 * as it should have done. We maintain this code so that we can
-	 * do read-only imports of pools on this version.
-	 */
-	if (version == 0) {
-		BP_SET_DEDUP(bp, 0);
-		BP_SET_CHECKSUM(bp, 0);
-		BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
-		return;
-	}
-
-	ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
-
-	/*
-	 * The hole_birth feature might set these fields even if this bp
-	 * is a hole. We zero them out here to guarantee that raw sends
-	 * will function with or without the feature.
-	 */
-	if (BP_IS_HOLE(bp)) {
-		bp->blk_prop = 0ULL;
-		return;
-	}
-
-	/*
-	 * At L0 we want to verify these fields to ensure that data blocks
-	 * can not be reinterpreted. For instance, we do not want an attacker
-	 * to trick us into returning raw lz4 compressed data to the user
-	 * by modifying the compression bits. At higher levels, we cannot
-	 * enforce this policy since raw sends do not convey any information
-	 * about indirect blocks, so these values might be different on the
-	 * receive side. Fortunately, this does not open any new attack
-	 * vectors, since any alterations that can be made to a higher level
-	 * bp must still verify the correct order of the layer below it.
-	 */
-	if (BP_GET_LEVEL(bp) != 0) {
-		BP_SET_BYTEORDER(bp, 0);
-		BP_SET_COMPRESS(bp, 0);
-
-		/*
-		 * psize cannot be set to zero or it will trigger
-		 * asserts, but the value doesn't really matter as
-		 * long as it is constant.
-		 */
-		BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
-	}
-
-	BP_SET_DEDUP(bp, 0);
-	BP_SET_CHECKSUM(bp, 0);
-}
-
-static void
-zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp,
-    blkptr_auth_buf_t *bab, uint_t *bab_len)
-{
-	blkptr_t tmpbp = *bp;
-
-	if (should_bswap)
-		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
-
-	ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp));
-	ASSERT0(BP_IS_EMBEDDED(&tmpbp));
-
-	zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac);
-
-	/*
-	 * We always MAC blk_prop in LE to ensure portability. This
-	 * must be done after decoding the mac, since the endianness
-	 * will get zero'd out here.
-	 */
-	zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version);
-	bab->bab_prop = LE_64(tmpbp.blk_prop);
-	bab->bab_pad = 0ULL;
-
-	/* version 0 did not include the padding */
-	*bab_len = sizeof (blkptr_auth_buf_t);
-	if (version == 0)
-		*bab_len -= sizeof (uint64_t);
-}
-
-static int
-zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version,
-    boolean_t should_bswap, blkptr_t *bp)
-{
-	int ret;
-	uint_t bab_len;
-	blkptr_auth_buf_t bab;
-	crypto_data_t cd;
-
-	zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len);
-	cd.cd_format = CRYPTO_DATA_RAW;
-	cd.cd_offset = 0;
-	cd.cd_length = bab_len;
-	cd.cd_raw.iov_base = (char *)&bab;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_update(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-static void
-zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version,
-    boolean_t should_bswap, blkptr_t *bp)
-{
-	uint_t bab_len;
-	blkptr_auth_buf_t bab;
-
-	zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len);
-	SHA2Update(ctx, &bab, bab_len);
-}
-
-static void
-zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version,
-    boolean_t should_bswap, blkptr_t *bp)
-{
-	uint_t bab_len;
-	blkptr_auth_buf_t bab;
-
-	zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len);
-	bcopy(&bab, *aadp, bab_len);
-	*aadp += bab_len;
-	*aad_len += bab_len;
-}
-
-static int
-zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version,
-    boolean_t should_bswap, dnode_phys_t *dnp)
-{
-	int ret, i;
-	dnode_phys_t *adnp;
-	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
-	crypto_data_t cd;
-	uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)];
-
-	cd.cd_format = CRYPTO_DATA_RAW;
-	cd.cd_offset = 0;
-
-	/* authenticate the core dnode (masking out non-portable bits) */
-	bcopy(dnp, tmp_dncore, sizeof (tmp_dncore));
-	adnp = (dnode_phys_t *)tmp_dncore;
-	if (le_bswap) {
-		adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec);
-		adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen);
-		adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid);
-		adnp->dn_used = BSWAP_64(adnp->dn_used);
-	}
-	adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
-	adnp->dn_used = 0;
-
-	cd.cd_length = sizeof (tmp_dncore);
-	cd.cd_raw.iov_base = (char *)adnp;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_update(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	for (i = 0; i < dnp->dn_nblkptr; i++) {
-		ret = zio_crypt_bp_do_hmac_updates(ctx, version,
-		    should_bswap, &dnp->dn_blkptr[i]);
-		if (ret != 0)
-			goto error;
-	}
-
-	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-		ret = zio_crypt_bp_do_hmac_updates(ctx, version,
-		    should_bswap, DN_SPILL_BLKPTR(dnp));
-		if (ret != 0)
-			goto error;
-	}
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-/*
- * objset_phys_t blocks introduce a number of exceptions to the normal
- * authentication process. objset_phys_t's contain 2 separate HMACS for
- * protecting the integrity of their data. The portable_mac protects the
- * metadnode. This MAC can be sent with a raw send and protects against
- * reordering of data within the metadnode. The local_mac protects the user
- * accounting objects which are not sent from one system to another.
- *
- * In addition, objset blocks are the only blocks that can be modified and
- * written to disk without the key loaded under certain circumstances. During
- * zil_claim() we need to be able to update the zil_header_t to complete
- * claiming log blocks and during raw receives we need to write out the
- * portable_mac from the send file. Both of these actions are possible
- * because these fields are not protected by either MAC so neither one will
- * need to modify the MACs without the key. However, when the modified blocks
- * are written out they will be byteswapped into the host machine's native
- * endianness which will modify fields protected by the MAC. As a result, MAC
- * calculation for objset blocks works slightly differently from other block
- * types. Where other block types MAC the data in whatever endianness is
- * written to disk, objset blocks always MAC little endian version of their
- * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
- * and le_bswap indicates whether a byteswap is needed to get this block
- * into little endian format.
- */
-int
-zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
-    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
-{
-	int ret;
-	crypto_mechanism_t mech;
-	crypto_context_t ctx;
-	crypto_data_t cd;
-	objset_phys_t *osp = data;
-	uint64_t intval;
-	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
-	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
-	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];
-
-	/* initialize HMAC mechanism */
-	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
-	mech.cm_param = NULL;
-	mech.cm_param_len = 0;
-
-	cd.cd_format = CRYPTO_DATA_RAW;
-	cd.cd_offset = 0;
-
-	/* calculate the portable MAC from the portable fields and metadnode */
-	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	/* add in the os_type */
-	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
-	cd.cd_length = sizeof (uint64_t);
-	cd.cd_raw.iov_base = (char *)&intval;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_update(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	/* add in the portable os_flags */
-	intval = osp->os_flags;
-	if (should_bswap)
-		intval = BSWAP_64(intval);
-	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
-	if (!ZFS_HOST_BYTEORDER)
-		intval = BSWAP_64(intval);
-
-	cd.cd_length = sizeof (uint64_t);
-	cd.cd_raw.iov_base = (char *)&intval;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_update(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	/* add in fields from the metadnode */
-	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
-	    should_bswap, &osp->os_meta_dnode);
-	if (ret)
-		goto error;
-
-	/* store the final digest in a temporary buffer and copy what we need */
-	cd.cd_length = SHA512_DIGEST_LENGTH;
-	cd.cd_raw.iov_base = (char *)raw_portable_mac;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_final(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
-
-	/*
-	 * The local MAC protects the user, group and project accounting.
-	 * If these objects are not present, the local MAC is zeroed out.
-	 */
-	if ((datalen >= OBJSET_PHYS_SIZE_V3 &&
-	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
-	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE &&
-	    osp->os_projectused_dnode.dn_type == DMU_OT_NONE) ||
-	    (datalen >= OBJSET_PHYS_SIZE_V2 &&
-	    osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
-	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) ||
-	    (datalen <= OBJSET_PHYS_SIZE_V1)) {
-		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
-		return (0);
-	}
-
-	/* calculate the local MAC from the userused and groupused dnodes */
-	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	/* add in the non-portable os_flags */
-	intval = osp->os_flags;
-	if (should_bswap)
-		intval = BSWAP_64(intval);
-	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
-	if (!ZFS_HOST_BYTEORDER)
-		intval = BSWAP_64(intval);
-
-	cd.cd_length = sizeof (uint64_t);
-	cd.cd_raw.iov_base = (char *)&intval;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_update(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	/* add in fields from the user accounting dnodes */
-	if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) {
-		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
-		    should_bswap, &osp->os_userused_dnode);
-		if (ret)
-			goto error;
-	}
-
-	if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) {
-		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
-		    should_bswap, &osp->os_groupused_dnode);
-		if (ret)
-			goto error;
-	}
-
-	if (osp->os_projectused_dnode.dn_type != DMU_OT_NONE &&
-	    datalen >= OBJSET_PHYS_SIZE_V3) {
-		ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
-		    should_bswap, &osp->os_projectused_dnode);
-		if (ret)
-			goto error;
-	}
-
-	/* store the final digest in a temporary buffer and copy what we need */
-	cd.cd_length = SHA512_DIGEST_LENGTH;
-	cd.cd_raw.iov_base = (char *)raw_local_mac;
-	cd.cd_raw.iov_len = cd.cd_length;
-
-	ret = crypto_mac_final(ctx, &cd, NULL);
-	if (ret != CRYPTO_SUCCESS) {
-		ret = SET_ERROR(EIO);
-		goto error;
-	}
-
-	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
-
-	return (0);
-
-error:
-	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
-	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
-	return (ret);
-}
-
-static void
-zio_crypt_destroy_uio(uio_t *uio)
-{
-	if (uio->uio_iov)
-		kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
-}
-
-/*
- * This function parses an uncompressed indirect block and returns a checksum
- * of all the portable fields from all of the contained bps. The portable
- * fields are the MAC and all of the fields from blk_prop except for the dedup,
- * checksum, and psize bits. For an explanation of the purpose of this, see
- * the comment block on object set authentication.
- */
-static int
-zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf,
-    uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum)
-{
-	blkptr_t *bp;
-	int i, epb = datalen >> SPA_BLKPTRSHIFT;
-	SHA2_CTX ctx;
-	uint8_t digestbuf[SHA512_DIGEST_LENGTH];
-
-	/* checksum all of the MACs from the layer below */
-	SHA2Init(SHA512, &ctx);
-	for (i = 0, bp = buf; i < epb; i++, bp++) {
-		zio_crypt_bp_do_indrect_checksum_updates(&ctx, version,
-		    byteswap, bp);
-	}
-	SHA2Final(digestbuf, &ctx);
-
-	if (generate) {
-		bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN);
-		return (0);
-	}
-
-	if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0)
-		return (SET_ERROR(ECKSUM));
-
-	return (0);
-}
-
-int
-zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
-    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
-{
-	int ret;
-
-	/*
-	 * Unfortunately, callers of this function will not always have
-	 * easy access to the on-disk format version. This info is
-	 * normally found in the DSL Crypto Key, but the checksum-of-MACs
-	 * is expected to be verifiable even when the key isn't loaded.
-	 * Here, instead of doing a ZAP lookup for the version for each
-	 * zio, we simply try both existing formats.
-	 */
-	ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
-	    datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum);
-	if (ret == ECKSUM) {
-		ASSERT(!generate);
-		ret = zio_crypt_do_indirect_mac_checksum_impl(generate,
-		    buf, datalen, 0, byteswap, cksum);
-	}
-
-	return (ret);
-}
-
-int
-zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
-    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
-{
-	int ret;
-	void *buf;
-
-	buf = abd_borrow_buf_copy(abd, datalen);
-	ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen,
-	    byteswap, cksum);
-	abd_return_buf(abd, buf, datalen);
-
-	return (ret);
-}
-
-/*
- * Special case handling routine for encrypting / decrypting ZIL blocks.
- * We do not check for the older ZIL chain because the encryption feature
- * was not available before the newer ZIL chain was introduced. The goal
- * here is to encrypt everything except the blkptr_t of a lr_write_t and
- * the zil_chain_t header. Everything that is not encrypted is authenticated.
- */
-static int
-zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
-    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
-    uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
-    boolean_t *no_crypt)
-{
-	int ret;
-	uint64_t txtype, lr_len;
-	uint_t nr_src, nr_dst, crypt_len;
-	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
-	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
-	uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
-	zil_chain_t *zilc;
-	lr_t *lr;
-	uint8_t *aadbuf = zio_buf_alloc(datalen);
-
-	/* cipherbuf always needs an extra iovec for the MAC */
-	if (encrypt) {
-		src = plainbuf;
-		dst = cipherbuf;
-		nr_src = 0;
-		nr_dst = 1;
-	} else {
-		src = cipherbuf;
-		dst = plainbuf;
-		nr_src = 1;
-		nr_dst = 0;
-	}
-
-	/* find the start and end record of the log block */
-	zilc = (zil_chain_t *)src;
-	slrp = src + sizeof (zil_chain_t);
-	aadp = aadbuf;
-	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
-
-	/* calculate the number of encrypted iovecs we will need */
-	for (; slrp < blkend; slrp += lr_len) {
-		lr = (lr_t *)slrp;
-
-		if (!byteswap) {
-			txtype = lr->lrc_txtype;
-			lr_len = lr->lrc_reclen;
-		} else {
-			txtype = BSWAP_64(lr->lrc_txtype);
-			lr_len = BSWAP_64(lr->lrc_reclen);
-		}
-
-		nr_iovecs++;
-		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
-			nr_iovecs++;
-	}
-
-	nr_src += nr_iovecs;
-	nr_dst += nr_iovecs;
-
-	/* allocate the iovec arrays */
-	if (nr_src != 0) {
-		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
-		if (src_iovecs == NULL) {
-			ret = SET_ERROR(ENOMEM);
-			goto error;
-		}
-	}
-
-	if (nr_dst != 0) {
-		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
-		if (dst_iovecs == NULL) {
-			ret = SET_ERROR(ENOMEM);
-			goto error;
-		}
-	}
-
-	/*
-	 * Copy the plain zil header over and authenticate everything except
-	 * the checksum that will store our MAC. If we are writing the data
-	 * the embedded checksum will not have been calculated yet, so we don't
-	 * authenticate that.
-	 */
-	bcopy(src, dst, sizeof (zil_chain_t));
-	bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
-	aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
-	aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);
-
-	/* loop over records again, filling in iovecs */
-	nr_iovecs = 0;
-	slrp = src + sizeof (zil_chain_t);
-	dlrp = dst + sizeof (zil_chain_t);
-
-	for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
-		lr = (lr_t *)slrp;
-
-		if (!byteswap) {
-			txtype = lr->lrc_txtype;
-			lr_len = lr->lrc_reclen;
-		} else {
-			txtype = BSWAP_64(lr->lrc_txtype);
-			lr_len = BSWAP_64(lr->lrc_reclen);
-		}
-
-		/* copy the common lr_t */
-		bcopy(slrp, dlrp, sizeof (lr_t));
-		bcopy(slrp, aadp, sizeof (lr_t));
-		aadp += sizeof (lr_t);
-		aad_len += sizeof (lr_t);
-
-		ASSERT3P(src_iovecs, !=, NULL);
-		ASSERT3P(dst_iovecs, !=, NULL);
-
-		/*
-		 * If this is a TX_WRITE record we want to encrypt everything
-		 * except the bp if exists. If the bp does exist we want to
-		 * authenticate it.
-		 */
-		if (txtype == TX_WRITE) {
-			crypt_len = sizeof (lr_write_t) -
-			    sizeof (lr_t) - sizeof (blkptr_t);
-			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
-			src_iovecs[nr_iovecs].iov_len = crypt_len;
-			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
-			dst_iovecs[nr_iovecs].iov_len = crypt_len;
-
-			/* copy the bp now since it will not be encrypted */
-			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    sizeof (blkptr_t));
-			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
-			    aadp, sizeof (blkptr_t));
-			aadp += sizeof (blkptr_t);
-			aad_len += sizeof (blkptr_t);
-			nr_iovecs++;
-			total_len += crypt_len;
-
-			if (lr_len != sizeof (lr_write_t)) {
-				crypt_len = lr_len - sizeof (lr_write_t);
-				src_iovecs[nr_iovecs].iov_base =
-				    slrp + sizeof (lr_write_t);
-				src_iovecs[nr_iovecs].iov_len = crypt_len;
-				dst_iovecs[nr_iovecs].iov_base =
-				    dlrp + sizeof (lr_write_t);
-				dst_iovecs[nr_iovecs].iov_len = crypt_len;
-				nr_iovecs++;
-				total_len += crypt_len;
-			}
-		} else {
-			crypt_len = lr_len - sizeof (lr_t);
-			src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
-			src_iovecs[nr_iovecs].iov_len = crypt_len;
-			dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
-			dst_iovecs[nr_iovecs].iov_len = crypt_len;
-			nr_iovecs++;
-			total_len += crypt_len;
-		}
-	}
-
-	*no_crypt = (nr_iovecs == 0);
-	*enc_len = total_len;
-	*authbuf = aadbuf;
-	*auth_len = aad_len;
-
-	if (encrypt) {
-		puio->uio_iov = src_iovecs;
-		puio->uio_iovcnt = nr_src;
-		cuio->uio_iov = dst_iovecs;
-		cuio->uio_iovcnt = nr_dst;
-	} else {
-		puio->uio_iov = dst_iovecs;
-		puio->uio_iovcnt = nr_dst;
-		cuio->uio_iov = src_iovecs;
-		cuio->uio_iovcnt = nr_src;
-	}
-
-	return (0);
-
-error:
-	zio_buf_free(aadbuf, datalen);
-	if (src_iovecs != NULL)
-		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
-	if (dst_iovecs != NULL)
-		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
-
-	*enc_len = 0;
-	*authbuf = NULL;
-	*auth_len = 0;
-	*no_crypt = B_FALSE;
-	puio->uio_iov = NULL;
-	puio->uio_iovcnt = 0;
-	cuio->uio_iov = NULL;
-	cuio->uio_iovcnt = 0;
-	return (ret);
-}
-
-/*
- * Special case handling routine for encrypting / decrypting dnode blocks.
- */
-static int
-zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
-    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
-    uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
-    uint_t *auth_len, boolean_t *no_crypt)
-{
-	int ret;
-	uint_t nr_src, nr_dst, crypt_len;
-	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
-	uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;
-	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
-	uint8_t *src, *dst, *aadp;
-	dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
-	uint8_t *aadbuf = zio_buf_alloc(datalen);
-
-	if (encrypt) {
-		src = plainbuf;
-		dst = cipherbuf;
-		nr_src = 0;
-		nr_dst = 1;
-	} else {
-		src = cipherbuf;
-		dst = plainbuf;
-		nr_src = 1;
-		nr_dst = 0;
-	}
-
-	sdnp = (dnode_phys_t *)src;
-	ddnp = (dnode_phys_t *)dst;
-	aadp = aadbuf;
-
-	/*
-	 * Count the number of iovecs we will need to do the encryption by
-	 * counting the number of bonus buffers that need to be encrypted.
-	 */
-	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
-		/*
-		 * This block may still be byteswapped. However, all of the
-		 * values we use are either uint8_t's (for which byteswapping
-		 * is a noop) or a * != 0 check, which will work regardless
-		 * of whether or not we byteswap.
-		 */
-		if (sdnp[i].dn_type != DMU_OT_NONE &&
-		    DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
-		    sdnp[i].dn_bonuslen != 0) {
-			nr_iovecs++;
-		}
-	}
-
-	nr_src += nr_iovecs;
-	nr_dst += nr_iovecs;
-
-	if (nr_src != 0) {
-		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
-		if (src_iovecs == NULL) {
-			ret = SET_ERROR(ENOMEM);
-			goto error;
-		}
-	}
-
-	if (nr_dst != 0) {
-		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
-		if (dst_iovecs == NULL) {
-			ret = SET_ERROR(ENOMEM);
-			goto error;
-		}
-	}
-
-	nr_iovecs = 0;
-
-	/*
-	 * Iterate through the dnodes again, this time filling in the uios
-	 * we allocated earlier. We also concatenate any data we want to
-	 * authenticate onto aadbuf.
-	 */
-	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
-		dnp = &sdnp[i];
-
-		/* copy over the core fields and blkptrs (kept as plaintext) */
-		bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);
-
-		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-			bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
-			    sizeof (blkptr_t));
-		}
-
-		/*
-		 * Handle authenticated data. We authenticate everything in
-		 * the dnode that can be brought over when we do a raw send.
-		 * This includes all of the core fields as well as the MACs
-		 * stored in the bp checksums and all of the portable bits
-		 * from blk_prop. We include the dnode padding here in case it
-		 * ever gets used in the future. Some dn_flags and dn_used are
-		 * not portable so we mask those out values out of the
-		 * authenticated data.
-		 */
-		crypt_len = offsetof(dnode_phys_t, dn_blkptr);
-		bcopy(dnp, aadp, crypt_len);
-		adnp = (dnode_phys_t *)aadp;
-		adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
-		adnp->dn_used = 0;
-		aadp += crypt_len;
-		aad_len += crypt_len;
-
-		for (j = 0; j < dnp->dn_nblkptr; j++) {
-			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
-			    version, byteswap, &dnp->dn_blkptr[j]);
-		}
-
-		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
-			    version, byteswap, DN_SPILL_BLKPTR(dnp));
-		}
-
-		/*
-		 * If this bonus buffer needs to be encrypted, we prepare an
-		 * iovec_t. The encryption / decryption functions will fill
-		 * this in for us with the encrypted or decrypted data.
-		 * Otherwise we add the bonus buffer to the authenticated
-		 * data buffer and copy it over to the destination. The
-		 * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
-		 * we can guarantee alignment with the AES block size
-		 * (128 bits).
-		 */
-		crypt_len = DN_MAX_BONUS_LEN(dnp);
-		if (dnp->dn_type != DMU_OT_NONE &&
-		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
-		    dnp->dn_bonuslen != 0) {
-			ASSERT3U(nr_iovecs, <, nr_src);
-			ASSERT3U(nr_iovecs, <, nr_dst);
-			ASSERT3P(src_iovecs, !=, NULL);
-			ASSERT3P(dst_iovecs, !=, NULL);
-			src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp);
-			src_iovecs[nr_iovecs].iov_len = crypt_len;
-			dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]);
-			dst_iovecs[nr_iovecs].iov_len = crypt_len;
-
-			nr_iovecs++;
-			total_len += crypt_len;
-		} else {
-			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
-			bcopy(DN_BONUS(dnp), aadp, crypt_len);
-			aadp += crypt_len;
-			aad_len += crypt_len;
-		}
-	}
-
-	*no_crypt = (nr_iovecs == 0);
-	*enc_len = total_len;
-	*authbuf = aadbuf;
-	*auth_len = aad_len;
-
-	if (encrypt) {
-		puio->uio_iov = src_iovecs;
-		puio->uio_iovcnt = nr_src;
-		cuio->uio_iov = dst_iovecs;
-		cuio->uio_iovcnt = nr_dst;
-	} else {
-		puio->uio_iov = dst_iovecs;
-		puio->uio_iovcnt = nr_dst;
-		cuio->uio_iov = src_iovecs;
-		cuio->uio_iovcnt = nr_src;
-	}
-
-	return (0);
-
-error:
-	zio_buf_free(aadbuf, datalen);
-	if (src_iovecs != NULL)
-		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
-	if (dst_iovecs != NULL)
-		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
-
-	*enc_len = 0;
-	*authbuf = NULL;
-	*auth_len = 0;
-	*no_crypt = B_FALSE;
-	puio->uio_iov = NULL;
-	puio->uio_iovcnt = 0;
-	cuio->uio_iov = NULL;
-	cuio->uio_iovcnt = 0;
-	return (ret);
-}
-
-static int
-zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
-    uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio,
-    uint_t *enc_len)
-{
-	int ret;
-	uint_t nr_plain = 1, nr_cipher = 2;
-	iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL;
-
-	/* allocate the iovecs for the plain and cipher data */
-	plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t),
-	    KM_SLEEP);
-	if (!plain_iovecs) {
-		ret = SET_ERROR(ENOMEM);
-		goto error;
-	}
-
-	cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t),
-	    KM_SLEEP);
-	if (!cipher_iovecs) {
-		ret = SET_ERROR(ENOMEM);
-		goto error;
-	}
-
-	plain_iovecs[0].iov_base = plainbuf;
-	plain_iovecs[0].iov_len = datalen;
-	cipher_iovecs[0].iov_base = cipherbuf;
-	cipher_iovecs[0].iov_len = datalen;
-
-	*enc_len = datalen;
-	puio->uio_iov = plain_iovecs;
-	puio->uio_iovcnt = nr_plain;
-	cuio->uio_iov = cipher_iovecs;
-	cuio->uio_iovcnt = nr_cipher;
-
-	return (0);
-
-error:
-	if (plain_iovecs != NULL)
-		kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t));
-	if (cipher_iovecs != NULL)
-		kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t));
-
-	*enc_len = 0;
-	puio->uio_iov = NULL;
-	puio->uio_iovcnt = 0;
-	cuio->uio_iov = NULL;
-	cuio->uio_iovcnt = 0;
-	return (ret);
-}
-
-/*
- * This function builds up the plaintext (puio) and ciphertext (cuio) uios so
- * that they can be used for encryption and decryption by zio_do_crypt_uio().
- * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks
- * requiring special handling to parse out pieces that are to be encrypted. The
- * authbuf is used by these special cases to store additional authenticated
- * data (AAD) for the encryption modes.
- */
-static int
-zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
-    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
-    uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
-    uint_t *auth_len, boolean_t *no_crypt)
-{
-	int ret;
-	iovec_t *mac_iov;
-
-	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);
-
-	/* route to handler */
-	switch (ot) {
-	case DMU_OT_INTENT_LOG:
-		ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
-		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
-		    no_crypt);
-		break;
-	case DMU_OT_DNODE:
-		ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf,
-		    cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf,
-		    auth_len, no_crypt);
-		break;
-	default:
-		ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
-		    datalen, puio, cuio, enc_len);
-		*authbuf = NULL;
-		*auth_len = 0;
-		*no_crypt = B_FALSE;
-		break;
-	}
-
-	if (ret != 0)
-		goto error;
-
-	/* populate the uios */
-	puio->uio_segflg = UIO_SYSSPACE;
-	cuio->uio_segflg = UIO_SYSSPACE;
-
-	mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]);
-	mac_iov->iov_base = mac;
-	mac_iov->iov_len = ZIO_DATA_MAC_LEN;
-
-	return (0);
-
-error:
-	return (ret);
-}
-
-/*
- * Primary encryption / decryption entrypoint for zio data.
- */
-int
-zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
-    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
-    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
-    boolean_t *no_crypt)
-{
-	int ret;
-	boolean_t locked = B_FALSE;
-	uint64_t crypt = key->zk_crypt;
-	uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
-	uint_t enc_len, auth_len;
-	uio_t puio, cuio;
-	uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
-	crypto_key_t tmp_ckey, *ckey = NULL;
-	crypto_ctx_template_t tmpl;
-	uint8_t *authbuf = NULL;
-
-	/*
-	 * If the needed key is the current one, just use it. Otherwise we
-	 * need to generate a temporary one from the given salt + master key.
-	 * If we are encrypting, we must return a copy of the current salt
-	 * so that it can be stored in the blkptr_t.
-	 */
-	rw_enter(&key->zk_salt_lock, RW_READER);
-	locked = B_TRUE;
-
-	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) {
-		ckey = &key->zk_current_key;
-		tmpl = key->zk_current_tmpl;
-	} else {
-		rw_exit(&key->zk_salt_lock);
-		locked = B_FALSE;
-
-		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
-		    salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len);
-		if (ret != 0)
-			goto error;
-
-		tmp_ckey.ck_format = CRYPTO_KEY_RAW;
-		tmp_ckey.ck_data = enc_keydata;
-		tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len);
-
-		ckey = &tmp_ckey;
-		tmpl = NULL;
-	}
-
-	/*
-	 * Attempt to use QAT acceleration if we can. We currently don't
-	 * do this for metadnode and ZIL blocks, since they have a much
-	 * more involved buffer layout and the qat_crypt() function only
-	 * works in-place.
-	 */
-	if (qat_crypt_use_accel(datalen) &&
-	    ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) {
-		uint8_t *srcbuf, *dstbuf;
-
-		if (encrypt) {
-			srcbuf = plainbuf;
-			dstbuf = cipherbuf;
-		} else {
-			srcbuf = cipherbuf;
-			dstbuf = plainbuf;
-		}
-
-		ret = qat_crypt((encrypt) ? QAT_ENCRYPT : QAT_DECRYPT, srcbuf,
-		    dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen);
-		if (ret == CPA_STATUS_SUCCESS) {
-			if (locked) {
-				rw_exit(&key->zk_salt_lock);
-				locked = B_FALSE;
-			}
-
-			return (0);
-		}
-		/* If the hardware implementation fails fall back to software */
-	}
-
-	bzero(&puio, sizeof (uio_t));
-	bzero(&cuio, sizeof (uio_t));
-
-	/* create uios for encryption */
-	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
-	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
-	    &authbuf, &auth_len, no_crypt);
-	if (ret != 0)
-		goto error;
-
-	/* perform the encryption / decryption in software */
-	ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len,
-	    &puio, &cuio, authbuf, auth_len);
-	if (ret != 0)
-		goto error;
-
-	if (locked) {
-		rw_exit(&key->zk_salt_lock);
-		locked = B_FALSE;
-	}
-
-	if (authbuf != NULL)
-		zio_buf_free(authbuf, datalen);
-	if (ckey == &tmp_ckey)
-		bzero(enc_keydata, keydata_len);
-	zio_crypt_destroy_uio(&puio);
-	zio_crypt_destroy_uio(&cuio);
-
-	return (0);
-
-error:
-	if (locked)
-		rw_exit(&key->zk_salt_lock);
-	if (authbuf != NULL)
-		zio_buf_free(authbuf, datalen);
-	if (ckey == &tmp_ckey)
-		bzero(enc_keydata, keydata_len);
-	zio_crypt_destroy_uio(&puio);
-	zio_crypt_destroy_uio(&cuio);
-
-	return (ret);
-}
-
-/*
- * Simple wrapper around zio_do_crypt_data() to work with abd's instead of
- * linear buffers.
- */
-int
-zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot,
-    boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac,
-    uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt)
-{
-	int ret;
-	void *ptmp, *ctmp;
-
-	if (encrypt) {
-		ptmp = abd_borrow_buf_copy(pabd, datalen);
-		ctmp = abd_borrow_buf(cabd, datalen);
-	} else {
-		ptmp = abd_borrow_buf(pabd, datalen);
-		ctmp = abd_borrow_buf_copy(cabd, datalen);
-	}
-
-	ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac,
-	    datalen, ptmp, ctmp, no_crypt);
-	if (ret != 0)
-		goto error;
-
-	if (encrypt) {
-		abd_return_buf(pabd, ptmp, datalen);
-		abd_return_buf_copy(cabd, ctmp, datalen);
-	} else {
-		abd_return_buf_copy(pabd, ptmp, datalen);
-		abd_return_buf(cabd, ctmp, datalen);
-	}
-
-	return (0);
-
-error:
-	if (encrypt) {
-		abd_return_buf(pabd, ptmp, datalen);
-		abd_return_buf_copy(cabd, ctmp, datalen);
-	} else {
-		abd_return_buf_copy(pabd, ptmp, datalen);
-		abd_return_buf(cabd, ctmp, datalen);
-	}
-
-	return (ret);
-}
-
-#if defined(_KERNEL)
-/* BEGIN CSTYLED */
-module_param(zfs_key_max_salt_uses, ulong, 0644);
-MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value "
-	"can be used for generating encryption keys before it is rotated");
-/* END CSTYLED */
-#endif

diff --git a/zfs/module/zfs/zio_inject.c b/zfs/module/zfs/zio_inject.c
index d8af503..feaf41d 100644
--- a/zfs/module/zfs/zio_inject.c
+++ b/zfs/module/zfs/zio_inject.c

@@ -117,7 +117,7 @@
 	 */
 	uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
 
-	return (spa_get_random(maximum) < frequency);
+	return (random_in_range(maximum) < frequency);
 }
 
 /*
@@ -265,6 +265,12 @@
 	if (zio->io_type != ZIO_TYPE_READ)
 		return (0);
 
+	/*
+	 * A rebuild I/O has no checksum to verify.
+	 */
+	if (zio->io_priority == ZIO_PRIORITY_REBUILD && error == ECKSUM)
+		return (0);
+
 	rw_enter(&inject_lock, RW_READER);
 
 	for (handler = list_head(&inject_handlers); handler != NULL;
@@ -339,14 +345,14 @@
 static int
 zio_inject_bitflip_cb(void *data, size_t len, void *private)
 {
-	ASSERTV(zio_t *zio = private);
+	zio_t *zio __maybe_unused = private;
 	uint8_t *buffer = data;
-	uint_t byte = spa_get_random(len);
+	uint_t byte = random_in_range(len);
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ);
 
 	/* flip a single random bit in an abd data buffer */
-	buffer[byte] ^= 1 << spa_get_random(8);
+	buffer[byte] ^= 1 << random_in_range(8);
 
 	return (1);	/* stop after first flip */
 }
@@ -487,7 +493,7 @@
 		}
 
 		/* Have a "problem" writing 60% of the time */
-		if (spa_get_random(100) < 60)
+		if (random_in_range(100) < 60)
 			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
 		break;
 	}

diff --git a/zfs/module/zfs/zle.c b/zfs/module/zfs/zle.c
index 613607f..0decebb 100644
--- a/zfs/module/zfs/zle.c
+++ b/zfs/module/zfs/zle.c

@@ -32,6 +32,7 @@
  */
 #include <sys/types.h>
 #include <sys/sysmacros.h>
+#include <sys/zio_compress.h>
 
 size_t
 zle_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)

diff --git a/zfs/module/zfs/zmoddbg.h b/zfs/module/zfs/zmoddbg.h
new file mode 100644
index 0000000..c5b0cd8
--- /dev/null
+++ b/zfs/module/zfs/zmoddbg.h

@@ -0,0 +1,27 @@
+#ifndef _ZMODDBG_H_
+#define _ZMODDBG_H_
+
+
+// #define __ZMODDBG__
+// #define __ZMOD_KDEBUG__
+// #define __ZMOD_KTRACE__
+
+#ifdef __ZMODDBG__
+ #ifdef __ZMOD_KDEBUG__
+  #define KDEBUG(msg, ...) cmn_err(CE_WARN, "KDEBUG %s(%d) %s " msg , __FILE__, __LINE__, __FUNCTION__ __VA_OPT__(,) __VA_ARGS__ )
+ #endif
+
+ #ifdef __ZMOD_KTRACE__
+  #define KTRACE() cmn_err(CE_WARN, "KTRACE %s(%d) %s", __FILE__, __LINE__, __FUNCTION__)
+ #endif
+#endif
+
+#ifndef KDEBUG
+ #define KDEBUG(msg,...)
+#endif
+#ifndef KTRACE
+ #define KTRACE()
+#endif
+
+
+#endif

diff --git a/zfs/module/zfs/zpl_ctldir.c b/zfs/module/zfs/zpl_ctldir.c
deleted file mode 100644
index 6df367b..0000000
--- a/zfs/module/zfs/zpl_ctldir.c
+++ /dev/null

@@ -1,572 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * LLNL-CODE-403049.
- * Rewritten for Linux by:
- *   Rohan Puri <rohan.puri15@gmail.com>
- *   Brian Behlendorf <behlendorf1@llnl.gov>
- */
-
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zpl.h>
-
-/*
- * Common open routine.  Disallow any write access.
- */
-/* ARGSUSED */
-static int
-zpl_common_open(struct inode *ip, struct file *filp)
-{
-	if (filp->f_mode & FMODE_WRITE)
-		return (-EACCES);
-
-	return (generic_file_open(ip, filp));
-}
-
-/*
- * Get root directory contents.
- */
-static int
-zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
-	int error = 0;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (!zpl_dir_emit_dots(filp, ctx))
-		goto out;
-
-	if (ctx->pos == 2) {
-		if (!zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME,
-		    strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR))
-			goto out;
-
-		ctx->pos++;
-	}
-
-	if (ctx->pos == 3) {
-		if (!zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME,
-		    strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR))
-			goto out;
-
-		ctx->pos++;
-	}
-out:
-	ZFS_EXIT(zfsvfs);
-
-	return (error);
-}
-
-#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
-static int
-zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	zpl_dir_context_t ctx =
-	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
-	int error;
-
-	error = zpl_root_iterate(filp, &ctx);
-	filp->f_pos = ctx.pos;
-
-	return (error);
-}
-#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
-
-/*
- * Get root directory attributes.
- */
-/* ARGSUSED */
-static int
-zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
-    u32 request_mask, unsigned int query_flags)
-{
-	struct inode *ip = path->dentry->d_inode;
-
-	generic_fillattr(ip, stat);
-	stat->atime = current_time(ip);
-
-	return (0);
-}
-ZPL_GETATTR_WRAPPER(zpl_root_getattr);
-
-static struct dentry *
-#ifdef HAVE_LOOKUP_NAMEIDATA
-zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
-#else
-zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
-#endif
-{
-	cred_t *cr = CRED();
-	struct inode *ip;
-	int error;
-
-	crhold(cr);
-	error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL);
-	ASSERT3S(error, <=, 0);
-	crfree(cr);
-
-	if (error) {
-		if (error == -ENOENT)
-			return (d_splice_alias(NULL, dentry));
-		else
-			return (ERR_PTR(error));
-	}
-
-	return (d_splice_alias(ip, dentry));
-}
-
-/*
- * The '.zfs' control directory file and inode operations.
- */
-const struct file_operations zpl_fops_root = {
-	.open		= zpl_common_open,
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-#ifdef HAVE_VFS_ITERATE_SHARED
-	.iterate_shared	= zpl_root_iterate,
-#elif defined(HAVE_VFS_ITERATE)
-	.iterate	= zpl_root_iterate,
-#else
-	.readdir	= zpl_root_readdir,
-#endif
-};
-
-const struct inode_operations zpl_ops_root = {
-	.lookup		= zpl_root_lookup,
-	.getattr	= zpl_root_getattr,
-};
-
-#ifdef HAVE_AUTOMOUNT
-static struct vfsmount *
-zpl_snapdir_automount(struct path *path)
-{
-	int error;
-
-	error = -zfsctl_snapshot_mount(path, 0);
-	if (error)
-		return (ERR_PTR(error));
-
-	/*
-	 * Rather than returning the new vfsmount for the snapshot we must
-	 * return NULL to indicate a mount collision.  This is done because
-	 * the user space mount calls do_add_mount() which adds the vfsmount
-	 * to the name space.  If we returned the new mount here it would be
-	 * added again to the vfsmount list resulting in list corruption.
-	 */
-	return (NULL);
-}
-#endif /* HAVE_AUTOMOUNT */
-
-/*
- * Negative dentries must always be revalidated so newly created snapshots
- * can be detected and automounted.  Normal dentries should be kept because
- * as of the 3.18 kernel revaliding the mountpoint dentry will result in
- * the snapshot being immediately unmounted.
- */
-static int
-#ifdef HAVE_D_REVALIDATE_NAMEIDATA
-zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
-#else
-zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
-#endif
-{
-	return (!!dentry->d_inode);
-}
-
-dentry_operations_t zpl_dops_snapdirs = {
-/*
- * Auto mounting of snapshots is only supported for 2.6.37 and
- * newer kernels.  Prior to this kernel the ops->follow_link()
- * callback was used as a hack to trigger the mount.  The
- * resulting vfsmount was then explicitly grafted in to the
- * name space.  While it might be possible to add compatibility
- * code to accomplish this it would require considerable care.
- */
-#ifdef HAVE_AUTOMOUNT
-	.d_automount	= zpl_snapdir_automount,
-#endif /* HAVE_AUTOMOUNT */
-	.d_revalidate	= zpl_snapdir_revalidate,
-};
-
-static struct dentry *
-#ifdef HAVE_LOOKUP_NAMEIDATA
-zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
-    struct nameidata *nd)
-#else
-zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
-    unsigned int flags)
-#endif
-
-{
-	fstrans_cookie_t cookie;
-	cred_t *cr = CRED();
-	struct inode *ip = NULL;
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip,
-	    0, cr, NULL, NULL);
-	ASSERT3S(error, <=, 0);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-
-	if (error && error != -ENOENT)
-		return (ERR_PTR(error));
-
-	ASSERT(error == 0 || ip == NULL);
-	d_clear_d_op(dentry);
-	d_set_d_op(dentry, &zpl_dops_snapdirs);
-#ifdef HAVE_AUTOMOUNT
-	dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-#endif
-
-	return (d_splice_alias(ip, dentry));
-}
-
-static int
-zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
-{
-	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
-	fstrans_cookie_t cookie;
-	char snapname[MAXNAMELEN];
-	boolean_t case_conflict;
-	uint64_t id, pos;
-	int error = 0;
-
-	ZFS_ENTER(zfsvfs);
-	cookie = spl_fstrans_mark();
-
-	if (!zpl_dir_emit_dots(filp, ctx))
-		goto out;
-
-	pos = ctx->pos;
-	while (error == 0) {
-		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
-		error = -dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
-		    snapname, &id, &pos, &case_conflict);
-		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
-		if (error)
-			goto out;
-
-		if (!zpl_dir_emit(ctx, snapname, strlen(snapname),
-		    ZFSCTL_INO_SHARES - id, DT_DIR))
-			goto out;
-
-		ctx->pos = pos;
-	}
-out:
-	spl_fstrans_unmark(cookie);
-	ZFS_EXIT(zfsvfs);
-
-	if (error == -ENOENT)
-		return (0);
-
-	return (error);
-}
-
-#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
-static int
-zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	zpl_dir_context_t ctx =
-	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
-	int error;
-
-	error = zpl_snapdir_iterate(filp, &ctx);
-	filp->f_pos = ctx.pos;
-
-	return (error);
-}
-#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
-
-static int
-zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
-{
-	cred_t *cr = CRED();
-	int error;
-
-	/* We probably don't want to support renameat2(2) in ctldir */
-	if (flags)
-		return (-EINVAL);
-
-	crhold(cr);
-	error = -zfsctl_snapdir_rename(sdip, dname(sdentry),
-	    tdip, dname(tdentry), cr, 0);
-	ASSERT3S(error, <=, 0);
-	crfree(cr);
-
-	return (error);
-}
-
-#ifndef HAVE_RENAME_WANTS_FLAGS
-static int
-zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry)
-{
-	return (zpl_snapdir_rename2(sdip, sdentry, tdip, tdentry, 0));
-}
-#endif
-
-static int
-zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
-{
-	cred_t *cr = CRED();
-	int error;
-
-	crhold(cr);
-	error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0);
-	ASSERT3S(error, <=, 0);
-	crfree(cr);
-
-	return (error);
-}
-
-static int
-zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
-{
-	cred_t *cr = CRED();
-	vattr_t *vap;
-	struct inode *ip;
-	int error;
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
-
-	error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
-	if (error == 0) {
-		d_clear_d_op(dentry);
-		d_set_d_op(dentry, &zpl_dops_snapdirs);
-		d_instantiate(dentry, ip);
-	}
-
-	kmem_free(vap, sizeof (vattr_t));
-	ASSERT3S(error, <=, 0);
-	crfree(cr);
-
-	return (error);
-}
-
-/*
- * Get snapshot directory attributes.
- */
-/* ARGSUSED */
-static int
-zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
-    u32 request_mask, unsigned int query_flags)
-{
-	struct inode *ip = path->dentry->d_inode;
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-
-	ZFS_ENTER(zfsvfs);
-	generic_fillattr(ip, stat);
-
-	stat->nlink = stat->size = 2;
-	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
-	stat->atime = current_time(ip);
-	ZFS_EXIT(zfsvfs);
-
-	return (0);
-}
-ZPL_GETATTR_WRAPPER(zpl_snapdir_getattr);
-
-/*
- * The '.zfs/snapshot' directory file operations.  These mainly control
- * generating the list of available snapshots when doing an 'ls' in the
- * directory.  See zpl_snapdir_readdir().
- */
-const struct file_operations zpl_fops_snapdir = {
-	.open		= zpl_common_open,
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-#ifdef HAVE_VFS_ITERATE_SHARED
-	.iterate_shared	= zpl_snapdir_iterate,
-#elif defined(HAVE_VFS_ITERATE)
-	.iterate	= zpl_snapdir_iterate,
-#else
-	.readdir	= zpl_snapdir_readdir,
-#endif
-
-};
-
-/*
- * The '.zfs/snapshot' directory inode operations.  These mainly control
- * creating an inode for a snapshot directory and initializing the needed
- * infrastructure to automount the snapshot.  See zpl_snapdir_lookup().
- */
-const struct inode_operations zpl_ops_snapdir = {
-	.lookup		= zpl_snapdir_lookup,
-	.getattr	= zpl_snapdir_getattr,
-#ifdef HAVE_RENAME_WANTS_FLAGS
-	.rename		= zpl_snapdir_rename2,
-#else
-	.rename		= zpl_snapdir_rename,
-#endif
-	.rmdir		= zpl_snapdir_rmdir,
-	.mkdir		= zpl_snapdir_mkdir,
-};
-
-static struct dentry *
-#ifdef HAVE_LOOKUP_NAMEIDATA
-zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
-    struct nameidata *nd)
-#else
-zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
-    unsigned int flags)
-#endif
-{
-	fstrans_cookie_t cookie;
-	cred_t *cr = CRED();
-	struct inode *ip = NULL;
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfsctl_shares_lookup(dip, dname(dentry), &ip,
-	    0, cr, NULL, NULL);
-	ASSERT3S(error, <=, 0);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-
-	if (error) {
-		if (error == -ENOENT)
-			return (d_splice_alias(NULL, dentry));
-		else
-			return (ERR_PTR(error));
-	}
-
-	return (d_splice_alias(ip, dentry));
-}
-
-static int
-zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
-{
-	fstrans_cookie_t cookie;
-	cred_t *cr = CRED();
-	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
-	znode_t *dzp;
-	int error = 0;
-
-	ZFS_ENTER(zfsvfs);
-	cookie = spl_fstrans_mark();
-
-	if (zfsvfs->z_shares_dir == 0) {
-		zpl_dir_emit_dots(filp, ctx);
-		goto out;
-	}
-
-	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
-	if (error)
-		goto out;
-
-	crhold(cr);
-	error = -zfs_readdir(ZTOI(dzp), ctx, cr);
-	crfree(cr);
-
-	iput(ZTOI(dzp));
-out:
-	spl_fstrans_unmark(cookie);
-	ZFS_EXIT(zfsvfs);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
-static int
-zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	zpl_dir_context_t ctx =
-	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
-	int error;
-
-	error = zpl_shares_iterate(filp, &ctx);
-	filp->f_pos = ctx.pos;
-
-	return (error);
-}
-#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
-
-/* ARGSUSED */
-static int
-zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
-    u32 request_mask, unsigned int query_flags)
-{
-	struct inode *ip = path->dentry->d_inode;
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	znode_t *dzp;
-	int error;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (zfsvfs->z_shares_dir == 0) {
-		generic_fillattr(path->dentry->d_inode, stat);
-		stat->nlink = stat->size = 2;
-		stat->atime = current_time(ip);
-		ZFS_EXIT(zfsvfs);
-		return (0);
-	}
-
-	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
-	if (error == 0) {
-		error = -zfs_getattr_fast(ZTOI(dzp), stat);
-		iput(ZTOI(dzp));
-	}
-
-	ZFS_EXIT(zfsvfs);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-ZPL_GETATTR_WRAPPER(zpl_shares_getattr);
-
-/*
- * The '.zfs/shares' directory file operations.
- */
-const struct file_operations zpl_fops_shares = {
-	.open		= zpl_common_open,
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-#ifdef HAVE_VFS_ITERATE_SHARED
-	.iterate_shared	= zpl_shares_iterate,
-#elif defined(HAVE_VFS_ITERATE)
-	.iterate	= zpl_shares_iterate,
-#else
-	.readdir	= zpl_shares_readdir,
-#endif
-
-};
-
-/*
- * The '.zfs/shares' directory inode operations.
- */
-const struct inode_operations zpl_ops_shares = {
-	.lookup		= zpl_shares_lookup,
-	.getattr	= zpl_shares_getattr,
-};

diff --git a/zfs/module/zfs/zpl_export.c b/zfs/module/zfs/zpl_export.c
deleted file mode 100644
index a264d66..0000000
--- a/zfs/module/zfs/zpl_export.c
+++ /dev/null

@@ -1,177 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011 Gunnar Beutner
- * Copyright (c) 2012 Cyril Plisko. All rights reserved.
- */
-
-
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zpl.h>
-
-
-static int
-#ifdef HAVE_ENCODE_FH_WITH_INODE
-zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
-{
-#else
-zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable)
-{
-	/* CSTYLED */
-	struct inode *ip = dentry->d_inode;
-#endif /* HAVE_ENCODE_FH_WITH_INODE */
-	fstrans_cookie_t cookie;
-	fid_t *fid = (fid_t *)fh;
-	int len_bytes, rc;
-
-	len_bytes = *max_len * sizeof (__u32);
-
-	if (len_bytes < offsetof(fid_t, fid_data))
-		return (255);
-
-	fid->fid_len = len_bytes - offsetof(fid_t, fid_data);
-	cookie = spl_fstrans_mark();
-
-	if (zfsctl_is_node(ip))
-		rc = zfsctl_fid(ip, fid);
-	else
-		rc = zfs_fid(ip, fid);
-
-	spl_fstrans_unmark(cookie);
-	len_bytes = offsetof(fid_t, fid_data) + fid->fid_len;
-	*max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32);
-
-	return (rc == 0 ? FILEID_INO32_GEN : 255);
-}
-
-static struct dentry *
-zpl_dentry_obtain_alias(struct inode *ip)
-{
-	struct dentry *result;
-
-#ifdef HAVE_D_OBTAIN_ALIAS
-	result = d_obtain_alias(ip);
-#else
-	result = d_alloc_anon(ip);
-
-	if (result == NULL) {
-		iput(ip);
-		result = ERR_PTR(-ENOMEM);
-	}
-#endif /* HAVE_D_OBTAIN_ALIAS */
-
-	return (result);
-}
-
-static struct dentry *
-zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
-    int fh_len, int fh_type)
-{
-	fid_t *fid = (fid_t *)fh;
-	fstrans_cookie_t cookie;
-	struct inode *ip;
-	int len_bytes, rc;
-
-	len_bytes = fh_len * sizeof (__u32);
-
-	if (fh_type != FILEID_INO32_GEN ||
-	    len_bytes < offsetof(fid_t, fid_data) ||
-	    len_bytes < offsetof(fid_t, fid_data) + fid->fid_len)
-		return (ERR_PTR(-EINVAL));
-
-	cookie = spl_fstrans_mark();
-	rc = zfs_vget(sb, &ip, fid);
-	spl_fstrans_unmark(cookie);
-
-	if (rc) {
-		/*
-		 * If we see ENOENT it might mean that an NFSv4 * client
-		 * is using a cached inode value in a file handle and
-		 * that the sought after file has had its inode changed
-		 * by a third party.  So change the error to ESTALE
-		 * which will trigger a full lookup by the client and
-		 * will find the new filename/inode pair if it still
-		 * exists.
-		 */
-		if (rc == ENOENT)
-			rc = ESTALE;
-
-		return (ERR_PTR(-rc));
-	}
-
-	ASSERT((ip != NULL) && !IS_ERR(ip));
-
-	return (zpl_dentry_obtain_alias(ip));
-}
-
-static struct dentry *
-zpl_get_parent(struct dentry *child)
-{
-	cred_t *cr = CRED();
-	fstrans_cookie_t cookie;
-	struct inode *ip;
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_lookup(child->d_inode, "..", &ip, 0, cr, NULL, NULL);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	if (error)
-		return (ERR_PTR(error));
-
-	return (zpl_dentry_obtain_alias(ip));
-}
-
-#ifdef HAVE_COMMIT_METADATA
-static int
-zpl_commit_metadata(struct inode *inode)
-{
-	cred_t *cr = CRED();
-	fstrans_cookie_t cookie;
-	int error;
-
-	if (zfsctl_is_node(inode))
-		return (0);
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_fsync(inode, 0, cr);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-#endif /* HAVE_COMMIT_METADATA */
-
-const struct export_operations zpl_export_operations = {
-	.encode_fh		= zpl_encode_fh,
-	.fh_to_dentry		= zpl_fh_to_dentry,
-	.get_parent		= zpl_get_parent,
-#ifdef HAVE_COMMIT_METADATA
-	.commit_metadata	= zpl_commit_metadata,
-#endif /* HAVE_COMMIT_METADATA */
-};

diff --git a/zfs/module/zfs/zpl_file.c b/zfs/module/zfs/zpl_file.c
deleted file mode 100644
index acad467..0000000
--- a/zfs/module/zfs/zpl_file.c
+++ /dev/null

@@ -1,1075 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
- */
-
-
-#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-#endif
-#include <sys/file.h>
-#include <sys/dmu_objset.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_project.h>
-
-
-/*
- * file_operations.open hook.  Runs generic_file_open() first; only when
- * that succeeds is the open forwarded to zfs_open() with the caller's
- * credentials.  Returns 0 or a negative errno.
- */
-static int
-zpl_open(struct inode *ip, struct file *filp)
-{
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	int error = generic_file_open(ip, filp);
-
-	if (error == 0) {
-		crhold(cr);
-		fstrans = spl_fstrans_mark();
-		error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
-		spl_fstrans_unmark(fstrans);
-		crfree(cr);
-		ASSERT3S(error, <=, 0);
-	}
-
-	return (error);
-}
-
-/*
- * file_operations.release hook.  If the znode has z_atime_dirty set the
- * inode is marked dirty first, then the close is handed to zfs_close().
- * Returns 0 or a negative errno.
- */
-static int
-zpl_release(struct inode *ip, struct file *filp)
-{
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	int error;
-
-	fstrans = spl_fstrans_mark();
-
-	if (ITOZ(ip)->z_atime_dirty)
-		zfs_mark_inode_dirty(ip);
-
-	crhold(cr);
-	error = -zfs_close(ip, filp->f_flags, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-
-/*
- * Directory iteration entry point.  Delegates to zfs_readdir() on the
- * file's inode while holding the caller's credentials.  Returns 0 or a
- * negative errno.
- */
-static int
-zpl_iterate(struct file *filp, zpl_dir_context_t *ctx)
-{
-	struct inode *dir = file_inode(filp);
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	int error;
-
-	crhold(cr);
-	fstrans = spl_fstrans_mark();
-	error = -zfs_readdir(dir, ctx, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-
-#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
-/*
- * ->readdir() entry point for kernels without ->iterate().  Wraps the
- * dirent/filldir pair in a zpl_dir_context_t, runs zpl_iterate() and
- * writes the resulting position back to filp->f_pos.
- */
-static int
-zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	zpl_dir_context_t dctx =
-	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
-	int error = zpl_iterate(filp, &dctx);
-
-	/* The position is stored back whether or not iteration failed. */
-	filp->f_pos = dctx.pos;
-
-	return (error);
-}
-#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
-
-#if defined(HAVE_FSYNC_WITH_DENTRY)
-/*
- * Linux 2.6.x - 2.6.34 API,
- * Up to and including 2.6.34 the nfsd kernel server could call the
- * fops->fsync() hook with a NULL 'struct file *'.  The inode is therefore
- * taken from the dentry and filp is never dereferenced here.
- */
-static int
-zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
-{
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	int error;
-
-	crhold(cr);
-	fstrans = spl_fstrans_mark();
-	error = -zfs_fsync(dentry->d_inode, datasync, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-
-#if defined(HAVE_FILE_AIO_FSYNC)
-/* ->aio_fsync() hook: forward to zpl_fsync() using the kiocb's file. */
-static int
-zpl_aio_fsync(struct kiocb *kiocb, int datasync)
-{
-	return (zpl_fsync(kiocb->ki_filp, file_dentry(kiocb->ki_filp),
-	    datasync));
-}
-#endif /* HAVE_FILE_AIO_FSYNC */
-
-#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
-/*
- * Linux 2.6.35 - 3.0 API,
- * From 2.6.35 on the dentry argument to the fops->fsync() hook was dropped
- * as redundant.  The dentry remains reachable via filp->f_path.dentry,
- * and filp is guaranteed to never be NULL.
- */
-static int
-zpl_fsync(struct file *filp, int datasync)
-{
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	struct inode *inode = filp->f_mapping->host;
-	int error;
-
-	crhold(cr);
-	fstrans = spl_fstrans_mark();
-	error = -zfs_fsync(inode, datasync, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-
-#if defined(HAVE_FILE_AIO_FSYNC)
-/* ->aio_fsync() hook: forward to zpl_fsync() using the kiocb's file. */
-static int
-zpl_aio_fsync(struct kiocb *kiocb, int datasync)
-{
-	struct file *filp = kiocb->ki_filp;
-
-	return (zpl_fsync(filp, datasync));
-}
-#endif /* HAVE_FILE_AIO_FSYNC */
-
-#elif defined(HAVE_FSYNC_RANGE)
-/*
- * Linux 3.1 - 3.x API,
- * Starting with 3.1 the .fsync() vfs hook itself is responsible for
- * calling filemap_write_and_wait_range().  The caller also no longer
- * holds the i_mutex lock; zfs does not need it, so it is not taken here.
- */
-static int
-zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
-{
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	struct inode *inode = filp->f_mapping->host;
-	int error;
-
-	/* Flush the requested page cache range before syncing the file. */
-	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (error != 0)
-		return (error);
-
-	crhold(cr);
-	fstrans = spl_fstrans_mark();
-	error = -zfs_fsync(inode, datasync, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-
-#if defined(HAVE_FILE_AIO_FSYNC)
-/*
- * ->aio_fsync() hook: sync from the kiocb position through the end of the
- * range (-1) by forwarding to zpl_fsync().
- */
-static int
-zpl_aio_fsync(struct kiocb *kiocb, int datasync)
-{
-	struct file *filp = kiocb->ki_filp;
-
-	return (zpl_fsync(filp, kiocb->ki_pos, -1, datasync));
-}
-#endif /* HAVE_FILE_AIO_FSYNC */
-
-#else
-#error "Unsupported fops->fsync() implementation"
-#endif
-
-/*
- * Translate the IOCB_* bits set in kiocb->ki_flags into the corresponding
- * FDSYNC/FSYNC/FAPPEND/FDIRECT flags.  Each IOCB_* bit is only examined
- * when the kernel headers define it.
- */
-static inline int
-zfs_io_flags(struct kiocb *kiocb)
-{
-	int ioflags = 0;
-
-#ifdef IOCB_DSYNC
-	ioflags |= (kiocb->ki_flags & IOCB_DSYNC) ? FDSYNC : 0;
-#endif
-#ifdef IOCB_SYNC
-	ioflags |= (kiocb->ki_flags & IOCB_SYNC) ? FSYNC : 0;
-#endif
-#ifdef IOCB_APPEND
-	ioflags |= (kiocb->ki_flags & IOCB_APPEND) ? FAPPEND : 0;
-#endif
-#ifdef IOCB_DIRECT
-	ioflags |= (kiocb->ki_flags & IOCB_DIRECT) ? FDIRECT : 0;
-#endif
-
-	return (ioflags);
-}
-
-/*
- * Read up to 'count' bytes from 'ip' starting at *ppos into the supplied
- * iovec array by way of zfs_read().  On success *ppos is advanced by the
- * number of bytes transferred and that number is returned.  On failure
- * the negated zfs_read() result is returned and *ppos is left unchanged.
- */
-static ssize_t
-zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
-    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
-    cred_t *cr, size_t skip)
-{
-	fstrans_cookie_t fstrans;
-	uio_t uio = { { 0 }, 0 };
-	ssize_t nread;
-	int error;
-
-	/* Describe the transfer for zfs_read(). */
-	uio.uio_iov = iovp;
-	uio.uio_iovcnt = nr_segs;
-	uio.uio_skip = skip;
-	uio.uio_segflg = segment;
-	uio.uio_loffset = *ppos;
-	uio.uio_resid = count;
-	uio.uio_limit = MAXOFFSET_T;
-
-	fstrans = spl_fstrans_mark();
-	error = -zfs_read(ip, &uio, flags, cr);
-	spl_fstrans_unmark(fstrans);
-
-	if (error >= 0) {
-		/* Whatever is left in uio_resid was not transferred. */
-		nread = count - uio.uio_resid;
-		*ppos += nread;
-		return (nread);
-	}
-
-	return (error);
-}
-
-/*
- * Single-buffer convenience wrapper around zpl_read_common_iovec(): wraps
- * 'buf'/'len' in a one-element iovec and reads with no skip.
- */
-inline ssize_t
-zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
-    uio_seg_t segment, int flags, cred_t *cr)
-{
-	struct iovec iov = {
-		.iov_base = (void *)buf,
-		.iov_len = len,
-	};
-
-	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
-	    flags, cr, 0));
-}
-
-/*
- * Shared read path for the read_iter/aio_read hooks.  ORs the kiocb
- * derived flags into the file's f_flags, reads at kiocb->ki_pos via
- * zpl_read_common_iovec() and then handles the access time update.
- * Returns whatever zpl_read_common_iovec() returned.
- */
-static ssize_t
-zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
-    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
-{
-	struct file *filp = kiocb->ki_filp;
-	struct inode *ip = filp->f_mapping->host;
-	zfsvfs_t *zfsvfs = ZTOZSB(ITOZ(ip));
-	unsigned int f_flags = filp->f_flags | zfs_io_flags(kiocb);
-	cred_t *cr = CRED();
-	ssize_t nread;
-
-	crhold(cr);
-	nread = zpl_read_common_iovec(ip, iovp, count, nr_segs,
-	    &kiocb->ki_pos, seg, f_flags, cr, skip);
-	crfree(cr);
-
-	/*
-	 * With relatime enabled, file_accessed() is only called when
-	 * zfs_relatime_need_update() says so.  The VFS relatime test in
-	 * relatime_need_update() keys off the MNT_RELATIME mount flag, and
-	 * datasets with an inherited "relatime" property are not necessarily
-	 * mounted with it (e.g. after `zfs set relatime=...`).  In every
-	 * other case file_accessed() is called unconditionally.
-	 */
-	if (IS_NOATIME(ip) || !zfsvfs->z_relatime ||
-	    zfs_relatime_need_update(ip))
-		file_accessed(filp);
-
-	return (nread);
-}
-
-#if defined(HAVE_VFS_RW_ITERATE)
-/*
- * ->read_iter() hook.  Selects the uio segment type from the iov_iter
- * type bits (ITER_BVEC takes precedence over ITER_KVEC, user space
- * otherwise), performs the read and advances the iterator by the number
- * of bytes transferred.
- */
-static ssize_t
-zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
-{
-	uio_seg_t seg;
-	ssize_t nread;
-
-	if (to->type & ITER_BVEC)
-		seg = UIO_BVEC;
-	else if (to->type & ITER_KVEC)
-		seg = UIO_SYSSPACE;
-	else
-		seg = UIO_USERSPACE;
-
-	nread = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
-	    iov_iter_count(to), seg, to->iov_offset);
-	if (nread > 0)
-		iov_iter_advance(to, nread);
-
-	return (nread);
-}
-#else
-/*
- * ->aio_read() hook for kernels without read_iter.  Validates the user
- * iovec with generic_segment_checks() and then reads at kiocb->ki_pos;
- * the 'pos' argument itself is not consulted.
- */
-static ssize_t
-zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
-    unsigned long nr_segs, loff_t pos)
-{
-	size_t count;
-	ssize_t ret;
-
-	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
-	if (ret == 0) {
-		ret = zpl_iter_read_common(kiocb, iovp, nr_segs, count,
-		    UIO_USERSPACE, 0);
-	}
-
-	return (ret);
-}
-#endif /* HAVE_VFS_RW_ITERATE */
-
-/*
- * Write up to 'count' bytes from the supplied iovec array to 'ip' by way
- * of zfs_write().  With O_APPEND set in 'flags' the write position is
- * first moved to the current file size.  On success *ppos is advanced by
- * the number of bytes written and that number is returned; on failure the
- * negated zfs_write() result is returned.
- */
-static ssize_t
-zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
-    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
-    cred_t *cr, size_t skip)
-{
-	fstrans_cookie_t fstrans;
-	uio_t uio = { { 0 }, 0 };
-	ssize_t nwritten;
-	int error;
-
-	if (flags & O_APPEND)
-		*ppos = i_size_read(ip);
-
-	/* Describe the transfer for zfs_write(). */
-	uio.uio_iov = iovp;
-	uio.uio_iovcnt = nr_segs;
-	uio.uio_skip = skip;
-	uio.uio_segflg = segment;
-	uio.uio_loffset = *ppos;
-	uio.uio_resid = count;
-	uio.uio_limit = MAXOFFSET_T;
-
-	fstrans = spl_fstrans_mark();
-	error = -zfs_write(ip, &uio, flags, cr);
-	spl_fstrans_unmark(fstrans);
-
-	if (error >= 0) {
-		/* Whatever is left in uio_resid was not written. */
-		nwritten = count - uio.uio_resid;
-		*ppos += nwritten;
-		return (nwritten);
-	}
-
-	return (error);
-}
-
-/*
- * Single-buffer convenience wrapper around zpl_write_common_iovec(): wraps
- * 'buf'/'len' in a one-element iovec and writes with no skip.
- */
-inline ssize_t
-zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
-    uio_seg_t segment, int flags, cred_t *cr)
-{
-	struct iovec iov = {
-		.iov_base = (void *)buf,
-		.iov_len = len,
-	};
-
-	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
-	    flags, cr, 0));
-}
-
-/*
- * Shared write path for the write_iter/aio_write hooks.  ORs the kiocb
- * derived flags into the file's f_flags and writes at kiocb->ki_pos via
- * zpl_write_common_iovec() while holding the caller's credentials.
- */
-static ssize_t
-zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
-    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
-{
-	struct file *filp = kiocb->ki_filp;
-	unsigned int f_flags = filp->f_flags | zfs_io_flags(kiocb);
-	cred_t *cr = CRED();
-	ssize_t nwritten;
-
-	crhold(cr);
-	nwritten = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
-	    nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip);
-	crfree(cr);
-
-	return (nwritten);
-}
-
-#if defined(HAVE_VFS_RW_ITERATE)
-/*
- * ->write_iter() hook.  Runs the kernel's generic write checks (using
- * whichever generic_write_checks() signature is available), selects the
- * uio segment type from the iov_iter type bits, performs the write and
- * advances the iterator by the number of bytes written.
- */
-static ssize_t
-zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
-{
-	uio_seg_t seg;
-	ssize_t ret;
-	size_t count;
-
-#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
-	struct file *file = kiocb->ki_filp;
-	int isblk = S_ISBLK(file->f_mapping->host->i_mode);
-
-	count = iov_iter_count(from);
-	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
-	if (ret)
-		return (ret);
-#else
-	/*
-	 * XXX - ideally this check would sit in the same lock region as the
-	 * write itself, so that there is no TOCTTOU race when appending
-	 * while someone else grows the file.
-	 */
-	ret = generic_write_checks(kiocb, from);
-	if (ret <= 0)
-		return (ret);
-	count = ret;
-#endif
-
-	/* ITER_BVEC takes precedence over ITER_KVEC. */
-	if (from->type & ITER_BVEC)
-		seg = UIO_BVEC;
-	else if (from->type & ITER_KVEC)
-		seg = UIO_SYSSPACE;
-	else
-		seg = UIO_USERSPACE;
-
-	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
-	    count, seg, from->iov_offset);
-	if (ret > 0)
-		iov_iter_advance(from, ret);
-
-	return (ret);
-}
-#else
-/*
- * ->aio_write() hook for kernels without write_iter.  Validates the user
- * iovec, applies generic_write_checks() and then writes through
- * zpl_iter_write_common(), which operates at kiocb->ki_pos.
- */
-static ssize_t
-zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
-    unsigned long nr_segs, loff_t pos)
-{
-	struct file *file = kiocb->ki_filp;
-	int isblk = S_ISBLK(file->f_mapping->host->i_mode);
-	size_t count;
-	ssize_t ret;
-
-	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
-	if (ret == 0)
-		ret = generic_write_checks(file, &pos, &count, isblk);
-	if (ret != 0)
-		return (ret);
-
-	return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
-	    UIO_USERSPACE, 0));
-}
-#endif /* HAVE_VFS_RW_ITERATE */
-
-#if defined(HAVE_VFS_RW_ITERATE)
-/*
- * Common direct I/O body: route to the write path when 'rw' is WRITE and
- * to the read path for anything else.
- */
-static ssize_t
-zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter)
-{
-	if (rw != WRITE)
-		return (zpl_iter_read(kiocb, iter));
-
-	return (zpl_iter_write(kiocb, iter));
-}
-#if defined(HAVE_VFS_DIRECT_IO_ITER)
-/* ->direct_IO(kiocb, iter): direction comes from the iterator. */
-static ssize_t
-zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter)
-{
-	int rw = iov_iter_rw(iter);
-
-	return (zpl_direct_IO_impl(rw, kiocb, iter));
-}
-#elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET)
-/*
- * ->direct_IO(kiocb, iter, pos): direction comes from the iterator and
- * 'pos' is expected to equal kiocb->ki_pos.
- */
-static ssize_t
-zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
-{
-	int rw;
-
-	ASSERT3S(pos, ==, kiocb->ki_pos);
-	rw = iov_iter_rw(iter);
-
-	return (zpl_direct_IO_impl(rw, kiocb, iter));
-}
-#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
-/*
- * ->direct_IO(rw, kiocb, iter, pos): direction is passed in explicitly and
- * 'pos' is expected to equal kiocb->ki_pos.
- */
-static ssize_t
-zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
-{
-	ASSERT3S(pos, ==, kiocb->ki_pos);
-
-	return (zpl_direct_IO_impl(rw, kiocb, iter));
-}
-#else
-#error "Unknown direct IO interface"
-#endif
-
-#else
-
-#if defined(HAVE_VFS_DIRECT_IO_IOVEC)
-/*
- * ->direct_IO(rw, kiocb, iovp, pos, nr_segs): route to the aio write path
- * when 'rw' is WRITE and to the aio read path for anything else.
- */
-static ssize_t
-zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iovp,
-    loff_t pos, unsigned long nr_segs)
-{
-	if (rw != WRITE)
-		return (zpl_aio_read(kiocb, iovp, nr_segs, pos));
-
-	return (zpl_aio_write(kiocb, iovp, nr_segs, pos));
-}
-#else
-#error "Unknown direct IO interface"
-#endif
-
-#endif /* HAVE_VFS_RW_ITERATE */
-
-/*
- * ->llseek() hook.  SEEK_DATA and SEEK_HOLE (when the kernel defines them)
- * are resolved with zfs_holey() under the shared inode lock and then
- * applied with lseek_execute(); every other 'whence' value is passed to
- * generic_file_llseek().
- */
-static loff_t
-zpl_llseek(struct file *filp, loff_t offset, int whence)
-{
-#if defined(SEEK_HOLE) && defined(SEEK_DATA)
-	if (whence == SEEK_HOLE || whence == SEEK_DATA) {
-		struct inode *ip = filp->f_mapping->host;
-		loff_t maxbytes = ip->i_sb->s_maxbytes;
-		fstrans_cookie_t fstrans;
-		loff_t ret;
-
-		spl_inode_lock_shared(ip);
-		fstrans = spl_fstrans_mark();
-		ret = -zfs_holey(ip, whence, &offset);
-		spl_fstrans_unmark(fstrans);
-
-		/* zfs_holey() updated 'offset'; apply it to the file. */
-		if (ret == 0)
-			ret = lseek_execute(filp, ip, offset, maxbytes);
-		spl_inode_unlock_shared(ip);
-
-		return (ret);
-	}
-#endif /* SEEK_HOLE && SEEK_DATA */
-
-	return (generic_file_llseek(filp, offset, whence));
-}
-
-/*
- * It's worth taking a moment to describe how mmap is implemented
- * for zfs because it differs considerably from other Linux filesystems.
- * However, this issue is handled the same way under OpenSolaris.
- *
- * The issue is that by design zfs bypasses the Linux page cache and
- * leaves all caching up to the ARC.  This has been shown to work
- * well for the common read(2)/write(2) case.  However, mmap(2)
- * is a problem because it relies on being tightly integrated with the
- * page cache.  To handle this we cache mmap'ed files twice, once in
- * the ARC and a second time in the page cache.  The code is careful
- * to keep both copies synchronized.
- *
- * When a file with an mmap'ed region is written to using write(2)
- * both the data in the ARC and existing pages in the page cache
- * are updated.  For a read(2) data will be read first from the page
- * cache then the ARC if needed.  Neither a write(2) nor a read(2)
- * will ever result in new pages being added to the page cache.
- *
- * New pages are added to the page cache only via .readpage() which
- * is called when the vfs needs to read a page off disk to back the
- * virtual memory region.  These pages may be modified without
- * notifying the ARC and will be written out periodically via
- * .writepage().  This will occur due to either a sync or the usual
- * page aging behavior.  Note because a read(2) of a mmap'ed file
- * will always check the page cache first even when the ARC is out
- * of date correct data will still be returned.
- *
- * While this implementation ensures correct behavior it does
- * have some drawbacks.  The most obvious of which is that it
- * increases the required memory footprint when accessing mmap'ed
- * files.  It also adds additional complexity to the code keeping
- * both caches synchronized.
- *
- * Longer term it may be possible to cleanly resolve this wart by
- * mapping page cache pages directly on to the ARC buffers.  The
- * Linux address space operations are flexible enough to allow
- * selection of which pages back a particular index.  The trick
- * would be working out the details of which subsystem is in
- * charge, the ARC, the page cache, or both.  It may also prove
- * helpful to move the ARC buffers to a scatter-gather lists
- * rather than a vmalloc'ed region.
- */
-static int
-zpl_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct inode *ip = filp->f_mapping->host;
-	size_t map_len = (size_t)(vma->vm_end - vma->vm_start);
-	fstrans_cookie_t fstrans;
-	znode_t *zp = ITOZ(ip);
-	int error;
-
-	/* Let zfs_map() vet the request before the generic setup runs. */
-	fstrans = spl_fstrans_mark();
-	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
-	    map_len, vma->vm_flags);
-	spl_fstrans_unmark(fstrans);
-
-	if (error == 0)
-		error = generic_file_mmap(filp, vma);
-	if (error != 0)
-		return (error);
-
-	/* Both steps succeeded: flag the znode as mapped under z_lock. */
-	mutex_enter(&zp->z_lock);
-	zp->z_is_mapped = B_TRUE;
-	mutex_exit(&zp->z_lock);
-
-	return (0);
-}
-
-/*
- * Populate a page with data for the Linux page cache.  This function is
- * only used to support mmap(2).  There will be an identical copy of the
- * data in the ARC which is kept up to date via .write() and .writepage().
- *
- * Currently this function relies on zpl_read_common() and the O_DIRECT
- * flag to read in a page.  This works but the more correct way is to
- * update zfs_fillpage() to be Linux friendly and use that interface.
- */
-static int
-zpl_readpage(struct file *filp, struct page *pp)
-{
-	struct page *pl[1] = { pp };
-	fstrans_cookie_t fstrans;
-	struct inode *ip;
-	int error;
-
-	ASSERT(PageLocked(pp));
-	ip = pp->mapping->host;
-
-	fstrans = spl_fstrans_mark();
-	error = -zfs_getpage(ip, pl, 1);
-	spl_fstrans_unmark(fstrans);
-
-	/* Reflect the outcome in the page state flags. */
-	if (error == 0) {
-		ClearPageError(pp);
-		SetPageUptodate(pp);
-		flush_dcache_page(pp);
-	} else {
-		SetPageError(pp);
-		ClearPageUptodate(pp);
-	}
-
-	/* The page is unlocked on both the success and failure paths. */
-	unlock_page(pp);
-
-	return (error);
-}
-
-/*
- * Populate a set of pages with data for the Linux page cache.  This
- * function will only be called for read ahead and never for demand
- * paging.  For simplicity, the code relies on read_cache_pages() to
- * correctly lock each page for IO and call zpl_readpage().
- */
-static int
-zpl_readpages(struct file *filp, struct address_space *mapping,
-    struct list_head *pages, unsigned nr_pages)
-{
-	/* zpl_readpage() is invoked per page with 'filp' as its data. */
-	filler_t *filler = (filler_t *)zpl_readpage;
-
-	return (read_cache_pages(mapping, pages, filler, filp));
-}
-
-/*
- * Per-page writeback callback.  'data' is the address_space the page
- * belongs to; the page is handed to zfs_putpage() for that mapping's
- * host inode.  The zfs_putpage() result is deliberately discarded and 0
- * is always returned.
- */
-int
-zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
-{
-	struct address_space *mapping = data;
-	fstrans_cookie_t fstrans;
-
-	ASSERT(PageLocked(pp));
-	ASSERT(!PageWriteback(pp));
-
-	fstrans = spl_fstrans_mark();
-	(void) zfs_putpage(mapping->host, pp, wbc);
-	spl_fstrans_unmark(fstrans);
-
-	return (0);
-}
-
-static int
-zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
-	znode_t		*zp = ITOZ(mapping->host);
-	zfsvfs_t	*zfsvfs = ITOZSB(mapping->host);
-	enum writeback_sync_modes sync_mode;	/* mode for the final pass */
-	int result;
-
-	ZFS_ENTER(zfsvfs);	/* NOTE(review): macro may return early - confirm */
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		wbc->sync_mode = WB_SYNC_ALL;	/* sync=always forces WB_SYNC_ALL */
-	ZFS_EXIT(zfsvfs);
-	sync_mode = wbc->sync_mode;
-
-	/*
-	 * Write back the mapping's dirty pages with write_cache_pages(),
-	 * using zpl_putpage() as the per-page callback.
-	 *
-	 * We don't want to run write_cache_pages() in SYNC mode here, because
-	 * that would make putpage() wait for a single page to be committed to
-	 * disk every single time, resulting in atrocious performance. Instead
-	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
-	 * and then we commit it all in one go.
-	 */
-	wbc->sync_mode = WB_SYNC_NONE;
-	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
-	if (sync_mode != wbc->sync_mode) {	/* caller wanted more than NONE */
-		ZFS_ENTER(zfsvfs);
-		ZFS_VERIFY_ZP(zp);
-		if (zfsvfs->z_log != NULL)
-			zil_commit(zfsvfs->z_log, zp->z_id);
-		ZFS_EXIT(zfsvfs);
-
-		/*
-		 * We need to call write_cache_pages() again (we can't just
-		 * return after the commit) because the previous call in
-		 * non-SYNC mode does not guarantee that we got all the dirty
-		 * pages (see the implementation of write_cache_pages() for
-		 * details). That being said, this is a no-op in most cases.
-		 *
-		 * The requested sync mode is restored first, and the result
-		 * of this second pass replaces that of the first one.
-		 */
-		wbc->sync_mode = sync_mode;
-		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
-	}
-	return (result);
-}
-
-/*
- * Write a dirty page out to the ARC.  This hook exists only to support
- * mmap(2): mapped pages can be dirtied by plain memory operations that
- * never go through .write(), and this is how such pages are kept in sync
- * with the ARC buffers.  When the objset's sync setting is
- * ZFS_SYNC_ALWAYS the writeback is upgraded to WB_SYNC_ALL.
- */
-static int
-zpl_writepage(struct page *pp, struct writeback_control *wbc)
-{
-	struct address_space *mapping = pp->mapping;
-	zfsvfs_t *zfsvfs = ITOZSB(mapping->host);
-
-	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
-		wbc->sync_mode = WB_SYNC_ALL;
-
-	return (zpl_putpage(pp, wbc, mapping));
-}
-
-/*
- * The only flag combination which matches the behavior of zfs_space()
- * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
- * flag was introduced in the 2.6.38 kernel.
- *
- * Returns 0 on success or a negative errno: -EOPNOTSUPP for any other
- * mode (or when the kernel headers lack the flags), -EINVAL for a negative
- * offset or a non-positive length, otherwise the negated zfs_space()
- * result.  A range that starts beyond the current file size is treated as
- * a successful no-op, and a range that crosses the end of the file is
- * clamped to the file size.
- */
-#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
-long
-zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
-{
-	int error = -EOPNOTSUPP;
-
-#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
-	cred_t *cr = CRED();
-	flock64_t bf;
-	loff_t olen;
-	fstrans_cookie_t cookie;
-
-	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-		return (error);
-
-	if (offset < 0 || len <= 0)
-		return (-EINVAL);
-
-	spl_inode_lock(ip);
-	olen = i_size_read(ip);
-
-	if (offset > olen) {
-		spl_inode_unlock(ip);
-		return (0);
-	}
-
-	/*
-	 * Clamp the range to the end of the file.  Compare against the
-	 * space remaining in the file instead of computing offset + len,
-	 * which can overflow loff_t for large caller-supplied values.
-	 * offset <= olen holds here, so the subtraction cannot go negative.
-	 */
-	if (len > olen - offset)
-		len = olen - offset;
-	bf.l_type = F_WRLCK;
-	bf.l_whence = SEEK_SET;
-	bf.l_start = offset;
-	bf.l_len = len;
-	bf.l_pid = 0;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
-	spl_fstrans_unmark(cookie);
-	spl_inode_unlock(ip);
-
-	crfree(cr);
-#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */
-
-	ASSERT3S(error, <=, 0);
-	return (error);
-}
-#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
-
-#if defined(HAVE_FILE_FALLOCATE)
-/* ->fallocate() hook: resolve the file's inode and defer to the common code. */
-static long
-zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
-{
-	struct inode *ip = file_inode(filp);
-
-	return (zpl_fallocate_common(ip, mode, offset, len));
-}
-#endif /* HAVE_FILE_FALLOCATE */
-
-/* The generic FS_FL_* masks extended with the ZFS project-inherit flag. */
-#define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
-#define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
-
-/*
- * Build the ioctl flag word for 'ip' from the znode's z_pflags.  Only the
- * immutable, append-only, nodump and project-inherit bits are translated,
- * and the result is masked with ZFS_FL_USER_VISIBLE.
- */
-static uint32_t
-__zpl_ioctl_getflags(struct inode *ip)
-{
-	uint64_t pflags = ITOZ(ip)->z_pflags;
-	uint32_t ioctl_flags = 0;
-
-	ioctl_flags |= (pflags & ZFS_IMMUTABLE) ? FS_IMMUTABLE_FL : 0;
-	ioctl_flags |= (pflags & ZFS_APPENDONLY) ? FS_APPEND_FL : 0;
-	ioctl_flags |= (pflags & ZFS_NODUMP) ? FS_NODUMP_FL : 0;
-	ioctl_flags |= (pflags & ZFS_PROJINHERIT) ? ZFS_PROJINHERIT_FL : 0;
-
-	return (ioctl_flags & ZFS_FL_USER_VISIBLE);
-}
-
-/*
- * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
- * attributes common to both Linux and Solaris are mapped.
- *
- * The flag word is copied out to the user buffer 'arg'.  Returns 0 on
- * success or -EFAULT when the user buffer cannot be written.
- */
-static int
-zpl_ioctl_getflags(struct file *filp, void __user *arg)
-{
-	uint32_t flags = __zpl_ioctl_getflags(file_inode(filp));
-
-	/*
-	 * copy_to_user() returns the number of bytes it could NOT copy, not
-	 * an errno, so a non-zero result has to be translated rather than
-	 * handed back to the ioctl caller as a positive value.
-	 */
-	if (copy_to_user(arg, &flags, sizeof (flags)))
-		return (-EFAULT);
-
-	return (0);
-}
-
-/*
- * fchange() is a helper macro to detect if we have been asked to change a
- * flag. This is ugly, but the requirement that we do this is a consequence of
- * how the Linux file attribute interface was designed. Another consequence is
- * that concurrent modification of files suffers from a TOCTOU race. Neither
- * are things we can fix without modifying the kernel-userland interface, which
- * is outside of our jurisdiction.
- */
-
-#define	fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))
-
-/*
- * Validate a requested ioctl flag word for 'ip' and, when acceptable,
- * initialize 'xva' with the matching optional attribute requests.
- *
- * Returns 0 on success, -EOPNOTSUPP when a flag outside the supported set
- * is present, and -EACCES when a flag is not user modifiable, when the
- * immutable/append-only state would change without CAP_LINUX_IMMUTABLE,
- * or when zpl_inode_owner_or_capable() rejects the caller.
- */
-static int
-__zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
-{
-	const uint32_t supported = FS_IMMUTABLE_FL | FS_APPEND_FL |
-	    FS_NODUMP_FL | ZFS_PROJINHERIT_FL;
-	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
-	xoptattr_t *xoap;
-
-	if (ioctl_flags & ~supported)
-		return (-EOPNOTSUPP);
-
-	if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE)
-		return (-EACCES);
-
-	/* The capability is only consulted when one of these bits flips. */
-	if (fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
-	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) {
-		if (!capable(CAP_LINUX_IMMUTABLE))
-			return (-EACCES);
-	}
-
-	if (!zpl_inode_owner_or_capable(ip))
-		return (-EACCES);
-
-	xva_init(xva);
-	xoap = xva_getxoptattr(xva);
-
-	/*
-	 * Every attribute is requested; its value is only set to B_TRUE
-	 * when the corresponding ioctl flag is present.
-	 */
-	XVA_SET_REQ(xva, XAT_IMMUTABLE);
-	if (ioctl_flags & FS_IMMUTABLE_FL)
-		xoap->xoa_immutable = B_TRUE;
-
-	XVA_SET_REQ(xva, XAT_APPENDONLY);
-	if (ioctl_flags & FS_APPEND_FL)
-		xoap->xoa_appendonly = B_TRUE;
-
-	XVA_SET_REQ(xva, XAT_NODUMP);
-	if (ioctl_flags & FS_NODUMP_FL)
-		xoap->xoa_nodump = B_TRUE;
-
-	XVA_SET_REQ(xva, XAT_PROJINHERIT);
-	if (ioctl_flags & ZFS_PROJINHERIT_FL)
-		xoap->xoa_projinherit = B_TRUE;
-
-	return (0);
-}
-
-/*
- * Set-flags ioctl handler.  Reads the requested flag word from user
- * space, validates it with __zpl_ioctl_setflags() and applies the result
- * with zfs_setattr().  Returns 0, -EFAULT if the user buffer cannot be
- * read, or the negative error from validation / zfs_setattr().
- */
-static int
-zpl_ioctl_setflags(struct file *filp, void __user *arg)
-{
-	struct inode *ip = file_inode(filp);
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	uint32_t flags;
-	xvattr_t xva;
-	int err;
-
-	if (copy_from_user(&flags, arg, sizeof (flags)) != 0)
-		return (-EFAULT);
-
-	err = __zpl_ioctl_setflags(ip, flags, &xva);
-	if (err == 0) {
-		crhold(cr);
-		fstrans = spl_fstrans_mark();
-		err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
-		spl_fstrans_unmark(fstrans);
-		crfree(cr);
-	}
-
-	return (err);
-}
-
-/*
- * Get-xattr ioctl handler.  Fills a zfsxattr_t with the inode's ioctl
- * flag word and the znode's project id and copies it to the user buffer
- * 'arg'.  Returns 0 on success or -EFAULT when the user buffer cannot be
- * written.
- */
-static int
-zpl_ioctl_getxattr(struct file *filp, void __user *arg)
-{
-	zfsxattr_t fsx = { 0 };
-	struct inode *ip = file_inode(filp);
-
-	fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
-	fsx.fsx_projid = ITOZ(ip)->z_projid;
-
-	/*
-	 * copy_to_user() returns the number of bytes it could NOT copy, not
-	 * an errno, so a non-zero result has to be translated rather than
-	 * handed back to the ioctl caller as a positive value.
-	 */
-	if (copy_to_user(arg, &fsx, sizeof (fsx)))
-		return (-EFAULT);
-
-	return (0);
-}
-
-/*
- * Set-xattr ioctl handler.  Reads a zfsxattr_t from user space, rejects
- * invalid project ids, validates the flag word with
- * __zpl_ioctl_setflags(), adds the project id request and applies
- * everything with zfs_setattr().  Returns 0, -EFAULT, -EINVAL, or the
- * negative error from validation / zfs_setattr().
- */
-static int
-zpl_ioctl_setxattr(struct file *filp, void __user *arg)
-{
-	struct inode *ip = file_inode(filp);
-	fstrans_cookie_t fstrans;
-	cred_t *cr = CRED();
-	xoptattr_t *xoap;
-	zfsxattr_t fsx;
-	xvattr_t xva;
-	int err;
-
-	if (copy_from_user(&fsx, arg, sizeof (fsx)) != 0)
-		return (-EFAULT);
-
-	if (!zpl_is_valid_projid(fsx.fsx_projid))
-		return (-EINVAL);
-
-	err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
-	if (err != 0)
-		return (err);
-
-	/* On top of the flags, request the project id change. */
-	xoap = xva_getxoptattr(&xva);
-	XVA_SET_REQ(&xva, XAT_PROJID);
-	xoap->xoa_projid = fsx.fsx_projid;
-
-	crhold(cr);
-	fstrans = spl_fstrans_mark();
-	err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
-	spl_fstrans_unmark(fstrans);
-	crfree(cr);
-
-	return (err);
-}
-
-/*
- * ->unlocked_ioctl() dispatcher.  Routes the four supported commands to
- * their handlers and answers -ENOTTY for everything else.
- */
-static long
-zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	void *uarg = (void *)arg;
-	long rc;
-
-	switch (cmd) {
-	case FS_IOC_GETFLAGS:
-		rc = zpl_ioctl_getflags(filp, uarg);
-		break;
-	case FS_IOC_SETFLAGS:
-		rc = zpl_ioctl_setflags(filp, uarg);
-		break;
-	case ZFS_IOC_FSGETXATTR:
-		rc = zpl_ioctl_getxattr(filp, uarg);
-		break;
-	case ZFS_IOC_FSSETXATTR:
-		rc = zpl_ioctl_setxattr(filp, uarg);
-		break;
-	default:
-		rc = -ENOTTY;
-		break;
-	}
-
-	return (rc);
-}
-
-#if defined(CONFIG_COMPAT)
-/*
- * ->compat_ioctl() hook.  Translates the 32-bit get/set flags commands to
- * their native counterparts and forwards them to zpl_ioctl() with the
- * argument converted by compat_ptr(); any other command gets -ENOTTY.
- */
-static long
-zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	if (cmd == FS_IOC32_GETFLAGS)
-		cmd = FS_IOC_GETFLAGS;
-	else if (cmd == FS_IOC32_SETFLAGS)
-		cmd = FS_IOC_SETFLAGS;
-	else
-		return (-ENOTTY);
-
-	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
-}
-#endif /* CONFIG_COMPAT */
-
-
-/* Page cache callbacks: page read-in, page writeback and direct I/O. */
-const struct address_space_operations zpl_address_space_operations = {
-	.readpage	= zpl_readpage,
-	.readpages	= zpl_readpages,
-	.writepage	= zpl_writepage,
-	.writepages	= zpl_writepages,
-	.direct_IO	= zpl_direct_IO,
-};
-
-/*
- * File operations table.  Which read/write entry points are populated
- * depends on whether the kernel offers the iov_iter based interface
- * (HAVE_VFS_RW_ITERATE); the remaining optional hooks are guarded by
- * their own feature macros.
- */
-const struct file_operations zpl_file_operations = {
-	.open		= zpl_open,
-	.release	= zpl_release,
-	.llseek		= zpl_llseek,
-#if defined(HAVE_VFS_RW_ITERATE)
-#if defined(HAVE_NEW_SYNC_READ)
-	.read		= new_sync_read,
-	.write		= new_sync_write,
-#endif /* HAVE_NEW_SYNC_READ */
-	.read_iter	= zpl_iter_read,
-	.write_iter	= zpl_iter_write,
-#else
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= zpl_aio_read,
-	.aio_write	= zpl_aio_write,
-#endif /* HAVE_VFS_RW_ITERATE */
-	.mmap		= zpl_mmap,
-	.fsync		= zpl_fsync,
-#if defined(HAVE_FILE_AIO_FSYNC)
-	.aio_fsync	= zpl_aio_fsync,
-#endif /* HAVE_FILE_AIO_FSYNC */
-#if defined(HAVE_FILE_FALLOCATE)
-	.fallocate	= zpl_fallocate,
-#endif /* HAVE_FILE_FALLOCATE */
-	.unlocked_ioctl	= zpl_ioctl,
-#if defined(CONFIG_COMPAT)
-	.compat_ioctl	= zpl_compat_ioctl,
-#endif /* CONFIG_COMPAT */
-};
-
-/*
- * Directory file operations table.  The iteration hook is the most
- * capable one the kernel offers: iterate_shared, then iterate, then the
- * legacy readdir.
- */
-const struct file_operations zpl_dir_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-#if defined(HAVE_VFS_ITERATE_SHARED)
-	.iterate_shared	= zpl_iterate,
-#elif defined(HAVE_VFS_ITERATE)
-	.iterate	= zpl_iterate,
-#else
-	.readdir	= zpl_readdir,
-#endif
-	.fsync		= zpl_fsync,
-	.unlocked_ioctl = zpl_ioctl,
-#if defined(CONFIG_COMPAT)
-	.compat_ioctl   = zpl_compat_ioctl,
-#endif /* CONFIG_COMPAT */
-};

diff --git a/zfs/module/zfs/zpl_inode.c b/zfs/module/zfs/zpl_inode.c
deleted file mode 100644
index 1f228dc..0000000
--- a/zfs/module/zfs/zpl_inode.c
+++ /dev/null

@@ -1,830 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
- */
-
-
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/dmu_objset.h>
-#include <sys/vfs.h>
-#include <sys/zpl.h>
-#include <sys/file.h>
-
-
-static struct dentry *
-#ifdef HAVE_LOOKUP_NAMEIDATA
-zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-#else
-zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
-#endif
-{
-	cred_t *cr = CRED();
-	struct inode *ip;
-	int error;
-	fstrans_cookie_t cookie;
-	pathname_t *ppn = NULL;
-	pathname_t pn;
-	int zfs_flags = 0;
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-
-	if (dlen(dentry) >= ZAP_MAXNAMELEN)
-		return (ERR_PTR(-ENAMETOOLONG));
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-
-	/* If we are a case insensitive fs, we need the real name */
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
-		zfs_flags = FIGNORECASE;
-		pn_alloc(&pn);
-		ppn = &pn;
-	}
-
-	error = -zfs_lookup(dir, dname(dentry), &ip, zfs_flags, cr, NULL, ppn);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-	crfree(cr);
-
-	spin_lock(&dentry->d_lock);
-	dentry->d_time = jiffies;
-#ifndef HAVE_S_D_OP
-	d_set_d_op(dentry, &zpl_dentry_operations);
-#endif /* HAVE_S_D_OP */
-	spin_unlock(&dentry->d_lock);
-
-	if (error) {
-		/*
-		 * If we have a case sensitive fs, we do not want to
-		 * insert negative entries, so return NULL for ENOENT.
-		 * Fall through if the error is not ENOENT. Also free memory.
-		 */
-		if (ppn) {
-			pn_free(ppn);
-			if (error == -ENOENT)
-				return (NULL);
-		}
-
-		if (error == -ENOENT)
-			return (d_splice_alias(NULL, dentry));
-		else
-			return (ERR_PTR(error));
-	}
-
-	/*
-	 * If we are case insensitive, call the correct function
-	 * to install the name.
-	 */
-	if (ppn) {
-		struct dentry *new_dentry;
-		struct qstr ci_name;
-
-		if (strcmp(dname(dentry), pn.pn_buf) == 0) {
-			new_dentry = d_splice_alias(ip,  dentry);
-		} else {
-			ci_name.name = pn.pn_buf;
-			ci_name.len = strlen(pn.pn_buf);
-			new_dentry = d_add_ci(dentry, ip, &ci_name);
-		}
-		pn_free(ppn);
-		return (new_dentry);
-	} else {
-		return (d_splice_alias(ip, dentry));
-	}
-}
-
-void
-zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr)
-{
-	vap->va_mask = ATTR_MODE;
-	vap->va_mode = mode;
-	vap->va_uid = crgetfsuid(cr);
-
-	if (dir && dir->i_mode & S_ISGID) {
-		vap->va_gid = KGID_TO_SGID(dir->i_gid);
-		if (S_ISDIR(mode))
-			vap->va_mode |= S_ISGID;
-	} else {
-		vap->va_gid = crgetfsgid(cr);
-	}
-}
-
-static int
-#ifdef HAVE_CREATE_NAMEIDATA
-zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
-    struct nameidata *nd)
-#else
-zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
-    bool flag)
-#endif
-{
-	cred_t *cr = CRED();
-	struct inode *ip;
-	vattr_t *vap;
-	int error;
-	fstrans_cookie_t cookie;
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode, cr);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
-	if (error == 0) {
-		d_instantiate(dentry, ip);
-
-		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
-		if (error == 0)
-			error = zpl_init_acl(ip, dir);
-
-		if (error)
-			(void) zfs_remove(dir, dname(dentry), cr, 0);
-	}
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
-    dev_t rdev)
-{
-	cred_t *cr = CRED();
-	struct inode *ip;
-	vattr_t *vap;
-	int error;
-	fstrans_cookie_t cookie;
-
-	/*
-	 * We currently expect Linux to supply rdev=0 for all sockets
-	 * and fifos, but we want to know if this behavior ever changes.
-	 */
-	if (S_ISSOCK(mode) || S_ISFIFO(mode))
-		ASSERT(rdev == 0);
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode, cr);
-	vap->va_rdev = rdev;
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
-	if (error == 0) {
-		d_instantiate(dentry, ip);
-
-		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
-		if (error == 0)
-			error = zpl_init_acl(ip, dir);
-
-		if (error)
-			(void) zfs_remove(dir, dname(dentry), cr, 0);
-	}
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-#ifdef HAVE_TMPFILE
-static int
-zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
-{
-	cred_t *cr = CRED();
-	struct inode *ip;
-	vattr_t *vap;
-	int error;
-	fstrans_cookie_t cookie;
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	/*
-	 * The VFS does not apply the umask, therefore it is applied here
-	 * when POSIX ACLs are not enabled.
-	 */
-	if (!IS_POSIXACL(dir))
-		mode &= ~current_umask();
-	zpl_vap_init(vap, dir, mode, cr);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
-	if (error == 0) {
-		/* d_tmpfile will do drop_nlink, so we should set it first */
-		set_nlink(ip, 1);
-		d_tmpfile(dentry, ip);
-
-		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
-		if (error == 0)
-			error = zpl_init_acl(ip, dir);
-		/*
-		 * don't need to handle error here, file is already in
-		 * unlinked set.
-		 */
-	}
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-#endif
-
-static int
-zpl_unlink(struct inode *dir, struct dentry *dentry)
-{
-	cred_t *cr = CRED();
-	int error;
-	fstrans_cookie_t cookie;
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_remove(dir, dname(dentry), cr, 0);
-
-	/*
-	 * For a CI FS we must invalidate the dentry to prevent the
-	 * creation of negative entries.
-	 */
-	if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
-		d_invalidate(dentry);
-
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
-{
-	cred_t *cr = CRED();
-	vattr_t *vap;
-	struct inode *ip;
-	int error;
-	fstrans_cookie_t cookie;
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
-	if (error == 0) {
-		d_instantiate(dentry, ip);
-
-		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
-		if (error == 0)
-			error = zpl_init_acl(ip, dir);
-
-		if (error)
-			(void) zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
-	}
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_rmdir(struct inode *dir, struct dentry *dentry)
-{
-	cred_t *cr = CRED();
-	int error;
-	fstrans_cookie_t cookie;
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
-
-	/*
-	 * For a CI FS we must invalidate the dentry to prevent the
-	 * creation of negative entries.
-	 */
-	if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
-		d_invalidate(dentry);
-
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
-    unsigned int query_flags)
-{
-	int error;
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-
-	/*
-	 * XXX request_mask and query_flags currently ignored.
-	 */
-
-	error = -zfs_getattr_fast(path->dentry->d_inode, stat);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-ZPL_GETATTR_WRAPPER(zpl_getattr);
-
-static int
-zpl_setattr(struct dentry *dentry, struct iattr *ia)
-{
-	struct inode *ip = dentry->d_inode;
-	cred_t *cr = CRED();
-	vattr_t *vap;
-	int error;
-	fstrans_cookie_t cookie;
-
-	error = setattr_prepare(dentry, ia);
-	if (error)
-		return (error);
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
-	vap->va_mode = ia->ia_mode;
-	vap->va_uid = KUID_TO_SUID(ia->ia_uid);
-	vap->va_gid = KGID_TO_SGID(ia->ia_gid);
-	vap->va_size = ia->ia_size;
-	vap->va_atime = ia->ia_atime;
-	vap->va_mtime = ia->ia_mtime;
-	vap->va_ctime = ia->ia_ctime;
-
-	if (vap->va_mask & ATTR_ATIME)
-		ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_setattr(ip, vap, 0, cr);
-	if (!error && (ia->ia_valid & ATTR_MODE))
-		error = zpl_chmod_acl(ip);
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_rename2(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
-{
-	cred_t *cr = CRED();
-	int error;
-	fstrans_cookie_t cookie;
-
-	/* We don't have renameat2(2) support */
-	if (flags)
-		return (-EINVAL);
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-#ifndef HAVE_RENAME_WANTS_FLAGS
-static int
-zpl_rename(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry)
-{
-	return (zpl_rename2(sdip, sdentry, tdip, tdentry, 0));
-}
-#endif
-
-static int
-zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
-{
-	cred_t *cr = CRED();
-	vattr_t *vap;
-	struct inode *ip;
-	int error;
-	fstrans_cookie_t cookie;
-
-	crhold(cr);
-	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
-	if (error == 0) {
-		d_instantiate(dentry, ip);
-
-		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
-		if (error)
-			(void) zfs_remove(dir, dname(dentry), cr, 0);
-	}
-
-	spl_fstrans_unmark(cookie);
-	kmem_free(vap, sizeof (vattr_t));
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-#if defined(HAVE_PUT_LINK_COOKIE)
-static void
-zpl_put_link(struct inode *unused, void *cookie)
-{
-	kmem_free(cookie, MAXPATHLEN);
-}
-#elif defined(HAVE_PUT_LINK_NAMEIDATA)
-static void
-zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
-{
-	const char *link = nd_get_link(nd);
-
-	if (!IS_ERR(link))
-		kmem_free(link, MAXPATHLEN);
-}
-#elif defined(HAVE_PUT_LINK_DELAYED)
-static void
-zpl_put_link(void *ptr)
-{
-	kmem_free(ptr, MAXPATHLEN);
-}
-#endif
-
-static int
-zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
-{
-	fstrans_cookie_t cookie;
-	cred_t *cr = CRED();
-	struct iovec iov;
-	uio_t uio = { { 0 }, 0 };
-	int error;
-
-	crhold(cr);
-	*link = NULL;
-	iov.iov_len = MAXPATHLEN;
-	iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-
-	uio.uio_iov = &iov;
-	uio.uio_iovcnt = 1;
-	uio.uio_segflg = UIO_SYSSPACE;
-	uio.uio_resid = (MAXPATHLEN - 1);
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_readlink(ip, &uio, cr);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-
-	if (error)
-		kmem_free(iov.iov_base, MAXPATHLEN);
-	else
-		*link = iov.iov_base;
-
-	return (error);
-}
-
-#if defined(HAVE_GET_LINK_DELAYED)
-const char *
-zpl_get_link(struct dentry *dentry, struct inode *inode,
-    struct delayed_call *done)
-{
-	char *link = NULL;
-	int error;
-
-	if (!dentry)
-		return (ERR_PTR(-ECHILD));
-
-	error = zpl_get_link_common(dentry, inode, &link);
-	if (error)
-		return (ERR_PTR(error));
-
-	set_delayed_call(done, zpl_put_link, link);
-
-	return (link);
-}
-#elif defined(HAVE_GET_LINK_COOKIE)
-const char *
-zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie)
-{
-	char *link = NULL;
-	int error;
-
-	if (!dentry)
-		return (ERR_PTR(-ECHILD));
-
-	error = zpl_get_link_common(dentry, inode, &link);
-	if (error)
-		return (ERR_PTR(error));
-
-	return (*cookie = link);
-}
-#elif defined(HAVE_FOLLOW_LINK_COOKIE)
-const char *
-zpl_follow_link(struct dentry *dentry, void **cookie)
-{
-	char *link = NULL;
-	int error;
-
-	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
-	if (error)
-		return (ERR_PTR(error));
-
-	return (*cookie = link);
-}
-#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA)
-static void *
-zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	char *link = NULL;
-	int error;
-
-	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
-	if (error)
-		nd_set_link(nd, ERR_PTR(error));
-	else
-		nd_set_link(nd, link);
-
-	return (NULL);
-}
-#endif
-
-static int
-zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
-{
-	cred_t *cr = CRED();
-	struct inode *ip = old_dentry->d_inode;
-	int error;
-	fstrans_cookie_t cookie;
-
-	if (ip->i_nlink >= ZFS_LINK_MAX)
-		return (-EMLINK);
-
-	crhold(cr);
-	ip->i_ctime = current_time(ip);
-	igrab(ip); /* Use ihold() if available */
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_link(dir, ip, dname(dentry), cr, 0);
-	if (error) {
-		iput(ip);
-		goto out;
-	}
-
-	d_instantiate(dentry, ip);
-out:
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-#ifdef HAVE_INODE_TRUNCATE_RANGE
-static void
-zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
-{
-	cred_t *cr = CRED();
-	flock64_t bf;
-	fstrans_cookie_t cookie;
-
-	ASSERT3S(start, <=, end);
-
-	/*
-	 * zfs_freesp() will interpret (len == 0) as meaning "truncate until
-	 * the end of the file". We don't want that.
-	 */
-	if (start == end)
-		return;
-
-	crhold(cr);
-
-	bf.l_type = F_WRLCK;
-	bf.l_whence = SEEK_SET;
-	bf.l_start = start;
-	bf.l_len = end - start;
-	bf.l_pid = 0;
-	cookie = spl_fstrans_mark();
-	zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr);
-	spl_fstrans_unmark(cookie);
-
-	crfree(cr);
-}
-#endif /* HAVE_INODE_TRUNCATE_RANGE */
-
-#ifdef HAVE_INODE_FALLOCATE
-static long
-zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len)
-{
-	return (zpl_fallocate_common(ip, mode, offset, len));
-}
-#endif /* HAVE_INODE_FALLOCATE */
-
-static int
-#ifdef HAVE_D_REVALIDATE_NAMEIDATA
-zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-	unsigned int flags = (nd ? nd->flags : 0);
-#else
-zpl_revalidate(struct dentry *dentry, unsigned int flags)
-{
-#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
-	/* CSTYLED */
-	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
-	int error;
-
-	if (flags & LOOKUP_RCU)
-		return (-ECHILD);
-
-	/*
-	 * Automounted snapshots rely on periodic dentry revalidation
-	 * to defer snapshots from being automatically unmounted.
-	 */
-	if (zfsvfs->z_issnap) {
-		if (time_after(jiffies, zfsvfs->z_snap_defer_time +
-		    MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
-			zfsvfs->z_snap_defer_time = jiffies;
-			zfsctl_snapshot_unmount_delay(zfsvfs->z_os->os_spa,
-			    dmu_objset_id(zfsvfs->z_os), zfs_expire_snapshot);
-		}
-	}
-
-	/*
-	 * After a rollback negative dentries created before the rollback
-	 * time must be invalidated.  Otherwise they can obscure files which
-	 * are only present in the rolled back dataset.
-	 */
-	if (dentry->d_inode == NULL) {
-		spin_lock(&dentry->d_lock);
-		error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
-		spin_unlock(&dentry->d_lock);
-
-		if (error)
-			return (0);
-	}
-
-	/*
-	 * The dentry may reference a stale inode if a mounted file system
-	 * was rolled back to a point in time where the object didn't exist.
-	 */
-	if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
-		return (0);
-
-	return (1);
-}
-
-const struct inode_operations zpl_inode_operations = {
-	.setattr	= zpl_setattr,
-	.getattr	= zpl_getattr,
-#ifdef HAVE_GENERIC_SETXATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
-	.listxattr	= zpl_xattr_list,
-#ifdef HAVE_INODE_TRUNCATE_RANGE
-	.truncate_range = zpl_truncate_range,
-#endif /* HAVE_INODE_TRUNCATE_RANGE */
-#ifdef HAVE_INODE_FALLOCATE
-	.fallocate	= zpl_fallocate,
-#endif /* HAVE_INODE_FALLOCATE */
-#if defined(CONFIG_FS_POSIX_ACL)
-#if defined(HAVE_SET_ACL)
-	.set_acl	= zpl_set_acl,
-#endif
-#if defined(HAVE_GET_ACL)
-	.get_acl	= zpl_get_acl,
-#elif defined(HAVE_CHECK_ACL)
-	.check_acl	= zpl_check_acl,
-#elif defined(HAVE_PERMISSION)
-	.permission	= zpl_permission,
-#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* CONFIG_FS_POSIX_ACL */
-};
-
-const struct inode_operations zpl_dir_inode_operations = {
-	.create		= zpl_create,
-	.lookup		= zpl_lookup,
-	.link		= zpl_link,
-	.unlink		= zpl_unlink,
-	.symlink	= zpl_symlink,
-	.mkdir		= zpl_mkdir,
-	.rmdir		= zpl_rmdir,
-	.mknod		= zpl_mknod,
-#ifdef HAVE_RENAME_WANTS_FLAGS
-	.rename		= zpl_rename2,
-#else
-	.rename		= zpl_rename,
-#endif
-#ifdef HAVE_TMPFILE
-	.tmpfile	= zpl_tmpfile,
-#endif
-	.setattr	= zpl_setattr,
-	.getattr	= zpl_getattr,
-#ifdef HAVE_GENERIC_SETXATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
-	.listxattr	= zpl_xattr_list,
-#if defined(CONFIG_FS_POSIX_ACL)
-#if defined(HAVE_SET_ACL)
-	.set_acl	= zpl_set_acl,
-#endif
-#if defined(HAVE_GET_ACL)
-	.get_acl	= zpl_get_acl,
-#elif defined(HAVE_CHECK_ACL)
-	.check_acl	= zpl_check_acl,
-#elif defined(HAVE_PERMISSION)
-	.permission	= zpl_permission,
-#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* CONFIG_FS_POSIX_ACL */
-};
-
-const struct inode_operations zpl_symlink_inode_operations = {
-#ifdef HAVE_GENERIC_READLINK
-	.readlink	= generic_readlink,
-#endif
-#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE)
-	.get_link	= zpl_get_link,
-#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA)
-	.follow_link	= zpl_follow_link,
-#endif
-#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA)
-	.put_link	= zpl_put_link,
-#endif
-	.setattr	= zpl_setattr,
-	.getattr	= zpl_getattr,
-#ifdef HAVE_GENERIC_SETXATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
-	.listxattr	= zpl_xattr_list,
-};
-
-const struct inode_operations zpl_special_inode_operations = {
-	.setattr	= zpl_setattr,
-	.getattr	= zpl_getattr,
-#ifdef HAVE_GENERIC_SETXATTR
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
-	.listxattr	= zpl_xattr_list,
-#if defined(CONFIG_FS_POSIX_ACL)
-#if defined(HAVE_SET_ACL)
-	.set_acl	= zpl_set_acl,
-#endif
-#if defined(HAVE_GET_ACL)
-	.get_acl	= zpl_get_acl,
-#elif defined(HAVE_CHECK_ACL)
-	.check_acl	= zpl_check_acl,
-#elif defined(HAVE_PERMISSION)
-	.permission	= zpl_permission,
-#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* CONFIG_FS_POSIX_ACL */
-};
-
-dentry_operations_t zpl_dentry_operations = {
-	.d_revalidate	= zpl_revalidate,
-};

diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c
deleted file mode 100644
index 810ab28..0000000
--- a/zfs/module/zfs/zpl_super.c
+++ /dev/null

@@ -1,426 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- */
-
-
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zpl.h>
-
-
-static struct inode *
-zpl_inode_alloc(struct super_block *sb)
-{
-	struct inode *ip;
-
-	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
-	inode_set_iversion(ip, 1);
-
-	return (ip);
-}
-
-static void
-zpl_inode_destroy(struct inode *ip)
-{
-	ASSERT(atomic_read(&ip->i_count) == 0);
-	zfs_inode_destroy(ip);
-}
-
-/*
- * Called from __mark_inode_dirty() to reflect that something in the
- * inode has changed.  We use it to ensure the znode system attributes
- * are always strictly update to date with respect to the inode.
- */
-#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
-static void
-zpl_dirty_inode(struct inode *ip, int flags)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	zfs_dirty_inode(ip, flags);
-	spl_fstrans_unmark(cookie);
-}
-#else
-static void
-zpl_dirty_inode(struct inode *ip)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	zfs_dirty_inode(ip, 0);
-	spl_fstrans_unmark(cookie);
-}
-#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
-
-/*
- * When ->drop_inode() is called its return value indicates if the
- * inode should be evicted from the inode cache.  If the inode is
- * unhashed and has no links the default policy is to evict it
- * immediately.
- *
- * Prior to 2.6.36 this eviction was accomplished by the vfs calling
- * ->delete_inode().  It was ->delete_inode()'s responsibility to
- * truncate the inode pages and call clear_inode().  The call to
- * clear_inode() synchronously invalidates all the buffers and
- * calls ->clear_inode().  It was ->clear_inode()'s responsibility
- * to cleanup and filesystem specific data before freeing the inode.
- *
- * This elaborate mechanism was replaced by ->evict_inode() which
- * does the job of both ->delete_inode() and ->clear_inode().  It
- * will be called exactly once, and when it returns the inode must
- * be in a state where it can simply be freed.i
- *
- * The ->evict_inode() callback must minimally truncate the inode pages,
- * and call clear_inode().  For 2.6.35 and later kernels this will
- * simply update the inode state, with the sync occurring before the
- * truncate in evict().  For earlier kernels clear_inode() maps to
- * end_writeback() which is responsible for completing all outstanding
- * write back.  In either case, once this is done it is safe to cleanup
- * any remaining inode specific data via zfs_inactive().
- * remaining filesystem specific data.
- */
-#ifdef HAVE_EVICT_INODE
-static void
-zpl_evict_inode(struct inode *ip)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	truncate_setsize(ip, 0);
-	clear_inode(ip);
-	zfs_inactive(ip);
-	spl_fstrans_unmark(cookie);
-}
-
-#else
-
-static void
-zpl_drop_inode(struct inode *ip)
-{
-	generic_delete_inode(ip);
-}
-
-static void
-zpl_clear_inode(struct inode *ip)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	zfs_inactive(ip);
-	spl_fstrans_unmark(cookie);
-}
-
-static void
-zpl_inode_delete(struct inode *ip)
-{
-	truncate_setsize(ip, 0);
-	clear_inode(ip);
-}
-#endif /* HAVE_EVICT_INODE */
-
-static void
-zpl_put_super(struct super_block *sb)
-{
-	fstrans_cookie_t cookie;
-	int error;
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_umount(sb);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-}
-
-static int
-zpl_sync_fs(struct super_block *sb, int wait)
-{
-	fstrans_cookie_t cookie;
-	cred_t *cr = CRED();
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	error = -zfs_sync(sb, wait, cr);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
-{
-	fstrans_cookie_t cookie;
-	int error;
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_statvfs(dentry, statp);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-
-	/*
-	 * If required by a 32-bit system call, dynamically scale the
-	 * block size up to 16MiB and decrease the block counts.  This
-	 * allows for a maximum size of 64EiB to be reported.  The file
-	 * counts must be artificially capped at 2^32-1.
-	 */
-	if (unlikely(zpl_is_32bit_api())) {
-		while (statp->f_blocks > UINT32_MAX &&
-		    statp->f_bsize < SPA_MAXBLOCKSIZE) {
-			statp->f_frsize <<= 1;
-			statp->f_bsize <<= 1;
-
-			statp->f_blocks >>= 1;
-			statp->f_bfree >>= 1;
-			statp->f_bavail >>= 1;
-		}
-
-		uint64_t usedobjs = statp->f_files - statp->f_ffree;
-		statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
-		statp->f_files = statp->f_ffree + usedobjs;
-	}
-
-	return (error);
-}
-
-static int
-zpl_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-	zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
-	fstrans_cookie_t cookie;
-	int error;
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_remount(sb, flags, &zm);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-__zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
-{
-	seq_printf(seq, ",%s",
-	    zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
-
-#ifdef CONFIG_FS_POSIX_ACL
-	switch (zfsvfs->z_acl_type) {
-	case ZFS_ACLTYPE_POSIXACL:
-		seq_puts(seq, ",posixacl");
-		break;
-	default:
-		seq_puts(seq, ",noacl");
-		break;
-	}
-#endif /* CONFIG_FS_POSIX_ACL */
-
-	return (0);
-}
-
-#ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY
-static int
-zpl_show_options(struct seq_file *seq, struct dentry *root)
-{
-	return (__zpl_show_options(seq, root->d_sb->s_fs_info));
-}
-#else
-static int
-zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
-{
-	return (__zpl_show_options(seq, vfsp->mnt_sb->s_fs_info));
-}
-#endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */
-
-static int
-zpl_fill_super(struct super_block *sb, void *data, int silent)
-{
-	zfs_mnt_t *zm = (zfs_mnt_t *)data;
-	fstrans_cookie_t cookie;
-	int error;
-
-	cookie = spl_fstrans_mark();
-	error = -zfs_domount(sb, zm, silent);
-	spl_fstrans_unmark(cookie);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_test_super(struct super_block *s, void *data)
-{
-	zfsvfs_t *zfsvfs = s->s_fs_info;
-	objset_t *os = data;
-
-	if (zfsvfs == NULL)
-		return (0);
-
-	return (os == zfsvfs->z_os);
-}
-
-static struct super_block *
-zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
-{
-	struct super_block *s;
-	objset_t *os;
-	int err;
-
-	err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
-	if (err)
-		return (ERR_PTR(-err));
-
-	/*
-	 * The dsl pool lock must be released prior to calling sget().
-	 * It is possible sget() may block on the lock in grab_super()
-	 * while deactivate_super() holds that same lock and waits for
-	 * a txg sync.  If the dsl_pool lock is held over sget()
-	 * this can prevent the pool sync and cause a deadlock.
-	 */
-	dsl_pool_rele(dmu_objset_pool(os), FTAG);
-	s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os);
-	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
-
-	if (IS_ERR(s))
-		return (ERR_CAST(s));
-
-	if (s->s_root == NULL) {
-		err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
-		if (err) {
-			deactivate_locked_super(s);
-			return (ERR_PTR(err));
-		}
-		s->s_flags |= SB_ACTIVE;
-	} else if ((flags ^ s->s_flags) & SB_RDONLY) {
-		deactivate_locked_super(s);
-		return (ERR_PTR(-EBUSY));
-	}
-
-	return (s);
-}
-
-#ifdef HAVE_FST_MOUNT
-static struct dentry *
-zpl_mount(struct file_system_type *fs_type, int flags,
-    const char *osname, void *data)
-{
-	zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
-
-	struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
-	if (IS_ERR(sb))
-		return (ERR_CAST(sb));
-
-	return (dget(sb->s_root));
-}
-#else
-static int
-zpl_get_sb(struct file_system_type *fs_type, int flags,
-    const char *osname, void *data, struct vfsmount *mnt)
-{
-	zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
-
-	struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
-	if (IS_ERR(sb))
-		return (PTR_ERR(sb));
-
-	(void) simple_set_mnt(mnt, sb);
-
-	return (0);
-}
-#endif /* HAVE_FST_MOUNT */
-
-static void
-zpl_kill_sb(struct super_block *sb)
-{
-	zfs_preumount(sb);
-	kill_anon_super(sb);
-
-#ifdef HAVE_S_INSTANCES_LIST_HEAD
-	sb->s_instances.next = &(zpl_fs_type.fs_supers);
-#endif /* HAVE_S_INSTANCES_LIST_HEAD */
-}
-
-void
-zpl_prune_sb(int64_t nr_to_scan, void *arg)
-{
-	struct super_block *sb = (struct super_block *)arg;
-	int objects = 0;
-
-	(void) -zfs_prune(sb, nr_to_scan, &objects);
-}
-
-#ifdef HAVE_NR_CACHED_OBJECTS
-static int
-zpl_nr_cached_objects(struct super_block *sb)
-{
-	return (0);
-}
-#endif /* HAVE_NR_CACHED_OBJECTS */
-
-#ifdef HAVE_FREE_CACHED_OBJECTS
-static void
-zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
-{
-	/* noop */
-}
-#endif /* HAVE_FREE_CACHED_OBJECTS */
-
-const struct super_operations zpl_super_operations = {
-	.alloc_inode		= zpl_inode_alloc,
-	.destroy_inode		= zpl_inode_destroy,
-	.dirty_inode		= zpl_dirty_inode,
-	.write_inode		= NULL,
-#ifdef HAVE_EVICT_INODE
-	.evict_inode		= zpl_evict_inode,
-#else
-	.drop_inode		= zpl_drop_inode,
-	.clear_inode		= zpl_clear_inode,
-	.delete_inode		= zpl_inode_delete,
-#endif /* HAVE_EVICT_INODE */
-	.put_super		= zpl_put_super,
-	.sync_fs		= zpl_sync_fs,
-	.statfs			= zpl_statfs,
-	.remount_fs		= zpl_remount_fs,
-	.show_options		= zpl_show_options,
-	.show_stats		= NULL,
-#ifdef HAVE_NR_CACHED_OBJECTS
-	.nr_cached_objects	= zpl_nr_cached_objects,
-#endif /* HAVE_NR_CACHED_OBJECTS */
-#ifdef HAVE_FREE_CACHED_OBJECTS
-	.free_cached_objects	= zpl_free_cached_objects,
-#endif /* HAVE_FREE_CACHED_OBJECTS */
-};
-
-struct file_system_type zpl_fs_type = {
-	.owner			= THIS_MODULE,
-	.name			= ZFS_DRIVER,
-#ifdef HAVE_FST_MOUNT
-	.mount			= zpl_mount,
-#else
-	.get_sb			= zpl_get_sb,
-#endif /* HAVE_FST_MOUNT */
-	.kill_sb		= zpl_kill_sb,
-};

diff --git a/zfs/module/zfs/zpl_xattr.c b/zfs/module/zfs/zpl_xattr.c
deleted file mode 100644
index 95523f2..0000000
--- a/zfs/module/zfs/zpl_xattr.c
+++ /dev/null

@@ -1,1548 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- *
- * Extended attributes (xattr) on Solaris are implemented as files
- * which exist in a hidden xattr directory.  These extended attributes
- * can be accessed using the attropen() system call which opens
- * the extended attribute.  It can then be manipulated just like
- * a standard file descriptor.  This has a couple advantages such
- * as practically no size limit on the file, and the extended
- * attributes permissions may differ from those of the parent file.
- * This interface is really quite clever, but it's also completely
- * different than what is supported on Linux.  It also comes with a
- * steep performance penalty when accessing small xattrs because they
- * are not stored with the parent file.
- *
- * Under Linux extended attributes are manipulated by the system
- * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
- * extended attributes to be name/value pairs where the name is a
- * NULL terminated string.  The name must also include one of the
- * following namespace prefixes:
- *
- *   user     - No restrictions and is available to user applications.
- *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
- *   system   - Used for access control lists (system.nfs4_acl, etc).
- *   security - Used by SELinux to store a files security context.
- *
- * The value under Linux to limited to 65536 bytes of binary data.
- * In practice, individual xattrs tend to be much smaller than this
- * and are typically less than 100 bytes.  A good example of this
- * are the security.selinux xattrs which are less than 100 bytes and
- * exist for every file when xattr labeling is enabled.
- *
- * The Linux xattr implementation has been written to take advantage of
- * this typical usage.  When the dataset property 'xattr=sa' is set,
- * then xattrs will be preferentially stored as System Attributes (SA).
- * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
- * up to 64k of xattrs to be stored in the spill block.  If additional
- * xattr space is required, which is unlikely under Linux, they will
- * be stored using the traditional directory approach.
- *
- * This optimization results in roughly a 3x performance improvement
- * when accessing xattrs because it avoids the need to perform a seek
- * for every xattr value.  When multiple xattrs are stored per-file
- * the performance improvements are even greater because all of the
- * xattrs stored in the spill block will be cached.
- *
- * However, by default SA based xattrs are disabled in the Linux port
- * to maximize compatibility with other implementations.  If you do
- * enable SA based xattrs then they will not be visible on platforms
- * which do not support this feature.
- *
- * NOTE: One additional consequence of the xattr directory implementation
- * is that when an extended attribute is manipulated an inode is created.
- * This inode will exist in the Linux inode cache but there will be no
- * associated entry in the dentry cache which references it.  This is
- * safe but it may result in some confusion.  Enabling SA based xattrs
- * largely avoids the issue except in the overflow case.
- */
-
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zap.h>
-#include <sys/vfs.h>
-#include <sys/zpl.h>
-
-typedef struct xattr_filldir {
-	size_t size;
-	size_t offset;
-	char *buf;
-	struct dentry *dentry;
-} xattr_filldir_t;
-
-static const struct xattr_handler *zpl_xattr_handler(const char *);
-
-static int
-zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
-{
-	static const struct xattr_handler *handler;
-	struct dentry *d = xf->dentry;
-
-	handler = zpl_xattr_handler(name);
-	if (!handler)
-		return (0);
-
-	if (handler->list) {
-#if defined(HAVE_XATTR_LIST_SIMPLE)
-		if (!handler->list(d))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_DENTRY)
-		if (!handler->list(d, NULL, 0, name, name_len, 0))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_HANDLER)
-		if (!handler->list(handler, d, NULL, 0, name, name_len))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_INODE)
-		if (!handler->list(d->d_inode, NULL, 0, name, name_len))
-			return (0);
-#endif
-	}
-
-	return (1);
-}
-
-/*
- * Determine is a given xattr name should be visible and if so copy it
- * in to the provided buffer (xf->buf).
- */
-static int
-zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
-{
-	/* Check permissions using the per-namespace list xattr handler. */
-	if (!zpl_xattr_permission(xf, name, name_len))
-		return (0);
-
-	/* When xf->buf is NULL only calculate the required size. */
-	if (xf->buf) {
-		if (xf->offset + name_len + 1 > xf->size)
-			return (-ERANGE);
-
-		memcpy(xf->buf + xf->offset, name, name_len);
-		xf->buf[xf->offset + name_len] = '\0';
-	}
-
-	xf->offset += (name_len + 1);
-
-	return (0);
-}
-
-/*
- * Read as many directory entry names as will fit in to the provided buffer,
- * or when no buffer is provided calculate the required buffer size.
- */
-int
-zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
-{
-	zap_cursor_t zc;
-	zap_attribute_t	zap;
-	int error;
-
-	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
-
-	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
-
-		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
-			error = -ENXIO;
-			break;
-		}
-
-		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
-		if (error)
-			break;
-
-		zap_cursor_advance(&zc);
-	}
-
-	zap_cursor_fini(&zc);
-
-	if (error == -ENOENT)
-		error = 0;
-
-	return (error);
-}
-
-static ssize_t
-zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
-{
-	struct inode *ip = xf->dentry->d_inode;
-	struct inode *dxip = NULL;
-	int error;
-
-	/* Lookup the xattr directory */
-	error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
-	if (error) {
-		if (error == -ENOENT)
-			error = 0;
-
-		return (error);
-	}
-
-	error = zpl_xattr_readdir(dxip, xf);
-	iput(dxip);
-
-	return (error);
-}
-
-static ssize_t
-zpl_xattr_list_sa(xattr_filldir_t *xf)
-{
-	znode_t *zp = ITOZ(xf->dentry->d_inode);
-	nvpair_t *nvp = NULL;
-	int error = 0;
-
-	mutex_enter(&zp->z_lock);
-	if (zp->z_xattr_cached == NULL)
-		error = -zfs_sa_get_xattr(zp);
-	mutex_exit(&zp->z_lock);
-
-	if (error)
-		return (error);
-
-	ASSERT(zp->z_xattr_cached);
-
-	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
-		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
-
-		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
-		    strlen(nvpair_name(nvp)));
-		if (error)
-			return (error);
-	}
-
-	return (0);
-}
-
-ssize_t
-zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
-{
-	znode_t *zp = ITOZ(dentry->d_inode);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
-	cred_t *cr = CRED();
-	fstrans_cookie_t cookie;
-	int error = 0;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
-	rw_enter(&zp->z_xattr_lock, RW_READER);
-
-	if (zfsvfs->z_use_sa && zp->z_is_sa) {
-		error = zpl_xattr_list_sa(&xf);
-		if (error)
-			goto out;
-	}
-
-	error = zpl_xattr_list_dir(&xf, cr);
-	if (error)
-		goto out;
-
-	error = xf.offset;
-out:
-
-	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-
-	return (error);
-}
-
-static int
-zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
-    size_t size, cred_t *cr)
-{
-	struct inode *dxip = NULL;
-	struct inode *xip = NULL;
-	loff_t pos = 0;
-	int error;
-
-	/* Lookup the xattr directory */
-	error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL);
-	if (error)
-		goto out;
-
-	/* Lookup a specific xattr name in the directory */
-	error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
-	if (error)
-		goto out;
-
-	if (!size) {
-		error = i_size_read(xip);
-		goto out;
-	}
-
-	if (size < i_size_read(xip)) {
-		error = -ERANGE;
-		goto out;
-	}
-
-	error = zpl_read_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr);
-out:
-	if (xip)
-		iput(xip);
-
-	if (dxip)
-		iput(dxip);
-
-	return (error);
-}
-
-static int
-zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
-{
-	znode_t *zp = ITOZ(ip);
-	uchar_t *nv_value;
-	uint_t nv_size;
-	int error = 0;
-
-	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
-
-	mutex_enter(&zp->z_lock);
-	if (zp->z_xattr_cached == NULL)
-		error = -zfs_sa_get_xattr(zp);
-	mutex_exit(&zp->z_lock);
-
-	if (error)
-		return (error);
-
-	ASSERT(zp->z_xattr_cached);
-	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
-	    &nv_value, &nv_size);
-	if (error)
-		return (error);
-
-	if (size == 0 || value == NULL)
-		return (nv_size);
-
-	if (size < nv_size)
-		return (-ERANGE);
-
-	memcpy(value, nv_value, nv_size);
-
-	return (nv_size);
-}
-
-static int
-__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
-    cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	int error;
-
-	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
-
-	if (zfsvfs->z_use_sa && zp->z_is_sa) {
-		error = zpl_xattr_get_sa(ip, name, value, size);
-		if (error != -ENOENT)
-			goto out;
-	}
-
-	error = zpl_xattr_get_dir(ip, name, value, size, cr);
-out:
-	if (error == -ENOENT)
-		error = -ENODATA;
-
-	return (error);
-}
-
-#define	XATTR_NOENT	0x0
-#define	XATTR_IN_SA	0x1
-#define	XATTR_IN_DIR	0x2
-/* check where the xattr resides */
-static int
-__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	int error;
-
-	ASSERT(where);
-	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
-
-	*where = XATTR_NOENT;
-	if (zfsvfs->z_use_sa && zp->z_is_sa) {
-		error = zpl_xattr_get_sa(ip, name, NULL, 0);
-		if (error >= 0)
-			*where |= XATTR_IN_SA;
-		else if (error != -ENOENT)
-			return (error);
-	}
-
-	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
-	if (error >= 0)
-		*where |= XATTR_IN_DIR;
-	else if (error != -ENOENT)
-		return (error);
-
-	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
-		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
-		    " in both SA and dir", ip, name);
-	if (*where == XATTR_NOENT)
-		error = -ENODATA;
-	else
-		error = 0;
-	return (error);
-}
-
-static int
-zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	cred_t *cr = CRED();
-	fstrans_cookie_t cookie;
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
-	rw_enter(&zp->z_xattr_lock, RW_READER);
-	error = __zpl_xattr_get(ip, name, value, size, cr);
-	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-
-	return (error);
-}
-
-static int
-zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
-    size_t size, int flags, cred_t *cr)
-{
-	struct inode *dxip = NULL;
-	struct inode *xip = NULL;
-	vattr_t *vap = NULL;
-	ssize_t wrote;
-	int lookup_flags, error;
-	const int xattr_mode = S_IFREG | 0644;
-	loff_t pos = 0;
-
-	/*
-	 * Lookup the xattr directory.  When we're adding an entry pass
-	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
-	 * When removing an entry this flag is not passed to avoid
-	 * unnecessarily creating a new xattr directory.
-	 */
-	lookup_flags = LOOKUP_XATTR;
-	if (value != NULL)
-		lookup_flags |= CREATE_XATTR_DIR;
-
-	error = -zfs_lookup(ip, NULL, &dxip, lookup_flags, cr, NULL, NULL);
-	if (error)
-		goto out;
-
-	/* Lookup a specific xattr name in the directory */
-	error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL);
-	if (error && (error != -ENOENT))
-		goto out;
-
-	error = 0;
-
-	/* Remove a specific name xattr when value is set to NULL. */
-	if (value == NULL) {
-		if (xip)
-			error = -zfs_remove(dxip, (char *)name, cr, 0);
-
-		goto out;
-	}
-
-	/* Lookup failed create a new xattr. */
-	if (xip == NULL) {
-		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
-		vap->va_mode = xattr_mode;
-		vap->va_mask = ATTR_MODE;
-		vap->va_uid = crgetfsuid(cr);
-		vap->va_gid = crgetfsgid(cr);
-
-		error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip,
-		    cr, 0, NULL);
-		if (error)
-			goto out;
-	}
-
-	ASSERT(xip != NULL);
-
-	error = -zfs_freesp(ITOZ(xip), 0, 0, xattr_mode, TRUE);
-	if (error)
-		goto out;
-
-	wrote = zpl_write_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr);
-	if (wrote < 0)
-		error = wrote;
-
-out:
-
-	if (error == 0) {
-		ip->i_ctime = current_time(ip);
-		zfs_mark_inode_dirty(ip);
-	}
-
-	if (vap)
-		kmem_free(vap, sizeof (vattr_t));
-
-	if (xip)
-		iput(xip);
-
-	if (dxip)
-		iput(dxip);
-
-	if (error == -ENOENT)
-		error = -ENODATA;
-
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
-    size_t size, int flags, cred_t *cr)
-{
-	znode_t *zp = ITOZ(ip);
-	nvlist_t *nvl;
-	size_t sa_size;
-	int error = 0;
-
-	mutex_enter(&zp->z_lock);
-	if (zp->z_xattr_cached == NULL)
-		error = -zfs_sa_get_xattr(zp);
-	mutex_exit(&zp->z_lock);
-
-	if (error)
-		return (error);
-
-	ASSERT(zp->z_xattr_cached);
-	nvl = zp->z_xattr_cached;
-
-	if (value == NULL) {
-		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
-		if (error == -ENOENT)
-			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
-	} else {
-		/* Limited to 32k to keep nvpair memory allocations small */
-		if (size > DXATTR_MAX_ENTRY_SIZE)
-			return (-EFBIG);
-
-		/* Prevent the DXATTR SA from consuming the entire SA region */
-		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
-		if (error)
-			return (error);
-
-		if (sa_size > DXATTR_MAX_SA_SIZE)
-			return (-EFBIG);
-
-		error = -nvlist_add_byte_array(nvl, name,
-		    (uchar_t *)value, size);
-	}
-
-	/*
-	 * Update the SA for additions, modifications, and removals. On
-	 * error drop the inconsistent cached version of the nvlist, it
-	 * will be reconstructed from the ARC when next accessed.
-	 */
-	if (error == 0)
-		error = -zfs_sa_set_xattr(zp);
-
-	if (error) {
-		nvlist_free(nvl);
-		zp->z_xattr_cached = NULL;
-	}
-
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-static int
-zpl_xattr_set(struct inode *ip, const char *name, const void *value,
-    size_t size, int flags)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	cred_t *cr = CRED();
-	fstrans_cookie_t cookie;
-	int where;
-	int error;
-
-	crhold(cr);
-	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
-	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
-
-	/*
-	 * Before setting the xattr check to see if it already exists.
-	 * This is done to ensure the following optional flags are honored.
-	 *
-	 *   XATTR_CREATE: fail if xattr already exists
-	 *   XATTR_REPLACE: fail if xattr does not exist
-	 *
-	 * We also want to know if it resides in sa or dir, so we can make
-	 * sure we don't end up with duplicate in both places.
-	 */
-	error = __zpl_xattr_where(ip, name, &where, cr);
-	if (error < 0) {
-		if (error != -ENODATA)
-			goto out;
-		if (flags & XATTR_REPLACE)
-			goto out;
-
-		/* The xattr to be removed already doesn't exist */
-		error = 0;
-		if (value == NULL)
-			goto out;
-	} else {
-		error = -EEXIST;
-		if (flags & XATTR_CREATE)
-			goto out;
-	}
-
-	/* Preferentially store the xattr as a SA for better performance */
-	if (zfsvfs->z_use_sa && zp->z_is_sa &&
-	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
-		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
-		if (error == 0) {
-			/*
-			 * Successfully put into SA, we need to clear the one
-			 * in dir.
-			 */
-			if (where & XATTR_IN_DIR)
-				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
-			goto out;
-		}
-	}
-
-	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
-	/*
-	 * Successfully put into dir, we need to clear the one in SA.
-	 */
-	if (error == 0 && (where & XATTR_IN_SA))
-		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
-out:
-	rw_exit(&ITOZ(ip)->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
-	spl_fstrans_unmark(cookie);
-	crfree(cr);
-	ASSERT3S(error, <=, 0);
-
-	return (error);
-}
-
-/*
- * Extended user attributes
- *
- * "Extended user attributes may be assigned to files and directories for
- * storing arbitrary additional information such as the mime type,
- * character set or encoding of a file.  The access permissions for user
- * attributes are defined by the file permission bits: read permission
- * is required to retrieve the attribute value, and writer permission is
- * required to change it.
- *
- * The file permission bits of regular files and directories are
- * interpreted differently from the file permission bits of special
- * files and symbolic links.  For regular files and directories the file
- * permission bits define access to the file's contents, while for
- * device special files they define access to the device described by
- * the special file.  The file permissions of symbolic links are not
- * used in access checks.  These differences would allow users to
- * consume filesystem resources in a way not controllable by disk quotas
- * for group or world writable special files and directories.
- *
- * For this reason, extended user attributes are allowed only for
- * regular files and directories, and access to extended user attributes
- * is restricted to the owner and to users with appropriate capabilities
- * for directories with the sticky bit set (see the chmod(1) manual page
- * for an explanation of the sticky bit)." - xattr(7)
- *
- * ZFS allows extended user attributes to be disabled administratively
- * by setting the 'xattr=off' property on the dataset.
- */
-static int
-__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
-
-static int
-__zpl_xattr_user_get(struct inode *ip, const char *name,
-    void *value, size_t size)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
-		return (-EOPNOTSUPP);
-
-	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
-
-static int
-__zpl_xattr_user_set(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
-		return (-EOPNOTSUPP);
-
-	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
-
-xattr_handler_t zpl_xattr_user_handler =
-{
-	.prefix	= XATTR_USER_PREFIX,
-	.list	= zpl_xattr_user_list,
-	.get	= zpl_xattr_user_get,
-	.set	= zpl_xattr_user_set,
-};
-
-/*
- * Trusted extended attributes
- *
- * "Trusted extended attributes are visible and accessible only to
- * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
- * class are used to implement mechanisms in user space (i.e., outside
- * the kernel) which keep information in extended attributes to which
- * ordinary processes should not have access." - xattr(7)
- */
-static int
-__zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	return (capable(CAP_SYS_ADMIN));
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
-
-static int
-__zpl_xattr_trusted_get(struct inode *ip, const char *name,
-    void *value, size_t size)
-{
-	char *xattr_name;
-	int error;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return (-EACCES);
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
-
-static int
-__zpl_xattr_trusted_set(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	char *xattr_name;
-	int error;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return (-EACCES);
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
-
-xattr_handler_t zpl_xattr_trusted_handler =
-{
-	.prefix	= XATTR_TRUSTED_PREFIX,
-	.list	= zpl_xattr_trusted_list,
-	.get	= zpl_xattr_trusted_get,
-	.set	= zpl_xattr_trusted_set,
-};
-
-/*
- * Extended security attributes
- *
- * "The security attribute namespace is used by kernel security modules,
- * such as Security Enhanced Linux, and also to implement file
- * capabilities (see capabilities(7)).  Read and write access
- * permissions to security attributes depend on the policy implemented
- * for each security attribute by the security module.  When no security
- * module is loaded, all processes have read access to extended security
- * attributes, and write access is limited to processes that have the
- * CAP_SYS_ADMIN capability." - xattr(7)
- */
-static int
-__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	return (1);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
-
-static int
-__zpl_xattr_security_get(struct inode *ip, const char *name,
-    void *value, size_t size)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
-
-static int
-__zpl_xattr_security_set(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
-
-#ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY
-static int
-__zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs,
-    void *fs_info)
-{
-	const struct xattr *xattr;
-	int error = 0;
-
-	for (xattr = xattrs; xattr->name != NULL; xattr++) {
-		error = __zpl_xattr_security_set(ip,
-		    xattr->name, xattr->value, xattr->value_len, 0);
-
-		if (error < 0)
-			break;
-	}
-
-	return (error);
-}
-
-int
-zpl_xattr_security_init(struct inode *ip, struct inode *dip,
-    const struct qstr *qstr)
-{
-	return security_inode_init_security(ip, dip, qstr,
-	    &__zpl_xattr_security_init, NULL);
-}
-
-#else
-int
-zpl_xattr_security_init(struct inode *ip, struct inode *dip,
-    const struct qstr *qstr)
-{
-	int error;
-	size_t len;
-	void *value;
-	char *name;
-
-	error = zpl_security_inode_init_security(ip, dip, qstr,
-	    &name, &value, &len);
-	if (error) {
-		if (error == -EOPNOTSUPP)
-			return (0);
-
-		return (error);
-	}
-
-	error = __zpl_xattr_security_set(ip, name, value, len, 0);
-
-	kfree(name);
-	kfree(value);
-
-	return (error);
-}
-#endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */
-
-/*
- * Security xattr namespace handlers.
- */
-xattr_handler_t zpl_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.list	= zpl_xattr_security_list,
-	.get	= zpl_xattr_security_get,
-	.set	= zpl_xattr_security_set,
-};
-
-/*
- * Extended system attributes
- *
- * "Extended system attributes are used by the kernel to store system
- * objects such as Access Control Lists.  Read and write access permissions
- * to system attributes depend on the policy implemented for each system
- * attribute implemented by filesystems in the kernel." - xattr(7)
- */
-#ifdef CONFIG_FS_POSIX_ACL
-int
-zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
-{
-	char *name, *value = NULL;
-	int error = 0;
-	size_t size = 0;
-
-	if (S_ISLNK(ip->i_mode))
-		return (-EOPNOTSUPP);
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		if (acl) {
-			zpl_equivmode_t mode = ip->i_mode;
-			error = posix_acl_equiv_mode(acl, &mode);
-			if (error < 0) {
-				return (error);
-			} else {
-				/*
-				 * The mode bits will have been set by
-				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
-				 * using the ZFS ACL conversion.  If they
-				 * differ from the Posix ACL conversion dirty
-				 * the inode to write the Posix mode bits.
-				 */
-				if (ip->i_mode != mode) {
-					ip->i_mode = mode;
-					ip->i_ctime = current_time(ip);
-					zfs_mark_inode_dirty(ip);
-				}
-
-				if (error == 0)
-					acl = NULL;
-			}
-		}
-		break;
-
-	case ACL_TYPE_DEFAULT:
-		name = XATTR_NAME_POSIX_ACL_DEFAULT;
-		if (!S_ISDIR(ip->i_mode))
-			return (acl ? -EACCES : 0);
-		break;
-
-	default:
-		return (-EINVAL);
-	}
-
-	if (acl) {
-		size = posix_acl_xattr_size(acl->a_count);
-		value = kmem_alloc(size, KM_SLEEP);
-
-		error = zpl_acl_to_xattr(acl, value, size);
-		if (error < 0) {
-			kmem_free(value, size);
-			return (error);
-		}
-	}
-
-	error = zpl_xattr_set(ip, name, value, size, 0);
-	if (value)
-		kmem_free(value, size);
-
-	if (!error) {
-		if (acl)
-			zpl_set_cached_acl(ip, type, acl);
-		else
-			zpl_forget_cached_acl(ip, type);
-	}
-
-	return (error);
-}
-
-struct posix_acl *
-zpl_get_acl(struct inode *ip, int type)
-{
-	struct posix_acl *acl;
-	void *value = NULL;
-	char *name;
-	int size;
-
-	/*
-	 * As of Linux 3.14, the kernel get_acl will check this for us.
-	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
-	 * as the kernel get_acl will set it to temporary sentinel value.
-	 */
-#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
-	acl = get_cached_acl(ip, type);
-	if (acl != ACL_NOT_CACHED)
-		return (acl);
-#endif
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		break;
-	case ACL_TYPE_DEFAULT:
-		name = XATTR_NAME_POSIX_ACL_DEFAULT;
-		break;
-	default:
-		return (ERR_PTR(-EINVAL));
-	}
-
-	size = zpl_xattr_get(ip, name, NULL, 0);
-	if (size > 0) {
-		value = kmem_alloc(size, KM_SLEEP);
-		size = zpl_xattr_get(ip, name, value, size);
-	}
-
-	if (size > 0) {
-		acl = zpl_acl_from_xattr(value, size);
-	} else if (size == -ENODATA || size == -ENOSYS) {
-		acl = NULL;
-	} else {
-		acl = ERR_PTR(-EIO);
-	}
-
-	if (size > 0)
-		kmem_free(value, size);
-
-	/* As of Linux 4.7, the kernel get_acl will set this for us */
-#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
-	if (!IS_ERR(acl))
-		zpl_set_cached_acl(ip, type, acl);
-#endif
-
-	return (acl);
-}
-
-#if !defined(HAVE_GET_ACL)
-static int
-__zpl_check_acl(struct inode *ip, int mask)
-{
-	struct posix_acl *acl;
-	int error;
-
-	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-
-	if (acl) {
-		error = posix_acl_permission(ip, acl, mask);
-		zpl_posix_acl_release(acl);
-		return (error);
-	}
-
-	return (-EAGAIN);
-}
-
-#if defined(HAVE_CHECK_ACL_WITH_FLAGS)
-int
-zpl_check_acl(struct inode *ip, int mask, unsigned int flags)
-{
-	return (__zpl_check_acl(ip, mask));
-}
-#elif defined(HAVE_CHECK_ACL)
-int
-zpl_check_acl(struct inode *ip, int mask)
-{
-	return (__zpl_check_acl(ip, mask));
-}
-#elif defined(HAVE_PERMISSION_WITH_NAMEIDATA)
-int
-zpl_permission(struct inode *ip, int mask, struct nameidata *nd)
-{
-	return (generic_permission(ip, mask, __zpl_check_acl));
-}
-#elif defined(HAVE_PERMISSION)
-int
-zpl_permission(struct inode *ip, int mask)
-{
-	return (generic_permission(ip, mask, __zpl_check_acl));
-}
-#endif /* HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* !HAVE_GET_ACL */
-
-int
-zpl_init_acl(struct inode *ip, struct inode *dir)
-{
-	struct posix_acl *acl = NULL;
-	int error = 0;
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (!S_ISLNK(ip->i_mode)) {
-		acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		if (!acl) {
-			ip->i_mode &= ~current_umask();
-			ip->i_ctime = current_time(ip);
-			zfs_mark_inode_dirty(ip);
-			return (0);
-		}
-	}
-
-	if (acl) {
-		umode_t mode;
-
-		if (S_ISDIR(ip->i_mode)) {
-			error = zpl_set_acl(ip, acl, ACL_TYPE_DEFAULT);
-			if (error)
-				goto out;
-		}
-
-		mode = ip->i_mode;
-		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
-		if (error >= 0) {
-			ip->i_mode = mode;
-			zfs_mark_inode_dirty(ip);
-			if (error > 0)
-				error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
-		}
-	}
-out:
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-
-int
-zpl_chmod_acl(struct inode *ip)
-{
-	struct posix_acl *acl;
-	int error;
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (S_ISLNK(ip->i_mode))
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
-		return (PTR_ERR(acl));
-
-	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
-	if (!error)
-		error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
-
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-
-static int
-__zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
-	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (list && xattr_size <= list_size)
-		memcpy(list, xattr_name, xattr_size);
-
-	return (xattr_size);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
-
-static int
-__zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
-	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (list && xattr_size <= list_size)
-		memcpy(list, xattr_name, xattr_size);
-
-	return (xattr_size);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
-
-static int
-__zpl_xattr_acl_get_access(struct inode *ip, const char *name,
-    void *buffer, size_t size)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_ACCESS;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, type);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-	if (acl == NULL)
-		return (-ENODATA);
-
-	error = zpl_acl_to_xattr(acl, buffer, size);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
-
-static int
-__zpl_xattr_acl_get_default(struct inode *ip, const char *name,
-    void *buffer, size_t size)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_DEFAULT;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, type);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-	if (acl == NULL)
-		return (-ENODATA);
-
-	error = zpl_acl_to_xattr(acl, buffer, size);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
-
-static int
-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_ACCESS;
-	int error = 0;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	if (!zpl_inode_owner_or_capable(ip))
-		return (-EPERM);
-
-	if (value) {
-		acl = zpl_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		else if (acl) {
-			error = zpl_posix_acl_valid(ip, acl);
-			if (error) {
-				zpl_posix_acl_release(acl);
-				return (error);
-			}
-		}
-	} else {
-		acl = NULL;
-	}
-
-	error = zpl_set_acl(ip, acl, type);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
-
-static int
-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_DEFAULT;
-	int error = 0;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	if (!zpl_inode_owner_or_capable(ip))
-		return (-EPERM);
-
-	if (value) {
-		acl = zpl_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		else if (acl) {
-			error = zpl_posix_acl_valid(ip, acl);
-			if (error) {
-				zpl_posix_acl_release(acl);
-				return (error);
-			}
-		}
-	} else {
-		acl = NULL;
-	}
-
-	error = zpl_set_acl(ip, acl, type);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
-
-/*
- * ACL access xattr namespace handlers.
- *
- * Use .name instead of .prefix when available. xattr_resolve_name will match
- * whole name and reject anything that has .name only as prefix.
- */
-xattr_handler_t zpl_xattr_acl_access_handler =
-{
-#ifdef HAVE_XATTR_HANDLER_NAME
-	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
-#else
-	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
-#endif
-	.list	= zpl_xattr_acl_list_access,
-	.get	= zpl_xattr_acl_get_access,
-	.set	= zpl_xattr_acl_set_access,
-#if defined(HAVE_XATTR_LIST_SIMPLE) || \
-    defined(HAVE_XATTR_LIST_DENTRY) || \
-    defined(HAVE_XATTR_LIST_HANDLER)
-	.flags	= ACL_TYPE_ACCESS,
-#endif
-};
-
-/*
- * ACL default xattr namespace handlers.
- *
- * Use .name instead of .prefix when available. xattr_resolve_name will match
- * whole name and reject anything that has .name only as prefix.
- */
-xattr_handler_t zpl_xattr_acl_default_handler =
-{
-#ifdef HAVE_XATTR_HANDLER_NAME
-	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
-#else
-	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
-#endif
-	.list	= zpl_xattr_acl_list_default,
-	.get	= zpl_xattr_acl_get_default,
-	.set	= zpl_xattr_acl_set_default,
-#if defined(HAVE_XATTR_LIST_SIMPLE) || \
-    defined(HAVE_XATTR_LIST_DENTRY) || \
-    defined(HAVE_XATTR_LIST_HANDLER)
-	.flags	= ACL_TYPE_DEFAULT,
-#endif
-};
-
-#endif /* CONFIG_FS_POSIX_ACL */
-
-xattr_handler_t *zpl_xattr_handlers[] = {
-	&zpl_xattr_security_handler,
-	&zpl_xattr_trusted_handler,
-	&zpl_xattr_user_handler,
-#ifdef CONFIG_FS_POSIX_ACL
-	&zpl_xattr_acl_access_handler,
-	&zpl_xattr_acl_default_handler,
-#endif /* CONFIG_FS_POSIX_ACL */
-	NULL
-};
-
-static const struct xattr_handler *
-zpl_xattr_handler(const char *name)
-{
-	if (strncmp(name, XATTR_USER_PREFIX,
-	    XATTR_USER_PREFIX_LEN) == 0)
-		return (&zpl_xattr_user_handler);
-
-	if (strncmp(name, XATTR_TRUSTED_PREFIX,
-	    XATTR_TRUSTED_PREFIX_LEN) == 0)
-		return (&zpl_xattr_trusted_handler);
-
-	if (strncmp(name, XATTR_SECURITY_PREFIX,
-	    XATTR_SECURITY_PREFIX_LEN) == 0)
-		return (&zpl_xattr_security_handler);
-
-#ifdef CONFIG_FS_POSIX_ACL
-	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
-	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
-		return (&zpl_xattr_acl_access_handler);
-
-	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
-	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
-		return (&zpl_xattr_acl_default_handler);
-#endif /* CONFIG_FS_POSIX_ACL */
-
-	return (NULL);
-}
-
-#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
-struct acl_rel_struct {
-	struct acl_rel_struct *next;
-	struct posix_acl *acl;
-	clock_t time;
-};
-
-#define	ACL_REL_GRACE	(60*HZ)
-#define	ACL_REL_WINDOW	(1*HZ)
-#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
-
-/*
- * Lockless multi-producer single-consumer fifo list.
- * Nodes are added to tail and removed from head. Tail pointer is our
- * synchronization point. It always points to the next pointer of the last
- * node, or head if list is empty.
- */
-static struct acl_rel_struct *acl_rel_head = NULL;
-static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
-
-static void
-zpl_posix_acl_free(void *arg)
-{
-	struct acl_rel_struct *freelist = NULL;
-	struct acl_rel_struct *a;
-	clock_t new_time;
-	boolean_t refire = B_FALSE;
-
-	ASSERT3P(acl_rel_head, !=, NULL);
-	while (acl_rel_head) {
-		a = acl_rel_head;
-		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
-			/*
-			 * If a is the last node we need to reset tail, but we
-			 * need to use cmpxchg to make sure it is still the
-			 * last node.
-			 */
-			if (acl_rel_tail == &a->next) {
-				acl_rel_head = NULL;
-				if (cmpxchg(&acl_rel_tail, &a->next,
-				    &acl_rel_head) == &a->next) {
-					ASSERT3P(a->next, ==, NULL);
-					a->next = freelist;
-					freelist = a;
-					break;
-				}
-			}
-			/*
-			 * a is not last node, make sure next pointer is set
-			 * by the adder and advance the head.
-			 */
-			while (READ_ONCE(a->next) == NULL)
-				cpu_relax();
-			acl_rel_head = a->next;
-			a->next = freelist;
-			freelist = a;
-		} else {
-			/*
-			 * a is still in grace period. We are responsible to
-			 * reschedule the free task, since adder will only do
-			 * so if list is empty.
-			 */
-			new_time = a->time + ACL_REL_SCHED;
-			refire = B_TRUE;
-			break;
-		}
-	}
-
-	if (refire)
-		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
-		    NULL, TQ_SLEEP, new_time);
-
-	while (freelist) {
-		a = freelist;
-		freelist = a->next;
-		kfree(a->acl);
-		kmem_free(a, sizeof (struct acl_rel_struct));
-	}
-}
-
-void
-zpl_posix_acl_release_impl(struct posix_acl *acl)
-{
-	struct acl_rel_struct *a, **prev;
-
-	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
-	a->next = NULL;
-	a->acl = acl;
-	a->time = ddi_get_lbolt();
-	/* atomically points tail to us and get the previous tail */
-	prev = xchg(&acl_rel_tail, &a->next);
-	ASSERT3P(*prev, ==, NULL);
-	*prev = a;
-	/* if it was empty before, schedule the free task */
-	if (prev == &acl_rel_head)
-		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
-		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
-}
-#endif

diff --git a/zfs/module/zfs/zrlock.c b/zfs/module/zfs/zrlock.c
index 014a5cc..8b6755b 100644
--- a/zfs/module/zfs/zrlock.c
+++ b/zfs/module/zfs/zrlock.c

@@ -39,7 +39,7 @@
  * function calls.
  */
 #include <sys/zrlock.h>
-#include <sys/trace_zrlock.h>
+#include <sys/trace_zfs.h>
 
 /*
  * A ZRL can be locked only while there are zero references, so ZRL_LOCKED is
@@ -106,16 +106,16 @@
 void
 zrl_remove(zrlock_t *zrl)
 {
-	uint32_t n;
-
 #ifdef	ZFS_DEBUG
 	if (zrl->zr_owner == curthread) {
 		zrl->zr_owner = NULL;
 		zrl->zr_caller = NULL;
 	}
+	int32_t n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount);
+	ASSERT3S(n, >=, 0);
+#else
+	atomic_dec_32((uint32_t *)&zrl->zr_refcount);
 #endif
-	n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount);
-	ASSERT3S((int32_t)n, >=, 0);
 }
 
 int
@@ -157,15 +157,6 @@
 }
 
 int
-zrl_refcount(zrlock_t *zrl)
-{
-	ASSERT3S(zrl->zr_refcount, >, ZRL_DESTROYED);
-
-	int n = (int)zrl->zr_refcount;
-	return (n <= 0 ? 0 : n);
-}
-
-int
 zrl_is_zero(zrlock_t *zrl)
 {
 	ASSERT3S(zrl->zr_refcount, >, ZRL_DESTROYED);

diff --git a/zfs/module/zfs/zthr.c b/zfs/module/zfs/zthr.c
index 532e8ce..52ddffa 100644
--- a/zfs/module/zfs/zthr.c
+++ b/zfs/module/zfs/zthr.c

@@ -14,7 +14,7 @@
  */
 
 /*
- * Copyright (c) 2017, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2017, 2020 by Delphix. All rights reserved.
  */
 
 /*
@@ -56,7 +56,7 @@
  *
  * == ZTHR creation
  *
- * Every zthr needs three inputs to start running:
+ * Every zthr needs four inputs to start running:
  *
  * 1] A user-defined checker function (checkfunc) that decides whether
  *    the zthr should start working or go to sleep. The function should
@@ -72,6 +72,9 @@
  * 3] A void args pointer that will be passed to checkfunc and func
  *    implicitly by the infrastructure.
  *
+ * 4] A name for the thread. This string must be valid for the lifetime
+ *    of the zthr.
+ *
  * The reason why the above API needs two different functions,
  * instead of one that both checks and does the work, has to do with
  * the zthr's internal state lock (zthr_state_lock) and the allowed
@@ -80,10 +83,11 @@
  * can be cancelled while doing work and not while checking for work.
  *
  * To start a zthr:
- *     zthr_t *zthr_pointer = zthr_create(checkfunc, func, args);
+ *     zthr_t *zthr_pointer = zthr_create(name, checkfunc, func,
+ *         args, pri);
  * or
  *     zthr_t *zthr_pointer = zthr_create_timer(checkfunc, func,
- *         args, max_sleep);
+ *         args, max_sleep, pri);
  *
  * After that you should be able to wakeup, cancel, and resume the
  * zthr from another thread using the zthr_pointer.
@@ -207,17 +211,24 @@
 	/* flag set to true if we are canceling the zthr */
 	boolean_t	zthr_cancel;
 
+	/* flag set to true if we are waiting for the zthr to finish */
+	boolean_t	zthr_haswaiters;
+	kcondvar_t	zthr_wait_cv;
 	/*
 	 * maximum amount of time that the zthr is spent sleeping;
 	 * if this is 0, the thread doesn't wake up until it gets
 	 * signaled.
 	 */
-	hrtime_t	zthr_wait_time;
+	hrtime_t	zthr_sleep_timeout;
+
+	/* Thread priority */
+	pri_t		zthr_pri;
 
 	/* consumer-provided callbacks & data */
 	zthr_checkfunc_t	*zthr_checkfunc;
 	zthr_func_t	*zthr_func;
 	void		*zthr_arg;
+	const char	*zthr_name;
 };
 
 static void
@@ -234,19 +245,18 @@
 			t->zthr_func(t->zthr_arg, t);
 			mutex_enter(&t->zthr_state_lock);
 		} else {
-			/*
-			 * cv_wait_sig() is used instead of cv_wait() in
-			 * order to prevent this process from incorrectly
-			 * contributing to the system load average when idle.
-			 */
-			if (t->zthr_wait_time == 0) {
-				cv_wait_sig(&t->zthr_cv, &t->zthr_state_lock);
+			if (t->zthr_sleep_timeout == 0) {
+				cv_wait_idle(&t->zthr_cv, &t->zthr_state_lock);
 			} else {
-				(void) cv_timedwait_sig_hires(&t->zthr_cv,
-				    &t->zthr_state_lock, t->zthr_wait_time,
+				(void) cv_timedwait_idle_hires(&t->zthr_cv,
+				    &t->zthr_state_lock, t->zthr_sleep_timeout,
 				    MSEC2NSEC(1), 0);
 			}
 		}
+		if (t->zthr_haswaiters) {
+			t->zthr_haswaiters = B_FALSE;
+			cv_broadcast(&t->zthr_wait_cv);
+		}
 	}
 
 	/*
@@ -262,9 +272,11 @@
 }
 
 zthr_t *
-zthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg)
+zthr_create(const char *zthr_name, zthr_checkfunc_t *checkfunc,
+    zthr_func_t *func, void *arg, pri_t pri)
 {
-	return (zthr_create_timer(checkfunc, func, arg, (hrtime_t)0));
+	return (zthr_create_timer(zthr_name, checkfunc,
+	    func, arg, (hrtime_t)0, pri));
 }
 
 /*
@@ -273,22 +285,26 @@
  * start working if required) will be triggered.
  */
 zthr_t *
-zthr_create_timer(zthr_checkfunc_t *checkfunc, zthr_func_t *func,
-    void *arg, hrtime_t max_sleep)
+zthr_create_timer(const char *zthr_name, zthr_checkfunc_t *checkfunc,
+    zthr_func_t *func, void *arg, hrtime_t max_sleep, pri_t pri)
 {
 	zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
 	mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&t->zthr_request_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&t->zthr_wait_cv, NULL, CV_DEFAULT, NULL);
 
 	mutex_enter(&t->zthr_state_lock);
 	t->zthr_checkfunc = checkfunc;
 	t->zthr_func = func;
 	t->zthr_arg = arg;
-	t->zthr_wait_time = max_sleep;
+	t->zthr_sleep_timeout = max_sleep;
+	t->zthr_name = zthr_name;
+	t->zthr_pri = pri;
 
-	t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
-	    0, &p0, TS_RUN, minclsyspri);
+	t->zthr_thread = thread_create_named(zthr_name, NULL, 0,
+	    zthr_procedure, t, 0, &p0, TS_RUN, pri);
+
 	mutex_exit(&t->zthr_state_lock);
 
 	return (t);
@@ -303,6 +319,7 @@
 	mutex_destroy(&t->zthr_request_lock);
 	mutex_destroy(&t->zthr_state_lock);
 	cv_destroy(&t->zthr_cv);
+	cv_destroy(&t->zthr_wait_cv);
 	kmem_free(t, sizeof (*t));
 }
 
@@ -355,9 +372,8 @@
 	 *
 	 * [1] The thread has already been cancelled, therefore
 	 *     there is nothing for us to do.
-	 * [2] The thread is sleeping, so we broadcast the CV first
-	 *     to wake it up and then we set the flag and we are
-	 *     waiting for it to exit.
+	 * [2] The thread is sleeping so we set the flag, broadcast
+	 *     the CV and wait for it to exit.
 	 * [3] The thread is doing work, in which case we just set
 	 *     the flag and wait for it to finish.
 	 * [4] The thread was just created/resumed, in which case
@@ -397,6 +413,7 @@
 	ASSERT3P(&t->zthr_checkfunc, !=, NULL);
 	ASSERT3P(&t->zthr_func, !=, NULL);
 	ASSERT(!t->zthr_cancel);
+	ASSERT(!t->zthr_haswaiters);
 
 	/*
 	 * There are 4 states that we find the zthr in at this point
@@ -410,8 +427,8 @@
 	 *     no-op.
 	 */
 	if (t->zthr_thread == NULL) {
-		t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
-		    0, &p0, TS_RUN, minclsyspri);
+		t->zthr_thread = thread_create_named(t->zthr_name, NULL, 0,
+		    zthr_procedure, t, 0, &p0, TS_RUN, t->zthr_pri);
 	}
 
 	mutex_exit(&t->zthr_state_lock);
@@ -451,3 +468,80 @@
 	mutex_exit(&t->zthr_state_lock);
 	return (cancelled);
 }
+
+boolean_t
+zthr_iscurthread(zthr_t *t)
+{
+	return (t->zthr_thread == curthread);
+}
+
+/*
+ * Wait for the zthr to finish its current function. Similar to
+ * zthr_iscancelled, you can use zthr_has_waiters to have the zthr_func end
+ * early. Unlike zthr_cancel, the thread is not destroyed. If the zthr was
+ * cancelled, return immediately; a sleeping zthr is woken up first.
+ */
+void
+zthr_wait_cycle_done(zthr_t *t)
+{
+	mutex_enter(&t->zthr_state_lock);
+
+	/*
+	 * Since we are holding the zthr_state_lock at this point
+	 * we can find the zthr in one of the following 5 states:
+	 *
+	 * [1] The thread has already been cancelled, therefore
+	 *     there is nothing for us to do.
+	 * [2] The thread is sleeping so we set the flag, broadcast
+	 *     the CV and wait for it to signal zthr_wait_cv.
+	 * [3] The thread is doing work, in which case we just set
+	 *     the flag and wait for it to finish.
+	 * [4] The thread was just created/resumed, in which case
+	 *     the behavior is similar to [3].
+	 * [5] The thread is in the middle of being cancelled, which
+	 *     is similar to [3]. We'll wait for the cancel, which is
+	 *     waiting for the zthr func.
+	 *
+	 * Since requests are serialized, by the time that we get
+	 * control back we expect that the zthr has completed its
+	 * zthr_func.
+	 */
+	if (t->zthr_thread != NULL) {
+		t->zthr_haswaiters = B_TRUE;
+
+		/* broadcast in case the zthr is sleeping */
+		cv_broadcast(&t->zthr_cv);
+
+		while ((t->zthr_haswaiters) && (t->zthr_thread != NULL))
+			cv_wait(&t->zthr_wait_cv, &t->zthr_state_lock);
+
+		ASSERT(!t->zthr_haswaiters);
+	}
+
+	mutex_exit(&t->zthr_state_lock);
+}
+
+/*
+ * This function is intended to be used by the zthr itself
+ * to check if another thread is waiting on it to finish.
+ *
+ * Returns B_TRUE if we have been asked to finish (zthr_haswaiters is set).
+ *
+ * Returns B_FALSE otherwise.
+ */
+boolean_t
+zthr_has_waiters(zthr_t *t)
+{
+	ASSERT3P(t->zthr_thread, ==, curthread);
+
+	mutex_enter(&t->zthr_state_lock);
+
+	/*
+	 * Similarly to zthr_iscancelled(), we only grab the
+	 * zthr_state_lock so that the zthr itself can use this
+	 * to check for the request.
+	 */
+	boolean_t has_waiters = t->zthr_haswaiters;
+	mutex_exit(&t->zthr_state_lock);
+	return (has_waiters);
+}

diff --git a/zfs/module/zfs/zvol.c b/zfs/module/zfs/zvol.c
index a559029..7d141a1 100644
--- a/zfs/module/zfs/zvol.c
+++ b/zfs/module/zfs/zvol.c

@@ -68,10 +68,6 @@
  * allocated and placed on zvol_state_list, and then other minor operations
  * for this zvol are going to proceed in the order of issue.
  *
- * It is also worth keeping in mind that once add_disk() is called, the zvol is
- * announced to the world, and zvol_open()/zvol_release() can be called at any
- * time. Incidentally, add_disk() itself calls zvol_open()->zvol_first_open()
- * and zvol_release()->zvol_last_close() directly as well.
  */
 
 #include <sys/dataset_kstats.h>
@@ -88,56 +84,17 @@
 #include <sys/zfs_rlock.h>
 #include <sys/spa_impl.h>
 #include <sys/zvol.h>
-
-#include <linux/blkdev_compat.h>
-#include <linux/task_io_accounting_ops.h>
+#include <sys/zvol_impl.h>
 
 unsigned int zvol_inhibit_dev = 0;
-unsigned int zvol_major = ZVOL_MAJOR;
-unsigned int zvol_threads = 32;
-unsigned int zvol_request_sync = 0;
-unsigned int zvol_prefetch_bytes = (128 * 1024);
-unsigned long zvol_max_discard_blocks = 16384;
 unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
 
-static taskq_t *zvol_taskq;
-static krwlock_t zvol_state_lock;
-static list_t zvol_state_list;
-
-#define	ZVOL_HT_SIZE	1024
-static struct hlist_head *zvol_htable;
-#define	ZVOL_HT_HEAD(hash)	(&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
-
-static struct ida zvol_ida;
-
-/*
- * The in-core state of each volume.
- */
-struct zvol_state {
-	char			zv_name[MAXNAMELEN];	/* name */
-	uint64_t		zv_volsize;		/* advertised space */
-	uint64_t		zv_volblocksize;	/* volume block size */
-	objset_t		*zv_objset;	/* objset handle */
-	uint32_t		zv_flags;	/* ZVOL_* flags */
-	uint32_t		zv_open_count;	/* open counts */
-	uint32_t		zv_changed;	/* disk changed */
-	zilog_t			*zv_zilog;	/* ZIL handle */
-	zfs_rangelock_t		zv_rangelock;	/* for range locking */
-	dnode_t			*zv_dn;		/* dnode hold */
-	dev_t			zv_dev;		/* device id */
-	struct gendisk		*zv_disk;	/* generic disk */
-	struct request_queue	*zv_queue;	/* request queue */
-	dataset_kstats_t	zv_kstat;	/* zvol kstats */
-	list_node_t		zv_next;	/* next zvol_state_t linkage */
-	uint64_t		zv_hash;	/* name hash */
-	struct hlist_node	zv_hlink;	/* hash link */
-	kmutex_t		zv_state_lock;	/* protects zvol_state_t */
-	atomic_t		zv_suspend_ref;	/* refcount for suspend */
-	krwlock_t		zv_suspend_lock;	/* suspend lock */
-};
+struct hlist_head *zvol_htable;
+list_t zvol_state_list;
+krwlock_t zvol_state_lock;
+const zvol_platform_ops_t *ops;
 
 typedef enum {
-	ZVOL_ASYNC_CREATE_MINORS,
 	ZVOL_ASYNC_REMOVE_MINORS,
 	ZVOL_ASYNC_RENAME_MINORS,
 	ZVOL_ASYNC_SET_SNAPDEV,
@@ -147,26 +104,17 @@
 
 typedef struct {
 	zvol_async_op_t op;
-	char pool[MAXNAMELEN];
 	char name1[MAXNAMELEN];
 	char name2[MAXNAMELEN];
-	zprop_source_t source;
 	uint64_t value;
 } zvol_task_t;
 
-#define	ZVOL_RDONLY	0x1
-/*
- * Whether the zvol has been written to (as opposed to ZVOL_RDONLY, which
- * specifies whether or not the zvol _can_ be written to)
- */
-#define	ZVOL_WRITTEN_TO	0x2
-
-static uint64_t
+uint64_t
 zvol_name_hash(const char *name)
 {
 	int i;
 	uint64_t crc = -1ULL;
-	uint8_t *p = (uint8_t *)name;
+	const uint8_t *p = (const uint8_t *)name;
 	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
 	for (i = 0; i < MAXNAMELEN - 1 && *p; i++, p++) {
 		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (*p)) & 0xFF];
@@ -175,38 +123,13 @@
 }
 
 /*
- * Find a zvol_state_t given the full major+minor dev_t. If found,
- * return with zv_state_lock taken, otherwise, return (NULL) without
- * taking zv_state_lock.
- */
-static zvol_state_t *
-zvol_find_by_dev(dev_t dev)
-{
-	zvol_state_t *zv;
-
-	rw_enter(&zvol_state_lock, RW_READER);
-	for (zv = list_head(&zvol_state_list); zv != NULL;
-	    zv = list_next(&zvol_state_list, zv)) {
-		mutex_enter(&zv->zv_state_lock);
-		if (zv->zv_dev == dev) {
-			rw_exit(&zvol_state_lock);
-			return (zv);
-		}
-		mutex_exit(&zv->zv_state_lock);
-	}
-	rw_exit(&zvol_state_lock);
-
-	return (NULL);
-}
-
-/*
  * Find a zvol_state_t given the name and hash generated by zvol_name_hash.
  * If found, return with zv_suspend_lock and zv_state_lock taken, otherwise,
  * return (NULL) without the taking locks. The zv_suspend_lock is always taken
  * before zv_state_lock. The mode argument indicates the mode (including none)
  * for zv_suspend_lock to be taken.
  */
-static zvol_state_t *
+zvol_state_t *
 zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
 {
 	zvol_state_t *zv;
@@ -258,29 +181,6 @@
 	return (zvol_find_by_name_hash(name, zvol_name_hash(name), mode));
 }
 
-
-/*
- * Given a path, return TRUE if path is a ZVOL.
- */
-boolean_t
-zvol_is_zvol(const char *device)
-{
-	struct block_device *bdev;
-	unsigned int major;
-
-	bdev = vdev_lookup_bdev(device);
-	if (IS_ERR(bdev))
-		return (B_FALSE);
-
-	major = MAJOR(bdev->bd_dev);
-	bdput(bdev);
-
-	if (major == zvol_major)
-		return (B_TRUE);
-
-	return (B_FALSE);
-}
-
 /*
  * ZFS_IOC_CREATE callback handles dmu zvol and zap object creation.
  */
@@ -407,7 +307,6 @@
 zvol_set_volsize(const char *name, uint64_t volsize)
 {
 	objset_t *os = NULL;
-	struct gendisk *disk = NULL;
 	uint64_t readonly;
 	int error;
 	boolean_t owned = B_FALSE;
@@ -450,7 +349,6 @@
 	if (error == 0 && zv != NULL) {
 		zv->zv_volsize = volsize;
 		zv->zv_changed = 1;
-		disk = zv->zv_disk;
 	}
 out:
 	kmem_free(doi, sizeof (dmu_object_info_t));
@@ -466,8 +364,8 @@
 	if (zv != NULL)
 		mutex_exit(&zv->zv_state_lock);
 
-	if (disk != NULL)
-		revalidate_disk(disk);
+	if (error == 0 && zv != NULL)
+		ops->zv_update_volsize(zv, volsize);
 
 	return (SET_ERROR(error));
 }
@@ -571,7 +469,19 @@
 	offset = lr->lr_offset;
 	length = lr->lr_length;
 
-	return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
+	dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
+	dmu_tx_mark_netfree(tx);
+	int error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0) {
+		dmu_tx_abort(tx);
+	} else {
+		zil_replaying(zv->zv_zilog, tx);
+		dmu_tx_commit(tx);
+		error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset,
+		    length);
+	}
+
+	return (error);
 }
 
 /*
@@ -611,6 +521,7 @@
 		dmu_tx_abort(tx);
 	} else {
 		dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
+		zil_replaying(zv->zv_zilog, tx);
 		dmu_tx_commit(tx);
 	}
 
@@ -657,13 +568,14 @@
  */
 ssize_t zvol_immediate_write_sz = 32768;
 
-static void
+void
 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
     uint64_t size, int sync)
 {
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	itx_wr_state_t write_state;
+	uint64_t sz = size;
 
 	if (zil_replaying(zilog, tx))
 		return;
@@ -715,94 +627,16 @@
 		offset += len;
 		size -= len;
 	}
-}
 
-typedef struct zv_request {
-	zvol_state_t	*zv;
-	struct bio	*bio;
-	zfs_locked_range_t	*lr;
-} zv_request_t;
-
-static void
-uio_from_bio(uio_t *uio, struct bio *bio)
-{
-	uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
-	uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
-	uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
-	uio->uio_segflg = UIO_BVEC;
-	uio->uio_limit = MAXOFFSET_T;
-	uio->uio_resid = BIO_BI_SIZE(bio);
-	uio->uio_skip = BIO_BI_SKIP(bio);
-}
-
-static void
-zvol_write(void *arg)
-{
-	int error = 0;
-
-	zv_request_t *zvr = arg;
-	struct bio *bio = zvr->bio;
-	uio_t uio = { { 0 }, 0 };
-	uio_from_bio(&uio, bio);
-
-	zvol_state_t *zv = zvr->zv;
-	ASSERT(zv && zv->zv_open_count > 0);
-	ASSERT(zv->zv_zilog != NULL);
-
-	ssize_t start_resid = uio.uio_resid;
-	unsigned long start_jif = jiffies;
-	blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio),
-	    &zv->zv_disk->part0);
-
-	boolean_t sync =
-	    bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
-
-	uint64_t volsize = zv->zv_volsize;
-	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
-		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
-		uint64_t off = uio.uio_loffset;
-		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
-
-		if (bytes > volsize - off)	/* don't write past the end */
-			bytes = volsize - off;
-
-		dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
-
-		/* This will only fail for ENOSPC */
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			dmu_tx_abort(tx);
-			break;
-		}
-		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
-		if (error == 0) {
-			zvol_log_write(zv, tx, off, bytes, sync);
-		}
-		dmu_tx_commit(tx);
-
-		if (error)
-			break;
+	if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+		dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
 	}
-	zfs_rangelock_exit(zvr->lr);
-
-	int64_t nwritten = start_resid - uio.uio_resid;
-	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
-	task_io_account_write(nwritten);
-
-	if (sync)
-		zil_commit(zv->zv_zilog, ZVOL_OBJ);
-
-	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0,
-	    start_jif);
-	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 /*
  * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
  */
-static void
+void
 zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
     boolean_t sync)
 {
@@ -823,119 +657,6 @@
 	zil_itx_assign(zilog, itx, tx);
 }
 
-static void
-zvol_discard(void *arg)
-{
-	zv_request_t *zvr = arg;
-	struct bio *bio = zvr->bio;
-	zvol_state_t *zv = zvr->zv;
-	uint64_t start = BIO_BI_SECTOR(bio) << 9;
-	uint64_t size = BIO_BI_SIZE(bio);
-	uint64_t end = start + size;
-	boolean_t sync;
-	int error = 0;
-	dmu_tx_t *tx;
-	unsigned long start_jif;
-
-	ASSERT(zv && zv->zv_open_count > 0);
-	ASSERT(zv->zv_zilog != NULL);
-
-	start_jif = jiffies;
-	blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio),
-	    &zv->zv_disk->part0);
-
-	sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
-
-	if (end > zv->zv_volsize) {
-		error = SET_ERROR(EIO);
-		goto unlock;
-	}
-
-	/*
-	 * Align the request to volume block boundaries when a secure erase is
-	 * not required.  This will prevent dnode_free_range() from zeroing out
-	 * the unaligned parts which is slow (read-modify-write) and useless
-	 * since we are not freeing any space by doing so.
-	 */
-	if (!bio_is_secure_erase(bio)) {
-		start = P2ROUNDUP(start, zv->zv_volblocksize);
-		end = P2ALIGN(end, zv->zv_volblocksize);
-		size = end - start;
-	}
-
-	if (start >= end)
-		goto unlock;
-
-	tx = dmu_tx_create(zv->zv_objset);
-	dmu_tx_mark_netfree(tx);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error != 0) {
-		dmu_tx_abort(tx);
-	} else {
-		zvol_log_truncate(zv, tx, start, size, B_TRUE);
-		dmu_tx_commit(tx);
-		error = dmu_free_long_range(zv->zv_objset,
-		    ZVOL_OBJ, start, size);
-	}
-unlock:
-	zfs_rangelock_exit(zvr->lr);
-
-	if (error == 0 && sync)
-		zil_commit(zv->zv_zilog, ZVOL_OBJ);
-
-	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0,
-	    start_jif);
-	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
-}
-
-static void
-zvol_read(void *arg)
-{
-	int error = 0;
-
-	zv_request_t *zvr = arg;
-	struct bio *bio = zvr->bio;
-	uio_t uio = { { 0 }, 0 };
-	uio_from_bio(&uio, bio);
-
-	zvol_state_t *zv = zvr->zv;
-	ASSERT(zv && zv->zv_open_count > 0);
-
-	ssize_t start_resid = uio.uio_resid;
-	unsigned long start_jif = jiffies;
-	blk_generic_start_io_acct(zv->zv_queue, READ, bio_sectors(bio),
-	    &zv->zv_disk->part0);
-
-	uint64_t volsize = zv->zv_volsize;
-	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
-		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
-
-		/* don't read past the end */
-		if (bytes > volsize - uio.uio_loffset)
-			bytes = volsize - uio.uio_loffset;
-
-		error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
-		if (error) {
-			/* convert checksum errors into IO errors */
-			if (error == ECKSUM)
-				error = SET_ERROR(EIO);
-			break;
-		}
-	}
-	zfs_rangelock_exit(zvr->lr);
-
-	int64_t nread = start_resid - uio.uio_resid;
-	dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
-	task_io_account_read(nread);
-
-	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_queue, READ, &zv->zv_disk->part0,
-	    start_jif);
-	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
-}
 
 /* ARGSUSED */
 static void
@@ -952,8 +673,9 @@
 /*
  * Get data to generate a TX_WRITE intent log record.
  */
-static int
-zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
+int
+zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	zvol_state_t *zv = arg;
 	uint64_t offset = lr->lr_offset;
@@ -1017,150 +739,14 @@
 	return (SET_ERROR(error));
 }
 
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-static blk_qc_t
-zvol_submit_bio(struct bio *bio)
-#else
-static MAKE_REQUEST_FN_RET
-zvol_request(struct request_queue *q, struct bio *bio)
-#endif
-{
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-	struct request_queue *q = bio->bi_disk->queue;
-#endif
-	zvol_state_t *zv = q->queuedata;
-	fstrans_cookie_t cookie = spl_fstrans_mark();
-	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
-	uint64_t size = BIO_BI_SIZE(bio);
-	int rw = bio_data_dir(bio);
-	zv_request_t *zvr;
-
-	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
-		printk(KERN_INFO
-		    "%s: bad access: offset=%llu, size=%lu\n",
-		    zv->zv_disk->disk_name,
-		    (long long unsigned)offset,
-		    (long unsigned)size);
-
-		BIO_END_IO(bio, -SET_ERROR(EIO));
-		goto out;
-	}
-
-	if (rw == WRITE) {
-		boolean_t need_sync = B_FALSE;
-
-		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
-			BIO_END_IO(bio, -SET_ERROR(EROFS));
-			goto out;
-		}
-
-		/*
-		 * To be released in the I/O function. See the comment on
-		 * rangelock_enter() below.
-		 */
-		rw_enter(&zv->zv_suspend_lock, RW_READER);
-
-		/*
-		 * Open a ZIL if this is the first time we have written to this
-		 * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
-		 * than zv_state_lock so that we don't need to acquire an
-		 * additional lock in this path.
-		 */
-		if (zv->zv_zilog == NULL) {
-			rw_exit(&zv->zv_suspend_lock);
-			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
-			if (zv->zv_zilog == NULL) {
-				zv->zv_zilog = zil_open(zv->zv_objset,
-				    zvol_get_data);
-				zv->zv_flags |= ZVOL_WRITTEN_TO;
-			}
-			rw_downgrade(&zv->zv_suspend_lock);
-		}
-
-		/* bio marked as FLUSH need to flush before write */
-		if (bio_is_flush(bio))
-			zil_commit(zv->zv_zilog, ZVOL_OBJ);
-
-		/* Some requests are just for flush and nothing else. */
-		if (size == 0) {
-			rw_exit(&zv->zv_suspend_lock);
-			BIO_END_IO(bio, 0);
-			goto out;
-		}
-
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-
-		/*
-		 * To be released in the I/O function. Since the I/O functions
-		 * are asynchronous, we take it here synchronously to make
-		 * sure overlapped I/Os are properly ordered.
-		 */
-		zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size,
-		    RL_WRITER);
-		/*
-		 * Sync writes and discards execute zil_commit() which may need
-		 * to take a RL_READER lock on the whole block being modified
-		 * via its zillog->zl_get_data(): to avoid circular dependency
-		 * issues with taskq threads execute these requests
-		 * synchronously here in zvol_request().
-		 */
-		need_sync = bio_is_fua(bio) ||
-		    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
-		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
-			if (zvol_request_sync || need_sync ||
-			    taskq_dispatch(zvol_taskq, zvol_discard, zvr,
-			    TQ_SLEEP) == TASKQID_INVALID)
-				zvol_discard(zvr);
-		} else {
-			if (zvol_request_sync || need_sync ||
-			    taskq_dispatch(zvol_taskq, zvol_write, zvr,
-			    TQ_SLEEP) == TASKQID_INVALID)
-				zvol_write(zvr);
-		}
-	} else {
-		/*
-		 * The SCST driver, and possibly others, may issue READ I/Os
-		 * with a length of zero bytes.  These empty I/Os contain no
-		 * data and require no additional handling.
-		 */
-		if (size == 0) {
-			BIO_END_IO(bio, 0);
-			goto out;
-		}
-
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-
-		rw_enter(&zv->zv_suspend_lock, RW_READER);
-
-		zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size,
-		    RL_READER);
-		if (zvol_request_sync || taskq_dispatch(zvol_taskq,
-		    zvol_read, zvr, TQ_SLEEP) == TASKQID_INVALID)
-			zvol_read(zvr);
-	}
-
-out:
-	spl_fstrans_unmark(cookie);
-#ifdef HAVE_MAKE_REQUEST_FN_RET_INT
-	return (0);
-#elif defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
-	defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)
-	return (BLK_QC_T_NONE);
-#endif
-}
-
 /*
  * The zvol_state_t's are inserted into zvol_state_list and zvol_htable.
  */
-static void
+
+void
 zvol_insert(zvol_state_t *zv)
 {
 	ASSERT(RW_WRITE_HELD(&zvol_state_lock));
-	ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
 	list_insert_head(&zvol_state_list, zv);
 	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
 }
@@ -1201,19 +787,19 @@
 	if (error)
 		return (SET_ERROR(error));
 
-	error = dnode_hold(os, ZVOL_OBJ, FTAG, &zv->zv_dn);
+	error = dnode_hold(os, ZVOL_OBJ, zv, &zv->zv_dn);
 	if (error)
 		return (SET_ERROR(error));
 
-	set_capacity(zv->zv_disk, volsize >> 9);
+	ops->zv_set_capacity(zv, volsize >> 9);
 	zv->zv_volsize = volsize;
 
 	if (ro || dmu_objset_is_snapshot(os) ||
 	    !spa_writeable(dmu_objset_spa(os))) {
-		set_disk_ro(zv->zv_disk, 1);
+		ops->zv_set_disk_ro(zv, 1);
 		zv->zv_flags |= ZVOL_RDONLY;
 	} else {
-		set_disk_ro(zv->zv_disk, 0);
+		ops->zv_set_disk_ro(zv, 0);
 		zv->zv_flags &= ~ZVOL_RDONLY;
 	}
 	return (0);
@@ -1236,7 +822,7 @@
 
 	zv->zv_zilog = NULL;
 
-	dnode_rele(zv->zv_dn, FTAG);
+	dnode_rele(zv->zv_dn, zv);
 	zv->zv_dn = NULL;
 
 	/*
@@ -1322,61 +908,33 @@
 	return (SET_ERROR(error));
 }
 
-static int
+int
 zvol_first_open(zvol_state_t *zv, boolean_t readonly)
 {
 	objset_t *os;
-	int error, locked = 0;
-	boolean_t ro;
+	int error;
 
 	ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+	ASSERT(mutex_owned(&spa_namespace_lock));
 
-	/*
-	 * In all other cases the spa_namespace_lock is taken before the
-	 * bdev->bd_mutex lock.	 But in this case the Linux __blkdev_get()
-	 * function calls fops->open() with the bdev->bd_mutex lock held.
-	 * This deadlock can be easily observed with zvols used as vdevs.
-	 *
-	 * To avoid a potential lock inversion deadlock we preemptively
-	 * try to take the spa_namespace_lock().  Normally it will not
-	 * be contended and this is safe because spa_open_common() handles
-	 * the case where the caller already holds the spa_namespace_lock.
-	 *
-	 * When it is contended we risk a lock inversion if we were to
-	 * block waiting for the lock.	Luckily, the __blkdev_get()
-	 * function allows us to return -ERESTARTSYS which will result in
-	 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
-	 * called again.  This process can be repeated safely until both
-	 * locks are acquired.
-	 */
-	if (!mutex_owned(&spa_namespace_lock)) {
-		locked = mutex_tryenter(&spa_namespace_lock);
-		if (!locked)
-			return (-SET_ERROR(ERESTARTSYS));
-	}
-
-	ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
+	boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
 	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
 	if (error)
-		goto out_mutex;
+		return (SET_ERROR(error));
 
 	zv->zv_objset = os;
 
 	error = zvol_setup_zv(zv);
-
 	if (error) {
 		dmu_objset_disown(os, 1, zv);
 		zv->zv_objset = NULL;
 	}
 
-out_mutex:
-	if (locked)
-		mutex_exit(&spa_namespace_lock);
-	return (SET_ERROR(-error));
+	return (error);
 }
 
-static void
+void
 zvol_last_close(zvol_state_t *zv)
 {
 	ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
@@ -1388,590 +946,6 @@
 	zv->zv_objset = NULL;
 }
 
-static int
-zvol_open(struct block_device *bdev, fmode_t flag)
-{
-	zvol_state_t *zv;
-	int error = 0;
-	boolean_t drop_suspend = B_TRUE;
-
-	rw_enter(&zvol_state_lock, RW_READER);
-	/*
-	 * Obtain a copy of private_data under the zvol_state_lock to make
-	 * sure that either the result of zvol free code path setting
-	 * bdev->bd_disk->private_data to NULL is observed, or zvol_free()
-	 * is not called on this zv because of the positive zv_open_count.
-	 */
-	zv = bdev->bd_disk->private_data;
-	if (zv == NULL) {
-		rw_exit(&zvol_state_lock);
-		return (SET_ERROR(-ENXIO));
-	}
-
-	mutex_enter(&zv->zv_state_lock);
-	/*
-	 * make sure zvol is not suspended during first open
-	 * (hold zv_suspend_lock) and respect proper lock acquisition
-	 * ordering - zv_suspend_lock before zv_state_lock
-	 */
-	if (zv->zv_open_count == 0) {
-		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
-			mutex_exit(&zv->zv_state_lock);
-			rw_enter(&zv->zv_suspend_lock, RW_READER);
-			mutex_enter(&zv->zv_state_lock);
-			/* check to see if zv_suspend_lock is needed */
-			if (zv->zv_open_count != 0) {
-				rw_exit(&zv->zv_suspend_lock);
-				drop_suspend = B_FALSE;
-			}
-		}
-	} else {
-		drop_suspend = B_FALSE;
-	}
-	rw_exit(&zvol_state_lock);
-
-	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
-	ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
-
-	if (zv->zv_open_count == 0) {
-		error = zvol_first_open(zv, !(flag & FMODE_WRITE));
-		if (error)
-			goto out_mutex;
-	}
-
-	if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
-		error = -EROFS;
-		goto out_open_count;
-	}
-
-	zv->zv_open_count++;
-
-	mutex_exit(&zv->zv_state_lock);
-	if (drop_suspend)
-		rw_exit(&zv->zv_suspend_lock);
-
-	zfs_check_media_change(bdev);
-
-	return (0);
-
-out_open_count:
-	if (zv->zv_open_count == 0)
-		zvol_last_close(zv);
-
-out_mutex:
-	mutex_exit(&zv->zv_state_lock);
-	if (drop_suspend)
-		rw_exit(&zv->zv_suspend_lock);
-	if (error == -ERESTARTSYS)
-		schedule();
-
-	return (SET_ERROR(error));
-}
-
-#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
-static void
-#else
-static int
-#endif
-zvol_release(struct gendisk *disk, fmode_t mode)
-{
-	zvol_state_t *zv;
-	boolean_t drop_suspend = B_TRUE;
-
-	rw_enter(&zvol_state_lock, RW_READER);
-	zv = disk->private_data;
-
-	mutex_enter(&zv->zv_state_lock);
-	ASSERT(zv->zv_open_count > 0);
-	/*
-	 * make sure zvol is not suspended during last close
-	 * (hold zv_suspend_lock) and respect proper lock acquisition
-	 * ordering - zv_suspend_lock before zv_state_lock
-	 */
-	if (zv->zv_open_count == 1) {
-		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
-			mutex_exit(&zv->zv_state_lock);
-			rw_enter(&zv->zv_suspend_lock, RW_READER);
-			mutex_enter(&zv->zv_state_lock);
-			/* check to see if zv_suspend_lock is needed */
-			if (zv->zv_open_count != 1) {
-				rw_exit(&zv->zv_suspend_lock);
-				drop_suspend = B_FALSE;
-			}
-		}
-	} else {
-		drop_suspend = B_FALSE;
-	}
-	rw_exit(&zvol_state_lock);
-
-	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
-	ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
-
-	zv->zv_open_count--;
-	if (zv->zv_open_count == 0)
-		zvol_last_close(zv);
-
-	mutex_exit(&zv->zv_state_lock);
-
-	if (drop_suspend)
-		rw_exit(&zv->zv_suspend_lock);
-
-#ifndef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
-	return (0);
-#endif
-}
-
-static int
-zvol_ioctl(struct block_device *bdev, fmode_t mode,
-    unsigned int cmd, unsigned long arg)
-{
-	zvol_state_t *zv = bdev->bd_disk->private_data;
-	int error = 0;
-
-	ASSERT3U(zv->zv_open_count, >, 0);
-
-	switch (cmd) {
-	case BLKFLSBUF:
-		fsync_bdev(bdev);
-		invalidate_bdev(bdev);
-		rw_enter(&zv->zv_suspend_lock, RW_READER);
-
-		if (!(zv->zv_flags & ZVOL_RDONLY))
-			txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
-
-		rw_exit(&zv->zv_suspend_lock);
-		break;
-
-	case BLKZNAME:
-		mutex_enter(&zv->zv_state_lock);
-		error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
-		mutex_exit(&zv->zv_state_lock);
-		break;
-
-	default:
-		error = -ENOTTY;
-		break;
-	}
-
-	return (SET_ERROR(error));
-}
-
-#ifdef CONFIG_COMPAT
-static int
-zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
-    unsigned cmd, unsigned long arg)
-{
-	return (zvol_ioctl(bdev, mode, cmd, arg));
-}
-#else
-#define	zvol_compat_ioctl	NULL
-#endif
-
-/*
- * Linux 2.6.38 preferred interface.
- */
-#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
-static unsigned int
-zvol_check_events(struct gendisk *disk, unsigned int clearing)
-{
-	unsigned int mask = 0;
-
-	rw_enter(&zvol_state_lock, RW_READER);
-
-	zvol_state_t *zv = disk->private_data;
-	if (zv != NULL) {
-		mutex_enter(&zv->zv_state_lock);
-		mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
-		zv->zv_changed = 0;
-		mutex_exit(&zv->zv_state_lock);
-	}
-
-	rw_exit(&zvol_state_lock);
-
-	return (mask);
-}
-#else
-static int zvol_media_changed(struct gendisk *disk)
-{
-	int changed = 0;
-
-	rw_enter(&zvol_state_lock, RW_READER);
-
-	zvol_state_t *zv = disk->private_data;
-	if (zv != NULL) {
-		mutex_enter(&zv->zv_state_lock);
-		changed = zv->zv_changed;
-		zv->zv_changed = 0;
-		mutex_exit(&zv->zv_state_lock);
-	}
-
-	rw_exit(&zvol_state_lock);
-
-	return (changed);
-}
-#endif
-
-static int zvol_revalidate_disk(struct gendisk *disk)
-{
-	rw_enter(&zvol_state_lock, RW_READER);
-
-	zvol_state_t *zv = disk->private_data;
-	if (zv != NULL) {
-		mutex_enter(&zv->zv_state_lock);
-		set_capacity(zv->zv_disk, zv->zv_volsize >> SECTOR_BITS);
-		mutex_exit(&zv->zv_state_lock);
-	}
-
-	rw_exit(&zvol_state_lock);
-
-	return (0);
-}
-
-/*
- * Provide a simple virtual geometry for legacy compatibility.  For devices
- * smaller than 1 MiB a small head and sector count is used to allow very
- * tiny devices.  For devices over 1 Mib a standard head and sector count
- * is used to keep the cylinders count reasonable.
- */
-static int
-zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	zvol_state_t *zv = bdev->bd_disk->private_data;
-	sector_t sectors;
-
-	ASSERT3U(zv->zv_open_count, >, 0);
-
-	sectors = get_capacity(zv->zv_disk);
-
-	if (sectors > 2048) {
-		geo->heads = 16;
-		geo->sectors = 63;
-	} else {
-		geo->heads = 2;
-		geo->sectors = 4;
-	}
-
-	geo->start = 0;
-	geo->cylinders = sectors / (geo->heads * geo->sectors);
-
-	return (0);
-}
-
-static struct kobject *
-zvol_probe(dev_t dev, int *part, void *arg)
-{
-	zvol_state_t *zv;
-	struct kobject *kobj;
-
-	zv = zvol_find_by_dev(dev);
-	kobj = zv ? get_disk_and_module(zv->zv_disk) : NULL;
-	ASSERT(zv == NULL || MUTEX_HELD(&zv->zv_state_lock));
-	if (zv)
-		mutex_exit(&zv->zv_state_lock);
-
-	return (kobj);
-}
-
-static struct block_device_operations zvol_ops = {
-	.open			= zvol_open,
-	.release		= zvol_release,
-	.ioctl			= zvol_ioctl,
-	.compat_ioctl		= zvol_compat_ioctl,
-#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
-	.check_events		= zvol_check_events,
-#else
-	.media_changed		= zvol_media_changed,
-#endif
-	.revalidate_disk	= zvol_revalidate_disk,
-	.getgeo			= zvol_getgeo,
-	.owner			= THIS_MODULE,
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-    .submit_bio		= zvol_submit_bio,
-#endif
-};
-
-/*
- * Allocate memory for a new zvol_state_t and setup the required
- * request queue and generic disk structures for the block device.
- */
-static zvol_state_t *
-zvol_alloc(dev_t dev, const char *name)
-{
-	zvol_state_t *zv;
-	uint64_t volmode;
-
-	if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
-		return (NULL);
-
-	if (volmode == ZFS_VOLMODE_DEFAULT)
-		volmode = zvol_volmode;
-
-	if (volmode == ZFS_VOLMODE_NONE)
-		return (NULL);
-
-	zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
-
-	list_link_init(&zv->zv_next);
-
-	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
-
-#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
-	zv->zv_queue = blk_alloc_queue(NUMA_NO_NODE);
-#else
-	zv->zv_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
-#endif
-	if (zv->zv_queue == NULL)
-		goto out_kmem;
-
-	blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
-
-	/* Limit read-ahead to a single page to prevent over-prefetching. */
-	blk_queue_set_read_ahead(zv->zv_queue, 1);
-
-	/* Disable write merging in favor of the ZIO pipeline. */
-	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zv->zv_queue);
-
-	zv->zv_disk = alloc_disk(ZVOL_MINORS);
-	if (zv->zv_disk == NULL)
-		goto out_queue;
-
-	zv->zv_queue->queuedata = zv;
-	zv->zv_dev = dev;
-	zv->zv_open_count = 0;
-	strlcpy(zv->zv_name, name, MAXNAMELEN);
-
-	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
-	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
-
-	zv->zv_disk->major = zvol_major;
-#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
-	zv->zv_disk->events = DISK_EVENT_MEDIA_CHANGE;
-#endif
-
-	if (volmode == ZFS_VOLMODE_DEV) {
-		/*
-		 * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set
-		 * gendisk->minors = 1 as noted in include/linux/genhd.h.
-		 * Also disable extended partition numbers (GENHD_FL_EXT_DEVT)
-		 * and suppresses partition scanning (GENHD_FL_NO_PART_SCAN)
-		 * setting gendisk->flags accordingly.
-		 */
-		zv->zv_disk->minors = 1;
-#if defined(GENHD_FL_EXT_DEVT)
-		zv->zv_disk->flags &= ~GENHD_FL_EXT_DEVT;
-#endif
-#if defined(GENHD_FL_NO_PART_SCAN)
-		zv->zv_disk->flags |= GENHD_FL_NO_PART_SCAN;
-#endif
-	}
-	zv->zv_disk->first_minor = (dev & MINORMASK);
-	zv->zv_disk->fops = &zvol_ops;
-	zv->zv_disk->private_data = zv;
-	zv->zv_disk->queue = zv->zv_queue;
-	snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s%d",
-	    ZVOL_DEV_NAME, (dev & MINORMASK));
-
-	return (zv);
-
-out_queue:
-	blk_cleanup_queue(zv->zv_queue);
-out_kmem:
-	kmem_free(zv, sizeof (zvol_state_t));
-
-	return (NULL);
-}
-
-/*
- * Cleanup then free a zvol_state_t which was created by zvol_alloc().
- * At this time, the structure is not opened by anyone, is taken off
- * the zvol_state_list, and has its private data set to NULL.
- * The zvol_state_lock is dropped.
- */
-static void
-zvol_free(void *arg)
-{
-	zvol_state_t *zv = arg;
-
-	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
-	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
-	ASSERT(zv->zv_open_count == 0);
-	ASSERT(zv->zv_disk->private_data == NULL);
-
-	rw_destroy(&zv->zv_suspend_lock);
-	zfs_rangelock_fini(&zv->zv_rangelock);
-
-	del_gendisk(zv->zv_disk);
-	blk_cleanup_queue(zv->zv_queue);
-	put_disk(zv->zv_disk);
-
-	ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS);
-
-	mutex_destroy(&zv->zv_state_lock);
-	dataset_kstats_destroy(&zv->zv_kstat);
-
-	kmem_free(zv, sizeof (zvol_state_t));
-}
-
-/*
- * Create a block device minor node and setup the linkage between it
- * and the specified volume.  Once this function returns the block
- * device is live and ready for use.
- */
-static int
-zvol_create_minor_impl(const char *name)
-{
-	zvol_state_t *zv;
-	objset_t *os;
-	dmu_object_info_t *doi;
-	uint64_t volsize;
-	uint64_t len;
-	unsigned minor = 0;
-	int error = 0;
-	int idx;
-	uint64_t hash = zvol_name_hash(name);
-
-	if (zvol_inhibit_dev)
-		return (0);
-
-	idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
-	if (idx < 0)
-		return (SET_ERROR(-idx));
-	minor = idx << ZVOL_MINOR_BITS;
-
-	zv = zvol_find_by_name_hash(name, hash, RW_NONE);
-	if (zv) {
-		ASSERT(MUTEX_HELD(&zv->zv_state_lock));
-		mutex_exit(&zv->zv_state_lock);
-		ida_simple_remove(&zvol_ida, idx);
-		return (SET_ERROR(EEXIST));
-	}
-
-	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
-
-	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
-	if (error)
-		goto out_doi;
-
-	error = dmu_object_info(os, ZVOL_OBJ, doi);
-	if (error)
-		goto out_dmu_objset_disown;
-
-	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
-	if (error)
-		goto out_dmu_objset_disown;
-
-	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
-	if (zv == NULL) {
-		error = SET_ERROR(EAGAIN);
-		goto out_dmu_objset_disown;
-	}
-	zv->zv_hash = hash;
-
-	if (dmu_objset_is_snapshot(os))
-		zv->zv_flags |= ZVOL_RDONLY;
-
-	zv->zv_volblocksize = doi->doi_data_block_size;
-	zv->zv_volsize = volsize;
-	zv->zv_objset = os;
-
-	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
-
-	blk_queue_max_hw_sectors(zv->zv_queue, (DMU_MAX_ACCESS / 4) >> 9);
-	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
-	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
-	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
-	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
-	blk_queue_max_discard_sectors(zv->zv_queue,
-	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
-	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zv->zv_queue);
-#ifdef QUEUE_FLAG_NONROT
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, zv->zv_queue);
-#endif
-#ifdef QUEUE_FLAG_ADD_RANDOM
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue);
-#endif
-	/* This flag was introduced in kernel version 4.12. */
-#ifdef QUEUE_FLAG_SCSI_PASSTHROUGH
-	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_queue);
-#endif
-
-	#ifdef _KERNEL
-	printk(KERN_INFO "zvol %s %d:%d prepare to replay\n", name, zvol_major, minor);
-	#endif
-	if (spa_writeable(dmu_objset_spa(os))) {
-		if (zil_replay_disable)
-			zil_destroy(dmu_objset_zil(os), B_FALSE);
-		else
-			zil_replay(os, zv, zvol_replay_vector);
-	}
-	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
-	dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
-
-	/*
-	 * When udev detects the addition of the device it will immediately
-	 * invoke blkid(8) to determine the type of content on the device.
-	 * Prefetching the blocks commonly scanned by blkid(8) will speed
-	 * up this process.
-	 */
-	len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
-	if (len > 0) {
-		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
-		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
-		    ZIO_PRIORITY_SYNC_READ);
-	}
-
-	zv->zv_objset = NULL;
-out_dmu_objset_disown:
-	dmu_objset_disown(os, B_TRUE, FTAG);
-out_doi:
-	kmem_free(doi, sizeof (dmu_object_info_t));
-
-	if (error == 0) {
-		rw_enter(&zvol_state_lock, RW_WRITER);
-		zvol_insert(zv);
-		rw_exit(&zvol_state_lock);
-		add_disk(zv->zv_disk);
-		#ifdef _KERNEL
-		printk(KERN_INFO "zvol %s %d:%d registered with kernel\n", name, zvol_major, minor);
-		#endif
-	} else {
-		ida_simple_remove(&zvol_ida, idx);
-	}
-
-	return (SET_ERROR(error));
-}
-
-/*
- * Rename a block device minor mode for the specified volume.
- */
-static void
-zvol_rename_minor(zvol_state_t *zv, const char *newname)
-{
-	int readonly = get_disk_ro(zv->zv_disk);
-
-	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
-	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
-
-	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
-
-	/* move to new hashtable entry  */
-	zv->zv_hash = zvol_name_hash(zv->zv_name);
-	hlist_del(&zv->zv_hlink);
-	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
-
-	/*
-	 * The block device's read-only state is briefly changed causing
-	 * a KOBJ_CHANGE uevent to be issued.  This ensures udev detects
-	 * the name change and fixes the symlinks.  This does not change
-	 * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never
-	 * changes.  This would normally be done using kobject_uevent() but
-	 * that is a GPL-only symbol which is why we need this workaround.
-	 */
-	set_disk_ro(zv->zv_disk, !readonly);
-	set_disk_ro(zv->zv_disk, readonly);
-}
-
 typedef struct minors_job {
 	list_t *list;
 	list_node_t link;
@@ -2021,7 +995,7 @@
 		    "%s is not a snapshot name\n", dsname);
 	} else {
 		minors_job_t *job;
-		char *n = strdup(dsname);
+		char *n = kmem_strdup(dsname);
 		if (n == NULL)
 			return (0);
 
@@ -2039,6 +1013,68 @@
 }
 
 /*
+ * If spa_keystore_load_wkey() is called for an encrypted zvol,
+ * we need to look for any clones also using the key. This function
+ * is "best effort" - so we just skip over it if there are failures.
+ */
+static void
+zvol_add_clones(const char *dsname, list_t *minors_list)
+{
+	/* Also check if it has clones */
+	dsl_dir_t *dd = NULL;
+	dsl_pool_t *dp = NULL;
+
+	if (dsl_pool_hold(dsname, FTAG, &dp) != 0)
+		return;
+
+	if (!spa_feature_is_enabled(dp->dp_spa,
+	    SPA_FEATURE_ENCRYPTION))
+		goto out;
+
+	if (dsl_dir_hold(dp, dsname, FTAG, &dd, NULL) != 0)
+		goto out;
+
+	if (dsl_dir_phys(dd)->dd_clones == 0)
+		goto out;
+
+	zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
+	zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
+
+	for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
+	    zap_cursor_retrieve(zc, za) == 0;
+	    zap_cursor_advance(zc)) {
+		dsl_dataset_t *clone;
+		minors_job_t *job;
+
+		if (dsl_dataset_hold_obj(dd->dd_pool,
+		    za->za_first_integer, FTAG, &clone) == 0) {
+
+			char name[ZFS_MAX_DATASET_NAME_LEN];
+			dsl_dataset_name(clone, name);
+
+			char *n = kmem_strdup(name);
+			job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
+			job->name = n;
+			job->list = minors_list;
+			job->error = 0;
+			list_insert_tail(minors_list, job);
+
+			dsl_dataset_rele(clone, FTAG);
+		}
+	}
+	zap_cursor_fini(zc);
+	kmem_free(za, sizeof (zap_attribute_t));
+	kmem_free(zc, sizeof (zap_cursor_t));
+
+out:
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+}
+
+/*
  * Mask errors to continue dmu_objset_find() traversal
  */
 static int
@@ -2063,7 +1099,7 @@
 	 */
 	if (strchr(dsname, '@') == 0) {
 		minors_job_t *job;
-		char *n = strdup(dsname);
+		char *n = kmem_strdup(dsname);
 		if (n == NULL)
 			return (0);
 
@@ -2076,12 +1112,14 @@
 		taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
 		    TQ_SLEEP);
 
+		zvol_add_clones(dsname, minors_list);
+
 		if (snapdev == ZFS_SNAPDEV_VISIBLE) {
 			/*
 			 * traverse snapshots only, do not traverse children,
 			 * and skip the 'dsname'
 			 */
-			error = dmu_objset_find((char *)dsname,
+			error = dmu_objset_find(dsname,
 			    zvol_create_snap_minor_cb, (void *)job,
 			    DS_FIND_SNAPSHOTS);
 		}
@@ -2110,17 +1148,14 @@
  * 'visible' (which also verifies that the parent is a zvol), and if so,
  * a minor node for that snapshot is created.
  */
-static int
-zvol_create_minors_impl(const char *name)
+void
+zvol_create_minors_recursive(const char *name)
 {
-	int error = 0;
-	fstrans_cookie_t cookie;
-	char *atp, *parent;
 	list_t minors_list;
 	minors_job_t *job;
 
 	if (zvol_inhibit_dev)
-		return (0);
+		return;
 
 	/*
 	 * This is the list for prefetch jobs. Whenever we found a match
@@ -2134,26 +1169,22 @@
 	list_create(&minors_list, sizeof (minors_job_t),
 	    offsetof(minors_job_t, link));
 
-	parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-	(void) strlcpy(parent, name, MAXPATHLEN);
 
-	if ((atp = strrchr(parent, '@')) != NULL) {
+	if (strchr(name, '@') != NULL) {
 		uint64_t snapdev;
 
-		*atp = '\0';
-		error = dsl_prop_get_integer(parent, "snapdev",
+		int error = dsl_prop_get_integer(name, "snapdev",
 		    &snapdev, NULL);
 
 		if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
-			error = zvol_create_minor_impl(name);
+			(void) ops->zv_create_minor(name);
 	} else {
-		cookie = spl_fstrans_mark();
-		error = dmu_objset_find(parent, zvol_create_minors_cb,
+		fstrans_cookie_t cookie = spl_fstrans_mark();
+		(void) dmu_objset_find(name, zvol_create_minors_cb,
 		    &minors_list, DS_FIND_CHILDREN);
 		spl_fstrans_unmark(cookie);
 	}
 
-	kmem_free(parent, MAXPATHLEN);
 	taskq_wait_outstanding(system_taskq, 0);
 
 	/*
@@ -2163,25 +1194,58 @@
 	while ((job = list_head(&minors_list)) != NULL) {
 		list_remove(&minors_list, job);
 		if (!job->error)
-			zvol_create_minor_impl(job->name);
-		strfree(job->name);
+			(void) ops->zv_create_minor(job->name);
+		kmem_strfree(job->name);
 		kmem_free(job, sizeof (minors_job_t));
 	}
 
 	list_destroy(&minors_list);
+}
 
-	return (SET_ERROR(error));
+void
+zvol_create_minor(const char *name)
+{
+	/*
+	 * Note: the dsl_pool_config_lock must not be held.
+	 * Minor node creation needs to obtain the zvol_state_lock.
+	 * zvol_open() obtains the zvol_state_lock and then the dsl pool
+	 * config lock.  Therefore, we can't have the config lock now if
+	 * we are going to wait for the zvol_state_lock, because it
+	 * would be a lock order inversion which could lead to deadlock.
+	 */
+
+	if (zvol_inhibit_dev)
+		return;
+
+	if (strchr(name, '@') != NULL) {
+		uint64_t snapdev;
+
+		int error = dsl_prop_get_integer(name,
+		    "snapdev", &snapdev, NULL);
+
+		if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
+			(void) ops->zv_create_minor(name);
+	} else {
+		(void) ops->zv_create_minor(name);
+	}
 }
 
 /*
  * Remove minors for specified dataset including children and snapshots.
  */
+
 static void
+zvol_free_task(void *arg)
+{
+	ops->zv_free(arg);
+}
+
+void
 zvol_remove_minors_impl(const char *name)
 {
 	zvol_state_t *zv, *zv_next;
 	int namelen = ((name) ? strlen(name) : 0);
-	taskqid_t t, tid = TASKQID_INVALID;
+	taskqid_t t;
 	list_t free_list;
 
 	if (zvol_inhibit_dev)
@@ -2218,18 +1282,16 @@
 			 * Cleared while holding zvol_state_lock as a writer
 			 * which will prevent zvol_open() from opening it.
 			 */
-			zv->zv_disk->private_data = NULL;
+			ops->zv_clear_private(zv);
 
 			/* Drop zv_state_lock before zvol_free() */
 			mutex_exit(&zv->zv_state_lock);
 
 			/* Try parallel zv_free, if failed do it in place */
-			t = taskq_dispatch(system_taskq, zvol_free, zv,
+			t = taskq_dispatch(system_taskq, zvol_free_task, zv,
 			    TQ_SLEEP);
 			if (t == TASKQID_INVALID)
 				list_insert_head(&free_list, zv);
-			else
-				tid = t;
 		} else {
 			mutex_exit(&zv->zv_state_lock);
 		}
@@ -2239,11 +1301,8 @@
 	/* Drop zvol_state_lock before calling zvol_free() */
 	while ((zv = list_head(&free_list)) != NULL) {
 		list_remove(&free_list, zv);
-		zvol_free(zv);
+		ops->zv_free(zv);
 	}
-
-	if (tid != TASKQID_INVALID)
-		taskq_wait_outstanding(system_taskq, tid);
 }
 
 /* Remove minor for this specific volume only */
@@ -2275,12 +1334,7 @@
 			}
 			zvol_remove(zv);
 
-			/*
-			 * Cleared while holding zvol_state_lock as a writer
-			 * which will prevent zvol_open() from opening it.
-			 */
-			zv->zv_disk->private_data = NULL;
-
+			ops->zv_clear_private(zv);
 			mutex_exit(&zv->zv_state_lock);
 			break;
 		} else {
@@ -2292,7 +1346,7 @@
 	rw_exit(&zvol_state_lock);
 
 	if (zv != NULL)
-		zvol_free(zv);
+		ops->zv_free(zv);
 }
 
 /*
@@ -2302,13 +1356,12 @@
 zvol_rename_minors_impl(const char *oldname, const char *newname)
 {
 	zvol_state_t *zv, *zv_next;
-	int oldnamelen, newnamelen;
+	int oldnamelen;
 
 	if (zvol_inhibit_dev)
 		return;
 
 	oldnamelen = strlen(oldname);
-	newnamelen = strlen(newname);
 
 	rw_enter(&zvol_state_lock, RW_READER);
 
@@ -2318,15 +1371,15 @@
 		mutex_enter(&zv->zv_state_lock);
 
 		if (strcmp(zv->zv_name, oldname) == 0) {
-			zvol_rename_minor(zv, newname);
+			ops->zv_rename_minor(zv, newname);
 		} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
 		    (zv->zv_name[oldnamelen] == '/' ||
 		    zv->zv_name[oldnamelen] == '@')) {
 			char *name = kmem_asprintf("%s%c%s", newname,
 			    zv->zv_name[oldnamelen],
 			    zv->zv_name + oldnamelen + 1);
-			zvol_rename_minor(zv, name);
-			strfree(name);
+			ops->zv_rename_minor(zv, name);
+			kmem_strfree(name);
 		}
 
 		mutex_exit(&zv->zv_state_lock);
@@ -2349,7 +1402,7 @@
 
 	switch (arg->snapdev) {
 		case ZFS_SNAPDEV_VISIBLE:
-			(void) zvol_create_minor_impl(dsname);
+			(void) ops->zv_create_minor(dsname);
 			break;
 		case ZFS_SNAPDEV_HIDDEN:
 			(void) zvol_remove_minor_impl(dsname);
@@ -2379,7 +1432,9 @@
 static void
 zvol_set_volmode_impl(char *name, uint64_t volmode)
 {
-	fstrans_cookie_t cookie = spl_fstrans_mark();
+	fstrans_cookie_t cookie;
+	uint64_t old_volmode;
+	zvol_state_t *zv;
 
 	if (strchr(name, '@') != NULL)
 		return;
@@ -2387,11 +1442,20 @@
 	/*
 	 * It's unfortunate we need to remove minors before we create new ones:
 	 * this is necessary because our backing gendisk (zvol_state->zv_disk)
-	 * coule be different when we set, for instance, volmode from "geom"
+	 * could be different when we set, for instance, volmode from "geom"
 	 * to "dev" (or vice versa).
-	 * A possible optimization is to modify our consumers so we don't get
-	 * called when "volmode" does not change.
 	 */
+	zv = zvol_find_by_name(name, RW_NONE);
+	if (zv == NULL && volmode == ZFS_VOLMODE_NONE)
+			return;
+	if (zv != NULL) {
+		old_volmode = zv->zv_volmode;
+		mutex_exit(&zv->zv_state_lock);
+		if (old_volmode == volmode)
+			return;
+		zvol_wait_close(zv);
+	}
+	cookie = spl_fstrans_mark();
 	switch (volmode) {
 		case ZFS_VOLMODE_NONE:
 			(void) zvol_remove_minor_impl(name);
@@ -2399,17 +1463,16 @@
 		case ZFS_VOLMODE_GEOM:
 		case ZFS_VOLMODE_DEV:
 			(void) zvol_remove_minor_impl(name);
-			(void) zvol_create_minor_impl(name);
+			(void) ops->zv_create_minor(name);
 			break;
 		case ZFS_VOLMODE_DEFAULT:
 			(void) zvol_remove_minor_impl(name);
 			if (zvol_volmode == ZFS_VOLMODE_NONE)
 				break;
 			else /* if zvol_volmode is invalid defaults to "geom" */
-				(void) zvol_create_minor_impl(name);
+				(void) ops->zv_create_minor(name);
 			break;
 	}
-
 	spl_fstrans_unmark(cookie);
 }
 
@@ -2418,7 +1481,6 @@
     uint64_t value)
 {
 	zvol_task_t *task;
-	char *delim;
 
 	/* Never allow tasks on hidden names. */
 	if (name1[0] == '$')
@@ -2427,8 +1489,6 @@
 	task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
 	task->op = op;
 	task->value = value;
-	delim = strchr(name1, '/');
-	strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN);
 
 	strlcpy(task->name1, name1, MAXNAMELEN);
 	if (name2 != NULL)
@@ -2447,14 +1507,11 @@
  * The worker thread function performed asynchronously.
  */
 static void
-zvol_task_cb(void *param)
+zvol_task_cb(void *arg)
 {
-	zvol_task_t *task = (zvol_task_t *)param;
+	zvol_task_t *task = arg;
 
 	switch (task->op) {
-	case ZVOL_ASYNC_CREATE_MINORS:
-		(void) zvol_create_minors_impl(task->name1);
-		break;
 	case ZVOL_ASYNC_REMOVE_MINORS:
 		zvol_remove_minors_impl(task->name1);
 		break;
@@ -2656,21 +1713,6 @@
 }
 
 void
-zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
-{
-	zvol_task_t *task;
-	taskqid_t id;
-
-	task = zvol_task_alloc(ZVOL_ASYNC_CREATE_MINORS, name, NULL, ~0ULL);
-	if (task == NULL)
-		return;
-
-	id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
-	if ((async == B_FALSE) && (id != TASKQID_INVALID))
-		taskq_wait_id(spa->spa_zvol_taskq, id);
-}
-
-void
 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
 {
 	zvol_task_t *task;
@@ -2701,92 +1743,50 @@
 		taskq_wait_id(spa->spa_zvol_taskq, id);
 }
 
-int
-zvol_init(void)
+boolean_t
+zvol_is_zvol(const char *name)
 {
-	int threads = MIN(MAX(zvol_threads, 1), 1024);
-	int i, error;
+
+	return (ops->zv_is_zvol(name));
+}
+
+void
+zvol_register_ops(const zvol_platform_ops_t *zvol_ops)
+{
+	ops = zvol_ops;
+}
+
+int
+zvol_init_impl(void)
+{
+	int i;
 
 	list_create(&zvol_state_list, sizeof (zvol_state_t),
 	    offsetof(zvol_state_t, zv_next));
 	rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL);
-	ida_init(&zvol_ida);
-
-	zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
-	    threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
-	if (zvol_taskq == NULL) {
-		printk(KERN_INFO "ZFS: taskq_create() failed\n");
-		error = -ENOMEM;
-		goto out;
-	}
 
 	zvol_htable = kmem_alloc(ZVOL_HT_SIZE * sizeof (struct hlist_head),
 	    KM_SLEEP);
-	if (!zvol_htable) {
-		error = -ENOMEM;
-		goto out_taskq;
-	}
 	for (i = 0; i < ZVOL_HT_SIZE; i++)
 		INIT_HLIST_HEAD(&zvol_htable[i]);
 
-	error = register_blkdev(zvol_major, ZVOL_DRIVER);
-	if (error) {
-		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
-		goto out_free;
-	}
-
-	blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
-	    THIS_MODULE, zvol_probe, NULL, NULL);
-
 	return (0);
-
-out_free:
-	kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
-out_taskq:
-	taskq_destroy(zvol_taskq);
-out:
-	ida_destroy(&zvol_ida);
-	rw_destroy(&zvol_state_lock);
-	list_destroy(&zvol_state_list);
-
-	return (SET_ERROR(error));
 }
 
 void
-zvol_fini(void)
+zvol_fini_impl(void)
 {
 	zvol_remove_minors_impl(NULL);
 
-	blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
-	unregister_blkdev(zvol_major, ZVOL_DRIVER);
-	kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
+	/*
+	 * The call to "zvol_remove_minors_impl" may dispatch entries to
+	 * the system_taskq, but it doesn't wait for those entries to
+	 * complete before it returns. Thus, we must wait for all of the
+	 * removals to finish, before we can continue.
+	 */
+	taskq_wait_outstanding(system_taskq, 0);
 
-	taskq_destroy(zvol_taskq);
+	kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
 	list_destroy(&zvol_state_list);
 	rw_destroy(&zvol_state_lock);
-
-	ida_destroy(&zvol_ida);
 }
-
-/* BEGIN CSTYLED */
-module_param(zvol_inhibit_dev, uint, 0644);
-MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
-
-module_param(zvol_major, uint, 0444);
-MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
-
-module_param(zvol_threads, uint, 0444);
-MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
-
-module_param(zvol_request_sync, uint, 0644);
-MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
-
-module_param(zvol_max_discard_blocks, ulong, 0444);
-MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
-
-module_param(zvol_prefetch_bytes, uint, 0644);
-MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
-
-module_param(zvol_volmode, uint, 0644);
-MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
-/* END CSTYLED */

diff --git a/zfs/module/zstd/Makefile.in b/zfs/module/zstd/Makefile.in
new file mode 100644
index 0000000..4d9398e
--- /dev/null
+++ b/zfs/module/zstd/Makefile.in

@@ -0,0 +1,45 @@
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
+obj = @abs_builddir@
+zstd_include = $(src)/include
+else
+zstd_include = $(srctree)/$(src)/include
+endif
+
+MODULE := zzstd
+
+obj-$(CONFIG_ZFS) := $(MODULE).o
+
+asflags-y := -I$(zstd_include)
+ccflags-y := -I$(zstd_include)
+
+# Zstd uses -O3 by default, so we should follow
+ccflags-y += -O3
+
+# -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h
+# Set it for other compilers, too.
+$(obj)/lib/zstd.o: c_flags += -fno-tree-vectorize
+
+# Avoid "SSE register return with SSE disabled" errors if -march=znverX is passed
+$(obj)/lib/zstd.o: c_flags += -U__BMI__
+
+# Quiet warnings about frame size due to unused code in unmodified zstd lib
+$(obj)/lib/zstd.o: c_flags += -Wframe-larger-than=20480
+
+# Disable aarch64 neon SIMD instructions for kernel mode
+$(obj)/lib/zstd.o: c_flags += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w
+
+$(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h
+
+$(MODULE)-objs += zfs_zstd.o
+$(MODULE)-objs += lib/zstd.o
+$(MODULE)-objs += zstd_sparc.o
+
+all:
+	mkdir -p lib
+
+gensymbols:
+	@OBJDUMP@ -t lib/zstd.o | awk '$$2 == "g" && !/ zfs_/ {print "#define\t" $$6 " zfs_" $$6}' | sort >> include/zstd_compat_wrapper.h
+
+checksymbols:
+	@OBJDUMP@ -t lib/zstd.o | awk '/file format/ {print}  $$2 == "g" && (!/ zfs_/ && !/ __pfx_zfs_/) {++ret; print}  END {exit ret}'

diff --git a/zfs/module/zstd/README.md b/zfs/module/zstd/README.md
new file mode 100644
index 0000000..eed229e
--- /dev/null
+++ b/zfs/module/zstd/README.md

@@ -0,0 +1,65 @@
+# ZSTD-On-ZFS Library Manual
+
+## Introduction
+
+This subtree contains the ZSTD library used in ZFS. It is heavily cut-down by
+dropping any unneeded files, and combined into a single file, but otherwise is
+intentionally unmodified. Please do not alter the file containing the zstd
+library, besides upgrading to a newer ZSTD release.
+
+Tree structure:
+
+* `zfs_zstd.c` is the actual `zzstd` kernel module.
+* `lib/` contains the unmodified, [_"amalgamated"_](https://github.com/facebook/zstd/blob/dev/contrib/single_file_libs/README.md)
+  version of the `Zstandard` library, generated from our template file
+* `zstd-in.c` is our template file for generating the library
+* `include/`: This directory contains supplemental includes for platform
+  compatibility, which are not expected to be used by ZFS elsewhere in the
+  future. Thus we keep them private to ZSTD.
+
+## Updating ZSTD
+
+To update ZSTD the following steps need to be taken:
+
+1. Grab the latest release of [ZSTD](https://github.com/facebook/zstd/releases).
+2. Update `module/zstd/zstd-in.c` if required. (see
+   `zstd/contrib/single_file_libs/zstd-in.c` in the zstd repository)
+3. Generate the "single-file-library" and put it in `module/zstd/lib/`.
+4. Copy the following files to `module/zstd/lib/`:
+   - `zstd/lib/zstd.h`
+   - `zstd/lib/zstd_errors.h` (`zstd/lib/common/zstd_errors.h` in older releases)
+
+This can be done using a few shell commands from inside the zfs repo:
+
+~~~sh
+cd PATH/TO/ZFS
+
+url="https://github.com/facebook/zstd"
+release="$(curl -s "${url}"/releases/latest | grep -oP '(?<=v)[\d\.]+')"
+zstd="/tmp/zstd-${release}/"
+
+wget -O /tmp/zstd.tar.gz \
+    "${url}/releases/download/v${release}/zstd-${release}.tar.gz"
+tar -C /tmp -xzf /tmp/zstd.tar.gz
+
+cp ${zstd}/lib/zstd.h module/zstd/lib/
+cp ${zstd}/lib/zstd_errors.h module/zstd/lib/
+${zstd}/contrib/single_file_libs/combine.sh \
+    -r ${zstd}/lib -o module/zstd/lib/zstd.c module/zstd/zstd-in.c
+~~~
+
+Note: if the zstd library for zfs is updated to a newer version,
+the macro list in `include/zstd_compat_wrapper.h` usually needs to be updated.
+This can be done with some hand crafting of the output of the following
+command: `nm zstd.o | awk '{print "#define "$3 " zfs_" $3}' > macrotable`
+
+
+## Altering ZSTD and breaking changes
+
+If ZSTD made changes that break compatibility or you need to make breaking
+changes to the way we handle ZSTD, it is required to maintain backwards
+compatibility.
+
+We already save the ZSTD version number within the block header to be used
+to add future compatibility checks and/or fixes. However, currently it is
+not actually used in such a way.

diff --git a/zfs/module/zstd/include/aarch64_compat.h b/zfs/module/zstd/include/aarch64_compat.h
new file mode 100644
index 0000000..088517d
--- /dev/null
+++ b/zfs/module/zstd/include/aarch64_compat.h

@@ -0,0 +1,37 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2018-2020, Sebastian Gottschall
+ */
+
+#ifdef _KERNEL
+#undef __aarch64__
+#endif

diff --git a/zfs/module/zstd/include/limits.h b/zfs/module/zstd/include/limits.h
new file mode 100644
index 0000000..3bf5b67
--- /dev/null
+++ b/zfs/module/zstd/include/limits.h

@@ -0,0 +1,63 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_LIMITS_H
+#define	_ZSTD_LIMITS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/limits.h>
+#elif defined(__linux__)
+#include <linux/limits.h>
+#include <linux/kernel.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <limits.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_LIMITS_H */

diff --git a/zfs/module/zstd/include/sparc_compat.h b/zfs/module/zstd/include/sparc_compat.h
new file mode 100644
index 0000000..14c1bdd
--- /dev/null
+++ b/zfs/module/zstd/include/sparc_compat.h

@@ -0,0 +1,4 @@
+#if defined(__sparc)
+uint64_t __bswapdi2(uint64_t in);
+uint32_t __bswapsi2(uint32_t in);
+#endif

diff --git a/zfs/module/zstd/include/stddef.h b/zfs/module/zstd/include/stddef.h
new file mode 100644
index 0000000..3f46fb8
--- /dev/null
+++ b/zfs/module/zstd/include/stddef.h

@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_STDDEF_H
+#define	_ZSTD_STDDEF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/types.h>
+#elif defined(__linux__)
+#include <linux/types.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <stddef.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDDEF_H */

diff --git a/zfs/module/zstd/include/stdint.h b/zfs/module/zstd/include/stdint.h
new file mode 100644
index 0000000..2d98a55
--- /dev/null
+++ b/zfs/module/zstd/include/stdint.h

@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_STDINT_H
+#define	_ZSTD_STDINT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/stdint.h>
+#elif defined(__linux__)
+#include <linux/types.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <stdint.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDINT_H */

diff --git a/zfs/module/zstd/include/stdio.h b/zfs/module/zstd/include/stdio.h
new file mode 100644
index 0000000..5a7c6ec
--- /dev/null
+++ b/zfs/module/zstd/include/stdio.h

@@ -0,0 +1,54 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_STDIO_H
+#define	_ZSTD_STDIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _KERNEL
+
+#include_next <stdio.h>
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDIO_H */

diff --git a/zfs/module/zstd/include/stdlib.h b/zfs/module/zstd/include/stdlib.h
new file mode 100644
index 0000000..c341a0c
--- /dev/null
+++ b/zfs/module/zstd/include/stdlib.h

@@ -0,0 +1,58 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_STDLIB_H
+#define	_ZSTD_STDLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef	GCC_VERSION
+
+/*
+ * Define calloc, malloc, free to make building work. They are never really used
+ * in lib/zstd.c since allocation is done in zfs_zstd.c.
+ */
+#define	calloc(n, sz)	NULL
+#define	malloc(sz)	NULL
+#define	free(ptr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDLIB_H */

diff --git a/zfs/module/zstd/include/string.h b/zfs/module/zstd/include/string.h
new file mode 100644
index 0000000..78998d3
--- /dev/null
+++ b/zfs/module/zstd/include/string.h

@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef	_ZSTD_STRING_H
+#define	_ZSTD_STRING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/systm.h>    /* memcpy, memset */
+#elif defined(__linux__)
+#include <linux/string.h> /* memcpy, memset */
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <string.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STRING_H */

diff --git a/zfs/module/zstd/include/zstd_compat_wrapper.h b/zfs/module/zstd/include/zstd_compat_wrapper.h
new file mode 100644
index 0000000..5d20623
--- /dev/null
+++ b/zfs/module/zstd/include/zstd_compat_wrapper.h

@@ -0,0 +1,374 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2020, Sebastian Gottschall
+ */
+
+/*
+ * This wrapper avoids a problem that arises when the ZFS filesystem driver
+ * is compiled statically into the kernel: in that configuration its zstd
+ * symbols would collide with those of the older in-kernel zstd library.
+ * Each #define in this file therefore renames one symbol by giving it a
+ * "zfs_" prefix.
+ *
+ * On update, truncate this file at the scissor line ("-- >8 --"), rebuild
+ * the module, and run "make gensymbols".
+ * NOTE(review): the three BIT_* renames sit above the scissor line, so they
+ * survive that truncation and are presumably maintained by hand -- confirm.
+ */
+
+#define	BIT_initDStream zfs_BIT_initDStream
+#define	BIT_mask zfs_BIT_mask
+#define	BIT_reloadDStream zfs_BIT_reloadDStream
+
+
+/* -- >8 -- */
+#define	ERR_getErrorString zfs_ERR_getErrorString	/* first entry below the
+ * scissor line: per the file header, the list from here to the end of the
+ * file is presumably regenerated by "make gensymbols" -- TODO confirm, and
+ * avoid hand-editing it. Every entry maps a symbol X to zfs_X. */
+#define	FSE_buildCTable_raw zfs_FSE_buildCTable_raw
+#define	FSE_buildCTable_rle zfs_FSE_buildCTable_rle
+#define	FSE_buildCTable_wksp zfs_FSE_buildCTable_wksp
+#define	FSE_buildCTable zfs_FSE_buildCTable
+#define	FSE_buildDTable_raw zfs_FSE_buildDTable_raw
+#define	FSE_buildDTable_rle zfs_FSE_buildDTable_rle
+#define	FSE_buildDTable zfs_FSE_buildDTable
+#define	FSE_compress2 zfs_FSE_compress2
+#define	FSE_compressBound zfs_FSE_compressBound
+#define	FSE_compress_usingCTable zfs_FSE_compress_usingCTable
+#define	FSE_compress_wksp zfs_FSE_compress_wksp
+#define	FSE_compress zfs_FSE_compress
+#define	FSE_createCTable zfs_FSE_createCTable
+#define	FSE_createDTable zfs_FSE_createDTable
+#define	FSE_decompress_usingDTable zfs_FSE_decompress_usingDTable
+#define	FSE_decompress_wksp zfs_FSE_decompress_wksp
+#define	FSE_decompress zfs_FSE_decompress
+#define	FSE_freeCTable zfs_FSE_freeCTable
+#define	FSE_freeDTable zfs_FSE_freeDTable
+#define	FSE_getErrorName zfs_FSE_getErrorName
+#define	FSE_isError zfs_FSE_isError
+#define	FSE_NCountWriteBound zfs_FSE_NCountWriteBound
+#define	FSE_normalizeCount zfs_FSE_normalizeCount
+#define	FSE_optimalTableLog_internal zfs_FSE_optimalTableLog_internal
+#define	FSE_optimalTableLog zfs_FSE_optimalTableLog
+#define	FSE_readNCount zfs_FSE_readNCount
+#define	FSE_versionNumber zfs_FSE_versionNumber
+#define	FSE_writeNCount zfs_FSE_writeNCount
+#define	g_debuglevel zfs_g_debuglevel
+#define	HIST_countFast_wksp zfs_HIST_countFast_wksp
+#define	HIST_countFast zfs_HIST_countFast
+#define	HIST_count_simple zfs_HIST_count_simple
+#define	HIST_count_wksp zfs_HIST_count_wksp
+#define	HIST_count zfs_HIST_count
+#define	HIST_isError zfs_HIST_isError
+#define	HUF_buildCTable_wksp zfs_HUF_buildCTable_wksp
+#define	HUF_buildCTable zfs_HUF_buildCTable
+#define	HUF_compress1X_repeat zfs_HUF_compress1X_repeat
+#define	HUF_compress1X_usingCTable zfs_HUF_compress1X_usingCTable
+#define	HUF_compress1X_wksp zfs_HUF_compress1X_wksp
+#define	HUF_compress1X zfs_HUF_compress1X
+#define	HUF_compress2 zfs_HUF_compress2
+#define	HUF_compress4X_repeat zfs_HUF_compress4X_repeat
+#define	HUF_compress4X_usingCTable zfs_HUF_compress4X_usingCTable
+#define	HUF_compress4X_wksp zfs_HUF_compress4X_wksp
+#define	HUF_compressBound zfs_HUF_compressBound
+#define	HUF_compress zfs_HUF_compress
+#define	HUF_decompress1X1_DCtx_wksp_bmi2 zfs_HUF_decompress1X1_DCtx_wksp_bmi2
+#define	HUF_decompress1X1_DCtx_wksp zfs_HUF_decompress1X1_DCtx_wksp
+#define	HUF_decompress1X1_DCtx zfs_HUF_decompress1X1_DCtx
+#define	HUF_decompress1X1_usingDTable zfs_HUF_decompress1X1_usingDTable
+#define	HUF_decompress1X1 zfs_HUF_decompress1X1
+#define	HUF_decompress1X2_DCtx_wksp zfs_HUF_decompress1X2_DCtx_wksp
+#define	HUF_decompress1X2_DCtx zfs_HUF_decompress1X2_DCtx
+#define	HUF_decompress1X2_usingDTable zfs_HUF_decompress1X2_usingDTable
+#define	HUF_decompress1X2 zfs_HUF_decompress1X2
+#define	HUF_decompress1X_DCtx_wksp zfs_HUF_decompress1X_DCtx_wksp
+#define	HUF_decompress1X_DCtx zfs_HUF_decompress1X_DCtx
+#define	HUF_decompress1X_usingDTable_bmi2 zfs_HUF_decompress1X_usingDTable_bmi2
+#define	HUF_decompress1X_usingDTable zfs_HUF_decompress1X_usingDTable
+#define	HUF_decompress4X1_DCtx_wksp zfs_HUF_decompress4X1_DCtx_wksp
+#define	HUF_decompress4X1_DCtx zfs_HUF_decompress4X1_DCtx
+#define	HUF_decompress4X1_usingDTable zfs_HUF_decompress4X1_usingDTable
+#define	HUF_decompress4X1 zfs_HUF_decompress4X1
+#define	HUF_decompress4X2_DCtx_wksp zfs_HUF_decompress4X2_DCtx_wksp
+#define	HUF_decompress4X2_DCtx zfs_HUF_decompress4X2_DCtx
+#define	HUF_decompress4X2_usingDTable zfs_HUF_decompress4X2_usingDTable
+#define	HUF_decompress4X2 zfs_HUF_decompress4X2
+#define	HUF_decompress4X_DCtx zfs_HUF_decompress4X_DCtx
+#define	HUF_decompress4X_hufOnly_wksp_bmi2 zfs_HUF_decompress4X_hufOnly_wksp_bmi2
+#define	HUF_decompress4X_hufOnly_wksp zfs_HUF_decompress4X_hufOnly_wksp
+#define	HUF_decompress4X_hufOnly zfs_HUF_decompress4X_hufOnly
+#define	HUF_decompress4X_usingDTable_bmi2 zfs_HUF_decompress4X_usingDTable_bmi2
+#define	HUF_decompress4X_usingDTable zfs_HUF_decompress4X_usingDTable
+#define	HUF_decompress zfs_HUF_decompress
+#define	HUF_estimateCompressedSize zfs_HUF_estimateCompressedSize
+#define	HUF_getErrorName zfs_HUF_getErrorName
+#define	HUF_getNbBits zfs_HUF_getNbBits
+#define	HUF_isError zfs_HUF_isError
+#define	HUF_optimalTableLog zfs_HUF_optimalTableLog
+#define	HUF_readCTable zfs_HUF_readCTable
+#define	HUF_readDTableX1_wksp zfs_HUF_readDTableX1_wksp
+#define	HUF_readDTableX1 zfs_HUF_readDTableX1
+#define	HUF_readDTableX2_wksp zfs_HUF_readDTableX2_wksp
+#define	HUF_readDTableX2 zfs_HUF_readDTableX2
+#define	HUF_readStats zfs_HUF_readStats
+#define	HUF_selectDecoder zfs_HUF_selectDecoder
+#define	HUF_validateCTable zfs_HUF_validateCTable
+#define	HUF_writeCTable zfs_HUF_writeCTable
+#define	POOL_add zfs_POOL_add
+#define	POOL_create_advanced zfs_POOL_create_advanced
+#define	POOL_create zfs_POOL_create
+#define	POOL_free zfs_POOL_free
+#define	POOL_resize zfs_POOL_resize
+#define	POOL_sizeof zfs_POOL_sizeof
+#define	POOL_tryAdd zfs_POOL_tryAdd
+#define	ZSTD_adjustCParams zfs_ZSTD_adjustCParams
+#define	ZSTD_buildCTable zfs_ZSTD_buildCTable
+#define	ZSTD_buildFSETable zfs_ZSTD_buildFSETable
+#define	ZSTD_calloc zfs_ZSTD_calloc
+#define	ZSTD_CCtx_getParameter zfs_ZSTD_CCtx_getParameter
+#define	ZSTD_CCtx_loadDictionary_advanced zfs_ZSTD_CCtx_loadDictionary_advanced
+#define	ZSTD_CCtx_loadDictionary_byReference zfs_ZSTD_CCtx_loadDictionary_byReference
+#define	ZSTD_CCtx_loadDictionary zfs_ZSTD_CCtx_loadDictionary
+#define	ZSTD_CCtxParams_getParameter zfs_ZSTD_CCtxParams_getParameter
+#define	ZSTD_CCtxParams_init_advanced zfs_ZSTD_CCtxParams_init_advanced
+#define	ZSTD_CCtxParams_init zfs_ZSTD_CCtxParams_init
+#define	ZSTD_CCtxParams_reset zfs_ZSTD_CCtxParams_reset
+#define	ZSTD_CCtxParams_setParameter zfs_ZSTD_CCtxParams_setParameter
+#define	ZSTD_CCtx_refCDict zfs_ZSTD_CCtx_refCDict
+#define	ZSTD_CCtx_refPrefix_advanced zfs_ZSTD_CCtx_refPrefix_advanced
+#define	ZSTD_CCtx_refPrefix zfs_ZSTD_CCtx_refPrefix
+#define	ZSTD_CCtx_reset zfs_ZSTD_CCtx_reset
+#define	ZSTD_CCtx_setParametersUsingCCtxParams zfs_ZSTD_CCtx_setParametersUsingCCtxParams
+#define	ZSTD_CCtx_setParameter zfs_ZSTD_CCtx_setParameter
+#define	ZSTD_CCtx_setPledgedSrcSize zfs_ZSTD_CCtx_setPledgedSrcSize
+#define	ZSTD_checkContinuity zfs_ZSTD_checkContinuity
+#define	ZSTD_checkCParams zfs_ZSTD_checkCParams
+#define	ZSTD_compress2 zfs_ZSTD_compress2
+#define	ZSTD_compress_advanced_internal zfs_ZSTD_compress_advanced_internal
+#define	ZSTD_compress_advanced zfs_ZSTD_compress_advanced
+#define	ZSTD_compressBegin_advanced_internal zfs_ZSTD_compressBegin_advanced_internal
+#define	ZSTD_compressBegin_advanced zfs_ZSTD_compressBegin_advanced
+#define	ZSTD_compressBegin_usingCDict_advanced zfs_ZSTD_compressBegin_usingCDict_advanced
+#define	ZSTD_compressBegin_usingCDict zfs_ZSTD_compressBegin_usingCDict
+#define	ZSTD_compressBegin_usingDict zfs_ZSTD_compressBegin_usingDict
+#define	ZSTD_compressBegin zfs_ZSTD_compressBegin
+#define	ZSTD_compressBlock_btlazy2_dictMatchState zfs_ZSTD_compressBlock_btlazy2_dictMatchState
+#define	ZSTD_compressBlock_btlazy2_extDict zfs_ZSTD_compressBlock_btlazy2_extDict
+#define	ZSTD_compressBlock_btlazy2 zfs_ZSTD_compressBlock_btlazy2
+#define	ZSTD_compressBlock_btopt_dictMatchState zfs_ZSTD_compressBlock_btopt_dictMatchState
+#define	ZSTD_compressBlock_btopt_extDict zfs_ZSTD_compressBlock_btopt_extDict
+#define	ZSTD_compressBlock_btopt zfs_ZSTD_compressBlock_btopt
+#define	ZSTD_compressBlock_btultra2 zfs_ZSTD_compressBlock_btultra2
+#define	ZSTD_compressBlock_btultra_dictMatchState zfs_ZSTD_compressBlock_btultra_dictMatchState
+#define	ZSTD_compressBlock_btultra_extDict zfs_ZSTD_compressBlock_btultra_extDict
+#define	ZSTD_compressBlock_btultra zfs_ZSTD_compressBlock_btultra
+#define	ZSTD_compressBlock_doubleFast_dictMatchState zfs_ZSTD_compressBlock_doubleFast_dictMatchState
+#define	ZSTD_compressBlock_doubleFast_extDict zfs_ZSTD_compressBlock_doubleFast_extDict
+#define	ZSTD_compressBlock_doubleFast zfs_ZSTD_compressBlock_doubleFast
+#define	ZSTD_compressBlock_fast_dictMatchState zfs_ZSTD_compressBlock_fast_dictMatchState
+#define	ZSTD_compressBlock_fast_extDict zfs_ZSTD_compressBlock_fast_extDict
+#define	ZSTD_compressBlock_fast zfs_ZSTD_compressBlock_fast
+#define	ZSTD_compressBlock_greedy_dictMatchState zfs_ZSTD_compressBlock_greedy_dictMatchState
+#define	ZSTD_compressBlock_greedy_extDict zfs_ZSTD_compressBlock_greedy_extDict
+#define	ZSTD_compressBlock_greedy zfs_ZSTD_compressBlock_greedy
+#define	ZSTD_compressBlock_lazy2_dictMatchState zfs_ZSTD_compressBlock_lazy2_dictMatchState
+#define	ZSTD_compressBlock_lazy2_extDict zfs_ZSTD_compressBlock_lazy2_extDict
+#define	ZSTD_compressBlock_lazy2 zfs_ZSTD_compressBlock_lazy2
+#define	ZSTD_compressBlock_lazy_dictMatchState zfs_ZSTD_compressBlock_lazy_dictMatchState
+#define	ZSTD_compressBlock_lazy_extDict zfs_ZSTD_compressBlock_lazy_extDict
+#define	ZSTD_compressBlock_lazy zfs_ZSTD_compressBlock_lazy
+#define	ZSTD_compressBlock zfs_ZSTD_compressBlock
+#define	ZSTD_compressBound zfs_ZSTD_compressBound
+#define	ZSTD_compressCCtx zfs_ZSTD_compressCCtx
+#define	ZSTD_compressContinue zfs_ZSTD_compressContinue
+#define	ZSTD_compressEnd zfs_ZSTD_compressEnd
+#define	ZSTD_compressLiterals zfs_ZSTD_compressLiterals
+#define	ZSTD_compressRleLiteralsBlock zfs_ZSTD_compressRleLiteralsBlock
+#define	ZSTD_compressStream2_simpleArgs zfs_ZSTD_compressStream2_simpleArgs
+#define	ZSTD_compressStream2 zfs_ZSTD_compressStream2
+#define	ZSTD_compressStream zfs_ZSTD_compressStream
+#define	ZSTD_compressSuperBlock zfs_ZSTD_compressSuperBlock
+#define	ZSTD_compress_usingCDict_advanced zfs_ZSTD_compress_usingCDict_advanced
+#define	ZSTD_compress_usingCDict zfs_ZSTD_compress_usingCDict
+#define	ZSTD_compress_usingDict zfs_ZSTD_compress_usingDict
+#define	ZSTD_compress zfs_ZSTD_compress
+#define	ZSTD_copyCCtx zfs_ZSTD_copyCCtx
+#define	ZSTD_copyDCtx zfs_ZSTD_copyDCtx
+#define	ZSTD_copyDDictParameters zfs_ZSTD_copyDDictParameters
+#define	ZSTD_cParam_getBounds zfs_ZSTD_cParam_getBounds
+#define	ZSTD_createCCtx_advanced zfs_ZSTD_createCCtx_advanced
+#define	ZSTD_createCCtxParams zfs_ZSTD_createCCtxParams
+#define	ZSTD_createCCtx zfs_ZSTD_createCCtx
+#define	ZSTD_createCDict_advanced zfs_ZSTD_createCDict_advanced
+#define	ZSTD_createCDict_byReference zfs_ZSTD_createCDict_byReference
+#define	ZSTD_createCDict zfs_ZSTD_createCDict
+#define	ZSTD_createCStream_advanced zfs_ZSTD_createCStream_advanced
+#define	ZSTD_createCStream zfs_ZSTD_createCStream
+#define	ZSTD_createDCtx_advanced zfs_ZSTD_createDCtx_advanced
+#define	ZSTD_createDCtx zfs_ZSTD_createDCtx
+#define	ZSTD_createDDict_advanced zfs_ZSTD_createDDict_advanced
+#define	ZSTD_createDDict_byReference zfs_ZSTD_createDDict_byReference
+#define	ZSTD_createDDict zfs_ZSTD_createDDict
+#define	ZSTD_createDStream_advanced zfs_ZSTD_createDStream_advanced
+#define	ZSTD_createDStream zfs_ZSTD_createDStream
+#define	ZSTD_crossEntropyCost zfs_ZSTD_crossEntropyCost
+#define	ZSTD_CStreamInSize zfs_ZSTD_CStreamInSize
+#define	ZSTD_CStreamOutSize zfs_ZSTD_CStreamOutSize
+#define	ZSTD_cycleLog zfs_ZSTD_cycleLog
+#define	ZSTD_DCtx_loadDictionary_advanced zfs_ZSTD_DCtx_loadDictionary_advanced
+#define	ZSTD_DCtx_loadDictionary_byReference zfs_ZSTD_DCtx_loadDictionary_byReference
+#define	ZSTD_DCtx_loadDictionary zfs_ZSTD_DCtx_loadDictionary
+#define	ZSTD_DCtx_refDDict zfs_ZSTD_DCtx_refDDict
+#define	ZSTD_DCtx_refPrefix_advanced zfs_ZSTD_DCtx_refPrefix_advanced
+#define	ZSTD_DCtx_refPrefix zfs_ZSTD_DCtx_refPrefix
+#define	ZSTD_DCtx_reset zfs_ZSTD_DCtx_reset
+#define	ZSTD_DCtx_setFormat zfs_ZSTD_DCtx_setFormat
+#define	ZSTD_DCtx_setMaxWindowSize zfs_ZSTD_DCtx_setMaxWindowSize
+#define	ZSTD_DCtx_setParameter zfs_ZSTD_DCtx_setParameter
+#define	ZSTD_DDict_dictContent zfs_ZSTD_DDict_dictContent
+#define	ZSTD_DDict_dictSize zfs_ZSTD_DDict_dictSize
+#define	ZSTD_decodeLiteralsBlock zfs_ZSTD_decodeLiteralsBlock
+#define	ZSTD_decodeSeqHeaders zfs_ZSTD_decodeSeqHeaders
+#define	ZSTD_decodingBufferSize_min zfs_ZSTD_decodingBufferSize_min
+#define	ZSTD_decompressBegin_usingDDict zfs_ZSTD_decompressBegin_usingDDict
+#define	ZSTD_decompressBegin_usingDict zfs_ZSTD_decompressBegin_usingDict
+#define	ZSTD_decompressBegin zfs_ZSTD_decompressBegin
+#define	ZSTD_decompressBlock_internal zfs_ZSTD_decompressBlock_internal
+#define	ZSTD_decompressBlock zfs_ZSTD_decompressBlock
+#define	ZSTD_decompressBound zfs_ZSTD_decompressBound
+#define	ZSTD_decompressContinue zfs_ZSTD_decompressContinue
+#define	ZSTD_decompressDCtx zfs_ZSTD_decompressDCtx
+#define	ZSTD_decompressStream_simpleArgs zfs_ZSTD_decompressStream_simpleArgs
+#define	ZSTD_decompressStream zfs_ZSTD_decompressStream
+#define	ZSTD_decompress_usingDDict zfs_ZSTD_decompress_usingDDict
+#define	ZSTD_decompress_usingDict zfs_ZSTD_decompress_usingDict
+#define	ZSTD_decompress zfs_ZSTD_decompress
+#define	ZSTD_dParam_getBounds zfs_ZSTD_dParam_getBounds
+#define	ZSTD_DStreamInSize zfs_ZSTD_DStreamInSize
+#define	ZSTD_DStreamOutSize zfs_ZSTD_DStreamOutSize
+#define	ZSTD_encodeSequences zfs_ZSTD_encodeSequences
+#define	ZSTD_endStream zfs_ZSTD_endStream
+#define	ZSTD_estimateCCtxSize_usingCCtxParams zfs_ZSTD_estimateCCtxSize_usingCCtxParams
+#define	ZSTD_estimateCCtxSize_usingCParams zfs_ZSTD_estimateCCtxSize_usingCParams
+#define	ZSTD_estimateCCtxSize zfs_ZSTD_estimateCCtxSize
+#define	ZSTD_estimateCDictSize_advanced zfs_ZSTD_estimateCDictSize_advanced
+#define	ZSTD_estimateCDictSize zfs_ZSTD_estimateCDictSize
+#define	ZSTD_estimateCStreamSize_usingCCtxParams zfs_ZSTD_estimateCStreamSize_usingCCtxParams
+#define	ZSTD_estimateCStreamSize_usingCParams zfs_ZSTD_estimateCStreamSize_usingCParams
+#define	ZSTD_estimateCStreamSize zfs_ZSTD_estimateCStreamSize
+#define	ZSTD_estimateDCtxSize zfs_ZSTD_estimateDCtxSize
+#define	ZSTD_estimateDDictSize zfs_ZSTD_estimateDDictSize
+#define	ZSTD_estimateDStreamSize_fromFrame zfs_ZSTD_estimateDStreamSize_fromFrame
+#define	ZSTD_estimateDStreamSize zfs_ZSTD_estimateDStreamSize
+#define	ZSTD_fillDoubleHashTable zfs_ZSTD_fillDoubleHashTable
+#define	ZSTD_fillHashTable zfs_ZSTD_fillHashTable
+#define	ZSTD_findDecompressedSize zfs_ZSTD_findDecompressedSize
+#define	ZSTD_findFrameCompressedSize zfs_ZSTD_findFrameCompressedSize
+#define	ZSTD_flushStream zfs_ZSTD_flushStream
+#define	ZSTD_frameHeaderSize zfs_ZSTD_frameHeaderSize
+#define	ZSTD_freeCCtxParams zfs_ZSTD_freeCCtxParams
+#define	ZSTD_freeCCtx zfs_ZSTD_freeCCtx
+#define	ZSTD_freeCDict zfs_ZSTD_freeCDict
+#define	ZSTD_freeCStream zfs_ZSTD_freeCStream
+#define	ZSTD_freeDCtx zfs_ZSTD_freeDCtx
+#define	ZSTD_freeDDict zfs_ZSTD_freeDDict
+#define	ZSTD_freeDStream zfs_ZSTD_freeDStream
+#define	ZSTD_free zfs_ZSTD_free
+#define	ZSTD_fseBitCost zfs_ZSTD_fseBitCost
+#define	ZSTD_getBlockSize zfs_ZSTD_getBlockSize
+#define	ZSTD_getcBlockSize zfs_ZSTD_getcBlockSize
+#define	ZSTD_getCParamsFromCCtxParams zfs_ZSTD_getCParamsFromCCtxParams
+#define	ZSTD_getCParamsFromCDict zfs_ZSTD_getCParamsFromCDict
+#define	ZSTD_getCParams zfs_ZSTD_getCParams
+#define	ZSTD_getDecompressedSize zfs_ZSTD_getDecompressedSize
+#define	ZSTD_getDictID_fromDDict zfs_ZSTD_getDictID_fromDDict
+#define	ZSTD_getDictID_fromDict zfs_ZSTD_getDictID_fromDict
+#define	ZSTD_getDictID_fromFrame zfs_ZSTD_getDictID_fromFrame
+#define	ZSTD_getErrorCode zfs_ZSTD_getErrorCode
+#define	ZSTD_getErrorName zfs_ZSTD_getErrorName
+#define	ZSTD_getErrorString zfs_ZSTD_getErrorString
+#define	ZSTD_getFrameContentSize zfs_ZSTD_getFrameContentSize
+#define	ZSTD_getFrameHeader_advanced zfs_ZSTD_getFrameHeader_advanced
+#define	ZSTD_getFrameHeader zfs_ZSTD_getFrameHeader
+#define	ZSTD_getFrameProgression zfs_ZSTD_getFrameProgression
+#define	ZSTD_getParams zfs_ZSTD_getParams
+#define	ZSTD_getSeqStore zfs_ZSTD_getSeqStore
+#define	ZSTD_getSequences zfs_ZSTD_getSequences
+#define	ZSTD_initCStream_advanced zfs_ZSTD_initCStream_advanced
+#define	ZSTD_initCStream_internal zfs_ZSTD_initCStream_internal
+#define	ZSTD_initCStream_srcSize zfs_ZSTD_initCStream_srcSize
+#define	ZSTD_initCStream_usingCDict_advanced zfs_ZSTD_initCStream_usingCDict_advanced
+#define	ZSTD_initCStream_usingCDict zfs_ZSTD_initCStream_usingCDict
+#define	ZSTD_initCStream_usingDict zfs_ZSTD_initCStream_usingDict
+#define	ZSTD_initCStream zfs_ZSTD_initCStream
+#define	ZSTD_initDStream_usingDDict zfs_ZSTD_initDStream_usingDDict
+#define	ZSTD_initDStream_usingDict zfs_ZSTD_initDStream_usingDict
+#define	ZSTD_initDStream zfs_ZSTD_initDStream
+#define	ZSTD_initStaticCCtx zfs_ZSTD_initStaticCCtx
+#define	ZSTD_initStaticCDict zfs_ZSTD_initStaticCDict
+#define	ZSTD_initStaticCStream zfs_ZSTD_initStaticCStream
+#define	ZSTD_initStaticDCtx zfs_ZSTD_initStaticDCtx
+#define	ZSTD_initStaticDDict zfs_ZSTD_initStaticDDict
+#define	ZSTD_initStaticDStream zfs_ZSTD_initStaticDStream
+#define	ZSTD_insertAndFindFirstIndex zfs_ZSTD_insertAndFindFirstIndex
+#define	ZSTD_insertBlock zfs_ZSTD_insertBlock
+#define	ZSTD_invalidateRepCodes zfs_ZSTD_invalidateRepCodes
+#define	ZSTD_isError zfs_ZSTD_isError
+#define	ZSTD_isFrame zfs_ZSTD_isFrame
+#define	ZSTD_ldm_adjustParameters zfs_ZSTD_ldm_adjustParameters
+#define	ZSTD_ldm_blockCompress zfs_ZSTD_ldm_blockCompress
+#define	ZSTD_ldm_fillHashTable zfs_ZSTD_ldm_fillHashTable
+#define	ZSTD_ldm_generateSequences zfs_ZSTD_ldm_generateSequences
+#define	ZSTD_ldm_getMaxNbSeq zfs_ZSTD_ldm_getMaxNbSeq
+#define	ZSTD_ldm_getTableSize zfs_ZSTD_ldm_getTableSize
+#define	ZSTD_ldm_skipSequences zfs_ZSTD_ldm_skipSequences
+#define	ZSTD_loadCEntropy zfs_ZSTD_loadCEntropy
+#define	ZSTD_loadDEntropy zfs_ZSTD_loadDEntropy
+#define	ZSTD_malloc zfs_ZSTD_malloc
+#define	ZSTD_maxCLevel zfs_ZSTD_maxCLevel
+#define	ZSTD_minCLevel zfs_ZSTD_minCLevel
+#define	ZSTD_nextInputType zfs_ZSTD_nextInputType
+#define	ZSTD_nextSrcSizeToDecompress zfs_ZSTD_nextSrcSizeToDecompress
+#define	ZSTD_noCompressLiterals zfs_ZSTD_noCompressLiterals
+#define	ZSTD_referenceExternalSequences zfs_ZSTD_referenceExternalSequences
+#define	ZSTD_reset_compressedBlockState zfs_ZSTD_reset_compressedBlockState
+#define	ZSTD_resetCStream zfs_ZSTD_resetCStream
+#define	ZSTD_resetDStream zfs_ZSTD_resetDStream
+#define	ZSTD_resetSeqStore zfs_ZSTD_resetSeqStore
+#define	ZSTD_selectBlockCompressor zfs_ZSTD_selectBlockCompressor
+#define	ZSTD_selectEncodingType zfs_ZSTD_selectEncodingType
+#define	ZSTD_seqToCodes zfs_ZSTD_seqToCodes
+#define	ZSTD_sizeof_CCtx zfs_ZSTD_sizeof_CCtx
+#define	ZSTD_sizeof_CDict zfs_ZSTD_sizeof_CDict
+#define	ZSTD_sizeof_CStream zfs_ZSTD_sizeof_CStream
+#define	ZSTD_sizeof_DCtx zfs_ZSTD_sizeof_DCtx
+#define	ZSTD_sizeof_DDict zfs_ZSTD_sizeof_DDict
+#define	ZSTD_sizeof_DStream zfs_ZSTD_sizeof_DStream
+#define	ZSTD_toFlushNow zfs_ZSTD_toFlushNow
+#define	ZSTD_updateTree zfs_ZSTD_updateTree
+#define	ZSTD_versionNumber zfs_ZSTD_versionNumber
+#define	ZSTD_versionString zfs_ZSTD_versionString
+#define	ZSTD_writeLastEmptyBlock zfs_ZSTD_writeLastEmptyBlock

diff --git a/zfs/module/zstd/lib/zstd.c b/zfs/module/zstd/lib/zstd.c
new file mode 100644
index 0000000..9dbba5b
--- /dev/null
+++ b/zfs/module/zstd/lib/zstd.c

@@ -0,0 +1,27824 @@
+/*
+ * BSD 3-Clause Clear License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. All rights reserved.
+ * Copyright (c) 2019-2020, Michael Niewöhner. All rights reserved.
+ */
+
+#define	MEM_MODULE		/* NOTE(review): presumably read by the inlined xxhash code -- confirm */
+#define	XXH_NAMESPACE ZSTD_	/* NOTE(review): presumably prefixes xxhash symbols with ZSTD_ -- confirm */
+#define	XXH_PRIVATE_API		/* NOTE(review): xxhash visibility knob -- confirm exact effect */
+#define	XXH_INLINE_ALL		/* NOTE(review): xxhash inlining knob -- confirm exact effect */
+#define	ZSTD_LEGACY_SUPPORT 0	/* set to 0; presumably compiles out legacy-format support -- confirm */
+#define	ZSTD_LIB_DICTBUILDER 0	/* set to 0; presumably excludes the dictionary builder -- confirm */
+#define	ZSTD_LIB_DEPRECATED 0	/* set to 0; presumably excludes deprecated APIs -- confirm */
+#define	ZSTD_NOBENCH		/* NOTE(review): presumably excludes benchmark code -- confirm */
+
+/**** start inlining common/debug.c ****/
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+/**** start inlining debug.h ****/
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust level using variable g_debuglevel,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function.
+ * How it works : a false condition selects the array type char[-1],
+ * which is ill-formed, so compilation stops. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number.
+ * When it is not defined, it defaults to 0 (see table below). */
+#ifndef DEBUGLEVEL
+#  define DEBUGLEVEL 0
+#endif
+
+
+/* DEBUGFILE can be defined externally,
+ * typically through compiler command line.
+ * note : currently useless
+ *        (RAWLOG and DEBUGLOG below write to stderr directly).
+ * Value must be stderr or stdout */
+#ifndef DEBUGFILE
+#  define DEBUGFILE stderr
+#endif
+
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In which case, it's possible to selectively trigger high verbosity levels
+ * by modifying g_debuglevel.
+ */
+
+#if (DEBUGLEVEL>=1)
+#  include <assert.h>
+#else
+#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
+#    define assert(condition) ((void)0)   /* disable assert (default) */
+#  endif
+#endif
+
+/* RAWLOG(l, fmt, ...) and DEBUGLOG(l, fmt, ...) :
+ * print a printf-style trace to stderr when l <= g_debuglevel.
+ * l   : verbosity level of this trace (see table above).
+ * fmt : for DEBUGLOG, must be a string literal, since it is concatenated
+ *       with __FILE__ ": " at compile time. DEBUGLOG also appends " \n".
+ * Both macros expand to a brace block (an empty one when DEBUGLEVEL<2),
+ * so `if (x) DEBUGLOG(...); else ...` does not compile : wrap the call in
+ * braces when it is the body of an if that has an else.
+ * The level argument is parenthesized so that an expression such as
+ * `verbose ? 3 : 5` is compared as a whole. */
+#if (DEBUGLEVEL>=2)
+#  include <stdio.h>
+extern int g_debuglevel; /* the variable is only declared,
+                            it actually lives in debug.c,
+                            and is shared by the whole process.
+                            It's not thread-safe.
+                            It's useful when enabling very verbose levels
+                            on selective conditions (such as position in src) */
+
+#  define RAWLOG(l, ...) {                                      \
+                if ((l)<=g_debuglevel) {                        \
+                    fprintf(stderr, __VA_ARGS__);               \
+            }   }
+#  define DEBUGLOG(l, ...) {                                    \
+                if ((l)<=g_debuglevel) {                        \
+                    fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
+                    fprintf(stderr, " \n");                     \
+            }   }
+#else
+#  define RAWLOG(l, ...)      {}    /* disabled */
+#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#endif
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* DEBUG_H_12987983217 */
+/**** ended inlining debug.h ****/
+
+int g_debuglevel = DEBUGLEVEL;	/* run-time threshold that RAWLOG/DEBUGLOG compare their level against; starts at the compile-time DEBUGLEVEL. Defined unconditionally, although debug.h only declares it when DEBUGLEVEL>=2 */
+/**** ended inlining common/debug.c ****/
+/**** start inlining common/entropy_common.c ****/
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+*  Dependencies
+***************************************/
+/**** start inlining mem.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include <string.h>     /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)   /* MEM_STATIC : qualifier for functions defined in this header (e.g. MEM_check); first matching branch wins */
+#  define MEM_STATIC static __inline __attribute__((unused))   /* unused attribute : no warning when a function is never called */
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+#ifndef __has_builtin
+#  define __has_builtin(x) 0  /* compat. with non-clang compilers : every __has_builtin(x) query then evaluates to 0 */
+#endif
+
+/* This code has only been exercised on 32-bit and 64-bit systems.
+ * MEM_STATIC_ASSERT(c) : compile-time check usable inside a function body;
+ * a false condition makes the enumerator initializer 1/0, which is rejected
+ * by the compiler. MEM_check() applies it to the width of size_t. */
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
+MEM_STATIC void MEM_check(void)
+{
+    MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8));
+}
+
+/* detects whether we are being compiled under msan.
+ * The __has_feature(...) query is nested inside the defined() test rather
+ * than joined to it with && : a preprocessor that does not provide
+ * __has_feature could not parse the function-like invocation if both sat
+ * in the same #if expression. */
+#if defined (__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    define MEMORY_SANITIZER 1
+#  endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+#  if __has_feature(address_sanitizer)
+#    define ADDRESS_SANITIZER 1
+#  endif
+#elif defined(__SANITIZE_ADDRESS__)
+#  define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef   uint8_t BYTE;
+  typedef  uint16_t U16;
+  typedef   int16_t S16;
+  typedef  uint32_t U32;
+  typedef   int32_t S32;
+  typedef  uint64_t U64;
+  typedef   int64_t S64;
+#else
+# include <limits.h>
+#if CHAR_BIT != 8
+#  error "this implementation requires char to be exactly 8-bit type"
+#endif
+  typedef unsigned char      BYTE;
+#if USHRT_MAX != 65535
+#  error "this implementation requires short to be exactly 16-bit type"
+#endif
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+#if UINT_MAX != 4294967295
+#  error "this implementation requires int to be exactly 32-bit type"
+#endif
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, <stdint.h> is preferred */
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/* MEM_32bits() / MEM_64bits() :
+ * @return : non-zero when size_t is 4 bytes (resp. 8 bytes) wide, 0 otherwise.
+ * Both compare sizeof(size_t) against a constant. */
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
+
+/* MEM_isLittleEndian() :
+ * Stores the value 1 in a U32 and inspects the byte at the lowest address.
+ * @return : 1 when that byte holds the least significant bits, 0 otherwise */
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    /* don't use static : performance detrimental */
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };
+    return probe.bytes[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+/* Each accessor below casts the untyped pointer to the target integer type and
+ * dereferences it directly, reading or writing the value in host byte order. */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+/* Single-member wrapper structs declared with 1-byte packing: accessing `v` through
+ * them lets the compiler emit an access that does not assume natural alignment. */
+#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
+    __pragma( pack(push, 1) )
+    typedef struct { U16 v; } unalign16;
+    typedef struct { U32 v; } unalign32;
+    typedef struct { U64 v; } unalign64;
+    typedef struct { size_t v; } unalignArch;
+    __pragma( pack(pop) )
+#else
+    typedef struct { U16 v; } __attribute__((packed)) unalign16;
+    typedef struct { U32 v; } __attribute__((packed)) unalign32;
+    typedef struct { U64 v; } __attribute__((packed)) unalign64;
+    typedef struct { size_t v; } __attribute__((packed)) unalignArch;
+#endif
+
+/* Readers : reinterpret the pointer as the matching packed wrapper and return its member. */
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
+
+/* Writers : same reinterpretation, storing `value` into the wrapper's member. */
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+/* Readers : copy sizeof(type) bytes from memPtr into a local of the target type
+ * with memcpy() and return it, in host byte order. */
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 loaded;
+    memcpy(&loaded, memPtr, sizeof(loaded));
+    return loaded;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 loaded;
+    memcpy(&loaded, memPtr, sizeof(loaded));
+    return loaded;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 loaded;
+    memcpy(&loaded, memPtr, sizeof(loaded));
+    return loaded;
+}
+
+MEM_STATIC size_t MEM_readST(const void* memPtr)
+{
+    size_t loaded;
+    memcpy(&loaded, memPtr, sizeof(loaded));
+    return loaded;
+}
+
+/* Writers : copy the bytes of `value` to memPtr with memcpy(), in host byte order. */
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(U16));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(U32));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(U64));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+/* MEM_swap32() :
+ * @return : `in` with its 4 bytes in reversed order.
+ * Uses a compiler intrinsic when one is detected, otherwise shifts and masks. */
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_ulong(in);
+#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
+  || (defined(__clang__) && __has_builtin(__builtin_bswap32))
+    return __builtin_bswap32(in);
+#else
+    /* portable fallback : move each byte to its mirrored position */
+    return  ((in << 24) & 0xff000000 ) |
+            ((in <<  8) & 0x00ff0000 ) |
+            ((in >>  8) & 0x0000ff00 ) |
+            ((in >> 24) & 0x000000ff );
+#endif
+}
+
+/* MEM_swap64() :
+ * @return : `in` with its 8 bytes in reversed order.
+ * Uses a compiler intrinsic when one is detected, otherwise shifts and masks. */
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+#if defined(_MSC_VER)     /* Visual Studio */
+    return _byteswap_uint64(in);
+#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
+  || (defined(__clang__) && __has_builtin(__builtin_bswap64))
+    return __builtin_bswap64(in);
+#else
+    /* portable fallback : move each byte to its mirrored position */
+    return  ((in << 56) & 0xff00000000000000ULL) |
+            ((in << 40) & 0x00ff000000000000ULL) |
+            ((in << 24) & 0x0000ff0000000000ULL) |
+            ((in << 8)  & 0x000000ff00000000ULL) |
+            ((in >> 8)  & 0x00000000ff000000ULL) |
+            ((in >> 24) & 0x0000000000ff0000ULL) |
+            ((in >> 40) & 0x000000000000ff00ULL) |
+            ((in >> 56) & 0x00000000000000ffULL);
+#endif
+}
+
+/* MEM_swapST() :
+ * Byte-reverses a size_t, using the 32-bit swap when MEM_32bits() is true
+ * and the 64-bit swap otherwise. */
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+    return MEM_32bits() ? (size_t)MEM_swap32((U32)in)
+                        : (size_t)MEM_swap64((U64)in);
+}
+
+/*=== Little endian r/w ===*/
+
+/* MEM_readLE16() :
+ * @return : the 16-bit value stored at memPtr in little-endian byte order */
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (!MEM_isLittleEndian()) {
+        /* big-endian host : assemble the value from its two bytes */
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U16)(bytes[0] + (bytes[1]<<8));
+    }
+    return MEM_read16(memPtr);
+}
+
+/* MEM_writeLE16() :
+ * Stores `val` at memPtr in little-endian byte order. */
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (!MEM_isLittleEndian()) {
+        /* big-endian host : emit low byte first, then high byte */
+        BYTE* const bytes = (BYTE*)memPtr;
+        bytes[0] = (BYTE)val;
+        bytes[1] = (BYTE)(val>>8);
+        return;
+    }
+    MEM_write16(memPtr, val);
+}
+
+/* MEM_readLE24() :
+ * @return : the 24-bit little-endian value at memPtr : the low 16 bits come from
+ *           MEM_readLE16(), the third byte supplies bits 16-23 */
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    return MEM_readLE16(memPtr) + (bytes[2] << 16);
+}
+
+/* MEM_writeLE24() :
+ * Stores the low 24 bits of `val` at memPtr in little-endian order (3 bytes written). */
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
+{
+    BYTE* const bytes = (BYTE*)memPtr;
+    MEM_writeLE16(memPtr, (U16)val);
+    bytes[2] = (BYTE)(val>>16);
+}
+
+/* MEM_readLE32() :
+ * @return : the 32-bit value stored at memPtr in little-endian byte order */
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    /* load in host order; swap only when the host is not little-endian */
+    U32 const raw = MEM_read32(memPtr);
+    return MEM_isLittleEndian() ? raw : MEM_swap32(raw);
+}
+
+/* MEM_writeLE32() :
+ * Stores `val32` at memPtr in little-endian byte order. */
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    U32 const stored = MEM_isLittleEndian() ? val32 : MEM_swap32(val32);
+    MEM_write32(memPtr, stored);
+}
+
+/* MEM_readLE64() :
+ * @return : the 64-bit value stored at memPtr in little-endian byte order */
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    /* load in host order; swap only when the host is not little-endian */
+    U64 const raw = MEM_read64(memPtr);
+    return MEM_isLittleEndian() ? raw : MEM_swap64(raw);
+}
+
+/* MEM_writeLE64() :
+ * Stores `val64` at memPtr in little-endian byte order. */
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    U64 const stored = MEM_isLittleEndian() ? val64 : MEM_swap64(val64);
+    MEM_write64(memPtr, stored);
+}
+
+/* MEM_readLEST() :
+ * @return : a size_t read from memPtr in little-endian order; reads 4 bytes when
+ *           MEM_32bits() is true, 8 bytes otherwise */
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    return MEM_32bits() ? (size_t)MEM_readLE32(memPtr)
+                        : (size_t)MEM_readLE64(memPtr);
+}
+
+/* MEM_writeLEST() :
+ * Stores a size_t at memPtr in little-endian order; writes 4 bytes when
+ * MEM_32bits() is true, 8 bytes otherwise. */
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+    if (!MEM_32bits()) {
+        MEM_writeLE64(memPtr, (U64)val);
+        return;
+    }
+    MEM_writeLE32(memPtr, (U32)val);
+}
+
+/*=== Big endian r/w ===*/
+
+/* MEM_readBE32() :
+ * @return : the 32-bit value stored at memPtr in big-endian byte order */
+MEM_STATIC U32 MEM_readBE32(const void* memPtr)
+{
+    /* load in host order; swap only when the host is little-endian */
+    U32 const raw = MEM_read32(memPtr);
+    return MEM_isLittleEndian() ? MEM_swap32(raw) : raw;
+}
+
+/* MEM_writeBE32() :
+ * Stores `val32` at memPtr in big-endian byte order. */
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
+{
+    U32 const stored = MEM_isLittleEndian() ? MEM_swap32(val32) : val32;
+    MEM_write32(memPtr, stored);
+}
+
+/* MEM_readBE64() :
+ * @return : the 64-bit value stored at memPtr in big-endian byte order */
+MEM_STATIC U64 MEM_readBE64(const void* memPtr)
+{
+    /* load in host order; swap only when the host is little-endian */
+    U64 const raw = MEM_read64(memPtr);
+    return MEM_isLittleEndian() ? MEM_swap64(raw) : raw;
+}
+
+/* MEM_writeBE64() :
+ * Stores `val64` at memPtr in big-endian byte order. */
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
+{
+    U64 const stored = MEM_isLittleEndian() ? MEM_swap64(val64) : val64;
+    MEM_write64(memPtr, stored);
+}
+
+/* MEM_readBEST() :
+ * @return : a size_t read from memPtr in big-endian order; reads 4 bytes when
+ *           MEM_32bits() is true, 8 bytes otherwise */
+MEM_STATIC size_t MEM_readBEST(const void* memPtr)
+{
+    return MEM_32bits() ? (size_t)MEM_readBE32(memPtr)
+                        : (size_t)MEM_readBE64(memPtr);
+}
+
+/* MEM_writeBEST() :
+ * Stores a size_t at memPtr in big-endian order; writes 4 bytes when
+ * MEM_32bits() is true, 8 bytes otherwise. */
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
+{
+    if (!MEM_32bits()) {
+        MEM_writeBE64(memPtr, (U64)val);
+        return;
+    }
+    MEM_writeBE32(memPtr, (U32)val);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+/**** ended inlining mem.h ****/
+/**** start inlining error_private.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>        /* size_t */
+/**** start inlining zstd_errors.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*===== dependency =====*/
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDERRORLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDERRORLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#endif
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum than on its value whenever possible
+ *           This is the only supported way to use the error list < v1.3.1
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+/* ZSTD_ErrorCode : enumeration of error kinds.
+ * Values are explicitly assigned and leave unused numbers between entries;
+ * entries from 100 upward are flagged below as not stable. */
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_ERRORS_H_398273423 */
+/**** ended inlining zstd_errors.h ****/
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+/* ERR_STATIC : storage qualifiers applied to the error helpers defined below.
+ * Note : the GNU variant only adds __attribute__((unused)); unlike the C99 and
+ * MSVC variants it does not carry an inline keyword. */
+#if defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+/* ERROR(name) / ZSTD_ERROR(name) : build the size_t error value for ZSTD_error_<name>,
+ * i.e. the enum value negated and converted to size_t (so it lands at the top of the size_t range). */
+#undef ERROR   /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+/* ERR_isError() :
+ * @return : 1 when `code` is strictly greater than ERROR(maxCode), 0 otherwise */
+ERR_STATIC unsigned ERR_isError(size_t code)
+{
+    size_t const threshold = ERROR(maxCode);
+    return (code > threshold);
+}
+
+/* ERR_getErrorCode() :
+ * @return : (ERR_enum)0 when `code` is not an error per ERR_isError(),
+ *           otherwise the enum value recovered by negating `code` */
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
+{
+    return ERR_isError(code) ? (ERR_enum) (0-code) : (ERR_enum)0;
+}
+
+/* check and forward error code */
+/* CHECK_V_F(e, f) : declares `size_t const e` initialized with `f`, and returns `e` from the
+ *                   enclosing function when ERR_isError(e). It expands to a declaration followed
+ *                   by an `if`, so it is not a single statement.
+ * CHECK_F(f)      : same check, wrapped in braces with a throw-away variable name. */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
+
+/* ERR_getErrorName() :
+ * Converts a size_t result into its enum code with ERR_getErrorCode(),
+ * then returns the string ERR_getErrorString() provides for that code. */
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    ERR_enum const errorKind = ERR_getErrorCode(code);
+    return ERR_getErrorString(errorKind);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/**** ended inlining error_private.h ****/
+#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+/**** start inlining fse.h ****/
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+/* FSE_LIB_VERSION is the dotted token sequence MAJOR.MINOR.RELEASE.
+ * FSE_EXPAND_AND_QUOTE adds one level of macro expansion before FSE_QUOTE stringifies,
+ * so FSE_VERSION_STRING is "0.9.0" with the values above. */
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+/* FSE_VERSION_NUMBER packs the version as MAJOR*10000 + MINOR*100 + RELEASE (900 with the values above). */
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+/*! FSE_compress() :
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= dstCapacity),
+              or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[] (see hist.h)
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                                 const short* normalizedCounter,
+                                 unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+#endif  /* FSE_H */
+
+#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+/**** start inlining bitstream.h ****/
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+/**** skipping file: mem.h ****/
+/**** start inlining compiler.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+/* force inlining */
+
+/* Select the inline keyword and the forced-inline attribute for this compiler.
+ * Defining ZSTD_NO_INLINE makes both macros expand to nothing. */
+#if !defined(ZSTD_NO_INLINE)
+/* `inline` is used with GNU C outside strict ANSI mode, with C++, and with C99 or later. */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
+#else
+#  define INLINE_KEYWORD
+#endif
+
+/* GNU-compatible compilers and IAR take always_inline, MSVC takes __forceinline,
+ * any other compiler gets no attribute. */
+#if defined(__GNUC__) || defined(__ICCARM__)
+#  define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#  define FORCE_INLINE_ATTR __forceinline
+#else
+#  define FORCE_INLINE_ATTR
+#endif
+
+#else
+
+/* ZSTD_NO_INLINE : no inline keyword and no forced-inline attribute */
+#define INLINE_KEYWORD
+#define FORCE_INLINE_ATTR
+
+#endif
+
+/**
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/**
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+/* The condition below matches gcc 4.8 and 4.9 only (major == 4, minor >= 8, not clang):
+ * those versions get a plain `static inline`, every other compiler also gets FORCE_INLINE_ATTR. */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+#  define HINT_INLINE static INLINE_KEYWORD
+#else
+#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused.
+ * It expands to nothing on compilers that do not define __GNUC__. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
+/* force no inlining
+ * FORCE_NOINLINE always includes `static`; the no-inline request itself is
+ * __declspec(noinline) on MSVC, __attribute__((__noinline__)) on GNU-compatible
+ * compilers and IAR, and absent elsewhere. */
+#ifdef _MSC_VER
+#  define FORCE_NOINLINE static __declspec(noinline)
+#else
+#  if defined(__GNUC__) || defined(__ICCARM__)
+#    define FORCE_NOINLINE static __attribute__((__noinline__))
+#  else
+#    define FORCE_NOINLINE static
+#  endif
+#endif
+
+/* target attribute
+ * __has_attribute() is given a fallback that always answers 0 so it can be used
+ * in #if expressions on compilers that do not provide it.
+ * TARGET_ATTRIBUTE(target) expands to a __target__ attribute on GNU-compatible
+ * compilers and IAR, and to nothing elsewhere. */
+#ifndef __has_attribute
+  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
+#endif
+#if defined(__GNUC__) || defined(__ICCARM__)
+#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+#  define TARGET_ATTRIBUTE(target)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86-64 when BMI2 isn't enabled by default.
+ * The value can be forced by defining DYNAMIC_BMI2 before this point.
+ * The target is recognised through __x86_64__ (GNU-style) or _M_X64 (MSVC-style,
+ * e.g. clang in MSVC compatibility mode); `_M_X86` is not a predefined macro.
+ */
+#ifndef DYNAMIC_BMI2
+  #if ((defined(__clang__) && __has_attribute(__target__)) \
+      || (defined(__GNUC__) \
+          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+      && (defined(__x86_64__) || defined(_M_X64)) \
+      && !defined(__BMI2__)
+  #  define DYNAMIC_BMI2 1
+  #else
+  #  define DYNAMIC_BMI2 0
+  #endif
+#endif
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro
+ * PREFETCH_L1(ptr) / PREFETCH_L2(ptr) select, in this order :
+ *  - MSVC on x86/x64  : _mm_prefetch() with _MM_HINT_T0 / _MM_HINT_T1
+ *  - aarch64          : inline asm `prfm pldl1keep` / `prfm pldl2keep`
+ *  - gcc >= 3.1       : __builtin_prefetch() for read, locality 3 / 2
+ *  - anything else    : evaluate (void)(ptr) only, i.e. prefetch is disabled
+ * NOTE(review): the MSVC test uses `_M_I86`; MSVC documents `_M_IX86` for 32-bit x86,
+ * so 32-bit MSVC builds presumably fall through to the later branches. TODO confirm intent. */
+#if defined(NO_PREFETCH)
+#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
+#else
+#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
+#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+#    elif defined(__aarch64__)
+#     define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#     define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#  else
+#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#  endif
+#endif  /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s)  {            \
+    const char* const _ptr = (const char*)(p);  \
+    size_t const _size = (size_t)(s);     \
+    size_t _pos;                          \
+    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+        PREFETCH_L2(_ptr + _pos);         \
+    }                                     \
+}
+
+/* vectorization
+ * DONT_VECTORIZE disables tree vectorization for the function it annotates.
+ * It is only active for genuine gcc (not clang, not the Intel compiler) :
+ * gcc 4.4 and later use the optimize attribute, older gcc uses the pragma form.
+ * On every other compiler it expands to nothing. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
+#else
+#  define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#if defined(__GNUC__)
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+/* disable warnings */
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+#endif /* ZSTD_COMPILER_H */
+/**** ended inlining compiler.h ****/
+/**** skipping file: debug.h ****/
+/**** skipping file: error_private.h ****/
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+#  include <immintrin.h>   /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+#  include <intrinsics.h>
+#endif
+
+/* STREAM_ACCUMULATOR_MIN selects, through MEM_32bits(), between the 32-bit value (25)
+ * and the 64-bit value (57). These match the "at least 25 or 57 bits" that
+ * BIT_reloadDStream() documents for a BIT_DStream_unfinished result. */
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+/* State of a bitstream being written; set up by BIT_initCStream(). */
+typedef struct {
+    size_t bitContainer;   /* local register accumulating bits until the next flush */
+    unsigned bitPos;       /* nb of bits currently stored in bitContainer */
+    char*  startPtr;       /* first byte of the destination buffer */
+    char*  ptr;            /* position the next flush writes to */
+    char*  endPtr;         /* startPtr + dstCapacity - sizeof(bitContainer) : BIT_flushBits() clamps ptr to it */
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+/* State of a bitstream being read; set up by BIT_initDStream(). */
+typedef struct {
+    size_t   bitContainer;   /* local register, loaded from memory at `ptr` */
+    unsigned bitsConsumed;   /* nb of bits of bitContainer already consumed */
+    const char* ptr;         /* address bitContainer was last loaded from; reloads move it toward `start` */
+    const char* start;       /* first byte of the source buffer */
+    const char* limitPtr;    /* start + sizeof(bitContainer) : BIT_reloadDStreamFast() refuses to reload below it */
+} BIT_DStream_t;
+
+/* Status values, as produced by BIT_reloadDStream() :
+ *  - BIT_DStream_unfinished  : the register was reloaded, more input remains
+ *  - BIT_DStream_endOfBuffer : `ptr` has reached (or was clamped to) `start`, unread bits remain
+ *  - BIT_DStream_completed   : `ptr` is at `start` and every bit of the register is consumed
+ *  - BIT_DStream_overflow    : more bits were consumed than the register holds */
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+/*! BIT_highbit32() :
+ *  @return : index (0..31) of the highest bit set in `val`.
+ *  `val` must be non-zero (checked by assert()).
+ *  Uses a compiler intrinsic when one is known, a De Bruijn lookup otherwise. */
+MEM_STATIC unsigned BIT_highbit32 (U32 val)
+{
+    assert(val != 0);
+    {
+#   if defined(_MSC_VER)   /* Visual */
+        unsigned long msbIndex = 0;
+        if (_BitScanReverse ( &msbIndex, val )) {
+            return (unsigned)msbIndex;
+        }
+        return 0;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+        /* nb of leading zeros, flipped into a bit index */
+        return __builtin_clz (val) ^ 31;
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return 31 - __CLZ(val);
+#   else   /* Software version */
+        static const unsigned highbitLookup[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                                   11, 14, 16, 18, 22, 25,  3, 30,
+                                                    8, 12, 20, 28, 15, 17, 24,  7,
+                                                   19, 27, 23,  6, 26,  5,  4, 31 };
+        U32 smeared = val;
+        /* propagate the highest set bit into every lower position */
+        smeared |= smeared >> 1;
+        smeared |= smeared >> 2;
+        smeared |= smeared >> 4;
+        smeared |= smeared >> 8;
+        smeared |= smeared >> 16;
+        {   U32 const slot = (U32) (smeared * 0x07C4ACDDU) >> 27;
+            return highbitLookup[slot];
+        }
+#   endif
+    }
+}
+
+/*=====    Local Constants   =====*/
+/* BIT_mask[n] has its n lowest bits set (n = 0..31).
+ * BIT_MASK_SIZE is the number of entries in the table. */
+static const unsigned BIT_mask[] = {
+    0,          1,         3,         7,         0xF,       0x1F,
+    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
+    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
+    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
+    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
+#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  Prepares `bitC` to write a bitstream into `startPtr`.
+ *  `dstCapacity` must be > sizeof(size_t)
+ *  @return : 0 if success,
+ *            otherwise an error code (can be tested using ERR_isError()).
+ *  On error, `bitC` is left fully initialized with endPtr == startPtr;
+ *  it must not be used for writing. */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    /* Validate the capacity before deriving endPtr : with a too small buffer,
+     * startPtr + dstCapacity - sizeof(bitContainer) would point before startPtr,
+     * and forming such a pointer is undefined behaviour. */
+    if (dstCapacity <= sizeof(bitC->bitContainer)) {
+        bitC->endPtr = bitC->startPtr;
+        return ERROR(dstSize_tooSmall);
+    }
+    /* last position where a full register can still be written inside the buffer */
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    return 0;
+}
+
+/*! BIT_addBits() :
+ *  Appends the `nbBits` lowest bits of `value` to the local register of `bitC`;
+ *  higher bits of `value` are masked away.
+ *  `nbBits` can be up to 31.
+ *  Note : register overflow is not checked (beyond assert()) ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
+{
+    MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+    assert(nbBits < BIT_MASK_SIZE);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    {   size_t const keptBits = value & BIT_mask[nbBits];
+        bitC->bitContainer |= keptBits << bitC->bitPos;
+    }
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ *  Same as BIT_addBits(), without masking `value`.
+ *  Only valid when `value` is _clean_, i.e. has no bit set at or above position `nbBits`. */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
+{
+    assert((value>>nbBits) == 0);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    {   size_t const positioned = value << bitC->bitPos;
+        bitC->bitContainer |= positioned;
+    }
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  Writes the local register to memory and keeps only the bits of the last,
+ *  incomplete byte in the register.
+ *  assumption : bitContainer has not overflowed
+ *  unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t const wholeBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    /* the full register is written; only `wholeBytes` of it are committed */
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += wholeBytes;
+    bitC->bitContainer >>= wholeBytes*8;
+    bitC->bitPos &= 7;
+}
+
+/*! BIT_flushBits() :
+ *  Writes the local register to memory and keeps only the bits of the last,
+ *  incomplete byte in the register.
+ *  assumption : bitContainer has not overflowed
+ *  safe version; the write position is clamped to endPtr, which prevents buffer overflow.
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t const wholeBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    {   char* const advanced = bitC->ptr + wholeBytes;
+        bitC->ptr = (advanced > bitC->endPtr) ? bitC->endPtr : advanced;
+    }
+    bitC->bitContainer >>= wholeBytes*8;
+    bitC->bitPos &= 7;
+}
+
+/*! BIT_closeCStream() :
+ *  Appends the final 1-bit end mark and flushes the register.
+ *  @return : size of CStream, in bytes,
+ *            or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+    if (bitC->ptr >= bitC->endPtr) {
+        return 0;   /* overflow detected */
+    }
+    {   size_t const flushedBytes = (size_t)(bitC->ptr - bitC->startPtr);
+        /* a partially filled last byte counts as one more byte */
+        return flushedBytes + (bitC->bitPos > 0);
+    }
+}
+
+
+/*-********************************************************
+*  bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+ *  Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ *  The last byte of the stream must be non-zero : its highest set bit is the end mark,
+ *  and that bit plus the unused bits above it are counted as already consumed.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected :
+ *           srcSize_wrong when srcSize == 0 (`bitD` is then zeroed),
+ *           GENERIC or corruption_detected when the end mark is missing.
+ *  NOTE(review): the missing-end-mark error code differs between the normal path
+ *  (GENERIC) and the short-input path (corruption_detected) - confirm whether intended.
+ */
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        /* load the register from the last sizeof(bitContainer) bytes of the stream */
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        /* short input (srcSize < sizeof(bitContainer)) : assemble the register byte by byte,
+         * byte 0 in the lowest position */
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+        /* cases 5 to 7 are only reachable when the register is wider than 4 bytes;
+         * with an 8-byte register their shifts evaluate to 48, 40 and 32 */
+        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                /* fall-through */
+
+        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                /* fall-through */
+
+        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                /* fall-through */
+
+        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+                /* fall-through */
+
+        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+                /* fall-through */
+
+        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                /* fall-through */
+
+        default: break;
+        }
+        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
+        }
+        /* the register bytes that the short input could not fill count as consumed */
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*! BIT_getUpperBits() :
+ * @return : `bitContainer` with its `start` lowest bits shifted out. */
+MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{
+    size_t const upper = bitContainer >> start;
+    return upper;
+}
+
+/*! BIT_getMiddleBits() :
+ * @return : the `nbBits` bits of `bitContainer` located from bit position `start` upward.
+ *  `start` is wrapped to the register width; if it exceeded that width,
+ *  the bitstream is corrupted and the result is undefined. */
+MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{
+    U32 const widthMask = sizeof(bitContainer)*8 - 1;
+    U32 const shift = start & widthMask;
+    assert(nbBits < BIT_MASK_SIZE);
+    return (bitContainer >> shift) & BIT_mask[nbBits];
+}
+
+/*! BIT_getLowerBits() :
+ * @return : the `nbBits` lowest bits of `bitContainer` (`nbBits` <= 31). */
+MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    assert(nbBits < BIT_MASK_SIZE);
+    {   size_t const lowMask = BIT_mask[nbBits];
+        return bitContainer & lowMask;
+    }
+}
+
+/*! BIT_lookBits() :
+ *  Peeks at the next `nbBits` bits of the local register, without consuming them.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ * @return : value extracted */
+MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    /* arbitrate between double-shift and shift+mask */
+#if 1
+    /* when bitD->bitsConsumed + nbBits exceeds the register width,
+     * bitstream is likely corrupted, and result is undefined */
+    {   U32 const firstBit = (U32)((sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits);
+        return BIT_getMiddleBits(bitD->bitContainer, firstBit, nbBits);
+    }
+#else
+    /* this code path is slower on my os-x laptop */
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+ *  Double-shift variant of BIT_lookBits() : drops the consumed bits with a left shift,
+ *  then brings the wanted bits down with a right shift.
+ *  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const widthMask = sizeof(bitD->bitContainer)*8 - 1;
+    U32 const dropConsumed = bitD->bitsConsumed & widthMask;
+    U32 const bringDown = ((widthMask+1)-nbBits) & widthMask;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << dropConsumed) >> bringDown;
+}
+
+/*! BIT_skipBits() :
+ *  Marks `nbBits` more bits of the local register as consumed. */
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
+}
+
+/*! BIT_readBits() :
+ *  Reads the next `nbBits` bits from the local register and marks them as consumed.
+ *  Pay attention to not read more bits than the local register contains.
+ * @return : extracted value. */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const extracted = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return extracted;
+}
+
+/*! BIT_readBitsFast() :
+ *  Same as BIT_readBits(), built on BIT_lookBitsFast().
+ *  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const extracted = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
+    BIT_skipBits(bitD, nbBits);
+    return extracted;
+}
+
+/*! BIT_reloadDStreamFast() :
+ *  Reloads the local register after stepping `ptr` back by the fully consumed bytes.
+ *  Compared with BIT_reloadDStream() :
+ *  1. the caller must guarantee bitsConsumed <= sizeof(bitD->bitContainer)*8.
+ *  2. when bitD->ptr < bitD->limitPtr, nothing is reloaded and BIT_DStream_overflow
+ *     is returned; BIT_reloadDStream() must then be used instead.
+ * @return : BIT_DStream_unfinished after a successful reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+    if (UNLIKELY(bitD->ptr < bitD->limitPtr)) {
+        return BIT_DStream_overflow;
+    }
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    {   unsigned const consumedBytes = bitD->bitsConsumed >> 3;
+        bitD->ptr -= consumedBytes;
+    }
+    bitD->bitsConsumed = bitD->bitsConsumed & 7;   /* keep the bits of the partially read byte */
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
+/*! BIT_reloadDStream() :
+ *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+ *  This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           BIT_DStream_overflow    : more bits were consumed than the register holds; nothing is reloaded.
+ *           BIT_DStream_unfinished  : register reloaded, filled with at least 25 or 57 bits.
+ *           BIT_DStream_endOfBuffer : `ptr` is at (or was clamped to) the start of the buffer, unread bits remain.
+ *           BIT_DStream_completed   : `ptr` is at the start of the buffer and every bit is consumed. */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->limitPtr) {
+        /* a full register can still be read behind ptr */
+        return BIT_reloadDStreamFast(bitD);
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    /* start < ptr < limitPtr */
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        /* Compare distances rather than pointers : `ptr - nbBytes` could point before
+         * `start`, and forming such a pointer is undefined behaviour. */
+        if (nbBytes > (size_t)(bitD->ptr - bitD->start)) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end, i.e. `ptr` is back at `start`
+ *           and all bits of the register are consumed; 0 otherwise.
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    if (DStream->ptr != DStream->start) return 0;
+    return (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/**** ended inlining bitstream.h ****/
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds
+ * Every macro parameter is parenthesized, so that an expression argument
+ * (e.g. `a|b` or `c ? x : y`) is evaluated as a whole. */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+ *  FSE advanced API
+ ***************************************** */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSE_optimalTableLog(), which used `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ * It adds, to the CTable size, 1024 units when maxTableLog <= 12, or 1 << (maxTableLog - 2) units otherwise.
+ * `maxTableLog` is parenthesized inside the macro so that an expression argument is evaluated as a whole.
+ */
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (((maxTableLog) > 12) ? (1 << ((maxTableLog) - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their body are included in next section.
+*/
+/* Compression state tracking structure : one is needed per FSE state being encoded
+ * (several can be used on the same bitStream).
+ * It is initialized by FSE_initCState() from a FSE_CTable. */
+typedef struct {
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/**<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeSymbol(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+    size_t      state;   /* current decoder state, used as an index into `table` */
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/**<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (whose size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;   /* added to (state >> nbBitsOut) to index the next-state table */
+    U32 deltaNbBits;      /* (state + deltaNbBits) >> 16 gives the number of bits to output for this symbol */
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+/*! FSE_initCState() :
+ *  Prepare `statePtr` for encoding with the compression table `ct`.
+ *  tableLog is read from the first 16 bits of `ct`; the U16 state table starts 2 U16 after the beginning,
+ *  and symbolTT starts `1 + (tableLog ? 1<<(tableLog-1) : 1)` FSE_CTable elements after the beginning. */
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const void* const rawTable = ct;
+    U32 const tableLog = MEM_read16(rawTable);
+
+    statePtr->stateLog   = tableLog;
+    statePtr->value      = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = ((const U16*) rawTable) + 2;
+    statePtr->symbolTT   = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+}
+
+
+/*! FSE_initCState2() :
+*   Variant of FSE_initCState() which also accounts for the first symbol to encode
+*   (which will be the last one to be read) : it starts from the smallest state value
+*   possible for that symbol, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+    FSE_initCState(statePtr, ct);
+    {   FSE_symbolCompressionTransform const* const transforms = (const FSE_symbolCompressionTransform*)(statePtr->symbolTT);
+        FSE_symbolCompressionTransform const tt = transforms[symbol];
+        U16 const* const nextStates = (const U16*)(statePtr->stateTable);
+        U32 const nbBitsOut = (U32)((tt.deltaNbBits + (1<<15)) >> 16);
+        /* kept as ptrdiff_t, the type of statePtr->value, so that the shift below operates on the same type */
+        ptrdiff_t const startValue = (nbBitsOut << 16) - tt.deltaNbBits;
+        statePtr->value = nextStates[(startValue >> nbBitsOut) + tt.deltaFindState];
+    }
+}
+
+/*! FSE_encodeSymbol() :
+ *  Add the current state to `bitC`, using nbBitsOut bits, then move `statePtr` to the next state for `symbol`.
+ *  nbBitsOut is derived from the current state and the symbol's deltaNbBits. */
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+    FSE_symbolCompressionTransform const* const transforms = (const FSE_symbolCompressionTransform*)(statePtr->symbolTT);
+    FSE_symbolCompressionTransform const tt = transforms[symbol];
+    U16 const* const nextStates = (const U16*)(statePtr->stateTable);
+    ptrdiff_t const curState = statePtr->value;
+    U32 const nbBitsOut = (U32)((curState + tt.deltaNbBits) >> 16);
+
+    BIT_addBits(bitC, curState, nbBitsOut);
+    statePtr->value = nextStates[(curState >> nbBitsOut) + tt.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);   /* add the final state value, on stateLog bits */
+    BIT_flushBits(bitC);                                       /* then flush the bit container */
+}
+
+
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in whole bits.
+ * Fractional costs are rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+    FSE_symbolCompressionTransform const* const transforms = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    U32 const deltaNbBits = transforms[symbolValue].deltaNbBits;
+    U32 const roundUp = (1<<16)-1;   /* added before the shift so that any fractional part rounds up */
+    return (deltaNbBits + roundUp) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate cost of a symbol, as a fixed-point value with `accuracyLog` fractional bits.
+ * The result is (minNbBits+1) bits minus a linear interpolation term.
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+    FSE_symbolCompressionTransform const* const transforms = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    U32 const deltaNbBits = transforms[symbolValue].deltaNbBits;
+    U32 const maxNbBits = (deltaNbBits >> 16) + 1;
+    U32 const threshold = maxNbBits << 16;
+    assert(tableLog < 16);
+    assert(accuracyLog < 31-tableLog);  /* ensure enough room for renormalization double shift */
+    {   U32 const tableSize = 1 << tableLog;
+        U32 const bitMultiplier = 1 << accuracyLog;
+        U32 const shiftedDelta = deltaNbBits + tableSize;
+        U32 const deltaFromThreshold = threshold - shiftedDelta;
+        U32 const fraction = (deltaFromThreshold << accuracyLog) >> tableLog;   /* linear interpolation (very approximate) */
+        assert(shiftedDelta <= threshold);
+        assert(fraction <= bitMultiplier);
+        return maxNbBits*bitMultiplier - fraction;
+    }
+}
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;   /* number of bits FSE_initDState() reads to obtain the initial state */
+    U16 fastMode;   /* NOTE(review): presumably tells whether FSE_decodeSymbolFast() may be used - confirm against the table builder */
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol returned when the decoder is in this state */
+    unsigned char  nbBits;     /* number of bits to read from the stream to compute the next state */
+} FSE_decode_t;   /* size == U32 */
+
+/*! FSE_initDState() :
+ *  Read the initial state from `bitD` (tableLog bits, as stored in the DTable header), reload the stream,
+ *  and make the state reference the decoding cells, located one FSE_DTable element after the start of `dt`. */
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const void* const rawTable = dt;
+    U32 const tableLog = ((const FSE_DTableHeader*)rawTable)->tableLog;
+
+    DStatePtr->state = BIT_readBits(bitD, tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+/*! FSE_peekSymbol() :
+ *  @return the symbol associated with the current state, without reading the bitstream nor changing the state */
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+    const FSE_decode_t* const decodeTable = (const FSE_decode_t*)(DStatePtr->table);
+    return decodeTable[DStatePtr->state].symbol;
+}
+
+/*! FSE_updateState() :
+ *  Advance to the next state : read the current cell's nbBits from `bitD` and add them to its newState.
+ *  The symbol of the current cell is not returned (see FSE_peekSymbol()). */
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t* const decodeTable = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = decodeTable[DStatePtr->state];
+    DStatePtr->state = cell.newState + BIT_readBits(bitD, cell.nbBits);
+}
+
+/*! FSE_decodeSymbol() :
+ *  @return the symbol of the current state, after moving `DStatePtr` to the next state
+ *  (current cell's newState + nbBits read from `bitD` with BIT_readBits()) */
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t* const decodeTable = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = decodeTable[DStatePtr->state];
+
+    DStatePtr->state = cell.newState + BIT_readBits(bitD, cell.nbBits);
+    return cell.symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+    same as FSE_decodeSymbol(), but reads the bits with BIT_readBitsFast().
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t* const decodeTable = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = decodeTable[DStatePtr->state];
+
+    DStatePtr->state = cell.newState + BIT_readBitsFast(bitD, cell.nbBits);
+    return cell.symbol;
+}
+
+/*! FSE_endOfDState() :
+ *  @return 1 when the decoder state is 0, 0 otherwise */
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    size_t const state = DStatePtr->state;
+    return state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14   /* 2^14 bytes = 16KB; can be overridden by defining it beforehand */
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13   /* 2^13 bytes = 8KB; can be overridden by defining it beforehand */
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)   /* 12 when FSE_MAX_MEMORY_USAGE keeps its default of 14 */
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)   /* 11 when FSE_DEFAULT_MEMORY_USAGE keeps its default of 13 */
+#define FSE_MIN_TABLELOG 5   /* FSE_readNCount() adds this offset to the 4-bit tableLog field of the header */
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15   /* FSE_readNCount() rejects larger values with tableLog_tooLarge */
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+/* FSE_TABLESTEP() :
+ * @return (tableSize/2) + (tableSize/8) + 3.
+ * The argument is parenthesized so that an expression (e.g. `a + b`) expands with the intended precedence. */
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/**** ended inlining fse.h ****/
+#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+/**** start inlining huf.h ****/
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include <stddef.h>    /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */
+
+/** HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/** HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'originalSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ *         because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /**< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */
+
+/* Error Management */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */
+
+
+/* ***   Advanced function   *** */
+
+/** HUF_compress2() :
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);
+
+/** HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);
+
+#endif   /* HUF_H_298734234 */
+
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+/**** skipping file: mem.h ****/
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#  error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+/* HUF_CTABLEBOUND : constant added by HUF_COMPRESSBOUND() on top of the block bound.
+ * HUF_BLOCKBOUND(size) : size + size/256 + 8.
+ * `size` is parenthesized so that an expression (e.g. `a + b`) expands with the intended precedence. */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) ((size) + ((size)>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
+    void* name##hv = &(name##hb); \
+    HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+#endif
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
+#endif
+
+
+/* ****************************************
+ *  HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ *  2. (optional) refine tableLog using HUF_optimalTableLog()
+ *  3. build Huffman table from count using HUF_buildCTable()
+ *  4. save Huffman table to memory buffer using HUF_writeCTable()
+ *  5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ *  The following API allows targeting specific sub-functions for advanced tasks.
+ *  For example, it's possible to compress several blocks using the same 'CTable',
+ *  or to save and regenerate 'CTable' using external methods.
+ */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+
+typedef enum {
+   HUF_repeat_none,  /**< Cannot use the previous table */
+   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/** HUF_compress4X_repeat() :
+ *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+ */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                             void* workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+/** HUF_readCTable() :
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
+
+/** HUF_getNbBits() :
+ *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ *  Note 1 : is not inlined, as HUF_CElt definition is private
+ *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+
+/*
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
+
+/** HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+/**
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+
+/* ====================== */
+/* single stream variants */
+/* ====================== */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/** HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of single or double symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
+#if defined (__cplusplus)
+}
+#endif
+/**** ended inlining huf.h ****/
+
+
+/*===   Version   ===*/
+/* Report the FSE version number this translation unit was compiled with. */
+unsigned FSE_versionNumber(void)
+{
+    return FSE_VERSION_NUMBER;
+}
+
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+/* Give the readable string associated with `code`.
+ * Thin forwarder to ERR_getErrorName(). */
+const char* FSE_getErrorName(size_t code)
+{
+    return ERR_getErrorName(code);
+}
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+/* Give the readable string associated with `code`.
+ * Thin forwarder to ERR_getErrorName(). */
+const char* HUF_getErrorName(size_t code)
+{
+    return ERR_getErrorName(code);
+}
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+/*! FSE_readNCount() :
+ *  Decode the normalized-count header stored at the start of `headerBuffer`.
+ *  `normalizedCounter` : destination; its first (*maxSVPtr+1) entries are zeroed, then filled.
+ *  `maxSVPtr` : in  : highest symbol value the destination can hold;
+ *               out : highest symbol value actually present in the header.
+ *  `tableLogPtr` : out : table log read from the first 4 bits (+ FSE_MIN_TABLELOG).
+ *  `hbSize` : number of readable bytes in `headerBuffer`.
+ *  @return : number of bytes consumed from `headerBuffer`,
+ *            or an error code (tableLog_tooLarge, maxSymbolValue_tooSmall, corruption_detected),
+ *            which can be tested with FSE_isError().
+ */
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) {
+        /* This function only works when hbSize >= 4 */
+        /* Copy the short input into a zero-padded 4-byte buffer and decode that instead;
+         * a result that claims more bytes than were really provided is rejected. */
+        char buffer[4];
+        memset(buffer, 0, sizeof(buffer));
+        memcpy(buffer, headerBuffer, hbSize);
+        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+                                                    buffer, sizeof(buffer));
+            if (FSE_isError(countSize)) return countSize;
+            if (countSize > hbSize) return ERROR(corruption_detected);
+            return countSize;
+    }   }
+    assert(hbSize >= 4);
+
+    /* init */
+    memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    /* `remaining` starts at tableSize+1 and must land exactly on 1 once every count is read */
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) & (charnum<=*maxSVPtr)) {
+        if (previous0) {
+            /* The previous count was 0 : a run length of additional zero counts follows.
+             * Each all-ones 16-bit group adds 24, each 2-bit value of 3 adds 3 and continues,
+             * and the final 2-bit value (0..2) adds itself. */
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {
+                n0 += 24;
+                if (ip < iend-5) {
+                    ip += 2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {
+                    /* too close to the end to advance `ip` : consume from the bits already loaded */
+                    bitStream >>= 16;
+                    bitCount   += 16;
+            }   }
+            while ((bitStream & 3) == 3) {
+                n0 += 3;
+                bitStream >>= 2;
+                bitCount += 2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                assert((bitCount >> 3) <= 3); /* For first condition to work */
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            } else {
+                bitStream >>= 2;
+        }   }
+        /* Read one count : values below `max` are coded on nbBits-1 bits, the others on nbBits bits */
+        {   int const max = (2*threshold-1) - remaining;
+            int count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
+            } else {
+                count = bitStream & (2*threshold-1);
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            normalizedCounter[charnum++] = (short)count;
+            previous0 = !count;
+            /* shrink the field width as the remaining budget drops below the current threshold */
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                /* pin `ip` to the last position where a 4-byte read stays inside the buffer,
+                 * and keep the leftover offset in bitCount */
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(corruption_detected);
+    /* more than 32 pending bits means the header ran past the last 4 bytes that were loaded */
+    if (bitCount > 32) return ERROR(corruption_detected);
+    *maxSVPtr = charnum-1;
+
+    /* round the consumed bits up to whole bytes */
+    ip += (bitCount+7)>>3;
+    return ip-istart;
+}
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer, able to hold `hwSize` weights.
+    `rankStats` is assumed to be a table of at least (HUF_TABLELOG_MAX + 1) U32,
+    since that many entries are cleared before counting.
+    `nbSymbolsPtr` receives the number of weights written, including the implied last one.
+    `tableLogPtr` receives the table log deduced from the sum of weights.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    /* first byte selects the representation and carries its size */
+    iSize = ip[0];
+    /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzers complain ... */
+
+    if (iSize >= 128) {  /* special header */
+        /* weights are stored directly, two 4-bit values per byte */
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        /* NOTE(review): hwSize is a size_t, so hwSize-1 wraps if a caller passes hwSize==0 - confirm callers never do */
+        oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            /* weight w contributes 2^(w-1), and weight 0 contributes nothing */
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BIT_highbit32(rest);
+            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            /* the implied weight is appended right after the oSize decoded ones */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    /* one size byte plus the iSize bytes of weight data */
+    return iSize+1;
+}
+/**** ended inlining common/entropy_common.c ****/
+/**** start inlining common/error_private.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+/**** skipping file: error_private.h ****/
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+#ifdef ZSTD_STRIP_ERROR_STRINGS
+    (void)code;
+    return "Error strings stripped";
+#else
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+        /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+#endif
+}
+/**** ended inlining common/error_private.c ****/
+/**** start inlining common/fse_decompress.c ****/
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+/**** skipping file: bitstream.h ****/
+/**** skipping file: compiler.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: fse.h ****/
+/**** skipping file: error_private.h ****/
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+/* Allocate, with malloc(), a decoding table sized for `tableLog`.
+ * Requests above FSE_TABLELOG_ABSOLUTE_MAX are clamped to that value.
+ * Returns the malloc() result as-is, so the caller must check for NULL
+ * and release the table with FSE_freeDTable(). */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    unsigned const cappedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+    size_t const tableBytes = FSE_DTABLE_SIZE_U32(cappedLog) * sizeof (U32);
+    return (FSE_DTable*)malloc(tableBytes);
+}
+
+/* Hand `dt` back to free(); counterpart of FSE_createDTable(). */
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    free(dt);
+}
+
+/* FSE_buildDTable() :
+ * Fill `dt` with a decoding table derived from `normalizedCounter`,
+ * which holds one count per symbol in [0, maxSymbolValue]; the table has (1 << tableLog) cells.
+ * The first element of `dt` receives the header (tableLog, fastMode); decoding cells start at dt+1.
+ * @return : 0 on success, or an error code :
+ *           maxSymbolValue_tooLarge, tableLog_tooLarge,
+ *           or GENERIC when the counts do not fill the table exactly. */
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    /* a count of -1 gets a single cell, taken from the top of the table downwards */
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    /* any symbol occupying at least half the table disables fast mode */
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            /* symbols with a count of -1 or 0 are skipped here, since the loop bound is not positive */
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            /* each cell of a symbol receives the next value of that symbol's running counter */
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+/* Build a one-cell decoding table which always yields `symbolValue` :
+ * header with tableLog 0 and fastMode 0, followed by a single cell that
+ * reads 0 bits and transitions to state 0.
+ * @return : always 0. */
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    /* go through void* before casting, as the table is declared as an array of FSE_DTable */
+    void* const headerArea = dt;
+    void* const cellArea = dt + 1;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerArea;
+    FSE_decode_t* const onlyCell = (FSE_decode_t*)cellArea;
+
+    header->tableLog = 0;
+    header->fastMode = 0;
+
+    onlyCell->newState = 0;
+    onlyCell->symbol = symbolValue;
+    onlyCell->nbBits = 0;
+
+    return 0;
+}
+
+
+/* Build an identity decoding table for symbols coded on exactly `nbBits` bits :
+ * cell `s` yields symbol `s`, reads `nbBits` bits and transitions to state 0.
+ * @return : 0 on success, ERROR(GENERIC) when nbBits < 1. */
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    /* go through void* before casting, as the table is declared as an array of FSE_DTable */
+    void* const headerArea = dt;
+    void* const cellArea = dt + 1;
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerArea;
+    FSE_decode_t* const cells = (FSE_decode_t*)cellArea;
+    const unsigned nbCells = 1 << nbBits;   /* one cell per representable value */
+    unsigned symbol = 0;
+
+    /* at least one bit per symbol is required */
+    if (nbBits < 1) return ERROR(GENERIC);
+
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+
+    while (symbol < nbCells) {
+        cells[symbol].newState = 0;
+        cells[symbol].symbol = (BYTE)symbol;
+        cells[symbol].nbBits = (BYTE)nbBits;
+        symbol++;
+    }
+
+    return 0;
+}
+
+/* FSE_decompress_usingDTable_generic() :
+ * Decode `cSrc` (cSrcSize bytes) into `dst` (at most maxDstSize bytes) with the table `dt`.
+ * Two decoder states are kept and used alternately.
+ * `fast` selects FSE_decodeSymbolFast() over FSE_decodeSymbol() for every symbol.
+ * @return : number of bytes written into `dst`,
+ *           or ERROR(dstSize_tooSmall) when the output would not fit.
+ *           A failing BIT_initDStream() is handled through CHECK_F
+ *           (presumably an early return of its error code - confirm against the macro definition). */
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    /* the main loop writes op[0..3], so it must stop while 4 bytes still fit */
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init */
+    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+/* expands to a bare conditional expression : only use it as a full right-hand side, as below */
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        /* each pass may write up to two bytes : one now, one more if the stream overflows */
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state1);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state2);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+/* Decode `cSrc` into `dst` with an already built table `dt`.
+ * The fastMode flag stored in the table header picks which variant of the
+ * generic decoder runs.
+ * @return : whatever FSE_decompress_usingDTable_generic() returns. */
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* const rawTable = dt;
+    const FSE_DTableHeader* const header = (const FSE_DTableHeader*)rawTable;
+
+    /* the last argument stays a literal in each call (static selection of the mode) */
+    return header->fastMode
+         ? FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1)
+         : FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+/* Decode a complete FSE block : normalized-count header followed by the bitstream.
+ * `workSpace` receives the decoding table built from the header.
+ * `maxLog` is the largest table log the caller accepts.
+ * @return : number of bytes written into `dst`, or an error code. */
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
+{
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCounts[FSE_MAX_SYMBOL_VALUE+1];
+    unsigned maxSymbol = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+
+    /* step 1 : read the normalized counts */
+    size_t const headerSize = FSE_readNCount (normCounts, &maxSymbol, &tableLog, srcStart, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    /* headerSize == cSrcSize is not rejected here; the remaining input is then empty */
+    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+
+    /* step 2 : build the decoding table inside the workspace */
+    CHECK_F( FSE_buildDTable (workSpace, normCounts, maxSymbol, tableLog) );
+
+    /* step 3 : decode what follows the header; the result is returned as-is, error code or not */
+    return FSE_decompress_usingDTable (dst, dstCapacity, srcStart + headerSize, cSrcSize - headerSize, workSpace);
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/* Decode a complete FSE block using a table kept on the stack,
+ * sized for the largest supported table log (FSE_MAX_TABLELOG).
+ * @return : whatever FSE_decompress_wksp() returns. */
+size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
+{
+    /* deliberately left uninitialized : FSE_decompress_wksp() is handed the table to fill */
+    DTable_max_t stackTable;
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, stackTable, FSE_MAX_TABLELOG);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/**** ended inlining common/fse_decompress.c ****/
+/**** start inlining common/pool.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>    /* size_t */
+/**** skipping file: debug.h ****/
+/**** start inlining zstd_internal.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
+/**** skipping file: compiler.h ****/
+/**** skipping file: mem.h ****/
+/**** skipping file: debug.h ****/
+/**** skipping file: error_private.h ****/
+#define ZSTD_STATIC_LINKING_ONLY
+/**** start inlining ../zstd.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
+#include <limits.h>   /* INT_MAX */
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDLIB_API ZSTDLIB_VISIBILITY
+#endif
+
+
+/*******************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit context)
+    - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+/*------   Version   ------*/
+#define ZSTD_VERSION_MAJOR    1
+#define ZSTD_VERSION_MINOR    4
+#define ZSTD_VERSION_RELEASE  5
+
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library version */
+
+#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+#define ZSTD_QUOTE(str) #str
+#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+ZSTDLIB_API const char* ZSTD_versionString(void);   /* requires v1.3.0+ */
+
+/* *************************************
+ *  Default constant
+ ***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+/* *************************************
+ *  Constants
+ ***************************************/
+
+/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */
+#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
+#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
+
+#define ZSTD_BLOCKSIZELOG_MAX  17
+#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
+
+
+
+/***************************************
+*  Simple API
+***************************************/
+/*! ZSTD_compress() :
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity`),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+
+/*! ZSTD_decompress() :
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+
+/*! ZSTD_getFrameContentSize() : requires v1.3.0+
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of `src` frame content, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
+ *            such as ZSTD_compress(), ZSTD_compressCCtx(), ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/*! ZSTD_getDecompressedSize() :
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results to the same return value (0),
+ *  while ZSTD_getFrameContentSize() gives them separate return values.
+ * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD frame or skippable frame.
+ * `srcSize` must be >= first frame size
+ * @return : the compressed size of the first frame starting at `src`,
+ *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+ *        or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+
+/*======  Helper functions  ======*/
+#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+
+
+/***************************************
+*  Explicit context
+***************************************/
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use one different context per thread for parallel execution.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ *  Important : in order to behave similarly to `ZSTD_compress()`,
+ *  this function compresses at requested compression level,
+ *  __ignoring any other parameter__ .
+ *  If any advanced parameter was set using the advanced API,
+ *  they will all be reset. Only `compressionLevel` remains.
+ */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
+
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive decompression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(),
+ *  requires an allocated ZSTD_DCtx.
+ *  Compatible with sticky parameters.
+ */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
+
+
+/***************************************
+*  Advanced compression API
+***************************************/
+
+/* API design :
+ *   Parameters are pushed one by one into an existing context,
+ *   using ZSTD_CCtx_set*() functions.
+ *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+ *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+ *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *
+ *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
+ *
+ *   This API supersedes all other "advanced" API entry points in the experimental section.
+ *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ */
+
+
+/* Compression strategies, listed from fastest to strongest */
+typedef enum { ZSTD_fast=1,
+               ZSTD_dfast=2,
+               ZSTD_greedy=3,
+               ZSTD_lazy=4,
+               ZSTD_lazy2=5,
+               ZSTD_btlazy2=6,
+               ZSTD_btopt=7,
+               ZSTD_btultra=8,
+               ZSTD_btultra2=9
+               /* note : new strategies _might_ be added in the future.
+                         Only the order (from fast to strong) is guaranteed */
+} ZSTD_strategy;
+
+
+typedef enum {
+
+    /* compression parameters
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict.
+     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+                              * Note that exact compression parameters are dynamically determined,
+                              * depending on both compression level and srcSize (when known).
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
+                              * Note 1 : it's possible to pass a negative compression level.
+                              * Note 2 : setting a level does not automatically set all other compression parameters
+                              *   to default. Setting this will however eventually dynamically impact the compression
+                              *   parameters which have not been manually set. The manually set
+                              *   ones will 'stick'. */
+    /* Advanced compression parameters :
+     * It's possible to pin down compression parameters to some specific values.
+     * In which case, these values are no longer dynamically selected by the compressor */
+    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * This will set a memory budget for streaming decompression,
+                              * with larger values requiring more memory
+                              * and typically compressing more.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "use default windowLog".
+                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
+                              *       requires explicitly allowing such size at streaming decompression stage. */
+    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
+                              * Resulting memory usage is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "use default hashLog". */
+    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
+                              * Resulting memory usage is (1 << (chainLog+2)).
+                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless for "fast" strategy.
+                              * It's still useful when using "dfast" strategy,
+                              * in which case it defines a secondary probe table.
+                              * Special: value 0 means "use default chainLog". */
+    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless for "fast" and "dFast" strategies.
+                              * Special: value 0 means "use default searchLog". */
+    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
+                              * Note that Zstandard can still find matches of smaller size,
+                              * it just tweaks its search algorithm to look for this size and larger.
+                              * Larger values increase compression and decompression speed, but decrease ratio.
+                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              *                    , for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
+    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
+                              * For strategies btopt, btultra & btultra2:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
+                              * Special: value 0 means "use default targetLength". */
+    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "use default strategy". */
+
+    /* LDM mode parameters */
+    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                     * This parameter is designed to improve compression ratio
+                                     * for large inputs, by finding large matches at long distance.
+                                     * It increases memory usage and window size.
+                                     * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
+                                     * except when expressly set to a different value. */
+    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Default: windowLog - 7.
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashRateLog". */
+
+    /* frame parameters */
+    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
+                              * Content size must be known at the beginning of compression.
+                              * This is automatically the case when using ZSTD_compress2(),
+                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+    ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
+    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+    /* multi-threading parameters */
+    /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+     * They return an error otherwise. */
+    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
+                              * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
+                              * ZSTD_compressStream*() consumes input and flushes output if possible, but immediately gives back control to caller,
+                              * while compression work is performed in parallel, within worker threads.
+                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+                              * More workers improve speed, but also increase memory usage.
+                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
+    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * The minimum size is automatically and transparently enforced. */
+    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
+                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+                              * It helps preserve compression ratio, while each job is compressed in parallel.
+                              * This value is enforced only when nbWorkers >= 1.
+                              * Larger values increase compression ratio, but decrease speed.
+                              * Possible values range from 0 to 9 :
+                              * - 0 means "default" : value will be determined by the library, depending on strategy
+                              * - 1 means "no overlap"
+                              * - 9 means "full overlap", using a full window size.
+                              * Each intermediate rank increases/decreases load size by a factor 2 :
+                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
+                              * default value varies between 6 and 9, depending on strategy */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_c_rsyncable
+     * ZSTD_c_format
+     * ZSTD_c_forceMaxWindow
+     * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly;
+     *        also, the enums values themselves are unstable and can still change.
+     */
+     ZSTD_c_experimentalParam1=500,
+     ZSTD_c_experimentalParam2=10,
+     ZSTD_c_experimentalParam3=1000,
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004
+} ZSTD_cParameter;
+
+typedef struct {
+    size_t error;    /* error status field; must be tested using ZSTD_isError() */
+    int lowerBound;  /* lowest accepted value for the parameter (inclusive) */
+    int upperBound;  /* highest accepted value for the parameter (inclusive) */
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is generally only possible during frame initialization (before starting compression).
+ *  Exception : when using multi-threading mode (nbWorkers >= 1),
+ *              the following parameters can be updated _during_ compression (within same frame):
+ *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ *              new parameters will be active for next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  Value will be written in frame header, unless explicitly forbidden using ZSTD_c_contentSizeFlag.
+ *  This value will also be controlled at end of frame, and trigger an error if not respected.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ *  Note 3 : Whenever all input data is provided and consumed in a single round,
+ *           for example with ZSTD_compress2(),
+ *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
+ *           this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+    ZSTD_reset_session_only = 1,           /* reset the session only; parameters are left unchanged */
+    ZSTD_reset_parameters = 2,             /* reset parameters back to "default"; only valid when no frame is in progress */
+    ZSTD_reset_session_and_parameters = 3  /* reset the session, then reset parameters */
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ *  There are 2 different things that can be reset, independently or jointly :
+ *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ *                  Useful after an error, or to interrupt any ongoing compression.
+ *                  Any internal data not yet flushed is cancelled.
+ *                  Compression parameters and dictionary remain unchanged.
+ *                  They will be used to compress next frame.
+ *                  Resetting session never fails.
+ *  - The parameters : changes all parameters back to "default".
+ *                  This removes any reference to any dictionary too.
+ *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ *  - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  ZSTD_compress2() always starts a new frame.
+ *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - The function is always blocking, returns when compression is completed.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ *           or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
+                                   void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+
+/***************************************
+*  Advanced decompression API
+***************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ *        Therefore, no new decompression function is necessary.
+ */
+
+typedef enum {
+
+    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
+                              * the streaming API will refuse to allocate memory buffer
+                              * in order to protect the host from unreasonable memory requirements.
+                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_d_format
+     * ZSTD_d_stableOutBuffer
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly
+     */
+     ZSTD_d_experimentalParam1=1000,
+     ZSTD_d_experimentalParam2=1001
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ *  Set one decompression parameter, selected by enum ZSTD_dParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ *  Return a DCtx to clean state.
+ *  Session and parameters can be reset jointly or separately.
+ *  Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/****************************
+*  Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+  const void* src;    /**< start of input buffer */
+  size_t size;        /**< size of input buffer */
+  size_t pos;         /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+  void*  dst;         /**< start of output buffer */
+  size_t size;        /**< size of output buffer */
+  size_t pos;         /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+
+/*-***********************************************************************
+*  Streaming compression - HowTo
+*
+*  A ZSTD_CStream object is required to track streaming operation.
+*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+*  For parallel execution, use one separate ZSTD_CStream per thread.
+*
+*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+*  Parameters are sticky : when starting a new compression on the same context,
+*  it will re-use the same sticky parameters as previous compression session.
+*  When in doubt, it's recommended to fully initialize the context before usage.
+*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+*  set more specific parameters, the pledged source size, or load a dictionary.
+*
+*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+*  consume input stream. The function will automatically update both `pos`
+*  fields within `input` and `output`.
+*  Note that the function may not consume the entire input, for example, because
+*  the output buffer is already full, in which case `input.pos < input.size`.
+*  The caller must check if input has been entirely consumed.
+*  If not, the caller must make some room to receive more compressed data,
+*  and then present again remaining input data.
+*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+*        but doesn't guarantee maximal forward progress. This is especially relevant
+*        when compressing with multiple threads. The call won't block if it can
+*        consume some input, but if it can't it will wait for some, but not all,
+*        output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+*           or an error code, which can be tested using ZSTD_isError().
+*
+*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+*  operation.
+*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if internal buffers are entirely flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  The epilogue is required for decoders to consider a frame completed.
+*  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+*  start a new frame.
+*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if frame fully completed and fully flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
+ *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *                                                  The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            Before starting a new compression job, or changing compression parameters,
+ *            it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that performance ends up being spent more in the interface, rather than compression itself.
+ * In which cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the nb of roundtrips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * The following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
+ *
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+*  Streaming decompression - HowTo
+*
+*  A ZSTD_DStream object is required to track streaming operations.
+*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+*  ZSTD_DStream objects can be re-used multiple times.
+*
+*  Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+*  Alternatively, use advanced API to set specific properties.
+*
+*  Use ZSTD_decompressStream() repetitively to consume your input.
+*  The function will update both `pos` fields.
+*  If `input.pos < input.size`, some input has not been consumed.
+*  It's up to the caller to present again remaining data.
+*  The function tries to flush all data decoded immediately, respecting output buffer size.
+*  If `output.pos < output.size`, decoder has flushed everything it could.
+*  But if `output.pos == output.size`, there might be some data left within internal buffers.
+*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*        or an error code, which can be tested using ZSTD_isError(),
+*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*                                the return value is a suggested next input size (just a hint for better latency)
+*                                that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
+
+
+/**************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/***********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict(). */
+ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict() */
+ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+
+
+/********************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                 const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                 const void* prefix, size_t prefixSize);
+
+/* ===   Memory management   === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/****************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+    unsigned int matchPos; /* Position of the match in dst */
+    /* If seqDef.offset > 3, then this is seqDef.offset - 3
+     * If seqDef.offset < 3, then this is the corresponding repeat offset
+     * But if seqDef.offset < 3 and litLength == 0, this is the
+     *   repeat offset before the corresponding repeat offset
+     * And if seqDef.offset == 3 and litLength == 0, this is the
+     *   most recent repeat offset - 1
+     */
+    unsigned int offset;
+    unsigned int litLength; /* Literal length */
+    unsigned int matchLength; /* Match length */
+    /* 0 when the sequence is not a repeat (rep) match, and seqDef.offset otherwise.
+     * When litLength == 0 this will be <= 4, otherwise <= 3 like normal.
+     */
+    unsigned int rep;
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /**< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /**< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /**< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+    ZSTD_compressionParameters cParams;   /**< compression parameters, see ZSTD_compressionParameters */
+    ZSTD_frameParameters fParams;         /**< frame parameters, see ZSTD_frameParameters */
+} ZSTD_parameters;
+
+typedef enum {
+    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+    ZSTD_dlm_byCopy = 0,  /**< Copy dictionary content internally */
+    ZSTD_dlm_byRef = 1    /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
+    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
+                                 * Useful to save 4 bytes per generated frame.
+                                 * Decoder cannot recognise automatically this format, requiring this instruction. */
+} ZSTD_format_e;
+
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in three ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+  ZSTD_lcm_auto = 0,          /**< Automatically determine the compression mode based on the compression level.
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       /**< Always attempt Huffman compression. Uncompressed literals will still be
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2   /**< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+
+/***************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - upper-bound for the decompressed size of all data in all successive frames
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+/*! ZSTD_getSequences() :
+ * Extract sequences from the sequence store
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ * @return : number of sequences extracted
+ */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize);
+
+
+/***************************************
+*  Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ *  for any compression level up to selected one.
+ *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ *         does not include space for a window buffer.
+ *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  The estimate will assume the input may be arbitrarily large,
+ *  which is the worst case.
+ *
+ *  When srcSize can be bound by a known and rather "small" value,
+ *  this fact can be used to provide a tighter estimation
+ *  because the CCtx compression context will need less memory.
+ *  This tighter estimation can be provided by more advanced functions
+ *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ *  Note 2 : only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, whose additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
+ */
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
+
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_customMem customMem);
+
+
+
+/***************************************
+*  Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is just referenced, not duplicated.
+ *  As a consequence, `dictBuffer` **must** outlive CDict,
+ *  and its content must remain unmodified throughout the lifetime of CDict.
+ *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+ *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ *  Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ *  optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ *  This function never fails (wide contract) */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+ *  Note : this function is now DEPRECATED.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  Note : this function is now REDUNDANT.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_CDict* cdict,
+                                              ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* ===   experimental parameters   === */
+/* these parameters can be used with ZSTD_CCtx_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_e enum definition for details.
+ */
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/*! ZSTD_CCtx_getParameter() :
+ *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ *  Quick howto :
+ *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ *                                     an existing ZSTD_CCtx_params structure.
+ *                                     This is similar to
+ *                                     ZSTD_CCtx_setParameter().
+ *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ *                                    an existing CCtx.
+ *                                    These parameters will be applied to
+ *                                    all subsequent frames.
+ *  - ZSTD_compressStream2() : Do compression using the CCtx.
+ *  - ZSTD_freeCCtxParams() : Free the memory.
+ *
+ *  This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
+ *  for static allocation of CCtx for single-threaded compression.
+ */
+ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_reset() :
+ *  Reset params to default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_init() :
+ *  Initializes the compression parameters of cctxParams according to
+ *  compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+
+/*! ZSTD_CCtxParams_init_advanced() :
+ *  Initializes the compression and frame parameters of cctxParams according to
+ *  params. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() :
+ *  Similar to ZSTD_CCtx_setParameter.
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Apply a set of ZSTD_CCtx_params to the compression context.
+ *  This can be done even after compression is started,
+ *    if nbWorkers==0, this will have no impact until a new compression is started.
+ *    if nbWorkers>=1, new parameters will be picked up at next job,
+ *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(),
+ *  but using only integral types as arguments.
+ *  This variant might be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/***************************************
+*  Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but references `dict` content instead of copying it into `dctx`.
+ *  This saves memory if `dict` remains around.
+ *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but gives direct control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/*! ZSTD_DCtx_setFormat() :
+ *  Instruct the decoder context about what kind of data to decode next.
+ *  This instruction is mandatory to decode data without a fully-formed header,
+ *  such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+
+/*! ZSTD_decompressStream_simpleArgs() :
+ *  Same as ZSTD_decompressStream(),
+ *  but using only integral types as arguments.
+ *  This can be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos);
+
+
+/********************************************************************
+*  Advanced streaming functions
+*  Warning : most of these functions are now redundant with the Advanced API.
+*  Once Advanced API reaches "stable" status,
+*  redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*=====   Advanced Streaming compression functions  =====*/
+/**! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/**! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/**! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/**! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/**! ZSTD_initCStream_usingCDict_advanced() :
+ *   This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ *  start a new frame, using same parameters from previous frame.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+
+
+typedef struct {
+    unsigned long long ingested;   /* number of input bytes read and buffered */
+    unsigned long long consumed;   /* number of input bytes actually compressed */
+    unsigned long long produced;   /* number of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* number of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : number of the latest started job */
+    unsigned nbActiveWorkers;      /* MT only : number of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
+ */
+ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+/*! ZSTD_toFlushNow() :
+ *  Tell how many bytes are ready to be flushed immediately.
+ *  Useful for multithreading scenarios (nbWorkers >= 1).
+ *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
+ *  and check its output buffer.
+ * @return : amount of data stored in oldest job and ready to be flushed immediately.
+ *  if @return == 0, it means either :
+ *  + there is no active job (could be checked with ZSTD_getFrameProgression()), or
+ *  + oldest job is still actively compressing data,
+ *    but everything it has produced has also been flushed so far,
+ *    therefore flush speed is limited by production speed of oldest job
+ *    irrespective of the speed of concurrent (and newer) jobs.
+ */
+ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+
+
+/*=====   Advanced Streaming decompression functions  =====*/
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+ *
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
+ *
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+
+
+/*********************************************************************
+*  Buffer-less and synchronous inner streaming functions
+*
+*  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+*  But it's also a complex one, with several restrictions, documented below.
+*  Prefer normal streaming API for an easier experience.
+********************************************************************* */
+
+/**
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if required value is too large for current system (in 32-bits mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  whose maximum size is provided in the `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At which point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are setup, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+*/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
+    unsigned blockSizeMax;               /* maximum size of a full block in this frame; tells when a round buffer has no room left for another block */
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;                 /* NOTE(review): presumably the size of the frame header in bytes - confirm */
+    unsigned dictID;                     /* dictionary ID read from the frame header; like every field here it may be corrupted or spoofed */
+    unsigned checksumFlag;               /* NOTE(review): presumably non-zero when the frame carries the optional checksum - confirm */
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ *  decode Frame Header, or requires larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* misc */
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/**       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users are then responsible for keeping the metadata needed to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up the statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/
+
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+
+
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
+
+#if defined (__cplusplus)
+}
+#endif
+/**** ended inlining ../zstd.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: huf.h ****/
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
+#endif
+/**** start inlining xxhash.h ****/
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * 
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+*  API modifier
+******************************/
+/** XXH_PRIVATE_API
+*   This is useful if you want to include xxhash functions in `static` mode
+*   in order to inline them, and remove their symbol from the public list.
+*   Methodology :
+*     #define XXH_PRIVATE_API
+*     #include "xxhash.h"
+*   `xxhash.c` is automatically included.
+*   It's not useful to compile and link it as a separate module anymore.
+*/
+#ifdef XXH_PRIVATE_API
+#  ifndef XXH_STATIC_LINKING_ONLY
+#    define XXH_STATIC_LINKING_ONLY
+#  endif
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif /* XXH_PRIVATE_API */
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with another library which also includes xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+with the value of XXH_NAMESPACE (so avoid leaving it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program as long as it includes `xxhash.h` :
+regular symbol name will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    6
+#define XXH_VERSION_RELEASE  2
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+*  Simple Hash Functions
+******************************/
+typedef unsigned int       XXH32_hash_t;
+typedef unsigned long long XXH64_hash_t;
+
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*!
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "length" stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
+*/
+
+
+/* ****************************
+*  Streaming Hash Functions
+******************************/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+
+/*! State allocation, compatible with dynamic libraries */
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+
+/* hash streaming */
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions generate the xxHash of an input provided in multiple segments.
+Note that, for small input, they are slower than single-call functions, due to state management.
+For small input, prefer `XXH32()` and `XXH64()` .
+
+XXH state must first be allocated, using XXH*_createState() .
+
+Start a new hash by initializing state with a seed, using XXH*_reset().
+
+Then, feed the hash state by calling XXH*_update() as many times as necessary.
+Obviously, input must be allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, a hash value can be produced anytime, by using XXH*_digest().
+This function returns the nn-bits hash as an unsigned int or unsigned long long.
+
+It's still possible to continue inserting input into the hash state after a digest,
+and generate some new hashes later on, by calling again XXH*_digest().
+
+When done, free XXH state space if it was allocated dynamically.
+*/
+
+
+/* **************************
+*  Utils
+****************************/
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 : compiler does not advertise C99 or later */
+#  define restrict   /* disable restrict : expands to nothing so the copyState prototypes below still compile */
+#endif
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
+
+
+/* **************************
+*  Canonical representation
+****************************/
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+*  These functions allow transformation of hash result into and from its canonical format.
+*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+*/
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+/* ================================================================================================
+   This section contains definitions which are not guaranteed to remain stable.
+   They may change in future versions, becoming incompatible with a different version of the library.
+   They shall only be used with static linking.
+   Never use these definitions in association with dynamic linking !
+=================================================================================================== */
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
+#define XXH_STATIC_H_3543687687345
+
+/* These definitions are only meant to allow allocation of XXH state
+   statically, on stack, or in a struct for example.
+   Do not use members directly. */
+
+   struct XXH32_state_s {
+       unsigned total_len_32;
+       unsigned large_len;
+       unsigned v1;         /* NOTE(review): v1..v4 are presumably the running hash accumulators - confirm against xxhash.c */
+       unsigned v2;
+       unsigned v3;
+       unsigned v4;
+       unsigned mem32[4];   /* buffer defined as U32 for alignment */
+       unsigned memsize;    /* NOTE(review): presumably the number of bytes currently held in mem32 - confirm against xxhash.c */
+       unsigned reserved;   /* never read nor written, will be removed in a future version */
+   };   /* typedef'd to XXH32_state_t */
+
+   struct XXH64_state_s {
+       unsigned long long total_len;
+       unsigned long long v1;         /* NOTE(review): v1..v4 are presumably the running hash accumulators - confirm against xxhash.c */
+       unsigned long long v2;
+       unsigned long long v3;
+       unsigned long long v4;
+       unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
+       unsigned memsize;              /* NOTE(review): presumably the number of bytes currently held in mem64 - confirm against xxhash.c */
+       unsigned reserved[2];          /* never read nor written, will be removed in a future version */
+   };   /* typedef'd to XXH64_state_t */
+
+
+#  ifdef XXH_PRIVATE_API
+/**** start inlining xxhash.c ****/
+/*
+ *  xxHash - Fast Hash algorithm
+ *  Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - xxHash homepage: http://www.xxhash.com
+ *  - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * 
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+*/
+
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
+  defined(__ICCARM__)
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
+#  define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash; set to 0 when the input data
+ * is guaranteed to be aligned.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define XXH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+#include <stddef.h>     /* size_t */
+/* Allocation hook used by the state constructors; forwards to malloc(). */
+static void* XXH_malloc(size_t byteCount)
+{
+    return malloc(byteCount);
+}
+/* Release hook used by the state destructors; forwards to free(). */
+static void XXH_free(void* block)
+{
+    free(block);
+}
+/* for memcpy() */
+#include <string.h>
+/* Copy hook used by the streaming update functions; forwards to memcpy() and returns its result. */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    void* const copied = memcpy(dest, src, size);
+    return copied;
+}
+
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY
+#endif
+/**** skipping file: xxhash.h ****/
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
+#else
+#  define INLINE_KEYWORD
+#endif
+
+#if defined(__GNUC__) || defined(__ICCARM__)
+#  define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#  define FORCE_INLINE_ATTR __forceinline
+#else
+#  define FORCE_INLINE_ATTR
+#endif
+
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+
+
+#ifdef _MSC_VER
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* *************************************
+*  Basic Types
+***************************************/
+/* Fixed-width integer aliases used by the hash code below.
+ * The whole group is skipped when MEM_MODULE is already defined.
+ * C99 / C++ builds (except VMS) take the widths from <stdint.h>; other builds
+ * fall back to the built-in integer types. */
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint8_t  BYTE;
+    typedef uint16_t U16;
+    typedef uint32_t U32;
+    typedef  int32_t S32;
+    typedef uint64_t U64;
+#  else
+    typedef unsigned char      BYTE;
+    typedef unsigned short     U16;
+    typedef unsigned int       U32;
+    typedef   signed int       S32;
+    typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
+#  endif
+#endif
+
+
+/* XXH_read32() / XXH_read64() : load a 32-bit / 64-bit value in native byte order from memPtr.
+ * Exactly one of the three implementations below is compiled, selected by XXH_FORCE_MEMORY_ACCESS
+ * (2 = direct dereference, 1 = packed union, anything else or undefined = memcpy). */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+/* GCC version encoded as major*100 + minor (e.g. 4.3 -> 403). */
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* XXH_rotl32(x,r) / XXH_rotl64(x,r) : rotate x left by r bits.
+ * MSVC uses the _rotl/_rotl64 intrinsics, IAR uses __ROR for the 32-bit case,
+ * everything else uses the shift-based form.
+ * The shift-based form evaluates x twice and requires 0 < r < width of x
+ * (a shift by the full width is undefined in C).
+ * Every macro argument is parenthesized so that expression arguments
+ * (e.g. `a ^ b`, `n + 1`) keep their intended grouping after expansion. */
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#if defined(__ICCARM__)
+#  include <intrinsics.h>
+#  define XXH_rotl32(x,r) __ROR((x),(32 - (r)))
+#else
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#endif
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/* XXH_swap32 / XXH_swap64 : reverse the byte order of a 32-bit / 64-bit value.
+ * Compiler intrinsics are used when available; otherwise the portable
+ * shift-and-mask functions below are compiled. */
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    /* each source byte is moved to its mirrored position, lowest result byte first */
+    U32 swapped = (x >> 24) & 0x000000ff;
+    swapped |= (x >>  8) & 0x0000ff00;
+    swapped |= (x <<  8) & 0x00ff0000;
+    swapped |= (x << 24) & 0xff000000;
+    return swapped;
+}
+static U64 XXH_swap64 (U64 x)
+{
+    /* same scheme as XXH_swap32, over eight bytes */
+    U64 swapped = (x >> 56) & 0x00000000000000ffULL;
+    swapped |= (x >> 40) & 0x000000000000ff00ULL;
+    swapped |= (x >> 24) & 0x0000000000ff0000ULL;
+    swapped |= (x >> 8)  & 0x00000000ff000000ULL;
+    swapped |= (x << 8)  & 0x000000ff00000000ULL;
+    swapped |= (x << 24) & 0x0000ff0000000000ULL;
+    swapped |= (x << 40) & 0x00ff000000000000ULL;
+    swapped |= (x << 56) & 0xff00000000000000ULL;
+    return swapped;
+}
+#endif
+
+
+/* *************************************
+*  Architecture Macros
+***************************************/
+/* Byte-order selector handed to the read helpers. The numeric values line up with
+ * XXH_CPU_LITTLE_ENDIAN (0 or 1), which callers cast directly to this type. */
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    /* Run-time probe: the first byte of an int holding 1 reads as 1 only when the
+     * least significant byte is stored first. */
+    static const int g_one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
+#endif
+
+
+/* ***************************
+*  Memory reads
+*****************************/
+/* Tells XXH_readLE32_align() / XXH_readLE64_align() whether the pointer may be
+ * dereferenced directly (XXH_aligned) or must go through XXH_read32() / XXH_read64() (XXH_unaligned). */
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+/* Reads a 32-bit little-endian value from ptr.
+ * `align` picks the load: direct dereference for XXH_aligned, XXH_read32() for XXH_unaligned.
+ * The loaded value is returned as-is when `endian` is XXH_littleEndian and byte-swapped otherwise. */
+FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    U32 const raw = (align==XXH_unaligned) ? XXH_read32(ptr) : *(const U32*)ptr;
+    if (endian==XXH_littleEndian)
+        return raw;
+    return XXH_swap32(raw);
+}
+
+/* Shorthand for XXH_readLE32_align() with the unaligned load path. */
+FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    XXH_alignment const loadMode = XXH_unaligned;
+    return XXH_readLE32_align(ptr, endian, loadMode);
+}
+
+/* Reads a 32-bit big-endian value from ptr (unaligned-safe load; swapped on little-endian CPUs). */
+static U32 XXH_readBE32(const void* ptr)
+{
+    U32 const raw = XXH_read32(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN)
+        return XXH_swap32(raw);
+    return raw;
+}
+
+/* Reads a 64-bit little-endian value from ptr.
+ * `align` picks the load: direct dereference for XXH_aligned, XXH_read64() for XXH_unaligned.
+ * The loaded value is returned as-is when `endian` is XXH_littleEndian and byte-swapped otherwise. */
+FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    U64 const raw = (align==XXH_unaligned) ? XXH_read64(ptr) : *(const U64*)ptr;
+    if (endian==XXH_littleEndian)
+        return raw;
+    return XXH_swap64(raw);
+}
+
+/* Shorthand for XXH_readLE64_align() with the unaligned load path. */
+FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    XXH_alignment const loadMode = XXH_unaligned;
+    return XXH_readLE64_align(ptr, endian, loadMode);
+}
+
+/* Reads a 64-bit big-endian value from ptr (unaligned-safe load; swapped on little-endian CPUs). */
+static U64 XXH_readBE64(const void* ptr)
+{
+    U64 const raw = XXH_read64(ptr);
+    if (XXH_CPU_LITTLE_ENDIAN)
+        return XXH_swap64(raw);
+    return raw;
+}
+
+
+/* *************************************
+*  Macros
+***************************************/
+/* Compile-time check: when `c` is false the enum initializer becomes 1/0, which is not a valid constant expression.
+ * The expansion is a braced block, hence the placement rule noted at the end of the line. */
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/* *************************************
+*  Constants
+***************************************/
+/* Multiplicative constants of the 32-bit hash (used by XXH32_round(), the tail loops and the final mixing steps). */
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 =  668265263U;
+static const U32 PRIME32_5 =  374761393U;
+
+/* Multiplicative constants of the 64-bit hash (used by XXH64_round(), XXH64_mergeRound(), the tail loops and the final mixing steps). */
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 =  1609587929392839161ULL;
+static const U64 PRIME64_4 =  9650029242287828579ULL;
+static const U64 PRIME64_5 =  2870177450012600261ULL;
+
+/* Returns the value of XXH_VERSION_NUMBER this code was compiled with. */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void)
+{
+    unsigned const version = XXH_VERSION_NUMBER;
+    return version;
+}
+
+
+/* **************************
+*  Utils
+****************************/
+/* Duplicates *srcState into *dstState with a plain byte copy of the whole structure. */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
+{
+    size_t const stateBytes = sizeof(*dstState);
+    memcpy(dstState, srcState, stateBytes);
+}
+
+/* Duplicates *srcState into *dstState with a plain byte copy of the whole structure. */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
+{
+    size_t const stateBytes = sizeof(*dstState);
+    memcpy(dstState, srcState, stateBytes);
+}
+
+
+/* ***************************
+*  Simple Hash Functions
+*****************************/
+
+/* One accumulator step of the 32-bit hash:
+ * add input*PRIME32_2, rotate left by 13, multiply by PRIME32_1. */
+static U32 XXH32_round(U32 seed, U32 input)
+{
+    U32 const mixed   = seed + input * PRIME32_2;
+    U32 const rotated = XXH_rotl32(mixed, 13);
+    return rotated * PRIME32_1;
+}
+
+/* Core of the one-shot 32-bit hash.
+ * input/len : bytes to hash; seed : initial value mixed into the accumulators.
+ * endian/align : forwarded to XXH_readLE32_align() for every 32-bit read.
+ * Returns the 32-bit hash value. */
+FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+/* not #undef'd : the macro stays visible for the rest of the file (XXH64_endian_align() also uses it) */
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    /* NULL input is treated as empty: p and bEnd get the same non-NULL dummy value so no loop below runs */
+    if (p==NULL) {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16) {
+        /* bulk phase: consume 16-byte stripes, one 32-bit lane per accumulator */
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do {
+            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+        } while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    } else {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    /* tail phase 1: remaining whole 32-bit words */
+    while (p+4<=bEnd) {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    /* tail phase 2: remaining single bytes */
+    while (p<bEnd) {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    /* final mixing of the accumulated value */
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+/* One-shot 32-bit hash of the `len` bytes at `input`, starting from `seed`.
+ * Dispatches to the XXH32_endian_align() specialisation matching the CPU byte
+ * order and, when XXH_FORCE_ALIGN_CHECK is enabled, the 4-byte alignment of `input`. */
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_CREATESTATE_STATIC(state);
+    XXH32_reset(state, seed);
+    XXH32_update(state, input, len);
+    return XXH32_digest(state);
+#else
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+    /* aligned fast path is only considered when XXH_FORCE_ALIGN_CHECK is non-zero */
+    int const wordAligned = XXH_FORCE_ALIGN_CHECK && ((((size_t)input) & 3) == 0);
+
+    if (wordAligned) {
+        if (readAsLE)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+
+    if (readAsLE)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+/* One accumulator step of the 64-bit hash:
+ * add input*PRIME64_2, rotate left by 31, multiply by PRIME64_1. */
+static U64 XXH64_round(U64 acc, U64 input)
+{
+    U64 const mixed   = acc + input * PRIME64_2;
+    U64 const rotated = XXH_rotl64(mixed, 31);
+    return rotated * PRIME64_1;
+}
+
+/* Folds one lane accumulator into the running hash:
+ * xor with XXH64_round(0, val), then multiply by PRIME64_1 and add PRIME64_4. */
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+    U64 const folded = acc ^ XXH64_round(0, val);
+    return folded * PRIME64_1 + PRIME64_4;
+}
+
+/* Core of the one-shot 64-bit hash.
+ * input/len : bytes to hash; seed : initial value mixed into the accumulators.
+ * endian/align : forwarded to the XXH_readLE64_align() / XXH_readLE32_align() reads.
+ * Returns the 64-bit hash value.
+ * Relies on the XXH_get32bits() macro defined inside XXH32_endian_align(). */
+FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    /* deliberately not `const`-qualified: reassigned below when XXH_ACCEPT_NULL_INPUT_POINTER is defined */
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    /* NULL input is treated as empty: p and bEnd get the same non-NULL dummy value so no loop below runs */
+    if (p==NULL) {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32) {
+        /* bulk phase: consume 32-byte stripes, one 64-bit lane per accumulator */
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+        } while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    /* tail phase 1: remaining whole 64-bit words */
+    while (p+8<=bEnd) {
+        U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+        h64 ^= k1;
+        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    /* tail phase 2: at most one remaining 32-bit word */
+    if (p+4<=bEnd) {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    /* tail phase 3: remaining single bytes */
+    while (p<bEnd) {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    /* final mixing of the accumulated value */
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+/* One-shot 64-bit hash of the `len` bytes at `input`, starting from `seed`.
+ * Dispatches to the XXH64_endian_align() specialisation matching the CPU byte
+ * order and, when XXH_FORCE_ALIGN_CHECK is enabled, the 8-byte alignment of `input`. */
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_CREATESTATE_STATIC(state);
+    XXH64_reset(state, seed);
+    XXH64_update(state, input, len);
+    return XXH64_digest(state);
+#else
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+    /* aligned fast path is only considered when XXH_FORCE_ALIGN_CHECK is non-zero */
+    int const wordAligned = XXH_FORCE_ALIGN_CHECK && ((((size_t)input) & 7)==0);
+
+    if (wordAligned) {
+        if (readAsLE)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+
+    if (readAsLE)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+/* **************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+/* Obtains storage for one XXH32 state from XXH_malloc() and returns it unmodified
+ * (the state is not initialised here; see XXH32_reset()). */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+    void* const storage = XXH_malloc(sizeof(XXH32_state_t));
+    return (XXH32_state_t*)storage;
+}
+/* Hands statePtr back to XXH_free(). Always reports XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_errorcode const status = XXH_OK;
+    XXH_free(statePtr);
+    return status;
+}
+
+/* Obtains storage for one XXH64 state from XXH_malloc() and returns it unmodified
+ * (the state is not initialised here; see XXH64_reset()). */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    void* const storage = XXH_malloc(sizeof(XXH64_state_t));
+    return (XXH64_state_t*)storage;
+}
+/* Hands statePtr back to XXH_free(). Always reports XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_errorcode const status = XXH_OK;
+    XXH_free(statePtr);
+    return status;
+}
+
+
+/*** Hash feed ***/
+
+/* Re-initialises *statePtr for a new 32-bit hash computation starting from `seed`.
+ * Counters and the pending-bytes buffer are cleared and v1..v4 are seeded.
+ * The `reserved` member of *statePtr is left untouched. Always returns XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));   /* clear the whole local copy so no indeterminate byte can be copied out */
+    state.v1 = seed + PRIME32_1 + PRIME32_2;
+    state.v2 = seed + PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME32_1;
+    /* do not write into reserved, for future removal : copy only the members that precede it */
+    memcpy(statePtr, &state, offsetof(XXH32_state_t, reserved));
+    return XXH_OK;
+}
+
+
+/* Re-initialises *statePtr for a new 64-bit hash computation starting from `seed`.
+ * Counters and the pending-bytes buffer are cleared and v1..v4 are seeded.
+ * The `reserved` member of *statePtr is left untouched. Always returns XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));   /* clear the whole local copy so no indeterminate byte can be copied out */
+    state.v1 = seed + PRIME64_1 + PRIME64_2;
+    state.v2 = seed + PRIME64_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - PRIME64_1;
+    /* do not write into reserved, for future removal : copy only the members that precede it */
+    memcpy(statePtr, &state, offsetof(XXH64_state_t, reserved));
+    return XXH_OK;
+}
+
+
+/* Feeds `len` bytes at `input` into the streaming 32-bit state.
+ * Bytes that do not complete a 16-byte stripe are kept in state->mem32 until a later call.
+ * `endian` is forwarded to every 32-bit read.
+ * Returns XXH_OK, or XXH_ERROR for a NULL input when XXH_ACCEPT_NULL_INPUT_POINTER is defined. */
+FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len_32 += (unsigned)len;
+    state->large_len |= (len>=16) | (state->total_len_32>=16);
+
+    if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+        /* same guard as XXH64_update_endian() : never hand a NULL source to memcpy(), even for an empty update */
+        if (input != NULL) {
+            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        }
+        state->memsize += (unsigned)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize) {   /* some data left from previous update */
+        /* top up the pending buffer to a full stripe, then consume it */
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {   const U32* p32 = state->mem32;
+            state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+            state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+            state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+            state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16) {
+        /* bulk phase: whole stripes straight from the input, accumulators kept in locals */
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do {
+            v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+            v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+            v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+            v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd) {
+        /* stash the incomplete stripe for the next update or the digest */
+        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+        state->memsize = (unsigned)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+/* Public streaming entry point: picks the byte-order specialisation of
+ * XXH32_update_endian() and returns its result. */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+
+    if (!readAsLE)
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+    return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+}
+
+
+
+/* Computes the 32-bit hash for everything fed into `state` so far.
+ * The state is only read, so more data can be added afterwards.
+ * `endian` is forwarded to the 32-bit reads of the pending bytes. */
+FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+    const BYTE* cursor = (const BYTE*)state->mem32;
+    const BYTE* const tailEnd = cursor + state->memsize;
+    U32 acc;
+
+    /* start value: merged accumulators once 16+ bytes were seen, seed-based otherwise */
+    if (state->large_len)
+        acc = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    else
+        acc = state->v3 /* == seed */ + PRIME32_5;
+
+    acc += state->total_len_32;
+
+    /* pending bytes: whole 32-bit words first ... */
+    for ( ; cursor+4 <= tailEnd; cursor += 4) {
+        acc += XXH_readLE32(cursor, endian) * PRIME32_3;
+        acc  = XXH_rotl32(acc, 17) * PRIME32_4;
+    }
+
+    /* ... then the leftover single bytes */
+    for ( ; cursor < tailEnd; cursor++) {
+        acc += (*cursor) * PRIME32_5;
+        acc  = XXH_rotl32(acc, 11) * PRIME32_1;
+    }
+
+    /* final mixing */
+    acc ^= acc >> 15;
+    acc *= PRIME32_2;
+    acc ^= acc >> 13;
+    acc *= PRIME32_3;
+    acc ^= acc >> 16;
+
+    return acc;
+}
+
+
+/* Public digest entry point: picks the byte-order specialisation of
+ * XXH32_digest_endian() and returns its result. */
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+
+    if (!readAsLE)
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+    return XXH32_digest_endian(state_in, XXH_littleEndian);
+}
+
+
+
+/* **** XXH64 **** */
+
+/* Feeds `len` bytes at `input` into the streaming 64-bit state.
+ * Bytes that do not complete a 32-byte stripe are kept in state->mem64 until a later call.
+ * `endian` is forwarded to every 64-bit read.
+ * Returns XXH_OK, or XXH_ERROR for a NULL input when XXH_ACCEPT_NULL_INPUT_POINTER is defined. */
+FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32) {  /* fill in tmp buffer */
+        /* memcpy() is skipped entirely for a NULL input */
+        if (input != NULL) {
+            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        }
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize) {   /* tmp buffer is full */
+        /* top up the pending buffer to a full stripe, then consume it */
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd) {
+        /* bulk phase: whole stripes straight from the input, accumulators kept in locals */
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do {
+            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd) {
+        /* stash the incomplete stripe for the next update or the digest */
+        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+        state->memsize = (unsigned)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+/* Public streaming entry point: picks the byte-order specialisation of
+ * XXH64_update_endian() and returns its result. */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+
+    if (!readAsLE)
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+    return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+}
+
+
+
+/* Computes the 64-bit hash for everything fed into `state` so far.
+ * The state is only read, so more data can be added afterwards.
+ * `endian` is forwarded to the reads of the pending bytes. */
+FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+    const BYTE* cursor = (const BYTE*)state->mem64;
+    const BYTE* const tailEnd = cursor + state->memsize;
+    U64 acc;
+
+    /* start value: seed-based below 32 bytes of total input, merged accumulators otherwise */
+    if (state->total_len < 32) {
+        acc  = state->v3 + PRIME64_5;
+    } else {
+        U64 const lane1 = state->v1;
+        U64 const lane2 = state->v2;
+        U64 const lane3 = state->v3;
+        U64 const lane4 = state->v4;
+
+        acc = XXH_rotl64(lane1, 1) + XXH_rotl64(lane2, 7) + XXH_rotl64(lane3, 12) + XXH_rotl64(lane4, 18);
+        acc = XXH64_mergeRound(acc, lane1);
+        acc = XXH64_mergeRound(acc, lane2);
+        acc = XXH64_mergeRound(acc, lane3);
+        acc = XXH64_mergeRound(acc, lane4);
+    }
+
+    acc += (U64) state->total_len;
+
+    /* pending bytes: whole 64-bit words first ... */
+    for ( ; cursor+8 <= tailEnd; cursor += 8) {
+        U64 const word = XXH64_round(0, XXH_readLE64(cursor, endian));
+        acc ^= word;
+        acc  = XXH_rotl64(acc,27) * PRIME64_1 + PRIME64_4;
+    }
+
+    /* ... then at most one 32-bit word ... */
+    if (cursor+4 <= tailEnd) {
+        acc ^= (U64)(XXH_readLE32(cursor, endian)) * PRIME64_1;
+        acc  = XXH_rotl64(acc, 23) * PRIME64_2 + PRIME64_3;
+        cursor += 4;
+    }
+
+    /* ... then the leftover single bytes */
+    for ( ; cursor < tailEnd; cursor++) {
+        acc ^= (*cursor) * PRIME64_5;
+        acc  = XXH_rotl64(acc, 11) * PRIME64_1;
+    }
+
+    /* final mixing */
+    acc ^= acc >> 33;
+    acc *= PRIME64_2;
+    acc ^= acc >> 29;
+    acc *= PRIME64_3;
+    acc ^= acc >> 32;
+
+    return acc;
+}
+
+
+/* Public digest entry point: picks the byte-order specialisation of
+ * XXH64_digest_endian() and returns its result. */
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess const endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    int const readAsLE = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
+
+    if (!readAsLE)
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+    return XXH64_digest_endian(state_in, XXH_littleEndian);
+}
+
+
+/* **************************
+*  Canonical representation
+****************************/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+*   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+*   These functions allow transformation of hash result into and from its canonical format.
+*   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*/
+
+/* Writes `hash` into *dst in canonical (big-endian) byte order. */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    XXH32_hash_t canonical = hash;
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        canonical = XXH_swap32(canonical);
+    }
+    memcpy(dst, &canonical, sizeof(*dst));
+}
+
+/* Writes `hash` into *dst in canonical (big-endian) byte order. */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    XXH64_hash_t canonical = hash;
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        canonical = XXH_swap64(canonical);
+    }
+    memcpy(dst, &canonical, sizeof(*dst));
+}
+
+/* Converts a canonical (big-endian) representation back into a native 32-bit hash value. */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+    XXH32_hash_t const hash = XXH_readBE32(src);
+    return hash;
+}
+
+/* Converts a canonical (big-endian) representation back into a native 64-bit hash value. */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+    XXH64_hash_t const hash = XXH_readBE64(src);
+    return hash;
+}
+/**** ended inlining xxhash.c ****/
+#  endif
+
+#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/**** ended inlining xxhash.h ****/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define FSE_isError  ERR_isError
+#define HUF_isError  ERR_isError
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+/* Any earlier MIN / MAX definitions are dropped and replaced by the ones below.
+ * Each macro expands its selected argument twice, so arguments must not have side effects. */
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+/**
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...)
+{
+  /* Nothing to do at run time: the signature alone makes the compiler demand
+   * at least one argument at every call site. */
+  (void)format;
+}
+
+/**
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+/**
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ *
+ * At least one argument (a format string) must follow `err`; it is checked by
+ * _FORCE_HAS_FORMAT_STRING() and appended to the log line.
+ *
+ * Usage note: the expansion is a bare `if (cond) { ... }` statement with no
+ * do/while wrapper, so an `else` written directly after an invocation would
+ * attach to this macro's `if`. Do not use it as the unbraced body of an
+ * if/else.
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/**
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ *
+ * At least one argument (a format string) must follow `err`.
+ *
+ * Usage note: the expansion already ends with `;` after `while(0)`, so the
+ * customary semicolon at the call site produces an extra empty statement.
+ * Do not use it as the unbraced body of an `if` that has an `else`.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/**
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ *
+ * `err` is evaluated exactly once (its value is captured in a local size_t).
+ * At least one argument (a format string) must follow `err`.
+ *
+ * Usage note: the expansion already ends with `;` after `while(0)`, so the
+ * customary semicolon at the call site produces an extra empty statement.
+ * Do not use it as the unbraced body of an `if` that has an `else`.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTD_OPT_NUM    (1<<12)
+
+#define ZSTD_REP_NUM      3                 /* number of repcodes */
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+
+/* Postfix size multipliers: written after a number, e.g. `4 KB` expands to `4 *(1 <<10)`.
+ * KB and MB multiply by a signed int constant, GB by an unsigned one (1U<<30).
+ * The expansion is not parenthesized as a whole, so wrap `n KB` in parentheses inside larger expressions. */
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+
+#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML   52
+#define MaxLL   35
+#define DefaultMaxOff 28
+#define MaxOff  31
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
+#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
+
+static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3,
+                                      4, 6, 7, 8, 9,10,11,12,
+                                     13,14,15,16 };
+static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
+                                             2, 2, 2, 2, 2, 1, 1, 1,
+                                             2, 2, 2, 2, 2, 2, 2, 2,
+                                             2, 3, 2, 1, 1, 1, 1, 1,
+                                            -1,-1,-1,-1 };
+#define LL_DEFAULTNORMLOG 6  /* for static allocation */
+static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3,
+                                      4, 4, 5, 7, 8, 9,10,11,
+                                     12,13,14,15,16 };
+static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
+                                             2, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1,-1,-1,
+                                            -1,-1,-1,-1,-1 };
+#define ML_DEFAULTNORMLOG 6  /* for static allocation */
+static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
+                                                     2, 1, 1, 1, 1, 1, 1, 1,
+                                                     1, 1, 1, 1, 1, 1, 1, 1,
+                                                    -1,-1,-1,-1,-1 };
+#define OF_DEFAULTNORMLOG 5  /* for static allocation */
+static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+/* ZSTD_copy8() :
+ * Copies exactly 8 bytes from `src` to `dst`.
+ * The NEON 8-byte load/store pair is used when __ARM_NEON is defined and
+ * ZSTD_NO_INTRINSICS is not; every other configuration uses memcpy(). */
+static void ZSTD_copy8(void* dst, const void* src)
+{
+#if defined(ZSTD_NO_INTRINSICS) || !defined(__ARM_NEON)
+    memcpy(dst, src, 8);
+#else
+    const uint8_t* const from = (const uint8_t*)src;
+    uint8_t* const to = (uint8_t*)dst;
+    vst1_u8(to, vld1_u8(from));
+#endif
+}
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+/* ZSTD_copy16() :
+ * Copies exactly 16 bytes from `src` to `dst`.
+ * The NEON 16-byte load/store pair is used when __ARM_NEON is defined and
+ * ZSTD_NO_INTRINSICS is not; every other configuration uses memcpy(). */
+static void ZSTD_copy16(void* dst, const void* src)
+{
+#if defined(ZSTD_NO_INTRINSICS) || !defined(__ARM_NEON)
+    memcpy(dst, src, 16);
+#else
+    const uint8_t* const from = (const uint8_t*)src;
+    uint8_t* const to = (uint8_t*)dst;
+    vst1q_u8(to, vld1q_u8(from));
+#endif
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+/* Overlap mode handed to ZSTD_wildcopy() through its `ovtype` parameter. */
+typedef enum {
+    /* src and dst are at least WILDCOPY_VECLEN bytes apart (asserted by ZSTD_wildcopy) */
+    ZSTD_no_overlap,
+    /* dst is at least 8 bytes after src; the buffers may otherwise overlap */
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
+
+/*! ZSTD_wildcopy() :
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ *
+ *  Every loop below is a do/while, so at least one 8- or 16-byte chunk is
+ *  always copied, and copying proceeds in whole chunks until `op` reaches or
+ *  passes `dst + length`. Callers must therefore leave slack after both
+ *  buffers.
+ */
+MEM_STATIC FORCE_INLINE_ATTR 
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
+{
+    /* signed distance from src to dst, in bytes */
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    /* one past the last byte the caller asked for; the copy may run beyond it */
+    BYTE* const oend = op + length;
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        /* dst is fewer than 16 bytes after src, so copy in 8-byte steps */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first COPY16() call because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities. Since it is almost certain to be short, only do
+         * one COPY16() in the first call. Then, do two calls per loop since
+         * at that point it is more likely to have a high trip count.
+         */
+#ifndef __aarch64__
+        /* plain loop: one 16-byte chunk per iteration */
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#else
+        /* first chunk alone, then 32 bytes per iteration */
+        COPY16(op, ip);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#endif
+    }
+}
+
+/* ZSTD_limitCopy() :
+ * Copies the smaller of `dstCapacity` and `srcSize` bytes from `src` to `dst`.
+ * memcpy() is not called at all when that amount is zero.
+ * @return : number of bytes copied */
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const nbBytes = MIN(dstCapacity, srcSize);
+    if (nbBytes == 0) {
+        return 0;
+    }
+    memcpy(dst, src, nbBytes);
+    return nbBytes;
+}
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+
+/*-*******************************************
+*  Private declarations
+*********************************************/
+/* One stored sequence.
+ * The two 16-bit lengths are reduced values: ZSTD_getSequenceLength() adds
+ * MINMATCH to matchLength, and adds 0xFFFF to whichever length the owning
+ * seqStore_t flags as long for this sequence. */
+typedef struct seqDef_s {
+    U32 offset;
+    U16 litLength;
+    U16 matchLength;
+} seqDef;
+
+/* Storage for a batch of sequences and their literals.
+ * NOTE(review): the roles of the fields are inferred from their names unless
+ * stated otherwise below - confirm against the compressor code. */
+typedef struct {
+    /* first element of the sequence array; ZSTD_getSequenceLength() computes
+     * a sequence's index as (seq - sequencesStart) */
+    seqDef* sequencesStart;
+    /* presumably the write cursor into the sequence array - TODO confirm */
+    seqDef* sequences;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* llCode;
+    BYTE* mlCode;
+    BYTE* ofCode;
+    size_t maxNbSeq;
+    size_t maxNbLit;
+    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+    /* index (relative to sequencesStart) of the sequence longLengthID applies to */
+    U32   longLengthPos;
+} seqStore_t;
+
+/* Full (32-bit) literal and match lengths of one sequence, as returned by
+ * ZSTD_getSequenceLength(). */
+typedef struct {
+    U32 litLength;
+    U32 matchLength;
+} ZSTD_sequenceLength;
+
+/**
+ * Expands a stored sequence into its full literal and match lengths.
+ * MINMATCH is always added back to the match length. When `seq` sits at index
+ * seqStore->longLengthPos (counted from seqStore->sequencesStart), 0xFFFF is
+ * additionally added to the literal length (longLengthID == 1) or to the
+ * match length (longLengthID == 2).
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+    U32 const seqIndex = (U32)(seq - seqStore->sequencesStart);
+    ZSTD_sequenceLength result;
+
+    result.litLength = seq->litLength;
+    result.matchLength = seq->matchLength + MINMATCH;
+
+    /* Only the one flagged sequence carries a long length */
+    if (seqIndex != seqStore->longLengthPos) {
+        return result;
+    }
+    switch (seqStore->longLengthID) {
+    case 1:
+        result.litLength += 0xFFFF;
+        break;
+    case 2:
+        result.matchLength += 0xFFFF;
+        break;
+    default:
+        break;
+    }
+    return result;
+}
+
+/**
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ *       similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+/* custom memory allocation functions */
+void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_free(void* ptr, ZSTD_customMem customMem);
+
+
+/* ZSTD_highbit32() :
+ * Returns the position of the highest set bit of `val`, where bit 0 is the
+ * least significant one (so the result is in 0..31).
+ * `val` must be non-zero; this is only checked by assert().
+ * The implementation is selected at compile time: a compiler intrinsic for
+ * MSVC, GCC-compatible compilers and IAR, else a portable De Bruijn lookup. */
+MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
+{
+    assert(val != 0);
+    {
+#   if defined(_MSC_VER)   /* Visual */
+        unsigned long r=0;
+        return _BitScanReverse(&r, val) ? (unsigned)r : 0;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+        /* clz counts leading zeros; XOR with 31 is the same as 31 - clz here */
+        return __builtin_clz (val) ^ 31;
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return 31 - __CLZ(val);
+#   else   /* Software version */
+        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+        U32 v = val;
+        /* smear the highest set bit into every lower position */
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        /* the top 5 bits of the product index the lookup table */
+        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t;   /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                       const void* src, size_t srcSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
+/**** ended inlining zstd_internal.h ****/
+/**** start inlining pool.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef POOL_H
+#define POOL_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#include <stddef.h>   /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
+/**** skipping file: ../zstd.h ****/
+
+typedef struct POOL_ctx_s POOL_ctx;
+
+/*! POOL_create() :
+ *  Create a thread pool with at most `numThreads` threads.
+ * `numThreads` must be at least 1.
+ *  The maximum number of queued jobs before blocking is `queueSize`.
+ * @return : POOL_ctx pointer on success, else NULL.
+*/
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
+
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
+                               ZSTD_customMem customMem);
+
+/*! POOL_free() :
+ *  Free a thread pool returned by POOL_create().
+ */
+void POOL_free(POOL_ctx* ctx);
+
+/*! POOL_resize() :
+ *  Expands or shrinks pool's number of threads.
+ *  This is more efficient than releasing + creating a new context,
+ *  since it tries to preserve and re-use existing threads.
+ * `numThreads` must be at least 1.
+ * @return : 0 when resize was successful,
+ *           !0 (typically 1) if there is an error.
+ *    note : only numThreads can be resized, queueSize remains unchanged.
+ */
+int POOL_resize(POOL_ctx* ctx, size_t numThreads);
+
+/*! POOL_sizeof() :
+ * @return threadpool memory usage
+ *  note : compatible with NULL (returns 0 in this case)
+ */
+size_t POOL_sizeof(POOL_ctx* ctx);
+
+/*! POOL_function :
+ *  The function type that can be added to a thread pool.
+ */
+typedef void (*POOL_function)(void*);
+
+/*! POOL_add() :
+ *  Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
+ *  Possibly blocks until there is room in the queue.
+ *  Note : The function may be executed asynchronously,
+ *         therefore, `opaque` must live until function has been completed.
+ */
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+ *  Add the job `function(opaque)` to thread pool _if_ a worker is available.
+ *  Returns immediately even if not (does not block).
+ * @return : 1 if successful, 0 if not.
+ */
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
+/**** ended inlining pool.h ****/
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifdef ZSTD_MULTITHREAD
+
+/**** start inlining threading.h ****/
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+/**** skipping file: debug.h ****/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+#ifdef WINVER
+#  undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+#  undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+
+#undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#include <windows.h>
+#undef ERROR
+#define ERROR(name) ZSTD_ERROR(name)
+
+
+/* mutex */
+#define ZSTD_pthread_mutex_t           CRITICAL_SECTION
+#define ZSTD_pthread_mutex_init(a, b)  ((void)(b), InitializeCriticalSection((a)), 0)
+#define ZSTD_pthread_mutex_destroy(a)  DeleteCriticalSection((a))
+#define ZSTD_pthread_mutex_lock(a)     EnterCriticalSection((a))
+#define ZSTD_pthread_mutex_unlock(a)   LeaveCriticalSection((a))
+
+/* condition variable */
+#define ZSTD_pthread_cond_t             CONDITION_VARIABLE
+#define ZSTD_pthread_cond_init(a, b)    ((void)(b), InitializeConditionVariable((a)), 0)
+#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
+#define ZSTD_pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define ZSTD_pthread_cond_signal(a)     WakeConditionVariable((a))
+#define ZSTD_pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* ZSTD_pthread_create() and ZSTD_pthread_join() */
+typedef struct {
+    HANDLE handle;
+    void* (*start_routine)(void*);
+    void* arg;
+} ZSTD_pthread_t;
+
+int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif defined(ZSTD_MULTITHREAD)    /* posix assumed ; need a better detection method */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#if DEBUGLEVEL < 1
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t
+#define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
+#define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock((a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock((a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t
+#define ZSTD_pthread_cond_init(a, b)    pthread_cond_init((a), (b))
+#define ZSTD_pthread_cond_destroy(a)    pthread_cond_destroy((a))
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait((a), (b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal((a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast((a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
+
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
+
+#endif
+
+#else  /* ZSTD_MULTITHREAD not defined */
+/* No multithreading support */
+
+typedef int ZSTD_pthread_mutex_t;
+#define ZSTD_pthread_mutex_init(a, b)   ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_mutex_destroy(a)   ((void)(a))
+#define ZSTD_pthread_mutex_lock(a)      ((void)(a))
+#define ZSTD_pthread_mutex_unlock(a)    ((void)(a))
+
+typedef int ZSTD_pthread_cond_t;
+#define ZSTD_pthread_cond_init(a, b)    ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
+#define ZSTD_pthread_cond_wait(a, b)    ((void)(a), (void)(b))
+#define ZSTD_pthread_cond_signal(a)     ((void)(a))
+#define ZSTD_pthread_cond_broadcast(a)  ((void)(a))
+
+/* do not use ZSTD_pthread_t */
+
+#endif /* ZSTD_MULTITHREAD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* THREADING_H_938743 */
+/**** ended inlining threading.h ****/
+
+/* A job is a function and an opaque argument */
+typedef struct POOL_job_s {
+    POOL_function function;
+    void *opaque;
+} POOL_job;
+
+/* Thread-pool state (multithreaded build).
+ * Allocated zero-initialized by POOL_create_advanced() and released by
+ * POOL_free(). */
+struct POOL_ctx_s {
+    /* allocator used by POOL_free() and POOL_resize_internal() for this pool's memory */
+    ZSTD_customMem customMem;
+    /* Keep track of the threads */
+    ZSTD_pthread_t* threads;
+    /* number of started threads in `threads`; POOL_join() joins this many */
+    size_t threadCapacity;
+    /* workers do not pop a job while numThreadsBusy >= threadLimit */
+    size_t threadLimit;
+
+    /* The queue is a circular buffer */
+    POOL_job *queue;
+    /* index of the next job to pop */
+    size_t queueHead;
+    /* index of the next free slot to push into */
+    size_t queueTail;
+    /* number of slots in `queue`; POOL_create_advanced() stores the requested size + 1 */
+    size_t queueSize;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
+    /* The mutex protects the queue */
+    ZSTD_pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    ZSTD_pthread_cond_t queuePushCond;
+    /* Condition variables for poppers to wait on when the queue is empty */
+    ZSTD_pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* POOL_thread() :
+ * Work thread for the thread pool.
+ * Waits for jobs and executes them.
+ * `opaque` is the POOL_ctx* the thread belongs to.
+ * The loop only exits through the shutdown check, which runs while the thread
+ * is idle (queue empty, or thread limit reached).
+ * @returns : NULL on failure else non-null.
+ */
+static void* POOL_thread(void* opaque) {
+    POOL_ctx* const ctx = (POOL_ctx*)opaque;
+    if (!ctx) { return NULL; }
+    for (;;) {
+        /* Lock the mutex and wait for a non-empty queue or until shutdown */
+        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+
+        /* stay idle while there is nothing to pop or too many threads are busy */
+        while ( ctx->queueEmpty
+            || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
+            if (ctx->shutdown) {
+                /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
+                 * a few threads will be shutdown while !queueEmpty,
+                 * but enough threads will remain active to finish the queue */
+                ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+                return opaque;
+            }
+            ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        }
+        /* Pop a job off the queue */
+        {   POOL_job const job = ctx->queue[ctx->queueHead];
+            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            ctx->numThreadsBusy++;
+            /* head catching up with tail means the last queued job was just taken */
+            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
+            /* Unlock the mutex, signal a pusher, and run the job */
+            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+
+            /* the job runs without the mutex held */
+            job.function(job.opaque);
+
+            /* If the intended queue size was 0, signal after finishing job */
+            ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+            ctx->numThreadsBusy--;
+            /* queueSize == 1: isQueueFull() depends on numThreadsBusy, so a
+             * blocked pusher has to be woken once this thread is free again */
+            if (ctx->queueSize == 1) {
+                ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+            }
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+        }
+    }  /* for (;;) */
+    assert(0);  /* Unreachable */
+}
+
+/* POOL_create() :
+ * Convenience wrapper: same as POOL_create_advanced() with ZSTD_defaultCMem
+ * as the allocator. */
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize)
+{
+    POOL_ctx* const pool = POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
+    return pool;
+}
+
+/* POOL_create_advanced() :
+ * Creates a thread pool with `numThreads` worker threads and a job queue of
+ * `queueSize` entries, taking all memory from `customMem`.
+ * @return : the new pool, or NULL when numThreads is 0, an allocation fails,
+ *           a mutex/condition variable cannot be initialized, or a thread
+ *           cannot be started. On failure everything already acquired is
+ *           released through POOL_free().
+ */
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
+                               ZSTD_customMem customMem) {
+    POOL_ctx* ctx;
+    /* Check parameters */
+    if (!numThreads) { return NULL; }
+    /* Allocate the context and zero initialize */
+    ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
+    if (!ctx) { return NULL; }
+    /* Record the allocator before anything can fail: every error path below
+     * calls POOL_free(), which releases ctx and its buffers through
+     * ctx->customMem. With a still-zeroed customMem, memory obtained from a
+     * custom allocator would be handed to free(). */
+    ctx->customMem = customMem;
+    /* Initialize the job queue.
+     * It needs one extra space since one space is wasted to differentiate
+     * empty and full queues.
+     */
+    ctx->queueSize = queueSize + 1;
+    ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
+    ctx->queueHead = 0;
+    ctx->queueTail = 0;
+    ctx->numThreadsBusy = 0;
+    ctx->queueEmpty = 1;
+    {
+        int error = 0;
+        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+        if (error) { POOL_free(ctx); return NULL; }
+    }
+    ctx->shutdown = 0;
+    /* Allocate space for the thread handles */
+    ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
+    ctx->threadCapacity = 0;
+    /* Check for errors */
+    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
+    /* Initialize the threads */
+    {   size_t i;
+        for (i = 0; i < numThreads; ++i) {
+            if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+                /* only the threads started so far must be joined by POOL_free() */
+                ctx->threadCapacity = i;
+                POOL_free(ctx);
+                return NULL;
+        }   }
+        ctx->threadCapacity = numThreads;
+        ctx->threadLimit = numThreads;
+    }
+    return ctx;
+}
+
+/*! POOL_join() :
+    Shutdown the queue, wake any sleeping threads, and join all of the threads.
+    The shutdown flag is set while holding queueMutex; both condition variables
+    are then broadcast so that blocked pushers and idle workers re-check it.
+    Only the first ctx->threadCapacity entries of ctx->threads are joined.
+*/
+static void POOL_join(POOL_ctx* ctx) {
+    /* Shut down the queue */
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    ctx->shutdown = 1;
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    /* Wake up sleeping threads */
+    ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
+    /* Join all of the threads */
+    {   size_t i;
+        for (i = 0; i < ctx->threadCapacity; ++i) {
+            ZSTD_pthread_join(ctx->threads[i], NULL);  /* note : could fail */
+    }   }
+}
+
+/* POOL_free() :
+ * Joins the worker threads, destroys the synchronization objects and releases
+ * the queue, the thread array and the context itself with the pool's own
+ * allocator. Accepts NULL (no-op). */
+void POOL_free(POOL_ctx *ctx) {
+    if (ctx != NULL) {
+        /* keep a copy: the context that holds the allocator is freed last */
+        ZSTD_customMem const cMem = ctx->customMem;
+        POOL_join(ctx);
+        ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
+        ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
+        ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
+        ZSTD_free(ctx->queue, cMem);
+        ZSTD_free(ctx->threads, cMem);
+        ZSTD_free(ctx, cMem);
+    }
+}
+
+
+
+/* POOL_sizeof() :
+ * @return : bytes used by the context, its job queue and its thread array;
+ *           0 when ctx is NULL. */
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    size_t total = 0;
+    if (ctx != NULL) {
+        total += sizeof(*ctx);
+        total += ctx->queueSize * sizeof(POOL_job);
+        total += ctx->threadCapacity * sizeof(ZSTD_pthread_t);
+    }
+    return total;
+}
+
+
+/* POOL_resize_internal() :
+ * Sets the number of usable threads to `numThreads`.
+ * When enough threads already exist only threadLimit is changed; otherwise
+ * the thread array is reallocated and the missing threads are started.
+ * If starting a thread fails, threadCapacity is left at the number of threads
+ * that do exist and threadLimit is not modified.
+ * @return : 0 on success, 1 on error (numThreads == 0, allocation failure,
+ *           thread creation failure) */
+static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
+{
+    size_t const oldCapacity = ctx->threadCapacity;
+    ZSTD_pthread_t* newThreads;
+    size_t n;
+
+    if (numThreads <= oldCapacity) {
+        /* enough threads exist already : only adjust the limit */
+        if (numThreads == 0) return 1;
+        ctx->threadLimit = numThreads;
+        return 0;
+    }
+
+    /* numThreads > oldCapacity : grow the thread array */
+    newThreads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
+    if (newThreads == NULL) return 1;
+    memcpy(newThreads, ctx->threads, oldCapacity * sizeof(*newThreads));
+    ZSTD_free(ctx->threads, ctx->customMem);
+    ctx->threads = newThreads;
+
+    /* start the additional threads */
+    for (n = oldCapacity; n < numThreads; n++) {
+        if (ZSTD_pthread_create(&newThreads[n], NULL, &POOL_thread, ctx)) {
+            ctx->threadCapacity = n;
+            return 1;
+        }
+    }
+
+    /* successfully expanded */
+    ctx->threadCapacity = numThreads;
+    ctx->threadLimit = numThreads;
+    return 0;
+}
+
+/* POOL_resize() :
+ * Runs POOL_resize_internal() under queueMutex, then wakes every thread
+ * waiting on queuePopCond so it re-evaluates the thread limit.
+ * @return : 0 on success, 1 on error (including ctx == NULL) */
+int POOL_resize(POOL_ctx* ctx, size_t numThreads)
+{
+    if (ctx != NULL) {
+        int status;
+        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+        status = POOL_resize_internal(ctx, numThreads);
+        ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
+        ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+        return status;
+    }
+    return 1;
+}
+
+/**
+ * Tells whether a new job can currently be queued.
+ *
+ * With queueSize > 1 this is the usual circular-buffer test: the slot after
+ * the tail is the head.
+ * With queueSize == 1 (pool created with an intended queueSize of 0) the
+ * queue counts as full when a job is still waiting, or when threadLimit
+ * threads are already busy.
+ *
+ * @return 1 if the queue is full, 0 otherwise.
+ */
+static int isQueueFull(POOL_ctx const* ctx) {
+    if (ctx->queueSize <= 1) {
+        return (!ctx->queueEmpty)
+            || (ctx->numThreadsBusy == ctx->threadLimit);
+    }
+    {   size_t const nextTail = (ctx->queueTail + 1) % ctx->queueSize;
+        return ctx->queueHead == nextTail;
+    }
+}
+
+
+/* POOL_add_internal() :
+ * Stores the job at the tail of the queue and signals queuePopCond.
+ * Does nothing when the pool is shutting down.
+ * Both callers in this file hold queueMutex and have checked for room. */
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+    POOL_job newJob;
+    newJob.function = function;
+    newJob.opaque = opaque;
+    assert(ctx != NULL);
+    if (!ctx->shutdown) {
+        size_t const slot = ctx->queueTail;
+        ctx->queue[slot] = newJob;
+        ctx->queueTail = (slot + 1) % ctx->queueSize;
+        ctx->queueEmpty = 0;
+        ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+    }
+}
+
+/* POOL_add() :
+ * Queues `function(opaque)`, blocking on queuePushCond while the queue is
+ * full. The wait also ends when the pool starts shutting down, in which case
+ * POOL_add_internal() drops the job. */
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    for (;;) {
+        /* proceed as soon as there is room, or the pool is shutting down */
+        if (ctx->shutdown || !isQueueFull(ctx)) break;
+        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+}
+
+
+/* POOL_tryAdd() :
+ * Non-blocking variant of POOL_add(): hands the job to POOL_add_internal()
+ * only if the queue is not full at the time of the call.
+ * @return : 1 if the job was handed over, 0 if the queue was full */
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    int added = 0;
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    if (!isQueueFull(ctx)) {
+        POOL_add_internal(ctx, function, opaque);
+        added = 1;
+    }
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    return added;
+}
+
+
+#else  /* ZSTD_MULTITHREAD  not defined */
+
+/* ========================== */
+/* No multi-threading support */
+/* ========================== */
+
+
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
+/* Single-threaded build: the pool holds no state. */
+struct POOL_ctx_s {
+    int dummy;
+};
+/* The one shared instance that POOL_create_advanced() returns in this build. */
+static POOL_ctx g_ctx;
+
+/* POOL_create() (no multi-threading) :
+ * Forwards to POOL_create_advanced() with ZSTD_defaultCMem. */
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize)
+{
+    POOL_ctx* const pool = POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
+    return pool;
+}
+
+/* POOL_create_advanced() (no multi-threading) :
+ * All parameters are ignored; the address of the static g_ctx is returned. */
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
+{
+    POOL_ctx* const shared = &g_ctx;
+    (void)customMem;
+    (void)queueSize;
+    (void)numThreads;
+    return shared;
+}
+
+/* POOL_free() (no multi-threading) :
+ * Nothing to release; only asserts that ctx is NULL or the static g_ctx. */
+void POOL_free(POOL_ctx* ctx)
+{
+    assert(ctx == &g_ctx || !ctx);
+    (void)ctx;
+}
+
+/* POOL_resize() (no multi-threading) :
+ * Ignores both arguments.
+ * @return : always 0 */
+int POOL_resize(POOL_ctx* ctx, size_t numThreads)
+{
+    (void)numThreads;
+    (void)ctx;
+    return 0;
+}
+
+/* POOL_add() (no multi-threading) :
+ * Runs `function(opaque)` immediately in the calling thread; ctx is unused. */
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    (void)ctx;
+    (*function)(opaque);
+}
+
+/* POOL_tryAdd() (no multi-threading) :
+ * Runs `function(opaque)` immediately in the calling thread; ctx is unused.
+ * @return : always 1 */
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    (void)ctx;
+    (*function)(opaque);
+    return 1;
+}
+
+/* POOL_sizeof() (no multi-threading) :
+ * @return : sizeof the (dummy) context, or 0 when ctx is NULL */
+size_t POOL_sizeof(POOL_ctx* ctx)
+{
+    if (ctx != NULL) {
+        assert(ctx == &g_ctx);
+        return sizeof(*ctx);
+    }
+    return 0;  /* supports sizeof NULL */
+}
+
+#endif  /* ZSTD_MULTITHREAD */
+/**** ended inlining common/pool.c ****/
+/**** start inlining common/zstd_common.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdlib.h>      /* malloc, calloc, free */
+#include <string.h>      /* memset */
+/**** skipping file: error_private.h ****/
+/**** skipping file: zstd_internal.h ****/
+
+
+/*-****************************************
+*  Version
+******************************************/
+/*! ZSTD_versionNumber() :
+ *  returns the value of ZSTD_VERSION_NUMBER this code was compiled with */
+unsigned ZSTD_versionNumber(void)
+{
+    return ZSTD_VERSION_NUMBER;
+}
+
+/*! ZSTD_versionString() :
+ *  returns the value of ZSTD_VERSION_STRING this code was compiled with */
+const char* ZSTD_versionString(void)
+{
+    return ZSTD_VERSION_STRING;
+}
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+/*! ZSTD_isError() :
+ *  tells if a return value is an error code;
+ *  thin exported wrapper around ERR_isError(),
+ *  symbol is required for external callers */
+unsigned ZSTD_isError(size_t code)
+{
+    return ERR_isError(code);
+}
+
+/*! ZSTD_getErrorName() :
+ *  provides error code string from function result (useful for debugging);
+ *  thin exported wrapper around ERR_getErrorName() */
+const char* ZSTD_getErrorName(size_t code)
+{
+    return ERR_getErrorName(code);
+}
+
+/*! ZSTD_getErrorCode() :
+ *  convert a `size_t` function result into a proper ZSTD_ErrorCode enum;
+ *  thin exported wrapper around ERR_getErrorCode() */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code)
+{
+    return ERR_getErrorCode(code);
+}
+
+/*! ZSTD_getErrorString() :
+ *  provides error code string from enum;
+ *  thin exported wrapper around ERR_getErrorString() */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code)
+{
+    return ERR_getErrorString(code);
+}
+
+
+
+/*=**************************************************************
+*  Custom allocator
+****************************************************************/
+/* ZSTD_malloc() :
+ * Allocates `size` bytes with customMem.customAlloc when it is set,
+ * otherwise with malloc().
+ * @return : the allocator's result, passed through unchanged */
+void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
+{
+    return customMem.customAlloc
+         ? customMem.customAlloc(customMem.opaque, size)
+         : malloc(size);
+}
+
+/* ZSTD_calloc() :
+ * Allocates `size` zero-initialized bytes.
+ * Uses customMem.customAlloc (called with customMem.opaque) followed by memset()
+ * when a custom allocator is set, and the standard calloc() otherwise.
+ * @return : pointer to the zeroed buffer,
+ *           or NULL when the selected allocator fails. */
+void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset;
+         * not as efficient as calloc, but next best guess for custom malloc */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        /* a failed custom allocation must be reported to the caller,
+         * not handed to memset() (undefined behavior on NULL) */
+        if (ptr != NULL)
+            memset(ptr, 0, size);
+        return ptr;
+    }
+    return calloc(1, size);
+}
+
+/* ZSTD_free() :
+ * Releases `ptr`; does nothing when `ptr` is NULL.
+ * Uses customMem.customFree (called with customMem.opaque) when it is set,
+ * and the standard free() otherwise. */
+void ZSTD_free(void* ptr, ZSTD_customMem customMem)
+{
+    if (ptr == NULL) return;   /* nothing to release */
+
+    if (customMem.customFree) {
+        customMem.customFree(customMem.opaque, ptr);
+        return;
+    }
+    free(ptr);
+}
+/**** ended inlining common/zstd_common.c ****/
+
+/**** start inlining compress/fse_compress.c ****/
+/* ******************************************************************
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+/**** skipping file: ../common/compiler.h ****/
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/debug.h ****/
+/**** start inlining hist.h ****/
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include <stddef.h>   /* size_t */
+
+
+/* --- simple histogram functions --- */
+
+/*! HIST_count():
+ *  Provides the precise count of each byte within a table 'count'.
+ * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+ *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
+ * @return : count of the most frequent symbol (which isn't identified).
+ *           or an error code, which can be tested using HIST_isError().
+ *           note : if return == srcSize, there is only one symbol.
+ */
+size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                  const void* src, size_t srcSize);
+
+unsigned HIST_isError(size_t code);  /**< tells if a return value is an error code */
+
+
+/* --- advanced histogram functions --- */
+
+#define HIST_WKSP_SIZE_U32 1024
+#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
+/** HIST_count_wksp() :
+ *  Same as HIST_count(), but using an externally provided scratch buffer.
+ *  Benefit is this function will use very little stack space.
+ * `workSpace` is a writable buffer which must be 4-byte aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t workSpaceSize);
+
+/** HIST_countFast() :
+ *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
+ *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
+ */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* src, size_t srcSize);
+
+/** HIST_countFast_wksp() :
+ *  Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-byte aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize,
+                           void* workSpace, size_t workSpaceSize);
+
+/*! HIST_count_simple() :
+ *  Same as HIST_countFast(), this function is unsafe,
+ *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
+ *  It is also a bit slower for large inputs.
+ *  However, it does not need any additional memory (not even on stack).
+ * @return : count of the most frequent symbol.
+ *  Note this function doesn't produce any error (i.e. it must succeed).
+ */
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize);
+/**** ended inlining hist.h ****/
+/**** skipping file: ../common/bitstream.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: ../common/fse.h ****/
+/**** skipping file: ../common/error_private.h ****/
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ *
+ * Fills `ct` from `normalizedCounter` (one entry per symbol in [0, maxSymbolValue]) :
+ *  - the first two U16 of `ct` receive `tableLog` and `maxSymbolValue`,
+ *  - the U16 array that follows receives the next-state values, sorted by symbol order,
+ *  - the symbol transformation table (deltaNbBits / deltaFindState) comes after it.
+ * A counter of -1 marks a low-probability symbol : it gets a single cell,
+ * taken from the top of the table downward.
+ * @return : 0 on success,
+ *           or ERROR(tableLog_tooLarge) when `wkspSize` is smaller than
+ *           `(1<<tableLog) * sizeof(FSE_FUNCTION_TYPE)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                            void* workSpace, size_t wkspSize)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];   /* cumul[s] : running write index into tableU16 for symbol s */
+
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;   /* scratch : symbol assigned to each table cell */
+    U32 highThreshold = tableSize-1;   /* cells above this index are reserved for low-probability symbols */
+
+    /* CTable header */
+    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+    assert(tableLog < 16);   /* required for threshold strategy to work */
+
+    /* For explanations on how to distribute symbol values over the table :
+     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+     #ifdef __clang_analyzer__
+     memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+     #endif
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u <= maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
+    }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurrences;
+            int const freq = normalizedCounter[symbol];
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {   /* never entered for freq <= 0, so -1 symbols are skipped here */
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold)
+                    position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+
+        assert(position==0);  /* Must have initialized all positions */
+    }
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;   /* number of table cells consumed by the symbols processed so far */
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
+                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
+                break;
+
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+    }   }   }   }
+
+#if 0  /* debug : symbol costs */
+    DEBUGLOG(5, "\n --- table statistics : ");
+    {   U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
+                symbol, normalizedCounter[symbol],
+                FSE_getMaxNbBits(symbolTT, symbol),
+                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
+        }
+    }
+#endif
+
+    return 0;
+}
+
+
+/* FSE_buildCTable() :
+ * Builds `ct` from `normalizedCounter` by delegating to FSE_buildCTable_wksp(),
+ * using an on-stack scratch table of FSE_MAX_TABLESIZE elements.
+ * @return : the result of FSE_buildCTable_wksp(). */
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* intentionally left uninitialized : memset() is not necessary, even if static analyzers complain about it */
+    FSE_FUNCTION_TYPE stackWksp[FSE_MAX_TABLESIZE];
+    size_t const buildResult = FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog,
+                                                    stackWksp, sizeof(stackWksp));
+    return buildResult;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+
+/*-**************************************************************
+*  FSE NCount encoding
+****************************************************************/
+/* FSE_NCountWriteBound() :
+ * Buffer size (in bytes) at or above which FSE_writeNCount() writes without bound checks.
+ * @return : (((maxSymbolValue+1) * tableLog) >> 3) + 3,
+ *           or FSE_NCOUNTBOUND when maxSymbolValue is 0. */
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (maxSymbolValue == 0)
+        return FSE_NCOUNTBOUND;   /* maxSymbolValue==0 ? use default */
+
+    {   unsigned const totalBits = (maxSymbolValue+1) * tableLog;
+        unsigned const headerBytes = (totalBits >> 3) + 3;
+        return headerBytes;
+    }
+}
+
+static size_t
+FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                         unsigned writeIsSafe)
+{   /* FSE_writeNCount_generic() :
+     * Serializes `normalizedCounter` (symbols 0..maxSymbolValue) into `header`
+     * as a bit stream accumulated in `bitStream` and flushed 2 bytes at a time, low byte first.
+     * Layout produced by the code below :
+     *  - 4 bits : tableLog - FSE_MIN_TABLELOG
+     *  - then, per symbol, (count+1) written on a number of bits that shrinks as `remaining` decreases
+     *  - after a zero counter, the run of following zeros is written as repeat codes :
+     *    0xFFFF for each group of 24, the 2-bit value 3 for each group of 3, then the remainder on 2 bits.
+     * `writeIsSafe` == 0 : each write is preceded by a bound check against `headerBufferSize`.
+     * `writeIsSafe` != 0 : bound checks are skipped, the caller guarantees the buffer is large enough.
+     * @return : number of bytes written into `header`,
+     *           ERROR(dstSize_tooSmall) when a checked write does not fit,
+     *           ERROR(GENERIC) when the counters do not sum up as expected
+     *           (`remaining` drops below 1, or does not end at exactly 1).
+     */
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream = 0;
+    int bitCount = 0;
+    unsigned symbol = 0;
+    unsigned const alphabetSize = maxSymbolValue + 1;
+    int previousIs0 = 0;
+
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
+        if (previousIs0) {
+            unsigned start = symbol;
+            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
+            if (symbol == alphabetSize) break;   /* incorrect distribution */
+            while (symbol >= start+24) {
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (symbol >= start+3) {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (symbol-start) << bitCount;
+            bitCount += 2;
+            if (bitCount>16) {
+                if ((!writeIsSafe) && (out > oend - 2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+        }   }
+        {   int count = normalizedCounter[symbol++];
+            int const max = (2*threshold-1) - remaining;
+            remaining -= count < 0 ? -count : count;
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold)
+                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previousIs0  = (count==1);   /* count was incremented above : 1 means the normalized counter is 0 */
+            if (remaining<1) return ERROR(GENERIC);
+            while (remaining<threshold) { nbBits--; threshold>>=1; }   /* fewer bits per value as `remaining` shrinks */
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2))
+                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    if (remaining != 1)
+        return ERROR(GENERIC);  /* incorrect normalized distribution */
+    assert(symbol <= alphabetSize);
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2))
+        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;   /* only count the bytes that actually hold pending bits */
+
+    return (out-ostart);
+}
+
+
+/* FSE_writeNCount() :
+ * Validates `tableLog`, then writes the normalized counters into `buffer`
+ * through FSE_writeNCount_generic().
+ * Bound checks are enabled only when `bufferSize` is below FSE_NCountWriteBound().
+ * @return : the result of FSE_writeNCount_generic(),
+ *           ERROR(tableLog_tooLarge) if tableLog > FSE_MAX_TABLELOG,
+ *           ERROR(GENERIC) if tableLog < FSE_MIN_TABLELOG. */
+size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) {
+        return ERROR(tableLog_tooLarge);   /* Unsupported */
+    }
+    if (tableLog < FSE_MIN_TABLELOG) {
+        return ERROR(GENERIC);   /* Unsupported */
+    }
+
+    {   size_t const safeBound = FSE_NCountWriteBound(maxSymbolValue, tableLog);
+        if (bufferSize >= safeBound) {
+            /* buffer is large enough for the worst case : write in buffer is safe */
+            return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
+        }
+    }
+
+    /* small buffer : every write is bound-checked */
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+}
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+
+/* FSE_createCTable() :
+ * Allocates, with malloc(), a CTable of FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) U32 cells.
+ * `tableLog` is capped to FSE_TABLELOG_ABSOLUTE_MAX before sizing.
+ * @return : the result of malloc(), to be released with FSE_freeCTable(). */
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    unsigned const cappedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+    /* NOTE(review): the unused attribute is kept as found; presumably it silences a warning
+     * in builds where malloc() is a macro that ignores its argument - confirm */
+    size_t const nbBytes __attribute__ ((unused)) = FSE_CTABLE_SIZE_U32 (cappedLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)malloc(nbBytes);
+}
+
+/* FSE_freeCTable() :
+ * Releases `ct` with free(). */
+void FSE_freeCTable (FSE_CTable* ct)
+{
+    free(ct);
+}
+
+/* FSE_minTableLog() :
+ * provides the minimum logSize to safely represent a distribution.
+ * @return : the smaller of
+ *           BIT_highbit32(srcSize) + 1  and  BIT_highbit32(maxSymbolValue) + 2. */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 const bitsForSrc = BIT_highbit32((U32)(srcSize)) + 1;
+    U32 const bitsForSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    if (bitsForSymbols <= bitsForSrc) {
+        return bitsForSymbols;
+    }
+    return bitsForSrc;
+}
+
+/* FSE_optimalTableLog_internal() :
+ * Selects a tableLog for an input of `srcSize` bytes.
+ * Starts from `maxTableLog` (FSE_DEFAULT_TABLELOG when 0),
+ * lowers it to BIT_highbit32(srcSize - 1) - `minus` when that is smaller,
+ * raises it to FSE_minTableLog() when needed,
+ * then clamps the result to [FSE_MIN_TABLELOG, FSE_MAX_TABLELOG].
+ * @return : the selected tableLog. */
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 const srcBitsCap = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 const lowerBound = FSE_minTableLog(srcSize, maxSymbolValue);
+    U32 chosenLog;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+
+    chosenLog = (maxTableLog == 0) ? FSE_DEFAULT_TABLELOG : maxTableLog;
+    chosenLog = (srcBitsCap < chosenLog) ? srcBitsCap : chosenLog;   /* Accuracy can be reduced */
+    chosenLog = (lowerBound > chosenLog) ? lowerBound : chosenLog;   /* Need a minimum to safely represent all symbol values */
+    chosenLog = (chosenLog < FSE_MIN_TABLELOG) ? FSE_MIN_TABLELOG : chosenLog;
+    chosenLog = (chosenLog > FSE_MAX_TABLELOG) ? FSE_MAX_TABLELOG : chosenLog;
+    return chosenLog;
+}
+
+/* FSE_optimalTableLog() :
+ * Same as FSE_optimalTableLog_internal(), with `minus` fixed to 2. */
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    unsigned const optimalLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+    return optimalLog;
+}
+
+
+/* Secondary normalization method.
+   To be used when primary method fails.
+   First pass, per symbol in [0, maxSymbolValue] :
+     - count == 0                                  => norm = 0
+     - count <= total >> tableLog                  => norm = -1
+     - count <= (total * 3) >> (tableLog + 1)      => norm = 1
+     - otherwise the symbol is left pending (NOT_YET_ASSIGNED).
+   Symbols given -1 or 1 are removed from `total`, and each consumes one of the
+   (1 << tableLog) slots. The remaining slots are then shared between the
+   pending symbols, proportionally to their counts, using 62-bit fixed-point steps.
+   @return : 0 on success,
+             or ERROR(GENERIC) when a pending symbol would receive a weight below 1. */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
+{
+    short const NOT_YET_ASSIGNED = -2;
+    U32 s;
+    U32 distributed = 0;   /* number of table slots already handed out */
+    U32 ToDistribute;
+
+    /* Init */
+    U32 const lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = -1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+
+        norm[s]=NOT_YET_ASSIGNED;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if (ToDistribute == 0)
+        return 0;
+
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1) {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))   /* cycles over the symbols until every slot is handed out */
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;   /* slots between the fixed-point start and end positions */
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
+
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue)
+{   /* FSE_normalizeCount() :
+     * Scales the histogram `count` (symbols 0..maxSymbolValue, summing to `total`)
+     * into `normalizedCounter`, targeting a sum of (1 << tableLog).
+     * Symbols with a zero count get 0; symbols with count <= total >> tableLog get -1.
+     * The rounding surplus or deficit is applied to the symbol with the largest probability,
+     * unless it is too large for that, in which case FSE_normalizeM2() redoes the distribution.
+     * `tableLog` == 0 selects FSE_DEFAULT_TABLELOG.
+     * @return : the tableLog actually used, on success,
+     *           0 when one symbol accounts for the whole of `total` (rle special case),
+     *           ERROR(GENERIC) when tableLog is below FSE_MIN_TABLELOG or FSE_minTableLog(),
+     *           ERROR(tableLog_tooLarge) when tableLog is above FSE_MAX_TABLELOG,
+     *           or the error code returned by FSE_normalizeM2().
+     */
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };   /* per-proba rounding thresholds, indexed by proba < 8 */
+        U64 const scale = 62 - tableLog;
+        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {
+                normalizedCounter[s] = -1;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;   /* round up when the fractional rest exceeds the threshold */
+                }
+                if (proba > largestP) { largestP=proba; largest=s; }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* FSE_buildCTable_raw() :
+ * fake FSE_CTable, for raw (uncompressed) input.
+ * Builds a table of (1 << nbBits) states in which every symbol in
+ * [0, (1 << nbBits) - 1] shares the same deltaNbBits.
+ * @return : 0 on success, or ERROR(GENERIC) when nbBits < 1. */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned nbStates = 1 << nbBits;
+    const unsigned lastSymbol = nbStates - 1;
+    void* const base = ct;
+    U16* const header = (U16*) base;          /* header[0] : nbBits ; header[1] : max symbol value */
+    U16* const stateTable = header + 2;
+    void* const transformStart = ((U32*)base) + 1 /* header */ + (nbStates>>1);   /* assumption : tableLog >= 1 */
+    FSE_symbolCompressionTransform* const transforms = (FSE_symbolCompressionTransform*) (transformStart);
+    unsigned idx;
+
+    /* Sanity checks */
+    if (nbBits < 1) {
+        return ERROR(GENERIC);             /* min size */
+    }
+
+    /* header */
+    header[0] = (U16) nbBits;
+    header[1] = (U16) lastSymbol;
+
+    /* Build table */
+    for (idx=0; idx<nbStates; idx++) {
+        stateTable[idx] = (U16)(nbStates + idx);
+    }
+
+    /* Build Symbol Transformation Table */
+    {   const U32 sharedDeltaNbBits = (nbBits << 16) - (1 << nbBits);
+        for (idx=0; idx<=lastSymbol; idx++) {
+            transforms[idx].deltaNbBits = sharedDeltaNbBits;
+            transforms[idx].deltaFindState = idx-1;
+        }
+    }
+
+    return 0;
+}
+
+/* FSE_buildCTable_rle() :
+ * fake FSE_CTable, for rle input (always same symbol).
+ * Writes a header of 0 and `symbolValue`, zeroes the first two state cells,
+ * and zeroes the transformation entry of `symbolValue`.
+ * @return : always 0. */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    void* const base = ct;
+    U16* const header = (U16*) base;
+    U16* const stateTable = header + 2;
+    void* const transformStart = (U32*)base + 2;
+    FSE_symbolCompressionTransform* const entry =
+        ((FSE_symbolCompressionTransform*) transformStart) + symbolValue;
+
+    /* header */
+    header[0] = (U16) 0;
+    header[1] = (U16) symbolValue;
+
+    /* Build table */
+    stateTable[0] = 0;
+    stateTable[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    entry->deltaNbBits = 0;
+    entry->deltaFindState = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{   /* FSE_compress_usingCTable_generic() :
+     * Encodes `src` into `dst` with the table `ct`.
+     * The input is read backward, from its last byte to its first,
+     * and symbols alternate between two encoder states (CState1, CState2).
+     * `fast` selects BIT_flushBitsFast() instead of BIT_flushBits() (see FSE_FLUSHBITS).
+     * @return : the result of BIT_closeCStream(),
+     *           or 0 when srcSize <= 2 or when BIT_initCStream() reports an error.
+     */
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip=iend;
+
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    /* init */
+    if (srcSize <= 2) return 0;
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    if (srcSize & 1) {   /* odd size : one extra symbol is encoded up front, leaving an even count */
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
+    }
+
+    /* join to mod 4 */
+    srcSize -= 2;
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while ( ip>istart ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+/* FSE_compress_usingCTable() :
+ * Encodes `src` into `dst` with the table `ct`,
+ * through FSE_compress_usingCTable_generic().
+ * The fast flush variant is selected when `dstSize` >= FSE_BLOCKBOUND(srcSize).
+ * @return : the result of FSE_compress_usingCTable_generic(). */
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    if (dstSize >= FSE_BLOCKBOUND(srcSize)) {
+        /* destination is large enough : fast flush variant */
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    }
+    return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+
+/* FSE_compressBound() :
+ * Function form of the FSE_COMPRESSBOUND() macro. */
+size_t FSE_compressBound(size_t size)
+{
+    size_t const bound = FSE_COMPRESSBOUND(size);
+    return bound;
+}
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be at least `FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)`,
+ * otherwise ERROR(tableLog_tooLarge) is returned.
+ * The start of `workSpace` holds the CTable; the rest is used as scratch
+ * by HIST_count_wksp() and FSE_buildCTable_wksp().
+ * `maxSymbolValue` == 0 selects FSE_MAX_SYMBOL_VALUE, `tableLog` == 0 selects FSE_DEFAULT_TABLELOG.
+ * @return : size of the compressed data written into `dst` (table header + payload),
+ *           0 when the input is considered not compressible or `dst` is too small for the payload,
+ *           1 when `src` holds a single repeated symbol (rle).
+ *           NOTE(review): CHECK_F / CHECK_V_F are defined outside this view;
+ *           presumably they return the helper's error code to the caller - confirm.
+ */
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    unsigned count[FSE_MAX_SYMBOL_VALUE+1];
+    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
+    FSE_CTable* CTable = (FSE_CTable*)workSpace;
+    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);   /* computed from the arguments as passed, before the defaults below are applied */
+    void* scratchBuffer = (void*)(CTable + CTableSize);
+    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
+
+    /* init conditions */
+    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
+    if (srcSize <= 1) return 0;  /* Not compressible */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
+        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
+        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
+
+    /* Write table description header */
+    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += nc_err;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    /* check compressibility */
+    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
+
+    return op-ostart;
+}
+
+typedef struct {
+    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];   /* CTable sized for the largest tableLog and symbol value */
+    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];   /* scratch area of (1 << FSE_MAX_TABLELOG) bytes */
+} fseWkspMax_t;   /* on-stack workspace that FSE_compress2() hands to FSE_compress_wksp() */
+
+/* FSE_compress2() :
+ * Same as FSE_compress_wksp(), using an on-stack workspace of type fseWkspMax_t.
+ * @return : the result of FSE_compress_wksp(),
+ *           or ERROR(tableLog_tooLarge) when tableLog > FSE_MAX_TABLELOG. */
+size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
+{
+    fseWkspMax_t stackWksp;
+    /* a compilation failure here means the on-stack workspace is not large enough */
+    DEBUG_STATIC_ASSERT(sizeof(stackWksp) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));
+
+    if (tableLog <= FSE_MAX_TABLELOG) {
+        return FSE_compress_wksp(dst, dstCapacity, src, srcSize,
+                                 maxSymbolValue, tableLog,
+                                 &stackWksp, sizeof(stackWksp));
+    }
+    return ERROR(tableLog_tooLarge);
+}
+
+/* FSE_compress() :
+ * Same as FSE_compress2(), with maxSymbolValue = FSE_MAX_SYMBOL_VALUE
+ * and tableLog = FSE_DEFAULT_TABLELOG. */
+size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const cSize = FSE_compress2(dst, dstCapacity, src, srcSize,
+                                       FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+    return cSize;
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+/**** ended inlining compress/fse_compress.c ****/
+/**** start inlining compress/hist.c ****/
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/debug.h ****/
+/**** skipping file: ../common/error_private.h ****/
+/**** skipping file: hist.h ****/
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code)
+{
+    /* thin wrapper : the decision is delegated to ERR_isError() */
+    return ERR_isError(code);
+}
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+/* HIST_count_simple() :
+ * Single-table byte histogram of `src`.
+ * `count` is cleared for symbols 0..*maxSymbolValuePtr, then filled.
+ * *maxSymbolValuePtr is updated to the largest symbol actually present (0 for empty input).
+ * @return : the highest frequency found (0 for empty input). */
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    unsigned lastSymbol = *maxSymbolValuePtr;
+    unsigned peak = 0;
+    size_t pos;
+    U32 sym;
+
+    memset(count, 0, (lastSymbol+1) * sizeof(*count));
+    if (srcSize == 0) {
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+
+    /* tally every input byte */
+    for (pos = 0; pos < srcSize; pos++) {
+        assert(bytes[pos] <= lastSymbol);
+        count[bytes[pos]]++;
+    }
+
+    /* shrink the alphabet down to the last symbol that was seen */
+    while (count[lastSymbol] == 0) lastSymbol--;
+    *maxSymbolValuePtr = lastSymbol;
+
+    for (sym = 0; sym <= lastSymbol; sym++) {
+        if (peak < count[sym]) peak = count[sym];
+    }
+
+    return peak;
+}
+
+/* Tells HIST_count_parallel_wksp() whether to verify that no symbol above
+ * *maxSymbolValuePtr is present : trustInput (0) skips the verification,
+ * checkMaxSymbolValue enables it. */
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
+ * `check` : when set to checkMaxSymbolValue, fail if a symbol > *maxSymbolValuePtr is present.
+ * *maxSymbolValuePtr is updated to the largest symbol present (0 for empty input).
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* safety checks */
+    if (!sourceSize) {
+        /* size is expressed in bytes : clear whole counters, same as HIST_count_simple() */
+        memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
+
+    /* by stripes of 16 bytes.
+     * Skipped for inputs shorter than 16 bytes : the loop below could not run anyway,
+     * and the initial 4-byte read must never go past the end of a tiny input. */
+    if (sourceSize >= 16) {
+        U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;   /* the last `cached` word was read but not counted : rewind over it */
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    if (check) {   /* verify stats will fit into destination table */
+        U32 s; for (s=255; s>maxSymbolValue; s--) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
+    }   }
+
+    /* merge the 4 partial tables into `count`, tracking the highest frequency */
+    {   U32 s;
+        if (maxSymbolValue > 255) maxSymbolValue = 255;
+        for (s=0; s<=maxSymbolValue; s++) {
+            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+            if (count[s] > max) max = count[s];
+    }   }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+    return (size_t)max;
+}
+
+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * Inputs shorter than 1500 bytes go through HIST_count_simple() and do not touch `workSpace`.
+ * Otherwise `workSpace` must be 4-bytes aligned and `workSpaceSize` must be >= HIST_WKSP_SIZE.
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize,
+                          void* workSpace, size_t workSpaceSize)
+{
+    if (sourceSize >= 1500) {   /* heuristic threshold */
+        if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+        if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
+    }
+    return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+}
+
+/* HIST_countFast() :
+ * fast variant (unsafe : won't check if src contains values beyond count[] limit).
+ * Uses a local scratch table and forwards to HIST_countFast_wksp(). */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                     const void* source, size_t sourceSize)
+{
+    unsigned scratch[HIST_WKSP_SIZE_U32];
+    size_t const scratchSize = sizeof(scratch);
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, scratch, scratchSize);
+}
+
+/* HIST_count_wksp() :
+ * Same as HIST_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be 4-bytes aligned and hold at least HIST_WKSP_SIZE_U32 unsigned.
+ * When *maxSymbolValuePtr < 255 the input is verified against that limit;
+ * otherwise the limit is set to 255 and the fast (unchecked) path is taken. */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* source, size_t sourceSize,
+                       void* workSpace, size_t workSpaceSize)
+{
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+
+    if (*maxSymbolValuePtr >= 255) {
+        *maxSymbolValuePtr = 255;
+        return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
+    }
+    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
+}
+
+/* HIST_count() :
+ * Histogram of `src`, using a local scratch table; forwards to HIST_count_wksp(). */
+size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* src, size_t srcSize)
+{
+    unsigned scratch[HIST_WKSP_SIZE_U32];
+    size_t const scratchSize = sizeof(scratch);
+    return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, scratch, scratchSize);
+}
+/**** ended inlining compress/hist.c ****/
+/**** start inlining compress/huf_compress.c ****/
+/* ******************************************************************
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+/**** skipping file: ../common/compiler.h ****/
+/**** skipping file: ../common/bitstream.h ****/
+/**** skipping file: hist.h ****/
+#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/error_private.h ****/
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+/* HUF_optimalTableLog() :
+ * Forwards to FSE_optimalTableLog_internal(), with its 4th argument fixed to 1. */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    unsigned const fourthArg = 1;
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, fourthArg);
+}
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ * @return : compressed size written into `dst`,
+ *           0 when not compressible, 1 when the input is a single repeated symbol,
+ *           or an error code propagated from the FSE helpers.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+{
+    BYTE* const dstBegin = (BYTE*) dst;
+    BYTE* const dstEnd = dstBegin + dstSize;
+    BYTE* out = dstBegin;
+
+    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
+
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+
+    /* nothing to gain on 0 or 1 weight */
+    if (wtSize <= 1) return 0;
+
+    /* gather weight statistics */
+    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+        if (maxCount == wtSize) return 1;   /* a single distinct weight : rle */
+        if (maxCount == 1) return 0;        /* every weight appears at most once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
+
+    /* emit the normalized-count header */
+    {   CHECK_V_F(headerSize, FSE_writeNCount(out, (size_t)(dstEnd-out), norm, maxSymbolValue, tableLog) );
+        out += headerSize;
+    }
+
+    /* build the compression table, then encode the weights */
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
+    {   CHECK_V_F(payloadSize, FSE_compress_usingCTable(out, (size_t)(dstEnd - out), weightTable, wtSize, CTable) );
+        if (payloadSize == 0) return 0;   /* not enough space for compressed data */
+        out += payloadSize;
+    }
+
+    return (size_t)(out-dstBegin);
+}
+
+
+/* One entry of a Huffman compression table, indexed by symbol value :
+ * `val` is the code emitted for the symbol, `nbBits` is the number of bits of that code
+ * (both are passed as-is to BIT_addBitsFast() by HUF_encodeSymbol()). */
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt within "huf.h" */
+
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    Code lengths of symbols 0..maxSymbolValue-1 are converted to weights, then stored
+    either FSE-compressed (first byte = compressed size)
+    or raw, two 4-bit weights per byte (first byte = 128 + maxSymbolValue-1).
+    @return : size of saved CTable,
+              or an error code (maxSymbolValue_tooLarge, tableLog_tooLarge,
+              dstSize_tooSmall, GENERIC, or one propagated from HUF_compressWeights()) */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+
+     /* check conditions */
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);   /* bitsToWeight[] only holds HUF_TABLELOG_MAX+1 entries */
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);   /* `maxDstSize-1` below must not wrap around */
+
+    /* convert to weight */
+    bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
+}
+
+
+/* HUF_readCTable() :
+ * Rebuilds a compression table from a serialized weight description.
+ * Weights are decoded by HUF_readStats(), converted into code lengths (nbBits),
+ * then codes (val) are assigned rank by rank, in symbol order.
+ * `*maxSymbolValuePtr` : in = largest symbol value CTable can accept; out = nbSymbols - 1.
+ * `*hasZeroWeights` : set to 1 when at least one decoded weight is 0, else 0.
+ * @return : the value reported by HUF_readStats() (readSize),
+ *           or an error code (tableLog_tooLarge, maxSymbolValue_tooSmall, or one from HUF_readStats()). */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+
+    /* get symbol weights */
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank */
+    /* note : rankVal[] is rewritten here, and is not read again within this function */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill nbBits */
+    *hasZeroWeights = 0;
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            *hasZeroWeights |= (w == 0);
+            /* nbBits = tableLog + 1 - w, forced to 0 through the mask when w == 0 */
+            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+    }   }
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine starting value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
+        {   U16 min = 0;
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    *maxSymbolValuePtr = nbSymbols - 1;
+    return readSize;
+}
+
+/* HUF_getNbBits() :
+ * `symbolTable` is interpreted as an array of HUF_CElt.
+ * @return : the nbBits field stored for `symbolValue`. */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+{
+    const HUF_CElt* const elts = (const HUF_CElt*)symbolTable;
+    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    return (elts + symbolValue)->nbBits;
+}
+
+
+/* Node of the Huffman tree assembled by HUF_buildCTable_wksp() (leaf or parent). */
+typedef struct nodeElt_s {
+    U32 count;    /* symbol frequency for a leaf (set by HUF_sort()); sum of both children for a parent */
+    U16 parent;   /* index of the parent node within the node table */
+    BYTE byte;    /* symbol value (set by HUF_sort()) */
+    BYTE nbBits;  /* code length : parent's nbBits + 1, possibly adjusted by HUF_setMaxHeight() */
+} nodeElt;
+
+/* HUF_setMaxHeight() :
+ * Limits code lengths stored in `huffNode` to `maxNbBits`.
+ * The early exit relies on huffNode[lastNonNull] holding the largest nbBits.
+ * Entries above the limit are clamped to maxNbBits (scanning downwards from `lastNonNull`);
+ * the cost accumulated by clamping is then repaid by incrementing nbBits of other entries,
+ * located through rankLast[] (position of the last symbol for each bit-length below maxNbBits).
+ * If the repayment overshoots, some entries get one bit back.
+ * @return : huffNode[lastNonNull].nbBits when it is already <= maxNbBits, otherwise maxNbBits. */
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        int n = (int)lastNonNull;
+
+        while (huffNode[n].nbBits > maxNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n --;
+        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
+
+        /* renorm totalCost */
+        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+2];
+
+            /* Get pos of last (smallest) symbol per rank */
+            memset(rankLast, 0xF0, sizeof(rankLast));   /* byte pattern 0xF0 makes every entry equal to noSymbol */
+            {   U32 currentNbBits = maxNbBits;
+                int pos;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
+            }   }
+
+            while (totalCost > 0) {
+                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 const highPos = rankLast[nBitsToDecrease];
+                    U32 const lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+                    nBitsToDecrease ++;
+                totalCost -= 1 << (nBitsToDecrease-1);
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
+                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+            }   }   /* while (totalCost > 0) */
+
+            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    assert(n >= 0);
+                    rankLast[1] = (U32)(n+1);
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+    }   }   }   /* there are several too large elements (at least >= 2) */
+
+    return maxNbBits;
+}
+
+/* Per-rank bookkeeping used by HUF_sort() :
+ * `base` is the first slot of the rank's bucket, `current` is the next slot to hand out. */
+typedef struct {
+    U32 base;
+    U32 current;
+} rankPos;
+
+/* Node storage used while building a Huffman tree. */
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+/* Number of rankPos entries cleared and used by HUF_sort(). */
+#define RANK_POSITION_TABLE_SIZE 32
+
+/* Layout that HUF_buildCTable_wksp() overlays on its `workSpace` argument. */
+typedef struct {
+  huffNodeTable huffNodeTbl;
+  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+/* HUF_sort() :
+ * Fills `huffNode` with symbols 0..maxSymbolValue ordered by decreasing count.
+ * Symbols are first bucketed by BIT_highbit32(count+1);
+ * each symbol is then inserted into its bucket, keeping the bucket sorted.
+ * `rankPosition` is scratch space of RANK_POSITION_TABLE_SIZE entries. */
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
+{
+    U32 symbol;
+    U32 rank;
+
+    memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+
+    /* number of symbols per bucket */
+    for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
+        rankPosition[BIT_highbit32(count[symbol] + 1)].base++;
+    }
+    /* accumulate from the top : base[r] becomes the start of bucket r */
+    for (rank = 30; rank > 0; rank--) {
+        rankPosition[rank-1].base += rankPosition[rank].base;
+    }
+    for (rank = 0; rank < RANK_POSITION_TABLE_SIZE; rank++) {
+        rankPosition[rank].current = rankPosition[rank].base;
+    }
+
+    /* insertion sort within each bucket */
+    for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
+        U32 const freq = count[symbol];
+        rankPos* const bucket = &rankPosition[BIT_highbit32(freq + 1) + 1];
+        U32 slot = bucket->current++;
+        for ( ; (slot > bucket->base) && (freq > huffNode[slot-1].count); slot--) {
+            huffNode[slot] = huffNode[slot-1];
+        }
+        huffNode[slot].count = freq;
+        huffNode[slot].byte  = (BYTE)symbol;
+    }
+}
+
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+ *  `maxNbBits` == 0 selects HUF_TABLELOG_DEFAULT.
+ *  @return : the bit-length limit actually in effect (value returned by HUF_setMaxHeight()),
+ *            or an error code (GENERIC, workSpace_tooSmall, maxSymbolValue_tooLarge).
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;   /* shifted by one, so that huffNode[-1] is the barrier entry huffNode0[0] */
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* init for parents */
+    /* the first parent node joins the two least frequent symbols */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
+
+    /* create parents */
+    /* each step joins the two smallest remaining entries, taken either from the symbols (lowS) or from the parents (lowN) */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+
+    /* fill result into tree (val, nbBits) */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+        int const alphabetSize = (int)(maxSymbolValue + 1);
+        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+        for (n=0; n<=nonNullRank; n++)
+            nbPerRank[huffNode[n].nbBits]++;
+        /* determine starting value per rank */
+        {   U16 min = 0;
+            for (n=(int)maxNbBits; n>0; n--) {
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        for (n=0; n<alphabetSize; n++)
+            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+        for (n=0; n<alphabetSize; n++)
+            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
+    }
+
+    return maxNbBits;
+}
+
+/** HUF_buildCTable() :
+ *  Builds `tree` from `count`, using a local workspace; forwards to HUF_buildCTable_wksp().
+ * @return : maxNbBits
+ *  Note : count is used before tree is written, so they can safely overlap
+ */
+size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
+{
+    HUF_buildCTable_wksp_tables wksp;
+    size_t const wkspSize = sizeof(wksp);
+    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &wksp, wkspSize);
+}
+
+/* HUF_estimateCompressedSize() :
+ * Sums nbBits * count over symbols 0..maxSymbolValue.
+ * @return : that total, converted from bits to bytes (rounded down). */
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t totalBits = 0;
+    int const lastSymbol = (int)maxSymbolValue;
+    int sym = 0;
+    while (sym <= lastSymbol) {
+        totalBits += CTable[sym].nbBits * count[sym];
+        sym++;
+    }
+    return totalBits / 8;
+}
+
+/* HUF_validateCTable() :
+ * @return : 1 when every symbol with a non-zero count has a non-zero nbBits in CTable,
+ *           0 otherwise. */
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int missing = 0;
+  int const lastSymbol = (int)maxSymbolValue;
+  int sym;
+  for (sym = 0; sym <= lastSymbol; sym++) {
+    int const isUsed = (count[sym] != 0);
+    int const hasNoCode = (CTable[sym].nbBits == 0);
+    missing |= (isUsed & hasNoCode);   /* branch-free accumulation, no early exit */
+  }
+  return (missing == 0);
+}
+
+/* HUF_compressBound() : function form of the HUF_COMPRESSBOUND() macro */
+size_t HUF_compressBound(size_t size)
+{
+    return HUF_COMPRESSBOUND(size);
+}
+
+/* HUF_encodeSymbol() :
+ * Appends the code of `symbol` (CTable[symbol].val, on CTable[symbol].nbBits bits) to the bitstream. */
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    const HUF_CElt* const code = CTable + symbol;
+    BIT_addBitsFast(bitCPtr, code->val, code->nbBits);
+}
+
+/* Flush helpers used between HUF_encodeSymbol() calls.
+ * HUF_FLUSHBITS() always calls BIT_flushBits(). */
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+
+/* Flushes only when the bit container holds fewer than HUF_TABLELOG_MAX*2+7 bits. */
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+
+/* Flushes only when the bit container holds fewer than HUF_TABLELOG_MAX*4+7 bits. */
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
+
+/* HUF_compress1X_usingCTable_internal_body() :
+ * Encodes `src` as a single bitstream into `dst`, using `CTable`.
+ * Symbols are encoded starting from the end of the input :
+ * first the (srcSize & 3) trailing symbols, then groups of 4 walking backwards to index 0.
+ * @return : the value returned by BIT_closeCStream(),
+ *           or 0 when dstSize < 8 or when the bitstream cannot be initialized. */
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+      if (HUF_isError(initErr)) return 0; }
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    /* encode the 0-3 symbols located after the last multiple of 4, last one first */
+    switch (srcSize & 3)
+    {
+        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+                 HUF_FLUSHBITS_2(&bitC);
+		 /* fall-through */
+        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+                 HUF_FLUSHBITS_1(&bitC);
+		 /* fall-through */
+        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+                 HUF_FLUSHBITS(&bitC);
+		 /* fall-through */
+        case 0 : /* fall-through */
+        default: break;
+    }
+
+    /* main loop : 4 symbols per iteration, walking backwards; unconditional flush once per group */
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+#if DYNAMIC_BMI2
+
+/* Instance of the encoding body carrying the "bmi2" target attribute. */
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    size_t const cSize = HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+    return cSize;
+}
+
+/* Instance of the encoding body without any target attribute. */
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+                                      const void* src, size_t srcSize,
+                                      const HUF_CElt* CTable)
+{
+    size_t const cSize = HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+    return cSize;
+}
+
+/* Runtime dispatch : non-zero `bmi2` selects the bmi2 instance. */
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    return bmi2 ?
+        HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable) :
+        HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+/* No dynamic dispatch : `bmi2` is accepted for signature compatibility and ignored. */
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    size_t const cSize = HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+    (void)bmi2;
+    return cSize;
+}
+
+#endif
+
+/* HUF_compress1X_usingCTable() :
+ * Public single-stream entry point; always requests the non-bmi2 path. */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    int const bmi2 = 0;
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}
+
+
+/* HUF_compress4X_usingCTable_internal() :
+ * Splits `src` into 4 segments, each compressed as an independent single stream.
+ * Output layout : a 6-byte jump table (little-endian 16-bit compressed sizes of the
+ * first 3 streams), followed by the 4 streams back to back.
+ * @return : total size written into `dst`,
+ *           0 when dst or src is too small or when any stream cannot be compressed,
+ *           or an error code from the single-stream encoder. */
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, int bmi2)
+{
+    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    unsigned stream;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    /* first 3 streams : fixed segment size, compressed size recorded in the jump table */
+    for (stream = 0; stream < 3; stream++) {
+        assert(op <= oend);
+        {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+            if (cSize==0) return 0;
+            assert(cSize <= 65535);
+            MEM_writeLE16(ostart + 2*stream, (U16)cSize);
+            op += cSize;
+        }
+        ip += segmentSize;
+    }
+
+    /* last stream : whatever input remains; its size is not stored */
+    assert(op <= oend);
+    assert(ip <= iend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
+        if (cSize==0) return 0;
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+/* HUF_compress4X_usingCTable() :
+ * Public 4-streams entry point; always requests the non-bmi2 path. */
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    int const bmi2 = 0;
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}
+
+/* Stream layout selector : HUF_singleStream routes to the 1X encoder,
+ * HUF_fourStreams to the 4X encoder (see HUF_compressCTable_internal()). */
+typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+
+/* HUF_compressCTable_internal() :
+ * Compresses `src` at position `op` with the 1X or 4X encoder, depending on `nbStreams`.
+ * @return : number of bytes between `ostart` and the end of the compressed data,
+ *           0 when the encoder reports 0 or when that size is >= srcSize-1 (not worth it),
+ *           or an error code from the encoder. */
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
+{
+    size_t const dstRemaining = (size_t)(oend - op);
+    size_t cSize;
+
+    if (nbStreams == HUF_singleStream) {
+        cSize = HUF_compress1X_usingCTable_internal(op, dstRemaining, src, srcSize, CTable, bmi2);
+    } else {
+        cSize = HUF_compress4X_usingCTable_internal(op, dstRemaining, src, srcSize, CTable, bmi2);
+    }
+    if (HUF_isError(cSize)) return cSize;
+    if (cSize == 0) return 0;   /* uncompressible */
+
+    op += cSize;
+    assert(op >= ostart);
+    /* check compressibility */
+    {   size_t const totalSize = (size_t)(op - ostart);
+        return (totalSize >= srcSize-1) ? 0 : totalSize;
+    }
+}
+
+/* Scratch tables that HUF_compress_internal() lays over the caller-provided workspace */
+typedef struct {
+    unsigned count[HUF_SYMBOLVALUE_MAX + 1];   /* symbol histogram, filled by HIST_count_wksp() */
+    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];   /* compression table built for the current input */
+    HUF_buildCTable_wksp_tables buildCTable_wksp;   /* scratch area handed to HUF_buildCTable_wksp() */
+} HUF_compress_tables_t;
+
+/* HUF_compress_internal() :
+ * Huffman-compresses `src` into `dst`, using one or four streams depending on `nbStreams`.
+ * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned,
+ * aligned on a 4-bytes boundary (both conditions are checked below).
+ * `oldHufTable` and `repeat` are optional (may be NULL) : they describe a previously
+ * built table that may be re-used instead of building and writing a new one.
+ * When a new table is used, it is copied into `oldHufTable` (if provided)
+ * and `*repeat` (if provided) is set to HUF_repeat_none.
+ * @return : nb of bytes written into `dst`,
+ *           0 if srcSize==0, dstSize==0, or the input is judged not compressible,
+ *           1 if `src` is a single repeated symbol (only that byte is written),
+ *           or an error code. */
+static size_t
+HUF_compress_internal (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       HUF_nbStreams_e nbStreams,
+                       void* workSpace, size_t wkspSize,
+                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+                 const int bmi2)
+{
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+
+    /* checks & inits */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (!srcSize) return 0;  /* Uncompressed */
+    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
+    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+    /* 0 means "use the default" for both parameters */
+    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
+
+    /* Heuristic : If old table is valid, use it for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
+        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
+        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+    }
+
+    /* Check validity of previous table */
+    if ( repeat
+      && *repeat == HUF_repeat_check
+      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Build Huffman Tree */
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
+                                            maxSymbolValue, huffLog,
+                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+        CHECK_F(maxBits);
+        /* from here on, huffLog is the value returned by the table builder */
+        huffLog = (U32)maxBits;
+        /* Zero unused symbols in CTable, so we can check it for validity */
+        memset(table->CTable + (maxSymbolValue + 1), 0,
+               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
+    }
+
+    /* Write table description header */
+    /* note : the header is written at `op` (== ostart here) before deciding whether to keep it;
+     * if the old table is chosen instead, `op` is not advanced and the stream overwrites it */
+    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+        /* Check if using previous huffman table is beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend,
+                                                   src, srcSize,
+                                                   nbStreams, oldHufTable, bmi2);
+        }   }
+
+        /* Use the new huffman table */
+        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone leaves no room for a gain */
+        op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable)
+            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
+    }
+    return HUF_compressCTable_internal(ostart, op, oend,
+                                       src, srcSize,
+                                       nbStreams, table->CTable, bmi2);
+}
+
+
+/* HUF_compress1X_wksp():
+ * compress input using a single stream.
+ * provide workspace to generate compression tables;
+ * no previous table is re-used, and bmi2 is disabled */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    HUF_CElt* const noOldTable = NULL;
+    HUF_repeat* const noRepeat = NULL;
+    int const preferRepeat = 0;
+    int const bmi2 = 0;
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize,
+                                 noOldTable, noRepeat, preferRepeat, bmi2);
+}
+
+/* HUF_compress1X_repeat():
+ * compress input using a single stream.
+ * `hufTable` / `repeat` / `preferRepeat` / `bmi2` are forwarded unchanged to HUF_compress_internal() */
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    size_t const result = HUF_compress_internal(dst, dstSize, src, srcSize,
+                                                maxSymbolValue, huffLog, HUF_singleStream,
+                                                workSpace, wkspSize,
+                                                hufTable, repeat, preferRepeat, bmi2);
+    return result;
+}
+
+/* HUF_compress1X():
+ * single-stream compression, using a scratch table allocated on the stack */
+size_t HUF_compress1X (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                 unsigned maxSymbolValue, unsigned huffLog)
+{
+    unsigned scratch[HUF_WORKSPACE_SIZE_U32];
+    size_t const scratchSize = sizeof(scratch);
+    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, scratch, scratchSize);
+}
+
+/* HUF_compress4X_wksp():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables;
+ * no previous table is re-used, and bmi2 is disabled */
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    HUF_CElt* const noOldTable = NULL;
+    HUF_repeat* const noRepeat = NULL;
+    int const preferRepeat = 0;
+    int const bmi2 = 0;
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 noOldTable, noRepeat, preferRepeat, bmi2);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * may re-use an existing huffman compression table :
+ * `hufTable` / `repeat` / `preferRepeat` / `bmi2` are forwarded unchanged to HUF_compress_internal() */
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    size_t const result = HUF_compress_internal(dst, dstSize, src, srcSize,
+                                                maxSymbolValue, huffLog, HUF_fourStreams,
+                                                workSpace, wkspSize,
+                                                hufTable, repeat, preferRepeat, bmi2);
+    return result;
+}
+
+/* HUF_compress2():
+ * 4-streams compression, using a scratch table allocated on the stack */
+size_t HUF_compress2 (void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog)
+{
+    unsigned scratch[HUF_WORKSPACE_SIZE_U32];
+    size_t const scratchSize = sizeof(scratch);
+    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, scratch, scratchSize);
+}
+
+/* HUF_compress():
+ * calls HUF_compress2() with maxSymbolValue==255 and the default table log */
+size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    unsigned const maxSymbolValue = 255;
+    unsigned const huffLog = HUF_TABLELOG_DEFAULT;
+    return HUF_compress2(dst, maxDstSize, src, srcSize, maxSymbolValue, huffLog);
+}
+/**** ended inlining compress/huf_compress.c ****/
+/**** start inlining compress/zstd_compress_literals.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+/**** start inlining zstd_compress_literals.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+/**** start inlining zstd_compress_internal.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This header contains definitions
+ * that shall **only** be used by modules within lib/compress.
+ */
+
+#ifndef ZSTD_COMPRESS_H
+#define ZSTD_COMPRESS_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** start inlining zstd_cwksp.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+/**** skipping file: ../common/zstd_internal.h ****/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+*  Structures
+***************************************/
+/* Allocation phases of a workspace. The numeric order matters :
+ * ZSTD_cwksp_internal_advance_phase() compares phases with < and >=. */
+typedef enum {
+    ZSTD_cwksp_alloc_objects,   /* initial phase; the only one in which ZSTD_cwksp_reserve_object() succeeds */
+    ZSTD_cwksp_alloc_buffers,   /* phase requested by ZSTD_cwksp_reserve_buffer() */
+    ZSTD_cwksp_alloc_aligned    /* phase requested by ZSTD_cwksp_reserve_aligned() and ZSTD_cwksp_reserve_table() */
+} ZSTD_cwksp_alloc_phase_e;
+
+/**
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory they've been
+ *     allocated is less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_free{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;      /* start of the managed buffer */
+    void* workspaceEnd;   /* workspace + size, i.e. one past the last managed byte */
+
+    void* objectEnd;      /* end of the objects region; the tables region starts here */
+    void* tableEnd;       /* end of the currently reserved tables (moves upward) */
+    void* tableValidEnd;  /* table memory in [objectEnd, tableValidEnd) is treated as clean */
+    void* allocStart;     /* lowest address of the buffer / aligned allocations (moves downward from workspaceEnd) */
+
+    int allocFailed;                  /* set to 1 by any failed reservation; reset by ZSTD_cwksp_clear() */
+    int workspaceOversizedDuration;   /* nb of consecutive ZSTD_cwksp_bump_oversized_duration() calls that found the workspace too large */
+    ZSTD_cwksp_alloc_phase_e phase;   /* current allocation phase */
+} ZSTD_cwksp;
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+/**
+ * Debug-only sanity check of the workspace pointers :
+ * workspace <= objectEnd <= {tableEnd, tableValidEnd} <= allocStart <= workspaceEnd.
+ * Does nothing when assert() is compiled out.
+ */
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;   /* avoids an unused-parameter warning when assert() is disabled */
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/**
+ * Rounds `size` up to the next multiple of `align`.
+ * `align` must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    size_t const padded = size + mask;
+    assert((align & mask) == 0);
+    /* clearing the low bits of (size + align - 1) yields the rounded-up value */
+    return padded & ~mask;
+}
+
+/**
+ * Returns the amount of workspace that reserving an object of `size` bytes
+ * will actually consume. This is `size` itself, except when ASAN workspace
+ * poisoning is active, in which case one redzone is added on each side.
+ *
+ * Tables aren't currently redzoned, so the matchState tables don't need to
+ * go through this function. Everything else does.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+    size_t redzones = 0;
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    redzones = 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+    return size + redzones;
+}
+
+/**
+ * Moves the workspace to allocation phase `phase`, which must not precede
+ * the current one (asserted). Nothing happens if the phase is unchanged.
+ *
+ * - When leaving the objects phase, tableValidEnd is reset to objectEnd.
+ * - When entering the aligned phase, allocStart is rounded down to a
+ *   multiple of sizeof(U32) and tableValidEnd is clamped to it.
+ */
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/**
+ * Tells whether `ptr` is non-NULL and lies within
+ * [ws->workspace, ws->workspaceEnd] (both bounds included).
+ * Returns 1 if so, 0 otherwise.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+    if (ptr == NULL) return 0;
+    if (ptr < ws->workspace) return 0;
+    return ptr <= ws->workspaceEnd;
+}
+
+/**
+ * Internal function. Do not use directly.
+ *
+ * Advances the workspace to `phase`, then carves `bytes` off the top of the
+ * free space : the reservation ends at the current allocStart, and allocStart
+ * is lowered to its beginning. With ASAN workspace poisoning enabled, two
+ * extra redzones are reserved and the returned pointer sits between them.
+ *
+ * Returns NULL and sets ws->allocFailed if the reservation would go below
+ * tableEnd.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    /* computed after the phase change, which may have realigned allocStart */
+    alloc = (BYTE *)ws->allocStart - bytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* over-reserve space */
+    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    /* same condition as the assert above, handled gracefully when asserts are disabled */
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    /* the new allocation may overlap memory previously considered clean table space */
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+     * either side. */
+    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+/**
+ * Reserves `bytes` of unaligned memory in the buffers phase.
+ * Returns NULL if the reservation fails.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    void* const buffer = ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+    return (BYTE*)buffer;
+}
+
+/**
+ * Reserves memory in the aligned phase.
+ * `bytes` is expected to be a multiple of sizeof(U32) (asserted); it is
+ * rounded up to one in any case before being reserved.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(U32));
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, roundedBytes, ZSTD_cwksp_alloc_aligned);
+}
+
+/**
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ *
+ * Tables are reserved upward, starting at the current tableEnd. `bytes` is
+ * expected to be a multiple of sizeof(U32) (asserted). Returns NULL and sets
+ * ws->allocFailed if the table would extend past allocStart.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc = ws->tableEnd;
+    void* end = (BYTE *)alloc + bytes;
+    /* note : read before the phase change below, which may lower allocStart */
+    void* top = ws->allocStart;
+
+    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    /* same condition as the assert above, handled gracefully when asserts are disabled */
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+/**
+ * Aligned on sizeof(void*).
+ *
+ * Reserves an object at the current objectEnd. `bytes` is rounded up to a
+ * multiple of sizeof(void*), though it is expected to already be one
+ * (asserted). Only succeeds while the workspace is still in the objects
+ * phase and the object fits before workspaceEnd; otherwise returns NULL and
+ * sets ws->allocFailed. On success objectEnd, tableEnd and tableValidEnd
+ * all move to the end of the new object.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* alloc = ws->objectEnd;
+    void* end = (BYTE*)alloc + roundedBytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* over-reserve space */
+    end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+    DEBUGLOG(5,
+        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase, no advance is possible */
+    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+        DEBUGLOG(4, "cwksp: object alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->objectEnd = end;
+    ws->tableEnd = end;
+    ws->tableValidEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+     * either side. */
+    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+/**
+ * Declares that no table memory can be assumed clean any more, by resetting
+ * tableValidEnd to objectEnd. Under MSAN workspace poisoning, the
+ * previously-clean range is re-poisoned first.
+ */
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the table re-use logic is sound, and that we don't
+     * access table space that we haven't cleaned, we re-"poison" the table
+     * space every time we mark it dirty. */
+    {
+        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
+        __msan_poison(ws->objectEnd, size);
+    }
+#endif
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Declares all currently reserved tables clean : tableValidEnd is raised
+ * to tableEnd if it was below it, and is never lowered.
+ */
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableEnd > ws->tableValidEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Zeroes the table memory lying between tableValidEnd and tableEnd (the part
+ * not yet known to be clean), then marks all tables clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    BYTE* const dirtyBegin = (BYTE*)ws->tableValidEnd;
+    BYTE* const dirtyEnd = (BYTE*)ws->tableEnd;
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (dirtyBegin < dirtyEnd) {
+        memset(dirtyBegin, 0, dirtyEnd - dirtyBegin);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/**
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ *
+ * Only tableEnd is reset (to objectEnd); tableValidEnd is left untouched.
+ * Under ASAN workspace poisoning, the range [objectEnd, tableValidEnd) is
+ * poisoned first.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    {
+        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+        __asan_poison_memory_region(ws->objectEnd, size);
+    }
+#endif
+
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ *
+ * tableEnd goes back to objectEnd, allocStart goes back to workspaceEnd, and
+ * the allocFailed flag is reset. The phase is rewound to the buffers phase
+ * if it was beyond it; tableValidEnd is not modified here.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the context re-use logic is sound, and that we don't
+     * access stuff that this compression hasn't initialized, we re-"poison"
+     * the workspace (or at least the non-static, non-table parts of it)
+     * every time we start a new compression. */
+    {
+        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
+        __msan_poison(ws->tableValidEnd, size);
+    }
+#endif
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* everything past the objects becomes inaccessible until it is reserved again */
+    {
+        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
+        __asan_poison_memory_region(ws->objectEnd, size);
+    }
+#endif
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ *
+ * `start` must be aligned on sizeof(void*) (asserted). After this call the
+ * workspace is empty and in the objects phase.
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->workspace = start;
+    ws->workspaceEnd = (BYTE*)start + size;
+    ws->objectEnd = ws->workspace;
+    ws->tableValidEnd = ws->objectEnd;
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    /* sets the remaining pointers (tableEnd, allocStart) and resets allocFailed */
+    ZSTD_cwksp_clear(ws);
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Allocates a buffer of `size` bytes through ZSTD_malloc() with `customMem`
+ * and initializes `ws` to manage it.
+ * Returns 0 on success. A NULL allocation is handled by RETURN_ERROR_IF with
+ * error `memory_allocation` (presumably returned as an error code; the macro
+ * is defined elsewhere).
+ */
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_malloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
+    ZSTD_cwksp_init(ws, workspace, size);
+    return 0;
+}
+
+/**
+ * Releases the managed buffer through ZSTD_free() with `customMem`, and
+ * zeroes the whole `ws` structure.
+ */
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    /* remember the buffer address : the structure is wiped before the free */
+    void* const buffer = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    memset(ws, 0, sizeof(*ws));
+    ZSTD_free(buffer, customMem);
+}
+
+/**
+ * Transfers the management of a workspace from `src` to `dst`.
+ * `src` is zeroed afterwards, and must be re-init()'ed before it is used
+ * again.
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    memset(src, 0, sizeof(*src));
+}
+
+/**
+ * Returns the total size, in bytes, of the buffer managed by `ws`.
+ */
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    BYTE* const begin = (BYTE*)ws->workspace;
+    BYTE* const end = (BYTE*)ws->workspaceEnd;
+    return (size_t)(end - begin);
+}
+
+/**
+ * Returns the allocFailed flag : non-zero if a reservation has failed since
+ * the flag was last reset.
+ */
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    int const failed = ws->allocFailed;
+    return failed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+/**
+ * Returns the number of free bytes left between the end of the tables
+ * and the start of the buffer / aligned allocations.
+ */
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    BYTE* const freeBegin = (BYTE*)ws->tableEnd;
+    BYTE* const freeEnd = (BYTE*)ws->allocStart;
+    return (size_t)(freeEnd - freeBegin);
+}
+
+/**
+ * Returns 1 if at least `additionalNeededSpace` bytes are still free,
+ * 0 otherwise.
+ */
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    size_t const available = ZSTD_cwksp_available_space(ws);
+    return available >= additionalNeededSpace;
+}
+
+/**
+ * Returns 1 if the free space is at least ZSTD_WORKSPACETOOLARGE_FACTOR
+ * times `additionalNeededSpace`, 0 otherwise.
+ */
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    size_t const oversizedThreshold = additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR;
+    return ZSTD_cwksp_check_available(ws, oversizedThreshold);
+}
+
+/**
+ * Returns 1 if the workspace is currently too large for
+ * `additionalNeededSpace` and workspaceOversizedDuration exceeds
+ * ZSTD_WORKSPACETOOLARGE_MAXDURATION, 0 otherwise.
+ */
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (!ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+        return 0;
+    }
+    return ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/**
+ * Updates workspaceOversizedDuration : incremented when the workspace is
+ * too large for `additionalNeededSpace`, reset to 0 otherwise.
+ */
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    int const tooLarge = ZSTD_cwksp_check_too_large(ws, additionalNeededSpace);
+    ws->workspaceOversizedDuration = tooLarge ? ws->workspaceOversizedDuration + 1 : 0;
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CWKSP_H */
+/**** ended inlining zstd_cwksp.h ****/
+#ifdef ZSTD_MULTITHREAD
+/**** start inlining zstdmt_compress.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+/* Note : This is an internal API.
+ *        These APIs used to be exposed with ZSTDLIB_API,
+ *        because it used to be the only way to invoke MT compression.
+ *        Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
+ *        instead.
+ *
+ *        If you depend on these APIs and can't switch, then define
+ *        ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
+ *        However, we may completely remove these functions in a future
+ *        release, so please switch soon.
+ *
+ *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
+ *        otherwise ZSTDMT_createCCtx*() will fail.
+ */
+
+#ifdef ZSTD_LEGACY_MULTITHREADED_API
+#  define ZSTDMT_API ZSTDLIB_API
+#else
+#  define ZSTDMT_API
+#endif
+
+/* ===   Dependencies   === */
+#include <stddef.h>                /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+/**** skipping file: ../zstd.h ****/
+
+
+/* ===   Constants   === */
+#ifndef ZSTDMT_NBWORKERS_MAX
+#  define ZSTDMT_NBWORKERS_MAX 200
+#endif
+#ifndef ZSTDMT_JOBSIZE_MIN
+#  define ZSTDMT_JOBSIZE_MIN (1 MB)
+#endif
+#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
+#define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
+
+
+/* ===   Memory management   ===
+ * ZSTDMT_CCtx is only forward-declared in this header :
+ * every function below receives or returns it through a pointer. */
+typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
+ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
+/* Same as ZSTDMT_createCCtx(), with an additional ZSTD_customMem argument.
+ * Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
+ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+                                                    ZSTD_customMem cMem);
+ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);   /* NOTE(review): result is presumably 0 or an error code - confirm against the implementation */
+
+ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);   /* NOTE(review): presumably the memory footprint of mtctx in bytes - confirm */
+
+
+/* ===   Simple one-pass compression function   ===
+ * Takes a source buffer (src, srcSize), a destination buffer (dst, dstCapacity)
+ * and a compression level.
+ * NOTE(review): the size_t result is presumably the compressed size, or an error code
+ *               testable with ZSTD_isError() - confirm against the implementation. */
+
+ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                       int compressionLevel);
+
+
+
+/* ===   Streaming functions   ===
+ * All of them operate on a ZSTDMT_CCtx and report their status through a size_t.
+ * Data is exchanged through ZSTD_inBuffer / ZSTD_outBuffer descriptors. */
+
+ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< if pledgedSrcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
+
+ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);   /* NOTE(review): presumably a suggested size for the next input chunk - confirm */
+ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+/* ===   Advanced functions and parameters  ===
+ * Variants of the one-pass and streaming-init entry points that accept
+ * explicit ZSTD_parameters / ZSTD_frameParameters and an optional dictionary
+ * (raw `dict` buffer or prepared `cdict`). */
+
+ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const ZSTD_CDict* cdict,
+                                          ZSTD_parameters params,
+                                          int overlapLog);
+
+ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
+                                        ZSTD_parameters params,
+                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                                        const ZSTD_CDict* cdict,
+                                        ZSTD_frameParameters fparams,
+                                        unsigned long long pledgedSrcSize);  /* note : zero means empty (unlike ZSTDMT_initCStream_advanced(), where zero means unknown) */
+
+/* ZSTDMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter()
+ * and read back using ZSTDMT_getMTCtxParameter().
+ * No explicit values are given, so the enumerators are 0, 1, 2 in declaration order. */
+typedef enum {
+    ZSTDMT_p_jobSize,     /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
+    ZSTDMT_p_overlapLog,  /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
+    ZSTDMT_p_rsyncable    /* Enables rsyncable mode. */
+} ZSTDMT_parameter;
+
+/* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
+ * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
+ * NOTE(review): the context passed here is a ZSTDMT_CCtx, so the creation call meant above
+ *               is presumably ZSTDMT_createCCtx() - confirm.
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
+
+/* ZSTDMT_getMTCtxParameter() :
+ * Query the ZSTDMT_CCtx for a parameter value.
+ * NOTE(review): the queried value is presumably written to *value - confirm.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
+
+
+/*! ZSTDMT_compressStream_generic() :
+ *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
+ *  depending on flush directive (`endOp`).
+ * @return : minimum amount of data still to be flushed
+ *           0 if fully flushed
+ *           or an error code
+ *  note : needs to be init using any ZSTD_initCStream*() variant
+ *  NOTE(review): the init functions declared in this header are named ZSTDMT_initCStream*() ;
+ *                confirm which family the note above refers to. */
+ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                                ZSTD_outBuffer* output,
+                                                ZSTD_inBuffer* input,
+                                                ZSTD_EndDirective endOp);
+
+
+/* ========================================================
+ * ===  Private interface, for use by ZSTD_compress.c   ===
+ * ===  Not exposed in libzstd. Never invoke directly   ===
+ * ========================================================
+ * Unlike the declarations above, none of these prototypes carries ZSTDMT_API. */
+
+ /*! ZSTDMT_toFlushNow()
+  *  Tell how many bytes are ready to be flushed immediately.
+  *  Probe the oldest active job (not yet entirely flushed) and check its output buffer.
+  *  If return 0, it means there is no active job,
+  *  or, it means oldest job is still active, but everything produced has been flushed so far,
+  *  therefore flushing is limited by speed of oldest job. */
+size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
+
+/*! ZSTDMT_CCtxParam_setMTCtxParameter()
+ *  like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_params */
+size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
+
+/*! ZSTDMT_CCtxParam_setNbWorkers()
+ *  Set nbWorkers, and clamp it.
+ *  Also reset jobSize and overlapLog */
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
+
+/*! ZSTDMT_updateCParams_whileCompressing() :
+ *  Updates only a selected set of compression parameters, to remain compatible with current frame.
+ *  New parameters will be applied to next compression job. */
+void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
+
+/*! ZSTDMT_getFrameProgression():
+ *  tells how much data has been consumed (input) and produced (output) for current frame.
+ *  able to count progression inside worker threads.
+ */
+ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
+
+
+/*! ZSTDMT_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  `params` is passed by value.
+ *  @return : 0, or an error code */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+                    const ZSTD_CDict* cdict,
+                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTDMT_COMPRESS_H */
+/**** ended inlining zstdmt_compress.h ****/
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define kSearchStrength      8      /* NOTE(review): not referenced in this section; presumably a match-search tuning constant - confirm at its use sites */
+#define HASH_READ_SIZE       8      /* NOTE(review): presumably the widest read done by the hash helpers (ZSTD_hash5Ptr..ZSTD_hash8Ptr read 64 bits) - confirm */
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;   /* type of ZSTD_CCtx_s::stage; values 0..3, ZSTDcs_created being 0 */
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;   /* type of ZSTD_CCtx_s::streamStage; values 0..2, zcss_init being 0 */
+
+typedef struct ZSTD_prefixDict_s {   /* Describes a dictionary buffer : pointer, size and content type.
+                                      * ZSTD_CCtx_s holds one as `prefixDict`, commented there as a single-usage dictionary. */
+    const void* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+} ZSTD_prefixDict;
+
+typedef struct {   /* Dictionary record held by ZSTD_CCtx_s as `localDict`.
+                    * NOTE(review): presumably `dictBuffer` is an owned copy, `dict` the content actually referenced,
+                    *               and `cdict` a digested form of it - confirm against the code that fills it. */
+    void* dictBuffer;
+    void const* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+    ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
+typedef struct {   /* Huffman side of the entropy tables */
+    U32 CTable[HUF_CTABLE_SIZE_U32(255)];   /* table storage, sized by HUF_CTABLE_SIZE_U32(255) */
+    HUF_repeat repeatMode;
+} ZSTD_hufCTables_t;
+
+typedef struct {   /* FSE side of the entropy tables : one table and one repeat mode
+                    * for each of offset codes, match lengths and literal lengths */
+    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    FSE_repeat offcode_repeatMode;
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
+} ZSTD_fseCTables_t;
+
+typedef struct {   /* Complete set of entropy tables : Huffman + FSE, stored by value */
+    ZSTD_hufCTables_t huf;
+    ZSTD_fseCTables_t fse;
+} ZSTD_entropyCTables_t;
+
+typedef struct {   /* One entry of optState_t::matchTable ("list of found matches") */
+    U32 off;
+    U32 len;
+} ZSTD_match_t;
+
+typedef struct {   /* One entry of optState_t::priceTable ("positions tracked by optimal parser") */
+    int price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];   /* one value per repcode slot */
+} ZSTD_optimal_t;
+
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;   /* type of optState_t::priceType : dynamic (0) or pre-defined (1) pricing */
+
+typedef struct {   /* State of the optimal parser; embedded in ZSTD_matchState_t as `opt` */
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+
+    U32  litSum;                 /* nb of literals */
+    U32  litLengthSum;           /* nb of litLength codes */
+    U32  matchLengthSum;         /* nb of matchLength codes */
+    U32  offCodeSum;             /* nb of offset codes */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
+    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;   /* same type as ZSTD_CCtx_params_s::literalCompressionMode */
+} optState_t;
+
+typedef struct {   /* Per-block state : entropy tables plus repcode values.
+                    * ZSTD_blockState_t points to two of these (prevCBlock / nextCBlock). */
+  ZSTD_entropyCTables_t entropy;
+  U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {   /* Window description : two base pointers plus two index limits.
+                    * Used by both ZSTD_matchState_t and ldmState_t. */
+    BYTE const* nextSrc;    /* next block here to continue on current prefix */
+    BYTE const* base;       /* All regular indexes relative to this position */
+    BYTE const* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more valid data */
+} ZSTD_window_t;
+
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;   /* typedef comes first because the struct refers to itself through dictMatchState */
+struct ZSTD_matchState_t {
+    ZSTD_window_t window;   /* State for window round buffer management */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
+    U32 nextToUpdate;       /* index from which to continue table update */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+    U32* hashTable;
+    U32* hashTable3;        /* NOTE(review): presumably the len==3 table sized by hashLog3 - confirm */
+    U32* chainTable;
+    optState_t opt;         /* optimal parser state */
+    const ZSTD_matchState_t* dictMatchState;   /* read-only pointer to another match state; NOTE(review): presumably that of an attached dictionary - confirm */
+    ZSTD_compressionParameters cParams;
+};
+
+typedef struct {   /* Block-level state held by ZSTD_CCtx_s as `blockState` :
+                    * two compressed-block states by pointer, and the match state by value */
+    ZSTD_compressedBlockState_t* prevCBlock;
+    ZSTD_compressedBlockState_t* nextCBlock;
+    ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {   /* One entry of ldmState_t::hashTable */
+    U32 offset;
+    U32 checksum;
+} ldmEntry_t;
+
+typedef struct {   /* Long distance matching state; ZSTD_CCtx_s holds one as `ldmState` */
+    ZSTD_window_t window;   /* State for the window round buffer management */
+    ldmEntry_t* hashTable;
+    U32 loadedDictEnd;
+    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
+    U64 hashPower;          /* Used to compute the rolling hash.
+                             * Depends on ldmParams.minMatchLength */
+} ldmState_t;
+
+typedef struct {   /* Long distance matching parameters; embedded in ZSTD_CCtx_params_s as `ldmParams` */
+    U32 enableLdm;          /* 1 if enable long distance matching */
+    U32 hashLog;            /* Log size of hashTable */
+    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
+    U32 minMatchLength;     /* Minimum match length */
+    U32 hashRateLog;       /* Log number of entries to skip */
+    U32 windowLog;          /* Window log for the LDM */
+} ldmParams_t;
+
+typedef struct {   /* One raw sequence : offset, literal length and match length, each a full U32 */
+    U32 offset;
+    U32 litLength;
+    U32 matchLength;
+} rawSeq;
+
+typedef struct {   /* Array of rawSeq with a read cursor; invariants per the field notes : pos <= size <= capacity */
+  rawSeq* seq;     /* The start of the sequences */
+  size_t pos;      /* The position where reading stopped. <= size. */
+  size_t size;     /* The number of sequences. <= capacity. */
+  size_t capacity; /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+typedef struct {   /* Sequence collection record; ZSTD_CCtx_s holds one as `seqCollector`.
+                    * NOTE(review): presumably `collectSequences` is an on/off flag, `seqStart` the output array,
+                    *               `seqIndex` the next write position and `maxSequences` its capacity - confirm at use sites. */
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
+struct ZSTD_CCtx_params_s {   /* Full parameter set; ZSTD_CCtx_s stores two copies (requestedParams / appliedParams) */
+    ZSTD_format_e format;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+
+    int compressionLevel;
+    int forceWindow;           /* force back-references to respect limit of
+                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
+
+    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;   /* read by ZSTD_disableLiteralsCompression() */
+
+    /* Multithreading: used to pass parameters to mtctx */
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
+
+    /* Long distance matching parameters */
+    ldmParams_t ldmParams;
+
+    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+    ZSTD_customMem customMem;
+};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+
+struct ZSTD_CCtx_s {   /* Compression context : parameters, workspace, block state, streaming buffers and dictionaries */
+    ZSTD_compressionStage_e stage;
+    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+    ZSTD_CCtx_params requestedParams;    /* NOTE(review): presumably what the user asked for, versus appliedParams = what the current session uses - confirm */
+    ZSTD_CCtx_params appliedParams;
+    U32   dictID;
+
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
+    size_t blockSize;
+    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    unsigned long long consumedSrcSize;
+    unsigned long long producedCSize;
+    XXH64_state_t xxhState;
+    ZSTD_customMem customMem;
+    size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;
+    int initialized;
+
+    seqStore_t seqStore;      /* sequences storage ptrs */
+    ldmState_t ldmState;      /* long distance matching state */
+    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
+    size_t maxNbLdmSequences;
+    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+    ZSTD_blockState_t blockState;
+    U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+
+    /* streaming : input buffer, output buffer, and the stage of the streaming state machine */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Dictionary */
+    ZSTD_localDict localDict;
+    const ZSTD_CDict* cdict;
+    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
+
+    /* Multi-threading : the member below only exists when ZSTD_MULTITHREAD is defined */
+#ifdef ZSTD_MULTITHREAD
+    ZSTDMT_CCtx* mtctx;
+#endif
+};
+
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;   /* dictionary table load method, per the type name : fast (0) or full (1) */
+
+typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;   /* selector argument of ZSTD_selectBlockCompressor() */
+
+
+typedef size_t (*ZSTD_blockCompressor) (   /* pointer to a block compression routine : match state, sequence store, repcode array, then the source buffer */
+        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);   /* returns the routine for a (strategy, dictMode) pair */
+
+
+/* ZSTD_LLcode() :
+ * Converts a literal length into its code.
+ * Lengths 0..63 are resolved through a lookup table;
+ * larger lengths are coded as ZSTD_highbit32(litLength) + 19. */
+MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
+{
+    static const U32 LL_deltaCode = 19;
+    static const BYTE LL_Code[64] = {
+         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+        16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+        22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 };
+
+    if (litLength <= 63) {
+        return LL_Code[litLength];   /* short length : direct table hit */
+    }
+    return ZSTD_highbit32(litLength) + LL_deltaCode;
+}
+
+/* ZSTD_MLcode() :
+ * Converts a match length into its code.
+ * note : mlBase = matchLength - MINMATCH;
+ *        because it's the format it's stored in seqStore->sequences
+ * Values 0..127 are resolved through a lookup table;
+ * larger values are coded as ZSTD_highbit32(mlBase) + 36. */
+MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
+{
+    static const U32 ML_deltaCode = 36;
+    static const BYTE ML_Code[128] = {
+         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+        38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+        41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+
+    if (mlBase <= 127) {
+        return ML_Code[mlBase];   /* short length : direct table hit */
+    }
+    return ZSTD_highbit32(mlBase) + ML_deltaCode;
+}
+
+typedef struct repcodes_s {   /* three repcode values wrapped in a struct, so ZSTD_updateRep() can return them by value */
+    U32 rep[3];
+} repcodes_t;
+
+/* ZSTD_updateRep() :
+ * Computes the repcode history that follows a sequence.
+ * `rep`    : current history (3 values), left untouched.
+ * `offset` : offset code of the sequence; values >= ZSTD_REP_NUM are full offsets
+ *            (stored with ZSTD_REP_MOVE added), smaller values designate a repcode.
+ * `ll0`    : added to `offset` to obtain the effective repcode.
+ * @return : the updated history, by value. */
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t updated;
+
+    if (offset >= ZSTD_REP_NUM) {
+        /* full offset : becomes the most recent entry, older entries shift down */
+        updated.rep[0] = offset - ZSTD_REP_MOVE;
+        updated.rep[1] = rep[0];
+        updated.rep[2] = rep[1];
+        return updated;
+    }
+
+    /* repcode */
+    {   U32 const repCode = offset + ll0;
+        if (repCode == 0) {
+            /* history is unchanged */
+            memcpy(&updated, rep, sizeof(updated));
+            return updated;
+        }
+        /* repCode == ZSTD_REP_NUM designates (rep[0] - 1), anything else an existing slot */
+        updated.rep[0] = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+        updated.rep[1] = rep[0];
+        updated.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+    }
+    return updated;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * Looks up the bounds of `cParam` with ZSTD_cParam_getBounds().
+ * @return 1 if the lookup succeeded and lowerBound <= value <= upperBound,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+    ZSTD_bounds const limits = ZSTD_cParam_getBounds(cParam);
+    return !ZSTD_isError(limits.error)
+        && (value >= limits.lowerBound)
+        && (value <= limits.upperBound);
+}
+
+/* ZSTD_noCompressBlock() :
+ * Emits a raw (bt_raw) block : a 3-byte little-endian block header followed by
+ * a verbatim copy of the `srcSize` bytes at `src`.
+ * The header packs `lastBlock`, the block type shifted by 1, and srcSize shifted by 3.
+ * @return : ZSTD_blockHeaderSize + srcSize,
+ *           or a dstSize_tooSmall error when that does not fit in dstCapacity */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const out = (BYTE*)dst;
+
+    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                    dstSize_tooSmall, "dst buf too small for uncompressed block");
+
+    {   U32 const cBlockHeader24 = (U32)(srcSize << 3) + (((U32)bt_raw)<<1) + lastBlock;
+        MEM_writeLE24(out, cBlockHeader24);
+    }
+    memcpy(out + ZSTD_blockHeaderSize, src, srcSize);
+    return ZSTD_blockHeaderSize + srcSize;
+}
+
+/* ZSTD_rleCompressBlock() :
+ * Emits an RLE (bt_rle) block : a 3-byte little-endian block header followed by
+ * the single byte `src`. `srcSize` is only recorded in the header.
+ * @return : 4 (bytes written), or a dstSize_tooSmall error when dstCapacity < 4 */
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+
+    {   BYTE* const out = (BYTE*)dst;
+        U32 const header24 = (U32)(srcSize << 3) + (((U32)bt_rle)<<1) + lastBlock;
+        MEM_writeLE24(out, header24);
+        out[3] = src;   /* the repeated byte follows the header */
+    }
+    return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations
+ * @return : (srcSize >> minlog) + 2, where minlog is 6,
+ *           or (strat - 1) for strategies >= ZSTD_btultra */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 minlog = 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    if (strat >= ZSTD_btultra) {
+        minlog = (U32)(strat) - 1;
+    }
+    return 2 + (srcSize >> minlog);
+}
+
+/* ZSTD_disableLiteralsCompression() :
+ * Tells whether literals must be left uncompressed for these parameters.
+ * @return : 0 for ZSTD_lcm_huffman, 1 for ZSTD_lcm_uncompressed;
+ *           for ZSTD_lcm_auto (and, after a debug assert, any other value) :
+ *           1 only when strategy is ZSTD_fast and targetLength > 0 */
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    ZSTD_literalCompressionMode_e const mode = cctxParams->literalCompressionMode;
+
+    if (mode == ZSTD_lcm_huffman) return 0;
+    if (mode == ZSTD_lcm_uncompressed) return 1;
+
+    if (mode != ZSTD_lcm_auto) {
+        assert(0 /* impossible: pre-validated */);
+        /* when asserts are disabled, treated like ZSTD_lcm_auto */
+    }
+    return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  Copies the literals [ip, iend) to op.
+ *  The part located before ilimit_w goes through ZSTD_wildcopy();
+ *  the remainder, up to iend, is copied one byte at a time.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ptrdiff_t const bulk = ilimit_w - ip;
+        ZSTD_wildcopy(op, ip, bulk, ZSTD_no_overlap);
+        op += bulk;
+        ip += bulk;   /* ip == ilimit_w from here on */
+    }
+    for ( ; ip < iend; ++ip, ++op) {
+        *op = *ip;
+    }
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+ *  Side effects on seqStorePtr : literals are appended at `lit` (advanced by litLength),
+ *  one entry is written at `sequences` (then advanced by one), and
+ *  longLengthID / longLengthPos are set when a length exceeds 0xFFFF.
+*/
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+{
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;   /* literals ending at or before this point take the wildcopy path */
+    BYTE const* const litEnd = literals + litLength;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+    static const BYTE* g_start = NULL;
+    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
+    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
+        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+    }
+#endif
+    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
+    /* copy Literals */
+    assert(seqStorePtr->maxNbLit <= 128 KB);
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         * The 16-byte copy is unconditional, even when litLength < 16.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        /* literals end too close to litLimit : use the bounded copy */
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
+    seqStorePtr->lit += litLength;
+
+    /* literal Length : stored as U16; a value above 0xFFFF is flagged with longLengthID = 1 */
+    if (litLength>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].litLength = (U16)litLength;
+
+    /* match offset : stored as offCode + 1 */
+    seqStorePtr->sequences[0].offset = offCode + 1;
+
+    /* match Length : stored as U16; a value above 0xFFFF is flagged with longLengthID = 2 */
+    if (mlBase>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
+
+    seqStorePtr->sequences++;
+}
+
+
+/*-*************************************
+*  Match length counter
+***************************************/
+/* ZSTD_NbCommonBytes() :
+ * `val` is the XOR of two machine words read from memory (see ZSTD_count()).
+ * @return : the number of bytes, counted from the lowest memory address, that are
+ *           identical in both words. This is the number of trailing zero bytes of
+ *           `val` on little-endian targets, and of leading zero bytes on big-endian targets.
+ * `val` must be non-zero : the GCC builtins used below are undefined for 0
+ * (ZSTD_count() only calls this function with a non-zero difference).
+ * The MSVC branches return 0 when no bit is set. */
+static unsigned ZSTD_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+#       if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
+#       elif defined(__GNUC__) && (__GNUC__ >= 4)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            /* software fallback : isolate the lowest set bit, then map it to a byte position */
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        } else { /* 32 bits */
+#       if defined(_MSC_VER)
+            unsigned long r=0;
+            return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    } else {  /* Big Endian CPU */
+        if (MEM_64bits()) {
+#       if defined(_MSC_VER) && defined(_WIN64)
+            /* _BitScanReverse64() yields the index of the highest set bit, counted from bit 0.
+             * The number of leading zero bits is therefore (63 - r); dividing by 8 gives
+             * the leading zero bytes, matching the __builtin_clzll() branch below. */
+            unsigned long r = 0;
+            return _BitScanReverse64( &r, val ) ? (unsigned)((63 - r) >> 3) : 0;
+#       elif defined(__GNUC__) && (__GNUC__ >= 4)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        } else { /* 32 bits */
+#       if defined(_MSC_VER)
+            /* same conversion as the 64-bit case : leading zero bits == 31 - r */
+            unsigned long r = 0;
+            return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)((31 - r) >> 3) : 0;
+#       elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+    }   }
+}
+
+
+/* ZSTD_count() :
+ * Counts how many consecutive bytes are equal at pIn and pMatch,
+ * without reading pIn at or beyond pInLimit.
+ * Whole machine words are compared first; the remainder is resolved
+ * with 4-byte (64-bit targets only), 2-byte and 1-byte comparisons.
+ * @return : number of matching bytes */
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    size_t const wordSize = sizeof(size_t);
+    const BYTE* const origin = pIn;
+    const BYTE* const wordLimit = pInLimit - (wordSize-1);
+
+    if (pIn < wordLimit) {
+        /* first word handled apart : a mismatch here is the answer itself */
+        size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+        if (diff) return ZSTD_NbCommonBytes(diff);
+
+        for (pIn += wordSize, pMatch += wordSize;
+             pIn < wordLimit;
+             pIn += wordSize, pMatch += wordSize) {
+            diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (diff) {
+                return (size_t)((pIn + ZSTD_NbCommonBytes(diff)) - origin);
+            }
+        }
+    }
+
+    /* less than one word left : finish with narrower reads */
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) {
+        pIn += 4;
+        pMatch += 4;
+    }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) {
+        pIn += 2;
+        pMatch += 2;
+    }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) {
+        pIn++;
+    }
+    return (size_t)(pIn - origin);
+}
+
+/** ZSTD_count_2segments() :
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ *  The first count is capped so that `match` never runs past mEnd and `ip` never past iEnd.
+ *  @return : total number of matching bytes
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    size_t const firstPart = ZSTD_count(ip, match, MIN( ip + (mEnd - match), iEnd));
+
+    if (match + firstPart == mEnd) {
+        /* the match reached the end of its segment : keep counting from iStart */
+        DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", firstPart);
+        DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
+        DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
+        DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[firstPart], *iStart);
+        DEBUGLOG(7, "final match length = %zu", firstPart + ZSTD_count(ip+firstPart, iStart, iEnd));
+        return firstPart + ZSTD_count(ip+firstPart, iStart, iEnd);
+    }
+    return firstPart;
+}
+
+
+/*-*************************************
+ *  Hashes
+ ***************************************/
+/* Multiplicative hashes over the first N bytes of the input (N = 3..8).
+ * Each ZSTD_hashN() shifts its input left so that only the N lowest-order bytes
+ * of `u` contribute, multiplies by a dedicated constant, and keeps the `h`
+ * highest bits of the product. The shift counts are (32-h) or (64-h), so `h`
+ * must be non-zero and no larger than the width of the product.
+ * Each ZSTD_hashNPtr() reads the input from memory, then applies ZSTD_hashN(). */
+
+/* 3 bytes : 32-bit product, low 24 bits of `u` are hashed */
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+/* 4 bytes : 32-bit product, the whole of `u` is hashed ; input is read with MEM_read32() (not the LE variant) */
+static const U32 prime4bytes = 2654435761U;
+static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+/* 5 bytes : 64-bit product, low 40 bits of `u` are hashed */
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+/* 6 bytes : 64-bit product, low 48 bits of `u` are hashed */
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+/* 7 bytes : 64-bit product, low 56 bits of `u` are hashed */
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+/* 8 bytes : 64-bit product, the whole of `u` is hashed */
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
+/* ZSTD_hashPtr() :
+ * Hashes the first `mls` bytes at `p` into a value of `hBits` bits,
+ * dispatching to the hash dedicated to that length.
+ * `mls` values 5, 6, 7 and 8 select their own hash ;
+ * 4 and any other value use the 4-bytes hash. */
+MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    if (mls == 5) return ZSTD_hash5Ptr(p, hBits);
+    if (mls == 6) return ZSTD_hash6Ptr(p, hBits);
+    if (mls == 7) return ZSTD_hash7Ptr(p, hBits);
+    if (mls == 8) return ZSTD_hash8Ptr(p, hBits);
+    /* mls == 4, as well as every unlisted value */
+    return ZSTD_hash4Ptr(p, hBits);
+}
+
+/** ZSTD_ipow() :
+ * Integer exponentiation by repeated squaring.
+ * @return : base^exponent, computed in (wrapping) 64-bit unsigned arithmetic.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 result = 1;
+    /* consume the exponent one bit at a time, lowest bit first */
+    for ( ; exponent != 0; exponent >>= 1, base *= base) {
+        if (exponent & 1) result *= base;
+    }
+    return result;
+}
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/** ZSTD_rollingHash_append() :
+ * Extends `hash` with the `size` bytes found at `buf`, in order.
+ * For each byte : hash = hash * prime8bytes + (byte + ZSTD_ROLL_HASH_CHAR_OFFSET).
+ * @return : the updated hash value (unchanged when size == 0).
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* const bytes = (BYTE const*)buf;
+    size_t n = 0;
+    while (n < size) {
+        hash = (hash * prime8bytes) + (bytes[n] + ZSTD_ROLL_HASH_CHAR_OFFSET);
+        n++;
+    }
+    return hash;
+}
+
+/** ZSTD_rollingHash_compute() :
+ * Rolling hash of the `size` bytes at `buf`,
+ * obtained by appending the buffer to an initial hash of 0.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    U64 const initialHash = 0;
+    return ZSTD_rollingHash_append(initialHash, buf, size);
+}
+
+/** ZSTD_rollingHash_primePower() :
+ * Provides the `primePower` argument expected by ZSTD_rollingHash_rotate()
+ * when the hash covers a window of `length` bytes :
+ * prime8bytes raised to the power (length - 1).
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    U32 const exponent = length - 1;
+    return ZSTD_ipow(prime8bytes, exponent);
+}
+
+/** ZSTD_rollingHash_rotate() :
+ * Slides the rolling hash forward by one byte :
+ * the contribution of `toRemove` (scaled by `primePower`) is subtracted,
+ * the result is multiplied by prime8bytes, then `toAdd` is added.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    U64 const leaving  = (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    U64 const entering = toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return ((hash - leaving) * prime8bytes) + entering;
+}
+
+/*-*************************************
+*  Round buffer management
+***************************************/
+/* Compile-time guard : ZSTD_CURRENT_MAX below is evaluated in 32-bit unsigned arithmetic
+ * and contains (1U << ZSTD_WINDOWLOG_MAX), so the largest window log must stay <= 31.
+ * Note : the test is on ZSTD_WINDOWLOG_MAX_64, while the message names ZSTD_WINDOWLOG_MAX. */
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
+/* Max current allowed :
+ * indices above this value make ZSTD_window_needOverflowCorrection() return non-zero */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again :
+ * the room left between ZSTD_CURRENT_MAX and the largest U32 value */
+#define ZSTD_CHUNKSIZE_MAX                                                     \
+    ( ((U32)-1)                  /* Maximum ending current index */            \
+    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
+
+/**
+ * ZSTD_window_clear():
+ * Empties the window (forgets its history) :
+ * both lowLimit and dictLimit are moved to the index of nextSrc.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+    /* index of the next byte to be appended, truncated to 32 bits */
+    U32 const nextIdx = (U32)(size_t)(window->nextSrc - window->base);
+
+    window->dictLimit = nextIdx;
+    window->lowLimit = nextIdx;
+}
+
+/**
+ * ZSTD_window_hasExtDict():
+ * Tells whether the window owns a non-empty extDict,
+ * i.e. whether dictLimit lies strictly above lowLimit.
+ * @return : 1 if so, 0 otherwise.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+    return (window.dictLimit > window.lowLimit);
+}
+
+/**
+ * ZSTD_matchState_dictMode():
+ * Derives, from the provided matchState, the dictMode to hand to the compressor :
+ * - ZSTD_extDict        when the window has a non-empty extDict,
+ * - ZSTD_dictMatchState otherwise, when a dictMatchState is attached,
+ * - ZSTD_noDict         in every other case.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    if (ZSTD_window_hasExtDict(ms->window)) return ZSTD_extDict;
+    if (ms->dictMatchState != NULL) return ZSTD_dictMatchState;
+    return ZSTD_noDict;
+}
+
+/**
+ * ZSTD_window_needOverflowCorrection():
+ * Checks whether the index of `srcEnd` (relative to window.base)
+ * has grown beyond ZSTD_CURRENT_MAX, in which case indices need
+ * overflow protection.
+ * @return : 1 if correction is needed, 0 otherwise.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  void const* srcEnd)
+{
+    BYTE const* const end = (BYTE const*)srcEnd;
+    U32 const endIdx = (U32)(end - window.base);
+    return (endIdx > ZSTD_CURRENT_MAX) ? 1 : 0;
+}
+
+/**
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ *
+ * @param window   window whose base, dictBase, lowLimit and dictLimit get rebased
+ * @param cycleLog number of low-order index bits to preserve
+ * @param maxDist  distance in the past that must remain addressable
+ * @param src      current position ; its index relative to window->base is the one reduced
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                           U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const current = (U32)((BYTE const*)src - window->base);
+    /* position of current within its cycle : the low cycleLog bits */
+    U32 const currentCycle0 = current & cycleMask;
+    /* Exclude zero so that newCurrent - maxDist >= 1. */
+    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+    /* new index of src : same low bits as before, with maxDist of history below it */
+    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const correction = current - newCurrent;
+    assert((maxDist & cycleMask) == 0);
+    assert(current > newCurrent);
+    /* Loose bound, should be around 1<<29 (see above) */
+    assert(correction > 1<<28);
+
+    /* moving both bases forward by `correction` lowers every (ptr - base) index by the same amount */
+    window->base += correction;
+    window->dictBase += correction;
+    /* limits are rebased too ; those at or below `correction` are clamped to 1 instead of wrapping */
+    if (window->lowLimit <= correction) window->lowLimit = 1;
+    else window->lowLimit -= correction;
+    if (window->dictLimit <= correction) window->dictLimit = 1;
+    else window->dictLimit -= correction;
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= 1);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
+
+/**
+ * ZSTD_window_enforceMaxDist():
+ * When the block end index exceeds (maxDist + loadedDictEnd),
+ * raises lowLimit so that:
+ *    (blockEnd - base) - lowLimit <= maxDist
+ * raises dictLimit to lowLimit if it fell below it,
+ * and invalidates the dictionary. Otherwise, nothing is modified.
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * Likewise, if dictMatchStatePtr is not NULL, *dictMatchStatePtr is set to NULL.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
+ */
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         wrapping around (unsigned underflow) in the next operation
+         `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
+        /* lowLimit only ever moves up */
+        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+        /* keep the invariant lowLimit <= dictLimit */
+        if (window->dictLimit < window->lowLimit) {
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
+            window->dictLimit = window->lowLimit;
+        }
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* ZSTD_checkDictValidity() :
+ * Counterpart of ZSTD_window_enforceMaxDist() which leaves the window untouched :
+ * its only effect is to invalidate the dictionary
+ * once input has progressed beyond window size.
+ * assumptions : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *               loadedDictEnd uses same referential as window->base
+ *               maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 blockEndIdx;
+    U32 loadedDictEnd;
+
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+
+    blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    loadedDictEnd = *loadedDictEndPtr;
+    DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+    assert(blockEndIdx >= loadedDictEnd);
+
+    if (blockEndIdx > loadedDictEnd + maxDist) {
+        /* Window size is reached somewhere within the next block :
+         * to keep things simple, the dictionary is dropped for the whole block.
+         */
+        DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+        *loadedDictEndPtr = 0;
+        *dictMatchStatePtr = NULL;
+        return;
+    }
+
+    if (loadedDictEnd != 0) {
+        DEBUGLOG(6, "dictionary considered valid for current block");
+    }
+}
+
+/**
+ * ZSTD_window_init():
+ * Resets the window to its initial state : all fields zeroed, then
+ * base and dictBase pointed at an empty string literal,
+ * lowLimit and dictLimit set to 1, and nextSrc set to base + 1.
+ */
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+    memset(window, 0, sizeof(*window));
+    window->dictBase = (BYTE const*)"";
+    window->base = (BYTE const*)"";
+    /* indices start from 1, so that 1st position is valid ;
+     * it ensures first and later CCtx usages compress the same */
+    window->lowLimit = 1;
+    window->dictLimit = 1;
+    window->nextSrc = window->base + 1;   /* see issue #1241 */
+}
+
+/**
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ * An empty segment (srcSize == 0) leaves the window untouched and is reported as contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+                                  void const* src, size_t srcSize)
+{
+    BYTE const* const ip = (BYTE const*)src;
+    U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
+    if (srcSize == 0)
+        return contiguous;
+    assert(window->base != NULL);
+    assert(window->dictBase != NULL);
+    /* Check if blocks follow each other */
+    if (src != window->nextSrc) {
+        /* not contiguous */
+        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
+        /* the former prefix [dictLimit, nextSrc) becomes the extDict ; the former extDict is dropped */
+        window->lowLimit = window->dictLimit;
+        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
+        window->dictLimit = (U32)distanceFromBase;
+        window->dictBase = window->base;
+        /* rebase so that the index of `ip` continues from where the previous segment ended */
+        window->base = ip - distanceFromBase;
+        /* ms->nextToUpdate = window->dictLimit; */
+        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
+        contiguous = 0;
+    }
+    window->nextSrc = ip + srcSize;
+    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+    /* note : both comparisons are combined with a bitwise `&`, so both are always evaluated */
+    if ( (ip+srcSize > window->dictBase + window->lowLimit)
+       & (ip < window->dictBase + window->dictLimit)) {
+        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+        /* new lowLimit is the end of the input expressed as an extDict index, capped at dictLimit */
+        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+        window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
+    }
+    return contiguous;
+}
+
+/**
+ * Computes the lowest index a match is allowed to reference ;
+ * that index may belong either to the ext-dict or to the prefix.
+ * When a dictionary is loaded (ms->loadedDictEnd != 0), it is window.lowLimit.
+ * Otherwise, it is window.lowLimit, raised to (current - (1 << windowLog))
+ * whenever current is further than (1 << windowLog) away from window.lowLimit.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+    U32 const lowLimit = ms->window.lowLimit;
+    U32 const maxDistance = 1U << windowLog;
+
+    if (ms->loadedDictEnd != 0) return lowLimit;
+    if (current - lowLimit > maxDistance) return current - maxDistance;
+    return lowLimit;
+}
+
+/**
+ * Computes the lowest index, within the prefix, a match is allowed to reference.
+ * When a dictionary is loaded (ms->loadedDictEnd != 0), it is window.dictLimit.
+ * Otherwise, it is window.dictLimit, raised to (current - (1 << windowLog))
+ * whenever current is further than (1 << windowLog) away from window.dictLimit.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+    U32 const prefixStart = ms->window.dictLimit;
+    U32 const maxDistance = 1U << windowLog;
+
+    if (ms->loadedDictEnd != 0) return prefixStart;
+    if (current - prefixStart > maxDistance) return current - maxDistance;
+    return prefixStart;
+}
+
+
+
+/* debug functions */
+#if (DEBUGLEVEL>=2)
+
+/* ZSTD_fWeight() :
+ * Debug helper : converts a raw statistic into a fractional weight.
+ * Works on (rawStat + 1) in fixed point with 8 fractional bits :
+ * the position of the highest set bit supplies the integer part,
+ * the value shifted down by that position supplies the remainder.
+ * @return : the fixed-point sum, scaled back to a double. */
+MEM_STATIC double ZSTD_fWeight(U32 rawStat)
+{
+    U32 const fp_accuracy = 8;
+    U32 const scale = (1 << fp_accuracy);
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const fixedPoint = (hb * scale) + ((stat << fp_accuracy) >> hb);
+    assert(hb + fp_accuracy < 31);
+    return (double)fixedPoint / scale;
+}
+
+/* ZSTD_debugTable() :
+ * Debug helper : logs the sum of table[0..max] (inclusive),
+ * then one line per element with its index, its frequency,
+ * and ZSTD_fWeight(sum) - ZSTD_fWeight(frequency) as predicted bit cost */
+MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+{
+    unsigned total = 0;
+    unsigned n;
+    for (n = 0; n <= max; ++n) {
+        total += table[n];
+    }
+    DEBUGLOG(2, "total nb elts: %u", total);
+    n = 0;
+    while (n <= max) {
+        DEBUGLOG(2, "%2u: %5u  (%.2f)",
+                n, table[n], ZSTD_fWeight(total) - ZSTD_fWeight(table[n]) );
+        n++;
+    }
+}
+
+#endif
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ *               and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         short* offcodeNCount, unsigned* offcodeMaxValue,
+                         const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
+
+/* ==============================================================
+ * Private declarations
+ * These prototypes shall only be called from within lib/compress
+ * ============================================================== */
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  as the name implies */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize);
+
+/* ZSTD_compress_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict,size_t dictSize,
+                                 const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+#endif /* ZSTD_COMPRESS_H */
+/**** ended inlining zstd_compress_internal.h ****/
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
+/**** ended inlining zstd_compress_literals.h ****/
+
+/* ZSTD_noCompressLiterals() :
+ * Stores the literals uncompressed (set_basic) : a header of 1, 2 or 3 bytes,
+ * depending on srcSize, followed by a copy of the `srcSize` bytes of `src`.
+ * @return : total nb of bytes written into `dst` (header + srcSize),
+ *           or an error code (dstSize_tooSmall) if they do not fit in dstCapacity */
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    /* header grows by one byte past 31 literals, and by another one past 4095 */
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    if (flSize == 1) {            /* 2 - 1 - 5 */
+        ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+    } else if (flSize == 2) {     /* 2 - 2 - 12 */
+        MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+    } else if (flSize == 3) {     /* 2 - 2 - 20 */
+        MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+    } else {                      /* not necessary : flSize is {1,2,3} */
+        assert(0);
+    }
+
+    memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+/* ZSTD_compressRleLiteralsBlock() :
+ * Stores the literals as a single repeated byte (set_rle) : a header of 1, 2 or 3
+ * bytes, depending on srcSize, followed by the first byte of `src`.
+ * `dstCapacity` is not checked here.
+ * @return : nb of bytes written into `dst` (header size + 1) */
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    /* header grows by one byte past 31 literals, and by another one past 4095 */
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    if (flSize == 1) {            /* 2 - 1 - 5 */
+        ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+    } else if (flSize == 2) {     /* 2 - 2 - 12 */
+        MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+    } else if (flSize == 3) {     /* 2 - 2 - 20 */
+        MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+    } else {                      /* not necessary : flSize is {1,2,3} */
+        assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+/* ZSTD_compressLiterals() :
+ * Writes the literals section for `src` into `dst`, choosing between
+ * raw storage, RLE storage, and Huffman compression (1 or 4 streams).
+ * `nextHuf` always starts as a copy of `prevHuf` ; it keeps that copy whenever
+ * the function falls back to raw or RLE storage.
+ * Raw storage is used when `disableLiteralCompression` is set, when `src` is too small,
+ * or when Huffman compression fails or does not save at least `minGain` bytes.
+ * @return : nb of bytes written into `dst`, or an error code */
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    /* header size : 3 bytes, +1 from 1 KB of literals, +1 more from 16 KB */
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+                disableLiteralCompression, (U32)srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+    /* small ? don't even attempt compression (speed opt) */
+    /* the threshold is much lower (6) when a previous table is flagged as valid for reuse */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+    {   HUF_repeat repeat = prevHuf->repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        /* with a valid previous table and the smallest header, a single stream is used */
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        /* compressed payload is written right after the space reserved for the header */
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        if (repeat != HUF_repeat_none) {
+            /* reused the existing table */
+            DEBUGLOG(5, "Reusing previous huffman table");
+            hType = set_repeat;
+        }
+    }
+
+    /* not compressible, not enough gain, or error : restore nextHuf and store raw.
+     * note : the three tests are combined with a bitwise `|`, so all of them are evaluated */
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+    /* a compressed size of 1 is stored as an RLE block instead ; nextHuf is restored as well */
+    if (cLitSize==1) {
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
+
+    if (hType == set_compressed) {
+        /* using a newly constructed table */
+        nextHuf->repeatMode = HUF_repeat_check;
+    }
+
+    /* Build header */
+    /* layout, from lowest bits : block type (2 bits) - size format (2 bits) - srcSize - cLitSize */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            /* only the 10 lowest bits of cLitSize fit in the first 4 bytes ; the rest goes into the 5th */
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
+    return lhSize+cLitSize;
+}
+/**** ended inlining compress/zstd_compress_literals.c ****/
+/**** start inlining compress/zstd_compress_sequences.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+/**** start inlining zstd_compress_sequences.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+/**** skipping file: ../common/fse.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
+/**** ended inlining zstd_compress_sequences.h ****/
+
+/**
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ *
+ * Entries are fixed-point values with 8 fractional bits (for instance
+ * x == 128 maps to 256, i.e. exactly 1 bit). This is why the cost functions
+ * using this table shift their accumulated sum right by 8 before returning.
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+/* Views the FSE_CTable as an array of U16 and returns the value read at
+ * index 1, which is used by callers as the table's max symbol value. */
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable)
+{
+    void const* const raw = ctable;   /* hop through void* before re-typing */
+    U16 const* const header = (U16 const*)raw;
+    return (U32)MEM_read16(header + 1);
+}
+
+/**
+ * Estimates the size, in bytes, of the normalized count header for `count`.
+ * The header is really serialized, but into a throw-away stack buffer :
+ * only the resulting size is of interest.
+ * Errors reported by the FSE helpers are passed through to the caller.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    S16 norm[MaxSeq + 1];
+    BYTE scratch[FSE_NCOUNTBOUND];   /* discarded once measured */
+    U32 const tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
+    return FSE_writeNCount(scratch, sizeof(scratch), norm, max, tableLog);
+}
+
+/**
+ * Entropy-bound estimate, in bits, of the cost of encoding the distribution
+ * held in count[0..max], where `total` is the sum of all counts.
+ * Each symbol's probability is scaled to 256 (and clamped to at least 1 when
+ * the symbol is present) before looking up its cost.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned acc = 0;   /* running cost, with 8 fractional bits */
+    unsigned s = 0;
+    while (s <= max) {
+        unsigned const freq = count[s];
+        unsigned prob = (unsigned)((256 * freq) / total);
+        if (prob == 0 && freq != 0) prob = 1;   /* present symbols never get probability 0 */
+        assert(count[s] < total);
+        acc += freq * kInverseProbabilityLog256[prob];
+        ++s;
+    }
+    return acc >> 8;
+}
+
+/**
+ * Estimates, in bits, the cost of encoding the distribution count[0..max]
+ * with the already-built table `ctable`.
+ * Fails with ERROR(GENERIC) when the table's max symbol value is below `max`,
+ * or when a symbol present in `count` has no usable cost in the table.
+ */
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+        return ERROR(GENERIC);
+    }
+    /* both values only depend on the table, not on the symbol */
+    {   unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        for (s = 0; s <= max; ++s) {
+            unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+            if (count[s] != 0) {
+                if (bitCost >= badCost) {
+                    DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+                    return ERROR(GENERIC);
+                }
+                cost += (size_t)count[s] * bitCost;
+            }
+        }
+    }
+    return cost >> kAccuracyLog;
+}
+
+/**
+ * Estimates, in bits, the cost of encoding the distribution count[0..max]
+ * with the table described by `norm`, whose log size is `accuracyLog` (<= 8).
+ * `norm` is expected to cover at least symbols 0..max, and to be valid for
+ * every symbol that is present in `count`.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max)
+{
+    size_t cost = 0;
+    unsigned const shift = 8 - accuracyLog;   /* rescales norm to a total of 256 */
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned normAcc = 1;   /* a stored value of -1 is counted as 1 */
+        unsigned norm256;
+        if (norm[s] != -1) normAcc = (unsigned)norm[s];
+        norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
+
+/* ZSTD_selectEncodingType() :
+ * Decides how one symbol table gets transmitted :
+ * set_rle, set_basic, set_repeat or set_compressed.
+ * `*repeatMode` describes the previous table on entry, and is updated on exit
+ * (FSE_repeat_none for set_rle / set_basic, FSE_repeat_check for set_compressed,
+ *  left unchanged for set_repeat).
+ * Strategies below ZSTD_lazy rely on quick heuristics based on nbSeq and
+ * mostFrequent; the other strategies estimate and compare the bit cost of
+ * each candidate. */
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {   /* the most frequent symbol accounts for every sequence */
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or less symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+             * If basic encoding isn't possible, always choose RLE.
+             */
+            DEBUGLOG(5, "Selected set_basic");
+            return set_basic;
+        }
+        DEBUGLOG(5, "Selected set_rle");
+        return set_rle;
+    }
+    if (strategy < ZSTD_lazy) {
+        /* heuristic path : no cost estimation.
+         * When the default table is not allowed, falls through to set_compressed. */
+        if (isDefaultAllowed) {
+            size_t const staticFse_nbSeq_max = 1000;
+            size_t const mult = 10 - strategy;
+            size_t const baseLog = 3;
+            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
+            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
+            assert(mult <= 9 && mult >= 7);
+            if ( (*repeatMode == FSE_repeat_valid)
+              && (nbSeq < staticFse_nbSeq_max) ) {
+                DEBUGLOG(5, "Selected set_repeat");
+                return set_repeat;
+            }
+            if ( (nbSeq < dynamicFse_nbSeq_min)
+              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+                DEBUGLOG(5, "Selected set_basic");
+                /* The format allows default tables to be repeated, but it isn't useful.
+                 * When using simple heuristics to select encoding type, we don't want
+                 * to confuse these tables with dictionaries. When running more careful
+                 * analysis, we don't need to waste time checking both repeating tables
+                 * and default tables.
+                 */
+                *repeatMode = FSE_repeat_none;
+                return set_basic;
+            }
+        }
+    } else {
+        /* cost-based path : a candidate which is not available gets an error code
+         * in place of its cost.
+         * NOTE(review): the comparisons below presumably rely on error codes
+         * being larger than any real cost - confirm against ERROR() definition. */
+        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);   /* header bytes converted to bits, plus payload bits */
+
+        if (isDefaultAllowed) {
+            assert(!ZSTD_isError(basicCost));
+            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+        }
+        assert(!ZSTD_isError(NCountCost));
+        assert(compressedCost < ERROR(maxCode));
+        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+        /* ties are resolved in favor of basic, then repeat, then compressed */
+        if (basicCost <= repeatCost && basicCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_basic");
+            assert(isDefaultAllowed);
+            *repeatMode = FSE_repeat_none;
+            return set_basic;
+        }
+        if (repeatCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_repeat");
+            assert(!ZSTD_isError(repeatCost));
+            return set_repeat;
+        }
+        assert(compressedCost < basicCost && compressedCost < repeatCost);
+    }
+    DEBUGLOG(5, "Selected set_compressed");
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
+
+/* ZSTD_buildCTable() :
+ * Fills `nextCTable` for the selected `type` :
+ * - set_rle : table built by FSE_buildCTable_rle() for symbol `max`;
+ *             codeTable[0] is written into dst (1 byte)
+ * - set_repeat : byte copy of prevCTable (prevCTableSize bytes), nothing written
+ * - set_basic : table built from defaultNorm, nothing written
+ * - set_compressed : table built from the normalized `count`, whose
+ *                    description is written into dst. `count` may be modified.
+ * @return : nb of bytes written into dst, or an error code */
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    const BYTE* const oend = op + dstCapacity;
+    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+    switch (type) {
+    case set_rle:
+        /* note : the table is built before the capacity check, so nextCTable
+         * is already updated when dstSize_tooSmall is returned */
+        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
+        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
+        *op = codeTable[0];
+        return 1;
+    case set_repeat:
+        memcpy(nextCTable, prevCTable, prevCTableSize);
+        return 0;
+    case set_basic:
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        S16 norm[MaxSeq + 1];
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        /* The symbol of the last sequence is counted one time less (when it
+         * appears more than once), and the total is reduced accordingly.
+         * NOTE(review): presumably because that symbol only initializes the
+         * encoder state in ZSTD_encodeSequences_body() - confirm. */
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+/* ZSTD_encodeSequences_body() :
+ * Template body shared by the _default and _bmi2 variants.
+ * Sequences are processed from last to first : sequence nbSeq-1 initializes
+ * the 3 FSE states and emits its extra bits, then sequences nbSeq-2 down to 0
+ * are encoded, and finally the 3 states are flushed into the bitstream.
+ * Index nbSeq-1 is read unconditionally, so nbSeq must be >= 1.
+ * When `longOffsets` is set, offset bits are added in 2 parts, with a flush
+ * in between, whenever an offset code exceeds STREAM_ACCUMULATOR_MIN-1 bits.
+ * @return : size of the bitstream written into dst,
+ *           or dstSize_tooSmall error */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    /* remaining sequences, from nbSeq-2 down to 0.
+     * The trailing columns of comments are the author's bit-budget annotations
+     * for 32-bit and 64-bit targets. */
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    }   }
+
+    /* final states are flushed in this order : match length, offset, literal length */
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    /* a stream size of 0 is reported as "not enough space" */
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+        return streamSize;
+    }
+}
+
+/* Plain instantiation of the encoder template, without any target attribute.
+ * All arguments are forwarded untouched to ZSTD_encodeSequences_body(). */
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    size_t const cSize = ZSTD_encodeSequences_body(
+            dst, dstCapacity,
+            CTable_MatchLength, mlCodeTable,
+            CTable_OffsetBits, ofCodeTable,
+            CTable_LitLength, llCodeTable,
+            sequences, nbSeq, longOffsets);
+    return cSize;
+}
+
+
+#if DYNAMIC_BMI2
+
+/* Instantiation of the encoder template carrying the "bmi2" target attribute.
+ * All arguments are forwarded untouched to ZSTD_encodeSequences_body(). */
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    size_t const cSize = ZSTD_encodeSequences_body(
+            dst, dstCapacity,
+            CTable_MatchLength, mlCodeTable,
+            CTable_OffsetBits, ofCodeTable,
+            CTable_LitLength, llCodeTable,
+            sequences, nbSeq, longOffsets);
+    return cSize;
+}
+
+#endif
+
+/* ZSTD_encodeSequences() :
+ * Entry point of the sequence encoder : selects the BMI2 instantiation when
+ * it has been compiled in (DYNAMIC_BMI2) and `bmi2` is non-zero, and the
+ * default instantiation otherwise.
+ * @return : value returned by the selected instantiation */
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+    if (bmi2 != 0)
+        return ZSTD_encodeSequences_bmi2(
+                    dst, dstCapacity,
+                    CTable_MatchLength, mlCodeTable,
+                    CTable_OffsetBits, ofCodeTable,
+                    CTable_LitLength, llCodeTable,
+                    sequences, nbSeq, longOffsets);
+#endif
+    (void)bmi2;   /* unused when the BMI2 variant is compiled out */
+    return ZSTD_encodeSequences_default(
+                    dst, dstCapacity,
+                    CTable_MatchLength, mlCodeTable,
+                    CTable_OffsetBits, ofCodeTable,
+                    CTable_LitLength, llCodeTable,
+                    sequences, nbSeq, longOffsets);
+}
+/**** ended inlining compress/zstd_compress_sequences.c ****/
+/**** start inlining compress/zstd_compress_superblock.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+/**** start inlining zstd_compress_superblock.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+
+/**** skipping file: ../zstd.h ****/
+
+/*-*************************************
+*  Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize.
+ * NOTE(review): the return convention is not visible from this declaration;
+ * presumably the compressed size or an error code - confirm at the definition. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
+/**** ended inlining zstd_compress_superblock.h ****/
+
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: hist.h ****/
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: zstd_compress_sequences.h ****/
+/**** skipping file: zstd_compress_literals.h ****/
+
+/*-*************************************
+*  Superblock entropy buffer structs
+***************************************/
+/** ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+    symbolEncodingType_e hType;   /* set_basic, set_rle, set_repeat or set_compressed */
+    BYTE hufDesBuffer[500]; /* destination of HUF_writeCTable(). TODO give name to this value */
+    size_t hufDesSize;   /* not assigned by ZSTD_buildSuperBlockEntropy_literal() itself; presumably set by its caller from the returned size - TODO confirm */
+} ZSTD_hufCTablesMetadata_t;
+
+/** ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;   /* mode selected for literal lengths */
+    symbolEncodingType_e ofType;   /* mode selected for offsets */
+    symbolEncodingType_e mlType;   /* mode selected for match lengths */
+    BYTE fseTablesBuffer[500]; /* table descriptions are written back to back here. TODO give name to this value */
+    size_t fseTablesSize;
+    size_t lastCountSize; /* size of the last table description written with set_compressed (reset to 0 first). This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+/** ZSTD_entropyCTablesMetadata_t :
+ *  Groups the literals (huffman) metadata and the sequences (fse) metadata */
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/** ZSTD_buildSuperBlockEntropy_literal() :
+ *  Builds entropy for the super-block literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  nextHuf starts as a copy of prevHuf, and only keeps a newly built table
+ *  when set_compressed is selected.
+ *  workspace : its first (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned) bytes
+ *              hold the histogram, the remainder is used to build the tree.
+ *  @return : size of huffman description table (0 unless set_compressed
+ *            is selected) or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;   /* histogram, at the very start of workspace */
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;   /* everything after the histogram */
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = 255;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    /* NOTE(review): countWksp aliases the start of workspace, which is also
+     * passed below as scratch space - presumably supported by HIST_count_wksp(),
+     * confirm. */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {   /* a single byte value fills the whole input */
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {   /* distribution considered too flat to be worth compressing */
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            /* NOTE(review): hSize is used in the size comparisons below without
+             * an explicit error check - confirm HUF_writeCTable() cannot fail
+             * with this buffer size. */
+            size_t const hSize = HUF_writeCTable(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));   /* drop the table just built */
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                memcpy(nextHuf, prevHuf, sizeof(*prevHuf));   /* drop the table just built */
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+/** ZSTD_buildSuperBlockEntropy_sequences() :
+ *  Builds entropy for the super-block sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
+    BYTE* const cTableWksp = countWkspStart + countWkspSize;
+    const size_t cTableWkspSize = wkspEnd-cTableWksp;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+
+    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
+    memset(workspace, 0, wkspSize);
+
+    fseMetadata->lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   U32 LLtype;
+        unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->llType = (symbolEncodingType_e) LLtype;
+    }   }
+    /* build CTable for Offsets */
+    {   U32 Offtype;
+        unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
+    }   }
+    /* build CTable for MatchLengths */
+    {   U32 MLtype;
+        unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
+    }   }
+    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
+    return op-ostart;
+}
+
+
+/** ZSTD_buildSuperBlockEntropy() :
+ *  Prepares the entropy description of a whole super-block, in two stages :
+ *   1. literals  : ZSTD_buildSuperBlockEntropy_literal(), whose result is stored
+ *                  into entropyMetadata->hufMetadata.hufDesSize;
+ *   2. sequences : ZSTD_buildSuperBlockEntropy_sequences(), whose result is stored
+ *                  into entropyMetadata->fseMetadata.fseTablesSize.
+ *  Both stages are handed the same workspace, one after the other.
+ *  @return : 0 when both stages succeed,
+ *            otherwise the error code of the first stage that failed */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+                      const ZSTD_entropyCTables_t* prevEntropy,
+                            ZSTD_entropyCTables_t* nextEntropy,
+                      const ZSTD_CCtx_params* cctxParams,
+                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            void* workspace, size_t wkspSize)
+{
+    /* number of literal bytes currently held by the sequence store */
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+
+    /* Stage 1 : literals. The result (size or error code) is recorded before being checked. */
+    {   size_t const hufDesSize = ZSTD_buildSuperBlockEntropy_literal(
+                                        seqStorePtr->litStart, litSize,
+                                        &prevEntropy->huf, &nextEntropy->huf,
+                                        &entropyMetadata->hufMetadata,
+                                        ZSTD_disableLiteralsCompression(cctxParams),
+                                        workspace, wkspSize);
+        entropyMetadata->hufMetadata.hufDesSize = hufDesSize;
+    }
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+
+    /* Stage 2 : sequences. Same pattern : record, then check. */
+    {   size_t const fseTablesSize = ZSTD_buildSuperBlockEntropy_sequences(
+                                        seqStorePtr,
+                                        &prevEntropy->fse, &nextEntropy->fse,
+                                        cctxParams,
+                                        &entropyMetadata->fseMetadata,
+                                        workspace, wkspSize);
+        entropyMetadata->fseMetadata.fseTablesSize = fseTablesSize;
+    }
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+
+    return 0;
+}
+
+/** ZSTD_compressSubBlock_literal() :
+ *  Compresses literals section for a sub-block.
+ *  When we have to write the Huffman table we will sometimes choose a header
+ *  size larger than necessary. This is because we have to pick the header size
+ *  before we know the table size + compressed size, so we have a bound on the
+ *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ *  The Huffman table description is emitted only when writeEntropy=1 and
+ *  hufMetadata->hType is set_compressed.
+ *  *entropyWritten is set to 1 when a Huffman-coded literals section was
+ *  successfully produced; it is set to 0 in every other case
+ *  (raw literals, rle literals, fallback to raw, or failure).
+ *
+ *  hufMetadata->hType has literals block type info.
+ *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ *      If it is set_compressed, the sub-block written with writeEntropy=1 will have a
+ *      Compressed_Literals_Block and the following sub-blocks' literals sections
+ *      will be Treeless_Literals_Block.
+ *      If it is set_repeat, all sub-blocks' literals sections will be Treeless_Literals_Block.
+ *
+ *  hufTable     : Huffman CTable used to encode the literals
+ *  hufMetadata  : literals block type + serialized table description (hufDesBuffer / hufDesSize)
+ *  literals     : literals to encode, litSize bytes
+ *  dst, dstSize : output buffer; nothing is written past dst + dstSize by this function
+ *  bmi2         : currently unused
+ *  @return : compressed size of literals section of a sub-block
+ *            Or 0 if it is unable to compress (including : dst too small to hold
+ *            the literals header or the Huffman table description).
+ *            Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                    const BYTE* literals, size_t litSize,
+                                    void* dst, size_t dstSize,
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    /* Reserve room for the table when choosing the header size (see function comment) */
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    (void)bmi2; /* TODO bmi2... */
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+    *entropyWritten = 0;
+    if (litSize == 0 || hufMetadata->hType == set_basic) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+    } else if (hufMetadata->hType == set_rle) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+    }
+
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    /* The literals header is written at ostart once the payload is known :
+     * it must fit, otherwise (oend - op) below would underflow. */
+    if (dstSize < lhSize) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal: no room for the literals header");
+        return 0;
+    }
+    op += lhSize;
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
+        /* Never copy the table description past the end of dst */
+        if ((size_t)(oend - op) < hufMetadata->hufDesSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal: no room for the Huffman table");
+            return 0;
+        }
+        memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        op += hufMetadata->hufDesSize;
+        cLitSize += hufMetadata->hufDesSize;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+    }
+
+    /* TODO bmi2 */
+    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
+        /* Validate the result before using it to move any cursor */
+        if (cSize == 0 || ERR_isError(cSize)) {
+            DEBUGLOG(5, "Failed to compress literals %s", ZSTD_getErrorName(cSize));
+            return 0;
+        }
+        op += cSize;
+        cLitSize += cSize;
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size. */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+    }
+
+    /* Build header : block type - size format - regenerated size - compressed size */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            /* upper bits of cLitSize that did not fit in the first 32 bits */
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+    return op-ostart;
+}
+
+/* ZSTD_seqDecompressedSize() :
+ * Size regenerated by a run of `nbSeq` sequences starting at `sequences` :
+ * the sum of their match lengths plus `litSize`.
+ * The literal lengths are summed only to feed the consistency asserts :
+ * they must not exceed `litSize`, and must equal it unless `lastSequence` is set. */
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+    size_t totalMatchLength = 0;
+    size_t litLengthSum __attribute__ ((unused)) = 0;
+    size_t n;
+    for (n = 0; n < nbSeq; n++) {
+        ZSTD_sequenceLength const len = ZSTD_getSequenceLength(seqStore, sequences + n);
+        litLengthSum += len.litLength;
+        totalMatchLength += len.matchLength;
+    }
+    assert(litLengthSum <= litSize);
+    if (!lastSequence) {
+        assert(litLengthSum == litSize);
+    }
+    return totalMatchLength + litSize;
+}
+
+/** ZSTD_compressSubBlock_sequences() :
+ *  Compresses sequences section for a sub-block.
+ *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ *  symbol compression modes for the super-block.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
+ *
+ *  Output layout : [nbSeq : 1-3 bytes][seqHead : 1 byte][FSE table descriptions,
+ *  only when writeEntropy=1][sequences bitstream]. When nbSeq==0, only the
+ *  nbSeq field is written, and *entropyWritten is left at 0.
+ *
+ *  fseTables    : FSE CTables used to encode the sequences
+ *  fseMetadata  : symbol compression modes + serialized table descriptions
+ *  sequences, llCode, mlCode, ofCode : the nbSeq sequences and their symbol codes
+ *  dst, dstCapacity : output buffer; nothing is written past dst + dstCapacity here
+ *  @return : compressed size of sequences section of a sub-block
+ *            Or 0 if it is unable to compress
+ *            Or error code (dstSize_tooSmall when the header or the FSE table
+ *            descriptions do not fit into dst). */
+static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                              const seqDef* sequences, size_t nbSeq,
+                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                              void* dst, size_t dstCapacity,
+                                              const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    BYTE* seqHead;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+    *entropyWritten = 0;
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "");
+    if (nbSeq < 0x7F)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        return op - ostart;
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+    if (writeEntropy) {
+        const U32 LLtype = fseMetadata->llType;
+        const U32 Offtype = fseMetadata->ofType;
+        const U32 MLtype = fseMetadata->mlType;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+        /* The header check above only guarantees 4 bytes :
+         * make sure the table descriptions fit before copying them. */
+        RETURN_ERROR_IF((size_t)(oend-op) < fseMetadata->fseTablesSize,
+                        dstSize_tooSmall, "not enough space for FSE tables");
+        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+        memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        op += fseMetadata->fseTablesSize;
+    } else {
+        /* Tables were already transmitted by a previous sub-block : reference them */
+        const U32 repeat = set_repeat;
+        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+    }
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, oend - op,
+                                        fseTables->matchlengthCTable, mlCode,
+                                        fseTables->offcodeCTable, ofCode,
+                                        fseTables->litlengthCTable, llCode,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+#endif
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
+    }
+
+    /* zstd versions <= 1.4.0 mistakenly report error when
+     * sequences section body size is less than 3 bytes.
+     * Fixed by https://github.com/facebook/zstd/pull/1664.
+     * This can happen when the previous sequences section block is compressed
+     * with rle mode and the current block's sequences section is compressed
+     * with repeat mode where sequences section body size can be 1 byte.
+     */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (op-seqHead < 4) {
+        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+                    "an uncompressed block when sequences are < 4 bytes");
+        return 0;
+    }
+#endif
+
+    *entropyWritten = 1;
+    return op - ostart;
+}
+
+/** ZSTD_compressSubBlock() :
+ *  Compresses a single sub-block : a block header, followed by the literals
+ *  section (ZSTD_compressSubBlock_literal()) and the sequences section
+ *  (ZSTD_compressSubBlock_sequences()).
+ *  The block header is written last, once the size of both sections is known.
+ *  *litEntropyWritten and *seqEntropyWritten are set by the respective section writers.
+ *  lastBlock is stored in the lowest bit of the block header.
+ *  @return : compressed size of the sub-block (block header included)
+ *            Or 0 if it failed to compress (either section reported 0)
+ *            Or error code (forwarded from a section writer, or dstSize_tooSmall
+ *            when dst cannot even hold the block header). */
+static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                    const seqDef* sequences, size_t nbSeq,
+                                    const BYTE* literals, size_t litSize,
+                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                    const ZSTD_CCtx_params* cctxParams,
+                                    void* dst, size_t dstCapacity,
+                                    const int bmi2,
+                                    int writeLitEntropy, int writeSeqEntropy,
+                                    int* litEntropyWritten, int* seqEntropyWritten,
+                                    U32 lastBlock)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+    /* Without this guard, (oend - op) below would be negative and the section
+     * writers would be handed a huge bogus capacity. */
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "not enough space for the block header");
+    /* Leave room for the block header, filled in at the end */
+    op += ZSTD_blockHeaderSize;
+    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+                                                        &entropyMetadata->hufMetadata, literals, litSize,
+                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+        if (cLitSize == 0) return 0;
+        op += cLitSize;
+    }
+    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
+                                                  &entropyMetadata->fseMetadata,
+                                                  sequences, nbSeq,
+                                                  llCode, mlCode, ofCode,
+                                                  cctxParams,
+                                                  op, oend-op,
+                                                  bmi2, writeSeqEntropy, seqEntropyWritten);
+        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+        if (cSeqSize == 0) return 0;
+        op += cSeqSize;
+    }
+    /* Write block header : lastBlock (bit 0), block type (bits 1-2), content size (bits 3+) */
+    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+        MEM_writeLE24(ostart, cBlockHeader24);
+    }
+    return op-ostart;
+}
+
+/* ZSTD_estimateSubBlockSize_literal() :
+ * Estimated size of the literals section of a sub-block, depending on hufMetadata->hType :
+ *  - set_basic : litSize
+ *  - set_rle   : 1
+ *  - set_compressed / set_repeat : Huffman estimate computed from the histogram of
+ *    the literals, plus hufDesSize when writeEntropy is set, plus a 3-byte header.
+ *    When the histogram cannot be computed, litSize is returned instead.
+ * `workspace` is used both as the histogram storage and as scratch space. */
+static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    size_t const lhSizeAssumed = 3;   /* literals section header : hard coded 3 bytes */
+    unsigned* const histogram = (unsigned*)workspace;
+    unsigned maxSymbol = 255;
+
+    switch (hufMetadata->hType) {
+    case set_basic:
+        return litSize;
+    case set_rle:
+        return 1;
+    case set_compressed:
+    case set_repeat:
+        {   size_t const largest = HIST_count_wksp (histogram, &maxSymbol, (const BYTE*)literals, litSize, workspace, wkspSize);
+            size_t estimate;
+            if (ZSTD_isError(largest)) return litSize;
+            estimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, histogram, maxSymbol);
+            if (writeEntropy) estimate += hufMetadata->hufDesSize;
+            return estimate + lhSizeAssumed;
+        }
+    default:
+        break;
+    }
+    assert(0); /* impossible */
+    return 0;
+}
+
+/* ZSTD_estimateSubBlockSize_symbolType() :
+ * Estimated size, in bytes, of one symbol stream (nbSeq codes from codeTable).
+ * The entropy cost, in bits, depends on `type` :
+ *  - set_basic : cross-entropy cost against defaultNorm
+ *  - set_rle   : 0
+ *  - set_compressed / set_repeat : bit cost under fseCTable
+ * If that cost is an error code, the estimate is nbSeq * 10.
+ * Otherwise the extra bits of every code are added : additionalBits[code],
+ * or the code itself when additionalBits is NULL. The total is divided by 8. */
+static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, unsigned maxCode,
+                        size_t nbSeq, const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const histogram = (unsigned*)workspace;
+    unsigned maxSymbol = maxCode;
+    size_t estimateInBits = 0;
+    size_t n;
+
+    HIST_countFast_wksp(histogram, &maxSymbol, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+
+    switch (type) {
+    case set_basic:
+        estimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, histogram, maxSymbol);
+        break;
+    case set_compressed:
+    case set_repeat:
+        estimateInBits = ZSTD_fseBitCost(fseCTable, histogram, maxSymbol);
+        break;
+    case set_rle:
+    default:
+        estimateInBits = 0;
+        break;
+    }
+    if (ZSTD_isError(estimateInBits)) return nbSeq * 10;
+
+    for (n = 0; n < nbSeq; n++) {
+        BYTE const code = codeTable[n];
+        if (additionalBits) {
+            estimateInBits += additionalBits[code];
+        } else {
+            estimateInBits += code; /* for offset, offset code is also the number of additional bits */
+        }
+    }
+    return estimateInBits / 8;
+}
+
+/* ZSTD_estimateSubBlockSize_sequences() :
+ * Estimated size of the sequences section of a sub-block :
+ * sum of the per-stream estimates (offsets, literal lengths, match lengths),
+ * plus fseMetadata->fseTablesSize when writeEntropy is set,
+ * plus a 3-byte sequences section header. */
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    /* Offsets : no additionalBits table, the code is the number of extra bits */
+    size_t const ofEstimate = ZSTD_estimateSubBlockSize_symbolType(
+                                    fseMetadata->ofType, ofCodeTable, MaxOff,
+                                    nbSeq, fseTables->offcodeCTable, NULL,
+                                    OF_defaultNorm, OF_defaultNormLog,
+                                    workspace, wkspSize);
+    size_t const llEstimate = ZSTD_estimateSubBlockSize_symbolType(
+                                    fseMetadata->llType, llCodeTable, MaxLL,
+                                    nbSeq, fseTables->litlengthCTable, LL_bits,
+                                    LL_defaultNorm, LL_defaultNormLog,
+                                    workspace, wkspSize);
+    size_t const mlEstimate = ZSTD_estimateSubBlockSize_symbolType(
+                                    fseMetadata->mlType, mlCodeTable, MaxML,
+                                    nbSeq, fseTables->matchlengthCTable, ML_bits,
+                                    ML_defaultNorm, ML_defaultNormLog,
+                                    workspace, wkspSize);
+    size_t const tablesSize = writeEntropy ? fseMetadata->fseTablesSize : 0;
+    size_t const seqHeaderSize = 3; /* Use hard coded size of 3 bytes */
+
+    return ofEstimate + llEstimate + mlEstimate + tablesSize + seqHeaderSize;
+}
+
+/* ZSTD_estimateSubBlockSize() :
+ * Estimated compressed size of a sub-block :
+ * literals section estimate + sequences section estimate + ZSTD_blockHeaderSize.
+ * The literals estimate is computed first; both estimates use the same workspace. */
+static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                        const BYTE* ofCodeTable,
+                                        const BYTE* llCodeTable,
+                                        const BYTE* mlCodeTable,
+                                        size_t nbSeq,
+                                        const ZSTD_entropyCTables_t* entropy,
+                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                        void* workspace, size_t wkspSize,
+                                        int writeLitEntropy, int writeSeqEntropy) {
+    size_t const litEstimate = ZSTD_estimateSubBlockSize_literal(
+                                    literals, litSize,
+                                    &entropy->huf, &entropyMetadata->hufMetadata,
+                                    workspace, wkspSize, writeLitEntropy);
+    size_t const seqEstimate = ZSTD_estimateSubBlockSize_sequences(
+                                    ofCodeTable, llCodeTable, mlCodeTable,
+                                    nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                    workspace, wkspSize, writeSeqEntropy);
+    return litEstimate + seqEstimate + ZSTD_blockHeaderSize;
+}
+
+/* ZSTD_needSequenceEntropyTables() :
+ * @return : 1 if at least one of the three sequence symbol types
+ *           (llType, mlType, ofType) is set_compressed or set_rle,
+ *           0 otherwise. */
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    int const llNeedsTable = (fseMetadata->llType == set_compressed) || (fseMetadata->llType == set_rle);
+    int const mlNeedsTable = (fseMetadata->mlType == set_compressed) || (fseMetadata->mlType == set_rle);
+    int const ofNeedsTable = (fseMetadata->ofType == set_compressed) || (fseMetadata->ofType == set_rle);
+    return llNeedsTable || mlNeedsTable || ofNeedsTable;
+}
+
+/** ZSTD_compressSubBlock_multi() :
+ *  Breaks super-block into multiple sub-blocks and compresses them.
+ *  Entropy will be written to the first block.
+ *  The following blocks will use repeat mode to compress.
+ *  Every committed sub-block is a compressed block. If some input is left
+ *  uncommitted once the last sequence has been processed, that remainder is
+ *  emitted as one trailing uncompressed block (ZSTD_noCompressBlock()).
+ *  A sub-block is closed when its size estimate exceeds cctxParams->targetCBlockSize,
+ *  or when the last sequence has been reached.
+ *  @return : compressed size of the super block (which is multiple ZSTD blocks)
+ *            Or 0 if it failed to compress.
+ *            Or error code (forwarded from ZSTD_compressSubBlock() or ZSTD_noCompressBlock()). */
+static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+                            const ZSTD_compressedBlockState_t* prevCBlock,
+                            ZSTD_compressedBlockState_t* nextCBlock,
+                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            const ZSTD_CCtx_params* cctxParams,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const int bmi2, U32 lastBlock,
+                            void* workspace, size_t wkspSize)
+{
+    /* Cursors : sp/lp/ip/op point to the first sequence / literal / input byte /
+     * output byte that has not been committed yet. They only move when a
+     * sub-block is committed. */
+    const seqDef* const sstart = seqStorePtr->sequencesStart;
+    const seqDef* const send = seqStorePtr->sequences;
+    const seqDef* sp = sstart;
+    const BYTE* const lstart = seqStorePtr->litStart;
+    const BYTE* const lend = seqStorePtr->lit;
+    const BYTE* lp = lstart;
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    const BYTE* llCodePtr = seqStorePtr->llCode;
+    const BYTE* mlCodePtr = seqStorePtr->mlCode;
+    const BYTE* ofCodePtr = seqStorePtr->ofCode;
+    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+    /* litSize / seqCount describe the pending (not yet committed) sub-block */
+    size_t litSize, seqCount;
+    /* The Huffman table only needs writing when the literals are set_compressed */
+    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeSeqEntropy = 1;
+    int lastSequence = 0;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+                (unsigned)(lend-lp), (unsigned)(send-sstart));
+
+    litSize = 0;
+    seqCount = 0;
+    /* Each iteration adds one sequence to the pending sub-block, then decides
+     * whether the pending sub-block should be closed. */
+    do {
+        size_t cBlockSizeEstimate = 0;
+        if (sstart == send) {
+            /* No sequence at all : single pass, literals only */
+            lastSequence = 1;
+        } else {
+            const seqDef* const sequence = sp + seqCount;
+            lastSequence = sequence == send - 1;
+            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+            seqCount++;
+        }
+        if (lastSequence) {
+            /* The final sub-block takes every remaining literal,
+             * including those located after the last sequence */
+            assert(lp <= lend);
+            assert(litSize <= (size_t)(lend - lp));
+            litSize = (size_t)(lend - lp);
+        }
+        /* I think there is an optimization opportunity here.
+         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+         * since it recalculates estimate from scratch.
+         * For example, it would recount literal distribution and symbol codes every time.
+         */
+        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+                                                       &nextCBlock->entropy, entropyMetadata,
+                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+            int litEntropyWritten = 0;
+            int seqEntropyWritten = 0;
+            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                       sp, seqCount,
+                                                       lp, litSize,
+                                                       llCodePtr, mlCodePtr, ofCodePtr,
+                                                       cctxParams,
+                                                       op, oend-op,
+                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+                                                       &litEntropyWritten, &seqEntropyWritten,
+                                                       lastBlock && lastSequence);
+            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+            /* Commit only when the sub-block is strictly smaller than what it regenerates.
+             * Otherwise nothing moves : the pending sub-block keeps growing with the
+             * next sequences, and the bytes written at op get overwritten later. */
+            if (cSize > 0 && cSize < decompressedSize) {
+                DEBUGLOG(5, "Committed the sub-block");
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
+                sp += seqCount;
+                lp += litSize;
+                op += cSize;
+                llCodePtr += seqCount;
+                mlCodePtr += seqCount;
+                ofCodePtr += seqCount;
+                litSize = 0;
+                seqCount = 0;
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
+                }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
+            }
+        }
+    } while (!lastSequence);
+    if (writeLitEntropy) {
+        /* The Huffman table was never written into the output :
+         * put the previous block's Huffman tables back into nextCBlock */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+    }
+    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+        /* If we haven't written our entropy tables, then we've violated our contract and
+         * must emit an uncompressed block.
+         */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        return 0;
+    }
+    if (ip < iend) {
+        /* Some input was never committed : emit it as one uncompressed block */
+        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            /* Replay only the committed sequences [sstart, sp),
+             * starting from the previous block's repcodes */
+            seqDef const* seq;
+            repcodes_t rep;
+            memcpy(&rep, prevCBlock->rep, sizeof(rep)); 
+            for (seq = sstart; seq < sp; ++seq) {
+                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+    return op-ostart;
+}
+
+/* ZSTD_compressSuperBlock() :
+ * Compresses the content of zc->seqStore as a super-block, in two steps :
+ *  1. ZSTD_buildSuperBlockEntropy() fills a local entropy metadata structure;
+ *  2. ZSTD_compressSubBlock_multi() uses it to emit the sub-blocks into dst.
+ * Both steps use zc->entropyWorkspace, with a declared size of HUF_WORKSPACE_SIZE.
+ * @return : the result of ZSTD_compressSubBlock_multi(),
+ *           or the error code of step 1 if it failed */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+    size_t cSize;
+
+    /* Step 1 : entropy tables + metadata for the whole super-block */
+    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+          &zc->blockState.prevCBlock->entropy,
+          &zc->blockState.nextCBlock->entropy,
+          &zc->appliedParams,
+          &entropyMetadata,
+          zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+
+    /* Step 2 : split into sub-blocks and write them out */
+    cSize = ZSTD_compressSubBlock_multi(
+                &zc->seqStore,
+                zc->blockState.prevCBlock,
+                zc->blockState.nextCBlock,
+                &entropyMetadata,
+                &zc->appliedParams,
+                dst, dstCapacity,
+                src, srcSize,
+                zc->bmi2, lastBlock,
+                zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+    return cSize;
+}
+/**** ended inlining compress/zstd_compress_superblock.c ****/
+/**** start inlining compress/zstd_compress.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <limits.h>         /* INT_MAX */
+#include <string.h>         /* memset */
+/**** start inlining ../common/cpu.h ****/
+/*
+ * Copyright (c) 2018-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include <string.h>
+
+/**** skipping file: mem.h ****/
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {  /* raw CPUID register snapshots filled in by ZSTD_cpuid(); all zero when CPUID is unavailable */
+    U32 f1c;  /* leaf 1, ECX */
+    U32 f1d;  /* leaf 1, EDX */
+    U32 f7b;  /* leaf 7 (sub-leaf 0), EBX */
+    U32 f7c;  /* leaf 7 (sub-leaf 0), ECX */
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {  /* queries CPUID leaves 1 and 7 and returns the selected registers by value */
+    U32 f1c = 0;  /* every field stays 0 unless one of the branches below overwrites it */
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* MSVC on x86/x64: use the compiler intrinsics */
+    int reg[4];  /* intrinsic output; per the MSVC documentation the order is EAX, EBX, ECX, EDX */
+    __cpuid((int*)reg, 0);
+    {
+        int const n = reg[0];  /* EAX of leaf 0; gates the leaf 1 and leaf 7 queries below */
+        if (n >= 1) {
+            __cpuid((int*)reg, 1);
+            f1c = (U32)reg[2];
+            f1d = (U32)reg[3];
+        }
+        if (n >= 7) {
+            __cpuidex((int*)reg, 7, 0);  /* leaf 7, sub-leaf 0 */
+            f7b = (U32)reg[1];
+            f7c = (U32)reg[2];
+        }
+    }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block is like the normal cpuid branch below, but gcc
+     * reserves ebx for use as its PIC register, so we must explicitly
+     * save and restore it to avoid clobbering the register.
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;  /* EAX output is captured but not used afterwards */
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "popl %%ebx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1));
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "movl %%ebx, %%eax\n\t"  /* EBX result is moved into EAX so it can be returned through the "=a" output */
+          "popl %%ebx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)  /* generic GCC-style inline asm; EBX is read or clobbered directly */
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;  /* EAX output is captured but not used afterwards */
+      __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+      U32 f7a;  /* EAX output is captured but not used afterwards */
+      __asm__("cpuid"
+              : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+              : "a"(7), "c"(0)
+              : "edx");
+    }
+#endif  /* when no branch matches, nothing is queried and the zero defaults are returned */
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
+    return ((cpuid.r) & (1U << bit)) != 0;                                     \
+  }  /* X(): generates ZSTD_cpuid_<name>(), which returns 1 when bit `bit` of field `r` is set and 0 otherwise */
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)  /* accessors over f1c (leaf 1, ECX) */
+  C(sse3, 0)
+  C(pclmuldq, 1)
+  C(dtes64, 2)
+  C(monitor, 3)
+  C(dscpl, 4)
+  C(vmx, 5)
+  C(smx, 6)
+  C(eist, 7)
+  C(tm2, 8)
+  C(ssse3, 9)
+  C(cnxtid, 10)
+  C(fma, 12)  /* no accessor is generated for bit 11 */
+  C(cx16, 13)
+  C(xtpr, 14)
+  C(pdcm, 15)
+  C(pcid, 17)  /* no accessor is generated for bit 16 */
+  C(dca, 18)
+  C(sse41, 19)
+  C(sse42, 20)
+  C(x2apic, 21)
+  C(movbe, 22)
+  C(popcnt, 23)
+  C(tscdeadline, 24)
+  C(aes, 25)
+  C(xsave, 26)
+  C(osxsave, 27)
+  C(avx, 28)
+  C(f16c, 29)
+  C(rdrand, 30)
+#undef C  /* released here; C is defined again further down for the f7c field */
+#define D(name, bit) X(name, f1d, bit)  /* accessors over f1d (leaf 1, EDX) */
+  D(fpu, 0)
+  D(vme, 1)
+  D(de, 2)
+  D(pse, 3)
+  D(tsc, 4)
+  D(msr, 5)
+  D(pae, 6)
+  D(mce, 7)
+  D(cx8, 8)
+  D(apic, 9)
+  D(sep, 11)  /* no accessor is generated for bit 10 */
+  D(mtrr, 12)
+  D(pge, 13)
+  D(mca, 14)
+  D(cmov, 15)
+  D(pat, 16)
+  D(pse36, 17)
+  D(psn, 18)
+  D(clfsh, 19)
+  D(ds, 21)  /* no accessor is generated for bit 20 */
+  D(acpi, 22)
+  D(mmx, 23)
+  D(fxsr, 24)
+  D(sse, 25)
+  D(sse2, 26)
+  D(ss, 27)
+  D(htt, 28)
+  D(tm, 29)
+  D(pbe, 31)  /* no accessor is generated for bit 30 */
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)  /* accessors over f7b (leaf 7, EBX) */
+  B(bmi1, 3)
+  B(hle, 4)
+  B(avx2, 5)
+  B(smep, 7)
+  B(bmi2, 8)  /* ZSTD_cpuid_bmi2() is what the CCtx initialisers use to set cctx->bmi2 */
+  B(erms, 9)
+  B(invpcid, 10)
+  B(rtm, 11)
+  B(mpx, 14)
+  B(avx512f, 16)
+  B(avx512dq, 17)
+  B(rdseed, 18)
+  B(adx, 19)
+  B(smap, 20)
+  B(avx512ifma, 21)
+  B(pcommit, 22)
+  B(clflushopt, 23)
+  B(clwb, 24)
+  B(avx512pf, 26)
+  B(avx512er, 27)
+  B(avx512cd, 28)
+  B(sha, 29)
+  B(avx512bw, 30)
+  B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)  /* accessors over f7c (leaf 7, ECX); same macro name as the earlier f1c group, which was #undef'd */
+  C(prefetchwt1, 0)
+  C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
+/**** ended inlining ../common/cpu.h ****/
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: hist.h ****/
+#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: zstd_compress_sequences.h ****/
+/**** skipping file: zstd_compress_literals.h ****/
+/**** start inlining zstd_fast.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_FAST_H
+#define ZSTD_FAST_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: zstd_compress_internal.h ****/
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);  /* presumably inserts positions up to `end` into ms's hash table - confirm in zstd_fast.c */
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* the three block compressors declared here share one signature */
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* NOTE(review): the suffix suggests a dictionary-attached match state - confirm against the implementation */
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* NOTE(review): the suffix suggests an external-dictionary window - confirm against the implementation */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_FAST_H */
+/**** ended inlining zstd_fast.h ****/
+/**** start inlining zstd_double_fast.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_DOUBLE_FAST_H
+#define ZSTD_DOUBLE_FAST_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: zstd_compress_internal.h ****/
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);  /* same parameter list as ZSTD_fillHashTable(); table details live in zstd_double_fast.c */
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* the three block compressors declared here share one signature */
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* NOTE(review): the suffix suggests a dictionary-attached match state - confirm against the implementation */
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* NOTE(review): the suffix suggests an external-dictionary window - confirm against the implementation */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_DOUBLE_FAST_H */
+/**** ended inlining zstd_double_fast.h ****/
+/**** start inlining zstd_lazy.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LAZY_H
+#define ZSTD_LAZY_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**** skipping file: zstd_compress_internal.h ****/
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);  /* defined in zstd_lazy.c; return value semantics are not visible from this header */
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* first group: four strategy entry points without a suffix; every compressor in this header has the same signature */
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* second group: the same four strategies with the _dictMatchState suffix */
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* third group: the same four strategies with the _extDict suffix */
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_LAZY_H */
+/**** ended inlining zstd_lazy.h ****/
+/**** start inlining zstd_opt.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_OPT_H
+#define ZSTD_OPT_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**** skipping file: zstd_compress_internal.h ****/
+
+/* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);  /* takes a byte range [ip, iend); exact update semantics live in zstd_opt.c */
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* every compressor in this header has the same signature */
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);  /* the only btultra2 entry point: no _dictMatchState or _extDict variant is declared */
+
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+        /* note : no btultra2 variant for extDict nor dictMatchState,
+         * because btultra2 is not meant to work with dictionaries
+         * and is only specific for the first block (no prefix) */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_OPT_H */
+/**** ended inlining zstd_opt.h ****/
+/**** start inlining zstd_ldm.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: ../zstd.h ****/
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params);  /* presumably hashes the bytes in [ip, iend) into `state` using `params` (read-only) - confirm in zstd_ldm.c */
+
+/**
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
+
+/**
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            void const* src, size_t srcSize);
+
+/**
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
+
+/** ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables or 0 if LDM is
+ *  disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/** ZSTD_ldm_getMaxNbSeq() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/** ZSTD_ldm_adjustParameters() :
+ *  If the params->hashRateLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_LDM_H */
+/**** ended inlining zstd_ldm.h ****/
+/**** skipping file: zstd_compress_superblock.h ****/
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only compatible with the "normal"
+ * full-block strategy.
+ * When there are a lot of small blocks due to frequent flush in streaming mode
+ * the overhead of headers can make the compressed data larger than the
+ * return value of ZSTD_compressBound().
+ */
+size_t ZSTD_compressBound(size_t srcSize) {  /* function form of the ZSTD_COMPRESSBOUND() macro */
+    return ZSTD_COMPRESSBOUND(srcSize);
+}
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CDict_s {
+    const void* dictContent;  /* dictionary bytes; ownership is not visible in this struct - TODO confirm */
+    size_t dictContentSize;  /* presumably the size in bytes of dictContent - confirm */
+    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    ZSTD_cwksp workspace;  /* embedded by value, as in ZSTD_CCtx */
+    ZSTD_matchState_t matchState;
+    ZSTD_compressedBlockState_t cBlockState;
+    ZSTD_customMem customMem;  /* allocator pair; presumably used when the CDict is freed - confirm */
+    U32 dictID;
+    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{   /* convenience constructor: same as ZSTD_createCCtx_advanced() with ZSTD_defaultCMem; NULL on failure */
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
+{   /* brings a freshly allocated CCtx to its initial state; cctx must be non-NULL */
+    assert(cctx != NULL);
+    memset(cctx, 0, sizeof(*cctx));  /* every field starts at zero; the fields below are then set explicitly */
+    cctx->customMem = memManager;
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());  /* runtime CPU feature detection, done once per context */
+    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
+        assert(!ZSTD_isError(err));  /* a reset failure is only checked in assert-enabled builds */
+        (void)err;  /* keeps `err` referenced when assert() compiles to nothing */
+    }
+}
+
+ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
+{   /* allocates a CCtx through customMem and initialises it; returns NULL on invalid allocator pair or allocation failure */
+    ZSTD_STATIC_ASSERT(zcss_init==0);  /* compile-time check on the zcss_init enum value */
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;  /* reject the case where exactly one of alloc/free is provided */
+    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
+        if (!cctx) return NULL;
+        ZSTD_initCCtx(cctx, customMem);
+        return cctx;
+    }
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
+{   /* builds a CCtx inside the caller-provided buffer; returns NULL if the buffer is too small or misaligned */
+    ZSTD_cwksp ws;  /* temporary workspace descriptor, moved into the CCtx once the CCtx exists */
+    ZSTD_CCtx* cctx;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+
+    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));  /* the CCtx itself is the first object reserved from the workspace */
+    if (cctx == NULL) return NULL;
+
+    memset(cctx, 0, sizeof(ZSTD_CCtx));  /* customMem is left zeroed here; it is never assigned in this function */
+    ZSTD_cwksp_move(&cctx->workspace, &ws);
+    cctx->staticSize = workspaceSize;  /* non-zero staticSize makes ZSTD_freeCCtx() refuse this context */
+
+    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+    if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;  /* room for the three reservations below */
+    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE);
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());  /* runtime CPU feature detection, as in ZSTD_initCCtx() */
+    return cctx;
+}
+
+/**
+ * Clears and frees all of the dictionaries in the CCtx.
+ */
+static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
+{
+    ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);  /* the local dictionary buffer is released with the context's allocator */
+    ZSTD_freeCDict(cctx->localDict.cdict);
+    memset(&cctx->localDict, 0, sizeof(cctx->localDict));  /* drop the now-dangling pointers */
+    memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));  /* prefixDict is only zeroed; nothing in it is freed here */
+    cctx->cdict = NULL;  /* cctx->cdict is only cleared, not freed, in this function */
+}
+
+static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
+{   /* memory attributed to a local dictionary: its buffer (if any) plus its CDict */
+    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;  /* dictSize only counts when dictBuffer is set */
+    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
+    return bufferSize + cdictSize;
+}
+
+static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
+{   /* releases everything the context references (dictionaries, MT context, workspace) but not the ZSTD_CCtx pointer itself */
+    assert(cctx != NULL);
+    assert(cctx->staticSize == 0);  /* static contexts must never reach this function */
+    ZSTD_clearAllDicts(cctx);
+#ifdef ZSTD_MULTITHREAD
+    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
+#endif
+    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{   /* returns 0 on success (including cctx==NULL), or a memory_allocation error for a static context */
+    if (cctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                    "not compatible with static CCtx");
+    {
+        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);  /* sampled before the workspace is freed by the next call */
+        ZSTD_freeCCtxContent(cctx);
+        if (!cctxInWorkspace) {  /* a cctx located inside its own workspace is not passed to ZSTD_free() separately */
+            ZSTD_free(cctx, cctx->customMem);
+        }
+    }
+    return 0;
+}
+
+
+static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
+{   /* size of the multithreading context, or 0 when built without ZSTD_MULTITHREAD */
+#ifdef ZSTD_MULTITHREAD
+    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
+#else
+    (void)cctx;  /* parameter is unused in single-threaded builds */
+    return 0;
+#endif
+}
+
+
+size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
+{   /* sum of: the struct (unless it sits at the start of its own workspace), the workspace, the local dictionary, the MT context */
+    if (cctx==NULL) return 0;   /* support sizeof on NULL */
+    /* cctx may be in the workspace */
+    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+           + ZSTD_cwksp_sizeof(&cctx->workspace)
+           + ZSTD_sizeof_localDict(cctx->localDict)
+           + ZSTD_sizeof_mtctx(cctx);
+}
+
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
+{   /* forwards to ZSTD_sizeof_CCtx(); zcs is passed through without a cast */
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }  /* read-only pointer to the embedded seqStore; ctx is not NULL-checked */
+
+static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+        ZSTD_compressionParameters cParams)
+{   /* returns, by value, a zeroed ZSTD_CCtx_params carrying cParams, the default level and contentSizeFlag=1 */
+    ZSTD_CCtx_params cctxParams;
+    memset(&cctxParams, 0, sizeof(cctxParams));
+    cctxParams.cParams = cParams;
+    cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;  /* should not matter, as all cParams are presumed properly defined */
+    assert(!ZSTD_checkCParams(cParams));  /* cParams validity is only verified in assert-enabled builds */
+    cctxParams.fParams.contentSizeFlag = 1;
+    return cctxParams;
+}
+
+static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
+        ZSTD_customMem customMem)
+{   /* heap-allocates a zeroed ZSTD_CCtx_params via customMem; returns NULL on invalid allocator pair or allocation failure */
+    ZSTD_CCtx_params* params;
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;  /* reject the case where exactly one of alloc/free is provided */
+    params = (ZSTD_CCtx_params*)ZSTD_calloc(
+            sizeof(ZSTD_CCtx_params), customMem);
+    if (!params) { return NULL; }
+    params->customMem = customMem;  /* remembered so ZSTD_freeCCtxParams() can release with the same allocator */
+    params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
+    params->fParams.contentSizeFlag = 1;
+    return params;
+}
+
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
+{   /* convenience constructor: same as ZSTD_createCCtxParams_advanced() with ZSTD_defaultCMem; NULL on failure */
+    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
+}
+
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
+{   /* always returns 0; NULL is accepted and ignored */
+    if (params == NULL) { return 0; }
+    ZSTD_free(params, params->customMem);  /* released with the allocator stored in the object itself */
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
+{   /* re-initialises params at ZSTD_CLEVEL_DEFAULT; returns whatever ZSTD_CCtxParams_init() returns */
+    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+}
+
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {  /* returns 0, or a GENERIC error when cctxParams is NULL */
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    memset(cctxParams, 0, sizeof(*cctxParams));  /* zeroes every field, customMem included */
+    cctxParams->compressionLevel = compressionLevel;  /* stored as given; no bounds check is applied here */
+    cctxParams->fParams.contentSizeFlag = 1;
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{   /* returns 0 on success; errors out before touching *cctxParams when it is NULL or params.cParams fails validation */
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    memset(cctxParams, 0, sizeof(*cctxParams));  /* zeroes every field, customMem included */
+    assert(!ZSTD_checkCParams(params.cParams));  /* NOTE(review): repeats the check already enforced by FORWARD_IF_ERROR above */
+    cctxParams->cParams = params.cParams;
+    cctxParams->fParams = params.fParams;  /* fParams copied verbatim; contentSizeFlag is not forced to 1 here */
+    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
+    return 0;
+}
+
+/* ZSTD_assignParamsToCCtxParams() :
+ * params is presumed valid at this stage */
+static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
+        const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{   /* returns a modified copy; neither input is written to */
+    ZSTD_CCtx_params ret = *cctxParams;  /* start from a full copy so all other fields are preserved */
+    assert(!ZSTD_checkCParams(params->cParams));  /* validity is only verified in assert-enabled builds */
+    ret.cParams = params->cParams;
+    ret.fParams = params->fParams;
+    ret.compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
+    return ret;
+}
+
+ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+{   /* returns the inclusive [lowerBound, upperBound] for `param`; unknown parameters yield bounds.error = parameter_unsupported */
+    ZSTD_bounds bounds = { 0, 0, 0 };  /* bounds.error is only assigned in the default case */
+
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+        bounds.lowerBound = ZSTD_minCLevel();  /* the only bounds obtained from function calls rather than constants */
+        bounds.upperBound = ZSTD_maxCLevel();
+        return bounds;
+
+    case ZSTD_c_windowLog:
+        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
+        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_hashLog:
+        bounds.lowerBound = ZSTD_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_chainLog:
+        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
+        bounds.upperBound = ZSTD_CHAINLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_searchLog:
+        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
+        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_minMatch:
+        bounds.lowerBound = ZSTD_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_targetLength:
+        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
+        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
+        return bounds;
+
+    case ZSTD_c_strategy:
+        bounds.lowerBound = ZSTD_STRATEGY_MIN;
+        bounds.upperBound = ZSTD_STRATEGY_MAX;
+        return bounds;
+
+    case ZSTD_c_contentSizeFlag:  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_checksumFlag:  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_dictIDFlag:  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_nbWorkers:
+        bounds.lowerBound = 0;
+#ifdef ZSTD_MULTITHREAD
+        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
+#else
+        bounds.upperBound = 0;  /* single-threaded build: only 0 is accepted */
+#endif
+        return bounds;
+
+    case ZSTD_c_jobSize:
+        bounds.lowerBound = 0;
+#ifdef ZSTD_MULTITHREAD
+        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
+#else
+        bounds.upperBound = 0;  /* single-threaded build: only 0 is accepted */
+#endif
+        return bounds;
+
+    case ZSTD_c_overlapLog:
+#ifdef ZSTD_MULTITHREAD
+        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
+        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
+#else
+        bounds.lowerBound = 0;  /* single-threaded build: only 0 is accepted */
+        bounds.upperBound = 0;
+#endif
+        return bounds;
+
+    case ZSTD_c_enableLongDistanceMatching:  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_ldmHashLog:
+        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmMinMatch:
+        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmBucketSizeLog:
+        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmHashRateLog:
+        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
+        return bounds;
+
+    /* experimental parameters */
+    case ZSTD_c_rsyncable:  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_forceMaxWindow :  /* boolean flag: 0 or 1 */
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_format:
+        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+        bounds.lowerBound = ZSTD_f_zstd1;
+        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_forceAttachDict:
+        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);  /* NOTE(review): compares against ZSTD_dictForceCopy although the upper bound below is ZSTD_dictForceLoad */
+        bounds.lowerBound = ZSTD_dictDefaultAttach;
+        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_literalCompressionMode:
+        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
+        bounds.lowerBound = ZSTD_lcm_auto;
+        bounds.upperBound = ZSTD_lcm_uncompressed;
+        return bounds;
+
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
+    case ZSTD_c_srcSizeHint:
+        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+        return bounds;
+
+    default:
+        bounds.error = ERROR(parameter_unsupported);  /* lower/upper stay 0 in this case */
+        return bounds;
+    }
+}
+
+/* ZSTD_cParam_clampBounds:
+ * Clamps the value into the bounded range.
+ */
+static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
+{   /* rewrites *value in place; returns 0, or the error reported by ZSTD_cParam_getBounds() (then *value is untouched) */
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return bounds.error;
+    if (*value < bounds.lowerBound) *value = bounds.lowerBound;  /* raise to the minimum */
+    if (*value > bounds.upperBound) *value = bounds.upperBound;  /* lower to the maximum */
+    return 0;
+}
+
+#define BOUNDCHECK(cParam, val) { \
+    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+                    parameter_outOfBound, "Param out of bounds"); \
+}  /* BOUNDCHECK(): raises parameter_outOfBound via RETURN_ERROR_IF when ZSTD_cParam_withinBounds() (defined elsewhere) rejects val */
+
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{   /* returns 1 for parameters ZSTD_CCtx_setParameter() accepts once streamStage has left zcss_init, 0 for everything else */
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:  /* the seven parameters below are the only ones authorised */
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+        return 1;
+
+    case ZSTD_c_format:  /* the explicit labels below share the default result; they list the known non-updatable parameters */
+    case ZSTD_c_windowLog:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow :
+    case ZSTD_c_nbWorkers:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    default:
+        return 0;  /* unknown parameter values are also refused */
+    }
+}
+
+/* ZSTD_CCtx_setParameter() :
+ * Checks that `param` may be set in the current stage of `cctx`, then
+ * forwards the request to ZSTD_CCtxParams_setParameter() on cctx->requestedParams.
+ * Outside of the init stage, only parameters accepted by ZSTD_isUpdateAuthorized()
+ * go through, and cctx->cParamsChanged is raised for them.
+ * @return : result of ZSTD_CCtxParams_setParameter(), or an error code */
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
+    if (cctx->streamStage != zcss_init) {
+        if (!ZSTD_isUpdateAuthorized(param)) {
+            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
+        }
+        cctx->cParamsChanged = 1;
+    }
+
+    switch(param)
+    {
+    /* the only parameter requiring a context-level check */
+    case ZSTD_c_nbWorkers:
+        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
+                        "MT not compatible with static alloc");
+        break;
+
+    /* known parameters, nothing to verify at this level */
+    case ZSTD_c_format:
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+        break;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
+}
+
+/* ZSTD_CCtxParams_setParameter() :
+ * Validates `value` for `param`, then stores it into `CCtxParams`.
+ * Depending on the parameter, an invalid value is either rejected
+ * (BOUNDCHECK / RETURN_ERROR_IF => error code) or clamped into the
+ * valid range (ZSTD_cParam_clampBounds).
+ * For several tunables, value 0 means "use default" and skips the bound check.
+ * Multithreading parameters (nbWorkers, jobSize, overlapLog, rsyncable)
+ * only accept 0 when ZSTD_MULTITHREAD is not defined.
+ * @return : the value stored (0 is returned for a negative compression level,
+ *           since size_t cannot represent it), or an error code */
+size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+                                    ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
+    switch(param)
+    {
+    case ZSTD_c_format :
+        BOUNDCHECK(ZSTD_c_format, value);
+        CCtxParams->format = (ZSTD_format_e)value;
+        return (size_t)CCtxParams->format;
+
+    case ZSTD_c_compressionLevel : {
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        if (value) {  /* 0 : does not change current level */
+            CCtxParams->compressionLevel = value;
+        }
+        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
+        return 0;  /* return type (size_t) cannot represent negative values */
+    }
+
+    case ZSTD_c_windowLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_windowLog, value);
+        CCtxParams->cParams.windowLog = (U32)value;
+        return CCtxParams->cParams.windowLog;
+
+    case ZSTD_c_hashLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_hashLog, value);
+        CCtxParams->cParams.hashLog = (U32)value;
+        return CCtxParams->cParams.hashLog;
+
+    case ZSTD_c_chainLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_chainLog, value);
+        CCtxParams->cParams.chainLog = (U32)value;
+        return CCtxParams->cParams.chainLog;
+
+    case ZSTD_c_searchLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_searchLog, value);
+        CCtxParams->cParams.searchLog = (U32)value;
+        return (size_t)value;
+
+    case ZSTD_c_minMatch :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_minMatch, value);
+        CCtxParams->cParams.minMatch = value;
+        return CCtxParams->cParams.minMatch;
+
+    case ZSTD_c_targetLength :
+        BOUNDCHECK(ZSTD_c_targetLength, value);
+        CCtxParams->cParams.targetLength = value;
+        return CCtxParams->cParams.targetLength;
+
+    case ZSTD_c_strategy :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_strategy, value);
+        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
+        return (size_t)CCtxParams->cParams.strategy;
+
+    case ZSTD_c_contentSizeFlag :
+        /* Content size written in frame header _when known_ (default:1) */
+        DEBUGLOG(4, "set content size flag = %u", (value!=0));
+        CCtxParams->fParams.contentSizeFlag = value != 0;
+        return CCtxParams->fParams.contentSizeFlag;
+
+    case ZSTD_c_checksumFlag :
+        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+        CCtxParams->fParams.checksumFlag = value != 0;
+        return CCtxParams->fParams.checksumFlag;
+
+    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
+        CCtxParams->fParams.noDictIDFlag = !value;
+        return !CCtxParams->fParams.noDictIDFlag;
+
+    case ZSTD_c_forceMaxWindow :
+        CCtxParams->forceWindow = (value != 0);
+        return CCtxParams->forceWindow;
+
+    case ZSTD_c_forceAttachDict : {
+        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+        CCtxParams->attachDictPref = pref;
+        return CCtxParams->attachDictPref;
+    }
+
+    case ZSTD_c_literalCompressionMode : {
+        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
+        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
+        CCtxParams->literalCompressionMode = lcm;
+        return CCtxParams->literalCompressionMode;
+    }
+
+    case ZSTD_c_nbWorkers :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+#else
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        CCtxParams->nbWorkers = value;
+        return CCtxParams->nbWorkers;
+#endif
+
+    case ZSTD_c_jobSize :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+#else
+        /* Adjust to the minimum non-default value. */
+        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
+            value = ZSTDMT_JOBSIZE_MIN;
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        assert(value >= 0);
+        CCtxParams->jobSize = value;
+        return CCtxParams->jobSize;
+#endif
+
+    case ZSTD_c_overlapLog :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+#else
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
+        CCtxParams->overlapLog = value;
+        return CCtxParams->overlapLog;
+#endif
+
+    case ZSTD_c_rsyncable :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+#else
+        /* clamp against the bounds of rsyncable itself,
+         * not against those of overlapLog */
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_rsyncable, &value), "");
+        CCtxParams->rsyncable = value;
+        return CCtxParams->rsyncable;
+#endif
+
+    case ZSTD_c_enableLongDistanceMatching :
+        CCtxParams->ldmParams.enableLdm = (value!=0);
+        return CCtxParams->ldmParams.enableLdm;
+
+    case ZSTD_c_ldmHashLog :
+        if (value!=0)   /* 0 ==> auto */
+            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
+        CCtxParams->ldmParams.hashLog = value;
+        return CCtxParams->ldmParams.hashLog;
+
+    case ZSTD_c_ldmMinMatch :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
+        CCtxParams->ldmParams.minMatchLength = value;
+        return CCtxParams->ldmParams.minMatchLength;
+
+    case ZSTD_c_ldmBucketSizeLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
+        CCtxParams->ldmParams.bucketSizeLog = value;
+        return CCtxParams->ldmParams.bucketSizeLog;
+
+    case ZSTD_c_ldmHashRateLog :
+        /* negative values are rejected too : they must not reach the stored field */
+        RETURN_ERROR_IF(value < 0 || value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
+                        parameter_outOfBound, "Param out of bounds!");
+        CCtxParams->ldmParams.hashRateLog = value;
+        return CCtxParams->ldmParams.hashRateLog;
+
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
+    case ZSTD_c_srcSizeHint :
+        if (value!=0)    /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+        CCtxParams->srcSizeHint = value;
+        return CCtxParams->srcSizeHint;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+}
+
+/* ZSTD_CCtx_getParameter() :
+ * Reads `param` from the requested parameters of `cctx` into *value.
+ * @return : result of ZSTD_CCtxParams_getParameter() */
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
+{
+    ZSTD_CCtx_params* const requested = &cctx->requestedParams;
+    return ZSTD_CCtxParams_getParameter(requested, param, value);
+}
+
+/* ZSTD_CCtxParams_getParameter() :
+ * Fetches the value currently stored in `CCtxParams` for `param`
+ * and writes it into *value.
+ * *value is left untouched when an error is returned.
+ * @return : 0 on success,
+ *           or a parameter_unsupported error for an unknown parameter,
+ *           or for jobSize / overlapLog / rsyncable when ZSTD_MULTITHREAD is not defined */
+size_t ZSTD_CCtxParams_getParameter(
+        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
+{
+    int result = 0;
+    switch(param)
+    {
+    case ZSTD_c_format :
+        result = CCtxParams->format;
+        break;
+    case ZSTD_c_compressionLevel :
+        result = CCtxParams->compressionLevel;
+        break;
+
+    /* compression parameters */
+    case ZSTD_c_windowLog :
+        result = (int)CCtxParams->cParams.windowLog;
+        break;
+    case ZSTD_c_hashLog :
+        result = (int)CCtxParams->cParams.hashLog;
+        break;
+    case ZSTD_c_chainLog :
+        result = (int)CCtxParams->cParams.chainLog;
+        break;
+    case ZSTD_c_searchLog :
+        result = CCtxParams->cParams.searchLog;
+        break;
+    case ZSTD_c_minMatch :
+        result = CCtxParams->cParams.minMatch;
+        break;
+    case ZSTD_c_targetLength :
+        result = CCtxParams->cParams.targetLength;
+        break;
+    case ZSTD_c_strategy :
+        result = (unsigned)CCtxParams->cParams.strategy;
+        break;
+
+    /* frame parameters */
+    case ZSTD_c_contentSizeFlag :
+        result = CCtxParams->fParams.contentSizeFlag;
+        break;
+    case ZSTD_c_checksumFlag :
+        result = CCtxParams->fParams.checksumFlag;
+        break;
+    case ZSTD_c_dictIDFlag :
+        result = !CCtxParams->fParams.noDictIDFlag;   /* stored inverted */
+        break;
+
+    case ZSTD_c_forceMaxWindow :
+        result = CCtxParams->forceWindow;
+        break;
+    case ZSTD_c_forceAttachDict :
+        result = CCtxParams->attachDictPref;
+        break;
+    case ZSTD_c_literalCompressionMode :
+        result = CCtxParams->literalCompressionMode;
+        break;
+
+    /* multithreading parameters */
+    case ZSTD_c_nbWorkers :
+#ifndef ZSTD_MULTITHREAD
+        assert(CCtxParams->nbWorkers == 0);
+#endif
+        result = CCtxParams->nbWorkers;
+        break;
+    case ZSTD_c_jobSize :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+#else
+        assert(CCtxParams->jobSize <= INT_MAX);
+        result = (int)CCtxParams->jobSize;
+        break;
+#endif
+    case ZSTD_c_overlapLog :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+#else
+        result = CCtxParams->overlapLog;
+        break;
+#endif
+    case ZSTD_c_rsyncable :
+#ifndef ZSTD_MULTITHREAD
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+#else
+        result = CCtxParams->rsyncable;
+        break;
+#endif
+
+    /* long distance matching parameters */
+    case ZSTD_c_enableLongDistanceMatching :
+        result = CCtxParams->ldmParams.enableLdm;
+        break;
+    case ZSTD_c_ldmHashLog :
+        result = CCtxParams->ldmParams.hashLog;
+        break;
+    case ZSTD_c_ldmMinMatch :
+        result = CCtxParams->ldmParams.minMatchLength;
+        break;
+    case ZSTD_c_ldmBucketSizeLog :
+        result = CCtxParams->ldmParams.bucketSizeLog;
+        break;
+    case ZSTD_c_ldmHashRateLog :
+        result = CCtxParams->ldmParams.hashRateLog;
+        break;
+
+    case ZSTD_c_targetCBlockSize :
+        result = (int)CCtxParams->targetCBlockSize;
+        break;
+    case ZSTD_c_srcSizeHint :
+        result = (int)CCtxParams->srcSizeHint;
+        break;
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    *value = result;
+    return 0;
+}
+
+/** ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Copies `*params` into cctx->requestedParams.
+ *  No other action is performed : parameters are merely stored.
+ *  The call is refused with a stage_wrong error when :
+ *  - cctx is not in its init stage (cctx->streamStage != zcss_init), or
+ *  - a cdict is attached to cctx.
+ *  NOTE(review) : a previous version of this comment described parameters being
+ *  pushed to cctx->mtctx while a compression is ongoing;
+ *  nothing in this function body does so.
+ * @return : 0 on success, or an error code
+ */
+size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "The context is in the wrong stage!");
+    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+                    "Can't override parameters with cdict attached (some must "
+                    "be inherited from the cdict).");
+
+    cctx->requestedParams = *params;
+    return 0;
+}
+
+/* ZSTD_CCtx_setPledgedSrcSize() :
+ * Records the pledged source size into cctx->pledgedSrcSizePlusOne,
+ * stored as pledgedSrcSize+1.
+ * Refused with a stage_wrong error when cctx is not in init stage.
+ * @return : 0 on success, or an error code */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    unsigned long long const sizePlusOne = pledgedSrcSize + 1;
+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't set pledgedSrcSize when not in init stage.");
+    cctx->pledgedSrcSizePlusOne = sizePlusOne;
+    return 0;
+}
+
+/**
+ * Initializes the local dict using the requested parameters.
+ * NOTE: This does not use the pledged src size, because it may be used for more
+ * than one compression.
+ *
+ * Does nothing when cctx->localDict holds no dictionary, or when its cdict
+ * has already been created. Otherwise, creates a cdict referencing the local
+ * dictionary content (ZSTD_dlm_byRef), allocated through cctx->customMem,
+ * and installs it as cctx->cdict.
+ * Compression parameters are derived from cctx->requestedParams,
+ * with an unknown source size and the local dictionary size.
+ * @return : 0 on success,
+ *           or a memory_allocation error when the cdict could not be created.
+ */
+static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+{
+    ZSTD_localDict* const dl = &cctx->localDict;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
+    if (dl->dict == NULL) {
+        /* No local dictionary. */
+        assert(dl->dictBuffer == NULL);
+        assert(dl->cdict == NULL);
+        assert(dl->dictSize == 0);
+        return 0;
+    }
+    if (dl->cdict != NULL) {
+        assert(cctx->cdict == dl->cdict);
+        /* Local dictionary already initialized. */
+        return 0;
+    }
+    assert(dl->dictSize > 0);
+    assert(cctx->cdict == NULL);
+    assert(cctx->prefixDict.dict == NULL);
+
+    dl->cdict = ZSTD_createCDict_advanced(
+            dl->dict,
+            dl->dictSize,
+            ZSTD_dlm_byRef,
+            dl->dictContentType,
+            cParams,
+            cctx->customMem);
+    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
+    cctx->cdict = dl->cdict;   /* the context now compresses with the local cdict */
+    return 0;
+}
+
+/* ZSTD_CCtx_loadDictionary_advanced() :
+ * Registers `dict` as the local dictionary of `cctx`,
+ * after clearing any dictionary the context previously held.
+ * With ZSTD_dlm_byRef, only the pointer is recorded;
+ * otherwise the content is copied into a buffer allocated with cctx->customMem.
+ * A NULL `dict` or a zero `dictSize` just leaves the context without dictionary.
+ * @return : 0 on success, or an error code
+ *           (stage_wrong outside of init stage,
+ *            memory_allocation for a static cctx or a failed allocation) */
+size_t ZSTD_CCtx_loadDictionary_advanced(
+        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't load a dictionary when ctx is not in init stage.");
+    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                    "no malloc for static CCtx");
+    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
+    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
+    if (!dict || !dictSize) {
+        /* no dictionary mode */
+        return 0;
+    }
+    if (dictLoadMethod != ZSTD_dlm_byRef) {
+        /* private copy, owned by the context */
+        void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
+        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+        memcpy(dictBuffer, dict, dictSize);
+        cctx->localDict.dictBuffer = dictBuffer;
+        cctx->localDict.dict = dictBuffer;
+    } else {
+        /* reference only : the caller keeps ownership of the content */
+        cctx->localDict.dict = dict;
+    }
+    cctx->localDict.dictContentType = dictContentType;
+    cctx->localDict.dictSize = dictSize;
+    return 0;
+}
+
+/* ZSTD_CCtx_loadDictionary_byReference() :
+ * Same as ZSTD_CCtx_loadDictionary_advanced(),
+ * using ZSTD_dlm_byRef and ZSTD_dct_auto. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
+      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    ZSTD_dictLoadMethod_e const loadMethod = ZSTD_dlm_byRef;
+    ZSTD_dictContentType_e const contentType = ZSTD_dct_auto;
+    return ZSTD_CCtx_loadDictionary_advanced(cctx, dict, dictSize, loadMethod, contentType);
+}
+
+/* ZSTD_CCtx_loadDictionary() :
+ * Same as ZSTD_CCtx_loadDictionary_advanced(),
+ * using ZSTD_dlm_byCopy and ZSTD_dct_auto. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    ZSTD_dictLoadMethod_e const loadMethod = ZSTD_dlm_byCopy;
+    ZSTD_dictContentType_e const contentType = ZSTD_dct_auto;
+    return ZSTD_CCtx_loadDictionary_advanced(cctx, dict, dictSize, loadMethod, contentType);
+}
+
+
+/* ZSTD_CCtx_refCDict() :
+ * Makes `cctx` reference `cdict`, after calling ZSTD_clearAllDicts() on the context.
+ * Only the pointer is stored.
+ * Only allowed in init stage.
+ * @return : 0 on success, or a stage_wrong error */
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a dict when ctx not in init stage.");
+    /* Free the existing local cdict (if any) to save memory. */
+    ZSTD_clearAllDicts(cctx);
+    cctx->cdict = cdict;
+    return 0;
+}
+
+/* ZSTD_CCtx_refPrefix() :
+ * Same as ZSTD_CCtx_refPrefix_advanced(), using ZSTD_dct_rawContent. */
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    ZSTD_dictContentType_e const contentType = ZSTD_dct_rawContent;
+    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, contentType);
+}
+
+/* ZSTD_CCtx_refPrefix_advanced() :
+ * Records `prefix` (pointer, size and content type) as the prefix dictionary of `cctx`,
+ * after clearing any dictionary the context previously held.
+ * A NULL `prefix` or a zero `prefixSize` only clears.
+ * Only allowed in init stage.
+ * @return : 0 on success, or a stage_wrong error */
+size_t ZSTD_CCtx_refPrefix_advanced(
+        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a prefix when ctx not in init stage.");
+    ZSTD_clearAllDicts(cctx);
+    if (prefix == NULL || prefixSize == 0) {
+        /* nothing to reference */
+        return 0;
+    }
+    cctx->prefixDict.dictContentType = dictContentType;
+    cctx->prefixDict.dictSize = prefixSize;
+    cctx->prefixDict.dict = prefix;
+    return 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  Resets the session, the parameters, or both, depending on `reset`.
+ *  - session : stream stage goes back to init, pledged source size is zeroed.
+ *  - parameters : only allowed in init stage; all dictionaries are cleared,
+ *                 and requested parameters go through ZSTD_CCtxParams_reset().
+ * @return : 0 when only the session is reset (or `reset` matches no directive),
+ *           otherwise the result of ZSTD_CCtxParams_reset(), or a stage_wrong error */
+size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+{
+    int const both = (reset == ZSTD_reset_session_and_parameters);
+    int const wantSession = both || (reset == ZSTD_reset_session_only);
+    int const wantParams  = both || (reset == ZSTD_reset_parameters);
+
+    if (wantSession) {
+        cctx->pledgedSrcSizePlusOne = 0;
+        cctx->streamStage = zcss_init;
+    }
+    if (!wantParams) {
+        return 0;
+    }
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't reset parameters only when not in init stage.");
+    ZSTD_clearAllDicts(cctx);
+    return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+}
+
+
+/** ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
+    Each field of `cParams` is passed through BOUNDCHECK, in this order :
+    windowLog, chainLog, hashLog, searchLog, minMatch, targetLength, strategy.
+    Note that, unlike ZSTD_CCtxParams_setParameter(), value 0 receives no special treatment here.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
+    return 0;
+}
+
+/** ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  Each field is compared, as an int, with the bounds reported by
+ *  ZSTD_cParam_getBounds(), and replaced by the nearest bound when outside.
+ *  The `error` member of the bounds is not inspected.
+ *  The helper macros CLAMP_TYPE and CLAMP are defined inside the function body
+ *  and are not #undef'd afterwards.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type) {                                \
+        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+    }
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/** ZSTD_cycleLog() :
+ *  @return : hashLog - 1 for strategies ZSTD_btlazy2 and above, hashLog otherwise.
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    if ((U32)strat >= (U32)ZSTD_btlazy2) {
+        return hashLog - 1;
+    }
+    return hashLog;
+}
+
+/** ZSTD_adjustCParams_internal() :
+ *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
+ *  mostly downsize to reduce memory consumption and initialization latency.
+ * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ *  note : `srcSize==0` means 0!
+ *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()).
+ *  Steps, in order :
+ *  - with a dictionary and an unknown srcSize, srcSize is assumed to be minSrcSize (513);
+ *  - when both srcSize and dictSize are below maxWindowResize,
+ *    windowLog is capped to the log2 size needed to cover srcSize + dictSize
+ *    (never computed below ZSTD_HASHLOG_MIN);
+ *  - hashLog is capped to windowLog+1;
+ *  - chainLog is reduced so that ZSTD_cycleLog() does not exceed windowLog;
+ *  - windowLog is finally raised to ZSTD_WINDOWLOG_ABSOLUTEMIN if needed.
+ *  @return : the adjusted parameters */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize)
+{
+    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    assert(ZSTD_checkCParams(cPar)==0);
+
+    if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+        srcSize = minSrcSize;
+
+    /* resize windowLog if input is small enough, to use less memory */
+    if ( (srcSize < maxWindowResize)
+      && (dictSize < maxWindowResize) )  {
+        U32 const tSize = (U32)(srcSize + dictSize);
+        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
+        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
+                            ZSTD_highbit32(tSize-1) + 1;
+        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
+    }
+    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
+    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cycleLog > cPar.windowLog)
+            cPar.chainLog -= (cycleLog - cPar.windowLog);
+    }
+
+    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
+
+    return cPar;
+}
+
+/* ZSTD_adjustCParams() :
+ * Clamps `cPar` into valid range, then adjusts it for `srcSize` and `dictSize`
+ * through ZSTD_adjustCParams_internal().
+ * Here, `srcSize==0` is interpreted as ZSTD_CONTENTSIZE_UNKNOWN.
+ * @return : the adjusted parameters */
+ZSTD_compressionParameters
+ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+                   unsigned long long srcSize,
+                   size_t dictSize)
+{
+    /* resulting parameters are necessarily valid (all within range) */
+    ZSTD_compressionParameters const clamped = ZSTD_clampCParams(cPar);
+    unsigned long long const effectiveSrcSize =
+            (srcSize == 0) ? ZSTD_CONTENTSIZE_UNKNOWN : srcSize;
+    return ZSTD_adjustCParams_internal(clamped, effectiveSrcSize, dictSize);
+}
+
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * Builds the compression parameters to use for `CCtxParams` :
+ * starts from the parameters of CCtxParams->compressionLevel,
+ * applies ZSTD_LDM_DEFAULT_WINDOW_LOG when long distance matching is enabled,
+ * lets every non-zero field of CCtxParams->cParams override the result,
+ * then adjusts for `srcSizeHint` and `dictSize`.
+ * When `srcSizeHint` is ZSTD_CONTENTSIZE_UNKNOWN,
+ * a positive CCtxParams->srcSizeHint is used in its place. */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
+{
+    const ZSTD_compressionParameters* const forced = &CCtxParams->cParams;
+    ZSTD_compressionParameters cParams;
+
+    if (CCtxParams->srcSizeHint > 0 && srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN) {
+        srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+
+    /* ldm default comes first, so that an explicit windowLog still wins */
+    if (CCtxParams->ldmParams.enableLdm) {
+        cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    }
+
+    /* explicit settings : 0 means "not set" */
+    if (forced->windowLog)    cParams.windowLog    = forced->windowLog;
+    if (forced->hashLog)      cParams.hashLog      = forced->hashLog;
+    if (forced->chainLog)     cParams.chainLog     = forced->chainLog;
+    if (forced->searchLog)    cParams.searchLog    = forced->searchLog;
+    if (forced->minMatch)     cParams.minMatch     = forced->minMatch;
+    if (forced->targetLength) cParams.targetLength = forced->targetLength;
+    if (forced->strategy)     cParams.strategy     = forced->strategy;
+
+    assert(!ZSTD_checkCParams(cParams));
+    /* srcSizeHint == 0 means 0 */
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
+}
+
+/* ZSTD_sizeof_matchState() :
+ * Computes the space taken by a match state configured with `cParams` :
+ * chain table (none for ZSTD_fast), hash table,
+ * and, only when `forCCtx` is set, the hash3 table (minMatch==3)
+ * and the optimal parser workspace (strategy >= ZSTD_btopt).
+ * @return : size in bytes */
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const U32 forCCtx)
+{
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    size_t const chainSize = (cParams->strategy != ZSTD_fast) ? ((size_t)1 << cParams->chainLog) : 0;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    int const needsOpt = forCCtx && (cParams->strategy >= ZSTD_btopt);
+    size_t const optSpace = needsOpt ? optPotentialSpace : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+/* ZSTD_estimateCCtxSize_usingCCtxParams() :
+ * Adds up the workspace needed by a one-shot, single-threaded compression
+ * using `params` : context structure, entropy workspace, two block states,
+ * LDM table and sequences, match state, sequence storage,
+ * plus two empty buffers.
+ * @return : estimated size in bytes,
+ *           or an error code when params->nbWorkers > 0 */
+size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+        size_t const windowSize = (size_t)1 << cParams.windowLog;
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        size_t const maxNbSeq = blockSize / ((cParams.minMatch==3) ? 3 : 4);
+
+        /* literals buffer + sequence definitions + 3 code tables */
+        size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                                + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                                + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+        size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+        size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
+
+        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
+        size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));
+
+        /* estimateCCtxSize is for one-shot compression. So no buffers should
+         * be needed. However, we still allocate two 0-sized buffers, which can
+         * take space under ASAN. */
+        size_t const bufferSpace = 2 * ZSTD_cwksp_alloc_size(0);
+
+        size_t neededSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));
+        neededSpace += entropySpace;
+        neededSpace += blockStateSpace;
+        neededSpace += ldmSpace;
+        neededSpace += ldmSeqSpace;
+        neededSpace += matchStateSize;
+        neededSpace += tokenSpace;
+        neededSpace += bufferSpace;
+
+        DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+        return neededSpace;
+    }
+}
+
+/* ZSTD_estimateCCtxSize_usingCParams() :
+ * Wraps `cParams` into a ZSTD_CCtx_params,
+ * then defers to ZSTD_estimateCCtxSize_usingCCtxParams(). */
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const cctxParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCCtxSize_usingCCtxParams(&cctxParams);
+}
+
+/* ZSTD_estimateCCtxSize_internal() :
+ * Estimates the context size for one exact compression level,
+ * with unknown source size and no dictionary. */
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    return ZSTD_estimateCCtxSize_usingCParams(
+                ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0));
+}
+
+/* ZSTD_estimateCCtxSize() :
+ * @return : the largest estimate found among all levels
+ *           from MIN(compressionLevel, 1) up to compressionLevel included */
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    size_t largest = 0;
+    int level = MIN(compressionLevel, 1);
+    while (level <= compressionLevel) {
+        size_t const candidate = ZSTD_estimateCCtxSize_internal(level);
+        largest = (candidate > largest) ? candidate : largest;
+        level++;
+    }
+    return largest;
+}
+
+/* ZSTD_estimateCStreamSize_usingCCtxParams() :
+ * Estimate for streaming compression : the context estimate from
+ * ZSTD_estimateCCtxSize_usingCCtxParams(), plus an input buffer
+ * (window size + one block) and an output buffer
+ * (ZSTD_compressBound() of one block, + 1).
+ * @return : estimated size in bytes,
+ *           or an error code when params->nbWorkers > 0 */
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+        size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
+        size_t const windowSize = (size_t)1 << cParams.windowLog;
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        size_t const inBuffSpace = ZSTD_cwksp_alloc_size(windowSize + blockSize);
+        size_t const outBuffSpace = ZSTD_cwksp_alloc_size(ZSTD_compressBound(blockSize) + 1);
+
+        return CCtxSize + (inBuffSpace + outBuffSpace);
+    }
+}
+
+/* ZSTD_estimateCStreamSize_usingCParams() :
+ * Wraps `cParams` into a ZSTD_CCtx_params,
+ * then defers to ZSTD_estimateCStreamSize_usingCCtxParams(). */
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const cctxParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCStreamSize_usingCCtxParams(&cctxParams);
+}
+
+/* ZSTD_estimateCStreamSize_internal() :
+ * Estimates the streaming context size for one exact compression level,
+ * with unknown source size and no dictionary. */
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    return ZSTD_estimateCStreamSize_usingCParams(
+                ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0));
+}
+
+/* ZSTD_estimateCStreamSize() :
+ * @return : the largest estimate found among all levels
+ *           from MIN(compressionLevel, 1) up to compressionLevel included */
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    size_t largest = 0;
+    int level = MIN(compressionLevel, 1);
+    while (level <= compressionLevel) {
+        size_t const candidate = ZSTD_estimateCStreamSize_internal(level);
+        largest = (candidate > largest) ? candidate : largest;
+        level++;
+    }
+    return largest;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ * When ZSTD_MULTITHREAD is defined and cctx->appliedParams.nbWorkers > 0,
+ * the answer is delegated to ZSTDMT_getFrameProgression().
+ * Otherwise :
+ * - `ingested` = consumed source size + input still held in the input buffer,
+ * - `consumed` = consumed source size,
+ * - `produced` and `flushed` = produced compressed size,
+ * - `currentJobID` and `nbActiveWorkers` = 0.
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers > 0) {
+        return ZSTDMT_getFrameProgression(cctx->mtctx);
+    }
+#endif
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+                                cctx->inBuffPos - cctx->inToCompress;
+        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+        fp.ingested = cctx->consumedSrcSize + buffered;
+        fp.consumed = cctx->consumedSrcSize;
+        fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
+        return fp;
+}   }
+
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
+ *  When ZSTD_MULTITHREAD is defined and cctx->appliedParams.nbWorkers > 0,
+ *  the answer is delegated to ZSTDMT_toFlushNow().
+ *  In every other situation, the function returns 0.
+ */
+size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers > 0) {
+        return ZSTDMT_toFlushNow(cctx->mtctx);
+    }
+#endif
+    (void)cctx;
+    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
+}
+
+/* ZSTD_assertEqualCParams() :
+ * Debug helper : asserts, field by field, that cParams1 and cParams2 are identical.
+ * The (void) casts presumably silence unused-parameter warnings
+ * in builds where assert() expands to nothing. */
+static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
+                                    ZSTD_compressionParameters cParams2)
+{
+    (void)cParams1;
+    (void)cParams2;
+    assert(cParams1.windowLog    == cParams2.windowLog);
+    assert(cParams1.chainLog     == cParams2.chainLog);
+    assert(cParams1.hashLog      == cParams2.hashLog);
+    assert(cParams1.searchLog    == cParams2.searchLog);
+    assert(cParams1.minMatch     == cParams2.minMatch);
+    assert(cParams1.targetLength == cParams2.targetLength);
+    assert(cParams1.strategy     == cParams2.strategy);
+}
+
+/* ZSTD_reset_compressedBlockState() :
+ * restores the repcodes of `bs` to their start values and flags every
+ * entropy table (huffman and the three FSE tables) as non-repeatable. */
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+    int repIdx = 0;
+    while (repIdx < ZSTD_REP_NUM) {
+        bs->rep[repIdx] = repStartValue[repIdx];
+        repIdx++;
+    }
+    bs->entropy.huf.repeatMode = HUF_repeat_none;
+    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
+ *  Also forgets any loaded dictionary end and any attached dictMatchState.
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+    ZSTD_window_clear(&ms->window);
+
+    /* read after ZSTD_window_clear(), so it picks up whatever dictLimit that call leaves */
+    ms->nextToUpdate = ms->window.dictLimit;
+    ms->loadedDictEnd = 0;
+    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;   /* detach any previously attached dictionary match state */
+}
+
+/**
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
+/**
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+    ZSTDcrp_makeClean,
+    ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
+
+/**
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+    ZSTDirp_continue,
+    ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+/**
+ * Identifies the kind of object a matchState reset is performed for.
+ * In ZSTD_reset_matchState(), hashTable3 and the optimal parser buffers are
+ * only sized / reserved when the target is ZSTD_resetTarget_CCtx.
+ */
+typedef enum {
+    ZSTD_resetTarget_CDict,
+    ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
+
+/* ZSTD_reset_matchState() :
+ * Invalidates `ms` and reserves its tables from workspace `ws`.
+ * @cParams : sizes the hash / chain / hash3 tables; copied into ms->cParams.
+ * @crp : unless ZSTDcrp_leaveDirty, the freshly reserved tables are cleaned.
+ * @forceResetIndex : ZSTDirp_reset re-initializes the window and marks the tables dirty.
+ * @forWho : hashTable3 and the optimal parser buffers are only set up for ZSTD_resetTarget_CCtx.
+ * @return : 0, or a memory_allocation error when a workspace reservation failed. */
+static size_t
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      ZSTD_cwksp* ws,
+                const ZSTD_compressionParameters* cParams,
+                const ZSTD_compResetPolicy_e crp,
+                const ZSTD_indexResetPolicy_e forceResetIndex,
+                const ZSTD_resetTarget_e forWho)
+{
+    /* no chain table is reserved for the ZSTD_fast strategy */
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    /* hashTable3 only exists for a CCtx using minMatch==3; its log is capped by both ZSTD_HASHLOG3_MAX and windowLog */
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    /* a single check covers the three table reservations above */
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    /* second check : catches a failure among the optimal parser reservations */
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * tells whether the window's current index sits within
+ * ZSTD_INDEXOVERFLOW_MARGIN of ZSTD_CURRENT_MAX.
+ * Minor optimization : lets the caller prefer memset() over reduceIndex(),
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    size_t const currentIndex = (size_t)(w.nextSrc - w.base);
+    return currentIndex > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
+/*! ZSTD_resetCCtx_internal() :
+ *  Prepares `zc` for a new compression : sizes (and, if needed, re-creates)
+ *  the workspace, then reserves every buffer and table out of it.
+ *  @params : applied as-is; ldm parameters are adjusted first when ldm is enabled.
+ *  @pledgedSrcSize : may be ZSTD_CONTENTSIZE_UNKNOWN, which clears contentSizeFlag.
+ *  @crp : forwarded to ZSTD_reset_matchState() (clean or leave tables dirty).
+ *  @zbuff : ZSTDb_buffered additionally reserves input / output buffers.
+ *  @return : 0, or an error code (memory_allocation when the workspace cannot
+ *            be resized or a reservation fails).
+ *  note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_CCtx_params params,
+                                      U64 const pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
+{
+    ZSTD_cwksp* const ws = &zc->workspace;
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
+                (U32)pledgedSrcSize, params.cParams.windowLog);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
+    zc->isFirstBlock = 1;
+
+    if (params.ldmParams.enableLdm) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+        zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
+    }
+
+    /* window is capped by the pledged size, but never smaller than 1 byte */
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        size_t const maxNbSeq = blockSize / divider;
+        size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                                + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                                + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
+        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
+        size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+
+        /* a never-initialized context always needs its indices reset */
+        ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset;
+
+        if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+            needsIndexReset = ZSTDirp_reset;
+        }
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        /* Check if workspace is large enough, alloc a new one if needed */
+        {   size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+            size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+            size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+            size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
+            size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
+            size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
+
+            size_t const neededSpace =
+                cctxSpace +
+                entropySpace +
+                blockStateSpace +
+                ldmSpace +
+                ldmSeqSpace +
+                matchStateSize +
+                tokenSpace +
+                bufferSpace;
+
+            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+
+            DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
+                        neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workspaceTooSmall || workspaceWasteful) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                /* a statically allocated context cannot grow its workspace */
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                /* a brand new workspace holds no valid table content */
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
+                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
+                /* check the pointer that was just reserved (not nextCBlock again) */
+                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+        }   }
+
+        ZSTD_cwksp_clear(ws);
+
+        /* init params */
+        zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        zc->producedCSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
+            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
+
+        XXH64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+
+        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        /* buffers */
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmBucketSize =
+                  ((size_t)1) << (params.ldmParams.hashLog -
+                                  params.ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
+            memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+        /* also reports any workspace reservation failure recorded so far */
+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+            &zc->blockState.matchState,
+            ws,
+            &params.cParams,
+            crp,
+            needsIndexReset,
+            ZSTD_resetTarget_CCtx), "");
+
+        /* ldm hash table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+            memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+            zc->maxNbLdmSequences = maxNbLdmSeq;
+
+            ZSTD_window_init(&zc->ldmState.window);
+            ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
+        }
+
+        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        zc->initialized = 1;
+
+        return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * zeroes every repcode of the previous block state, so that the next
+ * compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx)
+{
+    int repIdx = 0;
+    while (repIdx < ZSTD_REP_NUM) {
+        cctx->blockState.prevCBlock->rep[repIdx] = 0;
+        repIdx++;
+    }
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ * Indexed by strategy value; ZSTD_shouldAttachDict() compares the selected
+ * entry against the pledged source size.
+ */
+static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
+    8 KB,  /* unused */
+    8 KB,  /* ZSTD_fast */
+    16 KB, /* ZSTD_dfast */
+    32 KB, /* ZSTD_greedy */
+    32 KB, /* ZSTD_lazy */
+    32 KB, /* ZSTD_lazy2 */
+    32 KB, /* ZSTD_btlazy2 */
+    32 KB, /* ZSTD_btopt */
+    8 KB,  /* ZSTD_btultra */
+    8 KB   /* ZSTD_btultra2 */
+};
+
+/* ZSTD_shouldAttachDict() :
+ * decides whether `cdict` should be referenced in-place (attached) rather
+ * than copied into the working context.
+ * Attaching is chosen when the pledged size is within the strategy's cutoff,
+ * is unknown, or attach is forced; it is vetoed by ZSTD_dictForceCopy and by
+ * forceWindow.
+ * @return : 1 to attach, 0 to copy */
+static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
+                                 const ZSTD_CCtx_params* params,
+                                 U64 pledgedSrcSize)
+{
+    size_t const cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
+    int const attachFavored = (pledgedSrcSize <= cutoff)
+                           || (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+                           || (params->attachDictPref == ZSTD_dictForceAttach);
+
+    if (!attachFavored) return 0;
+    if (params->attachDictPref == ZSTD_dictForceCopy) return 0;
+    /* dictMatchState isn't correctly handled in _enforceMaxDist */
+    return !params->forceWindow;
+}
+
+/* ZSTD_resetCCtx_byAttachingCDict() :
+ * Resets `cctx` and makes its match state reference `cdict`'s match state
+ * in-place (no table copy). An empty dictionary is not attached.
+ * The caller's windowLog is preserved; other cParams derive from the cdict.
+ * Also copies the cdict's dictID and compressed block state into `cctx`.
+ * @return : 0, or an error code forwarded from ZSTD_resetCCtx_internal() */
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
+{
+    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
+        unsigned const windowLog = params.cParams.windowLog;   /* saved : restored after cParams get overwritten below */
+        assert(windowLog != 0);
+        /* Resize working context table params for input only, since the dict
+         * has its own tables. */
+        /* pledgeSrcSize == 0 means 0! */
+        params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_makeClean, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+    }
+
+    /* cdictEnd : index just past the cdict's content; cdictLen : amount of content past its dictLimit */
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
+
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictEnd;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            /* loadedDictEnd is expressed within the referential of the active context */
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+    }   }
+
+    cctx->dictID = cdict->dictID;
+
+    /* copy block state */
+    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+/* ZSTD_resetCCtx_byCopyingCDict() :
+ * Resets `cctx` with the cdict's cParams (keeping the caller's windowLog),
+ * then copies the cdict's hash and chain tables, window, indices, dictID and
+ * compressed block state into it. hashTable3 is zeroed instead of copied.
+ * @return : 0, or an error code forwarded from ZSTD_resetCCtx_internal() */
+static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            ZSTD_CCtx_params params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+
+    DEBUGLOG(4, "copying dictionary into context");
+
+    {   unsigned const windowLog = params.cParams.windowLog;   /* saved : restored after cParams get overwritten below */
+        assert(windowLog != 0);
+        /* Copy only compression parameters related to tables. */
+        params.cParams = *cdict_cParams;
+        params.cParams.windowLog = windowLog;
+        /* tables are left dirty : they get overwritten by the copies below */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_leaveDirty, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
+        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
+
+        memcpy(cctx->blockState.matchState.hashTable,
+               cdict->matchState.hashTable,
+               hSize * sizeof(U32));
+        memcpy(cctx->blockState.matchState.chainTable,
+               cdict->matchState.chainTable,
+               chainSize * sizeof(U32));
+    }
+
+    /* Zero the hashTable3, since the cdict never fills it */
+    {   int const h3log = cctx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+        assert(cdict->matchState.hashLog3 == 0);
+        memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+    }
+
+    /* every table has now been fully written (copied or zeroed) */
+    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
+    /* copy dictionary offsets */
+    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+
+    cctx->dictID = cdict->dictID;
+
+    /* copy block state */
+    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+/* ZSTD_resetCCtx_usingCDict() :
+ * A dictionary context can either be copied into the working context, or be
+ * referenced from it in-place. ZSTD_shouldAttachDict() makes the decision,
+ * and the reset is then delegated to the matching implementation.
+ * @return : result of the selected reset function (0, or an error code) */
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            const ZSTD_CCtx_params* params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
+                (unsigned)pledgedSrcSize);
+
+    return ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)
+         ? ZSTD_resetCCtx_byAttachingCDict(cctx, cdict, *params, pledgedSrcSize, zbuff)
+         : ZSTD_resetCCtx_byCopyingCDict(cctx, cdict, *params, pledgedSrcSize, zbuff);
+}
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  The "context", in this case, refers to the hash and chain tables,
+ *  entropy tables, and dictionary references.
+ *  Compression parameters (including windowLog) are taken from srcCCtx's
+ *  applied parameters; frame parameters come from `fParams`.
+ * @return : 0, or an error code (stage_wrong if srcCCtx is not in init stage,
+ *           or any error reported while resetting dstCCtx) */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
+    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
+                    "Can't copy a ctx that's not in init stage.");
+
+    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
+    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
+        /* Copy only compression parameters related to tables. */
+        params.cParams = srcCCtx->appliedParams.cParams;
+        params.fParams = fParams;
+        /* The reset can fail (e.g. workspace allocation) : stop here in that
+         * case, rather than copying tables into an unprepared context.
+         * Tables are left dirty since they are overwritten just below. */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                                 ZSTDcrp_leaveDirty, zbuff), "");
+        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
+        int const h3log = srcCCtx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+        memcpy(dstCCtx->blockState.matchState.hashTable,
+               srcCCtx->blockState.matchState.hashTable,
+               hSize * sizeof(U32));
+        memcpy(dstCCtx->blockState.matchState.chainTable,
+               srcCCtx->blockState.matchState.chainTable,
+               chainSize * sizeof(U32));
+        memcpy(dstCCtx->blockState.matchState.hashTable3,
+               srcCCtx->blockState.matchState.hashTable3,
+               h3Size * sizeof(U32));
+    }
+
+    /* all three tables have been fully overwritten by the copies above */
+    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
+    /* copy dictionary offsets */
+    {
+        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
+        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+    dstCCtx->dictID = srcCCtx->dictID;
+
+    /* copy block state */
+    memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
+
+    return 0;
+}
+
+/*! ZSTD_copyCCtx() :
+ *  Public entry point duplicating context `srcCCtx` into `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown" : the content size flag is then cleared.
+ *  Buffered mode is selected when srcCCtx owns an input buffer.
+ *  @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff =
+        (srcCCtx->inBuffSize > 0) ? ZSTDb_buffered : ZSTDb_not_buffered;
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+
+    if (pledgedSrcSize == 0) {
+        pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+        fParams.contentSizeFlag = 0;
+    } else {
+        fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
+    }
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize, zbuff);
+}
+
+
+#define ZSTD_ROWSIZE 16
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization
+ *  @table : array of `size` U32 cells, modified in place.
+ *  @size : number of cells; asserted to be a multiple of ZSTD_ROWSIZE and < 2^31. */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
+    int cellNb = 0;   /* running cell index across all rows */
+    int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be casted to int */
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the table re-use logic is sound, and that we don't
+     * access table space that we haven't cleaned, we re-"poison" the table
+     * space every time we mark it dirty.
+     *
+     * This function however is intended to operate on those dirty tables and
+     * re-clean them. So when this function is used correctly, we can unpoison
+     * the memory it operated on. This introduces a blind spot though, since
+     * if we now try to operate on __actually__ poisoned memory, we will not
+     * detect that. */
+    __msan_unpoison(table, size * sizeof(U32));
+#endif
+
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                /* pre-add reducerValue to a marked cell, so that the subtraction
+                 * below brings it back to the mark (assuming the addition does not wrap) */
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
+            /* indexes below reducerValue are squashed to 0 rather than wrapping around */
+            if (table[cellNb] < reducerValue) table[cellNb] = 0;
+            else table[cellNb] -= reducerValue;
+            cellNb++;
+    }   }
+}
+
+/* ZSTD_reduceTable() :
+ * plain variant : reduces every cell, without preserving any mark (preserveMark==0). */
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+/* ZSTD_reduceTable_btlazy2() :
+ * variant which preserves cells equal to ZSTD_DUBT_UNSORTED_MARK (preserveMark==1). */
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32).
+*   Applies `reducerValue` to the hash table, to the chain table (skipped for
+*   ZSTD_fast, mark-preserving for ZSTD_btlazy2), and to hashTable3 when in use. */
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
+{
+    ZSTD_strategy const strat = params->cParams.strategy;
+    U32 const hashEntries = (U32)1 << params->cParams.hashLog;
+
+    ZSTD_reduceTable(ms->hashTable, hashEntries, reducerValue);
+
+    if (strat != ZSTD_fast) {
+        U32 const chainEntries = (U32)1 << params->cParams.chainLog;
+        if (strat == ZSTD_btlazy2) {
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainEntries, reducerValue);
+        } else {
+            ZSTD_reduceTable(ms->chainTable, chainEntries, reducerValue);
+        }
+    }
+
+    if (ms->hashLog3 != 0) {
+        U32 const h3Entries = (U32)1 << ms->hashLog3;
+        ZSTD_reduceTable(ms->hashTable3, h3Entries, reducerValue);
+    }
+}
+
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+/* ZSTD_seqToCodes() :
+ * fills the llCode / ofCode / mlCode tables of `seqStorePtr` with the code of
+ * each stored sequence, then overrides the entry at longLengthPos with MaxLL
+ * (longLengthID==1) or MaxML (longLengthID==2). */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+{
+    const seqDef* const seqs = seqStorePtr->sequencesStart;
+    BYTE* const llCodes = seqStorePtr->llCode;
+    BYTE* const ofCodes = seqStorePtr->ofCode;
+    BYTE* const mlCodes = seqStorePtr->mlCode;
+    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    U32 n = 0;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
+    while (n < nbSeq) {
+        U32 const litLen = seqs[n].litLength;
+        U32 const matchLen = seqs[n].matchLength;
+        llCodes[n] = (BYTE)ZSTD_LLcode(litLen);
+        ofCodes[n] = (BYTE)ZSTD_highbit32(seqs[n].offset);
+        mlCodes[n] = (BYTE)ZSTD_MLcode(matchLen);
+        n++;
+    }
+    switch (seqStorePtr->longLengthID) {
+        case 1:
+            llCodes[seqStorePtr->longLengthPos] = MaxLL;
+            break;
+        case 2:
+            mlCodes[seqStorePtr->longLengthPos] = MaxML;
+            break;
+        default:
+            break;
+    }
+}
+
+/* ZSTD_useTargetCBlockSize():
+ * Tells whether the target compressed block size parameter is in use,
+ * i.e. whether targetCBlockSize is non-zero.
+ * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
+ * @return : 1 when in use, 0 otherwise. */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
+{
+    int const inUse = (cctxParams->targetCBlockSize != 0);
+    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+    return inUse;
+}
+
+/* ZSTD_compressSequences_internal():
+ * actually compresses both literals and sequences.
+ * Writes into dst, in this order :
+ *   1. the literals section, as produced by ZSTD_compressLiterals(),
+ *   2. the number of sequences (1 to 3 bytes),
+ *   3. one byte of flags (seqHead) giving the encoding type selected for
+ *      literal lengths, offsets and match lengths,
+ *   4. whatever ZSTD_buildCTable() emits for each of those three tables,
+ *   5. the bitstream produced by ZSTD_encodeSequences().
+ * prevEntropy is only read; the tables and repeat modes chosen for this
+ * block are stored into nextEntropy.
+ * @return : nb of bytes written into dst,
+ *           or 0 when an uncompressed block must be emitted instead
+ *           (decoder-compatibility workaround, see end of function),
+ *           or an error code forwarded from one of the stages. */
+MEM_STATIC size_t
+ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
+                          const ZSTD_entropyCTables_t* prevEntropy,
+                                ZSTD_entropyCTables_t* nextEntropy,
+                          const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                void* entropyWorkspace, size_t entropyWkspSize,
+                          const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    unsigned count[MaxSeq+1];   /* histogram, reused for the 3 symbol types */
+    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;   /* write cursor into dst */
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    BYTE* seqHead;
+    /* start of the most recently written set_compressed table description,
+     * NULL while none has been written (used by the workaround at the end) */
+    BYTE* lastNCount = NULL;
+
+    DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    entropyWorkspace, entropyWkspSize,
+                                    bmi2);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+    /* nbSeq encoding :
+     *   nbSeq < 128       : 1 byte, the value itself
+     *   nbSeq < LONGNBSEQ : 2 bytes, high byte first with its top bit set
+     *   otherwise         : 0xFF marker, then (nbSeq - LONGNBSEQ) as 16-bit little-endian */
+    if (nbSeq < 128) {
+        *op++ = (BYTE)nbSeq;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
+    assert(op <= oend);
+    if (nbSeq==0) {
+        /* Copy the old tables over as if we repeated them */
+        memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+        /* no seqHead byte and no bitstream are written in this case */
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    /* the byte is reserved here and filled in once the 3 types are known */
+    seqHead = op++;
+    assert(op <= oend);
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        /* start from the previous block's repeat mode; the selector may update it */
+        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->fse.litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                count, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->fse.litlengthCTable,
+                sizeof(prevEntropy->fse.litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->fse.offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                count, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->fse.offcodeCTable,
+                sizeof(prevEntropy->fse.offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                count, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->fse.matchlengthCTable,
+                sizeof(prevEntropy->fse.matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+
+    /* flags byte layout : LLtype in bits 7-6, Offtype in bits 5-4, MLtype in bits 3-2 */
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, (size_t)(oend - op),
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        assert(op <= oend);
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+        if (lastNCount && (op - lastNCount) < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(op - lastNCount == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
+    return (size_t)(op - ostart);
+}
+
+/* ZSTD_compressSequences():
+ * Runs ZSTD_compressSequences_internal() and then decides whether its
+ * output is worth keeping.
+ * @return : the compressed size,
+ *           or 0 to signal "block not compressed",
+ *           or an error code. */
+MEM_STATIC size_t
+ZSTD_compressSequences(seqStore_t* seqStorePtr,
+                       const ZSTD_entropyCTables_t* prevEntropy,
+                             ZSTD_entropyCTables_t* nextEntropy,
+                       const ZSTD_CCtx_params* cctxParams,
+                             void* dst, size_t dstCapacity,
+                             size_t srcSize,
+                             void* entropyWorkspace, size_t entropyWkspSize,
+                             int bmi2)
+{
+    size_t const cSize = ZSTD_compressSequences_internal(
+                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+                            dst, dstCapacity,
+                            entropyWorkspace, entropyWkspSize, bmi2);
+
+    /* the internal stage already requested an uncompressed block */
+    if (cSize == 0)
+        return 0;
+
+    /* A raw uncompressed block fits whenever srcSize <= dstCapacity.
+     * Running out of room in that situation means the block is not
+     * compressible, so fall back to a raw block instead of failing. */
+    if (srcSize <= dstCapacity) {
+        if (cSize == ERROR(dstSize_tooSmall))
+            return 0;  /* block not compressed */
+    }
+
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed");
+
+    /* Compressibility check : the gain must reach ZSTD_minGain() */
+    if (cSize >= srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy))
+        return 0;  /* block not compressed */
+
+    return cSize;
+}
+
+/* ZSTD_selectBlockCompressor() :
+ * Looks up the block compressor matching a (dictMode, strategy) pair.
+ * Not static, but internal use only (used by long distance matcher).
+ * assumption : strat is a valid strategy */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+{
+    /* one row per dictMode value, one column per strategy value;
+     * column 0 is a default entry, since ZSTD_fast == 1 */
+    static const ZSTD_blockCompressor compressorTable[3][ZSTD_STRATEGY_MAX+1] = {
+        /* row 0 : plain variants */
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast,
+          ZSTD_compressBlock_doubleFast,
+          ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy,
+          ZSTD_compressBlock_lazy2,
+          ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt,
+          ZSTD_compressBlock_btultra,
+          ZSTD_compressBlock_btultra2 },
+        /* row 1 : _extDict variants */
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict,
+          ZSTD_compressBlock_doubleFast_extDict,
+          ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,
+          ZSTD_compressBlock_lazy2_extDict,
+          ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict,
+          ZSTD_compressBlock_btultra_extDict,
+          ZSTD_compressBlock_btultra_extDict },
+        /* row 2 : _dictMatchState variants */
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          ZSTD_compressBlock_doubleFast_dictMatchState,
+          ZSTD_compressBlock_greedy_dictMatchState,
+          ZSTD_compressBlock_lazy_dictMatchState,
+          ZSTD_compressBlock_lazy2_dictMatchState,
+          ZSTD_compressBlock_btlazy2_dictMatchState,
+          ZSTD_compressBlock_btopt_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState }
+    };
+    int const row = (int)dictMode;
+    int const col = (int)strat;
+    ZSTD_blockCompressor selectedCompressor;
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    selectedCompressor = compressorTable[row][col];
+    assert(selectedCompressor != NULL);
+    return selectedCompressor;
+}
+
+/* ZSTD_storeLastLiterals() :
+ * Appends the lastLLSize bytes starting at anchor to the literals buffer of
+ * seqStorePtr, and advances its write pointer by the same amount. */
+static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
+                                   const BYTE* anchor, size_t lastLLSize)
+{
+    BYTE* const litDst = seqStorePtr->lit;
+    memcpy(litDst, anchor, lastLLSize);
+    seqStorePtr->lit = litDst + lastLLSize;
+}
+
+/* ZSTD_resetSeqStore() :
+ * Empties the sequence store : the literal and sequence write pointers go
+ * back to the start of their buffers, and the long-length marker is cleared. */
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->longLengthID = 0;
+    ssPtr->sequences = ssPtr->sequencesStart;
+    ssPtr->lit = ssPtr->litStart;
+}
+
+/* Outcome of ZSTD_buildSeqStore() :
+ * ZSTDbss_compress   : the sequence store was filled and can be compressed,
+ * ZSTDbss_noCompress : the input was too small, no compression was attempted. */
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+/* ZSTD_buildSeqStore() :
+ * Fills zc->seqStore with the sequences and literals found in
+ * [src, src+srcSize), using one of three sources :
+ *   - externally provided sequences (zc->externSeqStore), while some remain,
+ *   - long distance matching, when ldmParams.enableLdm is set,
+ *   - otherwise the block compressor selected from strategy and dictMode.
+ * The repcodes of the previous block are copied into nextCBlock first, and
+ * the trailing literals are appended to the store at the end.
+ * @return : ZSTDbss_compress or ZSTDbss_noCompress (see ZSTD_buildSeqStore_e),
+ *           or an error code forwarded from ZSTD_ldm_generateSequences(). */
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+{
+    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    /* Assert that we have correctly flushed the ctx params into the ms's copy */
+    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+        /* external sequences are still skipped over for the bytes of this block */
+        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
+    }
+    ZSTD_resetSeqStore(&(zc->seqStore));
+    /* required for optimal parser to read stats from dictionary */
+    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
+    /* tell the optimal parser how we expect to compress literals */
+    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
+    /* a gap between an attached dict and the current window is not safe,
+     * they must remain adjacent,
+     * and when that stops being the case, the dict must be unset */
+    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
+
+    /* limited update after a very long match */
+    {   const BYTE* const base = ms->window.base;
+        const BYTE* const istart = (const BYTE*)src;
+        const U32 current = (U32)(istart-base);
+        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
+        /* when nextToUpdate lags more than 384 positions behind, move it
+         * forward so that it ends at most 192 positions before current */
+        if (current > ms->nextToUpdate + 384)
+            ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
+    }
+
+    /* select and store sequences */
+    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
+        size_t lastLLSize;   /* nb of trailing literals left after the last sequence */
+        /* the next block starts from the repcodes of the previous one */
+        {   int i;
+            for (i = 0; i < ZSTD_REP_NUM; ++i)
+                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+        }
+        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+            /* externally provided sequences remain to be consumed */
+            assert(!zc->appliedParams.ldmParams.enableLdm);
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&zc->externSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+        } else if (zc->appliedParams.ldmParams.enableLdm) {
+            /* block-local view over the context's ldm sequence buffer */
+            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+
+            ldmSeqStore.seq = zc->ldmSequences;
+            ldmSeqStore.capacity = zc->maxNbLdmSequences;
+            /* Updates ldmSeqStore.size */
+            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+                                               &zc->appliedParams.ldmParams,
+                                               src, srcSize), "");
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&ldmSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(ldmSeqStore.pos == ldmSeqStore.size);
+        } else {   /* not long range mode */
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+        }
+        /* the last lastLLSize bytes of src are stored as literals */
+        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+    }   }
+    return ZSTDbss_compress;
+}
+
+/* ZSTD_copyBlockSequences() :
+ * Converts the sequences of the current block (from ZSTD_getSeqStore(zc))
+ * into ZSTD_Sequence entries, written into zc->seqCollector.seqStart
+ * starting at index zc->seqCollector.seqIndex, then advances seqIndex by
+ * the number of sequences copied.
+ * For each entry :
+ *   - matchLength is the stored value + MINMATCH,
+ *   - the sequence at longLengthPos gets 0x10000 added to its litLength or
+ *     matchLength, depending on longLengthID,
+ *   - stored offsets <= ZSTD_REP_NUM are treated as repcodes and resolved
+ *     into an offset value, other offsets are reduced by ZSTD_REP_NUM,
+ *   - matchPos is the running position (sum of the preceding litLength and
+ *     matchLength values of this call, plus this entry's litLength).
+ * NOTE(review): the capacity check below is an assert on a single free slot;
+ * nothing visible here bounds seqsSize against maxSequences - confirm that
+ * callers size the output array accordingly. */
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+    const seqDef* seqs = seqStore->sequencesStart;
+    size_t seqsSize = seqStore->sequences - seqs;
+
+    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    size_t i; size_t position; int repIdx;
+
+    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+    for (i = 0, position = 0; i < seqsSize; ++i) {
+        outSeqs[i].offset = seqs[i].offset;
+        outSeqs[i].litLength = seqs[i].litLength;
+        outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (outSeqs[i].offset <= ZSTD_REP_NUM) {
+            /* repcode : find which earlier entry (or start value) it refers to */
+            outSeqs[i].rep = outSeqs[i].offset;
+            repIdx = (unsigned int)i - outSeqs[i].offset;
+
+            /* with litLength == 0, the repcode refers one entry further back
+             * (or to the previous entry for code 3), and rep is incremented */
+            if (outSeqs[i].litLength == 0) {
+                if (outSeqs[i].offset < 3) {
+                    --repIdx;
+                } else {
+                    repIdx = (unsigned int)i - 1;
+                }
+                ++outSeqs[i].rep;
+            }
+            assert(repIdx >= -3);
+            /* a negative index falls before the entries written by this call :
+             * repStartValue[] is used in that case */
+            outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
+            if (outSeqs[i].rep == 4) {
+                --outSeqs[i].offset;
+            }
+        } else {
+            outSeqs[i].offset -= ZSTD_REP_NUM;
+        }
+
+        position += outSeqs[i].litLength;
+        outSeqs[i].matchPos = (unsigned int)position;
+        position += outSeqs[i].matchLength;
+    }
+    zc->seqCollector.seqIndex += seqsSize;
+}
+
+/* ZSTD_getSequences() :
+ * Collects into outSeqs the sequences found while compressing src.
+ * A full ZSTD_compress2() pass is run on zc with sequence collection
+ * enabled; its compressed output goes to a temporary buffer which is
+ * discarded.
+ * @param zc          : compression context; its seqCollector is overwritten
+ * @param outSeqs     : destination array for the collected sequences
+ * @param outSeqsSize : capacity of outSeqs, in nb of sequences
+ * @param src,srcSize : input to analyze
+ * @return : nb of sequences written into outSeqs,
+ *           or an error code (testable with ZSTD_isError()) when the
+ *           temporary buffer cannot be allocated or the compression fails. */
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize);
+    void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
+    SeqCollector seqCollector;
+
+    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    /* Release the scratch buffer before looking at the result, so that the
+     * error path does not leak it. A failed compression must be reported :
+     * seqIndex would otherwise be returned as if collection had completed. */
+    {   size_t const cSize = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+        ZSTD_free(dst, ZSTD_defaultCMem);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compress2 failed");
+    }
+    return zc->seqCollector.seqIndex;
+}
+
+/* ZSTD_isRLE() :
+ * @return : 1 if all `length` bytes at ip hold the same value
+ *           (always the case when length < 2), 0 otherwise. */
+static int ZSTD_isRLE(const BYTE *ip, size_t length) {
+    size_t pos = 1;
+    if (length < 2) return 1;
+    {   BYTE const first = ip[0];
+        while (pos < length) {
+            if (ip[pos] != first) return 0;
+            pos++;
+        }
+    }
+    return 1;
+}
+
+/* ZSTD_maybeRLE() :
+ * Cheap pre-filter telling whether a block may be RLE.
+ * It is only a heuristic based on compressibility : the answer is "maybe"
+ * when the store holds fewer than 4 sequences and fewer than 10 literals.
+ * Both false positives and false negatives are possible.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    if ((size_t)(seqStore->sequences - seqStore->sequencesStart) >= 4)
+        return 0;
+    return (size_t)(seqStore->lit - seqStore->litStart) < 10;
+}
+
+/* ZSTD_confirmRepcodesAndEntropyTables() :
+ * Swaps the prevCBlock and nextCBlock pointers of the block state, so that
+ * the state prepared for the block just processed becomes the "previous"
+ * state seen by the following block. */
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+    ZSTD_compressedBlockState_t* const confirmed = zc->blockState.nextCBlock;
+    zc->blockState.nextCBlock = zc->blockState.prevCBlock;
+    zc->blockState.prevCBlock = confirmed;
+}
+
+/* ZSTD_compressBlock_internal() :
+ * Compresses one block : builds the sequence store, then encodes literals
+ * and sequences into dst. No block header is written here.
+ * @param frame : when non-zero, a block that turns out to be RLE may be
+ *                reported as such (never for the first block)
+ * @return : 0 when the block is not compressed (nothing usable is in dst),
+ *           1 when the block is RLE (dst[0] holds the repeated byte),
+ *           otherwise the compressed size, or an error code.
+ * When zc->seqCollector.collectSequences is set, the sequences are only
+ * copied out and 0 is returned. */
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame)
+{
+    /* This is the upper bound for the length of an rle block.
+     * This isn't the actual upper bound. Finding the real threshold
+     * needs further investigation.
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
+
+    /* sequence collection mode : note that this returns directly,
+     * without going through the `out:` section below */
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        return 0;
+    }
+
+    /* encode sequences and literals */
+    cSize = ZSTD_compressSequences(&zc->seqStore,
+            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            srcSize,
+            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+            zc->bmi2);
+
+    /* the full scan of src is only attempted when the compressed result is
+     * smaller than rleMaxLength */
+    if (frame &&
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+         * This is only an issue for zstd <= v1.4.3
+         */
+        !zc->isFirstBlock &&
+        cSize < rleMaxLength &&
+        ZSTD_isRLE(ip, srcSize))
+    {
+        cSize = 1;
+        op[0] = ip[0];
+    }
+
+out:
+    /* repcodes and entropy tables are confirmed only for a compressed block :
+     * neither for "not compressed" (0), nor RLE (1), nor an error */
+    if (!ZSTD_isError(cSize) && cSize > 1) {
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+    }
+    /* We check that dictionaries have offset codes available for the first
+     * block. After the first block, the offcode table might not have large
+     * enough codes to represent the offsets in the data.
+     */
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+/* ZSTD_compressBlock_targetCBlockSize_body() :
+ * Emits one block, given the outcome `bss` of ZSTD_buildSeqStore().
+ * When bss == ZSTDbss_compress, tries in order :
+ *   1. an RLE block, if the block is not the first one and its content is a
+ *      single repeated byte,
+ *   2. superblock compression through ZSTD_compressSuperBlock().
+ * In every other case, including a rejected superblock result, the block is
+ * emitted through ZSTD_noCompressBlock().
+ * @return : the value returned by whichever of ZSTD_rleCompressBlock(),
+ *           ZSTD_compressSuperBlock() or ZSTD_noCompressBlock() produced
+ *           the block, or an error code. */
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const size_t bss, U32 lastBlock)
+{
+    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+    if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
+        /* Attempt superblock compression.
+         *
+         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+         * standard ZSTD_compressBound(). This is a problem, because even if we have
+         * space now, taking an extra byte now could cause us to run out of space later
+         * and violate ZSTD_compressBound().
+         *
+         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+         *
+         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+         * uncompressed block in these cases:
+         *   * cSize == 0: Return code for an uncompressed block.
+         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+         *     output space.
+         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+         *     emit an uncompressed block.
+         */
+        {
+            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+            /* dstSize_tooSmall is not forwarded : it falls through to the raw block attempt */
+            if (cSize != ERROR(dstSize_tooSmall)) {
+                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+                /* accepted only when non-zero and below maxCSize + ZSTD_blockHeaderSize */
+                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    return cSize;
+                }
+            }
+        }
+    }
+
+    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+    /* Superblock compression failed, attempt to emit a single no compress block.
+     * The decoder will be able to stream this block since it is uncompressed.
+     */
+    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+}
+
+/* ZSTD_compressBlock_targetCBlockSize() :
+ * Block compression entry point used when a target compressed block size
+ * is set : builds the sequence store, then hands over to
+ * ZSTD_compressBlock_targetCBlockSize_body().
+ * @return : the size reported by the body function, or an error code. */
+static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               U32 lastBlock)
+{
+    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+    size_t cSize;
+    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
+    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+
+    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
+
+    /* prevCBlock is read only now : the body may have swapped the block states.
+     * A "valid" offcode repeat mode is downgraded to "check". */
+    {   ZSTD_compressedBlockState_t* const prev = zc->blockState.prevCBlock;
+        if (prev->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+            prev->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+    }
+
+    return cSize;
+}
+
+/* ZSTD_overflowCorrectIfNeeded() :
+ * Does nothing unless ZSTD_window_needOverflowCorrection() reports that the
+ * window needs it for iend. Otherwise : corrects the window, reduces the
+ * table indexes by the same amount, rebases ms->nextToUpdate (clamped at 0),
+ * and detaches any dictionary from the match state. */
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         void const* ip,
+                                         void const* iend)
+{
+    if (!ZSTD_window_needOverflowCorrection(ms->window, iend))
+        return;
+
+    {   U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+
+        /* tables are flagged dirty for the duration of the index reduction */
+        ZSTD_cwksp_mark_tables_dirty(ws);
+        ZSTD_reduceIndex(ms, params, correction);
+        ZSTD_cwksp_mark_tables_clean(ws);
+
+        ms->nextToUpdate = (ms->nextToUpdate < correction) ? 0 : ms->nextToUpdate - correction;
+
+        /* invalidate dictionaries on overflow correction */
+        ms->dictMatchState = NULL;
+        ms->loadedDictEnd = 0;
+    }
+}
+
+/*! ZSTD_compress_frameChunk() :
+*   Compress a chunk of data into one or multiple blocks.
+*   All blocks will be terminated, all input will be consumed.
+*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+*   Frame is supposed already started (header already produced)
+*   The input is cut into pieces of at most cctx->blockSize bytes; each piece
+*   becomes one block, preceded by its block header.
+*   When the checksum flag is set, the frame checksum state is updated with
+*   the whole chunk before any block is produced.
+*   `lastFrameChunk` : when non-zero, the block consuming the end of the
+*   input is flagged as last block, and the context stage becomes
+*   ZSTDcs_ending once something has been written.
+*   @return : compressed size, or an error code
+*/
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     U32 lastFrameChunk)
+{
+    size_t blockSize = cctx->blockSize;
+    size_t remaining = srcSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
+
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
+
+    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
+        XXH64_update(&cctx->xxhState, src, srcSize);
+
+    while (remaining) {
+        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+        /* last block : only when this is the last chunk and this block consumes all remaining input */
+        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+
+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
+                        dstSize_tooSmall,
+                        "not enough space to store compressed block");
+        if (remaining < blockSize) blockSize = remaining;
+
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
+        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
+
+        {   size_t cSize;
+            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
+                /* this path is handed `op` itself : unlike the other branch,
+                 * no block header is written here afterwards */
+                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
+                assert(cSize > 0);
+                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else {
+                /* block content is written after the room reserved for the header */
+                cSize = ZSTD_compressBlock_internal(cctx,
+                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
+                                        ip, blockSize, 1 /* frame */);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
+
+                if (cSize == 0) {  /* block is not compressible */
+                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+                } else {
+                    /* 24-bit header : lastBlock in bit 0, block type from bit 1,
+                     * size from bit 3. cSize == 1 denotes an RLE block, whose
+                     * size field holds blockSize instead of cSize */
+                    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+                    MEM_writeLE24(op, cBlockHeader);
+                    cSize += ZSTD_blockHeaderSize;
+                }
+            }
+
+
+            /* advance input and output cursors past the block just produced */
+            ip += blockSize;
+            assert(remaining >= blockSize);
+            remaining -= blockSize;
+            op += cSize;
+            assert(dstCapacity >= cSize);
+            dstCapacity -= cSize;
+            cctx->isFirstBlock = 0;
+            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
+                        (unsigned)cSize);
+    }   }
+
+    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
+    return (size_t)(op-ostart);
+}
+
+
+/* ZSTD_writeFrameHeader() :
+ * Writes a frame header into dst, in this order :
+ *   - the magic number (4 bytes), only when params->format == ZSTD_f_zstd1,
+ *   - the frame header descriptor byte,
+ *   - the window descriptor byte, unless the frame is single-segment,
+ *   - the dictionary ID (0, 1, 2 or 4 bytes),
+ *   - the frame content size (0, 1, 2, 4 or 8 bytes).
+ * dstCapacity must be at least ZSTD_FRAMEHEADERSIZE_MAX, whatever the size
+ * of the header actually produced.
+ * @return : nb of bytes written into dst,
+ *           or dstSize_tooSmall error if dstCapacity is too small. */
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
+{   BYTE* const op = (BYTE*)dst;
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
+    U32   const checksumFlag = params->fParams.checksumFlag>0;
+    U32   const windowSize = (U32)1 << params->cParams.windowLog;
+    /* single segment : content size is written and the content fits within the window */
+    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+    U32   const fcsCode = params->fParams.contentSizeFlag ?
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    /* descriptor layout : bits 1-0 dictIDSizeCode, bit 2 checksum flag,
+     * bit 5 single segment flag, bits 7-6 fcsCode */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;   /* nb of bytes written so far */
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    /* dictionary ID : little-endian, on 0, 1, 2 or 4 bytes */
+    switch(dictIDSizeCode)
+    {
+        default:  assert(0); /* impossible */
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    /* frame content size : code 0 writes a single byte, and only for a
+     * single-segment frame; code 1 stores (pledgedSrcSize - 256) on 2 bytes */
+    switch(fcsCode)
+    {
+        default:  assert(0); /* impossible */
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * Completes a frame by emitting a zero-sized raw block carrying the end-of-frame mark.
+ * @return : nb of bytes written into `dst`, always ZSTD_blockHeaderSize (defined in zstd_internal.h),
+ *           or an error code when `dstCapacity` < ZSTD_blockHeaderSize
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    U32 const lastBlockBit = 1;
+    U32 const rawBlockType = ((U32)bt_raw) << 1;
+
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    /* the block size field is left at 0 */
+    MEM_writeLE24(dst, lastBlockBit + rawBlockType);
+    return ZSTD_blockHeaderSize;
+}
+
+/* ZSTD_referenceExternalSequences() :
+ * Registers `seq` (holding `nbSeq` entries) as the external sequence store of `cctx`.
+ * The array is referenced, not copied, and the read position restarts at 0.
+ * Only accepted while cctx is in ZSTDcs_init stage, and rejected when ldm is enabled.
+ * @return : 0, or an error code (stage_wrong, parameter_unsupported) */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, parameter_unsupported, "incompatible with ldm");
+
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.seq = seq;
+    return 0;
+}
+
+
+/* ZSTD_compressContinue_internal() :
+ * Shared body of ZSTD_compressContinue(), ZSTD_compressEnd() and ZSTD_compressBlock().
+ * When `frame` is set and cctx is still in ZSTDcs_init stage, a frame header is written first.
+ * `src` is then compressed either as frame blocks (ZSTD_compress_frameChunk()),
+ * or, when `frame` == 0, as a single block (ZSTD_compressBlock_internal()).
+ * Also maintains cctx->consumedSrcSize / cctx->producedCSize,
+ * and fails with srcSize_wrong once more input than pledged has been consumed.
+ * @return : nb of bytes written into `dst` (frame header included), or an error code */
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                               U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    /* first call of a frame : emit the frame header, then compress into what remains of dst */
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    /* NOTE(review): a 0 return from ZSTD_window_update() presumably means the new input
+     * is not contiguous with the previous one - confirm in ZSTD_window_update() */
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
+        cctx->consumedSrcSize += srcSize;
+        cctx->producedCSize += (cSize + fhSize);
+        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+        /* pledgedSrcSizePlusOne == 0 disables the control below */
+        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+            RETURN_ERROR_IF(
+                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
+                srcSize_wrong,
+                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+                (unsigned)cctx->pledgedSrcSizePlusOne-1,
+                (unsigned)cctx->consumedSrcSize);
+        }
+        return cSize + fhSize;
+    }
+}
+
+/* ZSTD_compressContinue() :
+ * Compresses `src` in frame mode, without flagging it as the last chunk of the frame.
+ * @return : result of ZSTD_compressContinue_internal() (nb of bytes written, or an error code) */
+size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize)
+{
+    U32 const frameMode = 1;
+    U32 const lastChunk = 0;
+    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frameMode, lastChunk);
+}
+
+
+/* ZSTD_getBlockSize() :
+ * @return : the smaller of ZSTD_BLOCKSIZE_MAX and the window size (1 << windowLog)
+ *           of the parameters currently applied to `cctx` */
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+    assert(!ZSTD_checkCParams(cParams));
+    {   U32 const windowSize = (U32)1 << cParams.windowLog;
+        return MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    }
+}
+
+/* ZSTD_compressBlock() :
+ * Compresses `src` as one block in block mode (frame == 0).
+ * `srcSize` must not exceed ZSTD_getBlockSize(cctx), otherwise srcSize_wrong is returned.
+ * @return : result of ZSTD_compressContinue_internal(), or an error code */
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+    {   size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+        RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block");
+    }
+    {   U32 const frameMode = 0;
+        U32 const lastChunk = 0;
+        return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frameMode, lastChunk);
+    }
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  References `src` as dictionary content : extends the match state window over it
+ *  (and the ldm window, when ldm is enabled and `ls` != NULL), then indexes it
+ *  into the tables used by the selected strategy, in chunks of at most ZSTD_CHUNKSIZE_MAX bytes.
+ *  Content of HASH_READ_SIZE bytes or less is referenced but not indexed.
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ldmState_t* ls,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    /* loadedDictEnd is left at 0 when params->forceWindow is set */
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    if (params->ldmParams.enableLdm && ls != NULL) {
+        ZSTD_window_update(&ls->window, src, srcSize);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
+    /* Assert that the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = (size_t)(iend - ip);
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+
+        /* NOTE(review): called on every chunk iteration with the full [src, src+srcSize) range,
+         * not with the current chunk - looks redundant when there are several chunks; confirm intent */
+        if (params->ldmParams.enableLdm && ls != NULL)
+            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
+        /* index the current chunk with the method matching the strategy */
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
+    }
+
+    /* table updates resume from the end of the dictionary content */
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* ZSTD_checkDictNCount() :
+   A dictionary whose FSE tables give zero probability to a symbol that later shows up
+   causes problems when FSE encoding, so such dictionaries are refused :
+   every symbol in [0, maxSymbolValue] must have a non-zero normalized count.
+   NOTE: This behavior is not standard and could be improved in the future.
+   @return : 0, or dictionary_corrupted */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s = 0;
+    RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols");
+    while (s <= maxSymbolValue) {
+        RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols");
+        ++s;
+    }
+    return 0;
+}
+
+/* ZSTD_loadCEntropy() :
+ * Parses the entropy section of a zstd dictionary into `bs` :
+ * skips the first 8 bytes (magic number and dictID), then reads, in order,
+ * the Huffman table, the offset code / match length / literal length FSE tables,
+ * and the 3 repeat offsets (3 x 4 bytes, little-endian).
+ * `offcodeNCount` and `offcodeMaxValue` are filled for the caller,
+ * since validating offset codes is deferred until the dictionary content size is known.
+ * `workspace` is handed to FSE_buildCTable_wksp() with size HUF_WORKSPACE_SIZE.
+ * Assumes dictSize >= 8 (the first 8 bytes are skipped without any check).
+ * @return : nb of bytes consumed from the start of `dict`, or an error code (dictionary_corrupted) */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         short* offcodeNCount, unsigned* offcodeMaxValue,
+                         const void* const dict, size_t dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    /* Huffman table for literals */
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights. Otherwise, we set it to check */
+        if (!hasZeroWeights)
+            bs->entropy.huf.repeatMode = HUF_repeat_valid;
+
+        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
+        dictPtr += hufHeaderSize;
+    }
+
+    /* offset codes FSE table */
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        /* fill all offset symbols to avoid garbage at end of table */
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.offcodeCTable,
+                offcodeNCount, MaxOff, offcodeLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        dictPtr += offcodeHeaderSize;
+    }
+
+    /* match lengths FSE table */
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        /* Every match length code must have non-zero probability */
+        FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.matchlengthCTable,
+                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    /* literal lengths FSE table */
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        /* Every literal length code must have non-zero probability */
+        FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.litlengthCTable,
+                litlengthNCount, litlengthMaxValue, litlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        dictPtr += litlengthHeaderSize;
+    }
+
+    /* the 3 repeat offsets; their values are not validated here */
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    return dictPtr - (const BYTE*)dict;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ *  Loads a full zstd dictionary : entropy tables first (ZSTD_loadCEntropy()),
+ *  then the remaining bytes as dictionary content (ZSTD_loadDictionaryContent()).
+ *  Offset codes and repeat offsets are validated against the content size in between.
+ *  `workspace` is forwarded to ZSTD_loadCEntropy().
+ * @return : dictID (0 when params->fParams.noDictIDFlag is set), or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    size_t dictID;
+    size_t eSize;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    /* eSize : nb of bytes consumed by the header and entropy tables, counted from the start of dict */
+    eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        /* the guard keeps dictContentSize + 128 KB from overflowing a U32;
+         * beyond that, offcodeMax stays at MaxOff */
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable */
+        FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
+        /* All repCodes must be <= dictContentSize and != 0*/
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
+        }   }
+
+        /* entropy tables passed validation : mark the FSE tables as valid for reuse */
+        bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
+        bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
+        bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
+        /* note : no ldm state is passed (NULL) on this path */
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+        return dictID;
+    }
+}
+
+/** ZSTD_compress_insertDictionary() :
+*   Loads `dict` according to `dictContentType` :
+*   - NULL or shorter than 8 bytes : nothing is loaded (an error when a full dictionary was required)
+*   - ZSTD_dct_rawContent : loaded as raw content
+*   - ZSTD_dct_auto : loaded as a full zstd dictionary when it starts with ZSTD_MAGIC_DICTIONARY,
+*                     as raw content otherwise
+*   - ZSTD_dct_fullDict : must start with ZSTD_MAGIC_DICTIONARY, otherwise dictionary_wrong
+*   `bs` is reset before anything gets loaded; `workspace` is only used on the full dictionary path.
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    /* no usable dictionary : this is not an error, unless a full dictionary was explicitly requested */
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        /* with assertions compiled out, an unexpected dictContentType falls through to the full dictionary path */
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+}
+
+/* Thresholds used by ZSTD_compressBegin_internal() to decide whether a cdict is applied
+ * through ZSTD_resetCCtx_usingCDict() : pledged source size below this cutoff ... */
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+/* ... or below the dictionary content size multiplied by this factor */
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
+
+/*! ZSTD_compressBegin_internal() :
+ *  Prepares `cctx` for a new compression job, with either `dict` or `cdict` (never both).
+ *  A non-empty `cdict` is applied through ZSTD_resetCCtx_usingCDict() when
+ *  params->attachDictPref != ZSTD_dictForceLoad and one of these holds :
+ *  pledgedSrcSize is below the cutoffs defined above, pledgedSrcSize is unknown,
+ *  or cdict->compressionLevel == 0.
+ *  Otherwise `cctx` is reset with `params`, and the dictionary content
+ *  (the one of `cdict` when provided, `dict` otherwise) is loaded into it.
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if ( (cdict)
+      && (cdict->dictContentSize > 0)
+      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+        || cdict->compressionLevel == 0)
+      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+                                     ZSTDcrp_makeClean, zbuff) , "");
+    /* load the dictionary content into the freshly reset context;
+     * note that it reads cctx->appliedParams, not `params` */
+    {   size_t const dictID = cdict ?
+                ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                        cdict->dictContentSize, dictContentType, dtlm,
+                        cctx->entropyWorkspace)
+              : ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+                        dictContentType, dtlm, cctx->entropyWorkspace);
+        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+        assert(dictID <= UINT_MAX);
+        cctx->dictID = (U32)dictID;
+    }
+    return 0;
+}
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Validates params->cParams, then starts a non-buffered compression job
+ * through ZSTD_compressBegin_internal().
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize)
+{
+    ZSTD_buffered_policy_e const zbuff = ZSTDb_not_buffered;
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
+    /* reject invalid compression parameters before touching cctx */
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm,
+                                       cdict, params, pledgedSrcSize, zbuff);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   Starts a compression job from explicit `params`, with an optional dictionary buffer
+*   (content type auto-detected, fast table load method, no cdict).
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    const ZSTD_CDict* const noCDict = NULL;
+    ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize,
+                                                ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                                noCDict, &cctxParams, pledgedSrcSize);
+}
+
+/* ZSTD_compressBegin_usingDict() :
+ * Starts a non-buffered compression job at `compressionLevel` with an optional dictionary buffer;
+ * the source size is declared unknown.
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    unsigned long long const unknownSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, unknownSrcSize, dictSize);
+    ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    const ZSTD_CDict* const noCDict = NULL;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize,
+                                       ZSTD_dct_auto, ZSTD_dtlm_fast, noCDict,
+                                       &cctxParams, unknownSrcSize, ZSTDb_not_buffered);
+}
+
+/* ZSTD_compressBegin() :
+ * Same as ZSTD_compressBegin_usingDict(), without any dictionary.
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    const void* const noDict = NULL;
+    size_t const noDictSize = 0;
+    return ZSTD_compressBegin_usingDict(cctx, noDict, noDictSize, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame : writes the frame header when nothing was emitted yet (empty frame),
+*   then one last empty raw block unless cctx is already in ZSTDcs_ending stage,
+*   then the 32-bit content checksum when fParams.checksumFlag is set.
+*   On success, cctx goes back to ZSTDcs_created stage, so a new init is required before reuse.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t fhSize = 0;
+
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+
+    /* special case : empty frame */
+    if (cctx->stage == ZSTDcs_init) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        op += fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (cctx->stage != ZSTDcs_ending) {
+        /* write one last empty block, make it the "last" block */
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+        /* a block header takes exactly ZSTD_blockHeaderSize bytes :
+         * require no more than that, and do not write past it,
+         * so that a destination with just enough room left is not rejected */
+        RETURN_ERROR_IF(dstCapacity<ZSTD_blockHeaderSize, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE24(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+    }
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        /* only the low 32 bits of the XXH64 digest are stored */
+        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(op, checksum);
+        op += 4;
+    }
+
+    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
+    return (size_t)(op-ostart);
+}
+
+/* ZSTD_compressEnd() :
+ * Compresses `src` as the last chunk of the frame, then appends the epilogue.
+ * When a source size was pledged, the total consumed size must match it exactly,
+ * otherwise srcSize_wrong is returned.
+ * @return : nb of bytes written into `dst` (compressed data + epilogue), or an error code */
+size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize)
+{
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
+
+    /* the epilogue goes right after the compressed data */
+    {   size_t const endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
+        FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
+        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+            DEBUGLOG(4, "end of frame : controlling src size");
+            RETURN_ERROR_IF(
+                cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
+                srcSize_wrong,
+                 "error : pledgedSrcSize = %u, while realSrcSize = %u",
+                (unsigned)cctx->pledgedSrcSizePlusOne-1,
+                (unsigned)cctx->consumedSrcSize);
+        }
+        return cSize + endResult;
+    }
+}
+
+
+/* ZSTD_compress_internal() :
+ * Converts `params` into a ZSTD_CCtx_params (on top of cctx->requestedParams),
+ * then delegates to ZSTD_compress_advanced_internal().
+ * @return : compressed size written into `dst`, or an error code */
+static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
+                                      void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const void* dict,size_t dictSize,
+                                const ZSTD_parameters* params)
+{
+    ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
+    DEBUGLOG(4, "ZSTD_compress_internal");
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize,
+                                           dict, dictSize, &cctxParams);
+}
+
+/* ZSTD_compress_advanced() :
+ * One-shot compression with explicit `params` and an optional dictionary buffer.
+ * params.cParams is validated before anything else.
+ * @return : compressed size written into `dst`, or an error code */
+size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced");
+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+    return ZSTD_compress_internal(cctx, dst, dstCapacity, src, srcSize,
+                                  dict, dictSize, &params);
+}
+
+/* Internal */
+/* ZSTD_compress_advanced_internal() :
+ * One-shot compression : begins a non-buffered job pledging `srcSize` as source size,
+ * then compresses the whole input and ends the frame with ZSTD_compressEnd().
+ * @return : compressed size written into `dst`, or an error code */
+size_t ZSTD_compress_advanced_internal(
+        ZSTD_CCtx* cctx,
+        void* dst, size_t dstCapacity,
+        const void* src, size_t srcSize,
+        const void* dict,size_t dictSize,
+        const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                         params, srcSize, ZSTDb_not_buffered) , "");
+
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/* ZSTD_compress_usingDict() :
+ * One-shot compression at `compressionLevel` with an optional dictionary buffer.
+ * Parameters are selected from the level, `srcSize` and the dictionary size
+ * (counted as 0 when `dict` is NULL).
+ * @return : compressed size written into `dst`, or an error code */
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               int compressionLevel)
+{
+    size_t const dictSizeHint = dict ? dictSize : 0;
+    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dictSizeHint);
+    ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
+    assert(params.fParams.contentSizeFlag == 1);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctxParams);
+}
+
+/* ZSTD_compressCCtx() :
+ * One-shot compression at `compressionLevel` using an existing context, without dictionary.
+ * @return : compressed size written into `dst`, or an error code */
+size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize,
+                         int compressionLevel)
+{
+    const void* const noDict = NULL;
+    size_t const noDictSize = 0;
+    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
+    assert(cctx != NULL);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize,
+                                   noDict, noDictSize, compressionLevel);
+}
+
+/* ZSTD_compress() :
+ * One-shot compression using a temporary context that lives on the stack.
+ * @return : compressed size written into `dst`, or an error code */
+size_t ZSTD_compress(void* dst, size_t dstCapacity,
+               const void* src, size_t srcSize,
+                     int compressionLevel)
+{
+    ZSTD_CCtx ctxBody;
+    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
+    {   size_t const result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
+        /* ctxBody itself is on the stack : release only what it owns */
+        ZSTD_freeCCtxContent(&ctxBody);
+        return result;
+    }
+}
+
+
+/* =====  Dictionary API  ===== */
+
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimates the amount of memory needed to create a dictionary with the given arguments :
+ *  the ZSTD_CDict structure, a HUF_WORKSPACE_SIZE workspace, the match state,
+ *  plus a pointer-aligned copy of the dictionary unless it is loaded by reference. */
+size_t ZSTD_estimateCDictSize_advanced(
+        size_t dictSize, ZSTD_compressionParameters cParams,
+        ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    size_t dictCopySize = 0;
+    if (dictLoadMethod != ZSTD_dlm_byRef)
+        dictCopySize = ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)));
+
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
+    {   size_t const structSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict));
+        size_t const entropySize = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+        return structSize + entropySize + matchStateSize + dictCopySize;
+    }
+}
+
+/* ZSTD_estimateCDictSize() :
+ * Estimates the memory needed for a by-copy dictionary of `dictSize` bytes,
+ * using the compression parameters selected for `compressionLevel` with an unknown source size. */
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    unsigned long long const unknownSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    ZSTD_compressionParameters const cParams =
+            ZSTD_getCParams_internal(compressionLevel, unknownSrcSize, dictSize);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
+}
+
+/* ZSTD_sizeof_CDict() :
+ * @return : size of the cdict workspace, plus sizeof(*cdict) unless the structure
+ *           itself sits at the start of that workspace; 0 when `cdict` is NULL */
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    size_t structSize;
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
+    if (cdict->workspace.workspace == cdict) {
+        /* cdict lives inside its own workspace : already counted */
+        structSize = 0;
+    } else {
+        structSize = sizeof(*cdict);
+    }
+    return structSize + ZSTD_cwksp_sizeof(&cdict->workspace);
+}
+
+/* ZSTD_initCDict_internal() :
+ * Initializes `cdict` from `dictBuffer` :
+ * - references the buffer (ZSTD_dlm_byRef, or NULL / empty buffer),
+ *   or copies it into an object reserved from cdict->workspace
+ * - reserves the entropy workspace (HUF_WORKSPACE_SIZE bytes)
+ * - resets the compressed block state and the match state
+ * - loads the dictionary with ZSTD_compress_insertDictionary() (ZSTD_dtlm_full) and stores its dictID
+ * cdict->workspace is expected to be set up by the caller.
+ * @return : 0, or an error code */
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    ZSTD_dictLoadMethod_e dictLoadMethod,
+                    ZSTD_dictContentType_e dictContentType,
+                    ZSTD_compressionParameters cParams)
+{
+    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
+    assert(!ZSTD_checkCParams(cParams));
+    cdict->matchState.cParams = cParams;
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictContent = dictBuffer;
+    } else {
+         /* by copy : the dictionary is duplicated into the cdict workspace (size rounded up to pointer alignment) */
+         void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
+        cdict->dictContent = internalBuffer;
+        memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+
+    /* NOTE(review): unlike internalBuffer above, this reservation is not checked for NULL -
+     * confirm the workspace is always sized to hold it */
+    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
+    /* Reset the state to no dictionary */
+    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+    FORWARD_IF_ERROR(ZSTD_reset_matchState(
+        &cdict->matchState,
+        &cdict->workspace,
+        &cParams,
+        ZSTDcrp_makeClean,
+        ZSTDirp_reset,
+        ZSTD_resetTarget_CDict), "");
+    /* (Maybe) load the dictionary
+     * Skips loading the dictionary if it is < 8 bytes.
+     */
+    /* temporary params, zeroed except for the compression level, contentSizeFlag and cParams;
+     * no ldm state is passed (NULL) */
+    {   ZSTD_CCtx_params params;
+        memset(&params, 0, sizeof(params));
+        params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+        params.fParams.contentSizeFlag = 1;
+        params.cParams = cParams;
+        {   size_t const dictID = ZSTD_compress_insertDictionary(
+                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                    &params, cdict->dictContent, cdict->dictContentSize,
+                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+            assert(dictID <= (size_t)(U32)-1);
+            cdict->dictID = (U32)dictID;
+        }
+    }
+
+    return 0;
+}
+
+/*! ZSTD_createCDict_advanced() :
+ *  Allocate a single workspace through customMem, sized for the CDict struct,
+ *  an HUF_WORKSPACE_SIZE region, the match state and (unless ZSTD_dlm_byRef)
+ *  an aligned copy of the dictionary; emplace the CDict inside it and initialize it.
+ *  customMem must provide both customAlloc and customFree, or neither.
+ *  The resulting cdict has compressionLevel == 0, which signals advanced API usage.
+ * @return : the new CDict, or NULL on half-specified customMem, allocation
+ *           failure, or initialization failure */
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
+    /* exactly one of customAlloc / customFree being set is invalid */
+    if ((customMem.customAlloc == NULL) != (customMem.customFree == NULL)) return NULL;
+
+    {   size_t const dictCopySize = (dictLoadMethod == ZSTD_dlm_byRef) ? 0
+                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                                   + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                                   + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+                                   + dictCopySize;
+        void* const workspace = ZSTD_malloc(workspaceSize, customMem);
+        ZSTD_cwksp ws;
+        ZSTD_CDict* cdict;
+        size_t initResult;
+
+        if (!workspace) {
+            /* allocation failed ; workspace is NULL at this point */
+            ZSTD_free(workspace, customMem);
+            return NULL;
+        }
+
+        /* the CDict struct is the first object reserved from its own workspace */
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        assert(cdict != NULL);
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+        cdict->customMem = customMem;
+        cdict->compressionLevel = 0; /* signals advanced API usage */
+
+        initResult = ZSTD_initCDict_internal(cdict,
+                                        dictBuffer, dictSize,
+                                        dictLoadMethod, dictContentType,
+                                        cParams);
+        if (ZSTD_isError(initResult)) {
+            ZSTD_freeCDict(cdict);
+            return NULL;
+        }
+
+        return cdict;
+    }
+}
+
+/*! ZSTD_createCDict() :
+ *  Create a CDict from a compression level : the dictionary is loaded by copy
+ *  (ZSTD_dlm_byCopy), with content type ZSTD_dct_auto and the default allocator.
+ *  On success the cdict remembers the level, with 0 replaced by ZSTD_CLEVEL_DEFAULT.
+ * @return : the new CDict, or NULL on failure */
+ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                                        ZSTD_dlm_byCopy, ZSTD_dct_auto,
+                                                        cParams, ZSTD_defaultCMem);
+    if (cdict == NULL) return NULL;
+    cdict->compressionLevel = (compressionLevel != 0) ? compressionLevel : ZSTD_CLEVEL_DEFAULT;
+    return cdict;
+}
+
+/*! ZSTD_createCDict_byReference() :
+ *  Same as ZSTD_createCDict(), but the dictionary is loaded with ZSTD_dlm_byRef.
+ *  Unlike ZSTD_createCDict(), this function does not store compressionLevel
+ *  into the resulting cdict.
+ * @return : the new CDict, or NULL on failure */
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const levelParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                                        ZSTD_dlm_byRef, ZSTD_dct_auto,
+                                                        levelParams, ZSTD_defaultCMem);
+    return cdict;
+}
+
+/*! ZSTD_freeCDict() :
+ *  Release the cdict's workspace and, when the cdict struct is not owned by
+ *  that workspace, the struct itself. Accepts NULL.
+ * @return : always 0 */
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    ZSTD_customMem cMem;
+    int cdictInWorkspace;
+
+    if (cdict==NULL) return 0;   /* support free on NULL */
+
+    /* read everything needed from cdict before the workspace is released */
+    cMem = cdict->customMem;
+    cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+    ZSTD_cwksp_free(&cdict->workspace, cMem);
+    if (cdictInWorkspace) return 0;   /* struct was part of the workspace */
+    ZSTD_free(cdict, cMem);
+    return 0;
+}
+
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-bytes aligned (NULL is returned otherwise).
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (misaligned workspace,
+ *           size too small, or dictionary initialization failure)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const dictCopySize = (dictLoadMethod == ZSTD_dlm_byRef) ? 0
+                              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)));
+    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                            + dictCopySize
+                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                            + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    ZSTD_cwksp ws;
+    ZSTD_CDict* cdict;
+    size_t initResult;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+
+    /* reserve the CDict struct from the workspace, then hand the workspace over to it */
+    ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+    cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+    if (cdict == NULL) return NULL;
+    ZSTD_cwksp_move(&cdict->workspace, &ws);
+
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    initResult = ZSTD_initCDict_internal(cdict,
+                                         dict, dictSize,
+                                         dictLoadMethod, dictContentType,
+                                         cParams);
+    if (ZSTD_isError(initResult)) return NULL;
+
+    return cdict;
+}
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  cdict must be != NULL (asserted).
+ * @return : a copy of the compression parameters stored in the cdict's match state */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    ZSTD_compressionParameters cParams;
+    assert(cdict != NULL);
+    cParams = cdict->matchState.cParams;
+    return cParams;
+}
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL (a NULL cdict is reported as a dictionary_wrong error).
+ * Starts a non-buffered compression session on cctx using cdict.
+ * The cParams stored in the cdict are used when the source is small, small
+ * relative to the dictionary, of unknown size, or the cdict was created through
+ * the advanced API (compressionLevel == 0) -- unless attachDictPref is
+ * ZSTD_dictForceLoad. Otherwise cParams are recomputed from the cdict's level. */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    {   ZSTD_CCtx_params params = cctx->requestedParams;
+        int const useCDictParams =
+               ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+              || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+              || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+              || cdict->compressionLevel == 0 )
+            && (params.attachDictPref != ZSTD_dictForceLoad);
+
+        if (useCDictParams) {
+            params.cParams = ZSTD_getCParamsFromCDict(cdict);
+        } else {
+            params.cParams = ZSTD_getCParams(cdict->compressionLevel,
+                                             pledgedSrcSize,
+                                             cdict->dictContentSize);
+        }
+
+        /* Increase window log to fit the entire dictionary and source if the
+         * source size is known. Limit the increase to 19, which is the
+         * window log for compression level 1 with the largest source size.
+         */
+        if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+            U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+            U32 limitedSrcLog = 1;
+            if (limitedSrcSize > 1) {
+                limitedSrcLog = ZSTD_highbit32(limitedSrcSize - 1) + 1;
+            }
+            params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
+        }
+
+        params.fParams = fParams;
+        return ZSTD_compressBegin_internal(cctx,
+                                           NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                           cdict,
+                                           &params, pledgedSrcSize,
+                                           ZSTDb_not_buffered);
+    }
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * Begin compression with cdict, using all-zero frame parameters
+ * (no content size, no checksum, dictID not suppressed) and a source size
+ * declared as ZSTD_CONTENTSIZE_UNKNOWN. */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    size_t beginResult;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    beginResult = ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return beginResult;
+}
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  One-shot compression of src into dst using cdict and caller-chosen frame
+ *  parameters : begins a session with srcSize as the pledged size, then ends
+ *  it on the whole input.
+ * @return : result of ZSTD_compressEnd(), or an error code forwarded from the begin step */
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    /* will check if cdict != NULL */
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");
+    {   size_t const cSize = ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+        return cSize;
+    }
+}
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded
+ *  (content size enabled, no checksum, dictID not suppressed). */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    size_t const cSize = ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+    return cSize;
+}
+
+
+
+/* ******************************************************************
+*  Streaming
+********************************************************************/
+
+/*! ZSTD_createCStream() :
+ *  Create a compression stream using the default allocator (ZSTD_defaultCMem). */
+ZSTD_CStream* ZSTD_createCStream(void)
+{
+    ZSTD_CStream* zcs;
+    DEBUGLOG(3, "ZSTD_createCStream");
+    zcs = ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+    return zcs;
+}
+
+/*! ZSTD_initStaticCStream() :
+ *  Forwards to ZSTD_initStaticCCtx() with the same workspace arguments. */
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    ZSTD_CStream* const zcs = ZSTD_initStaticCCtx(workspace, workspaceSize);
+    return zcs;
+}
+
+/*! ZSTD_createCStream_advanced() :
+ *  Create a compression stream with a caller-provided allocator. */
+ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
+{
+    /* CStream and CCtx are now same object : creation is delegated to the CCtx constructor */
+    ZSTD_CStream* const zcs = ZSTD_createCCtx_advanced(customMem);
+    return zcs;
+}
+
+/*! ZSTD_freeCStream() :
+ * @return : whatever ZSTD_freeCCtx() returns for this object */
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
+{
+    size_t const freeResult = ZSTD_freeCCtx(zcs);   /* same object */
+    return freeResult;
+}
+
+
+
+/*======   Initialization   ======*/
+
+/*! ZSTD_CStreamInSize() :
+ * @return : ZSTD_BLOCKSIZE_MAX */
+size_t ZSTD_CStreamInSize(void)
+{
+    return ZSTD_BLOCKSIZE_MAX;
+}
+
+/*! ZSTD_CStreamOutSize() :
+ * @return : compress bound of one maximum-size block, plus one block header,
+ *           plus 4 bytes for the 32-bits hash */
+size_t ZSTD_CStreamOutSize(void)
+{
+    size_t const maxBlockBound = ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX);
+    size_t const hashSize = 4;   /* 32-bits hash */
+    return maxBlockBound + ZSTD_blockHeaderSize + hashSize;
+}
+
+/*! ZSTD_resetCStream_internal() :
+ *  Finalize cParams from (params, pledgedSrcSize, dictSize), begin a buffered
+ *  compression session using either dict or cdict (not both), then reset the
+ *  streaming buffer bookkeeping and move the stream to the zcss_load stage.
+ * @return : 0, or an error code forwarded from ZSTD_compressBegin_internal() */
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
+                    const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
+                    const ZSTD_CDict* const cdict,
+                    ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_resetCStream_internal");
+    /* Finalize the compression parameters */
+    params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                                         dict, dictSize, dictContentType, ZSTD_dtlm_fast,
+                                         cdict,
+                                         &params, pledgedSrcSize,
+                                         ZSTDb_buffered) , "");
+
+    /* input side : nothing buffered yet */
+    cctx->inBuffPos = 0;
+    cctx->inToCompress = 0;
+    /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
+    cctx->inBuffTarget = cctx->blockSize;
+    if (cctx->blockSize == pledgedSrcSize) {
+        cctx->inBuffTarget += 1;
+    }
+    /* output side : nothing pending */
+    cctx->outBuffFlushedSize = 0;
+    cctx->outBuffContentSize = 0;
+    cctx->frameEnded = 0;
+    cctx->streamStage = zcss_load;
+    return 0;   /* ready to go */
+}
+
+/* ZSTD_resetCStream():
+ * Reset the session of zcs and record a new pledged source size.
+ * pss == 0 means "unknown" */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 pledgedSrcSize = pss;
+    if (pss==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : for lib/compress only. Used by zstdmt_compress.c.
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both
+ *  Resets the session, records pledgedSrcSize, installs *params as the
+ *  requested parameters, then loads dict or references cdict.
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    const ZSTD_CCtx_params* params,
+                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_internal");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    zcs->requestedParams = *params;
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if (dict == NULL) {
+        /* Dictionary is cleared if !cdict */
+        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+        return 0;
+    }
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters
+ * and over the pledged source size.
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
+    /* start a fresh session, then record the pledged size, frame parameters and cdict */
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    zcs->requestedParams.fParams = fParams;
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict() :
+ * Reset the session of zcs and make it reference cdict.
+ * note : cdict must outlive compression session
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+
+/* ZSTD_initCStream_advanced() :
+ * pledgedSrcSize must be exact.
+ * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * params.cParams are validated before being applied.
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pss)
+{
+    /* for compatibility with older programs relying on this behavior.
+     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
+     * This line will be removed in the future.
+     */
+    U64 pledgedSrcSize = pss;
+    if (pss==0 && params.fParams.contentSizeFlag==0) {
+        /* legacy convention : 0 without contentSizeFlag means "unknown" */
+        pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    }
+    DEBUGLOG(4, "ZSTD_initCStream_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, &params);
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+/* ZSTD_initCStream_usingDict() :
+ * Reset the session of zcs, set its compression level, then load dict
+ * (dictSize bytes) through ZSTD_CCtx_loadDictionary().
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+/* ZSTD_initCStream_srcSize() :
+ * Reset the session of zcs, drop any referenced cdict, set the compression
+ * level and record the pledged source size.
+ * pss == 0 means "unknown".
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 pledgedSrcSize = pss;
+    if (pss==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+/* ZSTD_initCStream() :
+ * Reset the session of zcs, drop any referenced cdict (refCDict with NULL),
+ * and set the compression level.
+ * @return : 0, or an error code forwarded from the first failing step */
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    return 0;
+}
+
+/*======   Compression   ======*/
+
+/* ZSTD_nextInputSizeHint() :
+ * @return : number of bytes still missing to reach inBuffTarget,
+ *           or a full blockSize when the target is already reached */
+static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
+{
+    size_t const missing = cctx->inBuffTarget - cctx->inBuffPos;
+    return (missing != 0) ? missing : cctx->blockSize;
+}
+
+/** ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants
+ *  (declared static in this file)
+ *  Runs the single-threaded streaming state machine (zcss_load / zcss_flush)
+ *  until no more progress can be made, then writes the consumed/produced
+ *  amounts back into input->pos and output->pos.
+ * @return : 0 if the frame has ended, otherwise a hint size for next input;
+ *           or an error code */
+static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective const flushMode)
+{
+    /* local cursors over the caller's buffers ; pointer arithmetic is skipped
+     * whenever the offset is 0 (so nothing is added to a possibly NULL base) */
+    const char* const istart = (const char*)input->src;
+    const char* const iend = input->size != 0 ? istart + input->size : istart;
+    const char* ip = input->pos != 0 ? istart + input->pos : istart;
+    char* const ostart = (char*)output->dst;
+    char* const oend = output->size != 0 ? ostart + output->size : ostart;
+    char* op = output->pos != 0 ? ostart + output->pos : ostart;
+    U32 someMoreWork = 1;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
+    assert(zcs->inBuff != NULL);
+    assert(zcs->inBuffSize > 0);
+    assert(zcs->outBuff !=  NULL);
+    assert(zcs->outBuffSize > 0);
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
+
+    while (someMoreWork) {
+        switch(zcs->streamStage)
+        {
+        case zcss_init:
+            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
+
+        case zcss_load:
+            /* fast path : ending the frame, output large enough for the remaining
+             * input, and nothing buffered yet => compress straight from src to dst */
+            if ( (flushMode == ZSTD_e_end)
+              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer */
+            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
+                zcs->inBuffPos += loaded;
+                if (loaded != 0)
+                    ip += loaded;
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
+            /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
+            {   void* cDst;
+                size_t cSize;
+                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
+                size_t oSize = oend-op;
+                /* last block only when ending the frame AND all caller input has been loaded */
+                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                if (oSize >= ZSTD_compressBound(iSize))
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
+                else
+                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
+                cSize = lastBlock ?
+                        ZSTD_compressEnd(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize) :
+                        ZSTD_compressContinue(zcs, cDst, oSize,
+                                    zcs->inBuff + zcs->inToCompress, iSize);
+                FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                zcs->frameEnded = lastBlock;
+                /* prepare next block */
+                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+                /* next block would not fit in inBuff : restart loading from its beginning */
+                if (zcs->inBuffTarget > zcs->inBuffSize)
+                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                         (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
+                if (!lastBlock)
+                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                zcs->inToCompress = zcs->inBuffPos;
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    }
+                    break;
+                }
+                /* block was compressed into the internal outBuff : it must now be flushed */
+                zcs->outBuffContentSize = cSize;
+                zcs->outBuffFlushedSize = 0;
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
+            }
+	    /* fall-through */
+        case zcss_flush:
+            DEBUGLOG(5, "flush stage");
+            /* copy as much pending compressed data as fits into the caller's output */
+            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
+                if (flushed)
+                    op += flushed;
+                zcs->outBuffFlushedSize += flushed;
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
+                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    break;
+                }
+                /* everything flushed and frame still open : go back to loading input */
+                zcs->streamStage = zcss_load;
+                break;
+            }
+
+        default: /* impossible */
+            assert(0);
+        }
+    }
+
+    /* report progress back to the caller's buffer descriptors */
+    input->pos = ip - istart;
+    output->pos = op - ostart;
+    if (zcs->frameEnded) return 0;
+    return ZSTD_nextInputSizeHint(zcs);
+}
+
+/* ZSTD_nextInputSizeHint_MTorST() :
+ * Input size hint for either mode : when built with ZSTD_MULTITHREAD and at
+ * least one worker is applied, the hint comes from the mtctx ; otherwise it is
+ * the single-thread hint. */
+static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers < 1)
+        return ZSTD_nextInputSizeHint(cctx);
+    assert(cctx->mtctx != NULL);
+    return ZSTDMT_nextInputSizeHint(cctx->mtctx);
+#else
+    return ZSTD_nextInputSizeHint(cctx);
+#endif
+}
+
+/* ZSTD_compressStream() :
+ * Runs ZSTD_compressStream2() with ZSTD_e_continue.
+ * @return : a hint size for next input, or an error code forwarded from
+ *           ZSTD_compressStream2() */
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
+    {   size_t const nextHint = ZSTD_nextInputSizeHint_MTorST(zcs);
+        return nextHint;
+    }
+}
+
+
+/*! ZSTD_compressStream2() :
+ *  Streaming compression entry point.
+ *  On the first call of a session (streamStage == zcss_init) it performs a
+ *  transparent initialization : local dictionary setup, single-use prefix
+ *  consumption, cParams finalization, then either multi-threaded or
+ *  single-threaded stream setup. It then runs the compression stage.
+ *
+ *  When the session starts directly with ZSTD_e_end, the pledged source size is
+ *  auto-fixed to the amount of input still to be consumed, i.e.
+ *  input->size - input->pos : the caller has no obligation to start from pos==0.
+ *
+ * @return : single-thread : amount of data remaining to flush from the internal buffer ;
+ *           multi-thread  : the value returned by ZSTDMT_compressStream_generic() ;
+ *           or an error code (including GENERIC when pos > size in either buffer) */
+size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp)
+{
+    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
+    /* check conditions */
+    RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer");
+    RETURN_ERROR_IF(input->pos  > input->size, GENERIC, "invalid buffer");
+    assert(cctx!=NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        ZSTD_CCtx_params params = cctx->requestedParams;
+        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+        memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+        assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+        /* auto-fix pledgedSrcSize : only the not-yet-consumed part of the input
+         * counts (pos <= size was validated above, so this cannot underflow) */
+        if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = (input->size - input->pos) + 1;
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
+
+
+#ifdef ZSTD_MULTITHREAD
+        if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
+            params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
+        }
+        if (params.nbWorkers > 0) {
+            /* mt context creation */
+            if (cctx->mtctx == NULL) {
+                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
+                            params.nbWorkers);
+                cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
+                RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
+            }
+            /* mt compression */
+            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
+            FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
+                        cctx->mtctx,
+                        prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
+            cctx->streamStage = zcss_load;
+            cctx->appliedParams.nbWorkers = params.nbWorkers;
+        } else
+#endif
+        {   FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
+                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+                            cctx->cdict,
+                            params, cctx->pledgedSrcSizePlusOne-1) , "");
+            assert(cctx->streamStage == zcss_load);
+            assert(cctx->appliedParams.nbWorkers == 0);
+    }   }
+    /* end of transparent initialization stage */
+
+    /* compression stage */
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers > 0) {
+        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
+        size_t flushMin;
+        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
+        if (cctx->cParamsChanged) {
+            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
+            cctx->cParamsChanged = 0;
+        }
+        /* for flush/end directives, keep going until done or output is full */
+        do {
+            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+            if ( ZSTD_isError(flushMin)
+              || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+            }
+            FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
+        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
+        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
+        /* Either we don't require maximum forward progress, we've finished the
+         * flush, or we are out of output space.
+         */
+        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
+        return flushMin;
+    }
+#endif
+    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
+    DEBUGLOG(5, "completed ZSTD_compressStream2");
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(), with the buffer descriptors passed as
+ *  separate (pointer, capacity/size, position) arguments.
+ *  *dstPos and *srcPos are read on entry and always updated on exit.
+ * @return : the value returned by ZSTD_compressStream2() */
+size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer outBuf = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  inBuf  = { src, srcSize, *srcPos };
+    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+    size_t const status = ZSTD_compressStream2(cctx, &outBuf, &inBuf, endOp);
+    /* positions are written back whether or not status is an error */
+    *srcPos = inBuf.pos;
+    *dstPos = outBuf.pos;
+    return status;
+}
+
+/* ZSTD_compress2() :
+ * Resets the session of `cctx`, then compresses `src` into `dst` with a
+ * single ZSTD_e_end pass.
+ * @return : number of bytes written into `dst`,
+ *           or an error code; dstSize_tooSmall is reported when the pass
+ *           ends with data still left to flush. */
+size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t written = 0;
+    size_t consumed = 0;
+    size_t remaining;
+
+    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+
+    remaining = ZSTD_compressStream2_simpleArgs(cctx,
+                                    dst, dstCapacity, &written,
+                                    src, srcSize, &consumed,
+                                    ZSTD_e_end);
+    FORWARD_IF_ERROR(remaining, "ZSTD_compressStream2_simpleArgs failed");
+
+    if (remaining == 0) {
+        /* frame completed : all input is expected consumed */
+        assert(consumed == srcSize);
+        return written;
+    }
+
+    /* compression not completed, due to lack of output space */
+    assert(written == dstCapacity);
+    RETURN_ERROR(dstSize_tooSmall, "");
+}
+
+/*======   Finalize   ======*/
+
+/*! ZSTD_flushStream() :
+ *  Runs ZSTD_compressStream2() with an empty input and the ZSTD_e_flush directive.
+ * @return : amount of data remaining to flush (or an error code) */
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer noInput = { NULL, 0, 0 };   /* nothing new to consume */
+    size_t const remaining = ZSTD_compressStream2(zcs, output, &noInput, ZSTD_e_flush);
+    return remaining;
+}
+
+
+/*! ZSTD_endStream() :
+ *  Runs ZSTD_compressStream2() with an empty input and the ZSTD_e_end directive.
+ * @return : an estimation of the amount of data remaining to flush, or an error code.
+ *  When workers are in use (nbWorkers > 0), the value reported by
+ *  ZSTD_compressStream2() is returned unchanged.
+ *  Otherwise, while the frame is not ended, the size of a block header and of
+ *  the optional checksum are added on top of it. */
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer noInput = { NULL, 0, 0 };
+    size_t const pending = ZSTD_compressStream2(zcs, output, &noInput, ZSTD_e_end);
+    FORWARD_IF_ERROR( pending , "ZSTD_compressStream2 failed");
+
+    /* multi-threaded mode : minimal estimation */
+    if (zcs->appliedParams.nbWorkers > 0) {
+        return pending;
+    }
+
+    /* single thread mode : attempt to calculate remaining to flush more precisely */
+    {   size_t blockHeaderBytes = 0;
+        size_t checksumBytes = 0;
+        if (!zcs->frameEnded) {
+            blockHeaderBytes = ZSTD_BLOCKHEADERSIZE;
+            checksumBytes = (size_t)(zcs->appliedParams.fParams.checksumFlag * 4);
+        }
+        {   size_t const toFlush = pending + blockHeaderBytes + checksumBytes;
+            DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
+            return toFlush;
+        }
+    }
+}
+
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+
+/* ZSTD_maxCLevel() :
+ * @return : the highest compression level, ZSTD_MAX_CLEVEL */
+int ZSTD_maxCLevel(void)
+{
+    return ZSTD_MAX_CLEVEL;
+}
+
+/* ZSTD_minCLevel() :
+ * @return : the lowest (negative) compression level, derived from ZSTD_TARGETLENGTH_MAX */
+int ZSTD_minCLevel(void)
+{
+    int const lowestLevel = (int)-ZSTD_TARGETLENGTH_MAX;
+    return lowestLevel;
+}
+
+/* ZSTD_defaultCParameters :
+ * Pre-defined compression parameters, indexed as [tableID][row].
+ * - tableID selects one of the 4 sub-tables below, according to the estimated
+ *   amount of data (see the comment opening each sub-table).
+ * - row is the compression level, from 1 to ZSTD_MAX_CLEVEL;
+ *   row 0 is the base entry used for negative levels.
+ * Column legend (W, C, H, S, L, TL, strat) : presumably windowLog, chainLog,
+ * hashLog, searchLog, minMatch, targetLength, strategy, i.e. the field order of
+ * ZSTD_compressionParameters -- TODO confirm against its declaration. */
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
+    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+/*! ZSTD_getCParams_internal() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
+ *        Use dictSize == 0 for unknown or unused.
+ *  Level selection :
+ *  - compressionLevel == 0 selects ZSTD_CLEVEL_DEFAULT ;
+ *  - compressionLevel  < 0 selects row 0 of the table, and uses the magnitude
+ *    of the level as targetLength (acceleration factor) ;
+ *  - compressionLevel  > ZSTD_MAX_CLEVEL is capped to ZSTD_MAX_CLEVEL.
+ *  The selected row is then refined by ZSTD_adjustCParams_internal(). */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+    /* unknown source size, but a dictionary is present : add a fixed 500 bytes to the estimation */
+    size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+    U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    /* each size threshold satisfied moves one table further (0 : largest inputs, 3 : smallest) */
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row = compressionLevel;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        if (compressionLevel < 0) {
+            /* acceleration factor.
+             * The negation is done in unsigned arithmetic : `-compressionLevel`
+             * is a signed overflow (undefined behavior) when compressionLevel == INT_MIN.
+             * The result is identical for every other negative level. */
+            cp.targetLength = 0U - (unsigned)compressionLevel;
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused.
+ *  A srcSizeHint of 0 is forwarded as ZSTD_CONTENTSIZE_UNKNOWN. */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    unsigned long long const effectiveHint =
+            (srcSizeHint != 0) ? srcSizeHint : ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, effectiveHint, dictSize);
+}
+
+/*! ZSTD_getParams_internal() :
+ *  same idea as ZSTD_getCParams_internal() :
+ *  srcSizeHint is forwarded as-is (0 is not translated into "unknown" here).
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  The structure is zeroed, then receives the selected compression parameters,
+ *  and `fParams.contentSizeFlag` is set to 1. */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+    ZSTD_compressionParameters const selected = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
+    ZSTD_parameters result;
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    memset(&result, 0, sizeof(result));
+    result.fParams.contentSizeFlag = 1;
+    result.cParams = selected;
+    return result;
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values.
+ *  A srcSizeHint of 0 is forwarded as ZSTD_CONTENTSIZE_UNKNOWN. */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+    unsigned long long const effectiveHint =
+            (srcSizeHint != 0) ? srcSizeHint : ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getParams_internal(compressionLevel, effectiveHint, dictSize);
+}
+/**** ended inlining compress/zstd_compress.c ****/
+/**** start inlining compress/zstd_double_fast.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: zstd_double_fast.h ****/
+
+
+/* ZSTD_fillDoubleHashTable() :
+ * Inserts positions from ms->nextToUpdate up to `end` into the two tables used
+ * by the double-fast match finder :
+ * - ms->hashTable  ("long" table), hashed over 8 bytes ;
+ * - ms->chainTable ("short" table), hashed over cParams.minMatch bytes.
+ * Positions are visited in groups of 3. The first position of each group always
+ * goes into both tables. With a `dtlm` other than ZSTD_dtlm_fast, the 2 other
+ * positions of the group go into the long table only, and only where the slot
+ * is still empty (== 0). */
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const longTable  = ms->hashTable;
+    U32* const shortTable = ms->chainTable;
+    U32  const longBits   = cParams->hashLog;
+    U32  const shortBits  = cParams->chainLog;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const lastPos = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 stride = 3;
+    const BYTE* cursor = base + ms->nextToUpdate;
+
+    while (cursor + stride - 1 <= lastPos) {
+        U32 const cursorIdx = (U32)(cursor - base);
+        U32 k = 0;
+        do {
+            size_t const shortHash = ZSTD_hashPtr(cursor + k, shortBits, mls);
+            size_t const longHash  = ZSTD_hashPtr(cursor + k, longBits, 8);
+            if (k == 0) {
+                /* head of the group : unconditional insertion in both tables */
+                shortTable[shortHash] = cursorIdx;
+                longTable[longHash] = cursorIdx;
+            } else if (longTable[longHash] == 0) {
+                /* other positions : long table only, and only into an empty slot */
+                longTable[longHash] = cursorIdx + k;
+            }
+            ++k;
+            /* Only load extra positions for ZSTD_dtlm_full */
+        } while (dtlm != ZSTD_dtlm_fast && k < stride);
+        cursor += stride;
+    }
+}
+
+
+/* ZSTD_compressBlock_doubleFast_generic() :
+ * Match finder of the "double fast" strategy, shared by the ZSTD_noDict and
+ * ZSTD_dictMatchState modes (other modes are rejected by an assert).
+ * Two tables are maintained while scanning `src` :
+ * - hashLong  (ms->hashTable)  : positions hashed over 8 bytes ;
+ * - hashSmall (ms->chainTable) : positions hashed over `mls` bytes.
+ * At each position, candidates are tried in this order : repcode at ip+1,
+ * long match at ip, then short match at ip (after first trying a long match
+ * at ip+1). Accepted matches are recorded with ZSTD_storeSeq().
+ * `mls` and `dictMode` are marked as template parameters : the wrappers pass
+ * them as literal constants.
+ * On exit, rep[0] and rep[1] receive the offsets to carry into the next block.
+ * @return : size of the last literals run, i.e. the bytes after the last match */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    /* Dictionary-side views : only populated when dictMode == ZSTD_dictMatchState.
+     * Otherwise pointers are NULL, indexes are 0, and dictHBitsL / dictHBitsS
+     * fall back to the local table sizes. */
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams =
+                                     dictMode == ZSTD_dictMatchState ?
+                                     &dms->cParams : NULL;
+    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
+                                     dms->hashTable : NULL;
+    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
+                                     dms->chainTable : NULL;
+    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictStartIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    /* value to add to a dictionary index to express it in the index space of the current window */
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->hashLog : hBitsL;
+    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->chainLog : hBitsS;
+    /* NOTE(review): in ZSTD_noDict mode, (dictEnd - dictStart) subtracts two NULL pointers */
+    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
+    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+
+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+    }
+
+    /* init */
+    /* start one byte later when no dictionary nor prefix byte precedes the block */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const current = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+        U32 const maxRep = current - windowLow;
+        /* a repcode larger than maxRep is disabled (set to 0) ;
+         * its value is kept in offsetSaved, to be restored into rep[] on exit */
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        U32 offset;
+        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const current = (U32)(ip-base);
+        U32 const matchIndexL = hashLong[h2];
+        U32 matchIndexS = hashSmall[h];
+        const BYTE* matchLong = base + matchIndexL;
+        const BYTE* match = base + matchIndexS;
+        const U32 repIndex = current + 1 - offset_1;
+        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                            && repIndex < prefixLowestIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
+
+        /* check dictMatchState repcode */
+        if (dictMode == ZSTD_dictMatchState
+            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        /* check noDict repcode */
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        if (matchIndexL > prefixLowestIndex) {
+            /* check prefix long match */
+            if (MEM_read64(matchLong) == MEM_read64(ip)) {
+                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+                offset = (U32)(ip-matchLong);
+                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState long match */
+            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
+            assert(dictMatchL < dictEnd);
+
+            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
+                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
+                offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
+                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
+                goto _match_found;
+        }   }
+
+        if (matchIndexS > prefixLowestIndex) {
+            /* check prefix short match */
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState short match */
+            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            match = dictBase + dictMatchIndexS;
+            matchIndexS = dictMatchIndexS + dictIndexDelta;
+
+            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+        }   }
+
+        /* no match at this position : the step grows with the distance from the last match (anchor) */
+        ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+        PREFETCH_L1(ip+256);
+#endif
+        continue;
+
+_search_next_long:
+        /* a short match exists at ip : first try to find a long match at ip+1 */
+
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            U32 const matchIndexL3 = hashLong[hl3];
+            const BYTE* matchL3 = base + matchIndexL3;
+            hashLong[hl3] = current + 1;
+
+            /* check prefix long +1 match */
+            if (matchIndexL3 > prefixLowestIndex) {
+                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
+                    ip++;
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            } else if (dictMode == ZSTD_dictMatchState) {
+                /* check dict long +1 match */
+                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
+                assert(dictMatchL3 < dictEnd);
+                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
+                    ip++;
+                    offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
+                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+        }   }   }
+
+        /* if no long +1 match, explore the short match we found */
+        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
+            offset = (U32)(current - matchIndexS);
+            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        } else {
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            offset = (U32)(ip - match);
+            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        }
+
+        /* fall-through */
+
+_match_found:
+        /* new offset becomes offset_1 ; the previous offset_1 moves to offset_2 */
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            if (dictMode == ZSTD_dictMatchState) {
+                while (ip <= ilimit) {
+                    U32 const current2 = (U32)(ip-base);
+                    U32 const repIndex2 = current2 - offset_2;
+                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
+                        && repIndex2 < prefixLowestIndex ?
+                            dictBase + repIndex2 - dictIndexDelta :
+                            base + repIndex2;
+                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                        ip += repLength2;
+                        anchor = ip;
+                        continue;
+                    }
+                    break;
+            }   }
+
+            if (dictMode == ZSTD_noDict) {
+                while ( (ip <= ilimit)
+                     && ( (offset_2>0)
+                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                    ip += rLength;
+                    anchor = ip;
+                    continue;   /* faster when present ... (?) */
+        }   }   }
+    }   /* while (ip < ilimit) */
+
+    /* save reps for next block */
+    /* an offset still at 0 (disabled during init) is replaced by offsetSaved */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+/* ZSTD_compressBlock_doubleFast() :
+ * Double-fast block compressor, without dictionary (ZSTD_noDict).
+ * Dispatches on ms->cParams.minMatch so that the generic body receives `mls`
+ * as a literal constant. Any value other than 5, 6 or 7 uses the 4-bytes variant.
+ * @return : the value returned by ZSTD_compressBlock_doubleFast_generic() */
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    switch (ms->cParams.minMatch)
+    {
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+    case 4 :
+    default: /* includes case 3 */
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+    }
+}
+
+
+/* ZSTD_compressBlock_doubleFast_dictMatchState() :
+ * Double-fast block compressor, in ZSTD_dictMatchState mode.
+ * Dispatches on ms->cParams.minMatch so that the generic body receives `mls`
+ * as a literal constant. Any value other than 5, 6 or 7 uses the 4-bytes variant.
+ * @return : the value returned by ZSTD_compressBlock_doubleFast_generic() */
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    switch (ms->cParams.minMatch)
+    {
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+    case 4 :
+    default: /* includes case 3 */
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
+
+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+        const U32 matchIndex = hashSmall[hSmall];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+
+        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+        const U32 matchLongIndex = hashLong[hLong];
+        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* matchLong = matchLongBase + matchLongIndex;
+
+        const U32 current = (U32)(ip-base);
+        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        size_t mLength;
+        hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */
+
+        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
+            & (repIndex > dictStartIndex))
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else {
+            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
+                offset = current - matchLongIndex;
+                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndex3 = hashLong[h3];
+                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
+                const BYTE* match3 = match3Base + matchIndex3;
+                U32 offset;
+                hashLong[h3] = current + 1;
+                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
+                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
+                    ip++;
+                    offset = current+1 - matchIndex3;
+                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                } else {
+                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                    offset = current - matchIndex;
+                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                }
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else {
+                ip += ((ip-anchor) >> kSearchStrength) + 1;
+                continue;
+        }   }
+
+        /* move to next sequence start */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
+                    & (repIndex2 > dictStartIndex))
+                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    /* Non-generic wrapper around ZSTD_compressBlock_doubleFast_extDict_generic().
+     * Picks the call that matches ms->cParams.minMatch, always passing the
+     * match length as a literal, and hands the generic routine's result
+     * back unchanged. */
+    U32 const minMatch = ms->cParams.minMatch;
+    if (minMatch == 5)
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    if (minMatch == 6)
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    if (minMatch == 7)
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    /* minMatch == 4, and every other value (3 included), uses the 4-byte variant */
+    return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+}
+/**** ended inlining compress/zstd_double_fast.c ****/
+/**** start inlining compress/zstd_fast.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: zstd_fast.h ****/
+
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
+{   /* Inserts positions starting at base + ms->nextToUpdate into ms->hashTable; ms->nextToUpdate is read but not written here. */
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hBits = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;   /* last start position that still leaves HASH_READ_SIZE bytes before `end` */
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {   /* bound keeps ip+2 (the furthest position hashed below) <= iend */
+        U32 const current = (U32)(ip - base);
+        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
+        hashTable[hash0] = current;   /* unconditional: replaces whatever index was stored in this slot */
+        if (dtlm == ZSTD_dtlm_fast) continue;   /* fast mode stops at one insertion per step */
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hash] == 0) {  /* not yet filled */
+                    hashTable[hash] = current + p;
+    }   }   }   }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls)
+{   /* Single-hash-table match finder for one block: stores sequences into seqStore, writes rep[0..1] back, returns the size of the trailing literals. */
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* targetLength==0 is treated as 1; with the extra +1 the step is always >= 2 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* anchor = istart;   /* start of the literals not yet covered by a stored sequence */
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;   /* keeps a repcode that gets zeroed below, so it can be written back to rep[] at the end */
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
+    ip0 += (ip0 == prefixStart);   /* skip the very first byte when the block starts exactly at prefixStart */
+    ip1 = ip0 + 1;
+    {   U32 const current = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+        U32 const maxRep = current - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;   /* reaches further back than maxRep: disable it (0 == invalidated) */
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;   /* same for offset_1; if both are too large, offsetSaved ends up holding offset_1 */
+    }
+
+    /* Main Search Loop */
+#ifdef __INTEL_COMPILER
+    /* From intel 'The vector pragma indicates that the loop should be
+     * vectorized if it is legal to do so'. Can be used together with
+     * #pragma ivdep (but have opted to exclude that because intel
+     * warns against using it).*/
+    #pragma vector always
+#endif
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2 - offset_1;   /* the repcode candidate is tested at ip2, not at ip0 */
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+
+#if defined(__aarch64__)
+        PREFETCH_L1(ip0+256);
+#endif
+
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;   /* take one extra byte backwards when it matches as well */
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            mLength += 4;
+            offcode = 0;   /* offcode 0 is stored for a match that reuses offset_1 */
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;   /* no match: the stride grows with the distance from anchor */
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 4;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
+                while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                    { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                    ip0 += rLength;
+                    ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                    anchor = ip0;
+                    continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+        }   }   }
+        ip1 = ip0 + 1;
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;   /* a zeroed repcode falls back to the value stashed during init */
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    /* Non-generic wrapper around ZSTD_compressBlock_fast_generic().
+     * Asserts that no dictMatchState is attached, then picks the call that
+     * matches ms->cParams.minMatch, always passing the match length as a
+     * literal, and hands the generic routine's result back unchanged. */
+    U32 const minMatch = ms->cParams.minMatch;
+    assert(ms->dictMatchState == NULL);
+    if (minMatch == 5)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    if (minMatch == 6)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    if (minMatch == 7)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    /* minMatch == 4, and every other value (3 included), uses the 4-byte variant */
+    return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{   /* Match finder for the case where ms->dictMatchState is attached: candidates come from the local hash table or from the dictionary's own hash table. */
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* targetLength==0 is treated as 1, so stepSize is always >= 1 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;   /* start of the literals not yet covered by a stored sequence */
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);   /* local index minus this delta gives an index relative to dictBase */
+    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);   /* bytes available before ip: prefix part plus dictionary part */
+    const U32 dictHLog             = dictCParams->hashLog;
+
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size. So we just check it. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+
+    /* ensure there will be no underflow
+     * when translating a dict index into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
+    ip += (dictAndPrefixLength == 0);   /* nothing precedes the block: skip its first byte */
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
+        U32 const current = (U32)(ip-base);
+        U32 const matchIndex = hashTable[h];
+        const BYTE* match = base + matchIndex;
+        const U32 repIndex = current + 1 - offset_1;
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashTable[h] = current;   /* update hash table */
+
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else if ( (matchIndex <= prefixStartIndex) ) {   /* no local candidate above prefixStartIndex: consult the dictionary's hash table */
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;   /* no match: the stride grows with the distance from anchor */
+                continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            }
+        } else if (MEM_read32(match) != MEM_read32(ip)) {
+            /* it's not a match, and we're not going to check the dictionary */
+            assert(stepSize >= 1);
+            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+            continue;
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            while (((ip>anchor) & (match>prefixStart))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
+
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            assert(base+current+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;   /* offsetSaved is never reassigned in this function, so the fallback value is 0 */
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    /* Non-generic wrapper around ZSTD_compressBlock_fast_dictMatchState_generic().
+     * Asserts that a dictMatchState is attached, then picks the call that
+     * matches ms->cParams.minMatch, always passing the match length as a
+     * literal, and hands the generic routine's result back unchanged. */
+    U32 const minMatch = ms->cParams.minMatch;
+    assert(ms->dictMatchState != NULL);
+    if (minMatch == 5)
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+    if (minMatch == 6)
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+    if (minMatch == 7)
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+    /* minMatch == 4, and every other value (3 included), uses the 4-byte variant */
+    return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+}
+
+
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{   /* Match finder for a window split in two segments: indexes below prefixStartIndex are resolved against dictBase, the others against base. */
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* targetLength==0 is treated as 1, so stepSize is always >= 1 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;   /* start of the literals not yet covered by a stored sequence */
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;   /* i.e. the larger of dictLimit and lowLimit */
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+        const U32    matchIndex = hashTable[h];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE*  match = matchBase + matchIndex;
+        const U32    current = (U32)(ip-base);
+        const U32    repIndex = current + 1 - offset_1;
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        hashTable[h] = current;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
+        assert(offset_1 <= current +1);   /* check repIndex */
+
+        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
+        } else {
+            if ( (matchIndex < dictStartIndex) ||   /* NOTE(review): `<` here, while the repcode tests use `> dictStartIndex`; presumably intentional, confirm against upstream */
+                 (MEM_read32(match) != MEM_read32(ip)) ) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;   /* no match: the stride grows with the distance from anchor */
+                continue;
+            }
+            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 const offset = current - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
+        }   }
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    /* Non-generic wrapper around ZSTD_compressBlock_fast_extDict_generic().
+     * Picks the call that matches ms->cParams.minMatch, always passing the
+     * match length as a literal, and hands the generic routine's result
+     * back unchanged. */
+    U32 const minMatch = ms->cParams.minMatch;
+    if (minMatch == 5)
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    if (minMatch == 6)
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    if (minMatch == 7)
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    /* minMatch == 4, and every other value (3 included), uses the 4-byte variant */
+    return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+}
+/**** ended inlining compress/zstd_fast.c ****/
+/**** start inlining compress/zstd_lazy.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: zstd_lazy.h ****/
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+
+static void
+ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+                const BYTE* ip, const BYTE* iend,
+                U32 mls)
+{   /* Inserts every index in [ms->nextToUpdate, ip-base) into the hash table and chains it into bt as an unsorted entry, then sets ms->nextToUpdate to ip's index. */
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hashLog = cParams->hashLog;
+
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;   /* one bit fewer than chainLog: each index uses two U32 slots in bt (see 2*(idx&btMask) below) */
+    U32  const btMask = (1 << btLog) - 1;
+
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    if (idx != target)
+        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+                    idx, target, ms->window.dictLimit);
+    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
+    (void)iend;   /* iend is only consumed by the assert above */
+
+    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
+    for ( ; idx < target ; idx++) {
+        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
+        U32    const matchIndex = hashTable[h];   /* index previously stored for this hash */
+
+        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
+        U32*   const sortMarkPtr  = nextCandidatePtr + 1;
+
+        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+        hashTable[h] = idx;   /* Update Hash Table */
+        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
+        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;   /* second slot carries the unsorted marker */
+    }
+    ms->nextToUpdate = target;
+}
+
+
+/** ZSTD_insertDUBT1() :
+ *  sort one already inserted but unsorted position
+ *  assumption : current >= btlow == (current - btmask)
+ *  doesn't fail
+ *
+ *  Starting from the candidate stored in the first slot of `current`, compares
+ *  the bytes at `current` with each candidate and relinks the smaller/larger
+ *  slots of the tree (ms->chainTable) while descending.
+ *  @param ms         match state providing window, cParams and chainTable
+ *  @param current    index of the position to sort
+ *  @param inputEnd   comparison end used when `current` >= window.dictLimit
+ *                    (otherwise comparisons end at dictBase + dictLimit)
+ *  @param nbCompares maximum number of candidates compared
+ *  @param btLow      a candidate at or below this index is linked, then ends the walk
+ *  @param dictMode   when not ZSTD_extDict, candidates are always read relative to window.base
+ */
+static void
+ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
+                 U32 current, const BYTE* inputEnd,
+                 U32 nbCompares, U32 btLow,
+                 const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
+    const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;   /* candidates must be strictly above this index */
+
+
+    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+                current, dictLimit, windowLow);
+    assert(current >= btLow);
+    assert(ip < iend);   /* condition for ZSTD_count */
+
+    while (nbCompares-- && (matchIndex > windowLow)) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < current);
+        /* note : all candidates are now supposed sorted,
+         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
+         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
+
+        if ( (dictMode != ZSTD_extDict)
+          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
+          || (current < dictLimit) /* both in extDict */) {
+            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
+                                     || (matchIndex+matchLength >= dictLimit)) ?
+                                        base : dictBase;
+            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
+                 || (current < dictLimit) );
+            match = mBase + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
+        }
+
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+                    current, matchIndex, (U32)matchLength);
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+                        matchIndex, btLow, nextPtr[1]);
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+                        matchIndex, btLow, nextPtr[0]);
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;   /* write 0 into both pending slots (either may be dummy32) */
+}
+
+/** ZSTD_DUBT_findBetterDictMatch() :
+ *  Looks in the tree of ms->dictMatchState for a match longer than `bestLength`.
+ *  Only reads the dictionary tables; nothing is written to them here.
+ *  @param ms         match state of the current input; ms->dictMatchState is searched
+ *  @param ip         position to match
+ *  @param iend       end of input
+ *  @param offsetPtr  in : current offset code, used in the gain comparison;
+ *                    out : ZSTD_REP_MOVE + distance when a better match is accepted
+ *  @param bestLength length of the best match found so far
+ *  @param nbCompares maximum number of dictionary candidates compared
+ *  @param mls        number of bytes hashed by ZSTD_hashPtr()
+ *  @param dictMode   must be ZSTD_dictMatchState (asserted)
+ *  @return the updated best length (unchanged when nothing better is found)
+ */
+static size_t
+ZSTD_DUBT_findBetterDictMatch (
+        ZSTD_matchState_t* ms,
+        const BYTE* const ip, const BYTE* const iend,
+        size_t* offsetPtr,
+        size_t bestLength,
+        U32 nbCompares,
+        U32 const mls,
+        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_matchState_t * const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
+    const U32 * const dictHashTable = dms->hashTable;
+    U32         const hashLog = dmsCParams->hashLog;
+    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32               dictMatchIndex = dictHashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    U32         const current = (U32)(ip-base);
+    const BYTE* const dictBase = dms->window.base;
+    const BYTE* const dictEnd = dms->window.nextSrc;
+    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
+    U32         const dictLowLimit = dms->window.lowLimit;
+    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;   /* added to a dictionary index to express it in the index space of ms */
+
+    U32*        const dictBt = dms->chainTable;
+    U32         const btLog  = dmsCParams->chainLog - 1;
+    U32         const btMask = (1 << btLog) - 1;
+    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
+
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+
+    (void)dictMode;   /* only consumed by the assert below */
+    assert(dictMode == ZSTD_dictMatchState);
+
+    while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
+        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match = dictBase + dictMatchIndex;
+        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+        if (dictMatchIndex+matchLength >= dictHighLimit)
+            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+        if (matchLength > bestLength) {
+            U32 matchIndex = dictMatchIndex + dictIndexDelta;
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
+                    current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+            }
+            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        } else {
+            /* match is larger than current */
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthLarger = matchLength;
+            dictMatchIndex = nextPtr[0];
+        }
+    }
+
+    if (bestLength >= MINMATCH) {
+        U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                    current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+    }
+    return bestLength;
+
+}
+
+
+/** ZSTD_DUBT_findBestMatch() :
+ *  Sorts the pending unsorted candidates of the hash bucket of `ip`, then
+ *  searches the tree for the longest match while inserting `current` into it.
+ *  @param ms        match state; hashTable, chainTable (the tree) and nextToUpdate are updated
+ *  @param ip        position to match; must satisfy ip <= iend-8 (asserted)
+ *  @param iend      end of input
+ *  @param offsetPtr in : current offset code, used in the gain comparison;
+ *                   out : ZSTD_REP_MOVE + distance when a better match is accepted
+ *  @param mls       number of bytes hashed by ZSTD_hashPtr()
+ *  @param dictMode  ZSTD_extDict enables reads through window.dictBase;
+ *                   ZSTD_dictMatchState adds a search in ms->dictMatchState
+ *  @return length of the best match accepted, 0 when none was
+ */
+static size_t
+ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 const mls,
+                        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32          matchIndex  = hashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    U32    const current = (U32)(ip-base);
+    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
+
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32    const btLow = (btMask >= current) ? 0 : current - btMask;
+    U32    const unsortLimit = MAX(btLow, windowLow);
+
+    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
+    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+    U32          nbCompares = 1U << cParams->searchLog;
+    U32          nbCandidates = nbCompares;
+    U32          previousCandidate = 0;
+
+    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
+    assert(ip <= iend-8);   /* required for h calculation */
+
+    /* reach end of unsorted candidates list */
+    while ( (matchIndex > unsortLimit)
+         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+         && (nbCandidates > 1) ) {
+        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+                    matchIndex);
+        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
+        previousCandidate = matchIndex;
+        matchIndex = *nextCandidate;
+        nextCandidate = bt + 2*(matchIndex&btMask);
+        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+        nbCandidates --;
+    }
+
+    /* nullify last candidate if it's still unsorted
+     * simplification, detrimental to compression ratio, beneficial for speed */
+    if ( (matchIndex > unsortLimit)
+      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+                    matchIndex);
+        *nextCandidate = *unsortedMark = 0;
+    }
+
+    /* batch sort stacked candidates */
+    matchIndex = previousCandidate;
+    while (matchIndex) {  /* will end on matchIndex == 0 */
+        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+        U32 const nextCandidateIdx = *nextCandidateIdxPtr;   /* saved first : ZSTD_insertDUBT1() overwrites this slot */
+        ZSTD_insertDUBT1(ms, matchIndex, iend,
+                         nbCandidates, unsortLimit, dictMode);
+        matchIndex = nextCandidateIdx;
+        nbCandidates++;
+    }
+
+    /* find longest match */
+    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+        const BYTE* const dictBase = ms->window.dictBase;
+        const U32 dictLimit = ms->window.dictLimit;
+        const BYTE* const dictEnd = dictBase + dictLimit;
+        const BYTE* const prefixStart = base + dictLimit;
+        U32* smallerPtr = bt + 2*(current&btMask);
+        U32* largerPtr  = bt + 2*(current&btMask) + 1;
+        U32 matchEndIdx = current + 8 + 1;
+        U32 dummy32;   /* to be nullified at the end */
+        size_t bestLength = 0;
+
+        matchIndex  = hashTable[h];
+        hashTable[h] = current;   /* Update Hash Table */
+
+        /* The budget is decremented only after a completed comparison.
+         * `while (nbCompares-- && ...)` would wrap nbCompares to 0xFFFFFFFF once the
+         * budget is exhausted, so the `nbCompares` test before the dictionary search
+         * would pass and that search would run with an effectively unlimited budget. */
+        for ( ; nbCompares && (matchIndex > windowLow) ; --nbCompares) {
+            U32* const nextPtr = bt + 2*(matchIndex & btMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match;
+
+            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
+                match = base + matchIndex;
+                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+            } else {
+                match = dictBase + matchIndex;
+                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+                if (matchIndex+matchLength >= dictLimit)
+                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+            }
+
+            if (matchLength > bestLength) {
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+                    if (dictMode == ZSTD_dictMatchState) {
+                        nbCompares = 0; /* in addition to avoiding checking any
+                                         * further in this loop, make sure we
+                                         * skip checking in the dictionary. */
+                    }
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (match[matchLength] < ip[matchLength]) {
+                /* match is smaller than current */
+                *smallerPtr = matchIndex;             /* update smaller idx */
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                *largerPtr = matchIndex;
+                commonLengthLarger = matchLength;
+                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                largerPtr = nextPtr;
+                matchIndex = nextPtr[0];
+        }   }
+
+        *smallerPtr = *largerPtr = 0;
+
+        /* spend whatever remains of the compare budget in the dictionary */
+        if (dictMode == ZSTD_dictMatchState && nbCompares) {
+            bestLength = ZSTD_DUBT_findBetterDictMatch(
+                    ms, ip, iend,
+                    offsetPtr, bestLength, nbCompares,
+                    mls, dictMode);
+        }
+
+        assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
+        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+        if (bestLength >= MINMATCH) {
+            U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                        current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+        }
+        return bestLength;
+    }
+}
+
+
+/** ZSTD_BtFindBestMatch() :
+ *  Brings the tree up to date with ZSTD_updateDUBT(), then returns the result
+ *  of ZSTD_DUBT_findBestMatch() for `ip`.
+ *  Returns 0 without searching when `ip` lies before ms->nextToUpdate. */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iLimit,
+                      size_t* offsetPtr,
+                const U32 mls /* template */,
+                const ZSTD_dictMode_e dictMode)
+{
+    const BYTE* const firstToUpdate = ms->window.base + ms->nextToUpdate;
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+    if (ip >= firstToUpdate) {
+        ZSTD_updateDUBT(ms, ip, iLimit, mls);
+        return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
+    }
+    return 0;   /* skipped area */
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_BtFindBestMatch() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_noDict. */
+static size_t
+ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
+                            const BYTE* ip, const BYTE* const iLimit,
+                                  size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_BtFindBestMatch() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_dictMatchState. */
+static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_BtFindBestMatch() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_extDict. */
+static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+}
+
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]   /* expands to an lvalue; requires a variable named `chainTable` in scope at the point of use */
+
+/* Inserts every position from ms->nextToUpdate up to ip (excluded) into the
+   hash table and the chain table, then returns the hash table entry for ip.
+   Assumption : always within prefix (i.e. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    const BYTE* const base = ms->window.base;
+    U32* const chainTable = ms->chainTable;   /* name expected by NEXT_IN_CHAIN */
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const U32 target = (U32)(ip - base);
+    U32 pos;
+
+    /* catch up : each position is linked in front of the previous head of its bucket */
+    for (pos = ms->nextToUpdate; pos < target; pos++) {
+        size_t const bucket = ZSTD_hashPtr(base+pos, hashLog, mls);
+        NEXT_IN_CHAIN(pos, chainMask) = hashTable[bucket];
+        hashTable[bucket] = pos;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+/* Non-static wrapper : runs ZSTD_insertAndFindFirstIndex_internal() with the
+ * compression parameters and minMatch stored in `ms`. */
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    return ZSTD_insertAndFindFirstIndex_internal(ms, &ms->cParams, ip, ms->cParams.minMatch);
+}
+
+
+/* inlining is important to hardwire a hot branch (template emulation)
+ *
+ * ZSTD_HcFindBestMatch_generic() :
+ * Updates the hash chain up to `ip`, then follows the chain of candidates for
+ * `ip` and keeps the longest match found.
+ * At most (1 << searchLog) candidates are tried; with ZSTD_dictMatchState the
+ * attempts left over are spent on the chain of ms->dictMatchState.
+ * @param ms        match state; updated by ZSTD_insertAndFindFirstIndex_internal()
+ * @param ip        position to match
+ * @param iLimit    end of input used for match length counting
+ * @param offsetPtr out : distance + ZSTD_REP_MOVE of the best match, written only
+ *                  when a match longer than the current best is found
+ * @param mls       number of bytes hashed by ZSTD_hashPtr()
+ * @param dictMode  ZSTD_extDict enables reads through window.dictBase;
+ *                  ZSTD_dictMatchState enables the second search
+ * @return length of the best match, or 3 when no longer match was found
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_HcFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainSize = (1 << cParams->chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 current = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 minChain = current > chainSize ? current - chainSize : 0;   /* a candidate at or below this index ends the walk */
+    U32 nbAttempts = 1U << cParams->searchLog;
+    size_t ml=4-1;   /* only a candidate longer than 3 bytes can replace this initial value */
+
+    /* HC4 match finder */
+    U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+
+    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+        size_t currentMl=0;
+        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+            if (match[ml] == ip[ml])   /* potentially better */
+                currentMl = ZSTD_count(ip, match, iLimit);
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            assert(match+4 <= dictEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    if (dictMode == ZSTD_dictMatchState) {
+        const ZSTD_matchState_t* const dms = ms->dictMatchState;
+        const U32* const dmsChainTable = dms->chainTable;
+        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
+        const U32 dmsChainMask         = dmsChainSize - 1;
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;   /* added to a dms index to express it in the index space of ms */
+        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
+
+        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
+
+        for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
+            size_t currentMl=0;
+            const BYTE* const match = dmsBase + matchIndex;
+            assert(match+4 <= dmsEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+
+            if (matchIndex <= dmsMinChain) break;
+            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
+        }
+    }
+
+    return ml;
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_HcFindBestMatch_generic() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_noDict. */
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_HcFindBestMatch_generic() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_dictMatchState. */
+static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+}
+
+
+/* Dispatches on cParams.minMatch so that ZSTD_HcFindBestMatch_generic() always
+ * receives a literal mls (4, 5 or 6), with dictMode fixed to ZSTD_extDict. */
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    if (ms->cParams.minMatch == 5)
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    if ((ms->cParams.minMatch == 6) || (ms->cParams.minMatch == 7))
+        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    /* 4, and every other value (includes 3) */
+    return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+}
+
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;   /* selects between the ZSTD_HcFindBestMatch_* and ZSTD_BtFindBestMatch_* searchers */
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth,
+                        ZSTD_dictMode_e const dictMode)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
+        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+                                         : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
+        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
+                                         : ZSTD_HcFindBestMatch_selectMLS);
+    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const current = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
+        U32 const maxRep = current - windowLow;
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+
+        /* check repCode */
+        if (dictMode == ZSTD_dictMatchState) {
+            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
+            const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                                && repIndex < prefixLowestIndex) ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            if (depth==0) goto _storeSequence;
+        }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            if ( (dictMode == ZSTD_noDict)
+              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                if ((mlRep >= 4) && (gain2 > gain1))
+                    matchLength = mlRep, offset = 0, start = ip;
+            }
+            if (dictMode == ZSTD_dictMatchState) {
+                const U32 repIndex = (U32)(ip - base) - offset_1;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                    int const gain2 = (int)(mlRep * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+            }
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                if ( (dictMode == ZSTD_noDict)
+                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                    int const gain2 = (int)(mlRep * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+                if (dictMode == ZSTD_dictMatchState) {
+                    const U32 repIndex = (U32)(ip - base) - offset_1;
+                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                        int const gain2 = (int)(mlRep * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((mlRep >= 4) && (gain2 > gain1))
+                            matchLength = mlRep, offset = 0, start = ip;
+                    }
+                }
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
+         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
+         * overflows the pointer, which is undefined behavior.
+         */
+        /* catch up */
+        if (offset) {
+            if (dictMode == ZSTD_noDict) {
+                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
+                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                    { start--; matchLength++; }
+            }
+            if (dictMode == ZSTD_dictMatchState) {
+                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
+                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            }
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        if (dictMode == ZSTD_dictMatchState) {
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex = current2 - offset_2;
+                const BYTE* repMatch = dictMode == ZSTD_dictMatchState
+                    && repIndex < prefixLowestIndex ?
+                        dictBase - dictIndexDelta + repIndex :
+                        base + repIndex;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
+                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    ip += matchLength;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        if (dictMode == ZSTD_noDict) {
+            while ( ((ip <= ilimit) & (offset_2>0))
+                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+                /* store sequence */
+                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1 ? offset_1 : savedOffset;
+    rep[1] = offset_2 ? offset_2 : savedOffset;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+/** ZSTD_compressBlock_btlazy2() :
+ *  No-dictionary lazy compressor : binary-tree match finder, lookahead
+ *  depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_binaryTree, 2 /* depth */, ZSTD_noDict);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy2() :
+ *  No-dictionary lazy compressor : hash-chain match finder, lookahead
+ *  depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 2 /* depth */, ZSTD_noDict);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy() :
+ *  No-dictionary lazy compressor : hash-chain match finder, lookahead
+ *  depth 1.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 1 /* depth */, ZSTD_noDict);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_greedy() :
+ *  No-dictionary greedy compressor : hash-chain match finder, depth 0
+ *  (no lookahead).
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 0 /* depth */, ZSTD_noDict);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_btlazy2_dictMatchState() :
+ *  ZSTD_dictMatchState variant of btlazy2 : binary-tree match finder,
+ *  lookahead depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_binaryTree, 2 /* depth */, ZSTD_dictMatchState);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy2_dictMatchState() :
+ *  ZSTD_dictMatchState variant of lazy2 : hash-chain match finder,
+ *  lookahead depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 2 /* depth */, ZSTD_dictMatchState);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy_dictMatchState() :
+ *  ZSTD_dictMatchState variant of lazy : hash-chain match finder,
+ *  lookahead depth 1.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 1 /* depth */, ZSTD_dictMatchState);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_greedy_dictMatchState() :
+ *  ZSTD_dictMatchState variant of greedy : hash-chain match finder,
+ *  depth 0 (no lookahead).
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 0 /* depth */, ZSTD_dictMatchState);
+    return lastLitSize;
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth)
+{   /* Greedy/lazy block compressor for a window that has an external
+     * dictionary segment : match indices below dictLimit are read through
+     * dictBase, all other indices through base.
+     * searchMethod selects the binary-tree or the hash-chain match finder;
+     * depth (0, 1 or 2) is the number of following positions probed for a
+     * better match before a sequence is committed.
+     * Sequences are appended to seqStore, rep[0] and rep[1] are rewritten on
+     * exit, and the return value is the trailing literals size (iend - anchor). */
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;   /* first literal not yet covered by a stored sequence */
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;   /* the match loop stops 8 bytes before the end of the block */
+    const BYTE* const base = ms->window.base;
+    const U32 dictLimit = ms->window.dictLimit;   /* indices below dictLimit live in the extDict segment */
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
+    const U32 windowLog = ms->cParams.windowLog;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];   /* working copies of the two repeat offsets */
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
+    /* init : step over the first byte when the block begins exactly at prefixStart */
+    ip += (ip == prefixStart);
+
+    /* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+    /* A random 5% speed loss was measured on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;   /* 0 denotes a repcode match; otherwise the distance biased by ZSTD_REP_MOVE (undone in "catch up") */
+        const BYTE* start=ip+1;   /* the repcode candidate below is tested at ip+1 */
+        U32 current = (U32)(ip-base);
+
+        /* check repCode : try offset_1 at position ip+1 */
+        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
+            const U32 repIndex = (U32)(current+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow : only repIndex in [dictLimit-3, dictLimit-1] is rejected,
+                                                                                    * i.e. where the 4-byte read below would run past the end of the extDict segment */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                if (depth==0) goto _storeSequence;   /* greedy : commit the repcode right away (offset is still 0, start is still ip+1) */
+        }   }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;   /* placeholder; presumably overwritten by searchMax whenever it reports a match -- confirm in the match finders */
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)   /* a searched match replaces the repcode only when strictly longer */
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+         if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections : the step grows with the distance from anchor */
+            continue;
+        }
+
+        /* let's try to find a better solution : probe the next position(s) and keep
+         * a candidate only if its estimated gain beats the current match */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            current++;
+            /* check repCode (skipped when the current best is already a repcode, i.e. offset==0) */
+            if (offset) {
+                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
+                const U32 repIndex = (U32)(current - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow : same boundary test as the first repcode check */
+                if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                    int const gain2 = (int)(repLength * 3);   /* repcode candidate : 3 points per byte, no offset penalty */
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);   /* current match : offset penalty, +1 bonus for having been found first */
+                    if ((repLength >= 4) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+            }   }
+
+            /* search match, depth 1 */
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);   /* the new match must win by more than 4 points */
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                current++;
+                /* check repCode */
+                if (offset) {
+                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
+                    const U32 repIndex = (U32)(current - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow : same boundary test as the first repcode check */
+                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                }   }
+
+                /* search match, depth 2 */
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);   /* two positions later : the new match must win by more than 7 points */
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up : for a searched (non-repcode) match, extend it backwards while the
+         * preceding bytes agree, without crossing anchor or the start of the segment
+         * that holds the match; then push the real distance into the offset history */
+        if (offset) {
+            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+
+        /* store sequence : literals [anchor, start) followed by the selected match */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode : as long as offset_2 matches right at ip,
+         * emit it as a sequence with zero literals */
+        while (ip <= ilimit) {
+            const U32 repCurrent = (U32)(ip-base);
+            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+            const U32 repIndex = repCurrent - offset_2;
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow : same boundary test as the first repcode check */
+            if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+/** ZSTD_compressBlock_greedy_extDict() :
+ *  extDict greedy compressor : hash-chain match finder, depth 0
+ *  (no lookahead).
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_extDict_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 0 /* depth */);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy_extDict() :
+ *  extDict lazy compressor : hash-chain match finder, lookahead depth 1.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_extDict_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 1 /* depth */);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_lazy2_extDict() :
+ *  extDict lazy compressor : hash-chain match finder, lookahead depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_extDict_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_hashChain, 2 /* depth */);
+    return lastLitSize;
+}
+
+/** ZSTD_compressBlock_btlazy2_extDict() :
+ *  extDict lazy compressor : binary-tree match finder, lookahead depth 2.
+ *  @return : the last literals size reported by the generic compressor. */
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    size_t const lastLitSize = ZSTD_compressBlock_lazy_extDict_generic(
+                                    ms, seqStore, rep, src, srcSize,
+                                    search_binaryTree, 2 /* depth */);
+    return lastLitSize;
+}
+/**** ended inlining compress/zstd_lazy.c ****/
+/**** start inlining compress/zstd_ldm.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_ldm.h ****/
+
+/**** skipping file: ../common/debug.h ****/
+/**** skipping file: zstd_fast.h ****/
+/**** skipping file: zstd_double_fast.h ****/
+
+#define LDM_BUCKET_SIZE_LOG 3   /* default bucketSizeLog, used by ZSTD_ldm_adjustParameters() when the field is 0 */
+#define LDM_MIN_MATCH_LENGTH 64   /* default minMatchLength, used by ZSTD_ldm_adjustParameters() when the field is 0 */
+#define LDM_HASH_RLOG 7   /* default hashLog is windowLog - LDM_HASH_RLOG, but never below ZSTD_HASHLOG_MIN */
+#define LDM_HASH_CHAR_OFFSET 10   /* NOTE(review): no use of this macro is visible in the nearby LDM code -- confirm whether it is still needed */
+
+/** ZSTD_ldm_adjustParameters() :
+ *  Copies windowLog from cParams, then replaces every LDM parameter that is
+ *  still 0 (bucketSizeLog, minMatchLength, hashLog, hashRateLog) with a
+ *  default value.
+ *  For strategies >= ZSTD_btopt, minMatchLength is also raised to at least
+ *  cParams->targetLength.
+ *  Finally bucketSizeLog is capped to hashLog. */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
+{
+    params->windowLog = cParams->windowLog;
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+
+    if (params->bucketSizeLog == 0) {
+        params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    }
+    if (params->minMatchLength == 0) {
+        params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    }
+
+    if (cParams->strategy >= ZSTD_btopt) {
+        /* Get out of the way of the optimal parser */
+        U32 const raisedMinMatch = MAX(cParams->targetLength, params->minMatchLength);
+        assert(raisedMinMatch >= ZSTD_LDM_MINMATCH_MIN);
+        assert(raisedMinMatch <= ZSTD_LDM_MINMATCH_MAX);
+        params->minMatchLength = raisedMinMatch;
+    }
+
+    if (params->hashLog == 0) {
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
+        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+    }
+
+    if (params->hashRateLog == 0) {
+        /* default rate : windowLog - hashLog, floored at 0 */
+        if (params->windowLog < params->hashLog) {
+            params->hashRateLog = 0;
+        } else {
+            params->hashRateLog = params->windowLog - params->hashLog;
+        }
+    }
+
+    /* a bucket cannot be larger than the whole table */
+    if (params->hashLog < params->bucketSizeLog) {
+        params->bucketSizeLog = params->hashLog;
+    }
+}
+
+/** ZSTD_ldm_getTableSize() :
+ *  Workspace size needed by the LDM tables : one byte per bucket plus
+ *  (1 << hashLog) entries of type ldmEntry_t, each part rounded by
+ *  ZSTD_cwksp_alloc_size().
+ *  @return : that size, or 0 when params.enableLdm is not set. */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const nbEntries = (size_t)1 << params.hashLog;
+    size_t const bucketLog = (params.bucketSizeLog < params.hashLog)
+                           ? params.bucketSizeLog
+                           : params.hashLog;
+    size_t const nbBuckets = (size_t)1 << (params.hashLog - bucketLog);
+    size_t const perBucketBytes = ZSTD_cwksp_alloc_size(nbBuckets);
+    size_t const entryBytes = ZSTD_cwksp_alloc_size(nbEntries * sizeof(ldmEntry_t));
+
+    if (!params.enableLdm) {
+        return 0;
+    }
+    return perBucketBytes + entryBytes;
+}
+
+/** ZSTD_ldm_getMaxNbSeq() :
+ *  @return : maxChunkSize / params.minMatchLength when LDM is enabled,
+ *            0 otherwise (the division is not evaluated in that case). */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    if (!params.enableLdm) {
+        return 0;
+    }
+    return maxChunkSize / params.minMatchLength;
+}
+
+/** ZSTD_ldm_getSmallHash() :
+ *  numBits must be <= 32 (asserted).
+ *  numBits==0 is answered with 0 directly, which also avoids shifting a
+ *  64-bit value by 64.
+ *  @return : the numBits most significant bits of value. */
+static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
+{
+    assert(numBits <= 32);
+    if (numBits == 0) {
+        return 0;
+    }
+    return (U32)(value >> (64 - numBits));
+}
+
+/** ZSTD_ldm_getChecksum() :
+ *  numBitsToDiscard must be <= 32 (asserted).
+ *  @return : the 32 bits of hash located right below its numBitsToDiscard
+ *            most significant bits. */
+static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
+{
+    assert(numBitsToDiscard <= 32);
+    {   U32 const shift = 64 - 32 - numBitsToDiscard;
+        U64 const shifted = hash >> shift;
+        return (U32)(shifted & 0xFFFFFFFF);
+    }
+}
+
+/** ZSTD_ldm_getTag() :
+ *  Given the hash, returns the numTagBits bits that come right after its
+ *  (32 + hbits) most significant bits.
+ *
+ *  When fewer than numTagBits bits remain below that point, the lowest
+ *  numTagBits bits of hash are returned instead.
+ *  Requires numTagBits < 32 and hbits <= 32 (asserted). */
+static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
+{
+    assert(numTagBits < 32 && hbits <= 32);
+    {   U32 const tagMask = ((U32)1 << numTagBits) - 1;
+        U32 const bitsLeft = 32 - hbits;   /* bits available below the small hash and the checksum */
+        if (bitsLeft >= numTagBits) {
+            return (U32)((hash >> (bitsLeft - numTagBits)) & tagMask);
+        }
+        return (U32)(hash & tagMask);
+    }
+}
+
+/** ZSTD_ldm_getBucket() :
+ *  Buckets are stored back to back in ldmState->hashTable, each one holding
+ *  (1 << bucketSizeLog) entries.
+ *  @return : a pointer to the first entry of the bucket selected by hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    size_t const firstSlot = hash << ldmParams.bucketSizeLog;
+    return &ldmState->hashTable[firstSlot];
+}
+
+/** ZSTD_ldm_insertEntry() :
+ *  Writes entry into the bucket selected by hash, at the slot recorded in
+ *  bucketOffsets[hash], then advances that slot index, wrapping around at
+ *  the bucket size (1 << bucketSizeLog). */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const nextSlot = &ldmState->bucketOffsets[hash];
+    ldmEntry_t* const bucket = ZSTD_ldm_getBucket(ldmState, hash, ldmParams);
+    U32 const wrapMask = ((U32)1 << ldmParams.bucketSizeLog) - 1;
+
+    bucket[*nextSlot] = entry;
+    *nextSlot = (BYTE)((*nextSlot + 1) & wrapMask);
+}
+
+/** ZSTD_ldm_makeEntryAndInsertByTag() :
+ *
+ *  Splits rollingHash into a small hash (its hBits most significant bits),
+ *  a checksum (the next 32 bits) and a tag (the ldmParams.hashRateLog bits
+ *  after that).
+ *
+ *  Nothing is inserted unless the tag equals (1 << ldmParams.hashRateLog)-1.
+ *  When it does, an ldmEntry_t {offset, checksum} is stored in the bucket
+ *  selected by the small hash. */
+static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
+                                             U64 const rollingHash,
+                                             U32 const hBits,
+                                             U32 const offset,
+                                             ldmParams_t const ldmParams)
+{
+    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
+    U32 const wantedTag = ((U32)1 << ldmParams.hashRateLog) - 1;
+
+    if (tag != wantedTag) {
+        return;   /* position not selected for insertion */
+    }
+
+    {   U32 const bucketId = ZSTD_ldm_getSmallHash(rollingHash, hBits);
+        ldmEntry_t newEntry;
+        newEntry.checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+        newEntry.offset = offset;
+        ZSTD_ldm_insertEntry(ldmState, bucketId, newEntry, ldmParams);
+    }
+}
+
+/** ZSTD_ldm_countBackwardsMatch() :
+ *  Walks backwards from pIn and pMatch for as long as the bytes just before
+ *  them are equal.
+ *
+ *  The walk never reads below pAnchor on the input side nor below pBase on
+ *  the match side.
+ *  @return : the number of matching bytes found before pIn / pMatch. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pBase)
+{
+    size_t nbBack = 0;
+    for (;;) {
+        if (pIn <= pAnchor) break;                /* input side exhausted */
+        if (pMatch <= pBase) break;               /* match side exhausted */
+        if (*(pIn - 1) != *(pMatch - 1)) break;   /* bytes differ */
+        --pIn;
+        --pMatch;
+        ++nbBack;
+    }
+    return nbBack;
+}
+
+/** ZSTD_ldm_fillFastTables() :
+ *
+ *  Updates the match-finder tables up to `end`, but only for the ZSTD_fast
+ *  and ZSTD_dfast strategies. This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  The other strategies fill their tables from within their block
+ *  compressors, so nothing is done for them here.
+ *  @return : always 0. */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
+{
+    const BYTE* const fillLimit = (const BYTE*)end;
+
+    switch(ms->cParams.strategy)
+    {
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, fillLimit, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, fillLimit, ZSTD_dtlm_fast);
+        break;
+
+    /* nothing to do : tables are filled by the block compressor itself */
+    case ZSTD_btultra2:
+    case ZSTD_btultra:
+    case ZSTD_btopt:
+    case ZSTD_btlazy2:
+    case ZSTD_lazy2:
+    case ZSTD_lazy:
+    case ZSTD_greedy:
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    return 0;
+}
+
+/** ZSTD_ldm_fillLdmHashTable() :
+ *
+ *  Visits every position from (lastHashed + 1) up to iend (excluded),
+ *  rolling the hash forward by one byte at each step and offering the
+ *  position to ZSTD_ldm_makeEntryAndInsertByTag().
+ *  lastHash is the rolling hash that corresponds to lastHashed.
+ *
+ *  @return : the rolling hash of the last visited position (iend-1), or
+ *            lastHash unchanged when no position was visited. */
+static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
+                                     U64 lastHash, const BYTE* lastHashed,
+                                     const BYTE* iend, const BYTE* base,
+                                     U32 hBits, ldmParams_t const ldmParams)
+{
+    U64 hash = lastHash;
+    const BYTE* pos;
+
+    for (pos = lastHashed + 1; pos < iend; pos++) {
+        BYTE const leaving  = pos[-1];                              /* byte dropping out of the hashed range */
+        BYTE const entering = pos[ldmParams.minMatchLength - 1];    /* byte coming into the hashed range */
+        hash = ZSTD_rollingHash_rotate(hash, leaving, entering,
+                                       state->hashPower);
+        ZSTD_ldm_makeEntryAndInsertByTag(state, hash, hBits,
+                                         (U32)(pos - base), ldmParams);
+    }
+    return hash;
+}
+
+/** ZSTD_ldm_fillHashTable() :
+ *  Feeds the LDM hash table with the positions of [ip, iend).
+ *  Nothing is done when the range is shorter than params->minMatchLength.
+ *  Otherwise the rolling hash is computed at ip, and the positions after ip
+ *  up to (iend - minMatchLength), excluded, are handed to
+ *  ZSTD_ldm_fillLdmHashTable() with indices relative to state->window.base. */
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+    if ((size_t)(iend - ip) < params->minMatchLength) {
+        return;   /* too short to hold a single hashable range */
+    }
+    {   U64 const firstHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
+        const BYTE* const lastStart = iend - params->minMatchLength;
+        ZSTD_ldm_fillLdmHashTable(
+            state, firstHash, ip, lastStart, state->window.base,
+            params->hashLog - params->bucketSizeLog,
+            *params);
+    }
+}
+
+
+/** ZSTD_ldm_limitTableUpdate() :
+ *
+ *  Moves ms->nextToUpdate closer to anchor when it lags more than 1024
+ *  positions behind it : the new value is at most 512 positions before
+ *  anchor
+ *  (after a long match, only update tables a limited amount). */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+{
+    U32 const anchorIdx = (U32)(anchor - ms->window.base);
+    if (anchorIdx <= ms->nextToUpdate + 1024) {
+        return;   /* close enough : leave nextToUpdate alone */
+    }
+    {   U32 const excess = anchorIdx - ms->nextToUpdate - 1024;
+        U32 const rewind = (excess < 512) ? excess : 512;
+        ms->nextToUpdate = anchorIdx - rewind;
+    }
+}
+
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U64 const hashPower = ldmState->hashPower;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+    U32 const hashRateLog = params->hashRateLog;
+    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash */
+    BYTE const* lastHashed = NULL;
+    U64 rollingHash = 0;
+
+    while (ip <= ilimit) {
+        size_t mLength;
+        U32 const current = (U32)(ip - base);
+        size_t forwardMatchLength = 0, backwardMatchLength = 0;
+        ldmEntry_t* bestEntry = NULL;
+        if (ip != istart) {
+            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
+                                                  lastHashed[minMatchLength],
+                                                  hashPower);
+        } else {
+            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
+        }
+        lastHashed = ip;
+
+        /* Do not insert and do not look for a match */
+        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
+           ip++;
+           continue;
+        }
+
+        /* Get the best entry and compute the match lengths */
+        {
+            ldmEntry_t* const bucket =
+                ZSTD_ldm_getBucket(ldmState,
+                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                   *params);
+            ldmEntry_t* cur;
+            size_t bestMatchLength = 0;
+            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+                    curForwardMatchLength = ZSTD_count_2segments(
+                                                ip, pMatch, iend,
+                                                matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowMatchPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowPrefixPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                }
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+        }
+
+        /* No match found -- continue searching */
+        if (bestEntry == NULL) {
+            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
+                                             hBits, current,
+                                             *params);
+            ip++;
+            continue;
+        }
+
+        /* Match found */
+        mLength = forwardMatchLength + backwardMatchLength;
+        ip -= backwardMatchLength;
+
+        {
+            /* Store the sequence:
+             * ip = current - backwardMatchLength
+             * The match is at (bestEntry->offset - backwardMatchLength)
+             */
+            U32 const matchIndex = bestEntry->offset;
+            U32 const offset = current - matchIndex;
+            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+            /* Out of sequence storage */
+            if (rawSeqStore->size == rawSeqStore->capacity)
+                return ERROR(dstSize_tooSmall);
+            seq->litLength = (U32)(ip - anchor);
+            seq->matchLength = (U32)mLength;
+            seq->offset = offset;
+            rawSeqStore->size++;
+        }
+
+        /* Insert the current entry into the hash table */
+        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                         (U32)(lastHashed - base),
+                                         *params);
+
+        assert(ip + backwardMatchLength == lastHashed);
+
+        /* Fill the hash table from lastHashed+1 to ip+mLength*/
+        /* Heuristic: don't need to fill the entire table at end of block */
+        if (ip + mLength <= ilimit) {
+            rollingHash = ZSTD_ldm_fillLdmHashTable(
+                              ldmState, rollingHash, lastHashed,
+                              ip + mLength, base, hBits, *params);
+            lastHashed = ip + mLength - 1;
+        }
+        ip += mLength;
+        anchor = ip;
+    }
+    return iend - anchor;
+}
+
+/*! ZSTD_ldm_reduceTable() :
+ *  Lower the offset of each of the `size` entries of `table` by
+ *  `reducerValue`. An offset smaller than `reducerValue` is set to 0
+ *  rather than being allowed to wrap around. */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
+{
+    U32 n;
+    for (n = 0; n < size; n++) {
+        U32 const previous = table[n].offset;
+        table[n].offset = (previous < reducerValue) ? 0 : previous - reducerValue;
+    }
+}
+/*! ZSTD_ldm_generateSequences() :
+ *  Walks `src` in chunks of at most `kMaxChunkSize` bytes and calls
+ *  ZSTD_ldm_generateSequences_internal() on each chunk, which appends the
+ *  sequences it finds to `sequences`.
+ *  Before each chunk, the window indexes are overflow-corrected when needed
+ *  (the hash table offsets are reduced by the same correction), then the
+ *  maximum distance `1 << params->windowLog` is enforced.
+ *  Bytes left uncovered at the end of a chunk are added to the litLength of
+ *  the first sequence generated by a later chunk.
+ *  The loop stops early once `sequences` has reached its capacity.
+ * @return : 0, or the error code returned by
+ *           ZSTD_ldm_generateSequences_internal(). */
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;   /* 1 MiB */
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);   /* srcSize / kMaxChunkSize, rounded up */
+    size_t chunk;
+    size_t leftoverSize = 0;   /* bytes of previous chunks not yet attached to any sequence */
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;   /* used below to detect whether this chunk produced sequences */
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+            /* invalidate dictionaries on overflow correction */
+            ldmState->loadedDictEnd = 0;
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test
+         *         the offset against maxDist directly.
+         *
+         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+         * that any offset used is valid at the END of the sequence, since it may
+         * be split into two sequences. This condition holds when using
+         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+         * against maxDist directly, we'll have to carefully handle that case.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
+        } else {
+            /* No sequence was generated: the whole chunk is carried over. */
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
+        }
+    }
+    return 0;
+}
+
+/* ZSTD_ldm_skipSequences() :
+ * Consume `srcSize` bytes from the front of the stored sequences, starting
+ * at rawSeqStore->pos. Literals are consumed first, then the match.
+ * A match that is cut to fewer than `minMatch` bytes is dropped, and its
+ * remaining bytes are added to the literals of the following sequence
+ * (when there is one). */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* const cur = &rawSeqStore->seq[rawSeqStore->pos];
+
+        /* Case 1 : the skip ends within the literals */
+        if (srcSize <= cur->litLength) {
+            cur->litLength -= (U32)srcSize;
+            return;
+        }
+
+        /* All the literals are consumed */
+        srcSize -= cur->litLength;
+        cur->litLength = 0;
+
+        /* Case 2 : the whole match is consumed too, go to the next sequence */
+        if (srcSize >= cur->matchLength) {
+            srcSize -= cur->matchLength;
+            cur->matchLength = 0;
+            rawSeqStore->pos++;
+            continue;
+        }
+
+        /* Case 3 : the skip ends strictly inside the match */
+        cur->matchLength -= (U32)srcSize;
+        if (cur->matchLength < minMatch) {
+            /* What remains of the match is too short : turn it into literals */
+            if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                cur[1].litLength += cur[0].matchLength;
+            }
+            rawSeqStore->pos++;
+        }
+        return;
+    }
+}
+
+/**
+ * Reads the sequence at rawSeqStore->pos and fits it into the `remaining`
+ * bytes of the current block.
+ *
+ * When the sequence fits entirely, it is returned as-is and the store moves
+ * on to the next sequence. Otherwise the returned copy is truncated to
+ * `remaining` bytes and the store is advanced by `remaining` bytes, so the
+ * rest of the sequence is handled with the next block.
+ *
+ * A returned sequence with offset == 0 means that the rest of the block
+ * should be literals.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    U32 const fullLength = sequence.litLength + sequence.matchLength;
+    assert(sequence.offset > 0);
+
+    if (remaining < fullLength) {
+        /* Partial sequence : cut it short (offset == 0 ==> rest is literals). */
+        if (remaining > sequence.litLength) {
+            sequence.matchLength = remaining - sequence.litLength;
+            if (sequence.matchLength < minMatch) {
+                sequence.offset = 0;
+            }
+        } else {
+            sequence.offset = 0;
+        }
+        /* Skip past `remaining` bytes for the future sequences. */
+        ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+        return sequence;
+    }
+
+    /* Likely case : the whole sequence fits in the block */
+    rawSeqStore->pos++;
+    return sequence;
+}
+/* ZSTD_ldm_blockCompress() :
+ * Compresses the block `src` using the sequences stored in `rawSeqStore`.
+ * For each stored sequence (possibly truncated by maybeSplitSequence()), the
+ * literals preceding the match are passed to the block compressor selected
+ * from `ms->cParams.strategy` and the dictionary mode, then the sequence
+ * itself is stored into `seqStore` and its offset becomes rep[0].
+ * The bytes following the last used sequence are passed to the block
+ * compressor as well.
+ * @return : the value returned by that final block compressor call. */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the lits */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal : the rest of the block is literals */
+        if (sequence.offset == 0)
+            break;
+
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
+
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes : shift the history by one, newest offset first */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence ; its literals are the newLitLength bytes ending at ip */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
+        }
+    }
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}
+/**** ended inlining compress/zstd_ldm.c ****/
+/**** start inlining compress/zstd_opt.c ****/
+/*
+ * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: hist.h ****/
+/**** skipping file: zstd_opt.h ****/
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE     (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+/* WEIGHT(stat, opt) :
+ * converts the statistic `stat` into a price, in units of
+ * 1/BITCOST_MULTIPLIER bit.
+ * All three variants take the same two arguments, so that call sites compile
+ * whichever variant is enabled ; variants which do not need `opt` only
+ * evaluate it and discard it.
+ * Macro arguments are parenthesized so that any expression can be passed. */
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+/* ZSTD_bitWeight() :
+ * weight of `stat` with whole-bit accuracy :
+ * position of the highest set bit of (stat+1), scaled by BITCOST_MULTIPLIER */
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    U32 const highBit = ZSTD_highbit32(stat+1);
+    return highBit * BITCOST_MULTIPLIER;
+}
+
+/* ZSTD_fracWeight() :
+ * weight of `rawStat` with fractional-bit accuracy :
+ * the whole-bit part comes from the highest set bit of (rawStat+1),
+ * the fractional part from (rawStat+1) scaled down by that same bit position */
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const wholeBits = hb * BITCOST_MULTIPLIER;
+    U32 const fraction = (stat << BITCOST_ACCURACY) >> hb;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return wholeBits + fraction;
+}
+
+#if (DEBUGLEVEL>=2)
+/* ZSTD_fCost() :
+ * debugging helper, for debug messages only.
+ * @return : `price` converted into bytes, as a fractional value */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    double const priceUnitsPerByte = (double)(BITCOST_MULTIPLIER*8);
+    return (double)price / priceUnitsPerByte;
+}
+#endif
+
+/* ZSTD_compressedLiterals() :
+ * @return : 1 unless literalCompressionMode is ZSTD_lcm_uncompressed, 0 otherwise */
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    if (optPtr->literalCompressionMode == ZSTD_lcm_uncompressed) {
+        return 0;
+    }
+    return 1;
+}
+
+/* ZSTD_setBasePrices() :
+ * refresh the cached base price of each symbol type from its current sum.
+ * The literals base price is only refreshed when literals are compressed. */
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    /* sequence symbols */
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+
+    /* literals */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    }
+}
+
+
+/* ZSTD_downscaleStat() :
+ * divide each of the first (lastEltIndex+1) elements of `table`
+ * by 2^(ZSTD_FREQ_DIV+malus), then add 1 so that no element ends up at zero.
+ * @return : the sum of the updated elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    int const shift = ZSTD_FREQ_DIV+malus;
+    U32 const nbElts = lastEltIndex+1;
+    U32 total = 0;
+    U32 n;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (n = 0; n < nbElts; n++) {
+        unsigned const reduced = 1 + (table[n] >> shift);
+        table[n] = reduced;
+        total += reduced;
+    }
+    return total;
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ *
+ * priceType is set to zop_dynamic, except for a first block with
+ * srcSize <= ZSTD_PREDEF_THRESHOLD and no valid huffman table, which uses
+ * zop_predef.
+ * When hints are taken from the entropy tables, each symbol receives the
+ * frequency 2^(scaleLog - bitCost), or 1 when its bitCost is 0.
+ * Literals statistics are only touched when literals are compressed.
+ * Base prices are refreshed in all cases before returning.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+                  int const optLevel)
+{
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+    optPtr->priceType = zop_dynamic;   /* default, may be changed below for a first block */
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+            optPtr->priceType = zop_predef;
+        }
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;   /* overrides the zop_predef heuristic above */
+
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
+            }   }
+
+        } else {  /* not a dictionary */
+
+            assert(optPtr->litFreq != NULL);
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+            }
+
+            {   unsigned ll;
+                for (ll=0; ll<=MaxLL; ll++)
+                    optPtr->litLengthFreq[ll] = 1;
+            }
+            optPtr->litLengthSum = MaxLL+1;   /* MaxLL+1 symbols, each with frequency 1 */
+
+            {   unsigned ml;
+                for (ml=0; ml<=MaxML; ml++)
+                    optPtr->matchLengthFreq[ml] = 1;
+            }
+            optPtr->matchLengthSum = MaxML+1;   /* MaxML+1 symbols, each with frequency 1 */
+
+            {   unsigned of;
+                for (of=0; of<=MaxOff; of++)
+                    optPtr->offCodeFreq[of] = 1;
+            }
+            optPtr->offCodeSum = MaxOff+1;   /* MaxOff+1 symbols, each with frequency 1 */
+
+        }
+
+    } else {   /* new block : re-use previous statistics, scaled down */
+
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);   /* malus 1 : literals are scaled down one extra bit */
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+    }
+
+    ZSTD_setBasePrices(optPtr, optLevel);
+}
+
+/* ZSTD_rawLiteralsCost() :
+ * price of the `litLength` literals starting at `literals`
+ * (litLength can be 0, in which case the price is 0).
+ * The price of the literalLength symbol itself is not included. */
+static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                const optState_t* const optPtr,
+                                int optLevel)
+{
+    U32 price;
+    U32 u;
+
+    if (litLength == 0) {
+        return 0;
+    }
+    if (!ZSTD_compressedLiterals(optPtr)) {
+        /* Uncompressed literals : 8 bits per literal */
+        return (litLength << 3) * BITCOST_MULTIPLIER;
+    }
+    if (optPtr->priceType == zop_predef) {
+        /* no statistic used : 6 bits per literal */
+        return (litLength*6) * BITCOST_MULTIPLIER;
+    }
+
+    /* dynamic statistics :
+     * start from the base price of every literal, then deduct each literal's own weight */
+    price = litLength * optPtr->litSumBasePrice;
+    for (u=0; u < litLength; u++) {
+        assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+        price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+    }
+    return price;
+}
+
+/* ZSTD_litLengthPrice() :
+ * price of the literalLength symbol for `litLength`
+ * (the literals themselves are priced by ZSTD_rawLiteralsCost()) */
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
+{
+    if (optPtr->priceType != zop_predef) {
+        /* dynamic statistics : extra bits of the code + price of the code itself */
+        U32 const llCode = ZSTD_LLcode(litLength);
+        U32 const extraBitsPrice = LL_bits[llCode] * BITCOST_MULTIPLIER;
+        U32 const codePrice = optPtr->litLengthSumBasePrice
+                            - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+        return extraBitsPrice + codePrice;
+    }
+
+    /* predefined scheme : no statistics */
+    return WEIGHT(litLength, optLevel);
+}
+
+/* ZSTD_getMatchPrice() :
+ * Price of the match part (offset + matchLength) of a sequence.
+ * The cost of the literals part must be added separately to get the full
+ * cost of a sequence.
+ * optLevel: when <2, long offsets receive a handicap, which favors small
+ *           offsets for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+             const optState_t* const optPtr,
+                   int const optLevel)
+{
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    U32 price;
+    assert(matchLength >= MINMATCH);
+
+    /* fixed scheme, do not use statistics */
+    if (optPtr->priceType == zop_predef) {
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+    }
+
+    /* dynamic statistics : offset */
+    {   U32 const offExtraBitsPrice = offCode * BITCOST_MULTIPLIER;
+        U32 const offCodePrice = optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel);
+        price = offExtraBitsPrice + offCodePrice;
+    }
+    if ((optLevel<2) /*static*/ && (offCode >= 20)) {
+        /* handicap for long distance offsets, favor decompression speed */
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER;
+    }
+
+    /* dynamic statistics : match length */
+    {   U32 const mlCode = ZSTD_MLcode(mlBase);
+        U32 const mlExtraBitsPrice = ML_bits[mlCode] * BITCOST_MULTIPLIER;
+        U32 const mlCodePrice = optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel);
+        price += mlExtraBitsPrice + mlCodePrice;
+    }
+
+    /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
+    price += BITCOST_MULTIPLIER / 5;
+
+    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
+    return price;
+}
+
+/* ZSTD_updateStats() :
+ * record one sequence (its literals, literal length, offset code and match
+ * length) into the frequency tables of `optPtr`.
+ * assumption : literals + litLength <= iend */
+static void ZSTD_updateStats(optState_t* const optPtr,
+                             U32 litLength, const BYTE* literals,
+                             U32 offsetCode, U32 matchLength)
+{
+    /* literals : only tracked when literals are compressed */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 n;
+        for (n = 0; n < litLength; n++) {
+            optPtr->litFreq[literals[n]] += ZSTD_LITFREQ_ADD;
+        }
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        optPtr->litLengthFreq[llCode] += 1;
+        optPtr->litLengthSum += 1;
+    }
+
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
+        optPtr->offCodeFreq[offCode] += 1;
+        optPtr->offCodeSum += 1;
+    }
+
+    /* match Length */
+    {   U32 const mlCode = ZSTD_MLcode(matchLength - MINMATCH);
+        optPtr->matchLengthFreq[mlCode] += 1;
+        optPtr->matchLengthSum += 1;
+    }
+}
+
+
+/* ZSTD_readMINMATCH() :
+ * reads 32 bits from `memPtr` ; when `length` is 3, one byte's worth of bits
+ * is shifted out, in a direction which depends on endianness.
+ * The returned value is only safe to use for comparisons.
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    U32 const raw = MEM_read32(memPtr);
+    if (length != 3) {
+        /* 4, and any other length */
+        return raw;
+    }
+    return MEM_isLittleEndian() ? (raw << 8) : (raw >> 8);
+}
+
+
+/* ZSTD_insertAndFindFirstIndexHash3() :
+ * Insert into hashTable3 every position from *nextToUpdate3 up to ip
+ * (excluded), then set *nextToUpdate3 to the index of ip.
+ * Assumption : always within prefix (i.e. not within extDict)
+ * @return : the index stored in hashTable3 for the 3-byte hash of ip */
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
+{
+    const BYTE* const base = ms->window.base;
+    U32* const table3 = ms->hashTable3;
+    U32 const hashLog3 = ms->hashLog3;
+    U32 const target = (U32)(ip - base);
+    size_t const ipHash = ZSTD_hash3Ptr(ip, hashLog3);
+    U32 pos;
+    assert(hashLog3 > 0);
+
+    for (pos = *nextToUpdate3; pos < target; pos++) {
+        table3[ZSTD_hash3Ptr(base+pos, hashLog3)] = pos;
+    }
+
+    *nextToUpdate3 = target;
+    return table3[ipHash];
+}
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ *  The position of `ip` is written into the hash table, then inserted into
+ *  the binary tree stored in ms->chainTable, where each position owns two
+ *  consecutive U32 slots (links towards smaller and larger candidates).
+ *  At most (1 << searchLog) candidates are compared, and the walk stops
+ *  as soon as a candidate index falls below window.lowLimit.
+ *  mls : match length used for hashing.
+ *  extDict : when non-zero, candidates below dictLimit are read from dictBase.
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                U32 const mls, const int extDict)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32 matchIndex = hashTable[h];   /* first candidate : previous position stored for this hash */
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    const U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;   /* candidates at or below this index are beyond tree size */
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowLow = ms->window.lowLimit;
+    U32 matchEndIdx = current+8+1;
+    size_t bestLength = 8;   /* only matches longer than this can move matchEndIdx */
+    U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+
+    assert(ip <= iend-8);   /* required for h calculation */
+    hashTable[h] = current;   /* Update Hash Table */
+
+    assert(windowLow > 0);
+    while (nbCompares-- && (matchIndex >= windowLow)) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < current);
+
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
+        if (matchIndex == predictedSmall) {
+            /* no need to check length, result known */
+            *smallerPtr = matchIndex;
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
+        }
+        if (matchIndex == predictedLarge) {
+            *largerPtr = matchIndex;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
+        }
+#endif
+
+        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
+            match = base + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;   /* terminate both pending links (they may point to dummy32) */
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > current + 8);
+        return MAX(positions, matchEndIdx - (current + 8));
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 mls, const ZSTD_dictMode_e dictMode)
+{   /* Inserts every position from ms->nextToUpdate up to (excluding) ip into the tree, then records ip's index as the new ms->nextToUpdate. */
+    const BYTE* const windowBase = ms->window.base;
+    U32 const stopIdx = (U32)(ip - windowBase);   /* index of ip : insertion stops just before it */
+    int const isExtDict = (dictMode == ZSTD_extDict);
+    U32 pos;
+    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)", ms->nextToUpdate, stopIdx, dictMode);
+
+    for (pos = ms->nextToUpdate; pos < stopIdx; ) {   /* ZSTD_insertBt1() tells how many positions to advance */
+        U32 const step = ZSTD_insertBt1(ms, windowBase+pos, iend, mls, isExtDict);
+        assert(pos < (U32)(pos + step));   /* must progress, without wrapping around */
+        pos += step;
+    }
+    assert((size_t)(ip - windowBase) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - windowBase) <= (size_t)(U32)(-1));
+    ms->nextToUpdate = stopIdx;   /* set to ip's index even when the last step went beyond it */
+}
+
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend)
+{   U32 const mls = ms->cParams.minMatch;   /* non-template entry point : match length comes from the compression parameters, dict mode is fixed to ZSTD_noDict */
+    ZSTD_updateTree_internal(ms, ip, iend, mls, ZSTD_noDict);   }
+
+FORCE_INLINE_TEMPLATE   /* ZSTD_insertBtAndGetAllMatches() : tests repcodes, then (mls==3 only) the hash3 table, then walks the binary tree while inserting `ip` into it ; matches are stored by strictly increasing length ; @return : nb of matches stored */
+U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+                    ZSTD_matchState_t* ms,   /* hash table and tree are updated in place ; ms->nextToUpdate is written before returning, except on the early repcode return */
+                    U32* nextToUpdate3,   /* forwarded to ZSTD_insertAndFindFirstIndexHash3() ; only used when mls==3 */
+                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,   /* ip : position searched and inserted ; iLimit : upper bound handed to the match-length counters */
+                    const U32 rep[ZSTD_REP_NUM],   /* repeat offsets, tested before any table lookup */
+                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                    const U32 lengthToBeat,   /* bestLength starts at lengthToBeat-1 : only matches of length >= lengthToBeat get stored */
+                    U32 const mls /* template */)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);   /* a repcode or hash3 match longer than this ends the search immediately */
+    const BYTE* const base = ms->window.base;
+    U32 const current = (U32)(ip-base);   /* index of ip, relative to window.base */
+    U32 const hashLog = cParams->hashLog;
+    U32 const minMatch = (mls==3) ? 3 : 4;   /* nb of bytes compared by ZSTD_readMINMATCH() in the repcode checks */
+    U32* const hashTable = ms->hashTable;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32 matchIndex  = hashTable[h];   /* first candidate, read before hashTable[h] gets overwritten with current */
+    U32* const bt   = ms->chainTable;
+    U32 const btLog = cParams->chainLog - 1;   /* each tree node occupies 2 U32 slots of chainTable, see `bt + 2*(index & btMask)` */
+    U32 const btMask= (1U << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;   /* prefix lengths already known to be shared with the smaller / larger side of the walk */
+    const BYTE* const dictBase = ms->window.dictBase;
+    U32 const dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    U32 const btLow = (btMask >= current) ? 0 : current - btMask;   /* a candidate at or below this index ends the tree walk */
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
+    U32 const matchLow = windowLow ? windowLow : 1;   /* lowest accepted candidate index ; index 0 is never accepted */
+    U32* smallerPtr = bt + 2*(current&btMask);   /* slot that receives the next candidate found smaller than current */
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;   /* slot that receives the next candidate found larger than current */
+    U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 mnum = 0;   /* nb of matches stored so far */
+    U32 nbCompares = 1U << cParams->searchLog;   /* budget of candidates to examine ; what remains after the tree walk is spent on the dictMatchState search */
+
+    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;   /* all dms* values below are only meaningful when dictMode == ZSTD_dictMatchState */
+    const ZSTD_compressionParameters* const dmsCParams =
+                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
+    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
+    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
+    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
+    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
+    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;   /* added to a dms index to express it in the index space of the current window */
+    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
+    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
+    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
+    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;   /* a dms candidate at or below this index ends the dms walk */
+
+    size_t bestLength = lengthToBeat-1;   /* a match must be strictly longer than this to be stored */
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
+
+    /* check repCode */
+    assert(ll0 <= 1);   /* necessarily 1 or 0 */
+    {   U32 const lastR = ZSTD_REP_NUM + ll0;   /* ZSTD_REP_NUM candidates are tested, numbered from ll0 */
+        U32 repCode;
+        for (repCode = ll0; repCode < lastR; repCode++) {
+            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];   /* repCode==ZSTD_REP_NUM is only reachable when ll0==1 : the candidate is then rep[0]-1 */
+            U32 const repIndex = current - repOffset;
+            U32 repLen = 0;
+            assert(current >= dictLimit);
+            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
+                /* We must validate the repcode offset because when we're using a dictionary the
+                 * valid offset range shrinks when the dictionary goes out of bounds.
+                 */
+                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
+                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
+                }
+            } else {  /* repIndex < dictLimit || repIndex >= current */
+                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
+                                             dmsBase + repIndex - dmsIndexDelta :
+                                             dictBase + repIndex;
+                assert(current >= windowLow);
+                if ( dictMode == ZSTD_extDict
+                  && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
+                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
+                }
+                if (dictMode == ZSTD_dictMatchState
+                  && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `current > repIndex >= dmsLowLimit` */
+                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
+            }   }
+            /* save longer solution */
+            if (repLen > bestLength) {
+                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                            repCode, ll0, repOffset, repLen);
+                bestLength = repLen;
+                matches[mnum].off = repCode - ll0;   /* stored as a value in [0, ZSTD_REP_NUM-1], whatever ll0 is */
+                matches[mnum].len = (U32)repLen;
+                mnum++;
+                if ( (repLen > sufficient_len)
+                   | (ip+repLen == iLimit) ) {  /* best possible */
+                    return mnum;   /* early exit : neither the hash table nor the tree has been touched for this position */
+    }   }   }   }
+
+    /* HC3 match finder */
+    if ((mls == 3) /*static*/ && (bestLength < mls)) {
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
+        if ((matchIndex3 >= matchLow)
+          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
+            size_t mlen;
+            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
+                const BYTE* const match = base + matchIndex3;
+                mlen = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex3;
+                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
+            }
+
+            /* save best solution */
+            if (mlen >= mls /* == 3 > bestLength */) {
+                DEBUGLOG(8, "found small match with hlog3, of length %u",
+                            (U32)mlen);
+                bestLength = mlen;
+                assert(current > matchIndex3);
+                assert(mnum==0);  /* no prior solution */
+                matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;   /* real distances are stored shifted by ZSTD_REP_MOVE */
+                matches[0].len = (U32)mlen;
+                mnum = 1;
+                if ( (mlen > sufficient_len) |
+                     (ip+mlen == iLimit) ) {  /* best possible length */
+                    ms->nextToUpdate = current+1;  /* skip insertion */
+                    return 1;
+        }   }   }
+        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
+    }
+
+    hashTable[h] = current;   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex >= matchLow)) {   /* tree walk : each iteration examines one candidate and rewires one branch */
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);   /* node of the candidate : [0] and [1] are the next indexes to follow */
+        const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(current > matchIndex);
+
+        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
+            match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
+        } else {
+            match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
+        }
+
+        if (matchLength > bestLength) {   /* only strictly longer matches are stored, hence increasing lengths in `matches` */
+            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+                    (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
+            assert(matchEndIdx > matchIndex);
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+            bestLength = matchLength;
+            matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].len = (U32)matchLength;
+            mnum++;
+            if ( (matchLength > ZSTD_OPT_NUM)
+               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
+                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            /* match smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
+        } else {   /* match larger than current : mirror of the branch above, using largerPtr and nextPtr[0] */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;   /* close both pending branches ; a pointer redirected to dummy32 leaves the tree untouched */
+
+    if (dictMode == ZSTD_dictMatchState && nbCompares) {   /* read-only search of the dictionary's tree, with whatever compare budget remains */
+        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
+        U32 dictMatchIndex = dms->hashTable[dmsH];
+        const U32* const dmsBt = dms->chainTable;
+        commonLengthSmaller = commonLengthLarger = 0;   /* lengths learned in the main tree do not apply to the dms tree */
+        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
+            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match = dmsBase + dictMatchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
+            if (dictMatchIndex+matchLength >= dmsHighLimit)
+                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+            if (matchLength > bestLength) {
+                matchIndex = dictMatchIndex + dmsIndexDelta;   /* same candidate, expressed as an index of the current window */
+                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+                        (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                bestLength = matchLength;
+                matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].len = (U32)matchLength;
+                mnum++;
+                if ( (matchLength > ZSTD_OPT_NUM)
+                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
+            if (match[matchLength] < ip[matchLength]) {   /* dms tree is only read : no slot is rewritten here */
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                commonLengthLarger = matchLength;
+                dictMatchIndex = nextPtr[0];
+            }
+        }
+    }
+
+    assert(matchEndIdx > current+8);
+    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
+    return mnum;
+}
+
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
+                        ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
+                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
+{   /* Updates the tree up to ip, then searches at ip ; returns the nb of matches written into `matches`. */
+    U32 const mls = ms->cParams.minMatch;
+    const BYTE* const firstPending = ms->window.base + ms->nextToUpdate;
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
+    if (ip < firstPending) return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
+    /* the last argument is a template parameter : every call site passes a literal */
+    if (mls == 3)
+        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
+    if (mls == 5)
+        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
+    if ((mls == 6) || (mls == 7))
+        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    /* 4, and any other value */
+    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+}
+
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+
+
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
+{   U32 const spanLen = sol.mlen + sol.litlen;   /* sum of the literal length and match length recorded in `sol` */
+    return spanLen;
+}
+
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{   /* debug helper : prints table[0] .. table[lastEltID], comma-separated, followed by a line break */
+    int const count = lastEltID + 1;
+    int eltID = 0;
+    (void)table;   /* presumably avoids an unused-parameter warning when RAWLOG() expands to nothing */
+    while (eltID < count) {
+        RAWLOG(2, "%4i,", table[eltID]);
+        eltID++;
+    }
+    RAWLOG(2, " \n");
+}
+
+#endif
+
+FORCE_INLINE_TEMPLATE size_t   /* ZSTD_compressBlock_opt_generic() : price-driven parser ; for each series of positions, prices every reachable position, then stores the selected chain of sequences ; @return : size of the last literals run (iend - anchor) */
+ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                               seqStore_t* seqStore,   /* receives the selected sequences, through ZSTD_storeSeq() */
+                               U32 rep[ZSTD_REP_NUM],   /* repeat offsets : read by the first search of a series, overwritten each time a series is resolved */
+                         const void* src, size_t srcSize,
+                         const int optLevel,   /* forwarded to the price functions ; value 0 also enables the two speed shortcuts below ; asserted <= 2 */
+                         const ZSTD_dictMode_e dictMode)
+{
+    optState_t* const optStatePtr = &ms->opt;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;   /* start of the literals not yet stored */
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;   /* no match search is started beyond this position */
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);   /* a match longer than this is selected immediately, without pricing alternatives */
+    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;   /* passed by address to ZSTD_BtGetAllMatches() */
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;   /* opt[pos] : cheapest way found so far to reach ip+pos */
+    ZSTD_match_t* const matches = optStatePtr->matchTable;   /* scratch table filled by each ZSTD_BtGetAllMatches() call */
+    ZSTD_optimal_t lastSequence;   /* final sequence of the selected path ; always set before _shortestPath is reached */
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);   /* when the block starts exactly at prefixStart, begin searching one byte later */
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;   /* cur : position being evaluated, relative to ip ; last_pos : farthest position holding a price */
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;   /* 1 when no literal precedes this position */
+            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            if (!nbMatches) { ip++; continue; }   /* nothing found : try next position ; anchor stays, so the byte joins the pending literals */
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;   /* last entry of `matches` is used as the longest one */
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;   /* the series is made of this single sequence */
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {   /* pos carries over from one match to the next : each length is priced once, with the first match reaching it */
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;   /* extend the previous literal run, or start a new one after a match */
+                int const price = opt[cur-1].price
+                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                assert(price < 1000000000); /* overflow check */
+                if (price <= opt[cur].price) {   /* on equal price, the literal is chosen */
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = 0;
+                    opt[cur].off = 0;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                } else {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                }
+            }
+
+            /* Set the repcodes of the current position. We must do it here
+             * because we rely on the repcodes of the 2nd to last sequence being
+             * correct to set the next chunks repcodes during the backward
+             * traversal.
+             */
+            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+            assert(cur >= opt[cur].mlen);
+            if (opt[cur].mlen != 0) {
+                U32 const prev = cur - opt[cur].mlen;   /* position where the match ending at cur starts */
+                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+            } else {
+                memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));   /* a literal leaves the repcodes unchanged */
+            }
+
+            /* last match must start at a minimum distance of 8 from iend */
+            if (inr > ilimit) continue;
+
+            if (cur == last_pos) break;   /* end of the priced range : nothing further to extend */
+
+            if ( (optLevel==0) /*static_test*/
+              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+            }
+
+            {   U32 const ll0 = (opt[cur].mlen != 0);   /* cur reached by a match => a sequence starting here has no literal */
+                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+                U32 const previousPrice = opt[cur].price;
+                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 matchNb;
+                if (!nbMatches) {
+                    DEBUGLOG(7, "rPos:%u : no match found", cur);
+                    continue;
+                }
+
+                {   U32 const maxML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+                                inr-istart, cur, nbMatches, maxML);
+
+                    if ( (maxML > sufficient_len)
+                      || (cur + maxML >= ZSTD_OPT_NUM) ) {   /* long enough, or would reach index ZSTD_OPT_NUM of opt[] : end the series with this match */
+                        lastSequence.mlen = maxML;
+                        lastSequence.off = matches[nbMatches-1].off;
+                        lastSequence.litlen = litlen;
+                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+                        last_pos = cur + ZSTD_totalLen(lastSequence);
+                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                        goto _shortestPath;
+                }   }
+
+                /* set prices using matches found at position == cur */
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const lastML = matches[matchNb].len;
+                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;   /* shorter lengths were already priced with the previous match's offset */
+                    U32 mlen;
+
+                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
+                                matchNb, matches[matchNb].off, lastML, litlen);
+
+                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
+                        U32 const pos = cur + mlen;
+                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+
+                        if ((pos > last_pos) || (price < opt[pos].price)) {   /* position not priced yet, or strictly cheaper than the recorded way */
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            opt[pos].mlen = mlen;
+                            opt[pos].off = offset;
+                            opt[pos].litlen = litlen;
+                            opt[pos].price = price;
+                        } else {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
+                        }
+            }   }   }
+        }  /* for (cur = 1; cur <= last_pos; cur++) */
+
+        lastSequence = opt[last_pos];   /* forward pass ran to completion : the path ends at the farthest priced position */
+        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+
+_shortestPath:   /* cur, last_pos and lastSequence have to be set */
+        assert(opt[0].mlen == 0);
+
+        /* Set the next chunk's repcodes based on the repcodes of the beginning
+         * of the last match, and the last sequence. This avoids us having to
+         * update them while traversing the sequences.
+         */
+        if (lastSequence.mlen != 0) {
+            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            memcpy(rep, &reps, sizeof(reps));
+        } else {
+            memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+        }
+
+        {   U32 const storeEnd = cur + 1;   /* slot of opt[] that receives lastSequence */
+            U32 storeStart = storeEnd;   /* decremented once per sequence copied during the backward walk */
+            U32 seqPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur); (void)last_pos;
+            assert(storeEnd < ZSTD_OPT_NUM);
+            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+            opt[storeEnd] = lastSequence;
+            while (seqPos > 0) {   /* walk the selected path backwards, stacking its sequences just below storeEnd */
+                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+                storeStart--;
+                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+                opt[storeStart] = opt[seqPos];
+                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;   /* jump to the position where this sequence starts, clamped to 0 */
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {   /* replay the stacked sequences in forward order */
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;   /* literals and match are now both accounted for */
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);   /* called after the statistics updates made by the loop above */
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{   int const optLevel = 0;   /* btopt : generic parser at optLevel 0, in ZSTD_noDict mode */
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, optLevel, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{   /* Rewrites each table[0..lastEltIndex] as (value << (ZSTD_FREQ_DIV+bonus)) - 1 ; returns the sum of the new values. */
+    int const shift = ZSTD_FREQ_DIV+bonus;
+    U32 n, total = 0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (n=0; n<lastEltIndex+1; n++) {
+        table[n] = (table[n] << shift) - 1;
+        total += table[n];
+    }
+    return total;
+}
+
+/* ZSTD_upscaleStats() :
+ * used in 2-pass strategy.
+ * Upscales every frequency table of `optPtr` through ZSTD_upscaleStat()
+ * (bonus 0) and stores the resulting sums back into `optPtr`.
+ * The literals table is only processed when ZSTD_compressedLiterals() says so. */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    int const noBonus = 0;
+
+    if (ZSTD_compressedLiterals(optPtr)) {
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, noBonus);
+    }
+
+    optPtr->litLengthSum   = ZSTD_upscaleStat(optPtr->litLengthFreq,   MaxLL,  noBonus);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML,  noBonus);
+    optPtr->offCodeSum     = ZSTD_upscaleStat(optPtr->offCodeFreq,     MaxOff, noBonus);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ * @ms : match state; its opt statistics and its window indexes are modified here
+ * @seqStore : receives the sequences of the first pass, then gets reset
+ * @rep : repcodes; only read here (copied into a local scratch array)
+ * @src, @srcSize : the block to scan
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+               const void* src, size_t srcSize)
+{
+    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    memcpy(tmpRep, rep, sizeof(tmpRep));   /* the first pass works on a copy, so caller's `rep` stays untouched */
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    /* contract checks : mirror the conditions tested by the caller before invoking this function */
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
+
+    /* first pass : result (sequences, repcodes, return value) is discarded, only the statistics are kept */
+    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    /* base goes down by srcSize while dictLimit goes up by srcSize :
+     * the index of `src` (src - base) therefore also grows by srcSize.
+     * lowLimit and nextToUpdate are then aligned on the new dictLimit. */
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+
+    /* re-inforce weight of collected statistics */
+    ZSTD_upscaleStats(&ms->opt);
+}
+
+/* ZSTD_compressBlock_btultra():
+ * block compressor entry point for the "btultra" strategy:
+ * forwards to the generic optimal parser with optLevel 2, in ZSTD_noDict mode.
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    size_t lastLiterals;
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+    lastLiterals = ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return lastLiterals;
+}
+
+/* ZSTD_compressBlock_btultra2():
+ * same as btultra (optLevel 2, ZSTD_noDict), except that, when the block
+ * qualifies, a preliminary statistics-gathering pass is run first
+ * through ZSTD_initStats_ultra().
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy:
+     * a first pass over the first block collects statistics,
+     * which then seed the statistics of the real pass.
+     * After the 1st pass, everything else is forgotten and the block is started anew.
+     * This requires that nothing was previously loaded into tables :
+     * no dictionary, no prefix, no ldm preprocessing.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    {   int const firstBlock   = (ms->opt.litLengthSum == 0);
+        int const noLdm        = (seqStore->sequences == seqStore->sequencesStart);
+        int const noDictionary = (ms->window.dictLimit == ms->window.lowLimit);
+        int const frameStart   = (curr == ms->window.dictLimit);   /* nothing already loaded nor skipped */
+        int const largeEnough  = (srcSize > ZSTD_PREDEF_THRESHOLD);
+        if (firstBlock && noLdm && noDictionary && frameStart && largeEnough) {
+            ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
+    }   }
+
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+/* ZSTD_compressBlock_btopt_dictMatchState():
+ * generic optimal parser, optLevel 0, ZSTD_dictMatchState mode.
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    size_t const lastLiterals =
+        ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+    return lastLiterals;
+}
+
+/* ZSTD_compressBlock_btultra_dictMatchState():
+ * generic optimal parser, optLevel 2, ZSTD_dictMatchState mode.
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    size_t const lastLiterals =
+        ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+    return lastLiterals;
+}
+
+/* ZSTD_compressBlock_btopt_extDict():
+ * generic optimal parser, optLevel 0, ZSTD_extDict mode.
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    size_t const lastLiterals =
+        ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+    return lastLiterals;
+}
+
+/* ZSTD_compressBlock_btultra_extDict():
+ * generic optimal parser, optLevel 2, ZSTD_extDict mode.
+ * @return : whatever ZSTD_compressBlock_opt_generic() returns */
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    size_t const lastLiterals =
+        ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+    return lastLiterals;
+}
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is only specific for the first block (no prefix) */
+/**** ended inlining compress/zstd_opt.c ****/
+
+/**** start inlining decompress/huf_decompress.c ****/
+/* ******************************************************************
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+/**** skipping file: ../common/compiler.h ****/
+/**** skipping file: ../common/bitstream.h ****/
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/error_private.h ****/
+
+/* **************************************************************
+*  Macros
+****************************************************************/
+
+/* These two optional macros force the use of one or the other of the two
+ * Huffman decompression implementations. You can't force both
+ * at the same time.
+ */
+#if defined(HUF_FORCE_DECOMPRESS_X1) && \
+    defined(HUF_FORCE_DECOMPRESS_X2)
+#error "Cannot force the use of the X1 and X2 decoders at the same time!"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+/* HUF_isError : plain alias of ERR_isError, used below to test size_t results for an error code */
+#define HUF_isError ERR_isError
+
+
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+/* HUF_ALIGN(x, a) : adds (a-1) to x, then clears the bits set in (a-1).
+ * When `a` is a power of 2, this rounds x up to the next multiple of `a`.
+ * HUF_ALIGN_MASK(x, mask) : same operation, directly provided with mask == a-1.
+ * note : arguments are evaluated more than once. */
+#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/* **************************************************************
+*  BMI2 Variant Wrappers
+****************************************************************/
+/* HUF_DGEN(fn) :
+ * generates a static function `fn(dst, dstSize, cSrc, cSrcSize, DTable, bmi2)`
+ * on top of an existing `fn##_body(dst, dstSize, cSrc, cSrcSize, DTable)`.
+ * - DYNAMIC_BMI2 enabled : two copies of the body are emitted, `fn##_default`
+ *   and `fn##_bmi2` (the latter tagged TARGET_ATTRIBUTE("bmi2")),
+ *   and `fn` selects one of them at run time from its `bmi2` argument.
+ * - otherwise : `fn` ignores `bmi2` and calls `fn##_body` directly. */
+#if DYNAMIC_BMI2
+
+#define HUF_DGEN(fn)                                                        \
+                                                                            \
+    static size_t fn##_default(                                             \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        if (bmi2) {                                                         \
+            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+        }                                                                   \
+        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+    }
+
+#else
+
+#define HUF_DGEN(fn)                                                        \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        (void)bmi2;                                                         \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }
+
+#endif
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+/* DTableDesc : header stored in the first cell of a HUF_DTable (read/written with memcpy).
+ * - maxTableLog : upper bound checked against the tableLog read from the stream
+ * - tableType   : set to 0 by the X1 (single-symbol) reader, 1 by the X2 (double-symbols) reader
+ * - tableLog    : set by the table readers, used as `dtLog` by the decoders
+ * - reserved    : not used by the code in this section */
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+/* HUF_getDTableDesc() :
+ * @return : a copy of the DTableDesc header stored at the very beginning of `table`.
+ * memcpy() is used to read it, rather than a pointer cast. */
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc desc;
+    memcpy(&desc, table, sizeof(DTableDesc));
+    return desc;
+}
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+/* HUF_DEltX1 : one cell of the single-symbol decoding table.
+ * - byte   : the decoded symbol
+ * - nbBits : nb of bits to skip in the bitstream once this symbol is decoded */
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+
+/* HUF_readDTableX1_wksp() :
+ * Reads a Huffman weights description from `src` (through HUF_readStats())
+ * and fills `DTable` with single-symbol decoding cells (HUF_DEltX1).
+ * `workSpace` is carved into :
+ *   - rankVal    : (HUF_TABLELOG_ABSOLUTEMAX + 1) U32
+ *   - huffWeight : (HUF_SYMBOLVALUE_MAX + 1) bytes, rounded up to a multiple of sizeof(U32)
+ * `wkspSize` is expressed in bytes and must cover both areas.
+ * @return : the value returned by HUF_readStats() (callers in this file use it
+ *           as the size of the header to skip in `src`),
+ *           or an error code : tableLog_tooLarge when the workspace is too small
+ *           or when the tree doesn't fit into DTable, or any error from HUF_readStats(). */
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;   /* decoding cells start right after the header cell */
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+
+    U32* rankVal;
+    BYTE* huffWeight;
+    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */
+
+    rankVal = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    /* spaceUsed32 is in U32 units, wkspSize in bytes */
+    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;   /* 0 == single-symbol table; checked by the *X1_usingDTable() entry points */
+        dtd.tableLog = (BYTE)tableLog;
+        memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Calculate starting value for each rank */
+    /* on entry, rankVal[n] is used as a count for weight n;
+     * it gets replaced by the index of the first cell for that weight,
+     * each symbol of weight n covering (1 << (n-1)) cells */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<tableLog+1; n++) {
+            U32 const current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill DTable */
+    {   U32 n;
+        size_t const nEnd = nbSymbols;
+        for (n=0; n<nEnd; n++) {
+            size_t const w = huffWeight[n];
+            size_t const length = (1 << w) >> 1;   /* nb of cells for this symbol : 0 when w==0, else 1 << (w-1) */
+            size_t const uStart = rankVal[w];
+            size_t const uEnd = uStart + length;
+            size_t u;
+            HUF_DEltX1 D;
+            D.byte = (BYTE)n;
+            D.nbBits = (BYTE)(tableLog + 1 - w);
+            rankVal[w] = (U32)uEnd;   /* next symbol of same weight starts where this one ends */
+            if (length < 4) {
+                /* Use length in the loop bound so the compiler knows it is short. */
+                for (u = 0; u < length; ++u)
+                    dt[uStart + u] = D;
+            } else {
+                /* Unroll the loop 4 times, we know it is a power of 2. */
+                for (u = uStart; u < uEnd; u += 4) {
+                    dt[u + 0] = D;
+                    dt[u + 1] = D;
+                    dt[u + 2] = D;
+                    dt[u + 3] = D;
+    }   }   }   }
+    return iSize;
+}
+
+/* HUF_readDTableX1() :
+ * same as HUF_readDTableX1_wksp(), with the scratch workspace
+ * allocated on the stack (HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32). */
+size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 stackWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_readDTableX1_wksp(DTable, src, srcSize,
+                                                stackWksp, sizeof(stackWksp));
+    return result;
+}
+
+/* HUF_decodeSymbolX1() :
+ * looks up the next `dtLog` bits of `Dstream` in `dt`,
+ * skips the nb of bits recorded in the selected cell,
+ * @return : the symbol stored in that cell */
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const slot = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    const HUF_DEltX1* const cell = dt + slot;
+    BYTE const symbol = cell->byte;
+    BIT_skipBits(Dstream, cell->nbBits);
+    return symbol;
+}
+
+/* HUF_DECODE_SYMBOLX1_{0,1,2} :
+ * decode one symbol from DStreamPtr and store it at *ptr, advancing ptr.
+ * - _0 : unconditional
+ * - _1 : only when MEM_64bits() or HUF_TABLELOG_MAX<=12
+ * - _2 : only when MEM_64bits()
+ * These macros expect variables `dt` and `dtLog` to be in scope at the expansion site.
+ * note : _1 and _2 expand to an unbraced `if` statement. */
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+/* HUF_decodeStreamX1() :
+ * decodes symbols from `bitDPtr` into [p, pEnd), using single-symbol table `dt`.
+ * The output range is always entirely filled, even when the bitstream
+ * runs out : validity of the bitstream must be checked by the caller afterwards.
+ * @return : nb of bytes in the output range (pEnd - initial p) */
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    /* `pEnd-3` is only formed when more than 3 bytes remain :
+     * with a shorter range, it could point before the start of the output buffer,
+     * which is undefined behavior even without dereferencing. */
+    if ((pEnd - p) > 3) {
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+        }
+    } else {
+        /* keep the bitstream state identical to one evaluation of the loop condition */
+        BIT_reloadDStream(bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+/* HUF_decompress1X1_usingDTable_internal_body() :
+ * decodes a single bitstream `cSrc` into exactly `dstSize` bytes at `dst`,
+ * using an already built single-symbol `DTable`.
+ * @return : dstSize,
+ *           or an error code : the one reported by BIT_initDStream() (through CHECK_F),
+ *           or corruption_detected if the bitstream is not exactly consumed. */
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    const void* const cellsPtr = DTable + 1;   /* cells follow the header cell */
+    const HUF_DEltX1* const cells = (const HUF_DEltX1*)cellsPtr;
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    U32 const tableLog = desc.tableLog;
+    BIT_DStream_t bitD;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(ostart, &bitD, oend, cells, tableLog);
+
+    if (BIT_endOfDStream(&bitD)) {
+        return dstSize;
+    }
+    return ERROR(corruption_detected);
+}
+
+/* HUF_decompress4X1_usingDTable_internal_body() :
+ * decodes 4 interleaved bitstreams from `cSrc` into exactly `dstSize` bytes at `dst`,
+ * using an already built single-symbol `DTable`.
+ * `cSrc` layout : a 6-byte jump table (3 little-endian 16-bit stream sizes),
+ *                 followed by the 4 streams; the size of the 4th one is deduced from cSrcSize.
+ * `dst` layout  : 4 consecutive segments; the first 3 are (dstSize+3)/4 bytes long,
+ *                 the 4th one receives the remainder.
+ * @return : dstSize, or an error code (corruption_detected, or an error from BIT_initDStream()). */
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - 3;
+        const void* const dtPtr = DTable + 1;
+        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+        U32 endSignal = 1;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        /* The first 3 segments must fit into dst.
+         * For very small dstSize (1, 2 or 5), 3*segmentSize exceeds dstSize :
+         * opStart4 would then lie beyond oend, and finishing streams 2 and 3
+         * would write past the end of the output buffer. */
+        if (segmentSize * 3 > dstSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+        for ( ; (endSignal) & (op4 < olimit) ; ) {
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+            /* the loop stops as soon as any of the 4 streams is no longer "unfinished" */
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+        }
+
+        /* check corruption */
+        /* note : should not be necessary : op# advance in lock step, and we control op4.
+         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        /* every stream must be exactly consumed */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+/* HUF_decompress_usingDTable_t :
+ * pointer to a function sharing the prototype of the `*_usingDTable()` decoders */
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+                                               const void *cSrc,
+                                               size_t cSrcSize,
+                                               const HUF_DTable *DTable);
+
+/* Each HUF_DGEN(fn) below defines static `fn(dst, dstSize, cSrc, cSrcSize, DTable, bmi2)`,
+ * a wrapper around the corresponding `fn##_body()` */
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
+
+
+
+/* HUF_decompress1X1_usingDTable() :
+ * single-stream decoding with an already built `DTable`.
+ * `DTable` must be a single-symbol table (tableType 0), otherwise GENERIC error is returned.
+ * @return : result of HUF_decompress1X1_usingDTable_internal() (bmi2 disabled) */
+size_t HUF_decompress1X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 0) {
+        return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    }
+    return ERROR(GENERIC);
+}
+
+/* HUF_decompress1X1_DCtx_wksp() :
+ * reads the table description at the beginning of `cSrc` into `DCtx`
+ * (using `workSpace` as scratch area), then decodes the rest of `cSrc`
+ * as a single stream into `dst`.
+ * @return : result of the decoding stage,
+ *           or the error from HUF_readDTableX1_wksp(),
+ *           or srcSize_wrong when nothing remains after the table description. */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    size_t const headerSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    {   const BYTE* const payload = (const BYTE*)cSrc + headerSize;
+        size_t const payloadSize = cSrcSize - headerSize;
+        return HUF_decompress1X1_usingDTable_internal(dst, dstSize, payload, payloadSize, DCtx, /* bmi2 */ 0);
+    }
+}
+
+
+/* HUF_decompress1X1_DCtx() :
+ * same as HUF_decompress1X1_DCtx_wksp(), with the scratch workspace
+ * allocated on the stack (HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32). */
+size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 stackWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                                      stackWksp, sizeof(stackWksp));
+    return result;
+}
+
+/* HUF_decompress1X1() :
+ * single-stream, single-symbol decoding, using a DTable
+ * created locally with HUF_CREATE_STATIC_DTABLEX1(, HUF_TABLELOG_MAX). */
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    {   size_t const result = HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+        return result;
+    }
+}
+
+/* HUF_decompress4X1_usingDTable() :
+ * 4-streams decoding with an already built `DTable`.
+ * `DTable` must be a single-symbol table (tableType 0), otherwise GENERIC error is returned.
+ * @return : result of HUF_decompress4X1_usingDTable_internal() (bmi2 disabled) */
+size_t HUF_decompress4X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 0) {
+        return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    }
+    return ERROR(GENERIC);
+}
+
+/* HUF_decompress4X1_DCtx_wksp_bmi2() :
+ * reads the table description at the beginning of `cSrc` into `dctx`
+ * (using `workSpace` as scratch area), then decodes the rest of `cSrc`
+ * as 4 streams into `dst`. `bmi2` is forwarded to the decoding stage.
+ * @return : result of the decoding stage,
+ *           or the error from HUF_readDTableX1_wksp(),
+ *           or srcSize_wrong when nothing remains after the table description. */
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    size_t const headerSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
+                                                     workSpace, wkspSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    {   const BYTE* const payload = (const BYTE*)cSrc + headerSize;
+        size_t const payloadSize = cSrcSize - headerSize;
+        return HUF_decompress4X1_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
+    }
+}
+
+/* HUF_decompress4X1_DCtx_wksp() :
+ * same as HUF_decompress4X1_DCtx_wksp_bmi2(), with bmi2 disabled (0). */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    int const bmi2 = 0;
+    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+}
+
+
+/* HUF_decompress4X1_DCtx() :
+ * same as HUF_decompress4X1_DCtx_wksp(), with the scratch workspace
+ * allocated on the stack (HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32). */
+size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 stackWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                                      stackWksp, sizeof(stackWksp));
+    return result;
+}
+/* HUF_decompress4X1() :
+ * 4-streams, single-symbol decoding, using a DTable
+ * created locally with HUF_CREATE_STATIC_DTABLEX1(, HUF_TABLELOG_MAX). */
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    {   size_t const result = HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+        return result;
+    }
+}
+
+#endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+/* HUF_DEltX2 : one cell of the double-symbols decoding table.
+ * - sequence : 1 or 2 symbols, written in little-endian order (MEM_writeLE16)
+ * - nbBits   : nb of bits to skip in the bitstream after decoding this cell
+ * - length   : nb of symbols carried by `sequence` (1 or 2) */
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
+/* sortedSymbol_t : a symbol and its weight, as stored in the weight-sorted list */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+/* rankValCol_t : one U32 per weight, from 0 to HUF_TABLELOG_MAX included */
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+/* rankVal_t : HUF_TABLELOG_MAX columns; indexed by nb of bits already consumed in this file */
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32
+ * Fills a sub-table `DTable`, for cells whose first symbol is `baseSeq`
+ * and already uses `consumed` bits.
+ * @sizeLog : log2 size of the sub-table (each second-level symbol fills 1 << (sizeLog-nbBits) cells)
+ * @minWeight : when > 1, the first rankVal[minWeight] cells are filled with `baseSeq` alone (length 1)
+ * @sortedSymbols, @sortedListSize : candidate second symbols
+ * @nbBitsBaseline : nbBits of a symbol is (nbBitsBaseline - weight) */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX2 DElt;
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
+
+    /* get pre-calculated rankVal */
+    /* local copy : entries get incremented while filling, the origin must remain intact */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;   /* single symbol : only baseSeq is decoded from these cells */
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            /* 2-symbols sequence : baseSeq in the low byte, second symbol in the high byte */
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits = (BYTE)(nbBits + consumed);   /* bits of both symbols */
+            DElt.length = 2;
+            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+            rankVal[weight] += length;   /* next symbol of same weight starts after this range */
+    }   }
+}
+
+
+/* HUF_fillDTableX2() :
+ * Fills the double-symbols decoding table `DTable`, of log2 size `targetLog`.
+ * For each symbol of `sortedList` :
+ * - if enough bits remain after it for a second symbol (targetLog-nbBits >= minBits),
+ *   its range of cells is delegated to HUF_fillDTableX2Level2();
+ * - otherwise its range is filled with single-symbol cells (length 1).
+ * @rankStart : indexed by weight, gives a start position into `sortedList`
+ * @rankValOrigin : start positions into DTable; column [0] is the one used at this level,
+ *                  column [nbBits] is handed over to the second level
+ * @maxWeight : largest weight present (defines minBits)
+ * @nbBitsBaseline : nbBits of a symbol is (nbBitsBaseline - weight) */
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    /* sizeof(rankVal) is the size of one column : this copies rankValOrigin[0] */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];   /* symbols before this position are not candidates for second symbol */
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;   /* next symbol of same weight starts after this range */
+    }
+}
+
+/* HUF_readDTableX2_wksp() :
+ * Reads a Huffman weights description from `src` (through HUF_readStats())
+ * and builds the double-symbols decoding table into `DTable`.
+ * The table is always built at log2 size `maxTableLog`, read from DTable's header.
+ * `workSpace` (`wkspSize` bytes) is used as scratch area, carved below into
+ * rankVal, rankStats, rankStart0, sortedSymbol and weightList.
+ * @return : the value returned by HUF_readStats() on success,
+ *           or an error code : tableLog_tooLarge (workspace too small, maxTableLog too large,
+ *           or tableLog > maxTableLog), or any error from HUF_readStats(). */
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    rankValCol_t* rankVal;
+    U32* rankStats;
+    U32* rankStart0;
+    sortedSymbol_t* sortedSymbol;
+    BYTE* weightList;
+    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */
+
+    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+    rankStats = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 1;
+    rankStart0 = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 2;
+    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    /* spaceUsed32 is in U32 units, wkspSize in bytes */
+    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    rankStart = rankStart0 + 1;   /* rankStart[w] is rankStart0[w+1] */
+    /* rankStats (HUF_TABLELOG_MAX + 1 elts) and rankStart0 (HUF_TABLELOG_MAX + 2 elts)
+     * are contiguous in the workspace : both are cleared with this single memset */
+    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;   /* nb of symbols with weight >= 1 */
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = weightList[s];
+            U32 const r = rankStart[w]++;   /* after this loop, rankStart[w] is the start of weight w+1 */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = rankVal[0];
+        /* column 0 : start position of each weight within a table of log2 size maxTableLog */
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 current = nextRankVal;
+                nextRankVal += rankStats[w] << (w+rescale);
+                rankVal0[w] = current;
+        }   }
+        /* column [consumed] : same positions, scaled down by `consumed` bits */
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    /* header is updated last : the table is built at size maxTableLog, and tagged as type 1 (double-symbols) */
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+/* HUF_readDTableX2() :
+ * same as HUF_readDTableX2_wksp(), with the scratch workspace
+ * allocated on the stack (HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32). */
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 stackWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_readDTableX2_wksp(DTable, src, srcSize,
+                                                stackWksp, sizeof(stackWksp));
+    return result;
+}
+
+
+/* HUF_decodeSymbolX2() :
+ * Peeks dtLog bits, copies the first 2 bytes of the matching table cell
+ * to `op`, then consumes the cell's nbBits from the bitstream.
+ * 2 bytes are always stored, whatever the cell's `length` is.
+ * @return : the cell's `length` field (callers add it to their output pointer). */
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    const HUF_DEltX2* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, cell, 2);
+    BIT_skipBits(DStream, cell->nbBits);
+    return cell->length;
+}
+
+/* HUF_decodeLastSymbolX2() :
+ * Variant used when exactly one output byte remains : stores a single byte
+ * from the matching table cell.
+ * When the cell's length is 1, its nbBits are consumed normally.
+ * Otherwise bits are consumed only if the container is not already exhausted,
+ * and bitsConsumed is clamped to the container width afterwards.
+ * @return : always 1. */
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    const HUF_DEltX2* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, cell, 1);
+
+    if (cell->length == 1) {
+        BIT_skipBits(DStream, cell->nbBits);
+        return 1;
+    }
+
+    /* cell length != 1 : only the first byte is wanted */
+    if (DStream->bitsConsumed >= (sizeof(DStream->bitContainer)*8))
+        return 1;   /* container already fully consumed : leave the stream untouched */
+
+    BIT_skipBits(DStream, cell->nbBits);
+    if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) {
+        /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+    }
+    return 1;
+}
+
+/* HUF_DECODE_SYMBOLX2_{0,1,2}() :
+ * Decode one table cell through HUF_decodeSymbolX2() and advance `ptr` by the returned length.
+ * All three variants expect identifiers `dt` and `dtLog` to be in scope at the expansion site.
+ * _0 : always decodes.
+ * _1 : decodes only when MEM_64bits() is true or HUF_TABLELOG_MAX <= 12.
+ * _2 : decodes only when MEM_64bits() is true.
+ * Note : _1 and _2 expand to an unbraced `if` statement;
+ *        do not follow them directly with an `else`. */
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+/* HUF_decodeStreamX2() :
+ * Decodes from `bitDPtr` into [p, pEnd) using table `dt` of log size `dtLog`.
+ * Works in 4 phases : a bulk loop (up to 4 cells per reload), a reload-per-cell loop,
+ * a no-reload loop once the bitstream can no longer be refilled,
+ * and finally a single-byte decode when exactly one byte is left.
+ * The function itself reports no error : callers check stream completion afterwards.
+ * @return : number of bytes by which `p` advanced (p - pStart). */
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+                const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    /* note : bitwise '&' (not '&&') : the reload is performed on every test, whatever the value of the second operand.
+     * The second operand keeps sizeof(bitContainer)-1 bytes of margin before pEnd. */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    /* each decode stores 2 bytes, hence the requirement p <= pEnd-2 */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    /* exactly one byte of room left : use the 1-byte store variant */
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+/* HUF_decompress1X2_usingDTable_internal_body() :
+ * Single-stream decoding of `cSrc` into `dst` with an already-built X2 table.
+ * @return : dstSize when the bitstream is entirely consumed after decoding,
+ *           corruption_detected otherwise,
+ *           or the error reported by BIT_initDStream() (through CHECK_F). */
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BIT_DStream_t stream;
+
+    /* open the bitstream */
+    CHECK_F( BIT_initDStream(&stream, cSrc, cSrcSize) );
+
+    /* decode everything in one go */
+    {   BYTE* const out = (BYTE*) dst;
+        const void* const cells = DTable+1;   /* go through void* : force compiler to not use strict-aliasing */
+        DTableDesc const desc = HUF_getDTableDesc(DTable);
+        HUF_decodeStreamX2(out, &stream, out + dstSize, (const HUF_DEltX2*)cells, desc.tableLog);
+    }
+
+    /* success only if the input has been exactly consumed */
+    if (BIT_endOfDStream(&stream)) return dstSize;
+    return ERROR(corruption_detected);
+}
+
+/* HUF_decompress4X2_usingDTable_internal_body() :
+ * 4-streams decoding of `cSrc` into `dst` with an already-built X2 table.
+ * Input layout : a 6-byte jump table (three little-endian 16-bit lengths, for streams 1-3),
+ *                followed by the 4 streams; the 4th length is whatever remains of cSrcSize.
+ * Output layout : 4 consecutive segments of (dstSize+3)/4 bytes, the last one taking the remainder.
+ * @return : dstSize on success,
+ *           corruption_detected when the input is too short, when lengths are inconsistent,
+ *           when the output cannot be split into 4 segments,
+ *           when a stream overruns its segment, or when a stream is not exactly consumed,
+ *           or the error reported by BIT_initDStream() (through CHECK_F). */
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - (sizeof(size_t)-1);
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal = 1;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        /* The first 3 segments must fit within dst.
+         * For tiny outputs (dstSize of 1, 2 or 5), 3*segmentSize > dstSize,
+         * and the per-stream finishers below, which decode up to opStart3 / opStart4,
+         * would write beyond oend. Reject such requests. */
+        if (opStart4 > oend) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        /* note : bitwise '&' on purpose; loop stops as soon as one stream cannot be fast-reloaded,
+         *        or when stream 4 gets within sizeof(size_t)-1 bytes of oend */
+        for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+            /* clang on x86 : streams processed pairwise, reload after each pair */
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
+            /* other targets : the 4 streams are interleaved symbol by symbol */
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = (U32)LIKELY(
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+/* Instantiate the `_internal` entry points from the two `_body` templates above.
+ * HUF_DGEN is defined outside this section; judging from the call sites in this file,
+ * each generated function takes (dst, dstSize, cSrc, cSrcSize, DTable, bmi2).
+ * NOTE(review): presumably the trailing `bmi2` flag selects a BMI2-specialized variant - confirm against the macro definition. */
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+/* HUF_decompress1X2_usingDTable() :
+ * Single-stream decoding with a caller-provided table.
+ * The table descriptor must carry tableType == 1, otherwise GENERIC is returned.
+ * Decoding is requested with bmi2 == 0. */
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 1)
+        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return ERROR(GENERIC);
+}
+
+/* HUF_decompress1X2_DCtx_wksp() :
+ * Reads the table description at the start of `cSrc` into `DCtx` (using `workSpace`),
+ * then decodes the remaining input as a single stream, with bmi2 == 0.
+ * @return : the table-reading error if any,
+ *           srcSize_wrong when the table description uses the whole input,
+ *           otherwise the result of the single-stream decoder. */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    size_t const headerSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+                                                    workSpace, wkspSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        size_t const payloadSize = cSrcSize - headerSize;
+        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, payload, payloadSize, DCtx, /* bmi2 */ 0);
+    }
+}
+
+
+/* HUF_decompress1X2_DCtx() :
+ * Same as HUF_decompress1X2_DCtx_wksp(), with a workspace of
+ * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 words allocated on the stack. */
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                                      scratch, sizeof(scratch));
+    return result;
+}
+
+/* HUF_decompress1X2() :
+ * Single-stream decoding using a table created locally through
+ * HUF_CREATE_STATIC_DTABLEX2() with HUF_TABLELOG_MAX. */
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    size_t const result = HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+    return result;
+}
+
+/* HUF_decompress4X2_usingDTable() :
+ * 4-streams decoding with a caller-provided table.
+ * The table descriptor must carry tableType == 1, otherwise GENERIC is returned.
+ * Decoding is requested with bmi2 == 0. */
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 1)
+        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return ERROR(GENERIC);
+}
+
+/* HUF_decompress4X2_DCtx_wksp_bmi2() :
+ * Reads the table description at the start of `cSrc` into `dctx` (using `workSpace`),
+ * then decodes the remaining input as 4 streams, forwarding `bmi2` to the decoder.
+ * @return : the table-reading error if any,
+ *           srcSize_wrong when the table description uses the whole input,
+ *           otherwise the result of the 4-streams decoder. */
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    size_t const headerSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+                                                    workSpace, wkspSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        size_t const payloadSize = cSrcSize - headerSize;
+        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
+    }
+}
+
+/* HUF_decompress4X2_DCtx_wksp() :
+ * Public flavor of HUF_decompress4X2_DCtx_wksp_bmi2(), always invoked with bmi2 == 0. */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    int const noBmi2 = 0;
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize,
+                                            cSrc, cSrcSize,
+                                            workSpace, wkspSize,
+                                            noBmi2);
+}
+
+
+/* HUF_decompress4X2_DCtx() :
+ * Same as HUF_decompress4X2_DCtx_wksp(), with a workspace of
+ * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 words allocated on the stack. */
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                                      scratch, sizeof(scratch));
+    return result;
+}
+
+/* HUF_decompress4X2() :
+ * 4-streams decoding using a table created locally through
+ * HUF_CREATE_STATIC_DTABLEX2() with HUF_TABLELOG_MAX. */
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    size_t const result = HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+    return result;
+}
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+/* HUF_decompress1X_usingDTable() :
+ * Single-stream decoding with a caller-provided table, with bmi2 == 0.
+ * The decoder flavor follows the table descriptor :
+ * tableType == 0 selects the X1 decoder, any other value selects X2.
+ * When a decoder is forced at build time, the table type is only asserted. */
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)desc;
+    assert(desc.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)desc;
+    assert(desc.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    if (desc.tableType != 0)
+        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+/* HUF_decompress4X_usingDTable() :
+ * 4-streams decoding with a caller-provided table, with bmi2 == 0.
+ * The decoder flavor follows the table descriptor :
+ * tableType == 0 selects the X1 decoder, any other value selects X2.
+ * When a decoder is forced at build time, the table type is only asserted. */
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)desc;
+    assert(desc.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)desc;
+    assert(desc.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    if (desc.tableType != 0)
+        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+/* algo_time_t :
+ * cost estimate for one decoder at one compression-ratio bucket.
+ * tableTime     : fixed cost, added once.
+ * decode256Time : cost multiplied by (dstSize >> 8) in HUF_selectDecoder().
+ * NOTE(review): units are not stated in this file - presumably arbitrary, only compared against each other. */
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+/* algoTime :
+ * rows are indexed by Q = cSrcSize * 16 / dstSize (capped at 15),
+ * columns by decoder variant (single, double, quad).
+ * Only compiled when no decoder is forced at build time. */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+#endif
+
+/** HUF_selectDecoder() :
+ *  Estimates which decoder should be faster for this (dstSize, cSrcSize) pair,
+ *  from the pre-computed figures stored in algoTime[].
+ *  The double-symbol estimate is inflated by 1/8th before comparison.
+ *  When a decoder is forced at build time, that decoder is always reported.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    assert(dstSize > 0);
+    assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 1;
+#else
+    /* decoder timing evaluation */
+    {   U32 const bucket = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* bucket < 16 */
+        U32 const chunks256 = (U32)(dstSize >> 8);
+        const algo_time_t* const row = algoTime[bucket];
+        U32 const singleCost = row[0].tableTime + (row[0].decode256Time * chunks256);
+        U32 const doubleBase = row[1].tableTime + (row[1].decode256Time * chunks256);
+        /* advantage to algorithm using less memory, to reduce cache eviction */
+        U32 const doubleCost = doubleBase + (doubleBase >> 3);
+        return doubleCost < singleCost;
+    }
+#endif
+}
+
+
+/* signature shared by HUF_decompress4X1() and HUF_decompress4X2() */
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+/* HUF_decompress() :
+ * Handles the trivial cases itself (raw copy when sizes are equal, byte fill when cSrcSize == 1),
+ * then hands over to the 4-streams decoder designated by HUF_selectDecoder().
+ * @return : dstSize_tooSmall when dstSize == 0,
+ *           corruption_detected when cSrcSize > dstSize,
+ *           dstSize for the trivial cases,
+ *           otherwise the result of the selected decoder. */
+size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+    static const decompressionAlgo decoders[2] = { HUF_decompress4X1, HUF_decompress4X2 };
+#endif
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize >= dstSize) {
+        if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+        memcpy(dst, cSrc, dstSize);   /* not compressed */
+        return dstSize;
+    }
+    if (cSrcSize == 1) {   /* RLE */
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+        return dstSize;
+    }
+
+    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)choice;
+        assert(choice == 0);
+        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)choice;
+        assert(choice == 1);
+        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
+#else
+        {   decompressionAlgo const decode = decoders[choice];
+            return decode(dst, dstSize, cSrc, cSrcSize);
+        }
+#endif
+    }
+}
+
+/* HUF_decompress4X_DCtx() :
+ * Same front-end logic as HUF_decompress(), building the table into the caller-provided `dctx`.
+ * @return : dstSize_tooSmall when dstSize == 0,
+ *           corruption_detected when cSrcSize > dstSize,
+ *           dstSize for the raw-copy and byte-fill cases,
+ *           otherwise the result of the selected 4-streams decoder. */
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize >= dstSize) {
+        if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+        memcpy(dst, cSrc, dstSize);   /* not compressed */
+        return dstSize;
+    }
+    if (cSrcSize == 1) {   /* RLE */
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+        return dstSize;
+    }
+
+    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)choice;
+        assert(choice == 0);
+        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)choice;
+        assert(choice == 1);
+        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+#else
+        if (choice)
+            return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+#endif
+    }
+}
+
+/* HUF_decompress4X_hufOnly() :
+ * Same as HUF_decompress4X_hufOnly_wksp(), with a workspace of
+ * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 words allocated on the stack. */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                                        scratch, sizeof(scratch));
+    return result;
+}
+
+
+/* HUF_decompress4X_hufOnly_wksp() :
+ * 4-streams decoding without the raw-copy / byte-fill shortcuts :
+ * the input is always handed to one of the two decoders, chosen by HUF_selectDecoder().
+ * @return : dstSize_tooSmall when dstSize == 0,
+ *           corruption_detected when cSrcSize == 0,
+ *           otherwise the result of the selected decoder. */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)choice;
+        assert(choice == 0);
+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)choice;
+        assert(choice == 1);
+        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+        if (choice)
+            return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+/* HUF_decompress1X_DCtx_wksp() :
+ * Single-stream counterpart of HUF_decompress4X_DCtx(), with a caller-provided workspace.
+ * @return : dstSize_tooSmall when dstSize == 0,
+ *           corruption_detected when cSrcSize > dstSize,
+ *           dstSize for the raw-copy and byte-fill cases,
+ *           otherwise the result of the selected single-stream decoder. */
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize >= dstSize) {
+        if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+        memcpy(dst, cSrc, dstSize);   /* not compressed */
+        return dstSize;
+    }
+    if (cSrcSize == 1) {   /* RLE */
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+        return dstSize;
+    }
+
+    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)choice;
+        assert(choice == 0);
+        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)choice;
+        assert(choice == 1);
+        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+        if (choice)
+            return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+/* HUF_decompress1X_DCtx() :
+ * Same as HUF_decompress1X_DCtx_wksp(), with a workspace of
+ * HUF_DECOMPRESS_WORKSPACE_SIZE_U32 words allocated on the stack. */
+size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                             const void* cSrc, size_t cSrcSize)
+{
+    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    size_t const result = HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                                     scratch, sizeof(scratch));
+    return result;
+}
+
+
+/* HUF_decompress1X_usingDTable_bmi2() :
+ * Single-stream decoding with a caller-provided table, forwarding `bmi2` to the decoder.
+ * tableType == 0 selects the X1 decoder, any other value selects X2.
+ * When a decoder is forced at build time, the table type is only asserted. */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)desc;
+    assert(desc.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)desc;
+    assert(desc.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    if (desc.tableType != 0)
+        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+/* HUF_decompress1X1_DCtx_wksp_bmi2() :
+ * Reads the X1 table description at the start of `cSrc` into `dctx` (using `workSpace`),
+ * then decodes the remaining input as a single stream, forwarding `bmi2` to the decoder.
+ * @return : the table-reading error if any,
+ *           srcSize_wrong when the table description uses the whole input,
+ *           otherwise the result of the single-stream X1 decoder. */
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    size_t const headerSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);
+
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        size_t const payloadSize = cSrcSize - headerSize;
+        return HUF_decompress1X1_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
+    }
+}
+#endif
+
+/* HUF_decompress4X_usingDTable_bmi2() :
+ * 4-streams decoding with a caller-provided table, forwarding `bmi2` to the decoder.
+ * tableType == 0 selects the X1 decoder, any other value selects X2.
+ * When a decoder is forced at build time, the table type is only asserted. */
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)desc;
+    assert(desc.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)desc;
+    assert(desc.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    if (desc.tableType != 0)
+        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+/* HUF_decompress4X_hufOnly_wksp_bmi2() :
+ * Same as HUF_decompress4X_hufOnly_wksp(), forwarding `bmi2` to the selected decoder.
+ * @return : dstSize_tooSmall when dstSize == 0,
+ *           corruption_detected when cSrcSize == 0,
+ *           otherwise the result of the selected decoder. */
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const choice = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)choice;
+        assert(choice == 0);
+        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)choice;
+        assert(choice == 1);
+        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#else
+        if (choice)
+            return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#endif
+    }
+}
+/**** ended inlining decompress/huf_decompress.c ****/
+/**** start inlining decompress/zstd_ddict.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_ddict.c :
+ * concentrates all logic that needs to know the internals of ZSTD_DDict object */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <string.h>      /* memcpy, memmove, memset */
+/**** skipping file: ../common/cpu.h ****/
+/**** skipping file: ../common/mem.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** start inlining zstd_decompress_internal.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+ #ifndef ZSTD_DECOMPRESS_INTERNAL_H
+ #define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+
+
+
+/*-*******************************************************
+ *  Constants
+ *********************************************************/
+/* Per-code lookup tables, one entry per code value (0..MaxLL, 0..MaxOff, 0..MaxML).
+ * NOTE(review): presumably LL = literal length, OF = offset, ML = match length,
+ * with `_base` giving the baseline value of a code and `_bits` its number of extra bits - confirm against the users of these tables. */
+
+/* LL_base : entries 0..15 equal their index; larger steps afterwards */
+static const U32 LL_base[MaxLL+1] = {
+                 0,    1,    2,     3,     4,     5,     6,      7,
+                 8,    9,   10,    11,    12,    13,    14,     15,
+                16,   18,   20,    22,    24,    28,    32,     40,
+                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                0x2000, 0x4000, 0x8000, 0x10000 };
+
+/* OF_base : from index 2 onwards, entry i equals (1<<i) - 3 */
+static const U32 OF_base[MaxOff+1] = {
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+/* OF_bits : entry i equals i */
+static const U32 OF_bits[MaxOff+1] = {
+                     0,  1,  2,  3,  4,  5,  6,  7,
+                     8,  9, 10, 11, 12, 13, 14, 15,
+                    16, 17, 18, 19, 20, 21, 22, 23,
+                    24, 25, 26, 27, 28, 29, 30, 31 };
+
+/* ML_base : entries 0..31 equal index + 3; larger steps afterwards */
+static const U32 ML_base[MaxML+1] = {
+                     3,  4,  5,    6,     7,     8,     9,    10,
+                    11, 12, 13,   14,    15,    16,    17,    18,
+                    19, 20, 21,   22,    23,    24,    25,    26,
+                    27, 28, 29,   30,    31,    32,    33,    34,
+                    35, 37, 39,   41,    43,    47,    51,    59,
+                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
+/*-*******************************************************
+ *  Decompression types
+ *********************************************************/
+ /* ZSTD_seqSymbol_header :
+  * two U32 fields, 8 bytes in total when U32 is 32-bit.
+  * NOTE(review): presumably overlaid on the first cell of a ZSTD_seqSymbol table,
+  * which would explain the extra "1 +" in SEQSYMBOL_TABLE_SIZE() - confirm against the table builder. */
+ typedef struct {
+     U32 fastMode;
+     U32 tableLog;
+ } ZSTD_seqSymbol_header;
+
+ /* ZSTD_seqSymbol :
+  * one cell of a sequence decoding table.
+  * NOTE(review): field roles are inferred from their names only - confirm against the sequence decoder. */
+ typedef struct {
+     U16  nextState;
+     BYTE nbAdditionalBits;
+     BYTE nbBits;
+     U32  baseValue;
+ } ZSTD_seqSymbol;
+
+ /* number of cells reserved for a table of log size `log` : 2^log cells + 1 */
+ #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+/* ZSTD_entropyDTables_t :
+ * groups the three sequence tables, the Huffman table, and a set of ZSTD_REP_NUM `rep` values.
+ * Field order matters : see the notes on the three sequence tables about their reuse as workspace. */
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_entropyDTables_t;
+
+/* ZSTD_dStage :
+ * value type of ZSTD_DCtx_s::stage.
+ * NOTE(review): presumably the steps of the block-level decoding state machine - confirm against its users. */
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+/* ZSTD_dStreamStage :
+ * value type of ZSTD_DCtx_s::streamStage; zdss_init is explicitly 0. */
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+/* ZSTD_dictUses_e :
+ * value type of ZSTD_DCtx_s::dictUses : how long the referenced dictionary remains in use. */
+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
+/* ZSTD_outBufferMode_e :
+ * value type of ZSTD_DCtx_s::outBufferMode. */
+typedef enum {
+    ZSTD_obm_buffered = 0,  /* Buffer the output */
+    ZSTD_obm_stable = 1     /* ZSTD_outBuffer is stable */
+} ZSTD_outBufferMode_e;
+
+/* struct ZSTD_DCtx_s :
+ * decompression context.
+ * Embeds the entropy tables and the workspace used to build Huffman tables,
+ * plus frame/block progress, dictionary references, streaming buffers,
+ * and the literal and frame-header buffers.
+ * Note : litBuffer alone is ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH bytes, so the object is large. */
+struct ZSTD_DCtx_s
+{
+    /* table pointers; NOTE(review): presumably point either into `entropy` below or into a dictionary's tables - confirm */
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
+    const void* previousDstEnd;   /* detect continuity */
+    const void* prefixStart;      /* start of current segment */
+    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    XXH64_state_t xxhState;
+    size_t headerSize;
+    ZSTD_format_e format;
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+
+    /* dictionary */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+    U32 dictID;
+    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;
+
+    /* streaming */
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+    int noForwardProgress;
+    ZSTD_outBufferMode_e outBufferMode;
+    ZSTD_outBuffer expectedOutBuffer;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+
+    size_t oversizedDuration;
+
+    /* extra fields present only in fuzzing builds */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    void const* dictContentBeginForFuzzing;
+    void const* dictContentEndForFuzzing;
+#endif
+};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+
+/*-*******************************************************
+ *  Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                   const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ *  check if next `dst` follows previous position, where decompression ended.
+ *  If yes, do nothing (continue on current segment).
+ *  If not, classify previous segment as "external dictionary", and start a new segment.
+ *  This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
+/**** ended inlining zstd_decompress_internal.h ****/
+/**** start inlining zstd_ddict.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include <stddef.h>   /* size_t */
+/**** skipping file: ../zstd.h ****/
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
+/**** ended inlining zstd_ddict.h ****/
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+/**** start inlining ../legacy/zstd_legacy.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LEGACY_H
+#define ZSTD_LEGACY_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/error_private.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+
+#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
+#  undef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 8
+#endif
+
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+/**** start inlining zstd_v01.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V01_H_28739879432
+#define ZSTD_V01_H_28739879432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+     note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv01_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv01_decompressContinue().
+  ZSTDv01_decompressContinue() will use previous data blocks to improve compression if they are located prior to the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv01_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
+/**** ended inlining zstd_v01.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+/**** start inlining zstd_v02.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V02_H_4174539423
+#define ZSTD_V02_H_4174539423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
+*/
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv02_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/**
+ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
+*/
+unsigned ZSTDv02_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv02_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv02_decompressContinue().
+  ZSTDv02_decompressContinue() will use previous data blocks to improve compression if they are located prior to the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv02_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522   /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V02_H_4174539423 */
+/**** ended inlining zstd_v02.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+/**** start inlining zstd_v03.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V03_H_298734209782
+#define ZSTD_V03_H_298734209782
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
+*/
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv03_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+ void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                      size_t* cSize, unsigned long long* dBound);
+
+    /**
+ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
+*/
+unsigned ZSTDv03_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv03_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv03_decompressContinue().
+  ZSTDv03_decompressContinue() will use previous data blocks to improve compression if they are located prior to the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv03_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523   /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V03_H_298734209782 */
+/**** ended inlining zstd_v03.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+/**** start inlining zstd_v04.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V04_H_91868324769238
+#define ZSTD_V04_H_91868324769238
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv04_isError())
+*/
+size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv04_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+ void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                      size_t* cSize, unsigned long long* dBound);
+
+/**
+ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
+*/
+unsigned ZSTDv04_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
+ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
+size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+
+/* *************************************
+*  Direct Streaming
+***************************************/
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv04_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv04_decompressContinue().
+  ZSTDv04_decompressContinue() will use previous data blocks to improve compression if they are located prior to the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv04_decompressContinue() has decoded some header.
+*/
+
+
+/* *************************************
+*  Buffered Streaming
+***************************************/
+typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
+size_t         ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv04_DCtx object is required to track streaming operations.
+*  Use ZBUFFv04_createDCtx() and ZBUFFv04_freeDCtx() to create/release resources.
+*  Use ZBUFFv04_decompressInit() to start a new decompression operation.
+*  ZBUFFv04_DCtx objects can be reused multiple times.
+*
+*  Optionally, a reference to a static dictionary can be set, using ZBUFFv04_decompressWithDictionary().
+*  It must be the same content as the one set during compression phase.
+*  Dictionary content must remain accessible during the decompression process.
+*
+*  Use ZBUFFv04_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv04_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv04_recommendedDInSize() / ZBUFFv04_recommendedDOutSize()
+*  output : ZBUFFv04_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : ZBUFFv04_recommendedDInSize==128 KB + 3; just follow indications from ZBUFFv04_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+unsigned ZBUFFv04_isError(size_t errorCode);
+const char* ZBUFFv04_getErrorName(size_t errorCode);
+
+
+/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are not compulsory, they just tend to offer better latency */
+size_t ZBUFFv04_recommendedDInSize(void);
+size_t ZBUFFv04_recommendedDOutSize(void);
+
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv04_magicNumber 0xFD2FB524   /* v0.4 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V04_H_91868324769238 */
+/**** ended inlining zstd_v04.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+/**** start inlining zstd_v05.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stddef.h>   /* size_t */
+/**** skipping file: ../common/mem.h ****/
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+    `dstCapacity` must be large enough, equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv05_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/* *************************************
+*  Helper functions
+***************************************/
+/* Error Management */
+unsigned    ZSTDv05_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+const char* ZSTDv05_getErrorName(size_t code);     /*!< provides readable string for an error code */
+
+
+/* *************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv05_decompressDCtx() :
+*   Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  Simple Dictionary API
+*************************/
+/*! ZSTDv05_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const void* dict,size_t dictSize);
+
+/*-************************
+*  Advanced Streaming API
+***************************/
+typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy;   /* type of ZSTDv05_parameters::strategy; implicit values 0..6 in declaration order */
+typedef struct {   /* NOTE(review): presumably filled through the `params` pointer of ZSTDv05_getFrameParams() - confirm */
+    U64 srcSize;
+    U32 windowLog;     /* the only useful information to retrieve */
+    U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy;   /* per the note on windowLog, these carry no useful information for callers */
+} ZSTDv05_parameters;
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx);
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  ZBUFF API
+*************************/
+typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
+size_t         ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operations.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation,
+*   or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
+*  output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode);
+const char* ZBUFFv05_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv05_H */
+/**** ended inlining zstd_v05.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+/**** start inlining zstd_v06.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDv06_H
+#define ZSTDv06_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*======  Dependency  ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*======  Export for Windows  ======*/
+/*!
+*  ZSTDv06_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1)
+#  define ZSTDLIBv06_API __declspec(dllexport)
+#else
+#  define ZSTDLIBv06_API
+#endif
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+/*! ZSTDv06_decompress() :
+    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+    `dstCapacity` must be large enough, equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */
+ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
+                                    const void* src, size_t compressedSize);
+
+/**
+ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
+    srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                or an error code if it fails (which can be tested using ZSTDv06_isError())
+    dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+*/
+void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/* *************************************
+*  Helper functions
+***************************************/
+ZSTDLIBv06_API size_t      ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */
+
+/* Error Management */
+ZSTDLIBv06_API unsigned    ZSTDv06_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code);     /*!< provides readable string for an error code */
+
+
+/* *************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx;
+ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void);
+ZSTDLIBv06_API size_t     ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv06_decompressDCtx() :
+*   Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */
+ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  Dictionary API
+*************************/
+/*! ZSTDv06_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */
+ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx,
+                                                   void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize,
+                                             const void* dict,size_t dictSize);
+
+
+/*-************************
+*  Advanced Streaming API
+***************************/
+/* Frame header fields filled in by ZSTDv06_getFrameParams().
+ * `frameContentSize` is what ZSTD_getDecompressedSize_legacy() reports as the decompressed size of a v0.6 frame.
+ * `windowLog` : presumably log2 of the window size declared by the frame - TODO confirm against zstd_v06.c */
+struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; };
+typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams;
+
+ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIBv06_API void   ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx);
+
+ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx);
+ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+
+/* *************************************
+*  ZBUFF API
+***************************************/
+
+typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx;
+ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void);
+ZSTDLIBv06_API size_t         ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx);
+
+ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx);
+ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx,
+                                                  void* dst, size_t* dstCapacityPtr,
+                                            const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv06_DCtx object is required to track streaming operations.
+*  Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources.
+*  Use ZBUFFv06_decompressInit() to start a new decompression operation,
+*   or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv06_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv06_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv06_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize()
+*  output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv06_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode);
+ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, they tend to offer better latency */
+ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void);
+ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void);
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv06_MAGICNUMBER 0xFD2FB526   /* v0.6 */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv06_H */
+/**** ended inlining zstd_v06.h ****/
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+/**** start inlining zstd_v07.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDv07_H_235446
+#define ZSTDv07_H_235446
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*======  Dependency  ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*======  Export for Windows  ======*/
+/*!
+*  ZSTDv07_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
+#  define ZSTDLIBv07_API __declspec(dllexport)
+#else
+#  define ZSTDLIBv07_API
+#endif
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTDv07_getDecompressedSize() :
+*   @return : decompressed size if known, 0 otherwise.
+       note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause.
+       note 2 : decompressed size could be wrong or intentionally modified !
+                always ensure results fit within application's authorized limits */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTDv07_decompress() :
+    `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
+                                    const void* src, size_t compressedSize);
+
+/**
+ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
+    srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                or an error code if it fails (which can be tested using ZSTDv07_isError())
+    dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+*/
+void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/*======  Helper functions  ======*/
+ZSTDLIBv07_API unsigned    ZSTDv07_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code);     /*!< provides readable string from an error code */
+
+
+/*-*************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx;
+ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void);
+ZSTDLIBv07_API size_t     ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv07_decompressDCtx() :
+*   Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */
+ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-************************
+*  Simple dictionary API
+***************************/
+/*! ZSTDv07_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression.
+*   Note : This function loads the dictionary, resulting in a significant startup time */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                                   void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize,
+                                             const void* dict,size_t dictSize);
+
+
+/*-**************************
+*  Advanced Dictionary API
+****************************/
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression operation without startup delay.
+*   `dict` can be released after creation */
+typedef struct ZSTDv07_DDict_s ZSTDv07_DDict;
+ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize);
+ZSTDLIBv07_API size_t      ZSTDv07_freeDDict(ZSTDv07_DDict* ddict);
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                                    void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              const ZSTDv07_DDict* ddict);
+
+/* Frame header fields filled in by ZSTDv07_getFrameParams(). */
+typedef struct {
+    unsigned long long frameContentSize;   /* reported by ZSTD_getDecompressedSize_legacy() as the decompressed size of a v0.7 frame */
+    unsigned windowSize;                   /* presumably the window size declared by the frame - TODO confirm against zstd_v07.c */
+    unsigned dictID;                       /* presumably the ID of the dictionary required by the frame - TODO confirm */
+    unsigned checksumFlag;                 /* presumably non-zero when the frame carries a checksum - TODO confirm */
+} ZSTDv07_frameParams;
+
+ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+
+
+
+
+/* *************************************
+*  Streaming functions
+***************************************/
+typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx;
+ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void);
+ZSTDLIBv07_API size_t      ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx);
+ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode);
+ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, they tend to offer better latency */
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void);
+ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void);
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv07_MAGICNUMBER            0xFD2FB527   /* v0.7 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv07_H_235446 */
+/**** ended inlining zstd_v07.h ****/
+#endif
+
+/** ZSTD_isLegacy() :
+ *  Identifies a legacy frame from the 4-byte little-endian magic number found at the start of `src`.
+ *  Only formats whose decoder is compiled in (see ZSTD_LEGACY_SUPPORT) are recognized.
+ *  @return : the legacy format version (> 0) when supported by a legacy decoder,
+ *            0 otherwise, including when `srcSize` < 4.
+ */
+MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
+{
+    if (srcSize >= 4) {
+        U32 const magic = MEM_readLE32(src);
+        (void)magic;   /* unused when no legacy format is compiled in */
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        if (magic == ZSTDv01_magicNumberLE) return 1;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        if (magic == ZSTDv02_magicNumber) return 2;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        if (magic == ZSTDv03_magicNumber) return 3;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        if (magic == ZSTDv04_magicNumber) return 4;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        if (magic == ZSTDv05_MAGICNUMBER) return 5;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        if (magic == ZSTDv06_MAGICNUMBER) return 6;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        if (magic == ZSTDv07_MAGICNUMBER) return 7;
+#endif
+    }
+    return 0;   /* too short to hold a magic number, or not a supported legacy format */
+}
+
+
+/** ZSTD_getDecompressedSize_legacy() :
+ *  Reads the decompressed size recorded in the header of a legacy frame.
+ *  Only formats v0.5 to v0.7 are queried, through their respective getFrameParams() function.
+ *  @return : the content size found in the frame parameters,
+ *            or 0 when `src` is not a supported legacy frame, when its format is older than v0.5
+ *            (no decompressed size in frame header), or when getFrameParams() returns non-zero.
+ */
+MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize)
+{
+    switch (ZSTD_isLegacy(src, srcSize))
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {   ZSTDv05_parameters params;
+                if (ZSTDv05_getFrameParams(&params, src, srcSize) != 0) return 0;
+                return params.srcSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            {   ZSTDv06_frameParams params;
+                if (ZSTDv06_getFrameParams(&params, src, srcSize) != 0) return 0;
+                return params.frameContentSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {   ZSTDv07_frameParams params;
+                if (ZSTDv07_getFrameParams(&params, src, srcSize) != 0) return 0;
+                return params.frameContentSize;
+            }
+#endif
+        default :   /* no decompressed size in frame header, or not a legacy format */
+            return 0;
+    }
+}
+
+
+/** ZSTD_decompressLegacy() :
+ *  One-shot decompression of a legacy frame, dispatched on the version reported by ZSTD_isLegacy().
+ *  Formats v0.1 to v0.4 are decoded without dictionary (`dict` and `dictSize` are not used for them).
+ *  Formats v0.5 to v0.7 create a temporary decompression context, decode with the
+ *  provided dictionary, then release the context.
+ *  @return : the value returned by the selected legacy decoder,
+ *            ERROR(memory_allocation) if the temporary context cannot be created,
+ *            ERROR(prefix_unknown) if `src` is not a supported legacy frame.
+ */
+MEM_STATIC size_t ZSTD_decompressLegacy(
+                     void* dst, size_t dstCapacity,
+               const void* src, size_t compressedSize,
+               const void* dict,size_t dictSize)
+{
+    (void)dst; (void)dstCapacity; (void)dict; (void)dictSize;  /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
+    switch (ZSTD_isLegacy(src, compressedSize))
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 : return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 : return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 : return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 : return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {   ZSTDv05_DCtx* const legacyDCtx = ZSTDv05_createDCtx();
+                size_t dSize;
+                if (legacyDCtx == NULL) return ERROR(memory_allocation);
+                dSize = ZSTDv05_decompress_usingDict(legacyDCtx, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv05_freeDCtx(legacyDCtx);
+                return dSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            {   ZSTDv06_DCtx* const legacyDCtx = ZSTDv06_createDCtx();
+                size_t dSize;
+                if (legacyDCtx == NULL) return ERROR(memory_allocation);
+                dSize = ZSTDv06_decompress_usingDict(legacyDCtx, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv06_freeDCtx(legacyDCtx);
+                return dSize;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {   ZSTDv07_DCtx* const legacyDCtx = ZSTDv07_createDCtx();
+                size_t dSize;
+                if (legacyDCtx == NULL) return ERROR(memory_allocation);
+                dSize = ZSTDv07_decompress_usingDict(legacyDCtx, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv07_freeDCtx(legacyDCtx);
+                return dSize;
+            }
+#endif
+        default :
+            return ERROR(prefix_unknown);
+    }
+}
+
+/** ZSTD_findFrameSizeInfoLegacy() :
+ *  Queries the legacy decoder matching the frame at `src` for its compressed size
+ *  and for an upper bound of its decompressed size.
+ *  @return : a ZSTD_frameSizeInfo filled by the matching ZSTDv0X_findFrameSizeInfoLegacy().
+ *            `compressedSize` is ERROR(prefix_unknown) when `src` is not a supported legacy frame,
+ *            or ERROR(srcSize_wrong) when the reported frame size exceeds `srcSize`;
+ *            in both cases `decompressedBound` is ZSTD_CONTENTSIZE_ERROR.
+ */
+MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo info;
+    size_t* const cSizePtr = &info.compressedSize;
+    unsigned long long* const dBoundPtr = &info.decompressedBound;
+    switch (ZSTD_isLegacy(src, srcSize))
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 1)
+        case 1 :
+            ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 2)
+        case 2 :
+            ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 3)
+        case 3 :
+            ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, cSizePtr, dBoundPtr);
+            break;
+#endif
+        default :
+            *cSizePtr = ERROR(prefix_unknown);
+            *dBoundPtr = ZSTD_CONTENTSIZE_ERROR;
+            break;
+    }
+    /* an error code, or a size that fits within the source buffer, is reported as is */
+    if (ZSTD_isError(info.compressedSize) || info.compressedSize <= srcSize)
+        return info;
+    /* the frame claims to be larger than the buffer it was read from */
+    info.compressedSize = ERROR(srcSize_wrong);
+    info.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    return info;
+}
+
+/** ZSTD_findFrameCompressedSizeLegacy() :
+ *  @return : the `compressedSize` member of ZSTD_findFrameSizeInfoLegacy()'s result
+ *            (either a size or an error code). */
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
+{
+    return ZSTD_findFrameSizeInfoLegacy(src, srcSize).compressedSize;
+}
+
+/** ZSTD_freeLegacyStreamContext() :
+ *  Releases a legacy streaming context, casting it to the ZBUFFv0X_DCtx type matching `version`.
+ *  @return : the result of the matching ZBUFFv0X_freeDCtx() for versions 4 to 7 (when compiled in),
+ *            ERROR(version_unsupported) for any other version, in which case nothing is freed.
+ */
+MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
+{
+    switch(version)
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext);
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext);
+#endif
+        case 1 :
+        case 2 :
+        case 3 :
+        default :
+            break;
+    }
+    /* no streaming context exists for this version */
+    (void)legacyContext;
+    return ERROR(version_unsupported);
+}
+
+
+/** ZSTD_initLegacyStream() :
+ *  Prepares `*legacyContext` for streaming decompression of a legacy frame of format `newVersion`.
+ *  When `prevVersion` differs from `newVersion`, the context belonging to `prevVersion` is released
+ *  and `*legacyContext` is reset to NULL before a new context is created. The caller is therefore
+ *  never left holding a pointer to freed memory, neither when the allocation fails
+ *  nor when `newVersion` has no streaming decoder.
+ *  When a context already exists for `newVersion`, it is re-initialized in place.
+ *  `dict` and `dictSize` are forwarded to the dictionary initialization of the legacy decoder.
+ *  @return : 0 on success, including for versions without streaming decoder (1 to 3, or unknown),
+ *            for which no context is created;
+ *            ERROR(memory_allocation) if a context is needed and cannot be created.
+ *  Note : the values returned by the ZBUFFv0X initialization functions are not checked.
+ */
+MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
+                                        const void* dict, size_t dictSize)
+{
+    DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
+    if (prevVersion != newVersion) {
+        if (*legacyContext != NULL) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
+        /* forget the released context : leaving the stale pointer in place
+         * would expose the caller to a use-after-free or a double free
+         * whenever one of the paths below returns without storing a new context */
+        *legacyContext = NULL;
+    }
+    switch(newVersion)
+    {
+        default :
+        case 1 :
+        case 2 :
+        case 3 :
+            (void)dict; (void)dictSize;
+            return 0;
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+        {
+            ZBUFFv04_DCtx* dctx = (*legacyContext == NULL) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv04_decompressInit(dctx);
+            ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+        {
+            ZBUFFv05_DCtx* dctx = (*legacyContext == NULL) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+        {
+            ZBUFFv06_DCtx* dctx = (*legacyContext == NULL) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+        {
+            ZBUFFv07_DCtx* dctx = (*legacyContext == NULL) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext;
+            if (dctx==NULL) return ERROR(memory_allocation);
+            ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize);
+            *legacyContext = dctx;
+            return 0;
+        }
+#endif
+    }
+}
+
+
+
+/** ZSTD_decompressLegacyStream() :
+ *  Runs one streaming decompression step with the legacy decoder matching `version`.
+ *  The unread part of `input` and the unwritten part of `output` are handed to
+ *  ZBUFFv0X_decompressContinue(); `input->pos` and `output->pos` are then advanced
+ *  by the amounts it reports as read and written.
+ *  @return : the value returned by ZBUFFv0X_decompressContinue() for versions 4 to 7 (when compiled in),
+ *            ERROR(version_unsupported) for any other version.
+ */
+MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
+                                              ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
+    switch(version)
+    {
+#if (ZSTD_LEGACY_SUPPORT <= 4)
+        case 4 :
+            {   ZBUFFv04_DCtx* const zbd = (ZBUFFv04_DCtx*) legacyContext;
+                size_t srcRemaining = input->size - input->pos;    /* in : available input, out : nb of bytes read */
+                size_t dstRemaining = output->size - output->pos;  /* in : available room, out : nb of bytes written */
+                size_t const hint = ZBUFFv04_decompressContinue(zbd,
+                                        (char*)output->dst + output->pos, &dstRemaining,
+                                        (const char*)input->src + input->pos, &srcRemaining);
+                output->pos += dstRemaining;
+                input->pos += srcRemaining;
+                return hint;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 5)
+        case 5 :
+            {   ZBUFFv05_DCtx* const zbd = (ZBUFFv05_DCtx*) legacyContext;
+                size_t srcRemaining = input->size - input->pos;
+                size_t dstRemaining = output->size - output->pos;
+                size_t const hint = ZBUFFv05_decompressContinue(zbd,
+                                        (char*)output->dst + output->pos, &dstRemaining,
+                                        (const char*)input->src + input->pos, &srcRemaining);
+                output->pos += dstRemaining;
+                input->pos += srcRemaining;
+                return hint;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 6)
+        case 6 :
+            {   ZBUFFv06_DCtx* const zbd = (ZBUFFv06_DCtx*) legacyContext;
+                size_t srcRemaining = input->size - input->pos;
+                size_t dstRemaining = output->size - output->pos;
+                size_t const hint = ZBUFFv06_decompressContinue(zbd,
+                                        (char*)output->dst + output->pos, &dstRemaining,
+                                        (const char*)input->src + input->pos, &srcRemaining);
+                output->pos += dstRemaining;
+                input->pos += srcRemaining;
+                return hint;
+            }
+#endif
+#if (ZSTD_LEGACY_SUPPORT <= 7)
+        case 7 :
+            {   ZBUFFv07_DCtx* const zbd = (ZBUFFv07_DCtx*) legacyContext;
+                size_t srcRemaining = input->size - input->pos;
+                size_t dstRemaining = output->size - output->pos;
+                size_t const hint = ZBUFFv07_decompressContinue(zbd,
+                                        (char*)output->dst + output->pos, &dstRemaining,
+                                        (const char*)input->src + input->pos, &srcRemaining);
+                output->pos += dstRemaining;
+                input->pos += srcRemaining;
+                return hint;
+            }
+#endif
+        case 1 :
+        case 2 :
+        case 3 :
+        default :
+            break;
+    }
+    /* no streaming decoder for this version */
+    (void)legacyContext; (void)output; (void)input;
+    return ERROR(version_unsupported);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTD_LEGACY_H */
+/**** ended inlining ../legacy/zstd_legacy.h ****/
+#endif
+
+
+
+/*-*******************************************************
+*  Types
+*********************************************************/
+/* Dictionary prepared for decompression.
+ * Fields are populated by ZSTD_initDDict_internal() and ZSTD_loadEntropy_intoDDict(). */
+struct ZSTD_DDict_s {
+    void* dictBuffer;               /* internal copy of the dictionary, allocated with cMem; NULL when the dictionary is only referenced */
+    const void* dictContent;        /* start of the dictionary : either dictBuffer, or the caller's buffer when referenced */
+    size_t dictSize;                /* size of dictContent, in bytes; forced to 0 when no dictionary is provided */
+    ZSTD_entropyDTables_t entropy;  /* entropy tables, loaded by ZSTD_loadDEntropy() when the content starts with ZSTD_MAGIC_DICTIONARY */
+    U32 dictID;                     /* read from the dictionary header, at offset ZSTD_FRAMEIDSIZE; 0 when the content is not a zstd dictionary */
+    U32 entropyPresent;             /* 1 once entropy tables have been loaded successfully, 0 otherwise */
+    ZSTD_customMem cMem;            /* allocator handed to ZSTD_malloc() for dictBuffer */
+};  /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+/*! ZSTD_DDict_dictContent() :
+ *  @return : the `dictContent` pointer stored in `ddict`.
+ *  `ddict` must not be NULL (checked by assert only). */
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
+{
+    const void* content;
+    assert(ddict != NULL);
+    content = ddict->dictContent;
+    return content;
+}
+
+/*! ZSTD_DDict_dictSize() :
+ *  @return : the `dictSize` value stored in `ddict`.
+ *  `ddict` must not be NULL (checked by assert only). */
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
+{
+    size_t contentSize;
+    assert(ddict != NULL);
+    contentSize = ddict->dictSize;
+    return contentSize;
+}
+
+/*! ZSTD_copyDDictParameters() :
+ *  Makes `dctx` reference the dictionary held by `ddict` :
+ *  the dictionary ID and the content boundaries are copied, and, when `ddict` carries
+ *  entropy tables, `dctx` is pointed at them and inherits the 3 repeat offsets.
+ *  Otherwise, both entropy flags of `dctx` are cleared.
+ *  `dctx` and `ddict` must not be NULL (checked by assert only). */
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_copyDDictParameters");
+    assert(dctx != NULL);
+    assert(ddict != NULL);
+
+    /* dictionary boundaries */
+    {   const void* const dictStart = ddict->dictContent;
+        const BYTE* const dictLimit = (const BYTE*)dictStart + ddict->dictSize;
+        dctx->dictID = ddict->dictID;
+        dctx->prefixStart = dictStart;
+        dctx->virtualStart = dictStart;
+        dctx->dictEnd = dictLimit;
+        dctx->previousDstEnd = dctx->dictEnd;
+    }
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+
+    /* entropy */
+    if (!ddict->entropyPresent) {
+        dctx->litEntropy = 0;
+        dctx->fseEntropy = 0;
+        return;
+    }
+    dctx->litEntropy = 1;
+    dctx->fseEntropy = 1;
+    dctx->LLTptr = ddict->entropy.LLTable;
+    dctx->MLTptr = ddict->entropy.MLTable;
+    dctx->OFTptr = ddict->entropy.OFTable;
+    dctx->HUFptr = ddict->entropy.hufTable;
+    {   int r;
+        for (r = 0; r < 3; r++)
+            dctx->entropy.rep[r] = ddict->entropy.rep[r];
+    }
+}
+
+
+/*! ZSTD_loadEntropy_intoDDict() :
+ *  Parses the content already referenced by `ddict` and sets `dictID` and `entropyPresent` accordingly.
+ *  - ZSTD_dct_rawContent : content is never parsed; dictID = 0, no entropy.
+ *  - content shorter than 8 bytes, or not starting with ZSTD_MAGIC_DICTIONARY :
+ *    accepted as pure content, unless ZSTD_dct_fullDict is requested.
+ *  - otherwise : dictID is read from the header and entropy tables are loaded.
+ *  @return : 0 on success,
+ *            ERROR(dictionary_corrupted) when ZSTD_dct_fullDict is requested but the content
+ *            is not a zstd dictionary, or when loading the entropy tables fails.
+ */
+static size_t
+ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
+                           ZSTD_dictContentType_e dictContentType)
+{
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+    /* the magic number is only read when at least 8 bytes are available (short-circuit evaluation) */
+    if ( (ddict->dictSize < 8)
+      || (MEM_readLE32(ddict->dictContent) != ZSTD_MAGIC_DICTIONARY) ) {
+        /* not a zstd dictionary : only accept specified dictionaries in fullDict mode,
+         * pure content mode otherwise */
+        return (dictContentType == ZSTD_dct_fullDict) ? ERROR(dictionary_corrupted) : 0;
+    }
+
+    /* the dictionary ID follows the magic number */
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+        dictionary_corrupted, "");
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+/* ZSTD_initDDict_internal() :
+ * Attach dictionary content to `ddict`, then parse it.
+ * The content is duplicated (using ddict->cMem) only when dictLoadMethod is not
+ * ZSTD_dlm_byRef and `dict` is a non-empty buffer; otherwise it is just referenced,
+ * and a NULL `dict` forces the recorded size to 0.
+ * ddict->cMem must be set by the caller beforehand.
+ * @return : 0 on success, or an error code
+ *           (memory_allocation, or whatever content parsing reports) */
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType)
+{
+    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef) && (dict != NULL) && (dictSize != 0);
+
+    if (mustCopy) {
+        void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
+        /* pointers are recorded before the NULL test, so that ddict is coherent on failure */
+        ddict->dictBuffer = internalBuffer;
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        memcpy(internalBuffer, dict, dictSize);
+    } else {
+        ddict->dictBuffer = NULL;
+        ddict->dictContent = dict;
+        if (!dict) dictSize = 0;
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+
+    return 0;
+}
+
+/* ZSTD_createDDict_advanced() :
+ * Allocate a DDict through `customMem` and initialize it from `dict`.
+ * `customMem` must provide both customAlloc and customFree, or neither.
+ * @return : the new DDict, or NULL
+ *           (inconsistent customMem, allocation failure, or initialization error) */
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_customMem customMem)
+{
+    ZSTD_DDict* ddict;
+
+    /* reject a half-defined allocator : exactly one of the two callbacks is NULL */
+    if ((customMem.customAlloc == NULL) != (customMem.customFree == NULL)) return NULL;
+
+    ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
+    if (ddict == NULL) return NULL;
+    ddict->cMem = customMem;
+
+    if (ZSTD_isError(ZSTD_initDDict_internal(ddict,
+                                             dict, dictSize,
+                                             dictLoadMethod, dictContentType))) {
+        ZSTD_freeDDict(ddict);
+        return NULL;
+    }
+    return ddict;
+}
+
+/*! ZSTD_createDDict() :
+*   Build a digested dictionary, so that decompression can start without startup delay.
+*   `dict` content is duplicated inside the DDict (ZSTD_dlm_byCopy),
+*   hence `dict` can be released as soon as the `ZSTD_DDict` is created.
+*   No custom allocator is supplied (all-NULL ZSTD_customMem),
+*   and the content type is ZSTD_dct_auto.
+*   @return : the new DDict, or NULL on failure */
+ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTD_customMem const noCustomMem = { NULL, NULL, NULL };
+    ZSTD_DDict* const ddict = ZSTD_createDDict_advanced(dict, dictSize,
+                                                        ZSTD_dlm_byCopy, ZSTD_dct_auto,
+                                                        noCustomMem);
+    return ddict;
+}
+
+/*! ZSTD_createDDict_byReference() :
+ *  Build a digested dictionary, so that decompression can start without startup delay.
+ *  Dictionary content is not duplicated (ZSTD_dlm_byRef) : it is only referenced,
+ *  and will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer).
+ *  No custom allocator is supplied (all-NULL ZSTD_customMem),
+ *  and the content type is ZSTD_dct_auto.
+ *  @return : the new DDict, or NULL on failure */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const noCustomMem = { NULL, NULL, NULL };
+    ZSTD_DDict* const ddict = ZSTD_createDDict_advanced(dictBuffer, dictSize,
+                                                        ZSTD_dlm_byRef, ZSTD_dct_auto,
+                                                        noCustomMem);
+    return ddict;
+}
+
+
+/* ZSTD_initStaticDDict() :
+ * Build a DDict inside the caller-provided buffer `sBuffer`, without any allocation.
+ * `sBuffer` must be 8-bytes aligned and hold at least sizeof(ZSTD_DDict) bytes,
+ * plus `dictSize` bytes unless dictLoadMethod is ZSTD_dlm_byRef.
+ * With ZSTD_dlm_byCopy, the dictionary content is copied right after the DDict structure.
+ * `sBuffer` and `dict` must be non-NULL (only checked through assert()).
+ * @return : `sBuffer` seen as a DDict, or NULL
+ *           (misaligned or too small buffer, or initialization error) */
+const ZSTD_DDict* ZSTD_initStaticDDict(
+                                void* sBuffer, size_t sBufferSize,
+                                const void* dict, size_t dictSize,
+                                ZSTD_dictLoadMethod_e dictLoadMethod,
+                                ZSTD_dictContentType_e dictContentType)
+{
+    size_t const contentSpace = (dictLoadMethod == ZSTD_dlm_byRef) ? 0 : dictSize;
+    size_t const neededSpace = sizeof(ZSTD_DDict) + contentSpace;
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
+    assert(sBuffer != NULL);
+    assert(dict != NULL);
+    if (((size_t)sBuffer & 7) != 0) return NULL;   /* 8-aligned */
+    if (neededSpace > sBufferSize) return NULL;
+
+    if (dictLoadMethod == ZSTD_dlm_byCopy) {
+        void* const localCopy = ddict+1;   /* first byte after the DDict structure */
+        memcpy(localCopy, dict, dictSize);
+        dict = localCopy;
+    }
+
+    /* content now lives where it must stay : always initialize by reference */
+    {   size_t const initResult = ZSTD_initDDict_internal(ddict,
+                                                          dict, dictSize,
+                                                          ZSTD_dlm_byRef, dictContentType);
+        if (ZSTD_isError(initResult)) return NULL;
+    }
+    return ddict;
+}
+
+
+/* ZSTD_freeDDict() :
+ * Release the internal content buffer of `ddict`, then `ddict` itself,
+ * both through the allocator recorded in ddict->cMem.
+ * Accepts NULL.
+ * @return : always 0 */
+size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
+{
+    ZSTD_customMem cMem;
+    if (ddict==NULL) return 0;   /* support free on NULL */
+    cMem = ddict->cMem;          /* saved : still needed once ddict is released */
+    ZSTD_free(ddict->dictBuffer, cMem);
+    ZSTD_free(ddict, cMem);
+    return 0;
+}
+
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate the amount of memory needed to create a dictionary for decompression :
+ *  the DDict structure, plus `dictSize` bytes when the content gets copied.
+ *  Note : dictionaries created by reference using ZSTD_dlm_byRef are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    size_t const contentCopySize = (dictLoadMethod == ZSTD_dlm_byRef) ? 0 : dictSize;
+    return sizeof(ZSTD_DDict) + contentCopySize;
+}
+
+/* ZSTD_sizeof_DDict() :
+ * @return : size of the DDict structure,
+ *           plus the dictionary size when the DDict holds an internal content buffer.
+ *           0 when `ddict` is NULL. */
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+    size_t total;
+    if (ddict==NULL) return 0;   /* support sizeof on NULL */
+    total = sizeof(*ddict);
+    if (ddict->dictBuffer) total += ddict->dictSize;
+    return total;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`,
+ *  as reported by ZSTD_getDictID_fromDict() on the DDict content.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty
+ *  (a NULL `ddict` also gives 0).
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    return (ddict==NULL) ? 0
+                         : ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
+/**** ended inlining decompress/zstd_ddict.c ****/
+/**** start inlining decompress/zstd_decompress.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
+ * Only defined here when the build has not already defined ZSTD_HEAPMODE.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+*  LEGACY_SUPPORT :
+*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*  Defaults to 0 (disabled) when the build has not already defined ZSTD_LEGACY_SUPPORT.
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+#  define ZSTD_LEGACY_SUPPORT 0
+#endif
+
+/*!
+ *  MAXWINDOWSIZE_DEFAULT :
+ *  maximum window size accepted by DStream __by default__.
+ *  Frames requiring more memory will be rejected.
+ *  It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ *  Default value is (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1,
+ *  unless the build has already defined ZSTD_MAXWINDOWSIZE_DEFAULT.
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ *  NO_FORWARD_PROGRESS_MAX :
+ *  maximum allowed nb of calls to ZSTD_decompressStream()
+ *  without any forward progress
+ *  (defined as: no byte read from input, and no byte flushed to output)
+ *  before triggering an error.
+ *  Defaults to 16 when the build has not already defined ZSTD_NO_FORWARD_PROGRESS_MAX.
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+#  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <string.h>      /* memcpy, memmove, memset */
+/**** skipping file: ../common/cpu.h ****/
+/**** skipping file: ../common/mem.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: zstd_decompress_internal.h ****/
+/**** skipping file: zstd_ddict.h ****/
+/**** start inlining zstd_decompress_block.h ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include <stddef.h>   /* size_t */
+/**** skipping file: ../zstd.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: zstd_decompress_internal.h ****/
+
+
+/* ===   Prototypes   === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * `frame` : NOTE(review) the frame decoding loop passes 1 for this argument;
+ *           the meaning of other values is not visible from this declaration - confirm
+ *           against the definition.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * Consequently, it returns nothing : there is no error to report.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+             const short* normalizedCounter, unsigned maxSymbolValue,
+             const U32* baseValue, const U32* nbAdditionalBits,
+                   unsigned tableLog);
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
+/**** ended inlining zstd_decompress_block.h ****/
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+/**** skipping file: ../legacy/zstd_legacy.h ****/
+#endif
+
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+/* ZSTD_sizeof_DCtx() :
+ * @return : size of the DCtx structure, plus its local DDict (if any),
+ *           plus its recorded input and output buffer sizes.
+ *           0 when `dctx` is NULL. */
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    size_t total;
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    total  = sizeof(*dctx);
+    total += ZSTD_sizeof_DDict(dctx->ddictLocal);
+    total += dctx->inBuffSize;
+    total += dctx->outBuffSize;
+    return total;
+}
+
+/* ZSTD_estimateDCtxSize() :
+ * @return : sizeof(ZSTD_DCtx), i.e. the size of the context structure alone */
+size_t ZSTD_estimateDCtxSize(void)
+{
+    return sizeof(ZSTD_DCtx);
+}
+
+
+/* ZSTD_startingInputLength() :
+ * @return : ZSTD_FRAMEHEADERSIZE_PREFIX(format), the amount of input used by
+ *           the header parsers of this file as their minimum input size.
+ * only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless (checked through assert()) */
+static size_t ZSTD_startingInputLength(ZSTD_format_e format)
+{
+    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
+    return ZSTD_FRAMEHEADERSIZE_PREFIX(format);
+}
+
+/* ZSTD_initDCtx_internal() :
+ * Put a freshly obtained DCtx into its default state :
+ * zstd1 format, default window limit, no dictionary, no streaming buffer,
+ * no legacy context, streaming stage at zdss_init.
+ * dctx->customMem is not touched : it is the caller's responsibility. */
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    /* format and limits */
+    dctx->format = ZSTD_f_zstd1;  /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->staticSize = 0;
+    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+
+    /* dictionary : none attached */
+    dctx->ddict = NULL;
+    dctx->ddictLocal = NULL;
+    dctx->dictEnd = NULL;
+    dctx->ddictIsCold = 0;
+    dctx->dictUses = ZSTD_dont_use;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentEndForFuzzing = NULL;
+#endif
+
+    /* streaming state : no buffer yet */
+    dctx->streamStage = zdss_init;
+    dctx->inBuff = NULL;
+    dctx->inBuffSize = 0;
+    dctx->outBuffSize = 0;
+    dctx->outBufferMode = ZSTD_obm_buffered;
+    dctx->noForwardProgress = 0;
+    dctx->oversizedDuration = 0;
+
+    /* legacy decoders : none in use */
+    dctx->legacyContext = NULL;
+    dctx->previousLegacyVersion = 0;
+}
+
+/* ZSTD_initStaticDCtx() :
+ * Build a DCtx inside the caller-provided `workspace`, without any allocation.
+ * `workspace` must be 8-bytes aligned and at least sizeof(ZSTD_DCtx) bytes large.
+ * The whole workspace size is recorded in staticSize,
+ * and inBuff is set to the first byte following the DCtx structure.
+ * @return : `workspace` seen as a DCtx, or NULL if it is misaligned or too small */
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
+
+    if (((size_t)workspace & 7) != 0) return NULL;  /* 8-aligned */
+    if (sizeof(ZSTD_DCtx) > workspaceSize) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
+    return dctx;
+}
+
+/* ZSTD_createDCtx_advanced() :
+ * Allocate a DCtx through `customMem`, record the allocator, and set default state.
+ * `customMem` must provide both customAlloc and customFree, or neither.
+ * @return : the new DCtx, or NULL (inconsistent customMem, or allocation failure) */
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    ZSTD_DCtx* dctx;
+
+    /* reject a half-defined allocator : exactly one of the two callbacks is NULL */
+    if ((customMem.customAlloc == NULL) != (customMem.customFree == NULL)) return NULL;
+
+    dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
+    if (dctx == NULL) return NULL;
+    dctx->customMem = customMem;
+    ZSTD_initDCtx_internal(dctx);
+    return dctx;
+}
+
+/* ZSTD_createDCtx() :
+ * Create a DCtx using ZSTD_defaultCMem.
+ * @return : the new DCtx, or NULL on failure */
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    ZSTD_DCtx* dctx;
+    DEBUGLOG(3, "ZSTD_createDCtx");
+    dctx = ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+    return dctx;
+}
+
+/* ZSTD_clearDict() :
+ * Detach any dictionary from `dctx` :
+ * the locally owned DDict (if any) is freed, the DDict reference is dropped,
+ * and dictUses goes back to ZSTD_dont_use. */
+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+    ZSTD_DDict* const ownedDDict = dctx->ddictLocal;
+    dctx->dictUses = ZSTD_dont_use;
+    dctx->ddict = NULL;
+    dctx->ddictLocal = NULL;
+    ZSTD_freeDDict(ownedDDict);
+}
+
+/* ZSTD_freeDCtx() :
+ * Release everything owned by `dctx` (local dictionary, input buffer,
+ * legacy stream context when legacy support is compiled in), then `dctx` itself.
+ * Accepts NULL.
+ * @return : 0, or ERROR(memory_allocation) when `dctx` is a static context
+ *           (staticSize != 0), which cannot be freed that way */
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
+    {   ZSTD_customMem const allocator = dctx->customMem;   /* saved : dctx is released last */
+
+        /* dictionary */
+        ZSTD_clearDict(dctx);
+
+        /* input buffer */
+        ZSTD_free(dctx->inBuff, allocator);
+        dctx->inBuff = NULL;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (dctx->legacyContext) {
+            ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
+        }
+#endif
+
+        /* context itself */
+        ZSTD_free(dctx, allocator);
+    }
+    return 0;
+}
+
+/* ZSTD_copyDCtx() :
+ * no longer useful.
+ * Copy the leading part of `srcDCtx` into `dstDCtx` :
+ * every byte located before the `inBuff` field, and nothing from `inBuff` onwards. */
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+    const char* const ctxStart = (const char*)dstDCtx;
+    const char* const copyLimit = (const char*)(&dstDCtx->inBuff);
+    size_t const headSize = (size_t)(copyLimit - ctxStart);
+    memcpy(dstDCtx, srcDCtx, headSize);  /* no need to copy workspace */
+}
+
+
+/*-*************************************************************
+ *   Frame header decoding
+ ***************************************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid.
+ *  @return : 1 if a Frame Identifier is recognized, 0 otherwise */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    U32 magic;
+
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+
+    magic = MEM_readLE32(buffer);
+    if ( (magic == ZSTD_MAGICNUMBER)
+      || ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) )
+        return 1;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(buffer, size)) return 1;
+#endif
+    return 0;
+}
+
+/** ZSTD_frameHeaderSize_internal() :
+ *  srcSize must be large enough to reach header size fields,
+ *  i.e. at least ZSTD_startingInputLength(format).
+ *  The size is derived from the frame header descriptor byte alone
+ *  (last byte of the starting input).
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ *           or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+    {   BYTE const descriptor = ((const BYTE*)src)[minInputSize-1];
+        U32 const dictIDCode = descriptor & 3;
+        U32 const isSingleSegment = (descriptor >> 5) & 1;
+        U32 const fcsCode = descriptor >> 6;
+        size_t headerSize = minInputSize;
+
+        if (!isSingleSegment) headerSize += 1;   /* one more byte, read as window descriptor by the header decoder */
+        headerSize += ZSTD_did_fieldSize[dictIDCode];
+        headerSize += ZSTD_fcs_fieldSize[fcsCode];
+        if (isSingleSegment && !fcsCode) headerSize += 1;   /* single segment with fcs code 0 : 1 more byte */
+        return headerSize;
+    }
+}
+
+/** ZSTD_frameHeaderSize() :
+ *  Same as ZSTD_frameHeaderSize_internal(), for format ZSTD_f_zstd1.
+ *  srcSize must be >= ZSTD_startingInputLength(ZSTD_f_zstd1).
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+    size_t const headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+    return headerSize;
+}
+
+
+/** ZSTD_getFrameHeader_advanced() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ *  `zfhPtr` is zeroed on entry, whatever the outcome.
+ *  For a skippable frame, only `frameType` and `frameContentSize`
+ *  (the 4-byte little-endian field following the magic number) are filled.
+ *  For a regular frame, `windowSize` is decoded from the window descriptor byte,
+ *  or set to `frameContentSize` when the single-segment flag is set,
+ *  and `blockSizeMax` is the smaller of `windowSize` and ZSTD_BLOCKSIZE_MAX.
+ *  The magic number is not checked when format is ZSTD_f_zstd1_magicless.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+
+    memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+    if (srcSize < minInputSize) return minInputSize;
+    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
+
+    if ( (format != ZSTD_f_zstd1_magicless)
+      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            /* skippable frame */
+            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
+                return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
+            memset(zfhPtr, 0, sizeof(*zfhPtr));   /* zfhPtr was already zeroed at function entry */
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
+            zfhPtr->frameType = ZSTD_skippableFrame;
+            return 0;
+        }
+        RETURN_ERROR(prefix_unknown, "");
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
+        if (srcSize < fhsize) return fhsize;
+        zfhPtr->headerSize = (U32)fhsize;
+    }
+
+    {   BYTE const fhdByte = ip[minInputSize-1];   /* frame header descriptor : last byte of the starting input */
+        size_t pos = minInputSize;
+        U32 const dictIDSizeCode = fhdByte&3;   /* bits 0-1 */
+        U32 const checksumFlag = (fhdByte>>2)&1;   /* bit 2 */
+        U32 const singleSegment = (fhdByte>>5)&1;   /* bit 5 */
+        U32 const fcsID = fhdByte>>6;   /* bits 6-7 */
+        U64 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
+        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+                        "reserved bits, must be zero");
+
+        if (!singleSegment) {
+            BYTE const wlByte = ip[pos++];   /* window descriptor */
+            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* upper 5 bits */
+            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
+            windowSize = (1ULL << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);   /* lower 3 bits : add that many eighths of the base size */
+        }
+        switch(dictIDSizeCode)
+        {
+            default: assert(0);  /* impossible : 2-bit value; falls through to case 0 */
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)
+        {
+            default: assert(0);  /* impossible : 2-bit value; falls through to case 0 */
+            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;   /* otherwise size stays unknown */
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;   /* 2-byte field, 256 is added to the stored value */
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (singleSegment) windowSize = frameContentSize;
+
+        zfhPtr->frameType = ZSTD_frame;
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
+
+/** ZSTD_getFrameHeader() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  Same as ZSTD_getFrameHeader_advanced(), for format ZSTD_f_zstd1.
+ *  note : this function does not consume input, it only reads it.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+    size_t const status = ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+    return status;
+}
+
+
+/** ZSTD_getFrameContentSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to by `src` if known
+ *           (0 for a skippable frame), otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+    ZSTD_frameHeader zfh;
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) {
+        /* legacy decoders report 0 when the size is not available */
+        unsigned long long const legacySize = ZSTD_getDecompressedSize_legacy(src, srcSize);
+        if (legacySize == 0) return ZSTD_CONTENTSIZE_UNKNOWN;
+        return legacySize;
+    }
+#endif
+
+    /* any non-zero result (error, or more input wanted) is reported as an error */
+    if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+        return ZSTD_CONTENTSIZE_ERROR;
+
+    if (zfh.frameType == ZSTD_skippableFrame) return 0;
+    return zfh.frameContentSize;
+}
+
+/* readSkippableFrameSize() :
+ * `src` must start with a skippable frame (the magic number is not checked here).
+ * @return : total size of the skippable frame (header + announced content size),
+ *           or an error code :
+ *           srcSize_wrong if `srcSize` cannot hold the header or the whole frame,
+ *           frameParameter_unsupported if the total size does not fit in 32 bits */
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+    U32 sizeU32;   /* content size, as announced right after the magic number */
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+    /* 32-bit wrap-around detection */
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+
+    {   size_t skippableSize = ZSTD_SKIPPABLEHEADERSIZE;
+        skippableSize += sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
+
+/** ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained,
+ *            or ZSTD_CONTENTSIZE_UNKNOWN if one frame does not announce its size,
+ *            or ZSTD_CONTENTSIZE_ERROR (invalid frame, total size overflow,
+ *            or trailing bytes which do not form a frame) */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+        U32 const magicNumber = MEM_readLE32(src);
+        size_t consumed;   /* compressed size of the frame under inspection */
+
+        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            /* skippable frame : adds nothing to the decompressed size */
+            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+            if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
+            assert(skippableSize <= srcSize);
+            consumed = skippableSize;
+        } else {
+            unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;   /* error, or unknown size */
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+
+            consumed = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(consumed)) return ZSTD_CONTENTSIZE_ERROR;
+        }
+
+        /* move on to next frame */
+        src = (const BYTE *)src + consumed;
+        srcSize -= consumed;
+    }  /* while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) */
+
+    /* leftover bytes : input is not an exact sequence of frames */
+    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
+
+    return totalDstSize;
+}
+
+/** ZSTD_getDecompressedSize() :
+ *  compatible with legacy mode
+ *  Same as ZSTD_getFrameContentSize(), except that the "unknown" and "error"
+ *  outcomes are both folded into 0.
+ * @return : decompressed size if known, 0 otherwise
+             note : 0 can mean any of the following :
+                   - frame content is empty
+                   - decompressed size field is not present in frame header
+                   - frame header unknown / not supported
+                   - frame header not complete (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    /* a single comparison covers both special values, provided ERROR is the smaller one */
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
+    if (ret >= ZSTD_CONTENTSIZE_ERROR) return 0;
+    return ret;
+}
+
+
+/** ZSTD_decodeFrameHeader() :
+ *  Decode the frame header at `src` into dctx->fParams, using dctx->format.
+ *  Outside of fuzzing builds, a frame announcing a non-zero dictID
+ *  different from dctx->dictID is refused.
+ *  When the frame carries a checksum, the xxh64 state of `dctx` is reset.
+ * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
+{
+    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
+
+    if (ZSTD_isError(result)) {
+        return result;    /* invalid header */
+    }
+    /* a positive result is a request for more input */
+    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
+    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+                    dictionary_wrong, "");
+#endif
+
+    if (dctx->fParams.checksumFlag) {
+        XXH64_reset(&dctx->xxhState, 0);
+    }
+    return 0;
+}
+
+/* ZSTD_errorFrameSizeInfo() :
+ * @return : a ZSTD_frameSizeInfo reporting a failure :
+ *           compressedSize carries `ret` (expected to be an error code),
+ *           decompressedBound is ZSTD_CONTENTSIZE_ERROR */
+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+    ZSTD_frameSizeInfo errorInfo;
+    errorInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    errorInfo.compressedSize = ret;
+    return errorInfo;
+}
+
+/* ZSTD_findFrameSizeInfo() :
+ * Measure the frame starting at `src`, without decompressing it.
+ * The frame header is decoded, then block headers are walked up to the last block,
+ * and the optional 4-byte checksum is accounted for.
+ * compatible with legacy mode (when legacy support is compiled in)
+ * @return : a ZSTD_frameSizeInfo where
+ *           - compressedSize is the size of the frame within `src`, or an error code,
+ *           - decompressedBound is the frame content size when the header announces it,
+ *             otherwise nbBlocks * blockSizeMax;
+ *             it stays 0 for a skippable frame,
+ *             and is ZSTD_CONTENTSIZE_ERROR on failure. */
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize))
+        return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
+#endif
+
+    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+        /* skippable frame : size comes from its header, decompressedBound remains 0 */
+        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
+        return frameSizeInfo;
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        size_t nbBlocks = 0;
+        ZSTD_frameHeader zfh;
+
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+            if (ZSTD_isError(ret))
+                return ZSTD_errorFrameSizeInfo(ret);
+            if (ret > 0)   /* header incomplete : more input would be needed */
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+        }
+
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
+
+        /* Iterate over each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize))
+                return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+            nbBlocks++;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        /* Final frame content checksum */
+        if (zfh.checksumFlag) {
+            if (remainingSize < 4)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+            ip += 4;
+        }
+
+        /* ip only moved forward from ipstart, so the difference is non-negative */
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+        /* the product is computed in 64 bits : with a 32-bit size_t,
+         * nbBlocks * blockSizeMax could wrap before being widened */
+        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                        ? zfh.frameContentSize
+                                        : (unsigned long long)nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}
+
+/** ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src`,
+ *            or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    return ZSTD_findFrameSizeInfo(src, srcSize).compressedSize;
+}
+
+/** ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  All the frames found within `srcSize` bytes are measured, and their bounds are summed.
+ *  @return : the maximum decompressed size of the compressed source,
+ *            or ZSTD_CONTENTSIZE_ERROR if a frame is invalid,
+ *            or if the sum of bounds does not fit in an unsigned long long
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        /* check for overflow, as ZSTD_findDecompressedSize() does :
+         * frame headers can announce arbitrarily large content sizes,
+         * and a wrapped sum would be reported as a valid (too small) bound */
+        if (bound + decompressedBound < bound) return ZSTD_CONTENTSIZE_ERROR;
+        bound += decompressedBound;
+    }
+    return bound;
+}
+
+
+/*-*************************************************************
+ *   Frame decoding
+ ***************************************************************/
+
+/** ZSTD_insertBlock() :
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks.
+ *  Continuity with previous data is checked first,
+ *  then the end of the block becomes the new previousDstEnd.
+ *  @return : `blockSize` */
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    const char* const blockEnd = (const char*)blockStart + blockSize;
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
+    ZSTD_checkContinuity(dctx, blockStart);
+    dctx->previousDstEnd = blockEnd;
+    return blockSize;
+}
+
+
+/* ZSTD_copyRawBlock() :
+ * Copy `srcSize` bytes from `src` to `dst`.
+ * A NULL `dst` is only accepted when there is nothing to copy.
+ * @return : `srcSize`, or an error code
+ *           (dstBuffer_null, or dstSize_tooSmall if `dstCapacity` < `srcSize`) */
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_copyRawBlock");
+    if (dst == NULL) {
+        if (srcSize != 0) {
+            RETURN_ERROR(dstBuffer_null, "");
+        }
+        return 0;
+    }
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+/* ZSTD_setRleBlock() :
+ * Fill the first `regenSize` bytes of `dst` with byte value `b`.
+ * A NULL `dst` is only accepted when there is nothing to write.
+ * @return : `regenSize`, or an error code
+ *           (dstBuffer_null, or dstSize_tooSmall if `dstCapacity` < `regenSize`) */
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                               BYTE b,
+                               size_t regenSize)
+{
+    if (dst == NULL) {
+        if (regenSize != 0) {
+            RETURN_ERROR(dstBuffer_null, "");
+        }
+        return 0;
+    }
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
+    memset(dst, b, regenSize);
+    return regenSize;
+}
+
+
+/*! ZSTD_decompressFrame() :
+ * @dctx must be properly initialized
+ *  will update *srcPtr and *srcSizePtr,
+ *  to make *srcPtr progress by one frame. @return : nb of bytes written into dst, or an error code */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+                                   void* dst, size_t dstCapacity,
+                             const void** srcPtr, size_t *srcSizePtr)
+{
+    const BYTE* ip = (const BYTE*)(*srcPtr);
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;   /* no pointer arithmetic on ostart when capacity is 0 */
+    BYTE* op = ostart;
+    size_t remainingSrcSize = *srcSizePtr;
+
+    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
+
+    /* check : input must hold at least the smallest frame header plus one block header */
+    RETURN_ERROR_IF(
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+        srcSize_wrong, "");
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+                        srcSize_wrong, "");
+        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
+        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block, until the one flagged as last */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSrcSize -= ZSTD_blockHeaderSize;
+        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");   /* block content must be entirely present */
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);   /* *ip is the byte to repeat origSize times */
+            break;
+        case bt_reserved :
+        default:
+            RETURN_ERROR(corruption_detected, "invalid block type");
+        }
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        if (dctx->fParams.checksumFlag)
+            XXH64_update(&dctx->xxhState, op, decodedSize);   /* checksum covers regenerated data */
+        if (decodedSize != 0)   /* skip pointer arithmetic when nothing was produced */
+            op += decodedSize;
+        assert(ip != NULL);
+        ip += cBlockSize;
+        remainingSrcSize -= cBlockSize;
+        if (blockProperties.lastBlock) break;
+    }
+
+    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {   /* header announced a size : it must match */
+        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+                        corruption_detected, "");
+    }
+    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+        U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);   /* only the low 32 bits are compared */
+        U32 checkRead;
+        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
+        checkRead = MEM_readLE32(ip);
+        RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        ip += 4;
+        remainingSrcSize -= 4;
+    }
+
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSrcSize;
+    return op-ostart;
+}
+
+/*! ZSTD_decompressMultiFrame() :
+ *  decompress every frame (zstd, legacy when enabled, skippable) concatenated in `src` into `dst`.
+ * @return : total nb of bytes written into `dst`, or an error code (also when `src` is not entirely consumed) */
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, const void* dict, size_t dictSize, const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+    int moreThan1Frame = 0;
+
+    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
+    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
+
+    if (ddict) {   /* expose ddict content as (dict, dictSize), which the legacy path below consumes */
+        dict = ZSTD_DDict_dictContent(ddict);
+        dictSize = ZSTD_DDict_dictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+        if (ZSTD_isLegacy(src, srcSize)) {
+            size_t decodedSize;
+            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
+            if (ZSTD_isError(frameSize)) return frameSize;
+            RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
+                "legacy support is not compatible with static dctx");
+
+            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
+            if (ZSTD_isError(decodedSize)) return decodedSize;
+
+            assert(decodedSize <= dstCapacity);   /* was `<=-` : compared against the negated capacity, so it could never fire */
+            dst = (BYTE*)dst + decodedSize;
+            dstCapacity -= decodedSize;
+
+            src = (const BYTE*)src + frameSize;
+            srcSize -= frameSize;
+
+            continue;
+        }
+#endif
+
+        {   U32 const magicNumber = MEM_readLE32(src);
+            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+                assert(skippableSize <= srcSize);
+
+                src = (const BYTE *)src + skippableSize;   /* skippable frames produce no output */
+                srcSize -= skippableSize;
+                continue;
+        }   }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
+        } else {
+            /* this will initialize correctly with no dict if dict == NULL, so
+             * use this in all cases but ddict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
+        }
+        ZSTD_checkContinuity(dctx, dst);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            RETURN_ERROR_IF(
+                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+             && (moreThan1Frame==1),
+                srcSize_wrong,
+                "at least one frame successfully completed, but following "
+                "bytes are garbage: it's more likely to be a srcSize error, "
+                "specifying more bytes than compressed size of frame(s). This "
+                "error message replaces ERROR(prefix_unknown), which would be "
+                "confusing, as the first header is actually correct. Note that "
+                "one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic "
+                "bytes. But this is _much_ less likely than a srcSize field "
+                "error.");
+            if (ZSTD_isError(res)) return res;
+            assert(res <= dstCapacity);
+            if (res != 0)   /* skip pointer arithmetic when the frame produced nothing */
+                dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+        moreThan1Frame = 1;
+    }  /* while (srcSize >= ZSTD_startingInputLength(dctx->format)) */
+
+    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
+
+    return (BYTE*)dst - (BYTE*)dststart;
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           const void* dict, size_t dictSize)
+{
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);   /* raw dictionary buffer, no ddict */
+}
+
+
+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)   /* selects the ddict for the next decompression, per dctx->dictUses */
+{
+    switch (dctx->dictUses) {
+    default:
+        assert(0 /* Impossible */);
+        /* fall-through : when asserts are disabled, behave as ZSTD_dont_use */
+    case ZSTD_dont_use:
+        ZSTD_clearDict(dctx);
+        return NULL;
+    case ZSTD_use_indefinitely:
+        return dctx->ddict;
+    case ZSTD_use_once:
+        dctx->dictUses = ZSTD_dont_use;   /* consumed : the next call takes the ZSTD_dont_use branch */
+        return dctx->ddict;
+    }
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));   /* uses the ddict selected by ZSTD_getDDict(), possibly NULL */
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)   /* heap mode : context is created and freed on each call */
+    size_t regenSize;
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
+    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTD_DCtx dctx;   /* the whole context is a local variable */
+    ZSTD_initDCtx_internal(&dctx);
+    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+
+/*-**************************************
+*   Advanced Streaming Decompression API
+*   Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }   /* srcSize expected by the next ZSTD_decompressContinue() call */
+
+/**
+ * Variant of ZSTD_nextSrcSizeToDecompress() that accepts a partial block as input
+ * whenever the pending block can be streamed. Only raw (uncompressed) blocks can
+ * currently be streamed.
+ *
+ * Streaming a block lowers the latency before output is produced
+ * and avoids copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+    int const decodingBlock = (dctx->stage == ZSTDds_decompressBlock)
+                           || (dctx->stage == ZSTDds_decompressLastBlock);
+    if (decodingBlock && (dctx->bType == bt_raw))
+        return MIN(MAX(inputSize, 1), dctx->expected);   /* at least 1 byte, at most what the block still expects */
+    return dctx->expected;
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {   /* maps the current decoding stage onto the public input-type enum */
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+        assert(0);   /* fall-through : reports a frame header when asserts are disabled */
+    case ZSTDds_getFrameHeaderSize:
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;
+    }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }   /* 1 when the stage is ZSTDds_skipFrame, 0 otherwise */
+
+/** ZSTD_decompressContinue() :
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()); raw blocks may also be fed partially
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
+ *            or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+    /* Sanity check */
+    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+    if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        assert(src != NULL);
+        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
+            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
+            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
+                memcpy(dctx->headerBuffer, src, srcSize);
+                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
+                dctx->stage = ZSTDds_decodeSkippableHeader;
+                return 0;
+        }   }
+        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, srcSize);   /* keep the prefix : the rest of the header arrives on the next call */
+        dctx->expected = dctx->headerSize - srcSize;
+        dctx->stage = ZSTDds_decodeFrameHeader;
+        return 0;
+
+    case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
+        memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);   /* append after the prefix stored previously */
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+        dctx->expected = ZSTD_blockHeaderSize;
+        dctx->stage = ZSTDds_decodeBlockHeader;
+        return 0;
+
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+                return 0;
+            }
+            /* empty block : no content to wait for, select the next stage directly */
+            if (bp.lastBlock) {
+                if (dctx->fParams.checksumFlag) {
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0; /* end of frame */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
+                dctx->stage = ZSTDds_decodeBlockHeader;
+            }
+            return 0;
+        }
+
+    case ZSTDds_decompressLastBlock:
+    case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_raw :
+                assert(srcSize <= dctx->expected);
+                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+                assert(rSize == srcSize);
+                dctx->expected -= rSize;   /* stays non-zero while the raw block is only partially consumed */
+                break;
+            case bt_rle :
+                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_reserved :   /* should never happen */
+            default:
+                RETURN_ERROR(corruption_detected, "invalid block type");
+            }
+            FORWARD_IF_ERROR(rSize, "");
+            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
+            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+            dctx->decodedSize += rSize;
+            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+            dctx->previousDstEnd = (char*)dst + rSize;
+
+            /* Stay on the same stage until we are finished streaming the block. */
+            if (dctx->expected > 0) {
+                return rSize;
+            }
+
+            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
+                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
+                RETURN_ERROR_IF(
+                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && dctx->decodedSize != dctx->fParams.frameContentSize,
+                    corruption_detected, "");
+                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0;   /* ends here */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->stage = ZSTDds_decodeBlockHeader;
+                dctx->expected = ZSTD_blockHeaderSize;
+            }
+            return rSize;
+        }
+
+    case ZSTDds_checkChecksum:
+        assert(srcSize == 4);  /* guaranteed by dctx->expected */
+        {   U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);   /* only the low 32 bits are compared */
+            U32 const check32 = MEM_readLE32(src);
+            DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+            RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+            dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+
+    case ZSTDds_decodeSkippableHeader:
+        assert(src != NULL);
+        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+        memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
+        dctx->stage = ZSTDds_skipFrame;
+        return 0;
+
+    case ZSTDds_skipFrame:   /* skippable content is discarded : nothing is written to dst */
+        dctx->expected = 0;
+        dctx->stage = ZSTDds_getFrameHeaderSize;
+        return 0;
+
+    default:
+        assert(0);   /* impossible */
+        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }
+}
+
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)   /* references dict as prefix content (no copy); always returns 0 */
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));   /* must read the old prefixStart / previousDstEnd before they are overwritten below */
+    dctx->prefixStart = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    return 0;
+}
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read (header included), or an error code (dictionary_corrupted) */
+size_t
+ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                  const void* const dict, size_t const dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+
+    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
+    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
+    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
+    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
+        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
+#ifdef HUF_FORCE_DECOMPRESS_X1
+        /* in minimal huffman, we always use X1 variants */
+        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+                                                dictPtr, dictEnd - dictPtr,
+                                                workspace, workspaceSize);
+#else
+        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+                                                dictPtr, dictEnd - dictPtr,
+                                                workspace, workspaceSize);
+#endif
+        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+        dictPtr += hSize;
+    }
+
+    {   short offcodeNCount[MaxOff+1];   /* offset codes table */
+        unsigned offcodeMaxValue = MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->OFTable,
+                            offcodeNCount, offcodeMaxValue,
+                            OF_base, OF_bits,
+                            offcodeLog);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];   /* match lengths table */
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->MLTable,
+                            matchlengthNCount, matchlengthMaxValue,
+                            ML_base, ML_bits,
+                            matchlengthLog);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];   /* literal lengths table : built last, after LLTable is no longer needed as workspace start */
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->LLTable,
+                            litlengthNCount, litlengthMaxValue,
+                            LL_base, LL_bits,
+                            litlengthLog);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");   /* 3 repcodes x 4 bytes must remain */
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+                            dictionary_corrupted, "");   /* a repcode must be non-zero and no larger than the dictionary content */
+            entropy->rep[i] = rep;
+    }   }
+
+    return dictPtr - (const BYTE*)dict;
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)   /* loads dict as a full zstd dictionary when it carries the magic number, else as raw content */
+{
+    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);   /* too small to hold magic + dictID */
+    {   U32 const magic = MEM_readLE32(dict);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
+    }   }
+    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
+        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
+        dict = (const char*)dict + eSize;   /* what follows the entropy section is the dictionary content */
+        dictSize -= eSize;
+    }
+    dctx->litEntropy = dctx->fseEntropy = 1;
+
+    /* reference dictionary content */
+    return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)   /* resets per-frame decoding state; always returns 0 */
+{
+    assert(dctx != NULL);
+    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->decodedSize = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->prefixStart = NULL;
+    dctx->virtualStart = NULL;
+    dctx->dictEnd = NULL;
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->litEntropy = dctx->fseEntropy = 0;   /* set back to 1 by ZSTD_decompress_insertDictionary() once tables are loaded */
+    dctx->dictID = 0;
+    dctx->bType = bt_reserved;
+    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;   /* table pointers target the context's own entropy tables */
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
+    return 0;
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (dict && dictSize)   /* a NULL or empty dictionary means : no dictionary */
+        RETURN_ERROR_IF(
+            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+            dictionary_corrupted, "");   /* any insertion failure is reported as dictionary_corrupted */
+    return 0;
+}
+
+
+/* ======   ZSTD_DDict   ====== */
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
+    assert(dctx != NULL);
+    if (ddict) {   /* must run before ZSTD_decompressBegin(), which resets dctx->dictEnd to NULL */
+        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
+        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
+        const void* const dictEnd = dictStart + dictSize;
+        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);   /* cold when the context's current dictEnd is not this ddict's end */
+        DEBUGLOG(4, "DDict is %s",
+                    dctx->ddictIsCold ? "~cold~" : "hot!");
+    }
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
+        ZSTD_copyDDictParameters(dctx, ddict);
+    }
+    return 0;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Reads the dictID field stored in the header of a dictionary.
+ *  @return 0 when the buffer is too short or lacks the dictionary magic number, i.e. it is not a
+ *  conformant Zstandard dictionary. Such a buffer can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    int const hasHeader = (dictSize >= 8) && (MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);
+    if (!hasHeader) return 0;
+    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);   /* dictID follows the magic number */
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    Needed dictionary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  To identify the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;   /* header errors are reported as "no dictID" */
+    return zfp.dictID;
+}
+
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict)
+{
+    /* dict content and size are left NULL/0 : ZSTD_decompressMultiFrame() derives them from ddict (needed for legacy frames) */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
+}
+
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createDStream");
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);   /* same as the advanced variant, with ZSTD_defaultCMem */
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);   /* the DStream returned is the DCtx built inside workspace */
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_advanced(customMem);   /* the DStream returned is a DCtx created with customMem */
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+    return ZSTD_freeDCtx(zds);   /* released through the DCtx destructor; its return value is passed through */
+}
+
+
+/* ***  Initialization  *** */
+
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }   /* one maximum-size block plus one block header */
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }   /* one maximum-size block */
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+                                   const void* dict, size_t dictSize,
+                                         ZSTD_dictLoadMethod_e dictLoadMethod,
+                                         ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");   /* only allowed while the stream stage is zdss_init */
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {   /* NULL or empty dict : nothing is attached after the clear */
+        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
+    return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);   /* by reference, content type auto-detected */
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);   /* by copy, content type auto-detected */
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
+    dctx->dictUses = ZSTD_use_once;   /* overrides the ZSTD_use_indefinitely set by the load above */
+    return 0;
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);   /* prefix is always treated as raw content */
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_startingInputLength(),
+ * or an error code forwarded from ZSTD_DCtx_reset() / ZSTD_DCtx_loadDictionary() */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+    return ZSTD_startingInputLength(zds->format);
+}
+
+/* note : equivalent to ZSTD_initDStream_usingDDict(zds, NULL), i.e. no dictionary */
+size_t ZSTD_initDStream(ZSTD_DStream* zds)
+{
+    DEBUGLOG(4, "ZSTD_initDStream");
+    return ZSTD_initDStream_usingDDict(zds, NULL);
+}
+
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session
+ * return : ZSTD_startingInputLength(), or an error code forwarded from ZSTD_DCtx_reset() / ZSTD_DCtx_refDDict() */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+{
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_startingInputLength(),
+ * or an error code forwarded from ZSTD_DCtx_reset() */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+
+/* ZSTD_DCtx_refDDict() : reference `ddict` (no copy) for all following decompressions; NULL just clears */
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (ddict == NULL) return 0;   /* nothing to reference */
+    dctx->dictUses = ZSTD_use_indefinitely;
+    dctx->ddict = ddict;
+    return 0;
+}
+
+/* ZSTD_DCtx_setMaxWindowSize() :
+ * note : no direct equivalence in ZSTD_DCtx_setParameter,
+ * since this version sets windowSize, and the other sets windowLog */
+size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+    size_t const min = (size_t)1 << bounds.lowerBound;   /* bounds are logs : convert to sizes */
+    size_t const max = (size_t)1 << bounds.upperBound;
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
+    dctx->maxWindowSize = maxWindowSize;
+    return 0;
+}
+
+/* ZSTD_DCtx_setFormat() :
+ * Shortcut for ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format).
+ * @return : whatever ZSTD_DCtx_setParameter() returns. */
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
+{
+    int const formatValue = (int)format;
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, formatValue);
+}
+
+/* ZSTD_dParam_getBounds() :
+ * Reports the inclusive [lowerBound, upperBound] range accepted for `dParam`.
+ * @return : a ZSTD_bounds structure; for a parameter not handled here, its `error`
+ *           field carries ERROR(parameter_unsupported) and both bounds are 0. */
+ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+{
+    ZSTD_bounds range = { 0, 0, 0 };
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            range.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            range.upperBound = ZSTD_WINDOWLOG_MAX;
+            break;
+        case ZSTD_d_format:
+            range.lowerBound = (int)ZSTD_f_zstd1;
+            range.upperBound = (int)ZSTD_f_zstd1_magicless;
+            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+            break;
+        case ZSTD_d_stableOutBuffer:
+            range.lowerBound = (int)ZSTD_obm_buffered;
+            range.upperBound = (int)ZSTD_obm_stable;
+            break;
+        default:
+            /* unknown parameter : report it through the error field */
+            range.error = ERROR(parameter_unsupported);
+            break;
+    }
+    return range;
+}
+
+/* ZSTD_dParam_withinBounds:
+ * Checks `value` against the inclusive range reported by ZSTD_dParam_getBounds().
+ * @return 1 if value is within dParam bounds,
+ * 0 otherwise (including when the bounds lookup itself reports an error) */
+static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
+{
+    ZSTD_bounds const range = ZSTD_dParam_getBounds(dParam);
+    if (ZSTD_isError(range.error)) return 0;
+    return (value >= range.lowerBound) && (value <= range.upperBound);
+}
+
+/* CHECK_DBOUNDS() :
+ * Leaves the calling function through RETURN_ERROR_IF() with error parameter_outOfBound
+ * when value `v` is rejected by ZSTD_dParam_withinBounds() for parameter `p`.
+ * Only usable inside a function returning a size_t error code. */
+#define CHECK_DBOUNDS(p,v) {                \
+    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
+}
+
+/* ZSTD_DCtx_setParameter() :
+ * Sets one decompression parameter on `dctx`.
+ * Only allowed while the stream is in stage zdss_init.
+ * Handled parameters :
+ *  - ZSTD_d_windowLogMax    : stored as dctx->maxWindowSize = 1 << value
+ *  - ZSTD_d_format          : stored as dctx->format
+ *  - ZSTD_d_stableOutBuffer : stored as dctx->outBufferMode
+ * @return : 0 on success, otherwise error stage_wrong, parameter_outOfBound (value
+ *           rejected by CHECK_DBOUNDS) or parameter_unsupported (unknown parameter). */
+size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            /* 0 is replaced by ZSTD_WINDOWLOG_LIMIT_DEFAULT before the bounds check */
+            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
+            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
+            dctx->maxWindowSize = ((size_t)1) << value;
+            return 0;
+        case ZSTD_d_format:
+            CHECK_DBOUNDS(ZSTD_d_format, value);
+            dctx->format = (ZSTD_format_e)value;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+            dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
+            return 0;
+        default:;
+    }
+    /* only reached for a parameter without a case above */
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+/* ZSTD_DCtx_reset() :
+ * Resets the session and/or the parameters of `dctx`, depending on `reset` :
+ *  - ZSTD_reset_session_only           : stream goes back to stage zdss_init and the
+ *                                        no-forward-progress counter is cleared;
+ *  - ZSTD_reset_parameters             : dictionary is cleared and every parameter
+ *                                        settable through ZSTD_DCtx_setParameter()
+ *                                        (format, max window size, output buffer mode)
+ *                                        goes back to its default. Only allowed while
+ *                                        the stream is in stage zdss_init;
+ *  - ZSTD_reset_session_and_parameters : both of the above, session first.
+ * @return : 0 on success, or error stage_wrong when parameters are reset while a
+ *           stream is in progress. */
+size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        dctx->streamStage = zdss_init;
+        dctx->noForwardProgress = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+        ZSTD_clearDict(dctx);
+        dctx->format = ZSTD_f_zstd1;
+        dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+        /* ZSTD_d_stableOutBuffer is a parameter too : without this line, a context
+         * switched to ZSTD_obm_stable would keep that mode across a parameter reset. */
+        dctx->outBufferMode = ZSTD_obm_buffered;
+    }
+    return 0;
+}
+
+
+/* ZSTD_sizeof_DStream() :
+ * Same result as ZSTD_sizeof_DCtx(), to which the call is forwarded. */
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+{
+    size_t const dctxSize = ZSTD_sizeof_DCtx(dctx);
+    return dctxSize;
+}
+
+/* ZSTD_decodingBufferSize_min() :
+ * Computes the size of the output buffer needed to decode a frame :
+ *     MIN(frameContentSize, windowSize + blockSize + 2 * WILDCOPY_OVERLENGTH)
+ * with blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX).
+ * @return : that size, or error frameParameter_windowTooLarge when the value
+ *           does not fit into a size_t. */
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    /* no need for more room than the frame's declared content size */
+    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+    size_t const minRBSize = (size_t) neededSize;
+    /* the round-trip through size_t detects truncation */
+    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+                    frameParameter_windowTooLarge, "");
+    return minRBSize;
+}
+
+/* ZSTD_estimateDStreamSize() :
+ * Adds up ZSTD_estimateDCtxSize(), an input buffer of one block
+ * (MIN(windowSize, ZSTD_BLOCKSIZE_MAX)) and the output buffer size given by
+ * ZSTD_decodingBufferSize_min() for an unknown content size.
+ * NOTE(review) : the result of ZSTD_decodingBufferSize_min() is added without being
+ *                tested for an error code -- confirm this is intended. */
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+    /* no block can be larger than this, hence it bounds the input buffer */
+    size_t const inputBufferSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    size_t const outputBufferSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
+    size_t const contextSize = ZSTD_estimateDCtxSize();
+    return contextSize + inputBufferSize + outputBufferSize;
+}
+
+/* ZSTD_estimateDStreamSize_fromFrame() :
+ * Reads the frame header found at `src` with ZSTD_getFrameHeader() and feeds its
+ * windowSize to ZSTD_estimateDStreamSize().
+ * @return : the estimated size, or an error code :
+ *           - the error reported by ZSTD_getFrameHeader(), returned as is;
+ *           - srcSize_wrong when ZSTD_getFrameHeader() returns a non-zero, non-error
+ *             value (presumably : `srcSize` too small for a full header);
+ *           - frameParameter_windowTooLarge when windowSize > 1 << ZSTD_WINDOWLOG_MAX. */
+size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
+    ZSTD_frameHeader zfh;
+    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
+    if (ZSTD_isError(err)) return err;
+    RETURN_ERROR_IF(err>0, srcSize_wrong, "");
+    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+                    frameParameter_windowTooLarge, "");
+    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
+}
+
+
+/* *****   Decompression   ***** */
+
+/* ZSTD_DCtx_isOverflow() :
+ * @return : non-zero when the buffers currently held by `zds` (in + out) are at least
+ *           ZSTD_WORKSPACETOOLARGE_FACTOR times larger than what is needed. */
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    size_t const neededTotal = neededInBuffSize + neededOutBuffSize;
+    return (zds->inBuffSize + zds->outBuffSize) >= neededTotal * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+/* ZSTD_DCtx_updateOversizedDuration() :
+ * Maintains zds->oversizedDuration : incremented each time the buffers are found
+ * oversized by ZSTD_DCtx_isOverflow(), reset to 0 as soon as they are not. */
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    int const oversized = ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize);
+    if (!oversized) {
+        zds->oversizedDuration = 0;
+    } else {
+        zds->oversizedDuration++;
+    }
+}
+
+/* ZSTD_DCtx_isOversizedTooLong() :
+ * @return : 1 once zds->oversizedDuration has reached
+ *           ZSTD_WORKSPACETOOLARGE_MAXDURATION, 0 before that. */
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+    if (zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION) return 1;
+    return 0;
+}
+
+/* ZSTD_checkOutBuffer() :
+ * In ZSTD_obm_stable mode, verifies that `output` is exactly the buffer description
+ * recorded in zds->expectedOutBuffer (same dst, same pos, same size).
+ * @return : 0 when the buffer is acceptable, error dstBuffer_wrong otherwise. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+    /* Nothing to enforce unless ZSTD_obm_stable is enabled. Even then, any buffer is
+     * accepted in zdss_init : the constraint starts with the following calls and
+     * lasts until the context is reset. */
+    if ((zds->outBufferMode == ZSTD_obm_stable) && (zds->streamStage != zdss_init)) {
+        ZSTD_outBuffer const recorded = zds->expectedOutBuffer;
+        int const sameDst  = (recorded.dst == output->dst);
+        int const samePos  = (recorded.pos == output->pos);
+        int const sameSize = (recorded.size == output->size);
+        if (!(sameDst && samePos && sameSize)) {
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
+        }
+    }
+    return 0;
+}
+
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ *
+ * zds     : streaming context; its streamStage (and outEnd in buffered mode) is updated
+ * op      : in/out, current write position in the caller's output buffer; only advanced
+ *           when output is written directly (outBufferMode != ZSTD_obm_buffered)
+ * oend    : end of the caller's output buffer
+ * src     : input handed to ZSTD_decompressContinue()
+ * srcSize : size of `src`
+ * @return : 0, or the error forwarded from ZSTD_decompressContinue()
+ */
+static size_t ZSTD_decompressContinueStream(
+            ZSTD_DStream* zds, char** op, char* oend,
+            void const* src, size_t srcSize) {
+    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+    if (zds->outBufferMode == ZSTD_obm_buffered) {
+        /* Decode into the internal buffer; a skippable frame gets no output room */
+        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+        size_t const decodedSize = ZSTD_decompressContinue(zds,
+                zds->outBuff + zds->outStart, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        if (!decodedSize && !isSkipFrame) {
+            /* nothing was produced : go read more input */
+            zds->streamStage = zdss_read;
+        } else {
+            /* mark the freshly decoded range [outStart, outEnd) as pending flush */
+            zds->outEnd = zds->outStart + decodedSize;
+            zds->streamStage = zdss_flush;
+        }
+    } else {
+        /* Write directly into the output buffer */
+        size_t const dstSize = isSkipFrame ? 0 : oend - *op;
+        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        *op += decodedSize;
+        /* Flushing is not needed. */
+        zds->streamStage = zdss_read;
+        assert(*op <= oend);
+        assert(zds->outBufferMode == ZSTD_obm_stable);
+    }
+    return 0;
+}
+
+/* ZSTD_decompressStream() :
+ * Streaming decompression entry point, implemented as a state machine driven by
+ * zds->streamStage :
+ *   zdss_init       -> (re)initializes the per-frame state, then falls through
+ *   zdss_loadHeader -> accumulates the frame header into zds->headerBuffer, sizes the
+ *                      internal buffers, may shortcut to single-pass decompression
+ *   zdss_read       -> decodes directly from `input` when enough bytes are available
+ *   zdss_load       -> otherwise copies input into zds->inBuff until a full unit is loaded
+ *   zdss_flush      -> copies decoded data from zds->outBuff to `output`
+ * On exit through the bottom of the function, input->pos and output->pos are updated
+ * to reflect the bytes consumed and produced.
+ * @return : 0 when a frame is fully decoded and fully flushed,
+ *           1 when the frame is decoded but output remains to be flushed (or the
+ *             hostage byte cannot be released yet),
+ *           otherwise a hint for the amount of input expected next,
+ *           or an error code (srcSize_wrong / dstSize_tooSmall for inconsistent
+ *           positions, plus errors raised by the stages below).
+ */
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    const char* const src = (const char*)input->src;
+    const char* const istart = input->pos != 0 ? src + input->pos : src;
+    const char* const iend = input->size != 0 ? src + input->size : src;
+    const char* ip = istart;
+    char* const dst = (char*)output->dst;
+    char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+    char* const oend = output->size != 0 ? dst + output->size : dst;
+    char* op = ostart;
+    U32 someMoreWork = 1;
+
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    RETURN_ERROR_IF(
+        input->pos > input->size,
+        srcSize_wrong,
+        "forbidden. in: pos: %u   vs size: %u",
+        (U32)input->pos, (U32)input->size);
+    RETURN_ERROR_IF(
+        output->pos > output->size,
+        dstSize_tooSmall,
+        "forbidden. out: pos: %u   vs size: %u",
+        (U32)output->pos, (U32)output->size);
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+    FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
+
+    while (someMoreWork) {
+        switch(zds->streamStage)
+        {
+        case zdss_init :
+            DEBUGLOG(5, "stage zdss_init => transparent reset ");
+            zds->streamStage = zdss_loadHeader;
+            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+            zds->legacyVersion = 0;
+            zds->hostageByte = 0;
+            zds->expectedOutBuffer = *output;
+            /* fall-through */
+
+        case zdss_loadHeader :
+            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+            /* a legacy frame is already in progress : delegate entirely to the legacy decoder */
+            if (zds->legacyVersion) {
+                RETURN_ERROR_IF(zds->staticSize, memory_allocation,
+                    "legacy support is incompatible with static dctx");
+                {   size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
+                    if (hint==0) zds->streamStage = zdss_init;
+                    return hint;
+            }   }
+#endif
+            {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                DEBUGLOG(5, "header size : %u", (U32)hSize);
+                if (ZSTD_isError(hSize)) {
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+                    U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
+                    if (legacyVersion) {
+                        ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
+                        const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
+                        size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
+                        DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
+                        RETURN_ERROR_IF(zds->staticSize, memory_allocation,
+                            "legacy support is incompatible with static dctx");
+                        FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
+                                    zds->previousLegacyVersion, legacyVersion,
+                                    dict, dictSize), "");
+                        zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
+                        {   size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
+                            if (hint==0) zds->streamStage = zdss_init;   /* or stay in stage zdss_loadHeader */
+                            return hint;
+                    }   }
+#endif
+                    return hSize;   /* error */
+                }
+                if (hSize != 0) {   /* need more input */
+                    size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
+                    size_t const remainingInput = (size_t)(iend-ip);
+                    assert(iend >= ip);
+                    if (toLoad > remainingInput) {   /* not enough input to load full header */
+                        if (remainingInput > 0) {
+                            memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+                            zds->lhSize += remainingInput;
+                        }
+                        /* all available input has been stored into headerBuffer */
+                        input->pos = input->size;
+                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    assert(ip != NULL);
+                    memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+                    /* header bytes loaded : loop again to parse them */
+                    break;
+            }   }
+
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
+                if (cSize <= (size_t)(iend-istart)) {
+                    /* shortcut : using single-pass mode */
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+            }   }
+
+            /* Check output buffer is large enough for ZSTD_obm_stable. */
+            if (zds->outBufferMode == ZSTD_obm_stable
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+                RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
+            }
+
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+
+            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
+                        (U32)(zds->fParams.windowSize >>10),
+                        (U32)(zds->maxWindowSize >> 10) );
+            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                            frameParameter_windowTooLarge, "");
+
+            /* Adapt buffer sizes to frame header instructions */
+            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
+                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+                        : 0;
+
+                ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+                /* re-size when buffers are too small, or have been oversized for too long */
+                {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+                    int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+                    
+                    if (tooSmall || tooLarge) {
+                        size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+                        DEBUGLOG(4, "inBuff  : from %u to %u",
+                                    (U32)zds->inBuffSize, (U32)neededInBuffSize);
+                        DEBUGLOG(4, "outBuff : from %u to %u",
+                                    (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+                        if (zds->staticSize) {  /* static DCtx */
+                            DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                            assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                            RETURN_ERROR_IF(
+                                bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+                                memory_allocation, "");
+                        } else {
+                            /* sizes are zeroed first, so a failed allocation leaves a consistent state */
+                            ZSTD_free(zds->inBuff, zds->customMem);
+                            zds->inBuffSize = 0;
+                            zds->outBuffSize = 0;
+                            zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
+                            RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+                        }
+                        /* single area : output buffer starts right after the input buffer */
+                        zds->inBuffSize = neededInBuffSize;
+                        zds->outBuff = zds->inBuff + zds->inBuffSize;
+                        zds->outBuffSize = neededOutBuffSize;
+            }   }   }
+            zds->streamStage = zdss_read;
+            /* fall-through */
+
+        case zdss_read:
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
+                if (neededInSize==0) {  /* end of frame */
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
+                    ip += neededInSize;
+                    /* Function modifies the stage so we must break */
+                    break;
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            /* fall-through */
+
+        case zdss_load:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                size_t const toLoad = neededInSize - zds->inPos;
+                int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                size_t loadedSize;
+                /* At this point we shouldn't be decompressing a block that we can stream. */
+                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
+                if (isSkipFrame) {
+                    /* skippable content is only counted, not copied into inBuff */
+                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
+                } else {
+                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+                                    corruption_detected,
+                                    "should never happen");
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
+                }
+                ip += loadedSize;
+                zds->inPos += loadedSize;
+                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                zds->inPos = 0;   /* input is consumed */
+                FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+                /* Function modifies the stage so we must break */
+                break;
+            }
+        case zdss_flush:
+            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
+                size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
+                op += flushedSize;
+                zds->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {  /* flush completed */
+                    zds->streamStage = zdss_read;
+                    /* rewind to the start of outBuff when the next block might not fit behind outStart */
+                    if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+                        DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+                                (int)(zds->outBuffSize - zds->outStart),
+                                (U32)zds->fParams.blockSizeMax);
+                        zds->outStart = zds->outEnd = 0;
+                    }
+                    break;
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
+        default:
+            assert(0);    /* impossible */
+            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }   }
+
+    /* result */
+    input->pos = (size_t)(ip - (const char*)(input->src));
+    output->pos = (size_t)(op - (char*)(output->dst));
+
+    /* Update the expected output buffer for ZSTD_obm_stable. */
+    zds->expectedOutBuffer = *output;
+
+    /* count consecutive calls that neither consumed input nor produced output */
+    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
+        zds->noForwardProgress ++;
+        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
+            assert(0);
+        }
+    } else {
+        zds->noForwardProgress = 0;
+    }
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
+        if (!nextSrcSizeHint) {   /* frame fully decoded */
+            if (zds->outEnd == zds->outStart) {  /* output fully flushed */
+                if (zds->hostageByte) {
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
+                    input->pos++;  /* release hostage */
+                }   /* zds->hostageByte */
+                return 0;
+            }  /* zds->outEnd == zds->outStart */
+            if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+                input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
+                zds->hostageByte=1;
+            }
+            return 1;
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
+        assert(zds->inPos <= nextSrcSizeHint);
+        nextSrcSizeHint -= zds->inPos;   /* part already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+/* ZSTD_decompressStream_simpleArgs() :
+ * Same as ZSTD_decompressStream(), taking plain pointers and sizes instead of
+ * ZSTD_outBuffer / ZSTD_inBuffer structures.
+ * `*dstPos` and `*srcPos` are read on entry and overwritten on exit with the
+ * positions resulting from the call.
+ * @return : the value returned by ZSTD_decompressStream(). */
+size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos)
+{
+    ZSTD_outBuffer out;
+    ZSTD_inBuffer  in;
+    size_t result;
+
+    out.dst = dst;
+    out.size = dstCapacity;
+    out.pos = *dstPos;
+    in.src = src;
+    in.size = srcSize;
+    in.pos = *srcPos;
+
+    /* ZSTD_decompressStream() checks that each position is within its buffer */
+    result = ZSTD_decompressStream(dctx, &out, &in);
+    *dstPos = out.pos;
+    *srcPos = in.pos;
+    return result;
+}
+/**** ended inlining decompress/zstd_decompress.c ****/
+/**** start inlining decompress/zstd_decompress_block.c ****/
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_decompress_block :
+ * this module takes care of decompressing _compressed_ block */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <string.h>      /* memcpy, memmove, memset */
+/**** skipping file: ../common/compiler.h ****/
+/**** skipping file: ../common/cpu.h ****/
+/**** skipping file: ../common/mem.h ****/
+#define FSE_STATIC_LINKING_ONLY
+/**** skipping file: ../common/fse.h ****/
+#define HUF_STATIC_LINKING_ONLY
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: zstd_decompress_internal.h ****/
+/**** skipping file: zstd_ddict.h ****/
+/**** skipping file: zstd_decompress_block.h ****/
+
+/*_*******************************************************
+*  Macros
+**********************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * ZSTD_decompressSequences implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
+#endif
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+/* ZSTD_copy4() :
+ * copies exactly 4 bytes from `src` to `dst`, through memcpy() */
+static void ZSTD_copy4(void* dst, const void* src)
+{
+    size_t const nbBytes = 4;
+    memcpy(dst, src, nbBytes);
+}
+
+
+/*-*************************************************************
+ *   Block decoding
+ ***************************************************************/
+
+/*! ZSTD_getcBlockSize() :
+ *  Decodes the 3-byte little-endian block header found at `src` into `bpPtr` :
+ *  bit 0 is the last-block flag, bits 1-2 the block type, the upper bits a size.
+ * @return : size of the block content to read next (always 1 for an RLE block,
+ *           whose size field is kept in bpPtr->origSize),
+ *           or an error code : srcSize_wrong when `srcSize` is smaller than a block
+ *           header, corruption_detected when the block type is bt_reserved. */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
+{
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
+
+    {   U32 const header = MEM_readLE24(src);
+        U32 const sizeField = header >> 3;
+        bpPtr->lastBlock = header & 1;
+        bpPtr->blockType = (blockType_e)((header >> 1) & 3);
+        bpPtr->origSize = sizeField;   /* only meaningful for RLE blocks */
+        if (bpPtr->blockType == bt_rle) return 1;
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
+        return sizeField;
+    }
+}
+
+
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize);
+/*! ZSTD_decodeLiteralsBlock() :
+ * @return : nb of bytes read from src (< srcSize )
+ *  note : symbol not declared but exposed for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
+
+    {   const BYTE* const istart = (const BYTE*) src;
+        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+        switch(litEncType)
+        {
+        case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+            /* fall-through */
+
+        case set_compressed:
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+            {   size_t lhSize, litSize, litCSize;
+                U32 singleStream=0;
+                U32 const lhlCode = (istart[0] >> 2) & 3;
+                U32 const lhc = MEM_readLE32(istart);
+                size_t hufSuccess;
+                switch(lhlCode)
+                {
+                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    /* 2 - 2 - 10 - 10 */
+                    singleStream = !lhlCode;
+                    lhSize = 3;
+                    litSize  = (lhc >> 4) & 0x3FF;
+                    litCSize = (lhc >> 14) & 0x3FF;
+                    break;
+                case 2:
+                    /* 2 - 2 - 14 - 14 */
+                    lhSize = 4;
+                    litSize  = (lhc >> 4) & 0x3FFF;
+                    litCSize = lhc >> 18;
+                    break;
+                case 3:
+                    /* 2 - 2 - 18 - 18 */
+                    lhSize = 5;
+                    litSize  = (lhc >> 4) & 0x3FFFF;
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+
+                /* prefetch huffman table if cold */
+                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
+                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+                }
+
+                if (litEncType==set_repeat) {
+                    if (singleStream) {
+                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    } else {
+                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    }
+                } else {
+                    if (singleStream) {
+#if defined(HUF_FORCE_DECOMPRESS_X2)
+                        hufSuccess = HUF_decompress1X_DCtx_wksp(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace));
+#else
+                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+#endif
+                    } else {
+                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+                    }
+                }
+
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                dctx->litEntropy = 1;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return litCSize + lhSize;
+            }
+
+        case set_basic:
+            {   size_t litSize, lhSize;
+                U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    break;
+                }
+
+                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    dctx->litPtr = dctx->litBuffer;
+                    dctx->litSize = litSize;
+                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                    return lhSize+litSize;
+                }
+                /* direct reference into compressed stream */
+                dctx->litPtr = istart+lhSize;
+                dctx->litSize = litSize;
+                return lhSize+litSize;
+            }
+
+        case set_rle:
+            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t litSize, lhSize;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                return lhSize+1;
+            }
+        default:
+            RETURN_ERROR(corruption_detected, "impossible");
+        }
+    }
+}
+
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - prettify output, report below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths.
+ * Entry 0 is the table header; the (1<<LL_DEFAULTNORMLOG) entries after it are the decoding cells.
+ * ZSTD_decodeSeqHeaders() passes this table to ZSTD_buildSeqTable(), which selects it unchanged
+ * when the literal-length encoding type is set_basic. */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+     /* nextState, nbAddBits, nbBits, baseVal */
+     {  0,  0,  4,    0},  { 16,  0,  4,    0},
+     { 32,  0,  5,    1},  {  0,  0,  5,    3},
+     {  0,  0,  5,    4},  {  0,  0,  5,    6},
+     {  0,  0,  5,    7},  {  0,  0,  5,    9},
+     {  0,  0,  5,   10},  {  0,  0,  5,   12},
+     {  0,  0,  6,   14},  {  0,  1,  5,   16},
+     {  0,  1,  5,   20},  {  0,  1,  5,   22},
+     {  0,  2,  5,   28},  {  0,  3,  5,   32},
+     {  0,  4,  5,   48},  { 32,  6,  5,   64},
+     {  0,  7,  5,  128},  {  0,  8,  6,  256},
+     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
+     { 32,  0,  4,    0},  {  0,  0,  4,    1},
+     {  0,  0,  5,    2},  { 32,  0,  5,    4},
+     {  0,  0,  5,    5},  { 32,  0,  5,    7},
+     {  0,  0,  5,    8},  { 32,  0,  5,   10},
+     {  0,  0,  5,   11},  {  0,  0,  6,   13},
+     { 32,  1,  5,   16},  {  0,  1,  5,   18},
+     { 32,  1,  5,   22},  {  0,  2,  5,   24},
+     { 32,  3,  5,   32},  {  0,  3,  5,   40},
+     {  0,  6,  4,   64},  { 16,  6,  4,   64},
+     { 32,  7,  5,  128},  {  0,  9,  6,  512},
+     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
+     { 16,  0,  4,    1},  { 32,  0,  5,    2},
+     { 32,  0,  5,    3},  { 32,  0,  5,    5},
+     { 32,  0,  5,    6},  { 32,  0,  5,    8},
+     { 32,  0,  5,    9},  { 32,  0,  5,   11},
+     { 32,  0,  5,   12},  {  0,  0,  6,   15},
+     { 32,  1,  5,   18},  { 32,  1,  5,   20},
+     { 32,  2,  5,   24},  { 32,  2,  5,   28},
+     { 32,  3,  5,   40},  { 32,  4,  5,   48},
+     {  0, 16,  6,65536},  {  0, 15,  6,32768},
+     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
+};   /* LL_defaultDTable */
+
+/* Default FSE distribution table for Offset Codes.
+ * Entry 0 is the table header; the (1<<OF_DEFAULTNORMLOG) entries after it are the decoding cells.
+ * ZSTD_decodeSeqHeaders() passes this table to ZSTD_buildSeqTable(), which selects it unchanged
+ * when the offset encoding type is set_basic. */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  5,    0},     {  0,  6,  4,   61},
+    {  0,  9,  5,  509},     {  0, 15,  5,32765},
+    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
+    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
+    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
+    {  0,  5,  5,   29},     {  0,  8,  4,  253},
+    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
+    {  0,  2,  5,    1},     { 16,  7,  4,  125},
+    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
+    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
+    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
+    {  0, 19,  5,524285},    {  0,  1,  5,    1},
+    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
+    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
+    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
+    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
+};   /* OF_defaultDTable */
+
+
+/* Default FSE distribution table for Match Lengths.
+ * Entry 0 is the table header; the (1<<ML_DEFAULTNORMLOG) entries after it are the decoding cells.
+ * ZSTD_decodeSeqHeaders() passes this table to ZSTD_buildSeqTable(), which selects it unchanged
+ * when the match-length encoding type is set_basic. */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  6,    3},  {  0,  0,  4,    4},
+    { 32,  0,  5,    5},  {  0,  0,  5,    6},
+    {  0,  0,  5,    8},  {  0,  0,  5,    9},
+    {  0,  0,  5,   11},  {  0,  0,  6,   13},
+    {  0,  0,  6,   16},  {  0,  0,  6,   19},
+    {  0,  0,  6,   22},  {  0,  0,  6,   25},
+    {  0,  0,  6,   28},  {  0,  0,  6,   31},
+    {  0,  0,  6,   34},  {  0,  1,  6,   37},
+    {  0,  1,  6,   41},  {  0,  2,  6,   47},
+    {  0,  3,  6,   59},  {  0,  4,  6,   83},
+    {  0,  7,  6,  131},  {  0,  9,  6,  515},
+    { 16,  0,  4,    4},  {  0,  0,  4,    5},
+    { 32,  0,  5,    6},  {  0,  0,  5,    7},
+    { 32,  0,  5,    9},  {  0,  0,  5,   10},
+    {  0,  0,  6,   12},  {  0,  0,  6,   15},
+    {  0,  0,  6,   18},  {  0,  0,  6,   21},
+    {  0,  0,  6,   24},  {  0,  0,  6,   27},
+    {  0,  0,  6,   30},  {  0,  0,  6,   33},
+    {  0,  1,  6,   35},  {  0,  1,  6,   39},
+    {  0,  2,  6,   43},  {  0,  3,  6,   51},
+    {  0,  4,  6,   67},  {  0,  5,  6,   99},
+    {  0,  8,  6,  259},  { 32,  0,  4,    4},
+    { 48,  0,  4,    4},  { 16,  0,  4,    5},
+    { 32,  0,  5,    7},  { 32,  0,  5,    8},
+    { 32,  0,  5,   10},  { 32,  0,  5,   11},
+    {  0,  0,  6,   14},  {  0,  0,  6,   17},
+    {  0,  0,  6,   20},  {  0,  0,  6,   23},
+    {  0,  0,  6,   26},  {  0,  0,  6,   29},
+    {  0,  0,  6,   32},  {  0, 16,  6,65539},
+    {  0, 15,  6,32771},  {  0, 14,  6,16387},
+    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
+    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
+};   /* ML_defaultDTable */
+
+
+/* ZSTD_buildSeqTable_rle() :
+ * Write a one-cell decoding table into `dt` : a header with tableLog 0 and fastMode 0,
+ * followed by a single cell that consumes no state bits, always returns to state 0,
+ * and carries the given baseValue / nbAddBits. */
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+    void* const headerSlot = dt;   /* go through void* to view slot 0 as a header */
+    ZSTD_seqSymbol_header* const header = (ZSTD_seqSymbol_header*)headerSlot;
+    ZSTD_seqSymbol* const onlyCell = &dt[1];
+
+    /* nbAddBits is stored in a BYTE field below */
+    assert(nbAddBits < 255);
+
+    header->fastMode = 0;
+    header->tableLog = 0;
+
+    onlyCell->baseValue = baseValue;
+    onlyCell->nbAdditionalBits = (BYTE)nbAddBits;
+    onlyCell->nextState = 0;
+    onlyCell->nbBits = 0;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage
+ * @param dt : destination table; slot 0 receives the header, the (1<<tableLog) cells start at dt+1
+ * @param normalizedCounter : one entry per symbol in [0, maxSymbolValue]; -1 marks a low-probability symbol
+ * @param baseValue, nbAdditionalBits : per-symbol values copied into every cell that decodes that symbol
+ * Three passes : low-probability symbols are laid down from the end of the table,
+ * the remaining symbols are spread using FSE_TABLESTEP, then nbBits / nextState are derived per cell. */
+void
+ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog)
+{
+    ZSTD_seqSymbol* const tableDecode = dt+1;   /* cells start right after the header slot */
+    U16 symbolNext[MaxSeq+1];   /* per symbol : next state value to hand out in the last pass */
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;   /* highest cell not yet taken by a low-probability symbol */
+
+    /* Sanity Checks */
+    assert(maxSymbolValue <= MaxSeq);
+    assert(tableLog <= MaxFSELog);
+
+    /* Init, lay down lowprob symbols */
+    {   ZSTD_seqSymbol_header DTableH;
+        DTableH.tableLog = tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].baseValue = s;   /* baseValue temporarily holds the symbol */
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;   /* a count of at least half the table size clears fastMode */
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));   /* header goes into slot 0 of dt */
+    }
+
+    /* Spread symbols */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {   /* never entered for -1 (lowprob) symbols */
+                tableDecode[position].baseValue = s;   /* symbol stored here, replaced by the real baseValue in the last pass */
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            U32 const symbol = tableDecode[u].baseValue;   /* read back the symbol stored by the previous passes */
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+            assert(nbAdditionalBits[symbol] < 255);
+            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].baseValue = baseValue[symbol];   /* overwrite the temporary symbol with its base value */
+    }   }
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ *  Select or build the decoding table matching `type` and publish it through *DTablePtr :
+ *  - set_basic      : use defaultTable, nothing is read from src
+ *  - set_rle        : one byte is read, a single-cell table is written into DTableSpace
+ *  - set_repeat     : *DTablePtr is left untouched (requires flagRepeatTable), optionally prefetched
+ *  - set_compressed : a normalized-count header is read with FSE_readNCount() and expanded into DTableSpace
+ * @return : nb bytes read from src,
+ *           or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+                                 int ddictIsCold, int nbSeq)
+{
+    switch(type)
+    {
+    case set_basic :
+        /* predefined distribution : no header to parse */
+        *DTablePtr = defaultTable;
+        return 0;
+
+    case set_repeat:
+        /* re-use whatever *DTablePtr already designates */
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+            /* prefetch FSE table if used */
+            size_t const tableBytes = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+            const void* const tableStart = *DTablePtr;
+            PREFETCH_AREA(tableStart, tableBytes);
+        }
+        return 0;
+
+    case set_rle :
+        /* a single byte names the one symbol every sequence uses */
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+        {   U32 const rleSymbol = *(const BYTE*)src;
+            ZSTD_buildSeqTable_rle(DTableSpace, baseValue[rleSymbol], nbAdditionalBits[rleSymbol]);
+        }
+        *DTablePtr = DTableSpace;
+        return 1;
+
+    case set_compressed :
+        {   S16 norm[MaxSeq+1];
+            unsigned tableLog;
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
+            *DTablePtr = DTableSpace;
+            return headerSize;
+        }
+
+    default :
+        assert(0);
+        RETURN_ERROR(GENERIC, "impossible");
+    }
+}
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  Parse the header of a sequences section : first the number of sequences
+ *  (1 to 3 bytes), then one byte giving the encoding type of the LL / OF / ML tables,
+ *  then the description of each table, handled by ZSTD_buildSeqTable().
+ *  *nbSeqPtr receives the number of sequences. When that number is 0, the function
+ *  returns right away without touching the tables.
+ * @return : nb bytes consumed from src, or an error code */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+    int nbSeq;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+    /* check */
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+    /* SeqHead : the first byte selects the 1-, 2- or 3-byte form */
+    nbSeq = ip[0];
+    ip += 1;
+    if (nbSeq == 0) {
+        *nbSeqPtr=0;
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+        return 1;
+    }
+    if (nbSeq == 0xFF) {
+        /* 3-byte form : 16-bit little-endian value, offset by LONGNBSEQ */
+        RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+        nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+        ip += 2;
+    } else if (nbSeq > 0x7F) {
+        /* 2-byte form : high bits come from the first byte, low 8 bits from the next */
+        RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+        nbSeq = ((nbSeq-0x80)<<8) + ip[0];
+        ip += 1;
+    }
+    *nbSeqPtr = nbSeq;
+
+    /* FSE table descriptors */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    {   BYTE const encodingTypes = ip[0];
+        symbolEncodingType_e const LLtype = (symbolEncodingType_e)(encodingTypes >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((encodingTypes >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((encodingTypes >> 2) & 3);
+        ip += 1;
+
+        /* Build DTables, in stream order : literal lengths, offsets, match lengths */
+        {   size_t const llhSize = ZSTD_buildSeqTable(
+                                        dctx->entropy.LLTable, &dctx->LLTptr,
+                                        LLtype, MaxLL, LLFSELog,
+                                        ip, iend-ip,
+                                        LL_base, LL_bits,
+                                        LL_defaultDTable, dctx->fseEntropy,
+                                        dctx->ddictIsCold, nbSeq);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += llhSize;
+        }
+
+        {   size_t const ofhSize = ZSTD_buildSeqTable(
+                                        dctx->entropy.OFTable, &dctx->OFTptr,
+                                        OFtype, MaxOff, OffFSELog,
+                                        ip, iend-ip,
+                                        OF_base, OF_bits,
+                                        OF_defaultDTable, dctx->fseEntropy,
+                                        dctx->ddictIsCold, nbSeq);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += ofhSize;
+        }
+
+        {   size_t const mlhSize = ZSTD_buildSeqTable(
+                                        dctx->entropy.MLTable, &dctx->MLTptr,
+                                        MLtype, MaxML, MLFSELog,
+                                        ip, iend-ip,
+                                        ML_base, ML_bits,
+                                        ML_defaultDTable, dctx->fseEntropy,
+                                        dctx->ddictIsCold, nbSeq);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += mlhSize;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {   /* one decoded sequence : copy litLength literals, then matchLength bytes from `offset` back */
+    size_t litLength;     /* number of literal bytes to copy before the match */
+    size_t matchLength;   /* number of bytes to copy from the match */
+    size_t offset;        /* match distance, counted back from the end of the literals just written */
+    const BYTE* match;    /* match address; only filled in by ZSTD_decodeSequence() when prefetch is requested */
+} seq_t;
+
+typedef struct {   /* decoding state of one FSE stream */
+    size_t state;                  /* index of the current cell inside `table` */
+    const ZSTD_seqSymbol* table;   /* first decoding cell, i.e. one entry past the table header (see ZSTD_initFseState) */
+} ZSTD_fseState;
+
+typedef struct {   /* everything ZSTD_decodeSequence() needs to decode successive sequences */
+    BIT_DStream_t DStream;             /* bitstream the sequences are read from */
+    ZSTD_fseState stateLL;             /* FSE state for literal lengths */
+    ZSTD_fseState stateOffb;           /* FSE state for offsets */
+    ZSTD_fseState stateML;             /* FSE state for match lengths */
+    size_t prevOffset[ZSTD_REP_NUM];   /* recent offsets, [0] being the latest; used for repeat offsets */
+    const BYTE* prefixStart;           /* base used to compute seq.match when the offset does not exceed pos (prefetch only) */
+    const BYTE* dictEnd;               /* base used to compute seq.match when the offset exceeds pos (prefetch only) */
+    size_t pos;                        /* running sum of litLength+matchLength, updated only when prefetching */
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];   /* first 4 bytes are copied one at a time */
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];   /* adjust the source before the second 4-byte copy */
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;   /* combined with the += 8 below, this sets the final source position */
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param op     destination; `length` bytes are produced starting here
+ *  @param oend_w limit up to which ZSTD_wildcopy() is used; whatever remains is copied byte by byte
+ *  @param ip     source
+ *  @param length number of bytes to copy
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;   /* distance from source to destination; passed to ZSTD_overlapCopy8() as the offset */
+    BYTE* const oend = op + length;   /* end of this copy, not of the output buffer */
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);   /* advances both op and ip */
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;   /* ip must be advanced before op is overwritten on the next line */
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ *
+ * *litPtr is advanced past the literals that were copied.
+ * @return : sequence.litLength + sequence.matchLength, or an error code
+ *           (dstSize_tooSmall / corruption_detected) when one of the bounds checks fails.
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;   /* where the literals end and the match starts in the output */
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* wildcopy limit passed to ZSTD_safecopy() */
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart-match);   /* rebase the match into the segment that ends at dictEnd */
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);   /* match lies entirely before dictEnd */
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;   /* part of the match located before dictEnd */
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;   /* the remainder continues from the start of the prefix */
+    }   }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{   /* Copy one decoded sequence (literals, then match) to op and advance *litPtr past the literals.
+     * Edge cases are handed over to ZSTD_execSequenceEnd(); the remaining cases are copied with
+     * ZSTD_copy16() / ZSTD_wildcopy().
+     * Returns sequence.litLength + sequence.matchLength, or an error code. */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLENGTH of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));   /* always copy 16 bytes first, whatever litLength is */
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);   /* rebase the match into the segment that ends at dictEnd */
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);   /* match lies entirely before dictEnd */
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;   /* part of the match located before dictEnd */
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;   /* the remainder continues from the start of the prefix */
+    }   }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+/* ZSTD_initFseState() :
+ * Prepare `DStatePtr` for decoding with table `dt` : the initial state is read from
+ * `bitD` using the tableLog stored in the header (slot 0 of `dt`), the bitstream is
+ * reloaded, and the state's table pointer is set to the first cell after the header. */
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+    const void* const headerSlot = dt;   /* go through void* to view slot 0 as a header */
+    const ZSTD_seqSymbol_header* const header = (const ZSTD_seqSymbol_header*)headerSlot;
+
+    /* decoding cells start one entry after the header */
+    DStatePtr->table = &dt[1];
+
+    DStatePtr->state = BIT_readBits(bitD, header->tableLog);
+    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+                (U32)DStatePtr->state, header->tableLog);
+    BIT_reloadDStream(bitD);
+}
+
+/* ZSTD_updateFseState() :
+ * Move `DStatePtr` to its next state : look up the current cell, read that cell's
+ * nbBits from `bitD`, and add them to the cell's nextState. */
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+    ZSTD_seqSymbol const current = DStatePtr->table[DStatePtr->state];
+    U32 const bitsToRead = current.nbBits;
+    size_t const extra = BIT_readBits(bitD, bitsToRead);
+
+    DStatePtr->state = current.nextState + extra;
+}
+
+/* ZSTD_updateFseStateWithDInfo() :
+ * Same transition as ZSTD_updateFseState(), except that the current cell is
+ * supplied by the caller in `DInfo` instead of being looked up in the table. */
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+    U32 const bitsToRead = DInfo.nbBits;
+    size_t const extra = BIT_readBits(bitD, bitsToRead);
+
+    DStatePtr->state = DInfo.nextState + extra;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bits we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
+    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
+        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
+        : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;   /* tells ZSTD_decodeSequence() whether offsets may need the two-step read used on 32-bit builds */
+typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;   /* tells ZSTD_decodeSequence() whether to also compute seq.match for prefetching */
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+{
+    seq_t seq;
+    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+    U32 const llBase = llDInfo.baseValue;
+    U32 const mlBase = mlDInfo.baseValue;
+    U32 const ofBase = ofDInfo.baseValue;
+    BYTE const llBits = llDInfo.nbAdditionalBits;
+    BYTE const mlBits = mlDInfo.nbAdditionalBits;
+    BYTE const ofBits = ofDInfo.nbAdditionalBits;
+    BYTE const totalBits = llBits+mlBits+ofBits;
+
+    /* sequence */
+    {   size_t offset;
+        if (ofBits > 1) {
+            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
+            } else {
+                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        } else {
+            U32 const ll0 = (llBase == 0);
+            if (LIKELY((ofBits == 0))) {
+                if (LIKELY(!ll0))
+                    offset = seqState->prevOffset[0];
+                else {
+                    offset = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset;
+                }
+            } else {
+                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset = temp;
+        }   }   }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = mlBase;
+    if (mlBits > 0)
+        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+    seq.litLength = llBase;
+    if (llBits > 0)
+        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    if (prefetch == ZSTD_p_prefetch) {
+        size_t const pos = seqState->pos + seq.litLength;
+        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update
+     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+     * better option, so it is the default for other compilers. But, if you
+     * measure that it is worse, please put up a pull request.
+     */
+    {
+#if defined(__GNUC__) && !defined(__clang__)
+        const int kUseUpdateFseState = 1;
+#else
+        const int kUseUpdateFseState = 0;
+#endif
+        if (kUseUpdateFseState) {
+            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+        } else {
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+        }
+    }
+
+    return seq;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+/* ZSTD_dictionaryIsActive() :
+ * Fuzzing-only helper. Tells whether the dictionary content delimited by
+ * dctx->dictContentBeginForFuzzing / dctx->dictContentEndForFuzzing
+ * can still be referenced by a match ending its literals at `oLitEnd`.
+ * @return : 1 when the dictionary is active, 0 otherwise. */
+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    /* No dictionary was recorded at all. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* The current prefix begins exactly where the dictionary content begins. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Otherwise the dictionary has to be the ext-dict segment, and the amount
+     * of data between prefixStart and oLitEnd must be below the window size. */
+    return (dctx->dictEnd == dctx->dictContentEndForFuzzing)
+        && ((size_t)(oLitEnd - prefixStart) < dctx->fParams.windowSize);
+}
+
+/* ZSTD_assertValidSequence() :
+ * Fuzzing-only sanity checks on a decoded sequence, expressed as assert() :
+ * - literals + match fit in the remaining output space [op, oend)
+ * - literals + match do not exceed ZSTD_BLOCKSIZE_MAX
+ * - the offset stays within the window, extended by the dictionary size
+ *   when ZSTD_dictionaryIsActive() reports an active dictionary. */
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+    size_t const maxWindow = dctx->fParams.windowSize;
+    size_t const seqBytes = seq.litLength + seq.matchLength;
+    BYTE const* const litsEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    /* Output-side bounds. */
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= seqBytes);
+    assert(seqBytes <= ZSTD_BLOCKSIZE_MAX);
+    if (!ZSTD_dictionaryIsActive(dctx, prefixStart, litsEnd)) {
+        /* No active dictionary : offset must be within our window. */
+        assert(seq.offset <= maxWindow);
+    } else {
+        char const* const dictFirst = (char const*)dctx->dictContentBeginForFuzzing;
+        char const* const dictLast = (char const*)dctx->dictContentEndForFuzzing;
+        size_t const dictBytes = (size_t)(dictLast - dictFirst);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(litsEnd - virtualStart));
+        assert(seq.offset <= maxWindow + dictBytes);
+    }
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+/* ZSTD_decompressSequences_body() :
+ * Regenerates the content of one block into `dst`.
+ * Decodes `nbSeq` sequences from the bitstream [seqStart, seqStart+seqSize)
+ * with ZSTD_decodeSequence() (ZSTD_p_noPrefetch), executes each of them with
+ * ZSTD_execSequence(), then copies the literals remaining between
+ * dctx->litPtr and dctx->litPtr + dctx->litSize.
+ * Repeat offsets are loaded from dctx->entropy.rep and written back to it
+ * once all sequences have been decoded successfully.
+ * `frame` is only read by the fuzzing-build assertions.
+ * @return : nb of bytes written into `dst`, or an error code :
+ *           - corruption_detected : bitstream cannot be initialized,
+ *             or is not fully consumed after the last sequence
+ *           - dstSize_tooSmall : last literals do not fit in `dst`
+ *           - any error reported by ZSTD_execSequence() */
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;   /* only used when the fuzzing assertions below are compiled in */
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        size_t error = 0;   /* first/last error met inside the loop, reported after it */
+        dctx->fseEntropy = 1;
+        /* start from the repeat offsets stored in the context */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        /* FSE states are initialized in this order : literal lengths, offsets, match lengths */
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        /* The end-of-stream test after the loop compares statuses with `<`,
+         * which relies on this ordering of the BIT_DStream_* values. */
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * I've been able to reproduce this issue on the following CPUs:
+         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
+         *   - Skylake
+         *
+         * If you are seeing performance instability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 4");
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            BIT_reloadDStream(&(seqState.DStream));
+            /* gcc and clang both don't like early returns in this loop.
+             * gcc doesn't like early breaks either.
+             * Instead save an error and report it at the end.
+             * When there is an error, don't increment op, so we don't
+             * overwrite.
+             */
+            if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
+            else op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq)) break;   /* the loop always runs exactly nbSeq times */
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        if (ZSTD_isError(error)) return error;
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        /* a status below BIT_DStream_completed means the bitstream was not entirely consumed */
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {   /* never call memcpy() with a NULL destination */
+            memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+/* ZSTD_decompressSequences_default() :
+ * Plain (no target attribute) instantiation of ZSTD_decompressSequences_body().
+ * All arguments are forwarded unchanged, and its result is returned as is. */
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong_body() :
+ * Same job as ZSTD_decompressSequences_body(), using a decoding pipeline :
+ * sequences are decoded (ZSTD_p_prefetch) up to ADVANCED_SEQS ahead of the one
+ * being executed, and the match location of each decoded sequence is
+ * prefetched. Decoded-but-not-yet-executed sequences are kept in a ring
+ * buffer of STORED_SEQS entries.
+ * Repeat offsets are loaded from dctx->entropy.rep and written back to it
+ * once all sequences have been executed successfully.
+ * `frame` is only read by the fuzzing-build assertions.
+ * @return : nb of bytes written into `dst`, or an error code :
+ *           - corruption_detected : bitstream cannot be initialized,
+ *             or overflows before `nbSeq` sequences have been decoded
+ *           - dstSize_tooSmall : last literals do not fit in `dst`
+ *           - any error reported by ZSTD_execSequence() (returned immediately) */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;   /* only used when the fuzzing assertions below are compiled in */
+
+    /* Regen sequences */
+    if (nbSeq) {
+/* STORED_SEQS : size of the ring buffer of decoded sequences; indices are wrapped with STORED_SEQS_MASK.
+ * ADVANCED_SEQS : how many sequences are decoded ahead of execution. */
+#define STORED_SEQS 4
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        /* start from the repeat offsets stored in the context */
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        /* fields read by ZSTD_decodeSequence() to compute the match address to prefetch */
+        seqState.prefixStart = prefixStart;
+        seqState.pos = (size_t)(op-prefixStart);
+        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        /* FSE states are initialized in this order : literal lengths, offsets, match lengths */
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        /* decode the first seqAdvance sequences without executing any of them */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+        }
+        /* the loop above stopped early : bitstream status went beyond BIT_DStream_completed */
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        /* each iteration decodes sequence seqNb and executes the one decoded ADVANCED_SEQS iterations earlier */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            /* the slot of the sequence just executed is reused for the one just decoded */
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        /* the loop above stopped early : bitstream status went beyond BIT_DStream_completed */
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        /* rewind to the first decoded sequence which has not been executed yet */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {   /* never call memcpy() with a NULL destination */
+            memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+/* ZSTD_decompressSequencesLong_default() :
+ * Plain (no target attribute) instantiation of ZSTD_decompressSequencesLong_body().
+ * All arguments are forwarded unchanged, and its result is returned as is. */
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+/* ZSTD_decompressSequences_bmi2() :
+ * Instantiation of ZSTD_decompressSequences_body() carrying TARGET_ATTRIBUTE("bmi2").
+ * Only compiled when DYNAMIC_BMI2 is enabled.
+ * All arguments are forwarded unchanged, and its result is returned as is. */
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong_bmi2() :
+ * Instantiation of ZSTD_decompressSequencesLong_body() carrying TARGET_ATTRIBUTE("bmi2").
+ * Only compiled when DYNAMIC_BMI2 is enabled.
+ * All arguments are forwarded unchanged, and its result is returned as is. */
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+/* ZSTD_decompressSequences_t :
+ * pointer to a function having the signature shared by the
+ * ZSTD_decompressSequences*() and ZSTD_decompressSequencesLong*() variants of this file. */
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+/* ZSTD_decompressSequences() :
+ * Entry point of the non-prefetching sequence decoder.
+ * When DYNAMIC_BMI2 is enabled and dctx->bmi2 is set, dispatches to the
+ * "bmi2" instantiation; otherwise uses the default one.
+ * @return : result of the selected instantiation (size written, or an error code) */
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * Decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * Note : the word "long" is overloaded in this file : it sometimes means
+ * "wider than the bitstream register", and sometimes "farther than memory cache distance".
+ * This function tries to mitigate main memory latency through the use of prefetching.
+ * When DYNAMIC_BMI2 is enabled and dctx->bmi2 is set, dispatches to the
+ * "bmi2" instantiation; otherwise uses the default one.
+ * @return : result of the selected instantiation (size written, or an error code) */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * Counts the cells of the offset decoding table which require more than
+ * 22 additional bits, then rescales that count as if the table
+ * had (1<<OffFSELog) cells.
+ * @return : "share" of long offsets, out of a maximum of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    /* the first cell of the table is read as a header holding tableLog */
+    const void* const header = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)header)[0].tableLog;
+    const ZSTD_seqSymbol* const cells = offTable + 1;
+    U32 const nbCells = 1 << tableLog;
+    U32 nbLong = 0;
+    U32 idx;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(nbCells <= (1 << OffFSELog));  /* table not too large */
+    for (idx = 0; idx < nbCells; idx++) {
+        nbLong += (cells[idx].nbAdditionalBits > 22);
+    }
+
+    assert(tableLog <= OffFSELog);
+    /* scale to OffFSELog */
+    return nbLong << (OffFSELog - tableLog);
+}
+#endif
+
+/* ZSTD_decompressBlock_internal() :
+ * Decodes one compressed block from `src` into `dst`, in 3 steps :
+ * 1. literals section, via ZSTD_decodeLiteralsBlock()
+ * 2. sequences section header, via ZSTD_decodeSeqHeaders()
+ * 3. sequences themselves, via ZSTD_decompressSequences() or
+ *    ZSTD_decompressSequencesLong(). The choice is forced at build time by
+ *    ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT / _LONG, otherwise made at run time.
+ * `frame` == 0 is the value passed by ZSTD_decompressBlock(); in that case
+ * dctx->fParams.windowSize is not consulted.
+ * Side effect : dctx->ddictIsCold is reset to 0.
+ * @return : result of the selected sequence decoder, or an error code :
+ *           - srcSize_wrong : srcSize >= ZSTD_BLOCKSIZE_MAX
+ *           - dstSize_tooSmall : dst == NULL while the block contains sequences
+ *           - any error from the literals or sequences-header decoders */
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        /* skip the literals section : what remains is the sequences section */
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
+         */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        /* a cold dictionary selects the prefetching decoder right away */
+        int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+        int nbSeq;
+        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        /* skip the sequences header : what remains is the sequences bitstream */
+        ip += seqHSize;
+        srcSize -= seqHSize;
+
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        /* Run-time heuristic : only evaluated in block mode or when the window
+         * is larger than (1<<24), and when there are more than ADVANCED_SEQS sequences. */
+        if ( !usePrefetchDecoder
+          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+            usePrefetchDecoder = (shareLongOffsets >= minShare);
+        }
+#endif
+
+        dctx->ddictIsCold = 0;
+
+        /* Note : depending on the build macros, the `if` below guards the
+         * first `return`, or is absent, leaving a single unconditional `return`. */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+        /* else */
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+    }
+}
+
+
+/* ZSTD_checkContinuity() :
+ * When `dst` does not start where the previous output ended
+ * (dctx->previousDstEnd), the previous output segment becomes the external
+ * dictionary segment and a new prefix is started at `dst`.
+ * Nothing is modified when `dst` is contiguous with the previous output. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+{
+    if (dst == dctx->previousDstEnd) return;   /* contiguous : keep current segments */
+
+    /* not contiguous */
+    {   const char* const oldStart = (const char*)(dctx->prefixStart);
+        const char* const oldEnd = (const char*)(dctx->previousDstEnd);
+        dctx->dictEnd = dctx->previousDstEnd;
+        /* virtualStart is set so that (dst - virtualStart) equals the size of the old segment */
+        dctx->virtualStart = (const char*)dst - (oldEnd - oldStart);
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+/* ZSTD_decompressBlock() :
+ * Decompresses a single compressed block outside of any frame
+ * (`frame` == 0 is passed to ZSTD_decompressBlock_internal()).
+ * The context's segment pointers are first adjusted for `dst`
+ * by ZSTD_checkContinuity().
+ * @return : nb of bytes written into `dst`,
+ *           or an error code, which can be tested using ZSTD_isError().
+ * On error, dctx->previousDstEnd is not advanced : an error code is not a
+ * size, so adding it to `dst` would store a meaningless pointer in the
+ * context. previousDstEnd then stays equal to `dst`, as left by
+ * ZSTD_checkContinuity(). */
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTD_checkContinuity(dctx, dst);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    /* do not turn an error code into a pointer offset */
+    if (ZSTD_isError(dSize)) return dSize;
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
+/**** ended inlining decompress/zstd_decompress_block.c ****/

diff --git a/zfs/module/zstd/lib/zstd.h b/zfs/module/zstd/lib/zstd.h
new file mode 100644
index 0000000..b6772f8
--- /dev/null
+++ b/zfs/module/zstd/lib/zstd.h

@@ -0,0 +1,2115 @@
+/*
+ * BSD 3-Clause Clear License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. All rights reserved.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
+#include <limits.h>   /* INT_MAX */
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDLIB_API ZSTDLIB_VISIBILITY
+#endif
+
+
+/*******************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit context)
+    - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+/*------   Version   ------*/
+#define ZSTD_VERSION_MAJOR    1
+#define ZSTD_VERSION_MINOR    4
+#define ZSTD_VERSION_RELEASE  5
+
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library version */
+
+#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+#define ZSTD_QUOTE(str) #str
+#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+ZSTDLIB_API const char* ZSTD_versionString(void);   /* requires v1.3.0+ */
+
+/* *************************************
+ *  Default constant
+ ***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+/* *************************************
+ *  Constants
+ ***************************************/
+
+/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */
+#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
+#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
+
+#define ZSTD_BLOCKSIZELOG_MAX  17
+#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
+
+
+
+/***************************************
+*  Simple API
+***************************************/
+/*! ZSTD_compress() :
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity`),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+
+/*! ZSTD_decompress() :
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+
+/*! ZSTD_getFrameContentSize() : requires v1.3.0+
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of `src` frame content, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
+ *            such as ZSTD_compress(), ZSTD_compressCCtx(), ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/*! ZSTD_getDecompressedSize() :
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results to the same return value (0),
+ *  while ZSTD_getFrameContentSize() gives them separate return values.
+ * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD frame or skippable frame.
+ * `srcSize` must be >= first frame size
+ * @return : the compressed size of the first frame starting at `src`,
+ *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+ *        or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+
+/*======  Helper functions  ======*/
+#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+
+
+/***************************************
+*  Explicit context
+***************************************/
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use one different context per thread for parallel execution.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ *  Important : in order to behave similarly to `ZSTD_compress()`,
+ *  this function compresses at requested compression level,
+ *  __ignoring any other parameter__ .
+ *  If any advanced parameter was set using the advanced API,
+ *  they will all be reset. Only `compressionLevel` remains.
+ */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
+
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive decompression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(),
+ *  requires an allocated ZSTD_DCtx.
+ *  Compatible with sticky parameters.
+ */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
+
+
+/***************************************
+*  Advanced compression API
+***************************************/
+
+/* API design :
+ *   Parameters are pushed one by one into an existing context,
+ *   using ZSTD_CCtx_set*() functions.
+ *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+ *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+ *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *
+ *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
+ *
+ *   This API supersedes all other "advanced" API entry points in the experimental section.
+ *   In the future, we expect to remove from the experimental API those entry points which are redundant with this API.
+ */
+
+
+/* Compression strategies, listed from fastest to strongest */
+typedef enum { ZSTD_fast=1,
+               ZSTD_dfast=2,
+               ZSTD_greedy=3,
+               ZSTD_lazy=4,
+               ZSTD_lazy2=5,
+               ZSTD_btlazy2=6,
+               ZSTD_btopt=7,
+               ZSTD_btultra=8,
+               ZSTD_btultra2=9
+               /* note : new strategies _might_ be added in the future.
+                         Only the order (from fast to strong) is guaranteed */
+} ZSTD_strategy;
+
+
+typedef enum {
+
+    /* compression parameters
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict.
+     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+                              * Note that exact compression parameters are dynamically determined,
+                              * depending on both compression level and srcSize (when known).
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
+                              * Note 1 : it's possible to pass a negative compression level.
+                              * Note 2 : setting a level does not automatically set all other compression parameters
+                              *   to default. Setting this will however eventually dynamically impact the compression
+                              *   parameters which have not been manually set. The manually set
+                              *   ones will 'stick'. */
+    /* Advanced compression parameters :
+     * It's possible to pin down compression parameters to some specific values.
+     * In which case, these values are no longer dynamically selected by the compressor */
+    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * This will set a memory budget for streaming decompression,
+                              * with larger values requiring more memory
+                              * and typically compressing more.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "use default windowLog".
+                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
+                              *       requires explicitly allowing such size at streaming decompression stage. */
+    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
+                              * Resulting memory usage is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "use default hashLog". */
+    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
+                              * Resulting memory usage is (1 << (chainLog+2)).
+                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless for "fast" strategy.
+                              * It's still useful when using "dfast" strategy,
+                              * in which case it defines a secondary probe table.
+                              * Special: value 0 means "use default chainLog". */
+    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless for "fast" and "dFast" strategies.
+                              * Special: value 0 means "use default searchLog". */
+    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
+                              * Note that Zstandard can still find matches of smaller size,
+                              * it just tweaks its search algorithm to look for this size and larger.
+                              * Larger values increase compression and decompression speed, but decrease ratio.
+                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              *                    , for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
+    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
+                              * For strategies btopt, btultra & btultra2:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
+                              * Special: value 0 means "use default targetLength". */
+    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "use default strategy". */
+
+    /* LDM mode parameters */
+    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                     * This parameter is designed to improve compression ratio
+                                     * for large inputs, by finding large matches at long distance.
+                                     * It increases memory usage and window size.
+                                     * Note: enabling this parameter raises the default window size to 128 MB (ZSTD_c_windowLog = 27)
+                                     * except when expressly set to a different value. */
+    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+                              * default: windowlog - 7.
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashRateLog". */
+
+    /* frame parameters */
+    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
+                              * Content size must be known at the beginning of compression.
+                              * This is automatically the case when using ZSTD_compress2(),
+                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+    ZSTD_c_checksumFlag=201, /* A 32-bit checksum of content is written at end of frame (default:0) */
+    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+    /* multi-threading parameters */
+    /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+     * They return an error otherwise. */
+    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
+                              * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
+                              * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
+                              * while compression work is performed in parallel, within worker threads.
+                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+                              * More workers improve speed, but also increase memory usage.
+                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
+    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * The minimum size is automatically and transparently enforced. */
+    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
+                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+                              * It helps preserve compression ratio, while each job is compressed in parallel.
+                              * This value is enforced only when nbWorkers >= 1.
+                              * Larger values increase compression ratio, but decrease speed.
+                              * Possible values range from 0 to 9 :
+                              * - 0 means "default" : value will be determined by the library, depending on strategy
+                              * - 1 means "no overlap"
+                              * - 9 means "full overlap", using a full window size.
+                              * Each intermediate rank increases/decreases load size by a factor 2 :
+                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
+                              * default value varies between 6 and 9, depending on strategy */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_c_rsyncable
+     * ZSTD_c_format
+     * ZSTD_c_forceMaxWindow
+     * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly;
+     *        also, the enums values themselves are unstable and can still change.
+     */
+     ZSTD_c_experimentalParam1=500,
+     ZSTD_c_experimentalParam2=10,
+     ZSTD_c_experimentalParam3=1000,
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004
+} ZSTD_cParameter;
+
+typedef struct {
+    size_t error;      /* error status : test it with ZSTD_isError() before reading the bounds */
+    int lowerBound;    /* lowest accepted value, inclusive */
+    int upperBound;    /* highest accepted value, inclusive */
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is generally only possible during frame initialization (before starting compression).
+ *  Exception : when using multi-threading mode (nbWorkers >= 1),
+ *              the following parameters can be updated _during_ compression (within same frame):
+ *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ *              new parameters will be active for next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  Value will be written in frame header, unless explicitly forbidden using ZSTD_c_contentSizeFlag.
+ *  This value will also be controlled at end of frame, and trigger an error if not respected.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ *  Note 3 : Whenever all input data is provided and consumed in a single round,
+ *           for example with ZSTD_compress2(),
+ *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
+ *           this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+    ZSTD_reset_session_only = 1,            /* stop current frame, get ready for a new one; parameters and dictionary are kept */
+    ZSTD_reset_parameters = 2,              /* all parameters back to "default"; also removes any dictionary reference */
+    ZSTD_reset_session_and_parameters = 3   /* both : session reset, followed by parameters reset */
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ *  There are 2 different things that can be reset, independently or jointly :
+ *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ *                  Useful after an error, or to interrupt any ongoing compression.
+ *                  Any internal data not yet flushed is cancelled.
+ *                  Compression parameters and dictionary remain unchanged.
+ *                  They will be used to compress next frame.
+ *                  Resetting session never fails.
+ *  - The parameters : changes all parameters back to "default".
+ *                  This removes any reference to any dictionary too.
+ *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ *  - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  ZSTD_compress2() always starts a new frame.
+ *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - The function is always blocking, returns when compression is completed.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ *           or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
+                                   void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+
+/***************************************
+*  Advanced decompression API
+***************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ *        Therefore, no new decompression function is necessary.
+ */
+
+typedef enum {
+
+    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
+                              * the streaming API will refuse to allocate memory buffer
+                              * in order to protect the host from unreasonable memory requirements.
+                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_d_format
+     * ZSTD_d_stableOutBuffer
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly
+     */
+     ZSTD_d_experimentalParam1=1000,
+     ZSTD_d_experimentalParam2=1001
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ *  Set one decompression parameter, selected by enum ZSTD_dParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ *  Return a DCtx to clean state.
+ *  Session and parameters can be reset jointly or separately.
+ *  Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/****************************
+*  Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+  const void* src;    /**< start of input buffer */
+  size_t size;        /**< size of input buffer */
+  size_t pos;         /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+  void*  dst;         /**< start of output buffer */
+  size_t size;        /**< size of output buffer */
+  size_t pos;         /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+
+/*-***********************************************************************
+*  Streaming compression - HowTo
+*
+*  A ZSTD_CStream object is required to track streaming operation.
+*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+*  For parallel execution, use one separate ZSTD_CStream per thread.
+*
+*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+*  Parameters are sticky : when starting a new compression on the same context,
+*  it will re-use the same sticky parameters as previous compression session.
+*  When in doubt, it's recommended to fully initialize the context before usage.
+*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+*  set more specific parameters, the pledged source size, or load a dictionary.
+*
+*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+*  consume input stream. The function will automatically update both `pos`
+*  fields within `input` and `output`.
+*  Note that the function may not consume the entire input, for example, because
+*  the output buffer is already full, in which case `input.pos < input.size`.
+*  The caller must check if input has been entirely consumed.
+*  If not, the caller must make some room to receive more compressed data,
+*  and then present again remaining input data.
+*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+*        but doesn't guarantee maximal forward progress. This is especially relevant
+*        when compressing with multiple threads. The call won't block if it can
+*        consume some input, but if it can't it will wait for some, but not all,
+*        output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+*           or an error code, which can be tested using ZSTD_isError().
+*
+*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+*  operation.
+*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if internal buffers are entirely flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  The epilogue is required for decoders to consider a frame completed.
+*  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+*  start a new frame.
+*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if frame fully completed and fully flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
+ *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *                                                  The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            Before starting a new compression job, or changing compression parameters,
+ *            it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that more time ends up being spent crossing the interface than in compression itself.
+ * In which cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the nb of roundtrips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * The following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
+ *
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+*  Streaming decompression - HowTo
+*
+*  A ZSTD_DStream object is required to track streaming operations.
+*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+*  ZSTD_DStream objects can be re-used multiple times.
+*
+*  Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+*  Alternatively, use advanced API to set specific properties.
+*
+*  Use ZSTD_decompressStream() repetitively to consume your input.
+*  The function will update both `pos` fields.
+*  If `input.pos < input.size`, some input has not been consumed.
+*  It's up to the caller to present again remaining data.
+*  The function tries to flush all data decoded immediately, respecting output buffer size.
+*  If `output.pos < output.size`, decoder has flushed everything it could.
+*  But if `output.pos == output.size`, there might be some data left within internal buffers.
+*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*        or an error code, which can be tested using ZSTD_isError(),
+*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*                                the return value is a suggested next input size (just a hint for better latency)
+*                                that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
+
+
+/**************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/***********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict(). */
+ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict() */
+ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+
+
+/********************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                 const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                 const void* prefix, size_t prefixSize);
+
+/* ===   Memory management   === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/****************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct { /* element type of the outSeqs array passed to ZSTD_getSequences() */
+    unsigned int matchPos; /* Match position in dst */
+    /* Meaning of `offset` : if seqDef.offset > 3, then this is seqDef.offset - 3
+     * If seqDef.offset < 3, then this is the corresponding repeat offset
+     * But if seqDef.offset < 3 and litLength == 0, this is the
+     *   repeat offset before the corresponding repeat offset
+     * And if seqDef.offset == 3 and litLength == 0, this is the
+     *   most recent repeat offset - 1
+     */
+    unsigned int offset; /* Match offset, derived from seqDef.offset as described in the comment above */
+    unsigned int litLength; /* Literal length */
+    unsigned int matchLength; /* Match length */
+    /* Meaning of `rep` : 0 when seq not rep and seqDef.offset otherwise
+     * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+     */
+    unsigned int rep; /* Repeat code, see the comment above for its value range */
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /**< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /**< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /**< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct { /* bundles compression and frame parameters; this is the type returned by ZSTD_getParams() */
+    ZSTD_compressionParameters cParams; /**< compression parameters : windowLog, chainLog, hashLog, searchLog, minMatch, targetLength, strategy */
+    ZSTD_frameParameters fParams;       /**< frame parameters : contentSizeFlag, checksumFlag, noDictIDFlag */
+} ZSTD_parameters;
+
+typedef enum {
+    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+    ZSTD_dlm_byCopy = 0,  /**< Copy dictionary content internally */
+    ZSTD_dlm_byRef = 1    /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
+    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
+                                 * Useful to save 4 bytes per generated frame.
+                                 * Decoder cannot automatically recognise this format, requiring this instruction. */
+} ZSTD_format_e;
+
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in three ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+  ZSTD_lcm_auto = 0,          /**< Automatically determine the compression mode based on the compression level.
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       /**< Always attempt Huffman compression. Uncompressed literals will still be
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2   /**< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+
+/***************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - upper-bound for the decompressed size of all data in all successive frames
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+/*! ZSTD_getSequences() :
+ * Extract sequences from the sequence store
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ * @return : number of sequences extracted
+ */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize);
+
+
+/***************************************
+*  Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ *  for any compression level up to selected one.
+ *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ *         does not include space for a window buffer.
+ *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  The estimate will assume the input may be arbitrarily large,
+ *  which is the worst case.
+ *
+ *  When srcSize can be bound by a known and rather "small" value,
+ *  this fact can be used to provide a tighter estimation
+ *  because the CCtx compression context will need less memory.
+ *  This tighter estimation can be provided by more advanced functions
+ *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ *  Note 2 : only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, whose additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
+ */
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
+
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_customMem customMem);
+
+
+
+/***************************************
+*  Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is just referenced, not duplicated.
+ *  As a consequence, `dictBuffer` **must** outlive CDict,
+ *  and its content must remain unmodified throughout the lifetime of CDict.
+ *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+ *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ *  Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ *  optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ *  This function never fails (wide contract) */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+ *  Note : this function is now DEPRECATED.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  Note : this function is now REDUNDANT.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_CDict* cdict,
+                                              ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* ===   experimental parameters   === */
+/* these parameters can be used with ZSTD_CCtx_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_e enum definition for details.
+ */
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/*! ZSTD_CCtx_getParameter() :
+ *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ *  Quick howto :
+ *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ *                                     an existing ZSTD_CCtx_params structure.
+ *                                     This is similar to
+ *                                     ZSTD_CCtx_setParameter().
+ *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ *                                    an existing CCtx.
+ *                                    These parameters will be applied to
+ *                                    all subsequent frames.
+ *  - ZSTD_compressStream2() : Do compression using the CCtx.
+ *  - ZSTD_freeCCtxParams() : Free the memory.
+ *
+ *  This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
+ *  for static allocation of CCtx for single-threaded compression.
+ */
+ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_reset() :
+ *  Reset params to default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_init() :
+ *  Initializes the compression parameters of cctxParams according to
+ *  compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+
+/*! ZSTD_CCtxParams_init_advanced() :
+ *  Initializes the compression and frame parameters of cctxParams according to
+ *  params. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() :
+ *  Similar to ZSTD_CCtx_setParameter.
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Apply a set of ZSTD_CCtx_params to the compression context.
+ *  This can be done even after compression is started,
+ *    if nbWorkers==0, this will have no impact until a new compression is started.
+ *    if nbWorkers>=1, new parameters will be picked up at next job,
+ *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(),
+ *  but using only integral types as arguments.
+ *  This variant might be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/***************************************
+*  Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but references `dict` content instead of copying it into `dctx`.
+ *  This saves memory if `dict` remains around.
+ *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but gives direct control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/*! ZSTD_DCtx_setFormat() :
+ *  Instruct the decoder context about what kind of data to decode next.
+ *  This instruction is mandatory to decode data without a fully-formed header,
+ *  such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+
+/*! ZSTD_decompressStream_simpleArgs() :
+ *  Same as ZSTD_decompressStream(),
+ *  but using only integral types as arguments.
+ *  This can be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos);
+
+
+/********************************************************************
+*  Advanced streaming functions
+*  Warning : most of these functions are now redundant with the Advanced API.
+*  Once Advanced API reaches "stable" status,
+*  redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*=====   Advanced Streaming compression functions  =====*/
+/**! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/**! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/**! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/**! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/**! ZSTD_initCStream_usingCDict_advanced() :
+ *   This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ *  start a new frame, using same parameters from previous frame.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+
+
+typedef struct {
+    unsigned long long ingested;   /* nb input bytes read and buffered */
+    unsigned long long consumed;   /* nb input bytes actually compressed */
+    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : latest started job nb */
+    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
+ */
+ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+/*! ZSTD_toFlushNow() :
+ *  Tell how many bytes are ready to be flushed immediately.
+ *  Useful for multithreading scenarios (nbWorkers >= 1).
+ *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
+ *  and check its output buffer.
+ * @return : amount of data stored in oldest job and ready to be flushed immediately.
+ *  if @return == 0, it means either :
+ *  + there is no active job (could be checked with ZSTD_getFrameProgression()), or
+ *  + oldest job is still actively compressing data,
+ *    but everything it has produced has also been flushed so far,
+ *    therefore flush speed is limited by production speed of oldest job
+ *    irrespective of the speed of concurrent (and newer) jobs.
+ */
+ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+
+
+/*=====   Advanced Streaming decompression functions  =====*/
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+ *
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
+ *
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
+/**
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+
+
+/*********************************************************************
+*  Buffer-less and synchronous inner streaming functions
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*  But it's also a complex one, with several restrictions, documented below.
+*  Prefer normal streaming API for an easier experience.
+********************************************************************* */
+
+/**
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if required value is too large for current system (in 32-bits mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At which point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are setup, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+*/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* maximum back-reference distance; can be very large, up to <= frameContentSize */
+    unsigned blockSizeMax;               /* maximum size of a single block in this frame */
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;                 /* presumably the size of the frame header, in bytes -- TODO confirm */
+    unsigned dictID;                     /* dictionary ID recorded in the frame header */
+    unsigned checksumFlag;               /* presumably non-zero when the frame carries a content checksum -- TODO confirm */
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ *  decode Frame Header, or requires larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* misc */
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/**       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users will have to take charge of the metadata needed to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up with statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/
+
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+
+
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
+
+#if defined (__cplusplus)
+}
+#endif

diff --git a/zfs/module/zstd/lib/zstd_errors.h b/zfs/module/zstd/lib/zstd_errors.h
new file mode 100644
index 0000000..998398e
--- /dev/null
+++ b/zfs/module/zstd/lib/zstd_errors.h

@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*===== dependency =====*/
+#include <stddef.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#ifndef ZSTDERRORLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZSTDERRORLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#endif
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum than on its value whenever possible
+ *           This is the only supported way to use the error list < v1.3.1
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_ERRORS_H_398273423 */

diff --git a/zfs/module/zstd/zfs_zstd.c b/zfs/module/zstd/zfs_zstd.c
new file mode 100644
index 0000000..2c69871
--- /dev/null
+++ b/zfs/module/zstd/zfs_zstd.c

@@ -0,0 +1,794 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2018, Klara Inc.
+ * Copyright (c) 2016-2018, Allan Jude
+ * Copyright (c) 2018-2020, Sebastian Gottschall
+ * Copyright (c) 2019-2020, Michael Niewöhner
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ *     under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/zfs_context.h>
+#include <sys/zio_compress.h>
+#include <sys/spa.h>
+#include <sys/zstd/zstd.h>
+
+#define	ZSTD_STATIC_LINKING_ONLY
+#include "lib/zstd.h"
+#include "lib/zstd_errors.h"
+
+/* kstat handle for the statistics below; created in zstd_init() */
+kstat_t *zstd_ksp = NULL;
+
+/*
+ * Named kstat counters for the zstd module. The field order must match the
+ * positional initializers of `zstd_stats`.
+ */
+typedef struct zstd_stats {
+	/* an allocation through the memory pool path returned NULL */
+	kstat_named_t	zstd_stat_alloc_fail;
+	/* the reserved decompression fallback buffer had to be used */
+	kstat_named_t	zstd_stat_alloc_fallback;
+	/* creating the compression context failed */
+	kstat_named_t	zstd_stat_com_alloc_fail;
+	/* creating the decompression context failed */
+	kstat_named_t	zstd_stat_dec_alloc_fail;
+	/* compression was requested with an invalid level */
+	kstat_named_t	zstd_stat_com_inval;
+	/* the level stored in a block header was invalid */
+	kstat_named_t	zstd_stat_dec_inval;
+	/* the compressed length stored in a block header was invalid */
+	kstat_named_t	zstd_stat_dec_header_inval;
+	/* compression failed for a reason other than "dstSize_tooSmall" */
+	kstat_named_t	zstd_stat_com_fail;
+	/* the zstd decompression call reported an error */
+	kstat_named_t	zstd_stat_dec_fail;
+	/* number of buffers currently held by the memory pools */
+	kstat_named_t	zstd_stat_buffers;
+	/* total size in bytes of the buffers held by the memory pools */
+	kstat_named_t	zstd_stat_size;
+} zstd_stats_t;
+
+/*
+ * Counter instance published through zstd_ksp. The initializers are
+ * positional, so each entry names the zstd_stats_t field at the same index.
+ */
+static zstd_stats_t zstd_stats = {
+	{ "alloc_fail",			KSTAT_DATA_UINT64 },
+	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
+	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
+	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
+	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
+	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
+	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
+	{ "compress_failed",		KSTAT_DATA_UINT64 },
+	{ "decompress_failed",		KSTAT_DATA_UINT64 },
+	{ "buffers",			KSTAT_DATA_UINT64 },
+	{ "size",			KSTAT_DATA_UINT64 },
+};
+
+/*
+ * Allocator type recorded in zstd_kmem.kmem_type. zstd_free() dispatches on
+ * this value to release an allocation the right way.
+ */
+enum zstd_kmem_type {
+	/* Never assigned to a live allocation; lower bound for sanity checks */
+	ZSTD_KMEM_UNKNOWN = 0,
+	/* One-off vmem_alloc allocation, released with vmem_free */
+	ZSTD_KMEM_DEFAULT,
+	/* Buffer owned by a zstd_pool slot, released by unlocking the slot */
+	ZSTD_KMEM_POOL,
+	/* Reserved fallback memory for decompression only */
+	ZSTD_KMEM_DCTX,
+	/* Number of types; upper bound for sanity checks */
+	ZSTD_KMEM_COUNT,
+};
+
+/* Structure for pooled memory objects (one slot of a memory pool array) */
+struct zstd_pool {
+	/* cached buffer, or NULL while the slot is empty */
+	void *mem;
+	/* size of `mem` in bytes, 0 while the slot is empty */
+	size_t size;
+	/* held for as long as the buffer is handed out to a user */
+	kmutex_t barrier;
+	/* gethrestime_sec() based time after which an idle buffer is reaped */
+	hrtime_t timeout;
+};
+
+/*
+ * Bookkeeping header placed at the start of every buffer handed to zstd.
+ * The allocators return the address right after this header and zstd_free()
+ * steps back by sizeof (struct zstd_kmem) to find it again.
+ */
+struct zstd_kmem {
+	/* how the buffer was obtained and therefore how it must be released */
+	enum zstd_kmem_type kmem_type;
+	/* size of the whole allocation, header included */
+	size_t kmem_size;
+	/* owning pool slot for ZSTD_KMEM_POOL buffers, NULL otherwise */
+	struct zstd_pool *pool;
+};
+
+/* Fallback memory structure used for decompression only if memory runs out */
+struct zstd_fallback_mem {
+	/* size of `mem` in bytes */
+	size_t mem_size;
+	/* buffer reserved once by create_fallback_mem() */
+	void *mem;
+	/* serializes users of `mem`; taken on hand-out, dropped in zstd_free */
+	kmutex_t barrier;
+};
+
+/* One row of the ZFS level enum <-> zstd level translation table */
+struct zstd_levelmap {
+	/* compression level value handed to the zstd library */
+	int16_t zstd_level;
+	/* matching ZFS internal level enum */
+	enum zio_zstd_levels level;
+};
+
+/*
+ * ZSTD memory handlers
+ *
+ * For decompression we use a different handler which also provides fallback
+ * memory allocation in case memory runs out.
+ *
+ * The ZSTD handlers were split up for the most simplified implementation.
+ * Both handler tables share zstd_free(), which tells the allocation kinds
+ * apart through the zstd_kmem header stored in front of each buffer.
+ */
+static void *zstd_alloc(void *opaque, size_t size);
+static void *zstd_dctx_alloc(void *opaque, size_t size);
+static void zstd_free(void *opaque, void *ptr);
+
+/*
+ * Compression memory handler.
+ * NOTE(review): the trailing NULL is presumably the opaque pointer passed to
+ * the callbacks (they ignore that argument) - confirm against ZSTD_customMem.
+ */
+static const ZSTD_customMem zstd_malloc = {
+	zstd_alloc,
+	zstd_free,
+	NULL,
+};
+
+/* Decompression memory handler (same layout as zstd_malloc) */
+static const ZSTD_customMem zstd_dctx_malloc = {
+	zstd_dctx_alloc,
+	zstd_free,
+	NULL,
+};
+
+/*
+ * Level map for converting ZFS internal levels to ZSTD levels and vice versa.
+ *
+ * zstd_enum_to_level() indexes this table by position, so the order of the
+ * rows matters: the regular levels 1..19 come first, followed by the "fast"
+ * levels in ascending enum order. For the regular levels the ZFS enum
+ * constant itself is used as the zstd level value.
+ */
+static struct zstd_levelmap zstd_levels[] = {
+	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
+	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
+	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
+	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
+	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
+	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
+	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
+	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
+	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
+	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
+	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
+	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
+	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
+	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
+	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
+	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
+	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
+	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
+	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
+	/* "fast" levels: negative zstd level values */
+	{-1, ZIO_ZSTD_LEVEL_FAST_1},
+	{-2, ZIO_ZSTD_LEVEL_FAST_2},
+	{-3, ZIO_ZSTD_LEVEL_FAST_3},
+	{-4, ZIO_ZSTD_LEVEL_FAST_4},
+	{-5, ZIO_ZSTD_LEVEL_FAST_5},
+	{-6, ZIO_ZSTD_LEVEL_FAST_6},
+	{-7, ZIO_ZSTD_LEVEL_FAST_7},
+	{-8, ZIO_ZSTD_LEVEL_FAST_8},
+	{-9, ZIO_ZSTD_LEVEL_FAST_9},
+	{-10, ZIO_ZSTD_LEVEL_FAST_10},
+	{-20, ZIO_ZSTD_LEVEL_FAST_20},
+	{-30, ZIO_ZSTD_LEVEL_FAST_30},
+	{-40, ZIO_ZSTD_LEVEL_FAST_40},
+	{-50, ZIO_ZSTD_LEVEL_FAST_50},
+	{-60, ZIO_ZSTD_LEVEL_FAST_60},
+	{-70, ZIO_ZSTD_LEVEL_FAST_70},
+	{-80, ZIO_ZSTD_LEVEL_FAST_80},
+	{-90, ZIO_ZSTD_LEVEL_FAST_90},
+	{-100, ZIO_ZSTD_LEVEL_FAST_100},
+	{-500, ZIO_ZSTD_LEVEL_FAST_500},
+	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
+};
+
+/*
+ * This variable represents the maximum count of the pool based on the number
+ * of CPUs plus some buffer. The initial value is only a placeholder:
+ * zstd_init() overwrites it with boot_ncpus * 4 before the pools are created.
+ */
+static int pool_count = 16;
+
+/* Number of slots in each memory pool array */
+#define	ZSTD_POOL_MAX		pool_count
+/*
+ * Seconds an idle pool buffer is kept before it may be reaped (2 minutes).
+ * Parenthesized so the macro expands safely inside any larger expression.
+ */
+#define	ZSTD_POOL_TIMEOUT	(60 * 2)
+
+/* Reserved decompression buffer, set up by zstd_meminit() */
+static struct zstd_fallback_mem zstd_dctx_fallback;
+/* Pool slot arrays for compression and decompression contexts */
+static struct zstd_pool *zstd_mempool_cctx;
+static struct zstd_pool *zstd_mempool_dctx;
+
+/*
+ * The zstd library code calls the two __asan_*_memory_region() functions
+ * whenever ADDRESS_SANITIZER is defined. The macro is derived here from the
+ * compiler's sanitizer indicators; for kernel builds nothing else provides
+ * these functions, so empty stubs are supplied instead of changing the
+ * external code.
+ */
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define	ADDRESS_SANITIZER 1
+#endif
+#elif defined(__SANITIZE_ADDRESS__)
+#define	ADDRESS_SANITIZER 1
+#endif
+#if defined(_KERNEL) && defined(ADDRESS_SANITIZER)
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+/* No-op stubs; no stray ';' after the bodies (not valid at file scope) */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {}
+void __asan_poison_memory_region(void const volatile *addr, size_t size) {}
+#endif
+
+
+/*
+ * Walk one memory pool and free every idle buffer whose timeout has passed.
+ * Slots that are empty or currently locked by a user are left untouched.
+ */
+static void
+zstd_mempool_reap(struct zstd_pool *zstd_mempool)
+{
+	/* Nothing to do without a pool or while no buffer is cached at all */
+	if (zstd_mempool == NULL) {
+		return;
+	}
+	if (ZSTDSTAT(zstd_stat_buffers) == 0) {
+		return;
+	}
+
+	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
+		struct zstd_pool *pool = &zstd_mempool[i];
+
+		/* Skip empty slots and slots that are in use right now */
+		if (pool->mem == NULL || !mutex_tryenter(&pool->barrier)) {
+			continue;
+		}
+
+		/*
+		 * Re-check under the lock, then drop the buffer if it has
+		 * been sitting unused for longer than the pool timeout.
+		 */
+		if (pool->mem != NULL && gethrestime_sec() > pool->timeout) {
+			vmem_free(pool->mem, pool->size);
+			ZSTDSTAT_SUB(zstd_stat_buffers, 1);
+			ZSTDSTAT_SUB(zstd_stat_size, pool->size);
+			pool->mem = NULL;
+			pool->size = 0;
+			pool->timeout = 0;
+		}
+
+		mutex_exit(&pool->barrier);
+	}
+}
+
+/*
+ * Try to get a cached allocated buffer from memory pool or allocate a new one
+ * if necessary.
+ *
+ * The lookup runs in three steps:
+ *  1. Search for an idle slot whose cached buffer is big enough and reuse it.
+ *  2. Otherwise fill an idle empty slot with a freshly allocated buffer (or
+ *     take a slot that became suitable in the meantime).
+ *  3. Otherwise fall back to a one-off allocation that is not pooled.
+ *
+ * The concept is that high frequency memory allocations of bigger objects are
+ * expensive. So if a lot of work is going on, allocations will be kept for a
+ * while and can be reused in that time frame.
+ *
+ * The scheduled release will be updated every time an object is reused.
+ * Expired buffers are not freed here; that is done by zstd_mempool_reap().
+ *
+ * When a pooled buffer is returned, the barrier mutex of its slot is still
+ * held; it is released by zstd_mempool_free(). Returns NULL if no pool was
+ * given or the final non-sleeping allocation failed.
+ */
+
+static void *
+zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
+{
+	struct zstd_pool *pool;
+	struct zstd_kmem *mem = NULL;
+
+	if (!zstd_mempool) {
+		return (NULL);
+	}
+
+	/* Step 1: seek for a preallocated memory slot that is big enough */
+	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
+		pool = &zstd_mempool[i];
+		/*
+		 * This lock is simply a marker for a pool object being in use.
+		 * If it's already held, the slot will be skipped.
+		 *
+		 * We need to take it before checking the slot to avoid race
+		 * conditions caused by running in a threaded context.
+		 *
+		 * The lock is later released by zstd_mempool_free.
+		 */
+		if (mutex_tryenter(&pool->barrier)) {
+			/*
+			 * Check if objects fits the size, if so we take it and
+			 * update the timestamp.
+			 */
+			if (pool->mem && size <= pool->size) {
+				pool->timeout = gethrestime_sec() +
+				    ZSTD_POOL_TIMEOUT;
+				mem = pool->mem;
+				/* return with the slot lock still held */
+				return (mem);
+			}
+			mutex_exit(&pool->barrier);
+		}
+	}
+
+	/*
+	 * Step 2: if no preallocated slot was found, try to fill in a new one.
+	 *
+	 * We run a similar algorithm twice here to avoid pool fragmentation.
+	 * The first one may generate holes in the list if objects get released.
+	 * We always make sure that these holes get filled instead of adding new
+	 * allocations constantly at the end.
+	 */
+	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
+		pool = &zstd_mempool[i];
+		if (mutex_tryenter(&pool->barrier)) {
+			/* Object is free, try to allocate new one */
+			if (!pool->mem) {
+				mem = vmem_alloc(size, KM_SLEEP);
+				if (mem) {
+					ZSTDSTAT_ADD(zstd_stat_buffers, 1);
+					ZSTDSTAT_ADD(zstd_stat_size, size);
+					pool->mem = mem;
+					pool->size = size;
+					/* Keep track for later release */
+					mem->pool = pool;
+					mem->kmem_type = ZSTD_KMEM_POOL;
+					mem->kmem_size = size;
+				}
+			}
+
+			/*
+			 * Take the slot if it now holds a big enough buffer,
+			 * either the one just allocated or one that was
+			 * already cached here.
+			 */
+			if (size <= pool->size) {
+				/* Update timestamp */
+				pool->timeout = gethrestime_sec() +
+				    ZSTD_POOL_TIMEOUT;
+
+				/* return with the slot lock still held */
+				return (pool->mem);
+			}
+
+			mutex_exit(&pool->barrier);
+		}
+	}
+
+	/*
+	 * Step 3: if the pool is full or the allocation failed, try lazy
+	 * allocation instead. The buffer is tagged ZSTD_KMEM_DEFAULT so that
+	 * zstd_free() releases it with vmem_free instead of unlocking a slot.
+	 */
+	if (!mem) {
+		mem = vmem_alloc(size, KM_NOSLEEP);
+		if (mem) {
+			mem->pool = NULL;
+			mem->kmem_type = ZSTD_KMEM_DEFAULT;
+			mem->kmem_size = size;
+		}
+	}
+
+	return (mem);
+}
+
+/*
+ * Mark object as released by releasing the barrier mutex.
+ *
+ * The buffer itself stays cached in its pool slot; only the slot lock taken
+ * in zstd_mempool_alloc() is dropped. `z` must be a ZSTD_KMEM_POOL header,
+ * i.e. z->pool must point at the owning slot.
+ */
+static void
+zstd_mempool_free(struct zstd_kmem *z)
+{
+	mutex_exit(&z->pool->barrier);
+}
+
+/*
+ * Convert ZFS internal enum to ZSTD level.
+ *
+ * Looks the level up in zstd_levels by position and stores the result in
+ * *zstd_level. Returns 0 on success and 1 if `level` is neither one of the
+ * regular levels nor one of the "fast" levels (*zstd_level is then left
+ * untouched).
+ */
+static int
+zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
+{
+	int row;
+
+	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
+		/* regular levels occupy the first rows of the table */
+		row = level - 1;
+	} else if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
+	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
+		/* fast levels follow directly after the regular ones */
+		row = level - ZIO_ZSTD_LEVEL_FAST_1 + ZIO_ZSTD_LEVEL_19;
+	} else {
+		/* Invalid/unknown zfs compression enum - should not happen */
+		return (1);
+	}
+
+	*zstd_level = zstd_levels[row].zstd_level;
+	return (0);
+}
+
+
+/*
+ * Compress block using zstd.
+ *
+ * s_start/s_len describe the source data, d_start/d_len the destination
+ * buffer, `level` is the ZFS level enum. The destination starts with a
+ * zfs_zstdhdr_t followed by the compressed payload.
+ *
+ * Returns the number of bytes used in the destination (header included), or
+ * s_len whenever the block could not be compressed.
+ */
+size_t
+zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
+    int level)
+{
+	zfs_zstdhdr_t *hdr = (zfs_zstdhdr_t *)d_start;
+	int16_t zstd_level;
+
+	/* An invalid level means: do not compress at all */
+	if (zstd_enum_to_level(level, &zstd_level) != 0) {
+		ZSTDSTAT_BUMP(zstd_stat_com_inval);
+		return (s_len);
+	}
+
+	ASSERT3U(d_len, >=, sizeof (*hdr));
+	ASSERT3U(d_len, <=, s_len);
+	ASSERT3U(zstd_level, !=, 0);
+
+	/*
+	 * Without a context (out of kernel memory) report the block as
+	 * incompressible; the caller then stores it uncompressed.
+	 */
+	ZSTD_CCtx *ctx = ZSTD_createCCtx_advanced(zstd_malloc);
+	if (ctx == NULL) {
+		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
+		return (s_len);
+	}
+
+	/* Requested compression level */
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, zstd_level);
+
+	/* "Magicless" frames: saves the 4 bytes of the zstd magic number */
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
+
+	/*
+	 * Checksum and content size are not written into the frame since
+	 * ZFS already takes care of both.
+	 */
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_checksumFlag, 0);
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_contentSizeFlag, 0);
+
+	/* The payload goes right behind the header */
+	const size_t payload_room = d_len - sizeof (*hdr);
+	const size_t written = ZSTD_compress2(ctx, hdr->data, payload_room,
+	    s_start, s_len);
+
+	ZSTD_freeCCtx(ctx);
+
+	if (ZSTD_isError(written)) {
+		/*
+		 * Running out of destination space just means the savings
+		 * are too small; only other errors count as failures.
+		 */
+		const int too_small =
+		    (ZSTD_getErrorCode(written) == ZSTD_error_dstSize_tooSmall);
+
+		if (!too_small) {
+			ZSTDSTAT_BUMP(zstd_stat_com_fail);
+		}
+		return (s_len);
+	}
+
+	/*
+	 * Store the payload length (big endian) in the header. Decompression
+	 * needs it because the stored buffer may carry padding behind the
+	 * compressed data.
+	 */
+	hdr->c_len = BE_32(written);
+
+	/*
+	 * The version has to fit into 24 bits, which allows a maximum of
+	 * version 1677.72.15 - not expected to be ever reached.
+	 */
+	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);
+
+	/*
+	 * Store the zstd version and the compression level as well.
+	 *
+	 * The level may be needed to reproduce the compression settings when
+	 * a block is written to the L2ARC while compressed_arc is disabled.
+	 * The version occupies the upper 24 bits so that future changes of
+	 * the compression algorithm can be told apart; handling code has to
+	 * be added as soon as such an incompatibility shows up.
+	 */
+	zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
+	zfs_set_hdrlevel(hdr, level);
+	hdr->raw_version_level = BE_32(hdr->raw_version_level);
+
+	return (written + sizeof (*hdr));
+}
+
+/*
+ * Decompress block using zstd and return its stored level.
+ *
+ * s_start/s_len describe the stored buffer (zfs_zstdhdr_t followed by the
+ * compressed payload and possibly padding), d_start/d_len the destination.
+ * If `level` is not NULL it receives the level found in the header.
+ *
+ * Returns 0 on success and 1 on any failure (short or inconsistent header,
+ * invalid level, allocation failure, decompression error).
+ */
+int
+zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
+    size_t d_len, uint8_t *level)
+{
+	ZSTD_DCtx *dctx;
+	size_t result;
+	int16_t zstd_level;
+	uint32_t c_len;
+	const zfs_zstdhdr_t *hdr;
+	zfs_zstdhdr_t hdr_copy;
+
+	/*
+	 * The buffer must at least hold the header, otherwise reading the
+	 * header fields below would run past the end of the source buffer.
+	 */
+	if (s_len < sizeof (*hdr)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
+		return (1);
+	}
+
+	hdr = (const zfs_zstdhdr_t *)s_start;
+	c_len = BE_32(hdr->c_len);
+
+	/*
+	 * Make a copy instead of directly converting the header, since we must
+	 * not modify the original data that may be used again later.
+	 */
+	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
+	uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
+
+	/*
+	 * NOTE: We ignore the ZSTD version for now. As soon as any
+	 * incompatibility occurs, it has to be handled accordingly.
+	 * The version can be accessed via `hdr_copy.version`.
+	 */
+
+	/*
+	 * Convert and check the level
+	 * An invalid level is a strong indicator for data corruption! In such
+	 * case return an error so the upper layers can try to fix it.
+	 */
+	if (zstd_enum_to_level(curlevel, &zstd_level)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
+		return (1);
+	}
+
+	ASSERT3U(d_len, >=, s_len);
+	ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
+
+	/*
+	 * Invalid compressed buffer size encoded at start.
+	 * The comparison is written as a subtraction (safe, since s_len is
+	 * known to cover the header) so that it cannot wrap around where
+	 * size_t is only 32 bits wide.
+	 */
+	if (c_len > s_len - sizeof (*hdr)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
+		return (1);
+	}
+
+	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
+	if (!dctx) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
+		return (1);
+	}
+
+	/* Set header type to "magicless" */
+	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);
+
+	/* Decompress the data and release the context */
+	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
+	ZSTD_freeDCtx(dctx);
+
+	/* An error reported by zstd is passed on as a non-zero return value */
+	if (ZSTD_isError(result)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
+		return (1);
+	}
+
+	if (level) {
+		*level = curlevel;
+	}
+
+	return (0);
+}
+
+/*
+ * Decompress datablock using zstd.
+ *
+ * Thin wrapper around zfs_zstd_decompress_level() for callers that are not
+ * interested in the stored level; the `level` argument is ignored.
+ */
+int
+zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
+    int level __maybe_unused)
+{
+	int rc;
+
+	/* NULL: do not report the level found in the header */
+	rc = zfs_zstd_decompress_level(s_start, d_start, s_len, d_len, NULL);
+
+	return (rc);
+}
+
+/*
+ * Allocator for zstd compression context using the compression memory pool.
+ *
+ * Requests room for a zstd_kmem header plus `size` bytes and returns the
+ * address right behind the header, or NULL if the pool could not provide
+ * memory.
+ */
+static void *
+zstd_alloc(void *opaque __maybe_unused, size_t size)
+{
+	struct zstd_kmem *header;
+
+	header = zstd_mempool_alloc(zstd_mempool_cctx,
+	    sizeof (struct zstd_kmem) + size);
+	if (header == NULL) {
+		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
+		return (NULL);
+	}
+
+	/* the caller's memory starts directly after the header */
+	return ((void *)(header + 1));
+}
+
+/*
+ * Allocator for zstd decompression context using mempool_allocator with
+ * fallback to reserved memory if allocation fails.
+ *
+ * Order of attempts: decompression memory pool, sleeping vmem_alloc, and
+ * finally the reserved fallback buffer (one user at a time). Returns the
+ * address right behind the zstd_kmem header, or NULL if even the fallback
+ * buffer cannot serve the request.
+ */
+static void *
+zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
+{
+	size_t nbytes = sizeof (struct zstd_kmem) + size;
+	struct zstd_kmem *z = NULL;
+	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;
+
+	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
+	if (!z) {
+		/* Try harder, decompression shall not fail */
+		z = vmem_alloc(nbytes, KM_SLEEP);
+		if (z) {
+			z->pool = NULL;
+		}
+		/* counts the failed pool allocation, even if the retry worked */
+		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
+	} else {
+		/* the pool allocator already filled in the header */
+		return ((void*)z + (sizeof (struct zstd_kmem)));
+	}
+
+	/* Fallback if everything fails */
+	if (!z) {
+		/*
+		 * Barrier since we only can handle it in a single thread. All
+		 * other following threads need to wait here until decompression
+		 * is completed. zstd_free will release this barrier later.
+		 */
+		mutex_enter(&zstd_dctx_fallback.barrier);
+
+		z = zstd_dctx_fallback.mem;
+		type = ZSTD_KMEM_DCTX;
+		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
+
+		/*
+		 * The fallback buffer cannot be handed out if it is missing
+		 * or smaller than the request. zstd_free is never called for
+		 * a failed allocation, so the barrier has to be dropped right
+		 * here; otherwise it would stay locked forever.
+		 */
+		if (!z || nbytes > zstd_dctx_fallback.mem_size) {
+			mutex_exit(&zstd_dctx_fallback.barrier);
+			return (NULL);
+		}
+	}
+
+	z->kmem_type = type;
+	z->kmem_size = nbytes;
+
+	return ((void*)z + (sizeof (struct zstd_kmem)));
+}
+
+/*
+ * Free allocated memory by its specific type.
+ *
+ * `ptr` is an address previously returned by zstd_alloc() or
+ * zstd_dctx_alloc(); the zstd_kmem header sits directly in front of it.
+ */
+static void
+zstd_free(void *opaque __maybe_unused, void *ptr)
+{
+	/* step back over the header that precedes the user memory */
+	struct zstd_kmem *z = (struct zstd_kmem *)ptr - 1;
+	enum zstd_kmem_type type;
+
+	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
+	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);
+
+	type = z->kmem_type;
+	if (type == ZSTD_KMEM_DEFAULT) {
+		/* one-off allocation: give the memory back */
+		vmem_free(z, z->kmem_size);
+	} else if (type == ZSTD_KMEM_POOL) {
+		/* pooled buffer: stays cached, only the slot is unlocked */
+		zstd_mempool_free(z);
+	} else if (type == ZSTD_KMEM_DCTX) {
+		/* fallback buffer: let the next waiting thread use it */
+		mutex_exit(&zstd_dctx_fallback.barrier);
+	}
+	/* any other type is ignored */
+}
+
+/*
+ * Allocate fallback memory to ensure safe decompression.
+ *
+ * Reserves a zeroed buffer of `size` bytes in `mem` and initializes the
+ * mutex that serializes its users.
+ */
+static void __init
+create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
+{
+	mem->mem = vmem_zalloc(size, KM_SLEEP);
+	mem->mem_size = size;
+
+	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Allocate the (zeroed) slot arrays of both memory pools and initialize the
+ * barrier mutex of every slot. Each array has ZSTD_POOL_MAX entries.
+ */
+static void __init
+zstd_mempool_init(void)
+{
+	const size_t table_bytes = ZSTD_POOL_MAX * sizeof (struct zstd_pool);
+	int i = 0;
+
+	zstd_mempool_cctx = (struct zstd_pool *)
+	    kmem_zalloc(table_bytes, KM_SLEEP);
+	zstd_mempool_dctx = (struct zstd_pool *)
+	    kmem_zalloc(table_bytes, KM_SLEEP);
+
+	while (i < ZSTD_POOL_MAX) {
+		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
+		    MUTEX_DEFAULT, NULL);
+		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
+		    MUTEX_DEFAULT, NULL);
+		i++;
+	}
+}
+
+/*
+ * Initialize zstd-related memory handling: the two memory pools and the
+ * reserved fallback buffer for decompression. Always returns 0.
+ */
+static int __init
+zstd_meminit(void)
+{
+	size_t fallback_bytes;
+
+	zstd_mempool_init();
+
+	/*
+	 * The fallback buffer has to hold a decompression context plus our
+	 * bookkeeping header, rounded up to whole pages. The expected size
+	 * on x64 with current ZSTD should be about 160 KB.
+	 */
+	fallback_bytes = P2ROUNDUP(ZSTD_estimateDCtxSize() +
+	    sizeof (struct zstd_kmem), PAGESIZE);
+	create_fallback_mem(&zstd_dctx_fallback, fallback_bytes);
+
+	return (0);
+}
+
+/*
+ * Release object from pool and free memory: destroys the slot mutex, frees
+ * the cached buffer and resets the slot to "empty".
+ */
+static void __exit
+release_pool(struct zstd_pool *pool)
+{
+	void *cached = pool->mem;
+	size_t cached_bytes = pool->size;
+
+	mutex_destroy(&pool->barrier);
+
+	/* mark the slot empty, then give the buffer back */
+	pool->mem = NULL;
+	pool->size = 0;
+	vmem_free(cached, cached_bytes);
+}
+
+/*
+ * Release memory pool objects: every slot of both pools first, then the
+ * slot arrays themselves. The pool pointers are reset to NULL afterwards.
+ */
+static void __exit
+zstd_mempool_deinit(void)
+{
+	const size_t table_bytes = ZSTD_POOL_MAX * sizeof (struct zstd_pool);
+	int i = 0;
+
+	while (i < ZSTD_POOL_MAX) {
+		release_pool(&zstd_mempool_cctx[i]);
+		release_pool(&zstd_mempool_dctx[i]);
+		i++;
+	}
+
+	kmem_free(zstd_mempool_dctx, table_bytes);
+	zstd_mempool_dctx = NULL;
+
+	kmem_free(zstd_mempool_cctx, table_bytes);
+	zstd_mempool_cctx = NULL;
+}
+
+/*
+ * Release unused memory from the pools: runs zstd_mempool_reap() on the
+ * compression pool and then on the decompression pool, which frees the
+ * idle buffers whose timeout has expired.
+ */
+
+void
+zfs_zstd_cache_reap_now(void)
+{
+	struct zstd_pool *const pools[] = {
+		zstd_mempool_cctx,
+		zstd_mempool_dctx,
+	};
+
+	for (size_t n = 0; n < sizeof (pools) / sizeof (pools[0]); n++) {
+		zstd_mempool_reap(pools[n]);
+	}
+}
+
+/*
+ * Module initialization: sizes the memory pools from the CPU count, sets up
+ * all memory handling and publishes the statistics as a kstat. A failure to
+ * create the kstat is tolerated. Always returns 0.
+ */
+extern int __init
+zstd_init(void)
+{
+	const size_t nstats = sizeof (zstd_stats) / sizeof (kstat_named_t);
+
+	/* Four pool slots per CPU known at boot */
+	pool_count = 4 * boot_ncpus;
+	zstd_meminit();
+
+	/* Initialize kstat */
+	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
+	    KSTAT_TYPE_NAMED, nstats, KSTAT_FLAG_VIRTUAL);
+	if (zstd_ksp == NULL) {
+		return (0);
+	}
+
+	zstd_ksp->ks_data = &zstd_stats;
+	kstat_install(zstd_ksp);
+
+	return (0);
+}
+
+/*
+ * Module teardown: removes the kstat (if one was created), frees the
+ * reserved fallback buffer together with its mutex and finally releases
+ * both memory pools.
+ */
+extern void __exit
+zstd_fini(void)
+{
+	kstat_t *ksp = zstd_ksp;
+
+	/* Deinitialize kstat */
+	zstd_ksp = NULL;
+	if (ksp != NULL) {
+		kstat_delete(ksp);
+	}
+
+	/* Release fallback memory */
+	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
+	mutex_destroy(&zstd_dctx_fallback.barrier);
+
+	/* Deinit memory pool */
+	zstd_mempool_deinit();
+}
+
+/*
+ * Kernel module glue, only compiled for kernel builds: names zstd_init() and
+ * zstd_fini() as module entry and exit functions, sets the module metadata
+ * and exports the public compression interface.
+ */
+#if defined(_KERNEL)
+module_init(zstd_init);
+module_exit(zstd_fini);
+
+ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
+ZFS_MODULE_LICENSE("Dual BSD/GPL");
+/* the module version is the zstd version string with an "a" appended */
+ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");
+
+EXPORT_SYMBOL(zfs_zstd_compress);
+EXPORT_SYMBOL(zfs_zstd_decompress_level);
+EXPORT_SYMBOL(zfs_zstd_decompress);
+EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
+#endif

diff --git a/zfs/module/zstd/zstd-in.c b/zfs/module/zstd/zstd-in.c
new file mode 100644
index 0000000..121f375
--- /dev/null
+++ b/zfs/module/zstd/zstd-in.c

@@ -0,0 +1,68 @@
+/*
+ * BSD 3-Clause Clear License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2019-2020, Michael Niewöhner
+ */
+
+#define	MEM_MODULE
+#define	XXH_NAMESPACE ZSTD_
+#define	XXH_PRIVATE_API
+#define	XXH_INLINE_ALL
+#define	ZSTD_LEGACY_SUPPORT 0
+#define	ZSTD_LIB_DICTBUILDER 0
+#define	ZSTD_LIB_DEPRECATED 0
+#define	ZSTD_NOBENCH
+
+#include "common/debug.c"
+#include "common/entropy_common.c"
+#include "common/error_private.c"
+#include "common/fse_decompress.c"
+#include "common/pool.c"
+#include "common/zstd_common.c"
+
+#include "compress/fse_compress.c"
+#include "compress/hist.c"
+#include "compress/huf_compress.c"
+#include "compress/zstd_compress_literals.c"
+#include "compress/zstd_compress_sequences.c"
+#include "compress/zstd_compress_superblock.c"
+#include "compress/zstd_compress.c"
+#include "compress/zstd_double_fast.c"
+#include "compress/zstd_fast.c"
+#include "compress/zstd_lazy.c"
+#include "compress/zstd_ldm.c"
+#include "compress/zstd_opt.c"
+
+#include "decompress/huf_decompress.c"
+#include "decompress/zstd_ddict.c"
+#include "decompress/zstd_decompress.c"
+#include "decompress/zstd_decompress_block.c"

diff --git a/zfs/module/zstd/zstd_sparc.c b/zfs/module/zstd/zstd_sparc.c
new file mode 100644
index 0000000..463df99
--- /dev/null
+++ b/zfs/module/zstd/zstd_sparc.c

@@ -0,0 +1,11 @@
+#ifdef __sparc__
+#include <stdint.h>
+#include <sys/byteorder.h>
+#include "include/sparc_compat.h"
+/* 64-bit byte swap, implemented with the BSWAP_64 macro */
+uint64_t __bswapdi2(uint64_t in) {
+	const uint64_t swapped = BSWAP_64(in);
+
+	return (swapped);
+}
+/* 32-bit byte swap, implemented with the BSWAP_32 macro */
+uint32_t __bswapsi2(uint32_t in) {
+	const uint32_t swapped = BSWAP_32(in);
+
+	return (swapped);
+}
+#endif

diff --git a/zfs/rpm/generic/zfs-dkms.spec.in b/zfs/rpm/generic/zfs-dkms.spec.in
index 322c76f..23c3ed6 100644
--- a/zfs/rpm/generic/zfs-dkms.spec.in
+++ b/zfs/rpm/generic/zfs-dkms.spec.in

@@ -1,11 +1,11 @@
 %{?!packager: %define packager Brian Behlendorf <behlendorf1@llnl.gov>}
 
-%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
+%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}%{?openEuler}
 %define not_rpm 1
 %endif
 
-# See comment in zfs.spec.in.
-%global __brp_mangle_shebangs_exclude_from arc_summary.py|arcstat.py|dbufstat.py|test-runner.py|zts-report.py
+# Exclude input files from mangling
+%global __brp_mangle_shebangs_exclude_from ^/usr/src/.*$
 
 %define module  @PACKAGE@
 %define mkconf  scripts/dkms.mkconf
@@ -18,20 +18,31 @@
 
 Group:          System Environment/Kernel
 License:        @ZFS_META_LICENSE@
-URL:            http://zfsonlinux.org/
+URL:            https://github.com/openzfs/zfs
 Source0:        %{module}-%{version}.tar.gz
 BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
 BuildArch:      noarch
 
 Requires:       dkms >= 2.2.0.3
+Requires(post): dkms >= 2.2.0.3
+Requires(preun): dkms >= 2.2.0.3
 Requires:       gcc, make, perl, diffutils
-%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
-Requires:       kernel-devel
-Obsoletes:      spl-dkms
+Requires(post): gcc, make, perl, diffutils
+%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}%{?openEuler}
+Requires:       kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
+Requires(post): kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
+Obsoletes:      spl-dkms <= %{version}
 %endif
 Provides:       %{module}-kmod = %{version}
 AutoReqProv:    no
 
+%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9)
+# We don't directly use it, but if this isn't installed, rpmbuild as root can
+# crash+corrupt rpmdb
+# See issue #12071
+BuildRequires:  ncompress
+%endif
+
 %description
 This package contains the dkms ZFS kernel modules.
 
@@ -57,46 +68,9 @@
 %defattr(-,root,root)
 /usr/src/%{module}-%{version}
 
-%post
-for POSTINST in /usr/lib/dkms/common.postinst; do
-    if [ -f $POSTINST ]; then
-        $POSTINST %{module} %{version}
-        exit $?
-    fi
-    echo "WARNING: $POSTINST does not exist."
-done
-echo -e "ERROR: DKMS version is too old and %{module} was not"
-echo -e "built with legacy DKMS support."
-echo -e "You must either rebuild %{module} with legacy postinst"
-echo -e "support or upgrade DKMS to a more current version."
-exit 1
-
 %preun
-# Are we doing an upgrade?
-if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
-	# Yes we are.  Are we upgrading to a new ZFS version?
-	NEWEST_VER=$(dkms status zfs | sed 's/,//g' | sort -r -V | awk '/installed/{print $2; exit}')
-	if [ "$NEWEST_VER" != "%{version}" ] ; then
-		# Yes, it's a new ZFS version.  We'll uninstall the old module
-		# later on in this script.
-		true
-	else
-		# No, it's probably an upgrade of the same ZFS version
-		# to a new distro (zfs-dkms-0.7.12.fc28->zfs-dkms-0.7.12.fc29).
-		# Don't remove our modules, since the rebuild for the new
-		# distro will automatically delete the old modules.
-		exit 0
-	fi
-fi
+dkms remove -m %{module} -v %{version} --all
 
-# If we're here then we're doing an uninstall (not upgrade).
-CONFIG_H="/var/lib/dkms/%{module}/%{version}/*/*/%{module}_config.h"
-SPEC_META_ALIAS="@PACKAGE@-@VERSION@-@RELEASE@"
-DKMS_META_ALIAS=`cat $CONFIG_H 2>/dev/null |
-    awk -F'"' '/META_ALIAS\s+"/ { print $2; exit 0 }'`
-if [ "$SPEC_META_ALIAS" = "$DKMS_META_ALIAS" ]; then
-    echo -e
-    echo -e "Uninstall of %{module} module ($SPEC_META_ALIAS) beginning:"
-    dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade}
-fi
-exit 0
+%posttrans
+/usr/lib/dkms/common.postinst %{module} %{version}
+

diff --git a/zfs/rpm/generic/zfs-kmod.spec.in b/zfs/rpm/generic/zfs-kmod.spec.in
index 4a8f662..d086720 100644
--- a/zfs/rpm/generic/zfs-kmod.spec.in
+++ b/zfs/rpm/generic/zfs-kmod.spec.in

@@ -1,10 +1,7 @@
 %define module  @PACKAGE@
 
-# See comment in zfs.spec.in.
-%global __brp_mangle_shebangs_exclude_from arc_summary.py|arcstat.py|dbufstat.py|test-runner.py|zts-report.py
-
 %if !%{defined ksrc}
-%if 0%{?rhel}%{?fedora}
+%if 0%{?rhel}%{?fedora}%{?openEuler}
 %define ksrc    ${kernel_version##*___}
 %else
 %define ksrc    "$( \
@@ -19,7 +16,7 @@
 %endif
 
 %if !%{defined kobj}
-%if 0%{?rhel}%{?fedora}
+%if 0%{?rhel}%{?fedora}%{?openEuler}
 %define kobj    ${kernel_version##*___}
 %else
 %define kobj    "$( \
@@ -41,7 +38,7 @@
 
 %bcond_with     debug
 %bcond_with     debuginfo
-
+%define  debug_package %{nil}
 
 Name:           %{module}-kmod
 
@@ -51,15 +48,22 @@
 
 Group:          System Environment/Kernel
 License:        @ZFS_META_LICENSE@
-URL:            http://zfsonlinux.org/
+URL:            https://github.com/openzfs/zfs
 Source0:        %{module}-%{version}.tar.gz
 Source10:       kmodtool
 BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id} -u -n)
-%if 0%{?rhel}%{?fedora}
+%if 0%{?rhel}%{?fedora}%{?openEuler}
 BuildRequires:  gcc, make
 BuildRequires:  elfutils-libelf-devel
 %endif
 
+%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9)
+# We don't directly use it, but if this isn't installed, rpmbuild as root can
+# crash+corrupt rpmdb
+# See issue #12071
+BuildRequires:  ncompress
+%endif
+
 # The developments headers will conflict with the dkms packages.
 Conflicts:      %{module}-dkms
 
@@ -75,10 +79,11 @@
 # Building local packages attempt to to use the installed kernel.
 %{?rhel:BuildRequires: kernel-devel}
 %{?fedora:BuildRequires: kernel-devel}
+%{?openEuler:BuildRequires: kernel-devel}
 %{?suse_version:BuildRequires: kernel-source}
 
 %if !%{defined kernels} && !%{defined build_src_rpm}
-    %if 0%{?rhel}%{?fedora}%{?suse_version}
+    %if 0%{?rhel}%{?fedora}%{?suse_version}%{?openEuler}
         %define kernels %(ls -1 /usr/src/kernels)
     %else
         %define kernels %(ls -1 /lib/modules)
@@ -91,10 +96,6 @@
 %global __global_ldflags %{nil}
 %endif
 
-%if 0%{?fedora} >= 17
-%define prefix  /usr
-%endif
-
 # Kmodtool does its magic here.  A patched version of kmodtool is shipped
 # with the source rpm until kmod development packages are supported upstream.
 # https://bugzilla.rpmfusion.org/show_bug.cgi?id=2714
@@ -136,11 +137,15 @@
 for kernel_version in %{?kernel_versions}; do
     cd _kmod_build_${kernel_version%%___*}
     %configure \
+        STRIP="/bin/true" \
         --with-config=kernel \
         --with-linux=%{ksrc} \
         --with-linux-obj=%{kobj} \
         %{debug} \
-        %{debuginfo}
+        %{debuginfo} \
+        %{?kernel_cc} \
+        %{?kernel_ld} \
+        %{?kernel_llvm}
     make %{?_smp_mflags}
     cd ..
 done
@@ -158,8 +163,11 @@
         INSTALL_MOD_DIR=%{kmodinstdir_postfix}
     cd ..
 done
+#sign before packaging
+find ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}  -name "*.ko" | while read mod; do bash ../../SOURCES/signmod $mod; done
 # find-debuginfo.sh only considers executables
 chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/*
+
 %{?akmod_install}
 
 

diff --git a/zfs/rpm/generic/zfs.spec.in b/zfs/rpm/generic/zfs.spec.in
index 5d25364..3dce92a 100644
--- a/zfs/rpm/generic/zfs.spec.in
+++ b/zfs/rpm/generic/zfs.spec.in

@@ -3,7 +3,7 @@
 
 # Set the default udev directory based on distribution.
 %if %{undefined _udevdir}
-%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7
+%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
 %global _udevdir    %{_prefix}/lib/udev
 %else
 %global _udevdir    /lib/udev
@@ -12,7 +12,7 @@
 
 # Set the default udevrule directory based on distribution.
 %if %{undefined _udevruledir}
-%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7
+%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
 %global _udevruledir    %{_prefix}/lib/udev/rules.d
 %else
 %global _udevruledir    /lib/udev/rules.d
@@ -21,7 +21,7 @@
 
 # Set the default dracut directory based on distribution.
 %if %{undefined _dracutdir}
-%if 0%{?fedora} >= 17 || 0%{?rhel} >= 7 || 0%{?centos} >= 7
+%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
 %global _dracutdir  %{_prefix}/lib/dracut
 %else
 %global _dracutdir  %{_prefix}/share/dracut
@@ -48,63 +48,37 @@
 %global _systemdgeneratordir %{_prefix}/lib/systemd/system-generators
 %endif
 
+%if %{undefined _pkgconfigdir}
+%global _pkgconfigdir %{_prefix}/%{_lib}/pkgconfig
+%endif
+
 %bcond_with    debug
 %bcond_with    debuginfo
 %bcond_with    asan
 %bcond_with    systemd
+%bcond_with    pam
+%bcond_without pyzfs
 
 # Generic enable switch for systemd
 %if %{with systemd}
 %define _systemd 1
 %endif
 
-# RHEL >= 7 comes with systemd
-%if 0%{?rhel} >= 7
+# Distros below support systemd
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}
 %define _systemd 1
 %endif
 
-# Fedora >= 15 comes with systemd, but only >= 18 has
-# the proper macros
-%if 0%{?fedora} >= 18
-%define _systemd 1
-%endif
-
-# opensuse >= 12.1 comes with systemd, but only >= 13.1
-# has the proper macros
-%if 0%{?suse_version} >= 1310
-%define _systemd 1
-%endif
-
-# When not specified default to distribution provided version.  This
-# is normally Python 3, but for RHEL <= 7 only Python 2 is provided.
+# When not specified default to distribution provided version.
 %if %{undefined __use_python}
-%if 0%{?rhel} && 0%{?rhel} <= 7
-%define __python                  /usr/bin/python2
-%define __python_pkg_version      2
-%define __python_cffi_pkg         python-cffi
-%define __python_setuptools_pkg   python-setuptools
-%else
 %define __python                  /usr/bin/python3
 %define __python_pkg_version      3
-%define __python_cffi_pkg         python3-cffi
-%define __python_setuptools_pkg   python3-setuptools
-%endif
 %else
 %define __python                  %{__use_python}
 %define __python_pkg_version      %{__use_python_pkg_version}
-%define __python_cffi_pkg         python%{__python_pkg_version}-cffi
-%define __python_setuptools_pkg   python%{__python_pkg_version}-setuptools
 %endif
 %define __python_sitelib          %(%{__python} -Esc "from distutils.sysconfig import get_python_lib; print(get_python_lib())")
 
-# By default python-pyzfs is enabled, with the exception of
-# RHEL 6 which by default uses Python 2.6 which is too old.
-%if 0%{?rhel} == 6
-%bcond_with    pyzfs
-%else
-%bcond_without pyzfs
-%endif
-
 Name:           @PACKAGE@
 Version:        @VERSION@
 Release:        @RELEASE@%{?dist}
@@ -112,22 +86,22 @@
 
 Group:          System Environment/Kernel
 License:        @ZFS_META_LICENSE@
-URL:            http://zfsonlinux.org/
+URL:            https://github.com/openzfs/zfs
 Source0:        %{name}-%{version}.tar.gz
 BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-Requires:       libzpool2 = %{version}
-Requires:       libnvpair1 = %{version}
-Requires:       libuutil1 = %{version}
-Requires:       libzfs2 = %{version}
+Requires:       libzpool5%{?_isa} = %{version}-%{release}
+Requires:       libnvpair3%{?_isa} = %{version}-%{release}
+Requires:       libuutil3%{?_isa} = %{version}-%{release}
+Requires:       libzfs5%{?_isa} = %{version}-%{release}
 Requires:       %{name}-kmod = %{version}
-Provides:       %{name}-kmod-common = %{version}
-Obsoletes:      spl
+Provides:       %{name}-kmod-common = %{version}-%{release}
+Obsoletes:      spl <= %{version}
 
-# zfs-fuse provides the same commands and man pages that ZoL does. Renaming
-# those on either side would conflict with all available documentation.
+# zfs-fuse provides the same commands and man pages that OpenZFS does.
+# Renaming those on either side would conflict with all available documentation.
 Conflicts:      zfs-fuse
 
-%if 0%{?rhel}%{?fedora}%{?suse_version}
+%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler}
 BuildRequires:  gcc, make
 BuildRequires:  zlib-devel
 BuildRequires:  libuuid-devel
@@ -135,13 +109,26 @@
 BuildRequires:  libudev-devel
 BuildRequires:  libattr-devel
 BuildRequires:  openssl-devel
-%if 0%{?fedora} >= 28 || 0%{?rhel} >= 8 || 0%{?centos} >= 8
+%if 0%{?fedora}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8
 BuildRequires:  libtirpc-devel
 %endif
+
+%if (0%{?fedora}%{?suse_version}%{?openEuler}) || (0%{?rhel} && 0%{?rhel} < 9)
+# We don't directly use it, but if this isn't installed, rpmbuild as root can
+# crash+corrupt rpmdb
+# See issue #12071
+BuildRequires:  ncompress
+%endif
+
+%if %{with pam}
+BuildRequires:  pam-devel
+%endif
+
 Requires:       openssl
 %if 0%{?_systemd}
 BuildRequires: systemd
 %endif
+
 %endif
 
 %if 0%{?_systemd}
@@ -157,36 +144,48 @@
 %description
 This package contains the core ZFS command line utilities.
 
-%package -n libzpool2
+%package -n libzpool5
 Summary:        Native ZFS pool library for Linux
 Group:          System Environment/Kernel
+Obsoletes:      libzpool2 <= %{version}
+Obsoletes:      libzpool4 <= %{version}
 
-%description -n libzpool2
+%description -n libzpool5
 This package contains the zpool library, which provides support
 for managing zpools
 
-%post -n libzpool2 -p /sbin/ldconfig
-%postun -n libzpool2 -p /sbin/ldconfig
+%if %{defined ldconfig_scriptlets}
+%ldconfig_scriptlets -n libzpool5
+%else
+%post -n libzpool5 -p /sbin/ldconfig
+%postun -n libzpool5 -p /sbin/ldconfig
+%endif
 
-%package -n libnvpair1
+%package -n libnvpair3
 Summary:        Solaris name-value library for Linux
 Group:          System Environment/Kernel
+Obsoletes:      libnvpair1 <= %{version}
 
-%description -n libnvpair1
+%description -n libnvpair3
 This package contains routines for packing and unpacking name-value
 pairs.  This functionality is used to portably transport data across
 process boundaries, between kernel and user space, and can be used
 to write self describing data structures on disk.
 
-%post -n libnvpair1 -p /sbin/ldconfig
-%postun -n libnvpair1 -p /sbin/ldconfig
+%if %{defined ldconfig_scriptlets}
+%ldconfig_scriptlets -n libnvpair3
+%else
+%post -n libnvpair3 -p /sbin/ldconfig
+%postun -n libnvpair3 -p /sbin/ldconfig
+%endif
 
-%package -n libuutil1
+%package -n libuutil3
 Summary:        Solaris userland utility library for Linux
 Group:          System Environment/Kernel
+Obsoletes:      libuutil1 <= %{version}
 
-%description -n libuutil1
-This library provides a variety of compatibility functions for ZFS on Linux:
+%description -n libuutil3
+This library provides a variety of compatibility functions for OpenZFS:
  * libspl: The Solaris Porting Layer userland library, which provides APIs
    that make it possible to run Solaris user code in a Linux environment
    with relatively minimal modification.
@@ -196,32 +195,47 @@
    partitioning.
  * libshare: NFS, SMB, and iSCSI service integration for ZFS.
 
-%post -n libuutil1 -p /sbin/ldconfig
-%postun -n libuutil1 -p /sbin/ldconfig
+%if %{defined ldconfig_scriptlets}
+%ldconfig_scriptlets -n libuutil3
+%else
+%post -n libuutil3 -p /sbin/ldconfig
+%postun -n libuutil3 -p /sbin/ldconfig
+%endif
 
-%package -n libzfs2
+# The library version is encoded in the package name.  When updating the
+# version information it is important to add an obsoletes line below for
+# the previous version of the package.
+%package -n libzfs5
 Summary:        Native ZFS filesystem library for Linux
 Group:          System Environment/Kernel
+Obsoletes:      libzfs2 <= %{version}
+Obsoletes:      libzfs4 <= %{version}
 
-%description -n libzfs2
+%description -n libzfs5
 This package provides support for managing ZFS filesystems
 
-%post -n libzfs2 -p /sbin/ldconfig
-%postun -n libzfs2 -p /sbin/ldconfig
+%if %{defined ldconfig_scriptlets}
+%ldconfig_scriptlets -n libzfs5
+%else
+%post -n libzfs5 -p /sbin/ldconfig
+%postun -n libzfs5 -p /sbin/ldconfig
+%endif
 
-%package -n libzfs2-devel
+%package -n libzfs5-devel
 Summary:        Development headers
 Group:          System Environment/Kernel
-Requires:       libzfs2 = %{version}
-Requires:       libzpool2 = %{version}
-Requires:       libnvpair1 = %{version}
-Requires:       libuutil1 = %{version}
-Provides:       libzpool2-devel
-Provides:       libnvpair1-devel
-Provides:       libuutil1-devel
-Obsoletes:      zfs-devel
+Requires:       libzfs5%{?_isa} = %{version}-%{release}
+Requires:       libzpool5%{?_isa} = %{version}-%{release}
+Requires:       libnvpair3%{?_isa} = %{version}-%{release}
+Requires:       libuutil3%{?_isa} = %{version}-%{release}
+Provides:       libzpool5-devel = %{version}-%{release}
+Provides:       libnvpair3-devel = %{version}-%{release}
+Provides:       libuutil3-devel = %{version}-%{release}
+Obsoletes:      zfs-devel <= %{version}
+Obsoletes:      libzfs2-devel <= %{version}
+Obsoletes:      libzfs4-devel <= %{version}
 
-%description -n libzfs2-devel
+%description -n libzfs5-devel
 This package contains the header files needed for building additional
 applications against the ZFS libraries.
 
@@ -240,7 +254,7 @@
 Requires:       sysstat
 Requires:       libaio
 Requires:       python%{__python_pkg_version}
-%if 0%{?rhel}%{?fedora}%{?suse_version}
+%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler}
 BuildRequires:  libaio-devel
 %endif
 AutoReqProv:    no
@@ -263,20 +277,37 @@
 image which is ZFS aware.
 
 %if %{with pyzfs}
+# Enforce `python36-` package prefix for CentOS 7
+# since dependencies come from EPEL and are named this way
 %package -n python%{__python_pkg_version}-pyzfs
 Summary:        Python %{python_version} wrapper for libzfs_core
 Group:          Development/Languages/Python
 License:        Apache-2.0
 BuildArch:      noarch
-Requires:       libzfs2 = %{version}
-Requires:       libnvpair1 = %{version}
+Requires:       libzfs5 = %{version}-%{release}
+Requires:       libnvpair3 = %{version}-%{release}
 Requires:       libffi
 Requires:       python%{__python_pkg_version}
-Requires:       %{__python_cffi_pkg}
-%if 0%{?rhel}%{?fedora}%{?suse_version}
+
+%if 0%{?centos} == 7
+Requires:       python36-cffi
+%else
+Requires:       python%{__python_pkg_version}-cffi
+%endif
+
+%if 0%{?rhel}%{?centos}%{?fedora}%{?suse_version}%{?openEuler}
+%if 0%{?centos} == 7
+BuildRequires:  python36-packaging
+BuildRequires:  python36-devel
+BuildRequires:  python36-cffi
+BuildRequires:  python36-setuptools
+%else
+BuildRequires:  python%{__python_pkg_version}-packaging
 BuildRequires:  python%{__python_pkg_version}-devel
-BuildRequires:  %{__python_cffi_pkg}
-BuildRequires:  %{__python_setuptools_pkg}
+BuildRequires:  python%{__python_pkg_version}-cffi
+BuildRequires:  python%{__python_pkg_version}-setuptools
+%endif
+
 BuildRequires:  libffi-devel
 %endif
 
@@ -289,7 +320,6 @@
 Summary:        Initramfs module
 Group:          System Environment/Kernel
 Requires:       %{name}%{?_isa} = %{version}-%{release}
-Requires:       %{name} = %{version}-%{release}
 Requires:       initramfs-tools
 
 %description initramfs
@@ -329,6 +359,12 @@
     %define pyzfs --disable-pyzfs
 %endif
 
+%if %{with pam}
+    %define pam --enable-pam
+%else
+    %define pam --disable-pam
+%endif
+
 %setup -q
 
 %build
@@ -337,12 +373,16 @@
     --with-udevdir=%{_udevdir} \
     --with-udevruledir=%{_udevruledir} \
     --with-dracutdir=%{_dracutdir} \
+    --with-pamconfigsdir=%{_datadir}/pam-configs \
+    --with-pammoduledir=%{_libdir}/security \
     --with-python=%{__python} \
+    --with-pkgconfigdir=%{_pkgconfigdir} \
     --disable-static \
     %{debug} \
     %{debuginfo} \
     %{asan} \
-    %{systemd}\
+    %{systemd} \
+    %{pam} \
     %{pyzfs}
 make %{?_smp_mflags}
 
@@ -372,6 +412,7 @@
 %else
 if [ -x /sbin/chkconfig ]; then
     /sbin/chkconfig --add zfs-import
+    /sbin/chkconfig --add zfs-load-key
     /sbin/chkconfig --add zfs-mount
     /sbin/chkconfig --add zfs-share
     /sbin/chkconfig --add zfs-zed
@@ -402,6 +443,7 @@
 %else
 if [ "$1" = "0" -o "$1" = "remove" ] && [ -x /sbin/chkconfig ]; then
     /sbin/chkconfig --del zfs-import
+    /sbin/chkconfig --del zfs-load-key
     /sbin/chkconfig --del zfs-mount
     /sbin/chkconfig --del zfs-share
     /sbin/chkconfig --del zfs-zed
@@ -422,21 +464,24 @@
 # Core utilities
 %{_sbindir}/*
 %{_bindir}/raidz_test
-%{_bindir}/zgenhostid
+%{_sbindir}/zgenhostid
 %{_bindir}/zvol_wait
-# Optional Python 2/3 scripts
+# Optional Python 3 scripts
 %{_bindir}/arc_summary
 %{_bindir}/arcstat
 %{_bindir}/dbufstat
 # Man pages
 %{_mandir}/man1/*
+%{_mandir}/man4/*
 %{_mandir}/man5/*
+%{_mandir}/man7/*
 %{_mandir}/man8/*
 # Configuration files and scripts
 %{_libexecdir}/%{name}
 %{_udevdir}/vdev_id
 %{_udevdir}/zvol_id
 %{_udevdir}/rules.d/*
+%{_datadir}/%{name}/compatibility.d
 %if ! 0%{?_systemd} || 0%{?_initramfs}
 # Files needed for sysvinit and initramfs-tools
 %{_sysconfdir}/%{name}/zfs-functions
@@ -457,28 +502,36 @@
 %config(noreplace) %{_sysconfdir}/%{name}/zpool.d/*
 %config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example
 %attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/*
+%if %{with pam}
+%{_libdir}/security/*
+%{_datadir}/pam-configs/*
+%endif
 
-%files -n libzpool2
+%files -n libzpool5
 %{_libdir}/libzpool.so.*
 
-%files -n libnvpair1
+%files -n libnvpair3
 %{_libdir}/libnvpair.so.*
 
-%files -n libuutil1
+%files -n libuutil3
 %{_libdir}/libuutil.so.*
 
-%files -n libzfs2
+%files -n libzfs5
 %{_libdir}/libzfs*.so.*
 
-%files -n libzfs2-devel
-%{_libdir}/pkgconfig/libzfs.pc
-%{_libdir}/pkgconfig/libzfs_core.pc
+%files -n libzfs5-devel
+%{_pkgconfigdir}/libzfs.pc
+%{_pkgconfigdir}/libzfsbootenv.pc
+%{_pkgconfigdir}/libzfs_core.pc
 %{_libdir}/*.so
 %{_includedir}/*
 %doc AUTHORS COPYRIGHT LICENSE NOTICE README.md
 
 %files test
-%{_datadir}/%{name}
+%{_datadir}/%{name}/zfs-tests
+%{_datadir}/%{name}/test-runner
+%{_datadir}/%{name}/runfiles
+%{_datadir}/%{name}/*.sh
 
 %files dracut
 %doc contrib/dracut/README.dracut.markdown

diff --git a/zfs/rpm/redhat/zfs-kmod.spec.in b/zfs/rpm/redhat/zfs-kmod.spec.in
index f632c48..7b74fdc 100644
--- a/zfs/rpm/redhat/zfs-kmod.spec.in
+++ b/zfs/rpm/redhat/zfs-kmod.spec.in

@@ -1,9 +1,6 @@
 %bcond_with     debug
 %bcond_with     debuginfo
 
-# See comment in zfs.spec.in.
-%global __brp_mangle_shebangs_exclude_from arc_summary.py|arcstat.py|dbufstat.py|test-runner.py|zts-report.py
-
 Name:           @PACKAGE@-kmod
 Version:        @VERSION@
 Release:        @RELEASE@%{?dist}
@@ -11,7 +8,7 @@
 Summary:        Kernel module(s)
 Group:          System Environment/Kernel
 License:        @ZFS_META_LICENSE@
-URL:            http://zfsonlinux.org/
+URL:            https://github.com/openzfs/zfs
 BuildRequires:  %kernel_module_package_buildreqs
 Source0:        @PACKAGE@-%{version}.tar.gz
 BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
@@ -20,8 +17,9 @@
 # by generating a preamble text file which kmodtool can append to the spec file.
 %(/bin/echo -e "\
 Requires:       @PACKAGE@ = %{version}\n\
-Conflicts:      @PACKAGE@-dkms\n\n" > %{_sourcedir}/kmod-preamble\n\
-Obsoletes:      spl-kmod)
+Conflicts:      @PACKAGE@-dkms\n\
+Obsoletes:      kmod-spl\n\
+Obsoletes:      spl-kmod\n\n" > %{_sourcedir}/kmod-preamble)
 
 # LDFLAGS are not sanitized by arch/*/Makefile for these architectures.
 %ifarch ppc ppc64 ppc64le aarch64
@@ -71,7 +69,10 @@
         --with-linux=%{ksrc} \
         --with-linux-obj=%{kobj} \
         %{debug} \
-        %{debuginfo}
+        %{debuginfo} \
+        %{?kernel_cc} \
+        %{?kernel_ld} \
+        %{?kernel_llvm}
 make %{?_smp_mflags}
 
 %install

diff --git a/zfs/scripts/Makefile.am b/zfs/scripts/Makefile.am
index d275a41..047ae7e 100644
--- a/zfs/scripts/Makefile.am
+++ b/zfs/scripts/Makefile.am

@@ -1,31 +1,40 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 pkgdatadir = $(datadir)/@PACKAGE@
 
 dist_pkgdata_SCRIPTS = \
-	$(top_srcdir)/scripts/zimport.sh \
-	$(top_srcdir)/scripts/zfs.sh \
-	$(top_srcdir)/scripts/zfs-tests.sh \
-	$(top_srcdir)/scripts/zloop.sh \
-	$(top_srcdir)/scripts/zfs-helpers.sh
+	zimport.sh \
+	zfs.sh \
+	zfs-tests.sh \
+	zloop.sh \
+	zfs-helpers.sh
 
-EXTRA_DIST = \
+EXTRA_SCRIPTS = \
 	commitcheck.sh \
 	common.sh.in \
-	cstyle.pl \
 	dkms.mkconf \
 	dkms.postbuild \
-	enum-extract.pl \
 	kmodtool \
 	make_gitrev.sh \
 	man-dates.sh \
 	paxcheck.sh \
+	mancheck.sh
+
+EXTRA_DIST = \
+	cstyle.pl \
+	enum-extract.pl \
 	zfs2zol-patch.sed \
-	zol2zfs-patch.sed
+	zol2zfs-patch.sed \
+	$(EXTRA_SCRIPTS)
+
+SHELLCHECK_IGNORE = ,SC1117,SC2086,SC2295
+SHELLCHECKSCRIPTS = $(EXTRA_SCRIPTS)
 
 define EXTRA_ENVIRONMENT
 
 # Only required for in-tree use
 export INTREE="yes"
-export GDB="/usr/bin/libtool --mode=execute gdb"
+export GDB="libtool --mode=execute gdb"
 export LDMOD=/sbin/insmod
 
 export CMD_DIR=@abs_top_builddir@/cmd
@@ -34,8 +43,10 @@
 export ZEDLET_LIBEXEC_DIR=$$CMD_DIR/zed/zed.d
 export ZPOOL_SCRIPT_DIR=$$CMD_DIR/zpool/zpool.d
 export ZPOOL_SCRIPTS_PATH=$$CMD_DIR/zpool/zpool.d
+export ZPOOL_COMPAT_DIR=$$CMD_DIR/zpool/compatibility.d
 export CONTRIB_DIR=@abs_top_builddir@/contrib
 export LIB_DIR=@abs_top_builddir@/lib
+export SYSCONF_DIR=@abs_top_builddir@/etc
 
 export INSTALL_UDEV_DIR=@udevdir@
 export INSTALL_UDEV_RULE_DIR=@udevruledir@
@@ -51,6 +62,8 @@
 export KMOD_ZLUA=@abs_top_builddir@/module/lua/zlua.ko
 export KMOD_ICP=@abs_top_builddir@/module/icp/icp.ko
 export KMOD_ZFS=@abs_top_builddir@/module/zfs/zfs.ko
+export KMOD_FREEBSD=@abs_top_builddir@/module/openzfs.ko
+export KMOD_ZZSTD=@abs_top_builddir@/module/zstd/zzstd.ko
 endef
 
 export EXTRA_ENVIRONMENT
@@ -58,6 +71,7 @@
 all-local:
 	-$(SED) -e '\|^export BIN_DIR=|s|$$|@abs_top_builddir@/bin|' \
 		-e '\|^export SBIN_DIR=|s|$$|@abs_top_builddir@/bin|' \
+		-e '\|^export LIBEXEC_DIR=|s|$$|@abs_top_builddir@/bin|' \
 		-e '\|^export ZTS_DIR=|s|$$|@abs_top_srcdir@/tests|' \
 		-e '\|^export SCRIPT_DIR=|s|$$|@abs_top_srcdir@/scripts|' \
 		$(abs_top_srcdir)/scripts/common.sh.in >common.sh
@@ -69,6 +83,7 @@
 install-data-hook:
 	-$(SED) -e '\|^export BIN_DIR=|s|$$|@bindir@|' \
 		-e '\|^export SBIN_DIR=|s|$$|@sbindir@|' \
+		-e '\|^export LIBEXEC_DIR=|s|$$|@zfsexecdir@|' \
 		-e '\|^export ZTS_DIR=|s|$$|@datadir@/@PACKAGE@|' \
 		-e '\|^export SCRIPT_DIR=|s|$$|@datadir@/@PACKAGE@|' \
 		$(abs_top_srcdir)/scripts/common.sh.in \

diff --git a/zfs/scripts/commitcheck.sh b/zfs/scripts/commitcheck.sh
index 2954b0f..0077eb6 100755
--- a/zfs/scripts/commitcheck.sh
+++ b/zfs/scripts/commitcheck.sh

@@ -1,23 +1,10 @@
-#!/bin/bash
+#!/bin/sh
 
 REF="HEAD"
 
-# test a url
-function test_url()
-{
-    url="$1"
-    if ! curl --output /dev/null --max-time 60 \
-		--silent --head --fail "$url" ; then
-        echo "\"$url\" is unreachable"
-        return 1
-    fi
-
-    return 0
-}
-
 # test commit body for length
 # lines containing urls are exempt for the length limit.
-function test_commit_bodylength()
+test_commit_bodylength()
 {
     length="72"
     body=$(git log -n 1 --pretty=%b "$REF" | grep -Ev "http(s)*://" | grep -E -m 1 ".{$((length + 1))}")
@@ -30,9 +17,9 @@
 }
 
 # check for a tagged line
-function check_tagged_line()
+check_tagged_line()
 {
-    regex='^\s*'"$1"':\s[[:print:]]+\s<[[:graph:]]+>$'
+    regex='^[[:space:]]*'"$1"':[[:space:]][[:print:]]+[[:space:]]<[[:graph:]]+>$'
     foundline=$(git log -n 1 "$REF" | grep -E -m 1 "$regex")
     if [ -z "$foundline" ]; then
         echo "error: missing \"$1\""
@@ -42,30 +29,8 @@
     return 0
 }
 
-# check for a tagged line and check that the link is valid
-function check_tagged_line_with_url()
-{
-    regex='^\s*'"$1"':\s\K([[:graph:]]+)$'
-    foundline=$(git log -n 1 "$REF" | grep -Po "$regex")
-    if [ -z "$foundline" ]; then
-        echo "error: missing \"$1\""
-        return 1
-    fi
-
-    OLDIFS=$IFS
-    IFS=$'\n'
-    for url in $(echo -e "$foundline"); do
-        if ! test_url "$url"; then
-            return 1
-        fi
-    done
-    IFS=$OLDIFS
-
-    return 0
-}
-
 # check commit message for a normal commit
-function new_change_commit()
+new_change_commit()
 {
     error=0
 
@@ -89,57 +54,7 @@
     return $error
 }
 
-function is_openzfs_port()
-{
-    # subject starts with OpenZFS means it's an openzfs port
-    subject=$(git log -n 1 --pretty=%s "$REF" | grep -E -m 1 '^OpenZFS')
-    if [ -n "$subject" ]; then
-        return 0
-    fi
-
-    return 1
-}
-
-function openzfs_port_commit()
-{
-    error=0
-
-    # subject starts with OpenZFS dddd
-    subject=$(git log -n 1 --pretty=%s "$REF" | grep -E -m 1 '^OpenZFS [[:digit:]]+(, [[:digit:]]+)* - ')
-    if [ -z "$subject" ]; then
-        echo "error: OpenZFS patch ports must have a subject line that starts with \"OpenZFS dddd - \""
-        error=1
-    fi
-
-    # need an authored by line
-    if ! check_tagged_line "Authored by" ; then
-        error=1
-    fi
-
-    # need a reviewed by line
-    if ! check_tagged_line "Reviewed by" ; then
-        error=1
-    fi
-
-    # need ported by line
-    if ! check_tagged_line "Ported-by" ; then
-        error=1
-    fi
-
-    # need a url to openzfs commit and it should be valid
-    if ! check_tagged_line_with_url "OpenZFS-commit" ; then
-        error=1
-    fi
-
-    # need a url to illumos issue and it should be valid
-    if ! check_tagged_line_with_url "OpenZFS-issue" ; then
-        error=1
-    fi
-
-    return $error
-}
-
-function is_coverity_fix()
+is_coverity_fix()
 {
     # subject starts with Fix coverity defects means it's a coverity fix
     subject=$(git log -n 1 --pretty=%s "$REF" | grep -E -m 1 '^Fix coverity defects')
@@ -150,7 +65,7 @@
     return 1
 }
 
-function coverity_fix_commit()
+coverity_fix_commit()
 {
     error=0
 
@@ -169,11 +84,10 @@
 
     # test each summary line for the proper format
     OLDIFS=$IFS
-    IFS=$'\n'
+    IFS='
+'
     for line in $(git log -n 1 --pretty=%b "$REF" | grep -E '^CID'); do
-        echo "$line" | grep -E '^CID [[:digit:]]+: ([[:graph:]]+|[[:space:]])+ \(([[:upper:]]|\_)+\)' > /dev/null
-        # shellcheck disable=SC2181
-        if [[ $? -ne 0 ]]; then
+        if ! echo "$line" | grep -qE '^CID [[:digit:]]+: ([[:graph:]]+|[[:space:]])+ \(([[:upper:]]|\_)+\)'; then
             echo "error: commit message has an improperly formatted CID defect line"
             error=1
         fi
@@ -192,15 +106,6 @@
     REF="$1"
 fi
 
-# if openzfs port, test against that
-if is_openzfs_port; then
-    if ! openzfs_port_commit ; then
-        exit 1
-    else
-        exit 0
-    fi
-fi
-
 # if coverity fix, test against that
 if is_coverity_fix; then
     if ! coverity_fix_commit; then

diff --git a/zfs/scripts/common.sh.in b/zfs/scripts/common.sh.in
index 2d9d9c7..8268315 100644
--- a/zfs/scripts/common.sh.in
+++ b/zfs/scripts/common.sh.in

@@ -3,6 +3,7 @@
 # Directories
 export BIN_DIR=
 export SBIN_DIR=
+export LIBEXEC_DIR=
 export ZTS_DIR=
 export SCRIPT_DIR=
 

diff --git a/zfs/scripts/cstyle.pl b/zfs/scripts/cstyle.pl
index 00b33dd..d19718e 100755
--- a/zfs/scripts/cstyle.pl
+++ b/zfs/scripts/cstyle.pl

@@ -58,8 +58,9 @@
 use strict;
 
 my $usage =
-"usage: cstyle [-chpvCP] [-o constructs] file ...
+"usage: cstyle [-cghpvCP] [-o constructs] file ...
 	-c	check continuation indentation inside functions
+	-g	print github actions' workflow commands
 	-h	perform heuristic checks that are sometimes wrong
 	-p	perform some of the more picky checks
 	-v	verbose
@@ -73,12 +74,13 @@
 
 my %opts;
 
-if (!getopts("cho:pvCP", \%opts)) {
+if (!getopts("cgho:pvCP", \%opts)) {
 	print $usage;
 	exit 2;
 }
 
 my $check_continuation = $opts{'c'};
+my $github_workflow = $opts{'g'} || $ENV{'CI'};
 my $heuristic = $opts{'h'};
 my $picky = $opts{'p'};
 my $verbose = $opts{'v'};
@@ -197,7 +199,10 @@
 			printf $fmt, $filename, $., $error, $line;
 		} else {
 			printf $fmt, $filename, $., $error;
-		}	
+		}
+		if ($github_workflow) {
+			printf "::error file=%s,line=%s::%s\n", $filename, $., $error;
+		}
 		$err_stat = 1;
 	}
 }
@@ -415,7 +420,7 @@
 			$prev = $line;
 			next line;
 		} elsif ($picky	&& ! (/^\t/ && $function_header_full_indent != 0)) {
-			
+
 			err("continuation line should be indented by 4 spaces");
 		}
 	}

diff --git a/zfs/scripts/dkms.mkconf b/zfs/scripts/dkms.mkconf
index e1a49dc..4090efa 100755
--- a/zfs/scripts/dkms.mkconf
+++ b/zfs/scripts/dkms.mkconf

@@ -6,42 +6,37 @@
 
 while getopts "n:v:c:f:" opt; do
 	case $opt in
-		n) pkgname=$OPTARG ;;
-		v) pkgver=$OPTARG  ;;
-		c) pkgcfg=$OPTARG ;;
+		n) pkgname=$OPTARG  ;;
+		v) pkgver=$OPTARG   ;;
+		c) pkgcfg=$OPTARG   ;;
 		f) filename=$OPTARG ;;
+    *) err=1            ;;
 	esac
 done
 
-if [ -z "${pkgname}" -o -z "${pkgver}" -o -z "${filename}" ]; then
+if [ -z "${pkgname}" ] || [ -z "${pkgver}" ] || [ -z "${filename}" ] ||
+    [ -n "${err}" ]; then
 	echo "Usage: $PROG -n <pkgname> -v <pkgver> -c <pkgcfg> -f <filename>"
 	exit 1
 fi
 
-cat >${filename} <<EOF
+exec cat >"${filename}" <<EOF
 PACKAGE_NAME="${pkgname}"
 PACKAGE_VERSION="${pkgver}"
 PACKAGE_CONFIG="${pkgcfg}"
+NO_WEAK_MODULES="yes"
 PRE_BUILD="configure
   --prefix=/usr
   --with-config=kernel
   --with-linux=\$(
-    case \`lsb_release -is\` in
-      (Debian|Devuan)
-        if [[ -e \${kernel_source_dir/%build/source} ]]
-        then
-          echo \${kernel_source_dir/%build/source}
-        else
-          # A kpkg exception for Proxmox 2.0
-          echo \${kernel_source_dir}
-        fi
-      ;;
-      (*)
-        echo \${kernel_source_dir}
-      ;;
-    esac
+    if [ -e "\${kernel_source_dir/%build/source}" ]
+    then
+      echo "\${kernel_source_dir/%build/source}"
+    else
+      echo "\${kernel_source_dir}"
+    fi
   )
-  --with-linux-obj=\${kernel_source_dir}
+  --with-linux-obj="\${kernel_source_dir}"
   \$(
     [[ -n \"\${ICP_ROOT}\" ]] && \\
     {
@@ -73,7 +68,6 @@
   -t \${dkms_tree}
 "
 AUTOINSTALL="yes"
-REMAKE_INITRD="no"
 MAKE[0]="make"
 STRIP[0]="\$(
   [[ -r \${PACKAGE_CONFIG} ]] \\
@@ -89,6 +83,7 @@
 STRIP[5]="\${STRIP[0]}"
 STRIP[6]="\${STRIP[0]}"
 STRIP[7]="\${STRIP[0]}"
+STRIP[8]="\${STRIP[0]}"
 BUILT_MODULE_NAME[0]="zavl"
 BUILT_MODULE_LOCATION[0]="module/avl/"
 DEST_MODULE_LOCATION[0]="/extra/avl/avl"
@@ -113,4 +108,7 @@
 BUILT_MODULE_NAME[7]="spl"
 BUILT_MODULE_LOCATION[7]="module/spl/"
 DEST_MODULE_LOCATION[7]="/extra/spl/spl"
+BUILT_MODULE_NAME[8]="zzstd"
+BUILT_MODULE_LOCATION[8]="module/zstd/"
+DEST_MODULE_LOCATION[8]="/extra/zstd/zzstd"
 EOF

diff --git a/zfs/scripts/dkms.postbuild b/zfs/scripts/dkms.postbuild
index 299c02d..a2ceff6 100755
--- a/zfs/scripts/dkms.postbuild
+++ b/zfs/scripts/dkms.postbuild

@@ -9,16 +9,17 @@
 		n) pkgname=$OPTARG ;;
 		t) tree=$OPTARG    ;;
 		v) pkgver=$OPTARG  ;;
+		*) err=1           ;;
 	esac
 done
 
-if [ -z "${arch}" -o -z "${kver}" -o -z "${pkgname}" -o \
-     -z "${tree}" -o -z "${pkgver}" ]; then
+if [ -z "${arch}" ] || [ -z "${kver}" ] || [ -z "${pkgname}" ] || \
+    [ -z "${tree}" ] || [ -z "${pkgver}" ] || [ -n "${err}" ]; then
 	echo "Usage: $PROG -a <arch> -k <kver> -n <pkgname>" \
-	     "-t <tree> -v <pkgver>"
+	    "-t <tree> -v <pkgver>"
 	exit 1
 fi
 
-cp "${tree}/${pkgname}/${pkgver}/build/zfs_config.h"          \
+exec cp "${tree}/${pkgname}/${pkgver}/build/zfs_config.h"     \
    "${tree}/${pkgname}/${pkgver}/build/module/Module.symvers" \
    "${tree}/${pkgname}/${pkgver}/${kver}/${arch}/"

diff --git a/zfs/scripts/enum-extract.pl b/zfs/scripts/enum-extract.pl
index 5112cc8..5dc2e34 100755
--- a/zfs/scripts/enum-extract.pl
+++ b/zfs/scripts/enum-extract.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 
 my $usage = <<EOT;
 usage: config-enum enum [file ...]

diff --git a/zfs/scripts/kmodtool b/zfs/scripts/kmodtool
index b928c92..b102159 100755
--- a/zfs/scripts/kmodtool
+++ b/zfs/scripts/kmodtool

@@ -1,4 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
+# shellcheck disable=SC2086
 
 # kmodtool - Helper script for building kernel module RPMs
 # Copyright (c) 2003-2012 Ville Skyttä <ville.skytta@iki.fi>,
@@ -38,15 +39,16 @@
 filterfile=
 target=
 buildroot=
+dashvariant=
 
 error_out()
 {
 	local errorlevel=${1}
 	shift
-	echo "Error: $@" >&2
+	echo "Error: $*" >&2
 	# the next line is not multi-line safe -- not needed *yet*
-	echo "%global kmodtool_check echo \"kmodtool error: $@\"; exit ${errorlevel};"
-	exit ${errorlevel}
+	echo "%global kmodtool_check echo \"kmodtool error: $*\"; exit ${errorlevel};"
+	exit "${errorlevel}"
 }
 
 print_rpmtemplate_header()
@@ -182,9 +184,21 @@
 %{?KmodsRequires:Requires: %{KmodsRequires}-uname-r = ${kernel_uname_r}}
 %{?KmodsRequires:BuildRequires: %{KmodsRequires}-uname-r = ${kernel_uname_r}}
 %post          -n kmod-${kmodname}-${kernel_uname_r}
-${prefix}${depmod_path} -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || :
+if [[ -f "/boot/System.map-${kernel_uname_r}" ]]; then
+	${prefix}${depmod_path} -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || :
+elif [[ -f "/lib/modules/${kernel_uname_r}/System.map" ]]; then
+	${prefix}${depmod_path} -aeF /lib/modules/${kernel_uname_r}/System.map ${kernel_uname_r} > /dev/null || :
+else
+	${prefix}${depmod_path} -ae ${kernel_uname_r} &> /dev/null || :
+fi
 %postun        -n kmod-${kmodname}-${kernel_uname_r}
-${prefix}${depmod_path} -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || :
+if [[ -f "/boot/System.map-${kernel_uname_r}" ]]; then
+	${prefix}${depmod_path} -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || :
+elif [[ -f "/lib/modules/${kernel_uname_r}/System.map" ]]; then
+	${prefix}${depmod_path} -aF /lib/modules/${kernel_uname_r}/System.map ${kernel_uname_r} &> /dev/null || :
+else
+	${prefix}${depmod_path} -a ${kernel_uname_r} &> /dev/null || :
+fi
 
 EOF
 	else
@@ -432,7 +446,7 @@
 
 myprog_help ()
 {
-	echo "Usage: $(basename ${0}) [OPTIONS]"
+	echo "Usage: ${0##*/} [OPTIONS]"
 	echo $'\n'"Creates a template to be used during kmod building"
 	echo $'\n'"Available options:"
 	echo " --filterfile <file>  -- filter the results with grep --file <file>"
@@ -567,7 +581,7 @@
 		error_out 2 "please pass kmodname with --kmodname"
 elif [[ ! "${kernels_known_variants}" ]] ; then
 		error_out 2 "could not determine known variants"
-elif ( [[ "${obsolete_name}" ]] && [[ ! "${obsolete_version}" ]] ) ||  ( [[ ! "${obsolete_name}" ]] && [[ "${obsolete_version}" ]] ) ; then
+elif { [[ "${obsolete_name}" ]] && [[ ! "${obsolete_version}" ]]; } ||  { [[ ! "${obsolete_name}" ]] && [[ "${obsolete_version}" ]]; } ; then
 		error_out 2 "you need to provide both --obsolete-name and --obsolete-version"
 fi
 
@@ -585,7 +599,7 @@
 	# we need more sanity checks in this case
 	if [[ ! "${repo}" ]]; then
 		error_out 2 "please provide repo name with --repo"
-	elif ! $(which buildsys-build-${repo}-kerneldevpkgs &> /dev/null) ; then
+	elif ! command -v "buildsys-build-${repo}-kerneldevpkgs" &> /dev/null ; then
 		error_out 2 "buildsys-build-${repo}-kerneldevpkgs not found"
 	fi
 
@@ -599,7 +613,7 @@
 
 	kernel_versions_to_build_for="$(buildsys-build-${repo}-kerneldevpkgs --${build_kernels} ${cmdoptions})"
 	returncode=$?
-	if (( ${returncode} != 0 )); then
+	if (( returncode != 0 )); then
 		error_out 2 "buildsys-build-${repo}-kerneldevpkgs failed: $(buildsys-build-${repo}-kerneldevpkgs --${build_kernels} ${cmdoptions})"
 	fi
 

diff --git a/zfs/scripts/make_gitrev.sh b/zfs/scripts/make_gitrev.sh
index 1cf1437..e7f4ce8 100755
--- a/zfs/scripts/make_gitrev.sh
+++ b/zfs/scripts/make_gitrev.sh

@@ -27,19 +27,52 @@
 
 set -e -u
 
-cleanup() {
-    ZFS_GIT_REV=${ZFS_GIT_REV:-"unknown"}
-    cat << EOF > "$(dirname "$0")"/../include/zfs_gitrev.h
-#define	ZFS_META_GITREV "${ZFS_GIT_REV}"
-EOF
-}
-trap cleanup EXIT
+dist=no
+distdir=.
+while getopts D: flag
+do
+	case $flag in
+		\?) echo "Usage: $0 [-D distdir] [file]" >&2; exit 1;;
+		D)  dist=yes; distdir=${OPTARG};;
+	esac
+done
+shift $((OPTIND - 1))
 
-# Check if git is installed and we are in a git repo.
-git rev-parse --git-dir > /dev/null 2>&1
-# Get the git current git revision
-ZFS_GIT_REV=$(git describe --always --long --dirty 2>/dev/null)
-# Check if header file already contain the exact string
-grep -sq "\"${ZFS_GIT_REV}\"" "$(dirname "$0")"/../include/zfs_gitrev.h &&
-	trap - EXIT
-exit 0
+top_srcdir="$(dirname "$0")/.."
+GITREV="${1:-include/zfs_gitrev.h}"
+
+# GITREV should be a relative path (relative to top_builddir or distdir)
+case "${GITREV}" in
+	/*) echo "Error: ${GITREV} should be a relative path" >&2
+	    exit 1;;
+esac
+
+ZFS_GITREV=$({ cd "${top_srcdir}" &&
+	git describe --always --long --dirty 2>/dev/null; } || :)
+
+if [ -z "${ZFS_GITREV}" ]
+then
+	# If the source directory is not a git repository, check if the file
+	# already exists (in the source)
+	if [ -f "${top_srcdir}/${GITREV}" ]
+	then
+		ZFS_GITREV=$(sed -n \
+			'1s/^#define[[:blank:]]ZFS_META_GITREV "\([^"]*\)"$/\1/p' \
+			"${top_srcdir}/${GITREV}")
+	fi
+elif [ ${dist} = yes ]
+then
+	# Append -dist when creating distributed sources from a git repository
+	ZFS_GITREV="${ZFS_GITREV}-dist"
+fi
+ZFS_GITREV=${ZFS_GITREV:-unknown}
+
+GITREVTMP="${GITREV}~"
+printf '#define\tZFS_META_GITREV "%s"\n' "${ZFS_GITREV}" >"${GITREVTMP}"
+GITREV="${distdir}/${GITREV}"
+if cmp -s "${GITREV}" "${GITREVTMP}"
+then
+	rm -f "${GITREVTMP}"
+else
+	mv -f "${GITREVTMP}" "${GITREV}"
+fi

diff --git a/zfs/scripts/man-dates.sh b/zfs/scripts/man-dates.sh
index 186d946..39f1b5f 100755
--- a/zfs/scripts/man-dates.sh
+++ b/zfs/scripts/man-dates.sh

@@ -7,6 +7,6 @@
 
 find man -type f | while read -r i ; do
     git_date=$(git log -1 --date=short --format="%ad" -- "$i")
-    [ "x$git_date" = "x" ] && continue
+    [ -z "$git_date" ] && continue
     sed -i "s|^\.Dd.*|.Dd $(date -d "$git_date" "+%B %-d, %Y")|" "$i"
 done

diff --git a/zfs/scripts/mancheck.sh b/zfs/scripts/mancheck.sh
new file mode 100755
index 0000000..0793cc4
--- /dev/null
+++ b/zfs/scripts/mancheck.sh

@@ -0,0 +1,71 @@
+#!/bin/sh
+#
+# Permission to use, copy, modify, and/or distribute this software for
+# any purpose with or without fee is hereby granted.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# shellcheck disable=SC2086,SC2250
+
+# Lint manual pages with "mandoc -Tlint".
+#
+# Usage: mancheck.sh manpage-directory...
+#
+# Lints every regular file under the given directories whose name contains
+# a digit 1-9.  A page may suppress one specific diagnostic by containing a
+# line of the form '.\" lint-ok: <text>', where <text> is the diagnostic as
+# mandoc prints it after the "mandoc: file:line:col:" prefix.
+#
+# Exit status: 1 if any diagnostic survives filtering, or on usage/find
+# failure; 0 otherwise (including when mandoc is missing or nothing matched).
+
+trap 'rm -f "$stdout_file" "$stderr_file" "$result_file"' EXIT
+
+if [ "$#" -eq 0 ]; then
+    echo "Usage: $0 manpage-directory..." >&2
+    exit 1
+fi
+
+if ! command -v mandoc > /dev/null; then
+    echo "skipping mancheck because mandoc is not installed"
+    exit 0
+fi
+
+IFS="
+"
+files="$(find "$@" -type f -name '*[1-9]*')" || exit 1
+
+# With no file operands awk and mandoc would read stdin and block, so stop here.
+if [ -z "$files" ]; then
+    echo "no manual pages found"
+    exit 0
+fi
+
+add_excl="$(awk '
+    /^.\\" lint-ok:/ {
+        print "-e"
+        $1 = "mandoc:"
+        $2 = FILENAME ":[[:digit:]]+:[[:digit:]]+:"
+        print
+    }' $files)"
+
+# Redirect to file instead of 2>&1ing because mandoc flushes inconsistently(?) which tears lines
+# https://github.com/openzfs/zfs/pull/12129/checks?check_run_id=2701608671#step:5:3
+stdout_file="$(mktemp)"
+stderr_file="$(mktemp)"
+mandoc -Tlint $files 1>"$stdout_file" 2>"$stderr_file"
+result_file="$(mktemp)"
+grep -vhE -e 'mandoc: outdated mandoc.db' -e 'STYLE: referenced manual not found' $add_excl "$stdout_file" "$stderr_file" > "$result_file"
+
+if [ -s "$result_file" ]; then
+    cat "$result_file"
+    exit 1
+else
+    echo "no errors found"
+fi

diff --git a/zfs/scripts/paxcheck.sh b/zfs/scripts/paxcheck.sh
index 87e8175..27acc95 100755
--- a/zfs/scripts/paxcheck.sh
+++ b/zfs/scripts/paxcheck.sh

@@ -1,7 +1,6 @@
 #!/bin/sh
 
-# shellcheck disable=SC2039
-if ! type scanelf > /dev/null 2>&1; then
+if ! command -v scanelf > /dev/null; then
     echo "scanelf (from pax-utils) is required for these checks." >&2
     exit 3
 fi

diff --git a/zfs/scripts/signmod b/zfs/scripts/signmod
new file mode 100644
index 0000000..90ce02e
--- /dev/null
+++ b/zfs/scripts/signmod

@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Sign a kernel module using scripts/sign-file with SHA-256.
+#
+# Usage: signmod <module>
+#
+# Environment:
+#   KERNEL_FILES  directory holding signing_key.pem and signing_key.x509
+#   KDIR          directory containing scripts/sign-file
+#
+set -x
+
+# Abort with a clear message instead of invoking "/scripts/sign-file" on
+# "/signing_key.pem" when the argument or an environment variable is missing.
+MOD="${1:?usage: signmod <module>}"
+: "${KERNEL_FILES:?KERNEL_FILES must point at the signing key directory}"
+: "${KDIR:?KDIR must point at the directory containing scripts/sign-file}"
+
+echo "Signing module: ${MOD}"
+MODSECKEY="${KERNEL_FILES}/signing_key.pem"
+MODPUBKEY="${KERNEL_FILES}/signing_key.x509"
+
+"${KDIR}/scripts/sign-file" "sha256" "${MODSECKEY}" "${MODPUBKEY}" "${MOD}"

diff --git a/zfs/scripts/zfs-tests.sh b/zfs/scripts/zfs-tests.sh
index ce766e2..1e0cf66 100755
--- a/zfs/scripts/zfs-tests.sh
+++ b/zfs/scripts/zfs-tests.sh

@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 #
 # CDDL HEADER START
 #
@@ -21,6 +21,10 @@
 # CDDL HEADER END
 #
 
+#
+# Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+#
+
 BASE_DIR=$(dirname "$0")
 SCRIPT_COMMON=common.sh
 if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
@@ -34,22 +38,35 @@
 QUIET=""
 CLEANUP="yes"
 CLEANUPALL="no"
+KMSG=""
 LOOPBACK="yes"
 STACK_TRACER="no"
 FILESIZE="4G"
-RUNFILE=${RUNFILE:-"linux.run"}
+DEFAULT_RUNFILES="common.run,$(uname | tr '[:upper:]' '[:lower:]').run"
+RUNFILES=${RUNFILES:-$DEFAULT_RUNFILES}
 FILEDIR=${FILEDIR:-/var/tmp}
 DISKS=${DISKS:-""}
-SINGLETEST=()
+SINGLETEST=""
 SINGLETESTUSER="root"
 TAGS=""
 ITERATIONS=1
 ZFS_DBGMSG="$STF_SUITE/callbacks/zfs_dbgmsg.ksh"
 ZFS_DMESG="$STF_SUITE/callbacks/zfs_dmesg.ksh"
-ZFS_MMP="$STF_SUITE/callbacks/zfs_mmp.ksh"
-TESTFAIL_CALLBACKS=${TESTFAIL_CALLBACKS:-"$ZFS_DBGMSG:$ZFS_DMESG:$ZFS_MMP"}
-LOSETUP=${LOSETUP:-/sbin/losetup}
-DMSETUP=${DMSETUP:-/sbin/dmsetup}
+UNAME=$(uname -s)
+RERUN=""
+KMEMLEAK=""
+
+# Override some defaults if on FreeBSD
+if [ "$UNAME" = "FreeBSD" ] ; then
+	TESTFAIL_CALLBACKS=${TESTFAIL_CALLBACKS:-"$ZFS_DMESG"}
+	LOSETUP=/sbin/mdconfig
+	DMSETUP=/sbin/gpart
+else
+	ZFS_MMP="$STF_SUITE/callbacks/zfs_mmp.ksh"
+	TESTFAIL_CALLBACKS=${TESTFAIL_CALLBACKS:-"$ZFS_DBGMSG:$ZFS_DMESG:$ZFS_MMP"}
+	LOSETUP=${LOSETUP:-/sbin/losetup}
+	DMSETUP=${DMSETUP:-/sbin/dmsetup}
+fi
 
 #
 # Log an informational message when additional verbosity is enabled.
@@ -64,11 +81,38 @@
 # Log a failure message, cleanup, and return an error.
 #
 fail() {
-        echo -e "$PROG: $1" >&2
+	echo "$PROG: $1" >&2
 	cleanup
 	exit 1
 }
 
+# Detach every entry of $LOOPBACKS that still exists as a character device.
+cleanup_freebsd_loopback() {
+	for TEST_LOOPBACK in ${LOOPBACKS}; do
+		[ -c "/dev/${TEST_LOOPBACK}" ] || continue
+		sudo "${LOSETUP}" -d -u "${TEST_LOOPBACK}" ||
+		    echo "Failed to destroy: ${TEST_LOOPBACK}"
+	done
+}
+
+cleanup_linux_loopback() {
+	for TEST_LOOPBACK in ${LOOPBACKS}; do
+		LOOP_DEV="${TEST_LOOPBACK##*/}"
+		DM_DEV=$(sudo "${DMSETUP}" ls 2>/dev/null | \
+		    grep "${LOOP_DEV}" | cut -f1)
+		# DM_DEV: first field of every "dmsetup ls" line mentioning LOOP_DEV.
+		if [ -n "$DM_DEV" ]; then
+			sudo "${DMSETUP}" remove "${DM_DEV}" ||
+			    echo "Failed to remove: ${DM_DEV}"
+		fi
+		# Detach the loop device itself after any dm entry was handled.
+		if [ -n "${TEST_LOOPBACK}" ]; then
+			sudo "${LOSETUP}" -d "${TEST_LOOPBACK}" ||
+			    echo "Failed to remove: ${TEST_LOOPBACK}"
+		fi
+	done
+}
+
 #
 # Attempt to remove loopback devices and files which where created earlier
 # by this script to run the test framework.  The '-k' option may be passed
@@ -79,26 +123,17 @@
 		return 0
 	fi
 
+
 	if [ "$LOOPBACK" = "yes" ]; then
-		for TEST_LOOPBACK in ${LOOPBACKS}; do
-			LOOP_DEV=$(basename "$TEST_LOOPBACK")
-			DM_DEV=$(sudo "${DMSETUP}" ls 2>/dev/null | \
-			    grep "${LOOP_DEV}" | cut -f1)
-
-			if [ -n "$DM_DEV" ]; then
-				sudo "${DMSETUP}" remove "${DM_DEV}" ||
-				    echo "Failed to remove: ${DM_DEV}"
-			fi
-
-			if [ -n "${TEST_LOOPBACK}" ]; then
-				sudo "${LOSETUP}" -d "${TEST_LOOPBACK}" ||
-				    echo "Failed to remove: ${TEST_LOOPBACK}"
-			fi
-		done
+		if [ "$UNAME" = "FreeBSD" ] ; then
+			cleanup_freebsd_loopback
+		else
+			cleanup_linux_loopback
+		fi
 	fi
 
 	for TEST_FILE in ${FILES}; do
-		rm -f "${TEST_FILE}" &>/dev/null
+		rm -f "${TEST_FILE}" >/dev/null 2>&1
 	done
 
 	if [ "$STF_PATH_REMOVE" = "yes" ] && [ -d "$STF_PATH" ]; then
@@ -114,11 +149,12 @@
 # be dangerous and should only be used in a dedicated test environment.
 #
 cleanup_all() {
-	local TEST_POOLS
 	TEST_POOLS=$(sudo "$ZPOOL" list -H -o name | grep testpool)
-	local TEST_LOOPBACKS
-	TEST_LOOPBACKS=$(sudo "${LOSETUP}" -a|grep file-vdev|cut -f1 -d:)
-	local TEST_FILES
+	if [ "$UNAME" = "FreeBSD" ] ; then
+		TEST_LOOPBACKS=$(sudo "${LOSETUP}" -l)
+	else
+		TEST_LOOPBACKS=$(sudo "${LOSETUP}" -a|grep file-vdev|cut -f1 -d:)
+	fi
 	TEST_FILES=$(ls /var/tmp/file-vdev* 2>/dev/null)
 
 	msg
@@ -128,13 +164,19 @@
 		sudo "$ZPOOL" destroy "${TEST_POOL}"
 	done
 
-	msg "Removing dm(s):       $(sudo "${DMSETUP}" ls |
-	    grep loop | tr '\n' ' ')"
-	sudo "${DMSETUP}" remove_all
+	if [ "$UNAME" != "FreeBSD" ] ; then
+		msg "Removing dm(s):       $(sudo "${DMSETUP}" ls |
+		    grep loop | tr '\n' ' ')"
+		sudo "${DMSETUP}" remove_all
+	fi
 
 	msg "Removing loopback(s): $(echo "${TEST_LOOPBACKS}" | tr '\n' ' ')"
 	for TEST_LOOPBACK in $TEST_LOOPBACKS; do
-		sudo "${LOSETUP}" -d "${TEST_LOOPBACK}"
+		if [ "$UNAME" = "FreeBSD" ] ; then
+			sudo "${LOSETUP}" -d -u "${TEST_LOOPBACK}"
+		else
+			sudo "${LOSETUP}" -d "${TEST_LOOPBACK}"
+		fi
 	done
 
 	msg "Removing files(s):    $(echo "${TEST_FILES}" | tr '\n' ' ')"
@@ -153,8 +195,8 @@
 # <name>.run
 #
 find_runfile() {
-	local NAME=$1
-	local RESULT=""
+	NAME=$1
+	RESULT=""
 
 	if [ -f "$RUNFILE_DIR/$NAME" ]; then
 		RESULT="$RUNFILE_DIR/$NAME"
@@ -173,8 +215,8 @@
 # Symlink file if it appears under any of the given paths.
 #
 create_links() {
-	local dir_list="$1"
-	local file_list="$2"
+	dir_list="$1"
+	file_list="$2"
 
 	[ -n "$STF_PATH" ] || fail "STF_PATH wasn't correctly set"
 
@@ -183,14 +225,16 @@
 			[ ! -e "$STF_PATH/$i" ] || continue
 
 			if [ ! -d "$j/$i" ] && [ -e "$j/$i" ]; then
-				ln -s "$j/$i" "$STF_PATH/$i" || \
+				ln -sf "$j/$i" "$STF_PATH/$i" || \
 				    fail "Couldn't link $i"
 				break
 			fi
 		done
 
-		[ ! -e "$STF_PATH/$i" ] && STF_MISSING_BIN="$STF_MISSING_BIN$i "
+		[ ! -e "$STF_PATH/$i" ] && \
+		    STF_MISSING_BIN="$STF_MISSING_BIN $i"
 	done
+	STF_MISSING_BIN=${STF_MISSING_BIN# }
 }
 
 #
@@ -201,6 +245,12 @@
 constrain_path() {
 	. "$STF_SUITE/include/commands.cfg"
 
+	# On FreeBSD, base system zfs utils are in /sbin and OpenZFS utils
+	# install to /usr/local/sbin. To avoid testing the wrong utils we
+	# need /usr/local to come before / in the path search order.
+	SYSTEM_DIRS="/usr/local/bin /usr/local/sbin"
+	SYSTEM_DIRS="$SYSTEM_DIRS /usr/bin /usr/sbin /bin /sbin $LIBEXEC_DIR"
+
 	if [ "$INTREE" = "yes" ]; then
 		# Constrained path set to ./zfs/bin/
 		STF_PATH="$BIN_DIR"
@@ -222,34 +272,40 @@
 		create_links "$DIRS" "$ZFSTEST_FILES"
 	else
 		# Constrained path set to /var/tmp/constrained_path.*
-		SYSTEMDIR=${SYSTEMDIR:-/var/tmp/constrained_path.XXXX}
-		STF_PATH=$(/bin/mktemp -d "$SYSTEMDIR")
+		SYSTEMDIR=${SYSTEMDIR:-/var/tmp/constrained_path.XXXXXX}
+		STF_PATH=$(mktemp -d "$SYSTEMDIR")
 		STF_PATH_REMOVE="yes"
 		STF_MISSING_BIN=""
 
 		chmod 755 "$STF_PATH" || fail "Couldn't chmod $STF_PATH"
 
 		# Special case links for standard zfs utilities
-		create_links "/bin /usr/bin /sbin /usr/sbin" "$ZFS_FILES"
+		create_links "$SYSTEM_DIRS" "$ZFS_FILES"
 
 		# Special case links for zfs test suite utilities
 		create_links "$STF_SUITE/bin" "$ZFSTEST_FILES"
 	fi
 
 	# Standard system utilities
-	create_links "/bin /usr/bin /sbin /usr/sbin" "$SYSTEM_FILES"
+	SYSTEM_FILES="$SYSTEM_FILES_COMMON"
+	if [ "$UNAME" = "FreeBSD" ] ; then
+		SYSTEM_FILES="$SYSTEM_FILES $SYSTEM_FILES_FREEBSD"
+	else
+		SYSTEM_FILES="$SYSTEM_FILES $SYSTEM_FILES_LINUX"
+	fi
+	create_links "$SYSTEM_DIRS" "$SYSTEM_FILES"
 
 	# Exceptions
 	ln -fs "$STF_PATH/awk" "$STF_PATH/nawk"
-	ln -fs /sbin/fsck.ext4 "$STF_PATH/fsck"
-	ln -fs /sbin/mkfs.ext4 "$STF_PATH/newfs"
-	ln -fs "$STF_PATH/gzip" "$STF_PATH/compress"
-	ln -fs "$STF_PATH/gunzip" "$STF_PATH/uncompress"
-	ln -fs "$STF_PATH/exportfs" "$STF_PATH/share"
-	ln -fs "$STF_PATH/exportfs" "$STF_PATH/unshare"
-
-	if [ -L "$STF_PATH/arc_summary3" ]; then
-		ln -fs "$STF_PATH/arc_summary3" "$STF_PATH/arc_summary"
+	if [ "$UNAME" = "Linux" ] ; then
+		ln -fs /sbin/fsck.ext4 "$STF_PATH/fsck"
+		ln -fs /sbin/mkfs.ext4 "$STF_PATH/newfs"
+		ln -fs "$STF_PATH/gzip" "$STF_PATH/compress"
+		ln -fs "$STF_PATH/gunzip" "$STF_PATH/uncompress"
+		ln -fs "$STF_PATH/exportfs" "$STF_PATH/share"
+		ln -fs "$STF_PATH/exportfs" "$STF_PATH/unshare"
+	elif [ "$UNAME" = "FreeBSD" ] ; then
+		ln -fs /usr/local/bin/ksh93 "$STF_PATH/ksh"
 	fi
 }
 
@@ -259,7 +315,7 @@
 usage() {
 cat << EOF
 USAGE:
-$0 [hvqxkfS] [-s SIZE] [-r RUNFILE] [-t PATH] [-u USER]
+$0 [-hvqxkfS] [-s SIZE] [-r RUNFILES] [-t PATH] [-u USER]
 
 DESCRIPTION:
 	ZFS Test Suite launch script
@@ -270,14 +326,17 @@
 	-q          Quiet test-runner output
 	-x          Remove all testpools, dm, lo, and files (unsafe)
 	-k          Disable cleanup after test failure
+	-K          Log test names to /dev/kmsg
 	-f          Use files only, disables block device tests
 	-S          Enable stack tracer (negative performance impact)
 	-c          Only create and populate constrained path
+	-R          Automatically rerun failing tests
+	-m          Enable kmemleak reporting (Linux only)
 	-n NFSFILE  Use the nfsfile to determine the NFS configuration
 	-I NUM      Number of iterations
 	-d DIR      Use DIR for files and loopback devices
 	-s SIZE     Use vdevs of SIZE (default: 4G)
-	-r RUNFILE  Run tests in RUNFILE (default: linux.run)
+	-r RUNFILES Run tests in RUNFILES (default: ${DEFAULT_RUNFILES})
 	-t PATH     Run single test at PATH relative to test suite
 	-T TAGS     Comma separated list of tags (default: 'functional')
 	-u USER     Run single test as USER (default: root)
@@ -289,6 +348,9 @@
 # Run a smaller suite of tests designed to run more quickly.
 $0 -r linux-fast
 
+# Run a single test
+$0 -t tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh
+
 # Cleanup a previous run of the test suite prior to testing, run the
 # default (linux) suite of tests and perform no cleanup on exit.
 $0 -x
@@ -296,14 +358,13 @@
 EOF
 }
 
-while getopts 'hvqxkfScn:d:s:r:?t:T:u:I:' OPTION; do
+while getopts 'hvqxkKfScRmn:d:s:r:?t:T:u:I:' OPTION; do
 	case $OPTION in
 	h)
 		usage
 		exit 1
 		;;
 	v)
-		# shellcheck disable=SC2034
 		VERBOSE="yes"
 		;;
 	q)
@@ -315,6 +376,9 @@
 	k)
 		CLEANUP="no"
 		;;
+	K)
+		KMSG="yes"
+		;;
 	f)
 		LOOPBACK="no"
 		;;
@@ -325,9 +389,15 @@
 		constrain_path
 		exit
 		;;
+	R)
+		RERUN="yes"
+		;;
+	m)
+		KMEMLEAK="yes"
+		;;
 	n)
 		nfsfile=$OPTARG
-		[[ -f $nfsfile ]] || fail "Cannot read file: $nfsfile"
+		[ -f "$nfsfile" ] || fail "Cannot read file: $nfsfile"
 		export NFS=1
 		. "$nfsfile"
 		;;
@@ -344,13 +414,13 @@
 		FILESIZE="$OPTARG"
 		;;
 	r)
-		RUNFILE="$OPTARG"
+		RUNFILES="$OPTARG"
 		;;
 	t)
-		if [ ${#SINGLETEST[@]} -ne 0 ]; then
+		if [ -n "$SINGLETEST" ]; then
 			fail "-t can only be provided once."
 		fi
-		SINGLETEST+=("$OPTARG")
+		SINGLETEST="$OPTARG"
 		;;
 	T)
 		TAGS="$OPTARG"
@@ -370,19 +440,19 @@
 FILES=${FILES:-"$FILEDIR/file-vdev0 $FILEDIR/file-vdev1 $FILEDIR/file-vdev2"}
 LOOPBACKS=${LOOPBACKS:-""}
 
-if [ ${#SINGLETEST[@]} -ne 0 ]; then
+if [ -n "$SINGLETEST" ]; then
 	if [ -n "$TAGS" ]; then
 		fail "-t and -T are mutually exclusive."
 	fi
 	RUNFILE_DIR="/var/tmp"
-	RUNFILE="zfs-tests.$$.run"
+	RUNFILES="zfs-tests.$$.run"
 	SINGLEQUIET="False"
 
 	if [ -n "$QUIET" ]; then
 		SINGLEQUIET="True"
 	fi
 
-	cat >$RUNFILE_DIR/$RUNFILE << EOF
+	cat >$RUNFILE_DIR/$RUNFILES << EOF
 [DEFAULT]
 pre =
 quiet = $SINGLEQUIET
@@ -393,22 +463,20 @@
 post =
 outputdir = /var/tmp/test_results
 EOF
-	for t in "${SINGLETEST[@]}"
-	do
-		SINGLETESTDIR=$(dirname "$t")
-		SINGLETESTFILE=$(basename "$t")
-		SETUPSCRIPT=
-		CLEANUPSCRIPT=
+	SINGLETESTDIR=$(dirname "$SINGLETEST")
+	SINGLETESTFILE=$(basename "$SINGLETEST")
+	SETUPSCRIPT=
+	CLEANUPSCRIPT=
 
-		if [ -f "$STF_SUITE/$SINGLETESTDIR/setup.ksh" ]; then
-			SETUPSCRIPT="setup"
-		fi
+	if [ -f "$STF_SUITE/$SINGLETESTDIR/setup.ksh" ]; then
+		SETUPSCRIPT="setup"
+	fi
 
-		if [ -f "$STF_SUITE/$SINGLETESTDIR/cleanup.ksh" ]; then
-			CLEANUPSCRIPT="cleanup"
-		fi
+	if [ -f "$STF_SUITE/$SINGLETESTDIR/cleanup.ksh" ]; then
+		CLEANUPSCRIPT="cleanup"
+	fi
 
-		cat >>$RUNFILE_DIR/$RUNFILE << EOF
+	cat >>$RUNFILE_DIR/$RUNFILES << EOF
 
 [$SINGLETESTDIR]
 tests = ['$SINGLETESTFILE']
@@ -416,7 +484,6 @@
 post = $CLEANUPSCRIPT
 tags = ['functional']
 EOF
-	done
 fi
 
 #
@@ -425,17 +492,24 @@
 TAGS=${TAGS:='functional'}
 
 #
-# Attempt to locate the runfile describing the test workload.
+# Attempt to locate the runfiles describing the test workload.
 #
-if [ -n "$RUNFILE" ]; then
-	SAVED_RUNFILE="$RUNFILE"
-	RUNFILE=$(find_runfile "$RUNFILE")
-	[ -z "$RUNFILE" ] && fail "Cannot find runfile: $SAVED_RUNFILE"
-fi
+R=""
+IFS=,
+for RUNFILE in $RUNFILES; do
+	if [ -n "$RUNFILE" ]; then
+		SAVED_RUNFILE="$RUNFILE"
+		RUNFILE=$(find_runfile "$RUNFILE")
+		[ -z "$RUNFILE" ] && fail "Cannot find runfile: $SAVED_RUNFILE"
+		R="$R,$RUNFILE"
+	fi
 
-if [ ! -r "$RUNFILE" ]; then
-	fail "Cannot read runfile: $RUNFILE"
-fi
+	if [ ! -r "$RUNFILE" ]; then
+		fail "Cannot read runfile: $RUNFILE"
+	fi
+done
+unset IFS
+RUNFILES=${R#,}
 
 #
 # This script should not be run as root.  Instead the test user, which may
@@ -458,6 +532,9 @@
 #
 # Check if ksh exists
 #
+if [ "$UNAME" = "FreeBSD" ]; then
+	sudo ln -fs /usr/local/bin/ksh93 /bin/ksh
+fi
 [ -e "$STF_PATH/ksh" ] || fail "This test suite requires ksh."
 [ -e "$STF_SUITE/include/default.cfg" ] || fail \
     "Missing $STF_SUITE/include/default.cfg file."
@@ -466,9 +543,9 @@
 # Verify the ZFS module stack is loaded.
 #
 if [ "$STACK_TRACER" = "yes" ]; then
-	sudo "${ZFS_SH}" -S &>/dev/null
+	sudo "${ZFS_SH}" -S >/dev/null 2>&1
 else
-	sudo "${ZFS_SH}" &>/dev/null
+	sudo "${ZFS_SH}" >/dev/null 2>&1
 fi
 
 #
@@ -489,7 +566,7 @@
 		KEEP="rpool"
 	fi
 else
-	KEEP="$(echo -e "${KEEP//[[:blank:]]/\n}")"
+	KEEP="$(echo "$KEEP" | tr '[:blank:]' '\n')"
 fi
 
 #
@@ -501,23 +578,26 @@
 #
 # See libzfs/libzfs_config.c for more information.
 #
-__ZFS_POOL_EXCLUDE="$(echo "$KEEP" | sed ':a;N;s/\n/ /g;ba')"
+if [ "$UNAME" = "FreeBSD" ] ; then
+	__ZFS_POOL_EXCLUDE="$(echo "$KEEP" | tr -s '\n' ' ')"
+else
+	__ZFS_POOL_EXCLUDE="$(echo "$KEEP" | sed ':a;N;s/\n/ /g;ba')"
+fi
 
 . "$STF_SUITE/include/default.cfg"
 
-msg
-msg "--- Configuration ---"
-msg "Runfile:         $RUNFILE"
-msg "STF_TOOLS:       $STF_TOOLS"
-msg "STF_SUITE:       $STF_SUITE"
-msg "STF_PATH:        $STF_PATH"
-
 #
 # No DISKS have been provided so a basic file or loopback based devices
 # must be created for the test suite to use.
 #
 if [ -z "${DISKS}" ]; then
 	#
+	# If this is a performance run, prevent accidental use of
+	# loopback devices.
+	#
+	[ "$TAGS" = "perf" ] && fail "Running perf tests without disks."
+
+	#
 	# Create sparse files for the test suite.  These may be used
 	# directory or have loopback devices layered on them.
 	#
@@ -525,44 +605,52 @@
 		[ -f "$TEST_FILE" ] && fail "Failed file exists: ${TEST_FILE}"
 		truncate -s "${FILESIZE}" "${TEST_FILE}" ||
 		    fail "Failed creating: ${TEST_FILE} ($?)"
-		if [[ "$DISKS" ]]; then
-			DISKS="$DISKS $TEST_FILE"
-		else
-			DISKS="$TEST_FILE"
-		fi
 	done
 
 	#
 	# If requested setup loopback devices backed by the sparse files.
 	#
 	if [ "$LOOPBACK" = "yes" ]; then
-		DISKS=""
-
 		test -x "$LOSETUP" || fail "$LOSETUP utility must be installed"
 
 		for TEST_FILE in ${FILES}; do
-			TEST_LOOPBACK=$(sudo "${LOSETUP}" -f)
-			sudo "${LOSETUP}" "${TEST_LOOPBACK}" "${TEST_FILE}" ||
-			    fail "Failed: ${TEST_FILE} -> ${TEST_LOOPBACK}"
-			LOOPBACKS="${LOOPBACKS}${TEST_LOOPBACK} "
-			BASELOOPBACKS=$(basename "$TEST_LOOPBACK")
-			if [[ "$DISKS" ]]; then
-				DISKS="$DISKS $BASELOOPBACKS"
+			if [ "$UNAME" = "FreeBSD" ] ; then
+				MDDEVICE=$(sudo "${LOSETUP}" -a -t vnode -f "${TEST_FILE}")
+				if [ -z "$MDDEVICE" ] ; then
+					fail "Failed: ${TEST_FILE} -> loopback"
+				fi
+				DISKS="$DISKS $MDDEVICE"
+				LOOPBACKS="$LOOPBACKS $MDDEVICE"
 			else
-				DISKS="$BASELOOPBACKS"
+				TEST_LOOPBACK=$(sudo "${LOSETUP}" -f)
+				sudo "${LOSETUP}" "${TEST_LOOPBACK}" "${TEST_FILE}" ||
+				    fail "Failed: ${TEST_FILE} -> ${TEST_LOOPBACK}"
+				BASELOOPBACK="${TEST_LOOPBACK##*/}"
+				DISKS="$DISKS $BASELOOPBACK"
+				LOOPBACKS="$LOOPBACKS $TEST_LOOPBACK"
 			fi
 		done
+		DISKS=${DISKS# }
+		LOOPBACKS=${LOOPBACKS# }
+	else
+		DISKS="$FILES"
 	fi
 fi
 
+#
+# It may be desirable to test with fewer disks than the default when running
+# the performance tests, but the functional tests require at least three.
+#
 NUM_DISKS=$(echo "${DISKS}" | awk '{print NF}')
-[ "$NUM_DISKS" -lt 3 ] && fail "Not enough disks ($NUM_DISKS/3 minimum)"
+if [ "$TAGS" != "perf" ]; then
+	[ "$NUM_DISKS" -lt 3 ] && fail "Not enough disks ($NUM_DISKS/3 minimum)"
+fi
 
 #
 # Disable SELinux until the ZFS Test Suite has been updated accordingly.
 #
 if [ -x "$STF_PATH/setenforce" ]; then
-	sudo setenforce permissive &>/dev/null
+	sudo setenforce permissive >/dev/null 2>&1
 fi
 
 #
@@ -573,6 +661,12 @@
 	sudo /bin/sh -c "echo 0 >/proc/spl/kstat/zfs/dbgmsg"
 fi
 
+msg
+msg "--- Configuration ---"
+msg "Runfiles:        $RUNFILES"
+msg "STF_TOOLS:       $STF_TOOLS"
+msg "STF_SUITE:       $STF_SUITE"
+msg "STF_PATH:        $STF_PATH"
 msg "FILEDIR:         $FILEDIR"
 msg "FILES:           $FILES"
 msg "LOOPBACKS:       $LOOPBACKS"
@@ -596,31 +690,65 @@
 export TESTFAIL_CALLBACKS
 export PATH=$STF_PATH
 
-RESULTS_FILE=$(mktemp -u -t zts-results.XXXX -p "$FILEDIR")
-REPORT_FILE=$(mktemp -u -t zts-report.XXXX -p "$FILEDIR")
+# Print an unused (not created) temp-file name "$1.XXXXXX" in $FILEDIR.
+# The directory is embedded in the template on every platform: GNU mktemp's
+# deprecated -t resolves against $TMPDIR in preference to -p, so the old
+# Linux form could land outside $FILEDIR whenever TMPDIR was exported.
+mktemp_file() {
+	mktemp -u "${FILEDIR}/$1.XXXXXX"
+}
+mkdir -p "$FILEDIR" || :
+RESULTS_FILE=$(mktemp_file zts-results)
+REPORT_FILE=$(mktemp_file zts-report)
 
 #
 # Run all the tests as specified.
 #
-msg "${TEST_RUNNER} ${QUIET:+-q}" \
-    "-c \"${RUNFILE}\"" \
+msg "${TEST_RUNNER}" \
+    "${QUIET:+-q}" \
+    "${KMEMLEAK:+-m}" \
+    "${KMSG:+-K}" \
+    "-c \"${RUNFILES}\"" \
     "-T \"${TAGS}\"" \
     "-i \"${STF_SUITE}\"" \
     "-I \"${ITERATIONS}\""
-${TEST_RUNNER} ${QUIET:+-q} \
-    -c "${RUNFILE}" \
+${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} \
+    ${KMSG:+-K} \
+    -c "${RUNFILES}" \
     -T "${TAGS}" \
     -i "${STF_SUITE}" \
     -I "${ITERATIONS}" \
     2>&1 | tee "$RESULTS_FILE"
-
 #
 # Analyze the results.
 #
-set -o pipefail
-${ZTS_REPORT} "$RESULTS_FILE" | tee "$REPORT_FILE"
+${ZTS_REPORT} ${RERUN:+--no-maybes} "$RESULTS_FILE" >"$REPORT_FILE"
 RESULT=$?
-set +o pipefail
+
+if [ "$RESULT" -eq "2" ] && [ -n "$RERUN" ]; then
+	# List failed tests also named by "--list-maybes"; pass them via -l.
+	MAYBES="$($ZTS_REPORT --list-maybes)"
+	TEMP_RESULTS_FILE=$(mktemp_file zts-results-tmp)
+	TEST_LIST=$(mktemp_file test-list)
+	grep "^Test:.*\[FAIL\]" "$RESULTS_FILE" >"$TEMP_RESULTS_FILE"
+	for test_name in $MAYBES; do
+		grep "$test_name " "$TEMP_RESULTS_FILE" >>"$TEST_LIST"
+	done
+	${TEST_RUNNER} ${QUIET:+-q} ${KMEMLEAK:+-m} ${KMSG:+-K} \
+	    -c "${RUNFILES}" \
+	    -T "${TAGS}" \
+	    -i "${STF_SUITE}" \
+	    -I "${ITERATIONS}" \
+	    -l "${TEST_LIST}" \
+	    2>&1 | tee "$RESULTS_FILE"
+	rm -f "$TEMP_RESULTS_FILE" "$TEST_LIST"	# scratch lists, no longer needed
+	# Analyze the results of the rerun.
+	${ZTS_REPORT} --no-maybes "$RESULTS_FILE" >"$REPORT_FILE"
+	RESULT=$?
+fi
+
+
+cat "$REPORT_FILE"
 
 RESULTS_DIR=$(awk '/^Log directory/ { print $3 }' "$RESULTS_FILE")
 if [ -d "$RESULTS_DIR" ]; then
@@ -629,8 +757,8 @@
 
 rm -f "$RESULTS_FILE" "$REPORT_FILE"
 
-if [ ${#SINGLETEST[@]} -ne 0 ]; then
-	rm -f "$RUNFILE" &>/dev/null
+if [ -n "$SINGLETEST" ]; then
+	rm -f "$RUNFILES" >/dev/null 2>&1
 fi
 
 exit ${RESULT}

diff --git a/zfs/scripts/zfs.sh b/zfs/scripts/zfs.sh
index 015b3ba..940c83f 100755
--- a/zfs/scripts/zfs.sh
+++ b/zfs/scripts/zfs.sh

@@ -14,6 +14,7 @@
 PROG=zfs.sh
 VERBOSE="no"
 UNLOAD="no"
+LOAD="yes"
 STACK_TRACER="no"
 
 ZED_PIDFILE=${ZED_PIDFILE:-/var/run/zed.pid}
@@ -29,6 +30,8 @@
 KMOD_ZLUA=${KMOD_ZLUA:-zlua}
 KMOD_ICP=${KMOD_ICP:-icp}
 KMOD_ZFS=${KMOD_ZFS:-zfs}
+KMOD_FREEBSD=${KMOD_FREEBSD:-openzfs}
+KMOD_ZZSTD=${KMOD_ZZSTD:-zzstd}
 
 
 usage() {
@@ -42,12 +45,13 @@
 OPTIONS:
 	-h      Show this message
 	-v      Verbose
+	-r	Reload modules
 	-u      Unload modules
 	-S      Enable kernel stack tracer
 EOF
 }
 
-while getopts 'hvuS' OPTION; do
+while getopts 'hvruS' OPTION; do
 	case $OPTION in
 	h)
 		usage
@@ -56,8 +60,13 @@
 	v)
 		VERBOSE="yes"
 		;;
+	r)
+		UNLOAD="yes"
+		LOAD="yes"
+		;;
 	u)
 		UNLOAD="yes"
+		LOAD="no"
 		;;
 	S)
 		STACK_TRACER="yes"
@@ -76,13 +85,14 @@
 	fi
 }
 
-check_modules() {
+check_modules_linux() {
 	LOADED_MODULES=""
 	MISSING_MODULES=""
 
-	for KMOD in $KMOD_SPL $KMOD_ZAVL $KMOD_ZNVPAIR \
-	    $KMOD_ZUNICODE $KMOD_ZCOMMON $KMOD_ZLUA $KMOD_ICP $KMOD_ZFS; do
-		NAME=$(basename "$KMOD" .ko)
+	for KMOD in $KMOD_SPL $KMOD_ZAVL $KMOD_ZNVPAIR $KMOD_ZUNICODE $KMOD_ZCOMMON \
+	    $KMOD_ZLUA $KMOD_ZZSTD $KMOD_ICP $KMOD_ZFS; do
+		NAME="${KMOD##*/}"
+		NAME="${NAME%.ko}"
 
 		if lsmod | grep -E -q "^${NAME}"; then
 			LOADED_MODULES="$LOADED_MODULES\t$NAME\n"
@@ -108,7 +118,7 @@
 	return 0
 }
 
-load_module() {
+load_module_linux() {
 	KMOD=$1
 
 	FILE=$(modinfo "$KMOD" | awk '/^filename:/ {print $2}')
@@ -118,9 +128,7 @@
 		echo "Loading: $FILE ($VERSION)"
 	fi
 
-	$LDMOD "$KMOD" >/dev/null 2>&1
-	# shellcheck disable=SC2181
-	if [ $? -ne 0 ]; then
+	if ! $LDMOD "$KMOD" >/dev/null 2>&1; then
 		echo "Failed to load $KMOD"
 		return 1
 	fi
@@ -128,7 +136,17 @@
 	return 0
 }
 
-load_modules() {
+# Load the FreeBSD kernel module named by $KMOD_FREEBSD.
+# Returns 1 when kldload fails, otherwise 0; announces success if VERBOSE=yes.
+load_modules_freebsd() {
+	if ! kldload "$KMOD_FREEBSD"; then
+		return 1
+	fi
+	[ "$VERBOSE" != "yes" ] || echo "Successfully loaded ZFS module stack"
+	return 0
+}
+
+load_modules_linux() {
 	mkdir -p /etc/zfs
 
 	if modinfo "$KMOD_ZLIB_DEFLATE" >/dev/null 2>&1; then
@@ -140,8 +158,9 @@
 	fi
 
 	for KMOD in $KMOD_SPL $KMOD_ZAVL $KMOD_ZNVPAIR \
-	    $KMOD_ZUNICODE $KMOD_ZCOMMON $KMOD_ZLUA $KMOD_ICP $KMOD_ZFS; do
-		load_module "$KMOD" || return 1
+	    $KMOD_ZUNICODE $KMOD_ZCOMMON $KMOD_ZLUA $KMOD_ZZSTD \
+	    $KMOD_ICP $KMOD_ZFS; do
+		load_module_linux "$KMOD" || return 1
 	done
 
 	if [ "$VERBOSE" = "yes" ]; then
@@ -151,10 +170,11 @@
 	return 0
 }
 
-unload_module() {
+unload_module_linux() {
 	KMOD=$1
 
-	NAME=$(basename "$KMOD" .ko)
+	NAME="${KMOD##*/}"
+	NAME="${NAME%.ko}"
 	FILE=$(modinfo "$KMOD" | awk '/^filename:/ {print $2}')
 	VERSION=$(modinfo "$KMOD" | awk '/^version:/ {print $2}')
 
@@ -167,14 +187,28 @@
 	return 0
 }
 
-unload_modules() {
-	for KMOD in $KMOD_ZFS $KMOD_ICP $KMOD_ZLUA $KMOD_ZCOMMON $KMOD_ZUNICODE \
-	    $KMOD_ZNVPAIR  $KMOD_ZAVL $KMOD_SPL; do
-		NAME=$(basename "$KMOD" .ko)
-		USE_COUNT=$(lsmod | grep -E "^${NAME} " | awk '{print $3}')
+# Unload $KMOD_FREEBSD.  Returns 1 (after reporting) if kldunload fails, so
+# the success message is never printed for a failed unload; returns 0 otherwise.
+unload_modules_freebsd() {
+	if ! kldunload "$KMOD_FREEBSD"; then
+		echo "Failed to unload $KMOD_FREEBSD"
+		return 1
+	fi
+	[ "$VERBOSE" != "yes" ] || echo "Successfully unloaded ZFS module stack"
+}
+
+unload_modules_linux() {
+	for KMOD in $KMOD_ZFS $KMOD_ICP $KMOD_ZZSTD $KMOD_ZLUA $KMOD_ZCOMMON \
+	    $KMOD_ZUNICODE $KMOD_ZNVPAIR  $KMOD_ZAVL $KMOD_SPL; do
+		NAME="${KMOD##*/}"
+		NAME="${NAME%.ko}"
+		USE_COUNT=$(lsmod | awk '/^'"${NAME}"'/ {print $3}')
 
 		if [ "$USE_COUNT" = "0" ] ; then
-			unload_module "$KMOD" || return 1
+			unload_module_linux "$KMOD" || return 1
+		elif [ "$USE_COUNT" != "" ] ; then
+			echo "Module ${NAME} is still in use!"
+			return 1
 		fi
 	done
 
@@ -193,7 +227,7 @@
 	return 0
 }
 
-stack_clear() {
+stack_clear_linux() {
 	STACK_MAX_SIZE=/sys/kernel/debug/tracing/stack_max_size
 	STACK_TRACER_ENABLED=/proc/sys/kernel/stack_tracer_enabled
 
@@ -203,7 +237,7 @@
 	fi
 }
 
-stack_check() {
+stack_check_linux() {
 	STACK_MAX_SIZE=/sys/kernel/debug/tracing/stack_max_size
 	STACK_TRACE=/sys/kernel/debug/tracing/stack_trace
 	STACK_LIMIT=15362
@@ -224,17 +258,34 @@
 	exit 1
 fi
 
+UNAME=$(uname -s)
+
 if [ "$UNLOAD" = "yes" ]; then
 	kill_zed
 	umount -t zfs -a
-	stack_check
-	unload_modules
-else
-	stack_clear
-	check_modules
-	load_modules "$@"
-	udevadm trigger
-	udevadm settle
+	case $UNAME in
+		FreeBSD)
+	           unload_modules_freebsd
+		   ;;
+		Linux)
+	           stack_check_linux
+	           unload_modules_linux
+		   ;;
+	esac
+fi
+if [ "$LOAD" = "yes" ]; then
+	case $UNAME in
+		FreeBSD)
+		   load_modules_freebsd
+		   ;;
+		Linux)
+		   stack_clear_linux
+		   check_modules_linux
+		   load_modules_linux "$@"
+		   udevadm trigger
+		   udevadm settle
+		   ;;
+	esac
 fi
 
 exit 0

diff --git a/zfs/scripts/zimport.sh b/zfs/scripts/zimport.sh
index d7e82fe..03c766c 100755
--- a/zfs/scripts/zimport.sh
+++ b/zfs/scripts/zimport.sh

@@ -1,10 +1,10 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Verify that an assortment of known good reference pools can be imported
-# using different versions of the ZoL code.
+# using different versions of OpenZFS code.
 #
 # By default references pools for the major ZFS implementation will be
-# checked against the most recent ZoL tags and the master development branch.
+# checked against the most recent OpenZFS tags and the master development branch.
 # Alternate tags or branches may be verified with the '-s <src-tag> option.
 # Passing the keyword "installed" will instruct the script to test whatever
 # version is installed.
@@ -39,7 +39,7 @@
 #      -s "zfs-0.6.2 master installed" \
 #      -p "zevo-1.1.1 zol-0.6.2 zol-0.6.2-173 master installed"
 #
-# --------------------- ZFS on Linux Source Versions --------------
+# ------------------------ OpenZFS Source Versions ----------------
 #                 zfs-0.6.2       master          0.6.2-175_g36eb554
 # -----------------------------------------------------------------
 # Clone ZFS       Local		Local		Skip
@@ -68,9 +68,9 @@
 KEEP="no"
 VERBOSE="no"
 COLOR="yes"
-REPO="https://github.com/zfsonlinux"
+REPO="https://github.com/openzfs"
 IMAGES_DIR="$SCRIPTDIR/zfs-images/"
-IMAGES_TAR="https://github.com/zfsonlinux/zfs-images/tarball/master"
+IMAGES_TAR="https://github.com/openzfs/zfs-images/tarball/master"
 ERROR=0
 
 CONFIG_LOG="configure.log"
@@ -98,7 +98,7 @@
 	-c                No color
 	-k                Keep temporary directory
 	-r <repo>         Source repository ($REPO)
-	-s <src-tag>...   Verify ZoL versions with the listed tags
+	-s <src-tag>...   Verify OpenZFS versions with the listed tags
 	-i <pool-dir>     Pool image directory
 	-p <pool-tag>...  Verify pools created with the listed tags
 	-f <path>         Temporary directory to use
@@ -164,15 +164,13 @@
 	local MAX_DIR_SIZE=$2
 	local MAX_FILE_SIZE=$3
 
-	# shellcheck disable=SC2086
-	mkdir -p $ROOT/{a,b,c,d,e,f,g}/{h,i}
+	mkdir -p "$ROOT"/{a,b,c,d,e,f,g}/{h,i}
 	DIRS=$(find "$ROOT")
 
 	for DIR in $DIRS; do
 		COUNT=$((RANDOM % MAX_DIR_SIZE))
 
-		# shellcheck disable=SC2034
-		for i in $(seq $COUNT); do
+		for _ in $(seq $COUNT); do
 			FILE=$(mktemp -p "$DIR")
 			SIZE=$((RANDOM % MAX_FILE_SIZE))
 			dd if=/dev/urandom of="$FILE" bs=1k \
@@ -334,9 +332,8 @@
 for TAG in $POOL_TAGS; do
 
 	if  [ "$TAG" = "all" ]; then
-		# shellcheck disable=SC2010
-		ALL_TAGS=$(ls "$IMAGES_DIR" | grep "tar.bz2" | \
-		    sed 's/.tar.bz2//' | tr '\n' ' ')
+		ALL_TAGS=$(echo "$IMAGES_DIR"/*.tar.bz2 | \
+		    sed "s|$IMAGES_DIR/||g;s|.tar.bz2||g")
 		NEW_TAGS="$NEW_TAGS $ALL_TAGS"
 	else
 		NEW_TAGS="$NEW_TAGS $TAG"
@@ -365,7 +362,7 @@
 fi
 
 # Print a header for all tags which are being tested.
-echo "--------------------- ZFS on Linux Source Versions --------------"
+echo "------------------------ OpenZFS Source Versions ----------------"
 printf "%-16s" " "
 for TAG in $SRC_TAGS; do
 	src_set_vars "$TAG"
@@ -489,12 +486,10 @@
 		    "$POOL_DIR_COPY" || \
 		    fail "Failed to copy $POOL_DIR_PRISTINE to $POOL_DIR_COPY"
 		POOL_NAME=$($ZPOOL_CMD import -d "$POOL_DIR_COPY" | \
-		    awk '/pool:/ { print $2; exit 0 }')
+		    awk '/pool:/ { print $2; exit }')
 
-		$ZPOOL_CMD import -N -d "$POOL_DIR_COPY" \
-		   "$POOL_NAME" &>/dev/null
-		# shellcheck disable=SC2181
-		if [ $? -ne 0 ]; then
+		if ! $ZPOOL_CMD import -N -d "$POOL_DIR_COPY" \
+		    "$POOL_NAME" &>/dev/null; then
 			fail_nonewline
 			ERROR=1
 		else

diff --git a/zfs/scripts/zloop.sh b/zfs/scripts/zloop.sh
index 1f36f86..4a572eb 100755
--- a/zfs/scripts/zloop.sh
+++ b/zfs/scripts/zloop.sh

@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 #
 # CDDL HEADER START
@@ -18,6 +18,7 @@
 #
 # Copyright (c) 2015 by Delphix. All rights reserved.
 # Copyright (C) 2016 Lawrence Livermore National Security, LLC.
+# Copyright (c) 2017, Intel Corporation.
 #
 
 BASE_DIR=$(dirname "$0")
@@ -37,54 +38,66 @@
 
 function usage
 {
-	echo -e "\n$0 [-t <timeout>] [ -s <vdev size> ] [-c <dump directory>]" \
-	    "[ -- [extra ztest parameters]]\n" \
-	    "\n" \
-	    "  This script runs ztest repeatedly with randomized arguments.\n" \
-	    "  If a crash is encountered, the ztest logs, any associated\n" \
-	    "  vdev files, and core file (if one exists) are moved to the\n" \
-	    "  output directory ($DEFAULTCOREDIR by default). Any options\n" \
-	    "  after the -- end-of-options marker will be passed to ztest.\n" \
-	    "\n" \
-	    "  Options:\n" \
-	    "    -t  Total time to loop for, in seconds. If not provided,\n" \
-	    "        zloop runs forever.\n" \
-	    "    -s  Size of vdev devices.\n" \
-	    "    -f  Specify working directory for ztest vdev files.\n" \
-	    "    -c  Specify a core dump directory to use.\n" \
-	    "    -m  Max number of core dumps to allow before exiting.\n" \
-	    "    -l  Create 'ztest.core.N' symlink to core directory.\n" \
-	    "    -h  Print this help message.\n" \
-	    "" >&2
+	cat >&2 <<EOF
+
+$0 [-hl] [-c <dump directory>] [-f <vdev directory>]
+  [-m <max core dumps>] [-s <vdev size>] [-t <timeout>]
+  [-I <max iterations>] [-- [extra ztest parameters]]
+
+  This script runs ztest repeatedly with randomized arguments.
+  If a crash is encountered, the ztest logs, any associated
+  vdev files, and core file (if one exists) are moved to the
+  output directory ($DEFAULTCOREDIR by default). Any options
+  after the -- end-of-options marker will be passed to ztest.
+
+  Options:
+    -c  Specify a core dump directory to use.
+    -f  Specify working directory for ztest vdev files.
+    -h  Print this help message.
+    -l  Create 'ztest.core.N' symlink to core directory.
+    -m  Max number of core dumps to allow before exiting.
+    -s  Size of vdev devices.
+    -t  Total time to loop for, in seconds. If not provided,
+        zloop runs forever.
+    -I  Max number of iterations to loop before exiting.
+
+EOF
 }
 
 function or_die
 {
 	# shellcheck disable=SC2068
-	$@
-	# shellcheck disable=SC2181
-	if [[ $? -ne 0 ]]; then
-		# shellcheck disable=SC2145
-		echo "Command failed: $@"
+	if ! $@; then
+		echo "Command failed: $*"
 		exit 1
 	fi
 }
 
-# core file helpers
-origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
-coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
+case $(uname) in
+FreeBSD)
+	coreglob="z*.core"
+	;;
+Linux)
+	# core file helpers
+	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
+	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
 
-if [[ $coreglob = "*" ]]; then
-        echo "Setting core file pattern..."
-        echo "core" > /proc/sys/kernel/core_pattern
-        coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
-            /proc/sys/kernel/core_pattern)*"
-fi
+	if [[ $coreglob = "*" ]]; then
+		echo "Setting core file pattern..."
+		echo "core" > /proc/sys/kernel/core_pattern
+		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
+		    /proc/sys/kernel/core_pattern)*"
+	fi
+	;;
+*)
+	exit 1
+	;;
+esac
 
 function core_file
 {
-	# shellcheck disable=SC2012 disable=2086
-        printf "%s" "$(ls -tr1 $coreglob 2> /dev/null | head -1)"
+	# shellcheck disable=SC2012,SC2086
+	ls -tr1 $coreglob 2>/dev/null | head -1
 }
 
 function core_prog
@@ -92,8 +105,7 @@
 	prog=$ZTEST
 	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
 	    tr  \' ' ')
-	# shellcheck disable=SC2076
-	if [[ "$core_id" =~ "zdb "  ]]; then
+	if [[ "$core_id" == *"zdb "* ]]; then
 		prog=$ZDB
 	fi
 	printf "%s" "$prog"
@@ -178,10 +190,12 @@
 size="512m"
 coremax=0
 symlink=0
-while getopts ":ht:m:s:c:f:l" opt; do
+iterations=0
+while getopts ":ht:m:I:s:c:f:l" opt; do
 	case $opt in
 		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
 		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
+		I ) [[ $OPTARG ]] && iterations=$OPTARG ;;
 		s ) [[ $OPTARG ]] && size=$OPTARG ;;
 		c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
 		f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
@@ -226,9 +240,14 @@
 foundcrashes=0	# number of crashes found so far
 starttime=$(date +%s)
 curtime=$starttime
+iteration=0
 
 # if no timeout was specified, loop forever.
-while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
+while (( timeout == 0 )) || (( curtime <= (starttime + timeout) )); do
+	if (( iterations > 0 )) && (( iteration++ == iterations )); then
+		break
+	fi
+
 	zopt="-G -VVVVV"
 
 	# start each run with an empty directory
@@ -236,34 +255,60 @@
 	or_die rm -rf "$workdir"
 	or_die mkdir "$workdir"
 
-	# switch between common arrangements & fully randomized
-	if [[ $((RANDOM % 2)) -eq 0 ]]; then
-		mirrors=2
-		raidz=0
-		parity=1
-		vdevs=2
-	else
-		mirrors=$(((RANDOM % 3) * 1))
-		parity=$(((RANDOM % 3) + 1))
-		raidz=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
-		vdevs=$(((RANDOM % 3) + 3))
-	fi
-	align=$(((RANDOM % 2) * 3 + 9))
-	runtime=$((RANDOM % 100))
-	passtime=$((RANDOM % (runtime / 3 + 1) + 10))
+	# switch between three types of configs
+	# 1/3 basic, 1/3 raidz mix, and 1/3 draid mix
+	choice=$((RANDOM % 3))
 
+	# ashift is randomly either 9 or 12
+	align=$(((RANDOM % 2) * 3 + 9))
+
+	# randomly use special classes
+	class="special=random"
+
+	if [[ $choice -eq 0 ]]; then
+		# basic mirror only
+		parity=1
+		mirrors=2
+		draid_data=0
+		draid_spares=0
+		raid_children=0
+		vdevs=2
+		raid_type="raidz"
+	elif [[ $choice -eq 1 ]]; then
+		# fully randomized mirror/raidz (sans dRAID)
+		parity=$(((RANDOM % 3) + 1))
+		mirrors=$(((RANDOM % 3) * 1))
+		draid_data=0
+		draid_spares=0
+		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
+		vdevs=$(((RANDOM % 3) + 3))
+		raid_type="raidz"
+	else
+		# fully randomized dRAID (sans mirror/raidz)
+		parity=$(((RANDOM % 3) + 1))
+		mirrors=0
+		draid_data=$(((RANDOM % 8) + 3))
+		draid_spares=$(((RANDOM % 2) + parity))
+		stripe=$((draid_data + parity))
+		extra=$((draid_spares + (RANDOM % 4)))
+		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
+		vdevs=$((RANDOM % 3))
+		raid_type="draid"
+	fi
+
+	zopt="$zopt -K $raid_type"
 	zopt="$zopt -m $mirrors"
-	zopt="$zopt -r $raidz"
+	zopt="$zopt -r $raid_children"
+	zopt="$zopt -D $draid_data"
+	zopt="$zopt -S $draid_spares"
 	zopt="$zopt -R $parity"
 	zopt="$zopt -v $vdevs"
 	zopt="$zopt -a $align"
-	zopt="$zopt -T $runtime"
-	zopt="$zopt -P $passtime"
+	zopt="$zopt -C $class"
 	zopt="$zopt -s $size"
 	zopt="$zopt -f $workdir"
 
-	# shellcheck disable=SC2124
-	cmd="$ZTEST $zopt $@"
+	cmd="$ZTEST $zopt $*"
 	desc="$(date '+%m/%d %T') $cmd"
 	echo "$desc" | tee -a ztest.history
 	echo "$desc" >>ztest.out
@@ -278,8 +323,14 @@
 
 echo "zloop finished, $foundcrashes crashes found"
 
-#restore core pattern
-echo "$origcorepattern" > /proc/sys/kernel/core_pattern
+# restore core pattern.
+case $(uname) in
+Linux)
+	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
+	;;
+*)
+	;;
+esac
 
 uptime >>ztest.out
 

diff --git a/zfs/scripts/zol2zfs-patch.sed b/zfs/scripts/zol2zfs-patch.sed
index bb6d9fa..0ca4b6c 100755
--- a/zfs/scripts/zol2zfs-patch.sed
+++ b/zfs/scripts/zol2zfs-patch.sed

@@ -12,7 +12,7 @@
 s:lib/libzfs_core:usr/src/lib/libzfs_core/common:g
 s:lib/libzpool:lib/libzpool/common:g
 s:lib/libzpool:usr/src/lib/libzpool:g
-s:man/man5/zpool-features.5:usr/src/man/man5/zpool-features.5:g
+s:man/man7/zpool-features.7:usr/src/man/man5/zpool-features.5:g
 s:man/man8/zfs.8:usr/src/man/man1m/zfs.1m:g
 s:module/nvpair:usr/src/common/nvpair:g
 s:module/zcommon:usr/src/common/zfs/:g

diff --git a/zfs/tests/Makefile.am b/zfs/tests/Makefile.am
index 28d6e95..d8277ef 100644
--- a/zfs/tests/Makefile.am
+++ b/zfs/tests/Makefile.am

@@ -1 +1,9 @@
+include $(top_srcdir)/config/Shellcheck.am
+
 SUBDIRS = runfiles test-runner zfs-tests
+
+EXTRA_DIST = README.md
+
+SHELLCHECK_IGNORE = ,SC2155
+SHELLCHECKSCRIPTS = $$(find . -name '*.sh')
+.PHONY: $(SHELLCHECKSCRIPTS)

diff --git a/zfs/tests/README.md b/zfs/tests/README.md
index b2c7f99..72b994f 100644
--- a/zfs/tests/README.md
+++ b/zfs/tests/README.md

@@ -29,7 +29,7 @@
   * Three scratch disks
     * Specify the disks you wish to use in the $DISKS variable, as a
       space delimited list like this: DISKS='vdb vdc vdd'.  By default
-      the zfs-tests.sh sciprt will construct three loopback devices to
+      the zfs-tests.sh script will construct three loopback devices to
       be used for testing: DISKS='loop0 loop1 loop2'.
   * A non-root user with a full set of basic privileges and the ability
     to sudo(8) to root without a password to run the test.
@@ -91,7 +91,7 @@
 
     -s SIZE     Use vdevs of SIZE (default: 4G)
 
-    -r RUNFILE  Run tests in RUNFILE (default: linux.run)
+    -r RUNFILES Run tests in RUNFILES (default: common.run,linux.run)
 
     -t PATH     Run single test at PATH relative to test suite
 
@@ -128,7 +128,7 @@
     STF_PATH:        /var/tmp/constrained_path.G0Sf
     FILEDIR:         /tmp/test
     FILES:           /tmp/test/file-vdev0 /tmp/test/file-vdev1 /tmp/test/file-vdev2
-    LOOPBACKS:       /dev/loop0 /dev/loop1 /dev/loop2 
+    LOOPBACKS:       /dev/loop0 /dev/loop1 /dev/loop2
     DISKS:           loop0 loop1 loop2
     NUM_DISKS:       3
     FILESIZE:        4G
@@ -149,4 +149,4 @@
     
     Running Time:	02:35:33
     Percent passed:	95.6%
-    Log directory:	/var/tmp/test_results/20180515T054509
\ No newline at end of file
+    Log directory:	/var/tmp/test_results/20180515T054509

diff --git a/zfs/tests/runfiles/Makefile.am b/zfs/tests/runfiles/Makefile.am
index 4625806..278e949 100644
--- a/zfs/tests/runfiles/Makefile.am
+++ b/zfs/tests/runfiles/Makefile.am

@@ -1,5 +1,9 @@
 pkgdatadir = $(datadir)/@PACKAGE@/runfiles
 dist_pkgdata_DATA = \
+	common.run \
+	freebsd.run \
 	linux.run \
 	longevity.run \
-	perf-regression.run
+	perf-regression.run \
+	sanity.run \
+	sunos.run

diff --git a/zfs/tests/runfiles/common.run b/zfs/tests/runfiles/common.run
new file mode 100644
index 0000000..6c2296d
--- /dev/null
+++ b/zfs/tests/runfiles/common.run

@@ -0,0 +1,953 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# This run file contains all of the common functional tests.  When
+# adding a new test consider also adding it to the sanity.run file
+# if the new test runs to completion in only a few seconds.
+#
+# Approximate run time: 4-5 hours
+#
+
+[DEFAULT]
+pre = setup
+quiet = False
+pre_user = root
+user = root
+timeout = 600
+post_user = root
+post = cleanup
+failsafe_user = root
+failsafe = callbacks/zfs_failsafe
+outputdir = /var/tmp/test_results
+tags = ['functional']
+
+[tests/functional/acl/off]
+tests = ['posixmode']
+tags = ['functional', 'acl']
+
+[tests/functional/alloc_class]
+tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
+    'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
+    'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
+    'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
+    'alloc_class_013_pos', 'alloc_class_014_neg', 'alloc_class_015_pos']
+tags = ['functional', 'alloc_class']
+
+[tests/functional/arc]
+tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos',
+    'arcstats_runtime_tuning']
+tags = ['functional', 'arc']
+
+[tests/functional/atime]
+tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on']
+tags = ['functional', 'atime']
+
+[tests/functional/bootfs]
+tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos',
+    'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos',
+    'bootfs_008_pos']
+tags = ['functional', 'bootfs']
+
+[tests/functional/btree]
+tests = ['btree_positive', 'btree_negative']
+tags = ['functional', 'btree']
+pre =
+post =
+
+[tests/functional/cache]
+tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg',
+    'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg',
+    'cache_009_pos', 'cache_010_pos', 'cache_011_pos', 'cache_012_pos']
+tags = ['functional', 'cache']
+
+[tests/functional/cachefile]
+tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos',
+    'cachefile_004_pos']
+tags = ['functional', 'cachefile']
+
+[tests/functional/casenorm]
+tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure',
+    'sensitive_none_lookup', 'sensitive_none_delete',
+    'sensitive_formd_lookup', 'sensitive_formd_delete',
+    'insensitive_none_lookup', 'insensitive_none_delete',
+    'insensitive_formd_lookup', 'insensitive_formd_delete',
+    'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
+    'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
+tags = ['functional', 'casenorm']
+
+[tests/functional/channel_program/lua_core]
+tests = ['tst.args_to_lua', 'tst.divide_by_zero', 'tst.exists',
+    'tst.integer_illegal', 'tst.integer_overflow', 'tst.language_functions_neg',
+    'tst.language_functions_pos', 'tst.large_prog', 'tst.libraries',
+    'tst.memory_limit', 'tst.nested_neg', 'tst.nested_pos', 'tst.nvlist_to_lua',
+    'tst.recursive_neg', 'tst.recursive_pos', 'tst.return_large',
+    'tst.return_nvlist_neg', 'tst.return_nvlist_pos',
+    'tst.return_recursive_table', 'tst.stack_gsub', 'tst.timeout']
+tags = ['functional', 'channel_program', 'lua_core']
+
+[tests/functional/channel_program/synctask_core]
+tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
+    'tst.get_index_props', 'tst.get_mountpoint', 'tst.get_neg',
+    'tst.get_number_props', 'tst.get_string_props', 'tst.get_type',
+    'tst.get_userquota', 'tst.get_written', 'tst.inherit', 'tst.list_bookmarks',
+    'tst.list_children', 'tst.list_clones', 'tst.list_holds',
+    'tst.list_snapshots', 'tst.list_system_props',
+    'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict',
+    'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult',
+    'tst.rollback_one', 'tst.set_props', 'tst.snapshot_destroy', 'tst.snapshot_neg',
+    'tst.snapshot_recursive', 'tst.snapshot_simple',
+    'tst.bookmark.create', 'tst.bookmark.copy',
+    'tst.terminate_by_signal'
+    ]
+tags = ['functional', 'channel_program', 'synctask_core']
+
+[tests/functional/checksum]
+tests = ['run_sha2_test', 'run_skein_test', 'filetest_001_pos',
+    'filetest_002_pos']
+tags = ['functional', 'checksum']
+
+[tests/functional/clean_mirror]
+tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
+    'clean_mirror_003_pos', 'clean_mirror_004_pos']
+tags = ['functional', 'clean_mirror']
+
+[tests/functional/cli_root/zdb]
+tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
+    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
+    'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
+    'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos',
+    'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zdb']
+
+[tests/functional/cli_root/zfs]
+tests = ['zfs_001_neg', 'zfs_002_pos']
+tags = ['functional', 'cli_root', 'zfs']
+
+[tests/functional/cli_root/zfs_bookmark]
+tests = ['zfs_bookmark_cliargs']
+tags = ['functional', 'cli_root', 'zfs_bookmark']
+
+[tests/functional/cli_root/zfs_change-key]
+tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
+    'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
+    'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones']
+tags = ['functional', 'cli_root', 'zfs_change-key']
+
+[tests/functional/cli_root/zfs_clone]
+tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos',
+    'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos',
+    'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg',
+    'zfs_clone_010_pos', 'zfs_clone_encrypted', 'zfs_clone_deeply_nested']
+tags = ['functional', 'cli_root', 'zfs_clone']
+
+[tests/functional/cli_root/zfs_copies]
+tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos',
+    'zfs_copies_004_neg', 'zfs_copies_005_neg', 'zfs_copies_006_pos']
+tags = ['functional', 'cli_root', 'zfs_copies']
+
+[tests/functional/cli_root/zfs_create]
+tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
+    'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos',
+    'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg',
+    'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos',
+    'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted',
+    'zfs_create_crypt_combos', 'zfs_create_dryrun', 'zfs_create_nomount',
+    'zfs_create_verbose']
+tags = ['functional', 'cli_root', 'zfs_create']
+
+[tests/functional/cli_root/zfs_destroy]
+tests = ['zfs_clone_livelist_condense_and_disable',
+    'zfs_clone_livelist_condense_races', 'zfs_clone_livelist_dedup',
+    'zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos',
+    'zfs_destroy_004_pos', 'zfs_destroy_005_neg', 'zfs_destroy_006_neg',
+    'zfs_destroy_007_neg', 'zfs_destroy_008_pos', 'zfs_destroy_009_pos',
+    'zfs_destroy_010_pos', 'zfs_destroy_011_pos', 'zfs_destroy_012_pos',
+    'zfs_destroy_013_neg', 'zfs_destroy_014_pos', 'zfs_destroy_015_pos',
+    'zfs_destroy_016_pos', 'zfs_destroy_clone_livelist',
+    'zfs_destroy_dev_removal', 'zfs_destroy_dev_removal_condense']
+tags = ['functional', 'cli_root', 'zfs_destroy']
+
+[tests/functional/cli_root/zfs_diff]
+tests = ['zfs_diff_changes', 'zfs_diff_cliargs', 'zfs_diff_timestamp',
+    'zfs_diff_types', 'zfs_diff_encrypted', 'zfs_diff_mangle']
+tags = ['functional', 'cli_root', 'zfs_diff']
+
+[tests/functional/cli_root/zfs_get]
+tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos',
+    'zfs_get_004_pos', 'zfs_get_005_neg', 'zfs_get_006_neg', 'zfs_get_007_neg',
+    'zfs_get_008_pos', 'zfs_get_009_pos', 'zfs_get_010_neg']
+tags = ['functional', 'cli_root', 'zfs_get']
+
+[tests/functional/cli_root/zfs_ids_to_path]
+tests = ['zfs_ids_to_path_001_pos']
+tags = ['functional', 'cli_root', 'zfs_ids_to_path']
+
+[tests/functional/cli_root/zfs_inherit]
+tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos',
+    'zfs_inherit_mountpoint']
+tags = ['functional', 'cli_root', 'zfs_inherit']
+
+[tests/functional/cli_root/zfs_load-key]
+tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file',
+    'zfs_load-key_https', 'zfs_load-key_location', 'zfs_load-key_noop',
+    'zfs_load-key_recursive']
+tags = ['functional', 'cli_root', 'zfs_load-key']
+
+[tests/functional/cli_root/zfs_mount]
+tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
+    'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
+    'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg',
+    'zfs_mount_012_pos', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted',
+    'zfs_mount_remount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
+    'zfs_mount_test_race']
+tags = ['functional', 'cli_root', 'zfs_mount']
+
+[tests/functional/cli_root/zfs_program]
+tests = ['zfs_program_json']
+tags = ['functional', 'cli_root', 'zfs_program']
+
+[tests/functional/cli_root/zfs_promote]
+tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
+    'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg',
+    'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot']
+tags = ['functional', 'cli_root', 'zfs_promote']
+
+[tests/functional/cli_root/zfs_property]
+tests = ['zfs_written_property_001_pos']
+tags = ['functional', 'cli_root', 'zfs_property']
+
+[tests/functional/cli_root/zfs_receive]
+tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
+    'zfs_receive_004_neg', 'zfs_receive_005_neg', 'zfs_receive_006_pos',
+    'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg',
+    'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
+    'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos',
+    'zfs_receive_016_pos', 'receive-o-x_props_override',
+    'zfs_receive_from_encrypted', 'zfs_receive_to_encrypted',
+    'zfs_receive_raw', 'zfs_receive_raw_incremental', 'zfs_receive_-e',
+    'zfs_receive_raw_-d', 'zfs_receive_from_zstd', 'zfs_receive_new_props',
+    'zfs_receive_-wR-encrypted-mix']
+tags = ['functional', 'cli_root', 'zfs_receive']
+
+[tests/functional/cli_root/zfs_rename]
+tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
+    'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos',
+    'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg',
+    'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
+    'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child',
+    'zfs_rename_to_encrypted', 'zfs_rename_mountpoint', 'zfs_rename_nounmount']
+tags = ['functional', 'cli_root', 'zfs_rename']
+
+[tests/functional/cli_root/zfs_reservation]
+tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
+tags = ['functional', 'cli_root', 'zfs_reservation']
+
+[tests/functional/cli_root/zfs_rollback]
+tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
+    'zfs_rollback_003_neg', 'zfs_rollback_004_neg']
+tags = ['functional', 'cli_root', 'zfs_rollback']
+
+[tests/functional/cli_root/zfs_send]
+tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
+    'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
+    'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw',
+    'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing']
+tags = ['functional', 'cli_root', 'zfs_send']
+
+[tests/functional/cli_root/zfs_set]
+tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
+    'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos',
+    'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos',
+    'mountpoint_002_pos', 'reservation_001_neg', 'user_property_002_pos',
+    'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos',
+    'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos',
+    'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg',
+    'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos',
+    'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation',
+    'zfs_set_feature_activation']
+tags = ['functional', 'cli_root', 'zfs_set']
+
+[tests/functional/cli_root/zfs_share]
+tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos',
+    'zfs_share_004_pos', 'zfs_share_006_pos', 'zfs_share_008_neg',
+    'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares']
+tags = ['functional', 'cli_root', 'zfs_share']
+
+[tests/functional/cli_root/zfs_snapshot]
+tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
+    'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg',
+    'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg',
+    'zfs_snapshot_009_pos']
+tags = ['functional', 'cli_root', 'zfs_snapshot']
+
+[tests/functional/cli_root/zfs_unload-key]
+tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive']
+tags = ['functional', 'cli_root', 'zfs_unload-key']
+
+[tests/functional/cli_root/zfs_unmount]
+tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos',
+    'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos',
+    'zfs_unmount_007_neg', 'zfs_unmount_008_neg', 'zfs_unmount_009_pos',
+    'zfs_unmount_all_001_pos', 'zfs_unmount_nested', 'zfs_unmount_unload_keys']
+tags = ['functional', 'cli_root', 'zfs_unmount']
+
+[tests/functional/cli_root/zfs_unshare]
+tests = ['zfs_unshare_001_pos', 'zfs_unshare_002_pos', 'zfs_unshare_003_pos',
+    'zfs_unshare_004_neg', 'zfs_unshare_005_neg', 'zfs_unshare_006_pos',
+    'zfs_unshare_007_pos']
+tags = ['functional', 'cli_root', 'zfs_unshare']
+
+[tests/functional/cli_root/zfs_upgrade]
+tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos',
+    'zfs_upgrade_004_pos', 'zfs_upgrade_005_pos', 'zfs_upgrade_006_neg',
+    'zfs_upgrade_007_neg']
+tags = ['functional', 'cli_root', 'zfs_upgrade']
+
+[tests/functional/cli_root/zfs_wait]
+tests = ['zfs_wait_deleteq']
+tags = ['functional', 'cli_root', 'zfs_wait']
+
+[tests/functional/cli_root/zpool]
+tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors']
+tags = ['functional', 'cli_root', 'zpool']
+
+[tests/functional/cli_root/zpool_add]
+tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos',
+    'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg',
+    'zpool_add_008_neg', 'zpool_add_009_neg', 'zpool_add_010_pos',
+    'add-o_ashift', 'add_prop_ashift', 'zpool_add_dryrun_output']
+tags = ['functional', 'cli_root', 'zpool_add']
+
+[tests/functional/cli_root/zpool_attach]
+tests = ['zpool_attach_001_neg', 'attach-o_ashift']
+tags = ['functional', 'cli_root', 'zpool_attach']
+
+[tests/functional/cli_root/zpool_clear]
+tests = ['zpool_clear_001_pos', 'zpool_clear_002_neg', 'zpool_clear_003_neg',
+    'zpool_clear_readonly']
+tags = ['functional', 'cli_root', 'zpool_clear']
+
+[tests/functional/cli_root/zpool_create]
+tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
+    'zpool_create_003_pos', 'zpool_create_004_pos', 'zpool_create_005_pos',
+    'zpool_create_006_pos', 'zpool_create_007_neg', 'zpool_create_008_pos',
+    'zpool_create_009_neg', 'zpool_create_010_neg', 'zpool_create_011_neg',
+    'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg',
+    'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos',
+    'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos',
+    'zpool_create_023_neg', 'zpool_create_024_pos',
+    'zpool_create_encrypted', 'zpool_create_crypt_combos',
+    'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos',
+    'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos',
+    'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
+    'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
+    'zpool_create_features_005_pos', 'zpool_create_features_006_pos',
+    'zpool_create_features_007_pos', 'zpool_create_features_008_pos',
+    'zpool_create_features_009_pos', 'create-o_ashift',
+    'zpool_create_tempname', 'zpool_create_dryrun_output']
+tags = ['functional', 'cli_root', 'zpool_create']
+
+[tests/functional/cli_root/zpool_destroy]
+tests = ['zpool_destroy_001_pos', 'zpool_destroy_002_pos',
+    'zpool_destroy_003_neg']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zpool_destroy']
+
+[tests/functional/cli_root/zpool_detach]
+tests = ['zpool_detach_001_neg']
+tags = ['functional', 'cli_root', 'zpool_detach']
+
+[tests/functional/cli_root/zpool_events]
+tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow',
+    'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates',
+    'zpool_events_clear_retained']
+tags = ['functional', 'cli_root', 'zpool_events']
+
+[tests/functional/cli_root/zpool_export]
+tests = ['zpool_export_001_pos', 'zpool_export_002_pos',
+    'zpool_export_003_neg', 'zpool_export_004_pos']
+tags = ['functional', 'cli_root', 'zpool_export']
+
+[tests/functional/cli_root/zpool_get]
+tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos',
+    'zpool_get_004_neg', 'zpool_get_005_pos']
+tags = ['functional', 'cli_root', 'zpool_get']
+
+[tests/functional/cli_root/zpool_history]
+tests = ['zpool_history_001_neg', 'zpool_history_002_pos']
+tags = ['functional', 'cli_root', 'zpool_history']
+
+[tests/functional/cli_root/zpool_import]
+tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
+    'zpool_import_003_pos', 'zpool_import_004_pos', 'zpool_import_005_pos',
+    'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos',
+    'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg',
+    'zpool_import_012_pos', 'zpool_import_013_neg', 'zpool_import_014_pos',
+    'zpool_import_015_pos', 'zpool_import_016_pos', 'zpool_import_017_pos',
+    'zpool_import_features_001_pos', 'zpool_import_features_002_neg',
+    'zpool_import_features_003_pos', 'zpool_import_missing_001_pos',
+    'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos',
+    'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
+    'zpool_import_encrypted', 'zpool_import_encrypted_load',
+    'zpool_import_errata3', 'zpool_import_errata4',
+    'import_cachefile_device_added',
+    'import_cachefile_device_removed',
+    'import_cachefile_device_replaced',
+    'import_cachefile_mirror_attached',
+    'import_cachefile_mirror_detached',
+    'import_cachefile_paths_changed',
+    'import_cachefile_shared_device',
+    'import_devices_missing', 'import_log_missing',
+    'import_paths_changed',
+    'import_rewind_config_changed',
+    'import_rewind_device_replaced']
+tags = ['functional', 'cli_root', 'zpool_import']
+timeout = 1200
+
+[tests/functional/cli_root/zpool_labelclear]
+tests = ['zpool_labelclear_active', 'zpool_labelclear_exported',
+    'zpool_labelclear_removed', 'zpool_labelclear_valid']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zpool_labelclear']
+
+[tests/functional/cli_root/zpool_initialize]
+tests = ['zpool_initialize_attach_detach_add_remove',
+    'zpool_initialize_fault_export_import_online',
+    'zpool_initialize_import_export',
+    'zpool_initialize_offline_export_import_online',
+    'zpool_initialize_online_offline',
+    'zpool_initialize_split',
+    'zpool_initialize_start_and_cancel_neg',
+    'zpool_initialize_start_and_cancel_pos',
+    'zpool_initialize_suspend_resume',
+    'zpool_initialize_uninit',
+    'zpool_initialize_unsupported_vdevs',
+    'zpool_initialize_verify_checksums',
+    'zpool_initialize_verify_initialized']
+pre =
+tags = ['functional', 'cli_root', 'zpool_initialize']
+
+[tests/functional/cli_root/zpool_offline]
+tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
+    'zpool_offline_003_pos']
+tags = ['functional', 'cli_root', 'zpool_offline']
+
+[tests/functional/cli_root/zpool_online]
+tests = ['zpool_online_001_pos', 'zpool_online_002_neg']
+tags = ['functional', 'cli_root', 'zpool_online']
+
+[tests/functional/cli_root/zpool_remove]
+tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos',
+    'zpool_remove_003_pos']
+tags = ['functional', 'cli_root', 'zpool_remove']
+
+[tests/functional/cli_root/zpool_replace]
+tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
+tags = ['functional', 'cli_root', 'zpool_replace']
+
+[tests/functional/cli_root/zpool_resilver]
+tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
+    'zpool_resilver_concurrent']
+tags = ['functional', 'cli_root', 'zpool_resilver']
+
+[tests/functional/cli_root/zpool_scrub]
+tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
+    'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
+    'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing',
+    'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies']
+tags = ['functional', 'cli_root', 'zpool_scrub']
+
+[tests/functional/cli_root/zpool_set]
+tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
+    'zpool_set_ashift', 'zpool_set_features']
+tags = ['functional', 'cli_root', 'zpool_set']
+
+[tests/functional/cli_root/zpool_split]
+tests = ['zpool_split_cliargs', 'zpool_split_devices',
+    'zpool_split_encryption', 'zpool_split_props', 'zpool_split_vdevs',
+    'zpool_split_resilver', 'zpool_split_indirect',
+    'zpool_split_dryrun_output']
+tags = ['functional', 'cli_root', 'zpool_split']
+
+[tests/functional/cli_root/zpool_status]
+tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
+    'zpool_status_features_001_pos']
+tags = ['functional', 'cli_root', 'zpool_status']
+
+[tests/functional/cli_root/zpool_sync]
+tests = ['zpool_sync_001_pos', 'zpool_sync_002_neg']
+tags = ['functional', 'cli_root', 'zpool_sync']
+
+[tests/functional/cli_root/zpool_trim]
+tests = ['zpool_trim_attach_detach_add_remove',
+    'zpool_trim_fault_export_import_online',
+    'zpool_trim_import_export', 'zpool_trim_multiple', 'zpool_trim_neg',
+    'zpool_trim_offline_export_import_online', 'zpool_trim_online_offline',
+    'zpool_trim_partial', 'zpool_trim_rate', 'zpool_trim_rate_neg',
+    'zpool_trim_secure', 'zpool_trim_split', 'zpool_trim_start_and_cancel_neg',
+    'zpool_trim_start_and_cancel_pos', 'zpool_trim_suspend_resume',
+    'zpool_trim_unsupported_vdevs', 'zpool_trim_verify_checksums',
+    'zpool_trim_verify_trimmed']
+tags = ['functional', 'zpool_trim']
+
+[tests/functional/cli_root/zpool_upgrade]
+tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_002_pos',
+    'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos',
+    'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg',
+    'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos',
+    'zpool_upgrade_009_neg', 'zpool_upgrade_features_001_pos']
+tags = ['functional', 'cli_root', 'zpool_upgrade']
+
+[tests/functional/cli_root/zpool_wait]
+tests = ['zpool_wait_discard', 'zpool_wait_freeing',
+    'zpool_wait_initialize_basic', 'zpool_wait_initialize_cancel',
+    'zpool_wait_initialize_flag', 'zpool_wait_multiple',
+    'zpool_wait_no_activity', 'zpool_wait_remove', 'zpool_wait_remove_cancel',
+    'zpool_wait_trim_basic', 'zpool_wait_trim_cancel', 'zpool_wait_trim_flag',
+    'zpool_wait_usage']
+tags = ['functional', 'cli_root', 'zpool_wait']
+
+[tests/functional/cli_root/zpool_wait/scan]
+tests = ['zpool_wait_replace_cancel', 'zpool_wait_rebuild',
+    'zpool_wait_resilver', 'zpool_wait_scrub_cancel',
+    'zpool_wait_replace', 'zpool_wait_scrub_basic', 'zpool_wait_scrub_flag']
+tags = ['functional', 'cli_root', 'zpool_wait']
+
+[tests/functional/cli_user/misc]
+tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
+    'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg',
+    'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg',
+    'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg',
+    'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg',
+    'zfs_share_001_neg', 'zfs_snapshot_001_neg', 'zfs_unallow_001_neg',
+    'zfs_unmount_001_neg', 'zfs_unshare_001_neg', 'zfs_upgrade_001_neg',
+    'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg',
+    'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg',
+    'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg',
+    'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg',
+    'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
+    'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
+    'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
+    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege']
+user =
+tags = ['functional', 'cli_user', 'misc']
+
+[tests/functional/cli_user/zfs_list]
+tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos',
+    'zfs_list_004_neg', 'zfs_list_007_pos', 'zfs_list_008_neg']
+user =
+tags = ['functional', 'cli_user', 'zfs_list']
+
+[tests/functional/cli_user/zpool_iostat]
+tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
+    'zpool_iostat_003_neg', 'zpool_iostat_004_pos',
+    'zpool_iostat_005_pos', 'zpool_iostat_-c_disable',
+    'zpool_iostat_-c_homedir', 'zpool_iostat_-c_searchpath']
+user =
+tags = ['functional', 'cli_user', 'zpool_iostat']
+
+[tests/functional/cli_user/zpool_list]
+tests = ['zpool_list_001_pos', 'zpool_list_002_neg']
+user =
+tags = ['functional', 'cli_user', 'zpool_list']
+
+[tests/functional/cli_user/zpool_status]
+tests = ['zpool_status_003_pos', 'zpool_status_-c_disable',
+    'zpool_status_-c_homedir', 'zpool_status_-c_searchpath']
+user =
+tags = ['functional', 'cli_user', 'zpool_status']
+
+[tests/functional/compression]
+tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
+    'l2arc_compressed_arc', 'l2arc_compressed_arc_disabled',
+    'l2arc_encrypted', 'l2arc_encrypted_no_compressed_arc']
+tags = ['functional', 'compression']
+
+[tests/functional/cp_files]
+tests = ['cp_files_001_pos']
+tags = ['functional', 'cp_files']
+
+[tests/functional/crtime]
+tests = ['crtime_001_pos' ]
+tags = ['functional', 'crtime']
+
+[tests/functional/ctime]
+tests = ['ctime_001_pos' ]
+tags = ['functional', 'ctime']
+
+[tests/functional/deadman]
+tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio']
+pre =
+post =
+tags = ['functional', 'deadman']
+
+[tests/functional/delegate]
+tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
+    'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
+    'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg',
+    'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg',
+    'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos',
+    'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos',
+    'zfs_unallow_007_neg', 'zfs_unallow_008_neg']
+tags = ['functional', 'delegate']
+
+[tests/functional/exec]
+tests = ['exec_001_pos', 'exec_002_neg']
+tags = ['functional', 'exec']
+
+[tests/functional/fallocate]
+tests = ['fallocate_punch-hole']
+tags = ['functional', 'fallocate']
+
+[tests/functional/features/async_destroy]
+tests = ['async_destroy_001_pos']
+tags = ['functional', 'features', 'async_destroy']
+
+[tests/functional/features/large_dnode]
+tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg',
+    'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos']
+tags = ['functional', 'features', 'large_dnode']
+
+[tests/functional/grow]
+pre =
+post =
+tests = ['grow_pool_001_pos', 'grow_replicas_001_pos']
+tags = ['functional', 'grow']
+
+[tests/functional/history]
+tests = ['history_001_pos', 'history_002_pos', 'history_003_pos',
+    'history_004_pos', 'history_005_neg', 'history_006_neg',
+    'history_007_pos', 'history_008_pos', 'history_009_pos',
+    'history_010_pos']
+tags = ['functional', 'history']
+
+[tests/functional/hkdf]
+tests = ['run_hkdf_test']
+tags = ['functional', 'hkdf']
+
+[tests/functional/inheritance]
+tests = ['inherit_001_pos']
+pre =
+tags = ['functional', 'inheritance']
+
+[tests/functional/io]
+tests = ['sync', 'psync', 'posixaio', 'mmap']
+tags = ['functional', 'io']
+
+[tests/functional/inuse]
+tests = ['inuse_004_pos', 'inuse_005_pos', 'inuse_008_pos', 'inuse_009_pos']
+post =
+tags = ['functional', 'inuse']
+
+[tests/functional/large_files]
+tests = ['large_files_001_pos', 'large_files_002_pos']
+tags = ['functional', 'large_files']
+
+[tests/functional/limits]
+tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count',
+    'snapshot_limit']
+tags = ['functional', 'limits']
+
+[tests/functional/link_count]
+tests = ['link_count_001', 'link_count_root_inode']
+tags = ['functional', 'link_count']
+
+[tests/functional/migration]
+tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
+    'migration_004_pos', 'migration_005_pos', 'migration_006_pos',
+    'migration_007_pos', 'migration_008_pos', 'migration_009_pos',
+    'migration_010_pos', 'migration_011_pos', 'migration_012_pos']
+tags = ['functional', 'migration']
+
+[tests/functional/mmap]
+tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
+    'mmap_write_001_pos', 'mmap_sync_001_pos']
+tags = ['functional', 'mmap']
+
+[tests/functional/mount]
+tests = ['umount_001', 'umountall_001']
+tags = ['functional', 'mount']
+
+[tests/functional/mv_files]
+tests = ['mv_files_001_pos', 'mv_files_002_pos', 'random_creation']
+tags = ['functional', 'mv_files']
+
+[tests/functional/nestedfs]
+tests = ['nestedfs_001_pos']
+tags = ['functional', 'nestedfs']
+
+[tests/functional/no_space]
+tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos',
+    'enospc_df', 'enospc_rm']
+tags = ['functional', 'no_space']
+
+[tests/functional/nopwrite]
+tests = ['nopwrite_copies', 'nopwrite_mtime', 'nopwrite_negative',
+    'nopwrite_promoted_clone', 'nopwrite_recsize', 'nopwrite_sync',
+    'nopwrite_varying_compression', 'nopwrite_volume']
+tags = ['functional', 'nopwrite']
+
+[tests/functional/online_offline]
+tests = ['online_offline_001_pos', 'online_offline_002_neg',
+    'online_offline_003_neg']
+tags = ['functional', 'online_offline']
+
+[tests/functional/pool_checkpoint]
+tests = ['checkpoint_after_rewind', 'checkpoint_big_rewind',
+    'checkpoint_capacity', 'checkpoint_conf_change', 'checkpoint_discard',
+    'checkpoint_discard_busy', 'checkpoint_discard_many',
+    'checkpoint_indirect', 'checkpoint_invalid', 'checkpoint_lun_expsz',
+    'checkpoint_open', 'checkpoint_removal', 'checkpoint_rewind',
+    'checkpoint_ro_rewind', 'checkpoint_sm_scale', 'checkpoint_twice',
+    'checkpoint_vdev_add', 'checkpoint_zdb', 'checkpoint_zhack_feat']
+tags = ['functional', 'pool_checkpoint']
+timeout = 1800
+
+[tests/functional/pool_names]
+tests = ['pool_names_001_pos', 'pool_names_002_neg']
+pre =
+post =
+tags = ['functional', 'pool_names']
+
+[tests/functional/poolversion]
+tests = ['poolversion_001_pos', 'poolversion_002_pos']
+tags = ['functional', 'poolversion']
+
+[tests/functional/pyzfs]
+tests = ['pyzfs_unittest']
+pre =
+post =
+tags = ['functional', 'pyzfs']
+
+[tests/functional/quota]
+tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos',
+         'quota_004_pos', 'quota_005_pos', 'quota_006_neg']
+tags = ['functional', 'quota']
+
+[tests/functional/redacted_send]
+tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
+    'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes',
+    'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones',
+    'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative',
+    'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume',
+    'redacted_size', 'redacted_volume']
+tags = ['functional', 'redacted_send']
+
+[tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos']
+tags = ['functional', 'raidz']
+
+[tests/functional/redundancy]
+tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
+    'redundancy_draid3', 'redundancy_draid_damaged1',
+    'redundancy_draid_damaged2', 'redundancy_draid_spare1',
+    'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror',
+    'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2',
+    'redundancy_raidz3', 'redundancy_stripe']
+tags = ['functional', 'redundancy']
+timeout = 1200
+
+[tests/functional/refquota]
+tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos',
+    'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg',
+    'refquota_007_neg', 'refquota_008_neg']
+tags = ['functional', 'refquota']
+
+[tests/functional/refreserv]
+tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos',
+    'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz',
+    'refreserv_raidz']
+tags = ['functional', 'refreserv']
+
+[tests/functional/removal]
+pre =
+tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
+    'removal_condense_export', 'removal_multiple_indirection',
+    'removal_nopwrite', 'removal_remap_deadlists',
+    'removal_resume_export', 'removal_sanity', 'removal_with_add',
+    'removal_with_create_fs', 'removal_with_dedup',
+    'removal_with_errors', 'removal_with_export',
+    'removal_with_ganging', 'removal_with_faulted',
+    'removal_with_remove', 'removal_with_scrub', 'removal_with_send',
+    'removal_with_send_recv', 'removal_with_snapshot',
+    'removal_with_write', 'removal_with_zdb', 'remove_expanded',
+    'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
+    'remove_indirect', 'remove_attach_mirror']
+tags = ['functional', 'removal']
+
+[tests/functional/rename_dirs]
+tests = ['rename_dirs_001_pos']
+tags = ['functional', 'rename_dirs']
+
+[tests/functional/replacement]
+tests = ['attach_import', 'attach_multiple', 'attach_rebuild',
+    'attach_resilver', 'detach', 'rebuild_disabled_feature',
+    'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild',
+    'replace_resilver', 'resilver_restart_001', 'resilver_restart_002',
+    'scrub_cancel']
+tags = ['functional', 'replacement']
+
+[tests/functional/reservation]
+tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
+    'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos',
+    'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
+    'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
+    'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos',
+    'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
+    'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
+    'reservation_022_pos']
+tags = ['functional', 'reservation']
+
+[tests/functional/rootpool]
+tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
+tags = ['functional', 'rootpool']
+
+[tests/functional/rsend]
+tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
+    'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos',
+    'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos', 'rsend_009_pos',
+    'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', 'rsend_013_pos',
+    'rsend_014_pos', 'rsend_016_neg', 'rsend_019_pos', 'rsend_020_pos',
+    'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos',
+    'send-c_verify_ratio', 'send-c_verify_contents', 'send-c_props',
+    'send-c_incremental', 'send-c_volume', 'send-c_zstreamdump',
+    'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
+    'send-c_mixed_compression', 'send-c_stream_size_estimate',
+    'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
+    'send-c_recv_dedup', 'send-L_toggle',
+    'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects',
+    'send_encrypted_hierarchy', 'send_encrypted_props',
+    'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files',
+    'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
+    'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol',
+    'send_partial_dataset', 'send_invalid', 'send_doall',
+    'send_raw_spill_block', 'send_raw_ashift', 'send_raw_large_blocks']
+tags = ['functional', 'rsend']
+
+[tests/functional/scrub_mirror]
+tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
+    'scrub_mirror_003_pos', 'scrub_mirror_004_pos']
+tags = ['functional', 'scrub_mirror']
+
+[tests/functional/slog]
+tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
+    'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
+    'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
+    'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001',
+    'slog_replay_fs_002', 'slog_replay_volume']
+tags = ['functional', 'slog']
+
+[tests/functional/snapshot]
+tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
+    'rollback_003_pos', 'snapshot_001_pos', 'snapshot_002_pos',
+    'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos',
+    'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
+    'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
+    'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
+    'snapshot_017_pos']
+tags = ['functional', 'snapshot']
+
+[tests/functional/snapused]
+tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos',
+    'snapused_004_pos', 'snapused_005_pos']
+tags = ['functional', 'snapused']
+
+[tests/functional/sparse]
+tests = ['sparse_001_pos']
+tags = ['functional', 'sparse']
+
+[tests/functional/suid]
+tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid',
+    'suid_write_to_none', 'suid_write_zil_replay']
+tags = ['functional', 'suid']
+
+[tests/functional/threadsappend]
+tests = ['threadsappend_001_pos']
+tags = ['functional', 'threadsappend']
+
+[tests/functional/trim]
+tests = ['autotrim_integrity', 'autotrim_config', 'autotrim_trim_integrity',
+    'trim_integrity', 'trim_config', 'trim_l2arc']
+tags = ['functional', 'trim']
+
+[tests/functional/truncate]
+tests = ['truncate_001_pos', 'truncate_002_pos', 'truncate_timestamps']
+tags = ['functional', 'truncate']
+
+[tests/functional/upgrade]
+tests = ['upgrade_userobj_001_pos', 'upgrade_readonly_pool']
+tags = ['functional', 'upgrade']
+
+[tests/functional/userquota]
+tests = [
+    'userquota_001_pos', 'userquota_002_pos', 'userquota_003_pos',
+    'userquota_004_pos', 'userquota_005_neg', 'userquota_006_pos',
+    'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos',
+    'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg',
+    'userspace_001_pos', 'userspace_002_pos', 'userspace_encrypted',
+    'userspace_send_encrypted', 'userspace_encrypted_13709']
+tags = ['functional', 'userquota']
+
+[tests/functional/vdev_zaps]
+tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos',
+    'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos',
+    'vdev_zaps_007_pos']
+tags = ['functional', 'vdev_zaps']
+
+[tests/functional/write_dirs]
+tests = ['write_dirs_001_pos', 'write_dirs_002_pos']
+tags = ['functional', 'write_dirs']
+
+[tests/functional/xattr]
+tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos',
+    'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg',
+    'xattr_011_pos', 'xattr_012_pos', 'xattr_013_pos']
+tags = ['functional', 'xattr']
+
+[tests/functional/zvol/zvol_ENOSPC]
+tests = ['zvol_ENOSPC_001_pos']
+tags = ['functional', 'zvol', 'zvol_ENOSPC']
+
+[tests/functional/zvol/zvol_cli]
+tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg']
+tags = ['functional', 'zvol', 'zvol_cli']
+
+[tests/functional/zvol/zvol_misc]
+tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse',
+    'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
+tags = ['functional', 'zvol', 'zvol_misc']
+
+[tests/functional/zvol/zvol_swap]
+tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos']
+tags = ['functional', 'zvol', 'zvol_swap']
+
+[tests/functional/libzfs]
+tests = ['many_fds', 'libzfs_input']
+tags = ['functional', 'libzfs']
+
+[tests/functional/log_spacemap]
+tests = ['log_spacemap_import_logs']
+pre =
+post =
+tags = ['functional', 'log_spacemap']
+
+[tests/functional/l2arc]
+tests = ['l2arc_arcstats_pos', 'l2arc_mfuonly_pos', 'l2arc_l2miss_pos',
+    'persist_l2arc_001_pos', 'persist_l2arc_002_pos',
+    'persist_l2arc_003_neg', 'persist_l2arc_004_pos', 'persist_l2arc_005_pos']
+tags = ['functional', 'l2arc']
+
+[tests/functional/zpool_influxdb]
+tests = ['zpool_influxdb']
+tags = ['functional', 'zpool_influxdb']

diff --git a/zfs/tests/runfiles/freebsd.run b/zfs/tests/runfiles/freebsd.run
new file mode 100644
index 0000000..153b204
--- /dev/null
+++ b/zfs/tests/runfiles/freebsd.run

@@ -0,0 +1,31 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+[DEFAULT]
+pre = setup
+quiet = False
+pre_user = root
+user = root
+timeout = 600
+post_user = root
+post = cleanup
+failsafe_user = root
+failsafe = callbacks/zfs_failsafe
+outputdir = /var/tmp/test_results
+tags = ['functional']
+
+[tests/functional/acl/off:FreeBSD]
+tests = ['dosmode']
+tags = ['functional', 'acl']
+
+[tests/functional/cli_root/zfs_jail:FreeBSD]
+tests = ['zfs_jail_001_pos']
+tags = ['functional', 'cli_root', 'zfs_jail']

diff --git a/zfs/tests/runfiles/linux.run b/zfs/tests/runfiles/linux.run
index d4c4bf4..94c1cbb 100644
--- a/zfs/tests/runfiles/linux.run
+++ b/zfs/tests/runfiles/linux.run

@@ -17,924 +17,170 @@
 timeout = 600
 post_user = root
 post = cleanup
+failsafe_user = root
+failsafe = callbacks/zfs_failsafe
 outputdir = /var/tmp/test_results
 tags = ['functional']
 
-[tests/functional/acl/posix]
-tests = ['posix_001_pos', 'posix_002_pos', 'posix_003_pos']
+[tests/functional/acl/posix:Linux]
+tests = ['posix_001_pos', 'posix_002_pos', 'posix_003_pos', 'posix_004_pos']
 tags = ['functional', 'acl', 'posix']
 
-[tests/functional/alloc_class]
-tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
-    'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
-    'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
-    'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
-    'alloc_class_013_pos']
-tags = ['functional', 'alloc_class']
+[tests/functional/acl/posix-sa:Linux]
+tests = ['posix_001_pos', 'posix_002_pos', 'posix_003_pos', 'posix_004_pos']
+tags = ['functional', 'acl', 'posix-sa']
 
-[tests/functional/arc]
-tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos',
-    'arcstats_runtime_tuning']
-tags = ['functional', 'arc']
-
-[tests/functional/atime]
-tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos', 'root_atime_off',
-    'root_atime_on', 'root_relatime_on']
+[tests/functional/atime:Linux]
+tests = ['atime_003_pos', 'root_relatime_on']
 tags = ['functional', 'atime']
 
-[tests/functional/bootfs]
-tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos',
-    'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos',
-    'bootfs_008_pos']
-tags = ['functional', 'bootfs']
-
-[tests/functional/cache]
-tests = ['cache_001_pos', 'cache_002_pos', 'cache_003_pos', 'cache_004_neg',
-    'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg',
-    'cache_009_pos', 'cache_010_neg', 'cache_011_pos']
-tags = ['functional', 'cache']
-
-[tests/functional/cachefile]
-tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos',
-    'cachefile_004_pos']
-tags = ['functional', 'cachefile']
-
-[tests/functional/casenorm]
-tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure',
-    'sensitive_none_lookup', 'sensitive_none_delete',
-    'sensitive_formd_lookup', 'sensitive_formd_delete',
-    'insensitive_none_lookup', 'insensitive_none_delete',
-    'insensitive_formd_lookup', 'insensitive_formd_delete',
-    'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
-    'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
-tags = ['functional', 'casenorm']
-
-[tests/functional/channel_program/lua_core]
-tests = ['tst.args_to_lua', 'tst.divide_by_zero', 'tst.exists',
-    'tst.integer_illegal', 'tst.integer_overflow', 'tst.language_functions_neg',
-    'tst.language_functions_pos', 'tst.large_prog', 'tst.libraries',
-    'tst.memory_limit', 'tst.nested_neg', 'tst.nested_pos', 'tst.nvlist_to_lua',
-    'tst.recursive_neg', 'tst.recursive_pos', 'tst.return_large',
-    'tst.return_nvlist_neg', 'tst.return_nvlist_pos',
-    'tst.return_recursive_table', 'tst.stack_gsub', 'tst.timeout']
-tags = ['functional', 'channel_program', 'lua_core']
-
-[tests/functional/channel_program/synctask_core]
-tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
-    'tst.get_index_props', 'tst.get_mountpoint', 'tst.get_neg',
-    'tst.get_number_props', 'tst.get_string_props', 'tst.get_type',
-    'tst.get_userquota', 'tst.get_written', 'tst.list_children',
-    'tst.list_clones', 'tst.list_snapshots', 'tst.list_system_props',
-    'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict',
-    'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult',
-    'tst.rollback_one', 'tst.snapshot_destroy', 'tst.snapshot_neg',
-    'tst.snapshot_recursive', 'tst.snapshot_simple', 'tst.terminate_by_signal']
-tags = ['functional', 'channel_program', 'synctask_core']
-
-[tests/functional/chattr]
+[tests/functional/chattr:Linux]
 tests = ['chattr_001_pos', 'chattr_002_neg']
 tags = ['functional', 'chattr']
 
-[tests/functional/checksum]
-tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test',
-    'filetest_001_pos']
+[tests/functional/checksum:Linux]
+tests = ['run_edonr_test']
 tags = ['functional', 'checksum']
 
-[tests/functional/clean_mirror]
-tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
-    'clean_mirror_003_pos', 'clean_mirror_004_pos']
-tags = ['functional', 'clean_mirror']
-
-[tests/functional/cli_root/zdb]
-tests = ['zdb_001_neg', 'zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos',
-    'zdb_005_pos', 'zdb_006_pos', 'zdb_checksum', 'zdb_decompress']
-pre =
-post =
-tags = ['functional', 'cli_root', 'zdb']
-
-[tests/functional/cli_root/zfs]
-tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg']
+[tests/functional/cli_root/zfs:Linux]
+tests = ['zfs_003_neg']
 tags = ['functional', 'cli_root', 'zfs']
 
-[tests/functional/cli_root/zfs_bookmark]
-tests = ['zfs_bookmark_cliargs']
-tags = ['functional', 'cli_root', 'zfs_bookmark']
-
-[tests/functional/cli_root/zfs_change-key]
-tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
-    'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
-    'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones']
-tags = ['functional', 'cli_root', 'zfs_change-key']
-
-[tests/functional/cli_root/zfs_clone]
-tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos',
-    'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos',
-    'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg',
-    'zfs_clone_010_pos', 'zfs_clone_encrypted', 'zfs_clone_deeply_nested']
-tags = ['functional', 'cli_root', 'zfs_clone']
-
-[tests/functional/cli_root/zfs_copies]
-tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos',
-    'zfs_copies_004_neg', 'zfs_copies_005_neg', 'zfs_copies_006_pos']
-tags = ['functional', 'cli_root', 'zfs_copies']
-
-[tests/functional/cli_root/zfs_create]
-tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
-    'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos',
-    'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg',
-    'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos',
-    'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted',
-    'zfs_create_crypt_combos']
-tags = ['functional', 'cli_root', 'zfs_create']
-
-[tests/functional/cli_root/zfs_destroy]
-tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos',
-    'zfs_destroy_004_pos', 'zfs_destroy_005_neg', 'zfs_destroy_006_neg',
-    'zfs_destroy_007_neg', 'zfs_destroy_008_pos', 'zfs_destroy_009_pos',
-    'zfs_destroy_010_pos', 'zfs_destroy_011_pos', 'zfs_destroy_012_pos',
-    'zfs_destroy_013_neg', 'zfs_destroy_014_pos', 'zfs_destroy_015_pos',
-    'zfs_destroy_016_pos']
-tags = ['functional', 'cli_root', 'zfs_destroy']
-
-[tests/functional/cli_root/zfs_diff]
-tests = ['zfs_diff_changes', 'zfs_diff_cliargs', 'zfs_diff_timestamp',
-    'zfs_diff_types', 'zfs_diff_encrypted']
-tags = ['functional', 'cli_root', 'zfs_diff']
-
-[tests/functional/cli_root/zfs_get]
-tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos',
-    'zfs_get_004_pos', 'zfs_get_005_neg', 'zfs_get_006_neg', 'zfs_get_007_neg',
-    'zfs_get_008_pos', 'zfs_get_009_pos', 'zfs_get_010_neg']
-tags = ['functional', 'cli_root', 'zfs_get']
-
-[tests/functional/cli_root/zfs_inherit]
-tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos',
-    'zfs_inherit_mountpoint']
-tags = ['functional', 'cli_root', 'zfs_inherit']
-
-[tests/functional/cli_root/zfs_load-key]
-tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file',
-    'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive']
-tags = ['functional', 'cli_root', 'zfs_load-key']
-
-[tests/functional/cli_root/zfs_mount]
-tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
-    'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos',
-    'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg',
-    'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg',
-    'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount',
-    'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
-    'zfs_mount_test_race']
+[tests/functional/cli_root/zfs_mount:Linux]
+tests = ['zfs_mount_006_pos', 'zfs_mount_008_pos', 'zfs_mount_013_pos',
+    'zfs_mount_014_neg', 'zfs_multi_mount']
 tags = ['functional', 'cli_root', 'zfs_mount']
 
-[tests/functional/cli_root/zfs_program]
-tests = ['zfs_program_json']
-tags = ['functional', 'cli_root', 'zfs_program']
-
-[tests/functional/cli_root/zfs_promote]
-tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
-    'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg',
-    'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot']
-tags = ['functional', 'cli_root', 'zfs_promote']
-
-[tests/functional/cli_root/zfs_property]
-tests = ['zfs_written_property_001_pos']
-tags = ['functional', 'cli_root', 'zfs_property']
-
-[tests/functional/cli_root/zfs_receive]
-tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
-    'zfs_receive_004_neg', 'zfs_receive_005_neg', 'zfs_receive_006_pos',
-    'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg',
-    'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
-    'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos',
-    'receive-o-x_props_override', 'zfs_receive_from_encrypted',
-    'zfs_receive_to_encrypted', 'zfs_receive_raw',
-    'zfs_receive_raw_incremental', 'zfs_receive_-e']
-tags = ['functional', 'cli_root', 'zfs_receive']
-
-[tests/functional/cli_root/zfs_remap]
-tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts']
-tags = ['functional', 'cli_root', 'zfs_remap']
-
-[tests/functional/cli_root/zfs_rename]
-tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
-    'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos',
-    'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg',
-    'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
-    'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child',
-    'zfs_rename_to_encrypted', 'zfs_rename_mountpoint']
-tags = ['functional', 'cli_root', 'zfs_rename']
-
-[tests/functional/cli_root/zfs_reservation]
-tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
-tags = ['functional', 'cli_root', 'zfs_reservation']
-
-[tests/functional/cli_root/zfs_rollback]
-tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
-    'zfs_rollback_003_neg', 'zfs_rollback_004_neg']
-tags = ['functional', 'cli_root', 'zfs_rollback']
-
-[tests/functional/cli_root/zfs_send]
-tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
-    'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
-    'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw',
-    'zfs_send_sparse', 'zfs_send-b']
-tags = ['functional', 'cli_root', 'zfs_send']
-
-[tests/functional/cli_root/zfs_set]
-tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
-    'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos',
-    'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos',
-    'mountpoint_002_pos', 'reservation_001_neg', 'user_property_002_pos',
-    'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos',
-    'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos',
-    'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg',
-    'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos',
-    'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation']
-tags = ['functional', 'cli_root', 'zfs_set']
-
-[tests/functional/cli_root/zfs_share]
-tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos',
-    'zfs_share_004_pos', 'zfs_share_005_pos', 'zfs_share_006_pos',
-    'zfs_share_007_neg', 'zfs_share_008_neg', 'zfs_share_009_neg',
-    'zfs_share_010_neg', 'zfs_share_011_pos']
+[tests/functional/cli_root/zfs_share:Linux]
+tests = ['zfs_share_005_pos', 'zfs_share_007_neg', 'zfs_share_009_neg',
+    'zfs_share_012_pos']
 tags = ['functional', 'cli_root', 'zfs_share']
 
-[tests/functional/cli_root/zfs_snapshot]
-tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
-    'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg',
-    'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg',
-    'zfs_snapshot_009_pos']
-tags = ['functional', 'cli_root', 'zfs_snapshot']
-
-[tests/functional/cli_root/zfs_sysfs]
+[tests/functional/cli_root/zfs_sysfs:Linux]
 tests = ['zfeature_set_unsupported', 'zfs_get_unsupported',
     'zfs_set_unsupported', 'zfs_sysfs_live', 'zpool_get_unsupported',
     'zpool_set_unsupported']
 tags = ['functional', 'cli_root', 'zfs_sysfs']
 
-[tests/functional/cli_root/zfs_unload-key]
-tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive']
-tags = ['functional', 'cli_root', 'zfs_unload-key']
-
-[tests/functional/cli_root/zfs_unmount]
-tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos',
-    'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos',
-    'zfs_unmount_007_neg', 'zfs_unmount_008_neg', 'zfs_unmount_009_pos',
-    'zfs_unmount_all_001_pos', 'zfs_unmount_nested']
-tags = ['functional', 'cli_root', 'zfs_unmount']
-
-[tests/functional/cli_root/zfs_unshare]
-tests = ['zfs_unshare_001_pos', 'zfs_unshare_002_pos', 'zfs_unshare_003_pos',
-    'zfs_unshare_004_neg', 'zfs_unshare_005_neg', 'zfs_unshare_006_pos',
-    'zfs_unshare_007_pos']
-tags = ['functional', 'cli_root', 'zfs_unshare']
-
-[tests/functional/cli_root/zfs_upgrade]
-tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos',
-    'zfs_upgrade_004_pos', 'zfs_upgrade_005_pos', 'zfs_upgrade_006_neg',
-    'zfs_upgrade_007_neg']
-tags = ['functional', 'cli_root', 'zfs_upgrade']
-
-[tests/functional/cli_root/zpool]
-tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos']
-tags = ['functional', 'cli_root', 'zpool']
-
-[tests/functional/cli_root/zpool_add]
-tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos',
-    'zpool_add_004_pos', 'zpool_add_005_pos', 'zpool_add_006_pos',
-    'zpool_add_007_neg', 'zpool_add_008_neg', 'zpool_add_009_neg',
-    'zpool_add_010_pos',
-    'add-o_ashift', 'add_prop_ashift', 'add_nested_replacing_spare']
+[tests/functional/cli_root/zpool_add:Linux]
+tests = ['add_nested_replacing_spare']
 tags = ['functional', 'cli_root', 'zpool_add']
 
-[tests/functional/cli_root/zpool_attach]
-tests = ['zpool_attach_001_neg', 'attach-o_ashift']
-tags = ['functional', 'cli_root', 'zpool_attach']
-
-[tests/functional/cli_root/zpool_clear]
-tests = ['zpool_clear_001_pos', 'zpool_clear_002_neg', 'zpool_clear_003_neg',
-    'zpool_clear_readonly']
-tags = ['functional', 'cli_root', 'zpool_clear']
-
-[tests/functional/cli_root/zpool_create]
-tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
-    'zpool_create_003_pos', 'zpool_create_004_pos', 'zpool_create_005_pos',
-    'zpool_create_006_pos', 'zpool_create_007_neg', 'zpool_create_008_pos',
-    'zpool_create_009_neg', 'zpool_create_010_neg', 'zpool_create_011_neg',
-    'zpool_create_012_neg', 'zpool_create_014_neg',
-    'zpool_create_015_neg', 'zpool_create_016_pos', 'zpool_create_017_neg',
-    'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos',
-    'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg',
-    'zpool_create_024_pos',
-    'zpool_create_encrypted', 'zpool_create_crypt_combos',
-    'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
-    'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
-    'zpool_create_features_005_pos',
-    'create-o_ashift', 'zpool_create_tempname']
-tags = ['functional', 'cli_root', 'zpool_create']
-
-[tests/functional/cli_root/zpool_destroy]
-tests = ['zpool_destroy_001_pos', 'zpool_destroy_002_pos',
-    'zpool_destroy_003_neg']
-pre =
-post =
-tags = ['functional', 'cli_root', 'zpool_destroy']
-
-[tests/functional/cli_root/zpool_detach]
-tests = ['zpool_detach_001_neg']
-tags = ['functional', 'cli_root', 'zpool_detach']
-
-[tests/functional/cli_root/zpool_events]
-tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow',
-    'zpool_events_poolname', 'zpool_events_errors']
-tags = ['functional', 'cli_root', 'zpool_events']
-
-[tests/functional/cli_root/zpool_expand]
+[tests/functional/cli_root/zpool_expand:Linux]
 tests = ['zpool_expand_001_pos', 'zpool_expand_002_pos',
     'zpool_expand_003_neg', 'zpool_expand_004_pos', 'zpool_expand_005_pos']
 tags = ['functional', 'cli_root', 'zpool_expand']
 
-[tests/functional/cli_root/zpool_export]
-tests = ['zpool_export_001_pos', 'zpool_export_002_pos',
-    'zpool_export_003_neg', 'zpool_export_004_pos']
-tags = ['functional', 'cli_root', 'zpool_export']
-
-[tests/functional/cli_root/zpool_get]
-tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos',
-    'zpool_get_004_neg', 'zpool_get_005_pos']
-tags = ['functional', 'cli_root', 'zpool_get']
-
-[tests/functional/cli_root/zpool_history]
-tests = ['zpool_history_001_neg', 'zpool_history_002_pos']
-tags = ['functional', 'cli_root', 'zpool_history']
-
-[tests/functional/cli_root/zpool_import]
-tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
-    'zpool_import_003_pos', 'zpool_import_004_pos', 'zpool_import_005_pos',
-    'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos',
-    'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg',
-    'zpool_import_012_pos', 'zpool_import_013_neg', 'zpool_import_014_pos',
-    'zpool_import_015_pos',
-    'zpool_import_features_001_pos', 'zpool_import_features_002_neg',
-    'zpool_import_features_003_pos', 'zpool_import_missing_001_pos',
-    'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos',
-    'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
-    'zpool_import_encrypted', 'zpool_import_encrypted_load',
-    'zpool_import_errata3', 'zpool_import_errata4',
-    'import_cachefile_device_added',
-    'import_cachefile_device_removed',
-    'import_cachefile_device_replaced',
-    'import_cachefile_mirror_attached',
-    'import_cachefile_mirror_detached',
-    'import_cachefile_shared_device',
-    'import_devices_missing',
-    'import_paths_changed',
-    'import_rewind_config_changed',
-    'import_rewind_device_replaced']
-tags = ['functional', 'cli_root', 'zpool_import']
-
-[tests/functional/cli_root/zpool_labelclear]
-tests = ['zpool_labelclear_active', 'zpool_labelclear_exported',
-    'zpool_labelclear_removed', 'zpool_labelclear_valid']
-pre =
-post =
-tags = ['functional', 'cli_root', 'zpool_labelclear']
-
-[tests/functional/cli_root/zpool_initialize]
-tests = ['zpool_initialize_attach_detach_add_remove',
-    'zpool_initialize_import_export',
-    'zpool_initialize_offline_export_import_online',
-    'zpool_initialize_online_offline',
-    'zpool_initialize_split',
-    'zpool_initialize_start_and_cancel_neg',
-    'zpool_initialize_start_and_cancel_pos',
-    'zpool_initialize_suspend_resume',
-    'zpool_initialize_unsupported_vdevs',
-    'zpool_initialize_verify_checksums',
-    'zpool_initialize_verify_initialized']
-pre =
-tags = ['functional', 'cli_root', 'zpool_initialize']
-
-[tests/functional/cli_root/zpool_offline]
-tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
-    'zpool_offline_003_pos']
-tags = ['functional', 'cli_root', 'zpool_offline']
-
-[tests/functional/cli_root/zpool_online]
-tests = ['zpool_online_001_pos', 'zpool_online_002_neg']
-tags = ['functional', 'cli_root', 'zpool_online']
-
-[tests/functional/cli_root/zpool_remove]
-tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos',
-    'zpool_remove_003_pos']
-tags = ['functional', 'cli_root', 'zpool_remove']
-
-[tests/functional/cli_root/zpool_reopen]
+[tests/functional/cli_root/zpool_reopen:Linux]
 tests = ['zpool_reopen_001_pos', 'zpool_reopen_002_pos',
     'zpool_reopen_003_pos', 'zpool_reopen_004_pos', 'zpool_reopen_005_pos',
     'zpool_reopen_006_neg', 'zpool_reopen_007_pos']
 tags = ['functional', 'cli_root', 'zpool_reopen']
 
-[tests/functional/cli_root/zpool_replace]
-tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
-tags = ['functional', 'cli_root', 'zpool_replace']
-
-[tests/functional/cli_root/zpool_resilver]
-tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
-tags = ['functional', 'cli_root', 'zpool_resilver']
-
-[tests/functional/cli_root/zpool_scrub]
-tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
-    'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
-    'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing',
-    'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies']
-tags = ['functional', 'cli_root', 'zpool_scrub']
-
-[tests/functional/cli_root/zpool_set]
-tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
-    'zpool_set_ashift', 'zpool_set_features']
-tags = ['functional', 'cli_root', 'zpool_set']
-
-[tests/functional/cli_root/zpool_split]
-tests = ['zpool_split_cliargs', 'zpool_split_devices',
-    'zpool_split_encryption', 'zpool_split_props', 'zpool_split_vdevs',
-    'zpool_split_resilver', 'zpool_split_wholedisk']
+[tests/functional/cli_root/zpool_split:Linux]
+tests = ['zpool_split_wholedisk']
 tags = ['functional', 'cli_root', 'zpool_split']
 
-[tests/functional/cli_root/zpool_status]
-tests = ['zpool_status_001_pos', 'zpool_status_002_pos']
-tags = ['functional', 'cli_root', 'zpool_status']
-
-[tests/functional/cli_root/zpool_sync]
-tests = ['zpool_sync_001_pos', 'zpool_sync_002_neg']
-tags = ['functional', 'cli_root', 'zpool_sync']
-
-[tests/functional/cli_root/zpool_trim]
-tests = ['zpool_trim_attach_detach_add_remove',
-    'zpool_trim_import_export', 'zpool_trim_multiple', 'zpool_trim_neg',
-    'zpool_trim_offline_export_import_online', 'zpool_trim_online_offline',
-    'zpool_trim_partial', 'zpool_trim_rate', 'zpool_trim_rate_neg',
-    'zpool_trim_secure', 'zpool_trim_split', 'zpool_trim_start_and_cancel_neg',
-    'zpool_trim_start_and_cancel_pos', 'zpool_trim_suspend_resume',
-    'zpool_trim_unsupported_vdevs', 'zpool_trim_verify_checksums',
-    'zpool_trim_verify_trimmed']
-tags = ['functional', 'zpool_trim']
-
-[tests/functional/cli_root/zpool_upgrade]
-tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_002_pos',
-    'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos',
-    'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg',
-    'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos',
-    'zpool_upgrade_009_neg']
-tags = ['functional', 'cli_root', 'zpool_upgrade']
-
-[tests/functional/cli_user/misc]
-tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
-    'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg',
-    'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg',
-    'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg',
-    'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg',
-    'zfs_share_001_neg', 'zfs_snapshot_001_neg', 'zfs_unallow_001_neg',
-    'zfs_unmount_001_neg', 'zfs_unshare_001_neg', 'zfs_upgrade_001_neg',
-    'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg',
-    'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg',
-    'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg',
-    'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg',
-    'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
-    'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
-    'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
-    'arc_summary_001_pos', 'arc_summary_002_neg']
-user =
-tags = ['functional', 'cli_user', 'misc']
-
-[tests/functional/cli_user/zfs_list]
-tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos',
-    'zfs_list_004_neg', 'zfs_list_007_pos', 'zfs_list_008_neg']
-user =
-tags = ['functional', 'cli_user', 'zfs_list']
-
-[tests/functional/cli_user/zpool_iostat]
-tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
-    'zpool_iostat_003_neg', 'zpool_iostat_004_pos',
-    'zpool_iostat_005_pos', 'zpool_iostat_-c_disable',
-    'zpool_iostat_-c_homedir', 'zpool_iostat_-c_searchpath']
-user =
-tags = ['functional', 'cli_user', 'zpool_iostat']
-
-[tests/functional/cli_user/zpool_list]
-tests = ['zpool_list_001_pos', 'zpool_list_002_neg']
-user =
-tags = ['functional', 'cli_user', 'zpool_list']
-
-[tests/functional/cli_user/zpool_status]
-tests = ['zpool_status_003_pos', 'zpool_status_-c_disable',
-    'zpool_status_-c_homedir', 'zpool_status_-c_searchpath']
-user =
-tags = ['functional', 'cli_user', 'zpool_status']
-
-[tests/functional/compression]
-tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
-    'compress_004_pos']
+[tests/functional/compression:Linux]
+tests = ['compress_004_pos']
 tags = ['functional', 'compression']
 
-[tests/functional/cp_files]
-tests = ['cp_files_001_pos']
-tags = ['functional', 'cp_files']
-
-[tests/functional/ctime]
-tests = ['ctime_001_pos' ]
-tags = ['functional', 'ctime']
-
-[tests/functional/deadman]
-tests = ['deadman_sync', 'deadman_zio']
-pre =
-post =
-tags = ['functional', 'deadman']
-
-[tests/functional/delegate]
-tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
-    'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
-    'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg',
-    'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg',
-    'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos',
-    'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos',
-    'zfs_unallow_007_neg', 'zfs_unallow_008_neg']
-tags = ['functional', 'delegate']
-
-[tests/functional/devices]
+[tests/functional/devices:Linux]
 tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
 tags = ['functional', 'devices']
 
-[tests/functional/events]
-tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter']
+[tests/functional/events:Linux]
+tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill']
 tags = ['functional', 'events']
 
-[tests/functional/exec]
-tests = ['exec_001_pos', 'exec_002_neg']
-tags = ['functional', 'exec']
+[tests/functional/fallocate:Linux]
+tests = ['fallocate_prealloc', 'fallocate_zero-range']
+tags = ['functional', 'fallocate']
 
-[tests/functional/fault]
-tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
-    'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_ashift',
-    'auto_spare_multiple', 'auto_spare_shared', 'scrub_after_resilver',
-    'decrypt_fault', 'decompress_fault', 'zpool_status_-s']
+[tests/functional/fault:Linux]
+tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
+    'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
+    'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
+    'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
+    'zpool_status_-s']
 tags = ['functional', 'fault']
 
-[tests/functional/features/async_destroy]
-tests = ['async_destroy_001_pos']
-tags = ['functional', 'features', 'async_destroy']
-
-[tests/functional/features/large_dnode]
-tests = ['large_dnode_001_pos', 'large_dnode_002_pos', 'large_dnode_003_pos',
-         'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_006_pos',
-         'large_dnode_007_neg', 'large_dnode_008_pos', 'large_dnode_009_pos']
+[tests/functional/features/large_dnode:Linux]
+tests = ['large_dnode_002_pos', 'large_dnode_006_pos', 'large_dnode_008_pos']
 tags = ['functional', 'features', 'large_dnode']
 
-[tests/functional/grow]
-pre =
-post =
-tests = ['grow_pool_001_pos', 'grow_replicas_001_pos']
-tags = ['functional', 'grow']
-
-[tests/functional/history]
-tests = ['history_001_pos', 'history_002_pos', 'history_003_pos',
-    'history_004_pos', 'history_005_neg', 'history_006_neg',
-    'history_007_pos', 'history_008_pos', 'history_009_pos',
-    'history_010_pos']
-tags = ['functional', 'history']
-
-[tests/functional/hkdf]
-tests = ['run_hkdf_test']
-tags = ['functional', 'hkdf']
-
-[tests/functional/inheritance]
-tests = ['inherit_001_pos']
-pre =
-tags = ['functional', 'inheritance']
-
-[tests/functional/io]
-tests = ['sync', 'psync', 'libaio', 'posixaio', 'mmap']
+[tests/functional/io:Linux]
+tests = ['libaio', 'io_uring']
 tags = ['functional', 'io']
 
-[tests/functional/inuse]
-tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
-    'inuse_005_pos', 'inuse_006_pos', 'inuse_007_pos', 'inuse_008_pos',
-    'inuse_009_pos']
-post =
-tags = ['functional', 'inuse']
-
-[tests/functional/large_files]
-tests = ['large_files_001_pos', 'large_files_002_pos']
-tags = ['functional', 'large_files']
-
-[tests/functional/largest_pool]
+[tests/functional/largest_pool:Linux]
 tests = ['largest_pool_001_pos']
 pre =
 post =
 tags = ['functional', 'largest_pool']
 
-[tests/functional/limits]
-tests = ['filesystem_count', 'filesystem_limit', 'snapshot_count',
-    'snapshot_limit']
-tags = ['functional', 'limits']
-
-[tests/functional/link_count]
-tests = ['link_count_001', 'link_count_root_inode']
-tags = ['functional', 'link_count']
-
-[tests/functional/migration]
-tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
-    'migration_004_pos', 'migration_005_pos', 'migration_006_pos',
-    'migration_007_pos', 'migration_008_pos', 'migration_009_pos',
-    'migration_010_pos', 'migration_011_pos', 'migration_012_pos']
-tags = ['functional', 'migration']
-
-[tests/functional/mmap]
-tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_libaio_001_pos']
+[tests/functional/mmap:Linux]
+tests = ['mmap_libaio_001_pos']
 tags = ['functional', 'mmap']
 
-[tests/functional/mmp]
+[tests/functional/mmp:Linux]
 tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
     'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
     'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history',
     'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid']
 tags = ['functional', 'mmp']
 
-[tests/functional/mount]
-tests = ['umount_001', 'umount_unlinked_drain', 'umountall_001']
+[tests/functional/mount:Linux]
+tests = ['umount_unlinked_drain']
 tags = ['functional', 'mount']
 
-[tests/functional/mv_files]
-tests = ['mv_files_001_pos', 'mv_files_002_pos', 'random_creation']
-tags = ['functional', 'mv_files']
+[tests/functional/pam:Linux]
+tests = ['pam_basic', 'pam_nounmount']
+tags = ['functional', 'pam']
 
-[tests/functional/nestedfs]
-tests = ['nestedfs_001_pos']
-tags = ['functional', 'nestedfs']
-
-[tests/functional/no_space]
-tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos',
-    'enospc_df']
-tags = ['functional', 'no_space']
-
-[tests/functional/nopwrite]
-tests = ['nopwrite_copies', 'nopwrite_mtime', 'nopwrite_negative',
-    'nopwrite_promoted_clone', 'nopwrite_recsize', 'nopwrite_sync',
-    'nopwrite_varying_compression', 'nopwrite_volume']
-tags = ['functional', 'nopwrite']
-
-[tests/functional/online_offline]
-tests = ['online_offline_001_pos', 'online_offline_002_neg',
-    'online_offline_003_neg']
-tags = ['functional', 'online_offline']
-
-[tests/functional/pool_checkpoint]
-tests = ['checkpoint_after_rewind', 'checkpoint_big_rewind',
-    'checkpoint_capacity', 'checkpoint_conf_change', 'checkpoint_discard',
-    'checkpoint_discard_busy', 'checkpoint_discard_many',
-    'checkpoint_indirect', 'checkpoint_invalid', 'checkpoint_lun_expsz',
-    'checkpoint_open', 'checkpoint_removal', 'checkpoint_rewind',
-    'checkpoint_ro_rewind', 'checkpoint_sm_scale', 'checkpoint_twice',
-    'checkpoint_vdev_add', 'checkpoint_zdb', 'checkpoint_zhack_feat']
-tags = ['functional', 'pool_checkpoint']
-timeout = 1800
-
-[tests/functional/pool_names]
-tests = ['pool_names_001_pos', 'pool_names_002_neg']
-pre =
-post =
-tags = ['functional', 'pool_names']
-
-[tests/functional/poolversion]
-tests = ['poolversion_001_pos', 'poolversion_002_pos']
-tags = ['functional', 'poolversion']
-
-[tests/functional/privilege]
-tests = ['privilege_001_pos', 'privilege_002_pos']
-tags = ['functional', 'privilege']
-
-[tests/functional/procfs]
+[tests/functional/procfs:Linux]
 tests = ['procfs_list_basic', 'procfs_list_concurrent_readers',
     'procfs_list_stale_read', 'pool_state']
 tags = ['functional', 'procfs']
 
-[tests/functional/projectquota]
+[tests/functional/projectquota:Linux]
 tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
     'projectquota_001_pos', 'projectquota_002_pos', 'projectquota_003_pos',
     'projectquota_004_neg', 'projectquota_005_pos', 'projectquota_006_pos',
     'projectquota_007_pos', 'projectquota_008_pos', 'projectquota_009_pos',
     'projectspace_001_pos', 'projectspace_002_pos', 'projectspace_003_pos',
     'projectspace_004_pos',
-    'projecttree_001_pos', 'projecttree_002_pos', 'projecttree_003_neg' ]
+    'projecttree_001_pos', 'projecttree_002_pos', 'projecttree_003_neg']
 tags = ['functional', 'projectquota']
 
-[tests/functional/pyzfs]
-tests = ['pyzfs_unittest']
-pre =
-post =
-tags = ['functional', 'pyzfs']
-
-[tests/functional/quota]
-tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos',
-         'quota_004_pos', 'quota_005_pos', 'quota_006_neg']
-tags = ['functional', 'quota']
-
-[tests/functional/raidz]
-tests = ['raidz_001_neg', 'raidz_002_pos']
-tags = ['functional', 'raidz']
-
-[tests/functional/redundancy]
-tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
-    'redundancy_004_neg']
-tags = ['functional', 'redundancy']
-
-[tests/functional/refquota]
-tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos',
-    'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg',
-    'refquota_007_neg', 'refquota_008_neg']
-tags = ['functional', 'refquota']
-
-[tests/functional/refreserv]
-tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos',
-    'refreserv_004_pos', 'refreserv_005_pos', 'refreserv_multi_raidz',
-    'refreserv_raidz']
-tags = ['functional', 'refreserv']
-
-[tests/functional/removal]
-pre =
-tests = ['removal_all_vdev', 'removal_cancel', 'removal_check_space',
-    'removal_condense_export', 'removal_multiple_indirection',
-    'removal_remap', 'removal_nopwrite', 'removal_remap_deadlists',
-    'removal_resume_export', 'removal_sanity', 'removal_with_add',
-    'removal_with_create_fs', 'removal_with_dedup',
-    'removal_with_errors', 'removal_with_export',
-    'removal_with_ganging', 'removal_with_faulted', 'removal_with_remap',
-    'removal_with_remove', 'removal_with_scrub', 'removal_with_send',
-    'removal_with_send_recv', 'removal_with_snapshot',
-    'removal_with_write', 'removal_with_zdb', 'remove_expanded',
-    'remove_mirror', 'remove_mirror_sanity', 'remove_raidz',
-    'remove_indirect']
-tags = ['functional', 'removal']
-
-[tests/functional/rename_dirs]
-tests = ['rename_dirs_001_pos']
-tags = ['functional', 'rename_dirs']
-
-[tests/functional/replacement]
-tests = ['replacement_001_pos', 'replacement_002_pos', 'replacement_003_pos']
-tags = ['functional', 'replacement']
-
-[tests/functional/reservation]
-tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
-    'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos',
-    'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
-    'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
-    'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos',
-    'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
-    'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
-    'reservation_022_pos']
-tags = ['functional', 'reservation']
-
-[tests/functional/resilver]
-tests = ['resilver_restart_001', 'resilver_restart_002']
-tags = ['functional', 'resilver']
-
-[tests/functional/rootpool]
-tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
-tags = ['functional', 'rootpool']
-
-[tests/functional/rsend]
-tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
-    'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos',
-    'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos',
-    'rsend_013_pos', 'rsend_014_pos',
-    'rsend_019_pos', 'rsend_020_pos',
-    'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos',
-    'send-c_verify_ratio', 'send-c_verify_contents', 'send-c_props',
-    'send-c_incremental', 'send-c_volume', 'send-c_zstreamdump',
-    'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
-    'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
-    'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
-    'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy',
-    'send_encrypted_props', 'send_encrypted_truncated_files',
-    'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files',
-    'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
-    'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol']
+[tests/functional/rsend:Linux]
+tests = ['send_realloc_dnode_size', 'send_encrypted_files']
 tags = ['functional', 'rsend']
 
-[tests/functional/scrub_mirror]
-tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
-    'scrub_mirror_003_pos', 'scrub_mirror_004_pos']
-tags = ['functional', 'scrub_mirror']
+[tests/functional/simd:Linux]
+pre =
+post =
+tests = ['simd_supported']
+tags = ['functional', 'simd']
 
-[tests/functional/slog]
-tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
-    'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
-    'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
-    'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001',
-    'slog_replay_fs_002', 'slog_replay_volume']
-tags = ['functional', 'slog']
-
-[tests/functional/snapshot]
-tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
-    'rollback_003_pos', 'snapshot_001_pos', 'snapshot_002_pos',
-    'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos',
-    'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
-    'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
-    'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
-    'snapshot_015_pos', 'snapshot_016_pos', 'snapshot_017_pos']
+[tests/functional/snapshot:Linux]
+tests = ['snapshot_015_pos', 'snapshot_016_pos']
 tags = ['functional', 'snapshot']
 
-[tests/functional/snapused]
-tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos',
-    'snapused_004_pos', 'snapused_005_pos']
-tags = ['functional', 'snapused']
-
-[tests/functional/sparse]
-tests = ['sparse_001_pos']
-tags = ['functional', 'sparse']
-
-[tests/functional/suid]
-tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid',
-    'suid_write_to_none']
-tags = ['functional', 'suid']
-
-[tests/functional/threadsappend]
-tests = ['threadsappend_001_pos']
-tags = ['functional', 'threadsappend']
-
-[tests/functional/tmpfile]
+[tests/functional/tmpfile:Linux]
 tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos',
     'tmpfile_stat_mode']
 tags = ['functional', 'tmpfile']
 
-[tests/functional/trim]
-tests = ['autotrim_integrity', 'autotrim_config', 'autotrim_trim_integrity',
-    'trim_integrity', 'trim_config']
-tags = ['functional', 'trim']
-
-[tests/functional/truncate]
-tests = ['truncate_001_pos', 'truncate_002_pos', 'truncate_timestamps']
-tags = ['functional', 'truncate']
-
-[tests/functional/upgrade]
-tests = ['upgrade_userobj_001_pos', 'upgrade_projectquota_001_pos',
-    'upgrade_readonly_pool']
+[tests/functional/upgrade:Linux]
+tests = ['upgrade_projectquota_001_pos']
 tags = ['functional', 'upgrade']
 
-[tests/functional/user_namespace]
+[tests/functional/user_namespace:Linux]
 tests = ['user_namespace_001']
 tags = ['functional', 'user_namespace']
 
-[tests/functional/userquota]
-tests = [
-    'userquota_001_pos', 'userquota_002_pos', 'userquota_003_pos',
-    'userquota_004_pos', 'userquota_005_neg', 'userquota_006_pos',
-    'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos',
-    'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg',
-    'userquota_013_pos',
-    'userspace_001_pos', 'userspace_002_pos', 'userspace_003_pos',
-    'groupspace_001_pos', 'groupspace_002_pos', 'groupspace_003_pos' ]
+[tests/functional/userquota:Linux]
+tests = ['groupspace_001_pos', 'groupspace_002_pos', 'groupspace_003_pos',
+    'userquota_013_pos', 'userspace_003_pos']
 tags = ['functional', 'userquota']
-
-[tests/functional/vdev_zaps]
-tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos',
-    'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos',
-    'vdev_zaps_007_pos']
-tags = ['functional', 'vdev_zaps']
-
-[tests/functional/write_dirs]
-tests = ['write_dirs_001_pos', 'write_dirs_002_pos']
-tags = ['functional', 'write_dirs']
-
-[tests/functional/xattr]
-tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos',
-    'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', 'xattr_008_pos',
-    'xattr_009_neg', 'xattr_010_neg', 'xattr_011_pos', 'xattr_012_pos',
-    'xattr_013_pos']
-tags = ['functional', 'xattr']
-
-[tests/functional/zvol/zvol_ENOSPC]
-tests = ['zvol_ENOSPC_001_pos']
-tags = ['functional', 'zvol', 'zvol_ENOSPC']
-
-[tests/functional/zvol/zvol_cli]
-tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg']
-tags = ['functional', 'zvol', 'zvol_cli']
-
-[tests/functional/zvol/zvol_misc]
-tests = ['zvol_misc_001_neg', 'zvol_misc_002_pos', 'zvol_misc_003_neg',
-    'zvol_misc_004_pos', 'zvol_misc_005_neg', 'zvol_misc_006_pos',
-    'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', 'zvol_misc_snapdev',
-    'zvol_misc_volmode', 'zvol_misc_zil']
-tags = ['functional', 'zvol', 'zvol_misc']
-
-[tests/functional/zvol/zvol_swap]
-tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_003_pos',
-    'zvol_swap_004_pos', 'zvol_swap_005_pos', 'zvol_swap_006_pos']
-tags = ['functional', 'zvol', 'zvol_swap']
-
-[tests/functional/libzfs]
-tests = ['many_fds', 'libzfs_input']
-tags = ['functional', 'libzfs']

diff --git a/zfs/tests/runfiles/sanity.run b/zfs/tests/runfiles/sanity.run
new file mode 100644
index 0000000..0a3d42c
--- /dev/null
+++ b/zfs/tests/runfiles/sanity.run

@@ -0,0 +1,623 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# This run file contains a subset of functional tests which exercise
+# as much functionality as possible while still executing relatively
+# quickly.  Each included test should take no more than a few seconds
+# to run.  This provides a convenient way to sanity test a change
+# before committing to a full test run which takes several hours.
+#
+# Approximate run time: 15 minutes
+#
+
+[DEFAULT]
+pre = setup
+quiet = False
+pre_user = root
+user = root
+timeout = 180
+post_user = root
+post = cleanup
+failsafe_user = root
+failsafe = callbacks/zfs_failsafe
+outputdir = /var/tmp/test_results
+tags = ['functional']
+
+[tests/functional/acl/off]
+tests = ['posixmode']
+tags = ['functional', 'acl']
+
+[tests/functional/alloc_class]
+tests = ['alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos',
+    'alloc_class_006_pos', 'alloc_class_008_pos', 'alloc_class_010_pos',
+    'alloc_class_011_neg']
+tags = ['functional', 'alloc_class']
+
+[tests/functional/arc]
+tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'arcstats_runtime_tuning']
+tags = ['functional', 'arc']
+
+[tests/functional/bootfs]
+tests = ['bootfs_004_neg', 'bootfs_007_pos']
+tags = ['functional', 'bootfs']
+
+[tests/functional/cache]
+tests = ['cache_004_neg', 'cache_005_neg', 'cache_007_neg', 'cache_010_pos']
+tags = ['functional', 'cache']
+
+[tests/functional/cachefile]
+tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos',
+    'cachefile_004_pos']
+tags = ['functional', 'cachefile']
+
+[tests/functional/casenorm]
+tests = ['case_all_values', 'norm_all_values', 'sensitive_none_lookup',
+    'sensitive_none_delete', 'insensitive_none_lookup',
+    'insensitive_none_delete', 'mixed_none_lookup', 'mixed_none_delete']
+tags = ['functional', 'casenorm']
+
+[tests/functional/channel_program/lua_core]
+tests = ['tst.args_to_lua', 'tst.divide_by_zero', 'tst.exists',
+    'tst.integer_illegal', 'tst.integer_overflow', 'tst.language_functions_neg',
+    'tst.language_functions_pos', 'tst.large_prog', 'tst.libraries',
+    'tst.memory_limit', 'tst.nested_neg', 'tst.nested_pos', 'tst.nvlist_to_lua',
+    'tst.recursive_neg', 'tst.recursive_pos', 'tst.return_large',
+    'tst.return_nvlist_neg', 'tst.return_nvlist_pos',
+    'tst.return_recursive_table', 'tst.stack_gsub', 'tst.timeout']
+tags = ['functional', 'channel_program', 'lua_core']
+
+[tests/functional/channel_program/synctask_core]
+tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
+    'tst.get_index_props', 'tst.get_mountpoint', 'tst.get_neg',
+    'tst.get_number_props', 'tst.get_string_props', 'tst.get_type',
+    'tst.get_userquota', 'tst.get_written', 'tst.inherit', 'tst.list_bookmarks',
+    'tst.list_children', 'tst.list_clones', 'tst.list_holds',
+    'tst.list_snapshots', 'tst.list_system_props',
+    'tst.list_user_props', 'tst.parse_args_neg', 'tst.promote_conflict',
+    'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult',
+    'tst.rollback_one', 'tst.set_props', 'tst.snapshot_destroy',
+    'tst.snapshot_neg', 'tst.snapshot_recursive', 'tst.snapshot_simple',
+    'tst.bookmark.create', 'tst.bookmark.copy']
+tags = ['functional', 'channel_program', 'synctask_core']
+
+[tests/functional/cli_root/zdb]
+tests = ['zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zdb']
+
+[tests/functional/cli_root/zfs]
+tests = ['zfs_001_neg', 'zfs_002_pos']
+tags = ['functional', 'cli_root', 'zfs']
+
+[tests/functional/cli_root/zfs_bookmark]
+tests = ['zfs_bookmark_cliargs']
+tags = ['functional', 'cli_root', 'zfs_bookmark']
+
+[tests/functional/cli_root/zfs_change-key]
+tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
+    'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
+    'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones']
+tags = ['functional', 'cli_root', 'zfs_change-key']
+
+[tests/functional/cli_root/zfs_clone]
+tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos',
+    'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos',
+    'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg',
+    'zfs_clone_encrypted']
+tags = ['functional', 'cli_root', 'zfs_clone']
+
+[tests/functional/cli_root/zfs_create]
+tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
+    'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos',
+    'zfs_create_007_pos', 'zfs_create_011_pos', 'zfs_create_012_pos',
+    'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted',
+    'zfs_create_dryrun', 'zfs_create_verbose']
+tags = ['functional', 'cli_root', 'zfs_create']
+
+[tests/functional/cli_root/zfs_destroy]
+tests = ['zfs_destroy_002_pos', 'zfs_destroy_003_pos',
+    'zfs_destroy_004_pos', 'zfs_destroy_006_neg', 'zfs_destroy_007_neg',
+    'zfs_destroy_008_pos', 'zfs_destroy_009_pos', 'zfs_destroy_010_pos',
+    'zfs_destroy_011_pos', 'zfs_destroy_012_pos', 'zfs_destroy_013_neg',
+    'zfs_destroy_014_pos', 'zfs_destroy_dev_removal',
+    'zfs_destroy_dev_removal_condense']
+tags = ['functional', 'cli_root', 'zfs_destroy']
+
+[tests/functional/cli_root/zfs_diff]
+tests = ['zfs_diff_cliargs', 'zfs_diff_encrypted']
+tags = ['functional', 'cli_root', 'zfs_diff']
+
+[tests/functional/cli_root/zfs_get]
+tests = ['zfs_get_003_pos', 'zfs_get_006_neg', 'zfs_get_007_neg',
+    'zfs_get_010_neg']
+tags = ['functional', 'cli_root', 'zfs_get']
+
+[tests/functional/cli_root/zfs_inherit]
+tests = ['zfs_inherit_001_neg', 'zfs_inherit_003_pos', 'zfs_inherit_mountpoint']
+tags = ['functional', 'cli_root', 'zfs_inherit']
+
+[tests/functional/cli_root/zfs_load-key]
+tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file',
+    'zfs_load-key_https', 'zfs_load-key_location', 'zfs_load-key_noop',
+    'zfs_load-key_recursive']
+tags = ['functional', 'cli_root', 'zfs_load-key']
+
+[tests/functional/cli_root/zfs_mount]
+tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
+    'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
+    'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg',
+    'zfs_mount_012_pos', 'zfs_mount_encrypted', 'zfs_mount_remount',
+    'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', 'zfs_mount_test_race']
+tags = ['functional', 'cli_root', 'zfs_mount']
+
+[tests/functional/cli_root/zfs_program]
+tests = ['zfs_program_json']
+tags = ['functional', 'cli_root', 'zfs_program']
+
+[tests/functional/cli_root/zfs_promote]
+tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
+    'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg',
+    'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot']
+tags = ['functional', 'cli_root', 'zfs_promote']
+
+[tests/functional/cli_root/zfs_receive]
+tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
+    'zfs_receive_004_neg', 'zfs_receive_005_neg', 'zfs_receive_006_pos',
+    'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg',
+    'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
+    'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos',
+    'zfs_receive_016_pos', 'zfs_receive_from_encrypted',
+    'zfs_receive_to_encrypted', 'zfs_receive_raw',
+    'zfs_receive_raw_incremental', 'zfs_receive_-e',
+    'zfs_receive_raw_-d', 'zfs_receive_from_zstd', 'zfs_receive_new_props']
+tags = ['functional', 'cli_root', 'zfs_receive']
+
+[tests/functional/cli_root/zfs_rename]
+tests = ['zfs_rename_003_pos', 'zfs_rename_004_neg',
+    'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos',
+    'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg',
+    'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos',
+    'zfs_rename_encrypted_child', 'zfs_rename_to_encrypted',
+    'zfs_rename_mountpoint', 'zfs_rename_nounmount']
+tags = ['functional', 'cli_root', 'zfs_rename']
+
+[tests/functional/cli_root/zfs_reservation]
+tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
+tags = ['functional', 'cli_root', 'zfs_reservation']
+
+[tests/functional/cli_root/zfs_rollback]
+tests = ['zfs_rollback_003_neg', 'zfs_rollback_004_neg']
+tags = ['functional', 'cli_root', 'zfs_rollback']
+
+[tests/functional/cli_root/zfs_send]
+tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
+    'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_encrypted',
+    'zfs_send_raw']
+tags = ['functional', 'cli_root', 'zfs_send']
+
+[tests/functional/cli_root/zfs_set]
+tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
+    'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos',
+    'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos',
+    'mountpoint_002_pos', 'user_property_002_pos',
+    'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos',
+    'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos',
+    'user_property_004_pos', 'version_001_neg',
+    'zfs_set_003_neg', 'property_alias_001_pos',
+    'zfs_set_keylocation', 'zfs_set_feature_activation']
+tags = ['functional', 'cli_root', 'zfs_set']
+
+[tests/functional/cli_root/zfs_snapshot]
+tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
+    'zfs_snapshot_003_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg']
+tags = ['functional', 'cli_root', 'zfs_snapshot']
+
+[tests/functional/cli_root/zfs_unload-key]
+tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive']
+tags = ['functional', 'cli_root', 'zfs_unload-key']
+
+[tests/functional/cli_root/zfs_unmount]
+tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos',
+    'zfs_unmount_004_pos', 'zfs_unmount_007_neg', 'zfs_unmount_008_neg',
+    'zfs_unmount_009_pos', 'zfs_unmount_unload_keys']
+tags = ['functional', 'cli_root', 'zfs_unmount']
+
+[tests/functional/cli_root/zfs_upgrade]
+tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_006_neg',
+    'zfs_upgrade_007_neg']
+tags = ['functional', 'cli_root', 'zfs_upgrade']
+
+[tests/functional/cli_root/zfs_wait]
+tests = ['zfs_wait_deleteq']
+tags = ['functional', 'cli_root', 'zfs_wait']
+
+[tests/functional/cli_root/zpool]
+tests = ['zpool_001_neg', 'zpool_003_pos', 'zpool_colors']
+tags = ['functional', 'cli_root', 'zpool']
+
+[tests/functional/cli_root/zpool_add]
+tests = ['zpool_add_002_pos', 'zpool_add_003_pos',
+    'zpool_add_004_pos', 'zpool_add_006_pos', 'zpool_add_007_neg',
+    'zpool_add_008_neg', 'zpool_add_009_neg']
+tags = ['functional', 'cli_root', 'zpool_add']
+
+[tests/functional/cli_root/zpool_attach]
+tests = ['zpool_attach_001_neg']
+tags = ['functional', 'cli_root', 'zpool_attach']
+
+[tests/functional/cli_root/zpool_clear]
+tests = ['zpool_clear_002_neg']
+tags = ['functional', 'cli_root', 'zpool_clear']
+
+[tests/functional/cli_root/zpool_create]
+tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
+    'zpool_create_003_pos', 'zpool_create_004_pos', 'zpool_create_007_neg',
+    'zpool_create_008_pos', 'zpool_create_010_neg', 'zpool_create_011_neg',
+    'zpool_create_012_neg', 'zpool_create_014_neg', 'zpool_create_015_neg',
+    'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos',
+    'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos',
+    'zpool_create_encrypted',
+    'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
+    'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
+    'zpool_create_features_005_pos']
+tags = ['functional', 'cli_root', 'zpool_create']
+
+[tests/functional/cli_root/zpool_destroy]
+tests = ['zpool_destroy_001_pos', 'zpool_destroy_002_pos',
+    'zpool_destroy_003_neg']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zpool_destroy']
+
+[tests/functional/cli_root/zpool_detach]
+tests = ['zpool_detach_001_neg']
+tags = ['functional', 'cli_root', 'zpool_detach']
+
+[tests/functional/cli_root/zpool_events]
+tests = ['zpool_events_clear', 'zpool_events_follow', 'zpool_events_poolname']
+tags = ['functional', 'cli_root', 'zpool_events']
+
+[tests/functional/cli_root/zpool_export]
+tests = ['zpool_export_001_pos', 'zpool_export_002_pos', 'zpool_export_003_neg']
+tags = ['functional', 'cli_root', 'zpool_export']
+
+[tests/functional/cli_root/zpool_get]
+tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos',
+    'zpool_get_004_neg', 'zpool_get_005_pos']
+tags = ['functional', 'cli_root', 'zpool_get']
+
+[tests/functional/cli_root/zpool_history]
+tests = ['zpool_history_001_neg', 'zpool_history_002_pos']
+tags = ['functional', 'cli_root', 'zpool_history']
+
+[tests/functional/cli_root/zpool_import]
+tests = ['zpool_import_003_pos', 'zpool_import_010_pos', 'zpool_import_011_neg',
+    'zpool_import_014_pos', 'zpool_import_features_001_pos',
+    'zpool_import_all_001_pos', 'zpool_import_encrypted']
+tags = ['functional', 'cli_root', 'zpool_import']
+
+[tests/functional/cli_root/zpool_labelclear]
+tests = ['zpool_labelclear_active', 'zpool_labelclear_exported',
+    'zpool_labelclear_removed', 'zpool_labelclear_valid']
+pre =
+post =
+tags = ['functional', 'cli_root', 'zpool_labelclear']
+
+[tests/functional/cli_root/zpool_initialize]
+tests = ['zpool_initialize_online_offline']
+pre =
+tags = ['functional', 'cli_root', 'zpool_initialize']
+
+[tests/functional/cli_root/zpool_offline]
+tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg']
+tags = ['functional', 'cli_root', 'zpool_offline']
+
+[tests/functional/cli_root/zpool_online]
+tests = ['zpool_online_001_pos', 'zpool_online_002_neg']
+tags = ['functional', 'cli_root', 'zpool_online']
+
+[tests/functional/cli_root/zpool_remove]
+tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos',
+    'zpool_remove_003_pos']
+tags = ['functional', 'cli_root', 'zpool_remove']
+
+[tests/functional/cli_root/zpool_replace]
+tests = ['zpool_replace_001_neg']
+tags = ['functional', 'cli_root', 'zpool_replace']
+
+[tests/functional/cli_root/zpool_resilver]
+tests = ['zpool_resilver_bad_args']
+tags = ['functional', 'cli_root', 'zpool_resilver']
+
+[tests/functional/cli_root/zpool_scrub]
+tests = ['zpool_scrub_001_neg', 'zpool_scrub_003_pos',
+    'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing',
+    'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies']
+tags = ['functional', 'cli_root', 'zpool_scrub']
+
+[tests/functional/cli_root/zpool_set]
+tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
+    'zpool_set_ashift', 'zpool_set_features']
+tags = ['functional', 'cli_root', 'zpool_set']
+
+[tests/functional/cli_root/zpool_split]
+tests = ['zpool_split_cliargs', 'zpool_split_devices',
+    'zpool_split_props', 'zpool_split_vdevs', 'zpool_split_indirect']
+tags = ['functional', 'cli_root', 'zpool_split']
+
+[tests/functional/cli_root/zpool_status]
+tests = ['zpool_status_001_pos', 'zpool_status_002_pos']
+tags = ['functional', 'cli_root', 'zpool_status']
+
+[tests/functional/cli_root/zpool_sync]
+tests = ['zpool_sync_002_neg']
+tags = ['functional', 'cli_root', 'zpool_sync']
+
+[tests/functional/cli_root/zpool_trim]
+tests = ['zpool_trim_attach_detach_add_remove', 'zpool_trim_neg',
+    'zpool_trim_offline_export_import_online', 'zpool_trim_online_offline',
+    'zpool_trim_rate_neg', 'zpool_trim_secure', 'zpool_trim_split',
+    'zpool_trim_start_and_cancel_neg', 'zpool_trim_start_and_cancel_pos']
+tags = ['functional', 'zpool_trim']
+
+[tests/functional/cli_root/zpool_upgrade]
+tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_003_pos',
+    'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg',
+    'zpool_upgrade_009_neg']
+tags = ['functional', 'cli_root', 'zpool_upgrade']
+
+[tests/functional/cli_root/zpool_wait]
+tests = ['zpool_wait_no_activity', 'zpool_wait_usage']
+tags = ['functional', 'cli_root', 'zpool_wait']
+
+[tests/functional/cli_root/zpool_wait/scan]
+tests = ['zpool_wait_scrub_flag']
+tags = ['functional', 'cli_root', 'zpool_wait']
+
+[tests/functional/cli_user/misc]
+tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
+    'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg',
+    'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg',
+    'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg',
+    'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg',
+    'zfs_snapshot_001_neg', 'zfs_unallow_001_neg',
+    'zfs_unmount_001_neg', 'zfs_upgrade_001_neg',
+    'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg',
+    'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg',
+    'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg',
+    'zpool_history_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg',
+    'zpool_remove_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
+    'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
+    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege']
+user =
+tags = ['functional', 'cli_user', 'misc']
+
+[tests/functional/cli_user/zpool_iostat]
+tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
+    'zpool_iostat_003_neg', 'zpool_iostat_004_pos',
+    'zpool_iostat_-c_disable',
+    'zpool_iostat_-c_homedir', 'zpool_iostat_-c_searchpath']
+user =
+tags = ['functional', 'cli_user', 'zpool_iostat']
+
+[tests/functional/cli_user/zpool_list]
+tests = ['zpool_list_001_pos', 'zpool_list_002_neg']
+user =
+tags = ['functional', 'cli_user', 'zpool_list']
+
+[tests/functional/compression]
+tests = ['compress_003_pos']
+tags = ['functional', 'compression']
+
+[tests/functional/exec]
+tests = ['exec_001_pos', 'exec_002_neg']
+tags = ['functional', 'exec']
+
+[tests/functional/features/large_dnode]
+tests = ['large_dnode_003_pos', 'large_dnode_004_neg',
+    'large_dnode_005_pos', 'large_dnode_007_neg']
+tags = ['functional', 'features', 'large_dnode']
+
+[tests/functional/grow]
+pre =
+post =
+tests = ['grow_pool_001_pos', 'grow_replicas_001_pos']
+tags = ['functional', 'grow']
+
+[tests/functional/history]
+tests = ['history_004_pos', 'history_005_neg', 'history_007_pos',
+    'history_009_pos']
+tags = ['functional', 'history']
+
+[tests/functional/hkdf]
+tests = ['run_hkdf_test']
+tags = ['functional', 'hkdf']
+
+[tests/functional/inuse]
+tests = ['inuse_004_pos', 'inuse_005_pos']
+post =
+tags = ['functional', 'inuse']
+
+[tests/functional/large_files]
+tests = ['large_files_001_pos', 'large_files_002_pos']
+tags = ['functional', 'large_files']
+
+[tests/functional/libzfs]
+tests = ['many_fds', 'libzfs_input']
+tags = ['functional', 'libzfs']
+
+[tests/functional/limits]
+tests = ['filesystem_count', 'snapshot_count']
+tags = ['functional', 'limits']
+
+[tests/functional/link_count]
+tests = ['link_count_root_inode']
+tags = ['functional', 'link_count']
+
+[tests/functional/log_spacemap]
+tests = ['log_spacemap_import_logs']
+pre =
+post =
+tags = ['functional', 'log_spacemap']
+
+[tests/functional/migration]
+tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
+    'migration_004_pos', 'migration_005_pos', 'migration_006_pos',
+    'migration_007_pos', 'migration_008_pos', 'migration_009_pos',
+    'migration_010_pos', 'migration_011_pos', 'migration_012_pos']
+tags = ['functional', 'migration']
+
+[tests/functional/mmap]
+tests = ['mmap_read_001_pos']
+tags = ['functional', 'mmap']
+
+[tests/functional/nestedfs]
+tests = ['nestedfs_001_pos']
+tags = ['functional', 'nestedfs']
+
+[tests/functional/nopwrite]
+tests = ['nopwrite_sync', 'nopwrite_volume']
+tags = ['functional', 'nopwrite']
+
+[tests/functional/pool_checkpoint]
+tests = ['checkpoint_conf_change', 'checkpoint_discard_many',
+    'checkpoint_removal', 'checkpoint_sm_scale', 'checkpoint_twice']
+tags = ['functional', 'pool_checkpoint']
+timeout = 1800
+
+[tests/functional/poolversion]
+tests = ['poolversion_001_pos', 'poolversion_002_pos']
+tags = ['functional', 'poolversion']
+
+[tests/functional/redacted_send]
+tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
+    'redacted_disabled_feature', 'redacted_incrementals',
+    'redacted_largeblocks', 'redacted_mixed_recsize', 'redacted_negative',
+    'redacted_origin', 'redacted_props', 'redacted_resume', 'redacted_size']
+tags = ['functional', 'redacted_send']
+
+[tests/functional/raidz]
+tests = ['raidz_001_neg']
+tags = ['functional', 'raidz']
+
+[tests/functional/refquota]
+tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos',
+    'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg',
+    'refquota_007_neg']
+tags = ['functional', 'refquota']
+
+[tests/functional/refreserv]
+tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos',
+    'refreserv_005_pos', 'refreserv_multi_raidz']
+tags = ['functional', 'refreserv']
+
+[tests/functional/removal]
+pre =
+tests = ['removal_all_vdev', 'removal_sanity', 'removal_with_dedup',
+    'removal_with_ganging', 'removal_with_faulted']
+tags = ['functional', 'removal']
+
+[tests/functional/replacement]
+tests = ['rebuild_raidz']
+tags = ['functional', 'replacement']
+
+[tests/functional/reservation]
+tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
+    'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos',
+    'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
+    'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
+    'reservation_014_pos', 'reservation_015_pos',
+    'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
+    'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
+    'reservation_022_pos']
+tags = ['functional', 'reservation']
+
+[tests/functional/rsend]
+tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
+    'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos',
+    'rsend_006_pos', 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos',
+    'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents',
+    'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup',
+    'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props',
+    'send_encrypted_freeobjects',
+    'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds',
+    'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset',
+    'send_invalid']
+tags = ['functional', 'rsend']
+
+[tests/functional/scrub_mirror]
+tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos']
+tags = ['functional', 'scrub_mirror']
+
+[tests/functional/slog]
+tests = ['slog_008_neg', 'slog_009_neg', 'slog_010_neg']
+tags = ['functional', 'slog']
+
+[tests/functional/snapshot]
+tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
+    'snapshot_001_pos', 'snapshot_002_pos', 'snapshot_003_pos',
+    'snapshot_004_pos', 'snapshot_005_pos', 'snapshot_006_pos',
+    'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos',
+    'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos',
+    'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_017_pos']
+tags = ['functional', 'snapshot']
+
+[tests/functional/snapused]
+tests = ['snapused_002_pos', 'snapused_004_pos', 'snapused_005_pos']
+tags = ['functional', 'snapused']
+
+[tests/functional/sparse]
+tests = ['sparse_001_pos']
+tags = ['functional', 'sparse']
+
+[tests/functional/suid]
+tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid',
+    'suid_write_to_none']
+tags = ['functional', 'suid']
+
+[tests/functional/threadsappend]
+tests = ['threadsappend_001_pos']
+tags = ['functional', 'threadsappend']
+
+[tests/functional/truncate]
+tests = ['truncate_001_pos', 'truncate_002_pos']
+tags = ['functional', 'truncate']
+
+[tests/functional/upgrade]
+tests = ['upgrade_userobj_001_pos', 'upgrade_readonly_pool']
+tags = ['functional', 'upgrade']
+
+[tests/functional/vdev_zaps]
+tests = ['vdev_zaps_001_pos', 'vdev_zaps_003_pos', 'vdev_zaps_004_pos',
+    'vdev_zaps_005_pos', 'vdev_zaps_006_pos']
+tags = ['functional', 'vdev_zaps']
+
+[tests/functional/xattr]
+tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos',
+    'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg',
+    'xattr_011_pos', 'xattr_013_pos']
+tags = ['functional', 'xattr']
+
+[tests/functional/zvol/zvol_ENOSPC]
+tests = ['zvol_ENOSPC_001_pos']
+tags = ['functional', 'zvol', 'zvol_ENOSPC']
+
+[tests/functional/zvol/zvol_cli]
+tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg']
+tags = ['functional', 'zvol', 'zvol_cli']
+
+[tests/functional/zvol/zvol_swap]
+tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos']
+tags = ['functional', 'zvol', 'zvol_swap']
+
+[tests/functional/zpool_influxdb]
+tests = ['zpool_influxdb']
+tags = ['functional', 'zpool_influxdb']

diff --git a/zfs/tests/runfiles/sunos.run b/zfs/tests/runfiles/sunos.run
new file mode 100644
index 0000000..9ba00f4
--- /dev/null
+++ b/zfs/tests/runfiles/sunos.run

@@ -0,0 +1,53 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+[DEFAULT]
+pre = setup
+quiet = False
+pre_user = root
+user = root
+timeout = 600
+post_user = root
+post = cleanup
+failsafe_user = root
+failsafe = callbacks/zfs_failsafe
+outputdir = /var/tmp/test_results
+tags = ['functional']
+
+[tests/functional/inuse:illumos]
+tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_006_pos', 'inuse_007_pos']
+post =
+tags = ['functional', 'inuse']
+
+[tests/functional/cli_root/zpool_add:illumos]
+tests = ['zpool_add_005_pos']
+tags = ['functional', 'cli_root', 'zpool_add']
+
+[tests/functional/cli_root/zpool_create:illumos]
+tests = ['zpool_create_016_pos']
+tags = ['functional', 'cli_root', 'zpool_create']
+
+[tests/functional/privilege]
+tests = ['privilege_001_pos', 'privilege_002_pos']
+tags = ['functional', 'privilege']
+
+[tests/functional/xattr:illumos]
+tests = ['xattr_008_pos', 'xattr_009_neg', 'xattr_010_neg']
+tags = ['functional', 'xattr']
+
+[tests/functional/zvol/zvol_misc:illumos]
+tests = ['zvol_misc_001_neg', 'zvol_misc_003_neg', 'zvol_misc_004_pos',
+    'zvol_misc_005_neg', 'zvol_misc_006_pos']
+tags = ['functional', 'zvol', 'zvol_misc']
+
+[tests/functional/zvol/zvol_swap:illumos]
+tests = ['zvol_swap_003_pos', 'zvol_swap_005_pos', 'zvol_swap_006_pos']
+tags = ['functional', 'zvol', 'zvol_swap']

diff --git a/zfs/tests/test-runner/bin/.gitignore b/zfs/tests/test-runner/bin/.gitignore
new file mode 100644
index 0000000..ff7e2f8
--- /dev/null
+++ b/zfs/tests/test-runner/bin/.gitignore

@@ -0,0 +1,2 @@
+test-runner.py
+zts-report.py

diff --git a/zfs/tests/test-runner/bin/Makefile.am b/zfs/tests/test-runner/bin/Makefile.am
index 2c031f7..e11e55f 100644
--- a/zfs/tests/test-runner/bin/Makefile.am
+++ b/zfs/tests/test-runner/bin/Makefile.am

@@ -1,15 +1,8 @@
+include $(top_srcdir)/config/Substfiles.am
+
 pkgdatadir = $(datadir)/@PACKAGE@/test-runner/bin
-dist_pkgdata_SCRIPTS = \
+pkgdata_SCRIPTS = \
 	test-runner.py \
 	zts-report.py
-#
-# These scripts are compatible with both Python 2.6 and 3.4.  As such the
-# python 3 shebang can be replaced at install time when targeting a python
-# 2 system.  This allows us to maintain a single version of the source.
-#
-if USING_PYTHON_2
-install-data-hook:
-	sed --in-place 's|^#!/usr/bin/env python3|#!/usr/bin/env python2|' \
-	    $(DESTDIR)$(pkgdatadir)/test-runner.py \
-	    $(DESTDIR)$(pkgdatadir)/zts-report.py
-endif
+
+SUBSTFILES += $(pkgdata_SCRIPTS)

diff --git a/zfs/tests/test-runner/bin/test-runner.py b/zfs/tests/test-runner/bin/test-runner.py
deleted file mode 100755
index ca08b37..0000000
--- a/zfs/tests/test-runner/bin/test-runner.py
+++ /dev/null

@@ -1,975 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
-# Copyright (c) 2019 Datto Inc.
-#
-# This script must remain compatible with Python 2.6+ and Python 3.4+.
-#
-
-# some python 2.7 system don't have a configparser shim
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser
-
-import os
-import sys
-import ctypes
-
-from datetime import datetime
-from optparse import OptionParser
-from pwd import getpwnam
-from pwd import getpwuid
-from select import select
-from subprocess import PIPE
-from subprocess import Popen
-from threading import Timer
-from time import time
-
-BASEDIR = '/var/tmp/test_results'
-TESTDIR = '/usr/share/zfs/'
-KILL = 'kill'
-TRUE = 'true'
-SUDO = 'sudo'
-LOG_FILE = 'LOG_FILE'
-LOG_OUT = 'LOG_OUT'
-LOG_ERR = 'LOG_ERR'
-LOG_FILE_OBJ = None
-
-# some python 2.7 system don't have a concept of monotonic time
-CLOCK_MONOTONIC_RAW = 4  # see <linux/time.h>
-
-
-class timespec(ctypes.Structure):
-    _fields_ = [
-        ('tv_sec', ctypes.c_long),
-        ('tv_nsec', ctypes.c_long)
-    ]
-
-
-librt = ctypes.CDLL('librt.so.1', use_errno=True)
-clock_gettime = librt.clock_gettime
-clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
-
-
-def monotonic_time():
-    t = timespec()
-    if clock_gettime(CLOCK_MONOTONIC_RAW, ctypes.pointer(t)) != 0:
-        errno_ = ctypes.get_errno()
-        raise OSError(errno_, os.strerror(errno_))
-    return t.tv_sec + t.tv_nsec * 1e-9
-
-
-class Result(object):
-    total = 0
-    runresults = {'PASS': 0, 'FAIL': 0, 'SKIP': 0, 'KILLED': 0, 'RERAN': 0}
-
-    def __init__(self):
-        self.starttime = None
-        self.returncode = None
-        self.runtime = ''
-        self.stdout = []
-        self.stderr = []
-        self.result = ''
-
-    def done(self, proc, killed, reran):
-        """
-        Finalize the results of this Cmd.
-        """
-        Result.total += 1
-        m, s = divmod(monotonic_time() - self.starttime, 60)
-        self.runtime = '%02d:%02d' % (m, s)
-        self.returncode = proc.returncode
-        if reran is True:
-            Result.runresults['RERAN'] += 1
-        if killed:
-            self.result = 'KILLED'
-            Result.runresults['KILLED'] += 1
-        elif self.returncode == 0:
-            self.result = 'PASS'
-            Result.runresults['PASS'] += 1
-        elif self.returncode == 4:
-            self.result = 'SKIP'
-            Result.runresults['SKIP'] += 1
-        elif self.returncode != 0:
-            self.result = 'FAIL'
-            Result.runresults['FAIL'] += 1
-
-
-class Output(object):
-    """
-    This class is a slightly modified version of the 'Stream' class found
-    here: http://goo.gl/aSGfv
-    """
-    def __init__(self, stream):
-        self.stream = stream
-        self._buf = b''
-        self.lines = []
-
-    def fileno(self):
-        return self.stream.fileno()
-
-    def read(self, drain=0):
-        """
-        Read from the file descriptor. If 'drain' set, read until EOF.
-        """
-        while self._read() is not None:
-            if not drain:
-                break
-
-    def _read(self):
-        """
-        Read up to 4k of data from this output stream. Collect the output
-        up to the last newline, and append it to any leftover data from a
-        previous call. The lines are stored as a (timestamp, data) tuple
-        for easy sorting/merging later.
-        """
-        fd = self.fileno()
-        buf = os.read(fd, 4096)
-        if not buf:
-            return None
-        if b'\n' not in buf:
-            self._buf += buf
-            return []
-
-        buf = self._buf + buf
-        tmp, rest = buf.rsplit(b'\n', 1)
-        self._buf = rest
-        now = datetime.now()
-        rows = tmp.split(b'\n')
-        self.lines += [(now, r) for r in rows]
-
-
-class Cmd(object):
-    verified_users = []
-
-    def __init__(self, pathname, outputdir=None, timeout=None, user=None,
-                 tags=None):
-        self.pathname = pathname
-        self.outputdir = outputdir or 'BASEDIR'
-        """
-        The timeout for tests is measured in wall-clock time
-        """
-        self.timeout = timeout
-        self.user = user or ''
-        self.killed = False
-        self.reran = None
-        self.result = Result()
-
-        if self.timeout is None:
-            self.timeout = 60
-
-    def __str__(self):
-        return "Pathname: %s\nOutputdir: %s\nTimeout: %d\nUser: %s\n" % \
-            (self.pathname, self.outputdir, self.timeout, self.user)
-
-    def kill_cmd(self, proc, keyboard_interrupt=False):
-        """
-        Kill a running command due to timeout, or ^C from the keyboard. If
-        sudo is required, this user was verified previously.
-        """
-        self.killed = True
-        do_sudo = len(self.user) != 0
-        signal = '-TERM'
-
-        cmd = [SUDO, KILL, signal, str(proc.pid)]
-        if not do_sudo:
-            del cmd[0]
-
-        try:
-            kp = Popen(cmd)
-            kp.wait()
-        except Exception:
-            pass
-
-        """
-        If this is not a user-initiated kill and the test has not been
-        reran before we consider if the test needs to be reran:
-        If the test has spent some time hibernating and didn't run the whole
-        length of time before being timed out we will rerun the test.
-        """
-        if keyboard_interrupt is False and self.reran is None:
-            runtime = monotonic_time() - self.result.starttime
-            if int(self.timeout) > runtime:
-                self.killed = False
-                self.reran = False
-                self.run(False)
-                self.reran = True
-
-    def update_cmd_privs(self, cmd, user):
-        """
-        If a user has been specified to run this Cmd and we're not already
-        running as that user, prepend the appropriate sudo command to run
-        as that user.
-        """
-        me = getpwuid(os.getuid())
-
-        if not user or user is me:
-            if os.path.isfile(cmd+'.ksh') and os.access(cmd+'.ksh', os.X_OK):
-                cmd += '.ksh'
-            if os.path.isfile(cmd+'.sh') and os.access(cmd+'.sh', os.X_OK):
-                cmd += '.sh'
-            return cmd
-
-        if not os.path.isfile(cmd):
-            if os.path.isfile(cmd+'.ksh') and os.access(cmd+'.ksh', os.X_OK):
-                cmd += '.ksh'
-            if os.path.isfile(cmd+'.sh') and os.access(cmd+'.sh', os.X_OK):
-                cmd += '.sh'
-
-        ret = '%s -E -u %s %s' % (SUDO, user, cmd)
-        return ret.split(' ')
-
-    def collect_output(self, proc):
-        """
-        Read from stdout/stderr as data becomes available, until the
-        process is no longer running. Return the lines from the stdout and
-        stderr Output objects.
-        """
-        out = Output(proc.stdout)
-        err = Output(proc.stderr)
-        res = []
-        while proc.returncode is None:
-            proc.poll()
-            res = select([out, err], [], [], .1)
-            for fd in res[0]:
-                fd.read()
-        for fd in res[0]:
-            fd.read(drain=1)
-
-        return out.lines, err.lines
-
-    def run(self, dryrun):
-        """
-        This is the main function that runs each individual test.
-        Determine whether or not the command requires sudo, and modify it
-        if needed. Run the command, and update the result object.
-        """
-        if dryrun is True:
-            print(self)
-            return
-
-        privcmd = self.update_cmd_privs(self.pathname, self.user)
-        try:
-            old = os.umask(0)
-            if not os.path.isdir(self.outputdir):
-                os.makedirs(self.outputdir, mode=0o777)
-            os.umask(old)
-        except OSError as e:
-            fail('%s' % e)
-
-        self.result.starttime = monotonic_time()
-        proc = Popen(privcmd, stdout=PIPE, stderr=PIPE)
-        # Allow a special timeout value of 0 to mean infinity
-        if int(self.timeout) == 0:
-            self.timeout = sys.maxsize
-        t = Timer(int(self.timeout), self.kill_cmd, [proc])
-
-        try:
-            t.start()
-            self.result.stdout, self.result.stderr = self.collect_output(proc)
-        except KeyboardInterrupt:
-            self.kill_cmd(proc, True)
-            fail('\nRun terminated at user request.')
-        finally:
-            t.cancel()
-
-        if self.reran is not False:
-            self.result.done(proc, self.killed, self.reran)
-
-    def skip(self):
-        """
-        Initialize enough of the test result that we can log a skipped
-        command.
-        """
-        Result.total += 1
-        Result.runresults['SKIP'] += 1
-        self.result.stdout = self.result.stderr = []
-        self.result.starttime = monotonic_time()
-        m, s = divmod(monotonic_time() - self.result.starttime, 60)
-        self.result.runtime = '%02d:%02d' % (m, s)
-        self.result.result = 'SKIP'
-
-    def log(self, options):
-        """
-        This function is responsible for writing all output. This includes
-        the console output, the logfile of all results (with timestamped
-        merged stdout and stderr), and for each test, the unmodified
-        stdout/stderr/merged in its own file.
-        """
-
-        logname = getpwuid(os.getuid()).pw_name
-        rer = ''
-        if self.reran is True:
-            rer = ' (RERAN)'
-        user = ' (run as %s)' % (self.user if len(self.user) else logname)
-        msga = 'Test: %s%s ' % (self.pathname, user)
-        msgb = '[%s] [%s]%s\n' % (self.result.runtime, self.result.result, rer)
-        pad = ' ' * (80 - (len(msga) + len(msgb)))
-        result_line = msga + pad + msgb
-
-        # The result line is always written to the log file. If -q was
-        # specified only failures are written to the console, otherwise
-        # the result line is written to the console.
-        write_log(bytearray(result_line, encoding='utf-8'), LOG_FILE)
-        if not options.quiet:
-            write_log(result_line, LOG_OUT)
-        elif options.quiet and self.result.result != 'PASS':
-            write_log(result_line, LOG_OUT)
-
-        lines = sorted(self.result.stdout + self.result.stderr,
-                       key=lambda x: x[0])
-
-        # Write timestamped output (stdout and stderr) to the logfile
-        for dt, line in lines:
-            timestamp = bytearray(dt.strftime("%H:%M:%S.%f ")[:11],
-                                  encoding='utf-8')
-            write_log(b'%s %s\n' % (timestamp, line), LOG_FILE)
-
-        # Write the separate stdout/stderr/merged files, if the data exists
-        if len(self.result.stdout):
-            with open(os.path.join(self.outputdir, 'stdout'), 'wb') as out:
-                for _, line in self.result.stdout:
-                    os.write(out.fileno(), b'%s\n' % line)
-        if len(self.result.stderr):
-            with open(os.path.join(self.outputdir, 'stderr'), 'wb') as err:
-                for _, line in self.result.stderr:
-                    os.write(err.fileno(), b'%s\n' % line)
-        if len(self.result.stdout) and len(self.result.stderr):
-            with open(os.path.join(self.outputdir, 'merged'), 'wb') as merged:
-                for _, line in lines:
-                    os.write(merged.fileno(), b'%s\n' % line)
-
-
-class Test(Cmd):
-    props = ['outputdir', 'timeout', 'user', 'pre', 'pre_user', 'post',
-             'post_user', 'tags']
-
-    def __init__(self, pathname, outputdir=None, timeout=None, user=None,
-                 pre=None, pre_user=None, post=None, post_user=None,
-                 tags=None):
-        super(Test, self).__init__(pathname, outputdir, timeout, user)
-        self.pre = pre or ''
-        self.pre_user = pre_user or ''
-        self.post = post or ''
-        self.post_user = post_user or ''
-        self.tags = tags or []
-
-    def __str__(self):
-        post_user = pre_user = ''
-        if len(self.pre_user):
-            pre_user = ' (as %s)' % (self.pre_user)
-        if len(self.post_user):
-            post_user = ' (as %s)' % (self.post_user)
-        return "Pathname: %s\nOutputdir: %s\nTimeout: %d\nPre: %s%s\nPost: " \
-               "%s%s\nUser: %s\nTags: %s\n" % \
-               (self.pathname, self.outputdir, self.timeout, self.pre,
-                pre_user, self.post, post_user, self.user, self.tags)
-
-    def verify(self):
-        """
-        Check the pre/post scripts, user and Test. Omit the Test from this
-        run if there are any problems.
-        """
-        files = [self.pre, self.pathname, self.post]
-        users = [self.pre_user, self.user, self.post_user]
-
-        for f in [f for f in files if len(f)]:
-            if not verify_file(f):
-                write_log("Warning: Test '%s' not added to this run because"
-                          " it failed verification.\n" % f, LOG_ERR)
-                return False
-
-        for user in [user for user in users if len(user)]:
-            if not verify_user(user):
-                write_log("Not adding Test '%s' to this run.\n" %
-                          self.pathname, LOG_ERR)
-                return False
-
-        return True
-
-    def run(self, options):
-        """
-        Create Cmd instances for the pre/post scripts. If the pre script
-        doesn't pass, skip this Test. Run the post script regardless.
-        """
-        odir = os.path.join(self.outputdir, os.path.basename(self.pre))
-        pretest = Cmd(self.pre, outputdir=odir, timeout=self.timeout,
-                      user=self.pre_user)
-        test = Cmd(self.pathname, outputdir=self.outputdir,
-                   timeout=self.timeout, user=self.user)
-        odir = os.path.join(self.outputdir, os.path.basename(self.post))
-        posttest = Cmd(self.post, outputdir=odir, timeout=self.timeout,
-                       user=self.post_user)
-
-        cont = True
-        if len(pretest.pathname):
-            pretest.run(options.dryrun)
-            cont = pretest.result.result == 'PASS'
-            pretest.log(options)
-
-        if cont:
-            test.run(options.dryrun)
-        else:
-            test.skip()
-
-        test.log(options)
-
-        if len(posttest.pathname):
-            posttest.run(options.dryrun)
-            posttest.log(options)
-
-
-class TestGroup(Test):
-    props = Test.props + ['tests']
-
-    def __init__(self, pathname, outputdir=None, timeout=None, user=None,
-                 pre=None, pre_user=None, post=None, post_user=None,
-                 tests=None, tags=None):
-        super(TestGroup, self).__init__(pathname, outputdir, timeout, user,
-                                        pre, pre_user, post, post_user, tags)
-        self.tests = tests or []
-
-    def __str__(self):
-        post_user = pre_user = ''
-        if len(self.pre_user):
-            pre_user = ' (as %s)' % (self.pre_user)
-        if len(self.post_user):
-            post_user = ' (as %s)' % (self.post_user)
-        return "Pathname: %s\nOutputdir: %s\nTests: %s\nTimeout: %s\n" \
-               "Pre: %s%s\nPost: %s%s\nUser: %s\nTags: %s\n" % \
-               (self.pathname, self.outputdir, self.tests, self.timeout,
-                self.pre, pre_user, self.post, post_user, self.user, self.tags)
-
-    def verify(self):
-        """
-        Check the pre/post scripts, user and tests in this TestGroup. Omit
-        the TestGroup entirely, or simply delete the relevant tests in the
-        group, if that's all that's required.
-        """
-        # If the pre or post scripts are relative pathnames, convert to
-        # absolute, so they stand a chance of passing verification.
-        if len(self.pre) and not os.path.isabs(self.pre):
-            self.pre = os.path.join(self.pathname, self.pre)
-        if len(self.post) and not os.path.isabs(self.post):
-            self.post = os.path.join(self.pathname, self.post)
-
-        auxfiles = [self.pre, self.post]
-        users = [self.pre_user, self.user, self.post_user]
-
-        for f in [f for f in auxfiles if len(f)]:
-            if self.pathname != os.path.dirname(f):
-                write_log("Warning: TestGroup '%s' not added to this run. "
-                          "Auxiliary script '%s' exists in a different "
-                          "directory.\n" % (self.pathname, f), LOG_ERR)
-                return False
-
-            if not verify_file(f):
-                write_log("Warning: TestGroup '%s' not added to this run. "
-                          "Auxiliary script '%s' failed verification.\n" %
-                          (self.pathname, f), LOG_ERR)
-                return False
-
-        for user in [user for user in users if len(user)]:
-            if not verify_user(user):
-                write_log("Not adding TestGroup '%s' to this run.\n" %
-                          self.pathname, LOG_ERR)
-                return False
-
-        # If one of the tests is invalid, delete it, log it, and drive on.
-        for test in self.tests:
-            if not verify_file(os.path.join(self.pathname, test)):
-                del self.tests[self.tests.index(test)]
-                write_log("Warning: Test '%s' removed from TestGroup '%s' "
-                          "because it failed verification.\n" %
-                          (test, self.pathname), LOG_ERR)
-
-        return len(self.tests) != 0
-
-    def run(self, options):
-        """
-        Create Cmd instances for the pre/post scripts. If the pre script
-        doesn't pass, skip all the tests in this TestGroup. Run the post
-        script regardless.
-        """
-        # tags assigned to this test group also include the test names
-        if options.tags and not set(self.tags).intersection(set(options.tags)):
-            return
-
-        odir = os.path.join(self.outputdir, os.path.basename(self.pre))
-        pretest = Cmd(self.pre, outputdir=odir, timeout=self.timeout,
-                      user=self.pre_user)
-        odir = os.path.join(self.outputdir, os.path.basename(self.post))
-        posttest = Cmd(self.post, outputdir=odir, timeout=self.timeout,
-                       user=self.post_user)
-
-        cont = True
-        if len(pretest.pathname):
-            pretest.run(options.dryrun)
-            cont = pretest.result.result == 'PASS'
-            pretest.log(options)
-
-        for fname in self.tests:
-            test = Cmd(os.path.join(self.pathname, fname),
-                       outputdir=os.path.join(self.outputdir, fname),
-                       timeout=self.timeout, user=self.user)
-            if cont:
-                test.run(options.dryrun)
-            else:
-                test.skip()
-
-            test.log(options)
-
-        if len(posttest.pathname):
-            posttest.run(options.dryrun)
-            posttest.log(options)
-
-
-class TestRun(object):
-    props = ['quiet', 'outputdir']
-
-    def __init__(self, options):
-        self.tests = {}
-        self.testgroups = {}
-        self.starttime = time()
-        self.timestamp = datetime.now().strftime('%Y%m%dT%H%M%S')
-        self.outputdir = os.path.join(options.outputdir, self.timestamp)
-        self.setup_logging(options)
-        self.defaults = [
-            ('outputdir', BASEDIR),
-            ('quiet', False),
-            ('timeout', 60),
-            ('user', ''),
-            ('pre', ''),
-            ('pre_user', ''),
-            ('post', ''),
-            ('post_user', ''),
-            ('tags', [])
-        ]
-
-    def __str__(self):
-        s = 'TestRun:\n    outputdir: %s\n' % self.outputdir
-        s += 'TESTS:\n'
-        for key in sorted(self.tests.keys()):
-            s += '%s%s' % (self.tests[key].__str__(), '\n')
-        s += 'TESTGROUPS:\n'
-        for key in sorted(self.testgroups.keys()):
-            s += '%s%s' % (self.testgroups[key].__str__(), '\n')
-        return s
-
-    def addtest(self, pathname, options):
-        """
-        Create a new Test, and apply any properties that were passed in
-        from the command line. If it passes verification, add it to the
-        TestRun.
-        """
-        test = Test(pathname)
-        for prop in Test.props:
-            setattr(test, prop, getattr(options, prop))
-
-        if test.verify():
-            self.tests[pathname] = test
-
-    def addtestgroup(self, dirname, filenames, options):
-        """
-        Create a new TestGroup, and apply any properties that were passed
-        in from the command line. If it passes verification, add it to the
-        TestRun.
-        """
-        if dirname not in self.testgroups:
-            testgroup = TestGroup(dirname)
-            for prop in Test.props:
-                setattr(testgroup, prop, getattr(options, prop))
-
-            # Prevent pre/post scripts from running as regular tests
-            for f in [testgroup.pre, testgroup.post]:
-                if f in filenames:
-                    del filenames[filenames.index(f)]
-
-            self.testgroups[dirname] = testgroup
-            self.testgroups[dirname].tests = sorted(filenames)
-
-            testgroup.verify()
-
-    def read(self, options):
-        """
-        Read in the specified runfile, and apply the TestRun properties
-        listed in the 'DEFAULT' section to our TestRun. Then read each
-        section, and apply the appropriate properties to the Test or
-        TestGroup. Properties from individual sections override those set
-        in the 'DEFAULT' section. If the Test or TestGroup passes
-        verification, add it to the TestRun.
-        """
-        config = configparser.RawConfigParser()
-        if not len(config.read(options.runfile)):
-            fail("Coulnd't read config file %s" % options.runfile)
-
-        for opt in TestRun.props:
-            if config.has_option('DEFAULT', opt):
-                setattr(self, opt, config.get('DEFAULT', opt))
-        self.outputdir = os.path.join(self.outputdir, self.timestamp)
-
-        for section in config.sections():
-            if 'tests' in config.options(section):
-                if os.path.isdir(section):
-                    pathname = section
-                elif os.path.isdir(os.path.join(options.testdir, section)):
-                    pathname = os.path.join(options.testdir, section)
-                else:
-                    pathname = section
-
-                testgroup = TestGroup(os.path.abspath(pathname))
-                for prop in TestGroup.props:
-                    for sect in ['DEFAULT', section]:
-                        if config.has_option(sect, prop):
-                            if prop == "tags":
-                                setattr(testgroup, prop,
-                                        eval(config.get(sect, prop)))
-                            else:
-                                setattr(testgroup, prop,
-                                        config.get(sect, prop))
-
-                # Repopulate tests using eval to convert the string to a list
-                testgroup.tests = eval(config.get(section, 'tests'))
-
-                if testgroup.verify():
-                    self.testgroups[section] = testgroup
-            else:
-                test = Test(section)
-                for prop in Test.props:
-                    for sect in ['DEFAULT', section]:
-                        if config.has_option(sect, prop):
-                            setattr(test, prop, config.get(sect, prop))
-
-                if test.verify():
-                    self.tests[section] = test
-
-    def write(self, options):
-        """
-        Create a configuration file for editing and later use. The
-        'DEFAULT' section of the config file is created from the
-        properties that were specified on the command line. Tests are
-        simply added as sections that inherit everything from the
-        'DEFAULT' section. TestGroups are the same, except they get an
-        option including all the tests to run in that directory.
-        """
-
-        defaults = dict([(prop, getattr(options, prop)) for prop, _ in
-                         self.defaults])
-        config = configparser.RawConfigParser(defaults)
-
-        for test in sorted(self.tests.keys()):
-            config.add_section(test)
-
-        for testgroup in sorted(self.testgroups.keys()):
-            config.add_section(testgroup)
-            config.set(testgroup, 'tests', self.testgroups[testgroup].tests)
-
-        try:
-            with open(options.template, 'w') as f:
-                return config.write(f)
-        except IOError:
-            fail('Could not open \'%s\' for writing.' % options.template)
-
-    def complete_outputdirs(self):
-        """
-        Collect all the pathnames for Tests, and TestGroups. Work
-        backwards one pathname component at a time, to create a unique
-        directory name in which to deposit test output. Tests will be able
-        to write output files directly in the newly modified outputdir.
-        TestGroups will be able to create one subdirectory per test in the
-        outputdir, and are guaranteed uniqueness because a group can only
-        contain files in one directory. Pre and post tests will create a
-        directory rooted at the outputdir of the Test or TestGroup in
-        question for their output.
-        """
-        done = False
-        components = 0
-        tmp_dict = dict(list(self.tests.items()) +
-                        list(self.testgroups.items()))
-        total = len(tmp_dict)
-        base = self.outputdir
-
-        while not done:
-            paths = []
-            components -= 1
-            for testfile in list(tmp_dict.keys()):
-                uniq = '/'.join(testfile.split('/')[components:]).lstrip('/')
-                if uniq not in paths:
-                    paths.append(uniq)
-                    tmp_dict[testfile].outputdir = os.path.join(base, uniq)
-                else:
-                    break
-            done = total == len(paths)
-
-    def setup_logging(self, options):
-        """
-        This function creates the output directory and gets a file object
-        for the logfile. This function must be called before write_log()
-        can be used.
-        """
-        if options.dryrun is True:
-            return
-
-        global LOG_FILE_OBJ
-        if options.cmd != 'wrconfig':
-            try:
-                old = os.umask(0)
-                os.makedirs(self.outputdir, mode=0o777)
-                os.umask(old)
-                filename = os.path.join(self.outputdir, 'log')
-                LOG_FILE_OBJ = open(filename, buffering=0, mode='wb')
-            except OSError as e:
-                fail('%s' % e)
-
-    def run(self, options):
-        """
-        Walk through all the Tests and TestGroups, calling run().
-        """
-        try:
-            os.chdir(self.outputdir)
-        except OSError:
-            fail('Could not change to directory %s' % self.outputdir)
-        # make a symlink to the output for the currently running test
-        logsymlink = os.path.join(self.outputdir, '../current')
-        if os.path.islink(logsymlink):
-            os.unlink(logsymlink)
-        if not os.path.exists(logsymlink):
-            os.symlink(self.outputdir, logsymlink)
-        else:
-            write_log('Could not make a symlink to directory %s\n' %
-                      self.outputdir, LOG_ERR)
-        iteration = 0
-        while iteration < options.iterations:
-            for test in sorted(self.tests.keys()):
-                self.tests[test].run(options)
-            for testgroup in sorted(self.testgroups.keys()):
-                self.testgroups[testgroup].run(options)
-            iteration += 1
-
-    def summary(self):
-        if Result.total == 0:
-            return 2
-
-        print('\nResults Summary')
-        for key in list(Result.runresults.keys()):
-            if Result.runresults[key] != 0:
-                print('%s\t% 4d' % (key, Result.runresults[key]))
-
-        m, s = divmod(time() - self.starttime, 60)
-        h, m = divmod(m, 60)
-        print('\nRunning Time:\t%02d:%02d:%02d' % (h, m, s))
-        print('Percent passed:\t%.1f%%' % ((float(Result.runresults['PASS']) /
-                                            float(Result.total)) * 100))
-        print('Log directory:\t%s' % self.outputdir)
-
-        if Result.runresults['FAIL'] > 0:
-            return 1
-
-        if Result.runresults['KILLED'] > 0:
-            return 1
-
-        if Result.runresults['RERAN'] > 0:
-            return 3
-
-        return 0
-
-
-def write_log(msg, target):
-    """
-    Write the provided message to standard out, standard error or
-    the logfile. If specifying LOG_FILE, then `msg` must be a bytes
-    like object. This way we can still handle output from tests that
-    may be in unexpected encodings.
-    """
-    if target == LOG_OUT:
-        os.write(sys.stdout.fileno(), bytearray(msg, encoding='utf-8'))
-    elif target == LOG_ERR:
-        os.write(sys.stderr.fileno(), bytearray(msg, encoding='utf-8'))
-    elif target == LOG_FILE:
-        os.write(LOG_FILE_OBJ.fileno(), msg)
-    else:
-        fail('log_msg called with unknown target "%s"' % target)
-
-
-def verify_file(pathname):
-    """
-    Verify that the supplied pathname is an executable regular file.
-    """
-    if os.path.isdir(pathname) or os.path.islink(pathname):
-        return False
-
-    for ext in '', '.ksh', '.sh':
-        script_path = pathname + ext
-        if os.path.isfile(script_path) and os.access(script_path, os.X_OK):
-            return True
-
-    return False
-
-
-def verify_user(user):
-    """
-    Verify that the specified user exists on this system, and can execute
-    sudo without being prompted for a password.
-    """
-    testcmd = [SUDO, '-n', '-u', user, TRUE]
-
-    if user in Cmd.verified_users:
-        return True
-
-    try:
-        getpwnam(user)
-    except KeyError:
-        write_log("Warning: user '%s' does not exist.\n" % user,
-                  LOG_ERR)
-        return False
-
-    p = Popen(testcmd)
-    p.wait()
-    if p.returncode != 0:
-        write_log("Warning: user '%s' cannot use passwordless sudo.\n" % user,
-                  LOG_ERR)
-        return False
-    else:
-        Cmd.verified_users.append(user)
-
-    return True
-
-
-def find_tests(testrun, options):
-    """
-    For the given list of pathnames, add files as Tests. For directories,
-    if do_groups is True, add the directory as a TestGroup. If False,
-    recursively search for executable files.
-    """
-
-    for p in sorted(options.pathnames):
-        if os.path.isdir(p):
-            for dirname, _, filenames in os.walk(p):
-                if options.do_groups:
-                    testrun.addtestgroup(dirname, filenames, options)
-                else:
-                    for f in sorted(filenames):
-                        testrun.addtest(os.path.join(dirname, f), options)
-        else:
-            testrun.addtest(p, options)
-
-
-def fail(retstr, ret=1):
-    print('%s: %s' % (sys.argv[0], retstr))
-    exit(ret)
-
-
-def options_cb(option, opt_str, value, parser):
-    path_options = ['runfile', 'outputdir', 'template', 'testdir']
-
-    if option.dest == 'runfile' and '-w' in parser.rargs or \
-            option.dest == 'template' and '-c' in parser.rargs:
-        fail('-c and -w are mutually exclusive.')
-
-    if opt_str in parser.rargs:
-        fail('%s may only be specified once.' % opt_str)
-
-    if option.dest == 'runfile':
-        parser.values.cmd = 'rdconfig'
-    if option.dest == 'template':
-        parser.values.cmd = 'wrconfig'
-    if option.dest == 'tags':
-        value = [x.strip() for x in value.split(',')]
-
-    setattr(parser.values, option.dest, value)
-    if option.dest in path_options:
-        setattr(parser.values, option.dest, os.path.abspath(value))
-
-
-def parse_args():
-    parser = OptionParser()
-    parser.add_option('-c', action='callback', callback=options_cb,
-                      type='string', dest='runfile', metavar='runfile',
-                      help='Specify tests to run via config file.')
-    parser.add_option('-d', action='store_true', default=False, dest='dryrun',
-                      help='Dry run. Print tests, but take no other action.')
-    parser.add_option('-g', action='store_true', default=False,
-                      dest='do_groups', help='Make directories TestGroups.')
-    parser.add_option('-o', action='callback', callback=options_cb,
-                      default=BASEDIR, dest='outputdir', type='string',
-                      metavar='outputdir', help='Specify an output directory.')
-    parser.add_option('-i', action='callback', callback=options_cb,
-                      default=TESTDIR, dest='testdir', type='string',
-                      metavar='testdir', help='Specify a test directory.')
-    parser.add_option('-p', action='callback', callback=options_cb,
-                      default='', dest='pre', metavar='script',
-                      type='string', help='Specify a pre script.')
-    parser.add_option('-P', action='callback', callback=options_cb,
-                      default='', dest='post', metavar='script',
-                      type='string', help='Specify a post script.')
-    parser.add_option('-q', action='store_true', default=False, dest='quiet',
-                      help='Silence on the console during a test run.')
-    parser.add_option('-t', action='callback', callback=options_cb, default=60,
-                      dest='timeout', metavar='seconds', type='int',
-                      help='Timeout (in seconds) for an individual test.')
-    parser.add_option('-u', action='callback', callback=options_cb,
-                      default='', dest='user', metavar='user', type='string',
-                      help='Specify a different user name to run as.')
-    parser.add_option('-w', action='callback', callback=options_cb,
-                      default=None, dest='template', metavar='template',
-                      type='string', help='Create a new config file.')
-    parser.add_option('-x', action='callback', callback=options_cb, default='',
-                      dest='pre_user', metavar='pre_user', type='string',
-                      help='Specify a user to execute the pre script.')
-    parser.add_option('-X', action='callback', callback=options_cb, default='',
-                      dest='post_user', metavar='post_user', type='string',
-                      help='Specify a user to execute the post script.')
-    parser.add_option('-T', action='callback', callback=options_cb, default='',
-                      dest='tags', metavar='tags', type='string',
-                      help='Specify tags to execute specific test groups.')
-    parser.add_option('-I', action='callback', callback=options_cb, default=1,
-                      dest='iterations', metavar='iterations', type='int',
-                      help='Number of times to run the test run.')
-    (options, pathnames) = parser.parse_args()
-
-    if not options.runfile and not options.template:
-        options.cmd = 'runtests'
-
-    if options.runfile and len(pathnames):
-        fail('Extraneous arguments.')
-
-    options.pathnames = [os.path.abspath(path) for path in pathnames]
-
-    return options
-
-
-def main():
-    options = parse_args()
-    testrun = TestRun(options)
-
-    if options.cmd == 'runtests':
-        find_tests(testrun, options)
-    elif options.cmd == 'rdconfig':
-        testrun.read(options)
-    elif options.cmd == 'wrconfig':
-        find_tests(testrun, options)
-        testrun.write(options)
-        exit(0)
-    else:
-        fail('Unknown command specified')
-
-    testrun.complete_outputdirs()
-    testrun.run(options)
-    exit(testrun.summary())
-
-
-if __name__ == '__main__':
-    main()

diff --git a/zfs/tests/test-runner/bin/test-runner.py.in b/zfs/tests/test-runner/bin/test-runner.py.in
new file mode 100755
index 0000000..5c868d9
--- /dev/null
+++ b/zfs/tests/test-runner/bin/test-runner.py.in

@@ -0,0 +1,1150 @@
+#!/usr/bin/env @PYTHON_SHEBANG@
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+# Copyright (c) 2019 Datto Inc.
+#
+# This script must remain compatible with Python 3.6+.
+#
+
+import os
+import sys
+import ctypes
+import re
+import configparser
+
+from datetime import datetime
+from optparse import OptionParser
+from pwd import getpwnam
+from pwd import getpwuid
+from select import select
+from subprocess import PIPE
+from subprocess import Popen
+from subprocess import check_output
+from threading import Timer
+from time import time, CLOCK_MONOTONIC
+from os.path import exists
+
+# Filesystem locations. NOTE(review): BASEDIR and TESTDIR look like the
+# defaults for the output and test directories; the option parser that
+# would confirm this lies outside this part of the file.
+BASEDIR = '/var/tmp/test_results'
+TESTDIR = '/usr/share/zfs/'
+# Cmd.run() pipes 'clear' and 'scan' into this file through sudo tee and
+# reads it back with sudo cat when kmemleak checking is requested.
+KMEMLEAK_FILE = '/sys/kernel/debug/kmemleak'
+# Names of external programs. Cmd.kill_cmd() builds its kill command
+# from SUDO and KILL.
+KILL = 'kill'
+TRUE = 'true'
+SUDO = 'sudo'
+# Target selectors passed as the second argument of write_log().
+LOG_FILE = 'LOG_FILE'
+LOG_OUT = 'LOG_OUT'
+LOG_ERR = 'LOG_ERR'
+# NOTE(review): presumably replaced by the open log file object during
+# logging setup; confirm against the code that assigns it.
+LOG_FILE_OBJ = None
+
+
+class timespec(ctypes.Structure):
+    _fields_ = [
+        ('tv_sec', ctypes.c_long),
+        ('tv_nsec', ctypes.c_long)
+    ]
+
+
+# Load librt through ctypes. use_errno=True makes the C errno of a failed
+# call available through ctypes.get_errno().
+librt = ctypes.CDLL('librt.so.1', use_errno=True)
+clock_gettime = librt.clock_gettime
+# Declare the argument types: an int clock id and a pointer to timespec.
+clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
+
+
+def monotonic_time():
+    t = timespec()
+    if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0:
+        errno_ = ctypes.get_errno()
+        raise OSError(errno_, os.strerror(errno_))
+    return t.tv_sec + t.tv_nsec * 1e-9
+
+
+class Result(object):
+    total = 0
+    runresults = {'PASS': 0, 'FAIL': 0, 'SKIP': 0, 'KILLED': 0, 'RERAN': 0}
+
+    def __init__(self):
+        self.starttime = None
+        self.returncode = None
+        self.runtime = ''
+        self.stdout = []
+        self.stderr = []
+        self.kmemleak = ''
+        self.result = ''
+
+    def done(self, proc, killed, reran):
+        """
+        Finalize the results of this Cmd.
+        """
+        Result.total += 1
+        m, s = divmod(monotonic_time() - self.starttime, 60)
+        self.runtime = '%02d:%02d' % (m, s)
+        self.returncode = proc.returncode
+        if reran is True:
+            Result.runresults['RERAN'] += 1
+        if killed:
+            self.result = 'KILLED'
+            Result.runresults['KILLED'] += 1
+        elif len(self.kmemleak) > 0:
+            self.result = 'FAIL'
+            Result.runresults['FAIL'] += 1
+        elif self.returncode == 0:
+            self.result = 'PASS'
+            Result.runresults['PASS'] += 1
+        elif self.returncode == 4:
+            self.result = 'SKIP'
+            Result.runresults['SKIP'] += 1
+        elif self.returncode != 0:
+            self.result = 'FAIL'
+            Result.runresults['FAIL'] += 1
+
+
+class Output(object):
+    """
+    This class is a slightly modified version of the 'Stream' class found
+    here: http://goo.gl/aSGfv
+    """
+    def __init__(self, stream):
+        self.stream = stream
+        self._buf = b''
+        self.lines = []
+
+    def fileno(self):
+        return self.stream.fileno()
+
+    def read(self, drain=0):
+        """
+        Read from the file descriptor. If 'drain' set, read until EOF.
+        """
+        while self._read() is not None:
+            if not drain:
+                break
+
+    def _read(self):
+        """
+        Read up to 4k of data from this output stream. Collect the output
+        up to the last newline, and append it to any leftover data from a
+        previous call. The lines are stored as a (timestamp, data) tuple
+        for easy sorting/merging later.
+        """
+        fd = self.fileno()
+        buf = os.read(fd, 4096)
+        if not buf:
+            return None
+        if b'\n' not in buf:
+            self._buf += buf
+            return []
+
+        buf = self._buf + buf
+        tmp, rest = buf.rsplit(b'\n', 1)
+        self._buf = rest
+        now = datetime.now()
+        rows = tmp.split(b'\n')
+        self.lines += [(now, r) for r in rows]
+
+
+class Cmd(object):
+    verified_users = []
+
+    def __init__(self, pathname, identifier=None, outputdir=None,
+                 timeout=None, user=None, tags=None):
+        self.pathname = pathname
+        self.identifier = identifier
+        self.outputdir = outputdir or 'BASEDIR'
+        """
+        The timeout for tests is measured in wall-clock time
+        """
+        self.timeout = timeout
+        self.user = user or ''
+        self.killed = False
+        self.reran = None
+        self.result = Result()
+
+        if self.timeout is None:
+            self.timeout = 60
+
+    def __str__(self):
+        return '''\
+Pathname: %s
+Identifier: %s
+Outputdir: %s
+Timeout: %d
+User: %s
+''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user)
+
+    def kill_cmd(self, proc, keyboard_interrupt=False):
+        """
+        Kill a running command due to timeout, or ^C from the keyboard. If
+        sudo is required, this user was verified previously.
+        """
+        self.killed = True
+        do_sudo = len(self.user) != 0
+        signal = '-TERM'
+
+        cmd = [SUDO, KILL, signal, str(proc.pid)]
+        if not do_sudo:
+            del cmd[0]
+
+        try:
+            kp = Popen(cmd)
+            kp.wait()
+        except Exception:
+            pass
+
+        """
+        If this is not a user-initiated kill and the test has not been
+        reran before we consider if the test needs to be reran:
+        If the test has spent some time hibernating and didn't run the whole
+        length of time before being timed out we will rerun the test.
+        """
+        if keyboard_interrupt is False and self.reran is None:
+            runtime = monotonic_time() - self.result.starttime
+            if int(self.timeout) > runtime:
+                self.killed = False
+                self.reran = False
+                self.run(False)
+                self.reran = True
+
+    def update_cmd_privs(self, cmd, user):
+        """
+        If a user has been specified to run this Cmd and we're not already
+        running as that user, prepend the appropriate sudo command to run
+        as that user.
+        """
+        me = getpwuid(os.getuid())
+
+        if not user or user is me:
+            if os.path.isfile(cmd+'.ksh') and os.access(cmd+'.ksh', os.X_OK):
+                cmd += '.ksh'
+            if os.path.isfile(cmd+'.sh') and os.access(cmd+'.sh', os.X_OK):
+                cmd += '.sh'
+            return cmd
+
+        if not os.path.isfile(cmd):
+            if os.path.isfile(cmd+'.ksh') and os.access(cmd+'.ksh', os.X_OK):
+                cmd += '.ksh'
+            if os.path.isfile(cmd+'.sh') and os.access(cmd+'.sh', os.X_OK):
+                cmd += '.sh'
+
+        ret = '%s -E -u %s %s' % (SUDO, user, cmd)
+        return ret.split(' ')
+
+    def collect_output(self, proc):
+        """
+        Read from stdout/stderr as data becomes available, until the
+        process is no longer running. Return the lines from the stdout and
+        stderr Output objects.
+        """
+        out = Output(proc.stdout)
+        err = Output(proc.stderr)
+        res = []
+        while proc.returncode is None:
+            proc.poll()
+            res = select([out, err], [], [], .1)
+            for fd in res[0]:
+                fd.read()
+        for fd in res[0]:
+            fd.read(drain=1)
+
+        return out.lines, err.lines
+
+    def run(self, dryrun, kmemleak, kmsg):
+        """
+        This is the main function that runs each individual test.
+        Determine whether or not the command requires sudo, and modify it
+        if needed. Run the command, and update the result object.
+        """
+        if dryrun is True:
+            print(self)
+            return
+
+        privcmd = self.update_cmd_privs(self.pathname, self.user)
+        try:
+            old = os.umask(0)
+            if not os.path.isdir(self.outputdir):
+                os.makedirs(self.outputdir, mode=0o777)
+            os.umask(old)
+        except OSError as e:
+            fail('%s' % e)
+
+        """
+        Log each test we run to /dev/kmsg (on Linux), so if there's a kernel
+        warning we'll be able to match it up to a particular test.
+        """
+        if kmsg is True and exists("/dev/kmsg"):
+            try:
+                kp = Popen([SUDO, "sh", "-c",
+                            f"echo ZTS run {self.pathname} > /dev/kmsg"])
+                kp.wait()
+            except Exception:
+                pass
+
+        self.result.starttime = monotonic_time()
+
+        if kmemleak:
+            cmd = f'echo clear | {SUDO} tee {KMEMLEAK_FILE}'
+            check_output(cmd, shell=True)
+
+        proc = Popen(privcmd, stdout=PIPE, stderr=PIPE)
+        # Allow a special timeout value of 0 to mean infinity
+        if int(self.timeout) == 0:
+            self.timeout = sys.maxsize
+        t = Timer(int(self.timeout), self.kill_cmd, [proc])
+
+        try:
+            t.start()
+            self.result.stdout, self.result.stderr = self.collect_output(proc)
+
+            if kmemleak:
+                cmd = f'echo scan | {SUDO} tee {KMEMLEAK_FILE}'
+                check_output(cmd, shell=True)
+                cmd = f'{SUDO} cat {KMEMLEAK_FILE}'
+                self.result.kmemleak = check_output(cmd, shell=True)
+        except KeyboardInterrupt:
+            self.kill_cmd(proc, True)
+            fail('\nRun terminated at user request.')
+        finally:
+            t.cancel()
+
+        if self.reran is not False:
+            self.result.done(proc, self.killed, self.reran)
+
+    def skip(self):
+        """
+        Initialize enough of the test result that we can log a skipped
+        command.
+        """
+        Result.total += 1
+        Result.runresults['SKIP'] += 1
+        self.result.stdout = self.result.stderr = []
+        self.result.starttime = monotonic_time()
+        m, s = divmod(monotonic_time() - self.result.starttime, 60)
+        self.result.runtime = '%02d:%02d' % (m, s)
+        self.result.result = 'SKIP'
+
+    def log(self, options, suppress_console=False):
+        """
+        This function is responsible for writing all output. This includes
+        the console output, the logfile of all results (with timestamped
+        merged stdout and stderr), and for each test, the unmodified
+        stdout/stderr/merged in its own file.
+        """
+
+        logname = getpwuid(os.getuid()).pw_name
+        rer = ''
+        if self.reran is True:
+            rer = ' (RERAN)'
+        user = ' (run as %s)' % (self.user if len(self.user) else logname)
+        if self.identifier:
+            msga = 'Test (%s): %s%s ' % (self.identifier, self.pathname, user)
+        else:
+            msga = 'Test: %s%s ' % (self.pathname, user)
+        msgb = '[%s] [%s]%s\n' % (self.result.runtime, self.result.result, rer)
+        pad = ' ' * (80 - (len(msga) + len(msgb)))
+        result_line = msga + pad + msgb
+
+        # The result line is always written to the log file. If -q was
+        # specified only failures are written to the console, otherwise
+        # the result line is written to the console. The console output
+        # may be suppressed by calling log() with suppress_console=True.
+        write_log(bytearray(result_line, encoding='utf-8'), LOG_FILE)
+        if not suppress_console:
+            if not options.quiet:
+                write_log(result_line, LOG_OUT)
+            elif options.quiet and self.result.result != 'PASS':
+                write_log(result_line, LOG_OUT)
+
+        lines = sorted(self.result.stdout + self.result.stderr,
+                       key=lambda x: x[0])
+
+        # Write timestamped output (stdout and stderr) to the logfile
+        for dt, line in lines:
+            timestamp = bytearray(dt.strftime("%H:%M:%S.%f ")[:11],
+                                  encoding='utf-8')
+            write_log(b'%s %s\n' % (timestamp, line), LOG_FILE)
+
+        # Write the separate stdout/stderr/merged files, if the data exists
+        if len(self.result.stdout):
+            with open(os.path.join(self.outputdir, 'stdout'), 'wb') as out:
+                for _, line in self.result.stdout:
+                    os.write(out.fileno(), b'%s\n' % line)
+        if len(self.result.stderr):
+            with open(os.path.join(self.outputdir, 'stderr'), 'wb') as err:
+                for _, line in self.result.stderr:
+                    os.write(err.fileno(), b'%s\n' % line)
+        if len(self.result.stdout) and len(self.result.stderr):
+            with open(os.path.join(self.outputdir, 'merged'), 'wb') as merged:
+                for _, line in lines:
+                    os.write(merged.fileno(), b'%s\n' % line)
+        if len(self.result.kmemleak):
+            with open(os.path.join(self.outputdir, 'kmemleak'), 'wb') as kmem:
+                kmem.write(self.result.kmemleak)
+
+
+class Test(Cmd):
+    props = ['outputdir', 'timeout', 'user', 'pre', 'pre_user', 'post',
+             'post_user', 'failsafe', 'failsafe_user', 'tags']
+
+    def __init__(self, pathname,
+                 pre=None, pre_user=None, post=None, post_user=None,
+                 failsafe=None, failsafe_user=None, tags=None, **kwargs):
+        super(Test, self).__init__(pathname, **kwargs)
+        self.pre = pre or ''
+        self.pre_user = pre_user or ''
+        self.post = post or ''
+        self.post_user = post_user or ''
+        self.failsafe = failsafe or ''
+        self.failsafe_user = failsafe_user or ''
+        self.tags = tags or []
+
+    def __str__(self):
+        post_user = pre_user = failsafe_user = ''
+        if len(self.pre_user):
+            pre_user = ' (as %s)' % (self.pre_user)
+        if len(self.post_user):
+            post_user = ' (as %s)' % (self.post_user)
+        if len(self.failsafe_user):
+            failsafe_user = ' (as %s)' % (self.failsafe_user)
+        return '''\
+Pathname: %s
+Identifier: %s
+Outputdir: %s
+Timeout: %d
+User: %s
+Pre: %s%s
+Post: %s%s
+Failsafe: %s%s
+Tags: %s
+''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user,
+            self.pre, pre_user, self.post, post_user, self.failsafe,
+            failsafe_user, self.tags)
+
+    def verify(self):
+        """
+        Check the pre/post/failsafe scripts, user and Test. Omit the Test from
+        this run if there are any problems.
+        """
+        files = [self.pre, self.pathname, self.post, self.failsafe]
+        users = [self.pre_user, self.user, self.post_user, self.failsafe_user]
+
+        for f in [f for f in files if len(f)]:
+            if not verify_file(f):
+                write_log("Warning: Test '%s' not added to this run because"
+                          " it failed verification.\n" % f, LOG_ERR)
+                return False
+
+        for user in [user for user in users if len(user)]:
+            if not verify_user(user):
+                write_log("Not adding Test '%s' to this run.\n" %
+                          self.pathname, LOG_ERR)
+                return False
+
+        return True
+
+    def run(self, options):
+        """
+        Create Cmd instances for the pre/post/failsafe scripts. If the pre
+        script doesn't pass, skip this Test. Run the post script regardless.
+        If the Test is killed, also run the failsafe script.
+        """
+        odir = os.path.join(self.outputdir, os.path.basename(self.pre))
+        pretest = Cmd(self.pre, identifier=self.identifier, outputdir=odir,
+                      timeout=self.timeout, user=self.pre_user)
+        test = Cmd(self.pathname, identifier=self.identifier,
+                   outputdir=self.outputdir, timeout=self.timeout,
+                   user=self.user)
+        odir = os.path.join(self.outputdir, os.path.basename(self.failsafe))
+        failsafe = Cmd(self.failsafe, identifier=self.identifier,
+                       outputdir=odir, timeout=self.timeout,
+                       user=self.failsafe_user)
+        odir = os.path.join(self.outputdir, os.path.basename(self.post))
+        posttest = Cmd(self.post, identifier=self.identifier, outputdir=odir,
+                       timeout=self.timeout, user=self.post_user)
+
+        cont = True
+        if len(pretest.pathname):
+            pretest.run(options.dryrun, False, options.kmsg)
+            cont = pretest.result.result == 'PASS'
+            pretest.log(options)
+
+        if cont:
+            test.run(options.dryrun, options.kmemleak, options.kmsg)
+            if test.result.result == 'KILLED' and len(failsafe.pathname):
+                failsafe.run(options.dryrun, False, options.kmsg)
+                failsafe.log(options, suppress_console=True)
+        else:
+            test.skip()
+
+        test.log(options)
+
+        if len(posttest.pathname):
+            posttest.run(options.dryrun, False, options.kmsg)
+            posttest.log(options)
+
+
+class TestGroup(Test):
+    props = Test.props + ['tests']
+
+    def __init__(self, pathname, tests=None, **kwargs):
+        super(TestGroup, self).__init__(pathname, **kwargs)
+        self.tests = tests or []
+
+    def __str__(self):
+        post_user = pre_user = failsafe_user = ''
+        if len(self.pre_user):
+            pre_user = ' (as %s)' % (self.pre_user)
+        if len(self.post_user):
+            post_user = ' (as %s)' % (self.post_user)
+        if len(self.failsafe_user):
+            failsafe_user = ' (as %s)' % (self.failsafe_user)
+        return '''\
+Pathname: %s
+Identifier: %s
+Outputdir: %s
+Tests: %s
+Timeout: %s
+User: %s
+Pre: %s%s
+Post: %s%s
+Failsafe: %s%s
+Tags: %s
+''' % (self.pathname, self.identifier, self.outputdir, self.tests,
+            self.timeout, self.user, self.pre, pre_user, self.post, post_user,
+            self.failsafe, failsafe_user, self.tags)
+
+    def filter(self, keeplist):
+        self.tests = [x for x in self.tests if x in keeplist]
+
+    def verify(self):
+        """
+        Check the pre/post/failsafe scripts, user and tests in this TestGroup.
+        Omit the TestGroup entirely, or simply delete the relevant tests in the
+        group, if that's all that's required.
+
+        Returns False when the group must be left out of the run: an
+        auxiliary script or a user failed verification, or no test is left.
+        Returns True otherwise. As a side effect, relative pre/post/failsafe
+        pathnames are made relative to self.pathname, and tests that fail
+        verification are dropped from self.tests.
+        """
+        # If the pre/post/failsafe scripts are relative pathnames, convert to
+        # absolute, so they stand a chance of passing verification.
+        if len(self.pre) and not os.path.isabs(self.pre):
+            self.pre = os.path.join(self.pathname, self.pre)
+        if len(self.post) and not os.path.isabs(self.post):
+            self.post = os.path.join(self.pathname, self.post)
+        if len(self.failsafe) and not os.path.isabs(self.failsafe):
+            self.failsafe = os.path.join(self.pathname, self.failsafe)
+
+        auxfiles = [self.pre, self.post, self.failsafe]
+        users = [self.pre_user, self.user, self.post_user, self.failsafe_user]
+
+        for f in [f for f in auxfiles if len(f)]:
+            # Only the failsafe script may live outside the group directory.
+            if f != self.failsafe and self.pathname != os.path.dirname(f):
+                write_log("Warning: TestGroup '%s' not added to this run. "
+                          "Auxiliary script '%s' exists in a different "
+                          "directory.\n" % (self.pathname, f), LOG_ERR)
+                return False
+
+            if not verify_file(f):
+                write_log("Warning: TestGroup '%s' not added to this run. "
+                          "Auxiliary script '%s' failed verification.\n" %
+                          (self.pathname, f), LOG_ERR)
+                return False
+
+        for user in [user for user in users if len(user)]:
+            if not verify_user(user):
+                write_log("Not adding TestGroup '%s' to this run.\n" %
+                          self.pathname, LOG_ERR)
+                return False
+
+        # If one of the tests is invalid, drop it, log it, and drive on.
+        # The survivors are collected in a new list: deleting from
+        # self.tests while iterating over it would skip the entry that
+        # follows each removed test.
+        kept = []
+        for test in self.tests:
+            if verify_file(os.path.join(self.pathname, test)):
+                kept.append(test)
+                continue
+            write_log("Warning: Test '%s' removed from TestGroup '%s' "
+                      "because it failed verification.\n" %
+                      (test, self.pathname), LOG_ERR)
+        self.tests = kept
+
+        return len(self.tests) != 0
+
+    def run(self, options):
+        """
+        Build Cmd objects for the pre, post and failsafe scripts and for
+        every test in the group, then execute them. When the pre script
+        does not report 'PASS', each test is skipped rather than run. The
+        post script is run in either case. The failsafe script is run after
+        a test whose result is 'KILLED'.
+        """
+        # tags assigned to this test group also include the test names
+        if options.tags:
+            if not set(self.tags) & set(options.tags):
+                return
+
+        def aux_cmd(script, parent, user):
+            # Cmd for an auxiliary script; its output directory is named
+            # after the script and placed beneath 'parent'.
+            where = os.path.join(parent, os.path.basename(script))
+            return Cmd(script, outputdir=where, timeout=self.timeout,
+                       user=user, identifier=self.identifier)
+
+        pretest = aux_cmd(self.pre, self.outputdir, self.pre_user)
+        posttest = aux_cmd(self.post, self.outputdir, self.post_user)
+
+        proceed = True
+        if len(pretest.pathname) > 0:
+            pretest.run(options.dryrun, False, options.kmsg)
+            proceed = pretest.result.result == 'PASS'
+            pretest.log(options)
+
+        for fname in self.tests:
+            test_odir = os.path.join(self.outputdir, fname)
+            test = Cmd(os.path.join(self.pathname, fname),
+                       outputdir=test_odir, timeout=self.timeout,
+                       user=self.user, identifier=self.identifier)
+            # The failsafe output goes below the output of the test it
+            # follows.
+            failsafe = aux_cmd(self.failsafe, test_odir, self.failsafe_user)
+
+            if not proceed:
+                test.skip()
+            else:
+                test.run(options.dryrun, options.kmemleak, options.kmsg)
+                killed = test.result.result == 'KILLED'
+                if killed and len(failsafe.pathname):
+                    failsafe.run(options.dryrun, False, options.kmsg)
+                    failsafe.log(options, suppress_console=True)
+
+            test.log(options)
+
+        if len(posttest.pathname) > 0:
+            posttest.run(options.dryrun, False, options.kmsg)
+            posttest.log(options)
+
+
+class TestRun(object):
+    # Run-wide settings: read() copies these from a runfile's 'DEFAULT'
+    # section onto the TestRun, and write() leaves them out of the
+    # per-section properties it emits.
+    props = ['quiet', 'outputdir']
+
+    def __init__(self, options):
+        """
+        Record the start time, derive a timestamped output directory below
+        options.outputdir, set up logging, and store the default values
+        of the properties that write() places in the 'DEFAULT' section.
+        """
+        self.tests = {}
+        self.testgroups = {}
+        self.starttime = time()
+        self.timestamp = datetime.now().strftime('%Y%m%dT%H%M%S')
+        self.outputdir = os.path.join(options.outputdir, self.timestamp)
+        self.setup_logging(options)
+        self.defaults = [
+            ('outputdir', BASEDIR),
+            ('quiet', False),
+            ('timeout', 60),
+            ('user', ''),
+            ('pre', ''),
+            ('pre_user', ''),
+            ('post', ''),
+            ('post_user', ''),
+            ('failsafe', ''),
+            ('failsafe_user', ''),
+            ('tags', [])
+        ]
+
+    def __str__(self):
+        """
+        Return a listing of the output directory followed by every Test
+        and TestGroup, each sorted by key.
+        """
+        s = 'TestRun:\n    outputdir: %s\n' % self.outputdir
+        s += 'TESTS:\n'
+        for key in sorted(self.tests.keys()):
+            s += '%s%s' % (self.tests[key].__str__(), '\n')
+        s += 'TESTGROUPS:\n'
+        for key in sorted(self.testgroups.keys()):
+            s += '%s%s' % (self.testgroups[key].__str__(), '\n')
+        return s
+
+    def addtest(self, pathname, options):
+        """
+        Create a new Test, and apply any properties that were passed in
+        from the command line. If it passes verification, add it to the
+        TestRun.
+        """
+        test = Test(pathname)
+        for prop in Test.props:
+            setattr(test, prop, getattr(options, prop))
+
+        if test.verify():
+            self.tests[pathname] = test
+
+    def addtestgroup(self, dirname, filenames, options):
+        """
+        Create a new TestGroup, and apply any properties that were passed
+        in from the command line. If it passes verification, add it to the
+        TestRun.
+
+        Note that 'filenames' is modified in place: entries naming the
+        pre/post/failsafe scripts are removed from it.
+        """
+        if dirname not in self.testgroups:
+            testgroup = TestGroup(dirname)
+            for prop in Test.props:
+                setattr(testgroup, prop, getattr(options, prop))
+
+            # Prevent pre/post/failsafe scripts from running as regular tests
+            for f in [testgroup.pre, testgroup.post, testgroup.failsafe]:
+                if f in filenames:
+                    del filenames[filenames.index(f)]
+
+            testgroup.tests = sorted(filenames)
+
+            # Only register the group when verification succeeds, as
+            # read() does; a group that fails verification is left out.
+            if testgroup.verify():
+                self.testgroups[dirname] = testgroup
+
+    def filter(self, keeplist):
+        """
+        Reduce the run to the entries named in keeplist, a mapping of
+        group/directory name to a list of test names. TestGroups absent
+        from keeplist are removed. A group whose pre script is itself
+        listed keeps all of its tests; any other group is narrowed to the
+        listed tests. Individual Tests are removed unless their directory
+        is a key of keeplist and their basename is listed under it.
+        """
+        for group in list(self.testgroups.keys()):
+            if group not in keeplist:
+                del self.testgroups[group]
+                continue
+
+            g = self.testgroups[group]
+
+            if g.pre and os.path.basename(g.pre) in keeplist[group]:
+                continue
+
+            g.filter(keeplist[group])
+
+        for test in list(self.tests.keys()):
+            directory, base = os.path.split(test)
+            if directory not in keeplist or base not in keeplist[directory]:
+                del self.tests[test]
+
+    def read(self, options):
+        """
+        Read in the specified runfiles, and apply the TestRun properties
+        listed in the 'DEFAULT' section to our TestRun. Then read each
+        section, and apply the appropriate properties to the Test or
+        TestGroup. Properties from individual sections override those set
+        in the 'DEFAULT' section. If the Test or TestGroup passes
+        verification, add it to the TestRun.
+        """
+        config = configparser.RawConfigParser()
+        parsed = config.read(options.runfiles)
+        failed = options.runfiles - set(parsed)
+        if len(failed):
+            files = ' '.join(sorted(failed))
+            fail("Couldn't read config files: %s" % files)
+
+        for opt in TestRun.props:
+            if config.has_option('DEFAULT', opt):
+                setattr(self, opt, config.get('DEFAULT', opt))
+                # __init__ already appended the timestamp to the command
+                # line outputdir, so only a value taken from the runfile
+                # still needs it.
+                if opt == 'outputdir':
+                    self.outputdir = os.path.join(self.outputdir,
+                                                  self.timestamp)
+
+        testdir = options.testdir
+
+        for section in config.sections():
+            if 'tests' in config.options(section):
+                # A section with a 'tests' option is a TestGroup. Its name
+                # is a directory, optionally followed by ':identifier'.
+                parts = section.split(':', 1)
+                sectiondir = parts[0]
+                identifier = parts[1] if len(parts) == 2 else None
+                if os.path.isdir(sectiondir):
+                    pathname = sectiondir
+                elif os.path.isdir(os.path.join(testdir, sectiondir)):
+                    pathname = os.path.join(testdir, sectiondir)
+                else:
+                    pathname = sectiondir
+
+                testgroup = TestGroup(os.path.abspath(pathname),
+                                      identifier=identifier)
+                for prop in TestGroup.props:
+                    for sect in ['DEFAULT', section]:
+                        if config.has_option(sect, prop):
+                            if prop == 'tags':
+                                # NOTE(review): eval() executes whatever
+                                # expression the runfile contains; only
+                                # use runfiles from a trusted source.
+                                setattr(testgroup, prop,
+                                        eval(config.get(sect, prop)))
+                            elif prop == 'failsafe':
+                                failsafe = config.get(sect, prop)
+                                setattr(testgroup, prop,
+                                        os.path.join(testdir, failsafe))
+                            else:
+                                setattr(testgroup, prop,
+                                        config.get(sect, prop))
+
+                # Repopulate tests using eval to convert the string to a list
+                # NOTE(review): same eval() caveat as for 'tags' above.
+                testgroup.tests = eval(config.get(section, 'tests'))
+
+                if testgroup.verify():
+                    self.testgroups[section] = testgroup
+            else:
+                test = Test(section)
+                for prop in Test.props:
+                    for sect in ['DEFAULT', section]:
+                        if config.has_option(sect, prop):
+                            if prop == 'failsafe':
+                                failsafe = config.get(sect, prop)
+                                setattr(test, prop,
+                                        os.path.join(testdir, failsafe))
+                            else:
+                                setattr(test, prop, config.get(sect, prop))
+
+                if test.verify():
+                    self.tests[section] = test
+
+    def write(self, options):
+        """
+        Create a configuration file for editing and later use. The
+        'DEFAULT' section of the config file is created from the
+        properties that were specified on the command line. Tests are
+        simply added as sections that inherit everything from the
+        'DEFAULT' section. TestGroups are the same, except they get an
+        option including all the tests to run in that directory.
+        """
+
+        defaults = dict([(prop, getattr(options, prop)) for prop, _ in
+                         self.defaults])
+        config = configparser.RawConfigParser(defaults)
+
+        for test in sorted(self.tests.keys()):
+            config.add_section(test)
+            for prop in Test.props:
+                if prop not in self.props:
+                    config.set(test, prop,
+                               getattr(self.tests[test], prop))
+
+        for testgroup in sorted(self.testgroups.keys()):
+            config.add_section(testgroup)
+            config.set(testgroup, 'tests', self.testgroups[testgroup].tests)
+            for prop in TestGroup.props:
+                if prop not in self.props:
+                    config.set(testgroup, prop,
+                               getattr(self.testgroups[testgroup], prop))
+
+        try:
+            with open(options.template, 'w') as f:
+                return config.write(f)
+        except IOError:
+            fail('Could not open \'%s\' for writing.' % options.template)
+
+    def complete_outputdirs(self):
+        """
+        Collect all the pathnames for Tests, and TestGroups. Work
+        backwards one pathname component at a time, to create a unique
+        directory name in which to deposit test output. Tests will be able
+        to write output files directly in the newly modified outputdir.
+        TestGroups will be able to create one subdirectory per test in the
+        outputdir, and are guaranteed uniqueness because a group can only
+        contain files in one directory. Pre and post tests will create a
+        directory rooted at the outputdir of the Test or TestGroup in
+        question for their output. Failsafe scripts will create a directory
+        rooted at the outputdir of each Test for their output.
+        """
+        done = False
+        components = 0
+        tmp_dict = dict(list(self.tests.items()) +
+                        list(self.testgroups.items()))
+        total = len(tmp_dict)
+        base = self.outputdir
+
+        while not done:
+            paths = []
+            # Each pass keeps one more trailing pathname component.
+            components -= 1
+            for testfile in list(tmp_dict.keys()):
+                uniq = '/'.join(testfile.split('/')[components:]).lstrip('/')
+                if uniq not in paths:
+                    paths.append(uniq)
+                    tmp_dict[testfile].outputdir = os.path.join(base, uniq)
+                else:
+                    # Collision: retry everything with a longer suffix.
+                    break
+            done = total == len(paths)
+
+    def setup_logging(self, options):
+        """
+        This function creates the output directory and gets a file object
+        for the logfile. This function must be called before write_log()
+        can be used. Nothing is created for a dry run, or when a template
+        is being written.
+        """
+        if options.dryrun is True:
+            return
+
+        global LOG_FILE_OBJ
+        if not options.template:
+            try:
+                # Clear the umask so the directory really gets mode 0777,
+                # and restore it even when makedirs() fails.
+                old = os.umask(0)
+                try:
+                    os.makedirs(self.outputdir, mode=0o777)
+                finally:
+                    os.umask(old)
+                filename = os.path.join(self.outputdir, 'log')
+                LOG_FILE_OBJ = open(filename, buffering=0, mode='wb')
+            except OSError as e:
+                fail('%s' % e)
+
+    def run(self, options):
+        """
+        Walk through all the Tests and TestGroups, calling run(). The
+        whole walk is repeated options.iterations times.
+        """
+        try:
+            os.chdir(self.outputdir)
+        except OSError:
+            fail('Could not change to directory %s' % self.outputdir)
+        # make a symlink to the output for the currently running test
+        logsymlink = os.path.join(self.outputdir, '../current')
+        if os.path.islink(logsymlink):
+            os.unlink(logsymlink)
+        if not os.path.exists(logsymlink):
+            os.symlink(self.outputdir, logsymlink)
+        else:
+            write_log('Could not make a symlink to directory %s\n' %
+                      self.outputdir, LOG_ERR)
+
+        if options.kmemleak:
+            cmd = f'echo scan=0 | {SUDO} tee {KMEMLEAK_FILE}'
+            check_output(cmd, shell=True)
+
+        for _ in range(options.iterations):
+            for test in sorted(self.tests.keys()):
+                self.tests[test].run(options)
+            for testgroup in sorted(self.testgroups.keys()):
+                self.testgroups[testgroup].run(options)
+
+    def summary(self):
+        """
+        Print the non-zero result counts, the running time, the pass
+        percentage and the log directory, and return the exit status for
+        the run: 2 when nothing was run, 1 when a test result was 'FAIL'
+        or 'KILLED', 3 when a result was 'RERAN', and 0 otherwise.
+        """
+        if Result.total == 0:
+            return 2
+
+        print('\nResults Summary')
+        for key in list(Result.runresults.keys()):
+            if Result.runresults[key] != 0:
+                print('%s\t% 4d' % (key, Result.runresults[key]))
+
+        m, s = divmod(time() - self.starttime, 60)
+        h, m = divmod(m, 60)
+        print('\nRunning Time:\t%02d:%02d:%02d' % (h, m, s))
+        print('Percent passed:\t%.1f%%' % ((float(Result.runresults['PASS']) /
+                                            float(Result.total)) * 100))
+        print('Log directory:\t%s' % self.outputdir)
+
+        if Result.runresults['FAIL'] > 0:
+            return 1
+
+        if Result.runresults['KILLED'] > 0:
+            return 1
+
+        if Result.runresults['RERAN'] > 0:
+            return 3
+
+        return 0
+
+
+def write_log(msg, target):
+    """
+    Send msg to the destination selected by target: LOG_OUT for standard
+    out, LOG_ERR for standard error, LOG_FILE for the logfile. For
+    LOG_OUT and LOG_ERR the message is a string and is encoded as UTF-8.
+    For LOG_FILE it must already be a bytes like object, so that output
+    from tests in unexpected encodings can still be handled. Any other
+    target is reported through fail().
+    """
+    if target == LOG_OUT or target == LOG_ERR:
+        stream = sys.stdout if target == LOG_OUT else sys.stderr
+        os.write(stream.fileno(), bytearray(msg, encoding='utf-8'))
+        return
+
+    if target == LOG_FILE:
+        os.write(LOG_FILE_OBJ.fileno(), msg)
+        return
+
+    fail('log_msg called with unknown target "%s"' % target)
+
+
+def verify_file(pathname):
+    """
+    Return True when pathname, or pathname with a '.ksh' or '.sh' suffix
+    appended, is an executable regular file. A pathname that is itself a
+    directory or a symbolic link is rejected outright.
+    """
+    if os.path.isdir(pathname) or os.path.islink(pathname):
+        return False
+
+    candidates = (pathname + suffix for suffix in ('', '.ksh', '.sh'))
+    return any(os.path.isfile(script) and os.access(script, os.X_OK)
+               for script in candidates)
+
+
+def verify_user(user):
+    """
+    Return True when the named user exists on this system and is able to
+    run a command through sudo without being asked for a password. Users
+    that pass are remembered in Cmd.verified_users, so they are checked
+    only once; failures are reported on standard error.
+    """
+    probe = [SUDO, '-n', '-u', user, TRUE]
+
+    if user in Cmd.verified_users:
+        return True
+
+    try:
+        getpwnam(user)
+    except KeyError:
+        write_log("Warning: user '%s' does not exist.\n" % user,
+                  LOG_ERR)
+        return False
+
+    # 'sudo -n' fails instead of prompting when a password is required.
+    status = Popen(probe).wait()
+    if status != 0:
+        write_log("Warning: user '%s' cannot use passwordless sudo.\n" % user,
+                  LOG_ERR)
+        return False
+
+    Cmd.verified_users.append(user)
+    return True
+
+
+def find_tests(testrun, options):
+    """
+    Populate testrun from options.pathnames, visited in sorted order. A
+    pathname that is not a directory is added as a Test. A directory is
+    walked recursively: when options.do_groups is set, every directory
+    visited is added as a TestGroup; otherwise every file found is added
+    as a Test.
+    """
+
+    for p in sorted(options.pathnames):
+        if not os.path.isdir(p):
+            testrun.addtest(p, options)
+            continue
+
+        for dirname, _, filenames in os.walk(p):
+            if options.do_groups:
+                testrun.addtestgroup(dirname, filenames, options)
+                continue
+            for f in sorted(filenames):
+                testrun.addtest(os.path.join(dirname, f), options)
+
+
+def filter_tests(testrun, options):
+    """
+    Read the log named by options.logfile, collect every test whose
+    result line ends in [FAIL], and restrict testrun to those tests. The
+    failures are passed to testrun.filter() as a mapping of group name
+    (the 'tests/...' directory part of the logged path) to a list of test
+    names. A logfile that cannot be opened is reported through fail().
+    """
+    try:
+        fh = open(options.logfile, "r")
+    except Exception as e:
+        fail('%s' % e)
+
+    failed = {}
+    # The with-block closes the logfile even if reading it raises.
+    with fh:
+        for line in fh:
+            m = re.match(r'Test: .*(tests/.*)/(\S+).*\[FAIL\]', line)
+            if not m:
+                continue
+            group, test = m.group(1, 2)
+            failed.setdefault(group, []).append(test)
+
+    testrun.filter(failed)
+
+
+def fail(retstr, ret=1):
+    """
+    Print retstr, prefixed with the program name, and terminate the
+    program with exit status ret (1 unless specified).
+    """
+    print('%s: %s' % (sys.argv[0], retstr))
+    # sys.exit() rather than the exit() helper, which is only present
+    # when the site module has been loaded.
+    sys.exit(ret)
+
+
+def kmemleak_cb(option, opt_str, value, parser):
+    """
+    optparse callback for the kmemleak switch: report through fail() when
+    KMEMLEAK_FILE is missing, then store True in the option's
+    destination.
+    """
+    available = os.path.exists(KMEMLEAK_FILE)
+    if not available:
+        fail(f"File '{KMEMLEAK_FILE}' doesn't exist. " +
+             "Enable CONFIG_DEBUG_KMEMLEAK in kernel configuration.")
+
+    setattr(parser.values, option.dest, True)
+
+
+def options_cb(option, opt_str, value, parser):
+    """
+    optparse callback shared by the options that take a value. An option
+    given more than once is reported through fail(). The value is
+    normalized according to the destination before it is stored:
+    'runfiles' becomes a set of absolute paths (and parser.values.cmd is
+    set to 'rdconfig'), 'tags' becomes a list of stripped names, and the
+    outputdir/template/testdir/logfile values become absolute paths.
+    """
+    if opt_str in parser.rargs:
+        fail('%s may only be specified once.' % opt_str)
+
+    dest = option.dest
+
+    if dest == 'runfiles':
+        parser.values.cmd = 'rdconfig'
+        value = {os.path.abspath(p) for p in value.split(',')}
+    if dest == 'tags':
+        value = [x.strip() for x in value.split(',')]
+    if dest in ('outputdir', 'template', 'testdir', 'logfile'):
+        value = os.path.abspath(value)
+
+    setattr(parser.values, dest, value)
+
+
+def parse_args():
+    """
+    Define the command line options, parse the command line, and return
+    the resulting options object. The positional arguments are stored on
+    it as options.pathnames, converted to absolute paths. Positional
+    arguments combined with runfiles are reported through fail().
+    """
+    def switch(dest, text):
+        # Keyword arguments for a boolean switch that defaults to False.
+        return dict(action='store_true', default=False, dest=dest,
+                    help=text)
+
+    def handled(dest, metavar, text, kind='string', **extra):
+        # Keyword arguments for a valued option routed through options_cb.
+        spec = dict(action='callback', callback=options_cb, type=kind,
+                    dest=dest, metavar=metavar, help=text)
+        spec.update(extra)
+        return spec
+
+    # The options are registered in the order listed here.
+    table = [
+        ('-c', handled('runfiles', 'runfiles',
+                       'Specify tests to run via config files.')),
+        ('-d', switch('dryrun',
+                      'Dry run. Print tests, but take no other action.')),
+        ('-l', handled('logfile', 'logfile',
+                       'Read logfile and re-run tests which failed.',
+                       default=None)),
+        ('-g', switch('do_groups', 'Make directories TestGroups.')),
+        ('-o', handled('outputdir', 'outputdir',
+                       'Specify an output directory.', default=BASEDIR)),
+        ('-i', handled('testdir', 'testdir',
+                       'Specify a test directory.', default=TESTDIR)),
+        ('-K', switch('kmsg', 'Log tests names to /dev/kmsg')),
+        ('-m', dict(action='callback', callback=kmemleak_cb,
+                    default=False, dest='kmemleak',
+                    help='Enable kmemleak reporting (Linux only)')),
+        ('-p', handled('pre', 'script', 'Specify a pre script.',
+                       default='')),
+        ('-P', handled('post', 'script', 'Specify a post script.',
+                       default='')),
+        ('-q', switch('quiet',
+                      'Silence on the console during a test run.')),
+        ('-s', handled('failsafe', 'script', 'Specify a failsafe script.',
+                       default='')),
+        ('-S', handled('failsafe_user', 'failsafe_user',
+                       'Specify a user to execute the failsafe script.',
+                       default='')),
+        ('-t', handled('timeout', 'seconds',
+                       'Timeout (in seconds) for an individual test.',
+                       kind='int', default=60)),
+        ('-u', handled('user', 'user',
+                       'Specify a different user name to run as.',
+                       default='')),
+        ('-w', handled('template', 'template',
+                       'Create a new config file.', default=None)),
+        ('-x', handled('pre_user', 'pre_user',
+                       'Specify a user to execute the pre script.',
+                       default='')),
+        ('-X', handled('post_user', 'post_user',
+                       'Specify a user to execute the post script.',
+                       default='')),
+        ('-T', handled('tags', 'tags',
+                       'Specify tags to execute specific test groups.',
+                       default='')),
+        ('-I', handled('iterations', 'iterations',
+                       'Number of times to run the test run.',
+                       kind='int', default=1)),
+    ]
+
+    parser = OptionParser()
+    for flag, spec in table:
+        parser.add_option(flag, **spec)
+
+    (options, pathnames) = parser.parse_args()
+
+    if options.runfiles and len(pathnames):
+        fail('Extraneous arguments.')
+
+    options.pathnames = [os.path.abspath(path) for path in pathnames]
+
+    return options
+
+
+def main():
+    """
+    Program entry point. Parse the command line and build the TestRun,
+    either from the runfiles or by searching the given pathnames. When a
+    logfile was given, keep only the tests it reports as failed. When a
+    template was requested, write it and exit with status 0; otherwise
+    run the tests and exit with the status returned by summary().
+    """
+    options = parse_args()
+
+    testrun = TestRun(options)
+
+    if options.runfiles:
+        testrun.read(options)
+    else:
+        find_tests(testrun, options)
+
+    if options.logfile:
+        filter_tests(testrun, options)
+
+    if options.template:
+        testrun.write(options)
+        # sys.exit() rather than the exit() helper, which is only
+        # present when the site module has been loaded.
+        sys.exit(0)
+
+    testrun.complete_outputdirs()
+    testrun.run(options)
+    sys.exit(testrun.summary())
+
+
+if __name__ == '__main__':
+    main()

diff --git a/zfs/tests/test-runner/bin/zts-report.py b/zfs/tests/test-runner/bin/zts-report.py
deleted file mode 100755
index b823b67..0000000
--- a/zfs/tests/test-runner/bin/zts-report.py
+++ /dev/null

@@ -1,401 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2017 by Delphix. All rights reserved.
-# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
-#
-# This script must remain compatible with Python 2.6+ and Python 3.4+.
-#
-
-import os
-import re
-import sys
-
-#
-# This script parses the stdout of zfstest, which has this format:
-#
-# Test: /path/to/testa (run as root) [00:00] [PASS]
-# Test: /path/to/testb (run as jkennedy) [00:00] [PASS]
-# Test: /path/to/testc (run as root) [00:00] [FAIL]
-# [...many more results...]
-#
-# Results Summary
-# FAIL      22
-# SKIP      32
-# PASS    1156
-#
-# Running Time:   02:50:31
-# Percent passed: 95.5%
-# Log directory:  /var/tmp/test_results/20180615T205926
-#
-
-#
-# Common generic reasons for a test or test group to be skipped.
-#
-# Some test cases are known to fail in ways which are not harmful or dangerous.
-# In these cases simply mark the test as a known failure until it can be
-# updated and the issue resolved.  Note that it's preferable to open a unique
-# issue on the GitHub issue tracker for each test case failure.
-#
-known_reason = 'Known issue'
-
-#
-# Some tests require that a test user be able to execute the zfs utilities.
-# This may not be possible when testing in-tree due to the default permissions
-# on the user's home directory.  When testing this can be resolved by granting
-# group read access.
-#
-# chmod 0750 $HOME
-#
-exec_reason = 'Test user execute permissions required for utilities'
-
-#
-# Some tests require that the DISKS provided can be partitioned.  This is
-# normally not an issue because loop back devices are used for DISKS and they
-# can be partition.  There is one notable exception, the CentOS 6.x kernel is
-# old enough that it does not support partitioning loop back devices.
-#
-disk_reason = 'Partitionable DISKS required'
-
-#
-# Some tests require a minimum python version of 3.5 and will be skipped when
-# the default system version is too old.  There may also be tests which require
-# additional python modules be installed, for example python-cffi is required
-# by the pyzfs tests.
-#
-python_reason = 'Python v3.5 or newer required'
-python_deps_reason = 'Python modules missing: python-cffi'
-
-#
-# Some tests require the O_TMPFILE flag which was first introduced in the
-# 3.11 kernel.
-#
-tmpfile_reason = 'Kernel O_TMPFILE support required'
-
-#
-# Some tests may depend on udev change events being generated when block
-# devices change capacity.  This functionality wasn't available until the
-# 2.6.38 kernel.
-#
-udev_reason = 'Kernel block device udev change events required'
-
-#
-# Some tests require that the NFS client and server utilities be installed.
-#
-share_reason = 'NFS client and server utilities required'
-
-#
-# Some tests require that the lsattr utility support the project id feature.
-#
-project_id_reason = 'lsattr with set/show project ID required'
-
-#
-# Some tests require that the kernel support user namespaces.
-#
-user_ns_reason = 'Kernel user namespace support required'
-
-#
-# Some rewind tests can fail since nothing guarantees that old MOS blocks
-# are not overwritten.  Snapshots protect datasets and data files but not
-# the MOS.  Reasonable efforts are made in the test case to increase the
-# odds that some txgs will have their MOS data left untouched, but it is
-# never a sure thing.
-#
-rewind_reason = 'Arbitrary pool rewind is not guaranteed'
-
-#
-# Some tests may by structured in a way that relies on exact knowledge
-# of how much free space in available in a pool.  These tests cannot be
-# made completely reliable because the internal details of how free space
-# is managed are not exposed to user space.
-#
-enospc_reason = 'Exact free space reporting is not guaranteed'
-
-#
-# Some tests require a minimum version of the fio benchmark utility.
-# Older distributions such as CentOS 6.x only provide fio-2.0.13.
-#
-fio_reason = 'Fio v2.3 or newer required'
-
-#
-# Some tests require that the DISKS provided support the discard operation.
-# Normally this is not an issue because loop back devices are used for DISKS
-# and they support discard (TRIM/UNMAP).
-#
-trim_reason = 'DISKS must support discard (TRIM/UNMAP)'
-
-#
-# Some tests are not applicable to Linux or need to be updated to operate
-# in the manor required by Linux.  Any tests which are skipped for this
-# reason will be suppressed in the final analysis output.
-#
-na_reason = "N/A on Linux"
-
-summary = {
-    'total': float(0),
-    'passed': float(0),
-    'logfile': "Could not determine logfile location."
-}
-
-#
-# These tests are known to fail, thus we use this list to prevent these
-# failures from failing the job as a whole; only unexpected failures
-# bubble up to cause this script to exit with a non-zero exit status.
-#
-# Format: { 'test-name': ['expected result', 'issue-number | reason'] }
-#
-# For each known failure it is recommended to link to a GitHub issue by
-# setting the reason to the issue number.  Alternately, one of the generic
-# reasons listed above can be used.
-#
-known = {
-    'casenorm/sensitive_formd_lookup': ['FAIL', '7633'],
-    'casenorm/sensitive_formd_delete': ['FAIL', '7633'],
-    'casenorm/mixed_none_lookup_ci': ['FAIL', '7633'],
-    'casenorm/mixed_formd_lookup': ['FAIL', '7633'],
-    'casenorm/mixed_formd_lookup_ci': ['FAIL', '7633'],
-    'casenorm/mixed_formd_delete': ['FAIL', '7633'],
-    'cli_root/zfs_receive/zfs_receive_004_neg': ['FAIL', known_reason],
-    'cli_root/zfs_unshare/zfs_unshare_002_pos': ['SKIP', na_reason],
-    'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason],
-    'cli_root/zpool_create/zpool_create_016_pos': ['SKIP', na_reason],
-    'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
-    'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
-    'inuse/inuse_001_pos': ['SKIP', na_reason],
-    'inuse/inuse_003_pos': ['SKIP', na_reason],
-    'inuse/inuse_006_pos': ['SKIP', na_reason],
-    'inuse/inuse_007_pos': ['SKIP', na_reason],
-    'privilege/setup': ['SKIP', na_reason],
-    'refreserv/refreserv_004_pos': ['FAIL', known_reason],
-    'removal/removal_condense_export': ['SKIP', known_reason],
-    'removal/removal_with_zdb': ['SKIP', known_reason],
-    'rootpool/setup': ['SKIP', na_reason],
-    'rsend/rsend_008_pos': ['SKIP', '6066'],
-    'snapshot/rollback_003_pos': ['SKIP', '6143'],
-    'vdev_zaps/vdev_zaps_007_pos': ['FAIL', known_reason],
-    'xattr/xattr_008_pos': ['SKIP', na_reason],
-    'xattr/xattr_009_neg': ['SKIP', na_reason],
-    'xattr/xattr_010_neg': ['SKIP', na_reason],
-    'zvol/zvol_misc/zvol_misc_001_neg': ['SKIP', na_reason],
-    'zvol/zvol_misc/zvol_misc_003_neg': ['SKIP', na_reason],
-    'zvol/zvol_misc/zvol_misc_004_pos': ['SKIP', na_reason],
-    'zvol/zvol_misc/zvol_misc_005_neg': ['SKIP', na_reason],
-    'zvol/zvol_misc/zvol_misc_006_pos': ['SKIP', na_reason],
-    'zvol/zvol_swap/zvol_swap_003_pos': ['SKIP', na_reason],
-    'zvol/zvol_swap/zvol_swap_005_pos': ['SKIP', na_reason],
-    'zvol/zvol_swap/zvol_swap_006_pos': ['SKIP', na_reason],
-}
-
-#
-# These tests may occasionally fail or be skipped.  We want there failures
-# to be reported but only unexpected failures should bubble up to cause
-# this script to exit with a non-zero exit status.
-#
-# Format: { 'test-name': ['expected result', 'issue-number | reason'] }
-#
-# For each known failure it is recommended to link to a GitHub issue by
-# setting the reason to the issue number.  Alternately, one of the generic
-# reasons listed above can be used.
-#
-maybe = {
-    'cache/setup': ['SKIP', disk_reason],
-    'cache/cache_010_neg': ['FAIL', known_reason],
-    'chattr/setup': ['SKIP', exec_reason],
-    'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason],
-    'cli_root/zfs_get/zfs_get_004_pos': ['FAIL', known_reason],
-    'cli_root/zfs_get/zfs_get_009_pos': ['SKIP', '5479'],
-    'cli_root/zfs_rollback/zfs_rollback_001_pos': ['FAIL', '6415'],
-    'cli_root/zfs_rollback/zfs_rollback_002_pos': ['FAIL', '6416'],
-    'cli_root/zfs_share/setup': ['SKIP', share_reason],
-    'cli_root/zfs_snapshot/zfs_snapshot_002_neg': ['FAIL', known_reason],
-    'cli_root/zfs_unshare/setup': ['SKIP', share_reason],
-    'cli_root/zpool_add/setup': ['SKIP', disk_reason],
-    'cli_root/zpool_add/zpool_add_004_pos': ['FAIL', known_reason],
-    'cli_root/zpool_create/setup': ['SKIP', disk_reason],
-    'cli_root/zpool_create/zpool_create_008_pos': ['FAIL', known_reason],
-    'cli_root/zpool_destroy/zpool_destroy_001_pos': ['SKIP', '6145'],
-    'cli_root/zpool_expand/setup': ['SKIP', udev_reason],
-    'cli_root/zpool_export/setup': ['SKIP', disk_reason],
-    'cli_root/zpool_import/setup': ['SKIP', disk_reason],
-    'cli_root/zpool_import/import_rewind_device_replaced':
-        ['FAIL', rewind_reason],
-    'cli_root/zpool_import/import_rewind_config_changed':
-        ['FAIL', rewind_reason],
-    'cli_root/zpool_import/zpool_import_missing_003_pos': ['SKIP', '6839'],
-    'cli_root/zpool_remove/setup': ['SKIP', disk_reason],
-    'cli_root/zpool_trim/setup': ['SKIP', trim_reason],
-    'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', '6141'],
-    'cli_user/misc/arc_summary3_001_pos': ['SKIP', python_reason],
-    'delegate/setup': ['SKIP', exec_reason],
-    'fault/auto_online_001_pos': ['SKIP', disk_reason],
-    'fault/auto_replace_001_pos': ['SKIP', disk_reason],
-    'history/history_004_pos': ['FAIL', '7026'],
-    'history/history_005_neg': ['FAIL', '6680'],
-    'history/history_006_neg': ['FAIL', '5657'],
-    'history/history_008_pos': ['FAIL', known_reason],
-    'history/history_010_pos': ['SKIP', exec_reason],
-    'inuse/inuse_005_pos': ['SKIP', disk_reason],
-    'inuse/inuse_008_pos': ['SKIP', disk_reason],
-    'inuse/inuse_009_pos': ['SKIP', disk_reason],
-    'io/mmap': ['SKIP', fio_reason],
-    'largest_pool/largest_pool_001_pos': ['FAIL', known_reason],
-    'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason],
-    'no_space/enospc_002_pos': ['FAIL', enospc_reason],
-    'projectquota/setup': ['SKIP', exec_reason],
-    'redundancy/redundancy_004_neg': ['FAIL', '7290'],
-    'reservation/reservation_008_pos': ['FAIL', '7741'],
-    'reservation/reservation_018_pos': ['FAIL', '5642'],
-    'rsend/rsend_019_pos': ['FAIL', '6086'],
-    'rsend/rsend_020_pos': ['FAIL', '6446'],
-    'rsend/rsend_021_pos': ['FAIL', '6446'],
-    'rsend/rsend_024_pos': ['FAIL', '5665'],
-    'rsend/send-c_volume': ['FAIL', '6087'],
-    'snapshot/clone_001_pos': ['FAIL', known_reason],
-    'snapshot/snapshot_009_pos': ['FAIL', '7961'],
-    'snapshot/snapshot_010_pos': ['FAIL', '7961'],
-    'snapused/snapused_004_pos': ['FAIL', '5513'],
-    'tmpfile/setup': ['SKIP', tmpfile_reason],
-    'threadsappend/threadsappend_001_pos': ['FAIL', '6136'],
-    'trim/setup': ['SKIP', trim_reason],
-    'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason],
-    'user_namespace/setup': ['SKIP', user_ns_reason],
-    'userquota/setup': ['SKIP', exec_reason],
-    'vdev_zaps/vdev_zaps_004_pos': ['FAIL', '6935'],
-    'write_dirs/setup': ['SKIP', disk_reason],
-    'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', '5848'],
-    'alloc_class/alloc_class_009_pos': ['FAIL', known_reason],
-    'alloc_class/alloc_class_010_pos': ['FAIL', known_reason],
-    'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
-    'cli_root/zpool_expand/zpool_expand_001_pos': ['FAIL', known_reason],
-    'cli_root/zpool_expand/zpool_expand_005_pos': ['FAIL', known_reason],
-    'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
-    'refreserv/refreserv_raidz': ['FAIL', known_reason],
-    'rsend/rsend_007_pos': ['FAIL', known_reason],
-    'rsend/rsend_010_pos': ['FAIL', known_reason],
-    'rsend/rsend_011_pos': ['FAIL', known_reason],
-    'snapshot/rollback_003_pos': ['FAIL', known_reason],
-
-}
-
-def usage(s):
-    print(s)
-    sys.exit(1)
-
-
-def process_results(pathname):
-    try:
-        f = open(pathname)
-    except IOError as e:
-        print('Error opening file: %s' % e)
-        sys.exit(1)
-
-    prefix = '/zfs-tests/tests/functional/'
-    pattern = \
-        r'^Test:\s*\S*%s(\S+)\s*\(run as (\S+)\)\s*\[(\S+)\]\s*\[(\S+)\]' \
-        % prefix
-    pattern_log = r'^\s*Log directory:\s*(\S*)'
-
-    d = {}
-    for l in f.readlines():
-        m = re.match(pattern, l)
-        if m and len(m.groups()) == 4:
-            summary['total'] += 1
-            if m.group(4) == "PASS":
-                summary['passed'] += 1
-            d[m.group(1)] = m.group(4)
-            continue
-
-        m = re.match(pattern_log, l)
-        if m:
-            summary['logfile'] = m.group(1)
-
-    return d
-
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        usage('usage: %s <pathname>' % sys.argv[0])
-    results = process_results(sys.argv[1])
-
-    if summary['total'] == 0:
-        print("\n\nNo test results were found.")
-        print("Log directory:  %s" % summary['logfile'])
-        sys.exit(0)
-
-    expected = []
-    unexpected = []
-
-    for test in list(results.keys()):
-        if results[test] == "PASS":
-            continue
-
-        setup = test.replace(os.path.basename(test), "setup")
-        if results[test] == "SKIP" and test != setup:
-            if setup in known and known[setup][0] == "SKIP":
-                continue
-            if setup in maybe and maybe[setup][0] == "SKIP":
-                continue
-
-        if ((test not in known or results[test] not in known[test][0]) and
-                (test not in maybe or results[test] not in maybe[test][0])):
-            unexpected.append(test)
-        else:
-            expected.append(test)
-
-    print("\nTests with results other than PASS that are expected:")
-    for test in sorted(expected):
-        issue_url = 'https://github.com/zfsonlinux/zfs/issues/'
-
-        # Include the reason why the result is expected, given the following:
-        # 1. Suppress test results which set the "N/A on Linux" reason.
-        # 2. Numerical reasons are assumed to be GitHub issue numbers.
-        # 3. When an entire test group is skipped only report the setup reason.
-        if test in known:
-            if known[test][1] == na_reason:
-                continue
-            elif known[test][1].isdigit():
-                expect = issue_url + known[test][1]
-            else:
-                expect = known[test][1]
-        elif test in maybe:
-            if maybe[test][1].isdigit():
-                expect = issue_url + maybe[test][1]
-            else:
-                expect = maybe[test][1]
-        elif setup in known and known[setup][0] == "SKIP" and setup != test:
-            continue
-        elif setup in maybe and maybe[setup][0] == "SKIP" and setup != test:
-            continue
-        else:
-            expect = "UNKNOWN REASON"
-        print("    %s %s (%s)" % (results[test], test, expect))
-
-    print("\nTests with result of PASS that are unexpected:")
-    for test in sorted(known.keys()):
-        # We probably should not be silently ignoring the case
-        # where "test" is not in "results".
-        if test not in results or results[test] != "PASS":
-            continue
-        print("    %s %s (expected %s)" % (results[test], test,
-                                           known[test][0]))
-
-    print("\nTests with results other than PASS that are unexpected:")
-    for test in sorted(unexpected):
-        expect = "PASS" if test not in known else known[test][0]
-        print("    %s %s (expected %s)" % (results[test], test, expect))
-
-    if len(unexpected) == 0:
-        sys.exit(0)
-    else:
-        sys.exit(1)

diff --git a/zfs/tests/test-runner/bin/zts-report.py.in b/zfs/tests/test-runner/bin/zts-report.py.in
new file mode 100755
index 0000000..44a7988
--- /dev/null
+++ b/zfs/tests/test-runner/bin/zts-report.py.in

@@ -0,0 +1,490 @@
+#!/usr/bin/env @PYTHON_SHEBANG@
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+#
+# This script must remain compatible with Python 3.6+.
+#
+
+import os
+import re
+import sys
+import argparse
+
+#
+# This script parses the stdout of zfstest, which has this format:
+#
+# Test: /path/to/testa (run as root) [00:00] [PASS]
+# Test: /path/to/testb (run as jkennedy) [00:00] [PASS]
+# Test: /path/to/testc (run as root) [00:00] [FAIL]
+# [...many more results...]
+#
+# Results Summary
+# FAIL      22
+# SKIP      32
+# PASS    1156
+#
+# Running Time:   02:50:31
+# Percent passed: 95.5%
+# Log directory:  /var/tmp/test_results/20180615T205926
+#
+
+#
+# Common generic reasons for a test or test group to be skipped.
+#
+# Some test cases are known to fail in ways which are not harmful or dangerous.
+# In these cases simply mark the test as a known failure until it can be
+# updated and the issue resolved.  Note that it's preferable to open a unique
+# issue on the GitHub issue tracker for each test case failure.
+#
+known_reason = 'Known issue'
+
+#
+# Some tests require that a test user be able to execute the zfs utilities.
+# This may not be possible when testing in-tree due to the default permissions
+# on the user's home directory.  When testing this can be resolved by granting
+# group read access.
+#
+# chmod 0750 $HOME
+#
+exec_reason = 'Test user execute permissions required for utilities'
+
+#
+# Some tests require a minimum python version of 3.6 and will be skipped when
+# the default system version is too old.  There may also be tests which require
+# additional python modules be installed, for example python3-cffi is required
+# by the pyzfs tests.
+#
+python_reason = 'Python v3.6 or newer required'
+python_deps_reason = 'Python modules missing: python3-cffi'
+
+#
+# Some tests require the O_TMPFILE flag which was first introduced in the
+# 3.11 kernel.
+#
+tmpfile_reason = 'Kernel O_TMPFILE support required'
+
+#
+# Some tests require the statx(2) system call on Linux which was first
+# introduced in the 4.11 kernel.
+#
+statx_reason = 'Kernel statx(2) system call required on Linux'
+
+#
+# Some tests require that the NFS client and server utilities be installed.
+#
+share_reason = 'NFS client and server utilities required'
+
+#
+# Some tests require that the lsattr utility support the project id feature.
+#
+project_id_reason = 'lsattr with set/show project ID required'
+
+#
+# Some tests require that the kernel support user namespaces.
+#
+user_ns_reason = 'Kernel user namespace support required'
+
+#
+# Some rewind tests can fail since nothing guarantees that old MOS blocks
+# are not overwritten.  Snapshots protect datasets and data files but not
+# the MOS.  Reasonable efforts are made in the test case to increase the
+# odds that some txgs will have their MOS data left untouched, but it is
+# never a sure thing.
+#
+rewind_reason = 'Arbitrary pool rewind is not guaranteed'
+
+#
+# Some tests may be structured in a way that relies on exact knowledge
+# of how much free space is available in a pool.  These tests cannot be
+# made completely reliable because the internal details of how free space
+# is managed are not exposed to user space.
+#
+enospc_reason = 'Exact free space reporting is not guaranteed'
+
+#
+# Some tests require a minimum version of the fio benchmark utility.
+# Older distributions such as CentOS 6.x only provide fio-2.0.13.
+#
+fio_reason = 'Fio v2.3 or newer required'
+
+#
+# Some tests require that the DISKS provided support the discard operation.
+# Normally this is not an issue because loop back devices are used for DISKS
+# and they support discard (TRIM/UNMAP).
+#
+trim_reason = 'DISKS must support discard (TRIM/UNMAP)'
+
+#
+# Some tests on FreeBSD require the fspacectl(2) system call and the
+# truncate(1) utility supporting the -d option.  The system call was first
+# introduced in FreeBSD version 1400032.
+#
+fspacectl_reason = 'fspacectl(2) and truncate -d support required'
+
+#
+# Some tests are not applicable to a platform or need to be updated to operate
+# in the manner required by the platform.  Any tests which are skipped for this
+# reason will be suppressed in the final analysis output.
+#
+na_reason = "Not applicable"
+
+#
+# Some test cases don't have all requirements to run on GitHub Actions CI.
+#
+ci_reason = 'CI runner doesn\'t have all requirements'
+
+summary = {
+    'total': float(0),
+    'passed': float(0),
+    'logfile': "Could not determine logfile location."
+}
+
+#
+# These tests are known to fail, thus we use this list to prevent these
+# failures from failing the job as a whole; only unexpected failures
+# bubble up to cause this script to exit with a non-zero exit status.
+#
+# Format: { 'test-name': ['expected result', 'issue-number | reason'] }
+#
+# For each known failure it is recommended to link to a GitHub issue by
+# setting the reason to the issue number.  Alternately, one of the generic
+# reasons listed above can be used.
+#
+known = {
+    'casenorm/mixed_none_lookup_ci': ['FAIL', 7633],
+    'casenorm/mixed_formd_lookup_ci': ['FAIL', 7633],
+    'cli_root/zfs_unshare/zfs_unshare_002_pos': ['SKIP', na_reason],
+    'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason],
+    'cli_root/zpool_import/import_rewind_device_replaced':
+        ['FAIL', rewind_reason],
+    'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
+    'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
+    'privilege/setup': ['SKIP', na_reason],
+    'refreserv/refreserv_004_pos': ['FAIL', known_reason],
+    'rootpool/setup': ['SKIP', na_reason],
+    'rsend/rsend_008_pos': ['SKIP', 6066],
+    'vdev_zaps/vdev_zaps_007_pos': ['FAIL', known_reason],
+}
+
+if sys.platform.startswith('freebsd'):
+    known.update({
+        'cli_root/zfs_receive/receive-o-x_props_override':
+            ['FAIL', known_reason],
+        'cli_root/zpool_resilver/zpool_resilver_concurrent':
+            ['SKIP', na_reason],
+        'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
+        'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
+        'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
+        'link_count/link_count_001': ['SKIP', na_reason],
+        'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
+    })
+elif sys.platform.startswith('linux'):
+    known.update({
+        'casenorm/mixed_formd_lookup': ['FAIL', 7633],
+        'casenorm/mixed_formd_delete': ['FAIL', 7633],
+        'casenorm/sensitive_formd_lookup': ['FAIL', 7633],
+        'casenorm/sensitive_formd_delete': ['FAIL', 7633],
+        'removal/removal_with_zdb': ['SKIP', known_reason],
+    })
+
+
+#
+# These tests may occasionally fail or be skipped.  We want their failures
+# to be reported but only unexpected failures should bubble up to cause
+# this script to exit with a non-zero exit status.
+#
+# Format: { 'test-name': ['expected result', 'issue-number | reason'] }
+#
+# For each known failure it is recommended to link to a GitHub issue by
+# setting the reason to the issue number.  Alternately, one of the generic
+# reasons listed above can be used.
+#
+maybe = {
+    'threadsappend/threadsappend_001_pos': ['FAIL', 6136],
+    'chattr/setup': ['SKIP', exec_reason],
+    'crtime/crtime_001_pos': ['SKIP', statx_reason],
+    'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason],
+    'cli_root/zfs_destroy/zfs_destroy_dev_removal_condense':
+        ['FAIL', known_reason],
+    'cli_root/zfs_get/zfs_get_004_pos': ['FAIL', known_reason],
+    'cli_root/zfs_get/zfs_get_009_pos': ['SKIP', 5479],
+    'cli_root/zfs_rollback/zfs_rollback_001_pos': ['FAIL', known_reason],
+    'cli_root/zfs_rollback/zfs_rollback_002_pos': ['FAIL', known_reason],
+    'cli_root/zfs_share/setup': ['SKIP', share_reason],
+    'cli_root/zfs_snapshot/zfs_snapshot_002_neg': ['FAIL', known_reason],
+    'cli_root/zfs_unshare/setup': ['SKIP', share_reason],
+    'cli_root/zpool_add/zpool_add_004_pos': ['FAIL', known_reason],
+    'cli_root/zpool_destroy/zpool_destroy_001_pos': ['SKIP', 6145],
+    'cli_root/zpool_import/zpool_import_missing_003_pos': ['SKIP', 6839],
+    'cli_root/zpool_initialize/zpool_initialize_import_export':
+        ['FAIL', 11948],
+    'cli_root/zpool_labelclear/zpool_labelclear_removed':
+        ['FAIL', known_reason],
+    'cli_root/zpool_trim/setup': ['SKIP', trim_reason],
+    'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', 6141],
+    'delegate/setup': ['SKIP', exec_reason],
+    'fallocate/fallocate_punch-hole': ['SKIP', fspacectl_reason],
+    'history/history_004_pos': ['FAIL', 7026],
+    'history/history_005_neg': ['FAIL', 6680],
+    'history/history_006_neg': ['FAIL', 5657],
+    'history/history_008_pos': ['FAIL', known_reason],
+    'history/history_010_pos': ['SKIP', exec_reason],
+    'io/mmap': ['SKIP', fio_reason],
+    'largest_pool/largest_pool_001_pos': ['FAIL', known_reason],
+    'mmp/mmp_on_uberblocks': ['FAIL', known_reason],
+    'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason],
+    'pool_checkpoint/checkpoint_discard_busy': ['FAIL', 11946],
+    'pam/setup': ['SKIP', "pamtester might be not available"],
+    'projectquota/setup': ['SKIP', exec_reason],
+    'removal/removal_condense_export': ['FAIL', known_reason],
+    'reservation/reservation_008_pos': ['FAIL', 7741],
+    'reservation/reservation_018_pos': ['FAIL', 5642],
+    'snapshot/clone_001_pos': ['FAIL', known_reason],
+    'snapshot/snapshot_009_pos': ['FAIL', 7961],
+    'snapshot/snapshot_010_pos': ['FAIL', 7961],
+    'snapused/snapused_004_pos': ['FAIL', 5513],
+    'tmpfile/setup': ['SKIP', tmpfile_reason],
+    'trim/setup': ['SKIP', trim_reason],
+    'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason],
+    'user_namespace/setup': ['SKIP', user_ns_reason],
+    'userquota/setup': ['SKIP', exec_reason],
+    'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason],
+    'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', 5848],
+}
+
+if sys.platform.startswith('freebsd'):
+    maybe.update({
+        'cli_root/zfs_copies/zfs_copies_002_pos': ['FAIL', known_reason],
+        'cli_root/zfs_inherit/zfs_inherit_001_neg': ['FAIL', known_reason],
+        'cli_root/zfs_share/zfs_share_011_pos': ['FAIL', known_reason],
+        'cli_root/zfs_share/zfs_share_concurrent_shares':
+            ['FAIL', known_reason],
+        'cli_root/zpool_import/zpool_import_012_pos': ['FAIL', known_reason],
+        'delegate/zfs_allow_003_pos': ['FAIL', known_reason],
+        'inheritance/inherit_001_pos': ['FAIL', 11829],
+        'resilver/resilver_restart_001': ['FAIL', known_reason],
+        'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622],
+        'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623],
+        'snapshot/snapshot_002_pos': ['FAIL', 14831],
+    })
+elif sys.platform.startswith('linux'):
+    maybe.update({
+        'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
+        'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
+        'fault/auto_online_002_pos': ['FAIL', 11889],
+        'fault/auto_replace_001_pos': ['FAIL', 14851],
+        'fault/auto_spare_002_pos': ['FAIL', 11889],
+        'fault/auto_spare_multiple': ['FAIL', 11889],
+        'fault/auto_spare_shared': ['FAIL', 11889],
+        'fault/decompress_fault': ['FAIL', 11889],
+        'io/io_uring': ['SKIP', 'io_uring support required'],
+        'limits/filesystem_limit': ['SKIP', known_reason],
+        'limits/snapshot_limit': ['SKIP', known_reason],
+        'mmp/mmp_active_import': ['FAIL', known_reason],
+        'mmp/mmp_exported_import': ['FAIL', known_reason],
+        'mmp/mmp_inactive_import': ['FAIL', known_reason],
+        'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
+        'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
+    })
+
+
+# Not all GitHub Actions runners have the scsi_debug module, so we may skip
+#   some tests which use it.
+if os.environ.get('CI') == 'true':
+    known.update({
+        'cli_root/zpool_expand/zpool_expand_001_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_expand/zpool_expand_003_neg': ['SKIP', ci_reason],
+        'cli_root/zpool_expand/zpool_expand_005_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/setup': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_001_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_002_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_003_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_004_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_005_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_006_neg': ['SKIP', ci_reason],
+        'cli_root/zpool_reopen/zpool_reopen_007_pos': ['SKIP', ci_reason],
+        'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
+        'fault/auto_offline_001_pos': ['SKIP', ci_reason],
+        'fault/auto_online_001_pos': ['SKIP', ci_reason],
+        'fault/auto_online_002_pos': ['SKIP', ci_reason],
+        'fault/auto_replace_001_pos': ['SKIP', ci_reason],
+        'fault/auto_spare_ashift': ['SKIP', ci_reason],
+        'fault/auto_spare_shared': ['SKIP', ci_reason],
+        'procfs/pool_state': ['SKIP', ci_reason],
+    })
+
+    maybe.update({
+        'events/events_002_pos': ['FAIL', 11546],
+    })
+elif sys.platform.startswith('linux'):
+    maybe.update({
+        'alloc_class/alloc_class_009_pos': ['FAIL', known_reason],
+        'alloc_class/alloc_class_010_pos': ['FAIL', known_reason],
+        'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
+        'cli_root/zpool_expand/zpool_expand_001_pos': ['FAIL', known_reason],
+        'cli_root/zpool_expand/zpool_expand_005_pos': ['FAIL', known_reason],
+        'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
+        'refreserv/refreserv_raidz': ['FAIL', known_reason],
+        'rsend/rsend_007_pos': ['FAIL', known_reason],
+        'rsend/rsend_010_pos': ['FAIL', known_reason],
+        'rsend/rsend_011_pos': ['FAIL', known_reason],
+        'snapshot/rollback_003_pos': ['FAIL', known_reason],
+    })
+
+
+def usage(s):
+    print(s)
+    sys.exit(1)
+
+
+def process_results(pathname):
+    """Parse zfstest output into {test: result}; updates global summary."""
+    try:
+        f = open(pathname)
+    except IOError as e:
+        print('Error opening file: %s' % e)
+        sys.exit(1)
+
+    prefix = '/zfs-tests/tests/functional/'
+    pattern = (r'^Test(?:\s+\(\S+\))?:'
+               r'\s*\S*%s(\S+)\s*\(run as (\S+)\)\s*\[(\S+)\]\s*\[(\S+)\]'
+               % prefix)
+    pattern_log = r'^\s*Log directory:\s*(\S*)'
+
+    d = {}
+    with f:  # close the log file once parsing finishes or raises
+        for line in f:
+            m = re.match(pattern, line)
+            if m and len(m.groups()) == 4:
+                summary['total'] += 1
+                if m.group(4) == "PASS":
+                    summary['passed'] += 1
+                d[m.group(1)] = m.group(4)
+                continue
+
+            m = re.match(pattern_log, line)
+            if m:
+                summary['logfile'] = m.group(1)
+    return d
+
+
+class ListMaybesAction(argparse.Action):
+    """argparse action for --list-maybes: print every `maybe` test, exit 0."""
+
+    def __init__(self,
+                 option_strings,
+                 dest=argparse.SUPPRESS,
+                 default=argparse.SUPPRESS,
+                 help="list flaky tests and exit"):
+        # The SUPPRESS sentinel keeps this flag out of the parsed namespace.
+        super(ListMaybesAction, self).__init__(
+            option_strings=option_strings, dest=dest, default=default,
+            nargs=0, help=help)
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        for test in maybe:
+            print(test)
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Analyze ZTS logs')
+    parser.add_argument('logfile')
+    parser.add_argument('--list-maybes', action=ListMaybesAction)
+    parser.add_argument('--no-maybes', action='store_false', dest='maybes')
+    args = parser.parse_args()
+
+    results = process_results(args.logfile)
+
+    if summary['total'] == 0:
+        print("\n\nNo test results were found.")
+        print("Log directory:  %s" % summary['logfile'])
+        sys.exit(0)
+
+    expected = []
+    unexpected = []
+    all_maybes = True
+
+    for test in list(results.keys()):
+        if results[test] == "PASS":
+            continue
+
+        setup = test.replace(os.path.basename(test), "setup")
+        if results[test] == "SKIP" and test != setup:
+            if setup in known and known[setup][0] == "SKIP":
+                continue
+            if setup in maybe and maybe[setup][0] == "SKIP":
+                continue
+
+        if (test in known and results[test] in known[test][0]):
+            expected.append(test)
+        elif test in maybe and results[test] in maybe[test][0]:
+            if results[test] == 'SKIP' or args.maybes:
+                expected.append(test)
+            elif not args.maybes:
+                unexpected.append(test)
+        else:
+            unexpected.append(test)
+            all_maybes = False
+
+    print("\nTests with results other than PASS that are expected:")
+    for test in sorted(expected):
+        issue_url = 'https://github.com/openzfs/zfs/issues/'
+
+        # Include the reason why the result is expected, given the following:
+        # 1. Suppress test results which set the "Not applicable" reason.
+        # 2. Numerical reasons are assumed to be GitHub issue numbers.
+        # 3. When an entire test group is skipped only report the setup reason.
+        if test in known:
+            if known[test][1] == na_reason:
+                continue
+            elif isinstance(known[test][1], int):
+                expect = f"{issue_url}{known[test][1]}"
+            else:
+                expect = known[test][1]
+        elif test in maybe:
+            if isinstance(maybe[test][1], int):
+                expect = f"{issue_url}{maybe[test][1]}"
+            else:
+                expect = maybe[test][1]
+        elif setup in known and known[setup][0] == "SKIP" and setup != test:
+            continue
+        elif setup in maybe and maybe[setup][0] == "SKIP" and setup != test:
+            continue
+        else:
+            expect = "UNKNOWN REASON"
+        print("    %s %s (%s)" % (results[test], test, expect))
+
+    print("\nTests with result of PASS that are unexpected:")
+    for test in sorted(known.keys()):
+        # We probably should not be silently ignoring the case
+        # where "test" is not in "results".
+        if test not in results or results[test] != "PASS":
+            continue
+        print("    %s %s (expected %s)" % (results[test], test,
+                                           known[test][0]))
+
+    print("\nTests with results other than PASS that are unexpected:")
+    for test in sorted(unexpected):
+        expect = "PASS" if test not in known else known[test][0]
+        print("    %s %s (expected %s)" % (results[test], test, expect))
+
+    if len(unexpected) == 0:
+        sys.exit(0)
+    elif not args.maybes and all_maybes:
+        sys.exit(2)
+    else:
+        sys.exit(1)

diff --git a/zfs/tests/test-runner/include/logapi.shlib b/zfs/tests/test-runner/include/logapi.shlib
index cd7982a..9363e24 100644
--- a/zfs/tests/test-runner/include/logapi.shlib
+++ b/zfs/tests/test-runner/include/logapi.shlib

@@ -23,7 +23,7 @@
 # Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2020 by Delphix. All rights reserved.
 #
 
 . ${STF_TOOLS}/include/stf.shlib
@@ -96,16 +96,13 @@
 		out="cat $logfile"
 
 		if (( $status == 0 )); then
-			$out | egrep -i "internal error|assertion failed" \
-				> /dev/null 2>&1
-			# internal error or assertion failed
-			if [[ $? -eq 0 ]]; then
+			if $out | grep -qEi "internal error|assertion failed"; then
 				print -u2 $($out)
 				_printerror "$@" "internal error or" \
 					" assertion failure exited $status"
 				status=1
 			else
-				[[ -n $LOGAPI_DEBUG ]] && print $($out)
+				[[ -n $LOGAPI_DEBUG ]] && cat $logfile
 				_printsuccess "$@"
 			fi
 			break
@@ -165,6 +162,23 @@
 	(( $? != 0 )) && log_fail
 }
 
+# Signal numbers are platform-dependent
+case $(uname) in
+Darwin|FreeBSD)
+	SIGBUS=10
+	SIGSEGV=11
+	;;
+illumos|Linux|*)
+	SIGBUS=7
+	SIGSEGV=11
+	;;
+esac
+EXIT_SUCCESS=0
+EXIT_NOTFOUND=127
+EXIT_SIGNAL=256
+EXIT_SIGBUS=$((EXIT_SIGNAL + SIGBUS))
+EXIT_SIGSEGV=$((EXIT_SIGNAL + SIGSEGV))
+
 # Execute and print command with status where success equals non-zero result
 # or output includes expected keyword
 #
@@ -191,26 +205,23 @@
 	out="cat $logfile"
 
 	# unexpected status
-	if (( $status == 0 )); then
+	if (( $status == EXIT_SUCCESS )); then
 		 print -u2 $($out)
 		_printerror "$@" "unexpectedly exited $status"
 	# missing binary
-	elif (( $status == 127 )); then
+	elif (( $status == EXIT_NOTFOUND )); then
 		print -u2 $($out)
 		_printerror "$@" "unexpectedly exited $status (File not found)"
-	# bus error - core dump (256+signal, SIGBUS=7)
-	elif (( $status == 263 )); then
+	# bus error - core dump
+	elif (( $status == EXIT_SIGBUS )); then
 		print -u2 $($out)
 		_printerror "$@" "unexpectedly exited $status (Bus Error)"
-	# segmentation violation - core dump (256+signal, SIGSEGV=11)
-	elif (( $status == 267 )); then
+	# segmentation violation - core dump
+	elif (( $status == EXIT_SIGSEGV )); then
 		print -u2 $($out)
 		_printerror "$@" "unexpectedly exited $status (SEGV)"
 	else
-		$out | egrep -i "internal error|assertion failed" \
-			> /dev/null 2>&1
-		# internal error or assertion failed
-		if (( $? == 0 )); then
+		if $out | grep -qEi "internal error|assertion failed"; then
 			print -u2 $($out)
 			_printerror "$@" "internal error or assertion failure" \
 				" exited $status"
@@ -227,7 +238,7 @@
 		fi
 
 		if (( $ret == 0 )); then
-			[[ -n $LOGAPI_DEBUG ]] && print $($out)
+			[[ -n $LOGAPI_DEBUG ]] && cat $logfile
 			_printsuccess "$@" "exited $status"
 		fi
 	fi
@@ -258,16 +269,13 @@
 		print -u2 $($out)
 		_printerror "$@" "exited $status"
 	else
-		$out | egrep -i "internal error|assertion failed" \
-			> /dev/null 2>&1
-		# internal error or assertion failed
-		if [[ $? -eq 0 ]]; then
+		if $out | grep -qEi "internal error|assertion failed"; then
 			print -u2 $($out)
 			_printerror "$@" "internal error or assertion failure" \
 				" exited $status"
 			status=1
 		else
-			[[ -n $LOGAPI_DEBUG ]] && print $($out)
+			[[ -n $LOGAPI_DEBUG ]] && cat $logfile
 			_printsuccess "$@"
 		fi
 	fi
@@ -281,7 +289,23 @@
 
 function log_onexit
 {
-	_CLEANUP="$@"
+	_CLEANUP=("$*")
+}
+
+# Push an exit handler on the cleanup stack
+#
+# $@ - function(s) to perform on exit
+
+function log_onexit_push
+{
+	_CLEANUP+=("$*")
+}
+
+# Pop an exit handler off the cleanup stack
+
+function log_onexit_pop
+{
+	_CLEANUP=("${_CLEANUP[@]:0:${#_CLEANUP[@]}-1}")
 }
 
 #
@@ -387,6 +411,11 @@
 	_endlog $STF_OTHER "$@"
 }
 
+function set_main_pid
+{
+	_MAINPID=$1
+}
+
 #
 # Internal functions
 #
@@ -421,16 +450,27 @@
 	shift
 	(( ${#@} > 0 )) && _printline "$@"
 
+	#
+	# If we're running in a subshell then just exit and let
+	# the parent handle the failures
+	#
+	if [[ -n "$_MAINPID" && $$ != "$_MAINPID" ]]; then
+		log_note "subshell exited: "$_MAINPID
+		exit $exitcode
+	fi
+
 	if [[ $exitcode == $STF_FAIL ]] ; then
 		_execute_testfail_callbacks
 	fi
 
-	if [[ -n $_CLEANUP ]] ; then
-		typeset cleanup=$_CLEANUP
-		log_onexit ""
+	typeset stack=("${_CLEANUP[@]}")
+	log_onexit ""
+	typeset i=${#stack[@]}
+	while (( i-- )); do
+		typeset cleanup="${stack[i]}"
 		log_note "Performing local cleanup via log_onexit ($cleanup)"
 		$cleanup
-	fi
+	done
 
 	exit $exitcode
 }

diff --git a/zfs/tests/test-runner/man/test-runner.1 b/zfs/tests/test-runner/man/test-runner.1
index 9525507..b823aaa 100644
--- a/zfs/tests/test-runner/man/test-runner.1
+++ b/zfs/tests/test-runner/man/test-runner.1

@@ -8,300 +8,257 @@
 .\" source.  A copy of the CDDL is also available via the Internet at
 .\" http://www.illumos.org/license/CDDL.
 .\"
-.\"
 .\" Copyright (c) 2012 by Delphix. All rights reserved.
 .\"
-.TH run 1 "23 Sep 2012"
-.SH NAME
-run \- find, execute, and log the results of tests
-.SH SYNOPSIS
-.LP
-.nf
-\fBrun\fR [\fB-dgq] [\fB-o\fR \fIoutputdir\fR] [\fB-pP\fR \fIscript\fR] [\fB-t\fR \fIseconds\fR] [\fB-uxX\fR \fIusername\fR]
-    \fIpathname\fR ...
-.fi
-
-.LP
-.nf
-\fBrun\fR \fB-w\fR \fIrunfile\fR [\fB-gq\fR] [\fB-o\fR \fIoutputdir\fR] [\fB-pP\fR \fIscript\fR] [\fB-t\fR \fIseconds\fR]
-    [\fB-uxX\fR \fIusername\fR] \fIpathname\fR ...
-.fi
-
-.LP
-.nf
-\fBrun\fR \fB-c\fR \fIrunfile\fR [\fB-dq\fR]
-.fi
-
-.LP
-.nf
-\fBrun\fR [\fB-h\fR]
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBrun\fR command has three basic modes of operation. With neither the
-\fB-c\fR nor the \fB-w\fR option, \fBrun\fR processes the arguments provided on
-the command line, adding them to the list for this run. If a specified
-\fIpathname\fR is an executable file, it is added as a test. If a specified
-\fIpathname\fR is a directory, the behavior depends upon the \fB-g\fR option.
-If \fB-g\fR is specified, the directory is treated as a test group. See the
-section on "Test Groups" below. Without the \fB-g\fR option, \fBrun\fR simply
-descends into the directory looking for executable files. The tests are then
-executed, and the results are logged.
-
-With the \fB-w\fR option, \fBrun\fR finds tests in the manner described above.
+.Dd May 26, 2021
+.Dt RUN 1
+.Os
+.
+.Sh NAME
+.Nm run
+.Nd find, execute, and log the results of tests
+.Sh SYNOPSIS
+.Nm
+.Op Fl dgq
+.Op Fl o Ar outputdir
+.Op Fl pP Ar script
+.Op Fl t Ar seconds
+.Op Fl uxX Ar username
+.Ar pathname Ns No …
+.Pp
+.Nm
+.Fl w Ar runfile
+.Op Fl gq
+.Op Fl o Ar outputdir
+.Op Fl pP Ar script
+.Op Fl t Ar seconds
+.Op Fl uxX Ar username
+.Ar pathname Ns No …
+.Pp
+.Nm
+.Fl c Ar runfile
+.Op Fl dq
+.Pp
+.Nm
+.Op Fl h
+.
+.Sh DESCRIPTION
+.Nm
+has three basic modes of operation.
+With neither
+.Fl c
+nor
+.Fl w ,
+.Nm
+processes the arguments provided on
+the command line, adding them to the list for this run.
+If a specified
+.Ar pathname
+is an executable file, it is added as a test.
+If a specified
+.Ar pathname
+is a directory, the behavior depends upon the presence of
+.Fl g .
+If
+.Fl g
+is specified, the directory is treated as a test group.
+See the section on
+.Sy Test Groups
+below.
+Without
+.Fl g ,
+.Nm
+simply descends into the directory looking for executable files.
+The tests are then executed, and the results are logged.
+.Pp
+With
+.Fl w ,
+.Nm
+finds tests in the manner described above.
 Rather than executing the tests and logging the results, the test configuration
-is stored in a \fIrunfile\fR which can be used in future invocations, or edited
-to modify which tests are executed and which options are applied. Options
-included on the command line with \fB-w\fR become defaults in the
-\fIrunfile\fR.
-
-With the \fB-c\fR option, \fBrun\fR parses a \fIrunfile\fR, which can specify a
-series of tests and test groups to be executed. The tests are then executed,
-and the results are logged.
-.sp
-.SS "Test Groups"
-.sp
-.LP
+is stored in a
+.Ar runfile ,
+which can be used in future invocations, or edited
+to modify which tests are executed and which options are applied.
+Options included on the command line with
+.Fl w
+become defaults in the
+.Ar runfile .
+.Pp
+With
+.Fl c ,
+.Nm
+parses a
+.Ar runfile ,
+which can specify a series of tests and test groups to be executed.
+The tests are then executed, and the results are logged.
+.
+.Ss Test Groups
 A test group is comprised of a set of executable files, all of which exist in
-one directory. The options specified on the command line or in a \fIrunfile\fR
-apply to individual tests in the group. The exception is options pertaining to
-pre and post scripts, which act on all tests as a group. Rather than running
-before and after each test, these scripts are run only once each at the start
-and end of the test group.
-.SS "Test Execution"
-.sp
-.LP
+one directory.
+The options specified on the command line or in a
+.Ar runfile
+apply to individual tests in the group.
+The exception is options pertaining to pre and post scripts, which act on all tests as a group.
+Rather than running before and after each test,
+these scripts are run only once each at the start and end of the test group.
+.Ss Test Execution
 The specified tests run serially, and are typically assigned results according
-to exit values. Tests that exit zero and non-zero are marked "PASS" and "FAIL"
-respectively. When a pre script fails for a test group, only the post script is
-executed, and the remaining tests are marked "SKIPPED." Any test that exceeds
-its \fItimeout\fR is terminated, and marked "KILLED."
-
-By default, tests are executed with the credentials of the \fBrun\fR script.
-Executing tests with other credentials is done via \fBsudo\fR(1m), which must
-be configured to allow execution without prompting for a password. Environment
-variables from the calling shell are available to individual tests. During test
-execution, the working directory is changed to \fIoutputdir\fR.
-.SS "Output Logging"
-.sp
-.LP
-By default, \fBrun\fR will print one line on standard output at the conclusion
-of each test indicating the test name, result and elapsed time. Additionally,
-for each invocation of \fBrun\fR, a directory is created using the ISO 8601
-date format. Within this directory is a file named \fIlog\fR containing all the
-test output with timestamps, and a directory for each test. Within the test
-directories, there is one file each for standard output, standard error and
-merged output. The default location for the \fIoutputdir\fR is
-\fI/var/tmp/test_results\fR.
-.SS "Runfiles"
-.sp
-.LP
-The \fIrunfile\fR is an ini style configuration file that describes a test run.
-The file has one section named "DEFAULT," which contains configuration option
-names and their values in "name = value" format. The values in this section
-apply to all the subsequent sections, unless they are also specified there, in
-which case the default is overridden. The remaining section names are the
-absolute pathnames of files and directories, describing tests and test groups
-respectively. The legal option names are:
-.sp
-.ne 2
-.na
-\fBoutputdir\fR = \fIpathname\fR
-.ad
-.sp .6
-.RS 4n
+to exit values.
+Tests that exit zero and non-zero are marked
+.Sy PASS
+and
+.Sy FAIL ,
+respectively.
+When a pre script fails for a test group, only the post script is executed,
+and the remaining tests are marked
+.Sy SKIPPED .
+Any test that exceeds
+its
+.Ar timeout
+is terminated, and marked
+.Sy KILLED .
+.Pp
+By default, tests are executed with the credentials of the
+.Nm
+script.
+Executing tests with other credentials is done via
+.Xr sudo 1m ,
+which must
+be configured to allow execution without prompting for a password.
+Environment variables from the calling shell are available to individual tests.
+During test execution, the working directory is changed to
+.Ar outputdir .
+.
+.Ss Output Logging
+By default,
+.Nm
+will print one line on standard output at the conclusion
+of each test indicating the test name, result and elapsed time.
+Additionally, for each invocation of
+.Nm ,
+a directory is created using the ISO 8601 date format.
+Within this directory is a file named
+.Sy log
+containing all the
+test output with timestamps, and a directory for each test.
+Within the test directories, there is one file each for standard output,
+standard error and merged output.
+The default location for the
+.Ar outputdir
+is
+.Pa /var/tmp/test_results .
+.Ss "Runfiles"
+The
+.Ar runfile
+is an INI-style configuration file that describes a test run.
+The file has one section named
+.Sy DEFAULT ,
+which contains configuration option
+names and their values in
+.Sy name No = Ar value
+format.
+The values in this section apply to all the subsequent sections,
+unless they are also specified there, in which case the default is overridden.
+The remaining section names are the absolute pathnames of files and directories,
+describing tests and test groups respectively.
+The legal option names are:
+.Bl -tag -width "tests = ['filename', …]"
+.It Sy outputdir No = Ar pathname
 The name of the directory that holds test logs.
-.RE
-.sp
-.ne 2
-.na
-\fBpre\fR = \fIscript\fR
-.ad
-.sp .6
-.RS 4n
-Run \fIscript\fR prior to the test or test group.
-.RE
-.sp
-.ne 2
-.na
-\fBpre_user\fR = \fIusername\fR
-.ad
-.sp .6
-.RS 4n
-Execute the pre script as \fIusername\fR.
-.RE
-.sp
-.ne 2
-.na
-\fBpost\fR = \fIscript\fR
-.ad
-.sp .6
-.RS 4n
-Run \fIscript\fR after the test or test group.
-.RE
-.sp
-.ne 2
-.na
-\fBpost_user\fR = \fIusername\fR
-.ad
-.sp .6
-.RS 4n
-Execute the post script as \fIusername\fR.
-.RE
-.sp
-.ne 2
-.na
-\fBquiet\fR = [\fITrue\fR|\fIFalse\fR]
-.ad
-.sp .6
-.RS 4n
-If set to True, only the results summary is printed to standard out.
-.RE
-.sp
-.ne 2
-.na
-\fBtests\fR = [\fI'filename'\fR [,...]]
-.ad
-.sp .6
-.RS 4n
-Specify a list of \fIfilenames\fR for this test group. Only the basename of the
-absolute path is required. This option is only valid for test groups, and each
-\fIfilename\fR must be single quoted.
-.RE
-.sp
-.ne 2
-.na
-\fBtimeout\fR = \fIn\fR
-.ad
-.sp .6
-.RS 4n
-A timeout value of \fIn\fR seconds.
-.RE
-.sp
-.ne 2
-.na
-\fBuser\fR = \fIusername\fR
-.ad
-.sp .6
-.RS 4n
-Execute the test or test group as \fIusername\fR.
-.RE
-
-.SH OPTIONS
-.sp
-.LP
-The following options are available for the \fBrun\fR command.
-.sp
-.ne 2
-.na
-\fB-c\fR \fIrunfile\fR
-.ad
-.RS 6n
-Specify a \fIrunfile\fR to be consumed by the run command.
-.RE
-
-.ne 2
-.na
-\fB-d\fR
-.ad
-.RS 6n
-Dry run mode. Execute no tests, but print a description of each test that would
-have been run.
-.RE
-
-.ne 2
-.na
-\fB-g\fR
-.ad
-.RS 6n
+.It Sy pre No = Ar script
+Run
+.Ar script
+prior to the test or test group.
+.It Sy pre_user No = Ar username
+Execute the pre script as
+.Ar username .
+.It Sy post No = Ar script
+Run
+.Ar script
+after the test or test group.
+.It Sy post_user No = Ar username
+Execute the post script as
+.Ar username .
+.It Sy quiet No = Sy True Ns | Ns Sy False
+If
+.Sy True ,
+only the results summary is printed to standard out.
+.It Sy tests No = [ Ns Ar 'filename' , No … ]
+Specify a list of
+.Ar filenames
+for this test group.
+Only the basename of the absolute path is required.
+This option is only valid for test groups, and each
+.Ar filename
+must be single quoted.
+.It Sy timeout No = Ar n
+A timeout value of
+.Ar n
+seconds.
+.It Sy user No = Ar username
+Execute the test or test group as
+.Ar username .
+.El
+.
+.Sh OPTIONS
+.Bl -tag -width "-o outputdir"
+.It Fl c Ar runfile
+Specify a
+.Ar runfile
+to be consumed by the run command.
+.It Fl d
+Dry run mode.
+Execute no tests, but print a description of each test that would have been run.
+.It Fl m
+Enable kmemleak reporting (Linux only).
+.It Fl g
 Create test groups from any directories found while searching for tests.
-.RE
-
-.ne 2
-.na
-\fB-o\fR \fIoutputdir\fR
-.ad
-.RS 6n
+.It Fl o Ar outputdir
 Specify the directory in which to write test results.
-.RE
-
-.ne 2
-.na
-\fB-p\fR \fIscript\fR
-.ad
-.RS 6n
-Run \fIscript\fR prior to any test or test group.
-.RE
-
-.ne 2
-.na
-\fB-P\fR \fIscript\fR
-.ad
-.RS 6n
-Run \fIscript\fR after any test or test group.
-.RE
-
-.ne 2
-.na
-\fB-q\fR
-.ad
-.RS 6n
+.It Fl p Ar script
+Run
+.Ar script
+prior to any test or test group.
+.It Fl P Ar script
+Run
+.Ar script
+after any test or test group.
+.It Fl q
 Print only the results summary to the standard output.
-.RE
-
-.ne 2
-.na
-\fB-t\fR \fIn\fR
-.ad
-.RS 6n
-Specify a timeout value of \fIn\fR seconds per test.
-.RE
-
-.ne 2
-.na
-\fB-u\fR \fIusername\fR
-.ad
-.RS 6n
-Execute tests or test groups as \fIusername\fR.
-.RE
-
-.ne 2
-.na
-\fB-w\fR \fIrunfile\fR
-.ad
-.RS 6n
-Specify the name of the \fIrunfile\fR to create.
-.RE
-
-.ne 2
-.na
-\fB-x\fR \fIusername\fR
-.ad
-.RS 6n
-Execute the pre script as \fIusername\fR.
-.RE
-
-.ne 2
-.na
-\fB-X\fR \fIusername\fR
-.ad
-.RS 6n
-Execute the post script as \fIusername\fR.
-.RE
-
-.SH EXAMPLES
-.LP
-\fBExample 1\fR Running ad-hoc tests.
-.sp
-.LP
-This example demonstrates the simplest invocation of \fBrun\fR.
-
-.sp
-.in +2
-.nf
-% \fBrun my-tests\fR
+.It Fl s Ar script
+Run
+.Ar script
+as a failsafe after any test is killed.
+.It Fl S Ar username
+Execute the failsafe script as
+.Ar username .
+.It Fl t Ar n
+Specify a timeout value of
+.Ar n
+seconds per test.
+.It Fl u Ar username
+Execute tests or test groups as
+.Ar username .
+.It Fl w Ar runfile
+Specify the name of the
+.Ar runfile
+to create.
+.It Fl x Ar username
+Execute the pre script as
+.Ar username .
+.It Fl X Ar username
+Execute the post script as
+.Ar username .
+.El
+.
+.Sh EXAMPLES
+.Bl -tag -width "-h"
+.It Sy Example 1 : No Running ad-hoc tests.
+This example demonstrates the simplest invocation of
+.Nm .
+.Bd -literal
+.No % Nm run Ar my-tests
 Test: /home/jkennedy/my-tests/test-01                    [00:02] [PASS]
 Test: /home/jkennedy/my-tests/test-02                    [00:04] [PASS]
 Test: /home/jkennedy/my-tests/test-03                    [00:01] [PASS]
@@ -312,20 +269,14 @@
 Running Time:   00:00:07
 Percent passed: 100.0%
 Log directory:  /var/tmp/test_results/20120923T180654
-.fi
-.in -2
-
-.LP
-\fBExample 2\fR Creating a \fIrunfile\fR for future use.
-.sp
-.LP
-This example demonstrates creating a \fIrunfile\fR with non default options.
-
-.sp
-.in +2
-.nf
-% \fBrun -p setup -x root -g -w new-tests.run new-tests\fR
-% \fBcat new-tests.run\fR
+.Ed
+.It Sy Example 2 : No Creating a Ar runfile No for future use.
+This example demonstrates creating a
+.Ar runfile
+with non-default options.
+.Bd -literal
+.No % Nm run Fl p Ar setup Fl x Ar root Fl g Fl w Ar new-tests.run Ar new-tests
+.No % Nm cat Pa new-tests.run
 [DEFAULT]
 pre = setup
 post_user =
@@ -338,33 +289,8 @@
 
 [/home/jkennedy/new-tests]
 tests = ['test-01', 'test-02', 'test-03']
-.fi
-.in -2
-
-.SH EXIT STATUS
-.sp
-.LP
-The following exit values are returned:
-.sp
-.ne 2
-.na
-\fB\fB0\fR\fR
-.ad
-.sp .6
-.RS 4n
-Successful completion.
-.RE
-.sp
-.ne 2
-.na
-\fB\fB1\fR\fR
-.ad
-.sp .6
-.RS 4n
-An error occurred.
-.RE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBsudo\fR(1m)
+.Ed
+.El
+.
+.Sh SEE ALSO
+.Xr sudo 1m

diff --git a/zfs/tests/zfs-tests/callbacks/Makefile.am b/zfs/tests/zfs-tests/callbacks/Makefile.am
index 30e8472..512a737 100644
--- a/zfs/tests/zfs-tests/callbacks/Makefile.am
+++ b/zfs/tests/zfs-tests/callbacks/Makefile.am

@@ -1,5 +1,6 @@
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/callbacks
 dist_pkgdata_SCRIPTS = \
+	zfs_failsafe.ksh \
 	zfs_dbgmsg.ksh \
 	zfs_dmesg.ksh \
 	zfs_mmp.ksh

diff --git a/zfs/tests/zfs-tests/callbacks/zfs_failsafe.ksh b/zfs/tests/zfs-tests/callbacks/zfs_failsafe.ksh
new file mode 100755
index 0000000..0d14df7
--- /dev/null
+++ b/zfs/tests/zfs-tests/callbacks/zfs_failsafe.ksh

@@ -0,0 +1,8 @@
+#!/bin/ksh
+
+# Commands to perform failsafe-critical cleanup after a test is killed.
+#
+# This should only be used to ensure the system is restored to a functional
+# state in the event of tests being killed (preventing normal cleanup).
+
+zinject -c all

diff --git a/zfs/tests/zfs-tests/cmd/Makefile.am b/zfs/tests/zfs-tests/cmd/Makefile.am
index 39a538d..7ec4cb6 100644
--- a/zfs/tests/zfs-tests/cmd/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/Makefile.am

@@ -1,13 +1,16 @@
 EXTRA_DIST = file_common.h
 
 SUBDIRS = \
+	badsend \
+	btree_test \
 	chg_usr_exec \
-	user_ns_exec \
 	devname2devid \
 	dir_rd_update \
+	draid \
 	file_check \
 	file_trunc \
 	file_write \
+	get_diff \
 	largest_file \
 	libzfs_input_check \
 	mkbusy \
@@ -16,12 +19,21 @@
 	mktree \
 	mmap_exec \
 	mmap_libaio \
+	mmap_seek \
+	mmap_sync \
 	mmapwrite \
 	nvlist_to_lua \
-	randfree_file \
 	randwritecomp \
 	readmmap \
 	rename_dir \
 	rm_lnkcnt_zero_file \
-	threadsappend \
+	send_doall \
+	stride_dd \
+	threadsappend
+
+if BUILD_LINUX
+SUBDIRS += \
+	randfree_file \
+	user_ns_exec \
 	xattrtest
+endif

diff --git a/zfs/tests/zfs-tests/cmd/badsend/.gitignore b/zfs/tests/zfs-tests/cmd/badsend/.gitignore
new file mode 100644
index 0000000..d2efa62
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/badsend/.gitignore

@@ -0,0 +1 @@
+/badsend

diff --git a/zfs/tests/zfs-tests/cmd/badsend/Makefile.am b/zfs/tests/zfs-tests/cmd/badsend/Makefile.am
new file mode 100644
index 0000000..5a8946f
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/badsend/Makefile.am

@@ -0,0 +1,11 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = badsend
+
+badsend_SOURCES = badsend.c
+badsend_LDADD = \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la

diff --git a/zfs/tests/zfs-tests/cmd/badsend/badsend.c b/zfs/tests/zfs-tests/cmd/badsend/badsend.c
new file mode 100644
index 0000000..af17bc7
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/badsend/badsend.c

@@ -0,0 +1,136 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Portions Copyright 2020 iXsystems, Inc.
+ */
+
+/*
+ * Test some invalid send operations with libzfs/libzfs_core.
+ *
+ * Specifying the to and from snaps in the wrong order should return EXDEV.
+ * We are checking that the early return doesn't accidentally leave any
+ * references held, so this test is designed to trigger a panic when asserts
+ * are verified with the bug present.
+ */
+
+#include <libzfs.h>
+#include <libzfs_core.h>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sysexits.h>
+#include <err.h>
+
+static void
+usage(const char *name)
+{
+	fprintf(stderr, "usage: %s snap0 snap1\n", name);
+	exit(EX_USAGE);
+}
+
+int
+main(int argc, char const * const argv[])
+{
+	sendflags_t flags = { 0 };
+	libzfs_handle_t *zhdl;
+	zfs_handle_t *zhp;
+	const char *fromfull, *tofull, *fsname, *fromsnap, *tosnap, *p;
+	uint64_t size;
+	int fd, error;
+
+	if (argc != 3)
+		usage(argv[0]);
+
+	fromfull = argv[1];
+	tofull = argv[2];
+
+	p = strchr(fromfull, '@');
+	if (p == NULL)
+		usage(argv[0]);
+	fromsnap = p + 1;
+
+	p = strchr(tofull, '@');
+	if (p == NULL)
+		usage(argv[0]);
+	tosnap = p + 1;
+
+	fsname = strndup(tofull, p - tofull);
+	if (strncmp(fsname, fromfull, p - tofull) != 0)
+		usage(argv[0]);
+
+	fd = open("/dev/null", O_WRONLY);
+	if (fd == -1)
+		err(EX_OSERR, "open(\"/dev/null\", O_WRONLY)");
+
+	zhdl = libzfs_init();
+	if (zhdl == NULL)
+		errx(EX_OSERR, "libzfs_init(): %s", libzfs_error_init(errno));
+
+	zhp = zfs_open(zhdl, fsname, ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		err(EX_OSERR, "zfs_open(\"%s\")", fsname);
+
+	/*
+	 * Exercise EXDEV in dmu_send_obj.  The error gets translated to
+	 * EZFS_CROSSTARGET in libzfs.
+	 */
+	error = zfs_send(zhp, tosnap, fromsnap, &flags, fd, NULL, NULL, NULL);
+	if (error == 0 || libzfs_errno(zhdl) != EZFS_CROSSTARGET)
+		errx(EX_OSERR, "zfs_send(\"%s\", \"%s\") should have failed "
+		    "with EZFS_CROSSTARGET, not %d",
+		    tofull, fromfull, libzfs_errno(zhdl));
+	printf("zfs_send(\"%s\", \"%s\"): %s\n",
+	    tofull, fromfull, libzfs_error_description(zhdl));
+
+	zfs_close(zhp);
+
+	/*
+	 * Exercise EXDEV in dmu_send.
+	 */
+	error = lzc_send_resume_redacted(fromfull, tofull, fd, 0, 0, 0, NULL);
+	if (error != EXDEV)
+		errx(EX_OSERR, "lzc_send_resume_redacted(\"%s\", \"%s\")"
+		    " should have failed with EXDEV, not %d",
+		    fromfull, tofull, error);
+	printf("lzc_send_resume_redacted(\"%s\", \"%s\"): %s\n",
+	    fromfull, tofull, strerror(error));
+
+	/*
+	 * Exercise EXDEV in dmu_send_estimate_fast.
+	 */
+	error = lzc_send_space_resume_redacted(fromfull, tofull, 0, 0, 0, 0,
+	    NULL, fd, &size);
+	if (error != EXDEV)
+		errx(EX_OSERR, "lzc_send_space_resume_redacted(\"%s\", \"%s\")"
+		    " should have failed with EXDEV, not %d",
+		    fromfull, tofull, error);
+	printf("lzc_send_space_resume_redacted(\"%s\", \"%s\"): %s\n",
+	    fromfull, tofull, strerror(error));
+
+	close(fd);
+	libzfs_fini(zhdl);
+	free((void *)fsname);
+
+	return (EXIT_SUCCESS);
+}

diff --git a/zfs/tests/zfs-tests/cmd/btree_test/Makefile.am b/zfs/tests/zfs-tests/cmd/btree_test/Makefile.am
new file mode 100644
index 0000000..4c9a1a4
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/btree_test/Makefile.am

@@ -0,0 +1,32 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+DEFAULT_INCLUDES += \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/lib/libspl/include
+
+# Unconditionally enable ASSERTs
+AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
+
+pkgexec_PROGRAMS = btree_test
+btree_test_SOURCES = btree_test.c
+
+btree_test_LDADD = \
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la

diff --git a/zfs/tests/zfs-tests/cmd/btree_test/btree_test.c b/zfs/tests/zfs-tests/cmd/btree_test/btree_test.c
new file mode 100644
index 0000000..8de14ff
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/btree_test/btree_test.c

@@ -0,0 +1,554 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/avl.h>
+#include <sys/btree.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define	BUFSIZE 256
+
+int seed = 0;
+int stress_timeout = 180;
+int contents_frequency = 100;
+int tree_limit = 64 * 1024;
+boolean_t stress_only = B_FALSE;
+
+static void
+usage(int exit_value)
+{
+	(void) fprintf(stderr, "Usage:\tbtree_test -n <test_name>\n");
+	(void) fprintf(stderr, "\tbtree_test -s [-r <seed>] [-l <limit>] "
+	    "[-t <timeout>] [-c check_contents]\n");
+	(void) fprintf(stderr, "\tbtree_test [-r <seed>] [-l <limit>] "
+	    "[-t <timeout>] [-c check_contents]\n");
+	(void) fprintf(stderr, "\n    With the -n option, run the named "
+	    "negative test. With the -s option,\n");
+	(void) fprintf(stderr, "    run the stress test according to the "
+	    "other options passed. With\n");
+	(void) fprintf(stderr, "    neither, run all the positive tests, "
+	    "including the stress test with\n");
+	(void) fprintf(stderr, "    the default options.\n");
+	(void) fprintf(stderr, "\n    Options that control the stress test\n");
+	(void) fprintf(stderr, "\t-c stress iterations after which to compare "
+	    "tree contents [default: 100]\n");
+	(void) fprintf(stderr, "\t-l the largest value to allow in the tree "
+	    "[default: 64K]\n");
+	(void) fprintf(stderr, "\t-r random seed [default: from "
+	    "gettimeofday()]\n");
+	(void) fprintf(stderr, "\t-t seconds to let the stress test run "
+	    "[default: 180]\n");
+	exit(exit_value);
+}
+
+typedef struct int_node {
+	avl_node_t node;
+	uint64_t data;
+} int_node_t;
+
+/*
+ * Utility functions
+ */
+
+static int
+avl_compare(const void *v1, const void *v2)
+{
+	const int_node_t *n1 = v1;
+	const int_node_t *n2 = v2;
+	uint64_t a = n1->data;
+	uint64_t b = n2->data;
+
+	return (TREE_CMP(a, b));
+}
+
+static int
+zfs_btree_compare(const void *v1, const void *v2)
+{
+	const uint64_t *a = v1;
+	const uint64_t *b = v2;
+
+	return (TREE_CMP(*a, *b));
+}
+
+static void
+verify_contents(avl_tree_t *avl, zfs_btree_t *bt)
+{
+	static int count = 0;
+	zfs_btree_index_t bt_idx = {0};
+	int_node_t *node;
+	uint64_t *data;
+
+	boolean_t forward = count % 2 == 0 ? B_TRUE : B_FALSE;
+	count++;
+
+	ASSERT3U(avl_numnodes(avl), ==, zfs_btree_numnodes(bt));
+	if (forward == B_TRUE) {
+		node = avl_first(avl);
+		data = zfs_btree_first(bt, &bt_idx);
+	} else {
+		node = avl_last(avl);
+		data = zfs_btree_last(bt, &bt_idx);
+	}
+
+	while (node != NULL) {
+		ASSERT3U(*data, ==, node->data);
+		if (forward == B_TRUE) {
+			data = zfs_btree_next(bt, &bt_idx, &bt_idx);
+			node = AVL_NEXT(avl, node);
+		} else {
+			data = zfs_btree_prev(bt, &bt_idx, &bt_idx);
+			node = AVL_PREV(avl, node);
+		}
+	}
+}
+
+static void
+verify_node(avl_tree_t *avl, zfs_btree_t *bt, int_node_t *node)
+{
+	zfs_btree_index_t bt_idx = {0};
+	zfs_btree_index_t bt_idx2 = {0};
+	int_node_t *inp;
+	uint64_t data = node->data;
+	uint64_t *rv = NULL;
+
+	ASSERT3U(avl_numnodes(avl), ==, zfs_btree_numnodes(bt));
+	ASSERT3P((rv = (uint64_t *)zfs_btree_find(bt, &data, &bt_idx)), !=,
+	    NULL);
+	ASSERT3S(*rv, ==, data);
+	ASSERT3P(zfs_btree_get(bt, &bt_idx), !=, NULL);
+	ASSERT3S(data, ==, *(uint64_t *)zfs_btree_get(bt, &bt_idx));
+
+	if ((inp = AVL_NEXT(avl, node)) != NULL) {
+		ASSERT3P((rv = zfs_btree_next(bt, &bt_idx, &bt_idx2)), !=,
+		    NULL);
+		ASSERT3P(rv, ==, zfs_btree_get(bt, &bt_idx2));
+		ASSERT3S(inp->data, ==, *rv);
+	} else {
+		ASSERT3U(data, ==, *(uint64_t *)zfs_btree_last(bt, &bt_idx));
+	}
+
+	if ((inp = AVL_PREV(avl, node)) != NULL) {
+		ASSERT3P((rv = zfs_btree_prev(bt, &bt_idx, &bt_idx2)), !=,
+		    NULL);
+		ASSERT3P(rv, ==, zfs_btree_get(bt, &bt_idx2));
+		ASSERT3S(inp->data, ==, *rv);
+	} else {
+		ASSERT3U(data, ==, *(uint64_t *)zfs_btree_first(bt, &bt_idx));
+	}
+}
+
+/*
+ * Tests
+ */
+
+/* Verify that zfs_btree_find works correctly with a NULL index. */
+static int
+find_without_index(zfs_btree_t *bt, char *why)
+{
+	u_longlong_t *p, i = 12345;
+
+	zfs_btree_add(bt, &i);
+	if ((p = (u_longlong_t *)zfs_btree_find(bt, &i, NULL)) == NULL ||
+	    *p != i) {
+		snprintf(why, BUFSIZE, "Unexpectedly found %llu\n",
+		    p == NULL ? 0 : *p);
+		return (1);
+	}
+
+	i++;
+
+	if ((p = (u_longlong_t *)zfs_btree_find(bt, &i, NULL)) != NULL) {
+		snprintf(why, BUFSIZE, "Found bad value: %llu\n", *p);
+		return (1);
+	}
+
+	return (0);
+}
+
+/* Verify simple insertion and removal from the tree. */
+static int
+insert_find_remove(zfs_btree_t *bt, char *why)
+{
+	u_longlong_t *p, i = 12345;
+	zfs_btree_index_t bt_idx = {0};
+
+	/* Insert 'i' into the tree, and attempt to find it again. */
+	zfs_btree_add(bt, &i);
+	if ((p = (u_longlong_t *)zfs_btree_find(bt, &i, &bt_idx)) == NULL) {
+		snprintf(why, BUFSIZE, "Didn't find value in tree\n");
+		return (1);
+	} else if (*p != i) {
+		snprintf(why, BUFSIZE, "Found (%llu) in tree\n", *p);
+		return (1);
+	}
+	ASSERT3S(zfs_btree_numnodes(bt), ==, 1);
+	zfs_btree_verify(bt);
+
+	/* Remove 'i' from the tree, and verify it is not found. */
+	zfs_btree_remove(bt, &i);
+	if ((p = (u_longlong_t *)zfs_btree_find(bt, &i, &bt_idx)) != NULL) {
+		snprintf(why, BUFSIZE, "Found removed value (%llu)\n", *p);
+		return (1);
+	}
+	ASSERT3S(zfs_btree_numnodes(bt), ==, 0);
+	zfs_btree_verify(bt);
+
+	return (0);
+}
+
+/*
+ * Add a number of random entries into a btree and avl tree, then drain both
+ * from alternating ends, verifying that they agree. Returns 0 on success, or
+ * 1 with a message written to 'why' (at most BUFSIZE bytes) on failure.
+ */
+static int
+drain_tree(zfs_btree_t *bt, char *why)
+{
+	uint64_t *p;
+	avl_tree_t avl;
+	int i = 0;
+	int_node_t *node;
+	avl_index_t avl_idx = {0};
+	zfs_btree_index_t bt_idx = {0};
+
+	avl_create(&avl, avl_compare, sizeof (int_node_t),
+	    offsetof(int_node_t, node));
+
+	/* Fill both trees with the same data */
+	for (i = 0; i < 64 * 1024; i++) {
+		void *ret;
+
+		u_longlong_t randval = random();
+		if ((p = (uint64_t *)zfs_btree_find(bt, &randval, &bt_idx)) !=
+		    NULL) {
+			continue;
+		}
+		zfs_btree_add_idx(bt, &randval, &bt_idx);
+
+		node = malloc(sizeof (int_node_t));
+		node->data = randval;
+		if ((ret = avl_find(&avl, node, &avl_idx)) != NULL) {
+			snprintf(why, BUFSIZE, "Found in avl: %llu\n", randval);
+			return (1);
+		}
+		avl_insert(&avl, node, avl_idx);
+	}
+
+	/* Remove data from either side of the trees, comparing the data */
+	while (avl_numnodes(&avl) != 0) {
+		uint64_t *data;
+
+		ASSERT3U(avl_numnodes(&avl), ==, zfs_btree_numnodes(bt));
+		if (avl_numnodes(&avl) % 2 == 0) {
+			node = avl_first(&avl);
+			data = zfs_btree_first(bt, &bt_idx);
+		} else {
+			node = avl_last(&avl);
+			data = zfs_btree_last(bt, &bt_idx);
+		}
+		ASSERT3U(node->data, ==, *data);
+		zfs_btree_remove_idx(bt, &bt_idx);
+		avl_remove(&avl, node);
+
+		if (avl_numnodes(&avl) == 0) {
+			break;
+		}
+
+		node = avl_first(&avl);
+		ASSERT3U(node->data, ==,
+		    *(uint64_t *)zfs_btree_first(bt, NULL));
+		node = avl_last(&avl);
+		ASSERT3U(node->data, ==, *(uint64_t *)zfs_btree_last(bt, NULL));
+	}
+	ASSERT3S(zfs_btree_numnodes(bt), ==, 0);
+
+	void *avl_cookie = NULL;
+	while ((node = avl_destroy_nodes(&avl, &avl_cookie)) != NULL)
+		free(node);
+	avl_destroy(&avl);
+
+	return (0);
+}
+
+/*
+ * Stress test: apply a stream of random operations to both an AVL tree and
+ * the btree and check that the two stay in agreement.
+ *
+ * Each iteration draws a random value below tree_limit.  If the value is
+ * not in the AVL tree it is inserted into both trees; otherwise it is
+ * removed from both.  The btree is verified after every operation and,
+ * every contents_frequency iterations, the contents of both trees are
+ * compared.  The loop ends once more than stress_timeout seconds have
+ * elapsed.  This stress option can also be run on its own from the command
+ * line, in which case (stress_only) the btree is emptied before returning.
+ *
+ * bt  - the btree under test.
+ * why - failure-description buffer; not used by this test (main() passes
+ *       NULL for the stress-only run).
+ *
+ * Returns 0 whenever it returns.  The process exits if a node cannot be
+ * allocated.
+ */
+static int
+stress_tree(zfs_btree_t *bt, char *why)
+{
+	avl_tree_t avl;
+	int_node_t *node;
+	struct timeval tp;
+	time_t t0;
+	int insertions = 0, removals = 0, iterations = 0;
+	u_longlong_t max = 0, min = UINT64_MAX;
+
+	(void) gettimeofday(&tp, NULL);
+	t0 = tp.tv_sec;
+
+	avl_create(&avl, avl_compare, sizeof (int_node_t),
+	    offsetof(int_node_t, node));
+
+	while (1) {
+		zfs_btree_index_t bt_idx = {0};
+		avl_index_t avl_idx = {0};
+
+		uint64_t randval = random() % tree_limit;
+		node = malloc(sizeof (*node));
+		if (node == NULL) {
+			/* Nothing useful can be tested without memory. */
+			perror("malloc");
+			exit(EXIT_FAILURE);
+		}
+		node->data = randval;
+
+		/* Track the range of values seen for the final report. */
+		max = randval > max ? randval : max;
+		min = randval < min ? randval : min;
+
+		void *ret = avl_find(&avl, node, &avl_idx);
+		if (ret == NULL) {
+			/* New value: it must be absent from the btree too. */
+			insertions++;
+			avl_insert(&avl, node, avl_idx);
+			ASSERT3P(zfs_btree_find(bt, &randval, &bt_idx), ==,
+			    NULL);
+			zfs_btree_add_idx(bt, &randval, &bt_idx);
+			verify_node(&avl, bt, node);
+		} else {
+			/*
+			 * Known value: remove it from both trees.  Both the
+			 * node found in the AVL tree and the probe node
+			 * allocated above are released.
+			 */
+			removals++;
+			verify_node(&avl, bt, ret);
+			zfs_btree_remove(bt, &randval);
+			avl_remove(&avl, ret);
+			free(ret);
+			free(node);
+		}
+
+		zfs_btree_verify(bt);
+
+		iterations++;
+		if (iterations % contents_frequency == 0) {
+			verify_contents(&avl, bt);
+		}
+
+		zfs_btree_verify(bt);
+
+		(void) gettimeofday(&tp, NULL);
+		if (tp.tv_sec > t0 + stress_timeout) {
+			/* The counters are ints, so print them with %d. */
+			fprintf(stderr, "insertions/removals: %d/%d\nmax/min: "
+			    "%llu/%llu\n", insertions, removals, max, min);
+			break;
+		}
+	}
+
+	/* Release every node still held by the AVL tree. */
+	void *avl_cookie = NULL;
+	while ((node = avl_destroy_nodes(&avl, &avl_cookie)) != NULL)
+		free(node);
+	avl_destroy(&avl);
+
+	/*
+	 * In stress-only mode main() returns straight after this call, so
+	 * drain the btree here.
+	 */
+	if (stress_only) {
+		zfs_btree_index_t *idx = NULL;
+		uint64_t *rv;
+
+		while ((rv = zfs_btree_destroy_nodes(bt, &idx)) != NULL)
+			;
+		zfs_btree_verify(bt);
+	}
+
+	return (0);
+}
+
+/*
+ * Negative test: adding a value that is already present must crash.
+ *
+ * The value is first confirmed absent, then added, then confirmed present,
+ * and finally added a second time.  Reaching any return statement means the
+ * expected crash did not happen, so a return of 0 is a failure.
+ */
+static int
+insert_duplicate(zfs_btree_t *bt)
+{
+	zfs_btree_index_t where = {0};
+	uint64_t value = 23456;
+
+	if (zfs_btree_find(bt, &value, &where) != NULL) {
+		fprintf(stderr, "Found value in empty tree.\n");
+		return (0);
+	}
+
+	zfs_btree_add_idx(bt, &value, &where);
+
+	if (zfs_btree_find(bt, &value, &where) == NULL) {
+		fprintf(stderr, "Did not find expected value.\n");
+		return (0);
+	}
+
+	/* The second insertion of the same value is expected to crash. */
+	zfs_btree_add_idx(bt, &value, NULL);
+
+	return (0);
+}
+
+/*
+ * Negative test: removing a value that is not in the tree must crash.
+ *
+ * The value is first confirmed absent and then removed.  Reaching any
+ * return statement means the expected crash did not happen, so a return
+ * of 0 is a failure.
+ */
+static int
+remove_missing(zfs_btree_t *bt)
+{
+	zfs_btree_index_t where = {0};
+	uint64_t value = 23456;
+
+	if (zfs_btree_find(bt, &value, &where) != NULL) {
+		fprintf(stderr, "Found value in empty tree.\n");
+		return (0);
+	}
+
+	/* Removing an entry that does not exist is expected to crash. */
+	zfs_btree_remove(bt, &value);
+
+	return (0);
+}
+
+/*
+ * Run the negative test called test_name against bt.
+ *
+ * The core file size limit is set to zero first, since the recognized
+ * tests ("insert_duplicate" and "remove_missing") are expected to crash.
+ * An unrecognized name runs nothing.  Always returns 0 if it returns.
+ */
+static int
+do_negative_test(zfs_btree_t *bt, char *test_name)
+{
+	struct rlimit no_core = {0};
+	int rval = 0;
+
+	(void) setrlimit(RLIMIT_CORE, &no_core);
+
+	if (strcmp(test_name, "remove_missing") == 0)
+		rval = remove_missing(bt);
+	else if (strcmp(test_name, "insert_duplicate") == 0)
+		rval = insert_duplicate(bt);
+
+	/*
+	 * Callers expect a non-zero exit status from these tests, and the
+	 * test should have crashed before reaching this point, so report
+	 * what happened and return 0.
+	 */
+	(void) fprintf(stderr, "Test: %s returned %d.\n", test_name, rval);
+	return (0);
+}
+
+/*
+ * One positive test: the name printed by main() and the function to run.
+ * The function receives the btree under test and a buffer for a failure
+ * description; main() treats a return of 0 as success.
+ */
+typedef struct btree_test {
+	const char	*name;
+	int		(*func)(zfs_btree_t *, char *);
+} btree_test_t;
+
+/*
+ * Positive tests, run by main() in this order.  The list ends with an
+ * entry whose name is NULL.
+ */
+static btree_test_t test_table[] = {
+	{ "insert_find_remove",		insert_find_remove	},
+	{ "find_without_index",		find_without_index	},
+	{ "drain_tree",			drain_tree		},
+	{ "stress_tree",		stress_tree		},
+	{ NULL,				NULL			}
+};
+
+/*
+ * Entry point for the btree test program.
+ *
+ * Options:
+ *   -c N     set contents_frequency
+ *   -h       print usage
+ *   -l N     set tree_limit
+ *   -n NAME  run only the named negative test
+ *   -r N     set the random seed (0 selects the current time)
+ *   -s       run only the stress test
+ *   -t N     set stress_timeout
+ *
+ * With neither -n nor -s, every entry of test_table is run and the tree is
+ * emptied after each one.  Returns the number of failed positive tests.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *negative_test = NULL;
+	int failed_tests = 0;
+	struct timeval tp;
+	zfs_btree_t bt;
+	int c;
+
+	/*
+	 * 'h' is listed so that the explicit case below is reachable and -h
+	 * is not reported by getopt() as an invalid option.
+	 */
+	while ((c = getopt(argc, argv, "c:hl:n:r:st:")) != -1) {
+		switch (c) {
+		case 'c':
+			contents_frequency = atoi(optarg);
+			break;
+		case 'l':
+			tree_limit = atoi(optarg);
+			break;
+		case 'n':
+			negative_test = optarg;
+			break;
+		case 'r':
+			seed = atoi(optarg);
+			break;
+		case 's':
+			stress_only = B_TRUE;
+			break;
+		case 't':
+			stress_timeout = atoi(optarg);
+			break;
+		case 'h':
+		default:
+			usage(1);
+			break;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+	optind = 1;
+
+
+	/* A seed of 0 means "pick one": use the current time in seconds. */
+	if (seed == 0) {
+		(void) gettimeofday(&tp, NULL);
+		seed = tp.tv_sec;
+	}
+	srandom(seed);
+
+	zfs_btree_init();
+	zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
+
+	/*
+	 * This runs the named negative test. None of them should
+	 * return, as they both cause crashes.
+	 */
+	if (negative_test) {
+		return (do_negative_test(&bt, negative_test));
+	}
+
+	fprintf(stderr, "Seed: %u\n", seed);
+
+	/*
+	 * This is a stress test that does operations on a btree over the
+	 * requested timeout period, verifying them against identical
+	 * operations in an avl tree.
+	 */
+	if (stress_only != 0) {
+		return (stress_tree(&bt, NULL));
+	}
+
+	/* Do the positive tests */
+	btree_test_t *test = &test_table[0];
+	while (test->name) {
+		int retval;
+		uint64_t *rv;
+		char why[BUFSIZE] = {0};
+		zfs_btree_index_t *idx = NULL;
+
+		(void) fprintf(stdout, "%-20s", test->name);
+		retval = test->func(&bt, why);
+
+		if (retval == 0) {
+			(void) fprintf(stdout, "ok\n");
+		} else {
+			(void) fprintf(stdout, "failed with %d\n", retval);
+			if (strlen(why) != 0)
+				(void) fprintf(stdout, "\t%s\n", why);
+			why[0] = '\0';
+			failed_tests++;
+		}
+
+		/* Remove all the elements and re-verify the tree */
+		while ((rv = zfs_btree_destroy_nodes(&bt, &idx)) != NULL)
+			;
+		zfs_btree_verify(&bt);
+
+		test++;
+	}
+
+	zfs_btree_verify(&bt);
+	zfs_btree_fini();
+
+	return (failed_tests);
+}

diff --git a/zfs/tests/zfs-tests/cmd/devname2devid/Makefile.am b/zfs/tests/zfs-tests/cmd/devname2devid/Makefile.am
index a8991bb..b8b630d 100644
--- a/zfs/tests/zfs-tests/cmd/devname2devid/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/devname2devid/Makefile.am

@@ -5,5 +5,6 @@
 if WANT_DEVNAME2DEVID
 pkgexec_PROGRAMS = devname2devid
 devname2devid_SOURCES = devname2devid.c
-devname2devid_LDADD = -ludev
+devname2devid_CFLAGS = $(AM_CFLAGS) $(LIBUDEV_CFLAGS)
+devname2devid_LDADD = $(LIBUDEV_LIBS)
 endif

diff --git a/zfs/tests/zfs-tests/cmd/draid/.gitignore b/zfs/tests/zfs-tests/cmd/draid/.gitignore
new file mode 100644
index 0000000..911b9f0
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/draid/.gitignore

@@ -0,0 +1 @@
+/draid

diff --git a/zfs/tests/zfs-tests/cmd/draid/Makefile.am b/zfs/tests/zfs-tests/cmd/draid/Makefile.am
new file mode 100644
index 0000000..69fed7a
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/draid/Makefile.am

@@ -0,0 +1,15 @@
+# Pull in the project-wide automake rules from config/Rules.am.
+include $(top_srcdir)/config/Rules.am
+
+# Directory into which pkgexec_PROGRAMS are installed.
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+# draid.c includes <zlib.h>, so compile with the zlib flags.
+AM_CFLAGS += $(ZLIB_CFLAGS)
+
+pkgexec_PROGRAMS = draid
+
+draid_SOURCES = draid.c
+
+# In-tree libraries the program links against.
+draid_LDADD = \
+	$(abs_top_builddir)/lib/libzpool/libzpool.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la
+
+# External zlib libraries, matching the ZLIB_CFLAGS above.
+draid_LDADD += $(ZLIB_LIBS)

diff --git a/zfs/tests/zfs-tests/cmd/draid/draid.c b/zfs/tests/zfs-tests/cmd/draid/draid.c
new file mode 100644
index 0000000..0a7c4bc
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/draid/draid.c

@@ -0,0 +1,1411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 Intel Corporation.
+ * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <stdio.h>
+#include <zlib.h>
+#include <zfs_fletcher.h>
+#include <sys/vdev_draid.h>
+#include <sys/nvpair.h>
+#include <sys/stat.h>
+
+/*
+ * The number of rows to generate for new permutation maps.
+ */
+#define	MAP_ROWS_DEFAULT	256
+
+/*
+ * Key values for dRAID maps when stored as nvlists.
+ */
+#define	MAP_SEED		"seed"
+#define	MAP_CHECKSUM		"checksum"
+#define	MAP_WORST_RATIO		"worst_ratio"
+#define	MAP_AVG_RATIO		"avg_ratio"
+#define	MAP_CHILDREN		"children"
+#define	MAP_NPERMS		"nperms"
+#define	MAP_PERMS		"perms"
+
+/*
+ * Write the synopsis of every draid subcommand to stderr and terminate the
+ * process with exit status 1.  This function does not return.
+ */
+static void
+draid_usage(void)
+{
+	static const char synopsis[] =
+	    "usage: draid command args ...\n"
+	    "Available commands are:\n"
+	    "\n"
+	    "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n"
+	    "\tdraid verify [-rv] FILE\n"
+	    "\tdraid dump [-v] [-m min] [-n max] FILE\n"
+	    "\tdraid table FILE\n"
+	    "\tdraid merge FILE SRC SRC...\n";
+
+	(void) fputs(synopsis, stderr);
+	exit(1);
+}
+
+/*
+ * Read the gzip-compressed packed nvlist stored in 'filename' and unpack it
+ * into *allcfgs.
+ *
+ * The file must be non-empty and be a regular file or a symbolic link.  Its
+ * contents are decompressed into a heap buffer which is doubled whenever
+ * fewer than block_size bytes of room remain, so every gzread() call has
+ * space for a full block.
+ *
+ * Returns 0 on success, in which case the caller owns *allcfgs.  On failure
+ * a non-zero errno value is returned: the lstat64() error (for example
+ * ENOENT), EINVAL for an empty or unsupported file, ENOMEM, the errno
+ * reported through zlib, EIO for any other zlib failure, or the
+ * nvlist_unpack() error.
+ */
+static int
+read_map(const char *filename, nvlist_t **allcfgs)
+{
+	int block_size = 131072;
+	int buf_size = 131072;
+	int tmp_size, error;
+	char *tmp_buf;
+
+	struct stat64 stat;
+	if (lstat64(filename, &stat) != 0)
+		return (errno);
+
+	if (stat.st_size == 0 ||
+	    !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) {
+		return (EINVAL);
+	}
+
+	/*
+	 * gzopen() can fail without setting errno (e.g. when it cannot
+	 * allocate its state), so clear errno first and never return 0
+	 * for a failed open.
+	 */
+	errno = 0;
+	gzFile fp = gzopen(filename, "rb");
+	if (fp == Z_NULL)
+		return (errno != 0 ? errno : ENOMEM);
+
+	char *buf = malloc(buf_size);
+	if (buf == NULL) {
+		(void) gzclose(fp);
+		return (ENOMEM);
+	}
+
+	ssize_t rc, bytes = 0;
+	while (!gzeof(fp)) {
+		rc = gzread(fp, buf + bytes, block_size);
+		if ((rc < 0) || (rc == 0 && !gzeof(fp))) {
+			/*
+			 * Query the error while the handle is still open;
+			 * gzclose() frees it.  Translate the zlib code to an
+			 * errno value so callers can pass it to strerror()
+			 * and so a failure is never reported as 0.
+			 */
+			(void) gzerror(fp, &error);
+			if (error == Z_ERRNO && errno != 0)
+				error = errno;
+			else
+				error = EIO;
+
+			free(buf);
+			(void) gzclose(fp);
+			return (error);
+		} else {
+			bytes += rc;
+
+			/* Keep at least block_size bytes of free space. */
+			if (bytes + block_size >= buf_size) {
+				tmp_size = 2 * buf_size;
+				tmp_buf = malloc(tmp_size);
+				if (tmp_buf == NULL) {
+					free(buf);
+					(void) gzclose(fp);
+					return (ENOMEM);
+				}
+
+				memcpy(tmp_buf, buf, bytes);
+				free(buf);
+				buf = tmp_buf;
+				buf_size = tmp_size;
+			}
+		}
+	}
+
+	(void) gzclose(fp);
+
+	error = nvlist_unpack(buf, bytes, allcfgs, 0);
+	free(buf);
+
+	return (error);
+}
+
+/*
+ * Read a map from the specified filename.  A file contains multiple maps
+ * which are indexed by the number of children.
+ *
+ * filename - map file to read.
+ * key      - name of the map to look up in the file's nvlist.
+ * cfg      - on success, set to a duplicate of the map's nvlist.  The
+ *            caller is responsible for freeing it.
+ *
+ * Returns 0 on success, the read_map() error if the file cannot be read,
+ * ENOENT if the file has no map for 'key', or the nvlist_dup() error if the
+ * map cannot be duplicated.  *cfg is only valid when 0 is returned.
+ */
+static int
+read_map_key(const char *filename, char *key, nvlist_t **cfg)
+{
+	nvlist_t *allcfgs, *foundcfg = NULL;
+	int error;
+
+	error = read_map(filename, &allcfgs);
+	if (error != 0)
+		return (error);
+
+	nvlist_lookup_nvlist(allcfgs, key, &foundcfg);
+	if (foundcfg != NULL) {
+		/* Report a failed copy instead of returning an unset *cfg. */
+		error = nvlist_dup(foundcfg, cfg, KM_SLEEP);
+	} else {
+		error = ENOENT;
+	}
+
+	nvlist_free(allcfgs);
+
+	return (error);
+}
+
+/*
+ * Write all mappings to the map file.
+ *
+ * The nvlist is packed with XDR encoding, gzip-compressed into a temporary
+ * file created next to 'filename', and then moved into place.  The
+ * temporary file is removed on every failure path.
+ *
+ * Returns 0 on success.  On failure returns the nvlist_size() or
+ * nvlist_pack() error, ENOMEM, the errno from mkstemp(), gzopen(), unlink()
+ * or rename(), or EIO if the compressed data could not be completely
+ * written and flushed.
+ */
+static int
+write_map(const char *filename, nvlist_t *allcfgs)
+{
+	size_t buflen = 0;
+	int error;
+
+	error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR);
+	if (error)
+		return (error);
+
+	char *buf = malloc(buflen);
+	if (buf == NULL)
+		return (ENOMEM);
+
+	error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
+	if (error) {
+		free(buf);
+		return (error);
+	}
+
+	/*
+	 * Atomically update the file using a temporary file and the
+	 * traditional unlink then rename steps.  This code provides
+	 * no locking, it only guarantees the packed nvlist on disk
+	 * is updated atomically and is internally consistent.
+	 */
+	char *tmpname = calloc(MAXPATHLEN, 1);
+	if (tmpname == NULL) {
+		free(buf);
+		return (ENOMEM);
+	}
+
+	snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename);
+
+	int fd = mkstemp(tmpname);
+	if (fd < 0) {
+		error = errno;
+		free(buf);
+		free(tmpname);
+		return (error);
+	}
+	(void) close(fd);
+
+	/*
+	 * gzopen() may fail without setting errno, so clear it first.  The
+	 * temporary file already exists at this point and must be removed.
+	 */
+	errno = 0;
+	gzFile fp = gzopen(tmpname, "w9b");
+	if (fp == Z_NULL) {
+		error = (errno != 0) ? errno : ENOMEM;
+		(void) unlink(tmpname);
+		free(buf);
+		free(tmpname);
+		return (error);
+	}
+
+	/*
+	 * gzwrite() returns 0 on error, so any non-positive result ends the
+	 * loop and is caught by the length check below.
+	 */
+	ssize_t rc, bytes = 0;
+	while (bytes < buflen) {
+		size_t size = MIN(buflen - bytes, 131072);
+		rc = gzwrite(fp, buf + bytes, size);
+		if (rc <= 0)
+			break;
+
+		bytes += rc;
+	}
+
+	free(buf);
+
+	/*
+	 * gzclose() flushes the remaining compressed data; a failure there
+	 * means the file on disk is incomplete even if every gzwrite()
+	 * succeeded.
+	 */
+	int close_rc = gzclose(fp);
+	if (close_rc != Z_OK || bytes != buflen) {
+		(void) unlink(tmpname);
+		free(tmpname);
+		return (EIO);
+	}
+
+	/*
+	 * Unlink the previous config file and replace it with the updated
+	 * version.  If we're able to unlink the file then directory is
+	 * writable by us and the subsequent rename should never fail.
+	 */
+	error = unlink(filename);
+	if (error != 0 && errno != ENOENT) {
+		error = errno;
+		(void) unlink(tmpname);
+		free(tmpname);
+		return (error);
+	}
+
+	error = rename(tmpname, filename);
+	if (error != 0) {
+		error = errno;
+		(void) unlink(tmpname);
+		free(tmpname);
+		return (error);
+	}
+
+	free(tmpname);
+
+	return (0);
+}
+
+/*
+ * Store 'map' under 'key' in the map file and write the file back out.
+ *
+ * The file's existing maps are loaded first; a file that does not exist
+ * (ENOENT) is treated as empty.  If the file already has a map for 'key',
+ * the new map replaces it only when its worst ratio is strictly lower than
+ * the stored one.  Ratios are stored as integers scaled by 1000.
+ *
+ * Returns 0 if the map was written, EEXIST if the existing map was kept,
+ * or the error from read_map(), nvlist_add_nvlist() or write_map().
+ */
+static int
+write_map_key(const char *filename, char *key, draid_map_t *map,
+    double worst_ratio, double avg_ratio)
+{
+	nvlist_t *allcfgs, *existing;
+	int rc;
+
+	rc = read_map(filename, &allcfgs);
+	if (rc == ENOENT)
+		allcfgs = fnvlist_alloc();
+	else if (rc != 0)
+		return (rc);
+
+	if (nvlist_lookup_nvlist(allcfgs, key, &existing) == 0) {
+		uint64_t stored = fnvlist_lookup_uint64(existing,
+		    MAP_WORST_RATIO);
+		double stored_ratio = (double)stored / 1000.0;
+
+		if (!(worst_ratio < stored_ratio)) {
+			/* The stored map is at least as good; keep it. */
+			nvlist_free(allcfgs);
+			return (EEXIST);
+		}
+
+		/* The new map is better balanced; drop the stored one. */
+		fnvlist_remove(allcfgs, key);
+	}
+
+	/* Describe the new map as an nvlist. */
+	nvlist_t *entry = fnvlist_alloc();
+
+	fnvlist_add_uint64(entry, MAP_SEED, map->dm_seed);
+	fnvlist_add_uint64(entry, MAP_CHECKSUM, map->dm_checksum);
+	fnvlist_add_uint64(entry, MAP_CHILDREN, map->dm_children);
+	fnvlist_add_uint64(entry, MAP_NPERMS, map->dm_nperms);
+	fnvlist_add_uint8_array(entry, MAP_PERMS, map->dm_perms,
+	    map->dm_children * map->dm_nperms * sizeof (uint8_t));
+	fnvlist_add_uint64(entry, MAP_WORST_RATIO,
+	    (uint64_t)(worst_ratio * 1000.0));
+	fnvlist_add_uint64(entry, MAP_AVG_RATIO,
+	    (uint64_t)(avg_ratio * 1000.0));
+
+	rc = nvlist_add_nvlist(allcfgs, key, entry);
+	if (rc == 0)
+		rc = write_map(filename, allcfgs);
+
+	nvlist_free(entry);
+	nvlist_free(allcfgs);
+
+	return (rc);
+}
+
+/*
+ * Print a description of 'map' to stdout, labelled with 'key'.
+ *
+ * verbose selects the amount of detail:
+ *   0  - print nothing.
+ *   1  - one summary line: seed and both ratios.
+ *   2  - one field per line, with the permutation array omitted.
+ *   >2 - one field per line followed by the whole permutation array,
+ *        one row of dm_children entries for each of the dm_nperms rows.
+ */
+static void
+dump_map(draid_map_t *map, char *key, double worst_ratio, double avg_ratio,
+    int verbose)
+{
+	if (verbose == 0)
+		return;
+
+	if (verbose == 1) {
+		printf("    \"%s\": seed: 0x%016llx worst_ratio: %2.03f "
+		    "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed,
+		    worst_ratio, avg_ratio);
+		return;
+	}
+
+	printf("    \"%s\":\n"
+	    "        seed: 0x%016llx\n"
+	    "        checksum: 0x%016llx\n"
+	    "        worst_ratio: %2.03f\n"
+	    "        avg_ratio: %2.03f\n"
+	    "        children: %llu\n"
+	    "        nperms: %llu\n",
+	    key, (u_longlong_t)map->dm_seed,
+	    (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio,
+	    (u_longlong_t)map->dm_children,
+	    (u_longlong_t)map->dm_nperms);
+
+	if (verbose == 2) {
+		printf("        draid_perms = <omitted>\n");
+		return;
+	}
+
+	/* Only levels above 2 print the permutation array. */
+	if (verbose < 2)
+		return;
+
+	printf("        perms = {\n");
+	for (int row = 0; row < map->dm_nperms; row++) {
+		printf("            { ");
+		for (int col = 0; col < map->dm_children; col++) {
+			/* Every entry but the last in a row gets a comma. */
+			const char *sep = "";
+
+			if (col < map->dm_children - 1)
+				sep = ",";
+
+			printf("%3d%s ",
+			    map->dm_perms[row * map->dm_children + col], sep);
+		}
+		printf(" },\n");
+	}
+	printf("        }\n");
+}
+
+/*
+ * Print the map stored in the nvlist 'cfg'.
+ *
+ * The nvlist fields are copied into a temporary draid_map_t and the stored
+ * ratios, which are integers scaled by 1000, are converted back to doubles
+ * before everything is handed to dump_map().
+ */
+static void
+dump_map_nv(char *key, nvlist_t *cfg, int verbose)
+{
+	draid_map_t map;
+	uint_t nelem;
+
+	uint64_t worst_x1000 = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO);
+	uint64_t avg_x1000 = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
+
+	map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
+	map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
+	map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
+	map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
+	nvlist_lookup_uint8_array(cfg, MAP_PERMS, &map.dm_perms, &nelem);
+
+	double worst = (double)worst_x1000 / 1000.0;
+	double avg = avg_x1000 / 1000.0;
+
+	dump_map(&map, key, worst, avg, verbose);
+}
+
+/*
+ * Print a summary of the mapping.
+ *
+ * The map stored under 'key' in 'filename' is read, printed at the given
+ * verbosity, and released again.
+ *
+ * Returns 0 on success or the read_map_key() error.
+ */
+static int
+dump_map_key(const char *filename, char *key, int verbose)
+{
+	nvlist_t *cfg;
+	int error;
+
+	error = read_map_key(filename, key, &cfg);
+	if (error != 0)
+		return (error);
+
+	dump_map_nv(key, cfg, verbose);
+
+	/* read_map_key() returned a duplicate which we own. */
+	nvlist_free(cfg);
+
+	return (0);
+}
+
+/*
+ * Create a candidate permutation map for evaluation.
+ *
+ * A draid_map_t is allocated and filled in with the requested child count,
+ * row count and seed, with a checksum of 0.  Its permutation array is then
+ * produced by vdev_draid_generate_perms().
+ *
+ * Returns 0 and stores the map in *mapp on success; release it with
+ * free_map().  Returns ENOMEM or the vdev_draid_generate_perms() error on
+ * failure, in which case *mapp is not modified.
+ */
+static int
+alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed,
+    draid_map_t **mapp)
+{
+	draid_map_t *candidate = malloc(sizeof (*candidate));
+
+	if (candidate == NULL)
+		return (ENOMEM);
+
+	candidate->dm_seed = seed;
+	candidate->dm_checksum = 0;
+	candidate->dm_children = children;
+	candidate->dm_nperms = nperms;
+
+	int rc = vdev_draid_generate_perms(candidate, &candidate->dm_perms);
+	if (rc != 0) {
+		free(candidate);
+		return (rc);
+	}
+
+	*mapp = candidate;
+	return (0);
+}
+
+/*
+ * Create a private copy of the fixed permutation map for N children.
+ *
+ * The map is found with vdev_draid_lookup_map() and copied to the heap; it
+ * must have a non-zero checksum.  The copy's permutation array is then
+ * produced by vdev_draid_generate_perms().
+ *
+ * Returns 0 and stores the copy in *mapp on success; release it with
+ * free_map().  Returns the lookup error, ENOMEM, or the
+ * vdev_draid_generate_perms() error on failure, in which case *mapp is not
+ * modified.
+ */
+static int
+alloc_fixed_map(uint64_t children, draid_map_t **mapp)
+{
+	const draid_map_t *builtin;
+	int rc;
+
+	rc = vdev_draid_lookup_map(children, &builtin);
+	if (rc != 0)
+		return (rc);
+
+	draid_map_t *copy = malloc(sizeof (*copy));
+	if (copy == NULL)
+		return (ENOMEM);
+
+	memcpy(copy, builtin, sizeof (*copy));
+	VERIFY3U(copy->dm_checksum, !=, 0);
+
+	rc = vdev_draid_generate_perms(copy, &copy->dm_perms);
+	if (rc != 0) {
+		free(copy);
+		return (rc);
+	}
+
+	*mapp = copy;
+	return (0);
+}
+
+/*
+ * Release a permutation map: both its permutation array and the map
+ * structure itself.
+ */
+static void
+free_map(draid_map_t *map)
+{
+	uint8_t *perms = map->dm_perms;
+
+	free(perms);
+	free(map);
+}
+
+/*
+ * Report whether 'dev' appears among the first 'nfaulted' entries of
+ * 'faulted_devs'.  Returns B_TRUE if it does and B_FALSE otherwise; an
+ * nfaulted of zero or less always yields B_FALSE.
+ */
+static inline boolean_t
+is_faulted(int *faulted_devs, int nfaulted, int dev)
+{
+	int remaining = nfaulted;
+
+	while (remaining-- > 0) {
+		if (*faulted_devs++ == dev)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Evaluate how resilvering I/O will be distributed given a list of faulted
+ * vdevs.  As a simplification we assume one IO is sufficient to repair each
+ * damaged device in a group.
+ *
+ * map           - permutation map to evaluate.
+ * groupwidth    - number of devices in each redundancy group.
+ * nspares       - number of distributed spares; the last nspares entries
+ *                 of every row are treated as the spares.
+ * faulted_devs  - array of nfaulted faulted device numbers.
+ * min_child_ios - set to the fewest IOs done by any non-faulted child.
+ * max_child_ios - set to the most IOs done by any non-faulted child.
+ *
+ * Returns the imbalance ratio *max_child_ios / *min_child_ios.
+ */
+static double
+eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares,
+    int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios)
+{
+	uint64_t children = map->dm_children;
+	uint64_t ngroups = 1;
+	uint64_t ndisks = children - nspares;
+
+	/*
+	 * Calculate the minimum number of groups required to fill a slice.
+	 */
+	while (ngroups * (groupwidth) % (children - nspares) != 0)
+		ngroups++;
+
+	/*
+	 * Per-child IO counters.  The function has no way to report an
+	 * error, so an allocation failure is fatal.
+	 */
+	int *ios = calloc(map->dm_children, sizeof (*ios));
+	VERIFY3P(ios, !=, NULL);
+
+	/* Resilver all rows */
+	for (int i = 0; i < map->dm_nperms; i++) {
+		uint8_t *row = &map->dm_perms[i * map->dm_children];
+
+		/* Resilver all groups with faulted drives */
+		for (int j = 0; j < ngroups; j++) {
+			uint64_t spareidx = map->dm_children - nspares;
+			boolean_t repair_needed = B_FALSE;
+
+			/* See if any devices in this group are faulted */
+			uint64_t groupstart = (j * groupwidth) % ndisks;
+
+			for (int k = 0; k < groupwidth; k++) {
+				uint64_t groupidx = (groupstart + k) % ndisks;
+
+				repair_needed = is_faulted(faulted_devs,
+				    nfaulted, row[groupidx]);
+				if (repair_needed)
+					break;
+			}
+
+			if (repair_needed == B_FALSE)
+				continue;
+
+			/*
+			 * This group is degraded. Calculate the number of
+			 * reads the non-faulted drives require and the number
+			 * of writes to the distributed hot spare for this row.
+			 */
+			for (int k = 0; k < groupwidth; k++) {
+				uint64_t groupidx = (groupstart + k) % ndisks;
+
+				if (!is_faulted(faulted_devs, nfaulted,
+				    row[groupidx])) {
+					ios[row[groupidx]]++;
+				} else if (nspares > 0) {
+					/* Skip spares which are faulted. */
+					while (is_faulted(faulted_devs,
+					    nfaulted, row[spareidx])) {
+						spareidx++;
+					}
+
+					ASSERT3U(spareidx, <, map->dm_children);
+					ios[row[spareidx]]++;
+					spareidx++;
+				}
+			}
+		}
+	}
+
+	*min_child_ios = INT_MAX;
+	*max_child_ios = 0;
+
+	/*
+	 * Find the drives with fewest and most required I/O.  These values
+	 * are used to calculate the imbalance ratio.  To avoid returning an
+	 * infinite value for permutations which have children that perform
+	 * no IO a floor of 1 IO per child is set.  This ensures a meaningful
+	 * ratio is returned for comparison and it is not uncommon when
+	 * there are a large number of children.
+	 */
+	for (int i = 0; i < map->dm_children; i++) {
+
+		if (is_faulted(faulted_devs, nfaulted, i)) {
+			ASSERT0(ios[i]);
+			continue;
+		}
+
+		if (ios[i] == 0)
+			ios[i] = 1;
+
+		if (ios[i] < *min_child_ios)
+			*min_child_ios = ios[i];
+
+		if (ios[i] > *max_child_ios)
+			*max_child_ios = ios[i];
+	}
+
+	ASSERT3S(*min_child_ios, !=, INT_MAX);
+	ASSERT3S(*max_child_ios, !=, 0);
+
+	double ratio = (double)(*max_child_ios) / (double)(*min_child_ios);
+
+	free(ios);
+
+	return (ratio);
+}
+
+/*
+ * Evaluate the quality of the permutation mapping by considering possible
+ * device failures.  Returns the imbalance ratio for the worst mapping which
+ * is defined to be the largest number of child IOs over the fewest number
+ * of child IOs.  A value of 1.0 indicates the mapping is perfectly balanced
+ * and all children perform an equal amount of work during reconstruction.
+ *
+ * map          - permutation map to score.
+ * worst_ratiop - set to the largest ratio seen over all simulated faults.
+ * avg_ratiop   - set to the mean ratio over all simulated faults.
+ */
+static void
+eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop)
+{
+	uint64_t children = map->dm_children;
+	double worst_ratio = 1.0;
+	double sum = 0;
+	int worst_min_ios = 0, worst_max_ios = 0;
+	int n = 0;
+
+	/*
+	 * When there are only 2 children there can be no distributed
+	 * spare and no resilver to evaluate.  Default to a ratio of 1.0
+	 * for this degenerate case.
+	 */
+	if (children == VDEV_DRAID_MIN_CHILDREN) {
+		*worst_ratiop = 1.0;
+		*avg_ratiop = 1.0;
+		return;
+	}
+
+	/*
+	 * Score the mapping as if it had either 1 or 2 distributed spares.
+	 */
+	for (int nspares = 1; nspares <= 2; nspares++) {
+		uint64_t faults = nspares;
+
+		/*
+		 * Score groupwidths up to 19.  This value was chosen as the
+		 * largest reasonable width (16d+3p).  dRAID pools may still
+		 * be created with wider stripes but they are not considered in
+		 * this analysis in order to optimize for the most common cases.
+		 */
+		for (uint64_t groupwidth = 2;
+		    groupwidth <= MIN(children - nspares, 19);
+		    groupwidth++) {
+			int faulted_devs[2];
+			int min_ios, max_ios;
+
+			/*
+			 * Score possible device faults.  This is limited
+			 * to exactly one fault per distributed spare for
+			 * the purposes of this simulation.
+			 */
+			for (int f1 = 0; f1 < children; f1++) {
+				faulted_devs[0] = f1;
+				double ratio;
+
+				if (faults == 1) {
+					/* Single fault: score device f1. */
+					ratio = eval_resilver(map, groupwidth,
+					    nspares, faulted_devs, faults,
+					    &min_ios, &max_ios);
+
+					if (ratio > worst_ratio) {
+						worst_ratio = ratio;
+						worst_min_ios = min_ios;
+						worst_max_ios = max_ios;
+					}
+
+					sum += ratio;
+					n++;
+				} else if (faults == 2) {
+					/*
+					 * Double fault: score every pair
+					 * (f1, f2) with f2 > f1 exactly once.
+					 */
+					for (int f2 = f1 + 1; f2 < children;
+					    f2++) {
+						faulted_devs[1] = f2;
+
+						ratio = eval_resilver(map,
+						    groupwidth, nspares,
+						    faulted_devs, faults,
+						    &min_ios, &max_ios);
+
+						if (ratio > worst_ratio) {
+							worst_ratio = ratio;
+							worst_min_ios = min_ios;
+							worst_max_ios = max_ios;
+						}
+
+						sum += ratio;
+						n++;
+					}
+				}
+			}
+		}
+	}
+
+	*worst_ratiop = worst_ratio;
+	*avg_ratiop = sum / n;
+
+	/*
+	 * Log the min/max io values for particularly unbalanced maps.
+	 * Since the maps are generated entirely randomly these are possible
+	 * but exceedingly unlikely.  We log it for possible investigation.
+	 */
+	if (worst_ratio > 100.0) {
+		dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2);
+		printf("worst_min_ios=%d worst_max_ios=%d\n",
+		    worst_min_ios, worst_max_ios);
+	}
+}
+
+/*
+ * Generate and score 'passes' random permutation maps for the given number
+ * of children and keep the one with the lowest worst-case ratio.
+ *
+ * children    - number of children in each candidate map.
+ * passes      - number of candidates to evaluate; must be at least 1.
+ * map_seed    - seed state passed to vdev_draid_rand() for each candidate.
+ * best_mapp   - set to the best map, with its checksum filled in.  The
+ *               caller releases it with free_map().
+ * best_ratiop - set to the best map's worst-case ratio.
+ * avg_ratiop  - set to the best map's average ratio.
+ *
+ * Returns 0 on success, EINVAL if passes is less than 1, or the
+ * alloc_new_map() error.  The outputs are only set on success.
+ */
+static int
+eval_maps(uint64_t children, int passes, uint64_t *map_seed,
+    draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop)
+{
+	draid_map_t *best_map = NULL;
+	double best_worst_ratio = 1000.0;
+	double best_avg_ratio = 1000.0;
+
+	/* Without at least one pass there is no map to return. */
+	if (passes < 1)
+		return (EINVAL);
+
+	/*
+	 * Perform the requested number of passes evaluating randomly
+	 * generated permutation maps.  Only the best version is kept.
+	 */
+	for (int i = 0; i < passes; i++) {
+		double worst_ratio, avg_ratio;
+		draid_map_t *map;
+		int error;
+
+		/*
+		 * Calculate the next seed and generate a new candidate map.
+		 */
+		error = alloc_new_map(children, MAP_ROWS_DEFAULT,
+		    vdev_draid_rand(map_seed), &map);
+		if (error) {
+			/* Do not leak the best map found so far. */
+			if (best_map != NULL)
+				free_map(best_map);
+			return (error);
+		}
+
+		/*
+		 * Consider maps with a lower worst_ratio to be of higher
+		 * quality.  Some maps may have a lower avg_ratio but they
+		 * are discarded since they might include some particularly
+		 * imbalanced permutations.  The average is tracked in
+		 * order to get a sense of the average permutation quality.
+		 */
+		eval_decluster(map, &worst_ratio, &avg_ratio);
+
+		if (best_map == NULL || worst_ratio < best_worst_ratio) {
+
+			if (best_map != NULL)
+				free_map(best_map);
+
+			best_map = map;
+			best_worst_ratio = worst_ratio;
+			best_avg_ratio = avg_ratio;
+		} else {
+			free_map(map);
+		}
+	}
+
+	/*
+	 * After determining the best map generate a checksum over the full
+	 * permutation array.  This checksum is verified when opening a dRAID
+	 * pool to ensure the generated in memory permutations are correct.
+	 */
+	zio_cksum_t cksum;
+	fletcher_4_native_varsize(best_map->dm_perms,
+	    sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms,
+	    &cksum);
+	best_map->dm_checksum = cksum.zc_word[0];
+
+	*best_mapp = best_map;
+	*best_ratiop = best_worst_ratio;
+	*avg_ratiop = best_avg_ratio;
+
+	return (0);
+}
+
+/*
+ * Implement "draid generate [-cv] [-m min] [-n max] [-p passes] FILE".
+ *
+ * For every child count from min to max the best of 'passes' randomly
+ * generated maps is offered to FILE, where it replaces the stored map for
+ * that child count only if it is better.  The seed for each run is read
+ * from /dev/urandom.  With -c the whole process repeats until interrupted.
+ *
+ * Returns 0 on success and 1 on any error, after printing a message.
+ */
+static int
+draid_generate(int argc, char *argv[])
+{
+	char filename[MAXPATHLEN];
+	uint64_t map_seed;
+	int c, fd, error, verbose = 0, passes = 1, continuous = 0;
+	int min_children = VDEV_DRAID_MIN_CHILDREN;
+	int max_children = VDEV_DRAID_MAX_CHILDREN;
+	int restarts = 0;
+
+	while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) {
+		switch (c) {
+		case 'c':
+			continuous++;
+			break;
+		case 'm':
+			min_children = (int)strtol(optarg, NULL, 0);
+			if (min_children < VDEV_DRAID_MIN_CHILDREN) {
+				(void) fprintf(stderr, "A minimum of 2 "
+				    "children are required.\n");
+				return (1);
+			}
+
+			break;
+		case 'n':
+			max_children = (int)strtol(optarg, NULL, 0);
+			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
+				(void) fprintf(stderr, "A maximum of %d "
+				    "children are allowed.\n",
+				    VDEV_DRAID_MAX_CHILDREN);
+				return (1);
+			}
+			break;
+		case 'p':
+			passes = (int)strtol(optarg, NULL, 0);
+			/* eval_maps() needs a pass to produce a map. */
+			if (passes < 1) {
+				(void) fprintf(stderr, "A minimum of 1 "
+				    "pass is required.\n");
+				return (1);
+			}
+			break;
+		case 'v':
+			/*
+			 * 0 - Only log when a better map is added to the file.
+			 * 1 - Log the current best map for each child count.
+			 *     Minimal output on a single summary line.
+			 * 2 - Log the current best map for each child count.
+			 *     More verbose includes most map fields.
+			 * 3 - Log the current best map for each child count.
+			 *     Very verbose all fields including the full map.
+			 */
+			verbose++;
+			break;
+		case ':':
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			draid_usage();
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			draid_usage();
+			break;
+		}
+	}
+
+	if (argc > optind) {
+		bzero(filename, MAXPATHLEN);
+		strncpy(filename, argv[optind], MAXPATHLEN - 1);
+	} else {
+		(void) fprintf(stderr, "A FILE must be specified.\n");
+		return (1);
+	}
+
+restart:
+	/*
+	 * Start with a fresh seed from /dev/urandom.
+	 */
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd < 0) {
+		printf("Unable to open /dev/urandom: %s\n", strerror(errno));
+		return (1);
+	} else {
+		ssize_t bytes = sizeof (map_seed);
+		ssize_t bytes_read = 0;
+
+		while (bytes_read < bytes) {
+			ssize_t rc = read(fd, ((char *)&map_seed) + bytes_read,
+			    bytes - bytes_read);
+			/*
+			 * Treat end-of-file as an error too, otherwise a
+			 * read of 0 bytes would loop forever.
+			 */
+			if (rc <= 0) {
+				printf("Unable to read /dev/urandom: %s\n",
+				    rc < 0 ? strerror(errno) :
+				    "unexpected end of file");
+				(void) close(fd);
+				return (1);
+			}
+			bytes_read += rc;
+		}
+
+		(void) close(fd);
+	}
+
+	if (restarts == 0)
+		printf("Writing generated mappings to '%s':\n", filename);
+
+	/*
+	 * Generate maps for all requested child counts. The best map for
+	 * each child count is written out to the specified file.  If the file
+	 * already contains a better mapping this map will not be added.
+	 */
+	for (uint64_t children = min_children;
+	    children <= max_children; children++) {
+		char key[8] = { 0 };
+		draid_map_t *map;
+		double worst_ratio = 1000.0;
+		double avg_ratio = 1000.0;
+
+		error = eval_maps(children, passes, &map_seed, &map,
+		    &worst_ratio, &avg_ratio);
+		if (error) {
+			printf("Error eval_maps(): %s\n", strerror(error));
+			return (1);
+		}
+
+		if (worst_ratio < 1.0 || avg_ratio < 1.0) {
+			printf("Error ratio < 1.0: worst_ratio = %2.03f "
+			    "avg_ratio = %2.03f\n", worst_ratio, avg_ratio);
+			free_map(map);
+			return (1);
+		}
+
+		snprintf(key, 7, "%llu", (u_longlong_t)children);
+		error = write_map_key(filename, key, map, worst_ratio,
+		    avg_ratio);
+		if (error == 0) {
+			/* The new map was added to the file. */
+			dump_map(map, key, worst_ratio, avg_ratio,
+			    MAX(verbose, 1));
+		} else if (error == EEXIST) {
+			/* The existing map was preferable and kept. */
+			if (verbose > 0)
+				dump_map_key(filename, key, verbose);
+		} else {
+			printf("Error write_map_key(): %s\n", strerror(error));
+			free_map(map);
+			return (1);
+		}
+
+		free_map(map);
+	}
+
+	/*
+	 * When the continuous option is set restart at the minimum number of
+	 * children instead of exiting. This option is useful as a mechanism
+	 * to continuously try and refine the discovered permutations.
+	 */
+	if (continuous) {
+		restarts++;
+		printf("Restarting by request (-c): %d\n", restarts);
+		goto restart;
+	}
+
+	return (0);
+}
+
+/*
+ * Verify each map in the file by generating its in-memory permutation array
+ * and confirming its checksum is correct.
+ */
+static int
+draid_verify(int argc, char *argv[])
+{
+	char filename[MAXPATHLEN];
+	int n = 0, c, error, verbose = 1;
+	int check_ratios = 0;
+
+	while ((c = getopt(argc, argv, ":rv")) != -1) {
+		switch (c) {
+		case 'r':
+			check_ratios++;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case ':':
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			draid_usage();
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			draid_usage();
+			break;
+		}
+	}
+
+	if (argc > optind) {
+		char *abspath = malloc(MAXPATHLEN);
+		if (abspath == NULL)
+			return (ENOMEM);
+
+		bzero(filename, MAXPATHLEN);
+		if (realpath(argv[optind], abspath) != NULL)
+			strncpy(filename, abspath, MAXPATHLEN - 1);
+		else
+			strncpy(filename, argv[optind], MAXPATHLEN - 1);
+
+		free(abspath);
+	} else {
+		(void) fprintf(stderr, "A FILE must be specified.\n");
+		return (1);
+	}
+
+	printf("Verifying permutation maps: '%s'\n", filename);
+
+	/*
+	 * Lookup hardcoded permutation map for each valid number of children
+	 * and verify a generated map has the correct checksum.  Then compare
+	 * the generated map values with the nvlist map values read from the
+	 * reference file to cross-check the permutation.
+	 */
+	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
+	    children <= VDEV_DRAID_MAX_CHILDREN;
+	    children++) {
+		draid_map_t *map;
+		char key[8];
+
+		bzero(key, 8);
+		snprintf(key, 8, "%llu", (u_longlong_t)children);
+
+		error = alloc_fixed_map(children, &map);
+		if (error) {
+			printf("Error alloc_fixed_map() failed: %s\n",
+			    error == ECKSUM ? "Invalid checksum" :
+			    strerror(error));
+			return (1);
+		}
+
+		uint64_t nv_seed, nv_checksum, nv_children, nv_nperms;
+		uint8_t *nv_perms;
+		nvlist_t *cfg;
+		uint_t c;
+
+		error = read_map_key(filename, key, &cfg);
+		if (error != 0) {
+			printf("Error read_map_key() failed: %s\n",
+			    strerror(error));
+			free_map(map);
+			return (1);
+		}
+
+		nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
+		nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
+		nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
+		nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
+		nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c);
+
+		/*
+		 * Compare draid_map_t and nvlist reference values.
+		 */
+		if (map->dm_seed != nv_seed) {
+			printf("Error different seeds: 0x%016llx != "
+			    "0x%016llx\n", (u_longlong_t)map->dm_seed,
+			    (u_longlong_t)nv_seed);
+			error = EINVAL;
+		}
+
+		if (map->dm_checksum != nv_checksum) {
+			printf("Error different checksums: 0x%016llx "
+			    "!= 0x%016llx\n",
+			    (u_longlong_t)map->dm_checksum,
+			    (u_longlong_t)nv_checksum);
+			error = EINVAL;
+		}
+
+		if (map->dm_children != nv_children) {
+			printf("Error different children: %llu "
+			    "!= %llu\n", (u_longlong_t)map->dm_children,
+			    (u_longlong_t)nv_children);
+			error = EINVAL;
+		}
+
+		if (map->dm_nperms != nv_nperms) {
+			printf("Error different nperms: %llu "
+			    "!= %llu\n", (u_longlong_t)map->dm_nperms,
+			    (u_longlong_t)nv_nperms);
+			error = EINVAL;
+		}
+
+		for (uint64_t i = 0; i < nv_children * nv_nperms; i++) {
+			if (map->dm_perms[i] != nv_perms[i]) {
+				printf("Error different perms[%llu]: "
+				    "%d != %d\n", (u_longlong_t)i,
+				    (int)map->dm_perms[i],
+				    (int)nv_perms[i]);
+				error = EINVAL;
+				break;
+			}
+		}
+
+		/*
+		 * For good measure recalculate the worst and average
+		 * ratios and confirm they match the nvlist values.
+		 */
+		if (check_ratios) {
+			uint64_t nv_worst_ratio, nv_avg_ratio;
+			double worst_ratio, avg_ratio;
+
+			eval_decluster(map, &worst_ratio, &avg_ratio);
+
+			nv_worst_ratio = fnvlist_lookup_uint64(cfg,
+			    MAP_WORST_RATIO);
+			nv_avg_ratio = fnvlist_lookup_uint64(cfg,
+			    MAP_AVG_RATIO);
+
+			if (worst_ratio < 1.0 || avg_ratio < 1.0) {
+				printf("Error ratio out of range %2.03f, "
+				    "%2.03f\n", worst_ratio, avg_ratio);
+				error = EINVAL;
+			}
+
+			if ((uint64_t)(worst_ratio * 1000.0) !=
+			    nv_worst_ratio) {
+				printf("Error different worst_ratio %2.03f "
+				    "!= %2.03f\n", (double)nv_worst_ratio /
+				    1000.0, worst_ratio);
+				error = EINVAL;
+			}
+
+			if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) {
+				printf("Error different average_ratio %2.03f "
+				    "!= %2.03f\n", (double)nv_avg_ratio /
+				    1000.0, avg_ratio);
+				error = EINVAL;
+			}
+		}
+
+		if (error) {
+			free_map(map);
+			nvlist_free(cfg);
+			return (1);
+		}
+
+		if (verbose > 0) {
+			printf("- %llu children: good\n",
+			    (u_longlong_t)children);
+		}
+		n++;
+
+		free_map(map);
+		nvlist_free(cfg);
+	}
+
+	if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) {
+		printf("Error permutation maps missing: %d / %d checked\n",
+		    n, VDEV_DRAID_MAX_CHILDREN - 1);
+		return (1);
+	}
+
+	printf("Successfully verified %d / %d permutation maps\n",
+	    n, VDEV_DRAID_MAX_CHILDREN - 1);
+
+	return (0);
+}
+
+/*
+ * Dump the contents of the specified mapping(s) for inspection.
+ *
+ * Options:
+ *   -m MIN  smallest child count to dump (at least 2)
+ *   -n MAX  largest child count to dump (at most VDEV_DRAID_MAX_CHILDREN)
+ *   -v      increment the verbosity level passed to dump_map_key()
+ *
+ * Returns 0 on success and 1 on a usage error or when dump_map_key() fails.
+ * An empty range (MIN greater than MAX) dumps nothing and returns 0.
+ */
+static int
+draid_dump(int argc, char *argv[])
+{
+	char filename[MAXPATHLEN];
+	int c, error, verbose = 1;
+	int min_children = VDEV_DRAID_MIN_CHILDREN;
+	int max_children = VDEV_DRAID_MAX_CHILDREN;
+
+	while ((c = getopt(argc, argv, ":vm:n:")) != -1) {
+		switch (c) {
+		case 'm':
+			min_children = (int)strtol(optarg, NULL, 0);
+			if (min_children < 2) {
+				(void) fprintf(stderr, "A minimum of 2 "
+				    "children are required.\n");
+				return (1);
+			}
+
+			break;
+		case 'n':
+			max_children = (int)strtol(optarg, NULL, 0);
+			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
+				(void) fprintf(stderr, "A maximum of %d "
+				    "children are allowed.\n",
+				    VDEV_DRAID_MAX_CHILDREN);
+				return (1);
+			}
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case ':':
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			draid_usage();
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			draid_usage();
+			break;
+		}
+	}
+
+	if (argc > optind) {
+		bzero(filename, MAXPATHLEN);
+		strncpy(filename, argv[optind], MAXPATHLEN - 1);
+	} else {
+		(void) fprintf(stderr, "A FILE must be specified.\n");
+		return (1);
+	}
+
+	/*
+	 * Dump maps for the requested child counts.  The counter is a signed
+	 * int like the bounds: comparing an unsigned counter against a
+	 * negative max_children (e.g. "-n -1") would convert the bound to a
+	 * huge unsigned value instead of yielding an empty range.
+	 */
+	for (int children = min_children;
+	    children <= max_children; children++) {
+		char key[8] = { 0 };
+
+		snprintf(key, sizeof (key), "%llu", (u_longlong_t)children);
+		error = dump_map_key(filename, key, verbose);
+		if (error) {
+			printf("Error dump_map_key(): %s\n", strerror(error));
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Print all of the mappings as a C formatted draid_map_t array.  This table
+ * is found in the module/zcommon/zfs_draid.c file and is the definitive
+ * source for all mapping used by dRAID.  It cannot be updated without
+ * changing the dRAID on disk format.
+ *
+ * One row is printed for every child count from VDEV_DRAID_MIN_CHILDREN
+ * through VDEV_DRAID_MAX_CHILDREN, using the values stored in FILE.
+ * Returns 0 on success and 1 if FILE is missing or a key cannot be read.
+ */
+static int
+draid_table(int argc, char *argv[])
+{
+	char filename[MAXPATHLEN];
+	int error;
+
+	if (argc > optind) {
+		bzero(filename, MAXPATHLEN);
+		strncpy(filename, argv[optind], MAXPATHLEN - 1);
+	} else {
+		(void) fprintf(stderr, "A FILE must be specified.\n");
+		return (1);
+	}
+
+	printf("static const draid_map_t "
+	    "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n");
+
+	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
+	    children <= VDEV_DRAID_MAX_CHILDREN;
+	    children++) {
+		uint64_t seed, checksum, nperms, avg_ratio;
+		uint64_t nv_children;
+		nvlist_t *cfg;
+		char key[8];
+
+		bzero(key, 8);
+		snprintf(key, 8, "%llu", (u_longlong_t)children);
+
+		error = read_map_key(filename, key, &cfg);
+		if (error != 0) {
+			printf("Error read_map_key() failed: %s\n",
+			    strerror(error));
+			return (1);
+		}
+
+		/*
+		 * Keep the stored child count in its own variable.  Writing
+		 * it into the loop counter would let a file whose value
+		 * disagrees with its key skip entries or never terminate.
+		 */
+		seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
+		checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
+		nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
+		nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
+		avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
+
+		/* The ratio is stored scaled by 1000; show it as a comment. */
+		printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t"
+		    "/* %2.03f */\n", (u_longlong_t)nv_children,
+		    (u_longlong_t)nperms, (u_longlong_t)seed,
+		    (u_longlong_t)checksum, (double)avg_ratio / 1000.0);
+
+		nvlist_free(cfg);
+	}
+
+	printf("};\n");
+
+	return (0);
+}
+
+/*
+ * Merge the maps stored in srcfilename into allcfgs.  A source map is copied
+ * when allcfgs has no entry for its key, or when its MAP_WORST_RATIO is
+ * strictly lower than that of the existing entry (which is then replaced).
+ * Pairs that are not nvlists are ignored.
+ *
+ * On success returns 0 and stores the number of maps copied in *mergedp.
+ * On failure returns the error from read_map(), nvlist_lookup_nvlist() or
+ * nvlist_add_nvlist() and leaves *mergedp untouched; allcfgs may have been
+ * partially updated in that case.
+ */
+static int
+draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp)
+{
+	nvlist_t *srccfgs;
+	nvpair_t *elem = NULL;
+	int error, merged = 0;
+
+	error = read_map(srcfilename, &srccfgs);
+	if (error != 0)
+		return (error);
+
+	while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) {
+		uint64_t nv_worst_ratio;
+		uint64_t allcfg_worst_ratio;
+		nvlist_t *cfg, *allcfg;
+		char *key;
+
+		if (nvpair_type(elem) != DATA_TYPE_NVLIST)
+			continue;
+
+		(void) nvpair_value_nvlist(elem, &cfg);
+		key = nvpair_name(elem);
+
+		nv_worst_ratio = fnvlist_lookup_uint64(cfg,
+		    MAP_WORST_RATIO);
+
+		error = nvlist_lookup_nvlist(allcfgs, key, &allcfg);
+		if (error == 0) {
+			allcfg_worst_ratio = fnvlist_lookup_uint64(
+			    allcfg, MAP_WORST_RATIO);
+
+			/* Keep the existing map unless the new one wins. */
+			if (nv_worst_ratio >= allcfg_worst_ratio)
+				continue;
+
+			fnvlist_remove(allcfgs, key);
+		} else if (error != ENOENT) {
+			break;
+		}
+
+		/* Count the map only once it has really been added. */
+		error = nvlist_add_nvlist(allcfgs, key, cfg);
+		if (error != 0)
+			break;
+
+		merged++;
+	}
+
+	/* Release the source list on every path, including errors. */
+	nvlist_free(srccfgs);
+
+	if (error != 0)
+		return (error);
+
+	*mergedp = merged;
+
+	return (0);
+}
+
+/*
+ * Combine the maps from every listed SRC file into FILE, keeping whichever
+ * map draid_merge_impl() selects for each child count.  This lets several
+ * 'draid generate' runs execute in parallel and have their results folded
+ * together afterwards.  FILE is only rewritten when at least one key was
+ * merged; a FILE that does not exist yet starts out as an empty list.
+ */
+static int
+draid_merge(int argc, char *argv[])
+{
+	char filename[MAXPATHLEN] = { 0 };
+	nvlist_t *allcfgs;
+	int opt, error, total_merged = 0;
+
+	/* No options are accepted; getopt() is only used to reject them. */
+	while ((opt = getopt(argc, argv, ":")) != -1) {
+		if (opt == ':') {
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			draid_usage();
+		} else if (opt == '?') {
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			draid_usage();
+		}
+	}
+
+	if (argc < 4) {
+		(void) fprintf(stderr,
+		    "A FILE and multiple SRCs must be specified.\n");
+		return (1);
+	}
+
+	/* The first operand is the destination; the rest are sources. */
+	strncpy(filename, argv[optind++], MAXPATHLEN - 1);
+
+	error = read_map(filename, &allcfgs);
+	if (error != 0 && error != ENOENT) {
+		printf("Error read_map(): %s\n", strerror(error));
+		return (error);
+	}
+	if (error == ENOENT)
+		allcfgs = fnvlist_alloc();
+
+	for (; optind < argc; optind++) {
+		char srcfilename[MAXPATHLEN] = { 0 };
+		int merged = 0;
+
+		strncpy(srcfilename, argv[optind], MAXPATHLEN - 1);
+
+		error = draid_merge_impl(allcfgs, srcfilename, &merged);
+		if (error) {
+			printf("Error draid_merge_impl(): %s\n",
+			    strerror(error));
+			nvlist_free(allcfgs);
+			return (1);
+		}
+
+		printf("Merged %d key(s) from '%s' into '%s'\n", merged,
+		    srcfilename, filename);
+		total_merged += merged;
+	}
+
+	if (total_merged > 0)
+		write_map(filename, allcfgs);
+
+	printf("Merged a total of %d key(s) into '%s'\n", total_merged,
+	    filename);
+
+	nvlist_free(allcfgs);
+
+	return (0);
+}
+
+/*
+ * Entry point: hand the remaining arguments to the handler for the
+ * subcommand named in argv[1].  draid_usage() is called when no subcommand
+ * is given or when it is not recognized.
+ */
+int
+main(int argc, char *argv[])
+{
+	if (argc < 2)
+		draid_usage();
+
+	/* Handlers see the subcommand name as their own argv[0]. */
+	char *subcommand = argv[1];
+	int sub_argc = argc - 1;
+	char **sub_argv = argv + 1;
+
+	if (strcmp(subcommand, "generate") == 0)
+		return (draid_generate(sub_argc, sub_argv));
+
+	if (strcmp(subcommand, "verify") == 0)
+		return (draid_verify(sub_argc, sub_argv));
+
+	if (strcmp(subcommand, "dump") == 0)
+		return (draid_dump(sub_argc, sub_argv));
+
+	if (strcmp(subcommand, "table") == 0)
+		return (draid_table(sub_argc, sub_argv));
+
+	if (strcmp(subcommand, "merge") == 0)
+		return (draid_merge(sub_argc, sub_argv));
+
+	/* NOTE(review): presumably draid_usage() does not return - confirm. */
+	draid_usage();
+}

diff --git a/zfs/tests/zfs-tests/cmd/file_check/file_check.c b/zfs/tests/zfs-tests/cmd/file_check/file_check.c
index 5df0ea7..3d3db75 100644
--- a/zfs/tests/zfs-tests/cmd/file_check/file_check.c
+++ b/zfs/tests/zfs-tests/cmd/file_check/file_check.c

@@ -40,7 +40,6 @@
 	long		i, n;
 	unsigned char	fillchar = DATA;
 	int		bigbuffersize = BIGBUFFERSIZE;
-	int64_t		read_count = 0;
 
 	/*
 	 * Validate arguments
@@ -78,8 +77,6 @@
 				exit(1);
 			}
 		}
-
-		read_count += n;
 	} while (n == bigbuffersize);
 
 	return (0);

diff --git a/zfs/tests/zfs-tests/cmd/file_write/file_write.c b/zfs/tests/zfs-tests/cmd/file_write/file_write.c
index 81fc5de..9d2e71b 100644
--- a/zfs/tests/zfs-tests/cmd/file_write/file_write.c
+++ b/zfs/tests/zfs-tests/cmd/file_write/file_write.c

@@ -34,10 +34,6 @@
 #include <time.h>
 #include <stdint.h>
 
-typedef unsigned char	uchar_t;
-typedef long long	longlong_t;
-typedef longlong_t	offset_t;
-
 static unsigned char bigbuffer[BIGBUFFERSIZE];
 
 /*
@@ -48,9 +44,9 @@
 static void usage(char *);
 
 /*
- * psudo-randomize the buffer
+ * pseudo-randomize the buffer
  */
-void randomize_buffer(int block_size) {
+static void randomize_buffer(int block_size) {
 	int i;
 	char rnd = rand() & 0xff;
 	for (i = 0; i < block_size; i++)
@@ -255,7 +251,7 @@
 	    "\t[-s offset] [-c write_count] [-d data]\n\n"
 	    "Where [data] equal to zero causes chars "
 	    "0->%d to be repeated throughout, or [data]\n"
-	    "equal to 'R' for psudorandom data.\n",
+	    "equal to 'R' for pseudorandom data.\n",
 	    prog, DATA_RANGE);
 
 	exit(1);

diff --git a/zfs/tests/zfs-tests/cmd/get_diff/.gitignore b/zfs/tests/zfs-tests/cmd/get_diff/.gitignore
new file mode 100644
index 0000000..f5fc360
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/get_diff/.gitignore

@@ -0,0 +1 @@
+/get_diff

diff --git a/zfs/tests/zfs-tests/cmd/get_diff/Makefile.am b/zfs/tests/zfs-tests/cmd/get_diff/Makefile.am
new file mode 100644
index 0000000..06c39dd
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/get_diff/Makefile.am

@@ -0,0 +1,6 @@
+# Pull in the build rules shared across the tree.
+include $(top_srcdir)/config/Rules.am
+
+# Directory the test helper binaries are installed into.
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+# Build and install the get_diff helper from its single source file.
+pkgexec_PROGRAMS = get_diff
+get_diff_SOURCES = get_diff.c

diff --git a/zfs/tests/zfs-tests/cmd/get_diff/get_diff.c b/zfs/tests/zfs-tests/cmd/get_diff/get_diff.c
new file mode 100644
index 0000000..2799f46
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/get_diff/get_diff.c

@@ -0,0 +1,109 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+/*
+ * Write the synopsis followed by msg to stderr, then terminate the process
+ * with exit_value.
+ */
+static void
+usage(char *msg, int exit_value)
+{
+	(void) fputs("get_diff file redacted_file\n", stderr);
+	(void) fprintf(stderr, "%s\n", msg);
+	exit(exit_value);
+}
+
+/*
+ * This utility compares two files, an original and its redacted counterpart
+ * (in that order). It compares the files 512 bytes at a time, printing out
+ * any ranges (as offset and length) where the redacted file does not match
+ * the original. This output is used to verify that the expected ranges of
+ * a redacted file do not contain the original data.
+ *
+ * Each differing range is written to stdout as "offset,length".  The
+ * program exits 1 on a usage error, an open failure, or a read error other
+ * than EIO on the redacted file, and returns 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+	off_t		diff_off = 0, diff_len = 0, off = 0;
+	int		fd1, fd2;
+	char		*fname1, *fname2;
+	char		buf1[DEV_BSIZE], buf2[DEV_BSIZE];
+	ssize_t		bytes;
+
+	if (argc != 3)
+		usage("Incorrect number of arguments.", 1);
+
+	if ((fname1 = argv[1]) == NULL)
+		usage("Filename missing.", 1);
+	if ((fd1 = open(fname1, O_LARGEFILE | O_RDONLY)) < 0) {
+		perror("open1 failed");
+		exit(1);
+	}
+
+	if ((fname2 = argv[2]) == NULL)
+		usage("Redacted filename missing.", 1);
+	if ((fd2 = open(fname2, O_LARGEFILE | O_RDONLY)) < 0) {
+		perror("open2 failed");
+		exit(1);
+	}
+
+	while ((bytes = pread(fd1, buf1, DEV_BSIZE, off)) > 0) {
+		ssize_t bytes2 = pread(fd2, buf2, DEV_BSIZE, off);
+		int differs;
+
+		if (bytes2 < 0) {
+			if (errno != EIO) {
+				perror("pread failed");
+				exit(1);
+			}
+			/*
+			 * A read in a redacted section of a file will
+			 * fail with EIO. If we get EIO, continue on
+			 * and report the block as redacted.
+			 */
+			differs = 1;
+		} else if (bytes2 < bytes) {
+			/*
+			 * The redacted file supplied fewer bytes than the
+			 * original, so part of buf2 was not filled by this
+			 * read.  Count the block as differing rather than
+			 * comparing stale or uninitialized buffer contents.
+			 */
+			differs = 1;
+		} else {
+			differs = (memcmp(buf1, buf2, bytes) != 0);
+		}
+
+		if (!differs) {
+			/* A matching block closes any open differing range. */
+			if (diff_len != 0) {
+				(void) fprintf(stdout, "%lld,%lld\n",
+				    (long long)diff_off, (long long)diff_len);
+				assert(off == diff_off + diff_len);
+				diff_len = 0;
+			}
+			diff_off = 0;
+		} else {
+			/* Start a new range or extend the current one. */
+			if (diff_len == 0)
+				diff_off = off;
+			assert(off == diff_off + diff_len);
+			diff_len += bytes;
+		}
+		off += bytes;
+	}
+
+	/* Flush a differing range that runs up to the end of the file. */
+	if (diff_len != 0) {
+		(void) fprintf(stdout, "%lld,%lld\n", (long long)diff_off,
+		    (long long)diff_len);
+	}
+
+	(void) close(fd1);
+	(void) close(fd2);
+
+	return (0);
+}

diff --git a/zfs/tests/zfs-tests/cmd/largest_file/largest_file.c b/zfs/tests/zfs-tests/cmd/largest_file/largest_file.c
index d1eceaf..00e1019 100644
--- a/zfs/tests/zfs-tests/cmd/largest_file/largest_file.c
+++ b/zfs/tests/zfs-tests/cmd/largest_file/largest_file.c

@@ -33,12 +33,9 @@
 #include <signal.h>
 #include <stdio.h>
 #include <string.h>
-#include <sys/types.h>
+#include <sys/stdtypes.h>
 #include <unistd.h>
 
-typedef long long	offset_t;
-#define	MAXOFFSET_T	LLONG_MAX
-
 /*
  * --------------------------------------------------------------
  *

diff --git a/zfs/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am b/zfs/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am
index b62a6bb..cd46220 100644
--- a/zfs/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/libzfs_input_check/Makefile.am

@@ -2,14 +2,16 @@
 
 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 pkgexec_PROGRAMS = libzfs_input_check
 
+if BUILD_FREEBSD
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/freebsd/zfs
+endif
+if BUILD_LINUX
+DEFAULT_INCLUDES += -I$(top_srcdir)/include/os/linux/zfs
+endif
+
 libzfs_input_check_SOURCES = libzfs_input_check.c
 libzfs_input_check_LDADD = \
-	$(top_builddir)/lib/libspl/libspl.la \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzfs_core/libzfs_core.la
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la

diff --git a/zfs/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/zfs/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
index ef388ea..b671af7 100644
--- a/zfs/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
+++ b/zfs/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c

@@ -22,9 +22,12 @@
 #include <string.h>
 #include <strings.h>
 #include <libzfs_core.h>
+#include <libzutil.h>
 
 #include <sys/nvpair.h>
+#include <sys/vdev_impl.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/zfs_bootenv.h>
 
 /*
  * Test the nvpair inputs for the non-legacy zfs ioctl commands.
@@ -99,10 +102,12 @@
 	ZFS_IOC_SPACE_WRITTEN,
 	ZFS_IOC_POOL_REGUID,
 	ZFS_IOC_SEND_PROGRESS,
-
 	ZFS_IOC_EVENTS_NEXT,
 	ZFS_IOC_EVENTS_CLEAR,
 	ZFS_IOC_EVENTS_SEEK,
+	ZFS_IOC_NEXTBOOT,
+	ZFS_IOC_JAIL,
+	ZFS_IOC_UNJAIL,
 };
 
 
@@ -154,7 +159,7 @@
 	zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
 	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)malloc(zc.zc_nvlist_dst_size);
 
-	if (ioctl(zfs_fd, ioc, &zc) != 0)
+	if (zfs_ioctl_fd(zfs_fd, ioc, &zc) != 0)
 		error = errno;
 
 	if (error != expected) {
@@ -272,13 +277,13 @@
 static void
 test_pool_reopen(const char *pool)
 {
-	nvlist_t *required = fnvlist_alloc();
+	nvlist_t *optional = fnvlist_alloc();
 
-	fnvlist_add_boolean_value(required, "scrub_restart", B_FALSE);
+	fnvlist_add_boolean_value(optional, "scrub_restart", B_FALSE);
 
-	IOC_INPUT_TEST(ZFS_IOC_POOL_REOPEN, pool, required, NULL, 0);
+	IOC_INPUT_TEST(ZFS_IOC_POOL_REOPEN, pool, NULL, optional, 0);
 
-	nvlist_free(required);
+	nvlist_free(optional);
 }
 
 static void
@@ -505,6 +510,7 @@
 	fnvlist_add_string(optional, "fromsnap", from);
 	fnvlist_add_uint64(optional, "resume_object", resumeobj);
 	fnvlist_add_uint64(optional, "resume_offset", offset);
+	fnvlist_add_boolean(optional, "savedok");
 #endif
 	IOC_INPUT_TEST(ZFS_IOC_SEND_NEW, snapshot, required, optional, 0);
 
@@ -552,7 +558,8 @@
 	fnvlist_add_boolean(optional, "resumable");
 	fnvlist_add_uint64(optional, "action_handle", *action_handle);
 #endif
-	IOC_INPUT_TEST(ZFS_IOC_RECV_NEW, dataset, required, optional, EBADE);
+	IOC_INPUT_TEST(ZFS_IOC_RECV_NEW, dataset, required, optional,
+	    ZFS_ERR_STREAM_TRUNCATED);
 
 	nvlist_free(props);
 	nvlist_free(optional);
@@ -685,12 +692,87 @@
 
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 	zc.zc_name[sizeof (zc.zc_name) - 1] = '\0';
-	err = ioctl(zfs_fd, ZFS_IOC_DESTROY, &zc);
+	err = zfs_ioctl_fd(zfs_fd, ZFS_IOC_DESTROY, &zc);
 
 	return (err == 0 ? 0 : errno);
 }
 
 static void
+test_redact(const char *snapshot1, const char *snapshot2)
+{
+	nvlist_t *required = fnvlist_alloc();
+	nvlist_t *snapnv = fnvlist_alloc();
+	char bookmark[MAXNAMELEN + 32];
+
+	fnvlist_add_string(required, "bookname", "testbookmark");
+	fnvlist_add_boolean(snapnv, snapshot2);
+	fnvlist_add_nvlist(required, "snapnv", snapnv);
+
+	IOC_INPUT_TEST(ZFS_IOC_REDACT, snapshot1, required, NULL, 0);
+
+	nvlist_free(snapnv);
+	nvlist_free(required);
+
+	strlcpy(bookmark, snapshot1, sizeof (bookmark));
+	*strchr(bookmark, '@') = '\0';
+	strlcat(bookmark, "#testbookmark", sizeof (bookmark) -
+	    strlen(bookmark));
+	zfs_destroy(bookmark);
+}
+
+/*
+ * Exercise ZFS_IOC_GET_BOOKMARK_PROPS on the given bookmark with no
+ * required and no optional inputs.  NOTE(review): the trailing 0 is
+ * presumably the expected errno - confirm against IOC_INPUT_TEST.
+ */
+static void
+test_get_bookmark_props(const char *bookmark)
+{
+	IOC_INPUT_TEST(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, NULL, NULL, 0);
+}
+
+/*
+ * Exercise ZFS_IOC_WAIT on the pool with a required int32 "wait_activity"
+ * and an optional uint64 "wait_tag".  NOTE(review): EINVAL is presumably
+ * the errno the ioctl is expected to return for these values - confirm
+ * against IOC_INPUT_TEST.
+ */
+static void
+test_wait(const char *pool)
+{
+	nvlist_t *required = fnvlist_alloc();
+	nvlist_t *optional = fnvlist_alloc();
+
+	fnvlist_add_int32(required, "wait_activity", 2);
+	fnvlist_add_uint64(optional, "wait_tag", 0xdeadbeefdeadbeef);
+
+	IOC_INPUT_TEST(ZFS_IOC_WAIT, pool, required, optional, EINVAL);
+
+	nvlist_free(required);
+	nvlist_free(optional);
+}
+
+/*
+ * Exercise ZFS_IOC_WAIT_FS on the dataset with a required int32
+ * "wait_activity" and no optional inputs.  NOTE(review): EINVAL is
+ * presumably the expected errno - confirm against IOC_INPUT_TEST.
+ */
+static void
+test_wait_fs(const char *dataset)
+{
+	nvlist_t *required = fnvlist_alloc();
+
+	fnvlist_add_int32(required, "wait_activity", 2);
+
+	IOC_INPUT_TEST(ZFS_IOC_WAIT_FS, dataset, required, NULL, EINVAL);
+
+	nvlist_free(required);
+}
+
+/*
+ * Exercise ZFS_IOC_GET_BOOTENV on the pool with no required and no
+ * optional inputs.
+ */
+static void
+test_get_bootenv(const char *pool)
+{
+	IOC_INPUT_TEST(ZFS_IOC_GET_BOOTENV, pool, NULL, NULL, 0);
+}
+
+/*
+ * Exercise ZFS_IOC_SET_BOOTENV on the pool with a uint64 "version" of
+ * VB_RAW and a GRUB_ENVMAP string.  NOTE(review): this uses the
+ * IOC_INPUT_TEST_WILD variant, which is not defined in this hunk - check
+ * how it differs from IOC_INPUT_TEST.
+ */
+static void
+test_set_bootenv(const char *pool)
+{
+	nvlist_t *required = fnvlist_alloc();
+
+	fnvlist_add_uint64(required, "version", VB_RAW);
+	fnvlist_add_string(required, GRUB_ENVMAP, "test");
+
+	IOC_INPUT_TEST_WILD(ZFS_IOC_SET_BOOTENV, pool, required, NULL, 0);
+
+	nvlist_free(required);
+}
+
+static void
 zfs_ioc_input_tests(const char *pool)
 {
 	char filepath[] = "/tmp/ioc_test_file_XXXXXX";
@@ -700,6 +782,7 @@
 	char bookmark[ZFS_MAX_DATASET_NAME_LEN + 32];
 	char backup[ZFS_MAX_DATASET_NAME_LEN];
 	char clone[ZFS_MAX_DATASET_NAME_LEN];
+	char clonesnap[ZFS_MAX_DATASET_NAME_LEN + 32];
 	int tmpfd, err;
 
 	/*
@@ -710,9 +793,10 @@
 	(void) snprintf(snapshot, sizeof (snapshot), "%s@snapshot", dataset);
 	(void) snprintf(bookmark, sizeof (bookmark), "%s#bookmark", dataset);
 	(void) snprintf(clone, sizeof (clone), "%s/test-fs-clone", pool);
+	(void) snprintf(clonesnap, sizeof (clonesnap), "%s@snap", clone);
 	(void) snprintf(backup, sizeof (backup), "%s/backup", pool);
 
-	err = lzc_create(dataset, DMU_OST_ZFS, NULL, NULL, 0);
+	err = lzc_create(dataset, LZC_DATSET_TYPE_ZFS, NULL, NULL, -1);
 	if (err) {
 		(void) fprintf(stderr, "could not create '%s': %s\n",
 		    dataset, strerror(errno));
@@ -747,6 +831,7 @@
 
 	test_bookmark(pool, snapshot, bookmark);
 	test_get_bookmarks(dataset);
+	test_get_bookmark_props(bookmark);
 	test_destroy_bookmarks(pool, bookmark);
 
 	test_hold(pool, snapshot);
@@ -754,6 +839,9 @@
 	test_release(pool, snapshot);
 
 	test_clone(snapshot, clone);
+	test_snapshot(pool, clonesnap);
+	test_redact(snapshot, clonesnap);
+	zfs_destroy(clonesnap);
 	zfs_destroy(clone);
 
 	test_rollback(dataset, snapshot);
@@ -770,6 +858,12 @@
 	test_vdev_initialize(pool);
 	test_vdev_trim(pool);
 
+	test_wait(pool);
+	test_wait_fs(dataset);
+
+	test_set_bootenv(pool);
+	test_get_bootenv(pool);
+
 	/*
 	 * cleanup
 	 */
@@ -806,7 +900,7 @@
 		if (ioc_tested[cmd])
 			continue;
 
-		if (ioctl(zfs_fd, ioc, &zc) != 0 &&
+		if (zfs_ioctl_fd(zfs_fd, ioc, &zc) != 0 &&
 		    errno != ZFS_ERR_IOC_CMD_UNAVAIL) {
 			(void) fprintf(stderr, "cmd %d is missing a test case "
 			    "(%d)\n", cmd, errno);
@@ -815,16 +909,19 @@
 }
 
 enum zfs_ioc_ref {
+#ifdef __FreeBSD__
+	ZFS_IOC_BASE = 0,
+#else
 	ZFS_IOC_BASE = ('Z' << 8),
-	LINUX_IOC_BASE = ZFS_IOC_BASE + 0x80,
-	FREEBSD_IOC_BASE = ZFS_IOC_BASE + 0xC0,
+#endif
+	ZFS_IOC_PLATFORM_BASE = ZFS_IOC_BASE + 0x80,
 };
 
 /*
  * Canonical reference check of /dev/zfs ioctl numbers.
  * These cannot change and new ioctl numbers must be appended.
  */
-boolean_t
+static boolean_t
 validate_ioc_values(void)
 {
 	boolean_t result = B_TRUE;
@@ -917,9 +1014,18 @@
 	CHECK(ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT);
 	CHECK(ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE);
 	CHECK(ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM);
-	CHECK(LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
-	CHECK(LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
-	CHECK(LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
+	CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT);
+	CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS);
+	CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
+	CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 4 == ZFS_IOC_NEXTBOOT);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 5 == ZFS_IOC_JAIL);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 6 == ZFS_IOC_UNJAIL);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 7 == ZFS_IOC_SET_BOOTENV);
+	CHECK(ZFS_IOC_PLATFORM_BASE + 8 == ZFS_IOC_GET_BOOTENV);
 
 #undef CHECK
 

diff --git a/zfs/tests/zfs-tests/cmd/mkbusy/mkbusy.c b/zfs/tests/zfs-tests/cmd/mkbusy/mkbusy.c
index 9634904..a03076f 100644
--- a/zfs/tests/zfs-tests/cmd/mkbusy/mkbusy.c
+++ b/zfs/tests/zfs-tests/cmd/mkbusy/mkbusy.c

@@ -30,8 +30,6 @@
 #include <errno.h>
 #include <string.h>
 
-typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
-
 static void
 usage(char *progname)
 {
@@ -104,7 +102,7 @@
 
 		/*
 		 * The argument supplied doesn't exist. Copy the path, and
-		 * remove the trailing slash if presnt.
+		 * remove the trailing slash if present.
 		 */
 		if ((arg = strdup(argv[0])) == NULL)
 			fail("strdup", 1);

diff --git a/zfs/tests/zfs-tests/cmd/mkfile/Makefile.am b/zfs/tests/zfs-tests/cmd/mkfile/Makefile.am
index 016c671..5f0e2e0 100644
--- a/zfs/tests/zfs-tests/cmd/mkfile/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/mkfile/Makefile.am

@@ -4,3 +4,5 @@
 
 pkgexec_PROGRAMS = mkfile
 mkfile_SOURCES = mkfile.c
+
+mkfile_LDADD = $(LTLIBINTL)

diff --git a/zfs/tests/zfs-tests/cmd/mkfile/mkfile.c b/zfs/tests/zfs-tests/cmd/mkfile/mkfile.c
index 7ebf7bb..f741e84 100644
--- a/zfs/tests/zfs-tests/cmd/mkfile/mkfile.c
+++ b/zfs/tests/zfs-tests/cmd/mkfile/mkfile.c

@@ -34,18 +34,16 @@
 #include <string.h>
 #include <libintl.h>
 #include <errno.h>
+#include <sys/stdtypes.h>
+#include <sys/sysmacros.h>
 
-#define	MIN(a, b)	((a) < (b) ? (a) : (b))
-
-#define	BLOCK_SIZE	512		/* bytes */
+#define	BLOCKSIZE	512		/* bytes */
 #define	KILOBYTE	1024
 #define	MEGABYTE	(KILOBYTE * KILOBYTE)
 #define	GIGABYTE	(KILOBYTE * MEGABYTE)
 
 #define	FILE_MODE	(S_ISVTX + S_IRUSR + S_IWUSR)
 
-typedef long long	offset_t;
-
 static void usage(void);
 
 int
@@ -95,7 +93,7 @@
 			break;
 		case 'b':
 		case 'B':
-			mult = BLOCK_SIZE;
+			mult = BLOCKSIZE;
 			break;
 		case 'm':
 		case 'M':
@@ -141,8 +139,17 @@
 			argv++;
 			argc--;
 			continue;
-		}
-		if (lseek(fd, (off_t)size-1, SEEK_SET) < 0) {
+		} else if (fchown(fd, getuid(), getgid()) < 0) {
+			saverr = errno;
+			(void) fprintf(stderr, gettext(
+			    "Could not set owner/group of %s: %s\n"),
+			    argv[1], strerror(saverr));
+			(void) close(fd);
+			errors++;
+			argv++;
+			argc--;
+			continue;
+		} else if (lseek(fd, (off_t)size-1, SEEK_SET) < 0) {
 			saverr = errno;
 			(void) fprintf(stderr, gettext(
 			    "Could not seek to offset %ld in %s: %s\n"),
@@ -266,7 +273,7 @@
 	return (errors);
 }
 
-static void usage()
+static void usage(void)
 {
 	(void) fprintf(stderr, gettext(
 	    "Usage: mkfile [-nv] <size>[g|k|b|m] <name1> [<name2>] ...\n"));

diff --git a/zfs/tests/zfs-tests/cmd/mkfiles/mkfiles.c b/zfs/tests/zfs-tests/cmd/mkfiles/mkfiles.c
index 62dee16..32abfd0 100644
--- a/zfs/tests/zfs-tests/cmd/mkfiles/mkfiles.c
+++ b/zfs/tests/zfs-tests/cmd/mkfiles/mkfiles.c

@@ -55,6 +55,10 @@
 			(void) fprintf(stderr, "Failed to create %s %s\n", buf,
 			    strerror(errno));
 			return (-4);
+		} else if (fchown(fd, getuid(), getgid()) < 0) {
+			(void) fprintf(stderr, "Failed to chown %s %s\n", buf,
+			    strerror(errno));
+			return (-5);
 		}
 		(void) close(fd);
 	}

diff --git a/zfs/tests/zfs-tests/cmd/mktree/mktree.c b/zfs/tests/zfs-tests/cmd/mktree/mktree.c
index 02d4974..25b26c9 100644
--- a/zfs/tests/zfs-tests/cmd/mktree/mktree.c
+++ b/zfs/tests/zfs-tests/cmd/mktree/mktree.c

@@ -30,7 +30,9 @@
 #include <unistd.h>
 #include <string.h>
 #include <fcntl.h>
+#ifdef __linux__
 #include <sys/xattr.h>
+#endif
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/param.h>
@@ -176,11 +178,13 @@
 		exit(errno);
 	}
 
+#ifdef __linux__
 	if (fsetxattr(fd, "user.xattr", pbuf, 1024, 0) < 0) {
 		(void) fprintf(stderr, "fsetxattr(fd, \"xattr\", pbuf, "
 		    "1024, 0) failed.\n[%d]: %s.\n", errno, strerror(errno));
 		exit(errno);
 	}
+#endif
 
 	(void) close(fd);
 	free(pbuf);

diff --git a/zfs/tests/zfs-tests/cmd/mmap_libaio/Makefile.am b/zfs/tests/zfs-tests/cmd/mmap_libaio/Makefile.am
index 67d0f0e..25f9dda 100644
--- a/zfs/tests/zfs-tests/cmd/mmap_libaio/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/mmap_libaio/Makefile.am

@@ -5,5 +5,6 @@
 if WANT_MMAP_LIBAIO
 pkgexec_PROGRAMS = mmap_libaio
 mmap_libaio_SOURCES = mmap_libaio.c
-mmap_libaio_LDADD = $(LIBAIO)
+mmap_libaio_CFLAGS = $(AM_CFLAGS) $(LIBAIO_CFLAGS)
+mmap_libaio_LDADD = $(LIBAIO_LIBS)
 endif

diff --git a/zfs/tests/zfs-tests/cmd/mmap_seek/.gitignore b/zfs/tests/zfs-tests/cmd/mmap_seek/.gitignore
new file mode 100644
index 0000000..6b05a79
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_seek/.gitignore

@@ -0,0 +1 @@
+/mmap_seek

diff --git a/zfs/tests/zfs-tests/cmd/mmap_seek/Makefile.am b/zfs/tests/zfs-tests/cmd/mmap_seek/Makefile.am
new file mode 100644
index 0000000..b938931
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_seek/Makefile.am

@@ -0,0 +1,6 @@
+# Pull in the build rules shared across the tree.
+include $(top_srcdir)/config/Rules.am
+
+# Directory the test helper binaries are installed into.
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+# Build and install the mmap_seek helper from its single source file.
+pkgexec_PROGRAMS = mmap_seek
+mmap_seek_SOURCES = mmap_seek.c

diff --git a/zfs/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c b/zfs/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c
new file mode 100644
index 0000000..bb36527
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_seek/mmap_seek.c

@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
+static void
+seek_data(int fd, off_t offset, off_t expected)
+{
+	off_t found = lseek(fd, offset, SEEK_DATA);
+	if (found == expected)
+		return;	/* matched: nothing to report */
+	fprintf(stderr, "lseek(fd, %lld, SEEK_DATA) = %lld (expected %lld)\n",
+	    (long long)offset, (long long)found, (long long)expected);
+	exit(2);	/* mismatch fails the test */
+}
+
+static void
+seek_hole(int fd, off_t offset, off_t expected)
+{
+	off_t found = lseek(fd, offset, SEEK_HOLE);
+	if (found == expected)
+		return;	/* matched: nothing to report */
+	fprintf(stderr, "lseek(fd, %lld, SEEK_HOLE) = %lld (expected %lld)\n",
+	    (long long)offset, (long long)found, (long long)expected);
+	exit(2);	/* mismatch fails the test */
+}
+
+int
+main(int argc, char **argv)
+{
+	char *execname = argv[0];
+	char *file_path = argv[1];
+	char *buf = NULL;
+	int err;
+
+	if (argc != 4) {
+		(void) printf("usage: %s <file name> <file size> "
+		    "<block size>\n", argv[0]);
+		exit(1);
+	}
+
+	int fd = open(file_path, O_RDWR | O_CREAT, 0666);
+	if (fd == -1) {
+		(void) fprintf(stderr, "%s: %s: ", execname, file_path);
+		perror("open");
+		exit(2);
+	}
+
+	off_t file_size = atoi(argv[2]);
+	off_t block_size = atoi(argv[3]);
+
+	if (block_size * 2 > file_size) {
+		(void) fprintf(stderr, "file size must be at least "
+		    "double the block size\n");
+		exit(2);
+	}
+
+	err = ftruncate(fd, file_size);
+	if (err == -1) {
+		perror("ftruncate");
+		exit(2);
+	}
+
+	if ((buf = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+	    MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		perror("mmap");
+		exit(2);
+	}
+
+	/* Verify the file is sparse and reports no data. */
+	seek_data(fd, 0, -1);
+
+	/* Verify the file is reported as a hole. */
+	seek_hole(fd, 0, 0);
+
+	/* Verify search beyond end of file is an error. */
+	seek_data(fd, 2 * file_size, -1);
+	seek_hole(fd, 2 * file_size, -1);
+
+	/* Dirty the first byte. */
+	memset(buf, 'a', 1);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, -1);
+	seek_hole(fd, 0, block_size);
+	seek_hole(fd, block_size, block_size);
+
+	/* Dirty the first half of the file. */
+	memset(buf, 'b', file_size / 2);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, block_size);
+	seek_hole(fd, 0, P2ROUNDUP(file_size / 2, block_size));
+	seek_hole(fd, block_size, P2ROUNDUP(file_size / 2, block_size));
+
+	/* Dirty the whole file. */
+	memset(buf, 'c', file_size);
+	seek_data(fd, 0, 0);
+	seek_data(fd, file_size * 3 / 4,
+	    P2ROUNDUP(file_size * 3 / 4, block_size));
+	seek_hole(fd, 0, file_size);
+	seek_hole(fd, file_size / 2, file_size);
+
+	/* Punch a hole (requires compression to be enabled). */
+	memset(buf + block_size, 0, block_size);
+	seek_data(fd, 0, 0);
+	seek_data(fd, block_size, 2 * block_size);
+	seek_hole(fd, 0, block_size);
+	seek_hole(fd, block_size, block_size);
+	seek_hole(fd, 2 * block_size, file_size);
+
+	err = munmap(buf, file_size);
+	if (err == -1) {
+		perror("munmap");
+		exit(2);
+	}
+
+	close(fd);
+
+	return (0);
+}

diff --git a/zfs/tests/zfs-tests/cmd/mmap_sync/.gitignore b/zfs/tests/zfs-tests/cmd/mmap_sync/.gitignore
new file mode 100644
index 0000000..c721f47
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_sync/.gitignore

@@ -0,0 +1 @@
+/mmap_sync

diff --git a/zfs/tests/zfs-tests/cmd/mmap_sync/Makefile.am b/zfs/tests/zfs-tests/cmd/mmap_sync/Makefile.am
new file mode 100644
index 0000000..313e8db
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_sync/Makefile.am

@@ -0,0 +1,6 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = mmap_sync
+mmap_sync_SOURCES = mmap_sync.c

diff --git a/zfs/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c b/zfs/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c
new file mode 100644
index 0000000..226e71b
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c

@@ -0,0 +1,152 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+
+static void
+cleanup(char *file)
+{
+	(void) remove(file);
+}
+
+int
+main(int argc, char *argv[])
+{
+	char *testdir = getenv("TESTDIR");
+	if (!testdir) {
+		fprintf(stderr, "environment variable TESTDIR not set\n");
+		return (1);
+	}
+
+	struct stat st;
+	umask(0);
+	if (stat(testdir, &st) != 0 &&
+	    mkdir(testdir, 0777) != 0) {
+		perror("mkdir");
+		return (1);
+	}
+
+	if (argc > 3) {
+		fprintf(stderr, "usage: %s "
+		    "[run time in mins] "
+		    "[max msync time in ms]\n", argv[0]);
+		return (1);
+	}
+
+	int run_time_mins = 1;
+	if (argc >= 2) {
+		run_time_mins = atoi(argv[1]);
+	}
+
+	int max_msync_time_ms = 1000;
+	if (argc >= 3) {
+		max_msync_time_ms = atoi(argv[2]);
+	}
+
+	char filepath[512];
+	filepath[0] = '\0';
+	char *file = &filepath[0];
+
+	(void) snprintf(file, 512, "%s/msync_file", testdir);
+
+	const int LEN = 8;
+	cleanup(file);
+
+	int fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR |
+	    S_IRGRP | S_IROTH);
+
+	if (fd == -1) {
+		(void) fprintf(stderr, "%s: %s: ", argv[0], file);
+		perror("open");
+		return (1);
+	}
+
+	if (ftruncate(fd, LEN) != 0) {
+		perror("ftruncate");
+		cleanup(file);
+		return (1);
+	}
+
+	void *ptr = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (ptr == MAP_FAILED) {
+		perror("mmap");
+		cleanup(file);
+		return (1);
+	}
+
+	struct timeval tstart;
+	gettimeofday(&tstart, NULL);
+
+	long long x = 0LL;
+
+	for (;;) {
+		*((long long *)ptr) = x;
+		x++;
+
+		struct timeval t1, t2;
+		gettimeofday(&t1, NULL);
+		if (msync(ptr, LEN, MS_SYNC|MS_INVALIDATE) != 0) {
+			perror("msync");
+			cleanup(file);
+			return (1);
+		}
+
+		gettimeofday(&t2, NULL);
+
+		double elapsed = (t2.tv_sec - t1.tv_sec) * 1000.0;
+		elapsed += ((t2.tv_usec - t1.tv_usec) / 1000.0);
+		if (elapsed > max_msync_time_ms) {
+			fprintf(stderr, "slow msync: %f ms\n", elapsed);
+			if (munmap(ptr, LEN) != 0)
+				perror("munmap");
+			cleanup(file);
+			return (1);
+		}
+
+		double elapsed_start = (t2.tv_sec - tstart.tv_sec) * 1000.0;
+		elapsed_start += ((t2.tv_usec - tstart.tv_usec) / 1000.0);
+		if (elapsed_start > run_time_mins * 60 * 1000) {
+			break;
+		}
+	}
+
+	if (munmap(ptr, LEN) != 0) {
+		perror("munmap");
+		cleanup(file);
+		return (1);
+	}
+
+	if (close(fd) != 0) {
+		perror("close");
+	}
+
+	cleanup(file);
+	return (0);
+}

diff --git a/zfs/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/zfs/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
index 458d6d8..438cba3 100644
--- a/zfs/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
+++ b/zfs/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c

@@ -42,8 +42,8 @@
  * 2. In the same process, context #2, mmap page fault (which means the mm_sem
  *    is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous
  *    txg "n" completed.
- * 3. context #1 call uiomove to write, however page fault is occurred in
- *    uiomove, which means it needs mm_sem, but mm_sem is hold by
+ * 3. context #1 call zfs_uiomove to write, however page fault is occurred in
+ *    zfs_uiomove, which means it needs mm_sem, but mm_sem is hold by
  *    context #2, so it stuck and can't complete, then txg "n" will not
  *    complete.
  *
@@ -52,6 +52,7 @@
  */
 
 #define	NORMAL_WRITE_TH_NUM	2
+#define	MAX_WRITE_BYTES	262144000
 
 static void *
 normal_writer(void *filename)
@@ -67,18 +68,29 @@
 	}
 
 	char *buf = malloc(1);
-	while (1) {
+	off_t bytes_written = 0;
+
+	while (bytes_written < MAX_WRITE_BYTES) {
 		write_num = write(fd, buf, 1);
 		if (write_num == 0) {
 			err(1, "write failed!");
 			break;
 		}
-		lseek(fd, page_size, SEEK_CUR);
+		if ((bytes_written = lseek(fd, page_size, SEEK_CUR)) == -1) {
+			err(1, "lseek failed on %s: %s", file_path,
+			    strerror(errno));
+			break;
+		}
 	}
 
 	if (buf) {
 		free(buf);
 	}
+
+	if (close(fd) != 0)
+		err(1, "failed to close file");
+
+	return (NULL);
 }
 
 static void *

diff --git a/zfs/tests/zfs-tests/cmd/nvlist_to_lua/Makefile.am b/zfs/tests/zfs-tests/cmd/nvlist_to_lua/Makefile.am
index f509a97..511b6c6 100644
--- a/zfs/tests/zfs-tests/cmd/nvlist_to_lua/Makefile.am
+++ b/zfs/tests/zfs-tests/cmd/nvlist_to_lua/Makefile.am

@@ -2,13 +2,9 @@
 
 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 pkgexec_PROGRAMS = nvlist_to_lua
 
 nvlist_to_lua_SOURCES = nvlist_to_lua.c
 nvlist_to_lua_LDADD = \
-	$(top_builddir)/lib/libnvpair/libnvpair.la \
-	$(top_builddir)/lib/libzfs_core/libzfs_core.la
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la

diff --git a/zfs/tests/zfs-tests/cmd/send_doall/.gitignore b/zfs/tests/zfs-tests/cmd/send_doall/.gitignore
new file mode 100644
index 0000000..6ba2e60
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/send_doall/.gitignore

@@ -0,0 +1 @@
+/send_doall

diff --git a/zfs/tests/zfs-tests/cmd/send_doall/Makefile.am b/zfs/tests/zfs-tests/cmd/send_doall/Makefile.am
new file mode 100644
index 0000000..33a6b83
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/send_doall/Makefile.am

@@ -0,0 +1,11 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = send_doall
+
+send_doall_SOURCES = send_doall.c
+send_doall_LDADD = \
+	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la

diff --git a/zfs/tests/zfs-tests/cmd/send_doall/send_doall.c b/zfs/tests/zfs-tests/cmd/send_doall/send_doall.c
new file mode 100644
index 0000000..6f47df0
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/send_doall/send_doall.c

@@ -0,0 +1,87 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Portions Copyright 2020 iXsystems, Inc.
+ */
+
+/*
+ * Test a corner case : a "doall" send without children datasets.
+ */
+
+#include <libzfs.h>
+#include <libzfs_core.h>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sysexits.h>
+#include <err.h>
+
+static void
+usage(const char *name)
+{
+	fprintf(stderr, "usage: %s snap\n", name);
+	exit(EX_USAGE);
+}
+
+int
+main(int argc, char const * const argv[])
+{
+	sendflags_t flags = { 0 };
+	libzfs_handle_t *zhdl;
+	zfs_handle_t *zhp;
+	const char *tofull, *fsname, *tosnap, *p;
+	int error;
+
+	if (argc != 2)
+		usage(argv[0]);
+	tofull = argv[1];
+
+	/* Split "fs@snap" into a filesystem name (copied) and a snapshot. */
+	p = strchr(tofull, '@');
+	if (p == NULL)
+		usage(argv[0]);
+	tosnap = p + 1;
+
+	fsname = strndup(tofull, p - tofull);
+	if (fsname == NULL)
+		err(EX_OSERR, "strndup");
+
+	zhdl = libzfs_init();
+	if (zhdl == NULL)
+		errx(EX_OSERR, "libzfs_init(): %s", libzfs_error_init(errno));
+
+	zhp = zfs_open(zhdl, fsname, ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		err(EX_OSERR, "zfs_open(\"%s\")", fsname);
+
+	/* Stream to stdout; zfs_send()'s result becomes the exit status. */
+	flags.doall = B_TRUE;
+	error = zfs_send(zhp, NULL, tosnap, &flags,
+	    STDOUT_FILENO, NULL, NULL, NULL);
+
+	zfs_close(zhp);
+	libzfs_fini(zhdl);
+	free((void *)fsname);
+	return (error);
+}

diff --git a/zfs/tests/zfs-tests/cmd/stride_dd/.gitignore b/zfs/tests/zfs-tests/cmd/stride_dd/.gitignore
new file mode 100644
index 0000000..7c072ee
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/stride_dd/.gitignore

@@ -0,0 +1 @@
+/stride_dd

diff --git a/zfs/tests/zfs-tests/cmd/stride_dd/Makefile.am b/zfs/tests/zfs-tests/cmd/stride_dd/Makefile.am
new file mode 100644
index 0000000..d6f1adb
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/stride_dd/Makefile.am

@@ -0,0 +1,7 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
+
+pkgexec_PROGRAMS = stride_dd
+stride_dd_SOURCES = stride_dd.c
+stride_dd_LDADD = -lrt

diff --git a/zfs/tests/zfs-tests/cmd/stride_dd/stride_dd.c b/zfs/tests/zfs-tests/cmd/stride_dd/stride_dd.c
new file mode 100644
index 0000000..88bd532
--- /dev/null
+++ b/zfs/tests/zfs-tests/cmd/stride_dd/stride_dd.c

@@ -0,0 +1,214 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int bsize = 0;
+static int count = 0;
+static char *ifile = NULL;
+static char *ofile = NULL;
+static int stride = 0;
+static int seek = 0;
+static char *execname = "stride_dd";
+
+static void usage(void);
+static void parse_options(int argc, char *argv[]);
+
+static void
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "usage: %s -i inputfile -o outputfile -b blocksize -c count \n"
+	    "           -s stride [ -k seekblocks]\n"
+	    "\n"
+	    "Simplified version of dd that supports the stride option.\n"
+	    "A stride of n means that for each block written, n - 1 blocks\n"
+	    "are skipped in both the input and output file. A stride of 1\n"
+	    "means that blocks are read and written consecutively.\n"
+	    "All numeric parameters must be integers.\n"
+	    "\n"
+	    "    inputfile:  File to read from\n"
+	    "    outputfile: File to write to\n"
+	    "    blocksize:  Size of each block to read/write\n"
+	    "    count:      Number of blocks to read/write\n"
+	    "    stride:     Read/write a block then skip (stride - 1) blocks\n"
+	    "    seekblocks: Number of blocks to skip at start of output\n",
+	    execname);
+	(void) exit(1);
+}
+
+static void
+parse_options(int argc, char *argv[])
+{
+	int c;
+	int errflag = 0;
+
+	execname = argv[0];
+
+	extern char *optarg;
+	extern int optind, optopt;
+
+	while ((c = getopt(argc, argv, ":b:c:i:o:s:k:")) != -1) {
+		switch (c) {
+			case 'b':
+				bsize = atoi(optarg);
+				break;
+
+			case 'c':
+				count = atoi(optarg);
+				break;
+
+			case 'i':
+				ifile = optarg;
+				break;
+
+			case 'o':
+				ofile = optarg;
+				break;
+
+			case 's':
+				stride = atoi(optarg);
+				break;
+
+			case 'k':
+				seek = atoi(optarg);
+				break;
+
+			case ':':
+				(void) fprintf(stderr,
+				    "Option -%c requires an operand\n", optopt);
+				errflag++;
+				break;
+
+			case '?':
+			default:
+				(void) fprintf(stderr,
+				    "Unrecognized option: -%c\n", optopt);
+				errflag++;
+				break;
+		}
+
+		if (errflag) {
+			(void) usage();
+		}
+	}
+
+	if (bsize <= 0 || count <= 0 || stride <= 0 || ifile == NULL ||
+	    ofile == NULL || seek < 0) {
+		(void) fprintf(stderr,
+		    "Required parameter(s) missing or invalid.\n");
+		(void) usage();
+	}
+}
+
+/* Copy count blocks of bsize bytes from ifile to ofile, honoring stride. */
+int
+main(int argc, char *argv[])
+{
+	int i, c;
+	int ifd, ofd;
+	void *buf;
+
+	parse_options(argc, argv);
+
+	ifd = open(ifile, O_RDONLY);
+	if (ifd == -1) {
+		(void) fprintf(stderr, "%s: %s: ", execname, ifile);
+		perror("open");
+		exit(2);
+	}
+
+	ofd = open(ofile, O_WRONLY | O_CREAT, 0666);
+	if (ofd == -1) {
+		(void) fprintf(stderr, "%s: %s: ", execname, ofile);
+		perror("open");
+		exit(2);
+	}
+
+	/*
+	 * We use posix_memalign because some character block devices expect
+	 * a page-aligned buffer.
+	 */
+	int err = posix_memalign(&buf, 4096, bsize);
+	if (err != 0) {
+		(void) fprintf(stderr,
+		    "%s: %s\n", execname, strerror(err));
+		exit(2);
+	}
+
+	if (seek > 0) {
+		if (lseek(ofd, (off_t)seek * bsize, SEEK_CUR) == -1) {
+			perror("output lseek");
+			exit(2);
+		}
+	}
+
+	for (i = 0; i < count; i++) {
+		c = read(ifd, buf, bsize);
+		/*
+		 * errno is only meaningful when read() fails (c < 0); a short
+		 * read (including EOF) is reported with its byte count instead.
+		 */
+		if (c != bsize) {
+			if (c < 0) {
+				perror("read");
+			} else {
+				(void) fprintf(stderr,
+				    "%s: unexpected short read, read %d "
+				    "bytes, expected %d\n", execname,
+				    c, bsize);
+			}
+			exit(2);
+		}
+
+		c = write(ofd, buf, bsize);
+		if (c != bsize) {
+			if (c < 0) {
+				perror("write");
+			} else {
+				(void) fprintf(stderr,
+				    "%s: unexpected short write, wrote %d "
+				    "bytes, expected %d\n", execname,
+				    c, bsize);
+			}
+			exit(2);
+		}
+
+		/* Skip the next stride - 1 blocks in both files. */
+		if (stride > 1) {
+			off_t skip = (off_t)(stride - 1) * bsize;
+			if (lseek(ifd, skip, SEEK_CUR) == -1) {
+				perror("input lseek");
+				exit(2);
+			}
+			if (lseek(ofd, skip, SEEK_CUR) == -1) {
+				perror("output lseek");
+				exit(2);
+			}
+		}
+	}
+	free(buf);
+
+	(void) close(ofd);
+	(void) close(ifd);
+
+	return (0);
+}

diff --git a/zfs/tests/zfs-tests/cmd/xattrtest/xattrtest.c b/zfs/tests/zfs-tests/cmd/xattrtest/xattrtest.c
index 42c510e..8c4cb88 100644
--- a/zfs/tests/zfs-tests/cmd/xattrtest/xattrtest.c
+++ b/zfs/tests/zfs-tests/cmd/xattrtest/xattrtest.c

@@ -44,11 +44,9 @@
 #include <sys/time.h>
 #include <linux/limits.h>
 
-extern char *program_invocation_short_name;
-
 #define	ERROR(fmt, ...)                                                 \
-	fprintf(stderr, "%s: %s:%d: %s: " fmt "\n",                     \
-		program_invocation_short_name, __FILE__, __LINE__,      \
+	fprintf(stderr, "xattrtest: %s:%d: %s: " fmt "\n",              \
+		__FILE__, __LINE__,      				\
 		__func__, ## __VA_ARGS__);
 
 static const char shortopts[] = "hvycdn:f:x:s:p:t:e:rRko:";

diff --git a/zfs/tests/zfs-tests/include/Makefile.am b/zfs/tests/zfs-tests/include/Makefile.am
index 86c387c..16cdf2c 100644
--- a/zfs/tests/zfs-tests/include/Makefile.am
+++ b/zfs/tests/zfs-tests/include/Makefile.am

@@ -1,3 +1,5 @@
+include $(top_srcdir)/config/Substfiles.am
+
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/include
 dist_pkgdata_DATA = \
 	blkdev.shlib \
@@ -5,17 +7,8 @@
 	libtest.shlib \
 	math.shlib \
 	properties.shlib \
+	tunables.cfg \
 	zpool_script.shlib
 
-EXTRA_DIST = default.cfg.in
-
 nodist_pkgdata_DATA = default.cfg
-
-$(nodist_pkgdata_DATA): %: %.in
-	-$(SED) -e 's,@zfsexecdir\@,$(zfsexecdir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< >'$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) default.cfg
+SUBSTFILES += $(nodist_pkgdata_DATA)

diff --git a/zfs/tests/zfs-tests/include/blkdev.shlib b/zfs/tests/zfs-tests/include/blkdev.shlib
index af33246..0ba52cf 100644
--- a/zfs/tests/zfs-tests/include/blkdev.shlib
+++ b/zfs/tests/zfs-tests/include/blkdev.shlib

@@ -73,13 +73,36 @@
 function block_device_wait
 {
 	if is_linux; then
-		udevadm trigger $*
-		typeset local start=$SECONDS
+		udevadm trigger $* 2>/dev/null
+		typeset start=$SECONDS
 		udevadm settle
-		typeset local elapsed=$((SECONDS - start))
+		typeset elapsed=$((SECONDS - start))
 		[[ $elapsed > 60 ]] && \
 		    log_note udevadm settle time too long: $elapsed
+	elif is_freebsd; then
+		if [[ ${#@} -eq 0 ]]; then
+			# Do something that has to go through the geom event
+			# queue to complete.
+			sysctl kern.geom.conftxt >/dev/null
+			return
+		fi
 	fi
+	# Poll for the given paths to appear, but give up eventually.
+	typeset -i i
+	for (( i = 0; i < 5; ++i )); do
+		typeset missing=false
+		typeset dev
+		for dev in "${@}"; do
+			if ! [[ -e $dev ]]; then
+				missing=true
+				break
+			fi
+		done
+		if ! $missing; then
+			break
+		fi
+		sleep ${#@}
+	done
 }
 
 #
@@ -87,16 +110,23 @@
 #
 function is_physical_device #device
 {
-	typeset device=${1#$DEV_DSKDIR}
-	device=${device#$DEV_RDSKDIR}
+	typeset device=${1#$DEV_DSKDIR/}
+	device=${device#$DEV_RDSKDIR/}
 
 	if is_linux; then
-		[[ -b "$DEV_DSKDIR/$device" ]] && \
-		[[ -f /sys/module/loop/parameters/max_part ]]
-		return $?
+		is_disk_device "$DEV_DSKDIR/$device" && \
+		[ -f /sys/module/loop/parameters/max_part ]
+	elif is_freebsd; then
+		is_disk_device "$DEV_DSKDIR/$device" && \
+		echo $device | grep -qE \
+		    -e '^a?da[0-9]+$' \
+		    -e '^md[0-9]+$' \
+		    -e '^mfid[0-9]+$' \
+		    -e '^nda[0-9]+$' \
+		    -e '^nvd[0-9]+$' \
+		    -e '^vtbd[0-9]+$'
 	else
-		echo $device | egrep "^c[0-F]+([td][0-F]+)+$" > /dev/null 2>&1
-		return $?
+		echo $device | grep -qE "^c[0-F]+([td][0-F]+)+$"
 	fi
 }
 
@@ -110,8 +140,7 @@
 
 	if is_linux; then
 		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
-		    egrep disk >/dev/null
-		return $?
+		    grep -q disk
 	fi
 }
 
@@ -125,16 +154,22 @@
 
 	if is_linux; then
 		lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
-		    egrep loop >/dev/null
-		return $?
+		    grep -q loop
 	fi
 }
 
 #
+# Linux:
 # Check if the given device is a multipath device and if there is a symbolic
 # link to a device mapper and to a disk
 # Currently no support for dm devices alone without multipath
 #
+# FreeBSD:
+# Check if the given device is a gmultipath device.
+#
+# Others:
+# No multipath detection.
+#
 function is_mpath_device #disk
 {
 	typeset disk=$1
@@ -142,13 +177,32 @@
 
 	if is_linux; then
 		lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
-		   egrep mpath >/dev/null
+		   grep -q mpath
 		if (($? == 0)); then
 			readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
 			return $?
 		else
 			return $?
 		fi
+	elif is_freebsd; then
+		is_disk_device $DEV_MPATHDIR/$disk
+	else
+		false
+	fi
+}
+
+#
+# Check if the given path is the appropriate sort of device special node.
+#
+function is_disk_device #path
+{
+	typeset path=$1
+	# Keep $path quoted: a bare "test -b" (empty path) would succeed.
+	if is_freebsd; then
+		# FreeBSD doesn't have block devices, only character devices.
+		test -c "$path"
+	else
+		test -b "$path"
+	fi
+}
 
@@ -218,11 +272,11 @@
 {
 	typeset device=$1
 
-	if ! $(is_physical_device $device) ; then
+	if ! is_freebsd && ! is_physical_device $device; then
 		if [[ $device != "/" ]]; then
 			device=${device%/*}
 		fi
-		if [[ -b "$DEV_DSKDIR/$device" ]]; then
+		if is_disk_device "$DEV_DSKDIR/$device"; then
 			device="$DEV_DSKDIR"
 		fi
 		echo $device
@@ -242,12 +296,12 @@
 	if is_linux; then
 		if is_real_device $device; then
 			dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
-			    | egrep disk/by-id | nawk '{print $2; exit}' \
+			    | grep -E "disk/by-id" | nawk '{print $2; exit}' \
 			    | nawk -F / '{print $3}')"
 			echo $dev_id
 		elif is_mpath_device $device; then
 			dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
-			    | egrep disk/by-id/dm-uuid \
+			    | grep -E "disk/by-id/dm-uuid" \
 			    | nawk '{print $2; exit}' \
 			    | nawk -F / '{print $3}')"
 			echo $dev_id
@@ -279,40 +333,37 @@
 		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
 			dm_name="$(readlink $DEV_DSKDIR/$disk \
 			    | nawk -F / '{print $2}')"
-			slave="$(ls /sys/block/${dm_name}/slaves \
+			dep="$(ls /sys/block/${dm_name}/slaves \
 			    | nawk '{print $1}')"
-			while [[ -n $slave ]]; do
+			while [[ -n $dep ]]; do
 				#check if disk is online
-				lsscsi | egrep $slave > /dev/null
+				lsscsi | grep -qF $dep
 				if (($? == 0)); then
-					slave_dir="/sys/block/${dm_name}"
-					slave_dir+="/slaves/${slave}/device"
-					ss="${slave_dir}/state"
-					sd="${slave_dir}/delete"
+					dep_dir="/sys/block/${dm_name}"
+					dep_dir+="/slaves/${dep}/device"
+					ss="${dep_dir}/state"
+					sd="${dep_dir}/delete"
 					log_must eval "echo 'offline' > ${ss}"
 					log_must eval "echo '1' > ${sd}"
-					lsscsi | egrep $slave > /dev/null
+					lsscsi | grep -qF $dep
 						if (($? == 0)); then
 							log_fail "Offlining" \
 							    "$disk failed"
 						fi
 				fi
-				slave="$(ls /sys/block/$dm_name/slaves \
+				dep="$(ls /sys/block/$dm_name/slaves \
 				    2>/dev/null | nawk '{print $1}')"
 			done
 		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
 			#check if disk is online
-			lsscsi | egrep $disk > /dev/null
-			if (($? == 0)); then
+			if lsscsi | grep -qF $disk; then
 				dev_state="/sys/block/$disk/device/state"
 				dev_delete="/sys/block/$disk/device/delete"
 				log_must eval "echo 'offline' > ${dev_state}"
 				log_must eval "echo '1' > ${dev_delete}"
-				lsscsi | egrep $disk > /dev/null
-					if (($? == 0)); then
-						log_fail "Offlining $disk" \
-						    "failed"
-					fi
+				if lsscsi | grep -qF $disk; then
+					log_fail "Offlining $disk failed"
+				fi
 			else
 				log_note "$disk is already offline"
 			fi
@@ -323,16 +374,16 @@
 			if is_mpath_device $disk; then
 				dm_name="$(readlink $DEV_DSKDIR/$disk \
 				    | nawk -F / '{print $2}')"
-				slave="$(ls /sys/block/$dm_name/slaves \
+				dep="$(ls /sys/block/$dm_name/slaves \
 				    | nawk '{print $1}')"
-				lsscsi | egrep $slave > /dev/null
+				lsscsi | grep -qF $dep
 				if (($? != 0)); then
 					log_fail "Onlining $disk failed"
 				fi
 			elif is_real_device $disk; then
 				block_device_wait
 				typeset -i retries=0
-				while ! lsscsi | egrep -q $disk; do
+				while ! lsscsi | grep -qF $disk; do
 					if (( $retries > 2 )); then
 						log_fail "Onlining $disk failed"
 						break
@@ -408,16 +459,14 @@
 			log_unsupported "Platform does not have scsi_debug"
 			    "module"
 		fi
-		lsmod | egrep scsi_debug > /dev/null
-		if (($? == 0)); then
+		if lsmod | grep -q scsi_debug; then
 			log_fail "scsi_debug module already installed"
 		else
 			log_must modprobe scsi_debug dev_size_mb=$devsize \
 			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
 			    sector_size=$sector physblk_exp=$blkexp
 			block_device_wait
-			lsscsi | egrep scsi_debug > /dev/null
-			if (($? == 1)); then
+			if ! lsscsi | grep -q scsi_debug; then
 				log_fail "scsi_debug module install failed"
 			fi
 		fi
@@ -459,7 +508,7 @@
 	typeset devdir=$2
 	typeset out=""
 
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		out=$(zpool status -P $testpool |grep ${devdir} | awk '{print $1}')
 		out=$(echo $out | sed -e "s|${devdir}/||g" | tr '\n' ' ')
 	fi
@@ -485,26 +534,41 @@
 
 	typeset ds="$(zfs list -H -o name $input_file)"
 	typeset pool="${ds%%/*}"
-	typeset inum="$(stat -c '%i' $input_file)"
+	typeset objnum="$(get_objnum $input_file)"
 
 	#
 	# Establish a mapping between vdev ids as shown in a DVA and the
-	# pathnames they correspond to in ${VDEV_MAP[]}.
+	# pathnames they correspond to in ${VDEV_MAP[][]}.
+	#
+	# The vdev bits in a DVA refer to the top level vdev id.
+	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
 	#
 	eval $(zdb -C $pool | awk '
-		BEGIN {
-			printf("typeset VDEV_MAP\n");
-			looking = 0;
-		}
-		/^            children/ {
-			id = $1;
-			looking = 1;
-		}
-		/path: / && looking == 1 {
-			print id" "$2;
-			looking = 0;
-		}
-	' | sed -n 's/^children\[\([0-9]\)\]: \(.*\)$/VDEV_MAP[\1]=\2/p')
+	    BEGIN { printf "typeset -a VDEV_MAP;" }
+	    function subscript(s) {
+	        # "[#]" is more convenient than the bare "#"
+	        match(s, /\[[0-9]*\]/)
+		return substr(s, RSTART, RLENGTH)
+	    }
+	    id && !/^                / {
+	        # left a top level vdev
+	        id = 0
+	    }
+	    id && $1 ~ /^path:$/ {
+	        # found a vdev path; save it in the map
+	        printf "VDEV_MAP%s%s=%s;", id, child, $2
+	    }
+	    /^            children/ {
+	        # entering a top level vdev
+	        id = subscript($0)
+		child = "[0]" # default in case there is no nested vdev
+		printf "typeset -a VDEV_MAP%s;", id
+	    }
+	    /^                children/ {
+	        # entering a nested vdev (e.g. child of a top level mirror)
+	        child = subscript($0)
+	    }
+	')
 
 	#
 	# The awk below parses the output of zdb, printing out the level
@@ -512,22 +576,40 @@
 	# two are converted to decimal in the while loop. 4M is added to
 	# the offset to compensate for the first two labels and boot
 	# block. Lastly, the offset and length are printed in units of
-	# 512b blocks for ease of use with dd.
+	# 512B blocks for ease of use with dd.
 	#
+	typeset level vdev path offset length
+	if awk -n '' 2>/dev/null; then
+		# gawk needs -n to decode hex
+		AWK='awk -n'
+	else
+		AWK='awk'
+	fi
 	log_must zpool sync -f
-	typeset level path offset length
-	zdb -ddddd $ds $inum | awk -F: '
-		BEGIN { looking = 0 }
-		/^Indirect blocks:/ { looking = 1}
-		/^\t\tsegment / { looking = 0}
-		/L[0-8]/ && looking == 1 { print $0}
-	' | sed -n 's/^.*\(L[0-9]\) \([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \
-	while read level path offset length; do
-		offset=$((16#$offset))  # Conversion from hex
-		length=$((16#$length))
-		offset="$(((offset + 4 * 1024 * 1024) / 512))"
-		length="$((length / 512))"
-		echo "$level ${VDEV_MAP[$path]} $offset $length"
+	zdb -dddddd $ds $objnum | $AWK -v pad=$((4<<20)) -v bs=512 '
+	    /^$/ { looking = 0 }
+	    looking {
+	        level = $2
+	        field = 3
+	        while (split($field, dva, ":") == 3) {
+	            # top level vdev id
+	            vdev = int(dva[1])
+	            # offset + 4M label/boot pad in 512B blocks
+	            offset = (int("0x"dva[2]) + pad) / bs
+		    # length in 512B blocks
+		    len = int("0x"dva[3]) / bs
+
+	            print level, vdev, offset, len
+
+	            ++field
+	        }
+	    }
+	    /^Indirect blocks:/ { looking = 1 }
+	' | \
+	while read level vdev offset length; do
+		for path in ${VDEV_MAP[$vdev][@]}; do
+			echo "$level $path $offset $length"
+		done
 	done 2>/dev/null
 }
 
@@ -539,14 +621,23 @@
 
 	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"
 
+	if is_freebsd; then
+		# Temporarily allow corrupting an inuse device.
+		debugflags=$(sysctl -n kern.geom.debugflags)
+		sysctl kern.geom.debugflags=16
+	fi
 
-	log_must list_file_blocks $input_file | \
-	    while read level path offset length; do
+	list_file_blocks $input_file | \
+	while read level path offset length; do
 		if [[ $level = $corrupt_level ]]; then
 			log_must dd if=/dev/urandom of=$path bs=512 \
 			    count=$length seek=$offset conv=notrunc
 		fi
-	    done
+	done
+
+	if is_freebsd; then
+		sysctl kern.geom.debugflags=$debugflags
+	fi
 
 	# This is necessary for pools made of loop devices.
 	sync

diff --git a/zfs/tests/zfs-tests/include/commands.cfg b/zfs/tests/zfs-tests/include/commands.cfg
index 4d98e7c..8ac38df 100644
--- a/zfs/tests/zfs-tests/include/commands.cfg
+++ b/zfs/tests/zfs-tests/include/commands.cfg

@@ -1,4 +1,5 @@
 #
+# Copyright (c) 2016, 2019 by Delphix. All rights reserved.
 # These variables are used by zfs-tests.sh to constrain which utilities
 # may be used by the suite. The suite will create a directory which is
 # the only element of $PATH and create symlinks from that dir to the
@@ -7,18 +8,14 @@
 # Please keep the contents of each variable sorted for ease of reading
 # and maintenance.
 #
-export SYSTEM_FILES='arp
+export SYSTEM_FILES_COMMON='arp
     awk
-    attr
     base64
     basename
     bc
-    blkid
-    blockdev
     bunzip2
     bzcat
     cat
-    chattr
     chgrp
     chmod
     chown
@@ -33,31 +30,21 @@
     diff
     dirname
     dmesg
-    dmidecode
     du
     echo
-    egrep
-    exportfs
+    env
     expr
-    fallocate
     false
-    fdisk
     file
     find
     fio
-    free
     getconf
     getent
     getfacl
-    getfattr
     grep
-    groupadd
-    groupdel
-    groupmod
     gunzip
     gzip
     head
-    hostid
     hostname
     id
     iostat
@@ -65,29 +52,17 @@
     ksh
     ln
     logname
-    losetup
     ls
-    lsattr
-    lsblk
-    lscpu
-    lsmod
-    lsscsi
-    md5sum
     mkdir
     mknod
-    mkswap
     mktemp
-    modprobe
     mount
-    mpstat
     mv
     net
-    nproc
     od
     openssl
-    parted
+    pamtester
     pax
-    perf
     pgrep
     ping
     pkill
@@ -95,22 +70,17 @@
     printf
     ps
     pwd
-    python
-    python2
     python3
     quotaon
     readlink
     rm
     rmdir
     scp
+    script
     sed
     seq
-    setenforce
     setfacl
-    setfattr
     sh
-    sha256sum
-    shuf
     sleep
     sort
     ssh
@@ -130,13 +100,10 @@
     tr
     true
     truncate
-    udevadm
     umask
     umount
     uname
-    useradd
-    userdel
-    usermod
+    uniq
     uuidgen
     vmstat
     wait
@@ -144,6 +111,66 @@
     which
     xargs'
 
+export SYSTEM_FILES_FREEBSD='chflags
+    compress
+    diskinfo
+    dumpon
+    fsck
+    getextattr
+    gpart
+    jail
+    jexec
+    jls
+    lsextattr
+    md5
+    mdconfig
+    mkfifo
+    newfs
+    pw
+    rmextattr
+    setextattr
+    sha256
+    showmount
+    swapctl
+    sysctl
+    uncompress'
+
+export SYSTEM_FILES_LINUX='attr
+    bash
+    blkid
+    blockdev
+    chattr
+    dmidecode
+    exportfs
+    fallocate
+    fdisk
+    free
+    getfattr
+    groupadd
+    groupdel
+    groupmod
+    hostid
+    losetup
+    lsattr
+    lsblk
+    lscpu
+    lsmod
+    lsscsi
+    md5sum
+    mkswap
+    modprobe
+    mpstat
+    nproc
+    parted
+    perf
+    setenforce
+    setfattr
+    sha256sum
+    udevadm
+    useradd
+    userdel
+    usermod'
+
 export ZFS_FILES='zdb
     zfs
     zhack
@@ -152,19 +179,25 @@
     ztest
     raidz_test
     arc_summary
-    arc_summary3
     arcstat
     dbufstat
+    mount.zfs
     zed
     zgenhostid
-    zstreamdump'
+    zstream
+    zfs_ids_to_path
+    zpool_influxdb'
 
-export ZFSTEST_FILES='chg_usr_exec
+export ZFSTEST_FILES='badsend
+    btree_test
+    chg_usr_exec
     devname2devid
     dir_rd_update
+    draid
     file_check
     file_trunc
     file_write
+    get_diff
     largest_file
     libzfs_input_check
     mkbusy
@@ -173,6 +206,8 @@
     mktree
     mmap_exec
     mmap_libaio
+    mmap_seek
+    mmap_sync
     mmapwrite
     nvlist_to_lua
     randfree_file
@@ -180,6 +215,8 @@
     readmmap
     rename_dir
     rm_lnkcnt_zero_file
+    send_doall
     threadsappend
     user_ns_exec
-    xattrtest'
+    xattrtest
+    stride_dd'

diff --git a/zfs/tests/zfs-tests/include/default.cfg.in b/zfs/tests/zfs-tests/include/default.cfg.in
index e1e2a7e..1a9cc5a 100644
--- a/zfs/tests/zfs-tests/include/default.cfg.in
+++ b/zfs/tests/zfs-tests/include/default.cfg.in

@@ -1,3 +1,5 @@
+#!/bin/sh
+
 #
 # CDDL HEADER START
 #
@@ -30,12 +32,12 @@
 #
 
 . $STF_SUITE/include/commands.cfg
-. $STF_SUITE/include/libtest.shlib
 
 # ZFS Directories
 export ZEDLET_ETC_DIR=${ZEDLET_ETC_DIR:-@sysconfdir@/zfs/zed.d}
 export ZEDLET_LIBEXEC_DIR=${ZEDLET_LIBEXEC_DIR:-@zfsexecdir@/zed.d}
 export ZPOOL_SCRIPT_DIR=${ZPOOL_SCRIPT_DIR:-@sysconfdir@/zfs/zpool.d}
+export ZPOOL_COMPAT_DIR=${ZPOOL_COMPAT_DIR:-@datadir@/zfs/compatibility.d}
 
 # Define run length constants
 export RT_LONG="3"
@@ -143,17 +145,6 @@
 # For iscsi target support
 export ISCSITGTFILE=/tmp/iscsitgt_file
 export ISCSITGT_FMRI=svc:/system/iscsitgt:default
-if ! is_linux; then
-export AUTO_SNAP=$(svcs -a | grep auto-snapshot | grep online | awk \
-    '{print $3}')
-fi
-
-#
-# finally, if we're running in a local zone
-# we take some additional actions
-if ! is_global_zone; then
-	reexport_pool
-fi
 
 export ZFS_VERSION=5
 export ZFS_ALL_VERSIONS="1 2 3 4 5"
@@ -164,7 +155,8 @@
 
 export MAX_PARTITIONS=8
 
-if is_linux; then
+case $(uname -o) in
+GNU/Linux)
 	unpack_opts="--sparse -xf"
 	pack_opts="--sparse -cf"
 	verbose=" -v"
@@ -173,6 +165,7 @@
 
 	ZVOL_DEVDIR="/dev/zvol"
 	ZVOL_RDEVDIR="/dev/zvol"
+	DEV_DSKDIR="/dev"
 	DEV_RDSKDIR="/dev"
 	DEV_MPATHDIR="/dev/mapper"
 
@@ -182,9 +175,34 @@
 	VDEVID_CONF="$ZEDLET_DIR/vdev_id.conf"
 	VDEVID_CONF_ETC="/etc/zfs/vdev_id.conf"
 
-
 	NEWFS_DEFAULT_FS="ext2"
-else
+	SLICE_PREFIX=""
+	;;
+FreeBSD)
+	unpack_opts="xv"
+	pack_opts="cf"
+	verbose="v"
+	unpack_preserve="xpf"
+	pack_preserve="cpf"
+
+	ZVOL_DEVDIR="/dev/zvol"
+	ZVOL_RDEVDIR="/dev/zvol"
+	DEV_DSKDIR="/dev"
+	DEV_RDSKDIR="/dev"
+	DEV_MPATHDIR="/dev/multipath"
+
+	NEWFS_DEFAULT_FS="ufs"
+	SLICE_PREFIX="p"
+	;;
+illumos)
+	export AUTO_SNAP=$(svcs -a | \
+	    awk '/auto-snapshot/ && /online/ { print $3 }')
+	# finally, if we're running in a local zone
+	# we take some additional actions
+	if [ "$(zonename 2>/dev/null)" != "global" ]; then
+		reexport_pool
+	fi
+
 	unpack_opts="xv"
 	pack_opts="cf"
 	verbose="v"
@@ -197,7 +215,10 @@
 	DEV_RDSKDIR="/dev/rdsk"
 
 	NEWFS_DEFAULT_FS="ufs"
-fi
+	SLICE_PREFIX="s"
+	;;
+esac
 export unpack_opts pack_opts verbose unpack_preserve pack_preserve \
-       ZVOL_DEVDIR ZVOL_RDEVDIR NEWFS_DEFAULT_FS DEV_RDSKDIR DEV_MPATHDIR \
-       ZEDLET_DIR ZED_LOG ZED_DEBUG_LOG VDEVID_CONF VDEVID_CONF_ETC
+       ZVOL_DEVDIR ZVOL_RDEVDIR DEV_DSKDIR DEV_RDSKDIR DEV_MPATHDIR \
+       ZEDLET_DIR ZED_LOG ZED_DEBUG_LOG VDEVID_CONF VDEVID_CONF_ETC \
+       NEWFS_DEFAULT_FS SLICE_PREFIX

diff --git a/zfs/tests/zfs-tests/include/libtest.shlib b/zfs/tests/zfs-tests/include/libtest.shlib
index cd593b6..d686f27 100644
--- a/zfs/tests/zfs-tests/include/libtest.shlib
+++ b/zfs/tests/zfs-tests/include/libtest.shlib

@@ -20,14 +20,14 @@
 #
 
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
-# Copyright (c) 2017 by Tim Chase. All rights reserved.
-# Copyright (c) 2017 by Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
-# Copyright (c) 2017 Datto Inc.
-# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+# Copyright (c) 2009, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2012, 2020, Delphix. All rights reserved.
+# Copyright (c) 2017, Tim Chase. All rights reserved.
+# Copyright (c) 2017, Nexenta Systems Inc. All rights reserved.
+# Copyright (c) 2017, Lawrence Livermore National Security LLC.
+# Copyright (c) 2017, Datto Inc. All rights reserved.
+# Copyright (c) 2017, Open-E Inc. All rights reserved.
+# Copyright (c) 2021, The FreeBSD Foundation.
 # Use is subject to license terms.
 #
 
@@ -35,12 +35,14 @@
 . ${STF_SUITE}/include/math.shlib
 . ${STF_SUITE}/include/blkdev.shlib
 
+. ${STF_SUITE}/include/tunables.cfg
+
 #
 # Apply constrained path when available.  This is required since the
 # PATH may have been modified by sudo's secure_path behavior.
 #
 if [ -n "$STF_PATH" ]; then
-	PATH="$STF_PATH"
+	export PATH="$STF_PATH"
 fi
 
 #
@@ -93,6 +95,46 @@
 	fi
 }
 
+# Report whether the test system is running illumos.
+#
+# Returns 0 when `uname -o` prints "illumos", 1 otherwise.
+function is_illumos
+{
+	# The [[ ]] test itself yields 0 on a match and 1 on a mismatch.
+	[[ $(uname -o) == "illumos" ]]
+}
+
+# Report whether the test system is running FreeBSD.
+#
+# Returns 0 when `uname -o` prints "FreeBSD", 1 otherwise.
+function is_freebsd
+{
+	# The [[ ]] test itself yields 0 on a match and 1 on a mismatch.
+	[[ $(uname -o) == "FreeBSD" ]]
+}
+
+# Report whether the test system is running DilOS.
+#
+# Returns 0 when /etc/os-release sets ID to "dilos", 1 otherwise
+# (including when that file does not exist).
+function is_dilos
+{
+	# Start from an empty ID so that a missing file reads as "not DilOS".
+	typeset ID=""
+	if [[ -f /etc/os-release ]]; then
+		. /etc/os-release
+	fi
+	[[ $ID == "dilos" ]]
+}
+
 # Determine if this is a 32-bit system
 #
 # Return 0 if platform is 32-bit, 1 if otherwise
@@ -145,17 +187,23 @@
 			fi
 		;;
 		ufs|nfs)
-			out=$(df -F $fstype $1 2>/dev/null)
-			ret=$?
-			(($ret != 0)) && return $ret
+			if is_freebsd; then
+				mount -pt $fstype | while read dev dir _t _flags; do
+					[[ "$1" == "$dev" || "$1" == "$dir" ]] && return 0
+				done
+			else
+				out=$(df -F $fstype $1 2>/dev/null)
+				ret=$?
+				(($ret != 0)) && return $ret
 
-			dir=${out%%\(*}
-			dir=${dir%% *}
-			name=${out##*\(}
-			name=${name%%\)*}
-			name=${name%% *}
+				dir=${out%%\(*}
+				dir=${dir%% *}
+				name=${out##*\(}
+				name=${name%%\)*}
+				name=${name%% *}
 
-			[[ "$1" == "$dir" || "$1" == "$name" ]] && return 0
+				[[ "$1" == "$dir" || "$1" == "$name" ]] && return 0
+			fi
 		;;
 		ext*)
 			out=$(df -t $fstype $1 2>/dev/null)
@@ -204,7 +252,7 @@
 
 function splitline
 {
-	echo $1 | sed "s/,/ /g"
+	echo $1 | tr ',' ' '
 }
 
 function default_setup
@@ -405,7 +453,8 @@
 	log_must eval "zfs send $snap | zfs recv -u $recvfs"
 	log_must mkfile 1m "$mountpoint/data"
 	log_must zfs snapshot $incr
-	log_must eval "zfs send -i $snap $incr | dd bs=10K count=1 > $sendfile"
+	log_must eval "zfs send -i $snap $incr | dd bs=10K count=1 \
+	    iflag=fullblock > $sendfile"
 	log_mustnot eval "zfs recv -su $recvfs < $sendfile"
 	destroy_dataset "$sendfs" "-r"
 	log_must rm -f "$sendfile"
@@ -564,8 +613,8 @@
 				then
 					destroy_pool $pool
 				fi
-				ALL_POOLS=$(get_all_pools)
 			done
+			ALL_POOLS=$(get_all_pools)
 		done
 
 		zfs mount -a
@@ -741,6 +790,18 @@
 }
 
 #
+# Check whether a snapshot carries a hold whose tag matches a pattern.
+#
+# $1 - hold tag (handed to grep as the pattern for the tag column)
+# $2 - snapshot name
+#
+# Returns grep's exit status: 0 if some tag matched, non-zero otherwise.
+#
+function holdexists
+{
+	typeset tag=$1
+	typeset snap=$2
+
+	# Column 2 of `zfs holds` output is the tag; grep output is discarded,
+	# only its exit status is of interest.
+	zfs holds "$snap" | awk '{ print $2 }' | grep "$tag" > /dev/null 2>&1
+}
+
+#
 # Set a property to a certain value on a dataset.
 # Sets a property of the dataset to the value as passed in.
 # @param:
@@ -834,7 +895,9 @@
 	typeset diskname=$1
 	typeset i
 
-	if is_linux; then
+	if is_freebsd; then
+		gpart destroy -F $diskname
+	elif is_linux; then
 		DSK=$DEV_DSKDIR/$diskname
 		DSK=$(echo $DSK | sed -e "s|//|/|g")
 		log_must parted $DSK -s -- mklabel gpt
@@ -867,13 +930,15 @@
 	typeset -i slicenum=$1
 	typeset start=$2
 	typeset size=$3
-	typeset disk=$4
+	typeset disk=${4#$DEV_DSKDIR/}
+	disk=${disk#$DEV_RDSKDIR/}
 
-	if is_linux; then
+	case "$(uname)" in
+	Linux)
 		if [[ -z $size || -z $disk ]]; then
 			log_fail "The size or disk name is unspecified."
 		fi
-		[[ -n $DEV_DSKDIR ]] && disk=$DEV_DSKDIR/$disk
+		disk=$DEV_DSKDIR/$disk
 		typeset size_mb=${size%%[mMgG]}
 
 		size_mb=${size_mb%%[mMgG][bB]}
@@ -916,7 +981,36 @@
 
 		blockdev --rereadpt $disk 2>/dev/null
 		block_device_wait $disk
-	else
+		;;
+	FreeBSD)
+		if [[ -z $size || -z $disk ]]; then
+			log_fail "The size or disk name is unspecified."
+		fi
+		disk=$DEV_DSKDIR/$disk
+
+		# Start over with a fresh GPT when creating the first slice, or
+		# when gpart cannot show a partition table on the disk.
+		if [[ $slicenum -eq 0 ]] || ! gpart show $disk >/dev/null 2>&1; then
+			gpart destroy -F $disk >/dev/null 2>&1
+			gpart create -s GPT $disk
+			if [[ $? -ne 0 ]]; then
+				log_note "Failed to create GPT partition table on $disk"
+				return 1
+			fi
+		fi
+
+		# The partition index handed to gpart is slicenum + 1.
+		typeset index=$((slicenum + 1))
+
+		if [[ -n $start ]]; then
+			start="-b $start"
+		fi
+		# Check gpart's own exit status right after the call: ret_val is
+		# never assigned in this branch, so testing it cannot detect a
+		# failed "gpart add".
+		gpart add -t freebsd-zfs $start -s $size -i $index $disk
+		if [[ $? -ne 0 ]]; then
+			log_note "Failed to create partition $slicenum on $disk"
+			return 1
+		fi
+
+		block_device_wait $disk
+		;;
+	*)
 		if [[ -z $slicenum || -z $size || -z $disk ]]; then
 			log_fail "The slice, size or disk name is unspecified."
 		fi
@@ -937,7 +1031,8 @@
 		format -e -s -d $disk -f $format_file
 		typeset ret_val=$?
 		rm -f $format_file
-	fi
+		;;
+	esac
 
 	if [[ $ret_val -ne 0 ]]; then
 		log_note "Unable to format $disk slice $slicenum to $size"
@@ -972,6 +1067,14 @@
 				fi
 			done
 		done
+	elif is_freebsd; then
+		for disk in $DISKSARRAY; do
+			if gpart destroy -F $disk; then
+				log_note "Partitions for ${disk} deleted"
+			else
+				log_fail "Partitions for ${disk} not deleted"
+			fi
+		done
 	fi
 }
 
@@ -986,13 +1089,20 @@
 		log_fail "The disk name or slice number is unspecified."
 	fi
 
-	if is_linux; then
+	case "$(uname)" in
+	Linux)
 		endcyl=$(parted -s $DEV_DSKDIR/$disk -- unit cyl print | \
-			grep "part${slice}" | \
-			awk '{print $3}' | \
-			sed 's,cyl,,')
+			awk "/part${slice}/"' {sub(/cyl/, "", $3); print $3}')
 		((endcyl = (endcyl + 1)))
-	else
+		;;
+	FreeBSD)
+		disk=${disk#/dev/zvol/}
+		disk=${disk%p*}
+		slice=$((slice + 1))
+		endcyl=$(gpart show $disk | \
+			awk -v slice=$slice '$3 == slice { print $1 + $2 }')
+		;;
+	*)
 		disk=${disk#/dev/dsk/}
 		disk=${disk#/dev/rdsk/}
 		disk=${disk%s*}
@@ -1010,7 +1120,8 @@
 		    nawk -v token="$slice" '{if ($1==token) print $6}')
 
 		((endcyl = (endcyl + 1) / ratio))
-	fi
+		;;
+	esac
 
 	echo $endcyl
 }
@@ -1063,7 +1174,7 @@
 #	data:	    the data that will be written
 #
 #	E.g.
-#	file_fs /testdir 20 25 1024 256 0
+#	fill_fs /testdir 20 25 1024 256 0
 #
 # Note: bytes * num_writes equals the size of the testfile
 #
@@ -1076,33 +1187,12 @@
 	typeset -i num_writes=${5:-10240}
 	typeset data=${6:-0}
 
-	typeset -i odirnum=1
-	typeset -i idirnum=0
-	typeset -i fn=0
-	typeset -i retval=0
-
-	mkdir -p $destdir/$idirnum
-	while (($odirnum > 0)); do
-		if ((dirnum >= 0 && idirnum >= dirnum)); then
-			odirnum=0
-			break
-		fi
-		file_write -o create -f $destdir/$idirnum/$TESTFILE.$fn \
-		    -b $bytes -c $num_writes -d $data
-		retval=$?
-		if (($retval != 0)); then
-			odirnum=0
-			break
-		fi
-		if (($fn >= $filenum)); then
-			fn=0
-			((idirnum = idirnum + 1))
-			mkdir -p $destdir/$idirnum
-		else
-			((fn = fn + 1))
-		fi
+	mkdir -p $destdir/{1..$dirnum}
+	for f in $destdir/{1..$dirnum}/$TESTFILE{1..$filenum}; do
+		file_write -o create -f $f -b $bytes -c $num_writes -d $data \
+		|| return $?
 	done
-	return $retval
+	return 0
 }
 
 #
@@ -1212,20 +1302,18 @@
 	return 0
 }
 
-function is_shared_impl
+# FreeBSD: report whether a mountpoint is listed by `showmount -E`.
+#
+# $1 - mountpoint to look for
+#
+# Returns 0 only if a mountd process exists and some line of the
+# `showmount -E` output equals the mountpoint exactly; non-zero otherwise.
+function is_shared_freebsd
+{
+	typeset fs=$1
+
+	# -F keeps regex metacharacters in the path (e.g. '.') literal, and
+	# quoting keeps a path containing whitespace as one pattern.
+	pgrep -q mountd && showmount -E | grep -qxF -- "$fs"
+}
+
+function is_shared_illumos
 {
 	typeset fs=$1
 	typeset mtpt
 
-	if is_linux; then
-		for mtpt in `share | awk '{print $1}'` ; do
-			if [[ $mtpt == $fs ]] ; then
-				return 0
-			fi
-		done
-		return 1
-	fi
-
 	for mtpt in `share | awk '{print $2}'` ; do
 		if [[ $mtpt == $fs ]] ; then
 			return 0
@@ -1240,6 +1328,19 @@
 	return 1
 }
 
+# Linux: report whether a mountpoint appears in the first column of the
+# `share` output.
+#
+# $1 - mountpoint to look for
+#
+# Returns 0 on the first match, 1 when nothing matches.
+function is_shared_linux
+{
+	typeset fs=$1
+	typeset entry
+
+	for entry in $(share | awk '{print $1}'); do
+		[[ $entry == $fs ]] && return 0
+	done
+	return 1
+}
+
 #
 # Given a mountpoint, or a dataset name, determine if it is shared via NFS.
 #
@@ -1264,7 +1365,85 @@
 		fi
 	fi
 
-	is_shared_impl "$fs"
+	case $(uname) in
+	FreeBSD)	is_shared_freebsd "$fs"	;;
+	Linux)		is_shared_linux "$fs"	;;
+	*)		is_shared_illumos "$fs"	;;
+	esac
+}
+
+# illumos: report whether a mountpoint appears in the first column of
+# /etc/dfs/sharetab.
+#
+# $1 - mountpoint to look for
+#
+# Returns 0 on the first match, 1 when nothing matches.
+function is_exported_illumos
+{
+	typeset fs=$1
+	typeset entry
+
+	for entry in $(awk '{print $1}' /etc/dfs/sharetab); do
+		[[ $entry == $fs ]] && return 0
+	done
+
+	return 1
+}
+
+# FreeBSD: report whether a mountpoint appears in the first column of
+# /etc/zfs/exports.
+#
+# $1 - mountpoint to look for
+#
+# Returns 0 on the first match, 1 when nothing matches.
+function is_exported_freebsd
+{
+	typeset fs=$1
+	typeset entry
+
+	for entry in $(awk '{print $1}' /etc/zfs/exports); do
+		[[ $entry == $fs ]] && return 0
+	done
+
+	return 1
+}
+
+# Linux: report whether a mountpoint appears in the first column of
+# /etc/exports.d/zfs.exports.
+#
+# $1 - mountpoint to look for
+#
+# Returns 0 on the first match, 1 when nothing matches.
+function is_exported_linux
+{
+	typeset fs=$1
+	typeset entry
+
+	for entry in $(awk '{print $1}' /etc/exports.d/zfs.exports); do
+		[[ $entry == $fs ]] && return 0
+	done
+
+	return 1
+}
+
+#
+# Determine whether a mountpoint, or the mountpoint of a dataset, is listed
+# in the NFS exports file of the running platform.
+#
+# $1 - absolute mountpoint, or a dataset name
+#
+# Returns 0 if exported, 1 otherwise.
+#
+function is_exported
+{
+	typeset fs=$1
+	typeset mtpt
+
+	# Anything not starting with "/" is taken to be a dataset name and is
+	# translated to its mountpoint before the lookup.
+	if [[ $fs != "/"* ]] ; then
+		datasetnonexists "$fs" && return 1
+
+		mtpt=$(get_prop mountpoint "$fs")
+		case $mtpt in
+			# No usable mountpoint, so it cannot be exported.
+			none|legacy|-) return 1 ;;
+		esac
+		fs=$mtpt
+	fi
+
+	# Dispatch to the platform-specific lookup; anything that is neither
+	# FreeBSD nor Linux is handled by the illumos variant.
+	typeset os=$(uname)
+	case $os in
+	FreeBSD)
+		is_exported_freebsd "$fs"
+		;;
+	Linux)
+		is_exported_linux "$fs"
+		;;
+	*)
+		is_exported_illumos "$fs"
+		;;
+	esac
+}
 
 #
@@ -1280,7 +1459,7 @@
 	if datasetnonexists "$fs" ; then
 		return 1
 	else
-		fs=$(echo $fs | sed 's@/@_@g')
+		fs=$(echo $fs | tr / _)
 	fi
 
 	if is_linux; then
@@ -1291,7 +1470,7 @@
 		done
 		return 1
 	else
-		log_unsupported "Currently unsupported by the test framework"
+		log_note "Currently unsupported by the test framework"
 		return 1
 	fi
 }
@@ -1339,7 +1518,7 @@
 
 	is_shared $fs || is_shared_smb $fs
 	if (($? == 0)); then
-		log_must zfs unshare $fs
+		zfs unshare $fs || log_fail "zfs unshare $fs failed"
 	fi
 
 	return 0
@@ -1417,6 +1596,21 @@
 	return 0
 }
 
+# Probe the platform's NFS tooling and mark the test unsupported when the
+# probe command fails or the platform is neither Linux nor FreeBSD.
+function check_nfs
+{
+	typeset -i rc
+
+	if is_linux; then
+		share -s
+		rc=$?
+	elif is_freebsd; then
+		showmount -e
+		rc=$?
+	else
+		log_unsupported "Unknown platform"
+		rc=$?
+	fi
+
+	# A failing probe is reported as missing NFS utilities.
+	if ((rc != 0)); then
+		log_unsupported "The NFS utilities are not installed"
+	fi
+}
+
 #
 # Check NFS server status and trigger it online.
 #
@@ -1438,6 +1632,11 @@
 
 		log_note "NFS server must be started prior to running ZTS."
 		return
+	elif is_freebsd; then
+		kill -s HUP $(cat /var/run/mountd.pid)
+
+		log_note "NFS server must be started prior to running ZTS."
+		return
 	fi
 
 	typeset nfs_fmri="svc:/network/nfs/server:default"
@@ -1485,7 +1684,7 @@
 #
 function is_global_zone
 {
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		return 0
 	else
 		typeset cur_zone=$(zonename 2>/dev/null)
@@ -1923,7 +2122,12 @@
 	log_must zpool scrub $pool
 	log_must wait_scrubbed $pool
 
-	cksum=$(zpool status $pool | awk 'L{print $NF;L=0} /CKSUM$/{L=1}')
+	typeset -i cksum=$(zpool status $pool | awk '
+	    !NF { isvdev = 0 }
+	    isvdev { errors += $NF }
+	    /CKSUM$/ { isvdev = 1 }
+	    END { print errors }
+	')
 	if [[ $cksum != 0 ]]; then
 		log_must zpool status -v
 	        log_fail "Unexpected CKSUM errors found on $pool ($cksum)"
@@ -1939,7 +2143,7 @@
 
 	disklist=$(zpool iostat -v $1 | nawk '(NR >4) {print $1}' | \
 	    grep -v "\-\-\-\-\-" | \
-	    egrep -v -e "^(mirror|raidz[1-3]|spare|log|cache|special|dedup)$")
+	    grep -vEe "^(mirror|raidz[1-3]|draid[1-3]|spare|log|cache|special|dedup)|\-[0-9]$")
 
 	echo $disklist
 }
@@ -2054,7 +2258,7 @@
 #
 # Return 0 is pool/disk matches expected state, 1 otherwise
 #
-function check_vdev_state # pool disk state{online,offline,unavail}
+function check_vdev_state # pool disk state{online,offline,unavail,removed}
 {
 	typeset pool=$1
 	typeset disk=${2#*$DEV_DSKDIR/}
@@ -2111,25 +2315,26 @@
 	if [[ $verbose == true ]]; then
 		log_note $scan
 	fi
-	echo $scan | grep -i "$keyword" > /dev/null 2>&1
-
+	echo $scan | grep -qi "$keyword"
 	return $?
 }
 
 #
-# These 6 following functions are instance of check_pool_status()
-#	is_pool_resilvering - to check if the pool is resilver in progress
-#	is_pool_resilvered - to check if the pool is resilver completed
-#	is_pool_scrubbing - to check if the pool is scrub in progress
-#	is_pool_scrubbed - to check if the pool is scrub completed
-#	is_pool_scrub_stopped - to check if the pool is scrub stopped
-#	is_pool_scrub_paused - to check if the pool has scrub paused
-#	is_pool_removing - to check if the pool is removing a vdev
-#	is_pool_removed - to check if the pool is remove completed
+# The following functions are instances of check_pool_status()
+#	is_pool_resilvering - to check if the pool resilver is in progress
+#	is_pool_resilvered - to check if the pool resilver is completed
+#	is_pool_scrubbing - to check if the pool scrub is in progress
+#	is_pool_scrubbed - to check if the pool scrub is completed
+#	is_pool_scrub_stopped - to check if the pool scrub is stopped
+#	is_pool_scrub_paused - to check if the pool scrub has paused
+#	is_pool_removing - to check if the pool is removing a vdev
+#	is_pool_removed - to check if the pool remove is completed
+#	is_pool_discarding - to check if the pool checkpoint is being discarded
 #
 function is_pool_resilvering #pool <verbose>
 {
-	check_pool_status "$1" "scan" "resilver in progress since " $2
+	check_pool_status "$1" "scan" \
+	    "resilver[ ()0-9A-Za-z:_-]* in progress since" $2
 	return $?
 }
 
@@ -2175,6 +2380,12 @@
 	return $?
 }
 
+# Ask check_pool_status whether the "checkpoint" status of the pool mentions
+# "discarding"; the result of that call is the return value.
+function is_pool_discarding #pool
+{
+	check_pool_status "$1" "checkpoint" "discarding"
+}
+
 function wait_for_degraded
 {
 	typeset pool=$1
@@ -2226,7 +2437,7 @@
 function find_disks
 {
 	# Trust provided list, no attempt is made to locate unused devices.
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		echo "$@"
 		return
 	fi
@@ -2294,19 +2505,236 @@
 # each case. limit the number to max_finddisksnum
 	count=0
 	for disk in $unused_candidates; do
-		if [ -b $DEV_DSKDIR/${disk}s0 ]; then
-		if [ $count -lt $max_finddisksnum ]; then
+		if is_disk_device $DEV_DSKDIR/${disk}s0 && \
+		    [ $count -lt $max_finddisksnum ]; then
 			unused="$unused $disk"
 			# do not impose limit if $@ is provided
 			[[ -z $@ ]] && ((count = count + 1))
 		fi
-		fi
 	done
 
 # finally, return our disk list
 	echo $unused
 }
 
+# FreeBSD: create a user with pw, probing for a free uid.
+#
+# $1 group name
+# $2 user name
+# $3 base directory; the home directory is created as $3/$2
+#
+# Returns 0 if the user already exists or was created, 1 if pw fails for
+# any reason other than a uid collision.
+function add_user_freebsd #<group_name> <user_name> <basedir>
+{
+	typeset group=$1
+	typeset user=$2
+	typeset basedir=$3
+	typeset -i uid=1000
+	typeset -i rc
+
+	# Nothing to do when id can already resolve the account.
+	id $user > /dev/null 2>&1 && return 0
+
+	# Probe uids upward from 1000 until pw accepts one.
+	until pw useradd -u $uid -g $group -d $basedir/$user -m -n $user; do
+		rc=$?
+		# 65 means the uid is not unique; anything else is fatal.
+		((rc == 65)) || return 1
+		((uid += 1))
+		if ((uid == 65000)); then
+			log_fail "No user id available under 65000 for $user"
+		fi
+	done
+
+	# Silence MOTD
+	touch $basedir/$user/.hushlogin
+
+	return 0
+}
+
+#
+# FreeBSD: remove a user with pw if the account exists.
+#
+# $1 login name
+#
+function del_user_freebsd #<logname>
+{
+	typeset user=$1
+
+	# Only call pw when id can resolve the account.
+	id $user > /dev/null 2>&1 && log_must pw userdel $user
+
+	return 0
+}
+
+#
+# Select valid gid and create specified group.
+#
+# $1 group name
+#
+# Returns 0 if the group already exists or was created, 1 if pw fails for
+# any reason other than a gid collision.
+#
+function add_group_freebsd #<group_name>
+{
+	typeset group=$1
+
+	# See if the group already exists.
+	if pw groupshow $group >/dev/null 2>&1; then
+		return 0
+	fi
+
+	# Assign 1000 as the base gid
+	typeset -i gid=1000
+	while true; do
+		pw groupadd -g $gid -n $group > /dev/null 2>&1
+		typeset -i ret=$?
+		case $ret in
+			0) return 0 ;;
+			# The gid is not unique
+			65) ((gid += 1)) ;;
+			*) return 1 ;;
+		esac
+		# Give up once the probe reaches 65000; it is gids, not uids,
+		# that ran out here.
+		if [[ $gid == 65000 ]]; then
+			log_fail "No group id available under 65000 for $group"
+		fi
+	done
+}
+
+#
+# FreeBSD: remove a group with pw.
+#
+# $1 group name
+#
+# Returns 0 when the group is gone afterwards, 1 on an unexpected pw status.
+#
+function del_group_freebsd #<group_name>
+{
+	typeset group=$1
+
+	pw groupdel -n $group > /dev/null 2>&1
+	case $? in
+		# Group does not exist, or was deleted successfully.
+		0|6|65) ;;
+		# Name already exists as a group name
+		9) log_must pw groupdel $group ;;
+		*) return 1 ;;
+	esac
+
+	return 0
+}
+
+# illumos: create a user whose home directory is <basedir>/<user_name>.
+#
+# $1 group name
+# $2 user name
+# $3 base directory
+function add_user_illumos #<group_name> <user_name> <basedir>
+{
+	typeset group=$1
+	typeset user=$2
+	typeset basedir=$3
+	typeset homedir=$basedir/$user
+
+	log_must useradd -g $group -d $homedir -m $user
+
+	return 0
+}
+
+# illumos: remove a user if the account exists.
+#
+# $1 user name
+function del_user_illumos #<user_name>
+{
+	typeset user=$1
+
+	# Only attempt the removal when id can resolve the account; userdel
+	# goes through log_must_retry with the "currently used" message.
+	id $user > /dev/null 2>&1 && \
+	    log_must_retry "currently used" 6 userdel $user
+
+	return 0
+}
+
+# illumos: create a group, probing gids upward from 100.
+#
+# $1 group name
+#
+# Returns 0 once groupadd succeeds, 1 if it fails for any reason other
+# than a gid collision.
+function add_group_illumos #<group_name>
+{
+	typeset group=$1
+	typeset -i gid=100
+	typeset -i rc
+
+	until groupadd -g $gid $group > /dev/null 2>&1; do
+		rc=$?
+		# 4 means the gid is not unique; anything else is fatal.
+		((rc == 4)) || return 1
+		((gid += 1))
+	done
+	return 0
+}
+
+# illumos: remove a group if it exists.
+#
+# $1 group name
+#
+# Returns 0 if the group does not exist, the status of the groupdel call
+# if it does, and 1 on any other groupmod status.
+function del_group_illumos #<group_name>
+{
+	typeset group=$1
+
+	# Rename the group to itself as an existence probe. This must use
+	# $group: no variable named "grp" is set in this function.
+	groupmod -n $group $group > /dev/null 2>&1
+	typeset -i ret=$?
+	case $ret in
+		# Group does not exist.
+		6) return 0 ;;
+		# Name already exists as a group name
+		9) log_must groupdel $group ;;
+		*) return 1 ;;
+	esac
+}
+
+# Linux: create a user and add it to the group that owns the zfs binary.
+#
+# $1 group name
+# $2 user name
+# $3 base directory; the home directory is created as $3/$2
+function add_user_linux #<group_name> <user_name> <basedir>
+{
+	typeset group=$1
+	typeset user=$2
+	typeset basedir=$3
+	typeset homedir=$basedir/$user
+
+	log_must useradd -g $group -d $homedir -m $user
+
+	# Put the new user in the same group as the command line utilities
+	# so they can be run out of the original user's home directory, as
+	# long as that directory is permissioned to be group readable.
+	cmd_group=$(stat --format="%G" $(which zfs))
+	log_must usermod -a -G $cmd_group $user
+
+	return 0
+}
+
+# Linux: remove a user if the account exists.
+#
+# $1 user name
+function del_user_linux #<user_name>
+{
+	typeset user=$1
+
+	# Only attempt the removal when id can resolve the account; userdel
+	# goes through log_must_retry with the "currently used" message.
+	id $user > /dev/null 2>&1 && \
+	    log_must_retry "currently used" 6 userdel $user
+
+	return 0
+}
+
+# Linux: create a group, letting groupadd choose the gid.
+#
+# $1 group name
+#
+# Returns 0 if groupadd succeeds, 1 on any failure.
+function add_group_linux #<group_name>
+{
+	typeset group=$1
+
+	# No gid is passed, so there is nothing to retry: a single attempt
+	# decides the result.
+	if groupadd $group > /dev/null 2>&1; then
+		return 0
+	fi
+	return 1
+}
+
+# Linux: remove a group if getent can find it.
+#
+# $1 group name
+#
+# Returns 0 when the group is absent or was removed, 1 on any other
+# getent status.
+function del_group_linux #<group_name>
+{
+	typeset group=$1
+
+	getent group $group > /dev/null 2>&1
+	case $? in
+		# Group does not exist.
+		2) return 0 ;;
+		# Group exists, so remove it.
+		0) log_must groupdel $group ;;
+		*) return 1 ;;
+	esac
+
+	return 0
+}
+
 #
 # Add specified user to specified group
 #
@@ -2316,26 +2744,25 @@
 #
 function add_user #<group_name> <user_name> <basedir>
 {
-	typeset gname=$1
-	typeset uname=$2
+	typeset group=$1
+	typeset user=$2
 	typeset basedir=${3:-"/var/tmp"}
 
-	if ((${#gname} == 0 || ${#uname} == 0)); then
+	if ((${#group} == 0 || ${#user} == 0)); then
 		log_fail "group name or user name are not defined."
 	fi
 
-	log_must useradd -g $gname -d $basedir/$uname -m $uname
-	echo "export PATH=\"$STF_PATH\"" >>$basedir/$uname/.profile
-	echo "export PATH=\"$STF_PATH\"" >>$basedir/$uname/.bash_profile
-	echo "export PATH=\"$STF_PATH\"" >>$basedir/$uname/.login
-
-	# Add new users to the same group and the command line utils.
-	# This allows them to be run out of the original users home
-	# directory as long as it permissioned to be group readable.
-	if is_linux; then
-		cmd_group=$(stat --format="%G" $(which zfs))
-		log_must usermod -a -G $cmd_group $uname
-	fi
+	case $(uname) in
+	FreeBSD)
+		add_user_freebsd "$group" "$user" "$basedir"
+		;;
+	Linux)
+		add_user_linux "$group" "$user" "$basedir"
+		;;
+	*)
+		add_user_illumos "$group" "$user" "$basedir"
+		;;
+	esac
 
 	return 0
 }
@@ -2355,9 +2782,17 @@
 		log_fail "login name is necessary."
 	fi
 
-	if id $user > /dev/null 2>&1; then
-		log_must_retry "currently used" 6 userdel $user
-	fi
+	case $(uname) in
+	FreeBSD)
+		del_user_freebsd "$user"
+		;;
+	Linux)
+		del_user_linux "$user"
+		;;
+	*)
+		del_user_illumos "$user"
+		;;
+	esac
 
 	[[ -d $basedir/$user ]] && rm -fr $basedir/$user
 
@@ -2377,30 +2812,19 @@
 		log_fail "group name is necessary."
 	fi
 
-	# Assign 100 as the base gid, a larger value is selected for
-	# Linux because for many distributions 1000 and under are reserved.
-	if is_linux; then
-		while true; do
-			groupadd $group > /dev/null 2>&1
-			typeset -i ret=$?
-			case $ret in
-				0) return 0 ;;
-				*) return 1 ;;
-			esac
-		done
-	else
-		typeset -i gid=100
-		while true; do
-			groupadd -g $gid $group > /dev/null 2>&1
-			typeset -i ret=$?
-			case $ret in
-				0) return 0 ;;
-				# The gid is not  unique
-				4) ((gid += 1)) ;;
-				*) return 1 ;;
-			esac
-		done
-	fi
+	case $(uname) in
+	FreeBSD)
+		add_group_freebsd "$group"
+		;;
+	Linux)
+		add_group_linux "$group"
+		;;
+	*)
+		add_group_illumos "$group"
+		;;
+	esac
+
+	return 0
 }
 
 #
@@ -2410,32 +2834,23 @@
 #
 function del_group #<group_name>
 {
-	typeset grp=$1
-	if ((${#grp} == 0)); then
+	typeset group=$1
+
+	if ((${#group} == 0)); then
 		log_fail "group name is necessary."
 	fi
 
-	if is_linux; then
-		getent group $grp > /dev/null 2>&1
-		typeset -i ret=$?
-		case $ret in
-			# Group does not exist.
-			2) return 0 ;;
-			# Name already exists as a group name
-			0) log_must groupdel $grp ;;
-			*) return 1 ;;
-		esac
-	else
-		groupmod -n $grp $grp > /dev/null 2>&1
-		typeset -i ret=$?
-		case $ret in
-			# Group does not exist.
-			6) return 0 ;;
-			# Name already exists as a group name
-			9) log_must groupdel $grp ;;
-			*) return 1 ;;
-		esac
-	fi
+	case $(uname) in
+	FreeBSD)
+		del_group_freebsd "$group"
+		;;
+	Linux)
+		del_group_linux "$group"
+		;;
+	*)
+		del_group_illumos "$group"
+		;;
+	esac
 
 	return 0
 }
@@ -2512,29 +2927,6 @@
 }
 
 #
-# Get the available ZFS compression options
-# $1 option type zfs_set|zfs_compress
-#
-function get_compress_opts
-{
-	typeset COMPRESS_OPTS
-	typeset GZIP_OPTS="gzip gzip-1 gzip-2 gzip-3 gzip-4 gzip-5 \
-			gzip-6 gzip-7 gzip-8 gzip-9"
-
-	if [[ $1 == "zfs_compress" ]] ; then
-		COMPRESS_OPTS="on lzjb"
-	elif [[ $1 == "zfs_set" ]] ; then
-		COMPRESS_OPTS="on off lzjb"
-	fi
-	typeset valid_opts="$COMPRESS_OPTS"
-	zfs get 2>&1 | grep gzip >/dev/null 2>&1
-	if [[ $? -eq 0 ]]; then
-		valid_opts="$valid_opts $GZIP_OPTS"
-	fi
-	echo "$valid_opts"
-}
-
-#
 # Verify zfs operation with -p option work as expected
 # $1 operation, value could be create, clone or rename
 # $2 dataset type, value could be fs or vol
@@ -2805,7 +3197,7 @@
 	typeset label_file=/var/tmp/labelvtoc.$$
 	typeset arch=$(uname -p)
 
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		log_note "Currently unsupported by the test framework"
 		return 1
 	fi
@@ -2863,7 +3255,9 @@
 {
 	typeset rootfs=""
 
-	if ! is_linux; then
+	if is_freebsd; then
+		rootfs=$(mount -p | awk '$2 == "/" && $3 == "zfs" {print $1}')
+	elif ! is_linux; then
 		rootfs=$(awk '{if ($2 == "/" && $3 == "zfs") print $1}' \
 			/etc/mnttab)
 	fi
@@ -2888,7 +3282,9 @@
 	typeset rootfs=""
 	typeset rootpool=""
 
-	if ! is_linux; then
+	if is_freebsd; then
+		rootfs=$(mount -p | awk '$2 == "/" && $3 == "zfs" {print $1}')
+	elif ! is_linux; then
 		rootfs=$(awk '{if ($2 == "/" && $3 =="zfs") print $1}' \
 			 /etc/mnttab)
 	fi
@@ -2897,24 +3293,13 @@
 	fi
 	zfs list $rootfs > /dev/null 2>&1
 	if (($? == 0)); then
-		rootpool=`echo $rootfs | awk -F\/ '{print $1}'`
-		echo $rootpool
+		echo ${rootfs%%/*}
 	else
 		log_fail "This is not a zfsroot system."
 	fi
 }
 
 #
-# Get the package name
-#
-function get_package_name
-{
-	typeset dirpath=${1:-$STC_NAME}
-
-	echo "SUNWstc-${dirpath}" | /usr/bin/sed -e "s/\//-/g"
-}
-
-#
 # Get the word numbers from a string separated by white space
 #
 function get_word_count
@@ -2976,6 +3361,8 @@
 {
 	if is_linux; then
 		(($(nproc) > 1))
+	elif is_freebsd; then
+		# Compare the CPU count against 1 like the other branches do,
+		# rather than printing it and always succeeding.
+		(($(sysctl -n kern.smp.cpus) > 1))
 	else
 		(($(psrinfo | wc -l) > 1))
 	fi
@@ -2987,6 +3374,8 @@
 {
 	if is_linux; then
 		lscpu | awk '/CPU MHz/ { print $3 }'
+	elif is_freebsd; then
+		sysctl -n hw.clockrate
 	else
 		psrinfo -v 0 | awk '/processor operates at/ {print $6}'
 	fi
@@ -2998,9 +3387,17 @@
 	typeset user=$1
 	shift
 
-	log_note "user:$user $@"
-	eval su - \$user -c \"$@\" > $TEST_BASE_DIR/out 2>$TEST_BASE_DIR/err
-	return $?
+	log_note "user: $user"
+	log_note "cmd: $*"
+
+	typeset out=$TEST_BASE_DIR/out
+	typeset err=$TEST_BASE_DIR/err
+
+	sudo -Eu $user env PATH="$PATH" ksh <<<"$*" >$out 2>$err
+	typeset res=$?
+	log_note "out: $(<$out)"
+	log_note "err: $(<$err)"
+	return $res
 }
 
 #
@@ -3045,7 +3442,7 @@
 	shift
 
 	for i in "$@"; do
-		max=$(echo $((max > i ? max : i)))
+		max=$((max > i ? max : i))
 	done
 
 	echo $max
@@ -3057,21 +3454,12 @@
 	shift
 
 	for i in "$@"; do
-		min=$(echo $((min < i ? min : i)))
+		min=$((min < i ? min : i))
 	done
 
 	echo $min
 }
 
-#
-# Generate a random number between 1 and the argument.
-#
-function random
-{
-        typeset max=$1
-        echo $(( ($RANDOM % $max) + 1 ))
-}
-
 # Write data that can be compressed into a directory
 function write_compressible
 {
@@ -3126,7 +3514,11 @@
 	typeset objnum
 
 	[[ -e $pathname ]] || log_fail "No such file or directory: $pathname"
-	objnum=$(stat -c %i $pathname)
+	if is_freebsd; then
+		objnum=$(stat -f "%i" $pathname)
+	else
+		objnum=$(stat -c %i $pathname)
+	fi
 	echo $objnum
 }
 
@@ -3183,17 +3575,12 @@
 # Wait for a pool to be scrubbed
 #
 # $1 pool name
-# $2 number of seconds to wait (optional)
-#
-# Returns true when pool has been scrubbed, or false if there's a timeout or if
-# no scrub was done.
 #
 function wait_scrubbed
 {
 	typeset pool=${1:-$TESTPOOL}
-	while true ; do
-		is_pool_scrubbed $pool && break
-		log_must sleep 1
+	while ! is_pool_scrubbed $pool ; do
+		sleep 1
 	done
 }
 
@@ -3219,7 +3606,7 @@
 function zed_setup
 {
 	if ! is_linux; then
-		return
+		log_unsupported "No zed on $(uname)"
 	fi
 
 	if [[ ! -d $ZEDLET_DIR ]]; then
@@ -3305,16 +3692,16 @@
 	# Verify the ZED is not already running.
 	pgrep -x zed > /dev/null
 	if (($? == 0)); then
-		log_fail "ZED already running"
+		log_note "ZED already running"
+	else
+		log_note "Starting ZED"
+		# run ZED in the background and redirect foreground logging
+		# output to $ZED_LOG.
+		log_must truncate -s 0 $ZED_DEBUG_LOG
+		log_must eval "zed -vF -d $ZEDLET_DIR -P $PATH" \
+		    "-s $ZEDLET_DIR/state -j 1 2>$ZED_LOG &"
 	fi
 
-	log_note "Starting ZED"
-	# run ZED in the background and redirect foreground logging
-	# output to $ZED_LOG.
-	log_must truncate -s 0 $ZED_DEBUG_LOG
-	log_must eval "zed -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid -P $PATH" \
-	    "-s $ZEDLET_DIR/state 2>$ZED_LOG &"
-
 	return 0
 }
 
@@ -3328,14 +3715,13 @@
 	fi
 
 	log_note "Stopping ZED"
-	if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
-		zedpid=$(<${ZEDLET_DIR}/zed.pid)
-		kill $zedpid
-		while ps -p $zedpid > /dev/null; do
-			sleep 1
-		done
-		rm -f ${ZEDLET_DIR}/zed.pid
-	fi
+	while true; do
+		zedpids="$(pgrep -x zed)"
+		[ "$?" -ne 0 ] && break
+
+		log_must kill $zedpids
+		sleep 1
+	done
 	return 0
 }
 
@@ -3381,6 +3767,8 @@
 
 	if is_linux; then
 		swapon -s | grep -w $(readlink -f $device) > /dev/null 2>&1
+	elif is_freebsd; then
+		swapctl -l | grep -w $device
 	else
 		swap -l | grep -w $device > /dev/null 2>&1
 	fi
@@ -3398,6 +3786,8 @@
 	if is_linux; then
 		log_must eval "mkswap $swapdev > /dev/null 2>&1"
 		log_must swapon $swapdev
+	elif is_freebsd; then
+		log_must swapctl -a $swapdev
 	else
 	        log_must swap -a $swapdev
 	fi
@@ -3415,6 +3805,8 @@
 	if is_swap_inuse $swapdev; then
 		if is_linux; then
 			log_must swapoff $swapdev
+		elif is_freebsd; then
+			log_must swapoff $swapdev
 		else
 			log_must swap -d $swapdev
 		fi
@@ -3426,7 +3818,7 @@
 #
 # Set a global system tunable (64-bit value)
 #
-# $1 tunable name
+# $1 tunable name (use a NAME defined in tunables.cfg)
 # $2 tunable values
 #
 function set_tunable64
@@ -3437,7 +3829,7 @@
 #
 # Set a global system tunable (32-bit value)
 #
-# $1 tunable name
+# $1 tunable name (use a NAME defined in tunables.cfg)
 # $2 tunable values
 #
 function set_tunable32
@@ -3447,12 +3839,23 @@
 
 function set_tunable_impl
 {
-	typeset tunable="$1"
+	typeset name="$1"
 	typeset value="$2"
 	typeset mdb_cmd="$3"
 	typeset module="${4:-zfs}"
 
-	[[ -z "$tunable" ]] && return 1
+	eval "typeset tunable=\$$name"
+	case "$tunable" in
+	UNSUPPORTED)
+		log_unsupported "Tunable '$name' is unsupported on $(uname)"
+		;;
+	"")
+		log_fail "Tunable '$name' must be added to tunables.cfg"
+		;;
+	*)
+		;;
+	esac
+
 	[[ -z "$value" ]] && return 1
 	[[ -z "$mdb_cmd" ]] && return 1
 
@@ -3463,6 +3866,10 @@
 		cat >"$zfs_tunables/$tunable" <<<"$value"
 		return $?
 		;;
+	FreeBSD)
+		sysctl vfs.zfs.$tunable=$value
+		return "$?"
+		;;
 	SunOS)
 		[[ "$module" -eq "zfs" ]] || return 1
 		echo "${tunable}/${mdb_cmd}0t${value}" | mdb -kw
@@ -3474,7 +3881,7 @@
 #
 # Get a global system tunable
 #
-# $1 tunable name
+# $1 tunable name (use a NAME defined in tunables.cfg)
 #
 function get_tunable
 {
@@ -3483,10 +3890,20 @@
 
 function get_tunable_impl
 {
-	typeset tunable="$1"
+	typeset name="$1"
 	typeset module="${2:-zfs}"
 
-	[[ -z "$tunable" ]] && return 1
+	eval "typeset tunable=\$$name"
+	case "$tunable" in
+	UNSUPPORTED)
+		log_unsupported "Tunable '$name' is unsupported on $(uname)"
+		;;
+	"")
+		log_fail "Tunable '$name' must be added to tunables.cfg"
+		;;
+	*)
+		;;
+	esac
 
 	case "$(uname)" in
 	Linux)
@@ -3495,6 +3912,9 @@
 		cat $zfs_tunables/$tunable
 		return $?
 		;;
+	FreeBSD)
+		sysctl -n vfs.zfs.$tunable
+		;;
 	SunOS)
 		[[ "$module" -eq "zfs" ]] || return 1
 		;;
@@ -3572,7 +3992,14 @@
 {
 	typeset file=$1
 
-	md5sum -b $file | awk '{ print $1 }'
+	case $(uname) in
+	FreeBSD)
+		md5 -q $file
+		;;
+	*)
+		md5sum -b $file | awk '{ print $1 }'
+		;;
+	esac
 }
 
 #
@@ -3583,5 +4010,302 @@
 {
 	typeset file=$1
 
-	sha256sum -b $file | awk '{ print $1 }'
+	case $(uname) in
+	FreeBSD)
+		sha256 -q $file
+		;;
+	*)
+		sha256sum -b $file | awk '{ print $1 }'
+		;;
+	esac
+}
+
+function new_fs #<args>
+{
+	case $(uname) in
+	FreeBSD)
+		newfs "$@"
+		;;
+	*)
+		echo y | newfs -v "$@"
+		;;
+	esac
+}
+
+function stat_size #<path>
+{
+	typeset path=$1
+
+	case $(uname) in
+	FreeBSD)
+		stat -f %z "$path"
+		;;
+	*)
+		stat -c %s "$path"
+		;;
+	esac
+}
+
+function stat_ctime #<path>
+{
+	typeset path=$1
+
+	case $(uname) in
+	FreeBSD)
+		stat -f %c "$path"
+		;;
+	*)
+		stat -c %Z "$path"
+		;;
+	esac
+}
+
+function stat_crtime #<path>
+{
+	typeset path=$1
+
+	case $(uname) in
+	FreeBSD)
+		stat -f %B "$path"
+		;;
+	*)
+		stat -c %W "$path"
+		;;
+	esac
+}
+
+# Run a command as if it was being run in a TTY.
+#
+# Usage:
+#
+#    faketty command
+#
+function faketty
+{
+    if is_freebsd; then
+        script -q /dev/null env "$@"
+    else
+        script --return --quiet -c "$*" /dev/null
+    fi
+}
+
+#
+# Produce a random permutation of the integers in a given range (inclusive).
+#
+function range_shuffle # begin end
+{
+	typeset -i begin=$1
+	typeset -i end=$2
+
+	seq ${begin} ${end} | sort -R
+}
+
+#
+# Cross-platform xattr helpers
+#
+
+function get_xattr # name path
+{
+	typeset name=$1
+	typeset path=$2
+
+	case $(uname) in
+	FreeBSD)
+		getextattr -qq user "${name}" "${path}"
+		;;
+	*)
+		attr -qg "${name}" "${path}"
+		;;
+	esac
+}
+
+function set_xattr # name value path
+{
+	typeset name=$1
+	typeset value=$2
+	typeset path=$3
+
+	case $(uname) in
+	FreeBSD)
+		setextattr user "${name}" "${value}" "${path}"
+		;;
+	*)
+		attr -qs "${name}" -V "${value}" "${path}"
+		;;
+	esac
+}
+
+function set_xattr_stdin # name path (value is read from stdin)
+{
+	typeset name=$1
+	typeset path=$2
+
+	case $(uname) in
+	FreeBSD)
+		setextattr -i user "${name}" "${path}"
+		;;
+	*)
+		attr -qs "${name}" "${path}"
+		;;
+	esac
+}
+
+function rm_xattr # name path
+{
+	typeset name=$1
+	typeset path=$2
+
+	case $(uname) in
+	FreeBSD)
+		rmextattr -q user "${name}" "${path}"
+		;;
+	*)
+		attr -qr "${name}" "${path}"
+		;;
+	esac
+}
+
+function ls_xattr # path
+{
+	typeset path=$1
+
+	case $(uname) in
+	FreeBSD)
+		lsextattr -qq user "${path}"
+		;;
+	*)
+		attr -ql "${path}"
+		;;
+	esac
+}
+
+function kstat # stat flags?
+{
+	typeset stat=$1
+	typeset flags=${2-"-n"}
+
+	case $(uname) in
+	FreeBSD)
+		sysctl $flags kstat.zfs.misc.$stat
+		;;
+	Linux)
+		typeset zfs_kstat="/proc/spl/kstat/zfs/$stat"
+		[[ -f "$zfs_kstat" ]] || return 1
+		cat $zfs_kstat
+		;;
+	*)
+		false
+		;;
+	esac
+}
+
+function get_arcstat # stat
+{
+	typeset stat=$1
+
+	case $(uname) in
+	FreeBSD)
+		kstat arcstats.$stat
+		;;
+	Linux)
+		kstat arcstats | awk "/$stat/ { print \$3 }"
+		;;
+	*)
+		false
+		;;
+	esac
+}
+
+function punch_hole # offset length file
+{
+	typeset offset=$1
+	typeset length=$2
+	typeset file=$3
+
+	case $(uname) in
+	FreeBSD)
+		truncate -d -o $offset -l $length "$file"
+		;;
+	Linux)
+		fallocate --punch-hole --offset $offset --length $length "$file"
+		;;
+	*)
+		false
+		;;
+	esac
+}
+
+function zero_range # offset length file
+{
+	typeset offset=$1
+	typeset length=$2
+	typeset file=$3
+
+	case $(uname) in
+	Linux)
+		fallocate --zero-range --offset $offset --length $length "$file"
+		;;
+	*)
+		false
+		;;
+	esac
+}
+
+#
+# Wait for the specified arcstat to reach non-zero quiescence.
+# If echo is 1 echo the value after reaching quiescence, otherwise
+# if echo is 0 print the arcstat we are waiting on.
+#
+function arcstat_quiescence # stat echo
+{
+	typeset stat=$1
+	typeset echo=$2
+	typeset do_once=true
+
+	if [[ $echo -eq 0 ]]; then
+		echo "Waiting for arcstat $1 quiescence."
+	fi
+
+	while $do_once || [ $stat1 -ne $stat2 ] || [ $stat2 -eq 0 ]; do
+		typeset stat1=$(get_arcstat $stat)
+		sleep 0.5
+		typeset stat2=$(get_arcstat $stat)
+		do_once=false
+	done
+
+	if [[ $echo -eq 1 ]]; then
+		echo $stat2
+	fi
+}
+
+function arcstat_quiescence_noecho # stat
+{
+	typeset stat=$1
+	arcstat_quiescence $stat 0
+}
+
+function arcstat_quiescence_echo # stat
+{
+	typeset stat=$1
+	arcstat_quiescence $stat 1
+}
+
+#
+# Given an array of pids, wait until all processes
+# have completed and check their return status.
+#
+function wait_for_children #children
+{
+	typeset -i rv=0
+	typeset child child_exit
+	for child in "$@"
+	do
+		child_exit=0
+		wait ${child} || child_exit=$?
+		if [ $child_exit -ne 0 ]; then
+			echo "child ${child} failed with ${child_exit}"
+			rv=1
+		fi
+	done
+	return $rv
 }

diff --git a/zfs/tests/zfs-tests/include/math.shlib b/zfs/tests/zfs-tests/include/math.shlib
index 0c3508e..7ac59f2 100644
--- a/zfs/tests/zfs-tests/include/math.shlib
+++ b/zfs/tests/zfs-tests/include/math.shlib

@@ -30,14 +30,15 @@
 	typeset percent=$3
 
 	# Set $a or $b to $2 such that a >= b
-	[[ '1' = $(echo "if ($2 > $a) 1" | bc) ]] && a=$2 || b=$2
+	[[ '1' = $(echo "if ($2 > $a) 1 else 0" | bc) ]] && a=$2 || b=$2
 
 	# Prevent division by 0
 	[[ $a =~ [1-9] ]] || return 1
 
 	typeset p=$(echo "scale=2; $b * 100 / $a" | bc)
 	log_note "Comparing $a and $b given $percent% (calculated: $p%)"
-	[[ '1' = $(echo "scale=2; if ($p >= $percent) 1" | bc) ]] && return 0
+	[[ '1' = $(echo "scale=2; if ($p >= $percent) 1 else 0" | bc) ]] && \
+	    return 0
 
 	return 1
 }
@@ -119,3 +120,25 @@
 		log_fail "Compared $type should be not equal: $a == $b"
 	fi
 }
+
+# Print a random integer between two bounds (inclusive).
+#
+# Returns 1 without printing anything when the upper bound is below the lower.
+#
+# Note since we're using $RANDOM, 32767 is the largest number we
+# can accept as the upper bound.
+#
+# $1 lower bound
+# $2 upper bound
+function random_int_between
+{
+	typeset -i min=$1
+	typeset -i max=$2
+	typeset -i rand=0
+
+	# Reject an inverted range rather than draw with a non-positive modulus.
+	((max >= min)) || return 1
+	rand=$((RANDOM % (max - min + 1) + min))
+
+	echo $rand
+}

diff --git a/zfs/tests/zfs-tests/include/properties.shlib b/zfs/tests/zfs-tests/include/properties.shlib
index 25a9846..dcf3ab7 100644
--- a/zfs/tests/zfs-tests/include/properties.shlib
+++ b/zfs/tests/zfs-tests/include/properties.shlib

@@ -10,20 +10,25 @@
 #
 
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2016, Delphix. All rights reserved.
+# Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
 #
 
-typeset -a compress_prop_vals=('on' 'off' 'lzjb' 'gzip' 'gzip-1' 'gzip-2'
-    'gzip-3' 'gzip-4' 'gzip-5' 'gzip-6' 'gzip-7' 'gzip-8' 'gzip-9' 'zle' 'lz4')
+. $STF_SUITE/include/libtest.shlib
+
+typeset -a compress_prop_vals=('off' 'lzjb' 'lz4' 'gzip' 'zle' 'zstd')
 typeset -a checksum_prop_vals=('on' 'off' 'fletcher2' 'fletcher4' 'sha256'
-    'noparity' 'sha512' 'skein' 'edonr')
+    'noparity' 'sha512' 'skein')
+if ! is_freebsd; then
+	checksum_prop_vals+=('edonr')
+fi
 typeset -a recsize_prop_vals=('512' '1024' '2048' '4096' '8192' '16384'
     '32768' '65536' '131072' '262144' '524288' '1048576')
 typeset -a canmount_prop_vals=('on' 'off' 'noauto')
 typeset -a copies_prop_vals=('1' '2' '3')
 typeset -a logbias_prop_vals=('latency' 'throughput')
 typeset -a primarycache_prop_vals=('all' 'none' 'metadata')
-typeset -a redundant_metadata_prop_vals=('all' 'most')
+typeset -a redundant_metadata_prop_vals=('all' 'most' 'some' 'none')
 typeset -a secondarycache_prop_vals=('all' 'none' 'metadata')
 typeset -a snapdir_prop_vals=('hidden' 'visible')
 typeset -a sync_prop_vals=('standard' 'always' 'disabled')
@@ -35,64 +40,66 @@
     'secondarycache' 'redundant_metadata' 'sync')
 
 #
-# Given the property array passed in, return 'num_props' elements to the
-# user, excluding any elements below 'start.' This allows us to exclude
-# 'off' and 'on' which can be either unwanted, or a duplicate of another
-# property respectively.
+# Given the 'prop' passed in, return 'num_vals' elements of the corresponding
+# values array to the user, excluding any elements below 'first.' This allows
+# us to exclude 'off' and 'on' which can be either unwanted, or a duplicate of
+# another property respectively.
 #
-function get_rand_prop
+function get_rand_prop_vals
 {
-	typeset prop_array=($(eval echo \${$1[@]}))
-	typeset -i num_props=$2
-	typeset -i start=$3
+	typeset prop=$1
+	typeset -i num_vals=$2
+	typeset -i first=$3
+
+	[[ -z $prop || -z $num_vals || -z $first ]] && \
+	    log_fail "get_rand_prop_vals: bad arguments"
+
 	typeset retstr=""
 
-	[[ -z $prop_array || -z $num_props || -z $start ]] && \
-	    log_fail "get_rand_prop: bad arguments"
+	typeset prop_vals_var=${prop}_prop_vals
+	typeset -a prop_vals=($(eval echo \${${prop_vals_var}[@]}))
 
-	typeset prop_max=$((${#prop_array[@]} - 1))
+	[[ -z $prop_vals ]] && \
+	    log_fail "get_rand_prop_vals: bad prop $prop"
+
+	typeset -i last=$((${#prop_vals[@]} - 1))
 	typeset -i i
-	for i in $(shuf -i $start-$prop_max -n $num_props); do
-		retstr="${prop_array[$i]} $retstr"
+	for i in $(range_shuffle $first $last | head -n $num_vals); do
+		retstr="${prop_vals[$i]} $retstr"
 	done
 	echo $retstr
 }
 
-function get_rand_compress
-{
-	get_rand_prop compress_prop_vals $1 2
-}
-
-function get_rand_compress_any
-{
-	get_rand_prop compress_prop_vals $1 0
-}
-
 function get_rand_checksum
 {
-	get_rand_prop checksum_prop_vals $1 2
+	get_rand_prop_vals checksum $1 2
 }
 
 function get_rand_checksum_any
 {
-	get_rand_prop checksum_prop_vals $1 0
+	get_rand_prop_vals checksum $1 0
 }
 
 function get_rand_recsize
 {
-	get_rand_prop recsize_prop_vals $1 0
+	get_rand_prop_vals recsize $1 0
 }
 
 function get_rand_large_recsize
 {
-	get_rand_prop recsize_prop_vals $1 9
+	get_rand_prop_vals recsize $1 9
 }
 
 #
 # Functions to toggle on/off properties
 #
-typeset -a binary_props=('atime' 'devices' 'exec' 'readonly' 'setuid' 'xattr'
-    'zoned')
+typeset -a binary_props=('atime' 'devices' 'exec' 'readonly' 'setuid' 'xattr')
+
+if is_freebsd; then
+	binary_props+=('jailed')
+else
+	binary_props+=('zoned')
+fi
 
 if is_linux; then
 	# Only older kernels support non-blocking mandatory locks
@@ -143,7 +150,7 @@
 	fi
 
 	for prop in $proplist; do
-		typeset val=$(get_rand_prop "${prop}_prop_vals" 1 0)
+		typeset val=$(get_rand_prop_vals $prop 1 0)
 		log_must zfs set $prop=$val $ds
 	done
 }

diff --git a/zfs/tests/zfs-tests/include/tunables.cfg b/zfs/tests/zfs-tests/include/tunables.cfg
new file mode 100644
index 0000000..0fd2f48
--- /dev/null
+++ b/zfs/tests/zfs-tests/include/tunables.cfg

@@ -0,0 +1,98 @@
+# This file exports variables for each tunable used in the test suite.
+#
+# Different platforms use different names for most tunables. To avoid littering
+# the tests with conditional logic for deciding how to set each tunable, the
+# logic is instead consolidated to this one file.
+#
+# Any use of tunables in tests must use a name defined here. New entries
+# should be added to the table as needed. Please keep the table sorted
+# alphabetically for ease of maintenance.
+#
+# Platform-specific tunables should still use a NAME from this table for
+# consistency. Enter UNSUPPORTED in the column for platforms on which the
+# tunable is not implemented.
+
+UNAME=$(uname)
+
+# NAME				FreeBSD tunable			Linux tunable
+cat <<%%%% |
+ADMIN_SNAPSHOT			UNSUPPORTED			zfs_admin_snapshot
+ALLOW_REDACTED_DATASET_MOUNT	allow_redacted_dataset_mount	zfs_allow_redacted_dataset_mount
+ARC_MAX				arc.max				zfs_arc_max
+ARC_MIN				arc.min				zfs_arc_min
+ASYNC_BLOCK_MAX_BLOCKS		async_block_max_blocks		zfs_async_block_max_blocks
+CHECKSUM_EVENTS_PER_SECOND	checksum_events_per_second	zfs_checksum_events_per_second
+COMMIT_TIMEOUT_PCT		commit_timeout_pct		zfs_commit_timeout_pct
+COMPRESSED_ARC_ENABLED		compressed_arc_enabled		zfs_compressed_arc_enabled
+CONDENSE_INDIRECT_COMMIT_ENTRY_DELAY_MS	condense.indirect_commit_entry_delay_ms	zfs_condense_indirect_commit_entry_delay_ms
+CONDENSE_INDIRECT_OBSOLETE_PCT	condense.indirect_obsolete_pct	zfs_condense_indirect_obsolete_pct
+CONDENSE_MIN_MAPPING_BYTES	condense.min_mapping_bytes	zfs_condense_min_mapping_bytes
+DBUF_CACHE_SHIFT		dbuf.cache_shift		dbuf_cache_shift
+DEADMAN_CHECKTIME_MS		deadman.checktime_ms		zfs_deadman_checktime_ms
+DEADMAN_FAILMODE		deadman.failmode		zfs_deadman_failmode
+DEADMAN_SYNCTIME_MS		deadman.synctime_ms		zfs_deadman_synctime_ms
+DEADMAN_ZIOTIME_MS		deadman.ziotime_ms		zfs_deadman_ziotime_ms
+DISABLE_IVSET_GUID_CHECK	disable_ivset_guid_check	zfs_disable_ivset_guid_check
+DMU_OFFSET_NEXT_SYNC		dmu_offset_next_sync		zfs_dmu_offset_next_sync
+INITIALIZE_CHUNK_SIZE		initialize_chunk_size		zfs_initialize_chunk_size
+INITIALIZE_VALUE		initialize_value		zfs_initialize_value
+KEEP_LOG_SPACEMAPS_AT_EXPORT	keep_log_spacemaps_at_export	zfs_keep_log_spacemaps_at_export
+LUA_MAX_MEMLIMIT		lua.max_memlimit		zfs_lua_max_memlimit
+L2ARC_MFUONLY			l2arc.mfuonly			l2arc_mfuonly
+L2ARC_NOPREFETCH		l2arc.noprefetch		l2arc_noprefetch
+L2ARC_REBUILD_BLOCKS_MIN_L2SIZE	l2arc.rebuild_blocks_min_l2size	l2arc_rebuild_blocks_min_l2size
+L2ARC_REBUILD_ENABLED		l2arc.rebuild_enabled		l2arc_rebuild_enabled
+L2ARC_TRIM_AHEAD		l2arc.trim_ahead		l2arc_trim_ahead
+L2ARC_WRITE_BOOST		l2arc.write_boost		l2arc_write_boost
+L2ARC_WRITE_MAX			l2arc.write_max			l2arc_write_max
+LIVELIST_CONDENSE_NEW_ALLOC	livelist.condense.new_alloc	zfs_livelist_condense_new_alloc
+LIVELIST_CONDENSE_SYNC_CANCEL	livelist.condense.sync_cancel	zfs_livelist_condense_sync_cancel
+LIVELIST_CONDENSE_SYNC_PAUSE	livelist.condense.sync_pause	zfs_livelist_condense_sync_pause
+LIVELIST_CONDENSE_ZTHR_CANCEL	livelist.condense.zthr_cancel	zfs_livelist_condense_zthr_cancel
+LIVELIST_CONDENSE_ZTHR_PAUSE	livelist.condense.zthr_pause	zfs_livelist_condense_zthr_pause
+LIVELIST_MAX_ENTRIES		livelist.max_entries		zfs_livelist_max_entries
+LIVELIST_MIN_PERCENT_SHARED	livelist.min_percent_shared	zfs_livelist_min_percent_shared
+MAX_DATASET_NESTING		max_dataset_nesting		zfs_max_dataset_nesting
+MAX_MISSING_TVDS		max_missing_tvds		zfs_max_missing_tvds
+METASLAB_DEBUG_LOAD		metaslab.debug_load		metaslab_debug_load
+METASLAB_FORCE_GANGING		metaslab.force_ganging		metaslab_force_ganging
+MULTIHOST_FAIL_INTERVALS	multihost.fail_intervals	zfs_multihost_fail_intervals
+MULTIHOST_HISTORY		multihost.history		zfs_multihost_history
+MULTIHOST_IMPORT_INTERVALS	multihost.import_intervals	zfs_multihost_import_intervals
+MULTIHOST_INTERVAL		multihost.interval		zfs_multihost_interval
+OVERRIDE_ESTIMATE_RECORDSIZE	send.override_estimate_recordsize	zfs_override_estimate_recordsize
+PREFETCH_DISABLE		prefetch.disable		zfs_prefetch_disable
+REBUILD_SCRUB_ENABLED		rebuild_scrub_enabled		zfs_rebuild_scrub_enabled
+REMOVAL_SUSPEND_PROGRESS	removal_suspend_progress	zfs_removal_suspend_progress
+REMOVE_MAX_SEGMENT		remove_max_segment		zfs_remove_max_segment
+RESILVER_MIN_TIME_MS		resilver_min_time_ms		zfs_resilver_min_time_ms
+SCAN_LEGACY			scan_legacy			zfs_scan_legacy
+SCAN_SUSPEND_PROGRESS		scan_suspend_progress		zfs_scan_suspend_progress
+SCAN_VDEV_LIMIT			scan_vdev_limit			zfs_scan_vdev_limit
+SEND_HOLES_WITHOUT_BIRTH_TIME	send_holes_without_birth_time	send_holes_without_birth_time
+SLOW_IO_EVENTS_PER_SECOND	slow_io_events_per_second	zfs_slow_io_events_per_second
+SPA_ASIZE_INFLATION		spa.asize_inflation		spa_asize_inflation
+SPA_DISCARD_MEMORY_LIMIT	spa.discard_memory_limit	zfs_spa_discard_memory_limit
+SPA_LOAD_VERIFY_DATA		spa.load_verify_data		spa_load_verify_data
+SPA_LOAD_VERIFY_METADATA	spa.load_verify_metadata	spa_load_verify_metadata
+TRIM_EXTENT_BYTES_MIN		trim.extent_bytes_min		zfs_trim_extent_bytes_min
+TRIM_METASLAB_SKIP		trim.metaslab_skip		zfs_trim_metaslab_skip
+TRIM_TXG_BATCH			trim.txg_batch			zfs_trim_txg_batch
+TXG_HISTORY			txg.history			zfs_txg_history
+TXG_TIMEOUT			txg.timeout			zfs_txg_timeout
+UNLINK_SUSPEND_PROGRESS		UNSUPPORTED			zfs_unlink_suspend_progress
+VDEV_FILE_LOGICAL_ASHIFT	vdev.file.logical_ashift	vdev_file_logical_ashift
+VDEV_FILE_PHYSICAL_ASHIFT	vdev.file.physical_ashift	vdev_file_physical_ashift
+VDEV_MAX_AUTO_ASHIFT		vdev.max_auto_ashift		zfs_vdev_max_auto_ashift
+VDEV_MIN_MS_COUNT		vdev.min_ms_count		zfs_vdev_min_ms_count
+VDEV_VALIDATE_SKIP		vdev.validate_skip		vdev_validate_skip
+VOL_INHIBIT_DEV			UNSUPPORTED			zvol_inhibit_dev
+VOL_MODE			vol.mode			zvol_volmode
+VOL_RECURSIVE			vol.recursive			UNSUPPORTED
+ZEVENT_LEN_MAX			zevent.len_max			zfs_zevent_len_max
+ZEVENT_RETAIN_MAX		zevent.retain_max		zfs_zevent_retain_max
+ZIO_SLOW_IO_MS			zio.slow_io_ms			zio_slow_io_ms
+%%%%
+while read name FreeBSD Linux; do
+	eval "export ${name}=\$${UNAME}"
+done

diff --git a/zfs/tests/zfs-tests/tests/functional/Makefile.am b/zfs/tests/zfs-tests/tests/functional/Makefile.am
index a1fe06c..fd586ec 100644
--- a/zfs/tests/zfs-tests/tests/functional/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/Makefile.am

@@ -4,6 +4,7 @@
 	arc \
 	atime \
 	bootfs \
+	btree \
 	cache \
 	cachefile \
 	casenorm \
@@ -15,12 +16,14 @@
 	cli_user \
 	compression \
 	cp_files \
+	crtime \
 	ctime \
 	deadman \
 	delegate \
 	devices \
 	events \
 	exec \
+	fallocate \
 	fault \
 	features \
 	grow \
@@ -29,12 +32,13 @@
 	inheritance \
 	inuse \
 	io \
+	l2arc \
 	large_files \
 	largest_pool \
 	libzfs \
 	limits \
-	pyzfs \
 	link_count \
+	log_spacemap \
 	migration \
 	mmap \
 	mmp \
@@ -44,14 +48,17 @@
 	no_space \
 	nopwrite \
 	online_offline \
+	pam \
 	pool_checkpoint \
 	pool_names \
 	poolversion \
 	privilege \
 	procfs \
 	projectquota \
+	pyzfs \
 	quota \
 	raidz \
+	redacted_send \
 	redundancy \
 	refquota \
 	refreserv \
@@ -59,7 +66,6 @@
 	rename_dirs \
 	replacement \
 	reservation \
-	resilver \
 	rootpool \
 	rsend \
 	scrub_mirror \
@@ -69,7 +75,6 @@
 	sparse \
 	suid \
 	threadsappend \
-	tmpfile \
 	trim \
 	truncate \
 	upgrade \
@@ -78,4 +83,11 @@
 	vdev_zaps \
 	write_dirs \
 	xattr \
+	zpool_influxdb \
 	zvol
+
+if BUILD_LINUX
+SUBDIRS += \
+	simd \
+	tmpfile
+endif

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/Makefile.am b/zfs/tests/zfs-tests/tests/functional/acl/Makefile.am
index 6086930..d752f63 100644
--- a/zfs/tests/zfs-tests/tests/functional/acl/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/acl/Makefile.am

@@ -3,4 +3,4 @@
 	acl.cfg \
 	acl_common.kshlib
 
-SUBDIRS = posix
+SUBDIRS = off posix posix-sa

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/.gitignore b/zfs/tests/zfs-tests/tests/functional/acl/off/.gitignore
new file mode 100644
index 0000000..f3c9319
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/.gitignore

@@ -0,0 +1 @@
+/dosmode_readonly_write

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/Makefile.am b/zfs/tests/zfs-tests/tests/functional/acl/off/Makefile.am
new file mode 100644
index 0000000..36aa13d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/Makefile.am

@@ -0,0 +1,16 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
+
+dist_pkgdata_SCRIPTS = \
+	dosmode.ksh \
+	posixmode.ksh \
+	cleanup.ksh \
+	setup.ksh
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
+
+if BUILD_FREEBSD
+pkgexec_PROGRAMS = dosmode_readonly_write
+dosmode_readonly_write_SOURCES = dosmode_readonly_write.c
+endif

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/acl/off/cleanup.ksh
new file mode 100755
index 0000000..bb58a8c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/cleanup.ksh

@@ -0,0 +1,33 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+cleanup_user_group
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode.ksh b/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode.ksh
new file mode 100755
index 0000000..e232dfd
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode.ksh

@@ -0,0 +1,199 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+#
+# DESCRIPTION:
+#	Verify that DOS mode flags function correctly.
+#
+#	These flags are not currently exposed on Linux, so the test is
+#	only useful on FreeBSD.
+#
+# STRATEGY:
+#	1. ARCHIVE
+#	2. HIDDEN
+#	3. OFFLINE
+#	4. READONLY
+#	5. REPARSE
+#	6. SPARSE
+#	7. SYSTEM
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $testfile
+}
+
+function hasflag
+{
+	typeset flag=$1
+	typeset path=$2
+
+	ls -lo $path | awk '{ gsub(",", "\n", $5); print $5 }' | grep -qxF $flag
+}
+
+log_assert "Verify DOS mode flags function correctly"
+log_onexit cleanup
+
+tests_base=$STF_SUITE/tests/functional/acl/off
+testfile=$TESTDIR/testfile
+owner=$ZFS_ACL_STAFF1
+other=$ZFS_ACL_STAFF2
+
+#
+# ARCHIVE
+#
+# This flag is set by ZFS when a file has been updated to indicate that
+# the file needs to be archived.
+#
+log_must touch $testfile
+log_must hasflag uarch $testfile
+log_must chflags nouarch $testfile
+log_must hasflag - $testfile
+log_must touch $testfile
+log_must hasflag uarch $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must hasflag uarch $testfile
+log_must user_run $owner chflags nouarch $testfile
+log_mustnot user_run $other chflags uarch $testfile
+log_must hasflag - $testfile
+log_must user_run $owner touch $testfile
+log_mustnot user_run $other chflags nouarch $testfile
+log_must hasflag uarch $testfile
+log_must user_run $owner rm $testfile
+
+#
+# HIDDEN
+#
+log_must touch $testfile
+log_must chflags hidden $testfile
+log_must hasflag hidden $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags hidden $testfile
+log_mustnot user_run $other chflags nohidden $testfile
+log_must hasflag hidden $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags hidden $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+
+#
+# OFFLINE
+#
+log_must touch $testfile
+log_must chflags offline $testfile
+log_must hasflag offline $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags offline $testfile
+log_mustnot user_run $other chflags nooffline $testfile
+log_must hasflag offline $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags offline $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+#
+# READONLY
+#
+# This flag prevents users from writing or appending to the file,
+# but root is always allowed the operation.
+#
+log_must touch $testfile
+log_must chflags rdonly $testfile
+log_must hasflag rdonly $testfile
+log_must eval "echo 'root write allowed' >> $testfile"
+log_must cat $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+# It is required to still be able to write to an fd that was opened RW before
+# READONLY is set.  We have a special test program for that.
+log_must user_run $owner touch $testfile
+log_mustnot user_run $other chflags rdonly $testfile
+log_must user_run $owner $tests_base/dosmode_readonly_write $testfile
+log_mustnot user_run $other chflags nordonly $testfile
+log_must hasflag rdonly $testfile
+log_mustnot user_run $owner "echo 'user write forbidden' >> $testfile"
+log_must eval "echo 'root write allowed' >> $testfile"
+# We are still allowed to read and remove the file when READONLY is set.
+log_must user_run $owner cat $testfile
+log_must user_run $owner rm $testfile
+
+#
+# REPARSE
+#
+# FIXME: does not work, not sure if broken or testing wrong
+#
+
+#
+# SPARSE
+#
+log_must truncate -s 1m $testfile
+log_must chflags sparse $testfile
+log_must hasflag sparse $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner truncate -s 1m $testfile
+log_must user_run $owner chflags sparse $testfile
+log_mustnot user_run $other chflags nosparse $testfile
+log_must hasflag sparse $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags sparse $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+#
+# SYSTEM
+#
+log_must touch $testfile
+log_must chflags system $testfile
+log_must hasflag system $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags system $testfile
+log_mustnot user_run $other chflags nosystem $testfile
+log_must hasflag system $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags system $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+log_pass "DOS mode flags function correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode_readonly_write.c b/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode_readonly_write.c
new file mode 100644
index 0000000..372c3f7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/dosmode_readonly_write.c

@@ -0,0 +1,72 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Test for correct behavior of DOS mode READONLY flag on a file.
+ * We should be able to open a file RW, set READONLY, and still write to the fd.
+ */
+
+#include <sys/stat.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * usage: dosmode_readonly_write PATH
+ *
+ * Opens PATH read-write (creating it if needed), sets UF_READONLY on the
+ * path, then writes to the already-open descriptor and closes it.  Returns
+ * EXIT_SUCCESS if all of that works; a usage error or any failing call
+ * ends the program with EXIT_FAILURE (failing calls report through err(3)).
+ */
+int
+main(int argc, const char *argv[])
+{
+	const char *buf = "We should be allowed to write this to the fd.\n";
+	const char *path;
+	int fd;
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s PATH\n", argv[0]);
+		return (EXIT_FAILURE);
+	}
+	path = argv[1];
+	fd = open(path, O_CREAT|O_RDWR, 0777);
+	if (fd == -1)
+		err(EXIT_FAILURE, "%s: open failed", path);
+	/* Set the flag by path while fd is still open for writing. */
+	if (chflags(path, UF_READONLY) == -1)
+		err(EXIT_FAILURE, "%s: chflags failed", path);
+	/* Write through the descriptor opened before the flag was set. */
+	if (write(fd, buf, strlen(buf)) == -1)
+		err(EXIT_FAILURE, "%s: write failed", path);
+	if (close(fd) == -1)
+		err(EXIT_FAILURE, "%s: close failed", path);
+	return (EXIT_SUCCESS);
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/posixmode.ksh b/zfs/tests/zfs-tests/tests/functional/acl/off/posixmode.ksh
new file mode 100755
index 0000000..63870ca
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/posixmode.ksh

@@ -0,0 +1,145 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+#
+# DESCRIPTION:
+#	Verify that POSIX mode bits function correctly.
+#
+#	These tests are incomplete and will be added to over time.
+#
+#	NOTE: Creating directory entries behaves differently between platforms.
+#	The parent directory's group is used on FreeBSD, while the effective
+#	group is used on Linux.  We chown to the effective group when creating
+#	directories and files in these tests to achieve consistency across all
+#	platforms.
+#
+# STRATEGY:
+#	1. Sanity check the POSIX mode test on tmpfs
+#	2. Test POSIX mode bits on ZFS
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	umount -f $tmpdir
+	rm -rf $tmpdir $TESTDIR/dir
+}
+
+log_assert "Verify POSIX mode bits function correctly"
+log_onexit cleanup
+
+owner=$ZFS_ACL_STAFF1
+other=$ZFS_ACL_STAFF2
+group=$ZFS_ACL_STAFF_GROUP
+if is_linux; then
+	wheel=root
+else
+	wheel=wheel
+fi
+
+function test_posix_mode # base
+{
+	typeset base=$1
+	typeset dir=$base/dir
+	typeset file=$dir/file
+
+	# dir owned by root
+	log_must mkdir $dir
+	log_must chown :$wheel $dir
+	log_must chmod 007 $dir
+
+	# file owned by root
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must ls -la $dir
+	log_must rm $file
+
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must user_run $other rm $file
+
+	# file owned by user
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must ls -la $dir
+	log_must user_run $owner rm $file
+
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must user_run $other rm $file
+
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must rm $file
+
+	log_must rm -rf $dir
+
+	# dir owned by user
+	log_must user_run $owner mkdir $dir
+	log_must chown :$group $dir
+	log_must user_run $owner chmod 007 $dir
+
+	# file owned by root
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must ls -la $dir
+	log_must rm $file
+
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_mustnot user_run $other rm $file
+	log_must rm $file
+
+	# file owned by user
+	log_mustnot user_run $owner touch $file
+	log_must touch $file
+	log_must chown $owner:$group $file
+	log_must ls -la $dir
+	log_mustnot user_run $owner rm $file
+	log_mustnot user_run $other rm $file
+	log_must rm $file
+
+	log_must rm -rf $dir
+}
+
+# Sanity check on tmpfs first
+tmpdir=$(TMPDIR=$TEST_BASE_DIR mktemp -d)
+log_must mount -t tmpfs tmp $tmpdir
+log_must chmod 777 $tmpdir
+
+test_posix_mode $tmpdir
+
+log_must umount $tmpdir
+log_must rmdir $tmpdir
+
+# Verify ZFS
+test_posix_mode $TESTDIR
+
+log_pass "POSIX mode bits function correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/off/setup.ksh b/zfs/tests/zfs-tests/tests/functional/acl/off/setup.ksh
new file mode 100755
index 0000000..9a5b598
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/off/setup.ksh

@@ -0,0 +1,44 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright (c) 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+DISK=${DISKS%% *}
+
+cleanup_user_group
+
+# Create staff group and add users to it
+log_must add_group $ZFS_ACL_STAFF_GROUP
+log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF1
+log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF2
+
+default_setup_noexit $DISK
+
+log_must zfs set acltype=off $TESTPOOL/$TESTFS
+log_must chmod 0777 $TESTDIR
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/Makefile.am b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/Makefile.am
new file mode 100644
index 0000000..31d1237
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/Makefile.am

@@ -0,0 +1,8 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/posix-sa
+dist_pkgdata_SCRIPTS = \
+	cleanup.ksh \
+	setup.ksh \
+	posix_001_pos.ksh \
+	posix_002_pos.ksh \
+	posix_003_pos.ksh \
+	posix_004_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/cleanup.ksh
new file mode 100755
index 0000000..bb58a8c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/cleanup.ksh

@@ -0,0 +1,33 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+cleanup_user_group
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_001_pos.ksh
new file mode 120000
index 0000000..e6467b3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_001_pos.ksh

@@ -0,0 +1 @@
+../posix/posix_001_pos.ksh
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_002_pos.ksh
new file mode 120000
index 0000000..10140d0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_002_pos.ksh

@@ -0,0 +1 @@
+../posix/posix_002_pos.ksh
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_003_pos.ksh
new file mode 120000
index 0000000..3f3db28
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_003_pos.ksh

@@ -0,0 +1 @@
+../posix/posix_003_pos.ksh
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_004_pos.ksh
new file mode 120000
index 0000000..2c2bab4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/posix_004_pos.ksh

@@ -0,0 +1 @@
+../posix/posix_004_pos.ksh
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/setup.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/setup.ksh
new file mode 100755
index 0000000..d8bf8a6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix-sa/setup.ksh

@@ -0,0 +1,52 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+log_must getfacl --version
+log_must setfacl --version
+
+cleanup_user_group
+
+# Create staff group and add user to it
+log_must add_group $ZFS_ACL_STAFF_GROUP
+log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF1
+
+DISK=${DISKS%% *}
+default_setup_noexit $DISK
+log_must chmod 777 $TESTDIR
+
+# Use POSIX ACLs on filesystem
+log_must zfs set acltype=posix $TESTPOOL/$TESTFS
+log_must zfs set xattr=sa $TESTPOOL/$TESTFS
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/Makefile.am b/zfs/tests/zfs-tests/tests/functional/acl/posix/Makefile.am
index dcf2788..e63f631 100644
--- a/zfs/tests/zfs-tests/tests/functional/acl/posix/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/Makefile.am

@@ -4,4 +4,5 @@
 	setup.ksh \
 	posix_001_pos.ksh \
 	posix_002_pos.ksh \
-	posix_003_pos.ksh
+	posix_003_pos.ksh \
+	posix_004_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh
index 66124fe..69ef0a7 100755
--- a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_001_pos.ksh

@@ -34,7 +34,7 @@
 
 #
 # DESCRIPTION:
-#	Verify that user can access file/directory if acltype=posixacl.
+#	Verify that user can access file/directory if acltype=posix.
 #
 # STRATEGY:
 #	1. Test access to file (mode=rw-)
@@ -50,14 +50,14 @@
 	rmdir $TESTDIR/dir.0
 }
 
-log_assert "Verify acltype=posixacl works on file"
+log_assert "Verify acltype=posix works on file"
 log_onexit cleanup
 
 # Test access to FILE
 log_note "Testing access to FILE"
 log_must touch $TESTDIR/file.0
 log_must setfacl -m g:$ZFS_ACL_STAFF_GROUP:rw $TESTDIR/file.0
-getfacl $TESTDIR/file.0 2> /dev/null | egrep -q \
+getfacl $TESTDIR/file.0 2> /dev/null | grep -q \
     "^group:$ZFS_ACL_STAFF_GROUP:rw-$"
 if [ "$?" -eq "0" ]; then
 	# Should be able to write to file
@@ -76,7 +76,7 @@
 		log_note "expected mask drwxrw----+ but found $msk"
 		log_fail "Expected permissions were not set."
 	fi
-	getfacl $TESTDIR/dir.0 2> /dev/null | egrep -q \
+	getfacl $TESTDIR/dir.0 2> /dev/null | grep -q \
 	    "^group:$ZFS_ACL_STAFF_GROUP:rw-$"
 	if [ "$?" -ne "0" ]; then
 		acl=$(getfacl $TESTDIR/dir.0 2> /dev/null)

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh
index 1aceffd..d8424a0 100755
--- a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_002_pos.ksh

@@ -34,7 +34,7 @@
 
 #
 # DESCRIPTION:
-#	Verify that user can access file/directory if acltype=posixacl.
+#	Verify that user can access file/directory if acltype=posix.
 #
 # STRATEGY:
 #	1. Test access to directory (mode=-wx)
@@ -43,7 +43,7 @@
 #
 
 verify_runnable "both"
-log_assert "Verify acltype=posixacl works on directory"
+log_assert "Verify acltype=posix works on directory"
 
 # Test access to DIRECTORY
 log_note "Testing access to DIRECTORY"
@@ -59,7 +59,7 @@
 	log_note "expected mask drwx-wx---+ but found $msk"
 	log_fail "Expected permissions were not set."
 fi
-getfacl $TESTDIR/dir.0 2> /dev/null | egrep -q \
+getfacl $TESTDIR/dir.0 2> /dev/null | grep -q \
     "^group:$ZFS_ACL_STAFF_GROUP:-wx$"
 if [ "$?" -eq "0" ]; then
 	# Should be able to create file in directory

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_003_pos.ksh
index dc6ef0d..bdd77bd 100755
--- a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_003_pos.ksh

@@ -25,7 +25,6 @@
 #
 # DESCRIPTION:
 #	Verify that ACLs survive remount.
-#	Regression test for https://github.com/zfsonlinux/zfs/issues/4520
 #
 # STRATEGY:
 #	1. Test presence of default and regular ACLs after remount
@@ -44,16 +43,14 @@
 log_must mkdir $ACLDIR
 log_must setfacl -m g:$ZFS_ACL_STAFF_GROUP:wx $ACLDIR
 log_must setfacl -d -m g:$ZFS_ACL_STAFF_GROUP:wx $ACLDIR
-getfacl $ACLDIR 2> /dev/null | egrep -q "$acl_str1"
-if [ "$?" -eq "0" ]; then
-	getfacl $ACLDIR 2> /dev/null | egrep -q "$acl_str2"
-fi
 
-if [ "$?" -eq "0" ]; then
+if getfacl $ACLDIR 2> /dev/null | grep -q "$acl_str1" &&
+	getfacl $ACLDIR 2> /dev/null | grep -q "$acl_str2"
+then
 	log_must zfs unmount $TESTPOOL/$TESTFS
 	log_must zfs mount $TESTPOOL/$TESTFS
-	log_must eval "getfacl $ACLDIR 2> /dev/null | egrep -q \"$acl_str1\""
-	log_must eval "getfacl $ACLDIR 2> /dev/null | egrep -q \"$acl_str2\""
+	log_must eval "getfacl $ACLDIR 2> /dev/null | grep -q \"$acl_str1\""
+	log_must eval "getfacl $ACLDIR 2> /dev/null | grep -q \"$acl_str2\""
 	log_pass "POSIX ACLs survive remount"
 else
 	log_fail "Group '$ZFS_ACL_STAFF_GROUP' does not have 'rwx'"

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh
new file mode 100755
index 0000000..8aa2cf4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/posix_004_pos.ksh

@@ -0,0 +1,52 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2020 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+#
+# DESCRIPTION:
+#	Verify chown works with POSIX ACLs.
+#	Regression test for https://github.com/openzfs/zfs/issues/10043
+#
+# STRATEGY:
+#	1. Prepare an appropriate ACL on the test directory
+#	2. Change the owner of the directory
+#	3. Reset and set the ACLs for test directory owned by the user
+#
+
+verify_runnable "both"
+log_assert "Verify chown works with POSIX ACLs"
+
+log_must setfacl -d -m u:$ZFS_ACL_STAFF1:rwx $TESTDIR
+log_must setfacl -b $TESTDIR
+
+log_must chown $ZFS_ACL_STAFF1 $TESTDIR
+log_must setfacl -b $TESTDIR
+log_must setfacl -d -m u:$ZFS_ACL_STAFF1:rwx $TESTDIR
+log_must chown 0 $TESTDIR
+
+log_pass "chown works with POSIX ACLs"

diff --git a/zfs/tests/zfs-tests/tests/functional/acl/posix/setup.ksh b/zfs/tests/zfs-tests/tests/functional/acl/posix/setup.ksh
index 5d6d158..526c78e 100755
--- a/zfs/tests/zfs-tests/tests/functional/acl/posix/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/acl/posix/setup.ksh

@@ -46,7 +46,6 @@
 log_must chmod 777 $TESTDIR
 
 # Use POSIX ACLs on filesystem
-log_must zfs set acltype=posixacl $TESTPOOL/$TESTFS
-log_must zfs set xattr=sa $TESTPOOL/$TESTFS
+log_must zfs set acltype=posix $TESTPOOL/$TESTFS
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/zfs/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
index 7cffb2e..82fd9f3 100644
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/Makefile.am

@@ -14,7 +14,9 @@
 	alloc_class_010_pos.ksh \
 	alloc_class_011_neg.ksh \
 	alloc_class_012_pos.ksh \
-	alloc_class_013_pos.ksh
+	alloc_class_013_pos.ksh \
+	alloc_class_014_neg.ksh \
+	alloc_class_015_pos.ksh
 
 dist_pkgdata_DATA = \
 	alloc_class.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
index 4c64cff..e204f43 100644
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib

@@ -62,6 +62,7 @@
 	((ret |= $?))
 
 	kill -9 $pid
+	wait $pid 2> /dev/null
 
 	return $ret
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh
index 79ac936..2aeae65 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_004_pos.ksh

@@ -53,7 +53,7 @@
 	    special $stype $sdisks
 
 	ac_value="$(zpool get -H -o property,value all | \
-	    egrep allocation_classes | nawk '{print $2}')"
+	    grep allocation_classes | nawk '{print $2}')"
 	if [ "$ac_value" = "active" ]; then
 		log_note "feature@allocation_classes is active"
 	else

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh
index 337114c..eeb86b5 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_005_pos.ksh

@@ -42,7 +42,7 @@
 		log_must zpool create $TESTPOOL $type $ZPOOL_DISKS
 	fi
 	ac_value="$(zpool get -H -o property,value all | \
-	    egrep allocation_classes  | awk '{print $2}')"
+	    grep allocation_classes  | awk '{print $2}')"
 	if [ "$ac_value" = "enabled" ]; then
 		log_note "feature@allocation_classes is enabled"
 	else
@@ -57,7 +57,7 @@
 		    $CLASS_DISK0 $CLASS_DISK1
 	fi
 	ac_value="$(zpool get -H -o property,value all | \
-	    egrep allocation_classes | awk '{print $2}')"
+	    grep allocation_classes | awk '{print $2}')"
 	if [ "$ac_value" = "active" ]; then
 		log_note "feature@allocation_classes is active"
 	else

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh
index 7c1d6e1..e8061fd 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_009_pos.ksh

@@ -13,7 +13,7 @@
 
 #
 # Copyright (c) 2017, Intel Corporation.
-# Copyright (c) 2018 by Delphix. All rights reserved.
+# Copyright (c) 2018, 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
@@ -33,22 +33,33 @@
 
 typeset stype=""
 typeset sdisks=""
+typeset props=""
 
 for type in "" "mirror" "raidz"
 do
 	if [ "$type" = "mirror" ]; then
 		stype="mirror"
 		sdisks="${CLASS_DISK0} ${CLASS_DISK1} ${CLASS_DISK2}"
+		props="-o ashift=12"
 	elif [ "$type" = "raidz" ]; then
 		stype="mirror"
 		sdisks="${CLASS_DISK0} ${CLASS_DISK1}"
 	else
 		stype=""
-		special_args="${CLASS_DISK0}"
+		sdisks="${CLASS_DISK0}"
 	fi
 
-	log_must zpool create $TESTPOOL $type $ZPOOL_DISKS \
-	    special $stype $sdisks
+	#
+	# 1/3 of the time add the special vdev after creating the pool
+	#
+	if [ $((RANDOM % 3)) -eq 0 ]; then
+		log_must zpool create ${props} $TESTPOOL $type $ZPOOL_DISKS
+		log_must zpool add ${props} $TESTPOOL special $stype $sdisks
+	else
+		log_must zpool create ${props} $TESTPOOL $type $ZPOOL_DISKS \
+		    special $stype $sdisks
+	fi
+
 	log_must zpool export $TESTPOOL
 	log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL
 	log_must display_status $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh
index bd6c663..b49a891 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh

@@ -33,8 +33,9 @@
 {
 	typeset dataset="$1"
 	typeset inum="$2"
+	typeset num_normal=$(echo $ZPOOL_DISKS | wc -w | xargs)
 
-	zdb -dddddd $dataset $inum | awk '{
+	zdb -dddddd $dataset $inum | awk -v d=$num_normal '{
 # find DVAs from string "offset level dva" only for L0 (data) blocks
 if (match($0,"L0 [0-9]+")) {
    dvas[0]=$3
@@ -49,7 +50,7 @@
             exit 1;
          }
          # verify vdev is "special"
-         if (arr[1] < 3) {
+         if (arr[1] < d) {
             exit 1;
          }
       }
@@ -57,57 +58,66 @@
 }}'
 }
 
+#
+# Check that device removal works for special class vdevs
+#
+function check_removal
+{
+	#
+	# Create a non-raidz pool so we can remove top-level vdevs
+	#
+	log_must disk_setup
+	log_must zpool create $TESTPOOL $ZPOOL_DISKS \
+	    special $CLASS_DISK0 special $CLASS_DISK1
+	log_must display_status "$TESTPOOL"
+
+	#
+	# Generate some metadata and small blocks in the special class vdev
+	# before removal
+	#
+	typeset -l i=1
+	typeset -l blocks=25
+
+	log_must zfs create -o special_small_blocks=32K -o recordsize=32K \
+	    $TESTPOOL/$TESTFS
+	for i in 1 2 3 4; do
+		log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/testfile.$i \
+		    bs=1M count=$blocks
+		((blocks = blocks + 25))
+	done
+	log_must sync_pool $TESTPOOL
+	log_must zpool list -v $TESTPOOL
+
+	# Verify the files were written in the special class vdevs
+	for i in 1 2 3 4; do
+		dataset="$TESTPOOL/$TESTFS"
+		inum="$(get_objnum /$TESTPOOL/$TESTFS/testfile.$i)"
+		log_must file_in_special_vdev $dataset $inum
+	done
+
+	log_must zpool remove $TESTPOOL $CLASS_DISK0
+
+	sleep 5
+	log_must sync_pool $TESTPOOL
+	sleep 1
+
+	log_must zdb -bbcc $TESTPOOL
+	log_must zpool list -v $TESTPOOL
+	log_must zpool destroy -f "$TESTPOOL"
+	log_must disk_cleanup
+}
+
 claim="Removing a special device from a pool succeeds."
 
 log_assert $claim
 log_onexit cleanup
 
-#
-# Create a non-raidz pool so we can remove top-level vdevs
-#
-log_must disk_setup
-log_must zpool create $TESTPOOL $ZPOOL_DISK0 $ZPOOL_DISK1 $ZPOOL_DISK2 \
-  special $CLASS_DISK0 special $CLASS_DISK1
-log_must display_status "$TESTPOOL"
-
-#
-# Generate some metadata and small blocks in the special class before removal
-#
-typeset -l i=1
-typeset -l blocks=25
-
-log_must zfs create -o special_small_blocks=32K -o recordsize=32K \
-	$TESTPOOL/$TESTFS
-for i in 1 2 3 4; do
-	log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/testfile.$i bs=1M \
-	    count=$blocks
-	((blocks = blocks + 25))
+typeset CLASS_DEVSIZE=$CLASS_DEVSIZE
+for CLASS_DEVSIZE in $CLASS_DEVSIZE $ZPOOL_DEVSIZE; do
+	typeset ZPOOL_DISKS=$ZPOOL_DISKS
+	for ZPOOL_DISKS in "$ZPOOL_DISKS" $ZPOOL_DISK0; do
+		check_removal
+	done
 done
-log_must sync_pool $TESTPOOL
-log_must zpool list -v $TESTPOOL
-
-# Verify the files were written in the special class vdevs
-for i in 1 2 3 4; do
-	dataset="$TESTPOOL/$TESTFS"
-	inum="$(stat -c '%i' /$TESTPOOL/$TESTFS/testfile.$i)"
-	log_must file_in_special_vdev $dataset $inum
-done
-
-#
-# remove a special allocation vdev and force a remapping
-# N.B. The 'zfs remap' command has been disabled and may be removed.
-#
-export ZFS_REMAP_ENABLED=YES
-
-log_must zpool remove $TESTPOOL $CLASS_DISK0
-log_must zfs remap $TESTPOOL/$TESTFS
-
-sleep 5
-log_must sync_pool $TESTPOOL
-sleep 1
-
-log_must zdb -bbcc $TESTPOOL
-log_must zpool list -v $TESTPOOL
-log_must zpool destroy -f "$TESTPOOL"
 
 log_pass $claim

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
index 22a8f1a..790a47f 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh

@@ -42,7 +42,8 @@
 
 log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL
 
-log_must echo y | newfs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null 2>&1
+block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL"
+log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null"
 
 sync_pool
 log_must zpool list -v $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh
new file mode 100755
index 0000000..1b52014
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh

@@ -0,0 +1,38 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
+
+#
+# DESCRIPTION:
+#	Setting the special_small_blocks property greater than recordsize fails.
+#
+
+verify_runnable "global"
+
+claim="Setting the special_small_blocks property greater than recordsize fails"
+
+log_assert $claim
+log_onexit cleanup
+log_must disk_setup
+
+# Each attempt sets special_small_blocks to twice recordsize and must fail.
+for size in 512 4096 32768 131072 524288 1048576; do
+	bigger=$((size * 2))
+	log_mustnot zpool create -O recordsize=$size \
+		-O special_small_blocks=$bigger \
+		$TESTPOOL raidz $ZPOOL_DISKS special mirror \
+		$CLASS_DISK0 $CLASS_DISK1
+done
+
+log_pass $claim

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh
new file mode 100755
index 0000000..49c468a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh

@@ -0,0 +1,45 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
+
+#
+# DESCRIPTION:
+# 	Can set special_small_blocks property less than or equal to recordsize.
+#
+
+verify_runnable "global"
+
+claim="Can set special_small_blocks property less than or equal to recordsize"
+
+log_assert $claim
+log_onexit cleanup
+log_must disk_setup
+
+# Both half of recordsize and recordsize itself must be accepted.
+for size in 8192 32768 131072 524288 1048576; do
+	smaller=$((size / 2))
+	log_must zpool create -O recordsize=$size \
+		-O special_small_blocks=$smaller \
+		$TESTPOOL raidz $ZPOOL_DISKS special mirror \
+		$CLASS_DISK0 $CLASS_DISK1
+	log_must zpool destroy -f "$TESTPOOL"
+
+	log_must zpool create -O recordsize=$size \
+		-O special_small_blocks=$size \
+		$TESTPOOL raidz $ZPOOL_DISKS special mirror \
+		$CLASS_DISK0 $CLASS_DISK1
+	log_must zpool destroy -f "$TESTPOOL"
+done
+
+log_pass $claim

diff --git a/zfs/tests/zfs-tests/tests/functional/alloc_class/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/alloc_class/cleanup.ksh
index c12d597..13775da 100755
--- a/zfs/tests/zfs-tests/tests/functional/alloc_class/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/alloc_class/cleanup.ksh

@@ -21,7 +21,7 @@
 
 verify_runnable "global"
 
-default_cleanup
+default_cleanup_noexit
 disk_cleanup
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh b/zfs/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh
index 6d007ae..6650b2e 100755
--- a/zfs/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh

@@ -21,25 +21,25 @@
 {
 	# Set tunables to their recorded actual size and then to their original
 	# value: this works for previously unconfigured tunables.
-	log_must set_tunable64 zfs_arc_min "$MINSIZE"
-	log_must set_tunable64 zfs_arc_min "$ZFS_ARC_MIN"
-	log_must set_tunable64 zfs_arc_max "$MAXSIZE"
-	log_must set_tunable64 zfs_arc_max "$ZFS_ARC_MAX"
+	log_must set_tunable64 ARC_MIN "$MINSIZE"
+	log_must set_tunable64 ARC_MIN "$ZFS_ARC_MIN"
+	log_must set_tunable64 ARC_MAX "$MAXSIZE"
+	log_must set_tunable64 ARC_MAX "$ZFS_ARC_MAX"
 }
 
 log_onexit cleanup
 
-ZFS_ARC_MAX="$(get_tunable zfs_arc_max)"
-ZFS_ARC_MIN="$(get_tunable zfs_arc_min)"
+ZFS_ARC_MAX="$(get_tunable ARC_MAX)"
+ZFS_ARC_MIN="$(get_tunable ARC_MIN)"
 MINSIZE="$(get_min_arc_size)"
 MAXSIZE="$(get_max_arc_size)"
 
 log_assert "ARC tunables should be updated dynamically"
 
 for size in $((MAXSIZE/4)) $((MAXSIZE/3)) $((MAXSIZE/2)) $MAXSIZE; do
-	log_must set_tunable64 zfs_arc_max "$size"
+	log_must set_tunable64 ARC_MAX "$size"
 	log_must test "$(get_max_arc_size)" == "$size"
-	log_must set_tunable64 zfs_arc_min "$size"
+	log_must set_tunable64 ARC_MIN "$size"
 	log_must test "$(get_min_arc_size)" == "$size"
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh
index 7ec9eaf..0577a6b 100755
--- a/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh

@@ -55,10 +55,16 @@
 
         [[ -n "$2" ]] && filter="-F $2"
 
-	from_dbufstat=$(grep -w "$name" "$DBUFSTATS_FILE" | awk '{ print $3 }')
+	if is_linux; then
+		from_dbufstat=$(grep -w "$name" "$DBUFSTATS_FILE" |
+		    awk '{ print $3 }')
+	else
+		from_dbufstat=$(awk "/dbufstats\.$name:/ { print \$2 }" \
+		    "$DBUFSTATS_FILE")
+	fi
 	from_dbufs=$(dbufstat -bxn -i "$DBUFS_FILE" "$filter" | wc -l)
 
-	within_tolerance $from_dbufstat $from_dbufs 9 \
+	within_tolerance $from_dbufstat $from_dbufs 15 \
 	    || log_fail "Stat $name exceeded tolerance"
 }
 
@@ -71,8 +77,8 @@
 log_must file_write -o create -f "$TESTDIR/file" -b 1048576 -c 20 -d R
 log_must zpool sync
 
-log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
-log_must eval "cat /proc/spl/kstat/zfs/dbufstats > $DBUFSTATS_FILE"
+log_must eval "kstat dbufs > $DBUFS_FILE"
+log_must eval "kstat dbufstats '' > $DBUFSTATS_FILE"
 
 for level in {0..11}; do
 	testdbufstat "cache_level_$level" "dbc=1,level=$level"

diff --git a/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_002_pos.ksh
index dc30b66..58d4015 100755
--- a/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/arc/dbufstats_002_pos.ksh

@@ -58,10 +58,10 @@
 log_must file_write -o create -f "$TESTDIR/file" -b 1048576 -c 1 -d R
 log_must zpool sync
 
-objid=$(stat --format="%i" "$TESTDIR/file")
+objid=$(get_objnum "$TESTDIR/file")
 log_note "Object ID for $TESTDIR/file is $objid"
 
-log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
+log_must eval "kstat dbufs > $DBUFS_FILE"
 dbuf=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid" | wc -l)
 mru=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid,list=1" | wc -l)
 mfu=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid,list=3" | wc -l)
@@ -70,7 +70,7 @@
 verify_eq "0" "$mfu" "mfu count"
 
 log_must eval "cat $TESTDIR/file > /dev/null"
-log_must eval "cat /proc/spl/kstat/zfs/dbufs > $DBUFS_FILE"
+log_must eval "kstat dbufs > $DBUFS_FILE"
 dbuf=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid" | wc -l)
 mru=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid,list=1" | wc -l)
 mfu=$(dbufstat -bxn -i "$DBUFS_FILE" -F "object=$objid,list=3" | wc -l)

diff --git a/zfs/tests/zfs-tests/tests/functional/atime/atime_common.kshlib b/zfs/tests/zfs-tests/tests/functional/atime/atime_common.kshlib
index bd6c6dc..fce85c3 100644
--- a/zfs/tests/zfs-tests/tests/functional/atime/atime_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/atime/atime_common.kshlib

@@ -47,6 +47,9 @@
 	if is_linux; then
 		typeset before=$(stat -c %X $filename)
 		sleep 2
+	elif is_freebsd; then
+		typeset before=$(stat -f %a $filename)
+		sleep 2
 	else
 		typeset before=$(ls -Eu $filename | awk '{print $7}')
 	fi
@@ -55,6 +58,8 @@
 
 	if is_linux; then
 		typeset after=$(stat -c %X $filename)
+	elif is_freebsd; then
+		typeset after=$(stat -f %a $filename)
 	else
 		typeset after=$(ls -Eu $filename | awk '{print $7}')
 	fi

diff --git a/zfs/tests/zfs-tests/tests/functional/atime/root_atime_off.ksh b/zfs/tests/zfs-tests/tests/functional/atime/root_atime_off.ksh
index 2fbf06b..7eb2ed9 100755
--- a/zfs/tests/zfs-tests/tests/functional/atime/root_atime_off.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/atime/root_atime_off.ksh

@@ -53,7 +53,7 @@
 
 #
 # Create $TESTFILE, snapshot and clone.
-# Same as 002 except that atime applies to root dataset (ZoL#8675).
+# Same as 002 except that atime applies to root dataset (OpenZFS#8675).
 #
 setup_snap_clone
 reset_atime

diff --git a/zfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh b/zfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh
index 3976523..44d471a 100755
--- a/zfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/atime/root_atime_on.ksh

@@ -52,7 +52,7 @@
 
 #
 # Create $TESTFILE, snapshot and clone.
-# Same as 001 except that atime/relatime applies to root dataset (ZoL#8675).
+# Same as 001 except that atime/relatime applies to root dataset (OpenZFS#8675).
 #
 setup_snap_clone
 reset_atime

diff --git a/zfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh b/zfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh
index c919e9f..1201294 100755
--- a/zfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/atime/root_relatime_on.ksh

@@ -53,7 +53,7 @@
 
 #
 # Create $TESTFILE, snapshot and clone.
-# Same as 003 except that atime/relatime applies to root dataset (ZoL#8675).
+# Same as 003 except that atime/relatime applies to root dataset (OpenZFS#8675).
 #
 setup_snap_clone
 reset_atime

diff --git a/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_002_neg.ksh
index 6a72bfc..a5bc775 100755
--- a/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_002_neg.ksh

@@ -51,14 +51,9 @@
 verify_runnable "global"
 
 function cleanup {
-	if datasetexists $TESTPOOL/vol
-	then
-		log_must zfs destroy $TESTPOOL/vol
-	fi
-	if poolexists $TESTPOOL
-	then
-		log_must zpool destroy $TESTPOOL
-	fi
+	datasetexists $TESTPOOL/vol && destroy_dataset $TESTPOOL/vol
+	poolexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+
 	if [[ -f $VDEV ]]; then
 		log_must rm -f $VDEV
 	fi

diff --git a/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_006_pos.ksh
index e17c06b..d29fe7e 100755
--- a/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/bootfs/bootfs_006_pos.ksh

@@ -117,7 +117,7 @@
 log_must zpool create $TESTPOOL mirror $VDEV1 $VDEV2 spare $VDEV3
 verify_bootfs $TESTPOOL
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	# stripe
 	log_must zpool create $TESTPOOL $VDEV1 $VDEV2
 	verify_bootfs $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/btree/Makefile.am b/zfs/tests/zfs-tests/tests/functional/btree/Makefile.am
new file mode 100644
index 0000000..333209d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/btree/Makefile.am

@@ -0,0 +1,20 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/btree
+# Test scripts shipped for the btree functional group.
+dist_pkgdata_SCRIPTS = \
+	btree_negative.ksh \
+	btree_positive.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/btree/btree_negative.ksh b/zfs/tests/zfs-tests/tests/functional/btree/btree_negative.ksh
new file mode 100755
index 0000000..cefcbc5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/btree/btree_negative.ksh

@@ -0,0 +1,38 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Verify that the btree functions don't allow bad inputs
+#
+# insert_duplicate - Callers may not add values that are already in the tree
+# remove_missing   - Callers may not remove values that are not in the tree
+#
+# Note: These invocations cause btree_test to crash, but the program disables
+# core dumps first. Even so, we can't use log_mustnot: it explicitly checks
+# for return values that correspond to a core dump and treats them as a test
+# failure.
+
+# A zero exit status means btree_test accepted the bad input.
+for badcase in insert_duplicate remove_missing; do
+	if btree_test -n $badcase; then
+		log_fail "Failure from $badcase"
+	fi
+done
+log_pass "Btree negative tests passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/btree/btree_positive.ksh b/zfs/tests/zfs-tests/tests/functional/btree/btree_positive.ksh
new file mode 100755
index 0000000..badbac2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/btree/btree_positive.ksh

@@ -0,0 +1,35 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# The `btree_test` binary runs a series of positive tests when called
+# without arguments.
+#
+# insert_find_remove - Basic functionality test
+# find_without_index - Using the find function with a NULL argument
+# drain_tree         - Fill the tree then empty it using the first and last
+#                      functions
+# stress_tree        - Allow the tree to have items added and removed for a
+#                      given amount of time
+#
+# Run btree_test with no arguments, i.e. the positive suite listed above.
+log_must btree_test
+
+log_pass "Btree positive tests passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/cache/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cache/Makefile.am
index 18dd9c1..f28130e 100644
--- a/zfs/tests/zfs-tests/tests/functional/cache/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cache/Makefile.am

@@ -11,8 +11,9 @@
 	cache_007_neg.ksh \
 	cache_008_neg.ksh \
 	cache_009_pos.ksh \
-	cache_010_neg.ksh \
-	cache_011_pos.ksh
+	cache_010_pos.ksh \
+	cache_011_pos.ksh \
+	cache_012_pos.ksh
 
 dist_pkgdata_DATA = \
 	cache.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/cache/cache_010_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cache/cache_010_neg.ksh
deleted file mode 100755
index 1d0683b..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cache/cache_010_neg.ksh
+++ /dev/null

@@ -1,100 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/cache/cache.cfg
-. $STF_SUITE/tests/functional/cache/cache.kshlib
-
-#
-# DESCRIPTION:
-#	Verify cache device must be a block device.
-#
-# STRATEGY:
-#	1. Create a pool
-#	2. Add different object as cache
-#	3. Verify character devices and files fail
-#
-
-verify_runnable "global"
-
-function cleanup_testenv
-{
-	cleanup
-	if [[ -n $lofidev ]]; then
-		if is_linux; then
-			losetup -d $lofidev
-		else
-			lofiadm -d $lofidev
-		fi
-	fi
-}
-
-log_assert "Cache device can only be block devices."
-log_onexit cleanup_testenv
-
-TESTVOL=testvol1$$
-dsk1=${DISKS%% *}
-log_must zpool create $TESTPOOL ${DISKS#$dsk1}
-
-# Add nomal ${DEV_RDSKDIR} device
-log_must zpool add $TESTPOOL cache \
-    ${DEV_RDSKDIR}/${dsk1}${SLICE_PREFIX}${SLICE0}
-log_must verify_cache_device $TESTPOOL $dsk1 'ONLINE'
-
-# Add normal file
-log_mustnot zpool add $TESTPOOL cache $VDEV2
-
-# Add /dev/rlofi device (allowed under Linux)
-if is_linux; then
-	lofidev=$(losetup -f)
-	lofidev=${lofidev##*/}
-	log_must losetup $lofidev ${VDEV2%% *}
-	log_must zpool add $TESTPOOL cache $lofidev
-	log_must zpool remove $TESTPOOL $lofidev
-	log_must losetup -d $lofidev
-	lofidev=""
-else
-	lofidev=${VDEV2%% *}
-	log_must lofiadm -a $lofidev
-	lofidev=$(lofiadm $lofidev)
-	log_mustnot zpool add $TESTPOOL cache "/dev/rlofi/${lofidev#/dev/lofi/}"
-	log_must lofiadm -d $lofidev
-	lofidev=""
-fi
-
-# Add /dev/zvol/rdsk device (allowed under Linux)
-if ! is_linux; then
-	log_must zpool create $TESTPOOL2 $VDEV2
-	log_must zfs create -V $SIZE $TESTPOOL2/$TESTVOL
-	log_mustnot zpool add $TESTPOOL cache \
-	    ${ZVOL_RDEVDIR}/$TESTPOOL2/$TESTVOL
-fi
-
-log_pass "Cache device can only be block devices."

diff --git a/zfs/tests/zfs-tests/tests/functional/cache/cache_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cache/cache_010_pos.ksh
new file mode 100755
index 0000000..1d9fc5a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cache/cache_010_pos.ksh

@@ -0,0 +1,104 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/cache/cache.cfg
+. $STF_SUITE/tests/functional/cache/cache.kshlib
+
+#
+# DESCRIPTION:
+#	Verify that cache devices can be block devices, files or character devices
+#
+# STRATEGY:
+#	1. Create a pool
+#	2. Add different kinds of objects as cache devices
+#	3. Verify character devices and files pass
+#
+
+verify_runnable "global"
+
+function cleanup_testenv
+{
+	cleanup
+	# Only detach when a loopback device name was recorded.
+	[[ -z $lofidev ]] && return
+	if is_linux; then
+		losetup -d $lofidev
+	elif is_freebsd; then
+		mdconfig -du ${lofidev#md}
+	else
+		lofiadm -d $lofidev
+	fi
+}
+
+log_assert "Verify cache devices can be disk, file, lofi device or any" \
+  "device that presents a block interface"
+
+verify_disk_count "$DISKS" 2
+log_onexit cleanup_testenv
+
+TESTVOL=testvol1$$
+dsk1=${DISKS%% *}
+log_must zpool create $TESTPOOL ${DISKS#$dsk1}
+
+# Add normal ${DEV_RDSKDIR} device
+log_must zpool add $TESTPOOL cache \
+    ${DEV_RDSKDIR}/${dsk1}
+log_must zpool remove $TESTPOOL ${DEV_RDSKDIR}/${dsk1}
+
+# Add provided disk by its bare name (no ${DEV_RDSKDIR} prefix) and check
+# that it is reported ONLINE before removing it again
+log_must zpool add $TESTPOOL cache $dsk1
+log_must verify_cache_device $TESTPOOL $dsk1 'ONLINE'
+log_must zpool remove $TESTPOOL $dsk1
+
+# Add normal file
+log_must zpool add $TESTPOOL cache $VDEV
+ldev=$(random_get $VDEV)
+log_must verify_cache_device $TESTPOOL $ldev 'ONLINE'
+
+# Add loop back device backed by the first path listed in $VDEV2
+if is_linux; then
+	lofidev=$(losetup -f)
+	log_must losetup $lofidev ${VDEV2%% *}
+	lofidev=${lofidev##*/}
+elif is_freebsd; then
+	lofidev=$(mdconfig -a ${VDEV2%% *})
+else
+	lofidev=${VDEV2%% *}
+	log_must lofiadm -a $lofidev
+	lofidev=$(lofiadm $lofidev)
+fi
+
+log_must zpool add $TESTPOOL cache $lofidev
+log_must verify_cache_device $TESTPOOL $lofidev 'ONLINE'
+
+log_pass "Verify cache devices can be disk, file, lofi device or any" \
+  "device that presents a block interface"

diff --git a/zfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh
new file mode 100755
index 0000000..edefe9c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh

@@ -0,0 +1,110 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/cache/cache.cfg
+. $STF_SUITE/tests/functional/cache/cache.kshlib
+
+#
+# DESCRIPTION:
+#	Looping around a cache device with l2arc_write_size exceeding
+#	the device size succeeds.
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Set l2arc_write_max to a value larger than the cache device.
+#	3. Create a file larger than the cache device and random read
+#		for 10 sec.
+#	4. Verify that l2arc_write_max is set back to the default.
+#	5. Set l2arc_write_max to 64MB, a value less than the cache device
+#		size but larger than the default.
+#	6. Record the l2_size.
+#	7. Random read for 1 sec.
+#	8. Record the l2_size again.
+#	9. If (6) <= (8) then we have not looped around yet.
+#	10. If (6) > (8) then we looped around. Break out of the loop and test.
+#	11. Destroy pool.
+#
+
+verify_runnable "global"
+
+log_assert "Looping around a cache device succeeds."
+
+function cleanup
+{
+	# Remove the pool if the test left it behind.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	# Put back the tunable values saved at the start of the test.
+	log_must set_tunable32 L2ARC_WRITE_MAX $write_max
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+}
+log_onexit cleanup
+
+# Save both tunables for cleanup before changing either of them.
+typeset write_max=$(get_tunable L2ARC_WRITE_MAX)
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+
+# 4 GiB backing file for the pool; the cache file is half that size.
+typeset VDEV_SZ=$(( 4 * 1024 * 1024 * 1024 ))
+typeset VCACHE_SZ=$(( VDEV_SZ / 2 ))
+typeset VDEV="$VDIR/vdev.disk"
+typeset VCACHE="$VDIR/vdev.cache"
+
+# Parameters exported for the fio job files run below.
+typeset fill_bytes=$(( floor(VDEV_SZ * 3 / 4) ))
+export NUMJOBS=4
+export FILE_SIZE=$(( floor(fill_bytes / NUMJOBS) ))
+export DIRECTORY=/$TESTPOOL
+export RUNTIME=10
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+
+# Set L2ARC_WRITE_MAX to twice the cache file size.
+log_must set_tunable32 L2ARC_WRITE_MAX $(( VCACHE_SZ * 2 ))
+log_must truncate -s $VCACHE_SZ $VCACHE
+log_must truncate -s $VDEV_SZ $VDEV
+log_must zpool create -f $TESTPOOL $VDEV cache $VCACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+# The tunable must now equal the value saved at the start.
+typeset write_max_now=$(get_tunable L2ARC_WRITE_MAX)
+log_must test $write_max_now -eq $write_max
+
+log_must set_tunable32 L2ARC_WRITE_MAX $(( 64 * 1024 * 1024 ))
+export RUNTIME=1
+
+# Repeat one-second read runs until l2_size shrinks across a run.
+while true; do
+	typeset l2_size1=$(get_arcstat l2_size)
+	log_must fio $FIO_SCRIPTS/random_reads.fio
+	typeset l2_size2=$(get_arcstat l2_size)
+	[[ $l2_size1 -gt $l2_size2 ]] && break
+done
+log_must test $l2_size1 -gt $l2_size2
+
+log_must zpool destroy $TESTPOOL
+log_pass "Looping around a cache device succeeds."

diff --git a/zfs/tests/zfs-tests/tests/functional/cache/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cache/setup.ksh
index d5da5d9..0493637 100755
--- a/zfs/tests/zfs-tests/tests/functional/cache/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cache/setup.ksh

@@ -34,10 +34,6 @@
 
 verify_runnable "global"
 
-if ! is_physical_device $LDEV; then
-	log_unsupported "Only physical disk could be cache device"
-fi
-
 log_must rm -rf $VDIR $VDIR2
 log_must mkdir -p $VDIR $VDIR2
 log_must mkfile $SIZE $VDEV $VDEV2

diff --git a/zfs/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib b/zfs/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib
index 5b08016..f0fe1bb 100644
--- a/zfs/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/casenorm/casenorm.kshlib

@@ -34,7 +34,7 @@
 function destroy_testfs
 {
 	if datasetexists $TESTPOOL/$TESTFS ; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS
+		destroy_dataset $TESTPOOL/$TESTFS -f
 		rm -rf $TESTDIR || log_unresolved Could not remove $TESTDIR
 	fi
 }
@@ -65,10 +65,10 @@
 {
 	typeset name=$1
 
-	if is_linux; then
-		test -f "${TESTDIR}/${name}" >/dev/null 2>&1
-	else
+	if is_illumos; then
 		zlook -l $TESTDIR $name >/dev/null 2>&1
+	else
+		test -f "${TESTDIR}/${name}" >/dev/null 2>&1
 	fi
 }
 
@@ -76,10 +76,10 @@
 {
 	typeset name=$1
 
-	if is_linux; then
-		test -f "${TESTDIR}/${name}" >/dev/null 2>&1
-	else
+	if is_illumos; then
 		zlook -il $TESTDIR $name >/dev/null 2>&1
+	else
+		test -f "${TESTDIR}/${name}" >/dev/null 2>&1
 	fi
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/channel_common.kshlib b/zfs/tests/zfs-tests/tests/functional/channel_program/channel_common.kshlib
index 722a477..a828ba2 100644
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/channel_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/channel_common.kshlib

@@ -141,24 +141,16 @@
 	pool=$1
 	shift
 
-	#
-	# Catch HERE document if it exists and save it within our
-	# temp file. The reason we do this is that since the
-	# log_must_program wrapper calls zfs-program twice (once
-	# for open context and once for syncing) the HERE doc
-	# is consumed in the first invocation and the second one
-	# does not have a program to run.
-	#
-	test -s /dev/stdin && cat > $tmpin
+	infile=$1
+	shift
 
 	#
-	# If $tmpin has contents it means that we consumed a HERE
-	# doc and $1 currently holds "-" (a dash). If there is no
-	# HERE doc and $tmpin is empty, then we copy the contents
-	# of the original channel program to $tmpin.
+	# Copy the contents of the original channel program to $tmpin.
 	#
-	[[ -s $tmpin ]] || cp $1 $tmpin
-	shift
+	# If $infile currently holds "-" (a dash) it means that we consume a
+	# HERE doc from stdin, otherwise $infile is a file path.
+	#
+	cat $infile > $tmpin
 
 	lua_args=$@
 

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.exists.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.exists.ksh
index d486c25..eba01b1 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.exists.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.exists.ksh

@@ -30,7 +30,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS@$TESTSNAP && \
-	    log_must zfs destroy -R $TESTPOOL/$TESTFS@$TESTSNAP
+	    destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP -R
 }
 
 log_must_program $TESTPOOL $ZCP_ROOT/lua_core/tst.exists.zcp \

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.memory_limit.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.memory_limit.ksh
index c6038bf..0533b8f 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.memory_limit.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.memory_limit.ksh

@@ -62,7 +62,7 @@
 EOF
 
 # Set the memlimit, in case it is a non-default value
-log_must set_tunable32 zfs_lua_max_memlimit 100000000
+log_must set_tunable32 LUA_MAX_MEMLIMIT 100000000
 
 log_mustnot_checkerror_program "Invalid instruction or memory limit" \
     -m 200000000 $TESTPOOL - <<-EOF

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.return_large.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.return_large.ksh
index ba9c407..bbaeb54 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.return_large.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.return_large.ksh

@@ -27,7 +27,7 @@
 
 function cleanup
 {
-	datasetexists $fs && log_must zfs destroy -R $fs
+	datasetexists $fs && destroy_dataset $fs -R
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.timeout.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.timeout.ksh
index 9256e86..22ea375 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.timeout.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/lua_core/tst.timeout.ksh

@@ -37,7 +37,7 @@
 	error=$(zfs program -t $lim $TESTPOOL $ZCP_ROOT/lua_core/tst.timeout.zcp 2>&1)
 	[[ $? -ne 0 ]] || log_fail "Channel program with limit $lim exited 0: $error"
 
-	instrs_run=$(echo $error | sed -n 's/.\+ \([0-9]*\) Lua instructions/\1/p')
+	instrs_run=$(echo $error | awk -F "chunk" '{print $2}' | awk '{print $1}')
 	if [[ $instrs_run -lt $(( $lim - 100 )) ]]; then
 		log_fail "Runtime (${instrs_run} instr) < limit (${lim} - 100 instr)"
 	elif [[ $instrs_run -gt $(( $lim + 100 )) ]]; then

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am
index cc86a2d..4d9aa9c 100644
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am

@@ -13,8 +13,11 @@
 	tst.get_type.ksh \
 	tst.get_userquota.ksh \
 	tst.get_written.ksh \
+	tst.inherit.ksh \
+	tst.list_bookmarks.ksh \
 	tst.list_children.ksh \
 	tst.list_clones.ksh \
+	tst.list_holds.ksh \
 	tst.list_snapshots.ksh \
 	tst.list_system_props.ksh \
 	tst.list_user_props.ksh \
@@ -24,9 +27,12 @@
 	tst.promote_simple.ksh \
 	tst.rollback_mult.ksh \
 	tst.rollback_one.ksh \
+	tst.set_props.ksh \
 	tst.snapshot_destroy.ksh \
 	tst.snapshot_neg.ksh \
 	tst.snapshot_recursive.ksh \
+	tst.bookmark.create.ksh \
+	tst.bookmark.copy.ksh \
 	tst.snapshot_simple.ksh \
 	tst.terminate_by_signal.ksh
 
@@ -38,7 +44,10 @@
 	tst.get_string_props.out \
 	tst.get_string_props.zcp \
 	tst.promote_conflict.zcp \
+	tst.set_props.zcp \
 	tst.snapshot_destroy.zcp \
 	tst.snapshot_neg.zcp \
 	tst.snapshot_recursive.zcp \
-	tst.snapshot_simple.zcp
+	tst.snapshot_simple.zcp \
+	tst.bookmark.create.zcp \
+	tst.bookmark.copy.zcp

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/cleanup.ksh
index 281f639..3ddcb4d 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/cleanup.ksh

@@ -16,4 +16,7 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-default_cleanup
+default_cleanup_noexit
+destroy_pool testpool2
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/setup.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/setup.ksh
index 2516b6b..5837bf1 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/setup.ksh

@@ -18,4 +18,8 @@
 
 DISK=${DISKS%% *}
 
-default_setup ${DISK}
+TESTPOOLDISK=${DISKS%% *}
+TESTPOOL2DISK=${DISKS##* }
+
+default_setup ${TESTPOOLDISK}
+create_pool testpool2 ${TESTPOOL2DISK}

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.ksh
new file mode 100755
index 0000000..81f570d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.ksh

@@ -0,0 +1,45 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION: Make sure bookmark copying works in channel programs
+#
+
+verify_runnable "global"
+
+# Dataset under test plus the short names used for its snapshot/bookmarks.
+child_fs=$TESTPOOL/$TESTFS/testchild
+snap=testsnap
+book=testbookmark
+book_copy=testbookmark_copy
+zcp=$ZCP_ROOT/synctask_core/tst.bookmark.copy.zcp
+
+function cleanup
+{
+	destroy_dataset $child_fs "-R"
+}
+log_onexit cleanup
+
+# The channel program needs an existing bookmark to copy from.
+log_must zfs create $child_fs
+log_must zfs snapshot $child_fs@$snap
+log_must zfs bookmark $child_fs@$snap "$child_fs#$book"
+
+log_must_program_sync $TESTPOOL $zcp $child_fs $book $book_copy
+
+log_pass "Simple bookmark copying works"

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.zcp b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.zcp
new file mode 100644
index 0000000..9473035
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.copy.zcp

@@ -0,0 +1,32 @@
+--
+-- This file and its contents are supplied under the terms of the
+-- Common Development and Distribution License ("CDDL"), version 1.0.
+-- You may only use this file in accordance with the terms of version
+-- 1.0 of the CDDL.
+--
+-- A full copy of the text of the CDDL should have accompanied this
+-- source.  A copy of the CDDL is also available via the Internet at
+-- http://www.illumos.org/license/CDDL.
+--
+
+--
+-- Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+--
+
+-- This program should be invoked as "zfs program <pool> <prog> <fs> <source_book> <new_book>"
+
+local argv = (...)["argv"]
+local fs = argv[1]
+local src_mark = fs .. "#" .. argv[2]
+local dst_mark = fs .. "#" .. argv[3]
+-- Copying the existing bookmark must return 0.
+assert(zfs.sync.bookmark(src_mark, dst_mark) == 0)
+-- Record every bookmark listed for the filesystem, keyed by full name.
+local seen = {}
+local total = 0
+for mark in zfs.list.bookmarks(fs) do
+	seen[mark] = true
+	total = total + 1
+end
+assert(total == 2)
+assert(seen[src_mark] == true and seen[dst_mark] == true)

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.ksh
new file mode 100755
index 0000000..05ec9cc
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.ksh

@@ -0,0 +1,43 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION: Make sure basic bookmark functionality works in channel programs
+#
+
+verify_runnable "global"
+
+fs=$TESTPOOL/$TESTFS/testchild
+snapname=testsnap
+bookname=testbookmark
+
+function cleanup
+{
+	destroy_dataset $fs "-R"
+}
+
+log_onexit cleanup
+
+log_must zfs create $fs
+
+log_must zfs snapshot $fs@$snapname
+
+log_must_program_sync $TESTPOOL \
+    $ZCP_ROOT/synctask_core/tst.bookmark.create.zcp $fs $snapname $bookname
+
+log_pass "Simple bookmark creation works"

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.zcp b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.zcp
new file mode 100644
index 0000000..eb53fd1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.bookmark.create.zcp

@@ -0,0 +1,26 @@
+--
+-- This file and its contents are supplied under the terms of the
+-- Common Development and Distribution License ("CDDL"), version 1.0.
+-- You may only use this file in accordance with the terms of version
+-- 1.0 of the CDDL.
+--
+-- A full copy of the text of the CDDL should have accompanied this
+-- source.  A copy of the CDDL is also available via the Internet at
+-- http://www.illumos.org/license/CDDL.
+--
+
+--
+-- Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+--
+
+-- This program should be invoked as "zfs program <pool> <prog> <fs> <snap> <book>"
+
+local argv = (...)["argv"]
+local fs = argv[1]
+local wanted = fs .. "#" .. argv[3]
+-- Bookmark the snapshot, then check it is the only bookmark listed for fs.
+assert(zfs.sync.bookmark(fs .. "@" .. argv[2], wanted) == 0)
+local found = {}
+for name in zfs.list.bookmarks(fs) do found[#found + 1] = name end
+assert(#found == 1)
+assert(found[1] == wanted)

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.ksh
index 6478fa6..eed3e0b 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.ksh

@@ -35,6 +35,7 @@
 log_must zfs create -o version=5 $fs
 create_snapshot $fs $TESTSNAP
 
-log_must_program $TESTPOOL $ZCP_ROOT/synctask_core/tst.get_index_props.zcp $fs $snap
+os=$(uname)
+log_must_program $TESTPOOL $ZCP_ROOT/synctask_core/tst.get_index_props.zcp $fs $snap $os
 
 log_pass "Getting index props should work correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp
index e898cf8..10ef8e7 100644
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp

@@ -16,6 +16,7 @@
 arg = ...
 fs = arg["argv"][1]
 snap = arg["argv"][2]
+os = arg["argv"][3]
 
 props = {}
 
@@ -26,7 +27,11 @@
 props['dedup']              = {{'off',       'default'}, {nil,         nil}}
 props['compression']        = {{'off',       'default'}, {nil,         nil}}
 props['snapdir']            = {{'hidden',    'default'}, {nil,         nil}}
-props['acltype']            = {{'off',       'default'}, {'off', 'default'}}
+if os == "Linux" then
+	props['acltype']    = {{'off',       'default'}, {'off', 'default'}}
+elseif os == "FreeBSD" then
+	props['aclmode']    = {{'discard',   'default'}, {'discard', 'default'}}
+end
 props['aclinherit']         = {{'restricted','default'}, {nil,         nil}}
 props['copies']             = {{'1',         'default'}, {nil,         nil}}
 props['primarycache']       = {{'all',       'default'}, {'all', 'default'}}
@@ -37,7 +42,11 @@
 props['exec']               = {{'on',        'default'}, {'on',  'default'}}
 props['setuid']             = {{'on',        'default'}, {'on',  'default'}}
 props['readonly']           = {{'off',       'default'}, {nil,         nil}}
-props['zoned']              = {{'off',       'default'}, {nil,         nil}}
+if os == "FreeBSD" then
+	props['jailed']     = {{'off',       'default'}, {nil,         nil}}
+else
+	props['zoned']      = {{'off',       'default'}, {nil,         nil}}
+end
 props['vscan']              = {{'off',       'default'}, {nil,         nil}}
 props['nbmand']             = {{'off',       'default'}, {'off', 'default'}}
 props['version']            = {{'5',               nil}, {'5',         nil}}

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_number_props.zcp b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_number_props.zcp
index 7996950..744230d 100644
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_number_props.zcp
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_number_props.zcp

@@ -41,7 +41,7 @@
 props['logicalreferenced']    = {{true,       nil}, {true, nil}, {true,       nil}}
 props['quota']                = {{true, 'default'}, {nil,  nil}, {nil,        nil}}
 props['reservation']          = {{true, 'default'}, {nil,  nil}, {true, 'default'}}
--- Note that zfsonlinux allows volsize for snapshot which differs from openzfs
+-- Note that OpenZFS allows volsize for snapshot
 -- props['volsize']           = {{nil,        nil}, {nil,  nil}, {true,       vol}}
 props['refquota']             = {{true, 'default'}, {nil,  nil}, {nil,        nil}}
 props['refreservation']       = {{true, 'default'}, {nil,  nil}, {true,       vol}}

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_string_props.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_string_props.ksh
index b7d7844..31ae4a5 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_string_props.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_string_props.ksh

@@ -30,8 +30,8 @@
 
 function cleanup
 {
-	datasetexists $clone && log_must zfs destroy $clone
-	datasetexists $fs && log_must zfs destroy -R $fs
+	datasetexists $clone && destroy_dataset $clone
+	datasetexists $fs && destroy_dataset $fs -R
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.inherit.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.inherit.ksh
new file mode 100755
index 0000000..e199b4c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.inherit.ksh

@@ -0,0 +1,54 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION:
+#	Clearing (inheriting) a user property from a channel program with
+#	zfs.sync.inherit should leave the property unset on the dataset.
+#
+
+verify_runnable "global"
+
+fs=$TESTPOOL/$TESTFS
+testprop="com.joyent:testprop"
+testval="testval"
+
+# Make sure the test property never outlives this test, even when the
+# channel program fails before clearing it.
+function cleanup
+{
+	datasetexists $fs && log_must zfs inherit $testprop $fs
+}
+
+log_onexit cleanup
+
+log_must dataset_setprop $fs $testprop $testval
+log_must_program_sync $TESTPOOL - $fs $testprop <<-EOF
+	arg = ...
+	fs = arg["argv"][1]
+	prop = arg["argv"][2]
+	err = zfs.sync.inherit(fs, prop)
+	msg = "resetting " .. prop .. " on " .. fs .. " err=" .. err
+	return msg
+EOF
+
+
+prop=$(get_prop $testprop $fs)
+[[ "$prop" == "-" ]] || log_fail "Property still set after inheriting"
+
+log_pass "Inherit/clear property with channel program works."

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_bookmarks.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_bookmarks.ksh
new file mode 100755
index 0000000..7456177
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_bookmarks.ksh

@@ -0,0 +1,120 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION:
+#       Listing zfs bookmarks should work correctly.
+#
+
+verify_runnable "global"
+
+TESTBOOK=$TESTPOOL/$TESTFS#testbook
+TESTBOOK1=$TESTBOOK-1
+TESTBOOK2=$TESTBOOK-2
+TESTBOOK3=$TESTBOOK-3
+
+function cleanup
+{
+	bkmarkexists $TESTBOOK && log_must zfs destroy $TESTBOOK
+	bkmarkexists $TESTBOOK1 && log_must zfs destroy $TESTBOOK1
+	bkmarkexists $TESTBOOK2 && log_must zfs destroy $TESTBOOK2
+	bkmarkexists $TESTBOOK3 && log_must zfs destroy $TESTBOOK3
+	destroy_snapshot
+}
+
+log_onexit cleanup
+
+create_snapshot
+
+# 0 bookmarks handled correctly
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
+	for s in zfs.list.bookmarks("$TESTPOOL/$TESTFS") do
+		n = n + 1
+	end
+	assert(n == 0)
+	return 0
+EOF
+
+# Create a bookmark
+log_must zfs bookmark $TESTPOOL/$TESTFS@$TESTSNAP $TESTBOOK
+
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
+	for s in zfs.list.bookmarks("$TESTPOOL/$TESTFS") do
+		assert(s == "$TESTBOOK")
+		n = n + 1
+	end
+	assert(n == 1)
+	return 0
+EOF
+
+log_must zfs bookmark $TESTPOOL/$TESTFS@$TESTSNAP $TESTBOOK1
+log_must zfs bookmark $TESTPOOL/$TESTFS@$TESTSNAP $TESTBOOK2
+log_must zfs bookmark $TESTPOOL/$TESTFS@$TESTSNAP $TESTBOOK3
+
+# All bookmarks appear exactly once
+log_must_program $TESTPOOL - <<-EOF
+	a = {}
+	a["$TESTBOOK"] = false
+	a["$TESTBOOK1"] = false
+	a["$TESTBOOK2"] = false
+	a["$TESTBOOK3"] = false
+	n = 0
+	for s in zfs.list.bookmarks("$TESTPOOL/$TESTFS") do
+		assert(not a[s])
+		a[s] = true
+		n = n + 1
+	end
+	assert(n == 4)
+	assert(a["$TESTBOOK"] and
+	    a["$TESTBOOK1"] and
+	    a["$TESTBOOK2"] and
+	    a["$TESTBOOK3"])
+	return 0
+EOF
+
+# Nonexistent input
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.bookmarks("$TESTPOOL/nonexistent-fs")
+	return 0
+EOF
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.bookmarks("nonexistent-pool/$TESTFS")
+	return 0
+EOF
+
+# Can't look in a different pool than the one specified on command line
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.bookmarks("testpool2")
+	return 0
+EOF
+
+# Can't have bookmarks on snapshots, only on filesystems
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.bookmarks("$TESTPOOL/$TESTFS@$TESTSNAP")
+	return 0
+EOF
+
+# Can't have bookmarks on bookmarks, only on filesystems
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.bookmarks("$TESTBOOK")
+	return 0
+EOF
+
+log_pass "Listing zfs bookmarks should work correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_holds.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_holds.ksh
new file mode 100755
index 0000000..2a471bd
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_holds.ksh

@@ -0,0 +1,121 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION:
+#       Listing zfs holds should work correctly.
+#
+
+verify_runnable "global"
+
+TESTHOLD=testhold-tag
+TESTHOLD1=$TESTHOLD-1
+TESTHOLD2=$TESTHOLD-2
+TESTHOLD3=$TESTHOLD-3
+SNAP=$TESTPOOL/$TESTFS@$TESTSNAP
+
+function cleanup
+{
+	holdexists $TESTHOLD $SNAP && log_must zfs release $TESTHOLD $SNAP
+	holdexists $TESTHOLD1 $SNAP && log_must zfs release $TESTHOLD1 $SNAP
+	holdexists $TESTHOLD2 $SNAP && log_must zfs release $TESTHOLD2 $SNAP
+	holdexists $TESTHOLD3 $SNAP && log_must zfs release $TESTHOLD3 $SNAP
+	destroy_snapshot
+}
+
+log_onexit cleanup
+
+create_snapshot
+
+# 0 holds handled correctly
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
+	for s in zfs.list.holds("$SNAP") do
+		n = n + 1
+	end
+	assert(n == 0)
+	return 0
+EOF
+
+# Create a hold
+log_must zfs hold $TESTHOLD $SNAP
+
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
+	for s in zfs.list.holds("$SNAP") do
+		assert(s == "$TESTHOLD")
+		n = n + 1
+	end
+	assert(n == 1)
+	return 0
+EOF
+
+log_must zfs hold $TESTHOLD1 $SNAP
+log_must zfs hold $TESTHOLD2 $SNAP
+log_must zfs hold $TESTHOLD3 $SNAP
+
+# All holds appear exactly once
+log_must_program $TESTPOOL - <<-EOF
+	a = {}
+	a["$TESTHOLD"] = false
+	a["$TESTHOLD1"] = false
+	a["$TESTHOLD2"] = false
+	a["$TESTHOLD3"] = false
+	n = 0
+	for s in zfs.list.holds("$SNAP") do
+		assert(not a[s])
+		a[s] = true
+		n = n + 1
+	end
+	assert(n == 4)
+	assert(a["$TESTHOLD"] and
+	    a["$TESTHOLD1"] and
+	    a["$TESTHOLD2"] and
+	    a["$TESTHOLD3"])
+	return 0
+EOF
+
+# Nonexistent input
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.holds("$TESTPOOL/nonexistent-fs@nonexistent-snap")
+	return 0
+EOF
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.holds("nonexistent-pool/$TESTFS")
+	return 0
+EOF
+
+# Can't look in a different pool than the one specified on command line
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.holds("testpool2")
+	return 0
+EOF
+
+# Can't have holds on filesystems
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.holds("$TESTPOOL/$TESTFS")
+	return 0
+EOF
+
+# Can't have holds on bookmarks
+log_mustnot_program $TESTPOOL - <<-EOF
+	zfs.list.holds("$TESTPOOL/$TESTFS#bookmark")
+	return 0
+EOF
+
+log_pass "Listing zfs holds should work correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh
index 2f5d214..a454a27 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.list_user_props.ksh

@@ -20,6 +20,9 @@
 # DESCRIPTION:
 #       Listing zfs user properties should work correctly.
 #
+#       Note that this file tests both zfs.list.user_properties
+#       and its alias zfs.list.properties.
+#
 
 verify_runnable "global"
 
@@ -39,6 +42,14 @@
 # 0 properties handled correctly
 log_must_program $TESTPOOL - <<-EOF
 	n = 0
+	for p in zfs.list.user_properties("$TESTPOOL/$TESTFS") do
+		n = n + 1
+	end
+	assert(n == 0)
+	return 0
+EOF
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
 	for p in zfs.list.properties("$TESTPOOL/$TESTFS") do
 		n = n + 1
 	end
@@ -51,6 +62,16 @@
 
 log_must_program $TESTPOOL - <<-EOF
 	n = 0
+	for p,v in zfs.list.user_properties("$TESTPOOL/$TESTFS") do
+		assert(p == "$TESTPROP")
+		assert(v == "$TESTVAL")
+		n = n + 1
+	end
+	assert(n == 1)
+	return 0
+EOF
+log_must_program $TESTPOOL - <<-EOF
+	n = 0
 	for p,v in zfs.list.properties("$TESTPOOL/$TESTFS") do
 		assert(p == "$TESTPROP")
 		assert(v == "$TESTVAL")
@@ -80,6 +101,34 @@
 	m["$TESTPROP3"] = "$TESTVAL3"
 	m["$TESTPROP4"] = "$TESTVAL4"
 	n = 0
+	for p,v in zfs.list.user_properties("$TESTPOOL/$TESTFS") do
+		assert(not a[p])
+		a[p] = true
+		assert(v == m[p])
+		n = n + 1
+	end
+	assert(n == 5)
+	assert(a["$TESTPROP"] and
+	    a["$TESTPROP1"] and
+	    a["$TESTPROP2"] and
+	    a["$TESTPROP3"] and
+	    a["$TESTPROP4"])
+	return 0
+EOF
+log_must_program $TESTPOOL - <<-EOF
+	a = {}
+	a["$TESTPROP"] = false
+	a["$TESTPROP1"] = false
+	a["$TESTPROP2"] = false
+	a["$TESTPROP3"] = false
+	a["$TESTPROP4"] = false
+	m = {}
+	m["$TESTPROP"] = "$TESTVAL"
+	m["$TESTPROP1"] = "$TESTVAL1"
+	m["$TESTPROP2"] = "$TESTVAL2"
+	m["$TESTPROP3"] = "$TESTVAL3"
+	m["$TESTPROP4"] = "$TESTVAL4"
+	n = 0
 	for p,v in zfs.list.properties("$TESTPOOL/$TESTFS") do
 		assert(not a[p])
 		a[p] = true

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.ksh
new file mode 100755
index 0000000..6ac1c2b
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.ksh

@@ -0,0 +1,39 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION:
+#	Setting user props should work correctly on datasets.
+#
+
+verify_runnable "global"
+
+fs=$TESTPOOL/$TESTFS/testchild
+
+function cleanup
+{
+	destroy_dataset $fs "-R"
+}
+
+log_onexit cleanup
+
+log_must zfs create $fs
+
+log_must_program_sync $TESTPOOL $ZCP_ROOT/synctask_core/tst.set_props.zcp $fs
+
+log_pass "Setting props from channel program works correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.zcp b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.zcp
new file mode 100644
index 0000000..756263a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.set_props.zcp

@@ -0,0 +1,109 @@
+--
+-- This file and its contents are supplied under the terms of the
+-- Common Development and Distribution License ("CDDL"), version 1.0.
+-- You may only use this file in accordance with the terms of version
+-- 1.0 of the CDDL.
+--
+-- A full copy of the text of the CDDL should have accompanied this
+-- source.  A copy of the CDDL is also available via the Internet at
+-- http://www.illumos.org/license/CDDL.
+--
+
+--
+-- Copyright (c) 2017 by Delphix. All rights reserved.
+-- Copyright 2020 Joyent, Inc.
+--
+
+arg = ...
+fs = arg["argv"][1]
+
+-- values from zfs.h
+maxname = 256       -- ZAP_MAXNAMELEN
+maxvalue = 8192     -- ZAP_MAXVALUELEN
+
+pos_props = {}
+neg_props = {}
+
+-- In lua, strings are immutable, so to avoid a bunch of copies, we
+-- build the value in a table and use concat (which appears to be the
+-- recommended method for such things).
+largeprop = {}
+for i = 0,maxvalue,8
+do
+    table.insert(largeprop, "aaaaaaaa")
+end
+-- add an extra character so we spill over the limit
+table.insert(largeprop, "b")
+
+largepropv = table.concat(largeprop)
+
+largepropname = { "b:" }
+for i = 0,maxname,8
+do
+    table.insert(largepropname, "aaaaaaaa")
+end
+largepropnamev = table.concat(largepropname)
+
+pos_props["a:prop"] = {"hello"}
+
+-- For neg_props, an optional expected error value can be added after the
+-- property value as seen below.
+neg_props["notaproperty"] = {"hello", EINVAL}
+neg_props["a:very.long.property.value"] = { largepropv, E2BIG }
+neg_props[largepropnamev] = {"greetings", ENAMETOOLONG }
+
+-- non-user properties aren't currently supported
+-- Even if they were, the argument must be a string due to requirements of
+-- the ZCP api.
+neg_props["mountpoint"] = {"/foo/bar"}
+neg_props["copies"] = { "2" }
+
+-- read-only properties should never succeed
+neg_props["guid"] = { "12345" }
+
+set_fail = {}
+val_fail = {}
+
+-- Test properties that should work: set each value, verify it, then undo it
+for prop, values in pairs(pos_props) do
+    for i, val in ipairs(values) do
+        old_val, src = zfs.get_prop(fs, prop)
+
+        -- Attempt to set the property to the specified value
+        err = zfs.sync.set_prop(fs, prop, val)
+
+        if (err ~= 0) then
+            set_fail[prop] = err -- record the error code returned for prop
+        else
+            -- use get_prop to check that the set took effect
+            new_val, src = zfs.get_prop(fs, prop)
+            if (tostring(new_val) ~= tostring(val)) then
+                val_fail[prop] = new_val
+            end
+
+            -- We modified the prop, restore old value (if one existed)
+            if (old_val ~= nil) then
+                err = zfs.sync.set_prop(fs, prop, old_val)
+                if (err ~= 0) then return err end
+            else
+                -- Didn't have an old value, delete (inherit) instead
+                err = zfs.sync.inherit(fs, prop)
+                if (err ~= 0) then return err end
+            end
+        end
+    end
+end
+
+-- Test properties that should fail (optionally with a specific error code)
+for prop, expected in pairs(neg_props) do
+    exp_val = expected[1]
+    exp_err = expected[2]
+
+    -- Attempt the set; success, or an error other than exp_err, is a failure
+    err = zfs.sync.set_prop(fs, prop, exp_val)
+    if (err == 0 or (exp_err ~= nil and err ~= exp_err)) then
+        set_fail[prop] = err -- error code seen (0 if the set went through)
+    end
+end
+
+return {set_fail, val_fail}

diff --git a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh
index 0a5fb80..2c9014a 100755
--- a/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh

@@ -28,7 +28,7 @@
 
 function cleanup
 {
-	datasetexists $rootfs && log_must zfs destroy -R $rootfs
+	datasetexists $rootfs && destroy_dataset $rootfs -R
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/chattr/chattr_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/chattr/chattr_001_pos.ksh
index 4d66146..cb8c2ea 100755
--- a/zfs/tests/zfs-tests/tests/functional/chattr/chattr_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/chattr/chattr_001_pos.ksh

@@ -48,22 +48,37 @@
 function cleanup
 {
 	for i in ${files[*]}; do
-		log_must chattr -ia $TESTDIR/$i
-		log_must rm -f $TESTDIR/$i
+		if is_freebsd ; then
+			log_must chflags noschg $TESTDIR/$i
+			log_must rm -f $TESTDIR/$i
+		else
+			log_must chattr -ia $TESTDIR/$i
+			log_must rm -f $TESTDIR/$i
+		fi
 	done
 }
 
 log_onexit cleanup
 
-log_assert "Check whether chattr works as expected"
+if is_freebsd ; then
+	log_assert "Check whether chflags works as expected"
+else
+	log_assert "Check whether chattr works as expected"
+fi
 
 log_must touch $TESTDIR/writable
 log_must touch $TESTDIR/immutable
 log_must touch $TESTDIR/append
 
-log_must chattr -i $TESTDIR/writable
-log_must chattr +i $TESTDIR/immutable
-log_must chattr +a $TESTDIR/append
+if is_freebsd ; then
+	log_must chflags noschg $TESTDIR/writable
+	log_must chflags schg $TESTDIR/immutable
+	log_must chflags sappnd $TESTDIR/append
+else
+	log_must chattr -i $TESTDIR/writable
+	log_must chattr +i $TESTDIR/immutable
+	log_must chattr +a $TESTDIR/append
+fi
 
 log_must eval "echo test > $TESTDIR/writable"
 log_must eval "echo test >> $TESTDIR/writable"
@@ -72,4 +87,8 @@
 log_mustnot eval "echo test > $TESTDIR/append"
 log_must eval "echo test >> $TESTDIR/append"
 
-log_pass "chattr works as expected"
+if is_freebsd ; then
+	log_pass "chflags works as expected"
+else
+	log_pass "chattr works as expected"
+fi

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/Makefile.am b/zfs/tests/zfs-tests/tests/functional/checksum/Makefile.am
index 905d991..ddabc03 100644
--- a/zfs/tests/zfs-tests/tests/functional/checksum/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/Makefile.am

@@ -1,9 +1,8 @@
 include $(top_srcdir)/config/Rules.am
 
-AM_CPPFLAGS += -I$(top_srcdir)/include
-LDADD = $(top_builddir)/lib/libicp/libicp.la
-
-AUTOMAKE_OPTIONS = subdir-objects
+LDADD = \
+	$(abs_top_builddir)/lib/libicp/libicp.la \
+	$(abs_top_builddir)/lib/libspl/libspl_assert.la
 
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum
 
@@ -13,7 +12,8 @@
 	run_edonr_test.ksh \
 	run_sha2_test.ksh \
 	run_skein_test.ksh \
-	filetest_001_pos.ksh
+	filetest_001_pos.ksh \
+	filetest_002_pos.ksh
 
 dist_pkgdata_DATA = \
 	default.cfg
@@ -21,10 +21,13 @@
 pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum
 
 pkgexec_PROGRAMS = \
-	edonr_test \
 	skein_test \
 	sha2_test
 
-edonr_test_SOURCES = edonr_test.c
 skein_test_SOURCES = skein_test.c
 sha2_test_SOURCES = sha2_test.c
+
+if BUILD_LINUX
+pkgexec_PROGRAMS += edonr_test
+edonr_test_SOURCES = edonr_test.c
+endif

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/default.cfg b/zfs/tests/zfs-tests/tests/functional/checksum/default.cfg
index 138c42b..bc2f6e2 100644
--- a/zfs/tests/zfs-tests/tests/functional/checksum/default.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/default.cfg

@@ -28,4 +28,9 @@
 # Copyright (c) 2013 by Delphix. All rights reserved.
 #
 
-set -A CHECKSUM_TYPES "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr"
+. $STF_SUITE/include/libtest.shlib
+
+set -A CHECKSUM_TYPES "fletcher2" "fletcher4" "sha256" "sha512" "skein"
+if ! is_freebsd; then
+	CHECKSUM_TYPES+=("edonr")
+fi

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/edonr_test.c b/zfs/tests/zfs-tests/tests/functional/checksum/edonr_test.c
index 596ef2b..a887560 100644
--- a/zfs/tests/zfs-tests/tests/functional/checksum/edonr_test.c
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/edonr_test.c

@@ -38,11 +38,7 @@
 #include <stdio.h>
 #include <sys/note.h>
 #include <sys/time.h>
-
-typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
-typedef	unsigned long long	u_longlong_t;
-
-int aok = 0;
+#include <sys/stdtypes.h>
 
 /*
  * Test messages from:

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh
index 27dad07..615b41f 100755
--- a/zfs/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh

@@ -54,9 +54,7 @@
 
 function cleanup
 {
-	echo cleanup
-	[[ -e $TESTDIR ]] && \
-		log_must rm -rf $TESTDIR/* > /dev/null 2>&1
+	rm -fr $TESTDIR/*
 }
 
 log_assert "Create and read back files with using different checksum algorithms"
@@ -64,6 +62,7 @@
 log_onexit cleanup
 
 WRITESZ=1048576
+NWRITES=5
 
 # Get a list of vdevs in our pool
 set -A array $(get_disklist_fullpath)
@@ -77,7 +76,7 @@
 	type=${CHECKSUM_TYPES[i]}
 	log_must zfs set checksum=$type $TESTPOOL
 	log_must file_write -o overwrite -f $TESTDIR/test_$type \
-	    -b $WRITESZ -c 5 -d R
+	    -b $WRITESZ -c $NWRITES -d R
 	(( i = i + 1 ))
 done
 
@@ -98,7 +97,7 @@
 	type=${CHECKSUM_TYPES[$j]}
 	log_must zfs set checksum=$type $TESTPOOL
 	log_must file_write -o overwrite -f $TESTDIR/test_$type \
-	    -b $WRITESZ -c 5 -d R
+	    -b $WRITESZ -c $NWRITES -d R
 
 	# Corrupt the level 0 blocks of this file
 	corrupt_blocks_at_level $TESTDIR/test_$type

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh
new file mode 100755
index 0000000..921a4b3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh

@@ -0,0 +1,98 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/properties.shlib
+. $STF_SUITE/tests/functional/checksum/default.cfg
+
+# DESCRIPTION:
+# Sanity test to make sure checksum algorithms work.
+# For each checksum, create a single file in the pool using that checksum,
+# corrupt the file's level 1 blocks, and verify that we correctly catch the
+# checksum errors.
+#
+# STRATEGY:
+# Test 1
+# 1. For each checksum:
+# 2.	Create a file using the checksum
+# 3.	Corrupt all level 1 blocks in the file
+# 4.	Export and import the pool
+# 5.	Verify that there are checksum errors
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -fr $TESTDIR/*
+}
+
+log_assert "Test corrupting files at L1 and seeing checksum errors"
+
+log_onexit cleanup
+
+WRITESZ=1048576
+NWRITES=5
+
+# Get a list of vdevs in our pool
+set -A array $(get_disklist_fullpath)
+
+# Get the first vdev, since we will corrupt it later
+firstvdev=${array[0]}
+
+# NOTE(review): iteration starts at index 1, so CHECKSUM_TYPES[0]
+# ("fletcher2" in default.cfg) is never exercised here -- confirm intended.
+typeset -i j=1
+while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do
+	type=${CHECKSUM_TYPES[$j]}
+	log_must zfs set checksum=$type $TESTPOOL
+	log_must file_write -o overwrite -f $TESTDIR/test_$type \
+	    -b $WRITESZ -c $NWRITES -d R
+
+	# Corrupt the level 1 blocks of this file
+	corrupt_blocks_at_level $TESTDIR/test_$type 1
+
+	# Presumably the export/import cycle drops cached copies so the read
+	# below has to come from the corrupted on-disk blocks -- TODO confirm.
+	log_must zpool export $TESTPOOL
+	log_must zpool import $TESTPOOL
+
+	log_mustnot eval "cat $TESTDIR/test_$type >/dev/null"
+
+	# Fifth field of the first vdev's status row; the test treats it as
+	# that vdev's checksum error count.
+	cksum=$(zpool status -P -v $TESTPOOL | grep "$firstvdev" | \
+	    awk '{print $5}')
+
+	log_note "Checksum '$type' caught $cksum checksum errors"
+	log_must [ $cksum -ne 0 ]
+
+	rm -f $TESTDIR/test_$type
+	log_must zpool clear $TESTPOOL
+
+	(( j = j + 1 ))
+done
+
+log_pass "Corrupting files at L1 produces checksum errors"

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/sha2_test.c b/zfs/tests/zfs-tests/tests/functional/checksum/sha2_test.c
index afd6f82..5800002 100644
--- a/zfs/tests/zfs-tests/tests/functional/checksum/sha2_test.c
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/sha2_test.c

@@ -39,9 +39,8 @@
 #include <sys/time.h>
 #define	_SHA2_IMPL
 #include <sys/sha2.h>
+#include <sys/stdtypes.h>
 #define NOTE(x)
-typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
-typedef	unsigned long long	u_longlong_t;
 
 
 /*
@@ -173,20 +172,6 @@
 	}
 };
 
-/*
- * Local reimplementation of cmn_err, since it's used in sha2.c.
- */
-/*ARGSUSED*/
-void
-cmn_err(int level, char *format, ...)
-{
-	va_list ap;
-	va_start(ap, format);
-	/* LINTED: E_SEC_PRINTF_VAR_FMT */
-	(void) vfprintf(stderr, format, ap);
-	va_end(ap);
-}
-
 int
 main(int argc, char *argv[])
 {

diff --git a/zfs/tests/zfs-tests/tests/functional/checksum/skein_test.c b/zfs/tests/zfs-tests/tests/functional/checksum/skein_test.c
index 37548f0..55df907 100644
--- a/zfs/tests/zfs-tests/tests/functional/checksum/skein_test.c
+++ b/zfs/tests/zfs-tests/tests/functional/checksum/skein_test.c

@@ -37,11 +37,9 @@
 #include <strings.h>
 #include <stdio.h>
 #include <sys/time.h>
+#include <sys/stdtypes.h>
 #define NOTE(x)
 
-typedef	enum boolean { B_FALSE, B_TRUE } boolean_t;
-typedef	unsigned long long	u_longlong_t;
-
 /*
  * Skein test suite using values from the Skein V1.3 specification found at:
  * http://www.skein-hash.info/sites/default/files/skein1.3.pdf

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/Makefile.am
index 99f1257..c01ecee 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/Makefile.am

@@ -13,14 +13,15 @@
 	zfs_destroy \
 	zfs_diff \
 	zfs_get \
+	zfs_ids_to_path \
 	zfs_inherit \
+	zfs_jail \
 	zfs_load-key \
 	zfs_mount \
 	zfs_program \
 	zfs_promote \
 	zfs_property \
 	zfs_receive \
-	zfs_remap \
 	zfs_rename \
 	zfs_reservation \
 	zfs_rollback \
@@ -33,6 +34,7 @@
 	zfs_unmount \
 	zfs_unshare \
 	zfs_upgrade \
+	zfs_wait \
 	zpool \
 	zpool_add \
 	zpool_attach \
@@ -60,4 +62,5 @@
 	zpool_status \
 	zpool_sync \
 	zpool_trim \
-	zpool_upgrade
+	zpool_upgrade \
+	zpool_wait

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
index 9f14307..d84a3df 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am

@@ -1,10 +1,19 @@
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zdb
 dist_pkgdata_SCRIPTS = \
-	zdb_001_neg.ksh \
 	zdb_002_pos.ksh \
 	zdb_003_pos.ksh \
 	zdb_004_pos.ksh \
 	zdb_005_pos.ksh \
 	zdb_006_pos.ksh \
+	zdb_args_neg.ksh \
+	zdb_args_pos.ksh \
+	zdb_block_size_histogram.ksh \
 	zdb_checksum.ksh \
-	zdb_decompress.ksh
+	zdb_decompress.ksh \
+	zdb_decompress_zstd.ksh \
+	zdb_object_range_neg.ksh \
+	zdb_object_range_pos.ksh \
+	zdb_display_block.ksh \
+	zdb_objset_id.ksh \
+	zdb_recover.ksh \
+	zdb_recover_2.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh
deleted file mode 100755
index e69779b..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh
+++ /dev/null

@@ -1,73 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-#
-# DESCRIPTION:
-# A badly formed parameter passed to zdb(1) should
-# return an error.
-#
-# STRATEGY:
-# 1. Create an array containing bad zdb parameters.
-# 2. For each element, execute the sub-command.
-# 3. Verify it returns an error.
-#
-
-verify_runnable "global"
-
-set -A args "create" "add" "destroy" "import fakepool" \
-    "export fakepool" "create fakepool" "add fakepool" \
-    "create mirror" "create raidz" \
-    "create mirror fakepool" "create raidz fakepool" \
-    "create raidz1 fakepool" "create raidz2 fakepool" \
-    "create fakepool mirror" "create fakepool raidz" \
-    "create fakepool raidz1" "create fakepool raidz2" \
-    "add fakepool mirror" "add fakepool raidz" \
-    "add fakepool raidz1" "add fakepool raidz2" \
-    "add mirror fakepool" "add raidz fakepool" \
-    "add raidz1 fakepool" "add raidz2 fakepool" \
-    "setvprop" "blah blah" "-%" "--?" "-*" "-=" \
-    "-a" "-f" "-g" "-h" "-j" "-m" "-n" "-o" "-p" \
-    "-p /tmp" "-r" "-t" "-w" "-x" "-y" "-z" \
-    "-D" "-E" "-G" "-H" "-I" "-J" "-K" "-M" \
-    "-N" "-Q" "-R" "-S" "-T" "-W" "-Z"
-
-log_assert "Execute zdb using invalid parameters."
-
-typeset -i i=0
-while [[ $i -lt ${#args[*]} ]]; do
-	log_mustnot zdb ${args[i]}
-
-	((i = i + 1))
-done
-
-log_pass "Badly formed zdb parameters fail as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_003_pos.ksh
index 3c444ae..36f1929 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_003_pos.ksh

@@ -34,8 +34,17 @@
 function cleanup
 {
 	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	if is_freebsd ; then
+		log_must sysctl kern.geom.debugflags=$saved_debugflags
+	fi
 }
 
+if is_freebsd ; then
+	# FreeBSD won't allow writing to an in-use device without this set
+	saved_debugflags=$(sysctl -n kern.geom.debugflags)
+	log_must sysctl kern.geom.debugflags=16
+fi
+
 verify_runnable "global"
 verify_disk_count "$DISKS" 2
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_004_pos.ksh
index 91a5c97..2c6e6e9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_004_pos.ksh

@@ -13,6 +13,7 @@
 
 #
 # Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -40,8 +41,17 @@
 	for DISK in $DISKS; do
 		zpool labelclear -f $DEV_RDSKDIR/$DISK
 	done
+	if is_freebsd; then
+		log_must sysctl kern.geom.debugflags=$saved_debugflags
+	fi
 }
 
+if is_freebsd; then
+	# FreeBSD won't allow writing to an in-use device without this set
+	saved_debugflags=$(sysctl -n kern.geom.debugflags)
+	log_must sysctl kern.geom.debugflags=16
+fi
+
 verify_runnable "global"
 verify_disk_count "$DISKS" 2
 set -A DISK $DISKS
@@ -51,7 +61,7 @@
 DEVS=$(get_pool_devices ${TESTPOOL} ${DEV_RDSKDIR})
 [[ -n $DEVS ]] && set -A DISK $DEVS
 
-log_must zpool offline $TESTPOOL ${WHOLE_DISK}
+log_must zpool offline $TESTPOOL $WHOLE_DISK
 log_must dd if=/dev/urandom of=$TESTDIR/testfile bs=1K count=2
 log_must zpool export $TESTPOOL
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_005_pos.ksh
index 49e237c..74975db 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_005_pos.ksh

@@ -37,8 +37,17 @@
 {
 	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
 	rm -f $TEMPFILE
+	if is_freebsd ; then
+		log_must sysctl kern.geom.debugflags=$saved_debugflags
+	fi
 }
 
+if is_freebsd ; then
+	# FreeBSD won't allow writing to an in-use device without this set
+	saved_debugflags=$(sysctl -n kern.geom.debugflags)
+	log_must sysctl kern.geom.debugflags=16
+fi
+
 verify_runnable "global"
 verify_disk_count "$DISKS" 2
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
new file mode 100755
index 0000000..cb88def
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh

@@ -0,0 +1,83 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# A badly formed parameter passed to zdb(1) should
+# return an error.
+#
+# STRATEGY:
+# 1. Create an array containing bad zdb parameters.
+# 2. For each element, execute the sub-command.
+# 3. Verify it returns an error.
+#
+
+verify_runnable "global"
+
+set -A args "create" "add" "destroy" "import fakepool" \
+    "export fakepool" "create fakepool" "add fakepool" \
+    "create mirror" "create raidz" \
+    "create mirror fakepool" "create raidz fakepool" \
+    "create raidz1 fakepool" "create raidz2 fakepool" \
+    "create fakepool mirror" "create fakepool raidz" \
+    "create fakepool raidz1" "create fakepool raidz2" \
+    "add fakepool mirror" "add fakepool raidz" \
+    "add fakepool raidz1" "add fakepool raidz2" \
+    "add mirror fakepool" "add raidz fakepool" \
+    "add raidz1 fakepool" "add raidz2 fakepool" \
+    "setvprop" "blah blah" "-%" "--?" "-*" "-=" \
+    "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
+    "-t" "-w" "-z" "-E" "-H" "-I" "-J" "-K" \
+    "-Q" "-R" "-T" "-W"
+
+log_assert "Execute zdb using invalid parameters."
+
+log_onexit cleanup
+
+function cleanup
+{
+	default_cleanup_noexit
+}
+
+function test_imported_pool
+{
+	for i in "${args[@]}"; do	# quoted: one iteration per array entry
+		log_mustnot zdb $i $TESTPOOL	# $i unquoted: split into zdb argv
+	done
+}
+
+default_mirror_setup_noexit $DISKS
+
+test_imported_pool
+
+log_pass "Badly formed zdb parameters fail as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_pos.ksh
new file mode 100755
index 0000000..4c2fc15
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_pos.ksh

@@ -0,0 +1,104 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZDB allows a large number of possible inputs
+# and combinations of those inputs. Test for non-zero
+# exit values. These input options are based on the zdb
+# man page
+#
+# STRATEGY:
+# 1. Create an array containing value zdb parameters.
+# 2. For each element, execute the sub-command.
+# 3. Verify it does not return a error.
+#
+
+verify_runnable "global"
+
+log_assert "Execute zdb using valid parameters."
+
+log_onexit cleanup
+
+function cleanup
+{
+	default_cleanup_noexit
+}
+
+function test_imported_pool
+{
+	typeset -a args=("-A" "-b" "-C" "-c" "-d" "-D" "-G" "-h" "-i" "-L" \
+            "-M" "-P" "-s" "-v" "-Y" "-y")
+        for i in ${args[@]}; do
+		log_must eval "zdb $i $TESTPOOL >/dev/null"
+	done
+}
+
+function test_exported_pool
+{
+	log_must zpool export $TESTPOOL
+	typeset -a args=("-A" "-b" "-C" "-c" "-d" "-D" "-F" "-G" "-h" "-i" "-L" "-M" \
+            "-P" "-s" "-v" "-X" "-Y" "-y")
+        for i in ${args[@]}; do
+		log_must eval "zdb -e $i $TESTPOOL >/dev/null"
+	done
+	log_must zpool import $TESTPOOL
+}
+
+function test_vdev
+{
+	typeset -a args=("-A" "-q" "-u" "-Aqu")
+	VDEVS=$(get_pool_devices ${TESTPOOL} ${DEV_RDSKDIR})
+	log_note $VDEVS
+	set -A VDEV_ARRAY $VDEVS
+        for i in ${args[@]}; do
+		log_must eval "zdb -l $i ${VDEV_ARRAY[0]} >/dev/null"
+	done
+}
+
+function test_metaslab
+{
+	typeset -a args=("-A" "-L" "-P" "-Y")
+        for i in ${args[@]}; do
+		log_must eval "zdb -m $i $TESTPOOL >/dev/null"
+	done
+}
+
+default_mirror_setup_noexit $DISKS
+
+test_imported_pool
+test_exported_pool
+test_vdev
+test_metaslab
+
+log_pass "Valid zdb parameters pass as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
new file mode 100755
index 0000000..ce439e2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh

@@ -0,0 +1,271 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+
+#
+# DESCRIPTION:
+#	Create a pool and populate it with files of various
+#	recordsizes
+#
+# STRATEGY:
+#	1. Create pool
+#	2. Populate it
+#	3. Run zdb -Pbbb on pool
+#	4. Verify variance on blocksizes
+#
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+SPA_MAXBLOCKSHIFT=24
+
+function histo_populate_test_pool
+{
+	# Fill pool $1 with random-data files, one dataset per recordsize.
+	if [ $# -ne 1 ]; then
+		log_note "histo_populate_test_pool: insufficient parameters"
+		log_fail "hptp: 1 requested $# received"
+	fi
+	typeset pool=$1
+
+	set -A recordsizes
+	typeset -i min_rsbits=9 #512
+	typeset -i max_rsbits=SPA_MAXBLOCKSHIFT #16 MiB
+	typeset -i sum_filesizes=0
+	re_number='^[0-9]+$'
+
+	let histo_pool_size=$(get_pool_prop size ${pool})
+	if [[ ! ${histo_pool_size} =~ ${re_number} ]]; then
+		log_fail "histo_pool_size is not numeric ${histo_pool_size}"
+	fi
+	let max_pool_record_size=$(get_prop recordsize ${pool})
+	if [[ ! ${max_pool_record_size} =~ ${re_number} ]]; then
+		log_fail "hptp: max_pool_record_size is not numeric ${max_pool_record_size}"
+	fi
+
+	sum_filesizes=$(echo "2^21"|bc)
+	((min_pool_size=12*sum_filesizes))
+	if [ ${histo_pool_size} -lt ${min_pool_size} ]; then
+		log_note "hptp: Your pool size ${histo_pool_size}"
+		log_fail "hptp: is less than minimum ${min_pool_size}"
+	fi
+	filenum=0	# suffix of the next file_<n>; bumped after every dd
+	total_count=0
+	###################
+	# generate 10% + 20% + 30% + 31% = 91% of the filespace
+	# attempting to use 100% will lead to no space left on device
+	# Heuristic testing showed that 91% was the practical upper
+	# bound on the default 4G zpool (mirrored) that is used in
+	# testing.
+	#
+	# In order to expedite testing, we will only fill 2G (of 4G)
+	# of the test pool.  You may want to modify this for
+	# standalone testing.
+	#
+	# In filling only 50% of the pool, we create one object on
+	# each "pass" below to achieve multiple objects per record
+	# size.  Creating one file per object would lead to
+	# excessive file creation time.
+	###################
+	# for pass in 10 20 30 31  # 91%
+	for pass in 20 20 10 # 50%
+	do
+		((thiscount=(((histo_pool_size*pass)/100)/sum_filesizes)))
+
+		((total_count+=thiscount))
+		for rb in $(seq ${min_rsbits} ${max_rsbits})
+		do
+			this_rs=$(echo "2^${rb}" | bc)
+			if [ ${this_rs} -gt ${max_pool_record_size} ]; then
+				continue
+			fi
+
+			if [ ! -d /${pool}/B_${this_rs} ]; then
+				zfs create ${pool}/B_${this_rs}
+				zfs set recordsize=${this_rs} \
+				    ${pool}/B_${this_rs}
+			fi
+			####################
+			# Create the files in the devices and datasets
+			# of the right size.  The files are filled
+			# with random data to defeat the compression
+			#
+			# Note that the dd output is suppressed unless
+			# there are errors
+			####################
+
+			dd if=/dev/urandom \
+			    of=/${pool}/B_${this_rs}/file_${filenum} \
+			    bs=${this_rs} count=${thiscount} \
+			    iflag=fullblock 2>&1 | \
+			    grep -ve "records in" -e "records out" -e "bytes.*copied"
+			((filenum+=1))
+		done
+	done
+
+	####################
+	# Testing showed that on some devices, unless the pool is
+	# synchronized, that the block counts will be below the
+	# anticipated sizes since not all of the blocks will be flushed
+	# to the device.  This 'sync' command prevents that from
+	# happening.
+	####################
+	log_must zpool sync ${pool}
+}
+function histo_check_test_pool
+{
+	# Compare zdb -Pbbb lsize block counts for pool $1 with predictions.
+	if [ $# -ne 1 ]; then
+		log_note "histo_check_test_pool: insufficient parameters"
+		log_fail "hctp: 1 requested $# received"
+	fi
+	typeset pool=$1
+
+	set -A recordsizes
+	set -A recordcounts
+	typeset -i rb
+	typeset -i min_rsbits=9 #512
+	typeset -i max_rsbits=SPA_MAXBLOCKSHIFT+1
+	typeset -i sum_filesizes=0
+	typeset dumped
+	typeset stripped
+	typeset re_number='^[0-9]+$'
+
+	let histo_check_pool_size=$(get_pool_prop size ${pool})
+	if [[ ! ${histo_check_pool_size} =~ ${re_number} ]]; then
+		log_fail "histo_check_pool_size is not numeric ${histo_check_pool_size}"
+	fi
+	let max_pool_record_size=$(get_prop recordsize ${pool})
+	if [[ ! ${max_pool_record_size} =~ ${re_number} ]]; then
+		log_fail "hctp: max_pool_record_size is not numeric ${max_pool_record_size}"
+	fi
+
+	dumped="${TEST_BASE_DIR}/${pool}_dump.txt"
+	stripped="${TEST_BASE_DIR}/${pool}_stripped.txt"
+
+	zdb -Pbbb ${pool} | \
+	    tee ${dumped} | \
+	    sed -e '1,/^block[ 	][ 	]*psize[ 	][ 	]*lsize.*$/d' \
+	    -e '/^size[ 	]*Count/d' -e '/^$/,$d' \
+	    > ${stripped}
+
+	sum_filesizes=$(echo "2^21"|bc)
+
+	###################
+	# Predict the record counts using the same passes that were
+	# used to populate the pool.  The full set of passes
+	# (10% + 20% + 30% + 31% = 91%) is the practical upper bound on
+	# the default 4G zpool (mirrored) that is used in testing;
+	# attempting to use 100% will lead to no space left on device.
+	#
+	# In order to expedite testing, we will only fill 2G (of 4G)
+	# of the test pool.  You may want to modify this for
+	# standalone testing.
+	#
+	# In filling only 50% of the pool, we create one object on
+	# each "pass" below to achieve multiple objects per record
+	# size.  Creating one file per object would lead to
+	# excessive file creation time.
+	###################
+	# for pass in 10 20 30 31  # 91%
+	for pass in 20 20 10 # 50%
+	do
+		((thiscount=(((histo_check_pool_size*pass)/100)/sum_filesizes)))
+
+		for rb in $(seq ${min_rsbits} ${max_rsbits})
+		do
+			blksize=$(echo "2^$rb"|bc)
+			if [ $blksize -le $max_pool_record_size ]; then
+				((recordcounts[$blksize]+=thiscount))
+			fi
+		done
+	done
+
+	###################
+	# compare the above computed counts for blocks against
+	# lsize count.  Since some devices have a minimum hardware
+	# blocksize > 512, we cannot compare against the asize count.
+	# E.G., if the HWBlocksize = 4096, then the asize counts for
+	# 512, 1024 and 2048 will be zero and rolled up into the
+	# 4096 blocksize count for asize.   For verification we stick
+	# to just lsize counts.
+	#
+	# The max_variance is hard-coded here at 12% to leave us some
+	# margin.  Testing has shown this normally to be in the range
+	# of 2%-8%, but it may be as large as 11%.
+	###################
+	let max_variance=12
+	let fail_value=0
+	let error_count=0
+	log_note "Comparisons for ${pool}"
+	log_note "Bsize is the blocksize, Count is predicted value"
+	log_note "Bsize\tCount\tpsize\tlsize\tasize"
+	while read -r blksize pc pl pm lc ll lm ac al am
+	do
+		if [ $blksize -gt $max_pool_record_size ]; then
+			continue
+		fi
+		log_note \
+		    "$blksize\t${recordcounts[${blksize}]}\t$pc\t$lc\t$ac"
+
+		###################
+		# get the computed record count and compute the
+		# difference percentage in integer arithmetic
+		###################
+		rc=${recordcounts[${blksize}]}
+		((rc > 0)) || log_fail "hctp: no predicted count for $blksize"
+		((rclc=(rc-lc)<0?lc-rc:rc-lc)) # absolute value
+		((dp=(rclc*100)/rc))
+
+		###################
+		# Check against the allowed variance; count every
+		# violation, remember the worst one
+		###################
+		if [ $dp -gt ${max_variance} ]; then
+			log_note \
+			"Expected variance < ${max_variance}% observed ${dp}%"
+			((error_count++))
+			if [ ${dp} -gt ${fail_value} ]; then
+				fail_value=${dp}
+			fi
+		fi
+	done < ${stripped}
+	if [ ${fail_value} -gt 0 ]; then
+		if [ ${error_count} -eq 1 ]; then
+			log_note "hctp: There was ${error_count} error"
+		else
+			log_note "hctp:There were a total of ${error_count} errors"
+		fi
+		log_fail \
+		"hctp: Max variance of ${max_variance}% exceeded, saw ${fail_value}%"
+	fi
+}
+
+log_assert "Verify zdb -Pbbb (block histogram) works as expected"
+log_onexit cleanup
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+
+histo_populate_test_pool $TESTPOOL
+
+histo_check_test_pool $TESTPOOL
+
+log_pass "Histogram for zdb"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh
index 9bc3603..4f66126 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_checksum.ksh

@@ -50,12 +50,13 @@
 set -A array $listing
 obj=${array[0]}
 log_note "file $init_data has object number $obj"
+sync_pool $TESTPOOL
 
 output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
     |grep -m 1 "L0 DVA" |head -n1)
-dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output")
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*$/\1/p' <<< "$output")
 log_note "block 0 of $init_data has a DVA of $dva"
-cksum_expected=$(grep -oP '(?<=cksum=)[ A-Za-z0-9:]*' <<< "$output")
+cksum_expected=$(sed -Ene 's/^.+ cksum=([a-z0-9:]+)$/\1/p' <<< "$output")
 log_note "expecting cksum $cksum_expected"
 output=$(zdb -R $TESTPOOL $dva:c 2> /dev/null)
 result=$(grep $cksum_expected <<< "$output")

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh
index 0e468d7..1ebcbfb 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress.ksh

@@ -74,11 +74,11 @@
 
 output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
     |grep -m 1 "L0 DVA" |head -n1)
-dva=$(grep -oP 'DVA\[0\]=<\K.*?(?=>)' <<< "$output")
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*$/\1/p' <<< "$output")
 log_note "block 0 of $init_data has a DVA of $dva"
 
 # use the length reported by zdb -ddddddbbbbbb
-size_str=$(grep -oP 'size=\K.*?(?= )' <<< "$output")
+size_str=$(sed -Ene 's/^.+ size=([^ ]+) .*$/\1/p' <<< "$output")
 log_note "block size $size_str"
 
 vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}')

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress_zstd.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress_zstd.ksh
new file mode 100755
index 0000000..238d495
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_decompress_zstd.ksh

@@ -0,0 +1,114 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -Z pool <objid> will display the ZSTD compression header
+#     This will contain the actual length of the compressed data, as well as
+#     the version of ZSTD used to compress the block, and the compression level
+#
+# Strategy:
+# 1. Create a pool, set compression to zstd-<random level>
+# 2. Write some identifiable data to a file
+# 3. Run zdb -Zddddddbbbbbb against the file
+# 4. Record the DVA, lsize, and psize, and ZSTD header of L0 block 0
+# 5. Check that the ZSTD length is less than psize
+# 6. Check that the ZSTD level matches the level we requested
+# 7. Run zdb -R with :dr flags and confirm the size and content match
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_assert "Verify zdb -Z (read ZSTD header) works as expected"
+log_onexit cleanup
+src_data="$STF_SUITE/tests/functional/cli_root/zfs_receive/zstd_test_data.txt"
+init_data=$TESTDIR/file1
+write_count=128
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+random_level=$((RANDOM%19 + 1))
+
+default_mirror_setup_noexit $DISKS
+log_must zfs set recordsize=$blksize $TESTPOOL/$TESTFS
+log_must zfs set compression=zstd-$random_level $TESTPOOL/$TESTFS
+
+# write the 1k of text 128 times
+for i in {1..$write_count}
+do
+	cat $src_data >> $init_data
+done
+
+sync_pool $TESTPOOL true
+
+# get object number of file
+listing=$(ls -i $init_data)
+set -A array $listing
+obj=${array[0]}
+log_note "file $init_data has object number $obj"
+
+output=$(zdb -Zddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
+    |grep -m 1 "L0 DVA" |head -n1)
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*$/\1/p' <<< "$output")
+log_note "block 0 of $init_data has a DVA of $dva"
+
+# use the length reported by zdb -ddddddbbbbbb
+size_str=$(sed -Ene 's/^.+ size=([^ ]+) .*$/\1/p' <<< "$output")
+# convert sizes to decimal
+lsize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[1]}')
+lsize_orig=$lsize
+lsize=${lsize%?}
+lsize_bytes=$((16#$lsize))
+psize=$(echo $size_str |awk '{split($0,array,"/")} END{print array[2]}')
+psize_orig=$psize
+psize=${psize%?}
+psize_bytes=$((16#$psize))
+log_note "block size $size_str"
+
+# Parse size, version and level out of the ZSTD header printed by zdb -Z
+zstd_str=$(sed -Ene 's/^.+ ZSTD:size=([^:]+):version=([^:]+):level=([^:]+):.*$/\1:\2:\3/p' <<< "$output")
+zstd_size=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[1]}')
+log_note "ZSTD compressed size $zstd_size"
+(( $psize_bytes < $zstd_size )) && log_fail \
+"zdb -Z failed: physical block size was less than header content length ($psize_bytes < $zstd_size)"
+
+zstd_version=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[2]}')
+log_note "ZSTD version $zstd_version"
+
+zstd_level=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[3]}')
+log_note "ZSTD level $zstd_level"
+(( $zstd_level != $random_level )) && log_fail \
+"zdb -Z failed: compression level did not match header level ($zstd_level != $random_level)"
+
+vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}')
+offset=$(echo "$dva" |awk '{split($0,array,":")} END{print array[2]}')
+# Dump the block with the :dr flags, then check its size and content
+output=$(ZDB_NO_ZLE="true" zdb -R $TESTPOOL $vdev:$offset:$size_str:dr 2> /dev/null)
+outsize=$(wc -c <<< "$output")
+(( $outsize != $blksize )) && log_fail \
+"zdb -Z failed to decompress the data to the expected length ($outsize != $blksize)"
+cmp $init_data - <<< "$output"
+(( $? != 0 )) && log_fail "zdb -R :dr failed to decompress the data properly"
+
+log_pass "zdb -Z flag (ZSTD compression header) works as expected"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_display_block.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_display_block.ksh
new file mode 100755
index 0000000..5cc4575
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_display_block.ksh

@@ -0,0 +1,128 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -R pool <DVA>:b will display the block
+#
+# Strategy:
+# 1. Create a pool, set compression to lzjb
+# 2. Write some identifiable data to a file
+# 3. Run zdb -ddddddbbbbbb against the file
+# 4. Record the DVA of the first L1 block;
+#    record the first L0 block display; and
+#    record the 2nd L0 block display.
+# 5. Run zdb -R with :bd displays first L0
+# 6. Run zdb -R with :b80d displays 2nd L0
+# 7. Run zdb -R with :db80 displays 2nd L0
+# 8. Run zdb -R with :id flag displays indirect block
+#     (similar to zdb -ddddddbbbbbb output)
+# 9. Run zdb -R with :id flag and .0 vdev
+#
+
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_assert "Verify zdb -R :b flag (block display) works as expected"
+log_onexit cleanup
+init_data=$TESTDIR/file1
+write_count=256
+blksize=4096
+
+# only read 256 128 byte block pointers in L1 (:i flag)
+# 256 x 128 = 32k / 0x8000
+l1_read_size="8000"
+
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+log_must zfs set recordsize=$blksize $TESTPOOL/$TESTFS
+log_must zfs set compression=lzjb $TESTPOOL/$TESTFS
+
+file_write -d R -o create -w -f $init_data -b $blksize -c $write_count
+sync_pool $TESTPOOL true
+
+# get object number of file
+listing=$(ls -i $init_data)
+set -A array $listing
+obj=${array[0]}
+log_note "file $init_data has object number $obj"
+
+output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
+    |grep -m 1 "L1  DVA" |head -n1)
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*/\1/p' <<< "$output")
+log_note "first L1 block $init_data has a DVA of $dva"
+output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
+    |grep -m 1 "L0 DVA" |head -n1)
+blk_out0=${output##*>}
+blk_out0=${blk_out0##+([[:space:]])}
+
+output=$(zdb -ddddddbbbbbb $TESTPOOL/$TESTFS $obj 2> /dev/null \
+    |grep -m 1 "1000  L0 DVA" |head -n1)
+blk_out1=${output##*>}
+blk_out1=${blk_out1##+([[:space:]])}
+
+output=$(export ZDB_NO_ZLE=\"true\"; zdb -R $TESTPOOL $dva:bd\
+    2> /dev/null)
+output=${output##*>}
+output=${output##+([[:space:]])}
+if [ "$output" != "$blk_out0" ]; then
+	log_fail "zdb -R :bd (block 0 display/decompress) failed"
+fi
+
+output=$(export ZDB_NO_ZLE=\"true\"; zdb -R $TESTPOOL $dva:db80\
+    2> /dev/null)
+output=${output##*>}
+output=${output##+([[:space:]])}
+if [ "$output" != "$blk_out1" ]; then
+	log_fail "zdb -R :db80 (block 1 display/decompress) failed"
+fi
+
+output=$(export ZDB_NO_ZLE=\"true\"; zdb -R $TESTPOOL $dva:b80d\
+    2> /dev/null)
+output=${output##*>}
+output=${output##+([[:space:]])}
+if [ "$output" != "$blk_out1" ]; then
+	log_fail "zdb -R :b80d (block 1 display/decompress) failed"
+fi
+
+vdev=$(echo "$dva" |awk '{split($0,array,":")} END{print array[1]}')
+offset=$(echo "$dva" |awk '{split($0,array,":")} END{print array[2]}')
+output=$(export ZDB_NO_ZLE=\"true\";\
+    zdb -R $TESTPOOL $vdev:$offset:$l1_read_size:id 2> /dev/null)
+block_cnt=$(echo "$output" | grep 'L0' | wc -l)
+if [ $block_cnt -ne $write_count ]; then
+	log_fail "zdb -R :id (indirect block display) failed"
+fi
+
+# read from specific half of mirror
+vdev="$vdev.0"
+log_note "Reading from DVA $vdev:$offset:$l1_read_size"
+output=$(export ZDB_NO_ZLE=\"true\";\
+    zdb -R $TESTPOOL $vdev:$offset:$l1_read_size:id 2> /dev/null)
+block_cnt=$(echo "$output" | grep 'L0' | wc -l)
+if [ $block_cnt -ne $write_count ]; then
+        log_fail "zdb -R 0.0:offset:length:id (indirect block display) failed"
+fi
+
+log_pass "zdb -R :b flag (block display) works as expected"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_neg.ksh
new file mode 100755
index 0000000..ae086da
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_neg.ksh

@@ -0,0 +1,72 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# A badly formed object range parameter passed to zdb -dd should
+# return an error.
+#
+# Strategy:
+# 1. Create a pool
+# 2. Run zdb -dd with assorted invalid object range arguments and
+#    confirm it fails as expected
+# 3. Run zdb -dd with an invalid object identifier and
+#    confirm it fails as expected
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL	# destroy the pool only if datasetexists reports it
+}
+
+log_assert "Execute zdb using invalid object range parameters."
+log_onexit cleanup
+verify_runnable "both"
+verify_disk_count "$DISKS" 2
+default_mirror_setup_noexit $DISKS
+
+log_must zpool sync
+
+set -A bad_flags a b c   e   g h i j k l   n o p q r s t u v w x y   \
+                   B C D E F G H I J K L M N O P Q R S T U V W X Y Z \
+                 0 1 2 3 4 5 6 7 8 9 _ - + % . , :
+
+# Every character listed above has to be rejected as a range flag,
+# both directly after "0:1:" and after "0:1:A-".
+for flag in "${bad_flags[@]}"; do
+	log_mustnot zdb -dd $TESTPOOL 0:1:$flag
+	log_mustnot zdb -dd $TESTPOOL 0:1:A-$flag
+done
+
+set -A bad_ranges ":" "::" ":::" ":0" "0:" "0:1:" "0:1::" "0::f" "0a:1" \
+    "a0:1" "a:1" "0:a" "0:1a" "0:a1" "a:b0" "a:0b" "0:1:A-" "1:0" \
+    "0:1:f:f" "0:1:f:"
+
+# None of these malformed range strings may be accepted; each one is
+# handed to zdb as the only object argument.
+for range in "${bad_ranges[@]}"; do
+	log_mustnot zdb -dd $TESTPOOL $range
+done
+
+# Specifying a non-existent object identifier returns an error
+obj_id_highest=$(zdb -P -dd $TESTPOOL/$TESTFS 2>/dev/null |
+    grep -E "^ +-?([0-9]+ +){7}" | sort -n | tail -n 1 | awk '{print $1}')
+obj_id_invalid=$(( $obj_id_highest + 1 ))
+log_mustnot zdb -dd $TESTPOOL/$TESTFS $obj_id_invalid
+
+log_pass "Badly formed zdb object range parameters fail as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_pos.ksh
new file mode 100755
index 0000000..386c575
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_object_range_pos.ksh

@@ -0,0 +1,170 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Object range parameters passed to zdb -dd work correctly.
+#
+# Strategy:
+# 1. Create a pool
+# 2. Create some files
+# 3. Run zdb -dd with assorted object range arguments and verify output
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL	# destroy the pool only if datasetexists reports it
+}
+
+#
+# List the objects of @dataset whose identifiers lie in the inclusive
+# range @begin..@end by filtering the complete object listing, i.e.
+# without handing an object range parameter to zdb.
+#
+function get_object_list_range
+{
+	dataset=$1
+	begin=$2
+	end=$3
+	get_object_list $dataset |
+	while read line; do
+		# The first whitespace-delimited field is the object id.
+		obj=${line%%[[:space:]]*}
+		# Rows arrive sorted by id, so nothing after @end can match.
+		[[ $obj -gt $end ]] && break
+		[[ $obj -ge $begin ]] || continue
+		echo "$line"
+	done
+}
+
+#
+# Print only the object rows of 'zdb -P -dd <args>' (lines that start with
+# seven numeric columns), with stderr discarded and leading whitespace
+# stripped, sorted numerically. Pass the dataset first, then any IDs/ranges.
+#
+function get_object_list
+{
+	zdb -P -dd $@ 2>/dev/null |
+	sed -E '/^ +-?([0-9]+ +){7}/!d;s/^[[:space:]]*//' |
+	sort -n
+}
+
+log_assert "Verify zdb -dd object range arguments work correctly."
+log_onexit cleanup
+verify_runnable "both"
+verify_disk_count "$DISKS" 2
+default_mirror_setup_noexit $DISKS
+
+for x in $(seq 0 7); do
+	touch $TESTDIR/file$x
+	mkdir $TESTDIR/dir$x
+done
+
+log_must zpool sync
+
+# Get list of all objects, but filter out user/group objects which don't
+# appear when using object or object range arguments
+all_objects=$(get_object_list $TESTPOOL/$TESTFS | grep -v 'used$')
+
+# Range 0:-1 gets all objects
+expected=$all_objects
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:A gets all objects
+expected=$all_objects
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1:A)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:f must output all file objects
+expected=$(grep "ZFS plain file" <<< $all_objects)
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1:f)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:d must output all directory objects
+expected=$(grep "ZFS directory" <<< $all_objects)
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1:d)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:df must output all directory and file objects
+expected=$(grep -e "ZFS directory" -e "ZFS plain file" <<< $all_objects)
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1:df)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:A-f-d must output all non-files and non-directories
+expected=$(grep -v -e "ZFS plain file" -e "ZFS directory" <<< $all_objects)
+actual=$(get_object_list $TESTPOOL/$TESTFS 0:-1:A-f-d)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Specifying multiple ranges works
+set -A obj_ids $(ls -i $TESTDIR | awk '{print $1}' | sort -n)
+start1=${obj_ids[0]}
+end1=${obj_ids[5]}
+start2=${obj_ids[8]}
+end2=${obj_ids[13]}
+expected=$(get_object_list_range $TESTPOOL/$TESTFS $start1 $end1;
+    get_object_list_range $TESTPOOL/$TESTFS $start2 $end2)
+actual=$(get_object_list $TESTPOOL/$TESTFS $start1:$end1 $start2:$end2)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Combining ranges with individual object IDs works
+expected=$(get_object_list_range $TESTPOOL/$TESTFS $start1 $end1;
+    get_object_list $TESTPOOL/$TESTFS $start2 $end2)
+actual=$(get_object_list $TESTPOOL/$TESTFS $start1:$end1 $start2 $end2)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Hex conversion must work for ranges and individual object identifiers
+# (this test uses expected result from previous test).
+start1_hex=$(printf "0x%x" $start1)
+end1_hex=$(printf "0x%x" $end1)
+start2_hex=$(printf "0x%x" $start2)
+end2_hex=$(printf "0x%x" $end2)
+actual=$(get_object_list $TESTPOOL/$TESTFS $start1_hex:$end1_hex \
+    $start2_hex $end2_hex)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Specifying individual object IDs works
+objects="$start1 $end1 $start2 $end2"
+expected="$objects"
+actual=$(get_object_list $TESTPOOL/$TESTFS $objects | awk '{print $1}' | xargs)
+log_must test "$actual" == "$expected"
+
+# Get all objects in the meta-objset to test m (spacemap) and z (zap) flags
+all_mos_objects=$(get_object_list $TESTPOOL 0:-1)
+
+# Range 0:-1:m must output all space map objects
+expected=$(grep "SPA space map" <<< $all_mos_objects)
+actual=$(get_object_list $TESTPOOL 0:-1:m)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:z must output all zap objects
+expected=$(grep "zap" <<< $all_mos_objects)
+actual=$(get_object_list $TESTPOOL 0:-1:z)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:A-m-z must output all non-space maps and non-zaps
+expected=$(grep -v -e "zap" -e "SPA space map" <<< $all_mos_objects)
+actual=$(get_object_list $TESTPOOL 0:-1:A-m-z)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+# Range 0:-1:mz must output all space maps and zaps
+expected=$(grep -e "SPA space map" -e "zap" <<< $all_mos_objects)
+actual=$(get_object_list $TESTPOOL 0:-1:mz)
+log_must test "\n$actual\n" == "\n$expected\n"
+
+log_pass "zdb -dd object range arguments work correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh
new file mode 100755
index 0000000..90a8100
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh

@@ -0,0 +1,134 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -d pool/<objset id> will display the dataset
+#
+# Strategy:
+# 1. Create a pool
+# 2. Write some data to a file
+# 3. Get the inode number (object number) of the file
+# 4. Run zdb -d to get the objset ID of the dataset
+# 5. Run zdb -dddddd pool/objsetID objectID (decimal)
+# 6. Confirm names
+# 7. Run zdb -dddddd pool/objsetID objectID (hex) 
+# 8. Confirm names
+# 9. Repeat with zdb -NNNNNN pool/objsetID objectID
+# 10. Obtain objsetID from /proc/spl/kstat/zfs/testpool/objset-0x<ID>
+#    (linux only)
+# 11. Run zdb -dddddd pool/objsetID (hex)
+# 12. Match name from zdb against proc entry
+# 13. Create dataset with hex numeric name
+# 14. Create dataset with decimal numeric name
+# 15. zdb -d for numeric datasets succeeds
+# 16. zdb -N for numeric datasets fails
+# 17. zdb -dN for numeric datasets fails
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL	# destroy the pool only if datasetexists reports it
+}
+
+log_assert "Verify zdb -d <pool>/<objset ID> generates the correct names."
+log_onexit cleanup
+init_data=$TESTDIR/file1
+write_count=8
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+hex_ds=$TESTPOOL/0x400000
+num_ds=$TESTPOOL/100000
+
+default_mirror_setup_noexit $DISKS
+file_write -o create -w -f $init_data -b $blksize -c $write_count
+
+# get object number of file
+listing=$(ls -i $init_data)
+set -A array $listing
+obj=${array[0]}
+log_note "file $init_data has object number $obj"
+sync_pool $TESTPOOL
+
+output=$(zdb -d $TESTPOOL/$TESTFS)
+objset_id=$(echo $output | awk '{split($0,array,",")} END{print array[2]}' |
+    awk '{split($0,array," ")} END{print array[2]}')
+objset_hex=$(printf "0x%X" $objset_id)
+log_note "objset $TESTPOOL/$TESTFS has objset ID $objset_id ($objset_hex)"
+
+for id in "$objset_id" "$objset_hex"
+do
+	# Dump the object through pool/<objset ID>; the output has to name
+	# both the dataset and the file.
+	log_note "zdb -dddddd $TESTPOOL/$id $obj"
+	output=$(zdb -dddddd $TESTPOOL/$id $obj)
+	reason="($TESTPOOL/$TESTFS not in zdb output)"
+	echo $output |grep "$TESTPOOL/$TESTFS" > /dev/null || log_fail \
+	    "zdb -dddddd $TESTPOOL/$id $obj failed $reason"
+	reason="(file1 not in zdb output)"
+	echo $output |grep "file1" > /dev/null || log_fail \
+	    "zdb -dddddd $TESTPOOL/$id $obj failed $reason"
+
+	# From here on the object number is passed in hex (also for the
+	# second pass of this loop); repeat the same checks with -N.
+	obj=$(printf "0x%X" $obj)
+	log_note "zdb -NNNNNN $TESTPOOL/$id $obj"
+	output=$(zdb -NNNNNN $TESTPOOL/$id $obj)
+	reason="($TESTPOOL/$TESTFS not in zdb output)"
+	echo $output |grep "$TESTPOOL/$TESTFS" > /dev/null || log_fail \
+	    "zdb -NNNNNN $TESTPOOL/$id $obj failed $reason"
+	reason="(file1 not in zdb output)"
+	echo $output |grep "file1" > /dev/null || log_fail \
+	    "zdb -NNNNNN $TESTPOOL/$id $obj failed $reason"
+done
+
+if is_linux; then
+	output=$(ls -1 /proc/spl/kstat/zfs/$TESTPOOL |grep objset- |tail -1)
+	objset_hex=${output#*-}
+	name_from_proc=$(cat /proc/spl/kstat/zfs/$TESTPOOL/$output |
+	    grep dataset_name | awk '{split($0,array," ")} END{print array[3]}')
+	log_note "checking zdb output for $name_from_proc"
+	reason="(name $name_from_proc from proc not in zdb output)"
+	log_note "zdb -dddddd $TESTPOOL/$objset_hex"
+	output=$(zdb -dddddd $TESTPOOL/$objset_hex)
+	echo $output |grep "$name_from_proc" > /dev/null
+	(( $? != 0 )) && log_fail \
+	    "zdb -dddddd $TESTPOOL/$objset_hex failed $reason"
+fi
+
+log_must zfs create $hex_ds	# dataset names that look like hex/decimal objset IDs
+log_must zfs create $num_ds
+output=$(zdb -d $hex_ds)
+reason="($hex_ds not in zdb output)"
+echo $output |grep "$hex_ds" > /dev/null
+(( $? != 0 )) && log_fail \
+     "zdb -d $hex_ds failed $reason"
+output=$(zdb -d $num_ds)
+reason="($num_ds not in zdb output)"
+echo $output |grep "$num_ds" > /dev/null || log_fail \
+    "zdb -d $num_ds failed $reason"
+
+# force numeric interpretation, should fail
+log_mustnot zdb -N $hex_ds
+log_mustnot zdb -N $num_ds
+log_mustnot zdb -Nd $hex_ds
+log_mustnot zdb -Nd $num_ds
+
+log_pass "zdb -d <pool>/<objset ID> generates the correct names."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh
new file mode 100755
index 0000000..d51edf3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover.ksh

@@ -0,0 +1,55 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Allan Jude.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -r <dataset> <path> <destination>
+# Will extract <path> (relative to <dataset>) to the file <destination>
+# Similar to -R, except it does the work for you to find each record
+#
+# Strategy:
+# 1. Create a pool
+# 2. Write some data to a file
+# 3. Extract the file
+# 4. Compare the file to the original
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -f $tmpfile	# -f: the extracted file may not exist if the test stopped early
+}
+
+log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
+log_onexit cleanup
+init_data=$TESTDIR/file1
+tmpfile="$TEST_BASE_DIR/zdb-recover"
+write_count=8
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+file_write -o create -w -f $init_data -b $blksize -c $write_count
+log_must zpool sync $TESTPOOL
+
+output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
+log_must cmp $init_data $tmpfile
+
+log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh
new file mode 100755
index 0000000..91f04c7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_recover_2.ksh

@@ -0,0 +1,57 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Allan Jude.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -r <dataset> <path> <destination>
+# Will extract <path> (relative to <dataset>) to the file <destination>
+# Similar to -R, except it does the work for you to find each record
+#
+# Strategy:
+# 1. Create a pool
+# 2. Write some data to a file
+# 3. Append to the file so its size isn't a multiple of the block size
+# 4. Extract the file
+# 5. Compare the file to the original
+#
+
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -f $tmpfile	# -f: the extracted file may not exist if the test stopped early
+}
+
+log_assert "Verify zdb -r <dataset> <path> <dest> extract the correct data."
+log_onexit cleanup
+init_data=$TESTDIR/file1
+tmpfile="$TEST_BASE_DIR/zdb-recover"
+write_count=8
+blksize=131072
+verify_runnable "global"
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+file_write -o create -w -f $init_data -b $blksize -c $write_count
+log_must echo "zfs" >> $init_data
+log_must zpool sync $TESTPOOL
+
+output=$(zdb -r $TESTPOOL/$TESTFS file1 $tmpfile)
+log_must cmp $init_data $tmpfile
+
+log_pass "zdb -r <dataset> <path> <dest> extracts the correct data."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg.ksh
index c8fafc3..c6e45c8 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg.ksh

@@ -57,7 +57,7 @@
     "set compressratio=" "set mounted=" "set origin=" "set quota=" \
     "set reservation=" "set volsize=" " set volblocksize=" "set recordsize=" \
     "set mountpoint=" "set devices=" "set exec=" "set setuid=" "set readonly=" \
-    "set zoned=" "set snapdir=" "set aclmode=" "set aclinherit=" \
+    "set snapdir=" "set aclmode=" "set aclinherit=" \
     "set quota=blah" "set reservation=blah" "set atime=blah" "set checksum=blah" \
     "set compression=blah" \
     "upgrade blah" "mount blah" "mount -o" \
@@ -65,6 +65,11 @@
     "share" "unshare" "send" "send -i" "receive" "receive -d" "receive -vnF" \
     "recv" "recv -d" "recv -vnF" "allow" "unallow" \
     "blah blah" "-%" "--" "--?" "-*" "-="
+if is_freebsd; then
+	args+=("set jailed=")
+else
+	args+=("set zoned=")
+fi
 
 log_assert "Badly-formed zfs sub-command should return an error."
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh
index 92382aa..1290d88 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos.ksh

@@ -48,13 +48,15 @@
 {
 	unset ZFS_ABORT
 
+	if is_freebsd && [[ -n $savedcorefile ]]; then
+		sysctl kern.corefile=$savedcorefile
+	fi
+
 	if [[ -d $corepath ]]; then
 		rm -rf $corepath
 	fi
 	for ds in $fs1 $fs $ctr; do
-		if datasetexists $ds; then
-			log_must zfs destroy -rRf $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds -rRf
 	done
 }
 
@@ -63,7 +65,9 @@
 log_onexit cleanup
 
 # Preparation work for testing
+savedcorefile=""
 corepath=$TESTDIR/core
+corefile=$corepath/core.zfs
 if [[ -d $corepath ]]; then
 	rm -rf $corepath
 fi
@@ -91,9 +95,13 @@
 
 if is_linux; then
 	ulimit -c unlimited
-	echo "$corepath/core.zfs" >/proc/sys/kernel/core_pattern
+	echo "$corefile" >/proc/sys/kernel/core_pattern
 	echo 0 >/proc/sys/kernel/core_uses_pid
 	export ASAN_OPTIONS="abort_on_error=1:disable_coredump=0"
+elif is_freebsd; then
+	ulimit -c unlimited
+	savedcorefile=$(sysctl -n kern.corefile)
+	log_must sysctl kern.corefile=$corepath/core.%N
 else
 	log_must coreadm -p ${corepath}/core.%f
 fi
@@ -102,7 +110,6 @@
 
 for subcmd in "${cmds[@]}" "${badparams[@]}"; do
 	zfs $subcmd >/dev/null 2>&1 && log_fail "$subcmd passed incorrectly."
-	corefile=${corepath}/core.zfs
 	if [[ ! -e $corefile ]]; then
 		log_fail "zfs $subcmd cannot generate core file with " \
 		    "ZFS_ABORT set."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/cleanup.ksh
index 6a4e7cf..f84ac43 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/cleanup.ksh

@@ -26,4 +26,6 @@
 
 . $STF_SUITE/include/libtest.shlib
 
+log_must zfs destroy "$TESTPOOL/$TESTFS/child"
+log_must zfs destroy "$TESTPOOL/${TESTFS}_with_suffix"
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/setup.ksh
index 2a9de05..4095341 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/setup.ksh

@@ -28,4 +28,8 @@
 
 DISK=${DISKS%% *}
 
-default_volume_setup $DISK
+default_setup_noexit $DISK
+log_must zfs create "$TESTPOOL/$TESTFS/child"
+log_must zfs create "$TESTPOOL/${TESTFS}_with_suffix"
+log_must zfs create "$TESTPOOL/$TESTFS/recv"
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh
index 4a11837..3a1cddb 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_bookmark/zfs_bookmark_cliargs.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+# Copyright 2019, 2020 by Christian Schwarz. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -32,35 +33,55 @@
 #
 # STRATEGY:
 # 1. Create initial snapshot
+#
 # 2. Verify we can create a bookmark specifying snapshot and bookmark full paths
-# 3. Verify we can create a bookmark specifying the snapshot name
-# 4. Verify we can create a bookmark specifying the bookmark name
+# 3. Verify we can create a bookmark specifying the short snapshot name
+# 4. Verify we can create a bookmark specifying the short bookmark name
 # 5. Verify at least a full dataset path is required and both snapshot and
 #    bookmark name must be valid
 #
+# 6. Verify we can copy a bookmark by specifying the source bookmark and new
+#    bookmark full paths.
+# 7. Verify we can copy a bookmark specifying the short source name
+# 8. Verify we can copy a bookmark specifying the short new name
+# 9. Verify two short paths are not allowed, and test empty paths
+# 10. Verify we cannot copy a bookmark if the new bookmark already exists
+# 11. Verify that copying a bookmark only works if new and source name
+#     have the same dataset
+#
 
 verify_runnable "both"
 
 function cleanup
 {
-	if snapexists "$DATASET@$TESTSNAP"; then
-		log_must zfs destroy "$DATASET@$TESTSNAP"
-	fi
-	if bkmarkexists "$DATASET#$TESTBM"; then
-		log_must zfs destroy "$DATASET#$TESTBM"
-	fi
+	snapexists "$DATASET@$TESTSNAP" && \
+		destroy_dataset "$DATASET@$TESTSNAP"
+
+	bkmarkexists "$DATASET#$TESTBM" && \
+		destroy_dataset "$DATASET#$TESTBM"
+
+	bkmarkexists "$DATASET#$TESTBMCOPY" && \
+		destroy_dataset "$DATASET#$TESTBMCOPY"
 }
 
 log_assert "'zfs bookmark' should work only when passed valid arguments."
 log_onexit cleanup
 
 DATASET="$TESTPOOL/$TESTFS"
+DATASET_TWO="$TESTPOOL/${TESTFS}_two"
 TESTSNAP='snapshot'
+TESTSNAP2='snapshot2'
 TESTBM='bookmark'
+TESTBMCOPY='bookmark_copy'
+
 
 # Create initial snapshot
 log_must zfs snapshot "$DATASET@$TESTSNAP"
 
+#
+# Bookmark creation tests
+#
+
 # Verify we can create a bookmark specifying snapshot and bookmark full paths
 log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET#$TESTBM"
 log_must eval "bkmarkexists $DATASET#$TESTBM"
@@ -97,4 +118,120 @@
 log_mustnot zfs bookmark "$TESTSNAP" "$DATASET"
 log_mustnot eval "bkmarkexists $DATASET#$TESTBM"
 
-log_pass "'zfs bookmark' works as expected only when passed valid arguments."
+# Verify that we can create bookmarks on another origin filesystem
+log_must zfs clone "$DATASET@$TESTSNAP" "$DATASET_TWO"
+log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET_TWO#$TESTBM"
+log_must eval "destroy_dataset $DATASET_TWO"
+
+# Verify that we cannot create bookmarks on a non-origin filesystem
+log_must zfs create "$DATASET_TWO"
+log_mustnot_expect "source is not an ancestor of the new bookmark's dataset" zfs bookmark "$DATASET@$TESTSNAP" "$DATASET_TWO#$TESTBM"
+log_must zfs destroy "$DATASET_TWO"
+
+# Verify that we can create bookmarks of snapshots on the pool dataset
+log_must zfs snapshot "$TESTPOOL@$TESTSNAP"
+log_must zfs bookmark "$TESTPOOL@$TESTSNAP" "$TESTPOOL#$TESTBM"
+log_must zfs destroy "$TESTPOOL#$TESTBM"
+log_must zfs destroy "$TESTPOOL@$TESTSNAP"
+
+#
+# Bookmark copying tests
+#
+
+# create the source bookmark
+log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET#$TESTBM"
+
+# Verify we can copy a bookmark by specifying the source bookmark
+# and new bookmark full paths.
+log_must eval "bkmarkexists $DATASET#$TESTBM"
+log_must zfs bookmark "$DATASET#$TESTBM" "$DATASET#$TESTBMCOPY"
+log_must eval "bkmarkexists $DATASET#$TESTBMCOPY"
+## validate destroy once (should be truly independent bookmarks)
+log_must zfs destroy "$DATASET#$TESTBM"
+log_mustnot eval "bkmarkexists $DATASET#$TESTBM"
+log_must eval "bkmarkexists $DATASET#$TESTBMCOPY"
+log_must zfs destroy "$DATASET#$TESTBMCOPY"
+log_mustnot eval "bkmarkexists $DATASET#$TESTBMCOPY"
+log_mustnot eval "bkmarkexists $DATASET#$TESTBM"
+## recreate the source bookmark
+log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET#$TESTBM"
+
+# Verify we can copy a bookmark specifying the short source name
+log_must zfs bookmark "#$TESTBM" "$DATASET#$TESTBMCOPY"
+log_must eval "bkmarkexists $DATASET#$TESTBMCOPY"
+log_must zfs destroy "$DATASET#$TESTBMCOPY"
+
+# Verify we can copy a bookmark specifying the short bookmark name
+log_must zfs bookmark "$DATASET#$TESTBM" "#$TESTBMCOPY"
+log_must eval "bkmarkexists $DATASET#$TESTBMCOPY"
+log_must zfs destroy "$DATASET#$TESTBMCOPY"
+
+# Verify two short paths are not allowed, and test empty paths
+log_mustnot zfs bookmark "#$TESTBM" "#$TESTBMCOPY"
+log_mustnot zfs bookmark "#$TESTBM" "#"
+log_mustnot zfs bookmark "#"        "#$TESTBMCOPY"
+log_mustnot zfs bookmark "#"        "#"
+log_mustnot zfs bookmark "#"        ""
+log_mustnot zfs bookmark ""         "#"
+log_mustnot zfs bookmark ""         ""
+
+# Verify that we can copy bookmarks on another origin filesystem
+log_must zfs clone "$DATASET@$TESTSNAP" "$DATASET_TWO"
+log_must zfs bookmark "$DATASET#$TESTBM" "$DATASET_TWO#$TESTBMCOPY"
+log_must zfs destroy "$DATASET_TWO"
+
+# Verify that we cannot create bookmarks on another non-origin filesystem
+log_must zfs create "$DATASET_TWO"
+log_mustnot_expect "source is not an ancestor of the new bookmark's dataset" zfs bookmark "$DATASET#$TESTBM" "$DATASET_TWO#$TESTBMCOPY"
+log_must zfs destroy "$DATASET_TWO"
+
+# Verify that we can copy bookmarks on the pool dataset
+log_must zfs snapshot "$TESTPOOL@$TESTSNAP"
+log_must zfs bookmark "$TESTPOOL@$TESTSNAP" "$TESTPOOL#$TESTBM"
+log_must zfs bookmark "$TESTPOOL#$TESTBM" "$TESTPOOL#$TESTBMCOPY"
+log_must zfs destroy "$TESTPOOL#$TESTBM"
+log_must zfs destroy "$TESTPOOL#$TESTBMCOPY"
+log_must zfs destroy "$TESTPOOL@$TESTSNAP"
+
+# Verify that copied 'normal' bookmarks are independent of the source bookmark
+log_must zfs bookmark "$DATASET#$TESTBM" "$DATASET#$TESTBMCOPY"
+log_must zfs destroy "$DATASET#$TESTBM"
+log_must eval "zfs send $DATASET@$TESTSNAP > $TEST_BASE_DIR/zfstest_datastream.$$"
+log_must eval "destroy_dataset $TESTPOOL/$TESTFS/recv"
+log_must eval "zfs recv -o mountpoint=none $TESTPOOL/$TESTFS/recv < $TEST_BASE_DIR/zfstest_datastream.$$"
+log_must zfs snapshot "$DATASET@$TESTSNAP2"
+log_must eval "zfs send -i \#$TESTBMCOPY $DATASET@$TESTSNAP2 > $TEST_BASE_DIR/zfstest_datastream.$$"
+log_must eval "zfs recv $TESTPOOL/$TESTFS/recv < $TEST_BASE_DIR/zfstest_datastream.$$"
+log_must rm -f "$TEST_BASE_DIR/zfstest_datastream.$$"	# cleanup: stream file first, then datasets
+log_must eval "destroy_dataset $DATASET@$TESTSNAP2"
+log_must zfs destroy "$DATASET#$TESTBMCOPY"
+log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET#$TESTBM"
+
+# Verify that copied redaction bookmarks are independent of the source bookmark
+## create redaction bookmark
+log_must zfs destroy "$DATASET#$TESTBM"
+log_must zfs destroy "$DATASET@$TESTSNAP"
+log_must eval "echo secret > $TESTDIR/secret"
+log_must zfs snapshot "$DATASET@$TESTSNAP"
+log_must eval "echo redacted > $TESTDIR/secret"
+log_must zfs snapshot "$DATASET@$TESTSNAP2" # TESTSNAP2 is the redaction snapshot
+log_must zfs list -t all -o name,createtxg,guid,mountpoint,written
+log_must zfs redact "$DATASET@$TESTSNAP" "$TESTBM" "$DATASET@$TESTSNAP2"
+# ensure our primitive for testing whether a bookmark is a redaction bookmark works
+log_must eval "zfs get all $DATASET#$TESTBM | grep redact_snaps"
+## copy the redaction bookmark
+log_must zfs bookmark "$DATASET#$TESTBM" "#$TESTBMCOPY"
+log_mustnot eval "zfs get all $DATASET#$TESTBMCOPY | grep redact_snaps"
+log_must eval "zfs send --redact "$TESTBMCOPY" -i $DATASET@$TESTSNAP $DATASET@$TESTSNAP2 2>&1 | head -n 100 | grep 'not a redaction bookmark'"
+# try the above again after destroying the source bookmark, preventive measure for future work
+log_must zfs destroy "$DATASET#$TESTBM"
+log_mustnot eval "zfs get all $DATASET#$TESTBMCOPY | grep redact_snaps"
+log_must eval "zfs send --redact "$TESTBMCOPY" -i $DATASET@$TESTSNAP $DATASET@$TESTSNAP2 2>&1 | head -n 100 | grep 'not a redaction bookmark'"
+## cleanup
+log_must eval "destroy_dataset $DATASET@$TESTSNAP2"
+log_must zfs destroy "$DATASET#$TESTBMCOPY"
+log_must eval "destroy_dataset $DATASET@$TESTSNAP"
+log_must zfs snapshot "$DATASET@$TESTSNAP"
+log_must zfs bookmark "$DATASET@$TESTSNAP" "$DATASET#$TESTBM"
+
+log_pass "'zfs bookmark' works as expected"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh
index 781caae..821abde 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh

@@ -40,7 +40,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
index a886ab8..592f1ec 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh

@@ -44,7 +44,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh
index 497fb99..70a9df6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_clones.ksh

@@ -40,7 +40,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh
index 6344b8d..22212d7 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh

@@ -43,7 +43,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh
index 94820c3..e9b010e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh

@@ -42,7 +42,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh
index 4ed4aad..a5a9976 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh

@@ -38,7 +38,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh
index 5cbe34b..607e220 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh

@@ -40,7 +40,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh
index b167224..224fabf 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh

@@ -52,7 +52,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am
index 0a6c2eb..06099c0 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am

@@ -13,4 +13,5 @@
 	zfs_clone_009_neg.ksh \
 	zfs_clone_010_pos.ksh \
 	zfs_clone_encrypted.ksh \
-	zfs_clone_deeply_nested.ksh
+	zfs_clone_deeply_nested.ksh \
+	zfs_clone_rm_nested.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg.ksh
index b83ccdf..e6ffa26 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg.ksh

@@ -100,11 +100,11 @@
 function cleanup_all
 {
 	for fs in $targets; do
-		datasetexists $fs && log_must zfs destroy -f $fs
+		datasetexists $fs && destroy_dataset $fs -f
 	done
 
 	for snap in $SNAPFS $SNAPFS1 ; do
-		snapexists $snap && log_must zfs destroy -Rf $snap
+		snapexists $snap && destroy_dataset $snap -Rf
 	done
 
 	return 0

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_002_pos.ksh
index 8e69a7a..96eb3ea 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_002_pos.ksh

@@ -60,14 +60,10 @@
 function cleanup_all
 {
 
-	if datasetexists $TESTPOOL/notexist ; then
-		log_must zfs destroy -rRf $TESTPOOL/notexist
-	fi
+	datasetexists $TESTPOOL/notexist && destroy_dataset $TESTPOOL/notexist -rRf
 
 	for snap in $SNAPFS $SNAPFS1 ; do
-		if snapexists $snap ; then
-			log_must zfs destroy -Rf $snap
-		fi
+		snapexists $snap && destroy_dataset $snap -Rf
 	done
 
 	return 0

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_003_pos.ksh
index 5222757..6484de9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_003_pos.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS ; then
-		log_must zfs destroy -Rf $SNAPFS
-	fi
+	snapexists $SNAPFS && destroy_dataset $SNAPFS -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_004_pos.ksh
index 8d86f55..1c4c579 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_004_pos.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS ; then
-		log_must zfs destroy -Rf $SNAPFS
-	fi
+	snapexists $SNAPFS && destroy_dataset $SNAPFS -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_005_pos.ksh
index afa8b46..6f17b17 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_005_pos.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS1 ; then
-		log_must zfs destroy -Rf $SNAPFS1
-	fi
+	snapexists $SNAPFS1 && destroy_dataset $SNAPFS1 -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_006_pos.ksh
index 2127eb1..f2f7a5b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_006_pos.ksh

@@ -49,9 +49,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS1 ; then
-		log_must_busy zfs destroy -Rf $SNAPFS1
-	fi
+	snapexists $SNAPFS1 && destroy_dataset $SNAPFS1 -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_007_pos.ksh
index 6fba72b..4bfb3d5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_007_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS ; then
-			log_must zfs destroy -Rf $SNAPFS
-	fi
+	snapexists $SNAPFS && destroy_dataset $SNAPFS -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_008_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_008_neg.ksh
index 8e306fd..2f2b0ca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_008_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_008_neg.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS ; then
-		log_must zfs destroy -Rf $SNAPFS
-	fi
+	snapexists $SNAPFS && destroy_dataset $SNAPFS -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg.ksh
index 030c6af..6cdf571 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if snapexists $SNAPFS1 ; then
-		log_must zfs destroy -Rf $SNAPFS1
-	fi
+	snapexists $SNAPFS1 && destroy_dataset $SNAPFS1 -Rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh
index dcf8009..13f5418 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos.ksh

@@ -39,8 +39,8 @@
 	typeset -i i=1
 	for ds in $datasets; do
                 datasetexists $ds/$TESTCLONE.$i && \
-		    log_must zfs destroy -rf $ds/$TESTCLONE.$i
-                datasetexists $ds && log_must zfs destroy -Rf $ds
+		    destroy_dataset $ds/$TESTCLONE.$i -rf
+                datasetexists $ds && destroy_dataset $ds -Rf
 		((i=i+1))
 	done
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh
index 86f335b..1f07b9e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh

@@ -44,9 +44,9 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -f
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_rm_nested.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_rm_nested.ksh
new file mode 100755
index 0000000..447fbb3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_rm_nested.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	When a snapshot is destroyed, we used to recurse all clones
+#	that are downstream of the destroyed snapshot (e.g. to remove
+#	its key and merge its deadlist entries to the previous one).
+#	This recursion would break the stack on deeply nested clone
+#	hierarchies. To avoid this problem today, we keep heap-allocated
+#	records of all the clones as we traverse their hierarchy.
+#
+#	This test ensures and showcases that our new method works with
+#	deeply nested clone hierarchies.
+#
+# STRATEGY:
+#	1. Create an fs and take a snapshot of it (snapshot foo)
+#	2. Take a second snapshot of the same fs (snapshot bar) on
+#	   top of snapshot foo
+#	3. Create a clone of snapshot bar and then take a snapshot
+#	   of it.
+#	4. Create a clone of the newly-created snapshot and then
+#	   take a snapshot of it.
+#	5. Repeat step [4] many times to create a deeply nested hierarchy.
+#	6. Destroy snapshot foo.
+#
+
+verify_runnable "both"
+
+typeset FS0=$TESTPOOL/0
+typeset FOO=foo
+typeset BAR=BAR
+
+typeset FS0SNAPFOO=$FS0@$FOO
+typeset FS0SNAPBAR=$FS0@$BAR
+
+typeset -i numds=300
+
+# Remove $FS0 and every clone/snapshot hanging off it; a no-op if it is absent.
+function test_cleanup
+{
+	datasetexists $FS0 && destroy_dataset $FS0 -Rf
+	return 0
+}
+
+# Register cleanup first so a failing setup step cannot leak $FS0.
+log_onexit test_cleanup
+
+log_must zfs create $FS0
+log_must zfs snapshot $FS0SNAPFOO
+log_must zfs snapshot $FS0SNAPBAR
+
+for (( i=1; i<numds; i++ )); do
+	log_must zfs clone $TESTPOOL/$((i-1))@$BAR $TESTPOOL/$i
+	log_must zfs snapshot $TESTPOOL/$i@$BAR
+done
+
+log_must zfs destroy $FS0SNAPFOO
+
+log_pass "Snapshot deletion doesn't break the stack in deeply nested " \
+    "clone hierarchies."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib
index b0ced58..1273ed5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.kshlib

@@ -83,26 +83,39 @@
 
 	vol=$TESTPOOL/$TESTVOL1
 	vol_b_path=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL1
-	vol_r_path=$ZVOL_RDEVDIR/$TESTPOOL/$TESTVOL1
 
 	log_must zfs create -V $VOLSIZE -o copies=$copies $vol
 	log_must zfs set refreservation=none $vol
-	block_device_wait
+	block_device_wait $vol_b_path
 
 	case "$type" in
 	"ext2")
-		log_must eval "echo y | newfs $vol_r_path >/dev/null 2>&1"
+		if is_freebsd; then
+			log_unsupported "ext2 test not implemented for freebsd"
+		fi
+		log_must eval "new_fs $vol_b_path >/dev/null 2>&1"
 		log_must mount -o rw $vol_b_path $mntp
 		;;
 	"ufs")
 		if is_linux; then
 			log_unsupported "ufs test not implemented for linux"
 		fi
-		log_must eval "newfs $vol_r_path >/dev/null 2>&1"
+		log_must eval "new_fs $vol_b_path >/dev/null 2>&1"
 		log_must mount $vol_b_path $mntp
 		;;
 	"zfs")
-		log_must zpool create $TESTPOOL1 $vol_b_path
+		if is_freebsd; then
+			# Pool creation on zvols is forbidden by default.
+			# Save and restore the current setting.
+			typeset _saved=$(get_tunable VOL_RECURSIVE)
+			log_must set_tunable64 VOL_RECURSIVE 1 # Allow
+			zpool create $TESTPOOL1 $vol_b_path
+			typeset _zpool_create_result=$?
+			log_must set_tunable64 VOL_RECURSIVE $_saved # Restore
+			log_must test $_zpool_create_result = 0
+		else
+			log_must zpool create $TESTPOOL1 $vol_b_path
+		fi
 		log_must zfs create $TESTPOOL1/$TESTFS1
 		;;
 	*)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_001_pos.ksh
index 2ed881a..672692b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_001_pos.ksh

@@ -49,9 +49,7 @@
 	typeset ds
 
 	for ds in $fs1 $fs2 $vol1 $vol2; do
-		if datasetexists $ds; then
-			log_must zfs destroy $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds
 	done
 }
 
@@ -94,13 +92,13 @@
 		fi
 		for ds in $fs2 $vol2; do
 			cmp_prop $ds $val2
-			log_must zfs destroy $ds
+			destroy_dataset $ds
 			block_device_wait
 		done
 	done
 
 	for ds in $fs1 $vol1; do
-		log_must zfs destroy $ds
+		destroy_dataset $ds
 		block_device_wait
 	done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh
index 11265cd..b644fca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_002_pos.ksh

@@ -50,9 +50,8 @@
 	typeset val
 
 	for val in 1 2 3; do
-		if datasetexists $TESTPOOL/fs_$val; then
-			log_must zfs destroy $TESTPOOL/fs_$val
-		fi
+		datasetexists $TESTPOOL/fs_$val && \
+			destroy_dataset $TESTPOOL/fs_$val
 	done
 }
 
@@ -76,12 +75,12 @@
 log_note "Verify 'zfs list' can correctly list the space charged."
 fsize=${FILESIZE%[m|M]}
 for val in 1 2 3; do
-	used=$(get_used_prop $TESTPOOL/fs_$val)
+	used=$(get_prop used $TESTPOOL/fs_$val)
 	check_used $used $val
 done
 
 log_note "Verify 'ls -s' can correctly list the space charged."
-if is_linux; then
+if is_linux || is_freebsd; then
 	blksize=1024
 else
 	blksize=512
@@ -92,18 +91,27 @@
 	check_used $used $val
 done
 
-log_note "Verify df(1M) can correctly display the space charged."
+log_note "Verify df(1) can correctly display the space charged."
 for val in 1 2 3; do
-	used=`df -F zfs -k /$TESTPOOL/fs_$val/$FILE | grep $TESTPOOL/fs_$val \
-		| awk '{print $3}'`
-	(( used = used * 1024 )) # kb -> bytes
+	if is_freebsd; then
+		used=`df -m /$TESTPOOL/fs_$val | grep $TESTPOOL/fs_$val \
+			| awk -v fs=fs_$val '$4 ~ fs {print $3}'`
+	else
+		used=`df -F zfs -k /$TESTPOOL/fs_$val/$FILE | grep $TESTPOOL/fs_$val \
+			| awk '{print $3}'`
+		(( used = used * 1024 )) # kb -> bytes
+	fi
 	check_used $used $val
 done
 
 log_note "Verify du(1) can correctly display the space charged."
 for val in 1 2 3; do
-	used=`du -k /$TESTPOOL/fs_$val/$FILE | awk '{print $1}'`
-	(( used = used * 1024 )) # kb -> bytes
+	if is_freebsd; then
+		used=`du -h /$TESTPOOL/fs_$val/$FILE | awk '{print $1}'`
+	else
+		used=`du -k /$TESTPOOL/fs_$val/$FILE | awk '{print $1}'`
+		(( used = used * 1024 )) # kb -> bytes
+	fi
 	check_used $used $val
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_003_pos.ksh
index 98420cb..94e72bc 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_003_pos.ksh

@@ -51,9 +51,7 @@
 		destroy_pool $TESTPOOL1
 	fi
 
-	if datasetexists $vol; then
-		log_must zfs destroy $vol
-	fi
+	datasetexists $vol && destroy_dataset $vol
 }
 
 log_assert "Verify that ZFS volume space used by multiple copies is charged correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh
index 4a3ef76..6dc9306 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies_006_pos.ksh

@@ -51,9 +51,7 @@
 		log_must umount $mntp
 	fi
 
-	if datasetexists $vol; then
-		log_must zfs destroy $vol
-	fi
+	datasetexists $vol && destroy_dataset $vol
 
 	if [[ -d $mntp ]]; then
                 rm -rf $mntp

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am
index a36d021..7515753 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am

@@ -17,7 +17,10 @@
 	zfs_create_013_pos.ksh \
 	zfs_create_014_pos.ksh \
 	zfs_create_encrypted.ksh \
-	zfs_create_crypt_combos.ksh
+	zfs_create_crypt_combos.ksh \
+	zfs_create_dryrun.ksh \
+	zfs_create_nomount.ksh \
+	zfs_create_verbose.ksh
 
 dist_pkgdata_DATA = \
 	properties.kshlib \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/properties.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/properties.kshlib
index 00b5ad8..4130ba4 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/properties.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/properties.kshlib

@@ -61,9 +61,13 @@
                     "devices=off" \
                     "exec=off" \
                     "setuid=off" \
-                    "zoned=on" \
                     "snapdir=visible" \
 		    "canmount=off" \
 		    "version=1"
+if is_freebsd; then
+	FS_ONLY_PROP+=("jailed=on")
+else
+	FS_ONLY_PROP+=("zoned=on")
+fi
 
 set -A VOL_ONLY_PROP "volblocksize=16384" "volsize=536870912"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg
index b96908c..9bf2532 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create.cfg

@@ -54,6 +54,12 @@
 export VOL_LIMIT_KEYWORD2="value is too large"
 export VOL_LIMIT_KEYWORD3="volume size exceeds limit"
 
-set -A size "8k" "8K" "1m" "1M" "1mb" "1mB" "1Mb" "1MB" "1g" "1G" \
+set -A size "8k" "8K" "35K" "1m" "1M" "1mb" "1mB" "1Mb" "1MB" "1g" "1G" \
 	"1p" "1P" "1z" "1Z" "1gb" "1gB" "1Gb" "1GB" "1pb" "1pB" "1Pb" \
 	"1PB" "1zb" "1zB" "1Zb" "1ZB"
+
+# If a volume is created with a size that is not a multiple of the blocksize,
+# explicitly check that its size has been rounded up to the next multiple.
+# The volume with the exact size must exist in the "size" array above.
+set -A explicit_size_check "35K"
+set -A expected_rounded_size "40960"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_001_pos.ksh
index 0e580a8..f74b2c9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_001_pos.ksh

@@ -48,9 +48,11 @@
 	typeset -i i=0
 	while (( $i < ${#datasets[*]} )); do
 		datasetexists ${datasets[$i]} && \
-			log_must zfs destroy -f ${datasets[$i]}
+			destroy_dataset ${datasets[$i]} -f
 		((i = i + 1))
 	done
+
+	zfs destroy -f "$TESTPOOL/with a space"
 }
 
 log_onexit cleanup
@@ -68,4 +70,8 @@
 	((i = i + 1))
 done
 
+log_must zfs create "$TESTPOOL/with a space"
+log_must zfs unmount "$TESTPOOL/with a space"
+log_must zfs mount "$TESTPOOL/with a space"
+
 log_pass "'zfs create <filesystem>' works as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_002_pos.ksh
index 6f36b40..0218e2e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_002_pos.ksh

@@ -31,6 +31,7 @@
 
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create.cfg
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
 
 #
 # DESCRIPTION:
@@ -39,6 +40,8 @@
 # STRATEGY:
 # 1. Create a volume in the storage pool.
 # 2. Verify the volume is created correctly.
+# 3. Verify that the volume created has its volsize rounded up to the next
+#    multiple of the blocksize (in this case, the default blocksize)
 #
 
 verify_runnable "global"
@@ -76,6 +79,15 @@
 	fi
 
 	((j = j + 1))
-
 done
+
+# Volumes whose requested size is not a blocksize multiple must be rounded up.
+typeset -i j=0
+while (( $j < ${#explicit_size_check[*]} )); do
+	propertycheck ${TESTPOOL}/${TESTVOL}${explicit_size_check[j]} \
+	    volsize=${expected_rounded_size[j]} || \
+	    log_fail "volsize ${explicit_size_check[j]} was not rounded up"
+	((j = j + 1))
+done
+
 log_pass "'zfs create -s -V <size> <volume>' works as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_003_pos.ksh
index 2906e32..120de10 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_003_pos.ksh

@@ -46,8 +46,7 @@
 
 function cleanup
 {
-	datasetexists $vol && \
-		log_must zfs destroy -f $vol
+	datasetexists $vol && destroy_dataset $vol -f
 }
 
 log_assert "Verify creating volume with specified blocksize works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_004_pos.ksh
index 200b4a0..9e69366 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_004_pos.ksh

@@ -48,8 +48,7 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup
@@ -65,7 +64,7 @@
 		log_fail "zfs create $TESTPOOL/$TESTFS1 fail."
 	propertycheck $TESTPOOL/$TESTFS1 ${RW_FS_PROP[i]} || \
 		log_fail "${RW_FS_PROP[i]} is failed to set."
-	log_must zfs destroy -f $TESTPOOL/$TESTFS1
+	log_must_busy zfs destroy -f $TESTPOOL/$TESTFS1
 	(( i = i + 1 ))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_005_pos.ksh
index e953c65..98cf709 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_005_pos.ksh

@@ -49,7 +49,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_006_pos.ksh
index 2a664a4..551ae78 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_006_pos.ksh

@@ -50,7 +50,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTVOL1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTVOL1
+		destroy_dataset $TESTPOOL/$TESTVOL1 -f
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_008_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_008_neg.ksh
index 08ede05..a905e50 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_008_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_008_neg.ksh

@@ -46,9 +46,8 @@
 
 function cleanup
 {
-	if datasetexists $TESTPOOL/$TESTFS1 ; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
-	fi
+	datasetexists $TESTPOOL/$TESTFS1 && \
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup
@@ -74,7 +73,6 @@
     "-o readonly=ON" "-o reADOnly=off" "-o rdonly=OFF" "-o rdonly=aaa" \
     "-o readonly=ON -V $VOLSIZE" "-o reADOnly=off -V $VOLSIZE" \
     "-o rdonly=OFF -V $VOLSIZE" "-o rdonly=aaa -V $VOLSIZE" \
-    "-o zoned=ON" "-o ZoNed=off" "-o zoned=aaa" \
     "-o snapdIR=hidden" "-o snapdir=VISible" "-o snapdir=aaa" \
     "-o aclmode=DIScard" "-o aclmODE=groupmask" "-o aclmode=aaa" \
     "-o aclinherit=deny" "-o aclinHerit=secure" "-o aclinherit=aaa" \
@@ -88,13 +86,26 @@
     "-o compressratio=1.00x" "-o compressratio=1.00x -V $VOLSIZE" \
     "-o version=0" "-o version=1.234" "-o version=10K" "-o version=-1" \
     "-o version=aaa" "-o version=999"
+if is_freebsd; then
+	args+=("-o jailed=ON" "-o JaiLed=off" "-o jailed=aaa")
+else
+	args+=("-o zoned=ON" "-o ZoNed=off" "-o zoned=aaa")
+fi
 
 log_assert "'zfs create' should return an error with badly-formed parameters."
 
 typeset -i i=0
 while [[ $i -lt ${#args[*]} ]]; do
-	log_mustnot zfs create ${args[i]} $TESTPOOL/$TESTFS1
-	log_mustnot zfs create -p ${args[i]} $TESTPOOL/$TESTFS1
+	typeset arg=${args[i]}
+	if is_freebsd; then
+		# FreeBSD does not strictly validate share options (yet).
+		if [[ "$arg" == "-o sharenfs="* ]]; then
+			((i = i + 1))
+			continue
+		fi
+	fi
+	log_mustnot zfs create $arg $TESTPOOL/$TESTFS1
+	log_mustnot zfs create -p $arg $TESTPOOL/$TESTFS1
 	((i = i + 1))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh
index 982a4ea..c5012d4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_011_pos.ksh

@@ -48,9 +48,8 @@
 
 function cleanup
 {
-	if datasetexists $TESTPOOL/$TESTFS1 ; then
-		log_must zfs destroy -rf $TESTPOOL/$TESTFS1
-	fi
+	datasetexists $TESTPOOL/$TESTFS1 && \
+		destroy_dataset $TESTPOOL/$TESTFS1 -rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_012_pos.ksh
index d8aa064..a0b8d52 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_012_pos.ksh

@@ -48,9 +48,8 @@
 
 function cleanup
 {
-	if datasetexists $TESTPOOL/$TESTFS1 ; then
-		log_must zfs destroy -rf $TESTPOOL/$TESTFS1
-	fi
+	datasetexists $TESTPOOL/$TESTFS1 && \
+		destroy_dataset $TESTPOOL/$TESTFS1 -rf
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh
index d1a8153..2482a68 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh

@@ -43,8 +43,7 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/$TESTFS1 &&
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh
index 141b255..758b800 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh

@@ -38,7 +38,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup
@@ -89,7 +89,7 @@
 		propertycheck $TESTPOOL/$TESTFS1 ${KEYFORMATS[j]} || \
 			log_fail "failed to set ${KEYFORMATS[j]}"
 
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		log_must_busy zfs destroy -f $TESTPOOL/$TESTFS1
 		(( j = j + 1 ))
 	done
 	(( i = i + 1 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_dryrun.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_dryrun.ksh
new file mode 100755
index 0000000..703ae80
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_dryrun.ksh

@@ -0,0 +1,168 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+
+#
+# DESCRIPTION:
+# zfs create -n should perform basic sanity checking but should never create a
+# dataset.  If -v and/or -P are used, it should be verbose about what would
+# be created if sanity checks pass.
+#
+# STRATEGY:
+# 1. Attempt to create a file system and a volume using various combinations of
+#    -n with -v and -P.
+#
+
+verify_runnable "both"
+
+#
+# Verifies that valid commands with -n and without -[vP]:
+# - succeed
+# - do not create a dataset
+# - do not generate output
+#
+function dry_create_no_output
+{
+	typeset -a cmd=(zfs create -n "$@")
+
+	log_note "$0: ${cmd[@]}"
+	log_must "${cmd[@]}"
+	datasetexists "$TESTPOOL/$TESTFS1" &&
+	    log_fail "$TESTPOOL/$TESTFS1 unexpectedly created by '${cmd[@]}'"
+	typeset out=$("${cmd[@]}" 2>&1)
+	[[ -z "$out" ]] ||
+	    log_fail "unexpected output '$out' from '${cmd[@]}'"
+}
+
+#
+# Verifies that commands with invalid properties or invalid property values
+# - fail
+# - do not create a dataset
+# - generate a message on stderr
+#
+function dry_create_error
+{
+	typeset -a cmd=(zfs create -n "$@")
+
+	log_note "$0: ${cmd[@]}"
+	log_mustnot "${cmd[@]}"
+	datasetexists "$TESTPOOL/$TESTFS1" &&
+	    log_fail "$TESTPOOL/$TESTFS1 unexpectedly created by '${cmd[@]}'"
+	typeset out=$("${cmd[@]}" 2>&1 >/dev/null)
+	[[ -z "$out" ]] &&
+	    log_fail "expected an error message but got none from '${cmd[@]}'"
+}
+
+#
+# Verifies that dry-run commands with parseable output
+# - succeed
+# - do not create datasets
+# - generate parseable output on stdout
+# - output matches expectations
+#
+function dry_create_parseable
+{
+	typeset -n exp=$1
+	shift
+	typeset -a cmd=(zfs create -Pn "$@")
+	typeset ds=${cmd[${#cmd[@]} - 1]}
+	typeset out
+	typeset -a toks
+	typeset -a props
+	typeset found_create=false
+
+	log_note "$0: ${cmd[@]}"
+	out=$("${cmd[@]}")
+	(( $? == 0 )) ||
+	    log_fail "unexpected failure getting stdout from '${cmd[@]}'"
+	datasetexists "$TESTPOOL/$TESTFS1" &&
+	    log_fail "$TESTPOOL/$TESTFS1 unexpectedly created by '${cmd[@]}'"
+	echo "$out" | while IFS=$'\t' read -A toks; do
+		log_note "verifying ${toks[@]}"
+		case ${toks[0]} in
+		create)
+			log_must test "${#toks[@]}" -eq 2
+			log_must test "${toks[1]}" == "$ds"
+			found_create="yes, I found create"
+			;;
+		property)
+			log_must test "${#toks[@]}" -eq 3
+			typeset prop=${toks[1]}
+			typeset val=${toks[2]}
+			if [[ -z "${exp[$prop]}" ]]; then
+				log_fail "unexpectedly got property '$prop'"
+			fi
+			# We may not know the exact value a property will take
+			# on.  This is the case for at least refreservation.
+			if [[ ${exp[$prop]} != "*" ]]; then
+				log_must test "${exp[$prop]}" == "$val"
+			fi
+			unset exp[$prop]
+			;;
+		*)
+			log_fail "Unexpected line ${toks[@]}"
+			;;
+		esac
+	done
+
+	log_must test "$found_create" == "yes, I found create"
+	log_must test "extra props: ${!exp[@]}" == "extra props: "
+}
+
+function cleanup
+{
+	datasetexists "$TESTPOOL/$TESTFS1" && \
+		destroy_dataset "$TESTPOOL/$TESTFS1" -r
+}
+log_onexit cleanup
+
+log_assert "zfs create -n creates nothing but can describe what would be" \
+	"created"
+
+# Typical creations should succeed
+dry_create_no_output "$TESTPOOL/$TESTFS1"
+dry_create_no_output -V 10m "$TESTPOOL/$TESTFS1"
+# It shouldn't do a space check right now
+dry_create_no_output -V 100t "$TESTPOOL/$TESTFS1"
+# It shouldn't create parent datasets either
+dry_create_no_output -p "$TESTPOOL/$TESTFS1/$TESTFS2"
+dry_create_no_output -pV 10m "$TESTPOOL/$TESTFS1/$TESTFS2"
+
+# Various invalid properties should be recognized and result in an error
+dry_create_error -o nosuchprop=42 "$TESTPOOL/$TESTFS1"
+dry_create_error -b 1234 -V 10m  "$TESTPOOL/$TESTFS1"
+
+# Parseable output should be parseable.
+typeset -A expect
+expect=([compression]=on)
+dry_create_parseable expect -o compression=on "$TESTPOOL/$TESTFS1"
+
+# Sparse volumes should not get a gratuitous refreservation
+expect=([volblocksize]=4096 [volsize]=$((1024 * 1024 * 10)))
+dry_create_parseable expect -b 4k -V 10m -s "$TESTPOOL/$TESTFS1"
+
+# Non-sparse volumes should have refreservation
+expect=(
+    [volblocksize]=4096
+    [volsize]=$((1024 * 1024 * 10))
+    [refreservation]="*"
+)
+dry_create_parseable expect -b 4k -V 10m "$TESTPOOL/$TESTFS1"
+
+log_pass "zfs create -n creates nothing but can describe what would be" \
+	"created"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh
index 7e5072f..e32545c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh

@@ -70,9 +70,9 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_nomount.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_nomount.ksh
new file mode 100755
index 0000000..e1fbbe6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_nomount.ksh

@@ -0,0 +1,51 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# zfs create -u should leave the new file system unmounted.
+# It should not work for a volume.
+#
+# STRATEGY:
+# 1. Create a file system using -u and make sure the file system is not mounted.
+# 2. Do it for a volume to verify it fails.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset ds	# typeset, not "local": ksh93 has no "local" builtin
+
+	for ds in "$fs" "$vol"; do
+		datasetexists "$ds" && destroy_dataset "$ds"
+	done
+}
+log_onexit cleanup
+
+log_assert "zfs create -u leaves the new file system unmounted"
+
+typeset fs="$TESTPOOL/$TESTFS1"
+typeset vol="$TESTPOOL/$TESTVOL1"
+
+log_must create_dataset "$fs" "-u"
+log_mustnot ismounted "$fs"
+
+log_mustnot zfs create -V $VOLSIZE -u "$vol"
+
+log_pass "zfs create -u leaves the new file system unmounted"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_verbose.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_verbose.ksh
new file mode 100755
index 0000000..acab500
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_verbose.ksh

@@ -0,0 +1,164 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+
+#
+# DESCRIPTION:
+# zfs create -P without -n should be verbose about dataset creation.
+#
+# STRATEGY:
+# 1. Attempt to create a file system and a volume using various properties
+#    and -P
+# 2. Exercise the combination of -p and -P.
+#
+
+verify_runnable "both"
+
+#
+# Verifies that non dry-run commands with parseable output
+# - succeed
+# - create datasets
+# - generate parseable output on stdout
+# - output matches expectations
+#
+function dry_create_parseable
+{
+	typeset -n exp=$1
+	shift
+	typeset -a cmd=(zfs create -P "$@")
+	typeset ds=${cmd[${#cmd[@]} - 1]}
+	typeset out
+	typeset -a toks
+	typeset -a props
+	typeset found_create=false
+	typeset create_ancestors=
+	typeset opt
+
+	# Scan the options for -p.  -o takes a value, hence "o:" below.
+	while getopts :PV:b:o:spv opt; do
+		case $opt in
+		p)	create_ancestors=needed ;;
+		*)	continue ;;
+		esac
+	done
+
+	log_note "$0: ${cmd[@]}"
+	out=$("${cmd[@]}")
+	(( $? == 0 )) ||
+	    log_fail "unexpected failure getting stdout from '${cmd[@]}'"
+	datasetexists "$TESTPOOL/$TESTFS1" ||
+	    log_fail "$TESTPOOL/$TESTFS1 was not created by '${cmd[@]}'"
+	echo "$out" | while IFS=$'\t' read -A toks; do
+		log_note "verifying ${toks[@]}"
+		case ${toks[0]} in
+		create_ancestors)
+			case "$create_ancestors" in
+			needed)
+				log_must test "${toks[1]}" == "$ds"
+				create_ancestors="found ${toks[1]}"
+				;;
+			found*)
+				log_fail "multiple ancestor creation" \
+				    "$create_ancestors and ${toks[1]}"
+				;;
+			"")
+				log_fail "unexpected create_ancestors"
+				;;
+			*)
+				log_fail "impossible error: fix the test"
+				;;
+			esac
+			;;
+		create)
+			log_must test "${#toks[@]}" -eq 2
+			log_must test "${toks[1]}" == "$ds"
+			found_create="yes, I found create"
+			;;
+		property)
+			log_must test "${#toks[@]}" -eq 3
+			typeset prop=${toks[1]}
+			typeset val=${toks[2]}
+			if [[ -z "${exp[$prop]}" ]]; then
+				log_fail "unexpectedly got property '$prop'"
+			fi
+			# We may not know the exact value a property will take
+			# on.  This is the case for at least refreservation.
+			if [[ ${exp[$prop]} != "*" ]]; then
+				log_must test "${exp[$prop]}" == "$val"
+			fi
+			unset exp[$prop]
+			;;
+		*)
+			log_fail "Unexpected line ${toks[@]}"
+			;;
+		esac
+	done
+
+	log_must test "$found_create" == "yes, I found create"
+	log_must test "extra props: ${!exp[@]}" == "extra props: "
+
+	case "$create_ancestors" in
+	"")
+		log_must_busy zfs destroy "$ds"
+		;;
+	"found $ds")
+		log_must_busy zfs destroy -r "$(echo "$ds" | cut -d/ -f1-2)"
+		;;
+	needed)
+		log_fail "Expected but did not find create_ancestors"
+		;;
+	*)
+		log_fail "Unexpected value for create_ancestors:" \
+		    "$create_ancestors"
+		;;
+	esac
+}
+
+function cleanup
+{
+	datasetexists "$TESTPOOL/$TESTFS1" && \
+		destroy_dataset "$TESTPOOL/$TESTFS1" -r
+}
+log_onexit cleanup
+
+log_assert "zfs create -v creates datasets verbosely"
+
+# Parseable output should be parseable.
+typeset -A expect
+expect=([compression]=on)
+dry_create_parseable expect -o compression=on "$TESTPOOL/$TESTFS1"
+
+# Ancestor creation with -p should emit relevant line
+expect=([compression]=on)
+dry_create_parseable expect -p -o compression=on "$TESTPOOL/$TESTFS1"
+expect=([compression]=on)
+dry_create_parseable expect -p -o compression=on "$TESTPOOL/$TESTFS1/$TESTVOL"
+
+# Sparse volumes should not get a gratuitous refreservation
+expect=([volblocksize]=4096 [volsize]=$((1024 * 1024 * 10)))
+dry_create_parseable expect -b 4k -V 10m -s "$TESTPOOL/$TESTFS1"
+
+# Non-sparse volumes should have refreservation
+expect=(
+    [volblocksize]=4096
+    [volsize]=$((1024 * 1024 * 10))
+    [refreservation]="*"
+)
+dry_create_parseable expect -b 4k -V 10m "$TESTPOOL/$TESTFS1"
+
+log_pass "zfs create -v creates datasets verbosely"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am
index 183578d..664f3d8 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile.am

@@ -2,6 +2,9 @@
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
+	zfs_clone_livelist_condense_and_disable.ksh \
+	zfs_clone_livelist_condense_races.ksh \
+	zfs_clone_livelist_dedup.ksh \
 	zfs_destroy_001_pos.ksh \
 	zfs_destroy_002_pos.ksh \
 	zfs_destroy_003_pos.ksh \
@@ -17,7 +20,10 @@
 	zfs_destroy_013_neg.ksh \
 	zfs_destroy_014_pos.ksh \
 	zfs_destroy_015_pos.ksh \
-	zfs_destroy_016_pos.ksh
+	zfs_destroy_016_pos.ksh \
+	zfs_destroy_clone_livelist.ksh \
+	zfs_destroy_dev_removal.ksh \
+	zfs_destroy_dev_removal_condense.ksh
 
 dist_pkgdata_DATA = \
 	zfs_destroy_common.kshlib \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_and_disable.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_and_disable.ksh
new file mode 100755
index 0000000..ab506de
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_and_disable.ksh

@@ -0,0 +1,125 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Verify zfs destroy test for clones with the livelist feature
+# enabled.
+
+# STRATEGY
+# 1. Clone where livelist is condensed
+#	- create clone, write several files, delete those files
+#	- check that the number of livelist entries decreases
+#	  after the delete
+# 2. Clone where livelist is deactivated
+#	- create clone, write files. Delete those files and the
+#	  file in the filesystem when the snapshot was created
+#	  so the clone and snapshot no longer share data
+#	- check that the livelist is destroyed
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
+
+function cleanup
+{
+	# Restore both tunables to their saved values first, so that a
+	# failing destroy below cannot leave them modified for later tests.
+	set_tunable64 LIVELIST_MAX_ENTRIES $ORIGINAL_MAX
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED $ORIGINAL_MIN
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -Rf
+}
+
+function check_ll_len
+{
+	# $1: text expected in zdb's "Livelist" lines; $2: failure message
+	typeset string="$(zdb -vvvvv $TESTPOOL | grep "Livelist")"
+	typeset substring="$1"
+	typeset msg=$2
+	if test "${string#*$substring}" != "$string"; then
+		return 0    # $substring is in $string
+	fi
+	log_note "$string"
+	log_fail "$msg" # $substring is not in $string
+}
+
+function test_condense
+{
+	# set the max livelist entries to a small value to more easily
+	# trigger a condense
+	set_tunable64 LIVELIST_MAX_ENTRIES 20
+	# set a small percent shared threshold so the livelist is not disabled
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED 10
+	clone_dataset $TESTFS1 snap $TESTCLONE
+
+	# sync between each write to make sure a new entry is created
+	for i in {0..4}; do
+	    log_must mkfile 5m /$TESTPOOL/$TESTCLONE/testfile$i
+	    log_must zpool sync $TESTPOOL
+	done
+
+	check_ll_len "5 entries" "Unexpected livelist size"
+
+	# sync between each write to allow for a condense of the previous entry
+	for i in {0..4}; do
+	    log_must mkfile 5m /$TESTPOOL/$TESTCLONE/testfile$i
+	    log_must zpool sync $TESTPOOL
+	done
+
+	check_ll_len "6 entries" "Condense did not occur"
+
+	log_must zfs destroy $TESTPOOL/$TESTCLONE
+	check_livelist_gone
+}
+
+function test_deactivated
+{
+	# Threshold set to 50 percent
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED 50
+	clone_dataset $TESTFS1 snap $TESTCLONE
+
+	log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$TESTFILE0
+	log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$TESTFILE1
+	log_must zpool sync $TESTPOOL
+	# snapshot and clone share 'atestfile', 33 percent
+	check_livelist_gone
+	log_must zfs destroy -R $TESTPOOL/$TESTCLONE
+
+	# Threshold set to 20 percent
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED 20
+	clone_dataset $TESTFS1 snap $TESTCLONE
+
+	log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$TESTFILE0
+	log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$TESTFILE1
+	log_must mkfile 5m /$TESTPOOL/$TESTCLONE/$TESTFILE2
+	log_must zpool sync $TESTPOOL
+	# snapshot and clone share 'atestfile', 25 percent
+	check_livelist_exists $TESTCLONE
+	log_must rm /$TESTPOOL/$TESTCLONE/atestfile
+	# snapshot and clone share no files
+	check_livelist_gone
+	log_must zfs destroy -R $TESTPOOL/$TESTCLONE
+}
+
+ORIGINAL_MAX=$(get_tunable LIVELIST_MAX_ENTRIES)
+ORIGINAL_MIN=$(get_tunable LIVELIST_MIN_PERCENT_SHARED)
+
+log_onexit cleanup
+log_must zfs create $TESTPOOL/$TESTFS1
+log_must mkfile 5m /$TESTPOOL/$TESTFS1/atestfile
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
+test_condense
+test_deactivated
+
+log_pass "Clone's livelist condenses and disables as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_races.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_races.ksh
new file mode 100755
index 0000000..453b502
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_condense_races.ksh

@@ -0,0 +1,117 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Test race conditions for livelist condensing
+
+# STRATEGY
+# These tests exercise code paths that deal with a livelist being
+# simultaneously condensed and deactivated (deleted, exported or disabled).
+# If a variable is set, the zthr will pause until it is cancelled or waited
+# and then a counter variable keeps track of whether or not the code path is
+# reached.
+
+# 1. Deletion race: repeatedly overwrite the same file to trigger condense
+# and then delete the clone.
+# 2. Disable race: Overwrite enough files to trigger condenses and disabling of
+# the livelist.
+# 3. Export race: repeatedly overwrite the same file to trigger condense and
+# then export the pool.
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
+
+function cleanup
+{
+	# Restore the sublist size and clear the pause tunables first, so
+	# that a failing destroy below cannot leave them set for later tests.
+	set_tunable64 LIVELIST_MAX_ENTRIES $ORIGINAL_MAX
+	set_tunable32 LIVELIST_CONDENSE_ZTHR_PAUSE 0
+	set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 0
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -Rf
+}
+
+function delete_race
+{
+	set_tunable32 "$1" 0
+	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
+	for i in {1..5}; do
+		log_must zpool sync $TESTPOOL
+		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
+	done
+	log_must zfs destroy $TESTPOOL/$TESTCLONE
+	log_must zpool sync $TESTPOOL
+	[[ "1" == "$(get_tunable "$1")" ]] || \
+	    log_fail "delete/condense race test failed"
+}
+
+function export_race
+{
+	set_tunable32 "$1" 0
+	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
+	for i in {1..5}; do
+		log_must zpool sync $TESTPOOL
+		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
+	done
+	log_must zpool export $TESTPOOL
+	log_must zpool import $TESTPOOL
+	[[ "1" == "$(get_tunable "$1")" ]] || \
+	    log_fail "export/condense race test failed"
+	log_must zfs destroy $TESTPOOL/$TESTCLONE
+}
+
+function disable_race
+{
+	set_tunable32 "$1" 0
+	log_must zfs clone $TESTPOOL/$TESTFS1@snap $TESTPOOL/$TESTCLONE
+	for i in {1..5}; do
+		log_must zpool sync $TESTPOOL
+		log_must mkfile 5m /$TESTPOOL/$TESTCLONE/out
+	done
+	# overwrite the file shared with the origin to trigger disable
+	log_must mkfile 100m /$TESTPOOL/$TESTCLONE/atestfile
+	log_must zpool sync $TESTPOOL
+	[[ "1" == "$(get_tunable "$1")" ]] || \
+	    log_fail "disable/condense race test failed"
+	log_must zfs destroy $TESTPOOL/$TESTCLONE
+}
+
+ORIGINAL_MAX=$(get_tunable LIVELIST_MAX_ENTRIES)
+
+log_onexit cleanup
+
+log_must zfs create $TESTPOOL/$TESTFS1
+log_must mkfile 100m /$TESTPOOL/$TESTFS1/atestfile
+log_must zpool sync $TESTPOOL
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
+
+# Reduce livelist size to trigger condense more easily
+set_tunable64 LIVELIST_MAX_ENTRIES 20
+
+# Test cancellation path in the zthr
+set_tunable32 LIVELIST_CONDENSE_ZTHR_PAUSE 1
+set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 0
+disable_race LIVELIST_CONDENSE_ZTHR_CANCEL
+delete_race LIVELIST_CONDENSE_ZTHR_CANCEL
+export_race LIVELIST_CONDENSE_ZTHR_CANCEL
+
+# Test cancellation path in the synctask
+set_tunable32 LIVELIST_CONDENSE_ZTHR_PAUSE 0
+set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 1
+disable_race LIVELIST_CONDENSE_SYNC_CANCEL
+delete_race LIVELIST_CONDENSE_SYNC_CANCEL
+
+log_pass "Clone livelist condense race conditions passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_dedup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_dedup.ksh
new file mode 100755
index 0000000..5f35696
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_clone_livelist_dedup.ksh

@@ -0,0 +1,88 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Verify zfs destroy test for clones with livelists that contain
+# dedup blocks. This test is a baseline regression test created
+# to ensure that past bugs that we've encountered between dedup
+# and the livelist logic don't resurface.
+
+# STRATEGY
+# 1. Create a clone from a test filesystem and enable dedup.
+# 2. Write some data and create a livelist.
+# 3. Copy the data within the clone to create dedup blocks.
+# 4. Remove some of the dedup data to create multiple free
+#    entries for the same block pointers.
+# 5. Process all the livelist entries by destroying the clone.
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
+
+function cleanup
+{
+	# Restore the saved tunable first so a failing destroy cannot skip it.
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED $ORIGINAL_MIN_SHARED
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -Rf
+}
+
+function test_dedup
+{
+	# Set a small percent shared threshold so the livelist is not disabled
+	set_tunable32 LIVELIST_MIN_PERCENT_SHARED 10
+	clone_dataset $TESTFS1 snap $TESTCLONE
+
+	# Enable dedup
+	log_must zfs set dedup=on $TESTPOOL/$TESTCLONE
+
+	# Create some data to be deduped
+	log_must dd if=/dev/urandom of="/$TESTPOOL/$TESTCLONE/data" bs=512 count=10k
+
+	# Create dedup blocks
+	# Note: We sync before and after so all dedup blocks belong to the
+	#       same TXG, otherwise they won't look identical to the livelist
+	#       iterator due to their logical birth TXG being different.
+	log_must zpool sync $TESTPOOL
+	log_must cp /$TESTPOOL/$TESTCLONE/data /$TESTPOOL/$TESTCLONE/data-dup-0
+	log_must cp /$TESTPOOL/$TESTCLONE/data /$TESTPOOL/$TESTCLONE/data-dup-1
+	log_must cp /$TESTPOOL/$TESTCLONE/data /$TESTPOOL/$TESTCLONE/data-dup-2
+	log_must cp /$TESTPOOL/$TESTCLONE/data /$TESTPOOL/$TESTCLONE/data-dup-3
+	log_must zpool sync $TESTPOOL
+	check_livelist_exists $TESTCLONE
+
+	# Introduce "double frees"
+	#   We want to introduce consecutive FREEs of the same block as this
+	#   was what triggered past panics.
+	# Note: Similarly to the previous step we sync before and after
+	#       our deletions so all the entries end up in the same TXG.
+	log_must zpool sync $TESTPOOL
+	log_must rm /$TESTPOOL/$TESTCLONE/data-dup-2
+	log_must rm /$TESTPOOL/$TESTCLONE/data-dup-3
+	log_must zpool sync $TESTPOOL
+	check_livelist_exists $TESTCLONE
+
+	log_must zfs destroy $TESTPOOL/$TESTCLONE
+	check_livelist_gone
+}
+
+ORIGINAL_MIN_SHARED=$(get_tunable LIVELIST_MIN_PERCENT_SHARED)
+
+log_onexit cleanup
+log_must zfs create $TESTPOOL/$TESTFS1
+log_must mkfile 5m /$TESTPOOL/$TESTFS1/atestfile
+log_must zfs snapshot $TESTPOOL/$TESTFS1@snap
+test_dedup
+
+log_pass "Clone's livelist processes dedup blocks as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh
index 26857d4..11157e9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_001_pos.ksh

@@ -77,9 +77,7 @@
 	fi
 
 	# Clean the test environment and make it clear.
-	if datasetexists $CTR; then
-		log_must zfs destroy -Rf $CTR
-	fi
+	datasetexists $CTR && destroy_dataset $CTR -Rf
 
 	# According to option create test compatible environment.
 	case $opt in

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh
index 04e9713..8b7e59b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh

@@ -50,8 +50,7 @@
 function cleanup
 {
 	for obj in $ctr2 $ctr1 $ctr; do
-		datasetexists $obj && \
-			log_must zfs destroy -Rf $obj
+		datasetexists $obj && destroy_dataset $obj -Rf
 	done
 
 	for mntp in $TESTDIR1 $TESTDIR2; do
@@ -142,14 +141,14 @@
 log_note "Verify that 'zfs destroy -R' succeeds to destroy dataset " \
 	"with dependent clone outside it."
 
-log_must zfs destroy -R $ctr1
+log_must_busy zfs destroy -R $ctr1
 datasetexists $ctr1 && \
 	log_fail "'zfs destroy -R' fails to destroy dataset with clone outside it."
 
 log_note "Verify that 'zfs destroy -r' succeeds to destroy dataset " \
 	"without dependent clone outside it."
 
-log_must zfs destroy -r $ctr
+log_must_busy zfs destroy -r $ctr
 datasetexists $ctr && \
 	log_fail "'zfs destroy -r' fails to destroy dataset with clone outside it."
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_004_pos.ksh
index 3db1331..9a2ff6b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_004_pos.ksh

@@ -49,15 +49,11 @@
 {
 	cd $olddir
 
-	datasetexists $clone && \
-		log_must zfs destroy -f $clone
-
-	snapexists $snap && \
-		log_must zfs destroy -f $snap
+	datasetexists $clone && destroy_dataset $clone -f
+	snapexists $snap && destroy_dataset $snap -f
 
 	for fs in $fs1 $fs2; do
-		datasetexists $fs && \
-			log_must zfs destroy -f $fs
+		datasetexists $fs && destroy_dataset $fs -f
 	done
 
 	for dir in $TESTDIR1 $TESTDIR2; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_007_neg.ksh
index 70ad45a..57eb736 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_007_neg.ksh

@@ -51,8 +51,8 @@
 	if datasetexists $clonesnap; then
 		log_must zfs promote $fs
 	fi
-	datasetexists $clone && log_must zfs destroy $clone
-	datasetexists $fssnap && log_must zfs destroy $fssnap
+	datasetexists $clone && destroy_dataset $clone
+	datasetexists $fssnap && destroy_dataset $fssnap
 }
 
 log_assert "Destroy dataset which is namespace-parent of origin should failed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh
index 58c4cfb..e150cdd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_014_pos.ksh

@@ -45,7 +45,7 @@
 function cleanup
 {
 	for ds in $datasets; do
-		datasetexists $ds && zfs destroy -rf $ds
+		datasetexists $ds && destroy_dataset $ds -rf
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_015_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_015_pos.ksh
index f399ad2..f1868f5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_015_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_015_pos.ksh

@@ -19,7 +19,7 @@
 #	snapshots from the same datasets
 #
 # STRATEGY
-#	1. Create multiple snapshots for the same datset
+#	1. Create multiple snapshots for the same dataset
 #	2. Run zfs destroy for these snapshots for a mix of valid and
 #	   invalid snapshot names
 #	3. Run zfs destroy for snapshots from different datasets and
@@ -30,8 +30,8 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/$TESTFS1 && zfs destroy -R $TESTPOOL/$TESTFS1
-	datasetexists $TESTPOOL/$TESTFS2 && zfs destroy -R $TESTPOOL/$TESTFS2
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -R
+	datasetexists $TESTPOOL/$TESTFS2 && destroy_dataset $TESTPOOL/$TESTFS2 -R
 	poolexists $TESTPOOL2 && zpool destroy $TESTPOOL2
 	rm -rf $VIRTUAL_DISK
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh
index 1e129dd..93c8c63 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_016_pos.ksh

@@ -30,18 +30,18 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-	    log_must zfs destroy -R $TESTPOOL/$TESTFS1
+	    destroy_dataset $TESTPOOL/$TESTFS1 -R
 	datasetexists $TESTPOOL/$TESTVOL && \
-	    log_must zfs destroy -Rf $TESTPOOL/$TESTVOL
+	    destroy_dataset $TESTPOOL/$TESTVOL -Rf
 }
 
 function setup_snapshots
 {
 	for i in $snaps; do
 		datasetexists $TESTPOOL/$TESTFS1@snap$i && \
-		    log_must zfs destroy $TESTPOOL/$TESTFS1@snap$i
+		    destroy_dataset $TESTPOOL/$TESTFS1@snap$i
 		datasetexists $TESTPOOL/$TESTVOL@snap$i && \
-		    log_must zfs destroy $TESTPOOL/$TESTVOL@snap$i
+		    destroy_dataset $TESTPOOL/$TESTVOL@snap$i
 		log_must zfs snapshot $TESTPOOL/$TESTFS1@snap$i
 		log_must zfs snapshot $TESTPOOL/$TESTVOL@snap$i
 	done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_clone_livelist.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_clone_livelist.ksh
new file mode 100755
index 0000000..e7663ef
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_clone_livelist.ksh

@@ -0,0 +1,164 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018, 2020 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Verify zfs destroy test for clones with the livelist feature
+# enabled.
+
+# STRATEGY
+# 1. One clone with an empty livelist
+#	- create the clone, check that livelist exists
+#	- delete the clone, check that livelist is eventually
+#	  destroyed
+# 2. One clone with populated livelist
+#	- create the clone, check that livelist exists
+#	- write multiple files to the clone
+#	- delete the clone, check that livelist is eventually
+#	  destroyed
+# 3. Multiple clones with empty livelists
+#	- same as 1. but with multiple clones
+# 4. Multiple clones with populated livelists
+#	- same as 2. but with multiple clones
+# 5. Clone of clone with populated livelists with promote
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
+
+function cleanup
+{
+	if datasetexists ${TESTPOOL}/${TESTFS1}; then destroy_dataset ${TESTPOOL}/${TESTFS1} -R; fi
+	# put LIVELIST_MAX_ENTRIES back to the value saved before the test changed it
+	set_tunable64 LIVELIST_MAX_ENTRIES ${ORIGINAL_MAX}
+}
+
+function clone_write_file
+{
+	log_must mkfile 1m /${TESTPOOL}/${1}/${2}
+	log_must zpool sync ${TESTPOOL}
+}
+
+function test_one_empty
+{
+	clone_dataset ${TESTFS1} snap ${TESTCLONE}
+	# nothing is written to the clone before it is destroyed
+	log_must zfs destroy ${TESTPOOL}/${TESTCLONE}
+	check_livelist_gone
+}
+
+function test_one
+{
+	typeset file
+	clone_dataset $TESTFS1 snap $TESTCLONE
+	for file in $TESTFILE0 $TESTFILE1 $TESTFILE2; do
+		clone_write_file $TESTCLONE $file
+	done
+	log_must rm /${TESTPOOL}/${TESTCLONE}/${TESTFILE0}
+	log_must rm /${TESTPOOL}/${TESTCLONE}/${TESTFILE2}
+	check_livelist_exists $TESTCLONE
+
+	log_must zfs destroy ${TESTPOOL}/${TESTCLONE}
+	check_livelist_gone
+}
+
+function test_multiple_empty
+{
+	typeset name
+	for name in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
+		clone_dataset $TESTFS1 snap $name
+	done
+	for name in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
+		log_must zfs destroy $TESTPOOL/$name
+	done
+	check_livelist_gone
+}
+
+function test_multiple
+{
+	typeset name
+	for name in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
+		clone_dataset $TESTFS1 snap $name
+	done
+	clone_write_file $TESTCLONE $TESTFILE0
+
+	clone_write_file $TESTCLONE1 $TESTFILE0
+	clone_write_file $TESTCLONE1 $TESTFILE1
+	clone_write_file $TESTCLONE1 $TESTFILE2
+
+	clone_write_file $TESTCLONE2 $TESTFILE0
+	log_must rm /${TESTPOOL}/${TESTCLONE2}/${TESTFILE0}
+	clone_write_file $TESTCLONE2 $TESTFILE1
+	log_must rm /${TESTPOOL}/${TESTCLONE2}/${TESTFILE1}
+
+	for name in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
+		check_livelist_exists $name
+	done
+
+	for name in $TESTCLONE $TESTCLONE1 $TESTCLONE2; do
+		log_must zfs destroy $TESTPOOL/$name
+	done
+	check_livelist_gone
+}
+
+function test_promote
+{
+	clone_dataset ${TESTFS1} snap ${TESTCLONE}
+	# after the promote no livelist should be left in the pool
+	log_must zfs promote ${TESTPOOL}/${TESTCLONE}
+	check_livelist_gone
+	log_must zfs destroy -R ${TESTPOOL}/${TESTCLONE}
+}
+
+function test_clone_clone_promote
+{
+	log_must zfs create ${TESTPOOL}/fs
+	log_must dd if=/dev/zero of=/${TESTPOOL}/fs/file bs=128k count=100
+	log_must zfs snapshot ${TESTPOOL}/fs@snap
+	log_must zfs clone ${TESTPOOL}/fs@snap ${TESTPOOL}/clone
+	log_must dd if=/dev/zero of=/${TESTPOOL}/clone/clonefile bs=128k count=10
+	log_must zfs snapshot ${TESTPOOL}/clone@csnap
+	log_must zfs clone ${TESTPOOL}/clone@csnap ${TESTPOOL}/cloneclone
+
+	check_livelist_exists clone
+	check_livelist_exists cloneclone
+
+	# Promoting the second-level clone is expected to drop both livelists
+	log_must zfs promote ${TESTPOOL}/cloneclone
+	check_livelist_gone
+
+	# No livelist is left, so this destroy must work without one
+	log_must zfs destroy ${TESTPOOL}/clone
+	log_must zdb -bcc ${TESTPOOL}
+}
+
+ORIGINAL_MAX=$(get_tunable LIVELIST_MAX_ENTRIES)
+
+log_onexit cleanup
+log_must zfs create ${TESTPOOL}/${TESTFS1}
+log_must mkfile 20m /${TESTPOOL}/${TESTFS1}/atestfile
+log_must zfs snapshot ${TESTPOOL}/${TESTFS1}@snap
+
+# a small per-entry limit makes it easy to end up with multi-entry livelists
+set_tunable64 LIVELIST_MAX_ENTRIES 20
+
+# Run the scenarios sequentially: empty and populated livelists for one and
+# for several clones, then the two promote cases.
+for scenario in test_one_empty test_one test_multiple_empty test_multiple \
+    test_promote test_clone_clone_promote; do
+	$scenario
+done
+
+log_pass "Clone with the livelist feature enabled could be destroyed," \
+	"also could be promoted and destroyed as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
index 9a75dae..1a20b7a 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_common.kshlib

@@ -25,7 +25,7 @@
 #
 
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -61,12 +61,7 @@
 		log_must zfs create -V $VOLSIZE $VOL
 		block_device_wait
 
-		echo "y" | newfs $ZVOL_DEVDIR/$VOL > /dev/null 2>&1
-		if (( $? == 0 )); then
-			log_note "SUCCESS: newfs $ZVOL_DEVDIR/$VOL>/dev/null"
-		else
-			log_fail "newfs $ZVOL_DEVDIR/$VOL > /dev/null"
-		fi
+		log_must new_fs $ZVOL_DEVDIR/$VOL
 
 		if [[ ! -d $TESTDIR1 ]]; then
 			log_must mkdir $TESTDIR1
@@ -107,9 +102,7 @@
 
 	pkill mkbusy
 
-	if datasetexists $CTR; then
-		log_must zfs destroy -Rf $CTR
-	fi
+	datasetexists $CTR && destroy_dataset $CTR -Rf
 }
 
 #
@@ -146,3 +139,34 @@
 		done
 	fi
 }
+
+# Fail the test unless zdb's verbose dump of the clone mentions a livelist.
+# $1   clone name, relative to $TESTPOOL
+function check_livelist_exists
+{
+	zdb -vvvvv ${TESTPOOL}/${1} | grep "Livelist" || \
+		log_fail "zdb could not find Livelist"
+}
+
+# Wait for the pool's deferred frees to finish and sync, then fail the test
+# if zdb still reports a livelist anywhere in $TESTPOOL.
+function check_livelist_gone
+{
+	log_must zpool wait -t free $TESTPOOL
+	log_must zpool sync
+	zdb -vvvvv $TESTPOOL | grep "Livelist" && \
+		log_fail "zdb found Livelist after the clone is deleted."
+}
+
+# Clone $TESTPOOL/$1@$2 to $TESTPOOL/$3, then verify that the clone exists
+# and that zdb reports a livelist for it.
+# $1    fs name
+# $2    snap name
+# $3    clone name
+function clone_dataset
+{
+	typeset clone=$TESTPOOL/$3
+	log_must zfs clone $TESTPOOL/$1@$2 $clone
+	datasetexists $clone || log_fail "zfs clone $clone fail."
+	check_livelist_exists $3
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal.ksh
new file mode 100755
index 0000000..107c133
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal.ksh

@@ -0,0 +1,68 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Verify that livelists tracking remapped blocks can be
+# properly destroyed.
+
+# STRATEGY
+# 1. Create a pool with disk1 and create a filesystem, snapshot
+# and clone. Write several files to the clone.
+# 2. Add disk2 to the pool and then remove disk1, triggering a
+# remap of the blkptrs tracked in the livelist.
+# 3. Delete the clone
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+function cleanup
+{
+	if poolexists ${TESTPOOL2}; then zpool destroy ${TESTPOOL2}; fi
+	[[ -f ${VIRTUAL_DISK1} ]] && log_must rm ${VIRTUAL_DISK1}
+	[[ -f ${VIRTUAL_DISK2} ]] && log_must rm ${VIRTUAL_DISK2}
+}
+
+log_onexit cleanup
+
+VIRTUAL_DISK1=${TEST_BASE_DIR}/disk1
+VIRTUAL_DISK2=${TEST_BASE_DIR}/disk2
+log_must truncate -s $(($MINVDEVSIZE * 8)) ${VIRTUAL_DISK1}
+log_must truncate -s $(($MINVDEVSIZE * 16)) ${VIRTUAL_DISK2}
+
+log_must zpool create ${TESTPOOL2} ${VIRTUAL_DISK1}
+log_must poolexists ${TESTPOOL2}
+
+log_must zfs create ${TESTPOOL2}/${TESTFS}
+log_must mkfile 25m /${TESTPOOL2}/${TESTFS}/atestfile
+log_must zfs snapshot ${TESTPOOL2}/${TESTFS}@snap
+
+log_must zfs clone ${TESTPOOL2}/${TESTFS}@snap ${TESTPOOL2}/${TESTCLONE}
+
+for tf in $TESTFILE0 $TESTFILE1 $TESTFILE2; do
+	log_must mkfile 1m /${TESTPOOL2}/${TESTCLONE}/$tf
+done
+
+log_must zpool add ${TESTPOOL2} ${VIRTUAL_DISK2}
+log_must zpool remove ${TESTPOOL2} ${VIRTUAL_DISK1}
+wait_for_removal ${TESTPOOL2}
+
+log_must rm /${TESTPOOL2}/${TESTCLONE}/${TESTFILE0}
+log_must rm /${TESTPOOL2}/${TESTCLONE}/${TESTFILE1}
+
+log_must zfs destroy ${TESTPOOL2}/${TESTCLONE}
+
+log_pass "Clone with the livelist feature and remapped blocks," \
+	"can be destroyed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal_condense.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal_condense.ksh
new file mode 100755
index 0000000..ab646da
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_dev_removal_condense.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION
+# Verify that livelists tracking remapped blocks can be
+# properly condensed.
+
+# STRATEGY
+# 1. Create a pool with disk1 and create a filesystem, snapshot
+# and clone. Create two files for the first livelist entry and
+# pause condensing.
+# 2. Add disk2 to the pool and then remove disk1, triggering a
+# remap of the blkptrs tracked in the livelist.
+# 3. Overwrite the first file several times to trigger a condense,
+# overwrite the second file once and resume condensing, now with
+# extra blkptrs added during the remap
+# 4. Check that the test added new ALLOC blkptrs mid-condense using
+# a variable set in that code path
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_destroy/zfs_destroy.cfg
+
+function cleanup
+{
+	# un-pause condensing in case the test failed while it was paused
+	set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 0
+	poolexists $TESTPOOL2 && zpool destroy $TESTPOOL2
+	set_tunable64 LIVELIST_MAX_ENTRIES $ORIGINAL_MAX	# reset livelist max size
+	log_must rm -f $VIRTUAL_DISK1 $VIRTUAL_DISK2
+}
+
+log_onexit cleanup
+
+ORIGINAL_MAX=$(get_tunable LIVELIST_MAX_ENTRIES)
+set_tunable64 LIVELIST_MAX_ENTRIES 20
+
+VIRTUAL_DISK1=$TEST_BASE_DIR/disk1
+VIRTUAL_DISK2=$TEST_BASE_DIR/disk2
+log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK1
+log_must truncate -s $(($MINVDEVSIZE * 16)) $VIRTUAL_DISK2
+
+log_must zpool create $TESTPOOL2 $VIRTUAL_DISK1
+log_must poolexists $TESTPOOL2
+
+log_must zfs create $TESTPOOL2/$TESTFS
+log_must mkfile 100m /$TESTPOOL2/$TESTFS/atestfile
+log_must zfs snapshot $TESTPOOL2/$TESTFS@snap
+
+log_must zfs clone $TESTPOOL2/$TESTFS@snap $TESTPOOL2/$TESTCLONE
+
+# Create initial files and pause condense zthr on next execution
+log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
+log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/B
+log_must zpool sync $TESTPOOL2
+set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 1
+
+# Add a new dev and remove the old one
+log_must zpool add $TESTPOOL2 $VIRTUAL_DISK2
+log_must zpool remove $TESTPOOL2 $VIRTUAL_DISK1
+wait_for_removal $TESTPOOL2
+
+set_tunable32 LIVELIST_CONDENSE_NEW_ALLOC 0
+# Trigger a condense
+log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
+log_must zpool sync $TESTPOOL2
+log_must mkfile 10m /$TESTPOOL2/$TESTCLONE/A
+log_must zpool sync $TESTPOOL2
+# Write remapped blkptrs which will modify the livelist mid-condense
+log_must mkfile 1m /$TESTPOOL2/$TESTCLONE/B
+
+# Resume the condense zthr
+set_tunable32 LIVELIST_CONDENSE_SYNC_PAUSE 0
+log_must zpool sync $TESTPOOL2
+# Check (numerically) that new ALLOC blkptrs were added during the condense
+[[ "$(get_tunable LIVELIST_CONDENSE_NEW_ALLOC)" -gt 0 ]] || \
+    log_fail "removal/condense test failed"
+
+log_must zfs destroy $TESTPOOL2/$TESTCLONE
+log_pass "Clone with the livelist feature and remapped blocks," \
+	"can be condensed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am
index db90e05..bfb01dc 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am

@@ -8,6 +8,7 @@
 	zfs_diff_changes.ksh \
 	zfs_diff_cliargs.ksh \
 	zfs_diff_encrypted.ksh \
+	zfs_diff_mangle.ksh \
 	zfs_diff_timestamp.ksh \
 	zfs_diff_types.ksh
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/socket.c b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/socket.c
index 2fe9de7..a8c814e 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/socket.c
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/socket.c

@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 /* ARGSUSED */
 int

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh
index c4b42af..67eb18f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh

@@ -32,17 +32,15 @@
 function cleanup
 {
 	for snap in $TESTSNAP1 $TESTSNAP2; do
-		if snapexists "$snap"; then
-			log_must zfs destroy "$snap"
-		fi
+		snapexists "$snap" && destroy_dataset "$snap"
 	done
 }
 
 log_assert "'zfs diff' should only work with supported options."
 log_onexit cleanup
 
-typeset goodopts=("" "-F" "-H" "-t" "-FH" "-Ft" "-Ht" "-FHt")
-typeset badopts=("-f" "-h" "-h" "-T" "-Fx" "-Ho" "-tT" "-")
+typeset goodopts=("" "-h" "-t" "-th" "-H" "-Hh" "-Ht" "-Hth" "-F" "-Fh" "-Ft" "-Fth" "-FH" "-FHh" "-FHt" "-FHth")
+typeset badopts=("-f" "-T" "-Fx" "-Ho" "-tT" "-")
 
 DATASET="$TESTPOOL/$TESTFS"
 TESTSNAP1="$DATASET@snap1"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh
new file mode 100755
index 0000000..ffce9f0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh

@@ -0,0 +1,48 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# 'zfs diff' escapes filenames as expected, 'zfs diff -h' doesn't
+#
+# STRATEGY:
+# 1. Prepare a dataset
+# 2. Create some files
+# 3. verify 'zfs diff' mangles them and 'zfs diff -h' doesn't
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	datasetexists "$DATASET" && destroy_dataset "$DATASET" -r	# skip if never created
+}
+
+log_assert "'zfs diff' mangles filenames, 'zfs diff -h' doesn't"
+log_onexit cleanup
+
+DATASET="$TESTPOOL/$TESTFS/fs"
+TESTSNAP1="$DATASET@snap1"
+
+# 1. Prepare a dataset
+log_must zfs create "$DATASET"
+MNTPOINT="$(get_prop mountpoint "$DATASET")"
+log_must zfs snapshot "$TESTSNAP1"
+
+printf '%c\t'"$MNTPOINT/"'%s\n' M '' + 'śmieszny żupan'                       + 'достопримечательности'                                                                                                                                                                                              | sort > "$MNTPOINT/śmieszny żupan"
+printf '%c\t'"$MNTPOINT/"'%s\n' M '' + '\0305\0233mieszny\0040\0305\0274upan' + '\0320\0264\0320\0276\0321\0201\0321\0202\0320\0276\0320\0277\0321\0200\0320\0270\0320\0274\0320\0265\0321\0207\0320\0260\0321\0202\0320\0265\0320\0273\0321\0214\0320\0275\0320\0276\0321\0201\0321\0202\0320\0270' | sort > "$MNTPOINT/достопримечательности"
+log_must diff -u <(zfs diff -h "$TESTSNAP1" | grep -vF '<xattrdir>' | sort) "$MNTPOINT/śmieszny żupan"
+log_must diff -u <(zfs diff    "$TESTSNAP1" | grep -vF '<xattrdir>' | sort) "$MNTPOINT/достопримечательности"
+
+log_pass "'zfs diff' mangles filenames, 'zfs diff -h' doesn't"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh
index 55dd8b6..0d08cf6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_timestamp.ksh

@@ -31,9 +31,7 @@
 function cleanup
 {
 	for snap in $TESTSNAP1 $TESTSNAP2; do
-		if snapexists "$snap"; then
-			log_must zfs destroy "$snap"
-		fi
+		snapexists "$snap" && destroy_dataset "$snap"
 	done
 	find "$MNTPOINT" -type f -delete
 	rm -f "$FILEDIFF"
@@ -50,7 +48,7 @@
 
 	while (( i < count )); do
 		log_must touch "$fspath/file$i"
-		sleep $(random 3)
+		sleep $(random_int_between 1 3)
 		(( i = i + 1 ))
 	done
 }
@@ -84,7 +82,7 @@
 		continue;
 	fi
 
-	filetime="$(stat -c '%Z' $file)"
+	filetime=$(stat_ctime $file)
 	if [[ "$filetime" != "$ctime" ]]; then
 		log_fail "Unexpected ctime for file $file ($filetime != $ctime)"
 	else

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_types.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_types.ksh
index 9c81084..8e521b9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_types.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_types.ksh

@@ -70,8 +70,13 @@
 TESTSNAP1="$DATASET@snap1"
 TESTSNAP2="$DATASET@snap2"
 FILEDIFF="$TESTDIR/zfs-diff.txt"
-MAJOR=$(stat -c %t /dev/null)
-MINOR=$(stat -c %T /dev/null)
+if is_freebsd; then
+	MAJOR=$(stat -f %Hr /dev/null)
+	MINOR=$(stat -f %Lr /dev/null)
+else
+	MAJOR=$(stat -c %t /dev/null)
+	MINOR=$(stat -c %T /dev/null)
+fi
 
 # 1. Prepare a dataset
 log_must zfs create $DATASET
@@ -106,7 +111,11 @@
 
 # 2. | (Named pipe)
 log_must zfs snapshot "$TESTSNAP1"
-log_must mknod "$MNTPOINT/fifo" p
+if is_freebsd; then
+    log_must mkfifo "$MNTPOINT/fifo"
+else
+    log_must mknod "$MNTPOINT/fifo" p
+fi
 log_must zfs snapshot "$TESTSNAP2"
 verify_object_class "$MNTPOINT/fifo" "|"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh
index 92d5194..3547fb7 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh

@@ -27,6 +27,7 @@
 
 #
 # Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2021 Matt Fiddaman
 #
 
 . $STF_SUITE/tests/functional/cli_root/zfs_get/zfs_get_common.kshlib
@@ -58,16 +59,22 @@
 
 typeset zfs_props=("type" used available creation volsize referenced \
     compressratio mounted origin recordsize quota reservation mountpoint \
-    sharenfs checksum compression atime devices exec readonly setuid zoned \
-    snapdir acltype aclinherit canmount primarycache secondarycache \
-    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \
-    version)
-
+    sharenfs checksum compression atime devices exec readonly setuid \
+    snapdir aclinherit canmount primarycache secondarycache version \
+    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots)
+if is_freebsd; then
+	typeset zfs_props_os=(jailed aclmode)
+else
+	typeset zfs_props_os=(zoned acltype)
+fi
 typeset userquota_props=(userquota@root groupquota@root userused@root \
     groupused@root)
-typeset all_props=("${zfs_props[@]}" "${userquota_props[@]}")
+typeset all_props=("${zfs_props[@]}" \
+    "${zfs_props_os[@]}" \
+    "${userquota_props[@]}")
 typeset dataset=($TESTPOOL/$TESTCTR $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL \
-	$TESTPOOL/$TESTFS@$TESTSNAP $TESTPOOL/$TESTVOL@$TESTSNAP)
+	$TESTPOOL/$TESTFS@$TESTSNAP $TESTPOOL/$TESTVOL@$TESTSNAP
+	$TESTPOOL/$TESTFS@$TESTSNAP1 $TESTPOOL/$TESTCLONE)
 
 typeset bookmark_props=(creation)
 typeset bookmark=($TESTPOOL/$TESTFS#$TESTBKMARK $TESTPOOL/$TESTVOL#$TESTBKMARK)
@@ -97,6 +104,7 @@
 
 			if [[ $item == $p ]]; then
 				((found += 1))
+				cols=$(echo $line | awk '{print NF}')
 				break
 			fi
 		done < $TESTDIR/$TESTFILE0
@@ -104,6 +112,9 @@
 		if ((found == 0)); then
 			log_fail "'zfs get $opt $props $dst' return " \
 			    "error message.'$p' haven't been found."
+		elif [[ "$opt" == "-p" ]] && ((cols != 4)); then
+			log_fail "'zfs get $opt $props $dst' returned " \
+			    "$cols columns instead of 4."
 		fi
 	done
 
@@ -118,6 +129,10 @@
 create_snapshot $TESTPOOL/$TESTFS $TESTSNAP
 create_snapshot $TESTPOOL/$TESTVOL $TESTSNAP
 
+# Create second snapshot and clone it
+create_snapshot $TESTPOOL/$TESTFS $TESTSNAP1
+create_clone $TESTPOOL/$TESTFS@$TESTSNAP1 $TESTPOOL/$TESTCLONE
+
 # Create filesystem and volume's bookmark
 create_bookmark $TESTPOOL/$TESTFS $TESTSNAP $TESTBKMARK
 create_bookmark $TESTPOOL/$TESTVOL $TESTSNAP $TESTBKMARK

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh
index f49f58e..c374651 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh

@@ -49,13 +49,19 @@
 
 typeset zfs_props=("type" used available creation volsize referenced \
     compressratio mounted origin recordsize quota reservation mountpoint \
-    sharenfs checksum compression atime devices exec readonly setuid zoned \
-    snapdir acltype aclinherit canmount primarycache secondarycache \
-    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots version)
-
+    sharenfs checksum compression atime devices exec readonly setuid \
+    snapdir aclinherit canmount primarycache secondarycache version \
+    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots)
+if is_freebsd; then
+	typeset zfs_props_os=(jailed aclmode)
+else
+	typeset zfs_props_os=(zoned acltype)
+fi
 typeset userquota_props=(userquota@root groupquota@root userused@root \
     groupused@root)
-typeset props=("${zfs_props[@]}" "${userquota_props[@]}")
+typeset props=("${zfs_props[@]}" \
+    "${zfs_props_os[@]}" \
+    "${userquota_props[@]}")
 typeset dataset=($TESTPOOL/$TESTCTR $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL \
 	$TESTPOOL/$TESTFS@$TESTSNAP $TESTPOOL/$TESTVOL@$TESTSNAP)
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh
index 4bd6113..3bc4c62 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_004_pos.ksh

@@ -47,11 +47,9 @@
 {
 	[[ -e $propfile ]] && rm -f $propfile
 
-	datasetexists $clone  && \
-		log_must zfs destroy $clone
+	datasetexists $clone  && destroy_dataset $clone
 	for snap in $fssnap $volsnap ; do
-		snapexists $snap && \
-			log_must zfs destroy $snap
+		snapexists $snap && destroy_dataset $snap
 	done
 
 	if [[ -n $globalzone ]] ; then
@@ -64,8 +62,7 @@
 		done
 	else
 		for fs in $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS2 $TESTPOOL/$TESTFS3; do
-			datasetexists $fs && \
-				log_must zfs destroy -rf $fs
+			datasetexists $fs && destroy_dataset $fs -rf
 		done
 	fi
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh
index 2de640f..510c545 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh

@@ -47,13 +47,19 @@
 typeset val_opts=(p r H)
 typeset v_props=(type used available creation volsize referenced compressratio \
     mounted origin recordsize quota reservation mountpoint sharenfs checksum \
-    compression atime devices exec readonly setuid zoned snapdir acltype \
+    compression atime devices exec readonly setuid snapdir version \
     aclinherit canmount primarycache secondarycache \
-    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots version)
-
+    usedbychildren usedbydataset usedbyrefreservation usedbysnapshots)
+if is_freebsd; then
+	typeset v_props_os=(jailed aclmode)
+else
+	typeset v_props_os=(zoned acltype)
+fi
 typeset  userquota_props=(userquota@root groupquota@root userused@root \
     groupused@root)
-typeset val_props=("${v_props[@]}" "${userquota_props[@]}")
+typeset val_props=("${v_props[@]}" \
+    "${v_props_os[@]}" \
+    "${userquota_props[@]}")
 set -f	# Force shell does not parse '?' and '*' as the wildcard
 typeset inval_opts=(P R h ? *)
 typeset inval_props=(Type 0 ? * -on --on readonl time USED RATIO MOUNTED)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh
index 29bd10d..296fe99 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh

@@ -52,14 +52,19 @@
 
 set -A props type used available creation volsize referenced compressratio \
 	mounted origin recordsize quota reservation mountpoint sharenfs \
-	checksum compression atime devices exec readonly setuid zoned snapdir \
-	acltype aclinherit canmount primarycache secondarycache \
+	checksum compression atime devices exec readonly setuid snapdir \
+	aclinherit canmount primarycache secondarycache \
 	usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \
 	userquota@root groupquota@root userused@root groupused@root
+if is_freebsd; then
+	set -A props ${props[*]} jailed aclmode
+else
+	set -A props ${props[*]} zoned acltype
+fi
 
 zfs upgrade -v > /dev/null 2>&1
 if [[ $? -eq 0 ]]; then
-	set -A all_props ${all_props[*]} version
+	set -A props ${props[*]} version
 fi
 
 set -A dataset $TESTPOOL/$TESTCTR $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh
index d4ebbb1..2580070 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh

@@ -39,7 +39,7 @@
 # STRATEGY:
 #	1. Create a multiple depth filesystem.
 #	2. 'zfs get -d <n>' to get the output.
-#	3. 'zfs get -r|egrep' to get the expected output.
+#	3. 'zfs get -r|grep' to get the expected output.
 #	4. Compare the two outputs, they should be same.
 #
 
@@ -55,9 +55,14 @@
 set -A all_props type used available creation volsize referenced \
 	compressratio mounted origin recordsize quota reservation mountpoint \
 	sharenfs checksum compression atime devices exec readonly setuid \
-	zoned snapdir acltype aclinherit canmount primarycache secondarycache \
+	snapdir aclinherit canmount primarycache secondarycache \
 	usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \
 	userquota@root groupquota@root userused@root groupused@root
+if is_freebsd; then
+	set -A all_props ${all_props[*]} jailed aclmode
+else
+	set -A all_props ${all_props[*]} zoned acltype
+fi
 
 zfs upgrade -v > /dev/null 2>&1
 if [[ $? -eq 0 ]]; then
@@ -81,7 +86,7 @@
 	done
 	for prop in $(gen_option_str "${all_props[*]}" "" "," $prop_numb); do
 		log_must eval "zfs get -H -d $dp -o name $prop $DEPTH_FS > $DEPTH_OUTPUT"
-		log_must eval "zfs get -rH -o name $prop $DEPTH_FS | egrep -e '$eg_opt' > $EXPECT_OUTPUT"
+		log_must eval "zfs get -rH -o name $prop $DEPTH_FS | grep -E '$eg_opt' > $EXPECT_OUTPUT"
 		log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT
 	done
 	(( old_val=dp ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_common.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_common.kshlib
index d8cb9af..9b4eecf 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_common.kshlib

@@ -26,6 +26,7 @@
 
 #
 # Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2021 Matt Fiddaman
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -87,8 +88,8 @@
 }
 
 #
-# Cleanup the volume snapshot, filesystem snapshot, volume bookmark, and
-# filesystem bookmark that were created for this test case.
+# Cleanup the volume snapshot, filesystem snapshots, clone, volume bookmark,
+# and filesystem bookmark that were created for this test case.
 #
 function cleanup
 {
@@ -97,6 +98,11 @@
 	datasetexists $TESTPOOL/$TESTFS@$TESTSNAP && \
 		destroy_snapshot $TESTPOOL/$TESTFS@$TESTSNAP
 
+	datasetexists $TESTPOOL/$TESTCLONE && \
+		destroy_clone $TESTPOOL/$TESTCLONE
+	datasetexists $TESTPOOL/$TESTFS@$TESTSNAP1 && \
+		destroy_snapshot $TESTPOOL/$TESTFS@$TESTSNAP1
+
 	bkmarkexists $TESTPOOL/$TESTVOL#$TESTBKMARK && \
 		destroy_bookmark $TESTPOOL/$TESTVOL#$TESTBKMARK
 	bkmarkexists $TESTPOOL/$TESTFS#$TESTBKMARK && \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib
index 48b3268..d5388e6 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_list_d.kshlib

@@ -78,7 +78,5 @@
 #
 function depth_fs_cleanup
 {
-	log_must zfs destroy -rR $DEPTH_FS
+	datasetexists $DEPTH_FS && destroy_dataset $DEPTH_FS -rR
 }
-
-

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/Makefile.am
new file mode 100644
index 0000000..5f5e385
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/Makefile.am

@@ -0,0 +1,5 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_ids_to_path
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	zfs_ids_to_path_001_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/cleanup.ksh
new file mode 100755
index 0000000..b5ff022
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/cleanup.ksh

@@ -0,0 +1,29 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/setup.ksh
new file mode 100755
index 0000000..fd6f8f8
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/setup.ksh

@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+default_setup $DISKS

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/zfs_ids_to_path_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/zfs_ids_to_path_001_pos.ksh
new file mode 100755
index 0000000..563b3e0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_ids_to_path/zfs_ids_to_path_001_pos.ksh

@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION: Identify the objset id and the object id of a file in a
+# filesystem, and verify that zfs_ids_to_path behaves correctly with them.
+#
+# STRATEGY:
+# 1. Create a dataset
+# 2. Make files in the dataset
+# 3. Verify that zfs_ids_to_path outputs the correct format for each one
+#
+
+verify_runnable "both"
+
+#
+# Exit handler (registered with log_onexit below): calls destroy_dataset on
+# $TESTPOOL/$TESTFS and then recreates it with its mountpoint set to $TESTDIR.
+#
+function cleanup
+{
+	destroy_dataset $TESTPOOL/$TESTFS
+	zfs create -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS
+}
+
+#
+# Check the output of zfs_ids_to_path for a single file, first with the
+# dataset mounted and then with it unmounted. The dataset is mounted again
+# before returning.
+#
+# $1 - objset id handed to zfs_ids_to_path
+# $2 - dataset name
+# $3 - path of the file relative to the dataset's mountpoint
+#
+function test_one
+{
+	typeset ds_id="$1"
+	typeset ds_path="$2"
+	typeset file_path="$3"
+
+	typeset mount_dir=$(get_prop mountpoint $ds_path)
+	typeset inode=$(ls -i /$mount_dir/$file_path | sed 's/ .*//')
+	typeset want_mounted="$mount_dir/$file_path"
+	typeset want_dataset="$ds_path:/$file_path"
+	typeset actual
+
+	# Mounted: plain output is the filesystem path, -v output is
+	# dataset:/path.
+	actual=$(zfs_ids_to_path $TESTPOOL $ds_id $inode)
+	if [[ "$actual" != "$want_mounted" ]]; then
+		log_fail "Incorrect output for non-verbose while mounted: $actual"
+	fi
+
+	actual=$(zfs_ids_to_path -v $TESTPOOL $ds_id $inode)
+	if [[ "$actual" != "$want_dataset" ]]; then
+		log_fail "Incorrect output for verbose while mounted: $actual"
+	fi
+
+	log_must zfs unmount $ds_path
+
+	# Unmounted: both forms are expected to print dataset:/path.
+	actual=$(zfs_ids_to_path $TESTPOOL $ds_id $inode)
+	if [[ "$actual" != "$want_dataset" ]]; then
+		log_fail "Incorrect output for non-verbose while unmounted: $actual"
+	fi
+
+	actual=$(zfs_ids_to_path -v $TESTPOOL $ds_id $inode)
+	if [[ "$actual" != "$want_dataset" ]]; then
+		log_fail "Incorrect output for verbose while unmounted: $actual"
+	fi
+
+	log_must zfs mount $ds_path
+}
+
+log_onexit cleanup
+
+# File and directory names below are relative to the dataset's mountpoint.
+typeset BASE=$TESTPOOL/$TESTFS
+typeset TESTFILE1=f1
+typeset TESTDIR1=d1
+typeset TESTFILE2=d1/f2
+typeset TESTDIR2=d1/d2
+typeset TESTFILE3=d1/d2/f3
+typeset TESTFILE4=d1/d2/f4
+
+typeset mntpnt=$(get_prop mountpoint $BASE)
+
+# Each directory is created before the files that live inside it.
+log_must touch /$mntpnt/$TESTFILE1
+log_must mkdir /$mntpnt/$TESTDIR1
+log_must touch /$mntpnt/$TESTFILE2
+log_must mkdir /$mntpnt/$TESTDIR2
+log_must touch /$mntpnt/$TESTFILE3
+log_must touch /$mntpnt/$TESTFILE4
+
+# Pull the number that follows " ID " on the "Dataset" line of zdb's output.
+typeset ds_id=$(zdb $BASE | grep "^Dataset" | sed 's/.* ID \([0-9]*\).*/\1/')
+test_one $ds_id $BASE $TESTFILE1
+test_one $ds_id $BASE $TESTFILE2
+test_one $ds_id $BASE $TESTFILE3
+test_one $ds_id $BASE $TESTFILE4
+
+log_pass "zfs_ids_to_path displayed correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh
index 3ef65b5..8e37e8d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_002_neg.ksh

@@ -45,9 +45,8 @@
 
 function cleanup
 {
-	if snapexists $TESTPOOL/$TESTFS@$TESTSNAP; then
-		log_must zfs destroy $TESTPOOL/$TESTFS@$TESTSNAP
-	fi
+	snapexists $TESTPOOL/$TESTFS@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP
 }
 
 log_assert "'zfs inherit' should return an error with bad parameters in" \
@@ -56,8 +55,13 @@
 
 set -A badopts "r" "R" "-R" "-rR" "-a" "-" "-?" "-1" "-2" "-v" "-n"
 set -A props "recordsize" "mountpoint" "sharenfs" "checksum" "compression" \
-    "atime" "devices" "exec" "setuid" "readonly" "zoned" "snapdir" "aclmode" \
+    "atime" "devices" "exec" "setuid" "readonly" "snapdir" "aclmode" \
     "aclinherit" "xattr" "copies"
+if is_freebsd; then
+	props+=("jailed")
+else
+	props+=("zoned")
+fi
 set -A illprops "recordsiz" "mountpont" "sharen" "compres" "atme" "blah"
 
 log_must zfs snapshot $TESTPOOL/$TESTFS@$TESTSNAP

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh
index 3317b09..3f7e4ff 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos.ksh

@@ -47,9 +47,7 @@
 function cleanup
 {
 	for ds in $TESTPOOL $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL ; do
-		if snapexists $ds@$TESTSNAP; then
-			log_must zfs destroy $ds@$TESTSNAP
-		fi
+		snapexists $ds@$TESTSNAP && destroy_dataset $ds@$TESTSNAP
 	done
 	cleanup_user_prop $TESTPOOL
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/Makefile.am
new file mode 100644
index 0000000..b6dd772
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/Makefile.am

@@ -0,0 +1,6 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_jail
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	jail.conf \
+	zfs_jail_001_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/cleanup.ksh
new file mode 100755
index 0000000..79cd6e9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/cleanup.ksh

@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/jail.conf b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/jail.conf
new file mode 100644
index 0000000..23a9dab
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/jail.conf

@@ -0,0 +1,9 @@
+testjail {
+	allow.mount.zfs;
+	allow.mount;
+	devfs_ruleset = 4;
+	enforce_statfs = 0;
+	mount.devfs;
+	path = "/";
+	persist;
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/setup.ksh
new file mode 100755
index 0000000..6a9af3b
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/setup.ksh

@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/zfs_jail_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/zfs_jail_001_pos.ksh
new file mode 100755
index 0000000..2c08081
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_jail/zfs_jail_001_pos.ksh

@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test basic functionality of `zfs jail` and `zfs unjail`.
+#
+# STRATEGY:
+# 1. Create a jail.
+# 2. Perform some basic ZFS operations on a dataset both in the host and
+#    in the jail to confirm the dataset is functional in the host
+#    and hidden in the jail.
+# 3. Run `zfs jail` to expose the dataset in the jail.
+# 4. Perform some basic ZFS operations on the dataset both in the host and
+#    in the jail to confirm the dataset is functional in the jail and host.
+# 5. Run `zfs unjail` to return the dataset to the host.
+# 6. Perform some basic ZFS operations on the dataset both in the host and
+#    in the jail to confirm the dataset is functional in the host
+#    and hidden in the jail.
+#
+
+verify_runnable "global"
+
+JAIL="testjail"
+JAIL_CONF="$STF_SUITE/tests/functional/cli_root/zfs_jail/jail.conf"
+
+#
+# Exit handler: remove the test jail when jls still reports it, otherwise
+# do nothing and return success.
+#
+function cleanup
+{
+	jls -j $JAIL name >/dev/null 2>&1 || return 0
+	jail -r -f $JAIL_CONF $JAIL
+}
+
+log_onexit cleanup
+
+log_assert "Verify that a dataset can be jailed and unjailed."
+
+# 1. Create a jail.
+log_must jail -c -f $JAIL_CONF $JAIL
+
+# 2. Try some basic ZFS operations.
+log_must zfs list $TESTPOOL
+log_mustnot jexec $JAIL zfs list $TESTPOOL
+
+# 3. Jail the dataset.
+log_must zfs jail $JAIL $TESTPOOL
+
+# 4. Try some basic ZFS operations.
+log_must zfs list $TESTPOOL
+log_must jexec $JAIL zfs list $TESTPOOL
+
+# 5. Unjail the dataset.
+log_must zfs unjail $JAIL $TESTPOOL
+
+# 6. Try some basic ZFS operations.
+log_must zfs list $TESTPOOL
+log_mustnot jexec $JAIL zfs list $TESTPOOL
+
+log_pass "Datasets can be jailed and unjailed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/HEXKEY b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/HEXKEY
new file mode 100644
index 0000000..95ed1c0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/HEXKEY

@@ -0,0 +1 @@
+000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am
index 06b4239..7dfec43 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am

@@ -5,10 +5,14 @@
 	zfs_load-key.ksh \
 	zfs_load-key_all.ksh \
 	zfs_load-key_file.ksh \
+	zfs_load-key_https.ksh \
 	zfs_load-key_location.ksh \
 	zfs_load-key_noop.ksh \
 	zfs_load-key_recursive.ksh
 
 dist_pkgdata_DATA = \
 	zfs_load-key.cfg \
-	zfs_load-key_common.kshlib
+	zfs_load-key_common.kshlib \
+	PASSPHRASE \
+	HEXKEY \
+	RAWKEY

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/PASSPHRASE b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/PASSPHRASE
new file mode 100644
index 0000000..f3097ab
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/PASSPHRASE

@@ -0,0 +1 @@
+password

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/RAWKEY b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/RAWKEY
new file mode 100644
index 0000000..f2d4cbf
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/RAWKEY

@@ -0,0 +1 @@
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh
index 79cd6e9..d397bcf 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh

@@ -26,5 +26,7 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
 
+cleanup_https
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh
index 6a9af3b..6cc5528 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh

@@ -26,7 +26,10 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
 
 DISK=${DISKS%% *}
 
-default_setup $DISK
+default_setup_noexit $DISK
+setup_https
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg
index 90d9f63..cc1e3b3 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg

@@ -17,6 +17,9 @@
 # Copyright (c) 2017 Datto, Inc. All rights reserved.
 #
 
+# $PASSPHRASE, $HEXKEY, and $RAWKEY must be kept in sync
+# with the corresponding files in this directory
+
 export PASSPHRASE="password"
 export PASSPHRASE1="password1"
 export PASSPHRASE2="password2"
@@ -24,3 +27,31 @@
 export HEXKEY1="201F1E1D1C1B1A191817161514131211100F0E0D0C0B0A090807060504030201"
 export RAWKEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
 export RAWKEY1="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+
+export SSL_CA_CERT_FILE="/$TESTPOOL/snakeoil.crt"
+export HTTPS_PORT_FILE="/$TESTPOOL/snakeoil.port"
+export HTTPS_HOSTNAME="localhost"
+export HTTPS_PORT=
+export HTTPS_BASE_URL=
+
+#
+# Print the HTTPS server port. When $HTTPS_PORT is still empty it is first
+# read from $HTTPS_PORT_FILE; if that read fails, its status is returned and
+# nothing is printed.
+#
+function get_https_port
+{
+	[ -n "$HTTPS_PORT" ] || read -r HTTPS_PORT < "$HTTPS_PORT_FILE" || return
+
+	echo "$HTTPS_PORT"
+}
+
+#
+# Print "https://$HTTPS_HOSTNAME:<port>". The value is built on first use
+# from get_https_port and kept in $HTTPS_BASE_URL; if the port cannot be
+# determined, $HTTPS_BASE_URL stays empty and get_https_port's status is
+# returned.
+#
+function get_https_base_url
+{
+	if [ -z "$HTTPS_BASE_URL" ]; then
+		typeset port
+		# Bail out before touching HTTPS_BASE_URL so it is never left
+		# holding a URL without a port.
+		port=$(get_https_port) || return
+		HTTPS_BASE_URL="https://$HTTPS_HOSTNAME:$port"
+	fi
+
+	echo "$HTTPS_BASE_URL"
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh
index 847a6aa..8af9f80 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh

@@ -46,7 +46,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 	poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
 }
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh
index 5e331fd..3c18e45 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh

@@ -37,9 +37,9 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
-	datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1
+	datasetexists $TESTPOOL/$TESTFS2 && destroy_dataset $TESTPOOL/$TESTFS2
+	datasetexists $TESTPOOL/zvol && destroy_dataset $TESTPOOL/zvol
 	poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
 }
 log_onexit cleanup
@@ -50,6 +50,9 @@
 log_must zfs create -o encryption=on -o keyformat=passphrase \
 	-o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
 
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=$(get_https_base_url)/PASSPHRASE $TESTPOOL/$TESTFS2
+
 log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \
 	-o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol
 
@@ -58,20 +61,25 @@
 	-O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2
 
 log_must zfs unmount $TESTPOOL/$TESTFS1
-log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must_busy zfs unload-key $TESTPOOL/$TESTFS1
 
-log_must zfs unload-key $TESTPOOL/zvol
+log_must zfs unmount $TESTPOOL/$TESTFS2
+log_must_busy zfs unload-key $TESTPOOL/$TESTFS2
+
+log_must_busy zfs unload-key $TESTPOOL/zvol
 
 log_must zfs unmount $TESTPOOL1
-log_must zfs unload-key $TESTPOOL1
+log_must_busy zfs unload-key $TESTPOOL1
 
 log_must zfs load-key -a
 
 log_must key_available $TESTPOOL1
 log_must key_available $TESTPOOL/zvol
 log_must key_available $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS2
 
 log_must zfs mount $TESTPOOL1
 log_must zfs mount $TESTPOOL/$TESTFS1
+log_must zfs mount $TESTPOOL/$TESTFS2
 
 log_pass "'zfs load-key -a' loads keys for all datasets"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
index d9066f9..f746143 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib

@@ -99,3 +99,66 @@
 
 	return 0
 }
+
+#
+# Start a background HTTPS server that serves the zfs_load-key test
+# directory, for use as an https:// keylocation.
+#
+# Generates a self-signed key and certificate under /$TESTPOOL, has python3
+# bind a random port in 0xC000-0xFFFF on $HTTPS_HOSTNAME, and records the
+# port in $HTTPS_PORT_FILE and the server's pid in /$TESTPOOL/snakeoil.pid.
+# Server output goes to /tmp/ZTS-snakeoil.log (or /dev/null if that file
+# cannot be opened). Calls log_fail if the server cannot be started.
+#
+function setup_https
+{
+	log_must openssl req -x509 -newkey rsa:4096 -sha256 -days 1 -nodes -keyout "/$TESTPOOL/snakeoil.key" -out "$SSL_CA_CERT_FILE" -subj "/CN=$HTTPS_HOSTNAME"
+
+	python3 -uc "
+import http.server, ssl, sys, os, time, random
+
+sys.stdin.close()
+
+httpd, err, port = None, None, None
+for i in range(1, 100):
+	port = random.randint(0xC000, 0xFFFF) # ephemeral range
+	try:
+		httpd = http.server.HTTPServer(('$HTTPS_HOSTNAME', port), http.server.SimpleHTTPRequestHandler)
+		break
+	except:
+		err = sys.exc_info()[1]
+		time.sleep(i / 100)
+if not httpd:
+	raise err
+
+with open('$HTTPS_PORT_FILE', 'w') as portf:
+	print(port, file=portf)
+
+# ssl.wrap_socket() was removed in Python 3.12; wrap via an SSLContext instead.
+tls = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+tls.load_cert_chain(certfile='$SSL_CA_CERT_FILE', keyfile='/$TESTPOOL/snakeoil.key')
+httpd.socket = tls.wrap_socket(httpd.socket, server_side=True)
+
+os.chdir('$STF_SUITE/tests/functional/cli_root/zfs_load-key')
+
+# Fork: the parent exits so python3 returns to the shell, the child records
+# its own pid and keeps serving.
+with open('/$TESTPOOL/snakeoil.pid', 'w') as pidf:
+	if os.fork() != 0:
+	  os._exit(0)
+	print(os.getpid(), file=pidf)
+
+sys.stdout.close()
+sys.stderr.close()
+try:
+	sys.stdout = sys.stderr = open('/tmp/ZTS-snakeoil.log', 'w', buffering=1) # line
+except:
+	sys.stdout = sys.stderr = open('/dev/null', 'w')
+
+print('{} start on {}'.format(os.getpid(), port))
+httpd.serve_forever()
+" || log_fail
+
+	# Poll for the pid written by the forked child, sleeping a little
+	# longer after each miss.
+	typeset https_pid=
+	for d in $(seq 0 0.1 5); do
+		read -r https_pid 2>/dev/null < "/$TESTPOOL/snakeoil.pid" && [ -n "$https_pid" ] && break
+		sleep "$d"
+	done
+	[ -z "$https_pid" ] && log_fail "Couldn't start HTTPS server"
+	log_note "Started HTTPS server as $https_pid on port $(get_https_port)"
+}
+
+#
+# Stop the HTTPS server whose pid is recorded in /$TESTPOOL/snakeoil.pid,
+# print its log, and remove the pid and log files. Returns 0 without doing
+# anything when the pid file cannot be read.
+#
+function cleanup_https
+{
+	typeset pidfile="/$TESTPOOL/snakeoil.pid"
+	typeset logfile="/tmp/ZTS-snakeoil.log"
+	typeset server_pid=
+
+	if ! read -r server_pid 2>/dev/null < "$pidfile"; then
+		return 0
+	fi
+
+	log_must kill "$server_pid"
+	cat "$logfile"
+	rm -f "$pidfile" "$logfile"
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh
index 7cbda43..73c461f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh

@@ -38,7 +38,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_https.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_https.ksh
new file mode 100755
index 0000000..cac9c41
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_https.ksh

@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key' should load a dataset's key from an https:// URL,
+# but fail to do so if the domain doesn't exist or the file 404s.
+#
+# STRATEGY:
+# 1. Try to create a dataset pointing to an RFC6761-guaranteed unresolvable domain,
+#    one to the sshd port (which will be either unoccupied (ECONNREFUSED)
+#                          or have sshd on it ("wrong version number")).
+#    and one pointing to an URL that will always 404.
+# 2. Create encrypted datasets with keylocation=https://address
+# 3. Unmount the datasets and unload their keys
+# 4. Attempt to load the keys
+# 5. Verify the keys are loaded
+# 6. Attempt to mount the datasets
+#
+
+verify_runnable "both"
+
+#
+# Exit handler: remove whichever of the three test filesystems exist.
+# Uses destroy_dataset, like the other zfs_load-key cleanup functions.
+#
+function cleanup
+{
+	for fs in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+		datasetexists $TESTPOOL/$fs && \
+			destroy_dataset $TESTPOOL/$fs
+	done
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key' should load a key from an https:// URL"
+
+# Creation must fail for an unresolvable host, for a port that does not
+# speak HTTPS, and for a URL that returns 404.
+log_mustnot zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=https://invalid./where-ever $TESTPOOL/$TESTFS1
+
+log_mustnot zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=https://$HTTPS_HOSTNAME:22 $TESTPOOL/$TESTFS1
+
+log_mustnot zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=$(get_https_base_url)/ENOENT $TESTPOOL/$TESTFS1
+
+# One dataset per key format, each fetching its key from the test server.
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=$(get_https_base_url)/PASSPHRASE $TESTPOOL/$TESTFS1
+
+log_must zfs create -o encryption=on -o keyformat=hex \
+	-o keylocation=$(get_https_base_url)/HEXKEY $TESTPOOL/$TESTFS2
+
+log_must zfs create -o encryption=on -o keyformat=raw \
+	-o keylocation=$(get_https_base_url)/RAWKEY $TESTPOOL/$TESTFS3
+
+# Drop every key first, then reload each one over HTTPS and remount.
+for fs in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+	log_must zfs unmount $TESTPOOL/$fs
+	log_must zfs unload-key $TESTPOOL/$fs
+done
+for fs in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+	log_must zfs load-key $TESTPOOL/$fs
+	log_must key_available $TESTPOOL/$fs
+	log_must zfs mount $TESTPOOL/$fs
+done
+
+log_pass "'zfs load-key' loads a key from an https:// URL"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh
index d0b1cdb..11f16e4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh

@@ -44,7 +44,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 }
 log_onexit cleanup
 
@@ -70,4 +70,9 @@
 log_must key_available $TESTPOOL/$TESTFS1
 log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location"
 
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must zfs load-key -L $(get_https_base_url)/PASSPHRASE $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location"
+
 log_pass "'zfs load-key -L' overrides keylocation with provided value"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh
index bfce786..2ee1783 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh

@@ -37,7 +37,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh
index 7385b69..c0b5553 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh

@@ -39,7 +39,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 log_onexit cleanup
 
@@ -52,15 +52,21 @@
 log_must zfs create -o keyformat=passphrase \
 	-o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
 
+log_must zfs create -o keyformat=passphrase \
+	-o keylocation=$(get_https_base_url)/PASSPHRASE $TESTPOOL/$TESTFS1/child/child
+
 log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1/child/child
 log_must zfs unload-key $TESTPOOL/$TESTFS1/child
 log_must zfs unload-key $TESTPOOL/$TESTFS1
 
 log_must zfs load-key -r $TESTPOOL
 log_must key_available $TESTPOOL/$TESTFS1
 log_must key_available $TESTPOOL/$TESTFS1/child
+log_must key_available $TESTPOOL/$TESTFS1/child/child
 
 log_must zfs mount $TESTPOOL/$TESTFS1
 log_must zfs mount $TESTPOOL/$TESTFS1/child
+log_must zfs mount $TESTPOOL/$TESTFS1/child/child
 
 log_pass "'zfs load-key -r' recursively loads keys"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
index 8a137b8..8c90b2e 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am

@@ -13,7 +13,9 @@
 	zfs_mount_009_neg.ksh \
 	zfs_mount_010_neg.ksh \
 	zfs_mount_011_neg.ksh \
-	zfs_mount_012_neg.ksh \
+	zfs_mount_012_pos.ksh \
+	zfs_mount_013_pos.ksh \
+	zfs_mount_014_neg.ksh \
 	zfs_mount_all_001_pos.ksh \
 	zfs_mount_all_fail.ksh \
 	zfs_mount_all_mountpoints.ksh \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
index 2afb9a5..85566e5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib

@@ -66,7 +66,8 @@
 
 	if [[ $vdev != "" && \
 		$vdev != "mirror" && \
-		$vdev != "raidz" ]] ; then
+		$vdev != "raidz" && \
+		$vdev != "draid" ]] ; then
 
 		log_note "Wrong vdev: (\"$vdev\")"
 		return 1
@@ -110,7 +111,7 @@
 
 	if datasetexists "$pool/$fs" ; then
 		mtpt=$(get_prop mountpoint "$pool/$fs")
-		log_must zfs destroy -r $pool/$fs
+		destroy_dataset "$pool/$fs" "-r"
 
 		[[ -d $mtpt ]] && \
 			log_must rm -rf $mtpt

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_005_pos.ksh
index fc97520..c0cb693 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_005_pos.ksh

@@ -44,13 +44,15 @@
 # 2. Apply 'zfs set mountpoint=path <filesystem>'.
 # 3. Change directory to that given mountpoint.
 # 3. Invoke 'zfs mount <filesystem>'.
-# 4. Verify that mount succeeds on Linux and fails for other platforms.
+# 4. Verify that mount succeeds on Linux and FreeBSD and fails for other
+#    platforms.
 #
 
 verify_runnable "both"
 
 function cleanup
 {
+	[[ "$PWD" = "$TESTDIR" ]] && cd -
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 	log_must force_unmount $TESTPOOL/$TESTFS
 	return 0
@@ -74,7 +76,7 @@
 
 zfs $mountcmd $TESTPOOL/$TESTFS
 ret=$?
-if is_linux; then
+if is_linux || is_freebsd; then
     (( ret == 0 )) || \
         log_fail "'zfs $mountcmd $TESTPOOL/$TESTFS' " \
             "unexpected return code of $ret."
@@ -85,7 +87,7 @@
 fi
 
 log_note "Make sure the filesystem $TESTPOOL/$TESTFS is unmounted"
-if is_linux; then
+if is_linux || is_freebsd; then
     mounted $TESTPOOL/$TESTFS || \
         log_fail Filesystem $TESTPOOL/$TESTFS is unmounted
 else

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh
index 5c95435..5edce35 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh

@@ -35,22 +35,20 @@
 
 #
 # DESCRIPTION:
-#	Invoke "zfs mount <filesystem>" with a filesystem
-#	mountpoint that is identical to an existing one.
-#	It will fail with a return code of 1.  For Linux,
-#	place a file in the directory to ensure the failure.
-#	Also for Linux, test overlay=off (default) in which case
-#	the mount will fail, and overlay=on, where the mount
-#	will succeed.
+#	Invoke "zfs mount <filesystem>" with a filesystem mountpoint that is
+#	identical to an existing one.  It will fail with a return code of 1
+#	when overlay=off.  Place a file in the directory to ensure the failure.
+#	Also test overlay=on (default) in which case the mount will not fail.
 #
 # STRATEGY:
 #	1. Prepare an existing mounted filesystem.
-#	2. Setup a new filesystem and make sure that it is unmounted.
-#	3. For Linux, place a file in the mount point folder.
-#       4. Mount the new filesystem using the various combinations
-#		- zfs set mountpoint=<identical path> <filesystem>
-#		- zfs set mountpoint=<top path> <filesystem>
-#       5. Verify that mount failed with return code of 1.
+#	2. Setup a new filesystem with overlay=off and make sure that it is
+#	   unmounted.
+#	3. Place a file in the mount point folder.
+#	4. Mount the new filesystem using the various combinations
+#	   - zfs set mountpoint=<identical path> <filesystem>
+#	   - zfs set mountpoint=<top path> <filesystem>
+#	5. Verify that mount failed with return code of 1.
 #	6. For Linux, also set overlay=on and verify the mount is
 #	   allowed.
 #
@@ -76,7 +74,7 @@
 
 log_assert "Verify that 'zfs $mountcmd <filesystem>'" \
 	"where the mountpoint is identical or on top of an existing one" \
-	"will fail with return code 1."
+	"will fail with return code 1 when overlay=off."
 
 log_onexit cleanup
 
@@ -98,8 +96,8 @@
 log_must zfs set mountpoint=$mtpt $TESTPOOL/$TESTFS
 log_must zfs $mountcmd $TESTPOOL/$TESTFS
 
-if is_linux; then
-	log_must zfs set overlay=off $TESTPOOL/$TESTFS
+log_must zfs set overlay=off $TESTPOOL/$TESTFS
+if ! is_illumos; then
 	touch $mtpt/file.1
 	log_must ls -l $mtpt | grep file
 fi
@@ -107,7 +105,7 @@
 mounted $TESTPOOL/$TESTFS || \
 	log_unresolved "Filesystem $TESTPOOL/$TESTFS is unmounted"
 
-log_must zfs create $TESTPOOL/$TESTFS1
+log_must zfs create -o overlay=off $TESTPOOL/$TESTFS1
 
 unmounted $TESTPOOL/$TESTFS1 || \
 	log_must force_unmount $TESTPOOL/$TESTFS1
@@ -123,9 +121,9 @@
 
 	log_mustnot zfs $mountcmd $TESTPOOL/$TESTFS1
 
-	# For Linux, test the overlay=on feature which allows
-	# mounting of non-empty directory.
-	if is_linux; then
+	if ! is_illumos; then
+		# Test the overlay=on feature which allows
+		# mounting of non-empty directory.
 		log_must zfs set overlay=on $TESTPOOL/$TESTFS1
 		log_must zfs $mountcmd $TESTPOOL/$TESTFS1
 		log_must force_unmount $TESTPOOL/$TESTFS1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh
index 52ae187..409dd06 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_007_pos.ksh

@@ -62,7 +62,10 @@
 	"without affecting the property that is stored on disk."
 log_onexit cleanup
 
-set -A properties "atime" "devices" "exec" "readonly" "setuid"
+set -A properties "atime" "exec" "readonly" "setuid"
+if ! is_freebsd; then
+	properties+=("devices")
+fi
 
 #
 # Get the specified filesystem property reverse mount option.
@@ -78,16 +81,21 @@
 	# Define property value: "reverse if value=on" "reverse if value=off"
 	if is_linux; then
 		set -A values "noatime"   "atime" \
-			      "nodev"     "dev" \
 			      "noexec"    "exec" \
 			      "rw"        "ro" \
-			      "nosuid"    "suid"
-	else
+			      "nosuid"    "suid" \
+			      "nodev"     "dev"
+	elif is_freebsd; then
 		set -A values "noatime"   "atime" \
-			      "nodevices" "devices" \
 			      "noexec"    "exec" \
 			      "rw"        "ro" \
 			      "nosetuid"  "setuid"
+	else
+		set -A values "noatime"   "atime" \
+			      "noexec"    "exec" \
+			      "rw"        "ro" \
+			      "nosetuid"  "setuid" \
+			      "nodevices" "devices"
 	fi
 
 	typeset -i i=0
@@ -123,7 +131,8 @@
 
 	# Set filesystem property temporarily
 	reverse_opt=$(get_reverse_option $fs $property)
-	log_must zfs mount -o remount,$reverse_opt $fs
+	log_must zfs unmount $fs
+	log_must zfs mount -o $reverse_opt $fs
 
 	cur_val=$(get_prop $property $fs)
 	(($? != 0)) && log_fail "get_prop $property $fs"
@@ -135,7 +144,7 @@
 				"be enabled in LZ"
 		fi
 	elif [[ $orig_val == $cur_val ]]; then
-		log_fail "zfs mount -o remount,$reverse_opt " \
+		log_fail "zfs mount -o $reverse_opt " \
 			"doesn't change property."
 	fi
 
@@ -146,7 +155,7 @@
 	cur_val=$(get_prop $property $fs)
 	(($? != 0)) && log_fail "get_prop $property $fs"
 	if [[ $orig_val != $cur_val ]]; then
-		log_fail "zfs mount -o remount,$reverse_opt " \
+		log_fail "zfs mount -o $reverse_opt " \
 			"change the property that is stored on disks"
 	fi
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh
index 84835a0..6a25133 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh

@@ -47,9 +47,7 @@
 {
 	! ismounted $fs && log_must zfs mount $fs
 
-	if datasetexists $fs1; then
-		log_must zfs destroy $fs1
-	fi
+	datasetexists $fs1 && destroy_dataset $fs1
 
 	if [[ -f $testfile ]]; then
 		log_must rm -f $testfile
@@ -73,7 +71,8 @@
 
 log_must zfs unmount $fs1
 log_must zfs set mountpoint=$mntpnt $fs1
-log_mustnot zfs mount $fs1
+log_must zfs mount $fs1
+log_must zfs unmount $fs1
 log_must zfs mount -O $fs1
 
 # Create new file in override mountpoint

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh
index 0b5d61f..53ebf1f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_010_neg.ksh

@@ -65,7 +65,7 @@
 log_must zfs umount $fs
 curpath=`dirname $0`
 cd $mpt
-if is_linux; then
+if is_linux || is_freebsd; then
     log_must zfs mount $fs
 else
     log_mustnot zfs mount $fs

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_011_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_011_neg.ksh
index a116b46..95e2bc3 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_011_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_011_neg.ksh

@@ -45,12 +45,11 @@
 
 function cleanup
 {
-	if snapexists $TESTPOOL/$TESTFS@$TESTSNAP; then
-		log_must_busy zfs destroy $TESTPOOL/$TESTFS@$TESTSNAP
-	fi
+	snapexists $TESTPOOL/$TESTFS@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP
 
 	if is_global_zone && datasetexists $TESTPOOL/$TESTVOL; then
-		log_must_busy zfs destroy $TESTPOOL/$TESTVOL
+		destroy_dataset $TESTPOOL/$TESTVOL
 	fi
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg.ksh
deleted file mode 100755
index 19fb3b2..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg.ksh
+++ /dev/null

@@ -1,50 +0,0 @@
-#!/bin/ksh -p
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2015 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-#
-# DESCRIPTION:
-# Verify that zfs mount should fail with a non-empty directory
-#
-# STRATEGY:
-# 1. Unmount the dataset
-# 2. Create a new empty directory
-# 3. Set the dataset's mountpoint
-# 4. Attempt to mount the dataset
-# 5. Verify the mount succeeds
-# 6. Unmount the dataset
-# 7. Create a file in the directory created in step 2
-# 8. Attempt to mount the dataset
-# 9. Verify the mount fails
-#
-
-verify_runnable "both"
-
-log_assert "zfs mount fails with non-empty directory"
-
-fs=$TESTPOOL/$TESTFS
-
-log_must zfs umount $fs
-log_must mkdir -p $TESTDIR
-log_must zfs set mountpoint=$TESTDIR $fs
-log_must zfs mount $fs
-log_must zfs umount $fs
-log_must touch $TESTDIR/testfile.$$
-log_mustnot zfs mount $fs
-log_must rm -rf $TESTDIR
-
-log_pass "zfs mount fails non-empty directory as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh
new file mode 100755
index 0000000..5ff094d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh

@@ -0,0 +1,53 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#   Verify that zfs mount succeeds with a non-empty directory
+#
+
+#
+# STRATEGY:
+# 1. Unmount the dataset
+# 2. Create a new empty directory
+# 3. Set the dataset's mountpoint
+# 4. Attempt to mount the dataset
+# 5. Verify the mount succeeds
+# 6. Unmount the dataset
+# 7. Create a file in the directory created in step 2
+# 8. Attempt to mount the dataset
+# 9. Verify the mount succeeds
+#
+
+verify_runnable "both"
+
+log_assert "zfs mount succeeds with non-empty directory"
+
+fs=$TESTPOOL/$TESTFS
+
+log_must zfs umount $fs
+log_must mkdir -p $TESTDIR
+log_must zfs set mountpoint=$TESTDIR $fs
+log_must zfs mount $fs
+log_must zfs umount $fs
+log_must touch $TESTDIR/testfile.$$
+log_must zfs mount $fs
+log_must zfs umount $fs
+log_must rm -rf $TESTDIR
+
+log_pass "zfs mount succeeds with non-empty directory as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_013_pos.ksh
new file mode 100755
index 0000000..e6a4be1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_013_pos.ksh

@@ -0,0 +1,92 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+#
+# DESCRIPTION:
+# Verify zfs mount helper functions for both devices and pools.
+#
+
+verify_runnable "both"
+
+set -A vdevs $(get_disklist_fullpath $TESTPOOL)
+typeset -r mntpoint=$(get_prop mountpoint $TESTPOOL)
+typeset -r helper="mount.zfs -o zfsutil"
+typeset -r fs=$TESTPOOL/$TESTFS
+
+function cleanup
+{
+	cd $STF_SUITE
+	if [[ -d $TESTDIR/$$ ]]; then
+		log_must rm -rf $TESTDIR/$$
+	fi
+	mounted && zfs $mountcmd $TESTPOOL
+	return 0
+}
+log_onexit cleanup
+
+log_note "Verify zfs mount helper functions for both devices and pools"
+
+# Ensure that the ZFS filesystem is unmounted
+force_unmount $TESTPOOL
+
+log_note "Verify '<dataset> <path>'"
+log_must $helper $fs $mntpoint
+log_must ismounted $fs
+force_unmount $fs
+
+log_note "Verify mount(8) does not canonicalize before calling helper"
+# Canonicalization is confused by files in PWD matching [device|mountpoint]
+log_must mkdir -p $TESTDIR/$$/$TESTPOOL
+log_must cd $TESTDIR/$$
+# The env flag directs zfs to exec /bin/mount, which then calls helper
+log_must eval ZFS_MOUNT_HELPER=1 zfs $mountcmd -v $TESTPOOL
+# mount (2.35.2) still suffers from a cosmetic PWD prefix bug
+log_must mounted $TESTPOOL
+force_unmount $TESTPOOL
+
+log_note "Verify CWD prefix filter <dataset> <path>"
+log_must cd /
+log_must zfs set mountpoint=legacy $TESTPOOL
+log_must mkdir -p $mntpoint
+log_must mount -t zfs $TESTPOOL $mntpoint
+log_must ismounted $TESTPOOL
+log_must umount $mntpoint
+log_must zfs set mountpoint=$mntpoint $TESTPOOL
+log_must cd -
+force_unmount $TESTPOOL
+
+log_note "Verify '-f <dataset> <path>' fakemount"
+log_must $helper -f $fs $mntpoint
+log_mustnot ismounted $fs
+
+log_note "Verify '-o ro -v <dataset> <path>' verbose RO"
+log_must ${helper},ro -v $fs $mntpoint
+log_must ismounted $fs
+force_unmount $fs
+
+log_note "Verify '-o abc -s <device> <path>' sloppy option"
+log_must ${helper},abc -s ${vdevs[0]} $mntpoint
+log_must mounted $mntpoint
+force_unmount $TESTPOOL
+
+log_note "Verify '<device> <path>'"
+log_must $helper ${vdevs[0]} $mntpoint
+log_must mounted $mntpoint
+
+log_pass "zfs mount helper correctly handles both device and pool strings"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_014_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_014_neg.ksh
new file mode 100755
index 0000000..5cf0bc7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_014_neg.ksh

@@ -0,0 +1,68 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+#
+# DESCRIPTION:
+# Verify zfs mount helper failure on known bad parameters
+#
+
+verify_runnable "both"
+
+set -A vdevs $(get_disklist_fullpath $TESTPOOL)
+vdev=${vdevs[0]}
+
+mntpoint="$(get_prop mountpoint $TESTPOOL)"
+helper="mount.zfs -o zfsutil"
+fs=$TESTPOOL/$TESTFS
+
+function cleanup
+{
+	log_must force_unmount $vdev
+	return 0
+}
+log_onexit cleanup
+
+log_note "Verify zfs mount helper failure on known bad parameters"
+
+# Ensure that the ZFS filesystem is unmounted.
+force_unmount $fs
+
+log_note "Verify failure without '-o zfsutil'"
+log_mustnot mount.zfs $fs $mntpoint
+
+log_note "Verify '-o abc <device> <path>' bad option fails"
+log_mustnot ${helper},abc $vdev $mntpoint
+
+log_note "Verify '\$NONEXISTFSNAME <path>' fails"
+log_mustnot $helper $NONEXISTFSNAME $mntpoint
+
+log_note "Verify '<dataset> (\$NONEXISTFSNAME|/dev/null)' fails"
+log_mustnot $helper $fs $NONEXISTFSNAME
+log_mustnot $helper $fs /dev/null
+
+log_note "Verify '/dev/null <path>' fails"
+log_mustnot $helper /dev/null $mntpoint
+
+log_note "Verify '[device|pool]' fails"
+log_mustnot mount.zfs
+log_mustnot $helper
+log_mustnot $helper $vdev
+log_mustnot $helper $TESTPOOL
+
+log_pass "zfs mount helper fails when expected"
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh
index d7fcd20..d1103bd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh

@@ -30,7 +30,8 @@
 #       1. Create zfs filesystems
 #       2. Unmount a leaf filesystem
 #       3. Create a file in the above filesystem's mountpoint
-#       4. Verify that 'zfs mount -a' fails to mount the above
+#       4. Verify that 'zfs mount -a' succeeds if overlay=on and
+#          fails to mount the above if overlay=off
 #       5. Verify that all other filesystems were mounted
 #
 
@@ -82,15 +83,23 @@
 # Create a stray file in one filesystem's mountpoint
 touch $path/0/strayfile
 
-# Verify that zfs mount -a fails
 export __ZFS_POOL_RESTRICT="$TESTPOOL"
+
+# Verify that zfs mount -a succeeds with overlay=on (default)
+log_must zfs $mountall
+log_must mounted "$TESTPOOL/0"
+log_must zfs $unmountall
+
+# Verify that zfs mount -a fails with overlay=off
+log_must zfs set overlay=off "$TESTPOOL/0"
 log_mustnot zfs $mountall
+log_mustnot mounted "$TESTPOOL/0"
+
 unset __ZFS_POOL_RESTRICT
 
-# All filesystems except for "0" should be mounted
-log_mustnot mounted "$TESTPOOL/0"
+# All other filesystems should be mounted
 for ((i=1; i<$fscount; i++)); do
 	log_must mounted "$TESTPOOL/$i"
 done
 
-log_pass "'zfs $mountall' failed as expected."
+log_pass "'zfs $mountall' behaves as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh
index 3e6a24b..faeae42 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh

@@ -109,6 +109,8 @@
 	export __ZFS_POOL_RESTRICT="$TESTPOOL"
 	log_must zfs $unmountall
 	unset __ZFS_POOL_RESTRICT
+	# make sure we leave $TESTPOOL mounted
+	log_must zfs mount $TESTPOOL
 
 	for fs in ${filesystems[@]}; do
 		cleanup_filesystem "$TESTPOOL" "$fs"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh
index 9749a9b..a95e750 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh

@@ -42,7 +42,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh
index 66a4338..ac6103e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh

@@ -48,11 +48,19 @@
 {
 	log_must_busy zpool export $TESTPOOL
 	log_must zpool import $TESTPOOL
-	snapexists $TESTSNAP && log_must zfs destroy $TESTSNAP
+	snapexists $TESTSNAP && destroy_dataset $TESTSNAP
 	[[ -d $MNTPSNAP ]] && log_must rmdir $MNTPSNAP
 	return 0
 }
 
+if is_freebsd; then
+	typeset RO="-t zfs -ur"
+	typeset RW="-t zfs -uw"
+else
+	typeset RO="-o remount,ro"
+	typeset RW="-o remount,rw"
+fi
+
 #
 # Verify the $filesystem is mounted readonly
 # This is preferred over "log_mustnot touch $fs" because we actually want to
@@ -76,8 +84,13 @@
 {
 	typeset dataset="$1"
 	typeset option="$2"
+	typeset options=""
 
-	options="$(awk -v ds="$dataset" '$1 == ds { print $4 }' /proc/mounts)"
+	if is_freebsd; then
+		options=$(mount -p | awk -v ds="$dataset" '$1 == ds { print $4 }')
+	else
+		options=$(awk -v ds="$dataset" '$1 == ds { print $4 }' /proc/mounts)
+	fi
 	if [[ "$options" == '' ]]; then
 		log_fail "Dataset $dataset is not mounted"
 	elif [[ ! -z "${options##*$option*}" ]]; then
@@ -105,21 +118,23 @@
 # 2. Verify we can (re)mount the dataset readonly/read-write
 log_must touch $MNTPFS/file.dat
 checkmount $TESTFS 'rw'
-log_must mount -o remount,ro $TESTFS $MNTPFS
+log_must mount $RO $TESTFS $MNTPFS
 readonlyfs $MNTPFS
 checkmount $TESTFS 'ro'
-log_must mount -o remount,rw $TESTFS $MNTPFS
+log_must mount $RW $TESTFS $MNTPFS
 log_must touch $MNTPFS/file.dat
 checkmount $TESTFS 'rw'
 
-# 3. Verify we can (re)mount the snapshot readonly
-log_must mount -t zfs $TESTSNAP $MNTPSNAP
-readonlyfs $MNTPSNAP
-checkmount $TESTSNAP 'ro'
-log_must mount -o remount,ro $TESTSNAP $MNTPSNAP
-readonlyfs $MNTPSNAP
-checkmount $TESTSNAP 'ro'
-log_must umount $MNTPSNAP
+if is_linux; then
+	# 3. Verify we can (re)mount the snapshot readonly
+	log_must mount -t zfs $TESTSNAP $MNTPSNAP
+	readonlyfs $MNTPSNAP
+	checkmount $TESTSNAP 'ro'
+	log_must mount $RO $TESTSNAP $MNTPSNAP
+	readonlyfs $MNTPSNAP
+	checkmount $TESTSNAP 'ro'
+	log_must umount $MNTPSNAP
+fi
 
 # 4. Verify we can't remount a snapshot read-write
 # The "mount -o rw" command will succeed but the snapshot is mounted readonly.
@@ -127,7 +142,7 @@
 log_must mount -t zfs -o rw $TESTSNAP $MNTPSNAP
 readonlyfs $MNTPSNAP
 checkmount $TESTSNAP 'ro'
-log_mustnot mount -o remount,rw $TESTSNAP $MNTPSNAP
+log_mustnot mount $RW $TESTSNAP $MNTPSNAP
 readonlyfs $MNTPSNAP
 checkmount $TESTSNAP 'ro'
 log_must umount $MNTPSNAP
@@ -138,7 +153,7 @@
     -o encryption=on -o keyformat=passphrase $TESTFS/crypt"
 CRYPT_MNTPFS="$(get_prop mountpoint $TESTFS/crypt)"
 log_must touch $CRYPT_MNTPFS/file.dat
-log_must mount -o remount,ro $TESTFS/crypt $CRYPT_MNTPFS
+log_must mount $RO $TESTFS/crypt $CRYPT_MNTPFS
 log_must umount -f $CRYPT_MNTPFS
 zpool sync $TESTPOOL
 
@@ -149,7 +164,7 @@
 # 7. Verify we can't remount its filesystem read-write
 readonlyfs $MNTPFS
 checkmount $TESTFS 'ro'
-log_mustnot mount -o remount,rw $MNTPFS
+log_mustnot mount $RW $MNTPFS
 readonlyfs $MNTPFS
 checkmount $TESTFS 'ro'
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh
index 404770b..3a5793d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.ksh

@@ -26,7 +26,7 @@
 # under race condition which resulted in undefined mount order.  The purpose
 # of this test is to verify `zfs unmount -a` succeeds (not `zfs mount -a`
 # succeeds, it always does) after `zfs mount -a`, which could fail if threads
-# race.  See github.com/zfsonlinux/zfs/issues/{8450,8833,8878} for details.
+# race.  See github.com/openzfs/zfs/issues/{8450,8833,8878} for details.
 #
 # STRATEGY:
 # 1. Create pools and filesystems.
@@ -87,10 +87,11 @@
 
 # At this point, layout of datasets in two pools will look like below.
 # Previously, on next `zfs mount -a`, pthreads assigned to TESTFS1 and TESTFS2
-# could race, and TESTFS2 usually (actually always) won in ZoL.  Note that the
-# problem is how two or more threads could initially be assigned to the same
-# top level directory, not this specific layout.  This layout is just an example
-# that can reproduce race, and is also the layout reported in #8833.
+# could race, and TESTFS2 usually (actually always) won in OpenZFS.
+# Note that the problem is how two or more threads could initially be assigned
+# to the same top level directory, not this specific layout.
+# This layout is just an example that can reproduce the race,
+# and is also the layout reported in #8833.
 #
 # NAME                  MOUNTED  MOUNTPOINT
 # ----------------------------------------------

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_multi_mount.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_multi_mount.ksh
index e015d0a..bd86eaa 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_multi_mount.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_multi_mount.ksh

@@ -59,7 +59,12 @@
 log_must exec 9<> $FILENAME # open file
 
 # 3. Lazy umount
-log_must umount -l $MNTPFS
+if is_freebsd; then
+	# FreeBSD does not support lazy unmount
+	log_must umount $MNTPFS
+else
+	log_must umount -l $MNTPFS
+fi
 if [ -f $FILENAME ]; then
 	log_fail "Lazy unmount failed"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
index 3788543..b0265c5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh

@@ -92,27 +92,13 @@
     }
 }")
 
-#
-# N.B. json.tool is needed to guarantee consistent ordering of fields,
-# sed is needed to trim trailing space in CentOS 6's json.tool output
-#
-# As of Python 3.5 the behavior of json.tool changed to keep the order
-# the same as the input and the --sort-keys option was added.  Detect when
-# --sort-keys is supported and apply the option to ensure the expected order.
-#
-if python -m json.tool --sort-keys <<< "{}"; then
-	JSON_TOOL_CMD="python -m json.tool --sort-keys"
-else
-	JSON_TOOL_CMD="python -m json.tool"
-fi
-
 typeset -i cnt=0
 typeset cmd
 for cmd in ${pos_cmds[@]}; do
 	log_must zfs program $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1
 	log_must zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1
 	OUTPUT=$(zfs program -j $TESTPOOL $TESTZCP $TESTDS $cmd 2>&1 |
-	    $JSON_TOOL_CMD | sed 's/[[:space:]]*$//')
+	    python3 -m json.tool --sort-keys)
 	if [ "$OUTPUT" != "${pos_cmds_out[$cnt]}" ]; then
 		log_note "Got     :$OUTPUT"
 		log_note "Expected:${pos_cmds_out[$cnt]}"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_001_pos.ksh
index 0bf7c5b..dc3ffd6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_001_pos.ksh

@@ -50,8 +50,7 @@
 	if snapexists $csnap; then
 		log_must zfs promote $fs
 	fi
-	snapexists $snap && \
-		log_must zfs destroy -rR $snap
+	snapexists $snap && destroy_dataset $snap -rR
 
 	typeset data
 	for data in $file0 $file1; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_003_pos.ksh
index e0d0e84..7dedaf9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_003_pos.ksh

@@ -54,8 +54,7 @@
 	typeset ds
 	typeset data
 	for ds in ${snap[*]}; do
-		snapexists $ds && \
-			log_must zfs destroy -rR $ds
+		snapexists $ds && destroy_dataset $ds -rR
 	done
 	for data in ${file[*]}; do
 		[[ -e $data ]] && rm -f $data

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_004_pos.ksh
index 23b5991..b8a5ab9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_004_pos.ksh

@@ -53,8 +53,7 @@
 	typeset ds
 	typeset data
 	for ds in ${snap[*]}; do
-		snapexists $ds && \
-			log_must zfs destroy -rR $ds
+		snapexists $ds && destroy_dataset $ds -rR
 	done
 	for data in ${file[*]}; do
 		[[ -e $data ]] && rm -f $data

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_005_pos.ksh
index c669a44..289ddc6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_005_pos.ksh

@@ -46,8 +46,8 @@
 function cleanup
 {
 	if datasetexists $fssnap ; then
-		datasetexists $clone && log_must zfs destroy $clone
-		log_must zfs destroy $fssnap
+		datasetexists $clone && destroy_dataset $clone
+		destroy_dataset $fssnap
 	fi
 	if datasetexists $clone ; then
 		log_must zfs promote $fs

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_006_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_006_neg.ksh
index 286c14a..7f08f28 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_006_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_006_neg.ksh

@@ -62,13 +62,9 @@
 
 function cleanup
 {
-	if datasetexists $clone; then
-		log_must zfs destroy $clone
-	fi
+	datasetexists $clone && destroy_dataset $clone
 
-	if datasetexists $recvfs; then
-		log_must zfs destroy -r $recvfs
-	fi
+	datasetexists $recvfs && destroy_dataset $recvfs -r
 
 	if snapexists $snap; then
 		destroy_snapshot  $snap

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_007_neg.ksh
index 3f8ee19..95db7d9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_007_neg.ksh

@@ -47,8 +47,7 @@
 
 function cleanup
 {
-	snapexists $snap && \
-		log_must zfs destroy -rR $snap
+	snapexists $snap && destroy_dataset $snap -rR
 
 	typeset data
 	for data in $TESTDIR/$TESTFILE0 $TESTDIR/$TESTFILE1; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
index 2c7584d..fd6ed7e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh

@@ -42,11 +42,11 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -Rf
 	datasetexists $TESTPOOL/clone1 && \
-		log_must zfs destroy -Rf $TESTPOOL/clone1
+		destroy_dataset $TESTPOOL/clone1 -Rf
 	datasetexists $TESTPOOL/clone2 && \
-		log_must zfs destroy -Rf $TESTPOOL/clone2
+		destroy_dataset $TESTPOOL/clone2 -Rf
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
index cbbacac..f31ff48 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh

@@ -36,7 +36,7 @@
 function cleanup
 {
 	for ds in $datasets; do
-		datasetexists $ds && log_must zfs destroy -R $TESTPOOL/$TESTFS1
+		datasetexists $ds && destroy_dataset $TESTPOOL/$TESTFS1 -R
 	done
 }
 function get_prop_mb

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am
index bf112a7..773e9f5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am

@@ -17,9 +17,17 @@
 	zfs_receive_013_pos.ksh \
 	zfs_receive_014_pos.ksh \
 	zfs_receive_015_pos.ksh \
+	zfs_receive_016_pos.ksh \
 	receive-o-x_props_override.ksh \
 	zfs_receive_from_encrypted.ksh \
+	zfs_receive_from_zstd.ksh \
+	zfs_receive_new_props.ksh \
 	zfs_receive_to_encrypted.ksh \
 	zfs_receive_raw.ksh \
 	zfs_receive_raw_incremental.ksh \
-	zfs_receive_-e.ksh
+	zfs_receive_raw_-d.ksh \
+	zfs_receive_-e.ksh \
+	zfs_receive_-wR-encrypted-mix.ksh
+
+dist_pkgdata_DATA = \
+	zstd_test_data.txt

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
index 6f897a9..2d3c15c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh

@@ -259,16 +259,21 @@
 log_must zfs destroy -r -f $dest
 
 #
-# 3.7 Verify we can't receive a send stream overriding or excluding properties
-#     invalid for the dataset type unless the stream it's recursive, in which
-#     case only the appropriate properties are set on the destination.
-#
+# 3.7 Verify we can receive a send stream excluding but not overriding
+#     properties invalid for the dataset type, in which case only the
+#     appropriate properties are set on the destination.
 log_must zfs create -V 128K -s $orig
 log_must zfs snapshot $orig@snap1
 log_must eval "zfs send $orig@snap1 > $streamfile_full"
-log_mustnot eval "zfs receive -x atime $dest < $streamfile_full"
 log_mustnot eval "zfs receive -o atime=off $dest < $streamfile_full"
+log_mustnot eval "zfs receive -o atime=off -x canmount $dest < $streamfile_full"
+log_must eval "zfs receive -x atime -x canmount $dest < $streamfile_full"
+log_must eval "check_prop_source $dest type volume -"
+log_must eval "check_prop_source $dest atime - -"
+log_must eval "check_prop_source $dest canmount - -"
 log_must_busy zfs destroy -r -f $orig
+log_must_busy zfs destroy -r -f $dest
+# Recursive sends also accept (and ignore) such overrides
 log_must zfs create $orig
 log_must zfs create -V 128K -s $origsub
 log_must zfs snapshot -r $orig@snap1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_-wR-encrypted-mix.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_-wR-encrypted-mix.ksh
new file mode 100755
index 0000000..6e27130
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_-wR-encrypted-mix.ksh

@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Attila Fülöp <attila@fueloep.org>
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#      ZFS should receive a raw send of a mix of unencrypted and encrypted
+#      child datasets
+#
+#      The layout of the datasets is:  enc/unenc/enc/unenc
+#
+# STRATEGY:
+# 1. Create the dataset hierarchy
+# 2. Snapshot the dataset hierarchy
+# 3. Send -Rw the dataset hierarchy and receive into a top-level dataset
+# 4. Check the encryption property of the received datasets
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset ds
+	# Remove the source and received hierarchies, if present.
+	for ds in "$TESTPOOL/$TESTFS1" "$TESTPOOL/$TESTFS2"; do
+		datasetexists "$ds" && destroy_dataset "$ds" -r
+	done
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should receive a mix of un/encrypted childs"
+
+typeset src="$TESTPOOL/$TESTFS1"
+typeset dst="$TESTPOOL/$TESTFS2"
+typeset snap="snap"
+# Build the source tree: $src (enc) -> u (plain) -> e (enc) -> u (plain).
+echo "password" | \
+	create_dataset "$src" -o encryption=on -o keyformat=passphrase
+create_dataset "$src/u" -o encryption=off
+echo "password" | \
+	create_dataset "$src/u/e" -o encryption=on -o keyformat=passphrase
+create_dataset "$src/u/e/u" -o encryption=off
+# Raw-replicate the snapshotted tree, then check each level's encryption.
+log_must zfs snapshot -r "$src@$snap"
+log_must eval "zfs send -Rw $src@$snap | zfs receive -u $dst"
+log_must test "$(get_prop 'encryption' $dst)" != "off"
+log_must test "$(get_prop 'encryption' $dst/u)" == "off"
+log_must test "$(get_prop 'encryption' $dst/u/e)" != "off"
+log_must test "$(get_prop 'encryption' $dst/u/e/u)" == "off"
+
+log_pass "ZFS can receive a mix of un/encrypted childs"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh
index f8439dc..8a6cd8c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_001_pos.ksh

@@ -48,11 +48,9 @@
 {
 	typeset -i i=0
 
-	datasetexists $rst_root && \
-		log_must zfs destroy -Rf $rst_root
+	datasetexists $rst_root && destroy_dataset $rst_root -Rf
 	while (( i < 2 )); do
-		snapexists ${orig_snap[$i]} && \
-			log_must zfs destroy -f ${orig_snap[$i]}
+		snapexists ${orig_snap[$i]} && destroy_dataset ${orig_snap[$i]} -f
 		log_must rm -f ${bkup[$i]}
 
 		(( i = i + 1 ))
@@ -63,8 +61,7 @@
 
 function recreate_root
 {
-	datasetexists $rst_root && \
-		log_must zfs destroy -Rf $rst_root
+	datasetexists $rst_root && destroy_dataset $rst_root -Rf
 	if [[ -d $TESTDIR1 ]] ; then
 		log_must rm -rf $TESTDIR1
 	fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_002_pos.ksh
index 36af37a..ba3fc49 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_002_pos.ksh

@@ -50,10 +50,8 @@
 	typeset ds
 
 	while (( i < ${#orig_snap[*]} )); do
-		snapexists ${rst_snap[$i]} && \
-			log_must zfs destroy -f ${rst_snap[$i]}
-		snapexists ${orig_snap[$i]} && \
-			log_must zfs destroy -f ${orig_snap[$i]}
+		snapexists ${rst_snap[$i]} && destroy_dataset ${rst_snap[$i]} -f
+		snapexists ${orig_snap[$i]} && destroy_dataset ${orig_snap[$i]} -f
 		[[ -e ${bkup[$i]} ]] && \
 			log_must rm -rf ${bkup[$i]}
 
@@ -61,8 +59,7 @@
 	done
 
 	for ds in $rst_vol $rst_root; do
-		datasetexists $ds && \
-			log_must zfs destroy -Rf $ds
+		datasetexists $ds && destroy_dataset $ds -Rf
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_003_pos.ksh
index d5f6e09..cce3876 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_003_pos.ksh

@@ -49,7 +49,7 @@
 function cleanup
 {
 	for snap in $snap2 $snap1; do
-		datasetexists $snap && log_must zfs destroy -rf $snap
+		datasetexists $snap && destroy_dataset $snap -rf
 	done
 	for file in $ibackup $mntpnt/file1 $mntpnt/file2; do
 		[[ -f $file ]] && log_must rm -f $file

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh
index 3a9c227..7c115ee 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_004_neg.ksh

@@ -49,8 +49,7 @@
 	typeset bkup
 
 	for snap in $init_snap $inc_snap $init_topsnap $inc_topsnap ; do
-		snapexists $snap && \
-			log_must zfs destroy -Rf $snap
+		snapexists $snap && destroy_dataset $snap -Rf
 	done
 
 	for bkup in $full_bkup $inc_bkup $full_topbkup $inc_topbkup; do
@@ -92,16 +91,11 @@
 
 set -A badargs \
     "" "nonexistent-snap" "blah@blah" "-d" "-d nonexistent-dataset" \
-    "$TESTPOOL/$TESTFS" "$TESTPOOL1" "$TESTPOOL/fs@" "$TESTPOOL/fs@@mysnap" \
+    "$TESTPOOL1" "$TESTPOOL/fs@" "$TESTPOOL/fs@@mysnap" \
     "$TESTPOOL/fs@@" "$TESTPOOL/fs/@mysnap" "$TESTPOOL/fs@/mysnap" \
     "$TESTPOOL/nonexistent-fs/nonexistent-fs" "-d $TESTPOOL/nonexistent-fs" \
     "-d $TESTPOOL/$TESTFS/nonexistent-fs"
 
-if is_global_zone ; then
-	typeset -i n=${#badargs[@]}
-	badargs[$n]="-d $TESTPOOL"
-fi
-
 typeset -i i=0
 while (( i < ${#badargs[*]} ))
 do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_005_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_005_neg.ksh
index 4cbc7e3..d8c71f2 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_005_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_005_neg.ksh

@@ -53,12 +53,10 @@
 	typeset bkup
 
 	for snap in $init_snap $inc_snap; do
-		snapexists $snap && \
-			log_must zfs destroy -f $snap
+		snapexists $snap && destroy_dataset $snap -f
 	done
 
-	datasetexists $rst_root && \
-		log_must zfs destroy -Rf $rst_root
+	datasetexists $rst_root && destroy_dataset $rst_root -Rf
 
 	for bkup in $full_bkup $inc_bkup; do
 		[[ -e $bkup ]] && \
@@ -82,8 +80,8 @@
 log_must eval "zfs send $init_snap > $full_bkup"
 
 log_note "'zfs receive' fails with invalid send streams."
-log_mustnot eval "zfs receive $rst_init_snap < /dev/zero"
-log_mustnot eval "zfs receive -d $rst_root </dev/zero"
+log_mustnot eval "cat </dev/zero | zfs receive $rst_init_snap"
+log_mustnot eval "cat </dev/zero | zfs receive -d $rst_root"
 
 log_must eval "zfs receive $rst_init_snap < $full_bkup"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_006_pos.ksh
index 7338fd2..79f34bd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_006_pos.ksh

@@ -51,7 +51,7 @@
 function cleanup
 {
 	for snap in $snap2 $snap1; do
-		datasetexists $snap && log_must zfs destroy -rf $snap
+		datasetexists $snap && destroy_dataset $snap -rf
 	done
 	for file in $fbackup1 $fbackup2 $mntpnt/file1 $mntpnt/file2; do
 		[[ -f $file ]] && log_must rm -f $file
@@ -59,10 +59,10 @@
 
 	if is_global_zone; then
 		datasetexists $TESTPOOL/$TESTFS/$TESTFS1 && \
-			log_must zfs destroy -rf $TESTPOOL/$TESTFS/$TESTFS1
+			destroy_dataset $TESTPOOL/$TESTFS/$TESTFS1 -rf
 	else
 		datasetexists $TESTPOOL/${ZONE_CTR}0 && \
-			log_must zfs destroy -rf $TESTPOOL/${ZONE_CTR}0
+			destroy_dataset $TESTPOOL/${ZONE_CTR}0 -rf
 	fi
 
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_007_neg.ksh
index 57454dc..fbf0654 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_007_neg.ksh

@@ -47,7 +47,7 @@
 function cleanup
 {
 	for snap in $snap2 $snap1; do
-		datasetexists $snap && log_must zfs destroy -rf $snap
+		datasetexists $snap && destroy_dataset $snap -rf
 	done
 	for file in $ibackup $mntpnt/file1 $mntpnt/file2; do
 		[[ -f $file ]] && log_must rm -f $file

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_008_pos.ksh
index 1729b59..dc4892b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_008_pos.ksh

@@ -47,9 +47,7 @@
 function cleanup
 {
 	for dset in $rst_snap $rst_fs $orig_snap; do
-		if datasetexists $dset; then
-			log_must zfs destroy -fr $dset
-		fi
+		datasetexists $dset && destroy_dataset $dset -fr
 	done
 
 	for file in $fbackup $mnt_file $tmp_out; do
@@ -59,7 +57,7 @@
 	done
 
 	if datasetexists $TESTPOOL/$TESTFS; then
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS
+		destroy_dataset $TESTPOOL/$TESTFS -Rf
 		log_must zfs create $TESTPOOL/$TESTFS
 		log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 	fi
@@ -99,9 +97,7 @@
 	log_must eval "zfs send $orig_snap > $fbackup"
 
 	for opt in "-v"  "-vn"; do
-		if datasetexists $rst_fs; then
-			log_must zfs destroy -fr $rst_fs
-		fi
+		datasetexists $rst_fs && destroy_dataset $rst_fs -fr
 		log_note "Check ZFS receive $opt [<filesystem|snapshot>]"
 		log_must eval "zfs receive $opt $rst_fs < $fbackup > $tmp_out 2>&1"
 		if [[ $opt == "-v" ]]; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_009_neg.ksh
index d028aca..37fe515 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_009_neg.ksh

@@ -48,13 +48,10 @@
 {
 	typeset ds
 
-	if snapexists $snap; then
-		log_must zfs destroy $snap
-	fi
+	snapexists $snap && destroy_dataset $snap
+
 	for ds in $ctr1 $ctr2 $fs1; do
-		if datasetexists $ds; then
-			log_must zfs destroy -rf $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds -rf
 	done
 	if [[ -d $TESTDIR2 ]]; then
 		rm -rf $TESTDIR2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh
index 5d7a704..e1e93e9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh

@@ -39,7 +39,7 @@
 function cleanup
 {
     for fs in $src_fs $dst_fs; do
-        datasetexists $fs && log_must zfs destroy -rf $fs
+        datasetexists $fs && destroy_dataset $fs -rf
     done
     zpool destroy $temppool
     [[ -f $streamfile ]] && log_must rm -f $streamfile
@@ -67,6 +67,8 @@
 
 log_must eval "zfs send -D -R $src_fs@snap3 > $streamfile"
 log_must eval "zfs receive -v $dst_fs < $streamfile"
+log_must zfs destroy -r $dst_fs
+log_must eval "zstream redup $streamfile | zfs receive -v $dst_fs"
 
 cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos.ksh
index be04aed..989d31b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos.ksh

@@ -55,31 +55,6 @@
 	log_must zfs destroy -rf $dest
 }
 
-#
-# Verify property $2 is set from source $4 on dataset $1 and has value $3.
-#
-# $1 checked dataset
-# $2 user property
-# $3 property value
-# $4 source
-#
-function check_prop_source
-{
-	typeset dataset=$1
-	typeset prop=$2
-	typeset value=$3
-	typeset source=$4
-	typeset chk_value=$(get_prop "$prop" "$dataset")
-	typeset chk_source=$(get_source "$prop" "$dataset")
-	if [[ "$chk_value" != "$value" || \
-	    "$chk_source" != "$4" ]]
-	then
-		return 1
-	else
-		return 0
-	fi
-}
-
 log_assert "ZFS successfully receive and restore properties."
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_016_pos.ksh
new file mode 100755
index 0000000..04d20eb
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_016_pos.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2020 by Mariusz Zaborski <oshogbo@FreeBSD.org>.
+
+#
+# DESCRIPTION:
+# Verify 'zfs recv' can forcibly unmount filesystem while receiving
+# stream.
+#
+# STRATEGY:
+# 1. Create snapshot of file system
+# 2. Make a zfs filesystem mountpoint busy
+# 3. Receive filesystem with force flag.
+# 4. Verify that the stream is received, or that the receive fails on Linux.
+#
+
+. $STF_SUITE/tests/functional/cli_root/cli_common.kshlib
+
+verify_runnable "both"
+
+function cleanup
+{
+	# The test cd's into $TESTDIR1 to keep that mountpoint busy; go back
+	# to the script's directory before tearing anything down.
+	cd $curpath
+
+	# Both the source snapshot and its received copy may exist.
+	for snap in $init_snap $rst_snap; do
+		snapexists $snap && destroy_snapshot $snap
+	done
+
+	datasetexists $rst_root && destroy_dataset $rst_root
+
+	# Drop the saved send stream if it was written.
+	for file in $full_bkup; do
+		[[ -e $file ]] && log_must rm -f $file
+	done
+
+	# Finally remove the directory used as the received root's mountpoint.
+	[[ -d $TESTDIR1 ]] && log_must rm -rf $TESTDIR1
+}
+
+log_assert "Verify 'zfs recv' can forcibly unmount busy filesystem."
+log_onexit cleanup
+
+curpath=$(dirname $0)
+init_snap=$TESTPOOL/$TESTFS@init_snap
+full_bkup=$TEST_BASE_DIR/fullbkup.$$
+rst_root=$TESTPOOL/rst_ctr
+rst_snap=$rst_root@init_snap
+
+log_note "Verify 'zfs recv' can forcible unmount busy filesystem."
+
+# Preparation: a receive target mounted at $TESTDIR1, plus a full stream
+# of the source snapshot saved to $full_bkup.
+log_must zfs create $rst_root
+[[ -d $TESTDIR1 ]] || log_must mkdir -p $TESTDIR1
+log_must zfs set mountpoint=$TESTDIR1 $rst_root
+
+log_must zfs snapshot $init_snap
+log_must eval "zfs send $init_snap > $full_bkup"
+
+# Test: sit inside the mountpoint so it is busy, then force the receive.
+log_must cd $TESTDIR1
+if ! is_linux; then
+	log_must zfs receive -MF $rst_snap < $full_bkup
+else
+	# Linux does not support it.
+	log_mustnot zfs receive -MF $rst_snap < $full_bkup
+fi
+
+log_pass "The busy filesystem was unmounted or busy as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh
index a1d094b..8914326 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh

@@ -41,10 +41,10 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh
new file mode 100755
index 0000000..72eebb4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_zstd.ksh

@@ -0,0 +1,112 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should receive a ZSTD compressed block and be able to determine the level
+#
+# STRATEGY:
+# 1. Create a ZSTD compressed dataset (random level)
+# 2. Create and checksum a file on the compressed dataset
+# 3. Snapshot the compressed dataset
+# 4. Attempt to receive the snapshot into a new dataset
+# 5. Verify the checksum of the file is the same as the original
+# 6. Verify the compression level is correctly stored
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset ds
+	# Remove the compressed source and its received copy, if present.
+	for ds in $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS2; do
+		datasetexists $ds && destroy_dataset $ds -r
+	done
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should track compression level when receiving a ZSTD stream"
+
+typeset src_data="$STF_SUITE/tests/functional/cli_root/zfs_receive/zstd_test_data.txt"
+typeset snap="$TESTPOOL/$TESTFS1@snap"
+# Pick a ZSTD level in the range 1-19 for this run.
+random_level=$((RANDOM%19 + 1))
+log_note "Randomly selected ZSTD level: $random_level"
+
+log_must zfs create -o compress=zstd-$random_level $TESTPOOL/$TESTFS1
+# Make a 5kb compressible file; redirect inside eval so only cat writes it
+log_must eval "cat $src_data $src_data $src_data $src_data $src_data" \
+    "> /$TESTPOOL/$TESTFS1/$TESTFILE0"
+typeset checksum=$(md5digest /$TESTPOOL/$TESTFS1/$TESTFILE0)
+
+log_must zfs snapshot $snap
+
+# Get the file's object number: the first field that 'ls -i' prints.
+listing=$(ls -i /$TESTPOOL/$TESTFS1/$TESTFILE0)
+set -A array $listing
+obj=${array[0]}
+log_note "file /$TESTPOOL/$TESTFS1/$TESTFILE0 has object number $obj"
+# Keep the first zdb output line that mentions "L0 DVA" for this object.
+output=$(zdb -Zddddddbbbbbb $TESTPOOL/$TESTFS1 $obj 2> /dev/null \
+    |grep -m 1 "L0 DVA" |head -n1)
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*$/\1/p' <<< "$output")
+log_note "block 0 of /$TESTPOOL/$TESTFS1/$TESTFILE0 has a DVA of $dva"
+# Pull size, version and level out of the " ZSTD:size=..." text on that line.
+zstd_str=$(sed -Ene 's/^.+ ZSTD:size=([^:]+):version=([^:]+):level=([^:]+):.*$/\1:\2:\3/p' <<< "$output")
+zstd_size1=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[1]}')
+zstd_version1=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[2]}')
+zstd_level1=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[3]}')
+log_note "ZSTD src: size=$zstd_size1 version=$zstd_version1 level=$zstd_level1"
+
+log_note "Verify ZFS can receive the ZSTD compressed stream"
+log_must eval "zfs send -ec $snap | zfs receive $TESTPOOL/$TESTFS2"
+
+typeset cksum1=$(md5digest /$TESTPOOL/$TESTFS2/$TESTFILE0)
+[[ "$cksum1" == "$checksum" ]] || \
+	log_fail "Checksums differ ($cksum1 != $checksum)"
+
+# Same lookup for the received copy of the file.
+listing=$(ls -i /$TESTPOOL/$TESTFS2/$TESTFILE0)
+set -A array $listing
+obj=${array[0]}
+log_note "file /$TESTPOOL/$TESTFS2/$TESTFILE0 has object number $obj"
+# Keep the first zdb output line that mentions "L0 DVA" for this object.
+output=$(zdb -Zddddddbbbbbb $TESTPOOL/$TESTFS2 $obj 2> /dev/null \
+    |grep -m 1 "L0 DVA" |head -n1)
+dva=$(sed -Ene 's/^.+DVA\[0\]=<([^>]+)>.*$/\1/p' <<< "$output")
+log_note "block 0 of /$TESTPOOL/$TESTFS2/$TESTFILE0 has a DVA of $dva"
+# NOTE(review): an unmatched sed leaves these empty - confirm (( )) copes.
+zstd_str=$(sed -Ene 's/^.+ ZSTD:size=([^:]+):version=([^:]+):level=([^:]+):.*$/\1:\2:\3/p' <<< "$output")
+zstd_size2=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[1]}')
+(( $zstd_size2 != $zstd_size1 )) && log_fail \
+"ZFS recv failed: compressed size differs ($zstd_size2 != $zstd_size1)"
+zstd_version2=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[2]}')
+zstd_level2=$(echo "$zstd_str" |awk '{split($0,array,":")} END{print array[3]}')
+log_note "ZSTD dest: size=$zstd_size2 version=$zstd_version2 level=$zstd_level2"
+(( $zstd_level2 != $zstd_level1 )) && log_fail \
+"ZFS recv failed: compression level did not match header level ($zstd_level2 != $zstd_level1)"
+
+log_pass "ZFS can receive a ZSTD stream and determine the compression level"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_new_props.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_new_props.ksh
new file mode 100755
index 0000000..54f1335
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_new_props.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS receive test to handle Issue #10698
+#
+# STRATEGY:
+# 1. Create a pool with filesystem_limits disabled
+# 2. Create a filesystem on that pool
+# 3. Enable filesystem limits on that pool
+# 4. On a pool with filesystem limits enabled, create a filesystem and set a
+#    limit
+# 5. Snapshot limited filesystem
+# 6. send -R limited filesystem and receive over filesystem with limits disabled
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset item	# pools first, then their backing files and the stream
+	for item in "$poolname" "$rpoolname"; do destroy_pool "$item"; done
+	for item in "$vdevfile" "$rvdevfile" "$streamfile"; do
+		log_must rm -f "$item"
+	done
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should handle receiving streams with filesystem limits on \
+	pools where the feature was recently enabled"
+
+poolname=sendpool
+rpoolname=recvpool
+vdevfile="$TEST_BASE_DIR/vdevfile.$$"
+rvdevfile="$TEST_BASE_DIR/rvdevfile.$$"
+sendfs="$poolname/fs"
+recvfs="$rpoolname/rfs"
+streamfile="$TEST_BASE_DIR/streamfile.$$"
+
+log_must truncate -s $MINVDEVSIZE "$rvdevfile"
+log_must truncate -s $MINVDEVSIZE "$vdevfile"
+log_must zpool create -O mountpoint=none -o feature@filesystem_limits=disabled \
+	 "$rpoolname" "$rvdevfile"
+log_must zpool create -O mountpoint=none "$poolname" "$vdevfile"
+# The receive target is created before filesystem_limits gets enabled.
+log_must zfs create "$recvfs"
+log_must zpool set feature@filesystem_limits=enabled "$rpoolname"
+# The send side sets a filesystem_limit on a pool created with defaults.
+log_must zfs create -o filesystem_limit=100 "$sendfs"
+log_must zfs snapshot "$sendfs@a"
+# Redirect inside eval (as the recv does) so only zfs send writes the stream.
+log_must eval "zfs send -R $sendfs@a >$streamfile"
+log_must eval "zfs recv -svuF $recvfs <$streamfile"
+
+log_pass "ZFS can handle receiving streams with filesystem limits on \
+	pools where the feature was recently enabled"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh
index 9740caf..32b05e5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh

@@ -44,10 +44,10 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_-d.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_-d.ksh
new file mode 100755
index 0000000..662f938
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_-d.ksh

@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# zfs receive -d should create the expected encryption hierarchy.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset and an inheriting child
+# 2. Snapshot the child dataset
+# 3. Create a recursive raw send file from the snapshot
+# 4. Destroy the original child filesystem
+# 5. Receive the send file into $TESTPOOL with '-d'
+# 6. Verify the new child can be mounted
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset ds="$TESTPOOL/$TESTFS1"	# encrypted dataset this test creates
+	datasetexists $ds && destroy_dataset $ds -r
+	rm -f $sendfile
+}
+
+log_onexit cleanup
+
+log_assert "zfs receive -d should create the expected encryption hierarchy"
+
+typeset passphrase="password1"
+# The raw replication stream is staged in this file between send and receive.
+sendfile=$TEST_BASE_DIR/sendfile.$$
+# Encrypted parent, plus a child created with no encryption options of its own.
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+	"-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+log_must zfs create $TESTPOOL/$TESTFS1/child
+log_must zfs snapshot $TESTPOOL/$TESTFS1/child@snap
+log_must eval "zfs send -Rw $TESTPOOL/$TESTFS1/child@snap > $sendfile"
+log_must zfs destroy -r $TESTPOOL/$TESTFS1/child
+log_must zfs receive -Fd $TESTPOOL < $sendfile
+log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/child"
+
+log_pass "zfs receive -d creates the expected encryption hierarchy"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh
index c52a12e..7826ec9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh

@@ -43,10 +43,10 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 
 	[[ -f $ibackup ]] && log_must rm -f $ibackup
 	[[ -f $ibackup_trunc ]] && log_must rm -f $ibackup_trunc
@@ -77,7 +77,7 @@
 log_must eval "echo $passphrase2 | zfs change-key $TESTPOOL/$TESTFS1"
 log_must eval "zfs send -w -i $snap1 $snap2 > $ibackup"
 
-typeset trunc_size=$(stat -c %s $ibackup)
+typeset trunc_size=$(stat_size $ibackup)
 trunc_size=$(expr $trunc_size - 64)
 log_must cp $ibackup $ibackup_trunc
 log_must truncate -s $trunc_size $ibackup_trunc

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh
index f8e53f0..7e12d30 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh

@@ -25,23 +25,30 @@
 # ZFS should receive to an encrypted child dataset.
 #
 # STRATEGY:
-# 1. Snapshot the default dataset
-# 2. Create an encrypted dataset
-# 3. Attempt to receive a stream to an encrypted child
-# 4. Attempt to receive a stream with properties to an encrypted child
-# 5. Attempt to receive a replication stream to an encrypted child
-# 6. Unmount and unload the encrypted dataset keys
-# 7. Attempt to receive a snapshot stream to an encrypted child
+#  1. Snapshot the default dataset
+#  2. Create an encrypted dataset
+#  3. Attempt to receive a stream to an encrypted child
+#  4. Unload the key
+#  5. Attempt to receive an incremental stream to an encrypted child (must fail)
+#  6. Attempt to receive a stream with properties to an unencrypted child
+#  7. Attempt to receive an incremental stream to an unencrypted child
+#  8. Attempt to receive with -o encryption=off to an unencrypted child
+#  9. Attempt to receive a replication stream to an unencrypted child
+# 10. Attempt to receive a snapshot stream to an encrypted child (must fail)
 #
 
 verify_runnable "both"
 
 function cleanup
 {
-	snapexists $snap && log_must_busy zfs destroy -f $snap
+	datasetexists $TESTPOOL/encrypted && \
+		destroy_dataset $TESTPOOL/encrypted -r
+
+	snapexists $snap && destroy_dataset $snap -f
+	snapexists $snap2 && destroy_dataset $snap2 -f
 
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 
 log_onexit cleanup
@@ -50,28 +57,58 @@
 
 typeset passphrase="password"
 typeset snap="$TESTPOOL/$TESTFS@snap"
+typeset snap2="$TESTPOOL/$TESTFS@snap2"
 typeset testfile="testfile"
 
 log_must zfs snapshot $snap
+log_must zfs snapshot $snap2
 
 log_must eval "echo $passphrase | zfs create -o encryption=on" \
 	"-o keyformat=passphrase $TESTPOOL/$TESTFS1"
 
 log_note "Verifying ZFS will receive to an encrypted child"
-log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1"
+log_must eval "zfs send $snap | zfs receive -u $TESTPOOL/$TESTFS1/c1"
+log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c1)" != "off"
 
-log_note "Verifying 'send -p' will receive to an encrypted child"
-log_must eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2"
+# Unload the key; the following tests won't require it, and doing so lets
+# us exercise the receive checks as well.
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_note "Verifying ZFS will not receive an incremental into an encrypted" \
+	 "dataset when the key is unloaded"
+log_mustnot eval "zfs send -i $snap $snap2 | zfs receive $TESTPOOL/$TESTFS1/c1"
+
+log_note "Verifying 'send -p' will receive to an unencrypted child"
+log_must eval "zfs send -p $snap | zfs receive -u $TESTPOOL/$TESTFS1/c2"
 log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c2)" == "off"
 
-log_note "Verifying 'send -R' will receive to an encrypted child"
+log_note "Verifying 'send -i' will receive to an unencrypted child"
+log_must eval "zfs send -i $snap $snap2 | zfs receive $TESTPOOL/$TESTFS1/c2"
+
+# For completeness add the property override case.
+log_note "Verifying 'recv -o encryption=off' will receive to an unencrypted child"
+log_must eval "zfs send $snap | \
+	zfs receive -o encryption=off $TESTPOOL/$TESTFS1/c2o"
+log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c2o)" == "off"
+
+log_note "Verifying 'send -R' will receive to an unencrypted child"
 log_must eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3"
 log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c3)" == "off"
 
 log_note "Verifying ZFS will not receive to an encrypted child when the" \
 	"parent key is unloaded"
-log_must zfs unmount $TESTPOOL/$TESTFS1
-log_must zfs unload-key $TESTPOOL/$TESTFS1
 log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4"
 
+# Verify that replication can override encryption properties
+log_note "Verifying replication can override encryption properties for plain dataset"
+typeset key_location="/$TESTPOOL/pkey1"
+log_must eval "echo $passphrase > $key_location"
+log_must eval "zfs send -R $snap2 | zfs recv -s -F -o encryption=on" \
+	"-o keyformat=passphrase -o keylocation=file://$key_location" \
+	"-o mountpoint=none $TESTPOOL/encrypted"
+log_must test "$(get_prop 'encryption' $TESTPOOL/encrypted)" != "off"
+log_must test "$(get_prop 'keyformat' $TESTPOOL/encrypted)" == "passphrase"
+log_must test "$(get_prop 'keylocation' $TESTPOOL/encrypted)" == "file://$key_location"
+
 log_pass "ZFS can receive encrypted filesystems into child dataset"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zstd_test_data.txt b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zstd_test_data.txt
new file mode 100644
index 0000000..da6a0c7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zstd_test_data.txt

@@ -0,0 +1 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim..

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile.am
deleted file mode 100644
index 91abff6..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile.am
+++ /dev/null

@@ -1,7 +0,0 @@
-pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_remap
-
-dist_pkgdata_SCRIPTS = \
-	setup.ksh \
-	cleanup.ksh \
-	zfs_remap_cliargs.ksh \
-	zfs_remap_obsolete_counts.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh
deleted file mode 100755
index e78deac..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh
+++ /dev/null

@@ -1,19 +0,0 @@
-#!/bin/ksh -p
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh
deleted file mode 100755
index 4497dbd..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh
+++ /dev/null

@@ -1,17 +0,0 @@
-#!/bin/ksh -p
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh
deleted file mode 100755
index 80a5e6e..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh
+++ /dev/null

@@ -1,81 +0,0 @@
-#!/bin/ksh -p
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/removal/removal.kshlib
-
-#
-# DESCRIPTION:
-# 'zfs remap' should only work with supported parameters.
-#
-# STRATEGY:
-# 1. Prepare a pool where a top-level VDEV has been removed
-# 2. Verify every supported parameter to 'zfs remap' is accepted
-# 3. Verify other unsupported parameters raise an error
-#
-
-# The 'zfs remap' command has been disabled and may be removed.
-export ZFS_REMAP_ENABLED=YES
-
-verify_runnable "both"
-
-function cleanup
-{
-	destroy_pool $TESTPOOL
-	rm -f $DISK1 $DISK2
-}
-
-log_assert "'zfs remap' should only work with supported parameters"
-log_onexit cleanup
-
-f="$TESTPOOL/fs"
-v="$TESTPOOL/vol"
-s="$TESTPOOL/fs@snap"
-b="$TESTPOOL/fs#bmark"
-c="$TESTPOOL/clone"
-
-typeset goodparams=("$f" "$v" "$c")
-typeset badparams=("-H" "-p" "-?" "$s" "$b" "$f $f" "$f $v" "$f $s")
-
-DISK1="$TEST_BASE_DIR/zfs_remap-1"
-DISK2="$TEST_BASE_DIR/zfs_remap-2"
-
-# 1. Prepare a pool where a top-level VDEV has been removed
-log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1
-log_must zpool create $TESTPOOL $DISK1
-log_must zfs create $f
-log_must zfs create -V 1M -s $v
-log_must zfs snap $s
-log_must zfs bookmark $s $b
-log_must zfs clone $s $c
-log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2
-log_must zpool add $TESTPOOL $DISK2
-log_must zpool remove $TESTPOOL $DISK1
-log_must wait_for_removal $TESTPOOL
-
-# 2. Verify every supported parameter to 'zfs remap' is accepted
-for param in "${goodparams[@]}"
-do
-	log_must zfs remap $param
-done
-
-# 3. Verify other unsupported parameters raise an error
-for param in "${badparams[@]}"
-do
-	log_mustnot zfs remap $param
-done
-
-log_pass "'zfs remap' only works with supported parameters"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh
deleted file mode 100755
index 1f0e0e8..0000000
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh
+++ /dev/null

@@ -1,79 +0,0 @@
-#!/bin/ksh -p
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/removal/removal.kshlib
-
-#
-# DESCRIPTION:
-# 'zfs remap' depends on 'feature@obsolete_counts' being active
-#
-# STRATEGY:
-# 1. Prepare a pool where a top-level VDEV has been removed and with
-#    feature@obsolete_counts disabled
-# 2. Verify any 'zfs remap' command cannot be executed
-# 3. Verify the same commands complete successfully when
-#    feature@obsolete_counts is enabled
-#
-
-# N.B. The 'zfs remap' command has been disabled and may be removed.
-export ZFS_REMAP_ENABLED=YES
-
-verify_runnable "both"
-
-function cleanup
-{
-	destroy_pool $TESTPOOL
-	rm -f $DISK1 $DISK2
-}
-
-log_assert "'zfs remap' depends on feature@obsolete_counts being active"
-log_onexit cleanup
-
-f="$TESTPOOL/fs"
-v="$TESTPOOL/vol"
-s="$TESTPOOL/fs@snap"
-c="$TESTPOOL/clone"
-
-DISK1="$TEST_BASE_DIR/zfs_remap-1"
-DISK2="$TEST_BASE_DIR/zfs_remap-2"
-
-# 1. Prepare a pool where a top-level VDEV has been removed with
-#    feature@obsolete_counts disabled
-log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1
-log_must zpool create -o feature@obsolete_counts=disabled $TESTPOOL $DISK1
-log_must zfs create $f
-log_must zfs create -V 1M -s $v
-log_must zfs snap $s
-log_must zfs clone $s $c
-log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2
-log_must zpool add $TESTPOOL $DISK2
-log_must zpool remove $TESTPOOL $DISK1
-log_must wait_for_removal $TESTPOOL
-
-# 2. Verify any 'zfs remap' command cannot be executed
-log_mustnot zfs remap $f
-log_mustnot zfs remap $v
-log_mustnot zfs remap $c
-
-# 3. Verify the same commands complete successfully when
-#    feature@obsolete_counts is enabled
-log_must zpool set feature@obsolete_counts=enabled $TESTPOOL
-log_must zfs remap $f
-log_must zfs remap $v
-log_must zfs remap $c
-
-log_pass "'zfs remap' correctly depends on feature@obsolete_counts being active"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am
index 406e278..f8273d7 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am

@@ -18,7 +18,8 @@
 	zfs_rename_014_neg.ksh \
 	zfs_rename_encrypted_child.ksh \
 	zfs_rename_to_encrypted.ksh \
-	zfs_rename_mountpoint.ksh
+	zfs_rename_mountpoint.ksh \
+	zfs_rename_nounmount.ksh
 
 dist_pkgdata_DATA = \
 	zfs_rename.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.kshlib
index 9b8fb6b..af1c2f7 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.kshlib

@@ -108,13 +108,11 @@
                 ((i = i + 1))
 	done
 
-	if snapexists $TESTPOOL/$TESTFS@snapshot; then
-		log_must zfs destroy -fR $TESTPOOL/$TESTFS@snapshot
-	fi
+	snapexists $TESTPOOL/$TESTFS@snapshot && \
+		 destroy_dataset $TESTPOOL/$TESTFS@snapshot -fR
 
-	if datasetexists $TESTPOOL/$RECVFS; then
-		log_must zfs destroy -r $TESTPOOL/$RECVFS
-	fi
+	datasetexists $TESTPOOL/$RECVFS && \
+		destroy_dataset $TESTPOOL/$RECVFS -r
 }
 
 function cmp_data #<$1 src data, $2 tgt data>

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_003_pos.ksh
index 56c06cf..0bd4aca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_003_pos.ksh

@@ -44,7 +44,7 @@
 
 function cleanup
 {
-	datasetexists $snap && log_must zfs destroy $snap
+	datasetexists $snap && destroy_dataset $snap
 }
 
 log_assert "'zfs rename' can address the abbreviated snapshot name."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_006_pos.ksh
index 3ad7d4e..4d16051 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_006_pos.ksh

@@ -69,7 +69,7 @@
 
 clone=$TESTPOOL/${snap}_clone
 create_clone $vol@$snap $clone
-block_device_wait
+block_device_wait $VOLDATA
 
 #verify data integrity
 for input in $VOL_R_PATH $ZVOL_RDEVDIR/$clone; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_007_pos.ksh
index 3623d2b..2a3f8a8 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_007_pos.ksh

@@ -46,9 +46,8 @@
 
 function cleanup
 {
-	if datasetexists $TESTPOOL/$TESTFS ; then
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS
-	fi
+	datasetexists $TESTPOOL/$TESTFS && \
+		destroy_dataset $TESTPOOL/$TESTFS -Rf
 	log_must zfs create $TESTPOOL/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 
@@ -117,26 +116,26 @@
 if is_global_zone; then
 	vol=$TESTPOOL/$TESTFS/vol.$$ ;	volclone=$TESTPOOL/$TESTFS/volclone.$$
 	log_must zfs create -V 100M $vol
-	block_device_wait
 
 	obj=$(target_obj $vol)
+	block_device_wait $obj
 	log_must dd if=$SRC_FILE of=$obj bs=$BS count=$CNT
 
 	snap=${vol}@snap.$$
 	log_must zfs snapshot $snap
 	log_must zfs clone $snap $volclone
-	block_device_wait
 
 	# Rename dataset & clone
 	log_must zfs rename $vol ${vol}-new
 	log_must zfs rename $volclone ${volclone}-new
-	block_device_wait
 
 	# Compare source file and target file
 	obj=$(target_obj ${vol}-new)
+	block_device_wait $obj
 	log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
 	log_must diff $SRC_FILE $DST_FILE
 	obj=$(target_obj ${volclone}-new)
+	block_device_wait $obj
 	log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
 	log_must diff $SRC_FILE $DST_FILE
 
@@ -144,10 +143,10 @@
 	log_must zfs rename ${vol}-new $vol
 	log_must zfs rename $snap ${snap}-new
 	log_must zfs clone ${snap}-new $volclone
-	block_device_wait
 
 	# Compare source file and target file
 	obj=$(target_obj $volclone)
+	block_device_wait $obj
 	log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
 	log_must diff $SRC_FILE $DST_FILE
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_008_pos.ksh
index 3fc099d..2291638 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_008_pos.ksh

@@ -47,12 +47,11 @@
 {
 	typeset -i i=0
 	while ((i < ${#datasets[@]})); do
-		if datasetexists ${datasets[$i]}@snap ; then
-			log_must zfs destroy ${datasets[$i]}@snap
-		fi
-		if datasetexists ${datasets[$i]}@snap-new ; then
-			log_must zfs destroy ${datasets[$i]}@snap-new
-		fi
+		datasetexists ${datasets[$i]}@snap && \
+			destroy_dataset ${datasets[$i]}@snap
+
+		datasetexists ${datasets[$i]}@snap-new && \
+			destroy_dataset ${datasets[$i]}@snap-new
 
 		((i += 1))
 	done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_011_pos.ksh
index 2d1220e..71d7261 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_011_pos.ksh

@@ -46,19 +46,18 @@
 
 function additional_cleanup
 {
-	if datasetexists $TESTPOOL/notexist ; then
-		log_must zfs destroy -Rf $TESTPOOL/notexist
-	fi
+	datasetexists $TESTPOOL/notexist && \
+		destroy_dataset $TESTPOOL/notexist -Rf
 
-	if datasetexists $TESTPOOL/$TESTFS ; then
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS
-	fi
+	datasetexists $TESTPOOL/$TESTFS && \
+		destroy_dataset $TESTPOOL/$TESTFS -Rf
+
 	log_must zfs create $TESTPOOL/$TESTFS
 
 	if is_global_zone ; then
-		if datasetexists $TESTPOOL/$TESTVOL ; then
-			log_must zfs destroy -Rf $TESTPOOL/$TESTVOL
-		fi
+		datasetexists $TESTPOOL/$TESTVOL && \
+			destroy_dataset $TESTPOOL/$TESTVOL -Rf
+
 		log_must zfs create -V $VOLSIZE $TESTPOOL/$TESTVOL
 	fi
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos.ksh
index b2e0100..73790f5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos.ksh

@@ -46,21 +46,17 @@
 
 function cleanup
 {
-	if datasetexists $TESTPOOL/$TESTCTR@snap-new ; then
-		log_must zfs destroy -f $TESTPOOL/$TESTCTR@snap-new
-	fi
+	datasetexists $TESTPOOL/$TESTCTR@snap-new && \
+		destroy_dataset $TESTPOOL/$TESTCTR@snap-new -f
 
-	if datasetexists $TESTPOOL/$TESTCTR@snap ; then
-		log_must zfs destroy -f $TESTPOOL/$TESTCTR@snap
-	fi
+	datasetexists $TESTPOOL/$TESTCTR@snap && \
+		destroy_dataset $TESTPOOL/$TESTCTR@snap -f
 
-	if datasetexists $TESTPOOL@snap-new ; then
-		log_must zfs destroy -f $TESTPOOL@snap-new
-	fi
+	datasetexists $TESTPOOL@snap-new && \
+		destroy_dataset $TESTPOOL@snap-new -f
 
-	if datasetexists $TESTPOOL@snap ; then
-		log_must zfs destroy -f $TESTPOOL@snap
-	fi
+	datasetexists $TESTPOOL@snap && \
+		destroy_dataset $TESTPOOL@snap -f
 }
 
 log_assert "zfs rename -r can rename snapshot when child datasets" \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_014_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_014_neg.ksh
index 7d99e9f..1c96260 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_014_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_014_neg.ksh

@@ -81,7 +81,7 @@
 	# before resetting it, it will be left at the modified
 	# value for the remaining tests. That's the reason
 	# we reset it again here just in case.
-	log_must set_tunable_impl zfs_max_dataset_nesting 50 Z zcommon
+	log_must set_tunable_impl MAX_DATASET_NESTING 50 Z zcommon
 }
 
 log_onexit nesting_cleanup
@@ -93,13 +93,13 @@
 log_mustnot zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB15A
 
 # extend limit
-log_must set_tunable_impl zfs_max_dataset_nesting 64 Z zcommon
+log_must set_tunable_impl MAX_DATASET_NESTING 64 Z zcommon
 
 log_mustnot zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB16A
 log_must zfs rename $TESTPOOL/$dsA02 $TESTPOOL/$dsB15A
 
 # bring back old limit
-log_must set_tunable_impl zfs_max_dataset_nesting 50 Z zcommon
+log_must set_tunable_impl MAX_DATASET_NESTING 50 Z zcommon
 
 log_mustnot zfs rename $TESTPOOL/$dsC01 $TESTPOOL/$dsB15A47C
 log_must zfs rename $TESTPOOL/$dsB15A47A $TESTPOOL/$dsB15A47B

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh
index fa57658..2366cf6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh

@@ -42,9 +42,9 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 	datasetexists $TESTPOOL/$TESTFS3 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS3
+		destroy_dataset $TESTPOOL/$TESTFS3 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_mountpoint.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_mountpoint.ksh
index 4d2b94d..7ec6b2a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_mountpoint.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_mountpoint.ksh

@@ -34,8 +34,8 @@
 
 function rename_cleanup
 {
-	log_note zfs destroy -fR $TESTPOOL/rename_test
-	log_note zfs destroy -fR $TESTPOOL/renamed
+	zfs destroy -fR $TESTPOOL/rename_test
+	zfs destroy -fR $TESTPOOL/renamed
 }
 
 log_onexit rename_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_nounmount.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_nounmount.ksh
new file mode 100755
index 0000000..1c70776
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_nounmount.ksh

@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet
+# at http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	zfs rename -u should rename datasets without unmounting them
+#
+# STRATEGY:
+#	1. Create a set of nested datasets.
+#	2. Verify datasets are mounted.
+#	3. Rename with -u and verify all datasets stayed mounted.
+#
+
+verify_runnable "both"
+
+function rename_cleanup
+{
+	cd $back
+	zfs destroy -fR $TESTPOOL/rename_test
+	zfs destroy -fR $TESTPOOL/renamed
+}
+
+back=$(pwd)
+log_onexit rename_cleanup
+
+log_must zfs create $TESTPOOL/rename_test
+log_must zfs create $TESTPOOL/rename_test/child
+log_must zfs create $TESTPOOL/rename_test/child/grandchild
+
+if ! ismounted $TESTPOOL/rename_test; then
+	log_fail "$TESTPOOL/rename_test is not mounted"
+fi
+if ! ismounted $TESTPOOL/rename_test/child; then
+	log_fail "$TESTPOOL/rename_test/child is not mounted"
+fi
+if ! ismounted $TESTPOOL/rename_test/child/grandchild; then
+	log_fail "$TESTPOOL/rename_test/child/grandchild is not mounted"
+fi
+
+mntp_p=$(get_prop mountpoint $TESTPOOL/rename_test)
+mntp_c=$(get_prop mountpoint $TESTPOOL/rename_test/child)
+mntp_g=$(get_prop mountpoint $TESTPOOL/rename_test/child/grandchild)
+
+log_must cd $mntp_g
+log_mustnot zfs rename $TESTPOOL/rename_test $TESTPOOL/renamed
+log_must zfs rename -u $TESTPOOL/rename_test $TESTPOOL/renamed
+
+log_mustnot zfs list $TESTPOOL/rename_test
+log_mustnot zfs list $TESTPOOL/rename_test/child
+log_mustnot zfs list $TESTPOOL/rename_test/child/grandchild
+
+log_must zfs list $TESTPOOL/renamed
+log_must zfs list $TESTPOOL/renamed/child
+log_must zfs list $TESTPOOL/renamed/child/grandchild
+
+missing=$(zfs mount | awk -v pat=$TESTPOOL/renamed '$1 ~ pat' | awk \
+    -v mntp_p=$mntp_p \
+    -v mntp_c=$mntp_c \
+    -v mntp_g=$mntp_g '
+    BEGIN { p = c = g = 0 }
+    $2 == mntp_p { p = 1 }
+    $2 == mntp_c { c = 1 }
+    $2 == mntp_g { g = 1 }
+    END {
+	if (p != 1)
+		print mntp_p
+	if (c != 1)
+		print mntp_c
+	if (g != 1)
+		print mntp_g
+    }')
+[[ -z "$missing" ]] || log_fail "Mountpoints no longer mounted: $missing"
+
+log_pass "Verified rename -u does not unmount datasets"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh
index 1b9c6e3..ab8e1c8 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh

@@ -37,7 +37,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh
index 5511f6a..607bbf0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_001_pos.ksh

@@ -76,13 +76,14 @@
 		pkill -x dd
 	fi
 
-	datasetexists $FS && log_must zfs destroy -Rf $FS
+	datasetexists $FS && destroy_dataset $FS -Rf
 	if datasetexists $VOL; then
 		if ismounted $TESTDIR1 $NEWFS_DEFAULT_FS; then
 			log_must umount -f $TESTDIR1
+			sleep 0.1
 		fi
 
-		log_must zfs destroy -Rf $VOL
+		destroy_dataset $VOL -Rf
 	fi
 
 	# Create specified test environment

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_003_neg.ksh
index 0ae13d3..1e31091 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_003_neg.ksh

@@ -51,9 +51,7 @@
 {
 	pkill ${DD##*/}
 	for snap in $FSSNAP0 $FSSNAP1 $FSSNAP2; do
-		if snapexists $snap; then
-			log_must zfs destroy -Rf $snap
-		fi
+		snapexists $snap && destroy_dataset $snap -Rf
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_004_neg.ksh
index 0c1bb73..9537d50 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_004_neg.ksh

@@ -51,9 +51,8 @@
 	typeset ds
 
 	for ds in $TESTPOOL $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL; do
-		if snapexists ${ds}@$TESTSNAP; then
-			log_must zfs destroy ${ds}@$TESTSNAP
-		fi
+		snapexists ${ds}@$TESTSNAP && \
+			destroy_dataset ${ds}@$TESTSNAP
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib
index f69ec30..433f240 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_rollback/zfs_rollback_common.kshlib

@@ -76,16 +76,14 @@
 		# mount it. Otherwise, only check if this ufs|ext file system
 		# was mounted.
 		#
-		log_must eval "echo "y" | \
-			newfs -v $ZVOL_DEVDIR/$VOL > /dev/null 2>&1"
+		log_must new_fs $ZVOL_DEVDIR/$VOL
 
 		[[ ! -d $TESTDIR1 ]] && log_must mkdir $TESTDIR1
 
 		# Make sure the ufs|ext filesystem hasn't been mounted,
 		# then mount the new ufs|ext filesystem.
 		if ! ismounted $TESTDIR1 $NEWFS_DEFAULT_FS; then
-			log_must mount \
-				$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL $TESTDIR1
+			log_must mount $ZVOL_DEVDIR/$VOL $TESTDIR1
 		fi
 	fi
 
@@ -117,7 +115,7 @@
 
 			if datasetnonexists $snap; then
 				log_must cp /etc/passwd $fname
-				if is_linux; then
+				if is_linux || is_freebsd; then
 					log_must sync
 				else
 					#
@@ -128,7 +126,21 @@
 						log_must lockfs -f $TESTDIR1
 					fi
 				fi
+				if is_freebsd && [[ $dtst == $VOL ]]; then
+					# Though sync does start a fs sync on
+					# FreeBSD, it does not wait for it to
+					# finish.  We can force a blocking sync
+					# by updating the fs mount instead.
+					# Otherwise, the snapshot might occur
+					# with the fs in an unmountable state.
+					log_must mount -ur \
+					    $ZVOL_DEVDIR/$VOL $TESTDIR1
+				fi
 				log_must zfs snapshot $snap
+				if is_freebsd && [[ $dtst == $VOL ]]; then
+					log_must mount -uw \
+					    $ZVOL_DEVDIR/$VOL $TESTDIR1
+				fi
 			fi
 			if [[ $createclone == "true" ]]; then
 				if datasetnonexists $clone; then
@@ -169,9 +181,7 @@
 
 	for dtst in $FS $VOL; do
 		for snap in $TESTSNAP $TESTSNAP1 $TESTSNAP2; do
-			if snapexists $dtst@$snap; then
-				 log_must zfs destroy -Rf $dtst@$snap
-			fi
+			snapexists $dtst@$snap && destroy_dataset $dtst@$snap -Rf
 		done
 	done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am
index 2a476f3..25c7065 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am

@@ -10,9 +10,11 @@
 	zfs_send_006_pos.ksh \
 	zfs_send_007_pos.ksh \
 	zfs_send_encrypted.ksh \
+	zfs_send_encrypted_unloaded.ksh \
 	zfs_send_raw.ksh \
 	zfs_send_sparse.ksh \
-	zfs_send-b.ksh
+	zfs_send-b.ksh \
+	zfs_send_skip_missing.ksh
 
 dist_pkgdata_DATA = \
 	zfs_send.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send-b.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send-b.ksh
index cd87984..f019c22 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send-b.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send-b.ksh

@@ -35,7 +35,7 @@
 function cleanup
 {
 	for ds in "$SENDFS" "$BACKUP" "$RESTORE"; do
-		datasetexists $ds && log_must zfs destroy -r $ds
+		datasetexists $ds && destroy_dataset $ds -r
 	done
 }
 
@@ -52,20 +52,19 @@
 log_must zfs bookmark "$SENDFS@s1" "$SENDFS#bm"
 log_must zfs snapshot "$SENDFS@s2"
 log_must zfs set "compression=gzip" $SENDFS
-log_must zfs set "org.zfsonlinux:prop=val" $SENDFS
-log_must zfs set "org.zfsonlinux:snapprop=val" "$SENDFS@s1"
+log_must zfs set "org.openzfs:prop=val" $SENDFS
+log_must zfs set "org.openzfs:snapprop=val" "$SENDFS@s1"
 
 # 2. Verify command line options interact with '-b' correctly
 typeset opts=("" "p" "Rp" "cew" "nv" "D" "DLPRcenpvw")
 for opt in ${opts[@]}; do
-	log_must eval "zfs send -b$opt $SENDFS@s1 > /dev/null"
-	log_must eval "zfs send -b$opt -i $SENDFS@s1 $SENDFS@s2 > /dev/null"
-	log_must eval "zfs send -b$opt -I $SENDFS@s1 $SENDFS@s2 > /dev/null"
+	log_must eval "zfs send -b$opt $SENDFS@s1 >$TEST_BASE_DIR/devnull"
+	log_must eval "zfs send -b$opt -i $SENDFS@s1 $SENDFS@s2 >$TEST_BASE_DIR/devnull"
+	log_must eval "zfs send -b$opt -I $SENDFS@s1 $SENDFS@s2 >$TEST_BASE_DIR/devnull"
 done
 for opt in ${opts[@]}; do
-	log_mustnot eval "zfs send -b$opt $SENDFS > /dev/null"
-	log_mustnot eval "zfs send -b$opt $SENDFS#bm > /dev/null"
-	log_mustnot eval "zfs send -b$opt -i $SENDFS#bm $SENDFS@s2 > /dev/null"
+	log_mustnot eval "zfs send -b$opt $SENDFS >$TEST_BASE_DIR/devnull"
+	log_mustnot eval "zfs send -b$opt $SENDFS#bm >$TEST_BASE_DIR/devnull"
 done
 
 # Do 3..6 in a loop to verify various combination of "zfs send" options
@@ -79,21 +78,21 @@
 	# NOTE: override "received" values and set some new properties as well
 	log_must zfs set "compression=lz4" $BACKUP
 	log_must zfs set "exec=off" $BACKUP
-	log_must zfs set "org.zfsonlinux:prop=newval" $BACKUP
-	log_must zfs set "org.zfsonlinux:newprop=newval" $BACKUP
-	log_must zfs set "org.zfsonlinux:snapprop=newval" "$BACKUP@s1"
-	log_must zfs set "org.zfsonlinux:newsnapprop=newval" "$BACKUP@s1"
+	log_must zfs set "org.openzfs:prop=newval" $BACKUP
+	log_must zfs set "org.openzfs:newprop=newval" $BACKUP
+	log_must zfs set "org.openzfs:snapprop=newval" "$BACKUP@s1"
+	log_must zfs set "org.openzfs:newsnapprop=newval" "$BACKUP@s1"
 
 	# 5. Restore the "backup" dataset to a new destination
 	log_must eval "zfs send -b$opt $BACKUP@s1 | zfs recv $RESTORE"
 
 	# 6. Verify only original (received) properties are sent from "backup"
 	log_must eval "check_prop_source $RESTORE compression gzip received"
-	log_must eval "check_prop_source $RESTORE org.zfsonlinux:prop val received"
-	log_must eval "check_prop_source $RESTORE@s1 org.zfsonlinux:snapprop val received"
+	log_must eval "check_prop_source $RESTORE org.openzfs:prop val received"
+	log_must eval "check_prop_source $RESTORE@s1 org.openzfs:snapprop val received"
 	log_must eval "check_prop_source $RESTORE exec on default"
-	log_must eval "check_prop_missing $RESTORE org.zfsonlinux:newprop"
-	log_must eval "check_prop_missing $RESTORE@s1 org.zfsonlinux:newsnapprop"
+	log_must eval "check_prop_missing $RESTORE org.openzfs:newprop"
+	log_must eval "check_prop_missing $RESTORE@s1 org.openzfs:newsnapprop"
 
 	# cleanup
 	log_must zfs destroy -r $BACKUP

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh
index 2c6e3fd..b184330 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_001_pos.ksh

@@ -50,12 +50,10 @@
 function cleanup
 {
 	for snap in $init_snap $inc_snap $rst_snap $rst_inc_snap; do
-                snapexists $snap && \
-                        log_must zfs destroy -f $snap
+                snapexists $snap && destroy_dataset $snap -f
         done
 
-	datasetexists $rst_root && \
-		log_must zfs destroy -Rf $rst_root
+	datasetexists $rst_root && destroy_dataset $rst_root -Rf
 
 	for file in $full_bkup $inc_bkup \
 			$init_data $inc_data

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_002_pos.ksh
index 6359bb4..42bdddd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_002_pos.ksh

@@ -48,11 +48,8 @@
 
 function cleanup
 {
-	snapexists $snap && \
-		log_must zfs destroy $snap
-
-	datasetexists $ctr && \
-		log_must zfs destroy -r $ctr
+	snapexists $snap && destroy_dataset $snap
+	datasetexists $ctr && destroy_dataset $ctr -r
 
 	[[ -e $origfile ]] && \
 		log_must rm -f $origfile

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_003_pos.ksh
index 825a10d..caa8488 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_003_pos.ksh

@@ -44,8 +44,8 @@
 
 function cleanup
 {
-	datasetexists $snap1 && log_must zfs destroy $snap1
-	datasetexists $snap2 && log_must zfs destroy $snap2
+	datasetexists $snap1 && destroy_dataset $snap1
+	datasetexists $snap2 && destroy_dataset $snap2
 }
 
 log_assert "'zfs send -i' can deal with abbreviated snapshot name."
@@ -61,7 +61,7 @@
 
 typeset -i i=0
 while (( i < ${#args[*]} )); do
-	log_must eval "zfs send -i ${args[i]} > /dev/null"
+	log_must eval "zfs send -i ${args[i]} >$TEST_BASE_DIR/devnull"
 
 	(( i += 1 ))
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh
index 4a9d29f..af10e3a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_004_neg.ksh

@@ -48,8 +48,7 @@
 	typeset snap f
 
 	for snap in $snap1 $snap2 $snap3; do
-		snapexists $snap && \
-			log_must zfs destroy -f $snap
+		snapexists $snap && destroy_dataset $snap -f
 	done
 
 	for f in $tmpfile1 $tmpfile2; do
@@ -96,7 +95,7 @@
 typeset -i i=0
 while (( i < ${#badargs[*]} ))
 do
-	log_mustnot eval "zfs send ${badargs[i]} >/dev/null"
+	log_mustnot eval "zfs send ${badargs[i]} >$TEST_BASE_DIR/devnull"
 
 	(( i = i + 1 ))
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos.ksh
index 9f369e3..c4ab7a6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos.ksh

@@ -50,7 +50,7 @@
 	log_must zpool import $TESTPOOL
 
 	datasetexists $TESTPOOL@snap && \
-	    log_must zfs destroy -r $TESTPOOL@snap
+		destroy_dataset $TESTPOOL@snap -r
 }
 
 log_assert "'zfs send -R' can send from read-only pools"
@@ -61,6 +61,6 @@
 log_must zpool export $TESTPOOL
 log_must zpool import -o readonly=on $TESTPOOL
 
-log_must eval "zfs send -R $TESTPOOL@snap >/dev/null"
+log_must eval "zfs send -R $TESTPOOL@snap >$TEST_BASE_DIR/devnull"
 
 log_pass "'zfs send -R' can send from read-only pools"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh
index 652f7b7..3023ea4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos.ksh

@@ -15,7 +15,7 @@
 #
 
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -36,6 +36,7 @@
 
 function cleanup
 {
+	log_must set_tunable32 OVERRIDE_ESTIMATE_RECORDSIZE 8192
 	for ds in $datasets; do
                 destroy_dataset $ds "-rf"
 	done
@@ -90,6 +91,7 @@
 
 log_assert "Verify 'zfs send -nvP' generates valid stream estimates"
 log_onexit cleanup
+log_must set_tunable32 OVERRIDE_ESTIMATE_RECORDSIZE 0
 typeset -l block_count=0
 typeset -l block_size
 typeset -i PERCENT=1
@@ -117,33 +119,33 @@
 incremental_size=$(zfs send $incremental_snapshot 2>&1 | wc -c)
 incremental_send=$(zfs send -i $full_snapshot $incremental_snapshot 2>&1 | wc -c)
 
-log_note "verify zfs send -nv"
-options="-nv"
+log_note "verify zfs send -nvV"
+options="-nvV"
 refer_size=$(get_prop refer $full_snapshot)
 estimate_size=$(get_estimate_size $full_snapshot $options)
 log_must verify_size_estimates $options $full_size
 
-log_note "verify zfs send -Pnv"
-options="-Pnv"
+log_note "verify zfs send -PnvV"
+options="-PnvV"
 
 estimate_size=$(get_estimate_size $full_snapshot $options)
 log_must verify_size_estimates $options $full_size
 
-log_note "verify zfs send -nv for multiple snapshot send"
-options="-nv"
+log_note "verify zfs send -nvV for multiple snapshot send"
+options="-nvV"
 refer_size=$(get_prop refer $incremental_snapshot)
 
 estimate_size=$(get_estimate_size $incremental_snapshot $options)
 log_must verify_size_estimates $options $incremental_size
 
-log_note "verify zfs send -vPn for multiple snapshot send"
-options="-vPn"
+log_note "verify zfs send -vVPn for multiple snapshot send"
+options="-vVPn"
 
 estimate_size=$(get_estimate_size $incremental_snapshot $options)
 log_must verify_size_estimates $options $incremental_size
 
-log_note "verify zfs send -inv for incremental send"
-options="-nvi"
+log_note "verify zfs send -invV for incremental send"
+options="-nvVi"
 refer_size=$(get_prop refer $incremental_snapshot)
 deduct_size=$(get_prop refer $full_snapshot)
 refer_size=$(echo "$refer_size - $deduct_size" | bc)
@@ -153,8 +155,8 @@
 estimate_size=$(get_estimate_size $incremental_snapshot $options $full_bookmark)
 log_must verify_size_estimates $options $incremental_send
 
-log_note "verify zfs send -ivPn for incremental send"
-options="-vPni"
+log_note "verify zfs send -ivVPn for incremental send"
+options="-vVPni"
 
 estimate_size=$(get_estimate_size $incremental_snapshot $options $full_snapshot)
 log_must verify_size_estimates $options $incremental_send
@@ -184,16 +186,16 @@
         datasetexists $ds@snap64 || log_fail "Create $ds@snap64 snapshot fail."
 done
 recursive_size=$(zfs send -R $full_snapshot 2>&1 | wc -c)
-log_note "verify zfs send -Rnv for recursive send"
-options="-Rnv"
+log_note "verify zfs send -RnvV for recursive send"
+options="-RnvV"
 refer_size=$(get_prop refer $full_snapshot)
 refer_size=$(echo "$refer_size * 3" | bc)
 
 estimate_size=$(get_estimate_size $full_snapshot $options)
 log_must verify_size_estimates $options $recursive_size
 
-log_note "verify zfs send -RvPn for recursive send"
-options="-RvPn"
+log_note "verify zfs send -RvVPn for recursive send"
+options="-RvVPn"
 estimate_size=$(get_estimate_size $full_snapshot $options)
 log_must verify_size_estimates $options $recursive_size
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh
index 5fdb125..da0aebe 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh

@@ -89,7 +89,7 @@
 }
 
 test_pool $TESTPOOL
-log_must truncate --size=1G $vdev
+log_must truncate -s 1G $vdev
 log_must zpool create -o version=1 tmp_pool $vdev
 test_pool tmp_pool
 log_must zpool destroy tmp_pool

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh
index 490e146..a4c332d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh

@@ -42,7 +42,7 @@
 function cleanup
 {
     datasetexists $TESTPOOL/$TESTFS1 && \
-        log_must zfs destroy -r $TESTPOOL/$TESTFS1
+	    destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 
 log_onexit cleanup
@@ -62,15 +62,15 @@
 
 log_must zfs snapshot -r $snap
 
-log_must eval "zfs send $snap > /dev/null"
-log_mustnot eval "zfs send -p $snap > /dev/null"
-log_mustnot eval "zfs send -R $snap > /dev/null"
+log_must eval "zfs send $snap >$TEST_BASE_DIR/devnull"
+log_mustnot eval "zfs send -p $snap >$TEST_BASE_DIR/devnull"
+log_mustnot eval "zfs send -R $snap >$TEST_BASE_DIR/devnull"
 
 log_must zfs unmount $TESTPOOL/$TESTFS1
 log_must zfs unload-key $TESTPOOL/$TESTFS1
 
-log_mustnot eval "zfs send $snap > /dev/null"
-log_must eval "zfs send $TESTPOOL/$TESTFS1/child@snap > /dev/null"
+log_mustnot eval "zfs send $snap >$TEST_BASE_DIR/devnull"
+log_must eval "zfs send $TESTPOOL/$TESTFS1/child@snap >$TEST_BASE_DIR/devnull"
 
 log_pass "ZFS performs unencrypted sends of encrypted datasets, unless the" \
 	"'-p' or '-R' options are specified"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh
index 112ee11..f268f7b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh

@@ -37,7 +37,7 @@
 function cleanup
 {
     datasetexists $TESTPOOL/$TESTFS1 && \
-        log_must zfs destroy -r $TESTPOOL/$TESTFS1
+	    destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 
 log_onexit cleanup
@@ -53,7 +53,7 @@
 log_must zfs snapshot $snap
 log_must zfs unmount $TESTPOOL/$TESTFS1
 log_must zfs unload-key $TESTPOOL/$TESTFS1
-log_mustnot eval "zfs send $snap > /dev/null"
+log_mustnot eval "zfs send $snap >$TEST_BASE_DIR/devnull"
 
 log_pass "ZFS does not perform unencrypted sends from encrypted datasets" \
 	"with unloaded keys."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh
index 85cc740..03c2e78 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh

@@ -38,11 +38,9 @@
 
 function cleanup
 {
-	snapexists $snap && \
-		log_must zfs destroy $snap
-
+	snapexists $snap && destroy_dataset $snap
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 
 log_onexit cleanup
@@ -59,21 +57,21 @@
 log_must zfs snapshot $snap
 log_must zfs snapshot $snap1
 
-log_must eval "zfs send -w $snap > /dev/null"
-log_must eval "zfs send -w $snap1 > /dev/null"
+log_must eval "zfs send -w $snap >$TEST_BASE_DIR/devnull"
+log_must eval "zfs send -w $snap1 >$TEST_BASE_DIR/devnull"
 
 log_note "Verify ZFS can perform raw sends with properties"
-log_must eval "zfs send -wp $snap > /dev/null"
-log_must eval "zfs send -wp $snap1 > /dev/null"
+log_must eval "zfs send -wp $snap >$TEST_BASE_DIR/devnull"
+log_must eval "zfs send -wp $snap1 >$TEST_BASE_DIR/devnull"
 
 log_note "Verify ZFS can perform raw replication sends"
-log_must eval "zfs send -wR $snap > /dev/null"
-log_must eval "zfs send -wR $snap1 > /dev/null"
+log_must eval "zfs send -wR $snap >$TEST_BASE_DIR/devnull"
+log_must eval "zfs send -wR $snap1 >$TEST_BASE_DIR/devnull"
 
 log_note "Verify ZFS can perform a raw send of an encrypted datasets with" \
 	"its key unloaded"
 log_must zfs unmount $TESTPOOL/$TESTFS1
 log_must zfs unload-key $TESTPOOL/$TESTFS1
-log_must eval "zfs send -w $snap1 > /dev/null"
+log_must eval "zfs send -w $snap1 >$TEST_BASE_DIR/devnull"
 
 log_pass "ZFS performs raw sends of datasets"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_skip_missing.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_skip_missing.ksh
new file mode 100755
index 0000000..2e12d25
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_skip_missing.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016, loli10K. All rights reserved.
+# Copyright (c) 2021, Pablo Correa Gómez. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/cli_root/cli_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_send/zfs_send.cfg
+
+#
+# DESCRIPTION:
+#	Verify 'zfs send' will avoid sending replication send
+#	streams when we're missing snapshots in the dataset
+#	hierarchy, unless -s|--skip-missing is provided
+#
+# STRATEGY:
+#	1. Create a parent and child fs and then only snapshot the parent
+#	2. Verify sending with replication will fail
+#	3. Verify sending with skip-missing will print a warning but succeed
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	snapexists $SNAP && destroy_dataset $SNAP -f
+
+	datasetexists $PARENT && destroy_dataset $PARENT -rf
+
+	[[ -e $WARNF ]] && log_must rm -f $WARNF
+	rm -f $TEST_BASE_DIR/devnull
+}
+
+log_assert "Verify 'zfs send -Rs' works as expected."
+log_onexit cleanup
+
+PARENT=$TESTPOOL/parent
+CHILD=$PARENT/child
+SNAP=$PARENT@snap
+WARNF=$TEST_BASE_DIR/warn.2
+
+log_note "Verify 'zfs send -R' fails to generate replication stream"\
+	 " for datasets created before"
+
+log_must zfs create $PARENT
+log_must zfs create $CHILD
+log_must zfs snapshot $SNAP
+log_mustnot eval "zfs send -R $SNAP >$TEST_BASE_DIR/devnull"
+
+log_note "Verify 'zfs send -Rs' warns about missing snapshots, "\
+	 "but still succeeds"
+
+log_must eval "zfs send -Rs $SNAP 2> $WARNF >$TEST_BASE_DIR/devnull"
+log_must eval "[[ -s $WARNF ]]"
+
+log_pass "Verify 'zfs send -Rs' works as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh
index 7354305..aeb49af 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh

@@ -24,15 +24,15 @@
 # 1. Create sparse files of various size
 # 2. Snapshot and send these sparse files
 # 3. Verify these files are received correctly and we don't trigger any issue
-#    like the one described in https://github.com/zfsonlinux/zfs/pull/6760
+#    like the one described in https://github.com/openzfs/zfs/pull/6760
 #
 
 verify_runnable "both"
 
 function cleanup
 {
-        datasetexists $SENDFS && log_must zfs destroy -r $SENDFS
-        datasetexists $RECVFS && log_must zfs destroy -r $RECVFS
+        datasetexists $SENDFS && destroy_dataset $SENDFS -r
+        datasetexists $RECVFS && destroy_dataset $RECVFS -r
 }
 
 #
@@ -57,14 +57,14 @@
 	# compare sparse files
 	recvfile="$(get_prop mountpoint $recvfs)/data.bin"
 	log_must cmp $sendfile $recvfile $offset $offset
-	sendsz=$(stat -c '%s' $sendfile)
-	recvsz=$(stat -c '%s' $recvfile)
+	sendsz=$(stat_size $sendfile)
+	recvsz=$(stat_size $recvfile)
 	if [[ $sendsz -ne $recvsz ]]; then
 		log_fail "$sendfile ($sendsz) and $recvfile ($recvsz) differ."
 	fi
 	# cleanup
-	log_must zfs destroy -r $sendfs
-	log_must zfs destroy -r $recvfs
+	destroy_dataset $sendfs -r
+	destroy_dataset $recvfs -r
 }
 
 log_assert "'zfs send' should be able to send (big) sparse files correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am
index 015464b..f7362ff 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am

@@ -28,7 +28,8 @@
 	zfs_set_001_neg.ksh \
 	zfs_set_002_neg.ksh \
 	zfs_set_003_neg.ksh \
-	zfs_set_keylocation.ksh
+	zfs_set_keylocation.ksh \
+	zfs_set_feature_activation.ksh
 
 dist_pkgdata_DATA = \
 	zfs_set_common.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_001_pos.ksh
index dd3397f..ac5fc81 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_001_pos.ksh

@@ -63,12 +63,11 @@
 
 function cleanup
 {
-	if snapexists $TESTPOOL/$TESTFS@$TESTSNAP ; then
-		log_must zfs destroy -R $TESTPOOL/$TESTFS@$TESTSNAP
-	fi
-	if snapexists $TESTPOOL/$TESTVOL@$TESTSNAP ; then
-		log_must zfs destroy -R $TESTPOOL/$TESTVOL@$TESTSNAP
-	fi
+	snapexists $TESTPOOL/$TESTFS@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP -R
+
+	snapexists $TESTPOOL/$TESTVOL@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTVOL@$TESTSNAP -R
 
 	[[ -n $old_ctr_canmount ]] && \
 		log_must zfs set canmount=$old_ctr_canmount $TESTPOOL/$TESTCTR

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh
index 3b8b88e..55c71f6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_002_pos.ksh

@@ -41,8 +41,10 @@
 # STRATEGY:
 # 1. Setup a pool and create fs, volume, snapshot clone within it.
 # 2. Set canmount=noauto for each dataset and check the return value
-#    and check if it still can be mounted by mount -a.
+#    and check if it still can be mounted by mount -a or shared by
+#    share -a.
 # 3. mount each dataset(except volume) to see if it can be mounted.
+# 4. verify that a mounted dataset can be shared by share -a.
 #
 
 verify_runnable "both"
@@ -74,18 +76,17 @@
 	ds=$TESTPOOL/$TESTCLONE
 	if datasetexists $ds; then
 		mntp=$(get_prop mountpoint $ds)
-		log_must zfs destroy $ds
+		destroy_dataset $ds
 		if [[ -d $mntp ]]; then
 			rm -fr $mntp
 		fi
 	fi
 
-	if snapexists $TESTPOOL/$TESTFS@$TESTSNAP ; then
-		log_must zfs destroy -R $TESTPOOL/$TESTFS@$TESTSNAP
-	fi
-	if snapexists $TESTPOOL/$TESTVOL@$TESTSNAP ; then
-		log_must zfs destroy -R $TESTPOOL/$TESTVOL@$TESTSNAP
-	fi
+	snapexists $TESTPOOL/$TESTFS@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP -R
+
+	snapexists $TESTPOOL/$TESTVOL@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTVOL@$TESTSNAP -R
 
 	zfs unmount -a > /dev/null 2>&1
 	log_must zfs mount -a
@@ -100,6 +101,7 @@
 
 set -A old_mnt
 set -A old_canmount
+set -A old_sharenfs
 typeset tmpmnt=/tmpmount$$
 typeset ds
 
@@ -113,6 +115,7 @@
 	ds=${dataset_pos[i]}
 	old_mnt[i]=$(get_prop mountpoint $ds)
 	old_canmount[i]=$(get_prop canmount $ds)
+	old_sharenfs[i]=$(get_prop sharenfs $ds)
 	(( i = i + 1 ))
 done
 
@@ -121,6 +124,7 @@
 	dataset=${dataset_pos[i]}
 	set_n_check_prop "noauto" "canmount" "$dataset"
 	log_must zfs set mountpoint=$tmpmnt $dataset
+	log_must zfs set sharenfs=on $dataset
 	if  ismounted $dataset; then
 		zfs unmount -a > /dev/null 2>&1
 		log_must mounted $dataset
@@ -128,6 +132,8 @@
 		log_must unmounted $dataset
 		log_must zfs mount -a
 		log_must unmounted $dataset
+		log_must zfs share -a
+		log_mustnot is_exported $tmpmnt
 	else
 		log_must zfs mount -a
 		log_must unmounted $dataset
@@ -137,6 +143,10 @@
 
 	log_must zfs mount $dataset
 	log_must mounted $dataset
+	log_must zfs share -a
+	log_must is_exported $tmpmnt
+
+	log_must zfs set sharenfs="${old_sharenfs[i]}" $dataset
 	log_must zfs set canmount="${old_canmount[i]}" $dataset
 	log_must zfs set mountpoint="${old_mnt[i]}" $dataset
 	(( i = i + 1 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_003_pos.ksh
index a11cfb4..e4664d0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_003_pos.ksh

@@ -63,15 +63,14 @@
 	ds=$TESTPOOL/$TESTCLONE
 	if datasetexists $ds; then
 		mntp=$(get_prop mountpoint $ds)
-		log_must zfs destroy $ds
+		destroy_dataset $ds
 		if [[ -d $mntp ]]; then
 			log_must rm -fr $mntp
 		fi
 	fi
 
-	if snapexists $TESTPOOL/$TESTFS@$TESTSNAP ; then
-		log_must zfs destroy -R $TESTPOOL/$TESTFS@$TESTSNAP
-	fi
+	snapexists $TESTPOOL/$TESTFS@$TESTSNAP && \
+		destroy_dataset $TESTPOOL/$TESTFS@$TESTSNAP -R
 
 	zfs unmount -a > /dev/null 2>&1
 	log_must zfs mount -a

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_004_pos.ksh
index 11be1af..e75114e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/canmount_004_pos.ksh

@@ -44,7 +44,7 @@
 verify_runnable "global"
 
 # properties
-set -A sharenfs_prop "off" "on" "rw"
+set -A sharenfs_prop "off" "on" "ro"
 set -A sharesmb_prop "off" "on"
 
 function cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
index 27003b2..f30d005 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh

@@ -46,7 +46,10 @@
 verify_runnable "both"
 
 set -A dataset "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL/$TESTVOL"
-set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr" "noparity"
+set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "noparity"
+if is_linux; then
+	values+=("edonr")
+fi
 
 log_assert "Setting a valid checksum on a file system, volume," \
 	"it should be successful."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/compression_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/compression_001_pos.ksh
index f7d06ea..06da5f2 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/compression_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/compression_001_pos.ksh

@@ -26,6 +26,7 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
 
 #
@@ -41,7 +42,7 @@
 verify_runnable "both"
 
 set -A dataset "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL/$TESTVOL"
-set -A values $(get_compress_opts zfs_set)
+set -A values "${compress_prop_vals[@]}"
 
 log_assert "Setting a valid compression on file system and volume, " \
 	"It should be successful."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_003_pos.ksh
index 9bbb480..4d86100 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_003_pos.ksh

@@ -76,6 +76,14 @@
 	if [[ $(linux_version) -lt $(linux_version "4.4") ]]; then
 		args+=("mand" "nomand")
 	fi
+elif is_freebsd; then
+	# 'xattr' and 'devices' are not supported on FreeBSD.
+	# Perhaps more options need to be added.
+	set -A args \
+	"noexec"	"exec"	\
+	"ro"		"rw"	\
+	"nosuid"	"suid"	\
+	"atime"		"noatime"
 else
 	set -A args \
 	"devices"	"/devices/"	"nodevices"	"/nodevices/"	\
@@ -96,11 +104,11 @@
 
 typeset i=0
 while ((i < ${#args[@]})); do
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		log_must mount -t zfs -o ${args[$i]} $testfs $tmpmnt
 		
 		msg=$(mount | grep "$tmpmnt ")
-		
+
 		echo $msg | grep "${args[((i))]}" > /dev/null 2>&1
 		if (($? != 0)) ; then
 			echo $msg | grep "${args[((i-1))]}" > /dev/null 2>&1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/onoffs_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/onoffs_001_pos.ksh
index 498567f..7ba6d7f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/onoffs_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/onoffs_001_pos.ksh

@@ -51,7 +51,12 @@
 
 log_onexit cleanup
 
-set -A props "atime" "readonly" "setuid" "zoned"
+set -A props "atime" "readonly" "setuid"
+if is_freebsd; then
+	props+=("jailed")
+else
+	props+=("zoned")
+fi
 set -A values "on" "off"
 
 if is_global_zone ; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/property_alias_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/property_alias_001_pos.ksh
index 19e636b..f1befe6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/property_alias_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/property_alias_001_pos.ksh

@@ -21,14 +21,12 @@
 #
 
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2009, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2016, 2017, Delphix. All rights reserved.
 # Use is subject to license terms.
 #
 
-#
-# Copyright (c) 2016, 2017 by Delphix. All rights reserved.
-#
-
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
 
@@ -106,7 +104,7 @@
 			done
 			;;
 		compression|compress )
-			for val in $(get_compress_opts zfs_set); do
+			for val in "${compress_prop_vals[@]}"; do
 				set_and_check $ds ${rw_prop[i]} $val ${chk_prop[i]}
 			done
 			;;

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/readonly_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/readonly_001_pos.ksh
index 9af8811..4adac42 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/readonly_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/readonly_001_pos.ksh

@@ -48,7 +48,7 @@
 {
 	for dataset in $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL ; do
 		snapexists ${dataset}@$TESTSNAP && \
-			log_must zfs destroy -R ${dataset}@$TESTSNAP
+			destroy_dataset ${dataset}@$TESTSNAP -R
 	done
 }
 
@@ -113,7 +113,7 @@
 			fi
 			;;
 		volume)
-			$expect eval "echo 'y' | newfs \
+			$expect eval "new_fs \
 			    ${ZVOL_DEVDIR}/$dataset > /dev/null 2>&1"
 			;;
 		*)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/ro_props_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/ro_props_001_pos.ksh
index 67de2e8..7177fac 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/ro_props_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/ro_props_001_pos.ksh

@@ -55,7 +55,12 @@
 	mounted origin"
 typeset snap_ro_props="volsize recordsize recsize quota reservation reserv mountpoint \
 	sharenfs checksum compression compress atime devices exec readonly rdonly \
-	setuid zoned"
+	setuid"
+if is_freebsd; then
+	snap_ro_props+=" jailed"
+else
+	snap_ro_props+=" zoned"
+fi
 
 zfs upgrade -v > /dev/null 2>&1
 if [[ $? -eq 0 ]]; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/snapdir_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/snapdir_001_pos.ksh
index 079fc77..083a6b1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/snapdir_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/snapdir_001_pos.ksh

@@ -48,8 +48,7 @@
 function cleanup
 {
 	for dataset in $all_datasets; do
-		snapexists ${dataset}@snap && \
-			log_must zfs destroy ${dataset}@snap
+		snapexists ${dataset}@snap && destroy_dataset ${dataset}@snap
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/user_property_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/user_property_004_pos.ksh
index 1d197fa..bd11ea0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/user_property_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/user_property_004_pos.ksh

@@ -46,9 +46,7 @@
 {
 	for fs in $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL $TESTPOOL ; do
 		typeset fssnap=$fs@snap
-		if datasetexists $fssnap ; then
-			log_must zfs destroy -f $fssnap
-		fi
+		datasetexists $fssnap && destroy_dataset $fssnap -f
 	done
 	cleanup_user_prop $TESTPOOL
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_001_neg.ksh
index 4d8982c..c9bc756 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_001_neg.ksh

@@ -45,7 +45,12 @@
 verify_runnable "both"
 
 set -A props "" "mountpoint" "checksum" "compression" "atime" "readonly" \
-	"setuid" "zoned" "canmount"
+	"setuid" "canmount"
+if is_freebsd; then
+	props+=("jailed")
+else
+	props+=("zoned")
+fi
 
 set -A values "" "mountpoint" "checksum" "compression" "atime" "readonly" \
 	"setuid" "zoned" "0" "-?" "-on" "--on" "*" "?" "Legacy" "NONE" "oN" \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_002_neg.ksh
index b904404..2178175 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_002_neg.ksh

@@ -47,8 +47,13 @@
 
 set -A editable_props "quota" "reservation" "reserv" "volsize" "recordsize" "recsize" \
 		"mountpoint" "checksum" "compression" "compress" "atime" \
-		"devices" "exec" "setuid" "readonly" "zoned" "snapdir" "aclmode" \
+		"devices" "exec" "setuid" "readonly" "snapdir" "aclmode" \
 		"aclinherit" "canmount" "xattr" "copies" "version"
+if is_freebsd; then
+	editable_props+=("jailed")
+else
+	editable_props+=("zoned")
+fi
 
 for ds in $TESTPOOL $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL \
 	$TESTPOOL/$TESTFS@$TESTSNAP; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh
index 102e887..fd5f7f2 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh

@@ -48,9 +48,8 @@
 	if [ -e $badpath ]; then
 		rm -f $badpath
 	fi
-	if datasetexists $TESTPOOL/foo; then
-		log_must zfs destroy $TESTPOOL/foo
-	fi
+
+	datasetexists $TESTPOOL/foo && destroy_dataset $TESTPOOL/foo
 }
 
 log_assert "'zfs set mountpoint/sharenfs' fails with invalid scenarios"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
index 5e9f719..1208207 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib

@@ -287,12 +287,14 @@
         typeset chk_value=$(get_prop "$prop" "$dataset")
         typeset chk_source=$(get_source "$prop" "$dataset")
 
-        if [[ "$chk_value" != "$value" || "$chk_source" != "$4" ]]
-        then
-                return 1
-        else
-                return 0
-        fi
+	if [[ "$chk_value" != "$value" || "$chk_source" != "$source" ]]
+	then
+		log_note "expected (value '$value', source '$source'), got \
+			(value '$chk_value', source '$chk_source')"
+		return 1
+	else
+		return 0
+	fi
 }
 
 #

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_feature_activation.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_feature_activation.ksh
new file mode 100755
index 0000000..c5e6fb9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_feature_activation.ksh

@@ -0,0 +1,98 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Setting the compression property to any of the zstd levels should activate
+# the zstd feature flag. Destroying the last dataset using the zstd feature flag
+# should revert the feature to the 'enabled' state.
+#
+# STRATEGY:
+# 1. Create pool, then create a file system within it.
+# 2. Check that the zstd feature flag is 'enabled'.
+# 3. Setting the compression property to zstd.
+# 4. Check that the zstd feature flag is now 'active'.
+# 5. Destroy the dataset
+# 6. Confirm that the feature flag reverts to the 'enabled' state.
+#
+
+verify_runnable "both"
+
+log_assert "Setting compression=zstd should activate the"\
+	"org.freebsd:zstd_compress feature flag, and destroying the last"\
+	"dataset using that property, should revert the feature flag to"\
+	"the enabled state."
+# Path of the file that backs the scratch pool created for this test.
+export VDEV_ZSTD="$TEST_BASE_DIR/vdev-zstd"
+
+# Tear down the scratch pool (if it was created) and remove its backing file.
+function cleanup
+{
+	poolexists $TESTPOOL-zstd && destroy_pool $TESTPOOL-zstd
+
+	# -f: the backing file does not exist if 'truncate' failed before it.
+	rm -f $VDEV_ZSTD
+}
+log_onexit cleanup
+
+# Build the scratch pool on a file sized by $SPA_MINDEVSIZE.
+log_must truncate -s $SPA_MINDEVSIZE $VDEV_ZSTD
+log_must zpool create $TESTPOOL-zstd $VDEV_ZSTD
+
+# The test is only meaningful when the feature does not start out 'disabled'
+# or 'active'; either state is reported as unsupported.
+featureval="$(get_pool_prop feature@zstd_compress $TESTPOOL-zstd)"
+[[ "$featureval" == "disabled" ]] && \
+	log_unsupported "ZSTD feature flag unsupported"
+[[ "$featureval" == "active" ]] && log_unsupported "ZSTD feature already active before test"
+
+# Use a random zstd level in the range 1..19.
+random_level=$((RANDOM % 19 + 1))
+log_note "Randomly selected ZSTD level: $random_level"
+
+# Creating a dataset with a zstd level must move the feature to 'active'.
+log_must zfs create -o compress=zstd-$random_level $TESTPOOL-zstd/$TESTFS-zstd
+featureval="$(get_pool_prop feature@zstd_compress $TESTPOOL-zstd)"
+log_note "After zfs set, feature flag value is: $featureval"
+if [[ "$featureval" != "active" ]]; then
+	log_fail "ZSTD feature flag not activated"
+fi
+
+# Destroying that dataset must return the feature to 'enabled'.
+log_must zfs destroy $TESTPOOL-zstd/$TESTFS-zstd
+featureval="$(get_pool_prop feature@zstd_compress $TESTPOOL-zstd)"
+log_note "After zfs destroy, feature flag value is: $featureval"
+if [[ "$featureval" != "enabled" ]]; then
+	log_fail "ZSTD feature flag not deactivated"
+fi
+
+# Both state transitions were observed.
+log_pass "Setting compression=zstd activated the feature flag, and"\
+	"destroying the dataset deactivated it."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh
index 313fa4e..9791339 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh

@@ -45,12 +45,13 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
+	cleanup_https
 }
 log_onexit cleanup
 
-log_assert "Key location can only be 'prompt' or a file path for encryption" \
-	"roots, and 'none' for unencrypted volumes"
+log_assert "Key location can only be 'prompt', 'file://', or 'https://'" \
+	"for encryption roots, and 'none' for unencrypted volumes"
 
 log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
 
@@ -69,6 +70,10 @@
 log_must zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
 log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey"
 
+setup_https
+log_must zfs set keylocation=$(get_https_base_url)/PASSPHRASE $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "$(get_https_base_url)/PASSPHRASE"
+
 log_must zfs set keylocation=prompt $TESTPOOL/$TESTFS1
 log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt"
 
@@ -89,5 +94,5 @@
 
 log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none"
 
-log_pass "Key location can only be 'prompt' or a file path for encryption" \
-	"roots, and 'none' for unencrypted volumes"
+log_pass "Key location can only be 'prompt', 'file://', or 'https://'" \
+	"for encryption roots, and 'none' for unencrypted volumes"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am
index e200146..bf33ed0 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile.am

@@ -12,7 +12,9 @@
 	zfs_share_008_neg.ksh \
 	zfs_share_009_neg.ksh \
 	zfs_share_010_neg.ksh \
-	zfs_share_011_pos.ksh
+	zfs_share_011_pos.ksh \
+	zfs_share_012_pos.ksh \
+	zfs_share_concurrent_shares.ksh
 
 dist_pkgdata_DATA = \
 	zfs_share.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/setup.ksh
index 29f38e8..1601087 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/setup.ksh

@@ -27,10 +27,7 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-share -s
-if [ $? -ne 0 ]; then
-	log_unsupported "The NFS utilities are not installed"
-fi
+check_nfs
 
 # Make sure NFS server is running before testing.
 setup_nfs_server

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_001_pos.ksh
index a2c06e0..6d4396a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_001_pos.ksh

@@ -26,7 +26,7 @@
 #
 
 #
-# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2016, 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -66,11 +66,12 @@
 	fi
 
 	datasetexists $TESTPOOL/$TESTFS-clone && \
-		log_must zfs destroy -f $TESTPOOL/$TESTFS-clone
+		destroy_dataset $TESTPOOL/$TESTFS-clone -f
 
-	if snapexists "$TESTPOOL/$TESTFS@snapshot"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS@snapshot
-	fi
+	snapexists "$TESTPOOL/$TESTFS@snapshot" && \
+		destroy_dataset $TESTPOOL/$TESTFS@snapshot -f
+
+	log_must zfs share -a
 }
 
 
@@ -138,11 +139,20 @@
 #
 log_must zfs share -a
 
+#
+# We need to unset __ZFS_POOL_EXCLUDE so that we include all file systems
+# in the os-specific zfs exports file. This will be reset by the next test.
+#
+unset __ZFS_POOL_EXCLUDE
+
 i=0
 while (( i < ${#fs[*]} )); do
 	is_shared ${fs[i]} || \
 	    log_fail "File system ${fs[i]} is not shared (share -a)"
 
+	is_exported ${fs[i]} || \
+	    log_fail "File system ${fs[i]} is not exported (share -a)"
+
 	((i = i + 2))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_004_pos.ksh
index baa5f4e..6c48875 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_004_pos.ksh

@@ -47,9 +47,8 @@
 
 function cleanup
 {
-	if snapexists $TESTPOOL/$TESTFS@snapshot; then
-		log_must zfs destroy $TESTPOOL/$TESTFS@snapshot
-	fi
+	snapexists $TESTPOOL/$TESTFS@snapshot && \
+		destroy_dataset $TESTPOOL/$TESTFS@snapshot
 
 	log_must zfs set sharenfs=off $TESTPOOL/$TESTFS
 	log_must unshare_fs $TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_006_pos.ksh
index 6b06589..d539401 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_006_pos.ksh

@@ -53,7 +53,7 @@
 	fi
 
 	datasetexists $TESTPOOL/$TESTCTR/$TESTFS2 && \
-		log_must zfs destroy $TESTPOOL/$TESTCTR/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTCTR/$TESTFS2
 
 	typeset fs=""
 	for fs in $mntp $TESTDIR1 $TESTDIR2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_011_pos.ksh
index f75877e..131b039 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_011_pos.ksh

@@ -51,13 +51,11 @@
 	log_must zfs set sharenfs=off $TESTPOOL/$TESTFS
 	unshare_fs $TESTPOOL/$TESTFS
 
-	if snapexists "$TESTPOOL/$TESTFS@snapshot"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS@snapshot
-	fi
+	snapexists "$TESTPOOL/$TESTFS@snapshot" && \
+		destroy_dataset $TESTPOOL/$TESTFS@snapshot -f
 
-	if datasetexists $TESTPOOL/$TESTFS/fs2 ; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS/fs2
-	fi
+	datasetexists $TESTPOOL/$TESTFS/fs2 && \
+		destroy_dataset $TESTPOOL/$TESTFS/fs2 -f
 }
 
 log_assert "Verify that umount and destroy fail, and do not unshare the shared" \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_012_pos.ksh
new file mode 100755
index 0000000..fe38d55
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_012_pos.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION: Unmounted canmount=noauto export is removed during zfs share -a
+#
+# STRATEGY:
+# 1. Share a dataset that also has canmount set to noauto
+# 2. Capture the zfs exports file when the dataset is mounted + shared
+# 3. Simulate a reboot by unmounting the dataset and restoring the exports file
+# 4. Verify that 'zfs share -a' removes the export since dataset is not mounted
+#
+
+verify_runnable "both"
+# Dataset under test and its mountpoint, looked up once before any changes.
+dataset="$TESTPOOL/$TESTFS"
+mountpt=$(get_prop mountpoint $dataset)
+
+# Restore the dataset's properties and rebuild the exports file from scratch.
+function cleanup
+{
+	zfs set canmount=on $dataset
+	zfs set sharenfs=off $dataset
+	zfs mount -a
+
+	# Unset __ZFS_POOL_EXCLUDE so that all file systems are included when
+	# the exports file is rebuilt. -f keeps rm quiet if a file is already
+	# gone; the .save copy is left behind when the test aborts mid-way.
+	unset __ZFS_POOL_EXCLUDE
+	rm -f /etc/exports.d/zfs.exports /etc/exports.d/zfs.exports.save
+	zfs share -a
+}
+
+log_assert "Unmounted canmount=noauto export is removed during zfs share -a"
+log_onexit cleanup
+# Share the dataset while it is mounted with canmount=noauto.
+log_must zfs set canmount=noauto $dataset
+zfs mount $dataset > /dev/null 2>&1
+log_must mounted $dataset
+log_must zfs set sharenfs=on $dataset
+log_must is_exported $mountpt
+# Save the exports file, then unmount; the export must disappear.
+log_must cp /etc/exports.d/zfs.exports /etc/exports.d/zfs.exports.save
+log_must zfs umount $dataset
+log_must unmounted $dataset
+log_mustnot is_exported $mountpt
+
+# Simulate a reboot: put back the exports file that still lists the dataset.
+log_must mv /etc/exports.d/zfs.exports.save /etc/exports.d/zfs.exports
+# 'zfs share -a' must remove the stale export of the unmounted dataset.
+log_must is_exported $mountpt
+log_must zfs share -a
+log_mustnot is_exported $mountpt
+
+log_pass "Unmounted canmount=noauto export is removed during zfs share -a"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh
new file mode 100755
index 0000000..dbaaf39
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh

@@ -0,0 +1,201 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that 'zfs set sharenfs=on', 'zfs share', and 'zfs unshare' can
+# run concurrently. The test creates 153 file systems (51 under each of
+# three parents) and runs the test strategy on each in a background job.
+#
+# STRATEGY:
+# 1. Verify that the file system is not shared.
+# 2. Enable the 'sharenfs' property.
+# 3. Invoke 'zfs unshare' and verify the file system is no longer shared.
+# 4. Invoke 'zfs share'.
+# 5. Verify that the file system is shared.
+# 6. Verify that a shared file system cannot be shared again.
+# 7. Verify that 'zfs share -a' succeeds.
+#
+
+verify_runnable "global"
+
+# Wait for outstanding jobs, then unshare, unmount and destroy every test
+# file system before re-sharing whatever remains.
+function cleanup
+{
+	typeset parent
+
+	# Let any background jobs finish before tearing things down.
+	wait
+	for fs in $(seq 0 50)
+	do
+		for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"
+		do
+			log_must zfs set sharenfs=off $TESTPOOL/$parent/$fs
+		done
+		for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"
+		do
+			unshare_fs $TESTPOOL/$parent/$fs
+		done
+		for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"
+		do
+			mounted $TESTPOOL/$parent/$fs && \
+				log_must zfs unmount $TESTPOOL/$parent/$fs
+		done
+		for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"
+		do
+			datasetexists $TESTPOOL/$parent/$fs && \
+				destroy_dataset $TESTPOOL/$parent/$fs -f
+		done
+	done
+
+	log_must zfs share -a
+}
+
+function create_filesystems
+{
+	typeset parent	# children 0..50 are created under each parent
+	for fs in $(seq 0 50); do
+		for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+			log_must zfs create -p $TESTPOOL/$parent/$fs
+		done
+	done
+}
+
+# Main test routine: share the given file system's mountpoint via 'sharenfs'
+# and via 'zfs share', verifying the shared state after every step.
+function test_share # filesystem
+{
+	typeset filesystem=$1
+	typeset mntp=$(get_prop mountpoint $filesystem)
+
+	if ! not_shared $mntp; then
+		log_fail "File system $filesystem is already shared."
+	fi
+	if ! zfs set sharenfs=on $filesystem; then
+		log_fail "zfs set sharenfs=on $filesystem failed."
+	fi
+	if ! is_shared $mntp; then
+		log_fail "File system $filesystem is not shared (set sharenfs)."
+	fi
+
+	# Verify 'zfs share' works as well.
+	if ! zfs unshare $filesystem; then
+		log_fail "zfs unshare $filesystem failed."
+	fi
+	if is_shared $mntp; then
+		log_fail "File system $filesystem is still shared."
+	fi
+	if ! zfs share $filesystem; then
+		log_fail "zfs share $filesystem failed."
+	fi
+	if ! is_shared $mntp; then
+		log_fail "file system $filesystem is not shared (zfs share)."
+	fi
+
+	# Sharing an already-shared file system must fail.
+	if zfs share $filesystem; then
+		log_fail "zfs share $filesystem did not fail"
+	fi
+	return 0
+}
+
+#
+# Record the main process id so that failures in child processes are
+# captured and reported by the parent process.
+#
+set_main_pid $$
+log_assert "Verify that 'zfs share' succeeds as root."
+log_onexit cleanup
+
+create_filesystems
+
+#
+# Run test_share against every file system in its own background job,
+# remembering each job's pid so the results can be collected afterwards.
+#
+child_pids=()
+for fs in $(seq 0 50); do
+	for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+		target=$TESTPOOL/$parent/$fs
+		test_share $target &
+		child_pids+=($!)
+		log_note "$target ==> $!"
+	done
+done
+if ! wait_for_children "${child_pids[@]}"; then
+	log_fail "multithreaded share test failed"
+fi
+
+log_note "Verify 'zfs share -a' succeeds."
+
+#
+# Unshare each of the file systems in parallel, one background job per
+# file system, and wait for all of them to finish.
+#
+child_pids=()
+for fs in $(seq 0 50); do
+	for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+		unshare_fs $TESTPOOL/$parent/$fs &
+		child_pids+=($!)
+	done
+done
+
+if ! wait_for_children "${child_pids[@]}"; then
+	log_fail "multithreaded unshare failed"
+fi
+
+#
+# Try a zfs share -a and verify all file systems are shared.
+#
+log_must zfs share -a
+
+#
+# We need to unset __ZFS_POOL_EXCLUDE so that we include all file systems
+# in the os-specific zfs exports file. This will be reset by the next test.
+#
+unset __ZFS_POOL_EXCLUDE
+
+# For each index, check all three parents for "shared", then for "exported".
+for fs in $(seq 0 50); do
+	for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+		target=$TESTPOOL/$parent/$fs
+		if ! is_shared $target; then
+			log_fail "File system $target is not shared"
+		fi
+	done
+
+	for parent in "$TESTFS1" "$TESTFS2" "$TESTFS3"; do
+		target=$TESTPOOL/$parent/$fs
+		if ! is_exported $target; then
+			log_fail "File system $target is not exported"
+		fi
+	done
+done
+
+log_pass "'zfs share [ -a ] <filesystem>' succeeds as root."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh
index 8708d8b..2b89af9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh

@@ -89,7 +89,7 @@
 
 	while (( i < ${#args[*]} )); do
 		for snap in ${args[i]}; do
-			snapexists $snap && log_must zfs destroy -f $snap
+			snapexists $snap && destroy_dataset $snap -f
 		done
 		(( i = i + 1 ))
 	done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh
index 5d8b6e2..4ae68d4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_002_neg.ksh

@@ -53,12 +53,11 @@
 	for snap in $TESTPOOL/$TESTCTR/$TESTFS1@$TESTSNAP \
 		$TESTPOOL/$TESTCTR/$TESTVOL@$TESTSNAP;
 	do
-		snapexists $snap && \
-			log_must zfs destroy $snap
+		snapexists $snap && destroy_dataset $snap
 	done
 
 	datasetexists $TESTPOOL/$TESTCTR/$TESTVOL && \
-		log_must zfs destroy -rf $TESTPOOL/$TESTCTR/$TESTVOL
+		destroy_dataset $TESTPOOL/$TESTCTR/$TESTVOL -rf
 
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_004_neg.ksh
index 96121f1..16926a4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_004_neg.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $initfs ; then
-		log_must zfs destroy -rf $initfs
-	fi
+	datasetexists $initfs && destroy_dataset $initfs -rf
 }
 
 log_assert "Verify recursive snapshotting could not break ZFS."
@@ -70,9 +68,7 @@
 		# is incorrect
 		#
 		if ((len >= 255)); then
-			if datasetexists $basefs; then
-				log_must zfs destroy -r $basefs
-			fi
+			datasetexists $basefs && destroy_dataset $basefs -r
 			basefs=${basefs%/*}
 			len=$(echo $basefs| wc -c)
 		fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_005_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_005_neg.ksh
index d97dc0f..c133403 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_005_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_005_neg.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $initfs ; then
-		log_must zfs destroy -rf $initfs
-	fi
+	datasetexists $initfs && destroy_dataset $initfs -rf
 }
 
 log_assert "Verify long name filesystem with snapshot should not break ZFS."
@@ -71,9 +69,7 @@
 		# is incorrect
 		#
 		if ((len >= 255)); then
-			if datasetexists $basefs; then
-				log_must zfs destroy -r $basefs
-			fi
+			datasetexists $basefs && destroy_dataset $basefs -r
 			basefs=${basefs%/*}
 			len=$(echo $basefs| wc -c)
 		fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_006_pos.ksh
index 089ebdb..6b71128 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_006_pos.ksh

@@ -46,9 +46,7 @@
 {
 	for fs in $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL $TESTPOOL ; do
 		typeset fssnap=$fs@snap
-		if datasetexists $fssnap ; then
-			log_must zfs destroy -rf $fssnap
-		fi
+		datasetexists $fssnap && destroy_dataset $fssnap -rf
 	done
 	cleanup_user_prop $TESTPOOL
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_007_neg.ksh
index 590d56e..9499dca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_007_neg.ksh

@@ -46,9 +46,7 @@
 {
 	for fs in $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL $TESTPOOL/$TESTCTR $TESTPOOL ; do
 		typeset fssnap=$fs@snap
-		if datasetexists $fssnap ; then
-			log_must zfs destroy -rf $fssnap
-		fi
+		datasetexists $fssnap && destroy_dataset $fssnap -rf
 	done
 	cleanup_user_prop $TESTPOOL
 }
@@ -74,7 +72,12 @@
 	mounted origin"
 typeset snap_ro_props="volsize recordsize recsize quota reservation reserv mountpoint \
 	sharenfs checksum compression compress atime devices exec readonly rdonly \
-	setuid zoned"
+	setuid"
+if is_freebsd; then
+	snap_ro_props+=" jailed"
+else
+	snap_ro_props+=" zoned"
+fi
 
 zfs upgrade -v > /dev/null 2>&1
 if [[ $? -eq 0 ]]; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh
index f0682b8..6fedba9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos.ksh

@@ -12,6 +12,7 @@
 
 #
 # Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Datto Inc. All rights reserved.
 #
 
 #
@@ -23,6 +24,8 @@
 # 2. Create multiple snapshots with a list of valid and invalid
 #    snapshot names
 # 3. Verify the valid snapshot creation
+# 4. Verify creation of snapshots report the correct numbers by
+#    performing a snapshot directory listing
 
 . $STF_SUITE/include/libtest.shlib
 
@@ -31,9 +34,10 @@
 function cleanup
 {
 	for ds in $datasets; do
-		datasetexists $ds && log_must zfs destroy -r $ds
+		datasetexists $ds && destroy_dataset $ds -r
 	done
-	zfs destroy -r $TESTPOOL/TESTFS4
+	destroy_dataset $TESTPOOL/TESTFS4 -r
+	destroy_dataset $TESTPOOL/TESTFS5 -r
 }
 datasets="$TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS2
     $TESTPOOL/$TESTFS3"
@@ -66,8 +70,7 @@
 while (( i < ${#valid_args[*]} )); do
 	log_must zfs snapshot ${valid_args[i]}
 	for token in ${valid_args[i]}; do
-		log_must snapexists $token && \
-		    log_must zfs destroy $token
+		snapexists $token && destroy_dataset $token
 	done
 	((i = i + 1))
 done
@@ -112,4 +115,17 @@
 log_must zfs snapshot -r $TESTPOOL/$TESTFS1@snap1 $TESTPOOL/$TESTFS2@snap1 \
         $TESTPOOL/$TESTFS3@snap1 $TESTPOOL/TESTFS4@snap1
 
+# Repeatedly recreate a dataset, take NUM_SNAPS snapshots of it, and verify
+# that listing its .zfs/snapshot directory shows exactly that many entries.
+MYTEST="TESTFS5" ITERATIONS=10 NUM_SNAPS=5
+for x in {1..$ITERATIONS}; do
+	log_must zfs create $TESTPOOL/$MYTEST
+	for y in {1..$NUM_SNAPS}; do
+		log_must zfs snapshot $TESTPOOL/$MYTEST@$y
+	done
+	n=$(ls -1 /$TESTPOOL/$MYTEST/.zfs/snapshot | wc -l)
+	verify_eq $n $NUM_SNAPS "count"
+	destroy_dataset $TESTPOOL/$MYTEST -r	# same helper cleanup uses
+done
+
 log_pass "zfs multiple snapshot verified correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/cleanup.ksh
index 79cd6e9..7d6a7e1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/cleanup.ksh

@@ -27,4 +27,8 @@
 
 . $STF_SUITE/include/libtest.shlib
 
+if ! is_linux ; then
+	log_unsupported "sysfs is linux-only"
+fi
+
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/setup.ksh
index 9692385..261bce4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_sysfs/setup.ksh

@@ -27,6 +27,10 @@
 
 . $STF_SUITE/include/libtest.shlib
 
+if ! is_linux ; then
+	log_unsupported "sysfs is linux-only"
+fi
+
 DISK=${DISKS%% *}
 
 default_container_volume_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh
index 9e08ac6..55cfb5c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh

@@ -43,7 +43,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh
index ecb98d1..55da682 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh

@@ -38,9 +38,8 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
-	datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol
+	datasetexists $TESTPOOL/$TESTFS1 && destroy_dataset $TESTPOOL/$TESTFS1 -r
+	datasetexists $TESTPOOL/zvol && destroy_dataset $TESTPOOL/zvol
 	poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
 }
 log_onexit cleanup
@@ -62,7 +61,7 @@
 log_must zfs unmount $TESTPOOL/$TESTFS1
 log_must zfs unmount $TESTPOOL1
 
-log_must zfs unload-key -a
+log_must_busy zfs unload-key -a
 
 log_must key_unavailable $TESTPOOL/$TESTFS1
 log_must key_unavailable $TESTPOOL/$TESTFS1/child

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh
index 9766b59..01c720c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh

@@ -41,7 +41,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile.am
index 34cbb17..6507b09 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile.am

@@ -12,7 +12,8 @@
 	zfs_unmount_008_neg.ksh \
 	zfs_unmount_009_pos.ksh \
 	zfs_unmount_all_001_pos.ksh \
-	zfs_unmount_nested.ksh
+	zfs_unmount_nested.ksh \
+	zfs_unmount_unload_keys.ksh
 
 dist_pkgdata_DATA = \
 	zfs_unmount.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_001_pos.ksh
index fb4d1d9..6036eb2 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_001_pos.ksh

@@ -55,7 +55,7 @@
 		log_must zfs umount -f $TESTDIR2
 
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2
 
 	[[ -d $TESTDIR2 ]] && \
 		log_must rm -rf $TESTDIR2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh
index afec9d8..e85a0f3 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_008_neg.ksh

@@ -53,14 +53,10 @@
 function cleanup
 {
 	for ds in $vol $fs1; do
-		if datasetexists $ds; then
-			log_must zfs destroy -f $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds -f
 	done
 
-	if snapexists $snap; then
-		log_must zfs destroy $snap
-	fi
+	snapexists $snap && destroy_dataset $snap
 
 	if [[ -e /tmp/$file ]]; then
 		rm -f /tmp/$file
@@ -133,7 +129,7 @@
 
 # Testing legacy mounted filesystem
 log_must zfs set mountpoint=legacy $fs1
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_must mount -t zfs $fs1 /tmp/$dir
 else
 	log_must mount -F zfs $fs1 /tmp/$dir

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh
index 3575875..814d603 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_009_pos.ksh

@@ -55,9 +55,7 @@
 
 	for fs in $TESTPOOL/$TESTFS $TESTPOOL ; do
 		typeset snap=$fs@$TESTSNAP
-		if snapexists $snap; then
-			log_must zfs destroy $snap
-		fi
+		snapexists $snap && destroy_dataset $snap
 	done
 
 	if ! poolexists $TESTPOOL && is_global_zone; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_nested.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_nested.ksh
index 73eae6a..987ecca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_nested.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_nested.ksh

@@ -45,20 +45,24 @@
 log_onexit nesting_cleanup
 
 set -A test_depths 30 16 3
+typeset mountpoint=/$TESTPOOL/mnt
 
 dsA32=$(printf 'a/%.0s' {1..32})"a"
 log_must zfs create -p $TESTPOOL/$dsA32
 
 dsB32=$(printf 'b/%.0s' {1..32})"b"
 log_must zfs create -o mountpoint=none -p $TESTPOOL/$dsB32
-log_mustnot mount -t zfs $TESTPOOL/$dsB32 /mnt
+# FreeBSD's mount command ignores the mountpoint property.
+if ! is_freebsd; then
+	log_mustnot mount -t zfs $TESTPOOL/$dsB32 /mnt
+fi
 
 dsC32=$(printf 'c/%.0s' {1..32})"c"
 log_must zfs create -o mountpoint=legacy -p $TESTPOOL/$dsC32
 log_must mount -t zfs $TESTPOOL/$dsC32 /mnt
 
 dsD32=$(printf 'd/%.0s' {1..32})"d"
-log_must zfs create -o mountpoint=/$TESTPOOL/mnt -p $TESTPOOL/$dsD32
+log_must zfs create -o mountpoint=$mountpoint -p $TESTPOOL/$dsD32
 
 
 for d in ${test_depths[@]}; do
@@ -152,7 +156,7 @@
 	fi
 
 
-	# mountpoint=testpool/mnt
+	# mountpoint=/testpool/mnt
 	ds_pre=$(printf 'd/%.0s' {1..$(($d-2))})"d"
 	ds=$(printf 'd/%.0s' {1..$(($d-1))})"d"
 	ds_post=$(printf 'd/%.0s' {1..$(($d))})"d"
@@ -182,8 +186,8 @@
 	fi
 done
 
+log_must rmdir $mountpoint # remove the mountpoint we created
 log_must zpool export $TESTPOOL
-log_must rmdir /testpool/mnt # remove the mountpoint we created
 log_must zpool import $TESTPOOL
 
 log_pass "Verified nested dataset are unmounted."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_unload_keys.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_unload_keys.ksh
new file mode 100755
index 0000000..c92287a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unmount/zfs_unmount_unload_keys.ksh

@@ -0,0 +1,79 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_unmount/zfs_unmount.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# "zfs unmount -u" should allow the user to unload their encryption
+# keys while unmounting one or more datasets
+#
+# STRATEGY:
+# 1. Create a hierarchy of encrypted datasets
+# 2. Test that 'zfs unmount -u' unloads keys as it unmounts a dataset
+# 3. Test that 'zfs unmount -u' unloads keys as it unmounts multiple datasets
+# 4. Test that 'zfs unmount -u' returns an error if the key is still in
+#    use by a clone.
+#
+
+verify_runnable "both"
+
+function cleanup # registered with log_onexit; removes the datasets this test creates
+{
+	datasetexists $TESTPOOL/$TESTFS2 && \
+		destroy_dataset $TESTPOOL/$TESTFS2 -r	# NOTE(review): presumably also removes newroot, child and clone, making the checks below redundant - confirm
+	datasetexists $TESTPOOL/$TESTFS2/newroot && \
+		destroy_dataset $TESTPOOL/$TESTFS2/newroot -r
+	datasetexists $TESTPOOL/$TESTFS2/child && \
+		destroy_dataset $TESTPOOL/$TESTFS2/child -r
+
+}
+log_onexit cleanup
+
+log_assert "'zfs unmount -u' should unload keys for datasets as they are unmounted"
+log_must eval "echo 'password' | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS2"
+log_must eval "echo 'password' | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS2/newroot"
+log_must zfs create $TESTPOOL/$TESTFS2/child
+
+log_must zfs umount -u $TESTPOOL/$TESTFS2/newroot
+log_must key_unavailable $TESTPOOL/$TESTFS2/newroot
+log_must eval "echo 'password' | zfs mount -l $TESTPOOL/$TESTFS2/newroot"
+
+log_must zfs umount -u $TESTPOOL/$TESTFS2
+log_must key_unavailable $TESTPOOL/$TESTFS2
+log_must key_unavailable $TESTPOOL/$TESTFS2/newroot
+log_must key_unavailable $TESTPOOL/$TESTFS2/child
+log_must eval "echo 'password' | zfs mount -l $TESTPOOL/$TESTFS2/newroot"
+
+log_must zfs snap $TESTPOOL/$TESTFS2/newroot@1
+log_must zfs clone $TESTPOOL/$TESTFS2/newroot@1 $TESTPOOL/$TESTFS2/clone
+log_mustnot zfs umount -u $TESTPOOL/$TESTFS2/newroot
+log_must key_available $TESTPOOL/$TESTFS2/newroot
+log_must mounted $TESTPOOL/$TESTFS2/newroot
+
+log_pass "'zfs unmount -u' unloads keys for datasets as they are unmounted"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh
index ca625bd..ac16fe9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_001_pos.ksh

@@ -62,17 +62,14 @@
 	[[ -d $TESTDIR2 ]] && \
 		log_must rm -rf $TESTDIR2
 
-	if datasetexists "$TESTPOOL/$TESTCLONE"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTCLONE
-	fi
+	datasetexists "$TESTPOOL/$TESTCLONE" && \
+		destroy_dataset $TESTPOOL/$TESTCLONE -f
 
-	if snapexists "$TESTPOOL/$TESTFS2@snapshot"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS2@snapshot
-	fi
+	snapexists "$TESTPOOL/$TESTFS2@snapshot" && \
+		destroy_dataset $TESTPOOL/$TESTFS2@snapshot -f
 
-	if datasetexists "$TESTPOOL/$TESTFS2"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS2
-	fi
+	datasetexists "$TESTPOOL/$TESTFS2" && \
+		destroy_dataset $TESTPOOL/$TESTFS2 -f
 }
 
 #

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_002_pos.ksh
index 6a9c723..1ded1b4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_002_pos.ksh

@@ -65,17 +65,14 @@
 	[[ -d $TESTDIR2 ]] && \
 		log_must rm -rf $TESTDIR2
 
-	if datasetexists "$TESTPOOL/$TESTCLONE"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTCLONE
-	fi
+	datasetexists "$TESTPOOL/$TESTCLONE" && \
+		destroy_dataset $TESTPOOL/$TESTCLONE -f
 
-	if snapexists "$TESTPOOL/$TESTFS2@snapshot"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS2@snapshot
-	fi
+	snapexists "$TESTPOOL/$TESTFS2@snapshot" && \
+		destroy_dataset $TESTPOOL/$TESTFS2@snapshot -f
 
-	if datasetexists "$TESTPOOL/$TESTFS2"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS2
-	fi
+	datasetexists "$TESTPOOL/$TESTFS2" && \
+		destroy_dataset $TESTPOOL/$TESTFS2 -f
 }
 
 #

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_003_pos.ksh
index 66a7e80..6e66ded 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_003_pos.ksh

@@ -49,9 +49,8 @@
 
 function cleanup
 {
-	if snapexists $TESTPOOL/$TESTFS@snapshot; then
-		log_must zfs destroy $TESTPOOL/$TESTFS@snapshot
-	fi
+	snapexists $TESTPOOL/$TESTFS@snapshot && \
+		destroy_dataset $TESTPOOL/$TESTFS@snapshot
 
 	log_must zfs set sharenfs=off $TESTPOOL/$TESTFS
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_007_pos.ksh
index 0749dc1..36817a0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_unshare/zfs_unshare_007_pos.ksh

@@ -42,9 +42,8 @@
 
 function cleanup
 {
-	if datasetexists "$TESTPOOL/$TESTFS/shared1"; then
-		log_must zfs destroy -f $TESTPOOL/$TESTFS/shared1
-	fi
+	datasetexists "$TESTPOOL/$TESTFS/shared1" && \
+		destroy_dataset $TESTPOOL/$TESTFS/shared1 -f
 }
 
 log_assert "Verify 'zfs destroy' will unshare the dataset"
@@ -57,16 +56,12 @@
 #
 # 2. Verify the datasets is shared.
 #
-# The "non-impl" variant of "is_shared" requires the dataset to exist.
-# Thus, we can only use the "impl" variant in step 4, below. To be
-# consistent with step 4, we also use the "impl" variant here.
-#
-log_must eval "is_shared_impl $TESTDIR/1"
+log_must is_shared $TESTDIR/1
 
 # 3. Invoke 'zfs destroy' on the dataset.
 log_must zfs destroy -f $TESTPOOL/$TESTFS/shared1
 
 # 4. Verify the dataset is not shared.
-log_mustnot eval "is_shared_impl $TESTDIR/1"
+log_mustnot is_shared $TESTDIR/1
 
 log_pass "'zfs destroy' will unshare the dataset."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/setup.ksh
index 4c1348a..c9f3601 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/setup.ksh

@@ -39,4 +39,4 @@
    log_note "This machine is running ZFS Filesystem version $ZFS_VERSION"
 fi
 
-default_setup $DISKS
+default_setup "$DISKS"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh
index d3ed4a7..ab76461 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_001_pos.ksh

@@ -50,9 +50,7 @@
 
 function cleanup
 {
-	if datasetexists $rootfs ; then
-		log_must zfs destroy -Rf $rootfs
-	fi
+	datasetexists $rootfs && destroy_dataset $rootfs -Rf
 	log_must zfs create $rootfs
 
 	for file in $output $oldoutput ; do
@@ -116,9 +114,7 @@
 fi
 
 for fs in $old_datasets ; do
-	if datasetexists $fs ; then
-		log_must zfs destroy -Rf $fs
-	fi
+	datasetexists $fs && destroy_dataset $fs -Rf
 done
 
 log_must eval 'zfs upgrade > $output 2>&1'

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_003_pos.ksh
index 6df47b4..57f74ca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_003_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $rootfs ; then
-		log_must zfs destroy -Rf $rootfs
-	fi
+	datasetexists $rootfs && destroy_dataset $rootfs -Rf
 	log_must zfs create $rootfs
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_004_pos.ksh
index e3ff4f4..0b8fef5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_004_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $rootfs ; then
-		log_must zfs destroy -Rf $rootfs
-	fi
+	datasetexists $rootfs && destroy_dataset $rootfs -Rf
 	log_must zfs create $rootfs
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_005_pos.ksh
index 1a92991..5fcdc6e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/zfs_upgrade_005_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $rootfs ; then
-		log_must zfs destroy -Rf $rootfs
-	fi
+	datasetexists $rootfs && destroy_dataset $rootfs -Rf
 	log_must zfs create $rootfs
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am
new file mode 100644
index 0000000..d401fe6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am

@@ -0,0 +1,8 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_wait
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	zfs_wait_deleteq.ksh
+
+dist_pkgdata_DATA = \
+	zfs_wait.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh
new file mode 100755
index 0000000..456d2d0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh

@@ -0,0 +1,20 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh
new file mode 100755
index 0000000..cca05fe
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh

@@ -0,0 +1,21 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+DISK=${DISKS%% *}
+
+default_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
new file mode 100644
index 0000000..9f62a7c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib

@@ -0,0 +1,80 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+#
+
+typeset -a disk_array=($(find_disks $DISKS))
+
+typeset -r DISK1=${disk_array[0]}
+typeset -r DISK2=${disk_array[1]}
+typeset -r DISK3=${disk_array[2]}
+
+#
+# When the condition it is waiting for becomes true, 'zfs wait' should return
+# promptly. We want to enforce this, but any check will be racy because it will
+# take some small but indeterminate amount of time for the waiting thread to be
+# woken up and for the process to exit.
+#
+# To deal with this, we provide a grace period after the condition becomes true
+# during which 'zfs wait' can exit. If it hasn't exited by the time the grace
+# period expires we assume something is wrong and fail the test. While there is
+# no value that can really be correct, the idea is we choose something large
+# enough that it shouldn't cause issues in practice.
+#
+typeset -r WAIT_EXIT_GRACE=2.0
+
+function proc_exists # pid; returns the exit status of 'ps -p' for that pid
+{
+	ps -p $1 >/dev/null	# only the exit status is used; the listing is discarded
+}
+
+function proc_must_exist # pid; calls log_fail if the process is no longer running
+{
+	proc_exists $1 || log_fail "zfs process exited too soon"
+}
+
+function proc_must_not_exist # pid; calls log_fail if the process is still running
+{
+	! proc_exists $1 || log_fail "zfs process took too long to exit"	# returns 0 once the process is gone
+}
+
+function get_time # prints the current time of day as HH:MM:SS
+{
+	date +'%H:%M:%S'
+}
+
+function kill_if_running # pid (may be empty)
+{
+	typeset pid=$1
+	[[ $pid ]] && proc_exists $pid && log_must kill -s TERM $pid	# nothing is sent for an empty or already-exited pid
+}
+
+# Log a command (with a timestamp) and then start it running in the background
+function log_bkgrnd # command [args...]
+{
+	log_note "$(get_time) Starting cmd in background '$@'"
+	"$@" &	# not waited for here; callers pick up the pid from $!
+}
+
+# Check that a background process has completed and exited with a status of 0
+function bkgrnd_proc_succeeded # pid
+{
+	typeset pid=$1
+
+	log_must sleep $WAIT_EXIT_GRACE	# grace period in which the process is allowed to exit
+
+	proc_must_not_exist $pid
+	wait $pid || log_fail "process exited with status $?"	# collect the exit status of the finished process
+	log_note "$(get_time) wait completed successfully"
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh
new file mode 100755
index 0000000..00c5a10
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh

@@ -0,0 +1,57 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs wait' works when waiting for the delete queue to empty.
+#
+# STRATEGY:
+# 1. Create a file
+# 2. Open a file descriptor pointing to that file.
+# 3. Delete the file.
+# 4. Start a background process waiting for the delete queue to empty.
+# 5. Verify that the command doesn't return immediately.
+# 6. Close the open file descriptor.
+# 7. Verify that the command returns soon after the descriptor is closed.
+#
+
+function cleanup # registered with log_onexit
+{
+	kill_if_running $pid	# stop the background 'zfs wait' if it is still running
+	exec 3<&-		# close fd 3 in case the test ended before closing it
+}
+
+
+typeset -r TESTFILE="/$TESTPOOL/testfile"
+typeset pid
+
+log_onexit cleanup
+
+log_must touch $TESTFILE
+exec 3<> $TESTFILE	# hold an open descriptor (fd 3) on the file
+log_must rm $TESTFILE	# delete it while fd 3 is still open
+log_bkgrnd zfs wait -t deleteq $TESTPOOL
+pid=$!
+proc_must_exist $pid	# the wait must not return while fd 3 is open
+
+exec 3<&-	# close fd 3
+log_must sleep 0.5
+bkgrnd_proc_succeeded $pid
+
+log_pass "'zfs wait -t deleteq' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/Makefile.am
index 2d0046c..327f236 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/Makefile.am

@@ -4,4 +4,5 @@
 	cleanup.ksh \
 	zpool_001_neg.ksh \
 	zpool_002_pos.ksh \
-	zpool_003_pos.ksh
+	zpool_003_pos.ksh \
+	zpool_colors.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/setup.ksh
index 6a9af3b..4e3b6b0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/setup.ksh

@@ -29,4 +29,4 @@
 
 DISK=${DISKS%% *}
 
-default_setup $DISK
+default_mirror_setup $DISKS

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_002_pos.ksh
index 4cdc711..caf8a9a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_002_pos.ksh

@@ -47,31 +47,32 @@
 {
 	unset ZFS_ABORT
 
-	if [[ -d $corepath ]]; then
-		rm -rf $corepath
+	if is_freebsd && [ -n "$old_corefile" ]; then
+		sysctl kern.corefile=$old_corefile
 	fi
 
-	if poolexists $pool; then
-		log_must zpool destroy -f $pool
-	fi
+	# Clean up the pool created if we failed to abort.
+	poolexists $pool && destroy_pool $pool
+
+	rm -rf $corepath $vdev1 $vdev2 $vdev3
 }
 
 log_assert "With ZFS_ABORT set, all zpool commands can abort and generate a core file."
 log_onexit cleanup
 
-#preparation work for testing
 corepath=$TESTDIR/core
+corefile=$corepath/zpool.core
 if [[ -d $corepath ]]; then
-	rm -rf $corepath
+	log_must rm -rf $corepath
 fi
-mkdir $corepath
+log_must mkdir $corepath
 
 pool=pool.$$
 vdev1=$TESTDIR/file1
 vdev2=$TESTDIR/file2
 vdev3=$TESTDIR/file3
 for vdev in $vdev1 $vdev2 $vdev3; do
-	mkfile $MINVDEVSIZE $vdev
+	log_must mkfile $MINVDEVSIZE $vdev
 done
 
 set -A cmds "create $pool mirror $vdev1 $vdev2" "list $pool" "iostat $pool" \
@@ -86,23 +87,25 @@
 		"import" "export" "upgrade" "history -?" "get" "set"
 
 if is_linux; then
-	ulimit -c unlimited
-	echo "$corepath/core.zpool" >/proc/sys/kernel/core_pattern
+	echo $corefile >/proc/sys/kernel/core_pattern
 	echo 0 >/proc/sys/kernel/core_uses_pid
-	export ASAN_OPTIONS="abort_on_error=1:disable_coredump=0"
-else
-	coreadm -p ${corepath}/core.%f
+elif is_freebsd; then
+	old_corefile=$(sysctl -n kern.corefile)
+	log_must sysctl kern.corefile=$corefile
 fi
+ulimit -c unlimited
 
+export ASAN_OPTIONS="abort_on_error=1:disable_coredump=0"
 export ZFS_ABORT=yes
 
 for subcmd in "${cmds[@]}" "${badparams[@]}"; do
-	corefile=${corepath}/core.zpool
 	zpool $subcmd >/dev/null 2>&1
 	if [[ ! -e $corefile ]]; then
-		log_fail "zpool $subcmd cannot generate core file  with ZFS_ABORT set."
+		log_fail "zpool $subcmd cannot generate core file with ZFS_ABORT set."
 	fi
 	rm -f $corefile
 done
 
+unset ZFS_ABORT
+
 log_pass "With ZFS_ABORT set, zpool command can abort and generate core file as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_003_pos.ksh
index 0f04f0c..71d73c0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_003_pos.ksh

@@ -42,11 +42,34 @@
 # 3. Verify it run successfully.
 #
 
+function cleanup # registered with log_onexit
+{
+	unset ZFS_ABORT	# the test exports ZFS_ABORT; make sure it does not outlive the test
+
+	if is_freebsd && [ -n "$old_corefile" ]; then
+		sysctl kern.corefile=$old_corefile	# restore the value saved before the test changed it
+	fi
+
+	rm -rf $corepath	# remove the core directory the test created
+
+	# Don't leave the pool frozen: destroy it and set it up again.
+	destroy_pool $TESTPOOL
+	default_mirror_setup $DISKS
+}
+
 verify_runnable "both"
 
 log_assert "Debugging features of zpool should succeed."
+log_onexit cleanup
 
-log_must zpool -? > /dev/null 2>&1
+corepath=$TESTDIR/core
+corefile=$corepath/zpool.core
+if [[ -d $corepath ]]; then
+	log_must rm -rf $corepath
+fi
+log_must mkdir $corepath
+
+log_must eval "zpool -? >/dev/null 2>&1"
 
 if is_global_zone ; then
 	log_must zpool freeze $TESTPOOL
@@ -57,21 +80,22 @@
 
 log_mustnot zpool freeze fakepool
 
-# Remove corefile possibly left by previous failing run of this test.
-[[ -f core ]] && log_must rm -f core
-
 if is_linux; then
-	ulimit -c unlimited
-	echo "core" >/proc/sys/kernel/core_pattern
+	echo $corefile >/proc/sys/kernel/core_pattern
 	echo 0 >/proc/sys/kernel/core_uses_pid
-	export ASAN_OPTIONS="abort_on_error=1:disable_coredump=0"
+elif is_freebsd; then
+	old_corefile=$(sysctl -n kern.corefile)
+	log_must sysctl kern.corefile=$corefile
 fi
+ulimit -c unlimited
 
-ZFS_ABORT=1; export ZFS_ABORT
-zpool > /dev/null 2>&1
+export ASAN_OPTIONS="abort_on_error=1:disable_coredump=0"
+export ZFS_ABORT=yes
+
+zpool >/dev/null 2>&1
+
 unset ZFS_ABORT
 
-[[ -f core ]] || log_fail "zpool did not dump core by request."
-[[ -f core ]] && log_must rm -f core
+[[ -f $corefile ]] || log_fail "zpool did not dump core by request."
 
 log_pass "Debugging features of zpool succeed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_colors.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_colors.ksh
new file mode 100755
index 0000000..18f2383
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool/zpool_colors.ksh

@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2019 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Test that zpool status colored output works.
+#
+# STRATEGY:
+# 1. Create a pool with a bunch of errors and force fault one of the vdevs.
+# 2. Look for 'pool:' in bold.
+# 3. Look for 'DEGRADED' in yellow
+# 4. Look for 'FAULTED' in red
+#
+
+verify_runnable "both"
+
+function cleanup # registered with log_onexit
+{
+	zinject -c all	# cancel any injection handlers the test may have left registered
+}
+
+log_onexit cleanup
+
+log_assert "Test colorized zpool status output"
+
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+DISK3="$(echo $DISKS | cut -d' ' -f3)"
+
+log_must dd if=/dev/urandom of=/$TESTDIR/testfile bs=10M count=1
+
+log_must zpool sync
+
+log_must zpool offline -f $TESTPOOL $DISK3
+log_must wait_for_degraded $TESTPOOL
+log_must zinject -d $DISK2 -e io -T read -f 20 $TESTPOOL
+log_must zinject -d $DISK2 -e io -T write -f 20 $TESTPOOL
+
+
+log_must zpool scrub -w $TESTPOOL
+log_must zinject -c all
+
+
+# Use 'script' to fake zpool status into thinking it's running in a tty.
+# Log the output here in case it's needed for postmortem.
+log_note "$(faketty TERM=xterm-256color ZFS_COLOR=1 zpool status)"
+
+# Replace the escape codes with "ESC" so they're easier to grep
+out="$(faketty TERM=xterm-256color ZFS_COLOR=1 zpool status | \
+    grep -E 'pool:|DEGRADED' | \
+    sed -r 's/[[:space:]]+//g;'$(echo -e 's/\033/ESC/g'))"
+
+log_note "$(echo $out)"
+
+log_note "Look for 'pool:' in bold"
+log_must eval "echo \"$out\" | grep -q 'ESC\[1mpool:ESC\[0m' "
+
+log_note "Look for 'DEGRADED' in yellow"
+log_must eval "echo \"$out\" | grep -q 'ESC\[0;33mDEGRADEDESC\[0m'"
+
+#
+# The escape code for 'FAULTED' is a little more tricky.  The line starts like
+# this:
+#
+# <start red escape code> loop2  FAULTED <end escape code>
+#
+# Luckily, awk counts the start and end escape codes as separate fields, so
+# we can easily remove the vdev field to get what we want.
+#
+out="$(faketty TERM=xterm-256color ZFS_COLOR=1 zpool status \
+    | awk '/FAULTED/{print $1$3$4}' | sed -r $(echo -e 's/\033/ESC/g'))"
+
+log_note "Look for 'FAULTED' in red"
+log_must eval "echo \"$out\" | grep -q 'ESC\[0;31mFAULTEDESC\[0m'"
+
+log_pass "zpool status displayed colors"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile.am
index a7f62b6..8d54d13 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile.am

@@ -14,7 +14,8 @@
 	zpool_add_010_pos.ksh \
 	add-o_ashift.ksh \
 	add_prop_ashift.ksh \
-	add_nested_replacing_spare.ksh
+	add_nested_replacing_spare.ksh \
+	zpool_add_dryrun_output.ksh
 
 dist_pkgdata_DATA = \
 	zpool_add.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
index 8556f29..0fa1c00 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh

@@ -22,10 +22,11 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib
 
 #
 # DESCRIPTION:
@@ -35,25 +36,31 @@
 # STRATEGY:
 #	1. Create a pool with default values.
 #	2. Verify 'zpool add -o ashift=<n>' works with allowed values (9-16).
-#	3. Verify 'zpool add -o ashift=<n>' doesn't accept other invalid values.
+#	3. Verify setting kernel tunable for file vdevs works correctly.
+#	4. Verify 'zpool add -o ashift=<n>' doesn't accept other invalid values.
 #
 
 verify_runnable "global"
 
 function cleanup
 {
+	[[ -n $orig_ashift ]] && log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
-	log_must rm -f $disk1 $disk2
+	rm -f $disk1 $disk2
 }
 
 log_assert "zpool add -o ashift=<n>' works with different ashift values"
 log_onexit cleanup
 
-disk1=$TEST_BASE_DIR/$FILEDISK0
-disk2=$TEST_BASE_DIR/$FILEDISK1
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
 log_must mkfile $SIZE $disk1
 log_must mkfile $SIZE $disk2
 
+logical_ashift=$(get_tunable VDEV_FILE_LOGICAL_ASHIFT)
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+max_auto_ashift=$(get_tunable VDEV_MAX_AUTO_ASHIFT)
+
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do
@@ -69,13 +76,32 @@
 	log_must zpool destroy $TESTPOOL
 	log_must zpool labelclear $disk1
 	log_must zpool labelclear $disk2
+
+	#
+	# Make sure we can also set the ashift using the tunable.
+	#
+	log_must zpool create $TESTPOOL $disk1
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $ashift
+	log_must zpool add $TESTPOOL $disk2
+	exp=$(( (ashift <= max_auto_ashift) ? ashift : logical_ashift ))
+	verify_ashift $disk2 $exp
+	if [[ $? -ne 0 ]]
+	then
+		log_fail "Device was added without setting ashift value to "\
+		    "$ashift"
+	fi
+	# clean things for the next run
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
+	log_must zpool destroy $TESTPOOL
+	log_must zpool labelclear $disk1
+	log_must zpool labelclear $disk2
 done
 
 typeset badvals=("off" "on" "1" "8" "17" "1b" "ff" "-")
 for badval in ${badvals[@]}
 do
 	log_must zpool create $TESTPOOL $disk1
-	log_mustnot zpool add $TESTPOOL -o ashift="$badval" $disk2
+	log_mustnot zpool add -o ashift="$badval" $TESTPOOL $disk2
 	# clean things for the next run
 	log_must zpool destroy $TESTPOOL
 	log_must zpool labelclear $disk1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh
index ec94df8..61f5f6d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh

@@ -25,7 +25,6 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
 
 #
 # DESCRIPTION:

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh
index 29debe1..4637fe0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_prop_ashift.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -43,6 +44,7 @@
 
 function cleanup
 {
+	[[ -n $orig_ashift ]] && log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 	log_must rm -f $disk1 $disk2
 }
@@ -50,11 +52,19 @@
 log_assert "'zpool add' uses the ashift pool property value as default."
 log_onexit cleanup
 
-disk1=$TEST_BASE_DIR/$FILEDISK0
-disk2=$TEST_BASE_DIR/$FILEDISK1
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
 log_must mkfile $SIZE $disk1
 log_must mkfile $SIZE $disk2
 
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do
@@ -77,7 +87,7 @@
 	for cmdval in ${ashifts[@]}
 	do
 		log_must zpool create -o ashift=$ashift $TESTPOOL $disk1
-		log_must zpool add $TESTPOOL -o ashift=$cmdval $disk2
+		log_must zpool add -o ashift=$cmdval $TESTPOOL $disk2
 		verify_ashift $disk2 $cmdval
 		if [[ $? -ne 0 ]]
 		then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh
index 48a6bc3..33bd94f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/cleanup.ksh

@@ -32,11 +32,4 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/cli_root/zpool_add/zpool_add.kshlib
 
-DISK=${DISKS%% *}
-if is_mpath_device $DISK; then
-        delete_partitions
-fi
-
-cleanup_devices $DISKS
-
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/setup.ksh
index 4b5f44a..13bd33e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/setup.ksh

@@ -34,28 +34,4 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
-	log_unsupported "This directory cannot be run on raw files."
-fi
-
-disk1=${DISKS%% *}
-if is_mpath_device $disk1; then
-        delete_partitions
-fi
-
-if [[ -n $DISK ]]; then
-	#
-        # Use 'zpool create' to clean up the information in
-        # in the given disk to avoid slice overlapping.
-        #
-	cleanup_devices $DISK
-
-        partition_disk $SIZE $DISK 7
-else
-	for disk in `echo $DISKSARRAY`; do
-		cleanup_devices $disk
-		partition_disk $SIZE $disk 7
-	done
-fi
-
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.cfg
index e4429b2..a634b8b 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.cfg

@@ -28,59 +28,12 @@
 # Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 #
 
-export DISK_ARRAY_NUM=0
-export DISK_ARRAY_LIMIT=4
-export DISKSARRAY=""
-
-function set_disks
-{
-        set -A disk_array $(find_disks $DISKS)
-
-        if (( ${#disk_array[*]} <= 1 )); then
-                export DISK=${DISKS%% *}
-        else
-                export DISK=""
-                typeset -i i=0
-                while (( i < ${#disk_array[*]} )); do
-                        export DISK${i}="${disk_array[$i]}"
-                        DISKSARRAY="$DISKSARRAY ${disk_array[$i]}"
-                        (( i = i + 1 ))
-                        (( i>$DISK_ARRAY_LIMIT )) && break
-                done
-                export DISK_ARRAY_NUM=$i
-                export DISKSARRAY
-        fi
-
-	if (( $DISK_ARRAY_NUM == 0 )); then
-		export disk=$DISK
-	else
-		export disk=$DISK0
-	fi
-
-}
-
-set_disks
-
 export SIZE="$(((MINVDEVSIZE / (1024 * 1024)) * 2))m"
+export VOLSIZE=$MINVDEVSIZE
+
+echo $DISKS | read DISK0 DISK1 DISK2
 
 if is_linux; then
+	export DISK_ARRAY_NUM=3
 	set_device_dir
-	set_slice_prefix
-	export SLICE0=1
-	export SLICE1=2
-	export SLICE3=4
-	export SLICE4=5
-	export SLICE5=6
-	export SLICE6=7
-else
-	export DEV_DSKDIR="/dev"
-	export SLICE_PREFIX="s"
-	export SLICE0=0
-	export SLICE1=1
-	export SLICE3=3
-	export SLICE4=4
-	export SLICE5=5
-	export SLICE6=6
 fi
-
-export VOLSIZE=$MINVDEVSIZE

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib
index 94615ee..a7a1fb3 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add.kshlib

@@ -40,12 +40,12 @@
 	typeset vfstabdevs=""
 	typeset line
 
-	if is_linux; then
-		vfstab="/etc/fstab"
-		tmpfile="$TEST_BASE_DIR/fstab.tmp"
-	else
+	if is_illumos; then
 		vfstab="/etc/vfstab"
 		tmpfile="$TEST_BASE_DIR/vfstab.tmp"
+	else
+		vfstab="/etc/fstab"
+		tmpfile="$TEST_BASE_DIR/fstab.tmp"
 	fi
 
 	cat $vfstab | grep "^${DEV_DSKDIR}" >$tmpfile
@@ -69,7 +69,12 @@
 	typeset mnttabdevs=""
 	typeset line
 
-	if is_linux; then
+	if is_freebsd; then
+		# FreeBSD doesn't have a mnttab file.
+		mount -p | awk -v dir="^${DEV_DSKDIR}" \
+		    '$1 ~ dir { print $1 }' | xargs
+		return 0
+	elif is_linux; then
 		typeset mnttab="/etc/mtab"
 		typeset tmpfile="$TEST_BASE_DIR/mtab.tmp"
 	else
@@ -95,31 +100,12 @@
 function save_dump_dev
 {
 
-	typeset dumpdev
+	typeset dumpdev=""
 
-	if is_linux; then
-		dumpdev=""
-	else
+	if is_illumos; then
 		typeset fnd="Dump device"
 		dumpdev=`dumpadm | grep "$fnd" | cut -f2 -d : | \
 			awk '{print $1}'`
 	fi
 	echo $dumpdev
 }
-
-#
-# Common cleanup routine for partitions used in testing
-#
-function partition_cleanup
-{
-
-	if [[ -n $DISK ]]; then
-		partition_disk $SIZE $DISK 7
-	else
-		typeset disk=""
-		for disk in $DISK0 $DISK1; do
-			partition_disk $SIZE $disk 7
-		done
-	fi
-
-}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh
index a0fc0ea..191ec83 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_001_pos.ksh

@@ -47,61 +47,41 @@
 
 function cleanup
 {
-	poolexists $TESTPOOL && \
-		destroy_pool $TESTPOOL
-
-	partition_cleanup
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -f $disk0 $disk1 $disk2
 }
 
 log_assert "'zpool add <pool> <vdev> ...' can add devices to the pool."
 
 log_onexit cleanup
 
-set -A keywords "" "mirror" "raidz" "raidz1" "spare"
+set -A keywords "" "mirror" "raidz" "raidz1" "draid:1s" "draid1:1s" "spare"
 
-case $DISK_ARRAY_NUM in
-0|1)
-	pooldevs="${disk}${SLICE_PREFIX}${SLICE0} \
-		${DEV_DSKDIR}/${disk}${SLICE_PREFIX}${SLICE0} \
-		\"${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1}\""
-	mirrordevs="\"${DEV_DSKDIR}/${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1}\""
-	raidzdevs="\"${DEV_DSKDIR}/${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1}\""
+pooldevs="${DISK0} \
+	\"${DISK0} ${DISK1}\" \
+	\"${DISK0} ${DISK1} ${DISK2}\""
+mirrordevs="\"${DISK0} ${DISK1}\""
+raidzdevs="\"${DISK0} ${DISK1}\""
+draiddevs="\"${DISK0} ${DISK1} ${DISK2}\""
 
-	;;
-2|*)
-	pooldevs="${DISK0}${SLICE_PREFIX}${SLICE0} \
-		\"${DEV_DSKDIR}/${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0}\" \
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1} \
-		${DISK1}${SLICE_PREFIX}${SLICE1}\"\
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1}\
-		${DISK1}${SLICE_PREFIX}${SLICE1}\""
-	mirrordevs="\"${DEV_DSKDIR}/${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0}\""
-	raidzdevs="\"${DEV_DSKDIR}/${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0}\""
-
-	;;
-esac
+disk0=$TEST_BASE_DIR/disk0
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
+truncate -s $MINVDEVSIZE $disk0 $disk1 $disk2
 
 typeset -i i=0
 typeset vdev
 eval set -A poolarray $pooldevs
 eval set -A mirrorarray $mirrordevs
 eval set -A raidzarray $raidzdevs
+eval set -A draidarray $draiddevs
 
 while (( $i < ${#keywords[*]} )); do
 
         case ${keywords[i]} in
         ""|spare)
 		for vdev in "${poolarray[@]}"; do
-			create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE6}"
+			create_pool "$TESTPOOL" "$disk0"
 			log_must poolexists "$TESTPOOL"
 			log_must zpool add -f "$TESTPOOL" ${keywords[i]} $vdev
 			log_must vdevs_in_pool "$TESTPOOL" "$vdev"
@@ -112,8 +92,7 @@
         mirror)
 		for vdev in "${mirrorarray[@]}"; do
 			create_pool "$TESTPOOL" "${keywords[i]}" \
-				"${disk}${SLICE_PREFIX}${SLICE4}" \
-				"${disk}${SLICE_PREFIX}${SLICE5}"
+				"$disk0" "$disk1"
 			log_must poolexists "$TESTPOOL"
 			log_must zpool add "$TESTPOOL" ${keywords[i]} $vdev
 			log_must vdevs_in_pool "$TESTPOOL" "$vdev"
@@ -124,8 +103,7 @@
         raidz|raidz1)
 		for vdev in "${raidzarray[@]}"; do
 			create_pool "$TESTPOOL" "${keywords[i]}" \
-				"${disk}${SLICE_PREFIX}${SLICE4}" \
-				"${disk}${SLICE_PREFIX}${SLICE5}"
+				"$disk0" "$disk1"
 			log_must poolexists "$TESTPOOL"
 			log_must zpool add "$TESTPOOL" ${keywords[i]} $vdev
 			log_must vdevs_in_pool "$TESTPOOL" "$vdev"
@@ -133,6 +111,19 @@
 		done
 
 		;;
+        draid:1s|draid1:1s)
+		for vdev in "${draidarray[@]}"; do
+			create_pool "$TESTPOOL" "${keywords[i]}" \
+				"$disk0" "$disk1" "$disk2"
+			log_must poolexists "$TESTPOOL"
+			log_must zpool add "$TESTPOOL" ${keywords[i]} $vdev
+			log_must vdevs_in_pool "$TESTPOOL" "$vdev"
+			log_must vdevs_in_pool "$TESTPOOL" "draid1-0-0"
+			log_must vdevs_in_pool "$TESTPOOL" "draid1-1-0"
+			destroy_pool "$TESTPOOL"
+		done
+
+		;;
         esac
 
         (( i = i+1 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_002_pos.ksh
index eb49231..67810bb 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_002_pos.ksh

@@ -48,10 +48,7 @@
 
 function cleanup
 {
-        poolexists $TESTPOOL && \
-                destroy_pool $TESTPOOL
-
-	partition_cleanup
+        poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_assert "'zpool add -f <pool> <vdev> ...' can successfully add" \
@@ -59,14 +56,13 @@
 
 log_onexit cleanup
 
-create_pool "$TESTPOOL" mirror "${disk}${SLICE_PREFIX}${SLICE0}" \
-	"${disk}${SLICE_PREFIX}${SLICE1}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL mirror $DISK0 $DISK1
+log_must poolexists $TESTPOOL
 
-log_mustnot zpool add "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE3}
-log_mustnot vdevs_in_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE3}"
+log_mustnot zpool add $TESTPOOL $DISK2
+log_mustnot vdevs_in_pool $TESTPOOL $DISK2
 
-log_must zpool add -f "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE3}
-log_must vdevs_in_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE3}"
+log_must zpool add -f $TESTPOOL $DISK2
+log_must vdevs_in_pool $TESTPOOL $DISK2
 
 log_pass "'zpool add -f <pool> <vdev> ...' executes successfully."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_003_pos.ksh
index cfdc29d..a6b03ff 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_003_pos.ksh

@@ -34,26 +34,23 @@
 
 #
 # DESCRIPTION:
-#	'zpool add -n <pool> <vdev> ...' can display the configuration without
-# adding the specified devices to given pool
+# 'zpool add -n <pool> <vdev> ...' can display the configuration without adding
+# the specified devices to given pool
 #
 # STRATEGY:
-#	1. Create a storage pool
-#	2. Use -n to add a device to the pool
-#	3. Verify the device is not added actually
+# 1. Create a storage pool
+# 2. Use -n to add devices to the pool
+# 3. Verify the devices are not actually added
+# 4. Add devices to the pool for real this time, verify the vdev tree is the
+#    same as the one printed by the dry-run iteration
 #
 
 verify_runnable "global"
 
 function cleanup
 {
-        poolexists $TESTPOOL && \
-                destroy_pool $TESTPOOL
-
-	partition_cleanup
-
-	[[ -e $tmpfile ]] && \
-		log_must rm -f $tmpfile
+	destroy_pool $TESTPOOL
+	rm -f $TMPFILE_PREFIX* $VDEV_PREFIX*
 }
 
 log_assert "'zpool add -n <pool> <vdev> ...' can display the configuration" \
@@ -61,18 +58,40 @@
 
 log_onexit cleanup
 
-tmpfile="$TEST_BASE_DIR/zpool_add_003.tmp$$"
+typeset TMPFILE_PREFIX="$TEST_BASE_DIR/zpool_add_003"
+typeset STR_DRYRUN="would update '$TESTPOOL' to the following configuration:"
+typeset VDEV_PREFIX="$TEST_BASE_DIR/filedev"
+typeset -a VDEV_TYPES=("" "dedup" "special" "log" "cache" "spare")
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
+vdevs=""
+config=""
+
+# 1. Create a storage pool
+log_must truncate -s $SPA_MINDEVSIZE "$VDEV_PREFIX-root"
+log_must zpool create "$TESTPOOL" "$VDEV_PREFIX-root"
 log_must poolexists "$TESTPOOL"
+for vdevtype in "${VDEV_TYPES[@]}"; do
+	log_must truncate -s $SPA_MINDEVSIZE "$VDEV_PREFIX-$vdevtype"
+	vdevs="$vdevs $VDEV_PREFIX-$vdevtype"
+	config="$config $vdevtype $VDEV_PREFIX-$vdevtype"
+done
 
-zpool add -n "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE1} > $tmpfile
+# 2. Use -n to add devices to the pool
+log_must eval "zpool add -f -n $TESTPOOL $config > $TMPFILE_PREFIX-dryrun"
+log_must grep -q "$STR_DRYRUN" "$TMPFILE_PREFIX-dryrun"
 
-log_mustnot vdevs_in_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE1}"
+# 3. Verify the devices are not actually added
+for vdev in $vdevs; do
+	log_mustnot vdevs_in_pool "$TESTPOOL" "$vdev"
+done
 
-str="would update '$TESTPOOL' to the following configuration:"
-cat $tmpfile | grep "$str" >/dev/null 2>&1
-(( $? != 0 )) && \
-	 log_fail "'zpool add -n <pool> <vdev> ...' is executed as unexpected"
+# 4. Add devices to the pool for real this time, verify the vdev tree is the
+#    same as the one printed by the dry-run iteration
+log_must zpool add -f $TESTPOOL $config
+zpool status $TESTPOOL | awk 'NR == 1, /NAME/ { next } /^$/ {exit}
+	{print $1}' > "$TMPFILE_PREFIX-vdevtree"
+cat "$TMPFILE_PREFIX-dryrun" | awk 'NR == 1, /would/ {next}
+	/^$/ {next} {print $1}' > "$TMPFILE_PREFIX-vdevtree-n"
+log_must eval "diff $TMPFILE_PREFIX-vdevtree-n $TMPFILE_PREFIX-vdevtree"
 
-log_pass "'zpool add -n <pool> <vdev> ...'executes successfully."
+log_pass "'zpool add -n <pool> <vdev> ...' executes successfully."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_004_pos.ksh
index 61ce4ec..64e5296 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_004_pos.ksh

@@ -47,31 +47,30 @@
 
 function cleanup
 {
-	poolexists $TESTPOOL && \
-		destroy_pool "$TESTPOOL"
-
-	datasetexists $TESTPOOL1/$TESTVOL && \
-		log_must zfs destroy -f $TESTPOOL1/$TESTVOL
-	poolexists $TESTPOOL1 && \
-		destroy_pool "$TESTPOOL1"
-
-	partition_cleanup
-
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+	if [ -n "$recursive" ]; then
+		set_tunable64 VOL_RECURSIVE $recursive
+	fi
 }
 
 log_assert "'zpool add <pool> <vdev> ...' can add zfs volume to the pool."
 
 log_onexit cleanup
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL $DISK0
+log_must poolexists $TESTPOOL
 
-create_pool "$TESTPOOL1" "${disk}${SLICE_PREFIX}${SLICE1}"
-log_must poolexists "$TESTPOOL1"
+create_pool $TESTPOOL1 $DISK1
+log_must poolexists $TESTPOOL1
 log_must zfs create -V $VOLSIZE $TESTPOOL1/$TESTVOL
 block_device_wait
 
-log_must zpool add "$TESTPOOL" $ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL
+if is_freebsd; then
+	recursive=$(get_tunable VOL_RECURSIVE)
+	log_must set_tunable64 VOL_RECURSIVE 1
+fi
+log_must zpool add $TESTPOOL $ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL
 
 log_must vdevs_in_pool "$TESTPOOL" "$ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_005_pos.ksh
index 1516cb2..c40f8db 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_005_pos.ksh

@@ -50,16 +50,12 @@
 
 function cleanup
 {
-	poolexists "$TESTPOOL" && \
-		destroy_pool "$TESTPOOL"
-	poolexists "$TESTPOOL1" && \
-		destroy_pool "$TESTPOOL1"
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
 
 	if [[ -n $saved_dump_dev ]]; then
 		log_must eval "dumpadm -u -d $saved_dump_dev > /dev/null"
 	fi
-
-	partition_cleanup
 }
 
 log_assert "'zpool add' should fail with inapplicable scenarios."
@@ -69,27 +65,27 @@
 mnttab_dev=$(find_mnttab_dev)
 vfstab_dev=$(find_vfstab_dev)
 saved_dump_dev=$(save_dump_dev)
-dump_dev=${disk}${SLICE_PREFIX}${SLICE3}
+dump_dev=$DISK2
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL $DISK0
+log_must poolexists $TESTPOOL
 
-create_pool "$TESTPOOL1" "${disk}${SLICE_PREFIX}${SLICE1}"
-log_must poolexists "$TESTPOOL1"
+create_pool $TESTPOOL1 $DISK1
+log_must poolexists $TESTPOOL1
 
 unset NOINUSE_CHECK
-log_mustnot zpool add -f "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE1}
-log_mustnot zpool add -f "$TESTPOOL" $mnttab_dev
+log_mustnot zpool add -f $TESTPOOL $DISK1
+log_mustnot zpool add -f $TESTPOOL $mnttab_dev
 if is_linux; then
-       log_mustnot zpool add "$TESTPOOL" $vfstab_dev
+       log_mustnot zpool add $TESTPOOL $vfstab_dev
 else
-       log_mustnot zpool add -f "$TESTPOOL" $vfstab_dev
+       log_mustnot zpool add -f $TESTPOOL $vfstab_dev
 fi
 
-if ! is_linux; then
-	log_must echo "y" | newfs ${DEV_DSKDIR}/$dump_dev > /dev/null 2>&1
-	log_must dumpadm -u -d ${DEV_DSKDIR}/$dump_dev > /dev/null
-	log_mustnot zpool add -f "$TESTPOOL" $dump_dev
+if is_illumos; then
+	log_must eval "new_fs ${DEV_DSKDIR}/$dump_dev > /dev/null 2>&1"
+	log_must eval "dumpadm -u -d ${DEV_DSKDIR}/$dump_dev > /dev/null"
+	log_mustnot zpool add -f $TESTPOOL $dump_dev
 fi
 
 log_pass "'zpool add' should fail with inapplicable scenarios."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_006_pos.ksh
index 6d47365..2c3f488 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_006_pos.ksh

@@ -46,14 +46,8 @@
 
 function cleanup
 {
-	poolexists $TESTPOOL1 && \
-		destroy_pool $TESTPOOL1
-
-	poolexists $TESTPOOL && \
-		destroy_pool $TESTPOOL
-
-	[[ -d $TESTDIR ]] && log_must rm -rf $TESTDIR
-	partition_cleanup
+	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+	rm -rf $TESTDIR
 }
 
 log_assert "Adding a large number of file based vdevs to a zpool works."
@@ -66,12 +60,12 @@
 vdevs_list=$(echo $TESTDIR/file.{01..16})
 log_must truncate -s $MINVDEVSIZE $vdevs_list
 
-log_must zpool add -f "$TESTPOOL1" $vdevs_list
-log_must vdevs_in_pool "$TESTPOOL1" "$vdevs_list"
+log_must zpool add -f $TESTPOOL1 $vdevs_list
+log_must vdevs_in_pool $TESTPOOL1 "$vdevs_list"
 
 # Attempt to add a file based vdev that's too small.
 log_must truncate -s 32m $TESTDIR/broken_file
-log_mustnot zpool add -f "$TESTPOOL1" ${TESTDIR}/broken_file
-log_mustnot vdevs_in_pool "$TESTPOOL1" "${TESTDIR}/broken_file"
+log_mustnot zpool add -f $TESTPOOL1 ${TESTDIR}/broken_file
+log_mustnot vdevs_in_pool $TESTPOOL1 ${TESTDIR}/broken_file
 
 log_pass "Adding a large number of file based vdevs to a zpool works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_007_neg.ksh
index 081815b..4e9535c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_007_neg.ksh

@@ -46,10 +46,7 @@
 
 function cleanup
 {
-	poolexists "$TESTPOOL" && \
-		destroy_pool "$TESTPOOL"
-
-	partition_cleanup
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_assert "'zpool add' should return an error with badly-formed parameters."
@@ -57,10 +54,10 @@
 log_onexit cleanup
 
 set -A args "" "-f" "-n" "-?" "-nf" "-fn" "-f -n" "--f" "-blah" \
-	"-? $TESTPOOL ${disk}${SLICE_PREFIX}${SLICE1}"
+	"-? $TESTPOOL $DISK1"
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL $DISK0
+log_must poolexists $TESTPOOL
 
 typeset -i i=0
 while (( $i < ${#args[*]} )); do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_008_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_008_neg.ksh
index edcdd32..77a899f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_008_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_008_neg.ksh

@@ -46,22 +46,18 @@
 
 function cleanup
 {
-
-        poolexists "$TESTPOOL" && \
-                destroy_pool "$TESTPOOL"
-
-	partition_cleanup
+        poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_assert "'zpool add' should return an error with nonexistent pools and vdevs"
 
 log_onexit cleanup
 
-set -A args "" "-f nonexistent_pool ${disk}${SLICE_PREFIX}${SLICE1}" \
+set -A args "" "-f nonexistent_pool $DISK1" \
 	"-f $TESTPOOL nonexistent_vdev"
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL $DISK0
+log_must poolexists $TESTPOOL
 
 typeset -i i=0
 while (( $i < ${#args[*]} )); do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh
index 1fc1a04..7ffe951 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_009_neg.ksh

@@ -47,12 +47,7 @@
 
 function cleanup
 {
-
-        poolexists "$TESTPOOL" && \
-                destroy_pool "$TESTPOOL"
-
-	partition_cleanup
-
+        poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_assert "'zpool add' should fail if vdevs are the same or vdev is " \
@@ -60,12 +55,11 @@
 
 log_onexit cleanup
 
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must poolexists "$TESTPOOL"
+create_pool $TESTPOOL $DISK0
+log_must poolexists $TESTPOOL
 
-log_mustnot zpool add -f "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE1} \
-	${disk}${SLICE_PREFIX}${SLICE1}
-log_mustnot zpool add -f "$TESTPOOL" ${disk}${SLICE_PREFIX}${SLICE0}
+log_mustnot zpool add -f $TESTPOOL $DISK1 $DISK1
+log_mustnot zpool add -f $TESTPOOL $DISK0
 
 log_pass "'zpool add' get fail as expected if vdevs are the same or vdev is " \
 	"contained in the given pool."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh
index 8b8eade..771b689 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_010_pos.ksh

@@ -51,7 +51,7 @@
 
 	typeset -i i=0
 	while ((i < 10)); do
-		log_must rm -f $TEST_BASE_DIR/vdev$i
+		rm -f $TEST_BASE_DIR/vdev$i
 		((i += 1))
 	done
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh
new file mode 100755
index 0000000..73dec92
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh

@@ -0,0 +1,175 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Attila Fülöp <attila@fueloep.org>
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+typeset STR_DRYRUN="would update '$TESTPOOL' to the following configuration:"
+typeset VDEV_PREFIX="$TEST_BASE_DIR/filedev"
+
+#
+# DESCRIPTION:
+# 'zpool add -n <pool> <vdev> ...' can display the correct configuration
+#
+# STRATEGY:
+# 1. Create different storage pools, use -n to add devices to the pool and
+#    verify the output is as expected.
+# 2. Create a pool with a hole vdev and verify it's not listed with add -n.
+#
+
+typeset -a dev=(
+	"${VDEV_PREFIX}00" "${VDEV_PREFIX}01" "${VDEV_PREFIX}02"
+	"${VDEV_PREFIX}03" "${VDEV_PREFIX}04" "${VDEV_PREFIX}05"
+	"${VDEV_PREFIX}06" "${VDEV_PREFIX}07" "${VDEV_PREFIX}08"
+	"${VDEV_PREFIX}09" "${VDEV_PREFIX}10" "${VDEV_PREFIX}11"
+)
+
+typeset -a tests=(
+    (
+	tree="'${dev[0]}' log '${dev[1]}' special '${dev[2]}' dedup '${dev[3]}'"
+	add="spare '${dev[4]}' cache '${dev[5]}'"
+	want="$STR_DRYRUN
+
+	$TESTPOOL
+	  ${dev[0]}
+	dedup
+	  ${dev[3]}
+	special
+	  ${dev[2]}
+	logs
+	  ${dev[1]}
+	cache
+	  ${dev[5]}
+	spares
+	  ${dev[4]}"
+    )
+    (
+	tree="'${dev[0]}' log '${dev[1]}' special '${dev[2]}' dedup '${dev[3]}' \
+	    spare '${dev[4]}' cache '${dev[5]}'"
+
+	add="'${dev[6]}' log '${dev[7]}' special '${dev[8]}' dedup '${dev[9]}' \
+	    spare '${dev[10]}' cache '${dev[11]}'"
+
+	want="$STR_DRYRUN
+
+	$TESTPOOL
+	  ${dev[0]}
+	  ${dev[6]}
+	dedup
+	  ${dev[3]}
+	  ${dev[9]}
+	special
+	  ${dev[2]}
+	  ${dev[8]}
+	logs
+	  ${dev[1]}
+	  ${dev[7]}
+	cache
+	  ${dev[5]}
+	  ${dev[11]}
+	spares
+	  ${dev[4]}
+	  ${dev[10]}"
+    )
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}' \
+	    log mirror '${dev[2]}' '${dev[3]}' \
+	    dedup mirror '${dev[6]}' '${dev[7]}' \
+	    spare '${dev[8]}'"
+
+	add="special mirror '${dev[4]}' '${dev[5]}' \
+	    spare '${dev[9]}' cache '${dev[10]}' '${dev[11]}'"
+
+	want="$STR_DRYRUN
+
+	$TESTPOOL
+	  mirror-0
+	    ${dev[0]}
+	    ${dev[1]}
+	dedup
+	  mirror
+	    ${dev[6]}
+	    ${dev[7]}
+	special
+	  mirror
+	    ${dev[4]}
+	    ${dev[5]}
+	logs
+	  mirror
+	    ${dev[2]}
+	    ${dev[3]}
+	cache
+	  ${dev[10]}
+	  ${dev[11]}
+	spares
+	  ${dev[8]}
+	  ${dev[9]}"
+    )
+)
+
+verify_runnable "global"
+
+function cleanup
+{
+	destroy_pool "$TESTPOOL"
+	rm -f "$VDEV_PREFIX"*
+}
+
+log_assert "'zpool add -n <pool> <vdev> ...' can display the configuration"
+
+log_onexit cleanup
+
+# Create needed file vdevs.
+for (( i=0; i < ${#dev[@]}; i+=1 )); do
+	log_must truncate -s $SPA_MINDEVSIZE "${dev[$i]}"
+done
+
+# For each test: create the pool, dry-run add the devices and check the output.
+for (( i=0; i < ${#tests[@]}; i+=1 )); do
+	typeset tree="${tests[$i].tree}"
+	typeset add="${tests[$i].add}"
+	typeset want="${tests[$i].want}"
+
+	log_must eval zpool create "$TESTPOOL" $tree
+	log_must poolexists "$TESTPOOL"
+	typeset out="$(log_must eval "zpool add -n '$TESTPOOL' $add" | \
+	    sed /^SUCCESS/d)"
+
+	if [[ "$out" != "$want" ]]; then
+		log_fail "Got:\n" "$out" "\nbut expected:\n" "$want"
+	fi
+	log_must destroy_pool "$TESTPOOL"
+done
+
+# Make sure hole vdevs are skipped in output.
+log_must eval "zpool create '$TESTPOOL' '${dev[0]}' log '${dev[1]}' \
+    cache '${dev[2]}'"
+
+# Create a hole vdev.
+log_must eval "zpool remove '$TESTPOOL' '${dev[1]}'"
+log_mustnot eval "zpool add -n '$TESTPOOL' '${dev[1]}' | \
+    grep -qE '[[:space:]]+hole'"
+
+log_pass "'zpool add -n <pool> <vdev> ...' displays config correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh
index fd33fb9..618c699 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_attach/attach-o_ashift.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -41,19 +42,27 @@
 
 function cleanup
 {
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
-	log_must rm -f $disk1
-	log_must rm -f $disk2
+	rm -f $disk1 $disk2
 }
 
 log_assert "zpool attach -o ashift=<n>' works with different ashift values"
 log_onexit cleanup
 
-disk1=$TEST_BASE_DIR/$FILEDISK0
-disk2=$TEST_BASE_DIR/$FILEDISK1
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
 log_must truncate -s $SIZE $disk1
 log_must truncate -s $SIZE $disk2
 
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do
@@ -92,7 +101,7 @@
 for badval in ${badvals[@]}
 do
 	log_must zpool create $TESTPOOL1 $disk1
-	log_mustnot zpool attach $TESTPOOL1 -o ashift=$badval $disk1 $disk2
+	log_mustnot zpool attach -o ashift=$badval $TESTPOOL1 $disk1 $disk2
 	log_must zpool destroy $TESTPOOL1
 	log_must zpool labelclear $disk1
 	log_mustnot zpool labelclear $disk2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh
index 79ceaab..98b4140 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh

@@ -176,11 +176,7 @@
 	dd if=/dev/zero of=$fbase.$i seek=512 bs=1024 count=$wcount conv=notrunc \
 			> /dev/null 2>&1
 	log_must sync
-	log_must zpool scrub $TESTPOOL1
-	# Wait for the completion of scrub operation
-	while is_pool_scrubbing $TESTPOOL1; do
-		sleep 1
-	done
+	log_must zpool scrub -w $TESTPOOL1
 
 	check_err $TESTPOOL1 && \
 		log_fail "No error generated."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
index 3c59593..5ffaae5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am

@@ -27,14 +27,24 @@
 	zpool_create_024_pos.ksh \
 	zpool_create_encrypted.ksh \
 	zpool_create_crypt_combos.ksh \
+	zpool_create_draid_001_pos.ksh \
+	zpool_create_draid_002_pos.ksh \
+	zpool_create_draid_003_pos.ksh \
+	zpool_create_draid_004_pos.ksh \
 	zpool_create_features_001_pos.ksh \
 	zpool_create_features_002_pos.ksh \
 	zpool_create_features_003_pos.ksh \
 	zpool_create_features_004_neg.ksh \
 	zpool_create_features_005_pos.ksh \
+	zpool_create_features_006_pos.ksh \
+	zpool_create_features_007_pos.ksh \
+	zpool_create_features_008_pos.ksh \
+	zpool_create_features_009_pos.ksh \
 	create-o_ashift.ksh \
-	zpool_create_tempname.ksh
+	zpool_create_tempname.ksh \
+	zpool_create_dryrun_output.ksh
 
 dist_pkgdata_DATA = \
+	draidcfg.gz \
 	zpool_create.cfg \
 	zpool_create.shlib

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh
index d3134a7..a504877 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/cleanup.ksh

@@ -32,8 +32,6 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
 
-clean_blockfile "$TESTDIR $TESTDIR0 $TESTDIR1"
-
 cleanup_devices $DISKS
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/create-o_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/create-o_ashift.ksh
index 6a9c3e2..2c1f6e0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/create-o_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/create-o_ashift.ksh

@@ -44,8 +44,8 @@
 
 function cleanup
 {
-	destroy_pool $TESTPOOL
-	log_must rm -f $disk
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -f $disk
 }
 
 #
@@ -73,15 +73,21 @@
 	typeset device=$1
 	typeset ubcount=$2
 
-	zdb -quuul $device | egrep '^(\s+)?Uberblock' |
-	    awk -v ubcount=$ubcount 'BEGIN { count=0 } { uberblocks[$0]++; }
+	zdb -quuul $device | awk -v ubcount=$ubcount '
+	    /Uberblock/ && ! /invalid/ { uberblocks[$0]++ }
 	    END {
+	        count = 0
 	        for (i in uberblocks) {
-	            if (i ~ /invalid/) { continue; }
-	            if (uberblocks[i] != 4) { exit 1; }
+	            if (uberblocks[i] != 4) {
+	                printf "%s count: %s != 4\n", i, uberblocks[i]
+	                exit 1
+	            }
 	            count++;
 	        }
-	        if (count != ubcount) { exit 1; }
+	        if (count != ubcount) {
+	            printf "Total uberblock count: %s != %s\n", count, ubcount
+	            exit 1
+	        }
 	    }'
 
 	return $?
@@ -90,8 +96,7 @@
 log_assert "zpool create -o ashift=<n>' works with different ashift values"
 log_onexit cleanup
 
-disk=$TEST_BASE_DIR/$FILEDISK0
-log_must mkfile $SIZE $disk
+disk=$(create_blockfile $SIZE)
 
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 # since Illumos 4958 the largest uberblock is 8K so we have at least of 16/label
@@ -117,7 +122,7 @@
 	# clean things for the next run
 	log_must zpool destroy $TESTPOOL
 	log_must zpool labelclear $disk
-	log_must eval "verify_device_uberblocks $disk 0"
+	log_must verify_device_uberblocks $disk 0
 	((i = i + 1))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/draidcfg.gz b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/draidcfg.gz
new file mode 100644
index 0000000..b8c0a58
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/draidcfg.gz
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/setup.ksh
index efdafe5..115126b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/setup.ksh

@@ -34,24 +34,4 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
-	log_unsupported "This directory cannot be run on raw files."
-fi
-
-if [[ -n $DISK ]]; then
-	#
-        # Use 'zpool create' to clean up the information in
-        # in the given disk to avoid slice overlapping.
-        #
-	cleanup_devices $DISK
-
-        partition_disk $((($MINVDEVSIZE / (1024 * 1024)) * 2))m $DISK 7
-else
-	for disk in `echo $DISKSARRAY`; do
-		cleanup_devices $disk
-
-		partition_disk $((($MINVDEVSIZE / (1024 * 1024)) * 2))m $disk 7
-	done
-fi
-
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.cfg
index d58cece..976570d 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.cfg

@@ -30,71 +30,23 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-export DISK_ARRAY_NUM=0
-export DISK_ARRAY_LIMIT=4
-export DISKSARRAY=""
-
-function set_disks
-{
-        typeset -a disk_array=($(find_disks $DISKS))
-
-	if (( ${#disk_array[*]} <= 1 )); then
-		export DISK=${DISKS%% *}
-		export DISK_ARRAY_NUM=1
-	else
-		export DISK=""
-		typeset -i i=0
-		while (( i < ${#disk_array[*]} )); do
-			export DISK${i}="${disk_array[$i]}"
-			DISKSARRAY="$DISKSARRAY ${disk_array[$i]}"
-			(( i = i + 1 ))
-			(( i>$DISK_ARRAY_LIMIT )) && break
-		done
-		export DISK_ARRAY_NUM=$i
-		export DISKSARRAY
-	fi
-}
-
-set_disks
+typeset -a disk_array=($(find_disks $DISKS))
+typeset DISKSARRAY=""
+typeset -i DISK_ARRAY_LIMIT=4
+typeset -i i=0
+while (( i < ${#disk_array[*]} && i <= $DISK_ARRAY_LIMIT )); do
+	export DISK${i}="${disk_array[$i]}"
+	DISKSARRAY="$DISKSARRAY ${disk_array[$i]}"
+	(( i = i + 1 ))
+done
+export DISK_ARRAY_NUM=$i
+export DISKSARRAY
 
 export FILESIZE="$MINVDEVSIZE"
 export FILESIZE1="$(($MINVDEVSIZE * 2))"
 export SIZE="$((MINVDEVSIZE / (1024 * 1024)))"m
 export SIZE1="$(($MINVDEVSIZE * 2 / (1024 * 1024)))m"
 
-if is_linux; then
-	set_device_dir
-	set_slice_prefix
-	export SLICE0=1
-	export SLICE1=2
-	export SLICE2=3
-	export SLICE3=4
-	export SLICE4=5
-	export SLICE5=6
-	export SLICE6=7
-	export SLICE7=8
-	disk1=${DISKS%% *}
-	if is_mpath_device $disk1; then
-		delete_partitions
-	fi
-else
-	export SLICE_PREFIX="s"
-	export SLICE0=0
-	export SLICE1=1
-	export SLICE2=2
-	export SLICE3=3
-	export SLICE4=4
-	export SLICE5=5
-	export SLICE6=6
-	export SLICE7=7
-fi
-
-export FILEDISK=filedisk_create
-export FILEDISK0=filedisk0_create
-export FILEDISK1=filedisk1_create
-export FILEDISK2=filedisk2_create
-export FILEDISK3=filedisk3_create
-
 export BYND_MAX_NAME="byondmaxnamelength\
 012345678901234567890123456789\
 012345678901234567890123456789\

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib
index 31244f4..c98e495 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create.shlib

@@ -53,67 +53,15 @@
 }
 
 #
-# Create a ufs|ext file system and make a file within the file
-# system for storage pool vdev
+# Create a file for storage pool vdev
 # $1, file size
-# $2, file name
-# $3, disk name to create ufs|ext file system
 #
 function create_blockfile
 {
 	typeset size=$1
-	typeset file=$2
-	typeset disk=$3
-	typeset dir=`dirname $file`
-
-	if [[ -d $dir ]]; then
-		ismounted $dir $NEWFS_DEFAULT_FS
-		(( $? == 0 )) && \
-			log_must umount -f $dir
-	else
-		log_must mkdir -p $dir
-	fi
-
-	echo "y" | newfs ${DEV_RDSKDIR}/$disk >/dev/null 2>&1
-	(( $? != 0 )) &&
-		log_fail "Create file system fail."
-
-        log_must mount ${DEV_DSKDIR}/$disk $dir
-        log_must truncate -s $size $file
-}
-
-#
-# Umount the ufs|ext filesystem and remove the mountpoint
-# $1, the mount point
-#
-function clean_blockfile
-{
-	typeset dirs=$1
-
-	for dir in $dirs; do
-		if [[ -d $dir ]]; then
-			if is_linux; then
-				if ismounted $dir ext2; then
-					typeset dev=$(df -lht ext2 | \
-						grep "$dir" | \
-						awk '{print $1}')
-					log_must umount -f $dir
-					create_pool ${TESTPOOL}.tmp $dev
-					destroy_pool ${TESTPOOL}.tmp
-				fi
-			else
-				if ismounted $dir ufs; then
-					typeset dev=$(df -lhF ufs | \
-						grep "$dir" | \
-						awk '{print $1}')
-					log_must umount -f $dir
-					create_pool ${TESTPOOL}.tmp $dev
-					destroy_pool ${TESTPOOL}.tmp
-				fi
-			fi
-			log_must rm -rf $dir
-		fi
-	done
+	typeset file=$(mktemp)
+	truncate -s $size $file
+	echo $file
 }
 
 #
@@ -125,12 +73,12 @@
 	typeset vfstabdevs=""
 	typeset line
 
-	if is_linux; then
-		vfstab="/etc/fstab"
-		tmpfile="$TEST_BASE_DIR/fstab.tmp"
-	else
+	if is_illumos; then
 		vfstab="/etc/vfstab"
 		tmpfile="$TEST_BASE_DIR/vfstab.tmp"
+	else
+		vfstab="/etc/fstab"
+		tmpfile="$TEST_BASE_DIR/fstab.tmp"
 	fi
 
 	cat $vfstab | grep "^${DEV_DSKDIR}" >$tmpfile
@@ -150,14 +98,93 @@
 #
 function save_dump_dev
 {
-	typeset dumpdev
+	typeset dumpdev=""
 
-	if is_linux; then
-		dumpdev=""
-	else
+	if is_illumos; then
 		typeset fnd="Dump device"
 		dumpdev=`dumpadm | grep "$fnd" | cut -f2 -d : | \
 			awk '{print $1}'`
 	fi
 	echo $dumpdev
 }
+
+#
+# Verify a pool's enabled features match the provided feature set(s).
+# $1, pool name
+# $2..., compatibility file name(s), looked up in $ZPOOL_COMPAT_DIR
+#
+# check_feature_set $TESTPOOL set1 set2 set3 ...
+#
+function check_feature_set
+{
+	typeset pool=$1
+	typeset file=""
+	shift
+
+	for set in "$@"; do
+		if test -e "$ZPOOL_COMPAT_DIR/$set"; then
+			file="$ZPOOL_COMPAT_DIR/$set"
+		else
+			log_fail "Missing feature file: $ZPOOL_COMPAT_DIR/$set"
+		fi
+	done
+
+	#
+	# Create a temporary file which contains all features which are
+	# common to the listed feature sets (comment lines are skipped).
+	# It is used below to determine which features should be enabled.
+	#
+	typeset tmpfile=$(mktemp)
+
+	while read line; do
+		typeset flag=1
+
+		if [[ "$line" == "#"* ]]; then
+			continue
+		fi
+
+		for set in "$@"; do
+			if ! grep -q "$line" $ZPOOL_COMPAT_DIR/$set; then
+				flag=0
+				break;
+			fi
+		done
+
+		if [[ $flag -eq 1 ]]; then
+			echo "$line" >>$tmpfile
+		fi
+	done <"$file"
+
+	#
+	# Verify every enabled feature appears in the merged feature set.
+	# Verify every disabled feature does not.
+	#
+	for feature in $(zpool get all $pool | \
+	    awk '$2 ~ /feature@/ { print $2 }'); do
+		state=$(get_pool_prop $feature $pool)
+		name=$(cut -d'@' -f2 <<<"$feature")
+
+		if [[ "$state" = "enabled" || "$state" = "active" ]]; then
+			if ! grep -q $name $tmpfile; then
+				cat $tmpfile
+				rm -f $tmpfile
+				log_fail "Enabled feature $name not " \
+				    "in feature set file"
+			fi
+		elif [[ "$state" = "disabled" ]]; then
+			if grep -q $name $tmpfile; then
+				cat $tmpfile
+				rm -f $tmpfile
+				log_fail "Disabled feature $name is " \
+				    "in feature set file"
+			fi
+		else
+			rm -f $tmpfile
+			log_fail "Feature $name in unknown state $state"
+		fi
+	done
+
+	log_note "Checked all features"
+
+	rm -f $tmpfile
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
index 2a975ed..42f57be 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_001_pos.ksh

@@ -49,17 +49,7 @@
 {
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 
-	clean_blockfile "$TESTDIR0 $TESTDIR1"
-
-	if [[ -n $DISK ]]; then
-		partition_disk $((($MINVDEVSIZE / (1024 * 1024)) * 2))m $DISK 7
-	else
-		typeset disk=""
-		for disk in $DISK0 $DISK1; do
-			partition_disk \
-			    $((($MINVDEVSIZE / (1024 * 1024)) * 2))m $disk 7
-		done
-	fi
+	rm -f $disk1 $disk2
 }
 
 log_assert "'zpool create <pool> <vspec> ...' can successfully create" \
@@ -67,80 +57,23 @@
 
 log_onexit cleanup
 
-set -A keywords "" "mirror" "raidz" "raidz1"
+typeset disk1=$(create_blockfile $FILESIZE)
+typeset disk2=$(create_blockfile $FILESIZE)
 
-case $DISK_ARRAY_NUM in
-0|1)
-	typeset disk=""
-	if (( $DISK_ARRAY_NUM == 0 )); then
-		disk=$DISK
-	else
-		disk=$DISK0
-	fi
-	create_blockfile $FILESIZE $TESTDIR0/$FILEDISK0 \
-		${disk}${SLICE_PREFIX}${SLICE5}
-	create_blockfile $FILESIZE $TESTDIR1/$FILEDISK1 \
-		${disk}${SLICE_PREFIX}${SLICE6}
+pooldevs="${DISK0} \
+	\"${DISK0} ${DISK1}\" \
+	\"${DISK0} ${DISK1} ${DISK2}\" \
+	\"$disk1 $disk2\""
+raidzdevs="\"${DISK0} ${DISK1} ${DISK2}\""	# must precede mirrordevs
+mirrordevs="\"${DISK0} ${DISK1}\" \
+	$raidzdevs \
+	\"$disk1 $disk2\""
+draiddevs="\"${DISK0} ${DISK1} ${DISK2}\""
 
-	pooldevs="${disk}${SLICE_PREFIX}${SLICE0} \
-		${DEV_DSKDIR}/${disk}${SLICE_PREFIX}${SLICE0} \
-		\"${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1}\" \
-                  $TESTDIR0/$FILEDISK0"
-	raidzdevs="\"${DEV_DSKDIR}/${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1}\" \
-		\"${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1} \
-		${disk}${SLICE_PREFIX}${SLICE3}\" \
-		\"${disk}${SLICE_PREFIX}${SLICE0} \
-		${disk}${SLICE_PREFIX}${SLICE1} \
-		${disk}${SLICE_PREFIX}${SLICE3} \
-		${disk}${SLICE_PREFIX}${SLICE4}\"\
-		\"$TESTDIR0/$FILEDISK0 $TESTDIR1/$FILEDISK1\""
-	mirrordevs=$raidzdevs
-	;;
-2|*)
-	create_blockfile $FILESIZE $TESTDIR0/$FILEDISK0 \
-		${DISK0}${SLICE_PREFIX}${SLICE5}
-        create_blockfile $FILESIZE $TESTDIR1/$FILEDISK1 \
-		${DISK1}${SLICE_PREFIX}${SLICE5}
-
-	pooldevs="${DISK0}${SLICE_PREFIX}${SLICE0} \
-		\"${DEV_DSKDIR}/${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0}\" \
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1} \
-		${DISK1}${SLICE_PREFIX}${SLICE1}\"\
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1}\
-		${DISK1}${SLICE_PREFIX}${SLICE1}\" \
-		\"$TESTDIR0/$FILEDISK0 $TESTDIR1/$FILEDISK1\""
-	raidzdevs="\"${DEV_DSKDIR}/${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0}\" \
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1} \
-		${DISK1}${SLICE_PREFIX}${SLICE1}\" \
-		\"${DISK0}${SLICE_PREFIX}${SLICE0} \
-		${DISK1}${SLICE_PREFIX}${SLICE0} \
-		${DISK0}${SLICE_PREFIX}${SLICE1} \
-		${DISK1}${SLICE_PREFIX}${SLICE1}\" \
-		\"$TESTDIR0/$FILEDISK0 $TESTDIR1/$FILEDISK1\""
-	mirrordevs=$raidzdevs
-	;;
-esac
-
-typeset -i i=0
-while (( $i < ${#keywords[*]} )); do
-	case ${keywords[i]} in
-	"")
-		create_pool_test "$TESTPOOL" "${keywords[i]}" "$pooldevs";;
-	mirror)
-		create_pool_test "$TESTPOOL" "${keywords[i]}" "$mirrordevs";;
-	raidz|raidz1)
-		create_pool_test "$TESTPOOL" "${keywords[i]}" "$raidzdevs" ;;
-	esac
-	(( i = i+1 ))
-done
+create_pool_test "$TESTPOOL" "" "$pooldevs"
+create_pool_test "$TESTPOOL" "mirror" "$mirrordevs"
+create_pool_test "$TESTPOOL" "raidz" "$raidzdevs"
+create_pool_test "$TESTPOOL" "raidz1" "$raidzdevs"
+create_pool_test "$TESTPOOL" "draid" "$draiddevs"
 
 log_pass "'zpool create <pool> <vspec> ...' success."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_002_pos.ksh
index b98e5ac..2f70908 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_002_pos.ksh

@@ -47,22 +47,15 @@
 
 function cleanup
 {
-	for pool in $TESTPOOL $TESTPOOL1 $TESTPOOL2 $TESTPOOL3 $TESTPOOL4 \
-		$TESTPOOL5 $TESTPOOL6
-	do
-		destroy_pool $pool
+	for pool in $TESTPOOL $TESTPOOL1; do
+		poolexists $pool && destroy_pool $pool
 	done
 
-	clean_blockfile "$TESTDIR0 $TESTDIR1"
-
-	for file in $FILEDISK0 $FILEDISK1 $FILEDISK2
-	do
-		if [[ -e $TEST_BASE_DIR/$file ]]; then
-			rm -f $TEST_BASE_DIR/$file
-		fi
-	done
-
-	partition_disk $SIZE $disk 6
+	rm -f $disk1 $disk2
+	if is_freebsd; then
+		umount -f $TESTDIR
+		rm -rf $TESTDIR
+	fi
 }
 
 log_onexit cleanup
@@ -70,57 +63,66 @@
 log_assert "'zpool create -f <pool> <vspec> ...' can successfully create" \
 	"a new pool in some cases."
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-create_pool "$TESTPOOL" "${disk}${SLICE_PREFIX}${SLICE0}"
-log_must echo "y" | newfs \
-	${DEV_RDSKDIR}/${disk}${SLICE_PREFIX}${SLICE1} >/dev/null 2>&1
-create_blockfile $FILESIZE $TESTDIR0/$FILEDISK0 ${disk}${SLICE_PREFIX}${SLICE4}
-create_blockfile $FILESIZE1 $TESTDIR1/$FILEDISK1 ${disk}${SLICE_PREFIX}${SLICE5}
-log_must truncate -s $SIZE $TEST_BASE_DIR/$FILEDISK0
-log_must truncate -s $SIZE $TEST_BASE_DIR/$FILEDISK1
-log_must truncate -s $SIZE $TEST_BASE_DIR/$FILEDISK2
+create_pool $TESTPOOL $DISK0
+log_must eval "new_fs ${DEV_RDSKDIR}/${DISK1} >/dev/null 2>&1"
+typeset disk1=$(create_blockfile $FILESIZE)
+typeset disk2=$(create_blockfile $FILESIZE1)
 
 unset NOINUSE_CHECK
 log_must zpool export $TESTPOOL
 log_note "'zpool create' without '-f' will fail " \
-	"while device is belong to an exported pool."
-log_mustnot zpool create "$TESTPOOL1" "${disk}${SLICE_PREFIX}${SLICE0}"
-create_pool "$TESTPOOL1" "${disk}${SLICE_PREFIX}${SLICE0}"
+	"while device belongs to an exported pool."
+log_mustnot zpool create $TESTPOOL1 $DISK0
+create_pool $TESTPOOL1 $DISK0
 log_must poolexists $TESTPOOL1
 
+log_must destroy_pool $TESTPOOL1
+
 log_note "'zpool create' without '-f' will fail " \
-	"while device is using by an ufs filesystem."
-log_mustnot zpool create "$TESTPOOL2" "${disk}${SLICE_PREFIX}${SLICE1}"
-create_pool "$TESTPOOL2" "${disk}${SLICE_PREFIX}${SLICE1}"
-log_must poolexists $TESTPOOL2
+	"while device is in use by a ufs filesystem."
+if is_freebsd; then
+	# fs must be mounted for create to fail on FreeBSD
+	log_must mkdir -p $TESTDIR
+	log_must mount ${DEV_DSKDIR}/${DISK1} $TESTDIR
+fi
+log_mustnot zpool create $TESTPOOL $DISK1
+if is_freebsd; then
+	# fs must not be mounted to create pool even with -f
+	log_must umount -f $TESTDIR
+	log_must rm -rf $TESTDIR
+fi
+create_pool $TESTPOOL $DISK1
+log_must poolexists $TESTPOOL
+
+log_must destroy_pool $TESTPOOL
 
 log_note "'zpool create' mirror without '-f' will fail " \
 	"while devices have different size."
-log_mustnot zpool create "$TESTPOOL3" "mirror" $TESTDIR0/$FILEDISK0 \
-	$TESTDIR1/$FILEDISK1
-create_pool "$TESTPOOL3" "mirror" $TESTDIR0/$FILEDISK0 $TESTDIR1/$FILEDISK1
-log_must poolexists $TESTPOOL3
+log_mustnot zpool create $TESTPOOL mirror $disk1 $disk2
+create_pool $TESTPOOL mirror $disk1 $disk2
+log_must poolexists $TESTPOOL
 
-log_note "'zpool create' mirror without '-f' will fail " \
-	"while devices are of different types."
-log_mustnot zpool create "$TESTPOOL4" "mirror" $TEST_BASE_DIR/$FILEDISK0 \
-	${disk}${SLICE_PREFIX}${SLICE3}
-create_pool "$TESTPOOL4" "mirror" \
-	$TEST_BASE_DIR/$FILEDISK0 ${disk}${SLICE_PREFIX}${SLICE3}
-log_must poolexists $TESTPOOL4
+log_must destroy_pool $TESTPOOL
+
+if ! is_freebsd; then
+	log_note "'zpool create' mirror without '-f' will fail " \
+		"while devices are of different types."
+	log_mustnot zpool create $TESTPOOL mirror $disk1 $DISK0
+	create_pool $TESTPOOL mirror $disk1 $DISK0
+	log_must poolexists $TESTPOOL
+
+	log_must destroy_pool $TESTPOOL
+fi
 
 log_note "'zpool create' without '-f' will fail " \
-	"while device is part of potentially active pool."
-create_pool "$TESTPOOL5"  "mirror" $TEST_BASE_DIR/$FILEDISK1 \
-	$TEST_BASE_DIR/$FILEDISK2
-log_must zpool offline $TESTPOOL5 $TEST_BASE_DIR/$FILEDISK2
-log_must zpool export $TESTPOOL5
-log_mustnot zpool create "$TESTPOOL6" $TEST_BASE_DIR/$FILEDISK2
-create_pool $TESTPOOL6 $TEST_BASE_DIR/$FILEDISK2
-log_must poolexists $TESTPOOL6
+	"while a device is part of a potentially active pool."
+create_pool $TESTPOOL mirror $DISK0 $DISK1
+log_must zpool offline $TESTPOOL $DISK0
+log_must zpool export $TESTPOOL
+log_mustnot zpool create $TESTPOOL1 $DISK0
+create_pool $TESTPOOL1 $DISK0
+log_must poolexists $TESTPOOL1
+
+log_must destroy_pool $TESTPOOL1
 
 log_pass "'zpool create -f <pool> <vspec> ...' success."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_003_pos.ksh
index 100a24c..dd8d010 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_003_pos.ksh

@@ -47,7 +47,8 @@
 
 function cleanup
 {
-	[[ -e $tmpfile ]] && log_must rm -f $tmpfile
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -f $tmpfile
 }
 
 tmpfile="$TEST_BASE_DIR/zpool_create_003.tmp$$"
@@ -57,18 +58,6 @@
 
 log_onexit cleanup
 
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
-DISK=${DISKS%% *}
-if is_mpath_device $DISK; then
-	partition_disk $SIZE $disk 1
-fi
-
-typeset vspec="${disk}${SLICE_PREFIX}${SLICE0}"
 typeset goodprops=('' '-o comment=text' '-O checksum=on' '-O ns:prop=value')
 typeset badprops=('-o ashift=9999' '-O doesnotexist=on' '-O volsize=10M')
 
@@ -78,10 +67,10 @@
 	#
 	# Make sure disk is clean before we use it
 	#
-	create_pool $TESTPOOL $vspec > $tmpfile
+	create_pool $TESTPOOL $DISK0 > $tmpfile
 	destroy_pool $TESTPOOL
 
-	log_must eval "zpool create -n $prop $TESTPOOL $vspec > $tmpfile"
+	log_must eval "zpool create -n $prop $TESTPOOL $DISK0 > $tmpfile"
 
 	poolexists $TESTPOOL && \
 		log_fail "'zpool create -n <pool> <vspec> ...' fail."
@@ -97,10 +86,10 @@
 	#
 	# Make sure disk is clean before we use it
 	#
-	create_pool $TESTPOOL $vspec > $tmpfile
+	create_pool $TESTPOOL $DISK0 > $tmpfile
 	destroy_pool $TESTPOOL
 
-	log_mustnot zpool create -n $prop $TESTPOOL $vspec
+	log_mustnot zpool create -n $prop $TESTPOOL $DISK0
 done
 
 log_pass "'zpool create -n <pool> <vspec>...' success."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_004_pos.ksh
index 2697562..835cd1f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_004_pos.ksh

@@ -45,27 +45,23 @@
 
 function cleanup
 {
-	typeset pool=""
-
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 
-	[[ -d $TESTDIR ]] && log_must rm -rf $TESTDIR
-	partition_disk $SIZE $disk 6
+	rm -rf $TESTDIR
 }
 
 log_assert "Storage pools with 16 file based vdevs can be created."
 log_onexit cleanup
 
-disk=${DISKS%% *}
-create_pool $TESTPOOL $disk
+create_pool $TESTPOOL $DISK0
 log_must zfs create -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 
 vdevs_list=$(echo $TESTDIR/file.{01..16})
 log_must truncate -s $MINVDEVSIZE $vdevs_list
 
-create_pool "$TESTPOOL1" $vdevs_list
-log_must vdevs_in_pool "$TESTPOOL1" "$vdevs_list"
+create_pool $TESTPOOL1 $vdevs_list
+log_must vdevs_in_pool $TESTPOOL1 "$vdevs_list"
 
 if poolexists $TESTPOOL1; then
 	destroy_pool $TESTPOOL1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
index de5e9d8..a291e29 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh

@@ -46,41 +46,35 @@
 
 function cleanup
 {
-	poolexists $TESTPOOL && \
-		log_must zpool destroy -f $TESTPOOL
-
-	for dir in $TESTDIR $TESTDIR1; do
-		[[ -d $dir ]] && rm -rf $dir
-	done
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -rf $TESTDIR $TESTDIR1
 }
 
 log_assert "'zpool create [-R root][-m mountpoint] <pool> <vdev> ...' can create" \
 	"an alternate pool or a new pool mounted at the specified mountpoint."
 log_onexit cleanup
 
-set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2"
+set -A pooltype "" "mirror" "raidz" "raidz1" "raidz2" "draid" "draid2"
 
 #
 # cleanup the pools created in previous case if zpool_create_004_pos timedout
 #
 for pool in $TESTPOOL2 $TESTPOOL1 $TESTPOOL; do
-	if poolexists $pool; then
-		destroy_pool $pool
-	fi
+	poolexists $pool && destroy_pool $pool
 done
 
 #prepare raw file for file disk
-[[ -d $TESTDIR ]] && rm -rf $TESTDIR
+rm -rf $TESTDIR
 log_must mkdir -p $TESTDIR
 typeset -i i=1
-while (( i < 4 )); do
-	log_must mkfile $FILESIZE $TESTDIR/file.$i
+while (( i < 5 )); do
+	log_must truncate -s $FILESIZE $TESTDIR/file.$i
 
 	(( i = i + 1 ))
 done
 
 #Remove the directory with name as pool name if it exists
-[[ -d /$TESTPOOL ]] && rm -rf /$TESTPOOL
+rm -rf /$TESTPOOL
 file=$TESTDIR/file
 
 for opt in "-R $TESTDIR1" "-m $TESTDIR1" \
@@ -93,10 +87,10 @@
 			log_must zpool destroy -f $TESTPOOL
 		[[ -d $TESTDIR1 ]] && rm -rf $TESTDIR1
 		log_must zpool create $opt $TESTPOOL ${pooltype[i]} \
-			$file.1 $file.2 $file.3
+			$file.1 $file.2 $file.3 $file.4
 		! poolexists $TESTPOOL && \
 			log_fail "Creating pool with $opt fails."
-		mpt=`zfs mount | egrep "^$TESTPOOL[^/]" | awk '{print $2}'`
+		mpt=`zfs mount | grep -E "^$TESTPOOL[^/]" | awk '{print $2}'`
 		(( ${#mpt} == 0 )) && \
 			log_fail "$TESTPOOL created with $opt is not mounted."
 		mpt_val=$(get_prop "mountpoint" $TESTPOOL)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh
index 79a0060..79b41fd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_006_pos.ksh

@@ -46,8 +46,8 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL1 && destroy_pool $TESTPOOL1
-	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 
@@ -97,6 +97,20 @@
 	"raidz2 $vdev0 $vdev1 $vdev2 spare $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
 	"raidz3 $vdev0 $vdev1 $vdev2 $vdev3 \
 		mirror $vdev4 $vdev5 $vdev6 $vdev7" \
+	"draid $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4" \
+	"draid $vdev0 $vdev1 $vdev2 raidz1 $vdev3 $vdev4 $vdev5" \
+	"draid $vdev0 $vdev1 $vdev2 draid1 $vdev3 $vdev4 $vdev5" \
+	"draid $vdev0 $vdev1 $vdev2 special mirror $vdev3 $vdev4" \
+	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 mirror $vdev4 $vdev5 $vdev6" \
+	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
+	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 draid2 $vdev4 $vdev5 $vdev6 $vdev7"\
+	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 \
+		special mirror $vdev4 $vdev5 $vdev6" \
+	"draid2 $vdev0 $vdev1 $vdev2 $vdev3 \
+		special mirror $vdev4 $vdev5 $vdev6 \
+		cache $vdev7 log mirror $vdev8 $vdev9" \
+	"draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 $vdev7 \
+		special mirror $vdev8 $vdev9" \
 	"spare $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4 raidz $vdev5 $vdev6"
 
 set -A forced_args \
@@ -109,11 +123,19 @@
 	"raidz $vdev0 $vdev1 raidz2 $vdev2 $vdev3 $vdev4" \
 	"raidz $vdev0 $vdev1 raidz2 $vdev2 $vdev3 $vdev4 spare $vdev5" \
 	"raidz $vdev0 $vdev1 spare $vdev2 raidz2 $vdev3 $vdev4 $vdev5" \
+	"raidz $vdev0 $vdev1 draid2 $vdev2 $vdev3 $vdev4 $vdev5" \
+	"raidz $vdev0 $vdev1 draid3 $vdev2 $vdev3 $vdev4 $vdev5 $vdev6" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 raidz2 $vdev4 $vdev5 $vdev6" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 \
 		raidz2 $vdev4 $vdev5 $vdev6 spare $vdev7" \
 	"mirror $vdev0 $vdev1 raidz $vdev2 $vdev3 \
 		spare $vdev4 raidz2 $vdev5 $vdev6 $vdev7" \
+	"mirror $vdev0 $vdev1 draid $vdev2 $vdev3 $vdev4 \
+		draid2 $vdev5 $vdev6 $vdev7 $vdev8 spare $vdev9" \
+	"draid $vdev0 $vdev1 $vdev2 $vdev3 \
+		draid2 $vdev4 $vdev5 $vdev6 $vdev7 $vdev8" \
+	"draid $vdev0 $vdev1 $vdev2 draid $vdev4 $vdev5 $vdev6 \
+		special mirror $vdev7 $vdev8 $vdev9" \
 	"spare $vdev0 $vdev1 $vdev2 mirror $vdev3 $vdev4 \
 		raidz2 $vdev5 $vdev6 $vdev7"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh
index a7ae5c0..2873202 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_007_neg.ksh

@@ -44,38 +44,29 @@
 
 verify_runnable "global"
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-
 set -A args  "" "-?" "-n" "-f" "-nf" "-fn" "-f -n" "--f" "-e" "-s" \
 	"-m" "-R" "-m -R" "-Rm" "-mR" "-m $TESTDIR $TESTPOOL" \
-	"-R $TESTDIR $TESTPOOL" "-m nodir $TESTPOOL $disk" \
-	"-R nodir $TESTPOOL $disk" "-m nodir -R nodir $TESTPOOL $disk" \
-	"-R nodir -m nodir $TESTPOOL $disk" "-R $TESTDIR -m nodir $TESTPOOL $disk" \
-	"-R nodir -m $TESTDIR $TESTPOOL $disk" \
+	"-R $TESTDIR $TESTPOOL" "-m nodir $TESTPOOL $DISK0" \
+	"-R nodir $TESTPOOL $DISK0" "-m nodir -R nodir $TESTPOOL $DISK0" \
+	"-R nodir -m nodir $TESTPOOL $DISK0" "-R $TESTDIR -m nodir $TESTPOOL $DISK0" \
+	"-R nodir -m $TESTDIR $TESTPOOL $DISK0" \
 	"-blah" "$TESTPOOL" "$TESTPOOL blah" "$TESTPOOL c?t0d0" \
 	"$TESTPOOL c0txd0" "$TESTPOOL c0t0dx" "$TESTPOOL cxtxdx" \
 	"$TESTPOOL mirror" "$TESTPOOL raidz" "$TESTPOOL mirror raidz" \
 	"$TESTPOOL raidz1" "$TESTPOOL mirror raidz1" \
-	"$TESTPOOL mirror c?t?d?" "$TESTPOOL mirror $disk c0t1d?" \
-	"$TESTPOOL RAIDZ ${disk}${SLICE_PREFIX}${SLICE0} \
-	${disk}${SLICE_PREFIX}${SLICE1}" \
-	"$TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0} \
-	log ${disk}${SLICE_PREFIX}${SLICE1} \
-	log ${disk}${SLICE_PREFIX}${SLICE3}" \
-	"$TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0} \
-	spare ${disk}${SLICE_PREFIX}${SLICE1} \
-	spare ${disk}${SLICE_PREFIX}${SLICE3}" \
-	"$TESTPOOL RAIDZ1 ${disk}${SLICE_PREFIX}${SLICE0} \
-	${disk}${SLICE_PREFIX}${SLICE1}" \
-	"$TESTPOOL MIRROR $disk" "$TESTPOOL raidz $disk" \
-	"$TESTPOOL raidz1 $disk" \
-	"1tank $disk" "1234 $disk" "?tank $disk" \
-	"tan%k $disk" "ta@# $disk" "tan+k $disk" \
-	"$BYND_MAX_NAME $disk"
+	"$TESTPOOL draid1" "$TESTPOOL mirror draid1" \
+	"$TESTPOOL mirror c?t?d?" "$TESTPOOL mirror $DISK0 c0t1d?" \
+	"$TESTPOOL RAIDZ $DISK0 $DISK1" \
+	"$TESTPOOL $DISK0 log $DISK1 log $DISK2" \
+	"$TESTPOOL $DISK0 spare $DISK1 spare $DISK2" \
+	"$TESTPOOL RAIDZ1 $DISK0 $DISK1" "$TESTPOOL MIRROR $DISK0" \
+	"$TESTPOOL DRAID $DISK1 $DISK2 $DISK3" "$TESTPOOL raidz $DISK0" \
+	"$TESTPOOL raidz1 $DISK0" "$TESTPOOL draid $DISK0" \
+	"$TESTPOOL draid2 $DISK0 $DISK1" \
+	"$TESTPOOL draid $DISK0 $DISK1 $DISK2 spare s0-draid1-0" \
+	"1tank $DISK0" "1234 $DISK0" "?tank $DISK0" \
+	"tan%k $DISK0" "ta@# $DISK0" "tan+k $DISK0" \
+	"$BYND_MAX_NAME $DISK0"
 
 log_assert "'zpool create' should return an error with badly-formed parameters."
 log_onexit default_cleanup_noexit

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_008_pos.ksh
index 5c5c1d9..56bb64c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_008_pos.ksh

@@ -44,24 +44,11 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	# Versions of libblkid older than 2.27.0 will not always detect member
-	# devices of a pool, therefore skip this test case for old versions.
-	currentver="$(blkid -v | tr ',' ' ' | awk '/libblkid/ { print $6 }')"
-	requiredver="2.27.0"
-
-	if [ "$(printf "$requiredver\n$currentver" | sort -V | head -n1)" ==  \
-	    "$currentver" ] && [ "$currentver" != "$requiredver" ]; then
-		log_unsupported "libblkid ($currentver) may not detect pools"
-	fi
-fi
-
 function cleanup
 {
 	if [[ $exported_pool == true ]]; then
 		if [[ $force_pool == true ]]; then
-			log_must zpool create \
-				-f $TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0}
+			log_must zpool create -f $TESTPOOL $DISK0
 		else
 			log_must zpool import $TESTPOOL
 		fi
@@ -74,49 +61,6 @@
 	if poolexists $TESTPOOL1 ; then
                 destroy_pool $TESTPOOL1
 	fi
-
-	#
-	# recover it back to EFI label
-	#
-	create_pool $TESTPOOL $disk
-	destroy_pool $TESTPOOL
-
-        partition_disk $SIZE $disk 6
-}
-
-#
-# create overlap slice 0 and 1 on $disk
-#
-function create_overlap_slice
-{
-        typeset format_file=$TEST_BASE_DIR/format_overlap.$$
-        typeset disk=$1
-
-        echo "partition" >$format_file
-        echo "0" >> $format_file
-        echo "" >> $format_file
-        echo "" >> $format_file
-        echo "0" >> $format_file
-        echo "200m" >> $format_file
-        echo "1" >> $format_file
-        echo "" >> $format_file
-        echo "" >> $format_file
-        echo "0" >> $format_file
-        echo "400m" >> $format_file
-        echo "label" >> $format_file
-        echo "" >> $format_file
-        echo "q" >> $format_file
-        echo "q" >> $format_file
-
-        format -e -s -d $disk -f $format_file
-	typeset -i ret=$?
-        rm -fr $format_file
-
-	if (( ret != 0 )); then
-                log_fail "unable to create overlap slice."
-        fi
-
-        return 0
 }
 
 log_assert "'zpool create' have to use '-f' scenarios"
@@ -125,42 +69,21 @@
 typeset exported_pool=false
 typeset force_pool=false
 
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
 # overlapped slices as vdev need -f to create pool
 
 # Make the disk is EFI labeled first via pool creation
-create_pool $TESTPOOL $disk
+create_pool $TESTPOOL $DISK0
 destroy_pool $TESTPOOL
 
-if ! is_linux; then
-	# Make the disk is VTOC labeled since only VTOC label supports overlap
-	log_must labelvtoc $disk
-	log_must create_overlap_slice $disk
-
-	unset NOINUSE_CHECK
-	log_mustnot zpool create $TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0}
-	log_must zpool create -f $TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0}
-	destroy_pool $TESTPOOL
-fi
-
 # exported device to be as spare vdev need -f to create pool
 
-log_must zpool create -f $TESTPOOL $disk
+log_must zpool create -f $TESTPOOL $DISK0
 destroy_pool $TESTPOOL
-log_must partition_disk $SIZE $disk 6
-create_pool $TESTPOOL ${disk}${SLICE_PREFIX}${SLICE0} \
-	${disk}${SLICE_PREFIX}${SLICE1}
+create_pool $TESTPOOL $DISK0 $DISK1
 log_must zpool export $TESTPOOL
 exported_pool=true
-log_mustnot zpool create $TESTPOOL1 ${disk}${SLICE_PREFIX}${SLICE3} \
-	spare ${disk}${SLICE_PREFIX}${SLICE1}
-create_pool $TESTPOOL1 ${disk}${SLICE_PREFIX}${SLICE3} \
-	spare ${disk}${SLICE_PREFIX}${SLICE1}
+log_mustnot zpool create $TESTPOOL1 $DISK1 spare $DISK2
+create_pool $TESTPOOL1 $DISK1 spare $DISK2
 force_pool=true
 destroy_pool $TESTPOOL1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh
index 0b1b18a..e2f3899 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_009_neg.ksh

@@ -50,15 +50,10 @@
 
 function cleanup
 {
-	typeset dtst
-	typeset disk
+	typeset pool
 
-	for dtst in $TESTPOOL $TESTPOOL1; do
-		poolexists $dtst && destroy_pool $dtst
-	done
-
-	for disk in $DISKS; do
-		partition_disk $SIZE $disk 6
+	for pool in $TESTPOOL $TESTPOOL1; do
+		poolexists $pool && destroy_pool $pool
 	done
 }
 
@@ -68,27 +63,25 @@
 
 unset NOINUSE_CHECK
 typeset opt
-for opt in "" "mirror" "raidz" "raidz1"; do
-	typeset disk="$DISKS"
-	(( ${#opt} == 0 )) && disk=${DISKS%% *}
-
-	typeset -i count=$(get_word_count $disk)
-	if (( count < 2  && ${#opt} != 0 )) ; then
-		continue
+for opt in "" "mirror" "raidz" "draid"; do
+	if [[ $opt == "" ]]; then
+		typeset disks=$DISK0
+	else
+		typeset disks=$DISKS
 	fi
 
 	# Create two pools but using the same disks.
-	create_pool $TESTPOOL $opt $disk
-	log_mustnot zpool create -f $TESTPOOL1 $opt $disk
+	create_pool $TESTPOOL $opt $disks
+	log_mustnot zpool create -f $TESTPOOL1 $opt $disks
 	destroy_pool $TESTPOOL
 
 	# Create two pools and part of the devices were overlapped
-	create_pool $TESTPOOL $opt $disk
-	log_mustnot zpool create -f $TESTPOOL1 $opt ${DISKS% *}
+	create_pool $TESTPOOL $opt $disks
+	log_mustnot zpool create -f $TESTPOOL1 $opt $DISK0
 	destroy_pool $TESTPOOL
 
 	# Create one pool but using the same disks twice.
-	log_mustnot zpool create -f $TESTPOOL $opt $disk $disk
+	log_mustnot zpool create -f $TESTPOOL $opt $disks $disks
 done
 
 log_pass "Using overlapping or in-use disks to create a new pool fails as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh
index 1659397..36bbaa7 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_010_neg.ksh

@@ -48,37 +48,31 @@
 
 function cleanup
 {
-        poolexists $TOOSMALL && destroy_pool $TOOSMALL
-        poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+	typeset pool
 
-        poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	for pool in $TOOSMALL $TESTPOOL1 $TESTPOOL; do
+		poolexists $pool && destroy_pool $pool
+	done
 
-	[[ -d $TESTDIR ]] && rm -rf $TESTDIR
-
-	partition_disk $SIZE $disk 6
+	rm -rf $TESTDIR
 }
 log_onexit cleanup
 
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
-create_pool $TESTPOOL $disk
+create_pool $TESTPOOL $DISK0
 log_must zfs create $TESTPOOL/$TESTFS
 log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 
 typeset -l devsize=$(($SPA_MINDEVSIZE - 1024 * 1024))
-for files in $TESTDIR/file1 $TESTDIR/file2
+for files in $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3
 do
-	log_must mkfile $devsize $files
+	log_must truncate -s $devsize $files
 done
 
 set -A args \
 	"$TOOSMALL $TESTDIR/file1" "$TESTPOOL1 $TESTDIR/file1 $TESTDIR/file2" \
         "$TOOSMALL mirror $TESTDIR/file1 $TESTDIR/file2" \
-	"$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2"
+	"$TOOSMALL raidz $TESTDIR/file1 $TESTDIR/file2" \
+	"$TOOSMALL draid $TESTDIR/file1 $TESTDIR/file2 $TESTDIR/file3"
 
 typeset -i i=0
 while [[ $i -lt ${#args[*]} ]]; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh
index 8ade256..9437033 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_011_neg.ksh

@@ -54,62 +54,61 @@
 		destroy_pool $pool
 	done
 
+	rm -rf $disk1 $disk2 $disk3 $disk4
+
 	if [[ -n $saved_dump_dev ]]; then
 		log_must dumpadm -u -d $saved_dump_dev
 	fi
-
-	partition_disk $SIZE $disk 7
 }
 
 log_assert "'zpool create' should be failed with inapplicable scenarios."
 log_onexit cleanup
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-pooldev1=${disk}${SLICE_PREFIX}${SLICE0}
-pooldev2=${disk}${SLICE_PREFIX}${SLICE1}
-mirror1="${disk}${SLICE_PREFIX}${SLICE1} ${disk}${SLICE_PREFIX}${SLICE3}"
-mirror2="${disk}${SLICE_PREFIX}${SLICE4} ${disk}${SLICE_PREFIX}${SLICE5}"
+disk1=$(create_blockfile $FILESIZE)
+disk2=$(create_blockfile $FILESIZE)
+disk3=$(create_blockfile $FILESIZE)
+disk4=$(create_blockfile $FILESIZE1)
+mirror1="$DISK0 $DISK1"
+mirror2="$disk1 $disk2"
 raidz1=$mirror1
 raidz2=$mirror2
-diff_size_dev="${disk}${SLICE_PREFIX}${SLICE6} ${disk}${SLICE_PREFIX}${SLICE7}"
+draid1="$DISK0 $DISK1 $DISK2"
+draid2="$disk1 $disk2 $disk3"
+diff_size_dev="$disk2 $disk4"
+draid_diff_size_dev="$disk1 $disk2 $disk4"
 vfstab_dev=$(find_vfstab_dev)
 
-if is_linux; then
-	partition_disk $SIZE $disk 7
-	cyl=$(get_endslice $disk $SLICE5)
-	log_must set_partition $SLICE6 "$cyl" $SIZE1 $disk
-else
-	specified_dump_dev=${disk}${SLICE_PREFIX}${SLICE0}
+if is_illumos; then
+	specified_dump_dev=${DISK0}s0
 	saved_dump_dev=$(save_dump_dev)
 
-	cyl=$(get_endslice $disk $SLICE6)
-	log_must set_partition $SLICE7 "$cyl" $SIZE1 $disk
+	cyl=$(get_endslice $DISK0 6)
+	log_must set_partition 7 "$cyl" $SIZE1 $DISK0
 fi
-create_pool "$TESTPOOL" "$pooldev1"
+create_pool $TESTPOOL $DISK0
 
 #
 # Set up the testing scenarios parameters
 #
-set -A arg "$TESTPOOL $pooldev2" \
-	"$TESTPOOL1 $pooldev1" \
-	"$TESTPOOL1 $TESTDIR0/$FILEDISK0" \
+set -A arg \
+	"$TESTPOOL1 $DISK0" \
 	"$TESTPOOL1 mirror mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 raidz raidz $raidz1 raidz $raidz2" \
 	"$TESTPOOL1 raidz1 raidz1 $raidz1 raidz1 $raidz2" \
+	"$TESTPOOL1 draid draid $draid1 draid $draid2" \
 	"$TESTPOOL1 mirror raidz $raidz1 raidz $raidz2" \
 	"$TESTPOOL1 mirror raidz1 $raidz1 raidz1 $raidz2" \
+	"$TESTPOOL1 mirror draid $draid1 draid $draid2" \
 	"$TESTPOOL1 raidz mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 raidz1 mirror $mirror1 mirror $mirror2" \
+	"$TESTPOOL1 draid1 mirror $mirror1 mirror $mirror2" \
 	"$TESTPOOL1 mirror $diff_size_dev" \
 	"$TESTPOOL1 raidz $diff_size_dev" \
 	"$TESTPOOL1 raidz1 $diff_size_dev" \
+	"$TESTPOOL1 draid1 $draid_diff_size_dev" \
 	"$TESTPOOL1 mirror $mirror1 spare $mirror2 spare $diff_size_dev" \
 	"$TESTPOOL1 $vfstab_dev" \
-	"$TESTPOOL1 ${disk}s10" \
+	"$TESTPOOL1 ${DISK0}s10" \
 	"$TESTPOOL1 spare $pooldev2"
 
 unset NOINUSE_CHECK
@@ -122,10 +121,10 @@
 # now destroy the pool to be polite
 log_must zpool destroy -f $TESTPOOL
 
-if ! is_linux; then
+if is_illumos; then
 	# create/destroy a pool as a simple way to set the partitioning
 	# back to something normal so we can use this $disk as a dump device
-	log_must zpool create -f $TESTPOOL3 $disk
+	log_must zpool create -f $TESTPOOL3 $DISK1
 	log_must zpool destroy -f $TESTPOOL3
 
 	log_must dumpadm -d ${DEV_DSKDIR}/$specified_dump_dev
@@ -134,7 +133,7 @@
 	# Also check to see that in-use checking prevents us from creating
 	# a zpool from just the first slice on the disk.
 	log_mustnot zpool create \
-		-f $TESTPOOL1 ${specified_dump_dev}${SLICE_PREFIX}${SLICE0}
+		-f $TESTPOOL1 ${specified_dump_dev}s0
 fi
 
 log_pass "'zpool create' is failed as expected with inapplicable scenarios."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_012_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_012_neg.ksh
index 347fdfe..36888e4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_012_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_012_neg.ksh

@@ -47,12 +47,12 @@
 
 function cleanup
 {
-	if poolexists $TESTPOOL; then
-		destroy_pool $TESTPOOL
-	fi
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
-if is_linux; then
+if is_freebsd; then
+	typeset swap_disks=$(swapinfo -l | grep "/dev" | awk '{print $1}')
+elif is_linux; then
 	typeset swap_disks=`swapon -s | grep "/dev" | awk '{print $1}'`
 else
 	typeset swap_disks=`swap -l | grep "c[0-9].*d[0-9].*s[0-9]" | \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_014_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_014_neg.ksh
index fc383be..44ed950 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_014_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_014_neg.ksh

@@ -54,35 +54,26 @@
 		zfs destroy $vol_name
 	fi
 
-	if poolexists $TESTPOOL; then
-		destroy_pool $TESTPOOL
-	fi
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_assert "'zpool create' should fail with regular file in swap."
 log_onexit cleanup
 
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
 if is_linux; then
 	set -A options "" "-f"
 else
 	set -A options "-n" "" "-f"
 fi
 
-typeset pool_dev=${disk}${SLICE_PREFIX}${SLICE0}
 typeset vol_name=$TESTPOOL/$TESTVOL
 typeset mntp=/mnt
 typeset TMP_FILE=$mntp/tmpfile.$$
 
-create_pool $TESTPOOL $pool_dev
+create_pool $TESTPOOL $DISK0
 log_must zfs create -V 100m $vol_name
 block_device_wait
-log_must echo "y" | newfs ${ZVOL_DEVDIR}/$vol_name > /dev/null 2>&1
+log_must eval "new_fs ${ZVOL_DEVDIR}/$vol_name > /dev/null 2>&1"
 log_must mount ${ZVOL_DEVDIR}/$vol_name $mntp
 
 log_must mkfile 50m $TMP_FILE

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_015_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_015_neg.ksh
index 4f605d3..babf5ca 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_015_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_015_neg.ksh

@@ -56,20 +56,11 @@
 	fi
 
 	for pool in $TESTPOOL1 $TESTPOOL; do
-		if poolexists $pool; then
-			destroy_pool $pool
-		fi
+		poolexists $pool && destroy_pool $pool
 	done
 }
 
 unset NOINUSE_CHECK
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
-typeset pool_dev=${disk}${SLICE_PREFIX}${SLICE0}
 typeset vol_name=$TESTPOOL/$TESTVOL
 
 log_assert "'zpool create' should fail with zfs vol device in swap."
@@ -78,12 +69,17 @@
 #
 # use zfs vol device in swap to create pool which should fail.
 #
-create_pool $TESTPOOL $pool_dev
+create_pool $TESTPOOL $DISK0
 log_must zfs create -V 100m $vol_name
 block_device_wait
 swap_setup ${ZVOL_DEVDIR}/$vol_name
 
-for opt in "-n" "" "-f"; do
+if is_freebsd; then
+	typeset -a opts=("" "-f")
+else
+	typeset -a opts=("-n" "" "-f")
+fi
+for opt in "${opts[@]}"; do
 	log_mustnot zpool create $opt $TESTPOOL1 ${ZVOL_DEVDIR}/${vol_name}
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh
index cbb5806..1fa205b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_016_pos.ksh

@@ -46,15 +46,9 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Test case isn't useful under Linux."
-fi
-
 function cleanup
 {
-	if poolexists $TESTPOOL; then
-		destroy_pool $TESTPOOL
-	fi
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 
 	#recover swap devices
 	FSTAB=$TEST_BASE_DIR/fstab_$$
@@ -73,12 +67,6 @@
 	fi
 }
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-typeset pool_dev=${disk}${SLICE_PREFIX}${SLICE0}
 typeset swap_disks=$(swap -l | grep -v "swapfile" | awk '{print $1}')
 typeset dump_device=$(dumpadm | grep "Dump device" | awk '{print $3}')
 
@@ -94,7 +82,7 @@
 	fi
 done
 
-log_must zpool create $TESTPOOL $pool_dev
+log_must zpool create $TESTPOOL $DISK0
 log_must zpool destroy $TESTPOOL
 
 log_pass "'zpool create' passed as expected with applicable scenario."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_017_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_017_neg.ksh
index 7e75e74..ded1e3c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_017_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_017_neg.ksh

@@ -47,23 +47,10 @@
 
 function cleanup
 {
-	if poolexists $TESTPOOL; then
-		destroy_pool $TESTPOOL
-	fi
-
-	if [[ -d $TESTDIR ]]; then
-		log_must rm -rf $TESTDIR
-	fi
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -rf $TESTDIR
 }
 
-if [[ -n $DISK ]]; then
-        disk=$DISK
-else
-        disk=$DISK0
-fi
-
-typeset pool_dev=${disk}${SLICE_PREFIX}${SLICE0}
-
 log_assert "'zpool create' should fail with mountpoint exists and not empty."
 log_onexit cleanup
 
@@ -81,7 +68,7 @@
 		log_must touch $TESTDIR/testfile
 	fi
 
-	log_mustnot zpool create -m $TESTDIR -f $TESTPOOL $pool_dev
+	log_mustnot zpool create -m $TESTDIR -f $TESTPOOL $DISK0
 	log_mustnot poolexists $TESTPOOL
 
 	(( i = i + 1 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_018_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_018_pos.ksh
index 1de51e0..6ad662f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_018_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_018_pos.ksh

@@ -47,18 +47,12 @@
 function cleanup
 {
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
-	[[ -f $CPATH ]] && log_must rm $CPATH
+	rm -f $CPATH
 }
 
 log_onexit cleanup
 log_assert "zpool create can create pools with specified properties"
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-
 #
 # we don't include "root" property in this list, as it requires both "cachefile"
 # and "root" to be set at the same time. A test for this is included in
@@ -70,7 +64,7 @@
 typeset -i i=0;
 while [ $i -lt "${#props[@]}" ]
 do
-	log_must zpool create -o ${props[$i]}=${vals[$i]} $TESTPOOL $disk
+	log_must zpool create -o ${props[$i]}=${vals[$i]} $TESTPOOL $DISK0
 	RESULT=$(get_pool_prop ${props[$i]} $TESTPOOL)
 	if [[ $RESULT != ${vals[$i]} ]]
 	then
@@ -86,7 +80,7 @@
 poolexists $TESTPOOL && destroy_pool $TESTPOOL
 
 # pick two properties, and verify we can create with those as well
-log_must zpool create -o delegation=off -o cachefile=$CPATH $TESTPOOL $disk
+log_must zpool create -o delegation=off -o cachefile=$CPATH $TESTPOOL $DISK0
 RESULT=$(get_pool_prop delegation $TESTPOOL)
 if [[ $RESULT != off ]]
 then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_019_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_019_pos.ksh
index 9cf6081..694ea21 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_019_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_019_pos.ksh

@@ -43,21 +43,13 @@
 
 function cleanup
 {
-	if poolexists $TESTPOOL ; then
-                destroy_pool $TESTPOOL
-        fi
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_onexit cleanup
 
 log_assert "zpool create cannot create pools specifying readonly properties"
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-
 set -A props "available" "capacity" "guid"  "health"  "size" "used"
 set -A vals  "100"       "10"       "12345" "HEALTHY" "10"   "10"
 
@@ -65,7 +57,7 @@
 while [ $i -lt "${#props[@]}" ]
 do
         # try to set each property in the prop list with it's corresponding val
-        log_mustnot zpool create -o ${props[$i]}=${vals[$i]} $TESTPOOL $disk
+        log_mustnot zpool create -o ${props[$i]}=${vals[$i]} $TESTPOOL $DISK0
 	if poolexists $TESTPOOL
 	then
 		log_fail "$TESTPOOL was created when setting ${props[$i]}!"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_020_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_020_pos.ksh
index ae06960..104b5ec 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_020_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_020_pos.ksh

@@ -46,14 +46,9 @@
 
 function cleanup
 {
-	if poolexists $TESTPOOL ; then
-                destroy_pool $TESTPOOL
-        fi
-	if [ -d /${TESTPOOL}.root ]
-	then
-		log_must rmdir /${TESTPOOL}.root
-	fi
-	[[ -e $values ]] && log_must rm -f $values
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	rm -rf /${TESTPOOL}.root
+	rm -f $values
 }
 
 log_onexit cleanup
@@ -62,18 +57,12 @@
 
 typeset values=$TEST_BASE_DIR/values.$$
 
-if [[ -n $DISK ]]; then
-	disk=$DISK
-else
-	disk=$DISK0
-fi
-
 log_must rm -f /etc/zfs/zpool.cache
-log_must mkdir /${TESTPOOL}.root
-log_must zpool create -R /${TESTPOOL}.root $TESTPOOL $disk
+log_must rm -rf /${TESTPOOL}.root
+log_must zpool create -R /${TESTPOOL}.root $TESTPOOL $DISK0
 if [ ! -d /${TESTPOOL}.root ]
 then
-	log_fail "Mountpoint was not create when using zpool with -R flag!"
+	log_fail "Mountpoint was not created when using zpool with -R flag!"
 fi
 
 FS=$(zfs list $TESTPOOL)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh
index 8f64c9d..655f887 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_021_pos.ksh

@@ -68,10 +68,14 @@
 		  "setuid=off" \
 		  "readonly=on" \
 		  "snapdir=visible" \
-		  "acltype=posixacl" \
+		  "acltype=posix" \
 		  "aclinherit=discard" \
-		  "canmount=off" \
-		  "zoned=on"
+		  "canmount=off"
+if is_freebsd; then
+	RW_FS_PROP+=("jailed=on")
+else
+	RW_FS_PROP+=("zoned=on")
+fi
 
 typeset -i i=0
 while (( $i < ${#RW_FS_PROP[*]} )); do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh
index 4e6d255..4a918c0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_022_pos.ksh

@@ -48,7 +48,7 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_onexit cleanup
@@ -68,7 +68,7 @@
 		  "setuid=off" \
 		  "readonly=on" \
 		  "snapdir=visible" \
-		  "acltype=posixacl" \
+		  "acltype=posix" \
 		  "aclinherit=discard" \
 		  "canmount=off"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg.ksh
index fb0d480..f101521 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg.ksh

@@ -45,7 +45,7 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
 }
 
 log_onexit cleanup
@@ -63,7 +63,6 @@
     "deviCes=on" "devices=OFF" "devices=aaa" \
     "exec=ON" "EXec=off" "exec=aaa" \
     "readonly=ON" "reADOnly=off" "rdonly=OFF" "rdonly=aaa" \
-    "zoned=ON" "ZoNed=off" "zoned=aaa" \
     "snapdIR=hidden" "snapdir=VISible" "snapdir=aaa" \
     "acltype=DIScard" "acltYPE=groupmask" "acltype=aaa" \
     "aclinherit=deny" "aclinHerit=secure" "aclinherit=aaa" \
@@ -72,12 +71,25 @@
     "referenced=10K" "compressratio=1.00x" \
     "version=0" "version=1.234" "version=10K" "version=-1" \
     "version=aaa" "version=999"
+if is_freebsd; then
+	args+=("jailed=ON" "JaiLed=off" "jailed=aaa")
+else
+	args+=("zoned=ON" "ZoNed=off" "zoned=aaa")
+fi
 
 log_assert "'zpool create -O' should return an error with badly formed parameters."
 
 typeset -i i=0
 while (( $i < ${#args[*]} )); do
-	log_mustnot zpool create -O ${args[i]} -f $TESTPOOL $DISKS
+	typeset arg=${args[i]}
+	if is_freebsd; then
+		# FreeBSD does not strictly validate share opts (yet).
+		if [[ $arg == "sharenfs="* ]]; then
+			((i = i + 1))
+			continue
+		fi
+	fi
+	log_mustnot zpool create -O $arg -f $TESTPOOL $DISKS
 	((i = i + 1))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh
new file mode 100755
index 0000000..9717af5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_001_pos.ksh

@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Create a variety of dRAID pools using the minimal dRAID vdev syntax.
+#
+# STRATEGY:
+# 1) Create the required number of allowed dRAID vdevs.
+# 2) Create a few pools of various sizes using the draid1|draid2|draid3 syntax.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	rm -f $all_vdevs
+	rmdir $TESTDIR
+}
+
+log_assert "'zpool create <pool> <draid1|2|3> ...' can create a pool."
+
+log_onexit cleanup
+
+all_vdevs=$(echo $TESTDIR/file.{01..84})
+
+mkdir $TESTDIR
+log_must truncate -s $MINVDEVSIZE $all_vdevs
+
+# Verify all configurations up to 24 vdevs.
+for parity in {1..3}; do
+	for children in {$((parity + 2))..24}; do
+		vdevs=$(echo $TESTDIR/file.{01..${children}})
+		log_must zpool create $TESTPOOL draid$parity $vdevs
+		log_must poolexists $TESTPOOL
+		destroy_pool $TESTPOOL
+	done
+done
+
+# Spot check a few large configurations.
+children_counts="53 84"
+for children in $children_counts; do
+	vdevs=$(echo $TESTDIR/file.{01..${children}})
+	log_must zpool create $TESTPOOL draid $vdevs
+	log_must poolexists $TESTPOOL
+	destroy_pool $TESTPOOL
+done
+
+log_pass "'zpool create <pool> <draid1|2|3> <vdevs> ...' success."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh
new file mode 100755
index 0000000..2e1ff39
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh

@@ -0,0 +1,82 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Create dRAID pool using the maximum number of vdevs (255).  Then verify
+# that creating a pool with 256 fails as expected.
+#
+# STRATEGY:
+# 1) Verify a pool with fewer than the required vdevs fails.
+# 2) Verify pools with a valid number of vdevs succeed.
+# 3) Verify a pool which exceeds the maximum number of vdevs fails.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	rm -f $all_vdevs
+	rmdir $TESTDIR
+}
+
+log_assert "'zpool create <pool> draid <vdevs>'"
+
+log_onexit cleanup
+
+all_vdevs=$(echo $TESTDIR/file.{01..256})
+
+mkdir $TESTDIR
+log_must truncate -s $MINVDEVSIZE $all_vdevs
+
+# Below minimum dRAID vdev count for specified parity level.
+log_mustnot zpool create $TESTPOOL draid1 $(echo $TESTDIR/file.{01..01})
+log_mustnot zpool create $TESTPOOL draid2 $(echo $TESTDIR/file.{01..02})
+log_mustnot zpool create $TESTPOOL draid3 $(echo $TESTDIR/file.{01..03})
+
+# Verify pool sizes from 2-10.  Values in between are skipped to speed
+# up the test case but will be exercised by the random pool creation
+# done in zpool_create_draid_003_pos.ksh.
+for (( i=2; i<=10; i++ )); do
+	log_must zpool create $TESTPOOL draid:${i}c \
+	    $(echo $TESTDIR/file.{01..$i})
+	log_must destroy_pool $TESTPOOL
+done
+
+# Verify pool sizes from 254-255.
+for (( i=254; i<=255; i++ )); do
+	log_must zpool create $TESTPOOL draid:${i}c \
+	    $(echo $TESTDIR/file.{01..$i})
+	log_must destroy_pool $TESTPOOL
+done
+
+# Exceeds maximum dRAID vdev count (256).
+log_mustnot zpool create $TESTPOOL draid $(echo $TESTDIR/file.{01..256})
+
+log_pass "'zpool create <pool> draid <vdevs>'"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh
new file mode 100755
index 0000000..52cd00c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh

@@ -0,0 +1,112 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify allowed stripe widths (data+parity) and hot spares may be
+# configured at pool creation time.
+#
+# STRATEGY:
+# 1) Test valid stripe/spare combinations given the number of children.
+# 2) Test invalid stripe/spare/children combinations outside the allowed limits.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	rm -f $draid_vdevs
+	rmdir $TESTDIR
+}
+
+log_assert "'zpool create <pool> draid:#d:#c:#s <vdevs>'"
+
+log_onexit cleanup
+
+mkdir $TESTDIR
+
+# Generate 10 random valid configurations to test.
+for (( i=0; i<10; i++ )); do
+	parity=$(random_int_between 1 3)
+	spares=$(random_int_between 0 3)
+	data=$(random_int_between 1 16)
+
+	(( min_children = (data + parity + spares) ))
+	children=$(random_int_between $min_children 32)
+
+	draid="draid${parity}:${data}d:${children}c:${spares}s"
+
+	draid_vdevs=$(echo $TESTDIR/file.{01..$children})
+	log_must truncate -s $MINVDEVSIZE $draid_vdevs
+
+	log_must zpool create $TESTPOOL $draid $draid_vdevs
+	log_must poolexists $TESTPOOL
+	destroy_pool $TESTPOOL
+
+	rm -f $draid_vdevs
+done
+
+children=32
+draid_vdevs=$(echo $TESTDIR/file.{01..$children})
+
+# $TESTDIR was created before the loop above; only the vdev files, which
+# the loop removed on its final iteration, need to be recreated here.
+log_must truncate -s $MINVDEVSIZE $draid_vdevs
+
+# Out of order and unknown suffixes should fail.
+log_mustnot zpool create $TESTPOOL draid:d8 $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid:s3 $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid:c32 $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid:10x $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid:x10 $draid_vdevs
+
+# Exceeds maximum data disks (limited by total children)
+log_must zpool create $TESTPOOL draid2:30d $draid_vdevs
+log_must destroy_pool $TESTPOOL
+log_mustnot zpool create $TESTPOOL draid2:31d $draid_vdevs
+
+# At least one data disk must be requested.
+log_mustnot zpool create $TESTPOOL draid2:0d $draid_vdevs
+
+# Check invalid parity levels.
+log_mustnot zpool create $TESTPOOL draid0 $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid4 $draid_vdevs
+
+# Spares are limited: spares <= children - (parity + data).
+log_must zpool create $TESTPOOL draid2:20d:10s $draid_vdevs
+log_must destroy_pool $TESTPOOL
+log_mustnot zpool create $TESTPOOL draid2:20d:11s $draid_vdevs
+
+# The required children argument is enforced.
+log_mustnot zpool create $TESTPOOL draid2:0c $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid2:31c $draid_vdevs
+log_must zpool create $TESTPOOL draid2:32c $draid_vdevs
+destroy_pool $TESTPOOL
+
+log_pass "'zpool create <pool> draid:#d:#c:#s <vdevs>'"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh
new file mode 100755
index 0000000..6b700fa
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh

@@ -0,0 +1,43 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify generated dRAID permutation maps against the authoritative
+# reference file, which contains the full set of permutations.
+#
+
+verify_runnable "global"
+
+log_assert "'draid verify'"
+
+DRAIDCFG="$STF_SUITE/tests/functional/cli_root/zpool_create/draidcfg.gz"
+
+log_must draid verify $DRAIDCFG
+
+log_pass "'draid verify'"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh
new file mode 100755
index 0000000..1e4db20
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh

@@ -0,0 +1,138 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Attila Fülöp <attila@fueloep.org>
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+typeset STR_DRYRUN="would create '$TESTPOOL' with the following layout:"
+typeset VDEV_PREFIX="$TEST_BASE_DIR/filedev"
+
+#
+# DESCRIPTION:
+# 'zpool create -n <pool> <vdev> ...' can display the correct configuration
+#
+# STRATEGY:
+# 1. Create -n a storage pool and verify the output is as expected.
+#
+
+typeset -a dev=(
+	"${VDEV_PREFIX}00" "${VDEV_PREFIX}01" "${VDEV_PREFIX}02"
+	"${VDEV_PREFIX}03" "${VDEV_PREFIX}04" "${VDEV_PREFIX}05"
+	"${VDEV_PREFIX}06" "${VDEV_PREFIX}07" "${VDEV_PREFIX}08"
+	"${VDEV_PREFIX}09" "${VDEV_PREFIX}10" "${VDEV_PREFIX}11"
+)
+
+typeset -a tests=(
+    (
+	tree="'${dev[0]}' '${dev[1]}' log '${dev[2]}' '${dev[3]}' \
+	    special '${dev[4]}' '${dev[5]}' dedup '${dev[6]}' '${dev[7]}' \
+		spare '${dev[8]}' '${dev[9]}' cache '${dev[10]}' '${dev[11]}'"
+
+	want="$STR_DRYRUN
+
+	$TESTPOOL
+	  ${dev[0]}
+	  ${dev[1]}
+	dedup
+	  ${dev[6]}
+	  ${dev[7]}
+	special
+	  ${dev[4]}
+	  ${dev[5]}
+	logs
+	  ${dev[2]}
+	  ${dev[3]}
+	cache
+	  ${dev[10]}
+	  ${dev[11]}
+	spares
+	  ${dev[8]}
+	  ${dev[9]}"
+    )
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}' \
+	    log mirror '${dev[2]}' '${dev[3]}' \
+	    special mirror '${dev[4]}' '${dev[5]}' \
+	    dedup mirror '${dev[6]}' '${dev[7]}' \
+		spare '${dev[8]}' '${dev[9]}' \
+	    cache '${dev[10]}' '${dev[11]}'"
+
+	want="$STR_DRYRUN
+
+	$TESTPOOL
+	  mirror
+	    ${dev[0]}
+	    ${dev[1]}
+	dedup
+	  mirror
+	    ${dev[6]}
+	    ${dev[7]}
+	special
+	  mirror
+	    ${dev[4]}
+	    ${dev[5]}
+	logs
+	  mirror
+	    ${dev[2]}
+	    ${dev[3]}
+	cache
+	  ${dev[10]}
+	  ${dev[11]}
+	spares
+	  ${dev[8]}
+	  ${dev[9]}"
+    )
+)
+
+verify_runnable "global"
+
+function cleanup
+{
+	rm -f "$VDEV_PREFIX"*
+}
+
+log_assert "'zpool create -n <pool> <vdev> ...' can display the configuration"
+
+log_onexit cleanup
+
+# Create needed file vdevs.
+for (( i=0; i < ${#dev[@]}; i+=1 )); do
+	log_must truncate -s $SPA_MINDEVSIZE "${dev[$i]}"
+done
+
+# Foreach test run 'zpool create -n' (dry run) and check the output.
+for (( i=0; i < ${#tests[@]}; i+=1 )); do
+	typeset tree="${tests[$i].tree}"
+	typeset want="${tests[$i].want}"
+
+	typeset out="$(log_must eval "zpool create -n '$TESTPOOL' $tree" | \
+	    sed /^SUCCESS/d)"
+
+	if [[ "$out" != "$want" ]]; then
+		log_fail "Got:\n" "$out" "\nbut expected:\n" "$want"
+	fi
+done
+
+log_pass "'zpool create -n <pool> <vdev> ...' displays config correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_006_pos.ksh
new file mode 100755
index 0000000..fe98434
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_006_pos.ksh

@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Verify '-o compatibility' reserved values 'off, legacy'
+#
+# STRATEGY:
+#	1. Create a pool with '-o compatibility=off'
+#	2. Create a pool with '-o compatibility=legacy'
+#	3. Cannot create a pool with '-o compatibility=unknown'
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+}
+
+log_onexit cleanup
+
+log_assert "verify '-o compatibility' reserved values 'off, legacy'"
+
+log_must zpool create -f -o compatibility=off $TESTPOOL $DISKS
+log_must zpool destroy -f $TESTPOOL
+
+log_must zpool create -f -o compatibility=legacy $TESTPOOL $DISKS
+log_must zpool destroy -f $TESTPOOL
+
+log_mustnot zpool create -f -o compatibility=unknown $TESTPOOL $DISKS
+
+log_pass "verify '-o compatibility' reserved values 'off, legacy'"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh
new file mode 100755
index 0000000..8c81291
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh

@@ -0,0 +1,54 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+
+#
+# DESCRIPTION:
+#	Verify pools can be created with the expected feature set enabled.
+#
+# STRATEGY:
+#	1. Create a pool with a known feature set.
+#	2. Verify only those features are active/enabled.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+}
+
+log_onexit cleanup
+
+log_assert "creates a pool with a specified feature set enabled"
+
+log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
+check_feature_set $TESTPOOL compat-2020
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "creates a pool with a specified feature set enabled"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh
new file mode 100755
index 0000000..0580d44
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh

@@ -0,0 +1,54 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+
+#
+# DESCRIPTION:
+#	Verify pools can be created with multiple feature sets.
+#
+# STRATEGY:
+#	1. Create a pool with multiple feature sets.
+#	2. Verify only the features common to both sets are enabled.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+}
+
+log_onexit cleanup
+
+log_assert "creates a pool with multiple feature sets enabled"
+
+log_must zpool create -f -o compatibility=freebsd-11.0,zol-0.8 $TESTPOOL $DISKS
+check_feature_set $TESTPOOL freebsd-11.0 zol-0.8
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "creates a pool with multiple feature sets enabled"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh
new file mode 100755
index 0000000..052c18d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh

@@ -0,0 +1,92 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Verify '-o compatibility' property is updated in both the
+#	pool config MOS object and the cache file.
+#
+# STRATEGY:
+#	1. Create a pool with '-o compatibility=legacy', then verify
+#	   the property exists in the MOS config and cache file.
+#	2. Create a pool, set the 'compatibility=legacy' property, then
+#	   verify the property exists in the MOS config and cache file.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL && log_must zpool destroy $TESTPOOL
+	rm -f $CACHE_FILE
+}
+
+function check_config
+{
+	typeset propval=$1
+	# $1 is the expected value of the pool's 'compatibility' property.
+	poolval="$(zpool get -H -o value compatibility $TESTPOOL)"
+	if [ "$poolval" != "$propval" ]; then
+		log_fail "compatibility property set incorrectly $poolval"
+	fi
+	# Without a pool argument, check what zdb reports for the cache file.
+	if ! zdb -C -U $CACHE_FILE | grep "compatibility: '$propval'"; then
+		log_fail "compatibility property missing in cache file"
+	fi
+	# With the pool named, check the pool's own (MOS) configuration.
+	if ! zdb -C -U $CACHE_FILE $TESTPOOL | grep "compatibility: '$propval'"; then
+		log_fail "compatibility property missing from MOS object"
+	fi
+}
+
+log_onexit cleanup
+
+log_assert "verify '-o compatibility' in MOS object and cache file"
+
+CACHE_FILE=$TEST_BASE_DIR/cachefile.$$
+
+# 1. Create a pool with '-o compatibility=legacy', then verify
+#    the property exists in the MOS config and cache file.
+log_must zpool create -f -o cachefile=$CACHE_FILE -o compatibility=legacy $TESTPOOL $DISKS
+log_must check_config legacy
+log_must zpool export -F $TESTPOOL
+log_must zpool import -c $CACHE_FILE $TESTPOOL
+log_must check_config legacy
+log_must zpool destroy -f $TESTPOOL
+
+# 2. Create a pool, set the 'compatibility=legacy' property, then
+#    verify the property exists in the MOS config and cache file.
+log_must zpool create -f -o cachefile=$CACHE_FILE $TESTPOOL $DISKS
+log_must zpool set compatibility=legacy $TESTPOOL
+log_must check_config legacy
+log_must zpool export -F $TESTPOOL
+log_must zpool import -c $CACHE_FILE $TESTPOOL
+log_must check_config legacy
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "verify '-o compatibility' in MOS object and cache file"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_tempname.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_tempname.ksh
index 1e6fcea..8fd1cea 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_tempname.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_tempname.ksh

@@ -30,9 +30,11 @@
 
 function cleanup
 {
-	destroy_pool $TESTPOOL
-	destroy_pool $TEMPPOOL
+	typeset pool
 
+	for pool in $TESTPOOL $TEMPPOOL; do
+		poolexists $pool && destroy_pool $pool
+	done
 }
 
 log_assert "'zpool create -t <tempname>' can create a pool with the specified" \
@@ -48,8 +50,8 @@
 for poolprop in "${poolprops[@]}"; do
 	for fsprop in "${fsprops[@]}"; do
 		# 1. Create a pool with '-t' option
-		log_must zpool create $TESTPOOL -t $TEMPPOOL \
-			-O $fsprop -o $poolprop $DISKS
+		log_must zpool create -t $TEMPPOOL -O $fsprop -o $poolprop \
+			$TESTPOOL $DISKS
 		# 2. Verify the pool is created with the specified temporary name
 		log_must poolexists $TEMPPOOL
 		log_mustnot poolexists $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy.cfg
index 65b43da..bf60267 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy.cfg

@@ -28,19 +28,10 @@
 # Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
-export DISK=${DISKS%% *}
 export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
 export DISKSARRAY=$DISKS
+echo $DISKS | read DISK0 DISK1
 
 if is_linux; then
 	set_device_dir
-	set_slice_prefix
-	export SLICE0=1
-	export SLICE1=2
-else
-	export SLICE_PREFIX="s"
-	export SLICE0=0
-	export SLICE1=1
-
 fi
-export SLICE_SIZE=500m

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh
index 2d9ec78..c25b6c9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh

@@ -48,34 +48,31 @@
 function cleanup
 {
 	poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
-	datasetexists $TESTPOOL1/$TESTVOL && \
-		log_must zfs destroy -f $TESTPOOL1/$TESTVOL
+	datasetexists $TESTPOOL1/$TESTVOL && destroy_dataset $TESTPOOL1/$TESTVOL -f
 
 	typeset pool
 	for pool in $TESTPOOL1 $TESTPOOL; do
 		poolexists $pool && destroy_pool $pool
 	done
 
-	zero_partitions $DISK
+	[ -n "$recursive" ] && set_tunable64 VOL_RECURSIVE $recursive
 }
 
 set -A datasets "$TESTPOOL" "$TESTPOOL2"
 
-if ! $(is_physical_device $DISKS) ; then
-	log_unsupported "This case cannot be run on raw files."
-fi
-
 log_assert "'zpool destroy <pool>' can destroy a specified pool."
 
 log_onexit cleanup
 
-partition_disk $SLICE_SIZE $DISK 2
-
-create_pool "$TESTPOOL" "${DISK}${SLICE_PREFIX}${SLICE0}"
-create_pool "$TESTPOOL1" "${DISK}${SLICE_PREFIX}${SLICE1}"
+create_pool $TESTPOOL $DISK0
+create_pool $TESTPOOL1 $DISK1
 log_must zfs create -s -V $VOLSIZE $TESTPOOL1/$TESTVOL
 block_device_wait
-create_pool "$TESTPOOL2" "${ZVOL_DEVDIR}/$TESTPOOL1/$TESTVOL"
+if is_freebsd; then
+	typeset recursive=$(get_tunable VOL_RECURSIVE)
+	log_must set_tunable64 VOL_RECURSIVE 1
+fi
+create_pool $TESTPOOL2 $ZVOL_DEVDIR/$TESTPOOL1/$TESTVOL
 
 typeset -i i=0
 while (( i < ${#datasets[*]} )); do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh
index ad94257..a634f10 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh

@@ -59,7 +59,7 @@
 	typeset -i i=0
 	while (( $i < ${#datasets[*]} )); do
 		datasetexists ${datasets[i]} && \
-			log_must zfs destroy ${datasets[i]}
+			destroy_dataset ${datasets[i]}
 		(( i = i + 1 ))
 	done
 
@@ -73,9 +73,7 @@
 
 log_onexit cleanup
 
-typeset cwd=""
-
-create_pool "$TESTPOOL" "$DISK"
+create_pool $TESTPOOL $DISK0
 log_must zfs create $TESTPOOL/$TESTFS
 log_must mkdir -p $TESTDIR
 log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
@@ -90,7 +88,6 @@
 	((i = i + 1))
 done
 
-cwd=$PWD
 log_note "'zpool destroy' without '-f' will fail " \
 	"while pool is busy."
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore
new file mode 100644
index 0000000..a1f8c14
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/.gitignore

@@ -0,0 +1 @@
+/ereports

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
index 7fb6e4f..765df10 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am

@@ -1,4 +1,8 @@
+include $(top_srcdir)/config/Rules.am
+
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
+
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
@@ -6,8 +10,17 @@
 	zpool_events_cliargs.ksh \
 	zpool_events_follow.ksh \
 	zpool_events_poolname.ksh \
-	zpool_events_errors.ksh
+	zpool_events_errors.ksh \
+	zpool_events_duplicates.ksh \
+	zpool_events_clear_retained.ksh
 
 dist_pkgdata_DATA = \
 	zpool_events.cfg \
 	zpool_events.kshlib
+
+ereports_LDADD = \
+	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
+	$(abs_top_builddir)/lib/libzfs/libzfs.la
+
+pkgexec_PROGRAMS = ereports
+ereports_SOURCES = ereports.c

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c
new file mode 100644
index 0000000..f825240
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/ereports.c

@@ -0,0 +1,174 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <libzfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/nvpair.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+
+/*
+ * Command to output io and checksum ereport values, one per line.
+ * Used by zpool_events_duplicates.ksh to check for duplicate events.
+ *
+ * example output line:
+ *
+ * checksum "error_pool" 0x856dd01ce52e336 0x000034 0x000400 0x000a402c00
+ *  0x000004	0x000000	0x000000	0x000000	0x000001
+ */
+
+/*
+ * Our ereport duplicate criteria
+ *
+ * When the class and all of these values match, then an ereport is
+ * considered to be a duplicate.
+ */
+static const char *criteria_name[] = {
+	FM_EREPORT_PAYLOAD_ZFS_POOL,
+	FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
+
+	/* logical zio criteria (optional) */
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
+	FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
+};
+
+#define	CRITERIA_NAMES_COUNT	ARRAY_SIZE(criteria_name)
+
+static void
+print_ereport_line(nvlist_t *nvl)
+{
+	char *class;
+	int last = CRITERIA_NAMES_COUNT - 1;
+
+	/*
+	 * Only zfs ereports are printed ('data' is skipped to minimize
+	 * output); the class must start with the prefix stripped below.
+	 */
+	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0 ||
+	    strncmp(class, "ereport.fs.zfs.", strlen("ereport.fs.zfs.")) != 0 ||
+	    strcmp(class, "ereport.fs.zfs.data") == 0) {
+		return;
+	}
+
+	(void) printf("%s\t", class + strlen("ereport.fs.zfs."));
+
+	for (int i = 0; i < CRITERIA_NAMES_COUNT; i++) {
+		nvpair_t *nvp;
+		uint32_t i32 = 0;
+		uint64_t i64 = 0;
+		char *str = NULL;
+
+		if (nvlist_lookup_nvpair(nvl, criteria_name[i], &nvp) != 0) {
+			/* print a proxy for optional criteria */
+			(void) printf("--------");
+			(void) printf("%c", i == last ? '\n' : '\t');
+			continue;
+		}
+
+		switch (nvpair_type(nvp)) {
+		case DATA_TYPE_STRING:
+			(void) nvpair_value_string(nvp, &str);
+			(void) printf("\"%s\"", str ? str : "<NULL>");
+			break;
+
+		case DATA_TYPE_INT32:
+			(void) nvpair_value_int32(nvp, (void *)&i32);
+			(void) printf("0x%06x", i32);
+			break;
+
+		case DATA_TYPE_UINT32:
+			(void) nvpair_value_uint32(nvp, &i32);
+			(void) printf("0x%06x", i32);
+			break;
+
+		case DATA_TYPE_INT64:
+			(void) nvpair_value_int64(nvp, (void *)&i64);
+			(void) printf("0x%06llx", (u_longlong_t)i64);
+			break;
+
+		case DATA_TYPE_UINT64:
+			(void) nvpair_value_uint64(nvp, &i64);
+			if (strcmp(FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
+			    criteria_name[i]) == 0)
+				(void) printf("0x%010llx", (u_longlong_t)i64);
+			else
+				(void) printf("0x%06llx", (u_longlong_t)i64);
+			break;
+		default:
+			(void) printf("<unknown>");
+			break;
+		}
+		(void) printf("%c", i == last ? '\n' : '\t');
+	}
+}
+
+static void
+ereports_dump(libzfs_handle_t *zhdl, int zevent_fd)
+{
+	nvlist_t *nvl;
+	int ret, dropped;
+
+	while (1) {
+		ret = zpool_events_next(zhdl, &nvl, &dropped, ZEVENT_NONBLOCK,
+		    zevent_fd);
+		if (ret || nvl == NULL)
+			break;
+		if (dropped > 0)
+			(void) fprintf(stdout, "dropped %d events\n", dropped);
+		print_ereport_line(nvl);
+		(void) fflush(stdout);
+		nvlist_free(nvl);
+	}
+}
+
+/* ARGSUSED */
+int
+main(int argc, char **argv)
+{
+	libzfs_handle_t *hdl;
+	int fd;
+
+	hdl = libzfs_init();
+	if (hdl == NULL) {
+		(void) fprintf(stderr, "libzfs_init: %s\n", strerror(errno));
+		exit(2);
+	}
+	fd = open(ZFS_DEV, O_RDWR);
+	if (fd < 0) {
+		(void) fprintf(stderr, "open: %s\n", strerror(errno));
+		libzfs_fini(hdl);
+		exit(2);
+	}
+
+	ereports_dump(hdl, fd);
+
+	(void) close(fd);
+	libzfs_fini(hdl);
+
+	return (0);
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh
index 67038a4..054d39b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear.ksh

@@ -43,14 +43,14 @@
 done
 # wait a bit to allow the kernel module to process new events
 zpool_events_settle
-EVENTS_NUM="$(zpool events -H | wc -l)"
+EVENTS_NUM=$(zpool events -H | wc -l | xargs)
 
 # 3. Verify 'zpool events -c' successfully clear new events
-CLEAR_OUTPUT="$(zpool events -c)"
+CLEAR_OUTPUT=$(zpool events -c)
 if [[ "$CLEAR_OUTPUT" != "cleared $EVENTS_NUM events" ]]; then
 	log_fail "Failed to clear $EVENTS_NUM events: $CLEAR_OUTPUT"
 fi
-EVENTS_NUM="$(zpool events -H | wc -l)"
+EVENTS_NUM=$(zpool events -H | wc -l)
 if [[ $EVENTS_NUM -ne 0 ]];  then
 	log_fail "Unexpected events number: $EVENTS_NUM != 0"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh
new file mode 100755
index 0000000..fdf56b2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh

@@ -0,0 +1,135 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION:
+#	Verify that new errors after a pool scrub are not considered duplicates
+#
+# STRATEGY:
+#	1. Create a raidz pool with a file
+#	2. Inject garbage into one of the vdevs
+#	3. Scrub the pool
+#	4. Observe the checksum error counts
+#	5. Repeat inject and pool scrub
+#	6. Verify that second pass also produces similar errors (i.e. not
+#	   treated as a duplicate)
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+FILEPATH=$MOUNTDIR/target
+VDEV1=$TEST_BASE_DIR/vfile1
+VDEV2=$TEST_BASE_DIR/vfile2
+VDEV3=$TEST_BASE_DIR/vfile3
+SUPPLY=$TEST_BASE_DIR/supply
+POOL=test_pool
+FILESIZE="15M"
+DAMAGEBLKS=10
+
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
+OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND)
+
+EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
+
+function cleanup
+{
+	# Restore the saved tunables, then remove everything the test created.
+	log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS
+	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
+	zpool events -c
+	if poolexists $POOL ; then
+		zpool export $POOL
+	fi
+	log_must rm -f $VDEV1 $VDEV2 $VDEV3 $SUPPLY
+}
+
+function damage_and_repair
+{
+	# $1: pass number, used only in the log output.
+	log_must zpool clear $POOL $VDEV1
+	log_must zpool events -c
+	log_note injecting damage to $VDEV1
+	log_must dd conv=notrunc if=$SUPPLY of=$VDEV1 bs=1M seek=4 count=$DAMAGEBLKS
+	log_must zpool scrub $POOL
+	log_must zpool wait -t scrub $POOL
+	log_note "pass $1 observed $($EREPORTS | grep -c checksum) checksum ereports"
+
+	# Fourth field of the "scan: scrub repaired ..." status line.
+	repaired=$(zpool status $POOL | awk '/scan: scrub repaired/ {print $4}')
+	if [[ "$repaired" != "0B" ]]; then
+		log_note "$repaired repaired during scrub"
+	else
+		log_fail "INVALID TEST -- expected scrub to repair some blocks"
+	fi
+}
+
+# Print the fifth whitespace-separated field of every 'zpool status -p'
+# line for $POOL that matches $VDEV1 (presumably the CKSUM column, going
+# by the function name -- confirm against the zpool status layout).
+function checksum_error_count
+{
+	zpool status -p $POOL | grep $VDEV1 | awk '{print $5}'
+}
+
+assertion="Damage to recently repaired blocks should be reported/counted"
+log_assert "$assertion"
+log_note "zevent retain max setting: $RETAIN_MAX"
+
+log_onexit cleanup
+
+# Set our threshold high to avoid dropping events.
+set_tunable64 ZEVENT_LEN_MAX 20000
+set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000
+
+# Initialize resources for the test
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
+log_must dd if=/dev/urandom of=$SUPPLY bs=1M count=$DAMAGEBLKS
+log_must mkdir -p $MOUNTDIR
+log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL raidz $VDEV1 $VDEV2 $VDEV3
+log_must zfs set compression=off recordsize=16k $POOL
+# create a file full of zeros
+log_must mkfile -v $FILESIZE $FILEPATH
+log_must zpool sync $POOL
+
+# run once and observe the checksum errors
+damage_and_repair 1
+errcnt=$(checksum_error_count)
+log_note "$errcnt errors observed"
+# set expectation of at least 75% of what we observed in first pass
+(( expected = (errcnt * 75) / 100 ))
+
+# run again and we should observe new checksum errors
+damage_and_repair 2
+errcnt=$(checksum_error_count)
+
+log_must zpool destroy $POOL
+
+if (( errcnt < expected )); then
+	log_fail "FAILED -- expecting at least $expected checksum errors but only observed $errcnt"
+else
+	log_note observed $errcnt new checksum errors after a scrub
+	log_pass "$assertion"
+fi
+

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
new file mode 100755
index 0000000..595eacf
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh

@@ -0,0 +1,143 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION:
+#	Verify that duplicate I/O ereport errors are not posted
+#
+# STRATEGY:
+#	1. Create a mirror pool
+#	2. Inject duplicate read/write IO errors and checksum errors
+#	3. Verify there are no duplicate events being posted
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+FILEPATH=$MOUNTDIR/badfile
+VDEV1=$TEST_BASE_DIR/vfile1
+VDEV2=$TEST_BASE_DIR/vfile2
+POOL=error_pool
+FILESIZE="10M"
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
+
+EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
+
+duplicates=false
+
+function cleanup
+{
+	# Put back the event queue limit saved before the test raised it.
+	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
+
+	# Cancel any remaining zinject state, then drop the pool and vdevs.
+	log_must zinject -c all
+	poolexists $POOL && destroy_pool $POOL
+	log_must rm -f $VDEV1 $VDEV2
+}
+
+log_assert "Duplicate I/O ereport errors are not posted"
+log_note "zevent retain max setting: $RETAIN_MAX"
+
+log_onexit cleanup
+
+# Set our threshold high to avoid dropping events.
+set_tunable64 ZEVENT_LEN_MAX 20000
+
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2
+log_must mkdir -p $MOUNTDIR
+
+#
+# Create a mirror pool on $VDEV1/$VDEV2, inject errors of the requested
+# type, and compare the total number of ereport lines with the number of
+# unique ones.  Sets the global 'duplicates' to true when they differ.
+#
+# $1: test type - corrupt (checksum error), io
+# $2: read, write
+function do_dup_test
+{
+	ERR=$1
+	RW=$2
+
+	log_note "Testing $ERR $RW ereports"
+	log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL mirror $VDEV1 $VDEV2
+	# clear events left over from pool creation or a previous pass
+	log_must zpool events -c
+	log_must zfs set compression=off $POOL
+
+	if [ "$RW" == "read" ] ; then
+		log_must mkfile $FILESIZE $FILEPATH
+
+		# unmount and mount filesystems to purge file from ARC
+		# to force reads to go through error inject handler
+		log_must zfs unmount $POOL
+		log_must zfs mount $POOL
+
+		# all reads from this file get an error
+		if [ "$ERR" == "corrupt" ] ; then
+			log_must zinject -a -t data -e checksum -T read $FILEPATH
+		else
+			log_must zinject -a -t data -e io -T read $FILEPATH
+		fi
+
+		# Read the file a few times to generate some
+		# duplicate errors of the same blocks
+		for _ in {1..15}; do
+			dd if=$FILEPATH of=/dev/null bs=128K > /dev/null 2>&1
+		done
+		log_must zinject -c all
+	fi
+
+	# device-level injection on $VDEV1 for the requested I/O direction
+	log_must zinject -d $VDEV1 -e $ERR -T $RW -f 100 $POOL
+
+	if [ "$RW" == "write" ] ; then
+		log_must mkfile $FILESIZE $FILEPATH
+		log_must zpool sync $POOL
+	fi
+
+	log_must zinject -c all
+
+	# sort first so identical ereport lines are adjacent for uniq
+	ereports="$($EREPORTS | sort)"
+	actual=$(echo "$ereports" | wc -l)
+	unique=$(echo "$ereports" | uniq | wc -l)
+	log_note "$actual total $ERR $RW ereports where $unique were unique"
+
+	# more lines than unique lines means at least one duplicate was
+	# posted; dump the list and remember the failure for the caller
+	if [ $actual -gt $unique ] ; then
+		log_note "UNEXPECTED -- $((actual-unique)) duplicate $ERR $RW ereports"
+		echo "$ereports"
+		duplicates=true
+	fi
+
+	log_must zpool destroy $POOL
+}
+
+do_dup_test "corrupt" "read"
+do_dup_test "io" "read"
+do_dup_test "io" "write"
+
+if $duplicates; then
+	log_fail "FAILED -- Duplicate I/O ereport errors encountered"
+else
+	log_pass "Duplicate I/O ereport errors are not posted"
+fi
+

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh
index 0dc551b..a6833f1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh

@@ -28,11 +28,12 @@
 #	in zpool status.
 #
 # STRATEGY:
-#	1. Create a raidz or mirror pool
+#	1. Create a mirror, raidz, or draid pool
 #	2. Inject read/write IO errors or checksum errors
 #	3. Verify the number of errors in zpool status match the corresponding
 #	   number of error events.
-#	4. Repeat for all combinations of raidz/mirror and io/checksum errors.
+#	4. Repeat for all combinations of mirror/raidz/draid and io/checksum
+#	   errors.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -45,13 +46,13 @@
 VDEV3=$TEST_BASE_DIR/file3
 POOL=error_pool
 FILESIZE=$((20 * 1024 * 1024))
-OLD_CHECKSUMS=$(get_tunable zfs_checksum_events_per_second)
-OLD_LEN_MAX=$(get_tunable zfs_zevent_len_max)
+OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND)
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
 
 function cleanup
 {
-	log_must set_tunable64 zfs_checksum_events_per_second $OLD_CHECKSUMS
-	log_must set_tunable64 zfs_zevent_len_max $OLD_LEN_MAX
+	log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS
+	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
 
 	log_must zinject -c all
 	log_must zpool events -c
@@ -66,15 +67,15 @@
 log_onexit cleanup
 
 # Set our thresholds high so we never ratelimit or drop events.
-set_tunable64 zfs_checksum_events_per_second 20000
-set_tunable64 zfs_zevent_len_max 20000
+set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000
+set_tunable64 ZEVENT_LEN_MAX 20000
 
 log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
 log_must mkdir -p $MOUNTDIR
 
 # Run error test on a specific type of pool
 #
-# $1: pool - raidz, mirror
+# $1: pool - mirror, raidz, draid
 # $2: test type - corrupt (checksum error), io
 # $3: read, write
 function do_test
@@ -129,11 +130,11 @@
 		fi
 	fi
 
-	if [ "$val" == "0" ] || [ "$events" == "" ] ; then
+	# Use [[ ]] with || so an empty $val or $events is caught by -z before
+	# it can reach -eq; unquoted inside [ ... -o ... ] an empty value makes
+	# the whole expression malformed and the failure goes unnoticed.
+	if [[ -z "$val" || -z "$events" || $val -eq 0 || $events -eq 0 ]] ; then
 		log_fail "Didn't see any errors or events ($val/$events)"
 	fi
 
-	if [ "$val" != "$events" ] ; then
+	if [ $val -ne $events ] ; then
 		log_fail "$val $POOLTYPE $str errors != $events events"
 	else
 		log_note "$val $POOLTYPE $str errors == $events events"
@@ -142,8 +143,8 @@
 	log_must zpool destroy $POOL
 }
 
-# Test all types of errors on mirror and raidz pools
-for pooltype in mirror raidz ; do
+# Test all types of errors on mirror, raidz, and draid pools
+for pooltype in mirror raidz draid; do
 	do_test $pooltype corrupt read
 	do_test $pooltype io read
 	do_test $pooltype io write

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_follow.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_follow.ksh
index a996e57..258de03 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_follow.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_follow.ksh

@@ -56,8 +56,8 @@
 zpool_events_settle
 
 # 4. Verify 'zpool events -f' successfully recorded these new events
-EVENTS_LOG="$(cat $EVENTS_FILE | wc -l)"
-if [[ "$EVENTS_LOG" != "$EVENTS_NUM" ]]; then
+EVENTS_LOG=$(cat $EVENTS_FILE | wc -l)
+if [[ $EVENTS_LOG -ne $EVENTS_NUM ]]; then
 	log_fail "Unexpected number of events: $EVENTS_LOG != $EVENTS_NUM"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
index f39e626..8760f48 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh

@@ -54,6 +54,14 @@
 
 verify_runnable "global"
 
+# We override $org_size and $exp_size from zpool_expand.cfg to make sure we get
+# an expected free space value every time.  Otherwise, if we left it
+# configurable, the free space ratio to pool size ratio would diverge too
+# much at low $org_size values.
+#
+org_size=$((1024 * 1024 * 1024))
+exp_size=$(($org_size * 2))
+
 function cleanup
 {
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
@@ -68,11 +76,35 @@
 	unload_scsi_debug
 }
 
+# Poll $TESTPOOL1 until its size is within 10% of $1 and its available
+# space is within 10% of $2.
+#
+# Retries once per second for up to 10 attempts (expansion typically takes
+# 1-2 seconds) and calls log_fail if the pool never reaches both targets.
+function wait_for_autoexpand
+{
+	typeset exp_new_size=$1
+	typeset exp_new_free=$2
+
+	for i in $(seq 1 10) ; do
+		typeset new_size=$(get_pool_prop size $TESTPOOL1)
+		typeset new_free=$(get_prop avail $TESTPOOL1)
+		# Values need to be within 90% of each other (10% tolerance)
+		if within_percent $new_size $exp_new_size 90 > /dev/null && \
+		    within_percent $new_free $exp_new_free 90 > /dev/null ; then
+			return
+		fi
+		sleep 1
+	done
+	log_fail "$TESTPOOL1 never expanded to $exp_new_size with $exp_new_free" \
+	    " free space (got $new_size with $new_free free space)"
+}
+
 log_onexit cleanup
 
 log_assert "zpool can be autoexpanded after set autoexpand=on on vdev expansion"
 
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz; do
 	log_note "Setting up loopback, scsi_debug, and file vdevs"
 	log_must truncate -s $org_size $FILE_LO
 	DEV1=$(losetup -f)
@@ -105,62 +137,38 @@
 	log_note "Expanding loopback, scsi_debug, and file vdevs"
 	log_must truncate -s $exp_size $FILE_LO
 	log_must losetup -c $DEV1
-	sleep 3
 
 	echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb
 	echo "1" > /sys/class/block/$DEV2/device/rescan
 	block_device_wait
-	sleep 3
 
 	log_must truncate -s $exp_size $FILE_RAW
 	log_must zpool online -e $TESTPOOL1 $FILE_RAW
 
-	typeset expand_size=$(get_pool_prop size $TESTPOOL1)
-	typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
 
-	log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
-	    "expanded size: $expand_size"
-	# compare available pool size from zfs
-	if [[ $zfs_expand_size -gt $zfs_prev_size ]]; then
-		# check for zpool history for the pool size expansion
-		if [[ $type == " " ]]; then
-			typeset expansion_size=$(($exp_size-$org_size))
-			typeset	size_addition=$(zpool history -il $TESTPOOL1 |\
-			    grep "pool '$TESTPOOL1' size:" | \
-			    grep "vdev online" | \
-			    grep "(+${expansion_size}" | wc -l)
-
-			if [[ $size_addition -ne 3 ]]; then
-				log_fail "pool $TESTPOOL1 has not expanded, " \
-				    "$size_addition/3 vdevs expanded"
-			fi
-		elif [[ $type == "mirror" ]]; then
-			typeset expansion_size=$(($exp_size-$org_size))
-			zpool history -il $TESTPOOL1 | \
-			    grep "pool '$TESTPOOL1' size:" | \
-			    grep "vdev online" | \
-			    grep "(+${expansion_size})" >/dev/null 2>&1
-
-			if [[ $? -ne 0 ]] ; then
-				log_fail "pool $TESTPOOL1 has not expanded"
-			fi
-		else
-			typeset expansion_size=$((3*($exp_size-$org_size)))
-			zpool history -il $TESTPOOL1 | \
-			    grep "pool '$TESTPOOL1' size:" | \
-			    grep "vdev online" | \
-			    grep "(+${expansion_size})" >/dev/null 2>&1
-
-			if [[ $? -ne 0 ]]; then
-				log_fail "pool $TESTPOOL has not expanded"
-			fi
-		fi
-	else
-		log_fail "pool $TESTPOOL1 is not autoexpanded after vdev " \
-		    "expansion.  Previous size: $zfs_prev_size and expanded " \
-		    "size: $zfs_expand_size"
+	# The expected free space values below were observed at the time of
+	# this commit.  However, we know ZFS overhead will change over time,
+	# and thus we do not do an exact comparison to these values in
+	# wait_for_autoexpand.  Rather, we make sure the free space
+	# is within some small percentage threshold of these values.
+	typeset exp_new_size=$(($prev_size * 2))
+	if [[ "$type" == " " ]] ; then
+		exp_new_free=6045892608
+	elif [[ "$type" == "mirror" ]] ; then
+		exp_new_free=1945997312
+	elif [[ "$type" == "raidz" ]] ; then
+		exp_new_free=3977637338
+	elif [[ "$type" == "draid:1s" ]] ; then
+		exp_new_free=1946000384
 	fi
 
+	wait_for_autoexpand $exp_new_size $exp_new_free
+
+	expand_size=$(get_pool_prop size $TESTPOOL1)
+
+	log_note "$TESTPOOL1 '$type' grew from $prev_size -> $expand_size with" \
+	    "free space from $zfs_prev_size -> $(get_prop avail $TESTPOOL1)"
+
 	cleanup
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
index a49d4fc..62843b0 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh

@@ -63,7 +63,7 @@
 
 log_assert "zpool can expand after zpool online -e zvol vdevs on vdev expansion"
 
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid:1s; do
 	# Initialize the file devices and the pool
 	for i in 1 2 3; do
 		log_must truncate -s $org_size ${TEMPFILE}.$i
@@ -92,6 +92,8 @@
 
 	if [[ $type == "mirror" ]]; then
 		typeset expected_zpool_expandsize=$(($exp_size-$org_size))
+	elif [[ $type == "draid:1s" ]]; then
+		typeset expected_zpool_expandsize=$((2*($exp_size-$org_size)))
 	else
 		typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
 	fi
@@ -147,6 +149,17 @@
 				log_fail "pool $TESTPOOL1 has not expanded " \
 				    "after zpool online -e"
 			fi
+		elif [[ $type == "draid:1s" ]]; then
+			typeset expansion_size=$((2*($exp_size-$org_size)))
+			zpool history -il $TESTPOOL1 | \
+			    grep "pool '$TESTPOOL1' size:" | \
+			    grep "vdev online" | \
+			    grep "(+${expansion_size})" >/dev/null 2>&1
+
+			if [[ $? -ne 0 ]] ; then
+				log_fail "pool $TESTPOOL1 has not expanded " \
+				    "after zpool online -e"
+			fi
 		else
 			typeset expansion_size=$((3*($exp_size-$org_size)))
 			zpool history -il $TESTPOOL1 | \
@@ -160,9 +173,17 @@
 			fi
 		fi
 	else
-		log_fail "pool $TESTPOOL1 did not expand after vdev expansion " \
-		    "and zpool online -e"
+		log_fail "pool $TESTPOOL1 did not expand after vdev " \
+		    "expansion and zpool online -e"
 	fi
+
+	# For dRAID pools verify the distributed spare was resized after
+	# expansion and it is large enough to be used to replace a pool vdev.
+	if [[ $type == "draid:1s" ]]; then
+		log_must zpool replace -w $TESTPOOL1 $TEMPFILE.3 draid1-0-0
+		verify_pool $TESTPOOL1
+	fi
+
 	log_must zpool destroy $TESTPOOL1
 done
 log_pass "zpool can expand after zpool online -e"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
index 323d0b9..b3c71b6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh

@@ -73,7 +73,7 @@
 
 log_assert "zpool can not expand if set autoexpand=off after vdev expansion"
 
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid; do
 	log_note "Setting up loopback, scsi_debug, and file vdevs"
 	log_must truncate -s $org_size $FILE_LO
 	DEV1=$(losetup -f)

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
index 8a4db82..09e2b6d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh

@@ -61,7 +61,7 @@
 
 log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."
 
-for type in " " mirror raidz raidz2; do
+for type in " " mirror raidz draid; do
 	for i in 1 2 3; do
 		log_must truncate -s $org_size ${TEMPFILE}.$i
 	done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/Makefile.am
index 86452e8..1c06d5b 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/Makefile.am

@@ -8,4 +8,5 @@
 	zpool_export_004_pos.ksh
 
 dist_pkgdata_DATA = \
-	zpool_export.cfg
+	zpool_export.cfg \
+	zpool_export.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/setup.ksh
index 925f3e4..023920d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/setup.ksh

@@ -30,8 +30,4 @@
 
 DISK=${DISKS%% *}
 
-if ! $(is_physical_device $DISK) ; then
-	log_unsupported "Only partitionable physical disks can be used"
-fi
-
 default_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.cfg
index 1501c04..8bfb067 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.cfg

@@ -30,30 +30,15 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-export DISK_ARRAY_NUM=0
-export DISK_ARRAY_LIMIT=4
-export DISKSARRAY=""
-export VDEVS_NUM=32
+export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
+export DISK1=$(echo $DISKS | awk '{print $1}')
+export DISK2=$(echo $DISKS | awk '{print $3}')
 
-function set_disks
-{
-        typeset -a disk_array=($(find_disks $DISKS))
-
-	if (( ${#disk_array[*]} <= 1 )); then
-		export DISK=${DISKS%% *}
-	else
-		export DISK=""
-		typeset -i i=0
-		while (( i < ${#disk_array[*]} )); do
-			export DISK${i}="${disk_array[$i]}"
-			DISKSARRAY="$DISKSARRAY ${disk_array[$i]}"
-			(( i = i + 1 ))
-			(( i>$DISK_ARRAY_LIMIT )) && break
-		done
-		export DISK_ARRAY_NUM=$i
-		export DISKSARRAY
-	fi
-}
-
-set_disks
-set_device_dir
+if is_linux; then
+	set_slice_prefix
+	set_device_dir
+	devs_id[0]=$(get_persistent_disk_name $DISK1)
+	devs_id[1]=$(get_persistent_disk_name $DISK2)
+else
+	DEV_DSKDIR="/dev"
+fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.kshlib
new file mode 100644
index 0000000..5484f20
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export.kshlib

@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, Klara Systems, Inc. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.cfg
+
+# Shared exit handler for the zpool_export tests: removes $TESTDIR0 when it
+# exists as a directory, then hands off to default_cleanup.
+function zpool_export_cleanup
+{
+	if [[ -d $TESTDIR0 ]]; then
+		log_must rm -rf $TESTDIR0
+	fi
+	default_cleanup
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_001_pos.ksh
index b682355..111453c 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_001_pos.ksh

@@ -29,8 +29,7 @@
 # Copyright (c) 2016 by Delphix. All rights reserved.
 #
 
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.cfg
+. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.kshlib
 
 #
 # DESCRIPTION:
@@ -46,19 +45,7 @@
 
 verify_runnable "global"
 
-function cleanup
-{
-	typeset dir=$(get_device_dir $DISKS)
-
-	datasetexists "$TESTPOOL/$TESTFS" || \
-		log_must zpool import -d $dir $TESTPOOL
-
-	ismounted "$TESTPOOL/$TESTFS"
-	(( $? != 0 )) && \
-	    log_must zfs mount $TESTPOOL/$TESTFS
-}
-
-log_onexit cleanup
+log_onexit zpool_export_cleanup
 
 log_assert "Verify a pool can be exported."
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_002_pos.ksh
index 81473d9..8040d12 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_002_pos.ksh

@@ -29,7 +29,7 @@
 # Copyright (c) 2016 by Delphix. All rights reserved.
 #
 
-. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.kshlib
 
 #
 # DESCRIPTION:
@@ -45,19 +45,10 @@
 
 function cleanup
 {
-	typeset dir=$(get_device_dir $DISKS)
 	cd $olddir || \
 	    log_fail "Couldn't cd back to $olddir"
 
-	datasetexists "$TESTPOOL/$TESTFS" || \
-	    log_must zpool import -d $dir $TESTPOOL
-
-	ismounted "$TESTPOOL/$TESTFS"
-	(( $? != 0 )) && \
-	    log_must zfs mount $TESTPOOL/$TESTFS
-
-	[[ -e $TESTDIR/$TESTFILE0 ]] && \
-	    log_must rm -rf $TESTDIR/$TESTFILE0
+	zpool_export_cleanup
 }
 
 olddir=$PWD

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_003_neg.ksh
index b188f9c..a2ee7fb 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_003_neg.ksh

@@ -29,7 +29,7 @@
 # Copyright (c) 2016 by Delphix. All rights reserved.
 #
 
-. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.kshlib
 
 #
 # DESCRIPTION:
@@ -43,18 +43,7 @@
 
 verify_runnable "global"
 
-function cleanup
-{
-	typeset dir=$(get_device_dir $DISKS)
-	datasetexists "$TESTPOOL/$TESTFS" || \
-		log_must zpool import -d $dir $TESTPOOL
-
-	ismounted "$TESTPOOL/$TESTFS"
-	(( $? != 0 )) && \
-	    log_must zfs mount $TESTPOOL/$TESTFS
-}
-
-log_onexit cleanup
+log_onexit zpool_export_cleanup
 
 set -A args "" "-f" "-? $TESTPOOL" "-QWERTYUIO $TESTPOOL"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_004_pos.ksh
index 0f1a7c6..9be3f23 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_004_pos.ksh

@@ -29,7 +29,7 @@
 # Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 #
 
-. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_export/zpool_export.kshlib
 
 #
 # DESCRIPTION:
@@ -50,25 +50,8 @@
 
 verify_runnable "global"
 
-function cleanup
-{
-	mntpnt=$TESTDIR0
-	datasetexists $TESTPOOL1 || log_must zpool import -d $mntpnt $TESTPOOL1
-	datasetexists $TESTPOOL1 && destroy_pool $TESTPOOL1
-	datasetexists $TESTPOOL2 && destroy_pool $TESTPOOL2
-	typeset -i i=0
-	while ((i < 5)); do
-		if [[ -e $mntpnt/vdev$i ]]; then
-			log_must rm -f $mntpnt/vdev$i
-		fi
-		((i += 1))
-	done
-	log_must rmdir $mntpnt
-}
-
-
 log_assert "Verify zpool export succeed or fail with spare."
-log_onexit cleanup
+log_onexit zpool_export_cleanup
 
 mntpnt=$TESTDIR0
 log_must mkdir -p $mntpnt

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index fdcce8b..6075e1f 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg

@@ -46,7 +46,6 @@
     "failmode"
     "listsnapshots"
     "autoexpand"
-    "dedupditto"
     "dedupratio"
     "free"
     "allocated"
@@ -58,6 +57,7 @@
     "leaked"
     "multihost"
     "autotrim"
+    "compatibility"
     "feature@async_destroy"
     "feature@empty_bpobj"
     "feature@lz4_compress"
@@ -72,15 +72,19 @@
     "feature@large_blocks"
     "feature@sha512"
     "feature@skein"
-    "feature@edonr"
     "feature@device_removal"
     "feature@obsolete_counts"
     "feature@zpool_checkpoint"
     "feature@spacemap_v2"
+    "feature@redaction_bookmarks"
+    "feature@redacted_datasets"
+    "feature@bookmark_written"
+    "feature@log_spacemap"
+    "feature@device_rebuild"
+    "feature@draid"
 )
 
-# Additional properties added for Linux.
-if is_linux; then
+if is_linux || is_freebsd; then
 	properties+=(
 	    "ashift"
 	    "feature@large_dnode"
@@ -90,5 +94,13 @@
 	    "feature@allocation_classes"
 	    "feature@resilver_defer"
 	    "feature@bookmark_v2"
+	    "feature@livelist"
+	    "feature@zstd_compress"
+	)
+fi
+
+if ! is_freebsd; then
+	properties+=(
+	    "feature@edonr"
 	)
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh
index a2b7318..b5cd8d5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_001_neg.ksh

@@ -51,8 +51,8 @@
 
 function cleanup
 {
-	datasetexists $clone && log_must zfs destroy $clone
-	datasetexists $snap && log_must zfs destroy $snap
+	datasetexists $clone && destroy_dataset $clone
+	datasetexists $snap && destroy_dataset $snap
 }
 
 log_assert "Verify 'zpool history' can deal with non-existent pools and " \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
index ad0f9c4..4230ec5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am

@@ -9,8 +9,10 @@
 	import_cachefile_device_replaced.ksh \
 	import_cachefile_mirror_attached.ksh \
 	import_cachefile_mirror_detached.ksh \
+	import_cachefile_paths_changed.ksh \
 	import_cachefile_shared_device.ksh \
 	import_devices_missing.ksh \
+	import_log_missing.ksh \
 	import_paths_changed.ksh \
 	import_rewind_config_changed.ksh \
 	import_rewind_device_replaced.ksh \
@@ -29,6 +31,8 @@
 	zpool_import_013_neg.ksh \
 	zpool_import_014_pos.ksh \
 	zpool_import_015_pos.ksh \
+	zpool_import_016_pos.ksh \
+	zpool_import_017_pos.ksh \
 	zpool_import_all_001_pos.ksh \
 	zpool_import_features_001_pos.ksh \
 	zpool_import_features_002_neg.ksh \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/cleanup.ksh
index 029fa66..bee0e11 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/cleanup.ksh

@@ -34,11 +34,10 @@
 
 verify_runnable "global"
 
-log_must set_tunable32 zfs_scan_suspend_progress 0
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 
 for pool in "$TESTPOOL" "$TESTPOOL1"; do
-	datasetexists $pool/$TESTFS && \
-		log_must zfs destroy -Rf $pool/$TESTFS
+	datasetexists $pool/$TESTFS && destroy_dataset $pool/$TESTFS -Rf
 	destroy_pool "$pool"
 done
 
@@ -47,20 +46,4 @@
 		log_must rm -rf $dir
 done
 
-DISK=${DISKS%% *}
-if is_mpath_device $DISK; then
-	delete_partitions
-fi
-# recreate and destroy a zpool over the disks to restore the partitions to
-# normal
-case $DISK_COUNT in
-0|1)
-	log_note "No disk devices to restore"
-	;;
-*)
-	log_must cleanup_devices $ZFS_DISK1
-	log_must cleanup_devices $ZFS_DISK2
-	;;
-esac
-
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_added.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_added.ksh
index ab72042..3238faa 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_added.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_added.ksh

@@ -69,6 +69,8 @@
     "mirror $VDEV0 $VDEV1 mirror $VDEV2 $VDEV3"
 test_add_vdevs "$VDEV0" "raidz $VDEV1 $VDEV2 $VDEV3" \
     "$VDEV0 raidz $VDEV1 $VDEV2 $VDEV3"
+test_add_vdevs "$VDEV0" "draid $VDEV1 $VDEV2 $VDEV3" \
+    "$VDEV0 draid $VDEV1 $VDEV2 $VDEV3"
 test_add_vdevs "$VDEV0" "log $VDEV1" "$VDEV0 log $VDEV1"
 test_add_vdevs "$VDEV0 log $VDEV1" "$VDEV2" "$VDEV0 $VDEV2 log $VDEV1"
 test_add_vdevs "$VDEV0" "$VDEV1 log $VDEV2" "$VDEV0 $VDEV1 log $VDEV2"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh
index e7edb1a..8a81c18 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_replaced.ksh

@@ -59,7 +59,7 @@
 	[[ -n ZFS_TXG_TIMEOUT ]] &&
 	    log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
 
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	cleanup
 }
 
@@ -87,7 +87,7 @@
 	log_must zpool export $TESTPOOL1
 	log_must cp $CPATHBKP $CPATH
 	log_must zpool import -c $CPATH -o cachefile=$CPATH $TESTPOOL1
-	log_must set_tunable32 zfs_scan_suspend_progress 1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby
 
 	# Cachefile: pool in resilvering state
@@ -96,7 +96,7 @@
 	# Confirm pool is still replacing
 	log_must pool_is_replacing $TESTPOOL1
 	log_must zpool export $TESTPOOL1
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 
 	( $earlyremove ) && log_must rm $replacevdev
 
@@ -155,6 +155,12 @@
 	"$VDEV0 $VDEV1 $VDEV2" \
 	true 20
 
+test_replacing_vdevs "draid:1s $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4" \
+	"$VDEV1" "$VDEV5" \
+	"draid $VDEV0 $VDEV5 $VDEV2 $VDEV3 $VDEV4 spares draid1-0-0" \
+	"$VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4" \
+	true 30
+
 set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
 
 log_pass "zpool import -c cachefile_unaware_of_replace passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_paths_changed.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_paths_changed.ksh
new file mode 100755
index 0000000..0902bc4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_paths_changed.ksh

@@ -0,0 +1,117 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+#	A pool should be importable from a cachefile even if device paths
+#	have changed.
+#
+# STRATEGY:
+#	1. Create a pool using a cachefile
+#	2. Backup cachefile
+#	3. Export the pool.
+#	4. Change the paths of some of the devices.
+#	5. Verify that we can import the pool using the cachefile.
+#
+
+verify_runnable "global"
+
+log_onexit cleanup
+
+function test_new_paths
+{
+	typeset poolcreate="$1"		# vdev spec handed to 'zpool create'
+	typeset pathstochange="$2"	# device paths renamed before the import
+	# Checks that a cachefile import still works after devices are renamed.
+	log_note "$0: pool '$poolcreate', changing paths of $pathstochange."
+
+	log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $poolcreate
+	# Keep a copy of the cachefile taken while the pool is still imported.
+	log_must cp $CPATH $CPATHBKP
+
+	log_must zpool export $TESTPOOL1
+	# Rename each listed device so the paths in the cachefile are stale.
+	for dev in $pathstochange; do
+		log_must mv $dev "${dev}_new"
+	done
+	# Import via the saved cachefile; the pool must come up healthy.
+	log_must zpool import -c $CPATHBKP $TESTPOOL1
+	log_must check_pool_healthy $TESTPOOL1
+
+	# Cleanup: destroy the pool and restore the original device names.
+	log_must zpool destroy $TESTPOOL1
+	log_must rm -f $CPATH $CPATHBKP
+	for dev in $pathstochange; do
+		log_must mv "${dev}_new" $dev
+	done
+
+	log_note ""
+}
+
+function test_duplicate_pools
+{
+	typeset poolcreate="$1"		# vdev spec handed to 'zpool create'
+	typeset pathstocopy="$2"	# device paths that are copied, then renamed
+	# Checks a cachefile import when copies of an older pool also exist.
+	log_note "$0: pool '$poolcreate', creating duplicate pool using $pathstocopy."
+
+	log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $poolcreate
+	log_must zpool export $TESTPOOL1
+	# Save copies of the first pool's devices under an "_orig" suffix.
+	for dev in $pathstocopy; do
+		log_must cp $dev "${dev}_orig"
+
+	done
+	# Re-create a pool of the same name on the same devices (-f forces it).
+	log_must zpool create -f -o cachefile=$CPATH $TESTPOOL1 $poolcreate
+	log_must cp $CPATH $CPATHBKP
+	log_must zpool export $TESTPOOL1
+	# Rename the second pool's devices so the cachefile paths are stale.
+	for dev in $pathstocopy; do
+		log_must mv $dev "${dev}_new"
+	done
+	# First list what the cachefile offers, then import the pool by name.
+	log_must zpool import -c $CPATHBKP
+	log_must zpool import -c $CPATHBKP $TESTPOOL1
+	log_must check_pool_healthy $TESTPOOL1
+
+	# Cleanup: destroy the pool, drop the copies, restore device names.
+	log_must zpool destroy $TESTPOOL1
+	log_must rm -f $CPATH $CPATHBKP
+	for dev in $pathstocopy; do
+		log_must rm "${dev}_orig"
+		log_must mv "${dev}_new" $dev
+	done
+
+	log_note ""
+}
+
+test_new_paths "$VDEV0 $VDEV1" "$VDEV0 $VDEV1"
+test_new_paths "mirror $VDEV0 $VDEV1" "$VDEV0 $VDEV1"
+test_new_paths "$VDEV0 log $VDEV1" "$VDEV0 $VDEV1"
+test_new_paths "raidz $VDEV0 $VDEV1 $VDEV2" "$VDEV0 $VDEV1 $VDEV2"
+test_new_paths "draid $VDEV0 $VDEV1 $VDEV2" "$VDEV0 $VDEV1 $VDEV2"
+
+test_duplicate_pools "$VDEV0 $VDEV1" "$VDEV0 $VDEV1"
+test_duplicate_pools "mirror $VDEV0 $VDEV1" "$VDEV0 $VDEV1"
+test_duplicate_pools "$VDEV0 log $VDEV1" "$VDEV0 $VDEV1"
+test_duplicate_pools "raidz $VDEV0 $VDEV1 $VDEV2" "$VDEV0 $VDEV1 $VDEV2"
+test_duplicate_pools "draid $VDEV0 $VDEV1 $VDEV2" "$VDEV0 $VDEV1 $VDEV2"
+
+log_pass "zpool import with cachefile succeeded after changing device paths."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh
index 887993d..87942b4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_shared_device.ksh

@@ -108,6 +108,7 @@
 test_shared_device "mirror $VDEV0 $VDEV1 $VDEV2" "mirror $VDEV2 $VDEV3" \
     "$VDEV2"
 test_shared_device "raidz $VDEV0 $VDEV1 $VDEV2" "$VDEV2" "$VDEV2"
+test_shared_device "draid $VDEV0 $VDEV1 $VDEV2" "$VDEV2" "$VDEV2"
 test_shared_device "$VDEV0 log $VDEV1" "$VDEV2 log $VDEV1" "$VDEV1" "-m"
 
 log_pass "Pool doesn't write to a device it doesn't own anymore."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh
index 53828c9..af6ac8d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_devices_missing.ksh

@@ -68,7 +68,7 @@
 
 	log_must generate_data $TESTPOOL1 $MD5FILE2 "second"
 
-	log_must zpool export $TESTPOOL1
+	log_must_busy zpool export $TESTPOOL1
 
 	log_must mv $missingvdevs $BACKUP_DEVICE_DIR
 
@@ -85,7 +85,7 @@
 	    "get suspended."
 	verify_data_md5sums $MD5FILE >/dev/null 2>&1
 
-	log_must zpool export $TESTPOOL1
+	log_must_busy zpool export $TESTPOOL1
 
 	typeset newpaths=$(echo "$missingvdevs" | \
 		sed "s:$DEVICE_DIR:$BACKUP_DEVICE_DIR:g")

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
new file mode 100755
index 0000000..f12cac7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh

@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+#	Import with missing log device should not remove spare/cache.
+#
+# STRATEGY:
+#	1. Create a pool.
+#	2. Add spare, cache and log devices to the pool.
+#	3. Export the pool.
+#	4. Remove the log device.
+#	5. Import the pool with -m flag.
+#	6. Verify that spare and cache are still present in the pool.
+#
+
+verify_runnable "global"
+
+log_onexit cleanup
+
+function test_missing_log
+{
+	typeset poolcreate="$1"		# vdev spec handed to 'zpool create'
+	typeset cachevdev="$2"		# device added as a cache vdev
+	typeset sparevdev="$3"		# device added as a spare vdev
+	typeset logvdev="$4"		# device added as a log vdev
+	typeset missingvdev="$4"	# same device: the log is what goes missing
+	# Checks that cache and spare vdevs survive an import without the log.
+	log_note "$0: pool '$poolcreate', adding $cachevdev, $sparevdev," \
+		"$logvdev then moving away $missingvdev."
+
+	log_must zpool create $TESTPOOL1 $poolcreate
+
+	log_must zpool add $TESTPOOL1 cache $cachevdev spare $sparevdev \
+		log $logvdev
+
+	log_must_busy zpool export $TESTPOOL1
+	# Move the log device out of the directory searched by the import.
+	log_must mv $missingvdev $BACKUP_DEVICE_DIR
+	# Import with -m although the log device is no longer in $DEVICE_DIR.
+	log_must zpool import -m -d $DEVICE_DIR $TESTPOOL1
+
+	CACHE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $cachevdev)
+
+	SPARE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $sparevdev)
+	# Fail if either vdev no longer shows up in the 'zpool status' output.
+	if [ -z "$CACHE_PRESENT" ] || [ -z "$SPARE_PRESENT" ]
+	then
+		log_fail "cache/spare vdev missing after importing with missing" \
+			"log device"
+	fi
+
+	# Cleanup
+	log_must zpool destroy $TESTPOOL1
+
+	log_note ""
+}
+
+log_must mkdir -p $BACKUP_DEVICE_DIR
+
+test_missing_log "$VDEV0" "$VDEV1" "$VDEV2" "$VDEV3"
+
+log_pass "zpool import succeeded with missing log device"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_paths_changed.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_paths_changed.ksh
index 7ee306e..15f3a0a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_paths_changed.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_paths_changed.ksh

@@ -89,9 +89,11 @@
 test_new_paths "mirror $VDEV0 $VDEV1" "$VDEV0 $VDEV1"
 test_new_paths "$VDEV0 log $VDEV1" "$VDEV1"
 test_new_paths "raidz $VDEV0 $VDEV1 $VDEV2" "$VDEV1"
+test_new_paths "draid $VDEV0 $VDEV1 $VDEV2" "$VDEV1"
 
 test_swap_paths "$VDEV0 $VDEV1" "$VDEV0" "$VDEV1"
 test_swap_paths "raidz $VDEV0 $VDEV1 $VDEV2" "$VDEV0" "$VDEV1"
+test_swap_paths "draid $VDEV0 $VDEV1 $VDEV2" "$VDEV0" "$VDEV1"
 test_swap_paths "mirror $VDEV0 $VDEV1 mirror $VDEV2 $VDEV3" \
     "$VDEV0" "$VDEV2"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh
index f42ba10..d79c757 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_config_changed.ksh

@@ -48,7 +48,7 @@
 {
 	set_vdev_validate_skip 0
 	cleanup
-	log_must set_tunable64 zfs_vdev_min_ms_count 16
+	log_must set_tunable64 VDEV_MIN_MS_COUNT 16
 }
 
 log_onexit custom_cleanup
@@ -61,6 +61,7 @@
 	typeset detachvdev="${4:-}"
 	typeset removevdev="${5:-}"
 	typeset finalpool="${6:-}"
+	typeset retval=1
 
 	typeset poolcheck="$poolcreate"
 
@@ -120,19 +121,30 @@
 	# while having a checkpoint, we take it after the
 	# operation that changes the config.
 	#
+	# However, it is possible that the MOS data was overwritten,
+	# in which case the pool will either be unimportable or
+	# may have been rewound to before the data was written.
+	# If so, test_common() returns an error and is retried by
+	# the caller to minimize false positives.
+	#
 	log_must zpool checkpoint $TESTPOOL1
 
 	log_must overwrite_data $TESTPOOL1 ""
 
 	log_must zpool export $TESTPOOL1
 
-	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
-	log_must check_pool_config $TESTPOOL1 "$poolcheck"
+	zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
+	if (( $? == 0 )); then
+		verify_data_md5sums $MD5FILE
+		if (( $? == 0 )); then
+			retval=0
+		fi
 
-	log_must verify_data_md5sums $MD5FILE
+		log_must check_pool_config $TESTPOOL1 "$poolcheck"
+		log_must zpool destroy $TESTPOOL1
+	fi
 
 	# Cleanup
-	log_must zpool destroy $TESTPOOL1
 	if [[ -n $pathstochange ]]; then
 		for dev in $pathstochange; do
 			log_must mv "${dev}_new" $dev
@@ -143,6 +155,7 @@
 	log_must zpool destroy $TESTPOOL2
 
 	log_note ""
+	return $retval
 }
 
 function test_add_vdevs
@@ -152,7 +165,12 @@
 
 	log_note "$0: pool '$poolcreate', add $addvdevs."
 
-	test_common "$poolcreate" "$addvdevs"
+	for retry in $(seq 1 5); do
+		test_common "$poolcreate" "$addvdevs" && return
+		log_note "Retry $retry / 5 for test_add_vdevs()"
+	done
+
+	log_fail "Exhausted all 5 retries for test_add_vdevs()"
 }
 
 function test_attach_vdev
@@ -163,7 +181,12 @@
 
 	log_note "$0: pool '$poolcreate', attach $attachvdev to $attachto."
 
-	test_common "$poolcreate" "" "$attachto $attachvdev"
+	for retry in $(seq 1 5); do
+		test_common "$poolcreate" "" "$attachto $attachvdev" && return
+		log_note "Retry $retry / 5 for test_attach_vdev()"
+	done
+
+	log_fail "Exhausted all 5 retries for test_attach_vdev()"
 }
 
 function test_detach_vdev
@@ -173,7 +196,12 @@
 
 	log_note "$0: pool '$poolcreate', detach $detachvdev."
 
-	test_common "$poolcreate" "" "" "$detachvdev"
+	for retry in $(seq 1 5); do
+		test_common "$poolcreate" "" "" "$detachvdev" && return
+		log_note "Retry $retry / 5 for test_detach_vdev()"
+	done
+
+	log_fail "Exhausted all 5 retries for test_detach_vdev()"
 }
 
 function test_attach_detach_vdev
@@ -186,7 +214,13 @@
 	log_note "$0: pool '$poolcreate', attach $attachvdev to $attachto," \
 	    "then detach $detachvdev."
 
-	test_common "$poolcreate" "" "$attachto $attachvdev" "$detachvdev"
+	for retry in $(seq 1 5); do
+		test_common "$poolcreate" "" "$attachto $attachvdev" \
+		    "$detachvdev" && return
+		log_note "Retry $retry / 5 for test_attach_detach_vdev()"
+	done
+
+	log_fail "Exhausted all 5 retries for test_attach_detach_vdev()"
 }
 
 function test_remove_vdev
@@ -197,18 +231,24 @@
 
 	log_note "$0: pool '$poolcreate', remove $removevdev."
 
-	test_common "$poolcreate" "" "" "" "$removevdev" "$finalpool"
+	for retry in $(seq 1 5); do
+		test_common "$poolcreate" "" "" "" "$removevdev" \
+		    "$finalpool" && return
+		log_note "Retry $retry / 5 for test_remove_vdev()"
+	done
+
+	log_fail "Exhausted all 5 retries for test_remove_vdev()"
 }
 
 # Record txg history
-is_linux && log_must set_tunable32 zfs_txg_history 100
+is_linux && log_must set_tunable32 TXG_HISTORY 100
 
 # Make the devices bigger to reduce chances of overwriting MOS metadata.
 increase_device_sizes $(( FILE_SIZE * 4 ))
 
 # Increase the number of metaslabs for small pools temporarily to
 # reduce the chance of reusing a metaslab that holds old MOS metadata.
-log_must set_tunable64 zfs_vdev_min_ms_count 150
+log_must set_tunable64 VDEV_MIN_MS_COUNT 150
 
 # Part of the rewind test is to see how it reacts to path changes
 typeset pathstochange="$VDEV0 $VDEV1 $VDEV2 $VDEV3"
@@ -220,6 +260,7 @@
 test_add_vdevs "$VDEV0" "$VDEV1 $VDEV2"
 test_add_vdevs "mirror $VDEV0 $VDEV1" "mirror $VDEV2 $VDEV3"
 test_add_vdevs "$VDEV0" "raidz $VDEV1 $VDEV2 $VDEV3"
+test_add_vdevs "$VDEV0" "draid $VDEV1 $VDEV2 $VDEV3"
 test_add_vdevs "$VDEV0" "log $VDEV1"
 test_add_vdevs "$VDEV0 log $VDEV1" "$VDEV2"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh
index bc2c611..b03b39d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_rewind_device_replaced.ksh

@@ -60,10 +60,10 @@
 function custom_cleanup
 {
 	# Revert zfs_txg_timeout to defaults
-	[[ -n ZFS_TXG_TIMEOUT ]] &&
+	[[ -n $ZFS_TXG_TIMEOUT ]] &&
 	    log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
 	log_must rm -rf $BACKUP_DEVICE_DIR
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	cleanup
 }
 
@@ -102,13 +102,13 @@
 	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
 
 	# Ensure resilvering doesn't complete.
-	log_must set_tunable32 zfs_scan_suspend_progress 1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby
 
 	# Confirm pool is still replacing
 	log_must pool_is_replacing $TESTPOOL1
 	log_must zpool export $TESTPOOL1
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 
 	############################################################
 	# Test 1: rewind while device is resilvering.
@@ -151,7 +151,7 @@
 }
 
 # Record txg history
-is_linux && log_must set_tunable32 zfs_txg_history 100
+is_linux && log_must set_tunable32 TXG_HISTORY 100
 
 log_must mkdir -p $BACKUP_DEVICE_DIR
 # Make the devices bigger to reduce chances of overwriting MOS metadata.
@@ -176,6 +176,11 @@
 	"raidz $VDEV0 $VDEV3 $VDEV2" \
 	"$VDEV0 $VDEV1 $VDEV2" 10
 
+test_replace_vdev "draid $VDEV0 $VDEV1 $VDEV2 $VDEV3" \
+	"$VDEV1" "$VDEV4" \
+	"draid $VDEV0 $VDEV4 $VDEV2 $VDEV3 spares draid1-0-0" \
+	"$VDEV0 $VDEV1 $VDEV2 $VDEV3" 10
+
 set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
 
 log_pass "zpool import rewind after device replacement passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/setup.ksh
index 9f0ccfb..22e619d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/setup.ksh

@@ -33,32 +33,8 @@
 . $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
 
 verify_runnable "global"
-verify_disk_count "$DISKS" 2
 
-if ! $(is_physical_device $ZFS_DISK1) ; then
-	log_unsupported "Only partitionable physical disks can be used"
-fi
-
-DISK=${DISKS%% *}
-
-for dev in $ZFS_DISK1 $ZFS_DISK2 ; do
-	log_must cleanup_devices $dev
-done
-
-typeset -i i=0
-while (( i <= $GROUP_NUM )); do
-	if ! is_linux; then
-		if (( i == 2 )); then
-			(( i = i + 1 ))
-			continue
-		fi
-	fi
-	log_must set_partition $i "$cyl" $SLICE_SIZE $ZFS_DISK1
-	cyl=$(get_endslice $ZFS_DISK1 $i)
-	(( i = i + 1 ))
-done
-
-create_pool "$TESTPOOL" "$ZFSSIDE_DISK1"
+create_pool "$TESTPOOL" "$DISK"
 
 if [[ -d $TESTDIR ]]; then
 	rm -rf $TESTDIR  || log_unresolved Could not remove $TESTDIR
@@ -73,7 +49,7 @@
 
 i=0
 while (( i < $MAX_NUM )); do
-	log_must mkfile $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
+	log_must truncate -s $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
 	(( i = i + 1 ))
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
index 79423ab..25f541e 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg

@@ -30,87 +30,11 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-export DISKSARRAY=$DISKS
-export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
-typeset -a disk_array=($(find_disks $DISKS))
-case "${#disk_array[*]}" in
-0)
-	#
-	# on stf_configure, disk_freelist returns empty.
-	#
-	DISK_COUNT=0
-	;;
-1)
-	# We need to repartition the single disk to two slices.
-	if is_linux; then
-	        set_device_dir
-	        set_slice_prefix
-		PRIMARY_SLICE=1
-		DISK_COUNT=1
-		ZFS_DISK1=${disk_array[0]}
-		ZFS_DISK2=${disk_array[0]}
-		if is_mpath_device $ZFS_DISK1; then
-			export DEV_DSKDIR=$DEV_MPATHDIR
-		else
-			export DEV_DSKDIR=$DEV_RDSKDIR
-		fi
-		if ( is_mpath_device $ZFS_DISK1 ) && [[ -z $(echo $ZFS_DISK1 | awk 'substr($1,18,1)\
-                    ~ /^[[:digit:]]+$/') ]] || ( is_real_device $ZFS_DISK1 ); then
-			ZFSSIDE_DISK1=${ZFS_DISK1}1
-		elif ( is_mpath_device $ZFS_DISK1 || is_loop_device $ZFS_DISK1 ); then
-			ZFSSIDE_DISK1=${ZFS_DISK1}p1
-		else
-			log_fail "$ZFS_DISK1 not supported for partitioning."
-		fi
-	else
-		export DEV_DSKDIR="/dev"
-		PRIMARY_SLICE=2
-		DISK_COUNT=1
-		ZFS_DISK1=${disk_array[0]}
-		ZFSSIDE_DISK1=${ZFS_DISK1}s0
-		ZFS_DISK2=${disk_array[0]}
-	fi
-	;;
-*)
-	# We need to repartition the single disk to two slices.
-	if is_linux; then
-	        set_device_dir
-	        set_slice_prefix
-		PRIMARY_SLICE=1
-		DISK_COUNT=2
-		ZFS_DISK1=${disk_array[0]}
-		if is_mpath_device $ZFS_DISK1; then
-			export DEV_DSKDIR=$DEV_MPATHDIR
-		else
-			export DEV_DSKDIR=$DEV_RDSKDIR
-		fi
-		if ( is_mpath_device $ZFS_DISK1 ) && [[ -z $(echo $ZFS_DISK1 | awk 'substr($1,18,1)\
-		    ~ /^[[:digit:]]+$/') ]] || ( is_real_device $ZFS_DISK1 ); then
-			ZFSSIDE_DISK1=${ZFS_DISK1}1
-		elif ( is_mpath_device $ZFS_DISK1 || is_loop_device $ZFS_DISK1 ); then
-			ZFSSIDE_DISK1=${ZFS_DISK1}p1
-		else
-			log_fail "$ZFS_DISK1 not supported for partitioning."
-		fi
-		ZFS_DISK2=${disk_array[1]}
-	else
-		export DEV_DSKDIR="/dev"
-		PRIMARY_SLICE=2
-		DISK_COUNT=2
-		ZFS_DISK1=${disk_array[0]}
-		ZFSSIDE_DISK1=${ZFS_DISK1}s0
-		ZFS_DISK2=${disk_array[1]}
-	fi
-	;;
-esac
-
-export DISK_COUNT ZFS_DISK1 ZFSSIDE_DISK1 ZFS_DISK2
-
+export DISK=${DISKS%% *}
 export FS_SIZE="$((($MINVDEVSIZE / (1024 * 1024)) * 32))m"
 export FILE_SIZE="$((MINVDEVSIZE))"
 export SLICE_SIZE="$((($MINVDEVSIZE / (1024 * 1024)) * 2))m"
-export MAX_NUM=5
-export GROUP_NUM=3
+export MAX_NUM=6
 export DEVICE_DIR=$TEST_BASE_DIR/dev_import-test
 export BACKUP_DEVICE_DIR=$TEST_BASE_DIR/bakdev_import-test
 export DEVICE_FILE=disk
@@ -123,6 +47,7 @@
 export MD5FILE=$TEST_BASE_DIR/md5sums.$$
 export MD5FILE2=$TEST_BASE_DIR/md5sums.$$.2
 
+export GROUP_NUM=3
 typeset -i num=0
 while (( num < $GROUP_NUM )); do
 	DEVICE_FILES="$DEVICE_FILES ${DEVICE_DIR}/${DEVICE_FILE}$num"
@@ -135,5 +60,6 @@
 export VDEV2=$DEVICE_DIR/${DEVICE_FILE}2
 export VDEV3=$DEVICE_DIR/${DEVICE_FILE}3
 export VDEV4=$DEVICE_DIR/${DEVICE_FILE}4
+export VDEV5=$DEVICE_DIR/${DEVICE_FILE}5
 
 export ALTER_ROOT=/alter_import-test

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib
index c365ec4..d79e33f 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib

@@ -31,10 +31,10 @@
 	log_must rm -rf $DEVICE_DIR/*
 	typeset i=0
 	while (( i < $MAX_NUM )); do
-		log_must mkfile $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
+		log_must truncate -s $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
 		((i += 1))
 	done
-	is_linux && set_tunable32 "zfs_txg_history" 0
+	is_linux && set_tunable32 TXG_HISTORY 0
 }
 
 #
@@ -163,7 +163,7 @@
 
 	typeset -i i=0
 	while (( i < $MAX_NUM )); do
-		log_must mkfile $newfilesize ${DEVICE_DIR}/${DEVICE_FILE}$i
+		log_must truncate -s $newfilesize ${DEVICE_DIR}/${DEVICE_FILE}$i
 		((i += 1))
 	done
 }
@@ -171,15 +171,18 @@
 #
 # Translate vdev names returned by zpool status into more generic names.
 #
-# eg: mirror-2 --> mirror
-#
 function _translate_vdev
 {
 	typeset vdev=$1
 
-	typeset keywords="mirror replacing raidz1 raidz2 raidz3 indirect"
+	#
+	# eg: mirror-2 --> mirror
+	# eg: draid2:4d:12c:1s-0 --> draid2
+	# (the draid data/children/spares counts may each be multi-digit)
+	typeset keywords="mirror replacing raidz1 raidz2 raidz3 indirect draid1 draid2 draid3"
+	for word in $keywords; do
+		echo $vdev | grep -qE \
+		    "^${word}-[0-9]+\$|^${word}:[0-9]+d:[0-9]+c:[0-9]+s-[0-9]+\$"
 		if [[ $? -eq 0 ]]; then
 			vdev=$word
 			break
@@ -188,6 +191,7 @@
 
 	[[ $vdev == "logs" ]] && echo "log" && return 0
 	[[ $vdev == "raidz1" ]] && echo "raidz" && return 0
+	[[ $vdev == "draid1" ]] && echo "draid" && return 0
 
 	echo $vdev
 	return 0
@@ -317,71 +321,40 @@
 
 function set_vdev_validate_skip
 {
-	set_tunable32 "vdev_validate_skip" "$1"
+	set_tunable32 VDEV_VALIDATE_SKIP "$1"
 }
 
 function get_zfs_txg_timeout
 {
-	get_tunable "zfs_txg_timeout"
+	get_tunable TXG_TIMEOUT
 }
 
 function set_zfs_txg_timeout
 {
-	set_tunable32 "zfs_txg_timeout" "$1"
+	set_tunable32 TXG_TIMEOUT "$1"
 }
 
 function set_spa_load_verify_metadata
 {
-	set_tunable32 "spa_load_verify_metadata" "$1"
+	set_tunable32 SPA_LOAD_VERIFY_METADATA "$1"
 }
 
 function set_spa_load_verify_data
 {
-	set_tunable32 "spa_load_verify_data" "$1"
+	set_tunable32 SPA_LOAD_VERIFY_DATA "$1"
 }
 
 function set_zfs_max_missing_tvds
 {
-	set_tunable32 "zfs_max_missing_tvds" "$1"
+	set_tunable32 MAX_MISSING_TVDS "$1"
 }
 
 #
-# Use mdb to find the last txg that was synced in an active pool.
+# Use zdb to find the last txg that was synced in an active pool.
 #
 function get_last_txg_synced
 {
 	typeset pool=$1
 
-	if is_linux; then
-		txg=$(tail "/proc/spl/kstat/zfs/$pool/txgs" |
-		    awk '$3=="C" {print $1}' | tail -1)
-		[[ "$txg" ]] || txg=0
-		echo $txg
-		return 0
-	fi
-
-	typeset spas
-	spas=$(mdb -k -e "::spa")
-	[[ $? -ne 0 ]] && return 1
-
-	typeset spa=""
-	print "$spas\n" | while read line; do
-		typeset poolname=$(echo "$line" | awk '{print $3}')
-		typeset addr=$(echo "$line" | awk '{print $1}')
-		if [[ $poolname == $pool ]]; then
-			spa=$addr
-			break
-		fi
-	done
-	if [[ -z $spa ]]; then
-		log_fail "Couldn't find pool '$pool'"
-		return 1
-	fi
-	typeset mdbcmd="$spa::print spa_t spa_ubsync.ub_txg | ::eval '.=E'"
-	typeset -i txg
-	txg=$(mdb -k -e "$mdbcmd")
-	[[ $? -ne 0 ]] && return 1
-
-	echo $txg
-	return 0
+	zdb -u $pool | awk '$1 == "txg" { print $3 }' | sort -n | tail -n 1
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_007_pos.ksh
index 6e93fd4..928efeb 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_007_pos.ksh

@@ -63,7 +63,7 @@
 	"other pool, it still can be imported correctly."
 log_onexit cleanup
 
-log_must zpool create $TESTPOOL1 raidz $VDEV0 $VDEV1 $VDEV2 $VDIV3
+log_must zpool create $TESTPOOL1 raidz $VDEV0 $VDEV1 $VDEV2 $VDEV3
 typeset guid=$(get_config $TESTPOOL1 pool_guid)
 typeset target=$TESTPOOL1
 if (( RANDOM % 2 == 0 )) ; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_008_pos.ksh
index 096bbe8..f8da584 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_008_pos.ksh

@@ -63,7 +63,7 @@
 	"other pool, it still can be imported correctly."
 log_onexit cleanup
 
-log_must zpool create $TESTPOOL1 raidz2 $VDEV0 $VDEV1 $VDEV2 $VDIV3
+log_must zpool create $TESTPOOL1 raidz2 $VDEV0 $VDEV1 $VDEV2 $VDEV3
 typeset guid=$(get_config $TESTPOOL1 pool_guid)
 typeset target=$TESTPOOL1
 if (( RANDOM % 2 == 0 )) ; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh
index b337bd0..212024d 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_010_pos.ksh

@@ -39,7 +39,7 @@
 # STRATEGY:
 #	1. Create a 5 ways mirror pool A with dev0/1/2/3/4, then destroy it.
 #	2. Create a stripe pool B with dev1. Then destroy it.
-#	3. Create a raidz2 pool C with dev2/3/4. Then destroy it.
+#	3. Create a draid2 pool C with dev2/3/4/5. Then destroy it.
 #	4. Create a raidz pool D with dev3/4. Then destroy it.
 #	5. Create a stripe pool E with dev4. Then destroy it.
 #	6. Verify 'zpool import -D -a' recover all the pools.
@@ -74,7 +74,7 @@
 log_must zpool create $poolB $VDEV1
 log_must zpool destroy $poolB
 
-log_must zpool create $poolC raidz2 $VDEV2 $VDEV3 $VDEV4
+log_must zpool create $poolC draid2 $VDEV2 $VDEV3 $VDEV4 $VDEV5
 log_must zpool destroy $poolC
 
 log_must zpool create $poolD raidz $VDEV3 $VDEV4

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_012_pos.ksh
index 53d6fd3..ec387b2 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_012_pos.ksh

@@ -84,9 +84,9 @@
 
 	destroy_pool $TESTPOOL1
 
-	if datasetexists $TESTPOOL/$TESTFS; then
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS
-	fi
+	datasetexists $TESTPOOL/$TESTFS && \
+		destroy_dataset $TESTPOOL/$TESTFS -Rf
+
 	log_must zfs create $TESTPOOL/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 
@@ -138,7 +138,7 @@
 				if ((nfs_share_bit == 1)); then
 					log_note "Set sharenfs=on $pool"
 					log_must zfs set sharenfs=on $pool
-					log_must is_shared $pool
+					! is_freebsd && log_must is_shared $pool
 					f_share="true"
 					nfs_flag="sharenfs=on"
 				fi
@@ -147,9 +147,9 @@
 				while ((guid_bit <= 1)); do
 					typeset guid_flag="pool name"
 					if [[ -z $option ]]; then
-						log_must zpool export $pool
+						log_must_busy zpool export $pool
 					else
-						log_must zpool destroy $pool
+						log_must_busy zpool destroy $pool
 					fi
 
 					typeset target=$pool
@@ -181,19 +181,21 @@
 					for fs in $mount_fs; do
 						log_must ismounted $pool/$fs
 						[[ -n $f_share ]] && \
+						    ! is_freebsd && \
 						    log_must is_shared $pool/$fs
 					done
 
 					for fs in $nomount_fs; do
 						log_mustnot ismounted $pool/$fs
-						log_mustnot is_shared $pool/$fs
+						! is_freebsd && \
+						    log_mustnot is_shared $pool/$fs
 					done
 					((guid_bit = guid_bit + 1))
 				done
 				# reset nfsshare=off
 				if [[ -n $f_share ]]; then
 					log_must zfs set sharenfs=off $pool
-					log_mustnot is_shared $pool
+					! is_freebsd && log_mustnot is_shared $pool
 				fi
 				((nfs_share_bit = nfs_share_bit + 1))
 			done

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_013_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_013_neg.ksh
index 0a221b8..7fef625 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_013_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_013_neg.ksh

@@ -60,9 +60,8 @@
 
 function cleanup
 {
-	poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME
-	[[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE
-	return 0
+	poolexists $POOL_NAME && destroy_pool $POOL_NAME
+	rm -f /$TESTPOOL/$POOL_FILE
 }
 
 log_assert "'zpool import' fails for pool that was not cleanly exported"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_016_pos.ksh
new file mode 100755
index 0000000..5434625
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_016_pos.ksh

@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+
+#
+# DESCRIPTION:
+#	For draid, when one of a destroyed pool's devices was removed or is
+#	used by another pool, the pool can still be imported correctly.
+#
+# STRATEGY:
+#	1. Create a draid pool A with N disks.
+#	2. Destroy this pool A.
+#	3. Create another pool B with 1 disk which was used by pool A.
+#	4. Verify that importing this draid pool succeeds.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL2
+	destroy_pool $TESTPOOL1
+
+	log_must rm -rf $DEVICE_DIR/*
+	typeset i=0
+	while (( i < $MAX_NUM )); do
+		log_must mkfile $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
+		((i += 1))
+	done
+}
+
+log_assert "For draid, one destroyed pools devices was removed or used by " \
+	"other pool, it still can be imported correctly."
+log_onexit cleanup
+
+log_must zpool create $TESTPOOL1 draid $VDEV0 $VDEV1 $VDEV2 $VDEV3
+typeset guid=$(get_config $TESTPOOL1 pool_guid)
+typeset target=$TESTPOOL1
+if (( RANDOM % 2 == 0 )) ; then
+	target=$guid
+	log_note "Import by guid."
+fi
+log_must zpool destroy $TESTPOOL1
+
+log_must zpool create $TESTPOOL2 $VDEV0
+log_must zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL1
+
+log_must zpool destroy $TESTPOOL2
+log_must rm -rf $VDEV0
+log_must zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL1
+
+log_note "For draid, two destroyed pool's devices were used, import failed."
+log_must mkfile $FILE_SIZE $VDEV0
+log_must zpool create $TESTPOOL2 $VDEV0 $VDEV1
+log_mustnot zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL2
+
+log_pass "zpool import -D draid passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_017_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_017_pos.ksh
new file mode 100755
index 0000000..2e6cef2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_017_pos.ksh

@@ -0,0 +1,92 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+
+#
+# DESCRIPTION:
+#	For draid2, when two of a destroyed pool's devices were removed or are
+#	used by another pool, the pool can still be imported correctly.
+#
+# STRATEGY:
+#	1. Create a draid2 pool A with N disks.
+#	2. Destroy this pool A.
+#	3. Create another pool B with two disks which were used by pool A.
+#	4. Verify that importing this draid2 pool succeeds.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL2
+	destroy_pool $TESTPOOL1
+
+	log_must rm -rf $DEVICE_DIR/*
+	typeset i=0
+	while (( i < $MAX_NUM )); do
+		log_must mkfile $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
+		((i += 1))
+	done
+}
+
+log_assert "For draid2, two destroyed pools devices was removed or used by " \
+	"other pool, it still can be imported correctly."
+log_onexit cleanup
+
+log_must zpool create $TESTPOOL1 draid2 $VDEV0 $VDEV1 $VDEV2 $VDEV3
+typeset guid=$(get_config $TESTPOOL1 pool_guid)
+typeset target=$TESTPOOL1
+if (( RANDOM % 2 == 0 )) ; then
+	target=$guid
+	log_note "Import by guid."
+fi
+log_must zpool destroy $TESTPOOL1
+
+log_must zpool create $TESTPOOL2 $VDEV0 $VDEV1
+log_must zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL1
+
+log_must zpool destroy $TESTPOOL2
+log_must rm -rf $VDEV0 $VDEV1
+log_must zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL1
+
+log_note "For draid2, more than two destroyed pool's devices were used, " \
+	"import failed."
+log_must mkfile $FILE_SIZE $VDEV0 $VDEV1
+log_must zpool create $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2
+log_mustnot zpool import -d $DEVICE_DIR -D -f $target
+log_must zpool destroy $TESTPOOL2
+
+log_pass "zpool import -D draid2 passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh
index 86baf1f..40b6ca1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh

@@ -72,7 +72,7 @@
 
 old_mntpnt=$(get_prop mountpoint $POOL_NAME/testfs)
 log_must eval "ls $old_mntpnt | grep -q testfile"
-block_device_wait
+block_device_wait /dev/zvol/$POOL_NAME/testvol
 log_mustnot dd if=/dev/zero of=/dev/zvol/$POOL_NAME/testvol bs=512 count=1
 log_must dd if=/dev/zvol/$POOL_NAME/testvol of=/dev/null bs=512 count=1
 
@@ -90,7 +90,7 @@
 	zfs recv $POOL_NAME/encroot/testfs"
 log_must eval "zfs send $POOL_NAME/testvol@snap1 | \
 	zfs recv $POOL_NAME/encroot/testvol"
-block_device_wait
+block_device_wait /dev/zvol/$POOL_NAME/encroot/testvol
 log_must dd if=/dev/zero of=/dev/zvol/$POOL_NAME/encroot/testvol bs=512 count=1
 new_mntpnt=$(get_prop mountpoint $POOL_NAME/encroot/testfs)
 log_must eval "ls $new_mntpnt | grep -q testfile"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh
index d06a9cd..a0f063a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh

@@ -51,7 +51,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_disable_ivset_guid_check 0
+	log_must set_tunable32 DISABLE_IVSET_GUID_CHECK 0
 	poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME
 	[[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE
 	return 0
@@ -91,7 +91,7 @@
 
 # 2. Prepare pool to fix existing datasets
 log_must zpool set feature@bookmark_v2=enabled $POOL_NAME
-log_must set_tunable32 zfs_disable_ivset_guid_check 1
+log_must set_tunable32 DISABLE_IVSET_GUID_CHECK 1
 log_must zfs create $POOL_NAME/fixed
 
 # 3. Use raw sends to fix datasets

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh
index 78e9bbf..3b5167f 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh

@@ -57,8 +57,8 @@
 #	   Using the various combinations.
 #		- Regular import
 #		- Alternate Root Specified
-#	   It should be succeed with single d/m device upon 'raidz' & 'mirror',
-#	   but failed against 'regular' or more d/m devices.
+#	   It should succeed with a single d/m device for 'raidz', 'mirror',
+#	   and 'draid', but fail for 'regular' pools or with more d/m devices.
 #	6. If import succeed, verify following is true:
 #		- The pool shows up under 'zpool list'.
 #		- The pool's health should be DEGRADED.
@@ -67,7 +67,16 @@
 
 verify_runnable "global"
 
-set -A vdevs "" "mirror" "raidz"
+# Randomly test a subset of combinations to speed up the test.
+(( rc=RANDOM % 3 ))
+if [[ $rc == 0 ]] ; then
+	set -A vdevs "" "mirror" "raidz"
+elif [[ $rc == 1 ]] ; then
+	set -A vdevs "" "mirror" "draid"
+else
+	set -A vdevs "" "raidz" "draid"
+fi
+
 set -A options "" "-R $ALTER_ROOT"
 
 function cleanup
@@ -89,7 +98,8 @@
 	log_must rm -rf $DEVICE_DIR/*
 	typeset i=0
 	while (( i < $MAX_NUM )); do
-		log_must mkfile $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
+		log_must rm -f ${DEVICE_DIR}/${DEVICE_FILE}$i
+		log_must truncate -s $FILE_SIZE ${DEVICE_DIR}/${DEVICE_FILE}$i
 		((i += 1))
 	done
 }
@@ -157,6 +167,9 @@
 				'raidz')  (( count > 1 )) && \
 						action=log_mustnot
 					;;
+				'draid')  (( count > 1 )) && \
+						action=log_mustnot
+					;;
 				'')  action=log_mustnot
 					;;
 			esac

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh
index c6d2637..60af3f3 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh

@@ -43,6 +43,8 @@
 #		  before data integrity is compromised
 #		- Raidz could withstand one devices failing
 #		  before data integrity is compromised
+#		- dRAID could withstand one devices failing
+#		  before data integrity is compromised
 #	Verify that is true.
 #
 # STRATEGY:
@@ -50,6 +52,7 @@
 #		- Regular pool
 #		- Mirror
 #		- Raidz
+#		- dRAID
 #	2. Create necessary filesystem and test files.
 #	3. Export the test pool.
 #	4. Move one or more device files to other directory
@@ -62,7 +65,16 @@
 
 verify_runnable "global"
 
-set -A vdevs "" "mirror" "raidz"
+# Randomly test a subset of combinations to speed up the test.
+(( rc=RANDOM % 3 ))
+if [[ $rc == 0 ]] ; then
+	set -A vdevs "" "mirror" "raidz"
+elif [[ $rc == 1 ]] ; then
+	set -A vdevs "" "mirror" "draid"
+else
+	set -A vdevs "" "raidz" "draid"
+fi
+
 set -A options "" "-R $ALTER_ROOT"
 
 function cleanup
@@ -88,7 +100,8 @@
 	while (( i < $MAX_NUM )); do
 		typeset dev_file=${DEVICE_DIR}/${DEVICE_FILE}$i
 		if [[ ! -e ${dev_file} ]]; then
-			log_must mkfile $FILE_SIZE ${dev_file}
+			log_must rm -f ${dev_file}
+			log_must truncate -s $FILE_SIZE ${dev_file}
 		fi
 		((i += 1))
 	done
@@ -158,7 +171,8 @@
 			# Backup all device files while filesystem prepared.
 			#
 			if [[ -z $backup ]] ; then
-				log_must tar cf $DEVICE_DIR/$DEVICE_ARCHIVE ${DEVICE_FILE}*
+				log_must tar cf $DEVICE_DIR/$DEVICE_ARCHIVE \
+				    ${DEVICE_FILE}0 ${DEVICE_FILE}1 ${DEVICE_FILE}2
 				backup="true"
 			fi
 
@@ -174,6 +188,9 @@
 				'raidz')  (( count == 1 )) && \
 					action=log_must
 					;;
+				'draid')  (( count == 1 )) && \
+					action=log_must
+					;;
 			esac
 
 			typeset target=$TESTPOOL1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh
index 815d409..9d4629a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh

@@ -59,12 +59,12 @@
 
 verify_runnable "global"
 
-# See issue: https://github.com/zfsonlinux/zfs/issues/6839
-if is_linux; then
+# See issue: https://github.com/openzfs/zfs/issues/6839
+if ! is_illumos; then
 	log_unsupported "Test case may be slow"
 fi
 
-set -A vdevs "" "mirror" "raidz"
+set -A vdevs "" "mirror" "raidz" "draid"
 
 function verify
 {
@@ -207,6 +207,9 @@
 				'raidz')  (( overlap > 1 )) && \
 					action=log_mustnot
 					;;
+				'draid')  (( overlap > 1 )) && \
+					action=log_mustnot
+					;;
 				'')  action=log_mustnot
 					;;
 			esac

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
index 2ebc376..483c1c2 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am

@@ -2,6 +2,7 @@
 dist_pkgdata_SCRIPTS = \
 	cleanup.ksh \
 	zpool_initialize_attach_detach_add_remove.ksh \
+	zpool_initialize_fault_export_import_online.ksh \
 	zpool_initialize_import_export.ksh \
 	zpool_initialize_offline_export_import_online.ksh \
 	zpool_initialize_online_offline.ksh \
@@ -9,6 +10,7 @@
 	zpool_initialize_start_and_cancel_neg.ksh \
 	zpool_initialize_start_and_cancel_pos.ksh \
 	zpool_initialize_suspend_resume.ksh \
+	zpool_initialize_uninit.ksh \
 	zpool_initialize_unsupported_vdevs.ksh \
 	zpool_initialize_verify_checksums.ksh \
 	zpool_initialize_verify_initialized.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh
new file mode 100755
index 0000000..11b8a48
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh

@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Miscellaneous complex sequences of operations function as expected.
+#
+# STRATEGY:
+# 1. Create a pool with a two-way mirror.
+# 2. Start initializing, fault, export, import, online and verify along
+#    the way that the initializing was cancelled and not restarted.
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
+
+log_must zpool initialize $TESTPOOL $DISK1
+progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$progress" ]] && log_fail "Initializing did not start"
+
+log_must zpool offline -f $TESTPOOL $DISK1
+log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED"
+log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized"
+
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+
+log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED"
+log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized"
+
+log_must zpool online $TESTPOOL $DISK1
+log_must zpool clear $TESTPOOL $DISK1
+log_must check_vdev_state $TESTPOOL $DISK1 "ONLINE"
+log_must eval "zpool status -i $TESTPOOL | grep $DISK1 | grep uninitialized"
+
+log_pass "Initializing behaves as expected at each step of:" \
+    "initialize + fault + export + import + online"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh
new file mode 100755
index 0000000..17f776c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh

@@ -0,0 +1,141 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (C) 2023 Lawrence Livermore National Security, LLC.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Starting, stopping, uninitializing, and restarting an initialize works.
+#
+# STRATEGY:
+# 1. Create a three-disk pool.
+# 2. Verify uninitialize succeeds for uninitialized pool.
+# 3. Verify pool wide cancel|suspend + uninit
+#   a. Start initializing and verify that initializing is active.
+#   b. Verify uninitialize fails when actively initializing.
+#   c. Cancel or suspend initializing and verify that initializing is not active.
+#   d. Verify uninitialize succeeds after being cancelled.
+# 4. Verify per-disk cancel|suspend + uninit
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+DISK3="$(echo $DISKS | cut -d' ' -f3)"
+
+function status_check # pool disk1-state disk2-state disk3-state
+{
+        typeset pool="$1"
+        typeset disk1_state="$2"
+        typeset disk2_state="$3"
+        typeset disk3_state="$4"
+
+	state=$(zpool status -i "$pool" | grep "$DISK1" | grep "$disk1_state")
+        if [[ -z "$state" ]]; then
+		log_fail "DISK1 state; expected='$disk1_state' got '$state'"
+	fi
+
+	state=$(zpool status -i "$pool" | grep "$DISK2" | grep "$disk2_state")
+        if [[ -z "$state" ]]; then
+		log_fail "DISK2 state; expected='$disk2_state' got '$state'"
+	fi
+
+	state=$(zpool status -i "$pool" | grep "$DISK3" | grep "$disk3_state")
+        if [[ -z "$state" ]]; then
+		log_fail "DISK3 state; expected='$disk3_state' got '$state'"
+	fi
+}
+
+function status_check_all # pool disk-state
+{
+        typeset pool="$1"
+        typeset disk_state="$2"
+
+	status_check "$pool" "$disk_state" "$disk_state" "$disk_state"
+}
+
+# 1. Create a three-disk pool.
+log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
+status_check_all $TESTPOOL "uninitialized"
+
+# 2. Verify uninitialize succeeds for uninitialized pool.
+log_must zpool initialize -u $TESTPOOL
+status_check_all $TESTPOOL "uninitialized"
+
+# 3. Verify pool wide cancel + uninit
+log_must zpool initialize $TESTPOOL
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_mustnot zpool initialize -u $TESTPOOL
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_must zpool initialize -c $TESTPOOL
+status_check_all $TESTPOOL "uninitialized"
+
+log_must zpool initialize -u $TESTPOOL
+status_check_all $TESTPOOL "uninitialized"
+
+# 3. Verify pool wide suspend + uninit
+log_must zpool initialize $TESTPOOL
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_mustnot zpool initialize -u $TESTPOOL
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_must zpool initialize -s $TESTPOOL
+status_check_all $TESTPOOL "suspended"
+
+log_must zpool initialize -u $TESTPOOL
+status_check_all $TESTPOOL "uninitialized"
+
+# 4. Verify per-disk cancel|suspend + uninit
+log_must zpool initialize $TESTPOOL
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_must zpool initialize -c $TESTPOOL $DISK1
+log_must zpool initialize -s $TESTPOOL $DISK2
+log_mustnot zpool initialize -u $TESTPOOL $DISK3
+status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
+
+log_must zpool initialize -u $TESTPOOL $DISK1
+status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
+
+log_must zpool initialize -u $TESTPOOL $DISK2
+status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized"
+
+log_must zpool initialize $TESTPOOL $DISK1
+status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized"
+
+log_must zpool initialize $TESTPOOL $DISK2
+status_check_all $TESTPOOL "[[:digit:]]* initialized"
+
+log_must zpool initialize -s $TESTPOOL
+status_check_all $TESTPOOL "suspended"
+
+log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3
+status_check_all $TESTPOOL "uninitialized"
+
+log_pass "Initialize start + cancel/suspend + uninit + start works"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh
index 0fa6a0b..27a32d5 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh

@@ -24,7 +24,6 @@
 # Copyright (c) 2016 by Delphix. All rights reserved.
 #
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
 
 #
 # DESCRIPTION:
@@ -33,13 +32,13 @@
 # STRATEGY:
 # 1. Create a one-disk pool.
 # 2. Initialize the disk to completion.
-# 3. Load all metaslabs that don't have a spacemap, and make sure the entire
-#    metaslab has been filled with the initializing pattern (deadbeef).
+# 3. Load all metaslabs and make sure that each contains at least
+#    one instance of the initializing pattern (deadbeef).
 #
 
 function cleanup
 {
-	set_tunable64 zfs_initialize_value $ORIG_PATTERN
+	set_tunable64 INITIALIZE_VALUE $ORIG_PATTERN
         zpool import -d $TESTDIR $TESTPOOL
 
         if datasetexists $TESTPOOL ; then
@@ -54,36 +53,38 @@
 PATTERN="deadbeefdeadbeef"
 SMALLFILE="$TESTDIR/smallfile"
 
-ORIG_PATTERN=$(get_tunable zfs_initialize_value)
-log_must set_tunable64 zfs_initialize_value $(printf %llu 0x$PATTERN)
+ORIG_PATTERN=$(get_tunable INITIALIZE_VALUE)
+log_must set_tunable64 INITIALIZE_VALUE $(printf %llu 0x$PATTERN)
 
 log_must mkdir "$TESTDIR"
-log_must mkfile $MINVDEVSIZE "$SMALLFILE"
+log_must truncate -s $MINVDEVSIZE "$SMALLFILE"
 log_must zpool create $TESTPOOL "$SMALLFILE"
-log_must zpool initialize $TESTPOOL
-
-while [[ "$(initialize_progress $TESTPOOL $SMALLFILE)" -lt "100" ]]; do
-        sleep 0.5
-done
-
+log_must zpool initialize -w $TESTPOOL
 log_must zpool export $TESTPOOL
 
-spacemaps=0
+metaslabs=0
 bs=512
-while read -r sm; do
-        typeset offset="$(echo $sm | cut -d ' ' -f1)"
-        typeset size="$(echo $sm | cut -d ' ' -f2)"
+zdb -p $TESTDIR -Pme $TESTPOOL | awk '/metaslab[ ]+[0-9]+/ { print $4, $8 }' |
+while read -r offset_size; do
+	typeset offset=$(echo $offset_size | cut -d ' ' -f1)
+	typeset size=$(echo $offset_size | cut -d ' ' -f2)
 
-	spacemaps=$((spacemaps + 1))
-        offset=$(((4 * 1024 * 1024) + 16#$offset))
-	out=$(dd if=$SMALLFILE skip=$(($offset / $bs)) \
-	    count=$(($size / $bs)) bs=$bs 2>/dev/null | od -t x8 -Ad)
-	echo "$out" | log_must egrep "$PATTERN|\*|$size"
-done <<< "$(zdb -p $TESTDIR -Pme $TESTPOOL | egrep 'spacemap[ ]+0 ' | \
-    awk '{print $4, $8}')"
+	log_note "offset: '$offset'"
+	log_note "size: '$size'"
 
-if [[ $spacemaps -eq 0 ]];then
-	log_fail "Did not find any empty space maps to check"
+	metaslabs=$((metaslabs + 1))
+	offset=$(((4 * 1024 * 1024) + 16#$offset))
+	log_note "vdev file offset: '$offset'"
+
+	# Note we use '-t x4' instead of '-t x8' here because x8 is not
+	# a supported format on FreeBSD.
+	dd if=$SMALLFILE skip=$((offset / bs)) count=$((size / bs)) bs=$bs |
+	    od -t x4 -Ad | grep -qE "deadbeef +deadbeef +deadbeef +deadbeef" ||
+	    log_fail "Pattern not found in metaslab free space"
+done
+
+if [[ $metaslabs -eq 0 ]]; then
+	log_fail "Did not find any metaslabs to check"
 else
-	log_pass "Initializing wrote appropriate amount to disk"
+	log_pass "Initializing wrote to each metaslab"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/labelclear.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/labelclear.cfg
index 85148d6..b2a10aa 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/labelclear.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/labelclear.cfg

@@ -16,6 +16,13 @@
 . $STF_SUITE/include/libtest.shlib
 
 typeset disks=(${DISKS[*]})
-typeset disk1=${disks[0]}
-typeset disk2=${disks[1]}
-typeset disk3=${disks[2]}
+
+if is_freebsd; then
+	typeset disk1=/dev/${disks[0]}
+	typeset disk2=/dev/${disks[1]}
+	typeset disk3=/dev/${disks[2]}
+else
+	typeset disk1=${disks[0]}
+	typeset disk2=${disks[1]}
+	typeset disk3=${disks[2]}
+fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_valid.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_valid.ksh
index 211829d..31af9fd 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_valid.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/zpool_labelclear_valid.ksh

@@ -39,13 +39,15 @@
 function cleanup
 {
 	poolexists $TESTPOOL && destroy_pool $TESTPOOL
-	rm -f $PATTERN_FILE $DEVICE1 $DEVICE2 $DEVICE3 $DEVICE4
+	rm -f $PATTERN_FILE $DISK_PATTERN_FILE \
+	    $DEVICE1 $DEVICE2 $DEVICE3 $DEVICE4
 }
 
 log_onexit cleanup
 log_assert "zpool labelclear will only clear valid labels"
 
 PATTERN_FILE=$TEST_BASE_DIR/pattern
+DISK_PATTERN_FILE=$TEST_BASE_DIR/disk-pattern
 
 DEVICE1="$TEST_BASE_DIR/device-1"
 DEVICE2="$TEST_BASE_DIR/device-2"
@@ -79,7 +81,8 @@
 
 # Verify the original pattern over the first two labels is intact
 for dev in $DEVICE1 $DEVICE2 $DEVICE3 $DEVICE4; do
-	log_must cmp -n $((4 * 1048576)) $dev $PATTERN_FILE
+	log_must dd if=$dev of=$DISK_PATTERN_FILE bs=1048576 count=4
+	log_must cmp $DISK_PATTERN_FILE $PATTERN_FILE
 	log_mustnot zdb -lq $dev
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/setup.ksh
index f3e3606..8ce094b 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/setup.ksh

@@ -34,10 +34,4 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
-	log_unsupported "This directory cannot be run on raw files."
-fi
-
-partition_disk $SIZE $DISK 6
-
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove.cfg
index 7def918..1b8312e 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove.cfg

@@ -28,30 +28,4 @@
 # Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
-export DISK=${DISKS%% *}
-export SIZE="200m"
-export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
-export DISKSARRAY=$DISKS
-
-if is_linux; then
-	set_device_dir
-	set_slice_prefix
-	export SLICE0=1
-	export SLICE1=2
-	export SLICE2=3
-	export SLICE3=4
-	export SLICE4=5
-	export SLICE5=6
-	export SLICE6=7
-	export SLICE7=8
-else
-	export SLICE_PREFIX="s"
-	export SLICE0=0
-	export SLICE1=1
-	export SLICE2=2
-	export SLICE3=3
-	export SLICE4=4
-	export SLICE5=5
-	export SLICE6=6
-	export SLICE7=7
-fi
+echo $DISKS | read DISK0 DISK1 DISK2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh
index 365e86c..0c098a6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_001_neg.ksh

@@ -42,14 +42,13 @@
 # 3. Verify that the remove failed.
 #
 
-typeset disk=${DISK}
-typeset vdev_devs="${disk}${SLICE_PREFIX}${SLICE0}"
-typeset mirror_devs="${disk}${SLICE_PREFIX}${SLICE0} ${disk}${SLICE_PREFIX}${SLICE1}"
+typeset vdev_devs="${DISK0}"
+typeset mirror_devs="${DISK0} ${DISK1}"
 typeset raidz_devs=${mirror_devs}
 typeset raidz1_devs=${mirror_devs}
-typeset raidz2_devs="${mirror_devs} ${disk}${SLICE_PREFIX}${SLICE3}"
-typeset spare_devs1="${disk}${SLICE_PREFIX}${SLICE0}"
-typeset spare_devs2="${disk}${SLICE_PREFIX}${SLICE1}"
+typeset raidz2_devs="${mirror_devs} ${DISK2}"
+typeset spare_devs1="${DISK0}"
+typeset spare_devs2="${DISK1}"
 
 function check_remove
 {

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_002_pos.ksh
index 340735a..4ab7ac6 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_002_pos.ksh

@@ -50,10 +50,9 @@
 }
 
 log_onexit cleanup
-typeset disk=${DISK}
 
-typeset spare_devs1="${disk}${SLICE_PREFIX}${SLICE0}"
-typeset spare_devs2="${disk}${SLICE_PREFIX}${SLICE1}"
+typeset spare_devs1="${DISK0}"
+typeset spare_devs2="${DISK1}"
 
 log_assert "zpool remove can only remove inactive hotspare device from pool"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_003_pos.ksh
index c27c4c7..4e132d9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_remove/zpool_remove_003_pos.ksh

@@ -54,17 +54,15 @@
 log_onexit cleanup
 typeset disk=${DISK}
 
-typeset spare_devs1="${disk}${SLICE_PREFIX}${SLICE0}"
-typeset spare_devs2="${disk}${SLICE_PREFIX}${SLICE1}"
-typeset spare_devs3="${disk}${SLICE_PREFIX}${SLICE3}"
-typeset spare_devs4="${disk}${SLICE_PREFIX}${SLICE4}"
+typeset spare_devs1="${DISK0}"
+typeset spare_devs2="${DISK1}"
+typeset spare_devs3="${DISK2}"
 
 log_assert "zpool remove can remove hotspare device which state go though" \
 	" active to inactive in pool"
 
 log_note "Check spare device which state go through active to inactive"
-log_must zpool create $TESTPOOL $spare_devs1 $spare_devs2 spare \
-                 $spare_devs3 $spare_devs4
+log_must zpool create $TESTPOOL $spare_devs1 $spare_devs2 spare $spare_devs3
 log_must zpool replace $TESTPOOL $spare_devs2 $spare_devs3
 log_mustnot zpool remove $TESTPOOL $spare_devs3
 log_must zpool detach $TESTPOOL $spare_devs3

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
index a9fcef7..25fced1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh

@@ -20,6 +20,10 @@
 
 verify_runnable "global"
 
+if ! is_linux; then
+	log_unsupported "scsi debug module unsupported"
+fi
+
 cleanup_devices $DISKS
 
 # Unplug the disk and remove scsi_debug module

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.cfg b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.cfg
index 3d6a291..7451ffd 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.cfg

@@ -40,6 +40,4 @@
 	devs_id[1]=$(get_persistent_disk_name $DISK2)
 	devs_id[2]=$(get_persistent_disk_name $DISK3)
 	export devs_id
-else
-	DEV_DSKDIR="/dev"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
index 075ad85..3d142fd 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib

@@ -68,11 +68,11 @@
 {
 	typeset pool=$1
 	typeset -i timeout=$2
-	typeset func=$3
+	typeset funct=$3
 
 	while [ $timeout -gt 0 ]; do
 		(( --timeout ))
-		if ( $func $pool ); then
+		if ( $funct $pool ); then
 			return 0
 		fi
 		sleep 1

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh
index ae41548..1b18b12 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace-o_ashift.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -41,19 +42,27 @@
 
 function cleanup
 {
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
-	log_must rm -f $disk1
-	log_must rm -f $disk2
+	rm -f $disk1 $disk2
 }
 
 log_assert "zpool replace -o ashift=<n>' works with different ashift values"
 log_onexit cleanup
 
-disk1=$TEST_BASE_DIR/$FILEDISK0
-disk2=$TEST_BASE_DIR/$FILEDISK1
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
 log_must truncate -s $SIZE $disk1
 log_must truncate -s $SIZE $disk2
 
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh
index e740de1..f076f26 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_replace/replace_prop_ashift.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -43,18 +44,27 @@
 
 function cleanup
 {
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
-	log_must rm -f $disk1 $disk2
+	rm -f $disk1 $disk2
 }
 
 log_assert "'zpool replace' uses the ashift pool property value as default."
 log_onexit cleanup
 
-disk1=$TEST_BASE_DIR/$FILEDISK0
-disk2=$TEST_BASE_DIR/$FILEDISK1
+disk1=$TEST_BASE_DIR/disk1
+disk2=$TEST_BASE_DIR/disk2
 log_must truncate -s $SIZE $disk1
 log_must truncate -s $SIZE $disk2
 
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
 do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
index 2cec533..7ca9e81 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am

@@ -3,7 +3,8 @@
 	setup.ksh \
 	cleanup.ksh \
 	zpool_resilver_bad_args.ksh \
-	zpool_resilver_restart.ksh
+	zpool_resilver_restart.ksh \
+	zpool_resilver_concurrent.ksh
 
 dist_pkgdata_DATA = \
 	zpool_resilver.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
new file mode 100755
index 0000000..4c3b097
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh

@@ -0,0 +1,101 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	Verify 'zpool clear' doesn't cause concurrent resilvers
+#
+# STRATEGY:
+#	1. Create N(10) virtual disk files.
+#	2. Create draid pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Force-fault 2 vdevs and verify that the distributed spare kicks in.
+#	5. Free the distributed spare by replacing the faulty drive.
+#	6. Run zpool clear and verify that it does not initiate 2 resilvers
+#	   concurrently while distributed spare gets kicked in.
+#
+
+verify_runnable "global"
+
+typeset -ir devs=10
+typeset -ir nparity=1
+typeset -ir ndata=8
+typeset -ir dspare=1
+
+function cleanup
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+	for i in {0..$devs}; do
+		log_must rm -f "$BASEDIR/vdev$i"
+	done
+
+	for dir in $BASEDIR; do
+		if [[ -d $dir ]]; then
+			log_must rm -rf $dir
+		fi
+	done
+
+	zed_stop
+	zed_cleanup
+}
+
+log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
+log_onexit cleanup
+
+setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
+
+# ZED needed for sequential resilver
+zed_setup
+log_must zed_start
+
+log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
+log_must wait_vdev_state  $TESTPOOL draid1-0-0 "ONLINE" 60
+log_must zpool wait -t resilver $TESTPOOL
+log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
+
+log_must zpool labelclear -f $BASEDIR/vdev5
+log_must zpool labelclear -f $BASEDIR/vdev6
+
+log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
+sync_pool $TESTPOOL
+
+log_must zpool events -c
+log_must zpool clear $TESTPOOL
+log_must wait_vdev_state  $TESTPOOL draid1-0-0 "ONLINE" 60
+log_must zpool wait -t resilver $TESTPOOL
+log_must zpool wait -t scrub $TESTPOOL
+
+nof_resilver=$(zpool events | grep -c resilver_start)
+if [ $nof_resilver = 1 ] ; then
+	log_must verify_pool $TESTPOOL
+	log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
+else
+	log_fail "FAIL: sequential and healing resilver initiated concurrently"
+fi

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh
index cfafbb6..80fc169 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh

@@ -45,7 +45,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	log_must rm -f $mntpnt/biggerfile1
 	log_must rm -f $mntpnt/biggerfile2
 }
@@ -67,7 +67,7 @@
 log_must zpool detach $TESTPOOL $DISK3
 
 # 3. Reattach the drives, causing the second drive's resilver to be deferred
-log_must set_tunable32 zfs_scan_suspend_progress 1
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 
 log_must zpool attach $TESTPOOL $DISK1 $DISK2
 log_must is_pool_resilvering $TESTPOOL true
@@ -78,7 +78,7 @@
 # 4. Manually restart the resilver with all drives
 log_must zpool resilver $TESTPOOL
 log_must is_deferred_scan_started $TESTPOOL
-log_must set_tunable32 zfs_scan_suspend_progress 0
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
 log_must check_state $TESTPOOL "$DISK2" "online"
 log_must check_state $TESTPOOL "$DISK3" "online"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/cleanup.ksh
index b3cb58c..03eb990 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/cleanup.ksh

@@ -30,5 +30,5 @@
 
 verify_runnable "global"
 
-log_must set_tunable32 zfs_scan_suspend_progress 0
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 destroy_mirrors

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh
index 71a2040..449bb9a 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_002_pos.ksh

@@ -50,7 +50,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	log_must rm -f $mntpnt/biggerfile
 }
 
@@ -63,7 +63,7 @@
 log_must file_write -b 1048576 -c 1024 -o create -d 0 -f $mntpnt/biggerfile
 log_must sync
 
-log_must set_tunable32 zfs_scan_suspend_progress 1
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 log_must zpool scrub $TESTPOOL
 log_must is_pool_scrubbing $TESTPOOL true
 log_must zpool scrub -p $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh
index 5622545..12dc044 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_003_pos.ksh

@@ -47,14 +47,14 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 }
 
 log_onexit cleanup
 
 log_assert "Scrub command fails when there is already a scrub in progress"
 
-log_must set_tunable32 zfs_scan_suspend_progress 1
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 log_must zpool scrub $TESTPOOL
 log_must is_pool_scrubbing $TESTPOOL true
 log_mustnot zpool scrub $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh
index 9b6274c..a7ae7f1 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_004_pos.ksh

@@ -46,7 +46,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	rm -f $mntpnt/extra
 }
 
@@ -59,7 +59,7 @@
 mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
 
 # Temporarily prevent scan progress so our test doesn't race
-log_must set_tunable32 zfs_scan_suspend_progress 1
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 
 while ! is_pool_resilvering $TESTPOOL; do
 	log_must zpool detach $TESTPOOL $DISK2
@@ -72,9 +72,7 @@
 log_must is_pool_resilvering $TESTPOOL
 log_mustnot zpool scrub $TESTPOOL
 
-log_must set_tunable32 zfs_scan_suspend_progress 0
-while ! is_pool_resilvered $TESTPOOL; do
-	sleep 1
-done
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+log_must zpool wait -t resilver $TESTPOOL
 
 log_pass "Resilver prevent scrub from starting until the resilver completes"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos.ksh
index 8db6ae9..69a3398 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos.ksh

@@ -48,18 +48,10 @@
 
 log_must zpool scrub $TESTPOOL
 log_must zpool detach $TESTPOOL $DISK2
-log_must zpool attach $TESTPOOL $DISK1 $DISK2
-
-while ! is_pool_resilvered $TESTPOOL; do
-	sleep 1
-done
+log_must zpool attach -w $TESTPOOL $DISK1 $DISK2
 
 log_must zpool scrub $TESTPOOL
 log_must zpool detach $TESTPOOL $DISK1
-log_must zpool attach $TESTPOOL $DISK2 $DISK1
-
-while ! is_pool_resilvered $TESTPOOL; do
-	sleep 1
-done
+log_must zpool attach -w $TESTPOOL $DISK2 $DISK1
 
 log_pass "When scrubbing, detach device should not break system."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh
index 483a683..b1f7c62 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh

@@ -39,7 +39,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2
 }
 log_onexit cleanup
 
@@ -58,11 +58,7 @@
 log_must zfs unmount $TESTPOOL/$TESTFS2
 log_must zfs unload-key $TESTPOOL/$TESTFS2
 
-log_must zpool scrub $TESTPOOL
-
-while ! is_pool_scrubbed $TESTPOOL; do
-	sleep 1
-done
+log_must zpool scrub -w $TESTPOOL
 
 log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh
index e4cb2b5..4b51cd9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh

@@ -43,7 +43,7 @@
 function cleanup
 {
 	log_must zinject -c all
-	log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+	log_must set_tunable64 SCAN_VDEV_LIMIT $ZFS_SCAN_VDEV_LIMIT_DEFAULT
 	zpool scrub -s $TESTPOOL || true
 }
 
@@ -54,7 +54,7 @@
 
 # Make the scrub slow
 log_must zinject -d $DISK1 -D10:1 $TESTPOOL
-log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+log_must set_tunable64 SCAN_VDEV_LIMIT $ZFS_SCAN_VDEV_LIMIT_SLOW
 
 log_must zpool scrub $TESTPOOL
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh
index 3e7ef33..09b5f50 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_ashift.ksh

@@ -22,6 +22,7 @@
 
 #
 # Copyright 2017, loli10K. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -41,6 +42,7 @@
 
 function cleanup
 {
+	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift
 	destroy_pool $TESTPOOL1
 	rm -f $disk
 }
@@ -52,7 +54,15 @@
 
 log_assert "zpool set can modify 'ashift' property"
 
-disk=$TEST_BASE_DIR/$FILEDISK0
+orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+#
+# Set the file vdev's ashift to the max. Overriding
+# the ashift using the -o ashift property should still
+# be honored.
+#
+log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT 16
+
+disk=$TEST_BASE_DIR/disk
 log_must mkfile $SIZE $disk
 log_must zpool create $TESTPOOL1 $disk
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/Makefile.am
index d00f39d..aac5e0d 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/Makefile.am

@@ -11,7 +11,9 @@
 	zpool_split_props.ksh \
 	zpool_split_vdevs.ksh \
 	zpool_split_resilver.ksh \
-	zpool_split_wholedisk.ksh
+	zpool_split_wholedisk.ksh \
+	zpool_split_indirect.ksh \
+	zpool_split_dryrun_output.ksh
 
 dist_pkgdata_DATA = \
 	zpool_split.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh
new file mode 100755
index 0000000..2267ea7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh

@@ -0,0 +1,152 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Attila Fülöp <attila@fueloep.org>
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+typeset NEWPOOL="${TESTPOOL}split"
+typeset STR_DRYRUN="would create '$NEWPOOL' with the following layout:"
+typeset VDEV_PREFIX="$TEST_BASE_DIR/filedev"
+
+#
+# DESCRIPTION:
+# 'zpool split -n <pool> <newpool> [<vdev> ...]' can display the correct
+# configuration
+#
+# STRATEGY:
+# 1. Create a mirrored storage pool, split -n and verify the output is as
+#    expected.
+#
+
+typeset -a dev=(
+	"${VDEV_PREFIX}00" "${VDEV_PREFIX}01" "${VDEV_PREFIX}02"
+	"${VDEV_PREFIX}03" "${VDEV_PREFIX}04" "${VDEV_PREFIX}05"
+	"${VDEV_PREFIX}06" "${VDEV_PREFIX}07" "${VDEV_PREFIX}08"
+	"${VDEV_PREFIX}09" "${VDEV_PREFIX}10" "${VDEV_PREFIX}11"
+)
+
+typeset -a tests=(
+    # Test for hole.
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}' log mirror '${dev[2]}' '${dev[3]}' \
+	    special mirror '${dev[4]}' '${dev[5]}'"
+
+	devs=""
+	want="$STR_DRYRUN
+
+	$NEWPOOL
+	  ${dev[1]}
+	special
+	  ${dev[5]}"
+    )
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}' log mirror '${dev[2]}' '${dev[3]}' \
+	    special mirror '${dev[4]}' '${dev[5]}'"
+
+	devs="'${dev[0]}' '${dev[4]}'"
+	want="$STR_DRYRUN
+
+	$NEWPOOL
+	  ${dev[0]}
+	special
+	  ${dev[4]}"
+    )
+
+	# Full set of vdev types.
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}'
+	    dedup mirror '${dev[2]}' '${dev[3]}' \
+	    special mirror '${dev[4]}' '${dev[5]}' \
+	    cache '${dev[6]}' '${dev[7]}' \
+	    spare '${dev[8]}' '${dev[9]}'\
+		log mirror '${dev[10]}' '${dev[11]}'"
+
+	devs=""
+	want="$STR_DRYRUN
+
+	$NEWPOOL
+	  ${dev[1]}
+	dedup
+	  ${dev[3]}
+	special
+	  ${dev[5]}"
+    )
+    (
+	tree="mirror '${dev[0]}' '${dev[1]}'
+	    dedup mirror '${dev[2]}' '${dev[3]}' \
+	    special mirror '${dev[4]}' '${dev[5]}' \
+	    cache '${dev[6]}' '${dev[7]}' \
+	    spare '${dev[8]}' '${dev[9]}'\
+		log mirror '${dev[10]}' '${dev[11]}'"
+
+	devs="'${dev[0]}' '${dev[2]}' '${dev[4]}'"
+	want="$STR_DRYRUN
+
+	$NEWPOOL
+	  ${dev[0]}
+	dedup
+	  ${dev[2]}
+	special
+	  ${dev[4]}"
+    )
+)
+
+verify_runnable "global"
+
+function cleanup
+{
+	destroy_pool "$TESTPOOL"
+	rm -f "$VDEV_PREFIX"*
+}
+
+log_assert \
+"'zpool split -n <pool> <newpool> [<vdev>]...' can display the configuration"
+
+log_onexit cleanup
+
+# Create needed file vdevs.
+for (( i=0; i < ${#dev[@]}; i+=1 )); do
+	log_must truncate -s $SPA_MINDEVSIZE "${dev[$i]}"
+done
+
+# For each test: create the pool, run 'zpool split -n' and check the output.
+for (( i=0; i < ${#tests[@]}; i+=1 )); do
+	typeset tree="${tests[$i].tree}"
+	typeset devs="${tests[$i].devs}"
+	typeset want="${tests[$i].want}"
+
+	log_must eval zpool create "$TESTPOOL" $tree
+	log_must poolexists "$TESTPOOL"
+	typeset out="$(log_must eval "zpool split -n \
+	    '$TESTPOOL' '$NEWPOOL' $devs" | sed /^SUCCESS/d)"
+
+	if [[ "$out" != "$want" ]]; then
+		log_fail "Got:\n" "$out" "\nbut expected:\n" "$want"
+	fi
+	log_must destroy_pool "$TESTPOOL"
+done
+
+log_pass \
+"'zpool split -n <pool> <newpool> [<vdev>]...' displays config correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_indirect.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_indirect.ksh
new file mode 100755
index 0000000..13f0d08
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_indirect.ksh

@@ -0,0 +1,69 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+#	'zpool split' should succeed on pools with indirect vdevs.
+#
+# STRATEGY:
+#	Create a mirrored pool, add a single device, remove it. `zpool split`
+#	should succeed.
+#
+
+verify_runnable "global"
+
+log_assert "'zpool split' works on pools with indirect VDEVs."
+
+function cleanup
+{
+	if poolexists $TESTPOOL ; then
+		destroy_pool $TESTPOOL
+	fi
+	if poolexists $TESTPOOL2 ; then
+		destroy_pool $TESTPOOL2
+	fi
+	rm -f $VDEV_TEMP $VDEV_M1 $VDEV_M2
+}
+log_onexit cleanup
+
+typeset vdev_m12_mb=400
+typeset vdev_temp_mb=$(( floor($vdev_m12_mb / 2) ))
+typeset VDEV_TEMP="$TEST_BASE_DIR/vdev_temp"
+typeset VDEV_M1="$TEST_BASE_DIR/vdev_m1"
+typeset VDEV_M2="$TEST_BASE_DIR/vdev_m2"
+typeset altroot="$TESTDIR/altroot-$TESTPOOL2"
+
+log_must truncate -s ${vdev_temp_mb}M $VDEV_TEMP
+log_must truncate -s ${vdev_m12_mb}M $VDEV_M1
+log_must truncate -s ${vdev_m12_mb}M $VDEV_M2
+
+log_must zpool create -f $TESTPOOL $VDEV_TEMP
+log_must zpool add -f $TESTPOOL mirror $VDEV_M1 $VDEV_M2
+log_must zpool remove $TESTPOOL $VDEV_TEMP
+log_must wait_for_removal $TESTPOOL
+log_must zpool split -R $altroot $TESTPOOL $TESTPOOL2
+log_must poolexists $TESTPOOL2
+log_must test "$(get_pool_prop 'altroot' $TESTPOOL2)" == "$altroot"
+
+log_pass "'zpool split' works on pools with indirect VDEVs."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_props.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_props.ksh
index 67dbed6..1aff8d3 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_props.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_props.ksh

@@ -35,7 +35,7 @@
 	destroy_pool $TESTPOOL
 	destroy_pool $TESTPOOL2
 	rm -f $DEVICE1 $DEVICE2
-	log_must mmp_clear_hostid
+	! is_freebsd && log_must mmp_clear_hostid
 }
 
 function setup_mirror
@@ -48,23 +48,25 @@
 log_assert "'zpool split' can set new property values on the new pool"
 log_onexit cleanup
 
-if [ -e $HOSTID_FILE ]; then
-	log_unsupported "System has existing $HOSTID_FILE file"
-fi
-
-typeset good_props=('comment=text' 'ashift=12' 'multihost=on'
-    'listsnapshots=on' 'autoexpand=on' 'autoreplace=on' 'dedupditto=1234'
-    'delegation=off' 'failmode=continue')
-typeset bad_props=("bootfs=$TESTPOOL2/bootfs" 'version=28' 'ashift=4'
-    'allocated=1234' 'capacity=5678' 'dedupditto=42' 'multihost=none'
-    'feature@async_destroy=disabled' 'feature@xxx_fake_xxx=enabled'
-    'propname=propval' 'readonly=on')
-
 DEVICE1="$TEST_BASE_DIR/device-1"
 DEVICE2="$TEST_BASE_DIR/device-2"
 
-# Needed to set multihost=on
-log_must mmp_set_hostid $HOSTID1
+typeset good_props=('comment=text' 'ashift=12' 'multihost=on'
+    'listsnapshots=on' 'autoexpand=on' 'autoreplace=on'
+    'delegation=off' 'failmode=continue')
+typeset bad_props=("bootfs=$TESTPOOL2/bootfs" 'version=28' 'ashift=4'
+    'allocated=1234' 'capacity=5678' 'multihost=none'
+    'feature@async_destroy=disabled' 'feature@xxx_fake_xxx=enabled'
+    'propname=propval' 'readonly=on')
+if ! is_freebsd; then
+	# 'multihost=on' and 'multihost=none' are already listed above, so
+	# they are not appended again; only the hostid setup is done here.
+	if [ -e $HOSTID_FILE ]; then
+		log_unsupported "System has existing $HOSTID_FILE file"
+	fi
+	# Needed to set multihost=on
+	log_must mmp_set_hostid $HOSTID1
+fi
 
 # Verify we can set a combination of valid property values on the new pool
 for prop in "${good_props[@]}"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_resilver.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_resilver.ksh
index 1a5c319..99a40ec 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_resilver.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_resilver.ksh

@@ -41,7 +41,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 	destroy_pool $TESTPOOL
 	destroy_pool $TESTPOOL2
 	rm -f $DEVICE1 $DEVICE2
@@ -69,7 +69,7 @@
 	log_must sync
 
 	# temporarily prevent resilvering progress, so it will not finish too early
-	log_must set_tunable32 zfs_scan_suspend_progress 1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
 
 	log_must zpool online $TESTPOOL $disk
 
@@ -84,7 +84,7 @@
 
 	log_mustnot zpool split $TESTPOOL $TESTPOOL2
 
-	log_must set_tunable32 zfs_scan_suspend_progress 0
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
 }
 
 log_assert "Verify 'zpool split' will fail if resilver in progress for a disk"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_vdevs.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_vdevs.ksh
index b7ebe55..9866cf7 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_vdevs.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_vdevs.ksh

@@ -125,7 +125,7 @@
 	add_config="$(awk '{$1= "";print $0}' <<< $config)"
 	log_must zpool create $TESTPOOL $(pool_config $create_config)
 	for vdev in $add_config; do
-		log_must zpool add $TESTPOOL -f $(pool_config $vdev)
+		log_must zpool add -f $TESTPOOL $(pool_config $vdev)
 	done
 	log_must zpool split -R $altroot $TESTPOOL $TESTPOOL2
 	log_must poolexists $TESTPOOL2
@@ -140,7 +140,7 @@
 	add_config="$(awk '{$1= "";print $0}' <<< $config)"
 	log_must zpool create $TESTPOOL $(pool_config $create_config)
 	for vdev in $add_config; do
-		log_must zpool add $TESTPOOL -f $(pool_config $vdev)
+		log_must zpool add -f $TESTPOOL $(pool_config $vdev)
 	done
 	log_mustnot zpool split -R $altroot $TESTPOOL $TESTPOOL2
 	log_mustnot poolexists $TESTPOOL2

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am
index beb59e3..5553061 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am

@@ -3,4 +3,5 @@
 	setup.ksh \
 	cleanup.ksh \
 	zpool_status_001_pos.ksh \
-	zpool_status_002_pos.ksh
+	zpool_status_002_pos.ksh \
+	zpool_status_features_001_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh
new file mode 100755
index 0000000..635125f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh

@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+
+#
+# DESCRIPTION:
+#	Verify zpool status only recommends upgrading the pool when
+#	the enabled features don't match those in the feature set.
+#
+# STRATEGY:
+#	1. Create a pool with a known feature set.
+#	2. Verify there is no `zpool status` notice to upgrade the pool.
+#	3. Set the pool compatibility to a newer feature set.
+#	4. Verify there is a `zpool status` notice to upgrade the pool.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL1 && log_must zpool destroy $TESTPOOL1
+	rm -f $FILEDEV
+}
+
+FILEDEV="$TEST_BASE_DIR/filedev.$$"
+
+log_onexit cleanup
+
+log_assert "check 'zpool status' upgrade notice"
+
+log_must truncate -s $MINVDEVSIZE $FILEDEV
+log_must zpool create -f -o compatibility=compat-2018 $TESTPOOL1 $FILEDEV
+log_mustnot check_pool_status $TESTPOOL1 "status" "features are not enabled"
+
+log_must zpool set compatibility=compat-2020 $TESTPOOL1
+log_must check_pool_status $TESTPOOL1 "status" "features are not enabled"
+
+log_pass "check 'zpool status' upgrade notice"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am
index d2d3b4a..0411ab4 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile.am

@@ -3,6 +3,7 @@
 	setup.ksh \
 	cleanup.ksh \
 	zpool_trim_attach_detach_add_remove.ksh \
+	zpool_trim_fault_export_import_online.ksh \
 	zpool_trim_import_export.ksh \
 	zpool_trim_multiple.ksh \
 	zpool_trim_neg.ksh \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh
index cdcf038..0948960 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh

@@ -23,15 +23,21 @@
 
 verify_runnable "global"
 
-DISK1=${DISKS%% *}
+if is_freebsd; then
+	log_unsupported "FreeBSD has no hole punching mechanism for the time being."
+	diskinfo -v $DISKS | grep -qE 'No.*# TRIM/UNMAP support' &&
+	    log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+else
+	DISK1=${DISKS%% *}
 
-typeset -i max_discard=0
-if [[ -b $DEV_RDSKDIR/$DISK1 ]]; then
-	max_discard=$(lsblk -Dbn $DEV_RDSKDIR/$DISK1 | awk '{ print $4; exit }')
-fi
+	typeset -i max_discard=0
+	if is_disk_device $DEV_RDSKDIR/$DISK1; then
+		max_discard=$(lsblk -Dbn $DEV_RDSKDIR/$DISK1 | awk '{ print $4; exit }')
+	fi
 
-if test $max_discard -eq 0; then
-	log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+	if test $max_discard -eq 0; then
+		log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+	fi
 fi
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib
index 1c54c66..e8d43cc 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib

@@ -30,6 +30,23 @@
 	trim_prog_line "$1" "$2" | sed 's/.*(\([0-9]\{1,\}\)% trimmed.*/\1/g'
 }
 
+#
+# Rewrite /$pool/tmpfile with random data and sync the pool, several times.
+#
+function sync_and_rewrite_some_data_a_few_times
+{
+	typeset pool=$1
+	typeset -i a_few_times=${2:-20}
+
+	typeset file="/$pool/tmpfile"
+	for i in {0..$a_few_times}; do
+		dd if=/dev/urandom of=${file} bs=128k count=10
+		sync_pool "$pool"
+	done
+
+	return 0
+}
+
 function cleanup
 {
 	if poolexists $TESTPOOL; then

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_fault_export_import_online.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_fault_export_import_online.ksh
new file mode 100755
index 0000000..6bb9fc3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_fault_export_import_online.ksh

@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Tim Chase. All rights reserved.
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib
+
+#
+# DESCRIPTION:
+# Miscellaneous complex sequences of operations function as expected.
+#
+# STRATEGY:
+# 1. Create a pool with a two-way mirror.
+# 2. Start trimming, fault, export, import, online and verify along
+#    the way that the trim was cancelled and not restarted.
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
+
+log_must zpool trim -r 128M $TESTPOOL $DISK1
+progress="$(trim_progress $TESTPOOL $DISK1)"
+[[ -z "$progress" ]] && log_fail "Trimming did not start"
+
+log_must zpool offline -f $TESTPOOL $DISK1
+log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED"
+log_must eval "zpool status -t $TESTPOOL | grep $DISK1 | grep untrimmed"
+
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+
+# Note: the expected TRIM status here is "unsupported" since the faulted
+# device cannot be checked to determine if it supports TRIM.
+log_must check_vdev_state $TESTPOOL $DISK1 "FAULTED"
+log_must eval "zpool status -t $TESTPOOL | grep $DISK1 | grep unsupported"
+
+log_must zpool online $TESTPOOL $DISK1
+log_must zpool clear $TESTPOOL $DISK1
+log_must check_vdev_state $TESTPOOL $DISK1 "ONLINE"
+log_must eval "zpool status -t $TESTPOOL | grep $DISK1 | grep untrimmed"
+
+log_pass "Trimming behaves as expected at each step of:" \
+    "trim + fault + export + import + online"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh
index 681cd12..afc9a2e 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh

@@ -27,7 +27,7 @@
 # Trimming automatically resumes across offline/online.
 #
 # STRATEGY:
-# 1. Create a pool with a two-way mirror.
+# 1. Create a pool with a two-way mirror, prepare blocks to trim.
 # 2. Start trimming one of the disks and verify that trimming is active.
 # 3. Offline the disk.
 # 4. Online the disk.
@@ -39,8 +39,10 @@
 DISK1=${DISKS%% *}
 DISK2="$(echo $DISKS | cut -d' ' -f2)"
 
-log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
-log_must zpool trim -r 128M $TESTPOOL $DISK1
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2 -O recordsize=4k
+sync_and_rewrite_some_data_a_few_times $TESTPOOL
+
+log_must zpool trim -r 1 $TESTPOOL $DISK1
 
 log_must zpool offline $TESTPOOL $DISK1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
index 58e0ef7..5d14b74 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh

@@ -44,9 +44,9 @@
 		rm -rf "$TESTDIR"
 	fi
 
-	log_must set_tunable64 zfs_trim_metaslab_skip 0
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_vdev_min_ms_count $vdev_min_ms_count
+	log_must set_tunable64 TRIM_METASLAB_SKIP 0
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 VDEV_MIN_MS_COUNT $vdev_min_ms_count
 }
 log_onexit cleanup
 
@@ -55,12 +55,12 @@
 
 # The minimum number of metaslabs is increased in order to simulate the
 # behavior of partial trimming on a more typically sized 1TB disk.
-typeset vdev_min_ms_count=$(get_tunable zfs_vdev_min_ms_count)
-log_must set_tunable64 zfs_vdev_min_ms_count 64
+typeset vdev_min_ms_count=$(get_tunable VDEV_MIN_MS_COUNT)
+log_must set_tunable64 VDEV_MIN_MS_COUNT 64
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
 log_must mkdir "$TESTDIR"
 log_must truncate -s $LARGESIZE "$LARGEFILE"
@@ -85,9 +85,9 @@
 
 # Perform a partial trim, we expect it to skip most of the new metaslabs
 # which have never been used and therefore do not need be trimmed.
-log_must set_tunable64 zfs_trim_metaslab_skip 1
+log_must set_tunable64 TRIM_METASLAB_SKIP 1
 log_must zpool trim $TESTPOOL
-log_must set_tunable64 zfs_trim_metaslab_skip 0
+log_must set_tunable64 TRIM_METASLAB_SKIP 0
 
 log_must zpool sync
 while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh
index faf134f..68e9909 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh

@@ -39,8 +39,10 @@
 DISK3="$(echo $DISKS | cut -d' ' -f3)"
 
 log_must zpool list -v
-log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
-log_must zpool trim -r 128M $TESTPOOL $DISK1
+log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3 -O recordsize=4k
+sync_and_rewrite_some_data_a_few_times $TESTPOOL
+
+log_must zpool trim -r 1 $TESTPOOL $DISK1
 
 [[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] && \
     log_fail "Trim did not start"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
index eaa4d90..1978113 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh

@@ -20,29 +20,29 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+. $STF_SUITE/tests/functional/cli_root/zpool_trim/zpool_trim.kshlib
 
 #
 # DESCRIPTION:
-# Starting and stopping an initialize works.
+# Starting and stopping a trim works.
 #
 # STRATEGY:
 # 1. Create a one-disk pool.
-# 2. Start initializing and verify that initializing is active.
-# 3. Cancel initializing and verify that initializing is not active.
+# 2. Start trimming and verify that trimming is active.
+# 3. Cancel trimming and verify that trimming is not active.
 #
 
 DISK1=${DISKS%% *}
 
 log_must zpool create -f $TESTPOOL $DISK1
-log_must zpool initialize $TESTPOOL
+log_must zpool trim -r 1 $TESTPOOL
 
-[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \
-    log_fail "Initialize did not start"
+[[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] && \
+    log_fail "TRIM did not start"
 
-log_must zpool initialize -c $TESTPOOL
+log_must zpool trim -c $TESTPOOL
 
-[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] || \
-    log_fail "Initialize did not stop"
+[[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] || \
+    log_fail "TRIM did not stop"
 
-log_pass "Initialize start + cancel works"
+log_pass "TRIM start + cancel works"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh
index a216d13..d5aaf49 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh

@@ -43,7 +43,7 @@
                 rm -rf "$TESTDIR"
         fi
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
 }
 log_onexit cleanup
 
@@ -51,8 +51,8 @@
 LARGEFILE="$TESTDIR/largefile"
 
 # Reduce trim size to allow for tighter tolerance below when checking.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
 log_must mkdir "$TESTDIR"
 log_must truncate -s $LARGESIZE "$LARGEFILE"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile.am
index 18311ed..c7f321a 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile.am

@@ -12,7 +12,8 @@
 	zpool_upgrade_006_neg.ksh \
 	zpool_upgrade_007_pos.ksh \
 	zpool_upgrade_008_pos.ksh \
-	zpool_upgrade_009_neg.ksh
+	zpool_upgrade_009_neg.ksh \
+	zpool_upgrade_features_001_pos.ksh
 
 dist_pkgdata_DATA = \
 	zpool_upgrade.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib
index 7b018da..783ae54 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib

@@ -138,22 +138,3 @@
 		log_fail "$pool: zpool reported version $actual, expected $vers"
 	fi
 }
-
-# A simple function to get a random number between two bounds
-# probably not the most efficient for large ranges, but it's okay.
-# Note since we're using $RANDOM, 32767 is the largest number we
-# can accept as the upper bound.
-# $1 lower bound
-# $2 upper bound
-function random
-{
-	typeset min=$1
-	typeset max=$2
-	typeset rand=0
-
-	while [[ $rand -lt $min ]] ; do
-		rand=$(( $RANDOM % $max + 1))
-	done
-
-	echo $rand
-}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_008_pos.ksh
index 173d7f6..d930919 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_008_pos.ksh

@@ -30,6 +30,7 @@
 # Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 #
 
+. $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib
 
 #
@@ -67,7 +68,7 @@
 
 for ver_old in $VERSIONS; do
 	typeset -n pool_name=ZPOOL_VERSION_${ver_old}_NAME
-	typeset ver_new=$(random $ver_old $MAX_VER)
+	typeset -i ver_new=$(random_int_between $ver_old $MAX_VER)
 
 	create_old_pool $ver_old
 	log_must zpool upgrade -V $ver_new $pool_name > /dev/null

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_features_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_features_001_pos.ksh
new file mode 100755
index 0000000..5170d31
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/zpool_upgrade_features_001_pos.ksh

@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+
+#
+# DESCRIPTION:
+#	Verify pools can be upgraded to known feature sets.
+#
+# STRATEGY:
+#	1. Create a pool with a known feature set.
+#	2. Verify only those features are active/enabled.
+#	3. Upgrade the pool to a newer feature set.
+#	4. Verify only those features are active/enabled.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL1 && log_must zpool destroy $TESTPOOL1
+	rm -f $FILEDEV
+}
+
+FILEDEV="$TEST_BASE_DIR/filedev.$$"
+
+log_onexit cleanup
+
+log_assert "verify pools can be upgraded to known feature sets."
+
+log_must truncate -s $MINVDEVSIZE $FILEDEV
+log_must zpool create -f -o compatibility=compat-2018 $TESTPOOL1 $FILEDEV
+check_feature_set $TESTPOOL1 compat-2018
+log_mustnot check_pool_status $TESTPOOL1 "status" "features are not enabled"
+
+log_must zpool set compatibility=compat-2020 $TESTPOOL1
+log_must check_pool_status $TESTPOOL1 "status" "features are not enabled"
+
+log_must zpool upgrade $TESTPOOL1
+check_feature_set $TESTPOOL1 compat-2020
+log_mustnot check_pool_status $TESTPOOL1 "status" "features are not enabled"
+
+log_pass "verify pools can be upgraded to known feature sets."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/Makefile.am
new file mode 100644
index 0000000..45ab8e3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/Makefile.am

@@ -0,0 +1,22 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_wait
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	zpool_wait_discard.ksh \
+	zpool_wait_freeing.ksh \
+	zpool_wait_initialize_basic.ksh \
+	zpool_wait_initialize_cancel.ksh \
+	zpool_wait_initialize_flag.ksh \
+	zpool_wait_multiple.ksh \
+	zpool_wait_no_activity.ksh \
+	zpool_wait_remove.ksh \
+	zpool_wait_remove_cancel.ksh \
+	zpool_wait_trim_basic.ksh \
+	zpool_wait_trim_cancel.ksh \
+	zpool_wait_trim_flag.ksh \
+	zpool_wait_usage.ksh
+
+dist_pkgdata_DATA = \
+	zpool_wait.kshlib
+
+SUBDIRS = scan

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/cleanup.ksh
new file mode 100755
index 0000000..456d2d0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/cleanup.ksh

@@ -0,0 +1,20 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/Makefile.am
new file mode 100644
index 0000000..451d83a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/Makefile.am

@@ -0,0 +1,11 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_wait/scan
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	zpool_wait_replace.ksh  \
+	zpool_wait_replace_cancel.ksh \
+	zpool_wait_rebuild.ksh \
+	zpool_wait_resilver.ksh  \
+	zpool_wait_scrub_basic.ksh \
+	zpool_wait_scrub_cancel.ksh \
+	zpool_wait_scrub_flag.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/cleanup.ksh
new file mode 100755
index 0000000..456d2d0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/cleanup.ksh

@@ -0,0 +1,20 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/setup.ksh
new file mode 100755
index 0000000..8a6a1a2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/setup.ksh

@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+verify_runnable "global"
+verify_disk_count $DISKS 3
+
+#
+# Set up a pool for use in the tests that do scrubbing and resilvering. Each
+# test leaves the pool in the same state as when it started, so it is safe to
+# share the same setup.
+#
+log_must zpool create -f $TESTPOOL $DISK1
+log_must dd if=/dev/urandom of="/$TESTPOOL/testfile" bs=1k count=256k
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_rebuild.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_rebuild.ksh
new file mode 100755
index 0000000..8cd5864
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_rebuild.ksh

@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for sequential resilvering to complete.
+#
+# STRATEGY:
+# 1. Attach a device to the pool so that sequential resilvering starts.
+# 2. Start 'zpool wait'.
+# 3. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 4. Repeat 1-3, except using the '-w' flag with 'zpool attach' instead of using
+#    'zpool wait'.
+#
+
+function cleanup
+{
+	remove_io_delay
+	kill_if_running $pid
+	get_disklist $TESTPOOL | grep $DISK2 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK2
+}
+
+typeset -r IN_PROGRESS_CHECK="is_pool_resilvering $TESTPOOL"
+typeset pid
+
+log_onexit cleanup
+
+add_io_delay $TESTPOOL
+
+# Test 'zpool wait -t resilver'
+log_must zpool attach -s $TESTPOOL $DISK1 $DISK2
+log_bkgrnd zpool wait -t resilver $TESTPOOL
+pid=$!
+check_while_waiting $pid "$IN_PROGRESS_CHECK"
+
+log_must zpool detach $TESTPOOL $DISK2
+
+# Test 'zpool attach -w'
+log_bkgrnd zpool attach -sw $TESTPOOL $DISK1 $DISK2
+pid=$!
+while ! is_pool_resilvering $TESTPOOL && proc_exists $pid; do
+	log_must sleep .5
+done
+check_while_waiting $pid "$IN_PROGRESS_CHECK"
+
+log_pass "'zpool wait -t resilver' and 'zpool attach -w' work."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace.ksh
new file mode 100755
index 0000000..06df7b5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace.ksh

@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when replacing disks.
+#
+# STRATEGY:
+# 1. Attach a disk to the pool to form a two-way mirror.
+# 2. Start a replacement of the new disk.
+# 3. Start 'zpool wait'.
+# 4. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 5. Repeat 2-4, except using the '-w' flag with 'zpool replace' instead of
+#    using 'zpool wait'.
+#
+
+function cleanup
+{
+	remove_io_delay
+	kill_if_running $pid
+	get_disklist $TESTPOOL | grep $DISK2 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK2
+	get_disklist $TESTPOOL | grep $DISK3 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK3
+}
+
+function in_progress
+{
+	zpool status $TESTPOOL | grep 'replacing-' >/dev/null
+}
+
+typeset pid
+
+log_onexit cleanup
+
+log_must zpool attach -w $TESTPOOL $DISK1 $DISK2
+
+add_io_delay $TESTPOOL
+
+# Test 'zpool wait -t replace'
+log_must zpool replace $TESTPOOL $DISK2 $DISK3
+log_bkgrnd zpool wait -t replace $TESTPOOL
+pid=$!
+check_while_waiting $pid in_progress
+
+# Test 'zpool replace -w'
+log_bkgrnd zpool replace -w $TESTPOOL $DISK3 $DISK2
+pid=$!
+while ! is_pool_resilvering $TESTPOOL && proc_exists $pid; do
+	log_must sleep .5
+done
+check_while_waiting $pid in_progress
+
+log_pass "'zpool wait -t replace' and 'zpool replace -w' work."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace_cancel.ksh
new file mode 100755
index 0000000..a899e9f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_replace_cancel.ksh

@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when a replacing disk is detached before the replacement
+# completes.
+#
+# STRATEGY:
+# 1. Attach a disk to the pool to form a two-way mirror.
+# 2. Modify tunable so that resilver won't complete while test is running.
+# 3. Start a replacement of the new disk.
+# 4. Start a process that waits for the replace.
+# 5. Wait a few seconds and then check that the wait process is actually
+#    waiting.
+# 6. Cancel the replacement by detaching the replacing disk.
+# 7. Check that the wait process returns reasonably promptly.
+#
+
+function cleanup
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+	kill_if_running $pid
+	get_disklist $TESTPOOL | grep $DISK2 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK2
+	get_disklist $TESTPOOL | grep $DISK3 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK3
+	log_must zpool sync $TESTPOOL
+}
+
+typeset pid
+
+log_onexit cleanup
+
+log_must zpool attach -w $TESTPOOL $DISK1 $DISK2
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+log_must zpool replace $TESTPOOL $DISK2 $DISK3
+log_bkgrnd zpool wait -t replace $TESTPOOL
+pid=$!
+
+log_must sleep 3
+proc_must_exist $pid
+
+log_must zpool detach $TESTPOOL $DISK3
+bkgrnd_proc_succeeded $pid
+
+log_pass "'zpool wait -t replace' returns when replacing disk is detached."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_resilver.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_resilver.ksh
new file mode 100755
index 0000000..a938901
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_resilver.ksh

@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for resilvering to complete.
+#
+# STRATEGY:
+# 1. Attach a device to the pool so that resilvering starts.
+# 2. Start 'zpool wait'.
+# 3. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 4. Repeat 1-3, except using the '-w' flag with 'zpool attach' instead of using
+#    'zpool wait'.
+#
+
+function cleanup
+{
+	remove_io_delay
+	kill_if_running $pid
+	get_disklist $TESTPOOL | grep $DISK2 >/dev/null && \
+	    log_must zpool detach $TESTPOOL $DISK2
+}
+
+typeset -r IN_PROGRESS_CHECK="is_pool_resilvering $TESTPOOL"
+typeset pid
+
+log_onexit cleanup
+
+add_io_delay $TESTPOOL
+
+# Test 'zpool wait -t resilver'
+log_must zpool attach $TESTPOOL $DISK1 $DISK2
+log_bkgrnd zpool wait -t resilver $TESTPOOL
+pid=$!
+check_while_waiting $pid "$IN_PROGRESS_CHECK"
+
+log_must zpool detach $TESTPOOL $DISK2
+
+# Test 'zpool attach -w'
+log_bkgrnd zpool attach -w $TESTPOOL $DISK1 $DISK2
+pid=$!
+while ! is_pool_resilvering $TESTPOOL && proc_exists $pid; do
+	log_must sleep .5
+done
+check_while_waiting $pid "$IN_PROGRESS_CHECK"
+
+log_pass "'zpool wait -t resilver' and 'zpool attach -w' work."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_basic.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_basic.ksh
new file mode 100755
index 0000000..d4bb170
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_basic.ksh

@@ -0,0 +1,49 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for a scrub to complete.
+#
+# STRATEGY:
+# 1. Start a scrub.
+# 2. Start 'zpool wait -t scrub'.
+# 3. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+#
+
+function cleanup
+{
+	remove_io_delay
+	kill_if_running $pid
+}
+
+typeset pid
+
+log_onexit cleanup
+
+# Slow down scrub so that we actually have something to wait for.
+add_io_delay $TESTPOOL
+
+log_must zpool scrub $TESTPOOL
+log_bkgrnd zpool wait -t scrub $TESTPOOL
+pid=$!
+check_while_waiting $pid "is_pool_scrubbing $TESTPOOL"
+
+log_pass "'zpool wait -t scrub' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_cancel.ksh
new file mode 100755
index 0000000..7adb3b2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_cancel.ksh

@@ -0,0 +1,66 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when a scrub is paused or canceled.
+#
+# STRATEGY:
+# 1. Modify tunable so that scrubs won't complete while test is running.
+# 2. Start a scrub.
+# 3. Start a process that waits for the scrub.
+# 4. Wait a few seconds and then check that the wait process is actually
+#    waiting.
+# 5. Pause the scrub.
+# 6. Check that the wait process returns reasonably promptly.
+# 7. Repeat 2-6, except stop the scrub instead of pausing it.
+#
+
+function cleanup # log_onexit handler: resume scans, stop waiter and scrub
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+	kill_if_running $pid	# pid is set by do_test; may still be empty
+	is_pool_scrubbing $TESTPOOL && log_must zpool scrub -s $TESTPOOL
+}
+
+function do_test # stop_cmd
+{
+	typeset halt_scrub="$1"
+	# Start a scrub and a background waiter for it.
+	log_must zpool scrub $TESTPOOL
+	log_bkgrnd zpool wait -t scrub $TESTPOOL
+	pid=$!
+	# Three seconds in, the waiter must still be blocked.
+	log_must sleep 3
+	proc_must_exist $pid
+	# Run the caller's pause/stop command; the waiter must then exit 0.
+	log_must eval "$halt_scrub"
+	bkgrnd_proc_succeeded $pid
+}
+
+typeset pid	# pid of the current waiter; set by do_test, read by cleanup
+
+log_onexit cleanup
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1	# scrubs must not finish
+
+do_test "zpool scrub -p $TESTPOOL"	# waiter must return on pause ...
+do_test "zpool scrub -s $TESTPOOL"	# ... and on stop
+
+log_pass "'zpool wait -t scrub' works when scrub is canceled."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_flag.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_flag.ksh
new file mode 100755
index 0000000..aac62cf
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/zpool_wait_scrub_flag.ksh

@@ -0,0 +1,52 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool scrub -w' waits while scrub is in progress.
+#
+# STRATEGY:
+# 1. Start a scrub with the -w flag.
+# 2. Wait a few seconds and then check that the wait process is actually
+#    waiting.
+# 3. Stop the scrub, make sure that the command returns reasonably promptly.
+#
+
+function cleanup # log_onexit handler: resume scans and stop 'scrub -w'
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+	kill_if_running $pid	# pid is empty if 'zpool scrub -w' never ran
+}
+
+typeset pid	# pid of the background 'zpool scrub -w'; read by cleanup
+
+log_onexit cleanup
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1	# keep the scrub from ending
+
+log_bkgrnd zpool scrub -w $TESTPOOL	# starts the scrub and waits on it
+pid=$!
+
+log_must sleep 3
+proc_must_exist $pid	# still blocked after 3 seconds
+
+log_must zpool scrub -s $TESTPOOL
+bkgrnd_proc_succeeded $pid	# must exit 0 once the scrub is stopped
+
+log_pass "'zpool scrub -w' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/setup.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/setup.ksh
new file mode 100755
index 0000000..5a9af18
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/setup.ksh

@@ -0,0 +1,23 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+verify_runnable "global"
+
+verify_disk_count $DISKS 3	# presumably requires >= 3 disks (DISK1..DISK3)
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
new file mode 100644
index 0000000..ccb9791
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib

@@ -0,0 +1,124 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+typeset -a disk_array=($(find_disks $DISKS))	# indexed array from $DISKS
+
+typeset -r DISK1=${disk_array[0]}	# read-only names for the first three
+typeset -r DISK2=${disk_array[1]}	# entries; setup.ksh checks the disk
+typeset -r DISK3=${disk_array[2]}	# count before any test runs
+
+#
+# When the condition it is waiting for becomes true, 'zpool wait' should return
+# promptly. We want to enforce this, but any check will be racy because it will
+# take some small but indeterminate amount of time for the waiting thread to be
+# woken up and for the process to exit.
+#
+# To deal with this, we provide a grace period after the condition becomes true
+# during which 'zpool wait' can exit. If it hasn't exited by the time the grace
+# period expires we assume something is wrong and fail the test. While there is
+# no value that can really be correct, the idea is we choose something large
+# enough that it shouldn't cause issues in practice.
+#
+typeset -r WAIT_EXIT_GRACE=2.0	# seconds, handed to sleep(1)
+
+function add_io_delay # pool; injects a delay on every disk of the pool
+{
+	for disk in $(get_disklist $1); do	# NOTE(review): disk is not typeset
+		log_must zinject -d $disk -D20:1 $1	# presumably 20ms, 1 lane
+	done
+}
+
+function remove_io_delay # undoes add_io_delay
+{
+	log_must zinject -c all	# 'all': not limited to the delay handlers
+}
+
+function proc_exists # pid; status 0 iff ps can find the process
+{
+	ps -p $1 1>/dev/null
+}
+
+function proc_must_exist # pid; fails the test if the process is gone
+{
+	proc_exists $1 || log_fail "zpool process exited too soon"
+}
+
+function proc_must_not_exist # pid; status 0 when gone, log_fail if alive
+{
+	! proc_exists $1 || log_fail "zpool process took too long to exit"
+}
+
+function get_time # prints the current time as HH:MM:SS for log lines
+{
+	date '+%T'
+}
+
+function kill_if_running # pid
+{
+	# No-op when pid is empty (never started) or the process already exited.
+	[[ -n $1 ]] && proc_exists $1 && log_must kill -s TERM $1
+}
+
+# Log a command and then start it running in the background (pid is in $!).
+function log_bkgrnd # cmd [args...]; "$*" keeps the log message one word
+{
+	log_note "$(get_time) Starting cmd in background '$*'"
+	"$@" &
+}
+
+# Require that a background process has exited, with status 0, by the deadline
+function bkgrnd_proc_succeeded # pid
+{
+	typeset bg_pid="$1"
+	# Allow WAIT_EXIT_GRACE seconds for the process to wake up and exit.
+	log_must sleep $WAIT_EXIT_GRACE
+	# It has to be gone by now; 'wait' then yields its exit status.
+	proc_must_not_exist $bg_pid
+	wait $bg_pid || log_fail "zpool process exited with status $?"
+	log_note "$(get_time) wait completed successfully"
+}
+
+#
+# Poll a background 'zpool wait' until it exits or the activity it waits on
+# ends, then verify it returned neither before nor long after that point.
+#
+function check_while_waiting # pid activity_check
+{
+	# $1: pid of the waiting process
+	# $2: command string that evaluates true while the activity runs
+	typeset waiter="$1" still_active="$2"
+
+	log_note "$(get_time) waiting for process $waiter using" \
+	    "activity check '$still_active'"
+	# Stop polling once the waiter is gone or the activity has finished.
+	while proc_exists $waiter && eval "$still_active"; do
+		log_must sleep .5
+	done
+
+	#
+	# The loop also ends when the waiter exits first; in that case the
+	# activity is still running and this check fails the test.
+	#
+	log_mustnot eval "$still_active"
+
+	bkgrnd_proc_succeeded $waiter
+}
+
+# Status 0 iff 'zpool status -i' reports a vdev whose initialize has started
+function is_vdev_initializing # pool
+{
+	zpool status -i "$1" | grep -q "initialized, started"
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_discard.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_discard.ksh
new file mode 100755
index 0000000..8d5747e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_discard.ksh

@@ -0,0 +1,87 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for checkpoint discard to complete.
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Add some data to the pool.
+# 3. Checkpoint the pool and delete the data so that the space is unique to the
+#    checkpoint.
+# 4. Discard the checkpoint using the '-w' flag.
+# 5. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 6. Repeat 2-5, but using 'zpool wait' instead of the '-w' flag.
+#
+
+function cleanup # log_onexit handler
+{
+	log_must zinject -c all	# drop the delay do_test put on DISK1
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	kill_if_running $pid
+
+	[[ $default_mem_limit ]] && log_must set_tunable64 \
+	    SPA_DISCARD_MEMORY_LIMIT $default_mem_limit
+}
+
+function do_test # use_wait_flag (true|false)
+{
+	typeset via_flag="$1"
+	# Write 128M of random data and checkpoint the pool ...
+	log_must dd if=/dev/urandom of="$TESTFILE" bs=128k count=1k
+	log_must zpool checkpoint $TESTPOOL
+	# ... then delete it, leaving the space unique to the checkpoint.
+	log_must rm "$TESTFILE"
+	# Delay I/O on the disk before the discard is started.
+	log_must zinject -d $DISK1 -D20:1 $TESTPOOL
+
+	if $via_flag; then
+		log_bkgrnd zpool checkpoint -dw $TESTPOOL
+		pid=$!
+		# Poll until the discard is visible (or the command is gone).
+		until is_pool_discarding $TESTPOOL || ! proc_exists $pid; do
+			log_must sleep .5
+		done
+	else
+		log_must zpool checkpoint -d $TESTPOOL
+		log_bkgrnd zpool wait -t discard $TESTPOOL
+		pid=$!
+	fi
+
+	# The waiter must return when the discard ends, and not before.
+	check_while_waiting $pid "is_pool_discarding $TESTPOOL"
+	log_must zinject -c all
+}
+
+typeset -r TESTFILE="/$TESTPOOL/testfile"
+typeset pid default_mem_limit	# both are read by cleanup
+
+log_onexit cleanup
+
+default_mem_limit=$(get_tunable SPA_DISCARD_MEMORY_LIMIT)	# for cleanup
+log_must set_tunable64 SPA_DISCARD_MEMORY_LIMIT 32	# presumably slows discard
+
+log_must zpool create $TESTPOOL $DISK1
+
+do_test true	# 'zpool checkpoint -dw'
+do_test false	# 'zpool checkpoint -d' followed by 'zpool wait -t discard'
+
+log_pass "'zpool wait -t discard' and 'zpool checkpoint -dw' work."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_freeing.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_freeing.ksh
new file mode 100755
index 0000000..7f5a9e6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_freeing.ksh

@@ -0,0 +1,112 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for background freeing to complete.
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Modify tunables to make sure freeing is slow enough to observe.
+# 3. Create a file system with some data.
+# 4. Destroy the file system and call 'zpool wait'.
+# 5. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 6. Repeat 3-5, except destroy a snapshot instead of a filesystem.
+# 7. Repeat 3-5, except destroy a clone.
+#
+
+function cleanup # NOTE(review): restores run even if a default is unset
+{
+	log_must set_tunable64 ASYNC_BLOCK_MAX_BLOCKS $default_async_block_max_blocks
+	log_must set_tunable64 LIVELIST_MAX_ENTRIES $default_max_livelist_entries
+	log_must set_tunable64 LIVELIST_MIN_PERCENT_SHARED $default_min_pct_shared
+
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	kill_if_running $pid	# pid is set by test_wait; may still be empty
+}
+
+function test_wait # wait on 'free' until the pool's freeing property is 0
+{
+	log_bkgrnd zpool wait -t free $TESTPOOL
+	pid=$!	# global: cleanup kills the waiter if the test aborts
+	check_while_waiting $pid '[[ $(get_pool_prop freeing $TESTPOOL) != "0" ]]'
+}
+
+typeset -r FS="$TESTPOOL/$TESTFS1"
+typeset -r SNAP="$FS@snap1"
+typeset -r CLONE="$TESTPOOL/clone"
+typeset pid default_max_livelist_entries default_min_pct_shared
+typeset default_async_block_max_blocks	# the default_* values feed cleanup
+
+log_onexit cleanup
+
+log_must zpool create $TESTPOOL $DISK1
+
+#
+# Limit the number of blocks that can be freed in a single txg. This slows down
+# freeing so that we actually have something to wait for.
+#
+default_async_block_max_blocks=$(get_tunable ASYNC_BLOCK_MAX_BLOCKS)
+log_must set_tunable64 ASYNC_BLOCK_MAX_BLOCKS 8
+#
+# Space from clones gets freed one livelist per txg instead of being controlled
+# by zfs_async_block_max_blocks. Limit the rate at which space is freed by
+# limiting the size of livelists so that we end up with a number of them.
+#
+default_max_livelist_entries=$(get_tunable LIVELIST_MAX_ENTRIES)
+log_must set_tunable64 LIVELIST_MAX_ENTRIES 16
+# Don't disable livelists, no matter how much clone diverges from snapshot
+default_min_pct_shared=$(get_tunable LIVELIST_MIN_PERCENT_SHARED)
+log_must set_tunable64 LIVELIST_MIN_PERCENT_SHARED -1
+
+#
+# Test waiting for space from destroyed filesystem to be freed
+#
+log_must zfs create "$FS"
+log_must dd if=/dev/zero of="/$FS/testfile" bs=1M count=128
+log_must zfs destroy "$FS"
+test_wait
+
+#
+# Test waiting for space from destroyed snapshot to be freed
+#
+log_must zfs create "$FS"
+log_must dd if=/dev/zero of="/$FS/testfile" bs=1M count=128
+log_must zfs snapshot "$SNAP"
+# Make sure bulk of space is unique to snapshot
+log_must rm "/$FS/testfile"
+log_must zfs destroy "$SNAP"
+test_wait
+
+#
+# Test waiting for space from destroyed clone to be freed. $FS still exists
+# here, so the snapshot destroyed above is simply taken again.
+log_must zfs snapshot "$SNAP"
+log_must zfs clone "$SNAP" "$CLONE"
+# Add some data to the clone: 50 files of 512K, with a sync after each
+for i in {1..50}; do
+	log_must dd if=/dev/urandom of="/$CLONE/testfile$i" bs=1k count=512
+	# Force each new file to be tracked by a new livelist
+	log_must zpool sync $TESTPOOL
+done
+log_must zfs destroy "$CLONE"
+test_wait
+
+log_pass "'zpool wait -t freeing' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_basic.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_basic.ksh
new file mode 100755
index 0000000..924ae5f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_basic.ksh

@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for devices to complete initializing
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Modify a tunable to make sure initializing is slow enough to observe.
+# 3. Start initializing the vdev in the pool.
+# 4. Start 'zpool wait'.
+# 5. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $pid
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	# The file vdev lives under TESTDIR; remove the directory if present.
+	[[ -d $TESTDIR ]] && log_must rm -r "$TESTDIR"
+	# Put the chunk size back only if its default was captured.
+	[[ -n $default_chunk_sz ]] &&
+	    log_must set_tunable64 INITIALIZE_CHUNK_SIZE $default_chunk_sz
+}
+
+typeset -r FILE_VDEV="$TESTDIR/file_vdev"
+typeset pid default_chunk_sz	# both are read by cleanup
+
+log_onexit cleanup
+
+default_chunk_sz=$(get_tunable INITIALIZE_CHUNK_SIZE)	# saved for cleanup
+log_must set_tunable64 INITIALIZE_CHUNK_SIZE 2048	# slow enough to observe
+
+log_must mkdir "$TESTDIR"
+log_must mkfile 256M "$FILE_VDEV"
+log_must zpool create -f $TESTPOOL "$FILE_VDEV"
+
+log_must zpool initialize $TESTPOOL "$FILE_VDEV"
+
+log_bkgrnd zpool wait -t initialize $TESTPOOL
+pid=$!
+
+check_while_waiting $pid "is_vdev_initializing $TESTPOOL"
+
+log_pass "'zpool wait -t initialize' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_cancel.ksh
new file mode 100755
index 0000000..8b19ee6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_cancel.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when an initialization operation is canceled.
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Modify a tunable to make sure initializing is slow enough that it won't
+#    complete before the test finishes.
+# 3. Start initializing the vdev in the pool.
+# 4. Start 'zpool wait'.
+# 5. Wait a few seconds and then check that the wait process is actually
+#    waiting.
+# 6. Cancel the initialization of the device.
+# 7. Check that the wait process returns reasonably promptly.
+# 8. Repeat 3-7, except pause the initialization instead of canceling it.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $pid	# pid is set by do_test; may still be empty
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	[[ "$default_chunk_sz" ]] &&	# skip if the default was never read
+	    log_must set_tunable64 INITIALIZE_CHUNK_SIZE $default_chunk_sz
+}
+
+function do_test # stop_cmd
+{
+	typeset halt_init="$1"
+	# Begin initializing the disk and park a waiter on it.
+	log_must zpool initialize $TESTPOOL $DISK1
+	log_bkgrnd zpool wait -t initialize $TESTPOOL
+	pid=$!
+
+	# Three seconds in, the waiter must still be blocked
+	log_must sleep 3
+	proc_must_exist $pid
+
+	# Run the caller's cancel/suspend command; the waiter must then
+	# exit with status 0
+	log_must eval "$halt_init"
+	bkgrnd_proc_succeeded $pid
+}
+
+typeset pid default_chunk_sz	# both are read by cleanup
+
+log_onexit cleanup
+
+# Make sure the initialization takes a while
+default_chunk_sz=$(get_tunable INITIALIZE_CHUNK_SIZE)
+log_must set_tunable64 INITIALIZE_CHUNK_SIZE 512
+
+log_must zpool create $TESTPOOL $DISK1
+
+do_test "zpool initialize -c $TESTPOOL $DISK1"	# cancel
+do_test "zpool initialize -s $TESTPOOL $DISK1"	# suspend
+
+log_pass "'zpool wait' works when initialization is stopped before completion."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_flag.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_flag.ksh
new file mode 100755
index 0000000..8c8c45a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_initialize_flag.ksh

@@ -0,0 +1,88 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# -w flag for 'zpool initialize' waits for the completion of all and only those
+# initializations kicked off by that invocation.
+#
+# STRATEGY:
+# 1. Create a pool with 3 disks.
+# 2. Start initializing disks 1 and 2 with one invocation of
+#    'zpool initialize -w'
+# 3. Start initializing disk 3 with a second invocation of 'zpool initialize -w'
+# 4. Cancel the initialization of disk 1. Check that neither waiting process
+#    exits.
+# 5. Cancel the initialization of disk 3. Check that only the second waiting
+#    process exits.
+# 6. Cancel the initialization of disk 2. Check that the first waiting process
+#    exits.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $init12_pid	# 'initialize -w' on DISK1 and DISK2
+	kill_if_running $init3_pid	# 'initialize -w' on DISK3
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	[[ "$default_chunk_sz" ]] &&	# skip if the default was never read
+	    log_must set_tunable64 INITIALIZE_CHUNK_SIZE $default_chunk_sz
+}
+
+typeset init12_pid init3_pid default_chunk_sz	# all three feed cleanup
+
+log_onexit cleanup
+
+log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
+
+# Make sure the initialization takes a while
+default_chunk_sz=$(get_tunable INITIALIZE_CHUNK_SIZE)
+log_must set_tunable64 INITIALIZE_CHUNK_SIZE 512
+
+log_bkgrnd zpool initialize -w $TESTPOOL $DISK1 $DISK2
+init12_pid=$!
+log_bkgrnd zpool initialize -w $TESTPOOL $DISK3
+init3_pid=$!
+
+# Make sure that we are really waiting
+log_must sleep 3
+proc_must_exist $init12_pid
+proc_must_exist $init3_pid
+
+#
+# Cancel initialization of one of disks started by init12, make sure neither
+# process exits (init12 is still waiting on DISK2)
+#
+log_must zpool initialize -c $TESTPOOL $DISK1
+proc_must_exist $init12_pid
+proc_must_exist $init3_pid
+
+#
+# Cancel initialization started by init3, make sure that process exits, but
+# init12 doesn't
+#
+log_must zpool initialize -c $TESTPOOL $DISK3
+proc_must_exist $init12_pid
+bkgrnd_proc_succeeded $init3_pid
+
+# Cancel last initialization started by init12, make sure it returns.
+log_must zpool initialize -c $TESTPOOL $DISK2
+bkgrnd_proc_succeeded $init12_pid
+
+log_pass "'zpool initialize -w' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_multiple.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_multiple.ksh
new file mode 100755
index 0000000..a8107b9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_multiple.ksh

@@ -0,0 +1,83 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for multiple activities.
+#
+# STRATEGY:
+# 1. Create a pool with some data.
+# 2. Alternate running two different activities (scrub and initialize),
+#    making sure that they overlap such that one of the two is always
+#    running.
+# 3. Wait for both activities with a single invocation of zpool wait.
+# 4. Check that zpool wait doesn't return until both activities have
+#    stopped.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $pid
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	[[ "$default_chunk_sz" ]] && log_must set_tunable64 \
+	    INITIALIZE_CHUNK_SIZE $default_chunk_sz
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0	# always reset to 0
+}
+
+typeset pid default_chunk_sz	# both are read by cleanup
+
+log_onexit cleanup
+
+log_must zpool create -f $TESTPOOL $DISK1
+log_must dd if=/dev/urandom of="/$TESTPOOL/testfile" bs=64k count=1k
+
+default_chunk_sz=$(get_tunable INITIALIZE_CHUNK_SIZE)
+log_must set_tunable64 INITIALIZE_CHUNK_SIZE 512
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+log_must zpool scrub $TESTPOOL
+
+log_bkgrnd zpool wait -t scrub,initialize $TESTPOOL
+pid=$!
+
+log_must sleep 2
+
+log_must zpool initialize $TESTPOOL $DISK1	# start one activity ...
+log_must zpool scrub -s $TESTPOOL	# ... before stopping the other
+
+log_must sleep 2
+
+log_must zpool scrub $TESTPOOL
+log_must zpool initialize -s $TESTPOOL $DISK1
+
+log_must sleep 2
+
+log_must zpool initialize $TESTPOOL $DISK1
+log_must zpool scrub -s $TESTPOOL
+
+log_must sleep 2
+
+proc_must_exist $pid	# one activity was always running, so still waiting
+
+# Cancel last activity, zpool wait should return
+log_must zpool initialize -s $TESTPOOL $DISK1
+bkgrnd_proc_succeeded $pid
+
+log_pass "'zpool wait' works when waiting for multiple activities."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_no_activity.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_no_activity.ksh
new file mode 100755
index 0000000..f4819f3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_no_activity.ksh

@@ -0,0 +1,52 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' returns immediately when there is no activity in progress.
+#
+# STRATEGY:
+# 1. Create an empty pool with no activity
+# 2. Run zpool wait with various activities, make sure it always returns
+#    promptly
+#
+
+function cleanup { # log_onexit handler; no background process to kill here
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+typeset -r TIMEOUT_SECS=1	# limit given to timeout(1) for each wait
+
+log_onexit cleanup
+log_must zpool create $TESTPOOL $DISK1
+
+# Wait for each activity alone. NOTE(review): 'trim' is not in this list.
+typeset activities=(free discard initialize replace remove resilver scrub)
+for activity in ${activities[@]}; do
+	log_must timeout $TIMEOUT_SECS zpool wait -t $activity $TESTPOOL
+done
+
+# Wait for multiple activities at the same time
+log_must timeout $TIMEOUT_SECS zpool wait -t scrub,initialize $TESTPOOL
+log_must timeout $TIMEOUT_SECS zpool wait -t free,remove,discard $TESTPOOL
+
+# Wait for all activities at the same time (no -t option given)
+log_must timeout $TIMEOUT_SECS zpool wait $TESTPOOL
+
+log_pass "'zpool wait' returns immediately when no activity is in progress."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove.ksh
new file mode 100755
index 0000000..19298d1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for a device to be removed.
+#
+# STRATEGY:
+# 1. Create a pool with two disks and some data.
+# 2. Modify a tunable to make sure removal doesn't make any progress.
+# 3. Start removing one of the disks.
+# 4. Start 'zpool wait'.
+# 5. Sleep for a few seconds and check that the process is actually waiting.
+# 6. Modify tunable to allow removal to complete.
+# 7. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+# 8. Repeat 1-7, except using the '-w' flag for 'zpool remove' instead of using
+#    'zpool wait'.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $pid	# pid is set by do_test; may still be empty
+	log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+function do_test # use_flag (true|false)
+{
+	typeset via_flag="$1"
+	# Fresh two-disk pool holding 16M of random data
+	log_must zpool create -f $TESTPOOL $DISK1 $DISK2
+	log_must dd if=/dev/urandom of="/$TESTPOOL/testfile" bs=1k count=16k
+
+	# Hold the removal at zero progress for now
+	log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 1
+
+	if $via_flag; then
+		log_bkgrnd zpool remove -w $TESTPOOL $DISK1
+		pid=$!
+		# Poll until the removal is visible (or the command is gone)
+		until is_pool_removing $TESTPOOL || ! proc_exists $pid; do
+			log_must sleep .5
+		done
+	else
+		log_must zpool remove $TESTPOOL $DISK1
+		log_bkgrnd zpool wait -t remove $TESTPOOL
+		pid=$!
+	fi
+
+	# With removal suspended, the waiter must still exist after 3s
+	log_must sleep 3
+	proc_must_exist $pid
+
+	# Let the removal run; the waiter must exit once it completes
+	log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
+	check_while_waiting $pid "is_pool_removing $TESTPOOL"
+
+	log_must zpool destroy $TESTPOOL
+}
+
+log_onexit cleanup
+
+typeset pid	# pid of the current waiter; set by do_test, read by cleanup
+
+do_test true	# 'zpool remove -w'
+do_test false	# 'zpool remove' followed by 'zpool wait -t remove'
+
+log_pass "'zpool wait -t remove' and 'zpool remove -w' work."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove_cancel.ksh
new file mode 100755
index 0000000..4373b57
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_remove_cancel.ksh

@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when device removal is canceled.
+#
+# STRATEGY:
+# 1. Create a pool with two disks and some data.
+# 2. Modify a tunable to make sure removal won't complete while test is running.
+# 3. Start removing one of the disks.
+# 4. Start 'zpool wait'.
+# 5. Sleep for a few seconds and check that the process is actually waiting.
+# 6. Cancel the removal of the device.
+# 7. Check that the wait process returns reasonably promptly.
+#
+
+function cleanup # log_onexit handler
+{
+	kill_if_running $pid	# pid is empty if the waiter never started
+	log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_onexit cleanup
+
+typeset pid	# pid of the background 'zpool wait'; read by cleanup
+
+log_must zpool create -f $TESTPOOL $DISK1 $DISK2
+
+log_must dd if=/dev/urandom of="/$TESTPOOL/testfile" bs=1k count=16k
+
+# Start removal, but don't allow it to make any progress
+log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 1
+log_must zpool remove $TESTPOOL $DISK1
+
+log_bkgrnd zpool wait -t remove $TESTPOOL
+pid=$!
+
+log_must sleep 3
+proc_must_exist $pid	# still blocked after 3 seconds
+
+log_must zpool remove -s $TESTPOOL	# cancel the removal
+bkgrnd_proc_succeeded $pid	# waiter must exit 0 after the cancel
+
+log_pass "'zpool wait -t remove' works when removal is canceled."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh
new file mode 100755
index 0000000..f047050
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh

@@ -0,0 +1,68 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when waiting for devices to finish being trimmed
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Start trimming the vdev in the pool, making sure the rate is slow enough
+#    that the trim can be observed.
+# 3. Start 'zpool wait'.
+# 4. Monitor the waiting process to make sure it returns neither too soon nor
+#    too late.
+#
+
+function cleanup
+{
+	kill_if_running $pid
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	[[ -d "$TESTDIR" ]] && log_must rm -r "$TESTDIR"
+}
+
+# True if any vdev in pool $1 is mid-trim ("trimmed, started" in zpool status)
+function trim_in_progress
+{
+	typeset pool="$1"
+	zpool status -t "$pool" | grep "trimmed, started"
+}
+
+if is_freebsd; then
+	log_unsupported "FreeBSD has no hole punching mechanism for the time being."
+fi
+
+typeset -r FILE_VDEV="$TESTDIR/file_vdev"
+typeset pid
+
+log_onexit cleanup
+
+log_must mkdir "$TESTDIR"
+log_must truncate -s 10G "$FILE_VDEV"
+log_must zpool create -f $TESTPOOL "$FILE_VDEV"
+
+log_must zpool trim -r 2G $TESTPOOL "$FILE_VDEV"
+
+log_bkgrnd zpool wait -t trim $TESTPOOL
+pid=$!
+
+check_while_waiting $pid "trim_in_progress $TESTPOOL"
+
+log_pass "'zpool wait -t trim' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh
new file mode 100755
index 0000000..26e1aa6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' works when a trim operation is canceled.
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Start trimming the vdev in the pool, setting the rate low enough that the
+#    operation won't complete before the test finishes.
+# 3. Start 'zpool wait'.
+# 4. Wait a few seconds and then check that the wait process is actually
+#    waiting.
+# 5. Cancel the trim.
+# 6. Check that the wait process returns reasonably promptly.
+# 7. Repeat 2-6, except suspend the trim (-s) instead of canceling it (-c).
+#
+
+function cleanup
+{
+	kill_if_running $pid
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	[[ -d "$TESTDIR" ]] && log_must rm -r "$TESTDIR"
+}
+
+function do_test
+{
+	typeset stop_cmd=$1
+
+	log_must zpool trim -r 1M $TESTPOOL "$FILE_VDEV"
+
+	log_bkgrnd zpool wait -t trim $TESTPOOL
+	pid=$!
+
+	# The trim was just started, so the waiter must still be running
+	log_must sleep 3
+	proc_must_exist $pid
+
+	# Run the stop command passed as $1; the waiter must then return
+	log_must eval "$stop_cmd"
+	bkgrnd_proc_succeeded $pid
+}
+
+if is_freebsd; then
+	log_unsupported "FreeBSD has no hole punching mechanism for the time being."
+fi
+
+typeset pid
+typeset -r FILE_VDEV="$TESTDIR/file_vdev1"
+
+log_onexit cleanup
+
+log_must mkdir "$TESTDIR"
+log_must truncate -s 10G "$FILE_VDEV"
+log_must zpool create -f $TESTPOOL "$FILE_VDEV"
+
+do_test "zpool trim -c $TESTPOOL $FILE_VDEV"
+do_test "zpool trim -s $TESTPOOL $FILE_VDEV"
+
+log_pass "'zpool wait' works when trim is stopped before completion."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh
new file mode 100755
index 0000000..effccc1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh

@@ -0,0 +1,88 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# -w flag for 'zpool trim' waits for trimming to complete for all and only those
+# vdevs kicked off by that invocation.
+#
+# STRATEGY:
+# 1. Create a pool with 3 vdevs.
+# 2. Start trimming vdevs 1 and 2 with one invocation of 'zpool trim -w'
+# 3. Start trimming vdev 3 with a second invocation of 'zpool trim -w'
+# 4. Cancel the trim of vdev 1. Check that neither waiting process exits.
+# 5. Cancel the trim of vdev 3. Check that only the second waiting process
+#    exits.
+# 6. Cancel the trim of vdev 2. Check that the first waiting process exits.
+#
+
+function cleanup
+{
+	kill_if_running $trim12_pid
+	kill_if_running $trim3_pid
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	[[ -d "$TESTDIR" ]] && log_must rm -r "$TESTDIR"
+}
+
+if is_freebsd; then
+	log_unsupported "FreeBSD has no hole punching mechanism for the time being."
+fi
+
+typeset trim12_pid trim3_pid
+typeset -r VDEV1="$TESTDIR/file_vdev1"
+typeset -r VDEV2="$TESTDIR/file_vdev2"
+typeset -r VDEV3="$TESTDIR/file_vdev3"
+
+log_onexit cleanup
+
+log_must mkdir "$TESTDIR"
+log_must truncate -s 10G "$VDEV1" "$VDEV2" "$VDEV3"
+log_must zpool create -f $TESTPOOL "$VDEV1" "$VDEV2" "$VDEV3"
+
+log_bkgrnd zpool trim -r 1M -w $TESTPOOL "$VDEV1" "$VDEV2"
+trim12_pid=$!
+log_bkgrnd zpool trim -r 1M -w $TESTPOOL "$VDEV3"
+trim3_pid=$!
+
+# Make sure that we are really waiting
+log_must sleep 3
+proc_must_exist $trim12_pid
+proc_must_exist $trim3_pid
+
+#
+# Cancel trim of one of disks started by trim12, make sure neither
+# process exits
+#
+log_must zpool trim -c $TESTPOOL "$VDEV1"
+proc_must_exist $trim12_pid
+proc_must_exist $trim3_pid
+
+#
+# Cancel trim started by trim3, make sure that process exits, but
+# trim12 doesn't
+#
+log_must zpool trim -c $TESTPOOL "$VDEV3"
+proc_must_exist $trim12_pid
+bkgrnd_proc_succeeded $trim3_pid
+
+# Cancel last trim started by trim12, make sure it returns.
+log_must zpool trim -c $TESTPOOL "$VDEV2"
+bkgrnd_proc_succeeded $trim12_pid
+
+log_pass "'zpool trim -w' works."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_usage.ksh b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_usage.ksh
new file mode 100755
index 0000000..2d6f897
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_usage.ksh

@@ -0,0 +1,47 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool wait' behaves sensibly when invoked incorrectly.
+#
+# STRATEGY:
+# 1. Invoke 'zpool wait' incorrectly and check that it exits with a non-zero
+#    status.
+# 2. Invoke 'zpool wait' with missing or bad arguments and check that it prints
+#    some sensible error message.
+#
+
+function cleanup {
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_onexit cleanup
+log_must zpool create $TESTPOOL $DISK1
+
+log_mustnot zpool wait
+
+zpool wait 2>&1 | grep -i usage || \
+    log_fail "Usage message did not contain the word 'usage'."
+zpool wait -t scrub fakepool 2>&1 | grep -i 'no such pool' || \
+    log_fail "Error message did not contain phrase 'no such pool'."
+zpool wait -t foo $TESTPOOL 2>&1 | grep -i 'invalid activity' || \
+    log_fail "Error message did not contain phrase 'invalid activity'."
+
+log_pass "'zpool wait' behaves sensibly when invoked incorrectly."

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am
index 49138d9..2d38e65 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/Makefile.am

@@ -45,7 +45,8 @@
 	zpool_upgrade_001_neg.ksh \
 	arcstat_001_pos.ksh \
 	arc_summary_001_pos.ksh \
-	arc_summary_002_neg.ksh
+	arc_summary_002_neg.ksh \
+	zpool_wait_privilege.ksh
 
 dist_pkgdata_DATA = \
 	misc.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh
index a445fbb..befbea9 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/arc_summary_001_pos.ksh

@@ -48,6 +48,9 @@
 	set -A args  "" "-a" "-d" "-p 1"
 fi
 
+# Without this, the below checks aren't going to work the way we hope...
+set -o pipefail
+
 typeset -i i=0
 while [[ $i -lt ${#args[*]} ]]; do
 	log_must eval "arc_summary ${args[i]} > /dev/null"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
index 06d211c..1a96ff5 100644
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg

@@ -41,7 +41,7 @@
 	# zfs get/set subcommands - ordered as per the list above so we
 	# can iterate over both sets in an array
 	PROP_VALS="\
-		posixacl	on		 \
+		posix		on		 \
 		fletcher2	on				on \
 		on		legacy		none		on \
 		128K		none		on		 \
@@ -49,11 +49,37 @@
 
 	# these are an alternate set of property values
 	PROP_ALTVALS="\
-		noacl		off		 \
+		nfsv4		off		 \
 		fletcher4	lzjb				off \
 		off		/tmp/zfstest	100M		off \
 		512		10m		off		 \
 		hidden"
+elif is_freebsd; then
+	PROP_NAMES="\
+		acltype		atime		 \
+		checksum	compression			devices \
+		exec		mountpoint	quota		readonly \
+		recordsize	reservation	setuid		 \
+		snapdir"
+
+	# these are a set of values we apply, for use when testing the
+	# zfs get/set subcommands - ordered as per the list above so we
+	# can iterate over both sets in an array
+	PROP_VALS="\
+		posix		on		 \
+		fletcher2	on				on \
+		on		legacy		none		on \
+		128K		none		on		 \
+		visible"
+
+	# these are an alternate set of property values
+	PROP_ALTVALS="\
+		nfsv4		off		 \
+		fletcher4	lzjb				off \
+		off		/tmp/zfstest	100M		off \
+		512		10m		off		 \
+		hidden"
+
 else
 	# these are the set of setable ZFS properties
 	PROP_NAMES="\

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zdb_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zdb_001_neg.ksh
index 579ab12..3adfc59 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zdb_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zdb_001_neg.ksh

@@ -56,11 +56,7 @@
 
 function cleanup
 {
-	if [ -e $TEST_BASE_DIR/zdb_001_neg.$$.txt ]
-	then
-		rm $TEST_BASE_DIR/zdb_001_neg.$$.txt
-	fi
-
+	rm -f $TEST_BASE_DIR/zdb_001_neg.$$.txt $TEST_BASE_DIR/zdb.$$
 }
 
 verify_runnable "global"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh
index 46171ca..bfe8cf4 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_001_neg.ksh

@@ -44,10 +44,7 @@
 
 function cleanup
 {
-	if [ -e "$TEMPFILE" ]
-	then
-		rm -f "$TEMPFILE"
-	fi
+	rm -f "$TEMPFILE"
 }
 
 log_onexit cleanup
@@ -55,7 +52,7 @@
 
 TEMPFILE="$TEST_BASE_DIR/zfs_001_neg.$$.txt"
 
-eval "zfs > $TEMPFILE 2>&1"
+zfs > $TEMPFILE 2>&1
 log_must grep "usage: zfs command args" "$TEMPFILE"
 
 log_must eval "awk '{if (length(\$0) > 80) exit 1}' < $TEMPFILE"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_share_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_share_001_neg.ksh
index 3f120c2..14c35b3 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_share_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_share_001_neg.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_unsupported "Requires additional dependencies"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_unshare_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_unshare_001_neg.ksh
index 72ed1f5..7ae86fc 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_unshare_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zfs_unshare_001_neg.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_unsupported "Requires additional dependencies"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zpool_wait_privilege.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zpool_wait_privilege.ksh
new file mode 100755
index 0000000..42a2dd2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/misc/zpool_wait_privilege.ksh

@@ -0,0 +1,35 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#
+# zpool wait works when run as an unprivileged user
+#
+
+verify_runnable "global"
+
+log_must zpool wait $TESTPOOL
+
+# Make sure printing status works as unprivileged user.
+output=$(zpool wait -H $TESTPOOL 1) || \
+    log_fail "'zpool wait -H $TESTPOOL 1' failed"
+# Expect at least one line of status output, even in a pool with no activity.
+log_must eval '[[ $(wc -l <<<$output) -ge 1 ]]'
+
+log_pass "zpool wait works when run as a user"

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_002_pos.ksh
index 382b2cb..4951097 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_002_pos.ksh

@@ -74,7 +74,7 @@
 fi
 
 #
-# datsets ordered by checksum options (note, Orange, Carrot & Banana have the
+# datasets ordered by checksum options (note, Orange, Carrot & Banana have the
 # same checksum options, so ZFS should revert to sorting them alphabetically by
 # name)
 #

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh
index 8e9009b..0f60113 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zfs_list/zfs_list_007_pos.ksh

@@ -37,7 +37,7 @@
 #
 # STRATEGY:
 #	1. 'zfs list -d <n>' to get the output.
-#	2. 'zfs list -r|egrep' to get the expected output.
+#	2. 'zfs list -r|grep' to get the expected output.
 #	3. Compare the two outputs, they should be same.
 #
 
@@ -50,8 +50,7 @@
 
 function cleanup
 {
-	log_must rm -f $DEPTH_OUTPUT
-	log_must rm -f $EXPECT_OUTPUT
+	log_must rm -f $DEPTH_OUTPUT $EXPECT_OUTPUT
 }
 
 log_onexit cleanup
@@ -76,10 +75,10 @@
 			log_must eval "zfs list -H -d $dp -o name -t ${fs_type[$fs]} $DEPTH_FS > $DEPTH_OUTPUT"
 			[[ -s "$DEPTH_OUTPUT" ]] && \
 				log_fail "$DEPTH_OUTPUT should be null."
-			log_mustnot zfs list -rH -o name -t ${fs_type[$fs]} $DEPTH_FS | egrep -e '$eg_opt'
+			log_mustnot zfs list -rH -o name -t ${fs_type[$fs]} $DEPTH_FS | grep -E "$eg_opt"
 		else
 			log_must eval "zfs list -H -d $dp -o name -t ${fs_type[$fs]} $DEPTH_FS > $DEPTH_OUTPUT"
-			log_must eval "zfs list -rH -o name -t ${fs_type[$fs]} $DEPTH_FS | egrep -e '$eg_opt' > $EXPECT_OUTPUT"
+			log_must eval "zfs list -rH -o name -t ${fs_type[$fs]} $DEPTH_FS | grep -E '$eg_opt' > $EXPECT_OUTPUT"
 			log_must diff $DEPTH_OUTPUT $EXPECT_OUTPUT
 		fi
 		(( fs+=1 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_homedir.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_homedir.ksh
index 5cb50fd..22450d8 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_homedir.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_homedir.ksh

@@ -30,7 +30,7 @@
 #
 # STRATEGY:
 #	1. Change HOME to /var/tmp
-#	2. Make a simple script that echos a key value pair
+#	2. Make a simple script that echoes a key value pair
 #	   in /var/tmp/.zpool.d
 #	3. Make sure it can be run with -c
 #	4. Remove the script we created

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_searchpath.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_searchpath.ksh
index 1197ea2..11f5135 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_searchpath.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_-c_searchpath.ksh

@@ -30,7 +30,7 @@
 #
 # STRATEGY:
 #	1. Set ZPOOL_SCRIPTS_PATH to contain a couple of non-default dirs
-#	2. Make a simple script that echos a key value pair in each dir
+#	2. Make a simple script that echoes a key value pair in each dir
 #	3. Make sure scripts can be run with -c
 #	4. Remove the scripts we created
 

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh
index 4cc3deb..5363043 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh

@@ -30,7 +30,7 @@
 #
 # STRATEGY:
 #	1. Change HOME to /var/tmp
-#	2. Make a simple script that echos a key value pair
+#	2. Make a simple script that echoes a key value pair
 #	   in /var/tmp/.zpool.d
 #	3. Make sure it can be run with -c
 #	4. Remove the script we created

diff --git a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh
index a075b9a..3f64fdf 100755
--- a/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh

@@ -30,7 +30,7 @@
 #
 # STRATEGY:
 #	1. Set ZPOOL_SCRIPTS_PATH to contain a couple of non-default dirs
-#	2. Make a simple script that echos a key value pair in each dir
+#	2. Make a simple script that echoes a key value pair in each dir
 #	3. Make sure scripts can be run with -c
 #	4. Remove the scripts we created
 

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/Makefile.am b/zfs/tests/zfs-tests/tests/functional/compression/Makefile.am
index 25a5bca..92a9732 100644
--- a/zfs/tests/zfs-tests/tests/functional/compression/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/compression/Makefile.am

@@ -5,7 +5,11 @@
 	compress_001_pos.ksh \
 	compress_002_pos.ksh \
 	compress_003_pos.ksh \
-	compress_004_pos.ksh
+	compress_004_pos.ksh \
+	l2arc_compressed_arc.ksh \
+	l2arc_compressed_arc_disabled.ksh \
+	l2arc_encrypted.ksh \
+	l2arc_encrypted_no_compressed_arc.ksh
 
 dist_pkgdata_DATA = \
 	compress.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/compress_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/compression/compress_001_pos.ksh
index b35b177..fe3a3ac 100755
--- a/zfs/tests/zfs-tests/tests/functional/compression/compress_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/compression/compress_001_pos.ksh

@@ -21,14 +21,11 @@
 #
 
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2007, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2013, 2016, Delphix. All rights reserved.
 # Use is subject to license terms.
 #
 
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/compression/compress.cfg
 

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/compress_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/compression/compress_003_pos.ksh
index 713f206..d5b7256 100755
--- a/zfs/tests/zfs-tests/tests/functional/compression/compress_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/compression/compress_003_pos.ksh

@@ -21,14 +21,14 @@
 #
 
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2007, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2013, 2016, Delphix. All rights reserved.
+# Copyright (c) 2019, Kjeld Schouten-Lebbing. All rights reserved.
 # Use is subject to license terms.
 #
 
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
 
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/include/libtest.shlib
 
 #
@@ -62,7 +62,7 @@
 
 for propname in "compression" "compress"
 do
-	for value in $(get_compress_opts zfs_compress)
+	for value in "${compress_prop_vals[@]:1}"
 	do
 		log_must zfs set $propname=$value $fs
 		if [[ $value == "gzip-6" ]]; then

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/compress_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/compression/compress_004_pos.ksh
index 29d4b3a..b924bcd 100755
--- a/zfs/tests/zfs-tests/tests/functional/compression/compress_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/compression/compress_004_pos.ksh

@@ -21,14 +21,13 @@
 #
 
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2007, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2013, 2016, Delphix. All rights reserved.
+# Copyright (c) 2019, Kjeld Schouten-Lebbing. All Rights Reserved.
 # Use is subject to license terms.
 #
 
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/include/libtest.shlib
 
 #
@@ -94,7 +93,7 @@
 
 for propname in "compression" "compress"
 do
-	for value in $(get_compress_opts zfs_compress)
+	for value in "${compress_prop_vals[@]:1}"
 	do
 		log_must zfs set compression=$value $fs
 		real_val=$(get_prop $propname $fs)

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh
new file mode 100755
index 0000000..5980ce1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh

@@ -0,0 +1,97 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+export SIZE=1G
+export VDIR=$TESTDIR/disk.persist_l2arc
+export VDEV="$VDIR/a"
+export VDEV_CACHE="$VDIR/b"
+
+# fio options
+export DIRECTORY=/$TESTPOOL-l2arc
+export NUMJOBS=4
+export RUNTIME=30
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+#
+# DESCRIPTION:
+#	System with compressed_arc enabled succeeds at reading from L2ARC
+#
+# STRATEGY:
+#	1. Enable compressed_arc.
+#	2. Create pool with a cache device and compression enabled.
+#	3. Read the number of L2ARC checksum failures.
+#	4. Create a random file in that pool and random read for 30 sec.
+#	5. Read the number of L2ARC checksum failures.
+#
+
+verify_runnable "global"
+
+log_assert "L2ARC with compressed_arc enabled succeeds."
+
+origin_carc_setting=$(get_tunable COMPRESSED_ARC_ENABLED)
+
+function cleanup
+{
+	if poolexists $TESTPOOL-l2arc ; then
+		destroy_pool $TESTPOOL-l2arc
+	fi
+
+	log_must set_tunable64 COMPRESSED_ARC_ENABLED $origin_carc_setting
+}
+log_onexit cleanup
+
+# Enable Compressed ARC so that in-ARC and on-disk will match
+log_must set_tunable64 COMPRESSED_ARC_ENABLED 1
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must mkfile $SIZE $VDEV
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -O compression=lz4 -f $TESTPOOL-l2arc $VDEV cache $VDEV_CACHE
+
+l2_cksum_bad_start=$(get_arcstat l2_cksum_bad)
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+l2_cksum_bad_end=$(get_arcstat l2_cksum_bad)
+
+log_note "L2ARC Failed Checksums before: $l2_cksum_bad_start After:"\
+	"$l2_cksum_bad_end"
+log_must test $(( $l2_cksum_bad_end - $l2_cksum_bad_start )) -eq 0
+
+log_must zpool destroy -f $TESTPOOL-l2arc
+
+log_pass "L2ARC with compressed_arc enabled does not result in checksum errors."

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc_disabled.ksh b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc_disabled.ksh
new file mode 100755
index 0000000..4c3b6a6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc_disabled.ksh

@@ -0,0 +1,98 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+export SIZE=1G
+export VDIR=$TESTDIR/disk.persist_l2arc
+export VDEV="$VDIR/a"
+export VDEV_CACHE="$VDIR/b"
+
+# fio options
+export DIRECTORY=/$TESTPOOL-l2arc
+export NUMJOBS=4
+export RUNTIME=30
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+#
+# DESCRIPTION:
+#	System with compressed_arc disabled succeeds at reading from L2ARC
+#
+# STRATEGY:
+#	1. Disable compressed_arc.
+#	2. Create pool with a cache device and compression enabled.
+#	3. Read the number of L2ARC checksum failures.
+#	4. Create a random file in that pool and random read for 30 sec.
+#	5. Read the number of L2ARC checksum failures.
+#
+
+verify_runnable "global"
+
+log_assert "L2ARC with compressed_arc disabled succeeds."
+
+origin_carc_setting=$(get_tunable COMPRESSED_ARC_ENABLED)
+
+function cleanup
+{
+	if poolexists $TESTPOOL-l2arc ; then
+		destroy_pool $TESTPOOL-l2arc
+	fi
+
+	log_must set_tunable64 COMPRESSED_ARC_ENABLED $origin_carc_setting
+}
+log_onexit cleanup
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must mkfile $SIZE $VDEV
+
+# Disable Compressed ARC so that in-ARC and on-disk will not match
+log_must set_tunable64 COMPRESSED_ARC_ENABLED 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -O compression=lz4 -f $TESTPOOL-l2arc $VDEV cache $VDEV_CACHE
+
+l2_cksum_bad_start=$(get_arcstat l2_cksum_bad)
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+l2_cksum_bad_end=$(get_arcstat l2_cksum_bad)
+
+log_note "L2ARC Failed Checksums before: $l2_cksum_bad_start After:"\
+	"$l2_cksum_bad_end"
+log_must test $(( $l2_cksum_bad_end - $l2_cksum_bad_start )) -eq 0
+
+log_must zpool destroy -f $TESTPOOL-l2arc
+
+log_pass "L2ARC with compressed_arc disabled does not result in checksum"\
+	"errors."

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh
new file mode 100755
index 0000000..fb460da
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh

@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+export SIZE=1G
+export VDIR=$TESTDIR/disk.persist_l2arc
+export VDEV="$VDIR/a"
+export VDEV_CACHE="$VDIR/b"
+export PASSPHRASE="password"
+
+# fio options
+export DIRECTORY=/$TESTPOOL-l2arc/encrypted
+export NUMJOBS=4
+export RUNTIME=30
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+#
+# DESCRIPTION:
+#	Encrypted dataset with compressed_arc enabled can read from L2ARC
+#
+# STRATEGY:
+#	1. Enable compressed_arc.
+#	2. Create pool with a cache device, encryption, and compression enabled.
+#	3. Read the number of L2ARC checksum failures.
+#	4. Create a random file in that pool and random read for 30 sec.
+#	5. Read the number of L2ARC checksum failures.
+#
+
+verify_runnable "global"
+
+log_assert "L2ARC with encryption enabled succeeds."
+
+origin_carc_setting=$(get_tunable COMPRESSED_ARC_ENABLED)
+
+function cleanup
+{
+	if poolexists $TESTPOOL-l2arc ; then
+		destroy_pool $TESTPOOL-l2arc
+	fi
+
+	log_must set_tunable64 COMPRESSED_ARC_ENABLED $origin_carc_setting
+}
+log_onexit cleanup
+
+# Enable Compressed ARC so that in-ARC and on-disk will match
+log_must set_tunable64 COMPRESSED_ARC_ENABLED 1
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must mkfile $SIZE $VDEV
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -O compression=zstd -f $TESTPOOL-l2arc $VDEV cache $VDEV_CACHE
+
+log_must eval "echo $PASSPHRASE | zfs create -o compression=zstd " \
+	"-o encryption=on -o keyformat=passphrase -o keylocation=prompt " \
+	"$TESTPOOL-l2arc/encrypted"
+
+l2_cksum_bad_start=$(get_arcstat l2_cksum_bad)
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+l2_cksum_bad_end=$(get_arcstat l2_cksum_bad)
+
+log_note "L2ARC Failed Checksums before: $l2_cksum_bad_start After:"\
+	"$l2_cksum_bad_end"
+log_must test $(( $l2_cksum_bad_end - $l2_cksum_bad_start )) -eq 0
+
+log_must zpool destroy -f $TESTPOOL-l2arc
+
+log_pass "L2ARC with encryption and compressed_arc enabled does not result in"\
+	"checksum errors."

diff --git a/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh
new file mode 100755
index 0000000..45ef489
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh

@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 The FreeBSD Foundation [1]
+#
+# [1] Portions of this software were developed by Allan Jude
+#     under sponsorship from the FreeBSD Foundation.
+
+. $STF_SUITE/include/libtest.shlib
+
+export SIZE=1G
+export VDIR=$TESTDIR/disk.persist_l2arc
+export VDEV="$VDIR/a"
+export VDEV_CACHE="$VDIR/b"
+export PASSPHRASE="password"
+
+# fio options
+export DIRECTORY=/$TESTPOOL-l2arc/encrypted
+export NUMJOBS=4
+export RUNTIME=30
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+#
+# DESCRIPTION:
+#	System with compressed_arc disabled succeeds at reading from L2ARC
+#
+# STRATEGY:
+#	1. Disable compressed_arc.
+#	2. Create pool with a cache device, encryption, and compression enabled.
+#	3. Read the number of L2ARC checksum failures.
+#	4. Create a random file in that pool and random read for 30 sec.
+#	5. Read the number of L2ARC checksum failures.
+#
+
+verify_runnable "global"
+
+log_assert "L2ARC with compressed_arc disabled succeeds."
+
+origin_carc_setting=$(get_tunable COMPRESSED_ARC_ENABLED)
+
+function cleanup
+{
+	# Only tear the pool down when it actually exists.
+	poolexists $TESTPOOL-l2arc && destroy_pool $TESTPOOL-l2arc
+
+	# Undo the tunable change using the value read before the test.
+	log_must set_tunable64 COMPRESSED_ARC_ENABLED $origin_carc_setting
+}
+log_onexit cleanup
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must mkfile $SIZE $VDEV
+
+# Disable Compressed ARC so that in-ARC and on-disk will not match
+log_must set_tunable64 COMPRESSED_ARC_ENABLED 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -O compression=zstd -f $TESTPOOL-l2arc $VDEV cache $VDEV_CACHE
+
+log_must eval "echo $PASSPHRASE | zfs create -o compression=zstd " \
+	"-o encryption=on -o keyformat=passphrase -o keylocation=prompt " \
+	"$TESTPOOL-l2arc/encrypted"
+
+l2_cksum_bad_start=$(get_arcstat l2_cksum_bad)
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+l2_cksum_bad_end=$(get_arcstat l2_cksum_bad)
+
+log_note "L2ARC Failed Checksums before: $l2_cksum_bad_start After:"\
+	"$l2_cksum_bad_end"
+log_must test $(( $l2_cksum_bad_end - $l2_cksum_bad_start )) -eq 0
+
+log_must zpool destroy -f $TESTPOOL-l2arc
+
+log_pass "L2ARC with encryption enabled and compressed_arc disabled does not"\
+	"result in checksum errors."

diff --git a/zfs/tests/zfs-tests/tests/functional/crtime/Makefile.am b/zfs/tests/zfs-tests/tests/functional/crtime/Makefile.am
new file mode 100644
index 0000000..13e1c2d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/crtime/Makefile.am

@@ -0,0 +1,5 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/crtime
+dist_pkgdata_SCRIPTS = \
+	cleanup.ksh \
+	setup.ksh \
+	crtime_001_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh
new file mode 100755
index 0000000..3166bd6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/crtime/cleanup.ksh

@@ -0,0 +1,34 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh
new file mode 100755
index 0000000..4f98105
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/crtime/crtime_001_pos.ksh

@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#
+# Verify crtime is functional with xattr=on|sa
+
+verify_runnable "both"
+
+#
+# The statx system call was first added in the 4.11 Linux kernel.  Prior to this
+# change there was no mechanism to obtain birth time on Linux.  Therefore, this
+# test is expected to fail on older kernels and is skipped.
+#
+if is_linux; then
+	if [[ $(linux_version) -lt $(linux_version "4.11") ]]; then
+		log_unsupported "Requires statx(2) system call on Linux"
+	fi
+	typeset stat_version=$(stat --version | awk '{ print $NF; exit }')
+	if compare_version_gte "8.30" "${stat_version}"; then
+		log_unsupported "Requires coreutils stat(1) > 8.30 on Linux"
+	fi
+fi
+
+log_assert "Verify crtime is functional."
+
+set -A args "sa" "on"
+typeset TESTFILE=$TESTDIR/testfile
+
+for arg in ${args[*]}; do
+	log_note "Testing with xattr set to $arg"
+	log_must zfs set xattr=$arg $TESTPOOL
+	rm -f $TESTFILE
+	log_must touch $TESTFILE
+	typeset -i crtime=$(stat_crtime $TESTFILE)
+	typeset -i ctime=$(stat_ctime $TESTFILE)
+	if (( crtime != ctime )); then
+		log_fail "Incorrect crtime ($crtime != $ctime)"
+	fi
+	log_must touch $TESTFILE
+	typeset -i crtime1=$(stat_crtime $TESTFILE)
+	if (( crtime1 != crtime )); then
+		log_fail "touch modified crtime ($crtime1 != $crtime)"
+	fi
+done
+
+log_pass "Verified crtime is functional."

diff --git a/zfs/tests/zfs-tests/tests/functional/crtime/setup.ksh b/zfs/tests/zfs-tests/tests/functional/crtime/setup.ksh
new file mode 100755
index 0000000..fc5cec3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/crtime/setup.ksh

@@ -0,0 +1,35 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+default_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/ctime/ctime.c b/zfs/tests/zfs-tests/tests/functional/ctime/ctime.c
index 1cd1832..d01fa0d 100644
--- a/zfs/tests/zfs-tests/tests/functional/ctime/ctime.c
+++ b/zfs/tests/zfs-tests/tests/functional/ctime/ctime.c

@@ -31,7 +31,9 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#ifndef __FreeBSD__
 #include <sys/xattr.h>
+#endif
 #include <utime.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -251,6 +253,7 @@
 	return (ret);
 }
 
+#ifndef __FreeBSD__
 static int
 do_xattr(const char *pfile)
 {
@@ -268,6 +271,7 @@
 	}
 	return (ret);
 }
+#endif
 
 static void
 cleanup(void)
@@ -289,7 +293,9 @@
 	{ ST_CTIME,	"st_ctime",	do_chown 	},
 	{ ST_CTIME,	"st_ctime",	do_link		},
 	{ ST_CTIME,	"st_ctime",	do_utime	},
+#ifndef __FreeBSD__
 	{ ST_CTIME,	"st_ctime",	do_xattr	},
+#endif
 };
 
 #define	NCOMMAND (sizeof (timetest_table) / sizeof (timetest_table[0]))

diff --git a/zfs/tests/zfs-tests/tests/functional/deadman/Makefile.am b/zfs/tests/zfs-tests/tests/functional/deadman/Makefile.am
index 7b70ca0..097f23e 100644
--- a/zfs/tests/zfs-tests/tests/functional/deadman/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/deadman/Makefile.am

@@ -1,5 +1,6 @@
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
 dist_pkgdata_SCRIPTS = \
+	deadman_ratelimit.ksh \
 	deadman_sync.ksh \
 	deadman_zio.ksh
 

diff --git a/zfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh
new file mode 100755
index 0000000..469117a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh

@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+# DESCRIPTION:
+#	Verify spa deadman events are rate limited
+#
+# STRATEGY:
+#	1. Reduce the zfs_slow_io_events_per_second to 1.
+#	2. Reduce the zfs_deadman_ziotime_ms to 1ms.
+#	3. Write data to a pool and read it back.
+#	4. Verify deadman events have been produced at a reasonable rate.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/deadman/deadman.cfg
+
+verify_runnable "both"
+
+function cleanup
+{
+	zinject -c all
+	default_cleanup_noexit
+	rm -f $TEST_BASE_DIR/devnull	# dd read-back scratch file
+	set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+	set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
+}
+
+log_assert "Verify spa deadman events are rate limited"
+log_onexit cleanup
+
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1
+log_must set_tunable64 DEADMAN_ZIOTIME_MS 1
+
+# Create a new pool in order to use the updated deadman settings.
+default_setup_noexit $DISK1
+log_must zpool events -c
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+log_must zinject -d $DISK1 -D 5:1 $TESTPOOL
+log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync
+
+events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman)
+log_note "events=$events"
+if [ "$events" -lt 1 ]; then
+	log_fail "Expect >= 1 deadman events, $events found"
+fi
+if [ "$events" -gt 10 ]; then
+	log_fail "Expect <= 10 deadman events, $events found"
+fi
+
+log_pass "Verify spa deadman events are rate limited"

diff --git a/zfs/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
index a5537c4..fd6e8c8 100755
--- a/zfs/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh

@@ -46,17 +46,17 @@
 	log_must zinject -c all
 	default_cleanup_noexit
 
-	log_must set_tunable64 zfs_deadman_synctime_ms $SYNCTIME_DEFAULT
-	log_must set_tunable64 zfs_deadman_checktime_ms $CHECKTIME_DEFAULT
-	log_must set_tunable64 zfs_deadman_failmode $FAILMODE_DEFAULT
+	log_must set_tunable64 DEADMAN_SYNCTIME_MS $SYNCTIME_DEFAULT
+	log_must set_tunable64 DEADMAN_CHECKTIME_MS $CHECKTIME_DEFAULT
+	log_must set_tunable64 DEADMAN_FAILMODE $FAILMODE_DEFAULT
 }
 
 log_assert "Verify spa deadman detects a hung txg"
 log_onexit cleanup
 
-log_must set_tunable64 zfs_deadman_synctime_ms 5000
-log_must set_tunable64 zfs_deadman_checktime_ms 1000
-log_must set_tunable64 zfs_deadman_failmode "wait"
+log_must set_tunable64 DEADMAN_SYNCTIME_MS 5000
+log_must set_tunable64 DEADMAN_CHECKTIME_MS 1000
+log_must set_tunable64 DEADMAN_FAILMODE "wait"
 
 # Create a new pool in order to use the updated deadman settings.
 default_setup_noexit $DISK1
@@ -73,13 +73,17 @@
 log_must zpool sync
 
 # Log txg sync times for reference and the zpool event summary.
-log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+if is_freebsd; then
+	log_must sysctl -n kstat.zfs.$TESTPOOL.txgs
+else
+	log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+fi
 log_must zpool events
 
-# Verify at least 5 deadman events were logged.  The first after 5 seconds,
-# and another each second thereafter until the delay  is clearer.
+# Verify at least 4 deadman events were logged.  The first after 5 seconds,
+# and another each second thereafter until the delay is cleared.
 events=$(zpool events | grep -c ereport.fs.zfs.deadman)
-if [ "$events" -lt 5 ]; then
-	log_fail "Expect >=5 deadman events, $events found"
+if [ "$events" -lt 4 ]; then
+	log_fail "Expect >=4 deadman events, $events found"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/deadman/deadman_zio.ksh b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_zio.ksh
index a61be99..c1cfc11 100755
--- a/zfs/tests/zfs-tests/tests/functional/deadman/deadman_zio.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/deadman/deadman_zio.ksh

@@ -49,19 +49,19 @@
 	log_must zinject -c all
 	default_cleanup_noexit
 
-	log_must set_tunable64 zfs_deadman_ziotime_ms $ZIOTIME_DEFAULT
-	log_must set_tunable64 zfs_deadman_checktime_ms $CHECKTIME_DEFAULT
-	log_must set_tunable64 zfs_deadman_failmode $FAILMODE_DEFAULT
+	log_must set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
+	log_must set_tunable64 DEADMAN_CHECKTIME_MS $CHECKTIME_DEFAULT
+	log_must set_tunable64 DEADMAN_FAILMODE $FAILMODE_DEFAULT
 }
 
 log_assert "Verify zio deadman detects a hung zio"
 log_onexit cleanup
 
 # 1. Reduce the zfs_deadman_ziotime_ms to 5s.
-log_must set_tunable64 zfs_deadman_ziotime_ms 5000
+log_must set_tunable64 DEADMAN_ZIOTIME_MS 5000
 # 2. Reduce the zfs_deadman_checktime_ms to 1s.
-log_must set_tunable64 zfs_deadman_checktime_ms 1000
-log_must set_tunable64 zfs_deadman_failmode "wait"
+log_must set_tunable64 DEADMAN_CHECKTIME_MS 1000
+log_must set_tunable64 DEADMAN_FAILMODE "wait"
 
 # Create a new pool in order to use the updated deadman settings.
 default_setup_noexit $DISK1

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/cleanup.ksh
index 31a5759..1951c00 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/cleanup.ksh

@@ -43,8 +43,12 @@
 	fi
 fi
 
+if is_freebsd; then
+	log_must sysctl vfs.usermount=0
+fi
+
 if is_linux; then
-	log_must set_tunable64 zfs_admin_snapshot 0
+	log_must set_tunable64 ADMIN_SNAPSHOT 0
 fi
 
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib b/zfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib
index d088eaf..e39b015 100644
--- a/zfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/delegate_common.kshlib

@@ -101,7 +101,7 @@
 
 			log_note "Check $type $user $perm $dtst"
 			if ((ret != 0)) ; then
-				log_note "Fail: $user should have $perm " \
+				log_note "Fail: $user should have $perm" \
 					"on $dtst"
 				return 1
 			fi
@@ -379,7 +379,7 @@
 	typeset dtst=$3
 
 	typeset oldval
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset snap=$dtst@snap.$stamp
 
 	typeset -i ret=1
@@ -408,7 +408,7 @@
 	typeset fs=$3
 
 	typeset dtst
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset newfs=$fs/newfs.$stamp
 	typeset newvol=$fs/newvol.$stamp
 	typeset bak_user=$TEST_BASE_DIR/bak.$user.$stamp
@@ -480,9 +480,10 @@
 	typeset perm=$2
 	typeset dtst=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 
 	user_run $user zfs set "$user:ts=$stamp" $dtst
+	zpool sync ${dtst%%/*}
 	if [[ $stamp != $(get_prop "$user:ts" $dtst) ]]; then
 		return 1
 	fi
@@ -564,7 +565,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset newfs=$fs/nfs.$stamp
 	typeset newvol=$fs/nvol.$stamp
 
@@ -684,7 +685,7 @@
 
 # Verify that given the correct delegation, a regular user can:
 #	Take a snapshot of an unmounted dataset
-#	Take a snapshot of an mounted dataset
+#	Take a snapshot of a mounted dataset
 #	Create a snapshot by making a directory in the .zfs/snapshot directory
 function verify_fs_snapshot
 {
@@ -692,7 +693,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset snap=$fs@snap.$stamp
 	typeset mntpt=$(get_prop mountpoint $fs)
 
@@ -716,12 +717,15 @@
 	fi
 	log_must zfs destroy $snap
 
-	typeset snapdir=${mntpt}/.zfs/snapshot/snap.$stamp
-	user_run $user mkdir $snapdir
-	if ! datasetexists $snap ; then
-		return 1
+	# Creating snaps via mkdir is not supported on FreeBSD
+	if ! is_freebsd; then
+		typeset snapdir=${mntpt}/.zfs/snapshot/snap.$stamp
+		user_run $user mkdir $snapdir
+		if ! datasetexists $snap ; then
+			return 1
+		fi
+		log_must zfs destroy $snap
 	fi
-	log_must zfs destroy $snap
 
 	return 0
 }
@@ -733,7 +737,7 @@
 	typeset fs=$3
 
 	typeset oldval
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset snap=$fs@snap.$stamp
 	typeset mntpt=$(get_prop mountpoint $fs)
 
@@ -766,7 +770,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basefs=${fs%/*}
 	typeset snap=$fs@snap.$stamp
 	typeset clone=$basefs/cfs.$stamp
@@ -811,7 +815,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basefs=${fs%/*}
 	typeset snap=$fs@snap.$stamp
 	typeset renamefs=$basefs/nfs.$stamp
@@ -894,7 +898,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset mntpt=$(get_prop mountpoint $fs)
 	typeset newmntpt=$TEST_BASE_DIR/mnt.$stamp
 
@@ -962,7 +966,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset mntpt=$(get_prop mountpoint $fs)
 	typeset newmntpt=$TEST_BASE_DIR/mnt.$stamp
 
@@ -1001,7 +1005,7 @@
 	typeset perm=$2
 	typeset fs=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basefs=${fs%/*}
 	typeset snap=$fs@snap.$stamp
 	typeset clone=$basefs/cfs.$stamp
@@ -1057,7 +1061,7 @@
 	typeset fs=$3
 
 	typeset oldval
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 
 	if ! ismounted $fs ; then
 		set -A modes "on" "off"
@@ -1368,7 +1372,7 @@
 	typeset perm=$2
 	typeset vol=$3
 
-	typeset stamp=${perm}.${user}.$(date +'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basevol=${vol%/*}
 	typeset snap=$vol@snap.$stamp
 
@@ -1393,7 +1397,7 @@
 	typeset perm=$2
 	typeset vol=$3
 
-	typeset stamp=${perm}.${user}.$(date+'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basevol=${vol%/*}
 	typeset snap=$vol@snap.$stamp
 
@@ -1428,7 +1432,7 @@
 	typeset perm=$2
 	typeset vol=$3
 
-	typeset stamp=${perm}.${user}.$(date+'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basevol=${vol%/*}
 	typeset snap=$vol@snap.$stamp
 	typeset clone=$basevol/cvol.$stamp
@@ -1474,7 +1478,7 @@
 	typeset perm=$2
 	typeset vol=$3
 
-	typeset stamp=${perm}.${user}.$(date+'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basevol=${vol%/*}
 	typeset snap=$vol@snap.$stamp
 	typeset clone=$basevol/cvol.$stamp
@@ -1521,7 +1525,7 @@
 	typeset perm=$2
 	typeset vol=$3
 
-	typeset stamp=${perm}.${user}.$(date+'%F-%T-%N')
+	typeset stamp=${perm}.${user}.$RANDOM
 	typeset basevol=${vol%/*}
 	typeset snap=$vol@snap.$stamp
 	typeset clone=$basevol/cvol.$stamp

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/setup.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/setup.ksh
index 149cf78..2f13da7 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/setup.ksh

@@ -33,7 +33,7 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
 
-if ! is_linux; then
+if is_illumos; then
 	# check svc:/network/nis/client:default state
 	# disable it if the state is ON
 	# and the state will be restored during cleanup.ksh
@@ -44,6 +44,11 @@
 	fi
 fi
 
+if is_freebsd; then
+	# To pass user mount tests
+	log_must sysctl vfs.usermount=1
+fi
+
 cleanup_user_group
 
 # Create staff group and add two user to it
@@ -71,7 +76,7 @@
 DISK=${DISKS%% *}
 
 if is_linux; then
-	log_must set_tunable64 zfs_admin_snapshot 1
+	log_must set_tunable64 ADMIN_SNAPSHOT 1
 fi
 
 default_volume_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_001_pos.ksh
index 3db1af5..1e0ed80 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_001_pos.ksh

@@ -83,7 +83,7 @@
 	grep -w 'everyone' > /dev/null 2>&1
 then
 	group_added="TRUE"
-	log_must groupadd everyone
+	log_must add_group everyone
 fi
 
 for dtst in $DATASETS ; do
@@ -92,7 +92,7 @@
 done
 log_must restore_root_datasets
 if [[ $group_added == "TRUE" ]]; then
-	log_must groupdel everyone
+	log_must del_group everyone
 fi
 
 log_pass "everyone is always interpreted as keyword passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_002_pos.ksh
index 23ed806..fc603ea 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_002_pos.ksh

@@ -50,6 +50,14 @@
 {
 	if id $STAFF_GROUP > /dev/null 2>&1; then
 		log_must del_user $STAFF_GROUP
+		if is_freebsd; then
+			# pw userdel also deletes the group with the same name
+			# and has no way to opt out of this behavior (yet).
+			# Recreate the group as a workaround.
+			log_must add_group $STAFF_GROUP
+			log_must add_user $STAFF_GROUP $STAFF1
+			log_must add_user $STAFF_GROUP $STAFF2
+		fi
 	fi
 
 	restore_root_datasets
@@ -71,6 +79,14 @@
 log_must restore_root_datasets
 
 log_must del_user $STAFF_GROUP
+if is_freebsd; then
+	# pw userdel also deletes the group with the same name
+	# and has no way to opt out of this behavior (yet).
+	# Recreate the group as a workaround.
+	log_must add_group $STAFF_GROUP
+	log_must add_user $STAFF_GROUP $STAFF1
+	log_must add_user $STAFF_GROUP $STAFF2
+fi
 for dtst in $datasets ; do
 	log_must zfs allow $STAFF_GROUP $perms $dtst
 	log_must verify_perm $dtst $perms $STAFF1 $STAFF2

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_007_pos.ksh
index ea43fcf..f321325 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_007_pos.ksh

@@ -66,7 +66,7 @@
 log_must zfs create $grandchild
 
 #
-# Setting different permissions to the same set on two level.
+# Setting different permissions to the same set on two levels.
 # But only assign the user at one level.
 #
 log_must zfs allow -s @set $perms1 $ROOT_TESTFS
@@ -74,7 +74,8 @@
 log_must zfs allow $STAFF1 @set $childfs
 
 #
-# Verify only perms2 is valid to user on the level which he was assigned.
+# Verify that the user only has the permissions that they were assigned
+# in each filesystem.
 #
 log_must verify_noperm $ROOT_TESTFS $perms1 $STAFF1
 for fs in $childfs $grandchild ; do

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_008_pos.ksh
index 48de842..b0e1df3 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_008_pos.ksh

@@ -33,13 +33,13 @@
 
 #
 # DESCRIPTION:
-#	non-root user can allow any permissions which he is holding to
-#	other else user when it get 'allow' permission.
+#	A non-root user can use 'zfs allow' to delegate permissions that
+#	they have, if they also have the 'allow' permission.
 #
 # STRATEGY:
 #	1. Set two set permissions to two datasets locally.
-#	2. Verify the non-root user can allow permission if he has allow
-#	   permission.
+#	2. Verify the non-root user can use 'zfs allow' if they have
+#	   'allow' permission.
 #
 
 verify_runnable "both"
@@ -69,8 +69,8 @@
 	log_must user_run $STAFF1 zfs allow -l $OTHER1 $perms1 $dtst
 	log_must verify_perm $dtst $perms1 $OTHER1
 
-	# $perms2 was not allow to $STAFF1, so he have no permission to
-	# delegate permission to other else.
+	# $perms2 was not allowed to $STAFF1, so they do not have
+	# permission to delegate permission to other users.
 	log_mustnot user_run $STAFF1 zfs allow $OTHER1 $perms2 $dtst
 	log_must verify_noperm $dtst $perms2 $OTHER1
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh
index 45fdb5b..a6f1224 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_009_neg.ksh

@@ -51,7 +51,6 @@
 for dtst in $DATASETS ; do
 	log_mustnot eval "zfs allow -s @$longset $dtst"
 	# Create non-existent permission set
-	typeset timestamp=$(date +'%F-%R:%S')
 	log_mustnot zfs allow -s @non-existent $dtst
 	log_mustnot zfs allow $STAFF "atime,created,mounted" $dtst
 	log_mustnot zfs allow $dtst $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh
index 7b70e13..3a8ef5e 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh

@@ -87,6 +87,47 @@
 		promote		true		true	\
 		xattr		true		false	\
 		receive		true		false
+
+elif is_freebsd; then
+#				Results in	Results in
+#		Permission	Filesystem	Volume
+#
+# Removed for FreeBSD
+# - jailed	- jailing requires superuser privileges
+# - sharenfs	- sharing requires superuser privileges
+# - share	- sharing requires superuser privileges
+# - xattr	- Not supported on FreeBSD
+#
+set -A perms	create		true		false	\
+		snapshot	true		true	\
+		mount		true		false	\
+		send		true		true	\
+		allow		true		true	\
+		quota		true		false	\
+		reservation	true		true	\
+		dnodesize	true		false	\
+		recordsize	true		false	\
+		mountpoint	true		false	\
+		checksum	true		true	\
+		compression	true		true	\
+		canmount	true		false	\
+		atime		true		false	\
+		devices		true		false	\
+		exec		true		false	\
+		volsize		false		true	\
+		setuid		true		false	\
+		readonly	true		true	\
+		snapdir		true		false	\
+		userprop	true		true	\
+		aclmode		true		false	\
+		aclinherit	true		false	\
+		rollback	true		true	\
+		clone		true		true	\
+		rename		true		true	\
+		promote		true		true	\
+		receive		true		false   \
+		destroy		true		true
+
 else
 
 set -A perms	create		true		false	\

diff --git a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh
index 4da559b..fd95db9 100755
--- a/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh

@@ -61,6 +61,12 @@
 		devices exec volsize setuid readonly snapdir userprop \
 		rollback clone rename promote dnodesize \
 		zoned xattr receive destroy
+elif is_freebsd; then
+set -A perms	create snapshot mount send allow quota reservation \
+		recordsize mountpoint checksum compression canmount atime \
+		devices exec volsize setuid readonly snapdir userprop \
+		aclmode aclinherit rollback clone rename promote dnodesize \
+		jailed receive destroy
 else
 set -A perms	create snapshot mount send allow quota reservation \
 		recordsize mountpoint checksum compression canmount atime \

diff --git a/zfs/tests/zfs-tests/tests/functional/events/.gitignore b/zfs/tests/zfs-tests/tests/functional/events/.gitignore
new file mode 100644
index 0000000..ed5af03
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/events/.gitignore

@@ -0,0 +1 @@
+/zed_fd_spill-zedlet

diff --git a/zfs/tests/zfs-tests/tests/functional/events/Makefile.am b/zfs/tests/zfs-tests/tests/functional/events/Makefile.am
index e1fe490..92ce5db 100644
--- a/zfs/tests/zfs-tests/tests/functional/events/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/events/Makefile.am

@@ -1,11 +1,18 @@
+include $(top_srcdir)/config/Rules.am
+
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/events
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
 	events_001_pos.ksh \
 	events_002_pos.ksh \
-	zed_rc_filter.ksh
+	zed_rc_filter.ksh \
+	zed_fd_spill.ksh
 
 dist_pkgdata_DATA = \
 	events.cfg \
 	events_common.kshlib
+
+pkgexecdir = $(pkgdatadir)
+pkgexec_PROGRAMS = zed_fd_spill-zedlet
+zed_fd_spill_zedlet_SOURCES = zed_fd_spill-zedlet.c

diff --git a/zfs/tests/zfs-tests/tests/functional/events/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/events/cleanup.ksh
index 4905342..699bc28 100755
--- a/zfs/tests/zfs-tests/tests/functional/events/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/events/cleanup.ksh

@@ -26,6 +26,6 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-zed_cleanup all-debug.sh all-syslog.sh
+zed_cleanup all-debug.sh all-syslog.sh all-dumpfds
 
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/events/events_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/events/events_001_pos.ksh
index 5121f66..189cf43 100755
--- a/zfs/tests/zfs-tests/tests/functional/events/events_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/events/events_001_pos.ksh

@@ -94,22 +94,22 @@
     -e "resource.fs.zfs.statechange" \
     -e "sysevent.fs.zfs.config_sync" \
     "zpool offline $MPOOL $VDEV1"
-run_and_verify -p "$MPOOL" -d 10 \
+run_and_verify -p "$MPOOL" \
     -e "resource.fs.zfs.statechange" \
     -e "sysevent.fs.zfs.vdev_online" \
-    -e "sysevent.fs.zfs.resilver_start" \
-    -e "sysevent.fs.zfs.resilver_finish" \
-    -e "sysevent.fs.zfs.history_event" \
     -e "sysevent.fs.zfs.config_sync" \
+    -e "sysevent.fs.zfs.resilver_start" \
+    -e "sysevent.fs.zfs.history_event" \
+    -e "sysevent.fs.zfs.resilver_finish" \
     "zpool online $MPOOL $VDEV1"
 
 # Attach then detach a device from the mirror.
-run_and_verify -p "$MPOOL" -d 10 \
+run_and_verify -p "$MPOOL" \
     -e "sysevent.fs.zfs.vdev_attach" \
     -e "sysevent.fs.zfs.resilver_start" \
-    -e "sysevent.fs.zfs.resilver_finish" \
-    -e "sysevent.fs.zfs.history_event" \
     -e "sysevent.fs.zfs.config_sync" \
+    -e "sysevent.fs.zfs.history_event" \
+    -e "sysevent.fs.zfs.resilver_finish" \
     "zpool attach $MPOOL $VDEV1 $VDEV4"
 run_and_verify -p "$MPOOL" \
     -e "sysevent.fs.zfs.vdev_remove" \
@@ -117,20 +117,20 @@
     "zpool detach $MPOOL $VDEV4"
 
 # Replace a device
-run_and_verify -p "$MPOOL" -d 10 \
+run_and_verify -p "$MPOOL" \
     -e "sysevent.fs.zfs.vdev_attach" \
     -e "sysevent.fs.zfs.resilver_start" \
+    -e "sysevent.fs.zfs.config_sync" \
+    -e "sysevent.fs.zfs.history_event" \
     -e "sysevent.fs.zfs.resilver_finish" \
     -e "sysevent.fs.zfs.vdev_remove" \
-    -e "sysevent.fs.zfs.history_event" \
-    -e "sysevent.fs.zfs.config_sync" \
     "zpool replace -f $MPOOL $VDEV1 $VDEV4"
 
 # Scrub a pool.
-run_and_verify -p "$MPOOL" -d 10 \
+run_and_verify -p "$MPOOL" \
     -e "sysevent.fs.zfs.scrub_start" \
-    -e "sysevent.fs.zfs.scrub_finish" \
     -e "sysevent.fs.zfs.history_event" \
+    -e "sysevent.fs.zfs.scrub_finish" \
     "zpool scrub $MPOOL"
 
 # Export then import a pool
@@ -139,9 +139,9 @@
     -e "sysevent.fs.zfs.config_sync" \
     "zpool export $MPOOL"
 run_and_verify -p "$MPOOL" \
-    -e "sysevent.fs.zfs.pool_import" \
-    -e "sysevent.fs.zfs.history_event" \
     -e "sysevent.fs.zfs.config_sync" \
+    -e "sysevent.fs.zfs.history_event" \
+    -e "sysevent.fs.zfs.pool_import" \
     "zpool import -d $TEST_BASE_DIR $MPOOL"
 
 # Destroy the pool

diff --git a/zfs/tests/zfs-tests/tests/functional/events/events_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/events/events_002_pos.ksh
index 495b2bb..af2be33 100755
--- a/zfs/tests/zfs-tests/tests/functional/events/events_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/events/events_002_pos.ksh

@@ -50,11 +50,11 @@
 		[[ -f $file ]] && rm -f $file
 	done
 
-	log_must rm -f $TMP_EVENTS_ZED $TMP_EVENTS_ZED
+	log_must rm -f $TMP_EVENTS_ZED
 	log_must zed_stop
 }
 
-log_assert "Verify ZED handles missed events on when starting"
+log_assert "Verify ZED handles missed events when starting"
 log_onexit cleanup
 
 log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2
@@ -66,7 +66,7 @@
 
 # 2. Start the ZED and verify it handles missed events.
 log_must zed_start
-log_must file_wait $ZED_DEBUG_LOG
+log_must file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.config_sync' 150
 log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED
 
 awk -v event="sysevent.fs.zfs.pool_create" \
@@ -81,9 +81,7 @@
 # 4. Generate additional events.
 log_must zpool offline $MPOOL $VDEV1
 log_must zpool online $MPOOL $VDEV1
-while ! is_pool_resilvered $MPOOL; do
-	sleep 1
-done
+log_must zpool wait -t resilver $MPOOL
 
 log_must zpool scrub $MPOOL
 
@@ -94,12 +92,11 @@
 
 # 5. Start the ZED and verify it only handled the new missed events.
 log_must zed_start
-log_must file_wait $ZED_DEBUG_LOG 15
+log_must file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.resilver_finish' 150
 log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED
 
-log_mustnot grep -q "sysevent.fs.zfs.pool_create" $TMP_EVENTS_ZED
+log_mustnot file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.pool_create' 30
 log_must grep -q "sysevent.fs.zfs.vdev_online" $TMP_EVENTS_ZED
 log_must grep -q "sysevent.fs.zfs.resilver_start" $TMP_EVENTS_ZED
-log_must grep -q "sysevent.fs.zfs.resilver_finish" $TMP_EVENTS_ZED
 
 log_pass "Verify ZED handles missed events on when starting"

diff --git a/zfs/tests/zfs-tests/tests/functional/events/events_common.kshlib b/zfs/tests/zfs-tests/tests/functional/events/events_common.kshlib
index 26afc10..9c58791 100644
--- a/zfs/tests/zfs-tests/tests/functional/events/events_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/events/events_common.kshlib

@@ -23,11 +23,35 @@
 # Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
 # Use is subject to license terms.
 #
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
 
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/events/events.cfg
 
 #
+# Wait up to 'timeout' seconds (default 120) for 'event' to appear in 'file'.
+function file_wait_event # file event timeout
+{
+	typeset file=$1
+	typeset event=$2
+	typeset timeout=${3:-120}
+	# Returns 0 once a "ZEVENT_CLASS=<event>" line is seen, 1 on timeout.
+	SECONDS=0
+	# 'event' is used as a grep regex; poll the caller's 'file' each second.
+	until grep -q "^ZEVENT_CLASS=$event" "$file" ; do
+		if [[ $SECONDS -gt $timeout ]]; then
+			echo file_wait_event exceeded $SECONDS seconds
+			return 1
+		fi
+
+		sleep 1
+	done
+
+	return 0;
+}
+
+#
 # Wait for up to 'timeout' seconds for the 'file' to settle, i.e.
 # not be updated for a period of 'delay' seconds.
 #
@@ -41,6 +65,7 @@
 
 	while [ $(( $(date +%s) - $(stat -c %Y $file) )) -lt $delay ]; do
 		if [[ $SECONDS -gt $timeout ]]; then
+			echo file_wait exceeded $SECONDS seconds
 			return 1
 		fi
 
@@ -52,30 +77,22 @@
 
 function run_and_verify
 {
-	typeset delay event pool zedlog
+	typeset event pool
 	set -A events
 
-	while getopts "d:e:p:z:" opt; do
+	while getopts "e:p:" opt; do
 		case $opt in
-		d)
-			delay=$OPTARG
-			;;
 		e)
-			events[${#events[*]}+1]=$OPTARG
+			events+=("$OPTARG")
 			;;
 		p)
 			pool=$OPTARG
 			;;
-		z)
-			zedlog=$OPTARG
-			;;
 		esac
 	done
 	shift $(($OPTIND - 1))
 
 	pool=${pool:-$TESTPOOL}
-	delay=${delay:-3}
-	zedlog=${zedlog:-$ZED_DEBUG_LOG}
 	fullcmd="$1"
 	cmd=$(echo $fullcmd | awk '{print $1}')
 
@@ -87,21 +104,38 @@
 
 	# Remove any previous events from the logs.
 	log_must zpool events -c
-	log_must truncate -s 0 $zedlog
+	log_must truncate -s 0 $ZED_DEBUG_LOG
 
 	# Run the command as provided.
 	log_must eval "$fullcmd"
 
 	# Collect the new events and verify there are some.
 	log_must zpool sync -f
-	log_must file_wait $zedlog $delay
-	log_must cp $zedlog $TMP_EVENTS_ZED
 	log_must eval "zpool events >$TMP_EVENTS 2>/dev/null"
 	log_must eval "zpool events -v > $TMP_EVENTS_FULL 2>/dev/null"
 
 	log_must test -s $TMP_EVENTS
 	log_must test -s $TMP_EVENTS_FULL
-	log_must test -s $TMP_EVENTS_ZED
+
+	# If the only event is history then we don't observe zed debug log
+	if [[ "${events[0]}" != "sysevent.fs.zfs.history_event" ]]; then
+		# wait for all the non-history events to show up in the
+		# debug log, all-debug.sh filters history events.
+		for event in ${events[*]}; do
+			if [[ "$event" == \
+			    "sysevent.fs.zfs.history_event" ]]; then
+				continue
+			fi
+
+			log_must file_wait_event $ZED_DEBUG_LOG "$event"
+		done
+
+		log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED
+		log_must test -s $TMP_EVENTS_ZED
+
+		log_note "Events logged:"
+		grep "^ZEVENT_CLASS" $TMP_EVENTS_ZED
+	fi
 
 	log_note "Events generated:"
 	cat $TMP_EVENTS
@@ -118,6 +152,11 @@
 		    $TMP_EVENTS_FULL >$TMP_EVENT_FULL
 		log_must grep -q "pool = \"$pool\"" $TMP_EVENT_FULL
 
+		# all-debug.sh filters history events (seen in ZED_DEBUG_LOG)
+		if [[ "$event" == "sysevent.fs.zfs.history_event" ]]; then
+			continue
+		fi
+
 		# Verify the event was received by the ZED and logged.
 		awk -v event="$event" \
 		    'BEGIN{FS="\n"; RS=""} $0 ~ event { print $0 }' \

diff --git a/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill-zedlet.c b/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill-zedlet.c
new file mode 100644
index 0000000..c072f90
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill-zedlet.c

@@ -0,0 +1,36 @@
+/*
+ * Permission to use, copy, modify, and/or distribute this software for
+ * any purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+int main(void) {
+	pid_t pid = fork();
+	if (pid != 0) {	/* parent, or fork() failed (-1) */
+		int err = 0;
+		if (pid == -1 || wait(&err) == -1 || !WIFEXITED(err))
+			return (1);
+		return (WEXITSTATUS(err));	/* raw status would exit 0 */
+	}
+	char buf[64];	/* child: save a listing of the parent's open fds */
+	sprintf(buf, "/tmp/zts-zed_fd_spill-logdir/%d", getppid());
+	dup2(creat(buf, 0644), STDOUT_FILENO);
+	snprintf(buf, sizeof (buf), "/proc/%d/fd", getppid());
+	execlp("ls", "ls", buf, NULL);
+	_exit(127);
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill.ksh b/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill.ksh
new file mode 100755
index 0000000..8736a7f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/events/zed_fd_spill.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# DESCRIPTION:
+# Verify ZEDLETs only inherit the fds specified in the manpage
+#
+# STRATEGY:
+# 1. Inject a ZEDLET that dumps the fds it gets to a file.
+# 2. Generate some events.
+# 3. Read back the generated files and assert that there is no fd past 3,
+#    and there are exactly 4 fds.
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/events/events_common.kshlib
+
+verify_runnable "both"
+
+function cleanup # remove the log dir and its /tmp symlink, then stop the ZED
+{
+	log_must rm -rf "$logdir"
+	log_must rm -f "/tmp/zts-zed_fd_spill-logdir"	# link may not exist yet
+	log_must zed_stop
+}
+
+log_assert "Verify ZEDLETs inherit only the fds specified"
+log_onexit cleanup
+
+logdir="$(mktemp -d)"
+log_must ln -s "$logdir" /tmp/zts-zed_fd_spill-logdir
+
+self="$(readlink -f "$0")"
+log_must ln -s "${self%/*}/zed_fd_spill-zedlet" "${ZEDLET_DIR}/all-dumpfds"
+
+log_must zpool events -c
+log_must zed_stop
+log_must zed_start
+
+log_must truncate -s 0 $ZED_DEBUG_LOG
+log_must zpool scrub $TESTPOOL
+log_must zfs set compression=off $TESTPOOL/$TESTFS
+log_must wait_scrubbed $TESTPOOL
+log_must file_wait $ZED_DEBUG_LOG 3
+
+if [ -n "$(find "$logdir" -maxdepth 0 -empty)" ]; then
+	log_fail "Our ZEDLET didn't run!"
+fi
+log_must awk '
+	!/^[0123]$/ {
+		print FILENAME ": " $0
+		err=1
+	}
+	END {
+		exit err
+	}
+' "$logdir"/*
+wc -l "$logdir"/* | log_must awk '$1 != "4" && $2 != "total" {print; exit 1}'
+
+log_pass "ZED doesn't leak fds to ZEDLETs"

diff --git a/zfs/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh b/zfs/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh
index 44652ee..0bef0ef 100755
--- a/zfs/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh

@@ -49,6 +49,7 @@
 log_onexit cleanup
 
 log_must zpool events -c
+log_must zed_stop
 log_must zed_start
 
 # Backup our zed.rc

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/Makefile.am b/zfs/tests/zfs-tests/tests/functional/fallocate/Makefile.am
new file mode 100644
index 0000000..86364d7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/Makefile.am

@@ -0,0 +1,7 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fallocate
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	fallocate_prealloc.ksh \
+	fallocate_punch-hole.ksh \
+	fallocate_zero-range.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh
new file mode 100755
index 0000000..bdfa614
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/cleanup.ksh

@@ -0,0 +1,27 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh
new file mode 100755
index 0000000..7bb020f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_prealloc.ksh

@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test fallocate(2) preallocation.
+#
+# STRATEGY:
+# 1. Verify mode 0 fallocate is supported.
+# 2. Verify default 10% reserve space is honored by setting a quota.
+#
+
+verify_runnable "global"
+
+FILE=$TESTDIR/$TESTFILE0
+
+function cleanup
+{
+	log_must zfs set quota=none $TESTPOOL
+
+	[[ -e $TESTDIR ]] && log_must rm -Rf $TESTDIR/*
+}
+
+log_assert "Ensure sparse files can be preallocated"
+
+log_onexit cleanup
+
+# Pre-allocate a sparse 1GB file.
+log_must fallocate -l $((1024 * 1024 * 1024)) $FILE
+log_must rm -Rf $TESTDIR/*
+
+# Verify that an additional ~10% reserve space is required.
+log_must zfs set quota=100M $TESTPOOL
+log_mustnot fallocate -l $((150 * 1024 * 1024)) $FILE
+log_mustnot fallocate -l $((110 * 1024 * 1024)) $FILE
+log_must fallocate -l $((90 * 1024 * 1024)) $FILE
+
+log_pass "Ensure sparse files can be preallocated"

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh
new file mode 100755
index 0000000..92f4552
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh

@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021 by The FreeBSD Foundation.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test hole-punching functionality
+#
+# STRATEGY:
+# 1. Create a dense file
+# 2. Punch an assortment of holes in the file and verify the result.
+#
+
+verify_runnable "global"
+
+#
+# Prior to __FreeBSD_version 1400032 there is no mechanism to punch a hole in a
+# file on FreeBSD.  truncate -d support is required to call fspacectl(2) on
+# behalf of the script.
+#
+if is_freebsd; then
+	if [[ $(uname -K) -lt 1400032 ]]; then
+		log_unsupported "Requires fspacectl(2) support on FreeBSD"
+	fi
+	if truncate -d 2>&1 | grep "illegal option" > /dev/null; then
+		log_unsupported "Requires truncate(1) -d support on FreeBSD"
+	fi
+fi
+
+FILE=$TESTDIR/$TESTFILE0
+BLKSZ=$(get_prop recordsize $TESTPOOL)
+
+function cleanup
+{
+	[[ -e $TESTDIR ]] && log_must rm -f $FILE
+}
+
+function check_reported_size # expected_size
+{
+	typeset expected_size=$1
+	# Fail unless the size du prints for $FILE equals 'expected_size'.
+	if ! [ -e "${FILE}" ]; then
+		log_fail "$FILE does not exist"
+	fi
+	# Local, so the measurement does not leak into the calling script.
+	typeset reported_size=$(du "${FILE}" | awk '{print $1}')
+	if [ "$reported_size" != "$expected_size" ]; then
+		log_fail "Incorrect reported size: $reported_size != $expected_size"
+	fi
+}
+
+function check_apparent_size # expected_size
+{
+	# Fail the test unless stat_size reports 'expected_size' for $FILE.
+	# Locals only: the script keeps a baseline in a global apparent_size.
+	typeset expected_size=$1 apparent_size=$(stat_size "${FILE}")
+	if [ "$apparent_size" != "$expected_size" ]; then
+		log_fail "Incorrect apparent size: $apparent_size != $expected_size"
+	fi
+}
+
+log_assert "Ensure holes can be punched in files making them sparse"
+
+log_onexit cleanup
+
+# Create a dense file and check it is the correct size.
+log_must file_write -o create -f $FILE -b $BLKSZ -c 8
+sync_pool $TESTPOOL
+log_must check_reported_size 1027
+
+# Punch a hole for the first full block.
+log_must punch_hole 0 $BLKSZ $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Partially punch a hole in the second block.
+log_must punch_hole $BLKSZ $((BLKSZ / 2)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Punch a hole which overlaps the third and fourth block.
+log_must punch_hole $(((BLKSZ * 2) + (BLKSZ / 2))) $((BLKSZ)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Punch a hole from the fifth block past the end of file.  The apparent
+# file size should not change since --keep-size is implied.
+apparent_size=$(stat_size $FILE)
+log_must punch_hole $((BLKSZ * 4)) $((BLKSZ * 10)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 387
+log_must check_apparent_size $apparent_size
+
+log_pass "Ensure holes can be punched in files making them sparse"

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh
new file mode 100755
index 0000000..e907b0f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh

@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021 by The FreeBSD Foundation.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test FALLOC_FL_ZERO_RANGE functionality
+#
+# STRATEGY:
+# 1. Create a dense file
+# 2. Zero various ranges in the file and verify the result.
+#
+
+verify_runnable "global"
+
+if is_freebsd; then
+	log_unsupported "FreeBSD does not implement an analogue to ZERO_RANGE."
+fi
+
+FILE=$TESTDIR/$TESTFILE0
+BLKSZ=$(get_prop recordsize $TESTPOOL)
+
+function cleanup
+{
+	[[ -e $TESTDIR ]] && log_must rm -f $FILE
+}
+
+# Helpfully, this function expects kilobytes, and check_apparent_size expects bytes.
+function check_reported_size # expected_size
+{
+	typeset expected_size=$1
+	# Fail unless the size du prints for $FILE equals 'expected_size'.
+	if ! [ -e "${FILE}" ]; then
+		log_fail "$FILE does not exist"
+	fi
+	# Local, so the measurement does not leak into the calling script.
+	typeset reported_size=$(du "${FILE}" | awk '{print $1}')
+	if [ "$reported_size" != "$expected_size" ]; then
+		log_fail "Incorrect reported size: $reported_size != $expected_size"
+	fi
+}
+
+function check_apparent_size # expected_size
+{
+	# Fail the test unless stat_size reports 'expected_size' for $FILE.
+	# Locals only: the script keeps a baseline in a global apparent_size.
+	typeset expected_size=$1 apparent_size=$(stat_size "${FILE}")
+	if [ "$apparent_size" != "$expected_size" ]; then
+		log_fail "Incorrect apparent size: $apparent_size != $expected_size"
+	fi
+}
+
+log_assert "Ensure ranges can be zeroed in files"
+
+log_onexit cleanup
+
+# Create a dense file and check it is the correct size.
+log_must file_write -o create -f $FILE -b $BLKSZ -c 8
+sync_pool $TESTPOOL
+log_must check_reported_size 1027
+
+# Zero a range covering the first full block.
+log_must zero_range 0 $BLKSZ $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Partially zero a range in the second block.
+log_must zero_range $BLKSZ $((BLKSZ / 2)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Zero range which overlaps the third and fourth block.
+log_must zero_range $(((BLKSZ * 2) + (BLKSZ / 2))) $((BLKSZ)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 899
+
+# Zero range from the fifth block past the end of file, with --keep-size.
+# The apparent file size must not change, since we did specify --keep-size.
+apparent_size=$(stat_size $FILE)
+log_must fallocate --keep-size --zero-range --offset $((BLKSZ * 4)) --length $((BLKSZ * 10)) "$FILE"
+sync_pool $TESTPOOL
+log_must check_reported_size 387
+log_must check_apparent_size $apparent_size
+
+# Zero range from the fifth block past the end of file.  The apparent
+# file size should change since --keep-size is not implied, unlike
+# with PUNCH_HOLE.
+apparent_size=$(stat_size $FILE)
+log_must zero_range $((BLKSZ * 4)) $((BLKSZ * 10)) $FILE
+sync_pool $TESTPOOL
+log_must check_reported_size 387
+log_must check_apparent_size $((BLKSZ * 14))
+
+log_pass "Ensure ranges can be zeroed in files"

diff --git a/zfs/tests/zfs-tests/tests/functional/fallocate/setup.ksh b/zfs/tests/zfs-tests/tests/functional/fallocate/setup.ksh
new file mode 100755
index 0000000..586ac02
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fallocate/setup.ksh

@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+default_setup_noexit $DISK
+log_must zfs set compression=off $TESTPOOL
+log_pass
+

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/Makefile.am b/zfs/tests/zfs-tests/tests/functional/fault/Makefile.am
index f2fc068..ba0d7d6 100644
--- a/zfs/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/fault/Makefile.am

@@ -4,6 +4,7 @@
 	cleanup.ksh \
 	auto_offline_001_pos.ksh \
 	auto_online_001_pos.ksh \
+	auto_online_002_pos.ksh \
 	auto_replace_001_pos.ksh \
 	auto_spare_001_pos.ksh \
 	auto_spare_002_pos.ksh \

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
index 1bf54b1..78eed0f 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh

@@ -24,29 +24,28 @@
 
 #
 # DESCRIPTION:
-# Testing Fault Management Agent ZED Logic - Physically removed device is
-# made unavail and onlined when reattached
+# Testing Fault Management Agent ZED Logic - Physically detached device is
+# marked REMOVED, then onlined when reattached
 #
 # STRATEGY:
 # 1. Create a pool
 # 2. Simulate physical removal of one device
-# 3. Verify the device is unvailable
+# 3. Verify the device is removed when detached
 # 4. Reattach the device
 # 5. Verify the device is onlined
 # 6. Repeat the same tests with a spare device:
 #    zed will use the spare to handle the removed data device
 # 7. Repeat the same tests again with a faulted spare device:
-#    the removed data device should be unavailable
+#    the removed data device should be in the REMOVED state
 #
 # NOTE: the use of 'block_device_wait' throughout the test helps avoid race
 # conditions caused by mixing creation/removal events from partitioning the
 # disk (zpool create) and events from physically removing it (remove_disk).
 #
-# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a
-# vdev to the unavailable state.  The ZED does receive a removal notification
-# but only relies on it to activate a hot spare.  Additional work is planned
-# to extend an existing ioctl interface to allow the ZED to transition the
-# vdev in to a removed state.
+# NOTE: the test relies on ZED to transition the vdev to removed on the removal
+# event.  The ZED does receive a removal notification but only relies on it to
+# activate a hot spare.  Additional work is planned to extend an existing ioctl
+# interface to allow the ZED to transition the vdev in to a removed state.
 #
 verify_runnable "both"
 
@@ -54,7 +53,7 @@
 	# Add one 512b scsi_debug device (4Kn would generate IO errors)
 	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
 	# add 32m of fudge
-	load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) 1 1 1 '512b'
+	load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
 else
 	log_unsupported "scsi debug module unsupported"
 fi
@@ -85,10 +84,10 @@
     "mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
 )
 
-log_must truncate -s $SPA_MINDEVSIZE $filedev1
-log_must truncate -s $SPA_MINDEVSIZE $filedev2
-log_must truncate -s $SPA_MINDEVSIZE $filedev3
-log_must truncate -s $SPA_MINDEVSIZE $sparedev
+log_must truncate -s $MINVDEVSIZE $filedev1
+log_must truncate -s $MINVDEVSIZE $filedev2
+log_must truncate -s $MINVDEVSIZE $filedev3
+log_must truncate -s $MINVDEVSIZE $sparedev
 
 for conf in "${poolconfs[@]}"
 do
@@ -104,8 +103,8 @@
 	log_must mkfile 1m $mntpnt/file
 	log_must zpool sync $TESTPOOL
 
-	# 3. Verify the device is unvailable.
-	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+	# 3. Verify the device is removed.
+	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
 
 	# 4. Reattach the device
 	insert_disk $removedev
@@ -123,6 +122,11 @@
 #    the removed data device
 for conf in "${poolconfs[@]}"
 do
+	# special vdev can not be replaced by a hot spare
+	if [[ $conf = *"special mirror"* ]]; then
+		continue
+	fi
+
 	# 1. Create a pool with a spare
 	log_must zpool create -f $TESTPOOL $conf
 	block_device_wait ${DEV_DSKDIR}/${removedev}
@@ -138,7 +142,7 @@
 
 	# 3. Verify the device is handled by the spare.
 	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
-	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
 
 	# 4. Reattach the device
 	insert_disk $removedev
@@ -173,8 +177,8 @@
 	log_must mkfile 1m $mntpnt/file
 	log_must zpool sync $TESTPOOL
 
-	# 4. Verify the device is unavailable
-	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+	# 4. Verify the device is removed
+	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
 
 	# 5. Reattach the device
 	insert_disk $removedev

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh
index 03fc15a..36d30b5 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh

@@ -134,9 +134,8 @@
 		((timeout++))
 
 		sleep 1
-		zpool events $TESTPOOL \
-		    | egrep sysevent.fs.zfs.resilver_finish > /dev/null
-		if (($? == 0)); then
+		if zpool events $TESTPOOL \
+		    | grep -qF sysevent.fs.zfs.resilver_finish; then
 			log_note "Auto-online of $offline_disk is complete"
 			sleep 1
 			break

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh
new file mode 100755
index 0000000..60185ac
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
+# Copyright (c) 2019 by Delphix. All rights reserved.
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
+# Now with partitioned vdevs.
+#
+# STRATEGY:
+# 1. Partition a scsi_debug device for simulating removal
+# 2. Create a raidz1 pool from the partition and $DISKS, then write data
+# 3. Remove the disk, check the pool is degraded, then re-insert the disk
+# 4. Wait for the pool to report resilvered (ZED is expected to auto-online
+#    the disk) and check that the pool is back online.
+#
+verify_runnable "both"
+
+function cleanup
+{
+	poolexists ${TESTPOOL} && destroy_pool ${TESTPOOL}	# skipped if pool is gone
+	unload_scsi_debug
+}
+
+log_assert "Testing automated auto-online FMA test with partitioned vdev"
+
+log_onexit cleanup
+
+load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
+SDDEVICE=$(get_debug_device)
+zpool labelclear -f ${SDDEVICE}	# exit status not checked (no log_must)
+partition_disk ${SDSIZE} ${SDDEVICE} 1
+part=${SDDEVICE}1
+host=$(get_scsi_host ${SDDEVICE})	# passed to insert_disk further down
+
+block_device_wait /dev/${part}
+log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}
+
+# Add some data to the pool
+log_must mkfile ${FSIZE} /${TESTPOOL}/data
+
+remove_disk ${SDDEVICE}	# simulated removal (see STRATEGY)
+check_state ${TESTPOOL} "" "degraded" || \
+    log_fail "${TESTPOOL} is not degraded"
+
+# Clear zpool events
+log_must zpool events -c
+
+# Re-insert the disk; ZED is expected to bring it back online
+insert_disk ${SDDEVICE} ${host}
+
+log_note "Delay for ZED auto-online"
+typeset -i timeout=0
+until is_pool_resilvered ${TESTPOOL}; do
+	if ((timeout++ == MAXTIMEOUT)); then	# compared before the increment
+		log_fail "Timeout occurred"
+	fi
+	sleep 1	# poll once per second
+done
+log_note "Auto-online of ${SDDEVICE} is complete"
+
+# Validate auto-online was successful
+sleep 1	# NOTE(review): presumably lets the pool state settle - confirm
+check_state ${TESTPOOL} "" "online" || \
+    log_fail "${TESTPOOL} is not back online"
+
+log_must zpool destroy ${TESTPOOL}
+
+log_pass "Auto-online with partitioned vdev test successful"

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
index b6af1a3..a932671 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh

@@ -55,36 +55,59 @@
 
 TESTFILE="/$TESTPOOL/$TESTFS/testfile"
 
-for type in "mirror" "raidz" "raidz2"; do
-	# 1. Create a pool with hot spares
-	truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
-	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE
+for type in "mirror" "raidz" "raidz2" "draid:1s"; do
+	if [ "$type" = "draid:1s" ]; then
+		# 1. Create a dRAID pool with a distributed hot spare
+		#
+		# Corruption is injected in the file-2 instead of file-1
+		# vdev since the dRAID permutation at these offsets maps
+		# to distributed spare space and not data devices.
+		#
+		log_must truncate -s $MINVDEVSIZE $VDEV_FILES
+		log_must zpool create -f $TESTPOOL $type $VDEV_FILES
+		SPARE="draid1-0-0"
+		FAULT="$TEST_BASE_DIR/file-2"
+	else
+		# 1. Create a pool with hot spares
+		log_must truncate -s $MINVDEVSIZE $VDEV_FILES $SPARE_FILE
+		log_must zpool create -f $TESTPOOL $type $VDEV_FILES \
+		    spare $SPARE_FILE
+		SPARE=$SPARE_FILE
+		FAULT=$FAULT_FILE
+	fi
 
 	# 2. Create a filesystem with the primary cache disable to force reads
 	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
 	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS
 
 	# 3. Write a file to the pool to be read back
-	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
+	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=64
 
 	# 4. Inject IO ERRORS on read with a zinject error handler
-	log_must zinject -d $FAULT_FILE -e io -T read $TESTPOOL
+	log_must zinject -d $FAULT -e io -T read $TESTPOOL
 	log_must cp $TESTFILE /dev/null
 
 	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
 	log_note "Wait for ZED to auto-spare"
-	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "FAULTED" 60
-	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
+	log_must wait_vdev_state $TESTPOOL $FAULT "FAULTED" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE "INUSE"
 	log_must check_state $TESTPOOL "" "DEGRADED"
 
+	# The ZED will use a sequential resilver for dRAID. Wait for the
+	# resilver and subsequent scrub to complete before moving on.
+	if [ "$type" = "draid:1s" ]; then
+		log_must wait_scrubbed $TESTPOOL
+	fi
+
 	# 6. Clear the fault
 	log_must zinject -c all
-	log_must zpool clear $TESTPOOL $FAULT_FILE
+	log_must zpool clear $TESTPOOL $FAULT
 
 	# 7. Verify the hot spare is available and expected pool/device status
-	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
+	log_must wait_vdev_state $TESTPOOL $FAULT "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE "AVAIL"
+
 	log_must is_pool_resilvered $TESTPOOL
 	log_must check_state $TESTPOOL "" "ONLINE"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
index 63aaead..e9517ba 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh

@@ -50,22 +50,26 @@
 
 log_onexit cleanup
 
-# Clear events from previous runs
-zed_events_drain
+# Events not supported on FreeBSD
+if ! is_freebsd; then
+	# Clear events from previous runs
+	zed_events_drain
+fi
 
 TESTFILE="/$TESTPOOL/$TESTFS/testfile"
 
 for type in "mirror" "raidz" "raidz2"; do
 	# 1. Create a pool with hot spares
-	truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
-	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE
+	log_must truncate -s $MINVDEVSIZE $VDEV_FILES $SPARE_FILE
+	log_must zpool create -f $TESTPOOL $type $VDEV_FILES \
+	    spare $SPARE_FILE
 
 	# 2. Create a filesystem with the primary cache disable to force reads
 	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
 	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS
 
 	# 3. Write a file to the pool to be read back
-	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
+	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=64
 
 	# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
 	log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
index e985751..f4fd21d 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh

@@ -60,7 +60,7 @@
 
 # 1. Create a pool from 512b devices and set "ashift" pool property accordingly
 for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
-	truncate -s $SPA_MINDEVSIZE $vdev
+	truncate -s $MINVDEVSIZE $vdev
 done
 log_must zpool create -f $TESTPOOL mirror $SAFE_DEVICE $FAIL_DEVICE
 # NOTE: file VDEVs should be added as 512b devices, verify this "just in case"
@@ -71,7 +71,7 @@
 
 # 2. Add one 512e spare device (4Kn would generate IO errors on replace)
 # NOTE: must be larger than the existing 512b devices, add 32m of fudge
-load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e'
+load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e'
 SPARE_DEVICE=$(get_debug_device)
 log_must_busy zpool add $TESTPOOL spare $SPARE_DEVICE
 

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
index 25c23ae..8a9cf6f 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh

@@ -53,22 +53,53 @@
 log_assert "ZED should be able to handle multiple faulted devices"
 log_onexit cleanup
 
-# Clear events from previous runs
-zed_events_drain
+# Events not supported on FreeBSD
+if ! is_freebsd; then
+	# Clear events from previous runs
+	zed_events_drain
+fi
 
 FAULT_DEV1="$TEST_BASE_DIR/fault-dev1"
 FAULT_DEV2="$TEST_BASE_DIR/fault-dev2"
 SAFE_DEV1="$TEST_BASE_DIR/safe-dev1"
 SAFE_DEV2="$TEST_BASE_DIR/safe-dev2"
-DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2"
+SAFE_DEV3="$TEST_BASE_DIR/safe-dev3"
+SAFE_DEV4="$TEST_BASE_DIR/safe-dev4"
+DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2 $SAFE_DEV3 $SAFE_DEV4"
 SPARE_DEV1="$TEST_BASE_DIR/spare-dev1"
 SPARE_DEV2="$TEST_BASE_DIR/spare-dev2"
 SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2"
 
-for type in "mirror" "raidz" "raidz2" "raidz3"; do
-	# 1. Create a pool with two hot spares
-	truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
-	log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+for type in "mirror" "raidz" "raidz2" "raidz3" "draid2:1s"; do
+	if [ "$type" = "draid2:1s" ]; then
+		# 1. Create a dRAID pool with a distributed and traditional
+		# hot spare to provide test coverage for both configurations.
+		#
+		# Corruption is injected in the third and fourth vdevs
+		# since the dRAID permutation at these offsets maps to
+		# distributed spare space and not data devices.
+		#
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEV1
+		log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
+		    $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
+		    spare $SPARE_DEV1
+		SPARE1=$SPARE_DEV1
+		SPARE2="draid2-0-0"
+	elif [ "$type" = "mirror" ]; then
+		# 1. Create a 3-way mirror pool with two hot spares
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
+		log_must zpool create -f $TESTPOOL $type \
+		    $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 spare $SPARE_DEVS
+		SPARE1=$SPARE_DEV1
+		SPARE2=$SPARE_DEV2
+	else
+		# 1. Create a raidz pool with two hot spares
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
+		log_must zpool create -f $TESTPOOL $type $DATA_DEVS \
+		    spare $SPARE_DEVS
+		SPARE1=$SPARE_DEV1
+		SPARE2=$SPARE_DEV2
+	fi
 
 	# 2. Inject IO ERRORS with a zinject error handler on the first device
 	log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL
@@ -76,11 +107,11 @@
 	# 3. Start a scrub
 	log_must zpool scrub $TESTPOOL
 
-	# 4. Verify the ZED kicks in a hot spare and expected pool/device status
+	# 4. Verify the ZED kicks in a hot spare and the pool/device status
 	log_note "Wait for ZED to auto-spare"
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
-	log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+	log_must wait_vdev_state $TESTPOOL $SPARE1 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE1 "INUSE"
 	log_must check_state $TESTPOOL "" "DEGRADED"
 
 	# 5. Inject IO ERRORS on a second device
@@ -95,10 +126,14 @@
 	# 7. Verify the ZED kicks in a second hot spare
 	log_note "Wait for ZED to auto-spare"
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
-	log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+	log_must wait_vdev_state $TESTPOOL $SPARE2 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE2 "INUSE"
 	log_must check_state $TESTPOOL "" "DEGRADED"
 
+	while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do
+		sleep 1
+	done
+
 	# 8. Clear the fault on both devices
 	log_must zinject -c all
 	log_must zpool clear $TESTPOOL $FAULT_DEV1
@@ -107,8 +142,8 @@
 	# 9. Verify the hot spares are available and expected pool/device status
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL"
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
+	log_must wait_hotspare_state $TESTPOOL $SPARE1 "AVAIL"
+	log_must wait_hotspare_state $TESTPOOL $SPARE2 "AVAIL"
 	log_must check_state $TESTPOOL "" "ONLINE"
 
 	# Cleanup
@@ -117,11 +152,37 @@
 
 # Rinse and repeat, this time faulting both devices at the same time
 # NOTE: "raidz" is excluded since it cannot survive 2 faulted devices
-# NOTE: "mirror" is a 4-way mirror here and should survive this test
-for type in "mirror" "raidz2" "raidz3"; do
-	# 1. Create a pool with two hot spares
-	truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
-	log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+# NOTE: "mirror" is a 3-way mirror here and should survive this test
+for type in "mirror" "raidz2" "raidz3" "draid2:1s"; do
+	if [ "$type" = "draid2:1s" ]; then
+		# 1. Create a dRAID pool with a distributed and traditional
+		# hot spare to provide test coverage for both configurations.
+		#
+		# Corruption is injected in the third and fourth vdevs
+		# since the dRAID permutation at these offsets maps to
+		# distributed spare space and not data devices.
+		#
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEV1
+		log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
+		    $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
+		    spare $SPARE_DEV1
+		SPARE1=$SPARE_DEV1
+		SPARE2="draid2-0-0"
+	elif [ "$type" = "mirror" ]; then
+		# 1. Create a 3-way mirror pool with two hot spares
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
+		log_must zpool create -f $TESTPOOL $type \
+		    $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 spare $SPARE_DEVS
+		SPARE1=$SPARE_DEV1
+		SPARE2=$SPARE_DEV2
+	else
+		# 1. Create a raidz pool with two hot spares
+		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
+		log_must zpool create -f $TESTPOOL $type $DATA_DEVS \
+		    spare $SPARE_DEVS
+		SPARE1=$SPARE_DEV1
+		SPARE2=$SPARE_DEV2
+	fi
 
 	# 2. Inject IO ERRORS with a zinject error handler on two devices
 	log_must eval "zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL &"
@@ -130,14 +191,14 @@
 	# 3. Start a scrub
 	log_must zpool scrub $TESTPOOL
 
-	# 4. Verify the ZED kicks in two hot spares and expected pool/device status
+	# 4. Verify the ZED kicks in two hot spares and the pool/device status
 	log_note "Wait for ZED to auto-spare"
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
 	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
-	log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
-	log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
-	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+	log_must wait_vdev_state $TESTPOOL $SPARE1 "ONLINE" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE2 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE1 "INUSE"
+	log_must wait_hotspare_state $TESTPOOL $SPARE2 "INUSE"
 	log_must check_state $TESTPOOL "" "DEGRADED"
 
 	# 5. Clear the fault on both devices

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_shared.ksh b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_shared.ksh
index 4671613..4229537 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_shared.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/auto_spare_shared.ksh

@@ -42,7 +42,7 @@
 	# Add one 512b spare device (4Kn would generate IO errors on replace)
 	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
 	# add 32m of fudge
-	load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) 1 1 1 '512b'
+	load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
 else
 	log_unsupported "scsi debug module unsupported"
 fi
@@ -72,7 +72,7 @@
 
 for vdev in $SAFE_FILEDEVPOOL1 $SAFE_FILEDEVPOOL2 $FAIL_FILEDEVPOOL1 \
     $FAIL_FILEDEVPOOL2 $SPARE_FILEDEV; do
-	log_must truncate -s $SPA_MINDEVSIZE $vdev
+	log_must truncate -s $MINVDEVSIZE $vdev
 done
 
 for spare in $SPARE_FILEDEV $SPARE_DISKDEV; do

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/decompress_fault.ksh b/zfs/tests/zfs-tests/tests/functional/fault/decompress_fault.ksh
index ea831ef..81eab56 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/decompress_fault.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/decompress_fault.ksh

@@ -33,7 +33,7 @@
 
 function cleanup
 {
-	log_must set_tunable64 zfs_compressed_arc_enabled 1
+	log_must set_tunable64 COMPRESSED_ARC_ENABLED 1
 	log_must zinject -c all
 	default_cleanup_noexit
 }
@@ -41,15 +41,18 @@
 log_onexit cleanup
 
 default_mirror_setup_noexit $DISK1 $DISK2
-log_must set_tunable64 zfs_compressed_arc_enabled 0
+log_must set_tunable64 COMPRESSED_ARC_ENABLED 0
 log_must zfs create -o compression=on $TESTPOOL/fs
 mntpt=$(get_prop mountpoint $TESTPOOL/fs)
-write_compressible $mntpt 32m 1 0 "testfile"
+write_compressible $mntpt 32m 1 1024k "testfile"
 log_must sync
 log_must zfs umount $TESTPOOL/fs
 log_must zfs mount $TESTPOOL/fs
 log_must zinject -a -t data -e decompress -f 20 $mntpt/testfile.0
 log_mustnot eval "cat $mntpt/testfile.0 > /dev/null"
-log_must eval "zpool events $TESTPOOL | grep -q 'data'"
+if ! is_freebsd; then
+	# Events are not supported on FreeBSD
+	log_must eval "zpool events $TESTPOOL | grep -q 'data'"
+fi
 
 log_pass "Injected decompression errors are handled correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/decrypt_fault.ksh b/zfs/tests/zfs-tests/tests/functional/fault/decrypt_fault.ksh
index ca698f7..d81c4b2 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/decrypt_fault.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/decrypt_fault.ksh

@@ -50,6 +50,9 @@
 log_must zfs mount $TESTPOOL/fs
 
 log_mustnot eval "cat $mntpt/file1 > /dev/null"
-log_must eval "zpool events $TESTPOOL | grep -q 'authentication'"
+# Events are not supported on FreeBSD
+if ! is_freebsd; then
+	log_must eval "zpool events $TESTPOOL | grep -q 'authentication'"
+fi
 
 log_pass "Injected decryption errors are handled correctly"

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/fault.cfg b/zfs/tests/zfs-tests/tests/functional/fault/fault.cfg
index 25601a7..839330e 100644
--- a/zfs/tests/zfs-tests/tests/functional/fault/fault.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/fault/fault.cfg

@@ -47,8 +47,6 @@
 	devs_id[1]=$(get_persistent_disk_name $DISK2)
 	devs_id[2]=$(get_persistent_disk_name $DISK3)
 	export devs_id
-else
-	DEV_DSKDIR="/dev"
 fi
 
 export VDEV_FILES="$TEST_BASE_DIR/file-1 $TEST_BASE_DIR/file-2 \

diff --git a/zfs/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh b/zfs/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh
index c919ae6..85f0083 100755
--- a/zfs/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/fault/zpool_status_-s.ksh

@@ -46,8 +46,8 @@
 function cleanup
 {
 	log_must zinject -c all
-	log_must set_tunable64 zio_slow_io_ms $OLD_SLOW_IO
-	log_must set_tunable64 zfs_slow_io_events_per_second $OLD_SLOW_IO_EVENTS
+	log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
 	default_cleanup_noexit
 }
 
@@ -56,10 +56,10 @@
 log_must zpool events -c
 
 # Mark any IOs greater than 10ms as slow IOs
-OLD_SLOW_IO=$(get_tunable zio_slow_io_ms)
-OLD_SLOW_IO_EVENTS=$(get_tunable zfs_slow_io_events_per_second)
-log_must set_tunable64 zio_slow_io_ms 10
-log_must set_tunable64 zfs_slow_io_events_per_second 1000
+OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+log_must set_tunable64 ZIO_SLOW_IO_MS 10
+log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000
 
 # Create 20ms IOs
 log_must zinject -d $DISK -D20:100 $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/features/async_destroy/async_destroy_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/async_destroy/async_destroy_001_pos.ksh
index 20b61da..ad0e49f 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/async_destroy/async_destroy_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/async_destroy/async_destroy_001_pos.ksh

@@ -48,8 +48,8 @@
 
 function cleanup
 {
-	datasetexists $TEST_FS && log_must zfs destroy $TEST_FS
-	log_must set_tunable64 zfs_async_block_max_blocks 100000
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS
+	log_must set_tunable64 ASYNC_BLOCK_MAX_BLOCKS 100000
 }
 
 log_onexit cleanup
@@ -64,7 +64,7 @@
 # Decrease the max blocks to free each txg, so that freeing takes
 # long enough that we can observe it.
 #
-log_must set_tunable64 zfs_async_block_max_blocks 100
+log_must set_tunable64 ASYNC_BLOCK_MAX_BLOCKS 100
 
 log_must sync
 log_must zfs destroy $TEST_FS
@@ -88,7 +88,7 @@
 # per txg.
 #
 sleep 10
-log_must set_tunable64 zfs_async_block_max_blocks 100000
+log_must set_tunable64 ASYNC_BLOCK_MAX_BLOCKS 100000
 
 # Wait for everything to be freed.
 while [[ "0" != "$(zpool list -Ho freeing $TESTPOOL)" ]]; do

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh
index d353029..cb1e940 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_001_pos.ksh

@@ -47,7 +47,7 @@
 
 function cleanup
 {
-	datasetexists $TEST_FS && log_must zfs destroy $TEST_FS
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh
index c2b32ad..9a00cee 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_002_pos.ksh

@@ -48,7 +48,7 @@
 
 function cleanup
 {
-	datasetexists $TEST_FS && log_must zfs destroy $TEST_FS
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh
index 3fa1cab..2cc587b 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_004_neg.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $TEST_FS ; then
-		log_must zfs destroy -r $TEST_FS
-	fi
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS -r
 
 	if datasetexists $LGCYPOOL ; then
 		log_must zpool destroy -f $LGCYPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh
index a2d9267..2be9894 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_005_pos.ksh

@@ -40,13 +40,8 @@
 
 function cleanup
 {
-	if datasetexists $TEST_SEND_FS ; then
-		log_must zfs destroy -r $TEST_SEND_FS
-	fi
-
-	if datasetexists $TEST_RECV_FS ; then
-		log_must zfs destroy -r $TEST_RECV_FS
-	fi
+	datasetexists $TEST_SEND_FS && destroy_dataset $TEST_SEND_FS -r
+	datasetexists $TEST_RECV_FS && destroy_dataset $TEST_RECV_FS -r
 
 	rm -f $TEST_STREAM
 	rm -f $TEST_STREAMINCR

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh
index 38b4ac5..3727bd5 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_006_pos.ksh

@@ -49,7 +49,7 @@
 
 function cleanup
 {
-	datasetexists $TEST_FS && log_must zfs destroy $TEST_FS
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_009_pos.ksh
index fa746c5..1e42202 100755
--- a/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/features/large_dnode/large_dnode_009_pos.ksh

@@ -39,7 +39,7 @@
 
 function cleanup
 {
-	datasetexists $TEST_FS && log_must zfs destroy $TEST_FS
+	datasetexists $TEST_FS && destroy_dataset $TEST_FS
 }
 
 log_onexit cleanup
@@ -64,7 +64,7 @@
 
 log_must wait
 
-log_must zpool export $TESTPOOL
+log_must_busy zpool export $TESTPOOL
 log_must zpool import $TESTPOOL
 log_must ls -lR "/$TEST_FS/" >/dev/null 2>&1
 log_must zdb -d $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history.cfg b/zfs/tests/zfs-tests/tests/functional/history/history.cfg
index bbbd612..e9200a2 100644
--- a/zfs/tests/zfs-tests/tests/functional/history/history.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/history/history.cfg

@@ -37,7 +37,11 @@
 export NEW_HISTORY=$TEST_BASE_DIR/new_history.$$
 
 export MIGRATEDPOOLNAME=${MIGRATEDPOOLNAME:-history_pool}
-export TIMEZONE=${TIMEZONE:-US/Mountain}
+if is_freebsd; then
+	export TIMEZONE=${TIMEZONE:-America/Denver}
+else
+	export TIMEZONE=${TIMEZONE:-US/Mountain}
+fi
 
 export HIST_USER="huser"
 export HIST_GROUP="hgroup"

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/history/history_002_pos.ksh
index 5533287..b431cdc 100755
--- a/zfs/tests/zfs-tests/tests/functional/history/history_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_002_pos.ksh

@@ -49,7 +49,7 @@
 	[[ -f $tmpfile ]] && rm -f $tmpfile
 	[[ -f $tmpfile2 ]] && rm -f $tmpfile2
 	for dataset in $fs $newfs $fsclone $vol $newvol $volclone; do
-		datasetexists $dataset && zfs destroy -Rf $dataset
+		datasetexists $dataset && destroy_dataset $dataset -Rf
 	done
 	rm -rf /history.$$
 }
@@ -72,8 +72,8 @@
 	mountpoint	/history.$$	mountpoint	legacy
 	mountpoint	none		compression	lz4
 	compression	on		compression	off
-	compression	lzjb		acltype		noacl
-	acltype		posixacl	xattr		sa
+	compression	lzjb		acltype		off
+	acltype		posix		acltype		nfsv4
 	atime		on		atime		off
 	devices		on		devices		off
 	exec		on		exec		off
@@ -84,9 +84,39 @@
 	aclinherit	discard		aclinherit	noallow
 	aclinherit	secure		aclinherit	passthrough
 	canmount	off		canmount	on
-	xattr		on		xattr		off
 	compression	gzip		compression	gzip-$((RANDOM%9 + 1))
-	copies		$((RANDOM%3 + 1))
+	compression     zstd		compression	zstd-$((RANDOM%9 + 1))
+	compression	zstd-fast	copies          $((RANDOM%3 + 1))
+	compression	zstd-fast-$((RANDOM%9 + 1))	xattr	sa
+	xattr		on		xattr		off
+)
+elif is_freebsd; then
+#	property	value		property	value
+#
+props=(
+	quota		64M		recordsize	512
+	reservation	32M		reservation	none
+	mountpoint	/history.$$	mountpoint	legacy
+	mountpoint	none		sharenfs	on
+	sharenfs	off
+	compression	on		compression	off
+	compression	lzjb		aclmode		discard
+	aclmode		groupmask	aclmode		passthrough
+	atime		on		atime		off
+	devices		on		devices		off
+	exec		on		exec		off
+	setuid		on		setuid		off
+	readonly	on		readonly	off
+	jailed		on		jailed		off
+	snapdir		hidden		snapdir		visible
+	aclinherit	discard		aclinherit	noallow
+	aclinherit	secure		aclinherit	passthrough
+	canmount	off		canmount	on
+	compression	gzip		compression	gzip-$((RANDOM%9 + 1))
+	compression     zstd		compression	zstd-$((RANDOM%9 + 1))
+	compression	zstd-fast	copies          $((RANDOM%3 + 1))
+	compression	zstd-fast-$((RANDOM%9 + 1))	acltype	off
+	acltype		posix		acltype		nfsv4
 )
 else
 #	property	value		property	value

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_006_neg.ksh b/zfs/tests/zfs-tests/tests/functional/history/history_006_neg.ksh
index e97adc4..c3a5e09 100755
--- a/zfs/tests/zfs-tests/tests/functional/history/history_006_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_006_neg.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $fs ; then
-		log_must zfs destroy -rf $fs
-	fi
+	datasetexists $fs && destroy_dataset $fs -rf
 	log_must zfs create $fs
 }
 
@@ -77,7 +75,9 @@
 	log_must zfs share $fs
 	log_must zfs unshare $fs
 fi
-log_must zfs send -i $snap1 $snap2 > /dev/null
+# https://github.com/openzfs/zfs/issues/11445
+set -o pipefail
+log_must zfs send -i $snap1 $snap2 | cat > /dev/null
 log_must zfs holds $snap1
 
 log_must eval "zpool history $TESTPOOL > $NEW_HISTORY"

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/history/history_007_pos.ksh
index d1c92c5..591d5b8 100755
--- a/zfs/tests/zfs-tests/tests/functional/history/history_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_007_pos.ksh

@@ -62,7 +62,7 @@
 typeset -i RET=1
 typeset -i linenum=0
 
-[[ ! -d $import_dir ]] && log_must mkdir $import_dir
+[[ ! -d $import_dir ]] && log_must mkdir -p $import_dir
 
 # We test the migrations on both uniform platform and cross platform
 for arch in "i386" "sparc"; do
@@ -73,7 +73,7 @@
 	cat $orig_cmds_f | grep -v "^$" > $orig_cmds_f1
 
 	log_must cp $tst_dir/${arch}.migratedpool.DAT.Z $import_dir
-	log_must uncompress $import_dir/${arch}.migratedpool.DAT.Z
+	log_must uncompress -f $import_dir/${arch}.migratedpool.DAT.Z
 
 	# destroy the pool with same name, so that import operation succeeds.
 	poolexists $migratedpoolname && \

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/history/history_008_pos.ksh
index 996c765..8e174dc 100755
--- a/zfs/tests/zfs-tests/tests/functional/history/history_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_008_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $root_testfs; then
-		log_must zfs destroy -rf $root_testfs
-	fi
+	datasetexists $root_testfs && destroy_dataset $root_testfs -rf
 	log_must zfs create $root_testfs
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/history/history_010_pos.ksh
index 31fe8ec..2c32b1b 100755
--- a/zfs/tests/zfs-tests/tests/functional/history/history_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_010_pos.ksh

@@ -47,7 +47,7 @@
 {
 	del_user $HIST_USER
 	del_group $HIST_GROUP
-	datasetexists $root_testfs && log_must zfs destroy -rf $root_testfs
+	datasetexists $root_testfs && destroy_dataset $root_testfs -rf
 }
 
 log_assert "Verify internal long history information are correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/history/history_common.kshlib b/zfs/tests/zfs-tests/tests/functional/history/history_common.kshlib
index b82c60c..ff3260f 100644
--- a/zfs/tests/zfs-tests/tests/functional/history/history_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/history/history_common.kshlib

@@ -51,11 +51,11 @@
 	fullcmd="$1"
 	flags="$2"
 
-	if is_linux; then
+	if is_illumos; then
+		histcmd=$(echo $fullcmd | sed 's/\/usr\/sbin\///g')
+	else
 		histcmd=$(echo $fullcmd | sed 's/^.*\/\(zpool .*\).*$/\1/')
 		histcmd=$(echo $histcmd | sed 's/^.*\/\(zfs .*\).*$/\1/')
-	else
-		histcmd=$(echo $fullcmd | sed 's/\/usr\/sbin\///g')
 	fi
 
 	cmd=$(echo $histcmd | awk '{print $1}')
@@ -72,9 +72,9 @@
 	# Run the command as the specified user, and find the new history.
 	zpool history $flags $pool > $OLD_HISTORY 2>/dev/null
 	if [[ $user == "root" ]]; then
-		log_must eval "$fullcmd"
+		log_must_busy eval "$fullcmd"
 	else
-		log_must user_run $user "$fullcmd"
+		log_must_busy user_run $user "$fullcmd"
 	fi
 	zpool history $flags $pool > $TMP_HISTORY 2>/dev/null
 	diff $OLD_HISTORY $TMP_HISTORY | grep "^> " | sed 's/^> //g' \
@@ -112,10 +112,11 @@
 	typeset suffix=""
 	if is_linux; then
 		suffix=":linux"
+	elif is_freebsd; then
+		suffix=":freebsd"
 	fi
 
-	grep "$cmd \[user $uid ($user) on $hname$suffix\]" \
-	    $NEW_HISTORY >/dev/null 2>&1
+	grep -q "$cmd \[user $uid ($user) on $hname$suffix\]" $NEW_HISTORY
 	if [[ $? != 0 ]]; then
 		log_note "Couldn't find long information for \"$cmd\""
 		return 1
@@ -364,7 +365,7 @@
 	typeset cmd=$1
 	typeset flags=$3
 
-	# This function doesn't currently verifiy the zpool command.
+	# This function doesn't currently verify the zpool command.
 	[[ ${cmd%% *} == "zfs" ]] || return 1
 	[[ $flags =~ "i" ]] || return 1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/hkdf/Makefile.am b/zfs/tests/zfs-tests/tests/functional/hkdf/Makefile.am
index b54e353..8ac9053 100644
--- a/zfs/tests/zfs-tests/tests/functional/hkdf/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/hkdf/Makefile.am

@@ -1,11 +1,5 @@
 include $(top_srcdir)/config/Rules.am
 
-AM_CPPFLAGS += -I$(top_srcdir)/include
-AM_CPPFLAGS += -I$(top_srcdir)/lib/libspl/include
-LDADD = $(top_builddir)/lib/libzpool/libzpool.la
-
-AUTOMAKE_OPTIONS = subdir-objects
-
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/hkdf
 
 dist_pkgdata_SCRIPTS = \
@@ -19,3 +13,5 @@
 	hkdf_test
 
 hkdf_test_SOURCES = hkdf_test.c
+hkdf_test_LDADD = \
+	$(abs_top_builddir)/lib/libzpool/libzpool.la

diff --git a/zfs/tests/zfs-tests/tests/functional/inheritance/Makefile.am b/zfs/tests/zfs-tests/tests/functional/inheritance/Makefile.am
index 82de708..3c62462 100644
--- a/zfs/tests/zfs-tests/tests/functional/inheritance/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/inheritance/Makefile.am

@@ -4,6 +4,8 @@
 	inherit_001_pos.ksh
 
 dist_pkgdata_DATA = \
+	README.config \
+	README.state \
 	inherit.kshlib \
 	config001.cfg \
 	config002.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh
index b1c24fa..5a5be3b 100755
--- a/zfs/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh

@@ -26,6 +26,7 @@
 
 #
 # Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -380,7 +381,8 @@
 	"sharenfs" "" \
 	"recordsize" "recsize" \
 	"snapdir" "" \
-	"readonly" ""
+	"readonly" "" \
+	"redundant_metadata" ""
 
 #
 # Note except for the mountpoint default value (which is handled in
@@ -391,12 +393,14 @@
 set -A def_val "on" "off" "on" \
 	"off" "" \
 	"hidden" \
-	"off"
+	"off" \
+	"all"
 
 set -A local_val "off" "on" "off" \
 	"on" "" \
 	"visible" \
-	"off"
+	"off" \
+	"none"
 
 #
 # Add system specific values
@@ -406,14 +410,15 @@
 	def_val+=("off")
 	local_val+=("off")
 else
-	prop+=("aclmode" "" \
-		"mountpoint" "")
-	def_val+=("discard" \
-		"")
-	local_val+=("groupmask" \
-		"$TESTDIR")
+	prop+=("aclmode" "")
+	def_val+=("discard")
+	local_val+=("groupmask")
 fi
-
+if is_illumos; then
+	prop+=("mountpoint" "")
+	def_val+=("")
+	local_val+=("$TESTDIR")
+fi
 
 #
 # Global flag indicating whether the default record size had been

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse.cfg b/zfs/tests/zfs-tests/tests/functional/inuse/inuse.cfg
index bbc32f1..631ace7 100644
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse.cfg

@@ -30,101 +30,25 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-if is_linux; then
-	export DISKSARRAY=$DISKS
-	export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
-	set_device_dir
-	set_slice_prefix
-	export SLICE0=1
-	export SLICE1=2
-else
-	export SLICE_PREFIX="s"
-	export SLICE0=0
-	export SLICE1=1
-fi
-
-verify_disk_count "$DISKS" 2
 set -A disk_array $(find_disks $DISKS)
-case "${#disk_array[@]}" in
-2)
-	FS_DISK0=${disk_array[0]}
-	FS_DISK1=${disk_array[1]}
-	FS_DISK2=${disk_array[0]}
-	FS_DISK3=${disk_array[1]}
-	FS_SIDE0=${FS_DISK0}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE1=${FS_DISK0}${SLICE_PREFIX}${SLICE1}
-	FS_SIDE2=${FS_DISK1}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE3=${FS_DISK1}${SLICE_PREFIX}${SLICE1}
-	disk0="${DEV_DSKDIR}/$FS_SIDE0"
-	disk1="${DEV_DSKDIR}/$FS_SIDE1"
-	disk2="${DEV_DSKDIR}/$FS_SIDE2"
-	disk3="${DEV_DSKDIR}/$FS_SIDE3"
-	disktargets="$disk0 $disk2"
-	rawdisk0="${DEV_RDSKDIR}/$FS_SIDE0"
-	rawdisk1="${DEV_RDSKDIR}/$FS_SIDE1"
-	rawdisk2="${DEV_RDSKDIR}/$FS_SIDE2"
-	rawdisk3="${DEV_RDSKDIR}/$FS_SIDE3"
-	rawtargets="$rawdisk0 $rawdisk2"
-	vdisks="$FS_DISK0"
-	sdisks="$FS_DISK1"
-	vslices="$FS_SIDE0 $FS_SIDE1 $FS_SIDE2"
-	sslices="$FS_SIDE3"
-	;;
-3)
-	FS_DISK0=${disk_array[0]}
-	FS_DISK1=${disk_array[1]}
-	FS_DISK2=${disk_array[2]}
-	FS_DISK3=${disk_array[0]}
-	FS_SIDE0=${FS_DISK0}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE1=${FS_DISK0}${SLICE_PREFIX}${SLICE1}
-	FS_SIDE2=${FS_DISK1}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE3=${FS_DISK2}${SLICE_PREFIX}${SLICE0}
-	disk0="${DEV_DSKDIR}/$FS_SIDE0"
-	disk1="${DEV_DSKDIR}/$FS_SIDE1"
-	disk2="${DEV_DSKDIR}/$FS_SIDE2"
-	disk3="${DEV_DSKDIR}/$FS_SIDE3"
-	disktargets="$disk0 $disk2 $disk3"
-	rawdisk0="${DEV_RDSKDIR}/$FS_SIDE0"
-	rawdisk1="${DEV_RDSKDIR}/$FS_SIDE1"
-	rawdisk2="${DEV_RDSKDIR}/$FS_SIDE2"
-	rawdisk3="${DEV_RDSKDIR}/$FS_SIDE3"
-	rawtargets="$rawdisk0 $rawdisk2 $rawdisk3"
-	vdisks="$FS_DISK0 $FS_DISK1"
-	sdisks="$FS_DISK2"
-	vslices="$FS_SIDE0 $FS_SIDE2 $FS_SIDE3"
-	sslices="$FS_SIDE1"
-	;;
-*)
-	FS_DISK0=${disk_array[0]}
-	FS_DISK1=${disk_array[1]}
-	FS_DISK2=${disk_array[2]}
-	FS_DISK3=${disk_array[3]}
-	FS_SIDE0=${FS_DISK0}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE1=${FS_DISK1}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE2=${FS_DISK2}${SLICE_PREFIX}${SLICE0}
-	FS_SIDE3=${FS_DISK3}${SLICE_PREFIX}${SLICE0}
-	disk0="${DEV_DSKDIR}/$FS_SIDE0"
-	disk1="${DEV_DSKDIR}/$FS_SIDE1"
-	disk2="${DEV_DSKDIR}/$FS_SIDE2"
-	disk3="${DEV_DSKDIR}/$FS_SIDE3"
-	disktargets="$disk0 $disk1 $disk2 $disk3"
-	rawdisk0="${DEV_RDSKDIR}/$FS_SIDE0"
-	rawdisk1="${DEV_RDSKDIR}/$FS_SIDE1"
-	rawdisk2="${DEV_RDSKDIR}/$FS_SIDE2"
-	rawdisk3="${DEV_RDSKDIR}/$FS_SIDE3"
-	rawtargets="$rawdisk0 $rawdisk1 $rawdisk2 $rawdisk3"
-	vdisks="$FS_DISK0 $FS_DISK1 $FS_DISK2"
-	sdisks="$FS_DISK3"
-	vslices="$FS_SIDE0 $FS_SIDE1 $FS_SIDE2"
-	sslices="$FS_SIDE3"
-	;;
-esac
+FS_DISK0=${disk_array[0]}
+FS_DISK1=${disk_array[1]}
+FS_DISK2=${disk_array[2]}
+disk0="${DEV_DSKDIR}/$FS_DISK0"
+disk1="${DEV_DSKDIR}/$FS_DISK1"
+disk2="${DEV_DSKDIR}/$FS_DISK2"
+disktargets="$disk0 $disk1 $disk2"
+rawdisk0="${DEV_RDSKDIR}/$FS_DISK0"
+rawdisk1="${DEV_RDSKDIR}/$FS_DISK1"
+rawdisk2="${DEV_RDSKDIR}/$FS_DISK2"
+rawtargets="$rawdisk0 $rawdisk1 $rawdisk2"
+vdisks="$FS_DISK0 $FS_DISK1"
+sdisks="$FS_DISK2"
 
-export FS_DISK0 FS_DISK1 FS_DISK2 FS_DISK3 SINGLE_DISK
-export FS_SIDE0 FS_SIDE1 FS_SIDE2 FS_SIDE3
-export disk0 disk1 disk2 disk3 disktargets
-export rawdisk0 rawdisk1 rawdisk2 rawdisk3 rawtargets
-export vdisks sdisks vslices sslices
+export FS_DISK0 FS_DISK1 FS_DISK2
+export disk0 disk1 disk2 disktargets
+export rawdisk0 rawdisk1 rawdisk2 rawtargets
+export vdisks sdisks
 
 export UFSMP=$TESTDIR/testinuseufsdump
 export FS_SIZE=1g

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh
index aecdc5a..f824661 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_001_pos.ksh

@@ -43,10 +43,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup
 {
 	#
@@ -64,27 +60,25 @@
 log_onexit cleanup
 
 typeset dumpdev=""
-typeset diskslice=""
 
 PREVDUMPDEV=`dumpadm | grep "Dump device" | awk '{print $3}'`
 
-log_note "Zero $FS_DISK0 and place free space in to slice 0"
+log_note "Zero $FS_DISK0"
 log_must cleanup_devices $FS_DISK0
 
-diskslice="${DEV_DSKDIR}/${FS_DISK0}${SLICE0}"
-log_note "Configuring $diskslice as dump device"
-log_must dumpadm -d $diskslice > /dev/null
+log_note "Configuring $rawdisk0 as dump device"
+log_must dumpadm -d $rawdisk0 > /dev/null
 
 log_note "Confirm that dump device has been setup"
 dumpdev=`dumpadm | grep "Dump device" | awk '{print $3}'`
 [[ -z "$dumpdev" ]] && log_untested "No dump device has been configured"
 
-[[ "$dumpdev" != "$diskslice" ]] && \
-    log_untested "Dump device has not been configured to $diskslice"
+[[ "$dumpdev" != "$rawdisk0" ]] && \
+    log_untested "Dump device has not been configured to $rawdisk0"
 
 log_note "Attempt to zpool the dump device"
 unset NOINUSE_CHECK
-log_mustnot zpool create $TESTPOOL "$diskslice"
+log_mustnot zpool create $TESTPOOL "$rawdisk0"
 log_mustnot poolexists $TESTPOOL
 
 log_pass "Unable to zpool a device in use by dumpadm"

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_003_pos.ksh
index bdd79d9..07d6ac1 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_003_pos.ksh

@@ -50,10 +50,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup
 {
 	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
@@ -98,18 +94,9 @@
 typeset -i dirnum=0
 typeset -i filenum=0
 typeset cwd=""
-typeset cyl=""
-
-for num in 0 1 2; do
-	eval typeset slice=\${FS_SIDE$num}
-	disk=${slice%s*}
-	slice=${slice##*${SLICE_PREFIX}}
-	log_must set_partition $slice "$cyl" $FS_SIZE $disk
-	cyl=$(get_endslice $disk $slice)
-done
 
 log_note "Make a ufs filesystem on source $rawdisk1"
-echo "y" | newfs -v $rawdisk1 > /dev/null 2>&1
+new_fs $rawdisk1 > /dev/null 2>&1
 (($? != 0)) && log_untested "Unable to create ufs filesystem on $rawdisk1"
 
 log_must mkdir -p $UFSMP
@@ -149,7 +136,7 @@
 log_mustnot poolexists $TESTPOOL1
 
 log_note "Attempt to take the source device in use by ufsdump as spare device"
-log_mustnot zpool create $TESTPOOL1 "$FS_SIDE2" spare "$disk1"
+log_mustnot zpool create $TESTPOOL1 "$FS_DISK2" spare "$disk1"
 log_mustnot poolexists $TESTPOOL1
 
 wait $PIDUFSDUMP
@@ -175,7 +162,7 @@
 
 log_note "Attempt to take the restored device in use by ufsrestore as spare" \
     "device"
-log_mustnot zpool create -f $TESTPOOL2 "$FS_SIDE2" spare "$disk1"
+log_mustnot zpool create -f $TESTPOOL2 "$FS_DISK2" spare "$disk1"
 log_mustnot poolexists $TESTPOOL2
 
 log_pass "Unable to zpool over a device in use by ufsdump or ufsrestore"

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh
index b126f66..a9725e0 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_004_pos.ksh

@@ -72,6 +72,9 @@
 	if is_linux; then
 		parted $disk -s -- mklabel gpt
 		typeset -i retval=$?
+	elif is_freebsd; then
+		gpart create -s gpt $disk
+		typeset -i retval=$?
 	else
 		typeset format_file=$TEST_BASE_DIR/format_in.$$.1
 		echo "partition" > $format_file

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_005_pos.ksh
index 6b0abf4..afe30d0 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_005_pos.ksh

@@ -58,15 +58,15 @@
 	cleanup_devices $vdisks $sdisks
 }
 
-function verify_assertion #slices
+function verify_assertion #disks
 {
 	typeset targets=$1
 
 	for t in $targets; do
-		echo "y" | newfs -v $t > /dev/null 2>&1
-		(( $? !=0 )) || \
+		if new_fs $t; then
 			log_fail "newfs over active pool " \
-			"unexpected return code of 0"
+				"unexpected return code of 0"
+		fi
 	done
 
 	return 0
@@ -82,39 +82,11 @@
 
 unset NOINUSE_CHECK
 while (( i < ${#vdevs[*]} )); do
-	for num in 0 1 2 3 ; do
-		eval typeset disk=\${FS_DISK$num}
-		zero_partitions $disk
-	done
-	typeset cyl=""
-	for num in 0 1 2 3 ; do
-		eval typeset slice=\${FS_SIDE$num}
-		disk=${slice%${SLICE_PREFIX}*}
-		[[ -z $SLICE_PREFIX ]] && eval typeset disk=\${FS_DISK$num}
-		slice=$(echo $slice | awk '{ print substr($1,length($1),1) }')
-		log_must set_partition $slice "$cyl" $FS_SIZE $disk
-		[[ $num < 3 ]] && cyl=$(get_endslice $disk $slice)
-	done
+	typeset spare="spare $sdisks"
 
-	if [[ -n $SINGLE_DISK && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices
-	verify_assertion "$rawtargets"
-	destroy_pool $TESTPOOL1
-
-	if [[ ( $FS_DISK0 == $FS_DISK2 ) && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	if [[ ( $FS_DISK0 == $FS_DISK3 ) && ( ${vdevs[i]} == "raidz2" ) ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks spare $sdisks
+	# If this is for raidz2, use 3 disks for the pool.
+	[[ ${vdevs[i]} = "raidz2" ]] && spare="$sdisks"
+	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks $spare
 	verify_assertion "$rawtargets"
 	destroy_pool $TESTPOOL1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_006_pos.ksh
index 0ce45a6..9657322 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_006_pos.ksh

@@ -44,10 +44,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup
 {
 	if [[ -n $PREVDUMPDEV ]]; then
@@ -62,7 +58,7 @@
 	cleanup_devices $vdisks $sdisks
 }
 
-function verify_assertion #slices
+function verify_assertion # disks
 {
 	typeset targets=$1
 
@@ -85,39 +81,11 @@
 
 unset NOINUSE_CHECK
 while (( i < ${#vdevs[*]} )); do
+	typeset spare="spare $sdisks"
 
-	for num in 0 1 2 3 ; do
-		eval typeset disk=\${FS_DISK$num}
-		zero_partitions $disk
-	done
-
-	for num in 0 1 2 3 ; do
-		eval typeset slice=\${FS_SIDE$num}
-		disk=${slice%${SLICE_PREFIX}*}
-		slice=${slice##*${SLICE_PREFIX}}
-		log_must set_partition $slice "" $FS_SIZE $disk
-	done
-
-	if [[ -n $SINGLE_DISK && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices
-	verify_assertion "$disktargets"
-	destroy_pool $TESTPOOL1
-
-	if [[ ( $FS_DISK0 == $FS_DISK2 ) && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	if [[ ( $FS_DISK0 == $FS_DISK3 ) && ( ${vdevs[i]} == "raidz2" ) ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks spare $sdisks
+	# If this is for raidz2, use 3 disks for the pool.
+	[[ ${vdevs[i]} = "raidz2" ]] && spare="$sdisks"
+	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks $spare
 	verify_assertion "$disktargets"
 	destroy_pool $TESTPOOL1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_007_pos.ksh
index 22ac064..b96b808 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_007_pos.ksh

@@ -45,10 +45,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup
 {
 	if [[ -n $PREVDUMPDEV ]]; then
@@ -65,7 +61,7 @@
 	cleanup_devices $vdisks $sdisks
 }
 
-function verify_assertion #slices
+function verify_assertion # disks
 {
 	typeset targets=$1
 
@@ -89,41 +85,11 @@
 PREVDUMPDEV=`dumpadm | grep "Dump device" | awk '{print $3}'`
 
 while (( i < ${#vdevs[*]} )); do
+	typeset spare="spare $sdisks"
 
-	for num in 0 1 2 3 ; do
-		eval typeset disk=\${FS_DISK$num}
-		zero_partitions $disk
-	done
-
-	for num in 0 1 2 3 ; do
-		eval typeset slice=\${FS_SIDE$num}
-		disk=${slice%${SLICE_PREFIX}*}
-		slice=${slice##*${SLICE_PREFIX}}
-		log_must set_partition $slice "" $FS_SIZE $disk
-	done
-
-	if [[ -n $SINGLE_DISK && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices
-	log_must zpool export $TESTPOOL1
-	verify_assertion "$disktargets"
-	log_must zpool import $TESTPOOL1
-	destroy_pool $TESTPOOL1
-
-	if [[ ( $FS_DISK0 == $FS_DISK2 ) && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	if [[ ( $FS_DISK0 == $FS_DISK3 ) && ( ${vdevs[i]} == "raidz2" ) ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks spare $sdisks
+	# If this is for raidz2, use 3 disks for the pool.
+	[[ ${vdevs[i]} = "raidz2" ]] && spare="$sdisks"
+	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks $spare
 	log_must zpool export $TESTPOOL1
 	verify_assertion "$disktargets"
 	log_must zpool import $TESTPOOL1

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh
index a08beb8..d60ebce 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_008_pos.ksh

@@ -61,15 +61,15 @@
 	cleanup_devices $vdisks $sdisks
 }
 
-function verify_assertion #slices
+function verify_assertion # disks
 {
 	typeset targets=$1
 
 	for t in $targets; do
-		echo "y" | newfs -v $t > /dev/null 2>&1
-		(( $? !=0 )) && \
+		if ! new_fs $t; then
 			log_fail "newfs over exported pool " \
 				"fails unexpectedly."
+		fi
 	done
 
 	return 0
@@ -82,29 +82,12 @@
 set -A vdevs "" "mirror" "raidz" "raidz1" "raidz2"
 
 typeset -i i=0
-typeset cyl=""
-
-for num in 0 1 2 3 ; do
-	eval typeset disk=\${FS_DISK$num}
-	zero_partitions $disk
-done
-
-for num in 0 1 2 3 ; do
-	eval typeset slice=\${FS_SIDE$num}
-	disk=${slice%${SLICE_PREFIX}*}
-	[[ -z $SLICE_PREFIX ]] && eval typeset disk=\${FS_DISK$num}
-	slice=$(echo $slice | awk '{ print substr($1,length($1),1) }')
-	log_must set_partition $slice "$cyl" $FS_SIZE $disk
-	[[ $num < 3 ]] && cyl=$(get_endslice $disk $slice)
-done
-
 while (( i < ${#vdevs[*]} )); do
-	if [[ -n $SINGLE_DISK && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
+	typeset spare="spare $sdisks"
 
-	create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices
+	# If this is for raidz2, use 3 disks for the pool.
+	[[ ${vdevs[i]} = "raidz2" ]] && spare="$sdisks"
+	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks $spare
 	log_must zpool export $TESTPOOL1
 	verify_assertion "$rawtargets"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_009_pos.ksh
index a5e9fda..54d201a 100755
--- a/zfs/tests/zfs-tests/tests/functional/inuse/inuse_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/inuse/inuse_009_pos.ksh

@@ -61,7 +61,7 @@
 	cleanup_devices $vdisks $sdisks
 }
 
-function verify_assertion #disks
+function verify_assertion # disks
 {
 	typeset targets=$1
 
@@ -79,44 +79,12 @@
 set -A vdevs "" "mirror" "raidz" "raidz1" "raidz2"
 
 typeset -i i=0
-
 while (( i < ${#vdevs[*]} )); do
+	typeset spare="spare $sdisks"
 
-	for num in 0 1 2 3 ; do
-		eval typeset disk=\${FS_DISK$num}
-		zero_partitions $disk
-	done
-
-	typeset cyl=""
-	for num in 0 1 2 3 ; do
-		eval typeset slice=\${FS_SIDE$num}
-		disk=${slice%${SLICE_PREFIX}*}
-		[[ -z $SLICE_PREFIX ]] && eval typeset disk=\${FS_DISK$num}
-		slice=$(echo $slice | awk '{ print substr($1,length($1),1) }')
-		log_must set_partition $slice "$cyl" $FS_SIZE $disk
-		[[ $num < 3 ]] && cyl=$(get_endslice $disk $slice)
-	done
-
-	if [[ -n $SINGLE_DISK && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vslices spare $sslices
-	log_must zpool export $TESTPOOL1
-	verify_assertion "$vdisks $sdisks"
-
-	if [[ ( $FS_DISK0 == $FS_DISK2 ) && -n ${vdevs[i]} ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	if [[ ( $FS_DISK0 == $FS_DISK3 ) && ( ${vdevs[i]} == "raidz2" ) ]]; then
-		(( i = i + 1 ))
-		continue
-	fi
-
-	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks spare $sdisks
+	# If this is for raidz2, use 3 disks for the pool.
+	[[ ${vdevs[i]} = "raidz2" ]] && spare="$sdisks"
+	create_pool $TESTPOOL1 ${vdevs[i]} $vdisks $spare
 	log_must zpool export $TESTPOOL1
 	verify_assertion "$vdisks $sdisks"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/io/Makefile.am b/zfs/tests/zfs-tests/tests/functional/io/Makefile.am
index 5253f08..44c0d02 100644
--- a/zfs/tests/zfs-tests/tests/functional/io/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/io/Makefile.am

@@ -5,6 +5,7 @@
 	sync.ksh \
 	psync.ksh \
 	libaio.ksh \
+	io_uring.ksh \
 	posixaio.ksh \
 	mmap.ksh
 

diff --git a/zfs/tests/zfs-tests/tests/functional/io/io_uring.ksh b/zfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
new file mode 100755
index 0000000..189c11f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/io/io_uring.ksh

@@ -0,0 +1,72 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/io/io.cfg
+
+#
+# DESCRIPTION:
+#	Verify Linux io_uring.
+#
+# STRATEGY:
+#	1. Use fio(1) in verify mode to perform write, read,
+#	   random read, and random write workloads.
+#	2. Repeat the test with additional fio(1) options.
+#
+
+verify_runnable "global"
+
+# Skip unless /boot/config-<running kernel release> has CONFIG_IO_URING=y.
+if ! grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r); then
+	log_unsupported "Requires io_uring support"
+fi
+
+fio --ioengine=io_uring --parse-only || log_unsupported "io_uring support required"
+
+function cleanup # remove the rw* files left under $mntpnt
+{
+	if [[ -n $mntpnt ]]; then log_must rm -f "$mntpnt"/rw*; fi # glob unquoted
+}
+
+log_assert "Verify Linux io_uring"
+
+log_onexit cleanup
+
+ioengine="--ioengine=io_uring"
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+dir="--directory=$mntpnt"
+
+set -A fio_arg -- "--sync=0" "--sync=1" "--direct=0" "--direct=1"
+
+for arg in "${fio_arg[@]}"; do # one pass per --sync/--direct setting
+	log_must fio $dir $ioengine $arg $FIO_WRITE_ARGS
+	log_must fio $dir $ioengine $arg $FIO_READ_ARGS
+	log_must fio $dir $ioengine $arg $FIO_RANDWRITE_ARGS
+	log_must fio $dir $ioengine $arg $FIO_RANDREAD_ARGS
+	if [[ -n $mntpnt ]]; then log_must rm -f "$mntpnt"/rw*; fi # glob unquoted
+done
+
+log_pass "Verified Linux io_uring"

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/Makefile.am b/zfs/tests/zfs-tests/tests/functional/l2arc/Makefile.am
new file mode 100644
index 0000000..09f4c1d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/Makefile.am

@@ -0,0 +1,15 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/l2arc
+dist_pkgdata_SCRIPTS = \
+	cleanup.ksh \
+	setup.ksh \
+	l2arc_arcstats_pos.ksh \
+	l2arc_l2miss_pos.ksh \
+	l2arc_mfuonly_pos.ksh \
+	persist_l2arc_001_pos.ksh \
+	persist_l2arc_002_pos.ksh \
+	persist_l2arc_003_neg.ksh \
+	persist_l2arc_004_pos.ksh \
+	persist_l2arc_005_pos.ksh
+
+dist_pkgdata_DATA = \
+	l2arc.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/cleanup.ksh
new file mode 100755
index 0000000..c3d88e3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/cleanup.ksh

@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+verify_runnable "global"
+
+if poolexists $TESTPOOL ; then
+	log_must destroy_pool $TESTPOOL
+fi
+
+log_must rm -rf $VDIR
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg
new file mode 100644
index 0000000..0302392
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg

@@ -0,0 +1,38 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+export SIZE=1G
+export VDIR=$TESTDIR/disk.l2arc
+export VDEV="$VDIR/a"
+export VDEV_CACHE="$VDIR/b"
+export VDEV1="$VDIR/c"
+
+# fio options
+export DIRECTORY=/$TESTPOOL
+export NUMJOBS=4
+export RUNTIME=10
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_arcstats_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_arcstats_pos.ksh
new file mode 100755
index 0000000..3e76347
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_arcstats_pos.ksh

@@ -0,0 +1,106 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	L2ARC MFU/MRU arcstats do not leak
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create a random file in that pool, smaller than the cache device
+#		and random read for 10 sec.
+#	3. Read l2arc_mfu_asize and l2arc_mru_asize
+#	4. Export pool.
+#	5. Verify l2arc_mfu_asize and l2arc_mru_asize are 0.
+#	6. Import pool.
+#	7. Random read for 10 sec.
+#	8. Read l2arc_mfu_asize and l2arc_mru_asize
+#	9. Verify that MFU + MRU + prefetch = l2_asize, both before and after.
+#
+
+verify_runnable "global"
+
+log_assert "L2ARC MFU/MRU arcstats do not leak."
+
+function cleanup
+{
+	if poolexists $TESTPOOL ; then
+		destroy_pool $TESTPOOL
+	fi
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( 1.4 * $fill_mb ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+arcstat_quiescence_noecho l2_size
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+typeset l2_mfu_init=$(get_arcstat l2_mfu_asize)
+typeset l2_mru_init=$(get_arcstat l2_mru_asize)
+typeset l2_prefetch_init=$(get_arcstat l2_prefetch_asize)
+typeset l2_asize_init=$(get_arcstat l2_asize)
+
+log_must zpool online $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+log_must zpool export $TESTPOOL
+arcstat_quiescence_noecho l2_feeds
+
+log_must test $(get_arcstat l2_mfu_asize) -eq 0
+log_must test $(get_arcstat l2_mru_asize) -eq 0
+log_must zpool import -d $VDIR $TESTPOOL
+arcstat_quiescence_noecho l2_size
+
+log_must fio $FIO_SCRIPTS/random_reads.fio
+arcstat_quiescence_noecho l2_size
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+typeset l2_mfu_end=$(get_arcstat l2_mfu_asize)
+typeset l2_mru_end=$(get_arcstat l2_mru_asize)
+typeset l2_prefetch_end=$(get_arcstat l2_prefetch_asize)
+typeset l2_asize_end=$(get_arcstat l2_asize)
+
+log_must test $(( $l2_mru_end + $l2_mfu_end + $l2_prefetch_end - \
+	$l2_asize_end )) -eq 0
+log_must test $(( $l2_mru_init + $l2_mfu_init + $l2_prefetch_init - \
+	$l2_asize_init )) -eq 0
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "L2ARC MFU/MRU arcstats do not leak."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_l2miss_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_l2miss_pos.ksh
new file mode 100755
index 0000000..783484f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_l2miss_pos.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, Adam Moss. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	l2arc_misses does not increment upon reads from a pool without l2arc
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create pool without a cache device.
+#	3. Create a random file in the no-cache-device pool,
+#		and random read for 10 sec.
+#	4. Check that l2arc_misses hasn't risen
+#	5. Create a random file in the pool with the cache device,
+#		and random read for 10 sec.
+#	6. Check that l2arc_misses has risen
+#
+
+verify_runnable "global"
+
+log_assert "l2arc_misses does not increment upon reads from a pool without l2arc."
+
+function cleanup
+{
+	if poolexists $TESTPOOL ; then
+		destroy_pool $TESTPOOL
+	fi
+	if poolexists $TESTPOOL1 ; then
+		destroy_pool $TESTPOOL1
+	fi
+}
+log_onexit cleanup
+
+typeset fill_mb=800
+typeset cache_sz=$(( 1.4 * $fill_mb ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -O compression=off -f $TESTPOOL $VDEV cache $VDEV_CACHE
+log_must zpool create -O compression=off -f $TESTPOOL1 $VDEV1
+
+# I/O to pool without l2arc - expect that l2_misses stays constant
+export DIRECTORY=/$TESTPOOL1
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+# attempt to remove entries for pool from ARC so we would try
+#    to hit the nonexistent L2ARC for subsequent reads
+log_must zpool export $TESTPOOL1
+log_must zpool import $TESTPOOL1 -d $VDEV1
+
+typeset starting_miss_count=$(get_arcstat l2_misses)
+
+log_must fio $FIO_SCRIPTS/random_reads.fio
+log_must test $(get_arcstat l2_misses) -eq $starting_miss_count
+
+# I/O to pool with l2arc - expect that l2_misses rises
+export DIRECTORY=/$TESTPOOL
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+# wait for L2ARC writes to actually happen
+arcstat_quiescence_noecho l2_size
+# attempt to remove entries for pool from ARC so we would try
+#    to hit L2ARC for subsequent reads
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL -d $VDEV
+
+log_must fio $FIO_SCRIPTS/random_reads.fio
+log_must test $(get_arcstat l2_misses) -gt $starting_miss_count
+
+log_must zpool destroy -f $TESTPOOL
+log_must zpool destroy -f $TESTPOOL1
+
+log_pass "l2arc_misses does not increment upon reads from a pool without l2arc."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh
new file mode 100755
index 0000000..5d0198c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/l2arc_mfuonly_pos.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	l2arc_mfuonly does not cache MRU buffers
+#
+# STRATEGY:
+#	1. Set l2arc_mfuonly=yes
+#	2. Create pool with a cache device.
+#	3. Create a random file in that pool, smaller than the cache device
+#		and random read for 10 sec.
+#	4. Export and re-import the pool. This is necessary as some MFU ghost
+#		buffers with prefetch status may transition to MRU eventually.
+#		By re-importing the pool the l2 arcstats reflect the ARC state
+#		of L2ARC buffers upon their caching in L2ARC.
+#	5. Verify l2arc_mru_asize is 0.
+#
+
+verify_runnable "global"
+
+log_assert "l2arc_mfuonly does not cache MRU buffers."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+	log_must set_tunable32 L2ARC_MFUONLY $mfuonly
+	log_must set_tunable32 PREFETCH_DISABLE $zfsprefetch
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 1 as some prefetched buffers may
+# transition to MRU.
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+log_must set_tunable32 L2ARC_NOPREFETCH 1
+
+typeset mfuonly=$(get_tunable L2ARC_MFUONLY)
+log_must set_tunable32 L2ARC_MFUONLY 1
+
+typeset zfsprefetch=$(get_tunable PREFETCH_DISABLE)
+log_must set_tunable32 PREFETCH_DISABLE 1
+
+typeset fill_mb=800
+typeset cache_sz=$(( 1.4 * $fill_mb ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+typeset log_blk_start=$(get_arcstat l2_log_blk_writes)
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $VDIR $TESTPOOL
+
+# Regardless of l2arc_noprefetch, some MFU buffers might be evicted
+# from ARC, accessed later on as prefetches and transition to MRU as
+# prefetches.
+# If accessed again they are counted as MRU and the l2arc_mru_asize arcstat
+# will not be 0 (mentioned also in zfs.4)
+# For the purposes of this test we mitigate this by disabling (predictive)
+# ZFS prefetches with zfs_prefetch_disable=1.
+log_must test $(get_arcstat l2_mru_asize) -eq 0
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "l2arc_mfuonly does not cache MRU buffers."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
new file mode 100755
index 0000000..8963803
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh

@@ -0,0 +1,108 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	Persistent L2ARC with an unencrypted ZFS file system succeeds
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create a random file in that pool and random read for 10 sec.
+#	3. Export pool.
+#	4. Read the amount of log blocks written from the header of the
+#		L2ARC device.
+#	5. Import pool.
+#	6. Read the amount of log blocks rebuilt in arcstats and compare to
+#		(4).
+#	7. Check if the labels of the L2ARC device are intact.
+#
+#	* We can predict the minimum bytes of L2ARC restored if we subtract
+#	from the effective size of the cache device the bytes l2arc_evict()
+#	evicts:
+#	l2: L2ARC device size - VDEV_LABEL_START_SIZE - l2ad_dev_hdr_asize
+#	wr_sz: l2arc_write_max + l2arc_write_boost (worst case)
+#	blk_overhead: wr_sz / SPA_MINBLOCKSIZE / (l2 / SPA_MAXBLOCKSIZE) *
+#		sizeof (l2arc_log_blk_phys_t)
+#	min restored size: l2 - (wr_sz + blk_overhead)
+#
+
+verify_runnable "global"
+
+log_assert "Persistent L2ARC with an unencrypted ZFS file system succeeds."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+	log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE \
+		$rebuild_blocks_min_l2size
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+typeset rebuild_blocks_min_l2size=$(get_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f -o ashift=12 $TESTPOOL $VDEV
+log_must zpool add $TESTPOOL cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+arcstat_quiescence_noecho l2_size
+log_must zpool export $TESTPOOL
+arcstat_quiescence_noecho l2_feeds
+
+typeset l2_dh_log_blk=$(zdb -l $VDEV_CACHE | grep log_blk_count | \
+	awk '{print $2}')
+
+typeset l2_rebuild_log_blk_start=$(get_arcstat l2_rebuild_log_blks)
+
+log_must zpool import -d $VDIR $TESTPOOL
+arcstat_quiescence_noecho l2_size
+
+typeset l2_rebuild_log_blk_end=$(arcstat_quiescence_echo l2_rebuild_log_blks)
+
+log_must test $l2_dh_log_blk -eq $(( $l2_rebuild_log_blk_end - \
+	$l2_rebuild_log_blk_start ))
+log_must test $l2_dh_log_blk -gt 0
+
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+log_must zdb -lllq $VDEV_CACHE
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "Persistent L2ARC with an unencrypted ZFS file system succeeds."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_002_pos.ksh
new file mode 100755
index 0000000..93982e6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_002_pos.ksh

@@ -0,0 +1,115 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+#	Persistent L2ARC with an encrypted ZFS file system succeeds
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create an encrypted ZFS file system.
+#	3. Create a random file in the encrypted file system and random
+#		read for 10 sec.
+#	4. Export pool.
+#	5. Read the amount of log blocks written from the header of the
+#		L2ARC device.
+#	6. Import pool.
+#	7. Mount the encrypted ZFS file system.
+#	8. Read the amount of log blocks rebuilt in arcstats and compare to
+#		(5).
+#	9. Check if the labels of the L2ARC device are intact.
+#
+#	* We can predict the minimum bytes of L2ARC restored if we subtract
+#	from the effective size of the cache device the bytes l2arc_evict()
+#	evicts:
+#	l2: L2ARC device size - VDEV_LABEL_START_SIZE - l2ad_dev_hdr_asize
+#	wr_sz: l2arc_write_max + l2arc_write_boost (worst case)
+#	blk_overhead: wr_sz / SPA_MINBLOCKSIZE / (l2 / SPA_MAXBLOCKSIZE) *
+#		sizeof (l2arc_log_blk_phys_t)
+#	min restored size: l2 - (wr_sz + blk_overhead)
+#
+
+verify_runnable "global"
+
+log_assert "Persistent L2ARC with an encrypted ZFS file system succeeds."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+	log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE \
+		$rebuild_blocks_min_l2size
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+typeset rebuild_blocks_min_l2size=$(get_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+	"-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+arcstat_quiescence_noecho l2_size
+log_must zpool export $TESTPOOL
+arcstat_quiescence_noecho l2_feeds
+
+typeset l2_dh_log_blk=$(zdb -l $VDEV_CACHE | grep log_blk_count | \
+	awk '{print $2}')
+
+typeset l2_rebuild_log_blk_start=$(get_arcstat l2_rebuild_log_blks)
+
+log_must zpool import -d $VDIR $TESTPOOL
+log_must eval "echo $PASSPHRASE | zfs mount -l $TESTPOOL/$TESTFS1"
+arcstat_quiescence_noecho l2_size
+
+typeset l2_rebuild_log_blk_end=$(arcstat_quiescence_echo l2_rebuild_log_blks)
+
+log_must test $l2_dh_log_blk -eq $(( $l2_rebuild_log_blk_end - \
+	$l2_rebuild_log_blk_start ))
+log_must test $l2_dh_log_blk -gt 0
+
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+log_must zdb -lq $VDEV_CACHE
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "Persistent L2ARC with an encrypted ZFS file system succeeds."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_003_neg.ksh
new file mode 100755
index 0000000..fe35c8f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_003_neg.ksh

@@ -0,0 +1,87 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	Persistent L2ARC fails as expected when L2ARC_REBUILD_ENABLED = 0
+#
+# STRATEGY:
+#	1. Set L2ARC_REBUILD_ENABLED = 0
+#	2. Create pool with a cache device.
+#	3. Create a random file in that pool and random read for 10 sec.
+#	4. Export pool.
+#	5. Import pool.
+#	6. Check in zpool iostat if the cache device has space allocated.
+#	7. Check that the l2_rebuild_success arcstat did not increase
+#		across the import.
+#
+
+verify_runnable "global"
+
+log_assert "Persistent L2ARC fails as expected when L2ARC_REBUILD_ENABLED = 0."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_REBUILD_ENABLED $rebuild_enabled
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+
+# disable L2ARC rebuild
+typeset rebuild_enabled=$(get_tunable L2ARC_REBUILD_ENABLED)
+log_must set_tunable32 L2ARC_REBUILD_ENABLED 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( 2 * $fill_mb ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+log_must zpool export $TESTPOOL
+
+typeset l2_success_start=$(get_arcstat l2_rebuild_success)
+
+log_must zpool import -d $VDIR $TESTPOOL
+log_mustnot test "$(zpool iostat -Hpv $TESTPOOL $VDEV_CACHE | awk '{print $2}')" -gt 80000000
+
+typeset l2_success_end=$(get_arcstat l2_rebuild_success)
+
+log_mustnot test $l2_success_end -gt $l2_success_start
+
+log_must zpool destroy -f $TESTPOOL
+log_must set_tunable32 L2ARC_REBUILD_ENABLED $rebuild_enabled
+
+log_pass "Persistent L2ARC fails as expected when L2ARC_REBUILD_ENABLED = 0."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_004_pos.ksh
new file mode 100755
index 0000000..b407031
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_004_pos.ksh

@@ -0,0 +1,101 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	Off/onlining an L2ARC device results in rebuilding L2ARC, vdev not
+#	present.
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create a random file in that pool and random read for 10 sec.
+#	3. Read the amount of log blocks written from the header of the
+#		L2ARC device.
+#	4. Offline the L2ARC device and export pool.
+#	5. Import pool and online the L2ARC device.
+#	6. Read the amount of log blocks rebuilt in arcstats and compare to
+#		(3).
+#	7. Check if the labels of the L2ARC device are intact.
+#
+
+verify_runnable "global"
+
+log_assert "Off/onlining an L2ARC device results in rebuilding L2ARC, vdev not present."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+	log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE \
+		$rebuild_blocks_min_l2size
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+typeset rebuild_blocks_min_l2size=$(get_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+arcstat_quiescence_noecho l2_size
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+log_must zpool export $TESTPOOL
+arcstat_quiescence_noecho l2_feeds
+
+typeset l2_rebuild_log_blk_start=$(get_arcstat l2_rebuild_log_blks)
+typeset l2_dh_log_blk=$(zdb -l $VDEV_CACHE | grep log_blk_count | \
+	awk '{print $2}')
+
+log_must zpool import -d $VDIR $TESTPOOL
+log_must zpool online $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+typeset l2_rebuild_log_blk_end=$(arcstat_quiescence_echo l2_rebuild_log_blks)
+
+log_must test $l2_dh_log_blk -eq $(( $l2_rebuild_log_blk_end - \
+	$l2_rebuild_log_blk_start ))
+log_must test $l2_dh_log_blk -gt 0
+
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+log_must zdb -lq $VDEV_CACHE
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "Off/onlining an L2ARC device results in rebuilding L2ARC, vdev not present."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_005_pos.ksh
new file mode 100755
index 0000000..8ad6485
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_005_pos.ksh

@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+#
+# DESCRIPTION:
+#	Off/onlining an L2ARC device results in rebuilding L2ARC, vdev present.
+#
+# STRATEGY:
+#	1. Create pool with a cache device.
+#	2. Create a random file in that pool and random read for 10 sec.
+#	3. Offline the L2ARC device.
+#	4. Read the amount of log blocks written from the header of the
+#		L2ARC device.
+#	5. Online the L2ARC device.
+#	6. Read the amount of log blocks rebuilt in arcstats and compare to
+#		(4).
+#	7. Check if the labels of the L2ARC device are intact.
+#
+
+verify_runnable "global"
+
+log_assert "Off/onlining an L2ARC device results in rebuilding L2ARC, vdev present."
+
+function cleanup
+{
+	# Remove the test pool if it is still around, then put back the
+	# tunable values that were saved before the test modified them.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+	log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
+	log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE \
+		$rebuild_blocks_min_l2size
+}
+log_onexit cleanup
+
+# L2ARC_NOPREFETCH is set to 0 to let L2ARC handle prefetches
+typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
+typeset rebuild_blocks_min_l2size=$(get_tunable L2ARC_REBUILD_BLOCKS_MIN_L2SIZE)
+log_must set_tunable32 L2ARC_NOPREFETCH 0
+log_must set_tunable32 L2ARC_REBUILD_BLOCKS_MIN_L2SIZE 0
+
+typeset fill_mb=800
+typeset cache_sz=$(( floor($fill_mb / 2) ))
+export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M
+
+log_must truncate -s ${cache_sz}M $VDEV_CACHE
+
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+arcstat_quiescence_noecho l2_size
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+typeset l2_rebuild_log_blk_start=$(get_arcstat l2_rebuild_log_blks)
+typeset l2_dh_log_blk=$(zdb -l $VDEV_CACHE | grep log_blk_count | \
+	awk '{print $2}')
+
+log_must zpool online $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+typeset l2_rebuild_log_blk_end=$(arcstat_quiescence_echo l2_rebuild_log_blks)
+
+# Upon onlining the cache device we might write additional blocks to it
+# before it is marked for rebuild as the l2ad_* parameters are not cleared
+# when offlining the device. See comment in l2arc_rebuild_vdev().
+# So we cannot compare the amount of rebuilt log blocks to the amount of log
+# blocks read from the header of the device.
+log_must test $(( $l2_rebuild_log_blk_end - \
+	$l2_rebuild_log_blk_start )) -gt 0
+log_must test $l2_dh_log_blk -gt 0
+
+log_must zpool offline $TESTPOOL $VDEV_CACHE
+arcstat_quiescence_noecho l2_size
+
+log_must zdb -lq $VDEV_CACHE
+
+log_must zpool destroy -f $TESTPOOL
+
+log_pass "Off/onlining an L2ARC device results in rebuilding L2ARC, vdev present."

diff --git a/zfs/tests/zfs-tests/tests/functional/l2arc/setup.ksh b/zfs/tests/zfs-tests/tests/functional/l2arc/setup.ksh
new file mode 100755
index 0000000..0df61a9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/l2arc/setup.ksh

@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/l2arc/l2arc.cfg
+
+verify_runnable "global"
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must mkfile $SIZE $VDEV
+log_must mkfile $SIZE $VDEV1
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/large_files/large_files_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/large_files/large_files_002_pos.ksh
index f4d4e5a..255a8f8 100755
--- a/zfs/tests/zfs-tests/tests/functional/large_files/large_files_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/large_files/large_files_002_pos.ksh

@@ -49,7 +49,11 @@
 # Verify 'ulimit -f <size>' works
 log_must ulimit -f 1024
 log_mustnot sh -c 'dd if=/dev/zero of=$TESTDIR/ulimit_write_file bs=1M count=2'
-log_mustnot sh -c 'truncate -s2M $TESTDIR/ulimit_trunc_file'
-log_must rm $TESTDIR/ulimit_write_file $TESTDIR/ulimit_trunc_file
+log_must rm $TESTDIR/ulimit_write_file
+# FreeBSD allows the sparse file because space has not been allocated.
+if ! is_freebsd; then
+	log_mustnot sh -c 'truncate -s2M $TESTDIR/ulimit_trunc_file'
+	log_must rm $TESTDIR/ulimit_trunc_file
+fi
 
 log_pass "Successfully enforced 'ulimit -f' maximum file size"

diff --git a/zfs/tests/zfs-tests/tests/functional/largest_pool/largest_pool_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/largest_pool/largest_pool_001_pos.ksh
index 1bc8f72..6b51598 100755
--- a/zfs/tests/zfs-tests/tests/functional/largest_pool/largest_pool_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/largest_pool/largest_pool_001_pos.ksh

@@ -91,13 +91,13 @@
 		if ismounted $TESTPOOL/$TESTFS ; then
 			log_must zfs unmount $TESTPOOL/$TESTFS
 		fi
-		log_must zfs destroy $TESTPOOL/$TESTFS
+		destroy_dataset $TESTPOOL/$TESTFS
 	fi
 
 	destroy_pool $TESTPOOL
 
 	datasetexists $TESTPOOL2/$TESTVOL && \
-		log_must zfs destroy $TESTPOOL2/$TESTVOL
+		destroy_dataset $TESTPOOL2/$TESTVOL
 
 	destroy_pool $TESTPOOL2
 
@@ -154,8 +154,8 @@
 	log_note "Destroy zfs, volume & zpool"
 	log_must zfs destroy $TESTPOOL/$TESTFS
 	destroy_pool $TESTPOOL
-	log_must zfs destroy $TESTPOOL2/$TESTVOL
+	log_must_busy zfs destroy $TESTPOOL2/$TESTVOL
 	destroy_pool $TESTPOOL2
 done
 
-log_pass "Dateset can be created, mounted & destroy in largest pool succeeded."
+log_pass "Dataset can be created, mounted & destroy in largest pool succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/libzfs/Makefile.am b/zfs/tests/zfs-tests/tests/functional/libzfs/Makefile.am
index ae9be50..53cb635 100644
--- a/zfs/tests/zfs-tests/tests/functional/libzfs/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/libzfs/Makefile.am

@@ -9,12 +9,8 @@
 	setup.ksh \
 	libzfs_input.ksh
 
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/lib/libspl/include
-
 many_fds_LDADD = \
-	$(top_builddir)/lib/libzfs/libzfs.la
+	$(abs_top_builddir)/lib/libzfs/libzfs.la
 
 pkgexec_PROGRAMS = many_fds
 many_fds_SOURCES = many_fds.c

diff --git a/zfs/tests/zfs-tests/tests/functional/limits/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/limits/cleanup.ksh
index e78deac..2a84ab4 100755
--- a/zfs/tests/zfs-tests/tests/functional/limits/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/limits/cleanup.ksh

@@ -15,5 +15,7 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
 
+cleanup_user_group
 default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/limits/filesystem_limit.ksh b/zfs/tests/zfs-tests/tests/functional/limits/filesystem_limit.ksh
index a659792..fbfc141 100755
--- a/zfs/tests/zfs-tests/tests/functional/limits/filesystem_limit.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/limits/filesystem_limit.ksh

@@ -15,10 +15,12 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
 
 #
 # DESCRIPTION:
 # ZFS 'filesystem_limit' is enforced when executing various actions
+# NOTE: the limit should *not* be enforced if the user is allowed to change it.
 #
 # STRATEGY:
 # 1. Verify 'zfs create' and 'zfs clone' cannot exceed the filesystem_limit
@@ -28,14 +30,47 @@
 
 verify_runnable "both"
 
+#
+# The has_capability() function was first exported in the 4.10 Linux kernel
+# then backported to some LTS kernels.  Prior to this change there was no
+# mechanism to perform the needed permission check.  Therefore, this test
+# is expected to fail on older kernels and is skipped.
+#
+if is_linux; then
+	if [[ $(linux_version) -lt $(linux_version "4.10") ]]; then
+		log_unsupported "Requires has_capability() kernel function"
+	fi
+fi
+
 function setup
 {
-	log_must zfs create "$DATASET_TEST"
-	log_must zfs create "$DATASET_UTIL"
+	# We can't delegate 'mount' privs under Linux: to avoid issues with
+	# commands that may need to (re)mount datasets we set mountpoint=none
+	if is_linux; then
+		log_must zfs create -o mountpoint=none "$DATASET_TEST"
+		log_must zfs create -o mountpoint=none "$DATASET_UTIL"
+	else
+		log_must zfs create "$DATASET_TEST"
+		log_must zfs create "$DATASET_UTIL"
+	fi
+	if is_freebsd; then
+		# Ensure our non-root user has the permission to create the
+		# mountpoints and mount the filesystems.
+		sysctl vfs.usermount=1
+		log_must chmod 777 $(get_prop mountpoint "$DATASET_TEST")
+		log_must chmod 777 $(get_prop mountpoint "$DATASET_UTIL")
+	fi
+	log_must zfs allow -d -l $STAFF1 'create,mount,rename,clone,receive' \
+	    "$DATASET_TEST"
+	log_must zfs allow -d -l $STAFF1 'create,mount,rename,clone,receive' \
+	    "$DATASET_UTIL"
 }
 
 function cleanup
 {
+	if is_freebsd; then
+		sysctl vfs.usermount=0
+	fi
 	destroy_dataset "$DATASET_TEST" "-Rf"
 	destroy_dataset "$DATASET_UTIL" "-Rf"
 	rm -f $ZSTREAM
@@ -50,25 +85,39 @@
 
 # 1. Verify 'zfs create' and 'zfs clone' cannot exceed the filesystem_limit
 setup
+# NOTE: we allow 'canmount' to the non-root user so we can use 'log_must' with
+# 'user_run zfs create -o canmount=off' successfully
+log_must zfs allow -d -l $STAFF1 'canmount' "$DATASET_TEST"
 log_must zfs set filesystem_limit=1 "$DATASET_TEST"
-log_must zfs create "$DATASET_TEST/create"
-log_mustnot zfs create "$DATASET_TEST/create_exceed"
+log_must user_run $STAFF1 zfs create -o canmount=off "$DATASET_TEST/create"
+log_mustnot user_run $STAFF1 zfs create -o canmount=off "$DATASET_TEST/create_exceed"
 log_mustnot datasetexists "$DATASET_TEST/create_exceed"
 log_must zfs set filesystem_limit=2 "$DATASET_TEST"
 log_must zfs snapshot "$DATASET_TEST/create@snap"
-log_must zfs clone "$DATASET_TEST/create@snap" "$DATASET_TEST/clone"
-log_mustnot zfs clone "$DATASET_TEST/create@snap" "$DATASET_TEST/clone_exceed"
+log_must user_run $STAFF1 zfs clone -o canmount=off "$DATASET_TEST/create@snap" "$DATASET_TEST/clone"
+log_mustnot user_run $STAFF1 zfs clone -o canmount=off "$DATASET_TEST/create@snap" "$DATASET_TEST/clone_exceed"
 log_mustnot datasetexists "$DATASET_TEST/clone_exceed"
 log_must test "$(get_prop 'filesystem_count' "$DATASET_TEST")" == "2"
+# Verify filesystem_limit is *not* enforced for users allowed to change it
+log_must zfs create "$DATASET_TEST/create_notenforced_root"
+log_must zfs allow -l $STAFF1 'filesystem_limit' "$DATASET_TEST"
+log_must user_run $STAFF1 zfs create -o canmount=off "$DATASET_TEST/create_notenforced_user"
+log_must test "$(get_prop 'filesystem_count' "$DATASET_TEST")" == "4"
 cleanup
 
 # 2. Verify 'zfs rename' cannot move filesystems exceeding the limit
 setup
 log_must zfs set filesystem_limit=0 "$DATASET_UTIL"
 log_must zfs create "$DATASET_TEST/rename"
-log_mustnot zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed"
+log_mustnot user_run $STAFF1 zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed"
 log_mustnot datasetexists "$DATASET_UTIL/renamed"
 log_must test "$(get_prop 'filesystem_count' "$DATASET_UTIL")" == "0"
+# Verify filesystem_limit is *not* enforced for users allowed to change it
+log_must zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed_notenforced_root"
+log_must zfs rename "$DATASET_UTIL/renamed_notenforced_root" "$DATASET_TEST/rename"
+log_must zfs allow -l $STAFF1 'filesystem_limit' "$DATASET_UTIL"
+log_must user_run $STAFF1 zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed_notenforced_user"
+log_must datasetexists "$DATASET_UTIL/renamed_notenforced_user"
 cleanup
 
 # 3. Verify 'zfs receive' cannot exceed the limit
@@ -77,8 +126,14 @@
 log_must zfs create "$DATASET_UTIL/send"
 log_must zfs snapshot "$DATASET_UTIL/send@snap1"
 log_must eval "zfs send $DATASET_UTIL/send@snap1 > $ZSTREAM"
-log_mustnot eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_mustnot user_run $STAFF1 eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
 log_mustnot datasetexists "$DATASET_TEST/received"
 log_must test "$(get_prop 'filesystem_count' "$DATASET_TEST")" == "0"
+# Verify filesystem_limit is *not* enforced for users allowed to change it
+log_must eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_must zfs destroy -r "$DATASET_TEST/received"
+log_must zfs allow -l $STAFF1 'filesystem_limit' "$DATASET_TEST"
+log_must user_run $STAFF1 eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_must datasetexists "$DATASET_TEST/received"
 
 log_pass "'filesystem_limit' property is enforced"

diff --git a/zfs/tests/zfs-tests/tests/functional/limits/setup.ksh b/zfs/tests/zfs-tests/tests/functional/limits/setup.ksh
index af6edbe..94f3e7b 100755
--- a/zfs/tests/zfs-tests/tests/functional/limits/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/limits/setup.ksh

@@ -15,7 +15,14 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
 
 DISK=${DISKS%% *}
 
+cleanup_user_group
+
+# Create staff group and user
+log_must add_group $STAFF_GROUP
+log_must add_user $STAFF_GROUP $STAFF1
+
 default_volume_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/limits/snapshot_limit.ksh b/zfs/tests/zfs-tests/tests/functional/limits/snapshot_limit.ksh
index fa4b6e8..62f1446 100755
--- a/zfs/tests/zfs-tests/tests/functional/limits/snapshot_limit.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/limits/snapshot_limit.ksh

@@ -15,10 +15,12 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
 
 #
 # DESCRIPTION:
 # ZFS 'snapshot_limit' is enforced when executing various actions
+# NOTE: the limit should *not* be enforced if the user is allowed to change it.
 #
 # STRATEGY:
 # 1. Verify 'zfs snapshot' cannot exceed the snapshot_limit
@@ -29,14 +31,47 @@
 
 verify_runnable "both"
 
+#
+# The has_capability() function was first exported in the 4.10 Linux kernel
+# then backported to some LTS kernels.  Prior to this change there was no
+# mechanism to perform the needed permission check.  Therefore, this test
+# is expected to fail on older kernels and is skipped.
+#
+if is_linux; then
+	if [[ $(linux_version) -lt $(linux_version "4.10") ]]; then
+		log_unsupported "Requires has_capability() kernel function"
+	fi
+fi
+
 function setup
 {
-	log_must zfs create "$DATASET_TEST"
-	log_must zfs create "$DATASET_UTIL"
+	# We can't delegate 'mount' privs under Linux: to avoid issues with
+	# commands that may need to (re)mount datasets we set mountpoint=none
+	if is_linux; then
+		log_must zfs create -o mountpoint=none "$DATASET_TEST"
+		log_must zfs create -o mountpoint=none "$DATASET_UTIL"
+	else
+		log_must zfs create "$DATASET_TEST"
+		log_must zfs create "$DATASET_UTIL"
+	fi
+	if is_freebsd; then
+		# Ensure our non-root user has the permission to create the
+		# mountpoints and mount the filesystems.
+		sysctl vfs.usermount=1
+		log_must chmod 777 $(get_prop mountpoint "$DATASET_TEST")
+		log_must chmod 777 $(get_prop mountpoint "$DATASET_UTIL")
+	fi
+	log_must zfs allow -d -l $STAFF1 \
+	    'create,snapshot,rename,mount,promote,receive' "$DATASET_TEST"
+	log_must zfs allow -d -l $STAFF1 \
+	    'create,snapshot,rename,mount,promote,receive' "$DATASET_UTIL"
 }
 
 function cleanup
 {
+	if is_freebsd; then
+		sysctl vfs.usermount=0
+	fi
 	destroy_dataset "$DATASET_TEST" "-Rf"
 	destroy_dataset "$DATASET_UTIL" "-Rf"
 	rm -f $ZSTREAM
@@ -52,10 +87,15 @@
 # 1. Verify 'zfs snapshot' cannot exceed the snapshot_limit
 setup
 log_must zfs set snapshot_limit=1 "$DATASET_TEST"
-log_must zfs snapshot "$DATASET_TEST@snap"
-log_mustnot zfs snapshot "$DATASET_TEST@snap_exceed"
+log_must user_run $STAFF1 zfs snapshot "$DATASET_TEST@snap"
+log_mustnot user_run $STAFF1 zfs snapshot "$DATASET_TEST@snap_exceed"
 log_mustnot datasetexists "$DATASET_TEST@snap_exceed"
 log_must test "$(get_prop 'snapshot_count' "$DATASET_TEST")" == "1"
+# Verify snapshot_limit is *not* enforced for users allowed to change it
+log_must zfs snapshot "$DATASET_TEST@snap_notenforced_root"
+log_must zfs allow -l $STAFF1 'snapshot_limit' "$DATASET_TEST"
+log_must user_run $STAFF1 zfs snapshot "$DATASET_TEST@snap_notenforced_user"
+log_must test "$(get_prop 'snapshot_count' "$DATASET_TEST")" == "3"
 cleanup
 
 # 2. Verify 'zfs rename' cannot move snapshots exceeding the limit
@@ -63,9 +103,19 @@
 log_must zfs set snapshot_limit=0 "$DATASET_UTIL"
 log_must zfs create "$DATASET_TEST/rename"
 log_must zfs snapshot "$DATASET_TEST/rename@snap"
-log_mustnot zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed"
+log_mustnot user_run $STAFF1 \
+    zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed"
 log_mustnot datasetexists "$DATASET_UTIL/renamed"
 log_must test "$(get_prop 'snapshot_count' "$DATASET_UTIL")" == "0"
+# Verify snapshot_limit is *not* enforced for users allowed to change it
+log_must zfs rename "$DATASET_TEST/rename" \
+    "$DATASET_UTIL/renamed_notenforced_root"
+log_must zfs create "$DATASET_TEST/rename"
+log_must zfs snapshot "$DATASET_TEST/rename@snap"
+log_must zfs allow -l $STAFF1 'snapshot_limit' "$DATASET_UTIL"
+log_must user_run $STAFF1 \
+    zfs rename "$DATASET_TEST/rename" "$DATASET_UTIL/renamed_notenforced_user"
+log_must test "$(get_prop 'snapshot_count' "$DATASET_UTIL")" == "2"
 cleanup
 
 # 3. Verify 'zfs promote' cannot exceed the limit
@@ -74,9 +124,15 @@
 log_must zfs create "$DATASET_TEST/promote"
 log_must zfs snapshot "$DATASET_TEST/promote@snap"
 log_must zfs clone "$DATASET_TEST/promote@snap" "$DATASET_UTIL/promoted"
-log_mustnot zfs promote "$DATASET_UTIL/promoted"
+log_mustnot user_run $STAFF1 zfs promote "$DATASET_UTIL/promoted"
 log_mustnot datasetexists "$DATASET_UTIL/promoted@snap"
 log_must test "$(get_prop 'snapshot_count' "$DATASET_UTIL")" == "0"
+# Verify snapshot_limit is *not* enforced for users allowed to change it
+log_must zfs promote "$DATASET_UTIL/promoted"
+log_must zfs promote "$DATASET_TEST/promote"
+log_must zfs allow -l $STAFF1 'snapshot_limit' "$DATASET_UTIL"
+log_must user_run $STAFF1 zfs promote "$DATASET_UTIL/promoted"
+log_must test "$(get_prop 'snapshot_count' "$DATASET_UTIL")" == "1"
 cleanup
 
 # 4. Verify 'zfs receive' cannot exceed the limit
@@ -85,15 +141,26 @@
 log_must zfs create "$DATASET_UTIL/send"
 log_must zfs snapshot "$DATASET_UTIL/send@snap1"
 log_must eval "zfs send $DATASET_UTIL/send@snap1 > $ZSTREAM"
-log_mustnot eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_mustnot user_run $STAFF1 \
+    eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
 log_mustnot datasetexists "$DATASET_TEST/received"
 log_must test "$(get_prop 'snapshot_count' "$DATASET_TEST")" == "0"
 log_must zfs set snapshot_limit=1 "$DATASET_TEST"
-log_must eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_must user_run $STAFF1 \
+    eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
 log_must zfs snapshot "$DATASET_UTIL/send@snap2"
 log_must eval "zfs send -i @snap1 $DATASET_UTIL/send@snap2 > $ZSTREAM"
-log_mustnot eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_mustnot user_run $STAFF1 \
+    eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
 log_mustnot datasetexists "$DATASET_TEST/received@snap2"
 log_must test "$(get_prop 'snapshot_count' "$DATASET_TEST")" == "1"
+# Verify snapshot_limit is *not* enforced for users allowed to change it
+log_must eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_must zfs snapshot "$DATASET_UTIL/send@snap3"
+log_must eval "zfs send -i @snap2 $DATASET_UTIL/send@snap3 > $ZSTREAM"
+log_must zfs allow -l $STAFF1 'snapshot_limit' "$DATASET_TEST"
+log_must user_run $STAFF1 \
+    eval "zfs receive $DATASET_TEST/received < $ZSTREAM"
+log_must test "$(get_prop 'snapshot_count' "$DATASET_TEST")" == "3"
 
 log_pass "'snapshot_limit' property is enforced"

diff --git a/zfs/tests/zfs-tests/tests/functional/link_count/link_count_001.ksh b/zfs/tests/zfs-tests/tests/functional/link_count/link_count_001.ksh
index e121787..3ab3fbe 100755
--- a/zfs/tests/zfs-tests/tests/functional/link_count/link_count_001.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/link_count/link_count_001.ksh

@@ -49,6 +49,10 @@
 export ITERS=10
 export NUMFILES=10000
 
+if is_freebsd; then
+	log_unsupported "Not applicable on FreeBSD"
+fi
+
 # Detect and make sure this test must be executed on a multi-process system
 if ! is_mp; then
 	log_unsupported "This test requires a multi-processor system."

diff --git a/zfs/tests/zfs-tests/tests/functional/log_spacemap/Makefile.am b/zfs/tests/zfs-tests/tests/functional/log_spacemap/Makefile.am
new file mode 100644
index 0000000..a1e5234
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/log_spacemap/Makefile.am

@@ -0,0 +1,2 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/log_spacemap
+dist_pkgdata_SCRIPTS = log_spacemap_import_logs.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/log_spacemap/log_spacemap_import_logs.ksh b/zfs/tests/zfs-tests/tests/functional/log_spacemap/log_spacemap_import_logs.ksh
new file mode 100755
index 0000000..fca0e8e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/log_spacemap/log_spacemap_import_logs.ksh

@@ -0,0 +1,81 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Log spacemaps are generally destroyed at export in order to
+# not induce performance overheads at import time. As a result,
+# the log spacemap codepaths that read the logs at import time
+# are not tested outside of ztest and pools with DEBUG bits doing
+# many imports/exports while running the test suite.
+#
+# This test uses an internal tunable and forces ZFS to keep the
+# log spacemaps at export, and then re-imports the pool, thus
+# providing explicit testing of those codepaths. It also uses
+# another tunable to load all the metaslabs when the pool is
+# re-imported so more assertions and verifications will be hit.
+#
+# STRATEGY:
+#	1. Create pool.
+#	2. Do a couple of writes to generate some data for spacemap logs.
+#	3. Set tunable to keep logs after export.
+#	4. Export pool and verify that there are logs with zdb.
+#	5. Set tunable to load all metaslabs at import.
+#	6. Import pool.
+#	7. Reset tunables.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0
+	log_must set_tunable64 METASLAB_DEBUG_LOAD 0
+	if poolexists $LOGSM_POOL; then
+		log_must zpool destroy -f $LOGSM_POOL
+	fi
+}
+log_onexit cleanup
+
+LOGSM_POOL="logsm_import"
+TESTDISK="$(echo $DISKS | cut -d' ' -f1)"
+
+log_must zpool create -o cachefile=none -f $LOGSM_POOL $TESTDISK
+log_must zfs create $LOGSM_POOL/fs
+
+log_must dd if=/dev/urandom of=/$LOGSM_POOL/fs/00 bs=128k count=10
+log_must sync
+log_must dd if=/dev/urandom of=/$LOGSM_POOL/fs/00 bs=128k count=10
+log_must sync
+
+log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1
+log_must zpool export $LOGSM_POOL
+
+# Step 4: the exported pool must still carry at least one log spacemap.
+LOGSM_COUNT=$(zdb -m -e $LOGSM_POOL | grep -c "Log Spacemap object")
+(( LOGSM_COUNT == 0 )) &&
+	log_fail "Pool does not have any log spacemaps after being exported"
+
+log_must set_tunable64 METASLAB_DEBUG_LOAD 1
+log_must zpool import $LOGSM_POOL
+
+log_pass "Log spacemaps imported with no errors"

diff --git a/zfs/tests/zfs-tests/tests/functional/migration/migration.cfg b/zfs/tests/zfs-tests/tests/functional/migration/migration.cfg
index 7d86436..12a5a77 100644
--- a/zfs/tests/zfs-tests/tests/functional/migration/migration.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/migration/migration.cfg

@@ -60,7 +60,6 @@
 			log_fail "$ZFS_DISK not supported for partitioning."
 		fi
 	else
-		export DEV_DSKDIR="/dev"
 		ZFSSIDE_DISK=${SINGLE_DISK}s0
 		NONZFSSIDE_DISK=${SINGLE_DISK}s1
 	fi
@@ -93,7 +92,6 @@
 			log_fail "$NONZFS_DISK not supported for partitioning."
 		fi
 	else
-		export DEV_DSKDIR="/dev"
 		ZFSSIDE_DISK=${ZFS_DISK}s0
 		NONZFSSIDE_DISK=${NONZFS_DISK}s0
 	fi

diff --git a/zfs/tests/zfs-tests/tests/functional/migration/setup.ksh b/zfs/tests/zfs-tests/tests/functional/migration/setup.ksh
index cae66aa..58edc0a 100755
--- a/zfs/tests/zfs-tests/tests/functional/migration/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/migration/setup.ksh

@@ -57,9 +57,9 @@
 rm -rf $NONZFS_TESTDIR  || log_unresolved Could not remove $NONZFS_TESTDIR
 mkdir -p $NONZFS_TESTDIR || log_unresolved Could not create $NONZFS_TESTDIR
 
-echo "y" | newfs -v ${DEV_DSKDIR}/$NONZFS_DISK
+new_fs ${DEV_DSKDIR}/$NONZFS_DISK
 (( $? != 0 )) &&
-	log_untested "Unable to setup a UFS file system"
+	log_untested "Unable to setup a $NEWFS_DEFAULT_FS file system"
 
 log_must mount ${DEV_DSKDIR}/$NONZFS_DISK $NONZFS_TESTDIR
 

diff --git a/zfs/tests/zfs-tests/tests/functional/mmap/Makefile.am b/zfs/tests/zfs-tests/tests/functional/mmap/Makefile.am
index 2adc398..5264059 100644
--- a/zfs/tests/zfs-tests/tests/functional/mmap/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/mmap/Makefile.am

@@ -2,9 +2,12 @@
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
+	mmap_mixed.ksh \
 	mmap_read_001_pos.ksh \
 	mmap_write_001_pos.ksh \
-	mmap_libaio_001_pos.ksh
+	mmap_libaio_001_pos.ksh \
+	mmap_seek_001_pos.ksh \
+	mmap_sync_001_pos.ksh
 
 dist_pkgdata_DATA = \
 	mmap.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
new file mode 100755
index 0000000..6c8246d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh

@@ -0,0 +1,86 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/mmap/mmap.cfg
+
+#
+# DESCRIPTION:
+# 	Verify mixed buffered and mmap IO.
+#
+# STRATEGY:
+#	1. Create a zero-filled file.
+#	2. Start background buffered read and write fio jobs on the file.
+#	3. Start background mmap read and write fio jobs, then wait for all.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	log_must rm -f "$tmp_file"
+}
+
+log_assert "Verify mixed buffered and mmap IO"
+
+log_onexit cleanup
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+tmp_file=$mntpnt/file
+bs=$((128 * 1024))
+blocks=64
+size=$((bs * blocks))
+runtime=60
+
+log_must dd if=/dev/zero of=$tmp_file bs=$bs count=$blocks
+
+# Buffered IO writes
+log_must eval "fio --filename=$tmp_file --name=buffer-write \
+	--rw=randwrite --size=$size --bs=$bs --direct=0 --numjobs=1 \
+	--ioengine=sync --fallocate=none --group_reporting --minimal \
+	--runtime=$runtime --time_based --norandommap &"
+
+# Buffered IO reads
+log_must eval "fio --filename=$tmp_file --name=buffer-read \
+	--rw=randread --size=$size --bs=$bs --direct=0 --numjobs=1 \
+	--ioengine=sync --fallocate=none --group_reporting --minimal \
+	--runtime=$runtime --time_based --norandommap &"
+
+# mmap IO writes
+log_must eval "fio --filename=$tmp_file --name=mmap-write \
+	--rw=randwrite --size=$size --bs=$bs --numjobs=1 \
+	--ioengine=mmap --fallocate=none --group_reporting --minimal \
+	--runtime=$runtime --time_based --norandommap &"
+
+# mmap IO reads
+log_must eval "fio --filename=$tmp_file --name=mmap-read \
+	--rw=randread --size=$size --bs=$bs --numjobs=1 \
+	--ioengine=mmap --fallocate=none --group_reporting --minimal \
+	--runtime=$runtime --time_based --norandommap &"
+
+log_must wait
+
+log_pass "Verified mixed buffered and mmap IO"

diff --git a/zfs/tests/zfs-tests/tests/functional/mmap/mmap_read_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_read_001_pos.ksh
index 42e1f73..470f10d 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmap/mmap_read_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_read_001_pos.ksh

@@ -40,7 +40,7 @@
 # 1. Create a pool & dataset
 # 2. Call readmmap binary
 # 3. unmount this file system
-# 4. Verify the integrity of this pool & dateset
+# 4. Verify the integrity of this pool & dataset
 #
 
 verify_runnable "global"

diff --git a/zfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
new file mode 100755
index 0000000..6188549
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh

@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/mmap/mmap.cfg
+
+#
+# DESCRIPTION:
+# lseek() data/holes for an mmap()'d file.
+#
+# STRATEGY:
+# 1. Enable compression and hole reporting for dirty files.
+# 2. Call mmap_seek binary test case for various record sizes.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	log_must zfs set compression=off $TESTPOOL/$TESTFS
+	log_must zfs set recordsize=128k $TESTPOOL/$TESTFS
+	log_must rm -f $TESTDIR/test-mmap-file
+	log_must set_tunable64 DMU_OFFSET_NEXT_SYNC $dmu_offset_next_sync
+}
+
+log_assert "lseek() data/holes for an mmap()'d file."
+
+log_onexit cleanup
+
+# Enable hole reporting for dirty files.
+typeset dmu_offset_next_sync=$(get_tunable DMU_OFFSET_NEXT_SYNC)
+log_must set_tunable64 DMU_OFFSET_NEXT_SYNC 1
+
+# Compression must be enabled to convert zero'd blocks to holes.
+# This behavior is checked by the mmap_seek test.
+log_must zfs set compression=on $TESTPOOL/$TESTFS
+
+for bs in 4096 8192 16384 32768 65536 131072; do
+	log_must zfs set recordsize=$bs $TESTPOOL/$TESTFS
+	log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs
+	log_must rm $TESTDIR/test-mmap-file
+done
+
+log_pass "lseek() data/holes for an mmap()'d file succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh
new file mode 100755
index 0000000..b764d66
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh

@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# msync()s of mmap()'ed file should complete quickly during
+# background dirty page writebacks by the kernel.
+#
+
+function cleanup
+{
+	log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs"
+	log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio"
+	log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs"
+
+	# If an interrupted earlier run left this test's settings behind, the
+	# saved values are not the real ones; fall back to sensible defaults.
+	if [ $(</proc/sys/vm/dirty_expire_centisecs) -eq 1 ]; then
+		log_must eval "echo 3000 > /proc/sys/vm/dirty_expire_centisecs"
+	fi
+
+	if [ $(</proc/sys/vm/dirty_background_ratio) -eq 0 ]; then
+		log_must eval "echo 10 > /proc/sys/vm/dirty_background_ratio"
+	fi
+
+	if [ $(</proc/sys/vm/dirty_writeback_centisecs) -eq 1 ]; then
+		log_must eval "echo 500 > /proc/sys/vm/dirty_writeback_centisecs"
+	fi
+}
+
+if ! is_linux; then
+	log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables"
+fi
+
+log_onexit cleanup
+log_assert "Run the tests for mmap_sync"
+
+read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs
+read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio
+read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs
+
+log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs"
+log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes"
+log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs"
+
+log_must mmap_sync
+log_pass "mmap_sync tests passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/cleanup.ksh
index 8146f77..b41d6cc 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/cleanup.ksh

@@ -23,6 +23,6 @@
 
 verify_runnable "global"
 
-log_must set_tunable64 zfs_multihost_history $MMP_HISTORY_OFF
+log_must set_tunable64 MULTIHOST_HISTORY $MMP_HISTORY_OFF
 
 log_pass "mmp cleanup passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp.kshlib b/zfs/tests/zfs-tests/tests/functional/mmp/mmp.kshlib
index fda57c0..661cbf3 100644
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp.kshlib

@@ -173,8 +173,8 @@
 
 	typeset seconds=0
 	typeset devices=${#DISK[@]}
-	typeset import_intervals=$(get_tunable zfs_multihost_import_intervals)
-	typeset import_interval=$(get_tunable zfs_multihost_interval)
+	typeset import_intervals=$(get_tunable MULTIHOST_IMPORT_INTERVALS)
+	typeset import_interval=$(get_tunable MULTIHOST_INTERVAL)
 	typeset tmpfile=$(mktemp)
 	typeset mmp_fail
 	typeset mmp_write
@@ -241,8 +241,8 @@
 
 function clear_mmp_history
 {
-	log_must set_tunable64 zfs_multihost_history $MMP_HISTORY_OFF
-	log_must set_tunable64 zfs_multihost_history $MMP_HISTORY
+	log_must set_tunable64 MULTIHOST_HISTORY $MMP_HISTORY_OFF
+	log_must set_tunable64 MULTIHOST_HISTORY $MMP_HISTORY
 }
 
 function count_skipped_mmp_writes # pool duration

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh
index 64ed9bf..6e7bb63 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh

@@ -43,7 +43,7 @@
 {
 	default_cleanup_noexit
 	log_must mmp_clear_hostid
-	log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
+	log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
 }
 
 log_assert "multihost=on|off inactive pool activity checks"
@@ -103,7 +103,7 @@
 # 9. Verify activity check duration based on mmp_write and mmp_fail
 # Specify a short test via tunables but import pool imported while
 # tunables set to default duration.
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
 log_must mmp_clear_hostid
 log_must mmp_set_hostid $HOSTID1
 log_must import_activity_check $TESTPOOL "-f" $MMP_TEST_DURATION_DEFAULT

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_interval.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_interval.ksh
index fb44d61..0c080ab 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_interval.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_interval.ksh

@@ -19,11 +19,11 @@
 #
 
 # DESCRIPTION:
-#	zfs_multihost_interval should only accept valid values.
+#	MULTIHOST_INTERVAL should only accept valid values.
 #
 # STRATEGY:
-#	1. Set zfs_multihost_interval to invalid values (negative).
-#	2. Set zfs_multihost_interval to valid values.
+#	1. Set MULTIHOST_INTERVAL to invalid values (negative).
+#	2. Set MULTIHOST_INTERVAL to valid values.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -34,14 +34,14 @@
 
 function cleanup
 {
-	log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
+	log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
 }
 
-log_assert "zfs_multihost_interval cannot be set to an invalid value"
+log_assert "MULTIHOST_INTERVAL cannot be set to an invalid value"
 log_onexit cleanup
 
-log_mustnot set_tunable64 zfs_multihost_interval -1
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
+log_mustnot set_tunable64 MULTIHOST_INTERVAL -1
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
 
-log_pass "zfs_multihost_interval cannot be set to an invalid value"
+log_pass "MULTIHOST_INTERVAL cannot be set to an invalid value"

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_off.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_off.ksh
index 8bef86a..29d771d 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_off.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_off.ksh

@@ -23,7 +23,7 @@
 #
 # STRATEGY:
 #	1. Set multihost=off (disables mmp)
-#	2. Set zfs_txg_timeout to large value
+#	2. Set TXG_TIMEOUT to large value
 #	3. Create a zpool
 #	4. Find the current "best" uberblock
 #	5. Sleep for enough time for uberblocks to change
@@ -44,8 +44,8 @@
 function cleanup
 {
 	default_cleanup_noexit
-	log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT
-	log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
+	log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_DEFAULT
+	log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
 	log_must rm -f $PREV_UBER $CURR_UBER
 	log_must mmp_clear_hostid
 }
@@ -53,8 +53,8 @@
 log_assert "mmp thread won't write uberblocks with multihost=off"
 log_onexit cleanup
 
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN
-log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
+log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_LONG
 log_must mmp_set_hostid $HOSTID1
 
 default_setup_noexit $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh
index 07384c6..01cca61 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh

@@ -39,7 +39,7 @@
 function cleanup
 {
 	default_cleanup_noexit
-	log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT
+	log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_DEFAULT
 	log_must rm -f $PREV_UBER $CURR_UBER
 	log_must mmp_clear_hostid
 }
@@ -47,7 +47,7 @@
 log_assert "mmp thread writes uberblocks (MMP)"
 log_onexit cleanup
 
-log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG
+log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_LONG
 log_must mmp_set_hostid $HOSTID1
 
 default_setup_noexit $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh
index 9c4552b..007288a 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh

@@ -22,7 +22,7 @@
 #	Ensure that MMP updates uberblocks with MMP info at expected intervals. 
 #
 # STRATEGY:
-#	1. Set zfs_txg_timeout to large value
+#	1. Set TXG_TIMEOUT to large value
 #	2. Create a zpool
 #	3. Clear multihost history
 #	4. Sleep, then collect count of uberblocks written
@@ -47,15 +47,15 @@
 function cleanup
 {
 	default_cleanup_noexit
-	log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
-	set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT
+	log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
+	set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_DEFAULT
 	log_must mmp_clear_hostid
 }
 
 log_assert "Ensure MMP uberblocks update at the correct interval"
 log_onexit cleanup
 
-log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG
+log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_LONG
 log_must mmp_set_hostid $HOSTID1
 
 default_setup_noexit "$DISKS"
@@ -73,7 +73,7 @@
 	log_fail "More uberblock writes occurred than expected ($EXPECTED)"
 fi
 
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
 SEQ_BEFORE=$(zdb -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}')
 sleep 1
 SEQ_AFTER=$(zdb  -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}')

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh
index 842df28..6e3d1fe 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh

@@ -19,15 +19,15 @@
 #
 
 # DESCRIPTION:
-#	Ensure that the MMP thread is notified when zfs_multihost_interval is
-#	reduced, and that changes to zfs_multihost_interval and
-#	zfs_multihost_fail_intervals do not trigger pool suspensions.
+#	Ensure that the MMP thread is notified when MULTIHOST_INTERVAL is
+#	reduced, and that changes to MULTIHOST_INTERVAL and
+#	MULTIHOST_FAIL_INTERVALS do not trigger pool suspensions.
 #
 # STRATEGY:
-#	1. Set zfs_multihost_interval to much longer than the test duration
+#	1. Set MULTIHOST_INTERVAL to much longer than the test duration
 #	2. Create a zpool and enable multihost
 #	3. Verify no MMP writes occurred
-#	4. Set zfs_multihost_interval to 1 second
+#	4. Set MULTIHOST_INTERVAL to 1 second
 #	5. Sleep briefly
 #	6. Verify MMP writes began
 #	7. Verify mmp_fail and mmp_write in uberblock reflect tunables
@@ -43,34 +43,34 @@
 function cleanup
 {
 	default_cleanup_noexit
-	log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
-	log_must set_tunable64 zfs_multihost_fail_intervals \
+	log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
+	log_must set_tunable64 MULTIHOST_FAIL_INTERVALS \
 	    $MMP_FAIL_INTERVALS_DEFAULT
 	log_must mmp_clear_hostid
 }
 
-log_assert "mmp threads notified when zfs_multihost_interval reduced"
+log_assert "mmp threads notified when MULTIHOST_INTERVAL reduced"
 log_onexit cleanup
 
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_HOUR
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_HOUR
 log_must mmp_set_hostid $HOSTID1
 
 default_setup_noexit $DISK
 log_must zpool set multihost=on $TESTPOOL
 
 clear_mmp_history
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
 uber_count=$(count_mmp_writes $TESTPOOL 1)
 
 if [ $uber_count -eq 0 ]; then
-	log_fail "ERROR: mmp writes did not start when zfs_multihost_interval reduced"
+	log_fail "ERROR: mmp writes did not start when MULTIHOST_INTERVAL reduced"
 fi
 
 # 7. Verify mmp_write and mmp_fail are written
 for fails in $(seq $MMP_FAIL_INTERVALS_MIN $((MMP_FAIL_INTERVALS_MIN*2))); do
 	for interval in $(seq $MMP_INTERVAL_MIN 200 $MMP_INTERVAL_DEFAULT); do
-		log_must set_tunable64 zfs_multihost_fail_intervals $fails
-		log_must set_tunable64 zfs_multihost_interval $interval
+		log_must set_tunable64 MULTIHOST_FAIL_INTERVALS $fails
+		log_must set_tunable64 MULTIHOST_INTERVAL $interval
 		log_must sync_pool $TESTPOOL
 		typeset mmp_fail=$(zdb $TESTPOOL 2>/dev/null |
 		    awk '/mmp_fail/ {print $NF}')
@@ -86,10 +86,10 @@
 done
 
 
-# 8. Repeatedly change zfs_multihost_interval and fail_intervals
+# 8. Repeatedly change MULTIHOST_INTERVAL and fail_intervals
 for x in $(seq 10); do
 	typeset new_interval=$(( (RANDOM % 20 + 1) * $MMP_INTERVAL_MIN ))
-	log_must set_tunable64 zfs_multihost_interval $new_interval
+	log_must set_tunable64 MULTIHOST_INTERVAL $new_interval
 	typeset action=$((RANDOM %10))
 	if [ $action -eq 0 ]; then
 		log_must zpool export -a
@@ -106,14 +106,14 @@
 		log_must zpool import -f $TESTPOOL
 	elif [ $action -eq 3 ]; then
 		log_must zpool export -F $TESTPOOL
-		log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN
+		log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN
 		log_must zpool import $TESTPOOL
 	elif [ $action -eq 4 ]; then
-		log_must set_tunable64 zfs_multihost_fail_intervals \
+		log_must set_tunable64 MULTIHOST_FAIL_INTERVALS \
 		    $((RANDOM % MMP_FAIL_INTERVALS_DEFAULT))
 	fi
 	sleep 5
 done
 
 
-log_pass "mmp threads notified when zfs_multihost_interval reduced"
+log_pass "mmp threads notified when MULTIHOST_INTERVAL reduced"

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh
index 7504caa..b6bdc68 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh

@@ -57,8 +57,8 @@
 # Step 2
 log_must mmp_set_hostid $HOSTID1
 log_must zpool set multihost=on $MMP_POOL
-set_tunable64 zfs_multihost_history 0
-set_tunable64 zfs_multihost_history 40
+set_tunable64 MULTIHOST_HISTORY 0
+set_tunable64 MULTIHOST_HISTORY 40
 
 # Step 3
 # default settings, every leaf written once/second

diff --git a/zfs/tests/zfs-tests/tests/functional/mmp/setup.ksh b/zfs/tests/zfs-tests/tests/functional/mmp/setup.ksh
index c91f619..b1e5431 100755
--- a/zfs/tests/zfs-tests/tests/functional/mmp/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mmp/setup.ksh

@@ -27,8 +27,8 @@
 	log_unsupported "System has existing $HOSTID_FILE file"
 fi
 
-log_must set_tunable64 zfs_multihost_history $MMP_HISTORY
-log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
-log_must set_tunable64 zfs_multihost_fail_intervals $MMP_FAIL_INTERVALS_DEFAULT
+log_must set_tunable64 MULTIHOST_HISTORY $MMP_HISTORY
+log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT
+log_must set_tunable64 MULTIHOST_FAIL_INTERVALS $MMP_FAIL_INTERVALS_DEFAULT
 
 log_pass "mmp setup pass"

diff --git a/zfs/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh b/zfs/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
index 0d26280..6130e2c 100755
--- a/zfs/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh

@@ -32,7 +32,7 @@
 
 function cleanup
 {
-	log_must set_tunable32 zfs_unlink_suspend_progress $default_unlink_sp
+	log_must set_tunable32 UNLINK_SUSPEND_PROGRESS $default_unlink_sp
 	for fs in $(seq 1 3); do
 		mounted $TESTDIR.$fs || zfs mount $TESTPOOL/$TESTFS.$fs
 		rm -f $TESTDIR.$fs/file-*
@@ -66,8 +66,7 @@
 }
 
 
-UNLINK_SP_PARAM=/sys/module/zfs/parameters/zfs_unlink_suspend_progress
-default_unlink_sp=$(get_tunable zfs_unlink_suspend_progress)
+default_unlink_sp=$(get_tunable UNLINK_SUSPEND_PROGRESS)
 
 log_onexit cleanup
 
@@ -89,7 +88,7 @@
 			log_must xattrtest -f 175 -x 3 -r -k -p $TESTDIR.$fs
 		fi
 
-		log_must set_tunable32 zfs_unlink_suspend_progress 1
+		log_must set_tunable32 UNLINK_SUSPEND_PROGRESS 1
 		log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
 
 		# build up unlinked set
@@ -106,7 +105,7 @@
 		log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
 
 		# confirm we can drain and add to unlinked set at the same time
-		log_must set_tunable32 zfs_unlink_suspend_progress 0
+		log_must set_tunable32 UNLINK_SUSPEND_PROGRESS 0
 		log_must zfs umount $TESTPOOL/$TESTFS.$fs
 		log_must zfs mount $TESTPOOL/$TESTFS.$fs
 		for fn in $(seq 101 175); do

diff --git a/zfs/tests/zfs-tests/tests/functional/mount/umountall_001.ksh b/zfs/tests/zfs-tests/tests/functional/mount/umountall_001.ksh
index b8c89c6..814c831 100755
--- a/zfs/tests/zfs-tests/tests/functional/mount/umountall_001.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mount/umountall_001.ksh

@@ -45,6 +45,8 @@
 # Append our ZFS filesystems to the list, not worrying about duplicates.
 if is_linux; then
 	typeset mounts=$(mount | awk '{if ($5 == "zfs") print $3}')
+elif is_freebsd; then
+	typeset mounts=$(mount -p | awk '{if ($3 == "zfs") print $2}')
 else
 	typeset mounts=$(mount -p | awk '{if ($4 == "zfs") print $3}')
 fi
@@ -60,6 +62,9 @@
 	if [[ -z $mounts ]]; then
 		mounts=$(awk '/zfs/ { print $2 }' /proc/mounts)
 	fi
+elif is_freebsd; then
+	# Umountall and umount not supported on FreeBSD
+	mounts=$(mount -t zfs | sort -r | awk '{print $3}')
 else
 	mounts=$(umountall -n -F zfs 2>&1 | awk '{print $2}')
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/mv_files/mv_files_common.kshlib b/zfs/tests/zfs-tests/tests/functional/mv_files/mv_files_common.kshlib
index 24b3fab..6b92550 100644
--- a/zfs/tests/zfs-tests/tests/functional/mv_files/mv_files_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/mv_files/mv_files_common.kshlib

@@ -152,8 +152,7 @@
 #
 function mv_files
 {
-
-        find $1 -type f -print | xargs -i \
+        find $1 -type f -print | xargs -I "{}" \
                 mv {} $2 > /dev/null 2>&1
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/mv_files/random_creation.ksh b/zfs/tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
index 45c46f8..05ddf62 100755
--- a/zfs/tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/mv_files/random_creation.ksh

@@ -11,7 +11,7 @@
 log_must mkdir "${DIR}"
 
 count=0
-for i in $(shuf -i 1-"${RC_PASS1}") ; do
+for i in $(range_shuffle 1 "${RC_PASS1}") ; do
     if ! touch "${DIR}/${i}" ; then
 	    log_fail "error creating ${i} after ${count} files"
     fi

diff --git a/zfs/tests/zfs-tests/tests/functional/no_space/Makefile.am b/zfs/tests/zfs-tests/tests/functional/no_space/Makefile.am
index c2e42bc..31584fb 100644
--- a/zfs/tests/zfs-tests/tests/functional/no_space/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/no_space/Makefile.am

@@ -5,7 +5,8 @@
 	enospc_001_pos.ksh \
 	enospc_002_pos.ksh \
 	enospc_003_pos.ksh \
-	enospc_df.ksh
+	enospc_df.ksh \
+	enospc_rm.ksh
 
 dist_pkgdata_DATA = \
 	enospc.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/no_space/enospc_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_002_pos.ksh
index db6ee6b..ffd14f4 100755
--- a/zfs/tests/zfs-tests/tests/functional/no_space/enospc_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_002_pos.ksh

@@ -49,7 +49,7 @@
 #
 log_note "Writing files until ENOSPC."
 
-for i in $(seq 30); do
+for i in $(seq 100); do
 	file_write -o create -f $TESTDIR/file.$i -b $BLOCKSZ \
 	    -c $NUM_WRITES -d $DATA
 	ret=$?
@@ -61,9 +61,6 @@
 
 log_mustnot_expect space zfs create $TESTPOOL/$TESTFS/subfs
 log_mustnot_expect space zfs clone $TESTPOOL/$TESTFS@snap $TESTPOOL/clone
-log_mustnot_expect space zfs snapshot $TESTPOOL/$TESTFS@snap2
-log_mustnot_expect space zfs bookmark \
-    $TESTPOOL/$TESTFS@snap $TESTPOOL/$TESTFS#bookmark
 
 log_must zfs send $TESTPOOL/$TESTFS@snap > $TEST_BASE_DIR/stream.$$
 log_mustnot_expect space zfs receive $TESTPOOL/$TESTFS/recvd < $TEST_BASE_DIR/stream.$$

diff --git a/zfs/tests/zfs-tests/tests/functional/no_space/enospc_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_003_pos.ksh
index 40aa500..496e2a0 100755
--- a/zfs/tests/zfs-tests/tests/functional/no_space/enospc_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_003_pos.ksh

@@ -44,25 +44,28 @@
 
 function cleanup
 {
-	log_must zpool destroy $TESTPOOL1
+	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+	rm -f $testfile0
 }
 
 log_onexit cleanup
 
 log_assert "ENOSPC is returned on pools with large physical block size"
 
-log_must zpool create $TESTPOOL1 -o ashift=13 $DISK_LARGE
+typeset testfile0=${TESTDIR}/testfile0
+
+log_must zpool create -o ashift=13 $TESTPOOL1 $DISK_LARGE
 log_must zfs set mountpoint=$TESTDIR $TESTPOOL1
 log_must zfs set compression=off $TESTPOOL1
 log_must zfs set recordsize=512 $TESTPOOL1
 log_must zfs set copies=3 $TESTPOOL1
 
-log_note "Writing file: $TESTFILE0 until ENOSPC."
-file_write -o create -f $TESTDIR/$TESTFILE0 -b $BLOCKSZ \
+log_note "Writing file: $testfile0 until ENOSPC."
+file_write -o create -f $testfile0 -b $BLOCKSZ \
     -c $NUM_WRITES -d $DATA
 ret=$?
 
 (( $ret != $ENOSPC )) && \
-    log_fail "$TESTFILE0 returned: $ret rather than ENOSPC."
+    log_fail "$testfile0 returned: $ret rather than ENOSPC."
 
 log_pass "ENOSPC returned as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh
new file mode 100755
index 0000000..065abc7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh

@@ -0,0 +1,60 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/no_space/enospc.cfg
+
+#
+# DESCRIPTION:
+# After filling a filesystem, verify the contents can be removed
+# without encountering an ENOSPC error.
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	log_must rm -f $all_vdevs
+}
+
+log_onexit cleanup
+
+log_assert "Files can be removed from full file system."
+
+all_vdevs=$(echo $TEST_BASE_DIR/file.{01..12})
+
+log_must truncate -s $MINVDEVSIZE $all_vdevs
+
+log_must zpool create -f $TESTPOOL draid2:8d:2s $all_vdevs
+log_must zfs create $TESTPOOL/$TESTFS
+log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
+log_must zfs set compression=off $TESTPOOL/$TESTFS
+
+log_note "Writing files until ENOSPC."
+log_mustnot_expect "No space left on device" fio --name=test \
+    --fallocate=none --rw=write --bs=1M --size=1G --numjobs=4 \
+    --sync=1 --directory=$TESTDIR/ --group_reporting
+
+log_must rm $TESTDIR/test.*
+log_must test -z "$(ls -A $TESTDIR)"
+
+log_pass "All files removed without error"

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_copies.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_copies.ksh
index 3971820..2a61f60 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_copies.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_copies.ksh

@@ -34,7 +34,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_mtime.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_mtime.ksh
index 4d06cfe..0422bba 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_mtime.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_mtime.ksh

@@ -34,7 +34,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 
@@ -51,6 +51,10 @@
 	o_atime=$(stat -c %X $TESTDIR/clone/file)
 	o_ctime=$(stat -c %Z $TESTDIR/clone/file)
 	o_mtime=$(stat -c %Y $TESTDIR/clone/file)
+elif is_freebsd; then
+	o_atime=$(stat -f "%a" $TESTDIR/clone/file)
+	o_ctime=$(stat -f "%c" $TESTDIR/clone/file)
+	o_mtime=$(stat -f "%m" $TESTDIR/clone/file)
 else
 	o_atime=$(ls -E% all $TESTDIR/clone/file | awk '/atime/ {print $4}')
 	o_ctime=$(ls -E% all $TESTDIR/clone/file | awk '/ctime/ {print $4}')
@@ -66,6 +70,10 @@
 	atime=$(stat -c %X $TESTDIR/clone/file)
 	ctime=$(stat -c %Z $TESTDIR/clone/file)
 	mtime=$(stat -c %Y $TESTDIR/clone/file)
+elif is_freebsd; then
+	atime=$(stat -f "%a" $TESTDIR/clone/file)
+	ctime=$(stat -f "%c" $TESTDIR/clone/file)
+	mtime=$(stat -f "%m" $TESTDIR/clone/file)
 else
 	atime=$(ls -E% all $TESTDIR/clone/file | awk '/atime/ {print $4}')
 	ctime=$(ls -E% all $TESTDIR/clone/file | awk '/ctime/ {print $4}')

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_negative.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_negative.ksh
index 8b0b9b5..617c346 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_negative.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_negative.ksh

@@ -36,7 +36,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_promoted_clone.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_promoted_clone.ksh
index f9e6e83..057c59a 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_promoted_clone.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_promoted_clone.ksh

@@ -36,7 +36,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $TESTPOOL/clone
+	datasetexists $origin && destroy_dataset $TESTPOOL/clone -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_recsize.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_recsize.ksh
index 14caedb..38e7ec1 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_recsize.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_recsize.ksh

@@ -34,7 +34,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 
@@ -50,7 +50,7 @@
 for rs in 512 1024 2048 4096 8192 16384 32768 65536 131072 ; do
 	log_must zfs set recsize=$rs $origin/clone
 	dd if=/$TESTDIR/file of=/$TESTDIR/clone/file bs=1024k count=$MEGS \
-	    conv=notrunc > $TEST_BASE_DIR/null 2>&1 || log_fail "dd failed."
+	    conv=notrunc >/dev/null 2>&1 || log_fail "dd failed."
 	log_must verify_nopwrite $origin $origin@a $origin/clone
 done
 

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh
index bd38883..e0721ca 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_sync.ksh

@@ -34,7 +34,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_varying_compression.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_varying_compression.ksh
index d91d553..190bdbd 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_varying_compression.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_varying_compression.ksh

@@ -12,11 +12,12 @@
 #
 
 #
-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2016, Delphix. All rights reserved.
+# Copyright (c) 2019, Kjeld Schouten-Lebbing. All Rights Reserved.
 #
 
-. $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/include/properties.shlib
+. $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/nopwrite/nopwrite.shlib
 
 #
@@ -40,7 +41,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	log_must zfs create -o mountpoint=$TESTDIR $origin
 }
 
@@ -51,8 +52,8 @@
 dd if=/dev/urandom of=$TESTDIR/file bs=1024k count=$MEGS conv=notrunc \
     >/dev/null 2>&1 || log_fail "initial dd failed."
 
-# Verify nop_write for 4 random compression algorithms
-for i in $(get_rand_compress 4); do
+# Verify nop_write for all compression algorithms except "off"
+for i in "${compress_prop_vals[@]:1}"; do
 	zfs snapshot $origin@a || log_fail "zfs snap failed"
 	log_must zfs clone -o compress=$i $origin@a $origin/clone
 	dd if=/$TESTDIR/file of=/$TESTDIR/clone/file bs=1024k count=$MEGS \

diff --git a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh
index 126a00e..1efe0b7 100755
--- a/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/nopwrite/nopwrite_volume.ksh

@@ -37,7 +37,7 @@
 
 function cleanup
 {
-	datasetexists $origin && log_must zfs destroy -R $origin
+	datasetexists $origin && destroy_dataset $origin -R
 	# No need to recreate the volume as no other tests expect it.
 }
 
@@ -51,6 +51,7 @@
 log_must zfs clone $origin@a $clone
 log_must zfs set compress=on $clone
 log_must zfs set checksum=sha256 $clone
+block_device_wait
 dd if=$vol of=$volclone bs=8192 count=4096 conv=notrunc >/dev/null 2>&1 || \
     log_fail "dd into $clone failed."
 log_must verify_nopwrite $origin $origin@a $clone

diff --git a/zfs/tests/zfs-tests/tests/functional/online_offline/online_offline_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/online_offline/online_offline_002_neg.ksh
index 99b9d6b..19576a8 100755
--- a/zfs/tests/zfs-tests/tests/functional/online_offline/online_offline_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/online_offline/online_offline_002_neg.ksh

@@ -90,10 +90,7 @@
 		log_must zpool online $TESTPOOL ${disks[$i]}
 		check_state $TESTPOOL ${disks[$i]} "online" || \
 		    log_fail "Failed to set ${disks[$i]} online"
-		# Delay for resilver to complete
-		while ! is_pool_resilvered $TESTPOOL; do
-			log_must sleep 1
-		done
+		log_must zpool wait -t resilver $TESTPOOL
 		log_must zpool clear $TESTPOOL
 		while [[ $j -lt ${#disks[*]} ]]; do
 			if [[ $j -eq $i ]]; then
@@ -125,10 +122,7 @@
 		log_must zpool online $TESTPOOL ${disks[$i]}
 		check_state $TESTPOOL ${disks[$i]} "online" || \
 		    log_fail "Failed to set ${disks[$i]} online"
-		# Delay for resilver to complete
-		while ! is_pool_resilvered $TESTPOOL; do
-			log_must sleep 1
-		done
+		log_must zpool wait -t resilver $TESTPOOL
 		log_must zpool clear $TESTPOOL
 	fi
 	((i++))

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/Makefile.am b/zfs/tests/zfs-tests/tests/functional/pam/Makefile.am
new file mode 100644
index 0000000..4d9ae17
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/Makefile.am

@@ -0,0 +1,10 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/pam
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	pam_basic.ksh \
+	pam_nounmount.ksh
+
+# Sourced helper library, not an executable test: install it as data.
+dist_pkgdata_DATA = \
+	utilities.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
new file mode 100755
index 0000000..62131c6
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh

@@ -0,0 +1,36 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/pam/utilities.kshlib
+
+# Undo what setup.ksh created: the pool, the test user and its group.
+destroy_pool $TESTPOOL
+del_user ${username}
+del_group pamtestgroup
+
+# genconfig (utilities.kshlib) writes this PAM service file; remove it so
+# the test service does not linger on the host after the run.
+rm -f /etc/pam.d/pam_zfs_key_test
+rm -rf "$runstatedir"
+for dir in $TESTDIRS; do
+	rm -rf $dir
+done

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/pam_basic.ksh b/zfs/tests/zfs-tests/tests/functional/pam/pam_basic.ksh
new file mode 100755
index 0000000..96ac594
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/pam_basic.ksh

@@ -0,0 +1,49 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/pam/utilities.kshlib
+
+log_mustnot ismounted "$TESTPOOL/pam/${username}"
+keystatus unavailable
+
+genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir}"
+echo "testpass" | pamtester pam_zfs_key_test ${username} open_session
+references 1
+log_must ismounted "$TESTPOOL/pam/${username}"
+keystatus available
+
+echo "testpass" | pamtester pam_zfs_key_test ${username} open_session
+references 2
+log_must ismounted "$TESTPOOL/pam/${username}"
+keystatus available
+
+log_must pamtester pam_zfs_key_test ${username} close_session
+references 1
+log_must ismounted "$TESTPOOL/pam/${username}"
+keystatus available
+
+log_must pamtester pam_zfs_key_test ${username} close_session
+references 0
+log_mustnot ismounted "$TESTPOOL/pam/${username}"
+keystatus unavailable
+
+log_pass "done."

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/pam_nounmount.ksh b/zfs/tests/zfs-tests/tests/functional/pam/pam_nounmount.ksh
new file mode 100755
index 0000000..8179f39
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/pam_nounmount.ksh

@@ -0,0 +1,51 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/pam/utilities.kshlib
+
+log_mustnot ismounted "$TESTPOOL/pam/${username}"
+keystatus unavailable
+
+genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir} nounmount"
+echo "testpass" | pamtester pam_zfs_key_test ${username} open_session
+references 1
+log_must ismounted "$TESTPOOL/pam/${username}"
+keystatus available
+
+echo "testpass" | pamtester pam_zfs_key_test ${username} open_session
+references 2
+keystatus available
+log_must ismounted "$TESTPOOL/pam/${username}"
+
+log_must pamtester pam_zfs_key_test ${username} close_session
+references 1
+keystatus available
+log_must ismounted "$TESTPOOL/pam/${username}"
+
+log_must pamtester pam_zfs_key_test ${username} close_session
+references 0
+keystatus available
+log_must ismounted "$TESTPOOL/pam/${username}"
+log_must zfs unmount "$TESTPOOL/pam/${username}"
+log_must zfs unload-key "$TESTPOOL/pam/${username}"
+
+log_pass "done."

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/setup.ksh b/zfs/tests/zfs-tests/tests/functional/pam/setup.ksh
new file mode 100755
index 0000000..23515a5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/setup.ksh

@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/pam/utilities.kshlib
+
+if ! which pamtester; then
+        log_unsupported "pam tests require the pamtester utility to be installed"
+fi
+
+DISK=${DISKS%% *}
+create_pool $TESTPOOL "$DISK"
+
+log_must zfs create -o mountpoint="$TESTDIR" "$TESTPOOL/pam"
+log_must add_group pamtestgroup
+log_must add_user pamtestgroup ${username}
+log_must mkdir -p "$runstatedir"
+
+log_must eval "echo testpass | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase -o keylocation=prompt $TESTPOOL/pam/${username}"
+log_must zfs unmount "$TESTPOOL/pam/${username}"
+log_must zfs unload-key "$TESTPOOL/pam/${username}"
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/pam/utilities.kshlib b/zfs/tests/zfs-tests/tests/functional/pam/utilities.kshlib
new file mode 100644
index 0000000..35371d1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/pam/utilities.kshlib

@@ -0,0 +1,40 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+username="pamTestuser"
+runstatedir="${TESTDIR}_run"
+function keystatus {
+    log_must [ "$(zfs list -Ho keystatus "$TESTPOOL/pam/${username}")" == "$1" ]
+}
+
+function genconfig {
+    for i in password auth session; do
+	printf "%s\trequired\tpam_permit.so\n%s\toptional\tpam_zfs_key.so\t%s\n" "$i" "$i" "$1"
+    done > /etc/pam.d/pam_zfs_key_test
+}
+
+function references {
+    log_must [ "$(cat "${runstatedir}/$(id -u ${username})")" == "$1" ]
+}
+

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_big_rewind.ksh b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_big_rewind.ksh
index f915d2a..7e523ef 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_big_rewind.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_big_rewind.ksh

@@ -52,6 +52,7 @@
 log_must zpool export $NESTEDPOOL
 log_must zpool import -d $FILEDISKDIR --rewind-to-checkpoint $NESTEDPOOL
 
-log_must zdb $NESTEDPOOL
+log_must zpool export $NESTEDPOOL
+log_must zdb -e -p $FILEDISKDIR $NESTEDPOOL
 
 log_pass "Rewind to checkpoint on a stressed pool."

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_capacity.ksh b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_capacity.ksh
index c473451..b6d3430 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_capacity.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_capacity.ksh

@@ -46,7 +46,7 @@
 function test_cleanup
 {
 	poolexists $NESTEDPOOL && destroy_pool $NESTEDPOOL
-	log_must set_tunable32 spa_asize_inflation 24
+	set_tunable32 SPA_ASIZE_INFLATION 24
 	cleanup_test_pool
 }
 
@@ -54,7 +54,7 @@
 
 setup_test_pool
 log_onexit test_cleanup
-log_must set_tunable32 spa_asize_inflation 4
+log_must set_tunable32 SPA_ASIZE_INFLATION 4
 
 log_must zfs create $DISKFS
 
@@ -80,13 +80,14 @@
 #
 log_must zpool list $NESTEDPOOL
 
-log_must zdb -kc $NESTEDPOOL
-
 log_must zpool export $NESTEDPOOL
+log_must zdb -e -p $FILEDISKDIR -kc $NESTEDPOOL
+
 log_must zpool import -d $FILEDISKDIR --rewind-to-checkpoint $NESTEDPOOL
 
 log_must [ "$(head -c 100 $NESTEDFS0FILE)" = "$FILE0INTRO" ]
 
-log_must zdb $NESTEDPOOL
+log_must zpool export $NESTEDPOOL
+log_must zdb -e -p $FILEDISKDIR $NESTEDPOOL
 
 log_pass "Do not reuse checkpointed space at low capacity."

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
index f1abad0..f970935 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh

@@ -41,7 +41,7 @@
 function test_cleanup
 {
 	# reset memory limit to 16M
-	set_tunable64 zfs_spa_discard_memory_limit 1000000
+	set_tunable64 SPA_DISCARD_MEMORY_LIMIT 1000000
 	cleanup_nested_pools
 }
 
@@ -67,7 +67,7 @@
 #	map, we should have even more time to
 #	verify this.
 #
-set_tunable64 zfs_spa_discard_memory_limit 128
+set_tunable64 SPA_DISCARD_MEMORY_LIMIT 128
 
 log_must zpool checkpoint $NESTEDPOOL
 
@@ -100,11 +100,12 @@
 log_mustnot zpool reguid $NESTEDPOOL
 
 # reset memory limit to 16M
-set_tunable64 zfs_spa_discard_memory_limit 16777216
+set_tunable64 SPA_DISCARD_MEMORY_LIMIT 16777216
 
 nested_wait_discard_finish
 
-log_must zdb $NESTEDPOOL
+log_must zpool export $NESTEDPOOL
+log_must zdb -e -p $FILEDISKDIR $NESTEDPOOL
 
 log_pass "Can export/import but not rewind/checkpoint/discard or " \
     "change pool's config while discarding."

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh
index 59f6408..a18e634 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh

@@ -48,14 +48,18 @@
 log_must truncate -s $EXPSZ $FILEDISK1
 log_must zpool online -e $NESTEDPOOL $FILEDISK1
 NEWSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $2}')
+DEXPSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $6}')
 nested_change_state_after_checkpoint
 log_mustnot [ "$INITSZ" = "$NEWSZ" ]
+log_must [ "$DEXPSZ" = "-" ]
 
 log_must zpool export $NESTEDPOOL
 log_must zpool import -d $FILEDISKDIR --rewind-to-checkpoint $NESTEDPOOL
 
 nested_verify_pre_checkpoint_state
 FINSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $2}')
-log_must [ "$INITSZ" = "$FINSZ" ]
+DEXPSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $6}')
+log_must [ "$EXPSZ" = "$FINSZ" ]
+log_must [ "$DEXPSZ" != "-" ]
 
 log_pass "LUN expansion rewinded correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_ro_rewind.ksh b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_ro_rewind.ksh
index fd74166..f326bf0 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_ro_rewind.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_ro_rewind.ksh

@@ -44,12 +44,12 @@
 log_must zpool checkpoint $TESTPOOL
 test_change_state_after_checkpoint
 
-log_must zpool export $TESTPOOL
+log_must_busy zpool export $TESTPOOL
 log_must zpool import -o readonly=on --rewind-to-checkpoint $TESTPOOL
 
 test_verify_pre_checkpoint_state "ro-check"
 
-log_must zpool export $TESTPOOL
+log_must_busy zpool export $TESTPOOL
 log_must zpool import $TESTPOOL
 
 test_verify_post_checkpoint_state

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib
index ea6c03e..bb8bab6 100644
--- a/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/pool_checkpoint/pool_checkpoint.kshlib

@@ -154,13 +154,18 @@
 
 function cleanup_nested_pool
 {
-	log_must zpool destroy $NESTEDPOOL
+	if poolexists $NESTEDPOOL; then
+		log_must zpool destroy $NESTEDPOOL
+	fi
+
 	log_must rm -f $FILEDISKS
 }
 
 function cleanup_test_pool
 {
-	log_must zpool destroy $TESTPOOL
+	if poolexists $TESTPOOL; then
+		log_must zpool destroy $TESTPOOL
+	fi
 
 	#
 	# We always clear the labels of all disks

diff --git a/zfs/tests/zfs-tests/tests/functional/pool_names/pool_names_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/pool_names/pool_names_002_neg.ksh
index 0c96e19..0b40f7c 100755
--- a/zfs/tests/zfs-tests/tests/functional/pool_names/pool_names_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/pool_names/pool_names_002_neg.ksh

@@ -106,7 +106,7 @@
 done
 
 log_note "Verify invalid pool names fail"
-set -A POOLNAME "c0t0d0s0" "c0t0d0" "c0t0d19" "c0t50000E0108D279d0" \
+set -A POOLNAME \
     "mirror" "raidz" ",," ",,,,,,,,,,,,,,,,,,,,,,,,," \
     "2222222222222222222" "mirror_pool" "raidz_pool" \
     "mirror-pool" "raidz-pool" "spare" "spare_pool" \

diff --git a/zfs/tests/zfs-tests/tests/functional/privilege/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/privilege/cleanup.ksh
index 45a6a0f..99985c6 100755
--- a/zfs/tests/zfs-tests/tests/functional/privilege/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/privilege/cleanup.ksh

@@ -31,7 +31,7 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_unsupported "Privilege tests require pfexec command"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/privilege/privilege_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/privilege/privilege_001_pos.ksh
index ae86938..af4f705 100755
--- a/zfs/tests/zfs-tests/tests/functional/privilege/privilege_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/privilege/privilege_001_pos.ksh

@@ -57,7 +57,7 @@
 # We can only run this in the global zone
 verify_runnable "global"
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_unsupported "Requires pfexec command"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/privilege/privilege_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/privilege/privilege_002_pos.ksh
index 22cfaf5..ab00e32 100755
--- a/zfs/tests/zfs-tests/tests/functional/privilege/privilege_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/privilege/privilege_002_pos.ksh

@@ -60,7 +60,7 @@
 
 verify_runnable "both"
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_unsupported "Requires pfexec command"
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/privilege/setup.ksh b/zfs/tests/zfs-tests/tests/functional/privilege/setup.ksh
index 94576d8..4eb0693 100755
--- a/zfs/tests/zfs-tests/tests/functional/privilege/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/privilege/setup.ksh

@@ -31,10 +31,6 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-if is_linux; then
-	log_unsupported "Requires pfexec command"
-fi
-
 ZFS_USER=zfsrbac
 USES_NIS=false
 

diff --git a/zfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh b/zfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
index f4df839..080fddd 100755
--- a/zfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh

@@ -61,16 +61,18 @@
 			log_must rm -f $BACKUP
 		fi
 
-		# Our disk is back.  Now we can clear errors and destroy the
-		# pool cleanly.
-		log_must zpool clear $TESTPOOL2
+		if poolexists $TESTPOOL2 ; then
+			# Our disk is back.  Now we can clear errors and destroy the
+			# pool cleanly.
+			log_must zpool clear $TESTPOOL2
 
-		# Now that the disk is back and errors cleared, wait for our
-		# hung 'zpool scrub' to finish.
-		wait
+			# Now that the disk is back and errors cleared, wait for our
+			# hung 'zpool scrub' to finish.
+			wait
 
-		destroy_pool $TESTPOOL2
-		log_must rm $REALDISK
+			destroy_pool $TESTPOOL2
+		fi
+		log_must rm -f $REALDISK
 		unload_scsi_debug
 	fi
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh b/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh
index 88911aa..dfc1f1e 100755
--- a/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh

@@ -42,7 +42,7 @@
 
 function cleanup
 {
-	datasetexists $FS && log_must zfs destroy -r $FS
+	datasetexists $FS && destroy_dataset $FS -r
 }
 
 function count_snap_cmds

diff --git a/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh b/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh
index 473de5c..1af1c2c 100755
--- a/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh

@@ -43,7 +43,7 @@
 {
 	[[ -z $msgs1 ]] || log_must rm $msgs1
 	[[ -z $msgs2 ]] || log_must rm $msgs2
-	datasetexists $FS && log_must zfs destroy -r $FS
+	datasetexists $FS && destroy_dataset $FS -r
 }
 
 typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
@@ -75,7 +75,7 @@
 # Truncate the result of the read that completed second in case it picked up an
 # extra message that was logged after the first read completed.
 #
-log_must truncate -s $(stat -c "%s" $msgs1) $msgs2
+log_must truncate -s $(stat_size $msgs1) $msgs2
 
 log_must diff $msgs1 $msgs2
 

diff --git a/zfs/tests/zfs-tests/tests/functional/procfs/setup.ksh b/zfs/tests/zfs-tests/tests/functional/procfs/setup.ksh
index 3444cfc..79fa28f 100755
--- a/zfs/tests/zfs-tests/tests/functional/procfs/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/procfs/setup.ksh

@@ -26,8 +26,4 @@
 
 . $STF_SUITE/include/libtest.shlib
 
-if ! is_linux ; then
-	log_unsupported "procfs is only used on Linux"
-fi
-
 default_mirror_setup $DISKS

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh
index a975d2a..7ca81c3 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_004_neg.ksh

@@ -43,9 +43,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_005_pos.ksh
index b52f302..0736648 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_005_pos.ksh

@@ -43,9 +43,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_008_pos.ksh
index 365b562..b045b2c 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_008_pos.ksh

@@ -48,9 +48,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_009_pos.ksh
index a867b53..da44e73 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectquota_009_pos.ksh

@@ -49,9 +49,7 @@
 function cleanup
 {
 	for ds in $TESTPOOL/fs $TESTPOOL/fs-rename $TESTPOOL/fs-clone; do
-		if datasetexists $ds; then
-			log_must zfs destroy -rRf $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds -rRf
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_001_pos.ksh
index a84ff9f..b7707ea 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_001_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_002_pos.ksh
index 216855e..10edae7 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_002_pos.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_003_pos.ksh
index 629b3b3..8db5d0d 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_003_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh
index ec299e0..fc4a93f 100755
--- a/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/projectquota/projectspace_004_pos.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_projectquota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/pyzfs/Makefile.am b/zfs/tests/zfs-tests/tests/functional/pyzfs/Makefile.am
index 0c68c25..26c5ac5 100644
--- a/zfs/tests/zfs-tests/tests/functional/pyzfs/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/pyzfs/Makefile.am

@@ -1,19 +1,7 @@
+include $(top_srcdir)/config/Substfiles.am
+
 pkgpyzfsdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/pyzfs
 pkgpyzfs_SCRIPTS = \
 	pyzfs_unittest.ksh
 
-EXTRA_DIST = \
-	pyzfs_unittest.ksh.in
-
-#
-# The pyzfs module is built either for Python 2 or Python 3.  In order
-# to properly test it the unit tests must be updated to the matching version.
-#
-$(pkgpyzfs_SCRIPTS):%:%.in
-	-$(SED) -e 's,@PYTHON\@,$(PYTHON),g' \
-		$< >'$@'
-	-chmod 775 $@
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(pkgpyzfs_SCRIPTS)
+SUBSTFILES += $(pkgpyzfs_SCRIPTS)

diff --git a/zfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in b/zfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in
index 4ca610e..1f58d81 100755
--- a/zfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in
+++ b/zfs/tests/zfs-tests/tests/functional/pyzfs/pyzfs_unittest.ksh.in

@@ -30,7 +30,7 @@
 # Verify that the required dependencies for testing are installed.
 @PYTHON@ -c "import cffi" 2>/dev/null
 if [ $? -eq 1 ]; then
-	log_unsupported "python-cffi not found by Python"
+	log_unsupported "python3-cffi not found by Python"
 fi
 
 # We don't just try to "import libzfs_core" because we want to skip these tests

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota.kshlib b/zfs/tests/zfs-tests/tests/functional/quota/quota.kshlib
index 082a77c..0ffe639 100644
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota.kshlib

@@ -33,6 +33,8 @@
 # BLOCK_SIZE, QUOTA_VALUE and TOLERANCE set in quota.cfg
 if is_linux; then
 	readonly EDQUOT=122
+elif is_freebsd; then
+	readonly EDQUOT=69
 else
 	readonly EDQUOT=49
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/quota/quota_001_pos.ksh
index 9364a9f..d6783e9 100755
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota_001_pos.ksh

@@ -62,7 +62,8 @@
 	# pool, otherwise next test will fail trying to set a
 	# quota which is less than the space used.
 	#
-	sleep 5
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/quota/quota_002_pos.ksh
index c54968b..2f34072 100755
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota_002_pos.ksh

@@ -61,6 +61,9 @@
 
 	[[ -e $TESTDIR/$TESTFILE2 ]] && \
             log_must rm $TESTDIR/$TESTFILE2
+
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/quota/quota_003_pos.ksh
index bec2243..6ab25cf 100755
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota_003_pos.ksh

@@ -61,11 +61,12 @@
 	    log_must rm $TESTDIR1/$TESTFILE1
 
 	#
-        # Need to allow time for space to be released back to
-        # pool, otherwise next test will fail trying to set a
-        # quota which is less than the space used.
-        #
-        sleep 5
+	# Need to allow time for space to be released back to
+	# pool, otherwise next test will fail trying to set a
+	# quota which is less than the space used.
+	#
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/quota/quota_004_pos.ksh
index cc62d8f..3733544 100755
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota_004_pos.ksh

@@ -62,6 +62,9 @@
 
 	[[ -e $TESTDIR1/$TESTFILE2 ]] && \
             log_must rm $TESTDIR1/$TESTFILE2
+
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/quota/quota_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/quota/quota_005_pos.ksh
index ed28cc1..e87139a 100755
--- a/zfs/tests/zfs-tests/tests/functional/quota/quota_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/quota/quota_005_pos.ksh

@@ -48,8 +48,7 @@
 
 function cleanup
 {
-	datasetexists $fs_child && \
-		log_must zfs destroy $fs_child
+	datasetexists $fs_child && destroy_dataset $fs_child
 
 	log_must zfs set quota=$quota_val $fs
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/raidz/Makefile.am b/zfs/tests/zfs-tests/tests/functional/raidz/Makefile.am
index 694de18..d93eb73 100644
--- a/zfs/tests/zfs-tests/tests/functional/raidz/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/raidz/Makefile.am

@@ -3,4 +3,6 @@
 	setup.ksh \
 	cleanup.ksh \
 	raidz_001_neg.ksh \
-	raidz_002_pos.ksh
+	raidz_002_pos.ksh \
+	raidz_003_pos.ksh \
+	raidz_004_pos.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh
new file mode 100755
index 0000000..bf22632
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh

@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Call the raidz_test tool with -S and -e to test all supported raidz
+#	implementations with expanded map and default reflow offset.
+#	These options will test several raidz block geometries and several zio
+#	parameters that affect raidz block layout. Data reconstruction performs
+#	all combinations of failed disks. Wall time is set to 60s (-t 60), but
+#	actual runtime might be longer.
+#
+
+log_must raidz_test -S -e -t 60
+
+log_pass "raidz_test parameter sweep test with expanded map succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh
new file mode 100755
index 0000000..6cd2bf7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh

@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Call the raidz_test tool with -S and -e to test all supported raidz
+#	implementations with expanded map and zero reflow offset.
+#	These options will test several raidz block geometries and several zio
+#	parameters that affect raidz block layout. Data reconstruction performs
+#	all combinations of failed disks. Wall time is set to 60s (-t 60), but
+#	actual runtime might be longer.
+#
+
+log_must raidz_test -S -e -r 0 -t 60
+
+log_pass "raidz_test parameter sweep test with expanded map succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/Makefile.am b/zfs/tests/zfs-tests/tests/functional/redacted_send/Makefile.am
new file mode 100644
index 0000000..61d0ea2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/Makefile.am

@@ -0,0 +1,26 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/redacted_send
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	redacted_compressed.ksh \
+	redacted_contents.ksh \
+	redacted_deleted.ksh \
+	redacted_disabled_feature.ksh \
+	redacted_embedded.ksh \
+	redacted_holes.ksh \
+	redacted_incrementals.ksh \
+	redacted_largeblocks.ksh \
+	redacted_many_clones.ksh \
+	redacted_mixed_recsize.ksh \
+	redacted_mounts.ksh \
+	redacted_negative.ksh \
+	redacted_origin.ksh \
+	redacted_panic.ksh \
+	redacted_props.ksh \
+	redacted_resume.ksh \
+	redacted_size.ksh \
+	redacted_volume.ksh
+
+dist_pkgdata_DATA = \
+	redacted.cfg \
+	redacted.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/cleanup.ksh
new file mode 100755
index 0000000..1a7c142
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/cleanup.ksh

@@ -0,0 +1,33 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+destroy_pool $POOL
+destroy_pool $POOL2
+log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 0
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.cfg b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.cfg
new file mode 100644
index 0000000..f964b37
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.cfg

@@ -0,0 +1,86 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+export DISK1=$(echo $DISKS | awk '{print $1}')
+export DISK2=$(echo $DISKS | awk '{print $2}')
+
+export POOL=$TESTPOOL
+export POOL2=$TESTPOOL2
+export FS=$TESTFS
+export FS2=$TESTFS2
+
+#
+# These are the byte ranges that differ between files and their redacted
+# counterparts. See compare_files() for more detail.
+#
+typeset RANGE0="0,2097152"
+typeset RANGE1="0,131072"
+typeset RANGE2="1048576,2097152"
+typeset RANGE3="0,131072
+1966080,131072
+3932160,131072"
+typeset RANGE4="0,131072
+262144,131072
+524288,131072
+786432,131072"
+typeset RANGE5="0,1048576
+7340032,1048576"
+typeset RANGE6="393216,131072
+655360,131072
+917504,131072
+1179648,131072
+1441792,393216
+1966080,393216
+2621440,262144
+3145728,262144
+3670016,262144
+4194304,262144
+4718592,262144
+5242880,262144"
+typeset RANGE7="1048576,6291456"
+typeset RANGE8="4063232,131072"
+typeset RANGE9="0,131072
+262144,131072
+524288,131072
+786432,131072
+1048576,131072
+1310720,131072
+1572864,131072
+1835008,131072
+2097152,131072
+2359296,131072
+2621440,131072
+2883584,131072
+3145728,131072
+3407872,131072
+3670016,131072
+3932160,131072"
+typeset RANGE10="0,393216"
+typeset RANGE11="0,1048576"
+typeset RANGE12="0,2097152"
+typeset RANGE13="0,16384"
+typeset RANGE14=""
+typeset RANGE15="0,4194304"
+typeset RANGE16="0,6291456"
\ No newline at end of file

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.kshlib b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.kshlib
new file mode 100644
index 0000000..3010193
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted.kshlib

@@ -0,0 +1,266 @@
+#!/bin/ksh
+
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016, 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/redacted_send/redacted.cfg
+
+#
+# Create $POOL/<name> with the given "zfs create" options, populate it
+# through a file-creation function (setup_common unless another one is
+# named), and snapshot it as @snap.  That snapshot is then cloned to
+# $POOL/<name>_clone (same options) and the clone is snapshotted as @snap.
+#
+# $1 - dataset name, relative to $POOL
+# $2 - options handed to both "zfs create" and "zfs clone" (may be empty)
+# $3 - optional name of the function that populates the dataset; it is
+#      called with the full dataset name as its only argument
+#
+function setup_dataset
+{
+	typeset name=$1
+	typeset create_opts=$2
+	typeset populate=${3:-setup_common}
+	typeset origin="$POOL/$name"
+	typeset clone="${origin}_clone"
+
+	log_must zfs create $create_opts $origin
+
+	$populate $origin
+
+	log_must zfs snapshot $origin@snap
+	log_must zfs clone $create_opts $origin@snap $clone
+	log_must zfs snapshot $clone@snap
+}
+
+#
+# Default population routine: write two files of random data, f1 (16
+# records) and f2 (32 records), into the dataset's mountpoint.  The record
+# size used for dd is the dataset's recsize property.
+#
+# $1 - dataset to populate
+#
+function setup_common
+{
+	typeset fs=$1
+	typeset dir recsize
+
+	dir=$(get_prop mountpoint $fs)
+	recsize=$(get_prop recsize $fs)
+
+	log_must dd if=/dev/urandom of=$dir/f1 bs=$recsize count=16
+	log_must dd if=/dev/urandom of=$dir/f2 bs=$recsize count=32
+}
+
+#
+# Populate a dataset with one file per size in 512..16384 (powers of two),
+# each named after its size.  On non-illumos platforms only the final 8
+# bytes of each file are written (random data); on illumos the equivalent
+# layout is requested from mkholes.
+#
+# $1 - dataset to populate
+#
+function setup_embedded
+{
+	typeset fs=$1
+	typeset dir=$(get_prop mountpoint $fs)
+	typeset size last_word
+
+	for size in 512 1024 2048 4096 8192 16384; do
+		if ! is_illumos; then
+			# Index of the last 8-byte unit within the file.
+			last_word=$(((size / 8) - 1))
+			log_must dd if=/dev/urandom of=$dir/$size bs=8 \
+			    count=1 seek=$last_word
+		else
+			log_must mkholes -d $((size - 8)):8 $dir/$size
+		fi
+	done
+}
+
+#
+# Populate a dataset with four 8M files that mix data and unwritten
+# regions, then create a child dataset "manyrm" holding 256 small strided
+# files, snapshot it, clone it and snapshot the clone.
+#
+# $1 - dataset to populate
+#
+function setup_holes
+{
+	typeset sendfs=$1
+
+	typeset mntpnt=$(get_prop mountpoint $sendfs)
+	typeset M=$((1024 * 1024))
+
+	# Layout produced by the non-illumos branch below:
+	#   f1: 8M of data
+	#   f2: data in [0,1M) and [7M,8M), nothing written in between
+	#   f3: data in [1M,7M), extended to 8M by truncate
+	#   f4: never written, sized to 8M by truncate
+	# NOTE(review): the mkholes branch presumably builds the same layout
+	# (-d data range, -h hole range, as offset:length) -- confirm against
+	# the mkholes utility.
+	if is_illumos; then
+		log_must mkholes -d 0:$((8 * M)) $mntpnt/f1
+		log_must mkholes -d 0:$M -d $((7 * M)):$M $mntpnt/f2
+		log_must mkholes -d $M:$((6 * M)) -h $((7 * M)):$M $mntpnt/f3
+		log_must mkholes -h 0:$((8 * M)) $mntpnt/f4
+	else
+		log_must dd if=/dev/urandom of=$mntpnt/f1 bs=8M count=1
+
+		log_must dd if=/dev/urandom of=$mntpnt/f2 bs=1M count=1
+		# conv=notrunc keeps the first megabyte written just above.
+		log_must dd if=/dev/urandom of=$mntpnt/f2 bs=1M count=1 seek=7 \
+		    conv=notrunc
+
+		log_must dd if=/dev/urandom of=$mntpnt/f3 bs=1M count=6 seek=1
+		log_must truncate -s $((8 * M)) $mntpnt/f3
+
+		log_must truncate -s $((8 * M)) $mntpnt/f4
+	fi
+
+	# The files below are addressed through the parent's mountpoint, so
+	# this assumes the child dataset is mounted at $mntpnt/manyrm.
+	# Note that the loop variable i is not declared with typeset.
+	log_must zfs create $sendfs/manyrm
+	for i in {1..256}; do
+		log_must stride_dd -i /dev/urandom -o $mntpnt/manyrm/f$i -b 512 \
+		    -c $(random_int_between 1 100) -s $(random_int_between 1 4)
+	done
+
+	log_must zfs snapshot $sendfs/manyrm@snap
+	log_must zfs clone $sendfs/manyrm@snap $sendfs/manyrm_clone
+	log_must zfs snapshot $sendfs/manyrm_clone@snap
+}
+
+#
+# Populate a dataset with f1, f2 and d1/f1, snapshot it as @snap0, and
+# build a set of clones under $POOL that each modify the contents in a
+# different way.  Every clone ends with an @snap snapshot:
+#
+#   hole    - first 16 records of f2 overwritten with zeros
+#   stride3 - f2 rewritten by stride_dd (-c 11 -s 3)
+#   stride5 - f2 rewritten by stride_dd (-c 7 -s 5)
+#   int     - unmodified clone of @snap0
+#   rm      - clone of int@snap with d1, f1 and f2 removed
+#   write   - clone of int@snap with parts of f1 and d1/f1 overwritten
+#
+# $1 - dataset to populate
+#
+function setup_incrementals
+{
+	typeset sendfs=$1
+
+	typeset mntpnt=$(get_prop mountpoint $sendfs)
+	typeset bs=$(get_prop recsize $sendfs)
+	log_must dd if=/dev/urandom of=$mntpnt/f1 bs=$bs count=16
+	log_must dd if=/dev/urandom of=$mntpnt/f2 bs=$bs count=32
+	log_must mkdir $mntpnt/d1
+	# eval so that the redirection is performed by the logged command.
+	log_must eval "cat $mntpnt/f1 $mntpnt/f2 >$mntpnt/d1/f1"
+	log_must zfs snapshot $sendfs@snap0
+
+	# From here on mntpnt is re-pointed at each clone in turn.
+	log_must zfs clone $sendfs@snap0 $POOL/hole
+	mntpnt=$(get_prop mountpoint $POOL/hole)
+	log_must dd if=/dev/zero of=$mntpnt/f2 bs=$bs count=16 conv=notrunc
+	log_must zfs snapshot $POOL/hole@snap
+
+	log_must zfs clone $sendfs@snap0 $POOL/stride3
+	mntpnt=$(get_prop mountpoint $POOL/stride3)
+	log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $bs -c 11 -s 3
+	log_must zfs snapshot $POOL/stride3@snap
+
+	log_must zfs clone $sendfs@snap0 $POOL/stride5
+	mntpnt=$(get_prop mountpoint $POOL/stride5)
+	log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $bs -c 7 -s 5
+	log_must zfs snapshot $POOL/stride5@snap
+
+	log_must zfs clone $sendfs@snap0 $POOL/int
+	log_must zfs snapshot $POOL/int@snap
+
+	log_must zfs clone $POOL/int@snap $POOL/rm
+	mntpnt=$(get_prop mountpoint $POOL/rm)
+	# The glob matches d1, d2, f1 and f2; only d1, f1 and f2 exist here.
+	log_must rm -rf $mntpnt/[df][12]
+	log_must zfs snapshot $POOL/rm@snap
+
+	log_must zfs clone $POOL/int@snap $POOL/write
+	mntpnt=$(get_prop mountpoint $POOL/write)
+	# Overwrite 512-byte blocks 0-15 of f1 and blocks 16-31 of d1/f1.
+	log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=16 conv=notrunc
+	log_must dd if=/dev/urandom of=$mntpnt/d1/f1 bs=512 count=16 seek=16 \
+	    conv=notrunc
+	log_must zfs snapshot $POOL/write@snap
+}
+
+#
+# Populate a dataset with a small tree of empty and 1K random-data files
+# (top level, dir1 and dir1/dir2), then create a sparse volume "vol"
+# beneath it, snapshot the volume, clone it to "vol_clone" and snapshot
+# the clone.
+#
+# $1 - dataset to populate
+#
+function setup_mounts
+{
+	typeset sendfs=$1
+
+	typeset mntpnt=$(get_prop mountpoint $sendfs)
+	log_must touch $mntpnt/empty
+	log_must dd if=/dev/urandom of=$mntpnt/contents1 bs=512 count=2
+	log_must dd if=/dev/urandom of=$mntpnt/contents2 bs=512 count=2
+	log_must mkdir $mntpnt/dir1
+	log_must touch $mntpnt/dir1/empty
+	log_must dd if=/dev/urandom of=$mntpnt/dir1/contents1 bs=512 count=2
+	log_must dd if=/dev/urandom of=$mntpnt/dir1/contents2 bs=512 count=2
+	log_must mkdir $mntpnt/dir1/dir2
+	log_must touch $mntpnt/dir1/dir2/empty
+	log_must dd if=/dev/urandom of=$mntpnt/dir1/dir2/file bs=512 count=2
+
+	# -s makes the volume sparse, so the 16p size is not reserved up front.
+	log_must zfs create -s -V 16p $sendfs/vol
+	log_must zfs snapshot $sendfs/vol@snap
+	log_must zfs clone $sendfs/vol@snap $sendfs/vol_clone
+	log_must zfs snapshot $sendfs/vol_clone@snap
+}
+
+#
+# Mount a redacted dataset read-only.  The ALLOW_REDACTED_DATASET_MOUNT
+# tunable is switched on only for the duration of the mount attempt and is
+# switched back off afterwards, whether or not the mount succeeded.
+#
+# Usage: mount_redacted [-f] <dataset>
+#   -f  pass -f through to "zfs mount"
+#
+# Returns 0 if the dataset was mounted, 1 otherwise.
+#
+function mount_redacted
+{
+	typeset flag=''
+	typeset opt
+	while getopts "f" opt; do
+		case $opt in
+		f)
+			flag='-f'
+			;;
+		esac
+	done
+	shift $(($OPTIND - 1))
+
+	typeset ds=$1
+	typeset rc
+	log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 1
+	zfs mount $flag -oro $ds
+	rc=$?
+	# Restore the tunable before reporting the result so that a failed
+	# mount does not leave redacted mounts enabled for whatever runs next.
+	log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 0
+	[[ $rc -eq 0 ]] || return 1
+	return 0
+}
+
+#
+# Unmount a dataset; counterpart to mount_redacted.
+#
+# $1 - dataset to unmount
+#
+# Returns the exit status of "zfs unmount".
+#
+function unmount_redacted
+{
+	zfs unmount $1
+}
+
+#
+# This function calls a utility that prints out the ranges where a file
+# and its redacted counterpart differ, each range on a new line like this:
+#
+# 0,131072
+# 1966080,131072
+# 3932160,131072
+#
+# The output is then checked against a variable containing the expected
+# output to verify the redacted ranges are the ones expected.
+#
+# $1 - dataset holding the original file
+# $2 - received (redacted) dataset; it is force-mounted for the comparison
+#      and unmounted again before the result is checked
+# $3 - path of the file, relative to each dataset's mountpoint
+# $4 - expected get_diff output (for example one of the RANGE* values)
+#
+# The scratch file lives under $tmpdir, which is not set here and must be
+# provided by the calling test.  Any mismatch, or a missing file on either
+# side, ends the test through log_fail.
+#
+function compare_files
+{
+	typeset sendfs=$1
+	typeset recvfs=$2
+	typeset file=$3
+	typeset expected="$4"
+	typeset tmpfile="$tmpdir/get_file.out"
+
+	log_must mount_redacted -f $recvfs
+
+	typeset file1="$(get_prop mountpoint $sendfs)/$file"
+	typeset file2="$(get_prop mountpoint $recvfs)/$file"
+	log_note "Comparing $file1 and $file2"
+	[[ -f $file1 ]] || log_fail "File $file1 does not exist."
+	[[ -f $file2 ]] || log_fail "File $file2 does not exist."
+
+	# eval so that the redirection is performed by the logged command.
+	log_must eval "get_diff $file1 $file2 >$tmpfile"
+	typeset range="$(cat $tmpfile)"
+	# Unmount first so the dataset is not left mounted if the check fails.
+	log_must unmount_redacted $recvfs
+	[[ "$expected" = "$range" ]] || log_fail "Unexpected range: $range"
+}
+
+#
+# Best-effort cleanup handler for the redacted send tests: recursively
+# destroy the given datasets (together with their dependents), switch
+# redacted mounts back off, and empty the tmp directory underneath $POOL's
+# mountpoint.
+#
+# $@ - datasets to destroy; failures are deliberately ignored because a
+#      test may exit before having created all of them
+#
+function redacted_cleanup
+{
+	typeset ds_list=$@
+	typeset ds
+	typeset mntpnt
+
+	for ds in $ds_list; do
+		zfs destroy -R $ds
+	done
+
+	set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 0
+
+	#
+	# Only remove files when the pool's tmp directory really exists.  If
+	# the mountpoint lookup fails or comes back empty, an unguarded
+	# "rm -f <mountpoint>/tmp/*" would act on the system's /tmp instead.
+	#
+	mntpnt=$(get_prop mountpoint $POOL) || mntpnt=''
+	if [[ -n $mntpnt && -d $mntpnt/tmp ]]; then
+		rm -f $mntpnt/tmp/*
+	fi
+}
+
+#
+# Retrieve the redaction list of a bookmark or snapshot, using
+# the property or zdb output, as requested.
+#
+# $1 - file that receives the guids, one per line, sorted
+# $2 - bookmark or snapshot to query
+# $3 - command name "true" or "false" (default false); it is executed
+#      as-is: when it succeeds the list is scraped from zdb output,
+#      otherwise it is read from the redact_snaps property
+#
+# Note that guid_list and guid are not declared with typeset in this
+# function.
+#
+function get_guid_list
+{
+	typeset filename=$1
+	typeset dataset=$2
+	typeset use_zdb=${3:-false}
+
+	if $use_zdb; then
+		# Strip the commas, then print only what is inside the
+		# brackets that follow "Snapshots:".
+		guid_list=$(zdb -vvvv $dataset | sed -e 's/,//g' \
+		    -ne 's/^.*Snapshots: \[\(.*\)\]/\1/p')
+	else
+		guid_list=$(get_prop redact_snaps $dataset)
+	fi
+
+	# Turn any remaining commas into spaces so both sources split into
+	# one guid per word; the whole loop's output is sorted into the file.
+	for guid in $(echo $guid_list | tr ',' ' '); do
+		echo $guid
+	done | sort >$filename
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_compressed.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_compressed.ksh
new file mode 100755
index 0000000..0a8bf39
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_compressed.ksh

@@ -0,0 +1,71 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that compressed send streams are redacted correctly.
+#
+# Strategy:
+# 1. Receive a redacted compressed send stream, verifying compression and
+#    redaction.
+# 2. Receive an incremental on the full receive, verifying compression and
+#    redaction.
+#
+
+typeset ds_name="compressed"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name "-o compress=lz4"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+log_must stride_dd -i /dev/urandom -o $clone_mnt/f1 -b $((128 * 1024)) -c 4 -s 2
+log_must zfs snapshot $clone@snap1
+log_must rm $clone_mnt/f2
+log_must zfs snapshot $clone@snap2
+
+log_must zfs redact $sendfs@snap book1 $clone@snap1 $clone@snap2
+log_must eval "zfs send -c --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must stream_has_features $stream compressed lz4 redacted
+compare_files $sendfs $recvfs "f1" "$RANGE4"
+verify_stream_size $stream $sendfs
+log_must mount_redacted -f $recvfs
+verify_stream_size $stream $recvfs
+log_must unmount_redacted $recvfs
+
+log_must eval "zfs send -c -i $sendfs@snap $clone@snap1 >$stream"
+log_must eval "zfs recv $POOL2/inc1 <$stream"
+log_must stream_has_features $stream compressed lz4
+typeset mntpnt=$(get_prop mountpoint $POOL2)
+log_must diff $clone_mnt/f1 $mntpnt/inc1/f1
+log_must diff $send_mnt/f2 $mntpnt/inc1/f2
+
+# snap2 differs from snap1 only by the removal of f2, so the inc2 receive
+# must still contain f1 and must not contain f2.
+log_must eval "zfs send -c -i $sendfs@snap $clone@snap2 >$stream"
+log_must eval "zfs recv $POOL2/inc2 <$stream"
+log_must stream_has_features $stream compressed lz4
+# Check the dataset that was just received (inc2), not inc1 again.
+log_must diff $clone_mnt/f1 $mntpnt/inc2/f1
+[[ -f $mntpnt/inc2/f2 ]] && log_fail "File f2 should not exist."
+
+log_pass "Compressed send streams are redacted correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_contents.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_contents.ksh
new file mode 100755
index 0000000..fb12862
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_contents.ksh

@@ -0,0 +1,162 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify redaction works as expected for various scenarios.
+#
+# Strategy:
+# 1. An unmodified file does not get redacted at all.
+# 2. Empty redaction list redacts everything.
+# 3. A file removed in the clone redacts the whole file.
+# 4. A file moved in the clone does not redact the file.
+# 5. A copied, then removed file in the clone redacts the whole file.
+# 6. Overwriting a file with identical contents redacts the file.
+# 7. A partially modified block redacts the entire block.
+# 8. Only overlapping areas of modified ranges are redacted.
+# 9. Sends from the root dataset of a pool work correctly.
+#
+
+typeset ds_name="contents"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+# An unmodified file does not get redacted at all.
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book1 $clone@snap1
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+log_must diff $send_mnt/f1 $recv_mnt/f1
+log_must diff $send_mnt/f2 $recv_mnt/f2
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Removing a file in the clone redacts the entire file.
+log_must rm "$clone_mnt/f1"
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book3 $clone@snap1
+log_must eval "zfs send --redact book3 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE0"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Moving a file in the clone does not redact the file.
+log_must mv "$clone_mnt/f1" "$clone_mnt/f1.moved"
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book4 $clone@snap1
+log_must eval "zfs send --redact book4 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+[[ -f $recv_mnt/f1.moved ]] && log_fail "Found moved file in redacted receive."
+log_must diff $send_mnt/f1 $recv_mnt/f1
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Copying, then removing a file in the clone does redact the file.
+log_must cp "$clone_mnt/f1" "$clone_mnt/f1.copied"
+log_must rm "$clone_mnt/f1"
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book5 $clone@snap1
+log_must eval "zfs send --redact book5 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE0"
+log_must mount_redacted -f $recvfs
+# f1.copied was created only in the clone, so it must not show up in the
+# dataset received from $sendfs.
+[[ -f $recv_mnt/f1.copied ]] && \
+    log_fail "Found copied file in redacted receive."
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Overwriting the contents of a block with identical contents redacts the file.
+log_must cp "$clone_mnt/f1" "$clone_mnt/f1.copied"
+log_must cp "$clone_mnt/f1.copied" "$clone_mnt/f1"
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book6 $clone@snap1
+log_must eval "zfs send --redact book6 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE0"
+log_must mount_redacted -f $recvfs
+# f1.copied was created only in the clone, so it must not show up in the
+# dataset received from $sendfs.
+[[ -f $recv_mnt/f1.copied ]] && \
+    log_fail "Found copied file in redacted receive."
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Modifying some of a block redacts the whole block.
+log_must dd if=/dev/urandom of=$clone_mnt/f1 conv=notrunc seek=2 count=1 bs=32k
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book7 $clone@snap1
+log_must eval "zfs send --redact book7 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE1"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Only overlapping areas of modified ranges are redacted.
+log_must dd if=/dev/urandom of=$clone_mnt/f2 bs=1024k count=3 conv=notrunc
+log_must zfs snapshot $clone@snap1
+log_must zfs clone $sendfs@snap $clone/new
+typeset mntpnt="$(get_prop mountpoint $clone/new)"
+log_must dd if=/dev/urandom of=$mntpnt/f2 bs=1024k seek=1 count=3 \
+    conv=notrunc
+log_must zfs snapshot $clone/new@snap
+log_must zfs redact $sendfs@snap book8 $clone@snap1 $clone/new@snap
+log_must eval "zfs send --redact book8 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f2" "$RANGE2"
+log_must zfs destroy -R $clone/new
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# FizzBuzz version
+log_must zfs clone $sendfs@snap $POOL/stride3
+mntpnt="$(get_prop mountpoint $POOL/stride3)"
+log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $((128 * 1024)) -c 11 -s 3
+log_must zfs snapshot $POOL/stride3@snap
+log_must zfs clone $sendfs@snap $POOL/stride5
+mntpnt="$(get_prop mountpoint $POOL/stride5)"
+log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $((128 * 1024)) -c 7 -s 5
+log_must zfs snapshot $POOL/stride5@snap
+log_must zfs redact $sendfs@snap book8a $POOL/stride3@snap $POOL/stride5@snap
+log_must eval "zfs send --redact book8a $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f2" "$RANGE3"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Sends from the root dataset of a pool work correctly.
+log_must dd if=/dev/urandom of=/$POOL/f1 bs=128k count=4
+log_must zfs snapshot $POOL@snap
+log_must zfs clone $POOL@snap $POOL/clone
+log_must dd if=/dev/urandom of=/$POOL/clone/f1 bs=128k count=1 conv=notrunc
+log_must zfs snapshot $POOL/clone@snap
+log_must zfs redact $POOL@snap book9 $POOL/clone@snap
+log_must eval "zfs send --redact book9 $POOL@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $POOL $recvfs "f1" "$RANGE1"
+log_must zfs destroy -R $POOL@snap
+
+log_pass "Redaction works as expected for various scenarios."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_deleted.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_deleted.ksh
new file mode 100755
index 0000000..3e2aeb7
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_deleted.ksh

@@ -0,0 +1,103 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017, 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify redaction works as expected with respect to deleted files
+#
+# Strategy:
+# 1. A file on the delete queue counts as deleted when using it to calculate
+#    redaction.
+# 2. A file that is removed in the tosnap of an incremental, where the fromsnap
+#    is a redaction bookmark that contains references to that file, does not
+#    result in records for that file.
+#
+
+typeset ds_name="deleted"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset clone2="$POOL/${ds_name}_clone2"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+#
+# A file on the delete queue counts as deleted when using it to calculate
+# redaction.
+#
+
+#
+# Open file descriptor 5 for appending to $clone_mnt/f1 so that it will go on
+# the delete queue when we rm it.
+#
+exec 5>>$clone_mnt/f1
+log_must dd if=/dev/urandom of=$clone_mnt/f1 bs=512 count=1 conv=notrunc
+log_must rm $clone_mnt/f1
+log_must zfs snapshot $clone@snap1
+# Close file descriptor 5
+exec 5>&-
+log_must zfs redact $sendfs@snap book1 $clone@snap1
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+#
+# We have temporarily disabled redaction blkptrs, so this will not
+# fail as was originally intended.  We should uncomment this line
+# when we re-enable redaction blkptrs.
+#
+#log_mustnot dd if=$recv_mnt/f1 of=/dev/null bs=512 count=1
+log_must diff $send_mnt/f2 $recv_mnt/f2
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+#
+# A file that is removed in the tosnap of an incremental, where the fromsnap
+# is a redaction bookmark that contains references to that file, does not
+# result in records for that file.
+#
+# Build book2 from a clone of @snap in which every file has been removed,
+# then do a full redacted send/receive with it.
+log_must zfs clone  $sendfs@snap $clone2
+typeset clone2_mnt="$(get_prop mountpoint $clone2)"
+log_must rm -rf $clone2_mnt/*
+log_must zfs snapshot $clone2@snap
+log_must zfs redact $sendfs@snap book2 $clone2@snap
+log_must zfs destroy -R $clone2
+log_must eval "zfs send --redact book2 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+# Remove f1 in the send dataset itself and snapshot it; @snap2 is the
+# target of the incremental.
+log_must rm $send_mnt/f1
+log_must zfs snapshot $sendfs@snap2
+# Build book3 the same way, this time from a clone of @snap2.
+log_must zfs clone  $sendfs@snap2 $clone2
+typeset clone2_mnt="$(get_prop mountpoint $clone2)"
+log_must rm $clone2_mnt/*
+log_must zfs snapshot $clone2@snap
+log_must zfs redact $sendfs@snap2 book3 $clone2@snap
+log_must zfs destroy -R $clone2
+# Incremental from redaction bookmark book2 to @snap2, redacted with book3.
+log_must eval "zfs send -i $sendfs#book2 --redact book3 $sendfs@snap2 >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+# Only the directory listings are compared, not the file contents.
+log_must diff <(ls $send_mnt) <(ls $recv_mnt)
+log_must zfs destroy -R $recvfs
+# Roll the send dataset back to @snap, which also discards @snap2.
+log_must zfs rollback -R $sendfs@snap
+
+log_pass "Verify Redaction works as expected with respect to deleted files."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_disabled_feature.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_disabled_feature.ksh
new file mode 100755
index 0000000..3cf73f0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_disabled_feature.ksh

@@ -0,0 +1,71 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify the functionality of the redaction_bookmarks and redacted_datasets
+# features.
+#
+# Strategy:
+# 1. Create a pool with all features disabled.
+# 2. Verify redacted send fails.
+# 3. Enable redaction_bookmarks and verify redacted sends work.
+# 4. Verify receipt of a redacted stream fails.
+# 5. Enable redacted_datasets and verify zfs receive works.
+#
+
+typeset ds_name="disabled"
+typeset sendfs="$POOL/$ds_name"
+typeset sendfs1="$POOL2/${ds_name}1"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset clone1="$POOL2/${ds_name}_clone1"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+
+#
+# Exit handler for this test.  Destroys $POOL2, recreates it on $DISK2 with
+# create_pool, takes the $POOL2@init snapshot and then runs the common
+# redacted_cleanup on the send and receive datasets.  The test body below
+# replaces $POOL2 with a pool made by "zpool create -d", which is why the
+# pool is rebuilt here.
+#
+function cleanup
+{
+	destroy_pool $POOL2
+	create_pool $POOL2 $DISK2
+	log_must zfs snapshot $POOL2@init
+	redacted_cleanup $sendfs $recvfs
+}
+
+log_onexit cleanup
+
+destroy_pool $POOL2
+log_must zpool create -d $POOL2 $DISK2
+
+log_must zfs create $sendfs1
+log_must zfs snapshot $sendfs1@snap
+log_must zfs clone $sendfs1@snap $clone1
+log_must zfs snapshot $clone1@snap
+
+log_mustnot zfs redact $sendfs1@snap book1 $clone1@snap
+log_must zpool set feature@redaction_bookmarks=enabled $POOL2
+log_must zfs redact $sendfs1@snap book1 $clone1@snap
+
+log_must zfs redact $sendfs@snap book1 $clone@snap
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_mustnot eval "zfs recv $recvfs <$stream"
+log_must zpool set feature@redacted_datasets=enabled $POOL2
+log_must eval "zfs recv $recvfs <$stream"
+
+log_pass "The redacted send/recv features work correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_embedded.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_embedded.ksh
new file mode 100755
index 0000000..1c5b503
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_embedded.ksh

@@ -0,0 +1,103 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify embedded blocks and redacted send work correctly together.
+#
+# Strategy:
+# 1. Create recsize sized files with embedded blocks from size 512b to 16k.
+# 2. Receive a redacted send stream with nothing redacted.
+# 3. Verify the received files match the source, contain embedded blocks, and
+#    that the stream has the redacted and embedded data features.
+# 4. Receive a redacted send stream with files 512, 2048 and 8192 redacted.
+# 5. Verify that the redacted files no longer match, but the others still
+#    contain embedded blocks and the stream has the redacted and embedded
+#    data features.
+#
+
+typeset ds_name="embedded"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '-o compress=lz4' setup_embedded
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+typeset recsize send_obj recv_obj
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+log_must zfs redact $sendfs@snap book1 $clone@snap
+log_must eval "zfs send -e --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must stream_has_features $stream redacted embed_data
+
+#
+# For every file name given (the files are named by their recsize), verify
+# that the sent and received copies are identical, that the zdb -ddddd
+# output for the object mentions "EMBEDDED" on both sides, and that the
+# stream dump contains a WRITE_EMBEDDED record for the object at offset 0.
+#
+# Uses the script-level sendfs, recvfs, send_mnt, recv_mnt, tmpdir and
+# stream variables; $recvfs must be mounted when this is called.
+#
+# $@ - names of the files to check
+#
+function verify_embedded
+{
+	typeset recsize send_obj recv_obj
+
+	for recsize in "$@"; do
+		send_obj=$(get_objnum $send_mnt/$recsize)
+		recv_obj=$(get_objnum $recv_mnt/$recsize)
+
+		log_must diff $send_mnt/$recsize $recv_mnt/$recsize
+		log_must eval "zdb -ddddd $sendfs $send_obj >$tmpdir/send.zdb"
+		log_must eval "zdb -ddddd $recvfs $recv_obj >$tmpdir/recv.zdb"
+
+		grep -q "EMBEDDED" $tmpdir/send.zdb || \
+		    log_fail "Obj $send_obj not embedded in $sendfs"
+		grep -q "EMBEDDED" $tmpdir/recv.zdb || \
+		    log_fail "Obj $recv_obj not embedded in $recvfs"
+
+		cat $stream | zstream dump -v | log_must grep -q \
+		    "WRITE_EMBEDDED object = $send_obj offset = 0"
+	done
+}
+
+# Nothing was redacted by book1, so every file must match and be embedded.
+log_must mount_redacted -f $recvfs
+verify_embedded 512 1024 2048 4096 8192 16384
+
+# Overwrite three of the files in the clone and redact with respect to it.
+log_must zfs destroy -R $recvfs
+for recsize in 512 2048 8192; do
+	log_must dd if=/dev/urandom of=$clone_mnt/$recsize bs=$recsize count=1
+done
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book2 $clone@snap1
+log_must eval "zfs send -e --redact book2 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must stream_has_features $stream redacted embed_data
+
+# The overwritten files must now differ; the others must be unaffected.
+log_must mount_redacted -f $recvfs
+for recsize in 512 2048 8192; do
+	log_mustnot diff $send_mnt/$recsize $recv_mnt/$recsize
+done
+verify_embedded 1024 4096 16384
+
+log_pass "Embedded blocks and redacted send work correctly together."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_holes.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_holes.ksh
new file mode 100755
index 0000000..d111aa0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_holes.ksh

@@ -0,0 +1,120 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify redacted send streams reliably handle holes.
+#
+# Strategy:
+# 1. Holes written at the beginning and end of a non-sparse file in the
+#    redacted list are correctly redacted.
+# 2. Holes written throughout a non-sparse file in the redacted list are
+#    correctly redacted.
+# 3. Data written into a hole in a sparse file in the redacted list are
+#    correctly redacted.
+# 4. Holes in metadata blocks.
+#
+
+typeset ds_name="holes"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '' setup_holes
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+typeset M=$((1024 * 1024))
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+# Write holes at the start and end of a non-sparse file.
+if is_illumos; then
+	log_must mkholes -h 0:$M -h $((7 * M)):$M $clone_mnt/f1
+else
+	log_must dd if=/dev/zero of=$clone_mnt/f1 bs=1M count=1 conv=notrunc
+	log_must dd if=/dev/zero of=$clone_mnt/f1 bs=1M count=1 conv=notrunc seek=7
+fi
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book1 $clone@snap1
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE5"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Write two overlapping sets of holes into the same non-sparse file.
+log_must stride_dd -i /dev/zero -o $clone_mnt/f1 -b $((128 * 1024)) -c 8 -s 2 -k 3
+log_must stride_dd -i /dev/zero -o $clone_mnt/f1 -b $((256 * 1024)) -c 8 -s 2 -k 6
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book2 $clone@snap1
+log_must eval "zfs send --redact book2 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE6"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Write data into the middle of a hole.
+if is_illumos; then
+	log_must mkholes -d $((3 * M)):$((2 * M)) $clone_mnt/f2
+else
+	log_must dd if=/dev/urandom of=$clone_mnt/f2 bs=1M count=2 seek=3 \
+	    conv=notrunc
+fi
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book3 $clone@snap1
+log_must eval "zfs send --redact book3 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f2" "$RANGE14"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Remove a file with holes.
+log_must rm $clone_mnt/f3
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendfs@snap book4 $clone@snap1
+log_must eval "zfs send --redact book4 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f3" "$RANGE7"
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+# Create a hole in a L0 metadata block by removing files.
+log_must rm $send_mnt/manyrm_clone/f{32..96}
+log_must zfs snapshot $sendfs/manyrm_clone@snap1
+
+log_must zfs redact $sendfs/manyrm@snap book6 $sendfs/manyrm_clone@snap1
+log_must eval "zfs send --redact book6 $sendfs/manyrm@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+# Files that were left alone in the clone must be received unchanged.
+for i in {1..31} {97..256}; do
+	diff $send_mnt/manyrm/f$i $recv_mnt/f$i || log_fail \
+	    "File f$i did not match in the send and recv datasets."
+done
+# For the files removed in the clone only the size is compared.
+# NOTE(review): presumably because their contents are redacted - confirm.
+for i in {32..96}; do
+	file_size=$(stat_size $send_mnt/manyrm/f$i)
+	redacted_size=$(stat_size $recv_mnt/f$i)
+	[[ $file_size -eq $redacted_size ]] || log_fail \
+	    "File f$i has size $file_size and redacted size $redacted_size"
+done
+log_must zfs rollback -R $clone@snap
+log_must zfs destroy -R $recvfs
+
+log_pass "Redacted send streams reliably handle holes."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_incrementals.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_incrementals.ksh
new file mode 100755
index 0000000..1d2ed3a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_incrementals.ksh

@@ -0,0 +1,152 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that incrementals (redacted and normal) work with redacted datasets.
+#
+# Strategy:
+# 1. Test normal incrementals from the original snap to a subset of the
+#    redaction list.
+# 2. Test receipt of intermediate clones, and their children.
+# 3. Test receipt with origin snap specified by '-o origin='.
+# 4. Test incrementals from redaction bookmarks.
+#
+
+typeset ds_name="incrementals"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '' setup_incrementals
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+
+log_onexit redacted_cleanup $sendfs $recvfs $POOL2/rfs
+
+# Setup a redacted send using a redaction list at varying depth.
+log_must zfs redact $sendfs@snap0 book1 $POOL/rm@snap $POOL/stride3@snap \
+     $POOL/stride5@snap
+log_must eval "zfs send --redact book1 $sendfs@snap0 >$stream"
+log_must eval "zfs receive $POOL2/rfs <$stream"
+
+# Verify receipt of normal incrementals to redaction list members.
+log_must eval "zfs send -i $sendfs@snap0 $POOL/stride3@snap >$stream"
+log_must eval "zfs recv $POOL2/rstride3 <$stream"
+log_must diff -r /$POOL/stride3 /$POOL2/rstride3
+log_must eval "zfs send -i $sendfs@snap0 $POOL/stride5@snap >$stream"
+log_must eval "zfs recv $POOL2/rstride5 <$stream"
+log_must diff -r /$POOL/stride5 /$POOL2/rstride5
+
+# But not a normal child that we weren't redacted with respect to.
+log_must eval "zfs send -i $sendfs@snap0 $POOL/hole@snap >$stream"
+log_mustnot eval "zfs recv $POOL2/rhole@snap <$stream"
+
+# Verify we can receive an intermediate clone redacted with respect to a
+# subset of the original redaction list.
+log_must zfs redact $POOL/int@snap book2 $POOL/rm@snap
+log_must eval "zfs send -i $sendfs@snap0 --redact book2 $POOL/int@snap >$stream"
+log_must eval "zfs recv $POOL2/rint <$stream"
+compare_files $POOL/int $POOL2/rint "f1" "$RANGE0"
+compare_files $POOL/int $POOL2/rint "f2" "$RANGE15"
+compare_files $POOL/int $POOL2/rint "d1/f1" "$RANGE16"
+log_must mount_redacted -f $POOL2/rint
+
+# Verify we can receive grandchildren on the child.
+log_must eval "zfs send -i $POOL/int@snap $POOL/rm@snap >$stream"
+log_must eval "zfs receive $POOL2/rrm <$stream"
+log_must diff -r /$POOL/rm /$POOL2/rrm
+
+# But not a grandchild that the received child wasn't redacted with respect to.
+log_must eval "zfs send -i $POOL/int@snap $POOL/write@snap >$stream"
+log_mustnot eval "zfs recv $POOL2/rwrite<$stream"
+
+# Verify we cannot receive an intermediate clone that isn't redacted with
+# respect to a subset of the original redaction list.
+log_must zfs redact $POOL/int@snap book4 $POOL/rm@snap $POOL/write@snap
+log_must eval "zfs send -i $sendfs@snap0 --redact book4 $POOL/int@snap >$stream"
+log_mustnot eval "zfs recv $POOL2/rint <$stream"
+log_must zfs redact $POOL/int@snap book5 $POOL/write@snap
+log_must eval "zfs send -i $sendfs@snap0 --redact book5 $POOL/int@snap >$stream"
+log_mustnot eval "zfs recv $POOL2/rint <$stream"
+log_mustnot zfs redact $POOL/int@snap book6 $POOL/hole@snap
+
+# Verify we can receive a full clone of the grandchild on the child.
+log_must eval "zfs send $POOL/write@snap >$stream"
+log_must eval "zfs recv -o origin=$POOL2/rint@snap $POOL2/rwrite <$stream"
+log_must diff -r /$POOL/write /$POOL2/rwrite
+
+# Along with other origins.
+log_must eval "zfs recv -o origin=$POOL2/rfs@snap0 $POOL2/rwrite1 <$stream"
+log_must diff -r /$POOL/write /$POOL2/rwrite1
+log_must eval "zfs recv -o origin=$POOL2@init $POOL2/rwrite2 <$stream"
+log_must diff -r /$POOL/write /$POOL2/rwrite2
+log_must zfs destroy -R $POOL2/rwrite2
+
+log_must zfs destroy -R $POOL2/rfs
+
+# Write some data for tests of incremental sends from bookmarks
+log_must zfs snapshot $sendfs@snap1
+log_must zfs clone $sendfs@snap1 $POOL/hole1
+typeset mntpnt=$(get_prop mountpoint $POOL/hole1)
+log_must dd if=/dev/zero of=$mntpnt/f2 bs=128k count=16 conv=notrunc
+log_must zfs snapshot $POOL/hole1@snap
+log_must zfs clone $sendfs@snap1 $POOL/write1
+mntpnt=$(get_prop mountpoint $POOL/write1)
+log_must dd if=/dev/urandom of=$mntpnt/f2 bs=128k count=16 conv=notrunc
+log_must zfs snapshot $POOL/write1@snap
+log_must zfs clone $POOL/int@snap $POOL/write2
+mntpnt=$(get_prop mountpoint $POOL/write2)
+log_must dd if=/dev/urandom of=$mntpnt/f2 bs=128k count=16 conv=notrunc
+log_must zfs snapshot $POOL/write2@snap
+
+# Setup a redacted send using a redaction list at varying depth.
+log_must zfs redact $sendfs@snap0 book7 $POOL/rm@snap $POOL/stride3@snap \
+     $POOL/stride5@snap
+log_must eval "zfs send --redact book7 $sendfs@snap0 >$stream"
+log_must eval "zfs receive $POOL2/rfs <$stream"
+
+# Verify we can receive a redacted incremental sending from the bookmark.
+log_must zfs redact $sendfs@snap1 book8 $POOL/write1@snap
+log_must eval "zfs send -i $sendfs#book7 --redact book8 $sendfs@snap1 >$stream"
+log_must eval "zfs receive $POOL2/rfs <$stream"
+# The stride3 and stride5 snaps redact 3 128k blocks at block offsets 0 15 and
+# 30 of f2. The write1 snap only covers the first two of those three blocks.
+compare_files $sendfs $POOL2/rfs "f2" "$RANGE12"
+log_must mount_redacted -f $POOL2/rfs
+log_must diff $send_mnt/f1 /$POOL2/rfs/f1
+log_must diff $send_mnt/d1/f1 /$POOL2/rfs/d1/f1
+# Check the unmount like compare_files does, so that a failure is reported
+# here rather than surfacing in a later step.
+log_must unmount_redacted $POOL2/rfs
+
+# Verify we can receive a normal child we weren't redacted with respect to by
+# sending from the bookmark.
+log_must eval "zfs send -i $sendfs#book7 $POOL/hole1@snap >$stream"
+log_must eval "zfs recv $POOL2/rhole1 <$stream"
+log_must diff -r /$POOL/hole1 /$POOL2/rhole1
+
+# Verify we can receive an intermediate clone redacted with respect to a
+# non-subset if we send from the bookmark.
+log_must zfs redact $POOL/int@snap book9 $POOL/write2@snap
+log_must eval "zfs send -i $sendfs#book7 --redact book9 $POOL/int@snap >$stream"
+log_must eval "zfs receive $POOL2/rint <$stream"
+compare_files $sendfs $POOL2/rint "f2" "$RANGE12"
+
+log_pass "Incrementals (redacted and normal) work with redacted datasets."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_largeblocks.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_largeblocks.ksh
new file mode 100755
index 0000000..caccdd3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_largeblocks.ksh

@@ -0,0 +1,63 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify large blocks and redacted send work correctly together.
+#
+# Strategy:
+# 1. Create a dataset and clone with a 1m recordsize, modifying a few k
+#    within the first 1m of a 16m file.
+# 2. Verify that the whole first 1m of the file is redacted.
+# 3. Receive an incremental stream from the original snap to the snap it
+#    was redacted with respect to.
+# 4. Verify that the received dataset matches the clone
+#
+
+typeset ds_name="largeblocks"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '-o recsize=1m'
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+log_must dd if=/dev/urandom of=$clone_mnt/f1 bs=32k count=3 seek=8 conv=notrunc
+log_must zfs snapshot $clone@snap1
+
+log_must zfs redact $sendfs@snap book1 $clone@snap1
+log_must eval "zfs send -L --redact book1 $sendfs@snap >$stream"
+log_must stream_has_features $stream redacted large_blocks
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f1" "$RANGE11"
+log_must mount_redacted -f $recvfs
+log_must diff $send_mnt/f2 $recv_mnt/f2
+unmount_redacted $recvfs
+
+log_must eval "zfs send -L -i $sendfs@snap $clone@snap1 >$stream"
+log_must stream_has_features $stream large_blocks
+log_must eval "zfs recv $recvfs/new <$stream"
+log_must diff -r $clone_mnt $recv_mnt/new
+
+log_pass "Large blocks and redacted send work correctly together."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh
new file mode 100755
index 0000000..3386643
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_many_clones.ksh

@@ -0,0 +1,68 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify redacted send can deal with a large redaction list.
+#
+# Strategy:
+# 1. Create 64 clones of sendfs each of which modifies two blocks in a file.
+#    The first modification is at an offset unique to each clone, and the
+#    second (the last block in the file) is common to them all.
+# 2. Verify a redacted stream with a reasonable redaction list length can
+#    be correctly processed.
+# 3. Verify that if the list is too long, the send fails gracefully.
+#
+
+typeset ds_name="many_clones"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+typeset redaction_list=''
+typeset mntpnt
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+# Fill in both the last block, and a different block in every clone.
+for i in {1..64}; do
+	log_must zfs clone $sendfs@snap ${clone}$i
+	mntpnt=$(get_prop mountpoint ${clone}$i)
+	log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=$i \
+	    conv=notrunc
+	log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=63 \
+	    conv=notrunc
+	log_must zfs snapshot ${clone}$i@snap
+done
+
+# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
+# the redacted list is determined in dsl_bookmark_create_redacted_check().
+log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+compare_files $sendfs $recvfs "f2" "$RANGE8"
+
+log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap
+
+log_pass "Redacted send can deal with a large redaction list."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mixed_recsize.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mixed_recsize.ksh
new file mode 100755
index 0000000..e1cd09e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mixed_recsize.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify redacted send works with datasets of different recordsizes.
+#
+# Strategy:
+# 1. Create two datasets, with recsize 512 and 1m, each holding a 2m file.
+# 2. For each dataset, create clones of both 512 and 1m recsize and modify
+#    the first 16k of the file.
+# 3. Send each original dataset, redacted with respect to each of the clones
+#    into both a dataset inheriting a 512 recsize and a 1m one.
+# 4. Verify that the smallest unit of redaction is that of the origin fs.
+#
+
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+typeset mntpnt
+
+log_onexit redacted_cleanup $POOL/512 $POOL/1m $POOL2/512 $POOL2/1m
+
+# Set up the datasets we'll send and redact from.
+log_must zfs create -o recsize=512 $POOL/512
+mntpnt=$(get_prop mountpoint $POOL/512)
+log_must dd if=/dev/urandom of=$mntpnt/f1 bs=1024k count=2
+log_must zfs snapshot $POOL/512@snap
+log_must zfs clone -o recsize=1m $POOL/512@snap $POOL/1mclone
+mntpnt=$(get_prop mountpoint $POOL/1mclone)
+log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=32 conv=notrunc
+log_must zfs snapshot $POOL/1mclone@snap
+
+log_must zfs create -o recsize=1m $POOL/1m
+mntpnt=$(get_prop mountpoint $POOL/1m)
+log_must dd if=/dev/urandom of=$mntpnt/f1 bs=1024k count=2
+log_must zfs snapshot $POOL/1m@snap
+log_must zfs clone -o recsize=512 $POOL/1m@snap $POOL/512clone
+mntpnt=$(get_prop mountpoint $POOL/512clone)
+log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=32 conv=notrunc
+log_must zfs snapshot $POOL/512clone@snap
+
+# Create datasets that allow received datasets to inherit recordsize.
+log_must zfs create -o recsize=512 $POOL2/512
+log_must zfs create -o recsize=1m $POOL2/1m
+
+# Do the sends and verify the contents.
+log_must zfs redact $POOL/512@snap book1 $POOL/1mclone@snap
+log_must eval "zfs send --redact book1 $POOL/512@snap>$stream"
+log_must eval "zfs recv $POOL2/512/recva <$stream"
+compare_files $POOL/512 $POOL2/512/recva "f1" "$RANGE13"
+log_must eval "zfs recv $POOL2/1m/recvb <$stream"
+compare_files $POOL/512 $POOL2/1m/recvb "f1" "$RANGE13"
+
+log_must zfs redact $POOL/1m@snap book2 $POOL/512clone@snap
+log_must eval "zfs send --redact book2 $POOL/1m@snap >$stream"
+log_must eval "zfs recv $POOL2/512/recvc <$stream"
+compare_files $POOL/1m $POOL2/512/recvc "f1" "$RANGE11"
+log_must eval "zfs recv $POOL2/1m/recvd <$stream"
+compare_files $POOL/1m $POOL2/1m/recvd "f1" "$RANGE11"
+
+log_pass "Redaction works correctly with different recordsizes."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mounts.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mounts.ksh
new file mode 100755
index 0000000..0bc4bf4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_mounts.ksh

@@ -0,0 +1,109 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that received redacted datasets are not mounted by default, but
+# can still be mounted after setting ALLOW_REDACTED_DATASET_MOUNT.
+#
+# Strategy:
+# 1. Verify a received redacted stream isn't mounted by default.
+# 2. Set ALLOW_REDACTED_DATASET_MOUNT and verify it can't be mounted
+#    without the -f flag, but can with -f.
+# 3. Receive a redacted volume.
+# 4. Verify the device file isn't present until the kernel variable is set.
+# 5. Verify the files in the send fs are also present in the recv fs.
+#
+
+typeset ds_name="mounts"
+typeset sendfs="$POOL/$ds_name"
+typeset sendvol="$sendfs/vol"
+typeset recvfs="$POOL2/$ds_name"
+typeset recvvol="$POOL2/vol"
+typeset clone="$POOL/${ds_name}_clone"
+typeset clonevol="${sendvol}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '' setup_mounts
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+typeset recv_vol_file="/dev/zvol/$recvvol"
+
+log_onexit redacted_cleanup $sendfs $recvfs $recvvol
+
+log_must rm $clone_mnt/empty $clone_mnt/contents1
+log_must dd if=/dev/urandom of=$clone_mnt/contents2 bs=512 count=1 conv=notrunc
+log_must rm $clone_mnt/dir1/contents1
+log_must rm -rf $clone_mnt/dir1/dir2
+log_must dd if=/dev/urandom of=$clone_mnt/dir1/contents2 bs=512 count=1 \
+    conv=notrunc
+log_must dd if=/dev/urandom of=$clone_mnt/dir1/empty bs=512 count=1
+log_must zfs snapshot $clone@snap1
+
+log_must zfs redact $sendfs@snap book1 $clone@snap
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs receive $recvfs <$stream"
+log_mustnot ismounted $recvfs
+log_mustnot mount_redacted $recvfs
+log_mustnot ismounted $recvfs
+log_must mount_redacted -f $recvfs
+log_must ismounted $recvfs
+
+# Verify that the send and recv fs both have the same files under their
+# mountpoints by comparing find output with the name of the mountpoint
+# deleted.
+contents=$(log_must find $recv_mnt)
+contents_orig=$(log_must find $send_mnt)
+log_must diff <(echo ${contents//$recv_mnt/}) \
+    <(echo ${contents_orig//$send_mnt/})
+log_must zfs redact $sendvol@snap book2 $clonevol@snap
+log_must eval "zfs send --redact book2 $sendvol@snap >$stream"
+log_must eval "zfs receive $recvvol <$stream"
+is_disk_device $recv_vol_file && log_fail "Volume device file should not exist."
+log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 1
+log_must zpool export $POOL2
+log_must zpool import $POOL2
+udevadm settle
+
+# The device file isn't guaranteed to show up right away.
+if ! is_disk_device $recv_vol_file; then
+	udevadm settle
+	for t in 10 5 3 2 1; do
+		log_note "Polling $t seconds for device file."
+		udevadm settle
+		sleep $t
+		is_disk_device $recv_vol_file && break
+	done
+fi
+is_disk_device $recv_vol_file || log_fail "Volume device file should exist."
+
+log_must dd if=/dev/urandom of=$send_mnt/dir1/contents1 bs=512 count=2
+log_must rm $send_mnt/dir1/dir2/empty
+log_must zfs snapshot $sendfs@snap2
+log_must eval "zfs send -i $sendfs#book1 $sendfs@snap2 >$stream"
+log_must eval "zfs receive $recvfs <$stream"
+log_must mount_redacted -f $recvfs
+log_must ismounted $recvfs
+contents=$(log_must find $recv_mnt)
+contents_orig=$(log_must find $send_mnt)
+log_must diff <(echo ${contents//$recv_mnt/}) \
+    <(echo ${contents_orig//$send_mnt/})
+
+log_pass "Received redacted streams can be mounted."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_negative.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_negative.ksh
new file mode 100755
index 0000000..e591cca
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_negative.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Test that redacted send correctly detects invalid arguments.
+#
+
+typeset sendfs="$POOL2/sendfs"
+typeset recvfs="$POOL2/recvfs"
+typeset clone1="$POOL2/clone1"
+typeset clone2="$POOL2/clone2"
+typeset clone3="$POOL2/clone3"
+typeset clone3="$POOL2/clone4"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+
+log_onexit redacted_cleanup $sendfs $recvfs $clone3
+
+log_must zfs create $sendfs
+log_must zfs snapshot $sendfs@snap1
+log_must zfs snapshot $sendfs@snap2
+log_must zfs snapshot $sendfs@snap3
+log_must zfs clone $sendfs@snap2 $clone1
+log_must zfs snapshot $clone1@snap
+log_must zfs bookmark $clone1@snap $clone1#book
+log_must zfs clone $sendfs@snap2 $clone2
+log_must zfs snapshot $clone2@snap
+
+# Incompatible flags
+log_must zfs redact $sendfs@snap2 book $clone1@snap
+log_mustnot eval "zfs send -R --redact book $sendfs@snap2 >$TEST_BASE_DIR/devnull"
+
+typeset arg
+for arg in "$sendfs" "$clone1#book"; do
+	log_mustnot eval "zfs send --redact book $arg >$TEST_BASE_DIR/devnull"
+done
+
+# Bad redaction list arguments
+log_mustnot zfs redact $sendfs@snap1
+log_mustnot zfs redact $sendfs@snap1 book
+log_mustnot zfs redact $sendfs#book1 book4 $clone1
+log_mustnot zfs redact $sendfs@snap1 book snap2 snap3
+log_mustnot zfs redact $sendfs@snap1 book @snap2 @snap3
+log_mustnot eval "zfs send --redact $sendfs#book $sendfs@snap >$TEST_BASE_DIR/devnull"
+
+# Redaction snapshots not a descendant of tosnap
+log_mustnot zfs redact $sendfs@snap2 book $sendfs@snap2
+log_must zfs redact $sendfs@snap2 book2 $clone1@snap $clone2@snap
+log_must eval "zfs send --redact book2 $sendfs@snap2 >$stream"
+log_must zfs redact $sendfs@snap2 book3 $clone1@snap $clone2@snap
+log_must eval "zfs send -i $sendfs@snap1 --redact book3 $sendfs@snap2 \
+    >$TEST_BASE_DIR/devnull"
+log_mustnot zfs redact $sendfs@snap3 $sendfs@snap3 $clone1@snap
+
+# Full redacted sends of redacted datasets are not allowed.
+log_must eval "zfs recv $recvfs <$stream"
+log_must zfs snapshot $recvfs@snap
+log_must zfs clone $recvfs@snap $clone3
+log_must zfs snapshot $clone3@snap
+log_mustnot zfs redact $recvfs@snap book5 $clone3@snap
+
+# Nor may a redacted dataset appear in the redaction list.
+log_mustnot zfs redact $recvfs@snap2 book7 $recvfs@snap
+
+# Non-redaction bookmark cannot be sent and produces invalid argument error
+log_must zfs bookmark "$sendfs@snap1" "$sendfs#book8"
+log_must eval "zfs send --redact book8 -i $sendfs@snap1 $sendfs@snap2 2>&1 | head -n 100 | grep 'not a redaction bookmark'"
+
+# Error messages for common usage errors
+log_mustnot_expect "not contain '#'"    zfs redact $sendfs@snap1 \#book $sendfs@snap2
+log_mustnot_expect "not contain '#'"    zfs redact $sendfs@snap1 $sendfs#book $sendfs@snap2
+log_mustnot_expect "full dataset names" zfs redact $sendfs@snap1 book @snap2
+log_mustnot_expect "full dataset names" zfs redact $sendfs@snap1 book @snap2
+log_mustnot_expect "full dataset names" zfs redact $sendfs@snap1 \#book @snap2
+log_mustnot_expect "descendent of snapshot" zfs redact $sendfs@snap2 book $sendfs@snap1
+
+log_pass "Verify that redacted send correctly detects invalid arguments."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_origin.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_origin.ksh
new file mode 100755
index 0000000..74e5914
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_origin.ksh

@@ -0,0 +1,87 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Test that receiving sends from redaction bookmarks and redacted datasets
+# works correctly in certain edge cases.
+# 1. Send A(B,C,D) to pool2.
+# 2. Verify send from A(B, C, D) can be received onto it.
+# 3. Verify send from A(B, C) can be received onto it.
+# 4. Verify send from A() can be received onto it.
+# 5. Verify send from A(E) cannot be received onto it.
+# 6. Verify send from redaction bookmark for A(B, C) can be received onto it.
+# 7. Verify send from redaction bookmark for A() can be received onto it.
+# 8. Verify send from redaction bookmark for A(E) cannot be received onto it.
+#
+
+typeset ds_name="origin"
+typeset sendfs="$POOL/$ds_name"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name '' setup_incrementals
+typeset dsA=$sendfs@snap0
+typeset dsB=$POOL/hole@snap
+typeset dsC=$POOL/rm@snap
+typeset dsD=$POOL/write@snap
+typeset dsE=$POOL/stride3@snap
+typeset dsF=$POOL/stride5@snap
+typeset targ=$POOL2/targfs@snap
+
+log_onexit redacted_cleanup $sendfs $POOL2/rBCD $POOL2/targfs \
+    $POOL2/rBC $POOL2/rE
+
+# Set up all the filesystems and clones.
+log_must zfs redact $dsA BCD $dsB $dsC $dsD
+log_must eval "zfs send --redact BCD $dsA >$stream"
+log_must eval "zfs receive $POOL2/rBCD <$stream"
+log_must eval "zfs receive $targ <$stream"
+
+log_must zfs redact $dsA BC $dsB $dsC
+log_must eval "zfs send --redact BC $dsA >$stream"
+log_must eval "zfs receive $POOL2/rBC <$stream"
+
+log_must zfs redact $dsA E $dsE
+log_must eval "zfs send --redact E $dsA >$stream"
+log_must eval "zfs receive $POOL2/rE <$stream"
+
+log_must eval "zfs send $dsF >$stream"
+log_must eval "zfs receive -o origin=$POOL2/rBCD@snap0 $POOL2/BCDrF <$stream"
+log_must eval "zfs receive -o origin=$POOL2/rBC@snap0 $POOL2/BCrF <$stream"
+log_must eval "zfs receive -o origin=$POOL2/rE@snap0 $POOL2/ErF <$stream"
+
+# Run tests from redacted datasets.
+log_must eval "zfs send -i $POOL2/rBCD@snap0 $POOL2/BCDrF@snap >$stream"
+log_must eval "zfs receive -o origin=$targ $POOL2/tdBCD <$stream"
+
+log_must eval "zfs send -i $POOL2/rBC@snap0 $POOL2/BCrF@snap >$stream"
+log_must eval "zfs receive -o origin=$targ $POOL2/tdBC <$stream"
+
+log_must eval "zfs send -i $POOL2/rE@snap0 $POOL2/ErF@snap >$stream"
+log_mustnot eval "zfs receive -o origin=$targ $POOL2/tdE <$stream"
+
+# Run tests from redaction bookmarks.
+log_must eval "zfs send -i $sendfs#BC $dsF >$stream"
+log_must eval "zfs receive -o origin=$targ $POOL2/tbBC <$stream"
+
+log_must eval "zfs send -i $sendfs#E $dsF >$stream"
+log_mustnot eval "zfs receive -o origin=$targ $POOL2/tbE <$stream"
+
+log_pass "Verify sends from redacted datasets and bookmarks work correctly."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
new file mode 100755
index 0000000..032d1fb
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh

@@ -0,0 +1,50 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify edge case when midbufid is equal to minbufid for the bug fixed by
+# https://github.com/openzfs/zfs/pull/11297 (Fix kernel panic induced by
+# redacted send)
+#
+
+typeset ds_name="panic"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset stream=$(mktemp $TEST_BASE_DIR/stream.XXXX)
+
+function cleanup
+{
+	redacted_cleanup $sendfs $recvfs
+	rm -f $stream
+}
+
+log_onexit cleanup
+
+log_must zfs create -o recsize=8k $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/file bs=1024k count=2048
+log_must zfs snapshot $sendfs@init
+log_must zfs clone $sendfs@init $clone
+log_must stride_dd -i /dev/urandom -o /$clone/file -b 8192 -s 2 -c 7226
+log_must zfs snapshot $clone@init
+log_must zfs redact $sendfs@init book_init $clone@init
+log_must eval "zfs send --redact $sendfs#book_init $sendfs@init >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_props.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_props.ksh
new file mode 100755
index 0000000..e4163c4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_props.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify the list of redacted snapshot guids as properties.
+#
+# Strategy:
+# 1. Create a redacted dataset and receive it into another pool.
+# 2. Verify that the redaction list in the bookmark (according to zdb)
+#    matches the list shown in the redact_snaps property.
+# 3. Verify that the received snapshot has a matching redaction list.
+#
+
+typeset ds_name="props"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+typeset mntpnt
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+# Verify a plain dataset, snapshot or bookmark has an empty list.
+log_must zfs snapshot $sendfs@empty_snapshot
+log_must zfs bookmark $sendfs@empty_snapshot $sendfs#empty_bookmark
+found_list=$(get_prop redact_snaps $sendfs)
+[[ $found_list = "-" ]] || log_fail "Unexpected dataset list: $found_list"
+found_list=$(get_prop redact_snaps $sendfs@empty_snapshot)
+[[ $found_list = "-" ]] || log_fail "Unexpected snapshot list: $found_list"
+found_list=$(get_prop redact_snaps $sendfs#empty_bookmark)
+[[ $found_list = "-" ]] || log_fail "Unexpected bookmark list: $found_list"
+
+# Fill in a different block in every clone.
+for i in {1..16}; do
+	log_must zfs clone $sendfs@snap ${clone}$i
+	mntpnt=$(get_prop mountpoint ${clone}$i)
+	log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=$i \
+	    conv=notrunc
+	log_must zfs snapshot ${clone}$i@snap
+done
+
+log_must zfs redact $sendfs@snap book1 $clone{1..16}@snap
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+
+get_guid_list $tmpdir/prop_list $sendfs#book1
+get_guid_list $tmpdir/zdb_list $sendfs#book1 true
+get_guid_list $tmpdir/recvd_prop_list $recvfs@snap
+
+count=$(wc -l $tmpdir/prop_list | awk '{print $1}')
+[[ $count -eq 16 ]] || log_fail "Found incorrect number of redaction snapshots."
+
+diff $tmpdir/prop_list $tmpdir/zdb_list || \
+    log_fail "Property list differed from zdb output"
+diff $tmpdir/prop_list $tmpdir/recvd_prop_list || \
+    log_fail "Received property list differed from sent"
+
+log_pass "The redaction list is consistent between sent and received datasets."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_resume.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_resume.ksh
new file mode 100755
index 0000000..4ab04a0
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_resume.ksh

@@ -0,0 +1,88 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that resumable send works correctly with redacted streams.
+#
+# Strategy:
+# 1. Do a full redacted resumable send.
+# 2. Verify the received contents are correct.
+# 3. Do an incremental redacted resumable send.
+# 4. Verify the received contents are correct.
+# 5. Verify that recv -A removes a partially received dataset.
+#
+
+typeset ds_name="resume"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset clone1="$POOL/${ds_name}_clone1"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+setup_dataset $ds_name ''
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+typeset send_mnt="$(get_prop mountpoint $sendfs)"
+typeset recv_mnt="/$POOL2/$ds_name"
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+log_must stride_dd -i /dev/urandom -o $clone_mnt/f2 -b 512 -c 64 -s 512
+log_must zfs snapshot $clone@snap1
+
+# Do the full resumable send
+log_must zfs redact $sendfs@snap book1 $clone@snap1
+resume_test "zfs send --redact book1 $sendfs@snap" $tmpdir $recvfs
+log_must mount_redacted -f $recvfs
+log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 1
+log_must diff $send_mnt/f1 $recv_mnt/f1
+log_must eval "get_diff $send_mnt/f2 $recv_mnt/f2 >$tmpdir/get_diff.out"
+typeset range=$(cat $tmpdir/get_diff.out)
+[[ "$RANGE9" = "$range" ]] || log_fail "Unexpected range: $range"
+
+log_must dd if=/dev/urandom of=$send_mnt/f3 bs=1024k count=3
+log_must zfs snapshot $sendfs@snap2
+log_must zfs clone $sendfs@snap2 $clone1
+typeset clone1_mnt="$(get_prop mountpoint $clone1)"
+log_must dd if=/dev/urandom of=$clone1_mnt/f3 bs=128k count=3 conv=notrunc
+log_must zfs snapshot $clone1@snap
+
+# Do the incremental resumable send
+log_must zfs redact $sendfs@snap2 book2 $clone1@snap
+resume_test "zfs send --redact book2 -i $sendfs#book1 $sendfs@snap2" \
+    $tmpdir $recvfs
+log_must diff $send_mnt/f1 $recv_mnt/f1
+log_must diff $send_mnt/f2 $recv_mnt/f2
+log_must eval "get_diff $send_mnt/f3 $recv_mnt/f3 >$tmpdir/get_diff.out"
+range=$(cat $tmpdir/get_diff.out)
+[[ "$RANGE10" = "$range" ]] || log_fail "Unexpected range: $range"
+
+# Test recv -A works properly and verify saved sends are not allowed
+log_mustnot zfs recv -A $recvfs
+log_must zfs destroy -R $recvfs
+log_mustnot zfs recv -A $recvfs
+log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
+dd if=$stream bs=64k count=1 | log_mustnot zfs receive -s $recvfs
+[[ "-" = $(get_prop receive_resume_token $recvfs) ]] && \
+    log_fail "Receive token not found."
+log_mustnot eval "zfs send --saved --redact book1 $recvfs >$TEST_BASE_DIR/devnull"
+log_must zfs recv -A $recvfs
+log_must datasetnonexists $recvfs
+
+log_pass "Resumable send works correctly with redacted streams."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_size.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_size.ksh
new file mode 100755
index 0000000..7456084
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_size.ksh

@@ -0,0 +1,64 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that send size estimates of redacted sends work correctly
+#
+# Strategy:
+# 1. Perform a redacted send with -nv and without, and verify the
+#    size estimate is the same as the size of the actual send.
+# 2. Perform an incremental send from the redaction bookmark with
+#    -nv and without, and verify the size estimate is the same as
+#    the size of the actual send.
+#
+
+typeset ds_name="sizes"
+typeset sendfs="$POOL/$ds_name"
+typeset clone="$POOL/${ds_name}_clone2"
+setup_dataset $ds_name "-o compress=lz4"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset size=$(mktemp $tmpdir/size.XXXX)
+typeset size2=$(mktemp $tmpdir/size.XXXX)
+
+log_onexit redacted_cleanup $sendfs $clone
+log_must zfs clone $sendfs@snap $clone
+typeset clone_mnt="$(get_prop mountpoint $clone)"
+log_must rm -rf $clone_mnt/*
+log_must zfs snapshot $clone@snap
+log_must zfs redact $sendfs@snap book $clone@snap
+log_must eval "zfs send -nvP --redact book $sendfs@snap | \
+    grep '^size' | awk '{print \$2}' >$size"
+log_must eval "zfs send --redact book $sendfs@snap | wc -c \
+    >$size2"
+bytes1=$(cat $size | tr -d '[:space:]')
+bytes2=$(cat $size2 | tr -d '[:space:]')
+[[ "$bytes1" -eq "$bytes2" ]] || \
+    log_fail "Full sizes differ: estimate $bytes1 and actual $bytes2"
+
+log_must zfs snapshot $sendfs@snap2
+log_must eval "zfs send -nvP -i $sendfs#book $sendfs@snap2 | \
+    grep '^size' | awk '{print \$2}' >$size"
+log_must eval "zfs send -i $sendfs#book $sendfs@snap2 | wc -c >$size2"
+bytes1=$(cat $size | tr -d '[:space:]')
+bytes2=$(cat $size2 | tr -d '[:space:]')
+[[ "$bytes1" -eq "$bytes2" ]] || \
+    log_fail "Incremental sizes differ: estimate $bytes1 and actual $bytes2"
+
+log_pass "Size estimates of redacted sends estimate accurately."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_volume.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_volume.ksh
new file mode 100755
index 0000000..2ea1063
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/redacted_volume.ksh

@@ -0,0 +1,105 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify that redacted send works on volumes.
+#
+# Strategy:
+# 1. Write to a volume, then make a clone of that volume.
+# 2. Receive a redacted stream that sends all blocks.
+# 3. Receive a redacted stream that redacts the first half of the written area.
+#
+
+typeset ds_name="volume"
+typeset sendvol="$POOL/$ds_name"
+typeset recvvol="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)
+typeset send_file="/dev/zvol/$sendvol"
+typeset recv_file="/dev/zvol/$recvvol"
+typeset clone_file="/dev/zvol/$clone"
+
+#
+# Export and re-import a pool, then poll until is_disk_device accepts the
+# given device file (or the polling schedule is exhausted).
+#
+# $1 Pool to export and import
+# $2 Device file to wait for
+#
+function cycle_pool_and_wait # <pool> <device file>
+{
+	typeset pool=$1
+	typeset devfile=$2
+
+	sleep 10
+	log_must zpool export $pool
+	log_must zpool import $pool
+	udevadm settle
+	if ! is_disk_device $devfile; then
+		udevadm settle
+		for t in 10 5 3 2 1; do
+			log_note "Polling $t seconds for device file."
+			udevadm settle
+			sleep $t
+			is_disk_device $devfile && break
+		done
+	fi
+
+	# A missing device is caught by the log_must dd that follows the call.
+	return 0
+}
+
+log_onexit redacted_cleanup $sendvol $recvvol
+
+# Create the source volume and fill its first 64 8k blocks with random data.
+log_must zfs create -b 8k -V 1g $sendvol
+cycle_pool_and_wait $POOL $send_file
+log_must dd if=/dev/urandom of=$send_file bs=8k count=64
+log_must zfs snapshot $sendvol@snap
+log_must zfs clone $sendvol@snap $clone
+log_must zfs snapshot $clone@snap
+
+# Pass 1: redact against the unmodified clone and compare all 64 blocks.
+log_must set_tunable32 ALLOW_REDACTED_DATASET_MOUNT 1
+log_must zfs redact $sendvol@snap book1 $clone@snap
+log_must eval "zfs send --redact book1 $sendvol@snap >$stream"
+log_must eval "zfs recv $recvvol <$stream"
+cycle_pool_and_wait $POOL2 $recv_file
+log_must dd if=$send_file of=$tmpdir/send.dd bs=8k count=64
+log_must dd if=$recv_file of=$tmpdir/recv.dd bs=8k count=64
+log_must diff $tmpdir/send.dd $tmpdir/recv.dd
+log_must zfs destroy -R $recvvol
+
+# Pass 2: overwrite the first 32 blocks of the clone, redact against that,
+# and compare only the second 32 blocks of source and received volume.
+log_must dd if=/dev/urandom of=$clone_file bs=8k count=32
+log_must zfs snapshot $clone@snap1
+log_must zfs redact $sendvol@snap book2 $clone@snap1
+log_must eval "zfs send --redact book2 $sendvol@snap >$stream"
+log_must eval "zfs recv $recvvol <$stream"
+cycle_pool_and_wait $POOL2 $recv_file
+log_must dd if=$send_file of=$tmpdir/send.dd bs=8k count=32 skip=32
+log_must dd if=$recv_file of=$tmpdir/recv.dd bs=8k count=32 skip=32
+log_must diff $tmpdir/send.dd $tmpdir/recv.dd
+
+log_pass "Redacted send works correctly with volumes."

diff --git a/zfs/tests/zfs-tests/tests/functional/redacted_send/setup.ksh b/zfs/tests/zfs-tests/tests/functional/redacted_send/setup.ksh
new file mode 100755
index 0000000..3f537f8
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redacted_send/setup.ksh

@@ -0,0 +1,36 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+# Two disks are required: one for each of the pools created below.
+verify_disk_count "$DISKS" 2
+
+# Create both pools and snapshot each root dataset as @init.
+create_pool $POOL $DISK1
+log_must zfs snapshot $POOL@init
+create_pool $POOL2 $DISK2
+log_must zfs snapshot $POOL2@init
+# Scratch dataset; the tests mktemp their work files under its mountpoint
+# (e.g. "$(get_prop mountpoint $POOL)/tmp" in redacted_volume.ksh).
+log_must zfs create $POOL/tmp
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am b/zfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
index 6f6cc40..7c1930b 100644
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am

@@ -2,10 +2,21 @@
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
-	redundancy_001_pos.ksh \
-	redundancy_002_pos.ksh \
-	redundancy_003_pos.ksh \
-	redundancy_004_neg.ksh
+	redundancy_draid.ksh \
+	redundancy_draid1.ksh \
+	redundancy_draid2.ksh \
+	redundancy_draid3.ksh \
+	redundancy_draid_damaged1.ksh \
+	redundancy_draid_damaged2.ksh \
+	redundancy_draid_spare1.ksh \
+	redundancy_draid_spare2.ksh \
+	redundancy_draid_spare3.ksh \
+	redundancy_mirror.ksh \
+	redundancy_raidz.ksh \
+	redundancy_raidz1.ksh \
+	redundancy_raidz2.ksh \
+	redundancy_raidz3.ksh \
+	redundancy_stripe.ksh
 
 dist_pkgdata_DATA = \
 	redundancy.cfg \

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
index ab36d00..c9c9747 100644
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib

@@ -67,6 +67,23 @@
 }
 
 #
+# Get the number of checksum errors for the pool.
+#
+# $1 Pool
+#
+function cksum_pool
+{
+	# Add up the last column of each row between the line ending in
+	# "CKSUM" and the following blank line of the status output.
+	typeset -i total=$(zpool status $1 | awk '
+	    NF == 0 { in_rows = 0 }
+	    in_rows { sum += $NF }
+	    /CKSUM$/ { in_rows = 1 }
+	    END { print sum }')
+	echo $total
+}
+
+#
 # Record the directories construction and checksum all the files which reside
 # within the specified pool
 #
@@ -81,6 +98,7 @@
 	[[ -z $pool ]] && log_fail "No specified pool."
 	[[ -f $recordfile ]] && log_must rm -f $recordfile
 
+	sync_pool $pool
 	typeset mntpnt
 	mntpnt=$(get_prop mountpoint $pool)
 	log_must eval "du -a $mntpnt > $recordfile 2>&1"
@@ -119,22 +137,44 @@
 		destroy_pool $pool
 	fi
 
-	log_must mkfile $MINVDEVSIZE $vdevs
+	log_must truncate -s $MINVDEVSIZE $vdevs
 
-	log_must zpool create -m $TESTDIR $pool $keyword $vdevs
+	log_must zpool create -f -m $TESTDIR $pool $keyword $vdevs
 
 	log_note "Filling up the filesystem ..."
 	typeset -i ret=0
 	typeset -i i=0
 	typeset file=$TESTDIR/file
+	typeset -i limit
+	(( limit = $(get_prop available $pool) / 2 ))
+
 	while true ; do
-		file_write -o create -f $file.$i \
-			-b $BLOCKSZ -c $NUM_WRITES
+		[[ $(get_prop available $pool) -lt $limit ]] && break
+		file_write -o create -f $file.$i -b $BLOCKSZ -c $NUM_WRITES
 		ret=$?
 		(( $ret != 0 )) && break
 		(( i = i + 1 ))
 	done
-	(($ret != 28 )) && log_note "file_write return value($ret) is unexpected."
+
+	record_data $TESTPOOL $PRE_RECORD_FILE
+}
+
+#
+# Rewrite the existing fill files of a pool in place.
+#
+# $1 Pool whose mountpoint holds the file.N files
+#
+# Walks file.0, file.1, ... under the pool's mountpoint for as long as the
+# next file exists, removing each one and re-creating it with file_write.
+# The walk stops early at the first file_write failure.  Afterwards the
+# record in $PRE_RECORD_FILE is regenerated.
+#
+# NOTE(review): the record is taken on $TESTPOOL rather than on $1;
+# presumably callers always pass $TESTPOOL -- confirm.
+#
+function refill_test_env
+{
+	log_note "Re-filling the filesystem ..."
+	typeset pool=$1
+	typeset -i ret=0
+	typeset -i i=0
+	typeset mntpnt
+	mntpnt=$(get_prop mountpoint $pool)
+	typeset file=$mntpnt/file
+	while [[ -e $file.$i ]]; do
+		log_must rm -f $file.$i
+		file_write -o create -f $file.$i -b $BLOCKSZ -c $NUM_WRITES
+		ret=$?
+		(( $ret != 0 )) && break
+		(( i = i + 1 ))
+	done
 
 	record_data $TESTPOOL $PRE_RECORD_FILE
 }
@@ -178,8 +218,13 @@
 {
 	typeset pool=$1
 
+	log_must zpool scrub -w $pool
+
 	record_data $pool $PST_RECORD_FILE
-	if ! diff $PRE_RECORD_FILE $PST_RECORD_FILE > /dev/null 2>&1; then
+	if ! cmp $PRE_RECORD_FILE $PST_RECORD_FILE > /dev/null; then
+		log_must cat $PRE_RECORD_FILE
+		log_must cat $PST_RECORD_FILE
+		diff -u $PRE_RECORD_FILE $PST_RECORD_FILE
 		return 1
 	fi
 
@@ -198,8 +243,8 @@
 	typeset -i cnt=$2
 
 	typeset all_devs=$(zpool iostat -v $pool | awk '{print $1}'| \
-		egrep -v "^pool$|^capacity$|^mirror$|^raidz1$|^raidz2$|---" | \
-		egrep -v "/old$|^$pool$")
+		grep -vEe "^pool$|^capacity$|^mirror\-[0-9]$|^raidz[1-3]\-[0-9]$|^draid[1-3].*\-[0-9]$|---" | \
+		grep -vEe "/old$|^$pool$")
 	typeset -i i=0
 	typeset vdevs
 	while ((i < cnt)); do
@@ -226,17 +271,10 @@
 
 	typeset vdev
 	for vdev in $@; do
-		log_must gnudd if=/dev/zero of=$vdev \
-		    bs=1024k count=$(($MINDEVSIZE / (1024 * 1024))) \
-		    oflag=fdatasync
-		log_must zpool replace -f $pool $vdev $vdev
-		while true; do
-			if ! is_pool_resilvered $pool ; then
-				log_must sleep 2
-			else
-				break
-			fi
-		done
+		log_must dd if=/dev/zero of=$vdev \
+		    bs=1024k count=$((MINVDEVSIZE / (1024 * 1024))) \
+		    conv=fdatasync
+		log_must zpool replace -wf $pool $vdev $vdev
 	done
 }
 
@@ -254,19 +292,19 @@
 	typeset -i cnt=$2
 	typeset label="$3"
 	typeset vdevs
-	typeset -i bs_count=$((64 * 1024))
+	typeset -i bs_count=$(((MINVDEVSIZE / 1024) - 4096))
 
 	vdevs=$(get_vdevs $pool $cnt)
 	typeset dev
 	if [[ -n $label ]]; then
 		for dev in $vdevs; do
-			dd if=/dev/zero of=$dev seek=512 bs=1024 \
+			log_must dd if=/dev/zero of=$dev seek=512 bs=1024 \
 			    count=$bs_count conv=notrunc >/dev/null 2>&1
 		done
 	else
 		for dev in $vdevs; do
-			dd if=/dev/zero of=$dev bs=1024 count=$bs_count \
-			    conv=notrunc >/dev/null 2>&1
+			log_must dd if=/dev/zero of=$dev bs=1024 \
+			    count=$bs_count conv=notrunc >/dev/null 2>&1
 		done
 	fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh
deleted file mode 100755
index b5557f1..0000000
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_001_pos.ksh
+++ /dev/null

@@ -1,76 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-#	A raidz pool can withstand at most 1 device failing or missing.
-#
-# STRATEGY:
-#	1. Create N(>2,<5) virtual disk files.
-#	2. Create raidz pool based on the virtual disk files.
-#	3. Fill the filesystem with directories and files.
-#	4. Record all the files and directories checksum information.
-#	5. Damaged one of the virtual disk file.
-#	6. Verify the data is correct to prove raidz can withstand 1 device is
-#	   failing.
-#
-
-verify_runnable "global"
-
-log_assert "Verify raidz pool can withstand one device is failing."
-log_onexit cleanup
-
-typeset -i cnt=$(random 2 5)
-setup_test_env $TESTPOOL raidz $cnt
-
-#
-# Inject data corruption error for raidz pool
-#
-damage_devs $TESTPOOL 1 "label"
-log_must is_data_valid $TESTPOOL
-log_must clear_errors $TESTPOOL
-
-#
-# Inject bad device error for raidz pool
-#
-damage_devs $TESTPOOL 1
-log_must is_data_valid $TESTPOOL
-log_must recover_bad_missing_devs $TESTPOOL 1
-
-#
-# Inject missing device error for raidz pool
-#
-remove_devs $TESTPOOL 1
-log_must is_data_valid $TESTPOOL
-
-log_pass "Raidz pool can withstand one devices is failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_002_pos.ksh
deleted file mode 100755
index b16687d..0000000
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_002_pos.ksh
+++ /dev/null

@@ -1,83 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-#	A raidz2 pool can withstand 2 devices are failing or missing.
-#
-# STRATEGY:
-#	1. Create N(>3,<5) virtual disk files.
-#	2. Create raidz2 pool based on the virtual disk files.
-#	3. Fill the filesystem with directories and files.
-#	4. Record all the files and directories checksum information.
-#	5. Damaged at most two of the virtual disk files.
-#	6. Verify the data is correct to prove raidz2 can withstand 2 devices
-#	   are failing.
-#
-
-verify_runnable "global"
-
-log_assert "Verify raidz2 pool can withstand two devices are failing."
-log_onexit cleanup
-
-typeset -i cnt=$(random 3 5)
-setup_test_env $TESTPOOL raidz2 $cnt
-
-#
-# Inject data corruption errors for raidz2 pool
-#
-for i in 1 2; do
-	damage_devs $TESTPOOL $i "label"
-	log_must is_data_valid $TESTPOOL
-	log_must clear_errors $TESTPOOL
-done
-
-#
-# Inject bad devices errors for raidz2 pool
-#
-for i in 1 2; do
-	damage_devs $TESTPOOL $i
-	log_must is_data_valid $TESTPOOL
-	log_must recover_bad_missing_devs $TESTPOOL $i
-done
-
-#
-# Inject missing device errors for raidz2 pool
-#
-for i in 1 2; do
-	remove_devs $TESTPOOL $i
-	log_must is_data_valid $TESTPOOL
-	log_must recover_bad_missing_devs $TESTPOOL $i
-done
-
-log_pass "Raidz2 pool can withstand two devices are failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_003_pos.ksh
deleted file mode 100755
index a1ca2cb..0000000
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_003_pos.ksh
+++ /dev/null

@@ -1,93 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-#	A mirrored pool can withstand N-1 device are failing or missing.
-#
-# STRATEGY:
-#	1. Create N(>2,<5) virtual disk files.
-#	2. Create mirror pool based on the virtual disk files.
-#	3. Fill the filesystem with directories and files.
-#	4. Record all the files and directories checksum information.
-#	5. Damaged at most N-1 of the virtual disk files.
-#	6. Verify the data are correct to prove mirror can withstand N-1 devices
-#	   are failing.
-#
-
-verify_runnable "global"
-
-log_assert "Verify mirrored pool can withstand N-1 devices are failing or missing."
-log_onexit cleanup
-
-typeset -i cnt=$(random 2 5)
-setup_test_env $TESTPOOL mirror $cnt
-
-typeset -i i=1
-
-#
-# Inject data corruption errors for mirrored pool
-#
-while (( i < cnt )); do
-	damage_devs $TESTPOOL $i "label"
-	log_must is_data_valid $TESTPOOL
-	log_must clear_errors $TESTPOOL
-
-	(( i +=1 ))
-done
-
-#
-# Inject  bad devices errors for mirrored pool
-#
-i=1
-while (( i < cnt )); do
-        damage_devs $TESTPOOL $i
-        log_must is_data_valid $TESTPOOL
-	log_must recover_bad_missing_devs $TESTPOOL $i
-
-	(( i +=1 ))
-done
-
-#
-# Inject missing device errors for mirrored pool
-#
-i=1
-while (( i < cnt )); do
-        remove_devs $TESTPOOL $i
-        log_must is_data_valid $TESTPOOL
-	log_must recover_bad_missing_devs $TESTPOOL $i
-
-	(( i +=1 ))
-done
-
-log_pass "Mirrored pool can withstand N-1 devices failing as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_004_neg.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_004_neg.ksh
deleted file mode 100755
index 01b819d..0000000
--- a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_004_neg.ksh
+++ /dev/null

@@ -1,66 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-#	Striped pool have no data redundancy. Any device errors will
-#	cause data corruption.
-#
-# STRATEGY:
-#	1. Create N virtual disk file.
-#	2. Create stripe pool based on the virtual disk files.
-#	3. Fill the filesystem with directories and files.
-#	4. Record all the files and directories checksum information.
-#	5. Damage one of the virtual disk file.
-#	6. Verify the data is error.
-#
-
-verify_runnable "global"
-
-log_assert "Verify striped pool have no data redundancy."
-log_onexit cleanup
-
-typeset -i cnt=$(random 2 5)
-setup_test_env $TESTPOOL "" $cnt
-
-damage_devs $TESTPOOL 1 "keep_label"
-log_must zpool scrub $TESTPOOL
-
-# Wait for the scrub to wrap, or is_healthy will be wrong.
-while ! is_pool_scrubbed $TESTPOOL; do
-	sleep 1
-done
-
-log_mustnot is_healthy $TESTPOOL
-
-log_pass "Striped pool has no data redundancy as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh
new file mode 100755
index 0000000..8015e68
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh

@@ -0,0 +1,248 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	dRAID should provide redundancy
+#
+# STRATEGY:
+#	1. Create block device files for the test draid pool
+#	2. For each parity value [1..3]
+#	    - create draid pool
+#	    - fill it with some directories/files
+#	    - verify self-healing by overwriting devices
+#	    - verify resilver by replacing devices
+#	    - verify scrub by zeroing devices
+#	    - destroy the draid pool
+
+# Number of file vdevs in the pool and the size of each one in MiB.
+typeset -r devs=6
+typeset -r dev_size_mb=512
+
+# Paths of the file vdevs passed to "zpool create".
+typeset -a disks
+
+# Saved so that cleanup can restore the tunable after the test changes it.
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+
+#
+# Tear down the test pool if it exists, remove the dev-0 .. dev-$devs
+# backing files and put PREFETCH_DISABLE back to its saved value.
+#
+function cleanup
+{
+	if poolexists "$TESTPOOL"; then
+		destroy_pool "$TESTPOOL"
+	fi
+
+	for (( i=0; i<=$devs; i=i+1 )); do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+}
+
+function test_selfheal # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	typeset mntpnt
+	typeset -i first last
+
+	#
+	# Two identical passes: the first damages dev-0 .. dev-(nparity-1),
+	# the second dev-nparity .. dev-(2*nparity-1).
+	#
+	for first in 0 $nparity; do
+		(( last = first + nparity ))
+
+		log_must zpool export $pool
+
+		# Zero each device of this pass from the 4M offset onwards.
+		for (( i=first; i<last; i=i+1 )); do
+			log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+			    bs=1M seek=4 count=$(($dev_size_mb-4))
+		done
+
+		log_must zpool import -o cachefile=none -d $dir $pool
+
+		# Read every file back so the damaged blocks get self-healed.
+		mntpnt=$(get_prop mountpoint $pool/fs)
+		log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+		log_must check_pool_status $pool "errors" "No known data errors"
+
+		#
+		# The find above only self-heals blocks of the files it read.
+		# Scrub so that every block in the pool is repaired before
+		# any further devices are overwritten.
+		#
+		log_must zpool scrub -w $pool
+		log_must check_pool_status $pool "errors" "No known data errors"
+
+		log_must zpool clear $pool
+	done
+}
+
+#
+# Verify that devices which were offlined and had their labels cleared can
+# be replaced in place without leaving data or checksum errors behind.
+#
+# $1 Pool
+# $2 Parity level; that many devices are replaced per pass
+# $3 Directory holding the dev-N backing files
+#
+# Two passes are run: dev-0 .. dev-(nparity-1) first, then
+# dev-nparity .. dev-(2*nparity-1).
+#
+function test_resilver # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	# Keep the loop index and the error count out of the global scope.
+	typeset -i i
+	typeset -i resilver_cksum
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	# -w makes each replace wait, so the checks below run afterwards.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+}
+
+#
+# Verify that a scrub repairs devices which were zeroed while the pool
+# was exported.
+#
+# $1 Pool
+# $2 Parity level; that many devices are zeroed per pass
+# $3 Directory holding the dev-N backing files
+#
+# Two passes are run: dev-0 .. dev-(nparity-1) first, then
+# dev-nparity .. dev-(2*nparity-1).
+#
+function test_scrub # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	log_must zpool export $pool
+
+	# The dd must succeed: if no device is damaged, the scrub check
+	# below passes without having tested anything.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+log_onexit cleanup
+
+# Set for the duration of the test; cleanup restores the saved value.
+log_must set_tunable32 PREFETCH_DISABLE 1
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Extra disk file dev-$devs, sized like the others.  Nothing in this
+# script adds it to the pool; cleanup removes it again.
+log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-$devs
+
+# Run the self-heal, resilver and scrub checks once per parity level.
+for nparity in 1 2 3; do
+	raid=draid$nparity
+	dir=$TEST_BASE_DIR
+
+	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+	log_must zfs set primarycache=metadata $TESTPOOL
+
+	# Three datasets: defaults, compressed, and compressed with 8k records.
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R
+
+	# Re-import from the device directory before any damage is injected.
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	test_selfheal $TESTPOOL $nparity $dir
+	test_resilver $TESTPOOL $nparity $dir
+	test_scrub $TESTPOOL $nparity $dir
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid redundancy test succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid1.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid1.ksh
new file mode 100755
index 0000000..85d420a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid1.ksh

@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A draid pool can withstand at most 1 device failing or missing.
+#
+# STRATEGY:
+#	1. Create N virtual disk files (N = random_int_between 3 6).
+#	2. Create draid pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage one of the virtual disk files.
+#	6. Verify the data is correct to prove draid can withstand 1 device
+#	   failing.
+#
+
+verify_runnable "global"
+
+log_assert "Verify draid pool can withstand one device failing."
+log_onexit cleanup
+
+# cnt is handed to setup_test_env; presumably the number of vdev files
+# to build the pool from (see STRATEGY step 1) -- confirm in the kshlib.
+typeset -i cnt=$(random_int_between 3 6)
+setup_test_env $TESTPOOL draid $cnt
+
+#
+# Inject data corruption error for draid pool
+#
+damage_devs $TESTPOOL 1 "label"
+log_must is_data_valid $TESTPOOL
+log_must clear_errors $TESTPOOL
+
+#
+# Inject bad device error for draid pool
+#
+damage_devs $TESTPOOL 1
+log_must is_data_valid $TESTPOOL
+log_must recover_bad_missing_devs $TESTPOOL 1
+
+#
+# Inject missing device error for draid pool
+# (last scenario, so no recover_bad_missing_devs call follows)
+#
+remove_devs $TESTPOOL 1
+log_must is_data_valid $TESTPOOL
+
+log_pass "draid pool can withstand one device failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid2.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid2.ksh
new file mode 100755
index 0000000..04f1fdf
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid2.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A draid2 pool can withstand 2 devices failing or missing.
+#
+# STRATEGY:
+#	1. Create N virtual disk files (N = random_int_between 4 6).
+#	2. Create draid2 pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage at most two of the virtual disk files.
+#	6. Verify the data is correct to prove draid2 can withstand 2 devices
+#	   failing.
+#
+
+verify_runnable "global"
+
+log_assert "Verify draid2 pool can withstand two devices failing."
+log_onexit cleanup
+
+# cnt is handed to setup_test_env; presumably the number of vdev files
+# to build the pool from (see STRATEGY step 1) -- confirm in the kshlib.
+typeset -i cnt=$(random_int_between 4 6)
+setup_test_env $TESTPOOL draid2 $cnt
+
+# Each scenario below runs twice, passing 1 and then 2 as the device count.
+
+#
+# Inject data corruption errors for draid2 pool
+#
+for i in 1 2; do
+	damage_devs $TESTPOOL $i "label"
+	log_must is_data_valid $TESTPOOL
+	log_must clear_errors $TESTPOOL
+done
+
+#
+# Inject bad devices errors for draid2 pool
+#
+for i in 1 2; do
+	damage_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+#
+# Inject missing device errors for draid2 pool
+#
+for i in 1 2; do
+	remove_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+log_pass "draid2 pool can withstand two devices failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid3.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid3.ksh
new file mode 100755
index 0000000..d4c823e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid3.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A draid3 pool can withstand 3 devices failing or missing.
+#
+# STRATEGY:
+#	1. Create N virtual disk files (N = random_int_between 5 6).
+#	2. Create a draid3 pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record the checksum information of all files and directories.
+#	5. Damage at most three of the virtual disk files.
+#	6. Verify the data is correct to prove draid3 can withstand 3
+#	   failing devices.
+#
+
+verify_runnable "global"
+
+log_assert "Verify draid3 pool can withstand three devices failing."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 5 6)
+setup_test_env $TESTPOOL draid3 $cnt
+
+# Data corruption: for i = 1, 2, then 3, call damage_devs with "label",
+# require the data to still validate, then clear the pool's errors.
+# NOTE(review): $i is presumably the number of devices damaged - confirm.
+for i in 1 2 3; do
+	damage_devs $TESTPOOL $i "label"
+	log_must is_data_valid $TESTPOOL
+	log_must clear_errors $TESTPOOL
+done
+
+# Bad devices: damage_devs is called without "label"; after the data
+# validates, recover_bad_missing_devs must succeed for the same $i
+# before the next round starts.
+for i in 1 2 3; do
+	damage_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+# Missing devices: remove_devs is used instead of damage_devs; the data
+# must still validate and recover_bad_missing_devs must succeed for
+# each $i.
+for i in 1 2 3; do
+	remove_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+log_pass "draid3 pool can withstand three devices failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh
new file mode 100755
index 0000000..1c1183c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh

@@ -0,0 +1,140 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	When sequentially resilvering a dRAID pool with multiple vdevs
+#	that contain silent damage a sequential resilver should never
+#	introduce additional unrecoverable damage.
+#
+# STRATEGY:
+#	1. Create block device files for the test draid pool
+#	2. For each parity value [1..3]
+#	    - create draid pool
+#	    - fill it with some directories/files
+#	    - overwrite the maximum number of repairable devices
+#	    - sequentially resilver each overwritten device one at a time;
+#	      the device will not be correctly repaired because the silent
+#	      damage on the other vdevs will cause the parity calculations
+#	      to generate incorrect data for the resilvering vdev.
+#	    - verify that only the resilvering devices had invalid data
+#	      written and that a scrub is still able to repair the pool
+#	    - destroy the draid pool
+#
+
+typeset -r devs=7
+typeset -r dev_size_mb=512
+# disks collects the dev-N paths that are passed to "zpool create".
+typeset -a disks
+# Saved before the test changes them; cleanup writes these values back.
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
+
+function cleanup # destroy the pool, remove dev-* files, restore tunables
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+	# dev-0 .. dev-$devs: the pool disks plus the extra dev-$devs file.
+	for i in {0..$devs}; do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+	# Write back the tunable values captured when the script started.
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+	set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
+}
+
+function test_sequential_resilver # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	# Export the pool before overwriting its backing files.
+	log_must zpool export $pool
+	# Zero dev-0..dev-(nparity-1) from the 4 MiB mark to the end of file.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+	# Replace each zeroed file with distributed spare draid<parity>-0-<i>.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		spare=draid${nparity}-0-$i
+		log_must zpool replace -fsw $pool $dir/dev-$i $spare
+	done
+	# Scrub and wait for it, then print the pool status to the log.
+	log_must zpool scrub -w $pool
+	log_must zpool status $pool
+	# "repaired 0B" is expected NOT to match; no errors may be reported.
+	log_mustnot check_pool_status $pool "scan" "repaired 0B"
+	log_must check_pool_status $pool "errors" "No known data errors"
+	log_must check_pool_status $pool "scan" "with 0 errors"
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Extra disk file dev-$devs; it is not added to the pool below.
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 1 2 3; do
+	raid=draid${nparity}:${nparity}s
+	dir=$TEST_BASE_DIR
+	# draid vdev with $nparity parity and $nparity distributed spares.
+	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+	log_must zfs set primarycache=metadata $TESTPOOL
+	# Fill three datasets: default, compressed, compressed + 8k records.
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R
+	# Export/import cycle, then require a clean pool before damaging it.
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	test_sequential_resilver $TESTPOOL $nparity $dir
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid damaged device(s) test succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh
new file mode 100755
index 0000000..8e06db9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh

@@ -0,0 +1,157 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	When sequentially resilvering a dRAID pool to a distributed spare
+#	silent damage to an online vdev in a replacing or spare mirror vdev
+#	is not expected to be repaired.  Not only does the rebuild have no
+#	reason to suspect the silent damage but even if it did there's no
+#	checksum available to determine the correct copy and make the repair.
+#	However, the subsequent scrub should detect and repair any damage.
+#
+# STRATEGY:
+#	1. Create block device files for the test draid pool
+#	2. For each parity value [1..3]
+#		a. Create a draid pool
+#		b. Fill it with some directories/files
+#		c. Systematically damage and replace three devices by:
+#			- Overwrite the device
+#			- Replace the damaged vdev with a distributed spare
+#			- Scrub the pool and verify repair IO is issued
+#		d. Detach the distributed spares
+#		e. Scrub the pool and verify there was nothing to repair
+#		f. Destroy the draid pool
+#
+
+typeset -r devs=7
+typeset -r dev_size_mb=512
+typeset -a disks
+# Saved before the test changes them; cleanup writes these values back.
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
+
+function cleanup # destroy the pool, remove dev-* files, restore tunables
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+	# dev-0 .. dev-$devs: the pool disks plus the extra dev-$devs file.
+	for i in {0..$devs}; do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+	# Write back the tunable values captured when the script started.
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+	set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Extra disk file dev-$devs; it is not added to the pool below.
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+dir=$TEST_BASE_DIR
+
+for nparity in 1 2 3; do
+	raid=draid${nparity}:3s
+	# draid vdev with $nparity parity and three distributed spares.
+	log_must zpool create -f -O compression=off -o cachefile=none \
+	    $TESTPOOL $raid ${disks[@]}
+	# log_must zfs set primarycache=metadata $TESTPOOL
+
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 256 10 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 256 10 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 256 10 1024 R
+	# Export/import cycle, then require a clean pool before damaging it.
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+	# Damage dev-0..dev-2 one at a time, each replaced by its own spare.
+	for nspare in 0 1 2; do
+		damaged=$dir/dev-${nspare}
+		spare=draid${nparity}-0-${nspare}
+		# Zero the file from the 4 MiB mark on while it is exported.
+		log_must zpool export $TESTPOOL
+		log_must dd conv=notrunc if=/dev/zero of=$damaged \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+		log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+		log_must zpool replace -fsw $TESTPOOL $damaged $spare
+
+		# Scrub the pool after the sequential resilver and verify
+		# that the silent damage was repaired by the scrub.
+		log_must zpool scrub -w $TESTPOOL
+		log_must zpool status $TESTPOOL
+		log_must check_pool_status $TESTPOOL "errors" \
+		    "No known data errors"
+		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+		log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B"
+	done
+	# Each spare-N vdev, its original file and its spare must be ONLINE.
+	for nspare in 0 1 2; do
+		log_must check_vdev_state $TESTPOOL \
+		    spare-${nspare} "ONLINE"
+		log_must check_vdev_state $TESTPOOL \
+		    ${dir}/dev-${nspare} "ONLINE"
+		log_must check_vdev_state $TESTPOOL \
+		    draid${nparity}-0-${nspare} "ONLINE"
+	done
+
+	# Detach the distributed spares and scrub the pool again to
+	# verify no damage remained on the originally corrupted vdevs.
+	for nspare in 0 1 2; do
+		log_must zpool detach $TESTPOOL draid${nparity}-0-${nspare}
+	done
+
+	log_must zpool clear $TESTPOOL
+	log_must zpool scrub -w $TESTPOOL
+	log_must zpool status $TESTPOOL
+	# This time the scrub is expected to report "repaired 0B".
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid damaged device scrub test succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh
new file mode 100755
index 0000000..8acee15
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh

@@ -0,0 +1,98 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# Verify resilver to dRAID distributed spares.
+#
+# STRATEGY:
+# 1. For resilvers:
+#    a. Create a semi-random dRAID pool configuration which can:
+#       - sustain N failures (1-3), and
+#       - has N distributed spares to replace all faulted vdevs
+#    b. Fill the pool with data
+#    c. Systematically fault a vdev, then replace it with a spare
+#    d. Scrub the pool to verify no data was lost
+#    e. Verify the contents of files in the pool
+#
+
+log_assert "Verify resilver to dRAID distributed spares"
+
+function cleanup_tunable # set REBUILD_SCRUB_ENABLED to 1, then run cleanup
+{
+	log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
+	cleanup
+}
+
+log_onexit cleanup_tunable
+
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+for replace_mode in "healing" "sequential"; do
+	# "sequential" adds -s to zpool replace; "healing" passes no flag.
+	if [[ "$replace_mode" = "sequential" ]]; then
+		flags="-s"
+	else
+		flags=""
+	fi
+
+	parity=$(random_int_between 1 3)
+	spares=$(random_int_between $parity 3)
+	data=$(random_int_between 1 8)
+	# The child count must cover the data, parity and spare devices.
+	(( min_children = (data + parity + spares) ))
+	children=$(random_int_between $min_children 16)
+
+	draid="draid${parity}:${data}d:${children}c:${spares}s"
+
+	setup_test_env $TESTPOOL $draid $children
+	# Fault vdev0..vdev(spares-1) in turn, replacing each with a spare.
+	i=0
+	while [[ $i -lt $spares ]]; do
+		fault_vdev="$BASEDIR/vdev$i"
+		spare_vdev="draid${parity}-0-${i}"
+
+		log_must zpool offline -f $TESTPOOL $fault_vdev
+		log_must check_vdev_state $TESTPOOL $fault_vdev "FAULTED"
+		log_must zpool replace -w $flags $TESTPOOL \
+		    $fault_vdev $spare_vdev
+		log_must check_vdev_state $TESTPOOL spare-$i "DEGRADED"
+		log_must check_vdev_state $TESTPOOL $spare_vdev "ONLINE"
+		log_must check_hotspare_state $TESTPOOL $spare_vdev "INUSE"
+		log_must zpool detach $TESTPOOL $fault_vdev
+		log_must verify_pool $TESTPOOL
+		log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+		(( i += 1 ))
+	done
+
+	log_must is_data_valid $TESTPOOL
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	cleanup
+done
+
+log_pass "Verify resilver to dRAID distributed spares"

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh
new file mode 100755
index 0000000..08fdd55
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh

@@ -0,0 +1,80 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# Verify multiple dRAID spares can be used.
+#
+# STRATEGY:
+# 1. Create a pool and fill it with data.
+# 2. Engage 3 distributed spares and verify the pool
+# 3. Refill the filesystem with new data
+# 4. Clear the pool to online previous faulted devices and resilver
+# 5. Verify the pool and its contents
+#
+
+log_assert "Verify multiple dRAID spares"
+
+log_onexit cleanup
+# draid1 vdev with 10 children and 3 distributed spares.
+parity=1
+spares=3
+data=$(random_int_between 1 4)
+children=10
+draid="draid${parity}:${data}d:${children}c:${spares}s"
+
+setup_test_env $TESTPOOL $draid $children
+
+# Replace vdev7 -> draid1-0-0
+log_must zpool offline -f $TESTPOOL $BASEDIR/vdev7
+log_must zpool replace -w $TESTPOOL $BASEDIR/vdev7 draid1-0-0
+
+# Replace vdev8 -> draid1-0-1
+log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
+log_must zpool replace -w $TESTPOOL $BASEDIR/vdev8 draid1-0-1
+
+# Replace vdev9 -> draid1-0-2
+log_must zpool offline -f $TESTPOOL $BASEDIR/vdev9
+log_must zpool replace -w $TESTPOOL $BASEDIR/vdev9 draid1-0-2
+
+# Verify, refill and re-verify the pool; a verify_pool failure is fatal.
+log_must verify_pool $TESTPOOL
+refill_test_env $TESTPOOL
+log_must verify_pool $TESTPOOL
+
+# Bring everything back online and check for errors.
+log_must zpool clear $TESTPOOL
+log_must zpool wait -t resilver $TESTPOOL
+
+log_must wait_hotspare_state $TESTPOOL draid1-0-0 "AVAIL"
+log_must wait_hotspare_state $TESTPOOL draid1-0-1 "AVAIL"
+log_must wait_hotspare_state $TESTPOOL draid1-0-2 "AVAIL"
+
+log_must zpool scrub -w $TESTPOOL
+log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+log_must is_data_valid $TESTPOOL
+
+log_pass "Verify multiple dRAID spares"

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh
new file mode 100755
index 0000000..28e8e3c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh

@@ -0,0 +1,193 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# Verify dRAID resilver to traditional and distributed spares for
+# a variety of pool configurations and pool states.
+#
+# STRATEGY:
+# 1. For resilvers:
+#    a. Create a semi-random dRAID pool configuration which can
+#       sustain 1 failure and has 5 distributed spares.
+#    b. Fill the pool with data
+#    c. Systematically fault and replace vdevs in the pools with
+#       spares to test resilvering in common pool states.
+#    d. Scrub the pool to verify no data was lost
+#    e. Verify the contents of files in the pool
+#
+
+log_assert "Verify dRAID resilver"
+
+function cleanup_tunable # set REBUILD_SCRUB_ENABLED to 1, then run cleanup
+{
+	log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
+	cleanup
+}
+
+log_onexit cleanup_tunable
+
+if is_kmemleak; then
+	log_unsupported "Test case runs slowly when kmemleak is enabled"
+fi
+
+#
+# Disable scrubbing after a sequential resilver to verify the resilver
+# alone is able to reconstruct the data without the help of a scrub.
+#
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+for replace_mode in "healing" "sequential"; do
+	# "sequential" adds -s to zpool replace; "healing" passes no flag.
+	if [[ "$replace_mode" = "sequential" ]]; then
+		flags="-s"
+	else
+		flags=""
+	fi
+
+	parity=1
+	spares=5
+	data=$(random_int_between 1 4)
+	children=10
+	draid="draid${parity}:${data}d:${children}c:${spares}s"
+
+	setup_test_env $TESTPOOL $draid $children
+
+	#
+	# Perform a variety of replacements to normal and distributed spares
+	# for a variety of different vdev configurations to exercise different
+	# resilver code paths. The final configuration is expected to be:
+	#
+	# NAME                                  STATE     READ WRITE CKSUM
+	# testpool                              DEGRADED     0     0     0
+	#   draid1:1d:10c:5s-0                  DEGRADED     0     0     0
+	#     /var/tmp/basedir.28683/new_vdev0  ONLINE       0     0     0
+	#     /var/tmp/basedir.28683/new_vdev1  ONLINE       0     0     0
+	#     spare-2                           DEGRADED     0     0     0
+	#       /var/tmp/basedir.28683/vdev2    FAULTED      0     0     0
+	#       draid1-0-3                      ONLINE       0     0     0
+	#     spare-3                           DEGRADED     0     0     0
+	#       /var/tmp/basedir.28683/vdev3    FAULTED      0     0     0
+	#       draid1-0-4                      ONLINE       0     0     0
+	#     /var/tmp/basedir.28683/vdev4      ONLINE       0     0     0
+	#     /var/tmp/basedir.28683/vdev5      ONLINE       0     0     0
+	#     /var/tmp/basedir.28683/vdev6      ONLINE       0     0     0
+	#     draid1-0-0                        ONLINE       0     0     0
+	#     spare-8                           DEGRADED     0     0     0
+	#       /var/tmp/basedir.28683/vdev8    FAULTED      0     0     0
+	#       draid1-0-1                      ONLINE       0     0     0
+	#     spare-9                           ONLINE       0     0     0
+	#       /var/tmp/basedir.28683/vdev9    ONLINE       0     0     0
+	#       draid1-0-2                      ONLINE       0     0     0
+	# spares
+	#   draid1-0-0                          INUSE     currently in use
+	#   draid1-0-1                          INUSE     currently in use
+	#   draid1-0-2                          INUSE     currently in use
+	#   draid1-0-3                          INUSE     currently in use
+	#   draid1-0-4                          INUSE     currently in use
+	#
+
+	# Distributed spare which replaces original online device
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev7 "ONLINE"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev7 draid1-0-0
+	log_must zpool detach $TESTPOOL $BASEDIR/vdev7
+	log_must check_vdev_state $TESTPOOL draid1-0-0 "ONLINE"
+	log_must check_hotspare_state $TESTPOOL draid1-0-0 "INUSE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Distributed spare in mirror with original device faulted
+	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev8 "FAULTED"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev8 draid1-0-1
+	log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED"
+	log_must check_vdev_state $TESTPOOL draid1-0-1 "ONLINE"
+	log_must check_hotspare_state $TESTPOOL draid1-0-1 "INUSE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Distributed spare in mirror with original device still online
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev9 "ONLINE"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev9 draid1-0-2
+	log_must check_vdev_state $TESTPOOL spare-9 "ONLINE"
+	log_must check_vdev_state $TESTPOOL draid1-0-2 "ONLINE"
+	log_must check_hotspare_state $TESTPOOL draid1-0-2 "INUSE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Normal faulted device replacement
+	new_vdev0="$BASEDIR/new_vdev0"
+	log_must truncate -s $MINVDEVSIZE $new_vdev0
+	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev0
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0
+	log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Distributed spare faulted device replacement
+	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev2 "FAULTED"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev2 draid1-0-3
+	log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED"
+	log_must check_vdev_state $TESTPOOL draid1-0-3 "ONLINE"
+	log_must check_hotspare_state $TESTPOOL draid1-0-3 "INUSE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Normal online device replacement
+	new_vdev1="$BASEDIR/new_vdev1"
+	log_must truncate -s $MINVDEVSIZE $new_vdev1
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE"
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1
+	log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Distributed spare online device replacement (then fault)
+	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 draid1-0-4
+	log_must check_vdev_state $TESTPOOL spare-3 "ONLINE"
+	log_must check_vdev_state $TESTPOOL draid1-0-4 "ONLINE"
+	log_must check_hotspare_state $TESTPOOL draid1-0-4 "INUSE"
+	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3
+	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED"
+	log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED"
+	log_must verify_pool $TESTPOOL
+	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+	# Verify the original data is valid
+	log_must is_data_valid $TESTPOOL
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	cleanup
+done
+
+log_pass "Verify dRAID resilver"

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_mirror.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_mirror.ksh
new file mode 100755
index 0000000..b7b791b
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_mirror.ksh

@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A mirrored pool can withstand N-1 devices failing or missing.
+#
+# STRATEGY:
+#	1. Create N virtual disk files (N = random_int_between 2 5).
+#	2. Create a mirror pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record the checksum information of all files and directories.
+#	5. Damage at most N-1 of the virtual disk files.
+#	6. Verify the data is correct to prove a mirror can withstand N-1
+#	   failing devices.
+#
+
+verify_runnable "global"
+
+log_assert "Verify mirrored pool can withstand N-1 devices are failing or missing."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 2 5)
+setup_test_env $TESTPOOL mirror $cnt
+
+typeset -i i=1
+
+# Data corruption: for i = 1 .. cnt-1, call damage_devs with "label",
+# require the data to still validate, then clear the pool's errors.
+# NOTE(review): $i is presumably the number of devices damaged - confirm.
+while (( i < cnt )); do
+	damage_devs $TESTPOOL $i "label"
+	log_must is_data_valid $TESTPOOL
+	log_must clear_errors $TESTPOOL
+
+	(( i +=1 ))
+done
+
+# Bad devices: restart at i = 1 and call damage_devs without "label";
+# after the data validates, recover_bad_missing_devs must succeed for
+# the same $i.
+i=1
+while (( i < cnt )); do
+        damage_devs $TESTPOOL $i
+        log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+
+	(( i +=1 ))
+done
+
+# Missing devices: restart at i = 1 and use remove_devs instead; the
+# data must still validate and recover_bad_missing_devs must succeed
+# for each $i.
+i=1
+while (( i < cnt )); do
+        remove_devs $TESTPOOL $i
+        log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+
+	(( i +=1 ))
+done
+
+log_pass "Mirrored pool can withstand N-1 devices failing as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
new file mode 100755
index 0000000..d736883
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh

@@ -0,0 +1,248 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	RAIDZ should provide redundancy
+#
+# STRATEGY:
+#	1. Create block device files for the test raidz pool
+#	2. For each parity value [1..3]
+#	    - create raidz pool
+#	    - fill it with some directories/files
+#	    - verify self-healing by overwriting devices
+#	    - verify resilver by replacing devices
+#	    - verify scrub by zeroing devices
+#	    - destroy the raidz pool
+
+typeset -r devs=6
+typeset -r dev_size_mb=512
+
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+
+function cleanup # destroy the test pool, remove device files, restore tunable
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+	# Remove dev-0 .. dev-$devs; the range includes the extra dev-$devs file.
+	for i in {0..$devs}; do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+	# Put PREFETCH_DISABLE back to the value saved when the script started.
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+}
+
+function test_selfheal # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	# Damage <parity> devices while exported, then read the files and scrub.
+	log_must zpool export $pool
+	# Zero devices 0..nparity-1, skipping the first 4 MiB of each file.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+	# Checksum every file under $pool/fs; the output itself is discarded.
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	#
+	# Scrub the pool because the find command will only self-heal blocks
+	# from the files which were read.  Before overwriting additional
+	# devices we need to repair all of the blocks in the pool.
+	#
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+	# Second pass: zero devices nparity..2*nparity-1 in the same way.
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+	# Read the files again, then scrub, exactly as in the first pass.
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+function test_resilver # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	# Pass 1: offline, label-clear and replace devices 0..nparity-1.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+	# Clear the labels of the offlined devices while the pool is exported.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+	# Replace each cleared device with itself (only one device path is given).
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+	# A non-zero cksum_pool result after the replace fails the test.
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+	# Pass 2: repeat the same steps for devices nparity..2*nparity-1.
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+}
+
+function test_scrub # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+	# Zero <parity> devices while exported, then check a scrub ends error-free.
+	log_must zpool export $pool
+	# Zero devices 0..nparity-1 past their first 4 MiB; dd must succeed.
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+	# Unlike test_selfheal, no files are read first: only the scrub runs.
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+	# Second pass: zero devices nparity..2*nparity-1 in the same way.
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 1 2 3; do
+	raid=raidz$nparity
+	dir=$TEST_BASE_DIR
+
+	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+	log_must zfs set primarycache=metadata $TESTPOOL
+
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R
+
+	typeset pool_size=$(get_pool_prop size $TESTPOOL)
+
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	test_selfheal $TESTPOOL $nparity $dir
+	test_resilver $TESTPOOL $nparity $dir
+	test_scrub $TESTPOOL $nparity $dir
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "raidz redundancy test succeeded."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz1.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz1.ksh
new file mode 100755
index 0000000..a73890e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz1.ksh

@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A raidz pool can withstand at most 1 device failing or missing.
+#
+# STRATEGY:
+#	1. Create N(>2,<5) virtual disk files.
+#	2. Create raidz pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage one of the virtual disk files.
+#	6. Verify the data is correct to prove raidz can withstand 1 device
+#	   failing.
+#
+
+verify_runnable "global"
+
+log_assert "Verify raidz pool can withstand one device failing."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 2 5)
+setup_test_env $TESTPOOL raidz $cnt
+
+#
+# Inject data corruption error for raidz pool
+#
+damage_devs $TESTPOOL 1 "label"
+log_must is_data_valid $TESTPOOL
+log_must clear_errors $TESTPOOL
+
+#
+# Inject bad device error for raidz pool
+#
+damage_devs $TESTPOOL 1
+log_must is_data_valid $TESTPOOL
+log_must recover_bad_missing_devs $TESTPOOL 1
+
+#
+# Inject missing device error for raidz pool
+#
+remove_devs $TESTPOOL 1
+log_must is_data_valid $TESTPOOL
+
+log_pass "raidz pool can withstand one device failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz2.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz2.ksh
new file mode 100755
index 0000000..94b9b88
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz2.ksh

@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A raidz2 pool can withstand 2 devices failing or missing.
+#
+# STRATEGY:
+#	1. Create N(>3,<5) virtual disk files.
+#	2. Create raidz2 pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage at most two of the virtual disk files.
+#	6. Verify the data is correct to prove raidz2 can withstand 2 devices
+#	   failing.
+#
+
+verify_runnable "global"
+
+log_assert "Verify raidz2 pool can withstand two devices failing."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 3 5)
+setup_test_env $TESTPOOL raidz2 $cnt
+
+#
+# Inject data corruption errors for raidz2 pool
+#
+for i in 1 2; do
+	damage_devs $TESTPOOL $i "label"
+	log_must is_data_valid $TESTPOOL
+	log_must clear_errors $TESTPOOL
+done
+
+#
+# Inject bad devices errors for raidz2 pool
+#
+for i in 1 2; do
+	damage_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+#
+# Inject missing device errors for raidz2 pool
+#
+for i in 1 2; do
+	remove_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+log_pass "raidz2 pool can withstand two devices failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz3.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz3.ksh
new file mode 100755
index 0000000..0a01c47
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz3.ksh

@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A raidz3 pool can withstand 3 devices failing or missing.
+#
+# STRATEGY:
+#	1. Create N virtual disk files, N chosen by random_int_between 4 5.
+#	2. Create raidz3 pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage at most three of the virtual disk files.
+#	6. Verify the data is correct to prove raidz3 can withstand 3 devices
+#	   failing.
+#
+
+verify_runnable "global"
+
+log_assert "Verify raidz3 pool can withstand three devices failing."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 4 5)
+setup_test_env $TESTPOOL raidz3 $cnt
+
+#
+# Inject data corruption errors for raidz3 pool
+#
+for i in 1 2 3; do
+	damage_devs $TESTPOOL $i "label"
+	log_must is_data_valid $TESTPOOL
+	log_must clear_errors $TESTPOOL
+done
+
+#
+# Inject bad devices errors for raidz3 pool
+#
+for i in 1 2 3; do
+	damage_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+#
+# Inject missing device errors for raidz3 pool
+#
+for i in 1 2 3; do
+	remove_devs $TESTPOOL $i
+	log_must is_data_valid $TESTPOOL
+	log_must recover_bad_missing_devs $TESTPOOL $i
+done
+
+log_pass "raidz3 pool can withstand three devices failing passed."

diff --git a/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_stripe.ksh b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_stripe.ksh
new file mode 100755
index 0000000..b2c4a85
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_stripe.ksh

@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	A striped pool has no data redundancy. Any device error will
+#	cause data corruption.
+#
+# STRATEGY:
+#	1. Create N virtual disk files.
+#	2. Create stripe pool based on the virtual disk files.
+#	3. Fill the filesystem with directories and files.
+#	4. Record all the files and directories checksum information.
+#	5. Damage one of the virtual disk files.
+#	6. Scrub the pool and verify that it is no longer healthy.
+#
+
+verify_runnable "global"
+
+log_assert "Verify striped pool have no data redundancy."
+log_onexit cleanup
+
+typeset -i cnt=$(random_int_between 2 5)
+setup_test_env $TESTPOOL "" $cnt
+
+damage_devs $TESTPOOL 1 "keep_label"
+log_must zpool scrub -w $TESTPOOL
+# A stripe has no redundancy, so the pool must not be healthy after the scrub.
+if is_healthy $TESTPOOL ; then
+	log_fail "$TESTPOOL should not be healthy."
+fi
+
+log_pass "Striped pool has no data redundancy as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh b/zfs/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh
index e7f40ec..6e4da36 100755
--- a/zfs/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/refquota/refquota_008_neg.ksh

@@ -39,10 +39,10 @@
 
 verify_runnable "both"
 
-oldvalue=$(get_tunable spa_asize_inflation)
+oldvalue=$(get_tunable SPA_ASIZE_INFLATION)
 function cleanup
 {
-	set_tunable32 spa_asize_inflation $oldvalue
+	set_tunable32 SPA_ASIZE_INFLATION $oldvalue
         log_must zfs destroy -rf $TESTPOOL/$TESTFS
         log_must zfs create $TESTPOOL/$TESTFS
         log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
@@ -50,7 +50,7 @@
 
 log_onexit cleanup
 
-set_tunable32 spa_asize_inflation 2
+set_tunable32 SPA_ASIZE_INFLATION 2
 
 TESTFILE='testfile'
 FS=$TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_002_pos.ksh
index d4c0a4f..a8f5863 100755
--- a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_002_pos.ksh

@@ -50,11 +50,9 @@
 	if is_global_zone ; then
 		log_must zfs set refreservation=none $TESTPOOL
 
-		if datasetexists $TESTPOOL@snap ; then
-			log_must zfs destroy -f $TESTPOOL@snap
-		fi
+		datasetexists $TESTPOOL@snap && destroy_dataset $TESTPOOL@snap -f
 	fi
-	log_must zfs destroy -rf $TESTPOOL/$TESTFS
+	destroy_dataset $TESTPOOL/$TESTFS -rf
 	log_must zfs create $TESTPOOL/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_005_pos.ksh
index 8c044ec..1ccc982 100755
--- a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_005_pos.ksh

@@ -45,9 +45,9 @@
 
 function cleanup
 {
-	log_must zfs destroy -rf $TESTPOOL/$TESTFS
-	log_must zfs create $TESTPOOL/$TESTFS
-	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
+	destroy_dataset "$fs" "-rf"
+	log_must zfs create $fs
+	log_must zfs set mountpoint=$TESTDIR $fs
 }
 
 log_assert "Volume (ref)reservation is not limited by volsize"

diff --git a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh
index c904a80..ff79764 100755
--- a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_multi_raidz.ksh

@@ -61,7 +61,11 @@
 # that small test disks may fill before creating small volumes.  However,
 # testing 512b and 1K blocks on ashift=9 pools is an ok approximation for
 # testing the problems that arise from 4K and 8K blocks on ashift=12 pools.
-bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
+if is_freebsd; then
+	bps=$(diskinfo -v ${alldisks[0]} | awk '/sectorsize/ { print $1 }')
+elif is_linux; then
+	bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
+fi
 case "$bps" in
 512)
 	allshifts=(9 10 17)

diff --git a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh
index 9f25242..056c791 100755
--- a/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/refreserv/refreserv_raidz.ksh

@@ -63,7 +63,11 @@
 # that small test disks may fill before creating small volumes.  However,
 # testing 512b and 1K blocks on ashift=9 pools is an ok approximation for
 # testing the problems that arise from 4K and 8K blocks on ashift=12 pools.
-bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
+if is_freebsd; then
+	bps=$(diskinfo -v ${alldisks[0]} | awk '/sectorsize/ { print $1 }')
+elif is_linux; then
+	bps=$(lsblk -nrdo min-io /dev/${alldisks[0]})
+fi
 log_must test "$bps" -eq 512 -o "$bps" -eq 4096
 case "$bps" in
 512)
@@ -106,7 +110,7 @@
 			block_device_wait "/dev/zvol/$vol"
 			log_must dd if=/dev/zero of=/dev/zvol/$vol \
 			    bs=1024k count=$volsize
-			sync
+			sync_pool $TESTPOOL
 
 			ref=$(zfs get -Hpo value referenced "$vol")
 			refres=$(zfs get -Hpo value refreservation "$vol")

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/Makefile.am b/zfs/tests/zfs-tests/tests/functional/removal/Makefile.am
index 1551a92..878935b 100644
--- a/zfs/tests/zfs-tests/tests/functional/removal/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/removal/Makefile.am

@@ -19,17 +19,18 @@
 	cleanup.ksh removal_all_vdev.ksh removal_cancel.ksh \
 	removal_check_space.ksh removal_condense_export.ksh \
 	removal_multiple_indirection.ksh \
-	removal_remap_deadlists.ksh removal_nopwrite.ksh removal_remap.ksh \
+	removal_nopwrite.ksh removal_remap_deadlists.ksh \
 	removal_reservation.ksh removal_resume_export.ksh \
 	removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \
 	removal_with_dedup.ksh removal_with_errors.ksh \
 	removal_with_export.ksh removal_with_faulted.ksh \
-	removal_with_ganging.ksh removal_with_remap.ksh \
+	removal_with_ganging.ksh \
 	removal_with_remove.ksh removal_with_scrub.ksh \
 	removal_with_send.ksh removal_with_send_recv.ksh \
 	removal_with_snapshot.ksh removal_with_write.ksh \
 	removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \
-	remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh
+	remove_raidz.ksh remove_expanded.ksh remove_indirect.ksh \
+	remove_attach_mirror.ksh
 
 dist_pkgdata_DATA = \
 	removal.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal.kshlib b/zfs/tests/zfs-tests/tests/functional/removal/removal.kshlib
index fa0174d..140ac38 100644
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal.kshlib

@@ -28,15 +28,13 @@
 	typeset pool=$1
 	typeset callback=$2
 
-	while is_pool_removing $pool; do
-		sleep 1
-	done
+	log_must zpool wait -t remove $pool
 
 	#
 	# The pool state changes before the TXG finishes syncing; wait for
 	# the removal to be completed on disk.
 	#
-	sync_pool
+	sync_pool $pool
 
 	log_must is_pool_removed $pool
 	return 0
@@ -62,7 +60,8 @@
 	typeset callback=$3
 
 	shift 3
-	set_tunable32 zfs_removal_suspend_progress 1
+	log_onexit_push set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
+	set_tunable32 REMOVAL_SUSPEND_PROGRESS 1
 
 	log_must zpool remove $pool $disk
 
@@ -81,7 +80,8 @@
 	#
 	log_must is_pool_removing $pool
 
-	set_tunable32 zfs_removal_suspend_progress 0
+	set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
+	log_onexit_pop
 
 	log_must wait_for_removal $pool
 	log_mustnot vdevs_in_pool $pool $disk
@@ -99,7 +99,7 @@
 {
 	typeset file=$1
 	typeset block_size=$2
-	typeset file_size=$(stat -c%s $file 2>/dev/null)
+	typeset file_size=$(stat_size $file 2>/dev/null)
 	typeset nblocks=$((file_size / block_size))
 
 	[[ -w $file ]] || return 1

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh
index afb318e..e97dc5e 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_cancel.ksh

@@ -42,7 +42,7 @@
 	#
 	# Reset tunable.
 	#
-	log_must set_tunable32 zfs_removal_suspend_progress 0
+	log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 0
 }
 log_onexit cleanup
 
@@ -67,21 +67,16 @@
 log_must zpool add -f $TESTPOOL $NOTREMOVEDISK
 
 #
+# Block removal.
+#
+log_must set_tunable32 REMOVAL_SUSPEND_PROGRESS 1
+
+#
 # Start removal.
 #
 log_must zpool remove $TESTPOOL $REMOVEDISK
 
 #
-# Sleep a bit and hopefully allow removal to copy some data.
-#
-log_must sleep 1
-
-#
-# Block removal.
-#
-log_must set_tunable32 zfs_removal_suspend_progress 1
-
-#
 # Only for debugging purposes in test logs.
 #
 log_must zpool status $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_condense_export.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_condense_export.ksh
index ad33cae..8de17ff 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_condense_export.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_condense_export.ksh

@@ -21,21 +21,19 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/removal/removal.kshlib
 
-if is_linux; then
-	log_unsupported "ZDB fails during concurrent pool activity."
-fi
-
 function reset
 {
-	log_must set_tunable64 zfs_condense_indirect_commit_entry_delay_ms 0
-	log_must set_tunable64 zfs_condense_min_mapping_bytes 131072
+	log_must set_tunable64 CONDENSE_INDIRECT_COMMIT_ENTRY_DELAY_MS 0
+	log_must set_tunable64 CONDENSE_INDIRECT_OBSOLETE_PCT 25
+	log_must set_tunable64 CONDENSE_MIN_MAPPING_BYTES 131072
 	default_cleanup_noexit
 }
 
 default_setup_noexit "$DISKS" "true"
 log_onexit reset
-log_must set_tunable64 zfs_condense_indirect_commit_entry_delay_ms 1000
-log_must set_tunable64 zfs_condense_min_mapping_bytes 1
+log_must set_tunable64 CONDENSE_INDIRECT_COMMIT_ENTRY_DELAY_MS 5000
+log_must set_tunable64 CONDENSE_INDIRECT_OBSOLETE_PCT 5
+log_must set_tunable64 CONDENSE_MIN_MAPPING_BYTES 1
 
 log_must zfs set recordsize=512 $TESTPOOL/$TESTFS
 
@@ -77,9 +75,16 @@
 log_must wait_for_removal $TESTPOOL
 log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK
 
-log_must zfs remap $TESTPOOL/$TESTFS
+#
+# Touch one block under each L1 indirect block, so that the other data blocks
+# will be remapped to their concrete locations.  These parameters assume
+# recordsize=512, indirect block size of 128K (1024 block pointers per
+# indirect block), and file size of less than 20*1024 blocks (10MB).
+#
+log_must stride_dd -i /dev/urandom -o $TESTDIR/file -b 512 -c 20 -s 1024
+
 sync_pool $TESTPOOL
-sleep 5
+sleep 4
 sync_pool $TESTPOOL
 log_must zpool export $TESTPOOL
 zdb -e -p $REMOVEDISKPATH $TESTPOOL | grep 'Condensing indirect vdev' || \

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_multiple_indirection.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_multiple_indirection.ksh
index 97b67a4..6c52fd7 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_multiple_indirection.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_multiple_indirection.ksh

@@ -57,14 +57,14 @@
 	default_cleanup_noexit
 	log_must rm -f $DISKS
 
-	# reset zfs_remove_max_segment to 1M
-	set_tunable32 zfs_remove_max_segment 1048576
+	# reset REMOVE_MAX_SEGMENT to 1M
+	set_tunable32 REMOVE_MAX_SEGMENT 1048576
 }
 
 log_onexit cleanup
 
-# set zfs_remove_max_segment to 32k
-log_must set_tunable32 zfs_remove_max_segment 32768
+# set REMOVE_MAX_SEGMENT to 32k
+log_must set_tunable32 REMOVE_MAX_SEGMENT 32768
 
 log_must dd if=/dev/urandom of=$TESTDIR/$TESTFILE0 bs=128k count=1
 FILE_CONTENTS=$(<$TESTDIR/$TESTFILE0)

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh
index e5d8261..cede81a 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh

@@ -29,7 +29,7 @@
 origin="$TESTPOOL/$TESTFS"
 
 log_must zfs set compress=on $origin
-log_must zfs set checksum=edonr $origin
+log_must zfs set checksum=skein $origin
 
 log_must zfs set recordsize=8k $origin
 dd if=/dev/urandom of=$TESTDIR/file_8k bs=1024k count=$MEGS oflag=sync \

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_remap.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_remap.ksh
deleted file mode 100755
index 5239ef3..0000000
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_remap.ksh
+++ /dev/null

@@ -1,126 +0,0 @@
-#! /bin/ksh -p
-#
-# CDDL HEADER START
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/removal/removal.kshlib
-
-# N.B. The 'zfs remap' command has been disabled and may be removed.
-export ZFS_REMAP_ENABLED=YES
-
-default_setup_noexit "$DISKS"
-
-
-function cleanup
-{
-	set_tunable64 zfs_condense_min_mapping_bytes 131072
-	default_cleanup_noexit
-}
-
-log_onexit cleanup
-
-log_must set_tunable64 zfs_condense_min_mapping_bytes 1
-
-log_must zfs set recordsize=512 $TESTPOOL/$TESTFS
-
-#
-# Create a large file so that we know some of the blocks will be on the
-# removed device, and hence eligible for remapping.
-#
-log_must dd if=/dev/urandom of=$TESTDIR/file bs=$((2**12)) count=$((2**9))
-
-#
-# Randomly rewrite some of blocks in the file so that there will be holes and
-# we will not be able to remap the entire file in a few huge chunks.
-#
-for i in $(seq $((2**12))); do
-	#
-	# We have to sync periodically so that all the writes don't end up in
-	# the same txg. If they were all in the same txg, only the last write
-	# would go through and we would not have as many allocations to
-	# fragment the file.
-	#
-	((i % 100 > 0 )) || sync_pool || log_fail "Could not sync."
-        random_write $TESTDIR/file $((2**9)) || \
-            log_fail "Could not random write."
-done
-
-#
-# Remap should quietly succeed as a noop before a removal.
-#
-log_must zfs remap $TESTPOOL/$TESTFS
-remaptxg_before=$(zfs get -H -o value remaptxg $TESTPOOL/$TESTFS)
-(( $? == 0 )) || log_fail "Could not get remaptxg."
-[[ $remaptxg_before == "-" ]] || \
-    log_fail "remaptxg ($remaptxg_before) had value before a removal"
-
-log_must zpool remove $TESTPOOL $REMOVEDISK
-log_must wait_for_removal $TESTPOOL
-log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK
-
-#
-# remaptxg should not be set if we haven't done a remap.
-#
-remaptxg_before=$(zfs get -H -o value remaptxg $TESTPOOL/$TESTFS)
-(( $? == 0 )) || log_fail "Could not get remaptxg."
-[[ $remaptxg_before == "-" ]] || \
-    log_fail "remaptxg ($remaptxg_before) had value before a removal"
-
-mapping_size_before=$(indirect_vdev_mapping_size $TESTPOOL)
-log_must zfs remap $TESTPOOL/$TESTFS
-
-# Try to wait for a condense to finish.
-for i in {1..5}; do
-	sleep 5
-	sync_pool
-done
-mapping_size_after=$(indirect_vdev_mapping_size $TESTPOOL)
-
-#
-# After the remap, there should not be very many blocks referenced. The reason
-# why our threshold is as high as 512 is because our ratio of metadata to
-# user data is relatively high, with only 64M of user data on the file system.
-#
-(( mapping_size_after < mapping_size_before )) || \
-    log_fail "Mapping size did not decrease after remap: " \
-    "$mapping_size_before before to $mapping_size_after after."
-(( mapping_size_after < 512 )) || \
-    log_fail "Mapping size not small enough after remap: " \
-    "$mapping_size_before before to $mapping_size_after after."
-
-#
-# After a remap, the remaptxg should be set to a non-zero value.
-#
-remaptxg_after=$(zfs get -H -o value remaptxg $TESTPOOL/$TESTFS)
-(( $? == 0 )) || log_fail "Could not get remaptxg."
-log_note "remap txg after remap is $remaptxg_after"
-(( remaptxg_after > 0 )) || log_fail "remaptxg not increased"
-
-#
-# Remap should quietly succeed as a noop if there have been no removals since
-# the last remap.
-#
-log_must zfs remap $TESTPOOL/$TESTFS
-remaptxg_again=$(zfs get -H -o value remaptxg $TESTPOOL/$TESTFS)
-(( $? == 0 )) || log_fail "Could not get remaptxg."
-log_note "remap txg after second remap is $remaptxg_again"
-(( remaptxg_again == remaptxg_after )) || \
-    log_fail "remap not noop if there has been no removal"
-
-log_pass "Remapping a fs caused mapping size to decrease."

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_remap_deadlists.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_remap_deadlists.ksh
index a2f6580..9348022 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_remap_deadlists.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_remap_deadlists.ksh

@@ -21,9 +21,6 @@
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/removal/removal.kshlib
 
-# N.B. The 'zfs remap' command has been disabled and may be removed.
-export ZFS_REMAP_ENABLED=YES
-
 default_setup_noexit "$DISKS"
 log_onexit default_cleanup_noexit
 
@@ -37,7 +34,7 @@
 log_must dd if=/dev/zero of=$TESTDIR/file bs=1024k count=100 \
     conv=notrunc seek=200
 
-if is_linux; then
+if is_linux || is_freebsd; then
 	log_must attempt_during_removal $TESTPOOL $REMOVEDISK zdb -cd $TESTPOOL
 else
 	log_must attempt_during_removal $TESTPOOL $REMOVEDISK
@@ -45,7 +42,14 @@
 log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK
 log_must zdb -cd $TESTPOOL
 
-log_must zfs remap $TESTPOOL/$TESTFS
+#
+# Touch one block under each L1 indirect block, so that the other data blocks
+# will be remapped to their concrete locations.  These parameters assume
+# recordsize=128K, indirect block size of 128K (1024 block pointers per
+# indirect block), and file size of less than 3*1024 blocks (384MB).
+#
+log_must stride_dd -i /dev/urandom -o $TESTDIR/file -b 131072 -c 3 -s 1024
+
 log_must zdb -cd $TESTPOOL
 
 log_must zfs snapshot $TESTPOOL/$TESTFS@snap-post3

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_resume_export.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_resume_export.ksh
index 4f1e63c..142e727 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_resume_export.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_resume_export.ksh

@@ -43,13 +43,28 @@
 
 function cleanup
 {
-	log_must zinject -c all
+	zinject -c all
 	default_cleanup_noexit
 }
 
 function callback
 {
+	#
+	# Inject an error so export fails after having just suspended
+	# the removal thread. [spa_inject_ref gets incremented]
+	#
+	log_must zinject -d $REMOVEDISK -D 10:1 $TESTPOOL
+
+	#
+	# Because of the above error export should fail.
+	#
 	log_mustnot zpool export $TESTPOOL
+
+	#
+	# Let the removal finish.
+	#
+	log_must zinject -c all
+
 	return 0
 }
 
@@ -78,13 +93,7 @@
 log_must zpool add -f $TESTPOOL $NOTREMOVEDISK
 
 #
-# Inject an error so export fails after having just suspended
-# the removal thread. [spa_inject_ref gets incremented]
-#
-log_must zinject -d $REMOVEDISK -D 10:1 $TESTPOOL
-
-#
-# Because of the above error export should fail.
+# Attempt the export with errors injected.
 #
 log_must attempt_during_removal $TESTPOOL $REMOVEDISK callback
 

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh
index 2ef5670..9d5143e 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh

@@ -64,9 +64,7 @@
 {
 	typeset pool=$1
 
-	while is_pool_removing $pool; do
-		sleep 1
-	done
+	log_must zpool wait -t remove $pool
 
 	#
 	# The pool state changes before the TXG finishes syncing; wait for
@@ -85,8 +83,11 @@
 
 echo $FILE_CONTENTS  >$TESTDIR/$TESTFILE0
 log_must [ "x$(<$TESTDIR/$TESTFILE0)" = "x$FILE_CONTENTS" ]
-log_must file_write -o create -f $TESTDIR/$TESTFILE1 -b $((2**20)) -c $((2**7))
-sync_pool $TESTPOOL
+log_must file_write -o create -f $TESTDIR/$TESTFILE1 -b $((2**20)) -c $((2**8))
+
+# Flush the ARC to minimize cache effects.
+log_must zpool export $TESTPOOL
+log_must zpool import -d $TMPDIR $TESTPOOL
 
 # Verify that unexpected read errors automatically cancel the removal.
 log_must zinject -d $DISK0 -e io -T all -f 100 $TESTPOOL
@@ -95,6 +96,10 @@
 log_must vdevs_in_pool $TESTPOOL mirror-0
 log_must zinject -c all
 
+# Flush the ARC to minimize cache effects.
+log_must zpool export $TESTPOOL
+log_must zpool import -d $TMPDIR $TESTPOOL
+
 # Verify that unexpected write errors automatically cancel the removal.
 log_must zinject -d $DISK3 -e io -T all -f 100 $TESTPOOL
 log_must zpool remove $TESTPOOL mirror-0

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh
index 0ec358a..f76f76d 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh

@@ -26,7 +26,7 @@
 
 function callback
 {
-	is_linux && test_removal_with_operation_kill
+	test_removal_with_operation_kill
 	log_must zpool export $TESTPOOL
 
 	#

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_ganging.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_ganging.ksh
index 35c90e6..e3e6359 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_ganging.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_ganging.ksh

@@ -23,12 +23,12 @@
 
 function cleanup
 {
-	log_must set_tunable64 metaslab_force_ganging $((2**17 + 1))
+	log_must set_tunable64 METASLAB_FORCE_GANGING $((2**17 + 1))
 	default_cleanup_noexit
 }
 
 default_setup_noexit "$DISKS"
-log_must set_tunable64 metaslab_force_ganging $((2**14))
+log_must set_tunable64 METASLAB_FORCE_GANGING $((2**14))
 log_onexit cleanup
 
 FILE_CONTENTS="Leeloo Dallas mul-ti-pass."

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_remap.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_remap.ksh
deleted file mode 100755
index 6f56740..0000000
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_remap.ksh
+++ /dev/null

@@ -1,32 +0,0 @@
-#! /bin/ksh -p
-#
-# CDDL HEADER START
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2015, 2017 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/removal/removal.kshlib
-
-# N.B. The 'zfs remap' command has been disabled and may be removed.
-export ZFS_REMAP_ENABLED=YES
-
-default_setup_noexit "$DISKS"
-log_onexit default_cleanup_noexit
-
-test_removal_with_operation zfs remap $TESTPOOL/$TESTFS
-
-log_pass "Can remap a filesystem during removal"

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_send.ksh b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_send.ksh
index 59e66ac..a082478 100755
--- a/zfs/tests/zfs-tests/tests/functional/removal/removal_with_send.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/removal/removal_with_send.ksh

@@ -28,7 +28,7 @@
 {
 	create_snapshot $TESTPOOL/$TESTFS $TESTSNAP
 	log_must ksh -c \
-	    "zfs send $TESTPOOL/$TESTFS@$TESTSNAP >/dev/null"
+	    "zfs send $TESTPOOL/$TESTFS@$TESTSNAP >$TEST_BASE_DIR/devnull"
 	return 0
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/removal/remove_attach_mirror.ksh b/zfs/tests/zfs-tests/tests/functional/removal/remove_attach_mirror.ksh
new file mode 100755
index 0000000..9bbb07c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/removal/remove_attach_mirror.ksh

@@ -0,0 +1,73 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+#	Resilvering results in no CKSUM errors in pools with indirect vdevs.
+#
+# STRATEGY:
+#	1. Create a pool with two top-vdevs
+#	2. Write some files
+#	3. Remove one of the top-vdevs
+#	4. Reattach it to make a mirror
+#
+
+TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
+
+DISK1="$TMPDIR/dsk1"
+DISK2="$TMPDIR/dsk2"
+DISKS="$DISK1 $DISK2"
+
+# fio options
+export DIRECTORY=/$TESTPOOL
+export NUMJOBS=16
+export RUNTIME=10
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export BLOCKSIZE=4K
+export SYNC_TYPE=0
+export DIRECT=1
+export FILE_SIZE=128M
+
+function cleanup
+{
+	default_cleanup_noexit
+	log_must rm -f $DISKS
+}
+
+# Register cleanup first so the backing files are removed on any failure.
+log_onexit cleanup
+log_must mkfile 4g $DISK1
+log_must mkfile 4g $DISK2
+log_must zpool create -O recordsize=4k $TESTPOOL $DISK1 $DISK2
+
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+log_must fio $FIO_SCRIPTS/sequential_reads.fio
+
+log_must zpool remove -w $TESTPOOL $DISK2
+log_must zpool attach -w $TESTPOOL $DISK1 $DISK2
+
+verify_pool $TESTPOOL
+
+log_pass "Resilvering results in no CKSUM errors with indirect vdevs"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/Makefile.am b/zfs/tests/zfs-tests/tests/functional/replacement/Makefile.am
index d47fcd5..fe6e491 100644
--- a/zfs/tests/zfs-tests/tests/functional/replacement/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/Makefile.am

@@ -2,9 +2,20 @@
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
-	replacement_001_pos.ksh \
-	replacement_002_pos.ksh \
-	replacement_003_pos.ksh
+	attach_import.ksh \
+	attach_multiple.ksh \
+	attach_rebuild.ksh \
+	attach_resilver.ksh \
+	detach.ksh \
+	rebuild_disabled_feature.ksh \
+	rebuild_multiple.ksh \
+	rebuild_raidz.ksh \
+	replace_import.ksh \
+	replace_rebuild.ksh \
+	replace_resilver.ksh \
+	resilver_restart_001.ksh \
+	resilver_restart_002.ksh \
+	scrub_cancel.ksh
 
 dist_pkgdata_DATA = \
 	replacement.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/attach_import.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/attach_import.ksh
new file mode 100755
index 0000000..e2749b1
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/attach_import.ksh

@@ -0,0 +1,67 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# Description:
+# Verify that on import an in progress attach operation is resumed.
+#
+# Strategy:
+# 1. For both healing and sequential resilvering.
+#    a. Create a pool
+#    b. Add a vdev with 'zpool attach' and resilver (-s) it.
+#    c. Export the pool
+#    d. Import the pool
+#    e. Verify the 'zpool attach' resumed resilvering
+#    f. Destroy the pool
+#
+
+function cleanup
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]}
+}
+
+log_assert "Verify attach is resumed on import"
+
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]}
+
+# Verify healing and sequential resilver resume on import.
+for arg in "" "-s"; do
+	log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[0]}
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+	log_must zpool attach $arg $TESTPOOL1 ${VDEV_FILES[0]} ${VDEV_FILES[1]}
+	log_must is_pool_resilvering $TESTPOOL1
+	log_must zpool export $TESTPOOL1
+	log_must zpool import -d $TEST_BASE_DIR $TESTPOOL1
+	log_must is_pool_resilvering $TESTPOOL1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must zpool wait -t resilver $TESTPOOL1
+	log_must is_pool_resilvered $TESTPOOL1
+	destroy_pool $TESTPOOL1
+done
+
+log_pass "Verify attach is resumed on import"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/attach_multiple.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/attach_multiple.ksh
new file mode 100755
index 0000000..5c38353
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/attach_multiple.ksh

@@ -0,0 +1,111 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# Description:
+# Verify that attach/detach work while resilvering and attaching
+# multiple vdevs.
+#
+# Strategy:
+# 1. Create a single vdev pool
+# 2. While healing or sequential resilvering:
+#    a. Attach a vdev to convert the pool to a mirror.
+#    b. Attach a vdev to convert the pool to a 3-way mirror.
+#    c. Verify the original vdev cannot be removed (no redundant copies)
+#    d. Detach a vdev.  Healing and sequential resilver remain running.
+#    e. Detach a vdev.  Healing resilver remains running, sequential
+#       resilver is canceled.
+#    f. Wait for resilver to complete.
+#
+
+function cleanup
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]}
+}
+
+log_assert "Verify attach/detach with multiple vdevs"
+
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]}
+
+# Create the single vdev pool used by both resilver modes.
+log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[0]}
+
+for replace_mode in "healing" "sequential"; do
+        #
+        # Resilvers abort the dsl_scan and reconfigure it for resilvering.
+        # Rebuilds cancel the dsl_scan and start the vdev_rebuild thread.
+        #
+        if [[ "$replace_mode" = "healing" ]]; then
+                flags=""
+        else
+                flags="-s"
+        fi
+
+	log_mustnot is_pool_resilvering $TESTPOOL1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+	# Attach first vdev (stripe -> mirror)
+	log_must zpool attach $flags $TESTPOOL1 \
+	    ${VDEV_FILES[0]} ${VDEV_FILES[1]}
+	log_must is_pool_resilvering $TESTPOOL1
+
+	# Attach second vdev (2-way -> 3-way mirror)
+	log_must zpool attach $flags $TESTPOOL1 \
+	    ${VDEV_FILES[1]} ${VDEV_FILES[2]}
+	log_must is_pool_resilvering $TESTPOOL1
+
+	# Original vdev cannot be detached until there is sufficient redundancy.
+	log_mustnot zpool detach $TESTPOOL1 ${VDEV_FILES[0]}
+
+	# Detach first vdev (resilver keeps running)
+	log_must zpool detach $TESTPOOL1 ${VDEV_FILES[1]}
+	log_must is_pool_resilvering $TESTPOOL1
+
+	#
+	# Detach second vdev.  There's a difference in behavior between
+	# healing and sequential resilvers.  A healing resilver will not be
+	# cancelled even though there's nothing on the original vdev which
+	# needs to be rebuilt.  A sequential resilver on the other hand is
+	# canceled when returning to a non-redundant striped layout.  At
+	# some point the healing resilver behavior should be updated to match
+	# the sequential resilver behavior.
+	#
+	log_must zpool detach $TESTPOOL1 ${VDEV_FILES[2]}
+
+        if [[ "$replace_mode" = "healing" ]]; then
+		log_must is_pool_resilvering $TESTPOOL1
+        else
+		log_mustnot is_pool_resilvering $TESTPOOL1
+        fi
+
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must zpool wait $TESTPOOL1
+done
+
+log_pass "Verify attach/detach with multiple vdevs"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/attach_rebuild.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/attach_rebuild.ksh
new file mode 100755
index 0000000..998d3ee
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/attach_rebuild.ksh

@@ -0,0 +1,173 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# 	Attaching disks during I/O should pass for supported pools.
+#
+# STRATEGY:
+#	1. Create multidisk pools (stripe/mirror/raidz/draid) and
+#	   start some random I/O
+#	2. Attach a disk to the pool.
+#	3. Verify the integrity of the file system and the resilvering.
+#
+# NOTE: Raidz does not support the sequential resilver (-s) option.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	if [[ -n "$child_pids" ]]; then
+		for wait_pid in $child_pids; do
+			kill $wait_pid
+		done
+	fi
+
+	if poolexists $TESTPOOL1; then
+		destroy_pool $TESTPOOL1
+	fi
+
+	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+
+[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
+
+[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
+
+[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
+
+[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
+
+[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
+
+options="$options -r "
+
+[[ -n "$options" ]] && options_display=$options
+
+child_pids=""
+
+function attach_test
+{
+	typeset -i iters=2
+	typeset -i index=0
+	typeset opt=$1
+	typeset disk1=$2
+	typeset disk2=$3
+
+	typeset i=0
+	while [[ $i -lt $iters ]]; do
+		log_note "Invoking file_trunc with: $options_display"
+		file_trunc $options $TESTDIR/$TESTFILE.$i &
+		typeset pid=$!
+
+		sleep 1
+
+		child_pids="$child_pids $pid"
+		((i = i + 1))
+	done
+
+	log_must zpool attach -sw $opt $TESTPOOL1 $disk1 $disk2
+
+	for wait_pid in $child_pids; do
+		kill $wait_pid
+	done
+	child_pids=""
+
+	log_must zpool export $TESTPOOL1
+	log_must zpool import -d $TESTDIR $TESTPOOL1
+	log_must zfs umount $TESTPOOL1/$TESTFS1
+	log_must zdb -cdui $TESTPOOL1/$TESTFS1
+	log_must zfs mount $TESTPOOL1/$TESTFS1
+	verify_pool $TESTPOOL1
+}
+
+specials_list=""
+i=0
+while [[ $i != 3 ]]; do
+	truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
+	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+	((i = i + 1))
+done
+
+#
+# Create a replacement disk special file.
+#
+truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
+
+for op in "" "-f"; do
+	create_pool $TESTPOOL1 mirror $specials_list
+	log_must zfs create $TESTPOOL1/$TESTFS1
+	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+	# Hand the loop's flag ("" or -f) to attach_test so both are exercised.
+	attach_test "$op" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
+
+	zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+	if [[ $? -ne 0 ]]; then
+		log_fail "$REPLACEFILE is not present."
+	fi
+
+	destroy_pool $TESTPOOL1
+done
+
+log_note "Verify 'zpool attach' fails with non-mirrors."
+
+for type in "" "raidz" "raidz1" "draid" "draid1"; do
+	for op in "" "-f"; do
+		create_pool $TESTPOOL1 $type $specials_list
+		log_must zfs create $TESTPOOL1/$TESTFS1
+		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+
+		log_mustnot zpool attach -s "$opt" $TESTDIR/$TESTFILE1.1 \
+		    $TESTDIR/$REPLACEFILE
+
+		zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+		if [[ $? -eq 0 ]]; then
+			log_fail "$REPLACEFILE should not be present."
+		fi
+
+		destroy_pool $TESTPOOL1
+	done
+done
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/attach_resilver.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/attach_resilver.ksh
new file mode 100755
index 0000000..e99d681
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/attach_resilver.ksh

@@ -0,0 +1,172 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# 	Attaching disks during I/O should pass for supported pools.
+#
+# STRATEGY:
+#	1. Create multidisk pools (stripe/mirror/raidz/draid) and
+#	   start some random I/O
+#	2. Attach a disk to the pool.
+#	3. Verify the integrity of the file system and the resilvering.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	if [[ -n "$child_pids" ]]; then
+		for wait_pid in $child_pids
+		do
+		        kill $wait_pid
+		done
+	fi
+
+	if poolexists $TESTPOOL1; then
+		destroy_pool $TESTPOOL1
+	fi
+
+	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+
+[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
+
+[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
+
+[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
+
+[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
+
+[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
+
+options="$options -r "
+
+[[ -n "$options" ]] && options_display=$options
+
+child_pids=""
+
+function attach_test
+{
+	typeset -i iters=2
+	typeset -i index=0
+	typeset opt=$1
+	typeset disk1=$2
+	typeset disk2=$3
+
+	typeset i=0
+	while [[ $i -lt $iters ]]; do
+		log_note "Invoking file_trunc with: $options_display"
+		file_trunc $options $TESTDIR/$TESTFILE.$i &
+		typeset pid=$!
+
+		sleep 1
+
+		child_pids="$child_pids $pid"
+		((i = i + 1))
+	done
+
+	log_must zpool attach -w $opt $TESTPOOL1 $disk1 $disk2
+
+	for wait_pid in $child_pids
+	do
+		kill $wait_pid
+	done
+	child_pids=""
+
+        log_must zpool export $TESTPOOL1
+        log_must zpool import -d $TESTDIR $TESTPOOL1
+        log_must zfs umount $TESTPOOL1/$TESTFS1
+        log_must zdb -cdui $TESTPOOL1/$TESTFS1
+        log_must zfs mount $TESTPOOL1/$TESTFS1
+	verify_pool $TESTPOOL1
+}
+
+specials_list=""
+i=0
+while [[ $i != 3 ]]; do
+	truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
+	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+	((i = i + 1))
+done
+
+#
+# Create a replacement disk special file.
+#
+truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
+
+for op in "" "-f"; do
+	create_pool $TESTPOOL1 mirror $specials_list
+	log_must zfs create $TESTPOOL1/$TESTFS1
+	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+	# Hand the loop's flag ("" or -f) to attach_test so both are exercised.
+	attach_test "$op" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
+
+	zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+	if [[ $? -ne 0 ]]; then
+		log_fail "$REPLACEFILE is not present."
+	fi
+
+	destroy_pool $TESTPOOL1
+done
+
+log_note "Verify 'zpool attach' fails with non-mirrors."
+
+for type in "" "raidz" "raidz1" "draid"; do
+	for op in "" "-f"; do
+		create_pool $TESTPOOL1 $type $specials_list
+		log_must zfs create $TESTPOOL1/$TESTFS1
+		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+
+		log_mustnot zpool attach "$opt" $TESTDIR/$TESTFILE1.1 \
+		    $TESTDIR/$REPLACEFILE
+
+		zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+		if [[ $? -eq 0 ]]; then
+		        log_fail "$REPLACEFILE should not be present."
+		fi
+
+		destroy_pool $TESTPOOL1
+	done
+done
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/detach.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/detach.ksh
new file mode 100755
index 0000000..f049c63
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/detach.ksh

@@ -0,0 +1,161 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# 	Detaching disks during I/O should pass for supported pools.
+#
+# STRATEGY:
+#	1. Create multidisk pools (stripe/mirror/raidz/draid) and
+#	   start some random I/O
+#	2. Detach a disk from the pool.
+#	3. Verify the integrity of the file system and the resilvering.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	if [[ -n "$child_pids" ]]; then
+		for wait_pid in $child_pids
+		do
+		        kill $wait_pid
+		done
+	fi
+
+	if poolexists $TESTPOOL1; then
+		destroy_pool $TESTPOOL1
+	fi
+
+	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+
+[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
+
+[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
+
+[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
+
+[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
+
+[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
+
+options="$options -r "
+
+[[ -n "$options" ]] && options_display=$options
+
+child_pids=""
+
+function detach_test
+{
+	typeset -i iters=2
+	typeset -i index=0
+	typeset disk1=$1
+
+	typeset i=0
+	while [[ $i -lt $iters ]]; do
+		log_note "Invoking file_trunc with: $options_display"
+		file_trunc $options $TESTDIR/$TESTFILE.$i &
+		typeset pid=$!
+
+		sleep 1
+
+		child_pids="$child_pids $pid"
+		((i = i + 1))
+	done
+
+	log_must zpool detach $TESTPOOL1 $disk1
+
+	sleep 10
+
+	for wait_pid in $child_pids
+	do
+		kill $wait_pid
+	done
+	child_pids=""
+
+        log_must zpool export $TESTPOOL1
+        log_must zpool import -d $TESTDIR $TESTPOOL1
+        log_must zfs umount $TESTPOOL1/$TESTFS1
+        log_must zdb -cdui $TESTPOOL1/$TESTFS1
+        log_must zfs mount $TESTPOOL1/$TESTFS1
+}
+
+specials_list=""
+i=0
+while [[ $i != 3 ]]; do
+	truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
+	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+	((i = i + 1))
+done
+
+create_pool $TESTPOOL1 mirror $specials_list
+log_must zfs create $TESTPOOL1/$TESTFS1
+log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+
+detach_test $TESTDIR/$TESTFILE1.1
+
+zpool iostat -v $TESTPOOL1 | grep "$TESTFILE1.1"
+if [[ $? -eq 0 ]]; then
+	log_fail "$TESTFILE1.1 should no longer be present."
+fi
+
+destroy_pool $TESTPOOL1
+
+log_note "Verify 'zpool detach' fails with non-mirrors."
+
+for type in "" "raidz" "raidz1" "draid"; do
+	create_pool $TESTPOOL1 $type $specials_list
+	log_must zfs create $TESTPOOL1/$TESTFS1
+	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+	# Name the pool so the failure comes from the vdev type, not from usage.
+	log_mustnot zpool detach $TESTPOOL1 $TESTDIR/$TESTFILE1.1
+
+	zpool iostat -v $TESTPOOL1 | grep "$TESTFILE1.1"
+	if [[ $? -ne 0 ]]; then
+		log_fail "$TESTFILE1.1 is not present."
+	fi
+
+	destroy_pool $TESTPOOL1
+done
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_disabled_feature.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_disabled_feature.ksh
new file mode 100755
index 0000000..334f430
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_disabled_feature.ksh

@@ -0,0 +1,78 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# Description:
+# Verify device_rebuild feature flags.
+#
+# Strategy:
+# 1. Create a pool with all features disabled.
+# 2. Verify 'zpool replace -s' fails and the feature is disabled.
+# 3. Enable the device_rebuild feature.
+# 4. Verify 'zpool replace -s' works and the feature is active.
+# 5. Wait for the feature to return to enabled.
+#
+
+function cleanup
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+# Fail the test unless property $1 of pool $2 currently has the value $3.
+function check_feature_flag
+{
+	typeset feature=$1 pool=$2 expected_value=$3
+
+	# Query the single property rather than regex-matching "all" output.
+	typeset value
+	value="$(zpool get -H -o value "$feature" $pool)"
+	if [ "$value" = "$expected_value" ]; then
+		log_note "$feature verified to be $value"
+	else
+		log_fail "$feature should be $expected_value but is $value"
+	fi
+}
+
+log_assert "Verify device_rebuild feature flags."
+
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+log_must zpool create -d $TESTPOOL1 ${VDEV_FILES[@]}
+
+log_mustnot zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "disabled"
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+log_must zpool set feature@device_rebuild=enabled $TESTPOOL1
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "active"
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
+log_must zpool wait -t resilver $TESTPOOL1
+check_feature_flag "feature@device_rebuild" "$TESTPOOL1" "enabled"
+
+log_pass "Verify device_rebuild feature flags."

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_multiple.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_multiple.ksh
new file mode 100755
index 0000000..7775cbf
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_multiple.ksh

@@ -0,0 +1,126 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Sequential reconstruction (unlike healing reconstruction) operates on the
+# top-level vdev.  This means that a sequential resilver operation can be
+# started/stopped on a different top-level vdev without impacting other
+# sequential resilvers.
+#
+# STRATEGY:
+# 1. Create a mirrored pool.
+#
+
+function cleanup # restore tunable, destroy $TESTPOOL1, remove vdev files
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE $SPARE_VDEV_FILE2
+}
+
+function check_history # <pool> <msg> <expected count>
+{
+	pool=$1
+	msg=$2
+	exp=$3
+
+	count=$(zpool history -i $pool | grep "rebuild" | grep -c "$msg")
+	if [[ "$count" -eq "$exp" ]]; then
+		log_note "Found $count/$exp rebuild '$msg' messages"
+	else
+		log_fail "Expected $exp rebuild '$msg' messages, found $count"
+	fi
+}
+
+log_assert "Rebuilds operate on the top-level vdevs"
+# Save the current value; cleanup restores it.
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} \
+    $SPARE_VDEV_FILE $SPARE_VDEV_FILE2
+
+# Verify two sequential resilvers can run concurrently.
+log_must zpool create -f $TESTPOOL1 \
+    mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]} \
+    mirror ${VDEV_FILES[2]} ${VDEV_FILES[3]}
+log_must zfs create $TESTPOOL1/$TESTFS
+
+mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
+log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=32
+log_must zpool sync $TESTPOOL1
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[3]} $SPARE_VDEV_FILE2
+# Both rebuilds must be logged as started; none reset, complete or canceled.
+check_history $TESTPOOL1 "started" 2
+check_history $TESTPOOL1 "reset" 0
+check_history $TESTPOOL1 "complete" 0
+check_history $TESTPOOL1 "canceled" 0
+# Restore the tunable and wait; both rebuilds must then be logged complete.
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
+log_must zpool wait -t resilver $TESTPOOL1
+
+check_history $TESTPOOL1 "complete" 2
+destroy_pool $TESTPOOL1
+
+# Verify canceling one resilver (zpool detach) does not impact others.
+log_must zpool create -f $TESTPOOL1 \
+    mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]} \
+    mirror ${VDEV_FILES[2]} ${VDEV_FILES[3]}
+log_must zfs create $TESTPOOL1/$TESTFS
+
+mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
+log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=32
+log_must zpool sync $TESTPOOL1
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[3]} $SPARE_VDEV_FILE2
+# Same starting state as the first scenario: two started, nothing finished.
+check_history $TESTPOOL1 "started" 2
+check_history $TESTPOOL1 "reset" 0
+check_history $TESTPOOL1 "complete" 0
+check_history $TESTPOOL1 "canceled" 0
+
+log_must zpool detach $TESTPOOL1 $SPARE_VDEV_FILE2
+# The detach must log exactly one cancel and no completion.
+check_history $TESTPOOL1 "complete" 0
+check_history $TESTPOOL1 "canceled" 1
+# Restore the tunable and wait; only the remaining rebuild may complete.
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
+log_must zpool wait -t resilver $TESTPOOL1
+
+check_history $TESTPOOL1 "complete" 1
+check_history $TESTPOOL1 "canceled" 1
+destroy_pool $TESTPOOL1
+
+log_pass "Rebuilds operate on the top-level vdevs"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_raidz.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_raidz.ksh
new file mode 100755
index 0000000..26dc6f8
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/rebuild_raidz.ksh

@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Executing 'zpool replace -s' for raidz vdevs must fail.  Sequential
+# resilvers are only allowed for stripe/mirror/dRAID pools.
+#
+# STRATEGY:
+# 1. Create a raidz pool, verify 'zpool replace -s' fails
+# 2. Create a stripe/mirror pool, verify 'zpool replace -s' passes
+#
+
+function cleanup # restore tunable, destroy $TESTPOOL1, remove vdev files
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+log_assert "Sequential resilver is not allowed for raidz vdevs"
+# Save the current value; cleanup restores it.
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+# raidz[1-3]: 'zpool replace -s' must be rejected
+for vdev_type in "raidz" "raidz2" "raidz3"; do
+	log_must zpool create -f $TESTPOOL1 $vdev_type ${VDEV_FILES[@]}
+	log_mustnot zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} \
+	    $SPARE_VDEV_FILE
+	destroy_pool $TESTPOOL1
+done
+
+# stripe: 'zpool replace -s' must succeed
+log_must zpool create $TESTPOOL1 ${VDEV_FILES[@]}
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+destroy_pool $TESTPOOL1
+
+# mirror: 'zpool replace -s' must succeed
+log_must zpool create $TESTPOOL1 mirror ${VDEV_FILES[0]} ${VDEV_FILES[1]}
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]}  $SPARE_VDEV_FILE
+destroy_pool $TESTPOOL1
+
+# draid: 'zpool replace -s' must succeed
+log_must zpool create $TESTPOOL1 draid ${VDEV_FILES[@]}
+log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[1]} $SPARE_VDEV_FILE
+destroy_pool $TESTPOOL1
+
+log_pass "Sequential resilver is not allowed for raidz vdevs"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replace_import.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replace_import.ksh
new file mode 100755
index 0000000..37d3c66
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/replace_import.ksh

@@ -0,0 +1,67 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# Description:
+# Verify that on import an in progress replace operation is resumed.
+#
+# Strategy:
+# 1. For both healing and sequential resilvering replace:
+#    a. Create a pool
+#    b. Replace a vdev with 'zpool replace [-s]' to start resilvering it.
+#    c. Export the pool
+#    d. Import the pool
+#    e. Verify the 'zpool replace' resumed resilvering.
+#    f. Destroy the pool
+#
+
+function cleanup # restore tunable, destroy $TESTPOOL1, remove vdev files
+{
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+log_assert "Verify replace is resumed on import"
+
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+# Verify healing and sequential resilver resume on import.
+for arg in "" "-s"; do
+	log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[@]}
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+	log_must zpool replace -s $TESTPOOL1 ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+	log_must is_pool_resilvering $TESTPOOL1
+	log_must zpool export $TESTPOOL1
+	log_must zpool import -d $TEST_BASE_DIR $TESTPOOL1
+	log_must is_pool_resilvering $TESTPOOL1
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must zpool wait -t resilver $TESTPOOL1
+	log_must is_pool_resilvered $TESTPOOL1
+	destroy_pool $TESTPOOL1
+done
+
+log_pass "Verify replace is resumed on import"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replace_rebuild.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replace_rebuild.ksh
new file mode 100755
index 0000000..b3c7995
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/replace_rebuild.ksh

@@ -0,0 +1,158 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# 	Replacing disks during I/O should pass for supported pools.
+#
+# STRATEGY:
+#	1. Create multidisk pools (stripe/mirror/draid) and
+#	   start some random I/O
+#	2. Replace a disk in the pool with another disk.
+#	3. Verify the integrity of the file system and the rebuilding.
+#
+# NOTE: Raidz does not support the sequential resilver (-s) option.
+#
+
+verify_runnable "global"
+
+function cleanup # kill leftover writers, destroy $TESTPOOL1, empty $TESTDIR
+{
+	if [[ -n "$child_pids" ]]; then
+		for wait_pid in $child_pids
+		do
+			kill $wait_pid
+		done
+	fi
+
+	if poolexists $TESTPOOL1; then
+		destroy_pool $TESTPOOL1
+	fi
+
+	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk with -s during I/O completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+# Build the file_trunc argument list from whichever HOLES_* values are set.
+options="$options${HOLES_FILESIZE:+  -f $HOLES_FILESIZE }"
+
+options="$options${HOLES_BLKSIZE:+ -b $HOLES_BLKSIZE }"
+
+options="$options${HOLES_COUNT:+ -c $HOLES_COUNT }"
+
+options="$options${HOLES_SEED:+ -s $HOLES_SEED }"
+
+options="$options${HOLES_FILEOFFSET:+ -o $HOLES_FILEOFFSET }"
+
+options="$options -r "
+
+options_display=${options:-$options_display}
+# Pids of the background file_trunc writers; cleanup kills any left over.
+child_pids=""
+
+function replace_test
+{
+	typeset -i iters=2
+	typeset -i index=0
+	typeset opt=$1
+	typeset disk1=$2
+	typeset disk2=$3
+
+	typeset i=0
+	while [[ $i -lt $iters ]]; do
+		log_note "Invoking file_trunc with: $options_display"
+		file_trunc $options $TESTDIR/$TESTFILE.$i &
+		typeset pid=$!
+
+		sleep 1
+
+		child_pids="$child_pids $pid"
+		((i = i + 1))
+	done
+
+	log_must zpool replace -sw $opt $TESTPOOL1 $disk1 $disk2
+
+	for wait_pid in $child_pids
+	do
+		kill $wait_pid
+	done
+	child_pids=""
+
+	log_must zpool export $TESTPOOL1
+	log_must zpool import -d $TESTDIR $TESTPOOL1
+	log_must zfs umount $TESTPOOL1/$TESTFS1
+	log_must zdb -cdui $TESTPOOL1/$TESTFS1
+	log_must zfs mount $TESTPOOL1/$TESTFS1
+	verify_pool $TESTPOOL1
+}
+
+specials_list=""
+i=0
+while [[ $i != 3 ]]; do
+	log_must truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
+	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+	((i = i + 1))
+done
+
+#
+# Create a replacement disk special file.
+#
+log_must truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
+
+for type in "" "mirror" "draid"; do
+	for op in "" "-f"; do
+		create_pool $TESTPOOL1 $type $specials_list
+		log_must zfs create $TESTPOOL1/$TESTFS1
+		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+		# Hand the loop's option ("" or "-f") to 'zpool replace'.
+		replace_test "$op" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
+
+		zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+		if [[ $? -ne 0 ]]; then
+			log_fail "$REPLACEFILE is not present."
+		fi
+
+		destroy_pool $TESTPOOL1
+		log_must rm -rf /$TESTPOOL1
+	done
+done
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replace_resilver.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replace_resilver.ksh
new file mode 100755
index 0000000..2585397
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/replace_resilver.ksh

@@ -0,0 +1,155 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# 	Replacing disks during I/O should pass for supported pools.
+#
+# STRATEGY:
+#	1. Create multidisk pools (stripe/mirror/raidz/draid) and
+#	   start some random I/O
+#	2. Replace a disk in the pool with another disk.
+#	3. Verify the integrity of the file system and the resilvering.
+#
+
+verify_runnable "global"
+
+function cleanup # kill leftover writers, destroy $TESTPOOL1, empty $TESTDIR
+{
+	if [[ -n "$child_pids" ]]; then
+		for wait_pid in $child_pids
+		do
+			kill $wait_pid
+		done
+	fi
+
+	if poolexists $TESTPOOL1; then
+		destroy_pool $TESTPOOL1
+	fi
+
+	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
+}
+
+log_assert "Replacing a disk during I/O completes."
+
+options=""
+options_display="default options"
+
+log_onexit cleanup
+# Build the file_trunc argument list from whichever HOLES_* values are set.
+options="$options${HOLES_FILESIZE:+  -f $HOLES_FILESIZE }"
+
+options="$options${HOLES_BLKSIZE:+ -b $HOLES_BLKSIZE }"
+
+options="$options${HOLES_COUNT:+ -c $HOLES_COUNT }"
+
+options="$options${HOLES_SEED:+ -s $HOLES_SEED }"
+
+options="$options${HOLES_FILEOFFSET:+ -o $HOLES_FILEOFFSET }"
+
+options="$options -r "
+
+options_display=${options:-$options_display}
+# Pids of the background file_trunc writers; cleanup kills any left over.
+child_pids=""
+
+function replace_test
+{
+	typeset -i iters=2
+	typeset -i index=0
+	typeset opt=$1
+	typeset disk1=$2
+	typeset disk2=$3
+
+	typeset i=0
+	while [[ $i -lt $iters ]]; do
+		log_note "Invoking file_trunc with: $options_display"
+		file_trunc $options $TESTDIR/$TESTFILE.$i &
+		typeset pid=$!
+
+		sleep 1
+
+		child_pids="$child_pids $pid"
+		((i = i + 1))
+	done
+
+	log_must zpool replace -w $opt $TESTPOOL1 $disk1 $disk2
+
+	for wait_pid in $child_pids
+	do
+		kill $wait_pid
+	done
+	child_pids=""
+
+	log_must zpool export $TESTPOOL1
+	log_must zpool import -d $TESTDIR $TESTPOOL1
+	log_must zfs umount $TESTPOOL1/$TESTFS1
+	log_must zdb -cdui $TESTPOOL1/$TESTFS1
+	log_must zfs mount $TESTPOOL1/$TESTFS1
+	verify_pool $TESTPOOL1
+}
+
+specials_list=""
+i=0
+while [[ $i != 3 ]]; do
+	log_must truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
+	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
+
+	((i = i + 1))
+done
+
+#
+# Create a replacement disk special file.
+#
+log_must truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
+
+for type in "" "raidz" "mirror" "draid"; do
+	for op in "" "-f"; do
+		create_pool $TESTPOOL1 $type $specials_list
+		log_must zfs create $TESTPOOL1/$TESTFS1
+		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
+		# Hand the loop's option ("" or "-f") to 'zpool replace'.
+		replace_test "$op" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
+
+		zpool iostat -v $TESTPOOL1 | grep "$REPLACEFILE"
+		if [[ $? -ne 0 ]]; then
+			log_fail "$REPLACEFILE is not present."
+		fi
+
+		destroy_pool $TESTPOOL1
+		log_must rm -rf /$TESTPOOL1
+	done
+done
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replacement.cfg b/zfs/tests/zfs-tests/tests/functional/replacement/replacement.cfg
index b2ba1b8..271317b 100644
--- a/zfs/tests/zfs-tests/tests/functional/replacement/replacement.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/replacement.cfg

@@ -36,3 +36,8 @@
 export HOLES_FILEOFFSET=${HOLES_FILEOFFSET-""}
 export HOLES_COUNT=${HOLES_COUNT-"16384"}	   # FILESIZE/BLKSIZE/8
 export REPLACEFILE="sparedisk"
+# File-backed vdevs and spares shared by the tests that source this file.
+set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
+export VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))
+export SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
+export SPARE_VDEV_FILE2=$TEST_BASE_DIR/spare-2

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replacement_001_pos.ksh
deleted file mode 100755
index 8f40436..0000000
--- a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_001_pos.ksh
+++ /dev/null

@@ -1,156 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/replacement/replacement.cfg
-
-#
-# DESCRIPTION:
-# 	Replacing disks during I/O should pass for supported pools.
-#
-# STRATEGY:
-#	1. Create multidisk pools (stripe/mirror/raidz) and
-#	   start some random I/O
-#	2. Replace a disk in the pool with another disk.
-#	3. Verify the integrity of the file system and the resilvering.
-#
-
-verify_runnable "global"
-
-function cleanup
-{
-	if [[ -n "$child_pids" ]]; then
-		for wait_pid in $child_pids
-		do
-			kill $wait_pid
-		done
-	fi
-
-	if poolexists $TESTPOOL1; then
-		destroy_pool $TESTPOOL1
-	fi
-
-	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
-}
-
-log_assert "Replacing a disk during I/O completes."
-
-options=""
-options_display="default options"
-
-log_onexit cleanup
-
-[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
-
-[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
-
-[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
-
-[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
-
-[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
-
-options="$options -r "
-
-[[ -n "$options" ]] && options_display=$options
-
-child_pids=""
-
-function replace_test
-{
-	typeset -i iters=2
-	typeset -i index=0
-	typeset opt=$1
-	typeset disk1=$2
-	typeset disk2=$3
-
-	typeset i=0
-	while [[ $i -lt $iters ]]; do
-		log_note "Invoking file_trunc with: $options_display"
-		file_trunc $options $TESTDIR/$TESTFILE.$i &
-		typeset pid=$!
-
-		sleep 1
-
-		child_pids="$child_pids $pid"
-		((i = i + 1))
-	done
-
-	log_must zpool replace $opt $TESTPOOL1 $disk1 $disk2
-
-	sleep 10
-
-	for wait_pid in $child_pids
-	do
-		kill $wait_pid
-	done
-	child_pids=""
-
-	log_must zpool export $TESTPOOL1
-	log_must zpool import -d $TESTDIR $TESTPOOL1
-	log_must zfs umount $TESTPOOL1/$TESTFS1
-	log_must zdb -cdui $TESTPOOL1/$TESTFS1
-	log_must zfs mount $TESTPOOL1/$TESTFS1
-}
-
-specials_list=""
-i=0
-while [[ $i != 2 ]]; do
-	log_must truncate -s $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
-	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
-
-	((i = i + 1))
-done
-
-#
-# Create a replacement disk special file.
-#
-log_must truncate -s $MINVDEVSIZE $TESTDIR/$REPLACEFILE
-
-for type in "" "raidz" "mirror"; do
-	for op in "" "-f"; do
-		create_pool $TESTPOOL1 $type $specials_list
-		log_must zfs create $TESTPOOL1/$TESTFS1
-		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
-
-		replace_test "$opt" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
-
-		zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
-		if [[ $? -ne 0 ]]; then
-			log_fail "$REPLACEFILE is not present."
-		fi
-
-		destroy_pool $TESTPOOL1
-		log_must rm -rf /$TESTPOOL1
-	done
-done
-
-log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replacement_002_pos.ksh
deleted file mode 100755
index 391aa5c..0000000
--- a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_002_pos.ksh
+++ /dev/null

@@ -1,174 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/replacement/replacement.cfg
-
-#
-# DESCRIPTION:
-# 	Attaching disks during I/O should pass for supported pools.
-#
-# STRATEGY:
-#	1. Create multidisk pools (stripe/mirror/raidz) and
-#	   start some random I/O
-#	2. Attach a disk to the pool.
-#	3. Verify the integrity of the file system and the resilvering.
-#
-
-verify_runnable "global"
-
-function cleanup
-{
-	if [[ -n "$child_pids" ]]; then
-		for wait_pid in $child_pids
-		do
-		        kill $wait_pid
-		done
-	fi
-
-	if poolexists $TESTPOOL1; then
-		destroy_pool $TESTPOOL1
-	fi
-
-	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
-}
-
-log_assert "Replacing a disk during I/O completes."
-
-options=""
-options_display="default options"
-
-log_onexit cleanup
-
-[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
-
-[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
-
-[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
-
-[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
-
-[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
-
-options="$options -r "
-
-[[ -n "$options" ]] && options_display=$options
-
-child_pids=""
-
-function attach_test
-{
-	typeset -i iters=2
-	typeset -i index=0
-	typeset opt=$1
-	typeset disk1=$2
-	typeset disk2=$3
-
-	typeset i=0
-	while [[ $i -lt $iters ]]; do
-		log_note "Invoking file_trunc with: $options_display"
-		file_trunc $options $TESTDIR/$TESTFILE.$i &
-		typeset pid=$!
-
-		sleep 1
-
-		child_pids="$child_pids $pid"
-		((i = i + 1))
-	done
-
-	log_must zpool attach $opt $TESTPOOL1 $disk1 $disk2
-
-	sleep 10
-
-	for wait_pid in $child_pids
-	do
-		kill $wait_pid
-	done
-	child_pids=""
-
-        log_must zpool export $TESTPOOL1
-        log_must zpool import -d $TESTDIR $TESTPOOL1
-        log_must zfs umount $TESTPOOL1/$TESTFS1
-        log_must zdb -cdui $TESTPOOL1/$TESTFS1
-        log_must zfs mount $TESTPOOL1/$TESTFS1
-
-}
-
-specials_list=""
-i=0
-while [[ $i != 2 ]]; do
-	mkfile $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
-	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
-
-	((i = i + 1))
-done
-
-#
-# Create a replacement disk special file.
-#
-mkfile $MINVDEVSIZE $TESTDIR/$REPLACEFILE
-
-for op in "" "-f"; do
-	create_pool $TESTPOOL1 mirror $specials_list
-	log_must zfs create $TESTPOOL1/$TESTFS1
-	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
-
-	attach_test "$opt" $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE
-
-	zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
-	if [[ $? -ne 0 ]]; then
-		log_fail "$REPLACEFILE is not present."
-	fi
-
-	destroy_pool $TESTPOOL1
-done
-
-log_note "Verify 'zpool attach' fails with non-mirrors."
-
-for type in "" "raidz" "raidz1"; do
-	for op in "" "-f"; do
-		create_pool $TESTPOOL1 $type $specials_list
-		log_must zfs create $TESTPOOL1/$TESTFS1
-		log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
-
-		log_mustnot zpool attach "$opt" $TESTDIR/$TESTFILE1.1 \
-		    $TESTDIR/$REPLACEFILE
-
-		zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$REPLACEFILE"
-		if [[ $? -eq 0 ]]; then
-		        log_fail "$REPLACEFILE should not be present."
-		fi
-
-		destroy_pool $TESTPOOL1
-	done
-done
-
-log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/replacement_003_pos.ksh
deleted file mode 100755
index 71b9602..0000000
--- a/zfs/tests/zfs-tests/tests/functional/replacement/replacement_003_pos.ksh
+++ /dev/null

@@ -1,161 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/replacement/replacement.cfg
-
-#
-# DESCRIPTION:
-# 	Detaching disks during I/O should pass for supported pools.
-#
-# STRATEGY:
-#	1. Create multidisk pools (stripe/mirror/raidz) and
-#	   start some random I/O
-#	2. Detach a disk from the pool.
-#	3. Verify the integrity of the file system and the resilvering.
-#
-
-verify_runnable "global"
-
-function cleanup
-{
-	if [[ -n "$child_pids" ]]; then
-		for wait_pid in $child_pids
-		do
-		        kill $wait_pid
-		done
-	fi
-
-	if poolexists $TESTPOOL1; then
-		destroy_pool $TESTPOOL1
-	fi
-
-	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
-}
-
-log_assert "Replacing a disk during I/O completes."
-
-options=""
-options_display="default options"
-
-log_onexit cleanup
-
-[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
-
-[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
-
-[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
-
-[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
-
-[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
-
-ptions="$options -r "
-
-[[ -n "$options" ]] && options_display=$options
-
-child_pids=""
-
-function detach_test
-{
-	typeset -i iters=2
-	typeset -i index=0
-	typeset disk1=$1
-
-	typeset i=0
-	while [[ $i -lt $iters ]]; do
-		log_note "Invoking file_trunc with: $options_display"
-		file_trunc $options $TESTDIR/$TESTFILE.$i &
-		typeset pid=$!
-
-		sleep 1
-
-		child_pids="$child_pids $pid"
-		((i = i + 1))
-	done
-
-	log_must zpool detach $TESTPOOL1 $disk1
-
-	sleep 10
-
-	for wait_pid in $child_pids
-	do
-		kill $wait_pid
-	done
-	child_pids=""
-
-        log_must zpool export $TESTPOOL1
-        log_must zpool import -d $TESTDIR $TESTPOOL1
-        log_must zfs umount $TESTPOOL1/$TESTFS1
-        log_must zdb -cdui $TESTPOOL1/$TESTFS1
-        log_must zfs mount $TESTPOOL1/$TESTFS1
-}
-
-specials_list=""
-i=0
-while [[ $i != 2 ]]; do
-	mkfile $MINVDEVSIZE $TESTDIR/$TESTFILE1.$i
-	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
-
-	((i = i + 1))
-done
-
-create_pool $TESTPOOL1 mirror $specials_list
-log_must zfs create $TESTPOOL1/$TESTFS1
-log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
-
-detach_test $TESTDIR/$TESTFILE1.1
-
-zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$TESTFILE1.1"
-if [[ $? -eq 0 ]]; then
-	log_fail "$TESTFILE1.1 should no longer be present."
-fi
-
-destroy_pool $TESTPOOL1
-
-log_note "Verify 'zpool detach' fails with non-mirrors."
-
-for type in "" "raidz" "raidz1" ; do
-	create_pool $TESTPOOL1 $type $specials_list
-	log_must zfs create $TESTPOOL1/$TESTFS1
-	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
-
-	log_mustnot zpool detach $TESTDIR/$TESTFILE1.1
-
-	zpool iostat -v $TESTPOOL1 | grep "$TESTDIR/$TESTFILE1.1"
-	if [[ $? -ne 0 ]]; then
-	        log_fail "$TESTFILE1.1 is not present."
-	fi
-
-	destroy_pool $TESTPOOL1
-done
-
-log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh
new file mode 100755
index 0000000..7e96ab5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh

@@ -0,0 +1,187 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Testing resilver restart logic both with and without the deferred resilver
+# feature enabled, verifying that resilver is not restarted when it is
+# unnecessary.
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Create four filesystems with the primary cache disabled to force reads
+# 3. Write four files simultaneously, one to each filesystem
+# 4. Do with and without deferred resilvers enabled
+#    a. Replace a vdev with a spare & suspend resilver immediately
+#    b. Verify resilver starts properly
+#    c. Offline / online another vdev to introduce a new DTL range
+#    d. Verify resilver restart or defer
+#    e. Inject read errors on vdev that was offlined / onlined
+#    f. Verify that resilver did not restart
+#    g. Unsuspend resilver and wait for it to finish
+#    h. Verify that there are two resilvers and nothing is deferred
+#
+
+function cleanup
+{
+	log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
+	log_must zinject -c all
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+# Check the resilver_start event count and which vdev (if any) is deferred
+function verify_restarts # <msg> <cnt> <defer: '' skip, '-' none, N child>
+{
+	typeset msg="$1"
+	typeset cnt="$2"
+	typeset defer="$3"
+
+	# check the number of resilver starts in the events log
+	RESILVERS=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
+	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
+	[[ "$RESILVERS" -ne "$cnt" ]] &&
+	    log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
+
+	[[ -z "$defer" ]] && return
+
+	# use zdb to find which vdevs have the resilver defer flag
+	VDEV_DEFERS=$(zdb -C $TESTPOOL1 | awk '
+	    /children/ { gsub(/[^0-9]/, ""); child = $0 }
+	    /com\.datto:resilver_defer$/ { print child }
+	')
+
+	if [[ "$defer" == "-" ]]
+	then
+		[[ -n $VDEV_DEFERS ]] &&
+		    log_fail "didn't expect any vdevs to have resilver deferred"
+		return
+	fi
+	# compare as strings: -eq treats an empty or multi-vdev list as a number
+	[[ "$VDEV_DEFERS" == "$defer" ]] ||
+	    log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
+}
+
+log_assert "Check for unnecessary resilver restarts"
+
+ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+
+set -A RESTARTS -- '1' '2' '2' '2'
+set -A VDEVS -- '' '' '' ''
+set -A DEFER_RESTARTS -- '1' '1' '1' '2'
+set -A DEFER_VDEVS -- '-' '2' '2' '-'
+
+VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
+
+log_onexit cleanup
+
+# ensure that enough events will be saved
+log_must set_tunable32 ZEVENT_LEN_MAX 512
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL1 \
+    raidz ${VDEV_FILES[@]}
+
+# create 4 filesystems
+for fs in fs{0..3}
+do
+	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL1/$fs
+done
+
+# simultaneously write 16M to each of them
+set -A DATAPATHS /$TESTPOOL1/fs{0..3}/dat.0
+log_note "Writing data files"
+for path in ${DATAPATHS[@]}
+do
+	dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
+done
+wait
+
+# test without and with deferred resilver feature enabled
+for test in "without" "with"
+do
+	log_note "Testing $test deferred resilvers"
+
+	if [[ $test == "with" ]]
+	then
+		log_must zpool set feature@resilver_defer=enabled $TESTPOOL1
+		RESTARTS=( "${DEFER_RESTARTS[@]}" )
+		VDEVS=( "${DEFER_VDEVS[@]}" )
+		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
+	fi
+
+	# clear the events
+	log_must zpool events -c
+
+	# limit scanning time
+	log_must set_tunable32 RESILVER_MIN_TIME_MS 50
+
+	# initiate a resilver and suspend the scan as soon as possible
+	log_must zpool replace $TESTPOOL1 $VDEV_REPLACE
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+	# there should only be 1 resilver start
+	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"
+
+	# offline then online a vdev to introduce a new DTL range after current
+	# scan, which should restart (or defer) the resilver
+	log_must zpool offline $TESTPOOL1 ${VDEV_FILES[2]}
+	log_must zpool sync $TESTPOOL1
+	log_must zpool online $TESTPOOL1 ${VDEV_FILES[2]}
+	log_must zpool sync $TESTPOOL1
+
+	# there should now be 2 resilver starts w/o defer, 1 with defer
+	verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"
+
+	# inject read io errors on vdev and verify resilver does not restart
+	log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL1
+	log_must cat ${DATAPATHS[1]} > /dev/null
+	log_must zinject -c all
+
+	# there should still be 2 resilver starts w/o defer, 1 with defer
+	verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"
+
+	# unsuspend resilver
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+	log_must set_tunable32 RESILVER_MIN_TIME_MS 3000
+
+	# wait for resilver to finish
+	log_must zpool wait -t resilver $TESTPOOL1
+	log_must is_pool_resilvered $TESTPOOL1
+
+	# wait for a few txg's to see if a resilver happens
+	log_must zpool sync $TESTPOOL1
+	log_must zpool sync $TESTPOOL1
+
+	# there should now be 2 resilver starts
+	verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
+done
+
+log_pass "Resilver did not restart unnecessarily"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_002.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_002.ksh
new file mode 100755
index 0000000..48763f9
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_002.ksh

@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Testing resilver completes when scan errors are encountered, but relevant
+# DTL's have not been lost.
+#
+# STRATEGY:
+# 1. Create a pool (1k recordsize)
+# 2. Create a 32m file (32k records)
+# 3. Inject an error halfway through the file
+# 4. Start a resilver, ensure the error is triggered and that the resilver
+#    does not restart after finishing
+#
+# NB: use legacy scanning to ensure scan of specific block causes error
+#
+
+function cleanup
+{
+	log_must zinject -c all
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+	log_must set_tunable32 SCAN_LEGACY $ORIG_SCAN_LEGACY
+}
+
+log_assert "Check for resilver restarts caused by scan errors"
+
+ORIG_SCAN_LEGACY=$(get_tunable SCAN_LEGACY)
+
+log_onexit cleanup
+
+# use legacy scan to ensure injected error will be triggered
+log_must set_tunable32 SCAN_LEGACY 1
+
+# create the pool and a 32M file (32k blocks)
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+log_must zpool create -f -O recordsize=1k $TESTPOOL1 ${VDEV_FILES[0]}
+log_must dd if=/dev/urandom of=/$TESTPOOL1/file bs=1M count=32 > /dev/null 2>&1
+
+# determine objset/object
+objset=$(zdb -d $TESTPOOL1/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
+object=$(ls -i /$TESTPOOL1/file | awk '{print $1}')
+
+# inject event to cause error during resilver
+log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL1
+
+# clear events and start resilver
+log_must zpool events -c
+log_must zpool attach $TESTPOOL1 ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+
+log_note "waiting for read errors to start showing up"
+for iter in {0..59}
+do
+	zpool sync $TESTPOOL1
+	err=$(zpool status $TESTPOOL1 | grep ${VDEV_FILES[0]} | awk '{print $3}')
+	(( $err > 0 )) && break
+	sleep 1
+done
+
+(( $err == 0 )) && log_fail "Unable to induce errors in resilver"
+
+log_note "waiting for resilver to finish"
+for iter in {0..59}
+do
+	finish=$(zpool events | grep "sysevent.fs.zfs.resilver_finish" | wc -l)
+	(( $finish > 0 )) && break
+	sleep 1
+done
+
+(( $finish == 0 )) && log_fail "resilver took too long to finish"
+
+# wait a few syncs to ensure that zfs does not restart the resilver
+log_must zpool sync $TESTPOOL1
+log_must zpool sync $TESTPOOL1
+
+# check if resilver was restarted
+start=$(zpool events | grep "sysevent.fs.zfs.resilver_start" | wc -l)
+(( $start != 1 )) && log_fail "resilver restarted unnecessarily"
+
+log_pass "Resilver did not restart unnecessarily from scan errors"

diff --git a/zfs/tests/zfs-tests/tests/functional/replacement/scrub_cancel.ksh b/zfs/tests/zfs-tests/tests/functional/replacement/scrub_cancel.ksh
new file mode 100755
index 0000000..da8a0a2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/replacement/scrub_cancel.ksh

@@ -0,0 +1,112 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/replacement/replacement.cfg
+
+#
+# DESCRIPTION:
+# Verify scrub behaves as intended when contending with a healing or
+# sequential resilver.
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Add a modest amount of data to the pool.
+# 3. For healing and sequential resilver:
+#    a. Start scrubbing.
+#    b. Verify a resilver can be started and it cancels the scrub.
+#    c. Verify a scrub cannot be started when resilvering
+#
+
+function cleanup
+{
+	log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	destroy_pool $TESTPOOL1
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+}
+
+log_assert "Scrub was cancelled by resilver"
+
+ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+log_onexit cleanup
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+log_must zpool create -f $TESTPOOL1 ${VDEV_FILES[@]}
+log_must zfs create $TESTPOOL1/$TESTFS
+
+mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS)
+log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=64
+log_must zpool sync $TESTPOOL1
+
+# Request a healing or sequential resilver
+for replace_mode in "healing" "sequential"; do
+
+	#
+	# Healing resilvers abort the dsl_scan and reconfigure it for
+	# resilvering.  Sequential resilvers cancel the dsl_scan and start
+	# the vdev_rebuild thread.
+	#
+	if [[ "$replace_mode" = "healing" ]]; then
+		history_msg="scan aborted, restarting"
+		flags=""
+	else
+		history_msg="scan cancelled"
+		flags="-s"
+	fi
+
+	# Limit scanning time and suspend the scan as soon as possible.
+	log_must set_tunable32 RESILVER_MIN_TIME_MS 50
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+
+	# Initiate a scrub.
+	log_must zpool scrub $TESTPOOL1
+
+	# Initiate a resilver to cancel the scrub.
+	log_must zpool replace $flags $TESTPOOL1 ${VDEV_FILES[1]} \
+	    $SPARE_VDEV_FILE
+
+	# Verify the scrub was canceled, it may take a few seconds to exit.
+	while is_pool_scrubbing $TESTPOOL1; do
+		sleep 1
+	done
+	log_mustnot is_pool_scrubbing $TESTPOOL1
+
+	# Verify a scrub cannot be started while resilvering.
+	log_must is_pool_resilvering $TESTPOOL1
+	log_mustnot zpool scrub $TESTPOOL1
+
+	# Unsuspend resilver.
+	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+	log_must set_tunable32 RESILVER_MIN_TIME_MS 3000
+
+	# Wait for resilver to finish then put the original back.
+	log_must zpool wait $TESTPOOL1
+	log_must zpool replace $flags -w $TESTPOOL1 $SPARE_VDEV_FILE \
+	    ${VDEV_FILES[1]}
+done
+log_pass "Scrub was cancelled by resilver"
+

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh
index 8ae3593..e0fed63 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_002_pos.ksh

@@ -54,7 +54,7 @@
 function cleanup
 {
 	for obj in $OBJ_LIST; do
-		datasetexists $obj && log_must_busy zfs destroy -f $obj
+		datasetexists $obj && destroy_dataset $obj -f
 	done
 
 	log_must zero_reservation $TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh
index 48adabe..ee303b5 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_003_pos.ksh

@@ -59,7 +59,7 @@
 	log_must zero_reservation $TESTPOOL/$TESTFS
 
 	for obj in $OBJ_LIST; do
-	datasetexists $obj && log_must zfs destroy -f $obj
+	datasetexists $obj && destroy_dataset $obj -f
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh
index f8342ff..eb606a7 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_004_pos.ksh

@@ -56,7 +56,7 @@
 function cleanup {
 
 	for obj in $OBJ_LIST; do
-		datasetexists $obj && log_must_busy zfs destroy -f $obj
+		datasetexists $obj && destroy_dataset $obj -f
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh
index 4047fab..535d652 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_005_pos.ksh

@@ -59,7 +59,7 @@
 function cleanup
 {
 	for obj in $OBJ_LIST; do
-		datasetexists $obj && log_must zfs destroy -f $obj
+		datasetexists $obj && destroy_dataset $obj -f
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh
index ec1986c..da0d36a 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_006_pos.ksh

@@ -39,7 +39,7 @@
 # for a dataset. Unlike quotas however there should be no restrictions
 # on accessing space outside of the limits of the reservation (if the
 # space is available in the pool). Verify that in a filesystem with a
-# reservation set that its possible to create files both within the
+# reservation set that it's possible to create files both within the
 # reserved space and also outside.
 #
 # STRATEGY:

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh
index 48d6b40..a1fffd3 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_007_pos.ksh

@@ -56,10 +56,10 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-	    log_must zfs destroy -f $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -f
 
 	datasetexists $TESTPOOL/$TESTFS1 && \
-	    log_must zfs destroy -f $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup
@@ -100,7 +100,7 @@
 	# available totals should revert back to the values they
 	# had after creating the first dataset.
 	#
-	log_must zfs destroy -f $dataset2
+	log_must_busy zfs destroy -f $dataset2
 
 	avail_dest_dset2=`get_prop available $TESTPOOL`
 	used_dest_dset2=`get_prop used $TESTPOOL`
@@ -112,7 +112,7 @@
 	# After destroying the first dataset the space used and
 	# space available totals should revert back to the values
 	# they had when the pool was first created.
-	log_must zfs destroy -f $dataset1
+	log_must_busy zfs destroy -f $dataset1
 
 	avail_dest_dset1=`get_prop available $TESTPOOL`
 	used_dest_dset1=`get_prop used $TESTPOOL`

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh
index a0cd039..cfc30f4 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_008_pos.ksh

@@ -57,7 +57,7 @@
 	typeset -i loop=0
 	while (($loop < $RESV_NUM_FS)); do
 		datasetexists $TESTPOOL/${TESTFS}$loop && \
-		    log_must zfs destroy -f $TESTPOOL/${TESTFS}$loop
+		    destroy_dataset $TESTPOOL/${TESTFS}$loop -f
 
 		[[ -d ${TESTDIR}$loop ]] && log_must rm -r ${TESTDIR}$loop
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh
index 171577d..a639abf 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_009_pos.ksh

@@ -58,7 +58,9 @@
 {
 	log_must rm -rf $TESTDIR/$TESTFILE1
 	log_must rm -rf $TESTDIR/$TESTFILE2
-	log_must zfs destroy -f $TESTPOOL/$TESTFS1
+
+	datasetexists $TESTPOOL/$TESTFS1 && \
+		destroy_dataset $TESTPOOL/$TESTFS1 -f
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh
index 2ca279a..f3a64a0 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_010_pos.ksh

@@ -57,7 +57,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS1 && \
-	    log_must zfs destroy $TESTPOOL/$TESTFS1
+		destroy_dataset $TESTPOOL/$TESTFS1
 
 	[[ -e $TESTDIR/$TESTFILE1 ]] && log_must rm -rf $TESTDIR/$TESTFILE1
 	[[ -e $TESTDIR/$TESTFILE2 ]] && log_must rm -rf $TESTDIR/$TESTFILE2

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh
index 6a80bb5..bf09552 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_013_pos.ksh

@@ -58,7 +58,7 @@
 function cleanup
 {
 	for obj in $OBJ_LIST; do
-                datasetexists $obj && log_must zfs destroy -f $obj
+                datasetexists $obj && destroy_dataset $obj -f
         done
 
 	log_must zero_reservation $TESTPOOL/$TESTFS
@@ -91,7 +91,7 @@
 log_must zfs set reservation=$resv_set $TESTPOOL/$TESTFS1/$TESTFS2
 log_must zfs set reservation=$resv_set $TESTPOOL/$TESTVOL2
 
-log_must zpool export $TESTPOOL
+log_must_busy zpool export $TESTPOOL
 log_must zpool import $TESTPOOL
 
 for obj in $TESTPOOL/$TESTFS $OBJ_LIST; do

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh
index e8bd91d..3b7f384 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_014_pos.ksh

@@ -54,7 +54,7 @@
 	#
 	# Note we don't destroy $TESTFS as it's used by other tests
 	for obj in $OBJ_LIST ; do
-		datasetexists $obj && log_must zfs destroy -f $obj
+		datasetexists $obj && destroy_dataset $obj -f
 	done
 
 	log_must zero_reservation $TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh
index d67f8c7..7067a78 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_015_pos.ksh

@@ -57,7 +57,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTVOL && \
-	log_must zfs destroy $TESTPOOL/$TESTVOL
+		destroy_dataset $TESTPOOL/$TESTVOL
 
 	[[ -e $TESTDIR/$TESTFILE1 ]] && log_must rm -rf $TESTDIR/$TESTFILE1
 	[[ -e $TESTDIR/$TESTFILE2 ]] && log_must rm -rf $TESTDIR/$TESTFILE2
@@ -76,6 +76,7 @@
 resv_size_set=$(floor_volsize $resv_size_set)
 
 log_must zfs create -V $resv_size_set $TESTPOOL/$TESTVOL
+block_device_wait $TESTPOOL/$TESTVOL
 
 space_avail_still=`get_prop available $TESTPOOL`
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh
index cbb1db6..82bbcde 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_016_pos.ksh

@@ -56,7 +56,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTVOL && \
-	    log_must zfs destroy $TESTPOOL/$TESTVOL
+	    destroy_dataset $TESTPOOL/$TESTVOL
 
 	[[ -e $TESTDIR/$TESTFILE1 ]] && log_must rm -rf $TESTDIR/$TESTFILE1
 	[[ -e $TESTDIR/$TESTFILE2 ]] && log_must rm -rf $TESTDIR/$TESTFILE2
@@ -76,6 +76,7 @@
 # Creating a regular volume implicitly sets its reservation
 # property to the same value.
 log_must zfs create -V $vol_set_size $TESTPOOL/$TESTVOL
+block_device_wait $TESTPOOL/$TESTVOL
 
 space_avail_still=$(get_prop available $TESTPOOL)
 fill_size=$((space_avail_still + $RESV_TOLERANCE))

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh
index 0969a68..1f92c88 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_018_pos.ksh

@@ -47,7 +47,7 @@
 
 function cleanup
 {
-	datasetexists $fs_child && log_must zfs destroy $fs_child
+	datasetexists $fs_child && destroy_dataset $fs_child
 	log_must zfs set reservation=$reserv_val $fs
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh
index c99a82c..07da7e9 100755
--- a/zfs/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/reservation/reservation_021_neg.ksh

@@ -34,7 +34,7 @@
 
 verify_runnable "both"
 
-fs=$TESTPOOL/$TESTFS/$(basename $0).$$
+fs=$TESTPOOL/$TESTFS/${0##*/}.$$
 
 function cleanup
 {

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/Makefile.am b/zfs/tests/zfs-tests/tests/functional/resilver/Makefile.am
deleted file mode 100644
index 38136a8..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/Makefile.am
+++ /dev/null

@@ -1,9 +0,0 @@
-pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver
-dist_pkgdata_SCRIPTS = \
-	setup.ksh \
-	cleanup.ksh \
-	resilver_restart_001.ksh \
-	resilver_restart_002.ksh
-
-dist_pkgdata_DATA = \
-	resilver.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/resilver/cleanup.ksh
deleted file mode 100755
index 4dfa814..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/cleanup.ksh
+++ /dev/null

@@ -1,31 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-
-#
-# Copyright (c) 2019, Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/resilver/resilver.cfg
-
-verify_runnable "global"
-
-log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/resilver.cfg b/zfs/tests/zfs-tests/tests/functional/resilver/resilver.cfg
deleted file mode 100644
index 88dfd24..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/resilver.cfg
+++ /dev/null

@@ -1,32 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-
-#
-# Copyright (c) 2019, Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-verify_runnable "global"
-
-set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
-SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
-
-VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
deleted file mode 100755
index 876b286..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
+++ /dev/null

@@ -1,191 +0,0 @@
-#!/bin/ksh -p
-
-#
-# CDDL HEADER START
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2019, Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/resilver/resilver.cfg
-
-#
-# DESCRIPTION:
-# Testing resilver restart logic both with and without the deferred resilver
-# feature enabled, verifying that resilver is not restarted when it is
-# unecessary.
-#
-# STRATEGY:
-# 1. Create a pool
-# 2. Create four filesystems with the primary cache disable to force reads
-# 3. Write four files simultaneously, one to each filesystem
-# 4. Do with and without deferred resilvers enabled
-#    a. Replace a vdev with a spare & suspend resilver immediately
-#    b. Verify resilver starts properly
-#    c. Offline / online another vdev to introduce a new DTL range
-#    d. Verify resilver restart restart or defer
-#    e. Inject read errors on vdev that was offlined / onlned
-#    f. Verify that resilver did not restart
-#    g. Unsuspend resilver and wait for it to finish
-#    h. Verify that there are two resilvers and nothing is deferred
-#
-
-function cleanup
-{
-	log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME
-	log_must set_tunable32 zfs_scan_suspend_progress \
-	    $ORIG_SCAN_SUSPEND_PROGRESS
-	log_must set_tunable32 zfs_zevent_len_max $ORIG_ZFS_ZEVENT_LEN_MAX
-	log_must zinject -c all
-	destroy_pool $TESTPOOL
-	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
-}
-
-# count resilver events in zpool and number of deferred rsilvers on vdevs
-function verify_restarts # <msg> <cnt> <defer>
-{
-	msg=$1
-	cnt=$2
-	defer=$3
-
-	# check the number of resilver start in events log
-	RESILVERS=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
-	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
-	[[ "$RESILVERS" -ne "$cnt" ]] &&
-	    log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
-
-	[[ -z "$defer" ]] && return
-
-	# use zdb to find which vdevs have the resilver defer flag
-	VDEV_DEFERS=$(zdb -C $TESTPOOL | \
-	    sed -n -e '/^ *children\[[0-9]\].*$/{h}' \
-	    -e '/ *com.datto:resilver_defer$/{g;p}')
-
-	if [[ "$defer" == "-" ]]
-	then
-		[[ -n $VDEV_DEFERS ]] &&
-		    log_fail "didn't expect any vdevs to have resilver deferred"
-		return
-	fi
-
-	[[ "x${VDEV_DEFERS}x" =~ "x +children[$defer]:x" ]] ||
-	    log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
-}
-
-log_assert "Check for unnecessary resilver restarts"
-
-ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms)
-ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress)
-ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable zfs_zevent_len_max)
-
-set -A RESTARTS -- '1' '2' '2' '2'
-set -A VDEVS -- '' '' '' ''
-set -A DEFER_RESTARTS -- '1' '1' '1' '2'
-set -A DEFER_VDEVS -- '-' '2' '2' '-'
-
-VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
-
-log_onexit cleanup
-
-# ensure that enough events will be saved
-log_must set_tunable32 zfs_zevent_len_max 512
-
-log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
-
-log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
-    raidz ${VDEV_FILES[@]}
-
-# create 4 filesystems
-for fs in fs{0..3}
-do
-	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
-done
-
-# simultaneously write 16M to each of them
-set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0
-log_note "Writing data files"
-for path in ${DATAPATHS[@]}
-do
-	dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
-done
-wait
-
-# test without and with deferred resilve feature enabled
-for test in "without" "with"
-do
-	log_note "Testing $test deferred resilvers"
-
-	if [[ $test == "with" ]]
-	then
-		log_must zpool set feature@resilver_defer=enabled $TESTPOOL
-		RESTARTS=( "${DEFER_RESTARTS[@]}" )
-		VDEVS=( "${DEFER_VDEVS[@]}" )
-		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
-	fi
-
-	# clear the events
-	log_must zpool events -c
-
-	# limit scanning time
-	log_must set_tunable32 zfs_resilver_min_time_ms 50
-
-	# initiate a resilver and suspend the scan as soon as possible
-	log_must zpool replace $TESTPOOL $VDEV_REPLACE
-	log_must set_tunable32 zfs_scan_suspend_progress 1
-
-	# there should only be 1 resilver start
-	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"
-
-	# offline then online a vdev to introduce a new DTL range after current
-	# scan, which should restart (or defer) the resilver
-	log_must zpool offline $TESTPOOL ${VDEV_FILES[2]}
-	log_must zpool sync $TESTPOOL
-	log_must zpool online $TESTPOOL ${VDEV_FILES[2]}
-	log_must zpool sync $TESTPOOL
-
-	# there should now be 2 resilver starts w/o defer, 1 with defer
-	verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"
-
-	# inject read io errors on vdev and verify resilver does not restart
-	log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL
-	log_must cat ${DATAPATHS[1]} > /dev/null
-	log_must zinject -c all
-
-	# there should still be 2 resilver starts w/o defer, 1 with defer
-	verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"
-
-	# unsuspend resilver
-	log_must set_tunable32 zfs_scan_suspend_progress 0
-	log_must set_tunable32 zfs_resilver_min_time_ms 3000
-
-	# wait for resilver to finish
-	for iter in {0..59}
-	do
-		is_pool_resilvered $TESTPOOL && break
-		sleep 1
-	done
-	is_pool_resilvered $TESTPOOL ||
-	    log_fail "resilver timed out"
-
-	# wait for a few txg's to see if a resilver happens
-	log_must zpool sync $TESTPOOL
-	log_must zpool sync $TESTPOOL
-
-	# there should now be 2 resilver starts
-	verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
-done
-
-log_pass "Resilver did not restart unnecessarily"

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh b/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
deleted file mode 100755
index 9ea3215..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
+++ /dev/null

@@ -1,102 +0,0 @@
-#!/bin/ksh -p
-
-#
-# CDDL HEADER START
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2020, Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/resilver/resilver.cfg
-
-#
-# DESCRIPTION:
-# Testing resilver completes when scan errors are encountered, but relevant
-# DTL's have not been lost.
-#
-# STRATEGY:
-# 1. Create a pool (1k recordsize)
-# 2. Create a 32m file (32k records)
-# 3. Inject an error halfway through the file
-# 4. Start a resilver, ensure the error is triggered and that the resilver
-#    does not restart after finishing
-#
-# NB: use legacy scanning to ensure scan of specific block causes error
-#
-
-function cleanup
-{
-	log_must zinject -c all
-	destroy_pool $TESTPOOL
-	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
-	log_must set_tunable32 zfs_scan_legacy $ORIG_SCAN_LEGACY
-}
-
-log_assert "Check for resilver restarts caused by scan errors"
-
-ORIG_SCAN_LEGACY=$(get_tunable zfs_scan_legacy)
-
-log_onexit cleanup
-
-# use legacy scan to ensure injected error will be triggered
-log_must set_tunable32 zfs_scan_legacy 1
-
- # create the pool and a 32M file (32k blocks)
-log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE
-log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]}
-log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1
-
-# determine objset/object
-objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
-object=$(ls -i /$TESTPOOL/file | awk '{print $1}')
-
-# inject event to cause error during resilver
-log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL
-
-# clear events and start resilver
-log_must zpool events -c
-log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE
-
-log_note "waiting for read errors to start showing up"
-for iter in {0..59}
-do
-	zpool sync $TESTPOOL
-	err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}')
-	(( $err > 0 )) && break
-	sleep 1
-done
-
-(( $err == 0 )) && log_fail "Unable to induce errors in resilver"
-
-log_note "waiting for resilver to finish"
-for iter in {0..59}
-do
-	finish=$(zpool events | grep "sysevent.fs.zfs.resilver_finish" | wc -l)
-	(( $finish > 0 )) && break
-	sleep 1
-done
-
-(( $finish == 0 )) && log_fail "resilver took too long to finish"
-
-# wait a few syncs to ensure that zfs does not restart the resilver
-log_must zpool sync $TESTPOOL
-log_must zpool sync $TESTPOOL
-
-# check if resilver was restarted
-start=$(zpool events | grep "sysevent.fs.zfs.resilver_start" | wc -l)
-(( $start != 1 )) && log_fail "resilver restarted unnecessarily"
-
-log_pass "Resilver did not restart unnecessarily from scan errors"

diff --git a/zfs/tests/zfs-tests/tests/functional/resilver/setup.ksh b/zfs/tests/zfs-tests/tests/functional/resilver/setup.ksh
deleted file mode 100755
index 4dfa814..0000000
--- a/zfs/tests/zfs-tests/tests/functional/resilver/setup.ksh
+++ /dev/null

@@ -1,31 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-
-#
-# Copyright (c) 2019, Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/resilver/resilver.cfg
-
-verify_runnable "global"
-
-log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/rootpool/rootpool_007_pos.ksh b/zfs/tests/zfs-tests/tests/functional/rootpool/rootpool_007_pos.ksh
index e355a0f..e4d4268 100755
--- a/zfs/tests/zfs-tests/tests/functional/rootpool/rootpool_007_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rootpool/rootpool_007_pos.ksh

@@ -45,9 +45,13 @@
 verify_runnable "global"
 
 function cleanup {
-	log_must zfs set compression=$orig_compress $rootfs
+	[[ -n "$orig_compress" ]] && \
+	    log_must zfs set compression=$orig_compress $rootfs
 }
 
+typeset assert_msg="the zfs rootfs's compression property can be set to \
+		   gzip and gzip[1-9]"
+
 log_onexit cleanup
 log_assert $assert_msg
 
@@ -55,9 +59,6 @@
 typeset rootfs=$(get_pool_prop bootfs $rootpool)
 typeset orig_compress=$(get_prop compression $rootfs)
 
-typeset assert_msg="the zfs rootfs's compression property can be set to \
-		   gzip and gzip[1-9]"
-
 set -A gtype "gzip" "gzip-1" "gzip-2" "gzip-3" "gzip-4" "gzip-5" \
 	     "gzip-6" "gzip-7" "gzip-8" "gzip-9"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/rootpool/setup.ksh b/zfs/tests/zfs-tests/tests/functional/rootpool/setup.ksh
index 5c3e56b..8d80971 100755
--- a/zfs/tests/zfs-tests/tests/functional/rootpool/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rootpool/setup.ksh

@@ -37,6 +37,6 @@
 # This functionality is supported under Linux, but these test cases
 # are disabled by default since they manipulate the systems root pool.
 #
-if is_linux; then
+if is_linux || is_freebsd; then
         log_unsupported "Supported but disabled by default"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/Makefile.am b/zfs/tests/zfs-tests/tests/functional/rsend/Makefile.am
index 585018a..2cedf03 100644
--- a/zfs/tests/zfs-tests/tests/functional/rsend/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/Makefile.am

@@ -2,6 +2,8 @@
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
+	recv_dedup.ksh \
+	recv_dedup_encrypted_zvol.ksh \
 	rsend_001_pos.ksh \
 	rsend_002_pos.ksh \
 	rsend_003_pos.ksh \
@@ -16,16 +18,17 @@
 	rsend_012_pos.ksh \
 	rsend_013_pos.ksh \
 	rsend_014_pos.ksh \
+	rsend_016_neg.ksh \
 	rsend_019_pos.ksh \
 	rsend_020_pos.ksh \
 	rsend_021_pos.ksh \
 	rsend_022_pos.ksh \
 	rsend_024_pos.ksh \
 	send_encrypted_files.ksh \
+	send_encrypted_freeobjects.ksh \
 	send_encrypted_hierarchy.ksh \
 	send_encrypted_props.ksh \
 	send_encrypted_truncated_files.ksh \
-	send-cD.ksh \
 	send-c_embedded_blocks.ksh \
 	send-c_incremental.ksh \
 	send-c_lz4_disabled.ksh \
@@ -40,16 +43,29 @@
 	send-c_volume.ksh \
 	send-c_zstreamdump.ksh \
 	send-cpL_varied_recsize.ksh \
+	send-L_toggle.ksh \
 	send_freeobjects.ksh \
+	send_partial_dataset.ksh \
 	send_realloc_dnode_size.ksh \
 	send_realloc_files.ksh \
 	send_realloc_encrypted_files.ksh \
 	send_spill_block.ksh \
+	send_raw_spill_block.ksh \
+	send_raw_large_blocks.ksh \
+	send_raw_ashift.ksh \
 	send_holds.ksh \
 	send_hole_birth.ksh \
+	send_invalid.ksh \
 	send_mixed_raw.ksh \
-	send-wDR_encrypted_zvol.ksh
+	send-wR_encrypted_zvol.ksh \
+	send_doall.ksh \
+	send_encrypted_incremental.ksh
 
 dist_pkgdata_DATA = \
+	dedup.zsend.bz2 \
+	dedup_encrypted_zvol.bz2 \
+	dedup_encrypted_zvol.zsend.bz2 \
+	fs.tar.gz \
 	rsend.cfg \
 	rsend.kshlib
+

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/dedup.zsend.bz2 b/zfs/tests/zfs-tests/tests/functional/rsend/dedup.zsend.bz2
new file mode 100644
index 0000000..585e148
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/dedup.zsend.bz2
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.bz2 b/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.bz2
new file mode 100644
index 0000000..73a5742
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.bz2
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.zsend.bz2 b/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.zsend.bz2
new file mode 100644
index 0000000..04a6cb5
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/dedup_encrypted_zvol.zsend.bz2
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/fs.tar.gz b/zfs/tests/zfs-tests/tests/functional/rsend/fs.tar.gz
new file mode 100644
index 0000000..cb6861c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/fs.tar.gz
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup.ksh
new file mode 100755
index 0000000..e6e282a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup.ksh

@@ -0,0 +1,53 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# DESCRIPTION:
+# Verifies that we can receive a dedup send stream by processing it with
+# "zstream redup".
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	destroy_dataset $TESTPOOL/recv "-r"
+	# -f: the tar dir / send file may not exist if the test aborted early.
+	rm -rf /$TESTPOOL/tar $sendfile
+}
+log_onexit cleanup
+
+log_assert "Verify zfs can receive dedup send streams with 'zstream redup'"
+
+typeset sendfile_compressed=$STF_SUITE/tests/functional/rsend/dedup.zsend.bz2
+typeset sendfile=/$TESTPOOL/dedup.zsend
+typeset tarfile=$STF_SUITE/tests/functional/rsend/fs.tar.gz
+
+log_must eval "bzcat <$sendfile_compressed >$sendfile"
+log_must zfs create $TESTPOOL/recv
+log_must eval "zstream redup $sendfile | zfs recv -d $TESTPOOL/recv"
+
+log_must mkdir /$TESTPOOL/tar
+log_must tar --directory /$TESTPOOL/tar -xzf $tarfile
+log_must diff -r /$TESTPOOL/tar /$TESTPOOL/recv
+
+log_pass "zfs can receive dedup send streams with 'zstream redup'"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup_encrypted_zvol.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup_encrypted_zvol.ksh
new file mode 100755
index 0000000..daf559d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/recv_dedup_encrypted_zvol.ksh

@@ -0,0 +1,60 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# DESCRIPTION:
+# Verifies that we can receive a dedup send stream of a zvol by processing it
+# with "zstream redup".
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	destroy_dataset $TESTPOOL/recv "-r"
+	# -f: any of these files may be missing (or the variables still
+	# unset) if the test aborted before creating them.
+	rm -f $sendfile $volfile $keyfile
+}
+log_onexit cleanup
+
+log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"
+
+typeset keyfile=/$TESTPOOL/pkey
+typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
+typeset sendfile_compressed=$STF_SUITE/tests/functional/rsend/dedup_encrypted_zvol.zsend.bz2
+typeset sendfile=/$TESTPOOL/dedup_encrypted_zvol.zsend
+typeset volfile_compressed=$STF_SUITE/tests/functional/rsend/dedup_encrypted_zvol.bz2
+typeset volfile=/$TESTPOOL/dedup_encrypted_zvol
+
+log_must eval "echo 'password' > $keyfile"
+
+log_must eval "bzcat <$sendfile_compressed >$sendfile"
+log_must eval "zstream redup $sendfile | zfs recv $TESTPOOL/recv"
+
+log_must zfs load-key $TESTPOOL/recv
+# Wait on the zvol node that diff reads; $volfile is only created below.
+block_device_wait $recvdev
+log_must eval "bzcat <$volfile_compressed >$volfile"
+log_must diff $volfile $recvdev
+
+log_pass "zfs can receive raw, recursive, and deduplicated send streams"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/zfs/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
index 7f88f55..533ca20 100644
--- a/zfs/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/rsend.kshlib

@@ -25,7 +25,8 @@
 #
 
 #
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Datto Inc. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -120,6 +121,14 @@
 	log_must rm -rf $BACKDIR/*
 
 	if is_global_zone ; then
+		#
+		# Linux: Issuing a `df` seems to properly force any negative
+		# dcache entries to be invalidated preventing failures when
+		# accessing the mount point. Additional investigation required.
+		#
+		# https://github.com/openzfs/zfs/issues/6143
+		#
+		log_must df >/dev/null
 		log_must_busy zfs destroy -Rf $pool
 	else
 		typeset list=$(zfs list -H -r -t all -o name $pool)
@@ -217,15 +226,21 @@
 {
 	typeset dtst1=$1
 	typeset dtst2=$2
-
-	for item in "type" "origin" "volblocksize" "acltype" "dnodesize" \
+	typeset -a props=("type" "origin" "volblocksize" "acltype" "dnodesize" \
 	    "atime" "canmount" "checksum" "compression" "copies" "devices" \
 	    "exec" "quota" "readonly" "recordsize" "reservation" "setuid" \
-	    "snapdir" "version" "volsize" "xattr" "zoned" "mountpoint";
+	    "snapdir" "version" "volsize" "xattr" "mountpoint");
+	if is_freebsd; then
+		props+=("jailed")
+	else
+		props+=("zoned")
+	fi
+	# A bare $props is only element 0 ("type"); walk the whole array.
+	for prop in "${props[@]}";
 	do
-		zfs get -H -o property,value,source $item $dtst1 >> \
+		zfs get -H -o property,value,source $prop $dtst1 >> \
 		    $BACKDIR/dtst1
-		zfs get -H -o property,value,source $item $dtst2 >> \
+		zfs get -H -o property,value,source $prop $dtst2 >> \
 		    $BACKDIR/dtst2
 	done
 
@@ -352,7 +367,7 @@
 	else
 		fs_prop=$(zfs inherit 2>&1 | \
 		    awk '$2=="YES" && $3=="YES" {print $1}'|
-		    egrep -v "devices|mlslabel|sharenfs|sharesmb|zoned")
+		    grep -E -v "devices|mlslabel|sharenfs|sharesmb|zoned")
 	fi
 
 	echo $fs_prop
@@ -517,10 +532,9 @@
 				attrlen="$(((RANDOM % 1000) + 1))"
 				attrvalue="$(random_string VALID_NAME_CHAR \
 				    $attrlen)"
-				attr -qr $attrname $file_name || \
+				rm_xattr $attrname $file_name || \
 				    log_fail "Failed to remove $attrname"
-				attr -qs $attrname \
-				    -V "$attrvalue" $file_name || \
+				set_xattr $attrname "$attrvalue" $file_name || \
 				    log_fail "Failed to set $attrname"
 			elif [ $value -eq 1 ]; then
 				dd if=/dev/urandom of=$file_name \
@@ -551,8 +565,8 @@
 					attrlen="$(((RANDOM % 1000) + 1))"
 					attrvalue="$(random_string \
 					    VALID_NAME_CHAR $attrlen)"
-					attr -qs $attrname \
-					    -V "$attrvalue" $file_name || \
+					set_xattr $attrname \
+					    "$attrvalue" $file_name || \
 					    log_fail "Failed to set $attrname"
 				done
 			fi
@@ -563,16 +577,31 @@
 }
 
 #
-# Mess up file contents
+# Mess up a send file's contents
 #
-# $1 The file path
+# $1 The send file path
 #
-function mess_file
+function mess_send_file
 {
 	file=$1
 
-	filesize=$(stat -c '%s' $file)
+	filesize=$(stat_size $file)
+
 	offset=$(($RANDOM * $RANDOM % $filesize))
+
+	# The random offset might truncate the send stream to be
+	# smaller than the DRR_BEGIN record. If this happens, then
+	# the receiving system won't have enough info to create the
+	# partial dataset at all. We use zstream dump to check for
+	# this and retry in this case.
+	nr_begins=$(head -c $offset $file | zstream dump | \
+	    grep DRR_BEGIN | awk '{ print $5 }')
+	while [ "$nr_begins" -eq 0 ]; do
+		offset=$(($RANDOM * $RANDOM % $filesize))
+		nr_begins=$(head -c $offset $file | zstream dump | \
+		    grep DRR_BEGIN | awk '{ print $5 }')
+	done
+
 	if (($RANDOM % 7 <= 1)); then
 		#
 		# We corrupt 2 bytes to minimize the chance that we
@@ -614,29 +643,51 @@
 # $1 The ZFS send command
 # $2 The filesystem where the streams are sent
 # $3 The receive filesystem
+# $4 Test dry-run (optional)
 #
 function resume_test
 {
-	sendcmd=$1
-	streamfs=$2
-	recvfs=$3
+	typeset sendcmd=$1
+	typeset streamfs=$2
+	typeset recvfs=$3
+	typeset dryrun=${4:-1}
 
 	stream_num=1
 	log_must eval "$sendcmd >/$streamfs/$stream_num"
 
 	for ((i=0; i<2; i=i+1)); do
-		mess_file /$streamfs/$stream_num
+		mess_send_file /$streamfs/$stream_num
 		log_mustnot zfs recv -suv $recvfs </$streamfs/$stream_num
 		stream_num=$((stream_num+1))
 
 		token=$(zfs get -Hp -o value receive_resume_token $recvfs)
-		log_must eval "zfs send -v -t $token >/$streamfs/$stream_num"
+
+		# Do a dry-run
+		[ $dryrun -ne 0 ] && \
+			log_must eval "zfs send -nvt $token > /dev/null"
+
+		log_must eval "zfs send -t $token  >/$streamfs/$stream_num"
 		[[ -f /$streamfs/$stream_num ]] || \
 		    log_fail "NO FILE /$streamfs/$stream_num"
 	done
 	log_must zfs recv -suv $recvfs </$streamfs/$stream_num
 }
 
+function get_resume_token
+{
+	sendcmd=$1
+	streamfs=$2
+	recvfs=$3
+
+	log_must eval "$sendcmd > /$streamfs/1"
+	mess_send_file /$streamfs/1
+	log_mustnot zfs recv -suv $recvfs < /$streamfs/1 2>&1
+	token=$(zfs get -Hp -o value receive_resume_token $recvfs)
+	echo "$token" > /$streamfs/resume_token
+
+	return 0
+}
+
 #
 # Setup filesystems for the resumable send/receive tests
 #
@@ -655,7 +706,7 @@
 	datasetexists $recvfs && log_must_busy zfs destroy -r $recvpool
 	datasetexists $streamfs && log_must_busy zfs destroy -r $streamfs
 
-	if $(datasetexists $sendfs || zfs create -o compress=lz4 $sendfs); then
+	if datasetexists $sendfs || zfs create -o compress=lz4 $sendfs; then
 		mk_files 1000 256 0 $sendfs &
 		mk_files 1000 131072 0 $sendfs &
 		mk_files 100 1048576 0 $sendfs &
@@ -698,7 +749,7 @@
 	shift
 
 	[[ -f $file ]] || log_fail "Couldn't find file: $file"
-	typeset flags=$(cat $file | zstreamdump | \
+	typeset flags=$(cat $file | zstream dump | \
 	    awk '/features =/ {features = $3} END {print features}')
 	typeset -A feature
 	feature[dedup]="1"
@@ -731,7 +782,7 @@
 # comparing. This function does not currently handle incremental streams
 # that remove data.
 #
-# $1 The zstreamdump output file
+# $1 The zstream dump output file
 # $2 The dataset to compare against
 #    This can be a source of a send or recv target (fs, not snapshot)
 # $3 The percentage below which verification is deemed a failure
@@ -748,7 +799,7 @@
 	[[ -f $stream ]] || log_fail "No such file: $stream"
 	datasetexists $ds || log_fail "No such dataset: $ds"
 
-	typeset stream_size=$(cat $stream | zstreamdump | sed -n \
+	typeset stream_size=$(cat $stream | zstream dump | sed -n \
 	    's/	Total payload size = \(.*\) (0x.*)/\1/p')
 
 	typeset inc_size=0
@@ -795,10 +846,20 @@
 }
 
 # Generate a recursive checksum of a filesystem which includes the file
-# contents and any associated xattrs.
+# contents and any associated extended attributes.
 function recursive_cksum
 {
-	find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
-	    --absolute-names --only-values -d {} | sha256sum' \; | \
-	    sort -k 2 | awk '{ print $1 }' | sha256sum
+	case "$(uname)" in
+	FreeBSD)
+		find $1 -type f -exec sh -c 'sha256 -q {}; lsextattr -q \
+		    system {} | sha256 -q; lsextattr -q user {} | sha256 -q' \
+		    \; | sort | sha256 -q
+		;;
+	*)
+		find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
+		    --absolute-names --only-values -d {} | sha256sum' \; | \
+		    sort -k 2 | awk '{ print $1 }' | sha256sum | \
+		    awk '{ print $1 }'
+		;;
+	esac
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh
index 5314787..8e1821d 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_008_pos.ksh

@@ -43,7 +43,7 @@
 
 verify_runnable "both"
 
-# See issue: https://github.com/zfsonlinux/zfs/issues/6066
+# See issue: https://github.com/openzfs/zfs/issues/6066
 log_unsupported "Occasionally hangs"
 
 #		Origin			Clone

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
index d85970a..3f12e05 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh

@@ -21,14 +21,12 @@
 #
 
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Copyright (c) 2009, Sun Microsystems Inc. All rights reserved.
+# Copyright (c) 2013, 2016, Delphix. All rights reserved.
 # Use is subject to license terms.
 #
 
-#
-# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-#
-
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
 
 #
@@ -54,7 +52,7 @@
 		"get")
 			typeset props=$(zfs inherit 2>&1 | \
 				awk '$2=="YES" {print $1}' | \
-				egrep -v "^vol|\.\.\.$")
+				grep -E -v "^vol|\.\.\.$")
 			for item in $props ; do
 				if [[ $item == "mlslabel" ]] && \
 					! is_te_enabled ; then
@@ -118,12 +116,10 @@
 	"$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do
 	rand_set_prop $fs aclinherit "discard" "noallow" "secure" "passthrough"
 	rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
-	rand_set_prop $fs acltype "off" "noacl" "posixacl"
+	rand_set_prop $fs acltype "off" "posix" "nfsv4" "noacl" "posixacl"
 	rand_set_prop $fs atime "on" "off"
 	rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256"
-	rand_set_prop $fs compression "on" "off" "lzjb" "gzip" \
-		"gzip-1" "gzip-2" "gzip-3" "gzip-4" "gzip-5" "gzip-6"   \
-		"gzip-7" "gzip-8" "gzip-9"
+	rand_set_prop $fs compression "${compress_prop_vals[@]}"
 	rand_set_prop $fs copies "1" "2" "3"
 	rand_set_prop $fs devices "on" "off"
 	rand_set_prop $fs exec "on" "off"
@@ -132,15 +128,15 @@
 	rand_set_prop $fs dnodesize "legacy" "auto" "1k" "2k" "4k" "8k" "16k"
 	rand_set_prop $fs setuid "on" "off"
 	rand_set_prop $fs snapdir "hidden" "visible"
-	rand_set_prop $fs xattr "on" "off"
+	if ! is_freebsd; then
+		rand_set_prop $fs xattr "on" "off"
+	fi
 	rand_set_prop $fs user:prop "aaa" "bbb" "23421" "()-+?"
 done
 
 for vol in "$POOL/vol" "$POOL/$FS/vol" ; do
 	rand_set_prop $vol checksum "on" "off" "fletcher2" "fletcher4" "sha256"
-	rand_set_prop $vol compression "on" "off" "lzjb" "gzip" \
-		"gzip-1" "gzip-2" "gzip-3" "gzip-4" "gzip-5" "gzip-6"   \
-		"gzip-7" "gzip-8" "gzip-9"
+	rand_set_prop $vol compression "${compress_prop_vals[@]}"
 	rand_set_prop $vol readonly "on" "off"
 	rand_set_prop $vol copies "1" "2" "3"
 	rand_set_prop $vol user:prop "aaa" "bbb" "23421" "()-+?"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_016_neg.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_016_neg.ksh
new file mode 100755
index 0000000..6dfb342
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_016_neg.ksh

@@ -0,0 +1,33 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that error conditions don't cause panics in zfs send
+#
+# Strategy:
+# 1. Perform a zfs incremental send from a bookmark that doesn't exist
+#
+
+verify_runnable "both"
+
+# "#bla" names a bookmark that does not exist, so the send must fail cleanly.
+log_neg eval "zfs send -i \#bla $POOL/$FS@final > /dev/null"
+log_pass "Ensure that error conditions cause appropriate failures."

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh
index 2d9fb01..c44985a 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh

@@ -46,7 +46,7 @@
 
 test_fs_setup $sendfs $recvfs $streamfs
 log_must zfs unmount -f $sendfs
-resume_test "zfs send $sendfs" $streamfs $recvfs
+resume_test "zfs send $sendfs" $streamfs $recvfs 0
 file_check $sendfs $recvfs
 
 log_pass "Verify resumability of a full ZFS send/receive with the source " \

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh
new file mode 100755
index 0000000..483efcc
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh

@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that send -L can be changed to on in an incremental.
+# Verify that send -L can not be turned off in an incremental.
+#
+
+function cleanup
+{
+	log_must_busy zfs destroy -r $TESTPOOL/fs
+	log_must_busy zfs destroy -r $TESTPOOL/recv
+}
+
+verify_runnable "both"
+
+log_assert "Verify toggling send -L works as expected"
+log_onexit cleanup
+
+log_must zfs create -o compression=on -o recordsize=1m $TESTPOOL/fs
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/fs/file bs=1024 count=1500
+
+log_must zfs snapshot $TESTPOOL/fs@snap
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/fs/file bs=1024 count=1500 conv=notrunc seek=2048
+
+log_must zfs snapshot $TESTPOOL/fs@snap2
+
+log_must zfs create $TESTPOOL/recv
+
+log_must zfs send -c $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/noL-noL
+log_must zfs send -c -i @snap $TESTPOOL/fs@snap2| zfs recv $TESTPOOL/recv/noL-noL
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/noL-noL/file
+
+log_must zfs send -c -L $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/L-L
+log_must zfs send -c -L -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/L-L
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/L-L/file
+
+log_must zfs send -c $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/noL-L
+log_must zfs send -c -L -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/noL-L
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/noL-L/file
+
+log_must zfs send -c -L $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/L-noL
+log_mustnot zfs send -c -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/L-noL
+log_must diff /$TESTPOOL/fs/.zfs/snapshot/snap/file /$TESTPOOL/recv/L-noL/file
+
+log_pass "Verify toggling send -L works as expected"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-cD.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-cD.ksh
deleted file mode 100755
index d0754a4..0000000
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-cD.ksh
+++ /dev/null

@@ -1,77 +0,0 @@
-#!/bin/ksh -p
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2015, 2018 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/rsend/rsend.kshlib
-
-#
-# Description:
-# Verify that the -c and -D flags do not interfere with each other.
-#
-# Strategy:
-# 1. Write unique data to a filesystem and create a compressed, deduplicated
-#    full stream.
-# 2. Verify that the stream and send dataset show the same size
-# 3. Make several copies of the original data, and create both full and
-#    incremental compressed, deduplicated send streams
-# 4. Verify the full stream is no bigger than the stream from step 1
-# 5. Verify the streams can be received correctly.
-#
-
-verify_runnable "both"
-
-log_assert "Verify that the -c and -D flags do not interfere with each other"
-log_onexit cleanup_pool $POOL2
-
-typeset sendfs=$POOL2/sendfs
-typeset recvfs=$POOL2/recvfs
-typeset stream0=$BACKDIR/stream.0
-typeset stream1=$BACKDIR/stream.1
-typeset inc=$BACKDIR/stream.inc
-
-log_must zfs create -o compress=lz4 $sendfs
-log_must zfs create -o compress=lz4 $recvfs
-typeset dir=$(get_prop mountpoint $sendfs)
-# Don't use write_compressible: we want compressible but undeduplicable data.
-log_must eval "dd if=/dev/urandom bs=1024k count=4 | base64 >$dir/file"
-log_must zfs snapshot $sendfs@snap0
-log_must eval "zfs send -D -c $sendfs@snap0 >$stream0"
-
-# The stream size should match at this point because the data is all unique
-verify_stream_size $stream0 $sendfs
-
-for i in {0..3}; do
-	log_must cp $dir/file $dir/file.$i
-done
-log_must zfs snapshot $sendfs@snap1
-
-# The stream sizes should match, since the second stream contains no new blocks
-log_must eval "zfs send -D -c $sendfs@snap1 >$stream1"
-typeset size0=$(stat -c %s $stream0)
-typeset size1=$(stat -c %s $stream1)
-within_percent $size0 $size1 90 || log_fail "$size0 and $size1"
-
-# Finally, make sure the receive works correctly.
-log_must eval "zfs send -D -c -i snap0 $sendfs@snap1 >$inc"
-log_must eval "zfs recv -d $recvfs <$stream0"
-log_must eval "zfs recv -d $recvfs <$inc"
-cmp_ds_cont $sendfs $recvfs
-
-# The size of the incremental should be the same as the initial send.
-typeset size2=$(stat -c %s $inc)
-within_percent $size0 $size2 90 || log_fail "$size0 and $size1"
-
-log_pass "The -c and -D flags do not interfere with each other"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_embedded_blocks.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_embedded_blocks.ksh
index 1983a3e..3dce217 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_embedded_blocks.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_embedded_blocks.ksh

@@ -53,7 +53,7 @@
 	# For lz4, this method works for blocks up to 16k, but not larger
 	[[ $recsize -eq $((32 * 1024)) ]] && break
 
-	if is_linux; then
+	if is_linux || is_freebsd; then
 		log_must truncate -s $recsize $dir/$recsize
 		log_must dd if=/dev/urandom of=$dir/$recsize \
 		    seek=$((recsize - 8)) bs=1 count=8 conv=notrunc
@@ -63,17 +63,17 @@
 	fi
 done
 
-# Generate the streams and zstreamdump output.
+# Generate the streams and zstream dump output.
 log_must zfs snapshot $sendfs@now
 log_must eval "zfs send -c $sendfs@now >$stream"
-log_must eval "zstreamdump -v <$stream >$dump"
+log_must eval "zstream dump -v <$stream >$dump"
 log_must eval "zfs recv -d $recvfs <$stream"
 cmp_ds_cont $sendfs $recvfs
 verify_stream_size $stream $sendfs
 log_mustnot stream_has_features $stream embed_data
 
 log_must eval "zfs send -c -e $sendfs@now >$stream2"
-log_must eval "zstreamdump -v <$stream2 >$dump2"
+log_must eval "zstream dump -v <$stream2 >$dump2"
 log_must eval "zfs recv -d $recvfs2 <$stream2"
 cmp_ds_cont $sendfs $recvfs2
 verify_stream_size $stream2 $sendfs
@@ -101,9 +101,9 @@
 	    log_fail "Obj $recv2_obj not embedded in $recvfs2"
 
 	grep -q "WRITE_EMBEDDED object = $send_obj offset = 0" $dump && \
-	    log_fail "Obj $obj embedded in zstreamdump output"
+	    log_fail "Obj $obj embedded in zstream dump output"
 	grep -q "WRITE_EMBEDDED object = $send_obj offset = 0" $dump2 || \
-	    log_fail "Obj $obj not embedded in zstreamdump output"
+	    log_fail "Obj $obj not embedded in zstream dump output"
 done
 
 log_pass "Compressed streams can contain embedded blocks."

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_lz4_disabled.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_lz4_disabled.ksh
index 666e11f..bc706ba 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_lz4_disabled.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_lz4_disabled.ksh

@@ -52,8 +52,8 @@
 		poolexists $POOL3 && destroy_pool $POOL3
 		log_must zpool create $pool_opt $POOL3 $DISK3
 
-		datasetexists $send_ds && log_must_busy zfs destroy -r $send_ds
-		datasetexists $recv_ds && log_must_busy zfs destroy -r $recv_ds
+		datasetexists $send_ds && destroy_dataset $send_ds -r
+		datasetexists $recv_ds && destroy_dataset $recv_ds -r
 
 		log_must zfs create -o compress=$compress $send_ds
 		typeset dir=$(get_prop mountpoint $send_ds)

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_recv_lz4_disabled.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_recv_lz4_disabled.ksh
index 614394e..15873ed 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_recv_lz4_disabled.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_recv_lz4_disabled.ksh

@@ -12,10 +12,11 @@
 #
 
 #
-# Copyright (c) 2015 by Delphix. All rights reserved.
+# Copyright (c) 2015, Delphix. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/include/properties.shlib
 
 #
 # Description:
@@ -34,7 +35,6 @@
 
 log_assert "Verify compressed streams are rejected if incompatible."
 
-typeset compress_types="off gzip lz4"
 typeset send_ds=$POOL2/testds
 typeset recv_ds=$POOL3/testds
 
@@ -49,9 +49,9 @@
 datasetexists $POOL3 && log_must zpool destroy $POOL3
 log_must zpool create -d $POOL3 $DISK3
 
-for compress in $compress_types; do
-	datasetexists $send_ds && log_must_busy zfs destroy -r $send_ds
-	datasetexists $recv_ds && log_must_busy zfs destroy -r $recv_ds
+for compress in "${compress_prop_vals[@]}"; do
+	datasetexists $send_ds && destroy_dataset $send_ds -r
+	datasetexists $recv_ds && destroy_dataset $recv_ds -r
 
 	log_must zfs create -o compress=$compress $send_ds
 	typeset dir=$(get_prop mountpoint $send_ds)

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh
index f110681..056fc2c 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_stream_size_estimate.ksh

@@ -12,10 +12,11 @@
 #
 
 #
-# Copyright (c) 2015 by Delphix. All rights reserved.
+# Copyright (c) 2015, Delphix. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/include/properties.shlib
 
 #
 # Description:
@@ -28,7 +29,6 @@
 #
 
 verify_runnable "both"
-typeset compress_types="off gzip lz4"
 typeset send_ds="$POOL2/testfs"
 typeset send_vol="$POOL2/vol"
 typeset send_voldev="$ZVOL_DEVDIR/$POOL2/vol"
@@ -40,7 +40,12 @@
 {
 	typeset cmd=$1
 	typeset ds=${cmd##* }
-	typeset tmpfile=$(mktemp -p $BACKDIR)
+	if is_freebsd; then
+		mkdir -p $BACKDIR
+		typeset tmpfile=$(TMPDIR=$BACKDIR mktemp)
+	else
+		typeset tmpfile=$(mktemp -p $BACKDIR)
+	fi
 
 	eval "$cmd >$tmpfile"
 	[[ $? -eq 0 ]] || log_fail "get_estimated_size: $cmd"
@@ -55,12 +60,12 @@
 
 write_compressible $BACKDIR ${megs}m
 
-for compress in $compress_types; do
-	datasetexists $send_ds && log_must_busy zfs destroy -r $send_ds
-	datasetexists $send_vol && log_must_busy zfs destroy -r $send_vol
+for compress in "${compress_prop_vals[@]}"; do
+	datasetexists $send_ds && destroy_dataset $send_ds -r
+	datasetexists $send_vol && destroy_dataset $send_vol -r
 	log_must zfs create -o compress=$compress $send_ds
 	log_must zfs create -V 1g -o compress=$compress $send_vol
-	block_device_wait
+	block_device_wait $send_voldev
 
 	typeset dir=$(get_prop mountpoint $send_ds)
 	log_must cp $file $dir

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_verify_ratio.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_verify_ratio.ksh
index 9b886f8..845349a 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_verify_ratio.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_verify_ratio.ksh

@@ -12,7 +12,8 @@
 #
 
 #
-# Copyright (c) 2015 by Delphix. All rights reserved.
+# Copyright (c) 2015, Delphix. All rights reserved.
+# Copyright (c) 2019, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
@@ -35,9 +36,9 @@
 log_onexit cleanup_pool $POOL2
 
 typeset sendfs=$POOL2/$FS
-typeset megs=128
+typeset megs=64
 
-for prop in $(get_rand_compress_any 6); do
+for prop in "${compress_prop_vals[@]}"; do
 	for compressible in 'yes' 'no'; do
 		log_must zfs create -o compress=$prop $sendfs
 

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
index 988ed91..1bf2348 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh

@@ -29,6 +29,7 @@
 
 function cleanup
 {
+	rm $BACKDIR/copy
 	log_must_busy zfs destroy -r $vol
 	cleanup_pool $POOL2
 }
@@ -60,7 +61,9 @@
 
 verify_stream_size $BACKDIR/full $vol
 verify_stream_size $BACKDIR/full $vol2
-md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
+block_device_wait $voldev2
+log_must dd if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
+md5=$(md5digest $BACKDIR/copy)
 [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1"
 
 # Repeat, for an incremental send
@@ -72,7 +75,9 @@
 
 verify_stream_size $BACKDIR/inc $vol 90 $vol@snap
 verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap
-md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
+block_device_wait $voldev2
+log_must dd skip=$megs if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
+md5=$(md5digest $BACKDIR/copy)
 [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2"
 
 log_pass "Verify compressed send works with volumes"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_zstreamdump.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_zstreamdump.ksh
index 52abfe7..5b9939c 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-c_zstreamdump.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-c_zstreamdump.ksh

@@ -13,6 +13,7 @@
 
 #
 # Copyright (c) 2015 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Datto, Inc. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
@@ -20,29 +21,35 @@
 
 #
 # Description:
-# Verify compression features show up in zstreamdump
+# Verify compression features show up in zstream dump
 #
 # Strategy:
 # 1. Create a full compressed send stream
-# 2. Verify zstreamdump shows this stream has the relevant features
-# 3. Verify zstreamdump's accounting of logical and compressed size is correct
+# 2. Verify zstream dump shows this stream has the relevant features
+# 3. Verify zstream dump's accounting of logical and compressed size is correct
+# 4. Verify the toname from a resume token
+# 5. Verify it fails with corrupted resume token
+# 6. Verify it fails with missing resume token
 #
 
 verify_runnable "both"
 
-log_assert "Verify zstreamdump correctly interprets compressed send streams."
+log_assert "Verify zstream dump correctly interprets compressed send streams."
 log_onexit cleanup_pool $POOL2
 
 typeset sendfs=$POOL2/fs
+typeset streamfs=$POOL2/fs2
+typeset recvfs=$POOL2/fs3
 
 log_must zfs create -o compress=lz4 $sendfs
+log_must zfs create -o compress=lz4 $streamfs
 typeset dir=$(get_prop mountpoint $sendfs)
 write_compressible $dir 16m
 log_must zfs snapshot $sendfs@full
 
 log_must eval "zfs send -c $sendfs@full >$BACKDIR/full"
 log_must stream_has_features $BACKDIR/full lz4 compressed
-cat $BACKDIR/full | zstreamdump -v > $BACKDIR/dump.out
+cat $BACKDIR/full | zstream dump -v > $BACKDIR/dump.out
 
 lsize=$(awk '/^WRITE [^0]/ {lsize += $24} END {printf("%d", lsize)}' \
     $BACKDIR/dump.out)
@@ -56,4 +63,13 @@
 within_percent $csize $csize_prop 90 || log_fail \
     "$csize and $csize_prop differed by too much"
 
-log_pass "zstreamdump correctly interprets compressed send streams."
+x=$(get_resume_token "zfs send -c $sendfs@full" $streamfs $recvfs)
+resume_token=$(cat /$streamfs/resume_token)
+to_name_fs=$sendfs
+log_must eval "zstream token $resume_token | grep $to_name_fs"
+
+bad_resume_token="1-1162e8285b-100789c6360"
+log_mustnot eval "zstream token $bad_resume_token 2>&1"
+log_mustnot eval "zstream token 2>&1"
+
+log_pass "zstream dump correctly interprets compressed send streams."

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize.ksh
index 84c0a5e..25ad8e0 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize.ksh

@@ -55,8 +55,8 @@
 
 function cleanup
 {
-	datasetexists $TESTPOOL/128k && log_must_busy zfs destroy $TESTPOOL/128k
-	datasetexists $TESTPOOL/1m && log_must_busy zfs destroy $TESTPOOL/1m
+	datasetexists $TESTPOOL/128k && destroy_dataset $TESTPOOL/128k
+	datasetexists $TESTPOOL/1m && destroy_dataset $TESTPOOL/1m
 	cleanup_pool $POOL2
 	destroy_pool $POOL3
 }
@@ -72,8 +72,12 @@
 	[[ -f $file ]] || log_fail "file '$file' doesn't exist"
 
 	typeset read_recsize=$(get_prop recsize $recv_ds)
-	typeset read_file_bs=$(stat $file | sed -n \
-	    's/.*IO Block: \([0-9]*\).*/\1/p')
+	if is_freebsd; then
+		typeset read_file_bs=$(stat -f "%k" $file)
+	else
+		typeset read_file_bs=$(stat $file | sed -n \
+		    's/.*IO Block: \([0-9]*\).*/\1/p')
+	fi
 
 	[[ $read_recsize = $expected_recsize ]] || log_fail \
 	    "read_recsize: $read_recsize expected_recsize: $expected_recsize"
@@ -130,7 +134,7 @@
 	[[ -f $stream ]] && log_must rm $stream
 	log_must eval "zfs send $flags $send_snap >$stream"
 	$verify eval "zfs recv $recv_ds <$stream"
-	typeset stream_size=$(cat $stream | zstreamdump | sed -n \
+	typeset stream_size=$(cat $stream | zstream dump | sed -n \
 	    's/	Total write size = \(.*\) (0x.*)/\1/p')
 
 	#

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh
deleted file mode 100755
index 0a7ae74..0000000
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh
+++ /dev/null

@@ -1,93 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2018 by Datto Inc. All rights reserved.
-#
-
-. $STF_SUITE/tests/functional/rsend/rsend.kshlib
-
-#
-# DESCRIPTION:
-# Verify that zvols with dedup=on and encryption=on can be sent and received
-# with a deduplicated raw send stream.
-#
-# STRATEGY:
-# 1. Create a zvol with dedup and encryption on and put a filesystem on it
-# 2. Copy a file into the zvol a few times and take a snapshot
-# 3. Repeat step 2 a few times to create more snapshots
-# 4. Send all snapshots in a recursive, raw, deduplicated send stream
-# 5. Mount the received zvol and verify that all of the data there is correct
-#
-
-verify_runnable "both"
-
-function cleanup
-{
-	ismounted $recvmnt ext4 && log_must umount $recvmnt
-	ismounted $mntpnt ext4 && log_must umount $mntpnt
-	[[ -d $recvmnt ]] && log_must rm -rf $keyfile
-	[[ -d $mntpnt ]] && log_must rm -rf $keyfile
-	destroy_dataset $TESTPOOL/recv "-r"
-	destroy_dataset $TESTPOOL/$TESTVOL "-r"
-	[[ -f $keyfile ]] && log_must rm $keyfile
-	[[ -f $sendfile ]] && log_must rm $sendfile
-}
-log_onexit cleanup
-
-log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"
-
-typeset keyfile=/$TESTPOOL/pkey
-typeset snap_count=5
-typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
-typeset mntpnt=$TESTDIR/$TESTVOL
-typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
-typeset recvmnt=$TESTDIR/recvmnt
-typeset sendfile=$TESTDIR/sendfile
-
-log_must eval "echo 'password' > $keyfile"
-
-log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
-	-o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
-block_device_wait
-
-log_must eval "echo 'y' | newfs -t ext4 -v $zdev"
-log_must mkdir -p $mntpnt
-log_must mkdir -p $recvmnt
-log_must mount $zdev $mntpnt
-
-for ((i = 1; i <= $snap_count; i++)); do
-	log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
-	for ((j = 0; j < 10; j++)); do
-		log_must cp $mntpnt/file $mntpnt/file$j
-	done
-
-	log_must sync
-	log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
-done
-
-log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
-log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
-log_must zfs load-key $TESTPOOL/recv
-block_device_wait
-
-log_must mount $recvdev $recvmnt
-
-md5_1=$(cat $mntpnt/* | md5digest)
-md5_2=$(cat $recvmnt/* | md5digest)
-[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"
-
-log_pass "zfs can receive raw, recursive, and deduplicated send streams"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh
new file mode 100755
index 0000000..b95fc3d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send-wR_encrypted_zvol.ksh

@@ -0,0 +1,108 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# DESCRIPTION:
+# Verify that zvols with encryption=on can be sent and received with a raw
+# send stream.
+#
+# STRATEGY:
+# 1. Create a zvol with encryption on and put a filesystem on it
+# 2. Copy a file into the zvol a few times and take a snapshot
+# 3. Repeat step 2 a few times to create more snapshots
+# 4. Send all snapshots in a recursive, raw send stream
+# 5. Mount the received zvol and verify that all of the data there is correct
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	ismounted $recvmnt $fstype && log_must umount $recvmnt
+	ismounted $mntpnt $fstype && log_must umount $mntpnt
+	[[ -d $recvmnt ]] && log_must rm -rf $recvmnt	# mount point made with mkdir -p
+	[[ -d $mntpnt ]] && log_must rm -rf $mntpnt	# mount point made with mkdir -p
+	destroy_dataset $TESTPOOL/recv "-r"
+	destroy_dataset $TESTPOOL/$TESTVOL "-r"
+	[[ -f $keyfile ]] && log_must rm $keyfile	# passphrase file
+	[[ -f $sendfile ]] && log_must rm $sendfile	# saved send stream
+}
+log_onexit cleanup
+
+log_assert "Verify zfs can receive raw, recursive send streams"
+
+typeset keyfile=/$TESTPOOL/pkey
+typeset snap_count=5
+typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
+typeset mntpnt=$TESTDIR/$TESTVOL
+typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
+typeset recvmnt=$TESTDIR/recvmnt
+typeset sendfile=$TESTDIR/sendfile
+typeset fstype=none
+
+log_must eval "echo 'password' > $keyfile"
+
+log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
+	-o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
+block_device_wait
+
+if is_linux; then
+	# ext4 only supported on Linux
+	log_must new_fs -t ext4 $zdev
+	fstype=ext4
+	typeset remount_ro="-o remount,ro"
+	typeset remount_rw="-o remount,rw"
+else
+	log_must new_fs $zdev
+	fstype=$NEWFS_DEFAULT_FS
+	typeset remount_ro="-ur"
+	typeset remount_rw="-uw"
+fi
+log_must mkdir -p $mntpnt
+log_must mkdir -p $recvmnt
+log_must mount $zdev $mntpnt
+
+for ((i = 1; i <= $snap_count; i++)); do
+	log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
+	for ((j = 0; j < 10; j++)); do
+		log_must cp $mntpnt/file $mntpnt/file$j
+	done
+
+	log_must sync
+	log_must mount $remount_ro $zdev $mntpnt
+	log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
+	log_must mount $remount_rw $zdev $mntpnt
+done
+
+log_must eval "zfs send -wR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
+log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
+log_must zfs load-key $TESTPOOL/recv
+block_device_wait
+
+log_must mount $recvdev $recvmnt
+
+md5_1=$(cat $mntpnt/* | md5digest)
+md5_2=$(cat $recvmnt/* | md5digest)
+[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"
+
+log_pass "zfs can receive raw, recursive send streams"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_doall.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_doall.ksh
new file mode 100755
index 0000000..e5c3490
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_doall.ksh

@@ -0,0 +1,67 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify send_doall stream is properly received
+#
+# Strategy:
+# 1) Create a set of snapshots.
+# 2) Send these snapshots (from origin to the last one) to a file using send_doall.
+# 3) Receive the file to newfs to test if the stream is properly handled.
+#
+
+verify_runnable "both"
+
+log_assert "Verify send_doall stream is correct"
+
+function cleanup
+{
+	rm -f $BACKDIR/fs@*			# send stream(s) written under $BACKDIR
+	destroy_dataset $POOL/fs "-rR"		# source dataset created by this test
+	destroy_dataset $POOL/newfs "-rR"	# target of the zfs recv
+}
+
+log_onexit cleanup
+
+log_must zfs create $POOL/fs
+log_must zfs create $POOL/fs/child
+
+# Create 3 files and a snapshot between each file creation.
+for i in {1..3}; do
+	file="/$POOL/fs/file$i"
+	log_must mkfile 16384 $file
+
+	file="/$POOL/fs/child/file$i"
+	log_must mkfile 16384 $file
+
+	log_must zfs snapshot -r $POOL/fs@snap$i
+done
+
+# Snapshot the pool and send it to the new dataset.
+log_must eval "send_doall $POOL/fs@snap3 >$BACKDIR/fs@snap3"
+log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap3"
+
+zfs list $POOL/newfs/child
+if [[ $? -eq 0 ]]; then
+	log_fail "Children dataset should not have been received"
+fi
+
+log_pass "Verify send_doall stream is correct"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
index 6288178..661fbe8 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh

@@ -46,9 +46,9 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 	datasetexists $TESTPOOL/recv && \
-		log_must zfs destroy -r $TESTPOOL/recv
+		destroy_dataset $TESTPOOL/recv -r
 	[[ -f $keyfile ]] && log_must rm $keyfile
 	[[ -f $sendfile ]] && log_must rm $sendfile
 }
@@ -84,11 +84,11 @@
 log_must zfs set xattr=sa $TESTPOOL/$TESTFS2
 log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir
 
-# ZoL issue #7432
+# OpenZFS issue #7432
 log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2
 log_must touch /$TESTPOOL/$TESTFS2/attrs
-log_must eval "python -c 'print \"a\" * 4096' | \
-	attr -s bigval /$TESTPOOL/$TESTFS2/attrs"
+log_must eval "python3 -c 'print(\"a\" * 4096)' | \
+	set_xattr_stdin bigval /$TESTPOOL/$TESTFS2/attrs"	# print() call: python3 rejects the py2 print statement
 log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2
 
 log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh
new file mode 100755
index 0000000..92451bd
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh

@@ -0,0 +1,87 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2023 by Findity AB
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that receiving a raw encrypted stream, with a FREEOBJECTS
+# removing all existing objects in a block followed by an OBJECT write
+# to the same block, does not result in a panic.
+#
+# Strategy:
+# 1. Create a new encrypted filesystem
+# 2. Create file f1 as the first object in some block (here object 128)
+# 3. Take snapshot A
+# 4. Create file f2 as the second object in the same block (here object 129)
+# 5. Delete f1
+# 6. Take snapshot B
+# 7. Receive a full raw encrypted send of A
+# 8. Receive an incremental raw send of B
+#
+verify_runnable "both"
+
+function create_object_with_num
+{
+	typeset file=$1		# $1: path of the file to (re)create
+	typeset num=$2		# $2: number the file must show in 'ls -li' column 1
+
+	typeset tries=100 i onum	# keep loop state local to this function
+	for ((i=0; i<$tries; i++)); do
+		touch $file
+		onum=$(ls -li $file | awk '{print $1}')
+
+		if [[ $onum -ne $num ]] ; then
+			rm -f $file	# wrong number: discard and retry
+		else
+			break
+		fi
+	done
+	if [[ $i -eq $tries ]]; then	# loop ran out without a match
+		log_fail "Failed to create object with number $num"
+	fi
+}
+
+log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash"
+
+sendds=sendencfods
+recvds=recvencfods
+keyfile=/$POOL/keyencfods
+f1=/$POOL/$sendds/f1
+f2=/$POOL/$sendds/f2
+
+log_must eval "echo 'password' > $keyfile"
+
+#
+# xattr=sa and dnodesize=legacy for sequential object numbers, see
+# note in send_freeobjects.ksh.
+#
+log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \
+	-o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds
+
+create_object_with_num $f1 128
+log_must zfs snap $POOL/$sendds@A
+create_object_with_num $f2 129
+log_must rm $f1
+log_must zfs snap $POOL/$sendds@B
+
+log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds"
+log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \
+	"zfs recv $POOL/$recvds"
+
+log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh
new file mode 100755
index 0000000..491ad55
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_incremental.ksh

@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Incrementally receiving a snapshot to an encrypted filesystem succeeds.
+#
+# Strategy:
+# 1) Create a pool and an encrypted fs
+# 2) Create some files and snapshots
+# 3) Send the first snapshot to a second encrypted as well as an
+#	unencrypted fs.
+# 4) Incrementally send the second snapshot to the unencrypted fs.
+# 5) Rollback the second encrypted fs to the first snapshot.
+# 6) Incrementally send the second snapshot from the unencrypted to the
+#	second encrypted fs.
+# 7) Incrementally send the third snapshot from the first encrypted to the
+#	unencrypted fs.
+# 8) Incrementally send the third snapshot from the unencrypted to the second
+#	encrypted fs.
+#
+
+verify_runnable "both"
+
+log_assert "Incrementally receiving a snapshot to an encrypted filesystem succeeds"
+
+function cleanup
+{
+	destroy_pool pool_lb			# pool this test creates on vdev_a
+	log_must rm -f $TESTDIR/vdev_a		# file vdev backing pool_lb
+}
+
+log_onexit cleanup
+
+typeset passphrase="password"
+typeset passphrase2="password2"
+
+typeset file="/pool_lb/encryptme/$TESTFILE0"
+typeset file1="/pool_lb/encryptme/$TESTFILE1"
+typeset file2="/pool_lb/encryptme/$TESTFILE2"
+
+# Create pool
+truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
+log_must zpool create -f pool_lb $TESTDIR/vdev_a
+log_must eval "echo $passphrase > /pool_lb/pwd"
+log_must eval "echo $passphrase2 > /pool_lb/pwd2"
+
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+	-o keylocation=file:///pool_lb/pwd pool_lb/encryptme
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024
+log_must zfs snapshot pool_lb/encryptme@snap1
+log_must dd if=/dev/urandom of=$file1 bs=1024 count=1024
+log_must zfs snapshot pool_lb/encryptme@snap2
+log_must dd if=/dev/urandom of=$file2 bs=1024 count=1024
+log_must zfs snapshot pool_lb/encryptme@snap3
+log_must eval "zfs send -Lc pool_lb/encryptme@snap1 | zfs recv \
+	-o encryption=on -o keyformat=passphrase -o keylocation=file:///pool_lb/pwd2 \
+	pool_lb/encrypttwo"
+log_must eval "zfs send -Lc pool_lb/encryptme@snap1 | zfs recv \
+	pool_lb/unencryptme"
+log_must eval "zfs send -Lc -i pool_lb/encryptme@{snap1,snap2} | zfs recv \
+	pool_lb/unencryptme"
+log_must zfs rollback pool_lb/encrypttwo@snap1
+log_must eval "zfs send -Lc -i pool_lb/unencryptme@{snap1,snap2} | zfs recv \
+	pool_lb/encrypttwo"
+log_must eval "zfs send -Lc -i pool_lb/encryptme@{snap2,snap3} | zfs recv \
+	pool_lb/unencryptme"
+log_must eval "zfs send -Lc -i pool_lb/unencryptme@{snap2,snap3} | zfs recv \
+	-F pool_lb/encrypttwo"
+
+log_pass "Incrementally receiving a snapshot to an encrypted filesystem succeeds"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
index 793904d..c0c7b68 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh

@@ -133,6 +133,14 @@
 log_must test "$recv_cksum" == "$cksum"
 log_must zfs destroy -r $ds
 
+# Test that we can override sharesmb property for encrypted raw stream.
+log_note "Must be able to override sharesmb property for encrypted raw stream"
+ds=$TESTPOOL/recv
+log_must eval "zfs send -w $esnap > $sendfile"
+log_must eval "zfs recv -o sharesmb=on $ds < $sendfile"
+log_must test "$(get_prop 'sharesmb' $ds)" == "on"
+log_must zfs destroy -r $ds
+
 # Test that we can override encryption properties on a properties stream
 # of an unencrypted dataset, turning it into an encryption root.
 log_note "Must be able to receive stream with props as encryption root"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh
index d701bce..5760bf9 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh

@@ -42,9 +42,9 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$TESTFS2 && \
-		log_must zfs destroy -r $TESTPOOL/$TESTFS2
+		destroy_dataset $TESTPOOL/$TESTFS2 -r
 	datasetexists $TESTPOOL/recv && \
-		log_must zfs destroy -r $TESTPOOL/recv
+		destroy_dataset $TESTPOOL/recv -r
 	[[ -f $keyfile ]] && log_must rm $keyfile
 	[[ -f $sendfile ]] && log_must rm $sendfile
 }
@@ -52,8 +52,16 @@
 
 function recursive_cksum
 {
-	find $1 -type f -exec sha256sum {} \; | \
-		sort -k 2 | awk '{ print $1 }' | sha256sum
+	case "$(uname)" in
+	FreeBSD)
+		find $1 -type f -exec sha256 -q {} \; | \
+		    sort | sha256digest
+		;;
+	*)
+		find $1 -type f -exec sha256sum {} \; | \
+		    sort -k 2 | awk '{ print $1 }' | sha256digest
+		;;
+	esac
 }
 
 log_assert "Verify 'zfs send -w' works with many different file layouts"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_freeobjects.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_freeobjects.ksh
index 6533352..925f667 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_freeobjects.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_freeobjects.ksh

@@ -21,7 +21,7 @@
 #
 # Description:
 # Verify FREEOBJECTS record frees sequential objects (See
-# https://github.com/zfsonlinux/zfs/issues/6694)
+# https://github.com/openzfs/zfs/issues/6694)
 #
 # Strategy:
 # 1. Create three files with sequential object numbers, f1 f2 and f3

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh
index c2b5ff7..1dfa97e 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_hole_birth.ksh

@@ -53,7 +53,7 @@
 {
 	cleanup_pool $sendpool
 	cleanup_pool $recvpool
-	set_tunable64 send_holes_without_birth_time 1
+	set_tunable64 SEND_HOLES_WITHOUT_BIRTH_TIME 1
 }
 
 function send_and_verify
@@ -72,7 +72,7 @@
 # to be re-enabled for this test case to verify correctness.  Once we're
 # comfortable that all hole_birth bugs has been resolved this behavior may
 # be re-enabled by default.
-log_must set_tunable64 send_holes_without_birth_time 0
+log_must set_tunable64 SEND_HOLES_WITHOUT_BIRTH_TIME 0
 
 # Incremental send truncating the file and adding new data.
 log_must zfs create -o recordsize=4k $sendfs
@@ -81,7 +81,7 @@
 log_must dd if=/dev/urandom of=/$sendfs/file1 bs=4k count=11264 seek=1152
 log_must zfs snapshot $sendfs@snap1
 
-log_must truncate -s 4194304 /$sendfs/file1
+log_must truncate -s 4M /$sendfs/file1
 log_must dd if=/dev/urandom of=/$sendfs/file1 bs=4k count=152 seek=384 \
     conv=notrunc
 log_must dd if=/dev/urandom of=/$sendfs/file1 bs=4k count=10 seek=1408 \

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_invalid.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_invalid.ksh
new file mode 100755
index 0000000..2ce7ee4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_invalid.ksh

@@ -0,0 +1,52 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Portions Copyright 2020 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that send with invalid options will fail gracefully.
+#
+# Strategy:
+# 1. Perform zfs send on the cli with the order of the snapshots reversed
+# 2. Perform zfs send using libzfs with the order of the snapshots reversed
+#
+
+verify_runnable "both"
+
+log_assert "Verify that send with invalid options will fail gracefully."
+
+function cleanup
+{
+	datasetexists $testfs && destroy_dataset $testfs -r
+}
+log_onexit cleanup
+
+testfs=$POOL/fs
+
+log_must zfs create $testfs
+log_must zfs snap $testfs@snap0
+log_must zfs snap $testfs@snap1
+
+# Test bad send with the CLI
+log_mustnot eval "zfs send -i $testfs@snap1 $testfs@snap0 >$TEST_BASE_DIR/devnull"
+
+# Test bad send with libzfs/libzfs_core
+log_must badsend $testfs@snap0 $testfs@snap1
+
+log_pass "Send with invalid options fails gracefully."

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh
index eea535a..59b08cc 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh

@@ -49,11 +49,11 @@
 function cleanup
 {
     datasetexists $TESTPOOL/$TESTFS3 && \
-        log_must zfs destroy -r $TESTPOOL/$TESTFS3
+        destroy_dataset $TESTPOOL/$TESTFS3 -r
     datasetexists $TESTPOOL/$TESTFS2 && \
-        log_must zfs destroy -r $TESTPOOL/$TESTFS2
+        destroy_dataset $TESTPOOL/$TESTFS2 -r
     datasetexists $TESTPOOL/$TESTFS1 && \
-        log_must zfs destroy -r $TESTPOOL/$TESTFS1
+        destroy_dataset $TESTPOOL/$TESTFS1 -r
 }
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_partial_dataset.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_partial_dataset.ksh
new file mode 100755
index 0000000..c390327
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_partial_dataset.ksh

@@ -0,0 +1,110 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 Datto Inc.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that a partially received dataset can be sent with
+# 'zfs send --saved'.
+#
+# Strategy:
+# 1. Setup a pool with partially received filesystem
+# 2. Perform saved send without incremental
+# 3. Perform saved send with incremental
+# 4. Perform saved send with incremental, resuming from a token
+# 5. Perform negative tests for invalid command inputs
+#
+
+verify_runnable "both"
+
+log_assert "Verify that a partially received dataset can be sent with " \
+	"'zfs send --saved'."
+
+function cleanup
+{
+	destroy_dataset $POOL/testfs2 "-r"
+	destroy_dataset $POOL/stream "-r"
+	destroy_dataset $POOL/recvfs "-r"
+	destroy_dataset $POOL/partialfs "-r"
+}
+log_onexit cleanup
+
+log_must zfs create $POOL/testfs2
+log_must zfs create $POOL/stream
+mntpnt=$(get_prop mountpoint $POOL/testfs2)
+
+# Setup a pool with partially received filesystems
+log_must mkfile 1m $mntpnt/filea
+log_must zfs snap $POOL/testfs2@a
+log_must mkfile 1m $mntpnt/fileb
+log_must zfs snap $POOL/testfs2@b
+log_must eval "zfs send $POOL/testfs2@a | zfs recv $POOL/recvfs"
+log_must eval "zfs send -i $POOL/testfs2@a $POOL/testfs2@b > " \
+	"/$POOL/stream/inc.send"
+log_must eval "zfs send $POOL/testfs2@b > /$POOL/stream/full.send"
+mess_send_file /$POOL/stream/full.send
+mess_send_file /$POOL/stream/inc.send
+log_mustnot zfs recv -s $POOL/recvfullfs < /$POOL/stream/full.send
+log_mustnot zfs recv -s $POOL/recvfs < /$POOL/stream/inc.send
+
+# Perform saved send without incremental
+log_mustnot eval "zfs send --saved $POOL/recvfullfs | zfs recv -s " \
+	"$POOL/partialfs"
+token=$(zfs get -Hp -o value receive_resume_token $POOL/partialfs)
+log_must eval "zfs send -t $token | zfs recv -s $POOL/partialfs"
+file_check $POOL/recvfullfs $POOL/partialfs
+log_must zfs destroy -r $POOL/partialfs
+
+# Perform saved send with incremental
+log_must eval "zfs send $POOL/recvfs@a | zfs recv $POOL/partialfs"
+log_mustnot eval "zfs send --saved $POOL/recvfs | " \
+	"zfs recv -s $POOL/partialfs"
+token=$(zfs get -Hp -o value receive_resume_token $POOL/partialfs)
+log_must eval "zfs send -t $token | zfs recv -s $POOL/partialfs"
+file_check $POOL/recvfs $POOL/partialfs
+log_must zfs destroy -r $POOL/partialfs
+
+# Perform saved send with incremental, resuming from token
+log_must eval "zfs send $POOL/recvfs@a | zfs recv $POOL/partialfs"
+log_must eval "zfs send --saved $POOL/recvfs > " \
+	"/$POOL/stream/partial.send"
+mess_send_file /$POOL/stream/partial.send
+log_mustnot zfs recv -s $POOL/partialfs < /$POOL/stream/partial.send
+token=$(zfs get -Hp -o value receive_resume_token $POOL/partialfs)
+log_must eval "zfs send -t $token | zfs recv -s $POOL/partialfs"
+file_check $POOL/recvfs $POOL/partialfs
+
+# Perform negative tests for invalid command inputs.  Every argument list
+# below is expected to make 'zfs send --saved' fail.
+set -A badargs \
+	"" \
+	"$POOL/recvfs@a" \
+	"-i $POOL/recvfs@a $POOL/recvfs@b" \
+	"-R $POOL/recvfs" \
+	"-p $POOL/recvfs" \
+	"-I $POOL/recvfs" \
+	"-h $POOL/recvfs"
+
+# Walk the array directly; this does not rely on an index variable being 0.
+for args in "${badargs[@]}"; do
+	log_mustnot eval "zfs send --saved $args >$TEST_BASE_DIR/devnull"
+done
+
+log_pass "A partially received dataset can be sent with 'zfs send --saved'."

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
new file mode 100755
index 0000000..3cea334
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh

@@ -0,0 +1,193 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/properties.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify encrypted raw sending to pools with greater ashift succeeds.
+#
+# Strategy:
+# 1) Create a set of files each containing some file data in an
+#	encrypted filesystem.
+# 2) Snapshot and raw send these files to a pool with greater ashift
+# 3) Verify that all the xattrs (and thus the spill block) were
+#    preserved when receiving the incremental stream.
+# 4) Repeat the test for a non-encrypted filesystem using raw send
+#
+
+verify_runnable "both"
+
+log_assert "Verify raw sending to pools with greater ashift succeeds"
+
+function cleanup
+{
+	rm -f $BACKDIR/fs@* $BACKDIR/encfs@*	# streams of both xattr_test runs
+	poolexists pool9 && destroy_pool pool9
+	poolexists pool12 && destroy_pool pool12
+	log_must rm -f $TESTDIR/vdev_a $TESTDIR/vdev_b	# pool backing files
+}
+
+function xattr_test
+{
+	log_must zfs set xattr=sa pool9/$1
+	log_must zfs set dnodesize=legacy pool9/$1
+	log_must zfs set recordsize=128k pool9/$1
+	rand_set_prop pool9/$1 compression "${compress_prop_vals[@]}"
+
+	# Create 40 files each with a spill block containing xattrs.  Each file
+	# will be modified in a different way to validate the incremental receive.
+	for i in {1..40}; do
+		file="/pool9/$1/file$i"
+
+		log_must mkfile 16384 $file
+		for j in {1..20}; do
+			log_must set_xattr "testattr$j" "$attrvalue" $file
+		done
+	done
+
+	# Snapshot the pool and send it to the new dataset.
+	log_must zfs snapshot pool9/$1@snap1
+	log_must eval "zfs send -w pool9/$1@snap1 >$BACKDIR/$1@snap1"
+	log_must eval "zfs recv pool12/$1 < $BACKDIR/$1@snap1"
+
+	#
+	# Modify file[1-6]'s contents but not the spill blocks.
+	#
+	# file1 - Increase record size; single block
+	# file2 - Increase record size; multiple blocks
+	# file3 - Truncate file to zero size; single block
+	# file4 - Truncate file to smaller size; single block
+	# file5 - Truncate file to much larger size; add holes
+	# file6 - Truncate file to embedded size; embedded data
+	#
+	log_must mkfile 32768 /pool9/$1/file1
+	log_must mkfile 1048576 /pool9/$1/file2
+	log_must truncate -s 0 /pool9/$1/file3
+	log_must truncate -s 8192 /pool9/$1/file4
+	log_must truncate -s 1073741824 /pool9/$1/file5
+	log_must truncate -s 50 /pool9/$1/file6
+
+	#
+	# Modify file[11-16]'s contents and their spill blocks.
+	#
+	# file11 - Increase record size; single block
+	# file12 - Increase record size; multiple blocks
+	# file13 - Truncate file to zero size; single block
+	# file14 - Truncate file to smaller size; single block
+	# file15 - Truncate file to much larger size; add holes
+	# file16 - Truncate file to embedded size; embedded data
+	#
+	log_must mkfile 32768 /pool9/$1/file11
+	log_must mkfile 1048576 /pool9/$1/file12
+	log_must truncate -s 0 /pool9/$1/file13
+	log_must truncate -s 8192 /pool9/$1/file14
+	log_must truncate -s 1073741824 /pool9/$1/file15
+	log_must truncate -s 50 /pool9/$1/file16
+
+	for i in {11..20}; do
+		log_must rm_xattr testattr1 /pool9/$1/file$i
+	done
+
+	#
+	# Modify file[21-26]'s contents and remove their spill blocks.
+	#
+	# file21 - Increase record size; single block
+	# file22 - Increase record size; multiple blocks
+	# file23 - Truncate file to zero size; single block
+	# file24 - Truncate file to smaller size; single block
+	# file25 - Truncate file to much larger size; add holes
+	# file26 - Truncate file to embedded size; embedded data
+	#
+	log_must mkfile 32768 /pool9/$1/file21
+	log_must mkfile 1048576 /pool9/$1/file22
+	log_must truncate -s 0 /pool9/$1/file23
+	log_must truncate -s 8192 /pool9/$1/file24
+	log_must truncate -s 1073741824 /pool9/$1/file25
+	log_must truncate -s 50 /pool9/$1/file26
+
+	for i in {21..30}; do
+		for j in {1..20}; do
+			log_must rm_xattr testattr$j /pool9/$1/file$i
+		done
+	done
+
+	#
+	# Modify file[31-40]'s spill blocks but not the file contents.
+	#
+	for i in {31..40}; do
+		file="/pool9/$1/file$i"
+		log_must rm_xattr testattr$(((RANDOM % 20) + 1)) $file
+		log_must set_xattr testattr$(((RANDOM % 20) + 1)) "$attrvalue" $file
+	done
+
+	# Snapshot the pool and send the incremental snapshot.
+	log_must zfs snapshot pool9/$1@snap2
+	log_must eval "zfs send -w -i pool9/$1@snap1 pool9/$1@snap2 >$BACKDIR/$1@snap2"
+	log_must eval "zfs recv pool12/$1 < $BACKDIR/$1@snap2"
+}
+
+attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
+
+log_onexit cleanup
+
+# Create pools
+truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
+truncate -s $MINVDEVSIZE $TESTDIR/vdev_b
+log_must zpool create -f -o ashift=9 pool9 $TESTDIR/vdev_a
+log_must zpool create -f -o ashift=12 pool12 $TESTDIR/vdev_b
+
+# Create encrypted fs
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+	"-o keyformat=passphrase -o keylocation=prompt " \
+	"pool9/encfs"
+
+# Run xattr tests for encrypted fs
+xattr_test encfs
+
+# Calculate the expected recursive checksum for source encrypted fs
+expected_cksum=$(recursive_cksum /pool9/encfs)
+
+# Mount target encrypted fs
+log_must eval "echo 'password' | zfs load-key pool12/encfs"
+log_must zfs mount pool12/encfs
+
+# Validate the received copy using the received recursive checksum
+actual_cksum=$(recursive_cksum /pool12/encfs)
+if [[ "$expected_cksum" != "$actual_cksum" ]]; then
+	log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
+fi
+
+# Perform the same test but without encryption (send -w)
+log_must zfs create pool9/fs
+
+# Run xattr tests for non-encrypted fs
+xattr_test fs
+
+# Calculate the expected recursive checksum for source non-encrypted fs
+expected_cksum=$(recursive_cksum /pool9/fs)
+
+# Validate the received copy using the received recursive checksum
+actual_cksum=$(recursive_cksum /pool12/fs)
+if [[ "$expected_cksum" != "$actual_cksum" ]]; then
+	log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
+fi
+
+log_pass "Verify raw sending to pools with greater ashift succeeds"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh
new file mode 100755
index 0000000..48a76fc
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_large_blocks.ksh

@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Receiving a snapshot with large blocks and raw sending it succeeds.
+#
+# Strategy:
+# 1) Create a file with random data in a filesystem with recordsize=1m.
+# 2) Snapshot it, send it with large blocks allowed (-L) and receive the
+#	stream into a new encrypted filesystem.
+# 3) Raw send to a file. If the large_blocks feature is not activated
+#	in the filesystem created in (2) the raw send will fail.
+#
+
+verify_runnable "both"
+
+log_assert "Receiving and raw sending a snapshot with large blocks succeeds"
+
+backup=$TEST_BASE_DIR/backup
+raw_backup=$TEST_BASE_DIR/raw_backup
+
+function cleanup
+{
+	log_must rm -f $backup $raw_backup	# the two stream files written below
+	destroy_pool pool_lb
+	log_must rm -f $TESTDIR/vdev_a		# file vdev backing pool_lb
+}
+
+log_onexit cleanup
+
+typeset passphrase="password"
+typeset file="/pool_lb/fs/$TESTFILE0"
+
+# Create pool
+truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
+log_must zpool create -f -o feature@large_blocks=enabled pool_lb $TESTDIR/vdev_a
+
+log_must eval "echo $passphrase > /pool_lb/pwd"
+
+log_must zfs create -o recordsize=1m pool_lb/fs
+log_must dd if=/dev/urandom of=$file bs=1024 count=1024
+log_must zfs snapshot pool_lb/fs@snap1
+
+log_must eval "zfs send -L pool_lb/fs@snap1 > $backup"
+log_must eval "zfs recv -o encryption=aes-256-ccm -o keyformat=passphrase \
+    -o keylocation=file:///pool_lb/pwd -o primarycache=none \
+    -o recordsize=1m pool_lb/testfs5 < $backup"
+
+log_must eval "zfs send --raw pool_lb/testfs5@snap1 > $raw_backup"
+
+log_pass "Receiving and raw sending a snapshot with large blocks succeeds"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_spill_block.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_spill_block.ksh
new file mode 100755
index 0000000..8d7451a
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_raw_spill_block.ksh

@@ -0,0 +1,161 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019, Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021, George Amanakis. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify spill blocks are correctly preserved in raw sends.
+#
+# Strategy:
+# 1) Create a set of files each containing some file data in an
+#	encrypted filesystem.
+# 2) Add enough xattrs to the file to require a spill block.
+# 3) Snapshot and raw send these files to a new dataset.
+# 4) Modify the files and spill blocks in a variety of ways.
+# 5) Send the changes using a raw incremental send stream.
+# 6) Verify that all the xattrs (and thus the spill block) were
+#    preserved when receiving the incremental stream.
+#
+
+verify_runnable "both"
+
+log_assert "Verify spill blocks are correctly preserved in raw sends"
+
+function cleanup
+{
+	rm -f $BACKDIR/fs@*
+	destroy_dataset $POOL/fs "-rR"
+	destroy_dataset $POOL/newfs "-rR"
+}
+
+attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
+
+log_onexit cleanup
+
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+	"-o keyformat=passphrase -o keylocation=prompt " \
+	"$POOL/fs"
+log_must zfs set xattr=sa $POOL/fs
+log_must zfs set dnodesize=legacy $POOL/fs
+log_must zfs set recordsize=128k $POOL/fs
+
+# Create 40 files each with a spill block containing xattrs.  Each file
+# will be modified in a different way to validate the incremental receive.
+for i in {1..40}; do
+	file="/$POOL/fs/file$i"
+
+	log_must mkfile 16384 $file
+	for j in {1..20}; do
+		log_must set_xattr "testattr$j" "$attrvalue" $file
+	done
+done
+
+# Snapshot the pool and send it to the new dataset.
+log_must zfs snapshot $POOL/fs@snap1
+log_must eval "zfs send -w $POOL/fs@snap1 >$BACKDIR/fs@snap1"
+log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1"
+
+#
+# Modify file[1-6]'s contents but not the spill blocks.
+#
+# file1 - Increase record size; single block
+# file2 - Increase record size; multiple blocks
+# file3 - Truncate file to zero size; single block
+# file4 - Truncate file to smaller size; single block
+# file5 - Truncate file to much larger size; add holes
+# file6 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file1
+log_must mkfile 1048576 /$POOL/fs/file2
+log_must truncate -s 0 /$POOL/fs/file3
+log_must truncate -s 8192 /$POOL/fs/file4
+log_must truncate -s 1073741824 /$POOL/fs/file5
+log_must truncate -s 50 /$POOL/fs/file6
+
+#
+# Modify file[11-16]'s contents and their spill blocks.
+#
+# file11 - Increase record size; single block
+# file12 - Increase record size; multiple blocks
+# file13 - Truncate file to zero size; single block
+# file14 - Truncate file to smaller size; single block
+# file15 - Truncate file to much larger size; add holes
+# file16 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file11
+log_must mkfile 1048576 /$POOL/fs/file12
+log_must truncate -s 0 /$POOL/fs/file13
+log_must truncate -s 8192 /$POOL/fs/file14
+log_must truncate -s 1073741824 /$POOL/fs/file15
+log_must truncate -s 50 /$POOL/fs/file16
+
+for i in {11..20}; do
+	log_must rm_xattr testattr1 /$POOL/fs/file$i
+done
+
+#
+# Modify file[21-26]'s contents and remove their spill blocks.
+#
+# file21 - Increase record size; single block
+# file22 - Increase record size; multiple blocks
+# file23 - Truncate file to zero size; single block
+# file24 - Truncate file to smaller size; single block
+# file25 - Truncate file to much larger size; add holes
+# file26 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file21
+log_must mkfile 1048576 /$POOL/fs/file22
+log_must truncate -s 0 /$POOL/fs/file23
+log_must truncate -s 8192 /$POOL/fs/file24
+log_must truncate -s 1073741824 /$POOL/fs/file25
+log_must truncate -s 50 /$POOL/fs/file26
+
+for i in {21..30}; do
+	for j in {1..20}; do
+		log_must rm_xattr testattr$j /$POOL/fs/file$i
+	done
+done
+
+#
+# Modify file[31-40]'s spill blocks but not the file contents.
+#
+for i in {31..40}; do
+	file="/$POOL/fs/file$i"
+	log_must rm_xattr testattr$(((RANDOM % 20) + 1)) $file
+	log_must set_xattr testattr$(((RANDOM % 20) + 1)) "$attrvalue" $file
+done
+
+# Calculate the expected recursive checksum for the source.
+expected_cksum=$(recursive_cksum /$POOL/fs)
+
+# Snapshot the pool and send the incremental snapshot.
+log_must zfs snapshot $POOL/fs@snap2
+log_must eval "zfs send -w -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2"
+log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap2"
+log_must eval "echo 'password' | zfs load-key $POOL/newfs"
+log_must zfs mount $POOL/newfs
+
+# Validate the received copy using the received recursive checksum.
+actual_cksum=$(recursive_cksum /$POOL/newfs)
+if [[ "$expected_cksum" != "$actual_cksum" ]]; then
+	log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
+fi
+
+log_pass "Verify spill blocks are correctly preserved in raw sends"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
index 12a72fa..bd30488 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh

@@ -49,13 +49,8 @@
 	rm -f $BACKDIR/fs-dn-2k
 	rm -f $BACKDIR/fs-attr
 
-	if datasetexists $POOL/fs ; then
-		log_must zfs destroy -rR $POOL/fs
-	fi
-
-	if datasetexists $POOL/newfs ; then
-		log_must zfs destroy -rR $POOL/newfs
-	fi
+	datasetexists $POOL/fs && destroy_dataset $POOL/fs -rR
+	datasetexists $POOL/newfs && destroy_dataset $POOL/newfs -rR
 }
 
 log_onexit cleanup
@@ -93,7 +88,8 @@
 # 4. Create an empty file and add xattrs to it to exercise reclaiming a
 #    dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
 log_must zfs set compression=on xattr=sa $POOL/fs
-log_must eval "python -c 'print \"a\" * 512' | attr -s bigval /$POOL/fs/attrs"
+log_must eval "python3 -c 'print(\"a\" * 512)' |
+    set_xattr_stdin bigval /$POOL/fs/attrs"
 log_must zfs snapshot $POOL/fs@d
 
 # 5. Generate initial and incremental streams

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh
index 83a7978..361f6b3 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh

@@ -12,10 +12,12 @@
 #
 
 #
-# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2019, Lawrence Livermore National Security LLC.
+# Use is subject to license terms.
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/properties.shlib
 . $STF_SUITE/tests/functional/rsend/rsend.kshlib
 
 #
@@ -65,12 +67,13 @@
 # Set atime=off to prevent the recursive_cksum from modifying newfs.
 log_must zfs set atime=off $POOL/newfs
 
-# Due to reduced performance on debug kernels use fewer files by default.
 if is_kmemleak; then
+	# Use fewer files and passes on debug kernels
+	# to avoid timeout due to reduced performance.
 	nr_files=100
 	passes=2
 else
-	nr_files=1000
+	nr_files=300
 	passes=3
 fi
 
@@ -78,7 +81,7 @@
 	# Randomly modify several dataset properties in order to generate
 	# more interesting incremental send streams.
 	rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
-	rand_set_prop $POOL/fs compression "off" "lzjb" "gzip" "lz4"
+	rand_set_prop $POOL/fs compression "${compress_prop_vals[@]}"
 	rand_set_prop $POOL/fs recordsize "32K" "128K"
 	rand_set_prop $POOL/fs dnodesize "legacy" "auto" "4k"
 	rand_set_prop $POOL/fs xattr "on" "sa"

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh
index 8e9db96..187a899 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh

@@ -59,12 +59,13 @@
 # Set atime=off to prevent the recursive_cksum from modifying newfs.
 log_must zfs set atime=off $POOL/newfs
 
-# Due to reduced performance on debug kernels use fewer files by default.
 if is_kmemleak; then
+	# Use fewer files and passes on debug kernels
+	# to avoid timeout due to reduced performance.
 	nr_files=100
 	passes=2
 else
-	nr_files=1000
+	nr_files=300
 	passes=3
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh b/zfs/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
index 9de732e..73f1648 100755
--- a/zfs/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh

@@ -59,7 +59,7 @@
 
 	log_must mkfile 16384 $file
 	for j in {1..20}; do
-		log_must attr -qs "testattr$j" -V "$attrvalue" $file
+		log_must set_xattr "testattr$j" "$attrvalue" $file
 	done
 done
 
@@ -103,7 +103,7 @@
 log_must truncate -s 50 /$POOL/fs/file16
 
 for i in {11..20}; do
-	log_must attr -qr testattr1 /$POOL/fs/file$i
+	log_must rm_xattr testattr1 /$POOL/fs/file$i
 done
 
 #
@@ -125,7 +125,7 @@
 
 for i in {21..30}; do
 	for j in {1..20}; do
-		log_must attr -qr testattr$j /$POOL/fs/file$i
+		log_must rm_xattr testattr$j /$POOL/fs/file$i
 	done
 done
 
@@ -134,8 +134,8 @@
 #
 for i in {31..40}; do
 	file="/$POOL/fs/file$i"
-	log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file
-	log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file
+	log_must rm_xattr testattr$(((RANDOM % 20) + 1)) $file
+	log_must set_xattr testattr$(((RANDOM % 20) + 1)) "$attrvalue" $file
 done
 
 # Calculate the expected recursive checksum for the source.

diff --git a/zfs/tests/zfs-tests/tests/functional/simd/Makefile.am b/zfs/tests/zfs-tests/tests/functional/simd/Makefile.am
new file mode 100644
index 0000000..bfc2886
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/simd/Makefile.am

@@ -0,0 +1,2 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/simd
+dist_pkgdata_SCRIPTS = simd_supported.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/simd/simd_supported.ksh b/zfs/tests/zfs-tests/tests/functional/simd/simd_supported.ksh
new file mode 100755
index 0000000..d88bc58
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/simd/simd_supported.ksh

@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Attila Fülöp <attila@fueloep.org>
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Make sure we have SIMD support, so it will not go away without notice
+#
+# STRATEGY:
+#	1. Test if we are running on a Linux x86 system with SSE support
+#	2. If so, check if the zfs_fletcher_4_impl module parameter contains
+#	   an SSE implementation
+#	3. If not, fail the test; otherwise pass it
+
+log_note "Testing if we support SIMD instructions (Linux x86 only)"
+
+if ! is_linux; then	# '!' must be a separate word to negate the command
+	log_unsupported "Not a Linux System"
+fi
+
+case "$(uname -m)" in
+i386|i686|x86_64)
+	typeset -R modparam="/sys/module/zcommon/parameters/zfs_fletcher_4_impl"
+	if awk '/^flags/ {print; exit;}' /proc/cpuinfo | grep -q sse; then
+		log_must grep -q sse "$modparam"	# module must list an sse impl
+		log_pass "SIMD instructions supported"
+	else
+		log_unsupported "No FPU present"
+	fi
+	;;
+*)
+	log_unsupported "Not a x86 CPU"
+	;;
+esac

diff --git a/zfs/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh
index d691706..89b3aeb 100755
--- a/zfs/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh

@@ -50,6 +50,8 @@
 	if [[ -n $lofidev ]]; then
 		if is_linux; then
 			losetup -d $lofidev
+		elif is_freebsd; then
+			mdconfig -du ${lofidev#md}
 		else
 			lofiadm -d $lofidev
 		fi
@@ -78,6 +80,8 @@
 	lofidev=$(losetup -f)
 	log_must losetup $lofidev ${LDEV2%% *}
 	lofidev=${lofidev##*/}
+elif is_freebsd; then
+	lofidev=$(mdconfig -a ${LDEV2%% *})
 else
 	lofidev=${LDEV2%% *}
 	log_must lofiadm -a $lofidev

diff --git a/zfs/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh
index e8ea29f..f8530a6 100755
--- a/zfs/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh

@@ -64,7 +64,7 @@
 		# Corrupt a pool device to make the pool DEGRADED
 		# The oseek value below is to skip past the vdev label.
 		#
-		if is_linux; then
+		if is_linux || is_freebsd; then
 			log_must dd if=/dev/urandom of=$VDIR/a bs=1024k \
 			   seek=4 conv=notrunc count=50
 		else

diff --git a/zfs/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/zfs/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh
index a53aeab..04fb225 100755
--- a/zfs/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh

@@ -40,17 +40,17 @@
 	#
 	wait
 
-	set_tunable64 zfs_commit_timeout_pct $ORIG_TIMEOUT
+	set_tunable64 COMMIT_TIMEOUT_PCT $ORIG_TIMEOUT
 
 	poolexists $TESTPOOL && zpool destroy -f $TESTPOOL
 }
 
-ORIG_TIMEOUT=$(get_tunable zfs_commit_timeout_pct | tail -1 | awk '{print $NF}')
+typeset ORIG_TIMEOUT=$(get_tunable COMMIT_TIMEOUT_PCT)
 log_onexit cleanup
 log_must setup
 
 for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do
-	log_must set_tunable64 zfs_commit_timeout_pct $PCT
+	log_must set_tunable64 COMMIT_TIMEOUT_PCT $PCT
 
 	log_must zpool create $TESTPOOL $VDEV log $SDEV
 

diff --git a/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh
index 8954caa..0b78a09 100755
--- a/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh

@@ -108,12 +108,15 @@
 
 # Create a simple validation payload
 log_must mkdir -p $TESTDIR
-log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload \
+    oflag=sync bs=1k count=8
 typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload)
 
 # TX_WRITE (small file with ordering)
-log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file
-log_must mkfile 512b /$TESTPOOL/$TESTFS/small_file
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/small_file \
+    oflag=sync bs=1k count=1
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/small_file \
+    oflag=sync bs=512 count=1
 
 # TX_CREATE, TX_MKDIR, TX_REMOVE, TX_RMDIR
 log_must cp -R /usr/share/dict /$TESTPOOL/$TESTFS
@@ -122,7 +125,11 @@
 # TX_SETATTR
 log_must touch /$TESTPOOL/$TESTFS/setattr
 log_must chmod 567 /$TESTPOOL/$TESTFS/setattr
-log_must chgrp root /$TESTPOOL/$TESTFS/setattr
+if is_freebsd; then
+	log_must chgrp wheel /$TESTPOOL/$TESTFS/setattr
+else
+	log_must chgrp root /$TESTPOOL/$TESTFS/setattr
+fi
 log_must touch -cm -t 201311271200 /$TESTPOOL/$TESTFS/setattr
 
 # TX_TRUNCATE (to zero)
@@ -131,34 +138,39 @@
 
 # TX_WRITE (large file)
 log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \
-    bs=128k count=64 oflag=sync
+    oflag=sync bs=128k count=64
 
 # Write zeros, which compress to holes, in the middle of a file
-log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=8
-log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=2
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 \
+    oflag=sync bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 \
+    oflag=sync bs=128k count=2
 
-log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=8
-log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=2 seek=2
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.2 \
+    oflag=sync bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.2 \
+    oflag=sync bs=128k count=2 seek=2
 
-log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=8
-log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=2 \
-   seek=2 conv=notrunc
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.3 \
+    oflag=sync bs=128k count=8
+log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.3 \
+    oflag=sync bs=128k count=2 seek=2 conv=notrunc
 
 # TX_MKXATTR
 log_must mkdir /$TESTPOOL/$TESTFS/xattr.dir
-log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
-log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
-log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.dir
-
 log_must touch /$TESTPOOL/$TESTFS/xattr.file
-log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
-log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file
-log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file
+log_must set_xattr fileattr HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
+log_must set_xattr tmpattr HelloWorld /$TESTPOOL/$TESTFS/xattr.dir
+log_must rm_xattr fileattr /$TESTPOOL/$TESTFS/xattr.dir
+
+log_must set_xattr fileattr HelloWorld /$TESTPOOL/$TESTFS/xattr.file
+log_must set_xattr tmpattr HelloWorld /$TESTPOOL/$TESTFS/xattr.file
+log_must rm_xattr tmpattr /$TESTPOOL/$TESTFS/xattr.file
 
 # TX_WRITE, TX_LINK, TX_REMOVE
 # Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed
-log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \
-   count=8
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink \
+    oflag=sync bs=128k count=8
 log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \
    /$TESTPOOL/$TESTFS/link_and_unlink.link
 log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link
@@ -197,8 +209,8 @@
 log_must zdb -bcv $TESTPOOL
 
 log_note "Verify copy of xattrs:"
-log_must attr -l /$TESTPOOL/$TESTFS/xattr.dir
-log_must attr -l /$TESTPOOL/$TESTFS/xattr.file
+log_must ls_xattr /$TESTPOOL/$TESTFS/xattr.dir
+log_must ls_xattr /$TESTPOOL/$TESTFS/xattr.file
 
 log_note "Verify working set diff:"
 log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy

diff --git a/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh
index f513d04..d39c6de 100755
--- a/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh

@@ -61,10 +61,11 @@
 
 VOLUME=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
 MNTPNT=$TESTDIR/$TESTVOL
+FSTYPE=none
 
 function cleanup_volume
 {
-	if ismounted $MNTPNT ext4; then
+	if ismounted $MNTPNT $FSTYPE; then
 		log_must umount $MNTPNT
 		rmdir $MNTPNT
 	fi
@@ -88,10 +89,19 @@
 log_must zfs set sync=always $TESTPOOL/$TESTVOL
 log_must mkdir -p $TESTDIR
 block_device_wait
-echo "y" | newfs -t ext4 -v $VOLUME
-log_must mkdir -p $MNTPNT
-log_must mount -o discard $VOLUME $MNTPNT
-log_must rmdir $MNTPNT/lost+found
+if is_linux; then
+	# ext4 only on Linux
+	log_must new_fs -t ext4 -v $VOLUME
+	log_must mkdir -p $MNTPNT
+	log_must mount -o discard $VOLUME $MNTPNT
+	FSTYPE=ext4
+	log_must rmdir $MNTPNT/lost+found
+else
+	log_must new_fs $VOLUME
+	log_must mkdir -p $MNTPNT
+	log_must mount $VOLUME $MNTPNT
+	FSTYPE=$NEWFS_DEFAULT_FS
+fi
 log_must zpool sync
 
 #
@@ -116,13 +126,15 @@
 log_must dd if=/dev/urandom of=$MNTPNT/holes bs=128k count=8
 log_must dd if=/dev/zero of=$MNTPNT/holes bs=128k count=2 seek=2 conv=notrunc
 
-# TX_TRUNCATE
-if fallocate --punch-hole 2>&1 | grep -q "unrecognized option"; then
-	log_note "fallocate(1) does not support --punch-hole"
-else
-	log_must dd if=/dev/urandom of=$MNTPNT/discard bs=128k count=16
-	log_must fallocate --punch-hole -l 128K -o 512K $MNTPNT/discard
-	log_must fallocate --punch-hole -l 512K -o 1M $MNTPNT/discard
+if is_linux; then
+	# TX_TRUNCATE
+	if fallocate --punch-hole 2>&1 | grep -q "unrecognized option"; then
+		log_note "fallocate(1) does not support --punch-hole"
+	else
+		log_must dd if=/dev/urandom of=$MNTPNT/discard bs=128k count=16
+		log_must fallocate --punch-hole -l 128K -o 512K $MNTPNT/discard
+		log_must fallocate --punch-hole -l 512K -o 1M $MNTPNT/discard
+	fi
 fi
 
 #

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/cleanup.ksh
index 12d9509..530a785 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/cleanup.ksh

@@ -32,7 +32,7 @@
 . $STF_SUITE/include/libtest.shlib
 
 if is_linux; then
-	log_must set_tunable64 zfs_admin_snapshot 0
+	log_must set_tunable64 ADMIN_SNAPSHOT 0
 fi
 
 default_container_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/clone_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/clone_001_pos.ksh
index 5268971..1c8a3b2 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/clone_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/clone_001_pos.ksh

@@ -61,7 +61,17 @@
 
 function setup_all
 {
+	if is_freebsd; then
+		# Pool creation on zvols is forbidden by default.
+		# Save the current setting.
+		typeset _saved=$(get_tunable VOL_RECURSIVE)
+		log_must set_tunable64 VOL_RECURSIVE 1
+	fi
 	create_pool $TESTPOOL1 ${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL
+	if is_freebsd; then
+		# Restore the previous setting.
+		log_must set_tunable64 VOL_RECURSIVE $_saved
+	fi
 	log_must zfs create $TESTPOOL1/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR2 $TESTPOOL1/$TESTFS
 
@@ -86,8 +96,8 @@
 		(( i = i + 4 ))
 	done
 
-	datasetexists $TESTPOOL1/$TESTFS  && \
-		log_must zfs destroy -f $TESTPOOL1/$TESTFS
+	datasetexists $TESTPOOL1/$TESTFS && \
+		destroy_dataset $TESTPOOL1/$TESTFS -f
 
 	destroy_pool $TESTPOOL1
 

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/rollback_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/rollback_003_pos.ksh
index 342e7df..036e714 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/rollback_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/rollback_003_pos.ksh

@@ -48,10 +48,6 @@
 
 verify_runnable "both"
 
-if is_linux; then
-	log_unsupported "Test case is known to fail on Linux"
-fi
-
 function cleanup
 {
 	typeset snap=""
@@ -61,18 +57,16 @@
 	log_must zfs mount -a
 	unset __ZFS_POOL_RESTRICT
 
-	for snap in "$SNAPPOOL.1" "$SNAPPOOL"
-	do
-		snapexists $snap
-		[[ $? -eq 0 ]] && \
-			log_must zfs destroy $snap
+	for snap in "$SNAPPOOL.1" "$SNAPPOOL"; do
+		if snapexists $snap; then
+			destroy_snapshot $snap
+		fi
 	done
 
-	for fs in "$TESTPOOL/$TESTFILE/$TESTFILE.1" "$TESTPOOL/$TESTFILE"
-	do
-		datasetexists $fs
-		[[ $? -eq 0 ]] && \
-			log_must zfs destroy -r $fs
+	for fs in "$TESTPOOL/$TESTFILE/$TESTFILE.1" "$TESTPOOL/$TESTFILE"; do
+		if datasetexists $fs; then
+			destroy_dataset $fs -r
+		fi
 	done
 
 	[[ -e /$TESTPOOL ]] && \
@@ -98,6 +92,15 @@
 log_must zfs snapshot $TESTPOOL/$TESTFILE@$TESTSNAP
 log_must zfs snapshot $SNAPPOOL.1
 
+#
+# Linux: Issuing a `df` seems to properly force any negative dcache entries to
+# be invalidated preventing failures when accessing the mount point. Additional
+# investigation required.
+#
+# https://github.com/openzfs/zfs/issues/6143
+#
+log_must df >/dev/null
+
 export __ZFS_POOL_RESTRICT="$TESTPOOL"
 log_must zfs unmount -a
 log_must zfs mount -a
@@ -106,5 +109,6 @@
 log_must touch /$TESTPOOL/$TESTFILE/$TESTFILE.1
 
 log_must zfs rollback $SNAPPOOL.1
+log_must df >/dev/null
 
 log_pass "Rollbacks succeed when nested file systems are present."

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/setup.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/setup.ksh
index 6f06467..a73d1af 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/setup.ksh

@@ -34,7 +34,7 @@
 DISK=${DISKS%% *}
 
 if is_linux; then
-	log_must set_tunable64 zfs_admin_snapshot 1
+	log_must set_tunable64 ADMIN_SNAPSHOT 1
 fi
 
 default_container_volume_setup ${DISK}

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_011_pos.ksh
index 44e5943..7e0a7f4 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_011_pos.ksh

@@ -51,8 +51,7 @@
 
 function cleanup
 {
-	snapexists $SNAPPOOL && \
-		log_must zfs destroy -r $SNAPPOOL
+	snapexists $SNAPPOOL && destroy_dataset $SNAPPOOL -r
 
 	[[ -e $TESTDIR ]] && \
 		log_must rm -rf $TESTDIR/* > /dev/null 2>&1

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_012_pos.ksh
index c5717e4..92db9b5 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_012_pos.ksh

@@ -55,15 +55,14 @@
 {
 	if datasetexists $clone1; then
 		log_must zfs promote $ctrfs
-		log_must zfs destroy $clone1
+		destroy_dataset $clone1
 	fi
 
-	snapexists $snapctr && \
-		log_must zfs destroy -r $snapctr
+	snapexists $snapctr && destroy_dataset $snapctr -r
 
 	if snapexists $clone@$TESTSNAP1; then
 		log_must zfs promote $ctrfs
-		log_must zfs destroy -rR $ctrfs@$TESTSNAP1
+		destroy_dataset $ctrfs@$TESTSNAP1 -rR
 	fi
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_013_pos.ksh
index 31aedb2..e02f6eb 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_013_pos.ksh

@@ -48,11 +48,8 @@
 
 function cleanup
 {
-	datasetexists $ctrfs && \
-		zfs destroy -r $ctrfs
-
-	snapexists $snappool && \
-		log_must zfs destroy -r $snappool
+	datasetexists $ctrfs && destroy_dataset $ctrfs -r
+	snapexists $snappool && destroy_dataset $snappool -r
 
 	[[ -e $TESTDIR ]] && \
 		log_must rm -rf $TESTDIR/* > /dev/null 2>&1

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_014_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_014_pos.ksh
index 3579fbe..d48d404 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_014_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_014_pos.ksh

@@ -51,8 +51,7 @@
 	[[ -e $TESTDIR1 ]] && \
 		log_must rm -rf $TESTDIR1/* > /dev/null 2>&1
 
-	snapexists $SNAPCTR && \
-		log_must zfs destroy $SNAPCTR
+	snapexists $SNAPCTR && destroy_dataset $SNAPCTR
 
 	datasetexists $TESTPOOL/$TESTCTR/$TESTFS1 && \
 		log_must zfs set quota=none $TESTPOOL/$TESTCTR/$TESTFS1

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_015_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_015_pos.ksh
index 1091bcb..5a4d2cc 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_015_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_015_pos.ksh

@@ -54,7 +54,7 @@
 	typeset -i i=0
 	while ((i < snap_cnt)); do
 		typeset snap=$fs@snap.$i
-		datasetexists $snap && log_must zfs destroy -f $snap
+		datasetexists $snap && destroy_dataset $snap -f
 
 		((i += 1))
 	done

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_016_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_016_pos.ksh
index b460c2b..b66023c 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_016_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_016_pos.ksh

@@ -47,19 +47,12 @@
 
 function cleanup
 {
-	datasetexists $SNAPFS && \
-		log_must zfs destroy -Rf $SNAPFS
-	datasetexists $TESTPOOL/$TESTFS@snap_a && \
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS@snap_a
-	datasetexists $TESTPOOL/$TESTFS@snap_b && \
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS@snap_b
-	datasetexists $TESTPOOL/$TESTCLONE@snap_a && \
-		log_must zfs destroy -Rf $TESTPOOL/$TESTCLONE@snap_a
-
-	datasetexists $TESTPOOL/$TESTCLONE && \
-		log_must zfs destroy $TESTPOOL/$TESTCLONE
-	datasetexists $TESTPOOL/$TESTFS && \
-		log_must zfs destroy $TESTPOOL/$TESTFS
+	datasetexists $SNAPFS && destroy_dataset $SNAPFS -Rf
+	datasetexists $TESTPOOL/$TESTFS@snap_a && destroy_dataset $TESTPOOL/$TESTFS@snap_a -Rf
+	datasetexists $TESTPOOL/$TESTFS@snap_b && destroy_dataset $TESTPOOL/$TESTFS@snap_b -Rf
+	datasetexists $TESTPOOL/$TESTCLONE@snap_a && destroy_dataset $TESTPOOL/$TESTCLONE@snap_a -Rf
+	datasetexists $TESTPOOL/$TESTCLONE && destroy_dataset $TESTPOOL/$TESTCLONE
+	datasetexists $TESTPOOL/$TESTFS && destroy_dataset $TESTPOOL/$TESTFS
 
 	log_must zfs create $TESTPOOL/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_017_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_017_pos.ksh
index a21f875..6e5b897 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_017_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapshot/snapshot_017_pos.ksh

@@ -56,9 +56,8 @@
 {
 	cd $SAVED_DIR
 
-	if datasetexists $TESTPOOL/$TESTFS ; then
-		log_must zfs destroy -Rf $TESTPOOL/$TESTFS
-	fi
+	datasetexists $TESTPOOL/$TESTFS && \
+		destroy_dataset $TESTPOOL/$TESTFS -Rf
 
 	log_must zfs create $TESTPOOL/$TESTFS
 	log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS

diff --git a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_001_pos.ksh
index 302ba40..c1277f2 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_001_pos.ksh

@@ -51,7 +51,7 @@
 
 function cleanup
 {
-	log_must zfs destroy -rR $USEDTEST
+	datasetexists $USEDTEST && destroy_dataset $USEDTEST -rR
 }
 
 log_assert "Verify used is correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_002_pos.ksh
index 96d2df6..a41ca1d 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_002_pos.ksh

@@ -49,7 +49,7 @@
 
 function cleanup
 {
-	log_must zfs destroy -rR $USEDTEST
+	datasetexists $USEDTEST && destroy_dataset $USEDTEST -rR
 }
 
 log_assert "Verify usedbychildren is correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_003_pos.ksh
index d4726ff..ff54cba 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_003_pos.ksh

@@ -48,7 +48,7 @@
 
 function cleanup
 {
-	log_must zfs destroy -rR $USEDTEST
+	datasetexists $USEDTEST && destroy_dataset $USEDTEST -rR
 }
 
 log_assert "Verify usedbydataset is correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_004_pos.ksh
index 64ca3e2..8fb8b6b 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_004_pos.ksh

@@ -53,7 +53,7 @@
 
 function cleanup
 {
-	log_must zfs destroy -rR $USEDTEST
+	datasetexists $USEDTEST && destroy_dataset $USEDTEST -rR
 }
 
 log_assert "Verify usedbyrefreservation is correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_005_pos.ksh
index ac5224c..9d21e1d 100755
--- a/zfs/tests/zfs-tests/tests/functional/snapused/snapused_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/snapused/snapused_005_pos.ksh

@@ -47,7 +47,7 @@
 
 function cleanup
 {
-	log_must zfs destroy -rR $USEDTEST
+	datasetexists $USEDTEST && destroy_dataset $USEDTEST -rR
 }
 
 log_assert "Verify usedbysnapshots is correct."

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/Makefile.am b/zfs/tests/zfs-tests/tests/functional/suid/Makefile.am
index 594d2b7..0145c12 100644
--- a/zfs/tests/zfs-tests/tests/functional/suid/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/suid/Makefile.am

@@ -7,6 +7,7 @@
 	suid_write_to_sgid.ksh \
 	suid_write_to_suid_sgid.ksh \
 	suid_write_to_none.ksh \
+	suid_write_zil_replay.ksh \
 	cleanup.ksh \
 	setup.ksh
 

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c
index 571dc55..f3febb9 100644
--- a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c

@@ -29,86 +29,16 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <fcntl.h>
-
-static void
-test_stat_mode(mode_t extra)
-{
-	struct stat st;
-	int i, fd;
-	char fpath[1024];
-	char *penv[] = {"TESTDIR", "TESTFILE0"};
-	char buf[] = "test";
-	mode_t res;
-	mode_t mode = 0777 | extra;
-
-	/*
-	 * Get the environment variable values.
-	 */
-	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
-		if ((penv[i] = getenv(penv[i])) == NULL) {
-			fprintf(stderr, "getenv(penv[%d])\n", i);
-			exit(1);
-		}
-	}
-
-	umask(0);
-	if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) {
-		perror("mkdir");
-		exit(2);
-	}
-
-	snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]);
-	unlink(fpath);
-	if (stat(fpath, &st) == 0) {
-		fprintf(stderr, "%s exists\n", fpath);
-		exit(3);
-	}
-
-	fd = creat(fpath, mode);
-	if (fd == -1) {
-		perror("creat");
-		exit(4);
-	}
-	close(fd);
-
-	if (setuid(65534) == -1) {
-		perror("setuid");
-		exit(5);
-	}
-
-	fd = open(fpath, O_RDWR);
-	if (fd == -1) {
-		perror("open");
-		exit(6);
-	}
-
-	if (write(fd, buf, sizeof (buf)) == -1) {
-		perror("write");
-		exit(7);
-	}
-	close(fd);
-
-	if (stat(fpath, &st) == -1) {
-		perror("stat");
-		exit(8);
-	}
-	unlink(fpath);
-
-	/* Verify SUID/SGID are dropped */
-	res = st.st_mode & (0777 | S_ISUID | S_ISGID);
-	if (res != (mode & 0777)) {
-		fprintf(stderr, "stat(2) %o\n", res);
-		exit(9);
-	}
-}
+#include <stdbool.h>
 
 int
 main(int argc, char *argv[])
 {
-	const char *name;
+	const char *name, *phase;
 	mode_t extra;
+	struct stat st;
 
-	if (argc < 2) {
+	if (argc < 3) {
 		fprintf(stderr, "Invalid argc\n");
 		exit(1);
 	}
@@ -127,7 +57,77 @@
 		exit(1);
 	}
 
-	test_stat_mode(extra);
+	const char *testdir = getenv("TESTDIR");
+	if (!testdir) {
+		fprintf(stderr, "getenv(TESTDIR)\n");
+		exit(1);
+	}
+
+	umask(0);
+	if (stat(testdir, &st) == -1 && mkdir(testdir, 0777) == -1) {
+		perror("mkdir");
+		exit(2);
+	}
+
+	char fpath[1024];
+	snprintf(fpath, sizeof (fpath), "%s/%s", testdir, name);
+
+
+	phase = argv[2];
+	if (strcmp(phase, "PRECRASH") == 0) {
+
+		/* clean up last run */
+		unlink(fpath);
+		if (stat(fpath, &st) == 0) {
+			fprintf(stderr, "%s exists\n", fpath);
+			exit(3);
+		}
+
+		int fd;
+
+		fd = creat(fpath, 0777 | extra);
+		if (fd == -1) {
+			perror("creat");
+			exit(4);
+		}
+		close(fd);
+
+		if (setuid(65534) == -1) {
+			perror("setuid");
+			exit(5);
+		}
+
+		fd = open(fpath, O_RDWR);
+		if (fd == -1) {
+			perror("open");
+			exit(6);
+		}
+
+		const char buf[] = "test";
+		if (write(fd, buf, sizeof (buf)) == -1) {
+			perror("write");
+			exit(7);
+		}
+		close(fd);
+
+	} else if (strcmp(phase, "REPLAY") == 0) {
+		/* created in PRECRASH run */
+	} else {
+		fprintf(stderr, "Invalid phase %s\n", phase);
+		exit(1);
+	}
+
+	if (stat(fpath, &st) == -1) {
+			perror("stat");
+			exit(8);
+		}
+
+	/* Verify SUID/SGID are dropped */
+	mode_t res = st.st_mode & (0777 | S_ISUID | S_ISGID);
+	if (res != 0777) {
+		fprintf(stderr, "stat(2) %o\n", res);
+		exit(9);
+	}
 
 	return (0);
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh
index dd01978..470350f 100755
--- a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh

@@ -47,6 +47,6 @@
 log_onexit cleanup
 log_note "Verify write(2) to regular file by non-owner"
 
-log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH"
 
 log_pass "Verify write(2) to regular file by non-owner passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh
index 49ae2bd..3c95a40 100755
--- a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh

@@ -47,6 +47,6 @@
 log_onexit cleanup
 log_note "Verify write(2) to SGID file by non-owner"
 
-log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH"
 
 log_pass "Verify write(2) to SGID file by non-owner passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh
index 3983aad..4183cbe 100755
--- a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh

@@ -47,6 +47,6 @@
 log_onexit cleanup
 log_note "Verify write(2) to SUID file by non-owner"
 
-log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH"
 
 log_pass "Verify write(2) to SUID file by non-owner passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh
index a058c7e..f7a08a5 100755
--- a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh

@@ -47,6 +47,6 @@
 log_onexit cleanup
 log_note "Verify write(2) to SUID/SGID file by non-owner"
 
-log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH"
 
 log_pass "Verify write(2) to SUID/SGID file by non-owner passed"

diff --git a/zfs/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh
new file mode 100755
index 0000000..81f431f
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh

@@ -0,0 +1,99 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/tests/functional/slog/slog.kshlib
+
+verify_runnable "global"
+
+function cleanup_fs
+{
+	cleanup
+}
+
+log_assert "Verify ZIL replay results in correct SUID/SGID bits for unprivileged write to SUID/SGID files"
+log_onexit cleanup_fs
+log_must setup
+
+#
+# 1. Create a file system (TESTFS)
+#
+log_must zpool destroy "$TESTPOOL"
+log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
+log_must zfs set compression=on $TESTPOOL
+log_must zfs create -o mountpoint="$TESTDIR" $TESTPOOL/$TESTFS
+
+# Make all the writes from suid_write_to_file.c sync
+log_must zfs set sync=always "$TESTPOOL/$TESTFS"
+
+#
+# This dd command works around an issue where ZIL records aren't created
+# after freezing the pool unless a ZIL header already exists. Create a file
+# synchronously to force ZFS to write one out.
+#
+log_must dd if=/dev/zero of=$TESTDIR/sync \
+    conv=fdatasync,fsync bs=1 count=1
+
+#
+# 2. Freeze TESTFS
+#
+log_must zpool freeze $TESTPOOL
+
+#
+# 3. Unprivileged write to plain, SUID, SGID and SUID+SGID files
+#
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE"      "PRECRASH"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID"      "PRECRASH"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID"      "PRECRASH"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH"
+
+#
+# 4. Unmount filesystem and export the pool
+#
+# At this stage TESTFS is empty again and frozen, the intent log contains
+# a complete set of deltas to replay.
+#
+log_must zfs unmount $TESTPOOL/$TESTFS
+
+log_note "List transactions to replay:"
+log_must zdb -iv $TESTPOOL/$TESTFS
+
+log_must zpool export $TESTPOOL
+
+#
+# 5. Remount TESTFS <which replays the intent log>
+#
+# Import the pool to unfreeze it and claim log blocks.  It has to be
+# `zpool import -f` because we can't write a frozen pool's labels!
+#
+log_must zpool import -f -d $VDIR $TESTPOOL
+
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE"      "REPLAY"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID"      "REPLAY"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID"      "REPLAY"
+log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "REPLAY"
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c b/zfs/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
index c2c02c5..b0c2360 100644
--- a/zfs/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
+++ b/zfs/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c

@@ -28,7 +28,7 @@
 
 #define	BSZ 64
 
-void
+static void
 fill_random(char *buf, int len)
 {
 	int i;

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/Makefile.am b/zfs/tests/zfs-tests/tests/functional/trim/Makefile.am
index 4f260a8..8917ed7 100644
--- a/zfs/tests/zfs-tests/tests/functional/trim/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/trim/Makefile.am

@@ -8,4 +8,5 @@
 	autotrim_config.ksh \
 	autotrim_trim_integrity.ksh \
 	trim_integrity.ksh \
-	trim_config.ksh
+	trim_config.ksh \
+	trim_l2arc.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
index e41e325..924b569 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh

@@ -49,35 +49,41 @@
 
 	log_must rm -f $TRIM_VDEVS
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_trim_txg_batch $trim_txg_batch
-	log_must set_tunable64 zfs_vdev_min_ms_count $vdev_min_ms_count
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_TXG_BATCH $trim_txg_batch
+	log_must set_tunable64 VDEV_MIN_MS_COUNT $vdev_min_ms_count
 }
 log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
-# Reduced zfs_trim_txg_batch to make trimming more frequent.
-typeset trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
-log_must set_tunable64 zfs_trim_txg_batch 8
+# Reduced TRIM_TXG_BATCH to make trimming more frequent.
+typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
+log_must set_tunable64 TRIM_TXG_BATCH 8
 
 # Increased metaslabs to better simulate larger more realistic devices.
-typeset vdev_min_ms_count=$(get_tunable zfs_vdev_min_ms_count)
-log_must set_tunable64 zfs_vdev_min_ms_count 32
+typeset vdev_min_ms_count=$(get_tunable VDEV_MIN_MS_COUNT)
+log_must set_tunable64 VDEV_MIN_MS_COUNT 32
 
 typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) ))
 typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) ))
 
-for type in "" "mirror" "raidz2"; do
+for type in "" "mirror" "raidz2" "draid"; do
 
 	if [[ "$type" = "" ]]; then
 		VDEVS="$TRIM_VDEV1"
 	elif [[ "$type" = "mirror" ]]; then
 		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2"
-	else
+	elif [[ "$type" = "raidz2" ]]; then
 		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3"
+	elif [[ "$type" = "draid" ]]; then
+		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4"
+
+		# The per-vdev utilization is lower due to the capacity
+		# reserved for the distributed spare.
+		VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.50 / 1024 / 1024) ))
 	fi
 
 	log_must truncate -s $((4 * MINVDEVSIZE)) $VDEVS

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh
index c7b3da7..78fe18f 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh

@@ -47,20 +47,20 @@
 
 	log_must rm -f $TRIM_VDEVS
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_trim_txg_batch $trim_txg_batch
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_TXG_BATCH $trim_txg_batch
 }
 log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
-# Reduced zfs_trim_txg_batch to make trimming more frequent.
-typeset trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
-log_must set_tunable64 zfs_trim_txg_batch 8
+# Reduced TRIM_TXG_BATCH to make trimming more frequent.
+typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
+log_must set_tunable64 TRIM_TXG_BATCH 8
 
-for type in "" "mirror" "raidz" "raidz2" "raidz3"; do
+for type in "" "mirror" "raidz" "draid"; do
 	log_must truncate -s 1G $TRIM_VDEVS
 
 	log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh
index c0e850c..13c9b95 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh

@@ -48,20 +48,20 @@
 
 	log_must rm -f $TRIM_VDEVS
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_trim_txg_batch $trim_txg_batch
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_TXG_BATCH $trim_txg_batch
 }
 log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
-# Reduced zfs_trim_txg_batch to make trimming more frequent.
-typeset trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
-log_must set_tunable64 zfs_trim_txg_batch 8
+# Reduced TRIM_TXG_BATCH to make trimming more frequent.
+typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
+log_must set_tunable64 TRIM_TXG_BATCH 8
 
-for type in "" "mirror" "raidz" "raidz2" "raidz3"; do
+for type in "" "mirror" "raidz" "raidz2" "draid" "draid2"; do
 	log_must truncate -s 1G $TRIM_VDEVS
 
 	log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS
@@ -77,8 +77,7 @@
 		zpool sync
 
 		if [[ $((n % 4)) -eq 0 ]]; then
-			log_must zpool trim $TESTPOOL
-			wait_trim $TESTPOOL $TRIM_VDEVS
+			log_must timeout 120 zpool trim -w $TESTPOOL
 		fi
 	done
 	log_must du -hs /$TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/setup.ksh b/zfs/tests/zfs-tests/tests/functional/trim/setup.ksh
index cdcf038..0948960 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/setup.ksh

@@ -23,15 +23,21 @@
 
 verify_runnable "global"
 
-DISK1=${DISKS%% *}
+if is_freebsd; then
+	log_unsupported "FreeBSD has no hole punching mechanism for the time being."
+	diskinfo -v $DISKS | grep -qE 'No.*# TRIM/UNMAP support' &&
+	    log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+else
+	DISK1=${DISKS%% *}
 
-typeset -i max_discard=0
-if [[ -b $DEV_RDSKDIR/$DISK1 ]]; then
-	max_discard=$(lsblk -Dbn $DEV_RDSKDIR/$DISK1 | awk '{ print $4; exit }')
-fi
+	typeset -i max_discard=0
+	if is_disk_device $DEV_RDSKDIR/$DISK1; then
+		max_discard=$(lsblk -Dbn $DEV_RDSKDIR/$DISK1 | awk '{ print $4; exit }')
+	fi
 
-if test $max_discard -eq 0; then
-	log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+	if test $max_discard -eq 0; then
+		log_unsupported "DISKS do not support discard (TRIM/UNMAP)"
+	fi
 fi
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/trim.kshlib b/zfs/tests/zfs-tests/tests/functional/trim/trim.kshlib
index ed6a8f9..bede946 100644
--- a/zfs/tests/zfs-tests/tests/functional/trim/trim.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/trim/trim.kshlib

@@ -33,17 +33,18 @@
 {
 	typeset pool="${1-:$TESTPOOL}"
 	typeset type="${2-:ind}"
+	typeset vdev="${3}"
 	typeset rval
 
 	# Sum the ind or agg columns of the trim request size histogram.
 	case "$type" in
 	"ind")
-		rval=$(zpool iostat -pr $pool | awk \
+		rval=$(zpool iostat -pr $pool $vdev | awk \
 		    '$1 ~ /[0-9].*/ { sum += $12 } END { print sum }')
 		echo -n "$rval"
 		;;
 	"agg")
-		rval=$(zpool iostat -pr $pool | awk \
+		rval=$(zpool iostat -pr $pool $vdev | awk \
 		    '$1 ~ /[0-9].*/ { sum += $13 } END { print sum }')
 		echo -n "$rval"
 		;;
@@ -61,9 +62,10 @@
 	typeset pool="${1:-$TESTPOOL}"
 	typeset type="${2:-ind}"
 	typeset min_trim_ios=${3:-100}
+	typeset vdev="${4}"
 	typeset ios
 
-	ios=$(get_trim_io $pool $type)
+	ios=$(get_trim_io $pool $type $vdev)
 	if [[ $ios -ge $min_trim_ios ]]; then
 		log_note "Issued $ios $type trim IOs for pool $pool"
 	else
@@ -118,37 +120,3 @@
 		fi
 	done
 }
-
-#
-# Wait for up to 120 seconds for trimming of the listed vdevs to complete.
-#
-function wait_trim # pool vdevs
-{
-	typeset stop_time=$(( $(date +%s) + 120 ))
-	typeset pool="$1"
-	shift
-	typeset vdevs=$@
-	typeset complete
-
-	while [[ $complete -eq 0 ]]; do
-		complete=1
-
-		for vdev in $vdevs; do
-			if [[ "$(trim_progress $pool $vdev)" -lt "100" ]]; then
-				complete=0
-				break
-			else
-				log_must eval "trim_prog_line $pool $vdev | \
-				    grep complete"
-			fi
-		done
-
-		if [ "$(date +%s)" -ge $stop_time ]; then
-			log_fail "Exceeded trim time limit of 120s"
-		fi
-
-		sleep 0.5
-	done
-
-	log_note "Pool completed trim successfully."
-}

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/zfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh
index 993072b..9a6e19e 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh

@@ -49,35 +49,41 @@
 
 	log_must rm -f $TRIM_VDEVS
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_trim_txg_batch $trim_txg_batch
-	log_must set_tunable64 zfs_vdev_min_ms_count $vdev_min_ms_count
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_TXG_BATCH $trim_txg_batch
+	log_must set_tunable64 VDEV_MIN_MS_COUNT $vdev_min_ms_count
 }
 log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
-# Reduced zfs_trim_txg_batch to make trimming more frequent.
-typeset trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
-log_must set_tunable64 zfs_trim_txg_batch 8
+# Reduced TRIM_TXG_BATCH to make trimming more frequent.
+typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
+log_must set_tunable64 TRIM_TXG_BATCH 8
 
 # Increased metaslabs to better simulate larger more realistic devices.
-typeset vdev_min_ms_count=$(get_tunable zfs_vdev_min_ms_count)
-log_must set_tunable64 zfs_vdev_min_ms_count 32
+typeset vdev_min_ms_count=$(get_tunable VDEV_MIN_MS_COUNT)
+log_must set_tunable64 VDEV_MIN_MS_COUNT 32
 
 typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) ))
 typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) ))
 
-for type in "" "mirror" "raidz2"; do
+for type in "" "mirror" "raidz2" "draid"; do
 
 	if [[ "$type" = "" ]]; then
 		VDEVS="$TRIM_VDEV1"
 	elif [[ "$type" = "mirror" ]]; then
 		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2"
-	else
+	elif [[ "$type" = "raidz2" ]]; then
 		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3"
+	elif [[ "$type" = "draid" ]]; then
+		VDEVS="$TRIM_VDEV1 $TRIM_VDEV2 $TRIM_VDEV3 $TRIM_VDEV4"
+
+		# The per-vdev utilization is lower due to the capacity
+		# reserved for the distributed spare.
+		VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.50 / 1024 / 1024) ))
 	fi
 
 	log_must truncate -s $((4 * MINVDEVSIZE)) $VDEVS
@@ -92,8 +98,7 @@
 
 	# Remove the file, issue trim, verify the vdevs are now sparse.
 	log_must rm /$TESTPOOL/file
-	log_must zpool trim $TESTPOOL
-	wait_trim $TESTPOOL $VDEVS
+	log_must timeout 120 zpool trim -w $TESTPOOL
 	verify_vdevs "-le" "$VDEV_MIN_MB" $VDEVS
 
 	log_must zpool destroy $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh b/zfs/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh
index 0bbc439..38f226d 100755
--- a/zfs/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh

@@ -47,20 +47,20 @@
 
 	log_must rm -f $TRIM_VDEVS
 
-	log_must set_tunable64 zfs_trim_extent_bytes_min $trim_extent_bytes_min
-	log_must set_tunable64 zfs_trim_txg_batch $trim_txg_batch
+	log_must set_tunable64 TRIM_EXTENT_BYTES_MIN $trim_extent_bytes_min
+	log_must set_tunable64 TRIM_TXG_BATCH $trim_txg_batch
 }
 log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
-typeset trim_extent_bytes_min=$(get_tunable zfs_trim_extent_bytes_min)
-log_must set_tunable64 zfs_trim_extent_bytes_min 4096
+typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
 
-# Reduced zfs_trim_txg_batch to make trimming more frequent.
-typeset trim_txg_batch=$(get_tunable zfs_trim_txg_batch)
-log_must set_tunable64 zfs_trim_txg_batch 8
+# Reduced TRIM_TXG_BATCH to make trimming more frequent.
+typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
+log_must set_tunable64 TRIM_TXG_BATCH 8
 
-for type in "" "mirror" "raidz" "raidz2" "raidz3"; do
+for type in "" "mirror" "raidz" "draid"; do
 	log_must truncate -s 1G $TRIM_VDEVS
 
 	log_must zpool create -f $TESTPOOL $type $TRIM_VDEVS
@@ -76,8 +76,7 @@
 	done
 	log_must du -hs /$TESTPOOL
 
-	log_must zpool trim $TESTPOOL
-	wait_trim $TESTPOOL $TRIM_VDEVS
+	log_must timeout 120 zpool trim -w $TESTPOOL
 
 	verify_trim_io $TESTPOOL "ind" 10
 	verify_pool $TESTPOOL

diff --git a/zfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh b/zfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
new file mode 100755
index 0000000..04dbf5d
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh

@@ -0,0 +1,106 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/trim/trim.kshlib
+. $STF_SUITE/tests/functional/trim/trim.cfg
+
+#
+# DESCRIPTION:
+# 	Verify trimming of L2ARC
+#
+# STRATEGY:
+#	1. Set 'l2arc_trim_ahead = 1' and 'l2arc_write_max = 64MB'.
+#	2. Create a pool on file vdevs to trim.
+#	3. Verify the cache device was trimmed.
+#	4. Fill the pool with a file larger than the L2ARC vdev.
+#	5. Randomly read the previously written file long enough for the
+#		L2ARC vdev to be filled and overwritten 5 times.
+#	6. Verify trim IOs of the expected type were issued for the pool.
+#	7. Verify the allocated space on the cache device is less than
+#		its size.
+#
+
+verify_runnable "global"
+
+log_assert "Trim of L2ARC succeeds."
+
+function cleanup
+{
+	if poolexists $TESTPOOL; then
+		destroy_pool $TESTPOOL
+	fi
+
+	log_must rm -f $VDEVS
+	log_must set_tunable32 L2ARC_TRIM_AHEAD $l2arc_trimahead
+	log_must set_tunable32 L2ARC_WRITE_MAX $l2arc_writemax
+}
+log_onexit cleanup
+
+# The cache device $TRIM_VDEV2 has to be small enough, so that
+# dev->l2ad_hand loops around and dev->l2ad_first=0. Otherwise 
+# l2arc_evict() exits before evicting/trimming.
+typeset l2arc_trimahead=$(get_tunable L2ARC_TRIM_AHEAD)
+typeset l2arc_writemax=$(get_tunable L2ARC_WRITE_MAX)
+log_must set_tunable32 L2ARC_TRIM_AHEAD 1
+log_must set_tunable32 L2ARC_WRITE_MAX $((64 * 1024 * 1024))
+VDEVS="$TRIM_VDEV1 $TRIM_VDEV2"
+log_must truncate -s $((MINVDEVSIZE)) $TRIM_VDEV2
+log_must truncate -s $((4 * MINVDEVSIZE)) $TRIM_VDEV1
+typeset VDEV_MIN_MB=$((MINVDEVSIZE * 0.30 / 1024 / 1024))
+
+log_must zpool create -f $TESTPOOL $TRIM_VDEV1 cache $TRIM_VDEV2
+verify_vdevs "-le" "$VDEV_MIN_MB" $TRIM_VDEV2
+
+typeset fill_mb=$(( floor(3 * MINVDEVSIZE) ))
+export DIRECTORY=/$TESTPOOL
+export NUMJOBS=1
+export FILE_SIZE=${fill_mb}
+export PERF_RANDSEED=1234
+export PERF_COMPPERCENT=66
+export PERF_COMPCHUNK=0
+export RUNTIME=30
+export BLOCKSIZE=128K
+export SYNC_TYPE=0
+export DIRECT=1
+
+# Write to the pool.
+log_must fio $FIO_SCRIPTS/mkfiles.fio
+
+# Read randomly from the pool to fill L2ARC.
+export RUNTIME=30
+log_must fio $FIO_SCRIPTS/random_reads.fio
+
+export RUNTIME=1
+typeset do_once=true
+while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
+	typeset l2_size1=$(get_arcstat l2_size)
+	log_must fio $FIO_SCRIPTS/random_reads.fio
+	typeset l2_size2=$(get_arcstat l2_size)
+	do_once=false
+done
+
+verify_trim_io $TESTPOOL "ind" 5 $TRIM_VDEV2
+
+typeset cache_size=$(zpool list -vp | grep $TRIM_VDEV2 | awk '{print $2}')
+typeset cache_alloc=$(zpool list -vp | grep $TRIM_VDEV2 | awk '{print $3}')
+
+log_must test $cache_alloc -lt $cache_size
+
+log_must zpool destroy $TESTPOOL
+log_must rm -f $VDEVS
+
+log_pass "Trim of L2ARC succeeds."

diff --git a/zfs/tests/zfs-tests/tests/functional/truncate/truncate_timestamps.ksh b/zfs/tests/zfs-tests/tests/functional/truncate/truncate_timestamps.ksh
index c365c74..27b28e8 100755
--- a/zfs/tests/zfs-tests/tests/functional/truncate/truncate_timestamps.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/truncate/truncate_timestamps.ksh

@@ -38,13 +38,23 @@
 	typeset option="$3"
 
 	log_must mkfile $sizeavg $filename # always start with $sizeavg
-	typeset -i timestm="$(stat -c %Y $filename)"
-	typeset -i timestc="$(stat -c %Z $filename)"
-	log_must sleep 1
-	log_must $STF_SUITE/tests/functional/truncate/truncate_test -s $size $filename $option
-	verify_eq $size "$(stat -c %s $filename)" "size"
-	verify_ne $timestm "$(stat -c %Y $filename)" "mtime"
-	verify_ne $timestc "$(stat -c %Z $filename)" "ctime"
+	if is_freebsd; then
+		typeset -i timestm="$(stat -f "%m" $filename)"
+		typeset -i timestc="$(stat -f "%c" $filename)"
+		log_must sleep 1
+		log_must $STF_SUITE/tests/functional/truncate/truncate_test -s $size $filename $option
+		verify_eq $size "$(stat_size $filename)" "size"
+		verify_ne $timestm "$(stat -f "%m" $filename)" "mtime"
+		verify_ne $timestc "$(stat -f "%c" $filename)" "ctime"
+	else
+		typeset -i timestm="$(stat -c %Y $filename)"
+		typeset -i timestc="$(stat -c %Z $filename)"
+		log_must sleep 1
+		log_must $STF_SUITE/tests/functional/truncate/truncate_test -s $size $filename $option
+		verify_eq $size "$(stat_size $filename)" "size"
+		verify_ne $timestm "$(stat -c %Y $filename)" "mtime"
+		verify_ne $timestc "$(stat -c %Z $filename)" "ctime"
+	fi
 	log_must rm -f $filename
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/13709_reproducer.bz2 b/zfs/tests/zfs-tests/tests/functional/userquota/13709_reproducer.bz2
new file mode 100644
index 0000000..9c31682
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/13709_reproducer.bz2
Binary files differ

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/Makefile.am b/zfs/tests/zfs-tests/tests/functional/userquota/Makefile.am
index 8f0287b..dad2f8c 100644
--- a/zfs/tests/zfs-tests/tests/functional/userquota/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/Makefile.am

@@ -20,8 +20,12 @@
 	userquota_013_pos.ksh \
 	userspace_001_pos.ksh \
 	userspace_002_pos.ksh \
-	userspace_003_pos.ksh
+	userspace_003_pos.ksh \
+	userspace_encrypted.ksh \
+	userspace_send_encrypted.ksh \
+	userspace_encrypted_13709.ksh
 
 dist_pkgdata_DATA = \
+	13709_reproducer.bz2 \
 	userquota.cfg \
 	userquota_common.kshlib

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_001_pos.ksh
index fb7a190..762f561 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_001_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_quota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_002_pos.ksh
index 20d0f73..27feafa 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_002_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 	log_must cleanup_quota
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_003_pos.ksh
index 56b7af0..37fd389 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/groupspace_003_pos.ksh

@@ -46,9 +46,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 
 	log_must rm -f ${QFILE}_*
 	log_must cleanup_quota

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh
index 5684b05..b8e9561 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_005_neg.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_quota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_006_pos.ksh
index c53cb5f..1c2509c 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_006_pos.ksh

@@ -44,9 +44,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_quota
 }
@@ -67,7 +65,11 @@
 
 set -A no_groups "aidsf@dfsd@" "123223-dsfds#sdfsd" "mss_#ss" "1234"
 for group in "${no_groups[@]}"; do
-	log_mustnot eval "groupdel $group > /dev/null 2>&1"
+	if is_freebsd; then
+		log_mustnot eval "pw groupdel -n $group >/dev/null 2>&1"
+	else
+		log_mustnot eval "groupdel $group >/dev/null 2>&1"
+	fi
 	log_must eval "zfs get groupquota@$group $QFS >/dev/null 2>&1"
 	log_must eval "zfs get groupquota@$group $snap_fs >/dev/null 2>&1"
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_009_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_009_pos.ksh
index 1c0fdde..b6f2727 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_009_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_009_pos.ksh

@@ -49,9 +49,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_quota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_011_pos.ksh
index 93020ae..8917b3b 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_011_pos.ksh

@@ -49,9 +49,7 @@
 function cleanup
 {
 	for ds in $TESTPOOL/fs $TESTPOOL/fs-rename $TESTPOOL/fs-clone; do
-		if datasetexists $ds; then
-			log_must zfs destroy -rRf $ds
-		fi
+		datasetexists $ds && destroy_dataset $ds -rRf
 	done
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh
index b553f91..12e0231 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userquota_012_neg.ksh

@@ -46,9 +46,7 @@
 {
 	cleanup_quota
 
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_001_pos.ksh
index ef05338..9b89193 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_001_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snap_fs; then
-		log_must zfs destroy $snap_fs
-	fi
+	datasetexists $snap_fs && destroy_dataset $snap_fs
 
 	log_must cleanup_quota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_002_pos.ksh
index 8161cc1..94593ed 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_002_pos.ksh

@@ -45,9 +45,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 
 	log_must cleanup_quota
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_003_pos.ksh
index 96c3b19..70ef78e 100755
--- a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_003_pos.ksh

@@ -47,9 +47,7 @@
 
 function cleanup
 {
-	if datasetexists $snapfs; then
-		log_must zfs destroy $snapfs
-	fi
+	datasetexists $snapfs && destroy_dataset $snapfs
 
 	log_must rm -f ${QFILE}_*
 	log_must cleanup_quota

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted.ksh
new file mode 100755
index 0000000..429b16e
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted.ksh

@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/userquota/userquota_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs userspace' and 'zfs groupspace' can be used on encrypted datasets
+#
+#
+# STRATEGY:
+# 1. Create both un-encrypted and encrypted datasets
+# 2. Receive un-encrypted dataset in encrypted hierarchy
+# 3. Verify encrypted datasets support 'zfs userspace' and 'zfs groupspace'
+#
+
+function cleanup
+{
+	destroy_pool $POOLNAME
+	rm -f $FILEDEV
+}
+
+function log_must_unsupported
+{
+	log_must_retry "unsupported" 3 "$@"
+	(( $? != 0 )) && log_fail
+}
+
+log_onexit cleanup
+
+FILEDEV="$TEST_BASE_DIR/userspace_encrypted"
+POOLNAME="testpool$$"
+typeset -a POOL_OPTS=('' # all pool features enabled
+    '-d' # all pool features disabled
+    '-d -o feature@userobj_accounting=enabled' # only userobj_accounting enabled
+    '-d -o feature@project_quota=enabled') # only project_quota enabled
+DATASET_ENCROOT="$POOLNAME/encroot"
+DATASET_SENDFS="$POOLNAME/sendfs"
+
+log_assert "'zfs user/groupspace' should work on encrypted datasets"
+
+for opts in "${POOL_OPTS[@]}"; do
+	# Setup
+	truncate -s $SPA_MINDEVSIZE $FILEDEV
+	log_must zpool create $opts -o feature@encryption=enabled $POOLNAME \
+		$FILEDEV
+
+	# 1. Create both un-encrypted and encrypted datasets
+	log_must zfs create $DATASET_SENDFS
+	log_must eval "echo 'password' | zfs create -o encryption=on" \
+		"-o keyformat=passphrase -o keylocation=prompt " \
+		"$DATASET_ENCROOT"
+	log_must zfs create $DATASET_ENCROOT/fs
+
+	# 2. Receive un-encrypted dataset in encrypted hierarchy
+	log_must zfs snap $DATASET_SENDFS@snap
+	log_must eval "zfs send $DATASET_SENDFS@snap | zfs recv " \
+		"$DATASET_ENCROOT/recvfs"
+
+	# 3. Verify encrypted datasets support 'zfs userspace' and
+	# 'zfs groupspace'
+	log_must zfs userspace $DATASET_ENCROOT/fs
+	log_must zfs groupspace $DATASET_ENCROOT/fs
+	log_must_unsupported zfs userspace $DATASET_ENCROOT/recvfs
+	log_must_unsupported zfs groupspace $DATASET_ENCROOT/recvfs
+
+	# Cleanup
+	cleanup
+done
+
+log_pass "'zfs user/groupspace' works on encrypted datasets"

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted_13709.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted_13709.ksh
new file mode 100755
index 0000000..9c1d847
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_encrypted_13709.ksh

@@ -0,0 +1,45 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/userquota/userquota_common.kshlib
+
+#
+# DESCRIPTION:
+# Avoid allowing #11294/#13709 to recur a third time.
+#
+# So we hardcode a copy of a pool with this bug, try unlocking it,
+# and fail on error. Simple.
+
+function cleanup
+{
+	destroy_pool $POOLNAME
+	rm -f $FILEDEV
+}
+
+log_onexit cleanup
+
+FILEDEV="$TEST_BASE_DIR/userspace_13709"
+POOLNAME="testpool_13709"
+
+log_assert "ZFS should be able to unlock pools with #13709's failure mode"
+
+log_must bzcat $STF_SUITE/tests/functional/userquota/13709_reproducer.bz2 > $FILEDEV
+
+log_must zpool import -d $FILEDEV $POOLNAME
+
+echo -e 'password\npassword\n' | log_must zfs mount -al
+
+# Cleanup
+cleanup
+
+log_pass "#13709 not happening here"

diff --git a/zfs/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh
new file mode 100755
index 0000000..e9ef0c4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/userquota/userspace_send_encrypted.ksh

@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021, George Amanakis <gamanakis@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/userquota/userquota_common.kshlib
+
+#
+# DESCRIPTION:
+# Sending raw encrypted datasets back to the source dataset succeeds.
+#
+#
+# STRATEGY:
+# 1. Create encrypted source dataset, set userquota and write a file
+# 2. Create base snapshot
+# 3. Write new file, snapshot, get userspace
+# 4. Raw send both snapshots
+# 5. Destroy latest snapshot at source and rollback
+# 6. Unmount, unload key from source
+# 7. Raw send latest snapshot back to source
+# 8. Mount both source and target datasets
+# 9. Verify encrypted datasets support 'zfs userspace' and 'zfs groupspace'
+#	and the accounting is done correctly
+#
+
+function cleanup
+{
+	destroy_pool $POOLNAME
+	rm -f $FILEDEV
+}
+
+log_onexit cleanup
+
+FILEDEV="$TEST_BASE_DIR/userspace_encrypted"
+POOLNAME="testpool$$"
+ENC_SOURCE="$POOLNAME/source"
+ENC_TARGET="$POOLNAME/target"
+
+log_assert "Sending raw encrypted datasets back to the source dataset succeeds."
+
+# Setup pool and create source
+truncate -s 200m $FILEDEV
+log_must zpool create -o feature@encryption=enabled $POOLNAME \
+	$FILEDEV
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+	"-o keyformat=passphrase -o keylocation=prompt " \
+	"$ENC_SOURCE"
+
+# Set user quota and write file
+log_must zfs set userquota@$QUSER1=50m $ENC_SOURCE
+mkmount_writable $ENC_SOURCE
+mntpnt=$(get_prop mountpoint $ENC_SOURCE)
+log_must user_run $QUSER1 mkfile 10m /$mntpnt/file1
+sync
+
+# Snapshot
+log_must zfs snap $ENC_SOURCE@base
+
+# Write new file, snapshot, get userspace
+log_must user_run $QUSER1 mkfile 20m /$mntpnt/file2
+log_must zfs snap $ENC_SOURCE@s1
+
+# Raw send both snapshots
+log_must eval "zfs send -w $ENC_SOURCE@base | zfs recv " \
+	"$ENC_TARGET"
+log_must eval "zfs send -w -i @base $ENC_SOURCE@s1 | zfs recv " \
+	"$ENC_TARGET"
+
+# Destroy latest snapshot at source and rollback
+log_must zfs destroy $ENC_SOURCE@s1
+log_must zfs rollback $ENC_SOURCE@base
+rollback_uspace=$(zfs userspace -Hp $ENC_SOURCE | \
+	awk "/$QUSER1/"' {printf "%d\n", $4 / 1024 / 1024}')
+
+# Unmount, unload key
+log_must zfs umount $ENC_SOURCE
+log_must zfs unload-key -a
+
+# Raw send latest snapshot back to source
+log_must eval "zfs send -w -i @base $ENC_TARGET@s1 | zfs recv " \
+	"$ENC_SOURCE"
+
+# Mount encrypted datasets and verify they support 'zfs userspace' and
+# 'zfs groupspace' and the accounting is done correctly
+log_must eval "echo 'password' | zfs load-key $ENC_SOURCE"
+log_must eval "echo 'password' | zfs load-key $ENC_TARGET"
+log_must zfs mount $ENC_SOURCE
+log_must zfs mount $ENC_TARGET
+sync
+
+sleep 5
+
+src_uspace=$(zfs userspace -Hp $ENC_SOURCE | \
+	awk "/$QUSER1/"' {printf "%d\n", $4 / 1024 / 1024}')
+tgt_uspace=$(zfs userspace -Hp $ENC_TARGET | \
+	awk "/$QUSER1/"' {printf "%d\n", $4 / 1024 / 1024}')
+log_must test "$src_uspace" -eq "$tgt_uspace"
+log_must test "$rollback_uspace" -ne "$src_uspace"
+
+src_uquota=$(zfs userspace -Hp $ENC_SOURCE | awk "/$QUSER1/"' {print $5}')
+tgt_uquota=$(zfs userspace -Hp $ENC_TARGET | awk "/$QUSER1/"' {print $5}')
+log_must test "$src_uquota" -eq "$tgt_uquota"
+
+# Cleanup
+cleanup
+
+log_pass "Sending raw encrypted datasets back to the source dataset succeeds."

diff --git a/zfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib b/zfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib
index aae2ab4..41ba54a 100644
--- a/zfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib

@@ -43,14 +43,14 @@
 function get_leaf_vd_zap # dsk conf
 {
         typeset section=$(get_conf_section "$1" "$2")
-        echo "$section" | egrep \
+        echo "$section" | grep -E \
                 "com.delphix:vdev_zap_leaf: [0-9]+" | awk '{print $2}'
 }
 
 function get_top_vd_zap # dsk conf
 {
         typeset section=$(get_conf_section "$1" "$2")
-        echo "$section" | egrep \
+        echo "$section" | grep -E \
                 "com.delphix:vdev_zap_top: [0-9]+" | awk '{print $2}'
 }
 

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/Makefile.am b/zfs/tests/zfs-tests/tests/functional/write_dirs/Makefile.am
index 267a5d1..9bdc46b 100644
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/functional/write_dirs/Makefile.am

@@ -4,6 +4,3 @@
 	cleanup.ksh \
 	write_dirs_001_pos.ksh \
 	write_dirs_002_pos.ksh
-
-dist_pkgdata_DATA = \
-	write_dirs.cfg

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/write_dirs/cleanup.ksh
index 5052615..aa03208 100755
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/write_dirs/cleanup.ksh

@@ -30,6 +30,8 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/write_dirs/write_dirs.cfg
 
-default_cleanup
+default_cleanup_noexit
+rm -f $TEST_BASE_DIR/disk0
+
+log_pass

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/setup.ksh b/zfs/tests/zfs-tests/tests/functional/write_dirs/setup.ksh
index 3f10c7f..a14eab2 100755
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/write_dirs/setup.ksh

@@ -30,18 +30,9 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/write_dirs/write_dirs.cfg
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
-	log_unsupported "This directory cannot be run on raw files."
-fi
-
-DISK=${DISKS%% *}
-if is_mpath_device $DISK; then
-         delete_partitions
-fi
-log_must set_partition 0 "" $SIZE $DISK
-
-default_setup "${DISK}${SLICE_PREFIX}${SLICE}"
+DISK=$TEST_BASE_DIR/disk0
+log_must truncate -s 2G $DISK
+default_setup $DISK

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs.cfg b/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs.cfg
deleted file mode 100644
index 400d5bc..0000000
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs.cfg
+++ /dev/null

@@ -1,47 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# Copyright (c) 2013 by Delphix. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-verify_runnable "global"
-
-export SIZE="1gb"
-export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
-export DISKSARRAY=$DISKS
-
-if is_linux; then
-	set_slice_prefix
-	set_device_dir
-	export SLICE=1
-else
-	DEV_DSKDIR="/dev"
-	export SLICE_PREFIX="s"
-	export SLICE=0
-fi

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_001_pos.ksh b/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_001_pos.ksh
index 1835eb3..f7519eb 100755
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_001_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_001_pos.ksh

@@ -30,7 +30,6 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/write_dirs/write_dirs.cfg
 
 #
 # DESCRIPTION:
@@ -38,7 +37,7 @@
 # is full. The zfs file system should be stable and works well.
 #
 # STRATEGY:
-# 1. Create a pool & dateset
+# 1. Create a pool & dataset
 # 2. Make directories in the zfs file system
 # 3. Create 50 big files in each directories
 # 4. Test case exit when the disk is full.
@@ -48,11 +47,10 @@
 
 function cleanup
 {
-	for file in `find $TESTDIR -type f`; do
-		cat /dev/null > $file
-	done
-	log_must sync
-	log_must rm -rf $TESTDIR/*
+	destroy_dataset $TESTPOOL/$TESTFS
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
+	zfs create -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 }
 
 typeset -i retval=0

diff --git a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_002_pos.ksh
index 9f0b3f3..0738701 100755
--- a/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/write_dirs/write_dirs_002_pos.ksh

@@ -30,7 +30,6 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/write_dirs/write_dirs.cfg
 
 #
 # DESCRIPTION:
@@ -38,7 +37,7 @@
 # is full. The zfs file system should be work well and stable.
 #
 # STRATEGY:
-# 1. Create a pool & dateset
+# 1. Create a pool & dataset
 # 2. Make directories in the zfs file system
 # 3. Create 5000 files in each directories
 # 4. Test case exit when the disk is full
@@ -48,11 +47,10 @@
 
 function cleanup
 {
-	for file in `find $TESTDIR -type f`; do
-		cat /dev/null > $file
-	done
-	log_must sync
-	log_must rm -rf $TESTDIR/*
+	destroy_dataset $TESTPOOL/$TESTFS
+	wait_freeing $TESTPOOL
+	sync_pool $TESTPOOL
+	zfs create -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 }
 
 typeset -i retval=0

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/setup.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/setup.ksh
index e623cd7..d9228c4 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/setup.ksh

@@ -32,9 +32,7 @@
 
 # if we're running NIS, turn it off until we clean up
 # (it can cause useradd to take a long time, hitting our TIMEOUT)
-if is_linux; then
-	USED_NIS=false
-else
+if is_illumos; then
 	USES_NIS=false
 	svcs svc:/network/nis/client:default | grep online > /dev/null
 	if [ $? -eq 0 ]
@@ -42,6 +40,8 @@
 		svcadm disable -t svc:/network/nis/client:default
 		USES_NIS=true
 	fi
+else
+	USES_NIS=false
 fi
 
 # Make sure we use a brand new user for this

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh
index e379d15..4393774 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_002_neg.ksh

@@ -57,6 +57,6 @@
 	# create a file
 	log_must touch $TESTDIR/myfile.$$
 	log_mustnot eval "cat $TESTDIR/myfile.$$ not-here.txt > /dev/null 2>&1"
-
-	log_pass "A read of a non-existent xattr fails"
 done
+
+log_pass "A read of a non-existent xattr fails"

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh
index 0a661e9..ba27d04 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_003_neg.ksh

@@ -43,34 +43,40 @@
 #	4. Check that we're unable to write an xattr as a non-root user
 #
 
-function cleanup {
-
-	log_must rm $TESTDIR/myfile.$$
-
+function cleanup
+{
+	rm -f $testfile $tempfile
 }
 
 log_assert "read/write xattr on a file with no permissions fails"
 log_onexit cleanup
 
-log_must touch $TESTDIR/myfile.$$
-create_xattr $TESTDIR/myfile.$$ passwd /etc/passwd
+typeset testfile=$TESTDIR/testfile.$$
+typeset tempfile=/tmp/tempfile.$$
 
-log_must chmod 000 $TESTDIR/myfile.$$
-if is_linux; then
-	user_run $ZFS_USER eval \
-	    "attr -q -g passwd $TESTDIR/myfile.$$ >/tmp/passwd.$$"
-	log_mustnot diff /etc/passwd /tmp/passwd.$$
-	log_must rm /tmp/passwd.$$
+log_must touch $testfile
+create_xattr $testfile passwd /etc/passwd
 
-	user_run $ZFS_USER eval \
-	    "attr -q -s passwd $TESTDIR/myfile.$$ </etc/group"
-	log_must chmod 644 $TESTDIR/myfile.$$
-	attr -q -g passwd $TESTDIR/myfile.$$ >/tmp/passwd.$$
-	log_must diff /etc/passwd /tmp/passwd.$$
-	log_must rm /tmp/passwd.$$
+log_must chmod 000 $testfile
+if is_illumos; then
+	log_mustnot su $ZFS_USER -c "runat $testfile cat passwd"
+	log_mustnot su $ZFS_USER -c "runat $testfile cp /etc/passwd ."
 else
-	log_mustnot su $ZFS_USER -c "runat $TESTDIR/myfile.$$ cat passwd"
-	log_mustnot su $ZFS_USER -c "runat $TESTDIR/myfile.$$ cp /etc/passwd ."
+	log_mustnot user_run $ZFS_USER "
+. $STF_SUITE/include/libtest.shlib
+get_xattr passwd $testfile >$tempfile
+"
+	log_mustnot diff -q /etc/passwd $tempfile
+	log_must rm $tempfile
+
+	log_mustnot user_run $ZFS_USER "
+. $STF_SUITE/include/libtest.shlib
+set_xattr_stdin passwd $testfile </etc/group
+"
+	log_must chmod 644 $testfile
+	get_xattr passwd $testfile >$tempfile
+	log_must diff -q /etc/passwd $tempfile
+	log_must rm $tempfile
 fi
 
 log_pass "read/write xattr on a file with no permissions fails"

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_004_pos.ksh
index 41320a1..786322b 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_004_pos.ksh

@@ -33,11 +33,11 @@
 #
 # DESCRIPTION:
 #
-# Creating files on ufs|ext and tmpfs, and copying those files to ZFS with
-# appropriate cp flags, the xattrs will still be readable.
+# Create files on ufs|ext, copy those files to ZFS with appropriate cp flags,
+# and verify the xattrs will still be readable.
 #
 # STRATEGY:
-#	1. Create files in ufs|ext and tmpfs with xattrs
+#	1. Create files in ufs|ext with xattrs
 #	2. Copy those files to zfs
 #	3. Ensure the xattrs can be read and written
 #	4. Do the same in reverse.
@@ -54,68 +54,70 @@
 	fi
 }
 
-log_assert "Files from $NEWFS_DEFAULT_FS,tmpfs with xattrs copied to zfs retain xattr info."
+log_assert "Files from $NEWFS_DEFAULT_FS with xattrs copied to zfs retain xattr info."
 log_onexit cleanup
 
 # Create a ufs|ext file system that we can work in
 log_must zfs create -V128m $TESTPOOL/$TESTFS/zvol
 block_device_wait
-log_must eval "echo y | newfs $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol > /dev/null 2>&1"
+log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol > /dev/null 2>&1"
 
 log_must mkdir /tmp/$NEWFS_DEFAULT_FS.$$
-if is_linux; then
-	log_must mount -o user_xattr \
-	    $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol /tmp/$NEWFS_DEFAULT_FS.$$
-
-	# Create files in ext and tmpfs, and set some xattrs on them.
-	# Use small values for xattrs for ext compatibility.
-	log_must touch /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$
-
-	log_must touch /tmp/tmpfs-file.$$
-	echo "TEST XATTR" >/tmp/xattr1
-	echo "1234567890" >/tmp/xattr2
-	log_must attr -q -s xattr1 \
-	    /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ </tmp/xattr1
-	log_must attr -q -s xattr2 /tmp/tmpfs-file.$$ </tmp/xattr2
-
-	# copy those files to ZFS
-	log_must cp -a /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ \
-	    $TESTDIR
-	log_must cp -a /tmp/tmpfs-file.$$ $TESTDIR
-
-	# ensure the xattr information has been copied correctly
-	log_must eval "attr -q -g xattr1 $TESTDIR/$NEWFS_DEFAULT_FS-file.$$ \
-	    >/tmp/xattr1.$$"
-
-	log_must diff /tmp/xattr1.$$ /tmp/xattr1
-	log_must eval "attr -q -g xattr2 $TESTDIR/tmpfs-file.$$ >/tmp/xattr2.$$"
-	log_must diff /tmp/xattr2.$$ /tmp/xattr2
-	log_must rm /tmp/xattr1 /tmp/xattr1.$$ /tmp/xattr2 /tmp/xattr2.$$
-
-	log_must umount /tmp/$NEWFS_DEFAULT_FS.$$
-else
+if is_illumos; then
 	log_must mount $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol \
 	    /tmp/$NEWFS_DEFAULT_FS.$$
 
-	# Create files in ufs and tmpfs, and set some xattrs on them.
+	# Create files in ufs, and set some xattrs on them.
 	log_must touch /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$
-	log_must touch /tmp/tmpfs-file.$$
 
 	log_must runat /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ \
 	     cp /etc/passwd .
-	log_must runat /tmp/tmpfs-file.$$ cp /etc/group .
 
 	# copy those files to ZFS
 	log_must cp -@ /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ \
 	    $TESTDIR
-	log_must cp -@ /tmp/tmpfs-file.$$ $TESTDIR
 
 	# ensure the xattr information has been copied correctly
 	log_must runat $TESTDIR/$NEWFS_DEFAULT_FS-file.$$ \
 	    diff passwd /etc/passwd
-	log_must runat $TESTDIR/tmpfs-file.$$ diff group /etc/group
+
+	log_must umount /tmp/$NEWFS_DEFAULT_FS.$$
+else
+	if is_linux; then
+		options="-o user_xattr"
+	fi
+	log_must mount ${options:+$options} \
+	    $ZVOL_DEVDIR/$TESTPOOL/$TESTFS/zvol /tmp/$NEWFS_DEFAULT_FS.$$
+
+	# Create files in ext, and set some xattrs on them.
+	# Use small values for xattrs for ext compatibility.
+	log_must touch /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$
+
+	echo "TEST XATTR" >/tmp/xattr1
+
+	log_must set_xattr_stdin xattr1 \
+	    /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ </tmp/xattr1
+
+	# copy those files to ZFS
+	if is_freebsd; then
+		# cp does not preserve extattrs on FreeBSD
+		export TAPE="-"
+		log_must eval "tar cC /tmp/$NEWFS_DEFAULT_FS.$$ \
+		    $NEWFS_DEFAULT_FS-file.$$ | tar xC $TESTDIR"
+	else
+		log_must cp -a \
+		    /tmp/$NEWFS_DEFAULT_FS.$$/$NEWFS_DEFAULT_FS-file.$$ \
+		    $TESTDIR
+	fi
+
+	# ensure the xattr information has been copied correctly
+	log_must eval "get_xattr xattr1 $TESTDIR/$NEWFS_DEFAULT_FS-file.$$ \
+	    >/tmp/xattr1.$$"
+	log_must diff /tmp/xattr1.$$ /tmp/xattr1
+	log_must rm $TESTDIR/$NEWFS_DEFAULT_FS-file.$$
+	log_must rm /tmp/xattr1 /tmp/xattr1.$$
 
 	log_must umount /tmp/$NEWFS_DEFAULT_FS.$$
 fi
 
-log_pass "Files from $NEWFS_DEFAULT_FS,tmpfs with xattrs copied to zfs retain xattr info."
+log_pass "Files from $NEWFS_DEFAULT_FS with xattrs copied to zfs retain xattr info."

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_007_neg.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_007_neg.ksh
index 201f876..5189b62 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_007_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_007_neg.ksh

@@ -52,7 +52,6 @@
 	log_must rm $TEST_BASE_DIR/output.$$
 	[[ -e $TEST_BASE_DIR/expected_output.$$ ]]  && log_must rm  \
 	$TEST_BASE_DIR/expected_output.$$
-
 }
 
 log_assert "create/write xattr on a snapshot fails"
@@ -69,20 +68,20 @@
 log_must zfs snapshot $TESTPOOL/$TESTFS@snap
 
 # we shouldn't be able to alter the first file's xattr
-if is_linux; then
-	log_mustnot eval "attr -s cp $TESTDIR/.zfs/snapshot/snap/myfile.$$ \
-	     </etc/passwd  > $TEST_BASE_DIR/output.$$  2>&1"
-	log_must grep  -i  Read-only  $TEST_BASE_DIR/output.$$
-	log_must eval "attr -q -l $TESTDIR/.zfs/snapshot/snap/myfile2.$$ \
-	    > $TEST_BASE_DIR/output.$$  2>&1"
-	log_must eval "attr -q -l $TESTDIR/myfile2.$$ > $TEST_BASE_DIR/expected_output.$$"
-else
+if is_illumos; then
 	log_mustnot eval " runat $TESTDIR/.zfs/snapshot/snap/myfile.$$ \
 	    cp /etc/passwd .  > $TEST_BASE_DIR/output.$$  2>&1"
 	log_must grep  -i  Read-only  $TEST_BASE_DIR/output.$$
 	log_must eval "runat $TESTDIR/.zfs/snapshot/snap/myfile2.$$  \
 	    ls > $TEST_BASE_DIR/output.$$  2>&1"
 	create_expected_output  $TEST_BASE_DIR/expected_output.$$ SUNWattr_ro SUNWattr_rw
+else
+	log_mustnot eval "set_xattr_stdin cp $TESTDIR/.zfs/snapshot/snap/myfile.$$ \
+	     </etc/passwd  > $TEST_BASE_DIR/output.$$  2>&1"
+	log_must grep  -i  Read-only  $TEST_BASE_DIR/output.$$
+	log_must eval "ls_xattr $TESTDIR/.zfs/snapshot/snap/myfile2.$$ \
+	    > $TEST_BASE_DIR/output.$$  2>&1"
+	log_must eval "ls_xattr $TESTDIR/myfile2.$$ > $TEST_BASE_DIR/expected_output.$$"
 fi
 
 log_must diff $TEST_BASE_DIR/output.$$ $TEST_BASE_DIR/expected_output.$$

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_008_pos.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_008_pos.ksh
index 45d24ac..099a610 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_008_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_008_pos.ksh

@@ -49,10 +49,6 @@
 	done
 }
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 log_assert "special . and .. dirs work as expected for xattrs"
 log_onexit cleanup
 

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_009_neg.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_009_neg.ksh
index f64f7e4..c9d636e 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_009_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_009_neg.ksh

@@ -40,10 +40,6 @@
 #	3. Verify we're unable to create a hard link
 #
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup {
 
 	log_must rm $TESTDIR/myfile.$$

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_010_neg.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_010_neg.ksh
index db218c4..43502bf 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_010_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_010_neg.ksh

@@ -41,10 +41,6 @@
 #
 #
 
-if is_linux; then
-	log_unsupported "Test case isn't applicable to Linux"
-fi
-
 function cleanup {
 
 	log_must rm $TESTDIR/myfile.$$

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh
index 246f077..fdfefbf 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_011_pos.ksh

@@ -65,9 +65,7 @@
 # and try various cpio options extracting the archives
 # with and without xattr support, checking for correct behaviour
 
-if is_linux; then
-	log_note "Checking cpio - unsupported"
-else
+if is_illumos; then
 	log_note "Checking cpio"
 	log_must touch $TESTDIR/cpio.$$
 	create_xattr $TESTDIR/cpio.$$ passwd /etc/passwd
@@ -90,11 +88,15 @@
 	log_must cpio -iu@ < $TEST_BASE_DIR/noxattr.$$.cpio
 	log_mustnot eval "runat $TESTDIR/cpio.$$ cat passwd > /dev/null 2>&1"
 	log_must rm $TESTDIR/cpio.$$ $TEST_BASE_DIR/xattr.$$.cpio $TEST_BASE_DIR/noxattr.$$.cpio
+else
+	log_note "Checking cpio - unsupported"
 fi
 
-log_note "Checking cp"
 # check that with the right flag, the xattr is preserved
-if is_linux; then
+if is_freebsd; then
+	log_note "Checking cp - unsupported"
+elif is_linux; then
+	log_note "Checking cp"
 	log_must cp -a $TESTDIR/myfile.$$ $TESTDIR/myfile2.$$
 
 	compare_xattrs $TESTDIR/myfile.$$ $TESTDIR/myfile2.$$ passwd
@@ -102,9 +104,10 @@
 
 	# without the right flag, there should be no xattr
 	log_must cp $TESTDIR/myfile.$$ $TESTDIR/myfile2.$$
-	log_mustnot attr -q -g passwd $TESTDIR/myfile2.$$
+	log_mustnot get_xattr passwd $TESTDIR/myfile2.$$
 	log_must rm $TESTDIR/myfile2.$$
 else
+	log_note "Checking cp"
 	log_must cp -@ $TESTDIR/myfile.$$ $TESTDIR/myfile2.$$
 
 	compare_xattrs $TESTDIR/myfile.$$ $TESTDIR/myfile2.$$ passwd
@@ -118,9 +121,7 @@
 
 # create a file without xattrs, and check that find -xattr only finds
 # our test file that has an xattr.
-if is_linux; then
-	log_note "Checking find - unsupported"
-else
+if is_illumos; then
 	log_note "Checking find"
 	log_must mkdir $TESTDIR/noxattrs
 	log_must touch $TESTDIR/noxattrs/no-xattr
@@ -132,6 +133,8 @@
 	[[ $? -eq 0 ]] && \
 		log_fail "find -xattr found a file that didn't have an xattr."
 	log_must rm -rf $TESTDIR/noxattrs
+else
+	log_note "Checking find - unsupported"
 fi
 
 log_note "Checking mv"
@@ -143,9 +146,7 @@
 verify_xattr $TESTDIR/mvfile2.$$ passwd /etc/passwd
 log_must rm $TESTDIR/mvfile2.$$
 
-if is_linux; then
-	log_note "Checking pax - unsupported"
-else
+if is_illumos; then
 	log_note "Checking pax"
 	log_must touch $TESTDIR/pax.$$
 	create_xattr $TESTDIR/pax.$$ passwd /etc/passwd
@@ -172,39 +173,12 @@
 	log_must pax -r -f $TESTDIR/xattr.pax $TESTDIR
 	log_mustnot eval "runat $TESTDIR/pax.$$ cat passwd > /dev/null 2>&1"
 	log_must rm $TESTDIR/pax.$$ $TESTDIR/noxattr.pax $TESTDIR/xattr.pax
+else
+	log_note "Checking pax - unsupported"
 fi
 
 log_note "Checking tar"
-if is_linux; then
-	log_must touch $TESTDIR/tar.$$
-	create_xattr $TESTDIR/tar.$$ passwd /etc/passwd
-
-	log_must cd $TESTDIR
-
-	log_must tar -cf noxattr.tar tar.$$
-	log_must tar --xattrs -cf xattr.tar tar.$$
-	log_must rm $TESTDIR/tar.$$
-
-	# we should have no xattr here
-	log_must tar --no-xattrs -xf xattr.tar
-	log_mustnot attr -q -g passwd $TESTDIR/tar.$$
-	log_must rm $TESTDIR/tar.$$
-
-	# we should have an xattr here
-	log_must tar --xattrs -xf xattr.tar
-	verify_xattr tar.$$ passwd /etc/passwd
-	log_must rm $TESTDIR/tar.$$
-
-	# we should have no xattr here
-	log_must tar --no-xattrs -xf $TESTDIR/noxattr.tar
-	log_mustnot attr -q -g passwd $TESTDIR/tar.$$
-	log_must rm $TESTDIR/tar.$$
-
-	# we should have no xattr here
-	log_must tar --xattrs -xf $TESTDIR/noxattr.tar
-	log_mustnot attr -q -g passwd $TESTDIR/tar.$$
-	log_must rm $TESTDIR/tar.$$ $TESTDIR/noxattr.tar $TESTDIR/xattr.tar
-else
+if is_illumos; then
 	log_must touch $TESTDIR/tar.$$
 	create_xattr $TESTDIR/tar.$$ passwd /etc/passwd
 
@@ -233,6 +207,35 @@
 	log_must tar x@f $TESTDIR/noxattr.tar
 	log_mustnot eval "runat $TESTDIR/tar.$$ cat passwd > /dev/null 2>&1"
 	log_must rm $TESTDIR/tar.$$ $TESTDIR/noxattr.tar $TESTDIR/xattr.tar
+else
+	log_must touch $TESTDIR/tar.$$
+	create_xattr $TESTDIR/tar.$$ passwd /etc/passwd
+
+	log_must cd $TESTDIR
+
+	log_must tar --no-xattrs -cf noxattr.tar tar.$$
+	log_must tar --xattrs -cf xattr.tar tar.$$
+	log_must rm $TESTDIR/tar.$$
+
+	# we should have no xattr here
+	log_must tar --no-xattrs -xf xattr.tar
+	log_mustnot get_xattr passwd $TESTDIR/tar.$$
+	log_must rm $TESTDIR/tar.$$
+
+	# we should have an xattr here
+	log_must tar --xattrs -xf xattr.tar
+	verify_xattr tar.$$ passwd /etc/passwd
+	log_must rm $TESTDIR/tar.$$
+
+	# we should have no xattr here
+	log_must tar --no-xattrs -xf $TESTDIR/noxattr.tar
+	log_mustnot get_xattr passwd $TESTDIR/tar.$$
+	log_must rm $TESTDIR/tar.$$
+
+	# we should have no xattr here
+	log_must tar --xattrs -xf $TESTDIR/noxattr.tar
+	log_mustnot get_xattr passwd $TESTDIR/tar.$$
+	log_must rm $TESTDIR/tar.$$ $TESTDIR/noxattr.tar $TESTDIR/xattr.tar
 fi
 
 log_assert "Basic applications work with xattrs: cpio cp find mv pax tar"

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_012_pos.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_012_pos.ksh
index 7241caf..693ca0a 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_012_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_012_pos.ksh

@@ -47,6 +47,9 @@
 
 function cleanup {
 	log_must rm $TESTDIR/myfile.$$
+	if is_freebsd; then
+		log_must rm /tmp/xattr.$$
+	fi
 }
 
 function get_pool_size {
@@ -79,7 +82,17 @@
 
 FS_SIZE=$(zfs get -p -H -o value used $TESTPOOL/$TESTFS)
 
-if is_linux; then
+if is_freebsd; then
+	# FreeBSD setextattr has awful scaling with respect to input size.
+	# It reallocs after every 1024 bytes. For now we'll just break up
+	# the 200MB into 10 20MB attributes, but this test could be revisited
+	# if someone cared about large extattrs and improves setextattr -i.
+	log_must mkfile 20m /tmp/xattr.$$
+	for i in {0..9}; do
+		log_must eval "set_xattr_stdin xattr$i $TESTDIR/myfile.$$ \
+		    < /tmp/xattr.$$"
+	done
+elif is_linux; then
 	# Linux setxattr() syscalls limits individual xattrs to 64k.  Create
 	# 100 files, with 128 xattrs each of size 16k.  100*128*16k=200m
 	log_must xattrtest -k -f 100 -x 128 -s 16384 -p $TESTDIR

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_013_pos.ksh b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_013_pos.ksh
index e9eed69..efa9faa 100755
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_013_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_013_pos.ksh

@@ -65,15 +65,7 @@
 log_must zfs mount -o noxattr $TESTPOOL/$TESTFS
 
 # check that we can't perform xattr operations
-if is_linux; then
-	log_mustnot attr -q -g passwd $TESTDIR/myfile.$$
-	log_mustnot attr -q -r passwd $TESTDIR/myfile.$$
-	log_mustnot attr -q -s passwd $TESTDIR/myfile.$$ </etc/passwd
-
-	log_must touch $TESTDIR/new.$$
-	log_mustnot attr -q -s passwd $TESTDIR/new.$$ </etc/passwd
-	log_mustnot attr -q -r passwd $TESTDIR/new.$$
-else
+if is_illumos; then
 	log_mustnot eval "runat $TESTDIR/myfile.$$ cat passwd > /dev/null 2>&1"
 	log_mustnot eval "runat $TESTDIR/myfile.$$ rm passwd > /dev/null 2>&1"
 	log_mustnot eval "runat $TESTDIR/myfile.$$ cp /etc/passwd . \
@@ -83,6 +75,14 @@
 	log_mustnot eval "runat $TESTDIR/new.$$ cp /etc/passwd . \
 	    > /dev/null 2>&1"
 	log_mustnot eval "runat $TESTDIR/new.$$ rm passwd > /dev/null 2>&1"
+else
+	log_mustnot get_xattr passwd $TESTDIR/myfile.$$
+	log_mustnot rm_xattr passwd $TESTDIR/myfile.$$
+	log_mustnot set_xattr_stdin passwd $TESTDIR/myfile.$$ </etc/passwd
+
+	log_must touch $TESTDIR/new.$$
+	log_mustnot set_xattr_stdin passwd $TESTDIR/new.$$ </etc/passwd
+	log_mustnot rm_xattr passwd $TESTDIR/new.$$
 fi
 
 # now mount the filesystem again as normal
@@ -94,10 +94,10 @@
 
 # there should be no xattr on the file we created while the fs was mounted
 # -o noxattr
-if is_linux; then
-	log_mustnot attr -q -g passwd $TESTDIR/new.$$
-else
+if is_illumos; then
 	log_mustnot eval "runat $TESTDIR/new.$$ cat passwd > /dev/null 2>&1"
+else
+	log_mustnot get_xattr passwd $TESTDIR/new.$$
 fi
 create_xattr $TESTDIR/new.$$ passwd /etc/passwd
 

diff --git a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_common.kshlib b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_common.kshlib
index b80436b..bc06f47 100644
--- a/zfs/tests/zfs-tests/tests/functional/xattr/xattr_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/xattr/xattr_common.kshlib

@@ -39,14 +39,14 @@
 	typeset XATTR_NAME=$2
 	typeset XATTR_CONTENTS=$3
 
-	if is_linux; then
-		log_mustnot attr -q -g $XATTR_NAME $FILE
-		log_must attr -q -s $XATTR_NAME $FILE < $XATTR_CONTENTS
-	else
+	if is_illumos; then
 		# read any empty xattr on that file
 		log_must runat $FILE ls
 		# create the xattr
 		log_must runat $FILE cp $XATTR_CONTENTS $XATTR_NAME
+	else
+		log_mustnot get_xattr $XATTR_NAME $FILE
+		log_must set_xattr_stdin $XATTR_NAME $FILE < $XATTR_CONTENTS
 	fi
 
 	verify_xattr $FILE $XATTR_NAME $XATTR_CONTENTS
@@ -59,12 +59,12 @@
 	typeset FILE2=$2
 	typeset XATTR_NAME=$3
 
-	if is_linux; then
-		attr -q -g $XATTR_NAME $FILE1 > $TEST_BASE_DIR/file1.$$
-		attr -q -g $XATTR_NAME $FILE2 > $TEST_BASE_DIR/file2.$$
-	else
+	if is_illumos; then
 		runat $FILE1 cat $XATTR_NAME > $TEST_BASE_DIR/file1.$$
 		runat $FILE2 cat $XATTR_NAME > $TEST_BASE_DIR/file2.$$
+	else
+		get_xattr $XATTR_NAME $FILE1 > $TEST_BASE_DIR/file1.$$
+		get_xattr $XATTR_NAME $FILE2 > $TEST_BASE_DIR/file2.$$
 	fi
 
 	log_must diff $TEST_BASE_DIR/file1.$$ $TEST_BASE_DIR/file2.$$
@@ -77,12 +77,12 @@
 	typeset XATTR_CONTENTS=$3
 
 	# read the xattr, writing it to a temp file
-	if is_linux; then
-		log_must eval \
-		    "attr -q -g $XATTR_NAME $FILE > $TEST_BASE_DIR/$XATTR_NAME.$$"
-	else
+	if is_illumos; then
 		log_must eval \
 		    "runat $FILE cat $XATTR_NAME > $TEST_BASE_DIR/$XATTR_NAME.$$ 2>&1"
+	else
+		log_must eval \
+		    "get_xattr $XATTR_NAME $FILE > $TEST_BASE_DIR/$XATTR_NAME.$$"
 	fi
 
 	log_must diff $XATTR_CONTENTS $TEST_BASE_DIR/$XATTR_NAME.$$
@@ -94,12 +94,12 @@
         typeset XATTR_NAME=$2
 
         # delete the xattr
-        if is_linux; then
-		log_must attr -r $XATTR_NAME $FILE
-		log_mustnot attr -q -g $XATTR_NAME $FILE
-	else
+        if is_illumos; then
 	        log_must runat $FILE rm $XATTR_NAME
 	        log_mustnot eval "runat $FILE ls $XATTR_NAME > /dev/null 2>&1"
+	else
+		log_must rm_xattr $XATTR_NAME $FILE
+		log_mustnot get_xattr $XATTR_NAME $FILE
 	fi
 }
 
@@ -108,14 +108,14 @@
         typeset FILE=$1
         typeset XATTR_NAME=$2
 
-	if is_linux; then
-                log_must attr -q -s $XATTR_NAME $FILE < /etc/passwd
-		log_must eval \
-		    "attr -q -g $XATTR_NAME $FILE > $TEST_BASE_DIR/$XATTR_NAME.$$"
-	else
+	if is_illumos; then
 	        log_must eval "runat $FILE dd if=/etc/passwd of=$XATTR_NAME"
 	        log_must eval \
 		    "runat $FILE cat $XATTR_NAME > $TEST_BASE_DIR/$XATTR_NAME.$$ 2>&1"
+	else
+		log_must set_xattr_stdin $XATTR_NAME $FILE < /etc/passwd
+		log_must eval \
+		    "get_xattr $XATTR_NAME $FILE > $TEST_BASE_DIR/$XATTR_NAME.$$"
 	fi
         log_must dd if=/etc/passwd of=$TEST_BASE_DIR/passwd_dd.$$
         log_must diff $TEST_BASE_DIR/passwd_dd.$$ $TEST_BASE_DIR/$XATTR_NAME.$$
@@ -123,15 +123,15 @@
 }
 
 # this function is to create the expected output
-function create_expected_output  { # expected_output_file  contents_of_the_output
-   typeset FILE=$1
-   shift
-   if [[ -f $FILE ]]; then
-      log_must rm $FILE
-   fi
+function create_expected_output { # expected_output_file  contents_of_the_output
+	typeset FILE=$1
+	shift
+	if [[ -f $FILE ]]; then
+		log_must rm $FILE
+	fi
 
-   for line in $@
-   do
-      log_must eval "echo $line >> $FILE"
-   done
- }
+	for line in $@
+	do
+		log_must eval "echo $line >> $FILE"
+	done
+}

diff --git a/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am
new file mode 100644
index 0000000..36d08a4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/Makefile.am

@@ -0,0 +1,5 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/zpool_influxdb
+dist_pkgdata_SCRIPTS = \
+	setup.ksh \
+	cleanup.ksh \
+	zpool_influxdb.ksh

diff --git a/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh
new file mode 100755
index 0000000..a8cd2e4
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/cleanup.ksh

@@ -0,0 +1,29 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at
+# https://opensource.org/licenses/CDDL-1.0
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Richard Elling
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh
new file mode 100755
index 0000000..43f2c8c
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/setup.ksh

@@ -0,0 +1,29 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at
+# https://opensource.org/licenses/CDDL-1.0
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Richard Elling
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_raidz_setup $DISKS

diff --git a/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh
new file mode 100755
index 0000000..495a4a3
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/functional/zpool_influxdb/zpool_influxdb.ksh

@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at
+# https://opensource.org/licenses/CDDL-1.0
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Richard Elling
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+typeset tmpfile=$TEST_BASE_DIR/zpool_influxdb.out.$$
+function cleanup
+{
+	if [[ -f $tmpfile ]]; then
+		rm -f $tmpfile
+	fi
+}
+log_onexit cleanup
+
+log_assert "zpool_influxdb gathers statistics"
+
+if ! is_global_zone ; then
+	TESTPOOL=${TESTPOOL%%/*}
+fi
+
+function check_for
+{
+    # Fail the test unless some line of $tmpfile starts with "$1,".
+    if ! grep "^${1}," $tmpfile >/dev/null 2>&1; then
+        log_fail "cannot find stats for $1"
+    fi
+}
+
+# by default, all stats and histograms for all pools
+log_must zpool_influxdb > $tmpfile
+
+STATS="
+zpool_io_size
+zpool_latency
+zpool_stats
+zpool_vdev_queue
+zpool_vdev_stats
+"
+for stat in $STATS; do
+    check_for $stat
+done
+
+# scan stats aren't expected to be there until after a scan has started
+log_must zpool scrub $TESTPOOL
+log_must eval "zpool_influxdb > $tmpfile"
+check_for zpool_scan_stats
+
+log_pass "zpool_influxdb gathers statistics"

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/setup.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/setup.ksh
index 7ac3815..dcdfc86 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/setup.ksh

@@ -42,8 +42,7 @@
 
 default_zvol_setup $DISK $VOLSIZE $BLOCKSIZE
 
-echo "y" | newfs -v ${ZVOL_RDEVDIR}/$TESTPOOL/$TESTVOL >/dev/null 2>&1
-(( $? != 0 )) && log_fail "Unable to newfs(1M) $TESTPOOL/$TESTVOL"
+log_must new_fs ${ZVOL_RDEVDIR}/$TESTPOOL/$TESTVOL
 
 log_must mkdir $TESTDIR
 log_must mount ${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL $TESTDIR

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/zvol_ENOSPC.cfg b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/zvol_ENOSPC.cfg
index 84986b8..8a99225 100644
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/zvol_ENOSPC.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/zvol_ENOSPC.cfg

@@ -32,17 +32,10 @@
 
 verify_runnable "global"
 
-#export SIZE="1gb"
 export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
 export DISKSARRAY=$DISKS
 
-
 if is_linux; then
 	set_slice_prefix
 	set_device_dir
-#	export SLICE=1
-else
-	DEV_DSKDIR="/dev"
-	export SLICE_PREFIX="s"
-#	export SLICE=0
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli.cfg b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli.cfg
index bede669..8a99225 100644
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli.cfg

@@ -35,11 +35,7 @@
 export DISK_ARRAY_NUM=$(echo ${DISKS} | nawk '{print NF}')
 export DISKSARRAY=$DISKS
 
-
 if is_linux; then
 	set_slice_prefix
 	set_device_dir
-else
-	DEV_DSKDIR="/dev"
-	export SLICE_PREFIX="s"
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli_002_pos.ksh
index e5b6923..7b87497 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_cli/zvol_cli_002_pos.ksh

@@ -45,7 +45,7 @@
 function cleanup
 {
 	datasetexists $TESTPOOL/$LONGVOLNAME && \
-		zfs destroy $TESTPOOL/$LONGVOLNAME
+		destroy_dataset $TESTPOOL/$LONGVOLNAME
 }
 
 log_onexit cleanup

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib
index 5f3b47e..4f74c9b 100644
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib

@@ -63,9 +63,8 @@
 #
 function default_zvol_cleanup
 {
-        if datasetexists $TESTPOOL/$TESTVOL ; then
-		log_must zfs destroy $TESTPOOL/$TESTVOL
-	fi
+	datasetexists $TESTPOOL/$TESTVOL && \
+		destroy_dataset $TESTPOOL/$TESTVOL
 
         destroy_pool $TESTPOOL
 }
@@ -87,15 +86,13 @@
 
 	log_must zfs set volsize=64m $volume
 
-	if ! is_linux; then
-		output=$(dumpadm -d /dev/zvol/dsk/$volume 2>&1 | \
-				tail -1 | awk '{print $3}')
+	output=$(dumpadm -d /dev/zvol/dsk/$volume 2>&1 | \
+			tail -1 | awk '{print $3}')
 
-		if [[ -n $output ]]; then
-			(( output = output / 1024 / 1024 ))
-			(( output = output + output / 5 ))
-			log_must zfs set volsize=${output}m $volume
-		fi
+	if [[ -n $output ]]; then
+		(( output = output / 1024 / 1024 ))
+		(( output = output + output / 5 ))
+		log_must zfs set volsize=${output}m $volume
 	fi
 
 	return 0

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_001_neg.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_001_neg.ksh
index fa70044..25325dd 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_001_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_001_neg.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
+if ! is_physical_device $DISKS; then
 	log_unsupported "This directory cannot be run on raw files."
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_002_pos.ksh
index 203b552..2ecb00d 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_002_pos.ksh

@@ -49,7 +49,7 @@
 function cleanup
 {
 	snapexists $TESTPOOL/$TESTVOL@snap && \
-		zfs destroy $TESTPOOL/$TESTVOL@snap
+		destroy_dataset $TESTPOOL/$TESTVOL@snap
 
 	ismounted $TESTDIR $NEWFS_DEFAULT_FS
 	(( $? == 0 )) && log_must umount $TESTDIR
@@ -66,8 +66,7 @@
 
 log_must zfs set volsize=128m $TESTPOOL/$TESTVOL
 
-echo "y" | newfs -v ${ZVOL_RDEVDIR}/$TESTPOOL/$TESTVOL >/dev/null 2>&1
-(( $? != 0 )) && log_fail "Unable to newfs(1M) $TESTPOOL/$TESTVOL"
+log_must new_fs ${ZVOL_RDEVDIR}/$TESTPOOL/$TESTVOL
 
 log_must mount ${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL $TESTDIR
 
@@ -84,7 +83,7 @@
 	(( fn = fn + 1 ))
 done
 
-if is_linux; then
+if is_linux || is_freebsd ; then
 	log_must sync
 else
 	log_must lockfs -f $TESTDIR

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_003_neg.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_003_neg.ksh
index 8cf9ffe..2b8e509 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_003_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_003_neg.ksh

@@ -46,7 +46,7 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
+if ! is_physical_device $DISKS; then
 	log_unsupported "This directory cannot be run on raw files."
 fi
 
@@ -74,10 +74,7 @@
 safe_dumpadm $voldev
 
 unset NOINUSE_CHECK
-echo "y" | newfs -v $voldev > /dev/null 2>&1
-if (( $? == 0 )) ; then
-	log_fail "newfs on dump zvol succeeded unexpectedly"
-fi
+log_mustnot eval "new_fs $voldev > /dev/null 2>&1"
 
 log_mustnot zpool create $TESTPOOL1 $voldev
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_004_pos.ksh
index 6fea074..e0dce0c 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_004_pos.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
+if ! is_physical_device $DISKS; then
 	log_unsupported "This directory cannot be run on raw files."
 fi
 
@@ -65,9 +65,8 @@
 
 	typeset snap
 	for snap in snap0 snap1 ; do
-		if datasetexists $TESTPOOL/$TESTVOL@$snap ; then
-			log_must zfs destroy $TESTPOOL/$TESTVOL@$snap
-		fi
+		datasetexists $TESTPOOL/$TESTVOL@$snap && \
+			 destroy_dataset $TESTPOOL/$TESTVOL@$snap
 	done
 	zfs set volsize=$volsize $TESTPOOL/$TESTVOL
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_005_neg.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_005_neg.ksh
index 8248608..48dfe6d 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_005_neg.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_005_neg.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
+if ! is_physical_device $DISKS; then
 	log_unsupported "This directory cannot be run on raw files."
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_006_pos.ksh
index ad841e2..08726a7 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_006_pos.ksh

@@ -45,7 +45,7 @@
 
 verify_runnable "global"
 
-if ! $(is_physical_device $DISKS) ; then
+if ! is_physical_device $DISKS; then
 	log_unsupported "This directory cannot be run on raw files."
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib
index 1b049a9..b69d2ce 100644
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib

@@ -37,7 +37,6 @@
 #
 function udev_wait
 {
-	sleep 1
 	udevadm trigger --action=change
 	udevadm settle
 	for i in {1..3}; do
@@ -58,6 +57,7 @@
 function udev_cleanup
 {
 	log_note "Pruning broken ZVOL symlinks ..."
+
 	udevadm settle
 	# find all dangling links and delete them
 	find -L "${ZVOL_DEVDIR}" -type l -print -delete
@@ -76,8 +76,9 @@
 	# because there are other commands (zfs snap, zfs inherit, zfs destroy)
 	# that can affect device nodes
 	for i in {1..3}; do
-		udev_wait
-		[[ -b "$device" ]] && return 0
+		is_linux && udev_wait
+		block_device_wait "$device"
+		is_disk_device "$device" && return 0
 	done
 	log_fail "$device does not exist as a block device"
 }
@@ -93,8 +94,9 @@
 	# because there are other commands (zfs snap, zfs inherit, zfs destroy)
 	# that can affect device nodes
 	for i in {1..3}; do
-		udev_wait
-		[[ ! -e "$device" ]] && return 0
+		is_linux && udev_wait
+		block_device_wait
+		is_disk_device "$device" || return 0
 	done
 	log_fail "$device exists when not expected"
 }
@@ -124,17 +126,17 @@
 {
 	typeset device="$1"
 
-	if [[ ! -b "$device" ]]; then
+	if ! is_disk_device "$device"; then
 		log_fail "$device is not a block device"
 	fi
 	# create a small dummy partition
-	set_partition 0 1 1m $device
+	set_partition 0 "" 1m $device
 	# verify we can access the partition on the device
 	devname="$(readlink -f "$device")"
-	if is_linux; then
-		[[ -b "$devname""p1" ]]
+	if is_linux || is_freebsd; then
+		is_disk_device "$devname""p1"
 	else
-		[[ -b "$devname""s0" ]]
+		is_disk_device "$devname""s0"
 	fi
 	return $?
 }

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh
index f851229..e9b7bbc 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh

@@ -39,7 +39,7 @@
 	for ds in "$SENDFS" "$ZVOL" "$ZVOL-renamed"; do
 		destroy_dataset "$ds" '-rf'
 	done
-	udev_wait
+	block_device_wait
 }
 
 log_assert "Verify 'zfs rename' works on a ZVOL already in use as block device"
@@ -54,8 +54,8 @@
 log_must zfs create -V $VOLSIZE "$ZVOL"
 
 # 2. Create a filesystem on the ZVOL device and mount it
-udev_wait
-log_must eval "echo y | newfs $ZDEV >/dev/null 2>&1"
+block_device_wait "$ZDEV"
+log_must eval "new_fs $ZDEV >/dev/null 2>&1"
 log_must mkdir "$MNTPFS"
 log_must mount "$ZDEV" "$MNTPFS"
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh
index 8d95bfa..9d3fb05 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_snapdev.ksh

@@ -43,11 +43,11 @@
 
 function cleanup
 {
-	datasetexists $VOLFS && log_must zfs destroy -r $VOLFS
-	datasetexists $ZVOL && log_must zfs destroy -r $ZVOL
+	datasetexists $VOLFS && destroy_dataset $VOLFS -r
+	datasetexists $ZVOL && destroy_dataset $ZVOL -r
 	log_must zfs inherit snapdev $TESTPOOL
 	block_device_wait
-	udev_cleanup
+	is_linux && udev_cleanup
 }
 
 log_assert "Verify that ZFS volume property 'snapdev' works as expected."

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_volmode.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_volmode.ksh
index 934d894..af808dc 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_volmode.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_volmode.ksh

@@ -43,20 +43,17 @@
 # 7. Verify "volmode" behaves correctly at import time
 # 8. Verify "volmode" behaves accordingly to zvol_inhibit_dev (Linux only)
 #
-# NOTE: changing volmode may need to remove minors, which could be open, so call
-#       udev_wait() before we "zfs set volmode=<value>".
 
 verify_runnable "global"
 
 function cleanup
 {
-	datasetexists $VOLFS && log_must_busy zfs destroy -r $VOLFS
-	datasetexists $ZVOL && log_must_busy zfs destroy -r $ZVOL
-	log_must zfs inherit volmode $TESTPOOL
-	udev_wait
+	datasetexists $VOLFS && destroy_dataset $VOLFS -r
+	datasetexists $ZVOL && destroy_dataset $ZVOL -r
+	zfs inherit volmode $TESTPOOL
 	sysctl_inhibit_dev 0
 	sysctl_volmode 1
-	udev_cleanup
+	is_linux && udev_cleanup
 }
 
 #
@@ -68,8 +65,7 @@
 
 	if is_linux; then
 		log_note "Setting zvol_inhibit_dev tunable to $value"
-		log_must eval "echo $value > "\
-		    "/sys/module/zfs/parameters/zvol_inhibit_dev"
+		log_must set_tunable32 VOL_INHIBIT_DEV $value
 	fi
 }
 
@@ -81,14 +77,33 @@
 	typeset value="$1"
 
 	log_note "Setting volmode tunable to $value"
-	if is_linux; then
-		echo "$value" > '/sys/module/zfs/parameters/zvol_volmode'
-	else
-		sysctl 'vfs.zfs.vol.mode' "$value"
-	fi
-	if [[ $? -ne 0 ]]; then
-		log_fail "Unable to set volmode tunable to $value"
-	fi
+	log_must set_tunable32 VOL_MODE $value
+}
+
+#
+# Smoke-test open/close, read and write on a device (one dd block each way)
+#
+function test_io # dev
+{
+	typeset node=$1
+
+	log_must dd if=$node of=/dev/null count=1
+	log_must dd if=/dev/zero of=$node count=1
+}
+
+#
+# Set volmode=<value> on <ds>.  Changing volmode may need to remove minors,
+# which could be open, so on Linux call udev_wait() first.  This ensures no
+# udev process (e.g. blkid) has the zvol open, so zvol_remove_minor_impl()
+# won't skip removing the in-use device.
+#
+function set_volmode # value ds
+{
+	typeset mode="$1" dataset="$2"
+	if is_linux; then
+		udev_wait
+	fi
+	log_must zfs set volmode="$mode" "$dataset"
+}
 
 log_assert "Verify that ZFS volume property 'volmode' works as intended"
@@ -96,14 +111,18 @@
 
 VOLFS="$TESTPOOL/volfs"
 ZVOL="$TESTPOOL/vol"
-ZDEV="${ZVOL_DEVDIR}/$ZVOL"
+ZDEV="$ZVOL_DEVDIR/$ZVOL"
 SUBZVOL="$VOLFS/subvol"
-SUBZDEV="${ZVOL_DEVDIR}/$SUBZVOL"
+SUBZDEV="$ZVOL_DEVDIR/$SUBZVOL"
 
+# 0. Verify basic ZVOL functionality
 log_must zfs create -o mountpoint=none $VOLFS
 log_must zfs create -V $VOLSIZE -s $SUBZVOL
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
+blockdev_exists $ZDEV
+blockdev_exists $SUBZDEV
+test_io $ZDEV
+test_io $SUBZDEV
 
 # 1. Verify "volmode" property does not accept invalid values
 typeset badvals=("off" "on" "1" "nope" "-")
@@ -113,86 +132,87 @@
 done
 
 # 2. Verify "volmode=none" hides ZVOL device nodes
-log_must zfs set volmode=none $ZVOL
+set_volmode none $ZVOL
 blockdev_missing $ZDEV
 log_must_busy zfs destroy $ZVOL
+blockdev_missing $ZDEV
 
 # 3. Verify "volmode=full" exposes a fully functional device
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
-log_must zfs set volmode=full $ZVOL
 blockdev_exists $ZDEV
+set_volmode full $ZVOL
+blockdev_exists $ZDEV
+test_io $ZDEV
 log_must verify_partition $ZDEV
-udev_wait
 # 3.1 Verify "volmode=geom" is an alias for "volmode=full"
-log_must zfs set volmode=geom $ZVOL
+set_volmode geom $ZVOL
 blockdev_exists $ZDEV
 if [[ "$(get_prop 'volmode' $ZVOL)" != "full" ]]; then
 	log_fail " Volmode value 'geom' is not an alias for 'full'"
 fi
-udev_wait
 log_must_busy zfs destroy $ZVOL
+blockdev_missing $ZDEV
 
 # 4. Verify "volmode=dev" hides partition info on the device
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
-log_must zfs set volmode=dev $ZVOL
 blockdev_exists $ZDEV
+set_volmode dev $ZVOL
+blockdev_exists $ZDEV
+test_io $ZDEV
 log_mustnot verify_partition $ZDEV
-udev_wait
 log_must_busy zfs destroy $ZVOL
+blockdev_missing $ZDEV
 
 # 5. Verify "volmode=default" behaves accordingly to "volmode" module parameter
 # 5.1 Verify sysctl "volmode=full"
 sysctl_volmode 1
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
-log_must zfs set volmode=default $ZVOL
+blockdev_exists $ZDEV
+set_volmode default $ZVOL
 blockdev_exists $ZDEV
 log_must verify_partition $ZDEV
-udev_wait
 log_must_busy zfs destroy $ZVOL
+blockdev_missing $ZDEV
 # 5.2 Verify sysctl "volmode=dev"
 sysctl_volmode 2
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
-log_must zfs set volmode=default $ZVOL
+blockdev_exists $ZDEV
+set_volmode default $ZVOL
 blockdev_exists $ZDEV
 log_mustnot verify_partition $ZDEV
-udev_wait
 log_must_busy zfs destroy $ZVOL
+blockdev_missing $ZDEV
 # 5.2 Verify sysctl "volmode=none"
 sysctl_volmode 3
 log_must zfs create -V $VOLSIZE -s $ZVOL
-udev_wait
-log_must zfs set volmode=default $ZVOL
+blockdev_missing $ZDEV
+set_volmode default $ZVOL
 blockdev_missing $ZDEV
 
 # 6. Verify "volmode" property is inherited correctly
 log_must zfs inherit volmode $ZVOL
+blockdev_missing $ZDEV
 # 6.1 Check volmode=full case
-log_must zfs set volmode=full $TESTPOOL
+set_volmode full $TESTPOOL
 verify_inherited 'volmode' 'full' $ZVOL $TESTPOOL
 blockdev_exists $ZDEV
 # 6.2 Check volmode=none case
-log_must zfs set volmode=none $TESTPOOL
+set_volmode none $TESTPOOL
 verify_inherited 'volmode' 'none' $ZVOL $TESTPOOL
 blockdev_missing $ZDEV
 # 6.3 Check volmode=dev case
-log_must zfs set volmode=dev $TESTPOOL
+set_volmode dev $TESTPOOL
 verify_inherited 'volmode' 'dev' $ZVOL $TESTPOOL
 blockdev_exists $ZDEV
 # 6.4 Check volmode=default case
 sysctl_volmode 1
-log_must zfs set volmode=default $TESTPOOL
+set_volmode default $TESTPOOL
 verify_inherited 'volmode' 'default' $ZVOL $TESTPOOL
 blockdev_exists $ZDEV
 # 6.5 Check inheritance on multiple levels
 log_must zfs inherit volmode $SUBZVOL
-udev_wait
-log_must zfs set volmode=none $VOLFS
-udev_wait
-log_must zfs set volmode=full $TESTPOOL
+set_volmode none $VOLFS
+set_volmode full $TESTPOOL
 verify_inherited 'volmode' 'none' $SUBZVOL $VOLFS
 blockdev_missing $SUBZDEV
 blockdev_exists $ZDEV
@@ -206,6 +226,8 @@
 blockdev_missing $SUBZDEV
 log_must_busy zfs destroy $ZVOL
 log_must_busy zfs destroy $SUBZVOL
+blockdev_missing $ZDEV
+blockdev_missing $SUBZDEV
 
 # 8. Verify "volmode" behaves accordingly to zvol_inhibit_dev (Linux only)
 if is_linux; then
@@ -214,21 +236,23 @@
 	sysctl_volmode 1
 	log_must zfs create -V $VOLSIZE -s $ZVOL
 	blockdev_missing $ZDEV
-	log_must zfs set volmode=full $ZVOL
+	set_volmode full $ZVOL
 	blockdev_missing $ZDEV
 	log_must_busy zfs destroy $ZVOL
+	blockdev_missing $ZDEV
 	# 7.1 Verify device nodes not are not created with "volmode=dev"
 	sysctl_volmode 2
 	log_must zfs create -V $VOLSIZE -s $ZVOL
 	blockdev_missing $ZDEV
-	log_must zfs set volmode=dev $ZVOL
+	set_volmode dev $ZVOL
 	blockdev_missing $ZDEV
 	log_must_busy zfs destroy $ZVOL
+	blockdev_missing $ZDEV
 	# 7.1 Verify device nodes not are not created with "volmode=none"
 	sysctl_volmode 3
 	log_must zfs create -V $VOLSIZE -s $ZVOL
 	blockdev_missing $ZDEV
-	log_must zfs set volmode=none $ZVOL
+	set_volmode none $ZVOL
 	blockdev_missing $ZDEV
 fi
 

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh
index b8989f4..a393606 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_zil.ksh

@@ -42,8 +42,8 @@
 
 function cleanup
 {
-	datasetexists $ZVOL && log_must_busy zfs destroy $ZVOL
-	udev_wait
+	datasetexists $ZVOL && destroy_dataset $ZVOL
+	block_device_wait
 }
 
 log_assert "Verify ZIL functionality on ZVOLs"

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/cleanup.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/cleanup.ksh
index f392714..70574fc 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/cleanup.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/cleanup.ksh

@@ -37,6 +37,8 @@
 
 if is_linux; then
 	log_must swapon -a
+elif is_freebsd; then
+	swapon -a || true
 else
 	log_must swapadd
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap.cfg b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap.cfg
index 2ea8a4c..54ecc18 100644
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap.cfg
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap.cfg

@@ -35,6 +35,8 @@
 #
 if is_linux; then
 	SAVESWAPDEVS=$(swapon -s | nawk '(NR != 1) {print $1}')
+elif is_freebsd; then
+	SAVESWAPDEVS=$(swapctl -l | nawk '(NR != 1) {print $1}')
 else
 	SAVESWAPDEVS=$(swap -l | nawk '(NR != 1) {print $1}')
 fi

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_002_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_002_pos.ksh
index 59d9a46..6106f7e 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_002_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_002_pos.ksh

@@ -56,6 +56,8 @@
 
 log_assert "Using a zvol as swap space, fill /var/tmp to 80%."
 
+log_onexit cleanup
+
 vol=$TESTPOOL/$TESTVOL
 swapdev=${ZVOL_DEVDIR}/$vol
 log_must swap_setup $swapdev

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh
index 9ccf3f9..aafdb7a 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_003_pos.ksh

@@ -46,10 +46,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "Modifies global non-ZFS system config"
-fi
-
 function cleanup
 {
 	[[ -f $TESTDIR/$TESTFILE ]] && log_must rm -f $TESTDIR/$TESTFILE

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_004_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_004_pos.ksh
index be72576..83bf465 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_004_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_004_pos.ksh

@@ -47,7 +47,7 @@
 function cleanup
 {
 	is_swap_inuse $swapname && log_must swap_cleanup $swapname
-	datasetexists $vol && log_must zfs destroy $vol
+	datasetexists $vol && destroy_dataset $vol
 }
 
 log_assert "For an added swap zvol, (2G <= volsize <= 16G)"
@@ -62,7 +62,7 @@
 
 		# Create a sparse volume to test larger sizes
 		log_must zfs create -s -b $vbs -V $volsize $vol
-		block_device_wait
+		block_device_wait $swapname
 		log_must swap_setup $swapname
 
 		new_volsize=$(get_prop volsize $vol)

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_005_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_005_pos.ksh
index 58714a2..b22bfdb 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_005_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_005_pos.ksh

@@ -44,10 +44,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "swaplow + swaplen unsupported Linux options"
-fi
-
 assertion="Verify the sum of swaplow and swaplen is less or equal to volsize"
 log_assert $assertion
 
@@ -63,9 +59,9 @@
 ((max_swaplow = (volblocks - (pageblocks * 2))))
 
 for i in {0..10}; do
-	swaplow=$(shuf -n 1 -i ${pageblocks}-${max_swaplow})
+	swaplow=$(range_shuffle ${pageblocks} ${max_swaplow} | head -n 1)
 	((maxlen = max_swaplow - swaplow))
-	swaplen=$(shuf -n 1 -i ${pageblocks}-${maxlen})
+	swaplen=$(range_shuffle ${pageblocks} ${maxlen} | head -n 1)
 	log_must swap -a $swapname $swaplow $swaplen
 	log_must swap -d $swapname $swaplow
 done

diff --git a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_006_pos.ksh b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_006_pos.ksh
index 08428d9..5788a86 100755
--- a/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_006_pos.ksh
+++ b/zfs/tests/zfs-tests/tests/functional/zvol/zvol_swap/zvol_swap_006_pos.ksh

@@ -45,10 +45,6 @@
 
 verify_runnable "global"
 
-if is_linux; then
-	log_unsupported "swaplow + swaplen unsupported Linux options"
-fi
-
 function cleanup
 {
 	typeset -i i=0

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/Makefile.am b/zfs/tests/zfs-tests/tests/perf/fio/Makefile.am
index 6192516..e9f854d 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/Makefile.am
+++ b/zfs/tests/zfs-tests/tests/perf/fio/Makefile.am

@@ -6,4 +6,5 @@
 	random_readwrite_fixed.fio \
 	random_writes.fio \
 	sequential_reads.fio \
-	sequential_writes.fio
+	sequential_writes.fio \
+	sequential_readwrite.fio

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/mkfiles.fio b/zfs/tests/zfs-tests/tests/perf/fio/mkfiles.fio
index c7efda8..36ec22d 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/mkfiles.fio
+++ b/zfs/tests/zfs-tests/tests/perf/fio/mkfiles.fio

@@ -11,6 +11,7 @@
 
 #
 # Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 [global]
@@ -24,7 +25,9 @@
 directory=${DIRECTORY}
 numjobs=${NUMJOBS}
 filesize=${FILE_SIZE}
-buffer_compress_percentage=66
-buffer_compress_chunk=4096
+randseed=${PERF_RANDSEED}
+buffer_compress_percentage=${PERF_COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${PERF_COMPCHUNK}
 
 [job]

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite.fio b/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite.fio
index 852d4be..f8eb6db 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite.fio
+++ b/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite.fio

@@ -10,7 +10,8 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2016, Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 [global]
@@ -30,7 +31,9 @@
 sync=${SYNC_TYPE}
 direct=${DIRECT}
 numjobs=${NUMJOBS}
-buffer_compress_percentage=66
-buffer_compress_chunk=4096
+randseed=${RANDSEED}
+buffer_compress_percentage=${COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${COMPCHUNK}
 
 [job]

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite_fixed.fio b/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite_fixed.fio
index 67b88c0..e83b480 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite_fixed.fio
+++ b/zfs/tests/zfs-tests/tests/perf/fio/random_readwrite_fixed.fio

@@ -10,7 +10,8 @@
 #
 
 #
-# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2016, Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 [global]
@@ -30,7 +31,9 @@
 sync=${SYNC_TYPE}
 direct=${DIRECT}
 numjobs=${NUMJOBS}
-buffer_compress_percentage=66
-buffer_compress_chunk=4096
+randseed=${RANDSEED}
+buffer_compress_percentage=${COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${COMPCHUNK}
 
 [job]

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/random_writes.fio b/zfs/tests/zfs-tests/tests/perf/fio/random_writes.fio
index 90db5ce..3b84b19 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/random_writes.fio
+++ b/zfs/tests/zfs-tests/tests/perf/fio/random_writes.fio

@@ -10,7 +10,8 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2016, Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 [global]
@@ -28,7 +29,9 @@
 direct=${DIRECT}
 numjobs=${NUMJOBS}
 filesize=${FILESIZE}
-buffer_compress_percentage=66
-buffer_compress_chunk=4096
+randseed=${RANDSEED}
+buffer_compress_percentage=${COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${COMPCHUNK}
 
 [job]

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/sequential_readwrite.fio b/zfs/tests/zfs-tests/tests/perf/fio/sequential_readwrite.fio
new file mode 100644
index 0000000..2037ba2
--- /dev/null
+++ b/zfs/tests/zfs-tests/tests/perf/fio/sequential_readwrite.fio

@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, 2016, Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
+#
+
+[global]
+filename_format=file$jobnum
+nrfiles=16
+group_reporting=1
+fallocate=0
+overwrite=0
+thread=1
+rw=readwrite
+rwmixread=80
+time_based=1
+directory=${DIRECTORY}
+runtime=${RUNTIME}
+bssplit=4k/50:8k/30:128k/10:1m/10
+ioengine=psync
+sync=${SYNC_TYPE}
+direct=${DIRECT}
+numjobs=${NUMJOBS}
+randseed=${RANDSEED}
+buffer_compress_percentage=${COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${COMPCHUNK}
+
+[job]

diff --git a/zfs/tests/zfs-tests/tests/perf/fio/sequential_writes.fio b/zfs/tests/zfs-tests/tests/perf/fio/sequential_writes.fio
index 714993e..4582c81 100644
--- a/zfs/tests/zfs-tests/tests/perf/fio/sequential_writes.fio
+++ b/zfs/tests/zfs-tests/tests/perf/fio/sequential_writes.fio

@@ -10,7 +10,8 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2016, Delphix. All rights reserved.
+# Copyright (c) 2020, Kjeld Schouten-Lebbing. All rights reserved.
 #
 
 [global]
@@ -28,7 +29,9 @@
 direct=${DIRECT}
 numjobs=${NUMJOBS}
 filesize=${FILESIZE}
-buffer_compress_percentage=66
-buffer_compress_chunk=4096
+randseed=${RANDSEED}
+buffer_compress_percentage=${COMPPERCENT}
+buffer_pattern=0xdeadbeef
+buffer_compress_chunk=${COMPCHUNK}
 
 [job]

diff --git a/zfs/tests/zfs-tests/tests/perf/perf.shlib b/zfs/tests/zfs-tests/tests/perf/perf.shlib
index e2e84ca..6f4fdc9 100644
--- a/zfs/tests/zfs-tests/tests/perf/perf.shlib
+++ b/zfs/tests/zfs-tests/tests/perf/perf.shlib

@@ -10,18 +10,21 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 # Copyright (c) 2016, Intel Corporation.
 #
 
 . $STF_SUITE/include/libtest.shlib
 
-# If neither is specified, do a nightly run.
-[[ -z $PERF_REGRESSION_WEEKLY ]] && export PERF_REGRESSION_NIGHTLY=1
+# Defaults common to all the tests in the regression group
+export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
+export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
+export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
+export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
+export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
 
-# Default runtime for each type of test run.
-export PERF_RUNTIME_WEEKLY=$((30 * 60))
-export PERF_RUNTIME_NIGHTLY=$((10 * 60))
+# Default to JSON for fio output
+export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}
 
 # Default fs creation options
 export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
@@ -87,8 +90,7 @@
 
 	if $clear_cache; then
 		# Clear the ARC
-		zpool export $PERFPOOL
-		zpool import $PERFPOOL
+		log_must zinject -a
 	fi
 
 	if [[ -n $ZINJECT_DELAYS ]]; then
@@ -106,6 +108,9 @@
 	log_note "DIRECTORY: " $DIRECTORY
 
 	export RUNTIME=$PERF_RUNTIME
+	export RANDSEED=$PERF_RANDSEED
+	export COMPPERCENT=$PERF_COMPPERCENT
+	export COMPCHUNK=$PERF_COMPCHUNK
 	export FILESIZE=$((TOTAL_SIZE / threads))
 	export NUMJOBS=$threads
 	export SYNC_TYPE=$sync
@@ -136,11 +141,14 @@
 	# Start the load
 	if [[ $NFS -eq 1 ]]; then
 		log_must ssh -t $NFS_USER@$NFS_CLIENT "
-			fio --output /tmp/fio.out /tmp/test.fio
+			fio --output-format=${PERF_FIO_FORMAT} \
+			    --output /tmp/fio.out /tmp/test.fio
 		"
 		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
+		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
 	else
-		log_must fio --output $outfile $FIO_SCRIPTS/$script
+		log_must fio --output-format=${PERF_FIO_FORMAT} \
+		    --output $outfile $FIO_SCRIPTS/$script
 	fi
 }
 
@@ -150,8 +158,6 @@
 # set before launching zfstest to override the defaults.
 #
 # PERF_RUNTIME: The time in seconds each fio invocation should run.
-# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
-#    nightly and weekly.
 # PERF_NTHREADS: A list of how many threads each fio invocation will use.
 # PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
 # PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
@@ -189,7 +195,7 @@
 function do_setup_nfs
 {
 	typeset script=$1
-	zfs  set sharenfs=on $TESTFS
+	zfs set sharenfs=on $TESTFS
 	log_must chmod  -R 777 /$TESTFS
 
 	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
@@ -375,14 +381,18 @@
 
 function get_min_arc_size
 {
-	if is_linux; then
-		typeset -l min_arc_size=`awk '$1 == "c_min" { print $3 }' \
-		    /proc/spl/kstat/zfs/arcstats`
-	else
-		typeset -l min_arc_size=$(dtrace -qn 'BEGIN {
+	typeset -l min_arc_size
+
+	if is_freebsd; then
+		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
+	elif is_illumos; then
+		min_arc_size=$(dtrace -qn 'BEGIN {
 		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
 		    exit(0);
 		}')
+	elif is_linux; then
+		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
+		    /proc/spl/kstat/zfs/arcstats`
 	fi
 
 	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"
@@ -392,14 +402,18 @@
 
 function get_max_arc_size
 {
-	if is_linux; then
-		typeset -l max_arc_size=`awk '$1 == "c_max" { print $3 }' \
-		    /proc/spl/kstat/zfs/arcstats`
-	else
-		typeset -l max_arc_size=$(dtrace -qn 'BEGIN {
+	typeset -l max_arc_size
+
+	if is_freebsd; then
+		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
+	elif is_illumos; then
+		max_arc_size=$(dtrace -qn 'BEGIN {
 		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
 		    exit(0);
 		}')
+	elif is_linux; then
+		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
+		    /proc/spl/kstat/zfs/arcstats`
 	fi
 
 	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"
@@ -407,22 +421,44 @@
 	echo $max_arc_size
 }
 
-function get_max_dbuf_cache_size
+function get_arc_target
 {
-	typeset -l max_dbuf_cache_size
+	typeset -l arc_c
 
-	if is_linux; then
-		max_dbuf_cache_size=$(get_tunable dbuf_cache_max_bytes)
-	else
-		max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
+	if is_freebsd; then
+		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
+	elif is_illumos; then
+		arc_c=$(dtrace -qn 'BEGIN {
+		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
+		    exit(0);
+		}')
+	elif is_linux; then
+		arc_c=`awk '$1 == "c" { print $3 }' \
+		    /proc/spl/kstat/zfs/arcstats`
+	fi
+
+	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"
+
+	echo $arc_c
+}
+
+function get_dbuf_cache_size
+{
+	typeset -l dbuf_cache_size dbuf_cache_shift
+
+	if is_illumos; then
+		dbuf_cache_size=$(dtrace -qn 'BEGIN {
 		    printf("%u\n", `dbuf_cache_max_bytes);
 		    exit(0);
 		}')
-
-		[[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed"
+	else
+		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
+		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
 	fi
 
-	echo $max_dbuf_cache_size
+	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"
+
+	echo $dbuf_cache_size
 }
 
 # Create a file with some information about how this system is configured.
@@ -523,14 +559,7 @@
 	typeset ctd ctds devname lun
 	typeset lun_list=':'
 
-	if is_linux; then
-		ctds=$(zpool list -HLv $pool | \
-		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')
-
-		for ctd in $ctds; do
-			lun_list="$lun_list$ctd:"
-		done
-	else
+	if is_illumos; then
 		ctds=$(zpool list -v $pool |
 		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')
 
@@ -542,12 +571,31 @@
 		# number to the list for comparison with dev_statname.
 		lun=$(sed 's/"//g' /etc/path_to_inst | grep \
 		    $devname | awk '{print $3$2}')
-		un_list="$lun_list$lun:"
+		lun_list="$lun_list$lun:"
+		done
+	elif is_freebsd; then
+		lun_list+=$(zpool list -HLv $pool | \
+		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/ {
+		         printf "%s:", $1 }')
+	elif is_linux; then
+		ctds=$(zpool list -HLv $pool | \
+		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')
+
+		for ctd in $ctds; do
+			lun_list="$lun_list$ctd:"
 		done
 	fi
 	echo $lun_list
 }
 
+function print_perf_settings
+{
+	echo "PERF_NTHREADS: $PERF_NTHREADS"
+	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
+	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
+	echo "PERF_IOSIZES: $PERF_IOSIZES"
+}
+
 # Create a perf_data directory to hold performance statistics and
 # configuration information.
 export PERF_DATA_DIR=$(get_perf_output_dir)

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/random_reads.ksh b/zfs/tests/zfs-tests/tests/perf/regression/random_reads.ksh
index 079a536..5c8066d 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/random_reads.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/random_reads.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -55,22 +55,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 128k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
 
 # Layout the files to be used by the read tests. Create as many files as the
 # largest number of threads. An fio run with fewer threads will use a subset
@@ -103,6 +91,6 @@
 	)
 fi
 
-log_note "Random reads with $PERF_RUNTYPE settings"
+log_note "Random reads with settings: $(print_perf_settings)"
 do_fio_run random_reads.fio false true
 log_pass "Measure IO stats during random read load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh b/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh
index 5090d19..33d7d8c 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -55,22 +55,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'4 8 16 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
-	export PERF_IOSIZES=''		# bssplit used instead
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=''		# bssplit used instead
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=''		# bssplit used instead
 
 # Layout the files to be used by the readwrite tests. Create as many files
 # as the largest number of threads. An fio run with fewer threads will use
@@ -103,6 +91,6 @@
 	)
 fi
 
-log_note "Random reads and writes with $PERF_RUNTYPE settings"
+log_note "Random reads and writes with settings: $(print_perf_settings)"
 do_fio_run random_readwrite.fio false true
 log_pass "Measure IO stats during random read and write load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh b/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh
index e368ed2..bb40145 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh

@@ -1,5 +1,6 @@
 #!/bin/ksh
-# file and its contents are supplied under the terms of the
+
+# This file and its contents are supplied under the terms of the
 # Common Development and Distribution License ("CDDL"), version 1.0.
 # You may only use this file in accordance with the terms of version
 # 1.0 of the CDDL.
@@ -10,7 +11,7 @@
 #
 
 #
-# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright (c) 2017, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -44,22 +45,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
-	export PERF_IOSIZES='8k 64k'
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES='8k'
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
 
 # Layout the files to be used by the readwrite tests. Create as many files
 # as the largest number of threads. An fio run with fewer threads will use
@@ -94,6 +83,6 @@
 	)
 fi
 
-log_note "Random reads and writes with $PERF_RUNTYPE settings"
+log_note "Random reads and writes with settings: $(print_perf_settings)"
 do_fio_run random_readwrite_fixed.fio false true
 log_pass "Measure IO stats during random read and write load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/random_writes.ksh b/zfs/tests/zfs-tests/tests/perf/regression/random_writes.ksh
index 3101ac9..4b82683 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/random_writes.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/random_writes.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -54,22 +54,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'1 4 8 16 32 64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 256k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'32 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'32 128'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
 
 # Set up the scripts and output files that will log performance data.
 lun_list=$(pool_to_lun_list $PERFPOOL)
@@ -94,6 +82,6 @@
 	)
 fi
 
-log_note "Random writes with $PERF_RUNTYPE settings"
+log_note "Random writes with settings: $(print_perf_settings)"
 do_fio_run random_writes.fio true false
 log_pass "Measure IO stats during random write load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh b/zfs/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh
index e0b2532..522ee45 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -43,22 +43,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'1 2 4 8 16 32 64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0 1'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
-
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'1 4 16 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0 1'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'1 4 16 64'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0 1'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'8k'}
 
 # Until the performance tests over NFS can deal with multiple file systems,
 # force the use of only one file system when testing over NFS.
@@ -89,6 +77,7 @@
 	    "dtrace  -s $PERF_SCRIPTS/offcpu-profile.d" "offcpu-profile"
 	)
 fi
-log_note "ZIL specific random write workload with $PERF_RUNTYPE settings"
+log_note \
+    "ZIL specific random write workload with settings: $(print_perf_settings)"
 do_fio_run random_writes.fio true false
 log_pass "Measure IO stats during ZIL specific random write workload"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh
index 37e2196..2bdfff7 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -55,22 +55,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 128k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
 
 # Layout the files to be used by the read tests. Create as many files as the
 # largest number of threads. An fio run with fewer threads will use a subset
@@ -105,6 +93,6 @@
 	)
 fi
 
-log_note "Sequential reads with $PERF_RUNTYPE settings"
+log_note "Sequential reads with settings: $(print_perf_settings)"
 do_fio_run sequential_reads.fio false true
 log_pass "Measure IO stats during sequential read load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh
index 9528d97..8127786 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -45,22 +45,10 @@
 # Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
 export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 128k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
 
 # Layout the files to be used by the read tests. Create as many files as the
 # largest number of threads. An fio run with fewer threads will use a subset
@@ -95,6 +83,6 @@
 	)
 fi
 
-log_note "Sequential cached reads with $PERF_RUNTYPE settings"
+log_note "Sequential cached reads with settings: $(print_perf_settings)"
 do_fio_run sequential_reads.fio false false
 log_pass "Measure IO stats during sequential cached read load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh
index f274764..8ce1273 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -51,22 +51,10 @@
 # Make sure the working set can be cached in the arc. Aim for 1/2 of arc.
 export TOTAL_SIZE=$(($(get_max_arc_size) / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 128k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'}
 
 # Layout the files to be used by the read tests. Create as many files as the
 # largest number of threads. An fio run with fewer threads will use a subset
@@ -121,6 +109,7 @@
 	)
 fi
 
-log_note "Sequential cached reads from $DIRECTORY with $PERF_RUNTYPE settings"
+log_note "Sequential cached reads from $DIRECTORY with " \
+    "settings: $(print_perf_settings)"
 do_fio_run sequential_reads.fio false false
 log_pass "Measure IO stats during sequential cached read load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh
index 884856c..adacdc2 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2016, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -25,7 +25,7 @@
 # for all fio runs. The ARC is not cleared to ensure that all data is cached.
 #
 # This is basically a copy of the sequential_reads_cached test case, but with
-# a smaller dateset so that we can fit everything into the decompressed, linear
+# a smaller dataset so that we can fit everything into the decompressed, linear
 # space in the dbuf cache.
 #
 
@@ -47,24 +47,12 @@
 populate_perf_filesystems
 
 # Ensure the working set can be cached in the dbuf cache.
-export TOTAL_SIZE=$(($(get_max_dbuf_cache_size) * 3 / 4))
+export TOTAL_SIZE=$(($(get_dbuf_cache_size) * 3 / 4))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'8 16 32 64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 128k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'64'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'64k'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'64'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'64k'}
 
 # Layout the files to be used by the read tests. Create as many files as the
 # largest number of threads. An fio run with fewer threads will use a subset
@@ -101,6 +89,6 @@
 	)
 fi
 
-log_note "Sequential cached reads with $PERF_RUNTYPE settings"
+log_note "Sequential cached reads with settings: $(print_perf_settings)"
 do_fio_run sequential_reads.fio false false
 log_pass "Measure IO stats during sequential cached read load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh b/zfs/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh
index 8ea4f84..d32690a 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh

@@ -12,7 +12,7 @@
 #
 
 #
-# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 #
@@ -54,22 +54,10 @@
 # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio.
 export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2))
 
-# Variables for use by fio.
-if [[ -n $PERF_REGRESSION_WEEKLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'1 4 8 16 32 64 128'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 64k 256k'}
-elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then
-	export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY}
-	export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'}
-	export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
-	export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
-	export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
-	export PERF_IOSIZES=${PERF_IOSIZES:-'8k 128k 1m'}
-fi
+# Variables specific to this test for use by fio.
+export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'}
+export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'}
+export PERF_IOSIZES=${PERF_IOSIZES:-'8k 128k 1m'}
 
 # Set up the scripts and output files that will log performance data.
 lun_list=$(pool_to_lun_list $PERFPOOL)
@@ -94,6 +82,6 @@
 	)
 fi
 
-log_note "Sequential writes with $PERF_RUNTYPE settings"
+log_note "Sequential writes with settings: $(print_perf_settings)"
 do_fio_run sequential_writes.fio true false
 log_pass "Measure IO stats during sequential write load"

diff --git a/zfs/tests/zfs-tests/tests/perf/regression/setup.ksh b/zfs/tests/zfs-tests/tests/perf/regression/setup.ksh
index 1544f63..68be00d 100755
--- a/zfs/tests/zfs-tests/tests/perf/regression/setup.ksh
+++ b/zfs/tests/zfs-tests/tests/perf/regression/setup.ksh

@@ -12,12 +12,11 @@
 #
 
 #
-# Copyright (c) 2015 by Delphix. All rights reserved.
+# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
 
 verify_runnable "global"
-verify_disk_count "$DISKS" 3
 
 log_pass

diff --git a/zfs/tests/zfs-tests/tests/perf/scripts/prefetch_io.sh b/zfs/tests/zfs-tests/tests/perf/scripts/prefetch_io.sh
index 75bf08f..07688ef 100755
--- a/zfs/tests/zfs-tests/tests/perf/scripts/prefetch_io.sh
+++ b/zfs/tests/zfs-tests/tests/perf/scripts/prefetch_io.sh

@@ -1,4 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
+# shellcheck disable=SC1004
 
 #
 # This file and its contents are supplied under the terms of the
@@ -24,38 +25,37 @@
 
 function get_prefetch_ios
 {
-        typeset -l data_misses=`awk '$1 == "prefetch_data_misses" \
-            { print $3 }' $zfs_kstats/arcstats`
-        typeset -l metadata_misses=`awk '$1 == "prefetch_metadata_misses" \
-            { print $3 }' $zfs_kstats/arcstats`
-        typeset -l total_misses=$(( $data_misses + $metadata_misses ))
+	typeset -l data_misses="$(awk '$1 == "prefetch_data_misses" \
+	    { print $3; exit }' "$zfs_kstats/arcstats")"
+	typeset -l metadata_misses="$(awk '$1 == "prefetch_metadata_misses" \
+	    { print $3; exit }' "$zfs_kstats/arcstats")"
+	typeset -l total_misses=$(( data_misses + metadata_misses ))
 
-        echo $total_misses
+	echo "$total_misses"
 }
 
 function get_prefetched_demand_reads
 {
-	typeset -l demand_reads=`awk '$1 == "demand_hit_predictive_prefetch" \
-	    { print $3 }' $zfs_kstats/arcstats`
+	typeset -l demand_reads="$(awk '$1 == "demand_hit_predictive_prefetch" \
+	    { print $3; exit }' "$zfs_kstats/arcstats")"
 
-	echo $demand_reads
+	echo "$demand_reads"
 }
 
 function get_async_upgrade_sync
 {
-	typeset -l sync_wait=`awk '$1 == "async_upgrade_sync" \
-	    { print $3 }' $zfs_kstats/arcstats`
+	typeset -l sync_wait="$(awk '$1 == "async_upgrade_sync" \
+	    { print $3; exit }' "$zfs_kstats/arcstats")"
 
-	echo $sync_wait
+	echo "$sync_wait"
 }
 
 if [ $# -ne 2 ]
 then
-	echo "Usage: `basename $0` <poolname> interval" >&2
+	echo "Usage: ${0##*/} poolname interval" >&2
 	exit 1
 fi
 
-poolname=$1
 interval=$2
 prefetch_ios=$(get_prefetch_ios)
 prefetched_demand_reads=$(get_prefetched_demand_reads)
@@ -64,19 +64,19 @@
 while true
 do
 	new_prefetch_ios=$(get_prefetch_ios)
-	printf "%u\n%-24s\t%u\n" $(date +%s) "prefetch_ios" \
-	    $(( $new_prefetch_ios - $prefetch_ios ))
+	printf '%u\n%-24s\t%u\n' "$(date +%s)" "prefetch_ios" \
+	    $(( new_prefetch_ios - prefetch_ios ))
 	prefetch_ios=$new_prefetch_ios
 
 	new_prefetched_demand_reads=$(get_prefetched_demand_reads)
-	printf "%-24s\t%u\n" "prefetched_demand_reads" \
-	    $(( $new_prefetched_demand_reads - $prefetched_demand_reads ))
+	printf '%-24s\t%u\n' "prefetched_demand_reads" \
+	    $(( new_prefetched_demand_reads - prefetched_demand_reads ))
 	prefetched_demand_reads=$new_prefetched_demand_reads
 
 	new_async_upgrade_sync=$(get_async_upgrade_sync)
-	printf "%-24s\t%u\n" "async_upgrade_sync" \
-	    $(( $new_async_upgrade_sync - $async_upgrade_sync ))
+	printf '%-24s\t%u\n' "async_upgrade_sync" \
+	    $(( new_async_upgrade_sync - async_upgrade_sync ))
 	async_upgrade_sync=$new_async_upgrade_sync
 
-	sleep $interval
+	sleep "$interval"
 done

diff --git a/zfs/udev/rules.d/.gitignore b/zfs/udev/rules.d/.gitignore
index e7f7be8..aba2561 100644
--- a/zfs/udev/rules.d/.gitignore
+++ b/zfs/udev/rules.d/.gitignore

@@ -1,4 +1 @@
-69-vdev.rules
-60-zpool.rules
-60-zvol.rules
-90-zfs.rules
+*.rules

diff --git a/zfs/udev/rules.d/60-zvol.rules.in b/zfs/udev/rules.d/60-zvol.rules.in
index 60bbff8..a3c7d2a 100644
--- a/zfs/udev/rules.d/60-zvol.rules.in
+++ b/zfs/udev/rules.d/60-zvol.rules.in

@@ -3,4 +3,4 @@
 # persistent disk links: /dev/zvol/dataset_name
 # also creates compatibility symlink of /dev/dataset_name
 
-KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="@udevdir@/zvol_id $tempnode" SYMLINK+="zvol/%c %c"
+KERNEL=="zd*", SUBSYSTEM=="block", ACTION=="add|change", PROGRAM=="@udevdir@/zvol_id $devnode", SYMLINK+="zvol/%c %c"

diff --git a/zfs/udev/rules.d/Makefile.am b/zfs/udev/rules.d/Makefile.am
index 86c33fc..72f47d6 100644
--- a/zfs/udev/rules.d/Makefile.am
+++ b/zfs/udev/rules.d/Makefile.am

@@ -1,21 +1,8 @@
+include $(top_srcdir)/config/Substfiles.am
+
 udevrule_DATA = \
 	69-vdev.rules \
 	60-zvol.rules \
 	90-zfs.rules
 
-EXTRA_DIST = \
-	$(top_srcdir)/udev/rules.d/69-vdev.rules.in \
-	$(top_srcdir)/udev/rules.d/60-zvol.rules.in \
-	$(top_srcdir)/udev/rules.d/90-zfs.rules.in
-
-$(udevrule_DATA):%:%.in
-	-$(SED) -e 's,@bindir\@,$(bindir),g' \
-		-e 's,@sbindir\@,$(sbindir),g' \
-		-e 's,@udevdir\@,$(udevdir),g' \
-		-e 's,@udevruledir\@,$(udevruledir),g' \
-		-e 's,@sysconfdir\@,$(sysconfdir),g' \
-		$< > '$@'
-
-# Double-colon rules are allowed; there are multiple independent definitions.
-distclean-local::
-	-$(RM) $(udevrule_DATA)
+SUBSTFILES += $(udevrule_DATA)
commit	017c3036efc179312f18eb3dfa01f3deadbb7aa3	[log] [tgz]
author	Phil Abercrombie <pabercrombie@google.com>	Wed Mar 27 03:33:19 2024 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Wed Mar 27 03:33:19 2024 +0000
tree	7d6a022292311ddbcb5d0485a4259ad6ecbcdddd
parent	839ffed0d213f7991a8414e738189fd3eb4a7e11 [diff]
parent	7497c4e43f1fb791dfc4ea04c146ce143101ac9b [diff]